[apple/xnu.git] xnu-2050.7.9 / osfmk / vm / vm_map.c
1 /*
2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_map.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * Virtual memory mapping module.
64 */
65
66 #include <task_swapper.h>
67 #include <mach_assert.h>
68 #include <libkern/OSAtomic.h>
69
70 #include <mach/kern_return.h>
71 #include <mach/port.h>
72 #include <mach/vm_attributes.h>
73 #include <mach/vm_param.h>
74 #include <mach/vm_behavior.h>
75 #include <mach/vm_statistics.h>
76 #include <mach/memory_object.h>
77 #include <mach/mach_vm.h>
78 #include <machine/cpu_capabilities.h>
79 #include <mach/sdt.h>
80
81 #include <kern/assert.h>
82 #include <kern/counters.h>
83 #include <kern/kalloc.h>
84 #include <kern/zalloc.h>
85
86 #include <vm/cpm.h>
87 #include <vm/vm_init.h>
88 #include <vm/vm_fault.h>
89 #include <vm/vm_map.h>
90 #include <vm/vm_object.h>
91 #include <vm/vm_page.h>
92 #include <vm/vm_pageout.h>
93 #include <vm/vm_kern.h>
94 #include <ipc/ipc_port.h>
95 #include <kern/sched_prim.h>
96 #include <kern/misc_protos.h>
97 #include <kern/xpr.h>
98
99 #include <mach/vm_map_server.h>
100 #include <mach/mach_host_server.h>
101 #include <vm/vm_protos.h>
102 #include <vm/vm_purgeable_internal.h>
103
104 #include <vm/vm_protos.h>
105 #include <vm/vm_shared_region.h>
106 #include <vm/vm_map_store.h>
107
108 extern u_int32_t random(void); /* from <libkern/libkern.h> */
109 /* Internal prototypes
110 */
111
112 static void vm_map_simplify_range(
113 vm_map_t map,
114 vm_map_offset_t start,
115 vm_map_offset_t end); /* forward */
116
117 static boolean_t vm_map_range_check(
118 vm_map_t map,
119 vm_map_offset_t start,
120 vm_map_offset_t end,
121 vm_map_entry_t *entry);
122
123 static vm_map_entry_t _vm_map_entry_create(
124 struct vm_map_header *map_header, boolean_t map_locked);
125
126 static void _vm_map_entry_dispose(
127 struct vm_map_header *map_header,
128 vm_map_entry_t entry);
129
130 static void vm_map_pmap_enter(
131 vm_map_t map,
132 vm_map_offset_t addr,
133 vm_map_offset_t end_addr,
134 vm_object_t object,
135 vm_object_offset_t offset,
136 vm_prot_t protection);
137
138 static void _vm_map_clip_end(
139 struct vm_map_header *map_header,
140 vm_map_entry_t entry,
141 vm_map_offset_t end);
142
143 static void _vm_map_clip_start(
144 struct vm_map_header *map_header,
145 vm_map_entry_t entry,
146 vm_map_offset_t start);
147
148 static void vm_map_entry_delete(
149 vm_map_t map,
150 vm_map_entry_t entry);
151
152 static kern_return_t vm_map_delete(
153 vm_map_t map,
154 vm_map_offset_t start,
155 vm_map_offset_t end,
156 int flags,
157 vm_map_t zap_map);
158
159 static kern_return_t vm_map_copy_overwrite_unaligned(
160 vm_map_t dst_map,
161 vm_map_entry_t entry,
162 vm_map_copy_t copy,
163 vm_map_address_t start);
164
165 static kern_return_t vm_map_copy_overwrite_aligned(
166 vm_map_t dst_map,
167 vm_map_entry_t tmp_entry,
168 vm_map_copy_t copy,
169 vm_map_offset_t start,
170 pmap_t pmap);
171
172 static kern_return_t vm_map_copyin_kernel_buffer(
173 vm_map_t src_map,
174 vm_map_address_t src_addr,
175 vm_map_size_t len,
176 boolean_t src_destroy,
177 vm_map_copy_t *copy_result); /* OUT */
178
179 static kern_return_t vm_map_copyout_kernel_buffer(
180 vm_map_t map,
181 vm_map_address_t *addr, /* IN/OUT */
182 vm_map_copy_t copy,
183 boolean_t overwrite);
184
185 static void vm_map_fork_share(
186 vm_map_t old_map,
187 vm_map_entry_t old_entry,
188 vm_map_t new_map);
189
190 static boolean_t vm_map_fork_copy(
191 vm_map_t old_map,
192 vm_map_entry_t *old_entry_p,
193 vm_map_t new_map);
194
195 void vm_map_region_top_walk(
196 vm_map_entry_t entry,
197 vm_region_top_info_t top);
198
199 void vm_map_region_walk(
200 vm_map_t map,
201 vm_map_offset_t va,
202 vm_map_entry_t entry,
203 vm_object_offset_t offset,
204 vm_object_size_t range,
205 vm_region_extended_info_t extended,
206 boolean_t look_for_pages);
207
208 static kern_return_t vm_map_wire_nested(
209 vm_map_t map,
210 vm_map_offset_t start,
211 vm_map_offset_t end,
212 vm_prot_t access_type,
213 boolean_t user_wire,
214 pmap_t map_pmap,
215 vm_map_offset_t pmap_addr);
216
217 static kern_return_t vm_map_unwire_nested(
218 vm_map_t map,
219 vm_map_offset_t start,
220 vm_map_offset_t end,
221 boolean_t user_wire,
222 pmap_t map_pmap,
223 vm_map_offset_t pmap_addr);
224
225 static kern_return_t vm_map_overwrite_submap_recurse(
226 vm_map_t dst_map,
227 vm_map_offset_t dst_addr,
228 vm_map_size_t dst_size);
229
230 static kern_return_t vm_map_copy_overwrite_nested(
231 vm_map_t dst_map,
232 vm_map_offset_t dst_addr,
233 vm_map_copy_t copy,
234 boolean_t interruptible,
235 pmap_t pmap,
236 boolean_t discard_on_success);
237
238 static kern_return_t vm_map_remap_extract(
239 vm_map_t map,
240 vm_map_offset_t addr,
241 vm_map_size_t size,
242 boolean_t copy,
243 struct vm_map_header *map_header,
244 vm_prot_t *cur_protection,
245 vm_prot_t *max_protection,
246 vm_inherit_t inheritance,
247 boolean_t pageable);
248
249 static kern_return_t vm_map_remap_range_allocate(
250 vm_map_t map,
251 vm_map_address_t *address,
252 vm_map_size_t size,
253 vm_map_offset_t mask,
254 int flags,
255 vm_map_entry_t *map_entry);
256
257 static void vm_map_region_look_for_page(
258 vm_map_t map,
259 vm_map_offset_t va,
260 vm_object_t object,
261 vm_object_offset_t offset,
262 int max_refcnt,
263 int depth,
264 vm_region_extended_info_t extended);
265
266 static int vm_map_region_count_obj_refs(
267 vm_map_entry_t entry,
268 vm_object_t object);
269
270
271 static kern_return_t vm_map_willneed(
272 vm_map_t map,
273 vm_map_offset_t start,
274 vm_map_offset_t end);
275
276 static kern_return_t vm_map_reuse_pages(
277 vm_map_t map,
278 vm_map_offset_t start,
279 vm_map_offset_t end);
280
281 static kern_return_t vm_map_reusable_pages(
282 vm_map_t map,
283 vm_map_offset_t start,
284 vm_map_offset_t end);
285
286 static kern_return_t vm_map_can_reuse(
287 vm_map_t map,
288 vm_map_offset_t start,
289 vm_map_offset_t end);
290
291
292 /*
293 * Macros to copy a vm_map_entry. We must be careful to correctly
294 * manage the wired page count. vm_map_entry_copy() creates a new
295 * map entry for the same memory - the wired count in the new entry
296 * must be set to zero. vm_map_entry_copy_full() creates a new
297 * entry that is identical to the old entry. This preserves the
298 * wire count; it's used for map splitting and zone changing in
299 * vm_map_copyout.
300 */
301
302 #define vm_map_entry_copy(NEW,OLD) \
303 MACRO_BEGIN \
304 boolean_t _vmec_reserved = (NEW)->from_reserved_zone; \
305 *(NEW) = *(OLD); \
306 (NEW)->is_shared = FALSE; \
307 (NEW)->needs_wakeup = FALSE; \
308 (NEW)->in_transition = FALSE; \
309 (NEW)->wired_count = 0; \
310 (NEW)->user_wired_count = 0; \
311 (NEW)->permanent = FALSE; \
312 (NEW)->used_for_jit = FALSE; \
313 (NEW)->from_reserved_zone = _vmec_reserved; \
314 MACRO_END
315
316 #define vm_map_entry_copy_full(NEW,OLD) \
317 MACRO_BEGIN \
318 boolean_t _vmecf_reserved = (NEW)->from_reserved_zone; \
319 (*(NEW) = *(OLD)); \
320 (NEW)->from_reserved_zone = _vmecf_reserved; \
321 MACRO_END
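
/*
 * Illustrative sketch (hypothetical helper, not part of the original
 * source): the distinction above in practice.  vm_map_entry_copy() is
 * used when an entry is cloned into a new mapping, so the clone must not
 * claim wired pages it never wired; vm_map_entry_copy_full() is used when
 * an entry is duplicated in place (e.g. while splitting), so the clone
 * stays bit-for-bit identical, wire counts included.
 */
static __unused void
example_entry_copy_semantics(
	vm_map_entry_t	dst,		/* an already-created entry */
	vm_map_entry_t	src)
{
	/* clone for a new mapping: wired counts are reset */
	vm_map_entry_copy(dst, src);
	assert(dst->wired_count == 0 && dst->user_wired_count == 0);

	/* identical clone: wired counts carry over unchanged */
	vm_map_entry_copy_full(dst, src);
	assert(dst->wired_count == src->wired_count);
}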
322
323 /*
324 * Decide if we want to allow processes to execute from their data or stack areas.
325 * override_nx() returns true if we do. Data/stack execution can be enabled independently
326 * for 32 and 64 bit processes. Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
327 * or allow_stack_exec to enable data execution for that type of data area for that particular
328 * ABI (or both by or'ing the flags together). These are initialized in the architecture
329 * specific pmap files since the default behavior varies according to architecture. The
330 * main reason it varies is because of the need to provide binary compatibility with old
331 * applications that were written before these restrictions came into being. In the old
332 * days, an app could execute anything it could read, but this has slowly been tightened
333 * up over time. The default behavior is:
334 *
335 * 32-bit PPC apps may execute from both stack and data areas
336 * 32-bit Intel apps may execute from data areas but not stack
337 * 64-bit PPC/Intel apps may not execute from either data or stack
338 *
339 * An application on any architecture may override these defaults by explicitly
340 * adding PROT_EXEC permission to the page in question with the mprotect(2)
341 * system call. This code here just determines what happens when an app tries to
342 * execute from a page that lacks execute permission.
343 *
344 * Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
345 * default behavior for both 32 and 64 bit apps on a system-wide basis. Furthermore,
346 * a Mach-O header flag bit (MH_NO_HEAP_EXECUTION) can be used to forcibly disallow
347 * execution from data areas for a particular binary even if the arch normally permits it. As
348 * a final wrinkle, a posix_spawn attribute flag can be used to negate this opt-in header bit
349 * to support some complicated use cases, notably browsers with out-of-process plugins that
350 * are not all NX-safe.
351 */
352
353 extern int allow_data_exec, allow_stack_exec;
354
355 int
356 override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
357 {
358 int current_abi;
359
360 /*
361 * Determine if the app is running in 32 or 64 bit mode.
362 */
363
364 if (vm_map_is_64bit(map))
365 current_abi = VM_ABI_64;
366 else
367 current_abi = VM_ABI_32;
368
369 /*
370 * Determine if we should allow the execution based on whether it's a
371 * stack or data area and the current architecture.
372 */
373
374 if (user_tag == VM_MEMORY_STACK)
375 return allow_stack_exec & current_abi;
376
377 return (allow_data_exec & current_abi) && (map->map_disallow_data_exec == FALSE);
378 }
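
/*
 * Illustrative sketch (hypothetical helper): how a fault path typically
 * consumes override_nx().  The entry's protection lacks execute, but if
 * the policy above allows data/stack execution for this ABI, execute
 * permission is added back before the page tables are filled.
 */
static __unused vm_prot_t
example_effective_protection(
	vm_map_t	map,
	vm_map_entry_t	entry)
{
	vm_prot_t	prot;

	prot = entry->protection;
	if (override_nx(map, entry->alias) && prot)
		prot |= VM_PROT_EXECUTE;
	return prot;
}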
379
380
381 /*
382 * Virtual memory maps provide for the mapping, protection,
383 * and sharing of virtual memory objects. In addition,
384 * this module provides for an efficient virtual copy of
385 * memory from one map to another.
386 *
387 * Synchronization is required prior to most operations.
388 *
389 * Maps consist of an ordered doubly-linked list of simple
390 * entries; a single hint is used to speed up lookups.
391 *
392 * Sharing maps have been deleted from this version of Mach.
393 * All shared objects are now mapped directly into the respective
394 * maps. This requires a change in the copy on write strategy;
395 * the asymmetric (delayed) strategy is used for shared temporary
396 * objects instead of the symmetric (shadow) strategy. All maps
397 * are now "top level" maps (either task map, kernel map or submap
398 * of the kernel map).
399 *
400 * Since portions of maps are specified by start/end addresses,
401 * which may not align with existing map entries, all
402 * routines merely "clip" entries to these start/end values.
403 * [That is, an entry is split into two, bordering at a
404 * start or end value.] Note that these clippings may not
405 * always be necessary (as the two resulting entries are then
406 * not changed); however, the clipping is done for convenience.
407 * No attempt is currently made to "glue back together" two
408 * abutting entries.
409 *
410 * The symmetric (shadow) copy strategy implements virtual copy
411 * by copying VM object references from one map to
412 * another, and then marking both regions as copy-on-write.
413 * It is important to note that only one writeable reference
414 * to a VM object region exists in any map when this strategy
415 * is used -- this means that shadow object creation can be
416 * delayed until a write operation occurs. The asymmetric (delayed)
417 * strategy allows multiple maps to have writeable references to
418 * the same region of a vm object, and hence cannot delay creating
419 * its copy objects. See vm_object_copy_quickly() in vm_object.c.
420 * Copying of permanent objects is completely different; see
421 * vm_object_copy_strategically() in vm_object.c.
422 */
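
/*
 * A concrete example of the clipping described above (illustrative):
 * if an entry covers [0x1000, 0x5000) and an operation targets
 * [0x2000, 0x4000), the entry is clipped at 0x2000 into
 * [0x1000, 0x2000) and [0x2000, 0x5000), and the second piece is then
 * clipped at 0x4000 into [0x2000, 0x4000) and [0x4000, 0x5000).  The
 * operation now applies to an exactly matching middle entry; the
 * neighbouring pieces are untouched and are not glued back together
 * afterwards.
 */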
423
424 static zone_t vm_map_zone; /* zone for vm_map structures */
425 static zone_t vm_map_entry_zone; /* zone for vm_map_entry structures */
426 static zone_t vm_map_entry_reserved_zone; /* zone with reserve for non-blocking
427 * allocations */
428 static zone_t vm_map_copy_zone; /* zone for vm_map_copy structures */
429
430
431 /*
432 * Placeholder object for submap operations. This object is dropped
433 * into the range by a call to vm_map_find, and removed when
434 * vm_map_submap creates the submap.
435 */
436
437 vm_object_t vm_submap_object;
438
439 static void *map_data;
440 static vm_size_t map_data_size;
441 static void *kentry_data;
442 static vm_size_t kentry_data_size;
443
444 #if CONFIG_EMBEDDED
445 #define NO_COALESCE_LIMIT 0
446 #else
447 #define NO_COALESCE_LIMIT ((1024 * 128) - 1)
448 #endif
449
450 /* Skip acquiring locks if we're in the midst of a kernel core dump */
451 unsigned int not_in_kdp = 1;
452
453 unsigned int vm_map_set_cache_attr_count = 0;
454
455 kern_return_t
456 vm_map_set_cache_attr(
457 vm_map_t map,
458 vm_map_offset_t va)
459 {
460 vm_map_entry_t map_entry;
461 vm_object_t object;
462 kern_return_t kr = KERN_SUCCESS;
463
464 vm_map_lock_read(map);
465
466 if (!vm_map_lookup_entry(map, va, &map_entry) ||
467 map_entry->is_sub_map) {
468 /*
469 * that memory is not properly mapped
470 */
471 kr = KERN_INVALID_ARGUMENT;
472 goto done;
473 }
474 object = map_entry->object.vm_object;
475
476 if (object == VM_OBJECT_NULL) {
477 /*
478 * there should be a VM object here at this point
479 */
480 kr = KERN_INVALID_ARGUMENT;
481 goto done;
482 }
483 vm_object_lock(object);
484 object->set_cache_attr = TRUE;
485 vm_object_unlock(object);
486
487 vm_map_set_cache_attr_count++;
488 done:
489 vm_map_unlock_read(map);
490
491 return kr;
492 }
493
494
495 #if CONFIG_CODE_DECRYPTION
496 /*
497 * vm_map_apple_protected:
498 * This remaps the requested part of the object with an object backed by
499 * the decrypting pager.
500 * crypt_info contains entry points and session data for the crypt module.
501 * The crypt_info block will be copied by vm_map_apple_protected. The data structures
502 * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
503 */
504 kern_return_t
505 vm_map_apple_protected(
506 vm_map_t map,
507 vm_map_offset_t start,
508 vm_map_offset_t end,
509 struct pager_crypt_info *crypt_info)
510 {
511 boolean_t map_locked;
512 kern_return_t kr;
513 vm_map_entry_t map_entry;
514 memory_object_t protected_mem_obj;
515 vm_object_t protected_object;
516 vm_map_offset_t map_addr;
517
518 vm_map_lock_read(map);
519 map_locked = TRUE;
520
521 /* lookup the protected VM object */
522 if (!vm_map_lookup_entry(map,
523 start,
524 &map_entry) ||
525 map_entry->vme_end < end ||
526 map_entry->is_sub_map) {
527 /* that memory is not properly mapped */
528 kr = KERN_INVALID_ARGUMENT;
529 goto done;
530 }
531 protected_object = map_entry->object.vm_object;
532 if (protected_object == VM_OBJECT_NULL) {
533 /* there should be a VM object here at this point */
534 kr = KERN_INVALID_ARGUMENT;
535 goto done;
536 }
537
538 /* make sure protected object stays alive while map is unlocked */
539 vm_object_reference(protected_object);
540
541 vm_map_unlock_read(map);
542 map_locked = FALSE;
543
544 /*
545 * Lookup (and create if necessary) the protected memory object
546 * matching that VM object.
547 * If successful, this also grabs a reference on the memory object,
548 * to guarantee that it doesn't go away before we get a chance to map
549 * it.
550 */
551 protected_mem_obj = apple_protect_pager_setup(protected_object, crypt_info);
552
553 /* release extra ref on protected object */
554 vm_object_deallocate(protected_object);
555
556 if (protected_mem_obj == NULL) {
557 kr = KERN_FAILURE;
558 goto done;
559 }
560
561 /* map this memory object in place of the current one */
562 map_addr = start;
563 kr = vm_map_enter_mem_object(map,
564 &map_addr,
565 end - start,
566 (mach_vm_offset_t) 0,
567 VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
568 (ipc_port_t) protected_mem_obj,
569 (map_entry->offset +
570 (start - map_entry->vme_start)),
571 TRUE,
572 map_entry->protection,
573 map_entry->max_protection,
574 map_entry->inheritance);
575 assert(map_addr == start);
576 /*
577 * Release the reference obtained by apple_protect_pager_setup().
578 * The mapping (if it succeeded) is now holding a reference on the
579 * memory object.
580 */
581 memory_object_deallocate(protected_mem_obj);
582
583 done:
584 if (map_locked) {
585 vm_map_unlock_read(map);
586 }
587 return kr;
588 }
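
/*
 * Illustrative sketch (hypothetical caller): handing an encrypted text
 * segment to vm_map_apple_protected().  The crypt_info block itself is
 * copied by the call, but the state its function pointers reference must
 * stay valid until crypt_info->crypt_end() is invoked.
 */
static __unused kern_return_t
example_protect_text_segment(
	vm_map_t			map,
	vm_map_offset_t			seg_start,
	vm_map_size_t			seg_size,
	struct pager_crypt_info		*crypt_info)	/* set up by the crypt module */
{
	return vm_map_apple_protected(map,
				      seg_start,
				      seg_start + seg_size,
				      crypt_info);
}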
589 #endif /* CONFIG_CODE_DECRYPTION */
590
591
592 lck_grp_t vm_map_lck_grp;
593 lck_grp_attr_t vm_map_lck_grp_attr;
594 lck_attr_t vm_map_lck_attr;
595
596
597 /*
598 * vm_map_init:
599 *
600 * Initialize the vm_map module. Must be called before
601 * any other vm_map routines.
602 *
603 * Map and entry structures are allocated from zones -- we must
604 * initialize those zones.
605 *
606 * There are three zones of interest:
607 *
608 * vm_map_zone: used to allocate maps.
609 * vm_map_entry_zone: used to allocate map entries.
610 * vm_map_entry_reserved_zone: fallback zone for kernel map entries
611 *
612 * The kernel allocates map entries from a special zone that is initially
613 * "crammed" with memory. It would be difficult (perhaps impossible) for
614 * the kernel to allocate more memory to an entry zone when it became
615 * empty since the very act of allocating memory implies the creation
616 * of a new entry.
617 */
618 void
619 vm_map_init(
620 void)
621 {
622 vm_size_t entry_zone_alloc_size;
623 const char *mez_name = "VM map entries";
624
625 vm_map_zone = zinit((vm_map_size_t) sizeof(struct _vm_map), 40*1024,
626 PAGE_SIZE, "maps");
627 zone_change(vm_map_zone, Z_NOENCRYPT, TRUE);
628 #if defined(__LP64__)
629 entry_zone_alloc_size = PAGE_SIZE * 5;
630 #else
631 entry_zone_alloc_size = PAGE_SIZE * 6;
632 #endif
633 vm_map_entry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
634 1024*1024, entry_zone_alloc_size,
635 mez_name);
636 zone_change(vm_map_entry_zone, Z_NOENCRYPT, TRUE);
637 zone_change(vm_map_entry_zone, Z_NOCALLOUT, TRUE);
638 zone_change(vm_map_entry_zone, Z_GZALLOC_EXEMPT, TRUE);
639
640 vm_map_entry_reserved_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
641 kentry_data_size * 64, kentry_data_size,
642 "Reserved VM map entries");
643 zone_change(vm_map_entry_reserved_zone, Z_NOENCRYPT, TRUE);
644
645 vm_map_copy_zone = zinit((vm_map_size_t) sizeof(struct vm_map_copy),
646 16*1024, PAGE_SIZE, "VM map copies");
647 zone_change(vm_map_copy_zone, Z_NOENCRYPT, TRUE);
648
649 /*
650 * Cram the map and kentry zones with initial data.
651 * Set reserved_zone non-collectible to aid zone_gc().
652 */
653 zone_change(vm_map_zone, Z_COLLECT, FALSE);
654
655 zone_change(vm_map_entry_reserved_zone, Z_COLLECT, FALSE);
656 zone_change(vm_map_entry_reserved_zone, Z_EXPAND, FALSE);
657 zone_change(vm_map_entry_reserved_zone, Z_FOREIGN, TRUE);
658 zone_change(vm_map_entry_reserved_zone, Z_NOCALLOUT, TRUE);
659 zone_change(vm_map_entry_reserved_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
660 zone_change(vm_map_copy_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
661 zone_change(vm_map_entry_reserved_zone, Z_GZALLOC_EXEMPT, TRUE);
662
663 zcram(vm_map_zone, (vm_offset_t)map_data, map_data_size);
664 zcram(vm_map_entry_reserved_zone, (vm_offset_t)kentry_data, kentry_data_size);
665
666 lck_grp_attr_setdefault(&vm_map_lck_grp_attr);
667 lck_grp_init(&vm_map_lck_grp, "vm_map", &vm_map_lck_grp_attr);
668 lck_attr_setdefault(&vm_map_lck_attr);
669
670 #if CONFIG_FREEZE
671 default_freezer_init();
672 #endif /* CONFIG_FREEZE */
673 }
674
675 void
676 vm_map_steal_memory(
677 void)
678 {
679 uint32_t kentry_initial_pages;
680
681 map_data_size = round_page(10 * sizeof(struct _vm_map));
682 map_data = pmap_steal_memory(map_data_size);
683
684 /*
685 * kentry_initial_pages corresponds to the number of kernel map entries
686 * required during bootstrap until the asynchronous replenishment
687 * scheme is activated and/or entries are available from the general
688 * map entry pool.
689 */
690 #if defined(__LP64__)
691 kentry_initial_pages = 10;
692 #else
693 kentry_initial_pages = 6;
694 #endif
695
696 #if CONFIG_GZALLOC
697 /* If using the guard allocator, reserve more memory for the kernel
698 * reserved map entry pool.
699 */
700 if (gzalloc_enabled())
701 kentry_initial_pages *= 1024;
702 #endif
703
704 kentry_data_size = kentry_initial_pages * PAGE_SIZE;
705 kentry_data = pmap_steal_memory(kentry_data_size);
706 }
707
708 void vm_kernel_reserved_entry_init(void) {
709 zone_prio_refill_configure(vm_map_entry_reserved_zone, (6*PAGE_SIZE)/sizeof(struct vm_map_entry));
710 }
711
712 /*
713 * vm_map_create:
714 *
715 * Creates and returns a new empty VM map with
716 * the given physical map structure, and having
717 * the given lower and upper address bounds.
718 */
719 vm_map_t
720 vm_map_create(
721 pmap_t pmap,
722 vm_map_offset_t min,
723 vm_map_offset_t max,
724 boolean_t pageable)
725 {
726 static int color_seed = 0;
727 register vm_map_t result;
728
729 result = (vm_map_t) zalloc(vm_map_zone);
730 if (result == VM_MAP_NULL)
731 panic("vm_map_create");
732
733 vm_map_first_entry(result) = vm_map_to_entry(result);
734 vm_map_last_entry(result) = vm_map_to_entry(result);
735 result->hdr.nentries = 0;
736 result->hdr.entries_pageable = pageable;
737
738 vm_map_store_init( &(result->hdr) );
739
740 result->size = 0;
741 result->user_wire_limit = MACH_VM_MAX_ADDRESS; /* default limit is unlimited */
742 result->user_wire_size = 0;
743 result->ref_count = 1;
744 #if TASK_SWAPPER
745 result->res_count = 1;
746 result->sw_state = MAP_SW_IN;
747 #endif /* TASK_SWAPPER */
748 result->pmap = pmap;
749 result->min_offset = min;
750 result->max_offset = max;
751 result->wiring_required = FALSE;
752 result->no_zero_fill = FALSE;
753 result->mapped_in_other_pmaps = FALSE;
754 result->wait_for_space = FALSE;
755 result->switch_protect = FALSE;
756 result->disable_vmentry_reuse = FALSE;
757 result->map_disallow_data_exec = FALSE;
758 result->highest_entry_end = 0;
759 result->first_free = vm_map_to_entry(result);
760 result->hint = vm_map_to_entry(result);
761 result->color_rr = (color_seed++) & vm_color_mask;
762 result->jit_entry_exists = FALSE;
763 #if CONFIG_FREEZE
764 result->default_freezer_handle = NULL;
765 #endif
766 vm_map_lock_init(result);
767 lck_mtx_init_ext(&result->s_lock, &result->s_lock_ext, &vm_map_lck_grp, &vm_map_lck_attr);
768
769 return(result);
770 }
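
/*
 * Illustrative sketch (hypothetical helper): a pmap-less map created with
 * vm_map_create(), in the style of the "zap" maps used later in this file
 * to park displaced entries while a range is being replaced.
 */
static __unused vm_map_t
example_create_zap_map(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_size_t	size)
{
	return vm_map_create(PMAP_NULL,
			     start,
			     start + size,
			     map->hdr.entries_pageable);
}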
771
772 /*
773 * vm_map_entry_create: [ internal use only ]
774 *
775 * Allocates a VM map entry for insertion in the
776 * given map (or map copy). No fields are filled.
777 */
778 #define vm_map_entry_create(map, map_locked) _vm_map_entry_create(&(map)->hdr, map_locked)
779
780 #define vm_map_copy_entry_create(copy, map_locked) \
781 _vm_map_entry_create(&(copy)->cpy_hdr, map_locked)
782 unsigned reserved_zalloc_count, nonreserved_zalloc_count;
783
784 static vm_map_entry_t
785 _vm_map_entry_create(
786 struct vm_map_header *map_header, boolean_t __unused map_locked)
787 {
788 zone_t zone;
789 vm_map_entry_t entry;
790
791 zone = vm_map_entry_zone;
792
793 assert(map_header->entries_pageable ? !map_locked : TRUE);
794
795 if (map_header->entries_pageable) {
796 entry = (vm_map_entry_t) zalloc(zone);
797 }
798 else {
799 entry = (vm_map_entry_t) zalloc_canblock(zone, FALSE);
800
801 if (entry == VM_MAP_ENTRY_NULL) {
802 zone = vm_map_entry_reserved_zone;
803 entry = (vm_map_entry_t) zalloc(zone);
804 OSAddAtomic(1, &reserved_zalloc_count);
805 } else
806 OSAddAtomic(1, &nonreserved_zalloc_count);
807 }
808
809 if (entry == VM_MAP_ENTRY_NULL)
810 panic("vm_map_entry_create");
811 entry->from_reserved_zone = (zone == vm_map_entry_reserved_zone);
812
813 vm_map_store_update( (vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE);
814 #if MAP_ENTRY_CREATION_DEBUG
815 fastbacktrace(&entry->vme_bt[0], (sizeof(entry->vme_bt)/sizeof(uintptr_t)));
816 #endif
817 return(entry);
818 }
819
820 /*
821 * vm_map_entry_dispose: [ internal use only ]
822 *
823 * Inverse of vm_map_entry_create.
824 *
825 * write map lock held so no need to
826 * do anything special to ensure correctness
827 * of the stores
828 */
829 #define vm_map_entry_dispose(map, entry) \
830 _vm_map_entry_dispose(&(map)->hdr, (entry))
831
832 #define vm_map_copy_entry_dispose(copy, entry) \
833 _vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
834
835 static void
836 _vm_map_entry_dispose(
837 register struct vm_map_header *map_header,
838 register vm_map_entry_t entry)
839 {
840 register zone_t zone;
841
842 if (map_header->entries_pageable || !(entry->from_reserved_zone))
843 zone = vm_map_entry_zone;
844 else
845 zone = vm_map_entry_reserved_zone;
846
847 if (!map_header->entries_pageable) {
848 if (zone == vm_map_entry_zone)
849 OSAddAtomic(-1, &nonreserved_zalloc_count);
850 else
851 OSAddAtomic(-1, &reserved_zalloc_count);
852 }
853
854 zfree(zone, entry);
855 }
856
857 #if MACH_ASSERT
858 static boolean_t first_free_check = FALSE;
859 boolean_t
860 first_free_is_valid(
861 vm_map_t map)
862 {
863 if (!first_free_check)
864 return TRUE;
865
866 return( first_free_is_valid_store( map ));
867 }
868 #endif /* MACH_ASSERT */
869
870
871 #define vm_map_copy_entry_link(copy, after_where, entry) \
872 _vm_map_store_entry_link(&(copy)->cpy_hdr, after_where, (entry))
873
874 #define vm_map_copy_entry_unlink(copy, entry) \
875 _vm_map_store_entry_unlink(&(copy)->cpy_hdr, (entry))
876
877 #if MACH_ASSERT && TASK_SWAPPER
878 /*
879 * vm_map_res_reference:
880 *
881 * Adds another valid residence count to the given map.
882 *
883 * Map is locked so this function can be called from
884 * vm_map_swapin.
885 *
886 */
887 void vm_map_res_reference(register vm_map_t map)
888 {
889 /* assert map is locked */
890 assert(map->res_count >= 0);
891 assert(map->ref_count >= map->res_count);
892 if (map->res_count == 0) {
893 lck_mtx_unlock(&map->s_lock);
894 vm_map_lock(map);
895 vm_map_swapin(map);
896 lck_mtx_lock(&map->s_lock);
897 ++map->res_count;
898 vm_map_unlock(map);
899 } else
900 ++map->res_count;
901 }
902
903 /*
904 * vm_map_reference_swap:
905 *
906 * Adds valid reference and residence counts to the given map.
907 *
908 * The map may not be in memory (i.e. zero residence count).
909 *
910 */
911 void vm_map_reference_swap(register vm_map_t map)
912 {
913 assert(map != VM_MAP_NULL);
914 lck_mtx_lock(&map->s_lock);
915 assert(map->res_count >= 0);
916 assert(map->ref_count >= map->res_count);
917 map->ref_count++;
918 vm_map_res_reference(map);
919 lck_mtx_unlock(&map->s_lock);
920 }
921
922 /*
923 * vm_map_res_deallocate:
924 *
925 * Decrement residence count on a map; possibly causing swapout.
926 *
927 * The map must be in memory (i.e. non-zero residence count).
928 *
929 * The map is locked, so this function is callable from vm_map_deallocate.
930 *
931 */
932 void vm_map_res_deallocate(register vm_map_t map)
933 {
934 assert(map->res_count > 0);
935 if (--map->res_count == 0) {
936 lck_mtx_unlock(&map->s_lock);
937 vm_map_lock(map);
938 vm_map_swapout(map);
939 vm_map_unlock(map);
940 lck_mtx_lock(&map->s_lock);
941 }
942 assert(map->ref_count >= map->res_count);
943 }
944 #endif /* MACH_ASSERT && TASK_SWAPPER */
945
946 /*
947 * vm_map_destroy:
948 *
949 * Actually destroy a map.
950 */
951 void
952 vm_map_destroy(
953 vm_map_t map,
954 int flags)
955 {
956 vm_map_lock(map);
957
958 /* clean up regular map entries */
959 (void) vm_map_delete(map, map->min_offset, map->max_offset,
960 flags, VM_MAP_NULL);
961 /* clean up leftover special mappings (commpage, etc...) */
962 (void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL,
963 flags, VM_MAP_NULL);
964
965 #if CONFIG_FREEZE
966 if (map->default_freezer_handle) {
967 default_freezer_handle_deallocate(map->default_freezer_handle);
968 map->default_freezer_handle = NULL;
969 }
970 #endif
971 vm_map_unlock(map);
972
973 assert(map->hdr.nentries == 0);
974
975 if(map->pmap)
976 pmap_destroy(map->pmap);
977
978 zfree(vm_map_zone, map);
979 }
980
981 #if TASK_SWAPPER
982 /*
983 * vm_map_swapin/vm_map_swapout
984 *
985 * Swap a map in and out, either referencing or releasing its resources.
986 * These functions are internal use only; however, they must be exported
987 * because they may be called from macros, which are exported.
988 *
989 * In the case of swapout, there could be races on the residence count,
990 * so if the residence count is up, we return, assuming that a
991 * vm_map_deallocate() call in the near future will bring us back.
992 *
993 * Locking:
994 * -- We use the map write lock for synchronization among races.
995 * -- The map write lock, and not the simple s_lock, protects the
996 * swap state of the map.
997 * -- If a map entry is a share map, then we hold both locks, in
998 * hierarchical order.
999 *
1000 * Synchronization Notes:
1001 * 1) If a vm_map_swapin() call happens while swapout in progress, it
1002 * will block on the map lock and proceed when swapout is through.
1003 * 2) A vm_map_reference() call at this time is illegal, and will
1004 * cause a panic. vm_map_reference() is only allowed on resident
1005 * maps, since it refuses to block.
1006 * 3) A vm_map_swapin() call during a swapin will block, and
1007 * proceed when the first swapin is done, turning into a nop.
1008 * This is the reason the res_count is not incremented until
1009 * after the swapin is complete.
1010 * 4) There is a timing hole after the checks of the res_count, before
1011 * the map lock is taken, during which a swapin may get the lock
1012 * before a swapout about to happen. If this happens, the swapin
1013 * will detect the state and increment the reference count, causing
1014 * the swapout to be a nop, thereby delaying it until a later
1015 * vm_map_deallocate. If the swapout gets the lock first, then
1016 * the swapin will simply block until the swapout is done, and
1017 * then proceed.
1018 *
1019 * Because vm_map_swapin() is potentially an expensive operation, it
1020 * should be used with caution.
1021 *
1022 * Invariants:
1023 * 1) A map with a residence count of zero is either swapped, or
1024 * being swapped.
1025 * 2) A map with a non-zero residence count is either resident,
1026 * or being swapped in.
1027 */
1028
1029 int vm_map_swap_enable = 1;
1030
1031 void vm_map_swapin (vm_map_t map)
1032 {
1033 register vm_map_entry_t entry;
1034
1035 if (!vm_map_swap_enable) /* debug */
1036 return;
1037
1038 /*
1039 * Map is locked
1040 * First deal with various races.
1041 */
1042 if (map->sw_state == MAP_SW_IN)
1043 /*
1044 * we raced with swapout and won. Returning will incr.
1045 * the res_count, turning the swapout into a nop.
1046 */
1047 return;
1048
1049 /*
1050 * The residence count must be zero. If we raced with another
1051 * swapin, the state would have been IN; if we raced with a
1052 * swapout (after another competing swapin), we must have lost
1053 * the race to get here (see above comment), in which case
1054 * res_count is still 0.
1055 */
1056 assert(map->res_count == 0);
1057
1058 /*
1059 * There are no intermediate states of a map going out or
1060 * coming in, since the map is locked during the transition.
1061 */
1062 assert(map->sw_state == MAP_SW_OUT);
1063
1064 /*
1065 * We now operate upon each map entry. If the entry is a sub-
1066 * or share-map, we call vm_map_res_reference upon it.
1067 * If the entry is an object, we call vm_object_res_reference
1068 * (this may iterate through the shadow chain).
1069 * Note that we hold the map locked the entire time,
1070 * even if we get back here via a recursive call in
1071 * vm_map_res_reference.
1072 */
1073 entry = vm_map_first_entry(map);
1074
1075 while (entry != vm_map_to_entry(map)) {
1076 if (entry->object.vm_object != VM_OBJECT_NULL) {
1077 if (entry->is_sub_map) {
1078 vm_map_t lmap = entry->object.sub_map;
1079 lck_mtx_lock(&lmap->s_lock);
1080 vm_map_res_reference(lmap);
1081 lck_mtx_unlock(&lmap->s_lock);
1082 } else {
1083 vm_object_t object = entry->object.vm_object;
1084 vm_object_lock(object);
1085 /*
1086 * This call may iterate through the
1087 * shadow chain.
1088 */
1089 vm_object_res_reference(object);
1090 vm_object_unlock(object);
1091 }
1092 }
1093 entry = entry->vme_next;
1094 }
1095 assert(map->sw_state == MAP_SW_OUT);
1096 map->sw_state = MAP_SW_IN;
1097 }
1098
1099 void vm_map_swapout(vm_map_t map)
1100 {
1101 register vm_map_entry_t entry;
1102
1103 /*
1104 * Map is locked
1105 * First deal with various races.
1106 * If we raced with a swapin and lost, the residence count
1107 * will have been incremented to 1, and we simply return.
1108 */
1109 lck_mtx_lock(&map->s_lock);
1110 if (map->res_count != 0) {
1111 lck_mtx_unlock(&map->s_lock);
1112 return;
1113 }
1114 lck_mtx_unlock(&map->s_lock);
1115
1116 /*
1117 * There are no intermediate states of a map going out or
1118 * coming in, since the map is locked during the transition.
1119 */
1120 assert(map->sw_state == MAP_SW_IN);
1121
1122 if (!vm_map_swap_enable)
1123 return;
1124
1125 /*
1126 * We now operate upon each map entry. If the entry is a sub-
1127 * or share-map, we call vm_map_res_deallocate upon it.
1128 * If the entry is an object, we call vm_object_res_deallocate
1129 * (this may iterate through the shadow chain).
1130 * Note that we hold the map locked the entire time,
1131 * even if we get back here via a recursive call in
1132 * vm_map_res_deallocate.
1133 */
1134 entry = vm_map_first_entry(map);
1135
1136 while (entry != vm_map_to_entry(map)) {
1137 if (entry->object.vm_object != VM_OBJECT_NULL) {
1138 if (entry->is_sub_map) {
1139 vm_map_t lmap = entry->object.sub_map;
1140 lck_mtx_lock(&lmap->s_lock);
1141 vm_map_res_deallocate(lmap);
1142 lck_mtx_unlock(&lmap->s_lock);
1143 } else {
1144 vm_object_t object = entry->object.vm_object;
1145 vm_object_lock(object);
1146 /*
1147 * This call may take a long time,
1148 * since it could actively push
1149 * out pages (if we implement it
1150 * that way).
1151 */
1152 vm_object_res_deallocate(object);
1153 vm_object_unlock(object);
1154 }
1155 }
1156 entry = entry->vme_next;
1157 }
1158 assert(map->sw_state == MAP_SW_IN);
1159 map->sw_state = MAP_SW_OUT;
1160 }
1161
1162 #endif /* TASK_SWAPPER */
1163
1164 /*
1165 * vm_map_lookup_entry: [ internal use only ]
1166 *
1167 * Calls into the vm map store layer to find the map
1168 * entry containing (or immediately preceding) the
1169 * specified address in the given map; the entry is returned
1170 * in the "entry" parameter. The boolean
1171 * result indicates whether the address is
1172 * actually contained in the map.
1173 */
1174 boolean_t
1175 vm_map_lookup_entry(
1176 register vm_map_t map,
1177 register vm_map_offset_t address,
1178 vm_map_entry_t *entry) /* OUT */
1179 {
1180 return ( vm_map_store_lookup_entry( map, address, entry ));
1181 }
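
/*
 * Illustrative sketch (hypothetical helper): the usual lookup pattern,
 * as in vm_map_set_cache_attr() and vm_map_apple_protected() above --
 * take the map lock (shared here), look up the start address, and make
 * sure the returned entry actually covers the range before trusting it.
 */
static __unused boolean_t
example_range_is_single_mapping(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	vm_map_entry_t	entry;
	boolean_t	covered;

	vm_map_lock_read(map);
	covered = (vm_map_lookup_entry(map, start, &entry) &&
		   !entry->is_sub_map &&
		   entry->vme_end >= end);
	vm_map_unlock_read(map);

	return covered;
}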
1182
1183 /*
1184 * Routine: vm_map_find_space
1185 * Purpose:
1186 * Allocate a range in the specified virtual address map,
1187 * returning the entry allocated for that range.
1188 * Used by kmem_alloc, etc.
1189 *
1190 * The map must NOT be locked. It will be returned locked
1191 * on KERN_SUCCESS, unlocked on failure.
1192 *
1193 * If an entry is allocated, the object/offset fields
1194 * are initialized to zero.
1195 */
1196 kern_return_t
1197 vm_map_find_space(
1198 register vm_map_t map,
1199 vm_map_offset_t *address, /* OUT */
1200 vm_map_size_t size,
1201 vm_map_offset_t mask,
1202 int flags,
1203 vm_map_entry_t *o_entry) /* OUT */
1204 {
1205 register vm_map_entry_t entry, new_entry;
1206 register vm_map_offset_t start;
1207 register vm_map_offset_t end;
1208
1209 if (size == 0) {
1210 *address = 0;
1211 return KERN_INVALID_ARGUMENT;
1212 }
1213
1214 if (flags & VM_FLAGS_GUARD_AFTER) {
1215 /* account for the back guard page in the size */
1216 size += PAGE_SIZE_64;
1217 }
1218
1219 new_entry = vm_map_entry_create(map, FALSE);
1220
1221 /*
1222 * Look for the first possible address; if there's already
1223 * something at this address, we have to start after it.
1224 */
1225
1226 vm_map_lock(map);
1227
1228 if( map->disable_vmentry_reuse == TRUE) {
1229 VM_MAP_HIGHEST_ENTRY(map, entry, start);
1230 } else {
1231 assert(first_free_is_valid(map));
1232 if ((entry = map->first_free) == vm_map_to_entry(map))
1233 start = map->min_offset;
1234 else
1235 start = entry->vme_end;
1236 }
1237
1238 /*
1239 * In any case, the "entry" always precedes
1240 * the proposed new region throughout the loop:
1241 */
1242
1243 while (TRUE) {
1244 register vm_map_entry_t next;
1245
1246 /*
1247 * Find the end of the proposed new region.
1248 * Be sure we didn't go beyond the end, or
1249 * wrap around the address.
1250 */
1251
1252 if (flags & VM_FLAGS_GUARD_BEFORE) {
1253 /* reserve space for the front guard page */
1254 start += PAGE_SIZE_64;
1255 }
1256 end = ((start + mask) & ~mask);
1257
1258 if (end < start) {
1259 vm_map_entry_dispose(map, new_entry);
1260 vm_map_unlock(map);
1261 return(KERN_NO_SPACE);
1262 }
1263 start = end;
1264 end += size;
1265
1266 if ((end > map->max_offset) || (end < start)) {
1267 vm_map_entry_dispose(map, new_entry);
1268 vm_map_unlock(map);
1269 return(KERN_NO_SPACE);
1270 }
1271
1272 /*
1273 * If there are no more entries, we must win.
1274 */
1275
1276 next = entry->vme_next;
1277 if (next == vm_map_to_entry(map))
1278 break;
1279
1280 /*
1281 * If there is another entry, it must be
1282 * after the end of the potential new region.
1283 */
1284
1285 if (next->vme_start >= end)
1286 break;
1287
1288 /*
1289 * Didn't fit -- move to the next entry.
1290 */
1291
1292 entry = next;
1293 start = entry->vme_end;
1294 }
1295
1296 /*
1297 * At this point,
1298 * "start" and "end" should define the endpoints of the
1299 * available new range, and
1300 * "entry" should refer to the region before the new
1301 * range, and
1302 *
1303 * the map should be locked.
1304 */
1305
1306 if (flags & VM_FLAGS_GUARD_BEFORE) {
1307 /* go back for the front guard page */
1308 start -= PAGE_SIZE_64;
1309 }
1310 *address = start;
1311
1312 assert(start < end);
1313 new_entry->vme_start = start;
1314 new_entry->vme_end = end;
1315 assert(page_aligned(new_entry->vme_start));
1316 assert(page_aligned(new_entry->vme_end));
1317
1318 new_entry->is_shared = FALSE;
1319 new_entry->is_sub_map = FALSE;
1320 new_entry->use_pmap = FALSE;
1321 new_entry->object.vm_object = VM_OBJECT_NULL;
1322 new_entry->offset = (vm_object_offset_t) 0;
1323
1324 new_entry->needs_copy = FALSE;
1325
1326 new_entry->inheritance = VM_INHERIT_DEFAULT;
1327 new_entry->protection = VM_PROT_DEFAULT;
1328 new_entry->max_protection = VM_PROT_ALL;
1329 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
1330 new_entry->wired_count = 0;
1331 new_entry->user_wired_count = 0;
1332
1333 new_entry->in_transition = FALSE;
1334 new_entry->needs_wakeup = FALSE;
1335 new_entry->no_cache = FALSE;
1336 new_entry->permanent = FALSE;
1337 new_entry->superpage_size = 0;
1338
1339 new_entry->used_for_jit = 0;
1340
1341 new_entry->alias = 0;
1342 new_entry->zero_wired_pages = FALSE;
1343
1344 VM_GET_FLAGS_ALIAS(flags, new_entry->alias);
1345
1346 /*
1347 * Insert the new entry into the list
1348 */
1349
1350 vm_map_store_entry_link(map, entry, new_entry);
1351
1352 map->size += size;
1353
1354 /*
1355 * Update the lookup hint
1356 */
1357 SAVE_HINT_MAP_WRITE(map, new_entry);
1358
1359 *o_entry = new_entry;
1360 return(KERN_SUCCESS);
1361 }
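
/*
 * Illustrative sketch (hypothetical helper, loosely following the
 * kmem_alloc-style callers mentioned above): on KERN_SUCCESS the map
 * comes back locked and the new entry has a null object, so the caller
 * fills in its own object/offset and then drops the lock.  Reference
 * counting on "object" is assumed to be handled by the caller.
 */
static __unused kern_return_t
example_carve_out_kernel_range(
	vm_map_t		map,
	vm_object_t		object,
	vm_object_offset_t	offset,
	vm_map_size_t		size,
	vm_map_offset_t		*addr)		/* OUT */
{
	vm_map_entry_t	entry;
	kern_return_t	kr;

	kr = vm_map_find_space(map, addr, size, (vm_map_offset_t) 0, 0, &entry);
	if (kr != KERN_SUCCESS)
		return kr;

	entry->object.vm_object = object;
	entry->offset = offset;

	vm_map_unlock(map);	/* vm_map_find_space() returned it locked */
	return KERN_SUCCESS;
}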
1362
1363 int vm_map_pmap_enter_print = FALSE;
1364 int vm_map_pmap_enter_enable = FALSE;
1365
1366 /*
1367 * Routine: vm_map_pmap_enter [internal only]
1368 *
1369 * Description:
1370 * Force pages from the specified object to be entered into
1371 * the pmap at the specified address if they are present.
1372 * As soon as a page is not found in the object, the scan ends.
1373 *
1374 * Returns:
1375 * Nothing.
1376 *
1377 * In/out conditions:
1378 * The source map should not be locked on entry.
1379 */
1380 static void
1381 vm_map_pmap_enter(
1382 vm_map_t map,
1383 register vm_map_offset_t addr,
1384 register vm_map_offset_t end_addr,
1385 register vm_object_t object,
1386 vm_object_offset_t offset,
1387 vm_prot_t protection)
1388 {
1389 int type_of_fault;
1390 kern_return_t kr;
1391
1392 if(map->pmap == 0)
1393 return;
1394
1395 while (addr < end_addr) {
1396 register vm_page_t m;
1397
1398 vm_object_lock(object);
1399
1400 m = vm_page_lookup(object, offset);
1401 /*
1402 * ENCRYPTED SWAP:
1403 * The user should never see encrypted data, so do not
1404 * enter an encrypted page in the page table.
1405 */
1406 if (m == VM_PAGE_NULL || m->busy || m->encrypted ||
1407 m->fictitious ||
1408 (m->unusual && ( m->error || m->restart || m->absent))) {
1409 vm_object_unlock(object);
1410 return;
1411 }
1412
1413 if (vm_map_pmap_enter_print) {
1414 printf("vm_map_pmap_enter:");
1415 printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
1416 map, (unsigned long long)addr, object, (unsigned long long)offset);
1417 }
1418 type_of_fault = DBG_CACHE_HIT_FAULT;
1419 kr = vm_fault_enter(m, map->pmap, addr, protection, protection,
1420 VM_PAGE_WIRED(m), FALSE, FALSE, FALSE, NULL,
1421 &type_of_fault);
1422
1423 vm_object_unlock(object);
1424
1425 offset += PAGE_SIZE_64;
1426 addr += PAGE_SIZE;
1427 }
1428 }
1429
1430 boolean_t vm_map_pmap_is_empty(
1431 vm_map_t map,
1432 vm_map_offset_t start,
1433 vm_map_offset_t end);
1434 boolean_t vm_map_pmap_is_empty(
1435 vm_map_t map,
1436 vm_map_offset_t start,
1437 vm_map_offset_t end)
1438 {
1439 #ifdef MACHINE_PMAP_IS_EMPTY
1440 return pmap_is_empty(map->pmap, start, end);
1441 #else /* MACHINE_PMAP_IS_EMPTY */
1442 vm_map_offset_t offset;
1443 ppnum_t phys_page;
1444
1445 if (map->pmap == NULL) {
1446 return TRUE;
1447 }
1448
1449 for (offset = start;
1450 offset < end;
1451 offset += PAGE_SIZE) {
1452 phys_page = pmap_find_phys(map->pmap, offset);
1453 if (phys_page) {
1454 kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
1455 "page %d at 0x%llx\n",
1456 map, (long long)start, (long long)end,
1457 phys_page, (long long)offset);
1458 return FALSE;
1459 }
1460 }
1461 return TRUE;
1462 #endif /* MACHINE_PMAP_IS_EMPTY */
1463 }
1464
1465 #define MAX_TRIES_TO_GET_RANDOM_ADDRESS 1000
1466 kern_return_t
1467 vm_map_random_address_for_size(
1468 vm_map_t map,
1469 vm_map_offset_t *address,
1470 vm_map_size_t size)
1471 {
1472 kern_return_t kr = KERN_SUCCESS;
1473 int tries = 0;
1474 vm_map_offset_t random_addr = 0;
1475 vm_map_offset_t hole_end;
1476
1477 vm_map_entry_t next_entry = VM_MAP_ENTRY_NULL;
1478 vm_map_entry_t prev_entry = VM_MAP_ENTRY_NULL;
1479 vm_map_size_t vm_hole_size = 0;
1480 vm_map_size_t addr_space_size;
1481
1482 addr_space_size = vm_map_max(map) - vm_map_min(map);
1483
1484 assert(page_aligned(size));
1485
1486 while (tries < MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
1487 random_addr = ((vm_map_offset_t)random()) << PAGE_SHIFT;
1488 random_addr = trunc_page(vm_map_min(map) +
1489 (random_addr % addr_space_size));
1490
1491 if (vm_map_lookup_entry(map, random_addr, &prev_entry) == FALSE) {
1492 if (prev_entry == vm_map_to_entry(map)) {
1493 next_entry = vm_map_first_entry(map);
1494 } else {
1495 next_entry = prev_entry->vme_next;
1496 }
1497 if (next_entry == vm_map_to_entry(map)) {
1498 hole_end = vm_map_max(map);
1499 } else {
1500 hole_end = next_entry->vme_start;
1501 }
1502 vm_hole_size = hole_end - random_addr;
1503 if (vm_hole_size >= size) {
1504 *address = random_addr;
1505 break;
1506 }
1507 }
1508 tries++;
1509 }
1510
1511 if (tries == MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
1512 kr = KERN_NO_SPACE;
1513 }
1514 return kr;
1515 }
1516
1517 /*
1518 * Routine: vm_map_enter
1519 *
1520 * Description:
1521 * Allocate a range in the specified virtual address map.
1522 * The resulting range will refer to memory defined by
1523 * the given memory object and offset into that object.
1524 *
1525 * Arguments are as defined in the vm_map call.
1526 */
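
/*
 * Illustrative sketch (hypothetical helper): the simplest use of
 * vm_map_enter() below -- an anonymous, anywhere allocation with default
 * protections, essentially what vm_allocate() reduces to.
 */
static __unused kern_return_t
example_allocate_anonymous(
	vm_map_t	map,
	vm_map_offset_t	*addr,		/* IN/OUT */
	vm_map_size_t	size)
{
	return vm_map_enter(map,
			    addr,
			    size,
			    (vm_map_offset_t) 0,	/* mask */
			    VM_FLAGS_ANYWHERE,
			    VM_OBJECT_NULL,
			    (vm_object_offset_t) 0,
			    FALSE,			/* needs_copy */
			    VM_PROT_DEFAULT,
			    VM_PROT_ALL,
			    VM_INHERIT_DEFAULT);
}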
1527 int _map_enter_debug = 0;
1528 static unsigned int vm_map_enter_restore_successes = 0;
1529 static unsigned int vm_map_enter_restore_failures = 0;
1530 kern_return_t
1531 vm_map_enter(
1532 vm_map_t map,
1533 vm_map_offset_t *address, /* IN/OUT */
1534 vm_map_size_t size,
1535 vm_map_offset_t mask,
1536 int flags,
1537 vm_object_t object,
1538 vm_object_offset_t offset,
1539 boolean_t needs_copy,
1540 vm_prot_t cur_protection,
1541 vm_prot_t max_protection,
1542 vm_inherit_t inheritance)
1543 {
1544 vm_map_entry_t entry, new_entry;
1545 vm_map_offset_t start, tmp_start, tmp_offset;
1546 vm_map_offset_t end, tmp_end;
1547 vm_map_offset_t tmp2_start, tmp2_end;
1548 vm_map_offset_t step;
1549 kern_return_t result = KERN_SUCCESS;
1550 vm_map_t zap_old_map = VM_MAP_NULL;
1551 vm_map_t zap_new_map = VM_MAP_NULL;
1552 boolean_t map_locked = FALSE;
1553 boolean_t pmap_empty = TRUE;
1554 boolean_t new_mapping_established = FALSE;
1555 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
1556 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
1557 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
1558 boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
1559 boolean_t is_submap = ((flags & VM_FLAGS_SUBMAP) != 0);
1560 boolean_t permanent = ((flags & VM_FLAGS_PERMANENT) != 0);
1561 boolean_t entry_for_jit = ((flags & VM_FLAGS_MAP_JIT) != 0);
1562 unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
1563 char alias;
1564 vm_map_offset_t effective_min_offset, effective_max_offset;
1565 kern_return_t kr;
1566
1567 if (superpage_size) {
1568 switch (superpage_size) {
1569 /*
1570 * Note that the current implementation only supports
1571 * a single size for superpages, SUPERPAGE_SIZE, per
1572 * architecture. As soon as more sizes are to be supported,
1573 * SUPERPAGE_SIZE has to be replaced
1574 * with a lookup of the size depending on superpage_size.
1575 */
1576 #ifdef __x86_64__
1577 case SUPERPAGE_SIZE_ANY:
1578 /* handle it like 2 MB and round up to page size */
1579 size = (size + 2*1024*1024 - 1) & ~(2*1024*1024 - 1);
1580 case SUPERPAGE_SIZE_2MB:
1581 break;
1582 #endif
1583 default:
1584 return KERN_INVALID_ARGUMENT;
1585 }
1586 mask = SUPERPAGE_SIZE-1;
1587 if (size & (SUPERPAGE_SIZE-1))
1588 return KERN_INVALID_ARGUMENT;
1589 inheritance = VM_INHERIT_NONE; /* fork() children won't inherit superpages */
1590 }
1591
1592
1593 #if CONFIG_EMBEDDED
1594 if (cur_protection & VM_PROT_WRITE){
1595 if ((cur_protection & VM_PROT_EXECUTE) && !entry_for_jit){
1596 printf("EMBEDDED: %s curprot cannot be write+execute. turning off execute\n", __PRETTY_FUNCTION__);
1597 cur_protection &= ~VM_PROT_EXECUTE;
1598 }
1599 }
1600 #endif /* CONFIG_EMBEDDED */
1601
1602 if (is_submap) {
1603 if (purgable) {
1604 /* submaps can not be purgeable */
1605 return KERN_INVALID_ARGUMENT;
1606 }
1607 if (object == VM_OBJECT_NULL) {
1608 /* submaps can not be created lazily */
1609 return KERN_INVALID_ARGUMENT;
1610 }
1611 }
1612 if (flags & VM_FLAGS_ALREADY) {
1613 /*
1614 * VM_FLAGS_ALREADY says that it's OK if the same mapping
1615 * is already present. For it to be meaningful, the requested
1616 * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
1617 * we shouldn't try to remove what was mapped there first
1618 * (!VM_FLAGS_OVERWRITE).
1619 */
1620 if ((flags & VM_FLAGS_ANYWHERE) ||
1621 (flags & VM_FLAGS_OVERWRITE)) {
1622 return KERN_INVALID_ARGUMENT;
1623 }
1624 }
1625
1626 effective_min_offset = map->min_offset;
1627
1628 if (flags & VM_FLAGS_BEYOND_MAX) {
1629 /*
1630 * Allow an insertion beyond the map's max offset.
1631 */
1632 if (vm_map_is_64bit(map))
1633 effective_max_offset = 0xFFFFFFFFFFFFF000ULL;
1634 else
1635 effective_max_offset = 0x00000000FFFFF000ULL;
1636 } else {
1637 effective_max_offset = map->max_offset;
1638 }
1639
1640 if (size == 0 ||
1641 (offset & PAGE_MASK_64) != 0) {
1642 *address = 0;
1643 return KERN_INVALID_ARGUMENT;
1644 }
1645
1646 VM_GET_FLAGS_ALIAS(flags, alias);
1647
1648 #define RETURN(value) { result = value; goto BailOut; }
1649
1650 assert(page_aligned(*address));
1651 assert(page_aligned(size));
1652
1653 /*
1654 * Only zero-fill objects are allowed to be purgable.
1655 * LP64todo - limit purgable objects to 32-bits for now
1656 */
1657 if (purgable &&
1658 (offset != 0 ||
1659 (object != VM_OBJECT_NULL &&
1660 (object->vo_size != size ||
1661 object->purgable == VM_PURGABLE_DENY))
1662 || size > ANON_MAX_SIZE)) /* LP64todo: remove when dp capable */
1663 return KERN_INVALID_ARGUMENT;
1664
1665 if (!anywhere && overwrite) {
1666 /*
1667 * Create a temporary VM map to hold the old mappings in the
1668 * affected area while we create the new one.
1669 * This avoids releasing the VM map lock in
1670 * vm_map_entry_delete() and allows atomicity
1671 * when we want to replace some mappings with a new one.
1672 * It also allows us to restore the old VM mappings if the
1673 * new mapping fails.
1674 */
1675 zap_old_map = vm_map_create(PMAP_NULL,
1676 *address,
1677 *address + size,
1678 map->hdr.entries_pageable);
1679 }
1680
1681 StartAgain: ;
1682
1683 start = *address;
1684
1685 if (anywhere) {
1686 vm_map_lock(map);
1687 map_locked = TRUE;
1688
1689 if (entry_for_jit) {
1690 if (map->jit_entry_exists) {
1691 result = KERN_INVALID_ARGUMENT;
1692 goto BailOut;
1693 }
1694 /*
1695 * Get a random start address.
1696 */
1697 result = vm_map_random_address_for_size(map, address, size);
1698 if (result != KERN_SUCCESS) {
1699 goto BailOut;
1700 }
1701 start = *address;
1702 }
1703
1704
1705 /*
1706 * Calculate the first possible address.
1707 */
1708
1709 if (start < effective_min_offset)
1710 start = effective_min_offset;
1711 if (start > effective_max_offset)
1712 RETURN(KERN_NO_SPACE);
1713
1714 /*
1715 * Look for the first possible address;
1716 * if there's already something at this
1717 * address, we have to start after it.
1718 */
1719
1720 if( map->disable_vmentry_reuse == TRUE) {
1721 VM_MAP_HIGHEST_ENTRY(map, entry, start);
1722 } else {
1723 assert(first_free_is_valid(map));
1724
1725 entry = map->first_free;
1726
1727 if (entry == vm_map_to_entry(map)) {
1728 entry = NULL;
1729 } else {
1730 if (entry->vme_next == vm_map_to_entry(map)){
1731 /*
1732 * Hole at the end of the map.
1733 */
1734 entry = NULL;
1735 } else {
1736 if (start < (entry->vme_next)->vme_start ) {
1737 start = entry->vme_end;
1738 } else {
1739 /*
1740 * Need to do a lookup.
1741 */
1742 entry = NULL;
1743 }
1744 }
1745 }
1746
1747 if (entry == NULL) {
1748 vm_map_entry_t tmp_entry;
1749 if (vm_map_lookup_entry(map, start, &tmp_entry)) {
1750 assert(!entry_for_jit);
1751 start = tmp_entry->vme_end;
1752 }
1753 entry = tmp_entry;
1754 }
1755 }
1756
1757 /*
1758 * In any case, the "entry" always precedes
1759 * the proposed new region throughout the
1760 * loop:
1761 */
1762
1763 while (TRUE) {
1764 register vm_map_entry_t next;
1765
1766 /*
1767 * Find the end of the proposed new region.
1768 * Be sure we didn't go beyond the end, or
1769 * wrap around the address.
1770 */
1771
1772 end = ((start + mask) & ~mask);
1773 if (end < start)
1774 RETURN(KERN_NO_SPACE);
1775 start = end;
1776 end += size;
1777
1778 if ((end > effective_max_offset) || (end < start)) {
1779 if (map->wait_for_space) {
1780 if (size <= (effective_max_offset -
1781 effective_min_offset)) {
1782 assert_wait((event_t)map,
1783 THREAD_ABORTSAFE);
1784 vm_map_unlock(map);
1785 map_locked = FALSE;
1786 thread_block(THREAD_CONTINUE_NULL);
1787 goto StartAgain;
1788 }
1789 }
1790 RETURN(KERN_NO_SPACE);
1791 }
1792
1793 /*
1794 * If there are no more entries, we must win.
1795 */
1796
1797 next = entry->vme_next;
1798 if (next == vm_map_to_entry(map))
1799 break;
1800
1801 /*
1802 * If there is another entry, it must be
1803 * after the end of the potential new region.
1804 */
1805
1806 if (next->vme_start >= end)
1807 break;
1808
1809 /*
1810 * Didn't fit -- move to the next entry.
1811 */
1812
1813 entry = next;
1814 start = entry->vme_end;
1815 }
1816 *address = start;
1817 } else {
1818 /*
1819 * Verify that:
1820 * the address doesn't itself violate
1821 * the mask requirement.
1822 */
1823
1824 vm_map_lock(map);
1825 map_locked = TRUE;
1826 if ((start & mask) != 0)
1827 RETURN(KERN_NO_SPACE);
1828
1829 /*
1830 * ... the address is within bounds
1831 */
1832
1833 end = start + size;
1834
1835 if ((start < effective_min_offset) ||
1836 (end > effective_max_offset) ||
1837 (start >= end)) {
1838 RETURN(KERN_INVALID_ADDRESS);
1839 }
1840
1841 if (overwrite && zap_old_map != VM_MAP_NULL) {
1842 /*
1843 * Fixed mapping and "overwrite" flag: attempt to
1844 * remove all existing mappings in the specified
1845 * address range, saving them in our "zap_old_map".
1846 */
1847 (void) vm_map_delete(map, start, end,
1848 VM_MAP_REMOVE_SAVE_ENTRIES,
1849 zap_old_map);
1850 }
1851
1852 /*
1853 * ... the starting address isn't allocated
1854 */
1855
1856 if (vm_map_lookup_entry(map, start, &entry)) {
1857 if (! (flags & VM_FLAGS_ALREADY)) {
1858 RETURN(KERN_NO_SPACE);
1859 }
1860 /*
1861 * Check if what's already there is what we want.
1862 */
1863 tmp_start = start;
1864 tmp_offset = offset;
1865 if (entry->vme_start < start) {
1866 tmp_start -= start - entry->vme_start;
1867 tmp_offset -= start - entry->vme_start;
1868
1869 }
1870 for (; entry->vme_start < end;
1871 entry = entry->vme_next) {
1872 /*
1873 * Check if the mapping's attributes
1874 * match the existing map entry.
1875 */
1876 if (entry == vm_map_to_entry(map) ||
1877 entry->vme_start != tmp_start ||
1878 entry->is_sub_map != is_submap ||
1879 entry->offset != tmp_offset ||
1880 entry->needs_copy != needs_copy ||
1881 entry->protection != cur_protection ||
1882 entry->max_protection != max_protection ||
1883 entry->inheritance != inheritance ||
1884 entry->alias != alias) {
1885 /* not the same mapping ! */
1886 RETURN(KERN_NO_SPACE);
1887 }
1888 /*
1889 * Check if the same object is being mapped.
1890 */
1891 if (is_submap) {
1892 if (entry->object.sub_map !=
1893 (vm_map_t) object) {
1894 /* not the same submap */
1895 RETURN(KERN_NO_SPACE);
1896 }
1897 } else {
1898 if (entry->object.vm_object != object) {
1899 /* not the same VM object... */
1900 vm_object_t obj2;
1901
1902 obj2 = entry->object.vm_object;
1903 if ((obj2 == VM_OBJECT_NULL ||
1904 obj2->internal) &&
1905 (object == VM_OBJECT_NULL ||
1906 object->internal)) {
1907 /*
1908 * ... but both are
1909 * anonymous memory,
1910 * so equivalent.
1911 */
1912 } else {
1913 RETURN(KERN_NO_SPACE);
1914 }
1915 }
1916 }
1917
1918 tmp_offset += entry->vme_end - entry->vme_start;
1919 tmp_start += entry->vme_end - entry->vme_start;
1920 if (entry->vme_end >= end) {
1921 /* reached the end of our mapping */
1922 break;
1923 }
1924 }
1925 /* it all matches: let's use what's already there ! */
1926 RETURN(KERN_MEMORY_PRESENT);
1927 }
1928
1929 /*
1930 * ... the next region doesn't overlap the
1931 * end point.
1932 */
1933
1934 if ((entry->vme_next != vm_map_to_entry(map)) &&
1935 (entry->vme_next->vme_start < end))
1936 RETURN(KERN_NO_SPACE);
1937 }
1938
1939 /*
1940 * At this point,
1941 * "start" and "end" should define the endpoints of the
1942 * available new range, and
1943 * "entry" should refer to the region before the new
1944 * range, and
1945 *
1946 * the map should be locked.
1947 */
1948
1949 /*
1950 * See whether we can avoid creating a new entry (and object) by
1951 * extending one of our neighbors. [So far, we only attempt to
1952 * extend from below.] Note that we can never extend/join
1953 * purgable objects because they need to remain distinct
1954 * entities in order to implement their "volatile object"
1955 * semantics.
1956 */
1957
1958 if (purgable || entry_for_jit) {
1959 if (object == VM_OBJECT_NULL) {
1960 object = vm_object_allocate(size);
1961 object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
1962 if (purgable) {
1963 object->purgable = VM_PURGABLE_NONVOLATILE;
1964 }
1965 offset = (vm_object_offset_t)0;
1966 }
1967 } else if ((is_submap == FALSE) &&
1968 (object == VM_OBJECT_NULL) &&
1969 (entry != vm_map_to_entry(map)) &&
1970 (entry->vme_end == start) &&
1971 (!entry->is_shared) &&
1972 (!entry->is_sub_map) &&
1973 ((alias == VM_MEMORY_REALLOC) || (entry->alias == alias)) &&
1974 (entry->inheritance == inheritance) &&
1975 (entry->protection == cur_protection) &&
1976 (entry->max_protection == max_protection) &&
1977 (entry->behavior == VM_BEHAVIOR_DEFAULT) &&
1978 (entry->in_transition == 0) &&
1979 (entry->no_cache == no_cache) &&
1980 ((entry->vme_end - entry->vme_start) + size <=
1981 (alias == VM_MEMORY_REALLOC ?
1982 ANON_CHUNK_SIZE :
1983 NO_COALESCE_LIMIT)) &&
1984 (entry->wired_count == 0)) { /* implies user_wired_count == 0 */
1985 if (vm_object_coalesce(entry->object.vm_object,
1986 VM_OBJECT_NULL,
1987 entry->offset,
1988 (vm_object_offset_t) 0,
1989 (vm_map_size_t)(entry->vme_end - entry->vme_start),
1990 (vm_map_size_t)(end - entry->vme_end))) {
1991
1992 /*
1993 * Coalesced the two objects - can extend
1994 * the previous map entry to include the
1995 * new range.
1996 */
1997 map->size += (end - entry->vme_end);
1998 assert(entry->vme_start < end);
1999 entry->vme_end = end;
2000 vm_map_store_update_first_free(map, map->first_free);
2001 RETURN(KERN_SUCCESS);
2002 }
2003 }
2004
2005 step = superpage_size ? SUPERPAGE_SIZE : (end - start);
2006 new_entry = NULL;
2007
2008 for (tmp2_start = start; tmp2_start<end; tmp2_start += step) {
2009 tmp2_end = tmp2_start + step;
2010 /*
2011 * Create a new entry
2012 * LP64todo - for now, we can only allocate 4GB internal objects
2013 * because the default pager can't page bigger ones. Remove this
2014 * when it can.
2015 *
2016 * XXX FBDP
2017 * The reserved "page zero" in each process's address space can
2018 * be arbitrarily large. Splitting it into separate 4GB objects and
2019 * therefore different VM map entries serves no purpose and just
2020 * slows down operations on the VM map, so let's not split the
2021 * allocation into 4GB chunks if the max protection is NONE. That
2022 * memory should never be accessible, so it will never get to the
2023 * default pager.
2024 */
2025 tmp_start = tmp2_start;
2026 if (object == VM_OBJECT_NULL &&
2027 size > (vm_map_size_t)ANON_CHUNK_SIZE &&
2028 max_protection != VM_PROT_NONE &&
2029 superpage_size == 0)
2030 tmp_end = tmp_start + (vm_map_size_t)ANON_CHUNK_SIZE;
2031 else
2032 tmp_end = tmp2_end;
2033 do {
2034 new_entry = vm_map_entry_insert(map, entry, tmp_start, tmp_end,
2035 object, offset, needs_copy,
2036 FALSE, FALSE,
2037 cur_protection, max_protection,
2038 VM_BEHAVIOR_DEFAULT,
2039 (entry_for_jit)? VM_INHERIT_NONE: inheritance,
2040 0, no_cache,
2041 permanent, superpage_size);
2042 new_entry->alias = alias;
2043 if (entry_for_jit){
2044 if (!(map->jit_entry_exists)){
2045 new_entry->used_for_jit = TRUE;
2046 map->jit_entry_exists = TRUE;
2047 }
2048 }
2049
2050 if (is_submap) {
2051 vm_map_t submap;
2052 boolean_t submap_is_64bit;
2053 boolean_t use_pmap;
2054
2055 new_entry->is_sub_map = TRUE;
2056 submap = (vm_map_t) object;
2057 submap_is_64bit = vm_map_is_64bit(submap);
2058 use_pmap = (alias == VM_MEMORY_SHARED_PMAP);
2059 #ifndef NO_NESTED_PMAP
2060 if (use_pmap && submap->pmap == NULL) {
2061 ledger_t ledger = map->pmap->ledger;
2062 /* we need a sub pmap to nest... */
2063 submap->pmap = pmap_create(ledger, 0,
2064 submap_is_64bit);
2065 if (submap->pmap == NULL) {
2066 /* let's proceed without nesting... */
2067 }
2068 }
2069 if (use_pmap && submap->pmap != NULL) {
2070 kr = pmap_nest(map->pmap,
2071 submap->pmap,
2072 tmp_start,
2073 tmp_start,
2074 tmp_end - tmp_start);
2075 if (kr != KERN_SUCCESS) {
2076 printf("vm_map_enter: "
2077 "pmap_nest(0x%llx,0x%llx) "
2078 "error 0x%x\n",
2079 (long long)tmp_start,
2080 (long long)tmp_end,
2081 kr);
2082 } else {
2083 /* we're now nested ! */
2084 new_entry->use_pmap = TRUE;
2085 pmap_empty = FALSE;
2086 }
2087 }
2088 #endif /* NO_NESTED_PMAP */
2089 }
2090 entry = new_entry;
2091
2092 if (superpage_size) {
2093 vm_page_t pages, m;
2094 vm_object_t sp_object;
2095
2096 entry->offset = 0;
2097
2098 /* allocate one superpage */
2099 kr = cpm_allocate(SUPERPAGE_SIZE, &pages, 0, SUPERPAGE_NBASEPAGES-1, TRUE, 0);
2100 if (kr != KERN_SUCCESS) {
2101 new_mapping_established = TRUE; /* will cause deallocation of whole range */
2102 RETURN(kr);
2103 }
2104
2105 /* create one vm_object per superpage */
2106 sp_object = vm_object_allocate((vm_map_size_t)(entry->vme_end - entry->vme_start));
2107 sp_object->phys_contiguous = TRUE;
2108 sp_object->vo_shadow_offset = (vm_object_offset_t)pages->phys_page*PAGE_SIZE;
2109 entry->object.vm_object = sp_object;
2110
2111 /* enter the base pages into the object */
2112 vm_object_lock(sp_object);
2113 for (offset = 0; offset < SUPERPAGE_SIZE; offset += PAGE_SIZE) {
2114 m = pages;
2115 pmap_zero_page(m->phys_page);
2116 pages = NEXT_PAGE(m);
2117 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
2118 vm_page_insert(m, sp_object, offset);
2119 }
2120 vm_object_unlock(sp_object);
2121 }
2122 } while (tmp_end != tmp2_end &&
2123 (tmp_start = tmp_end) &&
2124 (tmp_end = (tmp2_end - tmp_end > (vm_map_size_t)ANON_CHUNK_SIZE) ?
2125 tmp_end + (vm_map_size_t)ANON_CHUNK_SIZE : tmp2_end));
2126 }
2127
2128 vm_map_unlock(map);
2129 map_locked = FALSE;
2130
2131 new_mapping_established = TRUE;
2132
2133 /* Wire down the new entry if the user
2134 * requested all new map entries be wired.
2135 */
2136 if ((map->wiring_required)||(superpage_size)) {
2137 pmap_empty = FALSE; /* pmap won't be empty */
2138 kr = vm_map_wire(map, start, end,
2139 new_entry->protection, TRUE);
2140 RETURN(kr);
2141 }
2142
2143 if ((object != VM_OBJECT_NULL) &&
2144 (vm_map_pmap_enter_enable) &&
2145 (!anywhere) &&
2146 (!needs_copy) &&
2147 (size < (128*1024))) {
2148 pmap_empty = FALSE; /* pmap won't be empty */
2149
2150 if (override_nx(map, alias) && cur_protection)
2151 cur_protection |= VM_PROT_EXECUTE;
2152
2153 vm_map_pmap_enter(map, start, end,
2154 object, offset, cur_protection);
2155 }
2156
2157 BailOut: ;
2158 if (result == KERN_SUCCESS) {
2159 vm_prot_t pager_prot;
2160 memory_object_t pager;
2161
2162 if (pmap_empty &&
2163 !(flags & VM_FLAGS_NO_PMAP_CHECK)) {
2164 assert(vm_map_pmap_is_empty(map,
2165 *address,
2166 *address+size));
2167 }
2168
2169 /*
2170 * For "named" VM objects, let the pager know that the
2171 * memory object is being mapped. Some pagers need to keep
2172 * track of this, to know when they can reclaim the memory
2173 * object, for example.
2174 * VM calls memory_object_map() for each mapping (specifying
2175 * the protection of each mapping) and calls
2176 * memory_object_last_unmap() when all the mappings are gone.
2177 */
2178 pager_prot = max_protection;
2179 if (needs_copy) {
2180 /*
2181 * Copy-On-Write mapping: won't modify
2182 * the memory object.
2183 */
2184 pager_prot &= ~VM_PROT_WRITE;
2185 }
2186 if (!is_submap &&
2187 object != VM_OBJECT_NULL &&
2188 object->named &&
2189 object->pager != MEMORY_OBJECT_NULL) {
2190 vm_object_lock(object);
2191 pager = object->pager;
2192 if (object->named &&
2193 pager != MEMORY_OBJECT_NULL) {
2194 assert(object->pager_ready);
2195 vm_object_mapping_wait(object, THREAD_UNINT);
2196 vm_object_mapping_begin(object);
2197 vm_object_unlock(object);
2198
2199 kr = memory_object_map(pager, pager_prot);
2200 assert(kr == KERN_SUCCESS);
2201
2202 vm_object_lock(object);
2203 vm_object_mapping_end(object);
2204 }
2205 vm_object_unlock(object);
2206 }
2207 } else {
2208 if (new_mapping_established) {
2209 /*
2210 * We have to get rid of the new mappings since we
2211 * won't make them available to the user.
2212 * Try to do that atomically, to minimize the risk
2213 * that someone else creates new mappings in that range.
2214 */
2215 zap_new_map = vm_map_create(PMAP_NULL,
2216 *address,
2217 *address + size,
2218 map->hdr.entries_pageable);
2219 if (!map_locked) {
2220 vm_map_lock(map);
2221 map_locked = TRUE;
2222 }
2223 (void) vm_map_delete(map, *address, *address+size,
2224 VM_MAP_REMOVE_SAVE_ENTRIES,
2225 zap_new_map);
2226 }
2227 if (zap_old_map != VM_MAP_NULL &&
2228 zap_old_map->hdr.nentries != 0) {
2229 vm_map_entry_t entry1, entry2;
2230
2231 /*
2232 * The new mapping failed. Attempt to restore
2233 * the old mappings, saved in the "zap_old_map".
2234 */
2235 if (!map_locked) {
2236 vm_map_lock(map);
2237 map_locked = TRUE;
2238 }
2239
2240 /* first check if the coast is still clear */
2241 start = vm_map_first_entry(zap_old_map)->vme_start;
2242 end = vm_map_last_entry(zap_old_map)->vme_end;
2243 if (vm_map_lookup_entry(map, start, &entry1) ||
2244 vm_map_lookup_entry(map, end, &entry2) ||
2245 entry1 != entry2) {
2246 /*
2247 * Part of that range has already been
2248 * re-mapped: we can't restore the old
2249 * mappings...
2250 */
2251 vm_map_enter_restore_failures++;
2252 } else {
2253 /*
2254 * Transfer the saved map entries from
2255 * "zap_old_map" to the original "map",
2256 * inserting them all after "entry1".
2257 */
2258 for (entry2 = vm_map_first_entry(zap_old_map);
2259 entry2 != vm_map_to_entry(zap_old_map);
2260 entry2 = vm_map_first_entry(zap_old_map)) {
2261 vm_map_size_t entry_size;
2262
2263 entry_size = (entry2->vme_end -
2264 entry2->vme_start);
2265 vm_map_store_entry_unlink(zap_old_map,
2266 entry2);
2267 zap_old_map->size -= entry_size;
2268 vm_map_store_entry_link(map, entry1, entry2);
2269 map->size += entry_size;
2270 entry1 = entry2;
2271 }
2272 if (map->wiring_required) {
2273 /*
2274 * XXX TODO: we should rewire the
2275 * old pages here...
2276 */
2277 }
2278 vm_map_enter_restore_successes++;
2279 }
2280 }
2281 }
2282
2283 if (map_locked) {
2284 vm_map_unlock(map);
2285 }
2286
2287 /*
2288 * Get rid of the "zap_maps" and all the map entries that
2289 * they may still contain.
2290 */
2291 if (zap_old_map != VM_MAP_NULL) {
2292 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
2293 zap_old_map = VM_MAP_NULL;
2294 }
2295 if (zap_new_map != VM_MAP_NULL) {
2296 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
2297 zap_new_map = VM_MAP_NULL;
2298 }
2299
2300 return result;
2301
2302 #undef RETURN
2303 }
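/*
 * A minimal usage sketch for vm_map_enter() (illustrative only, not
 * called from this file): reserving anonymous, zero-filled memory.
 * Passing VM_OBJECT_NULL lets the VM supply an internal object lazily,
 * and VM_FLAGS_ANYWHERE takes the "anywhere" allocation path above.
 * "map" and "size" stand for whatever the caller already has in hand.
 *
 *	vm_map_offset_t	addr = 0;
 *	kern_return_t	kr;
 *
 *	kr = vm_map_enter(map, &addr, size,
 *			  (vm_map_offset_t) 0,		(no alignment mask)
 *			  VM_FLAGS_ANYWHERE,
 *			  VM_OBJECT_NULL,		(anonymous memory)
 *			  (vm_object_offset_t) 0,
 *			  FALSE,			(no copy-on-write)
 *			  VM_PROT_DEFAULT, VM_PROT_ALL,
 *			  VM_INHERIT_DEFAULT);
 */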
2304
2305 kern_return_t
2306 vm_map_enter_mem_object(
2307 vm_map_t target_map,
2308 vm_map_offset_t *address,
2309 vm_map_size_t initial_size,
2310 vm_map_offset_t mask,
2311 int flags,
2312 ipc_port_t port,
2313 vm_object_offset_t offset,
2314 boolean_t copy,
2315 vm_prot_t cur_protection,
2316 vm_prot_t max_protection,
2317 vm_inherit_t inheritance)
2318 {
2319 vm_map_address_t map_addr;
2320 vm_map_size_t map_size;
2321 vm_object_t object;
2322 vm_object_size_t size;
2323 kern_return_t result;
2324 boolean_t mask_cur_protection, mask_max_protection;
2325
2326 mask_cur_protection = cur_protection & VM_PROT_IS_MASK;
2327 mask_max_protection = max_protection & VM_PROT_IS_MASK;
2328 cur_protection &= ~VM_PROT_IS_MASK;
2329 max_protection &= ~VM_PROT_IS_MASK;
2330
2331 /*
2332 * Check arguments for validity
2333 */
2334 if ((target_map == VM_MAP_NULL) ||
2335 (cur_protection & ~VM_PROT_ALL) ||
2336 (max_protection & ~VM_PROT_ALL) ||
2337 (inheritance > VM_INHERIT_LAST_VALID) ||
2338 initial_size == 0)
2339 return KERN_INVALID_ARGUMENT;
2340
2341 map_addr = vm_map_trunc_page(*address);
2342 map_size = vm_map_round_page(initial_size);
2343 size = vm_object_round_page(initial_size);
2344
2345 /*
2346 * Find the vm object (if any) corresponding to this port.
2347 */
2348 if (!IP_VALID(port)) {
2349 object = VM_OBJECT_NULL;
2350 offset = 0;
2351 copy = FALSE;
2352 } else if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
2353 vm_named_entry_t named_entry;
2354
2355 named_entry = (vm_named_entry_t) port->ip_kobject;
2356 /* a few checks to make sure user is obeying rules */
2357 if (size == 0) {
2358 if (offset >= named_entry->size)
2359 return KERN_INVALID_RIGHT;
2360 size = named_entry->size - offset;
2361 }
2362 if (mask_max_protection) {
2363 max_protection &= named_entry->protection;
2364 }
2365 if (mask_cur_protection) {
2366 cur_protection &= named_entry->protection;
2367 }
2368 if ((named_entry->protection & max_protection) !=
2369 max_protection)
2370 return KERN_INVALID_RIGHT;
2371 if ((named_entry->protection & cur_protection) !=
2372 cur_protection)
2373 return KERN_INVALID_RIGHT;
2374 if (named_entry->size < (offset + size))
2375 return KERN_INVALID_ARGUMENT;
2376
2377 /* the caller's "offset" parameter is relative to the start */
2378 /* of the named entry; convert it to an offset in the object */
2379 offset = offset + named_entry->offset;
2380
2381 named_entry_lock(named_entry);
2382 if (named_entry->is_sub_map) {
2383 vm_map_t submap;
2384
2385 submap = named_entry->backing.map;
2386 vm_map_lock(submap);
2387 vm_map_reference(submap);
2388 vm_map_unlock(submap);
2389 named_entry_unlock(named_entry);
2390
2391 result = vm_map_enter(target_map,
2392 &map_addr,
2393 map_size,
2394 mask,
2395 flags | VM_FLAGS_SUBMAP,
2396 (vm_object_t) submap,
2397 offset,
2398 copy,
2399 cur_protection,
2400 max_protection,
2401 inheritance);
2402 if (result != KERN_SUCCESS) {
2403 vm_map_deallocate(submap);
2404 } else {
2405 /*
2406 * No need to lock "submap" just to check its
2407 * "mapped" flag: that flag is never reset
2408 * once it's been set and if we race, we'll
2409 * just end up setting it twice, which is OK.
2410 */
2411 if (submap->mapped_in_other_pmaps == FALSE &&
2412 vm_map_pmap(submap) != PMAP_NULL &&
2413 vm_map_pmap(submap) !=
2414 vm_map_pmap(target_map)) {
2415 /*
2416 * This submap is being mapped in a map
2417 * that uses a different pmap.
2418 * Set its "mapped_in_other_pmaps" flag
2419 * to indicate that we now need to
2420 * remove mappings from all pmaps rather
2421 * than just the submap's pmap.
2422 */
2423 vm_map_lock(submap);
2424 submap->mapped_in_other_pmaps = TRUE;
2425 vm_map_unlock(submap);
2426 }
2427 *address = map_addr;
2428 }
2429 return result;
2430
2431 } else if (named_entry->is_pager) {
2432 unsigned int access;
2433 vm_prot_t protections;
2434 unsigned int wimg_mode;
2435
2436 protections = named_entry->protection & VM_PROT_ALL;
2437 access = GET_MAP_MEM(named_entry->protection);
2438
2439 object = vm_object_enter(named_entry->backing.pager,
2440 named_entry->size,
2441 named_entry->internal,
2442 FALSE,
2443 FALSE);
2444 if (object == VM_OBJECT_NULL) {
2445 named_entry_unlock(named_entry);
2446 return KERN_INVALID_OBJECT;
2447 }
2448
2449 /* JMM - drop reference on pager here */
2450
2451 /* create an extra ref for the named entry */
2452 vm_object_lock(object);
2453 vm_object_reference_locked(object);
2454 named_entry->backing.object = object;
2455 named_entry->is_pager = FALSE;
2456 named_entry_unlock(named_entry);
2457
2458 wimg_mode = object->wimg_bits;
2459
2460 if (access == MAP_MEM_IO) {
2461 wimg_mode = VM_WIMG_IO;
2462 } else if (access == MAP_MEM_COPYBACK) {
2463 wimg_mode = VM_WIMG_USE_DEFAULT;
2464 } else if (access == MAP_MEM_INNERWBACK) {
2465 wimg_mode = VM_WIMG_INNERWBACK;
2466 } else if (access == MAP_MEM_WTHRU) {
2467 wimg_mode = VM_WIMG_WTHRU;
2468 } else if (access == MAP_MEM_WCOMB) {
2469 wimg_mode = VM_WIMG_WCOMB;
2470 }
2471
2472 /* wait for object (if any) to be ready */
2473 if (!named_entry->internal) {
2474 while (!object->pager_ready) {
2475 vm_object_wait(
2476 object,
2477 VM_OBJECT_EVENT_PAGER_READY,
2478 THREAD_UNINT);
2479 vm_object_lock(object);
2480 }
2481 }
2482
2483 if (object->wimg_bits != wimg_mode)
2484 vm_object_change_wimg_mode(object, wimg_mode);
2485
2486 object->true_share = TRUE;
2487
2488 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
2489 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
2490 vm_object_unlock(object);
2491 } else {
2492 /* This is the case where we are going to map */
2493 /* an already mapped object. If the object is */
2494 /* not ready, it is internal. An external */
2495 /* object cannot be mapped until it is ready, */
2496 /* so we can avoid the ready check */
2497 /* in this case. */
2498 object = named_entry->backing.object;
2499 assert(object != VM_OBJECT_NULL);
2500 named_entry_unlock(named_entry);
2501 vm_object_reference(object);
2502 }
2503 } else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
2504 /*
2505 * JMM - This is temporary until we unify named entries
2506 * and raw memory objects.
2507 *
2508 * Detected fake ip_kotype for a memory object. In
2509 * this case, the port isn't really a port at all, but
2510 * instead is just a raw memory object.
2511 */
2512
2513 object = vm_object_enter((memory_object_t)port,
2514 size, FALSE, FALSE, FALSE);
2515 if (object == VM_OBJECT_NULL)
2516 return KERN_INVALID_OBJECT;
2517
2518 /* wait for object (if any) to be ready */
2519 if (object != VM_OBJECT_NULL) {
2520 if (object == kernel_object) {
2521 printf("Warning: Attempt to map kernel object"
2522 " by a non-private kernel entity\n");
2523 return KERN_INVALID_OBJECT;
2524 }
2525 if (!object->pager_ready) {
2526 vm_object_lock(object);
2527
2528 while (!object->pager_ready) {
2529 vm_object_wait(object,
2530 VM_OBJECT_EVENT_PAGER_READY,
2531 THREAD_UNINT);
2532 vm_object_lock(object);
2533 }
2534 vm_object_unlock(object);
2535 }
2536 }
2537 } else {
2538 return KERN_INVALID_OBJECT;
2539 }
2540
2541 if (object != VM_OBJECT_NULL &&
2542 object->named &&
2543 object->pager != MEMORY_OBJECT_NULL &&
2544 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
2545 memory_object_t pager;
2546 vm_prot_t pager_prot;
2547 kern_return_t kr;
2548
2549 /*
2550 * For "named" VM objects, let the pager know that the
2551 * memory object is being mapped. Some pagers need to keep
2552 * track of this, to know when they can reclaim the memory
2553 * object, for example.
2554 * VM calls memory_object_map() for each mapping (specifying
2555 * the protection of each mapping) and calls
2556 * memory_object_last_unmap() when all the mappings are gone.
2557 */
2558 pager_prot = max_protection;
2559 if (copy) {
2560 /*
2561 * Copy-On-Write mapping: won't modify the
2562 * memory object.
2563 */
2564 pager_prot &= ~VM_PROT_WRITE;
2565 }
2566 vm_object_lock(object);
2567 pager = object->pager;
2568 if (object->named &&
2569 pager != MEMORY_OBJECT_NULL &&
2570 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
2571 assert(object->pager_ready);
2572 vm_object_mapping_wait(object, THREAD_UNINT);
2573 vm_object_mapping_begin(object);
2574 vm_object_unlock(object);
2575
2576 kr = memory_object_map(pager, pager_prot);
2577 assert(kr == KERN_SUCCESS);
2578
2579 vm_object_lock(object);
2580 vm_object_mapping_end(object);
2581 }
2582 vm_object_unlock(object);
2583 }
2584
2585 /*
2586 * Perform the copy if requested
2587 */
2588
2589 if (copy) {
2590 vm_object_t new_object;
2591 vm_object_offset_t new_offset;
2592
2593 result = vm_object_copy_strategically(object, offset, size,
2594 &new_object, &new_offset,
2595 &copy);
2596
2597
2598 if (result == KERN_MEMORY_RESTART_COPY) {
2599 boolean_t success;
2600 boolean_t src_needs_copy;
2601
2602 /*
2603 * XXX
2604 * We currently ignore src_needs_copy.
2605 * This really is the issue of how to make
2606 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
2607 * non-kernel users to use. Solution forthcoming.
2608 * In the meantime, since we don't allow non-kernel
2609 * memory managers to specify symmetric copy,
2610 * we won't run into problems here.
2611 */
2612 new_object = object;
2613 new_offset = offset;
2614 success = vm_object_copy_quickly(&new_object,
2615 new_offset, size,
2616 &src_needs_copy,
2617 &copy);
2618 assert(success);
2619 result = KERN_SUCCESS;
2620 }
2621 /*
2622 * Throw away the reference to the
2623 * original object, as it won't be mapped.
2624 */
2625
2626 vm_object_deallocate(object);
2627
2628 if (result != KERN_SUCCESS)
2629 return result;
2630
2631 object = new_object;
2632 offset = new_offset;
2633 }
2634
2635 result = vm_map_enter(target_map,
2636 &map_addr, map_size,
2637 (vm_map_offset_t)mask,
2638 flags,
2639 object, offset,
2640 copy,
2641 cur_protection, max_protection, inheritance);
2642 if (result != KERN_SUCCESS)
2643 vm_object_deallocate(object);
2644 *address = map_addr;
2645 return result;
2646 }
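/*
 * Rough sketch of the named-entry path that lands here (illustrative
 * only; user space normally reaches this through mach_vm_map() and the
 * MIG glue rather than calling it directly).  "src_map", "src_addr",
 * "dst_map" and "size" are placeholders for the caller's state.
 *
 *	ipc_port_t		entry_port;
 *	memory_object_size_t	entry_size = size;
 *	vm_map_offset_t		addr = 0;
 *
 *	kr = mach_make_memory_entry_64(src_map, &entry_size,
 *				       src_addr, VM_PROT_READ,
 *				       &entry_port, IP_NULL);
 *	kr = vm_map_enter_mem_object(dst_map, &addr, entry_size,
 *				     0, VM_FLAGS_ANYWHERE,
 *				     entry_port, 0, FALSE,
 *				     VM_PROT_READ, VM_PROT_READ,
 *				     VM_INHERIT_NONE);
 */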
2647
2648
2649
2650
2651 kern_return_t
2652 vm_map_enter_mem_object_control(
2653 vm_map_t target_map,
2654 vm_map_offset_t *address,
2655 vm_map_size_t initial_size,
2656 vm_map_offset_t mask,
2657 int flags,
2658 memory_object_control_t control,
2659 vm_object_offset_t offset,
2660 boolean_t copy,
2661 vm_prot_t cur_protection,
2662 vm_prot_t max_protection,
2663 vm_inherit_t inheritance)
2664 {
2665 vm_map_address_t map_addr;
2666 vm_map_size_t map_size;
2667 vm_object_t object;
2668 vm_object_size_t size;
2669 kern_return_t result;
2670 memory_object_t pager;
2671 vm_prot_t pager_prot;
2672 kern_return_t kr;
2673
2674 /*
2675 * Check arguments for validity
2676 */
2677 if ((target_map == VM_MAP_NULL) ||
2678 (cur_protection & ~VM_PROT_ALL) ||
2679 (max_protection & ~VM_PROT_ALL) ||
2680 (inheritance > VM_INHERIT_LAST_VALID) ||
2681 initial_size == 0)
2682 return KERN_INVALID_ARGUMENT;
2683
2684 map_addr = vm_map_trunc_page(*address);
2685 map_size = vm_map_round_page(initial_size);
2686 size = vm_object_round_page(initial_size);
2687
2688 object = memory_object_control_to_vm_object(control);
2689
2690 if (object == VM_OBJECT_NULL)
2691 return KERN_INVALID_OBJECT;
2692
2693 if (object == kernel_object) {
2694 printf("Warning: Attempt to map kernel object"
2695 " by a non-private kernel entity\n");
2696 return KERN_INVALID_OBJECT;
2697 }
2698
2699 vm_object_lock(object);
2700 object->ref_count++;
2701 vm_object_res_reference(object);
2702
2703 /*
2704 * For "named" VM objects, let the pager know that the
2705 * memory object is being mapped. Some pagers need to keep
2706 * track of this, to know when they can reclaim the memory
2707 * object, for example.
2708 * VM calls memory_object_map() for each mapping (specifying
2709 * the protection of each mapping) and calls
2710 * memory_object_last_unmap() when all the mappings are gone.
2711 */
2712 pager_prot = max_protection;
2713 if (copy) {
2714 pager_prot &= ~VM_PROT_WRITE;
2715 }
2716 pager = object->pager;
2717 if (object->named &&
2718 pager != MEMORY_OBJECT_NULL &&
2719 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
2720 assert(object->pager_ready);
2721 vm_object_mapping_wait(object, THREAD_UNINT);
2722 vm_object_mapping_begin(object);
2723 vm_object_unlock(object);
2724
2725 kr = memory_object_map(pager, pager_prot);
2726 assert(kr == KERN_SUCCESS);
2727
2728 vm_object_lock(object);
2729 vm_object_mapping_end(object);
2730 }
2731 vm_object_unlock(object);
2732
2733 /*
2734 * Perform the copy if requested
2735 */
2736
2737 if (copy) {
2738 vm_object_t new_object;
2739 vm_object_offset_t new_offset;
2740
2741 result = vm_object_copy_strategically(object, offset, size,
2742 &new_object, &new_offset,
2743 &copy);
2744
2745
2746 if (result == KERN_MEMORY_RESTART_COPY) {
2747 boolean_t success;
2748 boolean_t src_needs_copy;
2749
2750 /*
2751 * XXX
2752 * We currently ignore src_needs_copy.
2753 * This really is the issue of how to make
2754 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
2755 * non-kernel users to use. Solution forthcoming.
2756 * In the meantime, since we don't allow non-kernel
2757 * memory managers to specify symmetric copy,
2758 * we won't run into problems here.
2759 */
2760 new_object = object;
2761 new_offset = offset;
2762 success = vm_object_copy_quickly(&new_object,
2763 new_offset, size,
2764 &src_needs_copy,
2765 &copy);
2766 assert(success);
2767 result = KERN_SUCCESS;
2768 }
2769 /*
2770 * Throw away the reference to the
2771 * original object, as it won't be mapped.
2772 */
2773
2774 vm_object_deallocate(object);
2775
2776 if (result != KERN_SUCCESS)
2777 return result;
2778
2779 object = new_object;
2780 offset = new_offset;
2781 }
2782
2783 result = vm_map_enter(target_map,
2784 &map_addr, map_size,
2785 (vm_map_offset_t)mask,
2786 flags,
2787 object, offset,
2788 copy,
2789 cur_protection, max_protection, inheritance);
2790 if (result != KERN_SUCCESS)
2791 vm_object_deallocate(object);
2792 *address = map_addr;
2793
2794 return result;
2795 }
2796
2797
2798 #if VM_CPM
2799
2800 #ifdef MACH_ASSERT
2801 extern pmap_paddr_t avail_start, avail_end;
2802 #endif
2803
2804 /*
2805 * Allocate memory in the specified map, with the caveat that
2806 * the memory is physically contiguous. This call may fail
2807 * if the system can't find sufficient contiguous memory.
2808 * This call may cause or lead to heart-stopping amounts of
2809 * paging activity.
2810 *
2811 * Memory obtained from this call should be freed in the
2812 * normal way, viz., via vm_deallocate.
2813 */
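/*
 * A minimal sketch of the expected lifetime (illustrative only; the
 * size is rounded to a page multiple internally, as below):
 *
 *	vm_map_offset_t	addr = 0;
 *	kern_return_t	kr;
 *
 *	kr = vm_map_enter_cpm(kernel_map, &addr, size, VM_FLAGS_ANYWHERE);
 *	if (kr == KERN_SUCCESS) {
 *		... use the physically contiguous range at "addr" ...
 *		(void) vm_deallocate(kernel_map, addr, size);
 *	}
 */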
2814 kern_return_t
2815 vm_map_enter_cpm(
2816 vm_map_t map,
2817 vm_map_offset_t *addr,
2818 vm_map_size_t size,
2819 int flags)
2820 {
2821 vm_object_t cpm_obj;
2822 pmap_t pmap;
2823 vm_page_t m, pages;
2824 kern_return_t kr;
2825 vm_map_offset_t va, start, end, offset;
2826 #if MACH_ASSERT
2827 vm_map_offset_t prev_addr = 0;
2828 #endif /* MACH_ASSERT */
2829
2830 boolean_t anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
2831
2832 if (size == 0) {
2833 *addr = 0;
2834 return KERN_SUCCESS;
2835 }
2836 if (anywhere)
2837 *addr = vm_map_min(map);
2838 else
2839 *addr = vm_map_trunc_page(*addr);
2840 size = vm_map_round_page(size);
2841
2842 /*
2843 * LP64todo - cpm_allocate should probably allow
2844 * allocations of >4GB, but not with the current
2845 * algorithm, so just cast down the size for now.
2846 */
2847 if (size > VM_MAX_ADDRESS)
2848 return KERN_RESOURCE_SHORTAGE;
2849 if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
2850 &pages, 0, 0, TRUE, flags)) != KERN_SUCCESS)
2851 return kr;
2852
2853 cpm_obj = vm_object_allocate((vm_object_size_t)size);
2854 assert(cpm_obj != VM_OBJECT_NULL);
2855 assert(cpm_obj->internal);
2856 assert(cpm_obj->vo_size == (vm_object_size_t)size);
2857 assert(cpm_obj->can_persist == FALSE);
2858 assert(cpm_obj->pager_created == FALSE);
2859 assert(cpm_obj->pageout == FALSE);
2860 assert(cpm_obj->shadow == VM_OBJECT_NULL);
2861
2862 /*
2863 * Insert pages into object.
2864 */
2865
2866 vm_object_lock(cpm_obj);
2867 for (offset = 0; offset < size; offset += PAGE_SIZE) {
2868 m = pages;
2869 pages = NEXT_PAGE(m);
2870 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
2871
2872 assert(!m->gobbled);
2873 assert(!m->wanted);
2874 assert(!m->pageout);
2875 assert(!m->tabled);
2876 assert(VM_PAGE_WIRED(m));
2877 /*
2878 * ENCRYPTED SWAP:
2879 * "m" is not supposed to be pageable, so it
2880 * should not be encrypted. It wouldn't be safe
2881 * to enter it in a new VM object while encrypted.
2882 */
2883 ASSERT_PAGE_DECRYPTED(m);
2884 assert(m->busy);
2885 assert(m->phys_page>=(avail_start>>PAGE_SHIFT) && m->phys_page<=(avail_end>>PAGE_SHIFT));
2886
2887 m->busy = FALSE;
2888 vm_page_insert(m, cpm_obj, offset);
2889 }
2890 assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
2891 vm_object_unlock(cpm_obj);
2892
2893 /*
2894 * Hang onto a reference on the object in case a
2895 * multi-threaded application for some reason decides
2896 * to deallocate the portion of the address space into
2897 * which we will insert this object.
2898 *
2899 * Unfortunately, we must insert the object now before
2900 * we can talk to the pmap module about which addresses
2901 * must be wired down. Hence, the race with a multi-
2902 * threaded app.
2903 */
2904 vm_object_reference(cpm_obj);
2905
2906 /*
2907 * Insert object into map.
2908 */
2909
2910 kr = vm_map_enter(
2911 map,
2912 addr,
2913 size,
2914 (vm_map_offset_t)0,
2915 flags,
2916 cpm_obj,
2917 (vm_object_offset_t)0,
2918 FALSE,
2919 VM_PROT_ALL,
2920 VM_PROT_ALL,
2921 VM_INHERIT_DEFAULT);
2922
2923 if (kr != KERN_SUCCESS) {
2924 /*
2925 * A CPM object doesn't have can_persist set,
2926 * so all we have to do is deallocate it to
2927 * free up these pages.
2928 */
2929 assert(cpm_obj->pager_created == FALSE);
2930 assert(cpm_obj->can_persist == FALSE);
2931 assert(cpm_obj->pageout == FALSE);
2932 assert(cpm_obj->shadow == VM_OBJECT_NULL);
2933 vm_object_deallocate(cpm_obj); /* kill acquired ref */
2934 vm_object_deallocate(cpm_obj); /* kill creation ref */
2935 }
2936
2937 /*
2938 * Inform the physical mapping system that the
2939 * range of addresses may not fault, so that
2940 * page tables and such can be locked down as well.
2941 */
2942 start = *addr;
2943 end = start + size;
2944 pmap = vm_map_pmap(map);
2945 pmap_pageable(pmap, start, end, FALSE);
2946
2947 /*
2948 * Enter each page into the pmap, to avoid faults.
2949 * Note that this loop could be coded more efficiently,
2950 * if the need arose, rather than looking up each page
2951 * again.
2952 */
2953 for (offset = 0, va = start; offset < size;
2954 va += PAGE_SIZE, offset += PAGE_SIZE) {
2955 int type_of_fault;
2956
2957 vm_object_lock(cpm_obj);
2958 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
2959 assert(m != VM_PAGE_NULL);
2960
2961 vm_page_zero_fill(m);
2962
2963 type_of_fault = DBG_ZERO_FILL_FAULT;
2964
2965 vm_fault_enter(m, pmap, va, VM_PROT_ALL, VM_PROT_WRITE,
2966 VM_PAGE_WIRED(m), FALSE, FALSE, FALSE, NULL,
2967 &type_of_fault);
2968
2969 vm_object_unlock(cpm_obj);
2970 }
2971
2972 #if MACH_ASSERT
2973 /*
2974 * Verify ordering in address space.
2975 */
2976 for (offset = 0; offset < size; offset += PAGE_SIZE) {
2977 vm_object_lock(cpm_obj);
2978 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
2979 vm_object_unlock(cpm_obj);
2980 if (m == VM_PAGE_NULL)
2981 panic("vm_allocate_cpm: obj %p off 0x%llx no page",
2982 cpm_obj, (uint64_t)offset);
2983 assert(m->tabled);
2984 assert(!m->busy);
2985 assert(!m->wanted);
2986 assert(!m->fictitious);
2987 assert(!m->private);
2988 assert(!m->absent);
2989 assert(!m->error);
2990 assert(!m->cleaning);
2991 assert(!m->laundry);
2992 assert(!m->precious);
2993 assert(!m->clustered);
2994 if (offset != 0) {
2995 if (m->phys_page != prev_addr + 1) {
2996 printf("start 0x%llx end 0x%llx va 0x%llx\n",
2997 (uint64_t)start, (uint64_t)end, (uint64_t)va);
2998 printf("obj %p off 0x%llx\n", cpm_obj, (uint64_t)offset);
2999 printf("m %p prev_address 0x%llx\n", m, (uint64_t)prev_addr);
3000 panic("vm_allocate_cpm: pages not contig!");
3001 }
3002 }
3003 prev_addr = m->phys_page;
3004 }
3005 #endif /* MACH_ASSERT */
3006
3007 vm_object_deallocate(cpm_obj); /* kill extra ref */
3008
3009 return kr;
3010 }
3011
3012
3013 #else /* VM_CPM */
3014
3015 /*
3016 * Interface is defined in all cases, but unless the kernel
3017 * is built explicitly for this option, the interface does
3018 * nothing.
3019 */
3020
3021 kern_return_t
3022 vm_map_enter_cpm(
3023 __unused vm_map_t map,
3024 __unused vm_map_offset_t *addr,
3025 __unused vm_map_size_t size,
3026 __unused int flags)
3027 {
3028 return KERN_FAILURE;
3029 }
3030 #endif /* VM_CPM */
3031
3032 /* Not used without nested pmaps */
3033 #ifndef NO_NESTED_PMAP
3034 /*
3035 * Clip and unnest a portion of a nested submap mapping.
3036 */
3037
3038
3039 static void
3040 vm_map_clip_unnest(
3041 vm_map_t map,
3042 vm_map_entry_t entry,
3043 vm_map_offset_t start_unnest,
3044 vm_map_offset_t end_unnest)
3045 {
3046 vm_map_offset_t old_start_unnest = start_unnest;
3047 vm_map_offset_t old_end_unnest = end_unnest;
3048
3049 assert(entry->is_sub_map);
3050 assert(entry->object.sub_map != NULL);
3051
3052 /*
3053 * Query the platform for the optimal unnest range.
3054 * DRK: There's some duplication of effort here, since
3055 * callers may have adjusted the range to some extent. This
3056 * routine was introduced to support 1GiB subtree nesting
3057 * for x86 platforms, which can also nest on 2MiB boundaries
3058 * depending on size/alignment.
3059 */
3060 if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) {
3061 log_unnest_badness(map, old_start_unnest, old_end_unnest);
3062 }
3063
3064 if (entry->vme_start > start_unnest ||
3065 entry->vme_end < end_unnest) {
3066 panic("vm_map_clip_unnest(0x%llx,0x%llx): "
3067 "bad nested entry: start=0x%llx end=0x%llx\n",
3068 (long long)start_unnest, (long long)end_unnest,
3069 (long long)entry->vme_start, (long long)entry->vme_end);
3070 }
3071
3072 if (start_unnest > entry->vme_start) {
3073 _vm_map_clip_start(&map->hdr,
3074 entry,
3075 start_unnest);
3076 vm_map_store_update_first_free(map, map->first_free);
3077 }
3078 if (entry->vme_end > end_unnest) {
3079 _vm_map_clip_end(&map->hdr,
3080 entry,
3081 end_unnest);
3082 vm_map_store_update_first_free(map, map->first_free);
3083 }
3084
3085 pmap_unnest(map->pmap,
3086 entry->vme_start,
3087 entry->vme_end - entry->vme_start);
3088 if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
3089 /* clean up parent map/maps */
3090 vm_map_submap_pmap_clean(
3091 map, entry->vme_start,
3092 entry->vme_end,
3093 entry->object.sub_map,
3094 entry->offset);
3095 }
3096 entry->use_pmap = FALSE;
3097 if (entry->alias == VM_MEMORY_SHARED_PMAP) {
3098 entry->alias = VM_MEMORY_UNSHARED_PMAP;
3099 }
3100 }
3101 #endif /* NO_NESTED_PMAP */
3102
3103 /*
3104 * vm_map_clip_start: [ internal use only ]
3105 *
3106 * Asserts that the given entry begins at or after
3107 * the specified address; if necessary,
3108 * it splits the entry into two.
3109 */
3110 void
3111 vm_map_clip_start(
3112 vm_map_t map,
3113 vm_map_entry_t entry,
3114 vm_map_offset_t startaddr)
3115 {
3116 #ifndef NO_NESTED_PMAP
3117 if (entry->use_pmap &&
3118 startaddr >= entry->vme_start) {
3119 vm_map_offset_t start_unnest, end_unnest;
3120
3121 /*
3122 * Make sure "startaddr" is no longer in a nested range
3123 * before we clip. Unnest only the minimum range the platform
3124 * can handle.
3125 * vm_map_clip_unnest may perform additional adjustments to
3126 * the unnest range.
3127 */
3128 start_unnest = startaddr & ~(pmap_nesting_size_min - 1);
3129 end_unnest = start_unnest + pmap_nesting_size_min;
3130 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
3131 }
3132 #endif /* NO_NESTED_PMAP */
3133 if (startaddr > entry->vme_start) {
3134 if (entry->object.vm_object &&
3135 !entry->is_sub_map &&
3136 entry->object.vm_object->phys_contiguous) {
3137 pmap_remove(map->pmap,
3138 (addr64_t)(entry->vme_start),
3139 (addr64_t)(entry->vme_end));
3140 }
3141 _vm_map_clip_start(&map->hdr, entry, startaddr);
3142 vm_map_store_update_first_free(map, map->first_free);
3143 }
3144 }
3145
3146
3147 #define vm_map_copy_clip_start(copy, entry, startaddr) \
3148 MACRO_BEGIN \
3149 if ((startaddr) > (entry)->vme_start) \
3150 _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \
3151 MACRO_END
3152
3153 /*
3154 * This routine is called only when it is known that
3155 * the entry must be split.
3156 */
3157 static void
3158 _vm_map_clip_start(
3159 register struct vm_map_header *map_header,
3160 register vm_map_entry_t entry,
3161 register vm_map_offset_t start)
3162 {
3163 register vm_map_entry_t new_entry;
3164
3165 /*
3166 * Split off the front portion --
3167 * note that we must insert the new
3168 * entry BEFORE this one, so that
3169 * this entry has the specified starting
3170 * address.
3171 */
3172
3173 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
3174 vm_map_entry_copy_full(new_entry, entry);
3175
3176 new_entry->vme_end = start;
3177 assert(new_entry->vme_start < new_entry->vme_end);
3178 entry->offset += (start - entry->vme_start);
3179 assert(start < entry->vme_end);
3180 entry->vme_start = start;
3181
3182 _vm_map_store_entry_link(map_header, entry->vme_prev, new_entry);
3183
3184 if (entry->is_sub_map)
3185 vm_map_reference(new_entry->object.sub_map);
3186 else
3187 vm_object_reference(new_entry->object.vm_object);
3188 }
3189
3190
3191 /*
3192 * vm_map_clip_end: [ internal use only ]
3193 *
3194 * Asserts that the given entry ends at or before
3195 * the specified address; if necessary,
3196 * it splits the entry into two.
3197 */
3198 void
3199 vm_map_clip_end(
3200 vm_map_t map,
3201 vm_map_entry_t entry,
3202 vm_map_offset_t endaddr)
3203 {
3204 if (endaddr > entry->vme_end) {
3205 /*
3206 * Within the scope of this clipping, limit "endaddr" to
3207 * the end of this map entry...
3208 */
3209 endaddr = entry->vme_end;
3210 }
3211 #ifndef NO_NESTED_PMAP
3212 if (entry->use_pmap) {
3213 vm_map_offset_t start_unnest, end_unnest;
3214
3215 /*
3216 * Make sure the range between the start of this entry and
3217 * the new "endaddr" is no longer nested before we clip.
3218 * Unnest only the minimum range the platform can handle.
3219 * vm_map_clip_unnest may perform additional adjustments to
3220 * the unnest range.
3221 */
3222 start_unnest = entry->vme_start;
3223 end_unnest =
3224 (endaddr + pmap_nesting_size_min - 1) &
3225 ~(pmap_nesting_size_min - 1);
3226 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
3227 }
3228 #endif /* NO_NESTED_PMAP */
3229 if (endaddr < entry->vme_end) {
3230 if (entry->object.vm_object &&
3231 !entry->is_sub_map &&
3232 entry->object.vm_object->phys_contiguous) {
3233 pmap_remove(map->pmap,
3234 (addr64_t)(entry->vme_start),
3235 (addr64_t)(entry->vme_end));
3236 }
3237 _vm_map_clip_end(&map->hdr, entry, endaddr);
3238 vm_map_store_update_first_free(map, map->first_free);
3239 }
3240 }
3241
3242
3243 #define vm_map_copy_clip_end(copy, entry, endaddr) \
3244 MACRO_BEGIN \
3245 if ((endaddr) < (entry)->vme_end) \
3246 _vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \
3247 MACRO_END
3248
3249 /*
3250 * This routine is called only when it is known that
3251 * the entry must be split.
3252 */
3253 static void
3254 _vm_map_clip_end(
3255 register struct vm_map_header *map_header,
3256 register vm_map_entry_t entry,
3257 register vm_map_offset_t end)
3258 {
3259 register vm_map_entry_t new_entry;
3260
3261 /*
3262 * Create a new entry and insert it
3263 * AFTER the specified entry
3264 */
3265
3266 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
3267 vm_map_entry_copy_full(new_entry, entry);
3268
3269 assert(entry->vme_start < end);
3270 new_entry->vme_start = entry->vme_end = end;
3271 new_entry->offset += (end - entry->vme_start);
3272 assert(new_entry->vme_start < new_entry->vme_end);
3273
3274 _vm_map_store_entry_link(map_header, entry, new_entry);
3275
3276 if (entry->is_sub_map)
3277 vm_map_reference(new_entry->object.sub_map);
3278 else
3279 vm_object_reference(new_entry->object.vm_object);
3280 }
3281
3282
3283 /*
3284 * VM_MAP_RANGE_CHECK: [ internal use only ]
3285 *
3286 * Asserts that the starting and ending region
3287 * addresses fall within the valid range of the map.
3288 */
3289 #define VM_MAP_RANGE_CHECK(map, start, end) \
3290 MACRO_BEGIN \
3291 if (start < vm_map_min(map)) \
3292 start = vm_map_min(map); \
3293 if (end > vm_map_max(map)) \
3294 end = vm_map_max(map); \
3295 if (start > end) \
3296 start = end; \
3297 MACRO_END
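/*
 * For example (illustrative numbers): in a map whose valid range is
 * [0x1000, 0xF000), a request for (start 0x0, end 0x10000) is clamped
 * to (0x1000, 0xF000), and a request entirely above the map, such as
 * (0x20000, 0x30000), degenerates to the empty range (0xF000, 0xF000).
 */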
3298
3299 /*
3300 * vm_map_range_check: [ internal use only ]
3301 *
3302 * Check that the region defined by the specified start and
3303 * end addresses is wholly contained within a single map
3304 * entry or set of adjacent map entries of the specified map,
3305 * i.e. the specified region contains no unmapped space.
3306 * If any or all of the region is unmapped, FALSE is returned.
3307 * Otherwise, TRUE is returned and if the output argument 'entry'
3308 * is not NULL it points to the map entry containing the start
3309 * of the region.
3310 *
3311 * The map is locked for reading on entry and is left locked.
3312 */
3313 static boolean_t
3314 vm_map_range_check(
3315 register vm_map_t map,
3316 register vm_map_offset_t start,
3317 register vm_map_offset_t end,
3318 vm_map_entry_t *entry)
3319 {
3320 vm_map_entry_t cur;
3321 register vm_map_offset_t prev;
3322
3323 /*
3324 * Basic sanity checks first
3325 */
3326 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
3327 return (FALSE);
3328
3329 /*
3330 * Check first if the region starts within a valid
3331 * mapping for the map.
3332 */
3333 if (!vm_map_lookup_entry(map, start, &cur))
3334 return (FALSE);
3335
3336 /*
3337 * Optimize for the case that the region is contained
3338 * in a single map entry.
3339 */
3340 if (entry != (vm_map_entry_t *) NULL)
3341 *entry = cur;
3342 if (end <= cur->vme_end)
3343 return (TRUE);
3344
3345 /*
3346 * If the region is not wholly contained within a
3347 * single entry, walk the entries looking for holes.
3348 */
3349 prev = cur->vme_end;
3350 cur = cur->vme_next;
3351 while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
3352 if (end <= cur->vme_end)
3353 return (TRUE);
3354 prev = cur->vme_end;
3355 cur = cur->vme_next;
3356 }
3357 return (FALSE);
3358 }
3359
3360 /*
3361 * vm_map_submap: [ kernel use only ]
3362 *
3363 * Mark the given range as handled by a subordinate map.
3364 *
3365 * This range must have been created with vm_map_find using
3366 * the vm_submap_object, and no other operations may have been
3367 * performed on this range prior to calling vm_map_submap.
3368 *
3369 * Only a limited number of operations can be performed
3370 * within this range after calling vm_map_submap:
3371 * vm_fault
3372 * [Don't try vm_map_copyin!]
3373 *
3374 * To remove a submapping, one must first remove the
3375 * range from the superior map, and then destroy the
3376 * submap (if desired). [Better yet, don't try it.]
3377 */
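/*
 * Rough sketch of that sequence (illustrative only; the reservation
 * call varies by caller, but the range must be backed by
 * vm_submap_object -- "parent_map", "submap", "base", "size" and
 * "use_pmap" are placeholders):
 *
 *	vm_map_offset_t	base = ...;
 *
 *	kr = vm_map_enter(parent_map, &base, size,
 *			  (vm_map_offset_t) 0, VM_FLAGS_FIXED,
 *			  vm_submap_object, 0, FALSE,
 *			  VM_PROT_READ, VM_PROT_ALL, VM_INHERIT_SHARE);
 *	kr = vm_map_submap(parent_map, base, base + size,
 *			   submap, 0, use_pmap);
 */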
3378 kern_return_t
3379 vm_map_submap(
3380 vm_map_t map,
3381 vm_map_offset_t start,
3382 vm_map_offset_t end,
3383 vm_map_t submap,
3384 vm_map_offset_t offset,
3385 #ifdef NO_NESTED_PMAP
3386 __unused
3387 #endif /* NO_NESTED_PMAP */
3388 boolean_t use_pmap)
3389 {
3390 vm_map_entry_t entry;
3391 register kern_return_t result = KERN_INVALID_ARGUMENT;
3392 register vm_object_t object;
3393
3394 vm_map_lock(map);
3395
3396 if (! vm_map_lookup_entry(map, start, &entry)) {
3397 entry = entry->vme_next;
3398 }
3399
3400 if (entry == vm_map_to_entry(map) ||
3401 entry->is_sub_map) {
3402 vm_map_unlock(map);
3403 return KERN_INVALID_ARGUMENT;
3404 }
3405
3406 assert(!entry->use_pmap); /* we don't want to unnest anything here */
3407 vm_map_clip_start(map, entry, start);
3408 vm_map_clip_end(map, entry, end);
3409
3410 if ((entry->vme_start == start) && (entry->vme_end == end) &&
3411 (!entry->is_sub_map) &&
3412 ((object = entry->object.vm_object) == vm_submap_object) &&
3413 (object->resident_page_count == 0) &&
3414 (object->copy == VM_OBJECT_NULL) &&
3415 (object->shadow == VM_OBJECT_NULL) &&
3416 (!object->pager_created)) {
3417 entry->offset = (vm_object_offset_t)offset;
3418 entry->object.vm_object = VM_OBJECT_NULL;
3419 vm_object_deallocate(object);
3420 entry->is_sub_map = TRUE;
3421 entry->object.sub_map = submap;
3422 vm_map_reference(submap);
3423 if (submap->mapped_in_other_pmaps == FALSE &&
3424 vm_map_pmap(submap) != PMAP_NULL &&
3425 vm_map_pmap(submap) != vm_map_pmap(map)) {
3426 /*
3427 * This submap is being mapped in a map
3428 * that uses a different pmap.
3429 * Set its "mapped_in_other_pmaps" flag
3430 * to indicate that we now need to
3431 * remove mappings from all pmaps rather
3432 * than just the submap's pmap.
3433 */
3434 submap->mapped_in_other_pmaps = TRUE;
3435 }
3436
3437 #ifndef NO_NESTED_PMAP
3438 if (use_pmap) {
3439 /* nest if platform code will allow */
3440 if(submap->pmap == NULL) {
3441 ledger_t ledger = map->pmap->ledger;
3442 submap->pmap = pmap_create(ledger,
3443 (vm_map_size_t) 0, FALSE);
3444 if(submap->pmap == PMAP_NULL) {
3445 vm_map_unlock(map);
3446 return(KERN_NO_SPACE);
3447 }
3448 }
3449 result = pmap_nest(map->pmap,
3450 (entry->object.sub_map)->pmap,
3451 (addr64_t)start,
3452 (addr64_t)start,
3453 (uint64_t)(end - start));
3454 if(result)
3455 panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result);
3456 entry->use_pmap = TRUE;
3457 }
3458 #else /* NO_NESTED_PMAP */
3459 pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end);
3460 #endif /* NO_NESTED_PMAP */
3461 result = KERN_SUCCESS;
3462 }
3463 vm_map_unlock(map);
3464
3465 return(result);
3466 }
3467
3468 /*
3469 * vm_map_protect:
3470 *
3471 * Sets the protection of the specified address
3472 * region in the target map. If "set_max" is
3473 * specified, the maximum protection is to be set;
3474 * otherwise, only the current protection is affected.
3475 */
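/*
 * A minimal sketch (illustrative only): make a range read-only for the
 * current mapping while leaving its maximum protection untouched:
 *
 *	kr = vm_map_protect(map, start, end, VM_PROT_READ, FALSE);
 *
 * Passing TRUE for "set_max" would clamp max_protection as well, which
 * the validation pass below only allows to be narrowed (unless
 * VM_PROT_COPY is requested).
 */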
3476 kern_return_t
3477 vm_map_protect(
3478 register vm_map_t map,
3479 register vm_map_offset_t start,
3480 register vm_map_offset_t end,
3481 register vm_prot_t new_prot,
3482 register boolean_t set_max)
3483 {
3484 register vm_map_entry_t current;
3485 register vm_map_offset_t prev;
3486 vm_map_entry_t entry;
3487 vm_prot_t new_max;
3488
3489 XPR(XPR_VM_MAP,
3490 "vm_map_protect, 0x%X start 0x%X end 0x%X, new 0x%X %d",
3491 map, start, end, new_prot, set_max);
3492
3493 vm_map_lock(map);
3494
3495 /* LP64todo - remove this check when vm_map_commpage64()
3496 * no longer has to stuff in a map_entry for the commpage
3497 * above the map's max_offset.
3498 */
3499 if (start >= map->max_offset) {
3500 vm_map_unlock(map);
3501 return(KERN_INVALID_ADDRESS);
3502 }
3503
3504 while(1) {
3505 /*
3506 * Lookup the entry. If it doesn't start in a valid
3507 * entry, return an error.
3508 */
3509 if (! vm_map_lookup_entry(map, start, &entry)) {
3510 vm_map_unlock(map);
3511 return(KERN_INVALID_ADDRESS);
3512 }
3513
3514 if (entry->superpage_size && (start & (SUPERPAGE_SIZE-1))) { /* extend request to whole entry */
3515 start = SUPERPAGE_ROUND_DOWN(start);
3516 continue;
3517 }
3518 break;
3519 }
3520 if (entry->superpage_size)
3521 end = SUPERPAGE_ROUND_UP(end);
3522
3523 /*
3524 * Make a first pass to check for protection and address
3525 * violations.
3526 */
3527
3528 current = entry;
3529 prev = current->vme_start;
3530 while ((current != vm_map_to_entry(map)) &&
3531 (current->vme_start < end)) {
3532
3533 /*
3534 * If there is a hole, return an error.
3535 */
3536 if (current->vme_start != prev) {
3537 vm_map_unlock(map);
3538 return(KERN_INVALID_ADDRESS);
3539 }
3540
3541 new_max = current->max_protection;
3542 if(new_prot & VM_PROT_COPY) {
3543 new_max |= VM_PROT_WRITE;
3544 if ((new_prot & (new_max | VM_PROT_COPY)) != new_prot) {
3545 vm_map_unlock(map);
3546 return(KERN_PROTECTION_FAILURE);
3547 }
3548 } else {
3549 if ((new_prot & new_max) != new_prot) {
3550 vm_map_unlock(map);
3551 return(KERN_PROTECTION_FAILURE);
3552 }
3553 }
3554
3555 #if CONFIG_EMBEDDED
3556 if (new_prot & VM_PROT_WRITE) {
3557 if ((new_prot & VM_PROT_EXECUTE) && !(current->used_for_jit)) {
3558 printf("EMBEDDED: %s can't have both write and exec at the same time\n", __FUNCTION__);
3559 new_prot &= ~VM_PROT_EXECUTE;
3560 }
3561 }
3562 #endif
3563
3564 prev = current->vme_end;
3565 current = current->vme_next;
3566 }
3567 if (end > prev) {
3568 vm_map_unlock(map);
3569 return(KERN_INVALID_ADDRESS);
3570 }
3571
3572 /*
3573 * Go back and fix up protections.
3574 * Clip to start here if the range starts within
3575 * the entry.
3576 */
3577
3578 current = entry;
3579 if (current != vm_map_to_entry(map)) {
3580 /* clip and unnest if necessary */
3581 vm_map_clip_start(map, current, start);
3582 }
3583
3584 while ((current != vm_map_to_entry(map)) &&
3585 (current->vme_start < end)) {
3586
3587 vm_prot_t old_prot;
3588
3589 vm_map_clip_end(map, current, end);
3590
3591 assert(!current->use_pmap); /* clipping did unnest if needed */
3592
3593 old_prot = current->protection;
3594
3595 if(new_prot & VM_PROT_COPY) {
3596 /* caller is asking specifically to copy the */
3597 /* mapped data; this implies that max protection */
3598 /* will include write. Caller must be prepared */
3599 /* for loss of shared memory communication in the */
3600 /* target area after taking this step */
3601
3602 if (current->is_sub_map == FALSE && current->object.vm_object == VM_OBJECT_NULL){
3603 current->object.vm_object = vm_object_allocate((vm_map_size_t)(current->vme_end - current->vme_start));
3604 current->offset = 0;
3605 }
3606 current->needs_copy = TRUE;
3607 current->max_protection |= VM_PROT_WRITE;
3608 }
3609
3610 if (set_max)
3611 current->protection =
3612 (current->max_protection =
3613 new_prot & ~VM_PROT_COPY) &
3614 old_prot;
3615 else
3616 current->protection = new_prot & ~VM_PROT_COPY;
3617
3618 /*
3619 * Update physical map if necessary.
3620 * If the request is to turn off write protection,
3621 * we won't do it for real (in pmap). This is because
3622 * it would cause copy-on-write to fail. We've already
3623 * set the new protection in the map, so if a
3624 * write-protect fault occurred, it will be fixed up
3625 * properly, COW or not.
3626 */
3627 if (current->protection != old_prot) {
3628 /* Look one level in: we support nested pmaps */
3629 /* from mapped submaps, which are direct entries */
3630 /* in our map. */
3631
3632 vm_prot_t prot;
3633
3634 prot = current->protection & ~VM_PROT_WRITE;
3635
3636 if (override_nx(map, current->alias) && prot)
3637 prot |= VM_PROT_EXECUTE;
3638
3639 if (current->is_sub_map && current->use_pmap) {
3640 pmap_protect(current->object.sub_map->pmap,
3641 current->vme_start,
3642 current->vme_end,
3643 prot);
3644 } else {
3645 pmap_protect(map->pmap,
3646 current->vme_start,
3647 current->vme_end,
3648 prot);
3649 }
3650 }
3651 current = current->vme_next;
3652 }
3653
3654 current = entry;
3655 while ((current != vm_map_to_entry(map)) &&
3656 (current->vme_start <= end)) {
3657 vm_map_simplify_entry(map, current);
3658 current = current->vme_next;
3659 }
3660
3661 vm_map_unlock(map);
3662 return(KERN_SUCCESS);
3663 }
3664
3665 /*
3666 * vm_map_inherit:
3667 *
3668 * Sets the inheritance of the specified address
3669 * range in the target map. Inheritance
3670 * affects how the map will be shared with
3671 * child maps at the time of vm_map_fork.
3672 */
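/*
 * For example (illustrative only), marking a range so that a child
 * created at vm_map_fork() time receives its own copy rather than
 * sharing it with the parent:
 *
 *	kr = vm_map_inherit(map, start, end, VM_INHERIT_COPY);
 *
 * Note that VM_INHERIT_COPY is rejected below for ranges backed by
 * submaps.
 */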
3673 kern_return_t
3674 vm_map_inherit(
3675 register vm_map_t map,
3676 register vm_map_offset_t start,
3677 register vm_map_offset_t end,
3678 register vm_inherit_t new_inheritance)
3679 {
3680 register vm_map_entry_t entry;
3681 vm_map_entry_t temp_entry;
3682
3683 vm_map_lock(map);
3684
3685 VM_MAP_RANGE_CHECK(map, start, end);
3686
3687 if (vm_map_lookup_entry(map, start, &temp_entry)) {
3688 entry = temp_entry;
3689 }
3690 else {
3691 temp_entry = temp_entry->vme_next;
3692 entry = temp_entry;
3693 }
3694
3695 /* first check entire range for submaps which can't support the */
3696 /* given inheritance. */
3697 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
3698 if(entry->is_sub_map) {
3699 if(new_inheritance == VM_INHERIT_COPY) {
3700 vm_map_unlock(map);
3701 return(KERN_INVALID_ARGUMENT);
3702 }
3703 }
3704
3705 entry = entry->vme_next;
3706 }
3707
3708 entry = temp_entry;
3709 if (entry != vm_map_to_entry(map)) {
3710 /* clip and unnest if necessary */
3711 vm_map_clip_start(map, entry, start);
3712 }
3713
3714 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
3715 vm_map_clip_end(map, entry, end);
3716 assert(!entry->use_pmap); /* clip did unnest if needed */
3717
3718 entry->inheritance = new_inheritance;
3719
3720 entry = entry->vme_next;
3721 }
3722
3723 vm_map_unlock(map);
3724 return(KERN_SUCCESS);
3725 }
3726
3727 /*
3728 * Update the accounting for the amount of wired memory in this map. If the user has
3729 * exceeded the defined limits, then we fail. Wiring on behalf of the kernel never fails.
3730 */
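/*
 * In shorthand (illustrative), the user-wire admission check performed
 * below is:
 *
 *	allowed = MIN(map->user_wire_limit, vm_user_wire_limit);
 *	if (size + map->user_wire_size > allowed)
 *		return KERN_RESOURCE_SHORTAGE;
 *
 * plus two system-wide checks against vm_global_user_wire_limit and
 * against max_mem - vm_global_no_user_wire_amount.
 */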
3731
3732 static kern_return_t
3733 add_wire_counts(
3734 vm_map_t map,
3735 vm_map_entry_t entry,
3736 boolean_t user_wire)
3737 {
3738 vm_map_size_t size;
3739
3740 if (user_wire) {
3741 unsigned int total_wire_count = vm_page_wire_count + vm_lopage_free_count;
3742
3743 /*
3744 * We're wiring memory at the request of the user. Check if this is the first time the user is wiring
3745 * this map entry.
3746 */
3747
3748 if (entry->user_wired_count == 0) {
3749 size = entry->vme_end - entry->vme_start;
3750
3751 /*
3752 * Since this is the first time the user is wiring this map entry, check to see if we're
3753 * exceeding the user wire limits. There is a per map limit which is the smaller of either
3754 * the process's rlimit or the global vm_user_wire_limit which caps this value. There is also
3755 * a system-wide limit on the amount of memory all users can wire. If the user is over either
3756 * limit, then we fail.
3757 */
3758
3759 if(size + map->user_wire_size > MIN(map->user_wire_limit, vm_user_wire_limit) ||
3760 size + ptoa_64(total_wire_count) > vm_global_user_wire_limit ||
3761 size + ptoa_64(total_wire_count) > max_mem - vm_global_no_user_wire_amount)
3762 return KERN_RESOURCE_SHORTAGE;
3763
3764 /*
3765 * The first time the user wires an entry, we also increment the wired_count and add this to
3766 * the total that has been wired in the map.
3767 */
3768
3769 if (entry->wired_count >= MAX_WIRE_COUNT)
3770 return KERN_FAILURE;
3771
3772 entry->wired_count++;
3773 map->user_wire_size += size;
3774 }
3775
3776 if (entry->user_wired_count >= MAX_WIRE_COUNT)
3777 return KERN_FAILURE;
3778
3779 entry->user_wired_count++;
3780
3781 } else {
3782
3783 /*
3784 * The kernel's wiring the memory. Just bump the count and continue.
3785 */
3786
3787 if (entry->wired_count >= MAX_WIRE_COUNT)
3788 panic("vm_map_wire: too many wirings");
3789
3790 entry->wired_count++;
3791 }
3792
3793 return KERN_SUCCESS;
3794 }
3795
3796 /*
3797 * Update the memory wiring accounting now that the given map entry is being unwired.
3798 */
3799
3800 static void
3801 subtract_wire_counts(
3802 vm_map_t map,
3803 vm_map_entry_t entry,
3804 boolean_t user_wire)
3805 {
3806
3807 if (user_wire) {
3808
3809 /*
3810 * We're unwiring memory at the request of the user. See if we're removing the last user wire reference.
3811 */
3812
3813 if (entry->user_wired_count == 1) {
3814
3815 /*
3816 * We're removing the last user wire reference. Decrement the wired_count and the total
3817 * user wired memory for this map.
3818 */
3819
3820 assert(entry->wired_count >= 1);
3821 entry->wired_count--;
3822 map->user_wire_size -= entry->vme_end - entry->vme_start;
3823 }
3824
3825 assert(entry->user_wired_count >= 1);
3826 entry->user_wired_count--;
3827
3828 } else {
3829
3830 /*
3831 * The kernel is unwiring the memory. Just update the count.
3832 */
3833
3834 assert(entry->wired_count >= 1);
3835 entry->wired_count--;
3836 }
3837 }
3838
3839 /*
3840 * vm_map_wire:
3841 *
3842 * Sets the pageability of the specified address range in the
3843 * target map as wired. Regions specified as not pageable require
3844 * locked-down physical memory and physical page maps. The
3845 * access_type variable indicates types of accesses that must not
3846 * generate page faults. This is checked against protection of
3847 * memory being locked-down.
3848 *
3849 * The map must not be locked, but a reference must remain to the
3850 * map throughout the call.
3851 */
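/*
 * Typical use (illustrative only): wire a user range for read/write
 * access on behalf of the task itself, so the per-map and global user
 * wire limits in add_wire_counts() apply:
 *
 *	kr = vm_map_wire(map, start, end,
 *			 VM_PROT_READ | VM_PROT_WRITE,
 *			 TRUE);		(user_wire)
 */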
3852 static kern_return_t
3853 vm_map_wire_nested(
3854 register vm_map_t map,
3855 register vm_map_offset_t start,
3856 register vm_map_offset_t end,
3857 register vm_prot_t access_type,
3858 boolean_t user_wire,
3859 pmap_t map_pmap,
3860 vm_map_offset_t pmap_addr)
3861 {
3862 register vm_map_entry_t entry;
3863 struct vm_map_entry *first_entry, tmp_entry;
3864 vm_map_t real_map;
3865 register vm_map_offset_t s,e;
3866 kern_return_t rc;
3867 boolean_t need_wakeup;
3868 boolean_t main_map = FALSE;
3869 wait_interrupt_t interruptible_state;
3870 thread_t cur_thread;
3871 unsigned int last_timestamp;
3872 vm_map_size_t size;
3873
3874 vm_map_lock(map);
3875 if(map_pmap == NULL)
3876 main_map = TRUE;
3877 last_timestamp = map->timestamp;
3878
3879 VM_MAP_RANGE_CHECK(map, start, end);
3880 assert(page_aligned(start));
3881 assert(page_aligned(end));
3882 if (start == end) {
3883 /* We wired what the caller asked for: zero pages */
3884 vm_map_unlock(map);
3885 return KERN_SUCCESS;
3886 }
3887
3888 need_wakeup = FALSE;
3889 cur_thread = current_thread();
3890
3891 s = start;
3892 rc = KERN_SUCCESS;
3893
3894 if (vm_map_lookup_entry(map, s, &first_entry)) {
3895 entry = first_entry;
3896 /*
3897 * vm_map_clip_start will be done later.
3898 * We don't want to unnest any nested submaps here !
3899 */
3900 } else {
3901 /* Start address is not in map */
3902 rc = KERN_INVALID_ADDRESS;
3903 goto done;
3904 }
3905
3906 while ((entry != vm_map_to_entry(map)) && (s < end)) {
3907 /*
3908 * At this point, we have wired from "start" to "s".
3909 * We still need to wire from "s" to "end".
3910 *
3911 * "entry" hasn't been clipped, so it could start before "s"
3912 * and/or end after "end".
3913 */
3914
3915 /* "e" is how far we want to wire in this entry */
3916 e = entry->vme_end;
3917 if (e > end)
3918 e = end;
3919
3920 /*
3921 * If another thread is wiring/unwiring this entry then
3922 * block after informing other thread to wake us up.
3923 */
3924 if (entry->in_transition) {
3925 wait_result_t wait_result;
3926
3927 /*
3928 * We have not clipped the entry. Make sure that
3929 * the start address is in range so that the lookup
3930 * below will succeed.
3931 * "s" is the current starting point: we've already
3932 * wired from "start" to "s" and we still have
3933 * to wire from "s" to "end".
3934 */
3935
3936 entry->needs_wakeup = TRUE;
3937
3938 /*
3939 * wake up anybody waiting on entries that we have
3940 * already wired.
3941 */
3942 if (need_wakeup) {
3943 vm_map_entry_wakeup(map);
3944 need_wakeup = FALSE;
3945 }
3946 /*
3947 * User wiring is interruptible
3948 */
3949 wait_result = vm_map_entry_wait(map,
3950 (user_wire) ? THREAD_ABORTSAFE :
3951 THREAD_UNINT);
3952 if (user_wire && wait_result == THREAD_INTERRUPTED) {
3953 /*
3954 * undo the wirings we have done so far
3955 * We do not clear the needs_wakeup flag,
3956 * because we cannot tell if we were the
3957 * only one waiting.
3958 */
3959 rc = KERN_FAILURE;
3960 goto done;
3961 }
3962
3963 /*
3964 * Cannot avoid a lookup here. Reset the timestamp.
3965 */
3966 last_timestamp = map->timestamp;
3967
3968 /*
3969 * The entry could have been clipped, look it up again.
3970 * The worst that can happen is that it no longer exists.
3971 */
3972 if (!vm_map_lookup_entry(map, s, &first_entry)) {
3973 /*
3974 * User: undo everything up to the previous
3975 * entry. let vm_map_unwire worry about
3976 * checking the validity of the range.
3977 */
3978 rc = KERN_FAILURE;
3979 goto done;
3980 }
3981 entry = first_entry;
3982 continue;
3983 }
3984
3985 if (entry->is_sub_map) {
3986 vm_map_offset_t sub_start;
3987 vm_map_offset_t sub_end;
3988 vm_map_offset_t local_start;
3989 vm_map_offset_t local_end;
3990 pmap_t pmap;
3991
3992 vm_map_clip_start(map, entry, s);
3993 vm_map_clip_end(map, entry, end);
3994
3995 sub_start = entry->offset;
3996 sub_end = entry->vme_end;
3997 sub_end += entry->offset - entry->vme_start;
3998
3999 local_end = entry->vme_end;
4000 if(map_pmap == NULL) {
4001 vm_object_t object;
4002 vm_object_offset_t offset;
4003 vm_prot_t prot;
4004 boolean_t wired;
4005 vm_map_entry_t local_entry;
4006 vm_map_version_t version;
4007 vm_map_t lookup_map;
4008
4009 if(entry->use_pmap) {
4010 pmap = entry->object.sub_map->pmap;
4011 /* the ppc implementation requires that */
4012 /* a submap's pmap address ranges line */
4013 /* up with the parent map */
4014 #ifdef notdef
4015 pmap_addr = sub_start;
4016 #endif
4017 pmap_addr = s;
4018 } else {
4019 pmap = map->pmap;
4020 pmap_addr = s;
4021 }
4022
4023 if (entry->wired_count) {
4024 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4025 goto done;
4026
4027 /*
4028 * The map was not unlocked:
4029 * no need to goto re-lookup.
4030 * Just go directly to next entry.
4031 */
4032 entry = entry->vme_next;
4033 s = entry->vme_start;
4034 continue;
4035
4036 }
4037
4038 /* call vm_map_lookup_locked to */
4039 /* cause any needs copy to be */
4040 /* evaluated */
4041 local_start = entry->vme_start;
4042 lookup_map = map;
4043 vm_map_lock_write_to_read(map);
4044 if(vm_map_lookup_locked(
4045 &lookup_map, local_start,
4046 access_type,
4047 OBJECT_LOCK_EXCLUSIVE,
4048 &version, &object,
4049 &offset, &prot, &wired,
4050 NULL,
4051 &real_map)) {
4052
4053 vm_map_unlock_read(lookup_map);
4054 vm_map_unwire(map, start,
4055 s, user_wire);
4056 return(KERN_FAILURE);
4057 }
4058 vm_object_unlock(object);
4059 if(real_map != lookup_map)
4060 vm_map_unlock(real_map);
4061 vm_map_unlock_read(lookup_map);
4062 vm_map_lock(map);
4063
4064 /* we unlocked, so must re-lookup */
4065 if (!vm_map_lookup_entry(map,
4066 local_start,
4067 &local_entry)) {
4068 rc = KERN_FAILURE;
4069 goto done;
4070 }
4071
4072 /*
4073 * entry could have been "simplified",
4074 * so re-clip
4075 */
4076 entry = local_entry;
4077 assert(s == local_start);
4078 vm_map_clip_start(map, entry, s);
4079 vm_map_clip_end(map, entry, end);
4080 /* re-compute "e" */
4081 e = entry->vme_end;
4082 if (e > end)
4083 e = end;
4084
4085 /* did we have a change of type? */
4086 if (!entry->is_sub_map) {
4087 last_timestamp = map->timestamp;
4088 continue;
4089 }
4090 } else {
4091 local_start = entry->vme_start;
4092 pmap = map_pmap;
4093 }
4094
4095 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4096 goto done;
4097
4098 entry->in_transition = TRUE;
4099
4100 vm_map_unlock(map);
4101 rc = vm_map_wire_nested(entry->object.sub_map,
4102 sub_start, sub_end,
4103 access_type,
4104 user_wire, pmap, pmap_addr);
4105 vm_map_lock(map);
4106
4107 /*
4108 * Find the entry again. It could have been clipped
4109 * after we unlocked the map.
4110 */
4111 if (!vm_map_lookup_entry(map, local_start,
4112 &first_entry))
4113 panic("vm_map_wire: re-lookup failed");
4114 entry = first_entry;
4115
4116 assert(local_start == s);
4117 /* re-compute "e" */
4118 e = entry->vme_end;
4119 if (e > end)
4120 e = end;
4121
4122 last_timestamp = map->timestamp;
4123 while ((entry != vm_map_to_entry(map)) &&
4124 (entry->vme_start < e)) {
4125 assert(entry->in_transition);
4126 entry->in_transition = FALSE;
4127 if (entry->needs_wakeup) {
4128 entry->needs_wakeup = FALSE;
4129 need_wakeup = TRUE;
4130 }
4131 if (rc != KERN_SUCCESS) {/* from vm_*_wire */
4132 subtract_wire_counts(map, entry, user_wire);
4133 }
4134 entry = entry->vme_next;
4135 }
4136 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
4137 goto done;
4138 }
4139
4140 /* no need to relookup again */
4141 s = entry->vme_start;
4142 continue;
4143 }
4144
4145 /*
4146 * If this entry is already wired then increment
4147 * the appropriate wire reference count.
4148 */
4149 if (entry->wired_count) {
4150 /*
4151 * entry is already wired down, get our reference
4152 * after clipping to our range.
4153 */
4154 vm_map_clip_start(map, entry, s);
4155 vm_map_clip_end(map, entry, end);
4156
4157 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4158 goto done;
4159
4160 /* map was not unlocked: no need to relookup */
4161 entry = entry->vme_next;
4162 s = entry->vme_start;
4163 continue;
4164 }
4165
4166 /*
4167 * Unwired entry or wire request transmitted via submap
4168 */
4169
4170
4171 /*
4172 * Perform actions of vm_map_lookup that need the write
4173 * lock on the map: create a shadow object for a
4174 * copy-on-write region, or an object for a zero-fill
4175 * region.
4176 */
4177 size = entry->vme_end - entry->vme_start;
4178 /*
4179 * If wiring a copy-on-write page, we need to copy it now
4180 * even if we're only (currently) requesting read access.
4181 * This is aggressive, but once it's wired we can't move it.
4182 */
4183 if (entry->needs_copy) {
4184 vm_object_shadow(&entry->object.vm_object,
4185 &entry->offset, size);
4186 entry->needs_copy = FALSE;
4187 } else if (entry->object.vm_object == VM_OBJECT_NULL) {
4188 entry->object.vm_object = vm_object_allocate(size);
4189 entry->offset = (vm_object_offset_t)0;
4190 }
4191
4192 vm_map_clip_start(map, entry, s);
4193 vm_map_clip_end(map, entry, end);
4194
4195 /* re-compute "e" */
4196 e = entry->vme_end;
4197 if (e > end)
4198 e = end;
4199
4200 /*
4201 * Check for holes and protection mismatch.
4202 * Holes: Next entry should be contiguous unless this
4203 * is the end of the region.
4204 * Protection: Access requested must be allowed, unless
4205 * wiring is by protection class
4206 */
4207 if ((entry->vme_end < end) &&
4208 ((entry->vme_next == vm_map_to_entry(map)) ||
4209 (entry->vme_next->vme_start > entry->vme_end))) {
4210 /* found a hole */
4211 rc = KERN_INVALID_ADDRESS;
4212 goto done;
4213 }
4214 if ((entry->protection & access_type) != access_type) {
4215 /* found a protection problem */
4216 rc = KERN_PROTECTION_FAILURE;
4217 goto done;
4218 }
4219
4220 assert(entry->wired_count == 0 && entry->user_wired_count == 0);
4221
4222 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4223 goto done;
4224
4225 entry->in_transition = TRUE;
4226
4227 /*
4228 * This entry might get split once we unlock the map.
4229 * In vm_fault_wire(), we need the current range as
4230 * defined by this entry. In order for this to work
4231 * along with a simultaneous clip operation, we make a
4232 * temporary copy of this entry and use that for the
4233 * wiring. Note that the underlying objects do not
4234 * change during a clip.
4235 */
4236 tmp_entry = *entry;
4237
4238 /*
4239 * The in_transition state guarantees that the entry
4240 * (or entries for this range, if a split occurred) will be
4241 * there when the map lock is acquired for the second time.
4242 */
4243 vm_map_unlock(map);
4244
4245 if (!user_wire && cur_thread != THREAD_NULL)
4246 interruptible_state = thread_interrupt_level(THREAD_UNINT);
4247 else
4248 interruptible_state = THREAD_UNINT;
4249
4250 if(map_pmap)
4251 rc = vm_fault_wire(map,
4252 &tmp_entry, map_pmap, pmap_addr);
4253 else
4254 rc = vm_fault_wire(map,
4255 &tmp_entry, map->pmap,
4256 tmp_entry.vme_start);
4257
4258 if (!user_wire && cur_thread != THREAD_NULL)
4259 thread_interrupt_level(interruptible_state);
4260
4261 vm_map_lock(map);
4262
4263 if (last_timestamp+1 != map->timestamp) {
4264 /*
4265 * Find the entry again. It could have been clipped
4266 * after we unlocked the map.
4267 */
4268 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
4269 &first_entry))
4270 panic("vm_map_wire: re-lookup failed");
4271
4272 entry = first_entry;
4273 }
4274
4275 last_timestamp = map->timestamp;
4276
4277 while ((entry != vm_map_to_entry(map)) &&
4278 (entry->vme_start < tmp_entry.vme_end)) {
4279 assert(entry->in_transition);
4280 entry->in_transition = FALSE;
4281 if (entry->needs_wakeup) {
4282 entry->needs_wakeup = FALSE;
4283 need_wakeup = TRUE;
4284 }
4285 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
4286 subtract_wire_counts(map, entry, user_wire);
4287 }
4288 entry = entry->vme_next;
4289 }
4290
4291 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
4292 goto done;
4293 }
4294
4295 s = entry->vme_start;
4296 } /* end while loop through map entries */
4297
4298 done:
4299 if (rc == KERN_SUCCESS) {
4300 /* repair any damage we may have made to the VM map */
4301 vm_map_simplify_range(map, start, end);
4302 }
4303
4304 vm_map_unlock(map);
4305
4306 /*
4307 * wake up anybody waiting on entries we wired.
4308 */
4309 if (need_wakeup)
4310 vm_map_entry_wakeup(map);
4311
4312 if (rc != KERN_SUCCESS) {
4313 /* undo what has been wired so far */
4314 vm_map_unwire(map, start, s, user_wire);
4315 }
4316
4317 return rc;
4318
4319 }
4320
4321 kern_return_t
4322 vm_map_wire(
4323 register vm_map_t map,
4324 register vm_map_offset_t start,
4325 register vm_map_offset_t end,
4326 register vm_prot_t access_type,
4327 boolean_t user_wire)
4328 {
4329
4330 kern_return_t kret;
4331
4332 kret = vm_map_wire_nested(map, start, end, access_type,
4333 user_wire, (pmap_t)NULL, 0);
4334 return kret;
4335 }
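/*
 * Editorial sketch (not part of the original source): how a
 * hypothetical kernel client might wire a page-aligned range so it
 * can be touched without faulting, and unwire it afterwards.  "addr"
 * and "size" are placeholders; the FALSE argument selects the kernel
 * (non-user) wiring path.
 *
 *	kern_return_t kr;
 *
 *	kr = vm_map_wire(kernel_map, addr, addr + size,
 *			 VM_PROT_READ | VM_PROT_WRITE, FALSE);
 *	if (kr == KERN_SUCCESS) {
 *		... access the memory, no page faults will be taken ...
 *		(void) vm_map_unwire(kernel_map, addr, addr + size, FALSE);
 *	}
 */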
4336
4337 /*
4338 * vm_map_unwire:
4339 *
4340 * Sets the pageability of the specified address range in the target
4341 * map as pageable. Regions specified must have been wired previously.
4342 *
4343 * The map must not be locked, but a reference must remain to the map
4344 * throughout the call.
4345 *
4346 * The kernel will panic on failures. User unwire ignores holes and
4347 * unwired or in-transition entries, so that a partial failure does not
4348 * leave memory wired and effectively lost.
4349 */
4350 static kern_return_t
4351 vm_map_unwire_nested(
4352 register vm_map_t map,
4353 register vm_map_offset_t start,
4354 register vm_map_offset_t end,
4355 boolean_t user_wire,
4356 pmap_t map_pmap,
4357 vm_map_offset_t pmap_addr)
4358 {
4359 register vm_map_entry_t entry;
4360 struct vm_map_entry *first_entry, tmp_entry;
4361 boolean_t need_wakeup;
4362 boolean_t main_map = FALSE;
4363 unsigned int last_timestamp;
4364
4365 vm_map_lock(map);
4366 if(map_pmap == NULL)
4367 main_map = TRUE;
4368 last_timestamp = map->timestamp;
4369
4370 VM_MAP_RANGE_CHECK(map, start, end);
4371 assert(page_aligned(start));
4372 assert(page_aligned(end));
4373
4374 if (start == end) {
4375 /* We unwired what the caller asked for: zero pages */
4376 vm_map_unlock(map);
4377 return KERN_SUCCESS;
4378 }
4379
4380 if (vm_map_lookup_entry(map, start, &first_entry)) {
4381 entry = first_entry;
4382 /*
4383 * vm_map_clip_start will be done later.
4384 * We don't want to unnest any nested submaps here!
4385 */
4386 }
4387 else {
4388 if (!user_wire) {
4389 panic("vm_map_unwire: start not found");
4390 }
4391 /* Start address is not in map. */
4392 vm_map_unlock(map);
4393 return(KERN_INVALID_ADDRESS);
4394 }
4395
4396 if (entry->superpage_size) {
4397 /* superpages are always wired */
4398 vm_map_unlock(map);
4399 return KERN_INVALID_ADDRESS;
4400 }
4401
4402 need_wakeup = FALSE;
4403 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
4404 if (entry->in_transition) {
4405 /*
4406 * 1)
4407 * Another thread is wiring down this entry. Were it
4408 * not for that other thread, we would be unwiring an
4409 * unwired entry, which is not permitted. If we wait,
4410 * we will end up unwiring memory that we did not
4411 * wire.
4412 *
4413 * 2)
4414 * Another thread is unwiring this entry. We did not
4415 * have a reference to it, because if we did, this
4416 * entry will not be getting unwired now.
4417 */
4418 if (!user_wire) {
4419 /*
4420 * XXX FBDP
4421 * This could happen: there could be some
4422 * overlapping vslock/vsunlock operations
4423 * going on.
4424 * We should probably just wait and retry,
4425 * but then we have to be careful that this
4426 * entry could get "simplified" after
4427 * "in_transition" gets unset and before
4428 * we re-lookup the entry, so we would
4429 * have to re-clip the entry to avoid
4430 * re-unwiring what we have already unwired...
4431 * See vm_map_wire_nested().
4432 *
4433 * Or we could just ignore "in_transition"
4434 * here and proceed to decrement the wired
4435 * count(s) on this entry. That should be fine
4436 * as long as "wired_count" doesn't drop all
4437 * the way to 0 (and we should panic if THAT
4438 * happens).
4439 */
4440 panic("vm_map_unwire: in_transition entry");
4441 }
4442
4443 entry = entry->vme_next;
4444 continue;
4445 }
4446
4447 if (entry->is_sub_map) {
4448 vm_map_offset_t sub_start;
4449 vm_map_offset_t sub_end;
4450 vm_map_offset_t local_end;
4451 pmap_t pmap;
4452
4453 vm_map_clip_start(map, entry, start);
4454 vm_map_clip_end(map, entry, end);
4455
4456 sub_start = entry->offset;
4457 sub_end = entry->vme_end - entry->vme_start;
4458 sub_end += entry->offset;
4459 local_end = entry->vme_end;
4460 if(map_pmap == NULL) {
4461 if(entry->use_pmap) {
4462 pmap = entry->object.sub_map->pmap;
4463 pmap_addr = sub_start;
4464 } else {
4465 pmap = map->pmap;
4466 pmap_addr = start;
4467 }
4468 if (entry->wired_count == 0 ||
4469 (user_wire && entry->user_wired_count == 0)) {
4470 if (!user_wire)
4471 panic("vm_map_unwire: entry is unwired");
4472 entry = entry->vme_next;
4473 continue;
4474 }
4475
4476 /*
4477 * Check for holes
4478 * Holes: Next entry should be contiguous unless
4479 * this is the end of the region.
4480 */
4481 if (((entry->vme_end < end) &&
4482 ((entry->vme_next == vm_map_to_entry(map)) ||
4483 (entry->vme_next->vme_start
4484 > entry->vme_end)))) {
4485 if (!user_wire)
4486 panic("vm_map_unwire: non-contiguous region");
4487 /*
4488 entry = entry->vme_next;
4489 continue;
4490 */
4491 }
4492
4493 subtract_wire_counts(map, entry, user_wire);
4494
4495 if (entry->wired_count != 0) {
4496 entry = entry->vme_next;
4497 continue;
4498 }
4499
4500 entry->in_transition = TRUE;
4501 tmp_entry = *entry;/* see comment in vm_map_wire() */
4502
4503 /*
4504 * We can unlock the map now. The in_transition state
4505 * guarantees the existence of the entry.
4506 */
4507 vm_map_unlock(map);
4508 vm_map_unwire_nested(entry->object.sub_map,
4509 sub_start, sub_end, user_wire, pmap, pmap_addr);
4510 vm_map_lock(map);
4511
4512 if (last_timestamp+1 != map->timestamp) {
4513 /*
4514 * Find the entry again. It could have been
4515 * clipped or deleted after we unlocked the map.
4516 */
4517 if (!vm_map_lookup_entry(map,
4518 tmp_entry.vme_start,
4519 &first_entry)) {
4520 if (!user_wire)
4521 panic("vm_map_unwire: re-lookup failed");
4522 entry = first_entry->vme_next;
4523 } else
4524 entry = first_entry;
4525 }
4526 last_timestamp = map->timestamp;
4527
4528 /*
4529 * clear transition bit for all constituent entries
4530 * that were in the original entry (saved in
4531 * tmp_entry). Also check for waiters.
4532 */
4533 while ((entry != vm_map_to_entry(map)) &&
4534 (entry->vme_start < tmp_entry.vme_end)) {
4535 assert(entry->in_transition);
4536 entry->in_transition = FALSE;
4537 if (entry->needs_wakeup) {
4538 entry->needs_wakeup = FALSE;
4539 need_wakeup = TRUE;
4540 }
4541 entry = entry->vme_next;
4542 }
4543 continue;
4544 } else {
4545 vm_map_unlock(map);
4546 vm_map_unwire_nested(entry->object.sub_map,
4547 sub_start, sub_end, user_wire, map_pmap,
4548 pmap_addr);
4549 vm_map_lock(map);
4550
4551 if (last_timestamp+1 != map->timestamp) {
4552 /*
4553 * Find the entry again. It could have been
4554 * clipped or deleted after we unlocked the map.
4555 */
4556 if (!vm_map_lookup_entry(map,
4557 tmp_entry.vme_start,
4558 &first_entry)) {
4559 if (!user_wire)
4560 panic("vm_map_unwire: re-lookup failed");
4561 entry = first_entry->vme_next;
4562 } else
4563 entry = first_entry;
4564 }
4565 last_timestamp = map->timestamp;
4566 }
4567 }
4568
4569
4570 if ((entry->wired_count == 0) ||
4571 (user_wire && entry->user_wired_count == 0)) {
4572 if (!user_wire)
4573 panic("vm_map_unwire: entry is unwired");
4574
4575 entry = entry->vme_next;
4576 continue;
4577 }
4578
4579 assert(entry->wired_count > 0 &&
4580 (!user_wire || entry->user_wired_count > 0));
4581
4582 vm_map_clip_start(map, entry, start);
4583 vm_map_clip_end(map, entry, end);
4584
4585 /*
4586 * Check for holes
4587 * Holes: Next entry should be contiguous unless
4588 * this is the end of the region.
4589 */
4590 if (((entry->vme_end < end) &&
4591 ((entry->vme_next == vm_map_to_entry(map)) ||
4592 (entry->vme_next->vme_start > entry->vme_end)))) {
4593
4594 if (!user_wire)
4595 panic("vm_map_unwire: non-contiguous region");
4596 entry = entry->vme_next;
4597 continue;
4598 }
4599
4600 subtract_wire_counts(map, entry, user_wire);
4601
4602 if (entry->wired_count != 0) {
4603 entry = entry->vme_next;
4604 continue;
4605 }
4606
4607 if(entry->zero_wired_pages) {
4608 entry->zero_wired_pages = FALSE;
4609 }
4610
4611 entry->in_transition = TRUE;
4612 tmp_entry = *entry; /* see comment in vm_map_wire() */
4613
4614 /*
4615 * We can unlock the map now. The in_transition state
4616 * guarantees the existence of the entry.
4617 */
4618 vm_map_unlock(map);
4619 if(map_pmap) {
4620 vm_fault_unwire(map,
4621 &tmp_entry, FALSE, map_pmap, pmap_addr);
4622 } else {
4623 vm_fault_unwire(map,
4624 &tmp_entry, FALSE, map->pmap,
4625 tmp_entry.vme_start);
4626 }
4627 vm_map_lock(map);
4628
4629 if (last_timestamp+1 != map->timestamp) {
4630 /*
4631 * Find the entry again. It could have been clipped
4632 * or deleted after we unlocked the map.
4633 */
4634 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
4635 &first_entry)) {
4636 if (!user_wire)
4637 panic("vm_map_unwire: re-lookup failed");
4638 entry = first_entry->vme_next;
4639 } else
4640 entry = first_entry;
4641 }
4642 last_timestamp = map->timestamp;
4643
4644 /*
4645 * clear transition bit for all constituent entries that
4646 * were in the original entry (saved in tmp_entry). Also
4647 * check for waiters.
4648 */
4649 while ((entry != vm_map_to_entry(map)) &&
4650 (entry->vme_start < tmp_entry.vme_end)) {
4651 assert(entry->in_transition);
4652 entry->in_transition = FALSE;
4653 if (entry->needs_wakeup) {
4654 entry->needs_wakeup = FALSE;
4655 need_wakeup = TRUE;
4656 }
4657 entry = entry->vme_next;
4658 }
4659 }
4660
4661 /*
4662 * We might have fragmented the address space when we wired this
4663 * range of addresses. Attempt to re-coalesce these VM map entries
4664 * with their neighbors now that they're no longer wired.
4665 * Under some circumstances, address space fragmentation can
4666 * prevent VM object shadow chain collapsing, which can cause
4667 * swap space leaks.
4668 */
4669 vm_map_simplify_range(map, start, end);
4670
4671 vm_map_unlock(map);
4672 /*
4673 * wake up anybody waiting on entries that we have unwired.
4674 */
4675 if (need_wakeup)
4676 vm_map_entry_wakeup(map);
4677 return(KERN_SUCCESS);
4678
4679 }
4680
4681 kern_return_t
4682 vm_map_unwire(
4683 register vm_map_t map,
4684 register vm_map_offset_t start,
4685 register vm_map_offset_t end,
4686 boolean_t user_wire)
4687 {
4688 return vm_map_unwire_nested(map, start, end,
4689 user_wire, (pmap_t)NULL, 0);
4690 }
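/*
 * Editorial sketch (not part of the original source): the user_wire
 * flag chooses between the two behaviours documented above.  A
 * hypothetical user-level request (the path used for mlock()-style
 * wiring) is interruptible and is rolled back if interrupted:
 *
 *	kr = vm_map_wire(current_map(), start, end, VM_PROT_READ, TRUE);
 *	...
 *	kr = vm_map_unwire(current_map(), start, end, TRUE);
 *
 * whereas the kernel path (user_wire == FALSE) never touches
 * user_wired_count and panics on inconsistencies rather than
 * tolerating them.
 */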
4691
4692
4693 /*
4694 * vm_map_entry_delete: [ internal use only ]
4695 *
4696 * Deallocate the given entry from the target map.
4697 */
4698 static void
4699 vm_map_entry_delete(
4700 register vm_map_t map,
4701 register vm_map_entry_t entry)
4702 {
4703 register vm_map_offset_t s, e;
4704 register vm_object_t object;
4705 register vm_map_t submap;
4706
4707 s = entry->vme_start;
4708 e = entry->vme_end;
4709 assert(page_aligned(s));
4710 assert(page_aligned(e));
4711 assert(entry->wired_count == 0);
4712 assert(entry->user_wired_count == 0);
4713 assert(!entry->permanent);
4714
4715 if (entry->is_sub_map) {
4716 object = NULL;
4717 submap = entry->object.sub_map;
4718 } else {
4719 submap = NULL;
4720 object = entry->object.vm_object;
4721 }
4722
4723 vm_map_store_entry_unlink(map, entry);
4724 map->size -= e - s;
4725
4726 vm_map_entry_dispose(map, entry);
4727
4728 vm_map_unlock(map);
4729 /*
4730 * Deallocate the object only after removing all
4731 * pmap entries pointing to its pages.
4732 */
4733 if (submap)
4734 vm_map_deallocate(submap);
4735 else
4736 vm_object_deallocate(object);
4737
4738 }
4739
4740 void
4741 vm_map_submap_pmap_clean(
4742 vm_map_t map,
4743 vm_map_offset_t start,
4744 vm_map_offset_t end,
4745 vm_map_t sub_map,
4746 vm_map_offset_t offset)
4747 {
4748 vm_map_offset_t submap_start;
4749 vm_map_offset_t submap_end;
4750 vm_map_size_t remove_size;
4751 vm_map_entry_t entry;
4752
4753 submap_end = offset + (end - start);
4754 submap_start = offset;
4755
4756 vm_map_lock_read(sub_map);
4757 if(vm_map_lookup_entry(sub_map, offset, &entry)) {
4758
4759 remove_size = (entry->vme_end - entry->vme_start);
4760 if(offset > entry->vme_start)
4761 remove_size -= offset - entry->vme_start;
4762
4763
4764 if(submap_end < entry->vme_end) {
4765 remove_size -=
4766 entry->vme_end - submap_end;
4767 }
4768 if(entry->is_sub_map) {
4769 vm_map_submap_pmap_clean(
4770 sub_map,
4771 start,
4772 start + remove_size,
4773 entry->object.sub_map,
4774 entry->offset);
4775 } else {
4776
4777 if((map->mapped_in_other_pmaps) && (map->ref_count)
4778 && (entry->object.vm_object != NULL)) {
4779 vm_object_pmap_protect(
4780 entry->object.vm_object,
4781 entry->offset+(offset-entry->vme_start),
4782 remove_size,
4783 PMAP_NULL,
4784 entry->vme_start,
4785 VM_PROT_NONE);
4786 } else {
4787 pmap_remove(map->pmap,
4788 (addr64_t)start,
4789 (addr64_t)(start + remove_size));
4790 }
4791 }
4792 }
4793
4794 entry = entry->vme_next;
4795
4796 while((entry != vm_map_to_entry(sub_map))
4797 && (entry->vme_start < submap_end)) {
4798 remove_size = (entry->vme_end - entry->vme_start);
4799 if(submap_end < entry->vme_end) {
4800 remove_size -= entry->vme_end - submap_end;
4801 }
4802 if(entry->is_sub_map) {
4803 vm_map_submap_pmap_clean(
4804 sub_map,
4805 (start + entry->vme_start) - offset,
4806 ((start + entry->vme_start) - offset) + remove_size,
4807 entry->object.sub_map,
4808 entry->offset);
4809 } else {
4810 if((map->mapped_in_other_pmaps) && (map->ref_count)
4811 && (entry->object.vm_object != NULL)) {
4812 vm_object_pmap_protect(
4813 entry->object.vm_object,
4814 entry->offset,
4815 remove_size,
4816 PMAP_NULL,
4817 entry->vme_start,
4818 VM_PROT_NONE);
4819 } else {
4820 pmap_remove(map->pmap,
4821 (addr64_t)((start + entry->vme_start)
4822 - offset),
4823 (addr64_t)(((start + entry->vme_start)
4824 - offset) + remove_size));
4825 }
4826 }
4827 entry = entry->vme_next;
4828 }
4829 vm_map_unlock_read(sub_map);
4830 return;
4831 }
4832
4833 /*
4834 * vm_map_delete: [ internal use only ]
4835 *
4836 * Deallocates the given address range from the target map.
4837 * Removes all user wirings. Unwires one kernel wiring if
4838 * VM_MAP_REMOVE_KUNWIRE is set. Waits for kernel wirings to go
4839 * away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set. Sleeps
4840 * interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
4841 *
4842 * This routine is called with map locked and leaves map locked.
4843 */
4844 static kern_return_t
4845 vm_map_delete(
4846 vm_map_t map,
4847 vm_map_offset_t start,
4848 vm_map_offset_t end,
4849 int flags,
4850 vm_map_t zap_map)
4851 {
4852 vm_map_entry_t entry, next;
4853 struct vm_map_entry *first_entry, tmp_entry;
4854 register vm_map_offset_t s;
4855 register vm_object_t object;
4856 boolean_t need_wakeup;
4857 unsigned int last_timestamp = ~0; /* unlikely value */
4858 int interruptible;
4859
4860 interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
4861 THREAD_ABORTSAFE : THREAD_UNINT;
4862
4863 /*
4864 * All our DMA I/O operations in IOKit are currently done by
4865 * wiring through the map entries of the task requesting the I/O.
4866 * Because of this, we must always wait for kernel wirings
4867 * to go away on the entries before deleting them.
4868 *
4869 * Any caller who wants to actually remove a kernel wiring
4870 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
4871 * properly remove one wiring instead of blasting through
4872 * them all.
4873 */
4874 flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;
4875
4876 while(1) {
4877 /*
4878 * Find the start of the region, and clip it
4879 */
4880 if (vm_map_lookup_entry(map, start, &first_entry)) {
4881 entry = first_entry;
4882 if (entry->superpage_size && (start & ~SUPERPAGE_MASK)) { /* extend request to whole entry */
4883 start = SUPERPAGE_ROUND_DOWN(start);
4884 continue;
4885 }
4886 if (start == entry->vme_start) {
4887 /*
4888 * No need to clip. We don't want to cause
4889 * any unnecessary unnesting in this case...
4890 */
4891 } else {
4892 vm_map_clip_start(map, entry, start);
4893 }
4894
4895 /*
4896 * Fix the lookup hint now, rather than each
4897 * time through the loop.
4898 */
4899 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4900 } else {
4901 entry = first_entry->vme_next;
4902 }
4903 break;
4904 }
4905 if (entry->superpage_size)
4906 end = SUPERPAGE_ROUND_UP(end);
4907
4908 need_wakeup = FALSE;
4909 /*
4910 * Step through all entries in this region
4911 */
4912 s = entry->vme_start;
4913 while ((entry != vm_map_to_entry(map)) && (s < end)) {
4914 /*
4915 * At this point, we have deleted all the memory entries
4916 * between "start" and "s". We still need to delete
4917 * all memory entries between "s" and "end".
4918 * While we were blocked and the map was unlocked, some
4919 * new memory entries could have been re-allocated between
4920 * "start" and "s" and we don't want to mess with those.
4921 * Some of those entries could even have been re-assembled
4922 * with an entry after "s" (in vm_map_simplify_entry()), so
4923 * we may have to vm_map_clip_start() again.
4924 */
4925
4926 if (entry->vme_start >= s) {
4927 /*
4928 * This entry starts on or after "s"
4929 * so no need to clip its start.
4930 */
4931 } else {
4932 /*
4933 * This entry has been re-assembled by a
4934 * vm_map_simplify_entry(). We need to
4935 * re-clip its start.
4936 */
4937 vm_map_clip_start(map, entry, s);
4938 }
4939 if (entry->vme_end <= end) {
4940 /*
4941 * This entry is going away completely, so no need
4942 * to clip and possibly cause an unnecessary unnesting.
4943 */
4944 } else {
4945 vm_map_clip_end(map, entry, end);
4946 }
4947
4948 if (entry->permanent) {
4949 panic("attempt to remove permanent VM map entry "
4950 "%p [0x%llx:0x%llx]\n",
4951 entry, (uint64_t) s, (uint64_t) end);
4952 }
4953
4954
4955 if (entry->in_transition) {
4956 wait_result_t wait_result;
4957
4958 /*
4959 * Another thread is wiring/unwiring this entry.
4960 * Let the other thread know we are waiting.
4961 */
4962 assert(s == entry->vme_start);
4963 entry->needs_wakeup = TRUE;
4964
4965 /*
4966 * wake up anybody waiting on entries that we have
4967 * already unwired/deleted.
4968 */
4969 if (need_wakeup) {
4970 vm_map_entry_wakeup(map);
4971 need_wakeup = FALSE;
4972 }
4973
4974 wait_result = vm_map_entry_wait(map, interruptible);
4975
4976 if (interruptible &&
4977 wait_result == THREAD_INTERRUPTED) {
4978 /*
4979 * We do not clear the needs_wakeup flag,
4980 * since we cannot tell if we were the only one.
4981 */
4982 vm_map_unlock(map);
4983 return KERN_ABORTED;
4984 }
4985
4986 /*
4987 * The entry could have been clipped or it
4988 * may not exist anymore. Look it up again.
4989 */
4990 if (!vm_map_lookup_entry(map, s, &first_entry)) {
4991 assert((map != kernel_map) &&
4992 (!entry->is_sub_map));
4993 /*
4994 * User: use the next entry
4995 */
4996 entry = first_entry->vme_next;
4997 s = entry->vme_start;
4998 } else {
4999 entry = first_entry;
5000 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
5001 }
5002 last_timestamp = map->timestamp;
5003 continue;
5004 } /* end in_transition */
5005
5006 if (entry->wired_count) {
5007 boolean_t user_wire;
5008
5009 user_wire = entry->user_wired_count > 0;
5010
5011 /*
5012 * Remove a kernel wiring if requested
5013 */
5014 if (flags & VM_MAP_REMOVE_KUNWIRE) {
5015 entry->wired_count--;
5016 }
5017
5018 /*
5019 * Remove all user wirings for proper accounting
5020 */
5021 if (entry->user_wired_count > 0) {
5022 while (entry->user_wired_count)
5023 subtract_wire_counts(map, entry, user_wire);
5024 }
5025
5026 if (entry->wired_count != 0) {
5027 assert(map != kernel_map);
5028 /*
5029 * Cannot continue. Typical case is when
5030 * a user thread has physical I/O pending
5031 * on this page. Either wait for the
5032 * kernel wiring to go away or return an
5033 * error.
5034 */
5035 if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) {
5036 wait_result_t wait_result;
5037
5038 assert(s == entry->vme_start);
5039 entry->needs_wakeup = TRUE;
5040 wait_result = vm_map_entry_wait(map,
5041 interruptible);
5042
5043 if (interruptible &&
5044 wait_result == THREAD_INTERRUPTED) {
5045 /*
5046 * We do not clear the
5047 * needs_wakeup flag, since we
5048 * cannot tell if we were the
5049 * only one.
5050 */
5051 vm_map_unlock(map);
5052 return KERN_ABORTED;
5053 }
5054
5055 /*
5056 * The entry could have been clipped or
5057 * it may not exist anymore. Look it
5058 * up again.
5059 */
5060 if (!vm_map_lookup_entry(map, s,
5061 &first_entry)) {
5062 assert(map != kernel_map);
5063 /*
5064 * User: use the next entry
5065 */
5066 entry = first_entry->vme_next;
5067 s = entry->vme_start;
5068 } else {
5069 entry = first_entry;
5070 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
5071 }
5072 last_timestamp = map->timestamp;
5073 continue;
5074 }
5075 else {
5076 return KERN_FAILURE;
5077 }
5078 }
5079
5080 entry->in_transition = TRUE;
5081 /*
5082 * copy current entry. see comment in vm_map_wire()
5083 */
5084 tmp_entry = *entry;
5085 assert(s == entry->vme_start);
5086
5087 /*
5088 * We can unlock the map now. The in_transition
5089 * state guarantees the existence of the entry.
5090 */
5091 vm_map_unlock(map);
5092
5093 if (tmp_entry.is_sub_map) {
5094 vm_map_t sub_map;
5095 vm_map_offset_t sub_start, sub_end;
5096 pmap_t pmap;
5097 vm_map_offset_t pmap_addr;
5098
5099
5100 sub_map = tmp_entry.object.sub_map;
5101 sub_start = tmp_entry.offset;
5102 sub_end = sub_start + (tmp_entry.vme_end -
5103 tmp_entry.vme_start);
5104 if (tmp_entry.use_pmap) {
5105 pmap = sub_map->pmap;
5106 pmap_addr = tmp_entry.vme_start;
5107 } else {
5108 pmap = map->pmap;
5109 pmap_addr = tmp_entry.vme_start;
5110 }
5111 (void) vm_map_unwire_nested(sub_map,
5112 sub_start, sub_end,
5113 user_wire,
5114 pmap, pmap_addr);
5115 } else {
5116
5117 vm_fault_unwire(map, &tmp_entry,
5118 tmp_entry.object.vm_object == kernel_object,
5119 map->pmap, tmp_entry.vme_start);
5120 }
5121
5122 vm_map_lock(map);
5123
5124 if (last_timestamp+1 != map->timestamp) {
5125 /*
5126 * Find the entry again. It could have
5127 * been clipped after we unlocked the map.
5128 */
5129 if (!vm_map_lookup_entry(map, s, &first_entry)){
5130 assert((map != kernel_map) &&
5131 (!entry->is_sub_map));
5132 first_entry = first_entry->vme_next;
5133 s = first_entry->vme_start;
5134 } else {
5135 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
5136 }
5137 } else {
5138 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
5139 first_entry = entry;
5140 }
5141
5142 last_timestamp = map->timestamp;
5143
5144 entry = first_entry;
5145 while ((entry != vm_map_to_entry(map)) &&
5146 (entry->vme_start < tmp_entry.vme_end)) {
5147 assert(entry->in_transition);
5148 entry->in_transition = FALSE;
5149 if (entry->needs_wakeup) {
5150 entry->needs_wakeup = FALSE;
5151 need_wakeup = TRUE;
5152 }
5153 entry = entry->vme_next;
5154 }
5155 /*
5156 * We have unwired the entry(s). Go back and
5157 * delete them.
5158 */
5159 entry = first_entry;
5160 continue;
5161 }
5162
5163 /* entry is unwired */
5164 assert(entry->wired_count == 0);
5165 assert(entry->user_wired_count == 0);
5166
5167 assert(s == entry->vme_start);
5168
5169 if (flags & VM_MAP_REMOVE_NO_PMAP_CLEANUP) {
5170 /*
5171 * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
5172 * vm_map_delete(), some map entries might have been
5173 * transferred to a "zap_map", which doesn't have a
5174 * pmap. The original pmap has already been flushed
5175 * in the vm_map_delete() call targeting the original
5176 * map, but when we get to destroying the "zap_map",
5177 * we don't have any pmap to flush, so let's just skip
5178 * all this.
5179 */
5180 } else if (entry->is_sub_map) {
5181 if (entry->use_pmap) {
5182 #ifndef NO_NESTED_PMAP
5183 pmap_unnest(map->pmap,
5184 (addr64_t)entry->vme_start,
5185 entry->vme_end - entry->vme_start);
5186 #endif /* NO_NESTED_PMAP */
5187 if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
5188 /* clean up parent map/maps */
5189 vm_map_submap_pmap_clean(
5190 map, entry->vme_start,
5191 entry->vme_end,
5192 entry->object.sub_map,
5193 entry->offset);
5194 }
5195 } else {
5196 vm_map_submap_pmap_clean(
5197 map, entry->vme_start, entry->vme_end,
5198 entry->object.sub_map,
5199 entry->offset);
5200 }
5201 } else if (entry->object.vm_object != kernel_object) {
5202 object = entry->object.vm_object;
5203 if((map->mapped_in_other_pmaps) && (map->ref_count)) {
5204 vm_object_pmap_protect(
5205 object, entry->offset,
5206 entry->vme_end - entry->vme_start,
5207 PMAP_NULL,
5208 entry->vme_start,
5209 VM_PROT_NONE);
5210 } else {
5211 pmap_remove(map->pmap,
5212 (addr64_t)entry->vme_start,
5213 (addr64_t)entry->vme_end);
5214 }
5215 }
5216
5217 /*
5218 * All pmap mappings for this map entry must have been
5219 * cleared by now.
5220 */
5221 assert(vm_map_pmap_is_empty(map,
5222 entry->vme_start,
5223 entry->vme_end));
5224
5225 next = entry->vme_next;
5226 s = next->vme_start;
5227 last_timestamp = map->timestamp;
5228
5229 if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) &&
5230 zap_map != VM_MAP_NULL) {
5231 vm_map_size_t entry_size;
5232 /*
5233 * The caller wants to save the affected VM map entries
5234 * into the "zap_map". The caller will take care of
5235 * these entries.
5236 */
5237 /* unlink the entry from "map" ... */
5238 vm_map_store_entry_unlink(map, entry);
5239 /* ... and add it to the end of the "zap_map" */
5240 vm_map_store_entry_link(zap_map,
5241 vm_map_last_entry(zap_map),
5242 entry);
5243 entry_size = entry->vme_end - entry->vme_start;
5244 map->size -= entry_size;
5245 zap_map->size += entry_size;
5246 /* we didn't unlock the map, so no timestamp increase */
5247 last_timestamp--;
5248 } else {
5249 vm_map_entry_delete(map, entry);
5250 /* vm_map_entry_delete unlocks the map */
5251 vm_map_lock(map);
5252 }
5253
5254 entry = next;
5255
5256 if(entry == vm_map_to_entry(map)) {
5257 break;
5258 }
5259 if (last_timestamp+1 != map->timestamp) {
5260 /*
5261 * We are responsible for deleting everything
5262 * in the given range. If someone has interfered,
5263 * we pick up where we left off. Back-fills should
5264 * be all right for anyone except vm_map_delete(),
5265 * and we have to assume that the task has been
5266 * fully disabled before we get here.
5267 */
5268 if (!vm_map_lookup_entry(map, s, &entry)){
5269 entry = entry->vme_next;
5270 s = entry->vme_start;
5271 } else {
5272 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
5273 }
5274 /*
5275 * Others can not only allocate behind us; entries can
5276 * also coalesce while we don't hold the map lock.
5277 */
5278 if(entry == vm_map_to_entry(map)) {
5279 break;
5280 }
5281 }
5282 last_timestamp = map->timestamp;
5283 }
5284
5285 if (map->wait_for_space)
5286 thread_wakeup((event_t) map);
5287 /*
5288 * wake up anybody waiting on entries that we have already deleted.
5289 */
5290 if (need_wakeup)
5291 vm_map_entry_wakeup(map);
5292
5293 return KERN_SUCCESS;
5294 }
5295
5296 /*
5297 * vm_map_remove:
5298 *
5299 * Remove the given address range from the target map.
5300 * This is the exported form of vm_map_delete.
5301 */
5302 kern_return_t
5303 vm_map_remove(
5304 register vm_map_t map,
5305 register vm_map_offset_t start,
5306 register vm_map_offset_t end,
5307 register boolean_t flags)
5308 {
5309 register kern_return_t result;
5310
5311 vm_map_lock(map);
5312 VM_MAP_RANGE_CHECK(map, start, end);
5313 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
5314 vm_map_unlock(map);
5315
5316 return(result);
5317 }
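/*
 * Editorial sketch (not part of the original source): a hypothetical
 * caller removing a range it had wired into the kernel map.  Passing
 * VM_MAP_REMOVE_KUNWIRE lets vm_map_delete() drop the one kernel
 * wiring instead of waiting for it to disappear (see the header
 * comment above vm_map_delete()).
 *
 *	kr = vm_map_remove(kernel_map,
 *			   vm_map_trunc_page(addr),
 *			   vm_map_round_page(addr + size),
 *			   VM_MAP_REMOVE_KUNWIRE);
 */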
5318
5319
5320 /*
5321 * Routine: vm_map_copy_discard
5322 *
5323 * Description:
5324 * Dispose of a map copy object (returned by
5325 * vm_map_copyin).
5326 */
5327 void
5328 vm_map_copy_discard(
5329 vm_map_copy_t copy)
5330 {
5331 if (copy == VM_MAP_COPY_NULL)
5332 return;
5333
5334 switch (copy->type) {
5335 case VM_MAP_COPY_ENTRY_LIST:
5336 while (vm_map_copy_first_entry(copy) !=
5337 vm_map_copy_to_entry(copy)) {
5338 vm_map_entry_t entry = vm_map_copy_first_entry(copy);
5339
5340 vm_map_copy_entry_unlink(copy, entry);
5341 vm_object_deallocate(entry->object.vm_object);
5342 vm_map_copy_entry_dispose(copy, entry);
5343 }
5344 break;
5345 case VM_MAP_COPY_OBJECT:
5346 vm_object_deallocate(copy->cpy_object);
5347 break;
5348 case VM_MAP_COPY_KERNEL_BUFFER:
5349
5350 /*
5351 * The vm_map_copy_t and possibly the data buffer were
5352 * allocated by a single call to kalloc(), i.e. the
5353 * vm_map_copy_t was not allocated out of the zone.
5354 */
5355 kfree(copy, copy->cpy_kalloc_size);
5356 return;
5357 }
5358 zfree(vm_map_copy_zone, copy);
5359 }
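/*
 * Editorial sketch (not part of the original source): the ownership
 * rule vm_map_copy_discard() exists to serve.  A hypothetical routine
 * that creates a copy object and then fails must discard it itself;
 * once a consumer (vm_map_copyout(), vm_map_copy_overwrite(), ...)
 * succeeds, the copy object has been consumed and must not be
 * discarded again.
 *
 *	vm_map_copy_t copy;
 *
 *	kr = vm_map_copyin(src_map, src_addr, len, FALSE, &copy);
 *	if (kr != KERN_SUCCESS)
 *		return kr;
 *	kr = consume_somehow(copy);		// hypothetical consumer
 *	if (kr != KERN_SUCCESS)
 *		vm_map_copy_discard(copy);	// still ours on failure
 *	return kr;
 */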
5360
5361 /*
5362 * Routine: vm_map_copy_copy
5363 *
5364 * Description:
5365 * Move the information in a map copy object to
5366 * a new map copy object, leaving the old one
5367 * empty.
5368 *
5369 * This is used by kernel routines that need
5370 * to look at out-of-line data (in copyin form)
5371 * before deciding whether to return SUCCESS.
5372 * If the routine returns FAILURE, the original
5373 * copy object will be deallocated; therefore,
5374 * these routines must make a copy of the copy
5375 * object and leave the original empty so that
5376 * deallocation will not fail.
5377 */
5378 vm_map_copy_t
5379 vm_map_copy_copy(
5380 vm_map_copy_t copy)
5381 {
5382 vm_map_copy_t new_copy;
5383
5384 if (copy == VM_MAP_COPY_NULL)
5385 return VM_MAP_COPY_NULL;
5386
5387 /*
5388 * Allocate a new copy object, and copy the information
5389 * from the old one into it.
5390 */
5391
5392 new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
5393 *new_copy = *copy;
5394
5395 if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
5396 /*
5397 * The links in the entry chain must be
5398 * changed to point to the new copy object.
5399 */
5400 vm_map_copy_first_entry(copy)->vme_prev
5401 = vm_map_copy_to_entry(new_copy);
5402 vm_map_copy_last_entry(copy)->vme_next
5403 = vm_map_copy_to_entry(new_copy);
5404 }
5405
5406 /*
5407 * Change the old copy object into one that contains
5408 * nothing to be deallocated.
5409 */
5410 copy->type = VM_MAP_COPY_OBJECT;
5411 copy->cpy_object = VM_OBJECT_NULL;
5412
5413 /*
5414 * Return the new object.
5415 */
5416 return new_copy;
5417 }
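/*
 * Editorial sketch (not part of the original source): the pattern the
 * header comment above describes, using a hypothetical validation
 * step.  By moving the contents into "my_copy" first, a later failure
 * return leaves the caller free to deallocate the original, now
 * empty, copy object without touching the real data.
 *
 *	vm_map_copy_t my_copy;
 *
 *	my_copy = vm_map_copy_copy(copy);	// original is now empty
 *	if (!looks_valid(my_copy)) {		// hypothetical check
 *		vm_map_copy_discard(my_copy);
 *		return KERN_INVALID_ARGUMENT;	// caller may discard
 *	}					// "copy" harmlessly
 *	... go on to consume "my_copy" ...
 */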
5418
5419 static kern_return_t
5420 vm_map_overwrite_submap_recurse(
5421 vm_map_t dst_map,
5422 vm_map_offset_t dst_addr,
5423 vm_map_size_t dst_size)
5424 {
5425 vm_map_offset_t dst_end;
5426 vm_map_entry_t tmp_entry;
5427 vm_map_entry_t entry;
5428 kern_return_t result;
5429 boolean_t encountered_sub_map = FALSE;
5430
5431
5432
5433 /*
5434 * Verify that the destination is all writeable
5435 * initially. We have to trunc the destination
5436 * address and round the copy size or we'll end up
5437 * splitting entries in strange ways.
5438 */
5439
5440 dst_end = vm_map_round_page(dst_addr + dst_size);
5441 vm_map_lock(dst_map);
5442
5443 start_pass_1:
5444 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
5445 vm_map_unlock(dst_map);
5446 return(KERN_INVALID_ADDRESS);
5447 }
5448
5449 vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(dst_addr));
5450 assert(!tmp_entry->use_pmap); /* clipping did unnest if needed */
5451
5452 for (entry = tmp_entry;;) {
5453 vm_map_entry_t next;
5454
5455 next = entry->vme_next;
5456 while(entry->is_sub_map) {
5457 vm_map_offset_t sub_start;
5458 vm_map_offset_t sub_end;
5459 vm_map_offset_t local_end;
5460
5461 if (entry->in_transition) {
5462 /*
5463 * Say that we are waiting, and wait for entry.
5464 */
5465 entry->needs_wakeup = TRUE;
5466 vm_map_entry_wait(dst_map, THREAD_UNINT);
5467
5468 goto start_pass_1;
5469 }
5470
5471 encountered_sub_map = TRUE;
5472 sub_start = entry->offset;
5473
5474 if(entry->vme_end < dst_end)
5475 sub_end = entry->vme_end;
5476 else
5477 sub_end = dst_end;
5478 sub_end -= entry->vme_start;
5479 sub_end += entry->offset;
5480 local_end = entry->vme_end;
5481 vm_map_unlock(dst_map);
5482
5483 result = vm_map_overwrite_submap_recurse(
5484 entry->object.sub_map,
5485 sub_start,
5486 sub_end - sub_start);
5487
5488 if(result != KERN_SUCCESS)
5489 return result;
5490 if (dst_end <= entry->vme_end)
5491 return KERN_SUCCESS;
5492 vm_map_lock(dst_map);
5493 if(!vm_map_lookup_entry(dst_map, local_end,
5494 &tmp_entry)) {
5495 vm_map_unlock(dst_map);
5496 return(KERN_INVALID_ADDRESS);
5497 }
5498 entry = tmp_entry;
5499 next = entry->vme_next;
5500 }
5501
5502 if ( ! (entry->protection & VM_PROT_WRITE)) {
5503 vm_map_unlock(dst_map);
5504 return(KERN_PROTECTION_FAILURE);
5505 }
5506
5507 /*
5508 * If the entry is in transition, we must wait
5509 * for it to exit that state. Anything could happen
5510 * when we unlock the map, so start over.
5511 */
5512 if (entry->in_transition) {
5513
5514 /*
5515 * Say that we are waiting, and wait for entry.
5516 */
5517 entry->needs_wakeup = TRUE;
5518 vm_map_entry_wait(dst_map, THREAD_UNINT);
5519
5520 goto start_pass_1;
5521 }
5522
5523 /*
5524 * our range is contained completely within this map entry
5525 */
5526 if (dst_end <= entry->vme_end) {
5527 vm_map_unlock(dst_map);
5528 return KERN_SUCCESS;
5529 }
5530 /*
5531 * check that range specified is contiguous region
5532 */
5533 if ((next == vm_map_to_entry(dst_map)) ||
5534 (next->vme_start != entry->vme_end)) {
5535 vm_map_unlock(dst_map);
5536 return(KERN_INVALID_ADDRESS);
5537 }
5538
5539 /*
5540 * Check for permanent objects in the destination.
5541 */
5542 if ((entry->object.vm_object != VM_OBJECT_NULL) &&
5543 ((!entry->object.vm_object->internal) ||
5544 (entry->object.vm_object->true_share))) {
5545 if(encountered_sub_map) {
5546 vm_map_unlock(dst_map);
5547 return(KERN_FAILURE);
5548 }
5549 }
5550
5551
5552 entry = next;
5553 }/* for */
5554 vm_map_unlock(dst_map);
5555 return(KERN_SUCCESS);
5556 }
5557
5558 /*
5559 * Routine: vm_map_copy_overwrite
5560 *
5561 * Description:
5562 * Copy the memory described by the map copy
5563 * object (copy; returned by vm_map_copyin) onto
5564 * the specified destination region (dst_map, dst_addr).
5565 * The destination must be writeable.
5566 *
5567 * Unlike vm_map_copyout, this routine actually
5568 * writes over previously-mapped memory. If the
5569 * previous mapping was to a permanent (user-supplied)
5570 * memory object, it is preserved.
5571 *
5572 * The attributes (protection and inheritance) of the
5573 * destination region are preserved.
5574 *
5575 * If successful, consumes the copy object.
5576 * Otherwise, the caller is responsible for it.
5577 *
5578 * Implementation notes:
5579 * To overwrite aligned temporary virtual memory, it is
5580 * sufficient to remove the previous mapping and insert
5581 * the new copy. This replacement is done either on
5582 * the whole region (if no permanent virtual memory
5583 * objects are embedded in the destination region) or
5584 * in individual map entries.
5585 *
5586 * To overwrite permanent virtual memory, it is necessary
5587 * to copy each page, as the external memory management
5588 * interface currently does not provide any optimizations.
5589 *
5590 * Unaligned memory also has to be copied. It is possible
5591 * to use 'vm_trickery' to copy the aligned data. This is
5592 * not done but not hard to implement.
5593 *
5594 * Once a page of permanent memory has been overwritten,
5595 * it is impossible to interrupt this function; otherwise,
5596 * the call would be neither atomic nor location-independent.
5597 * The kernel-state portion of a user thread must be
5598 * interruptible.
5599 *
5600 * It may be expensive to forward all requests that might
5601 * overwrite permanent memory (vm_write, vm_copy) to
5602 * uninterruptible kernel threads. This routine may be
5603 * called by interruptible threads; however, success is
5604 * not guaranteed -- if the request cannot be performed
5605 * atomically and interruptibly, an error indication is
5606 * returned.
5607 */
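/*
 * Editorial sketch (not part of the original source): the vm_write-
 * style sequence this routine serves, with hypothetical maps,
 * addresses and size.  The exported vm_map_copy_overwrite() wrapper
 * consumes the copy object on success; on failure the caller still
 * owns it.
 *
 *	kr = vm_map_copyin(src_map, src_addr, size, FALSE, &copy);
 *	if (kr == KERN_SUCCESS) {
 *		kr = vm_map_copy_overwrite(dst_map, dst_addr,
 *					   copy, TRUE);	// interruptible
 *		if (kr != KERN_SUCCESS)
 *			vm_map_copy_discard(copy);
 *	}
 */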
5608
5609 static kern_return_t
5610 vm_map_copy_overwrite_nested(
5611 vm_map_t dst_map,
5612 vm_map_address_t dst_addr,
5613 vm_map_copy_t copy,
5614 boolean_t interruptible,
5615 pmap_t pmap,
5616 boolean_t discard_on_success)
5617 {
5618 vm_map_offset_t dst_end;
5619 vm_map_entry_t tmp_entry;
5620 vm_map_entry_t entry;
5621 kern_return_t kr;
5622 boolean_t aligned = TRUE;
5623 boolean_t contains_permanent_objects = FALSE;
5624 boolean_t encountered_sub_map = FALSE;
5625 vm_map_offset_t base_addr;
5626 vm_map_size_t copy_size;
5627 vm_map_size_t total_size;
5628
5629
5630 /*
5631 * Check for null copy object.
5632 */
5633
5634 if (copy == VM_MAP_COPY_NULL)
5635 return(KERN_SUCCESS);
5636
5637 /*
5638 * Check for special kernel buffer allocated
5639 * by new_ipc_kmsg_copyin.
5640 */
5641
5642 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
5643 return(vm_map_copyout_kernel_buffer(
5644 dst_map, &dst_addr,
5645 copy, TRUE));
5646 }
5647
5648 /*
5649 * Only works for entry lists at the moment. Will
5650 * support page lists later.
5651 */
5652
5653 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
5654
5655 if (copy->size == 0) {
5656 if (discard_on_success)
5657 vm_map_copy_discard(copy);
5658 return(KERN_SUCCESS);
5659 }
5660
5661 /*
5662 * Verify that the destination is all writeable
5663 * initially. We have to trunc the destination
5664 * address and round the copy size or we'll end up
5665 * splitting entries in strange ways.
5666 */
5667
5668 if (!page_aligned(copy->size) ||
5669 !page_aligned (copy->offset) ||
5670 !page_aligned (dst_addr))
5671 {
5672 aligned = FALSE;
5673 dst_end = vm_map_round_page(dst_addr + copy->size);
5674 } else {
5675 dst_end = dst_addr + copy->size;
5676 }
5677
5678 vm_map_lock(dst_map);
5679
5680 /* LP64todo - remove this check when vm_map_commpage64()
5681 * no longer has to stuff in a map_entry for the commpage
5682 * above the map's max_offset.
5683 */
5684 if (dst_addr >= dst_map->max_offset) {
5685 vm_map_unlock(dst_map);
5686 return(KERN_INVALID_ADDRESS);
5687 }
5688
5689 start_pass_1:
5690 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
5691 vm_map_unlock(dst_map);
5692 return(KERN_INVALID_ADDRESS);
5693 }
5694 vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(dst_addr));
5695 for (entry = tmp_entry;;) {
5696 vm_map_entry_t next = entry->vme_next;
5697
5698 while(entry->is_sub_map) {
5699 vm_map_offset_t sub_start;
5700 vm_map_offset_t sub_end;
5701 vm_map_offset_t local_end;
5702
5703 if (entry->in_transition) {
5704
5705 /*
5706 * Say that we are waiting, and wait for entry.
5707 */
5708 entry->needs_wakeup = TRUE;
5709 vm_map_entry_wait(dst_map, THREAD_UNINT);
5710
5711 goto start_pass_1;
5712 }
5713
5714 local_end = entry->vme_end;
5715 if (!(entry->needs_copy)) {
5716 /* if needs_copy we are a COW submap */
5717 /* in such a case we just replace so */
5718 /* there is no need for the */
5719 /* following check. */
5720 encountered_sub_map = TRUE;
5721 sub_start = entry->offset;
5722
5723 if(entry->vme_end < dst_end)
5724 sub_end = entry->vme_end;
5725 else
5726 sub_end = dst_end;
5727 sub_end -= entry->vme_start;
5728 sub_end += entry->offset;
5729 vm_map_unlock(dst_map);
5730
5731 kr = vm_map_overwrite_submap_recurse(
5732 entry->object.sub_map,
5733 sub_start,
5734 sub_end - sub_start);
5735 if(kr != KERN_SUCCESS)
5736 return kr;
5737 vm_map_lock(dst_map);
5738 }
5739
5740 if (dst_end <= entry->vme_end)
5741 goto start_overwrite;
5742 if(!vm_map_lookup_entry(dst_map, local_end,
5743 &entry)) {
5744 vm_map_unlock(dst_map);
5745 return(KERN_INVALID_ADDRESS);
5746 }
5747 next = entry->vme_next;
5748 }
5749
5750 if ( ! (entry->protection & VM_PROT_WRITE)) {
5751 vm_map_unlock(dst_map);
5752 return(KERN_PROTECTION_FAILURE);
5753 }
5754
5755 /*
5756 * If the entry is in transition, we must wait
5757 * for it to exit that state. Anything could happen
5758 * when we unlock the map, so start over.
5759 */
5760 if (entry->in_transition) {
5761
5762 /*
5763 * Say that we are waiting, and wait for entry.
5764 */
5765 entry->needs_wakeup = TRUE;
5766 vm_map_entry_wait(dst_map, THREAD_UNINT);
5767
5768 goto start_pass_1;
5769 }
5770
5771 /*
5772 * our range is contained completely within this map entry
5773 */
5774 if (dst_end <= entry->vme_end)
5775 break;
5776 /*
5777 * check that range specified is contiguous region
5778 */
5779 if ((next == vm_map_to_entry(dst_map)) ||
5780 (next->vme_start != entry->vme_end)) {
5781 vm_map_unlock(dst_map);
5782 return(KERN_INVALID_ADDRESS);
5783 }
5784
5785
5786 /*
5787 * Check for permanent objects in the destination.
5788 */
5789 if ((entry->object.vm_object != VM_OBJECT_NULL) &&
5790 ((!entry->object.vm_object->internal) ||
5791 (entry->object.vm_object->true_share))) {
5792 contains_permanent_objects = TRUE;
5793 }
5794
5795 entry = next;
5796 }/* for */
5797
5798 start_overwrite:
5799 /*
5800 * If there are permanent objects in the destination, then
5801 * the copy cannot be interrupted.
5802 */
5803
5804 if (interruptible && contains_permanent_objects) {
5805 vm_map_unlock(dst_map);
5806 return(KERN_FAILURE); /* XXX */
5807 }
5808
5809 /*
5810 *
5811 * Make a second pass, overwriting the data
5812 * At the beginning of each loop iteration,
5813 * the next entry to be overwritten is "tmp_entry"
5814 * (initially, the value returned from the lookup above),
5815 * and the starting address expected in that entry
5816 * is "start".
5817 */
5818
5819 total_size = copy->size;
5820 if(encountered_sub_map) {
5821 copy_size = 0;
5822 /* re-calculate tmp_entry since we've had the map */
5823 /* unlocked */
5824 if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
5825 vm_map_unlock(dst_map);
5826 return(KERN_INVALID_ADDRESS);
5827 }
5828 } else {
5829 copy_size = copy->size;
5830 }
5831
5832 base_addr = dst_addr;
5833 while(TRUE) {
5834 /* deconstruct the copy object and do it in parts, */
5835 /* only in the sub_map, interruptible case */
5836 vm_map_entry_t copy_entry;
5837 vm_map_entry_t previous_prev = VM_MAP_ENTRY_NULL;
5838 vm_map_entry_t next_copy = VM_MAP_ENTRY_NULL;
5839 int nentries;
5840 int remaining_entries = 0;
5841 vm_map_offset_t new_offset = 0;
5842
5843 for (entry = tmp_entry; copy_size == 0;) {
5844 vm_map_entry_t next;
5845
5846 next = entry->vme_next;
5847
5848 /* tmp_entry and base address are moved along */
5849 /* each time we encounter a sub-map. Otherwise */
5850 /* entry can outpace tmp_entry, and the copy_size */
5851 /* may reflect the distance between them */
5852 /* If the current entry is found to be in transition, */
5853 /* we will start over at the beginning or at the last */
5854 /* encounter of a submap, as dictated by base_addr, */
5855 /* and we will zero copy_size accordingly. */
5856 if (entry->in_transition) {
5857 /*
5858 * Say that we are waiting, and wait for entry.
5859 */
5860 entry->needs_wakeup = TRUE;
5861 vm_map_entry_wait(dst_map, THREAD_UNINT);
5862
5863 if(!vm_map_lookup_entry(dst_map, base_addr,
5864 &tmp_entry)) {
5865 vm_map_unlock(dst_map);
5866 return(KERN_INVALID_ADDRESS);
5867 }
5868 copy_size = 0;
5869 entry = tmp_entry;
5870 continue;
5871 }
5872 if(entry->is_sub_map) {
5873 vm_map_offset_t sub_start;
5874 vm_map_offset_t sub_end;
5875 vm_map_offset_t local_end;
5876
5877 if (entry->needs_copy) {
5878 /* if this is a COW submap */
5879 /* just back the range with an */
5880 /* anonymous entry */
5881 if(entry->vme_end < dst_end)
5882 sub_end = entry->vme_end;
5883 else
5884 sub_end = dst_end;
5885 if(entry->vme_start < base_addr)
5886 sub_start = base_addr;
5887 else
5888 sub_start = entry->vme_start;
5889 vm_map_clip_end(
5890 dst_map, entry, sub_end);
5891 vm_map_clip_start(
5892 dst_map, entry, sub_start);
5893 assert(!entry->use_pmap);
5894 entry->is_sub_map = FALSE;
5895 vm_map_deallocate(
5896 entry->object.sub_map);
5897 entry->object.sub_map = NULL;
5898 entry->is_shared = FALSE;
5899 entry->needs_copy = FALSE;
5900 entry->offset = 0;
5901 /*
5902 * XXX FBDP
5903 * We should propagate the protections
5904 * of the submap entry here instead
5905 * of forcing them to VM_PROT_ALL...
5906 * Or better yet, we should inherit
5907 * the protection of the copy_entry.
5908 */
5909 entry->protection = VM_PROT_ALL;
5910 entry->max_protection = VM_PROT_ALL;
5911 entry->wired_count = 0;
5912 entry->user_wired_count = 0;
5913 if(entry->inheritance
5914 == VM_INHERIT_SHARE)
5915 entry->inheritance = VM_INHERIT_COPY;
5916 continue;
5917 }
5918 /* first take care of any non-sub_map */
5919 /* entries to send */
5920 if(base_addr < entry->vme_start) {
5921 /* stuff to send */
5922 copy_size =
5923 entry->vme_start - base_addr;
5924 break;
5925 }
5926 sub_start = entry->offset;
5927
5928 if(entry->vme_end < dst_end)
5929 sub_end = entry->vme_end;
5930 else
5931 sub_end = dst_end;
5932 sub_end -= entry->vme_start;
5933 sub_end += entry->offset;
5934 local_end = entry->vme_end;
5935 vm_map_unlock(dst_map);
5936 copy_size = sub_end - sub_start;
5937
5938 /* adjust the copy object */
5939 if (total_size > copy_size) {
5940 vm_map_size_t local_size = 0;
5941 vm_map_size_t entry_size;
5942
5943 nentries = 1;
5944 new_offset = copy->offset;
5945 copy_entry = vm_map_copy_first_entry(copy);
5946 while(copy_entry !=
5947 vm_map_copy_to_entry(copy)){
5948 entry_size = copy_entry->vme_end -
5949 copy_entry->vme_start;
5950 if((local_size < copy_size) &&
5951 ((local_size + entry_size)
5952 >= copy_size)) {
5953 vm_map_copy_clip_end(copy,
5954 copy_entry,
5955 copy_entry->vme_start +
5956 (copy_size - local_size));
5957 entry_size = copy_entry->vme_end -
5958 copy_entry->vme_start;
5959 local_size += entry_size;
5960 new_offset += entry_size;
5961 }
5962 if(local_size >= copy_size) {
5963 next_copy = copy_entry->vme_next;
5964 copy_entry->vme_next =
5965 vm_map_copy_to_entry(copy);
5966 previous_prev =
5967 copy->cpy_hdr.links.prev;
5968 copy->cpy_hdr.links.prev = copy_entry;
5969 copy->size = copy_size;
5970 remaining_entries =
5971 copy->cpy_hdr.nentries;
5972 remaining_entries -= nentries;
5973 copy->cpy_hdr.nentries = nentries;
5974 break;
5975 } else {
5976 local_size += entry_size;
5977 new_offset += entry_size;
5978 nentries++;
5979 }
5980 copy_entry = copy_entry->vme_next;
5981 }
5982 }
5983
5984 if((entry->use_pmap) && (pmap == NULL)) {
5985 kr = vm_map_copy_overwrite_nested(
5986 entry->object.sub_map,
5987 sub_start,
5988 copy,
5989 interruptible,
5990 entry->object.sub_map->pmap,
5991 TRUE);
5992 } else if (pmap != NULL) {
5993 kr = vm_map_copy_overwrite_nested(
5994 entry->object.sub_map,
5995 sub_start,
5996 copy,
5997 interruptible, pmap,
5998 TRUE);
5999 } else {
6000 kr = vm_map_copy_overwrite_nested(
6001 entry->object.sub_map,
6002 sub_start,
6003 copy,
6004 interruptible,
6005 dst_map->pmap,
6006 TRUE);
6007 }
6008 if(kr != KERN_SUCCESS) {
6009 if(next_copy != NULL) {
6010 copy->cpy_hdr.nentries +=
6011 remaining_entries;
6012 copy->cpy_hdr.links.prev->vme_next =
6013 next_copy;
6014 copy->cpy_hdr.links.prev
6015 = previous_prev;
6016 copy->size = total_size;
6017 }
6018 return kr;
6019 }
6020 if (dst_end <= local_end) {
6021 return(KERN_SUCCESS);
6022 }
6023 /* otherwise copy no longer exists, it was */
6024 /* destroyed after successful copy_overwrite */
6025 copy = (vm_map_copy_t)
6026 zalloc(vm_map_copy_zone);
6027 vm_map_copy_first_entry(copy) =
6028 vm_map_copy_last_entry(copy) =
6029 vm_map_copy_to_entry(copy);
6030 copy->type = VM_MAP_COPY_ENTRY_LIST;
6031 copy->offset = new_offset;
6032
6033 /*
6034 * XXX FBDP
6035 * this does not seem to deal with
6036 * the VM map store (R&B tree)
6037 */
6038
6039 total_size -= copy_size;
6040 copy_size = 0;
6041 /* put back remainder of copy in container */
6042 if(next_copy != NULL) {
6043 copy->cpy_hdr.nentries = remaining_entries;
6044 copy->cpy_hdr.links.next = next_copy;
6045 copy->cpy_hdr.links.prev = previous_prev;
6046 copy->size = total_size;
6047 next_copy->vme_prev =
6048 vm_map_copy_to_entry(copy);
6049 next_copy = NULL;
6050 }
6051 base_addr = local_end;
6052 vm_map_lock(dst_map);
6053 if(!vm_map_lookup_entry(dst_map,
6054 local_end, &tmp_entry)) {
6055 vm_map_unlock(dst_map);
6056 return(KERN_INVALID_ADDRESS);
6057 }
6058 entry = tmp_entry;
6059 continue;
6060 }
6061 if (dst_end <= entry->vme_end) {
6062 copy_size = dst_end - base_addr;
6063 break;
6064 }
6065
6066 if ((next == vm_map_to_entry(dst_map)) ||
6067 (next->vme_start != entry->vme_end)) {
6068 vm_map_unlock(dst_map);
6069 return(KERN_INVALID_ADDRESS);
6070 }
6071
6072 entry = next;
6073 }/* for */
6074
6075 next_copy = NULL;
6076 nentries = 1;
6077
6078 /* adjust the copy object */
6079 if (total_size > copy_size) {
6080 vm_map_size_t local_size = 0;
6081 vm_map_size_t entry_size;
6082
6083 new_offset = copy->offset;
6084 copy_entry = vm_map_copy_first_entry(copy);
6085 while(copy_entry != vm_map_copy_to_entry(copy)) {
6086 entry_size = copy_entry->vme_end -
6087 copy_entry->vme_start;
6088 if((local_size < copy_size) &&
6089 ((local_size + entry_size)
6090 >= copy_size)) {
6091 vm_map_copy_clip_end(copy, copy_entry,
6092 copy_entry->vme_start +
6093 (copy_size - local_size));
6094 entry_size = copy_entry->vme_end -
6095 copy_entry->vme_start;
6096 local_size += entry_size;
6097 new_offset += entry_size;
6098 }
6099 if(local_size >= copy_size) {
6100 next_copy = copy_entry->vme_next;
6101 copy_entry->vme_next =
6102 vm_map_copy_to_entry(copy);
6103 previous_prev =
6104 copy->cpy_hdr.links.prev;
6105 copy->cpy_hdr.links.prev = copy_entry;
6106 copy->size = copy_size;
6107 remaining_entries =
6108 copy->cpy_hdr.nentries;
6109 remaining_entries -= nentries;
6110 copy->cpy_hdr.nentries = nentries;
6111 break;
6112 } else {
6113 local_size += entry_size;
6114 new_offset += entry_size;
6115 nentries++;
6116 }
6117 copy_entry = copy_entry->vme_next;
6118 }
6119 }
6120
6121 if (aligned) {
6122 pmap_t local_pmap;
6123
6124 if(pmap)
6125 local_pmap = pmap;
6126 else
6127 local_pmap = dst_map->pmap;
6128
6129 if ((kr = vm_map_copy_overwrite_aligned(
6130 dst_map, tmp_entry, copy,
6131 base_addr, local_pmap)) != KERN_SUCCESS) {
6132 if(next_copy != NULL) {
6133 copy->cpy_hdr.nentries +=
6134 remaining_entries;
6135 copy->cpy_hdr.links.prev->vme_next =
6136 next_copy;
6137 copy->cpy_hdr.links.prev =
6138 previous_prev;
6139 copy->size += copy_size;
6140 }
6141 return kr;
6142 }
6143 vm_map_unlock(dst_map);
6144 } else {
6145 /*
6146 * Performance gain:
6147 *
6148 * if the copy and dst address are misaligned but share the same
6149 * offset within the page, we can copy_not_aligned the
6150 * misaligned parts and copy aligned the rest. If they are
6151 * aligned but len is unaligned, we simply need to copy
6152 * the end bit unaligned. We'll need to split the misaligned
6153 * bits of the region in this case!
6154 */
6155 /* ALWAYS UNLOCKS THE dst_map MAP */
6156 if ((kr = vm_map_copy_overwrite_unaligned( dst_map,
6157 tmp_entry, copy, base_addr)) != KERN_SUCCESS) {
6158 if(next_copy != NULL) {
6159 copy->cpy_hdr.nentries +=
6160 remaining_entries;
6161 copy->cpy_hdr.links.prev->vme_next =
6162 next_copy;
6163 copy->cpy_hdr.links.prev =
6164 previous_prev;
6165 copy->size += copy_size;
6166 }
6167 return kr;
6168 }
6169 }
6170 total_size -= copy_size;
6171 if(total_size == 0)
6172 break;
6173 base_addr += copy_size;
6174 copy_size = 0;
6175 copy->offset = new_offset;
6176 if(next_copy != NULL) {
6177 copy->cpy_hdr.nentries = remaining_entries;
6178 copy->cpy_hdr.links.next = next_copy;
6179 copy->cpy_hdr.links.prev = previous_prev;
6180 next_copy->vme_prev = vm_map_copy_to_entry(copy);
6181 copy->size = total_size;
6182 }
6183 vm_map_lock(dst_map);
6184 while(TRUE) {
6185 if (!vm_map_lookup_entry(dst_map,
6186 base_addr, &tmp_entry)) {
6187 vm_map_unlock(dst_map);
6188 return(KERN_INVALID_ADDRESS);
6189 }
6190 if (tmp_entry->in_transition) {
6191 entry->needs_wakeup = TRUE;
6192 vm_map_entry_wait(dst_map, THREAD_UNINT);
6193 } else {
6194 break;
6195 }
6196 }
6197 vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(base_addr));
6198
6199 entry = tmp_entry;
6200 } /* while */
6201
6202 /*
6203 * Throw away the vm_map_copy object
6204 */
6205 if (discard_on_success)
6206 vm_map_copy_discard(copy);
6207
6208 return(KERN_SUCCESS);
6209 }/* vm_map_copy_overwrite */
6210
6211 kern_return_t
6212 vm_map_copy_overwrite(
6213 vm_map_t dst_map,
6214 vm_map_offset_t dst_addr,
6215 vm_map_copy_t copy,
6216 boolean_t interruptible)
6217 {
6218 vm_map_size_t head_size, tail_size;
6219 vm_map_copy_t head_copy, tail_copy;
6220 vm_map_offset_t head_addr, tail_addr;
6221 vm_map_entry_t entry;
6222 kern_return_t kr;
6223
6224 head_size = 0;
6225 tail_size = 0;
6226 head_copy = NULL;
6227 tail_copy = NULL;
6228 head_addr = 0;
6229 tail_addr = 0;
6230
6231 if (interruptible ||
6232 copy == VM_MAP_COPY_NULL ||
6233 copy->type != VM_MAP_COPY_ENTRY_LIST) {
6234 /*
6235 * We can't split the "copy" map if we're interruptible
6236 * or if we don't have a "copy" map...
6237 */
6238 blunt_copy:
6239 return vm_map_copy_overwrite_nested(dst_map,
6240 dst_addr,
6241 copy,
6242 interruptible,
6243 (pmap_t) NULL,
6244 TRUE);
6245 }
6246
6247 if (copy->size < 3 * PAGE_SIZE) {
6248 /*
6249 * Too small to bother with optimizing...
6250 */
6251 goto blunt_copy;
6252 }
6253
6254 if ((dst_addr & PAGE_MASK) != (copy->offset & PAGE_MASK)) {
6255 /*
6256 * Incompatible mis-alignment of source and destination...
6257 */
6258 goto blunt_copy;
6259 }
6260
6261 /*
6262 * Proper alignment or identical mis-alignment at the beginning.
6263 * Let's try and do a small unaligned copy first (if needed)
6264 * and then an aligned copy for the rest.
6265 */
6266 if (!page_aligned(dst_addr)) {
6267 head_addr = dst_addr;
6268 head_size = PAGE_SIZE - (copy->offset & PAGE_MASK);
6269 }
6270 if (!page_aligned(copy->offset + copy->size)) {
6271 /*
6272 * Mis-alignment at the end.
6273 * Do an aligned copy up to the last page and
6274 * then an unaligned copy for the remaining bytes.
6275 */
6276 tail_size = (copy->offset + copy->size) & PAGE_MASK;
6277 tail_addr = dst_addr + copy->size - tail_size;
6278 }
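/*
 * Worked example of the head/tail split above (values assumed,
 * illustrative only, with PAGE_SIZE = 0x1000):
 * for dst_addr = 0x5400, copy->offset = 0x3400 and
 * copy->size = 0x3900, the head copy covers [0x5400, 0x6000),
 * since head_size = 0x1000 - (0x3400 & 0xfff) = 0xc00;
 * the tail copy covers [0x8000, 0x8d00), since
 * tail_size = (0x3400 + 0x3900) & 0xfff = 0xd00 and
 * tail_addr = 0x5400 + 0x3900 - 0xd00 = 0x8000;
 * the aligned copy in between then handles [0x6000, 0x8000).
 */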
6279
6280 if (head_size + tail_size == copy->size) {
6281 /*
6282 * It's all unaligned, no optimization possible...
6283 */
6284 goto blunt_copy;
6285 }
6286
6287 /*
6288 * Can't optimize if there are any submaps in the
6289 * destination due to the way we free the "copy" map
6290 * progressively in vm_map_copy_overwrite_nested()
6291 * in that case.
6292 */
6293 vm_map_lock_read(dst_map);
6294 if (! vm_map_lookup_entry(dst_map, dst_addr, &entry)) {
6295 vm_map_unlock_read(dst_map);
6296 goto blunt_copy;
6297 }
6298 for (;
6299 (entry != vm_map_to_entry(dst_map) &&
6300 entry->vme_start < dst_addr + copy->size);
6301 entry = entry->vme_next) {
6302 if (entry->is_sub_map) {
6303 vm_map_unlock_read(dst_map);
6304 goto blunt_copy;
6305 }
6306 }
6307 vm_map_unlock_read(dst_map);
6308
6309 if (head_size) {
6310 /*
6311 * Unaligned copy of the first "head_size" bytes, to reach
6312 * a page boundary.
6313 */
6314
6315 /*
6316 * Extract "head_copy" out of "copy".
6317 */
6318 head_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
6319 vm_map_copy_first_entry(head_copy) =
6320 vm_map_copy_to_entry(head_copy);
6321 vm_map_copy_last_entry(head_copy) =
6322 vm_map_copy_to_entry(head_copy);
6323 head_copy->type = VM_MAP_COPY_ENTRY_LIST;
6324 head_copy->cpy_hdr.nentries = 0;
6325 head_copy->cpy_hdr.entries_pageable =
6326 copy->cpy_hdr.entries_pageable;
6327 vm_map_store_init(&head_copy->cpy_hdr);
6328
6329 head_copy->offset = copy->offset;
6330 head_copy->size = head_size;
6331
6332 copy->offset += head_size;
6333 copy->size -= head_size;
6334
6335 entry = vm_map_copy_first_entry(copy);
6336 vm_map_copy_clip_end(copy, entry, copy->offset);
6337 vm_map_copy_entry_unlink(copy, entry);
6338 vm_map_copy_entry_link(head_copy,
6339 vm_map_copy_to_entry(head_copy),
6340 entry);
6341
6342 /*
6343 * Do the unaligned copy.
6344 */
6345 kr = vm_map_copy_overwrite_nested(dst_map,
6346 head_addr,
6347 head_copy,
6348 interruptible,
6349 (pmap_t) NULL,
6350 FALSE);
6351 if (kr != KERN_SUCCESS)
6352 goto done;
6353 }
6354
6355 if (tail_size) {
6356 /*
6357 * Extract "tail_copy" out of "copy".
6358 */
6359 tail_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
6360 vm_map_copy_first_entry(tail_copy) =
6361 vm_map_copy_to_entry(tail_copy);
6362 vm_map_copy_last_entry(tail_copy) =
6363 vm_map_copy_to_entry(tail_copy);
6364 tail_copy->type = VM_MAP_COPY_ENTRY_LIST;
6365 tail_copy->cpy_hdr.nentries = 0;
6366 tail_copy->cpy_hdr.entries_pageable =
6367 copy->cpy_hdr.entries_pageable;
6368 vm_map_store_init(&tail_copy->cpy_hdr);
6369
6370 tail_copy->offset = copy->offset + copy->size - tail_size;
6371 tail_copy->size = tail_size;
6372
6373 copy->size -= tail_size;
6374
6375 entry = vm_map_copy_last_entry(copy);
6376 vm_map_copy_clip_start(copy, entry, tail_copy->offset);
6377 entry = vm_map_copy_last_entry(copy);
6378 vm_map_copy_entry_unlink(copy, entry);
6379 vm_map_copy_entry_link(tail_copy,
6380 vm_map_copy_last_entry(tail_copy),
6381 entry);
6382 }
6383
6384 /*
6385 * Copy most (or possibly all) of the data.
6386 */
6387 kr = vm_map_copy_overwrite_nested(dst_map,
6388 dst_addr + head_size,
6389 copy,
6390 interruptible,
6391 (pmap_t) NULL,
6392 FALSE);
6393 if (kr != KERN_SUCCESS) {
6394 goto done;
6395 }
6396
6397 if (tail_size) {
6398 kr = vm_map_copy_overwrite_nested(dst_map,
6399 tail_addr,
6400 tail_copy,
6401 interruptible,
6402 (pmap_t) NULL,
6403 FALSE);
6404 }
6405
6406 done:
6407 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
6408 if (kr == KERN_SUCCESS) {
6409 /*
6410 * Discard all the copy maps.
6411 */
6412 if (head_copy) {
6413 vm_map_copy_discard(head_copy);
6414 head_copy = NULL;
6415 }
6416 vm_map_copy_discard(copy);
6417 if (tail_copy) {
6418 vm_map_copy_discard(tail_copy);
6419 tail_copy = NULL;
6420 }
6421 } else {
6422 /*
6423 * Re-assemble the original copy map.
6424 */
6425 if (head_copy) {
6426 entry = vm_map_copy_first_entry(head_copy);
6427 vm_map_copy_entry_unlink(head_copy, entry);
6428 vm_map_copy_entry_link(copy,
6429 vm_map_copy_to_entry(copy),
6430 entry);
6431 copy->offset -= head_size;
6432 copy->size += head_size;
6433 vm_map_copy_discard(head_copy);
6434 head_copy = NULL;
6435 }
6436 if (tail_copy) {
6437 entry = vm_map_copy_last_entry(tail_copy);
6438 vm_map_copy_entry_unlink(tail_copy, entry);
6439 vm_map_copy_entry_link(copy,
6440 vm_map_copy_last_entry(copy),
6441 entry);
6442 copy->size += tail_size;
6443 vm_map_copy_discard(tail_copy);
6444 tail_copy = NULL;
6445 }
6446 }
6447 return kr;
6448 }
6449
6450
6451 /*
6452 * Routine: vm_map_copy_overwrite_unaligned [internal use only]
6453 *
6454 * Description:
6455 * Physically copy unaligned data
6456 *
6457 * Implementation:
6458 * Unaligned parts of pages have to be physically copied. We use
6459 * a modified form of vm_fault_copy (which understands non-aligned
6460 * page offsets and sizes) to do the copy. We attempt to copy as
6461 * much memory in one go as possible; however, vm_fault_copy copies
6462 * within one memory object, so we have to find the smallest of "amount
6463 * left", "source object data size" and "target object data size". With
6464 * unaligned data we don't need to split regions, therefore the source
6465 * (copy) object should be one map entry; the target range may be split
6466 * over multiple map entries, however. In any event we are pessimistic
6467 * about these assumptions.
6468 *
6469 * Assumptions:
6470 * dst_map is locked on entry and is returned locked on success,
6471 * unlocked on error.
6472 */
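/*
 * Per-iteration sizing in the loop below, shown as a minimal sketch
 * (a restatement of the code that follows, nothing new):
 *
 *	dst_size  = entry->vme_end - start;
 *	src_size  = copy_entry->vme_end -
 *		    (copy_entry->vme_start + src_offset);
 *	copy_size = (dst_size < src_size) ? dst_size : src_size;
 *	if (copy_size > amount_left)
 *		copy_size = amount_left;
 *
 * i.e. each vm_fault_copy() call moves the minimum of what is left in
 * the destination entry, what is left in the source copy entry, and
 * what is left overall.
 */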
6473
6474 static kern_return_t
6475 vm_map_copy_overwrite_unaligned(
6476 vm_map_t dst_map,
6477 vm_map_entry_t entry,
6478 vm_map_copy_t copy,
6479 vm_map_offset_t start)
6480 {
6481 vm_map_entry_t copy_entry = vm_map_copy_first_entry(copy);
6482 vm_map_version_t version;
6483 vm_object_t dst_object;
6484 vm_object_offset_t dst_offset;
6485 vm_object_offset_t src_offset;
6486 vm_object_offset_t entry_offset;
6487 vm_map_offset_t entry_end;
6488 vm_map_size_t src_size,
6489 dst_size,
6490 copy_size,
6491 amount_left;
6492 kern_return_t kr = KERN_SUCCESS;
6493
6494 vm_map_lock_write_to_read(dst_map);
6495
6496 src_offset = copy->offset - vm_object_trunc_page(copy->offset);
6497 amount_left = copy->size;
6498 /*
6499 * unaligned, so we never clipped this entry; we need the offset into
6500 * the vm_object, not just the data.
6501 */
6502 while (amount_left > 0) {
6503
6504 if (entry == vm_map_to_entry(dst_map)) {
6505 vm_map_unlock_read(dst_map);
6506 return KERN_INVALID_ADDRESS;
6507 }
6508
6509 /* "start" must be within the current map entry */
6510 assert ((start>=entry->vme_start) && (start<entry->vme_end));
6511
6512 dst_offset = start - entry->vme_start;
6513
6514 dst_size = entry->vme_end - start;
6515
6516 src_size = copy_entry->vme_end -
6517 (copy_entry->vme_start + src_offset);
6518
6519 if (dst_size < src_size) {
6520 /*
6521 * we can only copy dst_size bytes before
6522 * we have to get the next destination entry
6523 */
6524 copy_size = dst_size;
6525 } else {
6526 /*
6527 * we can only copy src_size bytes before
6528 * we have to get the next source copy entry
6529 */
6530 copy_size = src_size;
6531 }
6532
6533 if (copy_size > amount_left) {
6534 copy_size = amount_left;
6535 }
6536 /*
6537 * Entry needs copy: create a shadow object for the
6538 * copy-on-write region.
6539 */
6540 if (entry->needs_copy &&
6541 ((entry->protection & VM_PROT_WRITE) != 0))
6542 {
6543 if (vm_map_lock_read_to_write(dst_map)) {
6544 vm_map_lock_read(dst_map);
6545 goto RetryLookup;
6546 }
6547 vm_object_shadow(&entry->object.vm_object,
6548 &entry->offset,
6549 (vm_map_size_t)(entry->vme_end
6550 - entry->vme_start));
6551 entry->needs_copy = FALSE;
6552 vm_map_lock_write_to_read(dst_map);
6553 }
6554 dst_object = entry->object.vm_object;
6555 /*
6556 * unlike with the virtual (aligned) copy, we're going
6557 * to fault on it, therefore we need a target object.
6558 */
6559 if (dst_object == VM_OBJECT_NULL) {
6560 if (vm_map_lock_read_to_write(dst_map)) {
6561 vm_map_lock_read(dst_map);
6562 goto RetryLookup;
6563 }
6564 dst_object = vm_object_allocate((vm_map_size_t)
6565 entry->vme_end - entry->vme_start);
6566 entry->object.vm_object = dst_object;
6567 entry->offset = 0;
6568 vm_map_lock_write_to_read(dst_map);
6569 }
6570 /*
6571 * Take an object reference and unlock map. The "entry" may
6572 * disappear or change when the map is unlocked.
6573 */
6574 vm_object_reference(dst_object);
6575 version.main_timestamp = dst_map->timestamp;
6576 entry_offset = entry->offset;
6577 entry_end = entry->vme_end;
6578 vm_map_unlock_read(dst_map);
6579 /*
6580 * Copy as much as possible in one pass
6581 */
6582 kr = vm_fault_copy(
6583 copy_entry->object.vm_object,
6584 copy_entry->offset + src_offset,
6585 &copy_size,
6586 dst_object,
6587 entry_offset + dst_offset,
6588 dst_map,
6589 &version,
6590 THREAD_UNINT );
6591
6592 start += copy_size;
6593 src_offset += copy_size;
6594 amount_left -= copy_size;
6595 /*
6596 * Release the object reference
6597 */
6598 vm_object_deallocate(dst_object);
6599 /*
6600 * If a hard error occurred, return it now
6601 */
6602 if (kr != KERN_SUCCESS)
6603 return kr;
6604
6605 if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
6606 || amount_left == 0)
6607 {
6608 /*
6609 * all done with this copy entry, dispose.
6610 */
6611 vm_map_copy_entry_unlink(copy, copy_entry);
6612 vm_object_deallocate(copy_entry->object.vm_object);
6613 vm_map_copy_entry_dispose(copy, copy_entry);
6614
6615 if ((copy_entry = vm_map_copy_first_entry(copy))
6616 == vm_map_copy_to_entry(copy) && amount_left) {
6617 /*
6618 * not finished copying but we ran out of source
6619 */
6620 return KERN_INVALID_ADDRESS;
6621 }
6622 src_offset = 0;
6623 }
6624
6625 if (amount_left == 0)
6626 return KERN_SUCCESS;
6627
6628 vm_map_lock_read(dst_map);
6629 if (version.main_timestamp == dst_map->timestamp) {
6630 if (start == entry_end) {
6631 /*
6632 * destination region is split. Use the version
6633 * information to avoid a lookup in the normal
6634 * case.
6635 */
6636 entry = entry->vme_next;
6637 /*
6638 * should be contiguous. Fail if we encounter
6639 * a hole in the destination.
6640 */
6641 if (start != entry->vme_start) {
6642 vm_map_unlock_read(dst_map);
6643 return KERN_INVALID_ADDRESS ;
6644 }
6645 }
6646 } else {
6647 /*
6648 * Map version check failed.
6649 * we must lookup the entry because somebody
6650 * might have changed the map behind our backs.
6651 */
6652 RetryLookup:
6653 if (!vm_map_lookup_entry(dst_map, start, &entry))
6654 {
6655 vm_map_unlock_read(dst_map);
6656 return KERN_INVALID_ADDRESS ;
6657 }
6658 }
6659 }/* while */
6660
6661 return KERN_SUCCESS;
6662 }/* vm_map_copy_overwrite_unaligned */
6663
6664 /*
6665 * Routine: vm_map_copy_overwrite_aligned [internal use only]
6666 *
6667 * Description:
6668 * Does all the vm_trickery possible for whole pages.
6669 *
6670 * Implementation:
6671 *
6672 * If there are no permanent objects in the destination,
6673 * and the source and destination map entry zones match,
6674 * and the destination map entry is not shared,
6675 * then the map entries can be deleted and replaced
6676 * with those from the copy. The following code is the
6677 * basic idea of what to do, but there are lots of annoying
6678 * little details about getting protection and inheritance
6679 * right. Should add protection, inheritance, and sharing checks
6680 * to the above pass and make sure that no wiring is involved.
6681 */
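/*
 * Minimal restatement of the fast-path test used in the loop below:
 * the destination entry's object is replaced outright only when
 *
 *	(!entry->is_shared &&
 *	 (object == VM_OBJECT_NULL ||
 *	  (object->internal && !object->true_share))) ||
 *	entry->needs_copy
 *
 * holds; otherwise the code falls through to "slow_copy" and does a
 * physical vm_fault_copy() into the existing destination object.
 */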
6682
6683 int vm_map_copy_overwrite_aligned_src_not_internal = 0;
6684 int vm_map_copy_overwrite_aligned_src_not_symmetric = 0;
6685 int vm_map_copy_overwrite_aligned_src_large = 0;
6686
6687 static kern_return_t
6688 vm_map_copy_overwrite_aligned(
6689 vm_map_t dst_map,
6690 vm_map_entry_t tmp_entry,
6691 vm_map_copy_t copy,
6692 vm_map_offset_t start,
6693 __unused pmap_t pmap)
6694 {
6695 vm_object_t object;
6696 vm_map_entry_t copy_entry;
6697 vm_map_size_t copy_size;
6698 vm_map_size_t size;
6699 vm_map_entry_t entry;
6700
6701 while ((copy_entry = vm_map_copy_first_entry(copy))
6702 != vm_map_copy_to_entry(copy))
6703 {
6704 copy_size = (copy_entry->vme_end - copy_entry->vme_start);
6705
6706 entry = tmp_entry;
6707 assert(!entry->use_pmap); /* unnested when clipped earlier */
6708 if (entry == vm_map_to_entry(dst_map)) {
6709 vm_map_unlock(dst_map);
6710 return KERN_INVALID_ADDRESS;
6711 }
6712 size = (entry->vme_end - entry->vme_start);
6713 /*
6714 * Make sure that no holes popped up in the
6715 * address map, and that the protection is
6716 * still valid, in case the map was unlocked
6717 * earlier.
6718 */
6719
6720 if ((entry->vme_start != start) || ((entry->is_sub_map)
6721 && !entry->needs_copy)) {
6722 vm_map_unlock(dst_map);
6723 return(KERN_INVALID_ADDRESS);
6724 }
6725 assert(entry != vm_map_to_entry(dst_map));
6726
6727 /*
6728 * Check protection again
6729 */
6730
6731 if ( ! (entry->protection & VM_PROT_WRITE)) {
6732 vm_map_unlock(dst_map);
6733 return(KERN_PROTECTION_FAILURE);
6734 }
6735
6736 /*
6737 * Adjust to source size first
6738 */
6739
6740 if (copy_size < size) {
6741 vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
6742 size = copy_size;
6743 }
6744
6745 /*
6746 * Adjust to destination size
6747 */
6748
6749 if (size < copy_size) {
6750 vm_map_copy_clip_end(copy, copy_entry,
6751 copy_entry->vme_start + size);
6752 copy_size = size;
6753 }
6754
6755 assert((entry->vme_end - entry->vme_start) == size);
6756 assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
6757 assert((copy_entry->vme_end - copy_entry->vme_start) == size);
6758
6759 /*
6760 * If the destination contains temporary unshared memory,
6761 * we can perform the copy by throwing it away and
6762 * installing the source data.
6763 */
6764
6765 object = entry->object.vm_object;
6766 if ((!entry->is_shared &&
6767 ((object == VM_OBJECT_NULL) ||
6768 (object->internal && !object->true_share))) ||
6769 entry->needs_copy) {
6770 vm_object_t old_object = entry->object.vm_object;
6771 vm_object_offset_t old_offset = entry->offset;
6772 vm_object_offset_t offset;
6773
6774 /*
6775 * Ensure that the source and destination aren't
6776 * identical
6777 */
6778 if (old_object == copy_entry->object.vm_object &&
6779 old_offset == copy_entry->offset) {
6780 vm_map_copy_entry_unlink(copy, copy_entry);
6781 vm_map_copy_entry_dispose(copy, copy_entry);
6782
6783 if (old_object != VM_OBJECT_NULL)
6784 vm_object_deallocate(old_object);
6785
6786 start = tmp_entry->vme_end;
6787 tmp_entry = tmp_entry->vme_next;
6788 continue;
6789 }
6790
6791 #if !CONFIG_EMBEDDED
6792 #define __TRADEOFF1_OBJ_SIZE (64 * 1024 * 1024) /* 64 MB */
6793 #define __TRADEOFF1_COPY_SIZE (128 * 1024) /* 128 KB */
6794 if (copy_entry->object.vm_object != VM_OBJECT_NULL &&
6795 copy_entry->object.vm_object->vo_size >= __TRADEOFF1_OBJ_SIZE &&
6796 copy_size <= __TRADEOFF1_COPY_SIZE) {
6797 /*
6798 * Virtual vs. Physical copy tradeoff #1.
6799 *
6800 * Copying only a few pages out of a large
6801 * object: do a physical copy instead of
6802 * a virtual copy, to avoid possibly keeping
6803 * the entire large object alive because of
6804 * those few copy-on-write pages.
6805 */
6806 vm_map_copy_overwrite_aligned_src_large++;
6807 goto slow_copy;
6808 }
6809 #endif /* !CONFIG_EMBEDDED */
6810
6811 if (entry->alias >= VM_MEMORY_MALLOC &&
6812 entry->alias <= VM_MEMORY_MALLOC_LARGE_REUSED) {
6813 vm_object_t new_object, new_shadow;
6814
6815 /*
6816 * We're about to map something over a mapping
6817 * established by malloc()...
6818 */
6819 new_object = copy_entry->object.vm_object;
6820 if (new_object != VM_OBJECT_NULL) {
6821 vm_object_lock_shared(new_object);
6822 }
6823 while (new_object != VM_OBJECT_NULL &&
6824 #if !CONFIG_EMBEDDED
6825 !new_object->true_share &&
6826 new_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
6827 #endif /* !CONFIG_EMBEDDED */
6828 new_object->internal) {
6829 new_shadow = new_object->shadow;
6830 if (new_shadow == VM_OBJECT_NULL) {
6831 break;
6832 }
6833 vm_object_lock_shared(new_shadow);
6834 vm_object_unlock(new_object);
6835 new_object = new_shadow;
6836 }
6837 if (new_object != VM_OBJECT_NULL) {
6838 if (!new_object->internal) {
6839 /*
6840 * The new mapping is backed
6841 * by an external object. We
6842 * don't want malloc'ed memory
6843 * to be replaced with such a
6844 * non-anonymous mapping, so
6845 * let's go off the optimized
6846 * path...
6847 */
6848 vm_map_copy_overwrite_aligned_src_not_internal++;
6849 vm_object_unlock(new_object);
6850 goto slow_copy;
6851 }
6852 #if !CONFIG_EMBEDDED
6853 if (new_object->true_share ||
6854 new_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
6855 /*
6856 * Same if there's a "true_share"
6857 * object in the shadow chain, or
6858 * an object with a non-default
6859 * (SYMMETRIC) copy strategy.
6860 */
6861 vm_map_copy_overwrite_aligned_src_not_symmetric++;
6862 vm_object_unlock(new_object);
6863 goto slow_copy;
6864 }
6865 #endif /* !CONFIG_EMBEDDED */
6866 vm_object_unlock(new_object);
6867 }
6868 /*
6869 * The new mapping is still backed by
6870 * anonymous (internal) memory, so it's
6871 * OK to substitute it for the original
6872 * malloc() mapping.
6873 */
6874 }
6875
6876 if (old_object != VM_OBJECT_NULL) {
6877 if(entry->is_sub_map) {
6878 if(entry->use_pmap) {
6879 #ifndef NO_NESTED_PMAP
6880 pmap_unnest(dst_map->pmap,
6881 (addr64_t)entry->vme_start,
6882 entry->vme_end - entry->vme_start);
6883 #endif /* NO_NESTED_PMAP */
6884 if(dst_map->mapped_in_other_pmaps) {
6885 /* clean up parent */
6886 /* map/maps */
6887 vm_map_submap_pmap_clean(
6888 dst_map, entry->vme_start,
6889 entry->vme_end,
6890 entry->object.sub_map,
6891 entry->offset);
6892 }
6893 } else {
6894 vm_map_submap_pmap_clean(
6895 dst_map, entry->vme_start,
6896 entry->vme_end,
6897 entry->object.sub_map,
6898 entry->offset);
6899 }
6900 vm_map_deallocate(
6901 entry->object.sub_map);
6902 } else {
6903 if(dst_map->mapped_in_other_pmaps) {
6904 vm_object_pmap_protect(
6905 entry->object.vm_object,
6906 entry->offset,
6907 entry->vme_end
6908 - entry->vme_start,
6909 PMAP_NULL,
6910 entry->vme_start,
6911 VM_PROT_NONE);
6912 } else {
6913 pmap_remove(dst_map->pmap,
6914 (addr64_t)(entry->vme_start),
6915 (addr64_t)(entry->vme_end));
6916 }
6917 vm_object_deallocate(old_object);
6918 }
6919 }
6920
6921 entry->is_sub_map = FALSE;
6922 entry->object = copy_entry->object;
6923 object = entry->object.vm_object;
6924 entry->needs_copy = copy_entry->needs_copy;
6925 entry->wired_count = 0;
6926 entry->user_wired_count = 0;
6927 offset = entry->offset = copy_entry->offset;
6928
6929 vm_map_copy_entry_unlink(copy, copy_entry);
6930 vm_map_copy_entry_dispose(copy, copy_entry);
6931
6932 /*
6933 * we could try to push pages into the pmap at this point, BUT
6934 * this optimization only saved on average 2 us per page if ALL
6935 * the pages in the source were currently mapped
6936 * and ALL the pages in the dest were touched. If fewer
6937 * than 2/3 of the pages were touched, this optimization actually cost more cycles.
6938 * It also puts a lot of pressure on the pmap layer w.r.t. mapping structures.
6939 */
6940
6941 /*
6942 * Set up for the next iteration. The map
6943 * has not been unlocked, so the next
6944 * address should be at the end of this
6945 * entry, and the next map entry should be
6946 * the one following it.
6947 */
6948
6949 start = tmp_entry->vme_end;
6950 tmp_entry = tmp_entry->vme_next;
6951 } else {
6952 vm_map_version_t version;
6953 vm_object_t dst_object;
6954 vm_object_offset_t dst_offset;
6955 kern_return_t r;
6956
6957 slow_copy:
6958 if (entry->needs_copy) {
6959 vm_object_shadow(&entry->object.vm_object,
6960 &entry->offset,
6961 (entry->vme_end -
6962 entry->vme_start));
6963 entry->needs_copy = FALSE;
6964 }
6965
6966 dst_object = entry->object.vm_object;
6967 dst_offset = entry->offset;
6968
6969 /*
6970 * Take an object reference, and record
6971 * the map version information so that the
6972 * map can be safely unlocked.
6973 */
6974
6975 if (dst_object == VM_OBJECT_NULL) {
6976 /*
6977 * We would usually have just taken the
6978 * optimized path above if the destination
6979 * object has not been allocated yet. But we
6980 * now disable that optimization if the copy
6981 * entry's object is not backed by anonymous
6982 * memory to avoid replacing malloc'ed
6983 * (i.e. re-usable) anonymous memory with a
6984 * not-so-anonymous mapping.
6985 * So we have to handle this case here and
6986 * allocate a new VM object for this map entry.
6987 */
6988 dst_object = vm_object_allocate(
6989 entry->vme_end - entry->vme_start);
6990 dst_offset = 0;
6991 entry->object.vm_object = dst_object;
6992 entry->offset = dst_offset;
6993
6994 }
6995
6996 vm_object_reference(dst_object);
6997
6998 /* account for unlock bumping up timestamp */
6999 version.main_timestamp = dst_map->timestamp + 1;
7000
7001 vm_map_unlock(dst_map);
7002
7003 /*
7004 * Copy as much as possible in one pass
7005 */
7006
7007 copy_size = size;
7008 r = vm_fault_copy(
7009 copy_entry->object.vm_object,
7010 copy_entry->offset,
7011 &copy_size,
7012 dst_object,
7013 dst_offset,
7014 dst_map,
7015 &version,
7016 THREAD_UNINT );
7017
7018 /*
7019 * Release the object reference
7020 */
7021
7022 vm_object_deallocate(dst_object);
7023
7024 /*
7025 * If a hard error occurred, return it now
7026 */
7027
7028 if (r != KERN_SUCCESS)
7029 return(r);
7030
7031 if (copy_size != 0) {
7032 /*
7033 * Dispose of the copied region
7034 */
7035
7036 vm_map_copy_clip_end(copy, copy_entry,
7037 copy_entry->vme_start + copy_size);
7038 vm_map_copy_entry_unlink(copy, copy_entry);
7039 vm_object_deallocate(copy_entry->object.vm_object);
7040 vm_map_copy_entry_dispose(copy, copy_entry);
7041 }
7042
7043 /*
7044 * Pick up in the destination map where we left off.
7045 *
7046 * Use the version information to avoid a lookup
7047 * in the normal case.
7048 */
7049
7050 start += copy_size;
7051 vm_map_lock(dst_map);
7052 if (version.main_timestamp == dst_map->timestamp &&
7053 copy_size != 0) {
7054 /* We can safely use saved tmp_entry value */
7055
7056 vm_map_clip_end(dst_map, tmp_entry, start);
7057 tmp_entry = tmp_entry->vme_next;
7058 } else {
7059 /* Must do lookup of tmp_entry */
7060
7061 if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
7062 vm_map_unlock(dst_map);
7063 return(KERN_INVALID_ADDRESS);
7064 }
7065 vm_map_clip_start(dst_map, tmp_entry, start);
7066 }
7067 }
7068 }/* while */
7069
7070 return(KERN_SUCCESS);
7071 }/* vm_map_copy_overwrite_aligned */
7072
7073 /*
7074 * Routine: vm_map_copyin_kernel_buffer [internal use only]
7075 *
7076 * Description:
7077 * Copy in data to a kernel buffer from space in the
7078 * source map. The original space may be optionally
7079 * deallocated.
7080 *
7081 * If successful, returns a new copy object.
7082 */
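/*
 * Layout of the resulting copy object (illustrative):
 *
 *	+-----------------------------+ <- copy
 *	| struct vm_map_copy header   |    (type = VM_MAP_COPY_KERNEL_BUFFER)
 *	+-----------------------------+ <- copy->cpy_kdata == (void *)(copy + 1)
 *	| "len" bytes of copied data  |
 *	+-----------------------------+
 *
 * i.e. header and data come from a single kalloc() of
 * sizeof(struct vm_map_copy) + len bytes.
 */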
7083 static kern_return_t
7084 vm_map_copyin_kernel_buffer(
7085 vm_map_t src_map,
7086 vm_map_offset_t src_addr,
7087 vm_map_size_t len,
7088 boolean_t src_destroy,
7089 vm_map_copy_t *copy_result)
7090 {
7091 kern_return_t kr;
7092 vm_map_copy_t copy;
7093 vm_size_t kalloc_size;
7094
7095 if ((vm_size_t) len != len) {
7096 /* "len" is too big and doesn't fit in a "vm_size_t" */
7097 return KERN_RESOURCE_SHORTAGE;
7098 }
7099 kalloc_size = (vm_size_t) (sizeof(struct vm_map_copy) + len);
7100 assert((vm_map_size_t) kalloc_size == sizeof (struct vm_map_copy) + len);
7101
7102 copy = (vm_map_copy_t) kalloc(kalloc_size);
7103 if (copy == VM_MAP_COPY_NULL) {
7104 return KERN_RESOURCE_SHORTAGE;
7105 }
7106 copy->type = VM_MAP_COPY_KERNEL_BUFFER;
7107 copy->size = len;
7108 copy->offset = 0;
7109 copy->cpy_kdata = (void *) (copy + 1);
7110 copy->cpy_kalloc_size = kalloc_size;
7111
7112 kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t) len);
7113 if (kr != KERN_SUCCESS) {
7114 kfree(copy, kalloc_size);
7115 return kr;
7116 }
7117 if (src_destroy) {
7118 (void) vm_map_remove(src_map, vm_map_trunc_page(src_addr),
7119 vm_map_round_page(src_addr + len),
7120 VM_MAP_REMOVE_INTERRUPTIBLE |
7121 VM_MAP_REMOVE_WAIT_FOR_KWIRE |
7122 (src_map == kernel_map) ?
7123 VM_MAP_REMOVE_KUNWIRE : 0);
7124 }
7125 *copy_result = copy;
7126 return KERN_SUCCESS;
7127 }
7128
7129 /*
7130 * Routine: vm_map_copyout_kernel_buffer [internal use only]
7131 *
7132 * Description:
7133 * Copy out data from a kernel buffer into space in the
7134 * destination map. The space may be optionally dynamically
7135 * allocated.
7136 *
7137 * If successful, consumes the copy object.
7138 * Otherwise, the caller is responsible for it.
7139 */
7140 static int vm_map_copyout_kernel_buffer_failures = 0;
7141 static kern_return_t
7142 vm_map_copyout_kernel_buffer(
7143 vm_map_t map,
7144 vm_map_address_t *addr, /* IN/OUT */
7145 vm_map_copy_t copy,
7146 boolean_t overwrite)
7147 {
7148 kern_return_t kr = KERN_SUCCESS;
7149 thread_t thread = current_thread();
7150
7151 if (!overwrite) {
7152
7153 /*
7154 * Allocate space in the target map for the data
7155 */
7156 *addr = 0;
7157 kr = vm_map_enter(map,
7158 addr,
7159 vm_map_round_page(copy->size),
7160 (vm_map_offset_t) 0,
7161 VM_FLAGS_ANYWHERE,
7162 VM_OBJECT_NULL,
7163 (vm_object_offset_t) 0,
7164 FALSE,
7165 VM_PROT_DEFAULT,
7166 VM_PROT_ALL,
7167 VM_INHERIT_DEFAULT);
7168 if (kr != KERN_SUCCESS)
7169 return kr;
7170 }
7171
7172 /*
7173 * Copyout the data from the kernel buffer to the target map.
7174 */
7175 if (thread->map == map) {
7176
7177 /*
7178 * If the target map is the current map, just do
7179 * the copy.
7180 */
7181 assert((vm_size_t) copy->size == copy->size);
7182 if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) {
7183 kr = KERN_INVALID_ADDRESS;
7184 }
7185 }
7186 else {
7187 vm_map_t oldmap;
7188
7189 /*
7190 * If the target map is another map, assume the
7191 * target's address space identity for the duration
7192 * of the copy.
7193 */
7194 vm_map_reference(map);
7195 oldmap = vm_map_switch(map);
7196
7197 assert((vm_size_t) copy->size == copy->size);
7198 if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) {
7199 vm_map_copyout_kernel_buffer_failures++;
7200 kr = KERN_INVALID_ADDRESS;
7201 }
7202
7203 (void) vm_map_switch(oldmap);
7204 vm_map_deallocate(map);
7205 }
7206
7207 if (kr != KERN_SUCCESS) {
7208 /* the copy failed, clean up */
7209 if (!overwrite) {
7210 /*
7211 * Deallocate the space we allocated in the target map.
7212 */
7213 (void) vm_map_remove(map,
7214 vm_map_trunc_page(*addr),
7215 vm_map_round_page(*addr +
7216 vm_map_round_page(copy->size)),
7217 VM_MAP_NO_FLAGS);
7218 *addr = 0;
7219 }
7220 } else {
7221 /* copy was successful, discard the copy structure */
7222 kfree(copy, copy->cpy_kalloc_size);
7223 }
7224
7225 return kr;
7226 }
7227
7228 /*
7229 * Macro: vm_map_copy_insert
7230 *
7231 * Description:
7232 * Link a copy chain ("copy") into a map at the
7233 * specified location (after "where").
7234 * Side effects:
7235 * The copy chain is destroyed.
7236 * Warning:
7237 * The arguments are evaluated multiple times.
7238 */
7239 #define vm_map_copy_insert(map, where, copy) \
7240 MACRO_BEGIN \
7241 vm_map_store_copy_insert(map, where, copy); \
7242 zfree(vm_map_copy_zone, copy); \
7243 MACRO_END
7244
7245 /*
7246 * Routine: vm_map_copyout
7247 *
7248 * Description:
7249 * Copy out a copy chain ("copy") into newly-allocated
7250 * space in the destination map.
7251 *
7252 * If successful, consumes the copy object.
7253 * Otherwise, the caller is responsible for it.
7254 */
7255 kern_return_t
7256 vm_map_copyout(
7257 vm_map_t dst_map,
7258 vm_map_address_t *dst_addr, /* OUT */
7259 vm_map_copy_t copy)
7260 {
7261 vm_map_size_t size;
7262 vm_map_size_t adjustment;
7263 vm_map_offset_t start;
7264 vm_object_offset_t vm_copy_start;
7265 vm_map_entry_t last;
7266 register
7267 vm_map_entry_t entry;
7268
7269 /*
7270 * Check for null copy object.
7271 */
7272
7273 if (copy == VM_MAP_COPY_NULL) {
7274 *dst_addr = 0;
7275 return(KERN_SUCCESS);
7276 }
7277
7278 /*
7279 * Check for special copy object, created
7280 * by vm_map_copyin_object.
7281 */
7282
7283 if (copy->type == VM_MAP_COPY_OBJECT) {
7284 vm_object_t object = copy->cpy_object;
7285 kern_return_t kr;
7286 vm_object_offset_t offset;
7287
7288 offset = vm_object_trunc_page(copy->offset);
7289 size = vm_map_round_page(copy->size +
7290 (vm_map_size_t)(copy->offset - offset));
7291 *dst_addr = 0;
7292 kr = vm_map_enter(dst_map, dst_addr, size,
7293 (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
7294 object, offset, FALSE,
7295 VM_PROT_DEFAULT, VM_PROT_ALL,
7296 VM_INHERIT_DEFAULT);
7297 if (kr != KERN_SUCCESS)
7298 return(kr);
7299 /* Account for non-pagealigned copy object */
7300 *dst_addr += (vm_map_offset_t)(copy->offset - offset);
7301 zfree(vm_map_copy_zone, copy);
7302 return(KERN_SUCCESS);
7303 }
7304
7305 /*
7306 * Check for special kernel buffer allocated
7307 * by new_ipc_kmsg_copyin.
7308 */
7309
7310 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
7311 return(vm_map_copyout_kernel_buffer(dst_map, dst_addr,
7312 copy, FALSE));
7313 }
7314
7315 /*
7316 * Find space for the data
7317 */
7318
7319 vm_copy_start = vm_object_trunc_page(copy->offset);
7320 size = vm_map_round_page((vm_map_size_t)copy->offset + copy->size)
7321 - vm_copy_start;
7322
7323 StartAgain: ;
7324
7325 vm_map_lock(dst_map);
7326 if( dst_map->disable_vmentry_reuse == TRUE) {
7327 VM_MAP_HIGHEST_ENTRY(dst_map, entry, start);
7328 last = entry;
7329 } else {
7330 assert(first_free_is_valid(dst_map));
7331 start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
7332 vm_map_min(dst_map) : last->vme_end;
7333 }
7334
7335 while (TRUE) {
7336 vm_map_entry_t next = last->vme_next;
7337 vm_map_offset_t end = start + size;
7338
7339 if ((end > dst_map->max_offset) || (end < start)) {
7340 if (dst_map->wait_for_space) {
7341 if (size <= (dst_map->max_offset - dst_map->min_offset)) {
7342 assert_wait((event_t) dst_map,
7343 THREAD_INTERRUPTIBLE);
7344 vm_map_unlock(dst_map);
7345 thread_block(THREAD_CONTINUE_NULL);
7346 goto StartAgain;
7347 }
7348 }
7349 vm_map_unlock(dst_map);
7350 return(KERN_NO_SPACE);
7351 }
7352
7353 if ((next == vm_map_to_entry(dst_map)) ||
7354 (next->vme_start >= end))
7355 break;
7356
7357 last = next;
7358 start = last->vme_end;
7359 }
7360
7361 /*
7362 * Since we're going to just drop the map
7363 * entries from the copy into the destination
7364 * map, they must come from the same pool.
7365 */
7366
7367 if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
7368 /*
7369 * Mismatches occur when dealing with the default
7370 * pager.
7371 */
7372 zone_t old_zone;
7373 vm_map_entry_t next, new;
7374
7375 /*
7376 * Find the zone that the copies were allocated from
7377 */
7378
7379 entry = vm_map_copy_first_entry(copy);
7380
7381 /*
7382 * Reinitialize the copy so that vm_map_copy_entry_link
7383 * will work.
7384 */
7385 vm_map_store_copy_reset(copy, entry);
7386 copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;
7387
7388 /*
7389 * Copy each entry.
7390 */
7391 while (entry != vm_map_copy_to_entry(copy)) {
7392 new = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
7393 vm_map_entry_copy_full(new, entry);
7394 new->use_pmap = FALSE; /* clr address space specifics */
7395 vm_map_copy_entry_link(copy,
7396 vm_map_copy_last_entry(copy),
7397 new);
7398 next = entry->vme_next;
7399 old_zone = entry->from_reserved_zone ? vm_map_entry_reserved_zone : vm_map_entry_zone;
7400 zfree(old_zone, entry);
7401 entry = next;
7402 }
7403 }
7404
7405 /*
7406 * Adjust the addresses in the copy chain, and
7407 * reset the region attributes.
7408 */
7409
7410 adjustment = start - vm_copy_start;
7411 for (entry = vm_map_copy_first_entry(copy);
7412 entry != vm_map_copy_to_entry(copy);
7413 entry = entry->vme_next) {
7414 entry->vme_start += adjustment;
7415 entry->vme_end += adjustment;
7416
7417 entry->inheritance = VM_INHERIT_DEFAULT;
7418 entry->protection = VM_PROT_DEFAULT;
7419 entry->max_protection = VM_PROT_ALL;
7420 entry->behavior = VM_BEHAVIOR_DEFAULT;
7421
7422 /*
7423 * If the entry is now wired,
7424 * map the pages into the destination map.
7425 */
7426 if (entry->wired_count != 0) {
7427 register vm_map_offset_t va;
7428 vm_object_offset_t offset;
7429 register vm_object_t object;
7430 vm_prot_t prot;
7431 int type_of_fault;
7432
7433 object = entry->object.vm_object;
7434 offset = entry->offset;
7435 va = entry->vme_start;
7436
7437 pmap_pageable(dst_map->pmap,
7438 entry->vme_start,
7439 entry->vme_end,
7440 TRUE);
7441
7442 while (va < entry->vme_end) {
7443 register vm_page_t m;
7444
7445 /*
7446 * Look up the page in the object.
7447 * Assert that the page will be found in the
7448 * top object:
7449 * either
7450 * the object was newly created by
7451 * vm_object_copy_slowly, and has
7452 * copies of all of the pages from
7453 * the source object
7454 * or
7455 * the object was moved from the old
7456 * map entry; because the old map
7457 * entry was wired, all of the pages
7458 * were in the top-level object.
7459 * (XXX not true if we wire pages for
7460 * reading)
7461 */
7462 vm_object_lock(object);
7463
7464 m = vm_page_lookup(object, offset);
7465 if (m == VM_PAGE_NULL || !VM_PAGE_WIRED(m) ||
7466 m->absent)
7467 panic("vm_map_copyout: wiring %p", m);
7468
7469 /*
7470 * ENCRYPTED SWAP:
7471 * The page is assumed to be wired here, so it
7472 * shouldn't be encrypted. Otherwise, we
7473 * couldn't enter it in the page table, since
7474 * we don't want the user to see the encrypted
7475 * data.
7476 */
7477 ASSERT_PAGE_DECRYPTED(m);
7478
7479 prot = entry->protection;
7480
7481 if (override_nx(dst_map, entry->alias) && prot)
7482 prot |= VM_PROT_EXECUTE;
7483
7484 type_of_fault = DBG_CACHE_HIT_FAULT;
7485
7486 vm_fault_enter(m, dst_map->pmap, va, prot, prot,
7487 VM_PAGE_WIRED(m), FALSE, FALSE, FALSE, NULL,
7488 &type_of_fault);
7489
7490 vm_object_unlock(object);
7491
7492 offset += PAGE_SIZE_64;
7493 va += PAGE_SIZE;
7494 }
7495 }
7496 }
7497
7498 /*
7499 * Correct the page alignment for the result
7500 */
7501
7502 *dst_addr = start + (copy->offset - vm_copy_start);
7503
7504 /*
7505 * Update the hints and the map size
7506 */
7507
7508 SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy));
7509
7510 dst_map->size += size;
7511
7512 /*
7513 * Link in the copy
7514 */
7515
7516 vm_map_copy_insert(dst_map, last, copy);
7517
7518 vm_map_unlock(dst_map);
7519
7520 /*
7521 * XXX If wiring_required, call vm_map_pageable
7522 */
7523
7524 return(KERN_SUCCESS);
7525 }
7526
7527 /*
7528 * Routine: vm_map_copyin
7529 *
7530 * Description:
7531 * see vm_map_copyin_common. Exported via Unsupported.exports.
7532 *
7533 */
7534
7535 #undef vm_map_copyin
7536
7537 kern_return_t
7538 vm_map_copyin(
7539 vm_map_t src_map,
7540 vm_map_address_t src_addr,
7541 vm_map_size_t len,
7542 boolean_t src_destroy,
7543 vm_map_copy_t *copy_result) /* OUT */
7544 {
7545 return(vm_map_copyin_common(src_map, src_addr, len, src_destroy,
7546 FALSE, copy_result, FALSE));
7547 }
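/*
 * Typical pairing of vm_map_copyin() and vm_map_copyout(), as a
 * minimal sketch ("src_task_map", "dst_task_map", "src_addr" and
 * "len" are assumed to be supplied by the caller):
 *
 *	vm_map_copy_t		copy;
 *	vm_map_address_t	dst_addr;
 *	kern_return_t		kr;
 *
 *	kr = vm_map_copyin(src_task_map, src_addr, len, FALSE, &copy);
 *	if (kr != KERN_SUCCESS)
 *		return kr;
 *	kr = vm_map_copyout(dst_task_map, &dst_addr, copy);
 *	if (kr != KERN_SUCCESS)
 *		vm_map_copy_discard(copy);
 *
 * On success, vm_map_copyout() consumes "copy"; on failure the caller
 * still owns it and must discard it.
 */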
7548
7549 /*
7550 * Routine: vm_map_copyin_common
7551 *
7552 * Description:
7553 * Copy the specified region (src_addr, len) from the
7554 * source address space (src_map), possibly removing
7555 * the region from the source address space (src_destroy).
7556 *
7557 * Returns:
7558 * A vm_map_copy_t object (copy_result), suitable for
7559 * insertion into another address space (using vm_map_copyout),
7560 * copying over another address space region (using
7561 * vm_map_copy_overwrite). If the copy is unused, it
7562 * should be destroyed (using vm_map_copy_discard).
7563 *
7564 * In/out conditions:
7565 * The source map should not be locked on entry.
7566 */
7567
7568 typedef struct submap_map {
7569 vm_map_t parent_map;
7570 vm_map_offset_t base_start;
7571 vm_map_offset_t base_end;
7572 vm_map_size_t base_len;
7573 struct submap_map *next;
7574 } submap_map_t;
7575
7576 kern_return_t
7577 vm_map_copyin_common(
7578 vm_map_t src_map,
7579 vm_map_address_t src_addr,
7580 vm_map_size_t len,
7581 boolean_t src_destroy,
7582 __unused boolean_t src_volatile,
7583 vm_map_copy_t *copy_result, /* OUT */
7584 boolean_t use_maxprot)
7585 {
7586 vm_map_entry_t tmp_entry; /* Result of last map lookup --
7587 * in multi-level lookup, this
7588 * entry contains the actual
7589 * vm_object/offset.
7590 */
7591 register
7592 vm_map_entry_t new_entry = VM_MAP_ENTRY_NULL; /* Map entry for copy */
7593
7594 vm_map_offset_t src_start; /* Start of current entry --
7595 * where copy is taking place now
7596 */
7597 vm_map_offset_t src_end; /* End of entire region to be
7598 * copied */
7599 vm_map_offset_t src_base;
7600 vm_map_t base_map = src_map;
7601 boolean_t map_share=FALSE;
7602 submap_map_t *parent_maps = NULL;
7603
7604 register
7605 vm_map_copy_t copy; /* Resulting copy */
7606 vm_map_address_t copy_addr;
7607
7608 /*
7609 * Check for copies of zero bytes.
7610 */
7611
7612 if (len == 0) {
7613 *copy_result = VM_MAP_COPY_NULL;
7614 return(KERN_SUCCESS);
7615 }
7616
7617 /*
7618 * Check that the end address doesn't overflow
7619 */
7620 src_end = src_addr + len;
7621 if (src_end < src_addr)
7622 return KERN_INVALID_ADDRESS;
7623
7624 /*
7625 * If the copy is sufficiently small, use a kernel buffer instead
7626 * of making a virtual copy. The theory being that the cost of
7627 * setting up VM (and taking C-O-W faults) dominates the copy costs
7628 * for small regions.
7629 */
7630 if ((len < msg_ool_size_small) && !use_maxprot)
7631 return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
7632 src_destroy, copy_result);
7633
7634 /*
7635 * Compute (page aligned) start and end of region
7636 */
7637 src_start = vm_map_trunc_page(src_addr);
7638 src_end = vm_map_round_page(src_end);
7639
7640 XPR(XPR_VM_MAP, "vm_map_copyin_common map 0x%x addr 0x%x len 0x%x dest %d\n", src_map, src_addr, len, src_destroy, 0);
7641
7642 /*
7643 * Allocate a header element for the list.
7644 *
7645 * Use the start and end in the header to
7646 * remember the endpoints prior to rounding.
7647 */
7648
7649 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
7650 vm_map_copy_first_entry(copy) =
7651 vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
7652 copy->type = VM_MAP_COPY_ENTRY_LIST;
7653 copy->cpy_hdr.nentries = 0;
7654 copy->cpy_hdr.entries_pageable = TRUE;
7655
7656 vm_map_store_init( &(copy->cpy_hdr) );
7657
7658 copy->offset = src_addr;
7659 copy->size = len;
7660
7661 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
7662
7663 #define RETURN(x) \
7664 MACRO_BEGIN \
7665 vm_map_unlock(src_map); \
7666 if(src_map != base_map) \
7667 vm_map_deallocate(src_map); \
7668 if (new_entry != VM_MAP_ENTRY_NULL) \
7669 vm_map_copy_entry_dispose(copy,new_entry); \
7670 vm_map_copy_discard(copy); \
7671 { \
7672 submap_map_t *_ptr; \
7673 \
7674 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
7675 parent_maps=parent_maps->next; \
7676 if (_ptr->parent_map != base_map) \
7677 vm_map_deallocate(_ptr->parent_map); \
7678 kfree(_ptr, sizeof(submap_map_t)); \
7679 } \
7680 } \
7681 MACRO_RETURN(x); \
7682 MACRO_END
7683
7684 /*
7685 * Find the beginning of the region.
7686 */
7687
7688 vm_map_lock(src_map);
7689
7690 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry))
7691 RETURN(KERN_INVALID_ADDRESS);
7692 if(!tmp_entry->is_sub_map) {
7693 vm_map_clip_start(src_map, tmp_entry, src_start);
7694 }
7695 /* set for later submap fix-up */
7696 copy_addr = src_start;
7697
7698 /*
7699 * Go through entries until we get to the end.
7700 */
7701
7702 while (TRUE) {
7703 register
7704 vm_map_entry_t src_entry = tmp_entry; /* Top-level entry */
7705 vm_map_size_t src_size; /* Size of source
7706 * map entry (in both
7707 * maps)
7708 */
7709
7710 register
7711 vm_object_t src_object; /* Object to copy */
7712 vm_object_offset_t src_offset;
7713
7714 boolean_t src_needs_copy; /* Should source map
7715 * be made read-only
7716 * for copy-on-write?
7717 */
7718
7719 boolean_t new_entry_needs_copy; /* Will new entry be COW? */
7720
7721 boolean_t was_wired; /* Was source wired? */
7722 vm_map_version_t version; /* Version before locks
7723 * dropped to make copy
7724 */
7725 kern_return_t result; /* Return value from
7726 * copy_strategically.
7727 */
7728 while(tmp_entry->is_sub_map) {
7729 vm_map_size_t submap_len;
7730 submap_map_t *ptr;
7731
7732 ptr = (submap_map_t *)kalloc(sizeof(submap_map_t));
7733 ptr->next = parent_maps;
7734 parent_maps = ptr;
7735 ptr->parent_map = src_map;
7736 ptr->base_start = src_start;
7737 ptr->base_end = src_end;
7738 submap_len = tmp_entry->vme_end - src_start;
7739 if(submap_len > (src_end-src_start))
7740 submap_len = src_end-src_start;
7741 ptr->base_len = submap_len;
7742
7743 src_start -= tmp_entry->vme_start;
7744 src_start += tmp_entry->offset;
7745 src_end = src_start + submap_len;
7746 src_map = tmp_entry->object.sub_map;
7747 vm_map_lock(src_map);
7748 /* keep an outstanding reference for all maps in */
7749 /* the tree of parent maps, except the base map */
7750 vm_map_reference(src_map);
7751 vm_map_unlock(ptr->parent_map);
7752 if (!vm_map_lookup_entry(
7753 src_map, src_start, &tmp_entry))
7754 RETURN(KERN_INVALID_ADDRESS);
7755 map_share = TRUE;
7756 if(!tmp_entry->is_sub_map)
7757 vm_map_clip_start(src_map, tmp_entry, src_start);
7758 src_entry = tmp_entry;
7759 }
7760 /* we are now in the lowest level submap... */
7761
7762 if ((tmp_entry->object.vm_object != VM_OBJECT_NULL) &&
7763 (tmp_entry->object.vm_object->phys_contiguous)) {
7764 /* This is not supported for now. In the future */
7765 /* we will need to detect the phys_contig */
7766 /* condition and then upgrade copy_slowly */
7767 /* to do a physical copy from the device-memory- */
7768 /* based object. We can piggy-back off of */
7769 /* the was_wired boolean to set up the */
7770 /* proper handling. */
7771 RETURN(KERN_PROTECTION_FAILURE);
7772 }
7773 /*
7774 * Create a new address map entry to hold the result.
7775 * Fill in the fields from the appropriate source entries.
7776 * We must unlock the source map to do this if we need
7777 * to allocate a map entry.
7778 */
7779 if (new_entry == VM_MAP_ENTRY_NULL) {
7780 version.main_timestamp = src_map->timestamp;
7781 vm_map_unlock(src_map);
7782
7783 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
7784
7785 vm_map_lock(src_map);
7786 if ((version.main_timestamp + 1) != src_map->timestamp) {
7787 if (!vm_map_lookup_entry(src_map, src_start,
7788 &tmp_entry)) {
7789 RETURN(KERN_INVALID_ADDRESS);
7790 }
7791 if (!tmp_entry->is_sub_map)
7792 vm_map_clip_start(src_map, tmp_entry, src_start);
7793 continue; /* restart w/ new tmp_entry */
7794 }
7795 }
7796
7797 /*
7798 * Verify that the region can be read.
7799 */
7800 if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE &&
7801 !use_maxprot) ||
7802 (src_entry->max_protection & VM_PROT_READ) == 0)
7803 RETURN(KERN_PROTECTION_FAILURE);
7804
7805 /*
7806 * Clip against the endpoints of the entire region.
7807 */
7808
7809 vm_map_clip_end(src_map, src_entry, src_end);
7810
7811 src_size = src_entry->vme_end - src_start;
7812 src_object = src_entry->object.vm_object;
7813 src_offset = src_entry->offset;
7814 was_wired = (src_entry->wired_count != 0);
7815
7816 vm_map_entry_copy(new_entry, src_entry);
7817 new_entry->use_pmap = FALSE; /* clr address space specifics */
7818
7819 /*
7820 * Attempt non-blocking copy-on-write optimizations.
7821 */
7822
7823 if (src_destroy &&
7824 (src_object == VM_OBJECT_NULL ||
7825 (src_object->internal && !src_object->true_share
7826 && !map_share))) {
7827 /*
7828 * If we are destroying the source, and the object
7829 * is internal, we can move the object reference
7830 * from the source to the copy. The copy is
7831 * copy-on-write only if the source is.
7832 * We make another reference to the object, because
7833 * destroying the source entry will deallocate it.
7834 */
7835 vm_object_reference(src_object);
7836
7837 /*
7838 * Copy is always unwired; vm_map_copy_entry
7839 * sets its wired count to zero.
7840 */
7841
7842 goto CopySuccessful;
7843 }
7844
7845
7846 RestartCopy:
7847 XPR(XPR_VM_MAP, "vm_map_copyin_common src_obj 0x%x ent 0x%x obj 0x%x was_wired %d\n",
7848 src_object, new_entry, new_entry->object.vm_object,
7849 was_wired, 0);
7850 if ((src_object == VM_OBJECT_NULL ||
7851 (!was_wired && !map_share && !tmp_entry->is_shared)) &&
7852 vm_object_copy_quickly(
7853 &new_entry->object.vm_object,
7854 src_offset,
7855 src_size,
7856 &src_needs_copy,
7857 &new_entry_needs_copy)) {
7858
7859 new_entry->needs_copy = new_entry_needs_copy;
7860
7861 /*
7862 * Handle copy-on-write obligations
7863 */
7864
7865 if (src_needs_copy && !tmp_entry->needs_copy) {
7866 vm_prot_t prot;
7867
7868 prot = src_entry->protection & ~VM_PROT_WRITE;
7869
7870 if (override_nx(src_map, src_entry->alias) && prot)
7871 prot |= VM_PROT_EXECUTE;
7872
7873 vm_object_pmap_protect(
7874 src_object,
7875 src_offset,
7876 src_size,
7877 (src_entry->is_shared ?
7878 PMAP_NULL
7879 : src_map->pmap),
7880 src_entry->vme_start,
7881 prot);
7882
7883 tmp_entry->needs_copy = TRUE;
7884 }
7885
7886 /*
7887 * The map has never been unlocked, so it's safe
7888 * to move to the next entry rather than doing
7889 * another lookup.
7890 */
7891
7892 goto CopySuccessful;
7893 }
7894
7895 /*
7896 * Take an object reference, so that we may
7897 * release the map lock(s).
7898 */
7899
7900 assert(src_object != VM_OBJECT_NULL);
7901 vm_object_reference(src_object);
7902
7903 /*
7904 * Record the timestamp for later verification.
7905 * Unlock the map.
7906 */
7907
7908 version.main_timestamp = src_map->timestamp;
7909 vm_map_unlock(src_map); /* Increments timestamp once! */
7910
7911 /*
7912 * Perform the copy
7913 */
7914
7915 if (was_wired) {
7916 CopySlowly:
7917 vm_object_lock(src_object);
7918 result = vm_object_copy_slowly(
7919 src_object,
7920 src_offset,
7921 src_size,
7922 THREAD_UNINT,
7923 &new_entry->object.vm_object);
7924 new_entry->offset = 0;
7925 new_entry->needs_copy = FALSE;
7926
7927 }
7928 else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
7929 (tmp_entry->is_shared || map_share)) {
7930 vm_object_t new_object;
7931
7932 vm_object_lock_shared(src_object);
7933 new_object = vm_object_copy_delayed(
7934 src_object,
7935 src_offset,
7936 src_size,
7937 TRUE);
7938 if (new_object == VM_OBJECT_NULL)
7939 goto CopySlowly;
7940
7941 new_entry->object.vm_object = new_object;
7942 new_entry->needs_copy = TRUE;
7943 result = KERN_SUCCESS;
7944
7945 } else {
7946 result = vm_object_copy_strategically(src_object,
7947 src_offset,
7948 src_size,
7949 &new_entry->object.vm_object,
7950 &new_entry->offset,
7951 &new_entry_needs_copy);
7952
7953 new_entry->needs_copy = new_entry_needs_copy;
7954 }
7955
7956 if (result != KERN_SUCCESS &&
7957 result != KERN_MEMORY_RESTART_COPY) {
7958 vm_map_lock(src_map);
7959 RETURN(result);
7960 }
7961
7962 /*
7963 * Throw away the extra reference
7964 */
7965
7966 vm_object_deallocate(src_object);
7967
7968 /*
7969 * Verify that the map has not substantially
7970 * changed while the copy was being made.
7971 */
7972
7973 vm_map_lock(src_map);
7974
7975 if ((version.main_timestamp + 1) == src_map->timestamp)
7976 goto VerificationSuccessful;
7977
7978 /*
7979 * Simple version comparison failed.
7980 *
7981 * Retry the lookup and verify that the
7982 * same object/offset are still present.
7983 *
7984 * [Note: a memory manager that colludes with
7985 * the calling task can detect that we have
7986 * cheated. While the map was unlocked, the
7987 * mapping could have been changed and restored.]
7988 */
7989
7990 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
7991 RETURN(KERN_INVALID_ADDRESS);
7992 }
7993
7994 src_entry = tmp_entry;
7995 vm_map_clip_start(src_map, src_entry, src_start);
7996
7997 if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) &&
7998 !use_maxprot) ||
7999 ((src_entry->max_protection & VM_PROT_READ) == 0))
8000 goto VerificationFailed;
8001
8002 if (src_entry->vme_end < new_entry->vme_end)
8003 src_size = (new_entry->vme_end = src_entry->vme_end) - src_start;
8004
8005 if ((src_entry->object.vm_object != src_object) ||
8006 (src_entry->offset != src_offset) ) {
8007
8008 /*
8009 * Verification failed.
8010 *
8011 * Start over with this top-level entry.
8012 */
8013
8014 VerificationFailed: ;
8015
8016 vm_object_deallocate(new_entry->object.vm_object);
8017 tmp_entry = src_entry;
8018 continue;
8019 }
8020
8021 /*
8022 * Verification succeeded.
8023 */
8024
8025 VerificationSuccessful: ;
8026
8027 if (result == KERN_MEMORY_RESTART_COPY)
8028 goto RestartCopy;
8029
8030 /*
8031 * Copy succeeded.
8032 */
8033
8034 CopySuccessful: ;
8035
8036 /*
8037 * Link in the new copy entry.
8038 */
8039
8040 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
8041 new_entry);
8042
8043 /*
8044 * Determine whether the entire region
8045 * has been copied.
8046 */
8047 src_base = src_start;
8048 src_start = new_entry->vme_end;
8049 new_entry = VM_MAP_ENTRY_NULL;
8050 while ((src_start >= src_end) && (src_end != 0)) {
8051 if (src_map != base_map) {
8052 submap_map_t *ptr;
8053
8054 ptr = parent_maps;
8055 assert(ptr != NULL);
8056 parent_maps = parent_maps->next;
8057
8058 /* fix up the damage we did in that submap */
8059 vm_map_simplify_range(src_map,
8060 src_base,
8061 src_end);
8062
8063 vm_map_unlock(src_map);
8064 vm_map_deallocate(src_map);
8065 vm_map_lock(ptr->parent_map);
8066 src_map = ptr->parent_map;
8067 src_base = ptr->base_start;
8068 src_start = ptr->base_start + ptr->base_len;
8069 src_end = ptr->base_end;
8070 if ((src_end > src_start) &&
8071 !vm_map_lookup_entry(
8072 src_map, src_start, &tmp_entry))
8073 RETURN(KERN_INVALID_ADDRESS);
8074 kfree(ptr, sizeof(submap_map_t));
8075 if(parent_maps == NULL)
8076 map_share = FALSE;
8077 src_entry = tmp_entry->vme_prev;
8078 } else
8079 break;
8080 }
8081 if ((src_start >= src_end) && (src_end != 0))
8082 break;
8083
8084 /*
8085 * Verify that there are no gaps in the region
8086 */
8087
8088 tmp_entry = src_entry->vme_next;
8089 if ((tmp_entry->vme_start != src_start) ||
8090 (tmp_entry == vm_map_to_entry(src_map)))
8091 RETURN(KERN_INVALID_ADDRESS);
8092 }
8093
8094 /*
8095 * If the source should be destroyed, do it now, since the
8096 * copy was successful.
8097 */
8098 if (src_destroy) {
8099 (void) vm_map_delete(src_map,
8100 vm_map_trunc_page(src_addr),
8101 src_end,
8102 (src_map == kernel_map) ?
8103 VM_MAP_REMOVE_KUNWIRE :
8104 VM_MAP_NO_FLAGS,
8105 VM_MAP_NULL);
8106 } else {
8107 /* fix up the damage we did in the base map */
8108 vm_map_simplify_range(src_map,
8109 vm_map_trunc_page(src_addr),
8110 vm_map_round_page(src_end));
8111 }
8112
8113 vm_map_unlock(src_map);
8114
8115 /* Fix up the start and end points in the copy. This is necessary */
8116 /* when the various entries in the copy object were picked */
8117 /* up from different sub-maps. */
8118
8119 tmp_entry = vm_map_copy_first_entry(copy);
8120 while (tmp_entry != vm_map_copy_to_entry(copy)) {
8121 tmp_entry->vme_end = copy_addr +
8122 (tmp_entry->vme_end - tmp_entry->vme_start);
8123 tmp_entry->vme_start = copy_addr;
8124 assert(tmp_entry->vme_start < tmp_entry->vme_end);
8125 copy_addr += tmp_entry->vme_end - tmp_entry->vme_start;
8126 tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next;
8127 }
8128
8129 *copy_result = copy;
8130 return(KERN_SUCCESS);
8131
8132 #undef RETURN
8133 }
8134
8135 /*
8136 * vm_map_copyin_object:
8137 *
8138 * Create a copy object from an object.
8139 * Our caller donates an object reference.
8140 */
8141
8142 kern_return_t
8143 vm_map_copyin_object(
8144 vm_object_t object,
8145 vm_object_offset_t offset, /* offset of region in object */
8146 vm_object_size_t size, /* size of region in object */
8147 vm_map_copy_t *copy_result) /* OUT */
8148 {
8149 vm_map_copy_t copy; /* Resulting copy */
8150
8151 /*
8152 * We drop the object into a special copy object
8153 * that contains the object directly.
8154 */
8155
8156 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
8157 copy->type = VM_MAP_COPY_OBJECT;
8158 copy->cpy_object = object;
8159 copy->offset = offset;
8160 copy->size = size;
8161
8162 *copy_result = copy;
8163 return(KERN_SUCCESS);
8164 }
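
/*
 * Illustrative sketch (not part of the original source): a minimal
 * caller of vm_map_copyin_object().  The function and variable names
 * are assumptions made for the example; only the call itself comes
 * from the routine above.  Because the caller donates its object
 * reference, it must not release "obj" once the call succeeds.
 */
#if 0	/* example only -- not compiled */
static kern_return_t
example_wrap_object_in_copy(
	vm_object_t		obj,		/* caller-held reference */
	vm_object_size_t	size,
	vm_map_copy_t		*copy_out)
{
	/* the resulting copy object takes over the reference on "obj" */
	return vm_map_copyin_object(obj, 0, size, copy_out);
}
#endif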
8165
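/*
 * vm_map_fork_share:
 *
 * Share "old_entry" of "old_map" with "new_map" at fork time:
 * make sure the entry has a suitable VM object (allocating or
 * shadowing one if necessary), switch symmetric objects to the
 * delayed copy strategy, clone the entry with both copies marked
 * shared, and copy or nest the translations into the child's pmap.
 */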
8166 static void
8167 vm_map_fork_share(
8168 vm_map_t old_map,
8169 vm_map_entry_t old_entry,
8170 vm_map_t new_map)
8171 {
8172 vm_object_t object;
8173 vm_map_entry_t new_entry;
8174
8175 /*
8176 * New sharing code. New map entry
8177 * references original object. Internal
8178 * objects use asynchronous copy algorithm for
8179 * future copies. First make sure we have
8180 * the right object. If we need a shadow,
8181 * or someone else already has one, then
8182 * make a new shadow and share it.
8183 */
8184
8185 object = old_entry->object.vm_object;
8186 if (old_entry->is_sub_map) {
8187 assert(old_entry->wired_count == 0);
8188 #ifndef NO_NESTED_PMAP
8189 if(old_entry->use_pmap) {
8190 kern_return_t result;
8191
8192 result = pmap_nest(new_map->pmap,
8193 (old_entry->object.sub_map)->pmap,
8194 (addr64_t)old_entry->vme_start,
8195 (addr64_t)old_entry->vme_start,
8196 (uint64_t)(old_entry->vme_end - old_entry->vme_start));
8197 if(result)
8198 panic("vm_map_fork_share: pmap_nest failed!");
8199 }
8200 #endif /* NO_NESTED_PMAP */
8201 } else if (object == VM_OBJECT_NULL) {
8202 object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
8203 old_entry->vme_start));
8204 old_entry->offset = 0;
8205 old_entry->object.vm_object = object;
8206 assert(!old_entry->needs_copy);
8207 } else if (object->copy_strategy !=
8208 MEMORY_OBJECT_COPY_SYMMETRIC) {
8209
8210 /*
8211 * We are already using an asymmetric
8212 * copy, and therefore we already have
8213 * the right object.
8214 */
8215
8216 assert(! old_entry->needs_copy);
8217 }
8218 else if (old_entry->needs_copy || /* case 1 */
8219 object->shadowed || /* case 2 */
8220 (!object->true_share && /* case 3 */
8221 !old_entry->is_shared &&
8222 (object->vo_size >
8223 (vm_map_size_t)(old_entry->vme_end -
8224 old_entry->vme_start)))) {
8225
8226 /*
8227 * We need to create a shadow.
8228 * There are three cases here.
8229 * In the first case, we need to
8230 * complete a deferred symmetrical
8231 * copy that we participated in.
8232 * In the second and third cases,
8233 * we need to create the shadow so
8234 * that changes that we make to the
8235 * object do not interfere with
8236 * any symmetrical copies which
8237 * have occurred (case 2) or which
8238 * might occur (case 3).
8239 *
8240 * The first case is when we had
8241 * deferred shadow object creation
8242 * via the entry->needs_copy mechanism.
8243 * This mechanism only works when
8244 * only one entry points to the source
8245 * object, and we are about to create
8246 * a second entry pointing to the
8247 * same object. The problem is that
8248 * there is no way of mapping from
8249 * an object to the entries pointing
8250 * to it. (Deferred shadow creation
8251 * works with one entry because it occurs
8252 * at fault time, and we walk from the
8253 * entry to the object when handling
8254 * the fault.)
8255 *
8256 * The second case is when the object
8257 * to be shared has already been copied
8258 * with a symmetric copy, but we point
8259 * directly to the object without
8260 * needs_copy set in our entry. (This
8261 * can happen because different ranges
8262 * of an object can be pointed to by
8263 * different entries. In particular,
8264 * a single entry pointing to an object
8265 * can be split by a call to vm_inherit,
8266 * which, combined with task_create, can
8267 * result in the different entries
8268 * having different needs_copy values.)
8269 * The shadowed flag in the object allows
8270 * us to detect this case. The problem
8271 * with this case is that if this object
8272 * has or will have shadows, then we
8273 * must not perform an asymmetric copy
8274 * of this object, since such a copy
8275 * allows the object to be changed, which
8276 * will break the previous symmetrical
8277 * copies (which rely upon the object
8278 * not changing). In a sense, the shadowed
8279 * flag says "don't change this object".
8280 * We fix this by creating a shadow
8281 * object for this object, and sharing
8282 * that. This works because we are free
8283 * to change the shadow object (and thus
8284 * to use an asymmetric copy strategy);
8285 * this is also semantically correct,
8286 * since this object is temporary, and
8287 * therefore a copy of the object is
8288 * as good as the object itself. (This
8289 * is not true for permanent objects,
8290 * since the pager needs to see changes,
8291 * which won't happen if the changes
8292 * are made to a copy.)
8293 *
8294 * The third case is when the object
8295 * to be shared has parts sticking
8296 * outside of the entry we're working
8297 * with, and thus may in the future
8298 * be subject to a symmetrical copy.
8299 * (This is a preemptive version of
8300 * case 2.)
8301 */
8302 vm_object_shadow(&old_entry->object.vm_object,
8303 &old_entry->offset,
8304 (vm_map_size_t) (old_entry->vme_end -
8305 old_entry->vme_start));
8306
8307 /*
8308 * If we're making a shadow for other than
8309 * copy on write reasons, then we have
8310 * to remove write permission.
8311 */
8312
8313 if (!old_entry->needs_copy &&
8314 (old_entry->protection & VM_PROT_WRITE)) {
8315 vm_prot_t prot;
8316
8317 prot = old_entry->protection & ~VM_PROT_WRITE;
8318
8319 if (override_nx(old_map, old_entry->alias) && prot)
8320 prot |= VM_PROT_EXECUTE;
8321
8322 if (old_map->mapped_in_other_pmaps) {
8323 vm_object_pmap_protect(
8324 old_entry->object.vm_object,
8325 old_entry->offset,
8326 (old_entry->vme_end -
8327 old_entry->vme_start),
8328 PMAP_NULL,
8329 old_entry->vme_start,
8330 prot);
8331 } else {
8332 pmap_protect(old_map->pmap,
8333 old_entry->vme_start,
8334 old_entry->vme_end,
8335 prot);
8336 }
8337 }
8338
8339 old_entry->needs_copy = FALSE;
8340 object = old_entry->object.vm_object;
8341 }
8342
8343
8344 /*
8345 * If object was using a symmetric copy strategy,
8346 * change its copy strategy to the default
8347 * asymmetric copy strategy, which is copy_delay
8348 * in the non-norma case and copy_call in the
8349 * norma case. Bump the reference count for the
8350 * new entry.
8351 */
8352
8353 if(old_entry->is_sub_map) {
8354 vm_map_lock(old_entry->object.sub_map);
8355 vm_map_reference(old_entry->object.sub_map);
8356 vm_map_unlock(old_entry->object.sub_map);
8357 } else {
8358 vm_object_lock(object);
8359 vm_object_reference_locked(object);
8360 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
8361 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
8362 }
8363 vm_object_unlock(object);
8364 }
8365
8366 /*
8367 * Clone the entry, using object ref from above.
8368 * Mark both entries as shared.
8369 */
8370
8371 new_entry = vm_map_entry_create(new_map, FALSE); /* Never the kernel
8372 * map or descendants */
8373 vm_map_entry_copy(new_entry, old_entry);
8374 old_entry->is_shared = TRUE;
8375 new_entry->is_shared = TRUE;
8376
8377 /*
8378 * Insert the entry into the new map -- we
8379 * know we're inserting at the end of the new
8380 * map.
8381 */
8382
8383 vm_map_store_entry_link(new_map, vm_map_last_entry(new_map), new_entry);
8384
8385 /*
8386 * Update the physical map
8387 */
8388
8389 if (old_entry->is_sub_map) {
8390 /* Bill Angell pmap support goes here */
8391 } else {
8392 pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
8393 old_entry->vme_end - old_entry->vme_start,
8394 old_entry->vme_start);
8395 }
8396 }
8397
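/*
 * vm_map_fork_copy:
 *
 * Copy-inheritance slow path for vm_map_fork().  Copies the region
 * covered by "*old_entry_p" out of "old_map" with vm_map_copyin_maxprot()
 * and inserts the result into "new_map".  Returns TRUE if the copy was
 * inserted and advances "*old_entry_p" past the copied region; returns
 * FALSE if the region turned out to be unreadable and should simply be
 * skipped.
 */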
8398 static boolean_t
8399 vm_map_fork_copy(
8400 vm_map_t old_map,
8401 vm_map_entry_t *old_entry_p,
8402 vm_map_t new_map)
8403 {
8404 vm_map_entry_t old_entry = *old_entry_p;
8405 vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start;
8406 vm_map_offset_t start = old_entry->vme_start;
8407 vm_map_copy_t copy;
8408 vm_map_entry_t last = vm_map_last_entry(new_map);
8409
8410 vm_map_unlock(old_map);
8411 /*
8412 * Use maxprot version of copyin because we
8413 * care about whether this memory can ever
8414 * be accessed, not just whether it's accessible
8415 * right now.
8416 */
8417 if (vm_map_copyin_maxprot(old_map, start, entry_size, FALSE, &copy)
8418 != KERN_SUCCESS) {
8419 /*
8420 * The map might have changed while it
8421 * was unlocked, check it again. Skip
8422 * any blank space or permanently
8423 * unreadable region.
8424 */
8425 vm_map_lock(old_map);
8426 if (!vm_map_lookup_entry(old_map, start, &last) ||
8427 (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) {
8428 last = last->vme_next;
8429 }
8430 *old_entry_p = last;
8431
8432 /*
8433 * XXX For some error returns, want to
8434 * XXX skip to the next element. Note
8435 * that INVALID_ADDRESS and
8436 * PROTECTION_FAILURE are handled above.
8437 */
8438
8439 return FALSE;
8440 }
8441
8442 /*
8443 * Insert the copy into the new map
8444 */
8445
8446 vm_map_copy_insert(new_map, last, copy);
8447
8448 /*
8449 * Pick up the traversal at the end of
8450 * the copied region.
8451 */
8452
8453 vm_map_lock(old_map);
8454 start += entry_size;
8455 if (! vm_map_lookup_entry(old_map, start, &last)) {
8456 last = last->vme_next;
8457 } else {
8458 if (last->vme_start == start) {
8459 /*
8460 * No need to clip here and we don't
8461 * want to cause any unnecessary
8462 * unnesting...
8463 */
8464 } else {
8465 vm_map_clip_start(old_map, last, start);
8466 }
8467 }
8468 *old_entry_p = last;
8469
8470 return TRUE;
8471 }
8472
8473 /*
8474 * vm_map_fork:
8475 *
8476 * Create and return a new map based on the old
8477 * map, according to the inheritance values on the
8478 * regions in that map.
8479 *
8480 * The source map must not be locked.
8481 */
8482 vm_map_t
8483 vm_map_fork(
8484 ledger_t ledger,
8485 vm_map_t old_map)
8486 {
8487 pmap_t new_pmap;
8488 vm_map_t new_map;
8489 vm_map_entry_t old_entry;
8490 vm_map_size_t new_size = 0, entry_size;
8491 vm_map_entry_t new_entry;
8492 boolean_t src_needs_copy;
8493 boolean_t new_entry_needs_copy;
8494
8495 new_pmap = pmap_create(ledger, (vm_map_size_t) 0,
8496 #if defined(__i386__) || defined(__x86_64__)
8497 old_map->pmap->pm_task_map != TASK_MAP_32BIT
8498 #else
8499 #error Unknown architecture.
8500 #endif
8501 );
8502 #if defined(__i386__)
8503 if (old_map->pmap->pm_task_map == TASK_MAP_64BIT_SHARED)
8504 pmap_set_4GB_pagezero(new_pmap);
8505 #endif
8506
8507 vm_map_reference_swap(old_map);
8508 vm_map_lock(old_map);
8509
8510 new_map = vm_map_create(new_pmap,
8511 old_map->min_offset,
8512 old_map->max_offset,
8513 old_map->hdr.entries_pageable);
8514 for (
8515 old_entry = vm_map_first_entry(old_map);
8516 old_entry != vm_map_to_entry(old_map);
8517 ) {
8518
8519 entry_size = old_entry->vme_end - old_entry->vme_start;
8520
8521 switch (old_entry->inheritance) {
8522 case VM_INHERIT_NONE:
8523 break;
8524
8525 case VM_INHERIT_SHARE:
8526 vm_map_fork_share(old_map, old_entry, new_map);
8527 new_size += entry_size;
8528 break;
8529
8530 case VM_INHERIT_COPY:
8531
8532 /*
8533 * Inline the copy_quickly case;
8534 * upon failure, fall back on a call
8535 * to vm_map_fork_copy.
8536 */
8537
8538 if(old_entry->is_sub_map)
8539 break;
8540 if ((old_entry->wired_count != 0) ||
8541 ((old_entry->object.vm_object != NULL) &&
8542 (old_entry->object.vm_object->true_share))) {
8543 goto slow_vm_map_fork_copy;
8544 }
8545
8546 new_entry = vm_map_entry_create(new_map, FALSE); /* never the kernel map or descendants */
8547 vm_map_entry_copy(new_entry, old_entry);
8548 /* clear address space specifics */
8549 new_entry->use_pmap = FALSE;
8550
8551 if (! vm_object_copy_quickly(
8552 &new_entry->object.vm_object,
8553 old_entry->offset,
8554 (old_entry->vme_end -
8555 old_entry->vme_start),
8556 &src_needs_copy,
8557 &new_entry_needs_copy)) {
8558 vm_map_entry_dispose(new_map, new_entry);
8559 goto slow_vm_map_fork_copy;
8560 }
8561
8562 /*
8563 * Handle copy-on-write obligations
8564 */
8565
8566 if (src_needs_copy && !old_entry->needs_copy) {
8567 vm_prot_t prot;
8568
8569 prot = old_entry->protection & ~VM_PROT_WRITE;
8570
8571 if (override_nx(old_map, old_entry->alias) && prot)
8572 prot |= VM_PROT_EXECUTE;
8573
8574 vm_object_pmap_protect(
8575 old_entry->object.vm_object,
8576 old_entry->offset,
8577 (old_entry->vme_end -
8578 old_entry->vme_start),
8579 ((old_entry->is_shared
8580 || old_map->mapped_in_other_pmaps)
8581 ? PMAP_NULL :
8582 old_map->pmap),
8583 old_entry->vme_start,
8584 prot);
8585
8586 old_entry->needs_copy = TRUE;
8587 }
8588 new_entry->needs_copy = new_entry_needs_copy;
8589
8590 /*
8591 * Insert the entry at the end
8592 * of the map.
8593 */
8594
8595 vm_map_store_entry_link(new_map, vm_map_last_entry(new_map),
8596 new_entry);
8597 new_size += entry_size;
8598 break;
8599
8600 slow_vm_map_fork_copy:
8601 if (vm_map_fork_copy(old_map, &old_entry, new_map)) {
8602 new_size += entry_size;
8603 }
8604 continue;
8605 }
8606 old_entry = old_entry->vme_next;
8607 }
8608
8609 new_map->size = new_size;
8610 vm_map_unlock(old_map);
8611 vm_map_deallocate(old_map);
8612
8613 return(new_map);
8614 }
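
/*
 * Illustrative sketch (not part of the original source): the typical
 * fork-time use of vm_map_fork().  The helper name is an assumption for
 * the example.  The parent map is not consumed -- vm_map_fork() takes
 * and later drops its own reference -- and each entry appears in the
 * child map according to its inheritance: VM_INHERIT_NONE entries are
 * omitted, VM_INHERIT_SHARE entries are shared via vm_map_fork_share(),
 * and VM_INHERIT_COPY entries are copied (typically copy-on-write).
 */
#if 0	/* example only -- not compiled */
static vm_map_t
example_fork_address_space(
	ledger_t	child_ledger,
	vm_map_t	parent_map)
{
	/* returns a new map, with its own pmap, sized per the inherited entries */
	return vm_map_fork(child_ledger, parent_map);
}
#endif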
8615
8616 /*
8617 * vm_map_exec:
8618 *
8619 * Set up the "new_map" with the proper execution environment according
8620 * to the type of executable (platform, 64bit, chroot environment).
8621 * Map the comm page and shared region, etc...
8622 */
8623 kern_return_t
8624 vm_map_exec(
8625 vm_map_t new_map,
8626 task_t task,
8627 void *fsroot,
8628 cpu_type_t cpu)
8629 {
8630 SHARED_REGION_TRACE_DEBUG(
8631 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): ->\n",
8632 current_task(), new_map, task, fsroot, cpu));
8633 (void) vm_commpage_enter(new_map, task);
8634 (void) vm_shared_region_enter(new_map, task, fsroot, cpu);
8635 SHARED_REGION_TRACE_DEBUG(
8636 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): <-\n",
8637 current_task(), new_map, task, fsroot, cpu));
8638 return KERN_SUCCESS;
8639 }
8640
8641 /*
8642 * vm_map_lookup_locked:
8643 *
8644 * Finds the VM object, offset, and
8645 * protection for a given virtual address in the
8646 * specified map, assuming a page fault of the
8647 * type specified.
8648 *
8649 * Returns the (object, offset, protection) for
8650 * this address, whether it is wired down, and whether
8651 * this map has the only reference to the data in question.
8652 * In order to later verify this lookup, a "version"
8653 * is returned.
8654 *
8655 * The map MUST be locked by the caller and WILL be
8656 * locked on exit. In order to guarantee the
8657 * existence of the returned object, it is returned
8658 * locked.
8659 *
8660 * If a lookup is requested with "write protection"
8661 * specified, the map may be changed to perform virtual
8662 * copying operations, although the data referenced will
8663 * remain the same.
8664 */
8665 kern_return_t
8666 vm_map_lookup_locked(
8667 vm_map_t *var_map, /* IN/OUT */
8668 vm_map_offset_t vaddr,
8669 vm_prot_t fault_type,
8670 int object_lock_type,
8671 vm_map_version_t *out_version, /* OUT */
8672 vm_object_t *object, /* OUT */
8673 vm_object_offset_t *offset, /* OUT */
8674 vm_prot_t *out_prot, /* OUT */
8675 boolean_t *wired, /* OUT */
8676 vm_object_fault_info_t fault_info, /* OUT */
8677 vm_map_t *real_map)
8678 {
8679 vm_map_entry_t entry;
8680 register vm_map_t map = *var_map;
8681 vm_map_t old_map = *var_map;
8682 vm_map_t cow_sub_map_parent = VM_MAP_NULL;
8683 vm_map_offset_t cow_parent_vaddr = 0;
8684 vm_map_offset_t old_start = 0;
8685 vm_map_offset_t old_end = 0;
8686 register vm_prot_t prot;
8687 boolean_t mask_protections;
8688 vm_prot_t original_fault_type;
8689
8690 /*
8691 * VM_PROT_MASK means that the caller wants us to use "fault_type"
8692 * as a mask against the mapping's actual protections, not as an
8693 * absolute value.
8694 */
8695 mask_protections = (fault_type & VM_PROT_IS_MASK) ? TRUE : FALSE;
8696 fault_type &= ~VM_PROT_IS_MASK;
8697 original_fault_type = fault_type;
8698
8699 *real_map = map;
8700
8701 RetryLookup:
8702 fault_type = original_fault_type;
8703
8704 /*
8705 * If the map has an interesting hint, try it before calling
8706 * full blown lookup routine.
8707 */
8708 entry = map->hint;
8709
8710 if ((entry == vm_map_to_entry(map)) ||
8711 (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
8712 vm_map_entry_t tmp_entry;
8713
8714 /*
8715 * Entry was either not a valid hint, or the vaddr
8716 * was not contained in the entry, so do a full lookup.
8717 */
8718 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
8719 if((cow_sub_map_parent) && (cow_sub_map_parent != map))
8720 vm_map_unlock(cow_sub_map_parent);
8721 if((*real_map != map)
8722 && (*real_map != cow_sub_map_parent))
8723 vm_map_unlock(*real_map);
8724 return KERN_INVALID_ADDRESS;
8725 }
8726
8727 entry = tmp_entry;
8728 }
8729 if(map == old_map) {
8730 old_start = entry->vme_start;
8731 old_end = entry->vme_end;
8732 }
8733
8734 /*
8735 * Handle submaps. Drop lock on upper map, submap is
8736 * returned locked.
8737 */
8738
8739 submap_recurse:
8740 if (entry->is_sub_map) {
8741 vm_map_offset_t local_vaddr;
8742 vm_map_offset_t end_delta;
8743 vm_map_offset_t start_delta;
8744 vm_map_entry_t submap_entry;
8745 boolean_t mapped_needs_copy=FALSE;
8746
8747 local_vaddr = vaddr;
8748
8749 if ((entry->use_pmap && !(fault_type & VM_PROT_WRITE))) {
8750 /* if real_map equals map we unlock below */
8751 if ((*real_map != map) &&
8752 (*real_map != cow_sub_map_parent))
8753 vm_map_unlock(*real_map);
8754 *real_map = entry->object.sub_map;
8755 }
8756
8757 if(entry->needs_copy && (fault_type & VM_PROT_WRITE)) {
8758 if (!mapped_needs_copy) {
8759 if (vm_map_lock_read_to_write(map)) {
8760 vm_map_lock_read(map);
8761 /* XXX FBDP: entry still valid ? */
8762 if(*real_map == entry->object.sub_map)
8763 *real_map = map;
8764 goto RetryLookup;
8765 }
8766 vm_map_lock_read(entry->object.sub_map);
8767 cow_sub_map_parent = map;
8768 /* reset base to map before cow object */
8769 /* this is the map which will accept */
8770 /* the new cow object */
8771 old_start = entry->vme_start;
8772 old_end = entry->vme_end;
8773 cow_parent_vaddr = vaddr;
8774 mapped_needs_copy = TRUE;
8775 } else {
8776 vm_map_lock_read(entry->object.sub_map);
8777 if((cow_sub_map_parent != map) &&
8778 (*real_map != map))
8779 vm_map_unlock(map);
8780 }
8781 } else {
8782 vm_map_lock_read(entry->object.sub_map);
8783 /* Leave the map locked if it is the target */
8784 /* of a cow sub_map above; otherwise, just */
8785 /* follow the maps down to the object. */
8786 /* Here we unlock knowing we are not */
8787 /* revisiting the map. */
8788 if((*real_map != map) && (map != cow_sub_map_parent))
8789 vm_map_unlock_read(map);
8790 }
8791
8792 /* XXX FBDP: map has been unlocked, what protects "entry" !? */
8793 *var_map = map = entry->object.sub_map;
8794
8795 /* calculate the offset in the submap for vaddr */
8796 local_vaddr = (local_vaddr - entry->vme_start) + entry->offset;
8797
8798 RetrySubMap:
8799 if(!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
8800 if((cow_sub_map_parent) && (cow_sub_map_parent != map)){
8801 vm_map_unlock(cow_sub_map_parent);
8802 }
8803 if((*real_map != map)
8804 && (*real_map != cow_sub_map_parent)) {
8805 vm_map_unlock(*real_map);
8806 }
8807 *real_map = map;
8808 return KERN_INVALID_ADDRESS;
8809 }
8810
8811 /* find the attenuated shadow of the underlying object */
8812 /* on our target map */
8813
8814 /* In English: the submap object may extend beyond the */
8815 /* region mapped by the entry, or may fill only a portion */
8816 /* of it. For our purposes, we only care if the object */
8817 /* doesn't fill the entry. In that case the area which will */
8818 /* ultimately be clipped in the top map only needs */
8819 /* to be as big as the portion of the underlying entry */
8820 /* which is actually mapped. */
8821 start_delta = submap_entry->vme_start > entry->offset ?
8822 submap_entry->vme_start - entry->offset : 0;
8823
8824 end_delta =
8825 (entry->offset + start_delta + (old_end - old_start)) <=
8826 submap_entry->vme_end ?
8827 0 : (entry->offset +
8828 (old_end - old_start))
8829 - submap_entry->vme_end;
8830
8831 old_start += start_delta;
8832 old_end -= end_delta;
8833
8834 if(submap_entry->is_sub_map) {
8835 entry = submap_entry;
8836 vaddr = local_vaddr;
8837 goto submap_recurse;
8838 }
8839
8840 if(((fault_type & VM_PROT_WRITE) && cow_sub_map_parent)) {
8841
8842 vm_object_t sub_object, copy_object;
8843 vm_object_offset_t copy_offset;
8844 vm_map_offset_t local_start;
8845 vm_map_offset_t local_end;
8846 boolean_t copied_slowly = FALSE;
8847
8848 if (vm_map_lock_read_to_write(map)) {
8849 vm_map_lock_read(map);
8850 old_start -= start_delta;
8851 old_end += end_delta;
8852 goto RetrySubMap;
8853 }
8854
8855
8856 sub_object = submap_entry->object.vm_object;
8857 if (sub_object == VM_OBJECT_NULL) {
8858 sub_object =
8859 vm_object_allocate(
8860 (vm_map_size_t)
8861 (submap_entry->vme_end -
8862 submap_entry->vme_start));
8863 submap_entry->object.vm_object = sub_object;
8864 submap_entry->offset = 0;
8865 }
8866 local_start = local_vaddr -
8867 (cow_parent_vaddr - old_start);
8868 local_end = local_vaddr +
8869 (old_end - cow_parent_vaddr);
8870 vm_map_clip_start(map, submap_entry, local_start);
8871 vm_map_clip_end(map, submap_entry, local_end);
8872 /* unnesting was done in vm_map_clip_start/end() */
8873 assert(!submap_entry->use_pmap);
8874
8875 /* This is the COW case, let's connect */
8876 /* an entry in our space to the underlying */
8877 /* object in the submap, bypassing the */
8878 /* submap. */
8879
8880
8881 if(submap_entry->wired_count != 0 ||
8882 (sub_object->copy_strategy ==
8883 MEMORY_OBJECT_COPY_NONE)) {
8884 vm_object_lock(sub_object);
8885 vm_object_copy_slowly(sub_object,
8886 submap_entry->offset,
8887 (submap_entry->vme_end -
8888 submap_entry->vme_start),
8889 FALSE,
8890 &copy_object);
8891 copied_slowly = TRUE;
8892 } else {
8893
8894 /* set up shadow object */
8895 copy_object = sub_object;
8896 vm_object_reference(copy_object);
8897 sub_object->shadowed = TRUE;
8898 submap_entry->needs_copy = TRUE;
8899
8900 prot = submap_entry->protection & ~VM_PROT_WRITE;
8901
8902 if (override_nx(old_map, submap_entry->alias) && prot)
8903 prot |= VM_PROT_EXECUTE;
8904
8905 vm_object_pmap_protect(
8906 sub_object,
8907 submap_entry->offset,
8908 submap_entry->vme_end -
8909 submap_entry->vme_start,
8910 (submap_entry->is_shared
8911 || map->mapped_in_other_pmaps) ?
8912 PMAP_NULL : map->pmap,
8913 submap_entry->vme_start,
8914 prot);
8915 }
8916
8917 /*
8918 * Adjust the fault offset to the submap entry.
8919 */
8920 copy_offset = (local_vaddr -
8921 submap_entry->vme_start +
8922 submap_entry->offset);
8923
8924 /* This works differently from the */
8925 /* normal submap case. We go back */
8926 /* to the parent of the cow map and */
8927 /* clip out the target portion of */
8928 /* the sub_map, substituting the */
8929 /* new copy object. */
8930
8931 vm_map_unlock(map);
8932 local_start = old_start;
8933 local_end = old_end;
8934 map = cow_sub_map_parent;
8935 *var_map = cow_sub_map_parent;
8936 vaddr = cow_parent_vaddr;
8937 cow_sub_map_parent = NULL;
8938
8939 if(!vm_map_lookup_entry(map,
8940 vaddr, &entry)) {
8941 vm_object_deallocate(
8942 copy_object);
8943 vm_map_lock_write_to_read(map);
8944 return KERN_INVALID_ADDRESS;
8945 }
8946
8947 /* clip out the portion of space */
8948 /* mapped by the sub map which */
8949 /* corresponds to the underlying */
8950 /* object */
8951
8952 /*
8953 * Clip (and unnest) the smallest nested chunk
8954 * possible around the faulting address...
8955 */
8956 local_start = vaddr & ~(pmap_nesting_size_min - 1);
8957 local_end = local_start + pmap_nesting_size_min;
8958 /*
8959 * ... but don't go beyond the "old_start" to "old_end"
8960 * range, to avoid spanning over another VM region
8961 * with a possibly different VM object and/or offset.
8962 */
8963 if (local_start < old_start) {
8964 local_start = old_start;
8965 }
8966 if (local_end > old_end) {
8967 local_end = old_end;
8968 }
8969 /*
8970 * Adjust copy_offset to the start of the range.
8971 */
8972 copy_offset -= (vaddr - local_start);
8973
8974 vm_map_clip_start(map, entry, local_start);
8975 vm_map_clip_end(map, entry, local_end);
8976 /* unnesting was done in vm_map_clip_start/end() */
8977 assert(!entry->use_pmap);
8978
8979 /* substitute copy object for */
8980 /* shared map entry */
8981 vm_map_deallocate(entry->object.sub_map);
8982 entry->is_sub_map = FALSE;
8983 entry->object.vm_object = copy_object;
8984
8985 /* propagate the submap entry's protections */
8986 entry->protection |= submap_entry->protection;
8987 entry->max_protection |= submap_entry->max_protection;
8988
8989 if(copied_slowly) {
8990 entry->offset = local_start - old_start;
8991 entry->needs_copy = FALSE;
8992 entry->is_shared = FALSE;
8993 } else {
8994 entry->offset = copy_offset;
8995 entry->needs_copy = TRUE;
8996 if(entry->inheritance == VM_INHERIT_SHARE)
8997 entry->inheritance = VM_INHERIT_COPY;
8998 if (map != old_map)
8999 entry->is_shared = TRUE;
9000 }
9001 if(entry->inheritance == VM_INHERIT_SHARE)
9002 entry->inheritance = VM_INHERIT_COPY;
9003
9004 vm_map_lock_write_to_read(map);
9005 } else {
9006 if((cow_sub_map_parent)
9007 && (cow_sub_map_parent != *real_map)
9008 && (cow_sub_map_parent != map)) {
9009 vm_map_unlock(cow_sub_map_parent);
9010 }
9011 entry = submap_entry;
9012 vaddr = local_vaddr;
9013 }
9014 }
9015
9016 /*
9017 * Check whether this task is allowed to have
9018 * this page.
9019 */
9020
9021 prot = entry->protection;
9022
9023 if (override_nx(old_map, entry->alias) && prot) {
9024 /*
9025 * HACK -- if not a stack, then allow execution
9026 */
9027 prot |= VM_PROT_EXECUTE;
9028 }
9029
9030 if (mask_protections) {
9031 fault_type &= prot;
9032 if (fault_type == VM_PROT_NONE) {
9033 goto protection_failure;
9034 }
9035 }
9036 if ((fault_type & (prot)) != fault_type) {
9037 protection_failure:
9038 if (*real_map != map) {
9039 vm_map_unlock(*real_map);
9040 }
9041 *real_map = map;
9042
9043 if ((fault_type & VM_PROT_EXECUTE) && prot)
9044 log_stack_execution_failure((addr64_t)vaddr, prot);
9045
9046 DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL);
9047 return KERN_PROTECTION_FAILURE;
9048 }
9049
9050 /*
9051 * If this page is not pageable, we have to get
9052 * it for all possible accesses.
9053 */
9054
9055 *wired = (entry->wired_count != 0);
9056 if (*wired)
9057 fault_type = prot;
9058
9059 /*
9060 * If the entry was copy-on-write, we either ...
9061 */
9062
9063 if (entry->needs_copy) {
9064 /*
9065 * If we want to write the page, we may as well
9066 * handle that now since we've got the map locked.
9067 *
9068 * If we don't need to write the page, we just
9069 * demote the permissions allowed.
9070 */
9071
9072 if ((fault_type & VM_PROT_WRITE) || *wired) {
9073 /*
9074 * Make a new object, and place it in the
9075 * object chain. Note that no new references
9076 * have appeared -- one just moved from the
9077 * map to the new object.
9078 */
9079
9080 if (vm_map_lock_read_to_write(map)) {
9081 vm_map_lock_read(map);
9082 goto RetryLookup;
9083 }
9084 vm_object_shadow(&entry->object.vm_object,
9085 &entry->offset,
9086 (vm_map_size_t) (entry->vme_end -
9087 entry->vme_start));
9088
9089 entry->object.vm_object->shadowed = TRUE;
9090 entry->needs_copy = FALSE;
9091 vm_map_lock_write_to_read(map);
9092 }
9093 else {
9094 /*
9095 * We're attempting to read a copy-on-write
9096 * page -- don't allow writes.
9097 */
9098
9099 prot &= (~VM_PROT_WRITE);
9100 }
9101 }
9102
9103 /*
9104 * Create an object if necessary.
9105 */
9106 if (entry->object.vm_object == VM_OBJECT_NULL) {
9107
9108 if (vm_map_lock_read_to_write(map)) {
9109 vm_map_lock_read(map);
9110 goto RetryLookup;
9111 }
9112
9113 entry->object.vm_object = vm_object_allocate(
9114 (vm_map_size_t)(entry->vme_end - entry->vme_start));
9115 entry->offset = 0;
9116 vm_map_lock_write_to_read(map);
9117 }
9118
9119 /*
9120 * Return the object/offset from this entry. If the entry
9121 * was copy-on-write or empty, it has been fixed up. Also
9122 * return the protection.
9123 */
9124
9125 *offset = (vaddr - entry->vme_start) + entry->offset;
9126 *object = entry->object.vm_object;
9127 *out_prot = prot;
9128
9129 if (fault_info) {
9130 fault_info->interruptible = THREAD_UNINT; /* for now... */
9131 /* ... the caller will change "interruptible" if needed */
9132 fault_info->cluster_size = 0;
9133 fault_info->user_tag = entry->alias;
9134 fault_info->behavior = entry->behavior;
9135 fault_info->lo_offset = entry->offset;
9136 fault_info->hi_offset = (entry->vme_end - entry->vme_start) + entry->offset;
9137 fault_info->no_cache = entry->no_cache;
9138 fault_info->stealth = FALSE;
9139 fault_info->io_sync = FALSE;
9140 fault_info->cs_bypass = (entry->used_for_jit)? TRUE : FALSE;
9141 fault_info->mark_zf_absent = FALSE;
9142 fault_info->batch_pmap_op = FALSE;
9143 }
9144
9145 /*
9146 * Lock the object to prevent it from disappearing
9147 */
9148 if (object_lock_type == OBJECT_LOCK_EXCLUSIVE)
9149 vm_object_lock(*object);
9150 else
9151 vm_object_lock_shared(*object);
9152
9153 /*
9154 * Save the version number
9155 */
9156
9157 out_version->main_timestamp = map->timestamp;
9158
9159 return KERN_SUCCESS;
9160 }
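
/*
 * Illustrative sketch (not part of the original source): the
 * lookup-then-verify pattern described above, as a fault-style caller
 * might use it.  The function name, the VM_PROT_READ fault type and the
 * KERN_ABORTED return are assumptions for the example.  The caller
 * read-locks the map, the lookup may switch "map" to a submap and
 * returns the object locked, and vm_map_verify() later checks that the
 * map timestamp has not changed.
 */
#if 0	/* example only -- not compiled */
static kern_return_t
example_probe_address(
	vm_map_t	map,
	vm_map_offset_t	vaddr)
{
	vm_map_version_t	version;
	vm_object_t		object;
	vm_object_offset_t	offset;
	vm_prot_t		prot;
	boolean_t		wired;
	vm_map_t		real_map;
	kern_return_t		kr;

	vm_map_lock_read(map);
	kr = vm_map_lookup_locked(&map, vaddr, VM_PROT_READ,
				  OBJECT_LOCK_EXCLUSIVE, &version,
				  &object, &offset, &prot, &wired,
				  NULL,		/* no fault_info wanted */
				  &real_map);
	if (kr != KERN_SUCCESS) {
		vm_map_unlock_read(map);	/* lookup leaves "map" locked */
		return kr;
	}

	/* ... consult "object", "offset" and "prot" here ... */

	vm_object_unlock(object);
	if (real_map != map)
		vm_map_unlock(real_map);
	vm_map_unlock_read(map);

	/* later: make sure the translation is still valid */
	if (vm_map_verify(map, &version)) {
		/* map unchanged and still read-locked here */
		vm_map_verify_done(map, &version);
		return KERN_SUCCESS;
	}
	return KERN_ABORTED;
}
#endif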
9161
9162
9163 /*
9164 * vm_map_verify:
9165 *
9166 * Verifies that the map in question has not changed
9167 * since the given version. If successful, the map
9168 * will not change until vm_map_verify_done() is called.
9169 */
9170 boolean_t
9171 vm_map_verify(
9172 register vm_map_t map,
9173 register vm_map_version_t *version) /* REF */
9174 {
9175 boolean_t result;
9176
9177 vm_map_lock_read(map);
9178 result = (map->timestamp == version->main_timestamp);
9179
9180 if (!result)
9181 vm_map_unlock_read(map);
9182
9183 return(result);
9184 }
9185
9186 /*
9187 * vm_map_verify_done:
9188 *
9189 * Releases locks acquired by a vm_map_verify.
9190 *
9191 * This is now a macro in vm/vm_map.h. It does a
9192 * vm_map_unlock_read on the map.
9193 */
9194
9195
9196 /*
9197 * TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
9198 * Goes away after regular vm_region_recurse function migrates to
9199 * 64 bits
9200 * vm_region_recurse: A form of vm_region which follows the
9201 * submaps in a target map
9202 *
9203 */
9204
9205 kern_return_t
9206 vm_map_region_recurse_64(
9207 vm_map_t map,
9208 vm_map_offset_t *address, /* IN/OUT */
9209 vm_map_size_t *size, /* OUT */
9210 natural_t *nesting_depth, /* IN/OUT */
9211 vm_region_submap_info_64_t submap_info, /* IN/OUT */
9212 mach_msg_type_number_t *count) /* IN/OUT */
9213 {
9214 vm_region_extended_info_data_t extended;
9215 vm_map_entry_t tmp_entry;
9216 vm_map_offset_t user_address;
9217 unsigned int user_max_depth;
9218
9219 /*
9220 * "curr_entry" is the VM map entry preceding or including the
9221 * address we're looking for.
9222 * "curr_map" is the map or sub-map containing "curr_entry".
9223 * "curr_address" is the equivalent of the top map's "user_address"
9224 * in the current map.
9225 * "curr_offset" is the cumulated offset of "curr_map" in the
9226 * target task's address space.
9227 * "curr_depth" is the depth of "curr_map" in the chain of
9228 * sub-maps.
9229 *
9230 * "curr_max_below" and "curr_max_above" limit the range (around
9231 * "curr_address") we should take into account in the current (sub)map.
9232 * They limit the range to what's visible through the map entries
9233 * we've traversed from the top map to the current map.
9234 *
9235 */
9236 vm_map_entry_t curr_entry;
9237 vm_map_address_t curr_address;
9238 vm_map_offset_t curr_offset;
9239 vm_map_t curr_map;
9240 unsigned int curr_depth;
9241 vm_map_offset_t curr_max_below, curr_max_above;
9242 vm_map_offset_t curr_skip;
9243
9244 /*
9245 * "next_" is the same as "curr_" but for the VM region immediately
9246 * after the address we're looking for. We need to keep track of this
9247 * too because we want to return info about that region if the
9248 * address we're looking for is not mapped.
9249 */
9250 vm_map_entry_t next_entry;
9251 vm_map_offset_t next_offset;
9252 vm_map_offset_t next_address;
9253 vm_map_t next_map;
9254 unsigned int next_depth;
9255 vm_map_offset_t next_max_below, next_max_above;
9256 vm_map_offset_t next_skip;
9257
9258 boolean_t look_for_pages;
9259 vm_region_submap_short_info_64_t short_info;
9260
9261 if (map == VM_MAP_NULL) {
9262 /* no address space to work on */
9263 return KERN_INVALID_ARGUMENT;
9264 }
9265
9266 if (*count < VM_REGION_SUBMAP_INFO_COUNT_64) {
9267 if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) {
9268 /*
9269 * "info" structure is not big enough and
9270 * would overflow
9271 */
9272 return KERN_INVALID_ARGUMENT;
9273 } else {
9274 look_for_pages = FALSE;
9275 *count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
9276 short_info = (vm_region_submap_short_info_64_t) submap_info;
9277 submap_info = NULL;
9278 }
9279 } else {
9280 look_for_pages = TRUE;
9281 *count = VM_REGION_SUBMAP_INFO_COUNT_64;
9282 short_info = NULL;
9283 }
9284
9285
9286 user_address = *address;
9287 user_max_depth = *nesting_depth;
9288
9289 curr_entry = NULL;
9290 curr_map = map;
9291 curr_address = user_address;
9292 curr_offset = 0;
9293 curr_skip = 0;
9294 curr_depth = 0;
9295 curr_max_above = ((vm_map_offset_t) -1) - curr_address;
9296 curr_max_below = curr_address;
9297
9298 next_entry = NULL;
9299 next_map = NULL;
9300 next_address = 0;
9301 next_offset = 0;
9302 next_skip = 0;
9303 next_depth = 0;
9304 next_max_above = (vm_map_offset_t) -1;
9305 next_max_below = (vm_map_offset_t) -1;
9306
9307 if (not_in_kdp) {
9308 vm_map_lock_read(curr_map);
9309 }
9310
9311 for (;;) {
9312 if (vm_map_lookup_entry(curr_map,
9313 curr_address,
9314 &tmp_entry)) {
9315 /* tmp_entry contains the address we're looking for */
9316 curr_entry = tmp_entry;
9317 } else {
9318 vm_map_offset_t skip;
9319 /*
9320 * The address is not mapped. "tmp_entry" is the
9321 * map entry preceding the address. We want the next
9322 * one, if it exists.
9323 */
9324 curr_entry = tmp_entry->vme_next;
9325
9326 if (curr_entry == vm_map_to_entry(curr_map) ||
9327 (curr_entry->vme_start >=
9328 curr_address + curr_max_above)) {
9329 /* no next entry at this level: stop looking */
9330 if (not_in_kdp) {
9331 vm_map_unlock_read(curr_map);
9332 }
9333 curr_entry = NULL;
9334 curr_map = NULL;
9335 curr_offset = 0;
9336 curr_depth = 0;
9337 curr_max_above = 0;
9338 curr_max_below = 0;
9339 break;
9340 }
9341
9342 /* adjust current address and offset */
9343 skip = curr_entry->vme_start - curr_address;
9344 curr_address = curr_entry->vme_start;
9345 curr_skip = skip;
9346 curr_offset += skip;
9347 curr_max_above -= skip;
9348 curr_max_below = 0;
9349 }
9350
9351 /*
9352 * Is the next entry at this level closer to the address (or
9353 * deeper in the submap chain) than the one we had
9354 * so far?
9355 */
9356 tmp_entry = curr_entry->vme_next;
9357 if (tmp_entry == vm_map_to_entry(curr_map)) {
9358 /* no next entry at this level */
9359 } else if (tmp_entry->vme_start >=
9360 curr_address + curr_max_above) {
9361 /*
9362 * tmp_entry is beyond the scope of what we mapped of
9363 * this submap in the upper level: ignore it.
9364 */
9365 } else if ((next_entry == NULL) ||
9366 (tmp_entry->vme_start + curr_offset <=
9367 next_entry->vme_start + next_offset)) {
9368 /*
9369 * We didn't have a "next_entry" or this one is
9370 * closer to the address we're looking for:
9371 * use this "tmp_entry" as the new "next_entry".
9372 */
9373 if (next_entry != NULL) {
9374 /* unlock the last "next_map" */
9375 if (next_map != curr_map && not_in_kdp) {
9376 vm_map_unlock_read(next_map);
9377 }
9378 }
9379 next_entry = tmp_entry;
9380 next_map = curr_map;
9381 next_depth = curr_depth;
9382 next_address = next_entry->vme_start;
9383 next_skip = curr_skip;
9384 next_offset = curr_offset;
9385 next_offset += (next_address - curr_address);
9386 next_max_above = MIN(next_max_above, curr_max_above);
9387 next_max_above = MIN(next_max_above,
9388 next_entry->vme_end - next_address);
9389 next_max_below = MIN(next_max_below, curr_max_below);
9390 next_max_below = MIN(next_max_below,
9391 next_address - next_entry->vme_start);
9392 }
9393
9394 /*
9395 * "curr_max_{above,below}" allow us to keep track of the
9396 * portion of the submap that is actually mapped at this level:
9397 * the rest of that submap is irrelevant to us, since it's not
9398 * mapped here.
9399 * The relevant portion of the submap starts at
9400 * "curr_entry->offset" and spans the size of "curr_entry".
9401 */
9402 curr_max_above = MIN(curr_max_above,
9403 curr_entry->vme_end - curr_address);
9404 curr_max_below = MIN(curr_max_below,
9405 curr_address - curr_entry->vme_start);
9406
9407 if (!curr_entry->is_sub_map ||
9408 curr_depth >= user_max_depth) {
9409 /*
9410 * We hit a leaf map or we reached the maximum depth
9411 * we could, so stop looking. Keep the current map
9412 * locked.
9413 */
9414 break;
9415 }
9416
9417 /*
9418 * Get down to the next submap level.
9419 */
9420
9421 /*
9422 * Lock the next level and unlock the current level,
9423 * unless we need to keep it locked to access the "next_entry"
9424 * later.
9425 */
9426 if (not_in_kdp) {
9427 vm_map_lock_read(curr_entry->object.sub_map);
9428 }
9429 if (curr_map == next_map) {
9430 /* keep "next_map" locked in case we need it */
9431 } else {
9432 /* release this map */
9433 if (not_in_kdp)
9434 vm_map_unlock_read(curr_map);
9435 }
9436
9437 /*
9438 * Adjust the offset. "curr_entry" maps the submap
9439 * at relative address "curr_entry->vme_start" in the
9440 * curr_map but skips the first "curr_entry->offset"
9441 * bytes of the submap.
9442 * "curr_offset" always represents the offset of a virtual
9443 * address in the curr_map relative to the absolute address
9444 * space (i.e. the top-level VM map).
9445 */
9446 curr_offset +=
9447 (curr_entry->offset - curr_entry->vme_start);
9448 curr_address = user_address + curr_offset;
9449 /* switch to the submap */
9450 curr_map = curr_entry->object.sub_map;
9451 curr_depth++;
9452 curr_entry = NULL;
9453 }
9454
9455 if (curr_entry == NULL) {
9456 /* no VM region contains the address... */
9457 if (next_entry == NULL) {
9458 /* ... and no VM region follows it either */
9459 return KERN_INVALID_ADDRESS;
9460 }
9461 /* ... gather info about the next VM region */
9462 curr_entry = next_entry;
9463 curr_map = next_map; /* still locked ... */
9464 curr_address = next_address;
9465 curr_skip = next_skip;
9466 curr_offset = next_offset;
9467 curr_depth = next_depth;
9468 curr_max_above = next_max_above;
9469 curr_max_below = next_max_below;
9470 if (curr_map == map) {
9471 user_address = curr_address;
9472 }
9473 } else {
9474 /* we won't need "next_entry" after all */
9475 if (next_entry != NULL) {
9476 /* release "next_map" */
9477 if (next_map != curr_map && not_in_kdp) {
9478 vm_map_unlock_read(next_map);
9479 }
9480 }
9481 }
9482 next_entry = NULL;
9483 next_map = NULL;
9484 next_offset = 0;
9485 next_skip = 0;
9486 next_depth = 0;
9487 next_max_below = -1;
9488 next_max_above = -1;
9489
9490 *nesting_depth = curr_depth;
9491 *size = curr_max_above + curr_max_below;
9492 *address = user_address + curr_skip - curr_max_below;
9493
9494 // LP64todo: all the current tools are 32bit, obviously never worked for 64b
9495 // so probably should be a real 32b ID vs. ptr.
9496 // Current users just check for equality
9497 #define INFO_MAKE_OBJECT_ID(p) ((uint32_t)(uintptr_t)p)
9498
9499 if (look_for_pages) {
9500 submap_info->user_tag = curr_entry->alias;
9501 submap_info->offset = curr_entry->offset;
9502 submap_info->protection = curr_entry->protection;
9503 submap_info->inheritance = curr_entry->inheritance;
9504 submap_info->max_protection = curr_entry->max_protection;
9505 submap_info->behavior = curr_entry->behavior;
9506 submap_info->user_wired_count = curr_entry->user_wired_count;
9507 submap_info->is_submap = curr_entry->is_sub_map;
9508 submap_info->object_id = INFO_MAKE_OBJECT_ID(curr_entry->object.vm_object);
9509 } else {
9510 short_info->user_tag = curr_entry->alias;
9511 short_info->offset = curr_entry->offset;
9512 short_info->protection = curr_entry->protection;
9513 short_info->inheritance = curr_entry->inheritance;
9514 short_info->max_protection = curr_entry->max_protection;
9515 short_info->behavior = curr_entry->behavior;
9516 short_info->user_wired_count = curr_entry->user_wired_count;
9517 short_info->is_submap = curr_entry->is_sub_map;
9518 short_info->object_id = INFO_MAKE_OBJECT_ID(curr_entry->object.vm_object);
9519 }
9520
9521 extended.pages_resident = 0;
9522 extended.pages_swapped_out = 0;
9523 extended.pages_shared_now_private = 0;
9524 extended.pages_dirtied = 0;
9525 extended.external_pager = 0;
9526 extended.shadow_depth = 0;
9527
9528 if (not_in_kdp) {
9529 if (!curr_entry->is_sub_map) {
9530 vm_map_offset_t range_start, range_end;
9531 range_start = MAX((curr_address - curr_max_below),
9532 curr_entry->vme_start);
9533 range_end = MIN((curr_address + curr_max_above),
9534 curr_entry->vme_end);
9535 vm_map_region_walk(curr_map,
9536 range_start,
9537 curr_entry,
9538 (curr_entry->offset +
9539 (range_start -
9540 curr_entry->vme_start)),
9541 range_end - range_start,
9542 &extended,
9543 look_for_pages);
9544 if (extended.external_pager &&
9545 extended.ref_count == 2 &&
9546 extended.share_mode == SM_SHARED) {
9547 extended.share_mode = SM_PRIVATE;
9548 }
9549 } else {
9550 if (curr_entry->use_pmap) {
9551 extended.share_mode = SM_TRUESHARED;
9552 } else {
9553 extended.share_mode = SM_PRIVATE;
9554 }
9555 extended.ref_count =
9556 curr_entry->object.sub_map->ref_count;
9557 }
9558 }
9559
9560 if (look_for_pages) {
9561 submap_info->pages_resident = extended.pages_resident;
9562 submap_info->pages_swapped_out = extended.pages_swapped_out;
9563 submap_info->pages_shared_now_private =
9564 extended.pages_shared_now_private;
9565 submap_info->pages_dirtied = extended.pages_dirtied;
9566 submap_info->external_pager = extended.external_pager;
9567 submap_info->shadow_depth = extended.shadow_depth;
9568 submap_info->share_mode = extended.share_mode;
9569 submap_info->ref_count = extended.ref_count;
9570 } else {
9571 short_info->external_pager = extended.external_pager;
9572 short_info->shadow_depth = extended.shadow_depth;
9573 short_info->share_mode = extended.share_mode;
9574 short_info->ref_count = extended.ref_count;
9575 }
9576
9577 if (not_in_kdp) {
9578 vm_map_unlock_read(curr_map);
9579 }
9580
9581 return KERN_SUCCESS;
9582 }
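
/*
 * Illustrative sketch (not part of the original source): iterating over
 * the regions of a map with vm_map_region_recurse_64().  The function
 * name, the starting depth of 999 and the "address += size" stepping are
 * assumptions for the example.  "nesting_depth" is in/out: it caps how
 * deep to follow submaps and returns the depth at which the region was
 * found; "address" and "size" come back describing that region (or the
 * next one, if the given address was unmapped).
 */
#if 0	/* example only -- not compiled */
static void
example_walk_regions(
	vm_map_t	map)
{
	vm_map_offset_t			address = 0;
	vm_map_size_t			size;
	natural_t			depth;
	vm_region_submap_info_data_64_t	info;
	mach_msg_type_number_t		count;

	for (;;) {
		depth = 999;	/* follow submaps as deeply as possible */
		count = VM_REGION_SUBMAP_INFO_COUNT_64;
		if (vm_map_region_recurse_64(map, &address, &size, &depth,
					     &info, &count) != KERN_SUCCESS)
			break;
		/* ... consume info.protection, info.user_tag, ... */
		address += size;
	}
}
#endif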
9583
9584 /*
9585 * vm_region:
9586 *
9587 * User call to obtain information about a region in
9588 * a task's address map. Several flavors of information
9589 * are supported: basic (32- and 64-bit), extended and top.
9590 *
9591 * XXX The reserved and behavior fields cannot be filled
9592 * in until the vm merge from the IK is completed, and
9593 * vm_reserve is implemented.
9594 */
9595
9596 kern_return_t
9597 vm_map_region(
9598 vm_map_t map,
9599 vm_map_offset_t *address, /* IN/OUT */
9600 vm_map_size_t *size, /* OUT */
9601 vm_region_flavor_t flavor, /* IN */
9602 vm_region_info_t info, /* OUT */
9603 mach_msg_type_number_t *count, /* IN/OUT */
9604 mach_port_t *object_name) /* OUT */
9605 {
9606 vm_map_entry_t tmp_entry;
9607 vm_map_entry_t entry;
9608 vm_map_offset_t start;
9609
9610 if (map == VM_MAP_NULL)
9611 return(KERN_INVALID_ARGUMENT);
9612
9613 switch (flavor) {
9614
9615 case VM_REGION_BASIC_INFO:
9616 /* legacy flavor for old 32-bit object info */
9617 {
9618 vm_region_basic_info_t basic;
9619
9620 if (*count < VM_REGION_BASIC_INFO_COUNT)
9621 return(KERN_INVALID_ARGUMENT);
9622
9623 basic = (vm_region_basic_info_t) info;
9624 *count = VM_REGION_BASIC_INFO_COUNT;
9625
9626 vm_map_lock_read(map);
9627
9628 start = *address;
9629 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
9630 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
9631 vm_map_unlock_read(map);
9632 return(KERN_INVALID_ADDRESS);
9633 }
9634 } else {
9635 entry = tmp_entry;
9636 }
9637
9638 start = entry->vme_start;
9639
9640 basic->offset = (uint32_t)entry->offset;
9641 basic->protection = entry->protection;
9642 basic->inheritance = entry->inheritance;
9643 basic->max_protection = entry->max_protection;
9644 basic->behavior = entry->behavior;
9645 basic->user_wired_count = entry->user_wired_count;
9646 basic->reserved = entry->is_sub_map;
9647 *address = start;
9648 *size = (entry->vme_end - start);
9649
9650 if (object_name) *object_name = IP_NULL;
9651 if (entry->is_sub_map) {
9652 basic->shared = FALSE;
9653 } else {
9654 basic->shared = entry->is_shared;
9655 }
9656
9657 vm_map_unlock_read(map);
9658 return(KERN_SUCCESS);
9659 }
9660
9661 case VM_REGION_BASIC_INFO_64:
9662 {
9663 vm_region_basic_info_64_t basic;
9664
9665 if (*count < VM_REGION_BASIC_INFO_COUNT_64)
9666 return(KERN_INVALID_ARGUMENT);
9667
9668 basic = (vm_region_basic_info_64_t) info;
9669 *count = VM_REGION_BASIC_INFO_COUNT_64;
9670
9671 vm_map_lock_read(map);
9672
9673 start = *address;
9674 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
9675 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
9676 vm_map_unlock_read(map);
9677 return(KERN_INVALID_ADDRESS);
9678 }
9679 } else {
9680 entry = tmp_entry;
9681 }
9682
9683 start = entry->vme_start;
9684
9685 basic->offset = entry->offset;
9686 basic->protection = entry->protection;
9687 basic->inheritance = entry->inheritance;
9688 basic->max_protection = entry->max_protection;
9689 basic->behavior = entry->behavior;
9690 basic->user_wired_count = entry->user_wired_count;
9691 basic->reserved = entry->is_sub_map;
9692 *address = start;
9693 *size = (entry->vme_end - start);
9694
9695 if (object_name) *object_name = IP_NULL;
9696 if (entry->is_sub_map) {
9697 basic->shared = FALSE;
9698 } else {
9699 basic->shared = entry->is_shared;
9700 }
9701
9702 vm_map_unlock_read(map);
9703 return(KERN_SUCCESS);
9704 }
9705 case VM_REGION_EXTENDED_INFO:
9706 {
9707 vm_region_extended_info_t extended;
9708
9709 if (*count < VM_REGION_EXTENDED_INFO_COUNT)
9710 return(KERN_INVALID_ARGUMENT);
9711
9712 extended = (vm_region_extended_info_t) info;
9713 *count = VM_REGION_EXTENDED_INFO_COUNT;
9714
9715 vm_map_lock_read(map);
9716
9717 start = *address;
9718 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
9719 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
9720 vm_map_unlock_read(map);
9721 return(KERN_INVALID_ADDRESS);
9722 }
9723 } else {
9724 entry = tmp_entry;
9725 }
9726 start = entry->vme_start;
9727
9728 extended->protection = entry->protection;
9729 extended->user_tag = entry->alias;
9730 extended->pages_resident = 0;
9731 extended->pages_swapped_out = 0;
9732 extended->pages_shared_now_private = 0;
9733 extended->pages_dirtied = 0;
9734 extended->external_pager = 0;
9735 extended->shadow_depth = 0;
9736
9737 vm_map_region_walk(map, start, entry, entry->offset, entry->vme_end - start, extended, TRUE);
9738
9739 if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED)
9740 extended->share_mode = SM_PRIVATE;
9741
9742 if (object_name)
9743 *object_name = IP_NULL;
9744 *address = start;
9745 *size = (entry->vme_end - start);
9746
9747 vm_map_unlock_read(map);
9748 return(KERN_SUCCESS);
9749 }
9750 case VM_REGION_TOP_INFO:
9751 {
9752 vm_region_top_info_t top;
9753
9754 if (*count < VM_REGION_TOP_INFO_COUNT)
9755 return(KERN_INVALID_ARGUMENT);
9756
9757 top = (vm_region_top_info_t) info;
9758 *count = VM_REGION_TOP_INFO_COUNT;
9759
9760 vm_map_lock_read(map);
9761
9762 start = *address;
9763 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
9764 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
9765 vm_map_unlock_read(map);
9766 return(KERN_INVALID_ADDRESS);
9767 }
9768 } else {
9769 entry = tmp_entry;
9770
9771 }
9772 start = entry->vme_start;
9773
9774 top->private_pages_resident = 0;
9775 top->shared_pages_resident = 0;
9776
9777 vm_map_region_top_walk(entry, top);
9778
9779 if (object_name)
9780 *object_name = IP_NULL;
9781 *address = start;
9782 *size = (entry->vme_end - start);
9783
9784 vm_map_unlock_read(map);
9785 return(KERN_SUCCESS);
9786 }
9787 default:
9788 return(KERN_INVALID_ARGUMENT);
9789 }
9790 }
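
/*
 * Illustrative sketch (not part of the original source): querying the
 * VM_REGION_BASIC_INFO_64 flavor for the region containing "addr".  The
 * function name is an assumption for the example.  On success "*info"
 * is filled in, "address" is moved back to the start of the containing
 * entry and "size" covers that whole entry.
 */
#if 0	/* example only -- not compiled */
static kern_return_t
example_region_basic_info(
	vm_map_t			map,
	vm_map_offset_t			addr,
	vm_region_basic_info_data_64_t	*info)
{
	vm_map_offset_t		address = addr;
	vm_map_size_t		size;
	mach_msg_type_number_t	count = VM_REGION_BASIC_INFO_COUNT_64;
	mach_port_t		object_name;

	return vm_map_region(map, &address, &size,
			     VM_REGION_BASIC_INFO_64,
			     (vm_region_info_t) info, &count,
			     &object_name);
}
#endif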
9791
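/*
 * OBJ_RESIDENT_COUNT yields the number of "obj"'s pages to report as
 * resident for an entry spanning "entry_size" pages: for an all-reusable
 * object only the wired pages count, otherwise the resident pages minus
 * the reusable ones, capped at the size of the entry.
 */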
9792 #define OBJ_RESIDENT_COUNT(obj, entry_size) \
9793 MIN((entry_size), \
9794 ((obj)->all_reusable ? \
9795 (obj)->wired_page_count : \
9796 (obj)->resident_page_count - (obj)->reusable_page_count))
9797
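/*
 * vm_map_region_top_walk:
 *
 * Fill in a vm_region_top_info structure for "entry": classify its
 * share mode and count the resident pages that are private to the
 * mapping versus shared through its shadow chain.
 */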
9798 void
9799 vm_map_region_top_walk(
9800 vm_map_entry_t entry,
9801 vm_region_top_info_t top)
9802 {
9803
9804 if (entry->object.vm_object == 0 || entry->is_sub_map) {
9805 top->share_mode = SM_EMPTY;
9806 top->ref_count = 0;
9807 top->obj_id = 0;
9808 return;
9809 }
9810
9811 {
9812 struct vm_object *obj, *tmp_obj;
9813 int ref_count;
9814 uint32_t entry_size;
9815
9816 entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64);
9817
9818 obj = entry->object.vm_object;
9819
9820 vm_object_lock(obj);
9821
9822 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
9823 ref_count--;
9824
9825 assert(obj->reusable_page_count <= obj->resident_page_count);
9826 if (obj->shadow) {
9827 if (ref_count == 1)
9828 top->private_pages_resident =
9829 OBJ_RESIDENT_COUNT(obj, entry_size);
9830 else
9831 top->shared_pages_resident =
9832 OBJ_RESIDENT_COUNT(obj, entry_size);
9833 top->ref_count = ref_count;
9834 top->share_mode = SM_COW;
9835
9836 while ((tmp_obj = obj->shadow)) {
9837 vm_object_lock(tmp_obj);
9838 vm_object_unlock(obj);
9839 obj = tmp_obj;
9840
9841 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
9842 ref_count--;
9843
9844 assert(obj->reusable_page_count <= obj->resident_page_count);
9845 top->shared_pages_resident +=
9846 OBJ_RESIDENT_COUNT(obj, entry_size);
9847 top->ref_count += ref_count - 1;
9848 }
9849 } else {
9850 if (entry->superpage_size) {
9851 top->share_mode = SM_LARGE_PAGE;
9852 top->shared_pages_resident = 0;
9853 top->private_pages_resident = entry_size;
9854 } else if (entry->needs_copy) {
9855 top->share_mode = SM_COW;
9856 top->shared_pages_resident =
9857 OBJ_RESIDENT_COUNT(obj, entry_size);
9858 } else {
9859 if (ref_count == 1 ||
9860 (ref_count == 2 && !(obj->pager_trusted) && !(obj->internal))) {
9861 top->share_mode = SM_PRIVATE;
9862 top->private_pages_resident =
9863 OBJ_RESIDENT_COUNT(obj,
9864 entry_size);
9865 } else {
9866 top->share_mode = SM_SHARED;
9867 top->shared_pages_resident =
9868 OBJ_RESIDENT_COUNT(obj,
9869 entry_size);
9870 }
9871 }
9872 top->ref_count = ref_count;
9873 }
9874 /* XXX K64: obj_id will be truncated */
9875 top->obj_id = (unsigned int) (uintptr_t)obj;
9876
9877 vm_object_unlock(obj);
9878 }
9879 }
9880
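/*
 * vm_map_region_walk:
 *
 * Gather extended region info for the given range of "entry": either
 * examine each page in the range (look_for_pages) to count resident,
 * dirtied and swapped-out pages, or just measure the shadow chain
 * depth, then derive the share mode (including the aliased variants,
 * by counting references from this map's entries).
 */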
9881 void
9882 vm_map_region_walk(
9883 vm_map_t map,
9884 vm_map_offset_t va,
9885 vm_map_entry_t entry,
9886 vm_object_offset_t offset,
9887 vm_object_size_t range,
9888 vm_region_extended_info_t extended,
9889 boolean_t look_for_pages)
9890 {
9891 register struct vm_object *obj, *tmp_obj;
9892 register vm_map_offset_t last_offset;
9893 register int i;
9894 register int ref_count;
9895 struct vm_object *shadow_object;
9896 int shadow_depth;
9897
9898 if ((entry->object.vm_object == 0) ||
9899 (entry->is_sub_map) ||
9900 (entry->object.vm_object->phys_contiguous &&
9901 !entry->superpage_size)) {
9902 extended->share_mode = SM_EMPTY;
9903 extended->ref_count = 0;
9904 return;
9905 }
9906
9907 if (entry->superpage_size) {
9908 extended->shadow_depth = 0;
9909 extended->share_mode = SM_LARGE_PAGE;
9910 extended->ref_count = 1;
9911 extended->external_pager = 0;
9912 extended->pages_resident = (unsigned int)(range >> PAGE_SHIFT);
9913 extended->shadow_depth = 0;
9914 return;
9915 }
9916
9917 {
9918 obj = entry->object.vm_object;
9919
9920 vm_object_lock(obj);
9921
9922 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
9923 ref_count--;
9924
9925 if (look_for_pages) {
9926 for (last_offset = offset + range;
9927 offset < last_offset;
9928 offset += PAGE_SIZE_64, va += PAGE_SIZE)
9929 vm_map_region_look_for_page(map, va, obj,
9930 offset, ref_count,
9931 0, extended);
9932 } else {
9933 shadow_object = obj->shadow;
9934 shadow_depth = 0;
9935
9936 if ( !(obj->pager_trusted) && !(obj->internal))
9937 extended->external_pager = 1;
9938
9939 if (shadow_object != VM_OBJECT_NULL) {
9940 vm_object_lock(shadow_object);
9941 for (;
9942 shadow_object != VM_OBJECT_NULL;
9943 shadow_depth++) {
9944 vm_object_t next_shadow;
9945
9946 if ( !(shadow_object->pager_trusted) &&
9947 !(shadow_object->internal))
9948 extended->external_pager = 1;
9949
9950 next_shadow = shadow_object->shadow;
9951 if (next_shadow) {
9952 vm_object_lock(next_shadow);
9953 }
9954 vm_object_unlock(shadow_object);
9955 shadow_object = next_shadow;
9956 }
9957 }
9958 extended->shadow_depth = shadow_depth;
9959 }
9960
9961 if (extended->shadow_depth || entry->needs_copy)
9962 extended->share_mode = SM_COW;
9963 else {
9964 if (ref_count == 1)
9965 extended->share_mode = SM_PRIVATE;
9966 else {
9967 if (obj->true_share)
9968 extended->share_mode = SM_TRUESHARED;
9969 else
9970 extended->share_mode = SM_SHARED;
9971 }
9972 }
9973 extended->ref_count = ref_count - extended->shadow_depth;
9974
9975 for (i = 0; i < extended->shadow_depth; i++) {
9976 if ((tmp_obj = obj->shadow) == 0)
9977 break;
9978 vm_object_lock(tmp_obj);
9979 vm_object_unlock(obj);
9980
9981 if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress)
9982 ref_count--;
9983
9984 extended->ref_count += ref_count;
9985 obj = tmp_obj;
9986 }
9987 vm_object_unlock(obj);
9988
9989 if (extended->share_mode == SM_SHARED) {
9990 register vm_map_entry_t cur;
9991 register vm_map_entry_t last;
9992 int my_refs;
9993
9994 obj = entry->object.vm_object;
9995 last = vm_map_to_entry(map);
9996 my_refs = 0;
9997
9998 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
9999 ref_count--;
10000 for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next)
10001 my_refs += vm_map_region_count_obj_refs(cur, obj);
10002
10003 if (my_refs == ref_count)
10004 extended->share_mode = SM_PRIVATE_ALIASED;
10005 else if (my_refs > 1)
10006 extended->share_mode = SM_SHARED_ALIASED;
10007 }
10008 }
10009 }
10010
10011
10012 /* object is locked on entry and locked on return */
10013
10014
10015 static void
10016 vm_map_region_look_for_page(
10017 __unused vm_map_t map,
10018 __unused vm_map_offset_t va,
10019 vm_object_t object,
10020 vm_object_offset_t offset,
10021 int max_refcnt,
10022 int depth,
10023 vm_region_extended_info_t extended)
10024 {
10025 register vm_page_t p;
10026 register vm_object_t shadow;
10027 register int ref_count;
10028 vm_object_t caller_object;
10029 #if MACH_PAGEMAP
10030 kern_return_t kr;
10031 #endif
10032 shadow = object->shadow;
10033 caller_object = object;
10034
10035
10036 while (TRUE) {
10037
10038 if ( !(object->pager_trusted) && !(object->internal))
10039 extended->external_pager = 1;
10040
10041 if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
10042 if (shadow && (max_refcnt == 1))
10043 extended->pages_shared_now_private++;
10044
10045 if (!p->fictitious &&
10046 (p->dirty || pmap_is_modified(p->phys_page)))
10047 extended->pages_dirtied++;
10048
10049 extended->pages_resident++;
10050
10051 if(object != caller_object)
10052 vm_object_unlock(object);
10053
10054 return;
10055 }
10056 #if MACH_PAGEMAP
10057 if (object->existence_map) {
10058 if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_EXISTS) {
10059
10060 extended->pages_swapped_out++;
10061
10062 if(object != caller_object)
10063 vm_object_unlock(object);
10064
10065 return;
10066 }
10067 } else if (object->internal &&
10068 object->alive &&
10069 !object->terminating &&
10070 object->pager_ready) {
10071
10072 memory_object_t pager;
10073
10074 vm_object_paging_begin(object);
10075 pager = object->pager;
10076 vm_object_unlock(object);
10077
10078 kr = memory_object_data_request(
10079 pager,
10080 offset + object->paging_offset,
10081 0, /* just poke the pager */
10082 VM_PROT_READ,
10083 NULL);
10084
10085 vm_object_lock(object);
10086 vm_object_paging_end(object);
10087
10088 if (kr == KERN_SUCCESS) {
10089 /* the pager has that page */
10090 extended->pages_swapped_out++;
10091 if (object != caller_object)
10092 vm_object_unlock(object);
10093 return;
10094 }
10095 }
10096 #endif /* MACH_PAGEMAP */
10097
10098 if (shadow) {
10099 vm_object_lock(shadow);
10100
10101 if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress)
10102 ref_count--;
10103
10104 if (++depth > extended->shadow_depth)
10105 extended->shadow_depth = depth;
10106
10107 if (ref_count > max_refcnt)
10108 max_refcnt = ref_count;
10109
10110 if(object != caller_object)
10111 vm_object_unlock(object);
10112
10113 offset = offset + object->vo_shadow_offset;
10114 object = shadow;
10115 shadow = object->shadow;
10116 continue;
10117 }
10118 if(object != caller_object)
10119 vm_object_unlock(object);
10120 break;
10121 }
10122 }
10123
10124 static int
10125 vm_map_region_count_obj_refs(
10126 vm_map_entry_t entry,
10127 vm_object_t object)
10128 {
10129 register int ref_count;
10130 register vm_object_t chk_obj;
10131 register vm_object_t tmp_obj;
10132
10133 if (entry->object.vm_object == 0)
10134 return(0);
10135
10136 if (entry->is_sub_map)
10137 return(0);
10138 else {
10139 ref_count = 0;
10140
10141 chk_obj = entry->object.vm_object;
10142 vm_object_lock(chk_obj);
10143
10144 while (chk_obj) {
10145 if (chk_obj == object)
10146 ref_count++;
10147 tmp_obj = chk_obj->shadow;
10148 if (tmp_obj)
10149 vm_object_lock(tmp_obj);
10150 vm_object_unlock(chk_obj);
10151
10152 chk_obj = tmp_obj;
10153 }
10154 }
10155 return(ref_count);
10156 }
10157
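/*
 * Illustrative sketch (editorial addition, not part of the original
 * source): the share_mode and page counts computed by
 * vm_map_region_walk() / vm_map_region_look_for_page() above are what a
 * user-space caller sees when asking for VM_REGION_EXTENDED_INFO.  A
 * minimal, hedged example of such a caller -- "some_addr" is a
 * placeholder for an address in the caller's own map:
 *
 *	#include <mach/mach.h>
 *	#include <mach/mach_vm.h>
 *
 *	mach_vm_address_t		addr = some_addr;
 *	mach_vm_size_t			size = 0;
 *	vm_region_extended_info_data_t	info;
 *	mach_msg_type_number_t		count = VM_REGION_EXTENDED_INFO_COUNT;
 *	mach_port_t			objname = MACH_PORT_NULL;
 *	kern_return_t			kr;
 *
 *	kr = mach_vm_region(mach_task_self(), &addr, &size,
 *			    VM_REGION_EXTENDED_INFO,
 *			    (vm_region_info_t) &info, &count, &objname);
 *
 * On success, info.share_mode is one of SM_PRIVATE, SM_COW, SM_SHARED,
 * SM_TRUESHARED, etc., and info.pages_resident, info.pages_dirtied and
 * info.shadow_depth are the fields filled in by the walk above.
 */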
10158
10159 /*
10160 * Routine: vm_map_simplify
10161 *
10162 * Description:
10163 * Attempt to simplify the map representation in
10164 * the vicinity of the given starting address.
10165 * Note:
10166 * This routine is intended primarily to keep the
10167 * kernel maps more compact -- they generally don't
10168 * benefit from the "expand a map entry" technology
10169 * at allocation time because the adjacent entry
10170 * is often wired down.
10171 */
10172 void
10173 vm_map_simplify_entry(
10174 vm_map_t map,
10175 vm_map_entry_t this_entry)
10176 {
10177 vm_map_entry_t prev_entry;
10178
10179 counter(c_vm_map_simplify_entry_called++);
10180
10181 prev_entry = this_entry->vme_prev;
10182
10183 if ((this_entry != vm_map_to_entry(map)) &&
10184 (prev_entry != vm_map_to_entry(map)) &&
10185
10186 (prev_entry->vme_end == this_entry->vme_start) &&
10187
10188 (prev_entry->is_sub_map == this_entry->is_sub_map) &&
10189
10190 (prev_entry->object.vm_object == this_entry->object.vm_object) &&
10191 ((prev_entry->offset + (prev_entry->vme_end -
10192 prev_entry->vme_start))
10193 == this_entry->offset) &&
10194
10195 (prev_entry->inheritance == this_entry->inheritance) &&
10196 (prev_entry->protection == this_entry->protection) &&
10197 (prev_entry->max_protection == this_entry->max_protection) &&
10198 (prev_entry->behavior == this_entry->behavior) &&
10199 (prev_entry->alias == this_entry->alias) &&
10200 (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) &&
10201 (prev_entry->no_cache == this_entry->no_cache) &&
10202 (prev_entry->wired_count == this_entry->wired_count) &&
10203 (prev_entry->user_wired_count == this_entry->user_wired_count) &&
10204
10205 (prev_entry->needs_copy == this_entry->needs_copy) &&
10206 (prev_entry->permanent == this_entry->permanent) &&
10207
10208 (prev_entry->use_pmap == FALSE) &&
10209 (this_entry->use_pmap == FALSE) &&
10210 (prev_entry->in_transition == FALSE) &&
10211 (this_entry->in_transition == FALSE) &&
10212 (prev_entry->needs_wakeup == FALSE) &&
10213 (this_entry->needs_wakeup == FALSE) &&
10214 (prev_entry->is_shared == FALSE) &&
10215 (this_entry->is_shared == FALSE)
10216 ) {
10217 vm_map_store_entry_unlink(map, prev_entry);
10218 assert(prev_entry->vme_start < this_entry->vme_end);
10219 this_entry->vme_start = prev_entry->vme_start;
10220 this_entry->offset = prev_entry->offset;
10221 if (prev_entry->is_sub_map) {
10222 vm_map_deallocate(prev_entry->object.sub_map);
10223 } else {
10224 vm_object_deallocate(prev_entry->object.vm_object);
10225 }
10226 vm_map_entry_dispose(map, prev_entry);
10227 SAVE_HINT_MAP_WRITE(map, this_entry);
10228 counter(c_vm_map_simplified++);
10229 }
10230 }
10231
10232 void
10233 vm_map_simplify(
10234 vm_map_t map,
10235 vm_map_offset_t start)
10236 {
10237 vm_map_entry_t this_entry;
10238
10239 vm_map_lock(map);
10240 if (vm_map_lookup_entry(map, start, &this_entry)) {
10241 vm_map_simplify_entry(map, this_entry);
10242 vm_map_simplify_entry(map, this_entry->vme_next);
10243 }
10244 counter(c_vm_map_simplify_called++);
10245 vm_map_unlock(map);
10246 }
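/*
 * Worked example (editorial addition): two adjacent entries that map
 * consecutive ranges of the same object with contiguous offsets and
 * identical attributes, e.g.
 *
 *	prev_entry:  [0x1000, 0x3000)  object O, offset 0x0
 *	this_entry:  [0x3000, 0x5000)  object O, offset 0x2000
 *
 * satisfy the checks in vm_map_simplify_entry() and are coalesced into
 * a single entry [0x1000, 0x5000) with offset 0x0, dropping one object
 * reference and one vm_map_entry.  The addresses are made up purely for
 * illustration.
 */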
10247
10248 static void
10249 vm_map_simplify_range(
10250 vm_map_t map,
10251 vm_map_offset_t start,
10252 vm_map_offset_t end)
10253 {
10254 vm_map_entry_t entry;
10255
10256 /*
10257 * The map should be locked (for "write") by the caller.
10258 */
10259
10260 if (start >= end) {
10261 /* invalid address range */
10262 return;
10263 }
10264
10265 start = vm_map_trunc_page(start);
10266 end = vm_map_round_page(end);
10267
10268 if (!vm_map_lookup_entry(map, start, &entry)) {
10269 /* "start" is not mapped and "entry" ends before "start" */
10270 if (entry == vm_map_to_entry(map)) {
10271 /* start with first entry in the map */
10272 entry = vm_map_first_entry(map);
10273 } else {
10274 /* start with next entry */
10275 entry = entry->vme_next;
10276 }
10277 }
10278
10279 while (entry != vm_map_to_entry(map) &&
10280 entry->vme_start <= end) {
10281 /* try and coalesce "entry" with its previous entry */
10282 vm_map_simplify_entry(map, entry);
10283 entry = entry->vme_next;
10284 }
10285 }
10286
10287
10288 /*
10289 * Routine: vm_map_machine_attribute
10290 * Purpose:
10291 * Provide machine-specific attributes to mappings,
10292 * such as cacheability etc. for machines that provide
10293 * them. NUMA architectures and machines with big/strange
10294 * caches will use this.
10295 * Note:
10296 * Responsibilities for locking and checking are handled here;
10297 * everything else is handled in the pmap module. If any non-volatile
10298 * information must be kept, the pmap module should handle
10299 * it itself. [This assumes that attributes do not
10300 * need to be inherited, which seems ok to me]
10301 */
10302 kern_return_t
10303 vm_map_machine_attribute(
10304 vm_map_t map,
10305 vm_map_offset_t start,
10306 vm_map_offset_t end,
10307 vm_machine_attribute_t attribute,
10308 vm_machine_attribute_val_t* value) /* IN/OUT */
10309 {
10310 kern_return_t ret;
10311 vm_map_size_t sync_size;
10312 vm_map_entry_t entry;
10313
10314 if (start < vm_map_min(map) || end > vm_map_max(map))
10315 return KERN_INVALID_ADDRESS;
10316
10317 /* Figure out how much memory we need to flush (in page increments) */
10318 sync_size = end - start;
10319
10320 vm_map_lock(map);
10321
10322 if (attribute != MATTR_CACHE) {
10323 /* If we don't have to find physical addresses, we */
10324 /* don't have to do an explicit traversal here. */
10325 ret = pmap_attribute(map->pmap, start, end-start,
10326 attribute, value);
10327 vm_map_unlock(map);
10328 return ret;
10329 }
10330
10331 ret = KERN_SUCCESS; /* Assume it all worked */
10332
10333 while(sync_size) {
10334 if (vm_map_lookup_entry(map, start, &entry)) {
10335 vm_map_size_t sub_size;
10336 if((entry->vme_end - start) > sync_size) {
10337 sub_size = sync_size;
10338 sync_size = 0;
10339 } else {
10340 sub_size = entry->vme_end - start;
10341 sync_size -= sub_size;
10342 }
10343 if(entry->is_sub_map) {
10344 vm_map_offset_t sub_start;
10345 vm_map_offset_t sub_end;
10346
10347 sub_start = (start - entry->vme_start)
10348 + entry->offset;
10349 sub_end = sub_start + sub_size;
10350 vm_map_machine_attribute(
10351 entry->object.sub_map,
10352 sub_start,
10353 sub_end,
10354 attribute, value);
10355 } else {
10356 if(entry->object.vm_object) {
10357 vm_page_t m;
10358 vm_object_t object;
10359 vm_object_t base_object;
10360 vm_object_t last_object;
10361 vm_object_offset_t offset;
10362 vm_object_offset_t base_offset;
10363 vm_map_size_t range;
10364 range = sub_size;
10365 offset = (start - entry->vme_start)
10366 + entry->offset;
10367 base_offset = offset;
10368 object = entry->object.vm_object;
10369 base_object = object;
10370 last_object = NULL;
10371
10372 vm_object_lock(object);
10373
10374 while (range) {
10375 m = vm_page_lookup(
10376 object, offset);
10377
10378 if (m && !m->fictitious) {
10379 ret =
10380 pmap_attribute_cache_sync(
10381 m->phys_page,
10382 PAGE_SIZE,
10383 attribute, value);
10384
10385 } else if (object->shadow) {
10386 offset = offset + object->vo_shadow_offset;
10387 last_object = object;
10388 object = object->shadow;
10389 vm_object_lock(last_object->shadow);
10390 vm_object_unlock(last_object);
10391 continue;
10392 }
10393 range -= PAGE_SIZE;
10394
10395 if (base_object != object) {
10396 vm_object_unlock(object);
10397 vm_object_lock(base_object);
10398 object = base_object;
10399 }
10400 /* Bump to the next page */
10401 base_offset += PAGE_SIZE;
10402 offset = base_offset;
10403 }
10404 vm_object_unlock(object);
10405 }
10406 }
10407 start += sub_size;
10408 } else {
10409 vm_map_unlock(map);
10410 return KERN_FAILURE;
10411 }
10412
10413 }
10414
10415 vm_map_unlock(map);
10416
10417 return ret;
10418 }
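/*
 * Illustrative sketch (editorial addition): a kernel caller that wants
 * the cache flushed for a mapped range could use MATTR_CACHE with
 * MATTR_VAL_CACHE_FLUSH.  "map", "start" and "size" are assumed to
 * describe an existing mapping; this is a hedged example, not a usage
 * taken from the original source:
 *
 *	vm_machine_attribute_val_t	val = MATTR_VAL_CACHE_FLUSH;
 *	kern_return_t			kr;
 *
 *	kr = vm_map_machine_attribute(map, start, start + size,
 *				      MATTR_CACHE, &val);
 *
 * Attributes other than MATTR_CACHE fall through to pmap_attribute()
 * without the per-page traversal performed above.
 */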
10419
10420 /*
10421 * vm_map_behavior_set:
10422 *
10423 * Sets the paging reference behavior of the specified address
10424 * range in the target map. Paging reference behavior affects
10425 * how pagein operations resulting from faults on the map will be
10426 * clustered.
10427 */
10428 kern_return_t
10429 vm_map_behavior_set(
10430 vm_map_t map,
10431 vm_map_offset_t start,
10432 vm_map_offset_t end,
10433 vm_behavior_t new_behavior)
10434 {
10435 register vm_map_entry_t entry;
10436 vm_map_entry_t temp_entry;
10437
10438 XPR(XPR_VM_MAP,
10439 "vm_map_behavior_set, 0x%X start 0x%X end 0x%X behavior %d",
10440 map, start, end, new_behavior, 0);
10441
10442 if (start > end ||
10443 start < vm_map_min(map) ||
10444 end > vm_map_max(map)) {
10445 return KERN_NO_SPACE;
10446 }
10447
10448 switch (new_behavior) {
10449
10450 /*
10451 * This first block of behaviors all set a persistent state on the specified
10452 * memory range. All we have to do here is to record the desired behavior
10453 * in the vm_map_entry_t's.
10454 */
10455
10456 case VM_BEHAVIOR_DEFAULT:
10457 case VM_BEHAVIOR_RANDOM:
10458 case VM_BEHAVIOR_SEQUENTIAL:
10459 case VM_BEHAVIOR_RSEQNTL:
10460 case VM_BEHAVIOR_ZERO_WIRED_PAGES:
10461 vm_map_lock(map);
10462
10463 /*
10464 * The entire address range must be valid for the map.
10465 * Note that vm_map_range_check() does a
10466 * vm_map_lookup_entry() internally and returns the
10467 * entry containing the start of the address range if
10468 * the entire range is valid.
10469 */
10470 if (vm_map_range_check(map, start, end, &temp_entry)) {
10471 entry = temp_entry;
10472 vm_map_clip_start(map, entry, start);
10473 }
10474 else {
10475 vm_map_unlock(map);
10476 return(KERN_INVALID_ADDRESS);
10477 }
10478
10479 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
10480 vm_map_clip_end(map, entry, end);
10481 assert(!entry->use_pmap);
10482
10483 if( new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES ) {
10484 entry->zero_wired_pages = TRUE;
10485 } else {
10486 entry->behavior = new_behavior;
10487 }
10488 entry = entry->vme_next;
10489 }
10490
10491 vm_map_unlock(map);
10492 break;
10493
10494 /*
10495 * The rest of these are different from the above in that they cause
10496 * an immediate action to take place as opposed to setting a behavior that
10497 * affects future actions.
10498 */
10499
10500 case VM_BEHAVIOR_WILLNEED:
10501 return vm_map_willneed(map, start, end);
10502
10503 case VM_BEHAVIOR_DONTNEED:
10504 return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS);
10505
10506 case VM_BEHAVIOR_FREE:
10507 return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS);
10508
10509 case VM_BEHAVIOR_REUSABLE:
10510 return vm_map_reusable_pages(map, start, end);
10511
10512 case VM_BEHAVIOR_REUSE:
10513 return vm_map_reuse_pages(map, start, end);
10514
10515 case VM_BEHAVIOR_CAN_REUSE:
10516 return vm_map_can_reuse(map, start, end);
10517
10518 default:
10519 return(KERN_INVALID_ARGUMENT);
10520 }
10521
10522 return(KERN_SUCCESS);
10523 }
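/*
 * Illustrative sketch (editorial addition): the persistent behaviors
 * above are normally reached from user space through madvise(2).  For
 * example, a program about to scan a large mapping sequentially can
 * hint the pager ("buf" and "len" are placeholders for an existing
 * mapping):
 *
 *	if (madvise(buf, len, MADV_SEQUENTIAL) != 0)
 *		perror("madvise");
 *
 * On this platform madvise(MADV_SEQUENTIAL) is expected to arrive here
 * as VM_BEHAVIOR_SEQUENTIAL via the BSD layer; this is a hedged sketch
 * of the calling pattern, not code from the original source.
 */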
10524
10525
10526 /*
10527 * Internals for the madvise(MADV_WILLNEED) system call.
10528 *
10529 * The present implementation is to do a read-ahead if the mapping corresponds
10530 * to a mapped regular file. If it's an anonymous mapping, then we do nothing
10531 * and basically ignore the "advice" (which we are always free to do).
10532 */
10533
10534
10535 static kern_return_t
10536 vm_map_willneed(
10537 vm_map_t map,
10538 vm_map_offset_t start,
10539 vm_map_offset_t end
10540 )
10541 {
10542 vm_map_entry_t entry;
10543 vm_object_t object;
10544 memory_object_t pager;
10545 struct vm_object_fault_info fault_info;
10546 kern_return_t kr;
10547 vm_object_size_t len;
10548 vm_object_offset_t offset;
10549
10550 /*
10551 * Fill in static values in fault_info. Several fields get ignored by the code
10552 * we call, but we'll fill them in anyway since uninitialized fields are bad
10553 * when it comes to future backwards compatibility.
10554 */
10555
10556 fault_info.interruptible = THREAD_UNINT; /* ignored value */
10557 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
10558 fault_info.no_cache = FALSE; /* ignored value */
10559 fault_info.stealth = TRUE;
10560 fault_info.io_sync = FALSE;
10561 fault_info.cs_bypass = FALSE;
10562 fault_info.mark_zf_absent = FALSE;
10563 fault_info.batch_pmap_op = FALSE;
10564
10565 /*
10566 * The MADV_WILLNEED operation doesn't require any changes to the
10567 * vm_map_entry_t's, so the read lock is sufficient.
10568 */
10569
10570 vm_map_lock_read(map);
10571
10572 /*
10573 * The madvise semantics require that the address range be fully
10574 * allocated with no holes. Otherwise, we're required to return
10575 * an error.
10576 */
10577
10578 if (! vm_map_range_check(map, start, end, &entry)) {
10579 vm_map_unlock_read(map);
10580 return KERN_INVALID_ADDRESS;
10581 }
10582
10583 /*
10584 * Examine each vm_map_entry_t in the range.
10585 */
10586 for (; entry != vm_map_to_entry(map) && start < end; ) {
10587
10588 /*
10589 * The first time through, the start address could be anywhere
10590 * within the vm_map_entry we found. So adjust the offset to
10591 * correspond. After that, the offset will always be zero to
10592 * correspond to the beginning of the current vm_map_entry.
10593 */
10594 offset = (start - entry->vme_start) + entry->offset;
10595
10596 /*
10597 * Set the length so we don't go beyond the end of the
10598 * map_entry or beyond the end of the range we were given.
10599 * This range could also span multiple map entries, all of which
10600 * map different files, so make sure we only do the right amount
10601 * of I/O for each object. Note that it's possible for there
10602 * to be multiple map entries all referring to the same object
10603 * but with different page permissions, but it's not worth
10604 * trying to optimize that case.
10605 */
10606 len = MIN(entry->vme_end - start, end - start);
10607
10608 if ((vm_size_t) len != len) {
10609 /* 32-bit overflow */
10610 len = (vm_size_t) (0 - PAGE_SIZE);
10611 }
10612 fault_info.cluster_size = (vm_size_t) len;
10613 fault_info.lo_offset = offset;
10614 fault_info.hi_offset = offset + len;
10615 fault_info.user_tag = entry->alias;
10616
10617 /*
10618 * If there's no read permission to this mapping, then just
10619 * skip it.
10620 */
10621 if ((entry->protection & VM_PROT_READ) == 0) {
10622 entry = entry->vme_next;
10623 start = entry->vme_start;
10624 continue;
10625 }
10626
10627 /*
10628 * Find the file object backing this map entry. If there is
10629 * none, then we simply ignore the "will need" advice for this
10630 * entry and go on to the next one.
10631 */
10632 if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) {
10633 entry = entry->vme_next;
10634 start = entry->vme_start;
10635 continue;
10636 }
10637
10638 /*
10639 * The data_request() could take a long time, so let's
10640 * release the map lock to avoid blocking other threads.
10641 */
10642 vm_map_unlock_read(map);
10643
10644 vm_object_paging_begin(object);
10645 pager = object->pager;
10646 vm_object_unlock(object);
10647
10648 /*
10649 * Get the data from the object asynchronously.
10650 *
10651 * Note that memory_object_data_request() places limits on the
10652 * amount of I/O it will do. Regardless of the len we
10653 * specified, it won't do more than MAX_UPL_TRANSFER and it
10654 * silently truncates the len to that size. This isn't
10655 * necessarily bad since madvise shouldn't really be used to
10656 * page in unlimited amounts of data. Other Unix variants
10657 * limit the willneed case as well. If this turns out to be an
10658 * issue for developers, then we can always adjust the policy
10659 * here and still be backwards compatible since this is all
10660 * just "advice".
10661 */
10662 kr = memory_object_data_request(
10663 pager,
10664 offset + object->paging_offset,
10665 0, /* ignored */
10666 VM_PROT_READ,
10667 (memory_object_fault_info_t)&fault_info);
10668
10669 vm_object_lock(object);
10670 vm_object_paging_end(object);
10671 vm_object_unlock(object);
10672
10673 /*
10674 * If we couldn't do the I/O for some reason, just give up on
10675 * the madvise. We still return success to the user since
10676 * madvise isn't supposed to fail when the advice can't be
10677 * taken.
10678 */
10679 if (kr != KERN_SUCCESS) {
10680 return KERN_SUCCESS;
10681 }
10682
10683 start += len;
10684 if (start >= end) {
10685 /* done */
10686 return KERN_SUCCESS;
10687 }
10688
10689 /* look up next entry */
10690 vm_map_lock_read(map);
10691 if (! vm_map_lookup_entry(map, start, &entry)) {
10692 /*
10693 * There's a new hole in the address range.
10694 */
10695 vm_map_unlock_read(map);
10696 return KERN_INVALID_ADDRESS;
10697 }
10698 }
10699
10700 vm_map_unlock_read(map);
10701 return KERN_SUCCESS;
10702 }
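/*
 * Illustrative sketch (editorial addition): from user space the
 * read-ahead above is triggered on a file-backed mapping, e.g.
 *
 *	int fd = open(path, O_RDONLY);
 *	void *p = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
 *	if (p != MAP_FAILED)
 *		(void) madvise(p, len, MADV_WILLNEED);
 *
 * "path" and "len" are placeholders.  For anonymous memory the advice
 * is accepted but, as described above, ignored.
 */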
10703
10704 static boolean_t
10705 vm_map_entry_is_reusable(
10706 vm_map_entry_t entry)
10707 {
10708 vm_object_t object;
10709
10710 if (entry->is_shared ||
10711 entry->is_sub_map ||
10712 entry->in_transition ||
10713 entry->protection != VM_PROT_DEFAULT ||
10714 entry->max_protection != VM_PROT_ALL ||
10715 entry->inheritance != VM_INHERIT_DEFAULT ||
10716 entry->no_cache ||
10717 entry->permanent ||
10718 entry->superpage_size != 0 ||
10719 entry->zero_wired_pages ||
10720 entry->wired_count != 0 ||
10721 entry->user_wired_count != 0) {
10722 return FALSE;
10723 }
10724
10725 object = entry->object.vm_object;
10726 if (object == VM_OBJECT_NULL) {
10727 return TRUE;
10728 }
10729 if (
10730 #if 0
10731 /*
10732 * Let's proceed even if the VM object is potentially
10733 * shared.
10734 * We check for this later when processing the actual
10735 * VM pages, so the contents will be safe if shared.
10736 *
10737 * But we can still mark this memory region as "reusable" to
10738 * acknowledge that the caller did let us know that the memory
10739 * could be re-used and should not be penalized for holding
10740 * on to it. This allows its "resident size" to not include
10741 * the reusable range.
10742 */
10743 object->ref_count == 1 &&
10744 #endif
10745 object->wired_page_count == 0 &&
10746 object->copy == VM_OBJECT_NULL &&
10747 object->shadow == VM_OBJECT_NULL &&
10748 object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
10749 object->internal &&
10750 !object->true_share &&
10751 object->wimg_bits == VM_WIMG_USE_DEFAULT &&
10752 !object->code_signed) {
10753 return TRUE;
10754 }
10755 return FALSE;
10756
10757
10758 }
10759
10760 static kern_return_t
10761 vm_map_reuse_pages(
10762 vm_map_t map,
10763 vm_map_offset_t start,
10764 vm_map_offset_t end)
10765 {
10766 vm_map_entry_t entry;
10767 vm_object_t object;
10768 vm_object_offset_t start_offset, end_offset;
10769
10770 /*
10771 * The MADV_REUSE operation doesn't require any changes to the
10772 * vm_map_entry_t's, so the read lock is sufficient.
10773 */
10774
10775 vm_map_lock_read(map);
10776
10777 /*
10778 * The madvise semantics require that the address range be fully
10779 * allocated with no holes. Otherwise, we're required to return
10780 * an error.
10781 */
10782
10783 if (!vm_map_range_check(map, start, end, &entry)) {
10784 vm_map_unlock_read(map);
10785 vm_page_stats_reusable.reuse_pages_failure++;
10786 return KERN_INVALID_ADDRESS;
10787 }
10788
10789 /*
10790 * Examine each vm_map_entry_t in the range.
10791 */
10792 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
10793 entry = entry->vme_next) {
10794 /*
10795 * Sanity check on the VM map entry.
10796 */
10797 if (! vm_map_entry_is_reusable(entry)) {
10798 vm_map_unlock_read(map);
10799 vm_page_stats_reusable.reuse_pages_failure++;
10800 return KERN_INVALID_ADDRESS;
10801 }
10802
10803 /*
10804 * The first time through, the start address could be anywhere
10805 * within the vm_map_entry we found. So adjust the offset to
10806 * correspond.
10807 */
10808 if (entry->vme_start < start) {
10809 start_offset = start - entry->vme_start;
10810 } else {
10811 start_offset = 0;
10812 }
10813 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
10814 start_offset += entry->offset;
10815 end_offset += entry->offset;
10816
10817 object = entry->object.vm_object;
10818 if (object != VM_OBJECT_NULL) {
10819 vm_object_lock(object);
10820 vm_object_reuse_pages(object, start_offset, end_offset,
10821 TRUE);
10822 vm_object_unlock(object);
10823 }
10824
10825 if (entry->alias == VM_MEMORY_MALLOC_LARGE_REUSABLE) {
10826 /*
10827 * XXX
10828 * We do not hold the VM map exclusively here.
10829 * The "alias" field is not that critical, so it's
10830 * safe to update it here, as long as it is the only
10831 * one that can be modified while holding the VM map
10832 * "shared".
10833 */
10834 entry->alias = VM_MEMORY_MALLOC_LARGE_REUSED;
10835 }
10836 }
10837
10838 vm_map_unlock_read(map);
10839 vm_page_stats_reusable.reuse_pages_success++;
10840 return KERN_SUCCESS;
10841 }
10842
10843
10844 static kern_return_t
10845 vm_map_reusable_pages(
10846 vm_map_t map,
10847 vm_map_offset_t start,
10848 vm_map_offset_t end)
10849 {
10850 vm_map_entry_t entry;
10851 vm_object_t object;
10852 vm_object_offset_t start_offset, end_offset;
10853
10854 /*
10855 * The MADV_REUSABLE operation doesn't require any changes to the
10856 * vm_map_entry_t's, so the read lock is sufficient.
10857 */
10858
10859 vm_map_lock_read(map);
10860
10861 /*
10862 * The madvise semantics require that the address range be fully
10863 * allocated with no holes. Otherwise, we're required to return
10864 * an error.
10865 */
10866
10867 if (!vm_map_range_check(map, start, end, &entry)) {
10868 vm_map_unlock_read(map);
10869 vm_page_stats_reusable.reusable_pages_failure++;
10870 return KERN_INVALID_ADDRESS;
10871 }
10872
10873 /*
10874 * Examine each vm_map_entry_t in the range.
10875 */
10876 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
10877 entry = entry->vme_next) {
10878 int kill_pages = 0;
10879
10880 /*
10881 * Sanity check on the VM map entry.
10882 */
10883 if (! vm_map_entry_is_reusable(entry)) {
10884 vm_map_unlock_read(map);
10885 vm_page_stats_reusable.reusable_pages_failure++;
10886 return KERN_INVALID_ADDRESS;
10887 }
10888
10889 /*
10890 * The first time through, the start address could be anywhere
10891 * within the vm_map_entry we found. So adjust the offset to
10892 * correspond.
10893 */
10894 if (entry->vme_start < start) {
10895 start_offset = start - entry->vme_start;
10896 } else {
10897 start_offset = 0;
10898 }
10899 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
10900 start_offset += entry->offset;
10901 end_offset += entry->offset;
10902
10903 object = entry->object.vm_object;
10904 if (object == VM_OBJECT_NULL)
10905 continue;
10906
10907
10908 vm_object_lock(object);
10909 if (object->ref_count == 1 && !object->shadow)
10910 kill_pages = 1;
10911 else
10912 kill_pages = -1;
10913 if (kill_pages != -1) {
10914 vm_object_deactivate_pages(object,
10915 start_offset,
10916 end_offset - start_offset,
10917 kill_pages,
10918 TRUE /*reusable_pages*/);
10919 } else {
10920 vm_page_stats_reusable.reusable_pages_shared++;
10921 }
10922 vm_object_unlock(object);
10923
10924 if (entry->alias == VM_MEMORY_MALLOC_LARGE ||
10925 entry->alias == VM_MEMORY_MALLOC_LARGE_REUSED) {
10926 /*
10927 * XXX
10928 * We do not hold the VM map exclusively here.
10929 * The "alias" field is not that critical, so it's
10930 * safe to update it here, as long as it is the only
10931 * one that can be modified while holding the VM map
10932 * "shared".
10933 */
10934 entry->alias = VM_MEMORY_MALLOC_LARGE_REUSABLE;
10935 }
10936 }
10937
10938 vm_map_unlock_read(map);
10939 vm_page_stats_reusable.reusable_pages_success++;
10940 return KERN_SUCCESS;
10941 }
10942
10943
10944 static kern_return_t
10945 vm_map_can_reuse(
10946 vm_map_t map,
10947 vm_map_offset_t start,
10948 vm_map_offset_t end)
10949 {
10950 vm_map_entry_t entry;
10951
10952 /*
10953 * The MADV_REUSABLE operation doesn't require any changes to the
10954 * vm_map_entry_t's, so the read lock is sufficient.
10955 */
10956
10957 vm_map_lock_read(map);
10958
10959 /*
10960 * The madvise semantics require that the address range be fully
10961 * allocated with no holes. Otherwise, we're required to return
10962 * an error.
10963 */
10964
10965 if (!vm_map_range_check(map, start, end, &entry)) {
10966 vm_map_unlock_read(map);
10967 vm_page_stats_reusable.can_reuse_failure++;
10968 return KERN_INVALID_ADDRESS;
10969 }
10970
10971 /*
10972 * Examine each vm_map_entry_t in the range.
10973 */
10974 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
10975 entry = entry->vme_next) {
10976 /*
10977 * Sanity check on the VM map entry.
10978 */
10979 if (! vm_map_entry_is_reusable(entry)) {
10980 vm_map_unlock_read(map);
10981 vm_page_stats_reusable.can_reuse_failure++;
10982 return KERN_INVALID_ADDRESS;
10983 }
10984 }
10985
10986 vm_map_unlock_read(map);
10987 vm_page_stats_reusable.can_reuse_success++;
10988 return KERN_SUCCESS;
10989 }
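/*
 * Illustrative sketch (editorial addition): the REUSABLE / REUSE /
 * CAN_REUSE operations above back the Darwin-specific madvise flags
 * that allocators use to mark freed-but-cached ranges.  A hedged sketch
 * of the expected calling pattern ("chunk" / "chunk_size" are
 * placeholders for a range the allocator owns):
 *
 *	(void) madvise(chunk, chunk_size, MADV_FREE_REUSABLE);
 *	...
 *	(void) madvise(chunk, chunk_size, MADV_FREE_REUSE);
 *
 * MADV_FREE_REUSABLE lets the pages be reclaimed without being counted
 * against the task's resident size; MADV_FREE_REUSE is issued before
 * the range is handed out again.
 */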
10990
10991
10992 /*
10993 * Routine: vm_map_entry_insert
10994 *
10995 * Description: This routine inserts a new vm_map_entry in a locked map.
10996 */
10997 vm_map_entry_t
10998 vm_map_entry_insert(
10999 vm_map_t map,
11000 vm_map_entry_t insp_entry,
11001 vm_map_offset_t start,
11002 vm_map_offset_t end,
11003 vm_object_t object,
11004 vm_object_offset_t offset,
11005 boolean_t needs_copy,
11006 boolean_t is_shared,
11007 boolean_t in_transition,
11008 vm_prot_t cur_protection,
11009 vm_prot_t max_protection,
11010 vm_behavior_t behavior,
11011 vm_inherit_t inheritance,
11012 unsigned wired_count,
11013 boolean_t no_cache,
11014 boolean_t permanent,
11015 unsigned int superpage_size)
11016 {
11017 vm_map_entry_t new_entry;
11018
11019 assert(insp_entry != (vm_map_entry_t)0);
11020
11021 new_entry = vm_map_entry_create(map, !map->hdr.entries_pageable);
11022
11023 new_entry->vme_start = start;
11024 new_entry->vme_end = end;
11025 assert(page_aligned(new_entry->vme_start));
11026 assert(page_aligned(new_entry->vme_end));
11027 assert(new_entry->vme_start < new_entry->vme_end);
11028
11029 new_entry->object.vm_object = object;
11030 new_entry->offset = offset;
11031 new_entry->is_shared = is_shared;
11032 new_entry->is_sub_map = FALSE;
11033 new_entry->needs_copy = needs_copy;
11034 new_entry->in_transition = in_transition;
11035 new_entry->needs_wakeup = FALSE;
11036 new_entry->inheritance = inheritance;
11037 new_entry->protection = cur_protection;
11038 new_entry->max_protection = max_protection;
11039 new_entry->behavior = behavior;
11040 new_entry->wired_count = wired_count;
11041 new_entry->user_wired_count = 0;
11042 new_entry->use_pmap = FALSE;
11043 new_entry->alias = 0;
11044 new_entry->zero_wired_pages = FALSE;
11045 new_entry->no_cache = no_cache;
11046 new_entry->permanent = permanent;
11047 new_entry->superpage_size = superpage_size;
11048 new_entry->used_for_jit = FALSE;
11049
11050 /*
11051 * Insert the new entry into the list.
11052 */
11053
11054 vm_map_store_entry_link(map, insp_entry, new_entry);
11055 map->size += end - start;
11056
11057 /*
11058 * Update the free space hint and the lookup hint.
11059 */
11060
11061 SAVE_HINT_MAP_WRITE(map, new_entry);
11062 return new_entry;
11063 }
11064
11065 /*
11066 * Routine: vm_map_remap_extract
11067 *
11068 * Description: This routine returns a vm_map_entry list from a map.
11069 */
11070 static kern_return_t
11071 vm_map_remap_extract(
11072 vm_map_t map,
11073 vm_map_offset_t addr,
11074 vm_map_size_t size,
11075 boolean_t copy,
11076 struct vm_map_header *map_header,
11077 vm_prot_t *cur_protection,
11078 vm_prot_t *max_protection,
11079 /* What, no behavior? */
11080 vm_inherit_t inheritance,
11081 boolean_t pageable)
11082 {
11083 kern_return_t result;
11084 vm_map_size_t mapped_size;
11085 vm_map_size_t tmp_size;
11086 vm_map_entry_t src_entry; /* result of last map lookup */
11087 vm_map_entry_t new_entry;
11088 vm_object_offset_t offset;
11089 vm_map_offset_t map_address;
11090 vm_map_offset_t src_start; /* start of entry to map */
11091 vm_map_offset_t src_end; /* end of region to be mapped */
11092 vm_object_t object;
11093 vm_map_version_t version;
11094 boolean_t src_needs_copy;
11095 boolean_t new_entry_needs_copy;
11096
11097 assert(map != VM_MAP_NULL);
11098 assert(size != 0 && size == vm_map_round_page(size));
11099 assert(inheritance == VM_INHERIT_NONE ||
11100 inheritance == VM_INHERIT_COPY ||
11101 inheritance == VM_INHERIT_SHARE);
11102
11103 /*
11104 * Compute start and end of region.
11105 */
11106 src_start = vm_map_trunc_page(addr);
11107 src_end = vm_map_round_page(src_start + size);
11108
11109 /*
11110 * Initialize map_header.
11111 */
11112 map_header->links.next = (struct vm_map_entry *)&map_header->links;
11113 map_header->links.prev = (struct vm_map_entry *)&map_header->links;
11114 map_header->nentries = 0;
11115 map_header->entries_pageable = pageable;
11116
11117 vm_map_store_init( map_header );
11118
11119 *cur_protection = VM_PROT_ALL;
11120 *max_protection = VM_PROT_ALL;
11121
11122 map_address = 0;
11123 mapped_size = 0;
11124 result = KERN_SUCCESS;
11125
11126 /*
11127 * The specified source virtual space might correspond to
11128 * multiple map entries, need to loop on them.
11129 */
11130 vm_map_lock(map);
11131 while (mapped_size != size) {
11132 vm_map_size_t entry_size;
11133
11134 /*
11135 * Find the beginning of the region.
11136 */
11137 if (! vm_map_lookup_entry(map, src_start, &src_entry)) {
11138 result = KERN_INVALID_ADDRESS;
11139 break;
11140 }
11141
11142 if (src_start < src_entry->vme_start ||
11143 (mapped_size && src_start != src_entry->vme_start)) {
11144 result = KERN_INVALID_ADDRESS;
11145 break;
11146 }
11147
11148 tmp_size = size - mapped_size;
11149 if (src_end > src_entry->vme_end)
11150 tmp_size -= (src_end - src_entry->vme_end);
11151
11152 entry_size = (vm_map_size_t)(src_entry->vme_end -
11153 src_entry->vme_start);
11154
11155 if(src_entry->is_sub_map) {
11156 vm_map_reference(src_entry->object.sub_map);
11157 object = VM_OBJECT_NULL;
11158 } else {
11159 object = src_entry->object.vm_object;
11160
11161 if (object == VM_OBJECT_NULL) {
11162 object = vm_object_allocate(entry_size);
11163 src_entry->offset = 0;
11164 src_entry->object.vm_object = object;
11165 } else if (object->copy_strategy !=
11166 MEMORY_OBJECT_COPY_SYMMETRIC) {
11167 /*
11168 * We are already using an asymmetric
11169 * copy, and therefore we already have
11170 * the right object.
11171 */
11172 assert(!src_entry->needs_copy);
11173 } else if (src_entry->needs_copy || object->shadowed ||
11174 (object->internal && !object->true_share &&
11175 !src_entry->is_shared &&
11176 object->vo_size > entry_size)) {
11177
11178 vm_object_shadow(&src_entry->object.vm_object,
11179 &src_entry->offset,
11180 entry_size);
11181
11182 if (!src_entry->needs_copy &&
11183 (src_entry->protection & VM_PROT_WRITE)) {
11184 vm_prot_t prot;
11185
11186 prot = src_entry->protection & ~VM_PROT_WRITE;
11187
11188 if (override_nx(map, src_entry->alias) && prot)
11189 prot |= VM_PROT_EXECUTE;
11190
11191 if(map->mapped_in_other_pmaps) {
11192 vm_object_pmap_protect(
11193 src_entry->object.vm_object,
11194 src_entry->offset,
11195 entry_size,
11196 PMAP_NULL,
11197 src_entry->vme_start,
11198 prot);
11199 } else {
11200 pmap_protect(vm_map_pmap(map),
11201 src_entry->vme_start,
11202 src_entry->vme_end,
11203 prot);
11204 }
11205 }
11206
11207 object = src_entry->object.vm_object;
11208 src_entry->needs_copy = FALSE;
11209 }
11210
11211
11212 vm_object_lock(object);
11213 vm_object_reference_locked(object); /* object ref. for new entry */
11214 if (object->copy_strategy ==
11215 MEMORY_OBJECT_COPY_SYMMETRIC) {
11216 object->copy_strategy =
11217 MEMORY_OBJECT_COPY_DELAY;
11218 }
11219 vm_object_unlock(object);
11220 }
11221
11222 offset = src_entry->offset + (src_start - src_entry->vme_start);
11223
11224 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
11225 vm_map_entry_copy(new_entry, src_entry);
11226 new_entry->use_pmap = FALSE; /* clr address space specifics */
11227
11228 new_entry->vme_start = map_address;
11229 new_entry->vme_end = map_address + tmp_size;
11230 assert(new_entry->vme_start < new_entry->vme_end);
11231 new_entry->inheritance = inheritance;
11232 new_entry->offset = offset;
11233
11234 /*
11235 * The new region has to be copied now if required.
11236 */
11237 RestartCopy:
11238 if (!copy) {
11239 /*
11240 * Cannot allow an entry describing a JIT
11241 * region to be shared across address spaces.
11242 */
11243 if (src_entry->used_for_jit == TRUE) {
11244 result = KERN_INVALID_ARGUMENT;
11245 break;
11246 }
11247 src_entry->is_shared = TRUE;
11248 new_entry->is_shared = TRUE;
11249 if (!(new_entry->is_sub_map))
11250 new_entry->needs_copy = FALSE;
11251
11252 } else if (src_entry->is_sub_map) {
11253 /* make this a COW sub_map if not already */
11254 new_entry->needs_copy = TRUE;
11255 object = VM_OBJECT_NULL;
11256 } else if (src_entry->wired_count == 0 &&
11257 vm_object_copy_quickly(&new_entry->object.vm_object,
11258 new_entry->offset,
11259 (new_entry->vme_end -
11260 new_entry->vme_start),
11261 &src_needs_copy,
11262 &new_entry_needs_copy)) {
11263
11264 new_entry->needs_copy = new_entry_needs_copy;
11265 new_entry->is_shared = FALSE;
11266
11267 /*
11268 * Handle copy_on_write semantics.
11269 */
11270 if (src_needs_copy && !src_entry->needs_copy) {
11271 vm_prot_t prot;
11272
11273 prot = src_entry->protection & ~VM_PROT_WRITE;
11274
11275 if (override_nx(map, src_entry->alias) && prot)
11276 prot |= VM_PROT_EXECUTE;
11277
11278 vm_object_pmap_protect(object,
11279 offset,
11280 entry_size,
11281 ((src_entry->is_shared
11282 || map->mapped_in_other_pmaps) ?
11283 PMAP_NULL : map->pmap),
11284 src_entry->vme_start,
11285 prot);
11286
11287 src_entry->needs_copy = TRUE;
11288 }
11289 /*
11290 * Throw away the old object reference of the new entry.
11291 */
11292 vm_object_deallocate(object);
11293
11294 } else {
11295 new_entry->is_shared = FALSE;
11296
11297 /*
11298 * The map can be safely unlocked since we
11299 * already hold a reference on the object.
11300 *
11301 * Record the timestamp of the map for later
11302 * verification, and unlock the map.
11303 */
11304 version.main_timestamp = map->timestamp;
11305 vm_map_unlock(map); /* Increments timestamp once! */
11306
11307 /*
11308 * Perform the copy.
11309 */
11310 if (src_entry->wired_count > 0) {
11311 vm_object_lock(object);
11312 result = vm_object_copy_slowly(
11313 object,
11314 offset,
11315 entry_size,
11316 THREAD_UNINT,
11317 &new_entry->object.vm_object);
11318
11319 new_entry->offset = 0;
11320 new_entry->needs_copy = FALSE;
11321 } else {
11322 result = vm_object_copy_strategically(
11323 object,
11324 offset,
11325 entry_size,
11326 &new_entry->object.vm_object,
11327 &new_entry->offset,
11328 &new_entry_needs_copy);
11329
11330 new_entry->needs_copy = new_entry_needs_copy;
11331 }
11332
11333 /*
11334 * Throw away the old object reference of the new entry.
11335 */
11336 vm_object_deallocate(object);
11337
11338 if (result != KERN_SUCCESS &&
11339 result != KERN_MEMORY_RESTART_COPY) {
11340 _vm_map_entry_dispose(map_header, new_entry);
11341 break;
11342 }
11343
11344 /*
11345 * Verify that the map has not substantially
11346 * changed while the copy was being made.
11347 */
11348
11349 vm_map_lock(map);
11350 if (version.main_timestamp + 1 != map->timestamp) {
11351 /*
11352 * Simple version comparison failed.
11353 *
11354 * Retry the lookup and verify that the
11355 * same object/offset are still present.
11356 */
11357 vm_object_deallocate(new_entry->
11358 object.vm_object);
11359 _vm_map_entry_dispose(map_header, new_entry);
11360 if (result == KERN_MEMORY_RESTART_COPY)
11361 result = KERN_SUCCESS;
11362 continue;
11363 }
11364
11365 if (result == KERN_MEMORY_RESTART_COPY) {
11366 vm_object_reference(object);
11367 goto RestartCopy;
11368 }
11369 }
11370
11371 _vm_map_store_entry_link(map_header,
11372 map_header->links.prev, new_entry);
11373
11374 /* Protections for submap mapping are irrelevant here */
11375 if( !src_entry->is_sub_map ) {
11376 *cur_protection &= src_entry->protection;
11377 *max_protection &= src_entry->max_protection;
11378 }
11379 map_address += tmp_size;
11380 mapped_size += tmp_size;
11381 src_start += tmp_size;
11382
11383 } /* end while */
11384
11385 vm_map_unlock(map);
11386 if (result != KERN_SUCCESS) {
11387 /*
11388 * Free all allocated elements.
11389 */
11390 for (src_entry = map_header->links.next;
11391 src_entry != (struct vm_map_entry *)&map_header->links;
11392 src_entry = new_entry) {
11393 new_entry = src_entry->vme_next;
11394 _vm_map_store_entry_unlink(map_header, src_entry);
11395 vm_object_deallocate(src_entry->object.vm_object);
11396 _vm_map_entry_dispose(map_header, src_entry);
11397 }
11398 }
11399 return result;
11400 }
11401
11402 /*
11403 * Routine: vm_remap
11404 *
11405 * Map portion of a task's address space.
11406 * Mapped region must not overlap more than
11407 * one vm memory object. Protections and
11408 * inheritance attributes remain the same
11409 * as in the original task and are out parameters.
11410 * Source and target task can be identical.
11411 * Other attributes are identical as for vm_map().
11412 */
11413 kern_return_t
11414 vm_map_remap(
11415 vm_map_t target_map,
11416 vm_map_address_t *address,
11417 vm_map_size_t size,
11418 vm_map_offset_t mask,
11419 int flags,
11420 vm_map_t src_map,
11421 vm_map_offset_t memory_address,
11422 boolean_t copy,
11423 vm_prot_t *cur_protection,
11424 vm_prot_t *max_protection,
11425 vm_inherit_t inheritance)
11426 {
11427 kern_return_t result;
11428 vm_map_entry_t entry;
11429 vm_map_entry_t insp_entry = VM_MAP_ENTRY_NULL;
11430 vm_map_entry_t new_entry;
11431 struct vm_map_header map_header;
11432
11433 if (target_map == VM_MAP_NULL)
11434 return KERN_INVALID_ARGUMENT;
11435
11436 switch (inheritance) {
11437 case VM_INHERIT_NONE:
11438 case VM_INHERIT_COPY:
11439 case VM_INHERIT_SHARE:
11440 if (size != 0 && src_map != VM_MAP_NULL)
11441 break;
11442 /*FALL THRU*/
11443 default:
11444 return KERN_INVALID_ARGUMENT;
11445 }
11446
11447 size = vm_map_round_page(size);
11448
11449 result = vm_map_remap_extract(src_map, memory_address,
11450 size, copy, &map_header,
11451 cur_protection,
11452 max_protection,
11453 inheritance,
11454 target_map->hdr.
11455 entries_pageable);
11456
11457 if (result != KERN_SUCCESS) {
11458 return result;
11459 }
11460
11461 /*
11462 * Allocate/check a range of free virtual address
11463 * space for the target
11464 */
11465 *address = vm_map_trunc_page(*address);
11466 vm_map_lock(target_map);
11467 result = vm_map_remap_range_allocate(target_map, address, size,
11468 mask, flags, &insp_entry);
11469
11470 for (entry = map_header.links.next;
11471 entry != (struct vm_map_entry *)&map_header.links;
11472 entry = new_entry) {
11473 new_entry = entry->vme_next;
11474 _vm_map_store_entry_unlink(&map_header, entry);
11475 if (result == KERN_SUCCESS) {
11476 entry->vme_start += *address;
11477 entry->vme_end += *address;
11478 vm_map_store_entry_link(target_map, insp_entry, entry);
11479 insp_entry = entry;
11480 } else {
11481 if (!entry->is_sub_map) {
11482 vm_object_deallocate(entry->object.vm_object);
11483 } else {
11484 vm_map_deallocate(entry->object.sub_map);
11485 }
11486 _vm_map_entry_dispose(&map_header, entry);
11487 }
11488 }
11489
11490 if( target_map->disable_vmentry_reuse == TRUE) {
11491 if( target_map->highest_entry_end < insp_entry->vme_end ){
11492 target_map->highest_entry_end = insp_entry->vme_end;
11493 }
11494 }
11495
11496 if (result == KERN_SUCCESS) {
11497 target_map->size += size;
11498 SAVE_HINT_MAP_WRITE(target_map, insp_entry);
11499 }
11500 vm_map_unlock(target_map);
11501
11502 if (result == KERN_SUCCESS && target_map->wiring_required)
11503 result = vm_map_wire(target_map, *address,
11504 *address + size, *cur_protection, TRUE);
11505 return result;
11506 }
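/*
 * Illustrative sketch (editorial addition): a user-space caller can
 * reach vm_map_remap() through mach_vm_remap(), e.g. to create a second
 * mapping of an existing range within its own address space.  "size"
 * and "src_addr" are placeholders for an existing mapping:
 *
 *	mach_vm_address_t	target = 0;
 *	vm_prot_t		cur_prot, max_prot;
 *	kern_return_t		kr;
 *
 *	kr = mach_vm_remap(mach_task_self(), &target, size, 0,
 *			   VM_FLAGS_ANYWHERE,
 *			   mach_task_self(), src_addr,
 *			   FALSE,
 *			   &cur_prot, &max_prot, VM_INHERIT_SHARE);
 *
 * Passing FALSE for "copy" shares the underlying objects rather than
 * copying them; the effective protections come back in cur_prot and
 * max_prot, as described above.
 */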
11507
11508 /*
11509 * Routine: vm_map_remap_range_allocate
11510 *
11511 * Description:
11512 * Allocate a range in the specified virtual address map.
11513 * Returns the address and the map entry just before the allocated
11514 * range.
11515 *
11516 * Map must be locked.
11517 */
11518
11519 static kern_return_t
11520 vm_map_remap_range_allocate(
11521 vm_map_t map,
11522 vm_map_address_t *address, /* IN/OUT */
11523 vm_map_size_t size,
11524 vm_map_offset_t mask,
11525 int flags,
11526 vm_map_entry_t *map_entry) /* OUT */
11527 {
11528 vm_map_entry_t entry;
11529 vm_map_offset_t start;
11530 vm_map_offset_t end;
11531 kern_return_t kr;
11532
11533 StartAgain: ;
11534
11535 start = *address;
11536
11537 if (flags & VM_FLAGS_ANYWHERE)
11538 {
11539 /*
11540 * Calculate the first possible address.
11541 */
11542
11543 if (start < map->min_offset)
11544 start = map->min_offset;
11545 if (start > map->max_offset)
11546 return(KERN_NO_SPACE);
11547
11548 /*
11549 * Look for the first possible address;
11550 * if there's already something at this
11551 * address, we have to start after it.
11552 */
11553
11554 if( map->disable_vmentry_reuse == TRUE) {
11555 VM_MAP_HIGHEST_ENTRY(map, entry, start);
11556 } else {
11557 assert(first_free_is_valid(map));
11558 if (start == map->min_offset) {
11559 if ((entry = map->first_free) != vm_map_to_entry(map))
11560 start = entry->vme_end;
11561 } else {
11562 vm_map_entry_t tmp_entry;
11563 if (vm_map_lookup_entry(map, start, &tmp_entry))
11564 start = tmp_entry->vme_end;
11565 entry = tmp_entry;
11566 }
11567 }
11568
11569 /*
11570 * In any case, the "entry" always precedes
11571 * the proposed new region throughout the
11572 * loop:
11573 */
11574
11575 while (TRUE) {
11576 register vm_map_entry_t next;
11577
11578 /*
11579 * Find the end of the proposed new region.
11580 * Be sure we didn't go beyond the end, or
11581 * wrap around the address.
11582 */
11583
11584 end = ((start + mask) & ~mask);
11585 if (end < start)
11586 return(KERN_NO_SPACE);
11587 start = end;
11588 end += size;
11589
11590 if ((end > map->max_offset) || (end < start)) {
11591 if (map->wait_for_space) {
11592 if (size <= (map->max_offset -
11593 map->min_offset)) {
11594 assert_wait((event_t) map, THREAD_INTERRUPTIBLE);
11595 vm_map_unlock(map);
11596 thread_block(THREAD_CONTINUE_NULL);
11597 vm_map_lock(map);
11598 goto StartAgain;
11599 }
11600 }
11601
11602 return(KERN_NO_SPACE);
11603 }
11604
11605 /*
11606 * If there are no more entries, we must win.
11607 */
11608
11609 next = entry->vme_next;
11610 if (next == vm_map_to_entry(map))
11611 break;
11612
11613 /*
11614 * If there is another entry, it must be
11615 * after the end of the potential new region.
11616 */
11617
11618 if (next->vme_start >= end)
11619 break;
11620
11621 /*
11622 * Didn't fit -- move to the next entry.
11623 */
11624
11625 entry = next;
11626 start = entry->vme_end;
11627 }
11628 *address = start;
11629 } else {
11630 vm_map_entry_t temp_entry;
11631
11632 /*
11633 * Verify that:
11634 * the address doesn't itself violate
11635 * the mask requirement.
11636 */
11637
11638 if ((start & mask) != 0)
11639 return(KERN_NO_SPACE);
11640
11641
11642 /*
11643 * ... the address is within bounds
11644 */
11645
11646 end = start + size;
11647
11648 if ((start < map->min_offset) ||
11649 (end > map->max_offset) ||
11650 (start >= end)) {
11651 return(KERN_INVALID_ADDRESS);
11652 }
11653
11654 /*
11655 * If we're asked to overwrite whatever was mapped in that
11656 * range, first deallocate that range.
11657 */
11658 if (flags & VM_FLAGS_OVERWRITE) {
11659 vm_map_t zap_map;
11660
11661 /*
11662 * We use a "zap_map" to avoid having to unlock
11663 * the "map" in vm_map_delete(), which would compromise
11664 * the atomicity of the "deallocate" and then "remap"
11665 * combination.
11666 */
11667 zap_map = vm_map_create(PMAP_NULL,
11668 start,
11669 end,
11670 map->hdr.entries_pageable);
11671 if (zap_map == VM_MAP_NULL) {
11672 return KERN_RESOURCE_SHORTAGE;
11673 }
11674
11675 kr = vm_map_delete(map, start, end,
11676 VM_MAP_REMOVE_SAVE_ENTRIES,
11677 zap_map);
11678 if (kr == KERN_SUCCESS) {
11679 vm_map_destroy(zap_map,
11680 VM_MAP_REMOVE_NO_PMAP_CLEANUP);
11681 zap_map = VM_MAP_NULL;
11682 }
11683 }
11684
11685 /*
11686 * ... the starting address isn't allocated
11687 */
11688
11689 if (vm_map_lookup_entry(map, start, &temp_entry))
11690 return(KERN_NO_SPACE);
11691
11692 entry = temp_entry;
11693
11694 /*
11695 * ... the next region doesn't overlap the
11696 * end point.
11697 */
11698
11699 if ((entry->vme_next != vm_map_to_entry(map)) &&
11700 (entry->vme_next->vme_start < end))
11701 return(KERN_NO_SPACE);
11702 }
11703 *map_entry = entry;
11704 return(KERN_SUCCESS);
11705 }
11706
11707 /*
11708 * vm_map_switch:
11709 *
11710 * Set the address map for the current thread to the specified map
11711 */
11712
11713 vm_map_t
11714 vm_map_switch(
11715 vm_map_t map)
11716 {
11717 int mycpu;
11718 thread_t thread = current_thread();
11719 vm_map_t oldmap = thread->map;
11720
11721 mp_disable_preemption();
11722 mycpu = cpu_number();
11723
11724 /*
11725 * Deactivate the current map and activate the requested map
11726 */
11727 PMAP_SWITCH_USER(thread, map, mycpu);
11728
11729 mp_enable_preemption();
11730 return(oldmap);
11731 }
11732
11733
11734 /*
11735 * Routine: vm_map_write_user
11736 *
11737 * Description:
11738 * Copy out data from a kernel space into space in the
11739 * destination map. The space must already exist in the
11740 * destination map.
11741 * NOTE: This routine should only be called by threads
11742 * which can block on a page fault, i.e. kernel-mode user
11743 * threads.
11744 *
11745 */
11746 kern_return_t
11747 vm_map_write_user(
11748 vm_map_t map,
11749 void *src_p,
11750 vm_map_address_t dst_addr,
11751 vm_size_t size)
11752 {
11753 kern_return_t kr = KERN_SUCCESS;
11754
11755 if(current_map() == map) {
11756 if (copyout(src_p, dst_addr, size)) {
11757 kr = KERN_INVALID_ADDRESS;
11758 }
11759 } else {
11760 vm_map_t oldmap;
11761
11762 /* take on the identity of the target map while doing */
11763 /* the transfer */
11764
11765 vm_map_reference(map);
11766 oldmap = vm_map_switch(map);
11767 if (copyout(src_p, dst_addr, size)) {
11768 kr = KERN_INVALID_ADDRESS;
11769 }
11770 vm_map_switch(oldmap);
11771 vm_map_deallocate(map);
11772 }
11773 return kr;
11774 }
11775
11776 /*
11777 * Routine: vm_map_read_user
11778 *
11779 * Description:
11780 * Copy in data from a user space source map into the
11781 * kernel map. The space must already exist in the
11782 * kernel map.
11783 * NOTE: This routine should only be called by threads
11784 * which can block on a page fault, i.e. kernel-mode user
11785 * threads.
11786 *
11787 */
11788 kern_return_t
11789 vm_map_read_user(
11790 vm_map_t map,
11791 vm_map_address_t src_addr,
11792 void *dst_p,
11793 vm_size_t size)
11794 {
11795 kern_return_t kr = KERN_SUCCESS;
11796
11797 if(current_map() == map) {
11798 if (copyin(src_addr, dst_p, size)) {
11799 kr = KERN_INVALID_ADDRESS;
11800 }
11801 } else {
11802 vm_map_t oldmap;
11803
11804 /* take on the identity of the target map while doing */
11805 /* the transfer */
11806
11807 vm_map_reference(map);
11808 oldmap = vm_map_switch(map);
11809 if (copyin(src_addr, dst_p, size)) {
11810 kr = KERN_INVALID_ADDRESS;
11811 }
11812 vm_map_switch(oldmap);
11813 vm_map_deallocate(map);
11814 }
11815 return kr;
11816 }
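/*
 * Illustrative sketch (editorial addition): a kernel-mode user thread
 * holding a reference to another task's map could move data with the
 * two routines above.  "user_map" and "user_addr" are assumed to be a
 * valid map and a mapped, writable address within it:
 *
 *	char		local[64];
 *	kern_return_t	kr;
 *
 *	kr = vm_map_read_user(user_map, user_addr, local, sizeof (local));
 *	if (kr == KERN_SUCCESS)
 *		kr = vm_map_write_user(user_map, local, user_addr,
 *				       sizeof (local));
 *
 * Both calls may fault and block, which is why they must not be used
 * from contexts that cannot take a page fault.
 */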
11817
11818
11819 /*
11820 * vm_map_check_protection:
11821 *
11822 * Assert that the target map allows the specified
11823 * privilege on the entire address region given.
11824 * The entire region must be allocated.
11825 */
11826 boolean_t
11827 vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
11828 vm_map_offset_t end, vm_prot_t protection)
11829 {
11830 vm_map_entry_t entry;
11831 vm_map_entry_t tmp_entry;
11832
11833 vm_map_lock(map);
11834
11835 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
11836 {
11837 vm_map_unlock(map);
11838 return (FALSE);
11839 }
11840
11841 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
11842 vm_map_unlock(map);
11843 return(FALSE);
11844 }
11845
11846 entry = tmp_entry;
11847
11848 while (start < end) {
11849 if (entry == vm_map_to_entry(map)) {
11850 vm_map_unlock(map);
11851 return(FALSE);
11852 }
11853
11854 /*
11855 * No holes allowed!
11856 */
11857
11858 if (start < entry->vme_start) {
11859 vm_map_unlock(map);
11860 return(FALSE);
11861 }
11862
11863 /*
11864 * Check protection associated with entry.
11865 */
11866
11867 if ((entry->protection & protection) != protection) {
11868 vm_map_unlock(map);
11869 return(FALSE);
11870 }
11871
11872 /* go to next entry */
11873
11874 start = entry->vme_end;
11875 entry = entry->vme_next;
11876 }
11877 vm_map_unlock(map);
11878 return(TRUE);
11879 }
11880
11881 kern_return_t
11882 vm_map_purgable_control(
11883 vm_map_t map,
11884 vm_map_offset_t address,
11885 vm_purgable_t control,
11886 int *state)
11887 {
11888 vm_map_entry_t entry;
11889 vm_object_t object;
11890 kern_return_t kr;
11891
11892 /*
11893 * Vet all the input parameters and current type and state of the
11894 * underlying object. Return with an error if anything is amiss.
11895 */
11896 if (map == VM_MAP_NULL)
11897 return(KERN_INVALID_ARGUMENT);
11898
11899 if (control != VM_PURGABLE_SET_STATE &&
11900 control != VM_PURGABLE_GET_STATE &&
11901 control != VM_PURGABLE_PURGE_ALL)
11902 return(KERN_INVALID_ARGUMENT);
11903
11904 if (control == VM_PURGABLE_PURGE_ALL) {
11905 vm_purgeable_object_purge_all();
11906 return KERN_SUCCESS;
11907 }
11908
11909 if (control == VM_PURGABLE_SET_STATE &&
11910 (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) ||
11911 ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK)))
11912 return(KERN_INVALID_ARGUMENT);
11913
11914 vm_map_lock_read(map);
11915
11916 if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {
11917
11918 /*
11919 * Must pass a valid non-submap address.
11920 */
11921 vm_map_unlock_read(map);
11922 return(KERN_INVALID_ADDRESS);
11923 }
11924
11925 if ((entry->protection & VM_PROT_WRITE) == 0) {
11926 /*
11927 * Can't apply purgable controls to something you can't write.
11928 */
11929 vm_map_unlock_read(map);
11930 return(KERN_PROTECTION_FAILURE);
11931 }
11932
11933 object = entry->object.vm_object;
11934 if (object == VM_OBJECT_NULL) {
11935 /*
11936 * Object must already be present or it can't be purgable.
11937 */
11938 vm_map_unlock_read(map);
11939 return KERN_INVALID_ARGUMENT;
11940 }
11941
11942 vm_object_lock(object);
11943
11944 if (entry->offset != 0 ||
11945 entry->vme_end - entry->vme_start != object->vo_size) {
11946 /*
11947 * Can only apply purgable controls to the whole (existing)
11948 * object at once.
11949 */
11950 vm_map_unlock_read(map);
11951 vm_object_unlock(object);
11952 return KERN_INVALID_ARGUMENT;
11953 }
11954
11955 vm_map_unlock_read(map);
11956
11957 kr = vm_object_purgable_control(object, control, state);
11958
11959 vm_object_unlock(object);
11960
11961 return kr;
11962 }
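/*
 * Illustrative sketch (editorial addition): from user space the usual
 * pattern is to allocate purgeable memory and toggle its state around
 * uses of the cached contents.  A hedged example ("size" is a
 * placeholder):
 *
 *	vm_address_t	buf = 0;
 *	int		state;
 *	kern_return_t	kr;
 *
 *	kr = vm_allocate(mach_task_self(), &buf, size,
 *			 VM_FLAGS_ANYWHERE | VM_FLAGS_PURGABLE);
 *
 *	state = VM_PURGABLE_VOLATILE;
 *	kr = vm_purgable_control(mach_task_self(), buf,
 *				 VM_PURGABLE_SET_STATE, &state);
 *
 *	state = VM_PURGABLE_NONVOLATILE;
 *	kr = vm_purgable_control(mach_task_self(), buf,
 *				 VM_PURGABLE_SET_STATE, &state);
 *
 * VM_PURGABLE_SET_STATE returns the previous state in *state; if the
 * second call reports VM_PURGABLE_EMPTY, the kernel reclaimed the pages
 * while the object was volatile and the contents must be regenerated.
 */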
11963
11964 kern_return_t
11965 vm_map_page_query_internal(
11966 vm_map_t target_map,
11967 vm_map_offset_t offset,
11968 int *disposition,
11969 int *ref_count)
11970 {
11971 kern_return_t kr;
11972 vm_page_info_basic_data_t info;
11973 mach_msg_type_number_t count;
11974
11975 count = VM_PAGE_INFO_BASIC_COUNT;
11976 kr = vm_map_page_info(target_map,
11977 offset,
11978 VM_PAGE_INFO_BASIC,
11979 (vm_page_info_t) &info,
11980 &count);
11981 if (kr == KERN_SUCCESS) {
11982 *disposition = info.disposition;
11983 *ref_count = info.ref_count;
11984 } else {
11985 *disposition = 0;
11986 *ref_count = 0;
11987 }
11988
11989 return kr;
11990 }
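/*
 * Illustrative sketch (editorial addition): the per-page disposition
 * assembled below is also what mach_vm_page_query() reports to user
 * space.  "page_addr" is a placeholder for a page-aligned address in
 * the caller's map:
 *
 *	integer_t	disposition = 0, ref_count = 0;
 *	kern_return_t	kr;
 *
 *	kr = mach_vm_page_query(mach_task_self(),
 *				(mach_vm_offset_t) page_addr,
 *				&disposition, &ref_count);
 *
 * On success, a set VM_PAGE_QUERY_PAGE_PRESENT bit in "disposition"
 * means the page is resident somewhere in the shadow chain.
 * vm_map_page_query_internal() above is the kernel-side equivalent,
 * layered on vm_map_page_info().
 */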
11991
11992 kern_return_t
11993 vm_map_page_info(
11994 vm_map_t map,
11995 vm_map_offset_t offset,
11996 vm_page_info_flavor_t flavor,
11997 vm_page_info_t info,
11998 mach_msg_type_number_t *count)
11999 {
12000 vm_map_entry_t map_entry;
12001 vm_object_t object;
12002 vm_page_t m;
12003 kern_return_t kr;
12004 kern_return_t retval = KERN_SUCCESS;
12005 boolean_t top_object;
12006 int disposition;
12007 int ref_count;
12008 vm_object_id_t object_id;
12009 vm_page_info_basic_t basic_info;
12010 int depth;
12011 vm_map_offset_t offset_in_page;
12012
12013 switch (flavor) {
12014 case VM_PAGE_INFO_BASIC:
12015 if (*count != VM_PAGE_INFO_BASIC_COUNT) {
12016 /*
12017 * The "vm_page_info_basic_data" structure was not
12018 * properly padded, so allow the size to be off by
12019 * one to maintain backwards binary compatibility...
12020 */
12021 if (*count != VM_PAGE_INFO_BASIC_COUNT - 1)
12022 return KERN_INVALID_ARGUMENT;
12023 }
12024 break;
12025 default:
12026 return KERN_INVALID_ARGUMENT;
12027 }
12028
12029 disposition = 0;
12030 ref_count = 0;
12031 object_id = 0;
12032 top_object = TRUE;
12033 depth = 0;
12034
12035 retval = KERN_SUCCESS;
12036 offset_in_page = offset & PAGE_MASK;
12037 offset = vm_map_trunc_page(offset);
12038
12039 vm_map_lock_read(map);
12040
12041 /*
12042 * First, find the map entry covering "offset", going down
12043 * submaps if necessary.
12044 */
12045 for (;;) {
12046 if (!vm_map_lookup_entry(map, offset, &map_entry)) {
12047 vm_map_unlock_read(map);
12048 return KERN_INVALID_ADDRESS;
12049 }
12050 /* compute offset from this map entry's start */
12051 offset -= map_entry->vme_start;
12052 /* compute offset into this map entry's object (or submap) */
12053 offset += map_entry->offset;
12054
12055 if (map_entry->is_sub_map) {
12056 vm_map_t sub_map;
12057
12058 sub_map = map_entry->object.sub_map;
12059 vm_map_lock_read(sub_map);
12060 vm_map_unlock_read(map);
12061
12062 map = sub_map;
12063
12064 ref_count = MAX(ref_count, map->ref_count);
12065 continue;
12066 }
12067 break;
12068 }
12069
12070 object = map_entry->object.vm_object;
12071 if (object == VM_OBJECT_NULL) {
12072 /* no object -> no page */
12073 vm_map_unlock_read(map);
12074 goto done;
12075 }
12076
12077 vm_object_lock(object);
12078 vm_map_unlock_read(map);
12079
12080 /*
12081 * Go down the VM object shadow chain until we find the page
12082 * we're looking for.
12083 */
12084 for (;;) {
12085 ref_count = MAX(ref_count, object->ref_count);
12086
12087 m = vm_page_lookup(object, offset);
12088
12089 if (m != VM_PAGE_NULL) {
12090 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
12091 break;
12092 } else {
12093 #if MACH_PAGEMAP
12094 if (object->existence_map) {
12095 if (vm_external_state_get(object->existence_map,
12096 offset) ==
12097 VM_EXTERNAL_STATE_EXISTS) {
12098 /*
12099 * this page has been paged out
12100 */
12101 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
12102 break;
12103 }
12104 } else
12105 #endif
12106 {
12107 if (object->internal &&
12108 object->alive &&
12109 !object->terminating &&
12110 object->pager_ready) {
12111
12112 memory_object_t pager;
12113
12114 vm_object_paging_begin(object);
12115 pager = object->pager;
12116 vm_object_unlock(object);
12117
12118 /*
12119 * Ask the default pager if
12120 * it has this page.
12121 */
12122 kr = memory_object_data_request(
12123 pager,
12124 offset + object->paging_offset,
12125 0, /* just poke the pager */
12126 VM_PROT_READ,
12127 NULL);
12128
12129 vm_object_lock(object);
12130 vm_object_paging_end(object);
12131
12132 if (kr == KERN_SUCCESS) {
12133 /* the default pager has it */
12134 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
12135 break;
12136 }
12137 }
12138 }
12139
12140 if (object->shadow != VM_OBJECT_NULL) {
12141 vm_object_t shadow;
12142
12143 offset += object->vo_shadow_offset;
12144 shadow = object->shadow;
12145
12146 vm_object_lock(shadow);
12147 vm_object_unlock(object);
12148
12149 object = shadow;
12150 top_object = FALSE;
12151 depth++;
12152 } else {
12153 // if (!object->internal)
12154 // break;
12155 // retval = KERN_FAILURE;
12156 // goto done_with_object;
12157 break;
12158 }
12159 }
12160 }
12161 /* The ref_count is not strictly accurate: it measures the number */
12162 /* of entities holding a ref on the object; they may not be mapping */
12163 /* the object, or may not be mapping the section holding the */
12164 /* target page, but it's still a ballpark figure and, though an */
12165 /* over-count, it picks up the copy-on-write cases. */
12166
12167 /* We could also get a picture of page sharing from pmap_attributes */
12168 /* but this would undercount, as only faulted-in mappings would */
12169 /* show up. */
12170
12171 if (top_object == TRUE && object->shadow)
12172 disposition |= VM_PAGE_QUERY_PAGE_COPIED;
12173
12174 if (! object->internal)
12175 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
12176
12177 if (m == VM_PAGE_NULL)
12178 goto done_with_object;
12179
12180 if (m->fictitious) {
12181 disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
12182 goto done_with_object;
12183 }
12184 if (m->dirty || pmap_is_modified(m->phys_page))
12185 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
12186
12187 if (m->reference || pmap_is_referenced(m->phys_page))
12188 disposition |= VM_PAGE_QUERY_PAGE_REF;
12189
12190 if (m->speculative)
12191 disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE;
12192
12193 if (m->cs_validated)
12194 disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED;
12195 if (m->cs_tainted)
12196 disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED;
12197
12198 done_with_object:
12199 vm_object_unlock(object);
12200 done:
12201
12202 switch (flavor) {
12203 case VM_PAGE_INFO_BASIC:
12204 basic_info = (vm_page_info_basic_t) info;
12205 basic_info->disposition = disposition;
12206 basic_info->ref_count = ref_count;
12207 basic_info->object_id = (vm_object_id_t) (uintptr_t) object;
12208 basic_info->offset =
12209 (memory_object_offset_t) offset + offset_in_page;
12210 basic_info->depth = depth;
12211 break;
12212 }
12213
12214 return retval;
12215 }
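
/*
 * Illustrative user-level sketch (not part of this file): the same
 * "is this page resident?" question, asked through the POSIX mincore(2)
 * interface, which on this platform is ultimately served by the page-query
 * machinery above. Function name and sizes are assumptions for the sketch
 * only.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>

int
residency_demo(void)
{
	size_t	pagesize = (size_t)getpagesize();
	size_t	npages = 16;
	size_t	len = npages * pagesize;
	size_t	i;
	char	*vec;
	void	*buf;

	buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
		   MAP_ANON | MAP_PRIVATE, -1, 0);
	if (buf == MAP_FAILED)
		return -1;

	/* Touch only the first page so exactly one page becomes resident. */
	memset(buf, 0xA5, pagesize);

	vec = malloc(npages);
	if (vec != NULL && mincore(buf, len, vec) == 0) {
		for (i = 0; i < npages; i++)
			printf("page %zu: %s\n", i,
			    (vec[i] & MINCORE_INCORE) ? "resident" : "not resident");
	}
	free(vec);
	munmap(buf, len);
	return 0;
}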
12216
12217 /*
12218 * vm_map_msync
12219 *
12220 * Synchronises the specified memory range with its backing store
12221 * image by either flushing or cleaning the contents to the appropriate
12222 * memory manager, engaging in a memory object synchronize dialog with
12223 * that manager. The client does not return until the manager issues
12224 * an m_o_s_completed message. MIG magically converts the user task
12225 * parameter to the task's address map.
12226 *
12227 * interpretation of sync_flags
12228 * VM_SYNC_INVALIDATE - discard pages, only return precious
12229 * pages to manager.
12230 *
12231 * VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
12232 * - discard pages, write dirty or precious
12233 * pages back to memory manager.
12234 *
12235 * VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
12236 * - write dirty or precious pages back to
12237 * the memory manager.
12238 *
12239 * VM_SYNC_CONTIGUOUS - does everything normally, but if there
12240 * is a hole in the region, and we would
12241 * have returned KERN_SUCCESS, return
12242 * KERN_INVALID_ADDRESS instead.
12243 *
12244 * NOTE
12245 * The memory object attributes have not yet been implemented; this
12246 * function will have to deal with the invalidate attribute.
12247 *
12248 * RETURNS
12249 * KERN_INVALID_TASK Bad task parameter
12250 * KERN_INVALID_ARGUMENT both sync and async were specified.
12251 * KERN_SUCCESS The usual.
12252 * KERN_INVALID_ADDRESS There was a hole in the region.
12253 */
12254
12255 kern_return_t
12256 vm_map_msync(
12257 vm_map_t map,
12258 vm_map_address_t address,
12259 vm_map_size_t size,
12260 vm_sync_t sync_flags)
12261 {
12262 msync_req_t msr;
12263 msync_req_t new_msr;
12264 queue_chain_t req_q; /* queue of requests for this msync */
12265 vm_map_entry_t entry;
12266 vm_map_size_t amount_left;
12267 vm_object_offset_t offset;
12268 boolean_t do_sync_req;
12269 boolean_t had_hole = FALSE;
12270 memory_object_t pager;
12271
12272 if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
12273 (sync_flags & VM_SYNC_SYNCHRONOUS))
12274 return(KERN_INVALID_ARGUMENT);
12275
12276 /*
12277 * align address and size on page boundaries
12278 */
12279 size = vm_map_round_page(address + size) - vm_map_trunc_page(address);
12280 address = vm_map_trunc_page(address);
12281
12282 if (map == VM_MAP_NULL)
12283 return(KERN_INVALID_TASK);
12284
12285 if (size == 0)
12286 return(KERN_SUCCESS);
12287
12288 queue_init(&req_q);
12289 amount_left = size;
12290
12291 while (amount_left > 0) {
12292 vm_object_size_t flush_size;
12293 vm_object_t object;
12294
12295 vm_map_lock(map);
12296 if (!vm_map_lookup_entry(map,
12297 vm_map_trunc_page(address), &entry)) {
12298
12299 vm_map_size_t skip;
12300
12301 /*
12302 * hole in the address map.
12303 */
12304 had_hole = TRUE;
12305
12306 /*
12307 * Check for empty map.
12308 */
12309 if (entry == vm_map_to_entry(map) &&
12310 entry->vme_next == entry) {
12311 vm_map_unlock(map);
12312 break;
12313 }
12314 /*
12315 * Check that we don't wrap and that
12316 * we have at least one real map entry.
12317 */
12318 if ((map->hdr.nentries == 0) ||
12319 (entry->vme_next->vme_start < address)) {
12320 vm_map_unlock(map);
12321 break;
12322 }
12323 /*
12324 * Move up to the next entry if needed
12325 */
12326 skip = (entry->vme_next->vme_start - address);
12327 if (skip >= amount_left)
12328 amount_left = 0;
12329 else
12330 amount_left -= skip;
12331 address = entry->vme_next->vme_start;
12332 vm_map_unlock(map);
12333 continue;
12334 }
12335
12336 offset = address - entry->vme_start;
12337
12338 /*
12339 * do we have more to flush than is contained in this
12340 * entry ?
12341 */
12342 if (amount_left + entry->vme_start + offset > entry->vme_end) {
12343 flush_size = entry->vme_end -
12344 (entry->vme_start + offset);
12345 } else {
12346 flush_size = amount_left;
12347 }
12348 amount_left -= flush_size;
12349 address += flush_size;
12350
12351 if (entry->is_sub_map == TRUE) {
12352 vm_map_t local_map;
12353 vm_map_offset_t local_offset;
12354
12355 local_map = entry->object.sub_map;
12356 local_offset = entry->offset;
12357 vm_map_unlock(map);
12358 if (vm_map_msync(
12359 local_map,
12360 local_offset,
12361 flush_size,
12362 sync_flags) == KERN_INVALID_ADDRESS) {
12363 had_hole = TRUE;
12364 }
12365 continue;
12366 }
12367 object = entry->object.vm_object;
12368
12369 /*
12370 * We can't sync this object if the object has not been
12371 * created yet
12372 */
12373 if (object == VM_OBJECT_NULL) {
12374 vm_map_unlock(map);
12375 continue;
12376 }
12377 offset += entry->offset;
12378
12379 vm_object_lock(object);
12380
12381 if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
12382 int kill_pages = 0;
12383 boolean_t reusable_pages = FALSE;
12384
12385 if (sync_flags & VM_SYNC_KILLPAGES) {
12386 if (object->ref_count == 1 && !object->shadow)
12387 kill_pages = 1;
12388 else
12389 kill_pages = -1;
12390 }
12391 if (kill_pages != -1)
12392 vm_object_deactivate_pages(object, offset,
12393 (vm_object_size_t)flush_size, kill_pages, reusable_pages);
12394 vm_object_unlock(object);
12395 vm_map_unlock(map);
12396 continue;
12397 }
12398 /*
12399 * We can't sync this object if there isn't a pager.
12400 * Don't bother to sync internal objects, since there can't
12401 * be any "permanent" storage for these objects anyway.
12402 */
12403 if ((object->pager == MEMORY_OBJECT_NULL) ||
12404 (object->internal) || (object->private)) {
12405 vm_object_unlock(object);
12406 vm_map_unlock(map);
12407 continue;
12408 }
12409 /*
12410 * keep a reference on the object until syncing is done
12411 */
12412 vm_object_reference_locked(object);
12413 vm_object_unlock(object);
12414
12415 vm_map_unlock(map);
12416
12417 do_sync_req = vm_object_sync(object,
12418 offset,
12419 flush_size,
12420 sync_flags & VM_SYNC_INVALIDATE,
12421 ((sync_flags & VM_SYNC_SYNCHRONOUS) ||
12422 (sync_flags & VM_SYNC_ASYNCHRONOUS)),
12423 sync_flags & VM_SYNC_SYNCHRONOUS);
12424 /*
12425 * only send an m_o_s if we returned pages or if the entry
12426 * is writable (i.e. dirty pages may have already been sent back)
12427 */
12428 if (!do_sync_req) {
12429 if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) {
12430 /*
12431 * clear out the clustering and read-ahead hints
12432 */
12433 vm_object_lock(object);
12434
12435 object->pages_created = 0;
12436 object->pages_used = 0;
12437 object->sequential = 0;
12438 object->last_alloc = 0;
12439
12440 vm_object_unlock(object);
12441 }
12442 vm_object_deallocate(object);
12443 continue;
12444 }
12445 msync_req_alloc(new_msr);
12446
12447 vm_object_lock(object);
12448 offset += object->paging_offset;
12449
12450 new_msr->offset = offset;
12451 new_msr->length = flush_size;
12452 new_msr->object = object;
12453 new_msr->flag = VM_MSYNC_SYNCHRONIZING;
12454 re_iterate:
12455
12456 /*
12457 * We can't sync this object if there isn't a pager. The
12458 * pager can disappear anytime we're not holding the object
12459 * lock. So this has to be checked anytime we goto re_iterate.
12460 */
12461
12462 pager = object->pager;
12463
12464 if (pager == MEMORY_OBJECT_NULL) {
12465 vm_object_unlock(object);
12466 vm_object_deallocate(object);
12467 continue;
12468 }
12469
12470 queue_iterate(&object->msr_q, msr, msync_req_t, msr_q) {
12471 /*
12472 * need to check for an overlapping entry; if found, wait
12473 * for the overlapping msr to be done, then reiterate
12474 */
12475 msr_lock(msr);
12476 if (msr->flag == VM_MSYNC_SYNCHRONIZING &&
12477 ((offset >= msr->offset &&
12478 offset < (msr->offset + msr->length)) ||
12479 (msr->offset >= offset &&
12480 msr->offset < (offset + flush_size))))
12481 {
12482 assert_wait((event_t) msr,THREAD_INTERRUPTIBLE);
12483 msr_unlock(msr);
12484 vm_object_unlock(object);
12485 thread_block(THREAD_CONTINUE_NULL);
12486 vm_object_lock(object);
12487 goto re_iterate;
12488 }
12489 msr_unlock(msr);
12490 }/* queue_iterate */
12491
12492 queue_enter(&object->msr_q, new_msr, msync_req_t, msr_q);
12493
12494 vm_object_paging_begin(object);
12495 vm_object_unlock(object);
12496
12497 queue_enter(&req_q, new_msr, msync_req_t, req_q);
12498
12499 (void) memory_object_synchronize(
12500 pager,
12501 offset,
12502 flush_size,
12503 sync_flags & ~VM_SYNC_CONTIGUOUS);
12504
12505 vm_object_lock(object);
12506 vm_object_paging_end(object);
12507 vm_object_unlock(object);
12508 }/* while */
12509
12510 /*
12511 * wait for memory_object_synchronize_completed messages from pager(s)
12512 */
12513
12514 while (!queue_empty(&req_q)) {
12515 msr = (msync_req_t)queue_first(&req_q);
12516 msr_lock(msr);
12517 while(msr->flag != VM_MSYNC_DONE) {
12518 assert_wait((event_t) msr, THREAD_INTERRUPTIBLE);
12519 msr_unlock(msr);
12520 thread_block(THREAD_CONTINUE_NULL);
12521 msr_lock(msr);
12522 }/* while */
12523 queue_remove(&req_q, msr, msync_req_t, req_q);
12524 msr_unlock(msr);
12525 vm_object_deallocate(msr->object);
12526 msync_req_free(msr);
12527 }/* queue_iterate */
12528
12529 /* for proper msync() behaviour */
12530 if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS))
12531 return(KERN_INVALID_ADDRESS);
12532
12533 return(KERN_SUCCESS);
12534 }/* vm_msync */
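
/*
 * Illustrative user-level sketch (not part of this file): how the sync_flags
 * handled above surface through msync(2) -- MS_SYNC and MS_ASYNC select the
 * synchronous vs. asynchronous write-back paths, and MS_INVALIDATE maps to
 * VM_SYNC_INVALIDATE. File name and sizes are assumptions for the sketch;
 * error handling is abbreviated.
 */
#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>

int
msync_demo(const char *path)
{
	size_t	len = 4096;
	char	*p;
	int	fd;

	fd = open(path, O_RDWR | O_CREAT, 0644);
	if (fd < 0)
		return -1;
	if (ftruncate(fd, (off_t)len) != 0) {
		close(fd);
		return -1;
	}

	p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED) {
		close(fd);
		return -1;
	}
	memcpy(p, "hello, backing store", 21);

	/* Write dirty pages back and wait for completion (synchronous). */
	msync(p, len, MS_SYNC);

	/* Queue another write-back without waiting (asynchronous). */
	msync(p, len, MS_ASYNC);

	/* Discard cached pages; the next access refaults from the file. */
	msync(p, len, MS_INVALIDATE);

	munmap(p, len);
	close(fd);
	return 0;
}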
12535
12536 /*
12537 * Routine: convert_port_entry_to_map
12538 * Purpose:
12539 * Convert from a port specifying an entry or a task
12540 * to a map. Doesn't consume the port ref; produces a map ref,
12541 * which may be null. Unlike convert_port_to_map, the
12542 * port may be either a task port or a named-entry port.
12543 * Conditions:
12544 * Nothing locked.
12545 */
12546
12547
12548 vm_map_t
12549 convert_port_entry_to_map(
12550 ipc_port_t port)
12551 {
12552 vm_map_t map;
12553 vm_named_entry_t named_entry;
12554 uint32_t try_failed_count = 0;
12555
12556 if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
12557 while(TRUE) {
12558 ip_lock(port);
12559 if(ip_active(port) && (ip_kotype(port)
12560 == IKOT_NAMED_ENTRY)) {
12561 named_entry =
12562 (vm_named_entry_t)port->ip_kobject;
12563 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
12564 ip_unlock(port);
12565
12566 try_failed_count++;
12567 mutex_pause(try_failed_count);
12568 continue;
12569 }
12570 named_entry->ref_count++;
12571 lck_mtx_unlock(&(named_entry)->Lock);
12572 ip_unlock(port);
12573 if ((named_entry->is_sub_map) &&
12574 (named_entry->protection
12575 & VM_PROT_WRITE)) {
12576 map = named_entry->backing.map;
12577 } else {
12578 mach_destroy_memory_entry(port);
12579 return VM_MAP_NULL;
12580 }
12581 vm_map_reference_swap(map);
12582 mach_destroy_memory_entry(port);
12583 break;
12584 }
12585 else
12586 return VM_MAP_NULL;
12587 }
12588 }
12589 else
12590 map = convert_port_to_map(port);
12591
12592 return map;
12593 }
12594
12595 /*
12596 * Routine: convert_port_entry_to_object
12597 * Purpose:
12598 * Convert from a port specifying a named entry to an
12599 * object. Doesn't consume the port ref; produces an object ref,
12600 * which may be null.
12601 * Conditions:
12602 * Nothing locked.
12603 */
12604
12605
12606 vm_object_t
12607 convert_port_entry_to_object(
12608 ipc_port_t port)
12609 {
12610 vm_object_t object;
12611 vm_named_entry_t named_entry;
12612 uint32_t try_failed_count = 0;
12613
12614 if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
12615 while(TRUE) {
12616 ip_lock(port);
12617 if(ip_active(port) && (ip_kotype(port)
12618 == IKOT_NAMED_ENTRY)) {
12619 named_entry =
12620 (vm_named_entry_t)port->ip_kobject;
12621 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
12622 ip_unlock(port);
12623
12624 try_failed_count++;
12625 mutex_pause(try_failed_count);
12626 continue;
12627 }
12628 named_entry->ref_count++;
12629 lck_mtx_unlock(&(named_entry)->Lock);
12630 ip_unlock(port);
12631 if ((!named_entry->is_sub_map) &&
12632 (!named_entry->is_pager) &&
12633 (named_entry->protection
12634 & VM_PROT_WRITE)) {
12635 object = named_entry->backing.object;
12636 } else {
12637 mach_destroy_memory_entry(port);
12638 return (vm_object_t)NULL;
12639 }
12640 vm_object_reference(named_entry->backing.object);
12641 mach_destroy_memory_entry(port);
12642 break;
12643 }
12644 else
12645 return (vm_object_t)NULL;
12646 }
12647 } else {
12648 return (vm_object_t)NULL;
12649 }
12650
12651 return object;
12652 }
12653
12654 /*
12655 * Export routines to other components for the things we access locally through
12656 * macros.
12657 */
12658 #undef current_map
12659 vm_map_t
12660 current_map(void)
12661 {
12662 return (current_map_fast());
12663 }
12664
12665 /*
12666 * vm_map_reference:
12667 *
12668 * Most code internal to the osfmk will go through a
12669 * macro defining this. This is always here for the
12670 * use of other kernel components.
12671 */
12672 #undef vm_map_reference
12673 void
12674 vm_map_reference(
12675 register vm_map_t map)
12676 {
12677 if (map == VM_MAP_NULL)
12678 return;
12679
12680 lck_mtx_lock(&map->s_lock);
12681 #if TASK_SWAPPER
12682 assert(map->res_count > 0);
12683 assert(map->ref_count >= map->res_count);
12684 map->res_count++;
12685 #endif
12686 map->ref_count++;
12687 lck_mtx_unlock(&map->s_lock);
12688 }
12689
12690 /*
12691 * vm_map_deallocate:
12692 *
12693 * Removes a reference from the specified map,
12694 * destroying it if no references remain.
12695 * The map should not be locked.
12696 */
12697 void
12698 vm_map_deallocate(
12699 register vm_map_t map)
12700 {
12701 unsigned int ref;
12702
12703 if (map == VM_MAP_NULL)
12704 return;
12705
12706 lck_mtx_lock(&map->s_lock);
12707 ref = --map->ref_count;
12708 if (ref > 0) {
12709 vm_map_res_deallocate(map);
12710 lck_mtx_unlock(&map->s_lock);
12711 return;
12712 }
12713 assert(map->ref_count == 0);
12714 lck_mtx_unlock(&map->s_lock);
12715
12716 #if TASK_SWAPPER
12717 /*
12718 * The map residence count isn't decremented here because
12719 * the vm_map_delete below will traverse the entire map,
12720 * deleting entries, and the residence counts on objects
12721 * and sharing maps will go away then.
12722 */
12723 #endif
12724
12725 vm_map_destroy(map, VM_MAP_NO_FLAGS);
12726 }
12727
12728
12729 void
12730 vm_map_disable_NX(vm_map_t map)
12731 {
12732 if (map == NULL)
12733 return;
12734 if (map->pmap == NULL)
12735 return;
12736
12737 pmap_disable_NX(map->pmap);
12738 }
12739
12740 void
12741 vm_map_disallow_data_exec(vm_map_t map)
12742 {
12743 if (map == NULL)
12744 return;
12745
12746 map->map_disallow_data_exec = TRUE;
12747 }
12748
12749 /* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
12750 * more descriptive.
12751 */
12752 void
12753 vm_map_set_32bit(vm_map_t map)
12754 {
12755 map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
12756 }
12757
12758
12759 void
12760 vm_map_set_64bit(vm_map_t map)
12761 {
12762 map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
12763 }
12764
12765 vm_map_offset_t
12766 vm_compute_max_offset(unsigned is64)
12767 {
12768 return (is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS);
12769 }
12770
12771 boolean_t
12772 vm_map_is_64bit(
12773 vm_map_t map)
12774 {
12775 return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
12776 }
12777
12778 boolean_t
12779 vm_map_has_hard_pagezero(
12780 vm_map_t map,
12781 vm_map_offset_t pagezero_size)
12782 {
12783 /*
12784 * XXX FBDP
12785 * We should lock the VM map (for read) here but we can get away
12786 * with it for now because there can't really be any race condition:
12787 * the VM map's min_offset is changed only when the VM map is created
12788 * and when the zero page is established (when the binary gets loaded),
12789 * and this routine gets called only when the task terminates and the
12790 * VM map is being torn down, and when a new map is created via
12791 * load_machfile()/execve().
12792 */
12793 return (map->min_offset >= pagezero_size);
12794 }
12795
12796 void
12797 vm_map_set_4GB_pagezero(vm_map_t map)
12798 {
12799 #if defined(__i386__)
12800 pmap_set_4GB_pagezero(map->pmap);
12801 #else
12802 #pragma unused(map)
12803 #endif
12804
12805 }
12806
12807 void
12808 vm_map_clear_4GB_pagezero(vm_map_t map)
12809 {
12810 #if defined(__i386__)
12811 pmap_clear_4GB_pagezero(map->pmap);
12812 #else
12813 #pragma unused(map)
12814 #endif
12815 }
12816
12817 /*
12818 * Raise a VM map's maximum offset.
12819 */
12820 kern_return_t
12821 vm_map_raise_max_offset(
12822 vm_map_t map,
12823 vm_map_offset_t new_max_offset)
12824 {
12825 kern_return_t ret;
12826
12827 vm_map_lock(map);
12828 ret = KERN_INVALID_ADDRESS;
12829
12830 if (new_max_offset >= map->max_offset) {
12831 if (!vm_map_is_64bit(map)) {
12832 if (new_max_offset <= (vm_map_offset_t)VM_MAX_ADDRESS) {
12833 map->max_offset = new_max_offset;
12834 ret = KERN_SUCCESS;
12835 }
12836 } else {
12837 if (new_max_offset <= (vm_map_offset_t)MACH_VM_MAX_ADDRESS) {
12838 map->max_offset = new_max_offset;
12839 ret = KERN_SUCCESS;
12840 }
12841 }
12842 }
12843
12844 vm_map_unlock(map);
12845 return ret;
12846 }
12847
12848
12849 /*
12850 * Raise a VM map's minimum offset.
12851 * To strictly enforce "page zero" reservation.
12852 */
12853 kern_return_t
12854 vm_map_raise_min_offset(
12855 vm_map_t map,
12856 vm_map_offset_t new_min_offset)
12857 {
12858 vm_map_entry_t first_entry;
12859
12860 new_min_offset = vm_map_round_page(new_min_offset);
12861
12862 vm_map_lock(map);
12863
12864 if (new_min_offset < map->min_offset) {
12865 /*
12866 * Can't move min_offset backwards, as that would expose
12867 * a part of the address space that was previously, and for
12868 * possibly good reasons, inaccessible.
12869 */
12870 vm_map_unlock(map);
12871 return KERN_INVALID_ADDRESS;
12872 }
12873
12874 first_entry = vm_map_first_entry(map);
12875 if (first_entry != vm_map_to_entry(map) &&
12876 first_entry->vme_start < new_min_offset) {
12877 /*
12878 * Some memory was already allocated below the new
12879 * minimum offset. It's too late to change it now...
12880 */
12881 vm_map_unlock(map);
12882 return KERN_NO_SPACE;
12883 }
12884
12885 map->min_offset = new_min_offset;
12886
12887 vm_map_unlock(map);
12888
12889 return KERN_SUCCESS;
12890 }
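
/*
 * Illustrative user-level sketch (not part of this file): the visible effect
 * of a raised min_offset / hard page zero -- a fixed-address allocation below
 * the map's minimum offset is refused. On a typical 64-bit process the
 * __PAGEZERO segment reserves the low addresses, so this is expected to fail;
 * the exact error code returned is not guaranteed. Function name and the
 * chosen address/size are assumptions for the sketch only.
 */
#include <stdio.h>
#include <mach/mach.h>
#include <mach/mach_vm.h>
#include <mach/mach_error.h>
#include <mach/vm_statistics.h>

int
pagezero_demo(void)
{
	mach_vm_address_t	addr = 0x1000;	/* well below any hard page zero */
	kern_return_t		kr;

	kr = mach_vm_allocate(mach_task_self(), &addr,
			      (mach_vm_size_t)0x1000, VM_FLAGS_FIXED);
	printf("mach_vm_allocate at 0x%llx: %s\n",
	    (unsigned long long)addr, mach_error_string(kr));

	/* Expected: an error rather than KERN_SUCCESS. */
	return (kr == KERN_SUCCESS) ? -1 : 0;
}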
12891
12892 /*
12893 * Set the limit on the maximum amount of user wired memory allowed for this map.
12894 * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side of
12895 * the kernel. The limits are checked on the Mach VM side, so we keep a copy here
12896 * to avoid reaching over to the BSD data structures.
12897 */
12898
12899 void
12900 vm_map_set_user_wire_limit(vm_map_t map,
12901 vm_size_t limit)
12902 {
12903 map->user_wire_limit = limit;
12904 }
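
/*
 * Illustrative user-level sketch (not part of this file): the limit cached
 * above mirrors the BSD RLIMIT_MEMLOCK value, so an mlock(2) request larger
 * than that limit is refused. If the limit is unlimited the demo is a no-op.
 * Function name and the extra size are assumptions for the sketch only.
 */
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <sys/resource.h>

int
wire_limit_demo(void)
{
	struct rlimit	rl;
	size_t		len;
	void		*buf;

	if (getrlimit(RLIMIT_MEMLOCK, &rl) != 0)
		return -1;
	if (rl.rlim_cur == RLIM_INFINITY) {
		printf("RLIMIT_MEMLOCK is unlimited; nothing to demonstrate\n");
		return 0;
	}

	/* Ask to wire more than the per-process limit allows. */
	len = (size_t)rl.rlim_cur + (1024 * 1024);
	buf = malloc(len);
	if (buf == NULL)
		return -1;

	if (mlock(buf, len) != 0)
		perror("mlock (expected: over RLIMIT_MEMLOCK)");
	else
		munlock(buf, len);

	free(buf);
	return 0;
}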
12905
12906
12907 void vm_map_switch_protect(vm_map_t map,
12908 boolean_t val)
12909 {
12910 vm_map_lock(map);
12911 map->switch_protect=val;
12912 vm_map_unlock(map);
12913 }
12914
12915 /* Add (generate) code signature for memory range */
12916 #if CONFIG_DYNAMIC_CODE_SIGNING
12917 kern_return_t vm_map_sign(vm_map_t map,
12918 vm_map_offset_t start,
12919 vm_map_offset_t end)
12920 {
12921 vm_map_entry_t entry;
12922 vm_page_t m;
12923 vm_object_t object;
12924
12925 /*
12926 * Vet all the input parameters and current type and state of the
12927 * underlying object. Return with an error if anything is amiss.
12928 */
12929 if (map == VM_MAP_NULL)
12930 return(KERN_INVALID_ARGUMENT);
12931
12932 vm_map_lock_read(map);
12933
12934 if (!vm_map_lookup_entry(map, start, &entry) || entry->is_sub_map) {
12935 /*
12936 * Must pass a valid non-submap address.
12937 */
12938 vm_map_unlock_read(map);
12939 return(KERN_INVALID_ADDRESS);
12940 }
12941
12942 if((entry->vme_start > start) || (entry->vme_end < end)) {
12943 /*
12944 * Map entry doesn't cover the requested range. Not handling
12945 * this situation currently.
12946 */
12947 vm_map_unlock_read(map);
12948 return(KERN_INVALID_ARGUMENT);
12949 }
12950
12951 object = entry->object.vm_object;
12952 if (object == VM_OBJECT_NULL) {
12953 /*
12954 * Object must already be present or we can't sign.
12955 */
12956 vm_map_unlock_read(map);
12957 return KERN_INVALID_ARGUMENT;
12958 }
12959
12960 vm_object_lock(object);
12961 vm_map_unlock_read(map);
12962
12963 while(start < end) {
12964 uint32_t refmod;
12965
12966 m = vm_page_lookup(object, start - entry->vme_start + entry->offset );
12967 if (m==VM_PAGE_NULL) {
12968 /* should we try to fault in a page here? We can probably
12969 * demand that it exists and is locked for this request. */
12970 vm_object_unlock(object);
12971 return KERN_FAILURE;
12972 }
12973 /* deal with special page status */
12974 if (m->busy ||
12975 (m->unusual && (m->error || m->restart || m->private || m->absent))) {
12976 vm_object_unlock(object);
12977 return KERN_FAILURE;
12978 }
12979
12980 /* Page is OK... now "validate" it */
12981 /* This is the place where we'll call out to create a code
12982 * directory, later */
12983 m->cs_validated = TRUE;
12984
12985 /* The page is now "clean" for codesigning purposes. That means
12986 * we no longer consider it modified (wpmapped). But
12987 * we'll disconnect the page so we note any future modification
12988 * attempts. */
12989 m->wpmapped = FALSE;
12990 refmod = pmap_disconnect(m->phys_page);
12991
12992 /* Pull the dirty status from the pmap, since we cleared the
12993 * wpmapped bit */
12994 if ((refmod & VM_MEM_MODIFIED) && !m->dirty) {
12995 SET_PAGE_DIRTY(m, FALSE);
12996 }
12997
12998 /* On to the next page */
12999 start += PAGE_SIZE;
13000 }
13001 vm_object_unlock(object);
13002
13003 return KERN_SUCCESS;
13004 }
13005 #endif
13006
13007 #if CONFIG_FREEZE
13008
13009 kern_return_t vm_map_freeze_walk(
13010 vm_map_t map,
13011 unsigned int *purgeable_count,
13012 unsigned int *wired_count,
13013 unsigned int *clean_count,
13014 unsigned int *dirty_count,
13015 unsigned int dirty_budget,
13016 boolean_t *has_shared)
13017 {
13018 vm_map_entry_t entry;
13019
13020 vm_map_lock_read(map);
13021
13022 *purgeable_count = *wired_count = *clean_count = *dirty_count = 0;
13023 *has_shared = FALSE;
13024
13025 for (entry = vm_map_first_entry(map);
13026 entry != vm_map_to_entry(map);
13027 entry = entry->vme_next) {
13028 unsigned int purgeable, clean, dirty, wired;
13029 boolean_t shared;
13030
13031 if ((entry->object.vm_object == 0) ||
13032 (entry->is_sub_map) ||
13033 (entry->object.vm_object->phys_contiguous)) {
13034 continue;
13035 }
13036
13037 default_freezer_pack(&purgeable, &wired, &clean, &dirty, dirty_budget, &shared, entry->object.vm_object, NULL);
13038
13039 *purgeable_count += purgeable;
13040 *wired_count += wired;
13041 *clean_count += clean;
13042 *dirty_count += dirty;
13043
13044 if (shared) {
13045 *has_shared = TRUE;
13046 }
13047
13048 /* Adjust pageout budget and finish up if reached */
13049 if (dirty_budget) {
13050 dirty_budget -= dirty;
13051 if (dirty_budget == 0) {
13052 break;
13053 }
13054 }
13055 }
13056
13057 vm_map_unlock_read(map);
13058
13059 return KERN_SUCCESS;
13060 }
13061
13062 kern_return_t vm_map_freeze(
13063 vm_map_t map,
13064 unsigned int *purgeable_count,
13065 unsigned int *wired_count,
13066 unsigned int *clean_count,
13067 unsigned int *dirty_count,
13068 unsigned int dirty_budget,
13069 boolean_t *has_shared)
13070 {
13071 vm_map_entry_t entry2 = VM_MAP_ENTRY_NULL;
13072 kern_return_t kr = KERN_SUCCESS;
13073
13074 *purgeable_count = *wired_count = *clean_count = *dirty_count = 0;
13075 *has_shared = FALSE;
13076
13077 /*
13078 * We need the exclusive lock here so that we can
13079 * block any page faults or lookups while we are
13080 * in the middle of freezing this vm map.
13081 */
13082 vm_map_lock(map);
13083
13084 if (map->default_freezer_handle == NULL) {
13085 map->default_freezer_handle = default_freezer_handle_allocate();
13086 }
13087
13088 if ((kr = default_freezer_handle_init(map->default_freezer_handle)) != KERN_SUCCESS) {
13089 /*
13090 * This can happen if the default_freezer_handle passed in is NULL,
13091 * or if a table has already been allocated and associated
13092 * with this handle, i.e. the map is already frozen.
13093 */
13094 goto done;
13095 }
13096
13097 for (entry2 = vm_map_first_entry(map);
13098 entry2 != vm_map_to_entry(map);
13099 entry2 = entry2->vme_next) {
13100
13101 vm_object_t src_object = entry2->object.vm_object;
13102
13103 /* If eligible, scan the entry, moving eligible pages over to our parent object */
13104 if (entry2->object.vm_object && !entry2->is_sub_map && !entry2->object.vm_object->phys_contiguous) {
13105 unsigned int purgeable, clean, dirty, wired;
13106 boolean_t shared;
13107
13108 default_freezer_pack(&purgeable, &wired, &clean, &dirty, dirty_budget, &shared,
13109 src_object, map->default_freezer_handle);
13110
13111 *purgeable_count += purgeable;
13112 *wired_count += wired;
13113 *clean_count += clean;
13114 *dirty_count += dirty;
13115
13116 /* Adjust pageout budget and finish up if reached */
13117 if (dirty_budget) {
13118 dirty_budget -= dirty;
13119 if (dirty_budget == 0) {
13120 break;
13121 }
13122 }
13123
13124 if (shared) {
13125 *has_shared = TRUE;
13126 }
13127 }
13128 }
13129
13130 /* Finally, throw out the pages to swap */
13131 default_freezer_pageout(map->default_freezer_handle);
13132
13133 done:
13134 vm_map_unlock(map);
13135
13136 return kr;
13137 }
13138
13139 kern_return_t
13140 vm_map_thaw(
13141 vm_map_t map)
13142 {
13143 kern_return_t kr = KERN_SUCCESS;
13144
13145 vm_map_lock(map);
13146
13147 if (map->default_freezer_handle == NULL) {
13148 /*
13149 * This map is not in a frozen state.
13150 */
13151 kr = KERN_FAILURE;
13152 goto out;
13153 }
13154
13155 default_freezer_unpack(map->default_freezer_handle);
13156 out:
13157 vm_map_unlock(map);
13158
13159 return kr;
13160 }
13161 #endif
13162
13163 #if !CONFIG_EMBEDDED
13164 /*
13165 * vm_map_entry_should_cow_for_true_share:
13166 *
13167 * Determines if the map entry should be clipped and setup for copy-on-write
13168 * to avoid applying "true_share" to a large VM object when only a subset is
13169 * targeted.
13170 *
13171 * For now, we target only the map entries created for the Objective C
13172 * Garbage Collector, which initially have the following properties:
13173 * - alias == VM_MEMORY_MALLOC
13174 * - wired_count == 0
13175 * - !needs_copy
13176 * and a VM object with:
13177 * - internal
13178 * - copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC
13179 * - !true_share
13180 * - vo_size == ANON_CHUNK_SIZE
13181 */
13182 boolean_t
13183 vm_map_entry_should_cow_for_true_share(
13184 vm_map_entry_t entry)
13185 {
13186 vm_object_t object;
13187
13188 if (entry->is_sub_map) {
13189 /* entry does not point at a VM object */
13190 return FALSE;
13191 }
13192
13193 if (entry->needs_copy) {
13194 /* already set for copy_on_write: done! */
13195 return FALSE;
13196 }
13197
13198 if (entry->alias != VM_MEMORY_MALLOC) {
13199 /* not tagged as an Objective-C Garbage Collector entry */
13200 return FALSE;
13201 }
13202
13203 if (entry->wired_count) {
13204 /* wired: can't change the map entry... */
13205 return FALSE;
13206 }
13207
13208 object = entry->object.vm_object;
13209
13210 if (object == VM_OBJECT_NULL) {
13211 /* no object yet... */
13212 return FALSE;
13213 }
13214
13215 if (!object->internal) {
13216 /* not an internal object */
13217 return FALSE;
13218 }
13219
13220 if (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
13221 /* not the default copy strategy */
13222 return FALSE;
13223 }
13224
13225 if (object->true_share) {
13226 /* already true_share: too late to avoid it */
13227 return FALSE;
13228 }
13229
13230 if (object->vo_size != ANON_CHUNK_SIZE) {
13231 /* not an object created for the ObjC Garbage Collector */
13232 return FALSE;
13233 }
13234
13235 /*
13236 * All the criteria match: we have a large object being targeted for "true_share".
13237 * To limit the adverse side-effects linked with "true_share", tell the caller to
13238 * try to avoid setting up the entire object for "true_share" by clipping the
13239 * targeted range and setting it up for copy-on-write.
13240 */
13241 return TRUE;
13242 }
13243 #endif /* !CONFIG_EMBEDDED */