1 /*
2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_map.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * Virtual memory mapping module.
64 */
65
66 #include <task_swapper.h>
67 #include <mach_assert.h>
68 #include <libkern/OSAtomic.h>
69
70 #include <mach/kern_return.h>
71 #include <mach/port.h>
72 #include <mach/vm_attributes.h>
73 #include <mach/vm_param.h>
74 #include <mach/vm_behavior.h>
75 #include <mach/vm_statistics.h>
76 #include <mach/memory_object.h>
77 #include <mach/mach_vm.h>
78 #include <machine/cpu_capabilities.h>
79 #include <mach/sdt.h>
80
81 #include <kern/assert.h>
82 #include <kern/counters.h>
83 #include <kern/kalloc.h>
84 #include <kern/zalloc.h>
85
86 #include <vm/cpm.h>
87 #include <vm/vm_init.h>
88 #include <vm/vm_fault.h>
89 #include <vm/vm_map.h>
90 #include <vm/vm_object.h>
91 #include <vm/vm_page.h>
92 #include <vm/vm_pageout.h>
93 #include <vm/vm_kern.h>
94 #include <ipc/ipc_port.h>
95 #include <kern/sched_prim.h>
96 #include <kern/misc_protos.h>
97 #include <machine/db_machdep.h>
98 #include <kern/xpr.h>
99
100 #include <mach/vm_map_server.h>
101 #include <mach/mach_host_server.h>
102 #include <vm/vm_protos.h>
103 #include <vm/vm_purgeable_internal.h>
104
105 #include <vm/vm_protos.h>
106 #include <vm/vm_shared_region.h>
107 #include <vm/vm_map_store.h>
108
109 /* Internal prototypes
110 */
111
112 static void vm_map_simplify_range(
113 vm_map_t map,
114 vm_map_offset_t start,
115 vm_map_offset_t end); /* forward */
116
117 static boolean_t vm_map_range_check(
118 vm_map_t map,
119 vm_map_offset_t start,
120 vm_map_offset_t end,
121 vm_map_entry_t *entry);
122
123 static vm_map_entry_t _vm_map_entry_create(
124 struct vm_map_header *map_header, boolean_t map_locked);
125
126 static void _vm_map_entry_dispose(
127 struct vm_map_header *map_header,
128 vm_map_entry_t entry);
129
130 static void vm_map_pmap_enter(
131 vm_map_t map,
132 vm_map_offset_t addr,
133 vm_map_offset_t end_addr,
134 vm_object_t object,
135 vm_object_offset_t offset,
136 vm_prot_t protection);
137
138 static void _vm_map_clip_end(
139 struct vm_map_header *map_header,
140 vm_map_entry_t entry,
141 vm_map_offset_t end);
142
143 static void _vm_map_clip_start(
144 struct vm_map_header *map_header,
145 vm_map_entry_t entry,
146 vm_map_offset_t start);
147
148 static void vm_map_entry_delete(
149 vm_map_t map,
150 vm_map_entry_t entry);
151
152 static kern_return_t vm_map_delete(
153 vm_map_t map,
154 vm_map_offset_t start,
155 vm_map_offset_t end,
156 int flags,
157 vm_map_t zap_map);
158
159 static kern_return_t vm_map_copy_overwrite_unaligned(
160 vm_map_t dst_map,
161 vm_map_entry_t entry,
162 vm_map_copy_t copy,
163 vm_map_address_t start);
164
165 static kern_return_t vm_map_copy_overwrite_aligned(
166 vm_map_t dst_map,
167 vm_map_entry_t tmp_entry,
168 vm_map_copy_t copy,
169 vm_map_offset_t start,
170 pmap_t pmap);
171
172 static kern_return_t vm_map_copyin_kernel_buffer(
173 vm_map_t src_map,
174 vm_map_address_t src_addr,
175 vm_map_size_t len,
176 boolean_t src_destroy,
177 vm_map_copy_t *copy_result); /* OUT */
178
179 static kern_return_t vm_map_copyout_kernel_buffer(
180 vm_map_t map,
181 vm_map_address_t *addr, /* IN/OUT */
182 vm_map_copy_t copy,
183 boolean_t overwrite);
184
185 static void vm_map_fork_share(
186 vm_map_t old_map,
187 vm_map_entry_t old_entry,
188 vm_map_t new_map);
189
190 static boolean_t vm_map_fork_copy(
191 vm_map_t old_map,
192 vm_map_entry_t *old_entry_p,
193 vm_map_t new_map);
194
195 void vm_map_region_top_walk(
196 vm_map_entry_t entry,
197 vm_region_top_info_t top);
198
199 void vm_map_region_walk(
200 vm_map_t map,
201 vm_map_offset_t va,
202 vm_map_entry_t entry,
203 vm_object_offset_t offset,
204 vm_object_size_t range,
205 vm_region_extended_info_t extended,
206 boolean_t look_for_pages);
207
208 static kern_return_t vm_map_wire_nested(
209 vm_map_t map,
210 vm_map_offset_t start,
211 vm_map_offset_t end,
212 vm_prot_t access_type,
213 boolean_t user_wire,
214 pmap_t map_pmap,
215 vm_map_offset_t pmap_addr);
216
217 static kern_return_t vm_map_unwire_nested(
218 vm_map_t map,
219 vm_map_offset_t start,
220 vm_map_offset_t end,
221 boolean_t user_wire,
222 pmap_t map_pmap,
223 vm_map_offset_t pmap_addr);
224
225 static kern_return_t vm_map_overwrite_submap_recurse(
226 vm_map_t dst_map,
227 vm_map_offset_t dst_addr,
228 vm_map_size_t dst_size);
229
230 static kern_return_t vm_map_copy_overwrite_nested(
231 vm_map_t dst_map,
232 vm_map_offset_t dst_addr,
233 vm_map_copy_t copy,
234 boolean_t interruptible,
235 pmap_t pmap,
236 boolean_t discard_on_success);
237
238 static kern_return_t vm_map_remap_extract(
239 vm_map_t map,
240 vm_map_offset_t addr,
241 vm_map_size_t size,
242 boolean_t copy,
243 struct vm_map_header *map_header,
244 vm_prot_t *cur_protection,
245 vm_prot_t *max_protection,
246 vm_inherit_t inheritance,
247 boolean_t pageable);
248
249 static kern_return_t vm_map_remap_range_allocate(
250 vm_map_t map,
251 vm_map_address_t *address,
252 vm_map_size_t size,
253 vm_map_offset_t mask,
254 int flags,
255 vm_map_entry_t *map_entry);
256
257 static void vm_map_region_look_for_page(
258 vm_map_t map,
259 vm_map_offset_t va,
260 vm_object_t object,
261 vm_object_offset_t offset,
262 int max_refcnt,
263 int depth,
264 vm_region_extended_info_t extended);
265
266 static int vm_map_region_count_obj_refs(
267 vm_map_entry_t entry,
268 vm_object_t object);
269
270
271 static kern_return_t vm_map_willneed(
272 vm_map_t map,
273 vm_map_offset_t start,
274 vm_map_offset_t end);
275
276 static kern_return_t vm_map_reuse_pages(
277 vm_map_t map,
278 vm_map_offset_t start,
279 vm_map_offset_t end);
280
281 static kern_return_t vm_map_reusable_pages(
282 vm_map_t map,
283 vm_map_offset_t start,
284 vm_map_offset_t end);
285
286 static kern_return_t vm_map_can_reuse(
287 vm_map_t map,
288 vm_map_offset_t start,
289 vm_map_offset_t end);
290
291 #if CONFIG_FREEZE
292 struct default_freezer_table;
293 __private_extern__ void* default_freezer_mapping_create(vm_object_t, vm_offset_t);
294 __private_extern__ void default_freezer_mapping_free(void**, boolean_t all);
295 #endif
296
297 /*
298 * Macros to copy a vm_map_entry. We must be careful to correctly
299 * manage the wired page count. vm_map_entry_copy() creates a new
300 * map entry to the same memory - the wired count in the new entry
301 * must be set to zero. vm_map_entry_copy_full() creates a new
302 * entry that is identical to the old entry. This preserves the
303 * wire count; it's used for map splitting and zone changing in
304 * vm_map_copyout.
305 */
306 #define vm_map_entry_copy(NEW,OLD) \
307 MACRO_BEGIN \
308 boolean_t _vmec_reserved = (NEW)->from_reserved_zone; \
309 *(NEW) = *(OLD); \
310 (NEW)->is_shared = FALSE; \
311 (NEW)->needs_wakeup = FALSE; \
312 (NEW)->in_transition = FALSE; \
313 (NEW)->wired_count = 0; \
314 (NEW)->user_wired_count = 0; \
315 (NEW)->permanent = FALSE; \
316 (NEW)->from_reserved_zone = _vmec_reserved; \
317 MACRO_END
318
319 #define vm_map_entry_copy_full(NEW,OLD) \
320 MACRO_BEGIN \
321 boolean_t _vmecf_reserved = (NEW)->from_reserved_zone; \
322 (*(NEW) = *(OLD)); \
323 (NEW)->from_reserved_zone = _vmecf_reserved; \
324 MACRO_END
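
/*
 * Illustrative sketch (an assumption for documentation, not code used
 * here): fork-style duplication of an entry into a new map uses
 * vm_map_entry_copy() so the copy starts with no wired pages, while
 * vm_map_entry_copy_full() is reserved for relocating the very same
 * entry (e.g. between headers/zones, as in vm_map_copyout) without
 * disturbing its wire count.
 *
 *	new_entry = vm_map_entry_create(new_map, FALSE);
 *	vm_map_entry_copy(new_entry, old_entry);
 */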
325
326 /*
327 * Decide if we want to allow processes to execute from their data or stack areas.
328 * override_nx() returns true if we do. Data/stack execution can be enabled independently
329 * for 32 and 64 bit processes. Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
330 * or allow_stack_exec to enable data execution for that type of data area for that particular
331 * ABI (or both by or'ing the flags together). These are initialized in the architecture
332 * specific pmap files since the default behavior varies according to architecture. The
333 * main reason it varies is because of the need to provide binary compatibility with old
334 * applications that were written before these restrictions came into being. In the old
335 * days, an app could execute anything it could read, but this has slowly been tightened
336 * up over time. The default behavior is:
337 *
338 * 32-bit PPC apps may execute from both stack and data areas
339  * 32-bit Intel apps may execute from data areas but not stack
340 * 64-bit PPC/Intel apps may not execute from either data or stack
341 *
342 * An application on any architecture may override these defaults by explicitly
343 * adding PROT_EXEC permission to the page in question with the mprotect(2)
344 * system call. This code here just determines what happens when an app tries to
345 * execute from a page that lacks execute permission.
346 *
347 * Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
348 * default behavior for both 32 and 64 bit apps on a system-wide basis. Furthermore,
349 * a Mach-O header flag bit (MH_NO_HEAP_EXECUTION) can be used to forcibly disallow
350 * execution from data areas for a particular binary even if the arch normally permits it. As
351 * a final wrinkle, a posix_spawn attribute flag can be used to negate this opt-in header bit
352 * to support some complicated use cases, notably browsers with out-of-process plugins that
353 * are not all NX-safe.
354 */
355
356 extern int allow_data_exec, allow_stack_exec;
357
358 int
359 override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
360 {
361 int current_abi;
362
363 /*
364 * Determine if the app is running in 32 or 64 bit mode.
365 */
366
367 if (vm_map_is_64bit(map))
368 current_abi = VM_ABI_64;
369 else
370 current_abi = VM_ABI_32;
371
372 /*
373 * Determine if we should allow the execution based on whether it's a
374 * stack or data area and the current architecture.
375 */
376
377 if (user_tag == VM_MEMORY_STACK)
378 return allow_stack_exec & current_abi;
379
380 return (allow_data_exec & current_abi) && (map->map_disallow_data_exec == FALSE);
381 }
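
/*
 * Illustrative caller sketch (an assumption -- not part of this file's
 * logic): a fault path that wants to honor these overrides would
 * consult override_nx() with the entry's alias before refusing an
 * instruction fetch from a page that lacks execute permission.
 *
 *	if ((fault_type & VM_PROT_EXECUTE) &&
 *	    !(entry->protection & VM_PROT_EXECUTE) &&
 *	    !override_nx(map, entry->alias)) {
 *		return KERN_PROTECTION_FAILURE;
 *	}
 */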
382
383
384 /*
385 * Virtual memory maps provide for the mapping, protection,
386 * and sharing of virtual memory objects. In addition,
387 * this module provides for an efficient virtual copy of
388 * memory from one map to another.
389 *
390 * Synchronization is required prior to most operations.
391 *
392 * Maps consist of an ordered doubly-linked list of simple
393 * entries; a single hint is used to speed up lookups.
394 *
395 * Sharing maps have been deleted from this version of Mach.
396 * All shared objects are now mapped directly into the respective
397 * maps. This requires a change in the copy on write strategy;
398 * the asymmetric (delayed) strategy is used for shared temporary
399 * objects instead of the symmetric (shadow) strategy. All maps
400 * are now "top level" maps (either task map, kernel map or submap
401 * of the kernel map).
402 *
403  *	Since portions of maps are specified by start/end addresses,
404 * which may not align with existing map entries, all
405 * routines merely "clip" entries to these start/end values.
406 * [That is, an entry is split into two, bordering at a
407 * start or end value.] Note that these clippings may not
408 * always be necessary (as the two resulting entries are then
409 * not changed); however, the clipping is done for convenience.
410 * No attempt is currently made to "glue back together" two
411 * abutting entries.
412 *
413 * The symmetric (shadow) copy strategy implements virtual copy
414 * by copying VM object references from one map to
415 * another, and then marking both regions as copy-on-write.
416 * It is important to note that only one writeable reference
417 * to a VM object region exists in any map when this strategy
418 * is used -- this means that shadow object creation can be
419  *	delayed until a write operation occurs.  The asymmetric (delayed)
420 * strategy allows multiple maps to have writeable references to
421 * the same region of a vm object, and hence cannot delay creating
422 * its copy objects. See vm_object_copy_quickly() in vm_object.c.
423 * Copying of permanent objects is completely different; see
424 * vm_object_copy_strategically() in vm_object.c.
425 */
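
/*
 *	Clipping sketch (illustrative assumption only): a routine operating
 *	on the range [start, end) of a map typically clips the bounding
 *	entries first, so that subsequent per-entry work lines up exactly
 *	with the requested range.  Either clip is a no-op when the boundary
 *	already coincides with an entry boundary.
 *
 *		if (vm_map_lookup_entry(map, start, &entry))
 *			vm_map_clip_start(map, entry, start);
 *		...
 *		vm_map_clip_end(map, entry, end);
 */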
426
427 static zone_t vm_map_zone; /* zone for vm_map structures */
428 static zone_t vm_map_entry_zone; /* zone for vm_map_entry structures */
429 static zone_t vm_map_entry_reserved_zone; /* zone with reserve for non-blocking
430 * allocations */
431 static zone_t vm_map_copy_zone; /* zone for vm_map_copy structures */
432
433
434 /*
435 * Placeholder object for submap operations. This object is dropped
436 * into the range by a call to vm_map_find, and removed when
437 * vm_map_submap creates the submap.
438 */
439
440 vm_object_t vm_submap_object;
441
442 static void *map_data;
443 static vm_size_t map_data_size;
444 static void *kentry_data;
445 static vm_size_t kentry_data_size;
446
447 #if CONFIG_EMBEDDED
448 #define NO_COALESCE_LIMIT 0
449 #else
450 #define NO_COALESCE_LIMIT ((1024 * 128) - 1)
451 #endif
452
453 /* Skip acquiring locks if we're in the midst of a kernel core dump */
454 unsigned int not_in_kdp = 1;
455
456 unsigned int vm_map_set_cache_attr_count = 0;
457
458 kern_return_t
459 vm_map_set_cache_attr(
460 vm_map_t map,
461 vm_map_offset_t va)
462 {
463 vm_map_entry_t map_entry;
464 vm_object_t object;
465 kern_return_t kr = KERN_SUCCESS;
466
467 vm_map_lock_read(map);
468
469 if (!vm_map_lookup_entry(map, va, &map_entry) ||
470 map_entry->is_sub_map) {
471 /*
472 * that memory is not properly mapped
473 */
474 kr = KERN_INVALID_ARGUMENT;
475 goto done;
476 }
477 object = map_entry->object.vm_object;
478
479 if (object == VM_OBJECT_NULL) {
480 /*
481 * there should be a VM object here at this point
482 */
483 kr = KERN_INVALID_ARGUMENT;
484 goto done;
485 }
486 vm_object_lock(object);
487 object->set_cache_attr = TRUE;
488 vm_object_unlock(object);
489
490 vm_map_set_cache_attr_count++;
491 done:
492 vm_map_unlock_read(map);
493
494 return kr;
495 }
496
497
498 #if CONFIG_CODE_DECRYPTION
499 /*
500 * vm_map_apple_protected:
501 * This remaps the requested part of the object with an object backed by
502 * the decrypting pager.
503 * crypt_info contains entry points and session data for the crypt module.
504 * The crypt_info block will be copied by vm_map_apple_protected. The data structures
505 * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
506 */
507 kern_return_t
508 vm_map_apple_protected(
509 vm_map_t map,
510 vm_map_offset_t start,
511 vm_map_offset_t end,
512 struct pager_crypt_info *crypt_info)
513 {
514 boolean_t map_locked;
515 kern_return_t kr;
516 vm_map_entry_t map_entry;
517 memory_object_t protected_mem_obj;
518 vm_object_t protected_object;
519 vm_map_offset_t map_addr;
520
521 vm_map_lock_read(map);
522 map_locked = TRUE;
523
524 /* lookup the protected VM object */
525 if (!vm_map_lookup_entry(map,
526 start,
527 &map_entry) ||
528 map_entry->vme_end < end ||
529 map_entry->is_sub_map) {
530 /* that memory is not properly mapped */
531 kr = KERN_INVALID_ARGUMENT;
532 goto done;
533 }
534 protected_object = map_entry->object.vm_object;
535 if (protected_object == VM_OBJECT_NULL) {
536 /* there should be a VM object here at this point */
537 kr = KERN_INVALID_ARGUMENT;
538 goto done;
539 }
540
541 /* make sure protected object stays alive while map is unlocked */
542 vm_object_reference(protected_object);
543
544 vm_map_unlock_read(map);
545 map_locked = FALSE;
546
547 /*
548 * Lookup (and create if necessary) the protected memory object
549 * matching that VM object.
550 * If successful, this also grabs a reference on the memory object,
551 * to guarantee that it doesn't go away before we get a chance to map
552 * it.
553 */
554 protected_mem_obj = apple_protect_pager_setup(protected_object, crypt_info);
555
556 /* release extra ref on protected object */
557 vm_object_deallocate(protected_object);
558
559 if (protected_mem_obj == NULL) {
560 kr = KERN_FAILURE;
561 goto done;
562 }
563
564 /* map this memory object in place of the current one */
565 map_addr = start;
566 kr = vm_map_enter_mem_object(map,
567 &map_addr,
568 end - start,
569 (mach_vm_offset_t) 0,
570 VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
571 (ipc_port_t) protected_mem_obj,
572 (map_entry->offset +
573 (start - map_entry->vme_start)),
574 TRUE,
575 map_entry->protection,
576 map_entry->max_protection,
577 map_entry->inheritance);
578 assert(map_addr == start);
579 /*
580 * Release the reference obtained by apple_protect_pager_setup().
581 * The mapping (if it succeeded) is now holding a reference on the
582 * memory object.
583 */
584 memory_object_deallocate(protected_mem_obj);
585
586 done:
587 if (map_locked) {
588 vm_map_unlock_read(map);
589 }
590 return kr;
591 }
592 #endif /* CONFIG_CODE_DECRYPTION */
593
594
595 lck_grp_t vm_map_lck_grp;
596 lck_grp_attr_t vm_map_lck_grp_attr;
597 lck_attr_t vm_map_lck_attr;
598
599
600 /*
601 * vm_map_init:
602 *
603 * Initialize the vm_map module. Must be called before
604 * any other vm_map routines.
605 *
606 * Map and entry structures are allocated from zones -- we must
607 * initialize those zones.
608 *
609 * There are three zones of interest:
610 *
611 * vm_map_zone: used to allocate maps.
612 * vm_map_entry_zone: used to allocate map entries.
613 * vm_map_entry_reserved_zone: fallback zone for kernel map entries
614 *
615 * The kernel allocates map entries from a special zone that is initially
616 * "crammed" with memory. It would be difficult (perhaps impossible) for
617  * the kernel to allocate more memory to an entry zone when it became
618 * empty since the very act of allocating memory implies the creation
619 * of a new entry.
620 */
621 void
622 vm_map_init(
623 void)
624 {
625 vm_size_t entry_zone_alloc_size;
626 vm_map_zone = zinit((vm_map_size_t) sizeof(struct _vm_map), 40*1024,
627 PAGE_SIZE, "maps");
628 zone_change(vm_map_zone, Z_NOENCRYPT, TRUE);
629 #if defined(__LP64__)
630 entry_zone_alloc_size = PAGE_SIZE * 5;
631 #else
632 entry_zone_alloc_size = PAGE_SIZE * 6;
633 #endif
634
635 vm_map_entry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
636 1024*1024, entry_zone_alloc_size,
637 "VM map entries");
638 zone_change(vm_map_entry_zone, Z_NOENCRYPT, TRUE);
639 zone_change(vm_map_entry_zone, Z_NOCALLOUT, TRUE);
640
641 vm_map_entry_reserved_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
642 kentry_data_size * 64, kentry_data_size,
643 "Reserved VM map entries");
644 zone_change(vm_map_entry_reserved_zone, Z_NOENCRYPT, TRUE);
645
646 vm_map_copy_zone = zinit((vm_map_size_t) sizeof(struct vm_map_copy),
647 16*1024, PAGE_SIZE, "VM map copies");
648 zone_change(vm_map_copy_zone, Z_NOENCRYPT, TRUE);
649
650 /*
651 * Cram the map and kentry zones with initial data.
652 * Set reserved_zone non-collectible to aid zone_gc().
653 */
654 zone_change(vm_map_zone, Z_COLLECT, FALSE);
655
656 zone_change(vm_map_entry_reserved_zone, Z_COLLECT, FALSE);
657 zone_change(vm_map_entry_reserved_zone, Z_EXPAND, FALSE);
658 zone_change(vm_map_entry_reserved_zone, Z_FOREIGN, TRUE);
659 zone_change(vm_map_entry_reserved_zone, Z_NOCALLOUT, TRUE);
660 zone_change(vm_map_entry_reserved_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
661 zone_change(vm_map_copy_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
662
663 zcram(vm_map_zone, (vm_offset_t)map_data, map_data_size);
664 zcram(vm_map_entry_reserved_zone, (vm_offset_t)kentry_data, kentry_data_size);
665
666 lck_grp_attr_setdefault(&vm_map_lck_grp_attr);
667 lck_grp_init(&vm_map_lck_grp, "vm_map", &vm_map_lck_grp_attr);
668 lck_attr_setdefault(&vm_map_lck_attr);
669 }
670
671 void
672 vm_map_steal_memory(
673 void)
674 {
675 uint32_t kentry_initial_pages;
676
677 map_data_size = round_page(10 * sizeof(struct _vm_map));
678 map_data = pmap_steal_memory(map_data_size);
679
680 /*
681 * kentry_initial_pages corresponds to the number of kernel map entries
682 * required during bootstrap until the asynchronous replenishment
683 * scheme is activated and/or entries are available from the general
684 * map entry pool.
685 */
686 #if defined(__LP64__)
687 kentry_initial_pages = 10;
688 #else
689 kentry_initial_pages = 6;
690 #endif
691 kentry_data_size = kentry_initial_pages * PAGE_SIZE;
692 kentry_data = pmap_steal_memory(kentry_data_size);
693 }
694
695 void vm_kernel_reserved_entry_init(void) {
696 zone_prio_refill_configure(vm_map_entry_reserved_zone, (6*PAGE_SIZE)/sizeof(struct vm_map_entry));
697 }
698
699 /*
700 * vm_map_create:
701 *
702 * Creates and returns a new empty VM map with
703 * the given physical map structure, and having
704 * the given lower and upper address bounds.
705 */
706 vm_map_t
707 vm_map_create(
708 pmap_t pmap,
709 vm_map_offset_t min,
710 vm_map_offset_t max,
711 boolean_t pageable)
712 {
713 static int color_seed = 0;
714 register vm_map_t result;
715
716 result = (vm_map_t) zalloc(vm_map_zone);
717 if (result == VM_MAP_NULL)
718 panic("vm_map_create");
719
720 vm_map_first_entry(result) = vm_map_to_entry(result);
721 vm_map_last_entry(result) = vm_map_to_entry(result);
722 result->hdr.nentries = 0;
723 result->hdr.entries_pageable = pageable;
724
725 vm_map_store_init( &(result->hdr) );
726
727 result->size = 0;
728 result->user_wire_limit = MACH_VM_MAX_ADDRESS; /* default limit is unlimited */
729 result->user_wire_size = 0;
730 result->ref_count = 1;
731 #if TASK_SWAPPER
732 result->res_count = 1;
733 result->sw_state = MAP_SW_IN;
734 #endif /* TASK_SWAPPER */
735 result->pmap = pmap;
736 result->min_offset = min;
737 result->max_offset = max;
738 result->wiring_required = FALSE;
739 result->no_zero_fill = FALSE;
740 result->mapped = FALSE;
741 result->wait_for_space = FALSE;
742 result->switch_protect = FALSE;
743 result->disable_vmentry_reuse = FALSE;
744 result->map_disallow_data_exec = FALSE;
745 result->highest_entry_end = 0;
746 result->first_free = vm_map_to_entry(result);
747 result->hint = vm_map_to_entry(result);
748 result->color_rr = (color_seed++) & vm_color_mask;
749 result->jit_entry_exists = FALSE;
750 #if CONFIG_FREEZE
751 result->default_freezer_toc = NULL;
752 #endif
753 vm_map_lock_init(result);
754 lck_mtx_init_ext(&result->s_lock, &result->s_lock_ext, &vm_map_lck_grp, &vm_map_lck_attr);
755
756 return(result);
757 }
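
/*
 * Illustrative sketch (an assumption, not code from this file): a task
 * map is typically built by pairing a fresh pmap with the platform's
 * user address bounds, roughly:
 *
 *	new_map = vm_map_create(pmap_create(0, is_64bit),
 *				MACH_VM_MIN_ADDRESS,
 *				MACH_VM_MAX_ADDRESS,
 *				TRUE);
 */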
758
759 /*
760 * vm_map_entry_create: [ internal use only ]
761 *
762 * Allocates a VM map entry for insertion in the
763 * given map (or map copy). No fields are filled.
764 */
765 #define vm_map_entry_create(map, map_locked) _vm_map_entry_create(&(map)->hdr, map_locked)
766
767 #define vm_map_copy_entry_create(copy, map_locked) \
768 _vm_map_entry_create(&(copy)->cpy_hdr, map_locked)
769 unsigned reserved_zalloc_count, nonreserved_zalloc_count;
770
771 static vm_map_entry_t
772 _vm_map_entry_create(
773 struct vm_map_header *map_header, boolean_t __unused map_locked)
774 {
775 zone_t zone;
776 vm_map_entry_t entry;
777
778 zone = vm_map_entry_zone;
779
780 assert(map_header->entries_pageable ? !map_locked : TRUE);
781
782 if (map_header->entries_pageable) {
783 entry = (vm_map_entry_t) zalloc(zone);
784 }
785 else {
786 entry = (vm_map_entry_t) zalloc_canblock(zone, FALSE);
787
788 if (entry == VM_MAP_ENTRY_NULL) {
789 zone = vm_map_entry_reserved_zone;
790 entry = (vm_map_entry_t) zalloc(zone);
791 OSAddAtomic(1, &reserved_zalloc_count);
792 } else
793 OSAddAtomic(1, &nonreserved_zalloc_count);
794 }
795
796 if (entry == VM_MAP_ENTRY_NULL)
797 panic("vm_map_entry_create");
798 entry->from_reserved_zone = (zone == vm_map_entry_reserved_zone);
799
800 vm_map_store_update( (vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE);
801
802 return(entry);
803 }
804
805 /*
806 * vm_map_entry_dispose: [ internal use only ]
807 *
808 * Inverse of vm_map_entry_create.
809 *
810 * write map lock held so no need to
811  *	do anything special to ensure correctness
812 * of the stores
813 */
814 #define vm_map_entry_dispose(map, entry) \
815 vm_map_store_update( map, entry, VM_MAP_ENTRY_DELETE); \
816 _vm_map_entry_dispose(&(map)->hdr, (entry))
817
818 #define vm_map_copy_entry_dispose(map, entry) \
819 _vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
820
821 static void
822 _vm_map_entry_dispose(
823 register struct vm_map_header *map_header,
824 register vm_map_entry_t entry)
825 {
826 register zone_t zone;
827
828 if (map_header->entries_pageable || !(entry->from_reserved_zone))
829 zone = vm_map_entry_zone;
830 else
831 zone = vm_map_entry_reserved_zone;
832
833 if (!map_header->entries_pageable) {
834 if (zone == vm_map_entry_zone)
835 OSAddAtomic(-1, &nonreserved_zalloc_count);
836 else
837 OSAddAtomic(-1, &reserved_zalloc_count);
838 }
839
840 zfree(zone, entry);
841 }
842
843 #if MACH_ASSERT
844 static boolean_t first_free_check = FALSE;
845 boolean_t
846 first_free_is_valid(
847 vm_map_t map)
848 {
849 if (!first_free_check)
850 return TRUE;
851
852 return( first_free_is_valid_store( map ));
853 }
854 #endif /* MACH_ASSERT */
855
856
857 #define vm_map_copy_entry_link(copy, after_where, entry) \
858 _vm_map_store_entry_link(&(copy)->cpy_hdr, after_where, (entry))
859
860 #define vm_map_copy_entry_unlink(copy, entry) \
861 _vm_map_store_entry_unlink(&(copy)->cpy_hdr, (entry))
862
863 #if MACH_ASSERT && TASK_SWAPPER
864 /*
865 * vm_map_res_reference:
866 *
867 * Adds another valid residence count to the given map.
868 *
869 * Map is locked so this function can be called from
870 * vm_map_swapin.
871 *
872 */
873 void vm_map_res_reference(register vm_map_t map)
874 {
875 /* assert map is locked */
876 assert(map->res_count >= 0);
877 assert(map->ref_count >= map->res_count);
878 if (map->res_count == 0) {
879 lck_mtx_unlock(&map->s_lock);
880 vm_map_lock(map);
881 vm_map_swapin(map);
882 lck_mtx_lock(&map->s_lock);
883 ++map->res_count;
884 vm_map_unlock(map);
885 } else
886 ++map->res_count;
887 }
888
889 /*
890 * vm_map_reference_swap:
891 *
892 * Adds valid reference and residence counts to the given map.
893 *
894 * The map may not be in memory (i.e. zero residence count).
895 *
896 */
897 void vm_map_reference_swap(register vm_map_t map)
898 {
899 assert(map != VM_MAP_NULL);
900 lck_mtx_lock(&map->s_lock);
901 assert(map->res_count >= 0);
902 assert(map->ref_count >= map->res_count);
903 map->ref_count++;
904 vm_map_res_reference(map);
905 lck_mtx_unlock(&map->s_lock);
906 }
907
908 /*
909 * vm_map_res_deallocate:
910 *
911 * Decrement residence count on a map; possibly causing swapout.
912 *
913 * The map must be in memory (i.e. non-zero residence count).
914 *
915 * The map is locked, so this function is callable from vm_map_deallocate.
916 *
917 */
918 void vm_map_res_deallocate(register vm_map_t map)
919 {
920 assert(map->res_count > 0);
921 if (--map->res_count == 0) {
922 lck_mtx_unlock(&map->s_lock);
923 vm_map_lock(map);
924 vm_map_swapout(map);
925 vm_map_unlock(map);
926 lck_mtx_lock(&map->s_lock);
927 }
928 assert(map->ref_count >= map->res_count);
929 }
930 #endif /* MACH_ASSERT && TASK_SWAPPER */
931
932 /*
933 * vm_map_destroy:
934 *
935 * Actually destroy a map.
936 */
937 void
938 vm_map_destroy(
939 vm_map_t map,
940 int flags)
941 {
942 vm_map_lock(map);
943
944 /* clean up regular map entries */
945 (void) vm_map_delete(map, map->min_offset, map->max_offset,
946 flags, VM_MAP_NULL);
947 /* clean up leftover special mappings (commpage, etc...) */
948 (void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL,
949 flags, VM_MAP_NULL);
950
951 #if CONFIG_FREEZE
952 if (map->default_freezer_toc){
953 default_freezer_mapping_free( &(map->default_freezer_toc), TRUE);
954 }
955 #endif
956 vm_map_unlock(map);
957
958 assert(map->hdr.nentries == 0);
959
960 if(map->pmap)
961 pmap_destroy(map->pmap);
962
963 zfree(vm_map_zone, map);
964 }
965
966 #if TASK_SWAPPER
967 /*
968 * vm_map_swapin/vm_map_swapout
969 *
970 * Swap a map in and out, either referencing or releasing its resources.
971  *	These functions are for internal use only; however, they must be exported
972 * because they may be called from macros, which are exported.
973 *
974 * In the case of swapout, there could be races on the residence count,
975 * so if the residence count is up, we return, assuming that a
976 * vm_map_deallocate() call in the near future will bring us back.
977 *
978 * Locking:
979 * -- We use the map write lock for synchronization among races.
980 * -- The map write lock, and not the simple s_lock, protects the
981 * swap state of the map.
982 * -- If a map entry is a share map, then we hold both locks, in
983 * hierarchical order.
984 *
985 * Synchronization Notes:
986 * 1) If a vm_map_swapin() call happens while swapout in progress, it
987 * will block on the map lock and proceed when swapout is through.
988 * 2) A vm_map_reference() call at this time is illegal, and will
989 * cause a panic. vm_map_reference() is only allowed on resident
990 * maps, since it refuses to block.
991 * 3) A vm_map_swapin() call during a swapin will block, and
992  *	proceed when the first swapin is done, turning into a nop.
993 * This is the reason the res_count is not incremented until
994 * after the swapin is complete.
995 * 4) There is a timing hole after the checks of the res_count, before
996 * the map lock is taken, during which a swapin may get the lock
997 * before a swapout about to happen. If this happens, the swapin
998 * will detect the state and increment the reference count, causing
999 * the swapout to be a nop, thereby delaying it until a later
1000 * vm_map_deallocate. If the swapout gets the lock first, then
1001 * the swapin will simply block until the swapout is done, and
1002 * then proceed.
1003 *
1004 * Because vm_map_swapin() is potentially an expensive operation, it
1005 * should be used with caution.
1006 *
1007 * Invariants:
1008 * 1) A map with a residence count of zero is either swapped, or
1009 * being swapped.
1010 * 2) A map with a non-zero residence count is either resident,
1011 * or being swapped in.
1012 */
1013
1014 int vm_map_swap_enable = 1;
1015
1016 void vm_map_swapin (vm_map_t map)
1017 {
1018 register vm_map_entry_t entry;
1019
1020 if (!vm_map_swap_enable) /* debug */
1021 return;
1022
1023 /*
1024 * Map is locked
1025 * First deal with various races.
1026 */
1027 if (map->sw_state == MAP_SW_IN)
1028 /*
1029 * we raced with swapout and won. Returning will incr.
1030 * the res_count, turning the swapout into a nop.
1031 */
1032 return;
1033
1034 /*
1035 * The residence count must be zero. If we raced with another
1036 * swapin, the state would have been IN; if we raced with a
1037 * swapout (after another competing swapin), we must have lost
1038 * the race to get here (see above comment), in which case
1039 * res_count is still 0.
1040 */
1041 assert(map->res_count == 0);
1042
1043 /*
1044 * There are no intermediate states of a map going out or
1045 * coming in, since the map is locked during the transition.
1046 */
1047 assert(map->sw_state == MAP_SW_OUT);
1048
1049 /*
1050 * We now operate upon each map entry. If the entry is a sub-
1051 * or share-map, we call vm_map_res_reference upon it.
1052 * If the entry is an object, we call vm_object_res_reference
1053 * (this may iterate through the shadow chain).
1054 * Note that we hold the map locked the entire time,
1055 * even if we get back here via a recursive call in
1056 * vm_map_res_reference.
1057 */
1058 entry = vm_map_first_entry(map);
1059
1060 while (entry != vm_map_to_entry(map)) {
1061 if (entry->object.vm_object != VM_OBJECT_NULL) {
1062 if (entry->is_sub_map) {
1063 vm_map_t lmap = entry->object.sub_map;
1064 lck_mtx_lock(&lmap->s_lock);
1065 vm_map_res_reference(lmap);
1066 lck_mtx_unlock(&lmap->s_lock);
1067 } else {
1068 vm_object_t object = entry->object.vm_object;
1069 vm_object_lock(object);
1070 /*
1071 * This call may iterate through the
1072 * shadow chain.
1073 */
1074 vm_object_res_reference(object);
1075 vm_object_unlock(object);
1076 }
1077 }
1078 entry = entry->vme_next;
1079 }
1080 assert(map->sw_state == MAP_SW_OUT);
1081 map->sw_state = MAP_SW_IN;
1082 }
1083
1084 void vm_map_swapout(vm_map_t map)
1085 {
1086 register vm_map_entry_t entry;
1087
1088 /*
1089 * Map is locked
1090 * First deal with various races.
1091 * If we raced with a swapin and lost, the residence count
1092 * will have been incremented to 1, and we simply return.
1093 */
1094 lck_mtx_lock(&map->s_lock);
1095 if (map->res_count != 0) {
1096 lck_mtx_unlock(&map->s_lock);
1097 return;
1098 }
1099 lck_mtx_unlock(&map->s_lock);
1100
1101 /*
1102 * There are no intermediate states of a map going out or
1103 * coming in, since the map is locked during the transition.
1104 */
1105 assert(map->sw_state == MAP_SW_IN);
1106
1107 if (!vm_map_swap_enable)
1108 return;
1109
1110 /*
1111 * We now operate upon each map entry. If the entry is a sub-
1112 * or share-map, we call vm_map_res_deallocate upon it.
1113 * If the entry is an object, we call vm_object_res_deallocate
1114 * (this may iterate through the shadow chain).
1115 * Note that we hold the map locked the entire time,
1116 * even if we get back here via a recursive call in
1117 * vm_map_res_deallocate.
1118 */
1119 entry = vm_map_first_entry(map);
1120
1121 while (entry != vm_map_to_entry(map)) {
1122 if (entry->object.vm_object != VM_OBJECT_NULL) {
1123 if (entry->is_sub_map) {
1124 vm_map_t lmap = entry->object.sub_map;
1125 lck_mtx_lock(&lmap->s_lock);
1126 vm_map_res_deallocate(lmap);
1127 lck_mtx_unlock(&lmap->s_lock);
1128 } else {
1129 vm_object_t object = entry->object.vm_object;
1130 vm_object_lock(object);
1131 /*
1132 * This call may take a long time,
1133 * since it could actively push
1134 * out pages (if we implement it
1135 * that way).
1136 */
1137 vm_object_res_deallocate(object);
1138 vm_object_unlock(object);
1139 }
1140 }
1141 entry = entry->vme_next;
1142 }
1143 assert(map->sw_state == MAP_SW_IN);
1144 map->sw_state = MAP_SW_OUT;
1145 }
1146
1147 #endif /* TASK_SWAPPER */
1148
1149 /*
1150 * vm_map_lookup_entry: [ internal use only ]
1151 *
1152 * Calls into the vm map store layer to find the map
1153 * entry containing (or immediately preceding) the
1154 * specified address in the given map; the entry is returned
1155 * in the "entry" parameter. The boolean
1156 * result indicates whether the address is
1157 * actually contained in the map.
1158 */
1159 boolean_t
1160 vm_map_lookup_entry(
1161 register vm_map_t map,
1162 register vm_map_offset_t address,
1163 vm_map_entry_t *entry) /* OUT */
1164 {
1165 return ( vm_map_store_lookup_entry( map, address, entry ));
1166 }
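
/*
 * Illustrative caller sketch (an assumption): the lookup is only
 * meaningful while at least the read lock is held, and on a FALSE
 * result "entry" is the entry immediately preceding the address (or
 * vm_map_to_entry(map) if there is none).
 *
 *	vm_map_entry_t	entry;
 *
 *	vm_map_lock_read(map);
 *	if (vm_map_lookup_entry(map, addr, &entry)) {
 *		... addr lies in [entry->vme_start, entry->vme_end) ...
 *	}
 *	vm_map_unlock_read(map);
 */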
1167
1168 /*
1169 * Routine: vm_map_find_space
1170 * Purpose:
1171 * Allocate a range in the specified virtual address map,
1172 * returning the entry allocated for that range.
1173 * Used by kmem_alloc, etc.
1174 *
1175  *	The map must NOT be locked. It will be returned locked
1176 * on KERN_SUCCESS, unlocked on failure.
1177 *
1178 * If an entry is allocated, the object/offset fields
1179 * are initialized to zero.
1180 */
1181 kern_return_t
1182 vm_map_find_space(
1183 register vm_map_t map,
1184 vm_map_offset_t *address, /* OUT */
1185 vm_map_size_t size,
1186 vm_map_offset_t mask,
1187 int flags,
1188 vm_map_entry_t *o_entry) /* OUT */
1189 {
1190 register vm_map_entry_t entry, new_entry;
1191 register vm_map_offset_t start;
1192 register vm_map_offset_t end;
1193
1194 if (size == 0) {
1195 *address = 0;
1196 return KERN_INVALID_ARGUMENT;
1197 }
1198
1199 if (flags & VM_FLAGS_GUARD_AFTER) {
1200 /* account for the back guard page in the size */
1201 size += PAGE_SIZE_64;
1202 }
1203
1204 new_entry = vm_map_entry_create(map, FALSE);
1205
1206 /*
1207 * Look for the first possible address; if there's already
1208 * something at this address, we have to start after it.
1209 */
1210
1211 vm_map_lock(map);
1212
1213 if( map->disable_vmentry_reuse == TRUE) {
1214 VM_MAP_HIGHEST_ENTRY(map, entry, start);
1215 } else {
1216 assert(first_free_is_valid(map));
1217 if ((entry = map->first_free) == vm_map_to_entry(map))
1218 start = map->min_offset;
1219 else
1220 start = entry->vme_end;
1221 }
1222
1223 /*
1224 * In any case, the "entry" always precedes
1225 * the proposed new region throughout the loop:
1226 */
1227
1228 while (TRUE) {
1229 register vm_map_entry_t next;
1230
1231 /*
1232 * Find the end of the proposed new region.
1233 * Be sure we didn't go beyond the end, or
1234 * wrap around the address.
1235 */
1236
1237 if (flags & VM_FLAGS_GUARD_BEFORE) {
1238 /* reserve space for the front guard page */
1239 start += PAGE_SIZE_64;
1240 }
1241 end = ((start + mask) & ~mask);
1242
1243 if (end < start) {
1244 vm_map_entry_dispose(map, new_entry);
1245 vm_map_unlock(map);
1246 return(KERN_NO_SPACE);
1247 }
1248 start = end;
1249 end += size;
1250
1251 if ((end > map->max_offset) || (end < start)) {
1252 vm_map_entry_dispose(map, new_entry);
1253 vm_map_unlock(map);
1254 return(KERN_NO_SPACE);
1255 }
1256
1257 /*
1258 * If there are no more entries, we must win.
1259 */
1260
1261 next = entry->vme_next;
1262 if (next == vm_map_to_entry(map))
1263 break;
1264
1265 /*
1266 * If there is another entry, it must be
1267 * after the end of the potential new region.
1268 */
1269
1270 if (next->vme_start >= end)
1271 break;
1272
1273 /*
1274 * Didn't fit -- move to the next entry.
1275 */
1276
1277 entry = next;
1278 start = entry->vme_end;
1279 }
1280
1281 /*
1282 * At this point,
1283 * "start" and "end" should define the endpoints of the
1284 * available new range, and
1285 * "entry" should refer to the region before the new
1286 * range, and
1287 *
1288 * the map should be locked.
1289 */
1290
1291 if (flags & VM_FLAGS_GUARD_BEFORE) {
1292 /* go back for the front guard page */
1293 start -= PAGE_SIZE_64;
1294 }
1295 *address = start;
1296
1297 assert(start < end);
1298 new_entry->vme_start = start;
1299 new_entry->vme_end = end;
1300 assert(page_aligned(new_entry->vme_start));
1301 assert(page_aligned(new_entry->vme_end));
1302
1303 new_entry->is_shared = FALSE;
1304 new_entry->is_sub_map = FALSE;
1305 new_entry->use_pmap = FALSE;
1306 new_entry->object.vm_object = VM_OBJECT_NULL;
1307 new_entry->offset = (vm_object_offset_t) 0;
1308
1309 new_entry->needs_copy = FALSE;
1310
1311 new_entry->inheritance = VM_INHERIT_DEFAULT;
1312 new_entry->protection = VM_PROT_DEFAULT;
1313 new_entry->max_protection = VM_PROT_ALL;
1314 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
1315 new_entry->wired_count = 0;
1316 new_entry->user_wired_count = 0;
1317
1318 new_entry->in_transition = FALSE;
1319 new_entry->needs_wakeup = FALSE;
1320 new_entry->no_cache = FALSE;
1321 new_entry->permanent = FALSE;
1322 new_entry->superpage_size = 0;
1323
1324 new_entry->alias = 0;
1325 new_entry->zero_wired_pages = FALSE;
1326
1327 VM_GET_FLAGS_ALIAS(flags, new_entry->alias);
1328
1329 /*
1330 * Insert the new entry into the list
1331 */
1332
1333 vm_map_store_entry_link(map, entry, new_entry);
1334
1335 map->size += size;
1336
1337 /*
1338 * Update the lookup hint
1339 */
1340 SAVE_HINT_MAP_WRITE(map, new_entry);
1341
1342 *o_entry = new_entry;
1343 return(KERN_SUCCESS);
1344 }
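
/*
 * Illustrative sketch of the calling convention (an assumption, in the
 * spirit of kmem_alloc): on KERN_SUCCESS the map comes back locked and
 * the new entry's object/offset are zeroed, so the caller installs its
 * object and then unlocks.
 *
 *	vm_map_offset_t	addr;
 *	vm_map_entry_t	entry;
 *
 *	if (vm_map_find_space(kernel_map, &addr, size, 0, 0, &entry)
 *	    == KERN_SUCCESS) {
 *		entry->object.vm_object = object;
 *		entry->offset = (vm_object_offset_t) 0;
 *		vm_map_unlock(kernel_map);
 *	}
 */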
1345
1346 int vm_map_pmap_enter_print = FALSE;
1347 int vm_map_pmap_enter_enable = FALSE;
1348
1349 /*
1350 * Routine: vm_map_pmap_enter [internal only]
1351 *
1352 * Description:
1353 * Force pages from the specified object to be entered into
1354 * the pmap at the specified address if they are present.
1355  *		As soon as a page is not found in the object, the scan ends.
1356 *
1357 * Returns:
1358 * Nothing.
1359 *
1360 * In/out conditions:
1361 * The source map should not be locked on entry.
1362 */
1363 static void
1364 vm_map_pmap_enter(
1365 vm_map_t map,
1366 register vm_map_offset_t addr,
1367 register vm_map_offset_t end_addr,
1368 register vm_object_t object,
1369 vm_object_offset_t offset,
1370 vm_prot_t protection)
1371 {
1372 int type_of_fault;
1373 kern_return_t kr;
1374
1375 if(map->pmap == 0)
1376 return;
1377
1378 while (addr < end_addr) {
1379 register vm_page_t m;
1380
1381 vm_object_lock(object);
1382
1383 m = vm_page_lookup(object, offset);
1384 /*
1385 * ENCRYPTED SWAP:
1386 * The user should never see encrypted data, so do not
1387 * enter an encrypted page in the page table.
1388 */
1389 if (m == VM_PAGE_NULL || m->busy || m->encrypted ||
1390 m->fictitious ||
1391 (m->unusual && ( m->error || m->restart || m->absent))) {
1392 vm_object_unlock(object);
1393 return;
1394 }
1395
1396 if (vm_map_pmap_enter_print) {
1397 printf("vm_map_pmap_enter:");
1398 printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
1399 map, (unsigned long long)addr, object, (unsigned long long)offset);
1400 }
1401 type_of_fault = DBG_CACHE_HIT_FAULT;
1402 kr = vm_fault_enter(m, map->pmap, addr, protection, protection,
1403 VM_PAGE_WIRED(m), FALSE, FALSE, FALSE,
1404 &type_of_fault);
1405
1406 vm_object_unlock(object);
1407
1408 offset += PAGE_SIZE_64;
1409 addr += PAGE_SIZE;
1410 }
1411 }
1412
1413 boolean_t vm_map_pmap_is_empty(
1414 vm_map_t map,
1415 vm_map_offset_t start,
1416 vm_map_offset_t end);
1417 boolean_t vm_map_pmap_is_empty(
1418 vm_map_t map,
1419 vm_map_offset_t start,
1420 vm_map_offset_t end)
1421 {
1422 #ifdef MACHINE_PMAP_IS_EMPTY
1423 return pmap_is_empty(map->pmap, start, end);
1424 #else /* MACHINE_PMAP_IS_EMPTY */
1425 vm_map_offset_t offset;
1426 ppnum_t phys_page;
1427
1428 if (map->pmap == NULL) {
1429 return TRUE;
1430 }
1431
1432 for (offset = start;
1433 offset < end;
1434 offset += PAGE_SIZE) {
1435 phys_page = pmap_find_phys(map->pmap, offset);
1436 if (phys_page) {
1437 kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
1438 "page %d at 0x%llx\n",
1439 map, (long long)start, (long long)end,
1440 phys_page, (long long)offset);
1441 return FALSE;
1442 }
1443 }
1444 return TRUE;
1445 #endif /* MACHINE_PMAP_IS_EMPTY */
1446 }
1447
1448 /*
1449 * Routine: vm_map_enter
1450 *
1451 * Description:
1452 * Allocate a range in the specified virtual address map.
1453 * The resulting range will refer to memory defined by
1454 * the given memory object and offset into that object.
1455 *
1456 * Arguments are as defined in the vm_map call.
1457 */
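
/*
 * Illustrative caller sketch (an assumption, for documentation only):
 * an anonymous, pageable allocation placed anywhere in the map, which
 * is essentially what vm_allocate() boils down to.
 *
 *	vm_map_offset_t	map_addr = 0;
 *
 *	kr = vm_map_enter(map, &map_addr, size, (vm_map_offset_t) 0,
 *			  VM_FLAGS_ANYWHERE,
 *			  VM_OBJECT_NULL, (vm_object_offset_t) 0, FALSE,
 *			  VM_PROT_DEFAULT, VM_PROT_ALL,
 *			  VM_INHERIT_DEFAULT);
 */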
1458 int _map_enter_debug = 0;
1459 static unsigned int vm_map_enter_restore_successes = 0;
1460 static unsigned int vm_map_enter_restore_failures = 0;
1461 kern_return_t
1462 vm_map_enter(
1463 vm_map_t map,
1464 vm_map_offset_t *address, /* IN/OUT */
1465 vm_map_size_t size,
1466 vm_map_offset_t mask,
1467 int flags,
1468 vm_object_t object,
1469 vm_object_offset_t offset,
1470 boolean_t needs_copy,
1471 vm_prot_t cur_protection,
1472 vm_prot_t max_protection,
1473 vm_inherit_t inheritance)
1474 {
1475 vm_map_entry_t entry, new_entry;
1476 vm_map_offset_t start, tmp_start, tmp_offset;
1477 vm_map_offset_t end, tmp_end;
1478 vm_map_offset_t tmp2_start, tmp2_end;
1479 vm_map_offset_t step;
1480 kern_return_t result = KERN_SUCCESS;
1481 vm_map_t zap_old_map = VM_MAP_NULL;
1482 vm_map_t zap_new_map = VM_MAP_NULL;
1483 boolean_t map_locked = FALSE;
1484 boolean_t pmap_empty = TRUE;
1485 boolean_t new_mapping_established = FALSE;
1486 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
1487 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
1488 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
1489 boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
1490 boolean_t is_submap = ((flags & VM_FLAGS_SUBMAP) != 0);
1491 boolean_t permanent = ((flags & VM_FLAGS_PERMANENT) != 0);
1492 unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
1493 char alias;
1494 vm_map_offset_t effective_min_offset, effective_max_offset;
1495 kern_return_t kr;
1496
1497 if (superpage_size) {
1498 switch (superpage_size) {
1499 /*
1500 * Note that the current implementation only supports
1501 * a single size for superpages, SUPERPAGE_SIZE, per
1502                   * architecture. Once more sizes need to be supported,
1503                   * SUPERPAGE_SIZE will have to be replaced with a lookup
1504                   * of the size based on superpage_size.
1505 */
1506 #ifdef __x86_64__
1507 case SUPERPAGE_SIZE_ANY:
1508 /* handle it like 2 MB and round up to page size */
1509 size = (size + 2*1024*1024 - 1) & ~(2*1024*1024 - 1);
1510 case SUPERPAGE_SIZE_2MB:
1511 break;
1512 #endif
1513 default:
1514 return KERN_INVALID_ARGUMENT;
1515 }
1516 mask = SUPERPAGE_SIZE-1;
1517 if (size & (SUPERPAGE_SIZE-1))
1518 return KERN_INVALID_ARGUMENT;
1519 inheritance = VM_INHERIT_NONE; /* fork() children won't inherit superpages */
1520 }
1521
1522
1523 #if CONFIG_EMBEDDED
1524 if (cur_protection & VM_PROT_WRITE){
1525 if ((cur_protection & VM_PROT_EXECUTE) && !(flags & VM_FLAGS_MAP_JIT)){
1526 printf("EMBEDDED: %s curprot cannot be write+execute. turning off execute\n", __PRETTY_FUNCTION__);
1527 cur_protection &= ~VM_PROT_EXECUTE;
1528 }
1529 }
1530 #endif /* CONFIG_EMBEDDED */
1531
1532 if (is_submap) {
1533 if (purgable) {
1534 /* submaps can not be purgeable */
1535 return KERN_INVALID_ARGUMENT;
1536 }
1537 if (object == VM_OBJECT_NULL) {
1538 /* submaps can not be created lazily */
1539 return KERN_INVALID_ARGUMENT;
1540 }
1541 }
1542 if (flags & VM_FLAGS_ALREADY) {
1543 /*
1544 * VM_FLAGS_ALREADY says that it's OK if the same mapping
1545                  * is already present.  For it to be meaningful, the requested
1546                  * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
1547                  * we shouldn't try to remove what was mapped there first
1548 * (!VM_FLAGS_OVERWRITE).
1549 */
1550 if ((flags & VM_FLAGS_ANYWHERE) ||
1551 (flags & VM_FLAGS_OVERWRITE)) {
1552 return KERN_INVALID_ARGUMENT;
1553 }
1554 }
1555
1556 effective_min_offset = map->min_offset;
1557
1558 if (flags & VM_FLAGS_BEYOND_MAX) {
1559 /*
1560 * Allow an insertion beyond the map's max offset.
1561 */
1562 if (vm_map_is_64bit(map))
1563 effective_max_offset = 0xFFFFFFFFFFFFF000ULL;
1564 else
1565 effective_max_offset = 0x00000000FFFFF000ULL;
1566 } else {
1567 effective_max_offset = map->max_offset;
1568 }
1569
1570 if (size == 0 ||
1571 (offset & PAGE_MASK_64) != 0) {
1572 *address = 0;
1573 return KERN_INVALID_ARGUMENT;
1574 }
1575
1576 VM_GET_FLAGS_ALIAS(flags, alias);
1577
1578 #define RETURN(value) { result = value; goto BailOut; }
1579
1580 assert(page_aligned(*address));
1581 assert(page_aligned(size));
1582
1583 /*
1584 * Only zero-fill objects are allowed to be purgable.
1585 * LP64todo - limit purgable objects to 32-bits for now
1586 */
1587 if (purgable &&
1588 (offset != 0 ||
1589 (object != VM_OBJECT_NULL &&
1590 (object->vo_size != size ||
1591 object->purgable == VM_PURGABLE_DENY))
1592 || size > ANON_MAX_SIZE)) /* LP64todo: remove when dp capable */
1593 return KERN_INVALID_ARGUMENT;
1594
1595 if (!anywhere && overwrite) {
1596 /*
1597 * Create a temporary VM map to hold the old mappings in the
1598 * affected area while we create the new one.
1599 * This avoids releasing the VM map lock in
1600 * vm_map_entry_delete() and allows atomicity
1601 * when we want to replace some mappings with a new one.
1602 * It also allows us to restore the old VM mappings if the
1603 * new mapping fails.
1604 */
1605 zap_old_map = vm_map_create(PMAP_NULL,
1606 *address,
1607 *address + size,
1608 map->hdr.entries_pageable);
1609 }
1610
1611 StartAgain: ;
1612
1613 start = *address;
1614
1615 if (anywhere) {
1616 vm_map_lock(map);
1617 map_locked = TRUE;
1618
1619 if ((flags & VM_FLAGS_MAP_JIT) && (map->jit_entry_exists)){
1620 result = KERN_INVALID_ARGUMENT;
1621 goto BailOut;
1622 }
1623
1624 /*
1625 * Calculate the first possible address.
1626 */
1627
1628 if (start < effective_min_offset)
1629 start = effective_min_offset;
1630 if (start > effective_max_offset)
1631 RETURN(KERN_NO_SPACE);
1632
1633 /*
1634 * Look for the first possible address;
1635 * if there's already something at this
1636 * address, we have to start after it.
1637 */
1638
1639 if( map->disable_vmentry_reuse == TRUE) {
1640 VM_MAP_HIGHEST_ENTRY(map, entry, start);
1641 } else {
1642 assert(first_free_is_valid(map));
1643
1644 entry = map->first_free;
1645
1646 if (entry == vm_map_to_entry(map)) {
1647 entry = NULL;
1648 } else {
1649 if (entry->vme_next == vm_map_to_entry(map)){
1650 /*
1651 * Hole at the end of the map.
1652 */
1653 entry = NULL;
1654 } else {
1655 if (start < (entry->vme_next)->vme_start ) {
1656 start = entry->vme_end;
1657 } else {
1658 /*
1659 * Need to do a lookup.
1660 */
1661 entry = NULL;
1662 }
1663 }
1664 }
1665
1666 if (entry == NULL) {
1667 vm_map_entry_t tmp_entry;
1668 if (vm_map_lookup_entry(map, start, &tmp_entry))
1669 start = tmp_entry->vme_end;
1670 entry = tmp_entry;
1671 }
1672 }
1673
1674 /*
1675 * In any case, the "entry" always precedes
1676 * the proposed new region throughout the
1677 * loop:
1678 */
1679
1680 while (TRUE) {
1681 register vm_map_entry_t next;
1682
1683 /*
1684 * Find the end of the proposed new region.
1685 * Be sure we didn't go beyond the end, or
1686 * wrap around the address.
1687 */
1688
1689 end = ((start + mask) & ~mask);
1690 if (end < start)
1691 RETURN(KERN_NO_SPACE);
1692 start = end;
1693 end += size;
1694
1695 if ((end > effective_max_offset) || (end < start)) {
1696 if (map->wait_for_space) {
1697 if (size <= (effective_max_offset -
1698 effective_min_offset)) {
1699 assert_wait((event_t)map,
1700 THREAD_ABORTSAFE);
1701 vm_map_unlock(map);
1702 map_locked = FALSE;
1703 thread_block(THREAD_CONTINUE_NULL);
1704 goto StartAgain;
1705 }
1706 }
1707 RETURN(KERN_NO_SPACE);
1708 }
1709
1710 /*
1711 * If there are no more entries, we must win.
1712 */
1713
1714 next = entry->vme_next;
1715 if (next == vm_map_to_entry(map))
1716 break;
1717
1718 /*
1719 * If there is another entry, it must be
1720 * after the end of the potential new region.
1721 */
1722
1723 if (next->vme_start >= end)
1724 break;
1725
1726 /*
1727 * Didn't fit -- move to the next entry.
1728 */
1729
1730 entry = next;
1731 start = entry->vme_end;
1732 }
1733 *address = start;
1734 } else {
1735 /*
1736 * Verify that:
1737 * the address doesn't itself violate
1738 * the mask requirement.
1739 */
1740
1741 vm_map_lock(map);
1742 map_locked = TRUE;
1743 if ((start & mask) != 0)
1744 RETURN(KERN_NO_SPACE);
1745
1746 /*
1747 * ... the address is within bounds
1748 */
1749
1750 end = start + size;
1751
1752 if ((start < effective_min_offset) ||
1753 (end > effective_max_offset) ||
1754 (start >= end)) {
1755 RETURN(KERN_INVALID_ADDRESS);
1756 }
1757
1758 if (overwrite && zap_old_map != VM_MAP_NULL) {
1759 /*
1760 * Fixed mapping and "overwrite" flag: attempt to
1761 * remove all existing mappings in the specified
1762 * address range, saving them in our "zap_old_map".
1763 */
1764 (void) vm_map_delete(map, start, end,
1765 VM_MAP_REMOVE_SAVE_ENTRIES,
1766 zap_old_map);
1767 }
1768
1769 /*
1770 * ... the starting address isn't allocated
1771 */
1772
1773 if (vm_map_lookup_entry(map, start, &entry)) {
1774 if (! (flags & VM_FLAGS_ALREADY)) {
1775 RETURN(KERN_NO_SPACE);
1776 }
1777 /*
1778 * Check if what's already there is what we want.
1779 */
1780 tmp_start = start;
1781 tmp_offset = offset;
1782 if (entry->vme_start < start) {
1783 tmp_start -= start - entry->vme_start;
1784 tmp_offset -= start - entry->vme_start;
1785
1786 }
1787 for (; entry->vme_start < end;
1788 entry = entry->vme_next) {
1789 /*
1790 * Check if the mapping's attributes
1791 * match the existing map entry.
1792 */
1793 if (entry == vm_map_to_entry(map) ||
1794 entry->vme_start != tmp_start ||
1795 entry->is_sub_map != is_submap ||
1796 entry->offset != tmp_offset ||
1797 entry->needs_copy != needs_copy ||
1798 entry->protection != cur_protection ||
1799 entry->max_protection != max_protection ||
1800 entry->inheritance != inheritance ||
1801 entry->alias != alias) {
1802 /* not the same mapping ! */
1803 RETURN(KERN_NO_SPACE);
1804 }
1805 /*
1806 * Check if the same object is being mapped.
1807 */
1808 if (is_submap) {
1809 if (entry->object.sub_map !=
1810 (vm_map_t) object) {
1811 /* not the same submap */
1812 RETURN(KERN_NO_SPACE);
1813 }
1814 } else {
1815 if (entry->object.vm_object != object) {
1816 /* not the same VM object... */
1817 vm_object_t obj2;
1818
1819 obj2 = entry->object.vm_object;
1820 if ((obj2 == VM_OBJECT_NULL ||
1821 obj2->internal) &&
1822 (object == VM_OBJECT_NULL ||
1823 object->internal)) {
1824 /*
1825 * ... but both are
1826 * anonymous memory,
1827 * so equivalent.
1828 */
1829 } else {
1830 RETURN(KERN_NO_SPACE);
1831 }
1832 }
1833 }
1834
1835 tmp_offset += entry->vme_end - entry->vme_start;
1836 tmp_start += entry->vme_end - entry->vme_start;
1837 if (entry->vme_end >= end) {
1838 /* reached the end of our mapping */
1839 break;
1840 }
1841 }
1842 /* it all matches: let's use what's already there ! */
1843 RETURN(KERN_MEMORY_PRESENT);
1844 }
1845
1846 /*
1847 * ... the next region doesn't overlap the
1848 * end point.
1849 */
1850
1851 if ((entry->vme_next != vm_map_to_entry(map)) &&
1852 (entry->vme_next->vme_start < end))
1853 RETURN(KERN_NO_SPACE);
1854 }
1855
1856 /*
1857 * At this point,
1858 * "start" and "end" should define the endpoints of the
1859 * available new range, and
1860 * "entry" should refer to the region before the new
1861 * range, and
1862 *
1863 * the map should be locked.
1864 */
1865
1866 /*
1867 * See whether we can avoid creating a new entry (and object) by
1868 * extending one of our neighbors. [So far, we only attempt to
1869 * extend from below.] Note that we can never extend/join
1870 * purgable objects because they need to remain distinct
1871 * entities in order to implement their "volatile object"
1872 * semantics.
1873 */
1874
1875 if (purgable) {
1876 if (object == VM_OBJECT_NULL) {
1877 object = vm_object_allocate(size);
1878 object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
1879 object->purgable = VM_PURGABLE_NONVOLATILE;
1880 offset = (vm_object_offset_t)0;
1881 }
1882 } else if ((is_submap == FALSE) &&
1883 (object == VM_OBJECT_NULL) &&
1884 (entry != vm_map_to_entry(map)) &&
1885 (entry->vme_end == start) &&
1886 (!entry->is_shared) &&
1887 (!entry->is_sub_map) &&
1888 ((alias == VM_MEMORY_REALLOC) || (entry->alias == alias)) &&
1889 (entry->inheritance == inheritance) &&
1890 (entry->protection == cur_protection) &&
1891 (entry->max_protection == max_protection) &&
1892 (entry->behavior == VM_BEHAVIOR_DEFAULT) &&
1893 (entry->in_transition == 0) &&
1894 (entry->no_cache == no_cache) &&
1895 ((entry->vme_end - entry->vme_start) + size <=
1896 (alias == VM_MEMORY_REALLOC ?
1897 ANON_CHUNK_SIZE :
1898 NO_COALESCE_LIMIT)) &&
1899 (entry->wired_count == 0)) { /* implies user_wired_count == 0 */
1900 if (vm_object_coalesce(entry->object.vm_object,
1901 VM_OBJECT_NULL,
1902 entry->offset,
1903 (vm_object_offset_t) 0,
1904 (vm_map_size_t)(entry->vme_end - entry->vme_start),
1905 (vm_map_size_t)(end - entry->vme_end))) {
1906
1907 /*
1908 * Coalesced the two objects - can extend
1909 * the previous map entry to include the
1910 * new range.
1911 */
1912 map->size += (end - entry->vme_end);
1913 assert(entry->vme_start < end);
1914 entry->vme_end = end;
1915 vm_map_store_update_first_free(map, map->first_free);
1916 RETURN(KERN_SUCCESS);
1917 }
1918 }
1919
1920 step = superpage_size ? SUPERPAGE_SIZE : (end - start);
1921 new_entry = NULL;
1922
1923 for (tmp2_start = start; tmp2_start<end; tmp2_start += step) {
1924 tmp2_end = tmp2_start + step;
1925 /*
1926 * Create a new entry
1927 * LP64todo - for now, we can only allocate 4GB internal objects
1928 * because the default pager can't page bigger ones. Remove this
1929 * when it can.
1930 *
1931 * XXX FBDP
1932 * The reserved "page zero" in each process's address space can
1933 * be arbitrarily large. Splitting it into separate 4GB objects and
1934 * therefore different VM map entries serves no purpose and just
1935 * slows down operations on the VM map, so let's not split the
1936 * allocation into 4GB chunks if the max protection is NONE. That
1937 * memory should never be accessible, so it will never get to the
1938 * default pager.
1939 */
1940 tmp_start = tmp2_start;
1941 if (object == VM_OBJECT_NULL &&
1942 size > (vm_map_size_t)ANON_CHUNK_SIZE &&
1943 max_protection != VM_PROT_NONE &&
1944 superpage_size == 0)
1945 tmp_end = tmp_start + (vm_map_size_t)ANON_CHUNK_SIZE;
1946 else
1947 tmp_end = tmp2_end;
1948 do {
1949 new_entry = vm_map_entry_insert(map, entry, tmp_start, tmp_end,
1950 object, offset, needs_copy,
1951 FALSE, FALSE,
1952 cur_protection, max_protection,
1953 VM_BEHAVIOR_DEFAULT,
1954 (flags & VM_FLAGS_MAP_JIT)? VM_INHERIT_NONE: inheritance,
1955 0, no_cache,
1956 permanent, superpage_size);
1957 new_entry->alias = alias;
1958 if (flags & VM_FLAGS_MAP_JIT){
1959 if (!(map->jit_entry_exists)){
1960 new_entry->used_for_jit = TRUE;
1961 map->jit_entry_exists = TRUE;
1962 }
1963 }
1964
1965 if (is_submap) {
1966 vm_map_t submap;
1967 boolean_t submap_is_64bit;
1968 boolean_t use_pmap;
1969
1970 new_entry->is_sub_map = TRUE;
1971 submap = (vm_map_t) object;
1972 submap_is_64bit = vm_map_is_64bit(submap);
1973 use_pmap = (alias == VM_MEMORY_SHARED_PMAP);
1974 #ifndef NO_NESTED_PMAP
1975 if (use_pmap && submap->pmap == NULL) {
1976 /* we need a sub pmap to nest... */
1977 submap->pmap = pmap_create(0, submap_is_64bit);
1978 if (submap->pmap == NULL) {
1979 /* let's proceed without nesting... */
1980 }
1981 }
1982 if (use_pmap && submap->pmap != NULL) {
1983 kr = pmap_nest(map->pmap,
1984 submap->pmap,
1985 tmp_start,
1986 tmp_start,
1987 tmp_end - tmp_start);
1988 if (kr != KERN_SUCCESS) {
1989 printf("vm_map_enter: "
1990 "pmap_nest(0x%llx,0x%llx) "
1991 "error 0x%x\n",
1992 (long long)tmp_start,
1993 (long long)tmp_end,
1994 kr);
1995 } else {
1996 /* we're now nested ! */
1997 new_entry->use_pmap = TRUE;
1998 pmap_empty = FALSE;
1999 }
2000 }
2001 #endif /* NO_NESTED_PMAP */
2002 }
2003 entry = new_entry;
2004
2005 if (superpage_size) {
2006 vm_page_t pages, m;
2007 vm_object_t sp_object;
2008
2009 entry->offset = 0;
2010
2011 /* allocate one superpage */
2012 kr = cpm_allocate(SUPERPAGE_SIZE, &pages, 0, SUPERPAGE_NBASEPAGES-1, TRUE, 0);
2013 if (kr != KERN_SUCCESS) {
2014 new_mapping_established = TRUE; /* will cause deallocation of whole range */
2015 RETURN(kr);
2016 }
2017
2018 /* create one vm_object per superpage */
2019 sp_object = vm_object_allocate((vm_map_size_t)(entry->vme_end - entry->vme_start));
2020 sp_object->phys_contiguous = TRUE;
2021 sp_object->vo_shadow_offset = (vm_object_offset_t)pages->phys_page*PAGE_SIZE;
2022 entry->object.vm_object = sp_object;
2023
2024 /* enter the base pages into the object */
2025 vm_object_lock(sp_object);
2026 for (offset = 0; offset < SUPERPAGE_SIZE; offset += PAGE_SIZE) {
2027 m = pages;
2028 pmap_zero_page(m->phys_page);
2029 pages = NEXT_PAGE(m);
2030 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
2031 vm_page_insert(m, sp_object, offset);
2032 }
2033 vm_object_unlock(sp_object);
2034 }
2035 } while (tmp_end != tmp2_end &&
2036 (tmp_start = tmp_end) &&
2037 (tmp_end = (tmp2_end - tmp_end > (vm_map_size_t)ANON_CHUNK_SIZE) ?
2038 tmp_end + (vm_map_size_t)ANON_CHUNK_SIZE : tmp2_end));
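/*
 * The "while" clause above walks through the chunk: "tmp_start" moves up
 * to the previous "tmp_end", and "tmp_end" grows by at most ANON_CHUNK_SIZE
 * until it reaches "tmp2_end"; the outer "for" then moves on to the next
 * superpage-sized (or full-sized) piece of the requested range.
 */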
2039 }
2040
2041 vm_map_unlock(map);
2042 map_locked = FALSE;
2043
2044 new_mapping_established = TRUE;
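/*
 * From this point on, a failure (RETURN with an error) will cause the
 * "BailOut" path below to tear the new mappings back out of the map via
 * "zap_new_map", since they will not be handed to the caller.
 */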
2045
2046 /* Wire down the new entry if the user
2047 * requested all new map entries be wired.
2048 */
2049 if ((map->wiring_required)||(superpage_size)) {
2050 pmap_empty = FALSE; /* pmap won't be empty */
2051 result = vm_map_wire(map, start, end,
2052 new_entry->protection, TRUE);
2053 RETURN(result);
2054 }
2055
2056 if ((object != VM_OBJECT_NULL) &&
2057 (vm_map_pmap_enter_enable) &&
2058 (!anywhere) &&
2059 (!needs_copy) &&
2060 (size < (128*1024))) {
2061 pmap_empty = FALSE; /* pmap won't be empty */
2062
2063 if (override_nx(map, alias) && cur_protection)
2064 cur_protection |= VM_PROT_EXECUTE;
2065
2066 vm_map_pmap_enter(map, start, end,
2067 object, offset, cur_protection);
2068 }
2069
2070 BailOut: ;
2071 if (result == KERN_SUCCESS) {
2072 vm_prot_t pager_prot;
2073 memory_object_t pager;
2074
2075 if (pmap_empty &&
2076 !(flags & VM_FLAGS_NO_PMAP_CHECK)) {
2077 assert(vm_map_pmap_is_empty(map,
2078 *address,
2079 *address+size));
2080 }
2081
2082 /*
2083 * For "named" VM objects, let the pager know that the
2084 * memory object is being mapped. Some pagers need to keep
2085 * track of this, to know when they can reclaim the memory
2086 * object, for example.
2087 * VM calls memory_object_map() for each mapping (specifying
2088 * the protection of each mapping) and calls
2089 * memory_object_last_unmap() when all the mappings are gone.
2090 */
2091 pager_prot = max_protection;
2092 if (needs_copy) {
2093 /*
2094 * Copy-On-Write mapping: won't modify
2095 * the memory object.
2096 */
2097 pager_prot &= ~VM_PROT_WRITE;
2098 }
2099 if (!is_submap &&
2100 object != VM_OBJECT_NULL &&
2101 object->named &&
2102 object->pager != MEMORY_OBJECT_NULL) {
2103 vm_object_lock(object);
2104 pager = object->pager;
2105 if (object->named &&
2106 pager != MEMORY_OBJECT_NULL) {
2107 assert(object->pager_ready);
2108 vm_object_mapping_wait(object, THREAD_UNINT);
2109 vm_object_mapping_begin(object);
2110 vm_object_unlock(object);
2111
2112 kr = memory_object_map(pager, pager_prot);
2113 assert(kr == KERN_SUCCESS);
2114
2115 vm_object_lock(object);
2116 vm_object_mapping_end(object);
2117 }
2118 vm_object_unlock(object);
2119 }
2120 } else {
2121 if (new_mapping_established) {
2122 /*
2123 * We have to get rid of the new mappings since we
2124 * won't make them available to the user.
2125 * Try to do that atomically, to minimize the risk
2126 * that someone else creates new mappings in that range.
2127 */
2128 zap_new_map = vm_map_create(PMAP_NULL,
2129 *address,
2130 *address + size,
2131 map->hdr.entries_pageable);
2132 if (!map_locked) {
2133 vm_map_lock(map);
2134 map_locked = TRUE;
2135 }
2136 (void) vm_map_delete(map, *address, *address+size,
2137 VM_MAP_REMOVE_SAVE_ENTRIES,
2138 zap_new_map);
2139 }
2140 if (zap_old_map != VM_MAP_NULL &&
2141 zap_old_map->hdr.nentries != 0) {
2142 vm_map_entry_t entry1, entry2;
2143
2144 /*
2145 * The new mapping failed. Attempt to restore
2146 * the old mappings, saved in the "zap_old_map".
2147 */
2148 if (!map_locked) {
2149 vm_map_lock(map);
2150 map_locked = TRUE;
2151 }
2152
2153 /* first check if the coast is still clear */
2154 start = vm_map_first_entry(zap_old_map)->vme_start;
2155 end = vm_map_last_entry(zap_old_map)->vme_end;
2156 if (vm_map_lookup_entry(map, start, &entry1) ||
2157 vm_map_lookup_entry(map, end, &entry2) ||
2158 entry1 != entry2) {
2159 /*
2160 * Part of that range has already been
2161 * re-mapped: we can't restore the old
2162 * mappings...
2163 */
2164 vm_map_enter_restore_failures++;
2165 } else {
2166 /*
2167 * Transfer the saved map entries from
2168 * "zap_old_map" to the original "map",
2169 * inserting them all after "entry1".
2170 */
2171 for (entry2 = vm_map_first_entry(zap_old_map);
2172 entry2 != vm_map_to_entry(zap_old_map);
2173 entry2 = vm_map_first_entry(zap_old_map)) {
2174 vm_map_size_t entry_size;
2175
2176 entry_size = (entry2->vme_end -
2177 entry2->vme_start);
2178 vm_map_store_entry_unlink(zap_old_map,
2179 entry2);
2180 zap_old_map->size -= entry_size;
2181 vm_map_store_entry_link(map, entry1, entry2);
2182 map->size += entry_size;
2183 entry1 = entry2;
2184 }
2185 if (map->wiring_required) {
2186 /*
2187 * XXX TODO: we should rewire the
2188 * old pages here...
2189 */
2190 }
2191 vm_map_enter_restore_successes++;
2192 }
2193 }
2194 }
2195
2196 if (map_locked) {
2197 vm_map_unlock(map);
2198 }
2199
2200 /*
2201 * Get rid of the "zap_maps" and all the map entries that
2202 * they may still contain.
2203 */
2204 if (zap_old_map != VM_MAP_NULL) {
2205 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
2206 zap_old_map = VM_MAP_NULL;
2207 }
2208 if (zap_new_map != VM_MAP_NULL) {
2209 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
2210 zap_new_map = VM_MAP_NULL;
2211 }
2212
2213 return result;
2214
2215 #undef RETURN
2216 }
2217
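/*
 * vm_map_enter_mem_object:
 *
 * Map the VM object, submap or memory object named by "port" into
 * "target_map" at "*address" (or at a map-chosen address if
 * VM_FLAGS_ANYWHERE is set), with the given protections and inheritance.
 * If "copy" is TRUE, a copy of the object's contents is mapped instead of
 * the object itself.  On success, "*address" is updated with the actual
 * mapping address.
 */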
2218 kern_return_t
2219 vm_map_enter_mem_object(
2220 vm_map_t target_map,
2221 vm_map_offset_t *address,
2222 vm_map_size_t initial_size,
2223 vm_map_offset_t mask,
2224 int flags,
2225 ipc_port_t port,
2226 vm_object_offset_t offset,
2227 boolean_t copy,
2228 vm_prot_t cur_protection,
2229 vm_prot_t max_protection,
2230 vm_inherit_t inheritance)
2231 {
2232 vm_map_address_t map_addr;
2233 vm_map_size_t map_size;
2234 vm_object_t object;
2235 vm_object_size_t size;
2236 kern_return_t result;
2237 boolean_t mask_cur_protection, mask_max_protection;
2238
2239 mask_cur_protection = cur_protection & VM_PROT_IS_MASK;
2240 mask_max_protection = max_protection & VM_PROT_IS_MASK;
2241 cur_protection &= ~VM_PROT_IS_MASK;
2242 max_protection &= ~VM_PROT_IS_MASK;
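/*
 * VM_PROT_IS_MASK in either protection argument means "intersect this
 * protection with the named entry's own protection" rather than failing
 * if the named entry doesn't grant everything requested; the flag itself
 * is stripped before the protections are used.
 */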
2243
2244 /*
2245 * Check arguments for validity
2246 */
2247 if ((target_map == VM_MAP_NULL) ||
2248 (cur_protection & ~VM_PROT_ALL) ||
2249 (max_protection & ~VM_PROT_ALL) ||
2250 (inheritance > VM_INHERIT_LAST_VALID) ||
2251 initial_size == 0)
2252 return KERN_INVALID_ARGUMENT;
2253
2254 map_addr = vm_map_trunc_page(*address);
2255 map_size = vm_map_round_page(initial_size);
2256 size = vm_object_round_page(initial_size);
2257
2258 /*
2259 * Find the vm object (if any) corresponding to this port.
2260 */
2261 if (!IP_VALID(port)) {
2262 object = VM_OBJECT_NULL;
2263 offset = 0;
2264 copy = FALSE;
2265 } else if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
2266 vm_named_entry_t named_entry;
2267
2268 named_entry = (vm_named_entry_t) port->ip_kobject;
2269 /* a few checks to make sure user is obeying rules */
2270 if (size == 0) {
2271 if (offset >= named_entry->size)
2272 return KERN_INVALID_RIGHT;
2273 size = named_entry->size - offset;
2274 }
2275 if (mask_max_protection) {
2276 max_protection &= named_entry->protection;
2277 }
2278 if (mask_cur_protection) {
2279 cur_protection &= named_entry->protection;
2280 }
2281 if ((named_entry->protection & max_protection) !=
2282 max_protection)
2283 return KERN_INVALID_RIGHT;
2284 if ((named_entry->protection & cur_protection) !=
2285 cur_protection)
2286 return KERN_INVALID_RIGHT;
2287 if (named_entry->size < (offset + size))
2288 return KERN_INVALID_ARGUMENT;
2289
2290 /* the caller's parameter "offset" is the offset from the */
2291 /* beginning of the named entry; convert it to an offset in the object */
2292 offset = offset + named_entry->offset;
2293
2294 named_entry_lock(named_entry);
2295 if (named_entry->is_sub_map) {
2296 vm_map_t submap;
2297
2298 submap = named_entry->backing.map;
2299 vm_map_lock(submap);
2300 vm_map_reference(submap);
2301 vm_map_unlock(submap);
2302 named_entry_unlock(named_entry);
2303
2304 result = vm_map_enter(target_map,
2305 &map_addr,
2306 map_size,
2307 mask,
2308 flags | VM_FLAGS_SUBMAP,
2309 (vm_object_t) submap,
2310 offset,
2311 copy,
2312 cur_protection,
2313 max_protection,
2314 inheritance);
2315 if (result != KERN_SUCCESS) {
2316 vm_map_deallocate(submap);
2317 } else {
2318 /*
2319 * No need to lock "submap" just to check its
2320 * "mapped" flag: that flag is never reset
2321 * once it's been set and if we race, we'll
2322 * just end up setting it twice, which is OK.
2323 */
2324 if (submap->mapped == FALSE) {
2325 /*
2326 * This submap has never been mapped.
2327 * Set its "mapped" flag now that it
2328 * has been mapped.
2329 * This happens only for the first ever
2330 * mapping of a "submap".
2331 */
2332 vm_map_lock(submap);
2333 submap->mapped = TRUE;
2334 vm_map_unlock(submap);
2335 }
2336 *address = map_addr;
2337 }
2338 return result;
2339
2340 } else if (named_entry->is_pager) {
2341 unsigned int access;
2342 vm_prot_t protections;
2343 unsigned int wimg_mode;
2344
2345 protections = named_entry->protection & VM_PROT_ALL;
2346 access = GET_MAP_MEM(named_entry->protection);
2347
2348 object = vm_object_enter(named_entry->backing.pager,
2349 named_entry->size,
2350 named_entry->internal,
2351 FALSE,
2352 FALSE);
2353 if (object == VM_OBJECT_NULL) {
2354 named_entry_unlock(named_entry);
2355 return KERN_INVALID_OBJECT;
2356 }
2357
2358 /* JMM - drop reference on pager here */
2359
2360 /* create an extra ref for the named entry */
2361 vm_object_lock(object);
2362 vm_object_reference_locked(object);
2363 named_entry->backing.object = object;
2364 named_entry->is_pager = FALSE;
2365 named_entry_unlock(named_entry);
2366
2367 wimg_mode = object->wimg_bits;
2368
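/*
 * The MAP_MEM_* access attribute carried in the named entry selects the
 * caching mode (WIMG bits) the object's pages should be mapped with; the
 * chosen mode is applied further below, once any external pager is ready.
 */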
2369 if (access == MAP_MEM_IO) {
2370 wimg_mode = VM_WIMG_IO;
2371 } else if (access == MAP_MEM_COPYBACK) {
2372 wimg_mode = VM_WIMG_USE_DEFAULT;
2373 } else if (access == MAP_MEM_WTHRU) {
2374 wimg_mode = VM_WIMG_WTHRU;
2375 } else if (access == MAP_MEM_WCOMB) {
2376 wimg_mode = VM_WIMG_WCOMB;
2377 }
2378
2379 /* wait for object (if any) to be ready */
2380 if (!named_entry->internal) {
2381 while (!object->pager_ready) {
2382 vm_object_wait(
2383 object,
2384 VM_OBJECT_EVENT_PAGER_READY,
2385 THREAD_UNINT);
2386 vm_object_lock(object);
2387 }
2388 }
2389
2390 if (object->wimg_bits != wimg_mode)
2391 vm_object_change_wimg_mode(object, wimg_mode);
2392
2393 object->true_share = TRUE;
2394
2395 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
2396 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
2397 vm_object_unlock(object);
2398 } else {
2399 /* This is the case where we are going to map */
2400 /* an already mapped object. If the object is */
2401 /* not ready, it is internal. An external */
2402 /* object cannot be mapped until it is ready, */
2403 /* so we can avoid the ready check */
2404 /* in this case. */
2405 object = named_entry->backing.object;
2406 assert(object != VM_OBJECT_NULL);
2407 named_entry_unlock(named_entry);
2408 vm_object_reference(object);
2409 }
2410 } else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
2411 /*
2412 * JMM - This is temporary until we unify named entries
2413 * and raw memory objects.
2414 *
2415 * Detected fake ip_kotype for a memory object. In
2416 * this case, the port isn't really a port at all, but
2417 * instead is just a raw memory object.
2418 */
2419
2420 object = vm_object_enter((memory_object_t)port,
2421 size, FALSE, FALSE, FALSE);
2422 if (object == VM_OBJECT_NULL)
2423 return KERN_INVALID_OBJECT;
2424
2425 /* wait for object (if any) to be ready */
2426 if (object != VM_OBJECT_NULL) {
2427 if (object == kernel_object) {
2428 printf("Warning: Attempt to map kernel object"
2429 " by a non-private kernel entity\n");
2430 return KERN_INVALID_OBJECT;
2431 }
2432 if (!object->pager_ready) {
2433 vm_object_lock(object);
2434
2435 while (!object->pager_ready) {
2436 vm_object_wait(object,
2437 VM_OBJECT_EVENT_PAGER_READY,
2438 THREAD_UNINT);
2439 vm_object_lock(object);
2440 }
2441 vm_object_unlock(object);
2442 }
2443 }
2444 } else {
2445 return KERN_INVALID_OBJECT;
2446 }
2447
2448 if (object != VM_OBJECT_NULL &&
2449 object->named &&
2450 object->pager != MEMORY_OBJECT_NULL &&
2451 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
2452 memory_object_t pager;
2453 vm_prot_t pager_prot;
2454 kern_return_t kr;
2455
2456 /*
2457 * For "named" VM objects, let the pager know that the
2458 * memory object is being mapped. Some pagers need to keep
2459 * track of this, to know when they can reclaim the memory
2460 * object, for example.
2461 * VM calls memory_object_map() for each mapping (specifying
2462 * the protection of each mapping) and calls
2463 * memory_object_last_unmap() when all the mappings are gone.
2464 */
2465 pager_prot = max_protection;
2466 if (copy) {
2467 /*
2468 * Copy-On-Write mapping: won't modify the
2469 * memory object.
2470 */
2471 pager_prot &= ~VM_PROT_WRITE;
2472 }
2473 vm_object_lock(object);
2474 pager = object->pager;
2475 if (object->named &&
2476 pager != MEMORY_OBJECT_NULL &&
2477 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
2478 assert(object->pager_ready);
2479 vm_object_mapping_wait(object, THREAD_UNINT);
2480 vm_object_mapping_begin(object);
2481 vm_object_unlock(object);
2482
2483 kr = memory_object_map(pager, pager_prot);
2484 assert(kr == KERN_SUCCESS);
2485
2486 vm_object_lock(object);
2487 vm_object_mapping_end(object);
2488 }
2489 vm_object_unlock(object);
2490 }
2491
2492 /*
2493 * Perform the copy if requested
2494 */
2495
2496 if (copy) {
2497 vm_object_t new_object;
2498 vm_object_offset_t new_offset;
2499
2500 result = vm_object_copy_strategically(object, offset, size,
2501 &new_object, &new_offset,
2502 &copy);
2503
2504
2505 if (result == KERN_MEMORY_RESTART_COPY) {
2506 boolean_t success;
2507 boolean_t src_needs_copy;
2508
2509 /*
2510 * XXX
2511 * We currently ignore src_needs_copy.
2512 * This really is the issue of how to make
2513 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
2514 * non-kernel users to use. Solution forthcoming.
2515 * In the meantime, since we don't allow non-kernel
2516 * memory managers to specify symmetric copy,
2517 * we won't run into problems here.
2518 */
2519 new_object = object;
2520 new_offset = offset;
2521 success = vm_object_copy_quickly(&new_object,
2522 new_offset, size,
2523 &src_needs_copy,
2524 &copy);
2525 assert(success);
2526 result = KERN_SUCCESS;
2527 }
2528 /*
2529 * Throw away the reference to the
2530 * original object, as it won't be mapped.
2531 */
2532
2533 vm_object_deallocate(object);
2534
2535 if (result != KERN_SUCCESS)
2536 return result;
2537
2538 object = new_object;
2539 offset = new_offset;
2540 }
2541
2542 result = vm_map_enter(target_map,
2543 &map_addr, map_size,
2544 (vm_map_offset_t)mask,
2545 flags,
2546 object, offset,
2547 copy,
2548 cur_protection, max_protection, inheritance);
2549 if (result != KERN_SUCCESS)
2550 vm_object_deallocate(object);
2551 *address = map_addr;
2552 return result;
2553 }
2554
2555
2556
2557
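/*
 * vm_map_enter_mem_object_control:
 *
 * Same idea as vm_map_enter_mem_object(), but the memory is named by a
 * memory_object_control_t rather than a port: the VM object behind
 * "control" is mapped (or copied and then mapped) into "target_map".
 */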
2558 kern_return_t
2559 vm_map_enter_mem_object_control(
2560 vm_map_t target_map,
2561 vm_map_offset_t *address,
2562 vm_map_size_t initial_size,
2563 vm_map_offset_t mask,
2564 int flags,
2565 memory_object_control_t control,
2566 vm_object_offset_t offset,
2567 boolean_t copy,
2568 vm_prot_t cur_protection,
2569 vm_prot_t max_protection,
2570 vm_inherit_t inheritance)
2571 {
2572 vm_map_address_t map_addr;
2573 vm_map_size_t map_size;
2574 vm_object_t object;
2575 vm_object_size_t size;
2576 kern_return_t result;
2577 memory_object_t pager;
2578 vm_prot_t pager_prot;
2579 kern_return_t kr;
2580
2581 /*
2582 * Check arguments for validity
2583 */
2584 if ((target_map == VM_MAP_NULL) ||
2585 (cur_protection & ~VM_PROT_ALL) ||
2586 (max_protection & ~VM_PROT_ALL) ||
2587 (inheritance > VM_INHERIT_LAST_VALID) ||
2588 initial_size == 0)
2589 return KERN_INVALID_ARGUMENT;
2590
2591 map_addr = vm_map_trunc_page(*address);
2592 map_size = vm_map_round_page(initial_size);
2593 size = vm_object_round_page(initial_size);
2594
2595 object = memory_object_control_to_vm_object(control);
2596
2597 if (object == VM_OBJECT_NULL)
2598 return KERN_INVALID_OBJECT;
2599
2600 if (object == kernel_object) {
2601 printf("Warning: Attempt to map kernel object"
2602 " by a non-private kernel entity\n");
2603 return KERN_INVALID_OBJECT;
2604 }
2605
2606 vm_object_lock(object);
2607 object->ref_count++;
2608 vm_object_res_reference(object);
2609
2610 /*
2611 * For "named" VM objects, let the pager know that the
2612 * memory object is being mapped. Some pagers need to keep
2613 * track of this, to know when they can reclaim the memory
2614 * object, for example.
2615 * VM calls memory_object_map() for each mapping (specifying
2616 * the protection of each mapping) and calls
2617 * memory_object_last_unmap() when all the mappings are gone.
2618 */
2619 pager_prot = max_protection;
2620 if (copy) {
2621 pager_prot &= ~VM_PROT_WRITE;
2622 }
2623 pager = object->pager;
2624 if (object->named &&
2625 pager != MEMORY_OBJECT_NULL &&
2626 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
2627 assert(object->pager_ready);
2628 vm_object_mapping_wait(object, THREAD_UNINT);
2629 vm_object_mapping_begin(object);
2630 vm_object_unlock(object);
2631
2632 kr = memory_object_map(pager, pager_prot);
2633 assert(kr == KERN_SUCCESS);
2634
2635 vm_object_lock(object);
2636 vm_object_mapping_end(object);
2637 }
2638 vm_object_unlock(object);
2639
2640 /*
2641 * Perform the copy if requested
2642 */
2643
2644 if (copy) {
2645 vm_object_t new_object;
2646 vm_object_offset_t new_offset;
2647
2648 result = vm_object_copy_strategically(object, offset, size,
2649 &new_object, &new_offset,
2650 &copy);
2651
2652
2653 if (result == KERN_MEMORY_RESTART_COPY) {
2654 boolean_t success;
2655 boolean_t src_needs_copy;
2656
2657 /*
2658 * XXX
2659 * We currently ignore src_needs_copy.
2660 * This really is the issue of how to make
2661 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
2662 * non-kernel users to use. Solution forthcoming.
2663 * In the meantime, since we don't allow non-kernel
2664 * memory managers to specify symmetric copy,
2665 * we won't run into problems here.
2666 */
2667 new_object = object;
2668 new_offset = offset;
2669 success = vm_object_copy_quickly(&new_object,
2670 new_offset, size,
2671 &src_needs_copy,
2672 &copy);
2673 assert(success);
2674 result = KERN_SUCCESS;
2675 }
2676 /*
2677 * Throw away the reference to the
2678 * original object, as it won't be mapped.
2679 */
2680
2681 vm_object_deallocate(object);
2682
2683 if (result != KERN_SUCCESS)
2684 return result;
2685
2686 object = new_object;
2687 offset = new_offset;
2688 }
2689
2690 result = vm_map_enter(target_map,
2691 &map_addr, map_size,
2692 (vm_map_offset_t)mask,
2693 flags,
2694 object, offset,
2695 copy,
2696 cur_protection, max_protection, inheritance);
2697 if (result != KERN_SUCCESS)
2698 vm_object_deallocate(object);
2699 *address = map_addr;
2700
2701 return result;
2702 }
2703
2704
2705 #if VM_CPM
2706
2707 #ifdef MACH_ASSERT
2708 extern pmap_paddr_t avail_start, avail_end;
2709 #endif
2710
2711 /*
2712 * Allocate memory in the specified map, with the caveat that
2713 * the memory is physically contiguous. This call may fail
2714 * if the system can't find sufficient contiguous memory.
2715 * This call may cause or lead to heart-stopping amounts of
2716 * paging activity.
2717 *
2718 * Memory obtained from this call should be freed in the
2719 * normal way, viz., via vm_deallocate.
2720 */
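/*
 * Illustrative sketch only (not from the original source), assuming a
 * kernel client operating on "kernel_map":
 *
 *	vm_map_offset_t	addr = 0;
 *	kern_return_t	kr;
 *
 *	kr = vm_map_enter_cpm(kernel_map, &addr,
 *			      (vm_map_size_t)(16 * PAGE_SIZE),
 *			      VM_FLAGS_ANYWHERE);
 *	if (kr == KERN_SUCCESS) {
 *		... use the physically contiguous range ...
 *		(void) vm_deallocate(kernel_map, addr, 16 * PAGE_SIZE);
 *	}
 */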
2721 kern_return_t
2722 vm_map_enter_cpm(
2723 vm_map_t map,
2724 vm_map_offset_t *addr,
2725 vm_map_size_t size,
2726 int flags)
2727 {
2728 vm_object_t cpm_obj;
2729 pmap_t pmap;
2730 vm_page_t m, pages;
2731 kern_return_t kr;
2732 vm_map_offset_t va, start, end, offset;
2733 #if MACH_ASSERT
2734 vm_map_offset_t prev_addr;
2735 #endif /* MACH_ASSERT */
2736
2737 boolean_t anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
2738
2739 if (!vm_allocate_cpm_enabled)
2740 return KERN_FAILURE;
2741
2742 if (size == 0) {
2743 *addr = 0;
2744 return KERN_SUCCESS;
2745 }
2746 if (anywhere)
2747 *addr = vm_map_min(map);
2748 else
2749 *addr = vm_map_trunc_page(*addr);
2750 size = vm_map_round_page(size);
2751
2752 /*
2753 * LP64todo - cpm_allocate should probably allow
2754 * allocations of >4GB, but not with the current
2755 * algorithm, so just cast down the size for now.
2756 */
2757 if (size > VM_MAX_ADDRESS)
2758 return KERN_RESOURCE_SHORTAGE;
2759 if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
2760 &pages, 0, 0, TRUE, flags)) != KERN_SUCCESS)
2761 return kr;
2762
2763 cpm_obj = vm_object_allocate((vm_object_size_t)size);
2764 assert(cpm_obj != VM_OBJECT_NULL);
2765 assert(cpm_obj->internal);
2766 assert(cpm_obj->size == (vm_object_size_t)size);
2767 assert(cpm_obj->can_persist == FALSE);
2768 assert(cpm_obj->pager_created == FALSE);
2769 assert(cpm_obj->pageout == FALSE);
2770 assert(cpm_obj->shadow == VM_OBJECT_NULL);
2771
2772 /*
2773 * Insert pages into object.
2774 */
2775
2776 vm_object_lock(cpm_obj);
2777 for (offset = 0; offset < size; offset += PAGE_SIZE) {
2778 m = pages;
2779 pages = NEXT_PAGE(m);
2780 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
2781
2782 assert(!m->gobbled);
2783 assert(!m->wanted);
2784 assert(!m->pageout);
2785 assert(!m->tabled);
2786 assert(VM_PAGE_WIRED(m));
2787 /*
2788 * ENCRYPTED SWAP:
2789 * "m" is not supposed to be pageable, so it
2790 * should not be encrypted. It wouldn't be safe
2791 * to enter it in a new VM object while encrypted.
2792 */
2793 ASSERT_PAGE_DECRYPTED(m);
2794 assert(m->busy);
2795 assert(m->phys_page>=(avail_start>>PAGE_SHIFT) && m->phys_page<=(avail_end>>PAGE_SHIFT));
2796
2797 m->busy = FALSE;
2798 vm_page_insert(m, cpm_obj, offset);
2799 }
2800 assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
2801 vm_object_unlock(cpm_obj);
2802
2803 /*
2804 * Hang onto a reference on the object in case a
2805 * multi-threaded application for some reason decides
2806 * to deallocate the portion of the address space into
2807 * which we will insert this object.
2808 *
2809 * Unfortunately, we must insert the object now before
2810 * we can talk to the pmap module about which addresses
2811 * must be wired down. Hence, the race with a multi-
2812 * threaded app.
2813 */
2814 vm_object_reference(cpm_obj);
2815
2816 /*
2817 * Insert object into map.
2818 */
2819
2820 kr = vm_map_enter(
2821 map,
2822 addr,
2823 size,
2824 (vm_map_offset_t)0,
2825 flags,
2826 cpm_obj,
2827 (vm_object_offset_t)0,
2828 FALSE,
2829 VM_PROT_ALL,
2830 VM_PROT_ALL,
2831 VM_INHERIT_DEFAULT);
2832
2833 if (kr != KERN_SUCCESS) {
2834 /*
2835 * A CPM object doesn't have can_persist set,
2836 * so all we have to do is deallocate it to
2837 * free up these pages.
2838 */
2839 assert(cpm_obj->pager_created == FALSE);
2840 assert(cpm_obj->can_persist == FALSE);
2841 assert(cpm_obj->pageout == FALSE);
2842 assert(cpm_obj->shadow == VM_OBJECT_NULL);
2843 vm_object_deallocate(cpm_obj); /* kill acquired ref */
2844 vm_object_deallocate(cpm_obj); /* kill creation ref */
2845 }
2846
2847 /*
2848 * Inform the physical mapping system that the
2849 * range of addresses may not fault, so that
2850 * page tables and such can be locked down as well.
2851 */
2852 start = *addr;
2853 end = start + size;
2854 pmap = vm_map_pmap(map);
2855 pmap_pageable(pmap, start, end, FALSE);
2856
2857 /*
2858 * Enter each page into the pmap, to avoid faults.
2859 * Note that this loop could be coded more efficiently,
2860 * if the need arose, rather than looking up each page
2861 * again.
2862 */
2863 for (offset = 0, va = start; offset < size;
2864 va += PAGE_SIZE, offset += PAGE_SIZE) {
2865 int type_of_fault;
2866
2867 vm_object_lock(cpm_obj);
2868 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
2869 assert(m != VM_PAGE_NULL);
2870
2871 vm_page_zero_fill(m);
2872
2873 type_of_fault = DBG_ZERO_FILL_FAULT;
2874
2875 vm_fault_enter(m, pmap, va, VM_PROT_ALL, VM_PROT_WRITE,
2876 VM_PAGE_WIRED(m), FALSE, FALSE, FALSE,
2877 &type_of_fault);
2878
2879 vm_object_unlock(cpm_obj);
2880 }
2881
2882 #if MACH_ASSERT
2883 /*
2884 * Verify ordering in address space.
2885 */
2886 for (offset = 0; offset < size; offset += PAGE_SIZE) {
2887 vm_object_lock(cpm_obj);
2888 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
2889 vm_object_unlock(cpm_obj);
2890 if (m == VM_PAGE_NULL)
2891 panic("vm_allocate_cpm: obj %p off 0x%llx no page",
2892 cpm_obj, (uint64_t)offset);
2893 assert(m->tabled);
2894 assert(!m->busy);
2895 assert(!m->wanted);
2896 assert(!m->fictitious);
2897 assert(!m->private);
2898 assert(!m->absent);
2899 assert(!m->error);
2900 assert(!m->cleaning);
2901 assert(!m->precious);
2902 assert(!m->clustered);
2903 if (offset != 0) {
2904 if (m->phys_page != prev_addr + 1) {
2905 printf("start 0x%llx end 0x%llx va 0x%llx\n",
2906 (uint64_t)start, (uint64_t)end, (uint64_t)va);
2907 printf("obj %p off 0x%llx\n", cpm_obj, (uint64_t)offset);
2908 printf("m %p prev_address 0x%llx\n", m,
2909 (uint64_t)prev_addr);
2910 panic("vm_allocate_cpm: pages not contig!");
2911 }
2912 }
2913 prev_addr = m->phys_page;
2914 }
2915 #endif /* MACH_ASSERT */
2916
2917 vm_object_deallocate(cpm_obj); /* kill extra ref */
2918
2919 return kr;
2920 }
2921
2922
2923 #else /* VM_CPM */
2924
2925 /*
2926 * Interface is defined in all cases, but unless the kernel
2927 * is built explicitly for this option, the interface does
2928 * nothing.
2929 */
2930
2931 kern_return_t
2932 vm_map_enter_cpm(
2933 __unused vm_map_t map,
2934 __unused vm_map_offset_t *addr,
2935 __unused vm_map_size_t size,
2936 __unused int flags)
2937 {
2938 return KERN_FAILURE;
2939 }
2940 #endif /* VM_CPM */
2941
2942 /* Not used without nested pmaps */
2943 #ifndef NO_NESTED_PMAP
2944 /*
2945 * Clip and unnest a portion of a nested submap mapping.
2946 */
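/*
 * On return, "entry" has been clipped so that it covers no more than
 * [start_unnest, end_unnest), the hardware nesting for that range has been
 * torn down with pmap_unnest(), and the entry's "use_pmap" flag is cleared
 * so the range is served by the parent map's pmap from then on.
 */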
2947
2948
2949 static void
2950 vm_map_clip_unnest(
2951 vm_map_t map,
2952 vm_map_entry_t entry,
2953 vm_map_offset_t start_unnest,
2954 vm_map_offset_t end_unnest)
2955 {
2956 vm_map_offset_t old_start_unnest = start_unnest;
2957 vm_map_offset_t old_end_unnest = end_unnest;
2958
2959 assert(entry->is_sub_map);
2960 assert(entry->object.sub_map != NULL);
2961
2962 /*
2963 * Query the platform for the optimal unnest range.
2964 * DRK: There's some duplication of effort here, since
2965 * callers may have adjusted the range to some extent. This
2966 * routine was introduced to support 1GiB subtree nesting
2967 * for x86 platforms, which can also nest on 2MiB boundaries
2968 * depending on size/alignment.
2969 */
2970 if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) {
2971 log_unnest_badness(map, old_start_unnest, old_end_unnest);
2972 }
2973
2974 if (entry->vme_start > start_unnest ||
2975 entry->vme_end < end_unnest) {
2976 panic("vm_map_clip_unnest(0x%llx,0x%llx): "
2977 "bad nested entry: start=0x%llx end=0x%llx\n",
2978 (long long)start_unnest, (long long)end_unnest,
2979 (long long)entry->vme_start, (long long)entry->vme_end);
2980 }
2981
2982 if (start_unnest > entry->vme_start) {
2983 _vm_map_clip_start(&map->hdr,
2984 entry,
2985 start_unnest);
2986 vm_map_store_update_first_free(map, map->first_free);
2987 }
2988 if (entry->vme_end > end_unnest) {
2989 _vm_map_clip_end(&map->hdr,
2990 entry,
2991 end_unnest);
2992 vm_map_store_update_first_free(map, map->first_free);
2993 }
2994
2995 pmap_unnest(map->pmap,
2996 entry->vme_start,
2997 entry->vme_end - entry->vme_start);
2998 if ((map->mapped) && (map->ref_count)) {
2999 /* clean up parent map/maps */
3000 vm_map_submap_pmap_clean(
3001 map, entry->vme_start,
3002 entry->vme_end,
3003 entry->object.sub_map,
3004 entry->offset);
3005 }
3006 entry->use_pmap = FALSE;
3007 }
3008 #endif /* NO_NESTED_PMAP */
3009
3010 /*
3011 * vm_map_clip_start: [ internal use only ]
3012 *
3013 * Asserts that the given entry begins at or after
3014 * the specified address; if necessary,
3015 * it splits the entry into two.
3016 */
3017 void
3018 vm_map_clip_start(
3019 vm_map_t map,
3020 vm_map_entry_t entry,
3021 vm_map_offset_t startaddr)
3022 {
3023 #ifndef NO_NESTED_PMAP
3024 if (entry->use_pmap &&
3025 startaddr >= entry->vme_start) {
3026 vm_map_offset_t start_unnest, end_unnest;
3027
3028 /*
3029 * Make sure "startaddr" is no longer in a nested range
3030 * before we clip. Unnest only the minimum range the platform
3031 * can handle.
3032 * vm_map_clip_unnest may perform additional adjustments to
3033 * the unnest range.
3034 */
3035 start_unnest = startaddr & ~(pmap_nesting_size_min - 1);
3036 end_unnest = start_unnest + pmap_nesting_size_min;
3037 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
3038 }
3039 #endif /* NO_NESTED_PMAP */
3040 if (startaddr > entry->vme_start) {
3041 if (entry->object.vm_object &&
3042 !entry->is_sub_map &&
3043 entry->object.vm_object->phys_contiguous) {
3044 pmap_remove(map->pmap,
3045 (addr64_t)(entry->vme_start),
3046 (addr64_t)(entry->vme_end));
3047 }
3048 _vm_map_clip_start(&map->hdr, entry, startaddr);
3049 vm_map_store_update_first_free(map, map->first_free);
3050 }
3051 }
3052
3053
3054 #define vm_map_copy_clip_start(copy, entry, startaddr) \
3055 MACRO_BEGIN \
3056 if ((startaddr) > (entry)->vme_start) \
3057 _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \
3058 MACRO_END
3059
3060 /*
3061 * This routine is called only when it is known that
3062 * the entry must be split.
3063 */
3064 static void
3065 _vm_map_clip_start(
3066 register struct vm_map_header *map_header,
3067 register vm_map_entry_t entry,
3068 register vm_map_offset_t start)
3069 {
3070 register vm_map_entry_t new_entry;
3071
3072 /*
3073 * Split off the front portion --
3074 * note that we must insert the new
3075 * entry BEFORE this one, so that
3076 * this entry has the specified starting
3077 * address.
3078 */
3079
3080 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
3081 vm_map_entry_copy_full(new_entry, entry);
3082
3083 new_entry->vme_end = start;
3084 assert(new_entry->vme_start < new_entry->vme_end);
3085 entry->offset += (start - entry->vme_start);
3086 assert(start < entry->vme_end);
3087 entry->vme_start = start;
3088
3089 _vm_map_store_entry_link(map_header, entry->vme_prev, new_entry);
3090
3091 if (entry->is_sub_map)
3092 vm_map_reference(new_entry->object.sub_map);
3093 else
3094 vm_object_reference(new_entry->object.vm_object);
3095 }
3096
3097
3098 /*
3099 * vm_map_clip_end: [ internal use only ]
3100 *
3101 * Asserts that the given entry ends at or before
3102 * the specified address; if necessary,
3103 * it splits the entry into two.
3104 */
3105 void
3106 vm_map_clip_end(
3107 vm_map_t map,
3108 vm_map_entry_t entry,
3109 vm_map_offset_t endaddr)
3110 {
3111 if (endaddr > entry->vme_end) {
3112 /*
3113 * Within the scope of this clipping, limit "endaddr" to
3114 * the end of this map entry...
3115 */
3116 endaddr = entry->vme_end;
3117 }
3118 #ifndef NO_NESTED_PMAP
3119 if (entry->use_pmap) {
3120 vm_map_offset_t start_unnest, end_unnest;
3121
3122 /*
3123 * Make sure the range between the start of this entry and
3124 * the new "endaddr" is no longer nested before we clip.
3125 * Unnest only the minimum range the platform can handle.
3126 * vm_map_clip_unnest may perform additional adjustments to
3127 * the unnest range.
3128 */
3129 start_unnest = entry->vme_start;
3130 end_unnest =
3131 (endaddr + pmap_nesting_size_min - 1) &
3132 ~(pmap_nesting_size_min - 1);
3133 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
3134 }
3135 #endif /* NO_NESTED_PMAP */
3136 if (endaddr < entry->vme_end) {
3137 if (entry->object.vm_object &&
3138 !entry->is_sub_map &&
3139 entry->object.vm_object->phys_contiguous) {
3140 pmap_remove(map->pmap,
3141 (addr64_t)(entry->vme_start),
3142 (addr64_t)(entry->vme_end));
3143 }
3144 _vm_map_clip_end(&map->hdr, entry, endaddr);
3145 vm_map_store_update_first_free(map, map->first_free);
3146 }
3147 }
3148
3149
3150 #define vm_map_copy_clip_end(copy, entry, endaddr) \
3151 MACRO_BEGIN \
3152 if ((endaddr) < (entry)->vme_end) \
3153 _vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \
3154 MACRO_END
3155
3156 /*
3157 * This routine is called only when it is known that
3158 * the entry must be split.
3159 */
3160 static void
3161 _vm_map_clip_end(
3162 register struct vm_map_header *map_header,
3163 register vm_map_entry_t entry,
3164 register vm_map_offset_t end)
3165 {
3166 register vm_map_entry_t new_entry;
3167
3168 /*
3169 * Create a new entry and insert it
3170 * AFTER the specified entry
3171 */
3172
3173 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
3174 vm_map_entry_copy_full(new_entry, entry);
3175
3176 assert(entry->vme_start < end);
3177 new_entry->vme_start = entry->vme_end = end;
3178 new_entry->offset += (end - entry->vme_start);
3179 assert(new_entry->vme_start < new_entry->vme_end);
3180
3181 _vm_map_store_entry_link(map_header, entry, new_entry);
3182
3183 if (entry->is_sub_map)
3184 vm_map_reference(new_entry->object.sub_map);
3185 else
3186 vm_object_reference(new_entry->object.vm_object);
3187 }
3188
3189
3190 /*
3191 * VM_MAP_RANGE_CHECK: [ internal use only ]
3192 *
3193 * Asserts that the starting and ending region
3194 * addresses fall within the valid range of the map.
3195 */
3196 #define VM_MAP_RANGE_CHECK(map, start, end) \
3197 MACRO_BEGIN \
3198 if (start < vm_map_min(map)) \
3199 start = vm_map_min(map); \
3200 if (end > vm_map_max(map)) \
3201 end = vm_map_max(map); \
3202 if (start > end) \
3203 start = end; \
3204 MACRO_END
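/*
 * Note that VM_MAP_RANGE_CHECK clamps rather than fails: an out-of-range
 * request is silently truncated to the map's limits, and an inverted range
 * collapses to an empty one (start == end).
 */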
3205
3206 /*
3207 * vm_map_range_check: [ internal use only ]
3208 *
3209 * Check that the region defined by the specified start and
3210 * end addresses is wholly contained within a single map
3211 * entry or set of adjacent map entries of the specified map,
3212 * i.e. the specified region contains no unmapped space.
3213 * If any or all of the region is unmapped, FALSE is returned.
3214 * Otherwise, TRUE is returned and if the output argument 'entry'
3215 * is not NULL it points to the map entry containing the start
3216 * of the region.
3217 *
3218 * The map is locked for reading on entry and is left locked.
3219 */
3220 static boolean_t
3221 vm_map_range_check(
3222 register vm_map_t map,
3223 register vm_map_offset_t start,
3224 register vm_map_offset_t end,
3225 vm_map_entry_t *entry)
3226 {
3227 vm_map_entry_t cur;
3228 register vm_map_offset_t prev;
3229
3230 /*
3231 * Basic sanity checks first
3232 */
3233 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
3234 return (FALSE);
3235
3236 /*
3237 * Check first if the region starts within a valid
3238 * mapping for the map.
3239 */
3240 if (!vm_map_lookup_entry(map, start, &cur))
3241 return (FALSE);
3242
3243 /*
3244 * Optimize for the case that the region is contained
3245 * in a single map entry.
3246 */
3247 if (entry != (vm_map_entry_t *) NULL)
3248 *entry = cur;
3249 if (end <= cur->vme_end)
3250 return (TRUE);
3251
3252 /*
3253 * If the region is not wholly contained within a
3254 * single entry, walk the entries looking for holes.
3255 */
3256 prev = cur->vme_end;
3257 cur = cur->vme_next;
3258 while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
3259 if (end <= cur->vme_end)
3260 return (TRUE);
3261 prev = cur->vme_end;
3262 cur = cur->vme_next;
3263 }
3264 return (FALSE);
3265 }
3266
3267 /*
3268 * vm_map_submap: [ kernel use only ]
3269 *
3270 * Mark the given range as handled by a subordinate map.
3271 *
3272 * This range must have been created with vm_map_find using
3273 * the vm_submap_object, and no other operations may have been
3274 * performed on this range prior to calling vm_map_submap.
3275 *
3276 * Only a limited number of operations can be performed
3277 * within this range after calling vm_map_submap:
3278 * vm_fault
3279 * [Don't try vm_map_copyin!]
3280 *
3281 * To remove a submapping, one must first remove the
3282 * range from the superior map, and then destroy the
3283 * submap (if desired). [Better yet, don't try it.]
3284 */
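/*
 * Concretely, the [start, end) range must be covered by a single map entry
 * that still maps "vm_submap_object" untouched: no resident pages, no copy
 * or shadow object, and no pager created.  If those conditions aren't met,
 * the call fails with KERN_INVALID_ARGUMENT.
 */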
3285 kern_return_t
3286 vm_map_submap(
3287 vm_map_t map,
3288 vm_map_offset_t start,
3289 vm_map_offset_t end,
3290 vm_map_t submap,
3291 vm_map_offset_t offset,
3292 #ifdef NO_NESTED_PMAP
3293 __unused
3294 #endif /* NO_NESTED_PMAP */
3295 boolean_t use_pmap)
3296 {
3297 vm_map_entry_t entry;
3298 register kern_return_t result = KERN_INVALID_ARGUMENT;
3299 register vm_object_t object;
3300
3301 vm_map_lock(map);
3302
3303 if (! vm_map_lookup_entry(map, start, &entry)) {
3304 entry = entry->vme_next;
3305 }
3306
3307 if (entry == vm_map_to_entry(map) ||
3308 entry->is_sub_map) {
3309 vm_map_unlock(map);
3310 return KERN_INVALID_ARGUMENT;
3311 }
3312
3313 assert(!entry->use_pmap); /* we don't want to unnest anything here */
3314 vm_map_clip_start(map, entry, start);
3315 vm_map_clip_end(map, entry, end);
3316
3317 if ((entry->vme_start == start) && (entry->vme_end == end) &&
3318 (!entry->is_sub_map) &&
3319 ((object = entry->object.vm_object) == vm_submap_object) &&
3320 (object->resident_page_count == 0) &&
3321 (object->copy == VM_OBJECT_NULL) &&
3322 (object->shadow == VM_OBJECT_NULL) &&
3323 (!object->pager_created)) {
3324 entry->offset = (vm_object_offset_t)offset;
3325 entry->object.vm_object = VM_OBJECT_NULL;
3326 vm_object_deallocate(object);
3327 entry->is_sub_map = TRUE;
3328 entry->object.sub_map = submap;
3329 vm_map_reference(submap);
3330 submap->mapped = TRUE;
3331
3332 #ifndef NO_NESTED_PMAP
3333 if (use_pmap) {
3334 /* nest if platform code will allow */
3335 if(submap->pmap == NULL) {
3336 submap->pmap = pmap_create((vm_map_size_t) 0, FALSE);
3337 if(submap->pmap == PMAP_NULL) {
3338 vm_map_unlock(map);
3339 return(KERN_NO_SPACE);
3340 }
3341 }
3342 result = pmap_nest(map->pmap,
3343 (entry->object.sub_map)->pmap,
3344 (addr64_t)start,
3345 (addr64_t)start,
3346 (uint64_t)(end - start));
3347 if(result)
3348 panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result);
3349 entry->use_pmap = TRUE;
3350 }
3351 #else /* NO_NESTED_PMAP */
3352 pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end);
3353 #endif /* NO_NESTED_PMAP */
3354 result = KERN_SUCCESS;
3355 }
3356 vm_map_unlock(map);
3357
3358 return(result);
3359 }
3360
3361 /*
3362 * vm_map_protect:
3363 *
3364 * Sets the protection of the specified address
3365 * region in the target map. If "set_max" is
3366 * specified, the maximum protection is to be set;
3367 * otherwise, only the current protection is affected.
3368 */
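/*
 * Illustrative sketch only (not from the original source): to drop write
 * permission on an existing mapping, a caller might do
 *
 *	kr = vm_map_protect(map, start, end, VM_PROT_READ, FALSE);
 *
 * and, with set_max == TRUE, the same call would clamp the maximum
 * protection instead of the current one.
 */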
3369 kern_return_t
3370 vm_map_protect(
3371 register vm_map_t map,
3372 register vm_map_offset_t start,
3373 register vm_map_offset_t end,
3374 register vm_prot_t new_prot,
3375 register boolean_t set_max)
3376 {
3377 register vm_map_entry_t current;
3378 register vm_map_offset_t prev;
3379 vm_map_entry_t entry;
3380 vm_prot_t new_max;
3381
3382 XPR(XPR_VM_MAP,
3383 "vm_map_protect, 0x%X start 0x%X end 0x%X, new 0x%X %d",
3384 map, start, end, new_prot, set_max);
3385
3386 vm_map_lock(map);
3387
3388 /* LP64todo - remove this check when vm_map_commpage64()
3389 * no longer has to stuff in a map_entry for the commpage
3390 * above the map's max_offset.
3391 */
3392 if (start >= map->max_offset) {
3393 vm_map_unlock(map);
3394 return(KERN_INVALID_ADDRESS);
3395 }
3396
3397 while(1) {
3398 /*
3399 * Lookup the entry. If it doesn't start in a valid
3400 * entry, return an error.
3401 */
3402 if (! vm_map_lookup_entry(map, start, &entry)) {
3403 vm_map_unlock(map);
3404 return(KERN_INVALID_ADDRESS);
3405 }
3406
3407 if (entry->superpage_size && (start & (SUPERPAGE_SIZE-1))) { /* extend request to whole entry */
3408 start = SUPERPAGE_ROUND_DOWN(start);
3409 continue;
3410 }
3411 break;
3412 }
3413 if (entry->superpage_size)
3414 end = SUPERPAGE_ROUND_UP(end);
3415
3416 /*
3417 * Make a first pass to check for protection and address
3418 * violations.
3419 */
3420
3421 current = entry;
3422 prev = current->vme_start;
3423 while ((current != vm_map_to_entry(map)) &&
3424 (current->vme_start < end)) {
3425
3426 /*
3427 * If there is a hole, return an error.
3428 */
3429 if (current->vme_start != prev) {
3430 vm_map_unlock(map);
3431 return(KERN_INVALID_ADDRESS);
3432 }
3433
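/*
 * VM_PROT_COPY asks for a private (copy-on-write) copy of the mapping, so
 * write access is tolerated in the check below even if the current maximum
 * protection doesn't include it; the second pass will mark the entry
 * needs_copy and add write to its max_protection.
 */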
3434 new_max = current->max_protection;
3435 if(new_prot & VM_PROT_COPY) {
3436 new_max |= VM_PROT_WRITE;
3437 if ((new_prot & (new_max | VM_PROT_COPY)) != new_prot) {
3438 vm_map_unlock(map);
3439 return(KERN_PROTECTION_FAILURE);
3440 }
3441 } else {
3442 if ((new_prot & new_max) != new_prot) {
3443 vm_map_unlock(map);
3444 return(KERN_PROTECTION_FAILURE);
3445 }
3446 }
3447
3448 #if CONFIG_EMBEDDED
3449 if (new_prot & VM_PROT_WRITE) {
3450 if ((new_prot & VM_PROT_EXECUTE) && !(current->used_for_jit)) {
3451 printf("EMBEDDED: %s can't have both write and exec at the same time\n", __FUNCTION__);
3452 new_prot &= ~VM_PROT_EXECUTE;
3453 }
3454 }
3455 #endif
3456
3457 prev = current->vme_end;
3458 current = current->vme_next;
3459 }
3460 if (end > prev) {
3461 vm_map_unlock(map);
3462 return(KERN_INVALID_ADDRESS);
3463 }
3464
3465 /*
3466 * Go back and fix up protections.
3467 * Clip to start here if the range starts within
3468 * the entry.
3469 */
3470
3471 current = entry;
3472 if (current != vm_map_to_entry(map)) {
3473 /* clip and unnest if necessary */
3474 vm_map_clip_start(map, current, start);
3475 }
3476
3477 while ((current != vm_map_to_entry(map)) &&
3478 (current->vme_start < end)) {
3479
3480 vm_prot_t old_prot;
3481
3482 vm_map_clip_end(map, current, end);
3483
3484 assert(!current->use_pmap); /* clipping did unnest if needed */
3485
3486 old_prot = current->protection;
3487
3488 if(new_prot & VM_PROT_COPY) {
3489 /* The caller is asking specifically to copy the */
3490 /* mapped data; this implies that max protection */
3491 /* will include write. The caller must be prepared */
3492 /* for loss of shared memory communication in the */
3493 /* target area after taking this step. */
3494
3495 if (current->is_sub_map == FALSE && current->object.vm_object == VM_OBJECT_NULL){
3496 current->object.vm_object = vm_object_allocate((vm_map_size_t)(current->vme_end - current->vme_start));
3497 current->offset = 0;
3498 }
3499 current->needs_copy = TRUE;
3500 current->max_protection |= VM_PROT_WRITE;
3501 }
3502
3503 if (set_max)
3504 current->protection =
3505 (current->max_protection =
3506 new_prot & ~VM_PROT_COPY) &
3507 old_prot;
3508 else
3509 current->protection = new_prot & ~VM_PROT_COPY;
3510
3511 /*
3512 * Update physical map if necessary.
3513 * If the request is to turn off write protection,
3514 * we won't do it for real (in pmap). This is because
3515 * it would cause copy-on-write to fail. We've already
3516 * set the new protection in the map, so if a
3517 * write-protect fault occurs, it will be fixed up
3518 * properly, COW or not.
3519 */
3520 if (current->protection != old_prot) {
3521 /* Look one level in, since we support nested pmaps */
3522 /* from mapped submaps which are direct entries */
3523 /* in our map */
3524
3525 vm_prot_t prot;
3526
3527 prot = current->protection & ~VM_PROT_WRITE;
3528
3529 if (override_nx(map, current->alias) && prot)
3530 prot |= VM_PROT_EXECUTE;
3531
3532 if (current->is_sub_map && current->use_pmap) {
3533 pmap_protect(current->object.sub_map->pmap,
3534 current->vme_start,
3535 current->vme_end,
3536 prot);
3537 } else {
3538 pmap_protect(map->pmap,
3539 current->vme_start,
3540 current->vme_end,
3541 prot);
3542 }
3543 }
3544 current = current->vme_next;
3545 }
3546
3547 current = entry;
3548 while ((current != vm_map_to_entry(map)) &&
3549 (current->vme_start <= end)) {
3550 vm_map_simplify_entry(map, current);
3551 current = current->vme_next;
3552 }
3553
3554 vm_map_unlock(map);
3555 return(KERN_SUCCESS);
3556 }
3557
3558 /*
3559 * vm_map_inherit:
3560 *
3561 * Sets the inheritance of the specified address
3562 * range in the target map. Inheritance
3563 * affects how the map will be shared with
3564 * child maps at the time of vm_map_fork.
3565 */
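/*
 * Illustrative sketch only (not from the original source): to keep a
 * region out of child tasks created at fork time, a caller might use
 *
 *	kr = vm_map_inherit(map, start, end, VM_INHERIT_NONE);
 */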
3566 kern_return_t
3567 vm_map_inherit(
3568 register vm_map_t map,
3569 register vm_map_offset_t start,
3570 register vm_map_offset_t end,
3571 register vm_inherit_t new_inheritance)
3572 {
3573 register vm_map_entry_t entry;
3574 vm_map_entry_t temp_entry;
3575
3576 vm_map_lock(map);
3577
3578 VM_MAP_RANGE_CHECK(map, start, end);
3579
3580 if (vm_map_lookup_entry(map, start, &temp_entry)) {
3581 entry = temp_entry;
3582 }
3583 else {
3584 temp_entry = temp_entry->vme_next;
3585 entry = temp_entry;
3586 }
3587
3588 /* first check entire range for submaps which can't support the */
3589 /* given inheritance. */
3590 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
3591 if(entry->is_sub_map) {
3592 if(new_inheritance == VM_INHERIT_COPY) {
3593 vm_map_unlock(map);
3594 return(KERN_INVALID_ARGUMENT);
3595 }
3596 }
3597
3598 entry = entry->vme_next;
3599 }
3600
3601 entry = temp_entry;
3602 if (entry != vm_map_to_entry(map)) {
3603 /* clip and unnest if necessary */
3604 vm_map_clip_start(map, entry, start);
3605 }
3606
3607 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
3608 vm_map_clip_end(map, entry, end);
3609 assert(!entry->use_pmap); /* clip did unnest if needed */
3610
3611 entry->inheritance = new_inheritance;
3612
3613 entry = entry->vme_next;
3614 }
3615
3616 vm_map_unlock(map);
3617 return(KERN_SUCCESS);
3618 }
3619
3620 /*
3621 * Update the accounting for the amount of wired memory in this map. If the user has
3622 * exceeded the defined limits, then we fail. Wiring on behalf of the kernel never fails.
3623 */
3624
3625 static kern_return_t
3626 add_wire_counts(
3627 vm_map_t map,
3628 vm_map_entry_t entry,
3629 boolean_t user_wire)
3630 {
3631 vm_map_size_t size;
3632
3633 if (user_wire) {
3634 unsigned int total_wire_count = vm_page_wire_count + vm_lopage_free_count;
3635
3636 /*
3637 * We're wiring memory at the request of the user. Check if this is the first time the user is wiring
3638 * this map entry.
3639 */
3640
3641 if (entry->user_wired_count == 0) {
3642 size = entry->vme_end - entry->vme_start;
3643
3644 /*
3645 * Since this is the first time the user is wiring this map entry, check to see if we're
3646 * exceeding the user wire limits. There is a per map limit which is the smaller of either
3647 * the process's rlimit or the global vm_user_wire_limit which caps this value. There is also
3648 * a system-wide limit on the amount of memory all users can wire. If the user is over either
3649 * limit, then we fail.
3650 */
3651
3652 if(size + map->user_wire_size > MIN(map->user_wire_limit, vm_user_wire_limit) ||
3653 size + ptoa_64(total_wire_count) > vm_global_user_wire_limit ||
3654 size + ptoa_64(total_wire_count) > max_mem - vm_global_no_user_wire_amount)
3655 return KERN_RESOURCE_SHORTAGE;
3656
3657 /*
3658 * The first time the user wires an entry, we also increment the wired_count and add this to
3659 * the total that has been wired in the map.
3660 */
3661
3662 if (entry->wired_count >= MAX_WIRE_COUNT)
3663 return KERN_FAILURE;
3664
3665 entry->wired_count++;
3666 map->user_wire_size += size;
3667 }
3668
3669 if (entry->user_wired_count >= MAX_WIRE_COUNT)
3670 return KERN_FAILURE;
3671
3672 entry->user_wired_count++;
3673
3674 } else {
3675
3676 /*
3677 * The kernel's wiring the memory. Just bump the count and continue.
3678 */
3679
3680 if (entry->wired_count >= MAX_WIRE_COUNT)
3681 panic("vm_map_wire: too many wirings");
3682
3683 entry->wired_count++;
3684 }
3685
3686 return KERN_SUCCESS;
3687 }
3688
3689 /*
3690 * Update the memory wiring accounting now that the given map entry is being unwired.
3691 */
3692
3693 static void
3694 subtract_wire_counts(
3695 vm_map_t map,
3696 vm_map_entry_t entry,
3697 boolean_t user_wire)
3698 {
3699
3700 if (user_wire) {
3701
3702 /*
3703 * We're unwiring memory at the request of the user. See if we're removing the last user wire reference.
3704 */
3705
3706 if (entry->user_wired_count == 1) {
3707
3708 /*
3709 * We're removing the last user wire reference. Decrement the wired_count and the total
3710 * user wired memory for this map.
3711 */
3712
3713 assert(entry->wired_count >= 1);
3714 entry->wired_count--;
3715 map->user_wire_size -= entry->vme_end - entry->vme_start;
3716 }
3717
3718 assert(entry->user_wired_count >= 1);
3719 entry->user_wired_count--;
3720
3721 } else {
3722
3723 /*
3724 * The kernel is unwiring the memory. Just update the count.
3725 */
3726
3727 assert(entry->wired_count >= 1);
3728 entry->wired_count--;
3729 }
3730 }
3731
3732 /*
3733 * vm_map_wire:
3734 *
3735 * Sets the pageability of the specified address range in the
3736 * target map as wired. Regions specified as not pageable require
3737 * locked-down physical memory and physical page maps. The
3738 * access_type variable indicates types of accesses that must not
3739 * generate page faults. This is checked against protection of
3740 * memory being locked-down.
3741 *
3742 * The map must not be locked, but a reference must remain to the
3743 * map throughout the call.
3744 */
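/*
 * vm_map_wire_nested is the internal worker.  A NULL "map_pmap" means we
 * are wiring "map" itself; a non-NULL "map_pmap" means we have recursed
 * into a submap and the physical mappings should be entered in that pmap,
 * at addresses based at "pmap_addr".
 */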
3745 static kern_return_t
3746 vm_map_wire_nested(
3747 register vm_map_t map,
3748 register vm_map_offset_t start,
3749 register vm_map_offset_t end,
3750 register vm_prot_t access_type,
3751 boolean_t user_wire,
3752 pmap_t map_pmap,
3753 vm_map_offset_t pmap_addr)
3754 {
3755 register vm_map_entry_t entry;
3756 struct vm_map_entry *first_entry, tmp_entry;
3757 vm_map_t real_map;
3758 register vm_map_offset_t s,e;
3759 kern_return_t rc;
3760 boolean_t need_wakeup;
3761 boolean_t main_map = FALSE;
3762 wait_interrupt_t interruptible_state;
3763 thread_t cur_thread;
3764 unsigned int last_timestamp;
3765 vm_map_size_t size;
3766
3767 vm_map_lock(map);
3768 if(map_pmap == NULL)
3769 main_map = TRUE;
3770 last_timestamp = map->timestamp;
3771
3772 VM_MAP_RANGE_CHECK(map, start, end);
3773 assert(page_aligned(start));
3774 assert(page_aligned(end));
3775 if (start == end) {
3776 /* We wired what the caller asked for, zero pages */
3777 vm_map_unlock(map);
3778 return KERN_SUCCESS;
3779 }
3780
3781 need_wakeup = FALSE;
3782 cur_thread = current_thread();
3783
3784 s = start;
3785 rc = KERN_SUCCESS;
3786
3787 if (vm_map_lookup_entry(map, s, &first_entry)) {
3788 entry = first_entry;
3789 /*
3790 * vm_map_clip_start will be done later.
3791 * We don't want to unnest any nested submaps here !
3792 */
3793 } else {
3794 /* Start address is not in map */
3795 rc = KERN_INVALID_ADDRESS;
3796 goto done;
3797 }
3798
3799 while ((entry != vm_map_to_entry(map)) && (s < end)) {
3800 /*
3801 * At this point, we have wired from "start" to "s".
3802 * We still need to wire from "s" to "end".
3803 *
3804 * "entry" hasn't been clipped, so it could start before "s"
3805 * and/or end after "end".
3806 */
3807
3808 /* "e" is how far we want to wire in this entry */
3809 e = entry->vme_end;
3810 if (e > end)
3811 e = end;
3812
3813 /*
3814 * If another thread is wiring/unwiring this entry then
3815 * block after informing other thread to wake us up.
3816 */
3817 if (entry->in_transition) {
3818 wait_result_t wait_result;
3819
3820 /*
3821 * We have not clipped the entry. Make sure that
3822 * the start address is in range so that the lookup
3823 * below will succeed.
3824 * "s" is the current starting point: we've already
3825 * wired from "start" to "s" and we still have
3826 * to wire from "s" to "end".
3827 */
3828
3829 entry->needs_wakeup = TRUE;
3830
3831 /*
3832 * wake up anybody waiting on entries that we have
3833 * already wired.
3834 */
3835 if (need_wakeup) {
3836 vm_map_entry_wakeup(map);
3837 need_wakeup = FALSE;
3838 }
3839 /*
3840 * User wiring is interruptible
3841 */
3842 wait_result = vm_map_entry_wait(map,
3843 (user_wire) ? THREAD_ABORTSAFE :
3844 THREAD_UNINT);
3845 if (user_wire && wait_result == THREAD_INTERRUPTED) {
3846 /*
3847 * undo the wirings we have done so far
3848 * We do not clear the needs_wakeup flag,
3849 * because we cannot tell if we were the
3850 * only one waiting.
3851 */
3852 rc = KERN_FAILURE;
3853 goto done;
3854 }
3855
3856 /*
3857 * Cannot avoid a lookup here. Reset the timestamp.
3858 */
3859 last_timestamp = map->timestamp;
3860
3861 /*
3862 * The entry could have been clipped, look it up again.
3863 * The worst that can happen is that it may not exist anymore.
3864 */
3865 if (!vm_map_lookup_entry(map, s, &first_entry)) {
3866 /*
3867 * User: undo everything up to the previous
3868 * entry. Let vm_map_unwire worry about
3869 * checking the validity of the range.
3870 */
3871 rc = KERN_FAILURE;
3872 goto done;
3873 }
3874 entry = first_entry;
3875 continue;
3876 }
3877
3878 if (entry->is_sub_map) {
3879 vm_map_offset_t sub_start;
3880 vm_map_offset_t sub_end;
3881 vm_map_offset_t local_start;
3882 vm_map_offset_t local_end;
3883 pmap_t pmap;
3884
3885 vm_map_clip_start(map, entry, s);
3886 vm_map_clip_end(map, entry, end);
3887
3888 sub_start = entry->offset;
3889 sub_end = entry->vme_end;
3890 sub_end += entry->offset - entry->vme_start;
3891
3892 local_end = entry->vme_end;
3893 if(map_pmap == NULL) {
3894 vm_object_t object;
3895 vm_object_offset_t offset;
3896 vm_prot_t prot;
3897 boolean_t wired;
3898 vm_map_entry_t local_entry;
3899 vm_map_version_t version;
3900 vm_map_t lookup_map;
3901
3902 if(entry->use_pmap) {
3903 pmap = entry->object.sub_map->pmap;
3904 /* ppc implementation requires that */
3905 /* submaps pmap address ranges line */
3906 /* up with parent map */
3907 #ifdef notdef
3908 pmap_addr = sub_start;
3909 #endif
3910 pmap_addr = s;
3911 } else {
3912 pmap = map->pmap;
3913 pmap_addr = s;
3914 }
3915
3916 if (entry->wired_count) {
3917 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
3918 goto done;
3919
3920 /*
3921 * The map was not unlocked:
3922 * no need to goto re-lookup.
3923 * Just go directly to next entry.
3924 */
3925 entry = entry->vme_next;
3926 s = entry->vme_start;
3927 continue;
3928
3929 }
3930
3931 /* call vm_map_lookup_locked to */
3932 /* cause any needs copy to be */
3933 /* evaluated */
3934 local_start = entry->vme_start;
3935 lookup_map = map;
3936 vm_map_lock_write_to_read(map);
3937 if(vm_map_lookup_locked(
3938 &lookup_map, local_start,
3939 access_type,
3940 OBJECT_LOCK_EXCLUSIVE,
3941 &version, &object,
3942 &offset, &prot, &wired,
3943 NULL,
3944 &real_map)) {
3945
3946 vm_map_unlock_read(lookup_map);
3947 vm_map_unwire(map, start,
3948 s, user_wire);
3949 return(KERN_FAILURE);
3950 }
3951 if(real_map != lookup_map)
3952 vm_map_unlock(real_map);
3953 vm_map_unlock_read(lookup_map);
3954 vm_map_lock(map);
3955 vm_object_unlock(object);
3956
3957 /* we unlocked, so must re-lookup */
3958 if (!vm_map_lookup_entry(map,
3959 local_start,
3960 &local_entry)) {
3961 rc = KERN_FAILURE;
3962 goto done;
3963 }
3964
3965 /*
3966 * entry could have been "simplified",
3967 * so re-clip
3968 */
3969 entry = local_entry;
3970 assert(s == local_start);
3971 vm_map_clip_start(map, entry, s);
3972 vm_map_clip_end(map, entry, end);
3973 /* re-compute "e" */
3974 e = entry->vme_end;
3975 if (e > end)
3976 e = end;
3977
3978 /* did we have a change of type? */
3979 if (!entry->is_sub_map) {
3980 last_timestamp = map->timestamp;
3981 continue;
3982 }
3983 } else {
3984 local_start = entry->vme_start;
3985 pmap = map_pmap;
3986 }
3987
3988 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
3989 goto done;
3990
3991 entry->in_transition = TRUE;
3992
3993 vm_map_unlock(map);
3994 rc = vm_map_wire_nested(entry->object.sub_map,
3995 sub_start, sub_end,
3996 access_type,
3997 user_wire, pmap, pmap_addr);
3998 vm_map_lock(map);
3999
4000 /*
4001 * Find the entry again. It could have been clipped
4002 * after we unlocked the map.
4003 */
4004 if (!vm_map_lookup_entry(map, local_start,
4005 &first_entry))
4006 panic("vm_map_wire: re-lookup failed");
4007 entry = first_entry;
4008
4009 assert(local_start == s);
4010 /* re-compute "e" */
4011 e = entry->vme_end;
4012 if (e > end)
4013 e = end;
4014
4015 last_timestamp = map->timestamp;
4016 while ((entry != vm_map_to_entry(map)) &&
4017 (entry->vme_start < e)) {
4018 assert(entry->in_transition);
4019 entry->in_transition = FALSE;
4020 if (entry->needs_wakeup) {
4021 entry->needs_wakeup = FALSE;
4022 need_wakeup = TRUE;
4023 }
4024 if (rc != KERN_SUCCESS) {/* from vm_*_wire */
4025 subtract_wire_counts(map, entry, user_wire);
4026 }
4027 entry = entry->vme_next;
4028 }
4029 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
4030 goto done;
4031 }
4032
4033 /* no need to relookup again */
4034 s = entry->vme_start;
4035 continue;
4036 }
4037
4038 /*
4039 * If this entry is already wired then increment
4040 * the appropriate wire reference count.
4041 */
4042 if (entry->wired_count) {
4043 /*
4044 * entry is already wired down, get our reference
4045 * after clipping to our range.
4046 */
4047 vm_map_clip_start(map, entry, s);
4048 vm_map_clip_end(map, entry, end);
4049
4050 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4051 goto done;
4052
4053 /* map was not unlocked: no need to relookup */
4054 entry = entry->vme_next;
4055 s = entry->vme_start;
4056 continue;
4057 }
4058
4059 /*
4060 * Unwired entry or wire request transmitted via submap
4061 */
4062
4063
4064 /*
4065 * Perform actions of vm_map_lookup that need the write
4066 * lock on the map: create a shadow object for a
4067 * copy-on-write region, or an object for a zero-fill
4068 * region.
4069 */
4070 size = entry->vme_end - entry->vme_start;
4071 /*
4072 * If wiring a copy-on-write page, we need to copy it now
4073 * even if we're only (currently) requesting read access.
4074 * This is aggressive, but once it's wired we can't move it.
4075 */
4076 if (entry->needs_copy) {
4077 vm_object_shadow(&entry->object.vm_object,
4078 &entry->offset, size);
4079 entry->needs_copy = FALSE;
4080 } else if (entry->object.vm_object == VM_OBJECT_NULL) {
4081 entry->object.vm_object = vm_object_allocate(size);
4082 entry->offset = (vm_object_offset_t)0;
4083 }
4084
4085 vm_map_clip_start(map, entry, s);
4086 vm_map_clip_end(map, entry, end);
4087
4088 /* re-compute "e" */
4089 e = entry->vme_end;
4090 if (e > end)
4091 e = end;
4092
4093 /*
4094 * Check for holes and protection mismatch.
4095 * Holes: Next entry should be contiguous unless this
4096 * is the end of the region.
4097 * Protection: Access requested must be allowed, unless
4098 * wiring is by protection class
4099 */
4100 if ((entry->vme_end < end) &&
4101 ((entry->vme_next == vm_map_to_entry(map)) ||
4102 (entry->vme_next->vme_start > entry->vme_end))) {
4103 /* found a hole */
4104 rc = KERN_INVALID_ADDRESS;
4105 goto done;
4106 }
4107 if ((entry->protection & access_type) != access_type) {
4108 /* found a protection problem */
4109 rc = KERN_PROTECTION_FAILURE;
4110 goto done;
4111 }
4112
4113 assert(entry->wired_count == 0 && entry->user_wired_count == 0);
4114
4115 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4116 goto done;
4117
4118 entry->in_transition = TRUE;
4119
4120 /*
4121 * This entry might get split once we unlock the map.
4122 * In vm_fault_wire(), we need the current range as
4123 * defined by this entry. In order for this to work
4124 * along with a simultaneous clip operation, we make a
4125 * temporary copy of this entry and use that for the
4126 * wiring. Note that the underlying objects do not
4127 * change during a clip.
4128 */
4129 tmp_entry = *entry;
4130
4131 /*
4132 * The in_transition state guarantees that the entry
4133 * (or entries for this range, if a split occurred) will be
4134 * there when the map lock is acquired for the second time.
4135 */
4136 vm_map_unlock(map);
4137
4138 if (!user_wire && cur_thread != THREAD_NULL)
4139 interruptible_state = thread_interrupt_level(THREAD_UNINT);
4140 else
4141 interruptible_state = THREAD_UNINT;
4142
4143 if(map_pmap)
4144 rc = vm_fault_wire(map,
4145 &tmp_entry, map_pmap, pmap_addr);
4146 else
4147 rc = vm_fault_wire(map,
4148 &tmp_entry, map->pmap,
4149 tmp_entry.vme_start);
4150
4151 if (!user_wire && cur_thread != THREAD_NULL)
4152 thread_interrupt_level(interruptible_state);
4153
4154 vm_map_lock(map);
4155
4156 if (last_timestamp+1 != map->timestamp) {
4157 /*
4158 * Find the entry again. It could have been clipped
4159 * after we unlocked the map.
4160 */
4161 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
4162 &first_entry))
4163 panic("vm_map_wire: re-lookup failed");
4164
4165 entry = first_entry;
4166 }
4167
4168 last_timestamp = map->timestamp;
4169
4170 while ((entry != vm_map_to_entry(map)) &&
4171 (entry->vme_start < tmp_entry.vme_end)) {
4172 assert(entry->in_transition);
4173 entry->in_transition = FALSE;
4174 if (entry->needs_wakeup) {
4175 entry->needs_wakeup = FALSE;
4176 need_wakeup = TRUE;
4177 }
4178 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
4179 subtract_wire_counts(map, entry, user_wire);
4180 }
4181 entry = entry->vme_next;
4182 }
4183
4184 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
4185 goto done;
4186 }
4187
4188 s = entry->vme_start;
4189 } /* end while loop through map entries */
4190
4191 done:
4192 if (rc == KERN_SUCCESS) {
4193 /* repair any damage we may have made to the VM map */
4194 vm_map_simplify_range(map, start, end);
4195 }
4196
4197 vm_map_unlock(map);
4198
4199 /*
4200 * wake up anybody waiting on entries we wired.
4201 */
4202 if (need_wakeup)
4203 vm_map_entry_wakeup(map);
4204
4205 if (rc != KERN_SUCCESS) {
4206 /* undo what has been wired so far */
4207 vm_map_unwire(map, start, s, user_wire);
4208 }
4209
4210 return rc;
4211
4212 }
4213
4214 kern_return_t
4215 vm_map_wire(
4216 register vm_map_t map,
4217 register vm_map_offset_t start,
4218 register vm_map_offset_t end,
4219 register vm_prot_t access_type,
4220 boolean_t user_wire)
4221 {
4222
4223 kern_return_t kret;
4224
4225 kret = vm_map_wire_nested(map, start, end, access_type,
4226 user_wire, (pmap_t)NULL, 0);
4227 return kret;
4228 }
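/*
 * Illustrative sketch (hypothetical helper, wrapped in "#if 0" so it is
 * not compiled): how a kernel subsystem might use the wrapper above to
 * pin a page-aligned range of a task's map for the duration of an I/O
 * and then release it.  With user_wire == FALSE the wiring is charged to
 * wired_count rather than user_wired_count.
 */
#if 0	/* example only */
static kern_return_t
example_wire_for_io(
	vm_map_t		map,	/* target map, e.g. current_map() */
	vm_map_offset_t		start,	/* page-aligned start of the buffer */
	vm_map_size_t		size)	/* page-aligned size of the buffer */
{
	kern_return_t		kr;

	/* fault in and wire the pages for read/write access */
	kr = vm_map_wire(map, start, start + size,
			 VM_PROT_READ | VM_PROT_WRITE,
			 FALSE);	/* kernel wiring, not a user wiring */
	if (kr != KERN_SUCCESS)
		return kr;

	/* ... perform the I/O against the now-resident range ... */

	/* drop the kernel wiring; the pages become pageable again */
	return vm_map_unwire(map, start, start + size, FALSE);
}
#endif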
4229
4230 /*
4231 * vm_map_unwire:
4232 *
4233 * Sets the pageability of the specified address range in the target
4234 * map as pageable. Regions specified must have been wired previously.
4235 *
4236 * The map must not be locked, but a reference must remain to the map
4237 * throughout the call.
4238 *
4239 * Kernel will panic on failures. User unwire ignores holes and
4240 * unwired and in-transition entries to avoid losing memory by leaving
4241 * it unwired.
4242 */
4243 static kern_return_t
4244 vm_map_unwire_nested(
4245 register vm_map_t map,
4246 register vm_map_offset_t start,
4247 register vm_map_offset_t end,
4248 boolean_t user_wire,
4249 pmap_t map_pmap,
4250 vm_map_offset_t pmap_addr)
4251 {
4252 register vm_map_entry_t entry;
4253 struct vm_map_entry *first_entry, tmp_entry;
4254 boolean_t need_wakeup;
4255 boolean_t main_map = FALSE;
4256 unsigned int last_timestamp;
4257
4258 vm_map_lock(map);
4259 if(map_pmap == NULL)
4260 main_map = TRUE;
4261 last_timestamp = map->timestamp;
4262
4263 VM_MAP_RANGE_CHECK(map, start, end);
4264 assert(page_aligned(start));
4265 assert(page_aligned(end));
4266
4267 if (start == end) {
4268 /* We unwired what the caller asked for: zero pages */
4269 vm_map_unlock(map);
4270 return KERN_SUCCESS;
4271 }
4272
4273 if (vm_map_lookup_entry(map, start, &first_entry)) {
4274 entry = first_entry;
4275 /*
4276 * vm_map_clip_start will be done later.
4277 * We don't want to unnest any nested sub maps here !
4278 */
4279 }
4280 else {
4281 if (!user_wire) {
4282 panic("vm_map_unwire: start not found");
4283 }
4284 /* Start address is not in map. */
4285 vm_map_unlock(map);
4286 return(KERN_INVALID_ADDRESS);
4287 }
4288
4289 if (entry->superpage_size) {
4290 /* superpages are always wired */
4291 vm_map_unlock(map);
4292 return KERN_INVALID_ADDRESS;
4293 }
4294
4295 need_wakeup = FALSE;
4296 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
4297 if (entry->in_transition) {
4298 /*
4299 * 1)
4300 * Another thread is wiring down this entry. Note
4301 * that if it were not for the other thread we would
4302 * be unwiring an unwired entry. This is not
4303 * permitted. If we wait, we will be unwiring memory
4304 * we did not wire.
4305 *
4306 * 2)
4307 * Another thread is unwiring this entry. We did not
4308 * have a reference to it, because if we did, this
4309 * entry will not be getting unwired now.
4310 */
4311 if (!user_wire) {
4312 /*
4313 * XXX FBDP
4314 * This could happen: there could be some
4315 * overlapping vslock/vsunlock operations
4316 * going on.
4317 * We should probably just wait and retry,
4318 * but then we have to be careful that this
4319 * entry could get "simplified" after
4320 * "in_transition" gets unset and before
4321 * we re-lookup the entry, so we would
4322 * have to re-clip the entry to avoid
4323 * re-unwiring what we have already unwired...
4324 * See vm_map_wire_nested().
4325 *
4326 * Or we could just ignore "in_transition"
4327 * here and proceed to decrement the wired
4328 * count(s) on this entry. That should be fine
4329 * as long as "wired_count" doesn't drop all
4330 * the way to 0 (and we should panic if THAT
4331 * happens).
4332 */
4333 panic("vm_map_unwire: in_transition entry");
4334 }
4335
4336 entry = entry->vme_next;
4337 continue;
4338 }
4339
4340 if (entry->is_sub_map) {
4341 vm_map_offset_t sub_start;
4342 vm_map_offset_t sub_end;
4343 vm_map_offset_t local_end;
4344 pmap_t pmap;
4345
4346 vm_map_clip_start(map, entry, start);
4347 vm_map_clip_end(map, entry, end);
4348
4349 sub_start = entry->offset;
4350 sub_end = entry->vme_end - entry->vme_start;
4351 sub_end += entry->offset;
4352 local_end = entry->vme_end;
4353 if(map_pmap == NULL) {
4354 if(entry->use_pmap) {
4355 pmap = entry->object.sub_map->pmap;
4356 pmap_addr = sub_start;
4357 } else {
4358 pmap = map->pmap;
4359 pmap_addr = start;
4360 }
4361 if (entry->wired_count == 0 ||
4362 (user_wire && entry->user_wired_count == 0)) {
4363 if (!user_wire)
4364 panic("vm_map_unwire: entry is unwired");
4365 entry = entry->vme_next;
4366 continue;
4367 }
4368
4369 /*
4370 * Check for holes
4371 * Holes: Next entry should be contiguous unless
4372 * this is the end of the region.
4373 */
4374 if (((entry->vme_end < end) &&
4375 ((entry->vme_next == vm_map_to_entry(map)) ||
4376 (entry->vme_next->vme_start
4377 > entry->vme_end)))) {
4378 if (!user_wire)
4379 panic("vm_map_unwire: non-contiguous region");
4380 /*
4381 entry = entry->vme_next;
4382 continue;
4383 */
4384 }
4385
4386 subtract_wire_counts(map, entry, user_wire);
4387
4388 if (entry->wired_count != 0) {
4389 entry = entry->vme_next;
4390 continue;
4391 }
4392
4393 entry->in_transition = TRUE;
4394 tmp_entry = *entry; /* see comment in vm_map_wire() */
4395
4396 /*
4397 * We can unlock the map now. The in_transition state
4398 * guarantees existence of the entry.
4399 */
4400 vm_map_unlock(map);
4401 vm_map_unwire_nested(entry->object.sub_map,
4402 sub_start, sub_end, user_wire, pmap, pmap_addr);
4403 vm_map_lock(map);
4404
4405 if (last_timestamp+1 != map->timestamp) {
4406 /*
4407 * Find the entry again. It could have been
4408 * clipped or deleted after we unlocked the map.
4409 */
4410 if (!vm_map_lookup_entry(map,
4411 tmp_entry.vme_start,
4412 &first_entry)) {
4413 if (!user_wire)
4414 panic("vm_map_unwire: re-lookup failed");
4415 entry = first_entry->vme_next;
4416 } else
4417 entry = first_entry;
4418 }
4419 last_timestamp = map->timestamp;
4420
4421 /*
4422 * clear transition bit for all constituent entries
4423 * that were in the original entry (saved in
4424 * tmp_entry). Also check for waiters.
4425 */
4426 while ((entry != vm_map_to_entry(map)) &&
4427 (entry->vme_start < tmp_entry.vme_end)) {
4428 assert(entry->in_transition);
4429 entry->in_transition = FALSE;
4430 if (entry->needs_wakeup) {
4431 entry->needs_wakeup = FALSE;
4432 need_wakeup = TRUE;
4433 }
4434 entry = entry->vme_next;
4435 }
4436 continue;
4437 } else {
4438 vm_map_unlock(map);
4439 vm_map_unwire_nested(entry->object.sub_map,
4440 sub_start, sub_end, user_wire, map_pmap,
4441 pmap_addr);
4442 vm_map_lock(map);
4443
4444 if (last_timestamp+1 != map->timestamp) {
4445 /*
4446 * Find the entry again. It could have been
4447 * clipped or deleted after we unlocked the map.
4448 */
4449 if (!vm_map_lookup_entry(map,
4450 tmp_entry.vme_start,
4451 &first_entry)) {
4452 if (!user_wire)
4453 panic("vm_map_unwire: re-lookup failed");
4454 entry = first_entry->vme_next;
4455 } else
4456 entry = first_entry;
4457 }
4458 last_timestamp = map->timestamp;
4459 }
4460 }
4461
4462
4463 if ((entry->wired_count == 0) ||
4464 (user_wire && entry->user_wired_count == 0)) {
4465 if (!user_wire)
4466 panic("vm_map_unwire: entry is unwired");
4467
4468 entry = entry->vme_next;
4469 continue;
4470 }
4471
4472 assert(entry->wired_count > 0 &&
4473 (!user_wire || entry->user_wired_count > 0));
4474
4475 vm_map_clip_start(map, entry, start);
4476 vm_map_clip_end(map, entry, end);
4477
4478 /*
4479 * Check for holes
4480 * Holes: Next entry should be contiguous unless
4481 * this is the end of the region.
4482 */
4483 if (((entry->vme_end < end) &&
4484 ((entry->vme_next == vm_map_to_entry(map)) ||
4485 (entry->vme_next->vme_start > entry->vme_end)))) {
4486
4487 if (!user_wire)
4488 panic("vm_map_unwire: non-contiguous region");
4489 entry = entry->vme_next;
4490 continue;
4491 }
4492
4493 subtract_wire_counts(map, entry, user_wire);
4494
4495 if (entry->wired_count != 0) {
4496 entry = entry->vme_next;
4497 continue;
4498 }
4499
4500 if(entry->zero_wired_pages) {
4501 entry->zero_wired_pages = FALSE;
4502 }
4503
4504 entry->in_transition = TRUE;
4505 tmp_entry = *entry; /* see comment in vm_map_wire() */
4506
4507 /*
4508 * We can unlock the map now. The in_transition state
4509 * guarantees existence of the entry.
4510 */
4511 vm_map_unlock(map);
4512 if(map_pmap) {
4513 vm_fault_unwire(map,
4514 &tmp_entry, FALSE, map_pmap, pmap_addr);
4515 } else {
4516 vm_fault_unwire(map,
4517 &tmp_entry, FALSE, map->pmap,
4518 tmp_entry.vme_start);
4519 }
4520 vm_map_lock(map);
4521
4522 if (last_timestamp+1 != map->timestamp) {
4523 /*
4524 * Find the entry again. It could have been clipped
4525 * or deleted after we unlocked the map.
4526 */
4527 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
4528 &first_entry)) {
4529 if (!user_wire)
4530 panic("vm_map_unwire: re-lookup failed");
4531 entry = first_entry->vme_next;
4532 } else
4533 entry = first_entry;
4534 }
4535 last_timestamp = map->timestamp;
4536
4537 /*
4538 * clear transition bit for all constituent entries that
4539 * were in the original entry (saved in tmp_entry). Also
4540 * check for waiters.
4541 */
4542 while ((entry != vm_map_to_entry(map)) &&
4543 (entry->vme_start < tmp_entry.vme_end)) {
4544 assert(entry->in_transition);
4545 entry->in_transition = FALSE;
4546 if (entry->needs_wakeup) {
4547 entry->needs_wakeup = FALSE;
4548 need_wakeup = TRUE;
4549 }
4550 entry = entry->vme_next;
4551 }
4552 }
4553
4554 /*
4555 * We might have fragmented the address space when we wired this
4556 * range of addresses. Attempt to re-coalesce these VM map entries
4557 * with their neighbors now that they're no longer wired.
4558 * Under some circumstances, address space fragmentation can
4559 * prevent VM object shadow chain collapsing, which can cause
4560 * swap space leaks.
4561 */
4562 vm_map_simplify_range(map, start, end);
4563
4564 vm_map_unlock(map);
4565 /*
4566 * wake up anybody waiting on entries that we have unwired.
4567 */
4568 if (need_wakeup)
4569 vm_map_entry_wakeup(map);
4570 return(KERN_SUCCESS);
4571
4572 }
4573
4574 kern_return_t
4575 vm_map_unwire(
4576 register vm_map_t map,
4577 register vm_map_offset_t start,
4578 register vm_map_offset_t end,
4579 boolean_t user_wire)
4580 {
4581 return vm_map_unwire_nested(map, start, end,
4582 user_wire, (pmap_t)NULL, 0);
4583 }
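/*
 * Illustrative sketch (hypothetical helper, not compiled): the user-wiring
 * flavour of the same pair of calls, as an mlock-style request might use
 * them.  With user_wire == TRUE, vm_map_wire_nested() waits THREAD_ABORTSAFE
 * and returns KERN_FAILURE if interrupted, after undoing any partial wiring
 * itself, so the caller only has to propagate the error.
 */
#if 0	/* example only */
static kern_return_t
example_user_lock_range(
	vm_map_t		user_map,
	vm_map_offset_t		addr,
	vm_map_size_t		len)
{
	vm_map_offset_t		start = vm_map_trunc_page(addr);
	vm_map_offset_t		end = vm_map_round_page(addr + len);
	kern_return_t		kr;

	/* bump user_wired_count on every entry in [start, end) */
	kr = vm_map_wire(user_map, start, end, VM_PROT_READ, TRUE);
	if (kr != KERN_SUCCESS)
		return kr;	/* interrupted or bad range; nothing left wired */

	/* ... and the matching unlock request later on ... */
	return vm_map_unwire(user_map, start, end, TRUE);
}
#endif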
4584
4585
4586 /*
4587 * vm_map_entry_delete: [ internal use only ]
4588 *
4589 * Deallocate the given entry from the target map.
4590 */
4591 static void
4592 vm_map_entry_delete(
4593 register vm_map_t map,
4594 register vm_map_entry_t entry)
4595 {
4596 register vm_map_offset_t s, e;
4597 register vm_object_t object;
4598 register vm_map_t submap;
4599
4600 s = entry->vme_start;
4601 e = entry->vme_end;
4602 assert(page_aligned(s));
4603 assert(page_aligned(e));
4604 assert(entry->wired_count == 0);
4605 assert(entry->user_wired_count == 0);
4606 assert(!entry->permanent);
4607
4608 if (entry->is_sub_map) {
4609 object = NULL;
4610 submap = entry->object.sub_map;
4611 } else {
4612 submap = NULL;
4613 object = entry->object.vm_object;
4614 }
4615
4616 vm_map_store_entry_unlink(map, entry);
4617 map->size -= e - s;
4618
4619 vm_map_entry_dispose(map, entry);
4620
4621 vm_map_unlock(map);
4622 /*
4623 * Deallocate the object only after removing all
4624 * pmap entries pointing to its pages.
4625 */
4626 if (submap)
4627 vm_map_deallocate(submap);
4628 else
4629 vm_object_deallocate(object);
4630
4631 }
4632
4633 void
4634 vm_map_submap_pmap_clean(
4635 vm_map_t map,
4636 vm_map_offset_t start,
4637 vm_map_offset_t end,
4638 vm_map_t sub_map,
4639 vm_map_offset_t offset)
4640 {
4641 vm_map_offset_t submap_start;
4642 vm_map_offset_t submap_end;
4643 vm_map_size_t remove_size;
4644 vm_map_entry_t entry;
4645
4646 submap_end = offset + (end - start);
4647 submap_start = offset;
4648
4649 vm_map_lock_read(sub_map);
4650 if(vm_map_lookup_entry(sub_map, offset, &entry)) {
4651
4652 remove_size = (entry->vme_end - entry->vme_start);
4653 if(offset > entry->vme_start)
4654 remove_size -= offset - entry->vme_start;
4655
4656
4657 if(submap_end < entry->vme_end) {
4658 remove_size -=
4659 entry->vme_end - submap_end;
4660 }
4661 if(entry->is_sub_map) {
4662 vm_map_submap_pmap_clean(
4663 sub_map,
4664 start,
4665 start + remove_size,
4666 entry->object.sub_map,
4667 entry->offset);
4668 } else {
4669
4670 if((map->mapped) && (map->ref_count)
4671 && (entry->object.vm_object != NULL)) {
4672 vm_object_pmap_protect(
4673 entry->object.vm_object,
4674 entry->offset+(offset-entry->vme_start),
4675 remove_size,
4676 PMAP_NULL,
4677 entry->vme_start,
4678 VM_PROT_NONE);
4679 } else {
4680 pmap_remove(map->pmap,
4681 (addr64_t)start,
4682 (addr64_t)(start + remove_size));
4683 }
4684 }
4685 }
4686
4687 entry = entry->vme_next;
4688
4689 while((entry != vm_map_to_entry(sub_map))
4690 && (entry->vme_start < submap_end)) {
4691 remove_size = (entry->vme_end - entry->vme_start);
4692 if(submap_end < entry->vme_end) {
4693 remove_size -= entry->vme_end - submap_end;
4694 }
4695 if(entry->is_sub_map) {
4696 vm_map_submap_pmap_clean(
4697 sub_map,
4698 (start + entry->vme_start) - offset,
4699 ((start + entry->vme_start) - offset) + remove_size,
4700 entry->object.sub_map,
4701 entry->offset);
4702 } else {
4703 if((map->mapped) && (map->ref_count)
4704 && (entry->object.vm_object != NULL)) {
4705 vm_object_pmap_protect(
4706 entry->object.vm_object,
4707 entry->offset,
4708 remove_size,
4709 PMAP_NULL,
4710 entry->vme_start,
4711 VM_PROT_NONE);
4712 } else {
4713 pmap_remove(map->pmap,
4714 (addr64_t)((start + entry->vme_start)
4715 - offset),
4716 (addr64_t)(((start + entry->vme_start)
4717 - offset) + remove_size));
4718 }
4719 }
4720 entry = entry->vme_next;
4721 }
4722 vm_map_unlock_read(sub_map);
4723 return;
4724 }
4725
4726 /*
4727 * vm_map_delete: [ internal use only ]
4728 *
4729 * Deallocates the given address range from the target map.
4730 * Removes all user wirings. Unwires one kernel wiring if
4731 * VM_MAP_REMOVE_KUNWIRE is set. Waits for kernel wirings to go
4732 * away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set. Sleeps
4733 * interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
4734 *
4735 * This routine is called with map locked and leaves map locked.
4736 */
4737 static kern_return_t
4738 vm_map_delete(
4739 vm_map_t map,
4740 vm_map_offset_t start,
4741 vm_map_offset_t end,
4742 int flags,
4743 vm_map_t zap_map)
4744 {
4745 vm_map_entry_t entry, next;
4746 struct vm_map_entry *first_entry, tmp_entry;
4747 register vm_map_offset_t s;
4748 register vm_object_t object;
4749 boolean_t need_wakeup;
4750 unsigned int last_timestamp = ~0; /* unlikely value */
4751 int interruptible;
4752
4753 interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
4754 THREAD_ABORTSAFE : THREAD_UNINT;
4755
4756 /*
4757 * All our DMA I/O operations in IOKit are currently done by
4758 * wiring through the map entries of the task requesting the I/O.
4759 * Because of this, we must always wait for kernel wirings
4760 * to go away on the entries before deleting them.
4761 *
4762 * Any caller who wants to actually remove a kernel wiring
4763 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
4764 * properly remove one wiring instead of blasting through
4765 * them all.
4766 */
4767 flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;
4768
4769 while(1) {
4770 /*
4771 * Find the start of the region, and clip it
4772 */
4773 if (vm_map_lookup_entry(map, start, &first_entry)) {
4774 entry = first_entry;
4775 if (entry->superpage_size && (start & ~SUPERPAGE_MASK)) { /* extend request to whole entry */
4776 start = SUPERPAGE_ROUND_DOWN(start);
4777 continue;
4778 }
4779 if (start == entry->vme_start) {
4780 /*
4781 * No need to clip. We don't want to cause
4782 * any unnecessary unnesting in this case...
4783 */
4784 } else {
4785 vm_map_clip_start(map, entry, start);
4786 }
4787
4788 /*
4789 * Fix the lookup hint now, rather than each
4790 * time through the loop.
4791 */
4792 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4793 } else {
4794 entry = first_entry->vme_next;
4795 }
4796 break;
4797 }
4798 if (entry->superpage_size)
4799 end = SUPERPAGE_ROUND_UP(end);
4800
4801 need_wakeup = FALSE;
4802 /*
4803 * Step through all entries in this region
4804 */
4805 s = entry->vme_start;
4806 while ((entry != vm_map_to_entry(map)) && (s < end)) {
4807 /*
4808 * At this point, we have deleted all the memory entries
4809 * between "start" and "s". We still need to delete
4810 * all memory entries between "s" and "end".
4811 * While we were blocked and the map was unlocked, some
4812 * new memory entries could have been re-allocated between
4813 * "start" and "s" and we don't want to mess with those.
4814 * Some of those entries could even have been re-assembled
4815 * with an entry after "s" (in vm_map_simplify_entry()), so
4816 * we may have to vm_map_clip_start() again.
4817 */
4818
4819 if (entry->vme_start >= s) {
4820 /*
4821 * This entry starts on or after "s"
4822 * so no need to clip its start.
4823 */
4824 } else {
4825 /*
4826 * This entry has been re-assembled by a
4827 * vm_map_simplify_entry(). We need to
4828 * re-clip its start.
4829 */
4830 vm_map_clip_start(map, entry, s);
4831 }
4832 if (entry->vme_end <= end) {
4833 /*
4834 * This entry is going away completely, so no need
4835 * to clip and possibly cause an unnecessary unnesting.
4836 */
4837 } else {
4838 vm_map_clip_end(map, entry, end);
4839 }
4840
4841 if (entry->permanent) {
4842 panic("attempt to remove permanent VM map entry "
4843 "%p [0x%llx:0x%llx]\n",
4844 entry, (uint64_t) s, (uint64_t) end);
4845 }
4846
4847
4848 if (entry->in_transition) {
4849 wait_result_t wait_result;
4850
4851 /*
4852 * Another thread is wiring/unwiring this entry.
4853 * Let the other thread know we are waiting.
4854 */
4855 assert(s == entry->vme_start);
4856 entry->needs_wakeup = TRUE;
4857
4858 /*
4859 * wake up anybody waiting on entries that we have
4860 * already unwired/deleted.
4861 */
4862 if (need_wakeup) {
4863 vm_map_entry_wakeup(map);
4864 need_wakeup = FALSE;
4865 }
4866
4867 wait_result = vm_map_entry_wait(map, interruptible);
4868
4869 if (interruptible &&
4870 wait_result == THREAD_INTERRUPTED) {
4871 /*
4872 * We do not clear the needs_wakeup flag,
4873 * since we cannot tell if we were the only one.
4874 */
4875 vm_map_unlock(map);
4876 return KERN_ABORTED;
4877 }
4878
4879 /*
4880 * The entry could have been clipped or it
4881 * may not exist anymore. Look it up again.
4882 */
4883 if (!vm_map_lookup_entry(map, s, &first_entry)) {
4884 assert((map != kernel_map) &&
4885 (!entry->is_sub_map));
4886 /*
4887 * User: use the next entry
4888 */
4889 entry = first_entry->vme_next;
4890 s = entry->vme_start;
4891 } else {
4892 entry = first_entry;
4893 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4894 }
4895 last_timestamp = map->timestamp;
4896 continue;
4897 } /* end in_transition */
4898
4899 if (entry->wired_count) {
4900 boolean_t user_wire;
4901
4902 user_wire = entry->user_wired_count > 0;
4903
4904 /*
4905 * Remove a kernel wiring if requested
4906 */
4907 if (flags & VM_MAP_REMOVE_KUNWIRE) {
4908 entry->wired_count--;
4909 }
4910
4911 /*
4912 * Remove all user wirings for proper accounting
4913 */
4914 if (entry->user_wired_count > 0) {
4915 while (entry->user_wired_count)
4916 subtract_wire_counts(map, entry, user_wire);
4917 }
4918
4919 if (entry->wired_count != 0) {
4920 assert(map != kernel_map);
4921 /*
4922 * Cannot continue. Typical case is when
4923 * a user thread has physical I/O pending
4924 * on this page. Either wait for the
4925 * kernel wiring to go away or return an
4926 * error.
4927 */
4928 if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) {
4929 wait_result_t wait_result;
4930
4931 assert(s == entry->vme_start);
4932 entry->needs_wakeup = TRUE;
4933 wait_result = vm_map_entry_wait(map,
4934 interruptible);
4935
4936 if (interruptible &&
4937 wait_result == THREAD_INTERRUPTED) {
4938 /*
4939 * We do not clear the
4940 * needs_wakeup flag, since we
4941 * cannot tell if we were the
4942 * only one.
4943 */
4944 vm_map_unlock(map);
4945 return KERN_ABORTED;
4946 }
4947
4948 /*
4949 * The entry could have been clipped or
4950 * it may not exist anymore. Look it
4951 * up again.
4952 */
4953 if (!vm_map_lookup_entry(map, s,
4954 &first_entry)) {
4955 assert(map != kernel_map);
4956 /*
4957 * User: use the next entry
4958 */
4959 entry = first_entry->vme_next;
4960 s = entry->vme_start;
4961 } else {
4962 entry = first_entry;
4963 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4964 }
4965 last_timestamp = map->timestamp;
4966 continue;
4967 }
4968 else {
4969 return KERN_FAILURE;
4970 }
4971 }
4972
4973 entry->in_transition = TRUE;
4974 /*
4975 * copy current entry. see comment in vm_map_wire()
4976 */
4977 tmp_entry = *entry;
4978 assert(s == entry->vme_start);
4979
4980 /*
4981 * We can unlock the map now. The in_transition
4982 * state guarantees existence of the entry.
4983 */
4984 vm_map_unlock(map);
4985
4986 if (tmp_entry.is_sub_map) {
4987 vm_map_t sub_map;
4988 vm_map_offset_t sub_start, sub_end;
4989 pmap_t pmap;
4990 vm_map_offset_t pmap_addr;
4991
4992
4993 sub_map = tmp_entry.object.sub_map;
4994 sub_start = tmp_entry.offset;
4995 sub_end = sub_start + (tmp_entry.vme_end -
4996 tmp_entry.vme_start);
4997 if (tmp_entry.use_pmap) {
4998 pmap = sub_map->pmap;
4999 pmap_addr = tmp_entry.vme_start;
5000 } else {
5001 pmap = map->pmap;
5002 pmap_addr = tmp_entry.vme_start;
5003 }
5004 (void) vm_map_unwire_nested(sub_map,
5005 sub_start, sub_end,
5006 user_wire,
5007 pmap, pmap_addr);
5008 } else {
5009
5010 vm_fault_unwire(map, &tmp_entry,
5011 tmp_entry.object.vm_object == kernel_object,
5012 map->pmap, tmp_entry.vme_start);
5013 }
5014
5015 vm_map_lock(map);
5016
5017 if (last_timestamp+1 != map->timestamp) {
5018 /*
5019 * Find the entry again. It could have
5020 * been clipped after we unlocked the map.
5021 */
5022 if (!vm_map_lookup_entry(map, s, &first_entry)){
5023 assert((map != kernel_map) &&
5024 (!entry->is_sub_map));
5025 first_entry = first_entry->vme_next;
5026 s = first_entry->vme_start;
5027 } else {
5028 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
5029 }
5030 } else {
5031 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
5032 first_entry = entry;
5033 }
5034
5035 last_timestamp = map->timestamp;
5036
5037 entry = first_entry;
5038 while ((entry != vm_map_to_entry(map)) &&
5039 (entry->vme_start < tmp_entry.vme_end)) {
5040 assert(entry->in_transition);
5041 entry->in_transition = FALSE;
5042 if (entry->needs_wakeup) {
5043 entry->needs_wakeup = FALSE;
5044 need_wakeup = TRUE;
5045 }
5046 entry = entry->vme_next;
5047 }
5048 /*
5049 * We have unwired the entry(s). Go back and
5050 * delete them.
5051 */
5052 entry = first_entry;
5053 continue;
5054 }
5055
5056 /* entry is unwired */
5057 assert(entry->wired_count == 0);
5058 assert(entry->user_wired_count == 0);
5059
5060 assert(s == entry->vme_start);
5061
5062 if (flags & VM_MAP_REMOVE_NO_PMAP_CLEANUP) {
5063 /*
5064 * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
5065 * vm_map_delete(), some map entries might have been
5066 * transferred to a "zap_map", which doesn't have a
5067 * pmap. The original pmap has already been flushed
5068 * in the vm_map_delete() call targeting the original
5069 * map, but when we get to destroying the "zap_map",
5070 * we don't have any pmap to flush, so let's just skip
5071 * all this.
5072 */
5073 } else if (entry->is_sub_map) {
5074 if (entry->use_pmap) {
5075 #ifndef NO_NESTED_PMAP
5076 pmap_unnest(map->pmap,
5077 (addr64_t)entry->vme_start,
5078 entry->vme_end - entry->vme_start);
5079 #endif /* NO_NESTED_PMAP */
5080 if ((map->mapped) && (map->ref_count)) {
5081 /* clean up parent map/maps */
5082 vm_map_submap_pmap_clean(
5083 map, entry->vme_start,
5084 entry->vme_end,
5085 entry->object.sub_map,
5086 entry->offset);
5087 }
5088 } else {
5089 vm_map_submap_pmap_clean(
5090 map, entry->vme_start, entry->vme_end,
5091 entry->object.sub_map,
5092 entry->offset);
5093 }
5094 } else if (entry->object.vm_object != kernel_object) {
5095 object = entry->object.vm_object;
5096 if((map->mapped) && (map->ref_count)) {
5097 vm_object_pmap_protect(
5098 object, entry->offset,
5099 entry->vme_end - entry->vme_start,
5100 PMAP_NULL,
5101 entry->vme_start,
5102 VM_PROT_NONE);
5103 } else {
5104 pmap_remove(map->pmap,
5105 (addr64_t)entry->vme_start,
5106 (addr64_t)entry->vme_end);
5107 }
5108 }
5109
5110 /*
5111 * All pmap mappings for this map entry must have been
5112 * cleared by now.
5113 */
5114 assert(vm_map_pmap_is_empty(map,
5115 entry->vme_start,
5116 entry->vme_end));
5117
5118 next = entry->vme_next;
5119 s = next->vme_start;
5120 last_timestamp = map->timestamp;
5121
5122 if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) &&
5123 zap_map != VM_MAP_NULL) {
5124 vm_map_size_t entry_size;
5125 /*
5126 * The caller wants to save the affected VM map entries
5127 * into the "zap_map". The caller will take care of
5128 * these entries.
5129 */
5130 /* unlink the entry from "map" ... */
5131 vm_map_store_entry_unlink(map, entry);
5132 /* ... and add it to the end of the "zap_map" */
5133 vm_map_store_entry_link(zap_map,
5134 vm_map_last_entry(zap_map),
5135 entry);
5136 entry_size = entry->vme_end - entry->vme_start;
5137 map->size -= entry_size;
5138 zap_map->size += entry_size;
5139 /* we didn't unlock the map, so no timestamp increase */
5140 last_timestamp--;
5141 } else {
5142 vm_map_entry_delete(map, entry);
5143 /* vm_map_entry_delete unlocks the map */
5144 vm_map_lock(map);
5145 }
5146
5147 entry = next;
5148
5149 if(entry == vm_map_to_entry(map)) {
5150 break;
5151 }
5152 if (last_timestamp+1 != map->timestamp) {
5153 /*
5154 * We are responsible for deleting everything
5155 * from the given space. If someone has interfered,
5156 * we pick up where we left off; back fills should
5157 * be all right for anyone except map_delete, and
5158 * we have to assume that the task has been fully
5159 * disabled before we get here.
5160 */
5161 if (!vm_map_lookup_entry(map, s, &entry)){
5162 entry = entry->vme_next;
5163 s = entry->vme_start;
5164 } else {
5165 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
5166 }
5167 /*
5168 * Others can not only allocate behind us, we can
5169 * also see entries coalesce while we don't hold the map lock.
5170 */
5171 if(entry == vm_map_to_entry(map)) {
5172 break;
5173 }
5174 }
5175 last_timestamp = map->timestamp;
5176 }
5177
5178 if (map->wait_for_space)
5179 thread_wakeup((event_t) map);
5180 /*
5181 * wake up anybody waiting on entries that we have already deleted.
5182 */
5183 if (need_wakeup)
5184 vm_map_entry_wakeup(map);
5185
5186 return KERN_SUCCESS;
5187 }
5188
5189 /*
5190 * vm_map_remove:
5191 *
5192 * Remove the given address range from the target map.
5193 * This is the exported form of vm_map_delete.
5194 */
5195 kern_return_t
5196 vm_map_remove(
5197 register vm_map_t map,
5198 register vm_map_offset_t start,
5199 register vm_map_offset_t end,
5200 register boolean_t flags)
5201 {
5202 register kern_return_t result;
5203
5204 vm_map_lock(map);
5205 VM_MAP_RANGE_CHECK(map, start, end);
5206 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
5207 vm_map_unlock(map);
5208
5209 return(result);
5210 }
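/*
 * Illustrative sketch (hypothetical helper, not compiled): tearing down a
 * mapping that carries one kernel wiring.  Passing VM_MAP_REMOVE_KUNWIRE
 * makes vm_map_delete() drop that single kernel wiring before deleting the
 * entries; without it the delete would simply wait for the kernel wiring to
 * go away, since VM_MAP_REMOVE_WAIT_FOR_KWIRE is always added internally.
 */
#if 0	/* example only */
static kern_return_t
example_remove_wired_mapping(
	vm_map_t		map,
	vm_map_offset_t		start,
	vm_map_size_t		size)
{
	/* remove the range, releasing our single kernel wiring on it */
	return vm_map_remove(map, start, start + size,
			     VM_MAP_REMOVE_KUNWIRE);
}
#endif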
5211
5212
5213 /*
5214 * Routine: vm_map_copy_discard
5215 *
5216 * Description:
5217 * Dispose of a map copy object (returned by
5218 * vm_map_copyin).
5219 */
5220 void
5221 vm_map_copy_discard(
5222 vm_map_copy_t copy)
5223 {
5224 if (copy == VM_MAP_COPY_NULL)
5225 return;
5226
5227 switch (copy->type) {
5228 case VM_MAP_COPY_ENTRY_LIST:
5229 while (vm_map_copy_first_entry(copy) !=
5230 vm_map_copy_to_entry(copy)) {
5231 vm_map_entry_t entry = vm_map_copy_first_entry(copy);
5232
5233 vm_map_copy_entry_unlink(copy, entry);
5234 vm_object_deallocate(entry->object.vm_object);
5235 vm_map_copy_entry_dispose(copy, entry);
5236 }
5237 break;
5238 case VM_MAP_COPY_OBJECT:
5239 vm_object_deallocate(copy->cpy_object);
5240 break;
5241 case VM_MAP_COPY_KERNEL_BUFFER:
5242
5243 /*
5244 * The vm_map_copy_t and possibly the data buffer were
5245 * allocated by a single call to kalloc(), i.e. the
5246 * vm_map_copy_t was not allocated out of the zone.
5247 */
5248 kfree(copy, copy->cpy_kalloc_size);
5249 return;
5250 }
5251 zfree(vm_map_copy_zone, copy);
5252 }
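/*
 * Illustrative sketch (hypothetical helper, not compiled): the usual
 * lifetime of a vm_map_copy_t.  vm_map_copyin() is assumed to have its
 * standard (src_map, src_addr, len, src_destroy, &copy) form.  Because
 * vm_map_copy_overwrite() consumes the copy object only on success, the
 * caller must discard it explicitly on failure.
 */
#if 0	/* example only */
static kern_return_t
example_copy_between_maps(
	vm_map_t		src_map,
	vm_map_offset_t		src_addr,
	vm_map_t		dst_map,
	vm_map_offset_t		dst_addr,
	vm_map_size_t		len)
{
	vm_map_copy_t		copy;
	kern_return_t		kr;

	/* capture the source range in copyin form, leaving the source intact */
	kr = vm_map_copyin(src_map, src_addr, len, FALSE, &copy);
	if (kr != KERN_SUCCESS)
		return kr;

	/* overwrite the (writeable, already mapped) destination range */
	kr = vm_map_copy_overwrite(dst_map, dst_addr, copy, TRUE);
	if (kr != KERN_SUCCESS)
		vm_map_copy_discard(copy);	/* failure: copy not consumed */

	return kr;
}
#endif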
5253
5254 /*
5255 * Routine: vm_map_copy_copy
5256 *
5257 * Description:
5258 * Move the information in a map copy object to
5259 * a new map copy object, leaving the old one
5260 * empty.
5261 *
5262 * This is used by kernel routines that need
5263 * to look at out-of-line data (in copyin form)
5264 * before deciding whether to return SUCCESS.
5265 * If the routine returns FAILURE, the original
5266 * copy object will be deallocated; therefore,
5267 * these routines must make a copy of the copy
5268 * object and leave the original empty so that
5269 * deallocation will not fail.
5270 */
5271 vm_map_copy_t
5272 vm_map_copy_copy(
5273 vm_map_copy_t copy)
5274 {
5275 vm_map_copy_t new_copy;
5276
5277 if (copy == VM_MAP_COPY_NULL)
5278 return VM_MAP_COPY_NULL;
5279
5280 /*
5281 * Allocate a new copy object, and copy the information
5282 * from the old one into it.
5283 */
5284
5285 new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
5286 *new_copy = *copy;
5287
5288 if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
5289 /*
5290 * The links in the entry chain must be
5291 * changed to point to the new copy object.
5292 */
5293 vm_map_copy_first_entry(copy)->vme_prev
5294 = vm_map_copy_to_entry(new_copy);
5295 vm_map_copy_last_entry(copy)->vme_next
5296 = vm_map_copy_to_entry(new_copy);
5297 }
5298
5299 /*
5300 * Change the old copy object into one that contains
5301 * nothing to be deallocated.
5302 */
5303 copy->type = VM_MAP_COPY_OBJECT;
5304 copy->cpy_object = VM_OBJECT_NULL;
5305
5306 /*
5307 * Return the new object.
5308 */
5309 return new_copy;
5310 }
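/*
 * Illustrative sketch (hypothetical helper, not compiled): the pattern the
 * comment above describes.  A routine that must inspect out-of-line data
 * before committing takes the contents via vm_map_copy_copy(); if it then
 * fails, its caller can safely deallocate the original, now-empty copy
 * object without losing the data.  The validation callback is made up for
 * the example.
 */
#if 0	/* example only */
static kern_return_t
example_consume_ool_data(
	vm_map_copy_t		copy,	/* copyin form, owned by our caller */
	boolean_t		(*looks_valid)(vm_map_copy_t))
{
	vm_map_copy_t		mine;

	/* take the contents; "copy" is left empty for the caller to discard */
	mine = vm_map_copy_copy(copy);

	if (!looks_valid(mine)) {
		vm_map_copy_discard(mine);	/* we own the data now */
		return KERN_INVALID_ARGUMENT;
	}

	/* ... hand "mine" to whatever will map it or overwrite with it ... */
	return KERN_SUCCESS;
}
#endif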
5311
5312 static kern_return_t
5313 vm_map_overwrite_submap_recurse(
5314 vm_map_t dst_map,
5315 vm_map_offset_t dst_addr,
5316 vm_map_size_t dst_size)
5317 {
5318 vm_map_offset_t dst_end;
5319 vm_map_entry_t tmp_entry;
5320 vm_map_entry_t entry;
5321 kern_return_t result;
5322 boolean_t encountered_sub_map = FALSE;
5323
5324
5325
5326 /*
5327 * Verify that the destination is all writeable
5328 * initially. We have to trunc the destination
5329 * address and round the copy size or we'll end up
5330 * splitting entries in strange ways.
5331 */
5332
5333 dst_end = vm_map_round_page(dst_addr + dst_size);
5334 vm_map_lock(dst_map);
5335
5336 start_pass_1:
5337 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
5338 vm_map_unlock(dst_map);
5339 return(KERN_INVALID_ADDRESS);
5340 }
5341
5342 vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(dst_addr));
5343 assert(!tmp_entry->use_pmap); /* clipping did unnest if needed */
5344
5345 for (entry = tmp_entry;;) {
5346 vm_map_entry_t next;
5347
5348 next = entry->vme_next;
5349 while(entry->is_sub_map) {
5350 vm_map_offset_t sub_start;
5351 vm_map_offset_t sub_end;
5352 vm_map_offset_t local_end;
5353
5354 if (entry->in_transition) {
5355 /*
5356 * Say that we are waiting, and wait for entry.
5357 */
5358 entry->needs_wakeup = TRUE;
5359 vm_map_entry_wait(dst_map, THREAD_UNINT);
5360
5361 goto start_pass_1;
5362 }
5363
5364 encountered_sub_map = TRUE;
5365 sub_start = entry->offset;
5366
5367 if(entry->vme_end < dst_end)
5368 sub_end = entry->vme_end;
5369 else
5370 sub_end = dst_end;
5371 sub_end -= entry->vme_start;
5372 sub_end += entry->offset;
5373 local_end = entry->vme_end;
5374 vm_map_unlock(dst_map);
5375
5376 result = vm_map_overwrite_submap_recurse(
5377 entry->object.sub_map,
5378 sub_start,
5379 sub_end - sub_start);
5380
5381 if(result != KERN_SUCCESS)
5382 return result;
5383 if (dst_end <= entry->vme_end)
5384 return KERN_SUCCESS;
5385 vm_map_lock(dst_map);
5386 if(!vm_map_lookup_entry(dst_map, local_end,
5387 &tmp_entry)) {
5388 vm_map_unlock(dst_map);
5389 return(KERN_INVALID_ADDRESS);
5390 }
5391 entry = tmp_entry;
5392 next = entry->vme_next;
5393 }
5394
5395 if ( ! (entry->protection & VM_PROT_WRITE)) {
5396 vm_map_unlock(dst_map);
5397 return(KERN_PROTECTION_FAILURE);
5398 }
5399
5400 /*
5401 * If the entry is in transition, we must wait
5402 * for it to exit that state. Anything could happen
5403 * when we unlock the map, so start over.
5404 */
5405 if (entry->in_transition) {
5406
5407 /*
5408 * Say that we are waiting, and wait for entry.
5409 */
5410 entry->needs_wakeup = TRUE;
5411 vm_map_entry_wait(dst_map, THREAD_UNINT);
5412
5413 goto start_pass_1;
5414 }
5415
5416 /*
5417 * our range is contained completely within this map entry
5418 */
5419 if (dst_end <= entry->vme_end) {
5420 vm_map_unlock(dst_map);
5421 return KERN_SUCCESS;
5422 }
5423 /*
5424 * check that range specified is contiguous region
5425 */
5426 if ((next == vm_map_to_entry(dst_map)) ||
5427 (next->vme_start != entry->vme_end)) {
5428 vm_map_unlock(dst_map);
5429 return(KERN_INVALID_ADDRESS);
5430 }
5431
5432 /*
5433 * Check for permanent objects in the destination.
5434 */
5435 if ((entry->object.vm_object != VM_OBJECT_NULL) &&
5436 ((!entry->object.vm_object->internal) ||
5437 (entry->object.vm_object->true_share))) {
5438 if(encountered_sub_map) {
5439 vm_map_unlock(dst_map);
5440 return(KERN_FAILURE);
5441 }
5442 }
5443
5444
5445 entry = next;
5446 }/* for */
5447 vm_map_unlock(dst_map);
5448 return(KERN_SUCCESS);
5449 }
5450
5451 /*
5452 * Routine: vm_map_copy_overwrite
5453 *
5454 * Description:
5455 * Copy the memory described by the map copy
5456 * object (copy; returned by vm_map_copyin) onto
5457 * the specified destination region (dst_map, dst_addr).
5458 * The destination must be writeable.
5459 *
5460 * Unlike vm_map_copyout, this routine actually
5461 * writes over previously-mapped memory. If the
5462 * previous mapping was to a permanent (user-supplied)
5463 * memory object, it is preserved.
5464 *
5465 * The attributes (protection and inheritance) of the
5466 * destination region are preserved.
5467 *
5468 * If successful, consumes the copy object.
5469 * Otherwise, the caller is responsible for it.
5470 *
5471 * Implementation notes:
5472 * To overwrite aligned temporary virtual memory, it is
5473 * sufficient to remove the previous mapping and insert
5474 * the new copy. This replacement is done either on
5475 * the whole region (if no permanent virtual memory
5476 * objects are embedded in the destination region) or
5477 * in individual map entries.
5478 *
5479 * To overwrite permanent virtual memory, it is necessary
5480 * to copy each page, as the external memory management
5481 * interface currently does not provide any optimizations.
5482 *
5483 * Unaligned memory also has to be copied. It is possible
5484 * to use 'vm_trickery' to copy the aligned data. This is
5485 * not done, but would not be hard to implement.
5486 *
5487 * Once a page of permanent memory has been overwritten,
5488 * it is impossible to interrupt this function; otherwise,
5489 * the call would be neither atomic nor location-independent.
5490 * The kernel-state portion of a user thread must be
5491 * interruptible.
5492 *
5493 * It may be expensive to forward all requests that might
5494 * overwrite permanent memory (vm_write, vm_copy) to
5495 * uninterruptible kernel threads. This routine may be
5496 * called by interruptible threads; however, success is
5497 * not guaranteed -- if the request cannot be performed
5498 * atomically and interruptibly, an error indication is
5499 * returned.
5500 */
5501
5502 static kern_return_t
5503 vm_map_copy_overwrite_nested(
5504 vm_map_t dst_map,
5505 vm_map_address_t dst_addr,
5506 vm_map_copy_t copy,
5507 boolean_t interruptible,
5508 pmap_t pmap,
5509 boolean_t discard_on_success)
5510 {
5511 vm_map_offset_t dst_end;
5512 vm_map_entry_t tmp_entry;
5513 vm_map_entry_t entry;
5514 kern_return_t kr;
5515 boolean_t aligned = TRUE;
5516 boolean_t contains_permanent_objects = FALSE;
5517 boolean_t encountered_sub_map = FALSE;
5518 vm_map_offset_t base_addr;
5519 vm_map_size_t copy_size;
5520 vm_map_size_t total_size;
5521
5522
5523 /*
5524 * Check for null copy object.
5525 */
5526
5527 if (copy == VM_MAP_COPY_NULL)
5528 return(KERN_SUCCESS);
5529
5530 /*
5531 * Check for special kernel buffer allocated
5532 * by new_ipc_kmsg_copyin.
5533 */
5534
5535 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
5536 return(vm_map_copyout_kernel_buffer(
5537 dst_map, &dst_addr,
5538 copy, TRUE));
5539 }
5540
5541 /*
5542 * Only works for entry lists at the moment. Will
5543 * support page lists later.
5544 */
5545
5546 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
5547
5548 if (copy->size == 0) {
5549 if (discard_on_success)
5550 vm_map_copy_discard(copy);
5551 return(KERN_SUCCESS);
5552 }
5553
5554 /*
5555 * Verify that the destination is all writeable
5556 * initially. We have to trunc the destination
5557 * address and round the copy size or we'll end up
5558 * splitting entries in strange ways.
5559 */
5560
5561 if (!page_aligned(copy->size) ||
5562 !page_aligned (copy->offset) ||
5563 !page_aligned (dst_addr))
5564 {
5565 aligned = FALSE;
5566 dst_end = vm_map_round_page(dst_addr + copy->size);
5567 } else {
5568 dst_end = dst_addr + copy->size;
5569 }
5570
5571 vm_map_lock(dst_map);
5572
5573 /* LP64todo - remove this check when vm_map_commpage64()
5574 * no longer has to stuff in a map_entry for the commpage
5575 * above the map's max_offset.
5576 */
5577 if (dst_addr >= dst_map->max_offset) {
5578 vm_map_unlock(dst_map);
5579 return(KERN_INVALID_ADDRESS);
5580 }
5581
5582 start_pass_1:
5583 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
5584 vm_map_unlock(dst_map);
5585 return(KERN_INVALID_ADDRESS);
5586 }
5587 vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(dst_addr));
5588 for (entry = tmp_entry;;) {
5589 vm_map_entry_t next = entry->vme_next;
5590
5591 while(entry->is_sub_map) {
5592 vm_map_offset_t sub_start;
5593 vm_map_offset_t sub_end;
5594 vm_map_offset_t local_end;
5595
5596 if (entry->in_transition) {
5597
5598 /*
5599 * Say that we are waiting, and wait for entry.
5600 */
5601 entry->needs_wakeup = TRUE;
5602 vm_map_entry_wait(dst_map, THREAD_UNINT);
5603
5604 goto start_pass_1;
5605 }
5606
5607 local_end = entry->vme_end;
5608 if (!(entry->needs_copy)) {
5609 /* if needs_copy we are a COW submap */
5610 /* in such a case we just replace so */
5611 /* there is no need for the */
5612 /* following check. */
5613 encountered_sub_map = TRUE;
5614 sub_start = entry->offset;
5615
5616 if(entry->vme_end < dst_end)
5617 sub_end = entry->vme_end;
5618 else
5619 sub_end = dst_end;
5620 sub_end -= entry->vme_start;
5621 sub_end += entry->offset;
5622 vm_map_unlock(dst_map);
5623
5624 kr = vm_map_overwrite_submap_recurse(
5625 entry->object.sub_map,
5626 sub_start,
5627 sub_end - sub_start);
5628 if(kr != KERN_SUCCESS)
5629 return kr;
5630 vm_map_lock(dst_map);
5631 }
5632
5633 if (dst_end <= entry->vme_end)
5634 goto start_overwrite;
5635 if(!vm_map_lookup_entry(dst_map, local_end,
5636 &entry)) {
5637 vm_map_unlock(dst_map);
5638 return(KERN_INVALID_ADDRESS);
5639 }
5640 next = entry->vme_next;
5641 }
5642
5643 if ( ! (entry->protection & VM_PROT_WRITE)) {
5644 vm_map_unlock(dst_map);
5645 return(KERN_PROTECTION_FAILURE);
5646 }
5647
5648 /*
5649 * If the entry is in transition, we must wait
5650 * for it to exit that state. Anything could happen
5651 * when we unlock the map, so start over.
5652 */
5653 if (entry->in_transition) {
5654
5655 /*
5656 * Say that we are waiting, and wait for entry.
5657 */
5658 entry->needs_wakeup = TRUE;
5659 vm_map_entry_wait(dst_map, THREAD_UNINT);
5660
5661 goto start_pass_1;
5662 }
5663
5664 /*
5665 * our range is contained completely within this map entry
5666 */
5667 if (dst_end <= entry->vme_end)
5668 break;
5669 /*
5670 * check that range specified is contiguous region
5671 */
5672 if ((next == vm_map_to_entry(dst_map)) ||
5673 (next->vme_start != entry->vme_end)) {
5674 vm_map_unlock(dst_map);
5675 return(KERN_INVALID_ADDRESS);
5676 }
5677
5678
5679 /*
5680 * Check for permanent objects in the destination.
5681 */
5682 if ((entry->object.vm_object != VM_OBJECT_NULL) &&
5683 ((!entry->object.vm_object->internal) ||
5684 (entry->object.vm_object->true_share))) {
5685 contains_permanent_objects = TRUE;
5686 }
5687
5688 entry = next;
5689 }/* for */
5690
5691 start_overwrite:
5692 /*
5693 * If there are permanent objects in the destination, then
5694 * the copy cannot be interrupted.
5695 */
5696
5697 if (interruptible && contains_permanent_objects) {
5698 vm_map_unlock(dst_map);
5699 return(KERN_FAILURE); /* XXX */
5700 }
5701
5702 /*
5703 *
5704 * Make a second pass, overwriting the data
5705 * At the beginning of each loop iteration,
5706 * the next entry to be overwritten is "tmp_entry"
5707 * (initially, the value returned from the lookup above),
5708 * and the starting address expected in that entry
5709 * is "start".
5710 */
5711
5712 total_size = copy->size;
5713 if(encountered_sub_map) {
5714 copy_size = 0;
5715 /* re-calculate tmp_entry since we've had the map */
5716 /* unlocked */
5717 if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
5718 vm_map_unlock(dst_map);
5719 return(KERN_INVALID_ADDRESS);
5720 }
5721 } else {
5722 copy_size = copy->size;
5723 }
5724
5725 base_addr = dst_addr;
5726 while(TRUE) {
5727 /* deconstruct the copy object and do in parts */
5728 /* only in sub_map, interruptible case */
5729 vm_map_entry_t copy_entry;
5730 vm_map_entry_t previous_prev = VM_MAP_ENTRY_NULL;
5731 vm_map_entry_t next_copy = VM_MAP_ENTRY_NULL;
5732 int nentries;
5733 int remaining_entries = 0;
5734 vm_map_offset_t new_offset = 0;
5735
5736 for (entry = tmp_entry; copy_size == 0;) {
5737 vm_map_entry_t next;
5738
5739 next = entry->vme_next;
5740
5741 /* tmp_entry and base address are moved along */
5742 /* each time we encounter a sub-map. Otherwise */
5743 /* entry can outpace tmp_entry, and the copy_size */
5744 /* may reflect the distance between them. */
5745 /* If the current entry is found to be in transition, */
5746 /* we will start over at the beginning or at the last */
5747 /* encounter of a submap, as dictated by base_addr, */
5748 /* and we will zero copy_size accordingly. */
5749 if (entry->in_transition) {
5750 /*
5751 * Say that we are waiting, and wait for entry.
5752 */
5753 entry->needs_wakeup = TRUE;
5754 vm_map_entry_wait(dst_map, THREAD_UNINT);
5755
5756 if(!vm_map_lookup_entry(dst_map, base_addr,
5757 &tmp_entry)) {
5758 vm_map_unlock(dst_map);
5759 return(KERN_INVALID_ADDRESS);
5760 }
5761 copy_size = 0;
5762 entry = tmp_entry;
5763 continue;
5764 }
5765 if(entry->is_sub_map) {
5766 vm_map_offset_t sub_start;
5767 vm_map_offset_t sub_end;
5768 vm_map_offset_t local_end;
5769
5770 if (entry->needs_copy) {
5771 /* if this is a COW submap */
5772 /* just back the range with an */
5773 /* anonymous entry */
5774 if(entry->vme_end < dst_end)
5775 sub_end = entry->vme_end;
5776 else
5777 sub_end = dst_end;
5778 if(entry->vme_start < base_addr)
5779 sub_start = base_addr;
5780 else
5781 sub_start = entry->vme_start;
5782 vm_map_clip_end(
5783 dst_map, entry, sub_end);
5784 vm_map_clip_start(
5785 dst_map, entry, sub_start);
5786 assert(!entry->use_pmap);
5787 entry->is_sub_map = FALSE;
5788 vm_map_deallocate(
5789 entry->object.sub_map);
5790 entry->object.sub_map = NULL;
5791 entry->is_shared = FALSE;
5792 entry->needs_copy = FALSE;
5793 entry->offset = 0;
5794 /*
5795 * XXX FBDP
5796 * We should propagate the protections
5797 * of the submap entry here instead
5798 * of forcing them to VM_PROT_ALL...
5799 * Or better yet, we should inherit
5800 * the protection of the copy_entry.
5801 */
5802 entry->protection = VM_PROT_ALL;
5803 entry->max_protection = VM_PROT_ALL;
5804 entry->wired_count = 0;
5805 entry->user_wired_count = 0;
5806 if(entry->inheritance
5807 == VM_INHERIT_SHARE)
5808 entry->inheritance = VM_INHERIT_COPY;
5809 continue;
5810 }
5811 /* first take care of any non-sub_map */
5812 /* entries to send */
5813 if(base_addr < entry->vme_start) {
5814 /* stuff to send */
5815 copy_size =
5816 entry->vme_start - base_addr;
5817 break;
5818 }
5819 sub_start = entry->offset;
5820
5821 if(entry->vme_end < dst_end)
5822 sub_end = entry->vme_end;
5823 else
5824 sub_end = dst_end;
5825 sub_end -= entry->vme_start;
5826 sub_end += entry->offset;
5827 local_end = entry->vme_end;
5828 vm_map_unlock(dst_map);
5829 copy_size = sub_end - sub_start;
5830
5831 /* adjust the copy object */
5832 if (total_size > copy_size) {
5833 vm_map_size_t local_size = 0;
5834 vm_map_size_t entry_size;
5835
5836 nentries = 1;
5837 new_offset = copy->offset;
5838 copy_entry = vm_map_copy_first_entry(copy);
5839 while(copy_entry !=
5840 vm_map_copy_to_entry(copy)){
5841 entry_size = copy_entry->vme_end -
5842 copy_entry->vme_start;
5843 if((local_size < copy_size) &&
5844 ((local_size + entry_size)
5845 >= copy_size)) {
5846 vm_map_copy_clip_end(copy,
5847 copy_entry,
5848 copy_entry->vme_start +
5849 (copy_size - local_size));
5850 entry_size = copy_entry->vme_end -
5851 copy_entry->vme_start;
5852 local_size += entry_size;
5853 new_offset += entry_size;
5854 }
5855 if(local_size >= copy_size) {
5856 next_copy = copy_entry->vme_next;
5857 copy_entry->vme_next =
5858 vm_map_copy_to_entry(copy);
5859 previous_prev =
5860 copy->cpy_hdr.links.prev;
5861 copy->cpy_hdr.links.prev = copy_entry;
5862 copy->size = copy_size;
5863 remaining_entries =
5864 copy->cpy_hdr.nentries;
5865 remaining_entries -= nentries;
5866 copy->cpy_hdr.nentries = nentries;
5867 break;
5868 } else {
5869 local_size += entry_size;
5870 new_offset += entry_size;
5871 nentries++;
5872 }
5873 copy_entry = copy_entry->vme_next;
5874 }
5875 }
5876
5877 if((entry->use_pmap) && (pmap == NULL)) {
5878 kr = vm_map_copy_overwrite_nested(
5879 entry->object.sub_map,
5880 sub_start,
5881 copy,
5882 interruptible,
5883 entry->object.sub_map->pmap,
5884 TRUE);
5885 } else if (pmap != NULL) {
5886 kr = vm_map_copy_overwrite_nested(
5887 entry->object.sub_map,
5888 sub_start,
5889 copy,
5890 interruptible, pmap,
5891 TRUE);
5892 } else {
5893 kr = vm_map_copy_overwrite_nested(
5894 entry->object.sub_map,
5895 sub_start,
5896 copy,
5897 interruptible,
5898 dst_map->pmap,
5899 TRUE);
5900 }
5901 if(kr != KERN_SUCCESS) {
5902 if(next_copy != NULL) {
5903 copy->cpy_hdr.nentries +=
5904 remaining_entries;
5905 copy->cpy_hdr.links.prev->vme_next =
5906 next_copy;
5907 copy->cpy_hdr.links.prev
5908 = previous_prev;
5909 copy->size = total_size;
5910 }
5911 return kr;
5912 }
5913 if (dst_end <= local_end) {
5914 return(KERN_SUCCESS);
5915 }
5916 /* otherwise copy no longer exists, it was */
5917 /* destroyed after successful copy_overwrite */
5918 copy = (vm_map_copy_t)
5919 zalloc(vm_map_copy_zone);
5920 vm_map_copy_first_entry(copy) =
5921 vm_map_copy_last_entry(copy) =
5922 vm_map_copy_to_entry(copy);
5923 copy->type = VM_MAP_COPY_ENTRY_LIST;
5924 copy->offset = new_offset;
5925
5926 /*
5927 * XXX FBDP
5928 * this does not seem to deal with
5929 * the VM map store (R&B tree)
5930 */
5931
5932 total_size -= copy_size;
5933 copy_size = 0;
5934 /* put back remainder of copy in container */
5935 if(next_copy != NULL) {
5936 copy->cpy_hdr.nentries = remaining_entries;
5937 copy->cpy_hdr.links.next = next_copy;
5938 copy->cpy_hdr.links.prev = previous_prev;
5939 copy->size = total_size;
5940 next_copy->vme_prev =
5941 vm_map_copy_to_entry(copy);
5942 next_copy = NULL;
5943 }
5944 base_addr = local_end;
5945 vm_map_lock(dst_map);
5946 if(!vm_map_lookup_entry(dst_map,
5947 local_end, &tmp_entry)) {
5948 vm_map_unlock(dst_map);
5949 return(KERN_INVALID_ADDRESS);
5950 }
5951 entry = tmp_entry;
5952 continue;
5953 }
5954 if (dst_end <= entry->vme_end) {
5955 copy_size = dst_end - base_addr;
5956 break;
5957 }
5958
5959 if ((next == vm_map_to_entry(dst_map)) ||
5960 (next->vme_start != entry->vme_end)) {
5961 vm_map_unlock(dst_map);
5962 return(KERN_INVALID_ADDRESS);
5963 }
5964
5965 entry = next;
5966 }/* for */
5967
5968 next_copy = NULL;
5969 nentries = 1;
5970
5971 /* adjust the copy object */
5972 if (total_size > copy_size) {
5973 vm_map_size_t local_size = 0;
5974 vm_map_size_t entry_size;
5975
5976 new_offset = copy->offset;
5977 copy_entry = vm_map_copy_first_entry(copy);
5978 while(copy_entry != vm_map_copy_to_entry(copy)) {
5979 entry_size = copy_entry->vme_end -
5980 copy_entry->vme_start;
5981 if((local_size < copy_size) &&
5982 ((local_size + entry_size)
5983 >= copy_size)) {
5984 vm_map_copy_clip_end(copy, copy_entry,
5985 copy_entry->vme_start +
5986 (copy_size - local_size));
5987 entry_size = copy_entry->vme_end -
5988 copy_entry->vme_start;
5989 local_size += entry_size;
5990 new_offset += entry_size;
5991 }
5992 if(local_size >= copy_size) {
5993 next_copy = copy_entry->vme_next;
5994 copy_entry->vme_next =
5995 vm_map_copy_to_entry(copy);
5996 previous_prev =
5997 copy->cpy_hdr.links.prev;
5998 copy->cpy_hdr.links.prev = copy_entry;
5999 copy->size = copy_size;
6000 remaining_entries =
6001 copy->cpy_hdr.nentries;
6002 remaining_entries -= nentries;
6003 copy->cpy_hdr.nentries = nentries;
6004 break;
6005 } else {
6006 local_size += entry_size;
6007 new_offset += entry_size;
6008 nentries++;
6009 }
6010 copy_entry = copy_entry->vme_next;
6011 }
6012 }
6013
6014 if (aligned) {
6015 pmap_t local_pmap;
6016
6017 if(pmap)
6018 local_pmap = pmap;
6019 else
6020 local_pmap = dst_map->pmap;
6021
6022 if ((kr = vm_map_copy_overwrite_aligned(
6023 dst_map, tmp_entry, copy,
6024 base_addr, local_pmap)) != KERN_SUCCESS) {
6025 if(next_copy != NULL) {
6026 copy->cpy_hdr.nentries +=
6027 remaining_entries;
6028 copy->cpy_hdr.links.prev->vme_next =
6029 next_copy;
6030 copy->cpy_hdr.links.prev =
6031 previous_prev;
6032 copy->size += copy_size;
6033 }
6034 return kr;
6035 }
6036 vm_map_unlock(dst_map);
6037 } else {
6038 /*
6039 * Performance gain:
6040 *
6041 * if the copy and dst address are misaligned but share the same
6042 * offset within the page, we can copy the misaligned parts
6043 * unaligned and copy the rest aligned. If they are
6044 * aligned but len is unaligned, we simply need to copy
6045 * the end bit unaligned. We'll need to split off the misaligned
6046 * bits of the region in this case!
6047 */
6048 /* ALWAYS UNLOCKS THE dst_map MAP */
6049 if ((kr = vm_map_copy_overwrite_unaligned( dst_map,
6050 tmp_entry, copy, base_addr)) != KERN_SUCCESS) {
6051 if(next_copy != NULL) {
6052 copy->cpy_hdr.nentries +=
6053 remaining_entries;
6054 copy->cpy_hdr.links.prev->vme_next =
6055 next_copy;
6056 copy->cpy_hdr.links.prev =
6057 previous_prev;
6058 copy->size += copy_size;
6059 }
6060 return kr;
6061 }
6062 }
6063 total_size -= copy_size;
6064 if(total_size == 0)
6065 break;
6066 base_addr += copy_size;
6067 copy_size = 0;
6068 copy->offset = new_offset;
6069 if(next_copy != NULL) {
6070 copy->cpy_hdr.nentries = remaining_entries;
6071 copy->cpy_hdr.links.next = next_copy;
6072 copy->cpy_hdr.links.prev = previous_prev;
6073 next_copy->vme_prev = vm_map_copy_to_entry(copy);
6074 copy->size = total_size;
6075 }
6076 vm_map_lock(dst_map);
6077 while(TRUE) {
6078 if (!vm_map_lookup_entry(dst_map,
6079 base_addr, &tmp_entry)) {
6080 vm_map_unlock(dst_map);
6081 return(KERN_INVALID_ADDRESS);
6082 }
6083 if (tmp_entry->in_transition) {
6084 entry->needs_wakeup = TRUE;
6085 vm_map_entry_wait(dst_map, THREAD_UNINT);
6086 } else {
6087 break;
6088 }
6089 }
6090 vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(base_addr));
6091
6092 entry = tmp_entry;
6093 } /* while */
6094
6095 /*
6096 * Throw away the vm_map_copy object
6097 */
6098 if (discard_on_success)
6099 vm_map_copy_discard(copy);
6100
6101 return(KERN_SUCCESS);
6102 }/* vm_map_copy_overwrite_nested */
6103
6104 kern_return_t
6105 vm_map_copy_overwrite(
6106 vm_map_t dst_map,
6107 vm_map_offset_t dst_addr,
6108 vm_map_copy_t copy,
6109 boolean_t interruptible)
6110 {
6111 vm_map_size_t head_size, tail_size;
6112 vm_map_copy_t head_copy, tail_copy;
6113 vm_map_offset_t head_addr, tail_addr;
6114 vm_map_entry_t entry;
6115 kern_return_t kr;
6116
6117 head_size = 0;
6118 tail_size = 0;
6119 head_copy = NULL;
6120 tail_copy = NULL;
6121 head_addr = 0;
6122 tail_addr = 0;
6123
6124 if (interruptible ||
6125 copy == VM_MAP_COPY_NULL ||
6126 copy->type != VM_MAP_COPY_ENTRY_LIST) {
6127 /*
6128 * We can't split the "copy" map if we're interruptible
6129 * or if we don't have a "copy" map...
6130 */
6131 blunt_copy:
6132 return vm_map_copy_overwrite_nested(dst_map,
6133 dst_addr,
6134 copy,
6135 interruptible,
6136 (pmap_t) NULL,
6137 TRUE);
6138 }
6139
6140 if (copy->size < 3 * PAGE_SIZE) {
6141 /*
6142 * Too small to bother with optimizing...
6143 */
6144 goto blunt_copy;
6145 }
6146
6147 if ((dst_addr & PAGE_MASK) != (copy->offset & PAGE_MASK)) {
6148 /*
6149 * Incompatible mis-alignment of source and destination...
6150 */
6151 goto blunt_copy;
6152 }
6153
6154 /*
6155 * Proper alignment or identical mis-alignment at the beginning.
6156 * Let's try and do a small unaligned copy first (if needed)
6157 * and then an aligned copy for the rest.
6158 */
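/*
 * Illustrative numbers (assuming 4K pages): with dst_addr == 0x1800,
 * copy->offset page offset == 0x800 and copy->size == 0x3000, the
 * split below yields head_addr == 0x1800 with head_size == 0x800,
 * a page-aligned middle of 0x2000 bytes starting at dst 0x2000, and
 * tail_addr == 0x4000 with tail_size == 0x800.
 */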
6159 if (!page_aligned(dst_addr)) {
6160 head_addr = dst_addr;
6161 head_size = PAGE_SIZE - (copy->offset & PAGE_MASK);
6162 }
6163 if (!page_aligned(copy->offset + copy->size)) {
6164 /*
6165 * Mis-alignment at the end.
6166 * Do an aligned copy up to the last page and
6167 * then an unaligned copy for the remaining bytes.
6168 */
6169 tail_size = (copy->offset + copy->size) & PAGE_MASK;
6170 tail_addr = dst_addr + copy->size - tail_size;
6171 }
6172
6173 if (head_size + tail_size == copy->size) {
6174 /*
6175 * It's all unaligned, no optimization possible...
6176 */
6177 goto blunt_copy;
6178 }
6179
6180 /*
6181 * Can't optimize if there are any submaps in the
6182 * destination due to the way we free the "copy" map
6183 * progressively in vm_map_copy_overwrite_nested()
6184 * in that case.
6185 */
6186 vm_map_lock_read(dst_map);
6187 if (! vm_map_lookup_entry(dst_map, dst_addr, &entry)) {
6188 vm_map_unlock_read(dst_map);
6189 goto blunt_copy;
6190 }
6191 for (;
6192 (entry != vm_map_copy_to_entry(copy) &&
6193 entry->vme_start < dst_addr + copy->size);
6194 entry = entry->vme_next) {
6195 if (entry->is_sub_map) {
6196 vm_map_unlock_read(dst_map);
6197 goto blunt_copy;
6198 }
6199 }
6200 vm_map_unlock_read(dst_map);
6201
6202 if (head_size) {
6203 /*
6204 * Unaligned copy of the first "head_size" bytes, to reach
6205 * a page boundary.
6206 */
6207
6208 /*
6209 * Extract "head_copy" out of "copy".
6210 */
6211 head_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
6212 vm_map_copy_first_entry(head_copy) =
6213 vm_map_copy_to_entry(head_copy);
6214 vm_map_copy_last_entry(head_copy) =
6215 vm_map_copy_to_entry(head_copy);
6216 head_copy->type = VM_MAP_COPY_ENTRY_LIST;
6217 head_copy->cpy_hdr.nentries = 0;
6218 head_copy->cpy_hdr.entries_pageable =
6219 copy->cpy_hdr.entries_pageable;
6220 vm_map_store_init(&head_copy->cpy_hdr);
6221
6222 head_copy->offset = copy->offset;
6223 head_copy->size = head_size;
6224
6225 copy->offset += head_size;
6226 copy->size -= head_size;
6227
6228 entry = vm_map_copy_first_entry(copy);
6229 vm_map_copy_clip_end(copy, entry, copy->offset);
6230 vm_map_copy_entry_unlink(copy, entry);
6231 vm_map_copy_entry_link(head_copy,
6232 vm_map_copy_to_entry(head_copy),
6233 entry);
6234
6235 /*
6236 * Do the unaligned copy.
6237 */
6238 kr = vm_map_copy_overwrite_nested(dst_map,
6239 head_addr,
6240 head_copy,
6241 interruptible,
6242 (pmap_t) NULL,
6243 FALSE);
6244 if (kr != KERN_SUCCESS)
6245 goto done;
6246 }
6247
6248 if (tail_size) {
6249 /*
6250 * Extract "tail_copy" out of "copy".
6251 */
6252 tail_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
6253 vm_map_copy_first_entry(tail_copy) =
6254 vm_map_copy_to_entry(tail_copy);
6255 vm_map_copy_last_entry(tail_copy) =
6256 vm_map_copy_to_entry(tail_copy);
6257 tail_copy->type = VM_MAP_COPY_ENTRY_LIST;
6258 tail_copy->cpy_hdr.nentries = 0;
6259 tail_copy->cpy_hdr.entries_pageable =
6260 copy->cpy_hdr.entries_pageable;
6261 vm_map_store_init(&tail_copy->cpy_hdr);
6262
6263 tail_copy->offset = copy->offset + copy->size - tail_size;
6264 tail_copy->size = tail_size;
6265
6266 copy->size -= tail_size;
6267
6268 entry = vm_map_copy_last_entry(copy);
6269 vm_map_copy_clip_start(copy, entry, tail_copy->offset);
6270 entry = vm_map_copy_last_entry(copy);
6271 vm_map_copy_entry_unlink(copy, entry);
6272 vm_map_copy_entry_link(tail_copy,
6273 vm_map_copy_last_entry(tail_copy),
6274 entry);
6275 }
6276
6277 /*
6278 * Copy most (or possibly all) of the data.
6279 */
6280 kr = vm_map_copy_overwrite_nested(dst_map,
6281 dst_addr + head_size,
6282 copy,
6283 interruptible,
6284 (pmap_t) NULL,
6285 FALSE);
6286 if (kr != KERN_SUCCESS) {
6287 goto done;
6288 }
6289
6290 if (tail_size) {
6291 kr = vm_map_copy_overwrite_nested(dst_map,
6292 tail_addr,
6293 tail_copy,
6294 interruptible,
6295 (pmap_t) NULL,
6296 FALSE);
6297 }
6298
6299 done:
6300 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
6301 if (kr == KERN_SUCCESS) {
6302 /*
6303 * Discard all the copy maps.
6304 */
6305 if (head_copy) {
6306 vm_map_copy_discard(head_copy);
6307 head_copy = NULL;
6308 }
6309 vm_map_copy_discard(copy);
6310 if (tail_copy) {
6311 vm_map_copy_discard(tail_copy);
6312 tail_copy = NULL;
6313 }
6314 } else {
6315 /*
6316 * Re-assemble the original copy map.
6317 */
6318 if (head_copy) {
6319 entry = vm_map_copy_first_entry(head_copy);
6320 vm_map_copy_entry_unlink(head_copy, entry);
6321 vm_map_copy_entry_link(copy,
6322 vm_map_copy_to_entry(copy),
6323 entry);
6324 copy->offset -= head_size;
6325 copy->size += head_size;
6326 vm_map_copy_discard(head_copy);
6327 head_copy = NULL;
6328 }
6329 if (tail_copy) {
6330 entry = vm_map_copy_last_entry(tail_copy);
6331 vm_map_copy_entry_unlink(tail_copy, entry);
6332 vm_map_copy_entry_link(copy,
6333 vm_map_copy_last_entry(copy),
6334 entry);
6335 copy->size += tail_size;
6336 vm_map_copy_discard(tail_copy);
6337 tail_copy = NULL;
6338 }
6339 }
6340 return kr;
6341 }
6342
6343
6344 /*
6345 * Routine: vm_map_copy_overwrite_unaligned [internal use only]
6346 *
6347 * Description:
6348 * Physically copy unaligned data
6349 *
6350 * Implementation:
6351 * Unaligned parts of pages have to be physically copied. We use
6352 * a modified form of vm_fault_copy (which understands non-aligned
6353 * page offsets and sizes) to do the copy. We attempt to copy as
6354 * much memory in one go as possible; however, vm_fault_copy copies
6355 * within one memory object, so we have to find the smallest of "amount
6356 * left", "source object data size" and "target object data size". With
6357 * unaligned data we don't need to split regions, so the source
6358 * (copy) object should be a single map entry; the target range,
6359 * however, may be split over multiple map entries. In any event we
6360 * are pessimistic about these assumptions.
6361 *
6362 * Assumptions:
6363 * dst_map is locked on entry and is returned locked on success,
6364 * unlocked on error.
6365 */
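/*
 * In other words, each pass of the copy loop below moves
 *	min(dst_size, src_size, amount_left)
 * bytes with vm_fault_copy() and then advances past whichever of
 * the destination entry or source copy entry was exhausted.
 */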
6366
6367 static kern_return_t
6368 vm_map_copy_overwrite_unaligned(
6369 vm_map_t dst_map,
6370 vm_map_entry_t entry,
6371 vm_map_copy_t copy,
6372 vm_map_offset_t start)
6373 {
6374 vm_map_entry_t copy_entry = vm_map_copy_first_entry(copy);
6375 vm_map_version_t version;
6376 vm_object_t dst_object;
6377 vm_object_offset_t dst_offset;
6378 vm_object_offset_t src_offset;
6379 vm_object_offset_t entry_offset;
6380 vm_map_offset_t entry_end;
6381 vm_map_size_t src_size,
6382 dst_size,
6383 copy_size,
6384 amount_left;
6385 kern_return_t kr = KERN_SUCCESS;
6386
6387 vm_map_lock_write_to_read(dst_map);
6388
6389 src_offset = copy->offset - vm_object_trunc_page(copy->offset);
6390 amount_left = copy->size;
6391 /*
6392 * Unaligned, so we never clipped this entry; we need the offset into
6393 * the vm_object, not just into the data.
6394 */
6395 while (amount_left > 0) {
6396
6397 if (entry == vm_map_to_entry(dst_map)) {
6398 vm_map_unlock_read(dst_map);
6399 return KERN_INVALID_ADDRESS;
6400 }
6401
6402 /* "start" must be within the current map entry */
6403 assert ((start>=entry->vme_start) && (start<entry->vme_end));
6404
6405 dst_offset = start - entry->vme_start;
6406
6407 dst_size = entry->vme_end - start;
6408
6409 src_size = copy_entry->vme_end -
6410 (copy_entry->vme_start + src_offset);
6411
6412 if (dst_size < src_size) {
6413 /*
6414 * we can only copy dst_size bytes before
6415 * we have to get the next destination entry
6416 */
6417 copy_size = dst_size;
6418 } else {
6419 /*
6420 * we can only copy src_size bytes before
6421 * we have to get the next source copy entry
6422 */
6423 copy_size = src_size;
6424 }
6425
6426 if (copy_size > amount_left) {
6427 copy_size = amount_left;
6428 }
6429 /*
6430 * Entry needs copy: create a shadow object for the
6431 * copy-on-write region.
6432 */
6433 if (entry->needs_copy &&
6434 ((entry->protection & VM_PROT_WRITE) != 0))
6435 {
6436 if (vm_map_lock_read_to_write(dst_map)) {
6437 vm_map_lock_read(dst_map);
6438 goto RetryLookup;
6439 }
6440 vm_object_shadow(&entry->object.vm_object,
6441 &entry->offset,
6442 (vm_map_size_t)(entry->vme_end
6443 - entry->vme_start));
6444 entry->needs_copy = FALSE;
6445 vm_map_lock_write_to_read(dst_map);
6446 }
6447 dst_object = entry->object.vm_object;
6448 /*
6449 * Unlike with the virtual (aligned) copy, we're going
6450 * to fault on it, therefore we need a target object.
6451 */
6452 if (dst_object == VM_OBJECT_NULL) {
6453 if (vm_map_lock_read_to_write(dst_map)) {
6454 vm_map_lock_read(dst_map);
6455 goto RetryLookup;
6456 }
6457 dst_object = vm_object_allocate((vm_map_size_t)
6458 entry->vme_end - entry->vme_start);
6459 entry->object.vm_object = dst_object;
6460 entry->offset = 0;
6461 vm_map_lock_write_to_read(dst_map);
6462 }
6463 /*
6464 * Take an object reference and unlock map. The "entry" may
6465 * disappear or change when the map is unlocked.
6466 */
6467 vm_object_reference(dst_object);
6468 version.main_timestamp = dst_map->timestamp;
6469 entry_offset = entry->offset;
6470 entry_end = entry->vme_end;
6471 vm_map_unlock_read(dst_map);
6472 /*
6473 * Copy as much as possible in one pass
6474 */
6475 kr = vm_fault_copy(
6476 copy_entry->object.vm_object,
6477 copy_entry->offset + src_offset,
6478 &copy_size,
6479 dst_object,
6480 entry_offset + dst_offset,
6481 dst_map,
6482 &version,
6483 THREAD_UNINT );
6484
6485 start += copy_size;
6486 src_offset += copy_size;
6487 amount_left -= copy_size;
6488 /*
6489 * Release the object reference
6490 */
6491 vm_object_deallocate(dst_object);
6492 /*
6493 * If a hard error occurred, return it now
6494 */
6495 if (kr != KERN_SUCCESS)
6496 return kr;
6497
6498 if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
6499 || amount_left == 0)
6500 {
6501 /*
6502 * all done with this copy entry, dispose.
6503 */
6504 vm_map_copy_entry_unlink(copy, copy_entry);
6505 vm_object_deallocate(copy_entry->object.vm_object);
6506 vm_map_copy_entry_dispose(copy, copy_entry);
6507
6508 if ((copy_entry = vm_map_copy_first_entry(copy))
6509 == vm_map_copy_to_entry(copy) && amount_left) {
6510 /*
6511 * not finished copying but ran out of source
6512 */
6513 return KERN_INVALID_ADDRESS;
6514 }
6515 src_offset = 0;
6516 }
6517
6518 if (amount_left == 0)
6519 return KERN_SUCCESS;
6520
6521 vm_map_lock_read(dst_map);
6522 if (version.main_timestamp == dst_map->timestamp) {
6523 if (start == entry_end) {
6524 /*
6525 * destination region is split. Use the version
6526 * information to avoid a lookup in the normal
6527 * case.
6528 */
6529 entry = entry->vme_next;
6530 /*
6531 * should be contiguous. Fail if we encounter
6532 * a hole in the destination.
6533 */
6534 if (start != entry->vme_start) {
6535 vm_map_unlock_read(dst_map);
6536 return KERN_INVALID_ADDRESS ;
6537 }
6538 }
6539 } else {
6540 /*
6541 * Map version check failed.
6542 * we must lookup the entry because somebody
6543 * might have changed the map behind our backs.
6544 */
6545 RetryLookup:
6546 if (!vm_map_lookup_entry(dst_map, start, &entry))
6547 {
6548 vm_map_unlock_read(dst_map);
6549 return KERN_INVALID_ADDRESS ;
6550 }
6551 }
6552 }/* while */
6553
6554 return KERN_SUCCESS;
6555 }/* vm_map_copy_overwrite_unaligned */
6556
6557 /*
6558 * Routine: vm_map_copy_overwrite_aligned [internal use only]
6559 *
6560 * Description:
6561 * Does all the vm_trickery possible for whole pages.
6562 *
6563 * Implementation:
6564 *
6565 * If there are no permanent objects in the destination,
6566 * and the source and destination map entry zones match,
6567 * and the destination map entry is not shared,
6568 * then the map entries can be deleted and replaced
6569 * with those from the copy. The following code is the
6570 * basic idea of what to do, but there are lots of annoying
6571 * little details about getting protection and inheritance
6572 * right. Should add protection, inheritance, and sharing checks
6573 * to the above pass and make sure that no wiring is involved.
6574 */
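/*
 * Outline of the loop below: for each copy entry, if the matching
 * destination entry covers only temporary, unshared memory, the
 * destination's object is simply thrown away and the copy entry's
 * object installed in its place; otherwise ("slow_copy") the data
 * is physically copied with vm_fault_copy().
 */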
6575
6576 int vm_map_copy_overwrite_aligned_src_not_internal = 0;
6577 int vm_map_copy_overwrite_aligned_src_not_symmetric = 0;
6578 int vm_map_copy_overwrite_aligned_src_large = 0;
6579
6580 static kern_return_t
6581 vm_map_copy_overwrite_aligned(
6582 vm_map_t dst_map,
6583 vm_map_entry_t tmp_entry,
6584 vm_map_copy_t copy,
6585 vm_map_offset_t start,
6586 __unused pmap_t pmap)
6587 {
6588 vm_object_t object;
6589 vm_map_entry_t copy_entry;
6590 vm_map_size_t copy_size;
6591 vm_map_size_t size;
6592 vm_map_entry_t entry;
6593
6594 while ((copy_entry = vm_map_copy_first_entry(copy))
6595 != vm_map_copy_to_entry(copy))
6596 {
6597 copy_size = (copy_entry->vme_end - copy_entry->vme_start);
6598
6599 entry = tmp_entry;
6600 assert(!entry->use_pmap); /* unnested when clipped earlier */
6601 if (entry == vm_map_to_entry(dst_map)) {
6602 vm_map_unlock(dst_map);
6603 return KERN_INVALID_ADDRESS;
6604 }
6605 size = (entry->vme_end - entry->vme_start);
6606 /*
6607 * Make sure that no holes popped up in the
6608 * address map, and that the protection is
6609 * still valid, in case the map was unlocked
6610 * earlier.
6611 */
6612
6613 if ((entry->vme_start != start) || ((entry->is_sub_map)
6614 && !entry->needs_copy)) {
6615 vm_map_unlock(dst_map);
6616 return(KERN_INVALID_ADDRESS);
6617 }
6618 assert(entry != vm_map_to_entry(dst_map));
6619
6620 /*
6621 * Check protection again
6622 */
6623
6624 if ( ! (entry->protection & VM_PROT_WRITE)) {
6625 vm_map_unlock(dst_map);
6626 return(KERN_PROTECTION_FAILURE);
6627 }
6628
6629 /*
6630 * Adjust to source size first
6631 */
6632
6633 if (copy_size < size) {
6634 vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
6635 size = copy_size;
6636 }
6637
6638 /*
6639 * Adjust to destination size
6640 */
6641
6642 if (size < copy_size) {
6643 vm_map_copy_clip_end(copy, copy_entry,
6644 copy_entry->vme_start + size);
6645 copy_size = size;
6646 }
6647
6648 assert((entry->vme_end - entry->vme_start) == size);
6649 assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
6650 assert((copy_entry->vme_end - copy_entry->vme_start) == size);
6651
6652 /*
6653 * If the destination contains temporary unshared memory,
6654 * we can perform the copy by throwing it away and
6655 * installing the source data.
6656 */
6657
6658 object = entry->object.vm_object;
6659 if ((!entry->is_shared &&
6660 ((object == VM_OBJECT_NULL) ||
6661 (object->internal && !object->true_share))) ||
6662 entry->needs_copy) {
6663 vm_object_t old_object = entry->object.vm_object;
6664 vm_object_offset_t old_offset = entry->offset;
6665 vm_object_offset_t offset;
6666
6667 /*
6668 * Ensure that the source and destination aren't
6669 * identical
6670 */
6671 if (old_object == copy_entry->object.vm_object &&
6672 old_offset == copy_entry->offset) {
6673 vm_map_copy_entry_unlink(copy, copy_entry);
6674 vm_map_copy_entry_dispose(copy, copy_entry);
6675
6676 if (old_object != VM_OBJECT_NULL)
6677 vm_object_deallocate(old_object);
6678
6679 start = tmp_entry->vme_end;
6680 tmp_entry = tmp_entry->vme_next;
6681 continue;
6682 }
6683
6684 #if !CONFIG_EMBEDDED
6685 #define __TRADEOFF1_OBJ_SIZE (64 * 1024 * 1024) /* 64 MB */
6686 #define __TRADEOFF1_COPY_SIZE (128 * 1024) /* 128 KB */
6687 if (copy_entry->object.vm_object != VM_OBJECT_NULL &&
6688 copy_entry->object.vm_object->vo_size >= __TRADEOFF1_OBJ_SIZE &&
6689 copy_size <= __TRADEOFF1_COPY_SIZE) {
6690 /*
6691 * Virtual vs. Physical copy tradeoff #1.
6692 *
6693 * Copying only a few pages out of a large
6694 * object: do a physical copy instead of
6695 * a virtual copy, to avoid possibly keeping
6696 * the entire large object alive because of
6697 * those few copy-on-write pages.
6698 */
6699 vm_map_copy_overwrite_aligned_src_large++;
6700 goto slow_copy;
6701 }
6702 #endif /* !CONFIG_EMBEDDED */
6703
6704 if (entry->alias >= VM_MEMORY_MALLOC &&
6705 entry->alias <= VM_MEMORY_MALLOC_LARGE_REUSED) {
6706 vm_object_t new_object, new_shadow;
6707
6708 /*
6709 * We're about to map something over a mapping
6710 * established by malloc()...
6711 */
6712 new_object = copy_entry->object.vm_object;
6713 if (new_object != VM_OBJECT_NULL) {
6714 vm_object_lock_shared(new_object);
6715 }
6716 while (new_object != VM_OBJECT_NULL &&
6717 #if !CONFIG_EMBEDDED
6718 !new_object->true_share &&
6719 new_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
6720 #endif /* !CONFIG_EMBEDDED */
6721 new_object->internal) {
6722 new_shadow = new_object->shadow;
6723 if (new_shadow == VM_OBJECT_NULL) {
6724 break;
6725 }
6726 vm_object_lock_shared(new_shadow);
6727 vm_object_unlock(new_object);
6728 new_object = new_shadow;
6729 }
6730 if (new_object != VM_OBJECT_NULL) {
6731 if (!new_object->internal) {
6732 /*
6733 * The new mapping is backed
6734 * by an external object. We
6735 * don't want malloc'ed memory
6736 * to be replaced with such a
6737 * non-anonymous mapping, so
6738 * let's go off the optimized
6739 * path...
6740 */
6741 vm_map_copy_overwrite_aligned_src_not_internal++;
6742 vm_object_unlock(new_object);
6743 goto slow_copy;
6744 }
6745 #if !CONFIG_EMBEDDED
6746 if (new_object->true_share ||
6747 new_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
6748 /*
6749 * Same if there's a "true_share"
6750 * object in the shadow chain, or
6751 * an object with a non-default
6752 * (SYMMETRIC) copy strategy.
6753 */
6754 vm_map_copy_overwrite_aligned_src_not_symmetric++;
6755 vm_object_unlock(new_object);
6756 goto slow_copy;
6757 }
6758 #endif /* !CONFIG_EMBEDDED */
6759 vm_object_unlock(new_object);
6760 }
6761 /*
6762 * The new mapping is still backed by
6763 * anonymous (internal) memory, so it's
6764 * OK to substitute it for the original
6765 * malloc() mapping.
6766 */
6767 }
6768
6769 if (old_object != VM_OBJECT_NULL) {
6770 if(entry->is_sub_map) {
6771 if(entry->use_pmap) {
6772 #ifndef NO_NESTED_PMAP
6773 pmap_unnest(dst_map->pmap,
6774 (addr64_t)entry->vme_start,
6775 entry->vme_end - entry->vme_start);
6776 #endif /* NO_NESTED_PMAP */
6777 if(dst_map->mapped) {
6778 /* clean up parent */
6779 /* map/maps */
6780 vm_map_submap_pmap_clean(
6781 dst_map, entry->vme_start,
6782 entry->vme_end,
6783 entry->object.sub_map,
6784 entry->offset);
6785 }
6786 } else {
6787 vm_map_submap_pmap_clean(
6788 dst_map, entry->vme_start,
6789 entry->vme_end,
6790 entry->object.sub_map,
6791 entry->offset);
6792 }
6793 vm_map_deallocate(
6794 entry->object.sub_map);
6795 } else {
6796 if(dst_map->mapped) {
6797 vm_object_pmap_protect(
6798 entry->object.vm_object,
6799 entry->offset,
6800 entry->vme_end
6801 - entry->vme_start,
6802 PMAP_NULL,
6803 entry->vme_start,
6804 VM_PROT_NONE);
6805 } else {
6806 pmap_remove(dst_map->pmap,
6807 (addr64_t)(entry->vme_start),
6808 (addr64_t)(entry->vme_end));
6809 }
6810 vm_object_deallocate(old_object);
6811 }
6812 }
6813
6814 entry->is_sub_map = FALSE;
6815 entry->object = copy_entry->object;
6816 object = entry->object.vm_object;
6817 entry->needs_copy = copy_entry->needs_copy;
6818 entry->wired_count = 0;
6819 entry->user_wired_count = 0;
6820 offset = entry->offset = copy_entry->offset;
6821
6822 vm_map_copy_entry_unlink(copy, copy_entry);
6823 vm_map_copy_entry_dispose(copy, copy_entry);
6824
6825 /*
6826 * We could try to push pages into the pmap at this point, BUT
6827 * this optimization only saved on average 2 us per page if ALL
6828 * the pages in the source were currently mapped
6829 * and ALL the pages in the dest were touched. If fewer than 2/3
6830 * of the pages were touched, this optimization actually cost more cycles;
6831 * it also puts a lot of pressure on the pmap layer w/r/t mapping structures.
6832 */
6833
6834 /*
6835 * Set up for the next iteration. The map
6836 * has not been unlocked, so the next
6837 * address should be at the end of this
6838 * entry, and the next map entry should be
6839 * the one following it.
6840 */
6841
6842 start = tmp_entry->vme_end;
6843 tmp_entry = tmp_entry->vme_next;
6844 } else {
6845 vm_map_version_t version;
6846 vm_object_t dst_object;
6847 vm_object_offset_t dst_offset;
6848 kern_return_t r;
6849
6850 slow_copy:
6851 if (entry->needs_copy) {
6852 vm_object_shadow(&entry->object.vm_object,
6853 &entry->offset,
6854 (entry->vme_end -
6855 entry->vme_start));
6856 entry->needs_copy = FALSE;
6857 }
6858
6859 dst_object = entry->object.vm_object;
6860 dst_offset = entry->offset;
6861
6862 /*
6863 * Take an object reference, and record
6864 * the map version information so that the
6865 * map can be safely unlocked.
6866 */
6867
6868 if (dst_object == VM_OBJECT_NULL) {
6869 /*
6870 * We would usually have just taken the
6871 * optimized path above if the destination
6872 * object has not been allocated yet. But we
6873 * now disable that optimization if the copy
6874 * entry's object is not backed by anonymous
6875 * memory to avoid replacing malloc'ed
6876 * (i.e. re-usable) anonymous memory with a
6877 * not-so-anonymous mapping.
6878 * So we have to handle this case here and
6879 * allocate a new VM object for this map entry.
6880 */
6881 dst_object = vm_object_allocate(
6882 entry->vme_end - entry->vme_start);
6883 dst_offset = 0;
6884 entry->object.vm_object = dst_object;
6885 entry->offset = dst_offset;
6886
6887 }
6888
6889 vm_object_reference(dst_object);
6890
6891 /* account for unlock bumping up timestamp */
6892 version.main_timestamp = dst_map->timestamp + 1;
6893
6894 vm_map_unlock(dst_map);
6895
6896 /*
6897 * Copy as much as possible in one pass
6898 */
6899
6900 copy_size = size;
6901 r = vm_fault_copy(
6902 copy_entry->object.vm_object,
6903 copy_entry->offset,
6904 &copy_size,
6905 dst_object,
6906 dst_offset,
6907 dst_map,
6908 &version,
6909 THREAD_UNINT );
6910
6911 /*
6912 * Release the object reference
6913 */
6914
6915 vm_object_deallocate(dst_object);
6916
6917 /*
6918 * If a hard error occurred, return it now
6919 */
6920
6921 if (r != KERN_SUCCESS)
6922 return(r);
6923
6924 if (copy_size != 0) {
6925 /*
6926 * Dispose of the copied region
6927 */
6928
6929 vm_map_copy_clip_end(copy, copy_entry,
6930 copy_entry->vme_start + copy_size);
6931 vm_map_copy_entry_unlink(copy, copy_entry);
6932 vm_object_deallocate(copy_entry->object.vm_object);
6933 vm_map_copy_entry_dispose(copy, copy_entry);
6934 }
6935
6936 /*
6937 * Pick up in the destination map where we left off.
6938 *
6939 * Use the version information to avoid a lookup
6940 * in the normal case.
6941 */
6942
6943 start += copy_size;
6944 vm_map_lock(dst_map);
6945 if (version.main_timestamp == dst_map->timestamp &&
6946 copy_size != 0) {
6947 /* We can safely use saved tmp_entry value */
6948
6949 vm_map_clip_end(dst_map, tmp_entry, start);
6950 tmp_entry = tmp_entry->vme_next;
6951 } else {
6952 /* Must do lookup of tmp_entry */
6953
6954 if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
6955 vm_map_unlock(dst_map);
6956 return(KERN_INVALID_ADDRESS);
6957 }
6958 vm_map_clip_start(dst_map, tmp_entry, start);
6959 }
6960 }
6961 }/* while */
6962
6963 return(KERN_SUCCESS);
6964 }/* vm_map_copy_overwrite_aligned */
6965
6966 /*
6967 * Routine: vm_map_copyin_kernel_buffer [internal use only]
6968 *
6969 * Description:
6970 * Copy in data to a kernel buffer from space in the
6971 * source map. The original space may be optionally
6972 * deallocated.
6973 *
6974 * If successful, returns a new copy object.
6975 */
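/*
 * Layout of the resulting copy object (a single kalloc'ed block):
 *
 *	+------------------------+------------------------+
 *	| struct vm_map_copy     | len bytes of data      |
 *	+------------------------+------------------------+
 *	^ copy                   ^ copy->cpy_kdata == (void *)(copy + 1)
 *
 * so copy->cpy_kalloc_size == sizeof (struct vm_map_copy) + len.
 */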
6976 static kern_return_t
6977 vm_map_copyin_kernel_buffer(
6978 vm_map_t src_map,
6979 vm_map_offset_t src_addr,
6980 vm_map_size_t len,
6981 boolean_t src_destroy,
6982 vm_map_copy_t *copy_result)
6983 {
6984 kern_return_t kr;
6985 vm_map_copy_t copy;
6986 vm_size_t kalloc_size;
6987
6988 if ((vm_size_t) len != len) {
6989 /* "len" is too big and doesn't fit in a "vm_size_t" */
6990 return KERN_RESOURCE_SHORTAGE;
6991 }
6992 kalloc_size = (vm_size_t) (sizeof(struct vm_map_copy) + len);
6993 assert((vm_map_size_t) kalloc_size == sizeof (struct vm_map_copy) + len);
6994
6995 copy = (vm_map_copy_t) kalloc(kalloc_size);
6996 if (copy == VM_MAP_COPY_NULL) {
6997 return KERN_RESOURCE_SHORTAGE;
6998 }
6999 copy->type = VM_MAP_COPY_KERNEL_BUFFER;
7000 copy->size = len;
7001 copy->offset = 0;
7002 copy->cpy_kdata = (void *) (copy + 1);
7003 copy->cpy_kalloc_size = kalloc_size;
7004
7005 kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t) len);
7006 if (kr != KERN_SUCCESS) {
7007 kfree(copy, kalloc_size);
7008 return kr;
7009 }
7010 if (src_destroy) {
7011 (void) vm_map_remove(src_map, vm_map_trunc_page(src_addr),
7012 vm_map_round_page(src_addr + len),
7013 VM_MAP_REMOVE_INTERRUPTIBLE |
7014 VM_MAP_REMOVE_WAIT_FOR_KWIRE |
7015 (src_map == kernel_map) ?
7016 VM_MAP_REMOVE_KUNWIRE : 0);
7017 }
7018 *copy_result = copy;
7019 return KERN_SUCCESS;
7020 }
7021
7022 /*
7023 * Routine: vm_map_copyout_kernel_buffer [internal use only]
7024 *
7025 * Description:
7026 * Copy out data from a kernel buffer into space in the
7027 * destination map. The space may be optionally dynamically
7028 * allocated.
7029 *
7030 * If successful, consumes the copy object.
7031 * Otherwise, the caller is responsible for it.
7032 */
7033 static int vm_map_copyout_kernel_buffer_failures = 0;
7034 static kern_return_t
7035 vm_map_copyout_kernel_buffer(
7036 vm_map_t map,
7037 vm_map_address_t *addr, /* IN/OUT */
7038 vm_map_copy_t copy,
7039 boolean_t overwrite)
7040 {
7041 kern_return_t kr = KERN_SUCCESS;
7042 thread_t thread = current_thread();
7043
7044 if (!overwrite) {
7045
7046 /*
7047 * Allocate space in the target map for the data
7048 */
7049 *addr = 0;
7050 kr = vm_map_enter(map,
7051 addr,
7052 vm_map_round_page(copy->size),
7053 (vm_map_offset_t) 0,
7054 VM_FLAGS_ANYWHERE,
7055 VM_OBJECT_NULL,
7056 (vm_object_offset_t) 0,
7057 FALSE,
7058 VM_PROT_DEFAULT,
7059 VM_PROT_ALL,
7060 VM_INHERIT_DEFAULT);
7061 if (kr != KERN_SUCCESS)
7062 return kr;
7063 }
7064
7065 /*
7066 * Copyout the data from the kernel buffer to the target map.
7067 */
7068 if (thread->map == map) {
7069
7070 /*
7071 * If the target map is the current map, just do
7072 * the copy.
7073 */
7074 assert((vm_size_t) copy->size == copy->size);
7075 if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) {
7076 kr = KERN_INVALID_ADDRESS;
7077 }
7078 }
7079 else {
7080 vm_map_t oldmap;
7081
7082 /*
7083 * If the target map is another map, assume the
7084 * target's address space identity for the duration
7085 * of the copy.
7086 */
7087 vm_map_reference(map);
7088 oldmap = vm_map_switch(map);
7089
7090 assert((vm_size_t) copy->size == copy->size);
7091 if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) {
7092 vm_map_copyout_kernel_buffer_failures++;
7093 kr = KERN_INVALID_ADDRESS;
7094 }
7095
7096 (void) vm_map_switch(oldmap);
7097 vm_map_deallocate(map);
7098 }
7099
7100 if (kr != KERN_SUCCESS) {
7101 /* the copy failed, clean up */
7102 if (!overwrite) {
7103 /*
7104 * Deallocate the space we allocated in the target map.
7105 */
7106 (void) vm_map_remove(map,
7107 vm_map_trunc_page(*addr),
7108 vm_map_round_page(*addr +
7109 vm_map_round_page(copy->size)),
7110 VM_MAP_NO_FLAGS);
7111 *addr = 0;
7112 }
7113 } else {
7114 /* copy was successful, discard the copy structure */
7115 kfree(copy, copy->cpy_kalloc_size);
7116 }
7117
7118 return kr;
7119 }
7120
7121 /*
7122 * Macro: vm_map_copy_insert
7123 *
7124 * Description:
7125 * Link a copy chain ("copy") into a map at the
7126 * specified location (after "where").
7127 * Side effects:
7128 * The copy chain is destroyed.
7129 * Warning:
7130 * The arguments are evaluated multiple times.
7131 */
7132 #define vm_map_copy_insert(map, where, copy) \
7133 MACRO_BEGIN \
7134 vm_map_store_copy_insert(map, where, copy); \
7135 zfree(vm_map_copy_zone, copy); \
7136 MACRO_END
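/*
 * (vm_map_copyout() below uses this to splice the whole adjusted
 * entry chain into the destination map after "last"; note that the
 * vm_map_copy header itself is freed as a side effect.)
 */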
7137
7138 /*
7139 * Routine: vm_map_copyout
7140 *
7141 * Description:
7142 * Copy out a copy chain ("copy") into newly-allocated
7143 * space in the destination map.
7144 *
7145 * If successful, consumes the copy object.
7146 * Otherwise, the caller is responsible for it.
7147 */
7148 kern_return_t
7149 vm_map_copyout(
7150 vm_map_t dst_map,
7151 vm_map_address_t *dst_addr, /* OUT */
7152 vm_map_copy_t copy)
7153 {
7154 vm_map_size_t size;
7155 vm_map_size_t adjustment;
7156 vm_map_offset_t start;
7157 vm_object_offset_t vm_copy_start;
7158 vm_map_entry_t last;
7159 register
7160 vm_map_entry_t entry;
7161
7162 /*
7163 * Check for null copy object.
7164 */
7165
7166 if (copy == VM_MAP_COPY_NULL) {
7167 *dst_addr = 0;
7168 return(KERN_SUCCESS);
7169 }
7170
7171 /*
7172 * Check for special copy object, created
7173 * by vm_map_copyin_object.
7174 */
7175
7176 if (copy->type == VM_MAP_COPY_OBJECT) {
7177 vm_object_t object = copy->cpy_object;
7178 kern_return_t kr;
7179 vm_object_offset_t offset;
7180
7181 offset = vm_object_trunc_page(copy->offset);
7182 size = vm_map_round_page(copy->size +
7183 (vm_map_size_t)(copy->offset - offset));
7184 *dst_addr = 0;
7185 kr = vm_map_enter(dst_map, dst_addr, size,
7186 (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
7187 object, offset, FALSE,
7188 VM_PROT_DEFAULT, VM_PROT_ALL,
7189 VM_INHERIT_DEFAULT);
7190 if (kr != KERN_SUCCESS)
7191 return(kr);
7192 /* Account for non-pagealigned copy object */
7193 *dst_addr += (vm_map_offset_t)(copy->offset - offset);
7194 zfree(vm_map_copy_zone, copy);
7195 return(KERN_SUCCESS);
7196 }
7197
7198 /*
7199 * Check for special kernel buffer allocated
7200 * by new_ipc_kmsg_copyin.
7201 */
7202
7203 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
7204 return(vm_map_copyout_kernel_buffer(dst_map, dst_addr,
7205 copy, FALSE));
7206 }
7207
7208 /*
7209 * Find space for the data
7210 */
7211
7212 vm_copy_start = vm_object_trunc_page(copy->offset);
7213 size = vm_map_round_page((vm_map_size_t)copy->offset + copy->size)
7214 - vm_copy_start;
7215
7216 StartAgain: ;
7217
7218 vm_map_lock(dst_map);
7219 if( dst_map->disable_vmentry_reuse == TRUE) {
7220 VM_MAP_HIGHEST_ENTRY(dst_map, entry, start);
7221 last = entry;
7222 } else {
7223 assert(first_free_is_valid(dst_map));
7224 start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
7225 vm_map_min(dst_map) : last->vme_end;
7226 }
7227
7228 while (TRUE) {
7229 vm_map_entry_t next = last->vme_next;
7230 vm_map_offset_t end = start + size;
7231
7232 if ((end > dst_map->max_offset) || (end < start)) {
7233 if (dst_map->wait_for_space) {
7234 if (size <= (dst_map->max_offset - dst_map->min_offset)) {
7235 assert_wait((event_t) dst_map,
7236 THREAD_INTERRUPTIBLE);
7237 vm_map_unlock(dst_map);
7238 thread_block(THREAD_CONTINUE_NULL);
7239 goto StartAgain;
7240 }
7241 }
7242 vm_map_unlock(dst_map);
7243 return(KERN_NO_SPACE);
7244 }
7245
7246 if ((next == vm_map_to_entry(dst_map)) ||
7247 (next->vme_start >= end))
7248 break;
7249
7250 last = next;
7251 start = last->vme_end;
7252 }
7253
7254 /*
7255 * Since we're going to just drop the map
7256 * entries from the copy into the destination
7257 * map, they must come from the same pool.
7258 */
7259
7260 if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
7261 /*
7262 * Mismatches occur when dealing with the default
7263 * pager.
7264 */
7265 zone_t old_zone;
7266 vm_map_entry_t next, new;
7267
7268 /*
7269 * Find the zone that the copies were allocated from
7270 */
7271
7272 entry = vm_map_copy_first_entry(copy);
7273
7274 /*
7275 * Reinitialize the copy so that vm_map_copy_entry_link
7276 * will work.
7277 */
7278 vm_map_store_copy_reset(copy, entry);
7279 copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;
7280
7281 /*
7282 * Copy each entry.
7283 */
7284 while (entry != vm_map_copy_to_entry(copy)) {
7285 new = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
7286 vm_map_entry_copy_full(new, entry);
7287 new->use_pmap = FALSE; /* clr address space specifics */
7288 vm_map_copy_entry_link(copy,
7289 vm_map_copy_last_entry(copy),
7290 new);
7291 next = entry->vme_next;
7292 old_zone = entry->from_reserved_zone ? vm_map_entry_reserved_zone : vm_map_entry_zone;
7293 zfree(old_zone, entry);
7294 entry = next;
7295 }
7296 }
7297
7298 /*
7299 * Adjust the addresses in the copy chain, and
7300 * reset the region attributes.
7301 */
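/*
 * Illustrative numbers: if copy->offset is 0x2300, vm_copy_start is
 * its page-truncation 0x2000 and the space found starts at 0x9000,
 * then adjustment == 0x7000, every entry shifts by that amount, and
 * the caller eventually gets *dst_addr == 0x9300, preserving the
 * original page offset.
 */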
7302
7303 adjustment = start - vm_copy_start;
7304 for (entry = vm_map_copy_first_entry(copy);
7305 entry != vm_map_copy_to_entry(copy);
7306 entry = entry->vme_next) {
7307 entry->vme_start += adjustment;
7308 entry->vme_end += adjustment;
7309
7310 entry->inheritance = VM_INHERIT_DEFAULT;
7311 entry->protection = VM_PROT_DEFAULT;
7312 entry->max_protection = VM_PROT_ALL;
7313 entry->behavior = VM_BEHAVIOR_DEFAULT;
7314
7315 /*
7316 * If the entry is now wired,
7317 * map the pages into the destination map.
7318 */
7319 if (entry->wired_count != 0) {
7320 register vm_map_offset_t va;
7321 vm_object_offset_t offset;
7322 register vm_object_t object;
7323 vm_prot_t prot;
7324 int type_of_fault;
7325
7326 object = entry->object.vm_object;
7327 offset = entry->offset;
7328 va = entry->vme_start;
7329
7330 pmap_pageable(dst_map->pmap,
7331 entry->vme_start,
7332 entry->vme_end,
7333 TRUE);
7334
7335 while (va < entry->vme_end) {
7336 register vm_page_t m;
7337
7338 /*
7339 * Look up the page in the object.
7340 * Assert that the page will be found in the
7341 * top object:
7342 * either
7343 * the object was newly created by
7344 * vm_object_copy_slowly, and has
7345 * copies of all of the pages from
7346 * the source object
7347 * or
7348 * the object was moved from the old
7349 * map entry; because the old map
7350 * entry was wired, all of the pages
7351 * were in the top-level object.
7352 * (XXX not true if we wire pages for
7353 * reading)
7354 */
7355 vm_object_lock(object);
7356
7357 m = vm_page_lookup(object, offset);
7358 if (m == VM_PAGE_NULL || !VM_PAGE_WIRED(m) ||
7359 m->absent)
7360 panic("vm_map_copyout: wiring %p", m);
7361
7362 /*
7363 * ENCRYPTED SWAP:
7364 * The page is assumed to be wired here, so it
7365 * shouldn't be encrypted. Otherwise, we
7366 * couldn't enter it in the page table, since
7367 * we don't want the user to see the encrypted
7368 * data.
7369 */
7370 ASSERT_PAGE_DECRYPTED(m);
7371
7372 prot = entry->protection;
7373
7374 if (override_nx(dst_map, entry->alias) && prot)
7375 prot |= VM_PROT_EXECUTE;
7376
7377 type_of_fault = DBG_CACHE_HIT_FAULT;
7378
7379 vm_fault_enter(m, dst_map->pmap, va, prot, prot,
7380 VM_PAGE_WIRED(m), FALSE, FALSE, FALSE,
7381 &type_of_fault);
7382
7383 vm_object_unlock(object);
7384
7385 offset += PAGE_SIZE_64;
7386 va += PAGE_SIZE;
7387 }
7388 }
7389 }
7390
7391 /*
7392 * Correct the page alignment for the result
7393 */
7394
7395 *dst_addr = start + (copy->offset - vm_copy_start);
7396
7397 /*
7398 * Update the hints and the map size
7399 */
7400
7401 SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy));
7402
7403 dst_map->size += size;
7404
7405 /*
7406 * Link in the copy
7407 */
7408
7409 vm_map_copy_insert(dst_map, last, copy);
7410
7411 vm_map_unlock(dst_map);
7412
7413 /*
7414 * XXX If wiring_required, call vm_map_pageable
7415 */
7416
7417 return(KERN_SUCCESS);
7418 }
7419
7420 /*
7421 * Routine: vm_map_copyin
7422 *
7423 * Description:
7424 * see vm_map_copyin_common. Exported via Unsupported.exports.
7425 *
7426 */
7427
7428 #undef vm_map_copyin
7429
7430 kern_return_t
7431 vm_map_copyin(
7432 vm_map_t src_map,
7433 vm_map_address_t src_addr,
7434 vm_map_size_t len,
7435 boolean_t src_destroy,
7436 vm_map_copy_t *copy_result) /* OUT */
7437 {
7438 return(vm_map_copyin_common(src_map, src_addr, len, src_destroy,
7439 FALSE, copy_result, FALSE));
7440 }
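/*
 * Typical pairing of the copyin/copyout primitives (illustrative
 * sketch only; "src_task_map", "dst_task_map", "addr" and "len" are
 * hypothetical):
 *
 *	vm_map_copy_t copy;
 *	vm_map_address_t dst_addr;
 *	kern_return_t kr;
 *
 *	kr = vm_map_copyin(src_task_map, addr, len, FALSE, &copy);
 *	if (kr == KERN_SUCCESS) {
 *		kr = vm_map_copyout(dst_task_map, &dst_addr, copy);
 *		if (kr != KERN_SUCCESS)
 *			vm_map_copy_discard(copy);
 *	}
 *
 * On success vm_map_copyout() consumes "copy"; on failure the
 * caller must discard it, as above.
 */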
7441
7442 /*
7443 * Routine: vm_map_copyin_common
7444 *
7445 * Description:
7446 * Copy the specified region (src_addr, len) from the
7447 * source address space (src_map), possibly removing
7448 * the region from the source address space (src_destroy).
7449 *
7450 * Returns:
7451 * A vm_map_copy_t object (copy_result), suitable for
7452 * insertion into another address space (using vm_map_copyout),
7453 * copying over another address space region (using
7454 * vm_map_copy_overwrite). If the copy is unused, it
7455 * should be destroyed (using vm_map_copy_discard).
7456 *
7457 * In/out conditions:
7458 * The source map should not be locked on entry.
7459 */
7460
7461 typedef struct submap_map {
7462 vm_map_t parent_map;
7463 vm_map_offset_t base_start;
7464 vm_map_offset_t base_end;
7465 vm_map_size_t base_len;
7466 struct submap_map *next;
7467 } submap_map_t;
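/*
 * vm_map_copyin_common() pushes one submap_map_t per submap level it
 * descends into ("parent_maps" acts as a stack), so that once a
 * submap range has been fully copied it can pop back out and resume
 * in the parent map where it left off.
 */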
7468
7469 kern_return_t
7470 vm_map_copyin_common(
7471 vm_map_t src_map,
7472 vm_map_address_t src_addr,
7473 vm_map_size_t len,
7474 boolean_t src_destroy,
7475 __unused boolean_t src_volatile,
7476 vm_map_copy_t *copy_result, /* OUT */
7477 boolean_t use_maxprot)
7478 {
7479 vm_map_entry_t tmp_entry; /* Result of last map lookup --
7480 * in multi-level lookup, this
7481 * entry contains the actual
7482 * vm_object/offset.
7483 */
7484 register
7485 vm_map_entry_t new_entry = VM_MAP_ENTRY_NULL; /* Map entry for copy */
7486
7487 vm_map_offset_t src_start; /* Start of current entry --
7488 * where copy is taking place now
7489 */
7490 vm_map_offset_t src_end; /* End of entire region to be
7491 * copied */
7492 vm_map_offset_t src_base;
7493 vm_map_t base_map = src_map;
7494 boolean_t map_share=FALSE;
7495 submap_map_t *parent_maps = NULL;
7496
7497 register
7498 vm_map_copy_t copy; /* Resulting copy */
7499 vm_map_address_t copy_addr;
7500
7501 /*
7502 * Check for copies of zero bytes.
7503 */
7504
7505 if (len == 0) {
7506 *copy_result = VM_MAP_COPY_NULL;
7507 return(KERN_SUCCESS);
7508 }
7509
7510 /*
7511 * Check that the end address doesn't overflow
7512 */
7513 src_end = src_addr + len;
7514 if (src_end < src_addr)
7515 return KERN_INVALID_ADDRESS;
7516
7517 /*
7518 * If the copy is sufficiently small, use a kernel buffer instead
7519 * of making a virtual copy. The theory being that the cost of
7520 * setting up VM (and taking C-O-W faults) dominates the copy costs
7521 * for small regions.
7522 */
7523 if ((len < msg_ool_size_small) && !use_maxprot)
7524 return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
7525 src_destroy, copy_result);
7526
7527 /*
7528 * Compute (page aligned) start and end of region
7529 */
7530 src_start = vm_map_trunc_page(src_addr);
7531 src_end = vm_map_round_page(src_end);
7532
7533 XPR(XPR_VM_MAP, "vm_map_copyin_common map 0x%x addr 0x%x len 0x%x dest %d\n", src_map, src_addr, len, src_destroy, 0);
7534
7535 /*
7536 * Allocate a header element for the list.
7537 *
7538 * Use the start and end in the header to
7539 * remember the endpoints prior to rounding.
7540 */
7541
7542 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
7543 vm_map_copy_first_entry(copy) =
7544 vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
7545 copy->type = VM_MAP_COPY_ENTRY_LIST;
7546 copy->cpy_hdr.nentries = 0;
7547 copy->cpy_hdr.entries_pageable = TRUE;
7548
7549 vm_map_store_init( &(copy->cpy_hdr) );
7550
7551 copy->offset = src_addr;
7552 copy->size = len;
7553
7554 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
7555
7556 #define RETURN(x) \
7557 MACRO_BEGIN \
7558 vm_map_unlock(src_map); \
7559 if(src_map != base_map) \
7560 vm_map_deallocate(src_map); \
7561 if (new_entry != VM_MAP_ENTRY_NULL) \
7562 vm_map_copy_entry_dispose(copy,new_entry); \
7563 vm_map_copy_discard(copy); \
7564 { \
7565 submap_map_t *_ptr; \
7566 \
7567 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
7568 parent_maps=parent_maps->next; \
7569 if (_ptr->parent_map != base_map) \
7570 vm_map_deallocate(_ptr->parent_map); \
7571 kfree(_ptr, sizeof(submap_map_t)); \
7572 } \
7573 } \
7574 MACRO_RETURN(x); \
7575 MACRO_END
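/*
 * RETURN(x) unwinds everything built up so far: it unlocks (and, for
 * submaps, deallocates) the current source map, disposes of any
 * pending new_entry, discards the partial copy and frees the
 * parent-map stack before returning x.
 */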
7576
7577 /*
7578 * Find the beginning of the region.
7579 */
7580
7581 vm_map_lock(src_map);
7582
7583 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry))
7584 RETURN(KERN_INVALID_ADDRESS);
7585 if(!tmp_entry->is_sub_map) {
7586 vm_map_clip_start(src_map, tmp_entry, src_start);
7587 }
7588 /* set for later submap fix-up */
7589 copy_addr = src_start;
7590
7591 /*
7592 * Go through entries until we get to the end.
7593 */
7594
7595 while (TRUE) {
7596 register
7597 vm_map_entry_t src_entry = tmp_entry; /* Top-level entry */
7598 vm_map_size_t src_size; /* Size of source
7599 * map entry (in both
7600 * maps)
7601 */
7602
7603 register
7604 vm_object_t src_object; /* Object to copy */
7605 vm_object_offset_t src_offset;
7606
7607 boolean_t src_needs_copy; /* Should source map
7608 * be made read-only
7609 * for copy-on-write?
7610 */
7611
7612 boolean_t new_entry_needs_copy; /* Will new entry be COW? */
7613
7614 boolean_t was_wired; /* Was source wired? */
7615 vm_map_version_t version; /* Version before locks
7616 * dropped to make copy
7617 */
7618 kern_return_t result; /* Return value from
7619 * copy_strategically.
7620 */
7621 while(tmp_entry->is_sub_map) {
7622 vm_map_size_t submap_len;
7623 submap_map_t *ptr;
7624
7625 ptr = (submap_map_t *)kalloc(sizeof(submap_map_t));
7626 ptr->next = parent_maps;
7627 parent_maps = ptr;
7628 ptr->parent_map = src_map;
7629 ptr->base_start = src_start;
7630 ptr->base_end = src_end;
7631 submap_len = tmp_entry->vme_end - src_start;
7632 if(submap_len > (src_end-src_start))
7633 submap_len = src_end-src_start;
7634 ptr->base_len = submap_len;
7635
7636 src_start -= tmp_entry->vme_start;
7637 src_start += tmp_entry->offset;
7638 src_end = src_start + submap_len;
7639 src_map = tmp_entry->object.sub_map;
7640 vm_map_lock(src_map);
7641 /* keep an outstanding reference for all maps in */
7642 /* the parents tree except the base map */
7643 vm_map_reference(src_map);
7644 vm_map_unlock(ptr->parent_map);
7645 if (!vm_map_lookup_entry(
7646 src_map, src_start, &tmp_entry))
7647 RETURN(KERN_INVALID_ADDRESS);
7648 map_share = TRUE;
7649 if(!tmp_entry->is_sub_map)
7650 vm_map_clip_start(src_map, tmp_entry, src_start);
7651 src_entry = tmp_entry;
7652 }
7653 /* we are now in the lowest level submap... */
7654
7655 if ((tmp_entry->object.vm_object != VM_OBJECT_NULL) &&
7656 (tmp_entry->object.vm_object->phys_contiguous)) {
7657 /* This is not supported for now. In future */
7658 /* we will need to detect the phys_contig */
7659 /* condition and then upgrade copy_slowly */
7660 /* to do a physical copy from the device- */
7661 /* memory-based object. We can piggy-back */
7662 /* off of the was_wired boolean to set up */
7663 /* the proper handling. */
7664 RETURN(KERN_PROTECTION_FAILURE);
7665 }
7666 /*
7667 * Create a new address map entry to hold the result.
7668 * Fill in the fields from the appropriate source entries.
7669 * We must unlock the source map to do this if we need
7670 * to allocate a map entry.
7671 */
7672 if (new_entry == VM_MAP_ENTRY_NULL) {
7673 version.main_timestamp = src_map->timestamp;
7674 vm_map_unlock(src_map);
7675
7676 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
7677
7678 vm_map_lock(src_map);
7679 if ((version.main_timestamp + 1) != src_map->timestamp) {
7680 if (!vm_map_lookup_entry(src_map, src_start,
7681 &tmp_entry)) {
7682 RETURN(KERN_INVALID_ADDRESS);
7683 }
7684 if (!tmp_entry->is_sub_map)
7685 vm_map_clip_start(src_map, tmp_entry, src_start);
7686 continue; /* restart w/ new tmp_entry */
7687 }
7688 }
7689
7690 /*
7691 * Verify that the region can be read.
7692 */
7693 if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE &&
7694 !use_maxprot) ||
7695 (src_entry->max_protection & VM_PROT_READ) == 0)
7696 RETURN(KERN_PROTECTION_FAILURE);
7697
7698 /*
7699 * Clip against the endpoints of the entire region.
7700 */
7701
7702 vm_map_clip_end(src_map, src_entry, src_end);
7703
7704 src_size = src_entry->vme_end - src_start;
7705 src_object = src_entry->object.vm_object;
7706 src_offset = src_entry->offset;
7707 was_wired = (src_entry->wired_count != 0);
7708
7709 vm_map_entry_copy(new_entry, src_entry);
7710 new_entry->use_pmap = FALSE; /* clr address space specifics */
7711
7712 /*
7713 * Attempt non-blocking copy-on-write optimizations.
7714 */
7715
7716 if (src_destroy &&
7717 (src_object == VM_OBJECT_NULL ||
7718 (src_object->internal && !src_object->true_share
7719 && !map_share))) {
7720 /*
7721 * If we are destroying the source, and the object
7722 * is internal, we can move the object reference
7723 * from the source to the copy. The copy is
7724 * copy-on-write only if the source is.
7725 * We make another reference to the object, because
7726 * destroying the source entry will deallocate it.
7727 */
7728 vm_object_reference(src_object);
7729
7730 /*
7731 * Copy is always unwired; vm_map_entry_copy()
7732 * set its wired count to zero.
7733 */
7734
7735 goto CopySuccessful;
7736 }
7737
7738
7739 RestartCopy:
7740 XPR(XPR_VM_MAP, "vm_map_copyin_common src_obj 0x%x ent 0x%x obj 0x%x was_wired %d\n",
7741 src_object, new_entry, new_entry->object.vm_object,
7742 was_wired, 0);
7743 if ((src_object == VM_OBJECT_NULL ||
7744 (!was_wired && !map_share && !tmp_entry->is_shared)) &&
7745 vm_object_copy_quickly(
7746 &new_entry->object.vm_object,
7747 src_offset,
7748 src_size,
7749 &src_needs_copy,
7750 &new_entry_needs_copy)) {
7751
7752 new_entry->needs_copy = new_entry_needs_copy;
7753
7754 /*
7755 * Handle copy-on-write obligations
7756 */
7757
7758 if (src_needs_copy && !tmp_entry->needs_copy) {
7759 vm_prot_t prot;
7760
7761 prot = src_entry->protection & ~VM_PROT_WRITE;
7762
7763 if (override_nx(src_map, src_entry->alias) && prot)
7764 prot |= VM_PROT_EXECUTE;
7765
7766 vm_object_pmap_protect(
7767 src_object,
7768 src_offset,
7769 src_size,
7770 (src_entry->is_shared ?
7771 PMAP_NULL
7772 : src_map->pmap),
7773 src_entry->vme_start,
7774 prot);
7775
7776 tmp_entry->needs_copy = TRUE;
7777 }
7778
7779 /*
7780 * The map has never been unlocked, so it's safe
7781 * to move to the next entry rather than doing
7782 * another lookup.
7783 */
7784
7785 goto CopySuccessful;
7786 }
7787
7788 /*
7789 * Take an object reference, so that we may
7790 * release the map lock(s).
7791 */
7792
7793 assert(src_object != VM_OBJECT_NULL);
7794 vm_object_reference(src_object);
7795
7796 /*
7797 * Record the timestamp for later verification.
7798 * Unlock the map.
7799 */
7800
7801 version.main_timestamp = src_map->timestamp;
7802 vm_map_unlock(src_map); /* Increments timestamp once! */
7803
7804 /*
7805 * Perform the copy
7806 */
7807
7808 if (was_wired) {
7809 CopySlowly:
7810 vm_object_lock(src_object);
7811 result = vm_object_copy_slowly(
7812 src_object,
7813 src_offset,
7814 src_size,
7815 THREAD_UNINT,
7816 &new_entry->object.vm_object);
7817 new_entry->offset = 0;
7818 new_entry->needs_copy = FALSE;
7819
7820 }
7821 else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
7822 (tmp_entry->is_shared || map_share)) {
7823 vm_object_t new_object;
7824
7825 vm_object_lock_shared(src_object);
7826 new_object = vm_object_copy_delayed(
7827 src_object,
7828 src_offset,
7829 src_size,
7830 TRUE);
7831 if (new_object == VM_OBJECT_NULL)
7832 goto CopySlowly;
7833
7834 new_entry->object.vm_object = new_object;
7835 new_entry->needs_copy = TRUE;
7836 result = KERN_SUCCESS;
7837
7838 } else {
7839 result = vm_object_copy_strategically(src_object,
7840 src_offset,
7841 src_size,
7842 &new_entry->object.vm_object,
7843 &new_entry->offset,
7844 &new_entry_needs_copy);
7845
7846 new_entry->needs_copy = new_entry_needs_copy;
7847 }
7848
7849 if (result != KERN_SUCCESS &&
7850 result != KERN_MEMORY_RESTART_COPY) {
7851 vm_map_lock(src_map);
7852 RETURN(result);
7853 }
7854
7855 /*
7856 * Throw away the extra reference
7857 */
7858
7859 vm_object_deallocate(src_object);
7860
7861 /*
7862 * Verify that the map has not substantially
7863 * changed while the copy was being made.
7864 */
7865
7866 vm_map_lock(src_map);
7867
7868 if ((version.main_timestamp + 1) == src_map->timestamp)
7869 goto VerificationSuccessful;
7870
7871 /*
7872 * Simple version comparison failed.
7873 *
7874 * Retry the lookup and verify that the
7875 * same object/offset are still present.
7876 *
7877 * [Note: a memory manager that colludes with
7878 * the calling task can detect that we have
7879 * cheated. While the map was unlocked, the
7880 * mapping could have been changed and restored.]
7881 */
7882
7883 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
7884 RETURN(KERN_INVALID_ADDRESS);
7885 }
7886
7887 src_entry = tmp_entry;
7888 vm_map_clip_start(src_map, src_entry, src_start);
7889
7890 if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) &&
7891 !use_maxprot) ||
7892 ((src_entry->max_protection & VM_PROT_READ) == 0))
7893 goto VerificationFailed;
7894
7895 if (src_entry->vme_end < new_entry->vme_end)
7896 src_size = (new_entry->vme_end = src_entry->vme_end) - src_start;
7897
7898 if ((src_entry->object.vm_object != src_object) ||
7899 (src_entry->offset != src_offset) ) {
7900
7901 /*
7902 * Verification failed.
7903 *
7904 * Start over with this top-level entry.
7905 */
7906
7907 VerificationFailed: ;
7908
7909 vm_object_deallocate(new_entry->object.vm_object);
7910 tmp_entry = src_entry;
7911 continue;
7912 }
7913
7914 /*
7915 * Verification succeeded.
7916 */
7917
7918 VerificationSuccessful: ;
7919
7920 if (result == KERN_MEMORY_RESTART_COPY)
7921 goto RestartCopy;
7922
7923 /*
7924 * Copy succeeded.
7925 */
7926
7927 CopySuccessful: ;
7928
7929 /*
7930 * Link in the new copy entry.
7931 */
7932
7933 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
7934 new_entry);
7935
7936 /*
7937 * Determine whether the entire region
7938 * has been copied.
7939 */
7940 src_base = src_start;
7941 src_start = new_entry->vme_end;
7942 new_entry = VM_MAP_ENTRY_NULL;
7943 while ((src_start >= src_end) && (src_end != 0)) {
7944 if (src_map != base_map) {
7945 submap_map_t *ptr;
7946
7947 ptr = parent_maps;
7948 assert(ptr != NULL);
7949 parent_maps = parent_maps->next;
7950
7951 /* fix up the damage we did in that submap */
7952 vm_map_simplify_range(src_map,
7953 src_base,
7954 src_end);
7955
7956 vm_map_unlock(src_map);
7957 vm_map_deallocate(src_map);
7958 vm_map_lock(ptr->parent_map);
7959 src_map = ptr->parent_map;
7960 src_base = ptr->base_start;
7961 src_start = ptr->base_start + ptr->base_len;
7962 src_end = ptr->base_end;
7963 if ((src_end > src_start) &&
7964 !vm_map_lookup_entry(
7965 src_map, src_start, &tmp_entry))
7966 RETURN(KERN_INVALID_ADDRESS);
7967 kfree(ptr, sizeof(submap_map_t));
7968 if(parent_maps == NULL)
7969 map_share = FALSE;
7970 src_entry = tmp_entry->vme_prev;
7971 } else
7972 break;
7973 }
7974 if ((src_start >= src_end) && (src_end != 0))
7975 break;
7976
7977 /*
7978 * Verify that there are no gaps in the region
7979 */
7980
7981 tmp_entry = src_entry->vme_next;
7982 if ((tmp_entry->vme_start != src_start) ||
7983 (tmp_entry == vm_map_to_entry(src_map)))
7984 RETURN(KERN_INVALID_ADDRESS);
7985 }
7986
7987 /*
7988 * If the source should be destroyed, do it now, since the
7989 * copy was successful.
7990 */
7991 if (src_destroy) {
7992 (void) vm_map_delete(src_map,
7993 vm_map_trunc_page(src_addr),
7994 src_end,
7995 (src_map == kernel_map) ?
7996 VM_MAP_REMOVE_KUNWIRE :
7997 VM_MAP_NO_FLAGS,
7998 VM_MAP_NULL);
7999 } else {
8000 /* fix up the damage we did in the base map */
8001 vm_map_simplify_range(src_map,
8002 vm_map_trunc_page(src_addr),
8003 vm_map_round_page(src_end));
8004 }
8005
8006 vm_map_unlock(src_map);
8007
8008 /* Fix-up start and end points in copy. This is necessary */
8009 /* when the various entries in the copy object were picked */
8010 /* up from different sub-maps */
8011
8012 tmp_entry = vm_map_copy_first_entry(copy);
8013 while (tmp_entry != vm_map_copy_to_entry(copy)) {
8014 tmp_entry->vme_end = copy_addr +
8015 (tmp_entry->vme_end - tmp_entry->vme_start);
8016 tmp_entry->vme_start = copy_addr;
8017 assert(tmp_entry->vme_start < tmp_entry->vme_end);
8018 copy_addr += tmp_entry->vme_end - tmp_entry->vme_start;
8019 tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next;
8020 }
8021
8022 *copy_result = copy;
8023 return(KERN_SUCCESS);
8024
8025 #undef RETURN
8026 }
8027
8028 /*
8029 * vm_map_copyin_object:
8030 *
8031 * Create a copy object from an object.
8032 * Our caller donates an object reference.
8033 */
8034
8035 kern_return_t
8036 vm_map_copyin_object(
8037 vm_object_t object,
8038 vm_object_offset_t offset, /* offset of region in object */
8039 vm_object_size_t size, /* size of region in object */
8040 vm_map_copy_t *copy_result) /* OUT */
8041 {
8042 vm_map_copy_t copy; /* Resulting copy */
8043
8044 /*
8045 * We drop the object into a special copy object
8046 * that contains the object directly.
8047 */
8048
8049 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
8050 copy->type = VM_MAP_COPY_OBJECT;
8051 copy->cpy_object = object;
8052 copy->offset = offset;
8053 copy->size = size;
8054
8055 *copy_result = copy;
8056 return(KERN_SUCCESS);
8057 }
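
/*
 * Illustrative sketch, not part of xnu: how a caller that already holds
 * a vm_object_t reference might wrap it with vm_map_copyin_object() and
 * then hand the resulting copy object to vm_map_copyout().  The function
 * name example_donate_object and its error handling are hypothetical;
 * vm_map_copyin_object() consumes the donated object reference.
 */
#if 0 /* example only -- not compiled */
static kern_return_t
example_donate_object(
	vm_map_t		dst_map,
	vm_object_t		object,		/* caller donates this reference */
	vm_object_offset_t	offset,
	vm_object_size_t	size,
	vm_map_address_t	*dst_addr)	/* OUT */
{
	vm_map_copy_t	copy;
	kern_return_t	kr;

	/* wrap the object in a VM_MAP_COPY_OBJECT copy object */
	kr = vm_map_copyin_object(object, offset, size, &copy);
	if (kr != KERN_SUCCESS)
		return kr;

	/* map the copy into the destination map at a kernel-chosen address */
	kr = vm_map_copyout(dst_map, dst_addr, copy);
	if (kr != KERN_SUCCESS) {
		/* the copy object still owns the object reference */
		vm_map_copy_discard(copy);
	}
	return kr;
}
#endif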
8058
8059 static void
8060 vm_map_fork_share(
8061 vm_map_t old_map,
8062 vm_map_entry_t old_entry,
8063 vm_map_t new_map)
8064 {
8065 vm_object_t object;
8066 vm_map_entry_t new_entry;
8067
8068 /*
8069 * New sharing code. New map entry
8070 * references original object. Internal
8071 * objects use an asynchronous copy algorithm for
8072 * future copies. First make sure we have
8073 * the right object. If we need a shadow,
8074 * or someone else already has one, then
8075 * make a new shadow and share it.
8076 */
8077
8078 object = old_entry->object.vm_object;
8079 if (old_entry->is_sub_map) {
8080 assert(old_entry->wired_count == 0);
8081 #ifndef NO_NESTED_PMAP
8082 if(old_entry->use_pmap) {
8083 kern_return_t result;
8084
8085 result = pmap_nest(new_map->pmap,
8086 (old_entry->object.sub_map)->pmap,
8087 (addr64_t)old_entry->vme_start,
8088 (addr64_t)old_entry->vme_start,
8089 (uint64_t)(old_entry->vme_end - old_entry->vme_start));
8090 if(result)
8091 panic("vm_map_fork_share: pmap_nest failed!");
8092 }
8093 #endif /* NO_NESTED_PMAP */
8094 } else if (object == VM_OBJECT_NULL) {
8095 object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
8096 old_entry->vme_start));
8097 old_entry->offset = 0;
8098 old_entry->object.vm_object = object;
8099 assert(!old_entry->needs_copy);
8100 } else if (object->copy_strategy !=
8101 MEMORY_OBJECT_COPY_SYMMETRIC) {
8102
8103 /*
8104 * We are already using an asymmetric
8105 * copy, and therefore we already have
8106 * the right object.
8107 */
8108
8109 assert(! old_entry->needs_copy);
8110 }
8111 else if (old_entry->needs_copy || /* case 1 */
8112 object->shadowed || /* case 2 */
8113 (!object->true_share && /* case 3 */
8114 !old_entry->is_shared &&
8115 (object->vo_size >
8116 (vm_map_size_t)(old_entry->vme_end -
8117 old_entry->vme_start)))) {
8118
8119 /*
8120 * We need to create a shadow.
8121 * There are three cases here.
8122 * In the first case, we need to
8123 * complete a deferred symmetrical
8124 * copy that we participated in.
8125 * In the second and third cases,
8126 * we need to create the shadow so
8127 * that changes that we make to the
8128 * object do not interfere with
8129 * any symmetrical copies which
8130 * have occurred (case 2) or which
8131 * might occur (case 3).
8132 *
8133 * The first case is when we had
8134 * deferred shadow object creation
8135 * via the entry->needs_copy mechanism.
8136 * This mechanism only works when
8137 * only one entry points to the source
8138 * object, and we are about to create
8139 * a second entry pointing to the
8140 * same object. The problem is that
8141 * there is no way of mapping from
8142 * an object to the entries pointing
8143 * to it. (Deferred shadow creation
8144 * works with one entry because it occurs
8145 * at fault time, and we walk from the
8146 * entry to the object when handling
8147 * the fault.)
8148 *
8149 * The second case is when the object
8150 * to be shared has already been copied
8151 * with a symmetric copy, but we point
8152 * directly to the object without
8153 * needs_copy set in our entry. (This
8154 * can happen because different ranges
8155 * of an object can be pointed to by
8156 * different entries. In particular,
8157 * a single entry pointing to an object
8158 * can be split by a call to vm_inherit,
8159 * which, combined with task_create, can
8160 * result in the different entries
8161 * having different needs_copy values.)
8162 * The shadowed flag in the object allows
8163 * us to detect this case. The problem
8164 * with this case is that if this object
8165 * has or will have shadows, then we
8166 * must not perform an asymmetric copy
8167 * of this object, since such a copy
8168 * allows the object to be changed, which
8169 * will break the previous symmetrical
8170 * copies (which rely upon the object
8171 * not changing). In a sense, the shadowed
8172 * flag says "don't change this object".
8173 * We fix this by creating a shadow
8174 * object for this object, and sharing
8175 * that. This works because we are free
8176 * to change the shadow object (and thus
8177 * to use an asymmetric copy strategy);
8178 * this is also semantically correct,
8179 * since this object is temporary, and
8180 * therefore a copy of the object is
8181 * as good as the object itself. (This
8182 * is not true for permanent objects,
8183 * since the pager needs to see changes,
8184 * which won't happen if the changes
8185 * are made to a copy.)
8186 *
8187 * The third case is when the object
8188 * to be shared has parts sticking
8189 * outside of the entry we're working
8190 * with, and thus may in the future
8191 * be subject to a symmetrical copy.
8192 * (This is a preemptive version of
8193 * case 2.)
8194 */
8195 vm_object_shadow(&old_entry->object.vm_object,
8196 &old_entry->offset,
8197 (vm_map_size_t) (old_entry->vme_end -
8198 old_entry->vme_start));
8199
8200 /*
8201 * If we're making a shadow for other than
8202 * copy on write reasons, then we have
8203 * to remove write permission.
8204 */
8205
8206 if (!old_entry->needs_copy &&
8207 (old_entry->protection & VM_PROT_WRITE)) {
8208 vm_prot_t prot;
8209
8210 prot = old_entry->protection & ~VM_PROT_WRITE;
8211
8212 if (override_nx(old_map, old_entry->alias) && prot)
8213 prot |= VM_PROT_EXECUTE;
8214
8215 if (old_map->mapped) {
8216 vm_object_pmap_protect(
8217 old_entry->object.vm_object,
8218 old_entry->offset,
8219 (old_entry->vme_end -
8220 old_entry->vme_start),
8221 PMAP_NULL,
8222 old_entry->vme_start,
8223 prot);
8224 } else {
8225 pmap_protect(old_map->pmap,
8226 old_entry->vme_start,
8227 old_entry->vme_end,
8228 prot);
8229 }
8230 }
8231
8232 old_entry->needs_copy = FALSE;
8233 object = old_entry->object.vm_object;
8234 }
8235
8236
8237 /*
8238 * If object was using a symmetric copy strategy,
8239 * change its copy strategy to the default
8240 * asymmetric copy strategy, which is copy_delay
8241 * in the non-norma case and copy_call in the
8242 * norma case. Bump the reference count for the
8243 * new entry.
8244 */
8245
8246 if(old_entry->is_sub_map) {
8247 vm_map_lock(old_entry->object.sub_map);
8248 vm_map_reference(old_entry->object.sub_map);
8249 vm_map_unlock(old_entry->object.sub_map);
8250 } else {
8251 vm_object_lock(object);
8252 vm_object_reference_locked(object);
8253 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
8254 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
8255 }
8256 vm_object_unlock(object);
8257 }
8258
8259 /*
8260 * Clone the entry, using object ref from above.
8261 * Mark both entries as shared.
8262 */
8263
8264 new_entry = vm_map_entry_create(new_map, FALSE); /* Never the kernel
8265 * map or descendants */
8266 vm_map_entry_copy(new_entry, old_entry);
8267 old_entry->is_shared = TRUE;
8268 new_entry->is_shared = TRUE;
8269
8270 /*
8271 * Insert the entry into the new map -- we
8272 * know we're inserting at the end of the new
8273 * map.
8274 */
8275
8276 vm_map_store_entry_link(new_map, vm_map_last_entry(new_map), new_entry);
8277
8278 /*
8279 * Update the physical map
8280 */
8281
8282 if (old_entry->is_sub_map) {
8283 /* Bill Angell pmap support goes here */
8284 } else {
8285 pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
8286 old_entry->vme_end - old_entry->vme_start,
8287 old_entry->vme_start);
8288 }
8289 }
8290
8291 static boolean_t
8292 vm_map_fork_copy(
8293 vm_map_t old_map,
8294 vm_map_entry_t *old_entry_p,
8295 vm_map_t new_map)
8296 {
8297 vm_map_entry_t old_entry = *old_entry_p;
8298 vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start;
8299 vm_map_offset_t start = old_entry->vme_start;
8300 vm_map_copy_t copy;
8301 vm_map_entry_t last = vm_map_last_entry(new_map);
8302
8303 vm_map_unlock(old_map);
8304 /*
8305 * Use maxprot version of copyin because we
8306 * care about whether this memory can ever
8307 * be accessed, not just whether it's accessible
8308 * right now.
8309 */
8310 if (vm_map_copyin_maxprot(old_map, start, entry_size, FALSE, &copy)
8311 != KERN_SUCCESS) {
8312 /*
8313 * The map might have changed while it
8314 * was unlocked, check it again. Skip
8315 * any blank space or permanently
8316 * unreadable region.
8317 */
8318 vm_map_lock(old_map);
8319 if (!vm_map_lookup_entry(old_map, start, &last) ||
8320 (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) {
8321 last = last->vme_next;
8322 }
8323 *old_entry_p = last;
8324
8325 /*
8326 * XXX For some error returns, want to
8327 * XXX skip to the next element. Note
8328 * that INVALID_ADDRESS and
8329 * PROTECTION_FAILURE are handled above.
8330 */
8331
8332 return FALSE;
8333 }
8334
8335 /*
8336 * Insert the copy into the new map
8337 */
8338
8339 vm_map_copy_insert(new_map, last, copy);
8340
8341 /*
8342 * Pick up the traversal at the end of
8343 * the copied region.
8344 */
8345
8346 vm_map_lock(old_map);
8347 start += entry_size;
8348 if (! vm_map_lookup_entry(old_map, start, &last)) {
8349 last = last->vme_next;
8350 } else {
8351 if (last->vme_start == start) {
8352 /*
8353 * No need to clip here and we don't
8354 * want to cause any unnecessary
8355 * unnesting...
8356 */
8357 } else {
8358 vm_map_clip_start(old_map, last, start);
8359 }
8360 }
8361 *old_entry_p = last;
8362
8363 return TRUE;
8364 }
8365
8366 /*
8367 * vm_map_fork:
8368 *
8369 * Create and return a new map based on the old
8370 * map, according to the inheritance values on the
8371 * regions in that map.
8372 *
8373 * The source map must not be locked.
8374 */
8375 vm_map_t
8376 vm_map_fork(
8377 vm_map_t old_map)
8378 {
8379 pmap_t new_pmap;
8380 vm_map_t new_map;
8381 vm_map_entry_t old_entry;
8382 vm_map_size_t new_size = 0, entry_size;
8383 vm_map_entry_t new_entry;
8384 boolean_t src_needs_copy;
8385 boolean_t new_entry_needs_copy;
8386
8387 new_pmap = pmap_create((vm_map_size_t) 0,
8388 #if defined(__i386__) || defined(__x86_64__)
8389 old_map->pmap->pm_task_map != TASK_MAP_32BIT
8390 #else
8391 0
8392 #endif
8393 );
8394 #if defined(__i386__)
8395 if (old_map->pmap->pm_task_map == TASK_MAP_64BIT_SHARED)
8396 pmap_set_4GB_pagezero(new_pmap);
8397 #endif
8398
8399 vm_map_reference_swap(old_map);
8400 vm_map_lock(old_map);
8401
8402 new_map = vm_map_create(new_pmap,
8403 old_map->min_offset,
8404 old_map->max_offset,
8405 old_map->hdr.entries_pageable);
8406 for (
8407 old_entry = vm_map_first_entry(old_map);
8408 old_entry != vm_map_to_entry(old_map);
8409 ) {
8410
8411 entry_size = old_entry->vme_end - old_entry->vme_start;
8412
8413 switch (old_entry->inheritance) {
8414 case VM_INHERIT_NONE:
8415 break;
8416
8417 case VM_INHERIT_SHARE:
8418 vm_map_fork_share(old_map, old_entry, new_map);
8419 new_size += entry_size;
8420 break;
8421
8422 case VM_INHERIT_COPY:
8423
8424 /*
8425 * Inline the copy_quickly case;
8426 * upon failure, fall back on call
8427 * to vm_map_fork_copy.
8428 */
8429
8430 if(old_entry->is_sub_map)
8431 break;
8432 if ((old_entry->wired_count != 0) ||
8433 ((old_entry->object.vm_object != NULL) &&
8434 (old_entry->object.vm_object->true_share))) {
8435 goto slow_vm_map_fork_copy;
8436 }
8437
8438 new_entry = vm_map_entry_create(new_map, FALSE); /* never the kernel map or descendants */
8439 vm_map_entry_copy(new_entry, old_entry);
8440 /* clear address space specifics */
8441 new_entry->use_pmap = FALSE;
8442
8443 if (! vm_object_copy_quickly(
8444 &new_entry->object.vm_object,
8445 old_entry->offset,
8446 (old_entry->vme_end -
8447 old_entry->vme_start),
8448 &src_needs_copy,
8449 &new_entry_needs_copy)) {
8450 vm_map_entry_dispose(new_map, new_entry);
8451 goto slow_vm_map_fork_copy;
8452 }
8453
8454 /*
8455 * Handle copy-on-write obligations
8456 */
8457
8458 if (src_needs_copy && !old_entry->needs_copy) {
8459 vm_prot_t prot;
8460
8461 prot = old_entry->protection & ~VM_PROT_WRITE;
8462
8463 if (override_nx(old_map, old_entry->alias) && prot)
8464 prot |= VM_PROT_EXECUTE;
8465
8466 vm_object_pmap_protect(
8467 old_entry->object.vm_object,
8468 old_entry->offset,
8469 (old_entry->vme_end -
8470 old_entry->vme_start),
8471 ((old_entry->is_shared
8472 || old_map->mapped)
8473 ? PMAP_NULL :
8474 old_map->pmap),
8475 old_entry->vme_start,
8476 prot);
8477
8478 old_entry->needs_copy = TRUE;
8479 }
8480 new_entry->needs_copy = new_entry_needs_copy;
8481
8482 /*
8483 * Insert the entry at the end
8484 * of the map.
8485 */
8486
8487 vm_map_store_entry_link(new_map, vm_map_last_entry(new_map),
8488 new_entry);
8489 new_size += entry_size;
8490 break;
8491
8492 slow_vm_map_fork_copy:
8493 if (vm_map_fork_copy(old_map, &old_entry, new_map)) {
8494 new_size += entry_size;
8495 }
8496 continue;
8497 }
8498 old_entry = old_entry->vme_next;
8499 }
8500
8501 new_map->size = new_size;
8502 vm_map_unlock(old_map);
8503 vm_map_deallocate(old_map);
8504
8505 return(new_map);
8506 }
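
/*
 * Illustrative user-space sketch, not part of xnu: the inheritance values
 * consumed by vm_map_fork() above are normally set with mach_vm_inherit().
 * After fork(), a VM_INHERIT_SHARE region stays shared with the child
 * (vm_map_fork_share()), a VM_INHERIT_COPY region is copied copy-on-write,
 * and a VM_INHERIT_NONE region is absent from the child's map.  The
 * function name and the one-page size are hypothetical.
 */
#if 0 /* example only -- user-space code, not compiled */
#include <mach/mach.h>
#include <mach/mach_vm.h>
#include <unistd.h>

static void
example_inheritance(void)
{
	mach_vm_address_t	addr = 0;
	mach_vm_size_t		size = 4096;	/* one page, for illustration */

	if (mach_vm_allocate(mach_task_self(), &addr, size,
			     VM_FLAGS_ANYWHERE) != KERN_SUCCESS)
		return;

	/* ask vm_map_fork() to share this region with future children */
	(void) mach_vm_inherit(mach_task_self(), addr, size, VM_INHERIT_SHARE);

	if (fork() == 0) {
		/* child: writes to [addr, addr+size) are seen by the parent;
		 * with VM_INHERIT_COPY (the default) they would not be */
		_exit(0);
	}
}
#endif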
8507
8508 /*
8509 * vm_map_exec:
8510 *
8511 * Set up the "new_map" with the proper execution environment according
8512 * to the type of executable (platform, 64bit, chroot environment).
8513 * Map the comm page and shared region, etc...
8514 */
8515 kern_return_t
8516 vm_map_exec(
8517 vm_map_t new_map,
8518 task_t task,
8519 void *fsroot,
8520 cpu_type_t cpu)
8521 {
8522 SHARED_REGION_TRACE_DEBUG(
8523 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): ->\n",
8524 current_task(), new_map, task, fsroot, cpu));
8525 (void) vm_commpage_enter(new_map, task);
8526 (void) vm_shared_region_enter(new_map, task, fsroot, cpu);
8527 SHARED_REGION_TRACE_DEBUG(
8528 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): <-\n",
8529 current_task(), new_map, task, fsroot, cpu));
8530 return KERN_SUCCESS;
8531 }
8532
8533 /*
8534 * vm_map_lookup_locked:
8535 *
8536 * Finds the VM object, offset, and
8537 * protection for a given virtual address in the
8538 * specified map, assuming a page fault of the
8539 * type specified.
8540 *
8541 * Returns the (object, offset, protection) for
8542 * this address, whether it is wired down, and whether
8543 * this map has the only reference to the data in question.
8544 * In order to later verify this lookup, a "version"
8545 * is returned.
8546 *
8547 * The map MUST be locked by the caller and WILL be
8548 * locked on exit. In order to guarantee the
8549 * existence of the returned object, it is returned
8550 * locked.
8551 *
8552 * If a lookup is requested with "write protection"
8553 * specified, the map may be changed to perform virtual
8554 * copying operations, although the data referenced will
8555 * remain the same.
8556 */
8557 kern_return_t
8558 vm_map_lookup_locked(
8559 vm_map_t *var_map, /* IN/OUT */
8560 vm_map_offset_t vaddr,
8561 vm_prot_t fault_type,
8562 int object_lock_type,
8563 vm_map_version_t *out_version, /* OUT */
8564 vm_object_t *object, /* OUT */
8565 vm_object_offset_t *offset, /* OUT */
8566 vm_prot_t *out_prot, /* OUT */
8567 boolean_t *wired, /* OUT */
8568 vm_object_fault_info_t fault_info, /* OUT */
8569 vm_map_t *real_map)
8570 {
8571 vm_map_entry_t entry;
8572 register vm_map_t map = *var_map;
8573 vm_map_t old_map = *var_map;
8574 vm_map_t cow_sub_map_parent = VM_MAP_NULL;
8575 vm_map_offset_t cow_parent_vaddr = 0;
8576 vm_map_offset_t old_start = 0;
8577 vm_map_offset_t old_end = 0;
8578 register vm_prot_t prot;
8579 boolean_t mask_protections;
8580 vm_prot_t original_fault_type;
8581
8582 /*
8583 * VM_PROT_IS_MASK means that the caller wants us to use "fault_type"
8584 * as a mask against the mapping's actual protections, not as an
8585 * absolute value.
8586 */
8587 mask_protections = (fault_type & VM_PROT_IS_MASK) ? TRUE : FALSE;
8588 fault_type &= ~VM_PROT_IS_MASK;
8589 original_fault_type = fault_type;
8590
8591 *real_map = map;
8592
8593 RetryLookup:
8594 fault_type = original_fault_type;
8595
8596 /*
8597 * If the map has an interesting hint, try it before calling
8598 * full blown lookup routine.
8599 */
8600 entry = map->hint;
8601
8602 if ((entry == vm_map_to_entry(map)) ||
8603 (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
8604 vm_map_entry_t tmp_entry;
8605
8606 /*
8607 * Entry was either not a valid hint, or the vaddr
8608 * was not contained in the entry, so do a full lookup.
8609 */
8610 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
8611 if((cow_sub_map_parent) && (cow_sub_map_parent != map))
8612 vm_map_unlock(cow_sub_map_parent);
8613 if((*real_map != map)
8614 && (*real_map != cow_sub_map_parent))
8615 vm_map_unlock(*real_map);
8616 return KERN_INVALID_ADDRESS;
8617 }
8618
8619 entry = tmp_entry;
8620 }
8621 if(map == old_map) {
8622 old_start = entry->vme_start;
8623 old_end = entry->vme_end;
8624 }
8625
8626 /*
8627 * Handle submaps. Drop lock on upper map, submap is
8628 * returned locked.
8629 */
8630
8631 submap_recurse:
8632 if (entry->is_sub_map) {
8633 vm_map_offset_t local_vaddr;
8634 vm_map_offset_t end_delta;
8635 vm_map_offset_t start_delta;
8636 vm_map_entry_t submap_entry;
8637 boolean_t mapped_needs_copy=FALSE;
8638
8639 local_vaddr = vaddr;
8640
8641 if ((entry->use_pmap && !(fault_type & VM_PROT_WRITE))) {
8642 /* if real_map equals map we unlock below */
8643 if ((*real_map != map) &&
8644 (*real_map != cow_sub_map_parent))
8645 vm_map_unlock(*real_map);
8646 *real_map = entry->object.sub_map;
8647 }
8648
8649 if(entry->needs_copy && (fault_type & VM_PROT_WRITE)) {
8650 if (!mapped_needs_copy) {
8651 if (vm_map_lock_read_to_write(map)) {
8652 vm_map_lock_read(map);
8653 /* XXX FBDP: entry still valid ? */
8654 if(*real_map == entry->object.sub_map)
8655 *real_map = map;
8656 goto RetryLookup;
8657 }
8658 vm_map_lock_read(entry->object.sub_map);
8659 cow_sub_map_parent = map;
8660 /* reset base to map before cow object */
8661 /* this is the map which will accept */
8662 /* the new cow object */
8663 old_start = entry->vme_start;
8664 old_end = entry->vme_end;
8665 cow_parent_vaddr = vaddr;
8666 mapped_needs_copy = TRUE;
8667 } else {
8668 vm_map_lock_read(entry->object.sub_map);
8669 if((cow_sub_map_parent != map) &&
8670 (*real_map != map))
8671 vm_map_unlock(map);
8672 }
8673 } else {
8674 vm_map_lock_read(entry->object.sub_map);
8675 /* leave the map locked if it is a target */
8676 /* cow sub_map (above); otherwise, just */
8677 /* follow the maps down to the object. */
8678 /* Here we unlock, knowing we are not */
8679 /* revisiting the map. */
8680 if((*real_map != map) && (map != cow_sub_map_parent))
8681 vm_map_unlock_read(map);
8682 }
8683
8684 /* XXX FBDP: map has been unlocked, what protects "entry" !? */
8685 *var_map = map = entry->object.sub_map;
8686
8687 /* calculate the offset in the submap for vaddr */
8688 local_vaddr = (local_vaddr - entry->vme_start) + entry->offset;
8689
8690 RetrySubMap:
8691 if(!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
8692 if((cow_sub_map_parent) && (cow_sub_map_parent != map)){
8693 vm_map_unlock(cow_sub_map_parent);
8694 }
8695 if((*real_map != map)
8696 && (*real_map != cow_sub_map_parent)) {
8697 vm_map_unlock(*real_map);
8698 }
8699 *real_map = map;
8700 return KERN_INVALID_ADDRESS;
8701 }
8702
8703 /* find the attenuated shadow of the underlying object */
8704 /* on our target map */
8705
8706 /* In English: the submap object may extend beyond the */
8707 /* region mapped by the entry, or may only fill a portion */
8708 /* of it. For our purposes, we only care if the object */
8709 /* doesn't fill it. In this case the area which will */
8710 /* ultimately be clipped in the top map will only need */
8711 /* to be as big as the portion of the underlying entry */
8712 /* which is mapped */
8713 start_delta = submap_entry->vme_start > entry->offset ?
8714 submap_entry->vme_start - entry->offset : 0;
8715
8716 end_delta =
8717 (entry->offset + start_delta + (old_end - old_start)) <=
8718 submap_entry->vme_end ?
8719 0 : (entry->offset +
8720 (old_end - old_start))
8721 - submap_entry->vme_end;
8722
8723 old_start += start_delta;
8724 old_end -= end_delta;
8725
8726 if(submap_entry->is_sub_map) {
8727 entry = submap_entry;
8728 vaddr = local_vaddr;
8729 goto submap_recurse;
8730 }
8731
8732 if(((fault_type & VM_PROT_WRITE) && cow_sub_map_parent)) {
8733
8734 vm_object_t sub_object, copy_object;
8735 vm_object_offset_t copy_offset;
8736 vm_map_offset_t local_start;
8737 vm_map_offset_t local_end;
8738 boolean_t copied_slowly = FALSE;
8739
8740 if (vm_map_lock_read_to_write(map)) {
8741 vm_map_lock_read(map);
8742 old_start -= start_delta;
8743 old_end += end_delta;
8744 goto RetrySubMap;
8745 }
8746
8747
8748 sub_object = submap_entry->object.vm_object;
8749 if (sub_object == VM_OBJECT_NULL) {
8750 sub_object =
8751 vm_object_allocate(
8752 (vm_map_size_t)
8753 (submap_entry->vme_end -
8754 submap_entry->vme_start));
8755 submap_entry->object.vm_object = sub_object;
8756 submap_entry->offset = 0;
8757 }
8758 local_start = local_vaddr -
8759 (cow_parent_vaddr - old_start);
8760 local_end = local_vaddr +
8761 (old_end - cow_parent_vaddr);
8762 vm_map_clip_start(map, submap_entry, local_start);
8763 vm_map_clip_end(map, submap_entry, local_end);
8764 /* unnesting was done in vm_map_clip_start/end() */
8765 assert(!submap_entry->use_pmap);
8766
8767 /* This is the COW case, lets connect */
8768 /* an entry in our space to the underlying */
8769 /* object in the submap, bypassing the */
8770 /* submap. */
8771
8772
8773 if(submap_entry->wired_count != 0 ||
8774 (sub_object->copy_strategy ==
8775 MEMORY_OBJECT_COPY_NONE)) {
8776 vm_object_lock(sub_object);
8777 vm_object_copy_slowly(sub_object,
8778 submap_entry->offset,
8779 (submap_entry->vme_end -
8780 submap_entry->vme_start),
8781 FALSE,
8782 &copy_object);
8783 copied_slowly = TRUE;
8784 } else {
8785
8786 /* set up shadow object */
8787 copy_object = sub_object;
8788 vm_object_reference(copy_object);
8789 sub_object->shadowed = TRUE;
8790 submap_entry->needs_copy = TRUE;
8791
8792 prot = submap_entry->protection & ~VM_PROT_WRITE;
8793
8794 if (override_nx(map, submap_entry->alias) && prot)
8795 prot |= VM_PROT_EXECUTE;
8796
8797 vm_object_pmap_protect(
8798 sub_object,
8799 submap_entry->offset,
8800 submap_entry->vme_end -
8801 submap_entry->vme_start,
8802 (submap_entry->is_shared
8803 || map->mapped) ?
8804 PMAP_NULL : map->pmap,
8805 submap_entry->vme_start,
8806 prot);
8807 }
8808
8809 /*
8810 * Adjust the fault offset to the submap entry.
8811 */
8812 copy_offset = (local_vaddr -
8813 submap_entry->vme_start +
8814 submap_entry->offset);
8815
8816 /* This works differently from the */
8817 /* normal submap case. We go back */
8818 /* to the parent of the cow map and */
8819 /* clip out the target portion of */
8820 /* the sub_map, substituting the */
8821 /* new copy object. */
8822
8823 vm_map_unlock(map);
8824 local_start = old_start;
8825 local_end = old_end;
8826 map = cow_sub_map_parent;
8827 *var_map = cow_sub_map_parent;
8828 vaddr = cow_parent_vaddr;
8829 cow_sub_map_parent = NULL;
8830
8831 if(!vm_map_lookup_entry(map,
8832 vaddr, &entry)) {
8833 vm_object_deallocate(
8834 copy_object);
8835 vm_map_lock_write_to_read(map);
8836 return KERN_INVALID_ADDRESS;
8837 }
8838
8839 /* clip out the portion of space */
8840 /* mapped by the sub map which */
8841 /* corresponds to the underlying */
8842 /* object */
8843
8844 /*
8845 * Clip (and unnest) the smallest nested chunk
8846 * possible around the faulting address...
8847 */
8848 local_start = vaddr & ~(pmap_nesting_size_min - 1);
8849 local_end = local_start + pmap_nesting_size_min;
8850 /*
8851 * ... but don't go beyond the "old_start" to "old_end"
8852 * range, to avoid spanning over another VM region
8853 * with a possibly different VM object and/or offset.
8854 */
8855 if (local_start < old_start) {
8856 local_start = old_start;
8857 }
8858 if (local_end > old_end) {
8859 local_end = old_end;
8860 }
8861 /*
8862 * Adjust copy_offset to the start of the range.
8863 */
8864 copy_offset -= (vaddr - local_start);
8865
8866 vm_map_clip_start(map, entry, local_start);
8867 vm_map_clip_end(map, entry, local_end);
8868 /* unnesting was done in vm_map_clip_start/end() */
8869 assert(!entry->use_pmap);
8870
8871 /* substitute copy object for */
8872 /* shared map entry */
8873 vm_map_deallocate(entry->object.sub_map);
8874 entry->is_sub_map = FALSE;
8875 entry->object.vm_object = copy_object;
8876
8877 /* propagate the submap entry's protections */
8878 entry->protection |= submap_entry->protection;
8879 entry->max_protection |= submap_entry->max_protection;
8880
8881 if(copied_slowly) {
8882 entry->offset = local_start - old_start;
8883 entry->needs_copy = FALSE;
8884 entry->is_shared = FALSE;
8885 } else {
8886 entry->offset = copy_offset;
8887 entry->needs_copy = TRUE;
8888 if(entry->inheritance == VM_INHERIT_SHARE)
8889 entry->inheritance = VM_INHERIT_COPY;
8890 if (map != old_map)
8891 entry->is_shared = TRUE;
8892 }
8893 if(entry->inheritance == VM_INHERIT_SHARE)
8894 entry->inheritance = VM_INHERIT_COPY;
8895
8896 vm_map_lock_write_to_read(map);
8897 } else {
8898 if((cow_sub_map_parent)
8899 && (cow_sub_map_parent != *real_map)
8900 && (cow_sub_map_parent != map)) {
8901 vm_map_unlock(cow_sub_map_parent);
8902 }
8903 entry = submap_entry;
8904 vaddr = local_vaddr;
8905 }
8906 }
8907
8908 /*
8909 * Check whether this task is allowed to have
8910 * this page.
8911 */
8912
8913 prot = entry->protection;
8914
8915 if (override_nx(map, entry->alias) && prot) {
8916 /*
8917 * HACK -- if not a stack, then allow execution
8918 */
8919 prot |= VM_PROT_EXECUTE;
8920 }
8921
8922 if (mask_protections) {
8923 fault_type &= prot;
8924 if (fault_type == VM_PROT_NONE) {
8925 goto protection_failure;
8926 }
8927 }
8928 if ((fault_type & (prot)) != fault_type) {
8929 protection_failure:
8930 if (*real_map != map) {
8931 vm_map_unlock(*real_map);
8932 }
8933 *real_map = map;
8934
8935 if ((fault_type & VM_PROT_EXECUTE) && prot)
8936 log_stack_execution_failure((addr64_t)vaddr, prot);
8937
8938 DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL);
8939 return KERN_PROTECTION_FAILURE;
8940 }
8941
8942 /*
8943 * If this page is not pageable, we have to get
8944 * it for all possible accesses.
8945 */
8946
8947 *wired = (entry->wired_count != 0);
8948 if (*wired)
8949 fault_type = prot;
8950
8951 /*
8952 * If the entry was copy-on-write, we either ...
8953 */
8954
8955 if (entry->needs_copy) {
8956 /*
8957 * If we want to write the page, we may as well
8958 * handle that now since we've got the map locked.
8959 *
8960 * If we don't need to write the page, we just
8961 * demote the permissions allowed.
8962 */
8963
8964 if ((fault_type & VM_PROT_WRITE) || *wired) {
8965 /*
8966 * Make a new object, and place it in the
8967 * object chain. Note that no new references
8968 * have appeared -- one just moved from the
8969 * map to the new object.
8970 */
8971
8972 if (vm_map_lock_read_to_write(map)) {
8973 vm_map_lock_read(map);
8974 goto RetryLookup;
8975 }
8976 vm_object_shadow(&entry->object.vm_object,
8977 &entry->offset,
8978 (vm_map_size_t) (entry->vme_end -
8979 entry->vme_start));
8980
8981 entry->object.vm_object->shadowed = TRUE;
8982 entry->needs_copy = FALSE;
8983 vm_map_lock_write_to_read(map);
8984 }
8985 else {
8986 /*
8987 * We're attempting to read a copy-on-write
8988 * page -- don't allow writes.
8989 */
8990
8991 prot &= (~VM_PROT_WRITE);
8992 }
8993 }
8994
8995 /*
8996 * Create an object if necessary.
8997 */
8998 if (entry->object.vm_object == VM_OBJECT_NULL) {
8999
9000 if (vm_map_lock_read_to_write(map)) {
9001 vm_map_lock_read(map);
9002 goto RetryLookup;
9003 }
9004
9005 entry->object.vm_object = vm_object_allocate(
9006 (vm_map_size_t)(entry->vme_end - entry->vme_start));
9007 entry->offset = 0;
9008 vm_map_lock_write_to_read(map);
9009 }
9010
9011 /*
9012 * Return the object/offset from this entry. If the entry
9013 * was copy-on-write or empty, it has been fixed up. Also
9014 * return the protection.
9015 */
9016
9017 *offset = (vaddr - entry->vme_start) + entry->offset;
9018 *object = entry->object.vm_object;
9019 *out_prot = prot;
9020
9021 if (fault_info) {
9022 fault_info->interruptible = THREAD_UNINT; /* for now... */
9023 /* ... the caller will change "interruptible" if needed */
9024 fault_info->cluster_size = 0;
9025 fault_info->user_tag = entry->alias;
9026 fault_info->behavior = entry->behavior;
9027 fault_info->lo_offset = entry->offset;
9028 fault_info->hi_offset = (entry->vme_end - entry->vme_start) + entry->offset;
9029 fault_info->no_cache = entry->no_cache;
9030 fault_info->stealth = FALSE;
9031 fault_info->io_sync = FALSE;
9032 fault_info->cs_bypass = (entry->used_for_jit)? TRUE : FALSE;
9033 fault_info->mark_zf_absent = FALSE;
9034 }
9035
9036 /*
9037 * Lock the object to prevent it from disappearing
9038 */
9039 if (object_lock_type == OBJECT_LOCK_EXCLUSIVE)
9040 vm_object_lock(*object);
9041 else
9042 vm_object_lock_shared(*object);
9043
9044 /*
9045 * Save the version number
9046 */
9047
9048 out_version->main_timestamp = map->timestamp;
9049
9050 return KERN_SUCCESS;
9051 }
9052
9053
9054 /*
9055 * vm_map_verify:
9056 *
9057 * Verifies that the map in question has not changed
9058 * since the given version. If successful, the map
9059 * will not change until vm_map_verify_done() is called.
9060 */
9061 boolean_t
9062 vm_map_verify(
9063 register vm_map_t map,
9064 register vm_map_version_t *version) /* REF */
9065 {
9066 boolean_t result;
9067
9068 vm_map_lock_read(map);
9069 result = (map->timestamp == version->main_timestamp);
9070
9071 if (!result)
9072 vm_map_unlock_read(map);
9073
9074 return(result);
9075 }
9076
9077 /*
9078 * vm_map_verify_done:
9079 *
9080 * Releases locks acquired by a vm_map_verify.
9081 *
9082 * This is now a macro in vm/vm_map.h. It does a
9083 * vm_map_unlock_read on the map.
9084 */
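
/*
 * Illustrative sketch, not part of xnu: the lookup/verify pattern that
 * vm_map_lookup_locked(), vm_map_verify() and vm_map_verify_done() are
 * meant for, in the style of a page-fault handler.  The function name
 * example_fault_lookup and the abbreviated lock and error handling are
 * hypothetical; real callers such as vm_fault() also keep working with
 * the returned object and handle submaps, wiring and copy-on-write.
 */
#if 0 /* example only -- not compiled */
static kern_return_t
example_fault_lookup(
	vm_map_t	map,
	vm_map_offset_t	vaddr,
	vm_prot_t	fault_type)
{
	vm_map_version_t	version;
	vm_object_t		object;
	vm_object_offset_t	offset;
	vm_prot_t		prot;
	boolean_t		wired;
	vm_map_t		real_map;
	kern_return_t		kr;

	vm_map_lock_read(map);
	kr = vm_map_lookup_locked(&map, vaddr, fault_type,
				  OBJECT_LOCK_EXCLUSIVE,
				  &version, &object, &offset,
				  &prot, &wired, NULL, &real_map);
	if (kr != KERN_SUCCESS) {
		vm_map_unlock_read(map);
		return kr;
	}
	if (real_map != map)
		vm_map_unlock(real_map);
	vm_object_unlock(object);
	vm_map_unlock_read(map);

	/* ... blocking work (page-in, zero-fill, ...) happens here ... */

	/* re-validate: has the map changed since "version" was taken? */
	if (!vm_map_verify(map, &version)) {
		/* it did: the caller must redo the lookup */
		return KERN_ABORTED;
	}
	/* it did not: map is read-locked again; release when done */
	vm_map_verify_done(map, &version);
	return KERN_SUCCESS;
}
#endif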
9085
9086
9087 /*
9088 * TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
9089 * Goes away after regular vm_region_recurse function migrates to
9090 * 64 bits
9091 * vm_region_recurse: A form of vm_region which follows the
9092 * submaps in a target map
9093 *
9094 */
9095
9096 kern_return_t
9097 vm_map_region_recurse_64(
9098 vm_map_t map,
9099 vm_map_offset_t *address, /* IN/OUT */
9100 vm_map_size_t *size, /* OUT */
9101 natural_t *nesting_depth, /* IN/OUT */
9102 vm_region_submap_info_64_t submap_info, /* IN/OUT */
9103 mach_msg_type_number_t *count) /* IN/OUT */
9104 {
9105 vm_region_extended_info_data_t extended;
9106 vm_map_entry_t tmp_entry;
9107 vm_map_offset_t user_address;
9108 unsigned int user_max_depth;
9109
9110 /*
9111 * "curr_entry" is the VM map entry preceding or including the
9112 * address we're looking for.
9113 * "curr_map" is the map or sub-map containing "curr_entry".
9114 * "curr_address" is the equivalent of the top map's "user_address"
9115 * in the current map.
9116 * "curr_offset" is the cumulated offset of "curr_map" in the
9117 * target task's address space.
9118 * "curr_depth" is the depth of "curr_map" in the chain of
9119 * sub-maps.
9120 *
9121 * "curr_max_below" and "curr_max_above" limit the range (around
9122 * "curr_address") we should take into account in the current (sub)map.
9123 * They limit the range to what's visible through the map entries
9124 * we've traversed from the top map to the current map.
9125 *
9126 */
9127 vm_map_entry_t curr_entry;
9128 vm_map_address_t curr_address;
9129 vm_map_offset_t curr_offset;
9130 vm_map_t curr_map;
9131 unsigned int curr_depth;
9132 vm_map_offset_t curr_max_below, curr_max_above;
9133 vm_map_offset_t curr_skip;
9134
9135 /*
9136 * "next_" is the same as "curr_" but for the VM region immediately
9137 * after the address we're looking for. We need to keep track of this
9138 * too because we want to return info about that region if the
9139 * address we're looking for is not mapped.
9140 */
9141 vm_map_entry_t next_entry;
9142 vm_map_offset_t next_offset;
9143 vm_map_offset_t next_address;
9144 vm_map_t next_map;
9145 unsigned int next_depth;
9146 vm_map_offset_t next_max_below, next_max_above;
9147 vm_map_offset_t next_skip;
9148
9149 boolean_t look_for_pages;
9150 vm_region_submap_short_info_64_t short_info;
9151
9152 if (map == VM_MAP_NULL) {
9153 /* no address space to work on */
9154 return KERN_INVALID_ARGUMENT;
9155 }
9156
9157 if (*count < VM_REGION_SUBMAP_INFO_COUNT_64) {
9158 if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) {
9159 /*
9160 * "info" structure is not big enough and
9161 * would overflow
9162 */
9163 return KERN_INVALID_ARGUMENT;
9164 } else {
9165 look_for_pages = FALSE;
9166 *count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
9167 short_info = (vm_region_submap_short_info_64_t) submap_info;
9168 submap_info = NULL;
9169 }
9170 } else {
9171 look_for_pages = TRUE;
9172 *count = VM_REGION_SUBMAP_INFO_COUNT_64;
9173 short_info = NULL;
9174 }
9175
9176
9177 user_address = *address;
9178 user_max_depth = *nesting_depth;
9179
9180 curr_entry = NULL;
9181 curr_map = map;
9182 curr_address = user_address;
9183 curr_offset = 0;
9184 curr_skip = 0;
9185 curr_depth = 0;
9186 curr_max_above = ((vm_map_offset_t) -1) - curr_address;
9187 curr_max_below = curr_address;
9188
9189 next_entry = NULL;
9190 next_map = NULL;
9191 next_address = 0;
9192 next_offset = 0;
9193 next_skip = 0;
9194 next_depth = 0;
9195 next_max_above = (vm_map_offset_t) -1;
9196 next_max_below = (vm_map_offset_t) -1;
9197
9198 if (not_in_kdp) {
9199 vm_map_lock_read(curr_map);
9200 }
9201
9202 for (;;) {
9203 if (vm_map_lookup_entry(curr_map,
9204 curr_address,
9205 &tmp_entry)) {
9206 /* tmp_entry contains the address we're looking for */
9207 curr_entry = tmp_entry;
9208 } else {
9209 vm_map_offset_t skip;
9210 /*
9211 * The address is not mapped. "tmp_entry" is the
9212 * map entry preceding the address. We want the next
9213 * one, if it exists.
9214 */
9215 curr_entry = tmp_entry->vme_next;
9216
9217 if (curr_entry == vm_map_to_entry(curr_map) ||
9218 (curr_entry->vme_start >=
9219 curr_address + curr_max_above)) {
9220 /* no next entry at this level: stop looking */
9221 if (not_in_kdp) {
9222 vm_map_unlock_read(curr_map);
9223 }
9224 curr_entry = NULL;
9225 curr_map = NULL;
9226 curr_offset = 0;
9227 curr_depth = 0;
9228 curr_max_above = 0;
9229 curr_max_below = 0;
9230 break;
9231 }
9232
9233 /* adjust current address and offset */
9234 skip = curr_entry->vme_start - curr_address;
9235 curr_address = curr_entry->vme_start;
9236 curr_skip = skip;
9237 curr_offset += skip;
9238 curr_max_above -= skip;
9239 curr_max_below = 0;
9240 }
9241
9242 /*
9243 * Is the next entry at this level closer to the address (or
9244 * deeper in the submap chain) than the one we had
9245 * so far ?
9246 */
9247 tmp_entry = curr_entry->vme_next;
9248 if (tmp_entry == vm_map_to_entry(curr_map)) {
9249 /* no next entry at this level */
9250 } else if (tmp_entry->vme_start >=
9251 curr_address + curr_max_above) {
9252 /*
9253 * tmp_entry is beyond the scope of what we mapped of
9254 * this submap in the upper level: ignore it.
9255 */
9256 } else if ((next_entry == NULL) ||
9257 (tmp_entry->vme_start + curr_offset <=
9258 next_entry->vme_start + next_offset)) {
9259 /*
9260 * We didn't have a "next_entry" or this one is
9261 * closer to the address we're looking for:
9262 * use this "tmp_entry" as the new "next_entry".
9263 */
9264 if (next_entry != NULL) {
9265 /* unlock the last "next_map" */
9266 if (next_map != curr_map && not_in_kdp) {
9267 vm_map_unlock_read(next_map);
9268 }
9269 }
9270 next_entry = tmp_entry;
9271 next_map = curr_map;
9272 next_depth = curr_depth;
9273 next_address = next_entry->vme_start;
9274 next_skip = curr_skip;
9275 next_offset = curr_offset;
9276 next_offset += (next_address - curr_address);
9277 next_max_above = MIN(next_max_above, curr_max_above);
9278 next_max_above = MIN(next_max_above,
9279 next_entry->vme_end - next_address);
9280 next_max_below = MIN(next_max_below, curr_max_below);
9281 next_max_below = MIN(next_max_below,
9282 next_address - next_entry->vme_start);
9283 }
9284
9285 /*
9286 * "curr_max_{above,below}" allow us to keep track of the
9287 * portion of the submap that is actually mapped at this level:
9288 * the rest of that submap is irrelevant to us, since it's not
9289 * mapped here.
9290 * The relevant portion of the map starts at
9291 * "curr_entry->offset" up to the size of "curr_entry".
9292 */
9293 curr_max_above = MIN(curr_max_above,
9294 curr_entry->vme_end - curr_address);
9295 curr_max_below = MIN(curr_max_below,
9296 curr_address - curr_entry->vme_start);
9297
9298 if (!curr_entry->is_sub_map ||
9299 curr_depth >= user_max_depth) {
9300 /*
9301 * We hit a leaf map or we reached the maximum depth
9302 * we could, so stop looking. Keep the current map
9303 * locked.
9304 */
9305 break;
9306 }
9307
9308 /*
9309 * Get down to the next submap level.
9310 */
9311
9312 /*
9313 * Lock the next level and unlock the current level,
9314 * unless we need to keep it locked to access the "next_entry"
9315 * later.
9316 */
9317 if (not_in_kdp) {
9318 vm_map_lock_read(curr_entry->object.sub_map);
9319 }
9320 if (curr_map == next_map) {
9321 /* keep "next_map" locked in case we need it */
9322 } else {
9323 /* release this map */
9324 if (not_in_kdp)
9325 vm_map_unlock_read(curr_map);
9326 }
9327
9328 /*
9329 * Adjust the offset. "curr_entry" maps the submap
9330 * at relative address "curr_entry->vme_start" in the
9331 * curr_map but skips the first "curr_entry->offset"
9332 * bytes of the submap.
9333 * "curr_offset" always represents the offset of a virtual
9334 * address in the curr_map relative to the absolute address
9335 * space (i.e. the top-level VM map).
9336 */
9337 curr_offset +=
9338 (curr_entry->offset - curr_entry->vme_start);
9339 curr_address = user_address + curr_offset;
9340 /* switch to the submap */
9341 curr_map = curr_entry->object.sub_map;
9342 curr_depth++;
9343 curr_entry = NULL;
9344 }
9345
9346 if (curr_entry == NULL) {
9347 /* no VM region contains the address... */
9348 if (next_entry == NULL) {
9349 /* ... and no VM region follows it either */
9350 return KERN_INVALID_ADDRESS;
9351 }
9352 /* ... gather info about the next VM region */
9353 curr_entry = next_entry;
9354 curr_map = next_map; /* still locked ... */
9355 curr_address = next_address;
9356 curr_skip = next_skip;
9357 curr_offset = next_offset;
9358 curr_depth = next_depth;
9359 curr_max_above = next_max_above;
9360 curr_max_below = next_max_below;
9361 if (curr_map == map) {
9362 user_address = curr_address;
9363 }
9364 } else {
9365 /* we won't need "next_entry" after all */
9366 if (next_entry != NULL) {
9367 /* release "next_map" */
9368 if (next_map != curr_map && not_in_kdp) {
9369 vm_map_unlock_read(next_map);
9370 }
9371 }
9372 }
9373 next_entry = NULL;
9374 next_map = NULL;
9375 next_offset = 0;
9376 next_skip = 0;
9377 next_depth = 0;
9378 next_max_below = -1;
9379 next_max_above = -1;
9380
9381 *nesting_depth = curr_depth;
9382 *size = curr_max_above + curr_max_below;
9383 *address = user_address + curr_skip - curr_max_below;
9384
9385 // LP64todo: all the current tools are 32bit, obviously never worked for 64b
9386 // so probably should be a real 32b ID vs. ptr.
9387 // Current users just check for equality
9388 #define INFO_MAKE_OBJECT_ID(p) ((uint32_t)(uintptr_t)p)
9389
9390 if (look_for_pages) {
9391 submap_info->user_tag = curr_entry->alias;
9392 submap_info->offset = curr_entry->offset;
9393 submap_info->protection = curr_entry->protection;
9394 submap_info->inheritance = curr_entry->inheritance;
9395 submap_info->max_protection = curr_entry->max_protection;
9396 submap_info->behavior = curr_entry->behavior;
9397 submap_info->user_wired_count = curr_entry->user_wired_count;
9398 submap_info->is_submap = curr_entry->is_sub_map;
9399 submap_info->object_id = INFO_MAKE_OBJECT_ID(curr_entry->object.vm_object);
9400 } else {
9401 short_info->user_tag = curr_entry->alias;
9402 short_info->offset = curr_entry->offset;
9403 short_info->protection = curr_entry->protection;
9404 short_info->inheritance = curr_entry->inheritance;
9405 short_info->max_protection = curr_entry->max_protection;
9406 short_info->behavior = curr_entry->behavior;
9407 short_info->user_wired_count = curr_entry->user_wired_count;
9408 short_info->is_submap = curr_entry->is_sub_map;
9409 short_info->object_id = INFO_MAKE_OBJECT_ID(curr_entry->object.vm_object);
9410 }
9411
9412 extended.pages_resident = 0;
9413 extended.pages_swapped_out = 0;
9414 extended.pages_shared_now_private = 0;
9415 extended.pages_dirtied = 0;
9416 extended.external_pager = 0;
9417 extended.shadow_depth = 0;
9418
9419 if (not_in_kdp) {
9420 if (!curr_entry->is_sub_map) {
9421 vm_map_offset_t range_start, range_end;
9422 range_start = MAX((curr_address - curr_max_below),
9423 curr_entry->vme_start);
9424 range_end = MIN((curr_address + curr_max_above),
9425 curr_entry->vme_end);
9426 vm_map_region_walk(curr_map,
9427 range_start,
9428 curr_entry,
9429 (curr_entry->offset +
9430 (range_start -
9431 curr_entry->vme_start)),
9432 range_end - range_start,
9433 &extended,
9434 look_for_pages);
9435 if (extended.external_pager &&
9436 extended.ref_count == 2 &&
9437 extended.share_mode == SM_SHARED) {
9438 extended.share_mode = SM_PRIVATE;
9439 }
9440 } else {
9441 if (curr_entry->use_pmap) {
9442 extended.share_mode = SM_TRUESHARED;
9443 } else {
9444 extended.share_mode = SM_PRIVATE;
9445 }
9446 extended.ref_count =
9447 curr_entry->object.sub_map->ref_count;
9448 }
9449 }
9450
9451 if (look_for_pages) {
9452 submap_info->pages_resident = extended.pages_resident;
9453 submap_info->pages_swapped_out = extended.pages_swapped_out;
9454 submap_info->pages_shared_now_private =
9455 extended.pages_shared_now_private;
9456 submap_info->pages_dirtied = extended.pages_dirtied;
9457 submap_info->external_pager = extended.external_pager;
9458 submap_info->shadow_depth = extended.shadow_depth;
9459 submap_info->share_mode = extended.share_mode;
9460 submap_info->ref_count = extended.ref_count;
9461 } else {
9462 short_info->external_pager = extended.external_pager;
9463 short_info->shadow_depth = extended.shadow_depth;
9464 short_info->share_mode = extended.share_mode;
9465 short_info->ref_count = extended.ref_count;
9466 }
9467
9468 if (not_in_kdp) {
9469 vm_map_unlock_read(curr_map);
9470 }
9471
9472 return KERN_SUCCESS;
9473 }
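
/*
 * Illustrative user-space sketch, not part of xnu: walking a task's
 * address space with mach_vm_region_recurse(), which is backed by
 * vm_map_region_recurse_64() above.  Starting at depth 0 and the
 * printf() format are arbitrary choices for the example.
 */
#if 0 /* example only -- user-space code, not compiled */
#include <stdio.h>
#include <mach/mach.h>
#include <mach/mach_vm.h>

static void
example_walk_regions(task_t task)
{
	mach_vm_address_t		address = 0;
	mach_vm_size_t			size = 0;
	natural_t			depth = 0;
	vm_region_submap_info_data_64_t	info;
	mach_msg_type_number_t		count;

	for (;;) {
		count = VM_REGION_SUBMAP_INFO_COUNT_64;
		if (mach_vm_region_recurse(task, &address, &size, &depth,
					   (vm_region_recurse_info_t)&info,
					   &count) != KERN_SUCCESS)
			break;	/* KERN_INVALID_ADDRESS: past the last region */

		printf("0x%llx-0x%llx depth=%u prot=%d submap=%d\n",
		       (unsigned long long)address,
		       (unsigned long long)(address + size),
		       depth, info.protection, info.is_submap);

		if (info.is_submap)
			depth++;		/* descend into the nested map */
		else
			address += size;	/* move to the next region */
	}
}
#endif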
9474
9475 /*
9476 * vm_region:
9477 *
9478 * User call to obtain information about a region in
9479 * a task's address map. Currently, only one flavor is
9480 * supported.
9481 *
9482 * XXX The reserved and behavior fields cannot be filled
9483 * in until the vm merge from the IK is completed, and
9484 * vm_reserve is implemented.
9485 */
9486
9487 kern_return_t
9488 vm_map_region(
9489 vm_map_t map,
9490 vm_map_offset_t *address, /* IN/OUT */
9491 vm_map_size_t *size, /* OUT */
9492 vm_region_flavor_t flavor, /* IN */
9493 vm_region_info_t info, /* OUT */
9494 mach_msg_type_number_t *count, /* IN/OUT */
9495 mach_port_t *object_name) /* OUT */
9496 {
9497 vm_map_entry_t tmp_entry;
9498 vm_map_entry_t entry;
9499 vm_map_offset_t start;
9500
9501 if (map == VM_MAP_NULL)
9502 return(KERN_INVALID_ARGUMENT);
9503
9504 switch (flavor) {
9505
9506 case VM_REGION_BASIC_INFO:
9507 /* legacy for old 32-bit objects info */
9508 {
9509 vm_region_basic_info_t basic;
9510
9511 if (*count < VM_REGION_BASIC_INFO_COUNT)
9512 return(KERN_INVALID_ARGUMENT);
9513
9514 basic = (vm_region_basic_info_t) info;
9515 *count = VM_REGION_BASIC_INFO_COUNT;
9516
9517 vm_map_lock_read(map);
9518
9519 start = *address;
9520 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
9521 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
9522 vm_map_unlock_read(map);
9523 return(KERN_INVALID_ADDRESS);
9524 }
9525 } else {
9526 entry = tmp_entry;
9527 }
9528
9529 start = entry->vme_start;
9530
9531 basic->offset = (uint32_t)entry->offset;
9532 basic->protection = entry->protection;
9533 basic->inheritance = entry->inheritance;
9534 basic->max_protection = entry->max_protection;
9535 basic->behavior = entry->behavior;
9536 basic->user_wired_count = entry->user_wired_count;
9537 basic->reserved = entry->is_sub_map;
9538 *address = start;
9539 *size = (entry->vme_end - start);
9540
9541 if (object_name) *object_name = IP_NULL;
9542 if (entry->is_sub_map) {
9543 basic->shared = FALSE;
9544 } else {
9545 basic->shared = entry->is_shared;
9546 }
9547
9548 vm_map_unlock_read(map);
9549 return(KERN_SUCCESS);
9550 }
9551
9552 case VM_REGION_BASIC_INFO_64:
9553 {
9554 vm_region_basic_info_64_t basic;
9555
9556 if (*count < VM_REGION_BASIC_INFO_COUNT_64)
9557 return(KERN_INVALID_ARGUMENT);
9558
9559 basic = (vm_region_basic_info_64_t) info;
9560 *count = VM_REGION_BASIC_INFO_COUNT_64;
9561
9562 vm_map_lock_read(map);
9563
9564 start = *address;
9565 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
9566 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
9567 vm_map_unlock_read(map);
9568 return(KERN_INVALID_ADDRESS);
9569 }
9570 } else {
9571 entry = tmp_entry;
9572 }
9573
9574 start = entry->vme_start;
9575
9576 basic->offset = entry->offset;
9577 basic->protection = entry->protection;
9578 basic->inheritance = entry->inheritance;
9579 basic->max_protection = entry->max_protection;
9580 basic->behavior = entry->behavior;
9581 basic->user_wired_count = entry->user_wired_count;
9582 basic->reserved = entry->is_sub_map;
9583 *address = start;
9584 *size = (entry->vme_end - start);
9585
9586 if (object_name) *object_name = IP_NULL;
9587 if (entry->is_sub_map) {
9588 basic->shared = FALSE;
9589 } else {
9590 basic->shared = entry->is_shared;
9591 }
9592
9593 vm_map_unlock_read(map);
9594 return(KERN_SUCCESS);
9595 }
9596 case VM_REGION_EXTENDED_INFO:
9597 {
9598 vm_region_extended_info_t extended;
9599
9600 if (*count < VM_REGION_EXTENDED_INFO_COUNT)
9601 return(KERN_INVALID_ARGUMENT);
9602
9603 extended = (vm_region_extended_info_t) info;
9604 *count = VM_REGION_EXTENDED_INFO_COUNT;
9605
9606 vm_map_lock_read(map);
9607
9608 start = *address;
9609 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
9610 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
9611 vm_map_unlock_read(map);
9612 return(KERN_INVALID_ADDRESS);
9613 }
9614 } else {
9615 entry = tmp_entry;
9616 }
9617 start = entry->vme_start;
9618
9619 extended->protection = entry->protection;
9620 extended->user_tag = entry->alias;
9621 extended->pages_resident = 0;
9622 extended->pages_swapped_out = 0;
9623 extended->pages_shared_now_private = 0;
9624 extended->pages_dirtied = 0;
9625 extended->external_pager = 0;
9626 extended->shadow_depth = 0;
9627
9628 vm_map_region_walk(map, start, entry, entry->offset, entry->vme_end - start, extended, TRUE);
9629
9630 if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED)
9631 extended->share_mode = SM_PRIVATE;
9632
9633 if (object_name)
9634 *object_name = IP_NULL;
9635 *address = start;
9636 *size = (entry->vme_end - start);
9637
9638 vm_map_unlock_read(map);
9639 return(KERN_SUCCESS);
9640 }
9641 case VM_REGION_TOP_INFO:
9642 {
9643 vm_region_top_info_t top;
9644
9645 if (*count < VM_REGION_TOP_INFO_COUNT)
9646 return(KERN_INVALID_ARGUMENT);
9647
9648 top = (vm_region_top_info_t) info;
9649 *count = VM_REGION_TOP_INFO_COUNT;
9650
9651 vm_map_lock_read(map);
9652
9653 start = *address;
9654 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
9655 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
9656 vm_map_unlock_read(map);
9657 return(KERN_INVALID_ADDRESS);
9658 }
9659 } else {
9660 entry = tmp_entry;
9661
9662 }
9663 start = entry->vme_start;
9664
9665 top->private_pages_resident = 0;
9666 top->shared_pages_resident = 0;
9667
9668 vm_map_region_top_walk(entry, top);
9669
9670 if (object_name)
9671 *object_name = IP_NULL;
9672 *address = start;
9673 *size = (entry->vme_end - start);
9674
9675 vm_map_unlock_read(map);
9676 return(KERN_SUCCESS);
9677 }
9678 default:
9679 return(KERN_INVALID_ARGUMENT);
9680 }
9681 }
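
/*
 * Illustrative user-space sketch, not part of xnu: querying the region
 * containing an address with mach_vm_region() and the
 * VM_REGION_BASIC_INFO_64 flavor handled by vm_map_region() above.
 * The function name is hypothetical; object_name comes back as
 * MACH_PORT_NULL, since this implementation returns IP_NULL.
 */
#if 0 /* example only -- user-space code, not compiled */
#include <mach/mach.h>
#include <mach/mach_vm.h>

static kern_return_t
example_query_region(task_t task, mach_vm_address_t addr)
{
	mach_vm_address_t		address = addr;
	mach_vm_size_t			size = 0;
	vm_region_basic_info_data_64_t	info;
	mach_msg_type_number_t		count = VM_REGION_BASIC_INFO_COUNT_64;
	mach_port_t			object_name = MACH_PORT_NULL;
	kern_return_t			kr;

	kr = mach_vm_region(task, &address, &size,
			    VM_REGION_BASIC_INFO_64,
			    (vm_region_info_t)&info,
			    &count, &object_name);
	if (kr == KERN_SUCCESS) {
		/* "address" may have been rounded up to the start of the
		 * region at or after "addr"; "size" covers that region */
	}
	return kr;
}
#endif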
9682
9683 #define OBJ_RESIDENT_COUNT(obj, entry_size) \
9684 MIN((entry_size), \
9685 ((obj)->all_reusable ? \
9686 (obj)->wired_page_count : \
9687 (obj)->resident_page_count - (obj)->reusable_page_count))
9688
9689 void
9690 vm_map_region_top_walk(
9691 vm_map_entry_t entry,
9692 vm_region_top_info_t top)
9693 {
9694
9695 if (entry->object.vm_object == 0 || entry->is_sub_map) {
9696 top->share_mode = SM_EMPTY;
9697 top->ref_count = 0;
9698 top->obj_id = 0;
9699 return;
9700 }
9701
9702 {
9703 struct vm_object *obj, *tmp_obj;
9704 int ref_count;
9705 uint32_t entry_size;
9706
9707 entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64);
9708
9709 obj = entry->object.vm_object;
9710
9711 vm_object_lock(obj);
9712
9713 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
9714 ref_count--;
9715
9716 assert(obj->reusable_page_count <= obj->resident_page_count);
9717 if (obj->shadow) {
9718 if (ref_count == 1)
9719 top->private_pages_resident =
9720 OBJ_RESIDENT_COUNT(obj, entry_size);
9721 else
9722 top->shared_pages_resident =
9723 OBJ_RESIDENT_COUNT(obj, entry_size);
9724 top->ref_count = ref_count;
9725 top->share_mode = SM_COW;
9726
9727 while ((tmp_obj = obj->shadow)) {
9728 vm_object_lock(tmp_obj);
9729 vm_object_unlock(obj);
9730 obj = tmp_obj;
9731
9732 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
9733 ref_count--;
9734
9735 assert(obj->reusable_page_count <= obj->resident_page_count);
9736 top->shared_pages_resident +=
9737 OBJ_RESIDENT_COUNT(obj, entry_size);
9738 top->ref_count += ref_count - 1;
9739 }
9740 } else {
9741 if (entry->superpage_size) {
9742 top->share_mode = SM_LARGE_PAGE;
9743 top->shared_pages_resident = 0;
9744 top->private_pages_resident = entry_size;
9745 } else if (entry->needs_copy) {
9746 top->share_mode = SM_COW;
9747 top->shared_pages_resident =
9748 OBJ_RESIDENT_COUNT(obj, entry_size);
9749 } else {
9750 if (ref_count == 1 ||
9751 (ref_count == 2 && !(obj->pager_trusted) && !(obj->internal))) {
9752 top->share_mode = SM_PRIVATE;
9753 top->private_pages_resident =
9754 OBJ_RESIDENT_COUNT(obj,
9755 entry_size);
9756 } else {
9757 top->share_mode = SM_SHARED;
9758 top->shared_pages_resident =
9759 OBJ_RESIDENT_COUNT(obj,
9760 entry_size);
9761 }
9762 }
9763 top->ref_count = ref_count;
9764 }
9765 /* XXX K64: obj_id will be truncated */
9766 top->obj_id = (unsigned int) (uintptr_t)obj;
9767
9768 vm_object_unlock(obj);
9769 }
9770 }
9771
9772 void
9773 vm_map_region_walk(
9774 vm_map_t map,
9775 vm_map_offset_t va,
9776 vm_map_entry_t entry,
9777 vm_object_offset_t offset,
9778 vm_object_size_t range,
9779 vm_region_extended_info_t extended,
9780 boolean_t look_for_pages)
9781 {
9782 register struct vm_object *obj, *tmp_obj;
9783 register vm_map_offset_t last_offset;
9784 register int i;
9785 register int ref_count;
9786 struct vm_object *shadow_object;
9787 int shadow_depth;
9788
9789 if ((entry->object.vm_object == 0) ||
9790 (entry->is_sub_map) ||
9791 (entry->object.vm_object->phys_contiguous &&
9792 !entry->superpage_size)) {
9793 extended->share_mode = SM_EMPTY;
9794 extended->ref_count = 0;
9795 return;
9796 }
9797
9798 if (entry->superpage_size) {
9799 extended->shadow_depth = 0;
9800 extended->share_mode = SM_LARGE_PAGE;
9801 extended->ref_count = 1;
9802 extended->external_pager = 0;
9803 extended->pages_resident = (unsigned int)(range >> PAGE_SHIFT);
9804 extended->shadow_depth = 0;
9805 return;
9806 }
9807
9808 {
9809 obj = entry->object.vm_object;
9810
9811 vm_object_lock(obj);
9812
9813 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
9814 ref_count--;
9815
9816 if (look_for_pages) {
9817 for (last_offset = offset + range;
9818 offset < last_offset;
9819 offset += PAGE_SIZE_64, va += PAGE_SIZE)
9820 vm_map_region_look_for_page(map, va, obj,
9821 offset, ref_count,
9822 0, extended);
9823 } else {
9824 shadow_object = obj->shadow;
9825 shadow_depth = 0;
9826
9827 if ( !(obj->pager_trusted) && !(obj->internal))
9828 extended->external_pager = 1;
9829
9830 if (shadow_object != VM_OBJECT_NULL) {
9831 vm_object_lock(shadow_object);
9832 for (;
9833 shadow_object != VM_OBJECT_NULL;
9834 shadow_depth++) {
9835 vm_object_t next_shadow;
9836
9837 if ( !(shadow_object->pager_trusted) &&
9838 !(shadow_object->internal))
9839 extended->external_pager = 1;
9840
9841 next_shadow = shadow_object->shadow;
9842 if (next_shadow) {
9843 vm_object_lock(next_shadow);
9844 }
9845 vm_object_unlock(shadow_object);
9846 shadow_object = next_shadow;
9847 }
9848 }
9849 extended->shadow_depth = shadow_depth;
9850 }
9851
9852 if (extended->shadow_depth || entry->needs_copy)
9853 extended->share_mode = SM_COW;
9854 else {
9855 if (ref_count == 1)
9856 extended->share_mode = SM_PRIVATE;
9857 else {
9858 if (obj->true_share)
9859 extended->share_mode = SM_TRUESHARED;
9860 else
9861 extended->share_mode = SM_SHARED;
9862 }
9863 }
9864 extended->ref_count = ref_count - extended->shadow_depth;
9865
9866 for (i = 0; i < extended->shadow_depth; i++) {
9867 if ((tmp_obj = obj->shadow) == 0)
9868 break;
9869 vm_object_lock(tmp_obj);
9870 vm_object_unlock(obj);
9871
9872 if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress)
9873 ref_count--;
9874
9875 extended->ref_count += ref_count;
9876 obj = tmp_obj;
9877 }
9878 vm_object_unlock(obj);
9879
9880 if (extended->share_mode == SM_SHARED) {
9881 register vm_map_entry_t cur;
9882 register vm_map_entry_t last;
9883 int my_refs;
9884
9885 obj = entry->object.vm_object;
9886 last = vm_map_to_entry(map);
9887 my_refs = 0;
9888
9889 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
9890 ref_count--;
9891 for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next)
9892 my_refs += vm_map_region_count_obj_refs(cur, obj);
9893
9894 if (my_refs == ref_count)
9895 extended->share_mode = SM_PRIVATE_ALIASED;
9896 else if (my_refs > 1)
9897 extended->share_mode = SM_SHARED_ALIASED;
9898 }
9899 }
9900 }
9901
9902
9903 /* object is locked on entry and locked on return */
9904
9905
9906 static void
9907 vm_map_region_look_for_page(
9908 __unused vm_map_t map,
9909 __unused vm_map_offset_t va,
9910 vm_object_t object,
9911 vm_object_offset_t offset,
9912 int max_refcnt,
9913 int depth,
9914 vm_region_extended_info_t extended)
9915 {
9916 register vm_page_t p;
9917 register vm_object_t shadow;
9918 register int ref_count;
9919 vm_object_t caller_object;
9920 #if MACH_PAGEMAP
9921 kern_return_t kr;
9922 #endif
9923 shadow = object->shadow;
9924 caller_object = object;
9925
9926
9927 while (TRUE) {
9928
9929 if ( !(object->pager_trusted) && !(object->internal))
9930 extended->external_pager = 1;
9931
9932 if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
9933 if (shadow && (max_refcnt == 1))
9934 extended->pages_shared_now_private++;
9935
9936 if (!p->fictitious &&
9937 (p->dirty || pmap_is_modified(p->phys_page)))
9938 extended->pages_dirtied++;
9939
9940 extended->pages_resident++;
9941
9942 if(object != caller_object)
9943 vm_object_unlock(object);
9944
9945 return;
9946 }
9947 #if MACH_PAGEMAP
9948 if (object->existence_map) {
9949 if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_EXISTS) {
9950
9951 extended->pages_swapped_out++;
9952
9953 if(object != caller_object)
9954 vm_object_unlock(object);
9955
9956 return;
9957 }
9958 } else if (object->internal &&
9959 object->alive &&
9960 !object->terminating &&
9961 object->pager_ready) {
9962
9963 memory_object_t pager;
9964
9965 vm_object_paging_begin(object);
9966 pager = object->pager;
9967 vm_object_unlock(object);
9968
9969 kr = memory_object_data_request(
9970 pager,
9971 offset + object->paging_offset,
9972 0, /* just poke the pager */
9973 VM_PROT_READ,
9974 NULL);
9975
9976 vm_object_lock(object);
9977 vm_object_paging_end(object);
9978
9979 if (kr == KERN_SUCCESS) {
9980 /* the pager has that page */
9981 extended->pages_swapped_out++;
9982 if (object != caller_object)
9983 vm_object_unlock(object);
9984 return;
9985 }
9986 }
9987 #endif /* MACH_PAGEMAP */
9988
9989 if (shadow) {
9990 vm_object_lock(shadow);
9991
9992 if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress)
9993 ref_count--;
9994
9995 if (++depth > extended->shadow_depth)
9996 extended->shadow_depth = depth;
9997
9998 if (ref_count > max_refcnt)
9999 max_refcnt = ref_count;
10000
10001 if(object != caller_object)
10002 vm_object_unlock(object);
10003
10004 offset = offset + object->vo_shadow_offset;
10005 object = shadow;
10006 shadow = object->shadow;
10007 continue;
10008 }
10009 if(object != caller_object)
10010 vm_object_unlock(object);
10011 break;
10012 }
10013 }
10014
10015 static int
10016 vm_map_region_count_obj_refs(
10017 vm_map_entry_t entry,
10018 vm_object_t object)
10019 {
10020 register int ref_count;
10021 register vm_object_t chk_obj;
10022 register vm_object_t tmp_obj;
10023
10024 if (entry->object.vm_object == 0)
10025 return(0);
10026
10027 if (entry->is_sub_map)
10028 return(0);
10029 else {
10030 ref_count = 0;
10031
10032 chk_obj = entry->object.vm_object;
10033 vm_object_lock(chk_obj);
10034
10035 while (chk_obj) {
10036 if (chk_obj == object)
10037 ref_count++;
10038 tmp_obj = chk_obj->shadow;
10039 if (tmp_obj)
10040 vm_object_lock(tmp_obj);
10041 vm_object_unlock(chk_obj);
10042
10043 chk_obj = tmp_obj;
10044 }
10045 }
10046 return(ref_count);
10047 }
10048
10049
10050 /*
10051 * Routine: vm_map_simplify
10052 *
10053 * Description:
10054 * Attempt to simplify the map representation in
10055 * the vicinity of the given starting address.
10056 * Note:
10057 * This routine is intended primarily to keep the
10058 * kernel maps more compact -- they generally don't
10059 * benefit from the "expand a map entry" technology
10060 * at allocation time because the adjacent entry
10061 * is often wired down.
10062 */
10063 void
10064 vm_map_simplify_entry(
10065 vm_map_t map,
10066 vm_map_entry_t this_entry)
10067 {
10068 vm_map_entry_t prev_entry;
10069
10070 counter(c_vm_map_simplify_entry_called++);
10071
10072 prev_entry = this_entry->vme_prev;
10073
10074 if ((this_entry != vm_map_to_entry(map)) &&
10075 (prev_entry != vm_map_to_entry(map)) &&
10076
10077 (prev_entry->vme_end == this_entry->vme_start) &&
10078
10079 (prev_entry->is_sub_map == this_entry->is_sub_map) &&
10080
10081 (prev_entry->object.vm_object == this_entry->object.vm_object) &&
10082 ((prev_entry->offset + (prev_entry->vme_end -
10083 prev_entry->vme_start))
10084 == this_entry->offset) &&
10085
10086 (prev_entry->inheritance == this_entry->inheritance) &&
10087 (prev_entry->protection == this_entry->protection) &&
10088 (prev_entry->max_protection == this_entry->max_protection) &&
10089 (prev_entry->behavior == this_entry->behavior) &&
10090 (prev_entry->alias == this_entry->alias) &&
10091 (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) &&
10092 (prev_entry->no_cache == this_entry->no_cache) &&
10093 (prev_entry->wired_count == this_entry->wired_count) &&
10094 (prev_entry->user_wired_count == this_entry->user_wired_count) &&
10095
10096 (prev_entry->needs_copy == this_entry->needs_copy) &&
10097 (prev_entry->permanent == this_entry->permanent) &&
10098
10099 (prev_entry->use_pmap == FALSE) &&
10100 (this_entry->use_pmap == FALSE) &&
10101 (prev_entry->in_transition == FALSE) &&
10102 (this_entry->in_transition == FALSE) &&
10103 (prev_entry->needs_wakeup == FALSE) &&
10104 (this_entry->needs_wakeup == FALSE) &&
10105 (prev_entry->is_shared == FALSE) &&
10106 (this_entry->is_shared == FALSE)
10107 ) {
10108 _vm_map_store_entry_unlink(&map->hdr, prev_entry);
10109 assert(prev_entry->vme_start < this_entry->vme_end);
10110 this_entry->vme_start = prev_entry->vme_start;
10111 this_entry->offset = prev_entry->offset;
10112 if (prev_entry->is_sub_map) {
10113 vm_map_deallocate(prev_entry->object.sub_map);
10114 } else {
10115 vm_object_deallocate(prev_entry->object.vm_object);
10116 }
10117 vm_map_entry_dispose(map, prev_entry);
10118 SAVE_HINT_MAP_WRITE(map, this_entry);
10119 counter(c_vm_map_simplified++);
10120 }
10121 }
10122
10123 void
10124 vm_map_simplify(
10125 vm_map_t map,
10126 vm_map_offset_t start)
10127 {
10128 vm_map_entry_t this_entry;
10129
10130 vm_map_lock(map);
10131 if (vm_map_lookup_entry(map, start, &this_entry)) {
10132 vm_map_simplify_entry(map, this_entry);
10133 vm_map_simplify_entry(map, this_entry->vme_next);
10134 }
10135 counter(c_vm_map_simplify_called++);
10136 vm_map_unlock(map);
10137 }
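/*
 * Minimal usage sketch: a caller that has just finished editing entries
 * around an address asks the map to re-coalesce them.  The function and
 * variable names prefixed "example_" are hypothetical, not part of the
 * kernel interface.
 */
#if 0	/* illustrative sketch only */
static void
example_simplify_caller(vm_map_t example_map, vm_map_offset_t example_addr)
{
        /* takes the map lock itself and tries to merge adjacent entries */
        vm_map_simplify(example_map, example_addr);
}
#endif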
10138
10139 static void
10140 vm_map_simplify_range(
10141 vm_map_t map,
10142 vm_map_offset_t start,
10143 vm_map_offset_t end)
10144 {
10145 vm_map_entry_t entry;
10146
10147 /*
10148 * The map should be locked (for "write") by the caller.
10149 */
10150
10151 if (start >= end) {
10152 /* invalid address range */
10153 return;
10154 }
10155
10156 start = vm_map_trunc_page(start);
10157 end = vm_map_round_page(end);
10158
10159 if (!vm_map_lookup_entry(map, start, &entry)) {
10160 /* "start" is not mapped and "entry" ends before "start" */
10161 if (entry == vm_map_to_entry(map)) {
10162 /* start with first entry in the map */
10163 entry = vm_map_first_entry(map);
10164 } else {
10165 /* start with next entry */
10166 entry = entry->vme_next;
10167 }
10168 }
10169
10170 while (entry != vm_map_to_entry(map) &&
10171 entry->vme_start <= end) {
10172 /* try and coalesce "entry" with its previous entry */
10173 vm_map_simplify_entry(map, entry);
10174 entry = entry->vme_next;
10175 }
10176 }
10177
10178
10179 /*
10180 * Routine: vm_map_machine_attribute
10181 * Purpose:
10182 * Provide machine-specific attributes to mappings,
10183 * such as cacheability, etc., for machines that provide
10184 * them. NUMA architectures and machines with big/strange
10185 * caches will use this.
10186 * Note:
10187 * Responsibilities for locking and checking are handled here,
10188 * everything else in the pmap module. If any non-volatile
10189 * information must be kept, the pmap module should handle
10190 * it itself. [This assumes that attributes do not
10191 * need to be inherited, which seems ok to me]
10192 */
10193 kern_return_t
10194 vm_map_machine_attribute(
10195 vm_map_t map,
10196 vm_map_offset_t start,
10197 vm_map_offset_t end,
10198 vm_machine_attribute_t attribute,
10199 vm_machine_attribute_val_t* value) /* IN/OUT */
10200 {
10201 kern_return_t ret;
10202 vm_map_size_t sync_size;
10203 vm_map_entry_t entry;
10204
10205 if (start < vm_map_min(map) || end > vm_map_max(map))
10206 return KERN_INVALID_ADDRESS;
10207
10208 /* Figure how much memory we need to flush (in page increments) */
10209 sync_size = end - start;
10210
10211 vm_map_lock(map);
10212
10213 if (attribute != MATTR_CACHE) {
10214 /* If we don't have to find physical addresses, we */
10215 /* don't have to do an explicit traversal here. */
10216 ret = pmap_attribute(map->pmap, start, end-start,
10217 attribute, value);
10218 vm_map_unlock(map);
10219 return ret;
10220 }
10221
10222 ret = KERN_SUCCESS; /* Assume it all worked */
10223
10224 while(sync_size) {
10225 if (vm_map_lookup_entry(map, start, &entry)) {
10226 vm_map_size_t sub_size;
10227 if((entry->vme_end - start) > sync_size) {
10228 sub_size = sync_size;
10229 sync_size = 0;
10230 } else {
10231 sub_size = entry->vme_end - start;
10232 sync_size -= sub_size;
10233 }
10234 if(entry->is_sub_map) {
10235 vm_map_offset_t sub_start;
10236 vm_map_offset_t sub_end;
10237
10238 sub_start = (start - entry->vme_start)
10239 + entry->offset;
10240 sub_end = sub_start + sub_size;
10241 vm_map_machine_attribute(
10242 entry->object.sub_map,
10243 sub_start,
10244 sub_end,
10245 attribute, value);
10246 } else {
10247 if(entry->object.vm_object) {
10248 vm_page_t m;
10249 vm_object_t object;
10250 vm_object_t base_object;
10251 vm_object_t last_object;
10252 vm_object_offset_t offset;
10253 vm_object_offset_t base_offset;
10254 vm_map_size_t range;
10255 range = sub_size;
10256 offset = (start - entry->vme_start)
10257 + entry->offset;
10258 base_offset = offset;
10259 object = entry->object.vm_object;
10260 base_object = object;
10261 last_object = NULL;
10262
10263 vm_object_lock(object);
10264
10265 while (range) {
10266 m = vm_page_lookup(
10267 object, offset);
10268
10269 if (m && !m->fictitious) {
10270 ret =
10271 pmap_attribute_cache_sync(
10272 m->phys_page,
10273 PAGE_SIZE,
10274 attribute, value);
10275
10276 } else if (object->shadow) {
10277 offset = offset + object->vo_shadow_offset;
10278 last_object = object;
10279 object = object->shadow;
10280 vm_object_lock(last_object->shadow);
10281 vm_object_unlock(last_object);
10282 continue;
10283 }
10284 range -= PAGE_SIZE;
10285
10286 if (base_object != object) {
10287 vm_object_unlock(object);
10288 vm_object_lock(base_object);
10289 object = base_object;
10290 }
10291 /* Bump to the next page */
10292 base_offset += PAGE_SIZE;
10293 offset = base_offset;
10294 }
10295 vm_object_unlock(object);
10296 }
10297 }
10298 start += sub_size;
10299 } else {
10300 vm_map_unlock(map);
10301 return KERN_FAILURE;
10302 }
10303
10304 }
10305
10306 vm_map_unlock(map);
10307
10308 return ret;
10309 }
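/*
 * Minimal usage sketch: request a cache flush over a range.  Names
 * prefixed "example_" are hypothetical; MATTR_VAL_CACHE_FLUSH is assumed
 * to be the flush value defined in <mach/vm_attributes.h>.  MATTR_CACHE
 * goes through the per-page walk above; other attributes are passed
 * straight to pmap_attribute().
 */
#if 0	/* illustrative sketch only */
static kern_return_t
example_flush_range(vm_map_t example_map,
                    vm_map_offset_t example_start,
                    vm_map_offset_t example_end)
{
        vm_machine_attribute_val_t example_val = MATTR_VAL_CACHE_FLUSH;

        return vm_map_machine_attribute(example_map, example_start,
                                        example_end, MATTR_CACHE,
                                        &example_val);
}
#endif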
10310
10311 /*
10312 * vm_map_behavior_set:
10313 *
10314 * Sets the paging reference behavior of the specified address
10315 * range in the target map. Paging reference behavior affects
10316 * how pagein operations resulting from faults on the map will be
10317 * clustered.
10318 */
10319 kern_return_t
10320 vm_map_behavior_set(
10321 vm_map_t map,
10322 vm_map_offset_t start,
10323 vm_map_offset_t end,
10324 vm_behavior_t new_behavior)
10325 {
10326 register vm_map_entry_t entry;
10327 vm_map_entry_t temp_entry;
10328
10329 XPR(XPR_VM_MAP,
10330 "vm_map_behavior_set, 0x%X start 0x%X end 0x%X behavior %d",
10331 map, start, end, new_behavior, 0);
10332
10333 if (start > end ||
10334 start < vm_map_min(map) ||
10335 end > vm_map_max(map)) {
10336 return KERN_NO_SPACE;
10337 }
10338
10339 switch (new_behavior) {
10340
10341 /*
10342 * This first block of behaviors all set a persistent state on the specified
10343 * memory range. All we have to do here is to record the desired behavior
10344 * in the vm_map_entry_t's.
10345 */
10346
10347 case VM_BEHAVIOR_DEFAULT:
10348 case VM_BEHAVIOR_RANDOM:
10349 case VM_BEHAVIOR_SEQUENTIAL:
10350 case VM_BEHAVIOR_RSEQNTL:
10351 case VM_BEHAVIOR_ZERO_WIRED_PAGES:
10352 vm_map_lock(map);
10353
10354 /*
10355 * The entire address range must be valid for the map.
10356 * Note that vm_map_range_check() does a
10357 * vm_map_lookup_entry() internally and returns the
10358 * entry containing the start of the address range if
10359 * the entire range is valid.
10360 */
10361 if (vm_map_range_check(map, start, end, &temp_entry)) {
10362 entry = temp_entry;
10363 vm_map_clip_start(map, entry, start);
10364 }
10365 else {
10366 vm_map_unlock(map);
10367 return(KERN_INVALID_ADDRESS);
10368 }
10369
10370 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
10371 vm_map_clip_end(map, entry, end);
10372 assert(!entry->use_pmap);
10373
10374 if( new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES ) {
10375 entry->zero_wired_pages = TRUE;
10376 } else {
10377 entry->behavior = new_behavior;
10378 }
10379 entry = entry->vme_next;
10380 }
10381
10382 vm_map_unlock(map);
10383 break;
10384
10385 /*
10386 * The rest of these are different from the above in that they cause
10387 * an immediate action to take place as opposed to setting a behavior that
10388 * affects future actions.
10389 */
10390
10391 case VM_BEHAVIOR_WILLNEED:
10392 return vm_map_willneed(map, start, end);
10393
10394 case VM_BEHAVIOR_DONTNEED:
10395 return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS);
10396
10397 case VM_BEHAVIOR_FREE:
10398 return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS);
10399
10400 case VM_BEHAVIOR_REUSABLE:
10401 return vm_map_reusable_pages(map, start, end);
10402
10403 case VM_BEHAVIOR_REUSE:
10404 return vm_map_reuse_pages(map, start, end);
10405
10406 case VM_BEHAVIOR_CAN_REUSE:
10407 return vm_map_can_reuse(map, start, end);
10408
10409 default:
10410 return(KERN_INVALID_ARGUMENT);
10411 }
10412
10413 return(KERN_SUCCESS);
10414 }
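/*
 * Minimal usage sketch: tag a range so that future faults cluster their
 * pageins sequentially.  Names prefixed "example_" are hypothetical.
 */
#if 0	/* illustrative sketch only */
static kern_return_t
example_set_sequential(vm_map_t example_map,
                       vm_map_offset_t example_start,
                       vm_map_offset_t example_end)
{
        /* persistent behavior: recorded in the map entries for later faults */
        return vm_map_behavior_set(example_map, example_start, example_end,
                                   VM_BEHAVIOR_SEQUENTIAL);
}
#endif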
10415
10416
10417 /*
10418 * Internals for madvise(MADV_WILLNEED) system call.
10419 *
10420 * The present implementation is to do a read-ahead if the mapping corresponds
10421 * to a mapped regular file. If it's an anonymous mapping, then we do nothing
10422 * and basically ignore the "advice" (which we are always free to do).
10423 */
10424
10425
10426 static kern_return_t
10427 vm_map_willneed(
10428 vm_map_t map,
10429 vm_map_offset_t start,
10430 vm_map_offset_t end
10431 )
10432 {
10433 vm_map_entry_t entry;
10434 vm_object_t object;
10435 memory_object_t pager;
10436 struct vm_object_fault_info fault_info;
10437 kern_return_t kr;
10438 vm_object_size_t len;
10439 vm_object_offset_t offset;
10440
10441 /*
10442 * Fill in static values in fault_info. Several fields get ignored by the code
10443 * we call, but we'll fill them in anyway since uninitialized fields are bad
10444 * when it comes to future backwards compatibility.
10445 */
10446
10447 fault_info.interruptible = THREAD_UNINT; /* ignored value */
10448 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
10449 fault_info.no_cache = FALSE; /* ignored value */
10450 fault_info.stealth = TRUE;
10451 fault_info.io_sync = FALSE;
10452 fault_info.cs_bypass = FALSE;
10453 fault_info.mark_zf_absent = FALSE;
10454
10455 /*
10456 * The MADV_WILLNEED operation doesn't require any changes to the
10457 * vm_map_entry_t's, so the read lock is sufficient.
10458 */
10459
10460 vm_map_lock_read(map);
10461
10462 /*
10463 * The madvise semantics require that the address range be fully
10464 * allocated with no holes. Otherwise, we're required to return
10465 * an error.
10466 */
10467
10468 if (! vm_map_range_check(map, start, end, &entry)) {
10469 vm_map_unlock_read(map);
10470 return KERN_INVALID_ADDRESS;
10471 }
10472
10473 /*
10474 * Examine each vm_map_entry_t in the range.
10475 */
10476 for (; entry != vm_map_to_entry(map) && start < end; ) {
10477
10478 /*
10479 * The first time through, the start address could be anywhere
10480 * within the vm_map_entry we found. So adjust the offset to
10481 * correspond. After that, the offset will always be zero to
10482 * correspond to the beginning of the current vm_map_entry.
10483 */
10484 offset = (start - entry->vme_start) + entry->offset;
10485
10486 /*
10487 * Set the length so we don't go beyond the end of the
10488 * map_entry or beyond the end of the range we were given.
10489 * This range could also span multiple map entries, all of which
10490 * map different files, so make sure we only do the right amount
10491 * of I/O for each object. Note that it's possible for there
10492 * to be multiple map entries all referring to the same object
10493 * but with different page permissions, but it's not worth
10494 * trying to optimize that case.
10495 */
10496 len = MIN(entry->vme_end - start, end - start);
10497
10498 if ((vm_size_t) len != len) {
10499 /* 32-bit overflow */
10500 len = (vm_size_t) (0 - PAGE_SIZE);
10501 }
10502 fault_info.cluster_size = (vm_size_t) len;
10503 fault_info.lo_offset = offset;
10504 fault_info.hi_offset = offset + len;
10505 fault_info.user_tag = entry->alias;
10506
10507 /*
10508 * If there's no read permission to this mapping, then just
10509 * skip it.
10510 */
10511 if ((entry->protection & VM_PROT_READ) == 0) {
10512 entry = entry->vme_next;
10513 start = entry->vme_start;
10514 continue;
10515 }
10516
10517 /*
10518 * Find the file object backing this map entry. If there is
10519 * none, then we simply ignore the "will need" advice for this
10520 * entry and go on to the next one.
10521 */
10522 if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) {
10523 entry = entry->vme_next;
10524 start = entry->vme_start;
10525 continue;
10526 }
10527
10528 /*
10529 * The data_request() could take a long time, so let's
10530 * release the map lock to avoid blocking other threads.
10531 */
10532 vm_map_unlock_read(map);
10533
10534 vm_object_paging_begin(object);
10535 pager = object->pager;
10536 vm_object_unlock(object);
10537
10538 /*
10539 * Get the data from the object asynchronously.
10540 *
10541 * Note that memory_object_data_request() places limits on the
10542 * amount of I/O it will do. Regardless of the len we
10543 * specified, it won't do more than MAX_UPL_TRANSFER and it
10544 * silently truncates the len to that size. This isn't
10545 * necessarily bad since madvise shouldn't really be used to
10546 * page in unlimited amounts of data. Other Unix variants
10547 * limit the willneed case as well. If this turns out to be an
10548 * issue for developers, then we can always adjust the policy
10549 * here and still be backwards compatible since this is all
10550 * just "advice".
10551 */
10552 kr = memory_object_data_request(
10553 pager,
10554 offset + object->paging_offset,
10555 0, /* ignored */
10556 VM_PROT_READ,
10557 (memory_object_fault_info_t)&fault_info);
10558
10559 vm_object_lock(object);
10560 vm_object_paging_end(object);
10561 vm_object_unlock(object);
10562
10563 /*
10564 * If we couldn't do the I/O for some reason, just give up on
10565 * the madvise. We still return success to the user since
10566 * madvise isn't supposed to fail when the advice can't be
10567 * taken.
10568 */
10569 if (kr != KERN_SUCCESS) {
10570 return KERN_SUCCESS;
10571 }
10572
10573 start += len;
10574 if (start >= end) {
10575 /* done */
10576 return KERN_SUCCESS;
10577 }
10578
10579 /* look up next entry */
10580 vm_map_lock_read(map);
10581 if (! vm_map_lookup_entry(map, start, &entry)) {
10582 /*
10583 * There's a new hole in the address range.
10584 */
10585 vm_map_unlock_read(map);
10586 return KERN_INVALID_ADDRESS;
10587 }
10588 }
10589
10590 vm_map_unlock_read(map);
10591 return KERN_SUCCESS;
10592 }
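/*
 * Minimal usage sketch: madvise(MADV_WILLNEED) reaches the routine above
 * through vm_map_behavior_set() with VM_BEHAVIOR_WILLNEED.  Names
 * prefixed "example_" are hypothetical.
 */
#if 0	/* illustrative sketch only */
static kern_return_t
example_willneed(vm_map_offset_t example_start, vm_map_offset_t example_end)
{
        /* immediate action: kicks off the asynchronous read-ahead */
        return vm_map_behavior_set(current_map(), example_start, example_end,
                                   VM_BEHAVIOR_WILLNEED);
}
#endif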
10593
10594 static boolean_t
10595 vm_map_entry_is_reusable(
10596 vm_map_entry_t entry)
10597 {
10598 vm_object_t object;
10599
10600 if (entry->is_shared ||
10601 entry->is_sub_map ||
10602 entry->in_transition ||
10603 entry->protection != VM_PROT_DEFAULT ||
10604 entry->max_protection != VM_PROT_ALL ||
10605 entry->inheritance != VM_INHERIT_DEFAULT ||
10606 entry->no_cache ||
10607 entry->permanent ||
10608 entry->superpage_size != 0 ||
10609 entry->zero_wired_pages ||
10610 entry->wired_count != 0 ||
10611 entry->user_wired_count != 0) {
10612 return FALSE;
10613 }
10614
10615 object = entry->object.vm_object;
10616 if (object == VM_OBJECT_NULL) {
10617 return TRUE;
10618 }
10619 if (object->ref_count == 1 &&
10620 object->wired_page_count == 0 &&
10621 object->copy == VM_OBJECT_NULL &&
10622 object->shadow == VM_OBJECT_NULL &&
10623 object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
10624 object->internal &&
10625 !object->true_share &&
10626 object->wimg_bits == VM_WIMG_USE_DEFAULT &&
10627 !object->code_signed) {
10628 return TRUE;
10629 }
10630 return FALSE;
10631
10632
10633 }
10634
10635 static kern_return_t
10636 vm_map_reuse_pages(
10637 vm_map_t map,
10638 vm_map_offset_t start,
10639 vm_map_offset_t end)
10640 {
10641 vm_map_entry_t entry;
10642 vm_object_t object;
10643 vm_object_offset_t start_offset, end_offset;
10644
10645 /*
10646 * The MADV_REUSE operation doesn't require any changes to the
10647 * vm_map_entry_t's, so the read lock is sufficient.
10648 */
10649
10650 vm_map_lock_read(map);
10651
10652 /*
10653 * The madvise semantics require that the address range be fully
10654 * allocated with no holes. Otherwise, we're required to return
10655 * an error.
10656 */
10657
10658 if (!vm_map_range_check(map, start, end, &entry)) {
10659 vm_map_unlock_read(map);
10660 vm_page_stats_reusable.reuse_pages_failure++;
10661 return KERN_INVALID_ADDRESS;
10662 }
10663
10664 /*
10665 * Examine each vm_map_entry_t in the range.
10666 */
10667 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
10668 entry = entry->vme_next) {
10669 /*
10670 * Sanity check on the VM map entry.
10671 */
10672 if (! vm_map_entry_is_reusable(entry)) {
10673 vm_map_unlock_read(map);
10674 vm_page_stats_reusable.reuse_pages_failure++;
10675 return KERN_INVALID_ADDRESS;
10676 }
10677
10678 /*
10679 * The first time through, the start address could be anywhere
10680 * within the vm_map_entry we found. So adjust the offset to
10681 * correspond.
10682 */
10683 if (entry->vme_start < start) {
10684 start_offset = start - entry->vme_start;
10685 } else {
10686 start_offset = 0;
10687 }
10688 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
10689 start_offset += entry->offset;
10690 end_offset += entry->offset;
10691
10692 object = entry->object.vm_object;
10693 if (object != VM_OBJECT_NULL) {
10694 vm_object_lock(object);
10695 vm_object_reuse_pages(object, start_offset, end_offset,
10696 TRUE);
10697 vm_object_unlock(object);
10698 }
10699
10700 if (entry->alias == VM_MEMORY_MALLOC_LARGE_REUSABLE) {
10701 /*
10702 * XXX
10703 * We do not hold the VM map exclusively here.
10704 * The "alias" field is not that critical, so it's
10705 * safe to update it here, as long as it is the only
10706 * one that can be modified while holding the VM map
10707 * "shared".
10708 */
10709 entry->alias = VM_MEMORY_MALLOC_LARGE_REUSED;
10710 }
10711 }
10712
10713 vm_map_unlock_read(map);
10714 vm_page_stats_reusable.reuse_pages_success++;
10715 return KERN_SUCCESS;
10716 }
10717
10718
10719 static kern_return_t
10720 vm_map_reusable_pages(
10721 vm_map_t map,
10722 vm_map_offset_t start,
10723 vm_map_offset_t end)
10724 {
10725 vm_map_entry_t entry;
10726 vm_object_t object;
10727 vm_object_offset_t start_offset, end_offset;
10728
10729 /*
10730 * The MADV_REUSABLE operation doesn't require any changes to the
10731 * vm_map_entry_t's, so the read lock is sufficient.
10732 */
10733
10734 vm_map_lock_read(map);
10735
10736 /*
10737 * The madvise semantics require that the address range be fully
10738 * allocated with no holes. Otherwise, we're required to return
10739 * an error.
10740 */
10741
10742 if (!vm_map_range_check(map, start, end, &entry)) {
10743 vm_map_unlock_read(map);
10744 vm_page_stats_reusable.reusable_pages_failure++;
10745 return KERN_INVALID_ADDRESS;
10746 }
10747
10748 /*
10749 * Examine each vm_map_entry_t in the range.
10750 */
10751 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
10752 entry = entry->vme_next) {
10753 int kill_pages = 0;
10754
10755 /*
10756 * Sanity check on the VM map entry.
10757 */
10758 if (! vm_map_entry_is_reusable(entry)) {
10759 vm_map_unlock_read(map);
10760 vm_page_stats_reusable.reusable_pages_failure++;
10761 return KERN_INVALID_ADDRESS;
10762 }
10763
10764 /*
10765 * The first time through, the start address could be anywhere
10766 * within the vm_map_entry we found. So adjust the offset to
10767 * correspond.
10768 */
10769 if (entry->vme_start < start) {
10770 start_offset = start - entry->vme_start;
10771 } else {
10772 start_offset = 0;
10773 }
10774 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
10775 start_offset += entry->offset;
10776 end_offset += entry->offset;
10777
10778 object = entry->object.vm_object;
10779 if (object == VM_OBJECT_NULL)
10780 continue;
10781
10782
10783 vm_object_lock(object);
10784 if (object->ref_count == 1 && !object->shadow)
10785 kill_pages = 1;
10786 else
10787 kill_pages = -1;
10788 if (kill_pages != -1) {
10789 vm_object_deactivate_pages(object,
10790 start_offset,
10791 end_offset - start_offset,
10792 kill_pages,
10793 TRUE /*reusable_pages*/);
10794 } else {
10795 vm_page_stats_reusable.reusable_pages_shared++;
10796 }
10797 vm_object_unlock(object);
10798
10799 if (entry->alias == VM_MEMORY_MALLOC_LARGE ||
10800 entry->alias == VM_MEMORY_MALLOC_LARGE_REUSED) {
10801 /*
10802 * XXX
10803 * We do not hold the VM map exclusively here.
10804 * The "alias" field is not that critical, so it's
10805 * safe to update it here, as long as it is the only
10806 * one that can be modified while holding the VM map
10807 * "shared".
10808 */
10809 entry->alias = VM_MEMORY_MALLOC_LARGE_REUSABLE;
10810 }
10811 }
10812
10813 vm_map_unlock_read(map);
10814 vm_page_stats_reusable.reusable_pages_success++;
10815 return KERN_SUCCESS;
10816 }
10817
10818
10819 static kern_return_t
10820 vm_map_can_reuse(
10821 vm_map_t map,
10822 vm_map_offset_t start,
10823 vm_map_offset_t end)
10824 {
10825 vm_map_entry_t entry;
10826
10827 /*
10828 * The MADV_REUSABLE operation doesn't require any changes to the
10829 * vm_map_entry_t's, so the read lock is sufficient.
10830 */
10831
10832 vm_map_lock_read(map);
10833
10834 /*
10835 * The madvise semantics require that the address range be fully
10836 * allocated with no holes. Otherwise, we're required to return
10837 * an error.
10838 */
10839
10840 if (!vm_map_range_check(map, start, end, &entry)) {
10841 vm_map_unlock_read(map);
10842 vm_page_stats_reusable.can_reuse_failure++;
10843 return KERN_INVALID_ADDRESS;
10844 }
10845
10846 /*
10847 * Examine each vm_map_entry_t in the range.
10848 */
10849 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
10850 entry = entry->vme_next) {
10851 /*
10852 * Sanity check on the VM map entry.
10853 */
10854 if (! vm_map_entry_is_reusable(entry)) {
10855 vm_map_unlock_read(map);
10856 vm_page_stats_reusable.can_reuse_failure++;
10857 return KERN_INVALID_ADDRESS;
10858 }
10859 }
10860
10861 vm_map_unlock_read(map);
10862 vm_page_stats_reusable.can_reuse_success++;
10863 return KERN_SUCCESS;
10864 }
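/*
 * Minimal usage sketch: the reusable/reuse pairing as a malloc-style
 * allocator might use it.  Names prefixed "example_" are hypothetical.
 */
#if 0	/* illustrative sketch only */
static void
example_reuse_cycle(vm_map_t example_map,
                    vm_map_offset_t example_start,
                    vm_map_offset_t example_end)
{
        /* the range is idle: let its pages be reclaimed cheaply */
        (void) vm_map_behavior_set(example_map, example_start, example_end,
                                   VM_BEHAVIOR_REUSABLE);

        /* ... later, just before handing the range out again ... */
        (void) vm_map_behavior_set(example_map, example_start, example_end,
                                   VM_BEHAVIOR_REUSE);
}
#endif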
10865
10866
10867
10868 #include <mach_kdb.h>
10869 #if MACH_KDB
10870 #include <ddb/db_output.h>
10871 #include <vm/vm_print.h>
10872
10873 #define printf db_printf
10874
10875 /*
10876 * Forward declarations for internal functions.
10877 */
10878 extern void vm_map_links_print(
10879 struct vm_map_links *links);
10880
10881 extern void vm_map_header_print(
10882 struct vm_map_header *header);
10883
10884 extern void vm_map_entry_print(
10885 vm_map_entry_t entry);
10886
10887 extern void vm_follow_entry(
10888 vm_map_entry_t entry);
10889
10890 extern void vm_follow_map(
10891 vm_map_t map);
10892
10893 /*
10894 * vm_map_links_print: [ debug ]
10895 */
10896 void
10897 vm_map_links_print(
10898 struct vm_map_links *links)
10899 {
10900 iprintf("prev = %08X next = %08X start = %016llX end = %016llX\n",
10901 links->prev,
10902 links->next,
10903 (unsigned long long)links->start,
10904 (unsigned long long)links->end);
10905 }
10906
10907 /*
10908 * vm_map_header_print: [ debug ]
10909 */
10910 void
10911 vm_map_header_print(
10912 struct vm_map_header *header)
10913 {
10914 vm_map_links_print(&header->links);
10915 iprintf("nentries = %08X, %sentries_pageable\n",
10916 header->nentries,
10917 (header->entries_pageable ? "" : "!"));
10918 }
10919
10920 /*
10921 * vm_follow_entry: [ debug ]
10922 */
10923 void
10924 vm_follow_entry(
10925 vm_map_entry_t entry)
10926 {
10927 int shadows;
10928
10929 iprintf("map entry %08X\n", entry);
10930
10931 db_indent += 2;
10932
10933 shadows = vm_follow_object(entry->object.vm_object);
10934 iprintf("Total objects : %d\n",shadows);
10935
10936 db_indent -= 2;
10937 }
10938
10939 /*
10940 * vm_map_entry_print: [ debug ]
10941 */
10942 void
10943 vm_map_entry_print(
10944 register vm_map_entry_t entry)
10945 {
10946 static const char *inheritance_name[4] =
10947 { "share", "copy", "none", "?"};
10948 static const char *behavior_name[4] =
10949 { "dflt", "rand", "seqtl", "rseqntl" };
10950
10951 iprintf("map entry %08X - prev = %08X next = %08X\n", entry, entry->vme_prev, entry->vme_next);
10952
10953 db_indent += 2;
10954
10955 vm_map_links_print(&entry->links);
10956
10957 iprintf("start = %016llX end = %016llX - prot=%x/%x/%s\n",
10958 (unsigned long long)entry->vme_start,
10959 (unsigned long long)entry->vme_end,
10960 entry->protection,
10961 entry->max_protection,
10962 inheritance_name[(entry->inheritance & 0x3)]);
10963
10964 iprintf("behavior = %s, wired_count = %d, user_wired_count = %d\n",
10965 behavior_name[(entry->behavior & 0x3)],
10966 entry->wired_count,
10967 entry->user_wired_count);
10968 iprintf("%sin_transition, %sneeds_wakeup\n",
10969 (entry->in_transition ? "" : "!"),
10970 (entry->needs_wakeup ? "" : "!"));
10971
10972 if (entry->is_sub_map) {
10973 iprintf("submap = %08X - offset = %016llX\n",
10974 entry->object.sub_map,
10975 (unsigned long long)entry->offset);
10976 } else {
10977 iprintf("object = %08X offset = %016llX - ",
10978 entry->object.vm_object,
10979 (unsigned long long)entry->offset);
10980 printf("%sis_shared, %sneeds_copy\n",
10981 (entry->is_shared ? "" : "!"),
10982 (entry->needs_copy ? "" : "!"));
10983 }
10984
10985 db_indent -= 2;
10986 }
10987
10988 /*
10989 * vm_follow_map: [ debug ]
10990 */
10991 void
10992 vm_follow_map(
10993 vm_map_t map)
10994 {
10995 register vm_map_entry_t entry;
10996
10997 iprintf("task map %08X\n", map);
10998
10999 db_indent += 2;
11000
11001 for (entry = vm_map_first_entry(map);
11002 entry && entry != vm_map_to_entry(map);
11003 entry = entry->vme_next) {
11004 vm_follow_entry(entry);
11005 }
11006
11007 db_indent -= 2;
11008 }
11009
11010 /*
11011 * vm_map_print: [ debug ]
11012 */
11013 void
11014 vm_map_print(
11015 db_addr_t inmap)
11016 {
11017 register vm_map_entry_t entry;
11018 vm_map_t map;
11019 #if TASK_SWAPPER
11020 char *swstate;
11021 #endif /* TASK_SWAPPER */
11022
11023 map = (vm_map_t)(long)
11024 inmap; /* Make sure we have the right type */
11025
11026 iprintf("task map %08X\n", map);
11027
11028 db_indent += 2;
11029
11030 vm_map_header_print(&map->hdr);
11031
11032 iprintf("pmap = %08X size = %08X ref = %d hint = %08X first_free = %08X\n",
11033 map->pmap,
11034 map->size,
11035 map->ref_count,
11036 map->hint,
11037 map->first_free);
11038
11039 iprintf("%swait_for_space, %swiring_required, timestamp = %d\n",
11040 (map->wait_for_space ? "" : "!"),
11041 (map->wiring_required ? "" : "!"),
11042 map->timestamp);
11043
11044 #if TASK_SWAPPER
11045 switch (map->sw_state) {
11046 case MAP_SW_IN:
11047 swstate = "SW_IN";
11048 break;
11049 case MAP_SW_OUT:
11050 swstate = "SW_OUT";
11051 break;
11052 default:
11053 swstate = "????";
11054 break;
11055 }
11056 iprintf("res = %d, sw_state = %s\n", map->res_count, swstate);
11057 #endif /* TASK_SWAPPER */
11058
11059 for (entry = vm_map_first_entry(map);
11060 entry && entry != vm_map_to_entry(map);
11061 entry = entry->vme_next) {
11062 vm_map_entry_print(entry);
11063 }
11064
11065 db_indent -= 2;
11066 }
11067
11068 /*
11069 * Routine: vm_map_copy_print
11070 * Purpose:
11071 * Pretty-print a copy object for ddb.
11072 */
11073
11074 void
11075 vm_map_copy_print(
11076 db_addr_t incopy)
11077 {
11078 vm_map_copy_t copy;
11079 vm_map_entry_t entry;
11080
11081 copy = (vm_map_copy_t)(long)
11082 incopy; /* Make sure we have the right type */
11083
11084 printf("copy object 0x%x\n", copy);
11085
11086 db_indent += 2;
11087
11088 iprintf("type=%d", copy->type);
11089 switch (copy->type) {
11090 case VM_MAP_COPY_ENTRY_LIST:
11091 printf("[entry_list]");
11092 break;
11093
11094 case VM_MAP_COPY_OBJECT:
11095 printf("[object]");
11096 break;
11097
11098 case VM_MAP_COPY_KERNEL_BUFFER:
11099 printf("[kernel_buffer]");
11100 break;
11101
11102 default:
11103 printf("[bad type]");
11104 break;
11105 }
11106 printf(", offset=0x%llx", (unsigned long long)copy->offset);
11107 printf(", size=0x%x\n", copy->size);
11108
11109 switch (copy->type) {
11110 case VM_MAP_COPY_ENTRY_LIST:
11111 vm_map_header_print(&copy->cpy_hdr);
11112 for (entry = vm_map_copy_first_entry(copy);
11113 entry && entry != vm_map_copy_to_entry(copy);
11114 entry = entry->vme_next) {
11115 vm_map_entry_print(entry);
11116 }
11117 break;
11118
11119 case VM_MAP_COPY_OBJECT:
11120 iprintf("object=0x%x\n", copy->cpy_object);
11121 break;
11122
11123 case VM_MAP_COPY_KERNEL_BUFFER:
11124 iprintf("kernel buffer=0x%x", copy->cpy_kdata);
11125 printf(", kalloc_size=0x%x\n", copy->cpy_kalloc_size);
11126 break;
11127
11128 }
11129
11130 db_indent -=2;
11131 }
11132
11133 /*
11134 * db_vm_map_total_size(map) [ debug ]
11135 *
11136 * return the total virtual size (in bytes) of the map
11137 */
11138 vm_map_size_t
11139 db_vm_map_total_size(
11140 db_addr_t inmap)
11141 {
11142 vm_map_entry_t entry;
11143 vm_map_size_t total;
11144 vm_map_t map;
11145
11146 map = (vm_map_t)(long)
11147 inmap; /* Make sure we have the right type */
11148
11149 total = 0;
11150 for (entry = vm_map_first_entry(map);
11151 entry != vm_map_to_entry(map);
11152 entry = entry->vme_next) {
11153 total += entry->vme_end - entry->vme_start;
11154 }
11155
11156 return total;
11157 }
11158
11159 #endif /* MACH_KDB */
11160
11161 /*
11162 * Routine: vm_map_entry_insert
11163 *
11164 * Description: This routine inserts a new vm_map_entry in a locked map.
11165 */
11166 vm_map_entry_t
11167 vm_map_entry_insert(
11168 vm_map_t map,
11169 vm_map_entry_t insp_entry,
11170 vm_map_offset_t start,
11171 vm_map_offset_t end,
11172 vm_object_t object,
11173 vm_object_offset_t offset,
11174 boolean_t needs_copy,
11175 boolean_t is_shared,
11176 boolean_t in_transition,
11177 vm_prot_t cur_protection,
11178 vm_prot_t max_protection,
11179 vm_behavior_t behavior,
11180 vm_inherit_t inheritance,
11181 unsigned wired_count,
11182 boolean_t no_cache,
11183 boolean_t permanent,
11184 unsigned int superpage_size)
11185 {
11186 vm_map_entry_t new_entry;
11187
11188 assert(insp_entry != (vm_map_entry_t)0);
11189
11190 new_entry = vm_map_entry_create(map, !map->hdr.entries_pageable);
11191
11192 new_entry->vme_start = start;
11193 new_entry->vme_end = end;
11194 assert(page_aligned(new_entry->vme_start));
11195 assert(page_aligned(new_entry->vme_end));
11196 assert(new_entry->vme_start < new_entry->vme_end);
11197
11198 new_entry->object.vm_object = object;
11199 new_entry->offset = offset;
11200 new_entry->is_shared = is_shared;
11201 new_entry->is_sub_map = FALSE;
11202 new_entry->needs_copy = needs_copy;
11203 new_entry->in_transition = in_transition;
11204 new_entry->needs_wakeup = FALSE;
11205 new_entry->inheritance = inheritance;
11206 new_entry->protection = cur_protection;
11207 new_entry->max_protection = max_protection;
11208 new_entry->behavior = behavior;
11209 new_entry->wired_count = wired_count;
11210 new_entry->user_wired_count = 0;
11211 new_entry->use_pmap = FALSE;
11212 new_entry->alias = 0;
11213 new_entry->zero_wired_pages = FALSE;
11214 new_entry->no_cache = no_cache;
11215 new_entry->permanent = permanent;
11216 new_entry->superpage_size = superpage_size;
11217 new_entry->used_for_jit = FALSE;
11218
11219 /*
11220 * Insert the new entry into the list.
11221 */
11222
11223 vm_map_store_entry_link(map, insp_entry, new_entry);
11224 map->size += end - start;
11225
11226 /*
11227 * Update the free space hint and the lookup hint.
11228 */
11229
11230 SAVE_HINT_MAP_WRITE(map, new_entry);
11231 return new_entry;
11232 }
11233
11234 /*
11235 * Routine: vm_map_remap_extract
11236 *
11237 * Description: This routine returns a vm_map_entry list from a map.
11238 */
11239 static kern_return_t
11240 vm_map_remap_extract(
11241 vm_map_t map,
11242 vm_map_offset_t addr,
11243 vm_map_size_t size,
11244 boolean_t copy,
11245 struct vm_map_header *map_header,
11246 vm_prot_t *cur_protection,
11247 vm_prot_t *max_protection,
11248 /* What, no behavior? */
11249 vm_inherit_t inheritance,
11250 boolean_t pageable)
11251 {
11252 kern_return_t result;
11253 vm_map_size_t mapped_size;
11254 vm_map_size_t tmp_size;
11255 vm_map_entry_t src_entry; /* result of last map lookup */
11256 vm_map_entry_t new_entry;
11257 vm_object_offset_t offset;
11258 vm_map_offset_t map_address;
11259 vm_map_offset_t src_start; /* start of entry to map */
11260 vm_map_offset_t src_end; /* end of region to be mapped */
11261 vm_object_t object;
11262 vm_map_version_t version;
11263 boolean_t src_needs_copy;
11264 boolean_t new_entry_needs_copy;
11265
11266 assert(map != VM_MAP_NULL);
11267 assert(size != 0 && size == vm_map_round_page(size));
11268 assert(inheritance == VM_INHERIT_NONE ||
11269 inheritance == VM_INHERIT_COPY ||
11270 inheritance == VM_INHERIT_SHARE);
11271
11272 /*
11273 * Compute start and end of region.
11274 */
11275 src_start = vm_map_trunc_page(addr);
11276 src_end = vm_map_round_page(src_start + size);
11277
11278 /*
11279 * Initialize map_header.
11280 */
11281 map_header->links.next = (struct vm_map_entry *)&map_header->links;
11282 map_header->links.prev = (struct vm_map_entry *)&map_header->links;
11283 map_header->nentries = 0;
11284 map_header->entries_pageable = pageable;
11285
11286 vm_map_store_init( map_header );
11287
11288 *cur_protection = VM_PROT_ALL;
11289 *max_protection = VM_PROT_ALL;
11290
11291 map_address = 0;
11292 mapped_size = 0;
11293 result = KERN_SUCCESS;
11294
11295 /*
11296 * The specified source virtual space might correspond to
11297 * multiple map entries, need to loop on them.
11298 */
11299 vm_map_lock(map);
11300 while (mapped_size != size) {
11301 vm_map_size_t entry_size;
11302
11303 /*
11304 * Find the beginning of the region.
11305 */
11306 if (! vm_map_lookup_entry(map, src_start, &src_entry)) {
11307 result = KERN_INVALID_ADDRESS;
11308 break;
11309 }
11310
11311 if (src_start < src_entry->vme_start ||
11312 (mapped_size && src_start != src_entry->vme_start)) {
11313 result = KERN_INVALID_ADDRESS;
11314 break;
11315 }
11316
11317 tmp_size = size - mapped_size;
11318 if (src_end > src_entry->vme_end)
11319 tmp_size -= (src_end - src_entry->vme_end);
11320
11321 entry_size = (vm_map_size_t)(src_entry->vme_end -
11322 src_entry->vme_start);
11323
11324 if(src_entry->is_sub_map) {
11325 vm_map_reference(src_entry->object.sub_map);
11326 object = VM_OBJECT_NULL;
11327 } else {
11328 object = src_entry->object.vm_object;
11329
11330 if (object == VM_OBJECT_NULL) {
11331 object = vm_object_allocate(entry_size);
11332 src_entry->offset = 0;
11333 src_entry->object.vm_object = object;
11334 } else if (object->copy_strategy !=
11335 MEMORY_OBJECT_COPY_SYMMETRIC) {
11336 /*
11337 * We are already using an asymmetric
11338 * copy, and therefore we already have
11339 * the right object.
11340 */
11341 assert(!src_entry->needs_copy);
11342 } else if (src_entry->needs_copy || object->shadowed ||
11343 (object->internal && !object->true_share &&
11344 !src_entry->is_shared &&
11345 object->vo_size > entry_size)) {
11346
11347 vm_object_shadow(&src_entry->object.vm_object,
11348 &src_entry->offset,
11349 entry_size);
11350
11351 if (!src_entry->needs_copy &&
11352 (src_entry->protection & VM_PROT_WRITE)) {
11353 vm_prot_t prot;
11354
11355 prot = src_entry->protection & ~VM_PROT_WRITE;
11356
11357 if (override_nx(map, src_entry->alias) && prot)
11358 prot |= VM_PROT_EXECUTE;
11359
11360 if(map->mapped) {
11361 vm_object_pmap_protect(
11362 src_entry->object.vm_object,
11363 src_entry->offset,
11364 entry_size,
11365 PMAP_NULL,
11366 src_entry->vme_start,
11367 prot);
11368 } else {
11369 pmap_protect(vm_map_pmap(map),
11370 src_entry->vme_start,
11371 src_entry->vme_end,
11372 prot);
11373 }
11374 }
11375
11376 object = src_entry->object.vm_object;
11377 src_entry->needs_copy = FALSE;
11378 }
11379
11380
11381 vm_object_lock(object);
11382 vm_object_reference_locked(object); /* object ref. for new entry */
11383 if (object->copy_strategy ==
11384 MEMORY_OBJECT_COPY_SYMMETRIC) {
11385 object->copy_strategy =
11386 MEMORY_OBJECT_COPY_DELAY;
11387 }
11388 vm_object_unlock(object);
11389 }
11390
11391 offset = src_entry->offset + (src_start - src_entry->vme_start);
11392
11393 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
11394 vm_map_entry_copy(new_entry, src_entry);
11395 new_entry->use_pmap = FALSE; /* clr address space specifics */
11396
11397 new_entry->vme_start = map_address;
11398 new_entry->vme_end = map_address + tmp_size;
11399 assert(new_entry->vme_start < new_entry->vme_end);
11400 new_entry->inheritance = inheritance;
11401 new_entry->offset = offset;
11402
11403 /*
11404 * The new region has to be copied now if required.
11405 */
11406 RestartCopy:
11407 if (!copy) {
11408 src_entry->is_shared = TRUE;
11409 new_entry->is_shared = TRUE;
11410 if (!(new_entry->is_sub_map))
11411 new_entry->needs_copy = FALSE;
11412
11413 } else if (src_entry->is_sub_map) {
11414 /* make this a COW sub_map if not already */
11415 new_entry->needs_copy = TRUE;
11416 object = VM_OBJECT_NULL;
11417 } else if (src_entry->wired_count == 0 &&
11418 vm_object_copy_quickly(&new_entry->object.vm_object,
11419 new_entry->offset,
11420 (new_entry->vme_end -
11421 new_entry->vme_start),
11422 &src_needs_copy,
11423 &new_entry_needs_copy)) {
11424
11425 new_entry->needs_copy = new_entry_needs_copy;
11426 new_entry->is_shared = FALSE;
11427
11428 /*
11429 * Handle copy_on_write semantics.
11430 */
11431 if (src_needs_copy && !src_entry->needs_copy) {
11432 vm_prot_t prot;
11433
11434 prot = src_entry->protection & ~VM_PROT_WRITE;
11435
11436 if (override_nx(map, src_entry->alias) && prot)
11437 prot |= VM_PROT_EXECUTE;
11438
11439 vm_object_pmap_protect(object,
11440 offset,
11441 entry_size,
11442 ((src_entry->is_shared
11443 || map->mapped) ?
11444 PMAP_NULL : map->pmap),
11445 src_entry->vme_start,
11446 prot);
11447
11448 src_entry->needs_copy = TRUE;
11449 }
11450 /*
11451 * Throw away the old object reference of the new entry.
11452 */
11453 vm_object_deallocate(object);
11454
11455 } else {
11456 new_entry->is_shared = FALSE;
11457
11458 /*
11459 * The map can be safely unlocked since we
11460 * already hold a reference on the object.
11461 *
11462 * Record the timestamp of the map for later
11463 * verification, and unlock the map.
11464 */
11465 version.main_timestamp = map->timestamp;
11466 vm_map_unlock(map); /* Increments timestamp once! */
11467
11468 /*
11469 * Perform the copy.
11470 */
11471 if (src_entry->wired_count > 0) {
11472 vm_object_lock(object);
11473 result = vm_object_copy_slowly(
11474 object,
11475 offset,
11476 entry_size,
11477 THREAD_UNINT,
11478 &new_entry->object.vm_object);
11479
11480 new_entry->offset = 0;
11481 new_entry->needs_copy = FALSE;
11482 } else {
11483 result = vm_object_copy_strategically(
11484 object,
11485 offset,
11486 entry_size,
11487 &new_entry->object.vm_object,
11488 &new_entry->offset,
11489 &new_entry_needs_copy);
11490
11491 new_entry->needs_copy = new_entry_needs_copy;
11492 }
11493
11494 /*
11495 * Throw away the old object reference of the new entry.
11496 */
11497 vm_object_deallocate(object);
11498
11499 if (result != KERN_SUCCESS &&
11500 result != KERN_MEMORY_RESTART_COPY) {
11501 _vm_map_entry_dispose(map_header, new_entry);
11502 break;
11503 }
11504
11505 /*
11506 * Verify that the map has not substantially
11507 * changed while the copy was being made.
11508 */
11509
11510 vm_map_lock(map);
11511 if (version.main_timestamp + 1 != map->timestamp) {
11512 /*
11513 * Simple version comparison failed.
11514 *
11515 * Retry the lookup and verify that the
11516 * same object/offset are still present.
11517 */
11518 vm_object_deallocate(new_entry->
11519 object.vm_object);
11520 _vm_map_entry_dispose(map_header, new_entry);
11521 if (result == KERN_MEMORY_RESTART_COPY)
11522 result = KERN_SUCCESS;
11523 continue;
11524 }
11525
11526 if (result == KERN_MEMORY_RESTART_COPY) {
11527 vm_object_reference(object);
11528 goto RestartCopy;
11529 }
11530 }
11531
11532 _vm_map_store_entry_link(map_header,
11533 map_header->links.prev, new_entry);
11534
11535 /* Protections for submap mapping are irrelevant here */
11536 if( !src_entry->is_sub_map ) {
11537 *cur_protection &= src_entry->protection;
11538 *max_protection &= src_entry->max_protection;
11539 }
11540 map_address += tmp_size;
11541 mapped_size += tmp_size;
11542 src_start += tmp_size;
11543
11544 } /* end while */
11545
11546 vm_map_unlock(map);
11547 if (result != KERN_SUCCESS) {
11548 /*
11549 * Free all allocated elements.
11550 */
11551 for (src_entry = map_header->links.next;
11552 src_entry != (struct vm_map_entry *)&map_header->links;
11553 src_entry = new_entry) {
11554 new_entry = src_entry->vme_next;
11555 _vm_map_store_entry_unlink(map_header, src_entry);
11556 vm_object_deallocate(src_entry->object.vm_object);
11557 _vm_map_entry_dispose(map_header, src_entry);
11558 }
11559 }
11560 return result;
11561 }
11562
11563 /*
11564 * Routine: vm_remap
11565 *
11566 * Map portion of a task's address space.
11567 * Mapped region must not overlap more than
11568 * one vm memory object. Protections and
11569 * inheritance attributes remain the same
11570 * as in the original task and are out parameters.
11571 * Source and target tasks can be identical.
11572 * Other attributes are the same as for vm_map().
11573 */
11574 kern_return_t
11575 vm_map_remap(
11576 vm_map_t target_map,
11577 vm_map_address_t *address,
11578 vm_map_size_t size,
11579 vm_map_offset_t mask,
11580 int flags,
11581 vm_map_t src_map,
11582 vm_map_offset_t memory_address,
11583 boolean_t copy,
11584 vm_prot_t *cur_protection,
11585 vm_prot_t *max_protection,
11586 vm_inherit_t inheritance)
11587 {
11588 kern_return_t result;
11589 vm_map_entry_t entry;
11590 vm_map_entry_t insp_entry = VM_MAP_ENTRY_NULL;
11591 vm_map_entry_t new_entry;
11592 struct vm_map_header map_header;
11593
11594 if (target_map == VM_MAP_NULL)
11595 return KERN_INVALID_ARGUMENT;
11596
11597 switch (inheritance) {
11598 case VM_INHERIT_NONE:
11599 case VM_INHERIT_COPY:
11600 case VM_INHERIT_SHARE:
11601 if (size != 0 && src_map != VM_MAP_NULL)
11602 break;
11603 /*FALL THRU*/
11604 default:
11605 return KERN_INVALID_ARGUMENT;
11606 }
11607
11608 size = vm_map_round_page(size);
11609
11610 result = vm_map_remap_extract(src_map, memory_address,
11611 size, copy, &map_header,
11612 cur_protection,
11613 max_protection,
11614 inheritance,
11615 target_map->hdr.
11616 entries_pageable);
11617
11618 if (result != KERN_SUCCESS) {
11619 return result;
11620 }
11621
11622 /*
11623 * Allocate/check a range of free virtual address
11624 * space for the target
11625 */
11626 *address = vm_map_trunc_page(*address);
11627 vm_map_lock(target_map);
11628 result = vm_map_remap_range_allocate(target_map, address, size,
11629 mask, flags, &insp_entry);
11630
11631 for (entry = map_header.links.next;
11632 entry != (struct vm_map_entry *)&map_header.links;
11633 entry = new_entry) {
11634 new_entry = entry->vme_next;
11635 _vm_map_store_entry_unlink(&map_header, entry);
11636 if (result == KERN_SUCCESS) {
11637 entry->vme_start += *address;
11638 entry->vme_end += *address;
11639 vm_map_store_entry_link(target_map, insp_entry, entry);
11640 insp_entry = entry;
11641 } else {
11642 if (!entry->is_sub_map) {
11643 vm_object_deallocate(entry->object.vm_object);
11644 } else {
11645 vm_map_deallocate(entry->object.sub_map);
11646 }
11647 _vm_map_entry_dispose(&map_header, entry);
11648 }
11649 }
11650
11651 if( target_map->disable_vmentry_reuse == TRUE) {
11652 if( target_map->highest_entry_end < insp_entry->vme_end ){
11653 target_map->highest_entry_end = insp_entry->vme_end;
11654 }
11655 }
11656
11657 if (result == KERN_SUCCESS) {
11658 target_map->size += size;
11659 SAVE_HINT_MAP_WRITE(target_map, insp_entry);
11660 }
11661 vm_map_unlock(target_map);
11662
11663 if (result == KERN_SUCCESS && target_map->wiring_required)
11664 result = vm_map_wire(target_map, *address,
11665 *address + size, *cur_protection, TRUE);
11666 return result;
11667 }
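/*
 * Minimal usage sketch: share a range of one map into another at a
 * kernel-chosen address.  Names prefixed "example_" are hypothetical.
 */
#if 0	/* illustrative sketch only */
static kern_return_t
example_share_range(vm_map_t example_dst,
                    vm_map_t example_src,
                    vm_map_offset_t example_src_addr,
                    vm_map_size_t example_size,
                    vm_map_address_t *example_dst_addr)	/* OUT */
{
        vm_prot_t example_cur, example_max;

        *example_dst_addr = 0;
        return vm_map_remap(example_dst, example_dst_addr, example_size,
                            (vm_map_offset_t) 0,	/* no alignment mask */
                            VM_FLAGS_ANYWHERE,
                            example_src, example_src_addr,
                            FALSE,			/* share, don't copy */
                            &example_cur, &example_max,
                            VM_INHERIT_SHARE);
}
#endif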
11668
11669 /*
11670 * Routine: vm_map_remap_range_allocate
11671 *
11672 * Description:
11673 * Allocate a range in the specified virtual address map.
11674 * Returns the address and the map entry just before the allocated
11675 * range.
11676 *
11677 * Map must be locked.
11678 */
11679
11680 static kern_return_t
11681 vm_map_remap_range_allocate(
11682 vm_map_t map,
11683 vm_map_address_t *address, /* IN/OUT */
11684 vm_map_size_t size,
11685 vm_map_offset_t mask,
11686 int flags,
11687 vm_map_entry_t *map_entry) /* OUT */
11688 {
11689 vm_map_entry_t entry;
11690 vm_map_offset_t start;
11691 vm_map_offset_t end;
11692 kern_return_t kr;
11693
11694 StartAgain: ;
11695
11696 start = *address;
11697
11698 if (flags & VM_FLAGS_ANYWHERE)
11699 {
11700 /*
11701 * Calculate the first possible address.
11702 */
11703
11704 if (start < map->min_offset)
11705 start = map->min_offset;
11706 if (start > map->max_offset)
11707 return(KERN_NO_SPACE);
11708
11709 /*
11710 * Look for the first possible address;
11711 * if there's already something at this
11712 * address, we have to start after it.
11713 */
11714
11715 if( map->disable_vmentry_reuse == TRUE) {
11716 VM_MAP_HIGHEST_ENTRY(map, entry, start);
11717 } else {
11718 assert(first_free_is_valid(map));
11719 if (start == map->min_offset) {
11720 if ((entry = map->first_free) != vm_map_to_entry(map))
11721 start = entry->vme_end;
11722 } else {
11723 vm_map_entry_t tmp_entry;
11724 if (vm_map_lookup_entry(map, start, &tmp_entry))
11725 start = tmp_entry->vme_end;
11726 entry = tmp_entry;
11727 }
11728 }
11729
11730 /*
11731 * In any case, the "entry" always precedes
11732 * the proposed new region throughout the
11733 * loop:
11734 */
11735
11736 while (TRUE) {
11737 register vm_map_entry_t next;
11738
11739 /*
11740 * Find the end of the proposed new region.
11741 * Be sure we didn't go beyond the end, or
11742 * wrap around the address.
11743 */
11744
11745 end = ((start + mask) & ~mask);
11746 if (end < start)
11747 return(KERN_NO_SPACE);
11748 start = end;
11749 end += size;
11750
11751 if ((end > map->max_offset) || (end < start)) {
11752 if (map->wait_for_space) {
11753 if (size <= (map->max_offset -
11754 map->min_offset)) {
11755 assert_wait((event_t) map, THREAD_INTERRUPTIBLE);
11756 vm_map_unlock(map);
11757 thread_block(THREAD_CONTINUE_NULL);
11758 vm_map_lock(map);
11759 goto StartAgain;
11760 }
11761 }
11762
11763 return(KERN_NO_SPACE);
11764 }
11765
11766 /*
11767 * If there are no more entries, we must win.
11768 */
11769
11770 next = entry->vme_next;
11771 if (next == vm_map_to_entry(map))
11772 break;
11773
11774 /*
11775 * If there is another entry, it must be
11776 * after the end of the potential new region.
11777 */
11778
11779 if (next->vme_start >= end)
11780 break;
11781
11782 /*
11783 * Didn't fit -- move to the next entry.
11784 */
11785
11786 entry = next;
11787 start = entry->vme_end;
11788 }
11789 *address = start;
11790 } else {
11791 vm_map_entry_t temp_entry;
11792
11793 /*
11794 * Verify that:
11795 * the address doesn't itself violate
11796 * the mask requirement.
11797 */
11798
11799 if ((start & mask) != 0)
11800 return(KERN_NO_SPACE);
11801
11802
11803 /*
11804 * ... the address is within bounds
11805 */
11806
11807 end = start + size;
11808
11809 if ((start < map->min_offset) ||
11810 (end > map->max_offset) ||
11811 (start >= end)) {
11812 return(KERN_INVALID_ADDRESS);
11813 }
11814
11815 /*
11816 * If we're asked to overwrite whatever was mapped in that
11817 * range, first deallocate that range.
11818 */
11819 if (flags & VM_FLAGS_OVERWRITE) {
11820 vm_map_t zap_map;
11821
11822 /*
11823 * We use a "zap_map" to avoid having to unlock
11824 * the "map" in vm_map_delete(), which would compromise
11825 * the atomicity of the "deallocate" and then "remap"
11826 * combination.
11827 */
11828 zap_map = vm_map_create(PMAP_NULL,
11829 start,
11830 end - start,
11831 map->hdr.entries_pageable);
11832 if (zap_map == VM_MAP_NULL) {
11833 return KERN_RESOURCE_SHORTAGE;
11834 }
11835
11836 kr = vm_map_delete(map, start, end,
11837 VM_MAP_REMOVE_SAVE_ENTRIES,
11838 zap_map);
11839 if (kr == KERN_SUCCESS) {
11840 vm_map_destroy(zap_map,
11841 VM_MAP_REMOVE_NO_PMAP_CLEANUP);
11842 zap_map = VM_MAP_NULL;
11843 }
11844 }
11845
11846 /*
11847 * ... the starting address isn't allocated
11848 */
11849
11850 if (vm_map_lookup_entry(map, start, &temp_entry))
11851 return(KERN_NO_SPACE);
11852
11853 entry = temp_entry;
11854
11855 /*
11856 * ... the next region doesn't overlap the
11857 * end point.
11858 */
11859
11860 if ((entry->vme_next != vm_map_to_entry(map)) &&
11861 (entry->vme_next->vme_start < end))
11862 return(KERN_NO_SPACE);
11863 }
11864 *map_entry = entry;
11865 return(KERN_SUCCESS);
11866 }
11867
11868 /*
11869 * vm_map_switch:
11870 *
11871 * Set the address map for the current thread to the specified map
11872 */
11873
11874 vm_map_t
11875 vm_map_switch(
11876 vm_map_t map)
11877 {
11878 int mycpu;
11879 thread_t thread = current_thread();
11880 vm_map_t oldmap = thread->map;
11881
11882 mp_disable_preemption();
11883 mycpu = cpu_number();
11884
11885 /*
11886 * Deactivate the current map and activate the requested map
11887 */
11888 PMAP_SWITCH_USER(thread, map, mycpu);
11889
11890 mp_enable_preemption();
11891 return(oldmap);
11892 }
11893
11894
11895 /*
11896 * Routine: vm_map_write_user
11897 *
11898 * Description:
11899 * Copy out data from a kernel space into space in the
11900 * destination map. The space must already exist in the
11901 * destination map.
11902 * NOTE: This routine should only be called by threads
11903 * which can block on a page fault, i.e. kernel-mode user
11904 * threads.
11905 *
11906 */
11907 kern_return_t
11908 vm_map_write_user(
11909 vm_map_t map,
11910 void *src_p,
11911 vm_map_address_t dst_addr,
11912 vm_size_t size)
11913 {
11914 kern_return_t kr = KERN_SUCCESS;
11915
11916 if(current_map() == map) {
11917 if (copyout(src_p, dst_addr, size)) {
11918 kr = KERN_INVALID_ADDRESS;
11919 }
11920 } else {
11921 vm_map_t oldmap;
11922
11923 /* take on the identity of the target map while doing */
11924 /* the transfer */
11925
11926 vm_map_reference(map);
11927 oldmap = vm_map_switch(map);
11928 if (copyout(src_p, dst_addr, size)) {
11929 kr = KERN_INVALID_ADDRESS;
11930 }
11931 vm_map_switch(oldmap);
11932 vm_map_deallocate(map);
11933 }
11934 return kr;
11935 }
11936
11937 /*
11938 * Routine: vm_map_read_user
11939 *
11940 * Description:
11941 * Copy in data from a user space source map into the
11942 * kernel map. The space must already exist in the
11943 * kernel map.
11944 * NOTE: This routine should only be called by threads
11945 * which can block on a page fault, i.e. kernel-mode user
11946 * threads.
11947 *
11948 */
11949 kern_return_t
11950 vm_map_read_user(
11951 vm_map_t map,
11952 vm_map_address_t src_addr,
11953 void *dst_p,
11954 vm_size_t size)
11955 {
11956 kern_return_t kr = KERN_SUCCESS;
11957
11958 if(current_map() == map) {
11959 if (copyin(src_addr, dst_p, size)) {
11960 kr = KERN_INVALID_ADDRESS;
11961 }
11962 } else {
11963 vm_map_t oldmap;
11964
11965 /* take on the identity of the target map while doing */
11966 /* the transfer */
11967
11968 vm_map_reference(map);
11969 oldmap = vm_map_switch(map);
11970 if (copyin(src_addr, dst_p, size)) {
11971 kr = KERN_INVALID_ADDRESS;
11972 }
11973 vm_map_switch(oldmap);
11974 vm_map_deallocate(map);
11975 }
11976 return kr;
11977 }
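/*
 * Minimal usage sketch: push a kernel buffer into another task's address
 * space.  The destination range must already exist in that map and the
 * calling thread must be able to block on a fault.  Names prefixed
 * "example_" are hypothetical.
 */
#if 0	/* illustrative sketch only */
static kern_return_t
example_copy_to_task(vm_map_t example_task_map,
                     vm_map_address_t example_user_addr)
{
        char example_buf[64] = { 0 };

        return vm_map_write_user(example_task_map, example_buf,
                                 example_user_addr, sizeof (example_buf));
}
#endif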
11978
11979
11980 /*
11981 * vm_map_check_protection:
11982 *
11983 * Assert that the target map allows the specified
11984 * privilege on the entire address region given.
11985 * The entire region must be allocated.
11986 */
11987 boolean_t
11988 vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
11989 vm_map_offset_t end, vm_prot_t protection)
11990 {
11991 vm_map_entry_t entry;
11992 vm_map_entry_t tmp_entry;
11993
11994 vm_map_lock(map);
11995
11996 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
11997 {
11998 vm_map_unlock(map);
11999 return (FALSE);
12000 }
12001
12002 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
12003 vm_map_unlock(map);
12004 return(FALSE);
12005 }
12006
12007 entry = tmp_entry;
12008
12009 while (start < end) {
12010 if (entry == vm_map_to_entry(map)) {
12011 vm_map_unlock(map);
12012 return(FALSE);
12013 }
12014
12015 /*
12016 * No holes allowed!
12017 */
12018
12019 if (start < entry->vme_start) {
12020 vm_map_unlock(map);
12021 return(FALSE);
12022 }
12023
12024 /*
12025 * Check protection associated with entry.
12026 */
12027
12028 if ((entry->protection & protection) != protection) {
12029 vm_map_unlock(map);
12030 return(FALSE);
12031 }
12032
12033 /* go to next entry */
12034
12035 start = entry->vme_end;
12036 entry = entry->vme_next;
12037 }
12038 vm_map_unlock(map);
12039 return(TRUE);
12040 }
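/*
 * Minimal usage sketch: make sure an entire range is mapped with at least
 * read/write permission before operating on it.  Names prefixed
 * "example_" are hypothetical.
 */
#if 0	/* illustrative sketch only */
static boolean_t
example_range_is_writable(vm_map_t example_map,
                          vm_map_offset_t example_start,
                          vm_map_offset_t example_end)
{
        /* FALSE if the range has a hole or weaker protection anywhere */
        return vm_map_check_protection(example_map, example_start,
                                       example_end,
                                       VM_PROT_READ | VM_PROT_WRITE);
}
#endif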
12041
12042 kern_return_t
12043 vm_map_purgable_control(
12044 vm_map_t map,
12045 vm_map_offset_t address,
12046 vm_purgable_t control,
12047 int *state)
12048 {
12049 vm_map_entry_t entry;
12050 vm_object_t object;
12051 kern_return_t kr;
12052
12053 /*
12054 * Vet all the input parameters and current type and state of the
12055 * underlying object. Return with an error if anything is amiss.
12056 */
12057 if (map == VM_MAP_NULL)
12058 return(KERN_INVALID_ARGUMENT);
12059
12060 if (control != VM_PURGABLE_SET_STATE &&
12061 control != VM_PURGABLE_GET_STATE &&
12062 control != VM_PURGABLE_PURGE_ALL)
12063 return(KERN_INVALID_ARGUMENT);
12064
12065 if (control == VM_PURGABLE_PURGE_ALL) {
12066 vm_purgeable_object_purge_all();
12067 return KERN_SUCCESS;
12068 }
12069
12070 if (control == VM_PURGABLE_SET_STATE &&
12071 (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) ||
12072 ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK)))
12073 return(KERN_INVALID_ARGUMENT);
12074
12075 vm_map_lock_read(map);
12076
12077 if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {
12078
12079 /*
12080 * Must pass a valid non-submap address.
12081 */
12082 vm_map_unlock_read(map);
12083 return(KERN_INVALID_ADDRESS);
12084 }
12085
12086 if ((entry->protection & VM_PROT_WRITE) == 0) {
12087 /*
12088 * Can't apply purgable controls to something you can't write.
12089 */
12090 vm_map_unlock_read(map);
12091 return(KERN_PROTECTION_FAILURE);
12092 }
12093
12094 object = entry->object.vm_object;
12095 if (object == VM_OBJECT_NULL) {
12096 /*
12097 * Object must already be present or it can't be purgable.
12098 */
12099 vm_map_unlock_read(map);
12100 return KERN_INVALID_ARGUMENT;
12101 }
12102
12103 vm_object_lock(object);
12104
12105 if (entry->offset != 0 ||
12106 entry->vme_end - entry->vme_start != object->vo_size) {
12107 /*
12108 * Can only apply purgable controls to the whole (existing)
12109 * object at once.
12110 */
12111 vm_map_unlock_read(map);
12112 vm_object_unlock(object);
12113 return KERN_INVALID_ARGUMENT;
12114 }
12115
12116 vm_map_unlock_read(map);
12117
12118 kr = vm_object_purgable_control(object, control, state);
12119
12120 vm_object_unlock(object);
12121
12122 return kr;
12123 }
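/*
 * Illustrative sketch (editorial example, not part of the original file):
 * marking the purgeable object mapped at "address" volatile so its pages
 * may be reclaimed under memory pressure.  Note that the control applies
 * to the whole object and the entry must be writable, per the checks above.
 */
static kern_return_t
example_make_volatile(
	vm_map_t	map,
	vm_map_offset_t	address)
{
	int state = VM_PURGABLE_VOLATILE;

	return vm_map_purgable_control(map, address,
				       VM_PURGABLE_SET_STATE, &state);
}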
12124
12125 kern_return_t
12126 vm_map_page_query_internal(
12127 vm_map_t target_map,
12128 vm_map_offset_t offset,
12129 int *disposition,
12130 int *ref_count)
12131 {
12132 kern_return_t kr;
12133 vm_page_info_basic_data_t info;
12134 mach_msg_type_number_t count;
12135
12136 count = VM_PAGE_INFO_BASIC_COUNT;
12137 kr = vm_map_page_info(target_map,
12138 offset,
12139 VM_PAGE_INFO_BASIC,
12140 (vm_page_info_t) &info,
12141 &count);
12142 if (kr == KERN_SUCCESS) {
12143 *disposition = info.disposition;
12144 *ref_count = info.ref_count;
12145 } else {
12146 *disposition = 0;
12147 *ref_count = 0;
12148 }
12149
12150 return kr;
12151 }
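/*
 * Illustrative sketch (editorial example, not part of the original file):
 * using vm_map_page_query_internal() to ask whether the page backing a
 * given offset is resident and dirty.  The out-parameters mirror the
 * basic-info disposition and ref_count filled in above.
 */
static boolean_t
example_page_is_resident_and_dirty(
	vm_map_t	map,
	vm_map_offset_t	offset)
{
	int disposition = 0;
	int ref_count = 0;

	if (vm_map_page_query_internal(map, offset,
				       &disposition, &ref_count) != KERN_SUCCESS)
		return FALSE;

	return ((disposition & VM_PAGE_QUERY_PAGE_PRESENT) &&
		(disposition & VM_PAGE_QUERY_PAGE_DIRTY)) ? TRUE : FALSE;
}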
12152
12153 kern_return_t
12154 vm_map_page_info(
12155 vm_map_t map,
12156 vm_map_offset_t offset,
12157 vm_page_info_flavor_t flavor,
12158 vm_page_info_t info,
12159 mach_msg_type_number_t *count)
12160 {
12161 vm_map_entry_t map_entry;
12162 vm_object_t object;
12163 vm_page_t m;
12164 kern_return_t kr;
12165 kern_return_t retval = KERN_SUCCESS;
12166 boolean_t top_object;
12167 int disposition;
12168 int ref_count;
12169 vm_object_id_t object_id;
12170 vm_page_info_basic_t basic_info;
12171 int depth;
12172 vm_map_offset_t offset_in_page;
12173
12174 switch (flavor) {
12175 case VM_PAGE_INFO_BASIC:
12176 if (*count != VM_PAGE_INFO_BASIC_COUNT) {
12177 /*
12178 * The "vm_page_info_basic_data" structure was not
12179 * properly padded, so allow the size to be off by
12180 * one to maintain backwards binary compatibility...
12181 */
12182 if (*count != VM_PAGE_INFO_BASIC_COUNT - 1)
12183 return KERN_INVALID_ARGUMENT;
12184 }
12185 break;
12186 default:
12187 return KERN_INVALID_ARGUMENT;
12188 }
12189
12190 disposition = 0;
12191 ref_count = 0;
12192 object_id = 0;
12193 top_object = TRUE;
12194 depth = 0;
12195
12196 retval = KERN_SUCCESS;
12197 offset_in_page = offset & PAGE_MASK;
12198 offset = vm_map_trunc_page(offset);
12199
12200 vm_map_lock_read(map);
12201
12202 /*
12203 * First, find the map entry covering "offset", going down
12204 * submaps if necessary.
12205 */
12206 for (;;) {
12207 if (!vm_map_lookup_entry(map, offset, &map_entry)) {
12208 vm_map_unlock_read(map);
12209 return KERN_INVALID_ADDRESS;
12210 }
12211 /* compute offset from this map entry's start */
12212 offset -= map_entry->vme_start;
12213 /* compute offset into this map entry's object (or submap) */
12214 offset += map_entry->offset;
12215
12216 if (map_entry->is_sub_map) {
12217 vm_map_t sub_map;
12218
12219 sub_map = map_entry->object.sub_map;
12220 vm_map_lock_read(sub_map);
12221 vm_map_unlock_read(map);
12222
12223 map = sub_map;
12224
12225 ref_count = MAX(ref_count, map->ref_count);
12226 continue;
12227 }
12228 break;
12229 }
12230
12231 object = map_entry->object.vm_object;
12232 if (object == VM_OBJECT_NULL) {
12233 /* no object -> no page */
12234 vm_map_unlock_read(map);
12235 goto done;
12236 }
12237
12238 vm_object_lock(object);
12239 vm_map_unlock_read(map);
12240
12241 /*
12242 * Go down the VM object shadow chain until we find the page
12243 * we're looking for.
12244 */
12245 for (;;) {
12246 ref_count = MAX(ref_count, object->ref_count);
12247
12248 m = vm_page_lookup(object, offset);
12249
12250 if (m != VM_PAGE_NULL) {
12251 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
12252 break;
12253 } else {
12254 #if MACH_PAGEMAP
12255 if (object->existence_map) {
12256 if (vm_external_state_get(object->existence_map,
12257 offset) ==
12258 VM_EXTERNAL_STATE_EXISTS) {
12259 /*
12260 * this page has been paged out
12261 */
12262 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
12263 break;
12264 }
12265 } else
12266 #endif
12267 {
12268 if (object->internal &&
12269 object->alive &&
12270 !object->terminating &&
12271 object->pager_ready) {
12272
12273 memory_object_t pager;
12274
12275 vm_object_paging_begin(object);
12276 pager = object->pager;
12277 vm_object_unlock(object);
12278
12279 /*
12280 * Ask the default pager if
12281 * it has this page.
12282 */
12283 kr = memory_object_data_request(
12284 pager,
12285 offset + object->paging_offset,
12286 0, /* just poke the pager */
12287 VM_PROT_READ,
12288 NULL);
12289
12290 vm_object_lock(object);
12291 vm_object_paging_end(object);
12292
12293 if (kr == KERN_SUCCESS) {
12294 /* the default pager has it */
12295 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
12296 break;
12297 }
12298 }
12299 }
12300
12301 if (object->shadow != VM_OBJECT_NULL) {
12302 vm_object_t shadow;
12303
12304 offset += object->vo_shadow_offset;
12305 shadow = object->shadow;
12306
12307 vm_object_lock(shadow);
12308 vm_object_unlock(object);
12309
12310 object = shadow;
12311 top_object = FALSE;
12312 depth++;
12313 } else {
12314 // if (!object->internal)
12315 // break;
12316 // retval = KERN_FAILURE;
12317 // goto done_with_object;
12318 break;
12319 }
12320 }
12321 }
12322 /* The ref_count is not strictly accurate: it measures the number */
12323 /* of entities holding a ref on the object; they may not be mapping */
12324 /* the object, or may not be mapping the section holding the */
12325 /* target page.  It is still a ballpark number, and although it */
12326 /* over-counts, it does pick up the copy-on-write cases. */
12327
12328 /* We could also get a picture of page sharing from pmap_attributes, */
12329 /* but that would under-count, since only faulted-in mappings would */
12330 /* show up. */
12331
12332 if (top_object == TRUE && object->shadow)
12333 disposition |= VM_PAGE_QUERY_PAGE_COPIED;
12334
12335 if (! object->internal)
12336 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
12337
12338 if (m == VM_PAGE_NULL)
12339 goto done_with_object;
12340
12341 if (m->fictitious) {
12342 disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
12343 goto done_with_object;
12344 }
12345 if (m->dirty || pmap_is_modified(m->phys_page))
12346 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
12347
12348 if (m->reference || pmap_is_referenced(m->phys_page))
12349 disposition |= VM_PAGE_QUERY_PAGE_REF;
12350
12351 if (m->speculative)
12352 disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE;
12353
12354 if (m->cs_validated)
12355 disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED;
12356 if (m->cs_tainted)
12357 disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED;
12358
12359 done_with_object:
12360 vm_object_unlock(object);
12361 done:
12362
12363 switch (flavor) {
12364 case VM_PAGE_INFO_BASIC:
12365 basic_info = (vm_page_info_basic_t) info;
12366 basic_info->disposition = disposition;
12367 basic_info->ref_count = ref_count;
12368 basic_info->object_id = (vm_object_id_t) (uintptr_t) object;
12369 basic_info->offset =
12370 (memory_object_offset_t) offset + offset_in_page;
12371 basic_info->depth = depth;
12372 break;
12373 }
12374
12375 return retval;
12376 }
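/*
 * Illustrative sketch (editorial example, not part of the original file):
 * calling vm_map_page_info() directly with the VM_PAGE_INFO_BASIC flavor.
 * The count must be VM_PAGE_INFO_BASIC_COUNT (or that value minus one,
 * for the backwards-compatibility case noted above).
 */
static kern_return_t
example_basic_page_info(
	vm_map_t			map,
	vm_map_offset_t			offset,
	vm_page_info_basic_data_t	*info_p)
{
	mach_msg_type_number_t count = VM_PAGE_INFO_BASIC_COUNT;

	return vm_map_page_info(map, offset, VM_PAGE_INFO_BASIC,
				(vm_page_info_t) info_p, &count);
}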
12377
12378 /*
12379 * vm_map_msync
12380 *
12381 * Synchronizes the specified memory range with its backing store
12382 * image by either flushing or cleaning the contents to the appropriate
12383 * memory manager, engaging in a memory object synchronize dialog with
12384 * that manager. The client doesn't return until the manager issues
12385 * an m_o_s_completed message. MIG magically converts the user task
12386 * parameter to the task's address map.
12387 *
12388 * interpretation of sync_flags
12389 * VM_SYNC_INVALIDATE - discard pages, only return precious
12390 * pages to manager.
12391 *
12392 * VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
12393 * - discard pages, write dirty or precious
12394 * pages back to memory manager.
12395 *
12396 * VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
12397 * - write dirty or precious pages back to
12398 * the memory manager.
12399 *
12400 * VM_SYNC_CONTIGUOUS - does everything normally, but if there
12401 * is a hole in the region, and we would
12402 * have returned KERN_SUCCESS, return
12403 * KERN_INVALID_ADDRESS instead.
12404 *
12405 * NOTE
12406 * The memory object attributes have not yet been implemented; this
12407 * function will have to deal with the invalidate attribute.
12408 *
12409 * RETURNS
12410 * KERN_INVALID_TASK Bad task parameter
12411 * KERN_INVALID_ARGUMENT both sync and async were specified.
12412 * KERN_SUCCESS The usual.
12413 * KERN_INVALID_ADDRESS There was a hole in the region.
12414 */
12415
12416 kern_return_t
12417 vm_map_msync(
12418 vm_map_t map,
12419 vm_map_address_t address,
12420 vm_map_size_t size,
12421 vm_sync_t sync_flags)
12422 {
12423 msync_req_t msr;
12424 msync_req_t new_msr;
12425 queue_chain_t req_q; /* queue of requests for this msync */
12426 vm_map_entry_t entry;
12427 vm_map_size_t amount_left;
12428 vm_object_offset_t offset;
12429 boolean_t do_sync_req;
12430 boolean_t had_hole = FALSE;
12431 memory_object_t pager;
12432
12433 if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
12434 (sync_flags & VM_SYNC_SYNCHRONOUS))
12435 return(KERN_INVALID_ARGUMENT);
12436
12437 /*
12438 * align address and size on page boundaries
12439 */
12440 size = vm_map_round_page(address + size) - vm_map_trunc_page(address);
12441 address = vm_map_trunc_page(address);
12442
12443 if (map == VM_MAP_NULL)
12444 return(KERN_INVALID_TASK);
12445
12446 if (size == 0)
12447 return(KERN_SUCCESS);
12448
12449 queue_init(&req_q);
12450 amount_left = size;
12451
12452 while (amount_left > 0) {
12453 vm_object_size_t flush_size;
12454 vm_object_t object;
12455
12456 vm_map_lock(map);
12457 if (!vm_map_lookup_entry(map,
12458 vm_map_trunc_page(address), &entry)) {
12459
12460 vm_map_size_t skip;
12461
12462 /*
12463 * hole in the address map.
12464 */
12465 had_hole = TRUE;
12466
12467 /*
12468 * Check for empty map.
12469 */
12470 if (entry == vm_map_to_entry(map) &&
12471 entry->vme_next == entry) {
12472 vm_map_unlock(map);
12473 break;
12474 }
12475 /*
12476 * Check that we don't wrap and that
12477 * we have at least one real map entry.
12478 */
12479 if ((map->hdr.nentries == 0) ||
12480 (entry->vme_next->vme_start < address)) {
12481 vm_map_unlock(map);
12482 break;
12483 }
12484 /*
12485 * Move up to the next entry if needed
12486 */
12487 skip = (entry->vme_next->vme_start - address);
12488 if (skip >= amount_left)
12489 amount_left = 0;
12490 else
12491 amount_left -= skip;
12492 address = entry->vme_next->vme_start;
12493 vm_map_unlock(map);
12494 continue;
12495 }
12496
12497 offset = address - entry->vme_start;
12498
12499 /*
12500 * do we have more to flush than is contained in this
12501 * entry?
12502 */
12503 if (amount_left + entry->vme_start + offset > entry->vme_end) {
12504 flush_size = entry->vme_end -
12505 (entry->vme_start + offset);
12506 } else {
12507 flush_size = amount_left;
12508 }
12509 amount_left -= flush_size;
12510 address += flush_size;
12511
12512 if (entry->is_sub_map == TRUE) {
12513 vm_map_t local_map;
12514 vm_map_offset_t local_offset;
12515
12516 local_map = entry->object.sub_map;
12517 local_offset = entry->offset;
12518 vm_map_unlock(map);
12519 if (vm_map_msync(
12520 local_map,
12521 local_offset,
12522 flush_size,
12523 sync_flags) == KERN_INVALID_ADDRESS) {
12524 had_hole = TRUE;
12525 }
12526 continue;
12527 }
12528 object = entry->object.vm_object;
12529
12530 /*
12531 * We can't sync this object if the object has not been
12532 * created yet
12533 */
12534 if (object == VM_OBJECT_NULL) {
12535 vm_map_unlock(map);
12536 continue;
12537 }
12538 offset += entry->offset;
12539
12540 vm_object_lock(object);
12541
12542 if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
12543 int kill_pages = 0;
12544 boolean_t reusable_pages = FALSE;
12545
12546 if (sync_flags & VM_SYNC_KILLPAGES) {
12547 if (object->ref_count == 1 && !object->shadow)
12548 kill_pages = 1;
12549 else
12550 kill_pages = -1;
12551 }
12552 if (kill_pages != -1)
12553 vm_object_deactivate_pages(object, offset,
12554 (vm_object_size_t)flush_size, kill_pages, reusable_pages);
12555 vm_object_unlock(object);
12556 vm_map_unlock(map);
12557 continue;
12558 }
12559 /*
12560 * We can't sync this object if there isn't a pager.
12561 * Don't bother to sync internal objects, since there can't
12562 * be any "permanent" storage for these objects anyway.
12563 */
12564 if ((object->pager == MEMORY_OBJECT_NULL) ||
12565 (object->internal) || (object->private)) {
12566 vm_object_unlock(object);
12567 vm_map_unlock(map);
12568 continue;
12569 }
12570 /*
12571 * keep reference on the object until syncing is done
12572 */
12573 vm_object_reference_locked(object);
12574 vm_object_unlock(object);
12575
12576 vm_map_unlock(map);
12577
12578 do_sync_req = vm_object_sync(object,
12579 offset,
12580 flush_size,
12581 sync_flags & VM_SYNC_INVALIDATE,
12582 ((sync_flags & VM_SYNC_SYNCHRONOUS) ||
12583 (sync_flags & VM_SYNC_ASYNCHRONOUS)),
12584 sync_flags & VM_SYNC_SYNCHRONOUS);
12585 /*
12586 * only send an m_o_s if we returned pages or if the entry
12587 * is writable (i.e. dirty pages may have already been sent back)
12588 */
12589 if (!do_sync_req) {
12590 if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) {
12591 /*
12592 * clear out the clustering and read-ahead hints
12593 */
12594 vm_object_lock(object);
12595
12596 object->pages_created = 0;
12597 object->pages_used = 0;
12598 object->sequential = 0;
12599 object->last_alloc = 0;
12600
12601 vm_object_unlock(object);
12602 }
12603 vm_object_deallocate(object);
12604 continue;
12605 }
12606 msync_req_alloc(new_msr);
12607
12608 vm_object_lock(object);
12609 offset += object->paging_offset;
12610
12611 new_msr->offset = offset;
12612 new_msr->length = flush_size;
12613 new_msr->object = object;
12614 new_msr->flag = VM_MSYNC_SYNCHRONIZING;
12615 re_iterate:
12616
12617 /*
12618 * We can't sync this object if there isn't a pager. The
12619 * pager can disappear anytime we're not holding the object
12620 * lock. So this has to be checked anytime we goto re_iterate.
12621 */
12622
12623 pager = object->pager;
12624
12625 if (pager == MEMORY_OBJECT_NULL) {
12626 vm_object_unlock(object);
12627 vm_object_deallocate(object);
12628 continue;
12629 }
12630
12631 queue_iterate(&object->msr_q, msr, msync_req_t, msr_q) {
12632 /*
12633 * need to check for an overlapping entry; if found, wait
12634 * for the overlapping msr to be done, then reiterate
12635 */
12636 msr_lock(msr);
12637 if (msr->flag == VM_MSYNC_SYNCHRONIZING &&
12638 ((offset >= msr->offset &&
12639 offset < (msr->offset + msr->length)) ||
12640 (msr->offset >= offset &&
12641 msr->offset < (offset + flush_size))))
12642 {
12643 assert_wait((event_t) msr,THREAD_INTERRUPTIBLE);
12644 msr_unlock(msr);
12645 vm_object_unlock(object);
12646 thread_block(THREAD_CONTINUE_NULL);
12647 vm_object_lock(object);
12648 goto re_iterate;
12649 }
12650 msr_unlock(msr);
12651 }/* queue_iterate */
12652
12653 queue_enter(&object->msr_q, new_msr, msync_req_t, msr_q);
12654
12655 vm_object_paging_begin(object);
12656 vm_object_unlock(object);
12657
12658 queue_enter(&req_q, new_msr, msync_req_t, req_q);
12659
12660 (void) memory_object_synchronize(
12661 pager,
12662 offset,
12663 flush_size,
12664 sync_flags & ~VM_SYNC_CONTIGUOUS);
12665
12666 vm_object_lock(object);
12667 vm_object_paging_end(object);
12668 vm_object_unlock(object);
12669 }/* while */
12670
12671 /*
12672 * wait for memory_object_synchronize_completed messages from pager(s)
12673 */
12674
12675 while (!queue_empty(&req_q)) {
12676 msr = (msync_req_t)queue_first(&req_q);
12677 msr_lock(msr);
12678 while(msr->flag != VM_MSYNC_DONE) {
12679 assert_wait((event_t) msr, THREAD_INTERRUPTIBLE);
12680 msr_unlock(msr);
12681 thread_block(THREAD_CONTINUE_NULL);
12682 msr_lock(msr);
12683 }/* while */
12684 queue_remove(&req_q, msr, msync_req_t, req_q);
12685 msr_unlock(msr);
12686 vm_object_deallocate(msr->object);
12687 msync_req_free(msr);
12688 }/* while */
12689
12690 /* for proper msync() behaviour */
12691 if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS))
12692 return(KERN_INVALID_ADDRESS);
12693
12694 return(KERN_SUCCESS);
12695 }/* vm_msync */
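/*
 * Illustrative sketch (editorial example, not part of the original file):
 * a synchronous msync of a range, asking that dirty and precious pages be
 * written back and that KERN_INVALID_ADDRESS be returned if the range
 * contains a hole (see the sync_flags interpretation above).
 */
static kern_return_t
example_flush_range(
	vm_map_t		map,
	vm_map_address_t	address,
	vm_map_size_t		size)
{
	return vm_map_msync(map, address, size,
			    VM_SYNC_SYNCHRONOUS | VM_SYNC_CONTIGUOUS);
}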
12696
12697 /*
12698 * Routine: convert_port_entry_to_map
12699 * Purpose:
12700 * Convert from a port specifying an entry or a task
12701 * to a map. Doesn't consume the port ref; produces a map ref,
12702 * which may be null. Unlike convert_port_to_map, the
12703 * port may be backed by either a task or a named entry.
12704 * Conditions:
12705 * Nothing locked.
12706 */
12707
12708
12709 vm_map_t
12710 convert_port_entry_to_map(
12711 ipc_port_t port)
12712 {
12713 vm_map_t map;
12714 vm_named_entry_t named_entry;
12715 uint32_t try_failed_count = 0;
12716
12717 if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
12718 while(TRUE) {
12719 ip_lock(port);
12720 if(ip_active(port) && (ip_kotype(port)
12721 == IKOT_NAMED_ENTRY)) {
12722 named_entry =
12723 (vm_named_entry_t)port->ip_kobject;
12724 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
12725 ip_unlock(port);
12726
12727 try_failed_count++;
12728 mutex_pause(try_failed_count);
12729 continue;
12730 }
12731 named_entry->ref_count++;
12732 lck_mtx_unlock(&(named_entry)->Lock);
12733 ip_unlock(port);
12734 if ((named_entry->is_sub_map) &&
12735 (named_entry->protection
12736 & VM_PROT_WRITE)) {
12737 map = named_entry->backing.map;
12738 } else {
12739 mach_destroy_memory_entry(port);
12740 return VM_MAP_NULL;
12741 }
12742 vm_map_reference_swap(map);
12743 mach_destroy_memory_entry(port);
12744 break;
12745 }
12746 else
12747 return VM_MAP_NULL;
12748 }
12749 }
12750 else
12751 map = convert_port_to_map(port);
12752
12753 return map;
12754 }
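/*
 * Illustrative sketch (editorial example, not part of the original file):
 * the conversion produces a map reference (or VM_MAP_NULL), so a caller
 * is expected to drop that reference with vm_map_deallocate() when done.
 */
static void
example_with_port_map(
	ipc_port_t	port)
{
	vm_map_t map;

	map = convert_port_entry_to_map(port);
	if (map == VM_MAP_NULL)
		return;

	/* ... operate on "map" ... */

	vm_map_deallocate(map);	/* drop the reference produced above */
}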
12755
12756 /*
12757 * Routine: convert_port_entry_to_object
12758 * Purpose:
12759 * Convert from a port specifying a named entry to an
12760 * object. Doesn't consume the port ref; produces an object ref,
12761 * which may be null.
12762 * Conditions:
12763 * Nothing locked.
12764 */
12765
12766
12767 vm_object_t
12768 convert_port_entry_to_object(
12769 ipc_port_t port)
12770 {
12771 vm_object_t object;
12772 vm_named_entry_t named_entry;
12773 uint32_t try_failed_count = 0;
12774
12775 if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
12776 while(TRUE) {
12777 ip_lock(port);
12778 if(ip_active(port) && (ip_kotype(port)
12779 == IKOT_NAMED_ENTRY)) {
12780 named_entry =
12781 (vm_named_entry_t)port->ip_kobject;
12782 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
12783 ip_unlock(port);
12784
12785 try_failed_count++;
12786 mutex_pause(try_failed_count);
12787 continue;
12788 }
12789 named_entry->ref_count++;
12790 lck_mtx_unlock(&(named_entry)->Lock);
12791 ip_unlock(port);
12792 if ((!named_entry->is_sub_map) &&
12793 (!named_entry->is_pager) &&
12794 (named_entry->protection
12795 & VM_PROT_WRITE)) {
12796 object = named_entry->backing.object;
12797 } else {
12798 mach_destroy_memory_entry(port);
12799 return (vm_object_t)NULL;
12800 }
12801 vm_object_reference(named_entry->backing.object);
12802 mach_destroy_memory_entry(port);
12803 break;
12804 }
12805 else
12806 return (vm_object_t)NULL;
12807 }
12808 } else {
12809 return (vm_object_t)NULL;
12810 }
12811
12812 return object;
12813 }
12814
12815 /*
12816 * Export routines to other components for the things we access locally through
12817 * macros.
12818 */
12819 #undef current_map
12820 vm_map_t
12821 current_map(void)
12822 {
12823 return (current_map_fast());
12824 }
12825
12826 /*
12827 * vm_map_reference:
12828 *
12829 * Most code internal to the osfmk will go through a
12830 * macro defining this. This is always here for the
12831 * use of other kernel components.
12832 */
12833 #undef vm_map_reference
12834 void
12835 vm_map_reference(
12836 register vm_map_t map)
12837 {
12838 if (map == VM_MAP_NULL)
12839 return;
12840
12841 lck_mtx_lock(&map->s_lock);
12842 #if TASK_SWAPPER
12843 assert(map->res_count > 0);
12844 assert(map->ref_count >= map->res_count);
12845 map->res_count++;
12846 #endif
12847 map->ref_count++;
12848 lck_mtx_unlock(&map->s_lock);
12849 }
12850
12851 /*
12852 * vm_map_deallocate:
12853 *
12854 * Removes a reference from the specified map,
12855 * destroying it if no references remain.
12856 * The map should not be locked.
12857 */
12858 void
12859 vm_map_deallocate(
12860 register vm_map_t map)
12861 {
12862 unsigned int ref;
12863
12864 if (map == VM_MAP_NULL)
12865 return;
12866
12867 lck_mtx_lock(&map->s_lock);
12868 ref = --map->ref_count;
12869 if (ref > 0) {
12870 vm_map_res_deallocate(map);
12871 lck_mtx_unlock(&map->s_lock);
12872 return;
12873 }
12874 assert(map->ref_count == 0);
12875 lck_mtx_unlock(&map->s_lock);
12876
12877 #if TASK_SWAPPER
12878 /*
12879 * The map residence count isn't decremented here because
12880 * the vm_map_delete below will traverse the entire map,
12881 * deleting entries, and the residence counts on objects
12882 * and sharing maps will go away then.
12883 */
12884 #endif
12885
12886 vm_map_destroy(map, VM_MAP_NO_FLAGS);
12887 }
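/*
 * Illustrative sketch (editorial example, not part of the original file):
 * the usual pairing of the exported reference routines; every
 * vm_map_reference() taken by an external component should eventually be
 * balanced by a vm_map_deallocate().
 */
static void
example_hold_map_briefly(
	vm_map_t	map)
{
	vm_map_reference(map);		/* take an extra reference */

	/* ... the map cannot be destroyed while it is used here ... */

	vm_map_deallocate(map);		/* drop it; may destroy the map */
}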
12888
12889
12890 void
12891 vm_map_disable_NX(vm_map_t map)
12892 {
12893 if (map == NULL)
12894 return;
12895 if (map->pmap == NULL)
12896 return;
12897
12898 pmap_disable_NX(map->pmap);
12899 }
12900
12901 void
12902 vm_map_disallow_data_exec(vm_map_t map)
12903 {
12904 if (map == NULL)
12905 return;
12906
12907 map->map_disallow_data_exec = TRUE;
12908 }
12909
12910 /* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
12911 * more descriptive.
12912 */
12913 void
12914 vm_map_set_32bit(vm_map_t map)
12915 {
12916 map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
12917 }
12918
12919
12920 void
12921 vm_map_set_64bit(vm_map_t map)
12922 {
12923 map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
12924 }
12925
12926 vm_map_offset_t
12927 vm_compute_max_offset(unsigned is64)
12928 {
12929 return (is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS);
12930 }
12931
12932 boolean_t
12933 vm_map_is_64bit(
12934 vm_map_t map)
12935 {
12936 return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
12937 }
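/*
 * Illustrative sketch (editorial example, not part of the original file):
 * the relationship between the address-size setters above and
 * vm_compute_max_offset()/vm_map_is_64bit().  The helper name is
 * hypothetical; is64 is assumed to be TRUE or FALSE.
 */
static void
example_size_map_for_64bit(
	vm_map_t	map,
	boolean_t	is64)
{
	if (is64)
		vm_map_set_64bit(map);	/* max_offset becomes MACH_VM_MAX_ADDRESS */
	else
		vm_map_set_32bit(map);	/* max_offset becomes VM_MAX_ADDRESS */

	/* vm_compute_max_offset() yields the same limits... */
	assert(map->max_offset == vm_compute_max_offset(is64));
	/* ...and vm_map_is_64bit() reports TRUE only in the 64-bit case */
	assert(vm_map_is_64bit(map) == (is64 ? TRUE : FALSE));
}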
12938
12939 boolean_t
12940 vm_map_has_4GB_pagezero(
12941 vm_map_t map)
12942 {
12943 /*
12944 * XXX FBDP
12945 * We should lock the VM map (for read) here but we can get away
12946 * with it for now because there can't really be any race condition:
12947 * the VM map's min_offset is changed only when the VM map is created
12948 * and when the zero page is established (when the binary gets loaded),
12949 * and this routine gets called only when the task terminates and the
12950 * VM map is being torn down, and when a new map is created via
12951 * load_machfile()/execve().
12952 */
12953 return (map->min_offset >= 0x100000000ULL);
12954 }
12955
12956 void
12957 vm_map_set_4GB_pagezero(vm_map_t map)
12958 {
12959 #if defined(__i386__)
12960 pmap_set_4GB_pagezero(map->pmap);
12961 #else
12962 #pragma unused(map)
12963 #endif
12964
12965 }
12966
12967 void
12968 vm_map_clear_4GB_pagezero(vm_map_t map)
12969 {
12970 #if defined(__i386__)
12971 pmap_clear_4GB_pagezero(map->pmap);
12972 #else
12973 #pragma unused(map)
12974 #endif
12975 }
12976
12977 /*
12978 * Raise a VM map's minimum offset.
12979 * To strictly enforce "page zero" reservation.
12980 */
12981 kern_return_t
12982 vm_map_raise_min_offset(
12983 vm_map_t map,
12984 vm_map_offset_t new_min_offset)
12985 {
12986 vm_map_entry_t first_entry;
12987
12988 new_min_offset = vm_map_round_page(new_min_offset);
12989
12990 vm_map_lock(map);
12991
12992 if (new_min_offset < map->min_offset) {
12993 /*
12994 * Can't move min_offset backwards, as that would expose
12995 * a part of the address space that was previously, and for
12996 * possibly good reasons, inaccessible.
12997 */
12998 vm_map_unlock(map);
12999 return KERN_INVALID_ADDRESS;
13000 }
13001
13002 first_entry = vm_map_first_entry(map);
13003 if (first_entry != vm_map_to_entry(map) &&
13004 first_entry->vme_start < new_min_offset) {
13005 /*
13006 * Some memory was already allocated below the new
13007 * minimum offset. It's too late to change it now...
13008 */
13009 vm_map_unlock(map);
13010 return KERN_NO_SPACE;
13011 }
13012
13013 map->min_offset = new_min_offset;
13014
13015 vm_map_unlock(map);
13016
13017 return KERN_SUCCESS;
13018 }
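/*
 * Illustrative sketch (editorial example, not part of the original file):
 * reserving a one-page "page zero" by raising the map's minimum offset,
 * which only succeeds if nothing is mapped below the new minimum.  The
 * helper name and the size chosen here are hypothetical.
 */
static kern_return_t
example_reserve_pagezero(
	vm_map_t	map)
{
	/* fails with KERN_NO_SPACE if memory already exists below PAGE_SIZE */
	return vm_map_raise_min_offset(map, (vm_map_offset_t) PAGE_SIZE);
}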
13019
13020 /*
13021 * Set the limit on the maximum amount of user wired memory allowed for this map.
13022 * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side of
13023 * the kernel. The limits are checked on the Mach VM side, so we keep a copy here
13024 * to avoid having to reach over to the BSD data structures.
13025 */
13026
13027 void
13028 vm_map_set_user_wire_limit(vm_map_t map,
13029 vm_size_t limit)
13030 {
13031 map->user_wire_limit = limit;
13032 }
13033
13034
13035 void vm_map_switch_protect(vm_map_t map,
13036 boolean_t val)
13037 {
13038 vm_map_lock(map);
13039 map->switch_protect=val;
13040 vm_map_unlock(map);
13041 }
13042
13043 /* Add (generate) code signature for memory range */
13044 #if CONFIG_DYNAMIC_CODE_SIGNING
13045 kern_return_t vm_map_sign(vm_map_t map,
13046 vm_map_offset_t start,
13047 vm_map_offset_t end)
13048 {
13049 vm_map_entry_t entry;
13050 vm_page_t m;
13051 vm_object_t object;
13052
13053 /*
13054 * Vet all the input parameters and current type and state of the
13055 * underlying object. Return with an error if anything is amiss.
13056 */
13057 if (map == VM_MAP_NULL)
13058 return(KERN_INVALID_ARGUMENT);
13059
13060 vm_map_lock_read(map);
13061
13062 if (!vm_map_lookup_entry(map, start, &entry) || entry->is_sub_map) {
13063 /*
13064 * Must pass a valid non-submap address.
13065 */
13066 vm_map_unlock_read(map);
13067 return(KERN_INVALID_ADDRESS);
13068 }
13069
13070 if((entry->vme_start > start) || (entry->vme_end < end)) {
13071 /*
13072 * Map entry doesn't cover the requested range. Not handling
13073 * this situation currently.
13074 */
13075 vm_map_unlock_read(map);
13076 return(KERN_INVALID_ARGUMENT);
13077 }
13078
13079 object = entry->object.vm_object;
13080 if (object == VM_OBJECT_NULL) {
13081 /*
13082 * Object must already be present or we can't sign.
13083 */
13084 vm_map_unlock_read(map);
13085 return KERN_INVALID_ARGUMENT;
13086 }
13087
13088 vm_object_lock(object);
13089 vm_map_unlock_read(map);
13090
13091 while(start < end) {
13092 uint32_t refmod;
13093
13094 m = vm_page_lookup(object, start - entry->vme_start + entry->offset );
13095 if (m==VM_PAGE_NULL) {
13096 /* should we try to fault a page here? we can probably
13097 * demand that it exists and is locked for this request */
13098 vm_object_unlock(object);
13099 return KERN_FAILURE;
13100 }
13101 /* deal with special page status */
13102 if (m->busy ||
13103 (m->unusual && (m->error || m->restart || m->private || m->absent))) {
13104 vm_object_unlock(object);
13105 return KERN_FAILURE;
13106 }
13107
13108 /* Page is OK... now "validate" it */
13109 /* This is the place where we'll call out to create a code
13110 * directory, later */
13111 m->cs_validated = TRUE;
13112
13113 /* The page is now "clean" for codesigning purposes. That means
13114 * we don't consider it as modified (wpmapped) anymore. But
13115 * we'll disconnect the page so we note any future modification
13116 * attempts. */
13117 m->wpmapped = FALSE;
13118 refmod = pmap_disconnect(m->phys_page);
13119
13120 /* Pull the dirty status from the pmap, since we cleared the
13121 * wpmapped bit */
13122 if ((refmod & VM_MEM_MODIFIED) && !m->dirty) {
13123 m->dirty = TRUE;
13124 }
13125
13126 /* On to the next page */
13127 start += PAGE_SIZE;
13128 }
13129 vm_object_unlock(object);
13130
13131 return KERN_SUCCESS;
13132 }
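/*
 * Illustrative sketch (editorial example, not part of the original file):
 * signing one page's worth of an existing mapping.  vm_map_sign() requires
 * that a single, already-populated map entry covers the whole range.
 */
static kern_return_t
example_sign_page(
	vm_map_t	map,
	vm_map_offset_t	start)
{
	return vm_map_sign(map, start, start + PAGE_SIZE);
}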
13133 #endif
13134
13135 #if CONFIG_FREEZE
13136
13137 kern_return_t vm_map_freeze_walk(
13138 vm_map_t map,
13139 unsigned int *purgeable_count,
13140 unsigned int *wired_count,
13141 unsigned int *clean_count,
13142 unsigned int *dirty_count,
13143 boolean_t *has_shared)
13144 {
13145 vm_map_entry_t entry;
13146
13147 vm_map_lock_read(map);
13148
13149 *purgeable_count = *wired_count = *clean_count = *dirty_count = 0;
13150 *has_shared = FALSE;
13151
13152 for (entry = vm_map_first_entry(map);
13153 entry != vm_map_to_entry(map);
13154 entry = entry->vme_next) {
13155 unsigned int purgeable, clean, dirty, wired;
13156 boolean_t shared;
13157
13158 if ((entry->object.vm_object == 0) ||
13159 (entry->is_sub_map) ||
13160 (entry->object.vm_object->phys_contiguous)) {
13161 continue;
13162 }
13163
13164 vm_object_pack(&purgeable, &wired, &clean, &dirty, &shared, entry->object.vm_object, VM_OBJECT_NULL, NULL, NULL);
13165
13166 *purgeable_count += purgeable;
13167 *wired_count += wired;
13168 *clean_count += clean;
13169 *dirty_count += dirty;
13170
13171 if (shared) {
13172 *has_shared = TRUE;
13173 }
13174 }
13175
13176 vm_map_unlock_read(map);
13177
13178 return KERN_SUCCESS;
13179 }
13180
13181 kern_return_t vm_map_freeze(
13182 vm_map_t map,
13183 unsigned int *purgeable_count,
13184 unsigned int *wired_count,
13185 unsigned int *clean_count,
13186 unsigned int *dirty_count,
13187 boolean_t *has_shared)
13188 {
13189 vm_map_entry_t entry2 = VM_MAP_ENTRY_NULL;
13190 vm_object_t compact_object = VM_OBJECT_NULL;
13191 vm_object_offset_t offset = 0x0;
13192 kern_return_t kr = KERN_SUCCESS;
13193 void *default_freezer_toc = NULL;
13194 boolean_t cleanup = FALSE;
13195
13196 *purgeable_count = *wired_count = *clean_count = *dirty_count = 0;
13197 *has_shared = FALSE;
13198
13199 /* Create our compact object */
13200 compact_object = vm_object_allocate((vm_map_offset_t)(VM_MAX_ADDRESS) - (vm_map_offset_t)(VM_MIN_ADDRESS));
13201 if (!compact_object) {
13202 kr = KERN_FAILURE;
13203 goto done;
13204 }
13205
13206 default_freezer_toc = default_freezer_mapping_create(compact_object, offset);
13207 if (!default_freezer_toc) {
13208 kr = KERN_FAILURE;
13209 goto done;
13210 }
13211
13212 /*
13213 * We need the exclusive lock here so that we can
13214 * block any page faults or lookups while we are
13215 * in the middle of freezing this vm map.
13216 */
13217 vm_map_lock(map);
13218
13219 if (map->default_freezer_toc != NULL){
13220 /*
13221 * This map has already been frozen.
13222 */
13223 cleanup = TRUE;
13224 kr = KERN_SUCCESS;
13225 goto done;
13226 }
13227
13228 /* Get a mapping in place for the freezing about to commence */
13229 map->default_freezer_toc = default_freezer_toc;
13230
13231 vm_object_lock(compact_object);
13232
13233 for (entry2 = vm_map_first_entry(map);
13234 entry2 != vm_map_to_entry(map);
13235 entry2 = entry2->vme_next) {
13236
13237 vm_object_t src_object = entry2->object.vm_object;
13238
13239 /* If eligible, scan the entry, moving eligible pages over to our parent object */
13240 if (entry2->object.vm_object && !entry2->is_sub_map && !entry2->object.vm_object->phys_contiguous) {
13241 unsigned int purgeable, clean, dirty, wired;
13242 boolean_t shared;
13243
13244 vm_object_pack(&purgeable, &wired, &clean, &dirty, &shared,
13245 src_object, compact_object, &default_freezer_toc, &offset);
13246
13247 *purgeable_count += purgeable;
13248 *wired_count += wired;
13249 *clean_count += clean;
13250 *dirty_count += dirty;
13251
13252 if (shared) {
13253 *has_shared = TRUE;
13254 }
13255 }
13256 }
13257
13258 vm_object_unlock(compact_object);
13259
13260 /* Finally, throw out the pages to swap */
13261 vm_object_pageout(compact_object);
13262
13263 done:
13264 vm_map_unlock(map);
13265
13266 /* Unwind if there was a failure */
13267 if ((cleanup) || (KERN_SUCCESS != kr)) {
13268 if (default_freezer_toc){
13269 default_freezer_mapping_free(&map->default_freezer_toc, TRUE);
13270 }
13271 if (compact_object){
13272 vm_object_deallocate(compact_object);
13273 }
13274 }
13275
13276 return kr;
13277 }
13278
13279 __private_extern__ vm_object_t default_freezer_get_compact_vm_object( void** );
13280
13281 void
13282 vm_map_thaw(
13283 vm_map_t map)
13284 {
13285 void **default_freezer_toc;
13286 vm_object_t compact_object;
13287
13288 vm_map_lock(map);
13289
13290 if (map->default_freezer_toc == NULL){
13291 /*
13292 * This map is not in a frozen state.
13293 */
13294 goto out;
13295 }
13296
13297 default_freezer_toc = &(map->default_freezer_toc);
13298
13299 compact_object = default_freezer_get_compact_vm_object(default_freezer_toc);
13300
13301 /* Bring the pages back in */
13302 vm_object_pagein(compact_object);
13303
13304 /* Shift pages back to their original objects */
13305 vm_object_unpack(compact_object, default_freezer_toc);
13306
13307 vm_object_deallocate(compact_object);
13308
13309 map->default_freezer_toc = NULL;
13310
13311 out:
13312 vm_map_unlock(map);
13313 }
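/*
 * Illustrative sketch (editorial example, not part of the original file):
 * the freeze/thaw pairing.  vm_map_freeze() packs eligible pages into a
 * compact object and pages them out; vm_map_thaw() later brings them back
 * and unpacks them into their original objects.  The helper name is
 * hypothetical.
 */
static kern_return_t
example_freeze_then_thaw(
	vm_map_t	map)
{
	unsigned int purgeable, wired, clean, dirty;
	boolean_t has_shared;
	kern_return_t kr;

	kr = vm_map_freeze(map, &purgeable, &wired,
			   &clean, &dirty, &has_shared);
	if (kr == KERN_SUCCESS)
		vm_map_thaw(map);	/* restore the frozen pages */

	return kr;
}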
13314 #endif
13315
13316 #if !CONFIG_EMBEDDED
13317 /*
13318 * vm_map_entry_should_cow_for_true_share:
13319 *
13320 * Determines if the map entry should be clipped and setup for copy-on-write
13321 * to avoid applying "true_share" to a large VM object when only a subset is
13322 * targeted.
13323 *
13324 * For now, we target only the map entries created for the Objective C
13325 * Garbage Collector, which initially have the following properties:
13326 * - alias == VM_MEMORY_MALLOC
13327 * - wired_count == 0
13328 * - !needs_copy
13329 * and a VM object with:
13330 * - internal
13331 * - copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC
13332 * - !true_share
13333 * - vo_size == ANON_CHUNK_SIZE
13334 */
13335 boolean_t
13336 vm_map_entry_should_cow_for_true_share(
13337 vm_map_entry_t entry)
13338 {
13339 vm_object_t object;
13340
13341 if (entry->is_sub_map) {
13342 /* entry does not point at a VM object */
13343 return FALSE;
13344 }
13345
13346 if (entry->needs_copy) {
13347 /* already set for copy_on_write: done! */
13348 return FALSE;
13349 }
13350
13351 if (entry->alias != VM_MEMORY_MALLOC) {
13352 /* not tagged as an Objective-C Garbage Collector entry */
13353 return FALSE;
13354 }
13355
13356 if (entry->wired_count) {
13357 /* wired: can't change the map entry... */
13358 return FALSE;
13359 }
13360
13361 object = entry->object.vm_object;
13362
13363 if (object == VM_OBJECT_NULL) {
13364 /* no object yet... */
13365 return FALSE;
13366 }
13367
13368 if (!object->internal) {
13369 /* not an internal object */
13370 return FALSE;
13371 }
13372
13373 if (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
13374 /* not the default copy strategy */
13375 return FALSE;
13376 }
13377
13378 if (object->true_share) {
13379 /* already true_share: too late to avoid it */
13380 return FALSE;
13381 }
13382
13383 if (object->vo_size != ANON_CHUNK_SIZE) {
13384 /* not an object created for the ObjC Garbage Collector */
13385 return FALSE;
13386 }
13387
13388 /*
13389 * All the criteria match: we have a large object being targeted for "true_share".
13390 * To limit the adverse side effects of "true_share", tell the caller to
13391 * try to avoid setting up the entire object for "true_share" by clipping the
13392 * targeted range and setting it up for copy-on-write.
13393 */
13394 return TRUE;
13395 }
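/*
 * Illustrative sketch (editorial example, not part of the original file):
 * how a caller holding the map lock might act on this predicate, clipping
 * the entry to the range it actually needs before setting that clipped
 * range up for copy-on-write.  The helper name and start/end parameters
 * are hypothetical.
 */
static void
example_clip_for_true_share(
	vm_map_t	map,
	vm_map_entry_t	entry,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	if (!vm_map_entry_should_cow_for_true_share(entry))
		return;

	/* narrow the entry to just the targeted range... */
	vm_map_clip_start(map, entry, vm_map_trunc_page(start));
	vm_map_clip_end(map, entry, vm_map_round_page(end));

	/* ...then set the clipped range up for copy-on-write instead of
	 * marking the whole backing object "true_share". */
}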
13396 #endif /* !CONFIG_EMBEDDED */