apple/xnu (xnu-1699.24.8): osfmk/vm/vm_map.c
1 /*
2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_map.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * Virtual memory mapping module.
64 */
65
66 #include <task_swapper.h>
67 #include <mach_assert.h>
68 #include <libkern/OSAtomic.h>
69
70 #include <mach/kern_return.h>
71 #include <mach/port.h>
72 #include <mach/vm_attributes.h>
73 #include <mach/vm_param.h>
74 #include <mach/vm_behavior.h>
75 #include <mach/vm_statistics.h>
76 #include <mach/memory_object.h>
77 #include <mach/mach_vm.h>
78 #include <machine/cpu_capabilities.h>
79 #include <mach/sdt.h>
80
81 #include <kern/assert.h>
82 #include <kern/counters.h>
83 #include <kern/kalloc.h>
84 #include <kern/zalloc.h>
85
86 #include <vm/cpm.h>
87 #include <vm/vm_init.h>
88 #include <vm/vm_fault.h>
89 #include <vm/vm_map.h>
90 #include <vm/vm_object.h>
91 #include <vm/vm_page.h>
92 #include <vm/vm_pageout.h>
93 #include <vm/vm_kern.h>
94 #include <ipc/ipc_port.h>
95 #include <kern/sched_prim.h>
96 #include <kern/misc_protos.h>
97 #include <machine/db_machdep.h>
98 #include <kern/xpr.h>
99
100 #include <mach/vm_map_server.h>
101 #include <mach/mach_host_server.h>
102 #include <vm/vm_protos.h>
103 #include <vm/vm_purgeable_internal.h>
104
105 #include <vm/vm_protos.h>
106 #include <vm/vm_shared_region.h>
107 #include <vm/vm_map_store.h>
108
109 /* Internal prototypes
110 */
111
112 static void vm_map_simplify_range(
113 vm_map_t map,
114 vm_map_offset_t start,
115 vm_map_offset_t end); /* forward */
116
117 static boolean_t vm_map_range_check(
118 vm_map_t map,
119 vm_map_offset_t start,
120 vm_map_offset_t end,
121 vm_map_entry_t *entry);
122
123 static vm_map_entry_t _vm_map_entry_create(
124 struct vm_map_header *map_header);
125
126 static void _vm_map_entry_dispose(
127 struct vm_map_header *map_header,
128 vm_map_entry_t entry);
129
130 static void vm_map_pmap_enter(
131 vm_map_t map,
132 vm_map_offset_t addr,
133 vm_map_offset_t end_addr,
134 vm_object_t object,
135 vm_object_offset_t offset,
136 vm_prot_t protection);
137
138 static void _vm_map_clip_end(
139 struct vm_map_header *map_header,
140 vm_map_entry_t entry,
141 vm_map_offset_t end);
142
143 static void _vm_map_clip_start(
144 struct vm_map_header *map_header,
145 vm_map_entry_t entry,
146 vm_map_offset_t start);
147
148 static void vm_map_entry_delete(
149 vm_map_t map,
150 vm_map_entry_t entry);
151
152 static kern_return_t vm_map_delete(
153 vm_map_t map,
154 vm_map_offset_t start,
155 vm_map_offset_t end,
156 int flags,
157 vm_map_t zap_map);
158
159 static kern_return_t vm_map_copy_overwrite_unaligned(
160 vm_map_t dst_map,
161 vm_map_entry_t entry,
162 vm_map_copy_t copy,
163 vm_map_address_t start);
164
165 static kern_return_t vm_map_copy_overwrite_aligned(
166 vm_map_t dst_map,
167 vm_map_entry_t tmp_entry,
168 vm_map_copy_t copy,
169 vm_map_offset_t start,
170 pmap_t pmap);
171
172 static kern_return_t vm_map_copyin_kernel_buffer(
173 vm_map_t src_map,
174 vm_map_address_t src_addr,
175 vm_map_size_t len,
176 boolean_t src_destroy,
177 vm_map_copy_t *copy_result); /* OUT */
178
179 static kern_return_t vm_map_copyout_kernel_buffer(
180 vm_map_t map,
181 vm_map_address_t *addr, /* IN/OUT */
182 vm_map_copy_t copy,
183 boolean_t overwrite);
184
185 static void vm_map_fork_share(
186 vm_map_t old_map,
187 vm_map_entry_t old_entry,
188 vm_map_t new_map);
189
190 static boolean_t vm_map_fork_copy(
191 vm_map_t old_map,
192 vm_map_entry_t *old_entry_p,
193 vm_map_t new_map);
194
195 void vm_map_region_top_walk(
196 vm_map_entry_t entry,
197 vm_region_top_info_t top);
198
199 void vm_map_region_walk(
200 vm_map_t map,
201 vm_map_offset_t va,
202 vm_map_entry_t entry,
203 vm_object_offset_t offset,
204 vm_object_size_t range,
205 vm_region_extended_info_t extended,
206 boolean_t look_for_pages);
207
208 static kern_return_t vm_map_wire_nested(
209 vm_map_t map,
210 vm_map_offset_t start,
211 vm_map_offset_t end,
212 vm_prot_t access_type,
213 boolean_t user_wire,
214 pmap_t map_pmap,
215 vm_map_offset_t pmap_addr);
216
217 static kern_return_t vm_map_unwire_nested(
218 vm_map_t map,
219 vm_map_offset_t start,
220 vm_map_offset_t end,
221 boolean_t user_wire,
222 pmap_t map_pmap,
223 vm_map_offset_t pmap_addr);
224
225 static kern_return_t vm_map_overwrite_submap_recurse(
226 vm_map_t dst_map,
227 vm_map_offset_t dst_addr,
228 vm_map_size_t dst_size);
229
230 static kern_return_t vm_map_copy_overwrite_nested(
231 vm_map_t dst_map,
232 vm_map_offset_t dst_addr,
233 vm_map_copy_t copy,
234 boolean_t interruptible,
235 pmap_t pmap,
236 boolean_t discard_on_success);
237
238 static kern_return_t vm_map_remap_extract(
239 vm_map_t map,
240 vm_map_offset_t addr,
241 vm_map_size_t size,
242 boolean_t copy,
243 struct vm_map_header *map_header,
244 vm_prot_t *cur_protection,
245 vm_prot_t *max_protection,
246 vm_inherit_t inheritance,
247 boolean_t pageable);
248
249 static kern_return_t vm_map_remap_range_allocate(
250 vm_map_t map,
251 vm_map_address_t *address,
252 vm_map_size_t size,
253 vm_map_offset_t mask,
254 int flags,
255 vm_map_entry_t *map_entry);
256
257 static void vm_map_region_look_for_page(
258 vm_map_t map,
259 vm_map_offset_t va,
260 vm_object_t object,
261 vm_object_offset_t offset,
262 int max_refcnt,
263 int depth,
264 vm_region_extended_info_t extended);
265
266 static int vm_map_region_count_obj_refs(
267 vm_map_entry_t entry,
268 vm_object_t object);
269
270
271 static kern_return_t vm_map_willneed(
272 vm_map_t map,
273 vm_map_offset_t start,
274 vm_map_offset_t end);
275
276 static kern_return_t vm_map_reuse_pages(
277 vm_map_t map,
278 vm_map_offset_t start,
279 vm_map_offset_t end);
280
281 static kern_return_t vm_map_reusable_pages(
282 vm_map_t map,
283 vm_map_offset_t start,
284 vm_map_offset_t end);
285
286 static kern_return_t vm_map_can_reuse(
287 vm_map_t map,
288 vm_map_offset_t start,
289 vm_map_offset_t end);
290
291 #if CONFIG_FREEZE
292 struct default_freezer_table;
293 __private_extern__ void* default_freezer_mapping_create(vm_object_t, vm_offset_t);
294 __private_extern__ void default_freezer_mapping_free(void**, boolean_t all);
295 #endif
296
297 /*
298 * Macros to copy a vm_map_entry. We must be careful to correctly
299 * manage the wired page count. vm_map_entry_copy() creates a new
300 * map entry that refers to the same memory - the wired count in the new entry
301 * must be set to zero. vm_map_entry_copy_full() creates a new
302 * entry that is identical to the old entry. This preserves the
303 * wire count; it's used for map splitting and zone changing in
304 * vm_map_copyout.
305 */
306 #define vm_map_entry_copy(NEW,OLD) \
307 MACRO_BEGIN \
308 *(NEW) = *(OLD); \
309 (NEW)->is_shared = FALSE; \
310 (NEW)->needs_wakeup = FALSE; \
311 (NEW)->in_transition = FALSE; \
312 (NEW)->wired_count = 0; \
313 (NEW)->user_wired_count = 0; \
314 (NEW)->permanent = FALSE; \
315 MACRO_END
316
317 #define vm_map_entry_copy_full(NEW,OLD) (*(NEW) = *(OLD))
318
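/*
 * Illustrative sketch (compiled out): which copy macro to use depends on
 * whether the duplicate should inherit the original's wired counts.  The
 * helper below is hypothetical and only demonstrates the distinction
 * described in the comment above.
 */
#if 0 /* example only */
static void
example_clone_entry(vm_map_entry_t new_entry, vm_map_entry_t old_entry, boolean_t full)
{
	if (full)
		vm_map_entry_copy_full(new_entry, old_entry);	/* identical clone, wire counts preserved */
	else
		vm_map_entry_copy(new_entry, old_entry);	/* new mapping of same memory, wire counts zeroed */
}
#endif
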
319 /*
320 * Decide if we want to allow processes to execute from their data or stack areas.
321 * override_nx() returns true if we do. Data/stack execution can be enabled independently
322 * for 32 and 64 bit processes. Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
323 * or allow_stack_exec to enable data execution for that type of data area for that particular
324 * ABI (or both by or'ing the flags together). These are initialized in the architecture
325 * specific pmap files since the default behavior varies according to architecture. The
326 * main reason it varies is because of the need to provide binary compatibility with old
327 * applications that were written before these restrictions came into being. In the old
328 * days, an app could execute anything it could read, but this has slowly been tightened
329 * up over time. The default behavior is:
330 *
331 * 32-bit PPC apps may execute from both stack and data areas
332 * 32-bit Intel apps may execute from data areas but not stack
333 * 64-bit PPC/Intel apps may not execute from either data or stack
334 *
335 * An application on any architecture may override these defaults by explicitly
336 * adding PROT_EXEC permission to the page in question with the mprotect(2)
337 * system call. This code here just determines what happens when an app tries to
338 * execute from a page that lacks execute permission.
339 *
340 * Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
341 * default behavior for both 32 and 64 bit apps on a system-wide basis. Furthermore,
342 * a Mach-O header flag bit (MH_NO_HEAP_EXECUTION) can be used to forcibly disallow
343 * execution from data areas for a particular binary even if the arch normally permits it. As
344 * a final wrinkle, a posix_spawn attribute flag can be used to negate this opt-in header bit
345 * to support some complicated use cases, notably browsers with out-of-process plugins that
346 * are not all NX-safe.
347 */
348
349 extern int allow_data_exec, allow_stack_exec;
350
351 int
352 override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
353 {
354 int current_abi;
355
356 /*
357 * Determine if the app is running in 32 or 64 bit mode.
358 */
359
360 if (vm_map_is_64bit(map))
361 current_abi = VM_ABI_64;
362 else
363 current_abi = VM_ABI_32;
364
365 /*
366 * Determine if we should allow the execution based on whether it's a
367 * stack or data area and the current architecture.
368 */
369
370 if (user_tag == VM_MEMORY_STACK)
371 return allow_stack_exec & current_abi;
372
373 return (allow_data_exec & current_abi) && (map->map_disallow_data_exec == FALSE);
374 }
375
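/*
 * Illustrative sketch (compiled out): a fault path that has already looked
 * up the faulting entry might consult override_nx() roughly as below when
 * an execute fault hits a non-executable mapping.  The helper and its
 * parameter names are hypothetical; the real policy check lives in the
 * fault-handling code, not in this file.
 */
#if 0 /* example only */
static boolean_t
example_allow_exec_fault(vm_map_t map, vm_map_entry_t entry, vm_prot_t fault_prot)
{
	if ((fault_prot & VM_PROT_EXECUTE) &&
	    !(entry->protection & VM_PROT_EXECUTE)) {
		/* execute fault on a non-executable entry: apply the NX policy */
		return override_nx(map, entry->alias) ? TRUE : FALSE;
	}
	return TRUE;
}
#endif
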
376
377 /*
378 * Virtual memory maps provide for the mapping, protection,
379 * and sharing of virtual memory objects. In addition,
380 * this module provides for an efficient virtual copy of
381 * memory from one map to another.
382 *
383 * Synchronization is required prior to most operations.
384 *
385 * Maps consist of an ordered doubly-linked list of simple
386 * entries; a single hint is used to speed up lookups.
387 *
388 * Sharing maps have been deleted from this version of Mach.
389 * All shared objects are now mapped directly into the respective
390 * maps. This requires a change in the copy on write strategy;
391 * the asymmetric (delayed) strategy is used for shared temporary
392 * objects instead of the symmetric (shadow) strategy. All maps
393 * are now "top level" maps (either task map, kernel map or submap
394 * of the kernel map).
395 *
396 * Since portions of maps are specified by start/end addresses,
397 * which may not align with existing map entries, all
398 * routines merely "clip" entries to these start/end values.
399 * [That is, an entry is split into two, bordering at a
400 * start or end value.] Note that these clippings may not
401 * always be necessary (as the two resulting entries are then
402 * not changed); however, the clipping is done for convenience.
403 * No attempt is currently made to "glue back together" two
404 * abutting entries.
405 *
406 * The symmetric (shadow) copy strategy implements virtual copy
407 * by copying VM object references from one map to
408 * another, and then marking both regions as copy-on-write.
409 * It is important to note that only one writeable reference
410 * to a VM object region exists in any map when this strategy
411 * is used -- this means that shadow object creation can be
412 * delayed until a write operation occurs. The asymmetric (delayed)
413 * strategy allows multiple maps to have writeable references to
414 * the same region of a vm object, and hence cannot delay creating
415 * its copy objects. See vm_object_copy_quickly() in vm_object.c.
416 * Copying of permanent objects is completely different; see
417 * vm_object_copy_strategically() in vm_object.c.
418 */
419
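/*
 * Illustrative sketch (compiled out): entries hang off the map header as an
 * ordered doubly-linked list, so a walk simply follows vme_next until it
 * returns to the header sentinel.  (The header also keeps nentries, so the
 * count below is purely for illustration.)
 */
#if 0 /* example only */
static int
example_count_entries(vm_map_t map)
{
	vm_map_entry_t entry;
	int count = 0;

	vm_map_lock_read(map);
	for (entry = vm_map_first_entry(map);
	     entry != vm_map_to_entry(map);
	     entry = entry->vme_next)
		count++;
	vm_map_unlock_read(map);
	return count;
}
#endif
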
420 static zone_t vm_map_zone; /* zone for vm_map structures */
421 static zone_t vm_map_entry_zone; /* zone for vm_map_entry structures */
422 static zone_t vm_map_kentry_zone; /* zone for kernel entry structures */
423 static zone_t vm_map_copy_zone; /* zone for vm_map_copy structures */
424
425
426 /*
427 * Placeholder object for submap operations. This object is dropped
428 * into the range by a call to vm_map_find, and removed when
429 * vm_map_submap creates the submap.
430 */
431
432 vm_object_t vm_submap_object;
433
434 static void *map_data;
435 static vm_size_t map_data_size;
436 static void *kentry_data;
437 static vm_size_t kentry_data_size;
438 static int kentry_count = 2048; /* to init kentry_data_size */
439
440 #if CONFIG_EMBEDDED
441 #define NO_COALESCE_LIMIT 0
442 #else
443 #define NO_COALESCE_LIMIT ((1024 * 128) - 1)
444 #endif
445
446 /* Skip acquiring locks if we're in the midst of a kernel core dump */
447 unsigned int not_in_kdp = 1;
448
449 unsigned int vm_map_set_cache_attr_count = 0;
450
451 kern_return_t
452 vm_map_set_cache_attr(
453 vm_map_t map,
454 vm_map_offset_t va)
455 {
456 vm_map_entry_t map_entry;
457 vm_object_t object;
458 kern_return_t kr = KERN_SUCCESS;
459
460 vm_map_lock_read(map);
461
462 if (!vm_map_lookup_entry(map, va, &map_entry) ||
463 map_entry->is_sub_map) {
464 /*
465 * that memory is not properly mapped
466 */
467 kr = KERN_INVALID_ARGUMENT;
468 goto done;
469 }
470 object = map_entry->object.vm_object;
471
472 if (object == VM_OBJECT_NULL) {
473 /*
474 * there should be a VM object here at this point
475 */
476 kr = KERN_INVALID_ARGUMENT;
477 goto done;
478 }
479 vm_object_lock(object);
480 object->set_cache_attr = TRUE;
481 vm_object_unlock(object);
482
483 vm_map_set_cache_attr_count++;
484 done:
485 vm_map_unlock_read(map);
486
487 return kr;
488 }
489
490
491 #if CONFIG_CODE_DECRYPTION
492 /*
493 * vm_map_apple_protected:
494 * This remaps the requested part of the object with an object backed by
495 * the decrypting pager.
496 * crypt_info contains entry points and session data for the crypt module.
497 * The crypt_info block will be copied by vm_map_apple_protected. The data structures
498 * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
499 */
500 kern_return_t
501 vm_map_apple_protected(
502 vm_map_t map,
503 vm_map_offset_t start,
504 vm_map_offset_t end,
505 struct pager_crypt_info *crypt_info)
506 {
507 boolean_t map_locked;
508 kern_return_t kr;
509 vm_map_entry_t map_entry;
510 memory_object_t protected_mem_obj;
511 vm_object_t protected_object;
512 vm_map_offset_t map_addr;
513
514 vm_map_lock_read(map);
515 map_locked = TRUE;
516
517 /* lookup the protected VM object */
518 if (!vm_map_lookup_entry(map,
519 start,
520 &map_entry) ||
521 map_entry->vme_end < end ||
522 map_entry->is_sub_map) {
523 /* that memory is not properly mapped */
524 kr = KERN_INVALID_ARGUMENT;
525 goto done;
526 }
527 protected_object = map_entry->object.vm_object;
528 if (protected_object == VM_OBJECT_NULL) {
529 /* there should be a VM object here at this point */
530 kr = KERN_INVALID_ARGUMENT;
531 goto done;
532 }
533
534 /* make sure protected object stays alive while map is unlocked */
535 vm_object_reference(protected_object);
536
537 vm_map_unlock_read(map);
538 map_locked = FALSE;
539
540 /*
541 * Lookup (and create if necessary) the protected memory object
542 * matching that VM object.
543 * If successful, this also grabs a reference on the memory object,
544 * to guarantee that it doesn't go away before we get a chance to map
545 * it.
546 */
547 protected_mem_obj = apple_protect_pager_setup(protected_object, crypt_info);
548
549 /* release extra ref on protected object */
550 vm_object_deallocate(protected_object);
551
552 if (protected_mem_obj == NULL) {
553 kr = KERN_FAILURE;
554 goto done;
555 }
556
557 /* map this memory object in place of the current one */
558 map_addr = start;
559 kr = vm_map_enter_mem_object(map,
560 &map_addr,
561 end - start,
562 (mach_vm_offset_t) 0,
563 VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
564 (ipc_port_t) protected_mem_obj,
565 (map_entry->offset +
566 (start - map_entry->vme_start)),
567 TRUE,
568 map_entry->protection,
569 map_entry->max_protection,
570 map_entry->inheritance);
571 assert(map_addr == start);
572 /*
573 * Release the reference obtained by apple_protect_pager_setup().
574 * The mapping (if it succeeded) is now holding a reference on the
575 * memory object.
576 */
577 memory_object_deallocate(protected_mem_obj);
578
579 done:
580 if (map_locked) {
581 vm_map_unlock_read(map);
582 }
583 return kr;
584 }
585 #endif /* CONFIG_CODE_DECRYPTION */
586
587
588 lck_grp_t vm_map_lck_grp;
589 lck_grp_attr_t vm_map_lck_grp_attr;
590 lck_attr_t vm_map_lck_attr;
591
592
593 /*
594 * vm_map_init:
595 *
596 * Initialize the vm_map module. Must be called before
597 * any other vm_map routines.
598 *
599 * Map and entry structures are allocated from zones -- we must
600 * initialize those zones.
601 *
602 * There are three zones of interest:
603 *
604 * vm_map_zone: used to allocate maps.
605 * vm_map_entry_zone: used to allocate map entries.
606 * vm_map_kentry_zone: used to allocate map entries for the kernel.
607 *
608 * The kernel allocates map entries from a special zone that is initially
609 * "crammed" with memory. It would be difficult (perhaps impossible) for
610 * the kernel to allocate more memory to an entry zone when it became
611 * empty since the very act of allocating memory implies the creation
612 * of a new entry.
613 */
614 void
615 vm_map_init(
616 void)
617 {
618 vm_map_zone = zinit((vm_map_size_t) sizeof(struct _vm_map), 40*1024,
619 PAGE_SIZE, "maps");
620 zone_change(vm_map_zone, Z_NOENCRYPT, TRUE);
621
622 vm_map_entry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
623 1024*1024, PAGE_SIZE*5,
624 "non-kernel map entries");
625 zone_change(vm_map_entry_zone, Z_NOENCRYPT, TRUE);
626
627 vm_map_kentry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
628 kentry_data_size, kentry_data_size,
629 "kernel map entries");
630 zone_change(vm_map_kentry_zone, Z_NOENCRYPT, TRUE);
631
632 vm_map_copy_zone = zinit((vm_map_size_t) sizeof(struct vm_map_copy),
633 16*1024, PAGE_SIZE, "map copies");
634 zone_change(vm_map_copy_zone, Z_NOENCRYPT, TRUE);
635
636 /*
637 * Cram the map and kentry zones with initial data.
638 * Set kentry_zone non-collectible to aid zone_gc().
639 */
640 zone_change(vm_map_zone, Z_COLLECT, FALSE);
641 zone_change(vm_map_kentry_zone, Z_COLLECT, FALSE);
642 zone_change(vm_map_kentry_zone, Z_EXPAND, FALSE);
643 zone_change(vm_map_kentry_zone, Z_FOREIGN, TRUE);
644 zone_change(vm_map_kentry_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
645 zone_change(vm_map_copy_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
646
647 zcram(vm_map_zone, map_data, map_data_size);
648 zcram(vm_map_kentry_zone, kentry_data, kentry_data_size);
649
650 lck_grp_attr_setdefault(&vm_map_lck_grp_attr);
651 lck_grp_init(&vm_map_lck_grp, "vm_map", &vm_map_lck_grp_attr);
652 lck_attr_setdefault(&vm_map_lck_attr);
653 }
654
655 void
656 vm_map_steal_memory(
657 void)
658 {
659 map_data_size = round_page(10 * sizeof(struct _vm_map));
660 map_data = pmap_steal_memory(map_data_size);
661
662 #if 0
663 /*
664 * Limiting worst case: vm_map_kentry_zone needs to map each "available"
665 * physical page (i.e. that beyond the kernel image and page tables)
666 * individually; we guess at most one entry per eight pages in the
667 * real world. This works out to roughly .1 of 1% of physical memory,
668 * or roughly 1900 entries (64K) for a 64M machine with 4K pages.
669 */
670 #endif
671 kentry_count = pmap_free_pages() / 8;
672
673
674 kentry_data_size =
675 round_page(kentry_count * sizeof(struct vm_map_entry));
676 kentry_data = pmap_steal_memory(kentry_data_size);
677 }
678
679 /*
680 * vm_map_create:
681 *
682 * Creates and returns a new empty VM map with
683 * the given physical map structure, and having
684 * the given lower and upper address bounds.
685 */
686 vm_map_t
687 vm_map_create(
688 pmap_t pmap,
689 vm_map_offset_t min,
690 vm_map_offset_t max,
691 boolean_t pageable)
692 {
693 static int color_seed = 0;
694 register vm_map_t result;
695
696 result = (vm_map_t) zalloc(vm_map_zone);
697 if (result == VM_MAP_NULL)
698 panic("vm_map_create");
699
700 vm_map_first_entry(result) = vm_map_to_entry(result);
701 vm_map_last_entry(result) = vm_map_to_entry(result);
702 result->hdr.nentries = 0;
703 result->hdr.entries_pageable = pageable;
704
705 vm_map_store_init( &(result->hdr) );
706
707 result->size = 0;
708 result->user_wire_limit = MACH_VM_MAX_ADDRESS; /* default limit is unlimited */
709 result->user_wire_size = 0;
710 result->ref_count = 1;
711 #if TASK_SWAPPER
712 result->res_count = 1;
713 result->sw_state = MAP_SW_IN;
714 #endif /* TASK_SWAPPER */
715 result->pmap = pmap;
716 result->min_offset = min;
717 result->max_offset = max;
718 result->wiring_required = FALSE;
719 result->no_zero_fill = FALSE;
720 result->mapped = FALSE;
721 result->wait_for_space = FALSE;
722 result->switch_protect = FALSE;
723 result->disable_vmentry_reuse = FALSE;
724 result->map_disallow_data_exec = FALSE;
725 result->highest_entry_end = 0;
726 result->first_free = vm_map_to_entry(result);
727 result->hint = vm_map_to_entry(result);
728 result->color_rr = (color_seed++) & vm_color_mask;
729 result->jit_entry_exists = FALSE;
730 #if CONFIG_FREEZE
731 result->default_freezer_toc = NULL;
732 #endif
733 vm_map_lock_init(result);
734 lck_mtx_init_ext(&result->s_lock, &result->s_lock_ext, &vm_map_lck_grp, &vm_map_lck_attr);
735
736 return(result);
737 }
738
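/*
 * Illustrative sketch (compiled out): vm_map_enter() below builds its
 * temporary "zap" maps this way -- a map with no pmap behind it that only
 * holds entries torn out of another map.  The bounds here are hypothetical.
 */
#if 0 /* example only */
static vm_map_t
example_make_zap_map(vm_map_offset_t start, vm_map_size_t size, boolean_t pageable)
{
	return vm_map_create(PMAP_NULL, start, start + size, pageable);
}
#endif
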
739 /*
740 * vm_map_entry_create: [ internal use only ]
741 *
742 * Allocates a VM map entry for insertion in the
743 * given map (or map copy). No fields are filled.
744 */
745 #define vm_map_entry_create(map) \
746 _vm_map_entry_create(&(map)->hdr)
747
748 #define vm_map_copy_entry_create(copy) \
749 _vm_map_entry_create(&(copy)->cpy_hdr)
750
751 static vm_map_entry_t
752 _vm_map_entry_create(
753 register struct vm_map_header *map_header)
754 {
755 register zone_t zone;
756 register vm_map_entry_t entry;
757
758 if (map_header->entries_pageable)
759 zone = vm_map_entry_zone;
760 else
761 zone = vm_map_kentry_zone;
762
763 entry = (vm_map_entry_t) zalloc(zone);
764 if (entry == VM_MAP_ENTRY_NULL)
765 panic("vm_map_entry_create");
766 vm_map_store_update( (vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE);
767
768 return(entry);
769 }
770
771 /*
772 * vm_map_entry_dispose: [ internal use only ]
773 *
774 * Inverse of vm_map_entry_create.
775 *
776 * write map lock held so no need to
777 * do anything special to insure correctness
778 * of the stores
779 */
780 #define vm_map_entry_dispose(map, entry) \
781 vm_map_store_update( map, entry, VM_MAP_ENTRY_DELETE); \
782 _vm_map_entry_dispose(&(map)->hdr, (entry))
783
784 #define vm_map_copy_entry_dispose(copy, entry) \
785 _vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
786
787 static void
788 _vm_map_entry_dispose(
789 register struct vm_map_header *map_header,
790 register vm_map_entry_t entry)
791 {
792 register zone_t zone;
793
794 if (map_header->entries_pageable)
795 zone = vm_map_entry_zone;
796 else
797 zone = vm_map_kentry_zone;
798
799 zfree(zone, entry);
800 }
801
802 #if MACH_ASSERT
803 static boolean_t first_free_check = FALSE;
804 boolean_t
805 first_free_is_valid(
806 vm_map_t map)
807 {
808 if (!first_free_check)
809 return TRUE;
810
811 return( first_free_is_valid_store( map ));
812 }
813 #endif /* MACH_ASSERT */
814
815
816 #define vm_map_copy_entry_link(copy, after_where, entry) \
817 _vm_map_store_entry_link(&(copy)->cpy_hdr, after_where, (entry))
818
819 #define vm_map_copy_entry_unlink(copy, entry) \
820 _vm_map_store_entry_unlink(&(copy)->cpy_hdr, (entry))
821
822 #if MACH_ASSERT && TASK_SWAPPER
823 /*
824 * vm_map_res_reference:
825 *
826 * Adds another valid residence count to the given map.
827 *
828 * Map is locked so this function can be called from
829 * vm_map_swapin.
830 *
831 */
832 void vm_map_res_reference(register vm_map_t map)
833 {
834 /* assert map is locked */
835 assert(map->res_count >= 0);
836 assert(map->ref_count >= map->res_count);
837 if (map->res_count == 0) {
838 lck_mtx_unlock(&map->s_lock);
839 vm_map_lock(map);
840 vm_map_swapin(map);
841 lck_mtx_lock(&map->s_lock);
842 ++map->res_count;
843 vm_map_unlock(map);
844 } else
845 ++map->res_count;
846 }
847
848 /*
849 * vm_map_reference_swap:
850 *
851 * Adds valid reference and residence counts to the given map.
852 *
853 * The map may not be in memory (i.e. zero residence count).
854 *
855 */
856 void vm_map_reference_swap(register vm_map_t map)
857 {
858 assert(map != VM_MAP_NULL);
859 lck_mtx_lock(&map->s_lock);
860 assert(map->res_count >= 0);
861 assert(map->ref_count >= map->res_count);
862 map->ref_count++;
863 vm_map_res_reference(map);
864 lck_mtx_unlock(&map->s_lock);
865 }
866
867 /*
868 * vm_map_res_deallocate:
869 *
870 * Decrement residence count on a map; possibly causing swapout.
871 *
872 * The map must be in memory (i.e. non-zero residence count).
873 *
874 * The map is locked, so this function is callable from vm_map_deallocate.
875 *
876 */
877 void vm_map_res_deallocate(register vm_map_t map)
878 {
879 assert(map->res_count > 0);
880 if (--map->res_count == 0) {
881 lck_mtx_unlock(&map->s_lock);
882 vm_map_lock(map);
883 vm_map_swapout(map);
884 vm_map_unlock(map);
885 lck_mtx_lock(&map->s_lock);
886 }
887 assert(map->ref_count >= map->res_count);
888 }
889 #endif /* MACH_ASSERT && TASK_SWAPPER */
890
891 /*
892 * vm_map_destroy:
893 *
894 * Actually destroy a map.
895 */
896 void
897 vm_map_destroy(
898 vm_map_t map,
899 int flags)
900 {
901 vm_map_lock(map);
902
903 /* clean up regular map entries */
904 (void) vm_map_delete(map, map->min_offset, map->max_offset,
905 flags, VM_MAP_NULL);
906 /* clean up leftover special mappings (commpage, etc...) */
907 (void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL,
908 flags, VM_MAP_NULL);
909
910 #if CONFIG_FREEZE
911 if (map->default_freezer_toc){
912 default_freezer_mapping_free( &(map->default_freezer_toc), TRUE);
913 }
914 #endif
915 vm_map_unlock(map);
916
917 assert(map->hdr.nentries == 0);
918
919 if(map->pmap)
920 pmap_destroy(map->pmap);
921
922 zfree(vm_map_zone, map);
923 }
924
925 #if TASK_SWAPPER
926 /*
927 * vm_map_swapin/vm_map_swapout
928 *
929 * Swap a map in and out, either referencing or releasing its resources.
930 * These functions are internal use only; however, they must be exported
931 * because they may be called from macros, which are exported.
932 *
933 * In the case of swapout, there could be races on the residence count,
934 * so if the residence count is up, we return, assuming that a
935 * vm_map_deallocate() call in the near future will bring us back.
936 *
937 * Locking:
938 * -- We use the map write lock for synchronization among races.
939 * -- The map write lock, and not the simple s_lock, protects the
940 * swap state of the map.
941 * -- If a map entry is a share map, then we hold both locks, in
942 * hierarchical order.
943 *
944 * Synchronization Notes:
945 * 1) If a vm_map_swapin() call happens while swapout in progress, it
946 * will block on the map lock and proceed when swapout is through.
947 * 2) A vm_map_reference() call at this time is illegal, and will
948 * cause a panic. vm_map_reference() is only allowed on resident
949 * maps, since it refuses to block.
950 * 3) A vm_map_swapin() call during a swapin will block, and
951 * proceed when the first swapin is done, turning into a nop.
952 * This is the reason the res_count is not incremented until
953 * after the swapin is complete.
954 * 4) There is a timing hole after the checks of the res_count, before
955 * the map lock is taken, during which a swapin may get the lock
956 * before a swapout about to happen. If this happens, the swapin
957 * will detect the state and increment the reference count, causing
958 * the swapout to be a nop, thereby delaying it until a later
959 * vm_map_deallocate. If the swapout gets the lock first, then
960 * the swapin will simply block until the swapout is done, and
961 * then proceed.
962 *
963 * Because vm_map_swapin() is potentially an expensive operation, it
964 * should be used with caution.
965 *
966 * Invariants:
967 * 1) A map with a residence count of zero is either swapped, or
968 * being swapped.
969 * 2) A map with a non-zero residence count is either resident,
970 * or being swapped in.
971 */
972
973 int vm_map_swap_enable = 1;
974
975 void vm_map_swapin (vm_map_t map)
976 {
977 register vm_map_entry_t entry;
978
979 if (!vm_map_swap_enable) /* debug */
980 return;
981
982 /*
983 * Map is locked
984 * First deal with various races.
985 */
986 if (map->sw_state == MAP_SW_IN)
987 /*
988 * we raced with swapout and won. Returning will incr.
989 * the res_count, turning the swapout into a nop.
990 */
991 return;
992
993 /*
994 * The residence count must be zero. If we raced with another
995 * swapin, the state would have been IN; if we raced with a
996 * swapout (after another competing swapin), we must have lost
997 * the race to get here (see above comment), in which case
998 * res_count is still 0.
999 */
1000 assert(map->res_count == 0);
1001
1002 /*
1003 * There are no intermediate states of a map going out or
1004 * coming in, since the map is locked during the transition.
1005 */
1006 assert(map->sw_state == MAP_SW_OUT);
1007
1008 /*
1009 * We now operate upon each map entry. If the entry is a sub-
1010 * or share-map, we call vm_map_res_reference upon it.
1011 * If the entry is an object, we call vm_object_res_reference
1012 * (this may iterate through the shadow chain).
1013 * Note that we hold the map locked the entire time,
1014 * even if we get back here via a recursive call in
1015 * vm_map_res_reference.
1016 */
1017 entry = vm_map_first_entry(map);
1018
1019 while (entry != vm_map_to_entry(map)) {
1020 if (entry->object.vm_object != VM_OBJECT_NULL) {
1021 if (entry->is_sub_map) {
1022 vm_map_t lmap = entry->object.sub_map;
1023 lck_mtx_lock(&lmap->s_lock);
1024 vm_map_res_reference(lmap);
1025 lck_mtx_unlock(&lmap->s_lock);
1026 } else {
1027 vm_object_t object = entry->object.vm_object;
1028 vm_object_lock(object);
1029 /*
1030 * This call may iterate through the
1031 * shadow chain.
1032 */
1033 vm_object_res_reference(object);
1034 vm_object_unlock(object);
1035 }
1036 }
1037 entry = entry->vme_next;
1038 }
1039 assert(map->sw_state == MAP_SW_OUT);
1040 map->sw_state = MAP_SW_IN;
1041 }
1042
1043 void vm_map_swapout(vm_map_t map)
1044 {
1045 register vm_map_entry_t entry;
1046
1047 /*
1048 * Map is locked
1049 * First deal with various races.
1050 * If we raced with a swapin and lost, the residence count
1051 * will have been incremented to 1, and we simply return.
1052 */
1053 lck_mtx_lock(&map->s_lock);
1054 if (map->res_count != 0) {
1055 lck_mtx_unlock(&map->s_lock);
1056 return;
1057 }
1058 lck_mtx_unlock(&map->s_lock);
1059
1060 /*
1061 * There are no intermediate states of a map going out or
1062 * coming in, since the map is locked during the transition.
1063 */
1064 assert(map->sw_state == MAP_SW_IN);
1065
1066 if (!vm_map_swap_enable)
1067 return;
1068
1069 /*
1070 * We now operate upon each map entry. If the entry is a sub-
1071 * or share-map, we call vm_map_res_deallocate upon it.
1072 * If the entry is an object, we call vm_object_res_deallocate
1073 * (this may iterate through the shadow chain).
1074 * Note that we hold the map locked the entire time,
1075 * even if we get back here via a recursive call in
1076 * vm_map_res_deallocate.
1077 */
1078 entry = vm_map_first_entry(map);
1079
1080 while (entry != vm_map_to_entry(map)) {
1081 if (entry->object.vm_object != VM_OBJECT_NULL) {
1082 if (entry->is_sub_map) {
1083 vm_map_t lmap = entry->object.sub_map;
1084 lck_mtx_lock(&lmap->s_lock);
1085 vm_map_res_deallocate(lmap);
1086 lck_mtx_unlock(&lmap->s_lock);
1087 } else {
1088 vm_object_t object = entry->object.vm_object;
1089 vm_object_lock(object);
1090 /*
1091 * This call may take a long time,
1092 * since it could actively push
1093 * out pages (if we implement it
1094 * that way).
1095 */
1096 vm_object_res_deallocate(object);
1097 vm_object_unlock(object);
1098 }
1099 }
1100 entry = entry->vme_next;
1101 }
1102 assert(map->sw_state == MAP_SW_IN);
1103 map->sw_state = MAP_SW_OUT;
1104 }
1105
1106 #endif /* TASK_SWAPPER */
1107
1108 /*
1109 * vm_map_lookup_entry: [ internal use only ]
1110 *
1111 * Calls into the vm map store layer to find the map
1112 * entry containing (or immediately preceding) the
1113 * specified address in the given map; the entry is returned
1114 * in the "entry" parameter. The boolean
1115 * result indicates whether the address is
1116 * actually contained in the map.
1117 */
1118 boolean_t
1119 vm_map_lookup_entry(
1120 register vm_map_t map,
1121 register vm_map_offset_t address,
1122 vm_map_entry_t *entry) /* OUT */
1123 {
1124 return ( vm_map_store_lookup_entry( map, address, entry ));
1125 }
1126
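/*
 * Illustrative sketch (compiled out): callers of vm_map_lookup_entry()
 * hold the map lock (read or write) across the lookup and any use of the
 * returned entry, and treat a FALSE result or a submap entry as "not
 * properly mapped", as vm_map_set_cache_attr() does above.
 */
#if 0 /* example only */
static kern_return_t
example_probe_address(vm_map_t map, vm_map_offset_t va)
{
	vm_map_entry_t entry;
	kern_return_t kr = KERN_SUCCESS;

	vm_map_lock_read(map);
	if (!vm_map_lookup_entry(map, va, &entry) || entry->is_sub_map)
		kr = KERN_INVALID_ADDRESS;
	vm_map_unlock_read(map);
	return kr;
}
#endif
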
1127 /*
1128 * Routine: vm_map_find_space
1129 * Purpose:
1130 * Allocate a range in the specified virtual address map,
1131 * returning the entry allocated for that range.
1132 * Used by kmem_alloc, etc.
1133 *
1134 * The map must NOT be locked. It will be returned locked
1135 * on KERN_SUCCESS, unlocked on failure.
1136 *
1137 * If an entry is allocated, the object/offset fields
1138 * are initialized to zero.
1139 */
1140 kern_return_t
1141 vm_map_find_space(
1142 register vm_map_t map,
1143 vm_map_offset_t *address, /* OUT */
1144 vm_map_size_t size,
1145 vm_map_offset_t mask,
1146 int flags,
1147 vm_map_entry_t *o_entry) /* OUT */
1148 {
1149 register vm_map_entry_t entry, new_entry;
1150 register vm_map_offset_t start;
1151 register vm_map_offset_t end;
1152
1153 if (size == 0) {
1154 *address = 0;
1155 return KERN_INVALID_ARGUMENT;
1156 }
1157
1158 if (flags & VM_FLAGS_GUARD_AFTER) {
1159 /* account for the back guard page in the size */
1160 size += PAGE_SIZE_64;
1161 }
1162
1163 new_entry = vm_map_entry_create(map);
1164
1165 /*
1166 * Look for the first possible address; if there's already
1167 * something at this address, we have to start after it.
1168 */
1169
1170 vm_map_lock(map);
1171
1172 if( map->disable_vmentry_reuse == TRUE) {
1173 VM_MAP_HIGHEST_ENTRY(map, entry, start);
1174 } else {
1175 assert(first_free_is_valid(map));
1176 if ((entry = map->first_free) == vm_map_to_entry(map))
1177 start = map->min_offset;
1178 else
1179 start = entry->vme_end;
1180 }
1181
1182 /*
1183 * In any case, the "entry" always precedes
1184 * the proposed new region throughout the loop:
1185 */
1186
1187 while (TRUE) {
1188 register vm_map_entry_t next;
1189
1190 /*
1191 * Find the end of the proposed new region.
1192 * Be sure we didn't go beyond the end, or
1193 * wrap around the address.
1194 */
1195
1196 if (flags & VM_FLAGS_GUARD_BEFORE) {
1197 /* reserve space for the front guard page */
1198 start += PAGE_SIZE_64;
1199 }
1200 end = ((start + mask) & ~mask);
1201
1202 if (end < start) {
1203 vm_map_entry_dispose(map, new_entry);
1204 vm_map_unlock(map);
1205 return(KERN_NO_SPACE);
1206 }
1207 start = end;
1208 end += size;
1209
1210 if ((end > map->max_offset) || (end < start)) {
1211 vm_map_entry_dispose(map, new_entry);
1212 vm_map_unlock(map);
1213 return(KERN_NO_SPACE);
1214 }
1215
1216 /*
1217 * If there are no more entries, we must win.
1218 */
1219
1220 next = entry->vme_next;
1221 if (next == vm_map_to_entry(map))
1222 break;
1223
1224 /*
1225 * If there is another entry, it must be
1226 * after the end of the potential new region.
1227 */
1228
1229 if (next->vme_start >= end)
1230 break;
1231
1232 /*
1233 * Didn't fit -- move to the next entry.
1234 */
1235
1236 entry = next;
1237 start = entry->vme_end;
1238 }
1239
1240 /*
1241 * At this point,
1242 * "start" and "end" should define the endpoints of the
1243 * available new range, and
1244 * "entry" should refer to the region before the new
1245 * range, and
1246 *
1247 * the map should be locked.
1248 */
1249
1250 if (flags & VM_FLAGS_GUARD_BEFORE) {
1251 /* go back for the front guard page */
1252 start -= PAGE_SIZE_64;
1253 }
1254 *address = start;
1255
1256 new_entry->vme_start = start;
1257 new_entry->vme_end = end;
1258 assert(page_aligned(new_entry->vme_start));
1259 assert(page_aligned(new_entry->vme_end));
1260
1261 new_entry->is_shared = FALSE;
1262 new_entry->is_sub_map = FALSE;
1263 new_entry->use_pmap = FALSE;
1264 new_entry->object.vm_object = VM_OBJECT_NULL;
1265 new_entry->offset = (vm_object_offset_t) 0;
1266
1267 new_entry->needs_copy = FALSE;
1268
1269 new_entry->inheritance = VM_INHERIT_DEFAULT;
1270 new_entry->protection = VM_PROT_DEFAULT;
1271 new_entry->max_protection = VM_PROT_ALL;
1272 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
1273 new_entry->wired_count = 0;
1274 new_entry->user_wired_count = 0;
1275
1276 new_entry->in_transition = FALSE;
1277 new_entry->needs_wakeup = FALSE;
1278 new_entry->no_cache = FALSE;
1279 new_entry->permanent = FALSE;
1280 new_entry->superpage_size = 0;
1281
1282 new_entry->alias = 0;
1283 new_entry->zero_wired_pages = FALSE;
1284
1285 VM_GET_FLAGS_ALIAS(flags, new_entry->alias);
1286
1287 /*
1288 * Insert the new entry into the list
1289 */
1290
1291 vm_map_store_entry_link(map, entry, new_entry);
1292
1293 map->size += size;
1294
1295 /*
1296 * Update the lookup hint
1297 */
1298 SAVE_HINT_MAP_WRITE(map, new_entry);
1299
1300 *o_entry = new_entry;
1301 return(KERN_SUCCESS);
1302 }
1303
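/*
 * Illustrative sketch (compiled out): a kmem_alloc-style use of
 * vm_map_find_space().  On KERN_SUCCESS the map comes back locked and the
 * new entry's object/offset are zeroed, so the caller installs its object
 * and then unlocks.  "example_object" stands for a hypothetical VM object
 * on which the caller already holds a reference.
 */
#if 0 /* example only */
static kern_return_t
example_alloc_space(
	vm_map_t	map,
	vm_map_size_t	size,
	vm_object_t	example_object,
	vm_map_offset_t	*addrp)
{
	vm_map_entry_t	entry;
	kern_return_t	kr;

	kr = vm_map_find_space(map, addrp, size, (vm_map_offset_t) 0, 0, &entry);
	if (kr != KERN_SUCCESS)
		return kr;
	entry->object.vm_object = example_object;
	entry->offset = (vm_object_offset_t) 0;
	vm_map_unlock(map);	/* vm_map_find_space() returned the map locked */
	return KERN_SUCCESS;
}
#endif
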
1304 int vm_map_pmap_enter_print = FALSE;
1305 int vm_map_pmap_enter_enable = FALSE;
1306
1307 /*
1308 * Routine: vm_map_pmap_enter [internal only]
1309 *
1310 * Description:
1311 * Force pages from the specified object to be entered into
1312 * the pmap at the specified address if they are present.
1313 * As soon as a page is not found in the object, the scan ends.
1314 *
1315 * Returns:
1316 * Nothing.
1317 *
1318 * In/out conditions:
1319 * The source map should not be locked on entry.
1320 */
1321 static void
1322 vm_map_pmap_enter(
1323 vm_map_t map,
1324 register vm_map_offset_t addr,
1325 register vm_map_offset_t end_addr,
1326 register vm_object_t object,
1327 vm_object_offset_t offset,
1328 vm_prot_t protection)
1329 {
1330 int type_of_fault;
1331 kern_return_t kr;
1332
1333 if(map->pmap == 0)
1334 return;
1335
1336 while (addr < end_addr) {
1337 register vm_page_t m;
1338
1339 vm_object_lock(object);
1340
1341 m = vm_page_lookup(object, offset);
1342 /*
1343 * ENCRYPTED SWAP:
1344 * The user should never see encrypted data, so do not
1345 * enter an encrypted page in the page table.
1346 */
1347 if (m == VM_PAGE_NULL || m->busy || m->encrypted ||
1348 m->fictitious ||
1349 (m->unusual && ( m->error || m->restart || m->absent))) {
1350 vm_object_unlock(object);
1351 return;
1352 }
1353
1354 if (vm_map_pmap_enter_print) {
1355 printf("vm_map_pmap_enter:");
1356 printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
1357 map, (unsigned long long)addr, object, (unsigned long long)offset);
1358 }
1359 type_of_fault = DBG_CACHE_HIT_FAULT;
1360 kr = vm_fault_enter(m, map->pmap, addr, protection, protection,
1361 VM_PAGE_WIRED(m), FALSE, FALSE, FALSE,
1362 &type_of_fault);
1363
1364 vm_object_unlock(object);
1365
1366 offset += PAGE_SIZE_64;
1367 addr += PAGE_SIZE;
1368 }
1369 }
1370
1371 boolean_t vm_map_pmap_is_empty(
1372 vm_map_t map,
1373 vm_map_offset_t start,
1374 vm_map_offset_t end);
1375 boolean_t vm_map_pmap_is_empty(
1376 vm_map_t map,
1377 vm_map_offset_t start,
1378 vm_map_offset_t end)
1379 {
1380 #ifdef MACHINE_PMAP_IS_EMPTY
1381 return pmap_is_empty(map->pmap, start, end);
1382 #else /* MACHINE_PMAP_IS_EMPTY */
1383 vm_map_offset_t offset;
1384 ppnum_t phys_page;
1385
1386 if (map->pmap == NULL) {
1387 return TRUE;
1388 }
1389
1390 for (offset = start;
1391 offset < end;
1392 offset += PAGE_SIZE) {
1393 phys_page = pmap_find_phys(map->pmap, offset);
1394 if (phys_page) {
1395 kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
1396 "page %d at 0x%llx\n",
1397 map, (long long)start, (long long)end,
1398 phys_page, (long long)offset);
1399 return FALSE;
1400 }
1401 }
1402 return TRUE;
1403 #endif /* MACHINE_PMAP_IS_EMPTY */
1404 }
1405
1406 /*
1407 * Routine: vm_map_enter
1408 *
1409 * Description:
1410 * Allocate a range in the specified virtual address map.
1411 * The resulting range will refer to memory defined by
1412 * the given memory object and offset into that object.
1413 *
1414 * Arguments are as defined in the vm_map call.
1415 */
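/*
 * Illustrative sketch (compiled out): the simplest use of vm_map_enter()
 * is an anonymous, zero-fill, "anywhere" allocation: a null object with
 * needs_copy FALSE, default protections and inheritance.  The size is
 * assumed page-aligned (vm_map_enter() asserts this below) and the chosen
 * address is returned through *addrp.
 */
#if 0 /* example only */
static kern_return_t
example_allocate_anywhere(vm_map_t map, vm_map_size_t size, vm_map_offset_t *addrp)
{
	*addrp = 0;
	return vm_map_enter(map, addrp, size, (vm_map_offset_t) 0,
			    VM_FLAGS_ANYWHERE,
			    VM_OBJECT_NULL, (vm_object_offset_t) 0, FALSE,
			    VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
}
#endif
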
1416 int _map_enter_debug = 0;
1417 static unsigned int vm_map_enter_restore_successes = 0;
1418 static unsigned int vm_map_enter_restore_failures = 0;
1419 kern_return_t
1420 vm_map_enter(
1421 vm_map_t map,
1422 vm_map_offset_t *address, /* IN/OUT */
1423 vm_map_size_t size,
1424 vm_map_offset_t mask,
1425 int flags,
1426 vm_object_t object,
1427 vm_object_offset_t offset,
1428 boolean_t needs_copy,
1429 vm_prot_t cur_protection,
1430 vm_prot_t max_protection,
1431 vm_inherit_t inheritance)
1432 {
1433 vm_map_entry_t entry, new_entry;
1434 vm_map_offset_t start, tmp_start, tmp_offset;
1435 vm_map_offset_t end, tmp_end;
1436 vm_map_offset_t tmp2_start, tmp2_end;
1437 vm_map_offset_t step;
1438 kern_return_t result = KERN_SUCCESS;
1439 vm_map_t zap_old_map = VM_MAP_NULL;
1440 vm_map_t zap_new_map = VM_MAP_NULL;
1441 boolean_t map_locked = FALSE;
1442 boolean_t pmap_empty = TRUE;
1443 boolean_t new_mapping_established = FALSE;
1444 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
1445 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
1446 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
1447 boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
1448 boolean_t is_submap = ((flags & VM_FLAGS_SUBMAP) != 0);
1449 boolean_t permanent = ((flags & VM_FLAGS_PERMANENT) != 0);
1450 unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
1451 char alias;
1452 vm_map_offset_t effective_min_offset, effective_max_offset;
1453 kern_return_t kr;
1454
1455 if (superpage_size) {
1456 switch (superpage_size) {
1457 /*
1458 * Note that the current implementation only supports
1459 * a single size for superpages, SUPERPAGE_SIZE, per
1460 * architecture. As soon as more sizes are to be
1461 * supported, SUPERPAGE_SIZE has to be replaced
1462 * with a lookup of the size depending on superpage_size.
1463 */
1464 #ifdef __x86_64__
1465 case SUPERPAGE_SIZE_ANY:
1466 /* handle it like 2 MB and round up to page size */
1467 size = (size + 2*1024*1024 - 1) & ~(2*1024*1024 - 1);
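	/* FALLTHROUGH: SUPERPAGE_SIZE_ANY is then handled as 2MB */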
1468 case SUPERPAGE_SIZE_2MB:
1469 break;
1470 #endif
1471 default:
1472 return KERN_INVALID_ARGUMENT;
1473 }
1474 mask = SUPERPAGE_SIZE-1;
1475 if (size & (SUPERPAGE_SIZE-1))
1476 return KERN_INVALID_ARGUMENT;
1477 inheritance = VM_INHERIT_NONE; /* fork() children won't inherit superpages */
1478 }
1479
1480
1481 #if CONFIG_EMBEDDED
1482 if (cur_protection & VM_PROT_WRITE){
1483 if ((cur_protection & VM_PROT_EXECUTE) && !(flags & VM_FLAGS_MAP_JIT)){
1484 printf("EMBEDDED: %s curprot cannot be write+execute. turning off execute\n", __PRETTY_FUNCTION__);
1485 cur_protection &= ~VM_PROT_EXECUTE;
1486 }
1487 }
1488 #endif /* CONFIG_EMBEDDED */
1489
1490 if (is_submap) {
1491 if (purgable) {
1492 /* submaps can not be purgeable */
1493 return KERN_INVALID_ARGUMENT;
1494 }
1495 if (object == VM_OBJECT_NULL) {
1496 /* submaps can not be created lazily */
1497 return KERN_INVALID_ARGUMENT;
1498 }
1499 }
1500 if (flags & VM_FLAGS_ALREADY) {
1501 /*
1502 * VM_FLAGS_ALREADY says that it's OK if the same mapping
1503 * is already present. For it to be meaningful, the requested
1504 * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
1505 * we shouldn't try to remove what was mapped there first
1506 * (!VM_FLAGS_OVERWRITE).
1507 */
1508 if ((flags & VM_FLAGS_ANYWHERE) ||
1509 (flags & VM_FLAGS_OVERWRITE)) {
1510 return KERN_INVALID_ARGUMENT;
1511 }
1512 }
1513
1514 effective_min_offset = map->min_offset;
1515
1516 if (flags & VM_FLAGS_BEYOND_MAX) {
1517 /*
1518 * Allow an insertion beyond the map's max offset.
1519 */
1520 if (vm_map_is_64bit(map))
1521 effective_max_offset = 0xFFFFFFFFFFFFF000ULL;
1522 else
1523 effective_max_offset = 0x00000000FFFFF000ULL;
1524 } else {
1525 effective_max_offset = map->max_offset;
1526 }
1527
1528 if (size == 0 ||
1529 (offset & PAGE_MASK_64) != 0) {
1530 *address = 0;
1531 return KERN_INVALID_ARGUMENT;
1532 }
1533
1534 VM_GET_FLAGS_ALIAS(flags, alias);
1535
1536 #define RETURN(value) { result = value; goto BailOut; }
1537
1538 assert(page_aligned(*address));
1539 assert(page_aligned(size));
1540
1541 /*
1542 * Only zero-fill objects are allowed to be purgable.
1543 * LP64todo - limit purgable objects to 32-bits for now
1544 */
1545 if (purgable &&
1546 (offset != 0 ||
1547 (object != VM_OBJECT_NULL &&
1548 (object->vo_size != size ||
1549 object->purgable == VM_PURGABLE_DENY))
1550 || size > ANON_MAX_SIZE)) /* LP64todo: remove when dp capable */
1551 return KERN_INVALID_ARGUMENT;
1552
1553 if (!anywhere && overwrite) {
1554 /*
1555 * Create a temporary VM map to hold the old mappings in the
1556 * affected area while we create the new one.
1557 * This avoids releasing the VM map lock in
1558 * vm_map_entry_delete() and allows atomicity
1559 * when we want to replace some mappings with a new one.
1560 * It also allows us to restore the old VM mappings if the
1561 * new mapping fails.
1562 */
1563 zap_old_map = vm_map_create(PMAP_NULL,
1564 *address,
1565 *address + size,
1566 map->hdr.entries_pageable);
1567 }
1568
1569 StartAgain: ;
1570
1571 start = *address;
1572
1573 if (anywhere) {
1574 vm_map_lock(map);
1575 map_locked = TRUE;
1576
1577 if ((flags & VM_FLAGS_MAP_JIT) && (map->jit_entry_exists)){
1578 result = KERN_INVALID_ARGUMENT;
1579 goto BailOut;
1580 }
1581
1582 /*
1583 * Calculate the first possible address.
1584 */
1585
1586 if (start < effective_min_offset)
1587 start = effective_min_offset;
1588 if (start > effective_max_offset)
1589 RETURN(KERN_NO_SPACE);
1590
1591 /*
1592 * Look for the first possible address;
1593 * if there's already something at this
1594 * address, we have to start after it.
1595 */
1596
1597 if( map->disable_vmentry_reuse == TRUE) {
1598 VM_MAP_HIGHEST_ENTRY(map, entry, start);
1599 } else {
1600 assert(first_free_is_valid(map));
1601
1602 entry = map->first_free;
1603
1604 if (entry == vm_map_to_entry(map)) {
1605 entry = NULL;
1606 } else {
1607 if (entry->vme_next == vm_map_to_entry(map)){
1608 /*
1609 * Hole at the end of the map.
1610 */
1611 entry = NULL;
1612 } else {
1613 if (start < (entry->vme_next)->vme_start ) {
1614 start = entry->vme_end;
1615 } else {
1616 /*
1617 * Need to do a lookup.
1618 */
1619 entry = NULL;
1620 }
1621 }
1622 }
1623
1624 if (entry == NULL) {
1625 vm_map_entry_t tmp_entry;
1626 if (vm_map_lookup_entry(map, start, &tmp_entry))
1627 start = tmp_entry->vme_end;
1628 entry = tmp_entry;
1629 }
1630 }
1631
1632 /*
1633 * In any case, the "entry" always precedes
1634 * the proposed new region throughout the
1635 * loop:
1636 */
1637
1638 while (TRUE) {
1639 register vm_map_entry_t next;
1640
1641 /*
1642 * Find the end of the proposed new region.
1643 * Be sure we didn't go beyond the end, or
1644 * wrap around the address.
1645 */
1646
1647 end = ((start + mask) & ~mask);
1648 if (end < start)
1649 RETURN(KERN_NO_SPACE);
1650 start = end;
1651 end += size;
1652
1653 if ((end > effective_max_offset) || (end < start)) {
1654 if (map->wait_for_space) {
1655 if (size <= (effective_max_offset -
1656 effective_min_offset)) {
1657 assert_wait((event_t)map,
1658 THREAD_ABORTSAFE);
1659 vm_map_unlock(map);
1660 map_locked = FALSE;
1661 thread_block(THREAD_CONTINUE_NULL);
1662 goto StartAgain;
1663 }
1664 }
1665 RETURN(KERN_NO_SPACE);
1666 }
1667
1668 /*
1669 * If there are no more entries, we must win.
1670 */
1671
1672 next = entry->vme_next;
1673 if (next == vm_map_to_entry(map))
1674 break;
1675
1676 /*
1677 * If there is another entry, it must be
1678 * after the end of the potential new region.
1679 */
1680
1681 if (next->vme_start >= end)
1682 break;
1683
1684 /*
1685 * Didn't fit -- move to the next entry.
1686 */
1687
1688 entry = next;
1689 start = entry->vme_end;
1690 }
1691 *address = start;
1692 } else {
1693 /*
1694 * Verify that:
1695 * the address doesn't itself violate
1696 * the mask requirement.
1697 */
1698
1699 vm_map_lock(map);
1700 map_locked = TRUE;
1701 if ((start & mask) != 0)
1702 RETURN(KERN_NO_SPACE);
1703
1704 /*
1705 * ... the address is within bounds
1706 */
1707
1708 end = start + size;
1709
1710 if ((start < effective_min_offset) ||
1711 (end > effective_max_offset) ||
1712 (start >= end)) {
1713 RETURN(KERN_INVALID_ADDRESS);
1714 }
1715
1716 if (overwrite && zap_old_map != VM_MAP_NULL) {
1717 /*
1718 * Fixed mapping and "overwrite" flag: attempt to
1719 * remove all existing mappings in the specified
1720 * address range, saving them in our "zap_old_map".
1721 */
1722 (void) vm_map_delete(map, start, end,
1723 VM_MAP_REMOVE_SAVE_ENTRIES,
1724 zap_old_map);
1725 }
1726
1727 /*
1728 * ... the starting address isn't allocated
1729 */
1730
1731 if (vm_map_lookup_entry(map, start, &entry)) {
1732 if (! (flags & VM_FLAGS_ALREADY)) {
1733 RETURN(KERN_NO_SPACE);
1734 }
1735 /*
1736 * Check if what's already there is what we want.
1737 */
1738 tmp_start = start;
1739 tmp_offset = offset;
1740 if (entry->vme_start < start) {
1741 tmp_start -= start - entry->vme_start;
1742 tmp_offset -= start - entry->vme_start;
1743
1744 }
1745 for (; entry->vme_start < end;
1746 entry = entry->vme_next) {
1747 /*
1748 * Check if the mapping's attributes
1749 * match the existing map entry.
1750 */
1751 if (entry == vm_map_to_entry(map) ||
1752 entry->vme_start != tmp_start ||
1753 entry->is_sub_map != is_submap ||
1754 entry->offset != tmp_offset ||
1755 entry->needs_copy != needs_copy ||
1756 entry->protection != cur_protection ||
1757 entry->max_protection != max_protection ||
1758 entry->inheritance != inheritance ||
1759 entry->alias != alias) {
1760 /* not the same mapping ! */
1761 RETURN(KERN_NO_SPACE);
1762 }
1763 /*
1764 * Check if the same object is being mapped.
1765 */
1766 if (is_submap) {
1767 if (entry->object.sub_map !=
1768 (vm_map_t) object) {
1769 /* not the same submap */
1770 RETURN(KERN_NO_SPACE);
1771 }
1772 } else {
1773 if (entry->object.vm_object != object) {
1774 /* not the same VM object... */
1775 vm_object_t obj2;
1776
1777 obj2 = entry->object.vm_object;
1778 if ((obj2 == VM_OBJECT_NULL ||
1779 obj2->internal) &&
1780 (object == VM_OBJECT_NULL ||
1781 object->internal)) {
1782 /*
1783 * ... but both are
1784 * anonymous memory,
1785 * so equivalent.
1786 */
1787 } else {
1788 RETURN(KERN_NO_SPACE);
1789 }
1790 }
1791 }
1792
1793 tmp_offset += entry->vme_end - entry->vme_start;
1794 tmp_start += entry->vme_end - entry->vme_start;
1795 if (entry->vme_end >= end) {
1796 /* reached the end of our mapping */
1797 break;
1798 }
1799 }
1800 /* it all matches: let's use what's already there ! */
1801 RETURN(KERN_MEMORY_PRESENT);
1802 }
1803
1804 /*
1805 * ... the next region doesn't overlap the
1806 * end point.
1807 */
1808
1809 if ((entry->vme_next != vm_map_to_entry(map)) &&
1810 (entry->vme_next->vme_start < end))
1811 RETURN(KERN_NO_SPACE);
1812 }
1813
1814 /*
1815 * At this point,
1816 * "start" and "end" should define the endpoints of the
1817 * available new range, and
1818 * "entry" should refer to the region before the new
1819 * range, and
1820 *
1821 * the map should be locked.
1822 */
1823
1824 /*
1825 * See whether we can avoid creating a new entry (and object) by
1826 * extending one of our neighbors. [So far, we only attempt to
1827 * extend from below.] Note that we can never extend/join
1828 * purgable objects because they need to remain distinct
1829 * entities in order to implement their "volatile object"
1830 * semantics.
1831 */
1832
1833 if (purgable) {
1834 if (object == VM_OBJECT_NULL) {
1835 object = vm_object_allocate(size);
1836 object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
1837 object->purgable = VM_PURGABLE_NONVOLATILE;
1838 offset = (vm_object_offset_t)0;
1839 }
1840 } else if ((is_submap == FALSE) &&
1841 (object == VM_OBJECT_NULL) &&
1842 (entry != vm_map_to_entry(map)) &&
1843 (entry->vme_end == start) &&
1844 (!entry->is_shared) &&
1845 (!entry->is_sub_map) &&
1846 ((alias == VM_MEMORY_REALLOC) || (entry->alias == alias)) &&
1847 (entry->inheritance == inheritance) &&
1848 (entry->protection == cur_protection) &&
1849 (entry->max_protection == max_protection) &&
1850 (entry->behavior == VM_BEHAVIOR_DEFAULT) &&
1851 (entry->in_transition == 0) &&
1852 (entry->no_cache == no_cache) &&
1853 ((entry->vme_end - entry->vme_start) + size <=
1854 (alias == VM_MEMORY_REALLOC ?
1855 ANON_CHUNK_SIZE :
1856 NO_COALESCE_LIMIT)) &&
1857 (entry->wired_count == 0)) { /* implies user_wired_count == 0 */
1858 if (vm_object_coalesce(entry->object.vm_object,
1859 VM_OBJECT_NULL,
1860 entry->offset,
1861 (vm_object_offset_t) 0,
1862 (vm_map_size_t)(entry->vme_end - entry->vme_start),
1863 (vm_map_size_t)(end - entry->vme_end))) {
1864
1865 /*
1866 * Coalesced the two objects - can extend
1867 * the previous map entry to include the
1868 * new range.
1869 */
1870 map->size += (end - entry->vme_end);
1871 entry->vme_end = end;
1872 vm_map_store_update_first_free(map, map->first_free);
1873 RETURN(KERN_SUCCESS);
1874 }
1875 }
1876
1877 step = superpage_size ? SUPERPAGE_SIZE : (end - start);
1878 new_entry = NULL;
1879
1880 for (tmp2_start = start; tmp2_start<end; tmp2_start += step) {
1881 tmp2_end = tmp2_start + step;
1882 /*
1883 * Create a new entry
1884 * LP64todo - for now, we can only allocate 4GB internal objects
1885 * because the default pager can't page bigger ones. Remove this
1886 * when it can.
1887 *
1888 * XXX FBDP
1889 * The reserved "page zero" in each process's address space can
1890 * be arbitrarily large. Splitting it into separate 4GB objects and
1891 * therefore different VM map entries serves no purpose and just
1892 * slows down operations on the VM map, so let's not split the
1893 * allocation into 4GB chunks if the max protection is NONE. That
1894 * memory should never be accessible, so it will never get to the
1895 * default pager.
1896 */
1897 tmp_start = tmp2_start;
1898 if (object == VM_OBJECT_NULL &&
1899 size > (vm_map_size_t)ANON_CHUNK_SIZE &&
1900 max_protection != VM_PROT_NONE &&
1901 superpage_size == 0)
1902 tmp_end = tmp_start + (vm_map_size_t)ANON_CHUNK_SIZE;
1903 else
1904 tmp_end = tmp2_end;
1905 do {
1906 new_entry = vm_map_entry_insert(map, entry, tmp_start, tmp_end,
1907 object, offset, needs_copy,
1908 FALSE, FALSE,
1909 cur_protection, max_protection,
1910 VM_BEHAVIOR_DEFAULT,
1911 (flags & VM_FLAGS_MAP_JIT)? VM_INHERIT_NONE: inheritance,
1912 0, no_cache,
1913 permanent, superpage_size);
1914 new_entry->alias = alias;
1915 if (flags & VM_FLAGS_MAP_JIT){
1916 if (!(map->jit_entry_exists)){
1917 new_entry->used_for_jit = TRUE;
1918 map->jit_entry_exists = TRUE;
1919 }
1920 }
1921
1922 if (is_submap) {
1923 vm_map_t submap;
1924 boolean_t submap_is_64bit;
1925 boolean_t use_pmap;
1926
1927 new_entry->is_sub_map = TRUE;
1928 submap = (vm_map_t) object;
1929 submap_is_64bit = vm_map_is_64bit(submap);
1930 use_pmap = (alias == VM_MEMORY_SHARED_PMAP);
1931 #ifndef NO_NESTED_PMAP
1932 if (use_pmap && submap->pmap == NULL) {
1933 /* we need a sub pmap to nest... */
1934 submap->pmap = pmap_create(0, submap_is_64bit);
1935 if (submap->pmap == NULL) {
1936 /* let's proceed without nesting... */
1937 }
1938 }
1939 if (use_pmap && submap->pmap != NULL) {
1940 kr = pmap_nest(map->pmap,
1941 submap->pmap,
1942 tmp_start,
1943 tmp_start,
1944 tmp_end - tmp_start);
1945 if (kr != KERN_SUCCESS) {
1946 printf("vm_map_enter: "
1947 "pmap_nest(0x%llx,0x%llx) "
1948 "error 0x%x\n",
1949 (long long)tmp_start,
1950 (long long)tmp_end,
1951 kr);
1952 } else {
1953 /* we're now nested! */
1954 new_entry->use_pmap = TRUE;
1955 pmap_empty = FALSE;
1956 }
1957 }
1958 #endif /* NO_NESTED_PMAP */
1959 }
1960 entry = new_entry;
1961
1962 if (superpage_size) {
1963 vm_page_t pages, m;
1964 vm_object_t sp_object;
1965
1966 entry->offset = 0;
1967
1968 /* allocate one superpage */
1969 kr = cpm_allocate(SUPERPAGE_SIZE, &pages, 0, SUPERPAGE_NBASEPAGES-1, TRUE, 0);
1970 if (kr != KERN_SUCCESS) {
1971 new_mapping_established = TRUE; /* will cause deallocation of whole range */
1972 RETURN(kr);
1973 }
1974
1975 /* create one vm_object per superpage */
1976 sp_object = vm_object_allocate((vm_map_size_t)(entry->vme_end - entry->vme_start));
1977 sp_object->phys_contiguous = TRUE;
1978 sp_object->vo_shadow_offset = (vm_object_offset_t)pages->phys_page*PAGE_SIZE;
1979 entry->object.vm_object = sp_object;
1980
1981 /* enter the base pages into the object */
1982 vm_object_lock(sp_object);
1983 for (offset = 0; offset < SUPERPAGE_SIZE; offset += PAGE_SIZE) {
1984 m = pages;
1985 pmap_zero_page(m->phys_page);
1986 pages = NEXT_PAGE(m);
1987 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
1988 vm_page_insert(m, sp_object, offset);
1989 }
1990 vm_object_unlock(sp_object);
1991 }
1992 } while (tmp_end != tmp2_end &&
1993 (tmp_start = tmp_end) &&
1994 (tmp_end = (tmp2_end - tmp_end > (vm_map_size_t)ANON_CHUNK_SIZE) ?
1995 tmp_end + (vm_map_size_t)ANON_CHUNK_SIZE : tmp2_end));
1996 }
1997
1998 vm_map_unlock(map);
1999 map_locked = FALSE;
2000
2001 new_mapping_established = TRUE;
2002
2003 /* Wire down the new entry if the user
2004 * requested all new map entries be wired.
2005 */
2006 if ((map->wiring_required)||(superpage_size)) {
2007 pmap_empty = FALSE; /* pmap won't be empty */
2008 result = vm_map_wire(map, start, end,
2009 new_entry->protection, TRUE);
2010 RETURN(result);
2011 }
2012
2013 if ((object != VM_OBJECT_NULL) &&
2014 (vm_map_pmap_enter_enable) &&
2015 (!anywhere) &&
2016 (!needs_copy) &&
2017 (size < (128*1024))) {
2018 pmap_empty = FALSE; /* pmap won't be empty */
2019
2020 if (override_nx(map, alias) && cur_protection)
2021 cur_protection |= VM_PROT_EXECUTE;
2022
2023 vm_map_pmap_enter(map, start, end,
2024 object, offset, cur_protection);
2025 }
2026
2027 BailOut: ;
2028 if (result == KERN_SUCCESS) {
2029 vm_prot_t pager_prot;
2030 memory_object_t pager;
2031
2032 if (pmap_empty &&
2033 !(flags & VM_FLAGS_NO_PMAP_CHECK)) {
2034 assert(vm_map_pmap_is_empty(map,
2035 *address,
2036 *address+size));
2037 }
2038
2039 /*
2040 * For "named" VM objects, let the pager know that the
2041 * memory object is being mapped. Some pagers need to keep
2042 * track of this, to know when they can reclaim the memory
2043 * object, for example.
2044 * VM calls memory_object_map() for each mapping (specifying
2045 * the protection of each mapping) and calls
2046 * memory_object_last_unmap() when all the mappings are gone.
2047 */
2048 pager_prot = max_protection;
2049 if (needs_copy) {
2050 /*
2051 * Copy-On-Write mapping: won't modify
2052 * the memory object.
2053 */
2054 pager_prot &= ~VM_PROT_WRITE;
2055 }
2056 if (!is_submap &&
2057 object != VM_OBJECT_NULL &&
2058 object->named &&
2059 object->pager != MEMORY_OBJECT_NULL) {
2060 vm_object_lock(object);
2061 pager = object->pager;
2062 if (object->named &&
2063 pager != MEMORY_OBJECT_NULL) {
2064 assert(object->pager_ready);
2065 vm_object_mapping_wait(object, THREAD_UNINT);
2066 vm_object_mapping_begin(object);
2067 vm_object_unlock(object);
2068
2069 kr = memory_object_map(pager, pager_prot);
2070 assert(kr == KERN_SUCCESS);
2071
2072 vm_object_lock(object);
2073 vm_object_mapping_end(object);
2074 }
2075 vm_object_unlock(object);
2076 }
2077 } else {
2078 if (new_mapping_established) {
2079 /*
2080 * We have to get rid of the new mappings since we
2081 * won't make them available to the user.
2082 * Try to do that atomically, to minimize the risk
2083 * that someone else creates new mappings in that range.
2084 */
2085 zap_new_map = vm_map_create(PMAP_NULL,
2086 *address,
2087 *address + size,
2088 map->hdr.entries_pageable);
2089 if (!map_locked) {
2090 vm_map_lock(map);
2091 map_locked = TRUE;
2092 }
2093 (void) vm_map_delete(map, *address, *address+size,
2094 VM_MAP_REMOVE_SAVE_ENTRIES,
2095 zap_new_map);
2096 }
2097 if (zap_old_map != VM_MAP_NULL &&
2098 zap_old_map->hdr.nentries != 0) {
2099 vm_map_entry_t entry1, entry2;
2100
2101 /*
2102 * The new mapping failed. Attempt to restore
2103 * the old mappings, saved in the "zap_old_map".
2104 */
2105 if (!map_locked) {
2106 vm_map_lock(map);
2107 map_locked = TRUE;
2108 }
2109
2110 /* first check if the coast is still clear */
2111 start = vm_map_first_entry(zap_old_map)->vme_start;
2112 end = vm_map_last_entry(zap_old_map)->vme_end;
2113 if (vm_map_lookup_entry(map, start, &entry1) ||
2114 vm_map_lookup_entry(map, end, &entry2) ||
2115 entry1 != entry2) {
2116 /*
2117 * Part of that range has already been
2118 * re-mapped: we can't restore the old
2119 * mappings...
2120 */
2121 vm_map_enter_restore_failures++;
2122 } else {
2123 /*
2124 * Transfer the saved map entries from
2125 * "zap_old_map" to the original "map",
2126 * inserting them all after "entry1".
2127 */
2128 for (entry2 = vm_map_first_entry(zap_old_map);
2129 entry2 != vm_map_to_entry(zap_old_map);
2130 entry2 = vm_map_first_entry(zap_old_map)) {
2131 vm_map_size_t entry_size;
2132
2133 entry_size = (entry2->vme_end -
2134 entry2->vme_start);
2135 vm_map_store_entry_unlink(zap_old_map,
2136 entry2);
2137 zap_old_map->size -= entry_size;
2138 vm_map_store_entry_link(map, entry1, entry2);
2139 map->size += entry_size;
2140 entry1 = entry2;
2141 }
2142 if (map->wiring_required) {
2143 /*
2144 * XXX TODO: we should rewire the
2145 * old pages here...
2146 */
2147 }
2148 vm_map_enter_restore_successes++;
2149 }
2150 }
2151 }
2152
2153 if (map_locked) {
2154 vm_map_unlock(map);
2155 }
2156
2157 /*
2158 * Get rid of the "zap_maps" and all the map entries that
2159 * they may still contain.
2160 */
2161 if (zap_old_map != VM_MAP_NULL) {
2162 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
2163 zap_old_map = VM_MAP_NULL;
2164 }
2165 if (zap_new_map != VM_MAP_NULL) {
2166 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
2167 zap_new_map = VM_MAP_NULL;
2168 }
2169
2170 return result;
2171
2172 #undef RETURN
2173 }
2174
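/*
 * Illustrative sketch: a minimal anonymous allocation through
 * vm_map_enter(), assuming a caller that already holds a reference on
 * "map".  With object == VM_OBJECT_NULL the routine above supplies the
 * backing object(s) itself, splitting allocations larger than
 * ANON_CHUNK_SIZE into multiple entries unless max_protection is
 * VM_PROT_NONE.
 */
#if 0	/* example only */
static kern_return_t
example_allocate_anonymous(
	vm_map_t	map,
	vm_map_size_t	size,
	vm_map_offset_t	*addr)
{
	*addr = 0;
	return vm_map_enter(map,
			    addr,
			    vm_map_round_page(size),
			    (vm_map_offset_t)0,		/* no alignment mask */
			    VM_FLAGS_ANYWHERE,		/* let the kernel pick the address */
			    VM_OBJECT_NULL,		/* anonymous: no backing object yet */
			    (vm_object_offset_t)0,
			    FALSE,			/* needs_copy */
			    VM_PROT_DEFAULT,
			    VM_PROT_ALL,
			    VM_INHERIT_DEFAULT);
}
#endif
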
2175 kern_return_t
2176 vm_map_enter_mem_object(
2177 vm_map_t target_map,
2178 vm_map_offset_t *address,
2179 vm_map_size_t initial_size,
2180 vm_map_offset_t mask,
2181 int flags,
2182 ipc_port_t port,
2183 vm_object_offset_t offset,
2184 boolean_t copy,
2185 vm_prot_t cur_protection,
2186 vm_prot_t max_protection,
2187 vm_inherit_t inheritance)
2188 {
2189 vm_map_address_t map_addr;
2190 vm_map_size_t map_size;
2191 vm_object_t object;
2192 vm_object_size_t size;
2193 kern_return_t result;
2194 boolean_t mask_cur_protection, mask_max_protection;
2195
2196 mask_cur_protection = cur_protection & VM_PROT_IS_MASK;
2197 mask_max_protection = max_protection & VM_PROT_IS_MASK;
2198 cur_protection &= ~VM_PROT_IS_MASK;
2199 max_protection &= ~VM_PROT_IS_MASK;
2200
2201 /*
2202 * Check arguments for validity
2203 */
2204 if ((target_map == VM_MAP_NULL) ||
2205 (cur_protection & ~VM_PROT_ALL) ||
2206 (max_protection & ~VM_PROT_ALL) ||
2207 (inheritance > VM_INHERIT_LAST_VALID) ||
2208 initial_size == 0)
2209 return KERN_INVALID_ARGUMENT;
2210
2211 map_addr = vm_map_trunc_page(*address);
2212 map_size = vm_map_round_page(initial_size);
2213 size = vm_object_round_page(initial_size);
2214
2215 /*
2216 * Find the vm object (if any) corresponding to this port.
2217 */
2218 if (!IP_VALID(port)) {
2219 object = VM_OBJECT_NULL;
2220 offset = 0;
2221 copy = FALSE;
2222 } else if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
2223 vm_named_entry_t named_entry;
2224
2225 named_entry = (vm_named_entry_t) port->ip_kobject;
2226 /* a few checks to make sure user is obeying rules */
2227 if (size == 0) {
2228 if (offset >= named_entry->size)
2229 return KERN_INVALID_RIGHT;
2230 size = named_entry->size - offset;
2231 }
2232 if (mask_max_protection) {
2233 max_protection &= named_entry->protection;
2234 }
2235 if (mask_cur_protection) {
2236 cur_protection &= named_entry->protection;
2237 }
2238 if ((named_entry->protection & max_protection) !=
2239 max_protection)
2240 return KERN_INVALID_RIGHT;
2241 if ((named_entry->protection & cur_protection) !=
2242 cur_protection)
2243 return KERN_INVALID_RIGHT;
2244 if (named_entry->size < (offset + size))
2245 return KERN_INVALID_ARGUMENT;
2246
2247 /* the caller's "offset" parameter is relative to the start of the  */
2248 /* named entry, which in turn has its own offset into the VM object */
2249 offset = offset + named_entry->offset;
2250
2251 named_entry_lock(named_entry);
2252 if (named_entry->is_sub_map) {
2253 vm_map_t submap;
2254
2255 submap = named_entry->backing.map;
2256 vm_map_lock(submap);
2257 vm_map_reference(submap);
2258 vm_map_unlock(submap);
2259 named_entry_unlock(named_entry);
2260
2261 result = vm_map_enter(target_map,
2262 &map_addr,
2263 map_size,
2264 mask,
2265 flags | VM_FLAGS_SUBMAP,
2266 (vm_object_t) submap,
2267 offset,
2268 copy,
2269 cur_protection,
2270 max_protection,
2271 inheritance);
2272 if (result != KERN_SUCCESS) {
2273 vm_map_deallocate(submap);
2274 } else {
2275 /*
2276 * No need to lock "submap" just to check its
2277 * "mapped" flag: that flag is never reset
2278 * once it's been set and if we race, we'll
2279 * just end up setting it twice, which is OK.
2280 */
2281 if (submap->mapped == FALSE) {
2282 /*
2283 * This submap has never been mapped.
2284 * Set its "mapped" flag now that it
2285 * has been mapped.
2286 * This happens only for the first ever
2287 * mapping of a "submap".
2288 */
2289 vm_map_lock(submap);
2290 submap->mapped = TRUE;
2291 vm_map_unlock(submap);
2292 }
2293 *address = map_addr;
2294 }
2295 return result;
2296
2297 } else if (named_entry->is_pager) {
2298 unsigned int access;
2299 vm_prot_t protections;
2300 unsigned int wimg_mode;
2301
2302 protections = named_entry->protection & VM_PROT_ALL;
2303 access = GET_MAP_MEM(named_entry->protection);
2304
2305 object = vm_object_enter(named_entry->backing.pager,
2306 named_entry->size,
2307 named_entry->internal,
2308 FALSE,
2309 FALSE);
2310 if (object == VM_OBJECT_NULL) {
2311 named_entry_unlock(named_entry);
2312 return KERN_INVALID_OBJECT;
2313 }
2314
2315 /* JMM - drop reference on pager here */
2316
2317 /* create an extra ref for the named entry */
2318 vm_object_lock(object);
2319 vm_object_reference_locked(object);
2320 named_entry->backing.object = object;
2321 named_entry->is_pager = FALSE;
2322 named_entry_unlock(named_entry);
2323
2324 wimg_mode = object->wimg_bits;
2325
2326 if (access == MAP_MEM_IO) {
2327 wimg_mode = VM_WIMG_IO;
2328 } else if (access == MAP_MEM_COPYBACK) {
2329 wimg_mode = VM_WIMG_USE_DEFAULT;
2330 } else if (access == MAP_MEM_WTHRU) {
2331 wimg_mode = VM_WIMG_WTHRU;
2332 } else if (access == MAP_MEM_WCOMB) {
2333 wimg_mode = VM_WIMG_WCOMB;
2334 }
2335
2336 /* wait for object (if any) to be ready */
2337 if (!named_entry->internal) {
2338 while (!object->pager_ready) {
2339 vm_object_wait(
2340 object,
2341 VM_OBJECT_EVENT_PAGER_READY,
2342 THREAD_UNINT);
2343 vm_object_lock(object);
2344 }
2345 }
2346
2347 if (object->wimg_bits != wimg_mode)
2348 vm_object_change_wimg_mode(object, wimg_mode);
2349
2350 object->true_share = TRUE;
2351
2352 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
2353 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
2354 vm_object_unlock(object);
2355 } else {
2356 /* This is the case where we are going to map */
2357 /* an already mapped object. If the object is */
2358 /* not ready, it is internal. An external */
2359 /* object cannot be mapped until it is ready, */
2360 /* so we can skip the ready check */
2361 /* in this case. */
2362 object = named_entry->backing.object;
2363 assert(object != VM_OBJECT_NULL);
2364 named_entry_unlock(named_entry);
2365 vm_object_reference(object);
2366 }
2367 } else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
2368 /*
2369 * JMM - This is temporary until we unify named entries
2370 * and raw memory objects.
2371 *
2372 * Detected fake ip_kotype for a memory object. In
2373 * this case, the port isn't really a port at all, but
2374 * instead is just a raw memory object.
2375 */
2376
2377 object = vm_object_enter((memory_object_t)port,
2378 size, FALSE, FALSE, FALSE);
2379 if (object == VM_OBJECT_NULL)
2380 return KERN_INVALID_OBJECT;
2381
2382 /* wait for object (if any) to be ready */
2383 if (object != VM_OBJECT_NULL) {
2384 if (object == kernel_object) {
2385 printf("Warning: Attempt to map kernel object"
2386 " by a non-private kernel entity\n");
2387 return KERN_INVALID_OBJECT;
2388 }
2389 if (!object->pager_ready) {
2390 vm_object_lock(object);
2391
2392 while (!object->pager_ready) {
2393 vm_object_wait(object,
2394 VM_OBJECT_EVENT_PAGER_READY,
2395 THREAD_UNINT);
2396 vm_object_lock(object);
2397 }
2398 vm_object_unlock(object);
2399 }
2400 }
2401 } else {
2402 return KERN_INVALID_OBJECT;
2403 }
2404
2405 if (object != VM_OBJECT_NULL &&
2406 object->named &&
2407 object->pager != MEMORY_OBJECT_NULL &&
2408 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
2409 memory_object_t pager;
2410 vm_prot_t pager_prot;
2411 kern_return_t kr;
2412
2413 /*
2414 * For "named" VM objects, let the pager know that the
2415 * memory object is being mapped. Some pagers need to keep
2416 * track of this, to know when they can reclaim the memory
2417 * object, for example.
2418 * VM calls memory_object_map() for each mapping (specifying
2419 * the protection of each mapping) and calls
2420 * memory_object_last_unmap() when all the mappings are gone.
2421 */
2422 pager_prot = max_protection;
2423 if (copy) {
2424 /*
2425 * Copy-On-Write mapping: won't modify the
2426 * memory object.
2427 */
2428 pager_prot &= ~VM_PROT_WRITE;
2429 }
2430 vm_object_lock(object);
2431 pager = object->pager;
2432 if (object->named &&
2433 pager != MEMORY_OBJECT_NULL &&
2434 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
2435 assert(object->pager_ready);
2436 vm_object_mapping_wait(object, THREAD_UNINT);
2437 vm_object_mapping_begin(object);
2438 vm_object_unlock(object);
2439
2440 kr = memory_object_map(pager, pager_prot);
2441 assert(kr == KERN_SUCCESS);
2442
2443 vm_object_lock(object);
2444 vm_object_mapping_end(object);
2445 }
2446 vm_object_unlock(object);
2447 }
2448
2449 /*
2450 * Perform the copy if requested
2451 */
2452
2453 if (copy) {
2454 vm_object_t new_object;
2455 vm_object_offset_t new_offset;
2456
2457 result = vm_object_copy_strategically(object, offset, size,
2458 &new_object, &new_offset,
2459 &copy);
2460
2461
2462 if (result == KERN_MEMORY_RESTART_COPY) {
2463 boolean_t success;
2464 boolean_t src_needs_copy;
2465
2466 /*
2467 * XXX
2468 * We currently ignore src_needs_copy.
2469 * This really is the issue of how to make
2470 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
2471 * non-kernel users to use. Solution forthcoming.
2472 * In the meantime, since we don't allow non-kernel
2473 * memory managers to specify symmetric copy,
2474 * we won't run into problems here.
2475 */
2476 new_object = object;
2477 new_offset = offset;
2478 success = vm_object_copy_quickly(&new_object,
2479 new_offset, size,
2480 &src_needs_copy,
2481 &copy);
2482 assert(success);
2483 result = KERN_SUCCESS;
2484 }
2485 /*
2486 * Throw away the reference to the
2487 * original object, as it won't be mapped.
2488 */
2489
2490 vm_object_deallocate(object);
2491
2492 if (result != KERN_SUCCESS)
2493 return result;
2494
2495 object = new_object;
2496 offset = new_offset;
2497 }
2498
2499 result = vm_map_enter(target_map,
2500 &map_addr, map_size,
2501 (vm_map_offset_t)mask,
2502 flags,
2503 object, offset,
2504 copy,
2505 cur_protection, max_protection, inheritance);
2506 if (result != KERN_SUCCESS)
2507 vm_object_deallocate(object);
2508 *address = map_addr;
2509 return result;
2510 }
2511
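/*
 * Illustrative sketch: mapping a memory entry port with
 * vm_map_enter_mem_object(), roughly what the mach_vm_map() path does.
 * "mem_entry" is assumed to be a send right obtained earlier (for
 * example from mach_make_memory_entry_64()); the routine above resolves
 * it to a VM object or submap and then calls vm_map_enter().
 */
#if 0	/* example only */
static kern_return_t
example_map_memory_entry(
	vm_map_t	target_map,
	ipc_port_t	mem_entry,
	vm_map_size_t	size,
	vm_map_offset_t	*addr)
{
	*addr = 0;
	return vm_map_enter_mem_object(target_map,
				       addr,
				       size,
				       (vm_map_offset_t)0,
				       VM_FLAGS_ANYWHERE,
				       mem_entry,
				       (vm_object_offset_t)0,
				       TRUE,		/* copy: map it copy-on-write */
				       VM_PROT_READ | VM_PROT_WRITE,
				       VM_PROT_ALL,
				       VM_INHERIT_DEFAULT);
}
#endif
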
2512
2513
2514
2515 kern_return_t
2516 vm_map_enter_mem_object_control(
2517 vm_map_t target_map,
2518 vm_map_offset_t *address,
2519 vm_map_size_t initial_size,
2520 vm_map_offset_t mask,
2521 int flags,
2522 memory_object_control_t control,
2523 vm_object_offset_t offset,
2524 boolean_t copy,
2525 vm_prot_t cur_protection,
2526 vm_prot_t max_protection,
2527 vm_inherit_t inheritance)
2528 {
2529 vm_map_address_t map_addr;
2530 vm_map_size_t map_size;
2531 vm_object_t object;
2532 vm_object_size_t size;
2533 kern_return_t result;
2534 memory_object_t pager;
2535 vm_prot_t pager_prot;
2536 kern_return_t kr;
2537
2538 /*
2539 * Check arguments for validity
2540 */
2541 if ((target_map == VM_MAP_NULL) ||
2542 (cur_protection & ~VM_PROT_ALL) ||
2543 (max_protection & ~VM_PROT_ALL) ||
2544 (inheritance > VM_INHERIT_LAST_VALID) ||
2545 initial_size == 0)
2546 return KERN_INVALID_ARGUMENT;
2547
2548 map_addr = vm_map_trunc_page(*address);
2549 map_size = vm_map_round_page(initial_size);
2550 size = vm_object_round_page(initial_size);
2551
2552 object = memory_object_control_to_vm_object(control);
2553
2554 if (object == VM_OBJECT_NULL)
2555 return KERN_INVALID_OBJECT;
2556
2557 if (object == kernel_object) {
2558 printf("Warning: Attempt to map kernel object"
2559 " by a non-private kernel entity\n");
2560 return KERN_INVALID_OBJECT;
2561 }
2562
2563 vm_object_lock(object);
2564 object->ref_count++;
2565 vm_object_res_reference(object);
2566
2567 /*
2568 * For "named" VM objects, let the pager know that the
2569 * memory object is being mapped. Some pagers need to keep
2570 * track of this, to know when they can reclaim the memory
2571 * object, for example.
2572 * VM calls memory_object_map() for each mapping (specifying
2573 * the protection of each mapping) and calls
2574 * memory_object_last_unmap() when all the mappings are gone.
2575 */
2576 pager_prot = max_protection;
2577 if (copy) {
2578 pager_prot &= ~VM_PROT_WRITE;
2579 }
2580 pager = object->pager;
2581 if (object->named &&
2582 pager != MEMORY_OBJECT_NULL &&
2583 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
2584 assert(object->pager_ready);
2585 vm_object_mapping_wait(object, THREAD_UNINT);
2586 vm_object_mapping_begin(object);
2587 vm_object_unlock(object);
2588
2589 kr = memory_object_map(pager, pager_prot);
2590 assert(kr == KERN_SUCCESS);
2591
2592 vm_object_lock(object);
2593 vm_object_mapping_end(object);
2594 }
2595 vm_object_unlock(object);
2596
2597 /*
2598 * Perform the copy if requested
2599 */
2600
2601 if (copy) {
2602 vm_object_t new_object;
2603 vm_object_offset_t new_offset;
2604
2605 result = vm_object_copy_strategically(object, offset, size,
2606 &new_object, &new_offset,
2607 &copy);
2608
2609
2610 if (result == KERN_MEMORY_RESTART_COPY) {
2611 boolean_t success;
2612 boolean_t src_needs_copy;
2613
2614 /*
2615 * XXX
2616 * We currently ignore src_needs_copy.
2617 * This really is the issue of how to make
2618 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
2619 * non-kernel users to use. Solution forthcoming.
2620 * In the meantime, since we don't allow non-kernel
2621 * memory managers to specify symmetric copy,
2622 * we won't run into problems here.
2623 */
2624 new_object = object;
2625 new_offset = offset;
2626 success = vm_object_copy_quickly(&new_object,
2627 new_offset, size,
2628 &src_needs_copy,
2629 &copy);
2630 assert(success);
2631 result = KERN_SUCCESS;
2632 }
2633 /*
2634 * Throw away the reference to the
2635 * original object, as it won't be mapped.
2636 */
2637
2638 vm_object_deallocate(object);
2639
2640 if (result != KERN_SUCCESS)
2641 return result;
2642
2643 object = new_object;
2644 offset = new_offset;
2645 }
2646
2647 result = vm_map_enter(target_map,
2648 &map_addr, map_size,
2649 (vm_map_offset_t)mask,
2650 flags,
2651 object, offset,
2652 copy,
2653 cur_protection, max_protection, inheritance);
2654 if (result != KERN_SUCCESS)
2655 vm_object_deallocate(object);
2656 *address = map_addr;
2657
2658 return result;
2659 }
2660
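/*
 * Illustrative sketch: mapping memory described by a
 * memory_object_control_t (for example, a control reference for a
 * file's pager obtained from the UBC layer).  The "control" argument
 * is assumed to have been obtained and validated by the caller; the
 * routine above takes its own object reference before mapping.
 */
#if 0	/* example only */
static kern_return_t
example_map_control(
	vm_map_t		target_map,
	memory_object_control_t	control,
	vm_object_offset_t	file_offset,
	vm_map_size_t		size,
	vm_map_offset_t		*addr)
{
	*addr = 0;
	return vm_map_enter_mem_object_control(target_map,
					       addr,
					       size,
					       (vm_map_offset_t)0,
					       VM_FLAGS_ANYWHERE,
					       control,
					       file_offset,
					       TRUE,	/* copy-on-write */
					       VM_PROT_READ,
					       VM_PROT_READ | VM_PROT_WRITE,
					       VM_INHERIT_COPY);
}
#endif
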
2661
2662 #if VM_CPM
2663
2664 #ifdef MACH_ASSERT
2665 extern pmap_paddr_t avail_start, avail_end;
2666 #endif
2667
2668 /*
2669 * Allocate memory in the specified map, with the caveat that
2670 * the memory is physically contiguous. This call may fail
2671 * if the system can't find sufficient contiguous memory.
2672 * This call may cause or lead to heart-stopping amounts of
2673 * paging activity.
2674 *
2675 * Memory obtained from this call should be freed in the
2676 * normal way, viz., via vm_deallocate.
2677 */
2678 kern_return_t
2679 vm_map_enter_cpm(
2680 vm_map_t map,
2681 vm_map_offset_t *addr,
2682 vm_map_size_t size,
2683 int flags)
2684 {
2685 vm_object_t cpm_obj;
2686 pmap_t pmap;
2687 vm_page_t m, pages;
2688 kern_return_t kr;
2689 vm_map_offset_t va, start, end, offset;
2690 #if MACH_ASSERT
2691 vm_map_offset_t prev_addr;
2692 #endif /* MACH_ASSERT */
2693
2694 boolean_t anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
2695
2696 if (!vm_allocate_cpm_enabled)
2697 return KERN_FAILURE;
2698
2699 if (size == 0) {
2700 *addr = 0;
2701 return KERN_SUCCESS;
2702 }
2703 if (anywhere)
2704 *addr = vm_map_min(map);
2705 else
2706 *addr = vm_map_trunc_page(*addr);
2707 size = vm_map_round_page(size);
2708
2709 /*
2710 * LP64todo - cpm_allocate should probably allow
2711 * allocations of >4GB, but not with the current
2712 * algorithm, so just cast down the size for now.
2713 */
2714 if (size > VM_MAX_ADDRESS)
2715 return KERN_RESOURCE_SHORTAGE;
2716 if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
2717 &pages, 0, 0, TRUE, flags)) != KERN_SUCCESS)
2718 return kr;
2719
2720 cpm_obj = vm_object_allocate((vm_object_size_t)size);
2721 assert(cpm_obj != VM_OBJECT_NULL);
2722 assert(cpm_obj->internal);
2723 assert(cpm_obj->size == (vm_object_size_t)size);
2724 assert(cpm_obj->can_persist == FALSE);
2725 assert(cpm_obj->pager_created == FALSE);
2726 assert(cpm_obj->pageout == FALSE);
2727 assert(cpm_obj->shadow == VM_OBJECT_NULL);
2728
2729 /*
2730 * Insert pages into object.
2731 */
2732
2733 vm_object_lock(cpm_obj);
2734 for (offset = 0; offset < size; offset += PAGE_SIZE) {
2735 m = pages;
2736 pages = NEXT_PAGE(m);
2737 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
2738
2739 assert(!m->gobbled);
2740 assert(!m->wanted);
2741 assert(!m->pageout);
2742 assert(!m->tabled);
2743 assert(VM_PAGE_WIRED(m));
2744 /*
2745 * ENCRYPTED SWAP:
2746 * "m" is not supposed to be pageable, so it
2747 * should not be encrypted. It wouldn't be safe
2748 * to enter it in a new VM object while encrypted.
2749 */
2750 ASSERT_PAGE_DECRYPTED(m);
2751 assert(m->busy);
2752 assert(m->phys_page>=(avail_start>>PAGE_SHIFT) && m->phys_page<=(avail_end>>PAGE_SHIFT));
2753
2754 m->busy = FALSE;
2755 vm_page_insert(m, cpm_obj, offset);
2756 }
2757 assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
2758 vm_object_unlock(cpm_obj);
2759
2760 /*
2761 * Hang onto a reference on the object in case a
2762 * multi-threaded application for some reason decides
2763 * to deallocate the portion of the address space into
2764 * which we will insert this object.
2765 *
2766 * Unfortunately, we must insert the object now before
2767 * we can talk to the pmap module about which addresses
2768 * must be wired down. Hence, the race with a multi-
2769 * threaded app.
2770 */
2771 vm_object_reference(cpm_obj);
2772
2773 /*
2774 * Insert object into map.
2775 */
2776
2777 kr = vm_map_enter(
2778 map,
2779 addr,
2780 size,
2781 (vm_map_offset_t)0,
2782 flags,
2783 cpm_obj,
2784 (vm_object_offset_t)0,
2785 FALSE,
2786 VM_PROT_ALL,
2787 VM_PROT_ALL,
2788 VM_INHERIT_DEFAULT);
2789
2790 if (kr != KERN_SUCCESS) {
2791 /*
2792 * A CPM object doesn't have can_persist set,
2793 * so all we have to do is deallocate it to
2794 * free up these pages.
2795 */
2796 assert(cpm_obj->pager_created == FALSE);
2797 assert(cpm_obj->can_persist == FALSE);
2798 assert(cpm_obj->pageout == FALSE);
2799 assert(cpm_obj->shadow == VM_OBJECT_NULL);
2800 vm_object_deallocate(cpm_obj); /* kill acquired ref */
2801 vm_object_deallocate(cpm_obj); /* kill creation ref */
2802 }
2803
2804 /*
2805 * Inform the physical mapping system that the
2806 * range of addresses may not fault, so that
2807 * page tables and such can be locked down as well.
2808 */
2809 start = *addr;
2810 end = start + size;
2811 pmap = vm_map_pmap(map);
2812 pmap_pageable(pmap, start, end, FALSE);
2813
2814 /*
2815 * Enter each page into the pmap, to avoid faults.
2816 * Note that this loop could be coded more efficiently,
2817 * if the need arose, rather than looking up each page
2818 * again.
2819 */
2820 for (offset = 0, va = start; offset < size;
2821 va += PAGE_SIZE, offset += PAGE_SIZE) {
2822 int type_of_fault;
2823
2824 vm_object_lock(cpm_obj);
2825 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
2826 assert(m != VM_PAGE_NULL);
2827
2828 vm_page_zero_fill(m);
2829
2830 type_of_fault = DBG_ZERO_FILL_FAULT;
2831
2832 vm_fault_enter(m, pmap, va, VM_PROT_ALL, VM_PROT_WRITE,
2833 VM_PAGE_WIRED(m), FALSE, FALSE, FALSE,
2834 &type_of_fault);
2835
2836 vm_object_unlock(cpm_obj);
2837 }
2838
2839 #if MACH_ASSERT
2840 /*
2841 * Verify ordering in address space.
2842 */
2843 for (offset = 0; offset < size; offset += PAGE_SIZE) {
2844 vm_object_lock(cpm_obj);
2845 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
2846 vm_object_unlock(cpm_obj);
2847 if (m == VM_PAGE_NULL)
2848 panic("vm_allocate_cpm: obj 0x%x off 0x%x no page",
2849 cpm_obj, offset);
2850 assert(m->tabled);
2851 assert(!m->busy);
2852 assert(!m->wanted);
2853 assert(!m->fictitious);
2854 assert(!m->private);
2855 assert(!m->absent);
2856 assert(!m->error);
2857 assert(!m->cleaning);
2858 assert(!m->precious);
2859 assert(!m->clustered);
2860 if (offset != 0) {
2861 if (m->phys_page != prev_addr + 1) {
2862 printf("start 0x%x end 0x%x va 0x%x\n",
2863 start, end, va);
2864 printf("obj 0x%x off 0x%x\n", cpm_obj, offset);
2865 printf("m 0x%x prev_address 0x%x\n", m,
2866 prev_addr);
2867 panic("vm_allocate_cpm: pages not contig!");
2868 }
2869 }
2870 prev_addr = m->phys_page;
2871 }
2872 #endif /* MACH_ASSERT */
2873
2874 vm_object_deallocate(cpm_obj); /* kill extra ref */
2875
2876 return kr;
2877 }
2878
2879
2880 #else /* VM_CPM */
2881
2882 /*
2883 * Interface is defined in all cases, but unless the kernel
2884 * is built explicitly for this option, the interface does
2885 * nothing.
2886 */
2887
2888 kern_return_t
2889 vm_map_enter_cpm(
2890 __unused vm_map_t map,
2891 __unused vm_map_offset_t *addr,
2892 __unused vm_map_size_t size,
2893 __unused int flags)
2894 {
2895 return KERN_FAILURE;
2896 }
2897 #endif /* VM_CPM */
2898
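/*
 * Illustrative sketch: requesting a physically contiguous allocation
 * through vm_map_enter_cpm().  On kernels built without VM_CPM the
 * stub above simply returns KERN_FAILURE, so callers are assumed to
 * handle that outcome; memory obtained this way is freed in the normal
 * way, via vm_deallocate().
 */
#if 0	/* example only */
static kern_return_t
example_contiguous_alloc(
	vm_map_t	map,
	vm_map_size_t	size,
	vm_map_offset_t	*addr)
{
	kern_return_t	kr;

	*addr = 0;
	kr = vm_map_enter_cpm(map, addr, vm_map_round_page(size),
			      VM_FLAGS_ANYWHERE);
	if (kr != KERN_SUCCESS)
		return kr;
	/* ... use the wired, contiguous memory ... */
	return KERN_SUCCESS;
}
#endif
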
2899 /* Not used without nested pmaps */
2900 #ifndef NO_NESTED_PMAP
2901 /*
2902 * Clip and unnest a portion of a nested submap mapping.
2903 */
2904
2905
2906 static void
2907 vm_map_clip_unnest(
2908 vm_map_t map,
2909 vm_map_entry_t entry,
2910 vm_map_offset_t start_unnest,
2911 vm_map_offset_t end_unnest)
2912 {
2913 vm_map_offset_t old_start_unnest = start_unnest;
2914 vm_map_offset_t old_end_unnest = end_unnest;
2915
2916 assert(entry->is_sub_map);
2917 assert(entry->object.sub_map != NULL);
2918
2919 /*
2920 * Query the platform for the optimal unnest range.
2921 * DRK: There's some duplication of effort here, since
2922 * callers may have adjusted the range to some extent. This
2923 * routine was introduced to support 1GiB subtree nesting
2924 * for x86 platforms, which can also nest on 2MiB boundaries
2925 * depending on size/alignment.
2926 */
2927 if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) {
2928 log_unnest_badness(map, old_start_unnest, old_end_unnest);
2929 }
2930
2931 if (entry->vme_start > start_unnest ||
2932 entry->vme_end < end_unnest) {
2933 panic("vm_map_clip_unnest(0x%llx,0x%llx): "
2934 "bad nested entry: start=0x%llx end=0x%llx\n",
2935 (long long)start_unnest, (long long)end_unnest,
2936 (long long)entry->vme_start, (long long)entry->vme_end);
2937 }
2938
2939 if (start_unnest > entry->vme_start) {
2940 _vm_map_clip_start(&map->hdr,
2941 entry,
2942 start_unnest);
2943 vm_map_store_update_first_free(map, map->first_free);
2944 }
2945 if (entry->vme_end > end_unnest) {
2946 _vm_map_clip_end(&map->hdr,
2947 entry,
2948 end_unnest);
2949 vm_map_store_update_first_free(map, map->first_free);
2950 }
2951
2952 pmap_unnest(map->pmap,
2953 entry->vme_start,
2954 entry->vme_end - entry->vme_start);
2955 if ((map->mapped) && (map->ref_count)) {
2956 /* clean up parent map/maps */
2957 vm_map_submap_pmap_clean(
2958 map, entry->vme_start,
2959 entry->vme_end,
2960 entry->object.sub_map,
2961 entry->offset);
2962 }
2963 entry->use_pmap = FALSE;
2964 }
2965 #endif /* NO_NESTED_PMAP */
2966
2967 /*
2968 * vm_map_clip_start: [ internal use only ]
2969 *
2970 * Asserts that the given entry begins at or after
2971 * the specified address; if necessary,
2972 * it splits the entry into two.
2973 */
2974 static void
2975 vm_map_clip_start(
2976 vm_map_t map,
2977 vm_map_entry_t entry,
2978 vm_map_offset_t startaddr)
2979 {
2980 #ifndef NO_NESTED_PMAP
2981 if (entry->use_pmap &&
2982 startaddr >= entry->vme_start) {
2983 vm_map_offset_t start_unnest, end_unnest;
2984
2985 /*
2986 * Make sure "startaddr" is no longer in a nested range
2987 * before we clip. Unnest only the minimum range the platform
2988 * can handle.
2989 * vm_map_clip_unnest may perform additional adjustments to
2990 * the unnest range.
2991 */
2992 start_unnest = startaddr & ~(pmap_nesting_size_min - 1);
2993 end_unnest = start_unnest + pmap_nesting_size_min;
2994 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
2995 }
2996 #endif /* NO_NESTED_PMAP */
2997 if (startaddr > entry->vme_start) {
2998 if (entry->object.vm_object &&
2999 !entry->is_sub_map &&
3000 entry->object.vm_object->phys_contiguous) {
3001 pmap_remove(map->pmap,
3002 (addr64_t)(entry->vme_start),
3003 (addr64_t)(entry->vme_end));
3004 }
3005 _vm_map_clip_start(&map->hdr, entry, startaddr);
3006 vm_map_store_update_first_free(map, map->first_free);
3007 }
3008 }
3009
3010
3011 #define vm_map_copy_clip_start(copy, entry, startaddr) \
3012 MACRO_BEGIN \
3013 if ((startaddr) > (entry)->vme_start) \
3014 _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \
3015 MACRO_END
3016
3017 /*
3018 * This routine is called only when it is known that
3019 * the entry must be split.
3020 */
3021 static void
3022 _vm_map_clip_start(
3023 register struct vm_map_header *map_header,
3024 register vm_map_entry_t entry,
3025 register vm_map_offset_t start)
3026 {
3027 register vm_map_entry_t new_entry;
3028
3029 /*
3030 * Split off the front portion --
3031 * note that we must insert the new
3032 * entry BEFORE this one, so that
3033 * this entry has the specified starting
3034 * address.
3035 */
3036
3037 new_entry = _vm_map_entry_create(map_header);
3038 vm_map_entry_copy_full(new_entry, entry);
3039
3040 new_entry->vme_end = start;
3041 entry->offset += (start - entry->vme_start);
3042 entry->vme_start = start;
3043
3044 _vm_map_store_entry_link(map_header, entry->vme_prev, new_entry);
3045
3046 if (entry->is_sub_map)
3047 vm_map_reference(new_entry->object.sub_map);
3048 else
3049 vm_object_reference(new_entry->object.vm_object);
3050 }
3051
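/*
 * Illustrative sketch of the arithmetic above, using a hypothetical
 * entry that covers [0x1000, 0x5000) at object offset 0 and is clipped
 * at 0x3000: the new (front) entry keeps the original start and offset,
 * while the original entry's start and offset both advance by 0x2000.
 */
#if 0	/* example only */
static void
example_clip_start_arithmetic(void)
{
	vm_map_offset_t		vme_start = 0x1000, vme_end = 0x5000;
	vm_object_offset_t	offset = 0;
	vm_map_offset_t		start = 0x3000;		/* clip address */

	/* front piece: [vme_start, start) at the original offset */
	vm_map_offset_t		front_end = start;

	/* original entry now covers [start, vme_end) */
	offset += (start - vme_start);			/* 0x2000 */
	vme_start = start;

	assert(front_end == 0x3000);
	assert(vme_start == 0x3000 && vme_end == 0x5000);
	assert(offset == 0x2000);
}
#endif
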
3052
3053 /*
3054 * vm_map_clip_end: [ internal use only ]
3055 *
3056 * Asserts that the given entry ends at or before
3057 * the specified address; if necessary,
3058 * it splits the entry into two.
3059 */
3060 static void
3061 vm_map_clip_end(
3062 vm_map_t map,
3063 vm_map_entry_t entry,
3064 vm_map_offset_t endaddr)
3065 {
3066 if (endaddr > entry->vme_end) {
3067 /*
3068 * Within the scope of this clipping, limit "endaddr" to
3069 * the end of this map entry...
3070 */
3071 endaddr = entry->vme_end;
3072 }
3073 #ifndef NO_NESTED_PMAP
3074 if (entry->use_pmap) {
3075 vm_map_offset_t start_unnest, end_unnest;
3076
3077 /*
3078 * Make sure the range between the start of this entry and
3079 * the new "endaddr" is no longer nested before we clip.
3080 * Unnest only the minimum range the platform can handle.
3081 * vm_map_clip_unnest may perform additional adjustments to
3082 * the unnest range.
3083 */
3084 start_unnest = entry->vme_start;
3085 end_unnest =
3086 (endaddr + pmap_nesting_size_min - 1) &
3087 ~(pmap_nesting_size_min - 1);
3088 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
3089 }
3090 #endif /* NO_NESTED_PMAP */
3091 if (endaddr < entry->vme_end) {
3092 if (entry->object.vm_object &&
3093 !entry->is_sub_map &&
3094 entry->object.vm_object->phys_contiguous) {
3095 pmap_remove(map->pmap,
3096 (addr64_t)(entry->vme_start),
3097 (addr64_t)(entry->vme_end));
3098 }
3099 _vm_map_clip_end(&map->hdr, entry, endaddr);
3100 vm_map_store_update_first_free(map, map->first_free);
3101 }
3102 }
3103
3104
3105 #define vm_map_copy_clip_end(copy, entry, endaddr) \
3106 MACRO_BEGIN \
3107 if ((endaddr) < (entry)->vme_end) \
3108 _vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \
3109 MACRO_END
3110
3111 /*
3112 * This routine is called only when it is known that
3113 * the entry must be split.
3114 */
3115 static void
3116 _vm_map_clip_end(
3117 register struct vm_map_header *map_header,
3118 register vm_map_entry_t entry,
3119 register vm_map_offset_t end)
3120 {
3121 register vm_map_entry_t new_entry;
3122
3123 /*
3124 * Create a new entry and insert it
3125 * AFTER the specified entry
3126 */
3127
3128 new_entry = _vm_map_entry_create(map_header);
3129 vm_map_entry_copy_full(new_entry, entry);
3130
3131 new_entry->vme_start = entry->vme_end = end;
3132 new_entry->offset += (end - entry->vme_start);
3133
3134 _vm_map_store_entry_link(map_header, entry, new_entry);
3135
3136 if (entry->is_sub_map)
3137 vm_map_reference(new_entry->object.sub_map);
3138 else
3139 vm_object_reference(new_entry->object.vm_object);
3140 }
3141
3142
3143 /*
3144 * VM_MAP_RANGE_CHECK: [ internal use only ]
3145 *
3146 * Asserts that the starting and ending region
3147 * addresses fall within the valid range of the map.
3148 */
3149 #define VM_MAP_RANGE_CHECK(map, start, end) \
3150 MACRO_BEGIN \
3151 if (start < vm_map_min(map)) \
3152 start = vm_map_min(map); \
3153 if (end > vm_map_max(map)) \
3154 end = vm_map_max(map); \
3155 if (start > end) \
3156 start = end; \
3157 MACRO_END
3158
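/*
 * Illustrative sketch of the clamping above, assuming a hypothetical
 * map whose valid range is [0x1000, 0x9000): a request for
 * [0x500, 0xa000) is clamped to [0x1000, 0x9000), and an inverted
 * request (start > end) collapses to an empty range.
 */
#if 0	/* example only */
static void
example_range_check(vm_map_t map)
{
	vm_map_offset_t	start = 0x500;
	vm_map_offset_t	end   = 0xa000;

	VM_MAP_RANGE_CHECK(map, start, end);

	assert(start >= vm_map_min(map));
	assert(end <= vm_map_max(map));
	assert(start <= end);
}
#endif
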
3159 /*
3160 * vm_map_range_check: [ internal use only ]
3161 *
3162 * Check that the region defined by the specified start and
3163 * end addresses is wholly contained within a single map
3164 * entry or set of adjacent map entries of the specified map,
3165 * i.e. the specified region contains no unmapped space.
3166 * If any or all of the region is unmapped, FALSE is returned.
3167 * Otherwise, TRUE is returned and if the output argument 'entry'
3168 * is not NULL it points to the map entry containing the start
3169 * of the region.
3170 *
3171 * The map is locked for reading on entry and is left locked.
3172 */
3173 static boolean_t
3174 vm_map_range_check(
3175 register vm_map_t map,
3176 register vm_map_offset_t start,
3177 register vm_map_offset_t end,
3178 vm_map_entry_t *entry)
3179 {
3180 vm_map_entry_t cur;
3181 register vm_map_offset_t prev;
3182
3183 /*
3184 * Basic sanity checks first
3185 */
3186 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
3187 return (FALSE);
3188
3189 /*
3190 * Check first if the region starts within a valid
3191 * mapping for the map.
3192 */
3193 if (!vm_map_lookup_entry(map, start, &cur))
3194 return (FALSE);
3195
3196 /*
3197 * Optimize for the case that the region is contained
3198 * in a single map entry.
3199 */
3200 if (entry != (vm_map_entry_t *) NULL)
3201 *entry = cur;
3202 if (end <= cur->vme_end)
3203 return (TRUE);
3204
3205 /*
3206 * If the region is not wholly contained within a
3207 * single entry, walk the entries looking for holes.
3208 */
3209 prev = cur->vme_end;
3210 cur = cur->vme_next;
3211 while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
3212 if (end <= cur->vme_end)
3213 return (TRUE);
3214 prev = cur->vme_end;
3215 cur = cur->vme_next;
3216 }
3217 return (FALSE);
3218 }
3219
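/*
 * Illustrative sketch of the calling convention described above: the
 * map is assumed to be locked (at least for reading) around the check,
 * and "entry" is only meaningful when TRUE is returned.
 */
#if 0	/* example only */
static boolean_t
example_range_is_mapped(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	vm_map_entry_t	entry;
	boolean_t	contiguous;

	vm_map_lock_read(map);
	contiguous = vm_map_range_check(map, start, end, &entry);
	vm_map_unlock_read(map);

	return contiguous;
}
#endif
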
3220 /*
3221 * vm_map_submap: [ kernel use only ]
3222 *
3223 * Mark the given range as handled by a subordinate map.
3224 *
3225 * This range must have been created with vm_map_find using
3226 * the vm_submap_object, and no other operations may have been
3227 * performed on this range prior to calling vm_map_submap.
3228 *
3229 * Only a limited number of operations can be performed
3230 * within this range after calling vm_map_submap:
3231 * vm_fault
3232 * [Don't try vm_map_copyin!]
3233 *
3234 * To remove a submapping, one must first remove the
3235 * range from the superior map, and then destroy the
3236 * submap (if desired). [Better yet, don't try it.]
3237 */
3238 kern_return_t
3239 vm_map_submap(
3240 vm_map_t map,
3241 vm_map_offset_t start,
3242 vm_map_offset_t end,
3243 vm_map_t submap,
3244 vm_map_offset_t offset,
3245 #ifdef NO_NESTED_PMAP
3246 __unused
3247 #endif /* NO_NESTED_PMAP */
3248 boolean_t use_pmap)
3249 {
3250 vm_map_entry_t entry;
3251 register kern_return_t result = KERN_INVALID_ARGUMENT;
3252 register vm_object_t object;
3253
3254 vm_map_lock(map);
3255
3256 if (! vm_map_lookup_entry(map, start, &entry)) {
3257 entry = entry->vme_next;
3258 }
3259
3260 if (entry == vm_map_to_entry(map) ||
3261 entry->is_sub_map) {
3262 vm_map_unlock(map);
3263 return KERN_INVALID_ARGUMENT;
3264 }
3265
3266 assert(!entry->use_pmap); /* we don't want to unnest anything here */
3267 vm_map_clip_start(map, entry, start);
3268 vm_map_clip_end(map, entry, end);
3269
3270 if ((entry->vme_start == start) && (entry->vme_end == end) &&
3271 (!entry->is_sub_map) &&
3272 ((object = entry->object.vm_object) == vm_submap_object) &&
3273 (object->resident_page_count == 0) &&
3274 (object->copy == VM_OBJECT_NULL) &&
3275 (object->shadow == VM_OBJECT_NULL) &&
3276 (!object->pager_created)) {
3277 entry->offset = (vm_object_offset_t)offset;
3278 entry->object.vm_object = VM_OBJECT_NULL;
3279 vm_object_deallocate(object);
3280 entry->is_sub_map = TRUE;
3281 entry->object.sub_map = submap;
3282 vm_map_reference(submap);
3283 submap->mapped = TRUE;
3284
3285 #ifndef NO_NESTED_PMAP
3286 if (use_pmap) {
3287 /* nest if platform code will allow */
3288 if(submap->pmap == NULL) {
3289 submap->pmap = pmap_create((vm_map_size_t) 0, FALSE);
3290 if(submap->pmap == PMAP_NULL) {
3291 vm_map_unlock(map);
3292 return(KERN_NO_SPACE);
3293 }
3294 }
3295 result = pmap_nest(map->pmap,
3296 (entry->object.sub_map)->pmap,
3297 (addr64_t)start,
3298 (addr64_t)start,
3299 (uint64_t)(end - start));
3300 if(result)
3301 panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result);
3302 entry->use_pmap = TRUE;
3303 }
3304 #else /* NO_NESTED_PMAP */
3305 pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end);
3306 #endif /* NO_NESTED_PMAP */
3307 result = KERN_SUCCESS;
3308 }
3309 vm_map_unlock(map);
3310
3311 return(result);
3312 }
3313
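/*
 * Illustrative sketch: installing "submap" over a range of "parent"
 * that is assumed to have already been reserved with the global
 * vm_submap_object, as the comment above requires.  Passing
 * use_pmap == TRUE asks the platform to nest the submap's pmap into
 * the parent's, when supported.
 */
#if 0	/* example only */
static kern_return_t
example_install_submap(
	vm_map_t	parent,
	vm_map_t	submap,
	vm_map_offset_t	start,
	vm_map_size_t	size)
{
	return vm_map_submap(parent,
			     start,
			     start + size,
			     submap,
			     (vm_map_offset_t)0,	/* offset into submap */
			     TRUE);			/* use_pmap: try to nest */
}
#endif
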
3314 /*
3315 * vm_map_protect:
3316 *
3317 * Sets the protection of the specified address
3318 * region in the target map. If "set_max" is
3319 * specified, the maximum protection is to be set;
3320 * otherwise, only the current protection is affected.
3321 */
3322 kern_return_t
3323 vm_map_protect(
3324 register vm_map_t map,
3325 register vm_map_offset_t start,
3326 register vm_map_offset_t end,
3327 register vm_prot_t new_prot,
3328 register boolean_t set_max)
3329 {
3330 register vm_map_entry_t current;
3331 register vm_map_offset_t prev;
3332 vm_map_entry_t entry;
3333 vm_prot_t new_max;
3334
3335 XPR(XPR_VM_MAP,
3336 "vm_map_protect, 0x%X start 0x%X end 0x%X, new 0x%X %d",
3337 map, start, end, new_prot, set_max);
3338
3339 vm_map_lock(map);
3340
3341 /* LP64todo - remove this check when vm_map_commpage64()
3342 * no longer has to stuff in a map_entry for the commpage
3343 * above the map's max_offset.
3344 */
3345 if (start >= map->max_offset) {
3346 vm_map_unlock(map);
3347 return(KERN_INVALID_ADDRESS);
3348 }
3349
3350 while(1) {
3351 /*
3352 * Lookup the entry. If it doesn't start in a valid
3353 * entry, return an error.
3354 */
3355 if (! vm_map_lookup_entry(map, start, &entry)) {
3356 vm_map_unlock(map);
3357 return(KERN_INVALID_ADDRESS);
3358 }
3359
3360 if (entry->superpage_size && (start & (SUPERPAGE_SIZE-1))) { /* extend request to whole entry */
3361 start = SUPERPAGE_ROUND_DOWN(start);
3362 continue;
3363 }
3364 break;
3365 }
3366 if (entry->superpage_size)
3367 end = SUPERPAGE_ROUND_UP(end);
3368
3369 /*
3370 * Make a first pass to check for protection and address
3371 * violations.
3372 */
3373
3374 current = entry;
3375 prev = current->vme_start;
3376 while ((current != vm_map_to_entry(map)) &&
3377 (current->vme_start < end)) {
3378
3379 /*
3380 * If there is a hole, return an error.
3381 */
3382 if (current->vme_start != prev) {
3383 vm_map_unlock(map);
3384 return(KERN_INVALID_ADDRESS);
3385 }
3386
3387 new_max = current->max_protection;
3388 if(new_prot & VM_PROT_COPY) {
3389 new_max |= VM_PROT_WRITE;
3390 if ((new_prot & (new_max | VM_PROT_COPY)) != new_prot) {
3391 vm_map_unlock(map);
3392 return(KERN_PROTECTION_FAILURE);
3393 }
3394 } else {
3395 if ((new_prot & new_max) != new_prot) {
3396 vm_map_unlock(map);
3397 return(KERN_PROTECTION_FAILURE);
3398 }
3399 }
3400
3401 #if CONFIG_EMBEDDED
3402 if (new_prot & VM_PROT_WRITE) {
3403 if ((new_prot & VM_PROT_EXECUTE) && !(current->used_for_jit)) {
3404 printf("EMBEDDED: %s can't have both write and exec at the same time\n", __FUNCTION__);
3405 new_prot &= ~VM_PROT_EXECUTE;
3406 }
3407 }
3408 #endif
3409
3410 prev = current->vme_end;
3411 current = current->vme_next;
3412 }
3413 if (end > prev) {
3414 vm_map_unlock(map);
3415 return(KERN_INVALID_ADDRESS);
3416 }
3417
3418 /*
3419 * Go back and fix up protections.
3420 * Clip to start here if the range starts within
3421 * the entry.
3422 */
3423
3424 current = entry;
3425 if (current != vm_map_to_entry(map)) {
3426 /* clip and unnest if necessary */
3427 vm_map_clip_start(map, current, start);
3428 }
3429
3430 while ((current != vm_map_to_entry(map)) &&
3431 (current->vme_start < end)) {
3432
3433 vm_prot_t old_prot;
3434
3435 vm_map_clip_end(map, current, end);
3436
3437 assert(!current->use_pmap); /* clipping did unnest if needed */
3438
3439 old_prot = current->protection;
3440
3441 if(new_prot & VM_PROT_COPY) {
3442 /* caller is asking specifically to copy the */
3443 /* mapped data, this implies that max protection */
3444 /* will include write. Caller must be prepared */
3445 /* for loss of shared memory communication in the */
3446 /* target area after taking this step */
3447
3448 if (current->is_sub_map == FALSE && current->object.vm_object == VM_OBJECT_NULL){
3449 current->object.vm_object = vm_object_allocate((vm_map_size_t)(current->vme_end - current->vme_start));
3450 current->offset = 0;
3451 }
3452 current->needs_copy = TRUE;
3453 current->max_protection |= VM_PROT_WRITE;
3454 }
3455
3456 if (set_max)
3457 current->protection =
3458 (current->max_protection =
3459 new_prot & ~VM_PROT_COPY) &
3460 old_prot;
3461 else
3462 current->protection = new_prot & ~VM_PROT_COPY;
3463
3464 /*
3465 * Update physical map if necessary.
3466 * If the request is to turn off write protection,
3467 * we won't do it for real (in pmap). This is because
3468 * it would cause copy-on-write to fail. We've already
3469 * set the new protection in the map, so if a
3470 * write-protect fault occurs, it will be fixed up
3471 * properly, COW or not.
3472 */
3473 if (current->protection != old_prot) {
3474 /* Look one level in: we support nested pmaps */
3475 /* from mapped submaps which are direct entries */
3476 /* in our map */
3477
3478 vm_prot_t prot;
3479
3480 prot = current->protection & ~VM_PROT_WRITE;
3481
3482 if (override_nx(map, current->alias) && prot)
3483 prot |= VM_PROT_EXECUTE;
3484
3485 if (current->is_sub_map && current->use_pmap) {
3486 pmap_protect(current->object.sub_map->pmap,
3487 current->vme_start,
3488 current->vme_end,
3489 prot);
3490 } else {
3491 pmap_protect(map->pmap,
3492 current->vme_start,
3493 current->vme_end,
3494 prot);
3495 }
3496 }
3497 current = current->vme_next;
3498 }
3499
3500 current = entry;
3501 while ((current != vm_map_to_entry(map)) &&
3502 (current->vme_start <= end)) {
3503 vm_map_simplify_entry(map, current);
3504 current = current->vme_next;
3505 }
3506
3507 vm_map_unlock(map);
3508 return(KERN_SUCCESS);
3509 }
3510
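/*
 * Illustrative sketch: making a range read-only with vm_map_protect().
 * With set_max == FALSE only the current protection changes; passing
 * TRUE would also lower the maximum protection, which generally cannot
 * be raised again later.
 */
#if 0	/* example only */
static kern_return_t
example_make_readonly(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_size_t	size)
{
	return vm_map_protect(map,
			      vm_map_trunc_page(start),
			      vm_map_round_page(start + size),
			      VM_PROT_READ,
			      FALSE);	/* set_max */
}
#endif
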
3511 /*
3512 * vm_map_inherit:
3513 *
3514 * Sets the inheritance of the specified address
3515 * range in the target map. Inheritance
3516 * affects how the map will be shared with
3517 * child maps at the time of vm_map_fork.
3518 */
3519 kern_return_t
3520 vm_map_inherit(
3521 register vm_map_t map,
3522 register vm_map_offset_t start,
3523 register vm_map_offset_t end,
3524 register vm_inherit_t new_inheritance)
3525 {
3526 register vm_map_entry_t entry;
3527 vm_map_entry_t temp_entry;
3528
3529 vm_map_lock(map);
3530
3531 VM_MAP_RANGE_CHECK(map, start, end);
3532
3533 if (vm_map_lookup_entry(map, start, &temp_entry)) {
3534 entry = temp_entry;
3535 }
3536 else {
3537 temp_entry = temp_entry->vme_next;
3538 entry = temp_entry;
3539 }
3540
3541 /* first check entire range for submaps which can't support the */
3542 /* given inheritance. */
3543 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
3544 if(entry->is_sub_map) {
3545 if(new_inheritance == VM_INHERIT_COPY) {
3546 vm_map_unlock(map);
3547 return(KERN_INVALID_ARGUMENT);
3548 }
3549 }
3550
3551 entry = entry->vme_next;
3552 }
3553
3554 entry = temp_entry;
3555 if (entry != vm_map_to_entry(map)) {
3556 /* clip and unnest if necessary */
3557 vm_map_clip_start(map, entry, start);
3558 }
3559
3560 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
3561 vm_map_clip_end(map, entry, end);
3562 assert(!entry->use_pmap); /* clip did unnest if needed */
3563
3564 entry->inheritance = new_inheritance;
3565
3566 entry = entry->vme_next;
3567 }
3568
3569 vm_map_unlock(map);
3570 return(KERN_SUCCESS);
3571 }
3572
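/*
 * Illustrative sketch: marking a range to be shared with child maps at
 * fork time.  VM_INHERIT_COPY is rejected above for submap entries, so
 * a caller using that value is assumed to know the range contains none.
 */
#if 0	/* example only */
static kern_return_t
example_share_with_children(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_size_t	size)
{
	return vm_map_inherit(map,
			      vm_map_trunc_page(start),
			      vm_map_round_page(start + size),
			      VM_INHERIT_SHARE);
}
#endif
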
3573 /*
3574 * Update the accounting for the amount of wired memory in this map. If the user has
3575 * exceeded the defined limits, then we fail. Wiring on behalf of the kernel never fails.
3576 */
3577
3578 static kern_return_t
3579 add_wire_counts(
3580 vm_map_t map,
3581 vm_map_entry_t entry,
3582 boolean_t user_wire)
3583 {
3584 vm_map_size_t size;
3585
3586 if (user_wire) {
3587 unsigned int total_wire_count = vm_page_wire_count + vm_lopage_free_count;
3588
3589 /*
3590 * We're wiring memory at the request of the user. Check if this is the first time the user is wiring
3591 * this map entry.
3592 */
3593
3594 if (entry->user_wired_count == 0) {
3595 size = entry->vme_end - entry->vme_start;
3596
3597 /*
3598 * Since this is the first time the user is wiring this map entry, check to see if we're
3599 * exceeding the user wire limits. There is a per-map limit, which is the smaller of
3600 * the process's rlimit and the global vm_user_wire_limit that caps it. There is also
3601 * a system-wide limit on the amount of memory all users can wire. If the user is over either
3602 * limit, then we fail.
3603 */
3604
3605 if(size + map->user_wire_size > MIN(map->user_wire_limit, vm_user_wire_limit) ||
3606 size + ptoa_64(total_wire_count) > vm_global_user_wire_limit ||
3607 size + ptoa_64(total_wire_count) > max_mem - vm_global_no_user_wire_amount)
3608 return KERN_RESOURCE_SHORTAGE;
3609
3610 /*
3611 * The first time the user wires an entry, we also increment the wired_count and add this to
3612 * the total that has been wired in the map.
3613 */
3614
3615 if (entry->wired_count >= MAX_WIRE_COUNT)
3616 return KERN_FAILURE;
3617
3618 entry->wired_count++;
3619 map->user_wire_size += size;
3620 }
3621
3622 if (entry->user_wired_count >= MAX_WIRE_COUNT)
3623 return KERN_FAILURE;
3624
3625 entry->user_wired_count++;
3626
3627 } else {
3628
3629 /*
3630 * The kernel's wiring the memory. Just bump the count and continue.
3631 */
3632
3633 if (entry->wired_count >= MAX_WIRE_COUNT)
3634 panic("vm_map_wire: too many wirings");
3635
3636 entry->wired_count++;
3637 }
3638
3639 return KERN_SUCCESS;
3640 }
3641
3642 /*
3643 * Update the memory wiring accounting now that the given map entry is being unwired.
3644 */
3645
3646 static void
3647 subtract_wire_counts(
3648 vm_map_t map,
3649 vm_map_entry_t entry,
3650 boolean_t user_wire)
3651 {
3652
3653 if (user_wire) {
3654
3655 /*
3656 * We're unwiring memory at the request of the user. See if we're removing the last user wire reference.
3657 */
3658
3659 if (entry->user_wired_count == 1) {
3660
3661 /*
3662 * We're removing the last user wire reference. Decrement the wired_count and the total
3663 * user wired memory for this map.
3664 */
3665
3666 assert(entry->wired_count >= 1);
3667 entry->wired_count--;
3668 map->user_wire_size -= entry->vme_end - entry->vme_start;
3669 }
3670
3671 assert(entry->user_wired_count >= 1);
3672 entry->user_wired_count--;
3673
3674 } else {
3675
3676 /*
3677 * The kernel is unwiring the memory. Just update the count.
3678 */
3679
3680 assert(entry->wired_count >= 1);
3681 entry->wired_count--;
3682 }
3683 }
3684
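/*
 * Illustrative sketch: a user-level wire request balanced by an unwire,
 * the pattern whose per-entry accounting is maintained by
 * add_wire_counts() and subtract_wire_counts() above.  user_wire == TRUE
 * makes the operation interruptible and subject to the user wire limits.
 */
#if 0	/* example only */
static kern_return_t
example_wire_then_unwire(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_size_t	size)
{
	vm_map_offset_t	end = vm_map_round_page(start + size);
	kern_return_t	kr;

	start = vm_map_trunc_page(start);
	kr = vm_map_wire(map, start, end,
			 VM_PROT_READ | VM_PROT_WRITE,	/* access_type */
			 TRUE);				/* user_wire */
	if (kr != KERN_SUCCESS)
		return kr;
	/* ... access the wired memory ... */
	return vm_map_unwire(map, start, end, TRUE);
}
#endif
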
3685 /*
3686 * vm_map_wire:
3687 *
3688 * Sets the pageability of the specified address range in the
3689 * target map as wired. Regions specified as not pageable require
3690 * locked-down physical memory and physical page maps. The
3691 * access_type variable indicates types of accesses that must not
3692 * generate page faults. This is checked against protection of
3693 * memory being locked-down.
3694 *
3695 * The map must not be locked, but a reference must remain to the
3696 * map throughout the call.
3697 */
3698 static kern_return_t
3699 vm_map_wire_nested(
3700 register vm_map_t map,
3701 register vm_map_offset_t start,
3702 register vm_map_offset_t end,
3703 register vm_prot_t access_type,
3704 boolean_t user_wire,
3705 pmap_t map_pmap,
3706 vm_map_offset_t pmap_addr)
3707 {
3708 register vm_map_entry_t entry;
3709 struct vm_map_entry *first_entry, tmp_entry;
3710 vm_map_t real_map;
3711 register vm_map_offset_t s,e;
3712 kern_return_t rc;
3713 boolean_t need_wakeup;
3714 boolean_t main_map = FALSE;
3715 wait_interrupt_t interruptible_state;
3716 thread_t cur_thread;
3717 unsigned int last_timestamp;
3718 vm_map_size_t size;
3719
3720 vm_map_lock(map);
3721 if(map_pmap == NULL)
3722 main_map = TRUE;
3723 last_timestamp = map->timestamp;
3724
3725 VM_MAP_RANGE_CHECK(map, start, end);
3726 assert(page_aligned(start));
3727 assert(page_aligned(end));
3728 if (start == end) {
3729 /* We wired what the caller asked for, zero pages */
3730 vm_map_unlock(map);
3731 return KERN_SUCCESS;
3732 }
3733
3734 need_wakeup = FALSE;
3735 cur_thread = current_thread();
3736
3737 s = start;
3738 rc = KERN_SUCCESS;
3739
3740 if (vm_map_lookup_entry(map, s, &first_entry)) {
3741 entry = first_entry;
3742 /*
3743 * vm_map_clip_start will be done later.
3744 * We don't want to unnest any nested submaps here !
3745 */
3746 } else {
3747 /* Start address is not in map */
3748 rc = KERN_INVALID_ADDRESS;
3749 goto done;
3750 }
3751
3752 while ((entry != vm_map_to_entry(map)) && (s < end)) {
3753 /*
3754 * At this point, we have wired from "start" to "s".
3755 * We still need to wire from "s" to "end".
3756 *
3757 * "entry" hasn't been clipped, so it could start before "s"
3758 * and/or end after "end".
3759 */
3760
3761 /* "e" is how far we want to wire in this entry */
3762 e = entry->vme_end;
3763 if (e > end)
3764 e = end;
3765
3766 /*
3767 * If another thread is wiring/unwiring this entry then
3768 * block after asking the other thread to wake us up.
3769 */
3770 if (entry->in_transition) {
3771 wait_result_t wait_result;
3772
3773 /*
3774 * We have not clipped the entry. Make sure that
3775 * the start address is in range so that the lookup
3776 * below will succeed.
3777 * "s" is the current starting point: we've already
3778 * wired from "start" to "s" and we still have
3779 * to wire from "s" to "end".
3780 */
3781
3782 entry->needs_wakeup = TRUE;
3783
3784 /*
3785 * wake up anybody waiting on entries that we have
3786 * already wired.
3787 */
3788 if (need_wakeup) {
3789 vm_map_entry_wakeup(map);
3790 need_wakeup = FALSE;
3791 }
3792 /*
3793 * User wiring is interruptible
3794 */
3795 wait_result = vm_map_entry_wait(map,
3796 (user_wire) ? THREAD_ABORTSAFE :
3797 THREAD_UNINT);
3798 if (user_wire && wait_result == THREAD_INTERRUPTED) {
3799 /*
3800 * undo the wirings we have done so far
3801 * We do not clear the needs_wakeup flag,
3802 * because we cannot tell if we were the
3803 * only one waiting.
3804 */
3805 rc = KERN_FAILURE;
3806 goto done;
3807 }
3808
3809 /*
3810 * Cannot avoid a lookup here. reset timestamp.
3811 */
3812 last_timestamp = map->timestamp;
3813
3814 /*
3815 * The entry could have been clipped, look it up again.
3816 * The worst that can happen is that it may not exist anymore.
3817 */
3818 if (!vm_map_lookup_entry(map, s, &first_entry)) {
3819 /*
3820 * User: undo everything up to the previous
3821 * entry. Let vm_map_unwire worry about
3822 * checking the validity of the range.
3823 */
3824 rc = KERN_FAILURE;
3825 goto done;
3826 }
3827 entry = first_entry;
3828 continue;
3829 }
3830
3831 if (entry->is_sub_map) {
3832 vm_map_offset_t sub_start;
3833 vm_map_offset_t sub_end;
3834 vm_map_offset_t local_start;
3835 vm_map_offset_t local_end;
3836 pmap_t pmap;
3837
3838 vm_map_clip_start(map, entry, s);
3839 vm_map_clip_end(map, entry, end);
3840
3841 sub_start = entry->offset;
3842 sub_end = entry->vme_end;
3843 sub_end += entry->offset - entry->vme_start;
3844
3845 local_end = entry->vme_end;
3846 if(map_pmap == NULL) {
3847 vm_object_t object;
3848 vm_object_offset_t offset;
3849 vm_prot_t prot;
3850 boolean_t wired;
3851 vm_map_entry_t local_entry;
3852 vm_map_version_t version;
3853 vm_map_t lookup_map;
3854
3855 if(entry->use_pmap) {
3856 pmap = entry->object.sub_map->pmap;
3857 /* the ppc implementation requires that */
3858 /* a submap's pmap address ranges line */
3859 /* up with the parent map's */
3860 #ifdef notdef
3861 pmap_addr = sub_start;
3862 #endif
3863 pmap_addr = s;
3864 } else {
3865 pmap = map->pmap;
3866 pmap_addr = s;
3867 }
3868
3869 if (entry->wired_count) {
3870 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
3871 goto done;
3872
3873 /*
3874 * The map was not unlocked:
3875 * no need to goto re-lookup.
3876 * Just go directly to next entry.
3877 */
3878 entry = entry->vme_next;
3879 s = entry->vme_start;
3880 continue;
3881
3882 }
3883
3884 /* call vm_map_lookup_locked to */
3885 /* cause any needs_copy to be */
3886 /* evaluated */
3887 local_start = entry->vme_start;
3888 lookup_map = map;
3889 vm_map_lock_write_to_read(map);
3890 if(vm_map_lookup_locked(
3891 &lookup_map, local_start,
3892 access_type,
3893 OBJECT_LOCK_EXCLUSIVE,
3894 &version, &object,
3895 &offset, &prot, &wired,
3896 NULL,
3897 &real_map)) {
3898
3899 vm_map_unlock_read(lookup_map);
3900 vm_map_unwire(map, start,
3901 s, user_wire);
3902 return(KERN_FAILURE);
3903 }
3904 if(real_map != lookup_map)
3905 vm_map_unlock(real_map);
3906 vm_map_unlock_read(lookup_map);
3907 vm_map_lock(map);
3908 vm_object_unlock(object);
3909
3910 /* we unlocked, so must re-lookup */
3911 if (!vm_map_lookup_entry(map,
3912 local_start,
3913 &local_entry)) {
3914 rc = KERN_FAILURE;
3915 goto done;
3916 }
3917
3918 /*
3919 * entry could have been "simplified",
3920 * so re-clip
3921 */
3922 entry = local_entry;
3923 assert(s == local_start);
3924 vm_map_clip_start(map, entry, s);
3925 vm_map_clip_end(map, entry, end);
3926 /* re-compute "e" */
3927 e = entry->vme_end;
3928 if (e > end)
3929 e = end;
3930
3931 /* did we have a change of type? */
3932 if (!entry->is_sub_map) {
3933 last_timestamp = map->timestamp;
3934 continue;
3935 }
3936 } else {
3937 local_start = entry->vme_start;
3938 pmap = map_pmap;
3939 }
3940
3941 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
3942 goto done;
3943
3944 entry->in_transition = TRUE;
3945
3946 vm_map_unlock(map);
3947 rc = vm_map_wire_nested(entry->object.sub_map,
3948 sub_start, sub_end,
3949 access_type,
3950 user_wire, pmap, pmap_addr);
3951 vm_map_lock(map);
3952
3953 /*
3954 * Find the entry again. It could have been clipped
3955 * after we unlocked the map.
3956 */
3957 if (!vm_map_lookup_entry(map, local_start,
3958 &first_entry))
3959 panic("vm_map_wire: re-lookup failed");
3960 entry = first_entry;
3961
3962 assert(local_start == s);
3963 /* re-compute "e" */
3964 e = entry->vme_end;
3965 if (e > end)
3966 e = end;
3967
3968 last_timestamp = map->timestamp;
3969 while ((entry != vm_map_to_entry(map)) &&
3970 (entry->vme_start < e)) {
3971 assert(entry->in_transition);
3972 entry->in_transition = FALSE;
3973 if (entry->needs_wakeup) {
3974 entry->needs_wakeup = FALSE;
3975 need_wakeup = TRUE;
3976 }
3977 if (rc != KERN_SUCCESS) {/* from vm_*_wire */
3978 subtract_wire_counts(map, entry, user_wire);
3979 }
3980 entry = entry->vme_next;
3981 }
3982 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
3983 goto done;
3984 }
3985
3986 /* no need to relookup again */
3987 s = entry->vme_start;
3988 continue;
3989 }
3990
3991 /*
3992 * If this entry is already wired then increment
3993 * the appropriate wire reference count.
3994 */
3995 if (entry->wired_count) {
3996 /*
3997 * entry is already wired down, get our reference
3998 * after clipping to our range.
3999 */
4000 vm_map_clip_start(map, entry, s);
4001 vm_map_clip_end(map, entry, end);
4002
4003 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4004 goto done;
4005
4006 /* map was not unlocked: no need to relookup */
4007 entry = entry->vme_next;
4008 s = entry->vme_start;
4009 continue;
4010 }
4011
4012 /*
4013 * Unwired entry or wire request transmitted via submap
4014 */
4015
4016
4017 /*
4018 * Perform actions of vm_map_lookup that need the write
4019 * lock on the map: create a shadow object for a
4020 * copy-on-write region, or an object for a zero-fill
4021 * region.
4022 */
4023 size = entry->vme_end - entry->vme_start;
4024 /*
4025 * If wiring a copy-on-write page, we need to copy it now
4026 * even if we're only (currently) requesting read access.
4027 * This is aggressive, but once it's wired we can't move it.
4028 */
4029 if (entry->needs_copy) {
4030 vm_object_shadow(&entry->object.vm_object,
4031 &entry->offset, size);
4032 entry->needs_copy = FALSE;
4033 } else if (entry->object.vm_object == VM_OBJECT_NULL) {
4034 entry->object.vm_object = vm_object_allocate(size);
4035 entry->offset = (vm_object_offset_t)0;
4036 }
4037
4038 vm_map_clip_start(map, entry, s);
4039 vm_map_clip_end(map, entry, end);
4040
4041 /* re-compute "e" */
4042 e = entry->vme_end;
4043 if (e > end)
4044 e = end;
4045
4046 /*
4047 * Check for holes and protection mismatch.
4048 * Holes: Next entry should be contiguous unless this
4049 * is the end of the region.
4050 * Protection: Access requested must be allowed, unless
4051 * wiring is by protection class
4052 */
4053 if ((entry->vme_end < end) &&
4054 ((entry->vme_next == vm_map_to_entry(map)) ||
4055 (entry->vme_next->vme_start > entry->vme_end))) {
4056 /* found a hole */
4057 rc = KERN_INVALID_ADDRESS;
4058 goto done;
4059 }
4060 if ((entry->protection & access_type) != access_type) {
4061 /* found a protection problem */
4062 rc = KERN_PROTECTION_FAILURE;
4063 goto done;
4064 }
4065
4066 assert(entry->wired_count == 0 && entry->user_wired_count == 0);
4067
4068 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4069 goto done;
4070
4071 entry->in_transition = TRUE;
4072
4073 /*
4074 * This entry might get split once we unlock the map.
4075 * In vm_fault_wire(), we need the current range as
4076 * defined by this entry. In order for this to work
4077 * along with a simultaneous clip operation, we make a
4078 * temporary copy of this entry and use that for the
4079 * wiring. Note that the underlying objects do not
4080 * change during a clip.
4081 */
4082 tmp_entry = *entry;
4083
4084 /*
4085 * The in_transition state guarantees that the entry
4086 * (or entries for this range, if a split occurred) will be
4087 * there when the map lock is acquired for the second time.
4088 */
4089 vm_map_unlock(map);
4090
4091 if (!user_wire && cur_thread != THREAD_NULL)
4092 interruptible_state = thread_interrupt_level(THREAD_UNINT);
4093 else
4094 interruptible_state = THREAD_UNINT;
4095
4096 if(map_pmap)
4097 rc = vm_fault_wire(map,
4098 &tmp_entry, map_pmap, pmap_addr);
4099 else
4100 rc = vm_fault_wire(map,
4101 &tmp_entry, map->pmap,
4102 tmp_entry.vme_start);
4103
4104 if (!user_wire && cur_thread != THREAD_NULL)
4105 thread_interrupt_level(interruptible_state);
4106
4107 vm_map_lock(map);
4108
4109 if (last_timestamp+1 != map->timestamp) {
4110 /*
4111 * Find the entry again. It could have been clipped
4112 * after we unlocked the map.
4113 */
4114 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
4115 &first_entry))
4116 panic("vm_map_wire: re-lookup failed");
4117
4118 entry = first_entry;
4119 }
4120
4121 last_timestamp = map->timestamp;
4122
4123 while ((entry != vm_map_to_entry(map)) &&
4124 (entry->vme_start < tmp_entry.vme_end)) {
4125 assert(entry->in_transition);
4126 entry->in_transition = FALSE;
4127 if (entry->needs_wakeup) {
4128 entry->needs_wakeup = FALSE;
4129 need_wakeup = TRUE;
4130 }
4131 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
4132 subtract_wire_counts(map, entry, user_wire);
4133 }
4134 entry = entry->vme_next;
4135 }
4136
4137 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
4138 goto done;
4139 }
4140
4141 s = entry->vme_start;
4142 } /* end while loop through map entries */
4143
4144 done:
4145 if (rc == KERN_SUCCESS) {
4146 /* repair any damage we may have made to the VM map */
4147 vm_map_simplify_range(map, start, end);
4148 }
4149
4150 vm_map_unlock(map);
4151
4152 /*
4153 * wake up anybody waiting on entries we wired.
4154 */
4155 if (need_wakeup)
4156 vm_map_entry_wakeup(map);
4157
4158 if (rc != KERN_SUCCESS) {
4159 /* undo what has been wired so far */
4160 vm_map_unwire(map, start, s, user_wire);
4161 }
4162
4163 return rc;
4164
4165 }
4166
4167 kern_return_t
4168 vm_map_wire(
4169 register vm_map_t map,
4170 register vm_map_offset_t start,
4171 register vm_map_offset_t end,
4172 register vm_prot_t access_type,
4173 boolean_t user_wire)
4174 {
4175
4176 kern_return_t kret;
4177
4178 kret = vm_map_wire_nested(map, start, end, access_type,
4179 user_wire, (pmap_t)NULL, 0);
4180 return kret;
4181 }
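/*
 * Illustrative usage sketch (not part of the original source): a kernel
 * subsystem that needs a user buffer resident for I/O might bracket the
 * I/O with vm_map_wire() / vm_map_unwire() roughly as below.  The helper
 * name, its parameters and the error handling are hypothetical.
 */
#if 0	/* example only, never compiled */
static kern_return_t
example_wire_user_buffer_for_io(
	vm_map_t	map,
	vm_map_offset_t	addr,
	vm_map_size_t	size)
{
	vm_map_offset_t	start = vm_map_trunc_page(addr);
	vm_map_offset_t	end = vm_map_round_page(addr + size);
	kern_return_t	kr;

	/* wire as a user wiring so user_wired_count is charged */
	kr = vm_map_wire(map, start, end,
			 VM_PROT_READ | VM_PROT_WRITE, TRUE);
	if (kr != KERN_SUCCESS)
		return kr;

	/* ... perform the I/O against the now-resident pages ... */

	/* drop the user wiring once the I/O has completed */
	return vm_map_unwire(map, start, end, TRUE);
}
#endif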
4182
4183 /*
4184 * vm_map_unwire:
4185 *
4186 * Sets the pageability of the specified address range in the target
4187 * as pageable. Regions specified must have been wired previously.
4188 *
4189 * The map must not be locked, but a reference must remain to the map
4190 * throughout the call.
4191 *
4192 * Kernel will panic on failures. User unwire ignores holes and
4193 * unwired and in-transition entries to avoid losing memory by leaving
4194 * it unwired.
4195 */
4196 static kern_return_t
4197 vm_map_unwire_nested(
4198 register vm_map_t map,
4199 register vm_map_offset_t start,
4200 register vm_map_offset_t end,
4201 boolean_t user_wire,
4202 pmap_t map_pmap,
4203 vm_map_offset_t pmap_addr)
4204 {
4205 register vm_map_entry_t entry;
4206 struct vm_map_entry *first_entry, tmp_entry;
4207 boolean_t need_wakeup;
4208 boolean_t main_map = FALSE;
4209 unsigned int last_timestamp;
4210
4211 vm_map_lock(map);
4212 if(map_pmap == NULL)
4213 main_map = TRUE;
4214 last_timestamp = map->timestamp;
4215
4216 VM_MAP_RANGE_CHECK(map, start, end);
4217 assert(page_aligned(start));
4218 assert(page_aligned(end));
4219
4220 if (start == end) {
4221 /* We unwired what the caller asked for: zero pages */
4222 vm_map_unlock(map);
4223 return KERN_SUCCESS;
4224 }
4225
4226 if (vm_map_lookup_entry(map, start, &first_entry)) {
4227 entry = first_entry;
4228 /*
4229 * vm_map_clip_start will be done later.
4230 * We don't want to unnest any nested sub maps here !
4231 */
4232 }
4233 else {
4234 if (!user_wire) {
4235 panic("vm_map_unwire: start not found");
4236 }
4237 /* Start address is not in map. */
4238 vm_map_unlock(map);
4239 return(KERN_INVALID_ADDRESS);
4240 }
4241
4242 if (entry->superpage_size) {
4243 /* superpages are always wired */
4244 vm_map_unlock(map);
4245 return KERN_INVALID_ADDRESS;
4246 }
4247
4248 need_wakeup = FALSE;
4249 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
4250 if (entry->in_transition) {
4251 /*
4252 * 1)
4253 * Another thread is wiring down this entry. Note
4254 * that were it not for the other thread we would
4255 * be unwiring an unwired entry. This is not
4256 * permitted. If we wait, we will be unwiring memory
4257 * we did not wire.
4258 *
4259 * 2)
4260 * Another thread is unwiring this entry. We did not
4261 * have a reference to it, because if we did, this
4262 * entry will not be getting unwired now.
4263 */
4264 if (!user_wire) {
4265 /*
4266 * XXX FBDP
4267 * This could happen: there could be some
4268 * overlapping vslock/vsunlock operations
4269 * going on.
4270 * We should probably just wait and retry,
4271 * but then we have to be careful that this
4272 * entry could get "simplified" after
4273 * "in_transition" gets unset and before
4274 * we re-lookup the entry, so we would
4275 * have to re-clip the entry to avoid
4276 * re-unwiring what we have already unwired...
4277 * See vm_map_wire_nested().
4278 *
4279 * Or we could just ignore "in_transition"
4280 * here and proceed to decrement the wired
4281 * count(s) on this entry. That should be fine
4282 * as long as "wired_count" doesn't drop all
4283 * the way to 0 (and we should panic if THAT
4284 * happens).
4285 */
4286 panic("vm_map_unwire: in_transition entry");
4287 }
4288
4289 entry = entry->vme_next;
4290 continue;
4291 }
4292
4293 if (entry->is_sub_map) {
4294 vm_map_offset_t sub_start;
4295 vm_map_offset_t sub_end;
4296 vm_map_offset_t local_end;
4297 pmap_t pmap;
4298
4299 vm_map_clip_start(map, entry, start);
4300 vm_map_clip_end(map, entry, end);
4301
4302 sub_start = entry->offset;
4303 sub_end = entry->vme_end - entry->vme_start;
4304 sub_end += entry->offset;
4305 local_end = entry->vme_end;
4306 if(map_pmap == NULL) {
4307 if(entry->use_pmap) {
4308 pmap = entry->object.sub_map->pmap;
4309 pmap_addr = sub_start;
4310 } else {
4311 pmap = map->pmap;
4312 pmap_addr = start;
4313 }
4314 if (entry->wired_count == 0 ||
4315 (user_wire && entry->user_wired_count == 0)) {
4316 if (!user_wire)
4317 panic("vm_map_unwire: entry is unwired");
4318 entry = entry->vme_next;
4319 continue;
4320 }
4321
4322 /*
4323 * Check for holes
4324 * Holes: Next entry should be contiguous unless
4325 * this is the end of the region.
4326 */
4327 if (((entry->vme_end < end) &&
4328 ((entry->vme_next == vm_map_to_entry(map)) ||
4329 (entry->vme_next->vme_start
4330 > entry->vme_end)))) {
4331 if (!user_wire)
4332 panic("vm_map_unwire: non-contiguous region");
4333 /*
4334 entry = entry->vme_next;
4335 continue;
4336 */
4337 }
4338
4339 subtract_wire_counts(map, entry, user_wire);
4340
4341 if (entry->wired_count != 0) {
4342 entry = entry->vme_next;
4343 continue;
4344 }
4345
4346 entry->in_transition = TRUE;
4347 tmp_entry = *entry;/* see comment in vm_map_wire() */
4348
4349 /*
4350 * We can unlock the map now. The in_transition state
4351 * guarantees existence of the entry.
4352 */
4353 vm_map_unlock(map);
4354 vm_map_unwire_nested(entry->object.sub_map,
4355 sub_start, sub_end, user_wire, pmap, pmap_addr);
4356 vm_map_lock(map);
4357
4358 if (last_timestamp+1 != map->timestamp) {
4359 /*
4360 * Find the entry again. It could have been
4361 * clipped or deleted after we unlocked the map.
4362 */
4363 if (!vm_map_lookup_entry(map,
4364 tmp_entry.vme_start,
4365 &first_entry)) {
4366 if (!user_wire)
4367 panic("vm_map_unwire: re-lookup failed");
4368 entry = first_entry->vme_next;
4369 } else
4370 entry = first_entry;
4371 }
4372 last_timestamp = map->timestamp;
4373
4374 /*
4375 * clear transition bit for all constituent entries
4376 * that were in the original entry (saved in
4377 * tmp_entry). Also check for waiters.
4378 */
4379 while ((entry != vm_map_to_entry(map)) &&
4380 (entry->vme_start < tmp_entry.vme_end)) {
4381 assert(entry->in_transition);
4382 entry->in_transition = FALSE;
4383 if (entry->needs_wakeup) {
4384 entry->needs_wakeup = FALSE;
4385 need_wakeup = TRUE;
4386 }
4387 entry = entry->vme_next;
4388 }
4389 continue;
4390 } else {
4391 vm_map_unlock(map);
4392 vm_map_unwire_nested(entry->object.sub_map,
4393 sub_start, sub_end, user_wire, map_pmap,
4394 pmap_addr);
4395 vm_map_lock(map);
4396
4397 if (last_timestamp+1 != map->timestamp) {
4398 /*
4399 * Find the entry again. It could have been
4400 * clipped or deleted after we unlocked the map.
4401 */
4402 if (!vm_map_lookup_entry(map,
4403 tmp_entry.vme_start,
4404 &first_entry)) {
4405 if (!user_wire)
4406 panic("vm_map_unwire: re-lookup failed");
4407 entry = first_entry->vme_next;
4408 } else
4409 entry = first_entry;
4410 }
4411 last_timestamp = map->timestamp;
4412 }
4413 }
4414
4415
4416 if ((entry->wired_count == 0) ||
4417 (user_wire && entry->user_wired_count == 0)) {
4418 if (!user_wire)
4419 panic("vm_map_unwire: entry is unwired");
4420
4421 entry = entry->vme_next;
4422 continue;
4423 }
4424
4425 assert(entry->wired_count > 0 &&
4426 (!user_wire || entry->user_wired_count > 0));
4427
4428 vm_map_clip_start(map, entry, start);
4429 vm_map_clip_end(map, entry, end);
4430
4431 /*
4432 * Check for holes
4433 * Holes: Next entry should be contiguous unless
4434 * this is the end of the region.
4435 */
4436 if (((entry->vme_end < end) &&
4437 ((entry->vme_next == vm_map_to_entry(map)) ||
4438 (entry->vme_next->vme_start > entry->vme_end)))) {
4439
4440 if (!user_wire)
4441 panic("vm_map_unwire: non-contiguous region");
4442 entry = entry->vme_next;
4443 continue;
4444 }
4445
4446 subtract_wire_counts(map, entry, user_wire);
4447
4448 if (entry->wired_count != 0) {
4449 entry = entry->vme_next;
4450 continue;
4451 }
4452
4453 if(entry->zero_wired_pages) {
4454 entry->zero_wired_pages = FALSE;
4455 }
4456
4457 entry->in_transition = TRUE;
4458 tmp_entry = *entry; /* see comment in vm_map_wire() */
4459
4460 /*
4461 * We can unlock the map now. The in_transition state
4462 * guarantees existence of the entry.
4463 */
4464 vm_map_unlock(map);
4465 if(map_pmap) {
4466 vm_fault_unwire(map,
4467 &tmp_entry, FALSE, map_pmap, pmap_addr);
4468 } else {
4469 vm_fault_unwire(map,
4470 &tmp_entry, FALSE, map->pmap,
4471 tmp_entry.vme_start);
4472 }
4473 vm_map_lock(map);
4474
4475 if (last_timestamp+1 != map->timestamp) {
4476 /*
4477 * Find the entry again. It could have been clipped
4478 * or deleted after we unlocked the map.
4479 */
4480 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
4481 &first_entry)) {
4482 if (!user_wire)
4483 panic("vm_map_unwire: re-lookup failed");
4484 entry = first_entry->vme_next;
4485 } else
4486 entry = first_entry;
4487 }
4488 last_timestamp = map->timestamp;
4489
4490 /*
4491 * clear transition bit for all constituent entries that
4492 * were in the original entry (saved in tmp_entry). Also
4493 * check for waiters.
4494 */
4495 while ((entry != vm_map_to_entry(map)) &&
4496 (entry->vme_start < tmp_entry.vme_end)) {
4497 assert(entry->in_transition);
4498 entry->in_transition = FALSE;
4499 if (entry->needs_wakeup) {
4500 entry->needs_wakeup = FALSE;
4501 need_wakeup = TRUE;
4502 }
4503 entry = entry->vme_next;
4504 }
4505 }
4506
4507 /*
4508 * We might have fragmented the address space when we wired this
4509 * range of addresses. Attempt to re-coalesce these VM map entries
4510 * with their neighbors now that they're no longer wired.
4511 * Under some circumstances, address space fragmentation can
4512 * prevent VM object shadow chain collapsing, which can cause
4513 * swap space leaks.
4514 */
4515 vm_map_simplify_range(map, start, end);
4516
4517 vm_map_unlock(map);
4518 /*
4519 * wake up anybody waiting on entries that we have unwired.
4520 */
4521 if (need_wakeup)
4522 vm_map_entry_wakeup(map);
4523 return(KERN_SUCCESS);
4524
4525 }
4526
4527 kern_return_t
4528 vm_map_unwire(
4529 register vm_map_t map,
4530 register vm_map_offset_t start,
4531 register vm_map_offset_t end,
4532 boolean_t user_wire)
4533 {
4534 return vm_map_unwire_nested(map, start, end,
4535 user_wire, (pmap_t)NULL, 0);
4536 }
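/*
 * Illustrative contrast (hypothetical helper, not original source): a
 * kernel-internal wiring passes user_wire == FALSE, so only wired_count
 * is charged, the wait is not interruptible, and an unmatched unwire
 * panics rather than being silently ignored.
 */
#if 0	/* example only, never compiled */
static void
example_kernel_wire_and_unwire(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	/* kernel wiring: user_wire == FALSE */
	if (vm_map_wire(map, start, end, VM_PROT_READ, FALSE) == KERN_SUCCESS) {
		/* ... use the wired range ... */
		/* must be balanced by a kernel unwire (user_wire == FALSE) */
		(void) vm_map_unwire(map, start, end, FALSE);
	}
}
#endif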
4537
4538
4539 /*
4540 * vm_map_entry_delete: [ internal use only ]
4541 *
4542 * Deallocate the given entry from the target map.
4543 */
4544 static void
4545 vm_map_entry_delete(
4546 register vm_map_t map,
4547 register vm_map_entry_t entry)
4548 {
4549 register vm_map_offset_t s, e;
4550 register vm_object_t object;
4551 register vm_map_t submap;
4552
4553 s = entry->vme_start;
4554 e = entry->vme_end;
4555 assert(page_aligned(s));
4556 assert(page_aligned(e));
4557 assert(entry->wired_count == 0);
4558 assert(entry->user_wired_count == 0);
4559 assert(!entry->permanent);
4560
4561 if (entry->is_sub_map) {
4562 object = NULL;
4563 submap = entry->object.sub_map;
4564 } else {
4565 submap = NULL;
4566 object = entry->object.vm_object;
4567 }
4568
4569 vm_map_store_entry_unlink(map, entry);
4570 map->size -= e - s;
4571
4572 vm_map_entry_dispose(map, entry);
4573
4574 vm_map_unlock(map);
4575 /*
4576 * Deallocate the object only after removing all
4577 * pmap entries pointing to its pages.
4578 */
4579 if (submap)
4580 vm_map_deallocate(submap);
4581 else
4582 vm_object_deallocate(object);
4583
4584 }
4585
4586 void
4587 vm_map_submap_pmap_clean(
4588 vm_map_t map,
4589 vm_map_offset_t start,
4590 vm_map_offset_t end,
4591 vm_map_t sub_map,
4592 vm_map_offset_t offset)
4593 {
4594 vm_map_offset_t submap_start;
4595 vm_map_offset_t submap_end;
4596 vm_map_size_t remove_size;
4597 vm_map_entry_t entry;
4598
4599 submap_end = offset + (end - start);
4600 submap_start = offset;
4601
4602 vm_map_lock_read(sub_map);
4603 if(vm_map_lookup_entry(sub_map, offset, &entry)) {
4604
4605 remove_size = (entry->vme_end - entry->vme_start);
4606 if(offset > entry->vme_start)
4607 remove_size -= offset - entry->vme_start;
4608
4609
4610 if(submap_end < entry->vme_end) {
4611 remove_size -=
4612 entry->vme_end - submap_end;
4613 }
4614 if(entry->is_sub_map) {
4615 vm_map_submap_pmap_clean(
4616 sub_map,
4617 start,
4618 start + remove_size,
4619 entry->object.sub_map,
4620 entry->offset);
4621 } else {
4622
4623 if((map->mapped) && (map->ref_count)
4624 && (entry->object.vm_object != NULL)) {
4625 vm_object_pmap_protect(
4626 entry->object.vm_object,
4627 entry->offset+(offset-entry->vme_start),
4628 remove_size,
4629 PMAP_NULL,
4630 entry->vme_start,
4631 VM_PROT_NONE);
4632 } else {
4633 pmap_remove(map->pmap,
4634 (addr64_t)start,
4635 (addr64_t)(start + remove_size));
4636 }
4637 }
4638 }
4639
4640 entry = entry->vme_next;
4641
4642 while((entry != vm_map_to_entry(sub_map))
4643 && (entry->vme_start < submap_end)) {
4644 remove_size = (entry->vme_end - entry->vme_start);
4645 if(submap_end < entry->vme_end) {
4646 remove_size -= entry->vme_end - submap_end;
4647 }
4648 if(entry->is_sub_map) {
4649 vm_map_submap_pmap_clean(
4650 sub_map,
4651 (start + entry->vme_start) - offset,
4652 ((start + entry->vme_start) - offset) + remove_size,
4653 entry->object.sub_map,
4654 entry->offset);
4655 } else {
4656 if((map->mapped) && (map->ref_count)
4657 && (entry->object.vm_object != NULL)) {
4658 vm_object_pmap_protect(
4659 entry->object.vm_object,
4660 entry->offset,
4661 remove_size,
4662 PMAP_NULL,
4663 entry->vme_start,
4664 VM_PROT_NONE);
4665 } else {
4666 pmap_remove(map->pmap,
4667 (addr64_t)((start + entry->vme_start)
4668 - offset),
4669 (addr64_t)(((start + entry->vme_start)
4670 - offset) + remove_size));
4671 }
4672 }
4673 entry = entry->vme_next;
4674 }
4675 vm_map_unlock_read(sub_map);
4676 return;
4677 }
4678
4679 /*
4680 * vm_map_delete: [ internal use only ]
4681 *
4682 * Deallocates the given address range from the target map.
4683 * Removes all user wirings. Unwires one kernel wiring if
4684 * VM_MAP_REMOVE_KUNWIRE is set. Waits for kernel wirings to go
4685 * away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set. Sleeps
4686 * interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
4687 *
4688 * This routine is called with map locked and leaves map locked.
4689 */
4690 static kern_return_t
4691 vm_map_delete(
4692 vm_map_t map,
4693 vm_map_offset_t start,
4694 vm_map_offset_t end,
4695 int flags,
4696 vm_map_t zap_map)
4697 {
4698 vm_map_entry_t entry, next;
4699 struct vm_map_entry *first_entry, tmp_entry;
4700 register vm_map_offset_t s;
4701 register vm_object_t object;
4702 boolean_t need_wakeup;
4703 unsigned int last_timestamp = ~0; /* unlikely value */
4704 int interruptible;
4705
4706 interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
4707 THREAD_ABORTSAFE : THREAD_UNINT;
4708
4709 /*
4710 * All our DMA I/O operations in IOKit are currently done by
4711 * wiring through the map entries of the task requesting the I/O.
4712 * Because of this, we must always wait for kernel wirings
4713 * to go away on the entries before deleting them.
4714 *
4715 * Any caller who wants to actually remove a kernel wiring
4716 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
4717 * properly remove one wiring instead of blasting through
4718 * them all.
4719 */
4720 flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;
4721
4722 while(1) {
4723 /*
4724 * Find the start of the region, and clip it
4725 */
4726 if (vm_map_lookup_entry(map, start, &first_entry)) {
4727 entry = first_entry;
4728 if (entry->superpage_size && (start & ~SUPERPAGE_MASK)) { /* extend request to whole entry */
4729 start = SUPERPAGE_ROUND_DOWN(start);
4730 continue;
4731 }
4732 if (start == entry->vme_start) {
4733 /*
4734 * No need to clip. We don't want to cause
4735 * any unnecessary unnesting in this case...
4736 */
4737 } else {
4738 vm_map_clip_start(map, entry, start);
4739 }
4740
4741 /*
4742 * Fix the lookup hint now, rather than each
4743 * time through the loop.
4744 */
4745 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4746 } else {
4747 entry = first_entry->vme_next;
4748 }
4749 break;
4750 }
4751 if (entry->superpage_size)
4752 end = SUPERPAGE_ROUND_UP(end);
4753
4754 need_wakeup = FALSE;
4755 /*
4756 * Step through all entries in this region
4757 */
4758 s = entry->vme_start;
4759 while ((entry != vm_map_to_entry(map)) && (s < end)) {
4760 /*
4761 * At this point, we have deleted all the memory entries
4762 * between "start" and "s". We still need to delete
4763 * all memory entries between "s" and "end".
4764 * While we were blocked and the map was unlocked, some
4765 * new memory entries could have been re-allocated between
4766 * "start" and "s" and we don't want to mess with those.
4767 * Some of those entries could even have been re-assembled
4768 * with an entry after "s" (in vm_map_simplify_entry()), so
4769 * we may have to vm_map_clip_start() again.
4770 */
4771
4772 if (entry->vme_start >= s) {
4773 /*
4774 * This entry starts on or after "s"
4775 * so no need to clip its start.
4776 */
4777 } else {
4778 /*
4779 * This entry has been re-assembled by a
4780 * vm_map_simplify_entry(). We need to
4781 * re-clip its start.
4782 */
4783 vm_map_clip_start(map, entry, s);
4784 }
4785 if (entry->vme_end <= end) {
4786 /*
4787 * This entry is going away completely, so no need
4788 * to clip and possibly cause an unnecessary unnesting.
4789 */
4790 } else {
4791 vm_map_clip_end(map, entry, end);
4792 }
4793
4794 if (entry->permanent) {
4795 panic("attempt to remove permanent VM map entry "
4796 "%p [0x%llx:0x%llx]\n",
4797 entry, (uint64_t) s, (uint64_t) end);
4798 }
4799
4800
4801 if (entry->in_transition) {
4802 wait_result_t wait_result;
4803
4804 /*
4805 * Another thread is wiring/unwiring this entry.
4806 * Let the other thread know we are waiting.
4807 */
4808 assert(s == entry->vme_start);
4809 entry->needs_wakeup = TRUE;
4810
4811 /*
4812 * wake up anybody waiting on entries that we have
4813 * already unwired/deleted.
4814 */
4815 if (need_wakeup) {
4816 vm_map_entry_wakeup(map);
4817 need_wakeup = FALSE;
4818 }
4819
4820 wait_result = vm_map_entry_wait(map, interruptible);
4821
4822 if (interruptible &&
4823 wait_result == THREAD_INTERRUPTED) {
4824 /*
4825 * We do not clear the needs_wakeup flag,
4826 * since we cannot tell if we were the only one.
4827 */
4828 vm_map_unlock(map);
4829 return KERN_ABORTED;
4830 }
4831
4832 /*
4833 * The entry could have been clipped or it
4834 * may not exist anymore. Look it up again.
4835 */
4836 if (!vm_map_lookup_entry(map, s, &first_entry)) {
4837 assert((map != kernel_map) &&
4838 (!entry->is_sub_map));
4839 /*
4840 * User: use the next entry
4841 */
4842 entry = first_entry->vme_next;
4843 s = entry->vme_start;
4844 } else {
4845 entry = first_entry;
4846 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4847 }
4848 last_timestamp = map->timestamp;
4849 continue;
4850 } /* end in_transition */
4851
4852 if (entry->wired_count) {
4853 boolean_t user_wire;
4854
4855 user_wire = entry->user_wired_count > 0;
4856
4857 /*
4858 * Remove a kernel wiring if requested
4859 */
4860 if (flags & VM_MAP_REMOVE_KUNWIRE) {
4861 entry->wired_count--;
4862 }
4863
4864 /*
4865 * Remove all user wirings for proper accounting
4866 */
4867 if (entry->user_wired_count > 0) {
4868 while (entry->user_wired_count)
4869 subtract_wire_counts(map, entry, user_wire);
4870 }
4871
4872 if (entry->wired_count != 0) {
4873 assert(map != kernel_map);
4874 /*
4875 * Cannot continue. Typical case is when
4876 * a user thread has physical I/O pending on
4877 * this page. Either wait for the
4878 * kernel wiring to go away or return an
4879 * error.
4880 */
4881 if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) {
4882 wait_result_t wait_result;
4883
4884 assert(s == entry->vme_start);
4885 entry->needs_wakeup = TRUE;
4886 wait_result = vm_map_entry_wait(map,
4887 interruptible);
4888
4889 if (interruptible &&
4890 wait_result == THREAD_INTERRUPTED) {
4891 /*
4892 * We do not clear the
4893 * needs_wakeup flag, since we
4894 * cannot tell if we were the
4895 * only one.
4896 */
4897 vm_map_unlock(map);
4898 return KERN_ABORTED;
4899 }
4900
4901 /*
4902 * The entry could have been clipped or
4903 * it may not exist anymore. Look it
4904 * up again.
4905 */
4906 if (!vm_map_lookup_entry(map, s,
4907 &first_entry)) {
4908 assert(map != kernel_map);
4909 /*
4910 * User: use the next entry
4911 */
4912 entry = first_entry->vme_next;
4913 s = entry->vme_start;
4914 } else {
4915 entry = first_entry;
4916 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4917 }
4918 last_timestamp = map->timestamp;
4919 continue;
4920 }
4921 else {
4922 return KERN_FAILURE;
4923 }
4924 }
4925
4926 entry->in_transition = TRUE;
4927 /*
4928 * copy current entry. see comment in vm_map_wire()
4929 */
4930 tmp_entry = *entry;
4931 assert(s == entry->vme_start);
4932
4933 /*
4934 * We can unlock the map now. The in_transition
4935 * state guarantees existence of the entry.
4936 */
4937 vm_map_unlock(map);
4938
4939 if (tmp_entry.is_sub_map) {
4940 vm_map_t sub_map;
4941 vm_map_offset_t sub_start, sub_end;
4942 pmap_t pmap;
4943 vm_map_offset_t pmap_addr;
4944
4945
4946 sub_map = tmp_entry.object.sub_map;
4947 sub_start = tmp_entry.offset;
4948 sub_end = sub_start + (tmp_entry.vme_end -
4949 tmp_entry.vme_start);
4950 if (tmp_entry.use_pmap) {
4951 pmap = sub_map->pmap;
4952 pmap_addr = tmp_entry.vme_start;
4953 } else {
4954 pmap = map->pmap;
4955 pmap_addr = tmp_entry.vme_start;
4956 }
4957 (void) vm_map_unwire_nested(sub_map,
4958 sub_start, sub_end,
4959 user_wire,
4960 pmap, pmap_addr);
4961 } else {
4962
4963 vm_fault_unwire(map, &tmp_entry,
4964 tmp_entry.object.vm_object == kernel_object,
4965 map->pmap, tmp_entry.vme_start);
4966 }
4967
4968 vm_map_lock(map);
4969
4970 if (last_timestamp+1 != map->timestamp) {
4971 /*
4972 * Find the entry again. It could have
4973 * been clipped after we unlocked the map.
4974 */
4975 if (!vm_map_lookup_entry(map, s, &first_entry)){
4976 assert((map != kernel_map) &&
4977 (!entry->is_sub_map));
4978 first_entry = first_entry->vme_next;
4979 s = first_entry->vme_start;
4980 } else {
4981 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4982 }
4983 } else {
4984 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4985 first_entry = entry;
4986 }
4987
4988 last_timestamp = map->timestamp;
4989
4990 entry = first_entry;
4991 while ((entry != vm_map_to_entry(map)) &&
4992 (entry->vme_start < tmp_entry.vme_end)) {
4993 assert(entry->in_transition);
4994 entry->in_transition = FALSE;
4995 if (entry->needs_wakeup) {
4996 entry->needs_wakeup = FALSE;
4997 need_wakeup = TRUE;
4998 }
4999 entry = entry->vme_next;
5000 }
5001 /*
5002 * We have unwired the entry(s). Go back and
5003 * delete them.
5004 */
5005 entry = first_entry;
5006 continue;
5007 }
5008
5009 /* entry is unwired */
5010 assert(entry->wired_count == 0);
5011 assert(entry->user_wired_count == 0);
5012
5013 assert(s == entry->vme_start);
5014
5015 if (flags & VM_MAP_REMOVE_NO_PMAP_CLEANUP) {
5016 /*
5017 * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
5018 * vm_map_delete(), some map entries might have been
5019 * transferred to a "zap_map", which doesn't have a
5020 * pmap. The original pmap has already been flushed
5021 * in the vm_map_delete() call targeting the original
5022 * map, but when we get to destroying the "zap_map",
5023 * we don't have any pmap to flush, so let's just skip
5024 * all this.
5025 */
5026 } else if (entry->is_sub_map) {
5027 if (entry->use_pmap) {
5028 #ifndef NO_NESTED_PMAP
5029 pmap_unnest(map->pmap,
5030 (addr64_t)entry->vme_start,
5031 entry->vme_end - entry->vme_start);
5032 #endif /* NO_NESTED_PMAP */
5033 if ((map->mapped) && (map->ref_count)) {
5034 /* clean up parent map/maps */
5035 vm_map_submap_pmap_clean(
5036 map, entry->vme_start,
5037 entry->vme_end,
5038 entry->object.sub_map,
5039 entry->offset);
5040 }
5041 } else {
5042 vm_map_submap_pmap_clean(
5043 map, entry->vme_start, entry->vme_end,
5044 entry->object.sub_map,
5045 entry->offset);
5046 }
5047 } else if (entry->object.vm_object != kernel_object) {
5048 object = entry->object.vm_object;
5049 if((map->mapped) && (map->ref_count)) {
5050 vm_object_pmap_protect(
5051 object, entry->offset,
5052 entry->vme_end - entry->vme_start,
5053 PMAP_NULL,
5054 entry->vme_start,
5055 VM_PROT_NONE);
5056 } else {
5057 pmap_remove(map->pmap,
5058 (addr64_t)entry->vme_start,
5059 (addr64_t)entry->vme_end);
5060 }
5061 }
5062
5063 /*
5064 * All pmap mappings for this map entry must have been
5065 * cleared by now.
5066 */
5067 assert(vm_map_pmap_is_empty(map,
5068 entry->vme_start,
5069 entry->vme_end));
5070
5071 next = entry->vme_next;
5072 s = next->vme_start;
5073 last_timestamp = map->timestamp;
5074
5075 if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) &&
5076 zap_map != VM_MAP_NULL) {
5077 vm_map_size_t entry_size;
5078 /*
5079 * The caller wants to save the affected VM map entries
5080 * into the "zap_map". The caller will take care of
5081 * these entries.
5082 */
5083 /* unlink the entry from "map" ... */
5084 vm_map_store_entry_unlink(map, entry);
5085 /* ... and add it to the end of the "zap_map" */
5086 vm_map_store_entry_link(zap_map,
5087 vm_map_last_entry(zap_map),
5088 entry);
5089 entry_size = entry->vme_end - entry->vme_start;
5090 map->size -= entry_size;
5091 zap_map->size += entry_size;
5092 /* we didn't unlock the map, so no timestamp increase */
5093 last_timestamp--;
5094 } else {
5095 vm_map_entry_delete(map, entry);
5096 /* vm_map_entry_delete unlocks the map */
5097 vm_map_lock(map);
5098 }
5099
5100 entry = next;
5101
5102 if(entry == vm_map_to_entry(map)) {
5103 break;
5104 }
5105 if (last_timestamp+1 != map->timestamp) {
5106 /*
5107 * we are responsible for deleting everything
5108 * from the given space; if someone has interfered,
5109 * we pick up where we left off. Back-fills should
5110 * be all right for anyone except map_delete, and
5111 * we have to assume that the task has been fully
5112 * disabled before we get here
5113 */
5114 if (!vm_map_lookup_entry(map, s, &entry)){
5115 entry = entry->vme_next;
5116 s = entry->vme_start;
5117 } else {
5118 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
5119 }
5120 /*
5121 * others can not only allocate behind us, we can
5122 * also see coalescing while we don't hold the map lock
5123 */
5124 if(entry == vm_map_to_entry(map)) {
5125 break;
5126 }
5127 }
5128 last_timestamp = map->timestamp;
5129 }
5130
5131 if (map->wait_for_space)
5132 thread_wakeup((event_t) map);
5133 /*
5134 * wake up anybody waiting on entries that we have already deleted.
5135 */
5136 if (need_wakeup)
5137 vm_map_entry_wakeup(map);
5138
5139 return KERN_SUCCESS;
5140 }
5141
5142 /*
5143 * vm_map_remove:
5144 *
5145 * Remove the given address range from the target map.
5146 * This is the exported form of vm_map_delete.
5147 */
5148 kern_return_t
5149 vm_map_remove(
5150 register vm_map_t map,
5151 register vm_map_offset_t start,
5152 register vm_map_offset_t end,
5153 register boolean_t flags)
5154 {
5155 register kern_return_t result;
5156
5157 vm_map_lock(map);
5158 VM_MAP_RANGE_CHECK(map, start, end);
5159 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
5160 vm_map_unlock(map);
5161
5162 return(result);
5163 }
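/*
 * Illustrative usage sketch (hypothetical helper, not original source):
 * tearing down a kernel-wired mapping typically passes
 * VM_MAP_REMOVE_KUNWIRE so that vm_map_delete() drops the single kernel
 * wiring instead of waiting for it to go away.
 */
#if 0	/* example only, never compiled */
static void
example_remove_wired_mapping(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	kern_return_t	kr;

	/* remove the range and drop one kernel wiring on its entries */
	kr = vm_map_remove(map, start, end, VM_MAP_REMOVE_KUNWIRE);
	assert(kr == KERN_SUCCESS);
}
#endif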
5164
5165
5166 /*
5167 * Routine: vm_map_copy_discard
5168 *
5169 * Description:
5170 * Dispose of a map copy object (returned by
5171 * vm_map_copyin).
5172 */
5173 void
5174 vm_map_copy_discard(
5175 vm_map_copy_t copy)
5176 {
5177 if (copy == VM_MAP_COPY_NULL)
5178 return;
5179
5180 switch (copy->type) {
5181 case VM_MAP_COPY_ENTRY_LIST:
5182 while (vm_map_copy_first_entry(copy) !=
5183 vm_map_copy_to_entry(copy)) {
5184 vm_map_entry_t entry = vm_map_copy_first_entry(copy);
5185
5186 vm_map_copy_entry_unlink(copy, entry);
5187 vm_object_deallocate(entry->object.vm_object);
5188 vm_map_copy_entry_dispose(copy, entry);
5189 }
5190 break;
5191 case VM_MAP_COPY_OBJECT:
5192 vm_object_deallocate(copy->cpy_object);
5193 break;
5194 case VM_MAP_COPY_KERNEL_BUFFER:
5195
5196 /*
5197 * The vm_map_copy_t and possibly the data buffer were
5198 * allocated by a single call to kalloc(), i.e. the
5199 * vm_map_copy_t was not allocated out of the zone.
5200 */
5201 kfree(copy, copy->cpy_kalloc_size);
5202 return;
5203 }
5204 zfree(vm_map_copy_zone, copy);
5205 }
5206
5207 /*
5208 * Routine: vm_map_copy_copy
5209 *
5210 * Description:
5211 * Move the information in a map copy object to
5212 * a new map copy object, leaving the old one
5213 * empty.
5214 *
5215 * This is used by kernel routines that need
5216 * to look at out-of-line data (in copyin form)
5217 * before deciding whether to return SUCCESS.
5218 * If the routine returns FAILURE, the original
5219 * copy object will be deallocated; therefore,
5220 * these routines must make a copy of the copy
5221 * object and leave the original empty so that
5222 * deallocation will not fail.
5223 */
5224 vm_map_copy_t
5225 vm_map_copy_copy(
5226 vm_map_copy_t copy)
5227 {
5228 vm_map_copy_t new_copy;
5229
5230 if (copy == VM_MAP_COPY_NULL)
5231 return VM_MAP_COPY_NULL;
5232
5233 /*
5234 * Allocate a new copy object, and copy the information
5235 * from the old one into it.
5236 */
5237
5238 new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
5239 *new_copy = *copy;
5240
5241 if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
5242 /*
5243 * The links in the entry chain must be
5244 * changed to point to the new copy object.
5245 */
5246 vm_map_copy_first_entry(copy)->vme_prev
5247 = vm_map_copy_to_entry(new_copy);
5248 vm_map_copy_last_entry(copy)->vme_next
5249 = vm_map_copy_to_entry(new_copy);
5250 }
5251
5252 /*
5253 * Change the old copy object into one that contains
5254 * nothing to be deallocated.
5255 */
5256 copy->type = VM_MAP_COPY_OBJECT;
5257 copy->cpy_object = VM_OBJECT_NULL;
5258
5259 /*
5260 * Return the new object.
5261 */
5262 return new_copy;
5263 }
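/*
 * Illustrative sketch of the pattern described above (hypothetical
 * routine and validity check, not original source): a kernel routine
 * that must inspect out-of-line data before committing duplicates the
 * copy object first, so that a later failure path can still discard the
 * original safely.
 */
#if 0	/* example only, never compiled */
static kern_return_t
example_inspect_then_consume(vm_map_copy_t copy)
{
	vm_map_copy_t	working_copy;

	working_copy = vm_map_copy_copy(copy);	/* original is now empty */

	if (!example_data_looks_valid(working_copy)) {	/* hypothetical check */
		vm_map_copy_discard(working_copy);
		return KERN_FAILURE;	/* caller may still discard "copy" */
	}

	/* ... hand "working_copy" off (e.g. to vm_map_copyout) on success ... */
	return KERN_SUCCESS;
}
#endif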
5264
5265 static kern_return_t
5266 vm_map_overwrite_submap_recurse(
5267 vm_map_t dst_map,
5268 vm_map_offset_t dst_addr,
5269 vm_map_size_t dst_size)
5270 {
5271 vm_map_offset_t dst_end;
5272 vm_map_entry_t tmp_entry;
5273 vm_map_entry_t entry;
5274 kern_return_t result;
5275 boolean_t encountered_sub_map = FALSE;
5276
5277
5278
5279 /*
5280 * Verify that the destination is all writeable
5281 * initially. We have to trunc the destination
5282 * address and round the copy size or we'll end up
5283 * splitting entries in strange ways.
5284 */
5285
5286 dst_end = vm_map_round_page(dst_addr + dst_size);
5287 vm_map_lock(dst_map);
5288
5289 start_pass_1:
5290 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
5291 vm_map_unlock(dst_map);
5292 return(KERN_INVALID_ADDRESS);
5293 }
5294
5295 vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(dst_addr));
5296 assert(!tmp_entry->use_pmap); /* clipping did unnest if needed */
5297
5298 for (entry = tmp_entry;;) {
5299 vm_map_entry_t next;
5300
5301 next = entry->vme_next;
5302 while(entry->is_sub_map) {
5303 vm_map_offset_t sub_start;
5304 vm_map_offset_t sub_end;
5305 vm_map_offset_t local_end;
5306
5307 if (entry->in_transition) {
5308 /*
5309 * Say that we are waiting, and wait for entry.
5310 */
5311 entry->needs_wakeup = TRUE;
5312 vm_map_entry_wait(dst_map, THREAD_UNINT);
5313
5314 goto start_pass_1;
5315 }
5316
5317 encountered_sub_map = TRUE;
5318 sub_start = entry->offset;
5319
5320 if(entry->vme_end < dst_end)
5321 sub_end = entry->vme_end;
5322 else
5323 sub_end = dst_end;
5324 sub_end -= entry->vme_start;
5325 sub_end += entry->offset;
5326 local_end = entry->vme_end;
5327 vm_map_unlock(dst_map);
5328
5329 result = vm_map_overwrite_submap_recurse(
5330 entry->object.sub_map,
5331 sub_start,
5332 sub_end - sub_start);
5333
5334 if(result != KERN_SUCCESS)
5335 return result;
5336 if (dst_end <= entry->vme_end)
5337 return KERN_SUCCESS;
5338 vm_map_lock(dst_map);
5339 if(!vm_map_lookup_entry(dst_map, local_end,
5340 &tmp_entry)) {
5341 vm_map_unlock(dst_map);
5342 return(KERN_INVALID_ADDRESS);
5343 }
5344 entry = tmp_entry;
5345 next = entry->vme_next;
5346 }
5347
5348 if ( ! (entry->protection & VM_PROT_WRITE)) {
5349 vm_map_unlock(dst_map);
5350 return(KERN_PROTECTION_FAILURE);
5351 }
5352
5353 /*
5354 * If the entry is in transition, we must wait
5355 * for it to exit that state. Anything could happen
5356 * when we unlock the map, so start over.
5357 */
5358 if (entry->in_transition) {
5359
5360 /*
5361 * Say that we are waiting, and wait for entry.
5362 */
5363 entry->needs_wakeup = TRUE;
5364 vm_map_entry_wait(dst_map, THREAD_UNINT);
5365
5366 goto start_pass_1;
5367 }
5368
5369 /*
5370 * our range is contained completely within this map entry
5371 */
5372 if (dst_end <= entry->vme_end) {
5373 vm_map_unlock(dst_map);
5374 return KERN_SUCCESS;
5375 }
5376 /*
5377 * check that range specified is contiguous region
5378 */
5379 if ((next == vm_map_to_entry(dst_map)) ||
5380 (next->vme_start != entry->vme_end)) {
5381 vm_map_unlock(dst_map);
5382 return(KERN_INVALID_ADDRESS);
5383 }
5384
5385 /*
5386 * Check for permanent objects in the destination.
5387 */
5388 if ((entry->object.vm_object != VM_OBJECT_NULL) &&
5389 ((!entry->object.vm_object->internal) ||
5390 (entry->object.vm_object->true_share))) {
5391 if(encountered_sub_map) {
5392 vm_map_unlock(dst_map);
5393 return(KERN_FAILURE);
5394 }
5395 }
5396
5397
5398 entry = next;
5399 }/* for */
5400 vm_map_unlock(dst_map);
5401 return(KERN_SUCCESS);
5402 }
5403
5404 /*
5405 * Routine: vm_map_copy_overwrite
5406 *
5407 * Description:
5408 * Copy the memory described by the map copy
5409 * object (copy; returned by vm_map_copyin) onto
5410 * the specified destination region (dst_map, dst_addr).
5411 * The destination must be writeable.
5412 *
5413 * Unlike vm_map_copyout, this routine actually
5414 * writes over previously-mapped memory. If the
5415 * previous mapping was to a permanent (user-supplied)
5416 * memory object, it is preserved.
5417 *
5418 * The attributes (protection and inheritance) of the
5419 * destination region are preserved.
5420 *
5421 * If successful, consumes the copy object.
5422 * Otherwise, the caller is responsible for it.
5423 *
5424 * Implementation notes:
5425 * To overwrite aligned temporary virtual memory, it is
5426 * sufficient to remove the previous mapping and insert
5427 * the new copy. This replacement is done either on
5428 * the whole region (if no permanent virtual memory
5429 * objects are embedded in the destination region) or
5430 * in individual map entries.
5431 *
5432 * To overwrite permanent virtual memory, it is necessary
5433 * to copy each page, as the external memory management
5434 * interface currently does not provide any optimizations.
5435 *
5436 * Unaligned memory also has to be copied. It is possible
5437 * to use 'vm_trickery' to copy the aligned data. This is
5438 * not done but not hard to implement.
5439 *
5440 * Once a page of permanent memory has been overwritten,
5441 * it is impossible to interrupt this function; otherwise,
5442 * the call would be neither atomic nor location-independent.
5443 * The kernel-state portion of a user thread must be
5444 * interruptible.
5445 *
5446 * It may be expensive to forward all requests that might
5447 * overwrite permanent memory (vm_write, vm_copy) to
5448 * uninterruptible kernel threads. This routine may be
5449 * called by interruptible threads; however, success is
5450 * not guaranteed -- if the request cannot be performed
5451 * atomically and interruptibly, an error indication is
5452 * returned.
5453 */
5454
5455 static kern_return_t
5456 vm_map_copy_overwrite_nested(
5457 vm_map_t dst_map,
5458 vm_map_address_t dst_addr,
5459 vm_map_copy_t copy,
5460 boolean_t interruptible,
5461 pmap_t pmap,
5462 boolean_t discard_on_success)
5463 {
5464 vm_map_offset_t dst_end;
5465 vm_map_entry_t tmp_entry;
5466 vm_map_entry_t entry;
5467 kern_return_t kr;
5468 boolean_t aligned = TRUE;
5469 boolean_t contains_permanent_objects = FALSE;
5470 boolean_t encountered_sub_map = FALSE;
5471 vm_map_offset_t base_addr;
5472 vm_map_size_t copy_size;
5473 vm_map_size_t total_size;
5474
5475
5476 /*
5477 * Check for null copy object.
5478 */
5479
5480 if (copy == VM_MAP_COPY_NULL)
5481 return(KERN_SUCCESS);
5482
5483 /*
5484 * Check for special kernel buffer allocated
5485 * by new_ipc_kmsg_copyin.
5486 */
5487
5488 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
5489 return(vm_map_copyout_kernel_buffer(
5490 dst_map, &dst_addr,
5491 copy, TRUE));
5492 }
5493
5494 /*
5495 * Only works for entry lists at the moment. Will
5496 * support page lists later.
5497 */
5498
5499 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
5500
5501 if (copy->size == 0) {
5502 if (discard_on_success)
5503 vm_map_copy_discard(copy);
5504 return(KERN_SUCCESS);
5505 }
5506
5507 /*
5508 * Verify that the destination is all writeable
5509 * initially. We have to trunc the destination
5510 * address and round the copy size or we'll end up
5511 * splitting entries in strange ways.
5512 */
5513
5514 if (!page_aligned(copy->size) ||
5515 !page_aligned (copy->offset) ||
5516 !page_aligned (dst_addr))
5517 {
5518 aligned = FALSE;
5519 dst_end = vm_map_round_page(dst_addr + copy->size);
5520 } else {
5521 dst_end = dst_addr + copy->size;
5522 }
5523
5524 vm_map_lock(dst_map);
5525
5526 /* LP64todo - remove this check when vm_map_commpage64()
5527 * no longer has to stuff in a map_entry for the commpage
5528 * above the map's max_offset.
5529 */
5530 if (dst_addr >= dst_map->max_offset) {
5531 vm_map_unlock(dst_map);
5532 return(KERN_INVALID_ADDRESS);
5533 }
5534
5535 start_pass_1:
5536 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
5537 vm_map_unlock(dst_map);
5538 return(KERN_INVALID_ADDRESS);
5539 }
5540 vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(dst_addr));
5541 for (entry = tmp_entry;;) {
5542 vm_map_entry_t next = entry->vme_next;
5543
5544 while(entry->is_sub_map) {
5545 vm_map_offset_t sub_start;
5546 vm_map_offset_t sub_end;
5547 vm_map_offset_t local_end;
5548
5549 if (entry->in_transition) {
5550
5551 /*
5552 * Say that we are waiting, and wait for entry.
5553 */
5554 entry->needs_wakeup = TRUE;
5555 vm_map_entry_wait(dst_map, THREAD_UNINT);
5556
5557 goto start_pass_1;
5558 }
5559
5560 local_end = entry->vme_end;
5561 if (!(entry->needs_copy)) {
5562 /* if needs_copy we are a COW submap */
5563 /* in such a case we just replace so */
5564 /* there is no need for the */
5565 /* following check. */
5566 encountered_sub_map = TRUE;
5567 sub_start = entry->offset;
5568
5569 if(entry->vme_end < dst_end)
5570 sub_end = entry->vme_end;
5571 else
5572 sub_end = dst_end;
5573 sub_end -= entry->vme_start;
5574 sub_end += entry->offset;
5575 vm_map_unlock(dst_map);
5576
5577 kr = vm_map_overwrite_submap_recurse(
5578 entry->object.sub_map,
5579 sub_start,
5580 sub_end - sub_start);
5581 if(kr != KERN_SUCCESS)
5582 return kr;
5583 vm_map_lock(dst_map);
5584 }
5585
5586 if (dst_end <= entry->vme_end)
5587 goto start_overwrite;
5588 if(!vm_map_lookup_entry(dst_map, local_end,
5589 &entry)) {
5590 vm_map_unlock(dst_map);
5591 return(KERN_INVALID_ADDRESS);
5592 }
5593 next = entry->vme_next;
5594 }
5595
5596 if ( ! (entry->protection & VM_PROT_WRITE)) {
5597 vm_map_unlock(dst_map);
5598 return(KERN_PROTECTION_FAILURE);
5599 }
5600
5601 /*
5602 * If the entry is in transition, we must wait
5603 * for it to exit that state. Anything could happen
5604 * when we unlock the map, so start over.
5605 */
5606 if (entry->in_transition) {
5607
5608 /*
5609 * Say that we are waiting, and wait for entry.
5610 */
5611 entry->needs_wakeup = TRUE;
5612 vm_map_entry_wait(dst_map, THREAD_UNINT);
5613
5614 goto start_pass_1;
5615 }
5616
5617 /*
5618 * our range is contained completely within this map entry
5619 */
5620 if (dst_end <= entry->vme_end)
5621 break;
5622 /*
5623 * check that range specified is contiguous region
5624 */
5625 if ((next == vm_map_to_entry(dst_map)) ||
5626 (next->vme_start != entry->vme_end)) {
5627 vm_map_unlock(dst_map);
5628 return(KERN_INVALID_ADDRESS);
5629 }
5630
5631
5632 /*
5633 * Check for permanent objects in the destination.
5634 */
5635 if ((entry->object.vm_object != VM_OBJECT_NULL) &&
5636 ((!entry->object.vm_object->internal) ||
5637 (entry->object.vm_object->true_share))) {
5638 contains_permanent_objects = TRUE;
5639 }
5640
5641 entry = next;
5642 }/* for */
5643
5644 start_overwrite:
5645 /*
5646 * If there are permanent objects in the destination, then
5647 * the copy cannot be interrupted.
5648 */
5649
5650 if (interruptible && contains_permanent_objects) {
5651 vm_map_unlock(dst_map);
5652 return(KERN_FAILURE); /* XXX */
5653 }
5654
5655 /*
5656 *
5657 * Make a second pass, overwriting the data.
5658 * At the beginning of each loop iteration,
5659 * the next entry to be overwritten is "tmp_entry"
5660 * (initially, the value returned from the lookup above),
5661 * and the starting address expected in that entry
5662 * is "start".
5663 */
5664
5665 total_size = copy->size;
5666 if(encountered_sub_map) {
5667 copy_size = 0;
5668 /* re-calculate tmp_entry since we've had the map */
5669 /* unlocked */
5670 if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
5671 vm_map_unlock(dst_map);
5672 return(KERN_INVALID_ADDRESS);
5673 }
5674 } else {
5675 copy_size = copy->size;
5676 }
5677
5678 base_addr = dst_addr;
5679 while(TRUE) {
5680 /* deconstruct the copy object and do in parts */
5681 /* only in sub_map, interruptible case */
5682 vm_map_entry_t copy_entry;
5683 vm_map_entry_t previous_prev = VM_MAP_ENTRY_NULL;
5684 vm_map_entry_t next_copy = VM_MAP_ENTRY_NULL;
5685 int nentries;
5686 int remaining_entries = 0;
5687 vm_map_offset_t new_offset = 0;
5688
5689 for (entry = tmp_entry; copy_size == 0;) {
5690 vm_map_entry_t next;
5691
5692 next = entry->vme_next;
5693
5694 /* tmp_entry and base address are moved along */
5695 /* each time we encounter a sub-map. Otherwise */
5696 /* entry can outpace tmp_entry, and the copy_size */
5697 /* may reflect the distance between them */
5698 /* If the current entry is found to be in transition, */
5699 /* we will start over at the beginning or at the last */
5700 /* encounter of a submap, as dictated by base_addr, */
5701 /* and we will zero copy_size accordingly. */
5702 if (entry->in_transition) {
5703 /*
5704 * Say that we are waiting, and wait for entry.
5705 */
5706 entry->needs_wakeup = TRUE;
5707 vm_map_entry_wait(dst_map, THREAD_UNINT);
5708
5709 if(!vm_map_lookup_entry(dst_map, base_addr,
5710 &tmp_entry)) {
5711 vm_map_unlock(dst_map);
5712 return(KERN_INVALID_ADDRESS);
5713 }
5714 copy_size = 0;
5715 entry = tmp_entry;
5716 continue;
5717 }
5718 if(entry->is_sub_map) {
5719 vm_map_offset_t sub_start;
5720 vm_map_offset_t sub_end;
5721 vm_map_offset_t local_end;
5722
5723 if (entry->needs_copy) {
5724 /* if this is a COW submap */
5725 /* just back the range with an */
5726 /* anonymous entry */
5727 if(entry->vme_end < dst_end)
5728 sub_end = entry->vme_end;
5729 else
5730 sub_end = dst_end;
5731 if(entry->vme_start < base_addr)
5732 sub_start = base_addr;
5733 else
5734 sub_start = entry->vme_start;
5735 vm_map_clip_end(
5736 dst_map, entry, sub_end);
5737 vm_map_clip_start(
5738 dst_map, entry, sub_start);
5739 assert(!entry->use_pmap);
5740 entry->is_sub_map = FALSE;
5741 vm_map_deallocate(
5742 entry->object.sub_map);
5743 entry->object.sub_map = NULL;
5744 entry->is_shared = FALSE;
5745 entry->needs_copy = FALSE;
5746 entry->offset = 0;
5747 /*
5748 * XXX FBDP
5749 * We should propagate the protections
5750 * of the submap entry here instead
5751 * of forcing them to VM_PROT_ALL...
5752 * Or better yet, we should inherit
5753 * the protection of the copy_entry.
5754 */
5755 entry->protection = VM_PROT_ALL;
5756 entry->max_protection = VM_PROT_ALL;
5757 entry->wired_count = 0;
5758 entry->user_wired_count = 0;
5759 if(entry->inheritance
5760 == VM_INHERIT_SHARE)
5761 entry->inheritance = VM_INHERIT_COPY;
5762 continue;
5763 }
5764 /* first take care of any non-sub_map */
5765 /* entries to send */
5766 if(base_addr < entry->vme_start) {
5767 /* stuff to send */
5768 copy_size =
5769 entry->vme_start - base_addr;
5770 break;
5771 }
5772 sub_start = entry->offset;
5773
5774 if(entry->vme_end < dst_end)
5775 sub_end = entry->vme_end;
5776 else
5777 sub_end = dst_end;
5778 sub_end -= entry->vme_start;
5779 sub_end += entry->offset;
5780 local_end = entry->vme_end;
5781 vm_map_unlock(dst_map);
5782 copy_size = sub_end - sub_start;
5783
5784 /* adjust the copy object */
5785 if (total_size > copy_size) {
5786 vm_map_size_t local_size = 0;
5787 vm_map_size_t entry_size;
5788
5789 nentries = 1;
5790 new_offset = copy->offset;
5791 copy_entry = vm_map_copy_first_entry(copy);
5792 while(copy_entry !=
5793 vm_map_copy_to_entry(copy)){
5794 entry_size = copy_entry->vme_end -
5795 copy_entry->vme_start;
5796 if((local_size < copy_size) &&
5797 ((local_size + entry_size)
5798 >= copy_size)) {
5799 vm_map_copy_clip_end(copy,
5800 copy_entry,
5801 copy_entry->vme_start +
5802 (copy_size - local_size));
5803 entry_size = copy_entry->vme_end -
5804 copy_entry->vme_start;
5805 local_size += entry_size;
5806 new_offset += entry_size;
5807 }
5808 if(local_size >= copy_size) {
5809 next_copy = copy_entry->vme_next;
5810 copy_entry->vme_next =
5811 vm_map_copy_to_entry(copy);
5812 previous_prev =
5813 copy->cpy_hdr.links.prev;
5814 copy->cpy_hdr.links.prev = copy_entry;
5815 copy->size = copy_size;
5816 remaining_entries =
5817 copy->cpy_hdr.nentries;
5818 remaining_entries -= nentries;
5819 copy->cpy_hdr.nentries = nentries;
5820 break;
5821 } else {
5822 local_size += entry_size;
5823 new_offset += entry_size;
5824 nentries++;
5825 }
5826 copy_entry = copy_entry->vme_next;
5827 }
5828 }
5829
5830 if((entry->use_pmap) && (pmap == NULL)) {
5831 kr = vm_map_copy_overwrite_nested(
5832 entry->object.sub_map,
5833 sub_start,
5834 copy,
5835 interruptible,
5836 entry->object.sub_map->pmap,
5837 TRUE);
5838 } else if (pmap != NULL) {
5839 kr = vm_map_copy_overwrite_nested(
5840 entry->object.sub_map,
5841 sub_start,
5842 copy,
5843 interruptible, pmap,
5844 TRUE);
5845 } else {
5846 kr = vm_map_copy_overwrite_nested(
5847 entry->object.sub_map,
5848 sub_start,
5849 copy,
5850 interruptible,
5851 dst_map->pmap,
5852 TRUE);
5853 }
5854 if(kr != KERN_SUCCESS) {
5855 if(next_copy != NULL) {
5856 copy->cpy_hdr.nentries +=
5857 remaining_entries;
5858 copy->cpy_hdr.links.prev->vme_next =
5859 next_copy;
5860 copy->cpy_hdr.links.prev
5861 = previous_prev;
5862 copy->size = total_size;
5863 }
5864 return kr;
5865 }
5866 if (dst_end <= local_end) {
5867 return(KERN_SUCCESS);
5868 }
5869 /* otherwise copy no longer exists, it was */
5870 /* destroyed after successful copy_overwrite */
5871 copy = (vm_map_copy_t)
5872 zalloc(vm_map_copy_zone);
5873 vm_map_copy_first_entry(copy) =
5874 vm_map_copy_last_entry(copy) =
5875 vm_map_copy_to_entry(copy);
5876 copy->type = VM_MAP_COPY_ENTRY_LIST;
5877 copy->offset = new_offset;
5878
5879 total_size -= copy_size;
5880 copy_size = 0;
5881 /* put back remainder of copy in container */
5882 if(next_copy != NULL) {
5883 copy->cpy_hdr.nentries = remaining_entries;
5884 copy->cpy_hdr.links.next = next_copy;
5885 copy->cpy_hdr.links.prev = previous_prev;
5886 copy->size = total_size;
5887 next_copy->vme_prev =
5888 vm_map_copy_to_entry(copy);
5889 next_copy = NULL;
5890 }
5891 base_addr = local_end;
5892 vm_map_lock(dst_map);
5893 if(!vm_map_lookup_entry(dst_map,
5894 local_end, &tmp_entry)) {
5895 vm_map_unlock(dst_map);
5896 return(KERN_INVALID_ADDRESS);
5897 }
5898 entry = tmp_entry;
5899 continue;
5900 }
5901 if (dst_end <= entry->vme_end) {
5902 copy_size = dst_end - base_addr;
5903 break;
5904 }
5905
5906 if ((next == vm_map_to_entry(dst_map)) ||
5907 (next->vme_start != entry->vme_end)) {
5908 vm_map_unlock(dst_map);
5909 return(KERN_INVALID_ADDRESS);
5910 }
5911
5912 entry = next;
5913 }/* for */
5914
5915 next_copy = NULL;
5916 nentries = 1;
5917
5918 /* adjust the copy object */
5919 if (total_size > copy_size) {
5920 vm_map_size_t local_size = 0;
5921 vm_map_size_t entry_size;
5922
5923 new_offset = copy->offset;
5924 copy_entry = vm_map_copy_first_entry(copy);
5925 while(copy_entry != vm_map_copy_to_entry(copy)) {
5926 entry_size = copy_entry->vme_end -
5927 copy_entry->vme_start;
5928 if((local_size < copy_size) &&
5929 ((local_size + entry_size)
5930 >= copy_size)) {
5931 vm_map_copy_clip_end(copy, copy_entry,
5932 copy_entry->vme_start +
5933 (copy_size - local_size));
5934 entry_size = copy_entry->vme_end -
5935 copy_entry->vme_start;
5936 local_size += entry_size;
5937 new_offset += entry_size;
5938 }
5939 if(local_size >= copy_size) {
5940 next_copy = copy_entry->vme_next;
5941 copy_entry->vme_next =
5942 vm_map_copy_to_entry(copy);
5943 previous_prev =
5944 copy->cpy_hdr.links.prev;
5945 copy->cpy_hdr.links.prev = copy_entry;
5946 copy->size = copy_size;
5947 remaining_entries =
5948 copy->cpy_hdr.nentries;
5949 remaining_entries -= nentries;
5950 copy->cpy_hdr.nentries = nentries;
5951 break;
5952 } else {
5953 local_size += entry_size;
5954 new_offset += entry_size;
5955 nentries++;
5956 }
5957 copy_entry = copy_entry->vme_next;
5958 }
5959 }
5960
5961 if (aligned) {
5962 pmap_t local_pmap;
5963
5964 if(pmap)
5965 local_pmap = pmap;
5966 else
5967 local_pmap = dst_map->pmap;
5968
5969 if ((kr = vm_map_copy_overwrite_aligned(
5970 dst_map, tmp_entry, copy,
5971 base_addr, local_pmap)) != KERN_SUCCESS) {
5972 if(next_copy != NULL) {
5973 copy->cpy_hdr.nentries +=
5974 remaining_entries;
5975 copy->cpy_hdr.links.prev->vme_next =
5976 next_copy;
5977 copy->cpy_hdr.links.prev =
5978 previous_prev;
5979 copy->size += copy_size;
5980 }
5981 return kr;
5982 }
5983 vm_map_unlock(dst_map);
5984 } else {
5985 /*
5986 * Performance gain:
5987 *
5988 * if the copy and dst address are misaligned but the same
5989 * offset within the page we can copy_not_aligned the
5990 * misaligned parts and copy aligned the rest. If they are
5991 * aligned but len is unaligned we simply need to copy
5992 * the end bit unaligned. We'll need to split the misaligned
5993 * bits of the region in this case !
5994 */
5995 /* ALWAYS UNLOCKS THE dst_map MAP */
5996 if ((kr = vm_map_copy_overwrite_unaligned( dst_map,
5997 tmp_entry, copy, base_addr)) != KERN_SUCCESS) {
5998 if(next_copy != NULL) {
5999 copy->cpy_hdr.nentries +=
6000 remaining_entries;
6001 copy->cpy_hdr.links.prev->vme_next =
6002 next_copy;
6003 copy->cpy_hdr.links.prev =
6004 previous_prev;
6005 copy->size += copy_size;
6006 }
6007 return kr;
6008 }
6009 }
6010 total_size -= copy_size;
6011 if(total_size == 0)
6012 break;
6013 base_addr += copy_size;
6014 copy_size = 0;
6015 copy->offset = new_offset;
6016 if(next_copy != NULL) {
6017 copy->cpy_hdr.nentries = remaining_entries;
6018 copy->cpy_hdr.links.next = next_copy;
6019 copy->cpy_hdr.links.prev = previous_prev;
6020 next_copy->vme_prev = vm_map_copy_to_entry(copy);
6021 copy->size = total_size;
6022 }
6023 vm_map_lock(dst_map);
6024 while(TRUE) {
6025 if (!vm_map_lookup_entry(dst_map,
6026 base_addr, &tmp_entry)) {
6027 vm_map_unlock(dst_map);
6028 return(KERN_INVALID_ADDRESS);
6029 }
6030 if (tmp_entry->in_transition) {
6031 entry->needs_wakeup = TRUE;
6032 vm_map_entry_wait(dst_map, THREAD_UNINT);
6033 } else {
6034 break;
6035 }
6036 }
6037 vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(base_addr));
6038
6039 entry = tmp_entry;
6040 } /* while */
6041
6042 /*
6043 * Throw away the vm_map_copy object
6044 */
6045 if (discard_on_success)
6046 vm_map_copy_discard(copy);
6047
6048 return(KERN_SUCCESS);
6049 }/* vm_map_copy_overwrite_nested */
6050
6051 kern_return_t
6052 vm_map_copy_overwrite(
6053 vm_map_t dst_map,
6054 vm_map_offset_t dst_addr,
6055 vm_map_copy_t copy,
6056 boolean_t interruptible)
6057 {
6058 vm_map_size_t head_size, tail_size;
6059 vm_map_copy_t head_copy, tail_copy;
6060 vm_map_offset_t head_addr, tail_addr;
6061 vm_map_entry_t entry;
6062 kern_return_t kr;
6063
6064 head_size = 0;
6065 tail_size = 0;
6066 head_copy = NULL;
6067 tail_copy = NULL;
6068 head_addr = 0;
6069 tail_addr = 0;
6070
6071 if (interruptible ||
6072 copy == VM_MAP_COPY_NULL ||
6073 copy->type != VM_MAP_COPY_ENTRY_LIST) {
6074 /*
6075 * We can't split the "copy" map if we're interruptible
6076 * or if we don't have a "copy" map...
6077 */
6078 blunt_copy:
6079 return vm_map_copy_overwrite_nested(dst_map,
6080 dst_addr,
6081 copy,
6082 interruptible,
6083 (pmap_t) NULL,
6084 TRUE);
6085 }
6086
6087 if (copy->size < 3 * PAGE_SIZE) {
6088 /*
6089 * Too small to bother with optimizing...
6090 */
6091 goto blunt_copy;
6092 }
6093
6094 if ((dst_addr & PAGE_MASK) != (copy->offset & PAGE_MASK)) {
6095 /*
6096 * Incompatible mis-alignment of source and destination...
6097 */
6098 goto blunt_copy;
6099 }
6100
6101 /*
6102 * Proper alignment or identical mis-alignment at the beginning.
6103 * Let's try and do a small unaligned copy first (if needed)
6104 * and then an aligned copy for the rest.
6105 */
6106 if (!page_aligned(dst_addr)) {
6107 head_addr = dst_addr;
6108 head_size = PAGE_SIZE - (copy->offset & PAGE_MASK);
6109 }
6110 if (!page_aligned(copy->offset + copy->size)) {
6111 /*
6112 * Mis-alignment at the end.
6113 * Do an aligned copy up to the last page and
6114 * then an unaligned copy for the remaining bytes.
6115 */
6116 tail_size = (copy->offset + copy->size) & PAGE_MASK;
6117 tail_addr = dst_addr + copy->size - tail_size;
6118 }
6119
6120 if (head_size + tail_size == copy->size) {
6121 /*
6122 * It's all unaligned, no optimization possible...
6123 */
6124 goto blunt_copy;
6125 }
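/*
 * Illustrative worked example (not part of the original source; assumes
 * PAGE_SIZE == 0x1000):
 *
 *     copy->offset == 0x3200, copy->size == 0x3a00, dst_addr == 0x7200
 *
 * Source and destination are both offset 0x200 into their pages, so the
 * optimization applies:
 *
 *     head_addr = 0x7200, head_size = 0x1000 - 0x200 = 0x0e00
 *     tail_size = (0x3200 + 0x3a00) & PAGE_MASK = 0x0c00
 *     tail_addr = 0x7200 + 0x3a00 - 0x0c00 = 0xa000
 *
 * The head [0x7200, 0x8000) and tail [0xa000, 0xac00) are copied
 * unaligned, while the two middle pages [0x8000, 0xa000) go through
 * the aligned path.
 */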
6126
6127 /*
6128 * Can't optimize if there are any submaps in the
6129 * destination due to the way we free the "copy" map
6130 * progressively in vm_map_copy_overwrite_nested()
6131 * in that case.
6132 */
6133 vm_map_lock_read(dst_map);
6134 if (! vm_map_lookup_entry(dst_map, dst_addr, &entry)) {
6135 vm_map_unlock_read(dst_map);
6136 goto blunt_copy;
6137 }
6138 for (;
6139 (entry != vm_map_copy_to_entry(copy) &&
6140 entry->vme_start < dst_addr + copy->size);
6141 entry = entry->vme_next) {
6142 if (entry->is_sub_map) {
6143 vm_map_unlock_read(dst_map);
6144 goto blunt_copy;
6145 }
6146 }
6147 vm_map_unlock_read(dst_map);
6148
6149 if (head_size) {
6150 /*
6151 * Unaligned copy of the first "head_size" bytes, to reach
6152 * a page boundary.
6153 */
6154
6155 /*
6156 * Extract "head_copy" out of "copy".
6157 */
6158 head_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
6159 vm_map_copy_first_entry(head_copy) =
6160 vm_map_copy_to_entry(head_copy);
6161 vm_map_copy_last_entry(head_copy) =
6162 vm_map_copy_to_entry(head_copy);
6163 head_copy->type = VM_MAP_COPY_ENTRY_LIST;
6164 head_copy->cpy_hdr.nentries = 0;
6165 head_copy->cpy_hdr.entries_pageable =
6166 copy->cpy_hdr.entries_pageable;
6167 vm_map_store_init(&head_copy->cpy_hdr);
6168
6169 head_copy->offset = copy->offset;
6170 head_copy->size = head_size;
6171
6172 copy->offset += head_size;
6173 copy->size -= head_size;
6174
6175 entry = vm_map_copy_first_entry(copy);
6176 vm_map_copy_clip_end(copy, entry, copy->offset);
6177 vm_map_copy_entry_unlink(copy, entry);
6178 vm_map_copy_entry_link(head_copy,
6179 vm_map_copy_to_entry(head_copy),
6180 entry);
6181
6182 /*
6183 * Do the unaligned copy.
6184 */
6185 kr = vm_map_copy_overwrite_nested(dst_map,
6186 head_addr,
6187 head_copy,
6188 interruptible,
6189 (pmap_t) NULL,
6190 FALSE);
6191 if (kr != KERN_SUCCESS)
6192 goto done;
6193 }
6194
6195 if (tail_size) {
6196 /*
6197 * Extract "tail_copy" out of "copy".
6198 */
6199 tail_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
6200 vm_map_copy_first_entry(tail_copy) =
6201 vm_map_copy_to_entry(tail_copy);
6202 vm_map_copy_last_entry(tail_copy) =
6203 vm_map_copy_to_entry(tail_copy);
6204 tail_copy->type = VM_MAP_COPY_ENTRY_LIST;
6205 tail_copy->cpy_hdr.nentries = 0;
6206 tail_copy->cpy_hdr.entries_pageable =
6207 copy->cpy_hdr.entries_pageable;
6208 vm_map_store_init(&tail_copy->cpy_hdr);
6209
6210 tail_copy->offset = copy->offset + copy->size - tail_size;
6211 tail_copy->size = tail_size;
6212
6213 copy->size -= tail_size;
6214
6215 entry = vm_map_copy_last_entry(copy);
6216 vm_map_copy_clip_start(copy, entry, tail_copy->offset);
6217 entry = vm_map_copy_last_entry(copy);
6218 vm_map_copy_entry_unlink(copy, entry);
6219 vm_map_copy_entry_link(tail_copy,
6220 vm_map_copy_last_entry(tail_copy),
6221 entry);
6222 }
6223
6224 /*
6225 * Copy most (or possibly all) of the data.
6226 */
6227 kr = vm_map_copy_overwrite_nested(dst_map,
6228 dst_addr + head_size,
6229 copy,
6230 interruptible,
6231 (pmap_t) NULL,
6232 FALSE);
6233 if (kr != KERN_SUCCESS) {
6234 goto done;
6235 }
6236
6237 if (tail_size) {
6238 kr = vm_map_copy_overwrite_nested(dst_map,
6239 tail_addr,
6240 tail_copy,
6241 interruptible,
6242 (pmap_t) NULL,
6243 FALSE);
6244 }
6245
6246 done:
6247 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
6248 if (kr == KERN_SUCCESS) {
6249 /*
6250 * Discard all the copy maps.
6251 */
6252 if (head_copy) {
6253 vm_map_copy_discard(head_copy);
6254 head_copy = NULL;
6255 }
6256 vm_map_copy_discard(copy);
6257 if (tail_copy) {
6258 vm_map_copy_discard(tail_copy);
6259 tail_copy = NULL;
6260 }
6261 } else {
6262 /*
6263 * Re-assemble the original copy map.
6264 */
6265 if (head_copy) {
6266 entry = vm_map_copy_first_entry(head_copy);
6267 vm_map_copy_entry_unlink(head_copy, entry);
6268 vm_map_copy_entry_link(copy,
6269 vm_map_copy_to_entry(copy),
6270 entry);
6271 copy->offset -= head_size;
6272 copy->size += head_size;
6273 vm_map_copy_discard(head_copy);
6274 head_copy = NULL;
6275 }
6276 if (tail_copy) {
6277 entry = vm_map_copy_last_entry(tail_copy);
6278 vm_map_copy_entry_unlink(tail_copy, entry);
6279 vm_map_copy_entry_link(copy,
6280 vm_map_copy_last_entry(copy),
6281 entry);
6282 copy->size += tail_size;
6283 vm_map_copy_discard(tail_copy);
6284 tail_copy = NULL;
6285 }
6286 }
6287 return kr;
6288 }
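/*
 * Illustrative caller sketch (not part of the original source): overwriting
 * an already-mapped, writable range in "dst_map" with data copied in from
 * another map. The copy object is consumed on success, so the caller only
 * discards it if the overwrite fails:
 *
 *     vm_map_copy_t   copy;
 *     kern_return_t   kr;
 *
 *     kr = vm_map_copyin(src_map, src_addr, len, FALSE, &copy);
 *     if (kr == KERN_SUCCESS) {
 *             kr = vm_map_copy_overwrite(dst_map, dst_addr, copy, FALSE);
 *             if (kr != KERN_SUCCESS)
 *                     vm_map_copy_discard(copy);
 *     }
 */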
6289
6290
6291 /*
6292 * Routine: vm_map_copy_overwrite_unaligned [internal use only]
6293 *
6294 * Description:
6295 * Physically copy unaligned data
6296 *
6297 * Implementation:
6298 * Unaligned parts of pages have to be physically copied. We use
6299 * a modified form of vm_fault_copy (which understands non-aligned
6300 * page offsets and sizes) to do the copy. We attempt to copy as
6301 * much memory in one go as possible; however, vm_fault_copy copies
6302 * within one memory object, so we have to find the smallest of
6303 * "amount left", "source object data size" and "target object data
6304 * size". With unaligned data we don't need to split regions, so the
6305 * source (copy) object should be one map entry; the target range may,
6306 * however, be split over multiple map entries. In any event we are
6307 * pessimistic about these assumptions.
6308 *
6309 * Assumptions:
6310 * dst_map is locked on entry and is returned locked on success,
6311 * unlocked on error.
6312 */
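/*
 * Rough per-iteration sketch (not part of the original source): each pass
 * of the loop below copies the largest chunk that fits within both the
 * current destination map entry and the current source copy entry,
 * clamped to what remains overall, i.e. effectively:
 *
 *     copy_size = (dst_size < src_size) ? dst_size : src_size;
 *     if (copy_size > amount_left)
 *             copy_size = amount_left;
 */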
6313
6314 static kern_return_t
6315 vm_map_copy_overwrite_unaligned(
6316 vm_map_t dst_map,
6317 vm_map_entry_t entry,
6318 vm_map_copy_t copy,
6319 vm_map_offset_t start)
6320 {
6321 vm_map_entry_t copy_entry = vm_map_copy_first_entry(copy);
6322 vm_map_version_t version;
6323 vm_object_t dst_object;
6324 vm_object_offset_t dst_offset;
6325 vm_object_offset_t src_offset;
6326 vm_object_offset_t entry_offset;
6327 vm_map_offset_t entry_end;
6328 vm_map_size_t src_size,
6329 dst_size,
6330 copy_size,
6331 amount_left;
6332 kern_return_t kr = KERN_SUCCESS;
6333
6334 vm_map_lock_write_to_read(dst_map);
6335
6336 src_offset = copy->offset - vm_object_trunc_page(copy->offset);
6337 amount_left = copy->size;
6338 /*
6339 * The copy is unaligned, so we never clipped this entry; we need the
6340 * offset into the vm_object, not just into the data.
6341 */
6342 while (amount_left > 0) {
6343
6344 if (entry == vm_map_to_entry(dst_map)) {
6345 vm_map_unlock_read(dst_map);
6346 return KERN_INVALID_ADDRESS;
6347 }
6348
6349 /* "start" must be within the current map entry */
6350 assert ((start>=entry->vme_start) && (start<entry->vme_end));
6351
6352 dst_offset = start - entry->vme_start;
6353
6354 dst_size = entry->vme_end - start;
6355
6356 src_size = copy_entry->vme_end -
6357 (copy_entry->vme_start + src_offset);
6358
6359 if (dst_size < src_size) {
6360 /*
6361 * we can only copy dst_size bytes before
6362 * we have to get the next destination entry
6363 */
6364 copy_size = dst_size;
6365 } else {
6366 /*
6367 * we can only copy src_size bytes before
6368 * we have to get the next source copy entry
6369 */
6370 copy_size = src_size;
6371 }
6372
6373 if (copy_size > amount_left) {
6374 copy_size = amount_left;
6375 }
6376 /*
6377 * Entry needs copy: create a shadow object for the
6378 * copy-on-write region.
6379 */
6380 if (entry->needs_copy &&
6381 ((entry->protection & VM_PROT_WRITE) != 0))
6382 {
6383 if (vm_map_lock_read_to_write(dst_map)) {
6384 vm_map_lock_read(dst_map);
6385 goto RetryLookup;
6386 }
6387 vm_object_shadow(&entry->object.vm_object,
6388 &entry->offset,
6389 (vm_map_size_t)(entry->vme_end
6390 - entry->vme_start));
6391 entry->needs_copy = FALSE;
6392 vm_map_lock_write_to_read(dst_map);
6393 }
6394 dst_object = entry->object.vm_object;
6395 /*
6396 * Unlike with the virtual (aligned) copy, we're going
6397 * to fault on it, therefore we need a target object.
6398 */
6399 if (dst_object == VM_OBJECT_NULL) {
6400 if (vm_map_lock_read_to_write(dst_map)) {
6401 vm_map_lock_read(dst_map);
6402 goto RetryLookup;
6403 }
6404 dst_object = vm_object_allocate((vm_map_size_t)
6405 entry->vme_end - entry->vme_start);
6406 entry->object.vm_object = dst_object;
6407 entry->offset = 0;
6408 vm_map_lock_write_to_read(dst_map);
6409 }
6410 /*
6411 * Take an object reference and unlock map. The "entry" may
6412 * disappear or change when the map is unlocked.
6413 */
6414 vm_object_reference(dst_object);
6415 version.main_timestamp = dst_map->timestamp;
6416 entry_offset = entry->offset;
6417 entry_end = entry->vme_end;
6418 vm_map_unlock_read(dst_map);
6419 /*
6420 * Copy as much as possible in one pass
6421 */
6422 kr = vm_fault_copy(
6423 copy_entry->object.vm_object,
6424 copy_entry->offset + src_offset,
6425 &copy_size,
6426 dst_object,
6427 entry_offset + dst_offset,
6428 dst_map,
6429 &version,
6430 THREAD_UNINT );
6431
6432 start += copy_size;
6433 src_offset += copy_size;
6434 amount_left -= copy_size;
6435 /*
6436 * Release the object reference
6437 */
6438 vm_object_deallocate(dst_object);
6439 /*
6440 * If a hard error occurred, return it now
6441 */
6442 if (kr != KERN_SUCCESS)
6443 return kr;
6444
6445 if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
6446 || amount_left == 0)
6447 {
6448 /*
6449 * all done with this copy entry, dispose.
6450 */
6451 vm_map_copy_entry_unlink(copy, copy_entry);
6452 vm_object_deallocate(copy_entry->object.vm_object);
6453 vm_map_copy_entry_dispose(copy, copy_entry);
6454
6455 if ((copy_entry = vm_map_copy_first_entry(copy))
6456 == vm_map_copy_to_entry(copy) && amount_left) {
6457 /*
6458 * not finished copying but ran out of source
6459 */
6460 return KERN_INVALID_ADDRESS;
6461 }
6462 src_offset = 0;
6463 }
6464
6465 if (amount_left == 0)
6466 return KERN_SUCCESS;
6467
6468 vm_map_lock_read(dst_map);
6469 if (version.main_timestamp == dst_map->timestamp) {
6470 if (start == entry_end) {
6471 /*
6472 * destination region is split. Use the version
6473 * information to avoid a lookup in the normal
6474 * case.
6475 */
6476 entry = entry->vme_next;
6477 /*
6478 * should be contiguous. Fail if we encounter
6479 * a hole in the destination.
6480 */
6481 if (start != entry->vme_start) {
6482 vm_map_unlock_read(dst_map);
6483 return KERN_INVALID_ADDRESS ;
6484 }
6485 }
6486 } else {
6487 /*
6488 * Map version check failed.
6489 * we must lookup the entry because somebody
6490 * might have changed the map behind our backs.
6491 */
6492 RetryLookup:
6493 if (!vm_map_lookup_entry(dst_map, start, &entry))
6494 {
6495 vm_map_unlock_read(dst_map);
6496 return KERN_INVALID_ADDRESS ;
6497 }
6498 }
6499 }/* while */
6500
6501 return KERN_SUCCESS;
6502 }/* vm_map_copy_overwrite_unaligned */
6503
6504 /*
6505 * Routine: vm_map_copy_overwrite_aligned [internal use only]
6506 *
6507 * Description:
6508 * Does all the vm_trickery possible for whole pages.
6509 *
6510 * Implementation:
6511 *
6512 * If there are no permanent objects in the destination,
6513 * and the source and destination map entry zones match,
6514 * and the destination map entry is not shared,
6515 * then the map entries can be deleted and replaced
6516 * with those from the copy. The following code is the
6517 * basic idea of what to do, but there are lots of annoying
6518 * little details about getting protection and inheritance
6519 * right. Should add protection, inheritance, and sharing checks
6520 * to the above pass and make sure that no wiring is involved.
6521 */
6522
6523 static kern_return_t
6524 vm_map_copy_overwrite_aligned(
6525 vm_map_t dst_map,
6526 vm_map_entry_t tmp_entry,
6527 vm_map_copy_t copy,
6528 vm_map_offset_t start,
6529 __unused pmap_t pmap)
6530 {
6531 vm_object_t object;
6532 vm_map_entry_t copy_entry;
6533 vm_map_size_t copy_size;
6534 vm_map_size_t size;
6535 vm_map_entry_t entry;
6536
6537 while ((copy_entry = vm_map_copy_first_entry(copy))
6538 != vm_map_copy_to_entry(copy))
6539 {
6540 copy_size = (copy_entry->vme_end - copy_entry->vme_start);
6541
6542 entry = tmp_entry;
6543 assert(!entry->use_pmap); /* unnested when clipped earlier */
6544 if (entry == vm_map_to_entry(dst_map)) {
6545 vm_map_unlock(dst_map);
6546 return KERN_INVALID_ADDRESS;
6547 }
6548 size = (entry->vme_end - entry->vme_start);
6549 /*
6550 * Make sure that no holes popped up in the
6551 * address map, and that the protection is
6552 * still valid, in case the map was unlocked
6553 * earlier.
6554 */
6555
6556 if ((entry->vme_start != start) || ((entry->is_sub_map)
6557 && !entry->needs_copy)) {
6558 vm_map_unlock(dst_map);
6559 return(KERN_INVALID_ADDRESS);
6560 }
6561 assert(entry != vm_map_to_entry(dst_map));
6562
6563 /*
6564 * Check protection again
6565 */
6566
6567 if ( ! (entry->protection & VM_PROT_WRITE)) {
6568 vm_map_unlock(dst_map);
6569 return(KERN_PROTECTION_FAILURE);
6570 }
6571
6572 /*
6573 * Adjust to source size first
6574 */
6575
6576 if (copy_size < size) {
6577 vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
6578 size = copy_size;
6579 }
6580
6581 /*
6582 * Adjust to destination size
6583 */
6584
6585 if (size < copy_size) {
6586 vm_map_copy_clip_end(copy, copy_entry,
6587 copy_entry->vme_start + size);
6588 copy_size = size;
6589 }
6590
6591 assert((entry->vme_end - entry->vme_start) == size);
6592 assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
6593 assert((copy_entry->vme_end - copy_entry->vme_start) == size);
6594
6595 /*
6596 * If the destination contains temporary unshared memory,
6597 * we can perform the copy by throwing it away and
6598 * installing the source data.
6599 */
6600
6601 object = entry->object.vm_object;
6602 if ((!entry->is_shared &&
6603 ((object == VM_OBJECT_NULL) ||
6604 (object->internal && !object->true_share))) ||
6605 entry->needs_copy) {
6606 vm_object_t old_object = entry->object.vm_object;
6607 vm_object_offset_t old_offset = entry->offset;
6608 vm_object_offset_t offset;
6609
6610 /*
6611 * Ensure that the source and destination aren't
6612 * identical
6613 */
6614 if (old_object == copy_entry->object.vm_object &&
6615 old_offset == copy_entry->offset) {
6616 vm_map_copy_entry_unlink(copy, copy_entry);
6617 vm_map_copy_entry_dispose(copy, copy_entry);
6618
6619 if (old_object != VM_OBJECT_NULL)
6620 vm_object_deallocate(old_object);
6621
6622 start = tmp_entry->vme_end;
6623 tmp_entry = tmp_entry->vme_next;
6624 continue;
6625 }
6626
6627 if (entry->alias >= VM_MEMORY_MALLOC &&
6628 entry->alias <= VM_MEMORY_MALLOC_LARGE_REUSED) {
6629 vm_object_t new_object, new_shadow;
6630
6631 /*
6632 * We're about to map something over a mapping
6633 * established by malloc()...
6634 */
6635 new_object = copy_entry->object.vm_object;
6636 if (new_object != VM_OBJECT_NULL) {
6637 vm_object_lock_shared(new_object);
6638 }
6639 while (new_object != VM_OBJECT_NULL &&
6640 new_object->internal) {
6641 new_shadow = new_object->shadow;
6642 if (new_shadow == VM_OBJECT_NULL) {
6643 break;
6644 }
6645 vm_object_lock_shared(new_shadow);
6646 vm_object_unlock(new_object);
6647 new_object = new_shadow;
6648 }
6649 if (new_object != VM_OBJECT_NULL) {
6650 if (!new_object->internal) {
6651 /*
6652 * The new mapping is backed
6653 * by an external object. We
6654 * don't want malloc'ed memory
6655 * to be replaced with such a
6656 * non-anonymous mapping, so
6657 * let's go off the optimized
6658 * path...
6659 */
6660 vm_object_unlock(new_object);
6661 goto slow_copy;
6662 }
6663 vm_object_unlock(new_object);
6664 }
6665 /*
6666 * The new mapping is still backed by
6667 * anonymous (internal) memory, so it's
6668 * OK to substitute it for the original
6669 * malloc() mapping.
6670 */
6671 }
6672
6673 if (old_object != VM_OBJECT_NULL) {
6674 if(entry->is_sub_map) {
6675 if(entry->use_pmap) {
6676 #ifndef NO_NESTED_PMAP
6677 pmap_unnest(dst_map->pmap,
6678 (addr64_t)entry->vme_start,
6679 entry->vme_end - entry->vme_start);
6680 #endif /* NO_NESTED_PMAP */
6681 if(dst_map->mapped) {
6682 /* clean up parent */
6683 /* map/maps */
6684 vm_map_submap_pmap_clean(
6685 dst_map, entry->vme_start,
6686 entry->vme_end,
6687 entry->object.sub_map,
6688 entry->offset);
6689 }
6690 } else {
6691 vm_map_submap_pmap_clean(
6692 dst_map, entry->vme_start,
6693 entry->vme_end,
6694 entry->object.sub_map,
6695 entry->offset);
6696 }
6697 vm_map_deallocate(
6698 entry->object.sub_map);
6699 } else {
6700 if(dst_map->mapped) {
6701 vm_object_pmap_protect(
6702 entry->object.vm_object,
6703 entry->offset,
6704 entry->vme_end
6705 - entry->vme_start,
6706 PMAP_NULL,
6707 entry->vme_start,
6708 VM_PROT_NONE);
6709 } else {
6710 pmap_remove(dst_map->pmap,
6711 (addr64_t)(entry->vme_start),
6712 (addr64_t)(entry->vme_end));
6713 }
6714 vm_object_deallocate(old_object);
6715 }
6716 }
6717
6718 entry->is_sub_map = FALSE;
6719 entry->object = copy_entry->object;
6720 object = entry->object.vm_object;
6721 entry->needs_copy = copy_entry->needs_copy;
6722 entry->wired_count = 0;
6723 entry->user_wired_count = 0;
6724 offset = entry->offset = copy_entry->offset;
6725
6726 vm_map_copy_entry_unlink(copy, copy_entry);
6727 vm_map_copy_entry_dispose(copy, copy_entry);
6728
6729 /*
6730 * We could try to push pages into the pmap at this point, BUT
6731 * this optimization only saved on average 2 us per page if ALL
6732 * the pages in the source were currently mapped and ALL the pages
6733 * in the dest were touched. If fewer than 2/3 of the pages were
6734 * touched, this optimization actually cost more cycles. It also
6735 * puts a lot of pressure on the pmap layer w.r.t. mapping structures.
6736 */
6737
6738 /*
6739 * Set up for the next iteration. The map
6740 * has not been unlocked, so the next
6741 * address should be at the end of this
6742 * entry, and the next map entry should be
6743 * the one following it.
6744 */
6745
6746 start = tmp_entry->vme_end;
6747 tmp_entry = tmp_entry->vme_next;
6748 } else {
6749 vm_map_version_t version;
6750 vm_object_t dst_object;
6751 vm_object_offset_t dst_offset;
6752 kern_return_t r;
6753
6754 slow_copy:
6755 dst_object = entry->object.vm_object;
6756 dst_offset = entry->offset;
6757
6758 /*
6759 * Take an object reference, and record
6760 * the map version information so that the
6761 * map can be safely unlocked.
6762 */
6763
6764 if (dst_object == VM_OBJECT_NULL) {
6765 /*
6766 * We would usually have just taken the
6767 * optimized path above if the destination
6768 * object has not been allocated yet. But we
6769 * now disable that optimization if the copy
6770 * entry's object is not backed by anonymous
6771 * memory to avoid replacing malloc'ed
6772 * (i.e. re-usable) anonymous memory with a
6773 * not-so-anonymous mapping.
6774 * So we have to handle this case here and
6775 * allocate a new VM object for this map entry.
6776 */
6777 dst_object = vm_object_allocate(
6778 entry->vme_end - entry->vme_start);
6779 dst_offset = 0;
6780 entry->object.vm_object = dst_object;
6781 entry->offset = dst_offset;
6782
6783 }
6784
6785 vm_object_reference(dst_object);
6786
6787 /* account for unlock bumping up timestamp */
6788 version.main_timestamp = dst_map->timestamp + 1;
6789
6790 vm_map_unlock(dst_map);
6791
6792 /*
6793 * Copy as much as possible in one pass
6794 */
6795
6796 copy_size = size;
6797 r = vm_fault_copy(
6798 copy_entry->object.vm_object,
6799 copy_entry->offset,
6800 &copy_size,
6801 dst_object,
6802 dst_offset,
6803 dst_map,
6804 &version,
6805 THREAD_UNINT );
6806
6807 /*
6808 * Release the object reference
6809 */
6810
6811 vm_object_deallocate(dst_object);
6812
6813 /*
6814 * If a hard error occurred, return it now
6815 */
6816
6817 if (r != KERN_SUCCESS)
6818 return(r);
6819
6820 if (copy_size != 0) {
6821 /*
6822 * Dispose of the copied region
6823 */
6824
6825 vm_map_copy_clip_end(copy, copy_entry,
6826 copy_entry->vme_start + copy_size);
6827 vm_map_copy_entry_unlink(copy, copy_entry);
6828 vm_object_deallocate(copy_entry->object.vm_object);
6829 vm_map_copy_entry_dispose(copy, copy_entry);
6830 }
6831
6832 /*
6833 * Pick up in the destination map where we left off.
6834 *
6835 * Use the version information to avoid a lookup
6836 * in the normal case.
6837 */
6838
6839 start += copy_size;
6840 vm_map_lock(dst_map);
6841 if (version.main_timestamp == dst_map->timestamp) {
6842 /* We can safely use saved tmp_entry value */
6843
6844 vm_map_clip_end(dst_map, tmp_entry, start);
6845 tmp_entry = tmp_entry->vme_next;
6846 } else {
6847 /* Must do lookup of tmp_entry */
6848
6849 if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
6850 vm_map_unlock(dst_map);
6851 return(KERN_INVALID_ADDRESS);
6852 }
6853 vm_map_clip_start(dst_map, tmp_entry, start);
6854 }
6855 }
6856 }/* while */
6857
6858 return(KERN_SUCCESS);
6859 }/* vm_map_copy_overwrite_aligned */
6860
6861 /*
6862 * Routine: vm_map_copyin_kernel_buffer [internal use only]
6863 *
6864 * Description:
6865 * Copy in data to a kernel buffer from space in the
6866 * source map. The original space may be optionally
6867 * deallocated.
6868 *
6869 * If successful, returns a new copy object.
6870 */
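/*
 * Buffer layout sketch (not part of the original source): the kernel-buffer
 * copy is a single kalloc'ed block with the payload stored immediately
 * after the vm_map_copy header, which is why cpy_kdata is set to
 * (copy + 1):
 *
 *     +--------------------+------------------------+
 *     | struct vm_map_copy | len bytes of user data |
 *     +--------------------+------------------------+
 *     ^copy                ^copy->cpy_kdata
 */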
6871 static kern_return_t
6872 vm_map_copyin_kernel_buffer(
6873 vm_map_t src_map,
6874 vm_map_offset_t src_addr,
6875 vm_map_size_t len,
6876 boolean_t src_destroy,
6877 vm_map_copy_t *copy_result)
6878 {
6879 kern_return_t kr;
6880 vm_map_copy_t copy;
6881 vm_size_t kalloc_size;
6882
6883 if ((vm_size_t) len != len) {
6884 /* "len" is too big and doesn't fit in a "vm_size_t" */
6885 return KERN_RESOURCE_SHORTAGE;
6886 }
6887 kalloc_size = (vm_size_t) (sizeof(struct vm_map_copy) + len);
6888 assert((vm_map_size_t) kalloc_size == sizeof (struct vm_map_copy) + len);
6889
6890 copy = (vm_map_copy_t) kalloc(kalloc_size);
6891 if (copy == VM_MAP_COPY_NULL) {
6892 return KERN_RESOURCE_SHORTAGE;
6893 }
6894 copy->type = VM_MAP_COPY_KERNEL_BUFFER;
6895 copy->size = len;
6896 copy->offset = 0;
6897 copy->cpy_kdata = (void *) (copy + 1);
6898 copy->cpy_kalloc_size = kalloc_size;
6899
6900 kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t) len);
6901 if (kr != KERN_SUCCESS) {
6902 kfree(copy, kalloc_size);
6903 return kr;
6904 }
6905 if (src_destroy) {
6906 (void) vm_map_remove(src_map, vm_map_trunc_page(src_addr),
6907 vm_map_round_page(src_addr + len),
6908 VM_MAP_REMOVE_INTERRUPTIBLE |
6909 VM_MAP_REMOVE_WAIT_FOR_KWIRE |
6910 (src_map == kernel_map) ?
6911 VM_MAP_REMOVE_KUNWIRE : 0);
6912 }
6913 *copy_result = copy;
6914 return KERN_SUCCESS;
6915 }
6916
6917 /*
6918 * Routine: vm_map_copyout_kernel_buffer [internal use only]
6919 *
6920 * Description:
6921 * Copy out data from a kernel buffer into space in the
6922 * destination map. The space may be optionally dynamically
6923 * allocated.
6924 *
6925 * If successful, consumes the copy object.
6926 * Otherwise, the caller is responsible for it.
6927 */
6928 static int vm_map_copyout_kernel_buffer_failures = 0;
6929 static kern_return_t
6930 vm_map_copyout_kernel_buffer(
6931 vm_map_t map,
6932 vm_map_address_t *addr, /* IN/OUT */
6933 vm_map_copy_t copy,
6934 boolean_t overwrite)
6935 {
6936 kern_return_t kr = KERN_SUCCESS;
6937 thread_t thread = current_thread();
6938
6939 if (!overwrite) {
6940
6941 /*
6942 * Allocate space in the target map for the data
6943 */
6944 *addr = 0;
6945 kr = vm_map_enter(map,
6946 addr,
6947 vm_map_round_page(copy->size),
6948 (vm_map_offset_t) 0,
6949 VM_FLAGS_ANYWHERE,
6950 VM_OBJECT_NULL,
6951 (vm_object_offset_t) 0,
6952 FALSE,
6953 VM_PROT_DEFAULT,
6954 VM_PROT_ALL,
6955 VM_INHERIT_DEFAULT);
6956 if (kr != KERN_SUCCESS)
6957 return kr;
6958 }
6959
6960 /*
6961 * Copyout the data from the kernel buffer to the target map.
6962 */
6963 if (thread->map == map) {
6964
6965 /*
6966 * If the target map is the current map, just do
6967 * the copy.
6968 */
6969 assert((vm_size_t) copy->size == copy->size);
6970 if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) {
6971 kr = KERN_INVALID_ADDRESS;
6972 }
6973 }
6974 else {
6975 vm_map_t oldmap;
6976
6977 /*
6978 * If the target map is another map, assume the
6979 * target's address space identity for the duration
6980 * of the copy.
6981 */
6982 vm_map_reference(map);
6983 oldmap = vm_map_switch(map);
6984
6985 assert((vm_size_t) copy->size == copy->size);
6986 if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) {
6987 vm_map_copyout_kernel_buffer_failures++;
6988 kr = KERN_INVALID_ADDRESS;
6989 }
6990
6991 (void) vm_map_switch(oldmap);
6992 vm_map_deallocate(map);
6993 }
6994
6995 if (kr != KERN_SUCCESS) {
6996 /* the copy failed, clean up */
6997 if (!overwrite) {
6998 /*
6999 * Deallocate the space we allocated in the target map.
7000 */
7001 (void) vm_map_remove(map,
7002 vm_map_trunc_page(*addr),
7003 vm_map_round_page(*addr +
7004 vm_map_round_page(copy->size)),
7005 VM_MAP_NO_FLAGS);
7006 *addr = 0;
7007 }
7008 } else {
7009 /* copy was successful, discard the copy structure */
7010 kfree(copy, copy->cpy_kalloc_size);
7011 }
7012
7013 return kr;
7014 }
7015
7016 /*
7017 * Macro: vm_map_copy_insert
7018 *
7019 * Description:
7020 * Link a copy chain ("copy") into a map at the
7021 * specified location (after "where").
7022 * Side effects:
7023 * The copy chain is destroyed.
7024 * Warning:
7025 * The arguments are evaluated multiple times.
7026 */
7027 #define vm_map_copy_insert(map, where, copy) \
7028 MACRO_BEGIN \
7029 vm_map_store_copy_insert(map, where, copy); \
7030 zfree(vm_map_copy_zone, copy); \
7031 MACRO_END
7032
7033 /*
7034 * Routine: vm_map_copyout
7035 *
7036 * Description:
7037 * Copy out a copy chain ("copy") into newly-allocated
7038 * space in the destination map.
7039 *
7040 * If successful, consumes the copy object.
7041 * Otherwise, the caller is responsible for it.
7042 */
7043 kern_return_t
7044 vm_map_copyout(
7045 vm_map_t dst_map,
7046 vm_map_address_t *dst_addr, /* OUT */
7047 vm_map_copy_t copy)
7048 {
7049 vm_map_size_t size;
7050 vm_map_size_t adjustment;
7051 vm_map_offset_t start;
7052 vm_object_offset_t vm_copy_start;
7053 vm_map_entry_t last;
7054 register
7055 vm_map_entry_t entry;
7056
7057 /*
7058 * Check for null copy object.
7059 */
7060
7061 if (copy == VM_MAP_COPY_NULL) {
7062 *dst_addr = 0;
7063 return(KERN_SUCCESS);
7064 }
7065
7066 /*
7067 * Check for special copy object, created
7068 * by vm_map_copyin_object.
7069 */
7070
7071 if (copy->type == VM_MAP_COPY_OBJECT) {
7072 vm_object_t object = copy->cpy_object;
7073 kern_return_t kr;
7074 vm_object_offset_t offset;
7075
7076 offset = vm_object_trunc_page(copy->offset);
7077 size = vm_map_round_page(copy->size +
7078 (vm_map_size_t)(copy->offset - offset));
7079 *dst_addr = 0;
7080 kr = vm_map_enter(dst_map, dst_addr, size,
7081 (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
7082 object, offset, FALSE,
7083 VM_PROT_DEFAULT, VM_PROT_ALL,
7084 VM_INHERIT_DEFAULT);
7085 if (kr != KERN_SUCCESS)
7086 return(kr);
7087 /* Account for non-pagealigned copy object */
7088 *dst_addr += (vm_map_offset_t)(copy->offset - offset);
7089 zfree(vm_map_copy_zone, copy);
7090 return(KERN_SUCCESS);
7091 }
7092
7093 /*
7094 * Check for special kernel buffer allocated
7095 * by new_ipc_kmsg_copyin.
7096 */
7097
7098 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
7099 return(vm_map_copyout_kernel_buffer(dst_map, dst_addr,
7100 copy, FALSE));
7101 }
7102
7103 /*
7104 * Find space for the data
7105 */
7106
7107 vm_copy_start = vm_object_trunc_page(copy->offset);
7108 size = vm_map_round_page((vm_map_size_t)copy->offset + copy->size)
7109 - vm_copy_start;
7110
7111 StartAgain: ;
7112
7113 vm_map_lock(dst_map);
7114 if( dst_map->disable_vmentry_reuse == TRUE) {
7115 VM_MAP_HIGHEST_ENTRY(dst_map, entry, start);
7116 last = entry;
7117 } else {
7118 assert(first_free_is_valid(dst_map));
7119 start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
7120 vm_map_min(dst_map) : last->vme_end;
7121 }
7122
7123 while (TRUE) {
7124 vm_map_entry_t next = last->vme_next;
7125 vm_map_offset_t end = start + size;
7126
7127 if ((end > dst_map->max_offset) || (end < start)) {
7128 if (dst_map->wait_for_space) {
7129 if (size <= (dst_map->max_offset - dst_map->min_offset)) {
7130 assert_wait((event_t) dst_map,
7131 THREAD_INTERRUPTIBLE);
7132 vm_map_unlock(dst_map);
7133 thread_block(THREAD_CONTINUE_NULL);
7134 goto StartAgain;
7135 }
7136 }
7137 vm_map_unlock(dst_map);
7138 return(KERN_NO_SPACE);
7139 }
7140
7141 if ((next == vm_map_to_entry(dst_map)) ||
7142 (next->vme_start >= end))
7143 break;
7144
7145 last = next;
7146 start = last->vme_end;
7147 }
7148
7149 /*
7150 * Since we're going to just drop the map
7151 * entries from the copy into the destination
7152 * map, they must come from the same pool.
7153 */
7154
7155 if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
7156 /*
7157 * Mismatches occur when dealing with the default
7158 * pager.
7159 */
7160 zone_t old_zone;
7161 vm_map_entry_t next, new;
7162
7163 /*
7164 * Find the zone that the copies were allocated from
7165 */
7166 old_zone = (copy->cpy_hdr.entries_pageable)
7167 ? vm_map_entry_zone
7168 : vm_map_kentry_zone;
7169 entry = vm_map_copy_first_entry(copy);
7170
7171 /*
7172 * Reinitialize the copy so that vm_map_copy_entry_link
7173 * will work.
7174 */
7175 vm_map_store_copy_reset(copy, entry);
7176 copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;
7177
7178 /*
7179 * Copy each entry.
7180 */
7181 while (entry != vm_map_copy_to_entry(copy)) {
7182 new = vm_map_copy_entry_create(copy);
7183 vm_map_entry_copy_full(new, entry);
7184 new->use_pmap = FALSE; /* clr address space specifics */
7185 vm_map_copy_entry_link(copy,
7186 vm_map_copy_last_entry(copy),
7187 new);
7188 next = entry->vme_next;
7189 zfree(old_zone, entry);
7190 entry = next;
7191 }
7192 }
7193
7194 /*
7195 * Adjust the addresses in the copy chain, and
7196 * reset the region attributes.
7197 */
7198
7199 adjustment = start - vm_copy_start;
7200 for (entry = vm_map_copy_first_entry(copy);
7201 entry != vm_map_copy_to_entry(copy);
7202 entry = entry->vme_next) {
7203 entry->vme_start += adjustment;
7204 entry->vme_end += adjustment;
7205
7206 entry->inheritance = VM_INHERIT_DEFAULT;
7207 entry->protection = VM_PROT_DEFAULT;
7208 entry->max_protection = VM_PROT_ALL;
7209 entry->behavior = VM_BEHAVIOR_DEFAULT;
7210
7211 /*
7212 * If the entry is now wired,
7213 * map the pages into the destination map.
7214 */
7215 if (entry->wired_count != 0) {
7216 register vm_map_offset_t va;
7217 vm_object_offset_t offset;
7218 register vm_object_t object;
7219 vm_prot_t prot;
7220 int type_of_fault;
7221
7222 object = entry->object.vm_object;
7223 offset = entry->offset;
7224 va = entry->vme_start;
7225
7226 pmap_pageable(dst_map->pmap,
7227 entry->vme_start,
7228 entry->vme_end,
7229 TRUE);
7230
7231 while (va < entry->vme_end) {
7232 register vm_page_t m;
7233
7234 /*
7235 * Look up the page in the object.
7236 * Assert that the page will be found in the
7237 * top object:
7238 * either
7239 * the object was newly created by
7240 * vm_object_copy_slowly, and has
7241 * copies of all of the pages from
7242 * the source object
7243 * or
7244 * the object was moved from the old
7245 * map entry; because the old map
7246 * entry was wired, all of the pages
7247 * were in the top-level object.
7248 * (XXX not true if we wire pages for
7249 * reading)
7250 */
7251 vm_object_lock(object);
7252
7253 m = vm_page_lookup(object, offset);
7254 if (m == VM_PAGE_NULL || !VM_PAGE_WIRED(m) ||
7255 m->absent)
7256 panic("vm_map_copyout: wiring %p", m);
7257
7258 /*
7259 * ENCRYPTED SWAP:
7260 * The page is assumed to be wired here, so it
7261 * shouldn't be encrypted. Otherwise, we
7262 * couldn't enter it in the page table, since
7263 * we don't want the user to see the encrypted
7264 * data.
7265 */
7266 ASSERT_PAGE_DECRYPTED(m);
7267
7268 prot = entry->protection;
7269
7270 if (override_nx(dst_map, entry->alias) && prot)
7271 prot |= VM_PROT_EXECUTE;
7272
7273 type_of_fault = DBG_CACHE_HIT_FAULT;
7274
7275 vm_fault_enter(m, dst_map->pmap, va, prot, prot,
7276 VM_PAGE_WIRED(m), FALSE, FALSE, FALSE,
7277 &type_of_fault);
7278
7279 vm_object_unlock(object);
7280
7281 offset += PAGE_SIZE_64;
7282 va += PAGE_SIZE;
7283 }
7284 }
7285 }
7286
7287 /*
7288 * Correct the page alignment for the result
7289 */
7290
7291 *dst_addr = start + (copy->offset - vm_copy_start);
7292
7293 /*
7294 * Update the hints and the map size
7295 */
7296
7297 SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy));
7298
7299 dst_map->size += size;
7300
7301 /*
7302 * Link in the copy
7303 */
7304
7305 vm_map_copy_insert(dst_map, last, copy);
7306
7307 vm_map_unlock(dst_map);
7308
7309 /*
7310 * XXX If wiring_required, call vm_map_pageable
7311 */
7312
7313 return(KERN_SUCCESS);
7314 }
7315
7316 /*
7317 * Routine: vm_map_copyin
7318 *
7319 * Description:
7320 * see vm_map_copyin_common. Exported via Unsupported.exports.
7321 *
7322 */
7323
7324 #undef vm_map_copyin
7325
7326 kern_return_t
7327 vm_map_copyin(
7328 vm_map_t src_map,
7329 vm_map_address_t src_addr,
7330 vm_map_size_t len,
7331 boolean_t src_destroy,
7332 vm_map_copy_t *copy_result) /* OUT */
7333 {
7334 return(vm_map_copyin_common(src_map, src_addr, len, src_destroy,
7335 FALSE, copy_result, FALSE));
7336 }
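/*
 * Illustrative caller sketch (not part of the original source): moving a
 * region into newly allocated space in another map. vm_map_copyout() only
 * consumes the copy object on success, so the caller discards it on
 * failure:
 *
 *     vm_map_copy_t           copy;
 *     vm_map_address_t        dst_addr;
 *     kern_return_t           kr;
 *
 *     kr = vm_map_copyin(src_map, src_addr, len, FALSE, &copy);
 *     if (kr == KERN_SUCCESS) {
 *             kr = vm_map_copyout(dst_map, &dst_addr, copy);
 *             if (kr != KERN_SUCCESS)
 *                     vm_map_copy_discard(copy);
 *     }
 */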
7337
7338 /*
7339 * Routine: vm_map_copyin_common
7340 *
7341 * Description:
7342 * Copy the specified region (src_addr, len) from the
7343 * source address space (src_map), possibly removing
7344 * the region from the source address space (src_destroy).
7345 *
7346 * Returns:
7347 * A vm_map_copy_t object (copy_result), suitable for
7348 * insertion into another address space (using vm_map_copyout),
7349 * copying over another address space region (using
7350 * vm_map_copy_overwrite). If the copy is unused, it
7351 * should be destroyed (using vm_map_copy_discard).
7352 *
7353 * In/out conditions:
7354 * The source map should not be locked on entry.
7355 */
7356
7357 typedef struct submap_map {
7358 vm_map_t parent_map;
7359 vm_map_offset_t base_start;
7360 vm_map_offset_t base_end;
7361 vm_map_size_t base_len;
7362 struct submap_map *next;
7363 } submap_map_t;
7364
7365 kern_return_t
7366 vm_map_copyin_common(
7367 vm_map_t src_map,
7368 vm_map_address_t src_addr,
7369 vm_map_size_t len,
7370 boolean_t src_destroy,
7371 __unused boolean_t src_volatile,
7372 vm_map_copy_t *copy_result, /* OUT */
7373 boolean_t use_maxprot)
7374 {
7375 vm_map_entry_t tmp_entry; /* Result of last map lookup --
7376 * in multi-level lookup, this
7377 * entry contains the actual
7378 * vm_object/offset.
7379 */
7380 register
7381 vm_map_entry_t new_entry = VM_MAP_ENTRY_NULL; /* Map entry for copy */
7382
7383 vm_map_offset_t src_start; /* Start of current entry --
7384 * where copy is taking place now
7385 */
7386 vm_map_offset_t src_end; /* End of entire region to be
7387 * copied */
7388 vm_map_offset_t src_base;
7389 vm_map_t base_map = src_map;
7390 boolean_t map_share=FALSE;
7391 submap_map_t *parent_maps = NULL;
7392
7393 register
7394 vm_map_copy_t copy; /* Resulting copy */
7395 vm_map_address_t copy_addr;
7396
7397 /*
7398 * Check for copies of zero bytes.
7399 */
7400
7401 if (len == 0) {
7402 *copy_result = VM_MAP_COPY_NULL;
7403 return(KERN_SUCCESS);
7404 }
7405
7406 /*
7407 * Check that the end address doesn't overflow
7408 */
7409 src_end = src_addr + len;
7410 if (src_end < src_addr)
7411 return KERN_INVALID_ADDRESS;
7412
7413 /*
7414 * If the copy is sufficiently small, use a kernel buffer instead
7415 * of making a virtual copy. The theory being that the cost of
7416 * setting up VM (and taking C-O-W faults) dominates the copy costs
7417 * for small regions.
7418 */
7419 if ((len < msg_ool_size_small) && !use_maxprot)
7420 return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
7421 src_destroy, copy_result);
7422
7423 /*
7424 * Compute (page aligned) start and end of region
7425 */
7426 src_start = vm_map_trunc_page(src_addr);
7427 src_end = vm_map_round_page(src_end);
7428
7429 XPR(XPR_VM_MAP, "vm_map_copyin_common map 0x%x addr 0x%x len 0x%x dest %d\n", src_map, src_addr, len, src_destroy, 0);
7430
7431 /*
7432 * Allocate a header element for the list.
7433 *
7434 * Use the start and end in the header to
7435 * remember the endpoints prior to rounding.
7436 */
7437
7438 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
7439 vm_map_copy_first_entry(copy) =
7440 vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
7441 copy->type = VM_MAP_COPY_ENTRY_LIST;
7442 copy->cpy_hdr.nentries = 0;
7443 copy->cpy_hdr.entries_pageable = TRUE;
7444
7445 vm_map_store_init( &(copy->cpy_hdr) );
7446
7447 copy->offset = src_addr;
7448 copy->size = len;
7449
7450 new_entry = vm_map_copy_entry_create(copy);
7451
7452 #define RETURN(x) \
7453 MACRO_BEGIN \
7454 vm_map_unlock(src_map); \
7455 if(src_map != base_map) \
7456 vm_map_deallocate(src_map); \
7457 if (new_entry != VM_MAP_ENTRY_NULL) \
7458 vm_map_copy_entry_dispose(copy,new_entry); \
7459 vm_map_copy_discard(copy); \
7460 { \
7461 submap_map_t *_ptr; \
7462 \
7463 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
7464 parent_maps=parent_maps->next; \
7465 if (_ptr->parent_map != base_map) \
7466 vm_map_deallocate(_ptr->parent_map); \
7467 kfree(_ptr, sizeof(submap_map_t)); \
7468 } \
7469 } \
7470 MACRO_RETURN(x); \
7471 MACRO_END
7472
7473 /*
7474 * Find the beginning of the region.
7475 */
7476
7477 vm_map_lock(src_map);
7478
7479 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry))
7480 RETURN(KERN_INVALID_ADDRESS);
7481 if(!tmp_entry->is_sub_map) {
7482 vm_map_clip_start(src_map, tmp_entry, src_start);
7483 }
7484 /* set for later submap fix-up */
7485 copy_addr = src_start;
7486
7487 /*
7488 * Go through entries until we get to the end.
7489 */
7490
7491 while (TRUE) {
7492 register
7493 vm_map_entry_t src_entry = tmp_entry; /* Top-level entry */
7494 vm_map_size_t src_size; /* Size of source
7495 * map entry (in both
7496 * maps)
7497 */
7498
7499 register
7500 vm_object_t src_object; /* Object to copy */
7501 vm_object_offset_t src_offset;
7502
7503 boolean_t src_needs_copy; /* Should source map
7504 * be made read-only
7505 * for copy-on-write?
7506 */
7507
7508 boolean_t new_entry_needs_copy; /* Will new entry be COW? */
7509
7510 boolean_t was_wired; /* Was source wired? */
7511 vm_map_version_t version; /* Version before locks
7512 * dropped to make copy
7513 */
7514 kern_return_t result; /* Return value from
7515 * copy_strategically.
7516 */
7517 while(tmp_entry->is_sub_map) {
7518 vm_map_size_t submap_len;
7519 submap_map_t *ptr;
7520
7521 ptr = (submap_map_t *)kalloc(sizeof(submap_map_t));
7522 ptr->next = parent_maps;
7523 parent_maps = ptr;
7524 ptr->parent_map = src_map;
7525 ptr->base_start = src_start;
7526 ptr->base_end = src_end;
7527 submap_len = tmp_entry->vme_end - src_start;
7528 if(submap_len > (src_end-src_start))
7529 submap_len = src_end-src_start;
7530 ptr->base_len = submap_len;
7531
7532 src_start -= tmp_entry->vme_start;
7533 src_start += tmp_entry->offset;
7534 src_end = src_start + submap_len;
7535 src_map = tmp_entry->object.sub_map;
7536 vm_map_lock(src_map);
7537 /* keep an outstanding reference for all maps in */
7538 /* the tree of parent maps, except the base map */
7539 vm_map_reference(src_map);
7540 vm_map_unlock(ptr->parent_map);
7541 if (!vm_map_lookup_entry(
7542 src_map, src_start, &tmp_entry))
7543 RETURN(KERN_INVALID_ADDRESS);
7544 map_share = TRUE;
7545 if(!tmp_entry->is_sub_map)
7546 vm_map_clip_start(src_map, tmp_entry, src_start);
7547 src_entry = tmp_entry;
7548 }
7549 /* we are now in the lowest level submap... */
7550
7551 if ((tmp_entry->object.vm_object != VM_OBJECT_NULL) &&
7552 (tmp_entry->object.vm_object->phys_contiguous)) {
7553 /* This is not supported for now. In the future */
7554 /* we will need to detect the phys_contig */
7555 /* condition and then upgrade copy_slowly */
7556 /* to do a physical copy from the device-memory- */
7557 /* based object. We can piggy-back off of */
7558 /* the was_wired boolean to set up the */
7559 /* proper handling. */
7560 RETURN(KERN_PROTECTION_FAILURE);
7561 }
7562 /*
7563 * Create a new address map entry to hold the result.
7564 * Fill in the fields from the appropriate source entries.
7565 * We must unlock the source map to do this if we need
7566 * to allocate a map entry.
7567 */
7568 if (new_entry == VM_MAP_ENTRY_NULL) {
7569 version.main_timestamp = src_map->timestamp;
7570 vm_map_unlock(src_map);
7571
7572 new_entry = vm_map_copy_entry_create(copy);
7573
7574 vm_map_lock(src_map);
7575 if ((version.main_timestamp + 1) != src_map->timestamp) {
7576 if (!vm_map_lookup_entry(src_map, src_start,
7577 &tmp_entry)) {
7578 RETURN(KERN_INVALID_ADDRESS);
7579 }
7580 if (!tmp_entry->is_sub_map)
7581 vm_map_clip_start(src_map, tmp_entry, src_start);
7582 continue; /* restart w/ new tmp_entry */
7583 }
7584 }
7585
7586 /*
7587 * Verify that the region can be read.
7588 */
7589 if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE &&
7590 !use_maxprot) ||
7591 (src_entry->max_protection & VM_PROT_READ) == 0)
7592 RETURN(KERN_PROTECTION_FAILURE);
7593
7594 /*
7595 * Clip against the endpoints of the entire region.
7596 */
7597
7598 vm_map_clip_end(src_map, src_entry, src_end);
7599
7600 src_size = src_entry->vme_end - src_start;
7601 src_object = src_entry->object.vm_object;
7602 src_offset = src_entry->offset;
7603 was_wired = (src_entry->wired_count != 0);
7604
7605 vm_map_entry_copy(new_entry, src_entry);
7606 new_entry->use_pmap = FALSE; /* clr address space specifics */
7607
7608 /*
7609 * Attempt non-blocking copy-on-write optimizations.
7610 */
7611
7612 if (src_destroy &&
7613 (src_object == VM_OBJECT_NULL ||
7614 (src_object->internal && !src_object->true_share
7615 && !map_share))) {
7616 /*
7617 * If we are destroying the source, and the object
7618 * is internal, we can move the object reference
7619 * from the source to the copy. The copy is
7620 * copy-on-write only if the source is.
7621 * We make another reference to the object, because
7622 * destroying the source entry will deallocate it.
7623 */
7624 vm_object_reference(src_object);
7625
7626 /*
7627 * Copy is always unwired. vm_map_copy_entry
7628 * set its wired count to zero.
7629 */
7630
7631 goto CopySuccessful;
7632 }
7633
7634
7635 RestartCopy:
7636 XPR(XPR_VM_MAP, "vm_map_copyin_common src_obj 0x%x ent 0x%x obj 0x%x was_wired %d\n",
7637 src_object, new_entry, new_entry->object.vm_object,
7638 was_wired, 0);
7639 if ((src_object == VM_OBJECT_NULL ||
7640 (!was_wired && !map_share && !tmp_entry->is_shared)) &&
7641 vm_object_copy_quickly(
7642 &new_entry->object.vm_object,
7643 src_offset,
7644 src_size,
7645 &src_needs_copy,
7646 &new_entry_needs_copy)) {
7647
7648 new_entry->needs_copy = new_entry_needs_copy;
7649
7650 /*
7651 * Handle copy-on-write obligations
7652 */
7653
7654 if (src_needs_copy && !tmp_entry->needs_copy) {
7655 vm_prot_t prot;
7656
7657 prot = src_entry->protection & ~VM_PROT_WRITE;
7658
7659 if (override_nx(src_map, src_entry->alias) && prot)
7660 prot |= VM_PROT_EXECUTE;
7661
7662 vm_object_pmap_protect(
7663 src_object,
7664 src_offset,
7665 src_size,
7666 (src_entry->is_shared ?
7667 PMAP_NULL
7668 : src_map->pmap),
7669 src_entry->vme_start,
7670 prot);
7671
7672 tmp_entry->needs_copy = TRUE;
7673 }
7674
7675 /*
7676 * The map has never been unlocked, so it's safe
7677 * to move to the next entry rather than doing
7678 * another lookup.
7679 */
7680
7681 goto CopySuccessful;
7682 }
7683
7684 /*
7685 * Take an object reference, so that we may
7686 * release the map lock(s).
7687 */
7688
7689 assert(src_object != VM_OBJECT_NULL);
7690 vm_object_reference(src_object);
7691
7692 /*
7693 * Record the timestamp for later verification.
7694 * Unlock the map.
7695 */
7696
7697 version.main_timestamp = src_map->timestamp;
7698 vm_map_unlock(src_map); /* Increments timestamp once! */
7699
7700 /*
7701 * Perform the copy
7702 */
7703
7704 if (was_wired) {
7705 CopySlowly:
7706 vm_object_lock(src_object);
7707 result = vm_object_copy_slowly(
7708 src_object,
7709 src_offset,
7710 src_size,
7711 THREAD_UNINT,
7712 &new_entry->object.vm_object);
7713 new_entry->offset = 0;
7714 new_entry->needs_copy = FALSE;
7715
7716 }
7717 else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
7718 (tmp_entry->is_shared || map_share)) {
7719 vm_object_t new_object;
7720
7721 vm_object_lock_shared(src_object);
7722 new_object = vm_object_copy_delayed(
7723 src_object,
7724 src_offset,
7725 src_size,
7726 TRUE);
7727 if (new_object == VM_OBJECT_NULL)
7728 goto CopySlowly;
7729
7730 new_entry->object.vm_object = new_object;
7731 new_entry->needs_copy = TRUE;
7732 result = KERN_SUCCESS;
7733
7734 } else {
7735 result = vm_object_copy_strategically(src_object,
7736 src_offset,
7737 src_size,
7738 &new_entry->object.vm_object,
7739 &new_entry->offset,
7740 &new_entry_needs_copy);
7741
7742 new_entry->needs_copy = new_entry_needs_copy;
7743 }
7744
7745 if (result != KERN_SUCCESS &&
7746 result != KERN_MEMORY_RESTART_COPY) {
7747 vm_map_lock(src_map);
7748 RETURN(result);
7749 }
7750
7751 /*
7752 * Throw away the extra reference
7753 */
7754
7755 vm_object_deallocate(src_object);
7756
7757 /*
7758 * Verify that the map has not substantially
7759 * changed while the copy was being made.
7760 */
7761
7762 vm_map_lock(src_map);
7763
7764 if ((version.main_timestamp + 1) == src_map->timestamp)
7765 goto VerificationSuccessful;
7766
7767 /*
7768 * Simple version comparison failed.
7769 *
7770 * Retry the lookup and verify that the
7771 * same object/offset are still present.
7772 *
7773 * [Note: a memory manager that colludes with
7774 * the calling task can detect that we have
7775 * cheated. While the map was unlocked, the
7776 * mapping could have been changed and restored.]
7777 */
7778
7779 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
7780 RETURN(KERN_INVALID_ADDRESS);
7781 }
7782
7783 src_entry = tmp_entry;
7784 vm_map_clip_start(src_map, src_entry, src_start);
7785
7786 if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) &&
7787 !use_maxprot) ||
7788 ((src_entry->max_protection & VM_PROT_READ) == 0))
7789 goto VerificationFailed;
7790
7791 if (src_entry->vme_end < new_entry->vme_end)
7792 src_size = (new_entry->vme_end = src_entry->vme_end) - src_start;
7793
7794 if ((src_entry->object.vm_object != src_object) ||
7795 (src_entry->offset != src_offset) ) {
7796
7797 /*
7798 * Verification failed.
7799 *
7800 * Start over with this top-level entry.
7801 */
7802
7803 VerificationFailed: ;
7804
7805 vm_object_deallocate(new_entry->object.vm_object);
7806 tmp_entry = src_entry;
7807 continue;
7808 }
7809
7810 /*
7811 * Verification succeeded.
7812 */
7813
7814 VerificationSuccessful: ;
7815
7816 if (result == KERN_MEMORY_RESTART_COPY)
7817 goto RestartCopy;
7818
7819 /*
7820 * Copy succeeded.
7821 */
7822
7823 CopySuccessful: ;
7824
7825 /*
7826 * Link in the new copy entry.
7827 */
7828
7829 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
7830 new_entry);
7831
7832 /*
7833 * Determine whether the entire region
7834 * has been copied.
7835 */
7836 src_base = src_start;
7837 src_start = new_entry->vme_end;
7838 new_entry = VM_MAP_ENTRY_NULL;
7839 while ((src_start >= src_end) && (src_end != 0)) {
7840 if (src_map != base_map) {
7841 submap_map_t *ptr;
7842
7843 ptr = parent_maps;
7844 assert(ptr != NULL);
7845 parent_maps = parent_maps->next;
7846
7847 /* fix up the damage we did in that submap */
7848 vm_map_simplify_range(src_map,
7849 src_base,
7850 src_end);
7851
7852 vm_map_unlock(src_map);
7853 vm_map_deallocate(src_map);
7854 vm_map_lock(ptr->parent_map);
7855 src_map = ptr->parent_map;
7856 src_base = ptr->base_start;
7857 src_start = ptr->base_start + ptr->base_len;
7858 src_end = ptr->base_end;
7859 if ((src_end > src_start) &&
7860 !vm_map_lookup_entry(
7861 src_map, src_start, &tmp_entry))
7862 RETURN(KERN_INVALID_ADDRESS);
7863 kfree(ptr, sizeof(submap_map_t));
7864 if(parent_maps == NULL)
7865 map_share = FALSE;
7866 src_entry = tmp_entry->vme_prev;
7867 } else
7868 break;
7869 }
7870 if ((src_start >= src_end) && (src_end != 0))
7871 break;
7872
7873 /*
7874 * Verify that there are no gaps in the region
7875 */
7876
7877 tmp_entry = src_entry->vme_next;
7878 if ((tmp_entry->vme_start != src_start) ||
7879 (tmp_entry == vm_map_to_entry(src_map)))
7880 RETURN(KERN_INVALID_ADDRESS);
7881 }
7882
7883 /*
7884 * If the source should be destroyed, do it now, since the
7885 * copy was successful.
7886 */
7887 if (src_destroy) {
7888 (void) vm_map_delete(src_map,
7889 vm_map_trunc_page(src_addr),
7890 src_end,
7891 (src_map == kernel_map) ?
7892 VM_MAP_REMOVE_KUNWIRE :
7893 VM_MAP_NO_FLAGS,
7894 VM_MAP_NULL);
7895 } else {
7896 /* fix up the damage we did in the base map */
7897 vm_map_simplify_range(src_map,
7898 vm_map_trunc_page(src_addr),
7899 vm_map_round_page(src_end));
7900 }
7901
7902 vm_map_unlock(src_map);
7903
7904 /* Fix-up start and end points in copy. This is necessary */
7905 /* when the various entries in the copy object were picked */
7906 /* up from different sub-maps */
7907
7908 tmp_entry = vm_map_copy_first_entry(copy);
7909 while (tmp_entry != vm_map_copy_to_entry(copy)) {
7910 tmp_entry->vme_end = copy_addr +
7911 (tmp_entry->vme_end - tmp_entry->vme_start);
7912 tmp_entry->vme_start = copy_addr;
7913 copy_addr += tmp_entry->vme_end - tmp_entry->vme_start;
7914 tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next;
7915 }
7916
7917 *copy_result = copy;
7918 return(KERN_SUCCESS);
7919
7920 #undef RETURN
7921 }
7922
7923 /*
7924 * vm_map_copyin_object:
7925 *
7926 * Create a copy object from an object.
7927 * Our caller donates an object reference.
7928 */
7929
7930 kern_return_t
7931 vm_map_copyin_object(
7932 vm_object_t object,
7933 vm_object_offset_t offset, /* offset of region in object */
7934 vm_object_size_t size, /* size of region in object */
7935 vm_map_copy_t *copy_result) /* OUT */
7936 {
7937 vm_map_copy_t copy; /* Resulting copy */
7938
7939 /*
7940 * We drop the object into a special copy object
7941 * that contains the object directly.
7942 */
7943
7944 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
7945 copy->type = VM_MAP_COPY_OBJECT;
7946 copy->cpy_object = object;
7947 copy->offset = offset;
7948 copy->size = size;
7949
7950 *copy_result = copy;
7951 return(KERN_SUCCESS);
7952 }
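/*
 * Illustrative sketch (not part of the original source): a copy created
 * here has type VM_MAP_COPY_OBJECT and is typically consumed by
 * vm_map_copyout(), which enters the donated object directly into the
 * destination map. The caller donates one object reference; take an
 * extra reference first if it needs to keep its own:
 *
 *     vm_map_copy_t           copy;
 *     vm_map_address_t        dst_addr;
 *
 *     vm_object_reference(object);            /* reference to donate */
 *     vm_map_copyin_object(object, offset, size, &copy);
 *     vm_map_copyout(dst_map, &dst_addr, copy);
 */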
7953
7954 static void
7955 vm_map_fork_share(
7956 vm_map_t old_map,
7957 vm_map_entry_t old_entry,
7958 vm_map_t new_map)
7959 {
7960 vm_object_t object;
7961 vm_map_entry_t new_entry;
7962
7963 /*
7964 * New sharing code. New map entry
7965 * references original object. Internal
7966 * objects use asynchronous copy algorithm for
7967 * future copies. First make sure we have
7968 * the right object. If we need a shadow,
7969 * or someone else already has one, then
7970 * make a new shadow and share it.
7971 */
7972
7973 object = old_entry->object.vm_object;
7974 if (old_entry->is_sub_map) {
7975 assert(old_entry->wired_count == 0);
7976 #ifndef NO_NESTED_PMAP
7977 if(old_entry->use_pmap) {
7978 kern_return_t result;
7979
7980 result = pmap_nest(new_map->pmap,
7981 (old_entry->object.sub_map)->pmap,
7982 (addr64_t)old_entry->vme_start,
7983 (addr64_t)old_entry->vme_start,
7984 (uint64_t)(old_entry->vme_end - old_entry->vme_start));
7985 if(result)
7986 panic("vm_map_fork_share: pmap_nest failed!");
7987 }
7988 #endif /* NO_NESTED_PMAP */
7989 } else if (object == VM_OBJECT_NULL) {
7990 object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
7991 old_entry->vme_start));
7992 old_entry->offset = 0;
7993 old_entry->object.vm_object = object;
7994 assert(!old_entry->needs_copy);
7995 } else if (object->copy_strategy !=
7996 MEMORY_OBJECT_COPY_SYMMETRIC) {
7997
7998 /*
7999 * We are already using an asymmetric
8000 * copy, and therefore we already have
8001 * the right object.
8002 */
8003
8004 assert(! old_entry->needs_copy);
8005 }
8006 else if (old_entry->needs_copy || /* case 1 */
8007 object->shadowed || /* case 2 */
8008 (!object->true_share && /* case 3 */
8009 !old_entry->is_shared &&
8010 (object->vo_size >
8011 (vm_map_size_t)(old_entry->vme_end -
8012 old_entry->vme_start)))) {
8013
8014 /*
8015 * We need to create a shadow.
8016 * There are three cases here.
8017 * In the first case, we need to
8018 * complete a deferred symmetrical
8019 * copy that we participated in.
8020 * In the second and third cases,
8021 * we need to create the shadow so
8022 * that changes that we make to the
8023 * object do not interfere with
8024 * any symmetrical copies which
8025 * have occured (case 2) or which
8026 * might occur (case 3).
8027 *
8028 * The first case is when we had
8029 * deferred shadow object creation
8030 * via the entry->needs_copy mechanism.
8031 * This mechanism only works when
8032 * only one entry points to the source
8033 * object, and we are about to create
8034 * a second entry pointing to the
8035 * same object. The problem is that
8036 * there is no way of mapping from
8037 * an object to the entries pointing
8038 * to it. (Deferred shadow creation
8039 * works with one entry because it occurs
8040 * at fault time, and we walk from the
8041 * entry to the object when handling
8042 * the fault.)
8043 *
8044 * The second case is when the object
8045 * to be shared has already been copied
8046 * with a symmetric copy, but we point
8047 * directly to the object without
8048 * needs_copy set in our entry. (This
8049 * can happen because different ranges
8050 * of an object can be pointed to by
8051 * different entries. In particular,
8052 * a single entry pointing to an object
8053 * can be split by a call to vm_inherit,
8054 * which, combined with task_create, can
8055 * result in the different entries
8056 * having different needs_copy values.)
8057 * The shadowed flag in the object allows
8058 * us to detect this case. The problem
8059 * with this case is that if this object
8060 * has or will have shadows, then we
8061 * must not perform an asymmetric copy
8062 * of this object, since such a copy
8063 * allows the object to be changed, which
8064 * will break the previous symmetrical
8065 * copies (which rely upon the object
8066 * not changing). In a sense, the shadowed
8067 * flag says "don't change this object".
8068 * We fix this by creating a shadow
8069 * object for this object, and sharing
8070 * that. This works because we are free
8071 * to change the shadow object (and thus
8072 * to use an asymmetric copy strategy);
8073 * this is also semantically correct,
8074 * since this object is temporary, and
8075 * therefore a copy of the object is
8076 * as good as the object itself. (This
8077 * is not true for permanent objects,
8078 * since the pager needs to see changes,
8079 * which won't happen if the changes
8080 * are made to a copy.)
8081 *
8082 * The third case is when the object
8083 * to be shared has parts sticking
8084 * outside of the entry we're working
8085 * with, and thus may in the future
8086 * be subject to a symmetrical copy.
8087 * (This is a preemptive version of
8088 * case 2.)
8089 */
8090 vm_object_shadow(&old_entry->object.vm_object,
8091 &old_entry->offset,
8092 (vm_map_size_t) (old_entry->vme_end -
8093 old_entry->vme_start));
8094
8095 /*
8096 * If we're making a shadow for other than
8097 * copy-on-write reasons, then we have
8098 * to remove write permission.
8099 */
8100
8101 if (!old_entry->needs_copy &&
8102 (old_entry->protection & VM_PROT_WRITE)) {
8103 vm_prot_t prot;
8104
8105 prot = old_entry->protection & ~VM_PROT_WRITE;
8106
8107 if (override_nx(old_map, old_entry->alias) && prot)
8108 prot |= VM_PROT_EXECUTE;
8109
8110 if (old_map->mapped) {
8111 vm_object_pmap_protect(
8112 old_entry->object.vm_object,
8113 old_entry->offset,
8114 (old_entry->vme_end -
8115 old_entry->vme_start),
8116 PMAP_NULL,
8117 old_entry->vme_start,
8118 prot);
8119 } else {
8120 pmap_protect(old_map->pmap,
8121 old_entry->vme_start,
8122 old_entry->vme_end,
8123 prot);
8124 }
8125 }
8126
8127 old_entry->needs_copy = FALSE;
8128 object = old_entry->object.vm_object;
8129 }
8130
8131
8132 /*
8133 * If object was using a symmetric copy strategy,
8134 * change its copy strategy to the default
8135 * asymmetric copy strategy, which is copy_delay
8136 * in the non-norma case and copy_call in the
8137 * norma case. Bump the reference count for the
8138 * new entry.
8139 */
8140
8141 if(old_entry->is_sub_map) {
8142 vm_map_lock(old_entry->object.sub_map);
8143 vm_map_reference(old_entry->object.sub_map);
8144 vm_map_unlock(old_entry->object.sub_map);
8145 } else {
8146 vm_object_lock(object);
8147 vm_object_reference_locked(object);
8148 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
8149 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
8150 }
8151 vm_object_unlock(object);
8152 }
8153
8154 /*
8155 * Clone the entry, using object ref from above.
8156 * Mark both entries as shared.
8157 */
8158
8159 new_entry = vm_map_entry_create(new_map);
8160 vm_map_entry_copy(new_entry, old_entry);
8161 old_entry->is_shared = TRUE;
8162 new_entry->is_shared = TRUE;
8163
8164 /*
8165 * Insert the entry into the new map -- we
8166 * know we're inserting at the end of the new
8167 * map.
8168 */
8169
8170 vm_map_store_entry_link(new_map, vm_map_last_entry(new_map), new_entry);
8171
8172 /*
8173 * Update the physical map
8174 */
8175
8176 if (old_entry->is_sub_map) {
8177 /* Bill Angell pmap support goes here */
8178 } else {
8179 pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
8180 old_entry->vme_end - old_entry->vme_start,
8181 old_entry->vme_start);
8182 }
8183 }
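/*
 * A minimal sketch of the shadow-creation decision above, written as a
 * hypothetical helper (not part of this file); the field names are taken
 * from the test in vm_map_fork_share() and carry the same three cases
 * spelled out in the long comment:
 */
static boolean_t
fork_share_needs_shadow(
	vm_map_entry_t	entry,
	vm_object_t	object)
{
	vm_map_size_t	entry_size = entry->vme_end - entry->vme_start;

	return (entry->needs_copy ||		 /* case 1: deferred symmetric copy */
		object->shadowed ||		 /* case 2: already symmetrically copied */
		(!object->true_share &&		 /* case 3: object extends beyond this */
		 !entry->is_shared &&		 /*	    entry, so other entries may   */
		 object->vo_size > entry_size)); /*	    copy it symmetrically later   */
}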
8184
8185 static boolean_t
8186 vm_map_fork_copy(
8187 vm_map_t old_map,
8188 vm_map_entry_t *old_entry_p,
8189 vm_map_t new_map)
8190 {
8191 vm_map_entry_t old_entry = *old_entry_p;
8192 vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start;
8193 vm_map_offset_t start = old_entry->vme_start;
8194 vm_map_copy_t copy;
8195 vm_map_entry_t last = vm_map_last_entry(new_map);
8196
8197 vm_map_unlock(old_map);
8198 /*
8199 * Use maxprot version of copyin because we
8200 * care about whether this memory can ever
8201 * be accessed, not just whether it's accessible
8202 * right now.
8203 */
8204 if (vm_map_copyin_maxprot(old_map, start, entry_size, FALSE, &copy)
8205 != KERN_SUCCESS) {
8206 /*
8207 * The map might have changed while it
8208 * was unlocked, check it again. Skip
8209 * any blank space or permanently
8210 * unreadable region.
8211 */
8212 vm_map_lock(old_map);
8213 if (!vm_map_lookup_entry(old_map, start, &last) ||
8214 (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) {
8215 last = last->vme_next;
8216 }
8217 *old_entry_p = last;
8218
8219 /*
8220 * XXX For some error returns, want to
8221 * XXX skip to the next element. Note
8222 * that INVALID_ADDRESS and
8223 * PROTECTION_FAILURE are handled above.
8224 */
8225
8226 return FALSE;
8227 }
8228
8229 /*
8230 * Insert the copy into the new map
8231 */
8232
8233 vm_map_copy_insert(new_map, last, copy);
8234
8235 /*
8236 * Pick up the traversal at the end of
8237 * the copied region.
8238 */
8239
8240 vm_map_lock(old_map);
8241 start += entry_size;
8242 if (! vm_map_lookup_entry(old_map, start, &last)) {
8243 last = last->vme_next;
8244 } else {
8245 if (last->vme_start == start) {
8246 /*
8247 * No need to clip here and we don't
8248 * want to cause any unnecessary
8249 * unnesting...
8250 */
8251 } else {
8252 vm_map_clip_start(old_map, last, start);
8253 }
8254 }
8255 *old_entry_p = last;
8256
8257 return TRUE;
8258 }
8259
8260 /*
8261 * vm_map_fork:
8262 *
8263 * Create and return a new map based on the old
8264 * map, according to the inheritance values on the
8265 * regions in that map.
8266 *
8267 * The source map must not be locked.
8268 */
8269 vm_map_t
8270 vm_map_fork(
8271 vm_map_t old_map)
8272 {
8273 pmap_t new_pmap;
8274 vm_map_t new_map;
8275 vm_map_entry_t old_entry;
8276 vm_map_size_t new_size = 0, entry_size;
8277 vm_map_entry_t new_entry;
8278 boolean_t src_needs_copy;
8279 boolean_t new_entry_needs_copy;
8280
8281 new_pmap = pmap_create((vm_map_size_t) 0,
8282 #if defined(__i386__) || defined(__x86_64__)
8283 old_map->pmap->pm_task_map != TASK_MAP_32BIT
8284 #else
8285 0
8286 #endif
8287 );
8288 #if defined(__i386__)
8289 if (old_map->pmap->pm_task_map == TASK_MAP_64BIT_SHARED)
8290 pmap_set_4GB_pagezero(new_pmap);
8291 #endif
8292
8293 vm_map_reference_swap(old_map);
8294 vm_map_lock(old_map);
8295
8296 new_map = vm_map_create(new_pmap,
8297 old_map->min_offset,
8298 old_map->max_offset,
8299 old_map->hdr.entries_pageable);
8300 for (
8301 old_entry = vm_map_first_entry(old_map);
8302 old_entry != vm_map_to_entry(old_map);
8303 ) {
8304
8305 entry_size = old_entry->vme_end - old_entry->vme_start;
8306
8307 switch (old_entry->inheritance) {
8308 case VM_INHERIT_NONE:
8309 break;
8310
8311 case VM_INHERIT_SHARE:
8312 vm_map_fork_share(old_map, old_entry, new_map);
8313 new_size += entry_size;
8314 break;
8315
8316 case VM_INHERIT_COPY:
8317
8318 /*
8319 * Inline the copy_quickly case;
8320 * upon failure, fall back on call
8321 * to vm_map_fork_copy.
8322 */
8323
8324 if(old_entry->is_sub_map)
8325 break;
8326 if ((old_entry->wired_count != 0) ||
8327 ((old_entry->object.vm_object != NULL) &&
8328 (old_entry->object.vm_object->true_share))) {
8329 goto slow_vm_map_fork_copy;
8330 }
8331
8332 new_entry = vm_map_entry_create(new_map);
8333 vm_map_entry_copy(new_entry, old_entry);
8334 /* clear address space specifics */
8335 new_entry->use_pmap = FALSE;
8336
8337 if (! vm_object_copy_quickly(
8338 &new_entry->object.vm_object,
8339 old_entry->offset,
8340 (old_entry->vme_end -
8341 old_entry->vme_start),
8342 &src_needs_copy,
8343 &new_entry_needs_copy)) {
8344 vm_map_entry_dispose(new_map, new_entry);
8345 goto slow_vm_map_fork_copy;
8346 }
8347
8348 /*
8349 * Handle copy-on-write obligations
8350 */
8351
8352 if (src_needs_copy && !old_entry->needs_copy) {
8353 vm_prot_t prot;
8354
8355 prot = old_entry->protection & ~VM_PROT_WRITE;
8356
8357 if (override_nx(old_map, old_entry->alias) && prot)
8358 prot |= VM_PROT_EXECUTE;
8359
8360 vm_object_pmap_protect(
8361 old_entry->object.vm_object,
8362 old_entry->offset,
8363 (old_entry->vme_end -
8364 old_entry->vme_start),
8365 ((old_entry->is_shared
8366 || old_map->mapped)
8367 ? PMAP_NULL :
8368 old_map->pmap),
8369 old_entry->vme_start,
8370 prot);
8371
8372 old_entry->needs_copy = TRUE;
8373 }
8374 new_entry->needs_copy = new_entry_needs_copy;
8375
8376 /*
8377 * Insert the entry at the end
8378 * of the map.
8379 */
8380
8381 vm_map_store_entry_link(new_map, vm_map_last_entry(new_map),
8382 new_entry);
8383 new_size += entry_size;
8384 break;
8385
8386 slow_vm_map_fork_copy:
8387 if (vm_map_fork_copy(old_map, &old_entry, new_map)) {
8388 new_size += entry_size;
8389 }
8390 continue;
8391 }
8392 old_entry = old_entry->vme_next;
8393 }
8394
8395 new_map->size = new_size;
8396 vm_map_unlock(old_map);
8397 vm_map_deallocate(old_map);
8398
8399 return(new_map);
8400 }
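/*
 * A minimal user-space sketch (not kernel code) of how the inheritance
 * values consumed by vm_map_fork() are established: a region marked
 * VM_INHERIT_SHARE goes through vm_map_fork_share() at fork time, while
 * the default VM_INHERIT_COPY takes the copy path above.  Assumes the
 * standard Mach user-level headers.
 */
#include <mach/mach.h>
#include <mach/mach_vm.h>
#include <unistd.h>

static int
share_page_with_child(void)
{
	mach_vm_address_t	addr = 0;
	mach_vm_size_t		size = 4096;

	if (mach_vm_allocate(mach_task_self(), &addr, size,
			     VM_FLAGS_ANYWHERE) != KERN_SUCCESS)
		return -1;
	/* parent and child will see each other's writes to this page */
	if (mach_vm_inherit(mach_task_self(), addr, size,
			    VM_INHERIT_SHARE) != KERN_SUCCESS)
		return -1;
	return fork();	/* the child's map is built by vm_map_fork() */
}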
8401
8402 /*
8403 * vm_map_exec:
8404 *
8405 * Set up the "new_map" with the proper execution environment according
8406 * to the type of executable (platform, 64-bit, chroot environment).
8407 * Map the comm page and shared region, etc...
8408 */
8409 kern_return_t
8410 vm_map_exec(
8411 vm_map_t new_map,
8412 task_t task,
8413 void *fsroot,
8414 cpu_type_t cpu)
8415 {
8416 SHARED_REGION_TRACE_DEBUG(
8417 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): ->\n",
8418 current_task(), new_map, task, fsroot, cpu));
8419 (void) vm_commpage_enter(new_map, task);
8420 (void) vm_shared_region_enter(new_map, task, fsroot, cpu);
8421 SHARED_REGION_TRACE_DEBUG(
8422 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): <-\n",
8423 current_task(), new_map, task, fsroot, cpu));
8424 return KERN_SUCCESS;
8425 }
8426
8427 /*
8428 * vm_map_lookup_locked:
8429 *
8430 * Finds the VM object, offset, and
8431 * protection for a given virtual address in the
8432 * specified map, assuming a page fault of the
8433 * type specified.
8434 *
8435 * Returns the (object, offset, protection) for
8436 * this address, whether it is wired down, and whether
8437 * this map has the only reference to the data in question.
8438 * In order to later verify this lookup, a "version"
8439 * is returned.
8440 *
8441 * The map MUST be locked by the caller and WILL be
8442 * locked on exit. In order to guarantee the
8443 * existence of the returned object, it is returned
8444 * locked.
8445 *
8446 * If a lookup is requested with "write protection"
8447 * specified, the map may be changed to perform virtual
8448 * copying operations, although the data referenced will
8449 * remain the same.
8450 */
8451 kern_return_t
8452 vm_map_lookup_locked(
8453 vm_map_t *var_map, /* IN/OUT */
8454 vm_map_offset_t vaddr,
8455 vm_prot_t fault_type,
8456 int object_lock_type,
8457 vm_map_version_t *out_version, /* OUT */
8458 vm_object_t *object, /* OUT */
8459 vm_object_offset_t *offset, /* OUT */
8460 vm_prot_t *out_prot, /* OUT */
8461 boolean_t *wired, /* OUT */
8462 vm_object_fault_info_t fault_info, /* OUT */
8463 vm_map_t *real_map)
8464 {
8465 vm_map_entry_t entry;
8466 register vm_map_t map = *var_map;
8467 vm_map_t old_map = *var_map;
8468 vm_map_t cow_sub_map_parent = VM_MAP_NULL;
8469 vm_map_offset_t cow_parent_vaddr = 0;
8470 vm_map_offset_t old_start = 0;
8471 vm_map_offset_t old_end = 0;
8472 register vm_prot_t prot;
8473 boolean_t mask_protections;
8474 vm_prot_t original_fault_type;
8475
8476 /*
8477 * VM_PROT_IS_MASK means that the caller wants us to use "fault_type"
8478 * as a mask against the mapping's actual protections, not as an
8479 * absolute value.
8480 */
8481 mask_protections = (fault_type & VM_PROT_IS_MASK) ? TRUE : FALSE;
8482 fault_type &= ~VM_PROT_IS_MASK;
8483 original_fault_type = fault_type;
8484
8485 *real_map = map;
8486
8487 RetryLookup:
8488 fault_type = original_fault_type;
8489
8490 /*
8491 * If the map has an interesting hint, try it before calling
8492 * full blown lookup routine.
8493 */
8494 entry = map->hint;
8495
8496 if ((entry == vm_map_to_entry(map)) ||
8497 (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
8498 vm_map_entry_t tmp_entry;
8499
8500 /*
8501 * Entry was either not a valid hint, or the vaddr
8502 * was not contained in the entry, so do a full lookup.
8503 */
8504 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
8505 if((cow_sub_map_parent) && (cow_sub_map_parent != map))
8506 vm_map_unlock(cow_sub_map_parent);
8507 if((*real_map != map)
8508 && (*real_map != cow_sub_map_parent))
8509 vm_map_unlock(*real_map);
8510 return KERN_INVALID_ADDRESS;
8511 }
8512
8513 entry = tmp_entry;
8514 }
8515 if(map == old_map) {
8516 old_start = entry->vme_start;
8517 old_end = entry->vme_end;
8518 }
8519
8520 /*
8521 * Handle submaps. Drop lock on upper map, submap is
8522 * returned locked.
8523 */
8524
8525 submap_recurse:
8526 if (entry->is_sub_map) {
8527 vm_map_offset_t local_vaddr;
8528 vm_map_offset_t end_delta;
8529 vm_map_offset_t start_delta;
8530 vm_map_entry_t submap_entry;
8531 boolean_t mapped_needs_copy=FALSE;
8532
8533 local_vaddr = vaddr;
8534
8535 if ((entry->use_pmap && !(fault_type & VM_PROT_WRITE))) {
8536 /* if real_map equals map we unlock below */
8537 if ((*real_map != map) &&
8538 (*real_map != cow_sub_map_parent))
8539 vm_map_unlock(*real_map);
8540 *real_map = entry->object.sub_map;
8541 }
8542
8543 if(entry->needs_copy && (fault_type & VM_PROT_WRITE)) {
8544 if (!mapped_needs_copy) {
8545 if (vm_map_lock_read_to_write(map)) {
8546 vm_map_lock_read(map);
8547 /* XXX FBDP: entry still valid ? */
8548 if(*real_map == entry->object.sub_map)
8549 *real_map = map;
8550 goto RetryLookup;
8551 }
8552 vm_map_lock_read(entry->object.sub_map);
8553 cow_sub_map_parent = map;
8554 /* reset base to map before cow object */
8555 /* this is the map which will accept */
8556 /* the new cow object */
8557 old_start = entry->vme_start;
8558 old_end = entry->vme_end;
8559 cow_parent_vaddr = vaddr;
8560 mapped_needs_copy = TRUE;
8561 } else {
8562 vm_map_lock_read(entry->object.sub_map);
8563 if((cow_sub_map_parent != map) &&
8564 (*real_map != map))
8565 vm_map_unlock(map);
8566 }
8567 } else {
8568 vm_map_lock_read(entry->object.sub_map);
8569 /* Leave the map locked if it is the */
8570 /* target COW sub_map parent from above; */
8571 /* otherwise just follow the maps down to */
8572 /* the object. Here we unlock, knowing we */
8573 /* are not revisiting the map. */
8574 if((*real_map != map) && (map != cow_sub_map_parent))
8575 vm_map_unlock_read(map);
8576 }
8577
8578 /* XXX FBDP: map has been unlocked, what protects "entry" !? */
8579 *var_map = map = entry->object.sub_map;
8580
8581 /* calculate the offset in the submap for vaddr */
8582 local_vaddr = (local_vaddr - entry->vme_start) + entry->offset;
8583
8584 RetrySubMap:
8585 if(!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
8586 if((cow_sub_map_parent) && (cow_sub_map_parent != map)){
8587 vm_map_unlock(cow_sub_map_parent);
8588 }
8589 if((*real_map != map)
8590 && (*real_map != cow_sub_map_parent)) {
8591 vm_map_unlock(*real_map);
8592 }
8593 *real_map = map;
8594 return KERN_INVALID_ADDRESS;
8595 }
8596
8597 /* find the attenuated shadow of the underlying object */
8598 /* on our target map */
8599
8600 /* In English: the submap object may extend beyond the */
8601 /* region mapped by the entry, or may fill only a portion */
8602 /* of it. For our purposes, we only care if the object */
8603 /* doesn't fill the entry. In that case the area which will */
8604 /* ultimately be clipped in the top map only needs to be */
8605 /* as big as the portion of the underlying entry which is */
8606 /* actually mapped. */
8607 start_delta = submap_entry->vme_start > entry->offset ?
8608 submap_entry->vme_start - entry->offset : 0;
8609
8610 end_delta =
8611 (entry->offset + start_delta + (old_end - old_start)) <=
8612 submap_entry->vme_end ?
8613 0 : (entry->offset +
8614 (old_end - old_start))
8615 - submap_entry->vme_end;
8616
8617 old_start += start_delta;
8618 old_end -= end_delta;
8619
8620 if(submap_entry->is_sub_map) {
8621 entry = submap_entry;
8622 vaddr = local_vaddr;
8623 goto submap_recurse;
8624 }
8625
8626 if(((fault_type & VM_PROT_WRITE) && cow_sub_map_parent)) {
8627
8628 vm_object_t sub_object, copy_object;
8629 vm_object_offset_t copy_offset;
8630 vm_map_offset_t local_start;
8631 vm_map_offset_t local_end;
8632 boolean_t copied_slowly = FALSE;
8633
8634 if (vm_map_lock_read_to_write(map)) {
8635 vm_map_lock_read(map);
8636 old_start -= start_delta;
8637 old_end += end_delta;
8638 goto RetrySubMap;
8639 }
8640
8641
8642 sub_object = submap_entry->object.vm_object;
8643 if (sub_object == VM_OBJECT_NULL) {
8644 sub_object =
8645 vm_object_allocate(
8646 (vm_map_size_t)
8647 (submap_entry->vme_end -
8648 submap_entry->vme_start));
8649 submap_entry->object.vm_object = sub_object;
8650 submap_entry->offset = 0;
8651 }
8652 local_start = local_vaddr -
8653 (cow_parent_vaddr - old_start);
8654 local_end = local_vaddr +
8655 (old_end - cow_parent_vaddr);
8656 vm_map_clip_start(map, submap_entry, local_start);
8657 vm_map_clip_end(map, submap_entry, local_end);
8658 /* unnesting was done in vm_map_clip_start/end() */
8659 assert(!submap_entry->use_pmap);
8660
8661 /* This is the COW case; let's connect */
8662 /* an entry in our space to the underlying */
8663 /* object in the submap, bypassing the */
8664 /* submap. */
8665
8666
8667 if(submap_entry->wired_count != 0 ||
8668 (sub_object->copy_strategy ==
8669 MEMORY_OBJECT_COPY_NONE)) {
8670 vm_object_lock(sub_object);
8671 vm_object_copy_slowly(sub_object,
8672 submap_entry->offset,
8673 (submap_entry->vme_end -
8674 submap_entry->vme_start),
8675 FALSE,
8676 &copy_object);
8677 copied_slowly = TRUE;
8678 } else {
8679
8680 /* set up shadow object */
8681 copy_object = sub_object;
8682 vm_object_reference(copy_object);
8683 sub_object->shadowed = TRUE;
8684 submap_entry->needs_copy = TRUE;
8685
8686 prot = submap_entry->protection & ~VM_PROT_WRITE;
8687
8688 if (override_nx(map, submap_entry->alias) && prot)
8689 prot |= VM_PROT_EXECUTE;
8690
8691 vm_object_pmap_protect(
8692 sub_object,
8693 submap_entry->offset,
8694 submap_entry->vme_end -
8695 submap_entry->vme_start,
8696 (submap_entry->is_shared
8697 || map->mapped) ?
8698 PMAP_NULL : map->pmap,
8699 submap_entry->vme_start,
8700 prot);
8701 }
8702
8703 /*
8704 * Adjust the fault offset to the submap entry.
8705 */
8706 copy_offset = (local_vaddr -
8707 submap_entry->vme_start +
8708 submap_entry->offset);
8709
8710 /* This works differently from the */
8711 /* normal submap case. We go back */
8712 /* to the parent of the COW map and */
8713 /* clip out the target portion of */
8714 /* the sub_map, substituting the */
8715 /* new copy object. */
8716
8717 vm_map_unlock(map);
8718 local_start = old_start;
8719 local_end = old_end;
8720 map = cow_sub_map_parent;
8721 *var_map = cow_sub_map_parent;
8722 vaddr = cow_parent_vaddr;
8723 cow_sub_map_parent = NULL;
8724
8725 if(!vm_map_lookup_entry(map,
8726 vaddr, &entry)) {
8727 vm_object_deallocate(
8728 copy_object);
8729 vm_map_lock_write_to_read(map);
8730 return KERN_INVALID_ADDRESS;
8731 }
8732
8733 /* clip out the portion of space */
8734 /* mapped by the sub map which */
8735 /* corresponds to the underlying */
8736 /* object */
8737
8738 /*
8739 * Clip (and unnest) the smallest nested chunk
8740 * possible around the faulting address...
8741 */
8742 local_start = vaddr & ~(pmap_nesting_size_min - 1);
8743 local_end = local_start + pmap_nesting_size_min;
8744 /*
8745 * ... but don't go beyond the "old_start" to "old_end"
8746 * range, to avoid spanning over another VM region
8747 * with a possibly different VM object and/or offset.
8748 */
8749 if (local_start < old_start) {
8750 local_start = old_start;
8751 }
8752 if (local_end > old_end) {
8753 local_end = old_end;
8754 }
8755 /*
8756 * Adjust copy_offset to the start of the range.
8757 */
8758 copy_offset -= (vaddr - local_start);
8759
8760 vm_map_clip_start(map, entry, local_start);
8761 vm_map_clip_end(map, entry, local_end);
8762 /* unnesting was done in vm_map_clip_start/end() */
8763 assert(!entry->use_pmap);
8764
8765 /* substitute copy object for */
8766 /* shared map entry */
8767 vm_map_deallocate(entry->object.sub_map);
8768 entry->is_sub_map = FALSE;
8769 entry->object.vm_object = copy_object;
8770
8771 /* propagate the submap entry's protections */
8772 entry->protection |= submap_entry->protection;
8773 entry->max_protection |= submap_entry->max_protection;
8774
8775 if(copied_slowly) {
8776 entry->offset = local_start - old_start;
8777 entry->needs_copy = FALSE;
8778 entry->is_shared = FALSE;
8779 } else {
8780 entry->offset = copy_offset;
8781 entry->needs_copy = TRUE;
8782 if(entry->inheritance == VM_INHERIT_SHARE)
8783 entry->inheritance = VM_INHERIT_COPY;
8784 if (map != old_map)
8785 entry->is_shared = TRUE;
8786 }
8787 if(entry->inheritance == VM_INHERIT_SHARE)
8788 entry->inheritance = VM_INHERIT_COPY;
8789
8790 vm_map_lock_write_to_read(map);
8791 } else {
8792 if((cow_sub_map_parent)
8793 && (cow_sub_map_parent != *real_map)
8794 && (cow_sub_map_parent != map)) {
8795 vm_map_unlock(cow_sub_map_parent);
8796 }
8797 entry = submap_entry;
8798 vaddr = local_vaddr;
8799 }
8800 }
8801
8802 /*
8803 * Check whether this task is allowed to have
8804 * this page.
8805 */
8806
8807 prot = entry->protection;
8808
8809 if (override_nx(map, entry->alias) && prot) {
8810 /*
8811 * HACK -- if not a stack, then allow execution
8812 */
8813 prot |= VM_PROT_EXECUTE;
8814 }
8815
8816 if (mask_protections) {
8817 fault_type &= prot;
8818 if (fault_type == VM_PROT_NONE) {
8819 goto protection_failure;
8820 }
8821 }
8822 if ((fault_type & (prot)) != fault_type) {
8823 protection_failure:
8824 if (*real_map != map) {
8825 vm_map_unlock(*real_map);
8826 }
8827 *real_map = map;
8828
8829 if ((fault_type & VM_PROT_EXECUTE) && prot)
8830 log_stack_execution_failure((addr64_t)vaddr, prot);
8831
8832 DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL);
8833 return KERN_PROTECTION_FAILURE;
8834 }
8835
8836 /*
8837 * If this page is not pageable, we have to get
8838 * it for all possible accesses.
8839 */
8840
8841 *wired = (entry->wired_count != 0);
8842 if (*wired)
8843 fault_type = prot;
8844
8845 /*
8846 * If the entry was copy-on-write, we either resolve the copy now or demote the permissions.
8847 */
8848
8849 if (entry->needs_copy) {
8850 /*
8851 * If we want to write the page, we may as well
8852 * handle that now since we've got the map locked.
8853 *
8854 * If we don't need to write the page, we just
8855 * demote the permissions allowed.
8856 */
8857
8858 if ((fault_type & VM_PROT_WRITE) || *wired) {
8859 /*
8860 * Make a new object, and place it in the
8861 * object chain. Note that no new references
8862 * have appeared -- one just moved from the
8863 * map to the new object.
8864 */
8865
8866 if (vm_map_lock_read_to_write(map)) {
8867 vm_map_lock_read(map);
8868 goto RetryLookup;
8869 }
8870 vm_object_shadow(&entry->object.vm_object,
8871 &entry->offset,
8872 (vm_map_size_t) (entry->vme_end -
8873 entry->vme_start));
8874
8875 entry->object.vm_object->shadowed = TRUE;
8876 entry->needs_copy = FALSE;
8877 vm_map_lock_write_to_read(map);
8878 }
8879 else {
8880 /*
8881 * We're attempting to read a copy-on-write
8882 * page -- don't allow writes.
8883 */
8884
8885 prot &= (~VM_PROT_WRITE);
8886 }
8887 }
8888
8889 /*
8890 * Create an object if necessary.
8891 */
8892 if (entry->object.vm_object == VM_OBJECT_NULL) {
8893
8894 if (vm_map_lock_read_to_write(map)) {
8895 vm_map_lock_read(map);
8896 goto RetryLookup;
8897 }
8898
8899 entry->object.vm_object = vm_object_allocate(
8900 (vm_map_size_t)(entry->vme_end - entry->vme_start));
8901 entry->offset = 0;
8902 vm_map_lock_write_to_read(map);
8903 }
8904
8905 /*
8906 * Return the object/offset from this entry. If the entry
8907 * was copy-on-write or empty, it has been fixed up. Also
8908 * return the protection.
8909 */
8910
8911 *offset = (vaddr - entry->vme_start) + entry->offset;
8912 *object = entry->object.vm_object;
8913 *out_prot = prot;
8914
8915 if (fault_info) {
8916 fault_info->interruptible = THREAD_UNINT; /* for now... */
8917 /* ... the caller will change "interruptible" if needed */
8918 fault_info->cluster_size = 0;
8919 fault_info->user_tag = entry->alias;
8920 fault_info->behavior = entry->behavior;
8921 fault_info->lo_offset = entry->offset;
8922 fault_info->hi_offset = (entry->vme_end - entry->vme_start) + entry->offset;
8923 fault_info->no_cache = entry->no_cache;
8924 fault_info->stealth = FALSE;
8925 fault_info->io_sync = FALSE;
8926 fault_info->cs_bypass = (entry->used_for_jit)? TRUE : FALSE;
8927 fault_info->mark_zf_absent = FALSE;
8928 }
8929
8930 /*
8931 * Lock the object to prevent it from disappearing
8932 */
8933 if (object_lock_type == OBJECT_LOCK_EXCLUSIVE)
8934 vm_object_lock(*object);
8935 else
8936 vm_object_lock_shared(*object);
8937
8938 /*
8939 * Save the version number
8940 */
8941
8942 out_version->main_timestamp = map->timestamp;
8943
8944 return KERN_SUCCESS;
8945 }
8946
8947
8948 /*
8949 * vm_map_verify:
8950 *
8951 * Verifies that the map in question has not changed
8952 * since the given version. If successful, the map
8953 * will not change until vm_map_verify_done() is called.
8954 */
8955 boolean_t
8956 vm_map_verify(
8957 register vm_map_t map,
8958 register vm_map_version_t *version) /* REF */
8959 {
8960 boolean_t result;
8961
8962 vm_map_lock_read(map);
8963 result = (map->timestamp == version->main_timestamp);
8964
8965 if (!result)
8966 vm_map_unlock_read(map);
8967
8968 return(result);
8969 }
8970
8971 /*
8972 * vm_map_verify_done:
8973 *
8974 * Releases locks acquired by a vm_map_verify.
8975 *
8976 * This is now a macro in vm/vm_map.h. It does a
8977 * vm_map_unlock_read on the map.
8978 */
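/*
 * A minimal sketch (hypothetical kernel caller, not part of this file)
 * of the lookup/verify protocol described above: look the address up
 * with the map read-locked, remember the returned version, drop the
 * locks while doing slow work, then re-validate with vm_map_verify()
 * before trusting the result.
 */
static kern_return_t
lookup_then_verify(vm_map_t map, vm_map_offset_t vaddr)
{
	vm_map_version_t	version;
	vm_object_t		object;
	vm_object_offset_t	offset;
	vm_prot_t		prot;
	boolean_t		wired;
	vm_map_t		real_map;
	kern_return_t		kr;

	vm_map_lock_read(map);
	kr = vm_map_lookup_locked(&map, vaddr, VM_PROT_READ,
				  OBJECT_LOCK_EXCLUSIVE, &version,
				  &object, &offset, &prot, &wired,
				  NULL, &real_map);
	if (kr != KERN_SUCCESS) {
		/* the (possibly updated) map is still read-locked */
		vm_map_unlock_read(map);
		return kr;
	}
	vm_object_unlock(object);
	if (real_map != map)
		vm_map_unlock(real_map);
	vm_map_unlock_read(map);

	/* ... slow work (e.g. paging) happens with the map unlocked ... */

	if (!vm_map_verify(map, &version)) {
		/* the map changed underneath us: the caller must retry */
		return KERN_ABORTED;
	}
	vm_map_verify_done(map, &version);	/* drops the read lock */
	return KERN_SUCCESS;
}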
8979
8980
8981 /*
8982 * TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
8983 * Goes away after regular vm_region_recurse function migrates to
8984 * 64 bits
8985 * vm_region_recurse: A form of vm_region which follows the
8986 * submaps in a target map
8987 *
8988 */
8989
8990 kern_return_t
8991 vm_map_region_recurse_64(
8992 vm_map_t map,
8993 vm_map_offset_t *address, /* IN/OUT */
8994 vm_map_size_t *size, /* OUT */
8995 natural_t *nesting_depth, /* IN/OUT */
8996 vm_region_submap_info_64_t submap_info, /* IN/OUT */
8997 mach_msg_type_number_t *count) /* IN/OUT */
8998 {
8999 vm_region_extended_info_data_t extended;
9000 vm_map_entry_t tmp_entry;
9001 vm_map_offset_t user_address;
9002 unsigned int user_max_depth;
9003
9004 /*
9005 * "curr_entry" is the VM map entry preceding or including the
9006 * address we're looking for.
9007 * "curr_map" is the map or sub-map containing "curr_entry".
9008 * "curr_address" is the equivalent of the top map's "user_address"
9009 * in the current map.
9010 * "curr_offset" is the cumulated offset of "curr_map" in the
9011 * target task's address space.
9012 * "curr_depth" is the depth of "curr_map" in the chain of
9013 * sub-maps.
9014 *
9015 * "curr_max_below" and "curr_max_above" limit the range (around
9016 * "curr_address") we should take into account in the current (sub)map.
9017 * They limit the range to what's visible through the map entries
9018 * we've traversed from the top map to the current map.
9019 *
9020 */
9021 vm_map_entry_t curr_entry;
9022 vm_map_address_t curr_address;
9023 vm_map_offset_t curr_offset;
9024 vm_map_t curr_map;
9025 unsigned int curr_depth;
9026 vm_map_offset_t curr_max_below, curr_max_above;
9027 vm_map_offset_t curr_skip;
9028
9029 /*
9030 * "next_" is the same as "curr_" but for the VM region immediately
9031 * after the address we're looking for. We need to keep track of this
9032 * too because we want to return info about that region if the
9033 * address we're looking for is not mapped.
9034 */
9035 vm_map_entry_t next_entry;
9036 vm_map_offset_t next_offset;
9037 vm_map_offset_t next_address;
9038 vm_map_t next_map;
9039 unsigned int next_depth;
9040 vm_map_offset_t next_max_below, next_max_above;
9041 vm_map_offset_t next_skip;
9042
9043 boolean_t look_for_pages;
9044 vm_region_submap_short_info_64_t short_info;
9045
9046 if (map == VM_MAP_NULL) {
9047 /* no address space to work on */
9048 return KERN_INVALID_ARGUMENT;
9049 }
9050
9051 if (*count < VM_REGION_SUBMAP_INFO_COUNT_64) {
9052 if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) {
9053 /*
9054 * "info" structure is not big enough and
9055 * would overflow
9056 */
9057 return KERN_INVALID_ARGUMENT;
9058 } else {
9059 look_for_pages = FALSE;
9060 *count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
9061 short_info = (vm_region_submap_short_info_64_t) submap_info;
9062 submap_info = NULL;
9063 }
9064 } else {
9065 look_for_pages = TRUE;
9066 *count = VM_REGION_SUBMAP_INFO_COUNT_64;
9067 short_info = NULL;
9068 }
9069
9070
9071 user_address = *address;
9072 user_max_depth = *nesting_depth;
9073
9074 curr_entry = NULL;
9075 curr_map = map;
9076 curr_address = user_address;
9077 curr_offset = 0;
9078 curr_skip = 0;
9079 curr_depth = 0;
9080 curr_max_above = ((vm_map_offset_t) -1) - curr_address;
9081 curr_max_below = curr_address;
9082
9083 next_entry = NULL;
9084 next_map = NULL;
9085 next_address = 0;
9086 next_offset = 0;
9087 next_skip = 0;
9088 next_depth = 0;
9089 next_max_above = (vm_map_offset_t) -1;
9090 next_max_below = (vm_map_offset_t) -1;
9091
9092 if (not_in_kdp) {
9093 vm_map_lock_read(curr_map);
9094 }
9095
9096 for (;;) {
9097 if (vm_map_lookup_entry(curr_map,
9098 curr_address,
9099 &tmp_entry)) {
9100 /* tmp_entry contains the address we're looking for */
9101 curr_entry = tmp_entry;
9102 } else {
9103 vm_map_offset_t skip;
9104 /*
9105 * The address is not mapped. "tmp_entry" is the
9106 * map entry preceding the address. We want the next
9107 * one, if it exists.
9108 */
9109 curr_entry = tmp_entry->vme_next;
9110
9111 if (curr_entry == vm_map_to_entry(curr_map) ||
9112 (curr_entry->vme_start >=
9113 curr_address + curr_max_above)) {
9114 /* no next entry at this level: stop looking */
9115 if (not_in_kdp) {
9116 vm_map_unlock_read(curr_map);
9117 }
9118 curr_entry = NULL;
9119 curr_map = NULL;
9120 curr_offset = 0;
9121 curr_depth = 0;
9122 curr_max_above = 0;
9123 curr_max_below = 0;
9124 break;
9125 }
9126
9127 /* adjust current address and offset */
9128 skip = curr_entry->vme_start - curr_address;
9129 curr_address = curr_entry->vme_start;
9130 curr_skip = skip;
9131 curr_offset += skip;
9132 curr_max_above -= skip;
9133 curr_max_below = 0;
9134 }
9135
9136 /*
9137 * Is the next entry at this level closer to the address (or
9138 * deeper in the submap chain) than the one we had
9139 * so far ?
9140 */
9141 tmp_entry = curr_entry->vme_next;
9142 if (tmp_entry == vm_map_to_entry(curr_map)) {
9143 /* no next entry at this level */
9144 } else if (tmp_entry->vme_start >=
9145 curr_address + curr_max_above) {
9146 /*
9147 * tmp_entry is beyond the scope of what we mapped of
9148 * this submap in the upper level: ignore it.
9149 */
9150 } else if ((next_entry == NULL) ||
9151 (tmp_entry->vme_start + curr_offset <=
9152 next_entry->vme_start + next_offset)) {
9153 /*
9154 * We didn't have a "next_entry" or this one is
9155 * closer to the address we're looking for:
9156 * use this "tmp_entry" as the new "next_entry".
9157 */
9158 if (next_entry != NULL) {
9159 /* unlock the last "next_map" */
9160 if (next_map != curr_map && not_in_kdp) {
9161 vm_map_unlock_read(next_map);
9162 }
9163 }
9164 next_entry = tmp_entry;
9165 next_map = curr_map;
9166 next_depth = curr_depth;
9167 next_address = next_entry->vme_start;
9168 next_skip = curr_skip;
9169 next_offset = curr_offset;
9170 next_offset += (next_address - curr_address);
9171 next_max_above = MIN(next_max_above, curr_max_above);
9172 next_max_above = MIN(next_max_above,
9173 next_entry->vme_end - next_address);
9174 next_max_below = MIN(next_max_below, curr_max_below);
9175 next_max_below = MIN(next_max_below,
9176 next_address - next_entry->vme_start);
9177 }
9178
9179 /*
9180 * "curr_max_{above,below}" allow us to keep track of the
9181 * portion of the submap that is actually mapped at this level:
9182 * the rest of that submap is irrelevant to us, since it's not
9183 * mapped here.
9184 * The relevant portion of the map starts at
9185 * "curr_entry->offset" up to the size of "curr_entry".
9186 */
9187 curr_max_above = MIN(curr_max_above,
9188 curr_entry->vme_end - curr_address);
9189 curr_max_below = MIN(curr_max_below,
9190 curr_address - curr_entry->vme_start);
9191
9192 if (!curr_entry->is_sub_map ||
9193 curr_depth >= user_max_depth) {
9194 /*
9195 * We hit a leaf map or we reached the maximum depth
9196 * we could, so stop looking. Keep the current map
9197 * locked.
9198 */
9199 break;
9200 }
9201
9202 /*
9203 * Get down to the next submap level.
9204 */
9205
9206 /*
9207 * Lock the next level and unlock the current level,
9208 * unless we need to keep it locked to access the "next_entry"
9209 * later.
9210 */
9211 if (not_in_kdp) {
9212 vm_map_lock_read(curr_entry->object.sub_map);
9213 }
9214 if (curr_map == next_map) {
9215 /* keep "next_map" locked in case we need it */
9216 } else {
9217 /* release this map */
9218 if (not_in_kdp)
9219 vm_map_unlock_read(curr_map);
9220 }
9221
9222 /*
9223 * Adjust the offset. "curr_entry" maps the submap
9224 * at relative address "curr_entry->vme_start" in the
9225 * curr_map but skips the first "curr_entry->offset"
9226 * bytes of the submap.
9227 * "curr_offset" always represents the offset of a virtual
9228 * address in the curr_map relative to the absolute address
9229 * space (i.e. the top-level VM map).
9230 */
9231 curr_offset +=
9232 (curr_entry->offset - curr_entry->vme_start);
9233 curr_address = user_address + curr_offset;
9234 /* switch to the submap */
9235 curr_map = curr_entry->object.sub_map;
9236 curr_depth++;
9237 curr_entry = NULL;
9238 }
9239
9240 if (curr_entry == NULL) {
9241 /* no VM region contains the address... */
9242 if (next_entry == NULL) {
9243 /* ... and no VM region follows it either */
9244 return KERN_INVALID_ADDRESS;
9245 }
9246 /* ... gather info about the next VM region */
9247 curr_entry = next_entry;
9248 curr_map = next_map; /* still locked ... */
9249 curr_address = next_address;
9250 curr_skip = next_skip;
9251 curr_offset = next_offset;
9252 curr_depth = next_depth;
9253 curr_max_above = next_max_above;
9254 curr_max_below = next_max_below;
9255 if (curr_map == map) {
9256 user_address = curr_address;
9257 }
9258 } else {
9259 /* we won't need "next_entry" after all */
9260 if (next_entry != NULL) {
9261 /* release "next_map" */
9262 if (next_map != curr_map && not_in_kdp) {
9263 vm_map_unlock_read(next_map);
9264 }
9265 }
9266 }
9267 next_entry = NULL;
9268 next_map = NULL;
9269 next_offset = 0;
9270 next_skip = 0;
9271 next_depth = 0;
9272 next_max_below = -1;
9273 next_max_above = -1;
9274
9275 *nesting_depth = curr_depth;
9276 *size = curr_max_above + curr_max_below;
9277 *address = user_address + curr_skip - curr_max_below;
9278
9279 // LP64todo: all the current tools are 32-bit, so this has never worked for 64-bit;
9280 // it should probably be a real 32-bit ID rather than a truncated pointer.
9281 // Current users only check for equality.
9282 #define INFO_MAKE_OBJECT_ID(p) ((uint32_t)(uintptr_t)p)
9283
9284 if (look_for_pages) {
9285 submap_info->user_tag = curr_entry->alias;
9286 submap_info->offset = curr_entry->offset;
9287 submap_info->protection = curr_entry->protection;
9288 submap_info->inheritance = curr_entry->inheritance;
9289 submap_info->max_protection = curr_entry->max_protection;
9290 submap_info->behavior = curr_entry->behavior;
9291 submap_info->user_wired_count = curr_entry->user_wired_count;
9292 submap_info->is_submap = curr_entry->is_sub_map;
9293 submap_info->object_id = INFO_MAKE_OBJECT_ID(curr_entry->object.vm_object);
9294 } else {
9295 short_info->user_tag = curr_entry->alias;
9296 short_info->offset = curr_entry->offset;
9297 short_info->protection = curr_entry->protection;
9298 short_info->inheritance = curr_entry->inheritance;
9299 short_info->max_protection = curr_entry->max_protection;
9300 short_info->behavior = curr_entry->behavior;
9301 short_info->user_wired_count = curr_entry->user_wired_count;
9302 short_info->is_submap = curr_entry->is_sub_map;
9303 short_info->object_id = INFO_MAKE_OBJECT_ID(curr_entry->object.vm_object);
9304 }
9305
9306 extended.pages_resident = 0;
9307 extended.pages_swapped_out = 0;
9308 extended.pages_shared_now_private = 0;
9309 extended.pages_dirtied = 0;
9310 extended.external_pager = 0;
9311 extended.shadow_depth = 0;
9312
9313 if (not_in_kdp) {
9314 if (!curr_entry->is_sub_map) {
9315 vm_map_offset_t range_start, range_end;
9316 range_start = MAX((curr_address - curr_max_below),
9317 curr_entry->vme_start);
9318 range_end = MIN((curr_address + curr_max_above),
9319 curr_entry->vme_end);
9320 vm_map_region_walk(curr_map,
9321 range_start,
9322 curr_entry,
9323 (curr_entry->offset +
9324 (range_start -
9325 curr_entry->vme_start)),
9326 range_end - range_start,
9327 &extended,
9328 look_for_pages);
9329 if (extended.external_pager &&
9330 extended.ref_count == 2 &&
9331 extended.share_mode == SM_SHARED) {
9332 extended.share_mode = SM_PRIVATE;
9333 }
9334 } else {
9335 if (curr_entry->use_pmap) {
9336 extended.share_mode = SM_TRUESHARED;
9337 } else {
9338 extended.share_mode = SM_PRIVATE;
9339 }
9340 extended.ref_count =
9341 curr_entry->object.sub_map->ref_count;
9342 }
9343 }
9344
9345 if (look_for_pages) {
9346 submap_info->pages_resident = extended.pages_resident;
9347 submap_info->pages_swapped_out = extended.pages_swapped_out;
9348 submap_info->pages_shared_now_private =
9349 extended.pages_shared_now_private;
9350 submap_info->pages_dirtied = extended.pages_dirtied;
9351 submap_info->external_pager = extended.external_pager;
9352 submap_info->shadow_depth = extended.shadow_depth;
9353 submap_info->share_mode = extended.share_mode;
9354 submap_info->ref_count = extended.ref_count;
9355 } else {
9356 short_info->external_pager = extended.external_pager;
9357 short_info->shadow_depth = extended.shadow_depth;
9358 short_info->share_mode = extended.share_mode;
9359 short_info->ref_count = extended.ref_count;
9360 }
9361
9362 if (not_in_kdp) {
9363 vm_map_unlock_read(curr_map);
9364 }
9365
9366 return KERN_SUCCESS;
9367 }
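/*
 * A minimal user-space sketch (not kernel code) of the caller side of
 * this routine: walk every region of a task, descending into submaps,
 * via the mach_vm_region_recurse() call.  Assumes the standard Mach
 * user-level headers.
 */
#include <mach/mach.h>
#include <mach/mach_vm.h>
#include <stdio.h>

static void
dump_regions(task_t task)
{
	mach_vm_address_t		addr = 0;
	mach_vm_size_t			size;
	natural_t			depth = 0;
	vm_region_submap_info_data_64_t	info;
	mach_msg_type_number_t		count;

	for (;;) {
		count = VM_REGION_SUBMAP_INFO_COUNT_64;
		if (mach_vm_region_recurse(task, &addr, &size, &depth,
					   (vm_region_recurse_info_t)&info,
					   &count) != KERN_SUCCESS)
			break;		/* KERN_INVALID_ADDRESS: no more regions */
		if (info.is_submap) {
			depth++;	/* re-query the same address, one level deeper */
			continue;
		}
		printf("0x%llx-0x%llx depth %u prot 0x%x\n",
		       addr, addr + size, depth, info.protection);
		addr += size;		/* move on to the next region */
	}
}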
9368
9369 /*
9370 * vm_region:
9371 *
9372 * User call to obtain information about a region in
9373 * a task's address map. Several flavors of information are
9374 * supported (basic, basic 64-bit, extended, and top).
9375 *
9376 * XXX The reserved and behavior fields cannot be filled
9377 * in until the vm merge from the IK is completed, and
9378 * vm_reserve is implemented.
9379 */
9380
9381 kern_return_t
9382 vm_map_region(
9383 vm_map_t map,
9384 vm_map_offset_t *address, /* IN/OUT */
9385 vm_map_size_t *size, /* OUT */
9386 vm_region_flavor_t flavor, /* IN */
9387 vm_region_info_t info, /* OUT */
9388 mach_msg_type_number_t *count, /* IN/OUT */
9389 mach_port_t *object_name) /* OUT */
9390 {
9391 vm_map_entry_t tmp_entry;
9392 vm_map_entry_t entry;
9393 vm_map_offset_t start;
9394
9395 if (map == VM_MAP_NULL)
9396 return(KERN_INVALID_ARGUMENT);
9397
9398 switch (flavor) {
9399
9400 case VM_REGION_BASIC_INFO:
9401 /* legacy for old 32-bit objects info */
9402 {
9403 vm_region_basic_info_t basic;
9404
9405 if (*count < VM_REGION_BASIC_INFO_COUNT)
9406 return(KERN_INVALID_ARGUMENT);
9407
9408 basic = (vm_region_basic_info_t) info;
9409 *count = VM_REGION_BASIC_INFO_COUNT;
9410
9411 vm_map_lock_read(map);
9412
9413 start = *address;
9414 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
9415 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
9416 vm_map_unlock_read(map);
9417 return(KERN_INVALID_ADDRESS);
9418 }
9419 } else {
9420 entry = tmp_entry;
9421 }
9422
9423 start = entry->vme_start;
9424
9425 basic->offset = (uint32_t)entry->offset;
9426 basic->protection = entry->protection;
9427 basic->inheritance = entry->inheritance;
9428 basic->max_protection = entry->max_protection;
9429 basic->behavior = entry->behavior;
9430 basic->user_wired_count = entry->user_wired_count;
9431 basic->reserved = entry->is_sub_map;
9432 *address = start;
9433 *size = (entry->vme_end - start);
9434
9435 if (object_name) *object_name = IP_NULL;
9436 if (entry->is_sub_map) {
9437 basic->shared = FALSE;
9438 } else {
9439 basic->shared = entry->is_shared;
9440 }
9441
9442 vm_map_unlock_read(map);
9443 return(KERN_SUCCESS);
9444 }
9445
9446 case VM_REGION_BASIC_INFO_64:
9447 {
9448 vm_region_basic_info_64_t basic;
9449
9450 if (*count < VM_REGION_BASIC_INFO_COUNT_64)
9451 return(KERN_INVALID_ARGUMENT);
9452
9453 basic = (vm_region_basic_info_64_t) info;
9454 *count = VM_REGION_BASIC_INFO_COUNT_64;
9455
9456 vm_map_lock_read(map);
9457
9458 start = *address;
9459 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
9460 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
9461 vm_map_unlock_read(map);
9462 return(KERN_INVALID_ADDRESS);
9463 }
9464 } else {
9465 entry = tmp_entry;
9466 }
9467
9468 start = entry->vme_start;
9469
9470 basic->offset = entry->offset;
9471 basic->protection = entry->protection;
9472 basic->inheritance = entry->inheritance;
9473 basic->max_protection = entry->max_protection;
9474 basic->behavior = entry->behavior;
9475 basic->user_wired_count = entry->user_wired_count;
9476 basic->reserved = entry->is_sub_map;
9477 *address = start;
9478 *size = (entry->vme_end - start);
9479
9480 if (object_name) *object_name = IP_NULL;
9481 if (entry->is_sub_map) {
9482 basic->shared = FALSE;
9483 } else {
9484 basic->shared = entry->is_shared;
9485 }
9486
9487 vm_map_unlock_read(map);
9488 return(KERN_SUCCESS);
9489 }
9490 case VM_REGION_EXTENDED_INFO:
9491 {
9492 vm_region_extended_info_t extended;
9493
9494 if (*count < VM_REGION_EXTENDED_INFO_COUNT)
9495 return(KERN_INVALID_ARGUMENT);
9496
9497 extended = (vm_region_extended_info_t) info;
9498 *count = VM_REGION_EXTENDED_INFO_COUNT;
9499
9500 vm_map_lock_read(map);
9501
9502 start = *address;
9503 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
9504 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
9505 vm_map_unlock_read(map);
9506 return(KERN_INVALID_ADDRESS);
9507 }
9508 } else {
9509 entry = tmp_entry;
9510 }
9511 start = entry->vme_start;
9512
9513 extended->protection = entry->protection;
9514 extended->user_tag = entry->alias;
9515 extended->pages_resident = 0;
9516 extended->pages_swapped_out = 0;
9517 extended->pages_shared_now_private = 0;
9518 extended->pages_dirtied = 0;
9519 extended->external_pager = 0;
9520 extended->shadow_depth = 0;
9521
9522 vm_map_region_walk(map, start, entry, entry->offset, entry->vme_end - start, extended, TRUE);
9523
9524 if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED)
9525 extended->share_mode = SM_PRIVATE;
9526
9527 if (object_name)
9528 *object_name = IP_NULL;
9529 *address = start;
9530 *size = (entry->vme_end - start);
9531
9532 vm_map_unlock_read(map);
9533 return(KERN_SUCCESS);
9534 }
9535 case VM_REGION_TOP_INFO:
9536 {
9537 vm_region_top_info_t top;
9538
9539 if (*count < VM_REGION_TOP_INFO_COUNT)
9540 return(KERN_INVALID_ARGUMENT);
9541
9542 top = (vm_region_top_info_t) info;
9543 *count = VM_REGION_TOP_INFO_COUNT;
9544
9545 vm_map_lock_read(map);
9546
9547 start = *address;
9548 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
9549 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
9550 vm_map_unlock_read(map);
9551 return(KERN_INVALID_ADDRESS);
9552 }
9553 } else {
9554 entry = tmp_entry;
9555
9556 }
9557 start = entry->vme_start;
9558
9559 top->private_pages_resident = 0;
9560 top->shared_pages_resident = 0;
9561
9562 vm_map_region_top_walk(entry, top);
9563
9564 if (object_name)
9565 *object_name = IP_NULL;
9566 *address = start;
9567 *size = (entry->vme_end - start);
9568
9569 vm_map_unlock_read(map);
9570 return(KERN_SUCCESS);
9571 }
9572 default:
9573 return(KERN_INVALID_ARGUMENT);
9574 }
9575 }
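/*
 * A minimal user-space sketch (not kernel code) of querying one of the
 * flavors handled above, via the mach_vm_region() call.  Assumes the
 * standard Mach user-level headers.
 */
#include <mach/mach.h>
#include <mach/mach_vm.h>

static kern_return_t
region_basic_info(
	task_t				task,
	mach_vm_address_t		*addr,	/* IN/OUT: moved to the region start */
	mach_vm_size_t			*size,	/* OUT */
	vm_region_basic_info_data_64_t	*info)	/* OUT */
{
	mach_msg_type_number_t	count = VM_REGION_BASIC_INFO_COUNT_64;
	mach_port_t		object_name = MACH_PORT_NULL;	/* always null, see above */

	return mach_vm_region(task, addr, size, VM_REGION_BASIC_INFO_64,
			      (vm_region_info_t)info, &count, &object_name);
}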
9576
9577 #define OBJ_RESIDENT_COUNT(obj, entry_size) \
9578 MIN((entry_size), \
9579 ((obj)->all_reusable ? \
9580 (obj)->wired_page_count : \
9581 (obj)->resident_page_count - (obj)->reusable_page_count))
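/*
 * The macro above, written out as a hypothetical helper (not part of
 * this file) to make the accounting explicit: for an "all reusable"
 * object only the wired pages still count as resident; otherwise the
 * reusable pages are subtracted, and the result is clamped to the size
 * of the mapping being examined.
 */
static inline uint32_t
obj_resident_count(vm_object_t obj, uint32_t entry_size)
{
	uint32_t resident;

	if (obj->all_reusable)
		resident = obj->wired_page_count;
	else
		resident = obj->resident_page_count - obj->reusable_page_count;

	return MIN(entry_size, resident);
}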
9582
9583 void
9584 vm_map_region_top_walk(
9585 vm_map_entry_t entry,
9586 vm_region_top_info_t top)
9587 {
9588
9589 if (entry->object.vm_object == 0 || entry->is_sub_map) {
9590 top->share_mode = SM_EMPTY;
9591 top->ref_count = 0;
9592 top->obj_id = 0;
9593 return;
9594 }
9595
9596 {
9597 struct vm_object *obj, *tmp_obj;
9598 int ref_count;
9599 uint32_t entry_size;
9600
9601 entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64);
9602
9603 obj = entry->object.vm_object;
9604
9605 vm_object_lock(obj);
9606
9607 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
9608 ref_count--;
9609
9610 assert(obj->reusable_page_count <= obj->resident_page_count);
9611 if (obj->shadow) {
9612 if (ref_count == 1)
9613 top->private_pages_resident =
9614 OBJ_RESIDENT_COUNT(obj, entry_size);
9615 else
9616 top->shared_pages_resident =
9617 OBJ_RESIDENT_COUNT(obj, entry_size);
9618 top->ref_count = ref_count;
9619 top->share_mode = SM_COW;
9620
9621 while ((tmp_obj = obj->shadow)) {
9622 vm_object_lock(tmp_obj);
9623 vm_object_unlock(obj);
9624 obj = tmp_obj;
9625
9626 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
9627 ref_count--;
9628
9629 assert(obj->reusable_page_count <= obj->resident_page_count);
9630 top->shared_pages_resident +=
9631 OBJ_RESIDENT_COUNT(obj, entry_size);
9632 top->ref_count += ref_count - 1;
9633 }
9634 } else {
9635 if (entry->superpage_size) {
9636 top->share_mode = SM_LARGE_PAGE;
9637 top->shared_pages_resident = 0;
9638 top->private_pages_resident = entry_size;
9639 } else if (entry->needs_copy) {
9640 top->share_mode = SM_COW;
9641 top->shared_pages_resident =
9642 OBJ_RESIDENT_COUNT(obj, entry_size);
9643 } else {
9644 if (ref_count == 1 ||
9645 (ref_count == 2 && !(obj->pager_trusted) && !(obj->internal))) {
9646 top->share_mode = SM_PRIVATE;
9647 top->private_pages_resident =
9648 OBJ_RESIDENT_COUNT(obj,
9649 entry_size);
9650 } else {
9651 top->share_mode = SM_SHARED;
9652 top->shared_pages_resident =
9653 OBJ_RESIDENT_COUNT(obj,
9654 entry_size);
9655 }
9656 }
9657 top->ref_count = ref_count;
9658 }
9659 /* XXX K64: obj_id will be truncated */
9660 top->obj_id = (unsigned int) (uintptr_t)obj;
9661
9662 vm_object_unlock(obj);
9663 }
9664 }
9665
9666 void
9667 vm_map_region_walk(
9668 vm_map_t map,
9669 vm_map_offset_t va,
9670 vm_map_entry_t entry,
9671 vm_object_offset_t offset,
9672 vm_object_size_t range,
9673 vm_region_extended_info_t extended,
9674 boolean_t look_for_pages)
9675 {
9676 register struct vm_object *obj, *tmp_obj;
9677 register vm_map_offset_t last_offset;
9678 register int i;
9679 register int ref_count;
9680 struct vm_object *shadow_object;
9681 int shadow_depth;
9682
9683 if ((entry->object.vm_object == 0) ||
9684 (entry->is_sub_map) ||
9685 (entry->object.vm_object->phys_contiguous &&
9686 !entry->superpage_size)) {
9687 extended->share_mode = SM_EMPTY;
9688 extended->ref_count = 0;
9689 return;
9690 }
9691
9692 if (entry->superpage_size) {
9693 extended->shadow_depth = 0;
9694 extended->share_mode = SM_LARGE_PAGE;
9695 extended->ref_count = 1;
9696 extended->external_pager = 0;
9697 extended->pages_resident = (unsigned int)(range >> PAGE_SHIFT);
9698 extended->shadow_depth = 0;
9699 return;
9700 }
9701
9702 {
9703 obj = entry->object.vm_object;
9704
9705 vm_object_lock(obj);
9706
9707 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
9708 ref_count--;
9709
9710 if (look_for_pages) {
9711 for (last_offset = offset + range;
9712 offset < last_offset;
9713 offset += PAGE_SIZE_64, va += PAGE_SIZE)
9714 vm_map_region_look_for_page(map, va, obj,
9715 offset, ref_count,
9716 0, extended);
9717 } else {
9718 shadow_object = obj->shadow;
9719 shadow_depth = 0;
9720
9721 if ( !(obj->pager_trusted) && !(obj->internal))
9722 extended->external_pager = 1;
9723
9724 if (shadow_object != VM_OBJECT_NULL) {
9725 vm_object_lock(shadow_object);
9726 for (;
9727 shadow_object != VM_OBJECT_NULL;
9728 shadow_depth++) {
9729 vm_object_t next_shadow;
9730
9731 if ( !(shadow_object->pager_trusted) &&
9732 !(shadow_object->internal))
9733 extended->external_pager = 1;
9734
9735 next_shadow = shadow_object->shadow;
9736 if (next_shadow) {
9737 vm_object_lock(next_shadow);
9738 }
9739 vm_object_unlock(shadow_object);
9740 shadow_object = next_shadow;
9741 }
9742 }
9743 extended->shadow_depth = shadow_depth;
9744 }
9745
9746 if (extended->shadow_depth || entry->needs_copy)
9747 extended->share_mode = SM_COW;
9748 else {
9749 if (ref_count == 1)
9750 extended->share_mode = SM_PRIVATE;
9751 else {
9752 if (obj->true_share)
9753 extended->share_mode = SM_TRUESHARED;
9754 else
9755 extended->share_mode = SM_SHARED;
9756 }
9757 }
9758 extended->ref_count = ref_count - extended->shadow_depth;
9759
9760 for (i = 0; i < extended->shadow_depth; i++) {
9761 if ((tmp_obj = obj->shadow) == 0)
9762 break;
9763 vm_object_lock(tmp_obj);
9764 vm_object_unlock(obj);
9765
9766 if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress)
9767 ref_count--;
9768
9769 extended->ref_count += ref_count;
9770 obj = tmp_obj;
9771 }
9772 vm_object_unlock(obj);
9773
9774 if (extended->share_mode == SM_SHARED) {
9775 register vm_map_entry_t cur;
9776 register vm_map_entry_t last;
9777 int my_refs;
9778
9779 obj = entry->object.vm_object;
9780 last = vm_map_to_entry(map);
9781 my_refs = 0;
9782
9783 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
9784 ref_count--;
9785 for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next)
9786 my_refs += vm_map_region_count_obj_refs(cur, obj);
9787
9788 if (my_refs == ref_count)
9789 extended->share_mode = SM_PRIVATE_ALIASED;
9790 else if (my_refs > 1)
9791 extended->share_mode = SM_SHARED_ALIASED;
9792 }
9793 }
9794 }
9795
9796
9797 /* object is locked on entry and locked on return */
9798
9799
9800 static void
9801 vm_map_region_look_for_page(
9802 __unused vm_map_t map,
9803 __unused vm_map_offset_t va,
9804 vm_object_t object,
9805 vm_object_offset_t offset,
9806 int max_refcnt,
9807 int depth,
9808 vm_region_extended_info_t extended)
9809 {
9810 register vm_page_t p;
9811 register vm_object_t shadow;
9812 register int ref_count;
9813 vm_object_t caller_object;
9814 #if MACH_PAGEMAP
9815 kern_return_t kr;
9816 #endif
9817 shadow = object->shadow;
9818 caller_object = object;
9819
9820
9821 while (TRUE) {
9822
9823 if ( !(object->pager_trusted) && !(object->internal))
9824 extended->external_pager = 1;
9825
9826 if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
9827 if (shadow && (max_refcnt == 1))
9828 extended->pages_shared_now_private++;
9829
9830 if (!p->fictitious &&
9831 (p->dirty || pmap_is_modified(p->phys_page)))
9832 extended->pages_dirtied++;
9833
9834 extended->pages_resident++;
9835
9836 if(object != caller_object)
9837 vm_object_unlock(object);
9838
9839 return;
9840 }
9841 #if MACH_PAGEMAP
9842 if (object->existence_map) {
9843 if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_EXISTS) {
9844
9845 extended->pages_swapped_out++;
9846
9847 if(object != caller_object)
9848 vm_object_unlock(object);
9849
9850 return;
9851 }
9852 } else if (object->internal &&
9853 object->alive &&
9854 !object->terminating &&
9855 object->pager_ready) {
9856
9857 memory_object_t pager;
9858
9859 vm_object_paging_begin(object);
9860 pager = object->pager;
9861 vm_object_unlock(object);
9862
9863 kr = memory_object_data_request(
9864 pager,
9865 offset + object->paging_offset,
9866 0, /* just poke the pager */
9867 VM_PROT_READ,
9868 NULL);
9869
9870 vm_object_lock(object);
9871 vm_object_paging_end(object);
9872
9873 if (kr == KERN_SUCCESS) {
9874 /* the pager has that page */
9875 extended->pages_swapped_out++;
9876 if (object != caller_object)
9877 vm_object_unlock(object);
9878 return;
9879 }
9880 }
9881 #endif /* MACH_PAGEMAP */
9882
9883 if (shadow) {
9884 vm_object_lock(shadow);
9885
9886 if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress)
9887 ref_count--;
9888
9889 if (++depth > extended->shadow_depth)
9890 extended->shadow_depth = depth;
9891
9892 if (ref_count > max_refcnt)
9893 max_refcnt = ref_count;
9894
9895 if(object != caller_object)
9896 vm_object_unlock(object);
9897
9898 offset = offset + object->vo_shadow_offset;
9899 object = shadow;
9900 shadow = object->shadow;
9901 continue;
9902 }
9903 if(object != caller_object)
9904 vm_object_unlock(object);
9905 break;
9906 }
9907 }
9908
9909 static int
9910 vm_map_region_count_obj_refs(
9911 vm_map_entry_t entry,
9912 vm_object_t object)
9913 {
9914 register int ref_count;
9915 register vm_object_t chk_obj;
9916 register vm_object_t tmp_obj;
9917
9918 if (entry->object.vm_object == 0)
9919 return(0);
9920
9921 if (entry->is_sub_map)
9922 return(0);
9923 else {
9924 ref_count = 0;
9925
9926 chk_obj = entry->object.vm_object;
9927 vm_object_lock(chk_obj);
9928
9929 while (chk_obj) {
9930 if (chk_obj == object)
9931 ref_count++;
9932 tmp_obj = chk_obj->shadow;
9933 if (tmp_obj)
9934 vm_object_lock(tmp_obj);
9935 vm_object_unlock(chk_obj);
9936
9937 chk_obj = tmp_obj;
9938 }
9939 }
9940 return(ref_count);
9941 }
9942
9943
9944 /*
9945 * Routine: vm_map_simplify
9946 *
9947 * Description:
9948 * Attempt to simplify the map representation in
9949 * the vicinity of the given starting address.
9950 * Note:
9951 * This routine is intended primarily to keep the
9952 * kernel maps more compact -- they generally don't
9953 * benefit from the "expand a map entry" technology
9954 * at allocation time because the adjacent entry
9955 * is often wired down.
9956 */
9957 void
9958 vm_map_simplify_entry(
9959 vm_map_t map,
9960 vm_map_entry_t this_entry)
9961 {
9962 vm_map_entry_t prev_entry;
9963
9964 counter(c_vm_map_simplify_entry_called++);
9965
9966 prev_entry = this_entry->vme_prev;
9967
9968 if ((this_entry != vm_map_to_entry(map)) &&
9969 (prev_entry != vm_map_to_entry(map)) &&
9970
9971 (prev_entry->vme_end == this_entry->vme_start) &&
9972
9973 (prev_entry->is_sub_map == this_entry->is_sub_map) &&
9974
9975 (prev_entry->object.vm_object == this_entry->object.vm_object) &&
9976 ((prev_entry->offset + (prev_entry->vme_end -
9977 prev_entry->vme_start))
9978 == this_entry->offset) &&
9979
9980 (prev_entry->inheritance == this_entry->inheritance) &&
9981 (prev_entry->protection == this_entry->protection) &&
9982 (prev_entry->max_protection == this_entry->max_protection) &&
9983 (prev_entry->behavior == this_entry->behavior) &&
9984 (prev_entry->alias == this_entry->alias) &&
9985 (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) &&
9986 (prev_entry->no_cache == this_entry->no_cache) &&
9987 (prev_entry->wired_count == this_entry->wired_count) &&
9988 (prev_entry->user_wired_count == this_entry->user_wired_count) &&
9989
9990 (prev_entry->needs_copy == this_entry->needs_copy) &&
9991 (prev_entry->permanent == this_entry->permanent) &&
9992
9993 (prev_entry->use_pmap == FALSE) &&
9994 (this_entry->use_pmap == FALSE) &&
9995 (prev_entry->in_transition == FALSE) &&
9996 (this_entry->in_transition == FALSE) &&
9997 (prev_entry->needs_wakeup == FALSE) &&
9998 (this_entry->needs_wakeup == FALSE) &&
9999 (prev_entry->is_shared == FALSE) &&
10000 (this_entry->is_shared == FALSE)
10001 ) {
10002 _vm_map_store_entry_unlink(&map->hdr, prev_entry);
10003 this_entry->vme_start = prev_entry->vme_start;
10004 this_entry->offset = prev_entry->offset;
10005 if (prev_entry->is_sub_map) {
10006 vm_map_deallocate(prev_entry->object.sub_map);
10007 } else {
10008 vm_object_deallocate(prev_entry->object.vm_object);
10009 }
10010 vm_map_entry_dispose(map, prev_entry);
10011 SAVE_HINT_MAP_WRITE(map, this_entry);
10012 counter(c_vm_map_simplified++);
10013 }
10014 }
10015
10016 void
10017 vm_map_simplify(
10018 vm_map_t map,
10019 vm_map_offset_t start)
10020 {
10021 vm_map_entry_t this_entry;
10022
10023 vm_map_lock(map);
10024 if (vm_map_lookup_entry(map, start, &this_entry)) {
10025 vm_map_simplify_entry(map, this_entry);
10026 vm_map_simplify_entry(map, this_entry->vme_next);
10027 }
10028 counter(c_vm_map_simplify_called++);
10029 vm_map_unlock(map);
10030 }
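/*
 * Worked example (hypothetical values) of the coalescing test in
 * vm_map_simplify_entry(): two entries merge only when they are
 * virtually contiguous and map contiguous offsets of the same object
 * with identical attributes.
 *
 *	prev_entry: [0x1000, 0x3000)  object A, offset 0x0000
 *	this_entry: [0x3000, 0x5000)  object A, offset 0x2000
 *
 * prev_entry->vme_end == this_entry->vme_start (0x3000), and
 * prev_entry->offset + (prev_entry->vme_end - prev_entry->vme_start)
 * == 0x0000 + 0x2000 == this_entry->offset, so with matching
 * protection, inheritance, wiring, etc., the two collapse into one
 * entry [0x1000, 0x5000) at offset 0x0000 and prev_entry is disposed.
 */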
10031
10032 static void
10033 vm_map_simplify_range(
10034 vm_map_t map,
10035 vm_map_offset_t start,
10036 vm_map_offset_t end)
10037 {
10038 vm_map_entry_t entry;
10039
10040 /*
10041 * The map should be locked (for "write") by the caller.
10042 */
10043
10044 if (start >= end) {
10045 /* invalid address range */
10046 return;
10047 }
10048
10049 start = vm_map_trunc_page(start);
10050 end = vm_map_round_page(end);
10051
10052 if (!vm_map_lookup_entry(map, start, &entry)) {
10053 /* "start" is not mapped and "entry" ends before "start" */
10054 if (entry == vm_map_to_entry(map)) {
10055 /* start with first entry in the map */
10056 entry = vm_map_first_entry(map);
10057 } else {
10058 /* start with next entry */
10059 entry = entry->vme_next;
10060 }
10061 }
10062
10063 while (entry != vm_map_to_entry(map) &&
10064 entry->vme_start <= end) {
10065 /* try and coalesce "entry" with its previous entry */
10066 vm_map_simplify_entry(map, entry);
10067 entry = entry->vme_next;
10068 }
10069 }
10070
10071
10072 /*
10073 * Routine: vm_map_machine_attribute
10074 * Purpose:
10075 * Provide machine-specific attributes to mappings,
10076 * such as cacheability, etc., for machines that provide
10077 * them. NUMA architectures and machines with big/strange
10078 * caches will use this.
10079 * Note:
10080 * Responsibilities for locking and checking are handled here;
10081 * everything else is handled in the pmap module. If any non-volatile
10082 * information must be kept, the pmap module should handle
10083 * it itself. [This assumes that attributes do not
10084 * need to be inherited, which seems ok to me]
10085 */
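/*
 * Hedged usage sketch (hypothetical caller and variable names): a
 * driver that has just modified a user mapping by DMA might request
 * a cache flush for that range, for example:
 *
 *	vm_machine_attribute_val_t val = MATTR_VAL_CACHE_FLUSH;
 *	kr = vm_map_machine_attribute(map, start, start + size,
 *				      MATTR_CACHE, &val);
 *
 * Only MATTR_CACHE takes the page-by-page path below; any other
 * attribute is handed straight to pmap_attribute().
 */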
10086 kern_return_t
10087 vm_map_machine_attribute(
10088 vm_map_t map,
10089 vm_map_offset_t start,
10090 vm_map_offset_t end,
10091 vm_machine_attribute_t attribute,
10092 vm_machine_attribute_val_t* value) /* IN/OUT */
10093 {
10094 kern_return_t ret;
10095 vm_map_size_t sync_size;
10096 vm_map_entry_t entry;
10097
10098 if (start < vm_map_min(map) || end > vm_map_max(map))
10099 return KERN_INVALID_ADDRESS;
10100
10101 /* Figure out how much memory we need to flush (in page increments) */
10102 sync_size = end - start;
10103
10104 vm_map_lock(map);
10105
10106 if (attribute != MATTR_CACHE) {
10107 /* If we don't have to find physical addresses, we */
10108 /* don't have to do an explicit traversal here. */
10109 ret = pmap_attribute(map->pmap, start, end-start,
10110 attribute, value);
10111 vm_map_unlock(map);
10112 return ret;
10113 }
10114
10115 ret = KERN_SUCCESS; /* Assume it all worked */
10116
10117 while(sync_size) {
10118 if (vm_map_lookup_entry(map, start, &entry)) {
10119 vm_map_size_t sub_size;
10120 if((entry->vme_end - start) > sync_size) {
10121 sub_size = sync_size;
10122 sync_size = 0;
10123 } else {
10124 sub_size = entry->vme_end - start;
10125 sync_size -= sub_size;
10126 }
10127 if(entry->is_sub_map) {
10128 vm_map_offset_t sub_start;
10129 vm_map_offset_t sub_end;
10130
10131 sub_start = (start - entry->vme_start)
10132 + entry->offset;
10133 sub_end = sub_start + sub_size;
10134 vm_map_machine_attribute(
10135 entry->object.sub_map,
10136 sub_start,
10137 sub_end,
10138 attribute, value);
10139 } else {
10140 if(entry->object.vm_object) {
10141 vm_page_t m;
10142 vm_object_t object;
10143 vm_object_t base_object;
10144 vm_object_t last_object;
10145 vm_object_offset_t offset;
10146 vm_object_offset_t base_offset;
10147 vm_map_size_t range;
10148 range = sub_size;
10149 offset = (start - entry->vme_start)
10150 + entry->offset;
10151 base_offset = offset;
10152 object = entry->object.vm_object;
10153 base_object = object;
10154 last_object = NULL;
10155
10156 vm_object_lock(object);
10157
10158 while (range) {
10159 m = vm_page_lookup(
10160 object, offset);
10161
10162 if (m && !m->fictitious) {
10163 ret =
10164 pmap_attribute_cache_sync(
10165 m->phys_page,
10166 PAGE_SIZE,
10167 attribute, value);
10168
10169 } else if (object->shadow) {
10170 offset = offset + object->vo_shadow_offset;
10171 last_object = object;
10172 object = object->shadow;
10173 vm_object_lock(last_object->shadow);
10174 vm_object_unlock(last_object);
10175 continue;
10176 }
10177 range -= PAGE_SIZE;
10178
10179 if (base_object != object) {
10180 vm_object_unlock(object);
10181 vm_object_lock(base_object);
10182 object = base_object;
10183 }
10184 /* Bump to the next page */
10185 base_offset += PAGE_SIZE;
10186 offset = base_offset;
10187 }
10188 vm_object_unlock(object);
10189 }
10190 }
10191 start += sub_size;
10192 } else {
10193 vm_map_unlock(map);
10194 return KERN_FAILURE;
10195 }
10196
10197 }
10198
10199 vm_map_unlock(map);
10200
10201 return ret;
10202 }
10203
10204 /*
10205 * vm_map_behavior_set:
10206 *
10207 * Sets the paging reference behavior of the specified address
10208 * range in the target map. Paging reference behavior affects
10209 * how pagein operations resulting from faults on the map will be
10210 * clustered.
10211 */
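/*
 * For orientation only (hedged, not authoritative): the BSD madvise()
 * path typically translates its advice into one of these behaviors
 * before reaching this routine, roughly:
 *
 *	MADV_SEQUENTIAL  ->  VM_BEHAVIOR_SEQUENTIAL	(persistent state)
 *	MADV_WILLNEED    ->  VM_BEHAVIOR_WILLNEED	(immediate action)
 *	MADV_FREE        ->  VM_BEHAVIOR_FREE		(immediate action)
 */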
10212 kern_return_t
10213 vm_map_behavior_set(
10214 vm_map_t map,
10215 vm_map_offset_t start,
10216 vm_map_offset_t end,
10217 vm_behavior_t new_behavior)
10218 {
10219 register vm_map_entry_t entry;
10220 vm_map_entry_t temp_entry;
10221
10222 XPR(XPR_VM_MAP,
10223 "vm_map_behavior_set, 0x%X start 0x%X end 0x%X behavior %d",
10224 map, start, end, new_behavior, 0);
10225
10226 if (start > end ||
10227 start < vm_map_min(map) ||
10228 end > vm_map_max(map)) {
10229 return KERN_NO_SPACE;
10230 }
10231
10232 switch (new_behavior) {
10233
10234 /*
10235 * This first block of behaviors all set a persistent state on the specified
10236 * memory range. All we have to do here is to record the desired behavior
10237 * in the vm_map_entry_t's.
10238 */
10239
10240 case VM_BEHAVIOR_DEFAULT:
10241 case VM_BEHAVIOR_RANDOM:
10242 case VM_BEHAVIOR_SEQUENTIAL:
10243 case VM_BEHAVIOR_RSEQNTL:
10244 case VM_BEHAVIOR_ZERO_WIRED_PAGES:
10245 vm_map_lock(map);
10246
10247 /*
10248 * The entire address range must be valid for the map.
10249 * Note that vm_map_range_check() does a
10250 * vm_map_lookup_entry() internally and returns the
10251 * entry containing the start of the address range if
10252 * the entire range is valid.
10253 */
10254 if (vm_map_range_check(map, start, end, &temp_entry)) {
10255 entry = temp_entry;
10256 vm_map_clip_start(map, entry, start);
10257 }
10258 else {
10259 vm_map_unlock(map);
10260 return(KERN_INVALID_ADDRESS);
10261 }
10262
10263 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
10264 vm_map_clip_end(map, entry, end);
10265 assert(!entry->use_pmap);
10266
10267 if( new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES ) {
10268 entry->zero_wired_pages = TRUE;
10269 } else {
10270 entry->behavior = new_behavior;
10271 }
10272 entry = entry->vme_next;
10273 }
10274
10275 vm_map_unlock(map);
10276 break;
10277
10278 /*
10279 * The rest of these are different from the above in that they cause
10280 * an immediate action to take place as opposed to setting a behavior that
10281 * affects future actions.
10282 */
10283
10284 case VM_BEHAVIOR_WILLNEED:
10285 return vm_map_willneed(map, start, end);
10286
10287 case VM_BEHAVIOR_DONTNEED:
10288 return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS);
10289
10290 case VM_BEHAVIOR_FREE:
10291 return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS);
10292
10293 case VM_BEHAVIOR_REUSABLE:
10294 return vm_map_reusable_pages(map, start, end);
10295
10296 case VM_BEHAVIOR_REUSE:
10297 return vm_map_reuse_pages(map, start, end);
10298
10299 case VM_BEHAVIOR_CAN_REUSE:
10300 return vm_map_can_reuse(map, start, end);
10301
10302 default:
10303 return(KERN_INVALID_ARGUMENT);
10304 }
10305
10306 return(KERN_SUCCESS);
10307 }
10308
10309
10310 /*
10311 * Internals for madvise(MADV_WILLNEED) system call.
10312 *
10313 * The present implementation is to do a read-ahead if the mapping corresponds
10314 * to a mapped regular file. If it's an anonymous mapping, then we do nothing
10315 * and basically ignore the "advice" (which we are always free to do).
10316 */
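/*
 * Hedged userspace-side sketch (hypothetical descriptor "fd" and
 * length "len") of the call that ends up here:
 *
 *	void *p = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
 *	madvise(p, len, MADV_WILLNEED);
 *
 * which asks for asynchronous read-ahead of the file-backed range.
 * For an anonymous mapping the advice is accepted but has no effect.
 */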
10317
10318
10319 static kern_return_t
10320 vm_map_willneed(
10321 vm_map_t map,
10322 vm_map_offset_t start,
10323 vm_map_offset_t end
10324 )
10325 {
10326 vm_map_entry_t entry;
10327 vm_object_t object;
10328 memory_object_t pager;
10329 struct vm_object_fault_info fault_info;
10330 kern_return_t kr;
10331 vm_object_size_t len;
10332 vm_object_offset_t offset;
10333
10334 /*
10335 * Fill in static values in fault_info. Several fields get ignored by the code
10336 * we call, but we'll fill them in anyway since uninitialized fields are bad
10337 * when it comes to future backwards compatibility.
10338 */
10339
10340 fault_info.interruptible = THREAD_UNINT; /* ignored value */
10341 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
10342 fault_info.no_cache = FALSE; /* ignored value */
10343 fault_info.stealth = TRUE;
10344 fault_info.io_sync = FALSE;
10345 fault_info.cs_bypass = FALSE;
10346 fault_info.mark_zf_absent = FALSE;
10347
10348 /*
10349 * The MADV_WILLNEED operation doesn't require any changes to the
10350 * vm_map_entry_t's, so the read lock is sufficient.
10351 */
10352
10353 vm_map_lock_read(map);
10354
10355 /*
10356 * The madvise semantics require that the address range be fully
10357 * allocated with no holes. Otherwise, we're required to return
10358 * an error.
10359 */
10360
10361 if (! vm_map_range_check(map, start, end, &entry)) {
10362 vm_map_unlock_read(map);
10363 return KERN_INVALID_ADDRESS;
10364 }
10365
10366 /*
10367 * Examine each vm_map_entry_t in the range.
10368 */
10369 for (; entry != vm_map_to_entry(map) && start < end; ) {
10370
10371 /*
10372 * The first time through, the start address could be anywhere
10373 * within the vm_map_entry we found. So adjust the offset to
10374 * correspond. After that, the offset will always be zero to
10375 * correspond to the beginning of the current vm_map_entry.
10376 */
10377 offset = (start - entry->vme_start) + entry->offset;
10378
10379 /*
10380 * Set the length so we don't go beyond the end of the
10381 * map_entry or beyond the end of the range we were given.
10382 * This range could also span multiple map entries, all of which
10383 * map different files, so make sure we only do the right amount
10384 * of I/O for each object. Note that it's possible for there
10385 * to be multiple map entries all referring to the same object
10386 * but with different page permissions, but it's not worth
10387 * trying to optimize that case.
10388 */
10389 len = MIN(entry->vme_end - start, end - start);
10390
10391 if ((vm_size_t) len != len) {
10392 /* 32-bit overflow */
10393 len = (vm_size_t) (0 - PAGE_SIZE);
10394 }
10395 fault_info.cluster_size = (vm_size_t) len;
10396 fault_info.lo_offset = offset;
10397 fault_info.hi_offset = offset + len;
10398 fault_info.user_tag = entry->alias;
10399
10400 /*
10401 * If there's no read permission to this mapping, then just
10402 * skip it.
10403 */
10404 if ((entry->protection & VM_PROT_READ) == 0) {
10405 entry = entry->vme_next;
10406 start = entry->vme_start;
10407 continue;
10408 }
10409
10410 /*
10411 * Find the file object backing this map entry. If there is
10412 * none, then we simply ignore the "will need" advice for this
10413 * entry and go on to the next one.
10414 */
10415 if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) {
10416 entry = entry->vme_next;
10417 start = entry->vme_start;
10418 continue;
10419 }
10420
10421 /*
10422 * The data_request() could take a long time, so let's
10423 * release the map lock to avoid blocking other threads.
10424 */
10425 vm_map_unlock_read(map);
10426
10427 vm_object_paging_begin(object);
10428 pager = object->pager;
10429 vm_object_unlock(object);
10430
10431 /*
10432 * Get the data from the object asynchronously.
10433 *
10434 * Note that memory_object_data_request() places limits on the
10435 * amount of I/O it will do. Regardless of the len we
10436 * specified, it won't do more than MAX_UPL_TRANSFER and it
10437 * silently truncates the len to that size. This isn't
10438 * necessarily bad since madvise shouldn't really be used to
10439 * page in unlimited amounts of data. Other Unix variants
10440 * limit the willneed case as well. If this turns out to be an
10441 * issue for developers, then we can always adjust the policy
10442 * here and still be backwards compatible since this is all
10443 * just "advice".
10444 */
10445 kr = memory_object_data_request(
10446 pager,
10447 offset + object->paging_offset,
10448 0, /* ignored */
10449 VM_PROT_READ,
10450 (memory_object_fault_info_t)&fault_info);
10451
10452 vm_object_lock(object);
10453 vm_object_paging_end(object);
10454 vm_object_unlock(object);
10455
10456 /*
10457 * If we couldn't do the I/O for some reason, just give up on
10458 * the madvise. We still return success to the user since
10459 * madvise isn't supposed to fail when the advice can't be
10460 * taken.
10461 */
10462 if (kr != KERN_SUCCESS) {
10463 return KERN_SUCCESS;
10464 }
10465
10466 start += len;
10467 if (start >= end) {
10468 /* done */
10469 return KERN_SUCCESS;
10470 }
10471
10472 /* look up next entry */
10473 vm_map_lock_read(map);
10474 if (! vm_map_lookup_entry(map, start, &entry)) {
10475 /*
10476 * There's a new hole in the address range.
10477 */
10478 vm_map_unlock_read(map);
10479 return KERN_INVALID_ADDRESS;
10480 }
10481 }
10482
10483 vm_map_unlock_read(map);
10484 return KERN_SUCCESS;
10485 }
10486
10487 static boolean_t
10488 vm_map_entry_is_reusable(
10489 vm_map_entry_t entry)
10490 {
10491 vm_object_t object;
10492
10493 if (entry->is_shared ||
10494 entry->is_sub_map ||
10495 entry->in_transition ||
10496 entry->protection != VM_PROT_DEFAULT ||
10497 entry->max_protection != VM_PROT_ALL ||
10498 entry->inheritance != VM_INHERIT_DEFAULT ||
10499 entry->no_cache ||
10500 entry->permanent ||
10501 entry->superpage_size != 0 ||
10502 entry->zero_wired_pages ||
10503 entry->wired_count != 0 ||
10504 entry->user_wired_count != 0) {
10505 return FALSE;
10506 }
10507
10508 object = entry->object.vm_object;
10509 if (object == VM_OBJECT_NULL) {
10510 return TRUE;
10511 }
10512 if (object->ref_count == 1 &&
10513 object->wired_page_count == 0 &&
10514 object->copy == VM_OBJECT_NULL &&
10515 object->shadow == VM_OBJECT_NULL &&
10516 object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
10517 object->internal &&
10518 !object->true_share &&
10519 object->wimg_bits == VM_WIMG_USE_DEFAULT &&
10520 !object->code_signed) {
10521 return TRUE;
10522 }
10523 return FALSE;
10524
10525
10526 }
10527
10528 static kern_return_t
10529 vm_map_reuse_pages(
10530 vm_map_t map,
10531 vm_map_offset_t start,
10532 vm_map_offset_t end)
10533 {
10534 vm_map_entry_t entry;
10535 vm_object_t object;
10536 vm_object_offset_t start_offset, end_offset;
10537
10538 /*
10539 * The MADV_REUSE operation doesn't require any changes to the
10540 * vm_map_entry_t's, so the read lock is sufficient.
10541 */
10542
10543 vm_map_lock_read(map);
10544
10545 /*
10546 * The madvise semantics require that the address range be fully
10547 * allocated with no holes. Otherwise, we're required to return
10548 * an error.
10549 */
10550
10551 if (!vm_map_range_check(map, start, end, &entry)) {
10552 vm_map_unlock_read(map);
10553 vm_page_stats_reusable.reuse_pages_failure++;
10554 return KERN_INVALID_ADDRESS;
10555 }
10556
10557 /*
10558 * Examine each vm_map_entry_t in the range.
10559 */
10560 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
10561 entry = entry->vme_next) {
10562 /*
10563 * Sanity check on the VM map entry.
10564 */
10565 if (! vm_map_entry_is_reusable(entry)) {
10566 vm_map_unlock_read(map);
10567 vm_page_stats_reusable.reuse_pages_failure++;
10568 return KERN_INVALID_ADDRESS;
10569 }
10570
10571 /*
10572 * The first time through, the start address could be anywhere
10573 * within the vm_map_entry we found. So adjust the offset to
10574 * correspond.
10575 */
10576 if (entry->vme_start < start) {
10577 start_offset = start - entry->vme_start;
10578 } else {
10579 start_offset = 0;
10580 }
10581 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
10582 start_offset += entry->offset;
10583 end_offset += entry->offset;
10584
10585 object = entry->object.vm_object;
10586 if (object != VM_OBJECT_NULL) {
10587 vm_object_lock(object);
10588 vm_object_reuse_pages(object, start_offset, end_offset,
10589 TRUE);
10590 vm_object_unlock(object);
10591 }
10592
10593 if (entry->alias == VM_MEMORY_MALLOC_LARGE_REUSABLE) {
10594 /*
10595 * XXX
10596 * We do not hold the VM map exclusively here.
10597 * The "alias" field is not that critical, so it's
10598 * safe to update it here, as long as it is the only
10599 * one that can be modified while holding the VM map
10600 * "shared".
10601 */
10602 entry->alias = VM_MEMORY_MALLOC_LARGE_REUSED;
10603 }
10604 }
10605
10606 vm_map_unlock_read(map);
10607 vm_page_stats_reusable.reuse_pages_success++;
10608 return KERN_SUCCESS;
10609 }
10610
10611
10612 static kern_return_t
10613 vm_map_reusable_pages(
10614 vm_map_t map,
10615 vm_map_offset_t start,
10616 vm_map_offset_t end)
10617 {
10618 vm_map_entry_t entry;
10619 vm_object_t object;
10620 vm_object_offset_t start_offset, end_offset;
10621
10622 /*
10623 * The MADV_REUSABLE operation doesn't require any changes to the
10624 * vm_map_entry_t's, so the read lock is sufficient.
10625 */
10626
10627 vm_map_lock_read(map);
10628
10629 /*
10630 * The madvise semantics require that the address range be fully
10631 * allocated with no holes. Otherwise, we're required to return
10632 * an error.
10633 */
10634
10635 if (!vm_map_range_check(map, start, end, &entry)) {
10636 vm_map_unlock_read(map);
10637 vm_page_stats_reusable.reusable_pages_failure++;
10638 return KERN_INVALID_ADDRESS;
10639 }
10640
10641 /*
10642 * Examine each vm_map_entry_t in the range.
10643 */
10644 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
10645 entry = entry->vme_next) {
10646 int kill_pages = 0;
10647
10648 /*
10649 * Sanity check on the VM map entry.
10650 */
10651 if (! vm_map_entry_is_reusable(entry)) {
10652 vm_map_unlock_read(map);
10653 vm_page_stats_reusable.reusable_pages_failure++;
10654 return KERN_INVALID_ADDRESS;
10655 }
10656
10657 /*
10658 * The first time through, the start address could be anywhere
10659 * within the vm_map_entry we found. So adjust the offset to
10660 * correspond.
10661 */
10662 if (entry->vme_start < start) {
10663 start_offset = start - entry->vme_start;
10664 } else {
10665 start_offset = 0;
10666 }
10667 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
10668 start_offset += entry->offset;
10669 end_offset += entry->offset;
10670
10671 object = entry->object.vm_object;
10672 if (object == VM_OBJECT_NULL)
10673 continue;
10674
10675
10676 vm_object_lock(object);
10677 if (object->ref_count == 1 && !object->shadow)
10678 kill_pages = 1;
10679 else
10680 kill_pages = -1;
10681 if (kill_pages != -1) {
10682 vm_object_deactivate_pages(object,
10683 start_offset,
10684 end_offset - start_offset,
10685 kill_pages,
10686 TRUE /*reusable_pages*/);
10687 } else {
10688 vm_page_stats_reusable.reusable_pages_shared++;
10689 }
10690 vm_object_unlock(object);
10691
10692 if (entry->alias == VM_MEMORY_MALLOC_LARGE ||
10693 entry->alias == VM_MEMORY_MALLOC_LARGE_REUSED) {
10694 /*
10695 * XXX
10696 * We do not hold the VM map exclusively here.
10697 * The "alias" field is not that critical, so it's
10698 * safe to update it here, as long as it is the only
10699 * one that can be modified while holding the VM map
10700 * "shared".
10701 */
10702 entry->alias = VM_MEMORY_MALLOC_LARGE_REUSABLE;
10703 }
10704 }
10705
10706 vm_map_unlock_read(map);
10707 vm_page_stats_reusable.reusable_pages_success++;
10708 return KERN_SUCCESS;
10709 }
10710
10711
10712 static kern_return_t
10713 vm_map_can_reuse(
10714 vm_map_t map,
10715 vm_map_offset_t start,
10716 vm_map_offset_t end)
10717 {
10718 vm_map_entry_t entry;
10719
10720 /*
10721 * The MADV_REUSABLE operation doesn't require any changes to the
10722 * vm_map_entry_t's, so the read lock is sufficient.
10723 */
10724
10725 vm_map_lock_read(map);
10726
10727 /*
10728 * The madvise semantics require that the address range be fully
10729 * allocated with no holes. Otherwise, we're required to return
10730 * an error.
10731 */
10732
10733 if (!vm_map_range_check(map, start, end, &entry)) {
10734 vm_map_unlock_read(map);
10735 vm_page_stats_reusable.can_reuse_failure++;
10736 return KERN_INVALID_ADDRESS;
10737 }
10738
10739 /*
10740 * Examine each vm_map_entry_t in the range.
10741 */
10742 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
10743 entry = entry->vme_next) {
10744 /*
10745 * Sanity check on the VM map entry.
10746 */
10747 if (! vm_map_entry_is_reusable(entry)) {
10748 vm_map_unlock_read(map);
10749 vm_page_stats_reusable.can_reuse_failure++;
10750 return KERN_INVALID_ADDRESS;
10751 }
10752 }
10753
10754 vm_map_unlock_read(map);
10755 vm_page_stats_reusable.can_reuse_success++;
10756 return KERN_SUCCESS;
10757 }
10758
10759
10760
10761 #include <mach_kdb.h>
10762 #if MACH_KDB
10763 #include <ddb/db_output.h>
10764 #include <vm/vm_print.h>
10765
10766 #define printf db_printf
10767
10768 /*
10769 * Forward declarations for internal functions.
10770 */
10771 extern void vm_map_links_print(
10772 struct vm_map_links *links);
10773
10774 extern void vm_map_header_print(
10775 struct vm_map_header *header);
10776
10777 extern void vm_map_entry_print(
10778 vm_map_entry_t entry);
10779
10780 extern void vm_follow_entry(
10781 vm_map_entry_t entry);
10782
10783 extern void vm_follow_map(
10784 vm_map_t map);
10785
10786 /*
10787 * vm_map_links_print: [ debug ]
10788 */
10789 void
10790 vm_map_links_print(
10791 struct vm_map_links *links)
10792 {
10793 iprintf("prev = %08X next = %08X start = %016llX end = %016llX\n",
10794 links->prev,
10795 links->next,
10796 (unsigned long long)links->start,
10797 (unsigned long long)links->end);
10798 }
10799
10800 /*
10801 * vm_map_header_print: [ debug ]
10802 */
10803 void
10804 vm_map_header_print(
10805 struct vm_map_header *header)
10806 {
10807 vm_map_links_print(&header->links);
10808 iprintf("nentries = %08X, %sentries_pageable\n",
10809 header->nentries,
10810 (header->entries_pageable ? "" : "!"));
10811 }
10812
10813 /*
10814 * vm_follow_entry: [ debug ]
10815 */
10816 void
10817 vm_follow_entry(
10818 vm_map_entry_t entry)
10819 {
10820 int shadows;
10821
10822 iprintf("map entry %08X\n", entry);
10823
10824 db_indent += 2;
10825
10826 shadows = vm_follow_object(entry->object.vm_object);
10827 iprintf("Total objects : %d\n",shadows);
10828
10829 db_indent -= 2;
10830 }
10831
10832 /*
10833 * vm_map_entry_print: [ debug ]
10834 */
10835 void
10836 vm_map_entry_print(
10837 register vm_map_entry_t entry)
10838 {
10839 static const char *inheritance_name[4] =
10840 { "share", "copy", "none", "?"};
10841 static const char *behavior_name[4] =
10842 { "dflt", "rand", "seqtl", "rseqntl" };
10843
10844 iprintf("map entry %08X - prev = %08X next = %08X\n", entry, entry->vme_prev, entry->vme_next);
10845
10846 db_indent += 2;
10847
10848 vm_map_links_print(&entry->links);
10849
10850 iprintf("start = %016llX end = %016llX - prot=%x/%x/%s\n",
10851 (unsigned long long)entry->vme_start,
10852 (unsigned long long)entry->vme_end,
10853 entry->protection,
10854 entry->max_protection,
10855 inheritance_name[(entry->inheritance & 0x3)]);
10856
10857 iprintf("behavior = %s, wired_count = %d, user_wired_count = %d\n",
10858 behavior_name[(entry->behavior & 0x3)],
10859 entry->wired_count,
10860 entry->user_wired_count);
10861 iprintf("%sin_transition, %sneeds_wakeup\n",
10862 (entry->in_transition ? "" : "!"),
10863 (entry->needs_wakeup ? "" : "!"));
10864
10865 if (entry->is_sub_map) {
10866 iprintf("submap = %08X - offset = %016llX\n",
10867 entry->object.sub_map,
10868 (unsigned long long)entry->offset);
10869 } else {
10870 iprintf("object = %08X offset = %016llX - ",
10871 entry->object.vm_object,
10872 (unsigned long long)entry->offset);
10873 printf("%sis_shared, %sneeds_copy\n",
10874 (entry->is_shared ? "" : "!"),
10875 (entry->needs_copy ? "" : "!"));
10876 }
10877
10878 db_indent -= 2;
10879 }
10880
10881 /*
10882 * vm_follow_map: [ debug ]
10883 */
10884 void
10885 vm_follow_map(
10886 vm_map_t map)
10887 {
10888 register vm_map_entry_t entry;
10889
10890 iprintf("task map %08X\n", map);
10891
10892 db_indent += 2;
10893
10894 for (entry = vm_map_first_entry(map);
10895 entry && entry != vm_map_to_entry(map);
10896 entry = entry->vme_next) {
10897 vm_follow_entry(entry);
10898 }
10899
10900 db_indent -= 2;
10901 }
10902
10903 /*
10904 * vm_map_print: [ debug ]
10905 */
10906 void
10907 vm_map_print(
10908 db_addr_t inmap)
10909 {
10910 register vm_map_entry_t entry;
10911 vm_map_t map;
10912 #if TASK_SWAPPER
10913 char *swstate;
10914 #endif /* TASK_SWAPPER */
10915
10916 map = (vm_map_t)(long)
10917 inmap; /* Make sure we have the right type */
10918
10919 iprintf("task map %08X\n", map);
10920
10921 db_indent += 2;
10922
10923 vm_map_header_print(&map->hdr);
10924
10925 iprintf("pmap = %08X size = %08X ref = %d hint = %08X first_free = %08X\n",
10926 map->pmap,
10927 map->size,
10928 map->ref_count,
10929 map->hint,
10930 map->first_free);
10931
10932 iprintf("%swait_for_space, %swiring_required, timestamp = %d\n",
10933 (map->wait_for_space ? "" : "!"),
10934 (map->wiring_required ? "" : "!"),
10935 map->timestamp);
10936
10937 #if TASK_SWAPPER
10938 switch (map->sw_state) {
10939 case MAP_SW_IN:
10940 swstate = "SW_IN";
10941 break;
10942 case MAP_SW_OUT:
10943 swstate = "SW_OUT";
10944 break;
10945 default:
10946 swstate = "????";
10947 break;
10948 }
10949 iprintf("res = %d, sw_state = %s\n", map->res_count, swstate);
10950 #endif /* TASK_SWAPPER */
10951
10952 for (entry = vm_map_first_entry(map);
10953 entry && entry != vm_map_to_entry(map);
10954 entry = entry->vme_next) {
10955 vm_map_entry_print(entry);
10956 }
10957
10958 db_indent -= 2;
10959 }
10960
10961 /*
10962 * Routine: vm_map_copy_print
10963 * Purpose:
10964 * Pretty-print a copy object for ddb.
10965 */
10966
10967 void
10968 vm_map_copy_print(
10969 db_addr_t incopy)
10970 {
10971 vm_map_copy_t copy;
10972 vm_map_entry_t entry;
10973
10974 copy = (vm_map_copy_t)(long)
10975 incopy; /* Make sure we have the right type */
10976
10977 printf("copy object 0x%x\n", copy);
10978
10979 db_indent += 2;
10980
10981 iprintf("type=%d", copy->type);
10982 switch (copy->type) {
10983 case VM_MAP_COPY_ENTRY_LIST:
10984 printf("[entry_list]");
10985 break;
10986
10987 case VM_MAP_COPY_OBJECT:
10988 printf("[object]");
10989 break;
10990
10991 case VM_MAP_COPY_KERNEL_BUFFER:
10992 printf("[kernel_buffer]");
10993 break;
10994
10995 default:
10996 printf("[bad type]");
10997 break;
10998 }
10999 printf(", offset=0x%llx", (unsigned long long)copy->offset);
11000 printf(", size=0x%x\n", copy->size);
11001
11002 switch (copy->type) {
11003 case VM_MAP_COPY_ENTRY_LIST:
11004 vm_map_header_print(&copy->cpy_hdr);
11005 for (entry = vm_map_copy_first_entry(copy);
11006 entry && entry != vm_map_copy_to_entry(copy);
11007 entry = entry->vme_next) {
11008 vm_map_entry_print(entry);
11009 }
11010 break;
11011
11012 case VM_MAP_COPY_OBJECT:
11013 iprintf("object=0x%x\n", copy->cpy_object);
11014 break;
11015
11016 case VM_MAP_COPY_KERNEL_BUFFER:
11017 iprintf("kernel buffer=0x%x", copy->cpy_kdata);
11018 printf(", kalloc_size=0x%x\n", copy->cpy_kalloc_size);
11019 break;
11020
11021 }
11022
11023 db_indent -=2;
11024 }
11025
11026 /*
11027 * db_vm_map_total_size(map) [ debug ]
11028 *
11029 * return the total virtual size (in bytes) of the map
11030 */
11031 vm_map_size_t
11032 db_vm_map_total_size(
11033 db_addr_t inmap)
11034 {
11035 vm_map_entry_t entry;
11036 vm_map_size_t total;
11037 vm_map_t map;
11038
11039 map = (vm_map_t)(long)
11040 inmap; /* Make sure we have the right type */
11041
11042 total = 0;
11043 for (entry = vm_map_first_entry(map);
11044 entry != vm_map_to_entry(map);
11045 entry = entry->vme_next) {
11046 total += entry->vme_end - entry->vme_start;
11047 }
11048
11049 return total;
11050 }
11051
11052 #endif /* MACH_KDB */
11053
11054 /*
11055 * Routine: vm_map_entry_insert
11056 *
11057 * Description: This routine inserts a new vm_map_entry in a locked map.
11058 */
11059 vm_map_entry_t
11060 vm_map_entry_insert(
11061 vm_map_t map,
11062 vm_map_entry_t insp_entry,
11063 vm_map_offset_t start,
11064 vm_map_offset_t end,
11065 vm_object_t object,
11066 vm_object_offset_t offset,
11067 boolean_t needs_copy,
11068 boolean_t is_shared,
11069 boolean_t in_transition,
11070 vm_prot_t cur_protection,
11071 vm_prot_t max_protection,
11072 vm_behavior_t behavior,
11073 vm_inherit_t inheritance,
11074 unsigned wired_count,
11075 boolean_t no_cache,
11076 boolean_t permanent,
11077 unsigned int superpage_size)
11078 {
11079 vm_map_entry_t new_entry;
11080
11081 assert(insp_entry != (vm_map_entry_t)0);
11082
11083 new_entry = vm_map_entry_create(map);
11084
11085 new_entry->vme_start = start;
11086 new_entry->vme_end = end;
11087 assert(page_aligned(new_entry->vme_start));
11088 assert(page_aligned(new_entry->vme_end));
11089
11090 new_entry->object.vm_object = object;
11091 new_entry->offset = offset;
11092 new_entry->is_shared = is_shared;
11093 new_entry->is_sub_map = FALSE;
11094 new_entry->needs_copy = needs_copy;
11095 new_entry->in_transition = in_transition;
11096 new_entry->needs_wakeup = FALSE;
11097 new_entry->inheritance = inheritance;
11098 new_entry->protection = cur_protection;
11099 new_entry->max_protection = max_protection;
11100 new_entry->behavior = behavior;
11101 new_entry->wired_count = wired_count;
11102 new_entry->user_wired_count = 0;
11103 new_entry->use_pmap = FALSE;
11104 new_entry->alias = 0;
11105 new_entry->zero_wired_pages = FALSE;
11106 new_entry->no_cache = no_cache;
11107 new_entry->permanent = permanent;
11108 new_entry->superpage_size = superpage_size;
11109 new_entry->used_for_jit = FALSE;
11110
11111 /*
11112 * Insert the new entry into the list.
11113 */
11114
11115 vm_map_store_entry_link(map, insp_entry, new_entry);
11116 map->size += end - start;
11117
11118 /*
11119 * Update the free space hint and the lookup hint.
11120 */
11121
11122 SAVE_HINT_MAP_WRITE(map, new_entry);
11123 return new_entry;
11124 }
11125
11126 /*
11127 * Routine: vm_map_remap_extract
11128 *
11129 * Description: This routine returns a vm_map_entry list from a map.
11130 */
11131 static kern_return_t
11132 vm_map_remap_extract(
11133 vm_map_t map,
11134 vm_map_offset_t addr,
11135 vm_map_size_t size,
11136 boolean_t copy,
11137 struct vm_map_header *map_header,
11138 vm_prot_t *cur_protection,
11139 vm_prot_t *max_protection,
11140 /* What, no behavior? */
11141 vm_inherit_t inheritance,
11142 boolean_t pageable)
11143 {
11144 kern_return_t result;
11145 vm_map_size_t mapped_size;
11146 vm_map_size_t tmp_size;
11147 vm_map_entry_t src_entry; /* result of last map lookup */
11148 vm_map_entry_t new_entry;
11149 vm_object_offset_t offset;
11150 vm_map_offset_t map_address;
11151 vm_map_offset_t src_start; /* start of entry to map */
11152 vm_map_offset_t src_end; /* end of region to be mapped */
11153 vm_object_t object;
11154 vm_map_version_t version;
11155 boolean_t src_needs_copy;
11156 boolean_t new_entry_needs_copy;
11157
11158 assert(map != VM_MAP_NULL);
11159 assert(size != 0 && size == vm_map_round_page(size));
11160 assert(inheritance == VM_INHERIT_NONE ||
11161 inheritance == VM_INHERIT_COPY ||
11162 inheritance == VM_INHERIT_SHARE);
11163
11164 /*
11165 * Compute start and end of region.
11166 */
11167 src_start = vm_map_trunc_page(addr);
11168 src_end = vm_map_round_page(src_start + size);
11169
11170 /*
11171 * Initialize map_header.
11172 */
11173 map_header->links.next = (struct vm_map_entry *)&map_header->links;
11174 map_header->links.prev = (struct vm_map_entry *)&map_header->links;
11175 map_header->nentries = 0;
11176 map_header->entries_pageable = pageable;
11177
11178 vm_map_store_init( map_header );
11179
11180 *cur_protection = VM_PROT_ALL;
11181 *max_protection = VM_PROT_ALL;
11182
11183 map_address = 0;
11184 mapped_size = 0;
11185 result = KERN_SUCCESS;
11186
11187 /*
11188 * The specified source virtual space might correspond to
11189 * multiple map entries, so we need to loop over them.
11190 */
11191 vm_map_lock(map);
11192 while (mapped_size != size) {
11193 vm_map_size_t entry_size;
11194
11195 /*
11196 * Find the beginning of the region.
11197 */
11198 if (! vm_map_lookup_entry(map, src_start, &src_entry)) {
11199 result = KERN_INVALID_ADDRESS;
11200 break;
11201 }
11202
11203 if (src_start < src_entry->vme_start ||
11204 (mapped_size && src_start != src_entry->vme_start)) {
11205 result = KERN_INVALID_ADDRESS;
11206 break;
11207 }
11208
11209 tmp_size = size - mapped_size;
11210 if (src_end > src_entry->vme_end)
11211 tmp_size -= (src_end - src_entry->vme_end);
11212
11213 entry_size = (vm_map_size_t)(src_entry->vme_end -
11214 src_entry->vme_start);
11215
11216 if(src_entry->is_sub_map) {
11217 vm_map_reference(src_entry->object.sub_map);
11218 object = VM_OBJECT_NULL;
11219 } else {
11220 object = src_entry->object.vm_object;
11221
11222 if (object == VM_OBJECT_NULL) {
11223 object = vm_object_allocate(entry_size);
11224 src_entry->offset = 0;
11225 src_entry->object.vm_object = object;
11226 } else if (object->copy_strategy !=
11227 MEMORY_OBJECT_COPY_SYMMETRIC) {
11228 /*
11229 * We are already using an asymmetric
11230 * copy, and therefore we already have
11231 * the right object.
11232 */
11233 assert(!src_entry->needs_copy);
11234 } else if (src_entry->needs_copy || object->shadowed ||
11235 (object->internal && !object->true_share &&
11236 !src_entry->is_shared &&
11237 object->vo_size > entry_size)) {
11238
11239 vm_object_shadow(&src_entry->object.vm_object,
11240 &src_entry->offset,
11241 entry_size);
11242
11243 if (!src_entry->needs_copy &&
11244 (src_entry->protection & VM_PROT_WRITE)) {
11245 vm_prot_t prot;
11246
11247 prot = src_entry->protection & ~VM_PROT_WRITE;
11248
11249 if (override_nx(map, src_entry->alias) && prot)
11250 prot |= VM_PROT_EXECUTE;
11251
11252 if(map->mapped) {
11253 vm_object_pmap_protect(
11254 src_entry->object.vm_object,
11255 src_entry->offset,
11256 entry_size,
11257 PMAP_NULL,
11258 src_entry->vme_start,
11259 prot);
11260 } else {
11261 pmap_protect(vm_map_pmap(map),
11262 src_entry->vme_start,
11263 src_entry->vme_end,
11264 prot);
11265 }
11266 }
11267
11268 object = src_entry->object.vm_object;
11269 src_entry->needs_copy = FALSE;
11270 }
11271
11272
11273 vm_object_lock(object);
11274 vm_object_reference_locked(object); /* object ref. for new entry */
11275 if (object->copy_strategy ==
11276 MEMORY_OBJECT_COPY_SYMMETRIC) {
11277 object->copy_strategy =
11278 MEMORY_OBJECT_COPY_DELAY;
11279 }
11280 vm_object_unlock(object);
11281 }
11282
11283 offset = src_entry->offset + (src_start - src_entry->vme_start);
11284
11285 new_entry = _vm_map_entry_create(map_header);
11286 vm_map_entry_copy(new_entry, src_entry);
11287 new_entry->use_pmap = FALSE; /* clr address space specifics */
11288
11289 new_entry->vme_start = map_address;
11290 new_entry->vme_end = map_address + tmp_size;
11291 new_entry->inheritance = inheritance;
11292 new_entry->offset = offset;
11293
11294 /*
11295 * The new region has to be copied now if required.
11296 */
11297 RestartCopy:
11298 if (!copy) {
11299 src_entry->is_shared = TRUE;
11300 new_entry->is_shared = TRUE;
11301 if (!(new_entry->is_sub_map))
11302 new_entry->needs_copy = FALSE;
11303
11304 } else if (src_entry->is_sub_map) {
11305 /* make this a COW sub_map if not already */
11306 new_entry->needs_copy = TRUE;
11307 object = VM_OBJECT_NULL;
11308 } else if (src_entry->wired_count == 0 &&
11309 vm_object_copy_quickly(&new_entry->object.vm_object,
11310 new_entry->offset,
11311 (new_entry->vme_end -
11312 new_entry->vme_start),
11313 &src_needs_copy,
11314 &new_entry_needs_copy)) {
11315
11316 new_entry->needs_copy = new_entry_needs_copy;
11317 new_entry->is_shared = FALSE;
11318
11319 /*
11320 * Handle copy_on_write semantics.
11321 */
11322 if (src_needs_copy && !src_entry->needs_copy) {
11323 vm_prot_t prot;
11324
11325 prot = src_entry->protection & ~VM_PROT_WRITE;
11326
11327 if (override_nx(map, src_entry->alias) && prot)
11328 prot |= VM_PROT_EXECUTE;
11329
11330 vm_object_pmap_protect(object,
11331 offset,
11332 entry_size,
11333 ((src_entry->is_shared
11334 || map->mapped) ?
11335 PMAP_NULL : map->pmap),
11336 src_entry->vme_start,
11337 prot);
11338
11339 src_entry->needs_copy = TRUE;
11340 }
11341 /*
11342 * Throw away the old object reference of the new entry.
11343 */
11344 vm_object_deallocate(object);
11345
11346 } else {
11347 new_entry->is_shared = FALSE;
11348
11349 /*
11350 * The map can be safely unlocked since we
11351 * already hold a reference on the object.
11352 *
11353 * Record the timestamp of the map for later
11354 * verification, and unlock the map.
11355 */
11356 version.main_timestamp = map->timestamp;
11357 vm_map_unlock(map); /* Increments timestamp once! */
11358
11359 /*
11360 * Perform the copy.
11361 */
11362 if (src_entry->wired_count > 0) {
11363 vm_object_lock(object);
11364 result = vm_object_copy_slowly(
11365 object,
11366 offset,
11367 entry_size,
11368 THREAD_UNINT,
11369 &new_entry->object.vm_object);
11370
11371 new_entry->offset = 0;
11372 new_entry->needs_copy = FALSE;
11373 } else {
11374 result = vm_object_copy_strategically(
11375 object,
11376 offset,
11377 entry_size,
11378 &new_entry->object.vm_object,
11379 &new_entry->offset,
11380 &new_entry_needs_copy);
11381
11382 new_entry->needs_copy = new_entry_needs_copy;
11383 }
11384
11385 /*
11386 * Throw away the old object reference of the new entry.
11387 */
11388 vm_object_deallocate(object);
11389
11390 if (result != KERN_SUCCESS &&
11391 result != KERN_MEMORY_RESTART_COPY) {
11392 _vm_map_entry_dispose(map_header, new_entry);
11393 break;
11394 }
11395
11396 /*
11397 * Verify that the map has not substantially
11398 * changed while the copy was being made.
11399 */
11400
11401 vm_map_lock(map);
11402 if (version.main_timestamp + 1 != map->timestamp) {
11403 /*
11404 * Simple version comparison failed.
11405 *
11406 * Retry the lookup and verify that the
11407 * same object/offset are still present.
11408 */
11409 vm_object_deallocate(new_entry->
11410 object.vm_object);
11411 _vm_map_entry_dispose(map_header, new_entry);
11412 if (result == KERN_MEMORY_RESTART_COPY)
11413 result = KERN_SUCCESS;
11414 continue;
11415 }
11416
11417 if (result == KERN_MEMORY_RESTART_COPY) {
11418 vm_object_reference(object);
11419 goto RestartCopy;
11420 }
11421 }
11422
11423 _vm_map_store_entry_link(map_header,
11424 map_header->links.prev, new_entry);
11425
11426 /*Protections for submap mapping are irrelevant here*/
11427 if( !src_entry->is_sub_map ) {
11428 *cur_protection &= src_entry->protection;
11429 *max_protection &= src_entry->max_protection;
11430 }
11431 map_address += tmp_size;
11432 mapped_size += tmp_size;
11433 src_start += tmp_size;
11434
11435 } /* end while */
11436
11437 vm_map_unlock(map);
11438 if (result != KERN_SUCCESS) {
11439 /*
11440 * Free all allocated elements.
11441 */
11442 for (src_entry = map_header->links.next;
11443 src_entry != (struct vm_map_entry *)&map_header->links;
11444 src_entry = new_entry) {
11445 new_entry = src_entry->vme_next;
11446 _vm_map_store_entry_unlink(map_header, src_entry);
11447 vm_object_deallocate(src_entry->object.vm_object);
11448 _vm_map_entry_dispose(map_header, src_entry);
11449 }
11450 }
11451 return result;
11452 }
11453
11454 /*
11455 * Routine: vm_remap
11456 *
11457 * Map portion of a task's address space.
11458 * Mapped region must not overlap more than
11459 * one vm memory object. Protections and
11460 * inheritance attributes remain the same
11461 * as in the original task and are out parameters.
11462 * Source and target tasks can be identical.
11463 * Other attributes are the same as for vm_map().
11464 */
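/*
 * Hedged sketch of a typical client (hypothetical variable names):
 * the mach_vm_remap() user interface funnels into this routine, e.g.
 * to alias part of another task's address space:
 *
 *	mach_vm_address_t target_addr = 0;
 *	vm_prot_t cur, max;
 *	kr = mach_vm_remap(mach_task_self(), &target_addr, len, 0,
 *			   VM_FLAGS_ANYWHERE, src_task, src_addr,
 *			   FALSE, &cur, &max, VM_INHERIT_NONE);
 *
 * where copy == FALSE shares the pages rather than copying them.
 */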
11465 kern_return_t
11466 vm_map_remap(
11467 vm_map_t target_map,
11468 vm_map_address_t *address,
11469 vm_map_size_t size,
11470 vm_map_offset_t mask,
11471 int flags,
11472 vm_map_t src_map,
11473 vm_map_offset_t memory_address,
11474 boolean_t copy,
11475 vm_prot_t *cur_protection,
11476 vm_prot_t *max_protection,
11477 vm_inherit_t inheritance)
11478 {
11479 kern_return_t result;
11480 vm_map_entry_t entry;
11481 vm_map_entry_t insp_entry = VM_MAP_ENTRY_NULL;
11482 vm_map_entry_t new_entry;
11483 struct vm_map_header map_header;
11484
11485 if (target_map == VM_MAP_NULL)
11486 return KERN_INVALID_ARGUMENT;
11487
11488 switch (inheritance) {
11489 case VM_INHERIT_NONE:
11490 case VM_INHERIT_COPY:
11491 case VM_INHERIT_SHARE:
11492 if (size != 0 && src_map != VM_MAP_NULL)
11493 break;
11494 /*FALL THRU*/
11495 default:
11496 return KERN_INVALID_ARGUMENT;
11497 }
11498
11499 size = vm_map_round_page(size);
11500
11501 result = vm_map_remap_extract(src_map, memory_address,
11502 size, copy, &map_header,
11503 cur_protection,
11504 max_protection,
11505 inheritance,
11506 target_map->hdr.
11507 entries_pageable);
11508
11509 if (result != KERN_SUCCESS) {
11510 return result;
11511 }
11512
11513 /*
11514 * Allocate/check a range of free virtual address
11515 * space for the target
11516 */
11517 *address = vm_map_trunc_page(*address);
11518 vm_map_lock(target_map);
11519 result = vm_map_remap_range_allocate(target_map, address, size,
11520 mask, flags, &insp_entry);
11521
11522 for (entry = map_header.links.next;
11523 entry != (struct vm_map_entry *)&map_header.links;
11524 entry = new_entry) {
11525 new_entry = entry->vme_next;
11526 _vm_map_store_entry_unlink(&map_header, entry);
11527 if (result == KERN_SUCCESS) {
11528 entry->vme_start += *address;
11529 entry->vme_end += *address;
11530 vm_map_store_entry_link(target_map, insp_entry, entry);
11531 insp_entry = entry;
11532 } else {
11533 if (!entry->is_sub_map) {
11534 vm_object_deallocate(entry->object.vm_object);
11535 } else {
11536 vm_map_deallocate(entry->object.sub_map);
11537 }
11538 _vm_map_entry_dispose(&map_header, entry);
11539 }
11540 }
11541
11542 if( target_map->disable_vmentry_reuse == TRUE) {
11543 if( target_map->highest_entry_end < insp_entry->vme_end ){
11544 target_map->highest_entry_end = insp_entry->vme_end;
11545 }
11546 }
11547
11548 if (result == KERN_SUCCESS) {
11549 target_map->size += size;
11550 SAVE_HINT_MAP_WRITE(target_map, insp_entry);
11551 }
11552 vm_map_unlock(target_map);
11553
11554 if (result == KERN_SUCCESS && target_map->wiring_required)
11555 result = vm_map_wire(target_map, *address,
11556 *address + size, *cur_protection, TRUE);
11557 return result;
11558 }
11559
11560 /*
11561 * Routine: vm_map_remap_range_allocate
11562 *
11563 * Description:
11564 * Allocate a range in the specified virtual address map.
11565 * Returns the address and the map entry just before the allocated
11566 * range
11567 *
11568 * Map must be locked.
11569 */
11570
11571 static kern_return_t
11572 vm_map_remap_range_allocate(
11573 vm_map_t map,
11574 vm_map_address_t *address, /* IN/OUT */
11575 vm_map_size_t size,
11576 vm_map_offset_t mask,
11577 int flags,
11578 vm_map_entry_t *map_entry) /* OUT */
11579 {
11580 vm_map_entry_t entry;
11581 vm_map_offset_t start;
11582 vm_map_offset_t end;
11583 kern_return_t kr;
11584
11585 StartAgain: ;
11586
11587 start = *address;
11588
11589 if (flags & VM_FLAGS_ANYWHERE)
11590 {
11591 /*
11592 * Calculate the first possible address.
11593 */
11594
11595 if (start < map->min_offset)
11596 start = map->min_offset;
11597 if (start > map->max_offset)
11598 return(KERN_NO_SPACE);
11599
11600 /*
11601 * Look for the first possible address;
11602 * if there's already something at this
11603 * address, we have to start after it.
11604 */
11605
11606 if( map->disable_vmentry_reuse == TRUE) {
11607 VM_MAP_HIGHEST_ENTRY(map, entry, start);
11608 } else {
11609 assert(first_free_is_valid(map));
11610 if (start == map->min_offset) {
11611 if ((entry = map->first_free) != vm_map_to_entry(map))
11612 start = entry->vme_end;
11613 } else {
11614 vm_map_entry_t tmp_entry;
11615 if (vm_map_lookup_entry(map, start, &tmp_entry))
11616 start = tmp_entry->vme_end;
11617 entry = tmp_entry;
11618 }
11619 }
11620
11621 /*
11622 * In any case, the "entry" always precedes
11623 * the proposed new region throughout the
11624 * loop:
11625 */
11626
11627 while (TRUE) {
11628 register vm_map_entry_t next;
11629
11630 /*
11631 * Find the end of the proposed new region.
11632 * Be sure we didn't go beyond the end, or
11633 * wrap around the address.
11634 */
11635
11636 end = ((start + mask) & ~mask);
11637 if (end < start)
11638 return(KERN_NO_SPACE);
11639 start = end;
11640 end += size;
11641
11642 if ((end > map->max_offset) || (end < start)) {
11643 if (map->wait_for_space) {
11644 if (size <= (map->max_offset -
11645 map->min_offset)) {
11646 assert_wait((event_t) map, THREAD_INTERRUPTIBLE);
11647 vm_map_unlock(map);
11648 thread_block(THREAD_CONTINUE_NULL);
11649 vm_map_lock(map);
11650 goto StartAgain;
11651 }
11652 }
11653
11654 return(KERN_NO_SPACE);
11655 }
11656
11657 /*
11658 * If there are no more entries, we must win.
11659 */
11660
11661 next = entry->vme_next;
11662 if (next == vm_map_to_entry(map))
11663 break;
11664
11665 /*
11666 * If there is another entry, it must be
11667 * after the end of the potential new region.
11668 */
11669
11670 if (next->vme_start >= end)
11671 break;
11672
11673 /*
11674 * Didn't fit -- move to the next entry.
11675 */
11676
11677 entry = next;
11678 start = entry->vme_end;
11679 }
11680 *address = start;
11681 } else {
11682 vm_map_entry_t temp_entry;
11683
11684 /*
11685 * Verify that:
11686 * the address doesn't itself violate
11687 * the mask requirement.
11688 */
11689
11690 if ((start & mask) != 0)
11691 return(KERN_NO_SPACE);
11692
11693
11694 /*
11695 * ... the address is within bounds
11696 */
11697
11698 end = start + size;
11699
11700 if ((start < map->min_offset) ||
11701 (end > map->max_offset) ||
11702 (start >= end)) {
11703 return(KERN_INVALID_ADDRESS);
11704 }
11705
11706 /*
11707 * If we're asked to overwrite whatever was mapped in that
11708 * range, first deallocate that range.
11709 */
11710 if (flags & VM_FLAGS_OVERWRITE) {
11711 vm_map_t zap_map;
11712
11713 /*
11714 * We use a "zap_map" to avoid having to unlock
11715 * the "map" in vm_map_delete(), which would compromise
11716 * the atomicity of the "deallocate" and then "remap"
11717 * combination.
11718 */
11719 zap_map = vm_map_create(PMAP_NULL,
11720 start,
11721 end - start,
11722 map->hdr.entries_pageable);
11723 if (zap_map == VM_MAP_NULL) {
11724 return KERN_RESOURCE_SHORTAGE;
11725 }
11726
11727 kr = vm_map_delete(map, start, end,
11728 VM_MAP_REMOVE_SAVE_ENTRIES,
11729 zap_map);
11730 if (kr == KERN_SUCCESS) {
11731 vm_map_destroy(zap_map,
11732 VM_MAP_REMOVE_NO_PMAP_CLEANUP);
11733 zap_map = VM_MAP_NULL;
11734 }
11735 }
11736
11737 /*
11738 * ... the starting address isn't allocated
11739 */
11740
11741 if (vm_map_lookup_entry(map, start, &temp_entry))
11742 return(KERN_NO_SPACE);
11743
11744 entry = temp_entry;
11745
11746 /*
11747 * ... the next region doesn't overlap the
11748 * end point.
11749 */
11750
11751 if ((entry->vme_next != vm_map_to_entry(map)) &&
11752 (entry->vme_next->vme_start < end))
11753 return(KERN_NO_SPACE);
11754 }
11755 *map_entry = entry;
11756 return(KERN_SUCCESS);
11757 }
11758
11759 /*
11760 * vm_map_switch:
11761 *
11762 * Set the address map for the current thread to the specified map
11763 */
11764
11765 vm_map_t
11766 vm_map_switch(
11767 vm_map_t map)
11768 {
11769 int mycpu;
11770 thread_t thread = current_thread();
11771 vm_map_t oldmap = thread->map;
11772
11773 mp_disable_preemption();
11774 mycpu = cpu_number();
11775
11776 /*
11777 * Deactivate the current map and activate the requested map
11778 */
11779 PMAP_SWITCH_USER(thread, map, mycpu);
11780
11781 mp_enable_preemption();
11782 return(oldmap);
11783 }
11784
11785
11786 /*
11787 * Routine: vm_map_write_user
11788 *
11789 * Description:
11790 * Copy out data from kernel space into space in the
11791 * destination map. The space must already exist in the
11792 * destination map.
11793 * NOTE: This routine should only be called by threads
11794 * which can block on a page fault, i.e. kernel-mode user
11795 * threads.
11796 *
11797 */
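/*
 * Minimal hedged sketch (hypothetical names "task_map", "uaddr",
 * "results"), assuming the destination range already exists and is
 * writable in the target map:
 *
 *	kr = vm_map_write_user(task_map, &results, uaddr,
 *			       sizeof(results));
 *	if (kr != KERN_SUCCESS)
 *		return kr;
 *
 * where KERN_INVALID_ADDRESS indicates the copyout faulted.
 */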
11798 kern_return_t
11799 vm_map_write_user(
11800 vm_map_t map,
11801 void *src_p,
11802 vm_map_address_t dst_addr,
11803 vm_size_t size)
11804 {
11805 kern_return_t kr = KERN_SUCCESS;
11806
11807 if(current_map() == map) {
11808 if (copyout(src_p, dst_addr, size)) {
11809 kr = KERN_INVALID_ADDRESS;
11810 }
11811 } else {
11812 vm_map_t oldmap;
11813
11814 /* take on the identity of the target map while doing */
11815 /* the transfer */
11816
11817 vm_map_reference(map);
11818 oldmap = vm_map_switch(map);
11819 if (copyout(src_p, dst_addr, size)) {
11820 kr = KERN_INVALID_ADDRESS;
11821 }
11822 vm_map_switch(oldmap);
11823 vm_map_deallocate(map);
11824 }
11825 return kr;
11826 }
11827
11828 /*
11829 * Routine: vm_map_read_user
11830 *
11831 * Description:
11832 * Copy in data from a user space source map into the
11833 * kernel map. The space must already exist in the
11834 * kernel map.
11835 * NOTE: This routine should only be called by threads
11836 * which can block on a page fault, i.e. kernel-mode user
11837 * threads.
11838 *
11839 */
11840 kern_return_t
11841 vm_map_read_user(
11842 vm_map_t map,
11843 vm_map_address_t src_addr,
11844 void *dst_p,
11845 vm_size_t size)
11846 {
11847 kern_return_t kr = KERN_SUCCESS;
11848
11849 if(current_map() == map) {
11850 if (copyin(src_addr, dst_p, size)) {
11851 kr = KERN_INVALID_ADDRESS;
11852 }
11853 } else {
11854 vm_map_t oldmap;
11855
11856 /* take on the identity of the target map while doing */
11857 /* the transfer */
11858
11859 vm_map_reference(map);
11860 oldmap = vm_map_switch(map);
11861 if (copyin(src_addr, dst_p, size)) {
11862 kr = KERN_INVALID_ADDRESS;
11863 }
11864 vm_map_switch(oldmap);
11865 vm_map_deallocate(map);
11866 }
11867 return kr;
11868 }
11869
11870
11871 /*
11872 * vm_map_check_protection:
11873 *
11874 * Assert that the target map allows the specified
11875 * privilege on the entire address region given.
11876 * The entire region must be allocated.
11877 */
11878 boolean_t
11879 vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
11880 vm_map_offset_t end, vm_prot_t protection)
11881 {
11882 vm_map_entry_t entry;
11883 vm_map_entry_t tmp_entry;
11884
11885 vm_map_lock(map);
11886
11887 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
11888 {
11889 vm_map_unlock(map);
11890 return (FALSE);
11891 }
11892
11893 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
11894 vm_map_unlock(map);
11895 return(FALSE);
11896 }
11897
11898 entry = tmp_entry;
11899
11900 while (start < end) {
11901 if (entry == vm_map_to_entry(map)) {
11902 vm_map_unlock(map);
11903 return(FALSE);
11904 }
11905
11906 /*
11907 * No holes allowed!
11908 */
11909
11910 if (start < entry->vme_start) {
11911 vm_map_unlock(map);
11912 return(FALSE);
11913 }
11914
11915 /*
11916 * Check protection associated with entry.
11917 */
11918
11919 if ((entry->protection & protection) != protection) {
11920 vm_map_unlock(map);
11921 return(FALSE);
11922 }
11923
11924 /* go to next entry */
11925
11926 start = entry->vme_end;
11927 entry = entry->vme_next;
11928 }
11929 vm_map_unlock(map);
11930 return(TRUE);
11931 }
11932
11933 kern_return_t
11934 vm_map_purgable_control(
11935 vm_map_t map,
11936 vm_map_offset_t address,
11937 vm_purgable_t control,
11938 int *state)
11939 {
11940 vm_map_entry_t entry;
11941 vm_object_t object;
11942 kern_return_t kr;
11943
11944 /*
11945 * Vet all the input parameters and current type and state of the
11946 * underlying object. Return with an error if anything is amiss.
11947 */
11948 if (map == VM_MAP_NULL)
11949 return(KERN_INVALID_ARGUMENT);
11950
11951 if (control != VM_PURGABLE_SET_STATE &&
11952 control != VM_PURGABLE_GET_STATE &&
11953 control != VM_PURGABLE_PURGE_ALL)
11954 return(KERN_INVALID_ARGUMENT);
11955
11956 if (control == VM_PURGABLE_PURGE_ALL) {
11957 vm_purgeable_object_purge_all();
11958 return KERN_SUCCESS;
11959 }
11960
11961 if (control == VM_PURGABLE_SET_STATE &&
11962 (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) ||
11963 ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK)))
11964 return(KERN_INVALID_ARGUMENT);
11965
11966 vm_map_lock_read(map);
11967
11968 if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {
11969
11970 /*
11971 * Must pass a valid non-submap address.
11972 */
11973 vm_map_unlock_read(map);
11974 return(KERN_INVALID_ADDRESS);
11975 }
11976
11977 if ((entry->protection & VM_PROT_WRITE) == 0) {
11978 /*
11979 * Can't apply purgable controls to something you can't write.
11980 */
11981 vm_map_unlock_read(map);
11982 return(KERN_PROTECTION_FAILURE);
11983 }
11984
11985 object = entry->object.vm_object;
11986 if (object == VM_OBJECT_NULL) {
11987 /*
11988 * Object must already be present or it can't be purgable.
11989 */
11990 vm_map_unlock_read(map);
11991 return KERN_INVALID_ARGUMENT;
11992 }
11993
11994 vm_object_lock(object);
11995
11996 if (entry->offset != 0 ||
11997 entry->vme_end - entry->vme_start != object->vo_size) {
11998 /*
11999 * Can only apply purgable controls to the whole (existing)
12000 * object at once.
12001 */
12002 vm_map_unlock_read(map);
12003 vm_object_unlock(object);
12004 return KERN_INVALID_ARGUMENT;
12005 }
12006
12007 vm_map_unlock_read(map);
12008
12009 kr = vm_object_purgable_control(object, control, state);
12010
12011 vm_object_unlock(object);
12012
12013 return kr;
12014 }
12015
12016 kern_return_t
12017 vm_map_page_query_internal(
12018 vm_map_t target_map,
12019 vm_map_offset_t offset,
12020 int *disposition,
12021 int *ref_count)
12022 {
12023 kern_return_t kr;
12024 vm_page_info_basic_data_t info;
12025 mach_msg_type_number_t count;
12026
12027 count = VM_PAGE_INFO_BASIC_COUNT;
12028 kr = vm_map_page_info(target_map,
12029 offset,
12030 VM_PAGE_INFO_BASIC,
12031 (vm_page_info_t) &info,
12032 &count);
12033 if (kr == KERN_SUCCESS) {
12034 *disposition = info.disposition;
12035 *ref_count = info.ref_count;
12036 } else {
12037 *disposition = 0;
12038 *ref_count = 0;
12039 }
12040
12041 return kr;
12042 }
12043
12044 kern_return_t
12045 vm_map_page_info(
12046 vm_map_t map,
12047 vm_map_offset_t offset,
12048 vm_page_info_flavor_t flavor,
12049 vm_page_info_t info,
12050 mach_msg_type_number_t *count)
12051 {
12052 vm_map_entry_t map_entry;
12053 vm_object_t object;
12054 vm_page_t m;
12055 kern_return_t kr;
12056 kern_return_t retval = KERN_SUCCESS;
12057 boolean_t top_object;
12058 int disposition;
12059 int ref_count;
12060 vm_object_id_t object_id;
12061 vm_page_info_basic_t basic_info;
12062 int depth;
12063 vm_map_offset_t offset_in_page;
12064
12065 switch (flavor) {
12066 case VM_PAGE_INFO_BASIC:
12067 if (*count != VM_PAGE_INFO_BASIC_COUNT) {
12068 /*
12069 * The "vm_page_info_basic_data" structure was not
12070 * properly padded, so allow the size to be off by
12071 * one to maintain backwards binary compatibility...
12072 */
12073 if (*count != VM_PAGE_INFO_BASIC_COUNT - 1)
12074 return KERN_INVALID_ARGUMENT;
12075 }
12076 break;
12077 default:
12078 return KERN_INVALID_ARGUMENT;
12079 }
12080
12081 disposition = 0;
12082 ref_count = 0;
12083 object_id = 0;
12084 top_object = TRUE;
12085 depth = 0;
12086
12087 retval = KERN_SUCCESS;
12088 offset_in_page = offset & PAGE_MASK;
12089 offset = vm_map_trunc_page(offset);
12090
12091 vm_map_lock_read(map);
12092
12093 /*
12094 * First, find the map entry covering "offset", going down
12095 * submaps if necessary.
12096 */
12097 for (;;) {
12098 if (!vm_map_lookup_entry(map, offset, &map_entry)) {
12099 vm_map_unlock_read(map);
12100 return KERN_INVALID_ADDRESS;
12101 }
12102 /* compute offset from this map entry's start */
12103 offset -= map_entry->vme_start;
12104 /* compute offset into this map entry's object (or submap) */
12105 offset += map_entry->offset;
12106
12107 if (map_entry->is_sub_map) {
12108 vm_map_t sub_map;
12109
12110 sub_map = map_entry->object.sub_map;
12111 vm_map_lock_read(sub_map);
12112 vm_map_unlock_read(map);
12113
12114 map = sub_map;
12115
12116 ref_count = MAX(ref_count, map->ref_count);
12117 continue;
12118 }
12119 break;
12120 }
12121
12122 object = map_entry->object.vm_object;
12123 if (object == VM_OBJECT_NULL) {
12124 /* no object -> no page */
12125 vm_map_unlock_read(map);
12126 goto done;
12127 }
12128
12129 vm_object_lock(object);
12130 vm_map_unlock_read(map);
12131
12132 /*
12133 * Go down the VM object shadow chain until we find the page
12134 * we're looking for.
12135 */
12136 for (;;) {
12137 ref_count = MAX(ref_count, object->ref_count);
12138
12139 m = vm_page_lookup(object, offset);
12140
12141 if (m != VM_PAGE_NULL) {
12142 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
12143 break;
12144 } else {
12145 #if MACH_PAGEMAP
12146 if (object->existence_map) {
12147 if (vm_external_state_get(object->existence_map,
12148 offset) ==
12149 VM_EXTERNAL_STATE_EXISTS) {
12150 /*
12151 * this page has been paged out
12152 */
12153 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
12154 break;
12155 }
12156 } else
12157 #endif
12158 {
12159 if (object->internal &&
12160 object->alive &&
12161 !object->terminating &&
12162 object->pager_ready) {
12163
12164 memory_object_t pager;
12165
12166 vm_object_paging_begin(object);
12167 pager = object->pager;
12168 vm_object_unlock(object);
12169
12170 /*
12171 * Ask the default pager if
12172 * it has this page.
12173 */
12174 kr = memory_object_data_request(
12175 pager,
12176 offset + object->paging_offset,
12177 0, /* just poke the pager */
12178 VM_PROT_READ,
12179 NULL);
12180
12181 vm_object_lock(object);
12182 vm_object_paging_end(object);
12183
12184 if (kr == KERN_SUCCESS) {
12185 /* the default pager has it */
12186 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
12187 break;
12188 }
12189 }
12190 }
12191
12192 if (object->shadow != VM_OBJECT_NULL) {
12193 vm_object_t shadow;
12194
12195 offset += object->vo_shadow_offset;
12196 shadow = object->shadow;
12197
12198 vm_object_lock(shadow);
12199 vm_object_unlock(object);
12200
12201 object = shadow;
12202 top_object = FALSE;
12203 depth++;
12204 } else {
12205 // if (!object->internal)
12206 // break;
12207 // retval = KERN_FAILURE;
12208 // goto done_with_object;
12209 break;
12210 }
12211 }
12212 }
12213 /* The ref_count is not strictly accurate: it measures the number */
12214 /* of entities holding a ref on the object; they may not be mapping */
12215 /* the object, or may not be mapping the section holding the */
12216 /* target page. But it is still a ballpark number and, though an */
12217 /* overcount, it picks up the copy-on-write cases. */
12218
12219 /* We could also get a picture of page sharing from pmap_attributes, */
12220 /* but this would undercount, as only faulted-in mappings would */
12221 /* show up. */
12222
12223 if (top_object == TRUE && object->shadow)
12224 disposition |= VM_PAGE_QUERY_PAGE_COPIED;
12225
12226 if (! object->internal)
12227 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
12228
12229 if (m == VM_PAGE_NULL)
12230 goto done_with_object;
12231
12232 if (m->fictitious) {
12233 disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
12234 goto done_with_object;
12235 }
12236 if (m->dirty || pmap_is_modified(m->phys_page))
12237 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
12238
12239 if (m->reference || pmap_is_referenced(m->phys_page))
12240 disposition |= VM_PAGE_QUERY_PAGE_REF;
12241
12242 if (m->speculative)
12243 disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE;
12244
12245 if (m->cs_validated)
12246 disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED;
12247 if (m->cs_tainted)
12248 disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED;
12249
12250 done_with_object:
12251 vm_object_unlock(object);
12252 done:
12253
12254 switch (flavor) {
12255 case VM_PAGE_INFO_BASIC:
12256 basic_info = (vm_page_info_basic_t) info;
12257 basic_info->disposition = disposition;
12258 basic_info->ref_count = ref_count;
12259 basic_info->object_id = (vm_object_id_t) (uintptr_t) object;
12260 basic_info->offset =
12261 (memory_object_offset_t) offset + offset_in_page;
12262 basic_info->depth = depth;
12263 break;
12264 }
12265
12266 return retval;
12267 }
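/*
 * Illustrative sketch (not part of this file, not compiled): calling
 * vm_map_page_info() directly with the VM_PAGE_INFO_BASIC flavor and
 * interpreting the result. "depth" counts how many shadow objects were
 * traversed before the page was found, so a non-zero depth suggests the
 * page is still provided by a backing object further down the chain
 * (e.g. shared copy-on-write). The helper example_describe_page() is
 * hypothetical.
 */
#if 0
static void
example_describe_page(
	vm_map_t	map,
	vm_map_offset_t	addr)
{
	vm_page_info_basic_data_t	info;
	mach_msg_type_number_t		count = VM_PAGE_INFO_BASIC_COUNT;

	if (vm_map_page_info(map, addr, VM_PAGE_INFO_BASIC,
			     (vm_page_info_t) &info, &count) != KERN_SUCCESS)
		return;

	if (info.disposition & VM_PAGE_QUERY_PAGE_PRESENT)
		printf("page resident, depth %d, approx refs %d\n",
		       info.depth, info.ref_count);
	else if (info.disposition & VM_PAGE_QUERY_PAGE_PAGED_OUT)
		printf("page paged out\n");
	else
		printf("page absent\n");
}
#endif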
12268
12269 /*
12270 * vm_map_msync
12271 *
12272 * Synchronises the specified memory range with its backing store
12273 * image by either flushing or cleaning the contents out to the
12274 * appropriate memory manager, engaging in a memory object synchronize
12275 * dialog with that manager. The client doesn't return until the
12276 * manager issues an m_o_s_completed message. MIG magically converts
12277 * the user task parameter to the task's address map.
12278 *
12279 * interpretation of sync_flags
12280 * VM_SYNC_INVALIDATE - discard pages, only return precious
12281 * pages to manager.
12282 *
12283 * VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
12284 * - discard pages, write dirty or precious
12285 * pages back to memory manager.
12286 *
12287 * VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
12288 * - write dirty or precious pages back to
12289 * the memory manager.
12290 *
12291 * VM_SYNC_CONTIGUOUS - does everything normally, but if there
12292 * is a hole in the region, and we would
12293 * have returned KERN_SUCCESS, return
12294 * KERN_INVALID_ADDRESS instead.
12295 *
12296 * NOTE
12297 * The memory object attributes have not yet been implemented, so this
12298 * function will have to deal with the invalidate attribute.
12299 *
12300 * RETURNS
12301 * KERN_INVALID_TASK Bad task parameter
12302 * KERN_INVALID_ARGUMENT both sync and async were specified.
12303 * KERN_SUCCESS The usual.
12304 * KERN_INVALID_ADDRESS There was a hole in the region.
12305 */
12306
12307 kern_return_t
12308 vm_map_msync(
12309 vm_map_t map,
12310 vm_map_address_t address,
12311 vm_map_size_t size,
12312 vm_sync_t sync_flags)
12313 {
12314 msync_req_t msr;
12315 msync_req_t new_msr;
12316 queue_chain_t req_q; /* queue of requests for this msync */
12317 vm_map_entry_t entry;
12318 vm_map_size_t amount_left;
12319 vm_object_offset_t offset;
12320 boolean_t do_sync_req;
12321 boolean_t had_hole = FALSE;
12322 memory_object_t pager;
12323
12324 if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
12325 (sync_flags & VM_SYNC_SYNCHRONOUS))
12326 return(KERN_INVALID_ARGUMENT);
12327
12328 /*
12329 * align address and size on page boundaries
12330 */
12331 size = vm_map_round_page(address + size) - vm_map_trunc_page(address);
12332 address = vm_map_trunc_page(address);
12333
12334 if (map == VM_MAP_NULL)
12335 return(KERN_INVALID_TASK);
12336
12337 if (size == 0)
12338 return(KERN_SUCCESS);
12339
12340 queue_init(&req_q);
12341 amount_left = size;
12342
12343 while (amount_left > 0) {
12344 vm_object_size_t flush_size;
12345 vm_object_t object;
12346
12347 vm_map_lock(map);
12348 if (!vm_map_lookup_entry(map,
12349 vm_map_trunc_page(address), &entry)) {
12350
12351 vm_map_size_t skip;
12352
12353 /*
12354 * hole in the address map.
12355 */
12356 had_hole = TRUE;
12357
12358 /*
12359 * Check for empty map.
12360 */
12361 if (entry == vm_map_to_entry(map) &&
12362 entry->vme_next == entry) {
12363 vm_map_unlock(map);
12364 break;
12365 }
12366 /*
12367 * Check that we don't wrap and that
12368 * we have at least one real map entry.
12369 */
12370 if ((map->hdr.nentries == 0) ||
12371 (entry->vme_next->vme_start < address)) {
12372 vm_map_unlock(map);
12373 break;
12374 }
12375 /*
12376 * Move up to the next entry if needed
12377 */
12378 skip = (entry->vme_next->vme_start - address);
12379 if (skip >= amount_left)
12380 amount_left = 0;
12381 else
12382 amount_left -= skip;
12383 address = entry->vme_next->vme_start;
12384 vm_map_unlock(map);
12385 continue;
12386 }
12387
12388 offset = address - entry->vme_start;
12389
12390 /*
12391 * do we have more to flush than is contained in this
12392 * entry ?
12393 */
12394 if (amount_left + entry->vme_start + offset > entry->vme_end) {
12395 flush_size = entry->vme_end -
12396 (entry->vme_start + offset);
12397 } else {
12398 flush_size = amount_left;
12399 }
12400 amount_left -= flush_size;
12401 address += flush_size;
12402
12403 if (entry->is_sub_map == TRUE) {
12404 vm_map_t local_map;
12405 vm_map_offset_t local_offset;
12406
12407 local_map = entry->object.sub_map;
12408 local_offset = entry->offset;
12409 vm_map_unlock(map);
12410 if (vm_map_msync(
12411 local_map,
12412 local_offset,
12413 flush_size,
12414 sync_flags) == KERN_INVALID_ADDRESS) {
12415 had_hole = TRUE;
12416 }
12417 continue;
12418 }
12419 object = entry->object.vm_object;
12420
12421 /*
12422 * We can't sync this object if the object has not been
12423 * created yet
12424 */
12425 if (object == VM_OBJECT_NULL) {
12426 vm_map_unlock(map);
12427 continue;
12428 }
12429 offset += entry->offset;
12430
12431 vm_object_lock(object);
12432
12433 if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
12434 int kill_pages = 0;
12435 boolean_t reusable_pages = FALSE;
12436
12437 if (sync_flags & VM_SYNC_KILLPAGES) {
12438 if (object->ref_count == 1 && !object->shadow)
12439 kill_pages = 1;
12440 else
12441 kill_pages = -1;
12442 }
12443 if (kill_pages != -1)
12444 vm_object_deactivate_pages(object, offset,
12445 (vm_object_size_t)flush_size, kill_pages, reusable_pages);
12446 vm_object_unlock(object);
12447 vm_map_unlock(map);
12448 continue;
12449 }
12450 /*
12451 * We can't sync this object if there isn't a pager.
12452 * Don't bother to sync internal objects, since there can't
12453 * be any "permanent" storage for these objects anyway.
12454 */
12455 if ((object->pager == MEMORY_OBJECT_NULL) ||
12456 (object->internal) || (object->private)) {
12457 vm_object_unlock(object);
12458 vm_map_unlock(map);
12459 continue;
12460 }
12461 /*
12462 * keep reference on the object until syncing is done
12463 */
12464 vm_object_reference_locked(object);
12465 vm_object_unlock(object);
12466
12467 vm_map_unlock(map);
12468
12469 do_sync_req = vm_object_sync(object,
12470 offset,
12471 flush_size,
12472 sync_flags & VM_SYNC_INVALIDATE,
12473 ((sync_flags & VM_SYNC_SYNCHRONOUS) ||
12474 (sync_flags & VM_SYNC_ASYNCHRONOUS)),
12475 sync_flags & VM_SYNC_SYNCHRONOUS);
12476 /*
12477 * only send an m_o_s if we returned pages or if the entry
12478 * is writable (i.e. dirty pages may have already been sent back)
12479 */
12480 if (!do_sync_req) {
12481 if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) {
12482 /*
12483 * clear out the clustering and read-ahead hints
12484 */
12485 vm_object_lock(object);
12486
12487 object->pages_created = 0;
12488 object->pages_used = 0;
12489 object->sequential = 0;
12490 object->last_alloc = 0;
12491
12492 vm_object_unlock(object);
12493 }
12494 vm_object_deallocate(object);
12495 continue;
12496 }
12497 msync_req_alloc(new_msr);
12498
12499 vm_object_lock(object);
12500 offset += object->paging_offset;
12501
12502 new_msr->offset = offset;
12503 new_msr->length = flush_size;
12504 new_msr->object = object;
12505 new_msr->flag = VM_MSYNC_SYNCHRONIZING;
12506 re_iterate:
12507
12508 /*
12509 * We can't sync this object if there isn't a pager. The
12510 * pager can disappear anytime we're not holding the object
12511 * lock. So this has to be checked anytime we goto re_iterate.
12512 */
12513
12514 pager = object->pager;
12515
12516 if (pager == MEMORY_OBJECT_NULL) {
12517 vm_object_unlock(object);
12518 vm_object_deallocate(object);
12519 continue;
12520 }
12521
12522 queue_iterate(&object->msr_q, msr, msync_req_t, msr_q) {
12523 /*
12524 * need to check for overlapping entry, if found, wait
12525 * on overlapping msr to be done, then reiterate
12526 */
12527 msr_lock(msr);
12528 if (msr->flag == VM_MSYNC_SYNCHRONIZING &&
12529 ((offset >= msr->offset &&
12530 offset < (msr->offset + msr->length)) ||
12531 (msr->offset >= offset &&
12532 msr->offset < (offset + flush_size))))
12533 {
12534 assert_wait((event_t) msr,THREAD_INTERRUPTIBLE);
12535 msr_unlock(msr);
12536 vm_object_unlock(object);
12537 thread_block(THREAD_CONTINUE_NULL);
12538 vm_object_lock(object);
12539 goto re_iterate;
12540 }
12541 msr_unlock(msr);
12542 }/* queue_iterate */
12543
12544 queue_enter(&object->msr_q, new_msr, msync_req_t, msr_q);
12545
12546 vm_object_paging_begin(object);
12547 vm_object_unlock(object);
12548
12549 queue_enter(&req_q, new_msr, msync_req_t, req_q);
12550
12551 (void) memory_object_synchronize(
12552 pager,
12553 offset,
12554 flush_size,
12555 sync_flags & ~VM_SYNC_CONTIGUOUS);
12556
12557 vm_object_lock(object);
12558 vm_object_paging_end(object);
12559 vm_object_unlock(object);
12560 }/* while */
12561
12562 /*
12563 * wait for memory_object_synchronize_completed messages from pager(s)
12564 */
12565
12566 while (!queue_empty(&req_q)) {
12567 msr = (msync_req_t)queue_first(&req_q);
12568 msr_lock(msr);
12569 while(msr->flag != VM_MSYNC_DONE) {
12570 assert_wait((event_t) msr, THREAD_INTERRUPTIBLE);
12571 msr_unlock(msr);
12572 thread_block(THREAD_CONTINUE_NULL);
12573 msr_lock(msr);
12574 }/* while */
12575 queue_remove(&req_q, msr, msync_req_t, req_q);
12576 msr_unlock(msr);
12577 vm_object_deallocate(msr->object);
12578 msync_req_free(msr);
12579 }/* while */
12580
12581 /* for proper msync() behaviour */
12582 if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS))
12583 return(KERN_INVALID_ADDRESS);
12584
12585 return(KERN_SUCCESS);
12586 }/* vm_msync */
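/*
 * Illustrative sketch (not part of this file, not compiled): a caller
 * that wants the documented "synchronous flush, fail on holes"
 * behaviour combines VM_SYNC_SYNCHRONOUS with VM_SYNC_CONTIGUOUS,
 * roughly the way BSD's msync() path is expected to build its flags.
 * The helper example_flush_range() is hypothetical.
 */
#if 0
static kern_return_t
example_flush_range(
	vm_map_t		map,
	vm_map_address_t	addr,
	vm_map_size_t		size,
	boolean_t		invalidate)
{
	vm_sync_t	sync_flags = VM_SYNC_SYNCHRONOUS | VM_SYNC_CONTIGUOUS;

	if (invalidate)
		sync_flags |= VM_SYNC_INVALIDATE;

	/*
	 * KERN_INVALID_ADDRESS here means the range contained a hole;
	 * a POSIX msync() caller would typically surface that as ENOMEM.
	 */
	return vm_map_msync(map, addr, size, sync_flags);
}
#endif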
12587
12588 /*
12589 * Routine: convert_port_entry_to_map
12590 * Purpose:
12591 * Convert from a port specifying an entry or a task
12592 * to a map. Doesn't consume the port ref; produces a map ref,
12593 * which may be null. Unlike convert_port_to_map, the
12594 * port may be either task or named-entry backed.
12595 * Conditions:
12596 * Nothing locked.
12597 */
12598
12599
12600 vm_map_t
12601 convert_port_entry_to_map(
12602 ipc_port_t port)
12603 {
12604 vm_map_t map;
12605 vm_named_entry_t named_entry;
12606 uint32_t try_failed_count = 0;
12607
12608 if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
12609 while(TRUE) {
12610 ip_lock(port);
12611 if(ip_active(port) && (ip_kotype(port)
12612 == IKOT_NAMED_ENTRY)) {
12613 named_entry =
12614 (vm_named_entry_t)port->ip_kobject;
12615 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
12616 ip_unlock(port);
12617
12618 try_failed_count++;
12619 mutex_pause(try_failed_count);
12620 continue;
12621 }
12622 named_entry->ref_count++;
12623 lck_mtx_unlock(&(named_entry)->Lock);
12624 ip_unlock(port);
12625 if ((named_entry->is_sub_map) &&
12626 (named_entry->protection
12627 & VM_PROT_WRITE)) {
12628 map = named_entry->backing.map;
12629 } else {
12630 mach_destroy_memory_entry(port);
12631 return VM_MAP_NULL;
12632 }
12633 vm_map_reference_swap(map);
12634 mach_destroy_memory_entry(port);
12635 break;
12636 }
12637 else
12638 return VM_MAP_NULL;
12639 }
12640 }
12641 else
12642 map = convert_port_to_map(port);
12643
12644 return map;
12645 }
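/*
 * Illustrative sketch (not part of this file, not compiled): a caller
 * holding a send right that may name either a task or a writable named
 * entry can resolve it with convert_port_entry_to_map(), and must drop
 * the map reference that routine produces. example_with_port_map() is
 * hypothetical.
 */
#if 0
static kern_return_t
example_with_port_map(
	ipc_port_t	port)
{
	vm_map_t	map;

	map = convert_port_entry_to_map(port);
	if (map == VM_MAP_NULL)
		return KERN_INVALID_ARGUMENT;

	/* ... operate on the map ... */

	vm_map_deallocate(map);	/* balance the reference produced above */
	return KERN_SUCCESS;
}
#endif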
12646
12647 /*
12648 * Routine: convert_port_entry_to_object
12649 * Purpose:
12650 * Convert from a port specifying a named entry to an
12651 * object. Doesn't consume the port ref; produces an object ref,
12652 * which may be null.
12653 * Conditions:
12654 * Nothing locked.
12655 */
12656
12657
12658 vm_object_t
12659 convert_port_entry_to_object(
12660 ipc_port_t port)
12661 {
12662 vm_object_t object;
12663 vm_named_entry_t named_entry;
12664 uint32_t try_failed_count = 0;
12665
12666 if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
12667 while(TRUE) {
12668 ip_lock(port);
12669 if(ip_active(port) && (ip_kotype(port)
12670 == IKOT_NAMED_ENTRY)) {
12671 named_entry =
12672 (vm_named_entry_t)port->ip_kobject;
12673 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
12674 ip_unlock(port);
12675
12676 try_failed_count++;
12677 mutex_pause(try_failed_count);
12678 continue;
12679 }
12680 named_entry->ref_count++;
12681 lck_mtx_unlock(&(named_entry)->Lock);
12682 ip_unlock(port);
12683 if ((!named_entry->is_sub_map) &&
12684 (!named_entry->is_pager) &&
12685 (named_entry->protection
12686 & VM_PROT_WRITE)) {
12687 object = named_entry->backing.object;
12688 } else {
12689 mach_destroy_memory_entry(port);
12690 return (vm_object_t)NULL;
12691 }
12692 vm_object_reference(named_entry->backing.object);
12693 mach_destroy_memory_entry(port);
12694 break;
12695 }
12696 else
12697 return (vm_object_t)NULL;
12698 }
12699 } else {
12700 return (vm_object_t)NULL;
12701 }
12702
12703 return object;
12704 }
12705
12706 /*
12707 * Export routines to other components for the things we access locally through
12708 * macros.
12709 */
12710 #undef current_map
12711 vm_map_t
12712 current_map(void)
12713 {
12714 return (current_map_fast());
12715 }
12716
12717 /*
12718 * vm_map_reference:
12719 *
12720 * Most code internal to osfmk will go through a
12721 * macro defining this. This is always here for the
12722 * use of other kernel components.
12723 */
12724 #undef vm_map_reference
12725 void
12726 vm_map_reference(
12727 register vm_map_t map)
12728 {
12729 if (map == VM_MAP_NULL)
12730 return;
12731
12732 lck_mtx_lock(&map->s_lock);
12733 #if TASK_SWAPPER
12734 assert(map->res_count > 0);
12735 assert(map->ref_count >= map->res_count);
12736 map->res_count++;
12737 #endif
12738 map->ref_count++;
12739 lck_mtx_unlock(&map->s_lock);
12740 }
12741
12742 /*
12743 * vm_map_deallocate:
12744 *
12745 * Removes a reference from the specified map,
12746 * destroying it if no references remain.
12747 * The map should not be locked.
12748 */
12749 void
12750 vm_map_deallocate(
12751 register vm_map_t map)
12752 {
12753 unsigned int ref;
12754
12755 if (map == VM_MAP_NULL)
12756 return;
12757
12758 lck_mtx_lock(&map->s_lock);
12759 ref = --map->ref_count;
12760 if (ref > 0) {
12761 vm_map_res_deallocate(map);
12762 lck_mtx_unlock(&map->s_lock);
12763 return;
12764 }
12765 assert(map->ref_count == 0);
12766 lck_mtx_unlock(&map->s_lock);
12767
12768 #if TASK_SWAPPER
12769 /*
12770 * The map residence count isn't decremented here because
12771 * the vm_map_delete below will traverse the entire map,
12772 * deleting entries, and the residence counts on objects
12773 * and sharing maps will go away then.
12774 */
12775 #endif
12776
12777 vm_map_destroy(map, VM_MAP_NO_FLAGS);
12778 }
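/*
 * Illustrative sketch (not part of this file, not compiled): the usual
 * pairing of vm_map_reference() and vm_map_deallocate() when a
 * component wants to keep using a map after the context that made the
 * map pointer safe to read has gone away. example_use_map() is
 * hypothetical.
 */
#if 0
static void
example_use_map(
	vm_map_t	map)
{
	/* take our own reference while the map is known to be alive */
	vm_map_reference(map);

	/* ... work with the map ... */

	/* drop our reference; the map is destroyed if this was the last one */
	vm_map_deallocate(map);
}
#endif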
12779
12780
12781 void
12782 vm_map_disable_NX(vm_map_t map)
12783 {
12784 if (map == NULL)
12785 return;
12786 if (map->pmap == NULL)
12787 return;
12788
12789 pmap_disable_NX(map->pmap);
12790 }
12791
12792 void
12793 vm_map_disallow_data_exec(vm_map_t map)
12794 {
12795 if (map == NULL)
12796 return;
12797
12798 map->map_disallow_data_exec = TRUE;
12799 }
12800
12801 /* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
12802 * more descriptive.
12803 */
12804 void
12805 vm_map_set_32bit(vm_map_t map)
12806 {
12807 map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
12808 }
12809
12810
12811 void
12812 vm_map_set_64bit(vm_map_t map)
12813 {
12814 map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
12815 }
12816
12817 vm_map_offset_t
12818 vm_compute_max_offset(unsigned is64)
12819 {
12820 return (is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS);
12821 }
12822
12823 boolean_t
12824 vm_map_is_64bit(
12825 vm_map_t map)
12826 {
12827 return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
12828 }
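/*
 * Illustrative sketch (not part of this file, not compiled): task
 * creation picks the map ceiling from the target ABI, which is all
 * vm_map_set_32bit()/vm_map_set_64bit() do. example_size_map_for_abi()
 * is hypothetical.
 */
#if 0
static void
example_size_map_for_abi(
	vm_map_t	map,
	boolean_t	is_64bit)
{
	if (is_64bit)
		vm_map_set_64bit(map);	/* max_offset = MACH_VM_MAX_ADDRESS */
	else
		vm_map_set_32bit(map);	/* max_offset = VM_MAX_ADDRESS */

	assert(vm_map_is_64bit(map) == (is_64bit ? TRUE : FALSE));
}
#endif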
12829
12830 boolean_t
12831 vm_map_has_4GB_pagezero(
12832 vm_map_t map)
12833 {
12834 /*
12835 * XXX FBDP
12836 * We should lock the VM map (for read) here but we can get away
12837 * with it for now because there can't really be any race condition:
12838 * the VM map's min_offset is changed only when the VM map is created
12839 * and when the zero page is established (when the binary gets loaded),
12840 * and this routine gets called only when the task terminates and the
12841 * VM map is being torn down, and when a new map is created via
12842 * load_machfile()/execve().
12843 */
12844 return (map->min_offset >= 0x100000000ULL);
12845 }
12846
12847 void
12848 vm_map_set_4GB_pagezero(vm_map_t map)
12849 {
12850 #if defined(__i386__)
12851 pmap_set_4GB_pagezero(map->pmap);
12852 #else
12853 #pragma unused(map)
12854 #endif
12855
12856 }
12857
12858 void
12859 vm_map_clear_4GB_pagezero(vm_map_t map)
12860 {
12861 #if defined(__i386__)
12862 pmap_clear_4GB_pagezero(map->pmap);
12863 #else
12864 #pragma unused(map)
12865 #endif
12866 }
12867
12868 /*
12869 * Raise a VM map's minimum offset.
12870 * To strictly enforce "page zero" reservation.
12871 */
12872 kern_return_t
12873 vm_map_raise_min_offset(
12874 vm_map_t map,
12875 vm_map_offset_t new_min_offset)
12876 {
12877 vm_map_entry_t first_entry;
12878
12879 new_min_offset = vm_map_round_page(new_min_offset);
12880
12881 vm_map_lock(map);
12882
12883 if (new_min_offset < map->min_offset) {
12884 /*
12885 * Can't move min_offset backwards, as that would expose
12886 * a part of the address space that was previously, and for
12887 * possibly good reasons, inaccessible.
12888 */
12889 vm_map_unlock(map);
12890 return KERN_INVALID_ADDRESS;
12891 }
12892
12893 first_entry = vm_map_first_entry(map);
12894 if (first_entry != vm_map_to_entry(map) &&
12895 first_entry->vme_start < new_min_offset) {
12896 /*
12897 * Some memory was already allocated below the new
12898 * minimum offset. It's too late to change it now...
12899 */
12900 vm_map_unlock(map);
12901 return KERN_NO_SPACE;
12902 }
12903
12904 map->min_offset = new_min_offset;
12905
12906 vm_map_unlock(map);
12907
12908 return KERN_SUCCESS;
12909 }
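/*
 * Illustrative sketch (not part of this file, not compiled): reserving
 * a conventional one-page "page zero" by raising the map's minimum
 * offset before anything is mapped, as an exec-time caller might do.
 * The size used here is illustrative; real callers pass the pagezero
 * size chosen for the binary being loaded.
 */
#if 0
static kern_return_t
example_reserve_pagezero(
	vm_map_t	map)
{
	/* fails with KERN_NO_SPACE if something is already mapped below it */
	return vm_map_raise_min_offset(map, (vm_map_offset_t) PAGE_SIZE);
}
#endif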
12910
12911 /*
12912 * Set the limit on the maximum amount of user wired memory allowed for this map.
12913 * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side of
12914 * the kernel. The limit is checked on the Mach VM side, so we keep a copy here so we
12915 * don't have to reach over to the BSD data structures.
12916 */
12917
12918 void
12919 vm_map_set_user_wire_limit(vm_map_t map,
12920 vm_size_t limit)
12921 {
12922 map->user_wire_limit = limit;
12923 }
12924
12925
12926 void vm_map_switch_protect(vm_map_t map,
12927 boolean_t val)
12928 {
12929 vm_map_lock(map);
12930 map->switch_protect=val;
12931 vm_map_unlock(map);
12932 }
12933
12934 /* Add (generate) code signature for memory range */
12935 #if CONFIG_DYNAMIC_CODE_SIGNING
12936 kern_return_t vm_map_sign(vm_map_t map,
12937 vm_map_offset_t start,
12938 vm_map_offset_t end)
12939 {
12940 vm_map_entry_t entry;
12941 vm_page_t m;
12942 vm_object_t object;
12943
12944 /*
12945 * Vet all the input parameters and current type and state of the
12946 * underlying object. Return with an error if anything is amiss.
12947 */
12948 if (map == VM_MAP_NULL)
12949 return(KERN_INVALID_ARGUMENT);
12950
12951 vm_map_lock_read(map);
12952
12953 if (!vm_map_lookup_entry(map, start, &entry) || entry->is_sub_map) {
12954 /*
12955 * Must pass a valid non-submap address.
12956 */
12957 vm_map_unlock_read(map);
12958 return(KERN_INVALID_ADDRESS);
12959 }
12960
12961 if((entry->vme_start > start) || (entry->vme_end < end)) {
12962 /*
12963 * Map entry doesn't cover the requested range. Not handling
12964 * this situation currently.
12965 */
12966 vm_map_unlock_read(map);
12967 return(KERN_INVALID_ARGUMENT);
12968 }
12969
12970 object = entry->object.vm_object;
12971 if (object == VM_OBJECT_NULL) {
12972 /*
12973 * Object must already be present or we can't sign.
12974 */
12975 vm_map_unlock_read(map);
12976 return KERN_INVALID_ARGUMENT;
12977 }
12978
12979 vm_object_lock(object);
12980 vm_map_unlock_read(map);
12981
12982 while(start < end) {
12983 uint32_t refmod;
12984
12985 m = vm_page_lookup(object, start - entry->vme_start + entry->offset );
12986 if (m==VM_PAGE_NULL) {
12987 /* should we try to fault a page here? We can probably
12988 * demand it exists and is locked for this request */
12989 vm_object_unlock(object);
12990 return KERN_FAILURE;
12991 }
12992 /* deal with special page status */
12993 if (m->busy ||
12994 (m->unusual && (m->error || m->restart || m->private || m->absent))) {
12995 vm_object_unlock(object);
12996 return KERN_FAILURE;
12997 }
12998
12999 /* Page is OK... now "validate" it */
13000 /* This is the place where we'll call out to create a code
13001 * directory, later */
13002 m->cs_validated = TRUE;
13003
13004 /* The page is now "clean" for codesigning purposes. That means
13005 * we don't consider it as modified (wpmapped) anymore. But
13006 * we'll disconnect the page so we note any future modification
13007 * attempts. */
13008 m->wpmapped = FALSE;
13009 refmod = pmap_disconnect(m->phys_page);
13010
13011 /* Pull the dirty status from the pmap, since we cleared the
13012 * wpmapped bit */
13013 if ((refmod & VM_MEM_MODIFIED) && !m->dirty) {
13014 m->dirty = TRUE;
13015 }
13016
13017 /* On to the next page */
13018 start += PAGE_SIZE;
13019 }
13020 vm_object_unlock(object);
13021
13022 return KERN_SUCCESS;
13023 }
13024 #endif
13025
13026 #if CONFIG_FREEZE
13027
13028 kern_return_t vm_map_freeze_walk(
13029 vm_map_t map,
13030 unsigned int *purgeable_count,
13031 unsigned int *wired_count,
13032 unsigned int *clean_count,
13033 unsigned int *dirty_count,
13034 boolean_t *has_shared)
13035 {
13036 vm_map_entry_t entry;
13037
13038 vm_map_lock_read(map);
13039
13040 *purgeable_count = *wired_count = *clean_count = *dirty_count = 0;
13041 *has_shared = FALSE;
13042
13043 for (entry = vm_map_first_entry(map);
13044 entry != vm_map_to_entry(map);
13045 entry = entry->vme_next) {
13046 unsigned int purgeable, clean, dirty, wired;
13047 boolean_t shared;
13048
13049 if ((entry->object.vm_object == 0) ||
13050 (entry->is_sub_map) ||
13051 (entry->object.vm_object->phys_contiguous)) {
13052 continue;
13053 }
13054
13055 vm_object_pack(&purgeable, &wired, &clean, &dirty, &shared, entry->object.vm_object, VM_OBJECT_NULL, NULL, NULL);
13056
13057 *purgeable_count += purgeable;
13058 *wired_count += wired;
13059 *clean_count += clean;
13060 *dirty_count += dirty;
13061
13062 if (shared) {
13063 *has_shared = TRUE;
13064 }
13065 }
13066
13067 vm_map_unlock_read(map);
13068
13069 return KERN_SUCCESS;
13070 }
13071
13072 kern_return_t vm_map_freeze(
13073 vm_map_t map,
13074 unsigned int *purgeable_count,
13075 unsigned int *wired_count,
13076 unsigned int *clean_count,
13077 unsigned int *dirty_count,
13078 boolean_t *has_shared)
13079 {
13080 vm_map_entry_t entry2 = VM_MAP_ENTRY_NULL;
13081 vm_object_t compact_object = VM_OBJECT_NULL;
13082 vm_object_offset_t offset = 0x0;
13083 kern_return_t kr = KERN_SUCCESS;
13084 void *default_freezer_toc = NULL;
13085 boolean_t cleanup = FALSE;
13086
13087 *purgeable_count = *wired_count = *clean_count = *dirty_count = 0;
13088 *has_shared = FALSE;
13089
13090 /* Create our compact object */
13091 compact_object = vm_object_allocate((vm_map_offset_t)(VM_MAX_ADDRESS) - (vm_map_offset_t)(VM_MIN_ADDRESS));
13092 if (!compact_object) {
13093 kr = KERN_FAILURE;
13094 goto done;
13095 }
13096
13097 default_freezer_toc = default_freezer_mapping_create(compact_object, offset);
13098 if (!default_freezer_toc) {
13099 kr = KERN_FAILURE;
13100 goto done;
13101 }
13102
13103 /*
13104 * We need the exclusive lock here so that we can
13105 * block any page faults or lookups while we are
13106 * in the middle of freezing this vm map.
13107 */
13108 vm_map_lock(map);
13109
13110 if (map->default_freezer_toc != NULL){
13111 /*
13112 * This map has already been frozen.
13113 */
13114 cleanup = TRUE;
13115 kr = KERN_SUCCESS;
13116 goto done;
13117 }
13118
13119 /* Get a mapping in place for the freezing about to commence */
13120 map->default_freezer_toc = default_freezer_toc;
13121
13122 vm_object_lock(compact_object);
13123
13124 for (entry2 = vm_map_first_entry(map);
13125 entry2 != vm_map_to_entry(map);
13126 entry2 = entry2->vme_next) {
13127
13128 vm_object_t src_object = entry2->object.vm_object;
13129
13130 /* If eligible, scan the entry, moving eligible pages over to the compact object */
13131 if (entry2->object.vm_object && !entry2->is_sub_map && !entry2->object.vm_object->phys_contiguous) {
13132 unsigned int purgeable, clean, dirty, wired;
13133 boolean_t shared;
13134
13135 vm_object_pack(&purgeable, &wired, &clean, &dirty, &shared,
13136 src_object, compact_object, &default_freezer_toc, &offset);
13137
13138 *purgeable_count += purgeable;
13139 *wired_count += wired;
13140 *clean_count += clean;
13141 *dirty_count += dirty;
13142
13143 if (shared) {
13144 *has_shared = TRUE;
13145 }
13146 }
13147 }
13148
13149 vm_object_unlock(compact_object);
13150
13151 /* Finally, throw out the pages to swap */
13152 vm_object_pageout(compact_object);
13153
13154 done:
13155 vm_map_unlock(map);
13156
13157 /* Unwind if there was a failure */
13158 if ((cleanup) || (KERN_SUCCESS != kr)) {
13159 if (default_freezer_toc){
13160 default_freezer_mapping_free(&map->default_freezer_toc, TRUE);
13161 }
13162 if (compact_object){
13163 vm_object_deallocate(compact_object);
13164 }
13165 }
13166
13167 return kr;
13168 }
13169
13170 __private_extern__ vm_object_t default_freezer_get_compact_vm_object( void** );
13171
13172 void
13173 vm_map_thaw(
13174 vm_map_t map)
13175 {
13176 void **default_freezer_toc;
13177 vm_object_t compact_object;
13178
13179 vm_map_lock(map);
13180
13181 if (map->default_freezer_toc == NULL){
13182 /*
13183 * This map is not in a frozen state.
13184 */
13185 goto out;
13186 }
13187
13188 default_freezer_toc = &(map->default_freezer_toc);
13189
13190 compact_object = default_freezer_get_compact_vm_object(default_freezer_toc);
13191
13192 /* Bring the pages back in */
13193 vm_object_pagein(compact_object);
13194
13195 /* Shift pages back to their original objects */
13196 vm_object_unpack(compact_object, default_freezer_toc);
13197
13198 vm_object_deallocate(compact_object);
13199
13200 map->default_freezer_toc = NULL;
13201
13202 out:
13203 vm_map_unlock(map);
13204 }
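/*
 * Illustrative sketch (not part of this file, not compiled): the
 * expected freeze cycle for a task's map when CONFIG_FREEZE is enabled.
 * A caller can first walk the map to estimate how much is dirty or
 * shared, then freeze it, and later call vm_map_thaw() before the task
 * runs again. example_freeze_task_map() and its policy check are
 * hypothetical; real thresholds live in the caller.
 */
#if 0
static kern_return_t
example_freeze_task_map(
	vm_map_t	map)
{
	unsigned int	purgeable, wired, clean, dirty;
	boolean_t	shared;
	kern_return_t	kr;

	kr = vm_map_freeze_walk(map, &purgeable, &wired,
				&clean, &dirty, &shared);
	if (kr != KERN_SUCCESS)
		return kr;

	if (shared || dirty == 0)
		return KERN_SUCCESS;	/* hypothetical policy: nothing worth freezing */

	return vm_map_freeze(map, &purgeable, &wired,
			     &clean, &dirty, &shared);
}
#endif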
13205 #endif