1 /*
2 * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_map.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * Virtual memory mapping module.
64 */
65
66 #include <task_swapper.h>
67 #include <mach_assert.h>
68 #include <libkern/OSAtomic.h>
69
70 #include <mach/kern_return.h>
71 #include <mach/port.h>
72 #include <mach/vm_attributes.h>
73 #include <mach/vm_param.h>
74 #include <mach/vm_behavior.h>
75 #include <mach/vm_statistics.h>
76 #include <mach/memory_object.h>
77 #include <mach/mach_vm.h>
78 #include <machine/cpu_capabilities.h>
79
80 #include <kern/assert.h>
81 #include <kern/counters.h>
82 #include <kern/kalloc.h>
83 #include <kern/zalloc.h>
84
85 #include <vm/cpm.h>
86 #include <vm/vm_init.h>
87 #include <vm/vm_fault.h>
88 #include <vm/vm_map.h>
89 #include <vm/vm_object.h>
90 #include <vm/vm_page.h>
91 #include <vm/vm_kern.h>
92 #include <ipc/ipc_port.h>
93 #include <kern/sched_prim.h>
94 #include <kern/misc_protos.h>
95 #include <ddb/tr.h>
96 #include <machine/db_machdep.h>
97 #include <kern/xpr.h>
98
99 #include <mach/vm_map_server.h>
100 #include <mach/mach_host_server.h>
101 #include <vm/vm_shared_memory_server.h>
102 #include <vm/vm_protos.h> // for vm_map_commpage64 and vm_map_remove_commpage64
103
104 #ifdef ppc
105 #include <ppc/mappings.h>
106 #endif /* ppc */
107
108 #include <vm/vm_protos.h>
109
110 /* Internal prototypes
111 */
112
113 static void vm_map_simplify_range(
114 vm_map_t map,
115 vm_map_offset_t start,
116 vm_map_offset_t end); /* forward */
117
118 static boolean_t vm_map_range_check(
119 vm_map_t map,
120 vm_map_offset_t start,
121 vm_map_offset_t end,
122 vm_map_entry_t *entry);
123
124 static vm_map_entry_t _vm_map_entry_create(
125 struct vm_map_header *map_header);
126
127 static void _vm_map_entry_dispose(
128 struct vm_map_header *map_header,
129 vm_map_entry_t entry);
130
131 static void vm_map_pmap_enter(
132 vm_map_t map,
133 vm_map_offset_t addr,
134 vm_map_offset_t end_addr,
135 vm_object_t object,
136 vm_object_offset_t offset,
137 vm_prot_t protection);
138
139 static void _vm_map_clip_end(
140 struct vm_map_header *map_header,
141 vm_map_entry_t entry,
142 vm_map_offset_t end);
143
144 static void _vm_map_clip_start(
145 struct vm_map_header *map_header,
146 vm_map_entry_t entry,
147 vm_map_offset_t start);
148
149 static void vm_map_entry_delete(
150 vm_map_t map,
151 vm_map_entry_t entry);
152
153 static kern_return_t vm_map_delete(
154 vm_map_t map,
155 vm_map_offset_t start,
156 vm_map_offset_t end,
157 int flags,
158 vm_map_t zap_map);
159
160 static kern_return_t vm_map_copy_overwrite_unaligned(
161 vm_map_t dst_map,
162 vm_map_entry_t entry,
163 vm_map_copy_t copy,
164 vm_map_address_t start);
165
166 static kern_return_t vm_map_copy_overwrite_aligned(
167 vm_map_t dst_map,
168 vm_map_entry_t tmp_entry,
169 vm_map_copy_t copy,
170 vm_map_offset_t start,
171 pmap_t pmap);
172
173 static kern_return_t vm_map_copyin_kernel_buffer(
174 vm_map_t src_map,
175 vm_map_address_t src_addr,
176 vm_map_size_t len,
177 boolean_t src_destroy,
178 vm_map_copy_t *copy_result); /* OUT */
179
180 static kern_return_t vm_map_copyout_kernel_buffer(
181 vm_map_t map,
182 vm_map_address_t *addr, /* IN/OUT */
183 vm_map_copy_t copy,
184 boolean_t overwrite);
185
186 static void vm_map_fork_share(
187 vm_map_t old_map,
188 vm_map_entry_t old_entry,
189 vm_map_t new_map);
190
191 static boolean_t vm_map_fork_copy(
192 vm_map_t old_map,
193 vm_map_entry_t *old_entry_p,
194 vm_map_t new_map);
195
196 void vm_map_region_top_walk(
197 vm_map_entry_t entry,
198 vm_region_top_info_t top);
199
200 void vm_map_region_walk(
201 vm_map_t map,
202 vm_map_offset_t va,
203 vm_map_entry_t entry,
204 vm_object_offset_t offset,
205 vm_object_size_t range,
206 vm_region_extended_info_t extended);
207
208 static kern_return_t vm_map_wire_nested(
209 vm_map_t map,
210 vm_map_offset_t start,
211 vm_map_offset_t end,
212 vm_prot_t access_type,
213 boolean_t user_wire,
214 pmap_t map_pmap,
215 vm_map_offset_t pmap_addr);
216
217 static kern_return_t vm_map_unwire_nested(
218 vm_map_t map,
219 vm_map_offset_t start,
220 vm_map_offset_t end,
221 boolean_t user_wire,
222 pmap_t map_pmap,
223 vm_map_offset_t pmap_addr);
224
225 static kern_return_t vm_map_overwrite_submap_recurse(
226 vm_map_t dst_map,
227 vm_map_offset_t dst_addr,
228 vm_map_size_t dst_size);
229
230 static kern_return_t vm_map_copy_overwrite_nested(
231 vm_map_t dst_map,
232 vm_map_offset_t dst_addr,
233 vm_map_copy_t copy,
234 boolean_t interruptible,
235 pmap_t pmap);
236
237 static kern_return_t vm_map_remap_extract(
238 vm_map_t map,
239 vm_map_offset_t addr,
240 vm_map_size_t size,
241 boolean_t copy,
242 struct vm_map_header *map_header,
243 vm_prot_t *cur_protection,
244 vm_prot_t *max_protection,
245 vm_inherit_t inheritance,
246 boolean_t pageable);
247
248 static kern_return_t vm_map_remap_range_allocate(
249 vm_map_t map,
250 vm_map_address_t *address,
251 vm_map_size_t size,
252 vm_map_offset_t mask,
253 boolean_t anywhere,
254 vm_map_entry_t *map_entry);
255
256 static void vm_map_region_look_for_page(
257 vm_map_t map,
258 vm_map_offset_t va,
259 vm_object_t object,
260 vm_object_offset_t offset,
261 int max_refcnt,
262 int depth,
263 vm_region_extended_info_t extended);
264
265 static int vm_map_region_count_obj_refs(
266 vm_map_entry_t entry,
267 vm_object_t object);
268
269 /*
270 * Macros to copy a vm_map_entry. We must be careful to correctly
271 * manage the wired page count. vm_map_entry_copy() creates a new
272 * map entry to the same memory - the wired count in the new entry
273 * must be set to zero. vm_map_entry_copy_full() creates a new
274 * entry that is identical to the old entry. This preserves the
275 * wire count; it's used for map splitting and zone changing in
276 * vm_map_copyout.
277 */
278 #define vm_map_entry_copy(NEW,OLD) \
279 MACRO_BEGIN \
280 *(NEW) = *(OLD); \
281 (NEW)->is_shared = FALSE; \
282 (NEW)->needs_wakeup = FALSE; \
283 (NEW)->in_transition = FALSE; \
284 (NEW)->wired_count = 0; \
285 (NEW)->user_wired_count = 0; \
286 MACRO_END
287
288 #define vm_map_entry_copy_full(NEW,OLD) (*(NEW) = *(OLD))
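
/*
 * Illustrative sketch (not part of the original source): duplicating an
 * entry for a new, unwired mapping versus cloning it exactly:
 *
 *	vm_map_entry_t new_entry = vm_map_entry_create(map);
 *
 *	vm_map_entry_copy(new_entry, old_entry);	// wired counts reset to 0
 *	vm_map_entry_copy_full(new_entry, old_entry);	// exact copy, wire count kept
 */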
289
290 /*
291 * Virtual memory maps provide for the mapping, protection,
292 * and sharing of virtual memory objects. In addition,
293 * this module provides for an efficient virtual copy of
294 * memory from one map to another.
295 *
296 * Synchronization is required prior to most operations.
297 *
298 * Maps consist of an ordered doubly-linked list of simple
299 * entries; a single hint is used to speed up lookups.
300 *
301 * Sharing maps have been deleted from this version of Mach.
302 * All shared objects are now mapped directly into the respective
303 * maps. This requires a change in the copy on write strategy;
304 * the asymmetric (delayed) strategy is used for shared temporary
305 * objects instead of the symmetric (shadow) strategy. All maps
306 * are now "top level" maps (either task map, kernel map or submap
307 * of the kernel map).
308 *
309  *      Since portions of maps are specified by start/end addresses,
310 * which may not align with existing map entries, all
311 * routines merely "clip" entries to these start/end values.
312 * [That is, an entry is split into two, bordering at a
313 * start or end value.] Note that these clippings may not
314 * always be necessary (as the two resulting entries are then
315 * not changed); however, the clipping is done for convenience.
316 * No attempt is currently made to "glue back together" two
317 * abutting entries.
318 *
319 * The symmetric (shadow) copy strategy implements virtual copy
320 * by copying VM object references from one map to
321 * another, and then marking both regions as copy-on-write.
322 * It is important to note that only one writeable reference
323 * to a VM object region exists in any map when this strategy
324 * is used -- this means that shadow object creation can be
325  *      delayed until a write operation occurs.  The asymmetric (delayed)
326 * strategy allows multiple maps to have writeable references to
327 * the same region of a vm object, and hence cannot delay creating
328 * its copy objects. See vm_object_copy_quickly() in vm_object.c.
329 * Copying of permanent objects is completely different; see
330 * vm_object_copy_strategically() in vm_object.c.
331 */
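
/*
 * Illustrative sketch (not part of the original source): the clipping
 * pattern described above, as used by the range operations later in this
 * file (map write-locked, error handling elided):
 *
 *	vm_map_entry_t entry;
 *
 *	if (vm_map_lookup_entry(map, start, &entry))
 *		vm_map_clip_start(map, entry, start);
 *	else
 *		entry = entry->vme_next;
 *	while (entry != vm_map_to_entry(map) && entry->vme_start < end) {
 *		vm_map_clip_end(map, entry, end);
 *		... operate on the clipped entry ...
 *		entry = entry->vme_next;
 *	}
 */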
332
333 static zone_t vm_map_zone; /* zone for vm_map structures */
334 static zone_t vm_map_entry_zone; /* zone for vm_map_entry structures */
335 static zone_t vm_map_kentry_zone; /* zone for kernel entry structures */
336 static zone_t vm_map_copy_zone; /* zone for vm_map_copy structures */
337
338
339 /*
340 * Placeholder object for submap operations. This object is dropped
341 * into the range by a call to vm_map_find, and removed when
342 * vm_map_submap creates the submap.
343 */
344
345 vm_object_t vm_submap_object;
346
347 /*
348 * vm_map_init:
349 *
350 * Initialize the vm_map module. Must be called before
351 * any other vm_map routines.
352 *
353 * Map and entry structures are allocated from zones -- we must
354 * initialize those zones.
355 *
356 * There are three zones of interest:
357 *
358 * vm_map_zone: used to allocate maps.
359 * vm_map_entry_zone: used to allocate map entries.
360 * vm_map_kentry_zone: used to allocate map entries for the kernel.
361 *
362 * The kernel allocates map entries from a special zone that is initially
363 * "crammed" with memory. It would be difficult (perhaps impossible) for
364  *      the kernel to allocate more memory to an entry zone when it became
365 * empty since the very act of allocating memory implies the creation
366 * of a new entry.
367 */
368
369 static void *map_data;
370 static vm_map_size_t map_data_size;
371 static void *kentry_data;
372 static vm_map_size_t kentry_data_size;
373 static int kentry_count = 2048; /* to init kentry_data_size */
374
375 #define NO_COALESCE_LIMIT (1024 * 128)
376
377 /*
378 * Threshold for aggressive (eager) page map entering for vm copyout
379  * operations.  Any copyout larger than this will NOT be aggressively entered.
380 */
381 static vm_map_size_t vm_map_aggressive_enter_max; /* set by bootstrap */
382
383 /* Skip acquiring locks if we're in the midst of a kernel core dump */
384 extern unsigned int not_in_kdp;
385
386 #ifdef __i386__
387 kern_return_t
388 vm_map_apple_protected(
389 vm_map_t map,
390 vm_map_offset_t start,
391 vm_map_offset_t end)
392 {
393 boolean_t map_locked;
394 kern_return_t kr;
395 vm_map_entry_t map_entry;
396 memory_object_t protected_mem_obj;
397 vm_object_t protected_object;
398 vm_map_offset_t map_addr;
399
400 vm_map_lock_read(map);
401 map_locked = TRUE;
402
403 /* lookup the protected VM object */
404 if (!vm_map_lookup_entry(map,
405 start,
406 &map_entry) ||
407 map_entry->vme_end != end ||
408 map_entry->is_sub_map) {
409 /* that memory is not properly mapped */
410 kr = KERN_INVALID_ARGUMENT;
411 goto done;
412 }
413 protected_object = map_entry->object.vm_object;
414 if (protected_object == VM_OBJECT_NULL) {
415 /* there should be a VM object here at this point */
416 kr = KERN_INVALID_ARGUMENT;
417 goto done;
418 }
419
420 /*
421 * Lookup (and create if necessary) the protected memory object
422 * matching that VM object.
423 * If successful, this also grabs a reference on the memory object,
424 * to guarantee that it doesn't go away before we get a chance to map
425 * it.
426 */
427
428 protected_mem_obj = apple_protect_pager_setup(protected_object);
429 if (protected_mem_obj == NULL) {
430 kr = KERN_FAILURE;
431 goto done;
432 }
433
434 vm_map_unlock_read(map);
435 map_locked = FALSE;
436
437 /* map this memory object in place of the current one */
438 map_addr = start;
439 kr = mach_vm_map(map,
440 &map_addr,
441 end - start,
442 (mach_vm_offset_t) 0,
443 VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
444 (ipc_port_t) protected_mem_obj,
445 map_entry->offset + (start - map_entry->vme_start),
446 TRUE,
447 map_entry->protection,
448 map_entry->max_protection,
449 map_entry->inheritance);
450 assert(map_addr == start);
451 if (kr == KERN_SUCCESS) {
452 /* let the pager know that this mem_obj is mapped */
453 apple_protect_pager_map(protected_mem_obj);
454 }
455 /*
456 * Release the reference obtained by apple_protect_pager_setup().
457 * The mapping (if it succeeded) is now holding a reference on the
458 * memory object.
459 */
460 memory_object_deallocate(protected_mem_obj);
461
462 done:
463 if (map_locked) {
464 vm_map_unlock_read(map);
465 }
466 return kr;
467 }
468 #endif /* __i386__ */
469
470
471 void
472 vm_map_init(
473 void)
474 {
475 vm_map_zone = zinit((vm_map_size_t) sizeof(struct vm_map), 40*1024,
476 PAGE_SIZE, "maps");
477
478 vm_map_entry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
479 1024*1024, PAGE_SIZE*5,
480 "non-kernel map entries");
481
482 vm_map_kentry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
483 kentry_data_size, kentry_data_size,
484 "kernel map entries");
485
486 vm_map_copy_zone = zinit((vm_map_size_t) sizeof(struct vm_map_copy),
487 16*1024, PAGE_SIZE, "map copies");
488
489 /*
490 * Cram the map and kentry zones with initial data.
491 * Set kentry_zone non-collectible to aid zone_gc().
492 */
493 zone_change(vm_map_zone, Z_COLLECT, FALSE);
494 zone_change(vm_map_kentry_zone, Z_COLLECT, FALSE);
495 zone_change(vm_map_kentry_zone, Z_EXPAND, FALSE);
496 zcram(vm_map_zone, map_data, map_data_size);
497 zcram(vm_map_kentry_zone, kentry_data, kentry_data_size);
498 }
499
500 void
501 vm_map_steal_memory(
502 void)
503 {
504 map_data_size = vm_map_round_page(10 * sizeof(struct vm_map));
505 map_data = pmap_steal_memory(map_data_size);
506
507 #if 0
508 /*
509 * Limiting worst case: vm_map_kentry_zone needs to map each "available"
510  * physical page (i.e. those beyond the kernel image and page tables)
511 * individually; we guess at most one entry per eight pages in the
512 * real world. This works out to roughly .1 of 1% of physical memory,
513 * or roughly 1900 entries (64K) for a 64M machine with 4K pages.
514 */
515 #endif
516 kentry_count = pmap_free_pages() / 8;
517
518
519 kentry_data_size =
520 vm_map_round_page(kentry_count * sizeof(struct vm_map_entry));
521 kentry_data = pmap_steal_memory(kentry_data_size);
522 }
523
524 /*
525 * vm_map_create:
526 *
527 * Creates and returns a new empty VM map with
528 * the given physical map structure, and having
529 * the given lower and upper address bounds.
530 */
531 vm_map_t
532 vm_map_create(
533 pmap_t pmap,
534 vm_map_offset_t min,
535 vm_map_offset_t max,
536 boolean_t pageable)
537 {
538 register vm_map_t result;
539
540 result = (vm_map_t) zalloc(vm_map_zone);
541 if (result == VM_MAP_NULL)
542 panic("vm_map_create");
543
544 vm_map_first_entry(result) = vm_map_to_entry(result);
545 vm_map_last_entry(result) = vm_map_to_entry(result);
546 result->hdr.nentries = 0;
547 result->hdr.entries_pageable = pageable;
548
549 result->size = 0;
550 result->ref_count = 1;
551 #if TASK_SWAPPER
552 result->res_count = 1;
553 result->sw_state = MAP_SW_IN;
554 #endif /* TASK_SWAPPER */
555 result->pmap = pmap;
556 result->min_offset = min;
557 result->max_offset = max;
558 result->wiring_required = FALSE;
559 result->no_zero_fill = FALSE;
560 result->mapped = FALSE;
561 result->wait_for_space = FALSE;
562 result->first_free = vm_map_to_entry(result);
563 result->hint = vm_map_to_entry(result);
564 vm_map_lock_init(result);
565 mutex_init(&result->s_lock, 0);
566
567 return(result);
568 }
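
/*
 * Illustrative sketch (not part of the original source): a fork-style
 * caller creates a child map spanning the parent's bounds, given a freshly
 * created pmap "new_pmap" (hypothetical name):
 *
 *	vm_map_t new_map;
 *
 *	new_map = vm_map_create(new_pmap,
 *				old_map->min_offset,
 *				old_map->max_offset,
 *				old_map->hdr.entries_pageable);
 */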
569
570 /*
571 * vm_map_entry_create: [ internal use only ]
572 *
573 * Allocates a VM map entry for insertion in the
574 * given map (or map copy). No fields are filled.
575 */
576 #define vm_map_entry_create(map) \
577 _vm_map_entry_create(&(map)->hdr)
578
579 #define vm_map_copy_entry_create(copy) \
580 _vm_map_entry_create(&(copy)->cpy_hdr)
581
582 static vm_map_entry_t
583 _vm_map_entry_create(
584 register struct vm_map_header *map_header)
585 {
586 register zone_t zone;
587 register vm_map_entry_t entry;
588
589 if (map_header->entries_pageable)
590 zone = vm_map_entry_zone;
591 else
592 zone = vm_map_kentry_zone;
593
594 entry = (vm_map_entry_t) zalloc(zone);
595 if (entry == VM_MAP_ENTRY_NULL)
596 panic("vm_map_entry_create");
597
598 return(entry);
599 }
600
601 /*
602 * vm_map_entry_dispose: [ internal use only ]
603 *
604 * Inverse of vm_map_entry_create.
605 */
606 #define vm_map_entry_dispose(map, entry) \
607 MACRO_BEGIN \
608 if((entry) == (map)->first_free) \
609 (map)->first_free = vm_map_to_entry(map); \
610 if((entry) == (map)->hint) \
611 (map)->hint = vm_map_to_entry(map); \
612 _vm_map_entry_dispose(&(map)->hdr, (entry)); \
613 MACRO_END
614
615 #define vm_map_copy_entry_dispose(copy, entry)                  \
616         _vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
617
618 static void
619 _vm_map_entry_dispose(
620 register struct vm_map_header *map_header,
621 register vm_map_entry_t entry)
622 {
623 register zone_t zone;
624
625 if (map_header->entries_pageable)
626 zone = vm_map_entry_zone;
627 else
628 zone = vm_map_kentry_zone;
629
630 zfree(zone, entry);
631 }
632
633 #if MACH_ASSERT
634 static boolean_t first_free_is_valid(vm_map_t map); /* forward */
635 static boolean_t first_free_check = FALSE;
636 static boolean_t
637 first_free_is_valid(
638 vm_map_t map)
639 {
640 vm_map_entry_t entry, next;
641
642 if (!first_free_check)
643 return TRUE;
644
645 entry = vm_map_to_entry(map);
646 next = entry->vme_next;
647 while (vm_map_trunc_page(next->vme_start) == vm_map_trunc_page(entry->vme_end) ||
648 (vm_map_trunc_page(next->vme_start) == vm_map_trunc_page(entry->vme_start) &&
649 next != vm_map_to_entry(map))) {
650 entry = next;
651 next = entry->vme_next;
652 if (entry == vm_map_to_entry(map))
653 break;
654 }
655 if (map->first_free != entry) {
656 printf("Bad first_free for map 0x%x: 0x%x should be 0x%x\n",
657 map, map->first_free, entry);
658 return FALSE;
659 }
660 return TRUE;
661 }
662 #endif /* MACH_ASSERT */
663
664 /*
665 * UPDATE_FIRST_FREE:
666 *
667 * Updates the map->first_free pointer to the
668 * entry immediately before the first hole in the map.
669 * The map should be locked.
670 */
671 #define UPDATE_FIRST_FREE(map, new_first_free) \
672 MACRO_BEGIN \
673 vm_map_t UFF_map; \
674 vm_map_entry_t UFF_first_free; \
675 vm_map_entry_t UFF_next_entry; \
676 UFF_map = (map); \
677 UFF_first_free = (new_first_free); \
678 UFF_next_entry = UFF_first_free->vme_next; \
679 while (vm_map_trunc_page(UFF_next_entry->vme_start) == \
680 vm_map_trunc_page(UFF_first_free->vme_end) || \
681 (vm_map_trunc_page(UFF_next_entry->vme_start) == \
682 vm_map_trunc_page(UFF_first_free->vme_start) && \
683 UFF_next_entry != vm_map_to_entry(UFF_map))) { \
684 UFF_first_free = UFF_next_entry; \
685 UFF_next_entry = UFF_first_free->vme_next; \
686 if (UFF_first_free == vm_map_to_entry(UFF_map)) \
687 break; \
688 } \
689 UFF_map->first_free = UFF_first_free; \
690 assert(first_free_is_valid(UFF_map)); \
691 MACRO_END
692
693 /*
694 * vm_map_entry_{un,}link:
695 *
696 * Insert/remove entries from maps (or map copies).
697 */
698 #define vm_map_entry_link(map, after_where, entry) \
699 MACRO_BEGIN \
700 vm_map_t VMEL_map; \
701 vm_map_entry_t VMEL_entry; \
702 VMEL_map = (map); \
703 VMEL_entry = (entry); \
704 _vm_map_entry_link(&VMEL_map->hdr, after_where, VMEL_entry); \
705 UPDATE_FIRST_FREE(VMEL_map, VMEL_map->first_free); \
706 MACRO_END
707
708
709 #define vm_map_copy_entry_link(copy, after_where, entry) \
710 _vm_map_entry_link(&(copy)->cpy_hdr, after_where, (entry))
711
712 #define _vm_map_entry_link(hdr, after_where, entry) \
713 MACRO_BEGIN \
714 (hdr)->nentries++; \
715 (entry)->vme_prev = (after_where); \
716 (entry)->vme_next = (after_where)->vme_next; \
717 (entry)->vme_prev->vme_next = (entry)->vme_next->vme_prev = (entry); \
718 MACRO_END
719
720 #define vm_map_entry_unlink(map, entry) \
721 MACRO_BEGIN \
722 vm_map_t VMEU_map; \
723 vm_map_entry_t VMEU_entry; \
724 vm_map_entry_t VMEU_first_free; \
725 VMEU_map = (map); \
726 VMEU_entry = (entry); \
727 if (VMEU_entry->vme_start <= VMEU_map->first_free->vme_start) \
728 VMEU_first_free = VMEU_entry->vme_prev; \
729 else \
730 VMEU_first_free = VMEU_map->first_free; \
731 _vm_map_entry_unlink(&VMEU_map->hdr, VMEU_entry); \
732 UPDATE_FIRST_FREE(VMEU_map, VMEU_first_free); \
733 MACRO_END
734
735 #define vm_map_copy_entry_unlink(copy, entry) \
736 _vm_map_entry_unlink(&(copy)->cpy_hdr, (entry))
737
738 #define _vm_map_entry_unlink(hdr, entry) \
739 MACRO_BEGIN \
740 (hdr)->nentries--; \
741 (entry)->vme_next->vme_prev = (entry)->vme_prev; \
742 (entry)->vme_prev->vme_next = (entry)->vme_next; \
743 MACRO_END
744
745 #if MACH_ASSERT && TASK_SWAPPER
746 /*
747 * vm_map_res_reference:
748 *
749 * Adds another valid residence count to the given map.
750 *
751 * Map is locked so this function can be called from
752 * vm_map_swapin.
753 *
754 */
755 void vm_map_res_reference(register vm_map_t map)
756 {
757 /* assert map is locked */
758 assert(map->res_count >= 0);
759 assert(map->ref_count >= map->res_count);
760 if (map->res_count == 0) {
761 mutex_unlock(&map->s_lock);
762 vm_map_lock(map);
763 vm_map_swapin(map);
764 mutex_lock(&map->s_lock);
765 ++map->res_count;
766 vm_map_unlock(map);
767 } else
768 ++map->res_count;
769 }
770
771 /*
772 * vm_map_reference_swap:
773 *
774 * Adds valid reference and residence counts to the given map.
775 *
776 * The map may not be in memory (i.e. zero residence count).
777 *
778 */
779 void vm_map_reference_swap(register vm_map_t map)
780 {
781 assert(map != VM_MAP_NULL);
782 mutex_lock(&map->s_lock);
783 assert(map->res_count >= 0);
784 assert(map->ref_count >= map->res_count);
785 map->ref_count++;
786 vm_map_res_reference(map);
787 mutex_unlock(&map->s_lock);
788 }
789
790 /*
791 * vm_map_res_deallocate:
792 *
793 * Decrement residence count on a map; possibly causing swapout.
794 *
795 * The map must be in memory (i.e. non-zero residence count).
796 *
797 * The map is locked, so this function is callable from vm_map_deallocate.
798 *
799 */
800 void vm_map_res_deallocate(register vm_map_t map)
801 {
802 assert(map->res_count > 0);
803 if (--map->res_count == 0) {
804 mutex_unlock(&map->s_lock);
805 vm_map_lock(map);
806 vm_map_swapout(map);
807 vm_map_unlock(map);
808 mutex_lock(&map->s_lock);
809 }
810 assert(map->ref_count >= map->res_count);
811 }
812 #endif /* MACH_ASSERT && TASK_SWAPPER */
813
814 /*
815 * vm_map_destroy:
816 *
817 * Actually destroy a map.
818 */
819 void
820 vm_map_destroy(
821 register vm_map_t map)
822 {
823 vm_map_lock(map);
824 (void) vm_map_delete(map, map->min_offset,
825 map->max_offset, VM_MAP_NO_FLAGS,
826 VM_MAP_NULL);
827 vm_map_unlock(map);
828
829 if (map->hdr.nentries!=0)
830 vm_map_remove_commpage(map);
831
832 // assert(map->hdr.nentries==0);
833 // if(map->hdr.nentries) { /* (BRINGUP) */
834 // panic("vm_map_destroy: hdr.nentries is not 0 (%d) in map %08X\n", map->hdr.nentries, map);
835 // }
836
837 if(map->pmap)
838 pmap_destroy(map->pmap);
839
840 zfree(vm_map_zone, map);
841 }
842
843 #if TASK_SWAPPER
844 /*
845 * vm_map_swapin/vm_map_swapout
846 *
847 * Swap a map in and out, either referencing or releasing its resources.
848  *      These functions are for internal use only; however, they must be exported
849 * because they may be called from macros, which are exported.
850 *
851 * In the case of swapout, there could be races on the residence count,
852 * so if the residence count is up, we return, assuming that a
853 * vm_map_deallocate() call in the near future will bring us back.
854 *
855 * Locking:
856 * -- We use the map write lock for synchronization among races.
857 * -- The map write lock, and not the simple s_lock, protects the
858 * swap state of the map.
859 * -- If a map entry is a share map, then we hold both locks, in
860 * hierarchical order.
861 *
862 * Synchronization Notes:
863 * 1) If a vm_map_swapin() call happens while swapout in progress, it
864 * will block on the map lock and proceed when swapout is through.
865 * 2) A vm_map_reference() call at this time is illegal, and will
866 * cause a panic. vm_map_reference() is only allowed on resident
867 * maps, since it refuses to block.
868 * 3) A vm_map_swapin() call during a swapin will block, and
869  *      proceed when the first swapin is done, turning into a nop.
870 * This is the reason the res_count is not incremented until
871 * after the swapin is complete.
872 * 4) There is a timing hole after the checks of the res_count, before
873 * the map lock is taken, during which a swapin may get the lock
874  *      before a swapout that is about to happen.  If this happens, the swapin
875 * will detect the state and increment the reference count, causing
876 * the swapout to be a nop, thereby delaying it until a later
877 * vm_map_deallocate. If the swapout gets the lock first, then
878 * the swapin will simply block until the swapout is done, and
879 * then proceed.
880 *
881 * Because vm_map_swapin() is potentially an expensive operation, it
882 * should be used with caution.
883 *
884 * Invariants:
885 * 1) A map with a residence count of zero is either swapped, or
886 * being swapped.
887 * 2) A map with a non-zero residence count is either resident,
888 * or being swapped in.
889 */
890
891 int vm_map_swap_enable = 1;
892
893 void vm_map_swapin (vm_map_t map)
894 {
895 register vm_map_entry_t entry;
896
897 if (!vm_map_swap_enable) /* debug */
898 return;
899
900 /*
901 * Map is locked
902 * First deal with various races.
903 */
904 if (map->sw_state == MAP_SW_IN)
905 /*
906 * we raced with swapout and won. Returning will incr.
907 * the res_count, turning the swapout into a nop.
908 */
909 return;
910
911 /*
912 * The residence count must be zero. If we raced with another
913 * swapin, the state would have been IN; if we raced with a
914 * swapout (after another competing swapin), we must have lost
915 * the race to get here (see above comment), in which case
916 * res_count is still 0.
917 */
918 assert(map->res_count == 0);
919
920 /*
921 * There are no intermediate states of a map going out or
922 * coming in, since the map is locked during the transition.
923 */
924 assert(map->sw_state == MAP_SW_OUT);
925
926 /*
927 * We now operate upon each map entry. If the entry is a sub-
928 * or share-map, we call vm_map_res_reference upon it.
929 * If the entry is an object, we call vm_object_res_reference
930 * (this may iterate through the shadow chain).
931 * Note that we hold the map locked the entire time,
932 * even if we get back here via a recursive call in
933 * vm_map_res_reference.
934 */
935 entry = vm_map_first_entry(map);
936
937 while (entry != vm_map_to_entry(map)) {
938 if (entry->object.vm_object != VM_OBJECT_NULL) {
939 if (entry->is_sub_map) {
940 vm_map_t lmap = entry->object.sub_map;
941 mutex_lock(&lmap->s_lock);
942 vm_map_res_reference(lmap);
943 mutex_unlock(&lmap->s_lock);
944 } else {
945 vm_object_t object = entry->object.vm_object;
946 vm_object_lock(object);
947 /*
948 * This call may iterate through the
949 * shadow chain.
950 */
951 vm_object_res_reference(object);
952 vm_object_unlock(object);
953 }
954 }
955 entry = entry->vme_next;
956 }
957 assert(map->sw_state == MAP_SW_OUT);
958 map->sw_state = MAP_SW_IN;
959 }
960
961 void vm_map_swapout(vm_map_t map)
962 {
963 register vm_map_entry_t entry;
964
965 /*
966 * Map is locked
967 * First deal with various races.
968 * If we raced with a swapin and lost, the residence count
969 * will have been incremented to 1, and we simply return.
970 */
971 mutex_lock(&map->s_lock);
972 if (map->res_count != 0) {
973 mutex_unlock(&map->s_lock);
974 return;
975 }
976 mutex_unlock(&map->s_lock);
977
978 /*
979 * There are no intermediate states of a map going out or
980 * coming in, since the map is locked during the transition.
981 */
982 assert(map->sw_state == MAP_SW_IN);
983
984 if (!vm_map_swap_enable)
985 return;
986
987 /*
988 * We now operate upon each map entry. If the entry is a sub-
989 * or share-map, we call vm_map_res_deallocate upon it.
990 * If the entry is an object, we call vm_object_res_deallocate
991 * (this may iterate through the shadow chain).
992 * Note that we hold the map locked the entire time,
993 * even if we get back here via a recursive call in
994 * vm_map_res_deallocate.
995 */
996 entry = vm_map_first_entry(map);
997
998 while (entry != vm_map_to_entry(map)) {
999 if (entry->object.vm_object != VM_OBJECT_NULL) {
1000 if (entry->is_sub_map) {
1001 vm_map_t lmap = entry->object.sub_map;
1002 mutex_lock(&lmap->s_lock);
1003 vm_map_res_deallocate(lmap);
1004 mutex_unlock(&lmap->s_lock);
1005 } else {
1006 vm_object_t object = entry->object.vm_object;
1007 vm_object_lock(object);
1008 /*
1009 * This call may take a long time,
1010 * since it could actively push
1011 * out pages (if we implement it
1012 * that way).
1013 */
1014 vm_object_res_deallocate(object);
1015 vm_object_unlock(object);
1016 }
1017 }
1018 entry = entry->vme_next;
1019 }
1020 assert(map->sw_state == MAP_SW_IN);
1021 map->sw_state = MAP_SW_OUT;
1022 }
1023
1024 #endif /* TASK_SWAPPER */
1025
1026
1027 /*
1028 * SAVE_HINT_MAP_READ:
1029 *
1030 * Saves the specified entry as the hint for
1031 * future lookups. only a read lock is held on map,
1032 * so make sure the store is atomic... OSCompareAndSwap
1033 * guarantees this... also, we don't care if we collide
1034 * and someone else wins and stores their 'hint'
1035 */
1036 #define SAVE_HINT_MAP_READ(map,value) \
1037 MACRO_BEGIN \
1038 OSCompareAndSwap((UInt32)((map)->hint), (UInt32)value, (UInt32 *)(&(map)->hint)); \
1039 MACRO_END
1040
1041
1042 /*
1043 * SAVE_HINT_MAP_WRITE:
1044 *
1045 * Saves the specified entry as the hint for
1046  *      future lookups.  The write lock is held on the map,
1047  *      so no one else can be writing or looking
1048  *      until the lock is dropped; it is therefore safe
1049  *      to just do an assignment.
1050 */
1051 #define SAVE_HINT_MAP_WRITE(map,value) \
1052 MACRO_BEGIN \
1053 (map)->hint = (value); \
1054 MACRO_END
1055
1056 /*
1057 * vm_map_lookup_entry: [ internal use only ]
1058 *
1059 * Finds the map entry containing (or
1060 * immediately preceding) the specified address
1061 * in the given map; the entry is returned
1062 * in the "entry" parameter. The boolean
1063 * result indicates whether the address is
1064 * actually contained in the map.
1065 */
1066 boolean_t
1067 vm_map_lookup_entry(
1068 register vm_map_t map,
1069 register vm_map_offset_t address,
1070 vm_map_entry_t *entry) /* OUT */
1071 {
1072 register vm_map_entry_t cur;
1073 register vm_map_entry_t last;
1074
1075 /*
1076 * Start looking either from the head of the
1077 * list, or from the hint.
1078 */
1079 cur = map->hint;
1080
1081 if (cur == vm_map_to_entry(map))
1082 cur = cur->vme_next;
1083
1084 if (address >= cur->vme_start) {
1085 /*
1086 * Go from hint to end of list.
1087 *
1088 * But first, make a quick check to see if
1089 * we are already looking at the entry we
1090 * want (which is usually the case).
1091 * Note also that we don't need to save the hint
1092 * here... it is the same hint (unless we are
1093 * at the header, in which case the hint didn't
1094 * buy us anything anyway).
1095 */
1096 last = vm_map_to_entry(map);
1097 if ((cur != last) && (cur->vme_end > address)) {
1098 *entry = cur;
1099 return(TRUE);
1100 }
1101 }
1102 else {
1103 /*
1104 * Go from start to hint, *inclusively*
1105 */
1106 last = cur->vme_next;
1107 cur = vm_map_first_entry(map);
1108 }
1109
1110 /*
1111 * Search linearly
1112 */
1113
1114 while (cur != last) {
1115 if (cur->vme_end > address) {
1116 if (address >= cur->vme_start) {
1117 /*
1118 * Save this lookup for future
1119 * hints, and return
1120 */
1121
1122 *entry = cur;
1123 SAVE_HINT_MAP_READ(map, cur);
1124
1125 return(TRUE);
1126 }
1127 break;
1128 }
1129 cur = cur->vme_next;
1130 }
1131 *entry = cur->vme_prev;
1132 SAVE_HINT_MAP_READ(map, *entry);
1133
1134 return(FALSE);
1135 }
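
/*
 * Illustrative sketch (not part of the original source): a typical caller
 * holds at least a read lock and uses the boolean result to decide whether
 * "addr" is currently mapped (compare vm_map_apple_protected() above):
 *
 *	vm_map_entry_t entry;
 *
 *	vm_map_lock_read(map);
 *	if (vm_map_lookup_entry(map, addr, &entry)) {
 *		... addr lies in [entry->vme_start, entry->vme_end) ...
 *	} else {
 *		... addr is unmapped; "entry" precedes the hole ...
 *	}
 *	vm_map_unlock_read(map);
 */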
1136
1137 /*
1138 * Routine: vm_map_find_space
1139 * Purpose:
1140 * Allocate a range in the specified virtual address map,
1141 * returning the entry allocated for that range.
1142 * Used by kmem_alloc, etc.
1143 *
1144  *      The map must NOT be locked.  It will be returned locked
1145 * on KERN_SUCCESS, unlocked on failure.
1146 *
1147 * If an entry is allocated, the object/offset fields
1148 * are initialized to zero.
1149 */
1150 kern_return_t
1151 vm_map_find_space(
1152 register vm_map_t map,
1153 vm_map_offset_t *address, /* OUT */
1154 vm_map_size_t size,
1155 vm_map_offset_t mask,
1156 int flags,
1157 vm_map_entry_t *o_entry) /* OUT */
1158 {
1159 register vm_map_entry_t entry, new_entry;
1160 register vm_map_offset_t start;
1161 register vm_map_offset_t end;
1162
1163 if (size == 0) {
1164 *address = 0;
1165 return KERN_INVALID_ARGUMENT;
1166 }
1167
1168 new_entry = vm_map_entry_create(map);
1169
1170 /*
1171 * Look for the first possible address; if there's already
1172 * something at this address, we have to start after it.
1173 */
1174
1175 vm_map_lock(map);
1176
1177 assert(first_free_is_valid(map));
1178 if ((entry = map->first_free) == vm_map_to_entry(map))
1179 start = map->min_offset;
1180 else
1181 start = entry->vme_end;
1182
1183 /*
1184 * In any case, the "entry" always precedes
1185 * the proposed new region throughout the loop:
1186 */
1187
1188 while (TRUE) {
1189 register vm_map_entry_t next;
1190
1191 /*
1192 * Find the end of the proposed new region.
1193 * Be sure we didn't go beyond the end, or
1194 * wrap around the address.
1195 */
1196
1197 end = ((start + mask) & ~mask);
1198 if (end < start) {
1199 vm_map_entry_dispose(map, new_entry);
1200 vm_map_unlock(map);
1201 return(KERN_NO_SPACE);
1202 }
1203 start = end;
1204 end += size;
1205
1206 if ((end > map->max_offset) || (end < start)) {
1207 vm_map_entry_dispose(map, new_entry);
1208 vm_map_unlock(map);
1209 return(KERN_NO_SPACE);
1210 }
1211
1212 /*
1213 * If there are no more entries, we must win.
1214 */
1215
1216 next = entry->vme_next;
1217 if (next == vm_map_to_entry(map))
1218 break;
1219
1220 /*
1221 * If there is another entry, it must be
1222 * after the end of the potential new region.
1223 */
1224
1225 if (next->vme_start >= end)
1226 break;
1227
1228 /*
1229 * Didn't fit -- move to the next entry.
1230 */
1231
1232 entry = next;
1233 start = entry->vme_end;
1234 }
1235
1236 /*
1237 * At this point,
1238 * "start" and "end" should define the endpoints of the
1239 * available new range, and
1240 * "entry" should refer to the region before the new
1241 * range, and
1242 *
1243 * the map should be locked.
1244 */
1245
1246 *address = start;
1247
1248 new_entry->vme_start = start;
1249 new_entry->vme_end = end;
1250 assert(page_aligned(new_entry->vme_start));
1251 assert(page_aligned(new_entry->vme_end));
1252
1253 new_entry->is_shared = FALSE;
1254 new_entry->is_sub_map = FALSE;
1255 new_entry->use_pmap = FALSE;
1256 new_entry->object.vm_object = VM_OBJECT_NULL;
1257 new_entry->offset = (vm_object_offset_t) 0;
1258
1259 new_entry->needs_copy = FALSE;
1260
1261 new_entry->inheritance = VM_INHERIT_DEFAULT;
1262 new_entry->protection = VM_PROT_DEFAULT;
1263 new_entry->max_protection = VM_PROT_ALL;
1264 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
1265 new_entry->wired_count = 0;
1266 new_entry->user_wired_count = 0;
1267
1268 new_entry->in_transition = FALSE;
1269 new_entry->needs_wakeup = FALSE;
1270
1271 VM_GET_FLAGS_ALIAS(flags, new_entry->alias);
1272
1273 /*
1274 * Insert the new entry into the list
1275 */
1276
1277 vm_map_entry_link(map, entry, new_entry);
1278
1279 map->size += size;
1280
1281 /*
1282 * Update the lookup hint
1283 */
1284 SAVE_HINT_MAP_WRITE(map, new_entry);
1285
1286 *o_entry = new_entry;
1287 return(KERN_SUCCESS);
1288 }
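
/*
 * Illustrative sketch (not part of the original source): a
 * kernel_memory_allocate()-style caller (hypothetical names, error
 * handling elided).  Note that the map comes back locked on success:
 *
 *	vm_map_offset_t	map_addr;
 *	vm_map_entry_t	entry;
 *
 *	if (vm_map_find_space(kernel_map, &map_addr, map_size,
 *			      (vm_map_offset_t) 0, 0, &entry) == KERN_SUCCESS) {
 *		entry->object.vm_object = object;
 *		entry->offset = 0;
 *		vm_map_unlock(kernel_map);
 *	}
 */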
1289
1290 int vm_map_pmap_enter_print = FALSE;
1291 int vm_map_pmap_enter_enable = FALSE;
1292
1293 /*
1294 * Routine: vm_map_pmap_enter [internal only]
1295 *
1296 * Description:
1297 * Force pages from the specified object to be entered into
1298 * the pmap at the specified address if they are present.
1299  *              As soon as a page is not found in the object, the scan ends.
1300 *
1301 * Returns:
1302 * Nothing.
1303 *
1304 * In/out conditions:
1305 * The source map should not be locked on entry.
1306 */
1307 static void
1308 vm_map_pmap_enter(
1309 vm_map_t map,
1310 register vm_map_offset_t addr,
1311 register vm_map_offset_t end_addr,
1312 register vm_object_t object,
1313 vm_object_offset_t offset,
1314 vm_prot_t protection)
1315 {
1316 unsigned int cache_attr;
1317
1318 if(map->pmap == 0)
1319 return;
1320
1321 while (addr < end_addr) {
1322 register vm_page_t m;
1323
1324 vm_object_lock(object);
1325 vm_object_paging_begin(object);
1326
1327 m = vm_page_lookup(object, offset);
1328 /*
1329 * ENCRYPTED SWAP:
1330 * The user should never see encrypted data, so do not
1331 * enter an encrypted page in the page table.
1332 */
1333 if (m == VM_PAGE_NULL || m->busy || m->encrypted ||
1334 (m->unusual && ( m->error || m->restart || m->absent ||
1335 protection & m->page_lock))) {
1336
1337 vm_object_paging_end(object);
1338 vm_object_unlock(object);
1339 return;
1340 }
1341
1342 assert(!m->fictitious); /* XXX is this possible ??? */
1343
1344 if (vm_map_pmap_enter_print) {
1345 printf("vm_map_pmap_enter:");
1346 printf("map: %x, addr: %llx, object: %x, offset: %llx\n",
1347 map, (unsigned long long)addr, object, (unsigned long long)offset);
1348 }
1349 m->busy = TRUE;
1350
1351 if (m->no_isync == TRUE) {
1352 pmap_sync_page_data_phys(m->phys_page);
1353 m->no_isync = FALSE;
1354 }
1355
1356 cache_attr = ((unsigned int)object->wimg_bits) & VM_WIMG_MASK;
1357 vm_object_unlock(object);
1358
1359 PMAP_ENTER(map->pmap, addr, m,
1360 protection, cache_attr, FALSE);
1361
1362 vm_object_lock(object);
1363
1364 PAGE_WAKEUP_DONE(m);
1365 vm_page_lock_queues();
1366 if (!m->active && !m->inactive)
1367 vm_page_activate(m);
1368 vm_page_unlock_queues();
1369 vm_object_paging_end(object);
1370 vm_object_unlock(object);
1371
1372 offset += PAGE_SIZE_64;
1373 addr += PAGE_SIZE;
1374 }
1375 }
1376
1377 boolean_t vm_map_pmap_is_empty(
1378 vm_map_t map,
1379 vm_map_offset_t start,
1380 vm_map_offset_t end);
1381 boolean_t vm_map_pmap_is_empty(
1382 vm_map_t map,
1383 vm_map_offset_t start,
1384 vm_map_offset_t end)
1385 {
1386 vm_map_offset_t offset;
1387 ppnum_t phys_page;
1388
1389 if (map->pmap == NULL) {
1390 return TRUE;
1391 }
1392 for (offset = start;
1393 offset < end;
1394 offset += PAGE_SIZE) {
1395 phys_page = pmap_find_phys(map->pmap, offset);
1396 if (phys_page) {
1397 kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
1398 "page %d at 0x%llx\n",
1399 map, start, end, phys_page, offset);
1400 return FALSE;
1401 }
1402 }
1403 return TRUE;
1404 }
1405
1406 /*
1407 * Routine: vm_map_enter
1408 *
1409 * Description:
1410 * Allocate a range in the specified virtual address map.
1411 * The resulting range will refer to memory defined by
1412 * the given memory object and offset into that object.
1413 *
1414 * Arguments are as defined in the vm_map call.
1415 */
1416 int _map_enter_debug = 0;
1417 static unsigned int vm_map_enter_restore_successes = 0;
1418 static unsigned int vm_map_enter_restore_failures = 0;
1419 kern_return_t
1420 vm_map_enter(
1421 vm_map_t map,
1422 vm_map_offset_t *address, /* IN/OUT */
1423 vm_map_size_t size,
1424 vm_map_offset_t mask,
1425 int flags,
1426 vm_object_t object,
1427 vm_object_offset_t offset,
1428 boolean_t needs_copy,
1429 vm_prot_t cur_protection,
1430 vm_prot_t max_protection,
1431 vm_inherit_t inheritance)
1432 {
1433 vm_map_entry_t entry, new_entry;
1434 vm_map_offset_t start, tmp_start;
1435 vm_map_offset_t end, tmp_end;
1436 kern_return_t result = KERN_SUCCESS;
1437 vm_map_t zap_old_map = VM_MAP_NULL;
1438 vm_map_t zap_new_map = VM_MAP_NULL;
1439 boolean_t map_locked = FALSE;
1440 boolean_t pmap_empty = TRUE;
1441 boolean_t new_mapping_established = FALSE;
1442 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
1443 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
1444 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
1445 char alias;
1446
1447 if (size == 0) {
1448 *address = 0;
1449 return KERN_INVALID_ARGUMENT;
1450 }
1451
1452 VM_GET_FLAGS_ALIAS(flags, alias);
1453
1454 #define RETURN(value) { result = value; goto BailOut; }
1455
1456 assert(page_aligned(*address));
1457 assert(page_aligned(size));
1458
1459 /*
1460 * Only zero-fill objects are allowed to be purgable.
1461 * LP64todo - limit purgable objects to 32-bits for now
1462 */
1463 if (purgable &&
1464 (offset != 0 ||
1465 (object != VM_OBJECT_NULL &&
1466 (object->size != size ||
1467 object->purgable == VM_OBJECT_NONPURGABLE))
1468 || size > VM_MAX_ADDRESS)) /* LP64todo: remove when dp capable */
1469 return KERN_INVALID_ARGUMENT;
1470
1471 if (!anywhere && overwrite) {
1472 /*
1473 * Create a temporary VM map to hold the old mappings in the
1474 * affected area while we create the new one.
1475 * This avoids releasing the VM map lock in
1476 * vm_map_entry_delete() and allows atomicity
1477 * when we want to replace some mappings with a new one.
1478 * It also allows us to restore the old VM mappings if the
1479 * new mapping fails.
1480 */
1481 zap_old_map = vm_map_create(PMAP_NULL,
1482 *address,
1483 *address + size,
1484 TRUE);
1485 }
1486
1487 StartAgain: ;
1488
1489 start = *address;
1490
1491 if (anywhere) {
1492 vm_map_lock(map);
1493 map_locked = TRUE;
1494
1495 /*
1496 * Calculate the first possible address.
1497 */
1498
1499 if (start < map->min_offset)
1500 start = map->min_offset;
1501 if (start > map->max_offset)
1502 RETURN(KERN_NO_SPACE);
1503
1504 /*
1505 * Look for the first possible address;
1506 * if there's already something at this
1507 * address, we have to start after it.
1508 */
1509
1510 assert(first_free_is_valid(map));
1511 if (start == map->min_offset) {
1512 if ((entry = map->first_free) != vm_map_to_entry(map))
1513 start = entry->vme_end;
1514 } else {
1515 vm_map_entry_t tmp_entry;
1516 if (vm_map_lookup_entry(map, start, &tmp_entry))
1517 start = tmp_entry->vme_end;
1518 entry = tmp_entry;
1519 }
1520
1521 /*
1522 * In any case, the "entry" always precedes
1523 * the proposed new region throughout the
1524 * loop:
1525 */
1526
1527 while (TRUE) {
1528 register vm_map_entry_t next;
1529
1530 /*
1531 * Find the end of the proposed new region.
1532 * Be sure we didn't go beyond the end, or
1533 * wrap around the address.
1534 */
1535
1536 end = ((start + mask) & ~mask);
1537 if (end < start)
1538 RETURN(KERN_NO_SPACE);
1539 start = end;
1540 end += size;
1541
1542 if ((end > map->max_offset) || (end < start)) {
1543 if (map->wait_for_space) {
1544 if (size <= (map->max_offset -
1545 map->min_offset)) {
1546 assert_wait((event_t)map,
1547 THREAD_ABORTSAFE);
1548 vm_map_unlock(map);
1549 map_locked = FALSE;
1550 thread_block(THREAD_CONTINUE_NULL);
1551 goto StartAgain;
1552 }
1553 }
1554 RETURN(KERN_NO_SPACE);
1555 }
1556
1557 /*
1558 * If there are no more entries, we must win.
1559 */
1560
1561 next = entry->vme_next;
1562 if (next == vm_map_to_entry(map))
1563 break;
1564
1565 /*
1566 * If there is another entry, it must be
1567 * after the end of the potential new region.
1568 */
1569
1570 if (next->vme_start >= end)
1571 break;
1572
1573 /*
1574 * Didn't fit -- move to the next entry.
1575 */
1576
1577 entry = next;
1578 start = entry->vme_end;
1579 }
1580 *address = start;
1581 } else {
1582 vm_map_entry_t temp_entry;
1583
1584 /*
1585 * Verify that:
1586 * the address doesn't itself violate
1587 * the mask requirement.
1588 */
1589
1590 vm_map_lock(map);
1591 map_locked = TRUE;
1592 if ((start & mask) != 0)
1593 RETURN(KERN_NO_SPACE);
1594
1595 /*
1596 * ... the address is within bounds
1597 */
1598
1599 end = start + size;
1600
1601 if ((start < map->min_offset) ||
1602 (end > map->max_offset) ||
1603 (start >= end)) {
1604 RETURN(KERN_INVALID_ADDRESS);
1605 }
1606
1607 if (overwrite && zap_old_map != VM_MAP_NULL) {
1608 /*
1609 * Fixed mapping and "overwrite" flag: attempt to
1610 * remove all existing mappings in the specified
1611 * address range, saving them in our "zap_old_map".
1612 */
1613 (void) vm_map_delete(map, start, end,
1614 VM_MAP_REMOVE_SAVE_ENTRIES,
1615 zap_old_map);
1616 }
1617
1618 /*
1619 * ... the starting address isn't allocated
1620 */
1621
1622 if (vm_map_lookup_entry(map, start, &temp_entry))
1623 RETURN(KERN_NO_SPACE);
1624
1625 entry = temp_entry;
1626
1627 /*
1628 * ... the next region doesn't overlap the
1629 * end point.
1630 */
1631
1632 if ((entry->vme_next != vm_map_to_entry(map)) &&
1633 (entry->vme_next->vme_start < end))
1634 RETURN(KERN_NO_SPACE);
1635 }
1636
1637 /*
1638 * At this point,
1639 * "start" and "end" should define the endpoints of the
1640 * available new range, and
1641 * "entry" should refer to the region before the new
1642 * range, and
1643 *
1644 * the map should be locked.
1645 */
1646
1647 /*
1648 * See whether we can avoid creating a new entry (and object) by
1649 * extending one of our neighbors. [So far, we only attempt to
1650 * extend from below.] Note that we can never extend/join
1651 * purgable objects because they need to remain distinct
1652 * entities in order to implement their "volatile object"
1653 * semantics.
1654 */
1655
1656 if (purgable) {
1657 if (object == VM_OBJECT_NULL) {
1658 object = vm_object_allocate(size);
1659 object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
1660 object->purgable = VM_OBJECT_PURGABLE_NONVOLATILE;
1661 offset = (vm_object_offset_t)0;
1662 }
1663 } else if ((object == VM_OBJECT_NULL) &&
1664 (entry != vm_map_to_entry(map)) &&
1665 (entry->vme_end == start) &&
1666 (!entry->is_shared) &&
1667 (!entry->is_sub_map) &&
1668 (entry->alias == alias) &&
1669 (entry->inheritance == inheritance) &&
1670 (entry->protection == cur_protection) &&
1671 (entry->max_protection == max_protection) &&
1672 (entry->behavior == VM_BEHAVIOR_DEFAULT) &&
1673 (entry->in_transition == 0) &&
1674 ((alias == VM_MEMORY_REALLOC) || ((entry->vme_end - entry->vme_start) + size < NO_COALESCE_LIMIT)) &&
1675 (entry->wired_count == 0)) { /* implies user_wired_count == 0 */
1676 if (vm_object_coalesce(entry->object.vm_object,
1677 VM_OBJECT_NULL,
1678 entry->offset,
1679 (vm_object_offset_t) 0,
1680 (vm_map_size_t)(entry->vme_end - entry->vme_start),
1681 (vm_map_size_t)(end - entry->vme_end))) {
1682
1683 /*
1684 * Coalesced the two objects - can extend
1685 * the previous map entry to include the
1686 * new range.
1687 */
1688 map->size += (end - entry->vme_end);
1689 entry->vme_end = end;
1690 UPDATE_FIRST_FREE(map, map->first_free);
1691 RETURN(KERN_SUCCESS);
1692 }
1693 }
1694
1695 /*
1696 * Create a new entry
1697 * LP64todo - for now, we can only allocate 4GB internal objects
1698 * because the default pager can't page bigger ones. Remove this
1699 * when it can.
1700 *
1701 * XXX FBDP
1702 * The reserved "page zero" in each process's address space can
1703 * be arbitrarily large. Splitting it into separate 4GB objects and
1704 * therefore different VM map entries serves no purpose and just
1705 * slows down operations on the VM map, so let's not split the
1706 * allocation into 4GB chunks if the max protection is NONE. That
1707 * memory should never be accessible, so it will never get to the
1708 * default pager.
1709 */
1710 tmp_start = start;
1711 if (object == VM_OBJECT_NULL &&
1712 size > (vm_map_size_t)VM_MAX_ADDRESS &&
1713 max_protection != VM_PROT_NONE)
1714 tmp_end = tmp_start + (vm_map_size_t)VM_MAX_ADDRESS;
1715 else
1716 tmp_end = end;
1717 do {
1718 new_entry = vm_map_entry_insert(map, entry, tmp_start, tmp_end,
1719 object, offset, needs_copy, FALSE, FALSE,
1720 cur_protection, max_protection,
1721 VM_BEHAVIOR_DEFAULT, inheritance, 0);
1722 new_entry->alias = alias;
1723 entry = new_entry;
1724 } while (tmp_end != end &&
1725 (tmp_start = tmp_end) &&
1726 (tmp_end = (end - tmp_end > (vm_map_size_t)VM_MAX_ADDRESS) ?
1727 tmp_end + (vm_map_size_t)VM_MAX_ADDRESS : end));
1728
1729 vm_map_unlock(map);
1730 map_locked = FALSE;
1731
1732 new_mapping_established = TRUE;
1733
1734 /* Wire down the new entry if the user
1735 * requested all new map entries be wired.
1736 */
1737 if (map->wiring_required) {
1738 pmap_empty = FALSE; /* pmap won't be empty */
1739 result = vm_map_wire(map, start, end,
1740 new_entry->protection, TRUE);
1741 RETURN(result);
1742 }
1743
1744 if ((object != VM_OBJECT_NULL) &&
1745 (vm_map_pmap_enter_enable) &&
1746 (!anywhere) &&
1747 (!needs_copy) &&
1748 (size < (128*1024))) {
1749 pmap_empty = FALSE; /* pmap won't be empty */
1750
1751 #ifdef STACK_ONLY_NX
1752 if (alias != VM_MEMORY_STACK && cur_protection)
1753 cur_protection |= VM_PROT_EXECUTE;
1754 #endif
1755 vm_map_pmap_enter(map, start, end,
1756 object, offset, cur_protection);
1757 }
1758
1759 BailOut: ;
1760 if (result == KERN_SUCCESS &&
1761 pmap_empty &&
1762 !(flags & VM_FLAGS_NO_PMAP_CHECK)) {
1763 assert(vm_map_pmap_is_empty(map, *address, *address+size));
1764 }
1765
1766 if (result != KERN_SUCCESS) {
1767 if (new_mapping_established) {
1768 /*
1769 * We have to get rid of the new mappings since we
1770 * won't make them available to the user.
1771  *               * Try to do that atomically, to minimize the risk
1772  *               * that someone else creates new mappings in that range.
1773 */
1774 zap_new_map = vm_map_create(PMAP_NULL,
1775 *address,
1776 *address + size,
1777 TRUE);
1778 if (!map_locked) {
1779 vm_map_lock(map);
1780 map_locked = TRUE;
1781 }
1782 (void) vm_map_delete(map, *address, *address+size,
1783 VM_MAP_REMOVE_SAVE_ENTRIES,
1784 zap_new_map);
1785 }
1786 if (zap_old_map != VM_MAP_NULL &&
1787 zap_old_map->hdr.nentries != 0) {
1788 vm_map_entry_t entry1, entry2;
1789
1790 /*
1791 * The new mapping failed. Attempt to restore
1792 * the old mappings, saved in the "zap_old_map".
1793 */
1794 if (!map_locked) {
1795 vm_map_lock(map);
1796 map_locked = TRUE;
1797 }
1798
1799 /* first check if the coast is still clear */
1800 start = vm_map_first_entry(zap_old_map)->vme_start;
1801 end = vm_map_last_entry(zap_old_map)->vme_end;
1802 if (vm_map_lookup_entry(map, start, &entry1) ||
1803 vm_map_lookup_entry(map, end, &entry2) ||
1804 entry1 != entry2) {
1805 /*
1806 * Part of that range has already been
1807 * re-mapped: we can't restore the old
1808 * mappings...
1809 */
1810 vm_map_enter_restore_failures++;
1811 } else {
1812 /*
1813 * Transfer the saved map entries from
1814 * "zap_old_map" to the original "map",
1815 * inserting them all after "entry1".
1816 */
1817 for (entry2 = vm_map_first_entry(zap_old_map);
1818 entry2 != vm_map_to_entry(zap_old_map);
1819 entry2 = vm_map_first_entry(zap_old_map)) {
1820 vm_map_entry_unlink(zap_old_map,
1821 entry2);
1822 vm_map_entry_link(map, entry1, entry2);
1823 entry1 = entry2;
1824 }
1825 if (map->wiring_required) {
1826 /*
1827 * XXX TODO: we should rewire the
1828 * old pages here...
1829 */
1830 }
1831 vm_map_enter_restore_successes++;
1832 }
1833 }
1834 }
1835
1836 if (map_locked) {
1837 vm_map_unlock(map);
1838 }
1839
1840 /*
1841 * Get rid of the "zap_maps" and all the map entries that
1842 * they may still contain.
1843 */
1844 if (zap_old_map != VM_MAP_NULL) {
1845 vm_map_destroy(zap_old_map);
1846 zap_old_map = VM_MAP_NULL;
1847 }
1848 if (zap_new_map != VM_MAP_NULL) {
1849 vm_map_destroy(zap_new_map);
1850 zap_new_map = VM_MAP_NULL;
1851 }
1852
1853 return result;
1854
1855 #undef RETURN
1856 }
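
/*
 * Illustrative sketch (not part of the original source): an anonymous,
 * zero-fill, "anywhere" allocation reaches vm_map_enter() roughly the way
 * vm_allocate() does (hypothetical caller, error handling elided):
 *
 *	vm_map_offset_t	map_addr = 0;
 *	kern_return_t	kr;
 *
 *	kr = vm_map_enter(map, &map_addr, vm_map_round_page(size),
 *			  (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
 *			  VM_OBJECT_NULL, (vm_object_offset_t) 0, FALSE,
 *			  VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
 */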
1857
1858
1859 #if VM_CPM
1860
1861 #ifdef MACH_ASSERT
1862 extern pmap_paddr_t avail_start, avail_end;
1863 #endif
1864
1865 /*
1866 * Allocate memory in the specified map, with the caveat that
1867 * the memory is physically contiguous. This call may fail
1868 * if the system can't find sufficient contiguous memory.
1869 * This call may cause or lead to heart-stopping amounts of
1870 * paging activity.
1871 *
1872 * Memory obtained from this call should be freed in the
1873 * normal way, viz., via vm_deallocate.
1874 */
1875 kern_return_t
1876 vm_map_enter_cpm(
1877 vm_map_t map,
1878 vm_map_offset_t *addr,
1879 vm_map_size_t size,
1880 int flags)
1881 {
1882 vm_object_t cpm_obj;
1883 pmap_t pmap;
1884 vm_page_t m, pages;
1885 kern_return_t kr;
1886 vm_map_offset_t va, start, end, offset;
1887 #if MACH_ASSERT
1888 vm_map_offset_t prev_addr;
1889 #endif /* MACH_ASSERT */
1890
1891 boolean_t anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
1892
1893 if (!vm_allocate_cpm_enabled)
1894 return KERN_FAILURE;
1895
1896 if (size == 0) {
1897 *addr = 0;
1898 return KERN_SUCCESS;
1899 }
1900
1901 if (anywhere)
1902 *addr = vm_map_min(map);
1903 else
1904 *addr = vm_map_trunc_page(*addr);
1905 size = vm_map_round_page(size);
1906
1907 /*
1908 * LP64todo - cpm_allocate should probably allow
1909 * allocations of >4GB, but not with the current
1910 * algorithm, so just cast down the size for now.
1911 */
1912 if (size > VM_MAX_ADDRESS)
1913 return KERN_RESOURCE_SHORTAGE;
1914 if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
1915 &pages, TRUE)) != KERN_SUCCESS)
1916 return kr;
1917
1918 cpm_obj = vm_object_allocate((vm_object_size_t)size);
1919 assert(cpm_obj != VM_OBJECT_NULL);
1920 assert(cpm_obj->internal);
1921 assert(cpm_obj->size == (vm_object_size_t)size);
1922 assert(cpm_obj->can_persist == FALSE);
1923 assert(cpm_obj->pager_created == FALSE);
1924 assert(cpm_obj->pageout == FALSE);
1925 assert(cpm_obj->shadow == VM_OBJECT_NULL);
1926
1927 /*
1928 * Insert pages into object.
1929 */
1930
1931 vm_object_lock(cpm_obj);
1932 for (offset = 0; offset < size; offset += PAGE_SIZE) {
1933 m = pages;
1934 pages = NEXT_PAGE(m);
1935 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
1936
1937 assert(!m->gobbled);
1938 assert(!m->wanted);
1939 assert(!m->pageout);
1940 assert(!m->tabled);
1941 /*
1942 * ENCRYPTED SWAP:
1943 * "m" is not supposed to be pageable, so it
1944 * should not be encrypted. It wouldn't be safe
1945 * to enter it in a new VM object while encrypted.
1946 */
1947 ASSERT_PAGE_DECRYPTED(m);
1948 assert(m->busy);
1949 assert(m->phys_page>=(avail_start>>PAGE_SHIFT) && m->phys_page<=(avail_end>>PAGE_SHIFT));
1950
1951 m->busy = FALSE;
1952 vm_page_insert(m, cpm_obj, offset);
1953 }
1954 assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
1955 vm_object_unlock(cpm_obj);
1956
1957 /*
1958 * Hang onto a reference on the object in case a
1959 * multi-threaded application for some reason decides
1960 * to deallocate the portion of the address space into
1961 * which we will insert this object.
1962 *
1963 * Unfortunately, we must insert the object now before
1964 * we can talk to the pmap module about which addresses
1965 * must be wired down. Hence, the race with a multi-
1966 * threaded app.
1967 */
1968 vm_object_reference(cpm_obj);
1969
1970 /*
1971 * Insert object into map.
1972 */
1973
1974 kr = vm_map_enter(
1975 map,
1976 addr,
1977 size,
1978 (vm_map_offset_t)0,
1979 flags,
1980 cpm_obj,
1981 (vm_object_offset_t)0,
1982 FALSE,
1983 VM_PROT_ALL,
1984 VM_PROT_ALL,
1985 VM_INHERIT_DEFAULT);
1986
1987 if (kr != KERN_SUCCESS) {
1988 /*
1989 * A CPM object doesn't have can_persist set,
1990 * so all we have to do is deallocate it to
1991 * free up these pages.
1992 */
1993 assert(cpm_obj->pager_created == FALSE);
1994 assert(cpm_obj->can_persist == FALSE);
1995 assert(cpm_obj->pageout == FALSE);
1996 assert(cpm_obj->shadow == VM_OBJECT_NULL);
1997 vm_object_deallocate(cpm_obj); /* kill acquired ref */
1998 vm_object_deallocate(cpm_obj); /* kill creation ref */
1999 }
2000
2001 /*
2002 * Inform the physical mapping system that the
2003 * range of addresses may not fault, so that
2004 * page tables and such can be locked down as well.
2005 */
2006 start = *addr;
2007 end = start + size;
2008 pmap = vm_map_pmap(map);
2009 pmap_pageable(pmap, start, end, FALSE);
2010
2011 /*
2012 * Enter each page into the pmap, to avoid faults.
2013 * Note that this loop could be coded more efficiently,
2014 * if the need arose, rather than looking up each page
2015 * again.
2016 */
2017 for (offset = 0, va = start; offset < size;
2018 va += PAGE_SIZE, offset += PAGE_SIZE) {
2019 vm_object_lock(cpm_obj);
2020 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
2021 vm_object_unlock(cpm_obj);
2022 assert(m != VM_PAGE_NULL);
2023 PMAP_ENTER(pmap, va, m, VM_PROT_ALL,
2024 ((unsigned int)(m->object->wimg_bits)) & VM_WIMG_MASK,
2025 TRUE);
2026 }
2027
2028 #if MACH_ASSERT
2029 /*
2030 * Verify ordering in address space.
2031 */
2032 for (offset = 0; offset < size; offset += PAGE_SIZE) {
2033 vm_object_lock(cpm_obj);
2034 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
2035 vm_object_unlock(cpm_obj);
2036 if (m == VM_PAGE_NULL)
2037 panic("vm_allocate_cpm: obj 0x%x off 0x%x no page",
2038 cpm_obj, offset);
2039 assert(m->tabled);
2040 assert(!m->busy);
2041 assert(!m->wanted);
2042 assert(!m->fictitious);
2043 assert(!m->private);
2044 assert(!m->absent);
2045 assert(!m->error);
2046 assert(!m->cleaning);
2047 assert(!m->precious);
2048 assert(!m->clustered);
2049 if (offset != 0) {
2050 if (m->phys_page != prev_addr + 1) {
2051 printf("start 0x%x end 0x%x va 0x%x\n",
2052 start, end, va);
2053 printf("obj 0x%x off 0x%x\n", cpm_obj, offset);
2054 printf("m 0x%x prev_address 0x%x\n", m,
2055 prev_addr);
2056 panic("vm_allocate_cpm: pages not contig!");
2057 }
2058 }
2059 prev_addr = m->phys_page;
2060 }
2061 #endif /* MACH_ASSERT */
2062
2063 vm_object_deallocate(cpm_obj); /* kill extra ref */
2064
2065 return kr;
2066 }
2067
2068
2069 #else /* VM_CPM */
2070
2071 /*
2072 * Interface is defined in all cases, but unless the kernel
2073 * is built explicitly for this option, the interface does
2074 * nothing.
2075 */
2076
2077 kern_return_t
2078 vm_map_enter_cpm(
2079 __unused vm_map_t map,
2080 __unused vm_map_offset_t *addr,
2081 __unused vm_map_size_t size,
2082 __unused int flags)
2083 {
2084 return KERN_FAILURE;
2085 }
2086 #endif /* VM_CPM */
2087
2088 /*
2089 * vm_map_clip_start: [ internal use only ]
2090 *
2091 * Asserts that the given entry begins at or after
2092 * the specified address; if necessary,
2093 * it splits the entry into two.
2094 */
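/*
 * Note: in the nested-pmap configuration, clipping an entry that is
 * backed by a nested submap pmap ("use_pmap") first un-nests the
 * enclosing 256MB segment (the 0xF0000000 mask) from the parent's
 * pmap, since the clipped entry can no longer rely on the shared
 * nested translations.  Physically contiguous objects are simply
 * removed from the pmap instead.
 */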
2095 #ifndef NO_NESTED_PMAP
2096 #define vm_map_clip_start(map, entry, startaddr) \
2097 MACRO_BEGIN \
2098 vm_map_t VMCS_map; \
2099 vm_map_entry_t VMCS_entry; \
2100 vm_map_offset_t VMCS_startaddr; \
2101 VMCS_map = (map); \
2102 VMCS_entry = (entry); \
2103 VMCS_startaddr = (startaddr); \
2104 if (VMCS_startaddr > VMCS_entry->vme_start) { \
2105 if(entry->use_pmap) { \
2106 vm_map_offset_t pmap_base_addr; \
2107 \
2108 pmap_base_addr = 0xF0000000 & entry->vme_start; \
2109 pmap_unnest(map->pmap, (addr64_t)pmap_base_addr); \
2110 entry->use_pmap = FALSE; \
2111 } else if(entry->object.vm_object \
2112 && !entry->is_sub_map \
2113 && entry->object.vm_object->phys_contiguous) { \
2114 pmap_remove(map->pmap, \
2115 (addr64_t)(entry->vme_start), \
2116 (addr64_t)(entry->vme_end)); \
2117 } \
2118 _vm_map_clip_start(&VMCS_map->hdr,VMCS_entry,VMCS_startaddr);\
2119 } \
2120 UPDATE_FIRST_FREE(VMCS_map, VMCS_map->first_free); \
2121 MACRO_END
2122 #else /* NO_NESTED_PMAP */
2123 #define vm_map_clip_start(map, entry, startaddr) \
2124 MACRO_BEGIN \
2125 vm_map_t VMCS_map; \
2126 vm_map_entry_t VMCS_entry; \
2127 vm_map_offset_t VMCS_startaddr; \
2128 VMCS_map = (map); \
2129 VMCS_entry = (entry); \
2130 VMCS_startaddr = (startaddr); \
2131 if (VMCS_startaddr > VMCS_entry->vme_start) { \
2132 _vm_map_clip_start(&VMCS_map->hdr,VMCS_entry,VMCS_startaddr);\
2133 } \
2134 UPDATE_FIRST_FREE(VMCS_map, VMCS_map->first_free); \
2135 MACRO_END
2136 #endif /* NO_NESTED_PMAP */
2137
2138 #define vm_map_copy_clip_start(copy, entry, startaddr) \
2139 MACRO_BEGIN \
2140 if ((startaddr) > (entry)->vme_start) \
2141 _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \
2142 MACRO_END
2143
2144 /*
2145 * This routine is called only when it is known that
2146 * the entry must be split.
2147 */
2148 static void
2149 _vm_map_clip_start(
2150 register struct vm_map_header *map_header,
2151 register vm_map_entry_t entry,
2152 register vm_map_offset_t start)
2153 {
2154 register vm_map_entry_t new_entry;
2155
2156 /*
2157 * Split off the front portion --
2158 * note that we must insert the new
2159 * entry BEFORE this one, so that
2160 * this entry has the specified starting
2161 * address.
2162 */
2163
2164 new_entry = _vm_map_entry_create(map_header);
2165 vm_map_entry_copy_full(new_entry, entry);
2166
2167 new_entry->vme_end = start;
2168 entry->offset += (start - entry->vme_start);
2169 entry->vme_start = start;
2170
2171 _vm_map_entry_link(map_header, entry->vme_prev, new_entry);
2172
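	/*
	 * The new (front) entry refers to the same submap or VM
	 * object as the original entry, so take an extra reference.
	 */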
2173 if (entry->is_sub_map)
2174 vm_map_reference(new_entry->object.sub_map);
2175 else
2176 vm_object_reference(new_entry->object.vm_object);
2177 }
2178
2179
2180 /*
2181 * vm_map_clip_end: [ internal use only ]
2182 *
2183 * Asserts that the given entry ends at or before
2184 * the specified address; if necessary,
2185 * it splits the entry into two.
2186 */
2187 #ifndef NO_NESTED_PMAP
2188 #define vm_map_clip_end(map, entry, endaddr) \
2189 MACRO_BEGIN \
2190 vm_map_t VMCE_map; \
2191 vm_map_entry_t VMCE_entry; \
2192 vm_map_offset_t VMCE_endaddr; \
2193 VMCE_map = (map); \
2194 VMCE_entry = (entry); \
2195 VMCE_endaddr = (endaddr); \
2196 if (VMCE_endaddr < VMCE_entry->vme_end) { \
2197 if(entry->use_pmap) { \
2198 vm_map_offset_t pmap_base_addr; \
2199 \
2200 pmap_base_addr = 0xF0000000 & entry->vme_start; \
2201 pmap_unnest(map->pmap, (addr64_t)pmap_base_addr); \
2202 entry->use_pmap = FALSE; \
2203 } else if(entry->object.vm_object \
2204 && !entry->is_sub_map \
2205 && entry->object.vm_object->phys_contiguous) { \
2206 pmap_remove(map->pmap, \
2207 (addr64_t)(entry->vme_start), \
2208 (addr64_t)(entry->vme_end)); \
2209 } \
2210 _vm_map_clip_end(&VMCE_map->hdr,VMCE_entry,VMCE_endaddr); \
2211 } \
2212 UPDATE_FIRST_FREE(VMCE_map, VMCE_map->first_free); \
2213 MACRO_END
2214 #else /* NO_NESTED_PMAP */
2215 #define vm_map_clip_end(map, entry, endaddr) \
2216 MACRO_BEGIN \
2217 vm_map_t VMCE_map; \
2218 vm_map_entry_t VMCE_entry; \
2219 vm_map_offset_t VMCE_endaddr; \
2220 VMCE_map = (map); \
2221 VMCE_entry = (entry); \
2222 VMCE_endaddr = (endaddr); \
2223 if (VMCE_endaddr < VMCE_entry->vme_end) { \
2224 _vm_map_clip_end(&VMCE_map->hdr,VMCE_entry,VMCE_endaddr); \
2225 } \
2226 UPDATE_FIRST_FREE(VMCE_map, VMCE_map->first_free); \
2227 MACRO_END
2228 #endif /* NO_NESTED_PMAP */
2229
2230
2231 #define vm_map_copy_clip_end(copy, entry, endaddr) \
2232 MACRO_BEGIN \
2233 if ((endaddr) < (entry)->vme_end) \
2234 _vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \
2235 MACRO_END
2236
2237 /*
2238 * This routine is called only when it is known that
2239 * the entry must be split.
2240 */
2241 static void
2242 _vm_map_clip_end(
2243 register struct vm_map_header *map_header,
2244 register vm_map_entry_t entry,
2245 register vm_map_offset_t end)
2246 {
2247 register vm_map_entry_t new_entry;
2248
2249 /*
2250 * Create a new entry and insert it
2251 * AFTER the specified entry
2252 */
2253
2254 new_entry = _vm_map_entry_create(map_header);
2255 vm_map_entry_copy_full(new_entry, entry);
2256
2257 new_entry->vme_start = entry->vme_end = end;
2258 new_entry->offset += (end - entry->vme_start);
2259
2260 _vm_map_entry_link(map_header, entry, new_entry);
2261
2262 if (entry->is_sub_map)
2263 vm_map_reference(new_entry->object.sub_map);
2264 else
2265 vm_object_reference(new_entry->object.vm_object);
2266 }
2267
2268
2269 /*
2270 * VM_MAP_RANGE_CHECK: [ internal use only ]
2271 *
2272 * Asserts that the starting and ending region
2273 * addresses fall within the valid range of the map.
2274 */
2275 #define VM_MAP_RANGE_CHECK(map, start, end) \
2276 { \
2277 if (start < vm_map_min(map)) \
2278 start = vm_map_min(map); \
2279 if (end > vm_map_max(map)) \
2280 end = vm_map_max(map); \
2281 if (start > end) \
2282 start = end; \
2283 }
2284
2285 /*
2286 * vm_map_range_check: [ internal use only ]
2287 *
2288 * Check that the region defined by the specified start and
2289 * end addresses is wholly contained within a single map
2290 * entry or set of adjacent map entries of the specified map,
2291 * i.e. the specified region contains no unmapped space.
2292 * If any or all of the region is unmapped, FALSE is returned.
2293 * Otherwise, TRUE is returned and if the output argument 'entry'
2294 * is not NULL it points to the map entry containing the start
2295 * of the region.
2296 *
2297 * The map is locked for reading on entry and is left locked.
2298 */
2299 static boolean_t
2300 vm_map_range_check(
2301 register vm_map_t map,
2302 register vm_map_offset_t start,
2303 register vm_map_offset_t end,
2304 vm_map_entry_t *entry)
2305 {
2306 vm_map_entry_t cur;
2307 register vm_map_offset_t prev;
2308
2309 /*
2310 * Basic sanity checks first
2311 */
2312 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
2313 return (FALSE);
2314
2315 /*
2316 * Check first if the region starts within a valid
2317 * mapping for the map.
2318 */
2319 if (!vm_map_lookup_entry(map, start, &cur))
2320 return (FALSE);
2321
2322 /*
2323 * Optimize for the case that the region is contained
2324 * in a single map entry.
2325 */
2326 if (entry != (vm_map_entry_t *) NULL)
2327 *entry = cur;
2328 if (end <= cur->vme_end)
2329 return (TRUE);
2330
2331 /*
2332 * If the region is not wholly contained within a
2333 * single entry, walk the entries looking for holes.
2334 */
2335 prev = cur->vme_end;
2336 cur = cur->vme_next;
2337 while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
2338 if (end <= cur->vme_end)
2339 return (TRUE);
2340 prev = cur->vme_end;
2341 cur = cur->vme_next;
2342 }
2343 return (FALSE);
2344 }
2345
2346 /*
2347 * vm_map_submap: [ kernel use only ]
2348 *
2349 * Mark the given range as handled by a subordinate map.
2350 *
2351 * This range must have been created with vm_map_find using
2352 * the vm_submap_object, and no other operations may have been
2353 * performed on this range prior to calling vm_map_submap.
2354 *
2355 * Only a limited number of operations can be performed
2356 * within this range after calling vm_map_submap:
2357 * vm_fault
2358 * [Don't try vm_map_copyin!]
2359 *
2360 * To remove a submapping, one must first remove the
2361 * range from the superior map, and then destroy the
2362 * submap (if desired). [Better yet, don't try it.]
2363 */
2364 kern_return_t
2365 vm_map_submap(
2366 vm_map_t map,
2367 vm_map_offset_t start,
2368 vm_map_offset_t end,
2369 vm_map_t submap,
2370 vm_map_offset_t offset,
2371 #ifdef NO_NESTED_PMAP
2372 __unused
2373 #endif /* NO_NESTED_PMAP */
2374 boolean_t use_pmap)
2375 {
2376 vm_map_entry_t entry;
2377 register kern_return_t result = KERN_INVALID_ARGUMENT;
2378 register vm_object_t object;
2379
2380 vm_map_lock(map);
2381
2382 submap->mapped = TRUE;
2383
2384 VM_MAP_RANGE_CHECK(map, start, end);
2385
2386 if (vm_map_lookup_entry(map, start, &entry)) {
2387 vm_map_clip_start(map, entry, start);
2388 }
2389 else
2390 entry = entry->vme_next;
2391
2392 if(entry == vm_map_to_entry(map)) {
2393 vm_map_unlock(map);
2394 return KERN_INVALID_ARGUMENT;
2395 }
2396
2397 vm_map_clip_end(map, entry, end);
2398
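	/*
	 * The range must exactly match a single entry that still maps
	 * the untouched vm_submap_object (no resident pages, no copy,
	 * no shadow, no pager) before it can be turned into a submap.
	 */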
2399 if ((entry->vme_start == start) && (entry->vme_end == end) &&
2400 (!entry->is_sub_map) &&
2401 ((object = entry->object.vm_object) == vm_submap_object) &&
2402 (object->resident_page_count == 0) &&
2403 (object->copy == VM_OBJECT_NULL) &&
2404 (object->shadow == VM_OBJECT_NULL) &&
2405 (!object->pager_created)) {
2406 entry->offset = (vm_object_offset_t)offset;
2407 entry->object.vm_object = VM_OBJECT_NULL;
2408 vm_object_deallocate(object);
2409 entry->is_sub_map = TRUE;
2410 entry->object.sub_map = submap;
2411 vm_map_reference(submap);
2412 #ifndef NO_NESTED_PMAP
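		/*
		 * If requested, and the submap is mapped at offset 0,
		 * nest the submap's pmap into the parent's pmap so the
		 * translations can be shared rather than duplicated.
		 */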
2413 if ((use_pmap) && (offset == 0)) {
2414 /* nest if platform code will allow */
2415 if(submap->pmap == NULL) {
2416 submap->pmap = pmap_create((vm_map_size_t) 0, FALSE);
2417 if(submap->pmap == PMAP_NULL) {
2418 vm_map_unlock(map);
2419 return(KERN_NO_SPACE);
2420 }
2421 }
2422 result = pmap_nest(map->pmap, (entry->object.sub_map)->pmap,
2423 (addr64_t)start,
2424 (addr64_t)start,
2425 (uint64_t)(end - start));
2426 if(result)
2427 panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result);
2428 entry->use_pmap = TRUE;
2429 }
2430 #else /* NO_NESTED_PMAP */
2431 pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end);
2432 #endif /* NO_NESTED_PMAP */
2433 result = KERN_SUCCESS;
2434 }
2435 vm_map_unlock(map);
2436
2437 return(result);
2438 }
2439
2440 /*
2441 * vm_map_protect:
2442 *
2443 * Sets the protection of the specified address
2444 * region in the target map. If "set_max" is
2445 * specified, the maximum protection is to be set;
2446 * otherwise, only the current protection is affected.
2447 */
2448 kern_return_t
2449 vm_map_protect(
2450 register vm_map_t map,
2451 register vm_map_offset_t start,
2452 register vm_map_offset_t end,
2453 register vm_prot_t new_prot,
2454 register boolean_t set_max)
2455 {
2456 register vm_map_entry_t current;
2457 register vm_map_offset_t prev;
2458 vm_map_entry_t entry;
2459 vm_prot_t new_max;
2460 boolean_t clip;
2461
2462 XPR(XPR_VM_MAP,
2463 "vm_map_protect, 0x%X start 0x%X end 0x%X, new 0x%X %d",
2464 (integer_t)map, start, end, new_prot, set_max);
2465
2466 vm_map_lock(map);
2467
2468 /* LP64todo - remove this check when vm_map_commpage64()
2469 * no longer has to stuff in a map_entry for the commpage
2470 * above the map's max_offset.
2471 */
2472 if (start >= map->max_offset) {
2473 vm_map_unlock(map);
2474 return(KERN_INVALID_ADDRESS);
2475 }
2476
2477 /*
2478 * Lookup the entry. If it doesn't start in a valid
2479 * entry, return an error. Remember if we need to
2480 * clip the entry. We don't do it here because we don't
2481 * want to make any changes until we've scanned the
2482 * entire range below for address and protection
2483 * violations.
2484 */
2485 if (!(clip = vm_map_lookup_entry(map, start, &entry))) {
2486 vm_map_unlock(map);
2487 return(KERN_INVALID_ADDRESS);
2488 }
2489
2490 /*
2491 * Make a first pass to check for protection and address
2492 * violations.
2493 */
2494
2495 current = entry;
2496 prev = current->vme_start;
2497 while ((current != vm_map_to_entry(map)) &&
2498 (current->vme_start < end)) {
2499
2500 /*
2501 * If there is a hole, return an error.
2502 */
2503 if (current->vme_start != prev) {
2504 vm_map_unlock(map);
2505 return(KERN_INVALID_ADDRESS);
2506 }
2507
2508 new_max = current->max_protection;
2509 if(new_prot & VM_PROT_COPY) {
2510 new_max |= VM_PROT_WRITE;
2511 if ((new_prot & (new_max | VM_PROT_COPY)) != new_prot) {
2512 vm_map_unlock(map);
2513 return(KERN_PROTECTION_FAILURE);
2514 }
2515 } else {
2516 if ((new_prot & new_max) != new_prot) {
2517 vm_map_unlock(map);
2518 return(KERN_PROTECTION_FAILURE);
2519 }
2520 }
2521
2522 prev = current->vme_end;
2523 current = current->vme_next;
2524 }
2525 if (end > prev) {
2526 vm_map_unlock(map);
2527 return(KERN_INVALID_ADDRESS);
2528 }
2529
2530 /*
2531 * Go back and fix up protections.
2532 * Clip to start here if the range starts within
2533 * the entry.
2534 */
2535
2536 current = entry;
2537 if (clip) {
2538 vm_map_clip_start(map, entry, start);
2539 }
2540 while ((current != vm_map_to_entry(map)) &&
2541 (current->vme_start < end)) {
2542
2543 vm_prot_t old_prot;
2544
2545 vm_map_clip_end(map, current, end);
2546
2547 old_prot = current->protection;
2548
2549 if(new_prot & VM_PROT_COPY) {
2550 /* caller is asking specifically to copy the */
2551 /* mapped data; this implies that max protection */
2552 /* will include write. Caller must be prepared */
2553 /* for loss of shared memory communication in the */
2554 /* target area after taking this step */
2555 current->needs_copy = TRUE;
2556 current->max_protection |= VM_PROT_WRITE;
2557 }
2558
2559 if (set_max)
2560 current->protection =
2561 (current->max_protection =
2562 new_prot & ~VM_PROT_COPY) &
2563 old_prot;
2564 else
2565 current->protection = new_prot & ~VM_PROT_COPY;
2566
2567 /*
2568 * Update physical map if necessary.
2569 * If the request is to turn off write protection,
2570 * we won't do it for real (in pmap). This is because
2571 * it would cause copy-on-write to fail. We've already
2572 * set the new protection in the map, so if a
2573 * write-protect fault occurred, it will be fixed up
2574 * properly, COW or not.
2575 */
2576 /* the 256M hack for existing hardware limitations */
2577 if (current->protection != old_prot) {
2578 if(current->is_sub_map && current->use_pmap) {
2579 vm_map_offset_t pmap_base_addr;
2580 vm_map_offset_t pmap_end_addr;
2581 #ifdef NO_NESTED_PMAP
2582 __unused
2583 #endif /* NO_NESTED_PMAP */
2584 vm_map_entry_t local_entry;
2585
2586
2587 pmap_base_addr = 0xF0000000 & current->vme_start;
2588 pmap_end_addr = (pmap_base_addr + 0x10000000) - 1;
2589 #ifndef NO_NESTED_PMAP
2590 if(!vm_map_lookup_entry(map,
2591 pmap_base_addr, &local_entry))
2592 panic("vm_map_protect: nested pmap area is missing");
2593 while ((local_entry != vm_map_to_entry(map)) &&
2594 (local_entry->vme_start < pmap_end_addr)) {
2595 local_entry->use_pmap = FALSE;
2596 local_entry = local_entry->vme_next;
2597 }
2598 pmap_unnest(map->pmap, (addr64_t)pmap_base_addr);
2599 #endif /* NO_NESTED_PMAP */
2600 }
2601 if (!(current->protection & VM_PROT_WRITE)) {
2602 /* Look one level in; we support nested pmaps */
2603 /* from mapped submaps which are direct entries */
2604 /* in our map */
2605
2606 vm_prot_t prot;
2607
2608 prot = current->protection;
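				/*
				 * With stack-only NX, any non-stack mapping
				 * that retains some protection is entered in
				 * the pmap with execute permission as well.
				 */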
2609 #ifdef STACK_ONLY_NX
2610 if (current->alias != VM_MEMORY_STACK && prot)
2611 prot |= VM_PROT_EXECUTE;
2612 #endif
2613 if (current->is_sub_map && current->use_pmap) {
2614 pmap_protect(current->object.sub_map->pmap,
2615 current->vme_start,
2616 current->vme_end,
2617 prot);
2618 } else {
2619 pmap_protect(map->pmap, current->vme_start,
2620 current->vme_end,
2621 prot);
2622 }
2623 }
2624 }
2625 current = current->vme_next;
2626 }
2627
2628 current = entry;
2629 while ((current != vm_map_to_entry(map)) &&
2630 (current->vme_start <= end)) {
2631 vm_map_simplify_entry(map, current);
2632 current = current->vme_next;
2633 }
2634
2635 vm_map_unlock(map);
2636 return(KERN_SUCCESS);
2637 }
2638
2639 /*
2640 * vm_map_inherit:
2641 *
2642 * Sets the inheritance of the specified address
2643 * range in the target map. Inheritance
2644 * affects how the map will be shared with
2645 * child maps at the time of vm_map_fork.
2646 */
2647 kern_return_t
2648 vm_map_inherit(
2649 register vm_map_t map,
2650 register vm_map_offset_t start,
2651 register vm_map_offset_t end,
2652 register vm_inherit_t new_inheritance)
2653 {
2654 register vm_map_entry_t entry;
2655 vm_map_entry_t temp_entry;
2656
2657 vm_map_lock(map);
2658
2659 VM_MAP_RANGE_CHECK(map, start, end);
2660
2661 if (vm_map_lookup_entry(map, start, &temp_entry)) {
2662 entry = temp_entry;
2663 vm_map_clip_start(map, entry, start);
2664 }
2665 else {
2666 temp_entry = temp_entry->vme_next;
2667 entry = temp_entry;
2668 }
2669
2670 /* first check entire range for submaps which can't support the */
2671 /* given inheritance. */
2672 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
2673 if(entry->is_sub_map) {
2674 if(new_inheritance == VM_INHERIT_COPY) {
2675 vm_map_unlock(map);
2676 return(KERN_INVALID_ARGUMENT);
2677 }
2678 }
2679
2680 entry = entry->vme_next;
2681 }
2682
2683 entry = temp_entry;
2684
2685 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
2686 vm_map_clip_end(map, entry, end);
2687
2688 entry->inheritance = new_inheritance;
2689
2690 entry = entry->vme_next;
2691 }
2692
2693 vm_map_unlock(map);
2694 return(KERN_SUCCESS);
2695 }
2696
2697 /*
2698 * vm_map_wire:
2699 *
2700 * Sets the pageability of the specified address range in the
2701 * target map as wired. Regions specified as not pageable require
2702 * locked-down physical memory and physical page maps. The
2703 * access_type variable indicates types of accesses that must not
2704 * generate page faults. This is checked against the protection of
2705 * the memory being locked down.
2706 *
2707 * The map must not be locked, but a reference must remain to the
2708 * map throughout the call.
2709 */
2710 static kern_return_t
2711 vm_map_wire_nested(
2712 register vm_map_t map,
2713 register vm_map_offset_t start,
2714 register vm_map_offset_t end,
2715 register vm_prot_t access_type,
2716 boolean_t user_wire,
2717 pmap_t map_pmap,
2718 vm_map_offset_t pmap_addr)
2719 {
2720 register vm_map_entry_t entry;
2721 struct vm_map_entry *first_entry, tmp_entry;
2722 vm_map_t real_map;
2723 register vm_map_offset_t s,e;
2724 kern_return_t rc;
2725 boolean_t need_wakeup;
2726 boolean_t main_map = FALSE;
2727 wait_interrupt_t interruptible_state;
2728 thread_t cur_thread;
2729 unsigned int last_timestamp;
2730 vm_map_size_t size;
2731
2732 vm_map_lock(map);
2733 if(map_pmap == NULL)
2734 main_map = TRUE;
2735 last_timestamp = map->timestamp;
2736
2737 VM_MAP_RANGE_CHECK(map, start, end);
2738 assert(page_aligned(start));
2739 assert(page_aligned(end));
2740 if (start == end) {
2741 /* We wired what the caller asked for, zero pages */
2742 vm_map_unlock(map);
2743 return KERN_SUCCESS;
2744 }
2745
2746 if (vm_map_lookup_entry(map, start, &first_entry)) {
2747 entry = first_entry;
2748 /* vm_map_clip_start will be done later. */
2749 } else {
2750 /* Start address is not in map */
2751 vm_map_unlock(map);
2752 return(KERN_INVALID_ADDRESS);
2753 }
2754
2755 s=start;
2756 need_wakeup = FALSE;
2757 cur_thread = current_thread();
2758 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
2759 /*
2760 * If another thread is wiring/unwiring this entry then
2761 * block after informing the other thread to wake us up.
2762 */
2763 if (entry->in_transition) {
2764 wait_result_t wait_result;
2765
2766 /*
2767 * We have not clipped the entry. Make sure that
2768 * the start address is in range so that the lookup
2769 * below will succeed.
2770 */
2771 s = entry->vme_start < start? start: entry->vme_start;
2772
2773 entry->needs_wakeup = TRUE;
2774
2775 /*
2776 * wake up anybody waiting on entries that we have
2777 * already wired.
2778 */
2779 if (need_wakeup) {
2780 vm_map_entry_wakeup(map);
2781 need_wakeup = FALSE;
2782 }
2783 /*
2784 * User wiring is interruptible
2785 */
2786 wait_result = vm_map_entry_wait(map,
2787 (user_wire) ? THREAD_ABORTSAFE :
2788 THREAD_UNINT);
2789 if (user_wire && wait_result == THREAD_INTERRUPTED) {
2790 /*
2791 * undo the wirings we have done so far
2792 * We do not clear the needs_wakeup flag,
2793 * because we cannot tell if we were the
2794 * only one waiting.
2795 */
2796 vm_map_unlock(map);
2797 vm_map_unwire(map, start, s, user_wire);
2798 return(KERN_FAILURE);
2799 }
2800
2801 /*
2802 * Cannot avoid a lookup here. Reset the timestamp.
2803 */
2804 last_timestamp = map->timestamp;
2805
2806 /*
2807 * The entry could have been clipped, look it up again.
2808 * The worst that can happen is that it may not exist anymore.
2809 */
2810 if (!vm_map_lookup_entry(map, s, &first_entry)) {
2811 if (!user_wire)
2812 panic("vm_map_wire: re-lookup failed");
2813
2814 /*
2815 * User: undo everything up to the previous
2816 * entry. Let vm_map_unwire worry about
2817 * checking the validity of the range.
2818 */
2819 vm_map_unlock(map);
2820 vm_map_unwire(map, start, s, user_wire);
2821 return(KERN_FAILURE);
2822 }
2823 entry = first_entry;
2824 continue;
2825 }
2826
2827 if(entry->is_sub_map) {
2828 vm_map_offset_t sub_start;
2829 vm_map_offset_t sub_end;
2830 vm_map_offset_t local_start;
2831 vm_map_offset_t local_end;
2832 pmap_t pmap;
2833
2834 vm_map_clip_start(map, entry, start);
2835 vm_map_clip_end(map, entry, end);
2836
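			/*
			 * Translate the clipped range into the submap's
			 * address space using the entry's offset.
			 */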
2837 sub_start = entry->offset;
2838 sub_end = entry->vme_end - entry->vme_start;
2839 sub_end += entry->offset;
2840
2841 local_end = entry->vme_end;
2842 if(map_pmap == NULL) {
2843 if(entry->use_pmap) {
2844 pmap = entry->object.sub_map->pmap;
2845 /* ppc implementation requires that */
2846 /* a submap's pmap address ranges line */
2847 /* up with the parent map */
2848 #ifdef notdef
2849 pmap_addr = sub_start;
2850 #endif
2851 pmap_addr = start;
2852 } else {
2853 pmap = map->pmap;
2854 pmap_addr = start;
2855 }
2856 if (entry->wired_count) {
2857 if (entry->wired_count
2858 >= MAX_WIRE_COUNT)
2859 panic("vm_map_wire: too many wirings");
2860
2861 if (user_wire &&
2862 entry->user_wired_count
2863 >= MAX_WIRE_COUNT) {
2864 vm_map_unlock(map);
2865 vm_map_unwire(map, start,
2866 entry->vme_start, user_wire);
2867 return(KERN_FAILURE);
2868 }
2869 if(user_wire)
2870 entry->user_wired_count++;
2871 if((!user_wire) ||
2872 (entry->user_wired_count == 0))
2873 entry->wired_count++;
2874 entry = entry->vme_next;
2875 continue;
2876
2877 } else {
2878 vm_object_t object;
2879 vm_map_offset_t offset_hi;
2880 vm_map_offset_t offset_lo;
2881 vm_object_offset_t offset;
2882 vm_prot_t prot;
2883 boolean_t wired;
2884 vm_behavior_t behavior;
2885 vm_map_entry_t local_entry;
2886 vm_map_version_t version;
2887 vm_map_t lookup_map;
2888
2889 /* call vm_map_lookup_locked to */
2890 /* cause any needs_copy to be */
2891 /* evaluated */
2892 local_start = entry->vme_start;
2893 lookup_map = map;
2894 vm_map_lock_write_to_read(map);
2895 if(vm_map_lookup_locked(
2896 &lookup_map, local_start,
2897 access_type,
2898 &version, &object,
2899 &offset, &prot, &wired,
2900 &behavior, &offset_lo,
2901 &offset_hi, &real_map)) {
2902
2903 vm_map_unlock_read(lookup_map);
2904 vm_map_unwire(map, start,
2905 entry->vme_start, user_wire);
2906 return(KERN_FAILURE);
2907 }
2908 if(real_map != lookup_map)
2909 vm_map_unlock(real_map);
2910 vm_map_unlock_read(lookup_map);
2911 vm_map_lock(map);
2912 vm_object_unlock(object);
2913
2914 if (!vm_map_lookup_entry(map,
2915 local_start, &local_entry)) {
2916 vm_map_unlock(map);
2917 vm_map_unwire(map, start,
2918 entry->vme_start, user_wire);
2919 return(KERN_FAILURE);
2920 }
2921 /* did we have a change of type? */
2922 if (!local_entry->is_sub_map) {
2923 last_timestamp = map->timestamp;
2924 continue;
2925 }
2926 entry = local_entry;
2927 if (user_wire)
2928 entry->user_wired_count++;
2929 if((!user_wire) ||
2930 (entry->user_wired_count == 1))
2931 entry->wired_count++;
2932
2933 entry->in_transition = TRUE;
2934
2935 vm_map_unlock(map);
2936 rc = vm_map_wire_nested(
2937 entry->object.sub_map,
2938 sub_start, sub_end,
2939 access_type,
2940 user_wire, pmap, pmap_addr);
2941 vm_map_lock(map);
2942 }
2943 } else {
2944 local_start = entry->vme_start;
2945 if (user_wire)
2946 entry->user_wired_count++;
2947 if((!user_wire) ||
2948 (entry->user_wired_count == 1))
2949 entry->wired_count++;
2950 vm_map_unlock(map);
2951 rc = vm_map_wire_nested(entry->object.sub_map,
2952 sub_start, sub_end,
2953 access_type,
2954 user_wire, map_pmap, pmap_addr);
2955 vm_map_lock(map);
2956 }
2957 s = entry->vme_start;
2958 e = entry->vme_end;
2959
2960 /*
2961 * Find the entry again. It could have been clipped
2962 * after we unlocked the map.
2963 */
2964 if (!vm_map_lookup_entry(map, local_start,
2965 &first_entry))
2966 panic("vm_map_wire: re-lookup failed");
2967 entry = first_entry;
2968
2969 last_timestamp = map->timestamp;
2970 while ((entry != vm_map_to_entry(map)) &&
2971 (entry->vme_start < e)) {
2972 assert(entry->in_transition);
2973 entry->in_transition = FALSE;
2974 if (entry->needs_wakeup) {
2975 entry->needs_wakeup = FALSE;
2976 need_wakeup = TRUE;
2977 }
2978 if (rc != KERN_SUCCESS) {/* from vm_*_wire */
2979 if (user_wire)
2980 entry->user_wired_count--;
2981 if ((!user_wire) ||
2982 (entry->user_wired_count == 0))
2983 entry->wired_count--;
2984 }
2985 entry = entry->vme_next;
2986 }
2987 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
2988 vm_map_unlock(map);
2989 if (need_wakeup)
2990 vm_map_entry_wakeup(map);
2991 /*
2992 * undo everything up to the previous entry.
2993 */
2994 (void)vm_map_unwire(map, start, s, user_wire);
2995 return rc;
2996 }
2997 continue;
2998 }
2999
3000 /*
3001 * If this entry is already wired then increment
3002 * the appropriate wire reference count.
3003 */
3004 if (entry->wired_count) {
3005 /* sanity check: wired_count is a short */
3006 if (entry->wired_count >= MAX_WIRE_COUNT)
3007 panic("vm_map_wire: too many wirings");
3008
3009 if (user_wire &&
3010 entry->user_wired_count >= MAX_WIRE_COUNT) {
3011 vm_map_unlock(map);
3012 vm_map_unwire(map, start,
3013 entry->vme_start, user_wire);
3014 return(KERN_FAILURE);
3015 }
3016 /*
3017 * entry is already wired down, get our reference
3018 * after clipping to our range.
3019 */
3020 vm_map_clip_start(map, entry, start);
3021 vm_map_clip_end(map, entry, end);
3022 if (user_wire)
3023 entry->user_wired_count++;
3024 if ((!user_wire) || (entry->user_wired_count == 1))
3025 entry->wired_count++;
3026
3027 entry = entry->vme_next;
3028 continue;
3029 }
3030
3031 /*
3032 * Unwired entry or wire request transmitted via submap
3033 */
3034
3035
3036 /*
3037 * Perform actions of vm_map_lookup that need the write
3038 * lock on the map: create a shadow object for a
3039 * copy-on-write region, or an object for a zero-fill
3040 * region.
3041 */
3042 size = entry->vme_end - entry->vme_start;
3043 /*
3044 * If wiring a copy-on-write page, we need to copy it now
3045 * even if we're only (currently) requesting read access.
3046 * This is aggressive, but once it's wired we can't move it.
3047 */
3048 if (entry->needs_copy) {
3049 vm_object_shadow(&entry->object.vm_object,
3050 &entry->offset, size);
3051 entry->needs_copy = FALSE;
3052 } else if (entry->object.vm_object == VM_OBJECT_NULL) {
3053 entry->object.vm_object = vm_object_allocate(size);
3054 entry->offset = (vm_object_offset_t)0;
3055 }
3056
3057 vm_map_clip_start(map, entry, start);
3058 vm_map_clip_end(map, entry, end);
3059
3060 s = entry->vme_start;
3061 e = entry->vme_end;
3062
3063 /*
3064 * Check for holes and protection mismatch.
3065 * Holes: Next entry should be contiguous unless this
3066 * is the end of the region.
3067 * Protection: Access requested must be allowed, unless
3068 * wiring is by protection class
3069 */
3070 if ((((entry->vme_end < end) &&
3071 ((entry->vme_next == vm_map_to_entry(map)) ||
3072 (entry->vme_next->vme_start > entry->vme_end))) ||
3073 ((entry->protection & access_type) != access_type))) {
3074 /*
3075 * Found a hole or protection problem.
3076 * Unwire the region we wired so far.
3077 */
3078 if (start != entry->vme_start) {
3079 vm_map_unlock(map);
3080 vm_map_unwire(map, start, s, user_wire);
3081 } else {
3082 vm_map_unlock(map);
3083 }
3084 return((entry->protection&access_type) != access_type?
3085 KERN_PROTECTION_FAILURE: KERN_INVALID_ADDRESS);
3086 }
3087
3088 assert(entry->wired_count == 0 && entry->user_wired_count == 0);
3089
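		/*
		 * wired_count is bumped for a kernel wiring, or for the
		 * first user wiring (user_wired_count going 0 -> 1);
		 * additional user wirings only bump user_wired_count.
		 */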
3090 if (user_wire)
3091 entry->user_wired_count++;
3092 if ((!user_wire) || (entry->user_wired_count == 1))
3093 entry->wired_count++;
3094
3095 entry->in_transition = TRUE;
3096
3097 /*
3098 * This entry might get split once we unlock the map.
3099 * In vm_fault_wire(), we need the current range as
3100 * defined by this entry. In order for this to work
3101 * along with a simultaneous clip operation, we make a
3102 * temporary copy of this entry and use that for the
3103 * wiring. Note that the underlying objects do not
3104 * change during a clip.
3105 */
3106 tmp_entry = *entry;
3107
3108 /*
3109 * The in_transition state guarantees that the entry
3110 * (or entries for this range, if a split occurred) will be
3111 * there when the map lock is acquired for the second time.
3112 */
3113 vm_map_unlock(map);
3114
3115 if (!user_wire && cur_thread != THREAD_NULL)
3116 interruptible_state = thread_interrupt_level(THREAD_UNINT);
3117 else
3118 interruptible_state = THREAD_UNINT;
3119
3120 if(map_pmap)
3121 rc = vm_fault_wire(map,
3122 &tmp_entry, map_pmap, pmap_addr);
3123 else
3124 rc = vm_fault_wire(map,
3125 &tmp_entry, map->pmap,
3126 tmp_entry.vme_start);
3127
3128 if (!user_wire && cur_thread != THREAD_NULL)
3129 thread_interrupt_level(interruptible_state);
3130
3131 vm_map_lock(map);
3132
3133 if (last_timestamp+1 != map->timestamp) {
3134 /*
3135 * Find the entry again. It could have been clipped
3136 * after we unlocked the map.
3137 */
3138 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
3139 &first_entry))
3140 panic("vm_map_wire: re-lookup failed");
3141
3142 entry = first_entry;
3143 }
3144
3145 last_timestamp = map->timestamp;
3146
3147 while ((entry != vm_map_to_entry(map)) &&
3148 (entry->vme_start < tmp_entry.vme_end)) {
3149 assert(entry->in_transition);
3150 entry->in_transition = FALSE;
3151 if (entry->needs_wakeup) {
3152 entry->needs_wakeup = FALSE;
3153 need_wakeup = TRUE;
3154 }
3155 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
3156 if (user_wire)
3157 entry->user_wired_count--;
3158 if ((!user_wire) ||
3159 (entry->user_wired_count == 0))
3160 entry->wired_count--;
3161 }
3162 entry = entry->vme_next;
3163 }
3164
3165 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
3166 vm_map_unlock(map);
3167 if (need_wakeup)
3168 vm_map_entry_wakeup(map);
3169 /*
3170 * undo everything up to the previous entry.
3171 */
3172 (void)vm_map_unwire(map, start, s, user_wire);
3173 return rc;
3174 }
3175 } /* end while loop through map entries */
3176 vm_map_unlock(map);
3177
3178 /*
3179 * wake up anybody waiting on entries we wired.
3180 */
3181 if (need_wakeup)
3182 vm_map_entry_wakeup(map);
3183
3184 return(KERN_SUCCESS);
3185
3186 }
3187
3188 kern_return_t
3189 vm_map_wire(
3190 register vm_map_t map,
3191 register vm_map_offset_t start,
3192 register vm_map_offset_t end,
3193 register vm_prot_t access_type,
3194 boolean_t user_wire)
3195 {
3196
3197 kern_return_t kret;
3198
3199 #ifdef ppc
3200 /*
3201 * the calls to mapping_prealloc and mapping_relpre
3202 * (along with the VM_MAP_RANGE_CHECK to ensure a
3203 * reasonable range was passed in) are
3204 * currently necessary because
3205 * we haven't enabled kernel pre-emption
3206 * and/or the pmap_enter cannot purge and re-use
3207 * existing mappings
3208 */
3209 VM_MAP_RANGE_CHECK(map, start, end);
3210 mapping_prealloc(end - start);
3211 #endif
3212 kret = vm_map_wire_nested(map, start, end, access_type,
3213 user_wire, (pmap_t)NULL, 0);
3214 #ifdef ppc
3215 mapping_relpre();
3216 #endif
3217 return kret;
3218 }
3219
3220 /*
3221 * vm_map_unwire:
3222 *
3223 * Sets the pageability of the specified address range in the target
3224 * map as pageable. Regions specified must have been wired previously.
3225 *
3226 * The map must not be locked, but a reference must remain to the map
3227 * throughout the call.
3228 *
3229 * Kernel will panic on failures. User unwire ignores holes and
3230 * unwired and intransition entries to avoid losing memory by leaving
3231 * it unwired.
3232 */
3233 static kern_return_t
3234 vm_map_unwire_nested(
3235 register vm_map_t map,
3236 register vm_map_offset_t start,
3237 register vm_map_offset_t end,
3238 boolean_t user_wire,
3239 pmap_t map_pmap,
3240 vm_map_offset_t pmap_addr)
3241 {
3242 register vm_map_entry_t entry;
3243 struct vm_map_entry *first_entry, tmp_entry;
3244 boolean_t need_wakeup;
3245 boolean_t main_map = FALSE;
3246 unsigned int last_timestamp;
3247
3248 vm_map_lock(map);
3249 if(map_pmap == NULL)
3250 main_map = TRUE;
3251 last_timestamp = map->timestamp;
3252
3253 VM_MAP_RANGE_CHECK(map, start, end);
3254 assert(page_aligned(start));
3255 assert(page_aligned(end));
3256
3257 if (vm_map_lookup_entry(map, start, &first_entry)) {
3258 entry = first_entry;
3259 /* vm_map_clip_start will be done later. */
3260 }
3261 else {
3262 /* Start address is not in map. */
3263 vm_map_unlock(map);
3264 return(KERN_INVALID_ADDRESS);
3265 }
3266
3267 need_wakeup = FALSE;
3268 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
3269 if (entry->in_transition) {
3270 /*
3271 * 1)
3272 * Another thread is wiring down this entry. Note
3273 * that if it were not for the other thread, we would
3274 * be unwiring an unwired entry. This is not
3275 * permitted. If we wait, we will be unwiring memory
3276 * we did not wire.
3277 *
3278 * 2)
3279 * Another thread is unwiring this entry. We did not
3280 * have a reference to it, because if we did, this
3281 * entry will not be getting unwired now.
3282 */
3283 if (!user_wire)
3284 panic("vm_map_unwire: in_transition entry");
3285
3286 entry = entry->vme_next;
3287 continue;
3288 }
3289
3290 if(entry->is_sub_map) {
3291 vm_map_offset_t sub_start;
3292 vm_map_offset_t sub_end;
3293 vm_map_offset_t local_end;
3294 pmap_t pmap;
3295
3296
3297 vm_map_clip_start(map, entry, start);
3298 vm_map_clip_end(map, entry, end);
3299
3300 sub_start = entry->offset;
3301 sub_end = entry->vme_end - entry->vme_start;
3302 sub_end += entry->offset;
3303 local_end = entry->vme_end;
3304 if(map_pmap == NULL) {
3305 if(entry->use_pmap) {
3306 pmap = entry->object.sub_map->pmap;
3307 pmap_addr = sub_start;
3308 } else {
3309 pmap = map->pmap;
3310 pmap_addr = start;
3311 }
3312 if (entry->wired_count == 0 ||
3313 (user_wire && entry->user_wired_count == 0)) {
3314 if (!user_wire)
3315 panic("vm_map_unwire: entry is unwired");
3316 entry = entry->vme_next;
3317 continue;
3318 }
3319
3320 /*
3321 * Check for holes
3322 * Holes: Next entry should be contiguous unless
3323 * this is the end of the region.
3324 */
3325 if (((entry->vme_end < end) &&
3326 ((entry->vme_next == vm_map_to_entry(map)) ||
3327 (entry->vme_next->vme_start
3328 > entry->vme_end)))) {
3329 if (!user_wire)
3330 panic("vm_map_unwire: non-contiguous region");
3331 /*
3332 entry = entry->vme_next;
3333 continue;
3334 */
3335 }
3336
3337 if (!user_wire || (--entry->user_wired_count == 0))
3338 entry->wired_count--;
3339
3340 if (entry->wired_count != 0) {
3341 entry = entry->vme_next;
3342 continue;
3343 }
3344
3345 entry->in_transition = TRUE;
3346 tmp_entry = *entry;/* see comment in vm_map_wire() */
3347
3348 /*
3349 * We can unlock the map now. The in_transition state
3350 * guarantees existence of the entry.
3351 */
3352 vm_map_unlock(map);
3353 vm_map_unwire_nested(entry->object.sub_map,
3354 sub_start, sub_end, user_wire, pmap, pmap_addr);
3355 vm_map_lock(map);
3356
3357 if (last_timestamp+1 != map->timestamp) {
3358 /*
3359 * Find the entry again. It could have been
3360 * clipped or deleted after we unlocked the map.
3361 */
3362 if (!vm_map_lookup_entry(map,
3363 tmp_entry.vme_start,
3364 &first_entry)) {
3365 if (!user_wire)
3366 panic("vm_map_unwire: re-lookup failed");
3367 entry = first_entry->vme_next;
3368 } else
3369 entry = first_entry;
3370 }
3371 last_timestamp = map->timestamp;
3372
3373 /*
3374 * clear transition bit for all constituent entries
3375 * that were in the original entry (saved in
3376 * tmp_entry). Also check for waiters.
3377 */
3378 while ((entry != vm_map_to_entry(map)) &&
3379 (entry->vme_start < tmp_entry.vme_end)) {
3380 assert(entry->in_transition);
3381 entry->in_transition = FALSE;
3382 if (entry->needs_wakeup) {
3383 entry->needs_wakeup = FALSE;
3384 need_wakeup = TRUE;
3385 }
3386 entry = entry->vme_next;
3387 }
3388 continue;
3389 } else {
3390 vm_map_unlock(map);
3391 vm_map_unwire_nested(entry->object.sub_map,
3392 sub_start, sub_end, user_wire, map_pmap,
3393 pmap_addr);
3394 vm_map_lock(map);
3395
3396 if (last_timestamp+1 != map->timestamp) {
3397 /*
3398 * Find the entry again. It could have been
3399 * clipped or deleted after we unlocked the map.
3400 */
3401 if (!vm_map_lookup_entry(map,
3402 tmp_entry.vme_start,
3403 &first_entry)) {
3404 if (!user_wire)
3405 panic("vm_map_unwire: re-lookup failed");
3406 entry = first_entry->vme_next;
3407 } else
3408 entry = first_entry;
3409 }
3410 last_timestamp = map->timestamp;
3411 }
3412 }
3413
3414
3415 if ((entry->wired_count == 0) ||
3416 (user_wire && entry->user_wired_count == 0)) {
3417 if (!user_wire)
3418 panic("vm_map_unwire: entry is unwired");
3419
3420 entry = entry->vme_next;
3421 continue;
3422 }
3423
3424 assert(entry->wired_count > 0 &&
3425 (!user_wire || entry->user_wired_count > 0));
3426
3427 vm_map_clip_start(map, entry, start);
3428 vm_map_clip_end(map, entry, end);
3429
3430 /*
3431 * Check for holes
3432 * Holes: Next entry should be contiguous unless
3433 * this is the end of the region.
3434 */
3435 if (((entry->vme_end < end) &&
3436 ((entry->vme_next == vm_map_to_entry(map)) ||
3437 (entry->vme_next->vme_start > entry->vme_end)))) {
3438
3439 if (!user_wire)
3440 panic("vm_map_unwire: non-contiguous region");
3441 entry = entry->vme_next;
3442 continue;
3443 }
3444
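		/*
		 * Mirror of the wiring bookkeeping: wired_count drops
		 * only for a kernel unwire, or when the last user wiring
		 * goes away.
		 */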
3445 if (!user_wire || (--entry->user_wired_count == 0))
3446 entry->wired_count--;
3447
3448 if (entry->wired_count != 0) {
3449 entry = entry->vme_next;
3450 continue;
3451 }
3452
3453 entry->in_transition = TRUE;
3454 tmp_entry = *entry; /* see comment in vm_map_wire() */
3455
3456 /*
3457 * We can unlock the map now. The in_transition state
3458 * guarantees existence of the entry.
3459 */
3460 vm_map_unlock(map);
3461 if(map_pmap) {
3462 vm_fault_unwire(map,
3463 &tmp_entry, FALSE, map_pmap, pmap_addr);
3464 } else {
3465 vm_fault_unwire(map,
3466 &tmp_entry, FALSE, map->pmap,
3467 tmp_entry.vme_start);
3468 }
3469 vm_map_lock(map);
3470
3471 if (last_timestamp+1 != map->timestamp) {
3472 /*
3473 * Find the entry again. It could have been clipped
3474 * or deleted after we unlocked the map.
3475 */
3476 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
3477 &first_entry)) {
3478 if (!user_wire)
3479 panic("vm_map_unwire: re-lookup failed");
3480 entry = first_entry->vme_next;
3481 } else
3482 entry = first_entry;
3483 }
3484 last_timestamp = map->timestamp;
3485
3486 /*
3487 * clear transition bit for all constituent entries that
3488 * were in the original entry (saved in tmp_entry). Also
3489 * check for waiters.
3490 */
3491 while ((entry != vm_map_to_entry(map)) &&
3492 (entry->vme_start < tmp_entry.vme_end)) {
3493 assert(entry->in_transition);
3494 entry->in_transition = FALSE;
3495 if (entry->needs_wakeup) {
3496 entry->needs_wakeup = FALSE;
3497 need_wakeup = TRUE;
3498 }
3499 entry = entry->vme_next;
3500 }
3501 }
3502
3503 /*
3504 * We might have fragmented the address space when we wired this
3505 * range of addresses. Attempt to re-coalesce these VM map entries
3506 * with their neighbors now that they're no longer wired.
3507 * Under some circumstances, address space fragmentation can
3508 * prevent VM object shadow chain collapsing, which can cause
3509 * swap space leaks.
3510 */
3511 vm_map_simplify_range(map, start, end);
3512
3513 vm_map_unlock(map);
3514 /*
3515 * wake up anybody waiting on entries that we have unwired.
3516 */
3517 if (need_wakeup)
3518 vm_map_entry_wakeup(map);
3519 return(KERN_SUCCESS);
3520
3521 }
3522
3523 kern_return_t
3524 vm_map_unwire(
3525 register vm_map_t map,
3526 register vm_map_offset_t start,
3527 register vm_map_offset_t end,
3528 boolean_t user_wire)
3529 {
3530 return vm_map_unwire_nested(map, start, end,
3531 user_wire, (pmap_t)NULL, 0);
3532 }
3533
3534
3535 /*
3536 * vm_map_entry_delete: [ internal use only ]
3537 *
3538 * Deallocate the given entry from the target map.
3539 */
3540 static void
3541 vm_map_entry_delete(
3542 register vm_map_t map,
3543 register vm_map_entry_t entry)
3544 {
3545 register vm_map_offset_t s, e;
3546 register vm_object_t object;
3547 register vm_map_t submap;
3548
3549 s = entry->vme_start;
3550 e = entry->vme_end;
3551 assert(page_aligned(s));
3552 assert(page_aligned(e));
3553 assert(entry->wired_count == 0);
3554 assert(entry->user_wired_count == 0);
3555
3556 if (entry->is_sub_map) {
3557 object = NULL;
3558 submap = entry->object.sub_map;
3559 } else {
3560 submap = NULL;
3561 object = entry->object.vm_object;
3562 }
3563
3564 vm_map_entry_unlink(map, entry);
3565 map->size -= e - s;
3566
3567 vm_map_entry_dispose(map, entry);
3568
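	/*
	 * Drop the map lock before releasing the submap or object
	 * reference; callers re-acquire the lock themselves.
	 */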
3569 vm_map_unlock(map);
3570 /*
3571 * Deallocate the object only after removing all
3572 * pmap entries pointing to its pages.
3573 */
3574 if (submap)
3575 vm_map_deallocate(submap);
3576 else
3577 vm_object_deallocate(object);
3578
3579 }
3580
3581
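/*
 *	vm_map_submap_pmap_clean:
 *
 *	Remove the pmap mappings that were established through the
 *	given submap for the range [start, end) of the parent map,
 *	recursing into nested submaps as necessary.
 */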
3582 void
3583 vm_map_submap_pmap_clean(
3584 vm_map_t map,
3585 vm_map_offset_t start,
3586 vm_map_offset_t end,
3587 vm_map_t sub_map,
3588 vm_map_offset_t offset)
3589 {
3590 vm_map_offset_t submap_start;
3591 vm_map_offset_t submap_end;
3592 vm_map_size_t remove_size;
3593 vm_map_entry_t entry;
3594
3595 submap_end = offset + (end - start);
3596 submap_start = offset;
3597 if(vm_map_lookup_entry(sub_map, offset, &entry)) {
3598
3599 remove_size = (entry->vme_end - entry->vme_start);
3600 if(offset > entry->vme_start)
3601 remove_size -= offset - entry->vme_start;
3602
3603
3604 if(submap_end < entry->vme_end) {
3605 remove_size -=
3606 entry->vme_end - submap_end;
3607 }
3608 if(entry->is_sub_map) {
3609 vm_map_submap_pmap_clean(
3610 sub_map,
3611 start,
3612 start + remove_size,
3613 entry->object.sub_map,
3614 entry->offset);
3615 } else {
3616
3617 if((map->mapped) && (map->ref_count)
3618 && (entry->object.vm_object != NULL)) {
3619 vm_object_pmap_protect(
3620 entry->object.vm_object,
3621 entry->offset,
3622 remove_size,
3623 PMAP_NULL,
3624 entry->vme_start,
3625 VM_PROT_NONE);
3626 } else {
3627 pmap_remove(map->pmap,
3628 (addr64_t)start,
3629 (addr64_t)(start + remove_size));
3630 }
3631 }
3632 }
3633
3634 entry = entry->vme_next;
3635
3636 while((entry != vm_map_to_entry(sub_map))
3637 && (entry->vme_start < submap_end)) {
3638 remove_size = (entry->vme_end - entry->vme_start);
3639 if(submap_end < entry->vme_end) {
3640 remove_size -= entry->vme_end - submap_end;
3641 }
3642 if(entry->is_sub_map) {
3643 vm_map_submap_pmap_clean(
3644 sub_map,
3645 (start + entry->vme_start) - offset,
3646 ((start + entry->vme_start) - offset) + remove_size,
3647 entry->object.sub_map,
3648 entry->offset);
3649 } else {
3650 if((map->mapped) && (map->ref_count)
3651 && (entry->object.vm_object != NULL)) {
3652 vm_object_pmap_protect(
3653 entry->object.vm_object,
3654 entry->offset,
3655 remove_size,
3656 PMAP_NULL,
3657 entry->vme_start,
3658 VM_PROT_NONE);
3659 } else {
3660 pmap_remove(map->pmap,
3661 (addr64_t)((start + entry->vme_start)
3662 - offset),
3663 (addr64_t)(((start + entry->vme_start)
3664 - offset) + remove_size));
3665 }
3666 }
3667 entry = entry->vme_next;
3668 }
3669 return;
3670 }
3671
3672 /*
3673 * vm_map_delete: [ internal use only ]
3674 *
3675 * Deallocates the given address range from the target map.
3676 * Removes all user wirings. Unwires one kernel wiring if
3677 * VM_MAP_REMOVE_KUNWIRE is set. Waits for kernel wirings to go
3678 * away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set. Sleeps
3679 * interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
3680 *
3681 * This routine is called with map locked and leaves map locked.
3682 */
3683 static kern_return_t
3684 vm_map_delete(
3685 vm_map_t map,
3686 vm_map_offset_t start,
3687 vm_map_offset_t end,
3688 int flags,
3689 vm_map_t zap_map)
3690 {
3691 vm_map_entry_t entry, next;
3692 struct vm_map_entry *first_entry, tmp_entry;
3693 register vm_map_offset_t s, e;
3694 register vm_object_t object;
3695 boolean_t need_wakeup;
3696 unsigned int last_timestamp = ~0; /* unlikely value */
3697 int interruptible;
3698
3699 interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
3700 THREAD_ABORTSAFE : THREAD_UNINT;
3701
3702 /*
3703 * All our DMA I/O operations in IOKit are currently done by
3704 * wiring through the map entries of the task requesting the I/O.
3705 * Because of this, we must always wait for kernel wirings
3706 * to go away on the entries before deleting them.
3707 *
3708 * Any caller who wants to actually remove a kernel wiring
3709 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
3710 * properly remove one wiring instead of blasting through
3711 * them all.
3712 */
3713 flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;
3714
3715 /*
3716 * Find the start of the region, and clip it
3717 */
3718 if (vm_map_lookup_entry(map, start, &first_entry)) {
3719 entry = first_entry;
3720 vm_map_clip_start(map, entry, start);
3721
3722 /*
3723 * Fix the lookup hint now, rather than each
3724 * time through the loop.
3725 */
3726 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
3727 } else {
3728 entry = first_entry->vme_next;
3729 }
3730
3731 need_wakeup = FALSE;
3732 /*
3733 * Step through all entries in this region
3734 */
3735 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
3736
3737 vm_map_clip_end(map, entry, end);
3738 if (entry->in_transition) {
3739 wait_result_t wait_result;
3740
3741 /*
3742 * Another thread is wiring/unwiring this entry.
3743 * Let the other thread know we are waiting.
3744 */
3745 s = entry->vme_start;
3746 entry->needs_wakeup = TRUE;
3747
3748 /*
3749 * wake up anybody waiting on entries that we have
3750 * already unwired/deleted.
3751 */
3752 if (need_wakeup) {
3753 vm_map_entry_wakeup(map);
3754 need_wakeup = FALSE;
3755 }
3756
3757 wait_result = vm_map_entry_wait(map, interruptible);
3758
3759 if (interruptible &&
3760 wait_result == THREAD_INTERRUPTED) {
3761 /*
3762 * We do not clear the needs_wakeup flag,
3763 * since we cannot tell if we were the only one.
3764 */
3765 vm_map_unlock(map);
3766 return KERN_ABORTED;
3767 }
3768
3769 /*
3770 * The entry could have been clipped or it
3771 * may not exist anymore. Look it up again.
3772 */
3773 if (!vm_map_lookup_entry(map, s, &first_entry)) {
3774 assert((map != kernel_map) &&
3775 (!entry->is_sub_map));
3776 /*
3777 * User: use the next entry
3778 */
3779 entry = first_entry->vme_next;
3780 } else {
3781 entry = first_entry;
3782 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
3783 }
3784 last_timestamp = map->timestamp;
3785 continue;
3786 } /* end in_transition */
3787
3788 if (entry->wired_count) {
3789 /*
3790 * Remove a kernel wiring if requested or if
3791 * there are user wirings.
3792 */
3793 if ((flags & VM_MAP_REMOVE_KUNWIRE) ||
3794 (entry->user_wired_count > 0))
3795 entry->wired_count--;
3796
3797 /* remove all user wire references */
3798 entry->user_wired_count = 0;
3799
3800 if (entry->wired_count != 0) {
3801 assert((map != kernel_map) &&
3802 (!entry->is_sub_map));
3803 /*
3804 * Cannot continue. Typical case is when
3805 * a user thread has physical I/O pending
3806 * on this page. Either wait for the
3807 * kernel wiring to go away or return an
3808 * error.
3809 */
3810 if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) {
3811 wait_result_t wait_result;
3812
3813 s = entry->vme_start;
3814 entry->needs_wakeup = TRUE;
3815 wait_result = vm_map_entry_wait(map,
3816 interruptible);
3817
3818 if (interruptible &&
3819 wait_result == THREAD_INTERRUPTED) {
3820 /*
3821 * We do not clear the
3822 * needs_wakeup flag, since we
3823 * cannot tell if we were the
3824 * only one.
3825 */
3826 vm_map_unlock(map);
3827 return KERN_ABORTED;
3828 }
3829
3830 /*
3831 * The entry could have been clipped or
3832 * it may not exist anymore. Look it
3833 * up again.
3834 */
3835 if (!vm_map_lookup_entry(map, s,
3836 &first_entry)) {
3837 assert((map != kernel_map) &&
3838 (!entry->is_sub_map));
3839 /*
3840 * User: use the next entry
3841 */
3842 entry = first_entry->vme_next;
3843 } else {
3844 entry = first_entry;
3845 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
3846 }
3847 last_timestamp = map->timestamp;
3848 continue;
3849 }
3850 else {
3851 return KERN_FAILURE;
3852 }
3853 }
3854
3855 entry->in_transition = TRUE;
3856 /*
3857 * copy current entry. see comment in vm_map_wire()
3858 */
3859 tmp_entry = *entry;
3860 s = entry->vme_start;
3861 e = entry->vme_end;
3862
3863 /*
3864 * We can unlock the map now. The in_transition
3865 * state guarantees existence of the entry.
3866 */
3867 vm_map_unlock(map);
3868 vm_fault_unwire(map, &tmp_entry,
3869 tmp_entry.object.vm_object == kernel_object,
3870 map->pmap, tmp_entry.vme_start);
3871 vm_map_lock(map);
3872
3873 if (last_timestamp+1 != map->timestamp) {
3874 /*
3875 * Find the entry again. It could have
3876 * been clipped after we unlocked the map.
3877 */
3878 if (!vm_map_lookup_entry(map, s, &first_entry)){
3879 assert((map != kernel_map) &&
3880 (!entry->is_sub_map));
3881 first_entry = first_entry->vme_next;
3882 } else {
3883 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
3884 }
3885 } else {
3886 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
3887 first_entry = entry;
3888 }
3889
3890 last_timestamp = map->timestamp;
3891
3892 entry = first_entry;
3893 while ((entry != vm_map_to_entry(map)) &&
3894 (entry->vme_start < tmp_entry.vme_end)) {
3895 assert(entry->in_transition);
3896 entry->in_transition = FALSE;
3897 if (entry->needs_wakeup) {
3898 entry->needs_wakeup = FALSE;
3899 need_wakeup = TRUE;
3900 }
3901 entry = entry->vme_next;
3902 }
3903 /*
3904 * We have unwired the entry(s). Go back and
3905 * delete them.
3906 */
3907 entry = first_entry;
3908 continue;
3909 }
3910
3911 /* entry is unwired */
3912 assert(entry->wired_count == 0);
3913 assert(entry->user_wired_count == 0);
3914
3915 if ((!entry->is_sub_map &&
3916 entry->object.vm_object != kernel_object) ||
3917 entry->is_sub_map) {
3918 if(entry->is_sub_map) {
3919 if(entry->use_pmap) {
3920 #ifndef NO_NESTED_PMAP
3921 pmap_unnest(map->pmap,
3922 (addr64_t)entry->vme_start);
3923 #endif /* NO_NESTED_PMAP */
3924 if((map->mapped) && (map->ref_count)) {
3925 /* clean up parent map/maps */
3926 vm_map_submap_pmap_clean(
3927 map, entry->vme_start,
3928 entry->vme_end,
3929 entry->object.sub_map,
3930 entry->offset);
3931 }
3932 } else {
3933 vm_map_submap_pmap_clean(
3934 map, entry->vme_start, entry->vme_end,
3935 entry->object.sub_map,
3936 entry->offset);
3937 }
3938 } else {
3939 object = entry->object.vm_object;
3940 if((map->mapped) && (map->ref_count)) {
3941 vm_object_pmap_protect(
3942 object, entry->offset,
3943 entry->vme_end - entry->vme_start,
3944 PMAP_NULL,
3945 entry->vme_start,
3946 VM_PROT_NONE);
3947 } else {
3948 pmap_remove(map->pmap,
3949 (addr64_t)entry->vme_start,
3950 (addr64_t)entry->vme_end);
3951 }
3952 }
3953 }
3954
3955 /*
3956 * All pmap mappings for this map entry must have been
3957 * cleared by now.
3958 */
3959 assert(vm_map_pmap_is_empty(map,
3960 entry->vme_start,
3961 entry->vme_end));
3962
3963 next = entry->vme_next;
3964 s = next->vme_start;
3965 last_timestamp = map->timestamp;
3966
3967 if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) &&
3968 zap_map != VM_MAP_NULL) {
3969 /*
3970 * The caller wants to save the affected VM map entries
3971 * into the "zap_map". The caller will take care of
3972 * these entries.
3973 */
3974 /* unlink the entry from "map" ... */
3975 vm_map_entry_unlink(map, entry);
3976 /* ... and add it to the end of the "zap_map" */
3977 vm_map_entry_link(zap_map,
3978 vm_map_last_entry(zap_map),
3979 entry);
3980 } else {
3981 vm_map_entry_delete(map, entry);
3982 /* vm_map_entry_delete unlocks the map */
3983 vm_map_lock(map);
3984 }
3985
3986 entry = next;
3987
3988 if(entry == vm_map_to_entry(map)) {
3989 break;
3990 }
3991 if (last_timestamp+1 != map->timestamp) {
3992 /*
3993 * we are responsible for deleting everything
3994 * from the given space; if someone has interfered,
3995 * we pick up where we left off. Back-fills should
3996 * be all right for anyone except map_delete, and
3997 * we have to assume that the task has been fully
3998 * disabled before we get here
3999 */
4000 if (!vm_map_lookup_entry(map, s, &entry)){
4001 entry = entry->vme_next;
4002 } else {
4003 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4004 }
4005 /*
4006 * Others can not only allocate behind us, we can
4007 * also see coalescing while we don't hold the map lock.
4008 */
4009 if(entry == vm_map_to_entry(map)) {
4010 break;
4011 }
4012 vm_map_clip_start(map, entry, s);
4013 }
4014 last_timestamp = map->timestamp;
4015 }
4016
4017 if (map->wait_for_space)
4018 thread_wakeup((event_t) map);
4019 /*
4020 * wake up anybody waiting on entries that we have already deleted.
4021 */
4022 if (need_wakeup)
4023 vm_map_entry_wakeup(map);
4024
4025 return KERN_SUCCESS;
4026 }
4027
4028
4029 /*
4030 * vm_map_remove:
4031 *
4032 * Remove the given address range from the target map.
4033 * This is the exported form of vm_map_delete.
4034 */
4035 kern_return_t
4036 vm_map_remove(
4037 register vm_map_t map,
4038 register vm_map_offset_t start,
4039 register vm_map_offset_t end,
4040 register boolean_t flags)
4041 {
4042 register kern_return_t result;
4043
4044 vm_map_lock(map);
4045 VM_MAP_RANGE_CHECK(map, start, end);
4046 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
4047 vm_map_unlock(map);
4048
4049 return(result);
4050 }
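
/*
 * Illustrative sketch: how a kernel client might tear down a range it
 * had previously mapped, using vm_map_remove() above.  The helper name
 * example_release_range is hypothetical; only vm_map_remove(),
 * vm_map_trunc_page()/vm_map_round_page() and VM_MAP_NO_FLAGS are
 * taken from this file.
 */
#if 0 /* example only */
static kern_return_t
example_release_range(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_size_t	size)
{
	/* round to page boundaries; vm_map_delete works on whole entries */
	return vm_map_remove(map,
			     vm_map_trunc_page(start),
			     vm_map_round_page(start + size),
			     VM_MAP_NO_FLAGS);
}
#endif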
4051
4052
4053 /*
4054 * Routine: vm_map_copy_discard
4055 *
4056 * Description:
4057 * Dispose of a map copy object (returned by
4058 * vm_map_copyin).
4059 */
4060 void
4061 vm_map_copy_discard(
4062 vm_map_copy_t copy)
4063 {
4064 TR_DECL("vm_map_copy_discard");
4065
4066 /* tr3("enter: copy 0x%x type %d", copy, copy->type);*/
4067
4068 if (copy == VM_MAP_COPY_NULL)
4069 return;
4070
4071 switch (copy->type) {
4072 case VM_MAP_COPY_ENTRY_LIST:
4073 while (vm_map_copy_first_entry(copy) !=
4074 vm_map_copy_to_entry(copy)) {
4075 vm_map_entry_t entry = vm_map_copy_first_entry(copy);
4076
4077 vm_map_copy_entry_unlink(copy, entry);
4078 vm_object_deallocate(entry->object.vm_object);
4079 vm_map_copy_entry_dispose(copy, entry);
4080 }
4081 break;
4082 case VM_MAP_COPY_OBJECT:
4083 vm_object_deallocate(copy->cpy_object);
4084 break;
4085 case VM_MAP_COPY_KERNEL_BUFFER:
4086
4087 /*
4088 * The vm_map_copy_t and possibly the data buffer were
4089 * allocated by a single call to kalloc(), i.e. the
4090 * vm_map_copy_t was not allocated out of the zone.
4091 */
4092 kfree(copy, copy->cpy_kalloc_size);
4093 return;
4094 }
4095 zfree(vm_map_copy_zone, copy);
4096 }
4097
4098 /*
4099 * Routine: vm_map_copy_copy
4100 *
4101 * Description:
4102 * Move the information in a map copy object to
4103 * a new map copy object, leaving the old one
4104 * empty.
4105 *
4106 * This is used by kernel routines that need
4107 * to look at out-of-line data (in copyin form)
4108 * before deciding whether to return SUCCESS.
4109 * If the routine returns FAILURE, the original
4110 * copy object will be deallocated; therefore,
4111 * these routines must make a copy of the copy
4112 * object and leave the original empty so that
4113 * deallocation will not fail.
4114 */
4115 vm_map_copy_t
4116 vm_map_copy_copy(
4117 vm_map_copy_t copy)
4118 {
4119 vm_map_copy_t new_copy;
4120
4121 if (copy == VM_MAP_COPY_NULL)
4122 return VM_MAP_COPY_NULL;
4123
4124 /*
4125 * Allocate a new copy object, and copy the information
4126 * from the old one into it.
4127 */
4128
4129 new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
4130 *new_copy = *copy;
4131
4132 if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
4133 /*
4134 * The links in the entry chain must be
4135 * changed to point to the new copy object.
4136 */
4137 vm_map_copy_first_entry(copy)->vme_prev
4138 = vm_map_copy_to_entry(new_copy);
4139 vm_map_copy_last_entry(copy)->vme_next
4140 = vm_map_copy_to_entry(new_copy);
4141 }
4142
4143 /*
4144 * Change the old copy object into one that contains
4145 * nothing to be deallocated.
4146 */
4147 copy->type = VM_MAP_COPY_OBJECT;
4148 copy->cpy_object = VM_OBJECT_NULL;
4149
4150 /*
4151 * Return the new object.
4152 */
4153 return new_copy;
4154 }
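
/*
 * Illustrative sketch of the pattern described above: a routine that
 * must inspect out-of-line data before committing to success first
 * moves the entries into a private copy object with vm_map_copy_copy(),
 * so that the caller's deallocation of the (now empty) original cannot
 * fail.  The names example_consume_copy and example_validate are
 * hypothetical.
 */
#if 0 /* example only */
extern boolean_t example_validate(vm_map_copy_t copy);	/* hypothetical check */

static kern_return_t
example_consume_copy(
	vm_map_t	dst_map,
	vm_map_copy_t	copy)
{
	vm_map_copy_t		private_copy;
	vm_map_address_t	dst_addr;
	kern_return_t		kr;

	/* leave "copy" empty; work on our own copy object from here on */
	private_copy = vm_map_copy_copy(copy);

	if (!example_validate(private_copy)) {
		/* failure: the caller's discard of "copy" is now harmless */
		vm_map_copy_discard(private_copy);
		return KERN_FAILURE;
	}

	/* success: place the data; vm_map_copyout consumes the copy */
	kr = vm_map_copyout(dst_map, &dst_addr, private_copy);
	if (kr != KERN_SUCCESS)
		vm_map_copy_discard(private_copy);
	return kr;
}
#endif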
4155
4156 static kern_return_t
4157 vm_map_overwrite_submap_recurse(
4158 vm_map_t dst_map,
4159 vm_map_offset_t dst_addr,
4160 vm_map_size_t dst_size)
4161 {
4162 vm_map_offset_t dst_end;
4163 vm_map_entry_t tmp_entry;
4164 vm_map_entry_t entry;
4165 kern_return_t result;
4166 boolean_t encountered_sub_map = FALSE;
4167
4168
4169
4170 /*
4171 * Verify that the destination is all writeable
4172 * initially. We have to trunc the destination
4173 * address and round the copy size or we'll end up
4174 * splitting entries in strange ways.
4175 */
4176
4177 dst_end = vm_map_round_page(dst_addr + dst_size);
4178 vm_map_lock(dst_map);
4179
4180 start_pass_1:
4181 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
4182 vm_map_unlock(dst_map);
4183 return(KERN_INVALID_ADDRESS);
4184 }
4185
4186 vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(dst_addr));
4187
4188 for (entry = tmp_entry;;) {
4189 vm_map_entry_t next;
4190
4191 next = entry->vme_next;
4192 while(entry->is_sub_map) {
4193 vm_map_offset_t sub_start;
4194 vm_map_offset_t sub_end;
4195 vm_map_offset_t local_end;
4196
4197 if (entry->in_transition) {
4198 /*
4199 * Say that we are waiting, and wait for entry.
4200 */
4201 entry->needs_wakeup = TRUE;
4202 vm_map_entry_wait(dst_map, THREAD_UNINT);
4203
4204 goto start_pass_1;
4205 }
4206
4207 encountered_sub_map = TRUE;
4208 sub_start = entry->offset;
4209
4210 if(entry->vme_end < dst_end)
4211 sub_end = entry->vme_end;
4212 else
4213 sub_end = dst_end;
4214 sub_end -= entry->vme_start;
4215 sub_end += entry->offset;
4216 local_end = entry->vme_end;
4217 vm_map_unlock(dst_map);
4218
4219 result = vm_map_overwrite_submap_recurse(
4220 entry->object.sub_map,
4221 sub_start,
4222 sub_end - sub_start);
4223
4224 if(result != KERN_SUCCESS)
4225 return result;
4226 if (dst_end <= entry->vme_end)
4227 return KERN_SUCCESS;
4228 vm_map_lock(dst_map);
4229 if(!vm_map_lookup_entry(dst_map, local_end,
4230 &tmp_entry)) {
4231 vm_map_unlock(dst_map);
4232 return(KERN_INVALID_ADDRESS);
4233 }
4234 entry = tmp_entry;
4235 next = entry->vme_next;
4236 }
4237
4238 if ( ! (entry->protection & VM_PROT_WRITE)) {
4239 vm_map_unlock(dst_map);
4240 return(KERN_PROTECTION_FAILURE);
4241 }
4242
4243 /*
4244 * If the entry is in transition, we must wait
4245 * for it to exit that state. Anything could happen
4246 * when we unlock the map, so start over.
4247 */
4248 if (entry->in_transition) {
4249
4250 /*
4251 * Say that we are waiting, and wait for entry.
4252 */
4253 entry->needs_wakeup = TRUE;
4254 vm_map_entry_wait(dst_map, THREAD_UNINT);
4255
4256 goto start_pass_1;
4257 }
4258
4259 /*
4260 * our range is contained completely within this map entry
4261 */
4262 if (dst_end <= entry->vme_end) {
4263 vm_map_unlock(dst_map);
4264 return KERN_SUCCESS;
4265 }
4266 /*
4267 * check that range specified is contiguous region
4268 */
4269 if ((next == vm_map_to_entry(dst_map)) ||
4270 (next->vme_start != entry->vme_end)) {
4271 vm_map_unlock(dst_map);
4272 return(KERN_INVALID_ADDRESS);
4273 }
4274
4275 /*
4276 * Check for permanent objects in the destination.
4277 */
4278 if ((entry->object.vm_object != VM_OBJECT_NULL) &&
4279 ((!entry->object.vm_object->internal) ||
4280 (entry->object.vm_object->true_share))) {
4281 if(encountered_sub_map) {
4282 vm_map_unlock(dst_map);
4283 return(KERN_FAILURE);
4284 }
4285 }
4286
4287
4288 entry = next;
4289 }/* for */
4290 vm_map_unlock(dst_map);
4291 return(KERN_SUCCESS);
4292 }
4293
4294 /*
4295 * Routine: vm_map_copy_overwrite
4296 *
4297 * Description:
4298 * Copy the memory described by the map copy
4299 * object (copy; returned by vm_map_copyin) onto
4300 * the specified destination region (dst_map, dst_addr).
4301 * The destination must be writeable.
4302 *
4303 * Unlike vm_map_copyout, this routine actually
4304 * writes over previously-mapped memory. If the
4305 * previous mapping was to a permanent (user-supplied)
4306 * memory object, it is preserved.
4307 *
4308 * The attributes (protection and inheritance) of the
4309 * destination region are preserved.
4310 *
4311 * If successful, consumes the copy object.
4312 * Otherwise, the caller is responsible for it.
4313 *
4314 * Implementation notes:
4315 * To overwrite aligned temporary virtual memory, it is
4316 * sufficient to remove the previous mapping and insert
4317 * the new copy. This replacement is done either on
4318 * the whole region (if no permanent virtual memory
4319 * objects are embedded in the destination region) or
4320 * in individual map entries.
4321 *
4322 * To overwrite permanent virtual memory, it is necessary
4323 * to copy each page, as the external memory management
4324 * interface currently does not provide any optimizations.
4325 *
4326 * Unaligned memory also has to be copied. It is possible
4327 * to use 'vm_trickery' to copy the aligned data. This is
4328 * not done but not hard to implement.
4329 *
4330 * Once a page of permanent memory has been overwritten,
4331 * it is impossible to interrupt this function; otherwise,
4332 * the call would be neither atomic nor location-independent.
4333 * The kernel-state portion of a user thread must be
4334 * interruptible.
4335 *
4336 * It may be expensive to forward all requests that might
4337 * overwrite permanent memory (vm_write, vm_copy) to
4338 * uninterruptible kernel threads. This routine may be
4339 * called by interruptible threads; however, success is
4340 * not guaranteed -- if the request cannot be performed
4341 * atomically and interruptibly, an error indication is
4342 * returned.
4343 */
4344
4345 static kern_return_t
4346 vm_map_copy_overwrite_nested(
4347 vm_map_t dst_map,
4348 vm_map_address_t dst_addr,
4349 vm_map_copy_t copy,
4350 boolean_t interruptible,
4351 pmap_t pmap)
4352 {
4353 vm_map_offset_t dst_end;
4354 vm_map_entry_t tmp_entry;
4355 vm_map_entry_t entry;
4356 kern_return_t kr;
4357 boolean_t aligned = TRUE;
4358 boolean_t contains_permanent_objects = FALSE;
4359 boolean_t encountered_sub_map = FALSE;
4360 vm_map_offset_t base_addr;
4361 vm_map_size_t copy_size;
4362 vm_map_size_t total_size;
4363
4364
4365 /*
4366 * Check for null copy object.
4367 */
4368
4369 if (copy == VM_MAP_COPY_NULL)
4370 return(KERN_SUCCESS);
4371
4372 /*
4373 * Check for special kernel buffer allocated
4374 * by new_ipc_kmsg_copyin.
4375 */
4376
4377 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
4378 return(vm_map_copyout_kernel_buffer(
4379 dst_map, &dst_addr,
4380 copy, TRUE));
4381 }
4382
4383 /*
4384 * Only works for entry lists at the moment. Will
4385 * support page lists later.
4386 */
4387
4388 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
4389
4390 if (copy->size == 0) {
4391 vm_map_copy_discard(copy);
4392 return(KERN_SUCCESS);
4393 }
4394
4395 /*
4396 * Verify that the destination is all writeable
4397 * initially. We have to trunc the destination
4398 * address and round the copy size or we'll end up
4399 * splitting entries in strange ways.
4400 */
4401
4402 if (!page_aligned(copy->size) ||
4403 !page_aligned (copy->offset) ||
4404 !page_aligned (dst_addr))
4405 {
4406 aligned = FALSE;
4407 dst_end = vm_map_round_page(dst_addr + copy->size);
4408 } else {
4409 dst_end = dst_addr + copy->size;
4410 }
4411
4412 vm_map_lock(dst_map);
4413
4414 /* LP64todo - remove this check when vm_map_commpage64()
4415 * no longer has to stuff in a map_entry for the commpage
4416 * above the map's max_offset.
4417 */
4418 if (dst_addr >= dst_map->max_offset) {
4419 vm_map_unlock(dst_map);
4420 return(KERN_INVALID_ADDRESS);
4421 }
4422
4423 start_pass_1:
4424 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
4425 vm_map_unlock(dst_map);
4426 return(KERN_INVALID_ADDRESS);
4427 }
4428 vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(dst_addr));
4429 for (entry = tmp_entry;;) {
4430 vm_map_entry_t next = entry->vme_next;
4431
4432 while(entry->is_sub_map) {
4433 vm_map_offset_t sub_start;
4434 vm_map_offset_t sub_end;
4435 vm_map_offset_t local_end;
4436
4437 if (entry->in_transition) {
4438
4439 /*
4440 * Say that we are waiting, and wait for entry.
4441 */
4442 entry->needs_wakeup = TRUE;
4443 vm_map_entry_wait(dst_map, THREAD_UNINT);
4444
4445 goto start_pass_1;
4446 }
4447
4448 local_end = entry->vme_end;
4449 if (!(entry->needs_copy)) {
4450 /* if needs_copy we are a COW submap */
4451 /* in such a case we just replace so */
4452 /* there is no need for the follow- */
4453 /* ing check. */
4454 encountered_sub_map = TRUE;
4455 sub_start = entry->offset;
4456
4457 if(entry->vme_end < dst_end)
4458 sub_end = entry->vme_end;
4459 else
4460 sub_end = dst_end;
4461 sub_end -= entry->vme_start;
4462 sub_end += entry->offset;
4463 vm_map_unlock(dst_map);
4464
4465 kr = vm_map_overwrite_submap_recurse(
4466 entry->object.sub_map,
4467 sub_start,
4468 sub_end - sub_start);
4469 if(kr != KERN_SUCCESS)
4470 return kr;
4471 vm_map_lock(dst_map);
4472 }
4473
4474 if (dst_end <= entry->vme_end)
4475 goto start_overwrite;
4476 if(!vm_map_lookup_entry(dst_map, local_end,
4477 &entry)) {
4478 vm_map_unlock(dst_map);
4479 return(KERN_INVALID_ADDRESS);
4480 }
4481 next = entry->vme_next;
4482 }
4483
4484 if ( ! (entry->protection & VM_PROT_WRITE)) {
4485 vm_map_unlock(dst_map);
4486 return(KERN_PROTECTION_FAILURE);
4487 }
4488
4489 /*
4490 * If the entry is in transition, we must wait
4491 * for it to exit that state. Anything could happen
4492 * when we unlock the map, so start over.
4493 */
4494 if (entry->in_transition) {
4495
4496 /*
4497 * Say that we are waiting, and wait for entry.
4498 */
4499 entry->needs_wakeup = TRUE;
4500 vm_map_entry_wait(dst_map, THREAD_UNINT);
4501
4502 goto start_pass_1;
4503 }
4504
4505 /*
4506 * our range is contained completely within this map entry
4507 */
4508 if (dst_end <= entry->vme_end)
4509 break;
4510 /*
4511 * check that range specified is contiguous region
4512 */
4513 if ((next == vm_map_to_entry(dst_map)) ||
4514 (next->vme_start != entry->vme_end)) {
4515 vm_map_unlock(dst_map);
4516 return(KERN_INVALID_ADDRESS);
4517 }
4518
4519
4520 /*
4521 * Check for permanent objects in the destination.
4522 */
4523 if ((entry->object.vm_object != VM_OBJECT_NULL) &&
4524 ((!entry->object.vm_object->internal) ||
4525 (entry->object.vm_object->true_share))) {
4526 contains_permanent_objects = TRUE;
4527 }
4528
4529 entry = next;
4530 }/* for */
4531
4532 start_overwrite:
4533 /*
4534 * If there are permanent objects in the destination, then
4535 * the copy cannot be interrupted.
4536 */
4537
4538 if (interruptible && contains_permanent_objects) {
4539 vm_map_unlock(dst_map);
4540 return(KERN_FAILURE); /* XXX */
4541 }
4542
4543 /*
4544 *
4545 * Make a second pass, overwriting the data
4546 * At the beginning of each loop iteration,
4547 * the next entry to be overwritten is "tmp_entry"
4548 * (initially, the value returned from the lookup above),
4549 * and the starting address expected in that entry
4550 * is "start".
4551 */
4552
4553 total_size = copy->size;
4554 if(encountered_sub_map) {
4555 copy_size = 0;
4556 /* re-calculate tmp_entry since we've had the map */
4557 /* unlocked */
4558 if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
4559 vm_map_unlock(dst_map);
4560 return(KERN_INVALID_ADDRESS);
4561 }
4562 } else {
4563 copy_size = copy->size;
4564 }
4565
4566 base_addr = dst_addr;
4567 while(TRUE) {
4568 /* deconstruct the copy object and do it in parts */
4569 /* only in the sub_map, interruptible case */
4570 vm_map_entry_t copy_entry;
4571 vm_map_entry_t previous_prev = VM_MAP_ENTRY_NULL;
4572 vm_map_entry_t next_copy = VM_MAP_ENTRY_NULL;
4573 int nentries;
4574 int remaining_entries = 0;
4575 int new_offset = 0;
4576
4577 for (entry = tmp_entry; copy_size == 0;) {
4578 vm_map_entry_t next;
4579
4580 next = entry->vme_next;
4581
4582 /* tmp_entry and base address are moved along */
4583 /* each time we encounter a sub-map. Otherwise */
4584 /* entry can outpace tmp_entry, and the copy_size */
4585 /* may reflect the distance between them. */
4586 /* If the current entry is found to be in transition, */
4587 /* we will start over at the beginning or at the last */
4588 /* encountered submap, as dictated by base_addr, and */
4589 /* we will zero copy_size accordingly. */
4590 if (entry->in_transition) {
4591 /*
4592 * Say that we are waiting, and wait for entry.
4593 */
4594 entry->needs_wakeup = TRUE;
4595 vm_map_entry_wait(dst_map, THREAD_UNINT);
4596
4597 if(!vm_map_lookup_entry(dst_map, base_addr,
4598 &tmp_entry)) {
4599 vm_map_unlock(dst_map);
4600 return(KERN_INVALID_ADDRESS);
4601 }
4602 copy_size = 0;
4603 entry = tmp_entry;
4604 continue;
4605 }
4606 if(entry->is_sub_map) {
4607 vm_map_offset_t sub_start;
4608 vm_map_offset_t sub_end;
4609 vm_map_offset_t local_end;
4610
4611 if (entry->needs_copy) {
4612 /* if this is a COW submap */
4613 /* just back the range with an */
4614 /* anonymous entry */
4615 if(entry->vme_end < dst_end)
4616 sub_end = entry->vme_end;
4617 else
4618 sub_end = dst_end;
4619 if(entry->vme_start < base_addr)
4620 sub_start = base_addr;
4621 else
4622 sub_start = entry->vme_start;
4623 vm_map_clip_end(
4624 dst_map, entry, sub_end);
4625 vm_map_clip_start(
4626 dst_map, entry, sub_start);
4627 entry->is_sub_map = FALSE;
4628 vm_map_deallocate(
4629 entry->object.sub_map);
4630 entry->object.sub_map = NULL;
4631 entry->is_shared = FALSE;
4632 entry->needs_copy = FALSE;
4633 entry->offset = 0;
4634 entry->protection = VM_PROT_ALL;
4635 entry->max_protection = VM_PROT_ALL;
4636 entry->wired_count = 0;
4637 entry->user_wired_count = 0;
4638 if(entry->inheritance
4639 == VM_INHERIT_SHARE)
4640 entry->inheritance = VM_INHERIT_COPY;
4641 continue;
4642 }
4643 /* first take care of any non-sub_map */
4644 /* entries to send */
4645 if(base_addr < entry->vme_start) {
4646 /* stuff to send */
4647 copy_size =
4648 entry->vme_start - base_addr;
4649 break;
4650 }
4651 sub_start = entry->offset;
4652
4653 if(entry->vme_end < dst_end)
4654 sub_end = entry->vme_end;
4655 else
4656 sub_end = dst_end;
4657 sub_end -= entry->vme_start;
4658 sub_end += entry->offset;
4659 local_end = entry->vme_end;
4660 vm_map_unlock(dst_map);
4661 copy_size = sub_end - sub_start;
4662
4663 /* adjust the copy object */
4664 if (total_size > copy_size) {
4665 vm_map_size_t local_size = 0;
4666 vm_map_size_t entry_size;
4667
4668 nentries = 1;
4669 new_offset = copy->offset;
4670 copy_entry = vm_map_copy_first_entry(copy);
4671 while(copy_entry !=
4672 vm_map_copy_to_entry(copy)){
4673 entry_size = copy_entry->vme_end -
4674 copy_entry->vme_start;
4675 if((local_size < copy_size) &&
4676 ((local_size + entry_size)
4677 >= copy_size)) {
4678 vm_map_copy_clip_end(copy,
4679 copy_entry,
4680 copy_entry->vme_start +
4681 (copy_size - local_size));
4682 entry_size = copy_entry->vme_end -
4683 copy_entry->vme_start;
4684 local_size += entry_size;
4685 new_offset += entry_size;
4686 }
4687 if(local_size >= copy_size) {
4688 next_copy = copy_entry->vme_next;
4689 copy_entry->vme_next =
4690 vm_map_copy_to_entry(copy);
4691 previous_prev =
4692 copy->cpy_hdr.links.prev;
4693 copy->cpy_hdr.links.prev = copy_entry;
4694 copy->size = copy_size;
4695 remaining_entries =
4696 copy->cpy_hdr.nentries;
4697 remaining_entries -= nentries;
4698 copy->cpy_hdr.nentries = nentries;
4699 break;
4700 } else {
4701 local_size += entry_size;
4702 new_offset += entry_size;
4703 nentries++;
4704 }
4705 copy_entry = copy_entry->vme_next;
4706 }
4707 }
4708
4709 if((entry->use_pmap) && (pmap == NULL)) {
4710 kr = vm_map_copy_overwrite_nested(
4711 entry->object.sub_map,
4712 sub_start,
4713 copy,
4714 interruptible,
4715 entry->object.sub_map->pmap);
4716 } else if (pmap != NULL) {
4717 kr = vm_map_copy_overwrite_nested(
4718 entry->object.sub_map,
4719 sub_start,
4720 copy,
4721 interruptible, pmap);
4722 } else {
4723 kr = vm_map_copy_overwrite_nested(
4724 entry->object.sub_map,
4725 sub_start,
4726 copy,
4727 interruptible,
4728 dst_map->pmap);
4729 }
4730 if(kr != KERN_SUCCESS) {
4731 if(next_copy != NULL) {
4732 copy->cpy_hdr.nentries +=
4733 remaining_entries;
4734 copy->cpy_hdr.links.prev->vme_next =
4735 next_copy;
4736 copy->cpy_hdr.links.prev
4737 = previous_prev;
4738 copy->size = total_size;
4739 }
4740 return kr;
4741 }
4742 if (dst_end <= local_end) {
4743 return(KERN_SUCCESS);
4744 }
4745 /* otherwise copy no longer exists, it was */
4746 /* destroyed after successful copy_overwrite */
4747 copy = (vm_map_copy_t)
4748 zalloc(vm_map_copy_zone);
4749 vm_map_copy_first_entry(copy) =
4750 vm_map_copy_last_entry(copy) =
4751 vm_map_copy_to_entry(copy);
4752 copy->type = VM_MAP_COPY_ENTRY_LIST;
4753 copy->offset = new_offset;
4754
4755 total_size -= copy_size;
4756 copy_size = 0;
4757 /* put back remainder of copy in container */
4758 if(next_copy != NULL) {
4759 copy->cpy_hdr.nentries = remaining_entries;
4760 copy->cpy_hdr.links.next = next_copy;
4761 copy->cpy_hdr.links.prev = previous_prev;
4762 copy->size = total_size;
4763 next_copy->vme_prev =
4764 vm_map_copy_to_entry(copy);
4765 next_copy = NULL;
4766 }
4767 base_addr = local_end;
4768 vm_map_lock(dst_map);
4769 if(!vm_map_lookup_entry(dst_map,
4770 local_end, &tmp_entry)) {
4771 vm_map_unlock(dst_map);
4772 return(KERN_INVALID_ADDRESS);
4773 }
4774 entry = tmp_entry;
4775 continue;
4776 }
4777 if (dst_end <= entry->vme_end) {
4778 copy_size = dst_end - base_addr;
4779 break;
4780 }
4781
4782 if ((next == vm_map_to_entry(dst_map)) ||
4783 (next->vme_start != entry->vme_end)) {
4784 vm_map_unlock(dst_map);
4785 return(KERN_INVALID_ADDRESS);
4786 }
4787
4788 entry = next;
4789 }/* for */
4790
4791 next_copy = NULL;
4792 nentries = 1;
4793
4794 /* adjust the copy object */
4795 if (total_size > copy_size) {
4796 vm_map_size_t local_size = 0;
4797 vm_map_size_t entry_size;
4798
4799 new_offset = copy->offset;
4800 copy_entry = vm_map_copy_first_entry(copy);
4801 while(copy_entry != vm_map_copy_to_entry(copy)) {
4802 entry_size = copy_entry->vme_end -
4803 copy_entry->vme_start;
4804 if((local_size < copy_size) &&
4805 ((local_size + entry_size)
4806 >= copy_size)) {
4807 vm_map_copy_clip_end(copy, copy_entry,
4808 copy_entry->vme_start +
4809 (copy_size - local_size));
4810 entry_size = copy_entry->vme_end -
4811 copy_entry->vme_start;
4812 local_size += entry_size;
4813 new_offset += entry_size;
4814 }
4815 if(local_size >= copy_size) {
4816 next_copy = copy_entry->vme_next;
4817 copy_entry->vme_next =
4818 vm_map_copy_to_entry(copy);
4819 previous_prev =
4820 copy->cpy_hdr.links.prev;
4821 copy->cpy_hdr.links.prev = copy_entry;
4822 copy->size = copy_size;
4823 remaining_entries =
4824 copy->cpy_hdr.nentries;
4825 remaining_entries -= nentries;
4826 copy->cpy_hdr.nentries = nentries;
4827 break;
4828 } else {
4829 local_size += entry_size;
4830 new_offset += entry_size;
4831 nentries++;
4832 }
4833 copy_entry = copy_entry->vme_next;
4834 }
4835 }
4836
4837 if (aligned) {
4838 pmap_t local_pmap;
4839
4840 if(pmap)
4841 local_pmap = pmap;
4842 else
4843 local_pmap = dst_map->pmap;
4844
4845 if ((kr = vm_map_copy_overwrite_aligned(
4846 dst_map, tmp_entry, copy,
4847 base_addr, local_pmap)) != KERN_SUCCESS) {
4848 if(next_copy != NULL) {
4849 copy->cpy_hdr.nentries +=
4850 remaining_entries;
4851 copy->cpy_hdr.links.prev->vme_next =
4852 next_copy;
4853 copy->cpy_hdr.links.prev =
4854 previous_prev;
4855 copy->size += copy_size;
4856 }
4857 return kr;
4858 }
4859 vm_map_unlock(dst_map);
4860 } else {
4861 /*
4862 * Performance gain:
4863 *
4864 * If the copy and dst address are misaligned but at the same
4865 * offset within the page, we can copy_not_aligned the
4866 * misaligned parts and copy the rest aligned. If they are
4867 * aligned but the length is unaligned, we simply need to copy
4868 * the end bit unaligned. We'll need to split the misaligned
4869 * bits of the region in this case!
4870 */
4871 /* ALWAYS UNLOCKS THE dst_map MAP */
4872 if ((kr = vm_map_copy_overwrite_unaligned( dst_map,
4873 tmp_entry, copy, base_addr)) != KERN_SUCCESS) {
4874 if(next_copy != NULL) {
4875 copy->cpy_hdr.nentries +=
4876 remaining_entries;
4877 copy->cpy_hdr.links.prev->vme_next =
4878 next_copy;
4879 copy->cpy_hdr.links.prev =
4880 previous_prev;
4881 copy->size += copy_size;
4882 }
4883 return kr;
4884 }
4885 }
4886 total_size -= copy_size;
4887 if(total_size == 0)
4888 break;
4889 base_addr += copy_size;
4890 copy_size = 0;
4891 copy->offset = new_offset;
4892 if(next_copy != NULL) {
4893 copy->cpy_hdr.nentries = remaining_entries;
4894 copy->cpy_hdr.links.next = next_copy;
4895 copy->cpy_hdr.links.prev = previous_prev;
4896 next_copy->vme_prev = vm_map_copy_to_entry(copy);
4897 copy->size = total_size;
4898 }
4899 vm_map_lock(dst_map);
4900 while(TRUE) {
4901 if (!vm_map_lookup_entry(dst_map,
4902 base_addr, &tmp_entry)) {
4903 vm_map_unlock(dst_map);
4904 return(KERN_INVALID_ADDRESS);
4905 }
4906 if (tmp_entry->in_transition) {
4907 entry->needs_wakeup = TRUE;
4908 vm_map_entry_wait(dst_map, THREAD_UNINT);
4909 } else {
4910 break;
4911 }
4912 }
4913 vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(base_addr));
4914
4915 entry = tmp_entry;
4916 } /* while */
4917
4918 /*
4919 * Throw away the vm_map_copy object
4920 */
4921 vm_map_copy_discard(copy);
4922
4923 return(KERN_SUCCESS);
4924 }/* vm_map_copy_overwrite */
4925
4926 kern_return_t
4927 vm_map_copy_overwrite(
4928 vm_map_t dst_map,
4929 vm_map_offset_t dst_addr,
4930 vm_map_copy_t copy,
4931 boolean_t interruptible)
4932 {
4933 return vm_map_copy_overwrite_nested(
4934 dst_map, dst_addr, copy, interruptible, (pmap_t) NULL);
4935 }
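
/*
 * Illustrative sketch: overwriting an already-mapped destination range
 * with a copy object, the way a vm_write()-style path would use the
 * routine above.  On success the copy is consumed; on failure the
 * caller remains responsible for it, so it is discarded here.  The name
 * example_overwrite_region is hypothetical.
 */
#if 0 /* example only */
static kern_return_t
example_overwrite_region(
	vm_map_t	dst_map,
	vm_map_offset_t	dst_addr,
	vm_map_copy_t	copy)
{
	kern_return_t	kr;

	kr = vm_map_copy_overwrite(dst_map, dst_addr, copy,
				   FALSE);	/* not interruptible */
	if (kr != KERN_SUCCESS)
		vm_map_copy_discard(copy);	/* copy was not consumed */
	return kr;
}
#endif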
4936
4937
4938 /*
4939 * Routine: vm_map_copy_overwrite_unaligned [internal use only]
4940 *
4941 * Description:
4942 * Physically copy unaligned data
4943 *
4944 * Implementation:
4945 * Unaligned parts of pages have to be physically copied. We use
4946 * a modified form of vm_fault_copy (which understands non-aligned
4947 * page offsets and sizes) to do the copy. We attempt to copy as
4948 * much memory in one go as possible; however, vm_fault_copy copies
4949 * within one memory object, so we have to find the smallest of "amount left",
4950 * "source object data size" and "target object data size". With
4951 * unaligned data we don't need to split regions, therefore the source
4952 * (copy) object should be one map entry; the target range, however, may be
4953 * split over multiple map entries. In any event we are pessimistic
4954 * about these assumptions.
4955 *
4956 * Assumptions:
4957 * dst_map is locked on entry and is returned locked on success,
4958 * unlocked on error.
4959 */
4960
4961 static kern_return_t
4962 vm_map_copy_overwrite_unaligned(
4963 vm_map_t dst_map,
4964 vm_map_entry_t entry,
4965 vm_map_copy_t copy,
4966 vm_map_offset_t start)
4967 {
4968 vm_map_entry_t copy_entry = vm_map_copy_first_entry(copy);
4969 vm_map_version_t version;
4970 vm_object_t dst_object;
4971 vm_object_offset_t dst_offset;
4972 vm_object_offset_t src_offset;
4973 vm_object_offset_t entry_offset;
4974 vm_map_offset_t entry_end;
4975 vm_map_size_t src_size,
4976 dst_size,
4977 copy_size,
4978 amount_left;
4979 kern_return_t kr = KERN_SUCCESS;
4980
4981 vm_map_lock_write_to_read(dst_map);
4982
4983 src_offset = copy->offset - vm_object_trunc_page(copy->offset);
4984 amount_left = copy->size;
4985 /*
4986 * Unaligned, so we never clipped this entry; we need the offset into
4987 * the vm_object, not just the data.
4988 */
4989 while (amount_left > 0) {
4990
4991 if (entry == vm_map_to_entry(dst_map)) {
4992 vm_map_unlock_read(dst_map);
4993 return KERN_INVALID_ADDRESS;
4994 }
4995
4996 /* "start" must be within the current map entry */
4997 assert ((start>=entry->vme_start) && (start<entry->vme_end));
4998
4999 dst_offset = start - entry->vme_start;
5000
5001 dst_size = entry->vme_end - start;
5002
5003 src_size = copy_entry->vme_end -
5004 (copy_entry->vme_start + src_offset);
5005
5006 if (dst_size < src_size) {
5007 /*
5008 * we can only copy dst_size bytes before
5009 * we have to get the next destination entry
5010 */
5011 copy_size = dst_size;
5012 } else {
5013 /*
5014 * we can only copy src_size bytes before
5015 * we have to get the next source copy entry
5016 */
5017 copy_size = src_size;
5018 }
5019
5020 if (copy_size > amount_left) {
5021 copy_size = amount_left;
5022 }
5023 /*
5024 * Entry needs copy: create a shadow object for the
5025 * copy-on-write region.
5026 */
5027 if (entry->needs_copy &&
5028 ((entry->protection & VM_PROT_WRITE) != 0))
5029 {
5030 if (vm_map_lock_read_to_write(dst_map)) {
5031 vm_map_lock_read(dst_map);
5032 goto RetryLookup;
5033 }
5034 vm_object_shadow(&entry->object.vm_object,
5035 &entry->offset,
5036 (vm_map_size_t)(entry->vme_end
5037 - entry->vme_start));
5038 entry->needs_copy = FALSE;
5039 vm_map_lock_write_to_read(dst_map);
5040 }
5041 dst_object = entry->object.vm_object;
5042 /*
5043 * Unlike with the virtual (aligned) copy, we're going
5044 * to fault on it, therefore we need a target object.
5045 */
5046 if (dst_object == VM_OBJECT_NULL) {
5047 if (vm_map_lock_read_to_write(dst_map)) {
5048 vm_map_lock_read(dst_map);
5049 goto RetryLookup;
5050 }
5051 dst_object = vm_object_allocate((vm_map_size_t)
5052 entry->vme_end - entry->vme_start);
5053 entry->object.vm_object = dst_object;
5054 entry->offset = 0;
5055 vm_map_lock_write_to_read(dst_map);
5056 }
5057 /*
5058 * Take an object reference and unlock map. The "entry" may
5059 * disappear or change when the map is unlocked.
5060 */
5061 vm_object_reference(dst_object);
5062 version.main_timestamp = dst_map->timestamp;
5063 entry_offset = entry->offset;
5064 entry_end = entry->vme_end;
5065 vm_map_unlock_read(dst_map);
5066 /*
5067 * Copy as much as possible in one pass
5068 */
5069 kr = vm_fault_copy(
5070 copy_entry->object.vm_object,
5071 copy_entry->offset + src_offset,
5072 &copy_size,
5073 dst_object,
5074 entry_offset + dst_offset,
5075 dst_map,
5076 &version,
5077 THREAD_UNINT );
5078
5079 start += copy_size;
5080 src_offset += copy_size;
5081 amount_left -= copy_size;
5082 /*
5083 * Release the object reference
5084 */
5085 vm_object_deallocate(dst_object);
5086 /*
5087 * If a hard error occurred, return it now
5088 */
5089 if (kr != KERN_SUCCESS)
5090 return kr;
5091
5092 if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
5093 || amount_left == 0)
5094 {
5095 /*
5096 * all done with this copy entry, dispose.
5097 */
5098 vm_map_copy_entry_unlink(copy, copy_entry);
5099 vm_object_deallocate(copy_entry->object.vm_object);
5100 vm_map_copy_entry_dispose(copy, copy_entry);
5101
5102 if ((copy_entry = vm_map_copy_first_entry(copy))
5103 == vm_map_copy_to_entry(copy) && amount_left) {
5104 /*
5105 * not finished copying but ran out of source
5106 */
5107 return KERN_INVALID_ADDRESS;
5108 }
5109 src_offset = 0;
5110 }
5111
5112 if (amount_left == 0)
5113 return KERN_SUCCESS;
5114
5115 vm_map_lock_read(dst_map);
5116 if (version.main_timestamp == dst_map->timestamp) {
5117 if (start == entry_end) {
5118 /*
5119 * destination region is split. Use the version
5120 * information to avoid a lookup in the normal
5121 * case.
5122 */
5123 entry = entry->vme_next;
5124 /*
5125 * should be contiguous. Fail if we encounter
5126 * a hole in the destination.
5127 */
5128 if (start != entry->vme_start) {
5129 vm_map_unlock_read(dst_map);
5130 return KERN_INVALID_ADDRESS ;
5131 }
5132 }
5133 } else {
5134 /*
5135 * Map version check failed.
5136 * we must lookup the entry because somebody
5137 * might have changed the map behind our backs.
5138 */
5139 RetryLookup:
5140 if (!vm_map_lookup_entry(dst_map, start, &entry))
5141 {
5142 vm_map_unlock_read(dst_map);
5143 return KERN_INVALID_ADDRESS ;
5144 }
5145 }
5146 }/* while */
5147
5148 return KERN_SUCCESS;
5149 }/* vm_map_copy_overwrite_unaligned */
5150
5151 /*
5152 * Routine: vm_map_copy_overwrite_aligned [internal use only]
5153 *
5154 * Description:
5155 * Does all the vm_trickery possible for whole pages.
5156 *
5157 * Implementation:
5158 *
5159 * If there are no permanent objects in the destination,
5160 * and the source and destination map entry zones match,
5161 * and the destination map entry is not shared,
5162 * then the map entries can be deleted and replaced
5163 * with those from the copy. The following code is the
5164 * basic idea of what to do, but there are lots of annoying
5165 * little details about getting protection and inheritance
5166 * right. We should add protection, inheritance, and sharing checks
5167 * to the above pass and make sure that no wiring is involved.
5168 */
5169
5170 static kern_return_t
5171 vm_map_copy_overwrite_aligned(
5172 vm_map_t dst_map,
5173 vm_map_entry_t tmp_entry,
5174 vm_map_copy_t copy,
5175 vm_map_offset_t start,
5176 #if !BAD_OPTIMIZATION
5177 __unused
5178 #endif /* !BAD_OPTIMIZATION */
5179 pmap_t pmap)
5180 {
5181 vm_object_t object;
5182 vm_map_entry_t copy_entry;
5183 vm_map_size_t copy_size;
5184 vm_map_size_t size;
5185 vm_map_entry_t entry;
5186
5187 while ((copy_entry = vm_map_copy_first_entry(copy))
5188 != vm_map_copy_to_entry(copy))
5189 {
5190 copy_size = (copy_entry->vme_end - copy_entry->vme_start);
5191
5192 entry = tmp_entry;
5193 if (entry == vm_map_to_entry(dst_map)) {
5194 vm_map_unlock(dst_map);
5195 return KERN_INVALID_ADDRESS;
5196 }
5197 size = (entry->vme_end - entry->vme_start);
5198 /*
5199 * Make sure that no holes popped up in the
5200 * address map, and that the protection is
5201 * still valid, in case the map was unlocked
5202 * earlier.
5203 */
5204
5205 if ((entry->vme_start != start) || ((entry->is_sub_map)
5206 && !entry->needs_copy)) {
5207 vm_map_unlock(dst_map);
5208 return(KERN_INVALID_ADDRESS);
5209 }
5210 assert(entry != vm_map_to_entry(dst_map));
5211
5212 /*
5213 * Check protection again
5214 */
5215
5216 if ( ! (entry->protection & VM_PROT_WRITE)) {
5217 vm_map_unlock(dst_map);
5218 return(KERN_PROTECTION_FAILURE);
5219 }
5220
5221 /*
5222 * Adjust to source size first
5223 */
5224
5225 if (copy_size < size) {
5226 vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
5227 size = copy_size;
5228 }
5229
5230 /*
5231 * Adjust to destination size
5232 */
5233
5234 if (size < copy_size) {
5235 vm_map_copy_clip_end(copy, copy_entry,
5236 copy_entry->vme_start + size);
5237 copy_size = size;
5238 }
5239
5240 assert((entry->vme_end - entry->vme_start) == size);
5241 assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
5242 assert((copy_entry->vme_end - copy_entry->vme_start) == size);
5243
5244 /*
5245 * If the destination contains temporary unshared memory,
5246 * we can perform the copy by throwing it away and
5247 * installing the source data.
5248 */
5249
5250 object = entry->object.vm_object;
5251 if ((!entry->is_shared &&
5252 ((object == VM_OBJECT_NULL) ||
5253 (object->internal && !object->true_share))) ||
5254 entry->needs_copy) {
5255 vm_object_t old_object = entry->object.vm_object;
5256 vm_object_offset_t old_offset = entry->offset;
5257 vm_object_offset_t offset;
5258
5259 /*
5260 * Ensure that the source and destination aren't
5261 * identical
5262 */
5263 if (old_object == copy_entry->object.vm_object &&
5264 old_offset == copy_entry->offset) {
5265 vm_map_copy_entry_unlink(copy, copy_entry);
5266 vm_map_copy_entry_dispose(copy, copy_entry);
5267
5268 if (old_object != VM_OBJECT_NULL)
5269 vm_object_deallocate(old_object);
5270
5271 start = tmp_entry->vme_end;
5272 tmp_entry = tmp_entry->vme_next;
5273 continue;
5274 }
5275
5276 if (old_object != VM_OBJECT_NULL) {
5277 if(entry->is_sub_map) {
5278 if(entry->use_pmap) {
5279 #ifndef NO_NESTED_PMAP
5280 pmap_unnest(dst_map->pmap,
5281 (addr64_t)entry->vme_start);
5282 #endif /* NO_NESTED_PMAP */
5283 if(dst_map->mapped) {
5284 /* clean up parent */
5285 /* map/maps */
5286 vm_map_submap_pmap_clean(
5287 dst_map, entry->vme_start,
5288 entry->vme_end,
5289 entry->object.sub_map,
5290 entry->offset);
5291 }
5292 } else {
5293 vm_map_submap_pmap_clean(
5294 dst_map, entry->vme_start,
5295 entry->vme_end,
5296 entry->object.sub_map,
5297 entry->offset);
5298 }
5299 vm_map_deallocate(
5300 entry->object.sub_map);
5301 } else {
5302 if(dst_map->mapped) {
5303 vm_object_pmap_protect(
5304 entry->object.vm_object,
5305 entry->offset,
5306 entry->vme_end
5307 - entry->vme_start,
5308 PMAP_NULL,
5309 entry->vme_start,
5310 VM_PROT_NONE);
5311 } else {
5312 pmap_remove(dst_map->pmap,
5313 (addr64_t)(entry->vme_start),
5314 (addr64_t)(entry->vme_end));
5315 }
5316 vm_object_deallocate(old_object);
5317 }
5318 }
5319
5320 entry->is_sub_map = FALSE;
5321 entry->object = copy_entry->object;
5322 object = entry->object.vm_object;
5323 entry->needs_copy = copy_entry->needs_copy;
5324 entry->wired_count = 0;
5325 entry->user_wired_count = 0;
5326 offset = entry->offset = copy_entry->offset;
5327
5328 vm_map_copy_entry_unlink(copy, copy_entry);
5329 vm_map_copy_entry_dispose(copy, copy_entry);
5330 #if BAD_OPTIMIZATION
5331 /*
5332 * If we turn this optimization back on,
5333 * we need to revisit our use of pmap mappings;
5334 * large copies will cause us to run out and panic.
5335 * This optimization only saved on average 2 us per page if ALL
5336 * the pages in the source were currently mapped
5337 * and ALL the pages in the dest were touched; if fewer
5338 * than 2/3 of the pages were touched, this optimization actually cost more cycles.
5339 */
5340
5341 /*
5342 * Try to aggressively enter physical mappings
5343 * (but avoid uninstantiated objects)
5344 */
5345 if (object != VM_OBJECT_NULL) {
5346 vm_map_offset_t va = entry->vme_start;
5347
5348 while (va < entry->vme_end) {
5349 register vm_page_t m;
5350 vm_prot_t prot;
5351
5352 /*
5353 * Look for the page in the top object
5354 */
5355 prot = entry->protection;
5356 vm_object_lock(object);
5357 vm_object_paging_begin(object);
5358
5359 /*
5360 * ENCRYPTED SWAP:
5361 * If the page is encrypted, skip it:
5362 * we can't let the user see the encrypted
5363 * contents. The page will get decrypted
5364 * on demand when the user generates a
5365 * soft-fault when trying to access it.
5366 */
5367 if ((m = vm_page_lookup(object,offset)) !=
5368 VM_PAGE_NULL && !m->busy &&
5369 !m->fictitious && !m->encrypted &&
5370 (!m->unusual || (!m->error &&
5371 !m->restart && !m->absent &&
5372 (prot & m->page_lock) == 0))) {
5373
5374 m->busy = TRUE;
5375 vm_object_unlock(object);
5376
5377 /*
5378 * Honor COW obligations
5379 */
5380 if (entry->needs_copy)
5381 prot &= ~VM_PROT_WRITE;
5382 #ifdef STACK_ONLY_NX
5383 if (entry->alias != VM_MEMORY_STACK && prot)
5384 prot |= VM_PROT_EXECUTE;
5385 #endif
5386 /* It is our policy to require */
5387 /* explicit sync from anyone */
5388 /* writing code and then */
5389 /* a pc to execute it. */
5390 /* No isync here */
5391
5392 PMAP_ENTER(pmap, va, m, prot,
5393 ((unsigned int)
5394 (m->object->wimg_bits))
5395 & VM_WIMG_MASK,
5396 FALSE);
5397
5398 vm_object_lock(object);
5399 vm_page_lock_queues();
5400 if (!m->active && !m->inactive)
5401 vm_page_activate(m);
5402 vm_page_unlock_queues();
5403 PAGE_WAKEUP_DONE(m);
5404 }
5405 vm_object_paging_end(object);
5406 vm_object_unlock(object);
5407
5408 offset += PAGE_SIZE_64;
5409 va += PAGE_SIZE;
5410 } /* end while (va < entry->vme_end) */
5411 } /* end if (object) */
5412 #endif
5413 /*
5414 * Set up for the next iteration. The map
5415 * has not been unlocked, so the next
5416 * address should be at the end of this
5417 * entry, and the next map entry should be
5418 * the one following it.
5419 */
5420
5421 start = tmp_entry->vme_end;
5422 tmp_entry = tmp_entry->vme_next;
5423 } else {
5424 vm_map_version_t version;
5425 vm_object_t dst_object = entry->object.vm_object;
5426 vm_object_offset_t dst_offset = entry->offset;
5427 kern_return_t r;
5428
5429 /*
5430 * Take an object reference, and record
5431 * the map version information so that the
5432 * map can be safely unlocked.
5433 */
5434
5435 vm_object_reference(dst_object);
5436
5437 /* account for unlock bumping up timestamp */
5438 version.main_timestamp = dst_map->timestamp + 1;
5439
5440 vm_map_unlock(dst_map);
5441
5442 /*
5443 * Copy as much as possible in one pass
5444 */
5445
5446 copy_size = size;
5447 r = vm_fault_copy(
5448 copy_entry->object.vm_object,
5449 copy_entry->offset,
5450 &copy_size,
5451 dst_object,
5452 dst_offset,
5453 dst_map,
5454 &version,
5455 THREAD_UNINT );
5456
5457 /*
5458 * Release the object reference
5459 */
5460
5461 vm_object_deallocate(dst_object);
5462
5463 /*
5464 * If a hard error occurred, return it now
5465 */
5466
5467 if (r != KERN_SUCCESS)
5468 return(r);
5469
5470 if (copy_size != 0) {
5471 /*
5472 * Dispose of the copied region
5473 */
5474
5475 vm_map_copy_clip_end(copy, copy_entry,
5476 copy_entry->vme_start + copy_size);
5477 vm_map_copy_entry_unlink(copy, copy_entry);
5478 vm_object_deallocate(copy_entry->object.vm_object);
5479 vm_map_copy_entry_dispose(copy, copy_entry);
5480 }
5481
5482 /*
5483 * Pick up in the destination map where we left off.
5484 *
5485 * Use the version information to avoid a lookup
5486 * in the normal case.
5487 */
5488
5489 start += copy_size;
5490 vm_map_lock(dst_map);
5491 if (version.main_timestamp == dst_map->timestamp) {
5492 /* We can safely use saved tmp_entry value */
5493
5494 vm_map_clip_end(dst_map, tmp_entry, start);
5495 tmp_entry = tmp_entry->vme_next;
5496 } else {
5497 /* Must do lookup of tmp_entry */
5498
5499 if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
5500 vm_map_unlock(dst_map);
5501 return(KERN_INVALID_ADDRESS);
5502 }
5503 vm_map_clip_start(dst_map, tmp_entry, start);
5504 }
5505 }
5506 }/* while */
5507
5508 return(KERN_SUCCESS);
5509 }/* vm_map_copy_overwrite_aligned */
5510
5511 /*
5512 * Routine: vm_map_copyin_kernel_buffer [internal use only]
5513 *
5514 * Description:
5515 * Copy in data to a kernel buffer from space in the
5516 * source map. The original space may be optionally
5517 * deallocated.
5518 *
5519 * If successful, returns a new copy object.
5520 */
5521 static kern_return_t
5522 vm_map_copyin_kernel_buffer(
5523 vm_map_t src_map,
5524 vm_map_offset_t src_addr,
5525 vm_map_size_t len,
5526 boolean_t src_destroy,
5527 vm_map_copy_t *copy_result)
5528 {
5529 kern_return_t kr;
5530 vm_map_copy_t copy;
5531 vm_map_size_t kalloc_size = sizeof(struct vm_map_copy) + len;
5532
5533 copy = (vm_map_copy_t) kalloc(kalloc_size);
5534 if (copy == VM_MAP_COPY_NULL) {
5535 return KERN_RESOURCE_SHORTAGE;
5536 }
5537 copy->type = VM_MAP_COPY_KERNEL_BUFFER;
5538 copy->size = len;
5539 copy->offset = 0;
5540 copy->cpy_kdata = (void *) (copy + 1);
5541 copy->cpy_kalloc_size = kalloc_size;
5542
5543 kr = copyinmap(src_map, src_addr, copy->cpy_kdata, len);
5544 if (kr != KERN_SUCCESS) {
5545 kfree(copy, kalloc_size);
5546 return kr;
5547 }
5548 if (src_destroy) {
5549 (void) vm_map_remove(src_map, vm_map_trunc_page(src_addr),
5550 vm_map_round_page(src_addr + len),
5551 VM_MAP_REMOVE_INTERRUPTIBLE |
5552 VM_MAP_REMOVE_WAIT_FOR_KWIRE |
5553 (src_map == kernel_map) ?
5554 VM_MAP_REMOVE_KUNWIRE : 0);
5555 }
5556 *copy_result = copy;
5557 return KERN_SUCCESS;
5558 }
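
/*
 * Illustrative sketch of the layout produced above: the vm_map_copy
 * header and the copied data share one kalloc() allocation, with the
 * data starting immediately after the header ("copy + 1").  That is why
 * vm_map_copy_discard() releases such a copy with a single kfree() of
 * cpy_kalloc_size bytes.  The helper name example_peek_kernel_buffer is
 * hypothetical.
 */
#if 0 /* example only */
static void *
example_peek_kernel_buffer(
	vm_map_copy_t	copy,
	vm_map_size_t	*len)
{
	assert(copy->type == VM_MAP_COPY_KERNEL_BUFFER);
	/* data lives right behind the header in the same allocation */
	assert(copy->cpy_kdata == (void *) (copy + 1));
	*len = copy->size;
	return copy->cpy_kdata;
}
#endif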
5559
5560 /*
5561 * Routine: vm_map_copyout_kernel_buffer [internal use only]
5562 *
5563 * Description:
5564 * Copy out data from a kernel buffer into space in the
5565 * destination map. The space may be optionally dynamically
5566 * allocated.
5567 *
5568 * If successful, consumes the copy object.
5569 * Otherwise, the caller is responsible for it.
5570 */
5571 static int vm_map_copyout_kernel_buffer_failures = 0;
5572 static kern_return_t
5573 vm_map_copyout_kernel_buffer(
5574 vm_map_t map,
5575 vm_map_address_t *addr, /* IN/OUT */
5576 vm_map_copy_t copy,
5577 boolean_t overwrite)
5578 {
5579 kern_return_t kr = KERN_SUCCESS;
5580 thread_t thread = current_thread();
5581
5582 if (!overwrite) {
5583
5584 /*
5585 * Allocate space in the target map for the data
5586 */
5587 *addr = 0;
5588 kr = vm_map_enter(map,
5589 addr,
5590 vm_map_round_page(copy->size),
5591 (vm_map_offset_t) 0,
5592 VM_FLAGS_ANYWHERE,
5593 VM_OBJECT_NULL,
5594 (vm_object_offset_t) 0,
5595 FALSE,
5596 VM_PROT_DEFAULT,
5597 VM_PROT_ALL,
5598 VM_INHERIT_DEFAULT);
5599 if (kr != KERN_SUCCESS)
5600 return kr;
5601 }
5602
5603 /*
5604 * Copyout the data from the kernel buffer to the target map.
5605 */
5606 if (thread->map == map) {
5607
5608 /*
5609 * If the target map is the current map, just do
5610 * the copy.
5611 */
5612 if (copyout(copy->cpy_kdata, *addr, copy->size)) {
5613 kr = KERN_INVALID_ADDRESS;
5614 }
5615 }
5616 else {
5617 vm_map_t oldmap;
5618
5619 /*
5620 * If the target map is another map, assume the
5621 * target's address space identity for the duration
5622 * of the copy.
5623 */
5624 vm_map_reference(map);
5625 oldmap = vm_map_switch(map);
5626
5627 if (copyout(copy->cpy_kdata, *addr, copy->size)) {
5628 vm_map_copyout_kernel_buffer_failures++;
5629 kr = KERN_INVALID_ADDRESS;
5630 }
5631
5632 (void) vm_map_switch(oldmap);
5633 vm_map_deallocate(map);
5634 }
5635
5636 if (kr != KERN_SUCCESS) {
5637 /* the copy failed, clean up */
5638 if (!overwrite) {
5639 /*
5640 * Deallocate the space we allocated in the target map.
5641 */
5642 (void) vm_map_remove(map,
5643 vm_map_trunc_page(*addr),
5644 vm_map_round_page(*addr +
5645 vm_map_round_page(copy->size)),
5646 VM_MAP_NO_FLAGS);
5647 *addr = 0;
5648 }
5649 } else {
5650 /* copy was successful, discard the copy structure */
5651 kfree(copy, copy->cpy_kalloc_size);
5652 }
5653
5654 return kr;
5655 }
5656
5657 /*
5658 * Macro: vm_map_copy_insert
5659 *
5660 * Description:
5661 * Link a copy chain ("copy") into a map at the
5662 * specified location (after "where").
5663 * Side effects:
5664 * The copy chain is destroyed.
5665 * Warning:
5666 * The arguments are evaluated multiple times.
5667 */
5668 #define vm_map_copy_insert(map, where, copy) \
5669 MACRO_BEGIN \
5670 vm_map_t VMCI_map; \
5671 vm_map_entry_t VMCI_where; \
5672 vm_map_copy_t VMCI_copy; \
5673 VMCI_map = (map); \
5674 VMCI_where = (where); \
5675 VMCI_copy = (copy); \
5676 ((VMCI_where->vme_next)->vme_prev = vm_map_copy_last_entry(VMCI_copy))\
5677 ->vme_next = (VMCI_where->vme_next); \
5678 ((VMCI_where)->vme_next = vm_map_copy_first_entry(VMCI_copy)) \
5679 ->vme_prev = VMCI_where; \
5680 VMCI_map->hdr.nentries += VMCI_copy->cpy_hdr.nentries; \
5681 UPDATE_FIRST_FREE(VMCI_map, VMCI_map->first_free); \
5682 zfree(vm_map_copy_zone, VMCI_copy); \
5683 MACRO_END
5684
5685 /*
5686 * Routine: vm_map_copyout
5687 *
5688 * Description:
5689 * Copy out a copy chain ("copy") into newly-allocated
5690 * space in the destination map.
5691 *
5692 * If successful, consumes the copy object.
5693 * Otherwise, the caller is responsible for it.
5694 */
5695 kern_return_t
5696 vm_map_copyout(
5697 vm_map_t dst_map,
5698 vm_map_address_t *dst_addr, /* OUT */
5699 vm_map_copy_t copy)
5700 {
5701 vm_map_size_t size;
5702 vm_map_size_t adjustment;
5703 vm_map_offset_t start;
5704 vm_object_offset_t vm_copy_start;
5705 vm_map_entry_t last;
5706 register
5707 vm_map_entry_t entry;
5708
5709 /*
5710 * Check for null copy object.
5711 */
5712
5713 if (copy == VM_MAP_COPY_NULL) {
5714 *dst_addr = 0;
5715 return(KERN_SUCCESS);
5716 }
5717
5718 /*
5719 * Check for special copy object, created
5720 * by vm_map_copyin_object.
5721 */
5722
5723 if (copy->type == VM_MAP_COPY_OBJECT) {
5724 vm_object_t object = copy->cpy_object;
5725 kern_return_t kr;
5726 vm_object_offset_t offset;
5727
5728 offset = vm_object_trunc_page(copy->offset);
5729 size = vm_map_round_page(copy->size +
5730 (vm_map_size_t)(copy->offset - offset));
5731 *dst_addr = 0;
5732 kr = vm_map_enter(dst_map, dst_addr, size,
5733 (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
5734 object, offset, FALSE,
5735 VM_PROT_DEFAULT, VM_PROT_ALL,
5736 VM_INHERIT_DEFAULT);
5737 if (kr != KERN_SUCCESS)
5738 return(kr);
5739 /* Account for non-pagealigned copy object */
5740 *dst_addr += (vm_map_offset_t)(copy->offset - offset);
5741 zfree(vm_map_copy_zone, copy);
5742 return(KERN_SUCCESS);
5743 }
5744
5745 /*
5746 * Check for special kernel buffer allocated
5747 * by new_ipc_kmsg_copyin.
5748 */
5749
5750 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
5751 return(vm_map_copyout_kernel_buffer(dst_map, dst_addr,
5752 copy, FALSE));
5753 }
5754
5755 /*
5756 * Find space for the data
5757 */
5758
5759 vm_copy_start = vm_object_trunc_page(copy->offset);
5760 size = vm_map_round_page((vm_map_size_t)copy->offset + copy->size)
5761 - vm_copy_start;
5762
5763 StartAgain: ;
5764
5765 vm_map_lock(dst_map);
5766 assert(first_free_is_valid(dst_map));
5767 start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
5768 vm_map_min(dst_map) : last->vme_end;
5769
5770 while (TRUE) {
5771 vm_map_entry_t next = last->vme_next;
5772 vm_map_offset_t end = start + size;
5773
5774 if ((end > dst_map->max_offset) || (end < start)) {
5775 if (dst_map->wait_for_space) {
5776 if (size <= (dst_map->max_offset - dst_map->min_offset)) {
5777 assert_wait((event_t) dst_map,
5778 THREAD_INTERRUPTIBLE);
5779 vm_map_unlock(dst_map);
5780 thread_block(THREAD_CONTINUE_NULL);
5781 goto StartAgain;
5782 }
5783 }
5784 vm_map_unlock(dst_map);
5785 return(KERN_NO_SPACE);
5786 }
5787
5788 if ((next == vm_map_to_entry(dst_map)) ||
5789 (next->vme_start >= end))
5790 break;
5791
5792 last = next;
5793 start = last->vme_end;
5794 }
5795
5796 /*
5797 * Since we're going to just drop the map
5798 * entries from the copy into the destination
5799 * map, they must come from the same pool.
5800 */
5801
5802 if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
5803 /*
5804 * Mismatches occur when dealing with the default
5805 * pager.
5806 */
5807 zone_t old_zone;
5808 vm_map_entry_t next, new;
5809
5810 /*
5811 * Find the zone that the copies were allocated from
5812 */
5813 old_zone = (copy->cpy_hdr.entries_pageable)
5814 ? vm_map_entry_zone
5815 : vm_map_kentry_zone;
5816 entry = vm_map_copy_first_entry(copy);
5817
5818 /*
5819 * Reinitialize the copy so that vm_map_copy_entry_link
5820 * will work.
5821 */
5822 copy->cpy_hdr.nentries = 0;
5823 copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;
5824 vm_map_copy_first_entry(copy) =
5825 vm_map_copy_last_entry(copy) =
5826 vm_map_copy_to_entry(copy);
5827
5828 /*
5829 * Copy each entry.
5830 */
5831 while (entry != vm_map_copy_to_entry(copy)) {
5832 new = vm_map_copy_entry_create(copy);
5833 vm_map_entry_copy_full(new, entry);
5834 new->use_pmap = FALSE; /* clr address space specifics */
5835 vm_map_copy_entry_link(copy,
5836 vm_map_copy_last_entry(copy),
5837 new);
5838 next = entry->vme_next;
5839 zfree(old_zone, entry);
5840 entry = next;
5841 }
5842 }
5843
5844 /*
5845 * Adjust the addresses in the copy chain, and
5846 * reset the region attributes.
5847 */
5848
5849 adjustment = start - vm_copy_start;
5850 for (entry = vm_map_copy_first_entry(copy);
5851 entry != vm_map_copy_to_entry(copy);
5852 entry = entry->vme_next) {
5853 entry->vme_start += adjustment;
5854 entry->vme_end += adjustment;
5855
5856 entry->inheritance = VM_INHERIT_DEFAULT;
5857 entry->protection = VM_PROT_DEFAULT;
5858 entry->max_protection = VM_PROT_ALL;
5859 entry->behavior = VM_BEHAVIOR_DEFAULT;
5860
5861 /*
5862 * If the entry is now wired,
5863 * map the pages into the destination map.
5864 */
5865 if (entry->wired_count != 0) {
5866 register vm_map_offset_t va;
5867 vm_object_offset_t offset;
5868 register vm_object_t object;
5869 vm_prot_t prot;
5870
5871 object = entry->object.vm_object;
5872 offset = entry->offset;
5873 va = entry->vme_start;
5874
5875 pmap_pageable(dst_map->pmap,
5876 entry->vme_start,
5877 entry->vme_end,
5878 TRUE);
5879
5880 while (va < entry->vme_end) {
5881 register vm_page_t m;
5882
5883 /*
5884 * Look up the page in the object.
5885 * Assert that the page will be found in the
5886 * top object:
5887 * either
5888 * the object was newly created by
5889 * vm_object_copy_slowly, and has
5890 * copies of all of the pages from
5891 * the source object
5892 * or
5893 * the object was moved from the old
5894 * map entry; because the old map
5895 * entry was wired, all of the pages
5896 * were in the top-level object.
5897 * (XXX not true if we wire pages for
5898 * reading)
5899 */
5900 vm_object_lock(object);
5901 vm_object_paging_begin(object);
5902
5903 m = vm_page_lookup(object, offset);
5904 if (m == VM_PAGE_NULL || m->wire_count == 0 ||
5905 m->absent)
5906 panic("vm_map_copyout: wiring 0x%x", m);
5907
5908 /*
5909 * ENCRYPTED SWAP:
5910 * The page is assumed to be wired here, so it
5911 * shouldn't be encrypted. Otherwise, we
5912 * couldn't enter it in the page table, since
5913 * we don't want the user to see the encrypted
5914 * data.
5915 */
5916 ASSERT_PAGE_DECRYPTED(m);
5917
5918 m->busy = TRUE;
5919 vm_object_unlock(object);
5920 prot = entry->protection;
5921 #ifdef STACK_ONLY_NX
5922 if (entry->alias != VM_MEMORY_STACK && prot)
5923 prot |= VM_PROT_EXECUTE;
5924 #endif
5925 PMAP_ENTER(dst_map->pmap, va, m, prot,
5926 ((unsigned int)
5927 (m->object->wimg_bits))
5928 & VM_WIMG_MASK,
5929 TRUE);
5930
5931 vm_object_lock(object);
5932 PAGE_WAKEUP_DONE(m);
5933 /* the page is wired, so we don't have to activate */
5934 vm_object_paging_end(object);
5935 vm_object_unlock(object);
5936
5937 offset += PAGE_SIZE_64;
5938 va += PAGE_SIZE;
5939 }
5940 }
5941 else if (size <= vm_map_aggressive_enter_max) {
5942
5943 register vm_map_offset_t va;
5944 vm_object_offset_t offset;
5945 register vm_object_t object;
5946 vm_prot_t prot;
5947
5948 object = entry->object.vm_object;
5949 if (object != VM_OBJECT_NULL) {
5950
5951 offset = entry->offset;
5952 va = entry->vme_start;
5953 while (va < entry->vme_end) {
5954 register vm_page_t m;
5955
5956 /*
5957 * Look up the page in the object.
5958 * Assert that the page will be found
5959 * in the top object if at all...
5960 */
5961 vm_object_lock(object);
5962 vm_object_paging_begin(object);
5963
5964 /*
5965 * ENCRYPTED SWAP:
5966 * If the page is encrypted, skip it:
5967 * we can't let the user see the
5968 * encrypted contents. The page will
5969 * get decrypted on demand when the
5970 * user generates a soft-fault when
5971 * trying to access it.
5972 */
5973 if (((m = vm_page_lookup(object,
5974 offset))
5975 != VM_PAGE_NULL) &&
5976 !m->busy && !m->fictitious &&
5977 !m->encrypted &&
5978 !m->absent && !m->error) {
5979 m->busy = TRUE;
5980 vm_object_unlock(object);
5981
5982 /* honor cow obligations */
5983 prot = entry->protection;
5984 if (entry->needs_copy)
5985 prot &= ~VM_PROT_WRITE;
5986 #ifdef STACK_ONLY_NX
5987 if (entry->alias != VM_MEMORY_STACK && prot)
5988 prot |= VM_PROT_EXECUTE;
5989 #endif
5990 PMAP_ENTER(dst_map->pmap, va,
5991 m, prot,
5992 ((unsigned int)
5993 (m->object->wimg_bits))
5994 & VM_WIMG_MASK,
5995 FALSE);
5996
5997 vm_object_lock(object);
5998 vm_page_lock_queues();
5999 if (!m->active && !m->inactive)
6000 vm_page_activate(m);
6001 vm_page_unlock_queues();
6002 PAGE_WAKEUP_DONE(m);
6003 }
6004 vm_object_paging_end(object);
6005 vm_object_unlock(object);
6006
6007 offset += PAGE_SIZE_64;
6008 va += PAGE_SIZE;
6009 }
6010 }
6011 }
6012 }
6013
6014 /*
6015 * Correct the page alignment for the result
6016 */
6017
6018 *dst_addr = start + (copy->offset - vm_copy_start);
6019
6020 /*
6021 * Update the hints and the map size
6022 */
6023
6024 SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy));
6025
6026 dst_map->size += size;
6027
6028 /*
6029 * Link in the copy
6030 */
6031
6032 vm_map_copy_insert(dst_map, last, copy);
6033
6034 vm_map_unlock(dst_map);
6035
6036 /*
6037 * XXX If wiring_required, call vm_map_pageable
6038 */
6039
6040 return(KERN_SUCCESS);
6041 }
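
/*
 * Illustrative sketch: a minimal copyin/copyout round trip that moves
 * "len" bytes at src_addr in src_map into freshly allocated space in
 * dst_map.  vm_map_copyin_common() is called directly with its full
 * argument list here; callers normally reach it through the
 * vm_map_copyin() wrapper.  The name example_move_region is
 * hypothetical.
 */
#if 0 /* example only */
static kern_return_t
example_move_region(
	vm_map_t		src_map,
	vm_map_address_t	src_addr,
	vm_map_size_t		len,
	vm_map_t		dst_map,
	vm_map_address_t	*dst_addr)	/* OUT */
{
	vm_map_copy_t	copy;
	kern_return_t	kr;

	/* capture the source range without destroying the source mapping */
	kr = vm_map_copyin_common(src_map, src_addr, len,
				  FALSE,	/* src_destroy */
				  FALSE,	/* src_volatile (unused) */
				  &copy,
				  FALSE);	/* use_maxprot */
	if (kr != KERN_SUCCESS)
		return kr;

	/* place it in the destination; consumes "copy" on success */
	kr = vm_map_copyout(dst_map, dst_addr, copy);
	if (kr != KERN_SUCCESS)
		vm_map_copy_discard(copy);
	return kr;
}
#endif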
6042
6043 /*
6044 * Routine: vm_map_copyin
6045 *
6046 * Description:
6047 * Copy the specified region (src_addr, len) from the
6048 * source address space (src_map), possibly removing
6049 * the region from the source address space (src_destroy).
6050 *
6051 * Returns:
6052 * A vm_map_copy_t object (copy_result), suitable for
6053 * insertion into another address space (using vm_map_copyout),
6054 * copying over another address space region (using
6055 * vm_map_copy_overwrite). If the copy is unused, it
6056 * should be destroyed (using vm_map_copy_discard).
6057 *
6058 * In/out conditions:
6059 * The source map should not be locked on entry.
6060 */
6061
6062 typedef struct submap_map {
6063 vm_map_t parent_map;
6064 vm_map_offset_t base_start;
6065 vm_map_offset_t base_end;
6066 struct submap_map *next;
6067 } submap_map_t;
6068
6069 kern_return_t
6070 vm_map_copyin_common(
6071 vm_map_t src_map,
6072 vm_map_address_t src_addr,
6073 vm_map_size_t len,
6074 boolean_t src_destroy,
6075 __unused boolean_t src_volatile,
6076 vm_map_copy_t *copy_result, /* OUT */
6077 boolean_t use_maxprot)
6078 {
6079 vm_map_entry_t tmp_entry; /* Result of last map lookup --
6080 * in multi-level lookup, this
6081 * entry contains the actual
6082 * vm_object/offset.
6083 */
6084 register
6085 vm_map_entry_t new_entry = VM_MAP_ENTRY_NULL; /* Map entry for copy */
6086
6087 vm_map_offset_t src_start; /* Start of current entry --
6088 * where copy is taking place now
6089 */
6090 vm_map_offset_t src_end; /* End of entire region to be
6091 * copied */
6092 vm_map_t base_map = src_map;
6093 boolean_t map_share=FALSE;
6094 submap_map_t *parent_maps = NULL;
6095
6096 register
6097 vm_map_copy_t copy; /* Resulting copy */
6098 vm_map_address_t copy_addr;
6099
6100 /*
6101 * Check for copies of zero bytes.
6102 */
6103
6104 if (len == 0) {
6105 *copy_result = VM_MAP_COPY_NULL;
6106 return(KERN_SUCCESS);
6107 }
6108
6109 /*
6110 * Check that the end address doesn't overflow
6111 */
6112 src_end = src_addr + len;
6113 if (src_end < src_addr)
6114 return KERN_INVALID_ADDRESS;
6115
6116 /*
6117 * If the copy is sufficiently small, use a kernel buffer instead
6118 * of making a virtual copy. The theory is that the cost of
6119 * setting up VM (and taking C-O-W faults) dominates the copy costs
6120 * for small regions.
6121 */
6122 if ((len < msg_ool_size_small) && !use_maxprot)
6123 return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
6124 src_destroy, copy_result);
6125
6126 /*
6127 * Compute (page aligned) start and end of region
6128 */
6129 src_start = vm_map_trunc_page(src_addr);
6130 src_end = vm_map_round_page(src_end);
6131
6132 XPR(XPR_VM_MAP, "vm_map_copyin_common map 0x%x addr 0x%x len 0x%x dest %d\n", (natural_t)src_map, src_addr, len, src_destroy, 0);
6133
6134 /*
6135 * Allocate a header element for the list.
6136 *
6137 * Use the start and end in the header to
6138 * remember the endpoints prior to rounding.
6139 */
6140
6141 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
6142 vm_map_copy_first_entry(copy) =
6143 vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
6144 copy->type = VM_MAP_COPY_ENTRY_LIST;
6145 copy->cpy_hdr.nentries = 0;
6146 copy->cpy_hdr.entries_pageable = TRUE;
6147
6148 copy->offset = src_addr;
6149 copy->size = len;
6150
6151 new_entry = vm_map_copy_entry_create(copy);
6152
6153 #define RETURN(x) \
6154 MACRO_BEGIN \
6155 vm_map_unlock(src_map); \
6156 if(src_map != base_map) \
6157 vm_map_deallocate(src_map); \
6158 if (new_entry != VM_MAP_ENTRY_NULL) \
6159 vm_map_copy_entry_dispose(copy,new_entry); \
6160 vm_map_copy_discard(copy); \
6161 { \
6162 submap_map_t *_ptr; \
6163 \
6164 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
6165 parent_maps=parent_maps->next; \
6166 if (_ptr->parent_map != base_map) \
6167 vm_map_deallocate(_ptr->parent_map); \
6168 kfree(_ptr, sizeof(submap_map_t)); \
6169 } \
6170 } \
6171 MACRO_RETURN(x); \
6172 MACRO_END
6173
6174 /*
6175 * Find the beginning of the region.
6176 */
6177
6178 vm_map_lock(src_map);
6179
6180 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry))
6181 RETURN(KERN_INVALID_ADDRESS);
6182 if(!tmp_entry->is_sub_map) {
6183 vm_map_clip_start(src_map, tmp_entry, src_start);
6184 }
6185 /* set for later submap fix-up */
6186 copy_addr = src_start;
6187
6188 /*
6189 * Go through entries until we get to the end.
6190 */
6191
6192 while (TRUE) {
6193 register
6194 vm_map_entry_t src_entry = tmp_entry; /* Top-level entry */
6195 vm_map_size_t src_size; /* Size of source
6196 * map entry (in both
6197 * maps)
6198 */
6199
6200 register
6201 vm_object_t src_object; /* Object to copy */
6202 vm_object_offset_t src_offset;
6203
6204 boolean_t src_needs_copy; /* Should source map
6205 * be made read-only
6206 * for copy-on-write?
6207 */
6208
6209 boolean_t new_entry_needs_copy; /* Will new entry be COW? */
6210
6211 boolean_t was_wired; /* Was source wired? */
6212 vm_map_version_t version; /* Version before locks
6213 * dropped to make copy
6214 */
6215 kern_return_t result; /* Return value from
6216 * copy_strategically.
6217 */
6218 while(tmp_entry->is_sub_map) {
6219 vm_map_size_t submap_len;
6220 submap_map_t *ptr;
6221
6222 ptr = (submap_map_t *)kalloc(sizeof(submap_map_t));
6223 ptr->next = parent_maps;
6224 parent_maps = ptr;
6225 ptr->parent_map = src_map;
6226 ptr->base_start = src_start;
6227 ptr->base_end = src_end;
6228 submap_len = tmp_entry->vme_end - src_start;
6229 if(submap_len > (src_end-src_start))
6230 submap_len = src_end-src_start;
6231 ptr->base_start += submap_len;
6232
6233 src_start -= tmp_entry->vme_start;
6234 src_start += tmp_entry->offset;
6235 src_end = src_start + submap_len;
6236 src_map = tmp_entry->object.sub_map;
6237 vm_map_lock(src_map);
6238 /* keep an outstanding reference for all maps in */
6239 /* the chain of parent maps, except the base map */
6240 vm_map_reference(src_map);
6241 vm_map_unlock(ptr->parent_map);
6242 if (!vm_map_lookup_entry(
6243 src_map, src_start, &tmp_entry))
6244 RETURN(KERN_INVALID_ADDRESS);
6245 map_share = TRUE;
6246 if(!tmp_entry->is_sub_map)
6247 vm_map_clip_start(src_map, tmp_entry, src_start);
6248 src_entry = tmp_entry;
6249 }
6250 if ((tmp_entry->object.vm_object != VM_OBJECT_NULL) &&
6251 (tmp_entry->object.vm_object->phys_contiguous)) {
6252 /* This is not supported for now. In the future */
6253 /* we will need to detect the phys_contig */
6254 /* condition and then upgrade copy_slowly */
6255 /* to do physical copy from the device mem */
6256 /* based object. We can piggy-back off of */
6257 /* the was_wired boolean to set up the */
6258 /* proper handling */
6259 RETURN(KERN_PROTECTION_FAILURE);
6260 }
6261 /*
6262 * Create a new address map entry to hold the result.
6263 * Fill in the fields from the appropriate source entries.
6264 * We must unlock the source map to do this if we need
6265 * to allocate a map entry.
6266 */
6267 if (new_entry == VM_MAP_ENTRY_NULL) {
6268 version.main_timestamp = src_map->timestamp;
6269 vm_map_unlock(src_map);
6270
6271 new_entry = vm_map_copy_entry_create(copy);
6272
6273 vm_map_lock(src_map);
6274 if ((version.main_timestamp + 1) != src_map->timestamp) {
6275 if (!vm_map_lookup_entry(src_map, src_start,
6276 &tmp_entry)) {
6277 RETURN(KERN_INVALID_ADDRESS);
6278 }
6279 vm_map_clip_start(src_map, tmp_entry, src_start);
6280 continue; /* restart w/ new tmp_entry */
6281 }
6282 }
6283
6284 /*
6285 * Verify that the region can be read.
6286 */
6287 if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE &&
6288 !use_maxprot) ||
6289 (src_entry->max_protection & VM_PROT_READ) == 0)
6290 RETURN(KERN_PROTECTION_FAILURE);
6291
6292 /*
6293 * Clip against the endpoints of the entire region.
6294 */
6295
6296 vm_map_clip_end(src_map, src_entry, src_end);
6297
6298 src_size = src_entry->vme_end - src_start;
6299 src_object = src_entry->object.vm_object;
6300 src_offset = src_entry->offset;
6301 was_wired = (src_entry->wired_count != 0);
6302
6303 vm_map_entry_copy(new_entry, src_entry);
6304 new_entry->use_pmap = FALSE; /* clr address space specifics */
6305
6306 /*
6307 * Attempt non-blocking copy-on-write optimizations.
6308 */
6309
6310 if (src_destroy &&
6311 (src_object == VM_OBJECT_NULL ||
6312 (src_object->internal && !src_object->true_share
6313 && !map_share))) {
6314 /*
6315 * If we are destroying the source, and the object
6316 * is internal, we can move the object reference
6317 * from the source to the copy. The copy is
6318 * copy-on-write only if the source is.
6319 * We make another reference to the object, because
6320 * destroying the source entry will deallocate it.
6321 */
6322 vm_object_reference(src_object);
6323
6324 /*
6325 * Copy is always unwired. vm_map_entry_copy
6326 * has already set its wired count to zero.
6327 */
6328
6329 goto CopySuccessful;
6330 }
6331
6332
6333 RestartCopy:
6334 XPR(XPR_VM_MAP, "vm_map_copyin_common src_obj 0x%x ent 0x%x obj 0x%x was_wired %d\n",
6335 src_object, new_entry, new_entry->object.vm_object,
6336 was_wired, 0);
6337 if ((src_object == VM_OBJECT_NULL ||
6338 (!was_wired && !map_share && !tmp_entry->is_shared)) &&
6339 vm_object_copy_quickly(
6340 &new_entry->object.vm_object,
6341 src_offset,
6342 src_size,
6343 &src_needs_copy,
6344 &new_entry_needs_copy)) {
6345
6346 new_entry->needs_copy = new_entry_needs_copy;
6347
6348 /*
6349 * Handle copy-on-write obligations
6350 */
6351
6352 if (src_needs_copy && !tmp_entry->needs_copy) {
6353 vm_prot_t prot;
6354
6355 prot = src_entry->protection & ~VM_PROT_WRITE;
6356 #ifdef STACK_ONLY_NX
6357 if (src_entry->alias != VM_MEMORY_STACK && prot)
6358 prot |= VM_PROT_EXECUTE;
6359 #endif
6360 vm_object_pmap_protect(
6361 src_object,
6362 src_offset,
6363 src_size,
6364 (src_entry->is_shared ?
6365 PMAP_NULL
6366 : src_map->pmap),
6367 src_entry->vme_start,
6368 prot);
6369
6370 tmp_entry->needs_copy = TRUE;
6371 }
6372
6373 /*
6374 * The map has never been unlocked, so it's safe
6375 * to move to the next entry rather than doing
6376 * another lookup.
6377 */
6378
6379 goto CopySuccessful;
6380 }
6381
6382 /*
6383 * Take an object reference, so that we may
6384 * release the map lock(s).
6385 */
6386
6387 assert(src_object != VM_OBJECT_NULL);
6388 vm_object_reference(src_object);
6389
6390 /*
6391 * Record the timestamp for later verification.
6392 * Unlock the map.
6393 */
6394
6395 version.main_timestamp = src_map->timestamp;
6396 vm_map_unlock(src_map); /* Increments timestamp once! */
6397
6398 /*
6399 * Perform the copy
6400 */
6401
6402 if (was_wired) {
6403 CopySlowly:
6404 vm_object_lock(src_object);
6405 result = vm_object_copy_slowly(
6406 src_object,
6407 src_offset,
6408 src_size,
6409 THREAD_UNINT,
6410 &new_entry->object.vm_object);
6411 new_entry->offset = 0;
6412 new_entry->needs_copy = FALSE;
6413
6414 }
6415 else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
6416 (tmp_entry->is_shared || map_share)) {
6417 vm_object_t new_object;
6418
6419 vm_object_lock(src_object);
6420 new_object = vm_object_copy_delayed(
6421 src_object,
6422 src_offset,
6423 src_size);
6424 if (new_object == VM_OBJECT_NULL)
6425 goto CopySlowly;
6426
6427 new_entry->object.vm_object = new_object;
6428 new_entry->needs_copy = TRUE;
6429 result = KERN_SUCCESS;
6430
6431 } else {
6432 result = vm_object_copy_strategically(src_object,
6433 src_offset,
6434 src_size,
6435 &new_entry->object.vm_object,
6436 &new_entry->offset,
6437 &new_entry_needs_copy);
6438
6439 new_entry->needs_copy = new_entry_needs_copy;
6440 }
6441
6442 if (result != KERN_SUCCESS &&
6443 result != KERN_MEMORY_RESTART_COPY) {
6444 vm_map_lock(src_map);
6445 RETURN(result);
6446 }
6447
6448 /*
6449 * Throw away the extra reference
6450 */
6451
6452 vm_object_deallocate(src_object);
6453
6454 /*
6455 * Verify that the map has not substantially
6456 * changed while the copy was being made.
6457 */
6458
6459 vm_map_lock(src_map);
6460
6461 if ((version.main_timestamp + 1) == src_map->timestamp)
6462 goto VerificationSuccessful;
6463
6464 /*
6465 * Simple version comparison failed.
6466 *
6467 * Retry the lookup and verify that the
6468 * same object/offset are still present.
6469 *
6470 * [Note: a memory manager that colludes with
6471 * the calling task can detect that we have
6472 * cheated. While the map was unlocked, the
6473 * mapping could have been changed and restored.]
6474 */
6475
6476 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
6477 RETURN(KERN_INVALID_ADDRESS);
6478 }
6479
6480 src_entry = tmp_entry;
6481 vm_map_clip_start(src_map, src_entry, src_start);
6482
6483 if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) &&
6484 !use_maxprot) ||
6485 ((src_entry->max_protection & VM_PROT_READ) == 0))
6486 goto VerificationFailed;
6487
6488 if (src_entry->vme_end < new_entry->vme_end)
6489 src_size = (new_entry->vme_end = src_entry->vme_end) - src_start;
6490
6491 if ((src_entry->object.vm_object != src_object) ||
6492 (src_entry->offset != src_offset) ) {
6493
6494 /*
6495 * Verification failed.
6496 *
6497 * Start over with this top-level entry.
6498 */
6499
6500 VerificationFailed: ;
6501
6502 vm_object_deallocate(new_entry->object.vm_object);
6503 tmp_entry = src_entry;
6504 continue;
6505 }
6506
6507 /*
6508 * Verification succeeded.
6509 */
6510
6511 VerificationSuccessful: ;
6512
6513 if (result == KERN_MEMORY_RESTART_COPY)
6514 goto RestartCopy;
6515
6516 /*
6517 * Copy succeeded.
6518 */
6519
6520 CopySuccessful: ;
6521
6522 /*
6523 * Link in the new copy entry.
6524 */
6525
6526 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
6527 new_entry);
6528
6529 /*
6530 * Determine whether the entire region
6531 * has been copied.
6532 */
6533 src_start = new_entry->vme_end;
6534 new_entry = VM_MAP_ENTRY_NULL;
6535 while ((src_start >= src_end) && (src_end != 0)) {
6536 if (src_map != base_map) {
6537 submap_map_t *ptr;
6538
6539 ptr = parent_maps;
6540 assert(ptr != NULL);
6541 parent_maps = parent_maps->next;
6542 vm_map_unlock(src_map);
6543 vm_map_deallocate(src_map);
6544 vm_map_lock(ptr->parent_map);
6545 src_map = ptr->parent_map;
6546 src_start = ptr->base_start;
6547 src_end = ptr->base_end;
6548 if ((src_end > src_start) &&
6549 !vm_map_lookup_entry(
6550 src_map, src_start, &tmp_entry))
6551 RETURN(KERN_INVALID_ADDRESS);
6552 kfree(ptr, sizeof(submap_map_t));
6553 if(parent_maps == NULL)
6554 map_share = FALSE;
6555 src_entry = tmp_entry->vme_prev;
6556 } else
6557 break;
6558 }
6559 if ((src_start >= src_end) && (src_end != 0))
6560 break;
6561
6562 /*
6563 * Verify that there are no gaps in the region
6564 */
6565
6566 tmp_entry = src_entry->vme_next;
6567 if ((tmp_entry->vme_start != src_start) ||
6568 (tmp_entry == vm_map_to_entry(src_map)))
6569 RETURN(KERN_INVALID_ADDRESS);
6570 }
6571
6572 /*
6573 * If the source should be destroyed, do it now, since the
6574 * copy was successful.
6575 */
6576 if (src_destroy) {
6577 (void) vm_map_delete(src_map,
6578 vm_map_trunc_page(src_addr),
6579 src_end,
6580 (src_map == kernel_map) ?
6581 VM_MAP_REMOVE_KUNWIRE :
6582 VM_MAP_NO_FLAGS,
6583 VM_MAP_NULL);
6584 }
6585
6586 vm_map_unlock(src_map);
6587
6588 /* Fix-up start and end points in copy. This is necessary */
6589 /* when the various entries in the copy object were picked */
6590 /* up from different sub-maps */
6591
6592 tmp_entry = vm_map_copy_first_entry(copy);
6593 while (tmp_entry != vm_map_copy_to_entry(copy)) {
6594 tmp_entry->vme_end = copy_addr +
6595 (tmp_entry->vme_end - tmp_entry->vme_start);
6596 tmp_entry->vme_start = copy_addr;
6597 copy_addr += tmp_entry->vme_end - tmp_entry->vme_start;
6598 tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next;
6599 }
6600
6601 *copy_result = copy;
6602 return(KERN_SUCCESS);
6603
6604 #undef RETURN
6605 }
6606
6607 /*
6608 * vm_map_copyin_object:
6609 *
6610 * Create a copy object from an object.
6611 * Our caller donates an object reference.
6612 */
6613
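/*
 * Illustrative sketch (not part of the build): wrapping an existing VM
 * object into a vm_map_copy_t.  The helper name is hypothetical; note that
 * vm_map_copyin_object() consumes the object reference donated by the
 * caller, as described above.
 */
#if 0
static kern_return_t
example_wrap_object(
	vm_object_t		object,		/* caller donates a reference */
	vm_object_size_t	size,
	vm_map_copy_t		*copy_result)	/* OUT */
{
	return vm_map_copyin_object(object, 0, size, copy_result);
}
#endif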
6614 kern_return_t
6615 vm_map_copyin_object(
6616 vm_object_t object,
6617 vm_object_offset_t offset, /* offset of region in object */
6618 vm_object_size_t size, /* size of region in object */
6619 vm_map_copy_t *copy_result) /* OUT */
6620 {
6621 vm_map_copy_t copy; /* Resulting copy */
6622
6623 /*
6624 * We drop the object into a special copy object
6625 * that contains the object directly.
6626 */
6627
6628 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
6629 copy->type = VM_MAP_COPY_OBJECT;
6630 copy->cpy_object = object;
6631 copy->offset = offset;
6632 copy->size = size;
6633
6634 *copy_result = copy;
6635 return(KERN_SUCCESS);
6636 }
6637
6638 static void
6639 vm_map_fork_share(
6640 vm_map_t old_map,
6641 vm_map_entry_t old_entry,
6642 vm_map_t new_map)
6643 {
6644 vm_object_t object;
6645 vm_map_entry_t new_entry;
6646
6647 /*
6648 * New sharing code. New map entry
6649 * references original object. Internal
6650 * objects use asynchronous copy algorithm for
6651 * future copies. First make sure we have
6652 * the right object. If we need a shadow,
6653 * or someone else already has one, then
6654 * make a new shadow and share it.
6655 */
6656
6657 object = old_entry->object.vm_object;
6658 if (old_entry->is_sub_map) {
6659 assert(old_entry->wired_count == 0);
6660 #ifndef NO_NESTED_PMAP
6661 if(old_entry->use_pmap) {
6662 kern_return_t result;
6663
6664 result = pmap_nest(new_map->pmap,
6665 (old_entry->object.sub_map)->pmap,
6666 (addr64_t)old_entry->vme_start,
6667 (addr64_t)old_entry->vme_start,
6668 (uint64_t)(old_entry->vme_end - old_entry->vme_start));
6669 if(result)
6670 panic("vm_map_fork_share: pmap_nest failed!");
6671 }
6672 #endif /* NO_NESTED_PMAP */
6673 } else if (object == VM_OBJECT_NULL) {
6674 object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
6675 old_entry->vme_start));
6676 old_entry->offset = 0;
6677 old_entry->object.vm_object = object;
6678 assert(!old_entry->needs_copy);
6679 } else if (object->copy_strategy !=
6680 MEMORY_OBJECT_COPY_SYMMETRIC) {
6681
6682 /*
6683 * We are already using an asymmetric
6684 * copy, and therefore we already have
6685 * the right object.
6686 */
6687
6688 assert(! old_entry->needs_copy);
6689 }
6690 else if (old_entry->needs_copy || /* case 1 */
6691 object->shadowed || /* case 2 */
6692 (!object->true_share && /* case 3 */
6693 !old_entry->is_shared &&
6694 (object->size >
6695 (vm_map_size_t)(old_entry->vme_end -
6696 old_entry->vme_start)))) {
6697
6698 /*
6699 * We need to create a shadow.
6700 * There are three cases here.
6701 * In the first case, we need to
6702 * complete a deferred symmetrical
6703 * copy that we participated in.
6704 * In the second and third cases,
6705 * we need to create the shadow so
6706 * that changes that we make to the
6707 * object do not interfere with
6708 * any symmetrical copies which
6709 * have occurred (case 2) or which
6710 * might occur (case 3).
6711 *
6712 * The first case is when we had
6713 * deferred shadow object creation
6714 * via the entry->needs_copy mechanism.
6715 * This mechanism only works when
6716 * only one entry points to the source
6717 * object, and we are about to create
6718 * a second entry pointing to the
6719 * same object. The problem is that
6720 * there is no way of mapping from
6721 * an object to the entries pointing
6722 * to it. (Deferred shadow creation
6723 * works with one entry because it occurs
6724 * at fault time, and we walk from the
6725 * entry to the object when handling
6726 * the fault.)
6727 *
6728 * The second case is when the object
6729 * to be shared has already been copied
6730 * with a symmetric copy, but we point
6731 * directly to the object without
6732 * needs_copy set in our entry. (This
6733 * can happen because different ranges
6734 * of an object can be pointed to by
6735 * different entries. In particular,
6736 * a single entry pointing to an object
6737 * can be split by a call to vm_inherit,
6738 * which, combined with task_create, can
6739 * result in the different entries
6740 * having different needs_copy values.)
6741 * The shadowed flag in the object allows
6742 * us to detect this case. The problem
6743 * with this case is that if this object
6744 * has or will have shadows, then we
6745 * must not perform an asymmetric copy
6746 * of this object, since such a copy
6747 * allows the object to be changed, which
6748 * will break the previous symmetrical
6749 * copies (which rely upon the object
6750 * not changing). In a sense, the shadowed
6751 * flag says "don't change this object".
6752 * We fix this by creating a shadow
6753 * object for this object, and sharing
6754 * that. This works because we are free
6755 * to change the shadow object (and thus
6756 * to use an asymmetric copy strategy);
6757 * this is also semantically correct,
6758 * since this object is temporary, and
6759 * therefore a copy of the object is
6760 * as good as the object itself. (This
6761 * is not true for permanent objects,
6762 * since the pager needs to see changes,
6763 * which won't happen if the changes
6764 * are made to a copy.)
6765 *
6766 * The third case is when the object
6767 * to be shared has parts sticking
6768 * outside of the entry we're working
6769 * with, and thus may in the future
6770 * be subject to a symmetrical copy.
6771 * (This is a preemptive version of
6772 * case 2.)  A condensed sketch of this three-way test follows this function.
6773 */
6774
6775 assert(!(object->shadowed && old_entry->is_shared));
6776 vm_object_shadow(&old_entry->object.vm_object,
6777 &old_entry->offset,
6778 (vm_map_size_t) (old_entry->vme_end -
6779 old_entry->vme_start));
6780
6781 /*
6782 * If we're making a shadow for other than
6783 * copy on write reasons, then we have
6784 * to remove write permission.
6785 */
6786
6787 if (!old_entry->needs_copy &&
6788 (old_entry->protection & VM_PROT_WRITE)) {
6789 vm_prot_t prot;
6790
6791 prot = old_entry->protection & ~VM_PROT_WRITE;
6792 #ifdef STACK_ONLY_NX
6793 if (old_entry->alias != VM_MEMORY_STACK && prot)
6794 prot |= VM_PROT_EXECUTE;
6795 #endif
6796 if (old_map->mapped) {
6797 vm_object_pmap_protect(
6798 old_entry->object.vm_object,
6799 old_entry->offset,
6800 (old_entry->vme_end -
6801 old_entry->vme_start),
6802 PMAP_NULL,
6803 old_entry->vme_start,
6804 prot);
6805 } else {
6806 pmap_protect(old_map->pmap,
6807 old_entry->vme_start,
6808 old_entry->vme_end,
6809 prot);
6810 }
6811 }
6812
6813 old_entry->needs_copy = FALSE;
6814 object = old_entry->object.vm_object;
6815 }
6816
6817 /*
6818 * If object was using a symmetric copy strategy,
6819 * change its copy strategy to the default
6820 * asymmetric copy strategy, which is copy_delay
6821 * in the non-norma case and copy_call in the
6822 * norma case. Bump the reference count for the
6823 * new entry.
6824 */
6825
6826 if(old_entry->is_sub_map) {
6827 vm_map_lock(old_entry->object.sub_map);
6828 vm_map_reference(old_entry->object.sub_map);
6829 vm_map_unlock(old_entry->object.sub_map);
6830 } else {
6831 vm_object_lock(object);
6832 object->ref_count++;
6833 vm_object_res_reference(object);
6834 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
6835 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
6836 }
6837 vm_object_unlock(object);
6838 }
6839
6840 /*
6841 * Clone the entry, using object ref from above.
6842 * Mark both entries as shared.
6843 */
6844
6845 new_entry = vm_map_entry_create(new_map);
6846 vm_map_entry_copy(new_entry, old_entry);
6847 old_entry->is_shared = TRUE;
6848 new_entry->is_shared = TRUE;
6849
6850 /*
6851 * Insert the entry into the new map -- we
6852 * know we're inserting at the end of the new
6853 * map.
6854 */
6855
6856 vm_map_entry_link(new_map, vm_map_last_entry(new_map), new_entry);
6857
6858 /*
6859 * Update the physical map
6860 */
6861
6862 if (old_entry->is_sub_map) {
6863 /* Bill Angell pmap support goes here */
6864 } else {
6865 pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
6866 old_entry->vme_end - old_entry->vme_start,
6867 old_entry->vme_start);
6868 }
6869 }
6870
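/*
 * Illustrative sketch (not part of the build): the three-way "do we need a
 * shadow?" test described in the long comment inside vm_map_fork_share(),
 * condensed into a predicate.  The helper name is hypothetical; the
 * condition mirrors the one used above.
 */
#if 0
static boolean_t
example_needs_shadow(
	vm_map_entry_t	entry,
	vm_object_t	object)
{
	return (entry->needs_copy ||		/* case 1: deferred symmetric copy */
		object->shadowed ||		/* case 2: already symmetrically copied */
		(!object->true_share &&		/* case 3: object extends beyond this entry */
		 !entry->is_shared &&
		 (object->size >
		  (vm_map_size_t)(entry->vme_end - entry->vme_start))));
}
#endif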
6871 static boolean_t
6872 vm_map_fork_copy(
6873 vm_map_t old_map,
6874 vm_map_entry_t *old_entry_p,
6875 vm_map_t new_map)
6876 {
6877 vm_map_entry_t old_entry = *old_entry_p;
6878 vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start;
6879 vm_map_offset_t start = old_entry->vme_start;
6880 vm_map_copy_t copy;
6881 vm_map_entry_t last = vm_map_last_entry(new_map);
6882
6883 vm_map_unlock(old_map);
6884 /*
6885 * Use maxprot version of copyin because we
6886 * care about whether this memory can ever
6887 * be accessed, not just whether it's accessible
6888 * right now.
6889 */
6890 if (vm_map_copyin_maxprot(old_map, start, entry_size, FALSE, &copy)
6891 != KERN_SUCCESS) {
6892 /*
6893 * The map might have changed while it
6894 * was unlocked, check it again. Skip
6895 * any blank space or permanently
6896 * unreadable region.
6897 */
6898 vm_map_lock(old_map);
6899 if (!vm_map_lookup_entry(old_map, start, &last) ||
6900 (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) {
6901 last = last->vme_next;
6902 }
6903 *old_entry_p = last;
6904
6905 /*
6906 * XXX For some error returns, want to
6907 * XXX skip to the next element. Note
6908 * that INVALID_ADDRESS and
6909 * PROTECTION_FAILURE are handled above.
6910 */
6911
6912 return FALSE;
6913 }
6914
6915 /*
6916 * Insert the copy into the new map
6917 */
6918
6919 vm_map_copy_insert(new_map, last, copy);
6920
6921 /*
6922 * Pick up the traversal at the end of
6923 * the copied region.
6924 */
6925
6926 vm_map_lock(old_map);
6927 start += entry_size;
6928 if (! vm_map_lookup_entry(old_map, start, &last)) {
6929 last = last->vme_next;
6930 } else {
6931 vm_map_clip_start(old_map, last, start);
6932 }
6933 *old_entry_p = last;
6934
6935 return TRUE;
6936 }
6937
6938 /*
6939 * vm_map_fork:
6940 *
6941 * Create and return a new map based on the old
6942 * map, according to the inheritance values on the
6943 * regions in that map.
6944 *
6945 * The source map must not be locked.
6946 */
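/*
 * Illustrative sketch (not part of the build): forking an address space
 * after marking a range to be shared with the child.  The helper name is
 * hypothetical, and vm_map_inherit() is assumed to be the inheritance-
 * setting routine declared elsewhere in this file; other entries keep the
 * inheritance already recorded on them (VM_INHERIT_COPY for most).
 */
#if 0
static vm_map_t
example_fork_with_shared_range(
	vm_map_t	parent_map,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	/* VM_INHERIT_SHARE: child sees the same memory;   */
	/* VM_INHERIT_COPY:  child gets a (lazy) copy;     */
	/* VM_INHERIT_NONE:  range is absent in the child. */
	(void) vm_map_inherit(parent_map, start, end, VM_INHERIT_SHARE);

	return vm_map_fork(parent_map);
}
#endif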
6947 vm_map_t
6948 vm_map_fork(
6949 vm_map_t old_map)
6950 {
6951 pmap_t new_pmap = pmap_create(
6952 (vm_map_size_t) 0,
6953 task_has_64BitAddr(current_task()));
6954 vm_map_t new_map;
6955 vm_map_entry_t old_entry;
6956 vm_map_size_t new_size = 0, entry_size;
6957 vm_map_entry_t new_entry;
6958 boolean_t src_needs_copy;
6959 boolean_t new_entry_needs_copy;
6960
6961 vm_map_reference_swap(old_map);
6962 vm_map_lock(old_map);
6963
6964 new_map = vm_map_create(new_pmap,
6965 old_map->min_offset,
6966 old_map->max_offset,
6967 old_map->hdr.entries_pageable);
6968
6969 for (
6970 old_entry = vm_map_first_entry(old_map);
6971 old_entry != vm_map_to_entry(old_map);
6972 ) {
6973
6974 entry_size = old_entry->vme_end - old_entry->vme_start;
6975
6976 switch (old_entry->inheritance) {
6977 case VM_INHERIT_NONE:
6978 break;
6979
6980 case VM_INHERIT_SHARE:
6981 vm_map_fork_share(old_map, old_entry, new_map);
6982 new_size += entry_size;
6983 break;
6984
6985 case VM_INHERIT_COPY:
6986
6987 /*
6988 * Inline the copy_quickly case;
6989 * upon failure, fall back on call
6990 * to vm_map_fork_copy.
6991 */
6992
6993 if(old_entry->is_sub_map)
6994 break;
6995 if ((old_entry->wired_count != 0) ||
6996 ((old_entry->object.vm_object != NULL) &&
6997 (old_entry->object.vm_object->true_share))) {
6998 goto slow_vm_map_fork_copy;
6999 }
7000
7001 new_entry = vm_map_entry_create(new_map);
7002 vm_map_entry_copy(new_entry, old_entry);
7003 /* clear address space specifics */
7004 new_entry->use_pmap = FALSE;
7005
7006 if (! vm_object_copy_quickly(
7007 &new_entry->object.vm_object,
7008 old_entry->offset,
7009 (old_entry->vme_end -
7010 old_entry->vme_start),
7011 &src_needs_copy,
7012 &new_entry_needs_copy)) {
7013 vm_map_entry_dispose(new_map, new_entry);
7014 goto slow_vm_map_fork_copy;
7015 }
7016
7017 /*
7018 * Handle copy-on-write obligations
7019 */
7020
7021 if (src_needs_copy && !old_entry->needs_copy) {
7022 vm_prot_t prot;
7023
7024 prot = old_entry->protection & ~VM_PROT_WRITE;
7025 #ifdef STACK_ONLY_NX
7026 if (old_entry->alias != VM_MEMORY_STACK && prot)
7027 prot |= VM_PROT_EXECUTE;
7028 #endif
7029 vm_object_pmap_protect(
7030 old_entry->object.vm_object,
7031 old_entry->offset,
7032 (old_entry->vme_end -
7033 old_entry->vme_start),
7034 ((old_entry->is_shared
7035 || old_map->mapped)
7036 ? PMAP_NULL :
7037 old_map->pmap),
7038 old_entry->vme_start,
7039 prot);
7040
7041 old_entry->needs_copy = TRUE;
7042 }
7043 new_entry->needs_copy = new_entry_needs_copy;
7044
7045 /*
7046 * Insert the entry at the end
7047 * of the map.
7048 */
7049
7050 vm_map_entry_link(new_map, vm_map_last_entry(new_map),
7051 new_entry);
7052 new_size += entry_size;
7053 break;
7054
7055 slow_vm_map_fork_copy:
7056 if (vm_map_fork_copy(old_map, &old_entry, new_map)) {
7057 new_size += entry_size;
7058 }
7059 continue;
7060 }
7061 old_entry = old_entry->vme_next;
7062 }
7063
7064 new_map->size = new_size;
7065 vm_map_unlock(old_map);
7066 vm_map_deallocate(old_map);
7067
7068 return(new_map);
7069 }
7070
7071
7072 /*
7073 * vm_map_lookup_locked:
7074 *
7075 * Finds the VM object, offset, and
7076 * protection for a given virtual address in the
7077 * specified map, assuming a page fault of the
7078 * type specified.
7079 *
7080 * Returns the (object, offset, protection) for
7081 * this address, whether it is wired down, and whether
7082 * this map has the only reference to the data in question.
7083 * In order to later verify this lookup, a "version"
7084 * is returned.
7085 *
7086 * The map MUST be locked by the caller and WILL be
7087 * locked on exit. In order to guarantee the
7088 * existence of the returned object, it is returned
7089 * locked.
7090 *
7091 * If a lookup is requested with "write protection"
7092 * specified, the map may be changed to perform virtual
7093 * copying operations, although the data referenced will
7094 * remain the same.
7095 */
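/*
 * Illustrative sketch (not part of the build): a fault-style caller of
 * vm_map_lookup_locked().  The helper name is hypothetical and the error
 * handling is simplified; the locking pattern (enter with the map read-
 * locked, unlock the returned object and real_map when done) follows the
 * comment above.
 */
#if 0
static kern_return_t
example_probe_address(
	vm_map_t	map,
	vm_map_offset_t	vaddr)
{
	vm_map_version_t	version;
	vm_object_t		object;
	vm_object_offset_t	offset;
	vm_prot_t		prot;
	boolean_t		wired;
	int			behavior;
	vm_map_offset_t		lo_offset, hi_offset;
	vm_map_t		real_map;
	kern_return_t		kr;

	vm_map_lock_read(map);		/* the map must be locked on entry */
	kr = vm_map_lookup_locked(&map, vaddr, VM_PROT_READ,
				  &version, &object, &offset, &prot,
				  &wired, &behavior,
				  &lo_offset, &hi_offset, &real_map);
	if (kr != KERN_SUCCESS) {
		vm_map_unlock_read(map);
		return kr;
	}

	/* ... examine the page at (object, offset); "object" is locked ... */

	vm_object_unlock(object);
	if (real_map != map)
		vm_map_unlock(real_map);
	vm_map_unlock_read(map);	/* "map" may now point at a submap */
	return KERN_SUCCESS;
}
#endif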
7096 kern_return_t
7097 vm_map_lookup_locked(
7098 vm_map_t *var_map, /* IN/OUT */
7099 vm_map_offset_t vaddr,
7100 vm_prot_t fault_type,
7101 vm_map_version_t *out_version, /* OUT */
7102 vm_object_t *object, /* OUT */
7103 vm_object_offset_t *offset, /* OUT */
7104 vm_prot_t *out_prot, /* OUT */
7105 boolean_t *wired, /* OUT */
7106 int *behavior, /* OUT */
7107 vm_map_offset_t *lo_offset, /* OUT */
7108 vm_map_offset_t *hi_offset, /* OUT */
7109 vm_map_t *real_map)
7110 {
7111 vm_map_entry_t entry;
7112 register vm_map_t map = *var_map;
7113 vm_map_t old_map = *var_map;
7114 vm_map_t cow_sub_map_parent = VM_MAP_NULL;
7115 vm_map_offset_t cow_parent_vaddr = 0;
7116 vm_map_offset_t old_start = 0;
7117 vm_map_offset_t old_end = 0;
7118 register vm_prot_t prot;
7119
7120 *real_map = map;
7121 RetryLookup: ;
7122
7123 /*
7124 * If the map has an interesting hint, try it before calling
7125 * full blown lookup routine.
7126 */
7127 entry = map->hint;
7128
7129 if ((entry == vm_map_to_entry(map)) ||
7130 (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
7131 vm_map_entry_t tmp_entry;
7132
7133 /*
7134 * Entry was either not a valid hint, or the vaddr
7135 * was not contained in the entry, so do a full lookup.
7136 */
7137 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
7138 if((cow_sub_map_parent) && (cow_sub_map_parent != map))
7139 vm_map_unlock(cow_sub_map_parent);
7140 if((*real_map != map)
7141 && (*real_map != cow_sub_map_parent))
7142 vm_map_unlock(*real_map);
7143 return KERN_INVALID_ADDRESS;
7144 }
7145
7146 entry = tmp_entry;
7147 }
7148 if(map == old_map) {
7149 old_start = entry->vme_start;
7150 old_end = entry->vme_end;
7151 }
7152
7153 /*
7154 * Handle submaps. Drop lock on upper map, submap is
7155 * returned locked.
7156 */
7157
7158 submap_recurse:
7159 if (entry->is_sub_map) {
7160 vm_map_offset_t local_vaddr;
7161 vm_map_offset_t end_delta;
7162 vm_map_offset_t start_delta;
7163 vm_map_entry_t submap_entry;
7164 boolean_t mapped_needs_copy=FALSE;
7165
7166 local_vaddr = vaddr;
7167
7168 if ((!entry->needs_copy) && (entry->use_pmap)) {
7169 /* if real_map equals map we unlock below */
7170 if ((*real_map != map) &&
7171 (*real_map != cow_sub_map_parent))
7172 vm_map_unlock(*real_map);
7173 *real_map = entry->object.sub_map;
7174 }
7175
7176 if(entry->needs_copy) {
7177 if (!mapped_needs_copy) {
7178 if (vm_map_lock_read_to_write(map)) {
7179 vm_map_lock_read(map);
7180 if(*real_map == entry->object.sub_map)
7181 *real_map = map;
7182 goto RetryLookup;
7183 }
7184 vm_map_lock_read(entry->object.sub_map);
7185 cow_sub_map_parent = map;
7186 /* reset base to map before cow object */
7187 /* this is the map which will accept */
7188 /* the new cow object */
7189 old_start = entry->vme_start;
7190 old_end = entry->vme_end;
7191 cow_parent_vaddr = vaddr;
7192 mapped_needs_copy = TRUE;
7193 } else {
7194 vm_map_lock_read(entry->object.sub_map);
7195 if((cow_sub_map_parent != map) &&
7196 (*real_map != map))
7197 vm_map_unlock(map);
7198 }
7199 } else {
7200 vm_map_lock_read(entry->object.sub_map);
7201 /* leave the map locked if it is a target */
7202 /* cow sub_map above; otherwise, just */
7203 /* follow the maps down to the object. */
7204 /* here we unlock knowing we are not */
7205 /* revisiting the map. */
7206 if((*real_map != map) && (map != cow_sub_map_parent))
7207 vm_map_unlock_read(map);
7208 }
7209
7210 *var_map = map = entry->object.sub_map;
7211
7212 /* calculate the offset in the submap for vaddr */
7213 local_vaddr = (local_vaddr - entry->vme_start) + entry->offset;
7214
7215 RetrySubMap:
7216 if(!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
7217 if((cow_sub_map_parent) && (cow_sub_map_parent != map)){
7218 vm_map_unlock(cow_sub_map_parent);
7219 }
7220 if((*real_map != map)
7221 && (*real_map != cow_sub_map_parent)) {
7222 vm_map_unlock(*real_map);
7223 }
7224 *real_map = map;
7225 return KERN_INVALID_ADDRESS;
7226 }
7227 /* find the attenuated shadow of the underlying object */
7228 /* on our target map */
7229
7230 /* In plain English: the submap object may extend beyond the */
7231 /* region mapped by the entry, or may only fill a portion */
7232 /* of it. For our purposes, we only care if the object */
7233 /* doesn't fill. In this case the area which will */
7234 /* ultimately be clipped in the top map will only need */
7235 /* to be as big as the portion of the underlying entry */
7236 /* which is mapped */
7237 start_delta = submap_entry->vme_start > entry->offset ?
7238 submap_entry->vme_start - entry->offset : 0;
7239
7240 end_delta =
7241 (entry->offset + start_delta + (old_end - old_start)) <=
7242 submap_entry->vme_end ?
7243 0 : (entry->offset +
7244 (old_end - old_start))
7245 - submap_entry->vme_end;
7246
7247 old_start += start_delta;
7248 old_end -= end_delta;
7249
7250 if(submap_entry->is_sub_map) {
7251 entry = submap_entry;
7252 vaddr = local_vaddr;
7253 goto submap_recurse;
7254 }
7255
7256 if(((fault_type & VM_PROT_WRITE) && cow_sub_map_parent)) {
7257
7258 vm_object_t copy_object;
7259 vm_map_offset_t local_start;
7260 vm_map_offset_t local_end;
7261 boolean_t copied_slowly = FALSE;
7262
7263 if (vm_map_lock_read_to_write(map)) {
7264 vm_map_lock_read(map);
7265 old_start -= start_delta;
7266 old_end += end_delta;
7267 goto RetrySubMap;
7268 }
7269
7270
7271 if (submap_entry->object.vm_object == VM_OBJECT_NULL) {
7272 submap_entry->object.vm_object =
7273 vm_object_allocate(
7274 (vm_map_size_t)
7275 (submap_entry->vme_end
7276 - submap_entry->vme_start));
7277 submap_entry->offset = 0;
7278 }
7279 local_start = local_vaddr -
7280 (cow_parent_vaddr - old_start);
7281 local_end = local_vaddr +
7282 (old_end - cow_parent_vaddr);
7283 vm_map_clip_start(map, submap_entry, local_start);
7284 vm_map_clip_end(map, submap_entry, local_end);
7285
7286 /* This is the COW case; let's connect */
7287 /* an entry in our space to the underlying */
7288 /* object in the submap, bypassing the */
7289 /* submap. */
7290
7291
7292 if(submap_entry->wired_count != 0) {
7293 vm_object_lock(
7294 submap_entry->object.vm_object);
7295 vm_object_copy_slowly(
7296 submap_entry->object.vm_object,
7297 submap_entry->offset,
7298 submap_entry->vme_end -
7299 submap_entry->vme_start,
7300 FALSE,
7301 &copy_object);
7302 copied_slowly = TRUE;
7303 } else {
7304 /* set up shadow object */
7305 copy_object = submap_entry->object.vm_object;
7306 vm_object_reference(copy_object);
7307 submap_entry->object.vm_object->shadowed = TRUE;
7308 submap_entry->needs_copy = TRUE;
7309
7310 prot = submap_entry->protection & ~VM_PROT_WRITE;
7311 #ifdef STACK_ONLY_NX
7312 if (submap_entry->alias != VM_MEMORY_STACK && prot)
7313 prot |= VM_PROT_EXECUTE;
7314 #endif
7315 vm_object_pmap_protect(
7316 submap_entry->object.vm_object,
7317 submap_entry->offset,
7318 submap_entry->vme_end -
7319 submap_entry->vme_start,
7320 (submap_entry->is_shared
7321 || map->mapped) ?
7322 PMAP_NULL : map->pmap,
7323 submap_entry->vme_start,
7324 prot);
7325 }
7326
7327
7328 /* This works differently from the */
7329 /* normal submap case. We go back */
7330 /* to the parent of the cow map and */
7331 /* clip out the target portion of */
7332 /* the sub_map, substituting the */
7333 /* new copy object. */
7334
7335 vm_map_unlock(map);
7336 local_start = old_start;
7337 local_end = old_end;
7338 map = cow_sub_map_parent;
7339 *var_map = cow_sub_map_parent;
7340 vaddr = cow_parent_vaddr;
7341 cow_sub_map_parent = NULL;
7342
7343 if(!vm_map_lookup_entry(map,
7344 vaddr, &entry)) {
7345 vm_object_deallocate(
7346 copy_object);
7347 vm_map_lock_write_to_read(map);
7348 return KERN_INVALID_ADDRESS;
7349 }
7350
7351 /* clip out the portion of space */
7352 /* mapped by the sub map which */
7353 /* corresponds to the underlying */
7354 /* object */
7355 vm_map_clip_start(map, entry, local_start);
7356 vm_map_clip_end(map, entry, local_end);
7357
7358
7359 /* substitute copy object for */
7360 /* shared map entry */
7361 vm_map_deallocate(entry->object.sub_map);
7362 entry->is_sub_map = FALSE;
7363 entry->object.vm_object = copy_object;
7364
7365 entry->protection |= VM_PROT_WRITE;
7366 entry->max_protection |= VM_PROT_WRITE;
7367 if(copied_slowly) {
7368 entry->offset = 0;
7369 entry->needs_copy = FALSE;
7370 entry->is_shared = FALSE;
7371 } else {
7372 entry->offset = submap_entry->offset;
7373 entry->needs_copy = TRUE;
7374 if(entry->inheritance == VM_INHERIT_SHARE)
7375 entry->inheritance = VM_INHERIT_COPY;
7376 if (map != old_map)
7377 entry->is_shared = TRUE;
7378 }
7379 if(entry->inheritance == VM_INHERIT_SHARE)
7380 entry->inheritance = VM_INHERIT_COPY;
7381
7382 vm_map_lock_write_to_read(map);
7383 } else {
7384 if((cow_sub_map_parent)
7385 && (cow_sub_map_parent != *real_map)
7386 && (cow_sub_map_parent != map)) {
7387 vm_map_unlock(cow_sub_map_parent);
7388 }
7389 entry = submap_entry;
7390 vaddr = local_vaddr;
7391 }
7392 }
7393
7394 /*
7395 * Check whether this task is allowed to have
7396 * this page.
7397 */
7398 prot = entry->protection;
7399
7400 #ifdef STACK_ONLY_NX
7401 if (entry->alias != VM_MEMORY_STACK && prot)
7402 /*
7403 * HACK -- if not a stack, then allow execution
7404 */
7405 prot |= VM_PROT_EXECUTE;
7406 #endif
7407 if ((fault_type & (prot)) != fault_type) {
7408 if (*real_map != map) {
7409 vm_map_unlock(*real_map);
7410 }
7411 *real_map = map;
7412
7413 if ((fault_type & VM_PROT_EXECUTE) && prot)
7414 log_nx_failure((addr64_t)vaddr, prot);
7415
7416 return KERN_PROTECTION_FAILURE;
7417 }
7418
7419 /*
7420 * If this page is not pageable, we have to get
7421 * it for all possible accesses.
7422 */
7423
7424 *wired = (entry->wired_count != 0);
7425 if (*wired)
7426 fault_type = prot;
7427
7428 /*
7429 * If the entry was copy-on-write, we either ...
7430 */
7431
7432 if (entry->needs_copy) {
7433 /*
7434 * If we want to write the page, we may as well
7435 * handle that now since we've got the map locked.
7436 *
7437 * If we don't need to write the page, we just
7438 * demote the permissions allowed.
7439 */
7440
7441 if ((fault_type & VM_PROT_WRITE) || *wired) {
7442 /*
7443 * Make a new object, and place it in the
7444 * object chain. Note that no new references
7445 * have appeared -- one just moved from the
7446 * map to the new object.
7447 */
7448
7449 if (vm_map_lock_read_to_write(map)) {
7450 vm_map_lock_read(map);
7451 goto RetryLookup;
7452 }
7453 vm_object_shadow(&entry->object.vm_object,
7454 &entry->offset,
7455 (vm_map_size_t) (entry->vme_end -
7456 entry->vme_start));
7457
7458 entry->object.vm_object->shadowed = TRUE;
7459 entry->needs_copy = FALSE;
7460 vm_map_lock_write_to_read(map);
7461 }
7462 else {
7463 /*
7464 * We're attempting to read a copy-on-write
7465 * page -- don't allow writes.
7466 */
7467
7468 prot &= (~VM_PROT_WRITE);
7469 }
7470 }
7471
7472 /*
7473 * Create an object if necessary.
7474 */
7475 if (entry->object.vm_object == VM_OBJECT_NULL) {
7476
7477 if (vm_map_lock_read_to_write(map)) {
7478 vm_map_lock_read(map);
7479 goto RetryLookup;
7480 }
7481
7482 entry->object.vm_object = vm_object_allocate(
7483 (vm_map_size_t)(entry->vme_end - entry->vme_start));
7484 entry->offset = 0;
7485 vm_map_lock_write_to_read(map);
7486 }
7487
7488 /*
7489 * Return the object/offset from this entry. If the entry
7490 * was copy-on-write or empty, it has been fixed up. Also
7491 * return the protection.
7492 */
7493
7494 *offset = (vaddr - entry->vme_start) + entry->offset;
7495 *object = entry->object.vm_object;
7496 *out_prot = prot;
7497 *behavior = entry->behavior;
7498 *lo_offset = entry->offset;
7499 *hi_offset = (entry->vme_end - entry->vme_start) + entry->offset;
7500
7501 /*
7502 * Lock the object to prevent it from disappearing
7503 */
7504
7505 vm_object_lock(*object);
7506
7507 /*
7508 * Save the version number
7509 */
7510
7511 out_version->main_timestamp = map->timestamp;
7512
7513 return KERN_SUCCESS;
7514 }
7515
7516
7517 /*
7518 * vm_map_verify:
7519 *
7520 * Verifies that the map in question has not changed
7521 * since the given version. If successful, the map
7522 * will not change until vm_map_verify_done() is called.
7523 */
7524 boolean_t
7525 vm_map_verify(
7526 register vm_map_t map,
7527 register vm_map_version_t *version) /* REF */
7528 {
7529 boolean_t result;
7530
7531 vm_map_lock_read(map);
7532 result = (map->timestamp == version->main_timestamp);
7533
7534 if (!result)
7535 vm_map_unlock_read(map);
7536
7537 return(result);
7538 }
7539
7540 /*
7541 * vm_map_verify_done:
7542 *
7543 * Releases locks acquired by a vm_map_verify.
7544 *
7545 * This is now a macro in vm/vm_map.h. It does a
7546 * vm_map_unlock_read on the map.
7547 */
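/*
 * Illustrative sketch (not part of the build): re-validating a map against
 * a version captured by an earlier vm_map_lookup_locked().  The helper name
 * is hypothetical; vm_map_verify_done() is the macro described above, which
 * simply drops the read lock taken by a successful vm_map_verify().
 */
#if 0
static boolean_t
example_still_unchanged(
	vm_map_t		map,
	vm_map_version_t	*version)
{
	if (!vm_map_verify(map, version))
		return FALSE;		/* map changed; the caller must redo its lookup */

	/* ... the map is read-locked here and known to be unchanged ... */

	vm_map_verify_done(map, version);	/* releases the read lock */
	return TRUE;
}
#endif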
7548
7549
7550 /*
7551 * TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
7552 * Goes away after regular vm_region_recurse function migrates to
7553 * 64 bits
7554 * vm_region_recurse: A form of vm_region which follows the
7555 * submaps in a target map
7556 *
7557 */
7558
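/*
 * Illustrative sketch (not part of the build): walking the top-level
 * regions of an address space with vm_map_region_recurse_64().  The helper
 * name is hypothetical; a nesting depth of 0 asks the routine not to
 * descend into submaps.
 */
#if 0
static void
example_walk_regions(vm_map_t map)
{
	vm_map_offset_t			address = 0;
	vm_map_size_t			size;
	natural_t			depth;
	vm_region_submap_info_data_64_t	info;
	mach_msg_type_number_t		count;

	for (;;) {
		depth = 0;	/* stay at the top level */
		count = VM_REGION_SUBMAP_INFO_COUNT_64;
		if (vm_map_region_recurse_64(map, &address, &size,
					     &depth, &info, &count)
		    != KERN_SUCCESS)
			break;		/* no region at or after "address" */

		/* ... consume address, size, depth and info here ... */

		address += size;	/* advance past this region */
	}
}
#endif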
7559 kern_return_t
7560 vm_map_region_recurse_64(
7561 vm_map_t map,
7562 vm_map_offset_t *address, /* IN/OUT */
7563 vm_map_size_t *size, /* OUT */
7564 natural_t *nesting_depth, /* IN/OUT */
7565 vm_region_submap_info_64_t submap_info, /* IN/OUT */
7566 mach_msg_type_number_t *count) /* IN/OUT */
7567 {
7568 vm_region_extended_info_data_t extended;
7569 vm_map_entry_t tmp_entry;
7570 vm_map_offset_t user_address;
7571 unsigned int user_max_depth;
7572
7573 /*
7574 * "curr_entry" is the VM map entry preceding or including the
7575 * address we're looking for.
7576 * "curr_map" is the map or sub-map containing "curr_entry".
7577 * "curr_offset" is the cumulated offset of "curr_map" in the
7578 * target task's address space.
7579 * "curr_depth" is the depth of "curr_map" in the chain of
7580 * sub-maps.
7581 * "curr_max_offset" is the maximum offset we should take into
7582 * account in the current map. It may be smaller than the current
7583 * map's "max_offset" because we might not have mapped it all in
7584 * the upper level map.
7585 */
7586 vm_map_entry_t curr_entry;
7587 vm_map_offset_t curr_offset;
7588 vm_map_t curr_map;
7589 unsigned int curr_depth;
7590 vm_map_offset_t curr_max_offset;
7591
7592 /*
7593 * "next_" is the same as "curr_" but for the VM region immediately
7594 * after the address we're looking for. We need to keep track of this
7595 * too because we want to return info about that region if the
7596 * address we're looking for is not mapped.
7597 */
7598 vm_map_entry_t next_entry;
7599 vm_map_offset_t next_offset;
7600 vm_map_t next_map;
7601 unsigned int next_depth;
7602 vm_map_offset_t next_max_offset;
7603
7604 if (map == VM_MAP_NULL) {
7605 /* no address space to work on */
7606 return KERN_INVALID_ARGUMENT;
7607 }
7608
7609 if (*count < VM_REGION_SUBMAP_INFO_COUNT_64) {
7610 /* "info" structure is not big enough and would overflow */
7611 return KERN_INVALID_ARGUMENT;
7612 }
7613
7614 *count = VM_REGION_SUBMAP_INFO_COUNT_64;
7615
7616 user_address = *address;
7617 user_max_depth = *nesting_depth;
7618
7619 curr_entry = NULL;
7620 curr_map = map;
7621 curr_offset = 0;
7622 curr_depth = 0;
7623 curr_max_offset = curr_map->max_offset;
7624
7625 next_entry = NULL;
7626 next_map = NULL;
7627 next_offset = 0;
7628 next_depth = 0;
7629 next_max_offset = curr_max_offset;
7630
7631 if (not_in_kdp) {
7632 vm_map_lock_read(curr_map);
7633 }
7634
7635 for (;;) {
7636 if (vm_map_lookup_entry(curr_map,
7637 user_address - curr_offset,
7638 &tmp_entry)) {
7639 /* tmp_entry contains the address we're looking for */
7640 curr_entry = tmp_entry;
7641 } else {
7642 /*
7643 * The address is not mapped. "tmp_entry" is the
7644 * map entry preceding the address. We want the next
7645 * one, if it exists.
7646 */
7647 curr_entry = tmp_entry->vme_next;
7648 if (curr_entry == vm_map_to_entry(curr_map) ||
7649 curr_entry->vme_start >= curr_max_offset) {
7650 /* no next entry at this level: stop looking */
7651 if (not_in_kdp) {
7652 vm_map_unlock_read(curr_map);
7653 }
7654 curr_entry = NULL;
7655 curr_map = NULL;
7656 curr_offset = 0;
7657 curr_depth = 0;
7658 curr_max_offset = 0;
7659 break;
7660 }
7661 }
7662
7663 /*
7664 * Is the next entry at this level closer to the address (or
7665 * deeper in the submap chain) than the one we had
7666 * so far ?
7667 */
7668 tmp_entry = curr_entry->vme_next;
7669 if (tmp_entry == vm_map_to_entry(curr_map)) {
7670 /* no next entry at this level */
7671 } else if (tmp_entry->vme_start >= curr_max_offset) {
7672 /*
7673 * tmp_entry is beyond the scope of what we mapped of
7674 * this submap in the upper level: ignore it.
7675 */
7676 } else if ((next_entry == NULL) ||
7677 (tmp_entry->vme_start + curr_offset <=
7678 next_entry->vme_start + next_offset)) {
7679 /*
7680 * We didn't have a "next_entry" or this one is
7681 * closer to the address we're looking for:
7682 * use this "tmp_entry" as the new "next_entry".
7683 */
7684 if (next_entry != NULL) {
7685 /* unlock the last "next_map" */
7686 if (next_map != curr_map && not_in_kdp) {
7687 vm_map_unlock_read(next_map);
7688 }
7689 }
7690 next_entry = tmp_entry;
7691 next_map = curr_map;
7692 next_offset = curr_offset;
7693 next_depth = curr_depth;
7694 next_max_offset = curr_max_offset;
7695 }
7696
7697 if (!curr_entry->is_sub_map ||
7698 curr_depth >= user_max_depth) {
7699 /*
7700 * We hit a leaf map or we reached the maximum depth
7701 * we could, so stop looking. Keep the current map
7702 * locked.
7703 */
7704 break;
7705 }
7706
7707 /*
7708 * Get down to the next submap level.
7709 */
7710
7711 /*
7712 * Lock the next level and unlock the current level,
7713 * unless we need to keep it locked to access the "next_entry"
7714 * later.
7715 */
7716 if (not_in_kdp) {
7717 vm_map_lock_read(curr_entry->object.sub_map);
7718 }
7719 if (curr_map == next_map) {
7720 /* keep "next_map" locked in case we need it */
7721 } else {
7722 /* release this map */
7723 vm_map_unlock_read(curr_map);
7724 }
7725
7726 /*
7727 * Adjust the offset. "curr_entry" maps the submap
7728 * at relative address "curr_entry->vme_start" in the
7729 * curr_map but skips the first "curr_entry->offset"
7730 * bytes of the submap.
7731 * "curr_offset" always represents the offset of a virtual
7732 * address in the curr_map relative to the absolute address
7733 * space (i.e. the top-level VM map).
7734 */
7735 curr_offset +=
7736 (curr_entry->vme_start - curr_entry->offset);
7737 /* switch to the submap */
7738 curr_map = curr_entry->object.sub_map;
7739 curr_depth++;
7740 /*
7741 * "curr_max_offset" allows us to keep track of the
7742 * portion of the submap that is actually mapped at this level:
7743 * the rest of that submap is irrelevant to us, since it's not
7744 * mapped here.
7745 * The relevant portion of the map starts at
7746 * "curr_entry->offset" up to the size of "curr_entry".
7747 */
7748 curr_max_offset =
7749 curr_entry->vme_end - curr_entry->vme_start +
7750 curr_entry->offset;
7751 curr_entry = NULL;
7752 }
7753
7754 if (curr_entry == NULL) {
7755 /* no VM region contains the address... */
7756 if (next_entry == NULL) {
7757 /* ... and no VM region follows it either */
7758 return KERN_INVALID_ADDRESS;
7759 }
7760 /* ... gather info about the next VM region */
7761 curr_entry = next_entry;
7762 curr_map = next_map; /* still locked ... */
7763 curr_offset = next_offset;
7764 curr_depth = next_depth;
7765 curr_max_offset = next_max_offset;
7766 } else {
7767 /* we won't need "next_entry" after all */
7768 if (next_entry != NULL) {
7769 /* release "next_map" */
7770 if (next_map != curr_map && not_in_kdp) {
7771 vm_map_unlock_read(next_map);
7772 }
7773 }
7774 }
7775 next_entry = NULL;
7776 next_map = NULL;
7777 next_offset = 0;
7778 next_depth = 0;
7779 next_max_offset = 0;
7780
7781 *nesting_depth = curr_depth;
7782 *size = curr_entry->vme_end - curr_entry->vme_start;
7783 *address = curr_entry->vme_start + curr_offset;
7784
7785 submap_info->user_tag = curr_entry->alias;
7786 submap_info->offset = curr_entry->offset;
7787 submap_info->protection = curr_entry->protection;
7788 submap_info->inheritance = curr_entry->inheritance;
7789 submap_info->max_protection = curr_entry->max_protection;
7790 submap_info->behavior = curr_entry->behavior;
7791 submap_info->user_wired_count = curr_entry->user_wired_count;
7792 submap_info->is_submap = curr_entry->is_sub_map;
7793 submap_info->object_id = (uint32_t) curr_entry->object.vm_object;
7794
7795 extended.pages_resident = 0;
7796 extended.pages_swapped_out = 0;
7797 extended.pages_shared_now_private = 0;
7798 extended.pages_dirtied = 0;
7799 extended.external_pager = 0;
7800 extended.shadow_depth = 0;
7801
7802 if (not_in_kdp) {
7803 if (!curr_entry->is_sub_map) {
7804 vm_map_region_walk(curr_map,
7805 curr_entry->vme_start,
7806 curr_entry,
7807 curr_entry->offset,
7808 (curr_entry->vme_end -
7809 curr_entry->vme_start),
7810 &extended);
7811 submap_info->share_mode = extended.share_mode;
7812 if (extended.external_pager &&
7813 extended.ref_count == 2 &&
7814 extended.share_mode == SM_SHARED) {
7815 submap_info->share_mode = SM_PRIVATE;
7816 }
7817 submap_info->ref_count = extended.ref_count;
7818 } else {
7819 if (curr_entry->use_pmap) {
7820 submap_info->share_mode = SM_TRUESHARED;
7821 } else {
7822 submap_info->share_mode = SM_PRIVATE;
7823 }
7824 submap_info->ref_count =
7825 curr_entry->object.sub_map->ref_count;
7826 }
7827 }
7828
7829 submap_info->pages_resident = extended.pages_resident;
7830 submap_info->pages_swapped_out = extended.pages_swapped_out;
7831 submap_info->pages_shared_now_private =
7832 extended.pages_shared_now_private;
7833 submap_info->pages_dirtied = extended.pages_dirtied;
7834 submap_info->external_pager = extended.external_pager;
7835 submap_info->shadow_depth = extended.shadow_depth;
7836
7837 if (not_in_kdp) {
7838 vm_map_unlock_read(curr_map);
7839 }
7840
7841 return KERN_SUCCESS;
7842 }
7843
7844 /*
7845 * vm_region:
7846 *
7847 * User call to obtain information about a region in
7848 * a task's address map. Currently, the basic, basic 64-bit,
7849 * extended, and top info flavors are supported.
7850 *
7851 * XXX The reserved and behavior fields cannot be filled
7852 * in until the vm merge from the IK is completed, and
7853 * vm_reserve is implemented.
7854 */
7855
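/*
 * Illustrative sketch (not part of the build): querying a region with the
 * VM_REGION_BASIC_INFO_64 flavor.  The helper name is hypothetical; on
 * return, "address" and "size" describe the region containing (or
 * following) the address passed in.
 */
#if 0
static kern_return_t
example_basic_info_64(
	vm_map_t	map,
	vm_map_offset_t	where)
{
	vm_map_offset_t			address = where;
	vm_map_size_t			size;
	vm_region_basic_info_data_64_t	basic;
	mach_msg_type_number_t		count = VM_REGION_BASIC_INFO_COUNT_64;
	mach_port_t			name;

	return vm_map_region(map, &address, &size,
			     VM_REGION_BASIC_INFO_64,
			     (vm_region_info_t) &basic,
			     &count, &name);
}
#endif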
7856 kern_return_t
7857 vm_map_region(
7858 vm_map_t map,
7859 vm_map_offset_t *address, /* IN/OUT */
7860 vm_map_size_t *size, /* OUT */
7861 vm_region_flavor_t flavor, /* IN */
7862 vm_region_info_t info, /* OUT */
7863 mach_msg_type_number_t *count, /* IN/OUT */
7864 mach_port_t *object_name) /* OUT */
7865 {
7866 vm_map_entry_t tmp_entry;
7867 vm_map_entry_t entry;
7868 vm_map_offset_t start;
7869
7870 if (map == VM_MAP_NULL)
7871 return(KERN_INVALID_ARGUMENT);
7872
7873 switch (flavor) {
7874
7875 case VM_REGION_BASIC_INFO:
7876 /* legacy for old 32-bit objects info */
7877 {
7878 vm_region_basic_info_t basic;
7879
7880 if (*count < VM_REGION_BASIC_INFO_COUNT)
7881 return(KERN_INVALID_ARGUMENT);
7882
7883 basic = (vm_region_basic_info_t) info;
7884 *count = VM_REGION_BASIC_INFO_COUNT;
7885
7886 vm_map_lock_read(map);
7887
7888 start = *address;
7889 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
7890 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
7891 vm_map_unlock_read(map);
7892 return(KERN_INVALID_ADDRESS);
7893 }
7894 } else {
7895 entry = tmp_entry;
7896 }
7897
7898 start = entry->vme_start;
7899
7900 basic->offset = (uint32_t)entry->offset;
7901 basic->protection = entry->protection;
7902 basic->inheritance = entry->inheritance;
7903 basic->max_protection = entry->max_protection;
7904 basic->behavior = entry->behavior;
7905 basic->user_wired_count = entry->user_wired_count;
7906 basic->reserved = entry->is_sub_map;
7907 *address = start;
7908 *size = (entry->vme_end - start);
7909
7910 if (object_name) *object_name = IP_NULL;
7911 if (entry->is_sub_map) {
7912 basic->shared = FALSE;
7913 } else {
7914 basic->shared = entry->is_shared;
7915 }
7916
7917 vm_map_unlock_read(map);
7918 return(KERN_SUCCESS);
7919 }
7920
7921 case VM_REGION_BASIC_INFO_64:
7922 {
7923 vm_region_basic_info_64_t basic;
7924
7925 if (*count < VM_REGION_BASIC_INFO_COUNT_64)
7926 return(KERN_INVALID_ARGUMENT);
7927
7928 basic = (vm_region_basic_info_64_t) info;
7929 *count = VM_REGION_BASIC_INFO_COUNT_64;
7930
7931 vm_map_lock_read(map);
7932
7933 start = *address;
7934 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
7935 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
7936 vm_map_unlock_read(map);
7937 return(KERN_INVALID_ADDRESS);
7938 }
7939 } else {
7940 entry = tmp_entry;
7941 }
7942
7943 start = entry->vme_start;
7944
7945 basic->offset = entry->offset;
7946 basic->protection = entry->protection;
7947 basic->inheritance = entry->inheritance;
7948 basic->max_protection = entry->max_protection;
7949 basic->behavior = entry->behavior;
7950 basic->user_wired_count = entry->user_wired_count;
7951 basic->reserved = entry->is_sub_map;
7952 *address = start;
7953 *size = (entry->vme_end - start);
7954
7955 if (object_name) *object_name = IP_NULL;
7956 if (entry->is_sub_map) {
7957 basic->shared = FALSE;
7958 } else {
7959 basic->shared = entry->is_shared;
7960 }
7961
7962 vm_map_unlock_read(map);
7963 return(KERN_SUCCESS);
7964 }
7965 case VM_REGION_EXTENDED_INFO:
7966 {
7967 vm_region_extended_info_t extended;
7968
7969 if (*count < VM_REGION_EXTENDED_INFO_COUNT)
7970 return(KERN_INVALID_ARGUMENT);
7971
7972 extended = (vm_region_extended_info_t) info;
7973 *count = VM_REGION_EXTENDED_INFO_COUNT;
7974
7975 vm_map_lock_read(map);
7976
7977 start = *address;
7978 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
7979 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
7980 vm_map_unlock_read(map);
7981 return(KERN_INVALID_ADDRESS);
7982 }
7983 } else {
7984 entry = tmp_entry;
7985 }
7986 start = entry->vme_start;
7987
7988 extended->protection = entry->protection;
7989 extended->user_tag = entry->alias;
7990 extended->pages_resident = 0;
7991 extended->pages_swapped_out = 0;
7992 extended->pages_shared_now_private = 0;
7993 extended->pages_dirtied = 0;
7994 extended->external_pager = 0;
7995 extended->shadow_depth = 0;
7996
7997 vm_map_region_walk(map, start, entry, entry->offset, entry->vme_end - start, extended);
7998
7999 if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED)
8000 extended->share_mode = SM_PRIVATE;
8001
8002 if (object_name)
8003 *object_name = IP_NULL;
8004 *address = start;
8005 *size = (entry->vme_end - start);
8006
8007 vm_map_unlock_read(map);
8008 return(KERN_SUCCESS);
8009 }
8010 case VM_REGION_TOP_INFO:
8011 {
8012 vm_region_top_info_t top;
8013
8014 if (*count < VM_REGION_TOP_INFO_COUNT)
8015 return(KERN_INVALID_ARGUMENT);
8016
8017 top = (vm_region_top_info_t) info;
8018 *count = VM_REGION_TOP_INFO_COUNT;
8019
8020 vm_map_lock_read(map);
8021
8022 start = *address;
8023 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
8024 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
8025 vm_map_unlock_read(map);
8026 return(KERN_INVALID_ADDRESS);
8027 }
8028 } else {
8029 entry = tmp_entry;
8030
8031 }
8032 start = entry->vme_start;
8033
8034 top->private_pages_resident = 0;
8035 top->shared_pages_resident = 0;
8036
8037 vm_map_region_top_walk(entry, top);
8038
8039 if (object_name)
8040 *object_name = IP_NULL;
8041 *address = start;
8042 *size = (entry->vme_end - start);
8043
8044 vm_map_unlock_read(map);
8045 return(KERN_SUCCESS);
8046 }
8047 default:
8048 return(KERN_INVALID_ARGUMENT);
8049 }
8050 }
8051
8052 void
8053 vm_map_region_top_walk(
8054 vm_map_entry_t entry,
8055 vm_region_top_info_t top)
8056 {
8057 register struct vm_object *obj, *tmp_obj;
8058 register int ref_count;
8059
8060 if (entry->object.vm_object == 0 || entry->is_sub_map) {
8061 top->share_mode = SM_EMPTY;
8062 top->ref_count = 0;
8063 top->obj_id = 0;
8064 return;
8065 }
8066 {
8067 obj = entry->object.vm_object;
8068
8069 vm_object_lock(obj);
8070
8071 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
8072 ref_count--;
8073
8074 if (obj->shadow) {
8075 if (ref_count == 1)
8076 top->private_pages_resident = obj->resident_page_count;
8077 else
8078 top->shared_pages_resident = obj->resident_page_count;
8079 top->ref_count = ref_count;
8080 top->share_mode = SM_COW;
8081
8082 while ((tmp_obj = obj->shadow)) {
8083 vm_object_lock(tmp_obj);
8084 vm_object_unlock(obj);
8085 obj = tmp_obj;
8086
8087 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
8088 ref_count--;
8089
8090 top->shared_pages_resident += obj->resident_page_count;
8091 top->ref_count += ref_count - 1;
8092 }
8093 } else {
8094 if (entry->needs_copy) {
8095 top->share_mode = SM_COW;
8096 top->shared_pages_resident = obj->resident_page_count;
8097 } else {
8098 if (ref_count == 1 ||
8099 (ref_count == 2 && !(obj->pager_trusted) && !(obj->internal))) {
8100 top->share_mode = SM_PRIVATE;
8101 top->private_pages_resident = obj->resident_page_count;
8102 } else {
8103 top->share_mode = SM_SHARED;
8104 top->shared_pages_resident = obj->resident_page_count;
8105 }
8106 }
8107 top->ref_count = ref_count;
8108 }
8109 top->obj_id = (int)obj;
8110
8111 vm_object_unlock(obj);
8112 }
8113 }
8114
8115 void
8116 vm_map_region_walk(
8117 vm_map_t map,
8118 vm_map_offset_t va,
8119 vm_map_entry_t entry,
8120 vm_object_offset_t offset,
8121 vm_object_size_t range,
8122 vm_region_extended_info_t extended)
8123 {
8124 register struct vm_object *obj, *tmp_obj;
8125 register vm_map_offset_t last_offset;
8126 register int i;
8127 register int ref_count;
8128 struct vm_object *shadow_object;
8129 int shadow_depth;
8130
8131 if ((entry->object.vm_object == 0) ||
8132 (entry->is_sub_map) ||
8133 (entry->object.vm_object->phys_contiguous)) {
8134 extended->share_mode = SM_EMPTY;
8135 extended->ref_count = 0;
8136 return;
8137 }
8138 {
8139 obj = entry->object.vm_object;
8140
8141 vm_object_lock(obj);
8142
8143 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
8144 ref_count--;
8145
8146 for (last_offset = offset + range; offset < last_offset; offset += PAGE_SIZE_64, va += PAGE_SIZE)
8147 vm_map_region_look_for_page(map, va, obj, offset, ref_count, 0, extended);
8148
8149 shadow_object = obj->shadow;
8150 shadow_depth = 0;
8151 if (shadow_object != VM_OBJECT_NULL) {
8152 vm_object_lock(shadow_object);
8153 for (;
8154 shadow_object != VM_OBJECT_NULL;
8155 shadow_depth++) {
8156 vm_object_t next_shadow;
8157
8158 next_shadow = shadow_object->shadow;
8159 if (next_shadow) {
8160 vm_object_lock(next_shadow);
8161 }
8162 vm_object_unlock(shadow_object);
8163 shadow_object = next_shadow;
8164 }
8165 }
8166 extended->shadow_depth = shadow_depth;
8167
8168 if (extended->shadow_depth || entry->needs_copy)
8169 extended->share_mode = SM_COW;
8170 else {
8171 if (ref_count == 1)
8172 extended->share_mode = SM_PRIVATE;
8173 else {
8174 if (obj->true_share)
8175 extended->share_mode = SM_TRUESHARED;
8176 else
8177 extended->share_mode = SM_SHARED;
8178 }
8179 }
8180 extended->ref_count = ref_count - extended->shadow_depth;
8181
8182 for (i = 0; i < extended->shadow_depth; i++) {
8183 if ((tmp_obj = obj->shadow) == 0)
8184 break;
8185 vm_object_lock(tmp_obj);
8186 vm_object_unlock(obj);
8187
8188 if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress)
8189 ref_count--;
8190
8191 extended->ref_count += ref_count;
8192 obj = tmp_obj;
8193 }
8194 vm_object_unlock(obj);
8195
8196 if (extended->share_mode == SM_SHARED) {
8197 register vm_map_entry_t cur;
8198 register vm_map_entry_t last;
8199 int my_refs;
8200
8201 obj = entry->object.vm_object;
8202 last = vm_map_to_entry(map);
8203 my_refs = 0;
8204
8205 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
8206 ref_count--;
8207 for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next)
8208 my_refs += vm_map_region_count_obj_refs(cur, obj);
8209
8210 if (my_refs == ref_count)
8211 extended->share_mode = SM_PRIVATE_ALIASED;
8212 else if (my_refs > 1)
8213 extended->share_mode = SM_SHARED_ALIASED;
8214 }
8215 }
8216 }
8217
8218
8219 /* object is locked on entry and locked on return */
8220
8221
8222 static void
8223 vm_map_region_look_for_page(
8224 __unused vm_map_t map,
8225 __unused vm_map_offset_t va,
8226 vm_object_t object,
8227 vm_object_offset_t offset,
8228 int max_refcnt,
8229 int depth,
8230 vm_region_extended_info_t extended)
8231 {
8232 register vm_page_t p;
8233 register vm_object_t shadow;
8234 register int ref_count;
8235 vm_object_t caller_object;
8236
8237 shadow = object->shadow;
8238 caller_object = object;
8239
8240
8241 while (TRUE) {
8242
8243 if ( !(object->pager_trusted) && !(object->internal))
8244 extended->external_pager = 1;
8245
8246 if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
8247 if (shadow && (max_refcnt == 1))
8248 extended->pages_shared_now_private++;
8249
8250 if (!p->fictitious &&
8251 (p->dirty || pmap_is_modified(p->phys_page)))
8252 extended->pages_dirtied++;
8253
8254 extended->pages_resident++;
8255
8256 if(object != caller_object)
8257 vm_object_unlock(object);
8258
8259 return;
8260 }
8261 if (object->existence_map) {
8262 if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_EXISTS) {
8263
8264 extended->pages_swapped_out++;
8265
8266 if(object != caller_object)
8267 vm_object_unlock(object);
8268
8269 return;
8270 }
8271 }
8272 if (shadow) {
8273 vm_object_lock(shadow);
8274
8275 if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress)
8276 ref_count--;
8277
8278 if (++depth > extended->shadow_depth)
8279 extended->shadow_depth = depth;
8280
8281 if (ref_count > max_refcnt)
8282 max_refcnt = ref_count;
8283
8284 if(object != caller_object)
8285 vm_object_unlock(object);
8286
8287 offset = offset + object->shadow_offset;
8288 object = shadow;
8289 shadow = object->shadow;
8290 continue;
8291 }
8292 if(object != caller_object)
8293 vm_object_unlock(object);
8294 break;
8295 }
8296 }
8297
8298 static int
8299 vm_map_region_count_obj_refs(
8300 vm_map_entry_t entry,
8301 vm_object_t object)
8302 {
8303 register int ref_count;
8304 register vm_object_t chk_obj;
8305 register vm_object_t tmp_obj;
8306
8307 if (entry->object.vm_object == 0)
8308 return(0);
8309
8310 if (entry->is_sub_map)
8311 return(0);
8312 else {
8313 ref_count = 0;
8314
8315 chk_obj = entry->object.vm_object;
8316 vm_object_lock(chk_obj);
8317
8318 while (chk_obj) {
8319 if (chk_obj == object)
8320 ref_count++;
8321 tmp_obj = chk_obj->shadow;
8322 if (tmp_obj)
8323 vm_object_lock(tmp_obj);
8324 vm_object_unlock(chk_obj);
8325
8326 chk_obj = tmp_obj;
8327 }
8328 }
8329 return(ref_count);
8330 }
8331
8332
8333 /*
8334 * Routine: vm_map_simplify
8335 *
8336 * Description:
8337 * Attempt to simplify the map representation in
8338 * the vicinity of the given starting address.
8339 * Note:
8340 * This routine is intended primarily to keep the
8341 * kernel maps more compact -- they generally don't
8342 * benefit from the "expand a map entry" technology
8343 * at allocation time because the adjacent entry
8344 * is often wired down.
8345 */
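/*
 * A minimal usage sketch, assuming a hypothetical map "map" and a
 * hypothetical address "hint_addr" near a region that was just
 * deallocated; vm_map_simplify() takes the map lock itself, so the
 * map must not already be locked by the caller:
 *
 *	vm_map_simplify(map, hint_addr);
 */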
8346 void
8347 vm_map_simplify_entry(
8348 vm_map_t map,
8349 vm_map_entry_t this_entry)
8350 {
8351 vm_map_entry_t prev_entry;
8352
8353 counter(c_vm_map_simplify_entry_called++);
8354
8355 prev_entry = this_entry->vme_prev;
8356
8357 if ((this_entry != vm_map_to_entry(map)) &&
8358 (prev_entry != vm_map_to_entry(map)) &&
8359
8360 (prev_entry->vme_end == this_entry->vme_start) &&
8361
8362 (prev_entry->is_sub_map == FALSE) &&
8363 (this_entry->is_sub_map == FALSE) &&
8364
8365 (prev_entry->object.vm_object == this_entry->object.vm_object) &&
8366 ((prev_entry->offset + (prev_entry->vme_end -
8367 prev_entry->vme_start))
8368 == this_entry->offset) &&
8369
8370 (prev_entry->inheritance == this_entry->inheritance) &&
8371 (prev_entry->protection == this_entry->protection) &&
8372 (prev_entry->max_protection == this_entry->max_protection) &&
8373 (prev_entry->behavior == this_entry->behavior) &&
8374 (prev_entry->alias == this_entry->alias) &&
8375 (prev_entry->wired_count == this_entry->wired_count) &&
8376 (prev_entry->user_wired_count == this_entry->user_wired_count) &&
8377
8378 (prev_entry->needs_copy == this_entry->needs_copy) &&
8379
8380 (prev_entry->use_pmap == FALSE) &&
8381 (this_entry->use_pmap == FALSE) &&
8382 (prev_entry->in_transition == FALSE) &&
8383 (this_entry->in_transition == FALSE) &&
8384 (prev_entry->needs_wakeup == FALSE) &&
8385 (this_entry->needs_wakeup == FALSE) &&
8386 (prev_entry->is_shared == FALSE) &&
8387 (this_entry->is_shared == FALSE)
8388 ) {
8389 _vm_map_entry_unlink(&map->hdr, prev_entry);
8390 this_entry->vme_start = prev_entry->vme_start;
8391 this_entry->offset = prev_entry->offset;
8392 vm_object_deallocate(prev_entry->object.vm_object);
8393 vm_map_entry_dispose(map, prev_entry);
8394 SAVE_HINT_MAP_WRITE(map, this_entry);
8395 counter(c_vm_map_simplified++);
8396 }
8397 }
8398
8399 void
8400 vm_map_simplify(
8401 vm_map_t map,
8402 vm_map_offset_t start)
8403 {
8404 vm_map_entry_t this_entry;
8405
8406 vm_map_lock(map);
8407 if (vm_map_lookup_entry(map, start, &this_entry)) {
8408 vm_map_simplify_entry(map, this_entry);
8409 vm_map_simplify_entry(map, this_entry->vme_next);
8410 }
8411 counter(c_vm_map_simplify_called++);
8412 vm_map_unlock(map);
8413 }
8414
8415 static void
8416 vm_map_simplify_range(
8417 vm_map_t map,
8418 vm_map_offset_t start,
8419 vm_map_offset_t end)
8420 {
8421 vm_map_entry_t entry;
8422
8423 /*
8424 * The map should be locked (for "write") by the caller.
8425 */
8426
8427 if (start >= end) {
8428 /* invalid address range */
8429 return;
8430 }
8431
8432 if (!vm_map_lookup_entry(map, start, &entry)) {
8433 /* "start" is not mapped and "entry" ends before "start" */
8434 if (entry == vm_map_to_entry(map)) {
8435 /* start with first entry in the map */
8436 entry = vm_map_first_entry(map);
8437 } else {
8438 /* start with next entry */
8439 entry = entry->vme_next;
8440 }
8441 }
8442
8443 while (entry != vm_map_to_entry(map) &&
8444 entry->vme_start <= end) {
8445 /* try to coalesce "entry" with its previous entry */
8446 vm_map_simplify_entry(map, entry);
8447 entry = entry->vme_next;
8448 }
8449 }
8450
8451
8452 /*
8453 * Routine: vm_map_machine_attribute
8454 * Purpose:
8455 * Provide machine-specific attributes to mappings,
8456 * such as cacheability, for machines that provide
8457 * them. NUMA architectures and machines with big/strange
8458 * caches will use this.
8459 * Note:
8460 * Responsibilities for locking and checking are handled here;
8461 * everything else is handled in the pmap module. If any non-volatile
8462 * information must be kept, the pmap module should handle
8463 * it itself. [This assumes that attributes do not
8464 * need to be inherited, which seems ok to me]
8465 */
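/*
 * A minimal usage sketch, assuming a hypothetical map and range and
 * assuming that a cache flush (MATTR_VAL_CACHE_FLUSH from
 * <mach/vm_attributes.h>) is the desired operation; the routine takes
 * the map lock itself:
 *
 *	vm_machine_attribute_val_t	value = MATTR_VAL_CACHE_FLUSH;
 *	kern_return_t			kr;
 *
 *	kr = vm_map_machine_attribute(map, start, end, MATTR_CACHE, &value);
 */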
8466 kern_return_t
8467 vm_map_machine_attribute(
8468 vm_map_t map,
8469 vm_map_offset_t start,
8470 vm_map_offset_t end,
8471 vm_machine_attribute_t attribute,
8472 vm_machine_attribute_val_t* value) /* IN/OUT */
8473 {
8474 kern_return_t ret;
8475 vm_map_size_t sync_size;
8476 vm_map_entry_t entry;
8477
8478 if (start < vm_map_min(map) || end > vm_map_max(map))
8479 return KERN_INVALID_ADDRESS;
8480
8481 /* Figure how much memory we need to flush (in page increments) */
8482 sync_size = end - start;
8483
8484 vm_map_lock(map);
8485
8486 if (attribute != MATTR_CACHE) {
8487 /* If we don't have to find physical addresses, we */
8488 /* don't have to do an explicit traversal here. */
8489 ret = pmap_attribute(map->pmap, start, end-start,
8490 attribute, value);
8491 vm_map_unlock(map);
8492 return ret;
8493 }
8494
8495 ret = KERN_SUCCESS; /* Assume it all worked */
8496
8497 while(sync_size) {
8498 if (vm_map_lookup_entry(map, start, &entry)) {
8499 vm_map_size_t sub_size;
8500 if((entry->vme_end - start) > sync_size) {
8501 sub_size = sync_size;
8502 sync_size = 0;
8503 } else {
8504 sub_size = entry->vme_end - start;
8505 sync_size -= sub_size;
8506 }
8507 if(entry->is_sub_map) {
8508 vm_map_offset_t sub_start;
8509 vm_map_offset_t sub_end;
8510
8511 sub_start = (start - entry->vme_start)
8512 + entry->offset;
8513 sub_end = sub_start + sub_size;
8514 vm_map_machine_attribute(
8515 entry->object.sub_map,
8516 sub_start,
8517 sub_end,
8518 attribute, value);
8519 } else {
8520 if(entry->object.vm_object) {
8521 vm_page_t m;
8522 vm_object_t object;
8523 vm_object_t base_object;
8524 vm_object_t last_object;
8525 vm_object_offset_t offset;
8526 vm_object_offset_t base_offset;
8527 vm_map_size_t range;
8528 range = sub_size;
8529 offset = (start - entry->vme_start)
8530 + entry->offset;
8531 base_offset = offset;
8532 object = entry->object.vm_object;
8533 base_object = object;
8534 last_object = NULL;
8535
8536 vm_object_lock(object);
8537
8538 while (range) {
8539 m = vm_page_lookup(
8540 object, offset);
8541
8542 if (m && !m->fictitious) {
8543 ret =
8544 pmap_attribute_cache_sync(
8545 m->phys_page,
8546 PAGE_SIZE,
8547 attribute, value);
8548
8549 } else if (object->shadow) {
8550 offset = offset + object->shadow_offset;
8551 last_object = object;
8552 object = object->shadow;
8553 vm_object_lock(last_object->shadow);
8554 vm_object_unlock(last_object);
8555 continue;
8556 }
8557 range -= PAGE_SIZE;
8558
8559 if (base_object != object) {
8560 vm_object_unlock(object);
8561 vm_object_lock(base_object);
8562 object = base_object;
8563 }
8564 /* Bump to the next page */
8565 base_offset += PAGE_SIZE;
8566 offset = base_offset;
8567 }
8568 vm_object_unlock(object);
8569 }
8570 }
8571 start += sub_size;
8572 } else {
8573 vm_map_unlock(map);
8574 return KERN_FAILURE;
8575 }
8576
8577 }
8578
8579 vm_map_unlock(map);
8580
8581 return ret;
8582 }
8583
8584 /*
8585 * vm_map_behavior_set:
8586 *
8587 * Sets the paging reference behavior of the specified address
8588 * range in the target map. Paging reference behavior affects
8589 * how pagein operations resulting from faults on the map will be
8590 * clustered.
8591 */
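/*
 * A minimal usage sketch, assuming a hypothetical map and a range
 * [start, end) that the caller expects to scan linearly; requesting
 * sequential behavior lets pageins be clustered more aggressively:
 *
 *	kern_return_t	kr;
 *
 *	kr = vm_map_behavior_set(map, start, end, VM_BEHAVIOR_SEQUENTIAL);
 */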
8592 kern_return_t
8593 vm_map_behavior_set(
8594 vm_map_t map,
8595 vm_map_offset_t start,
8596 vm_map_offset_t end,
8597 vm_behavior_t new_behavior)
8598 {
8599 register vm_map_entry_t entry;
8600 vm_map_entry_t temp_entry;
8601
8602 XPR(XPR_VM_MAP,
8603 "vm_map_behavior_set, 0x%X start 0x%X end 0x%X behavior %d",
8604 (integer_t)map, start, end, new_behavior, 0);
8605
8606 switch (new_behavior) {
8607 case VM_BEHAVIOR_DEFAULT:
8608 case VM_BEHAVIOR_RANDOM:
8609 case VM_BEHAVIOR_SEQUENTIAL:
8610 case VM_BEHAVIOR_RSEQNTL:
8611 break;
8612 case VM_BEHAVIOR_WILLNEED:
8613 case VM_BEHAVIOR_DONTNEED:
8614 new_behavior = VM_BEHAVIOR_DEFAULT;
8615 break;
8616 default:
8617 return(KERN_INVALID_ARGUMENT);
8618 }
8619
8620 vm_map_lock(map);
8621
8622 /*
8623 * The entire address range must be valid for the map.
8624 * Note that vm_map_range_check() does a
8625 * vm_map_lookup_entry() internally and returns the
8626 * entry containing the start of the address range if
8627 * the entire range is valid.
8628 */
8629 if (vm_map_range_check(map, start, end, &temp_entry)) {
8630 entry = temp_entry;
8631 vm_map_clip_start(map, entry, start);
8632 }
8633 else {
8634 vm_map_unlock(map);
8635 return(KERN_INVALID_ADDRESS);
8636 }
8637
8638 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
8639 vm_map_clip_end(map, entry, end);
8640
8641 entry->behavior = new_behavior;
8642
8643 entry = entry->vme_next;
8644 }
8645
8646 vm_map_unlock(map);
8647 return(KERN_SUCCESS);
8648 }
8649
8650
8651 #include <mach_kdb.h>
8652 #if MACH_KDB
8653 #include <ddb/db_output.h>
8654 #include <vm/vm_print.h>
8655
8656 #define printf db_printf
8657
8658 /*
8659 * Forward declarations for internal functions.
8660 */
8661 extern void vm_map_links_print(
8662 struct vm_map_links *links);
8663
8664 extern void vm_map_header_print(
8665 struct vm_map_header *header);
8666
8667 extern void vm_map_entry_print(
8668 vm_map_entry_t entry);
8669
8670 extern void vm_follow_entry(
8671 vm_map_entry_t entry);
8672
8673 extern void vm_follow_map(
8674 vm_map_t map);
8675
8676 /*
8677 * vm_map_links_print: [ debug ]
8678 */
8679 void
8680 vm_map_links_print(
8681 struct vm_map_links *links)
8682 {
8683 iprintf("prev = %08X next = %08X start = %016llX end = %016llX\n",
8684 links->prev,
8685 links->next,
8686 (unsigned long long)links->start,
8687 (unsigned long long)links->end);
8688 }
8689
8690 /*
8691 * vm_map_header_print: [ debug ]
8692 */
8693 void
8694 vm_map_header_print(
8695 struct vm_map_header *header)
8696 {
8697 vm_map_links_print(&header->links);
8698 iprintf("nentries = %08X, %sentries_pageable\n",
8699 header->nentries,
8700 (header->entries_pageable ? "" : "!"));
8701 }
8702
8703 /*
8704 * vm_follow_entry: [ debug ]
8705 */
8706 void
8707 vm_follow_entry(
8708 vm_map_entry_t entry)
8709 {
8710 int shadows;
8711
8712 iprintf("map entry %08X\n", entry);
8713
8714 db_indent += 2;
8715
8716 shadows = vm_follow_object(entry->object.vm_object);
8717 iprintf("Total objects : %d\n",shadows);
8718
8719 db_indent -= 2;
8720 }
8721
8722 /*
8723 * vm_map_entry_print: [ debug ]
8724 */
8725 void
8726 vm_map_entry_print(
8727 register vm_map_entry_t entry)
8728 {
8729 static const char *inheritance_name[4] =
8730 { "share", "copy", "none", "?"};
8731 static const char *behavior_name[4] =
8732 { "dflt", "rand", "seqtl", "rseqntl" };
8733
8734 iprintf("map entry %08X - prev = %08X next = %08X\n", entry, entry->vme_prev, entry->vme_next);
8735
8736 db_indent += 2;
8737
8738 vm_map_links_print(&entry->links);
8739
8740 iprintf("start = %016llX end = %016llX - prot=%x/%x/%s\n",
8741 (unsigned long long)entry->vme_start,
8742 (unsigned long long)entry->vme_end,
8743 entry->protection,
8744 entry->max_protection,
8745 inheritance_name[(entry->inheritance & 0x3)]);
8746
8747 iprintf("behavior = %s, wired_count = %d, user_wired_count = %d\n",
8748 behavior_name[(entry->behavior & 0x3)],
8749 entry->wired_count,
8750 entry->user_wired_count);
8751 iprintf("%sin_transition, %sneeds_wakeup\n",
8752 (entry->in_transition ? "" : "!"),
8753 (entry->needs_wakeup ? "" : "!"));
8754
8755 if (entry->is_sub_map) {
8756 iprintf("submap = %08X - offset = %016llX\n",
8757 entry->object.sub_map,
8758 (unsigned long long)entry->offset);
8759 } else {
8760 iprintf("object = %08X offset = %016llX - ",
8761 entry->object.vm_object,
8762 (unsigned long long)entry->offset);
8763 printf("%sis_shared, %sneeds_copy\n",
8764 (entry->is_shared ? "" : "!"),
8765 (entry->needs_copy ? "" : "!"));
8766 }
8767
8768 db_indent -= 2;
8769 }
8770
8771 /*
8772 * vm_follow_map: [ debug ]
8773 */
8774 void
8775 vm_follow_map(
8776 vm_map_t map)
8777 {
8778 register vm_map_entry_t entry;
8779
8780 iprintf("task map %08X\n", map);
8781
8782 db_indent += 2;
8783
8784 for (entry = vm_map_first_entry(map);
8785 entry && entry != vm_map_to_entry(map);
8786 entry = entry->vme_next) {
8787 vm_follow_entry(entry);
8788 }
8789
8790 db_indent -= 2;
8791 }
8792
8793 /*
8794 * vm_map_print: [ debug ]
8795 */
8796 void
8797 vm_map_print(
8798 db_addr_t inmap)
8799 {
8800 register vm_map_entry_t entry;
8801 vm_map_t map;
8802 #if TASK_SWAPPER
8803 char *swstate;
8804 #endif /* TASK_SWAPPER */
8805
8806 map = (vm_map_t)(long)
8807 inmap; /* Make sure we have the right type */
8808
8809 iprintf("task map %08X\n", map);
8810
8811 db_indent += 2;
8812
8813 vm_map_header_print(&map->hdr);
8814
8815 iprintf("pmap = %08X size = %08X ref = %d hint = %08X first_free = %08X\n",
8816 map->pmap,
8817 map->size,
8818 map->ref_count,
8819 map->hint,
8820 map->first_free);
8821
8822 iprintf("%swait_for_space, %swiring_required, timestamp = %d\n",
8823 (map->wait_for_space ? "" : "!"),
8824 (map->wiring_required ? "" : "!"),
8825 map->timestamp);
8826
8827 #if TASK_SWAPPER
8828 switch (map->sw_state) {
8829 case MAP_SW_IN:
8830 swstate = "SW_IN";
8831 break;
8832 case MAP_SW_OUT:
8833 swstate = "SW_OUT";
8834 break;
8835 default:
8836 swstate = "????";
8837 break;
8838 }
8839 iprintf("res = %d, sw_state = %s\n", map->res_count, swstate);
8840 #endif /* TASK_SWAPPER */
8841
8842 for (entry = vm_map_first_entry(map);
8843 entry && entry != vm_map_to_entry(map);
8844 entry = entry->vme_next) {
8845 vm_map_entry_print(entry);
8846 }
8847
8848 db_indent -= 2;
8849 }
8850
8851 /*
8852 * Routine: vm_map_copy_print
8853 * Purpose:
8854 * Pretty-print a copy object for ddb.
8855 */
8856
8857 void
8858 vm_map_copy_print(
8859 db_addr_t incopy)
8860 {
8861 vm_map_copy_t copy;
8862 vm_map_entry_t entry;
8863
8864 copy = (vm_map_copy_t)(long)
8865 incopy; /* Make sure we have the right type */
8866
8867 printf("copy object 0x%x\n", copy);
8868
8869 db_indent += 2;
8870
8871 iprintf("type=%d", copy->type);
8872 switch (copy->type) {
8873 case VM_MAP_COPY_ENTRY_LIST:
8874 printf("[entry_list]");
8875 break;
8876
8877 case VM_MAP_COPY_OBJECT:
8878 printf("[object]");
8879 break;
8880
8881 case VM_MAP_COPY_KERNEL_BUFFER:
8882 printf("[kernel_buffer]");
8883 break;
8884
8885 default:
8886 printf("[bad type]");
8887 break;
8888 }
8889 printf(", offset=0x%llx", (unsigned long long)copy->offset);
8890 printf(", size=0x%x\n", copy->size);
8891
8892 switch (copy->type) {
8893 case VM_MAP_COPY_ENTRY_LIST:
8894 vm_map_header_print(&copy->cpy_hdr);
8895 for (entry = vm_map_copy_first_entry(copy);
8896 entry && entry != vm_map_copy_to_entry(copy);
8897 entry = entry->vme_next) {
8898 vm_map_entry_print(entry);
8899 }
8900 break;
8901
8902 case VM_MAP_COPY_OBJECT:
8903 iprintf("object=0x%x\n", copy->cpy_object);
8904 break;
8905
8906 case VM_MAP_COPY_KERNEL_BUFFER:
8907 iprintf("kernel buffer=0x%x", copy->cpy_kdata);
8908 printf(", kalloc_size=0x%x\n", copy->cpy_kalloc_size);
8909 break;
8910
8911 }
8912
8913 db_indent -=2;
8914 }
8915
8916 /*
8917 * db_vm_map_total_size(map) [ debug ]
8918 *
8919 * return the total virtual size (in bytes) of the map
8920 */
8921 vm_map_size_t
8922 db_vm_map_total_size(
8923 db_addr_t inmap)
8924 {
8925 vm_map_entry_t entry;
8926 vm_map_size_t total;
8927 vm_map_t map;
8928
8929 map = (vm_map_t)(long)
8930 inmap; /* Make sure we have the right type */
8931
8932 total = 0;
8933 for (entry = vm_map_first_entry(map);
8934 entry != vm_map_to_entry(map);
8935 entry = entry->vme_next) {
8936 total += entry->vme_end - entry->vme_start;
8937 }
8938
8939 return total;
8940 }
8941
8942 #endif /* MACH_KDB */
8943
8944 /*
8945 * Routine: vm_map_entry_insert
8946 *
8947 * Description: This routine inserts a new vm_map_entry into a locked map.
8948 */
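/*
 * A minimal usage sketch, assuming the caller already holds the map
 * lock and that "insp_entry" is the entry after which the new range
 * [start, end) should be linked (all other names are hypothetical):
 *
 *	vm_map_entry_t	new_entry;
 *
 *	new_entry = vm_map_entry_insert(map, insp_entry, start, end,
 *					object, offset,
 *					FALSE, FALSE, FALSE,	(needs_copy, is_shared, in_transition)
 *					VM_PROT_DEFAULT, VM_PROT_ALL,
 *					VM_BEHAVIOR_DEFAULT,
 *					VM_INHERIT_DEFAULT, 0);
 */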
8949 vm_map_entry_t
8950 vm_map_entry_insert(
8951 vm_map_t map,
8952 vm_map_entry_t insp_entry,
8953 vm_map_offset_t start,
8954 vm_map_offset_t end,
8955 vm_object_t object,
8956 vm_object_offset_t offset,
8957 boolean_t needs_copy,
8958 boolean_t is_shared,
8959 boolean_t in_transition,
8960 vm_prot_t cur_protection,
8961 vm_prot_t max_protection,
8962 vm_behavior_t behavior,
8963 vm_inherit_t inheritance,
8964 unsigned wired_count)
8965 {
8966 vm_map_entry_t new_entry;
8967
8968 assert(insp_entry != (vm_map_entry_t)0);
8969
8970 new_entry = vm_map_entry_create(map);
8971
8972 new_entry->vme_start = start;
8973 new_entry->vme_end = end;
8974 assert(page_aligned(new_entry->vme_start));
8975 assert(page_aligned(new_entry->vme_end));
8976
8977 new_entry->object.vm_object = object;
8978 new_entry->offset = offset;
8979 new_entry->is_shared = is_shared;
8980 new_entry->is_sub_map = FALSE;
8981 new_entry->needs_copy = needs_copy;
8982 new_entry->in_transition = in_transition;
8983 new_entry->needs_wakeup = FALSE;
8984 new_entry->inheritance = inheritance;
8985 new_entry->protection = cur_protection;
8986 new_entry->max_protection = max_protection;
8987 new_entry->behavior = behavior;
8988 new_entry->wired_count = wired_count;
8989 new_entry->user_wired_count = 0;
8990 new_entry->use_pmap = FALSE;
8991 new_entry->alias = 0;
8992
8993 /*
8994 * Insert the new entry into the list.
8995 */
8996
8997 vm_map_entry_link(map, insp_entry, new_entry);
8998 map->size += end - start;
8999
9000 /*
9001 * Update the free space hint and the lookup hint.
9002 */
9003
9004 SAVE_HINT_MAP_WRITE(map, new_entry);
9005 return new_entry;
9006 }
9007
9008 /*
9009 * Routine: vm_map_remap_extract
9010 *
9011 * Description: This routine extracts a list of vm_map_entry structures from a map.
9012 */
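/*
 * The usual consumption pattern (see vm_map_remap() below) is to
 * extract into a local vm_map_header and then walk the temporary
 * entry list, linking each entry into the destination map.  A
 * condensed sketch, using the same local names as vm_map_remap():
 *
 *	struct vm_map_header	map_header;
 *
 *	result = vm_map_remap_extract(src_map, memory_address, size,
 *				      copy, &map_header,
 *				      cur_protection, max_protection,
 *				      inheritance,
 *				      target_map->hdr.entries_pageable);
 *	for (entry = map_header.links.next;
 *	     entry != (struct vm_map_entry *)&map_header.links;
 *	     entry = new_entry) {
 *		new_entry = entry->vme_next;
 *		_vm_map_entry_unlink(&map_header, entry);
 *		... adjust vme_start/vme_end and vm_map_entry_link() ...
 *	}
 */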
9013 static kern_return_t
9014 vm_map_remap_extract(
9015 vm_map_t map,
9016 vm_map_offset_t addr,
9017 vm_map_size_t size,
9018 boolean_t copy,
9019 struct vm_map_header *map_header,
9020 vm_prot_t *cur_protection,
9021 vm_prot_t *max_protection,
9022 /* What, no behavior? */
9023 vm_inherit_t inheritance,
9024 boolean_t pageable)
9025 {
9026 kern_return_t result;
9027 vm_map_size_t mapped_size;
9028 vm_map_size_t tmp_size;
9029 vm_map_entry_t src_entry; /* result of last map lookup */
9030 vm_map_entry_t new_entry;
9031 vm_object_offset_t offset;
9032 vm_map_offset_t map_address;
9033 vm_map_offset_t src_start; /* start of entry to map */
9034 vm_map_offset_t src_end; /* end of region to be mapped */
9035 vm_object_t object;
9036 vm_map_version_t version;
9037 boolean_t src_needs_copy;
9038 boolean_t new_entry_needs_copy;
9039
9040 assert(map != VM_MAP_NULL);
9041 assert(size != 0 && size == vm_map_round_page(size));
9042 assert(inheritance == VM_INHERIT_NONE ||
9043 inheritance == VM_INHERIT_COPY ||
9044 inheritance == VM_INHERIT_SHARE);
9045
9046 /*
9047 * Compute start and end of region.
9048 */
9049 src_start = vm_map_trunc_page(addr);
9050 src_end = vm_map_round_page(src_start + size);
9051
9052 /*
9053 * Initialize map_header.
9054 */
9055 map_header->links.next = (struct vm_map_entry *)&map_header->links;
9056 map_header->links.prev = (struct vm_map_entry *)&map_header->links;
9057 map_header->nentries = 0;
9058 map_header->entries_pageable = pageable;
9059
9060 *cur_protection = VM_PROT_ALL;
9061 *max_protection = VM_PROT_ALL;
9062
9063 map_address = 0;
9064 mapped_size = 0;
9065 result = KERN_SUCCESS;
9066
9067 /*
9068 * The specified source virtual space might correspond to
9069 * multiple map entries, need to loop on them.
9070 */
9071 vm_map_lock(map);
9072 while (mapped_size != size) {
9073 vm_map_size_t entry_size;
9074
9075 /*
9076 * Find the beginning of the region.
9077 */
9078 if (! vm_map_lookup_entry(map, src_start, &src_entry)) {
9079 result = KERN_INVALID_ADDRESS;
9080 break;
9081 }
9082
9083 if (src_start < src_entry->vme_start ||
9084 (mapped_size && src_start != src_entry->vme_start)) {
9085 result = KERN_INVALID_ADDRESS;
9086 break;
9087 }
9088
9089 if(src_entry->is_sub_map) {
9090 result = KERN_INVALID_ADDRESS;
9091 break;
9092 }
9093
9094 tmp_size = size - mapped_size;
9095 if (src_end > src_entry->vme_end)
9096 tmp_size -= (src_end - src_entry->vme_end);
9097
9098 entry_size = (vm_map_size_t)(src_entry->vme_end -
9099 src_entry->vme_start);
9100
9101 if(src_entry->is_sub_map) {
9102 vm_map_reference(src_entry->object.sub_map);
9103 object = VM_OBJECT_NULL;
9104 } else {
9105 object = src_entry->object.vm_object;
9106
9107 if (object == VM_OBJECT_NULL) {
9108 object = vm_object_allocate(entry_size);
9109 src_entry->offset = 0;
9110 src_entry->object.vm_object = object;
9111 } else if (object->copy_strategy !=
9112 MEMORY_OBJECT_COPY_SYMMETRIC) {
9113 /*
9114 * We are already using an asymmetric
9115 * copy, and therefore we already have
9116 * the right object.
9117 */
9118 assert(!src_entry->needs_copy);
9119 } else if (src_entry->needs_copy || object->shadowed ||
9120 (object->internal && !object->true_share &&
9121 !src_entry->is_shared &&
9122 object->size > entry_size)) {
9123
9124 vm_object_shadow(&src_entry->object.vm_object,
9125 &src_entry->offset,
9126 entry_size);
9127
9128 if (!src_entry->needs_copy &&
9129 (src_entry->protection & VM_PROT_WRITE)) {
9130 vm_prot_t prot;
9131
9132 prot = src_entry->protection & ~VM_PROT_WRITE;
9133 #ifdef STACK_ONLY_NX
9134 if (src_entry->alias != VM_MEMORY_STACK && prot)
9135 prot |= VM_PROT_EXECUTE;
9136 #endif
9137 if(map->mapped) {
9138 vm_object_pmap_protect(
9139 src_entry->object.vm_object,
9140 src_entry->offset,
9141 entry_size,
9142 PMAP_NULL,
9143 src_entry->vme_start,
9144 prot);
9145 } else {
9146 pmap_protect(vm_map_pmap(map),
9147 src_entry->vme_start,
9148 src_entry->vme_end,
9149 prot);
9150 }
9151 }
9152
9153 object = src_entry->object.vm_object;
9154 src_entry->needs_copy = FALSE;
9155 }
9156
9157
9158 vm_object_lock(object);
9159 object->ref_count++; /* object ref. for new entry */
9160 VM_OBJ_RES_INCR(object);
9161 if (object->copy_strategy ==
9162 MEMORY_OBJECT_COPY_SYMMETRIC) {
9163 object->copy_strategy =
9164 MEMORY_OBJECT_COPY_DELAY;
9165 }
9166 vm_object_unlock(object);
9167 }
9168
9169 offset = src_entry->offset + (src_start - src_entry->vme_start);
9170
9171 new_entry = _vm_map_entry_create(map_header);
9172 vm_map_entry_copy(new_entry, src_entry);
9173 new_entry->use_pmap = FALSE; /* clr address space specifics */
9174
9175 new_entry->vme_start = map_address;
9176 new_entry->vme_end = map_address + tmp_size;
9177 new_entry->inheritance = inheritance;
9178 new_entry->offset = offset;
9179
9180 /*
9181 * The new region has to be copied now if required.
9182 */
9183 RestartCopy:
9184 if (!copy) {
9185 src_entry->is_shared = TRUE;
9186 new_entry->is_shared = TRUE;
9187 if (!(new_entry->is_sub_map))
9188 new_entry->needs_copy = FALSE;
9189
9190 } else if (src_entry->is_sub_map) {
9191 /* make this a COW sub_map if not already */
9192 new_entry->needs_copy = TRUE;
9193 object = VM_OBJECT_NULL;
9194 } else if (src_entry->wired_count == 0 &&
9195 vm_object_copy_quickly(&new_entry->object.vm_object,
9196 new_entry->offset,
9197 (new_entry->vme_end -
9198 new_entry->vme_start),
9199 &src_needs_copy,
9200 &new_entry_needs_copy)) {
9201
9202 new_entry->needs_copy = new_entry_needs_copy;
9203 new_entry->is_shared = FALSE;
9204
9205 /*
9206 * Handle copy_on_write semantics.
9207 */
9208 if (src_needs_copy && !src_entry->needs_copy) {
9209 vm_prot_t prot;
9210
9211 prot = src_entry->protection & ~VM_PROT_WRITE;
9212 #ifdef STACK_ONLY_NX
9213 if (src_entry->alias != VM_MEMORY_STACK && prot)
9214 prot |= VM_PROT_EXECUTE;
9215 #endif
9216 vm_object_pmap_protect(object,
9217 offset,
9218 entry_size,
9219 ((src_entry->is_shared
9220 || map->mapped) ?
9221 PMAP_NULL : map->pmap),
9222 src_entry->vme_start,
9223 prot);
9224
9225 src_entry->needs_copy = TRUE;
9226 }
9227 /*
9228 * Throw away the old object reference of the new entry.
9229 */
9230 vm_object_deallocate(object);
9231
9232 } else {
9233 new_entry->is_shared = FALSE;
9234
9235 /*
9236 * The map can be safely unlocked since we
9237 * already hold a reference on the object.
9238 *
9239 * Record the timestamp of the map for later
9240 * verification, and unlock the map.
9241 */
9242 version.main_timestamp = map->timestamp;
9243 vm_map_unlock(map); /* Increments timestamp once! */
9244
9245 /*
9246 * Perform the copy.
9247 */
9248 if (src_entry->wired_count > 0) {
9249 vm_object_lock(object);
9250 result = vm_object_copy_slowly(
9251 object,
9252 offset,
9253 entry_size,
9254 THREAD_UNINT,
9255 &new_entry->object.vm_object);
9256
9257 new_entry->offset = 0;
9258 new_entry->needs_copy = FALSE;
9259 } else {
9260 result = vm_object_copy_strategically(
9261 object,
9262 offset,
9263 entry_size,
9264 &new_entry->object.vm_object,
9265 &new_entry->offset,
9266 &new_entry_needs_copy);
9267
9268 new_entry->needs_copy = new_entry_needs_copy;
9269 }
9270
9271 /*
9272 * Throw away the old object reference of the new entry.
9273 */
9274 vm_object_deallocate(object);
9275
9276 if (result != KERN_SUCCESS &&
9277 result != KERN_MEMORY_RESTART_COPY) {
9278 _vm_map_entry_dispose(map_header, new_entry);
9279 break;
9280 }
9281
9282 /*
9283 * Verify that the map has not substantially
9284 * changed while the copy was being made.
9285 */
9286
9287 vm_map_lock(map);
9288 if (version.main_timestamp + 1 != map->timestamp) {
9289 /*
9290 * Simple version comparison failed.
9291 *
9292 * Retry the lookup and verify that the
9293 * same object/offset are still present.
9294 */
9295 vm_object_deallocate(new_entry->
9296 object.vm_object);
9297 _vm_map_entry_dispose(map_header, new_entry);
9298 if (result == KERN_MEMORY_RESTART_COPY)
9299 result = KERN_SUCCESS;
9300 continue;
9301 }
9302
9303 if (result == KERN_MEMORY_RESTART_COPY) {
9304 vm_object_reference(object);
9305 goto RestartCopy;
9306 }
9307 }
9308
9309 _vm_map_entry_link(map_header,
9310 map_header->links.prev, new_entry);
9311
9312 *cur_protection &= src_entry->protection;
9313 *max_protection &= src_entry->max_protection;
9314
9315 map_address += tmp_size;
9316 mapped_size += tmp_size;
9317 src_start += tmp_size;
9318
9319 } /* end while */
9320
9321 vm_map_unlock(map);
9322 if (result != KERN_SUCCESS) {
9323 /*
9324 * Free all allocated elements.
9325 */
9326 for (src_entry = map_header->links.next;
9327 src_entry != (struct vm_map_entry *)&map_header->links;
9328 src_entry = new_entry) {
9329 new_entry = src_entry->vme_next;
9330 _vm_map_entry_unlink(map_header, src_entry);
9331 vm_object_deallocate(src_entry->object.vm_object);
9332 _vm_map_entry_dispose(map_header, src_entry);
9333 }
9334 }
9335 return result;
9336 }
9337
9338 /*
9339 * Routine: vm_remap
9340 *
9341 * Map a portion of a task's address space.
9342 * The mapped region must not overlap more than
9343 * one VM memory object. Protection and
9344 * inheritance attributes remain the same
9345 * as in the original task and are returned as out parameters.
9346 * Source and target task can be identical.
9347 * Other attributes are identical to those for vm_map().
9348 */
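/*
 * A minimal usage sketch, assuming hypothetical source and target maps
 * and a page-aligned region: share (rather than copy) "size" bytes
 * starting at "memory_address" in src_map, placed anywhere in
 * target_map; the effective protections come back in the out
 * parameters.
 *
 *	vm_map_address_t	target_addr = 0;
 *	vm_prot_t		cur_prot, max_prot;
 *	kern_return_t		kr;
 *
 *	kr = vm_map_remap(target_map, &target_addr, size,
 *			  (vm_map_offset_t) 0,		(mask: no alignment)
 *			  TRUE,				(anywhere)
 *			  src_map, memory_address,
 *			  FALSE,			(copy: share instead)
 *			  &cur_prot, &max_prot,
 *			  VM_INHERIT_NONE);
 */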
9349 kern_return_t
9350 vm_map_remap(
9351 vm_map_t target_map,
9352 vm_map_address_t *address,
9353 vm_map_size_t size,
9354 vm_map_offset_t mask,
9355 boolean_t anywhere,
9356 vm_map_t src_map,
9357 vm_map_offset_t memory_address,
9358 boolean_t copy,
9359 vm_prot_t *cur_protection,
9360 vm_prot_t *max_protection,
9361 vm_inherit_t inheritance)
9362 {
9363 kern_return_t result;
9364 vm_map_entry_t entry;
9365 vm_map_entry_t insp_entry = VM_MAP_ENTRY_NULL;
9366 vm_map_entry_t new_entry;
9367 struct vm_map_header map_header;
9368
9369 if (target_map == VM_MAP_NULL)
9370 return KERN_INVALID_ARGUMENT;
9371
9372 switch (inheritance) {
9373 case VM_INHERIT_NONE:
9374 case VM_INHERIT_COPY:
9375 case VM_INHERIT_SHARE:
9376 if (size != 0 && src_map != VM_MAP_NULL)
9377 break;
9378 /*FALL THRU*/
9379 default:
9380 return KERN_INVALID_ARGUMENT;
9381 }
9382
9383 size = vm_map_round_page(size);
9384
9385 result = vm_map_remap_extract(src_map, memory_address,
9386 size, copy, &map_header,
9387 cur_protection,
9388 max_protection,
9389 inheritance,
9390 target_map->hdr.
9391 entries_pageable);
9392
9393 if (result != KERN_SUCCESS) {
9394 return result;
9395 }
9396
9397 /*
9398 * Allocate/check a range of free virtual address
9399 * space for the target
9400 */
9401 *address = vm_map_trunc_page(*address);
9402 vm_map_lock(target_map);
9403 result = vm_map_remap_range_allocate(target_map, address, size,
9404 mask, anywhere, &insp_entry);
9405
9406 for (entry = map_header.links.next;
9407 entry != (struct vm_map_entry *)&map_header.links;
9408 entry = new_entry) {
9409 new_entry = entry->vme_next;
9410 _vm_map_entry_unlink(&map_header, entry);
9411 if (result == KERN_SUCCESS) {
9412 entry->vme_start += *address;
9413 entry->vme_end += *address;
9414 vm_map_entry_link(target_map, insp_entry, entry);
9415 insp_entry = entry;
9416 } else {
9417 if (!entry->is_sub_map) {
9418 vm_object_deallocate(entry->object.vm_object);
9419 } else {
9420 vm_map_deallocate(entry->object.sub_map);
9421 }
9422 _vm_map_entry_dispose(&map_header, entry);
9423 }
9424 }
9425
9426 if (result == KERN_SUCCESS) {
9427 target_map->size += size;
9428 SAVE_HINT_MAP_WRITE(target_map, insp_entry);
9429 }
9430 vm_map_unlock(target_map);
9431
9432 if (result == KERN_SUCCESS && target_map->wiring_required)
9433 result = vm_map_wire(target_map, *address,
9434 *address + size, *cur_protection, TRUE);
9435 return result;
9436 }
9437
9438 /*
9439 * Routine: vm_map_remap_range_allocate
9440 *
9441 * Description:
9442 * Allocate a range in the specified virtual address map.
9443 * Returns the address and the map entry just before the allocated
9444 * range
9445 *
9446 * Map must be locked.
9447 */
9448
9449 static kern_return_t
9450 vm_map_remap_range_allocate(
9451 vm_map_t map,
9452 vm_map_address_t *address, /* IN/OUT */
9453 vm_map_size_t size,
9454 vm_map_offset_t mask,
9455 boolean_t anywhere,
9456 vm_map_entry_t *map_entry) /* OUT */
9457 {
9458 register vm_map_entry_t entry;
9459 register vm_map_offset_t start;
9460 register vm_map_offset_t end;
9461
9462 StartAgain: ;
9463
9464 start = *address;
9465
9466 if (anywhere)
9467 {
9468 /*
9469 * Calculate the first possible address.
9470 */
9471
9472 if (start < map->min_offset)
9473 start = map->min_offset;
9474 if (start > map->max_offset)
9475 return(KERN_NO_SPACE);
9476
9477 /*
9478 * Look for the first possible address;
9479 * if there's already something at this
9480 * address, we have to start after it.
9481 */
9482
9483 assert(first_free_is_valid(map));
9484 if (start == map->min_offset) {
9485 if ((entry = map->first_free) != vm_map_to_entry(map))
9486 start = entry->vme_end;
9487 } else {
9488 vm_map_entry_t tmp_entry;
9489 if (vm_map_lookup_entry(map, start, &tmp_entry))
9490 start = tmp_entry->vme_end;
9491 entry = tmp_entry;
9492 }
9493
9494 /*
9495 * In any case, the "entry" always precedes
9496 * the proposed new region throughout the
9497 * loop:
9498 */
9499
9500 while (TRUE) {
9501 register vm_map_entry_t next;
9502
9503 /*
9504 * Find the end of the proposed new region.
9505 * Be sure we didn't go beyond the end, or
9506 * wrap around the address.
9507 */
9508
9509 end = ((start + mask) & ~mask);
9510 if (end < start)
9511 return(KERN_NO_SPACE);
9512 start = end;
9513 end += size;
9514
9515 if ((end > map->max_offset) || (end < start)) {
9516 if (map->wait_for_space) {
9517 if (size <= (map->max_offset -
9518 map->min_offset)) {
9519 assert_wait((event_t) map, THREAD_INTERRUPTIBLE);
9520 vm_map_unlock(map);
9521 thread_block(THREAD_CONTINUE_NULL);
9522 vm_map_lock(map);
9523 goto StartAgain;
9524 }
9525 }
9526
9527 return(KERN_NO_SPACE);
9528 }
9529
9530 /*
9531 * If there are no more entries, we must win.
9532 */
9533
9534 next = entry->vme_next;
9535 if (next == vm_map_to_entry(map))
9536 break;
9537
9538 /*
9539 * If there is another entry, it must be
9540 * after the end of the potential new region.
9541 */
9542
9543 if (next->vme_start >= end)
9544 break;
9545
9546 /*
9547 * Didn't fit -- move to the next entry.
9548 */
9549
9550 entry = next;
9551 start = entry->vme_end;
9552 }
9553 *address = start;
9554 } else {
9555 vm_map_entry_t temp_entry;
9556
9557 /*
9558 * Verify that:
9559 * the address doesn't itself violate
9560 * the mask requirement.
9561 */
9562
9563 if ((start & mask) != 0)
9564 return(KERN_NO_SPACE);
9565
9566
9567 /*
9568 * ... the address is within bounds
9569 */
9570
9571 end = start + size;
9572
9573 if ((start < map->min_offset) ||
9574 (end > map->max_offset) ||
9575 (start >= end)) {
9576 return(KERN_INVALID_ADDRESS);
9577 }
9578
9579 /*
9580 * ... the starting address isn't allocated
9581 */
9582
9583 if (vm_map_lookup_entry(map, start, &temp_entry))
9584 return(KERN_NO_SPACE);
9585
9586 entry = temp_entry;
9587
9588 /*
9589 * ... the next region doesn't overlap the
9590 * end point.
9591 */
9592
9593 if ((entry->vme_next != vm_map_to_entry(map)) &&
9594 (entry->vme_next->vme_start < end))
9595 return(KERN_NO_SPACE);
9596 }
9597 *map_entry = entry;
9598 return(KERN_SUCCESS);
9599 }
9600
9601 /*
9602 * vm_map_switch:
9603 *
9604 * Set the address map for the current thread to the specified map
9605 */
9606
9607 vm_map_t
9608 vm_map_switch(
9609 vm_map_t map)
9610 {
9611 int mycpu;
9612 thread_t thread = current_thread();
9613 vm_map_t oldmap = thread->map;
9614
9615 mp_disable_preemption();
9616 mycpu = cpu_number();
9617
9618 /*
9619 * Deactivate the current map and activate the requested map
9620 */
9621 PMAP_SWITCH_USER(thread, map, mycpu);
9622
9623 mp_enable_preemption();
9624 return(oldmap);
9625 }
9626
9627
9628 /*
9629 * Routine: vm_map_write_user
9630 *
9631 * Description:
9632 * Copy out data from a kernel space into space in the
9633 * destination map. The space must already exist in the
9634 * destination map.
9635 * NOTE: This routine should only be called by threads
9636 * which can block on a page fault, i.e. kernel-mode user
9637 * threads.
9638 *
9639 */
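/*
 * A minimal usage sketch, assuming a kernel buffer "kbuf" and a user
 * destination address "udst" that is already mapped in "user_map"
 * (all three names are hypothetical):
 *
 *	char		kbuf[64];
 *	kern_return_t	kr;
 *
 *	kr = vm_map_write_user(user_map, kbuf, udst, sizeof (kbuf));
 *	if (kr != KERN_SUCCESS)
 *		... destination not addressable ...
 */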
9640 kern_return_t
9641 vm_map_write_user(
9642 vm_map_t map,
9643 void *src_p,
9644 vm_map_address_t dst_addr,
9645 vm_size_t size)
9646 {
9647 kern_return_t kr = KERN_SUCCESS;
9648
9649 if(current_map() == map) {
9650 if (copyout(src_p, dst_addr, size)) {
9651 kr = KERN_INVALID_ADDRESS;
9652 }
9653 } else {
9654 vm_map_t oldmap;
9655
9656 /* take on the identity of the target map while doing */
9657 /* the transfer */
9658
9659 vm_map_reference(map);
9660 oldmap = vm_map_switch(map);
9661 if (copyout(src_p, dst_addr, size)) {
9662 kr = KERN_INVALID_ADDRESS;
9663 }
9664 vm_map_switch(oldmap);
9665 vm_map_deallocate(map);
9666 }
9667 return kr;
9668 }
9669
9670 /*
9671 * Routine: vm_map_read_user
9672 *
9673 * Description:
9674 * Copy in data from a user space source map into the
9675 * kernel map. The space must already exist in the
9676 * kernel map.
9677 * NOTE: This routine should only be called by threads
9678 * which can block on a page fault, i.e. kernel-mode user
9679 * threads.
9680 *
9681 */
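/*
 * A minimal usage sketch, the mirror image of vm_map_write_user()
 * above, assuming a hypothetical user source address "usrc" mapped in
 * "user_map" and a kernel buffer "kbuf":
 *
 *	char		kbuf[64];
 *	kern_return_t	kr;
 *
 *	kr = vm_map_read_user(user_map, usrc, kbuf, sizeof (kbuf));
 */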
9682 kern_return_t
9683 vm_map_read_user(
9684 vm_map_t map,
9685 vm_map_address_t src_addr,
9686 void *dst_p,
9687 vm_size_t size)
9688 {
9689 kern_return_t kr = KERN_SUCCESS;
9690
9691 if(current_map() == map) {
9692 if (copyin(src_addr, dst_p, size)) {
9693 kr = KERN_INVALID_ADDRESS;
9694 }
9695 } else {
9696 vm_map_t oldmap;
9697
9698 /* take on the identity of the target map while doing */
9699 /* the transfer */
9700
9701 vm_map_reference(map);
9702 oldmap = vm_map_switch(map);
9703 if (copyin(src_addr, dst_p, size)) {
9704 kr = KERN_INVALID_ADDRESS;
9705 }
9706 vm_map_switch(oldmap);
9707 vm_map_deallocate(map);
9708 }
9709 return kr;
9710 }
9711
9712
9713 /*
9714 * vm_map_check_protection:
9715 *
9716 * Assert that the target map allows the specified
9717 * privilege on the entire address region given.
9718 * The entire region must be allocated.
9719 */
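/*
 * A minimal usage sketch, assuming a hypothetical map and a range that
 * is about to be written: the check fails if the range contains a hole
 * or any entry lacking the requested protection.
 *
 *	if (!vm_map_check_protection(map, start, end,
 *				     VM_PROT_READ | VM_PROT_WRITE))
 *		return KERN_PROTECTION_FAILURE;
 */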
9720 boolean_t vm_map_check_protection(map, start, end, protection)
9721 register vm_map_t map;
9722 register vm_map_offset_t start;
9723 register vm_map_offset_t end;
9724 register vm_prot_t protection;
9725 {
9726 register vm_map_entry_t entry;
9727 vm_map_entry_t tmp_entry;
9728
9729 vm_map_lock(map);
9730
9731 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
9732 {
9733 vm_map_unlock(map);
9734 return (FALSE);
9735 }
9736
9737 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
9738 vm_map_unlock(map);
9739 return(FALSE);
9740 }
9741
9742 entry = tmp_entry;
9743
9744 while (start < end) {
9745 if (entry == vm_map_to_entry(map)) {
9746 vm_map_unlock(map);
9747 return(FALSE);
9748 }
9749
9750 /*
9751 * No holes allowed!
9752 */
9753
9754 if (start < entry->vme_start) {
9755 vm_map_unlock(map);
9756 return(FALSE);
9757 }
9758
9759 /*
9760 * Check protection associated with entry.
9761 */
9762
9763 if ((entry->protection & protection) != protection) {
9764 vm_map_unlock(map);
9765 return(FALSE);
9766 }
9767
9768 /* go to next entry */
9769
9770 start = entry->vme_end;
9771 entry = entry->vme_next;
9772 }
9773 vm_map_unlock(map);
9774 return(TRUE);
9775 }
9776
9777 kern_return_t
9778 vm_map_purgable_control(
9779 vm_map_t map,
9780 vm_map_offset_t address,
9781 vm_purgable_t control,
9782 int *state)
9783 {
9784 vm_map_entry_t entry;
9785 vm_object_t object;
9786 kern_return_t kr;
9787
9788 /*
9789 * Vet all the input parameters and current type and state of the
9790 * underlying object. Return with an error if anything is amiss.
9791 */
9792 if (map == VM_MAP_NULL)
9793 return(KERN_INVALID_ARGUMENT);
9794
9795 if (control != VM_PURGABLE_SET_STATE &&
9796 control != VM_PURGABLE_GET_STATE)
9797 return(KERN_INVALID_ARGUMENT);
9798
9799 if (control == VM_PURGABLE_SET_STATE &&
9800 (*state < VM_PURGABLE_STATE_MIN ||
9801 *state > VM_PURGABLE_STATE_MAX))
9802 return(KERN_INVALID_ARGUMENT);
9803
9804 vm_map_lock(map);
9805
9806 if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {
9807
9808 /*
9809 * Must pass a valid non-submap address.
9810 */
9811 vm_map_unlock(map);
9812 return(KERN_INVALID_ADDRESS);
9813 }
9814
9815 if ((entry->protection & VM_PROT_WRITE) == 0) {
9816 /*
9817 * Can't apply purgable controls to something you can't write.
9818 */
9819 vm_map_unlock(map);
9820 return(KERN_PROTECTION_FAILURE);
9821 }
9822
9823 object = entry->object.vm_object;
9824 if (object == VM_OBJECT_NULL) {
9825 /*
9826 * Object must already be present or it can't be purgable.
9827 */
9828 vm_map_unlock(map);
9829 return KERN_INVALID_ARGUMENT;
9830 }
9831
9832 vm_object_lock(object);
9833
9834 if (entry->offset != 0 ||
9835 entry->vme_end - entry->vme_start != object->size) {
9836 /*
9837 * Can only apply purgable controls to the whole (existing)
9838 * object at once.
9839 */
9840 vm_map_unlock(map);
9841 vm_object_unlock(object);
9842 return KERN_INVALID_ARGUMENT;
9843 }
9844
9845 vm_map_unlock(map);
9846
9847 kr = vm_object_purgable_control(object, control, state);
9848
9849 vm_object_unlock(object);
9850
9851 return kr;
9852 }
9853
9854 kern_return_t
9855 vm_map_page_info(
9856 vm_map_t target_map,
9857 vm_map_offset_t offset,
9858 int *disposition,
9859 int *ref_count)
9860 {
9861 vm_map_entry_t map_entry;
9862 vm_object_t object;
9863 vm_page_t m;
9864
9865 restart_page_query:
9866 *disposition = 0;
9867 *ref_count = 0;
9868 vm_map_lock(target_map);
9869 if(!vm_map_lookup_entry(target_map, offset, &map_entry)) {
9870 vm_map_unlock(target_map);
9871 return KERN_FAILURE;
9872 }
9873 offset -= map_entry->vme_start; /* adjust to offset within entry */
9874 offset += map_entry->offset; /* adjust to target object offset */
9875 if(map_entry->object.vm_object != VM_OBJECT_NULL) {
9876 if(!map_entry->is_sub_map) {
9877 object = map_entry->object.vm_object;
9878 } else {
9879 vm_map_unlock(target_map);
9880 target_map = map_entry->object.sub_map;
9881 goto restart_page_query;
9882 }
9883 } else {
9884 vm_map_unlock(target_map);
9885 return KERN_FAILURE;
9886 }
9887 vm_object_lock(object);
9888 vm_map_unlock(target_map);
9889 while(TRUE) {
9890 m = vm_page_lookup(object, offset);
9891 if (m != VM_PAGE_NULL) {
9892 *disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
9893 break;
9894 } else {
9895 if(object->shadow) {
9896 offset += object->shadow_offset;
9897 vm_object_unlock(object);
9898 object = object->shadow;
9899 vm_object_lock(object);
9900 continue;
9901 }
9902 vm_object_unlock(object);
9903 return KERN_FAILURE;
9904 }
9905 }
9906
9907 /* The ref_count is not strictly accurate: it measures the number */
9908 /* of entities holding a reference on the object; they may not be */
9909 /* mapping the object, or may not be mapping the section holding the */
9910 /* target page, but it is still a ballpark number and, though an */
9911 /* overcount, it picks up the copy-on-write cases. */
9912
9913 /* We could also get a picture of page sharing from pmap_attributes, */
9914 /* but this would undercount, since only faulted-in mappings would */
9915 /* show up. */
9916
9917 *ref_count = object->ref_count;
9918
9919 if (m->fictitious) {
9920 *disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
9921 vm_object_unlock(object);
9922 return KERN_SUCCESS;
9923 }
9924
9925 if (m->dirty)
9926 *disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
9927 else if(pmap_is_modified(m->phys_page))
9928 *disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
9929
9930 if (m->reference)
9931 *disposition |= VM_PAGE_QUERY_PAGE_REF;
9932 else if(pmap_is_referenced(m->phys_page))
9933 *disposition |= VM_PAGE_QUERY_PAGE_REF;
9934
9935 vm_object_unlock(object);
9936 return KERN_SUCCESS;
9937
9938 }
9939
9940
9941 /* For a given range, check all map entries. If an entry corresponds to */
9942 /* the old vm_region/map provided on the call, replace it with the */
9943 /* corresponding range in the new vm_region/map. */
9944 kern_return_t vm_map_region_replace(
9945 vm_map_t target_map,
9946 ipc_port_t old_region,
9947 ipc_port_t new_region,
9948 vm_map_offset_t start,
9949 vm_map_offset_t end)
9950 {
9951 vm_named_entry_t old_object;
9952 vm_named_entry_t new_object;
9953 vm_map_t old_submap;
9954 vm_map_t new_submap;
9955 vm_map_offset_t addr;
9956 vm_map_entry_t entry;
9957 int nested_pmap = 0;
9958
9959
9960 vm_map_lock(target_map);
9961 old_object = (vm_named_entry_t)old_region->ip_kobject;
9962 new_object = (vm_named_entry_t)new_region->ip_kobject;
9963 if((!old_object->is_sub_map) || (!new_object->is_sub_map)) {
9964 vm_map_unlock(target_map);
9965 return KERN_INVALID_ARGUMENT;
9966 }
9967 old_submap = (vm_map_t)old_object->backing.map;
9968 new_submap = (vm_map_t)new_object->backing.map;
9969 vm_map_lock(old_submap);
9970 if((old_submap->min_offset != new_submap->min_offset) ||
9971 (old_submap->max_offset != new_submap->max_offset)) {
9972 vm_map_unlock(old_submap);
9973 vm_map_unlock(target_map);
9974 return KERN_INVALID_ARGUMENT;
9975 }
9976 if(!vm_map_lookup_entry(target_map, start, &entry)) {
9977 /* if the src is not contained, the entry precedes */
9978 /* our range */
9979 addr = entry->vme_start;
9980 if(entry == vm_map_to_entry(target_map)) {
9981 vm_map_unlock(old_submap);
9982 vm_map_unlock(target_map);
9983 return KERN_SUCCESS;
9984 }
9985 }
9986 if ((entry->use_pmap) &&
9987 (new_submap->pmap == NULL)) {
9988 new_submap->pmap = pmap_create((vm_map_size_t) 0, FALSE);
9989 if(new_submap->pmap == PMAP_NULL) {
9990 vm_map_unlock(old_submap);
9991 vm_map_unlock(target_map);
9992 return(KERN_NO_SPACE);
9993 }
9994 }
9995
9996 /*
9997 * Mark the new submap as "mapped", so that we get proper
9998 * cleanup of the sub-pmap when we unmap it.
9999 */
10000 new_submap->mapped = TRUE;
10001
10002 addr = entry->vme_start;
10003 vm_map_reference(old_submap);
10004 while((entry != vm_map_to_entry(target_map)) &&
10005 (entry->vme_start < end)) {
10006 if((entry->is_sub_map) &&
10007 (entry->object.sub_map == old_submap)) {
10008 if(entry->use_pmap) {
10009 if((start & 0x0fffffff) ||
10010 ((end - start) != 0x10000000)) {
10011 vm_map_unlock(old_submap);
10012 vm_map_deallocate(old_submap);
10013 vm_map_unlock(target_map);
10014 return KERN_INVALID_ARGUMENT;
10015 }
10016 nested_pmap = 1;
10017 }
10018 entry->object.sub_map = new_submap;
10019 vm_map_reference(new_submap);
10020 vm_map_deallocate(old_submap);
10021 }
10022 entry = entry->vme_next;
10023 addr = entry->vme_start;
10024 }
10025 if(nested_pmap) {
10026 #ifndef NO_NESTED_PMAP
10027 pmap_unnest(target_map->pmap, (addr64_t)start);
10028 if(target_map->mapped) {
10029 vm_map_submap_pmap_clean(target_map,
10030 start, end, old_submap, 0);
10031 }
10032 pmap_nest(target_map->pmap, new_submap->pmap,
10033 (addr64_t)start, (addr64_t)start,
10034 (uint64_t)(end - start));
10035 #endif /* NO_NESTED_PMAP */
10036 } else {
10037 vm_map_submap_pmap_clean(target_map,
10038 start, end, old_submap, 0);
10039 }
10040 vm_map_unlock(old_submap);
10041 vm_map_deallocate(old_submap);
10042 vm_map_unlock(target_map);
10043 return KERN_SUCCESS;
10044 }
10045
10046 /*
10047 * vm_map_msync
10048 *
10049 * Synchronizes the specified memory range with its backing store
10050 * image by either flushing or cleaning the contents to the appropriate
10051 * memory manager, engaging in a memory object synchronize dialog with
10052 * the manager. The client does not return until the manager issues
10053 * an m_o_s_completed message. MIG magically converts the user task
10054 * parameter to the task's address map.
10055 *
10056 * interpretation of sync_flags
10057 * VM_SYNC_INVALIDATE - discard pages, only return precious
10058 * pages to manager.
10059 *
10060 * VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
10061 * - discard pages, write dirty or precious
10062 * pages back to memory manager.
10063 *
10064 * VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
10065 * - write dirty or precious pages back to
10066 * the memory manager.
10067 *
10068 * VM_SYNC_CONTIGUOUS - does everything normally, but if there
10069 * is a hole in the region, and we would
10070 * have returned KERN_SUCCESS, return
10071 * KERN_INVALID_ADDRESS instead.
10072 *
10073 * NOTE
10074 * The memory object attributes have not yet been implemented; this
10075 * function will have to deal with the invalidate attribute.
10076 *
10077 * RETURNS
10078 * KERN_INVALID_TASK Bad task parameter
10079 * KERN_INVALID_ARGUMENT Both sync and async were specified.
10080 * KERN_SUCCESS The usual.
10081 * KERN_INVALID_ADDRESS There was a hole in the region.
10082 */
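/*
 * A minimal usage sketch, assuming a hypothetical mapped region: write
 * dirty pages back synchronously, discard them from memory, and treat
 * any hole in the range as an error.
 *
 *	kern_return_t	kr;
 *
 *	kr = vm_map_msync(map, address, size,
 *			  VM_SYNC_SYNCHRONOUS | VM_SYNC_INVALIDATE |
 *			  VM_SYNC_CONTIGUOUS);
 */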
10083
10084 kern_return_t
10085 vm_map_msync(
10086 vm_map_t map,
10087 vm_map_address_t address,
10088 vm_map_size_t size,
10089 vm_sync_t sync_flags)
10090 {
10091 msync_req_t msr;
10092 msync_req_t new_msr;
10093 queue_chain_t req_q; /* queue of requests for this msync */
10094 vm_map_entry_t entry;
10095 vm_map_size_t amount_left;
10096 vm_object_offset_t offset;
10097 boolean_t do_sync_req;
10098 boolean_t modifiable;
10099 boolean_t had_hole = FALSE;
10100
10101 if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
10102 (sync_flags & VM_SYNC_SYNCHRONOUS))
10103 return(KERN_INVALID_ARGUMENT);
10104
10105 /*
10106 * align address and size on page boundaries
10107 */
10108 size = vm_map_round_page(address + size) - vm_map_trunc_page(address);
10109 address = vm_map_trunc_page(address);
10110
10111 if (map == VM_MAP_NULL)
10112 return(KERN_INVALID_TASK);
10113
10114 if (size == 0)
10115 return(KERN_SUCCESS);
10116
10117 queue_init(&req_q);
10118 amount_left = size;
10119
10120 while (amount_left > 0) {
10121 vm_object_size_t flush_size;
10122 vm_object_t object;
10123
10124 vm_map_lock(map);
10125 if (!vm_map_lookup_entry(map,
10126 vm_map_trunc_page(address), &entry)) {
10127
10128 vm_size_t skip;
10129
10130 /*
10131 * hole in the address map.
10132 */
10133 had_hole = TRUE;
10134
10135 /*
10136 * Check for empty map.
10137 */
10138 if (entry == vm_map_to_entry(map) &&
10139 entry->vme_next == entry) {
10140 vm_map_unlock(map);
10141 break;
10142 }
10143 /*
10144 * Check that we don't wrap and that
10145 * we have at least one real map entry.
10146 */
10147 if ((map->hdr.nentries == 0) ||
10148 (entry->vme_next->vme_start < address)) {
10149 vm_map_unlock(map);
10150 break;
10151 }
10152 /*
10153 * Move up to the next entry if needed
10154 */
10155 skip = (entry->vme_next->vme_start - address);
10156 if (skip >= amount_left)
10157 amount_left = 0;
10158 else
10159 amount_left -= skip;
10160 address = entry->vme_next->vme_start;
10161 vm_map_unlock(map);
10162 continue;
10163 }
10164
10165 offset = address - entry->vme_start;
10166
10167 /*
10168 * do we have more to flush than is contained in this
10169 * entry ?
10170 */
10171 if (amount_left + entry->vme_start + offset > entry->vme_end) {
10172 flush_size = entry->vme_end -
10173 (entry->vme_start + offset);
10174 } else {
10175 flush_size = amount_left;
10176 }
10177 amount_left -= flush_size;
10178 address += flush_size;
10179
10180 if (entry->is_sub_map == TRUE) {
10181 vm_map_t local_map;
10182 vm_map_offset_t local_offset;
10183
10184 local_map = entry->object.sub_map;
10185 local_offset = entry->offset;
10186 vm_map_unlock(map);
10187 if (vm_map_msync(
10188 local_map,
10189 local_offset,
10190 flush_size,
10191 sync_flags) == KERN_INVALID_ADDRESS) {
10192 had_hole = TRUE;
10193 }
10194 continue;
10195 }
10196 object = entry->object.vm_object;
10197
10198 /*
10199 * We can't sync this object if the object has not been
10200 * created yet
10201 */
10202 if (object == VM_OBJECT_NULL) {
10203 vm_map_unlock(map);
10204 continue;
10205 }
10206 offset += entry->offset;
10207 modifiable = (entry->protection & VM_PROT_WRITE)
10208 != VM_PROT_NONE;
10209
10210 vm_object_lock(object);
10211
10212 if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
10213 boolean_t kill_pages = 0;
10214
10215 if (sync_flags & VM_SYNC_KILLPAGES) {
10216 if (object->ref_count == 1 && !entry->needs_copy && !object->shadow)
10217 kill_pages = 1;
10218 else
10219 kill_pages = -1;
10220 }
10221 if (kill_pages != -1)
10222 vm_object_deactivate_pages(object, offset,
10223 (vm_object_size_t)flush_size, kill_pages);
10224 vm_object_unlock(object);
10225 vm_map_unlock(map);
10226 continue;
10227 }
10228 /*
10229 * We can't sync this object if there isn't a pager.
10230 * Don't bother to sync internal objects, since there can't
10231 * be any "permanent" storage for these objects anyway.
10232 */
10233 if ((object->pager == MEMORY_OBJECT_NULL) ||
10234 (object->internal) || (object->private)) {
10235 vm_object_unlock(object);
10236 vm_map_unlock(map);
10237 continue;
10238 }
10239 /*
10240 * keep reference on the object until syncing is done
10241 */
10242 assert(object->ref_count > 0);
10243 object->ref_count++;
10244 vm_object_res_reference(object);
10245 vm_object_unlock(object);
10246
10247 vm_map_unlock(map);
10248
10249 do_sync_req = vm_object_sync(object,
10250 offset,
10251 flush_size,
10252 sync_flags & VM_SYNC_INVALIDATE,
10253 (modifiable &&
10254 (sync_flags & VM_SYNC_SYNCHRONOUS ||
10255 sync_flags & VM_SYNC_ASYNCHRONOUS)),
10256 sync_flags & VM_SYNC_SYNCHRONOUS);
10257 /*
10258 * only send an m_o_s if we returned pages or if the entry
10259 * is writable (i.e. dirty pages may have already been sent back)
10260 */
10261 if (!do_sync_req && !modifiable) {
10262 vm_object_deallocate(object);
10263 continue;
10264 }
10265 msync_req_alloc(new_msr);
10266
10267 vm_object_lock(object);
10268 offset += object->paging_offset;
10269
10270 new_msr->offset = offset;
10271 new_msr->length = flush_size;
10272 new_msr->object = object;
10273 new_msr->flag = VM_MSYNC_SYNCHRONIZING;
10274 re_iterate:
10275 queue_iterate(&object->msr_q, msr, msync_req_t, msr_q) {
10276 /*
10277 * Check for an overlapping entry; if one is found, wait
10278 * for the overlapping msr to complete, then reiterate.
10279 */
10280 msr_lock(msr);
10281 if (msr->flag == VM_MSYNC_SYNCHRONIZING &&
10282 ((offset >= msr->offset &&
10283 offset < (msr->offset + msr->length)) ||
10284 (msr->offset >= offset &&
10285 msr->offset < (offset + flush_size))))
10286 {
10287 assert_wait((event_t) msr,THREAD_INTERRUPTIBLE);
10288 msr_unlock(msr);
10289 vm_object_unlock(object);
10290 thread_block(THREAD_CONTINUE_NULL);
10291 vm_object_lock(object);
10292 goto re_iterate;
10293 }
10294 msr_unlock(msr);
10295 }/* queue_iterate */
10296
10297 queue_enter(&object->msr_q, new_msr, msync_req_t, msr_q);
10298 vm_object_unlock(object);
10299
10300 queue_enter(&req_q, new_msr, msync_req_t, req_q);
10301
10302 (void) memory_object_synchronize(
10303 object->pager,
10304 offset,
10305 flush_size,
10306 sync_flags & ~VM_SYNC_CONTIGUOUS);
10307 }/* while */
10308
10309 /*
10310 * Wait for memory_object_synchronize_completed messages from the pager(s).
10311 */
10312
10313 while (!queue_empty(&req_q)) {
10314 msr = (msync_req_t)queue_first(&req_q);
10315 msr_lock(msr);
10316 while(msr->flag != VM_MSYNC_DONE) {
10317 assert_wait((event_t) msr, THREAD_INTERRUPTIBLE);
10318 msr_unlock(msr);
10319 thread_block(THREAD_CONTINUE_NULL);
10320 msr_lock(msr);
10321 }/* while */
10322 queue_remove(&req_q, msr, msync_req_t, req_q);
10323 msr_unlock(msr);
10324 vm_object_deallocate(msr->object);
10325 msync_req_free(msr);
10326 }/* while (!queue_empty) */
10327
10328 /* for proper msync() behaviour */
10329 if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS))
10330 return(KERN_INVALID_ADDRESS);
10331
10332 return(KERN_SUCCESS);
10333 }/* vm_map_msync */
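/*
 * Illustrative sketch, not part of the original source: one way a caller
 * might rely on the VM_SYNC_CONTIGUOUS behaviour documented above.  The
 * function name and parameters are hypothetical.
 */
#if 0
static kern_return_t
example_flush_range(
	vm_map_t		map,
	vm_map_offset_t		addr,
	vm_map_size_t		size)
{
	kern_return_t	kr;

	/* Push dirty pages in [addr, addr+size) back to their pagers,
	 * waiting for the pagers to complete. */
	kr = vm_map_msync(map, addr, size,
			  VM_SYNC_SYNCHRONOUS | VM_SYNC_CONTIGUOUS);

	/* With VM_SYNC_CONTIGUOUS set, a hole anywhere in the range is
	 * reported as KERN_INVALID_ADDRESS instead of being skipped. */
	return kr;
}
#endif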
10334
10335 /* Takes existing source and destination sub-maps and clones the contents */
10336 /* of the source map into the destination map. */
10337 kern_return_t
10338 vm_region_clone(
10339 ipc_port_t src_region,
10340 ipc_port_t dst_region)
10341 {
10342 vm_named_entry_t src_object;
10343 vm_named_entry_t dst_object;
10344 vm_map_t src_map;
10345 vm_map_t dst_map;
10346 vm_map_offset_t addr;
10347 vm_map_offset_t max_off;
10348 vm_map_entry_t entry;
10349 vm_map_entry_t new_entry;
10350 vm_map_entry_t insert_point;
10351
10352 src_object = (vm_named_entry_t)src_region->ip_kobject;
10353 dst_object = (vm_named_entry_t)dst_region->ip_kobject;
10354 if((!src_object->is_sub_map) || (!dst_object->is_sub_map)) {
10355 return KERN_INVALID_ARGUMENT;
10356 }
10357 src_map = (vm_map_t)src_object->backing.map;
10358 dst_map = (vm_map_t)dst_object->backing.map;
10359 /* The destination map is assumed to be unavailable to any other */
10360 /* activity, i.e. it is new. */
10361 vm_map_lock(src_map);
10362 if((src_map->min_offset != dst_map->min_offset)
10363 || (src_map->max_offset != dst_map->max_offset)) {
10364 vm_map_unlock(src_map);
10365 return KERN_INVALID_ARGUMENT;
10366 }
10367 addr = src_map->min_offset;
10368 vm_map_lookup_entry(dst_map, addr, &entry);
10369 if(entry == vm_map_to_entry(dst_map)) {
10370 entry = entry->vme_next;
10371 }
10372 if(entry == vm_map_to_entry(dst_map)) {
10373 max_off = src_map->max_offset;
10374 } else {
10375 max_off = entry->vme_start;
10376 }
10377 vm_map_lookup_entry(src_map, addr, &entry);
10378 if(entry == vm_map_to_entry(src_map)) {
10379 entry = entry->vme_next;
10380 }
10381 vm_map_lookup_entry(dst_map, addr, &insert_point);
10382 while((entry != vm_map_to_entry(src_map)) &&
10383 (entry->vme_end <= max_off)) {
10384 addr = entry->vme_start;
10385 new_entry = vm_map_entry_create(dst_map);
10386 vm_map_entry_copy(new_entry, entry);
10387 vm_map_entry_link(dst_map, insert_point, new_entry);
10388 insert_point = new_entry;
10389 if (entry->object.vm_object != VM_OBJECT_NULL) {
10390 if (new_entry->is_sub_map) {
10391 vm_map_reference(new_entry->object.sub_map);
10392 } else {
10393 vm_object_reference(
10394 new_entry->object.vm_object);
10395 }
10396 }
10397 dst_map->size += new_entry->vme_end - new_entry->vme_start;
10398 entry = entry->vme_next;
10399 }
10400 vm_map_unlock(src_map);
10401 return KERN_SUCCESS;
10402 }
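/*
 * Illustrative sketch, not part of the original source: vm_region_clone()
 * expects two named-entry ports that are both sub-map backed and span the
 * same offset range; the destination is assumed to be freshly created.
 * The function name and port parameters are hypothetical.
 */
#if 0
static kern_return_t
example_clone_region(
	ipc_port_t	src_region_port,
	ipc_port_t	dst_region_port)
{
	/* Returns KERN_INVALID_ARGUMENT if either port is not sub-map
	 * backed or if the sub-maps' offset ranges differ. */
	return vm_region_clone(src_region_port, dst_region_port);
}
#endif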
10403
10404 /*
10405 * Routine: convert_port_entry_to_map
10406 * Purpose:
10407 * Convert from a port specifying an entry or a task
10408 * to a map. Doesn't consume the port ref; produces a map ref,
10409 * which may be null. Unlike convert_port_to_map, the
10410 * port may be backed by either a task or a named entry.
10411 * Conditions:
10412 * Nothing locked.
10413 */
10414
10415
10416 vm_map_t
10417 convert_port_entry_to_map(
10418 ipc_port_t port)
10419 {
10420 vm_map_t map;
10421 vm_named_entry_t named_entry;
10422
10423 if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
10424 while(TRUE) {
10425 ip_lock(port);
10426 if(ip_active(port) && (ip_kotype(port)
10427 == IKOT_NAMED_ENTRY)) {
10428 named_entry =
10429 (vm_named_entry_t)port->ip_kobject;
10430 if (!(mutex_try(&(named_entry)->Lock))) {
10431 ip_unlock(port);
10432 mutex_pause();
10433 continue;
10434 }
10435 named_entry->ref_count++;
10436 mutex_unlock(&(named_entry)->Lock);
10437 ip_unlock(port);
10438 if ((named_entry->is_sub_map) &&
10439 (named_entry->protection
10440 & VM_PROT_WRITE)) {
10441 map = named_entry->backing.map;
10442 } else {
10443 mach_destroy_memory_entry(port);
10444 return VM_MAP_NULL;
10445 }
10446 vm_map_reference_swap(map);
10447 mach_destroy_memory_entry(port);
10448 break;
10449 } else {
10450 ip_unlock(port); /* drop the port lock taken at the top of the loop */
10451 return VM_MAP_NULL; }
10452 }
10453 }
10454 else
10455 map = convert_port_to_map(port);
10456
10457 return map;
10458 }
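/*
 * Illustrative sketch, not part of the original source: a caller of
 * convert_port_entry_to_map() receives a map reference (or VM_MAP_NULL)
 * and must drop it with vm_map_deallocate() when done.  The function name
 * and port parameter are hypothetical.
 */
#if 0
static void
example_use_entry_port(
	ipc_port_t	port)
{
	vm_map_t	map;

	map = convert_port_entry_to_map(port);	/* port ref is not consumed */
	if (map == VM_MAP_NULL)
		return;

	/* ... operate on the map here ... */

	vm_map_deallocate(map);		/* drop the reference produced above */
}
#endif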
10459
10460 /*
10461 * Routine: convert_port_entry_to_object
10462 * Purpose:
10463 * Convert from a port specifying a named entry to an
10464 * object. Doesn't consume the port ref; produces an object ref,
10465 * which may be null.
10466 * Conditions:
10467 * Nothing locked.
10468 */
10469
10470
10471 vm_object_t
10472 convert_port_entry_to_object(
10473 ipc_port_t port)
10474 {
10475 vm_object_t object;
10476 vm_named_entry_t named_entry;
10477
10478 if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
10479 while(TRUE) {
10480 ip_lock(port);
10481 if(ip_active(port) && (ip_kotype(port)
10482 == IKOT_NAMED_ENTRY)) {
10483 named_entry =
10484 (vm_named_entry_t)port->ip_kobject;
10485 if (!(mutex_try(&(named_entry)->Lock))) {
10486 ip_unlock(port);
10487 mutex_pause();
10488 continue;
10489 }
10490 named_entry->ref_count++;
10491 mutex_unlock(&(named_entry)->Lock);
10492 ip_unlock(port);
10493 if ((!named_entry->is_sub_map) &&
10494 (!named_entry->is_pager) &&
10495 (named_entry->protection
10496 & VM_PROT_WRITE)) {
10497 object = named_entry->backing.object;
10498 } else {
10499 mach_destroy_memory_entry(port);
10500 return (vm_object_t)NULL;
10501 }
10502 vm_object_reference(named_entry->backing.object);
10503 mach_destroy_memory_entry(port);
10504 break;
10505 } else {
10506 ip_unlock(port); /* drop the port lock taken at the top of the loop */
10507 return (vm_object_t)NULL; }
10508 }
10509 } else {
10510 return (vm_object_t)NULL;
10511 }
10512
10513 return object;
10514 }
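/*
 * Illustrative sketch, not part of the original source: the object
 * reference produced by convert_port_entry_to_object() must be dropped
 * with vm_object_deallocate() when the caller is done.  The function name
 * and port parameter are hypothetical.
 */
#if 0
static void
example_use_entry_object(
	ipc_port_t	port)
{
	vm_object_t	object;

	object = convert_port_entry_to_object(port);
	if (object == (vm_object_t)NULL)
		return;

	/* ... operate on the object here ... */

	vm_object_deallocate(object);	/* drop the reference produced above */
}
#endif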
10515
10516 /*
10517 * Export routines to other components for the things we access locally through
10518 * macros.
10519 */
10520 #undef current_map
10521 vm_map_t
10522 current_map(void)
10523 {
10524 return (current_map_fast());
10525 }
10526
10527 /*
10528 * vm_map_reference:
10529 *
10530 * Most code internal to osfmk goes through a macro
10531 * that defines this. This function is always available
10532 * for use by other kernel components.
10533 */
10534 #undef vm_map_reference
10535 void
10536 vm_map_reference(
10537 register vm_map_t map)
10538 {
10539 if (map == VM_MAP_NULL)
10540 return;
10541
10542 mutex_lock(&map->s_lock);
10543 #if TASK_SWAPPER
10544 assert(map->res_count > 0);
10545 assert(map->ref_count >= map->res_count);
10546 map->res_count++;
10547 #endif
10548 map->ref_count++;
10549 mutex_unlock(&map->s_lock);
10550 }
10551
10552 /*
10553 * vm_map_deallocate:
10554 *
10555 * Removes a reference from the specified map,
10556 * destroying it if no references remain.
10557 * The map should not be locked.
10558 */
10559 void
10560 vm_map_deallocate(
10561 register vm_map_t map)
10562 {
10563 unsigned int ref;
10564
10565 if (map == VM_MAP_NULL)
10566 return;
10567
10568 mutex_lock(&map->s_lock);
10569 ref = --map->ref_count;
10570 if (ref > 0) {
10571 vm_map_res_deallocate(map);
10572 mutex_unlock(&map->s_lock);
10573 return;
10574 }
10575 assert(map->ref_count == 0);
10576 mutex_unlock(&map->s_lock);
10577
10578 #if TASK_SWAPPER
10579 /*
10580 * The map residence count isn't decremented here because
10581 * the vm_map_delete below will traverse the entire map,
10582 * deleting entries, and the residence counts on objects
10583 * and sharing maps will go away then.
10584 */
10585 #endif
10586
10587 vm_map_destroy(map);
10588 }
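/*
 * Illustrative sketch, not part of the original source: how an external
 * kernel component might pair the exported vm_map_reference() and
 * vm_map_deallocate() above.  The function name is hypothetical.
 */
#if 0
static void
example_hold_map(
	vm_map_t	map)
{
	vm_map_reference(map);		/* take an extra ref; no-op for VM_MAP_NULL */

	/* ... use the map while the reference is held ... */

	vm_map_deallocate(map);		/* drop it; the last ref destroys the map */
}
#endif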
10589
10590
10591 /* LP64todo - this whole mechanism is temporary. It should be redone when
10592 * the pmap layer can handle 64-bit address spaces. Until then, we trump
10593 * up a map entry for the 64-bit commpage above the map's max_offset.
10594 */
10595 extern vm_map_t com_region_map64; /* the submap for 64-bit commpage */
10596 extern vm_map_t com_region_map32; /* the submap for 32-bit commpage */
10597
10598
10599 static void
10600 vm_map_commpage(
10601 vm_map_t user_map,
10602 vm_map_t com_region_map, /* com_region_map32 or com_region_map64 */
10603 vm_map_offset_t base_address,
10604 vm_map_size_t size)
10605 {
10606 vm_map_entry_t entry;
10607 vm_object_t object;
10608
10609 vm_map_lock(user_map);
10610
10611 /* The commpage is necessarily the last entry in the map.
10612 * See if one is already there (it is unclear whether this can happen).
10613 */
10614 entry = vm_map_last_entry(user_map);
10615 if (entry != vm_map_to_entry(user_map)) {
10616 if (entry->vme_end >= base_address) {
10617 vm_map_unlock(user_map);
10618 return;
10619 }
10620 }
10621
10622 entry = vm_map_first_entry(com_region_map);
10623 object = entry->object.vm_object;
10624 vm_object_reference(object);
10625
10626 /* We bypass vm_map_enter() because we are adding the entry past the
10627 * map's max_offset.
10628 */
10629 entry = vm_map_entry_insert(
10630 user_map,
10631 vm_map_last_entry(user_map), /* insert after last entry */
10632 base_address,
10633 base_address + size,
10634 object,
10635 0, /* offset */
10636 FALSE, /* needs_copy */
10637 FALSE, /* is_shared */
10638 FALSE, /* in_transition */
10639 VM_PROT_READ|VM_PROT_EXECUTE,
10640 VM_PROT_READ|VM_PROT_EXECUTE,
10641 VM_BEHAVIOR_DEFAULT,
10642 VM_INHERIT_NONE,
10643 1 ); /* wired_count */
10644
10645 vm_map_unlock(user_map);
10646 }
10647
10648 #ifdef __i386__
10649 void
10650 vm_map_commpage32(
10651 vm_map_t map)
10652 {
10653 vm_map_commpage(map,
10654 com_region_map32,
10655 (vm_map_offset_t) (unsigned) _COMM_PAGE32_BASE_ADDRESS,
10656 (vm_map_size_t) (unsigned) _COMM_PAGE32_AREA_USED);
10657 }
10658 #endif /* __i386__ */
10659
10660
10661
10662 void
10663 vm_map_commpage64(
10664 vm_map_t map)
10665 {
10666
10667 vm_map_commpage(map,
10668 com_region_map64,
10669 (vm_map_offset_t) _COMM_PAGE64_BASE_ADDRESS,
10670 (vm_map_size_t) _COMM_PAGE64_AREA_USED);
10671 }
10672
10673 void
10674 vm_map_remove_commpage(
10675 vm_map_t map )
10676 {
10677 vm_map_entry_t entry;
10678
10679 while( 1 ) {
10680 vm_map_lock(map);
10681
10682 entry = vm_map_last_entry(map);
10683
10684 if ((entry == vm_map_to_entry(map)) ||
10685 (entry->vme_start < map->max_offset))
10686 break;
10687
10688 /* clearing the wired count isn't strictly correct */
10689 entry->wired_count = 0;
10690 vm_map_entry_delete(map,entry);
10691 }
10692
10693 vm_map_unlock(map);
10694 }
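/*
 * Illustrative sketch, not part of the original source: how the commpage
 * helpers above might be paired over a task's lifetime.  This is a
 * hypothetical sequence, not code lifted from the task setup/teardown paths.
 */
#if 0
static void
example_commpage_lifecycle(
	vm_map_t	user_map)
{
	/* Map the 64-bit commpage just above the map's max_offset. */
	vm_map_commpage64(user_map);

	/* ... task executes ... */

	/* At teardown, delete any entries above max_offset. */
	vm_map_remove_commpage(user_map);
}
#endif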
10695
10696 void
10697 vm_map_disable_NX(vm_map_t map)
10698 {
10699 if (map == NULL)
10700 return;
10701 if (map->pmap == NULL)
10702 return;
10703
10704 pmap_disable_NX(map->pmap);
10705 }
10706
10707 /* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
10708 * more descriptive.
10709 */
10710 void
10711 vm_map_set_32bit(vm_map_t map)
10712 {
10713 map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
10714 }
10715
10716
10717 void
10718 vm_map_set_64bit(vm_map_t map)
10719 {
10720 map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
10721 }
10722
10723 vm_map_offset_t
10724 vm_compute_max_offset(unsigned is64)
10725 {
10726 return (is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS);
10727 }
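/*
 * Illustrative sketch, not part of the original source: choosing a map's
 * max_offset from the task's word size with the helpers above.  The
 * function name and boolean parameter are hypothetical.
 */
#if 0
static void
example_size_map(
	vm_map_t	map,
	boolean_t	is_64bit)
{
	if (is_64bit)
		vm_map_set_64bit(map);	/* max_offset = MACH_VM_MAX_ADDRESS */
	else
		vm_map_set_32bit(map);	/* max_offset = VM_MAX_ADDRESS */

	assert(map->max_offset == vm_compute_max_offset(is_64bit));
}
#endif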
10728
10729 boolean_t
10730 vm_map_has_4GB_pagezero(vm_map_t map)
10731 {
10732 /*
10733 * XXX FBDP
10734 * We should lock the VM map (for read) here but we can get away
10735 * with it for now because there can't really be any race condition:
10736 * the VM map's min_offset is changed only when the VM map is created
10737 * and when the zero page is established (when the binary gets loaded),
10738 * and this routine gets called only when the task terminates and the
10739 * VM map is being torn down, and when a new map is created via
10740 * load_machfile()/execve().
10741 */
10742 return (map->min_offset >= 0x100000000ULL);
10743 }
10744
10745 void
10746 vm_map_set_4GB_pagezero(vm_map_t map)
10747 {
10748 pmap_set_4GB_pagezero(map->pmap);
10749 }
10750
10751 void
10752 vm_map_clear_4GB_pagezero(vm_map_t map)
10753 {
10754 pmap_clear_4GB_pagezero(map->pmap);
10755 }
10756
10757 /*
10758 * Raise a VM map's minimum offset, to strictly
10759 * enforce a "page zero" reservation.
10760 */
10761 kern_return_t
10762 vm_map_raise_min_offset(
10763 vm_map_t map,
10764 vm_map_offset_t new_min_offset)
10765 {
10766 vm_map_entry_t first_entry;
10767
10768 new_min_offset = vm_map_round_page(new_min_offset);
10769
10770 vm_map_lock(map);
10771
10772 if (new_min_offset < map->min_offset) {
10773 /*
10774 * Can't move min_offset backwards, as that would expose
10775 * a part of the address space that was previously, and for
10776 * possibly good reasons, inaccessible.
10777 */
10778 vm_map_unlock(map);
10779 return KERN_INVALID_ADDRESS;
10780 }
10781
10782 first_entry = vm_map_first_entry(map);
10783 if (first_entry != vm_map_to_entry(map) &&
10784 first_entry->vme_start < new_min_offset) {
10785 /*
10786 * Some memory was already allocated below the new
10787 * minimum offset. It's too late to change it now...
10788 */
10789 vm_map_unlock(map);
10790 return KERN_NO_SPACE;
10791 }
10792
10793 map->min_offset = new_min_offset;
10794
10795 vm_map_unlock(map);
10796
10797 return KERN_SUCCESS;
10798 }
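/*
 * Illustrative sketch, not part of the original source: reserving a 4GB
 * "page zero" by raising the map's minimum offset, matching the
 * vm_map_has_4GB_pagezero() check above.  The function name and call
 * site are hypothetical.
 */
#if 0
static kern_return_t
example_reserve_4GB_pagezero(
	vm_map_t	map)
{
	kern_return_t	kr;

	kr = vm_map_raise_min_offset(map, (vm_map_offset_t)0x100000000ULL);
	if (kr != KERN_SUCCESS)
		return kr;	/* memory already mapped below 4GB, or offset moved backwards */

	assert(vm_map_has_4GB_pagezero(map));
	vm_map_set_4GB_pagezero(map);	/* let the pmap layer know about the reservation */
	return kr;
}
#endif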