1 /*
2 * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_OSREFERENCE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the
10 * License may not be used to create, or enable the creation or
11 * redistribution of, unlawful or unlicensed copies of an Apple operating
12 * system, or to circumvent, violate, or enable the circumvention or
13 * violation of, any terms of an Apple operating system software license
14 * agreement.
15 *
16 * Please obtain a copy of the License at
17 * http://www.opensource.apple.com/apsl/ and read it before using this
18 * file.
19 *
20 * The Original Code and all software distributed under the License are
21 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
22 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
23 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
24 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
25 * Please see the License for the specific language governing rights and
26 * limitations under the License.
27 *
28 * @APPLE_LICENSE_OSREFERENCE_HEADER_END@
29 */
30 /*
31 * @OSF_COPYRIGHT@
32 */
33 /*
34 * Mach Operating System
35 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
36 * All Rights Reserved.
37 *
38 * Permission to use, copy, modify and distribute this software and its
39 * documentation is hereby granted, provided that both the copyright
40 * notice and this permission notice appear in all copies of the
41 * software, derivative works or modified versions, and any portions
42 * thereof, and that both notices appear in supporting documentation.
43 *
44 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
45 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
46 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
47 *
48 * Carnegie Mellon requests users of this software to return to
49 *
50 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
51 * School of Computer Science
52 * Carnegie Mellon University
53 * Pittsburgh PA 15213-3890
54 *
55 * any improvements or extensions that they make and grant Carnegie Mellon
56 * the rights to redistribute these changes.
57 */
58 /*
59 */
60 /*
61 * File: vm/vm_map.c
62 * Author: Avadis Tevanian, Jr., Michael Wayne Young
63 * Date: 1985
64 *
65 * Virtual memory mapping module.
66 */
67
68 #include <task_swapper.h>
69 #include <mach_assert.h>
70 #include <libkern/OSAtomic.h>
71
72 #include <mach/kern_return.h>
73 #include <mach/port.h>
74 #include <mach/vm_attributes.h>
75 #include <mach/vm_param.h>
76 #include <mach/vm_behavior.h>
77 #include <mach/vm_statistics.h>
78 #include <mach/memory_object.h>
79 #include <mach/mach_vm.h>
80 #include <machine/cpu_capabilities.h>
81
82 #include <kern/assert.h>
83 #include <kern/counters.h>
84 #include <kern/kalloc.h>
85 #include <kern/zalloc.h>
86
87 #include <vm/cpm.h>
88 #include <vm/vm_init.h>
89 #include <vm/vm_fault.h>
90 #include <vm/vm_map.h>
91 #include <vm/vm_object.h>
92 #include <vm/vm_page.h>
93 #include <vm/vm_kern.h>
94 #include <ipc/ipc_port.h>
95 #include <kern/sched_prim.h>
96 #include <kern/misc_protos.h>
97 #include <ddb/tr.h>
98 #include <machine/db_machdep.h>
99 #include <kern/xpr.h>
100
101 #include <mach/vm_map_server.h>
102 #include <mach/mach_host_server.h>
103 #include <vm/vm_shared_memory_server.h>
104 #include <vm/vm_protos.h> // for vm_map_commpage64 and vm_map_remove_commpage64
105
106 #ifdef ppc
107 #include <ppc/mappings.h>
108 #endif /* ppc */
109
110 #include <vm/vm_protos.h>
111
112 /* Internal prototypes
113 */
114
115 static void vm_map_simplify_range(
116 vm_map_t map,
117 vm_map_offset_t start,
118 vm_map_offset_t end); /* forward */
119
120 static boolean_t vm_map_range_check(
121 vm_map_t map,
122 vm_map_offset_t start,
123 vm_map_offset_t end,
124 vm_map_entry_t *entry);
125
126 static vm_map_entry_t _vm_map_entry_create(
127 struct vm_map_header *map_header);
128
129 static void _vm_map_entry_dispose(
130 struct vm_map_header *map_header,
131 vm_map_entry_t entry);
132
133 static void vm_map_pmap_enter(
134 vm_map_t map,
135 vm_map_offset_t addr,
136 vm_map_offset_t end_addr,
137 vm_object_t object,
138 vm_object_offset_t offset,
139 vm_prot_t protection);
140
141 static void _vm_map_clip_end(
142 struct vm_map_header *map_header,
143 vm_map_entry_t entry,
144 vm_map_offset_t end);
145
146 static void _vm_map_clip_start(
147 struct vm_map_header *map_header,
148 vm_map_entry_t entry,
149 vm_map_offset_t start);
150
151 static void vm_map_entry_delete(
152 vm_map_t map,
153 vm_map_entry_t entry);
154
155 static kern_return_t vm_map_delete(
156 vm_map_t map,
157 vm_map_offset_t start,
158 vm_map_offset_t end,
159 int flags,
160 vm_map_t zap_map);
161
162 static kern_return_t vm_map_copy_overwrite_unaligned(
163 vm_map_t dst_map,
164 vm_map_entry_t entry,
165 vm_map_copy_t copy,
166 vm_map_address_t start);
167
168 static kern_return_t vm_map_copy_overwrite_aligned(
169 vm_map_t dst_map,
170 vm_map_entry_t tmp_entry,
171 vm_map_copy_t copy,
172 vm_map_offset_t start,
173 pmap_t pmap);
174
175 static kern_return_t vm_map_copyin_kernel_buffer(
176 vm_map_t src_map,
177 vm_map_address_t src_addr,
178 vm_map_size_t len,
179 boolean_t src_destroy,
180 vm_map_copy_t *copy_result); /* OUT */
181
182 static kern_return_t vm_map_copyout_kernel_buffer(
183 vm_map_t map,
184 vm_map_address_t *addr, /* IN/OUT */
185 vm_map_copy_t copy,
186 boolean_t overwrite);
187
188 static void vm_map_fork_share(
189 vm_map_t old_map,
190 vm_map_entry_t old_entry,
191 vm_map_t new_map);
192
193 static boolean_t vm_map_fork_copy(
194 vm_map_t old_map,
195 vm_map_entry_t *old_entry_p,
196 vm_map_t new_map);
197
198 void vm_map_region_top_walk(
199 vm_map_entry_t entry,
200 vm_region_top_info_t top);
201
202 void vm_map_region_walk(
203 vm_map_t map,
204 vm_map_offset_t va,
205 vm_map_entry_t entry,
206 vm_object_offset_t offset,
207 vm_object_size_t range,
208 vm_region_extended_info_t extended);
209
210 static kern_return_t vm_map_wire_nested(
211 vm_map_t map,
212 vm_map_offset_t start,
213 vm_map_offset_t end,
214 vm_prot_t access_type,
215 boolean_t user_wire,
216 pmap_t map_pmap,
217 vm_map_offset_t pmap_addr);
218
219 static kern_return_t vm_map_unwire_nested(
220 vm_map_t map,
221 vm_map_offset_t start,
222 vm_map_offset_t end,
223 boolean_t user_wire,
224 pmap_t map_pmap,
225 vm_map_offset_t pmap_addr);
226
227 static kern_return_t vm_map_overwrite_submap_recurse(
228 vm_map_t dst_map,
229 vm_map_offset_t dst_addr,
230 vm_map_size_t dst_size);
231
232 static kern_return_t vm_map_copy_overwrite_nested(
233 vm_map_t dst_map,
234 vm_map_offset_t dst_addr,
235 vm_map_copy_t copy,
236 boolean_t interruptible,
237 pmap_t pmap);
238
239 static kern_return_t vm_map_remap_extract(
240 vm_map_t map,
241 vm_map_offset_t addr,
242 vm_map_size_t size,
243 boolean_t copy,
244 struct vm_map_header *map_header,
245 vm_prot_t *cur_protection,
246 vm_prot_t *max_protection,
247 vm_inherit_t inheritance,
248 boolean_t pageable);
249
250 static kern_return_t vm_map_remap_range_allocate(
251 vm_map_t map,
252 vm_map_address_t *address,
253 vm_map_size_t size,
254 vm_map_offset_t mask,
255 boolean_t anywhere,
256 vm_map_entry_t *map_entry);
257
258 static void vm_map_region_look_for_page(
259 vm_map_t map,
260 vm_map_offset_t va,
261 vm_object_t object,
262 vm_object_offset_t offset,
263 int max_refcnt,
264 int depth,
265 vm_region_extended_info_t extended);
266
267 static int vm_map_region_count_obj_refs(
268 vm_map_entry_t entry,
269 vm_object_t object);
270
271 /*
272 * Macros to copy a vm_map_entry. We must be careful to correctly
273 * manage the wired page count. vm_map_entry_copy() creates a new
274 * map entry to the same memory - the wired count in the new entry
275 * must be set to zero. vm_map_entry_copy_full() creates a new
276 * entry that is identical to the old entry. This preserves the
277 * wire count; it's used for map splitting and zone changing in
278 * vm_map_copyout.
279 */
280 #define vm_map_entry_copy(NEW,OLD) \
281 MACRO_BEGIN \
282 *(NEW) = *(OLD); \
283 (NEW)->is_shared = FALSE; \
284 (NEW)->needs_wakeup = FALSE; \
285 (NEW)->in_transition = FALSE; \
286 (NEW)->wired_count = 0; \
287 (NEW)->user_wired_count = 0; \
288 MACRO_END
289
290 #define vm_map_entry_copy_full(NEW,OLD) (*(NEW) = *(OLD))
291
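/*
 * Illustrative sketch (not part of the original source): how the two copy
 * macros above differ when duplicating an entry.  "old_entry" is a
 * hypothetical vm_map_entry_t already present in some map.
 */
#if 0
	struct vm_map_entry new_mapping, split_piece;

	/* A new mapping of the same memory: wiring state must start at zero. */
	vm_map_entry_copy(&new_mapping, old_entry);
	assert(new_mapping.wired_count == 0 &&
	       new_mapping.user_wired_count == 0);

	/* An exact duplicate (used when splitting): wiring is preserved. */
	vm_map_entry_copy_full(&split_piece, old_entry);
	assert(split_piece.wired_count == old_entry->wired_count);
#endif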
292 /*
293 * Virtual memory maps provide for the mapping, protection,
294 * and sharing of virtual memory objects. In addition,
295 * this module provides for an efficient virtual copy of
296 * memory from one map to another.
297 *
298 * Synchronization is required prior to most operations.
299 *
300 * Maps consist of an ordered doubly-linked list of simple
301 * entries; a single hint is used to speed up lookups.
302 *
303 * Sharing maps have been deleted from this version of Mach.
304 * All shared objects are now mapped directly into the respective
305 * maps. This requires a change in the copy on write strategy;
306 * the asymmetric (delayed) strategy is used for shared temporary
307 * objects instead of the symmetric (shadow) strategy. All maps
308 * are now "top level" maps (either task map, kernel map or submap
309 * of the kernel map).
310 *
311 * Since portions of maps are specified by start/end addresses,
312 * which may not align with existing map entries, all
313 * routines merely "clip" entries to these start/end values.
314 * [That is, an entry is split into two, bordering at a
315 * start or end value.] Note that these clippings may not
316 * always be necessary (as the two resulting entries are then
317 * not changed); however, the clipping is done for convenience.
318 * No attempt is currently made to "glue back together" two
319 * abutting entries.
320 *
321 * The symmetric (shadow) copy strategy implements virtual copy
322 * by copying VM object references from one map to
323 * another, and then marking both regions as copy-on-write.
324 * It is important to note that only one writeable reference
325 * to a VM object region exists in any map when this strategy
326 * is used -- this means that shadow object creation can be
327 * delayed until a write operation occurs. The asymmetric (delayed)
328 * strategy allows multiple maps to have writeable references to
329 * the same region of a vm object, and hence cannot delay creating
330 * its copy objects. See vm_object_copy_quickly() in vm_object.c.
331 * Copying of permanent objects is completely different; see
332 * vm_object_copy_strategically() in vm_object.c.
333 */
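/*
 * Illustrative sketch (not part of the original source): the clipping
 * described above.  Splitting an entry that covers [A, C) at a boundary B
 * leaves two abutting entries, [A, B) and [B, C), and nothing ever glues
 * them back together.  "map", "entry" and "B" are hypothetical; the static
 * _vm_map_clip_start() helper declared above does the actual work.
 */
#if 0
	/* "entry" covers [entry->vme_start, entry->vme_end) and contains B */
	_vm_map_clip_start(&map->hdr, entry, B);
	/* "entry" now starts at B; a new predecessor entry covers [A, B) */
#endif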
334
335 static zone_t vm_map_zone; /* zone for vm_map structures */
336 static zone_t vm_map_entry_zone; /* zone for vm_map_entry structures */
337 static zone_t vm_map_kentry_zone; /* zone for kernel entry structures */
338 static zone_t vm_map_copy_zone; /* zone for vm_map_copy structures */
339
340
341 /*
342 * Placeholder object for submap operations. This object is dropped
343 * into the range by a call to vm_map_find, and removed when
344 * vm_map_submap creates the submap.
345 */
346
347 vm_object_t vm_submap_object;
348
349 /*
350 * vm_map_init:
351 *
352 * Initialize the vm_map module. Must be called before
353 * any other vm_map routines.
354 *
355 * Map and entry structures are allocated from zones -- we must
356 * initialize those zones.
357 *
358 * There are three zones of interest:
359 *
360 * vm_map_zone: used to allocate maps.
361 * vm_map_entry_zone: used to allocate map entries.
362 * vm_map_kentry_zone: used to allocate map entries for the kernel.
363 *
364 * The kernel allocates map entries from a special zone that is initially
365 * "crammed" with memory. It would be difficult (perhaps impossible) for
366 * the kernel to allocate more memory to an entry zone when it became
367 * empty since the very act of allocating memory implies the creation
368 * of a new entry.
369 */
370
371 static void *map_data;
372 static vm_map_size_t map_data_size;
373 static void *kentry_data;
374 static vm_map_size_t kentry_data_size;
375 static int kentry_count = 2048; /* to init kentry_data_size */
376
377 #define NO_COALESCE_LIMIT (1024 * 128)
378
379 /*
380 * Threshold for aggressive (eager) page map entering for vm copyout
381 * operations. Any larger copyout will NOT be aggressively entered.
382 */
383 static vm_map_size_t vm_map_aggressive_enter_max; /* set by bootstrap */
384
385 /* Skip acquiring locks if we're in the midst of a kernel core dump */
386 extern unsigned int not_in_kdp;
387
388 #ifdef __i386__
389 kern_return_t
390 vm_map_apple_protected(
391 vm_map_t map,
392 vm_map_offset_t start,
393 vm_map_offset_t end)
394 {
395 boolean_t map_locked;
396 kern_return_t kr;
397 vm_map_entry_t map_entry;
398 memory_object_t protected_mem_obj;
399 vm_object_t protected_object;
400 vm_map_offset_t map_addr;
401
402 vm_map_lock_read(map);
403 map_locked = TRUE;
404
405 /* lookup the protected VM object */
406 if (!vm_map_lookup_entry(map,
407 start,
408 &map_entry) ||
409 map_entry->vme_end != end ||
410 map_entry->is_sub_map) {
411 /* that memory is not properly mapped */
412 kr = KERN_INVALID_ARGUMENT;
413 goto done;
414 }
415 protected_object = map_entry->object.vm_object;
416 if (protected_object == VM_OBJECT_NULL) {
417 /* there should be a VM object here at this point */
418 kr = KERN_INVALID_ARGUMENT;
419 goto done;
420 }
421
422 /*
423 * Lookup (and create if necessary) the protected memory object
424 * matching that VM object.
425 * If successful, this also grabs a reference on the memory object,
426 * to guarantee that it doesn't go away before we get a chance to map
427 * it.
428 */
429
430 protected_mem_obj = apple_protect_pager_setup(protected_object);
431 if (protected_mem_obj == NULL) {
432 kr = KERN_FAILURE;
433 goto done;
434 }
435
436 vm_map_unlock_read(map);
437 map_locked = FALSE;
438
439 /* map this memory object in place of the current one */
440 map_addr = start;
441 kr = mach_vm_map(map,
442 &map_addr,
443 end - start,
444 (mach_vm_offset_t) 0,
445 VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
446 (ipc_port_t) protected_mem_obj,
447 map_entry->offset + (start - map_entry->vme_start),
448 TRUE,
449 map_entry->protection,
450 map_entry->max_protection,
451 map_entry->inheritance);
452 assert(map_addr == start);
453 if (kr == KERN_SUCCESS) {
454 /* let the pager know that this mem_obj is mapped */
455 apple_protect_pager_map(protected_mem_obj);
456 }
457 /*
458 * Release the reference obtained by apple_protect_pager_setup().
459 * The mapping (if it succeeded) is now holding a reference on the
460 * memory object.
461 */
462 memory_object_deallocate(protected_mem_obj);
463
464 done:
465 if (map_locked) {
466 vm_map_unlock_read(map);
467 }
468 return kr;
469 }
470 #endif /* __i386__ */
471
472
473 void
474 vm_map_init(
475 void)
476 {
477 vm_map_zone = zinit((vm_map_size_t) sizeof(struct vm_map), 40*1024,
478 PAGE_SIZE, "maps");
479
480 vm_map_entry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
481 1024*1024, PAGE_SIZE*5,
482 "non-kernel map entries");
483
484 vm_map_kentry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
485 kentry_data_size, kentry_data_size,
486 "kernel map entries");
487
488 vm_map_copy_zone = zinit((vm_map_size_t) sizeof(struct vm_map_copy),
489 16*1024, PAGE_SIZE, "map copies");
490
491 /*
492 * Cram the map and kentry zones with initial data.
493 * Set kentry_zone non-collectible to aid zone_gc().
494 */
495 zone_change(vm_map_zone, Z_COLLECT, FALSE);
496 zone_change(vm_map_kentry_zone, Z_COLLECT, FALSE);
497 zone_change(vm_map_kentry_zone, Z_EXPAND, FALSE);
498 zcram(vm_map_zone, map_data, map_data_size);
499 zcram(vm_map_kentry_zone, kentry_data, kentry_data_size);
500 }
501
502 void
503 vm_map_steal_memory(
504 void)
505 {
506 map_data_size = vm_map_round_page(10 * sizeof(struct vm_map));
507 map_data = pmap_steal_memory(map_data_size);
508
509 #if 0
510 /*
511 * Limiting worst case: vm_map_kentry_zone needs to map each "available"
512 * physical page (i.e. that beyond the kernel image and page tables)
513 * individually; we guess at most one entry per eight pages in the
514 * real world. This works out to roughly .1 of 1% of physical memory,
515 * or roughly 1900 entries (64K) for a 64M machine with 4K pages.
516 */
517 #endif
518 kentry_count = pmap_free_pages() / 8;
519
520
521 kentry_data_size =
522 vm_map_round_page(kentry_count * sizeof(struct vm_map_entry));
523 kentry_data = pmap_steal_memory(kentry_data_size);
524 }
525
526 /*
527 * vm_map_create:
528 *
529 * Creates and returns a new empty VM map with
530 * the given physical map structure, and having
531 * the given lower and upper address bounds.
532 */
533 vm_map_t
534 vm_map_create(
535 pmap_t pmap,
536 vm_map_offset_t min,
537 vm_map_offset_t max,
538 boolean_t pageable)
539 {
540 register vm_map_t result;
541
542 result = (vm_map_t) zalloc(vm_map_zone);
543 if (result == VM_MAP_NULL)
544 panic("vm_map_create");
545
546 vm_map_first_entry(result) = vm_map_to_entry(result);
547 vm_map_last_entry(result) = vm_map_to_entry(result);
548 result->hdr.nentries = 0;
549 result->hdr.entries_pageable = pageable;
550
551 result->size = 0;
552 result->ref_count = 1;
553 #if TASK_SWAPPER
554 result->res_count = 1;
555 result->sw_state = MAP_SW_IN;
556 #endif /* TASK_SWAPPER */
557 result->pmap = pmap;
558 result->min_offset = min;
559 result->max_offset = max;
560 result->wiring_required = FALSE;
561 result->no_zero_fill = FALSE;
562 result->mapped = FALSE;
563 result->wait_for_space = FALSE;
564 result->first_free = vm_map_to_entry(result);
565 result->hint = vm_map_to_entry(result);
566 vm_map_lock_init(result);
567 mutex_init(&result->s_lock, 0);
568
569 return(result);
570 }
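/*
 * Illustrative sketch (not part of the original source): creating a
 * temporary map with pageable entries over a hypothetical range and tearing
 * it down again, the same pattern vm_map_enter() uses for its "zap" maps.
 */
#if 0
	vm_map_t tmp_map;

	tmp_map = vm_map_create(PMAP_NULL,
				start_addr,		/* hypothetical bounds */
				start_addr + size,
				TRUE);			/* entries pageable */
	/* ... use tmp_map ... */
	vm_map_destroy(tmp_map);	/* frees the entries (and pmap, if any) */
#endif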
571
572 /*
573 * vm_map_entry_create: [ internal use only ]
574 *
575 * Allocates a VM map entry for insertion in the
576 * given map (or map copy). No fields are filled.
577 */
578 #define vm_map_entry_create(map) \
579 _vm_map_entry_create(&(map)->hdr)
580
581 #define vm_map_copy_entry_create(copy) \
582 _vm_map_entry_create(&(copy)->cpy_hdr)
583
584 static vm_map_entry_t
585 _vm_map_entry_create(
586 register struct vm_map_header *map_header)
587 {
588 register zone_t zone;
589 register vm_map_entry_t entry;
590
591 if (map_header->entries_pageable)
592 zone = vm_map_entry_zone;
593 else
594 zone = vm_map_kentry_zone;
595
596 entry = (vm_map_entry_t) zalloc(zone);
597 if (entry == VM_MAP_ENTRY_NULL)
598 panic("vm_map_entry_create");
599
600 return(entry);
601 }
602
603 /*
604 * vm_map_entry_dispose: [ internal use only ]
605 *
606 * Inverse of vm_map_entry_create.
607 */
608 #define vm_map_entry_dispose(map, entry) \
609 MACRO_BEGIN \
610 if((entry) == (map)->first_free) \
611 (map)->first_free = vm_map_to_entry(map); \
612 if((entry) == (map)->hint) \
613 (map)->hint = vm_map_to_entry(map); \
614 _vm_map_entry_dispose(&(map)->hdr, (entry)); \
615 MACRO_END
616
617 #define vm_map_copy_entry_dispose(copy, entry) \
618 _vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
619
620 static void
621 _vm_map_entry_dispose(
622 register struct vm_map_header *map_header,
623 register vm_map_entry_t entry)
624 {
625 register zone_t zone;
626
627 if (map_header->entries_pageable)
628 zone = vm_map_entry_zone;
629 else
630 zone = vm_map_kentry_zone;
631
632 zfree(zone, entry);
633 }
634
635 #if MACH_ASSERT
636 static boolean_t first_free_is_valid(vm_map_t map); /* forward */
637 static boolean_t first_free_check = FALSE;
638 static boolean_t
639 first_free_is_valid(
640 vm_map_t map)
641 {
642 vm_map_entry_t entry, next;
643
644 if (!first_free_check)
645 return TRUE;
646
647 entry = vm_map_to_entry(map);
648 next = entry->vme_next;
649 while (vm_map_trunc_page(next->vme_start) == vm_map_trunc_page(entry->vme_end) ||
650 (vm_map_trunc_page(next->vme_start) == vm_map_trunc_page(entry->vme_start) &&
651 next != vm_map_to_entry(map))) {
652 entry = next;
653 next = entry->vme_next;
654 if (entry == vm_map_to_entry(map))
655 break;
656 }
657 if (map->first_free != entry) {
658 printf("Bad first_free for map 0x%x: 0x%x should be 0x%x\n",
659 map, map->first_free, entry);
660 return FALSE;
661 }
662 return TRUE;
663 }
664 #endif /* MACH_ASSERT */
665
666 /*
667 * UPDATE_FIRST_FREE:
668 *
669 * Updates the map->first_free pointer to the
670 * entry immediately before the first hole in the map.
671 * The map should be locked.
672 */
673 #define UPDATE_FIRST_FREE(map, new_first_free) \
674 MACRO_BEGIN \
675 vm_map_t UFF_map; \
676 vm_map_entry_t UFF_first_free; \
677 vm_map_entry_t UFF_next_entry; \
678 UFF_map = (map); \
679 UFF_first_free = (new_first_free); \
680 UFF_next_entry = UFF_first_free->vme_next; \
681 while (vm_map_trunc_page(UFF_next_entry->vme_start) == \
682 vm_map_trunc_page(UFF_first_free->vme_end) || \
683 (vm_map_trunc_page(UFF_next_entry->vme_start) == \
684 vm_map_trunc_page(UFF_first_free->vme_start) && \
685 UFF_next_entry != vm_map_to_entry(UFF_map))) { \
686 UFF_first_free = UFF_next_entry; \
687 UFF_next_entry = UFF_first_free->vme_next; \
688 if (UFF_first_free == vm_map_to_entry(UFF_map)) \
689 break; \
690 } \
691 UFF_map->first_free = UFF_first_free; \
692 assert(first_free_is_valid(UFF_map)); \
693 MACRO_END
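/*
 * Illustrative note (not part of the original source): with entries covering
 * [A,B) and [C,D) and a hole between B and C, map->first_free is the [A,B)
 * entry -- the entry immediately before the first hole -- so the allocation
 * loops in vm_map_find_space() and vm_map_enter() can start scanning there
 * instead of at the head of the list.
 */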
694
695 /*
696 * vm_map_entry_{un,}link:
697 *
698 * Insert/remove entries from maps (or map copies).
699 */
700 #define vm_map_entry_link(map, after_where, entry) \
701 MACRO_BEGIN \
702 vm_map_t VMEL_map; \
703 vm_map_entry_t VMEL_entry; \
704 VMEL_map = (map); \
705 VMEL_entry = (entry); \
706 _vm_map_entry_link(&VMEL_map->hdr, after_where, VMEL_entry); \
707 UPDATE_FIRST_FREE(VMEL_map, VMEL_map->first_free); \
708 MACRO_END
709
710
711 #define vm_map_copy_entry_link(copy, after_where, entry) \
712 _vm_map_entry_link(&(copy)->cpy_hdr, after_where, (entry))
713
714 #define _vm_map_entry_link(hdr, after_where, entry) \
715 MACRO_BEGIN \
716 (hdr)->nentries++; \
717 (entry)->vme_prev = (after_where); \
718 (entry)->vme_next = (after_where)->vme_next; \
719 (entry)->vme_prev->vme_next = (entry)->vme_next->vme_prev = (entry); \
720 MACRO_END
721
722 #define vm_map_entry_unlink(map, entry) \
723 MACRO_BEGIN \
724 vm_map_t VMEU_map; \
725 vm_map_entry_t VMEU_entry; \
726 vm_map_entry_t VMEU_first_free; \
727 VMEU_map = (map); \
728 VMEU_entry = (entry); \
729 if (VMEU_entry->vme_start <= VMEU_map->first_free->vme_start) \
730 VMEU_first_free = VMEU_entry->vme_prev; \
731 else \
732 VMEU_first_free = VMEU_map->first_free; \
733 _vm_map_entry_unlink(&VMEU_map->hdr, VMEU_entry); \
734 UPDATE_FIRST_FREE(VMEU_map, VMEU_first_free); \
735 MACRO_END
736
737 #define vm_map_copy_entry_unlink(copy, entry) \
738 _vm_map_entry_unlink(&(copy)->cpy_hdr, (entry))
739
740 #define _vm_map_entry_unlink(hdr, entry) \
741 MACRO_BEGIN \
742 (hdr)->nentries--; \
743 (entry)->vme_next->vme_prev = (entry)->vme_prev; \
744 (entry)->vme_prev->vme_next = (entry)->vme_next; \
745 MACRO_END
746
747 #if MACH_ASSERT && TASK_SWAPPER
748 /*
749 * vm_map_res_reference:
750 *
751 * Adds another valid residence count to the given map.
752 *
753 * Map is locked so this function can be called from
754 * vm_map_swapin.
755 *
756 */
757 void vm_map_res_reference(register vm_map_t map)
758 {
759 /* assert map is locked */
760 assert(map->res_count >= 0);
761 assert(map->ref_count >= map->res_count);
762 if (map->res_count == 0) {
763 mutex_unlock(&map->s_lock);
764 vm_map_lock(map);
765 vm_map_swapin(map);
766 mutex_lock(&map->s_lock);
767 ++map->res_count;
768 vm_map_unlock(map);
769 } else
770 ++map->res_count;
771 }
772
773 /*
774 * vm_map_reference_swap:
775 *
776 * Adds valid reference and residence counts to the given map.
777 *
778 * The map may not be in memory (i.e. zero residence count).
779 *
780 */
781 void vm_map_reference_swap(register vm_map_t map)
782 {
783 assert(map != VM_MAP_NULL);
784 mutex_lock(&map->s_lock);
785 assert(map->res_count >= 0);
786 assert(map->ref_count >= map->res_count);
787 map->ref_count++;
788 vm_map_res_reference(map);
789 mutex_unlock(&map->s_lock);
790 }
791
792 /*
793 * vm_map_res_deallocate:
794 *
795 * Decrement residence count on a map; possibly causing swapout.
796 *
797 * The map must be in memory (i.e. non-zero residence count).
798 *
799 * The map is locked, so this function is callable from vm_map_deallocate.
800 *
801 */
802 void vm_map_res_deallocate(register vm_map_t map)
803 {
804 assert(map->res_count > 0);
805 if (--map->res_count == 0) {
806 mutex_unlock(&map->s_lock);
807 vm_map_lock(map);
808 vm_map_swapout(map);
809 vm_map_unlock(map);
810 mutex_lock(&map->s_lock);
811 }
812 assert(map->ref_count >= map->res_count);
813 }
814 #endif /* MACH_ASSERT && TASK_SWAPPER */
815
816 /*
817 * vm_map_destroy:
818 *
819 * Actually destroy a map.
820 */
821 void
822 vm_map_destroy(
823 register vm_map_t map)
824 {
825 vm_map_lock(map);
826 (void) vm_map_delete(map, map->min_offset,
827 map->max_offset, VM_MAP_NO_FLAGS,
828 VM_MAP_NULL);
829 vm_map_unlock(map);
830
831 if (map->hdr.nentries!=0)
832 vm_map_remove_commpage(map);
833
834 // assert(map->hdr.nentries==0);
835 // if(map->hdr.nentries) { /* (BRINGUP) */
836 // panic("vm_map_destroy: hdr.nentries is not 0 (%d) in map %08X\n", map->hdr.nentries, map);
837 // }
838
839 if(map->pmap)
840 pmap_destroy(map->pmap);
841
842 zfree(vm_map_zone, map);
843 }
844
845 #if TASK_SWAPPER
846 /*
847 * vm_map_swapin/vm_map_swapout
848 *
849 * Swap a map in and out, either referencing or releasing its resources.
850 * These functions are for internal use only; however, they must be exported
851 * because they may be called from macros, which are exported.
852 *
853 * In the case of swapout, there could be races on the residence count,
854 * so if the residence count is up, we return, assuming that a
855 * vm_map_deallocate() call in the near future will bring us back.
856 *
857 * Locking:
858 * -- We use the map write lock for synchronization among races.
859 * -- The map write lock, and not the simple s_lock, protects the
860 * swap state of the map.
861 * -- If a map entry is a share map, then we hold both locks, in
862 * hierarchical order.
863 *
864 * Synchronization Notes:
865 * 1) If a vm_map_swapin() call happens while swapout in progress, it
866 * will block on the map lock and proceed when swapout is through.
867 * 2) A vm_map_reference() call at this time is illegal, and will
868 * cause a panic. vm_map_reference() is only allowed on resident
869 * maps, since it refuses to block.
870 * 3) A vm_map_swapin() call during a swapin will block, and
871 * proceed when the first swapin is done, turning into a nop.
872 * This is the reason the res_count is not incremented until
873 * after the swapin is complete.
874 * 4) There is a timing hole after the checks of the res_count, before
875 * the map lock is taken, during which a swapin may get the lock
876 * before a swapout about to happen. If this happens, the swapin
877 * will detect the state and increment the reference count, causing
878 * the swapout to be a nop, thereby delaying it until a later
879 * vm_map_deallocate. If the swapout gets the lock first, then
880 * the swapin will simply block until the swapout is done, and
881 * then proceed.
882 *
883 * Because vm_map_swapin() is potentially an expensive operation, it
884 * should be used with caution.
885 *
886 * Invariants:
887 * 1) A map with a residence count of zero is either swapped, or
888 * being swapped.
889 * 2) A map with a non-zero residence count is either resident,
890 * or being swapped in.
891 */
892
893 int vm_map_swap_enable = 1;
894
895 void vm_map_swapin (vm_map_t map)
896 {
897 register vm_map_entry_t entry;
898
899 if (!vm_map_swap_enable) /* debug */
900 return;
901
902 /*
903 * Map is locked
904 * First deal with various races.
905 */
906 if (map->sw_state == MAP_SW_IN)
907 /*
908 * we raced with swapout and won. Returning will incr.
909 * the res_count, turning the swapout into a nop.
910 */
911 return;
912
913 /*
914 * The residence count must be zero. If we raced with another
915 * swapin, the state would have been IN; if we raced with a
916 * swapout (after another competing swapin), we must have lost
917 * the race to get here (see above comment), in which case
918 * res_count is still 0.
919 */
920 assert(map->res_count == 0);
921
922 /*
923 * There are no intermediate states of a map going out or
924 * coming in, since the map is locked during the transition.
925 */
926 assert(map->sw_state == MAP_SW_OUT);
927
928 /*
929 * We now operate upon each map entry. If the entry is a sub-
930 * or share-map, we call vm_map_res_reference upon it.
931 * If the entry is an object, we call vm_object_res_reference
932 * (this may iterate through the shadow chain).
933 * Note that we hold the map locked the entire time,
934 * even if we get back here via a recursive call in
935 * vm_map_res_reference.
936 */
937 entry = vm_map_first_entry(map);
938
939 while (entry != vm_map_to_entry(map)) {
940 if (entry->object.vm_object != VM_OBJECT_NULL) {
941 if (entry->is_sub_map) {
942 vm_map_t lmap = entry->object.sub_map;
943 mutex_lock(&lmap->s_lock);
944 vm_map_res_reference(lmap);
945 mutex_unlock(&lmap->s_lock);
946 } else {
947 vm_object_t object = entry->object.vm_object;
948 vm_object_lock(object);
949 /*
950 * This call may iterate through the
951 * shadow chain.
952 */
953 vm_object_res_reference(object);
954 vm_object_unlock(object);
955 }
956 }
957 entry = entry->vme_next;
958 }
959 assert(map->sw_state == MAP_SW_OUT);
960 map->sw_state = MAP_SW_IN;
961 }
962
963 void vm_map_swapout(vm_map_t map)
964 {
965 register vm_map_entry_t entry;
966
967 /*
968 * Map is locked
969 * First deal with various races.
970 * If we raced with a swapin and lost, the residence count
971 * will have been incremented to 1, and we simply return.
972 */
973 mutex_lock(&map->s_lock);
974 if (map->res_count != 0) {
975 mutex_unlock(&map->s_lock);
976 return;
977 }
978 mutex_unlock(&map->s_lock);
979
980 /*
981 * There are no intermediate states of a map going out or
982 * coming in, since the map is locked during the transition.
983 */
984 assert(map->sw_state == MAP_SW_IN);
985
986 if (!vm_map_swap_enable)
987 return;
988
989 /*
990 * We now operate upon each map entry. If the entry is a sub-
991 * or share-map, we call vm_map_res_deallocate upon it.
992 * If the entry is an object, we call vm_object_res_deallocate
993 * (this may iterate through the shadow chain).
994 * Note that we hold the map locked the entire time,
995 * even if we get back here via a recursive call in
996 * vm_map_res_deallocate.
997 */
998 entry = vm_map_first_entry(map);
999
1000 while (entry != vm_map_to_entry(map)) {
1001 if (entry->object.vm_object != VM_OBJECT_NULL) {
1002 if (entry->is_sub_map) {
1003 vm_map_t lmap = entry->object.sub_map;
1004 mutex_lock(&lmap->s_lock);
1005 vm_map_res_deallocate(lmap);
1006 mutex_unlock(&lmap->s_lock);
1007 } else {
1008 vm_object_t object = entry->object.vm_object;
1009 vm_object_lock(object);
1010 /*
1011 * This call may take a long time,
1012 * since it could actively push
1013 * out pages (if we implement it
1014 * that way).
1015 */
1016 vm_object_res_deallocate(object);
1017 vm_object_unlock(object);
1018 }
1019 }
1020 entry = entry->vme_next;
1021 }
1022 assert(map->sw_state == MAP_SW_IN);
1023 map->sw_state = MAP_SW_OUT;
1024 }
1025
1026 #endif /* TASK_SWAPPER */
1027
1028
1029 /*
1030 * SAVE_HINT_MAP_READ:
1031 *
1032 * Saves the specified entry as the hint for
1033 * future lookups. Only a read lock is held on the map,
1034 * so make sure the store is atomic... OSCompareAndSwap
1035 * guarantees this... also, we don't care if we collide
1036 * and someone else wins and stores their 'hint'
1037 */
1038 #define SAVE_HINT_MAP_READ(map,value) \
1039 MACRO_BEGIN \
1040 OSCompareAndSwap((UInt32)((map)->hint), (UInt32)value, (UInt32 *)(&(map)->hint)); \
1041 MACRO_END
1042
1043
1044 /*
1045 * SAVE_HINT_MAP_WRITE:
1046 *
1047 * Saves the specified entry as the hint for
1048 * future lookups. The write lock is held on the map,
1049 * so no one else can be writing or looking
1050 * until the lock is dropped, so it's safe
1051 * to just do an assignment
1052 */
1053 #define SAVE_HINT_MAP_WRITE(map,value) \
1054 MACRO_BEGIN \
1055 (map)->hint = (value); \
1056 MACRO_END
1057
1058 /*
1059 * vm_map_lookup_entry: [ internal use only ]
1060 *
1061 * Finds the map entry containing (or
1062 * immediately preceding) the specified address
1063 * in the given map; the entry is returned
1064 * in the "entry" parameter. The boolean
1065 * result indicates whether the address is
1066 * actually contained in the map.
1067 */
1068 boolean_t
1069 vm_map_lookup_entry(
1070 register vm_map_t map,
1071 register vm_map_offset_t address,
1072 vm_map_entry_t *entry) /* OUT */
1073 {
1074 register vm_map_entry_t cur;
1075 register vm_map_entry_t last;
1076
1077 /*
1078 * Start looking either from the head of the
1079 * list, or from the hint.
1080 */
1081 cur = map->hint;
1082
1083 if (cur == vm_map_to_entry(map))
1084 cur = cur->vme_next;
1085
1086 if (address >= cur->vme_start) {
1087 /*
1088 * Go from hint to end of list.
1089 *
1090 * But first, make a quick check to see if
1091 * we are already looking at the entry we
1092 * want (which is usually the case).
1093 * Note also that we don't need to save the hint
1094 * here... it is the same hint (unless we are
1095 * at the header, in which case the hint didn't
1096 * buy us anything anyway).
1097 */
1098 last = vm_map_to_entry(map);
1099 if ((cur != last) && (cur->vme_end > address)) {
1100 *entry = cur;
1101 return(TRUE);
1102 }
1103 }
1104 else {
1105 /*
1106 * Go from start to hint, *inclusively*
1107 */
1108 last = cur->vme_next;
1109 cur = vm_map_first_entry(map);
1110 }
1111
1112 /*
1113 * Search linearly
1114 */
1115
1116 while (cur != last) {
1117 if (cur->vme_end > address) {
1118 if (address >= cur->vme_start) {
1119 /*
1120 * Save this lookup for future
1121 * hints, and return
1122 */
1123
1124 *entry = cur;
1125 SAVE_HINT_MAP_READ(map, cur);
1126
1127 return(TRUE);
1128 }
1129 break;
1130 }
1131 cur = cur->vme_next;
1132 }
1133 *entry = cur->vme_prev;
1134 SAVE_HINT_MAP_READ(map, *entry);
1135
1136 return(FALSE);
1137 }
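/*
 * Illustrative sketch (not part of the original source): the typical caller
 * pattern for vm_map_lookup_entry().  A read lock is enough because the
 * hint is updated with an atomic compare-and-swap (see SAVE_HINT_MAP_READ).
 * "some_addr" is a hypothetical address.
 */
#if 0
	vm_map_entry_t entry;

	vm_map_lock_read(map);
	if (vm_map_lookup_entry(map, some_addr, &entry)) {
		/* entry->vme_start <= some_addr < entry->vme_end */
	} else {
		/* "entry" immediately precedes the hole containing some_addr */
	}
	vm_map_unlock_read(map);
#endif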
1138
1139 /*
1140 * Routine: vm_map_find_space
1141 * Purpose:
1142 * Allocate a range in the specified virtual address map,
1143 * returning the entry allocated for that range.
1144 * Used by kmem_alloc, etc.
1145 *
1146 * The map must NOT be locked. It will be returned locked
1147 * on KERN_SUCCESS, unlocked on failure.
1148 *
1149 * If an entry is allocated, the object/offset fields
1150 * are initialized to zero.
1151 */
1152 kern_return_t
1153 vm_map_find_space(
1154 register vm_map_t map,
1155 vm_map_offset_t *address, /* OUT */
1156 vm_map_size_t size,
1157 vm_map_offset_t mask,
1158 int flags,
1159 vm_map_entry_t *o_entry) /* OUT */
1160 {
1161 register vm_map_entry_t entry, new_entry;
1162 register vm_map_offset_t start;
1163 register vm_map_offset_t end;
1164
1165 if (size == 0) {
1166 *address = 0;
1167 return KERN_INVALID_ARGUMENT;
1168 }
1169
1170 new_entry = vm_map_entry_create(map);
1171
1172 /*
1173 * Look for the first possible address; if there's already
1174 * something at this address, we have to start after it.
1175 */
1176
1177 vm_map_lock(map);
1178
1179 assert(first_free_is_valid(map));
1180 if ((entry = map->first_free) == vm_map_to_entry(map))
1181 start = map->min_offset;
1182 else
1183 start = entry->vme_end;
1184
1185 /*
1186 * In any case, the "entry" always precedes
1187 * the proposed new region throughout the loop:
1188 */
1189
1190 while (TRUE) {
1191 register vm_map_entry_t next;
1192
1193 /*
1194 * Find the end of the proposed new region.
1195 * Be sure we didn't go beyond the end, or
1196 * wrap around the address.
1197 */
1198
1199 end = ((start + mask) & ~mask);
1200 if (end < start) {
1201 vm_map_entry_dispose(map, new_entry);
1202 vm_map_unlock(map);
1203 return(KERN_NO_SPACE);
1204 }
1205 start = end;
1206 end += size;
1207
1208 if ((end > map->max_offset) || (end < start)) {
1209 vm_map_entry_dispose(map, new_entry);
1210 vm_map_unlock(map);
1211 return(KERN_NO_SPACE);
1212 }
1213
1214 /*
1215 * If there are no more entries, we must win.
1216 */
1217
1218 next = entry->vme_next;
1219 if (next == vm_map_to_entry(map))
1220 break;
1221
1222 /*
1223 * If there is another entry, it must be
1224 * after the end of the potential new region.
1225 */
1226
1227 if (next->vme_start >= end)
1228 break;
1229
1230 /*
1231 * Didn't fit -- move to the next entry.
1232 */
1233
1234 entry = next;
1235 start = entry->vme_end;
1236 }
1237
1238 /*
1239 * At this point,
1240 * "start" and "end" should define the endpoints of the
1241 * available new range, and
1242 * "entry" should refer to the region before the new
1243 * range, and
1244 *
1245 * the map should be locked.
1246 */
1247
1248 *address = start;
1249
1250 new_entry->vme_start = start;
1251 new_entry->vme_end = end;
1252 assert(page_aligned(new_entry->vme_start));
1253 assert(page_aligned(new_entry->vme_end));
1254
1255 new_entry->is_shared = FALSE;
1256 new_entry->is_sub_map = FALSE;
1257 new_entry->use_pmap = FALSE;
1258 new_entry->object.vm_object = VM_OBJECT_NULL;
1259 new_entry->offset = (vm_object_offset_t) 0;
1260
1261 new_entry->needs_copy = FALSE;
1262
1263 new_entry->inheritance = VM_INHERIT_DEFAULT;
1264 new_entry->protection = VM_PROT_DEFAULT;
1265 new_entry->max_protection = VM_PROT_ALL;
1266 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
1267 new_entry->wired_count = 0;
1268 new_entry->user_wired_count = 0;
1269
1270 new_entry->in_transition = FALSE;
1271 new_entry->needs_wakeup = FALSE;
1272
1273 VM_GET_FLAGS_ALIAS(flags, new_entry->alias);
1274
1275 /*
1276 * Insert the new entry into the list
1277 */
1278
1279 vm_map_entry_link(map, entry, new_entry);
1280
1281 map->size += size;
1282
1283 /*
1284 * Update the lookup hint
1285 */
1286 SAVE_HINT_MAP_WRITE(map, new_entry);
1287
1288 *o_entry = new_entry;
1289 return(KERN_SUCCESS);
1290 }
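/*
 * Illustrative sketch (not part of the original source): how a kmem_alloc
 * style caller might use vm_map_find_space().  On KERN_SUCCESS the map comes
 * back locked with a zeroed object/offset in the new entry, so the caller
 * fills those in and then unlocks.  "size" and "new_object" are hypothetical.
 */
#if 0
	vm_map_offset_t addr;
	vm_map_entry_t entry;
	kern_return_t kr;

	kr = vm_map_find_space(kernel_map, &addr, size,
			       (vm_map_offset_t) 0, 0, &entry);
	if (kr == KERN_SUCCESS) {
		entry->object.vm_object = new_object;
		entry->offset = (vm_object_offset_t) 0;
		vm_map_unlock(kernel_map);	/* map was returned locked */
	}
#endif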
1291
1292 int vm_map_pmap_enter_print = FALSE;
1293 int vm_map_pmap_enter_enable = FALSE;
1294
1295 /*
1296 * Routine: vm_map_pmap_enter [internal only]
1297 *
1298 * Description:
1299 * Force pages from the specified object to be entered into
1300 * the pmap at the specified address if they are present.
1301 * As soon as a page is not found in the object, the scan ends.
1302 *
1303 * Returns:
1304 * Nothing.
1305 *
1306 * In/out conditions:
1307 * The source map should not be locked on entry.
1308 */
1309 static void
1310 vm_map_pmap_enter(
1311 vm_map_t map,
1312 register vm_map_offset_t addr,
1313 register vm_map_offset_t end_addr,
1314 register vm_object_t object,
1315 vm_object_offset_t offset,
1316 vm_prot_t protection)
1317 {
1318 unsigned int cache_attr;
1319
1320 if(map->pmap == 0)
1321 return;
1322
1323 while (addr < end_addr) {
1324 register vm_page_t m;
1325
1326 vm_object_lock(object);
1327 vm_object_paging_begin(object);
1328
1329 m = vm_page_lookup(object, offset);
1330 /*
1331 * ENCRYPTED SWAP:
1332 * The user should never see encrypted data, so do not
1333 * enter an encrypted page in the page table.
1334 */
1335 if (m == VM_PAGE_NULL || m->busy || m->encrypted ||
1336 (m->unusual && ( m->error || m->restart || m->absent ||
1337 protection & m->page_lock))) {
1338
1339 vm_object_paging_end(object);
1340 vm_object_unlock(object);
1341 return;
1342 }
1343
1344 assert(!m->fictitious); /* XXX is this possible ??? */
1345
1346 if (vm_map_pmap_enter_print) {
1347 printf("vm_map_pmap_enter:");
1348 printf("map: %x, addr: %llx, object: %x, offset: %llx\n",
1349 map, (unsigned long long)addr, object, (unsigned long long)offset);
1350 }
1351 m->busy = TRUE;
1352
1353 if (m->no_isync == TRUE) {
1354 pmap_sync_page_data_phys(m->phys_page);
1355 m->no_isync = FALSE;
1356 }
1357
1358 cache_attr = ((unsigned int)object->wimg_bits) & VM_WIMG_MASK;
1359 vm_object_unlock(object);
1360
1361 PMAP_ENTER(map->pmap, addr, m,
1362 protection, cache_attr, FALSE);
1363
1364 vm_object_lock(object);
1365
1366 PAGE_WAKEUP_DONE(m);
1367 vm_page_lock_queues();
1368 if (!m->active && !m->inactive)
1369 vm_page_activate(m);
1370 vm_page_unlock_queues();
1371 vm_object_paging_end(object);
1372 vm_object_unlock(object);
1373
1374 offset += PAGE_SIZE_64;
1375 addr += PAGE_SIZE;
1376 }
1377 }
1378
1379 boolean_t vm_map_pmap_is_empty(
1380 vm_map_t map,
1381 vm_map_offset_t start,
1382 vm_map_offset_t end);
1383 boolean_t vm_map_pmap_is_empty(
1384 vm_map_t map,
1385 vm_map_offset_t start,
1386 vm_map_offset_t end)
1387 {
1388 vm_map_offset_t offset;
1389 ppnum_t phys_page;
1390
1391 if (map->pmap == NULL) {
1392 return TRUE;
1393 }
1394 for (offset = start;
1395 offset < end;
1396 offset += PAGE_SIZE) {
1397 phys_page = pmap_find_phys(map->pmap, offset);
1398 if (phys_page) {
1399 kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
1400 "page %d at 0x%llx\n",
1401 map, start, end, phys_page, offset);
1402 return FALSE;
1403 }
1404 }
1405 return TRUE;
1406 }
1407
1408 /*
1409 * Routine: vm_map_enter
1410 *
1411 * Description:
1412 * Allocate a range in the specified virtual address map.
1413 * The resulting range will refer to memory defined by
1414 * the given memory object and offset into that object.
1415 *
1416 * Arguments are as defined in the vm_map call.
1417 */
1418 int _map_enter_debug = 0;
1419 static unsigned int vm_map_enter_restore_successes = 0;
1420 static unsigned int vm_map_enter_restore_failures = 0;
1421 kern_return_t
1422 vm_map_enter(
1423 vm_map_t map,
1424 vm_map_offset_t *address, /* IN/OUT */
1425 vm_map_size_t size,
1426 vm_map_offset_t mask,
1427 int flags,
1428 vm_object_t object,
1429 vm_object_offset_t offset,
1430 boolean_t needs_copy,
1431 vm_prot_t cur_protection,
1432 vm_prot_t max_protection,
1433 vm_inherit_t inheritance)
1434 {
1435 vm_map_entry_t entry, new_entry;
1436 vm_map_offset_t start, tmp_start;
1437 vm_map_offset_t end, tmp_end;
1438 kern_return_t result = KERN_SUCCESS;
1439 vm_map_t zap_old_map = VM_MAP_NULL;
1440 vm_map_t zap_new_map = VM_MAP_NULL;
1441 boolean_t map_locked = FALSE;
1442 boolean_t pmap_empty = TRUE;
1443 boolean_t new_mapping_established = FALSE;
1444 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
1445 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
1446 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
1447 char alias;
1448
1449 if (size == 0) {
1450 *address = 0;
1451 return KERN_INVALID_ARGUMENT;
1452 }
1453
1454 VM_GET_FLAGS_ALIAS(flags, alias);
1455
1456 #define RETURN(value) { result = value; goto BailOut; }
1457
1458 assert(page_aligned(*address));
1459 assert(page_aligned(size));
1460
1461 /*
1462 * Only zero-fill objects are allowed to be purgable.
1463 * LP64todo - limit purgable objects to 32-bits for now
1464 */
1465 if (purgable &&
1466 (offset != 0 ||
1467 (object != VM_OBJECT_NULL &&
1468 (object->size != size ||
1469 object->purgable == VM_OBJECT_NONPURGABLE))
1470 || size > VM_MAX_ADDRESS)) /* LP64todo: remove when dp capable */
1471 return KERN_INVALID_ARGUMENT;
1472
1473 if (!anywhere && overwrite) {
1474 /*
1475 * Create a temporary VM map to hold the old mappings in the
1476 * affected area while we create the new one.
1477 * This avoids releasing the VM map lock in
1478 * vm_map_entry_delete() and allows atomicity
1479 * when we want to replace some mappings with a new one.
1480 * It also allows us to restore the old VM mappings if the
1481 * new mapping fails.
1482 */
1483 zap_old_map = vm_map_create(PMAP_NULL,
1484 *address,
1485 *address + size,
1486 TRUE);
1487 }
1488
1489 StartAgain: ;
1490
1491 start = *address;
1492
1493 if (anywhere) {
1494 vm_map_lock(map);
1495 map_locked = TRUE;
1496
1497 /*
1498 * Calculate the first possible address.
1499 */
1500
1501 if (start < map->min_offset)
1502 start = map->min_offset;
1503 if (start > map->max_offset)
1504 RETURN(KERN_NO_SPACE);
1505
1506 /*
1507 * Look for the first possible address;
1508 * if there's already something at this
1509 * address, we have to start after it.
1510 */
1511
1512 assert(first_free_is_valid(map));
1513 if (start == map->min_offset) {
1514 if ((entry = map->first_free) != vm_map_to_entry(map))
1515 start = entry->vme_end;
1516 } else {
1517 vm_map_entry_t tmp_entry;
1518 if (vm_map_lookup_entry(map, start, &tmp_entry))
1519 start = tmp_entry->vme_end;
1520 entry = tmp_entry;
1521 }
1522
1523 /*
1524 * In any case, the "entry" always precedes
1525 * the proposed new region throughout the
1526 * loop:
1527 */
1528
1529 while (TRUE) {
1530 register vm_map_entry_t next;
1531
1532 /*
1533 * Find the end of the proposed new region.
1534 * Be sure we didn't go beyond the end, or
1535 * wrap around the address.
1536 */
1537
1538 end = ((start + mask) & ~mask);
1539 if (end < start)
1540 RETURN(KERN_NO_SPACE);
1541 start = end;
1542 end += size;
1543
1544 if ((end > map->max_offset) || (end < start)) {
1545 if (map->wait_for_space) {
1546 if (size <= (map->max_offset -
1547 map->min_offset)) {
1548 assert_wait((event_t)map,
1549 THREAD_ABORTSAFE);
1550 vm_map_unlock(map);
1551 map_locked = FALSE;
1552 thread_block(THREAD_CONTINUE_NULL);
1553 goto StartAgain;
1554 }
1555 }
1556 RETURN(KERN_NO_SPACE);
1557 }
1558
1559 /*
1560 * If there are no more entries, we must win.
1561 */
1562
1563 next = entry->vme_next;
1564 if (next == vm_map_to_entry(map))
1565 break;
1566
1567 /*
1568 * If there is another entry, it must be
1569 * after the end of the potential new region.
1570 */
1571
1572 if (next->vme_start >= end)
1573 break;
1574
1575 /*
1576 * Didn't fit -- move to the next entry.
1577 */
1578
1579 entry = next;
1580 start = entry->vme_end;
1581 }
1582 *address = start;
1583 } else {
1584 vm_map_entry_t temp_entry;
1585
1586 /*
1587 * Verify that:
1588 * the address doesn't itself violate
1589 * the mask requirement.
1590 */
1591
1592 vm_map_lock(map);
1593 map_locked = TRUE;
1594 if ((start & mask) != 0)
1595 RETURN(KERN_NO_SPACE);
1596
1597 /*
1598 * ... the address is within bounds
1599 */
1600
1601 end = start + size;
1602
1603 if ((start < map->min_offset) ||
1604 (end > map->max_offset) ||
1605 (start >= end)) {
1606 RETURN(KERN_INVALID_ADDRESS);
1607 }
1608
1609 if (overwrite && zap_old_map != VM_MAP_NULL) {
1610 /*
1611 * Fixed mapping and "overwrite" flag: attempt to
1612 * remove all existing mappings in the specified
1613 * address range, saving them in our "zap_old_map".
1614 */
1615 (void) vm_map_delete(map, start, end,
1616 VM_MAP_REMOVE_SAVE_ENTRIES,
1617 zap_old_map);
1618 }
1619
1620 /*
1621 * ... the starting address isn't allocated
1622 */
1623
1624 if (vm_map_lookup_entry(map, start, &temp_entry))
1625 RETURN(KERN_NO_SPACE);
1626
1627 entry = temp_entry;
1628
1629 /*
1630 * ... the next region doesn't overlap the
1631 * end point.
1632 */
1633
1634 if ((entry->vme_next != vm_map_to_entry(map)) &&
1635 (entry->vme_next->vme_start < end))
1636 RETURN(KERN_NO_SPACE);
1637 }
1638
1639 /*
1640 * At this point,
1641 * "start" and "end" should define the endpoints of the
1642 * available new range, and
1643 * "entry" should refer to the region before the new
1644 * range, and
1645 *
1646 * the map should be locked.
1647 */
1648
1649 /*
1650 * See whether we can avoid creating a new entry (and object) by
1651 * extending one of our neighbors. [So far, we only attempt to
1652 * extend from below.] Note that we can never extend/join
1653 * purgable objects because they need to remain distinct
1654 * entities in order to implement their "volatile object"
1655 * semantics.
1656 */
1657
1658 if (purgable) {
1659 if (object == VM_OBJECT_NULL) {
1660 object = vm_object_allocate(size);
1661 object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
1662 object->purgable = VM_OBJECT_PURGABLE_NONVOLATILE;
1663 offset = (vm_object_offset_t)0;
1664 }
1665 } else if ((object == VM_OBJECT_NULL) &&
1666 (entry != vm_map_to_entry(map)) &&
1667 (entry->vme_end == start) &&
1668 (!entry->is_shared) &&
1669 (!entry->is_sub_map) &&
1670 (entry->alias == alias) &&
1671 (entry->inheritance == inheritance) &&
1672 (entry->protection == cur_protection) &&
1673 (entry->max_protection == max_protection) &&
1674 (entry->behavior == VM_BEHAVIOR_DEFAULT) &&
1675 (entry->in_transition == 0) &&
1676 ((alias == VM_MEMORY_REALLOC) || ((entry->vme_end - entry->vme_start) + size < NO_COALESCE_LIMIT)) &&
1677 (entry->wired_count == 0)) { /* implies user_wired_count == 0 */
1678 if (vm_object_coalesce(entry->object.vm_object,
1679 VM_OBJECT_NULL,
1680 entry->offset,
1681 (vm_object_offset_t) 0,
1682 (vm_map_size_t)(entry->vme_end - entry->vme_start),
1683 (vm_map_size_t)(end - entry->vme_end))) {
1684
1685 /*
1686 * Coalesced the two objects - can extend
1687 * the previous map entry to include the
1688 * new range.
1689 */
1690 map->size += (end - entry->vme_end);
1691 entry->vme_end = end;
1692 UPDATE_FIRST_FREE(map, map->first_free);
1693 RETURN(KERN_SUCCESS);
1694 }
1695 }
1696
1697 /*
1698 * Create a new entry
1699 * LP64todo - for now, we can only allocate 4GB internal objects
1700 * because the default pager can't page bigger ones. Remove this
1701 * when it can.
1702 *
1703 * XXX FBDP
1704 * The reserved "page zero" in each process's address space can
1705 * be arbitrarily large. Splitting it into separate 4GB objects and
1706 * therefore different VM map entries serves no purpose and just
1707 * slows down operations on the VM map, so let's not split the
1708 * allocation into 4GB chunks if the max protection is NONE. That
1709 * memory should never be accessible, so it will never get to the
1710 * default pager.
1711 */
1712 tmp_start = start;
1713 if (object == VM_OBJECT_NULL &&
1714 size > (vm_map_size_t)VM_MAX_ADDRESS &&
1715 max_protection != VM_PROT_NONE)
1716 tmp_end = tmp_start + (vm_map_size_t)VM_MAX_ADDRESS;
1717 else
1718 tmp_end = end;
1719 do {
1720 new_entry = vm_map_entry_insert(map, entry, tmp_start, tmp_end,
1721 object, offset, needs_copy, FALSE, FALSE,
1722 cur_protection, max_protection,
1723 VM_BEHAVIOR_DEFAULT, inheritance, 0);
1724 new_entry->alias = alias;
1725 entry = new_entry;
1726 } while (tmp_end != end &&
1727 (tmp_start = tmp_end) &&
1728 (tmp_end = (end - tmp_end > (vm_map_size_t)VM_MAX_ADDRESS) ?
1729 tmp_end + (vm_map_size_t)VM_MAX_ADDRESS : end));
1730
1731 vm_map_unlock(map);
1732 map_locked = FALSE;
1733
1734 new_mapping_established = TRUE;
1735
1736 /* Wire down the new entry if the user
1737 * requested all new map entries be wired.
1738 */
1739 if (map->wiring_required) {
1740 pmap_empty = FALSE; /* pmap won't be empty */
1741 result = vm_map_wire(map, start, end,
1742 new_entry->protection, TRUE);
1743 RETURN(result);
1744 }
1745
1746 if ((object != VM_OBJECT_NULL) &&
1747 (vm_map_pmap_enter_enable) &&
1748 (!anywhere) &&
1749 (!needs_copy) &&
1750 (size < (128*1024))) {
1751 pmap_empty = FALSE; /* pmap won't be empty */
1752
1753 #ifdef STACK_ONLY_NX
1754 if (alias != VM_MEMORY_STACK && cur_protection)
1755 cur_protection |= VM_PROT_EXECUTE;
1756 #endif
1757 vm_map_pmap_enter(map, start, end,
1758 object, offset, cur_protection);
1759 }
1760
1761 BailOut: ;
1762 if (result == KERN_SUCCESS &&
1763 pmap_empty &&
1764 !(flags & VM_FLAGS_NO_PMAP_CHECK)) {
1765 assert(vm_map_pmap_is_empty(map, *address, *address+size));
1766 }
1767
1768 if (result != KERN_SUCCESS) {
1769 if (new_mapping_established) {
1770 /*
1771 * We have to get rid of the new mappings since we
1772 * won't make them available to the user.
1773 * Try to do that atomically, to minimize the risk
1774 * that someone else creates new mappings in that range.
1775 */
1776 zap_new_map = vm_map_create(PMAP_NULL,
1777 *address,
1778 *address + size,
1779 TRUE);
1780 if (!map_locked) {
1781 vm_map_lock(map);
1782 map_locked = TRUE;
1783 }
1784 (void) vm_map_delete(map, *address, *address+size,
1785 VM_MAP_REMOVE_SAVE_ENTRIES,
1786 zap_new_map);
1787 }
1788 if (zap_old_map != VM_MAP_NULL &&
1789 zap_old_map->hdr.nentries != 0) {
1790 vm_map_entry_t entry1, entry2;
1791
1792 /*
1793 * The new mapping failed. Attempt to restore
1794 * the old mappings, saved in the "zap_old_map".
1795 */
1796 if (!map_locked) {
1797 vm_map_lock(map);
1798 map_locked = TRUE;
1799 }
1800
1801 /* first check if the coast is still clear */
1802 start = vm_map_first_entry(zap_old_map)->vme_start;
1803 end = vm_map_last_entry(zap_old_map)->vme_end;
1804 if (vm_map_lookup_entry(map, start, &entry1) ||
1805 vm_map_lookup_entry(map, end, &entry2) ||
1806 entry1 != entry2) {
1807 /*
1808 * Part of that range has already been
1809 * re-mapped: we can't restore the old
1810 * mappings...
1811 */
1812 vm_map_enter_restore_failures++;
1813 } else {
1814 /*
1815 * Transfer the saved map entries from
1816 * "zap_old_map" to the original "map",
1817 * inserting them all after "entry1".
1818 */
1819 for (entry2 = vm_map_first_entry(zap_old_map);
1820 entry2 != vm_map_to_entry(zap_old_map);
1821 entry2 = vm_map_first_entry(zap_old_map)) {
1822 vm_map_entry_unlink(zap_old_map,
1823 entry2);
1824 vm_map_entry_link(map, entry1, entry2);
1825 entry1 = entry2;
1826 }
1827 if (map->wiring_required) {
1828 /*
1829 * XXX TODO: we should rewire the
1830 * old pages here...
1831 */
1832 }
1833 vm_map_enter_restore_successes++;
1834 }
1835 }
1836 }
1837
1838 if (map_locked) {
1839 vm_map_unlock(map);
1840 }
1841
1842 /*
1843 * Get rid of the "zap_maps" and all the map entries that
1844 * they may still contain.
1845 */
1846 if (zap_old_map != VM_MAP_NULL) {
1847 vm_map_destroy(zap_old_map);
1848 zap_old_map = VM_MAP_NULL;
1849 }
1850 if (zap_new_map != VM_MAP_NULL) {
1851 vm_map_destroy(zap_new_map);
1852 zap_new_map = VM_MAP_NULL;
1853 }
1854
1855 return result;
1856
1857 #undef RETURN
1858 }
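/*
 * Illustrative sketch (not part of the original source): the common
 * "anonymous, anywhere" use of vm_map_enter(), i.e. a zero-fill allocation
 * placed wherever the map has room.  "alloc_size" is hypothetical.
 */
#if 0
	vm_map_offset_t map_addr = 0;
	kern_return_t kr;

	kr = vm_map_enter(map, &map_addr, alloc_size,
			  (vm_map_offset_t) 0,		/* mask */
			  VM_FLAGS_ANYWHERE,
			  VM_OBJECT_NULL,		/* zero-fill object */
			  (vm_object_offset_t) 0,
			  FALSE,			/* needs_copy */
			  VM_PROT_DEFAULT,
			  VM_PROT_ALL,
			  VM_INHERIT_DEFAULT);
#endif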
1859
1860
1861 #if VM_CPM
1862
1863 #ifdef MACH_ASSERT
1864 extern pmap_paddr_t avail_start, avail_end;
1865 #endif
1866
1867 /*
1868 * Allocate memory in the specified map, with the caveat that
1869 * the memory is physically contiguous. This call may fail
1870 * if the system can't find sufficient contiguous memory.
1871 * This call may cause or lead to heart-stopping amounts of
1872 * paging activity.
1873 *
1874 * Memory obtained from this call should be freed in the
1875 * normal way, viz., via vm_deallocate.
1876 */
1877 kern_return_t
1878 vm_map_enter_cpm(
1879 vm_map_t map,
1880 vm_map_offset_t *addr,
1881 vm_map_size_t size,
1882 int flags)
1883 {
1884 vm_object_t cpm_obj;
1885 pmap_t pmap;
1886 vm_page_t m, pages;
1887 kern_return_t kr;
1888 vm_map_offset_t va, start, end, offset;
1889 #if MACH_ASSERT
1890 vm_map_offset_t prev_addr;
1891 #endif /* MACH_ASSERT */
1892
1893 boolean_t anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
1894
1895 if (!vm_allocate_cpm_enabled)
1896 return KERN_FAILURE;
1897
1898 if (size == 0) {
1899 *addr = 0;
1900 return KERN_SUCCESS;
1901 }
1902
1903 if (anywhere)
1904 *addr = vm_map_min(map);
1905 else
1906 *addr = vm_map_trunc_page(*addr);
1907 size = vm_map_round_page(size);
1908
1909 /*
1910 * LP64todo - cpm_allocate should probably allow
1911 * allocations of >4GB, but not with the current
1912 * algorithm, so just cast down the size for now.
1913 */
1914 if (size > VM_MAX_ADDRESS)
1915 return KERN_RESOURCE_SHORTAGE;
1916 if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
1917 &pages, TRUE)) != KERN_SUCCESS)
1918 return kr;
1919
1920 cpm_obj = vm_object_allocate((vm_object_size_t)size);
1921 assert(cpm_obj != VM_OBJECT_NULL);
1922 assert(cpm_obj->internal);
1923 assert(cpm_obj->size == (vm_object_size_t)size);
1924 assert(cpm_obj->can_persist == FALSE);
1925 assert(cpm_obj->pager_created == FALSE);
1926 assert(cpm_obj->pageout == FALSE);
1927 assert(cpm_obj->shadow == VM_OBJECT_NULL);
1928
1929 /*
1930 * Insert pages into object.
1931 */
1932
1933 vm_object_lock(cpm_obj);
1934 for (offset = 0; offset < size; offset += PAGE_SIZE) {
1935 m = pages;
1936 pages = NEXT_PAGE(m);
1937 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
1938
1939 assert(!m->gobbled);
1940 assert(!m->wanted);
1941 assert(!m->pageout);
1942 assert(!m->tabled);
1943 /*
1944 * ENCRYPTED SWAP:
1945 * "m" is not supposed to be pageable, so it
1946 * should not be encrypted. It wouldn't be safe
1947 * to enter it in a new VM object while encrypted.
1948 */
1949 ASSERT_PAGE_DECRYPTED(m);
1950 assert(m->busy);
1951 assert(m->phys_page>=(avail_start>>PAGE_SHIFT) && m->phys_page<=(avail_end>>PAGE_SHIFT));
1952
1953 m->busy = FALSE;
1954 vm_page_insert(m, cpm_obj, offset);
1955 }
1956 assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
1957 vm_object_unlock(cpm_obj);
1958
1959 /*
1960 * Hang onto a reference on the object in case a
1961 * multi-threaded application for some reason decides
1962 * to deallocate the portion of the address space into
1963 * which we will insert this object.
1964 *
1965 * Unfortunately, we must insert the object now before
1966 * we can talk to the pmap module about which addresses
1967 * must be wired down. Hence, the race with a multi-
1968 * threaded app.
1969 */
1970 vm_object_reference(cpm_obj);
1971
1972 /*
1973 * Insert object into map.
1974 */
1975
1976 kr = vm_map_enter(
1977 map,
1978 addr,
1979 size,
1980 (vm_map_offset_t)0,
1981 flags,
1982 cpm_obj,
1983 (vm_object_offset_t)0,
1984 FALSE,
1985 VM_PROT_ALL,
1986 VM_PROT_ALL,
1987 VM_INHERIT_DEFAULT);
1988
1989 if (kr != KERN_SUCCESS) {
1990 /*
1991 * A CPM object doesn't have can_persist set,
1992 * so all we have to do is deallocate it to
1993 * free up these pages.
1994 */
1995 assert(cpm_obj->pager_created == FALSE);
1996 assert(cpm_obj->can_persist == FALSE);
1997 assert(cpm_obj->pageout == FALSE);
1998 assert(cpm_obj->shadow == VM_OBJECT_NULL);
1999 vm_object_deallocate(cpm_obj); /* kill acquired ref */
2000 vm_object_deallocate(cpm_obj); /* kill creation ref */
2001 }
2002
2003 /*
2004 * Inform the physical mapping system that the
2005 * range of addresses may not fault, so that
2006 * page tables and such can be locked down as well.
2007 */
2008 start = *addr;
2009 end = start + size;
2010 pmap = vm_map_pmap(map);
2011 pmap_pageable(pmap, start, end, FALSE);
2012
2013 /*
2014 * Enter each page into the pmap, to avoid faults.
2015 * Note that this loop could be coded more efficiently,
2016 * if the need arose, rather than looking up each page
2017 * again.
2018 */
2019 for (offset = 0, va = start; offset < size;
2020 va += PAGE_SIZE, offset += PAGE_SIZE) {
2021 vm_object_lock(cpm_obj);
2022 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
2023 vm_object_unlock(cpm_obj);
2024 assert(m != VM_PAGE_NULL);
2025 PMAP_ENTER(pmap, va, m, VM_PROT_ALL,
2026 ((unsigned int)(m->object->wimg_bits)) & VM_WIMG_MASK,
2027 TRUE);
2028 }
2029
2030 #if MACH_ASSERT
2031 /*
2032 * Verify ordering in address space.
2033 */
2034 for (offset = 0; offset < size; offset += PAGE_SIZE) {
2035 vm_object_lock(cpm_obj);
2036 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
2037 vm_object_unlock(cpm_obj);
2038 if (m == VM_PAGE_NULL)
2039 panic("vm_allocate_cpm: obj 0x%x off 0x%x no page",
2040 cpm_obj, offset);
2041 assert(m->tabled);
2042 assert(!m->busy);
2043 assert(!m->wanted);
2044 assert(!m->fictitious);
2045 assert(!m->private);
2046 assert(!m->absent);
2047 assert(!m->error);
2048 assert(!m->cleaning);
2049 assert(!m->precious);
2050 assert(!m->clustered);
2051 if (offset != 0) {
2052 if (m->phys_page != prev_addr + 1) {
2053 printf("start 0x%x end 0x%x va 0x%x\n",
2054 start, end, va);
2055 printf("obj 0x%x off 0x%x\n", cpm_obj, offset);
2056 printf("m 0x%x prev_address 0x%x\n", m,
2057 prev_addr);
2058 panic("vm_allocate_cpm: pages not contig!");
2059 }
2060 }
2061 prev_addr = m->phys_page;
2062 }
2063 #endif /* MACH_ASSERT */
2064
2065 vm_object_deallocate(cpm_obj); /* kill extra ref */
2066
2067 return kr;
2068 }
2069
2070
2071 #else /* VM_CPM */
2072
2073 /*
2074 * Interface is defined in all cases, but unless the kernel
2075 * is built explicitly for this option, the interface does
2076 * nothing.
2077 */
2078
2079 kern_return_t
2080 vm_map_enter_cpm(
2081 __unused vm_map_t map,
2082 __unused vm_map_offset_t *addr,
2083 __unused vm_map_size_t size,
2084 __unused int flags)
2085 {
2086 return KERN_FAILURE;
2087 }
2088 #endif /* VM_CPM */
2089
2090 /*
2091 * vm_map_clip_start: [ internal use only ]
2092 *
2093 * Asserts that the given entry begins at or after
2094 * the specified address; if necessary,
2095 * it splits the entry into two.
2096 */
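/*
 * Editor's note -- illustrative example, not part of the original source.
 * Clipping an entry covering [0x1000, 0x5000) at startaddr 0x3000 leaves
 * two entries, [0x1000, 0x3000) and [0x3000, 0x5000); "entry" then
 * describes the second piece, so it begins at the requested address.
 */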
2097 #ifndef NO_NESTED_PMAP
2098 #define vm_map_clip_start(map, entry, startaddr) \
2099 MACRO_BEGIN \
2100 vm_map_t VMCS_map; \
2101 vm_map_entry_t VMCS_entry; \
2102 vm_map_offset_t VMCS_startaddr; \
2103 VMCS_map = (map); \
2104 VMCS_entry = (entry); \
2105 VMCS_startaddr = (startaddr); \
2106 if (VMCS_startaddr > VMCS_entry->vme_start) { \
2107 if(entry->use_pmap) { \
2108 vm_map_offset_t pmap_base_addr; \
2109 \
2110 pmap_base_addr = 0xF0000000 & entry->vme_start; \
2111 pmap_unnest(map->pmap, (addr64_t)pmap_base_addr); \
2112 entry->use_pmap = FALSE; \
2113 } else if(entry->object.vm_object \
2114 && !entry->is_sub_map \
2115 && entry->object.vm_object->phys_contiguous) { \
2116 pmap_remove(map->pmap, \
2117 (addr64_t)(entry->vme_start), \
2118 (addr64_t)(entry->vme_end)); \
2119 } \
2120 _vm_map_clip_start(&VMCS_map->hdr,VMCS_entry,VMCS_startaddr);\
2121 } \
2122 UPDATE_FIRST_FREE(VMCS_map, VMCS_map->first_free); \
2123 MACRO_END
2124 #else /* NO_NESTED_PMAP */
2125 #define vm_map_clip_start(map, entry, startaddr) \
2126 MACRO_BEGIN \
2127 vm_map_t VMCS_map; \
2128 vm_map_entry_t VMCS_entry; \
2129 vm_map_offset_t VMCS_startaddr; \
2130 VMCS_map = (map); \
2131 VMCS_entry = (entry); \
2132 VMCS_startaddr = (startaddr); \
2133 if (VMCS_startaddr > VMCS_entry->vme_start) { \
2134 _vm_map_clip_start(&VMCS_map->hdr,VMCS_entry,VMCS_startaddr);\
2135 } \
2136 UPDATE_FIRST_FREE(VMCS_map, VMCS_map->first_free); \
2137 MACRO_END
2138 #endif /* NO_NESTED_PMAP */
2139
2140 #define vm_map_copy_clip_start(copy, entry, startaddr) \
2141 MACRO_BEGIN \
2142 if ((startaddr) > (entry)->vme_start) \
2143 _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \
2144 MACRO_END
2145
2146 /*
2147 * This routine is called only when it is known that
2148 * the entry must be split.
2149 */
2150 static void
2151 _vm_map_clip_start(
2152 register struct vm_map_header *map_header,
2153 register vm_map_entry_t entry,
2154 register vm_map_offset_t start)
2155 {
2156 register vm_map_entry_t new_entry;
2157
2158 /*
2159 * Split off the front portion --
2160 * note that we must insert the new
2161 * entry BEFORE this one, so that
2162 * this entry has the specified starting
2163 * address.
2164 */
2165
2166 new_entry = _vm_map_entry_create(map_header);
2167 vm_map_entry_copy_full(new_entry, entry);
2168
2169 new_entry->vme_end = start;
2170 entry->offset += (start - entry->vme_start);
2171 entry->vme_start = start;
2172
2173 _vm_map_entry_link(map_header, entry->vme_prev, new_entry);
2174
2175 if (entry->is_sub_map)
2176 vm_map_reference(new_entry->object.sub_map);
2177 else
2178 vm_object_reference(new_entry->object.vm_object);
2179 }
2180
2181
2182 /*
2183 * vm_map_clip_end: [ internal use only ]
2184 *
2185 * Asserts that the given entry ends at or before
2186 * the specified address; if necessary,
2187 * it splits the entry into two.
2188 */
2189 #ifndef NO_NESTED_PMAP
2190 #define vm_map_clip_end(map, entry, endaddr) \
2191 MACRO_BEGIN \
2192 vm_map_t VMCE_map; \
2193 vm_map_entry_t VMCE_entry; \
2194 vm_map_offset_t VMCE_endaddr; \
2195 VMCE_map = (map); \
2196 VMCE_entry = (entry); \
2197 VMCE_endaddr = (endaddr); \
2198 if (VMCE_endaddr < VMCE_entry->vme_end) { \
2199 if(entry->use_pmap) { \
2200 vm_map_offset_t pmap_base_addr; \
2201 \
2202 pmap_base_addr = 0xF0000000 & entry->vme_start; \
2203 pmap_unnest(map->pmap, (addr64_t)pmap_base_addr); \
2204 entry->use_pmap = FALSE; \
2205 } else if(entry->object.vm_object \
2206 && !entry->is_sub_map \
2207 && entry->object.vm_object->phys_contiguous) { \
2208 pmap_remove(map->pmap, \
2209 (addr64_t)(entry->vme_start), \
2210 (addr64_t)(entry->vme_end)); \
2211 } \
2212 _vm_map_clip_end(&VMCE_map->hdr,VMCE_entry,VMCE_endaddr); \
2213 } \
2214 UPDATE_FIRST_FREE(VMCE_map, VMCE_map->first_free); \
2215 MACRO_END
2216 #else /* NO_NESTED_PMAP */
2217 #define vm_map_clip_end(map, entry, endaddr) \
2218 MACRO_BEGIN \
2219 vm_map_t VMCE_map; \
2220 vm_map_entry_t VMCE_entry; \
2221 vm_map_offset_t VMCE_endaddr; \
2222 VMCE_map = (map); \
2223 VMCE_entry = (entry); \
2224 VMCE_endaddr = (endaddr); \
2225 if (VMCE_endaddr < VMCE_entry->vme_end) { \
2226 _vm_map_clip_end(&VMCE_map->hdr,VMCE_entry,VMCE_endaddr); \
2227 } \
2228 UPDATE_FIRST_FREE(VMCE_map, VMCE_map->first_free); \
2229 MACRO_END
2230 #endif /* NO_NESTED_PMAP */
2231
2232
2233 #define vm_map_copy_clip_end(copy, entry, endaddr) \
2234 MACRO_BEGIN \
2235 if ((endaddr) < (entry)->vme_end) \
2236 _vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \
2237 MACRO_END
2238
2239 /*
2240 * This routine is called only when it is known that
2241 * the entry must be split.
2242 */
2243 static void
2244 _vm_map_clip_end(
2245 register struct vm_map_header *map_header,
2246 register vm_map_entry_t entry,
2247 register vm_map_offset_t end)
2248 {
2249 register vm_map_entry_t new_entry;
2250
2251 /*
2252 * Create a new entry and insert it
2253 * AFTER the specified entry
2254 */
2255
2256 new_entry = _vm_map_entry_create(map_header);
2257 vm_map_entry_copy_full(new_entry, entry);
2258
2259 new_entry->vme_start = entry->vme_end = end;
2260 new_entry->offset += (end - entry->vme_start);
2261
2262 _vm_map_entry_link(map_header, entry, new_entry);
2263
2264 if (entry->is_sub_map)
2265 vm_map_reference(new_entry->object.sub_map);
2266 else
2267 vm_object_reference(new_entry->object.vm_object);
2268 }
2269
2270
2271 /*
2272 * VM_MAP_RANGE_CHECK: [ internal use only ]
2273 *
2274 * Asserts that the starting and ending region
2275 * addresses fall within the valid range of the map.
2276 */
2277 #define VM_MAP_RANGE_CHECK(map, start, end) \
2278 { \
2279 if (start < vm_map_min(map)) \
2280 start = vm_map_min(map); \
2281 if (end > vm_map_max(map)) \
2282 end = vm_map_max(map); \
2283 if (start > end) \
2284 start = end; \
2285 }
2286
2287 /*
2288 * vm_map_range_check: [ internal use only ]
2289 *
2290 * Check that the region defined by the specified start and
2291 * end addresses is wholly contained within a single map
2292 * entry or set of adjacent map entries of the specified map,
2293 * i.e. the specified region contains no unmapped space.
2294 * If any or all of the region is unmapped, FALSE is returned.
2295 * Otherwise, TRUE is returned and if the output argument 'entry'
2296 * is not NULL it points to the map entry containing the start
2297 * of the region.
2298 *
2299 * The map is locked for reading on entry and is left locked.
2300 */
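/*
 * Editor's note -- illustrative example, not part of the original source.
 * With adjacent entries [0x1000, 0x3000) and [0x3000, 0x6000), checking
 * the range [0x2000, 0x5000) returns TRUE, while [0x2000, 0x7000), or any
 * range crossing a gap between non-adjacent entries, returns FALSE.
 */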
2301 static boolean_t
2302 vm_map_range_check(
2303 register vm_map_t map,
2304 register vm_map_offset_t start,
2305 register vm_map_offset_t end,
2306 vm_map_entry_t *entry)
2307 {
2308 vm_map_entry_t cur;
2309 register vm_map_offset_t prev;
2310
2311 /*
2312 * Basic sanity checks first
2313 */
2314 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
2315 return (FALSE);
2316
2317 /*
2318 * Check first if the region starts within a valid
2319 * mapping for the map.
2320 */
2321 if (!vm_map_lookup_entry(map, start, &cur))
2322 return (FALSE);
2323
2324 /*
2325 * Optimize for the case that the region is contained
2326 * in a single map entry.
2327 */
2328 if (entry != (vm_map_entry_t *) NULL)
2329 *entry = cur;
2330 if (end <= cur->vme_end)
2331 return (TRUE);
2332
2333 /*
2334 * If the region is not wholly contained within a
2335 * single entry, walk the entries looking for holes.
2336 */
2337 prev = cur->vme_end;
2338 cur = cur->vme_next;
2339 while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
2340 if (end <= cur->vme_end)
2341 return (TRUE);
2342 prev = cur->vme_end;
2343 cur = cur->vme_next;
2344 }
2345 return (FALSE);
2346 }
2347
2348 /*
2349 * vm_map_submap: [ kernel use only ]
2350 *
2351 * Mark the given range as handled by a subordinate map.
2352 *
2353 * This range must have been created with vm_map_find using
2354 * the vm_submap_object, and no other operations may have been
2355 * performed on this range prior to calling vm_map_submap.
2356 *
2357 * Only a limited number of operations can be performed
2358 * within this range after calling vm_map_submap:
2359 * vm_fault
2360 * [Don't try vm_map_copyin!]
2361 *
2362 * To remove a submapping, one must first remove the
2363 * range from the superior map, and then destroy the
2364 * submap (if desired). [Better yet, don't try it.]
2365 */
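/*
 * Editor's note -- illustrative sketch, not part of the original source;
 * the setup of the vm_submap_object-backed range is assumed, per the
 * comment above:
 *
 *	vm_map_t sub;
 *
 *	sub = vm_map_create(PMAP_NULL, 0, end - start, TRUE);
 *	... reserve [start, end) in "map", backed by vm_submap_object,
 *	    and perform no other operations on that range ...
 *	kr = vm_map_submap(map, start, end, sub, 0, TRUE);
 */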
2366 kern_return_t
2367 vm_map_submap(
2368 vm_map_t map,
2369 vm_map_offset_t start,
2370 vm_map_offset_t end,
2371 vm_map_t submap,
2372 vm_map_offset_t offset,
2373 #ifdef NO_NESTED_PMAP
2374 __unused
2375 #endif /* NO_NESTED_PMAP */
2376 boolean_t use_pmap)
2377 {
2378 vm_map_entry_t entry;
2379 register kern_return_t result = KERN_INVALID_ARGUMENT;
2380 register vm_object_t object;
2381
2382 vm_map_lock(map);
2383
2384 submap->mapped = TRUE;
2385
2386 VM_MAP_RANGE_CHECK(map, start, end);
2387
2388 if (vm_map_lookup_entry(map, start, &entry)) {
2389 vm_map_clip_start(map, entry, start);
2390 }
2391 else
2392 entry = entry->vme_next;
2393
2394 if(entry == vm_map_to_entry(map)) {
2395 vm_map_unlock(map);
2396 return KERN_INVALID_ARGUMENT;
2397 }
2398
2399 vm_map_clip_end(map, entry, end);
2400
2401 if ((entry->vme_start == start) && (entry->vme_end == end) &&
2402 (!entry->is_sub_map) &&
2403 ((object = entry->object.vm_object) == vm_submap_object) &&
2404 (object->resident_page_count == 0) &&
2405 (object->copy == VM_OBJECT_NULL) &&
2406 (object->shadow == VM_OBJECT_NULL) &&
2407 (!object->pager_created)) {
2408 entry->offset = (vm_object_offset_t)offset;
2409 entry->object.vm_object = VM_OBJECT_NULL;
2410 vm_object_deallocate(object);
2411 entry->is_sub_map = TRUE;
2412 entry->object.sub_map = submap;
2413 vm_map_reference(submap);
2414 #ifndef NO_NESTED_PMAP
2415 if ((use_pmap) && (offset == 0)) {
2416 /* nest if platform code will allow */
2417 if(submap->pmap == NULL) {
2418 submap->pmap = pmap_create((vm_map_size_t) 0, FALSE);
2419 if(submap->pmap == PMAP_NULL) {
2420 vm_map_unlock(map);
2421 return(KERN_NO_SPACE);
2422 }
2423 }
2424 result = pmap_nest(map->pmap, (entry->object.sub_map)->pmap,
2425 (addr64_t)start,
2426 (addr64_t)start,
2427 (uint64_t)(end - start));
2428 if(result)
2429 panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result);
2430 entry->use_pmap = TRUE;
2431 }
2432 #else /* NO_NESTED_PMAP */
2433 pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end);
2434 #endif /* NO_NESTED_PMAP */
2435 result = KERN_SUCCESS;
2436 }
2437 vm_map_unlock(map);
2438
2439 return(result);
2440 }
2441
2442 /*
2443 * vm_map_protect:
2444 *
2445 * Sets the protection of the specified address
2446 * region in the target map. If "set_max" is
2447 * specified, the maximum protection is to be set;
2448 * otherwise, only the current protection is affected.
2449 */
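/*
 * Editor's note -- illustrative usage, not part of the original source.
 * To make a page-aligned range read-only without changing its maximum
 * protection:
 *
 *	kr = vm_map_protect(map, start, end, VM_PROT_READ, FALSE);
 *
 * Passing set_max == TRUE instead replaces max_protection and intersects
 * the current protection with the old one, as coded below.
 */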
2450 kern_return_t
2451 vm_map_protect(
2452 register vm_map_t map,
2453 register vm_map_offset_t start,
2454 register vm_map_offset_t end,
2455 register vm_prot_t new_prot,
2456 register boolean_t set_max)
2457 {
2458 register vm_map_entry_t current;
2459 register vm_map_offset_t prev;
2460 vm_map_entry_t entry;
2461 vm_prot_t new_max;
2462 boolean_t clip;
2463
2464 XPR(XPR_VM_MAP,
2465 "vm_map_protect, 0x%X start 0x%X end 0x%X, new 0x%X %d",
2466 (integer_t)map, start, end, new_prot, set_max);
2467
2468 vm_map_lock(map);
2469
2470 /* LP64todo - remove this check when vm_map_commpage64()
2471 * no longer has to stuff in a map_entry for the commpage
2472 * above the map's max_offset.
2473 */
2474 if (start >= map->max_offset) {
2475 vm_map_unlock(map);
2476 return(KERN_INVALID_ADDRESS);
2477 }
2478
2479 /*
2480 * Lookup the entry. If it doesn't start in a valid
2481 * entry, return an error. Remember if we need to
2482 * clip the entry. We don't do it here because we don't
2483 * want to make any changes until we've scanned the
2484 * entire range below for address and protection
2485 * violations.
2486 */
2487 if (!(clip = vm_map_lookup_entry(map, start, &entry))) {
2488 vm_map_unlock(map);
2489 return(KERN_INVALID_ADDRESS);
2490 }
2491
2492 /*
2493 * Make a first pass to check for protection and address
2494 * violations.
2495 */
2496
2497 current = entry;
2498 prev = current->vme_start;
2499 while ((current != vm_map_to_entry(map)) &&
2500 (current->vme_start < end)) {
2501
2502 /*
2503 * If there is a hole, return an error.
2504 */
2505 if (current->vme_start != prev) {
2506 vm_map_unlock(map);
2507 return(KERN_INVALID_ADDRESS);
2508 }
2509
2510 new_max = current->max_protection;
2511 if(new_prot & VM_PROT_COPY) {
2512 new_max |= VM_PROT_WRITE;
2513 if ((new_prot & (new_max | VM_PROT_COPY)) != new_prot) {
2514 vm_map_unlock(map);
2515 return(KERN_PROTECTION_FAILURE);
2516 }
2517 } else {
2518 if ((new_prot & new_max) != new_prot) {
2519 vm_map_unlock(map);
2520 return(KERN_PROTECTION_FAILURE);
2521 }
2522 }
2523
2524 prev = current->vme_end;
2525 current = current->vme_next;
2526 }
2527 if (end > prev) {
2528 vm_map_unlock(map);
2529 return(KERN_INVALID_ADDRESS);
2530 }
2531
2532 /*
2533 * Go back and fix up protections.
2534 * Clip to start here if the range starts within
2535 * the entry.
2536 */
2537
2538 current = entry;
2539 if (clip) {
2540 vm_map_clip_start(map, entry, start);
2541 }
2542 while ((current != vm_map_to_entry(map)) &&
2543 (current->vme_start < end)) {
2544
2545 vm_prot_t old_prot;
2546
2547 vm_map_clip_end(map, current, end);
2548
2549 old_prot = current->protection;
2550
2551 if(new_prot & VM_PROT_COPY) {
2552 /* caller is asking specifically to copy the */
2553 /* mapped data, this implies that max protection */
2554 /* will include write. Caller must be prepared */
2555 /* for loss of shared memory communication in the */
2556 /* target area after taking this step */
2557 current->needs_copy = TRUE;
2558 current->max_protection |= VM_PROT_WRITE;
2559 }
2560
2561 if (set_max)
2562 current->protection =
2563 (current->max_protection =
2564 new_prot & ~VM_PROT_COPY) &
2565 old_prot;
2566 else
2567 current->protection = new_prot & ~VM_PROT_COPY;
2568
2569 /*
2570 * Update physical map if necessary.
2571 * If the request is to turn off write protection,
2572 * we won't do it for real (in pmap). This is because
2573 * it would cause copy-on-write to fail. We've already
2574 * set the new protection in the map, so if a
2575 * write-protect fault occurred, it will be fixed up
2576 * properly, COW or not.
2577 */
2578 /* the 256M hack for existing hardware limitations */
2579 if (current->protection != old_prot) {
2580 if(current->is_sub_map && current->use_pmap) {
2581 vm_map_offset_t pmap_base_addr;
2582 vm_map_offset_t pmap_end_addr;
2583 #ifdef NO_NESTED_PMAP
2584 __unused
2585 #endif /* NO_NESTED_PMAP */
2586 vm_map_entry_t local_entry;
2587
2588
2589 pmap_base_addr = 0xF0000000 & current->vme_start;
2590 pmap_end_addr = (pmap_base_addr + 0x10000000) - 1;
2591 #ifndef NO_NESTED_PMAP
2592 if(!vm_map_lookup_entry(map,
2593 pmap_base_addr, &local_entry))
2594 panic("vm_map_protect: nested pmap area is missing");
2595 while ((local_entry != vm_map_to_entry(map)) &&
2596 (local_entry->vme_start < pmap_end_addr)) {
2597 local_entry->use_pmap = FALSE;
2598 local_entry = local_entry->vme_next;
2599 }
2600 pmap_unnest(map->pmap, (addr64_t)pmap_base_addr);
2601 #endif /* NO_NESTED_PMAP */
2602 }
2603 if (!(current->protection & VM_PROT_WRITE)) {
2604 /* Look one level down, since we support nested */
2605 /* pmaps from mapped submaps which are direct */
2606 /* entries in our map */
2607
2608 vm_prot_t prot;
2609
2610 prot = current->protection;
2611 #ifdef STACK_ONLY_NX
2612 if (current->alias != VM_MEMORY_STACK && prot)
2613 prot |= VM_PROT_EXECUTE;
2614 #endif
2615 if (current->is_sub_map && current->use_pmap) {
2616 pmap_protect(current->object.sub_map->pmap,
2617 current->vme_start,
2618 current->vme_end,
2619 prot);
2620 } else {
2621 pmap_protect(map->pmap, current->vme_start,
2622 current->vme_end,
2623 prot);
2624 }
2625 }
2626 }
2627 current = current->vme_next;
2628 }
2629
2630 current = entry;
2631 while ((current != vm_map_to_entry(map)) &&
2632 (current->vme_start <= end)) {
2633 vm_map_simplify_entry(map, current);
2634 current = current->vme_next;
2635 }
2636
2637 vm_map_unlock(map);
2638 return(KERN_SUCCESS);
2639 }
2640
2641 /*
2642 * vm_map_inherit:
2643 *
2644 * Sets the inheritance of the specified address
2645 * range in the target map. Inheritance
2646 * affects how the map will be shared with
2647 * child maps at the time of vm_map_fork.
2648 */
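/*
 * Editor's note -- illustrative usage, not part of the original source.
 * To have a range shared read/write with children created by
 * vm_map_fork:
 *
 *	kr = vm_map_inherit(map, start, end, VM_INHERIT_SHARE);
 *
 * Note that VM_INHERIT_COPY is rejected for ranges containing submaps,
 * as checked below.
 */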
2649 kern_return_t
2650 vm_map_inherit(
2651 register vm_map_t map,
2652 register vm_map_offset_t start,
2653 register vm_map_offset_t end,
2654 register vm_inherit_t new_inheritance)
2655 {
2656 register vm_map_entry_t entry;
2657 vm_map_entry_t temp_entry;
2658
2659 vm_map_lock(map);
2660
2661 VM_MAP_RANGE_CHECK(map, start, end);
2662
2663 if (vm_map_lookup_entry(map, start, &temp_entry)) {
2664 entry = temp_entry;
2665 vm_map_clip_start(map, entry, start);
2666 }
2667 else {
2668 temp_entry = temp_entry->vme_next;
2669 entry = temp_entry;
2670 }
2671
2672 /* first check entire range for submaps which can't support the */
2673 /* given inheritance. */
2674 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
2675 if(entry->is_sub_map) {
2676 if(new_inheritance == VM_INHERIT_COPY) {
2677 vm_map_unlock(map);
2678 return(KERN_INVALID_ARGUMENT);
2679 }
2680 }
2681
2682 entry = entry->vme_next;
2683 }
2684
2685 entry = temp_entry;
2686
2687 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
2688 vm_map_clip_end(map, entry, end);
2689
2690 entry->inheritance = new_inheritance;
2691
2692 entry = entry->vme_next;
2693 }
2694
2695 vm_map_unlock(map);
2696 return(KERN_SUCCESS);
2697 }
2698
2699 /*
2700 * vm_map_wire:
2701 *
2702 * Sets the pageability of the specified address range in the
2703 * target map as wired. Regions specified as not pageable require
2704 * locked-down physical memory and physical page maps. The
2705 * access_type variable indicates types of accesses that must not
2706 * generate page faults. This is checked against protection of
2707 * memory being locked-down.
2708 *
2709 * The map must not be locked, but a reference must remain to the
2710 * map throughout the call.
2711 */
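/*
 * Editor's note -- illustrative usage, not part of the original source.
 * A typical call through the exported wrapper further below:
 *
 *	kr = vm_map_wire(map, start, end,
 *			 VM_PROT_READ | VM_PROT_WRITE, TRUE);
 *
 * user_wire is TRUE for a user-level request (interruptible, counted per
 * entry in user_wired_count) and FALSE for a kernel wiring.
 */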
2712 static kern_return_t
2713 vm_map_wire_nested(
2714 register vm_map_t map,
2715 register vm_map_offset_t start,
2716 register vm_map_offset_t end,
2717 register vm_prot_t access_type,
2718 boolean_t user_wire,
2719 pmap_t map_pmap,
2720 vm_map_offset_t pmap_addr)
2721 {
2722 register vm_map_entry_t entry;
2723 struct vm_map_entry *first_entry, tmp_entry;
2724 vm_map_t real_map;
2725 register vm_map_offset_t s,e;
2726 kern_return_t rc;
2727 boolean_t need_wakeup;
2728 boolean_t main_map = FALSE;
2729 wait_interrupt_t interruptible_state;
2730 thread_t cur_thread;
2731 unsigned int last_timestamp;
2732 vm_map_size_t size;
2733
2734 vm_map_lock(map);
2735 if(map_pmap == NULL)
2736 main_map = TRUE;
2737 last_timestamp = map->timestamp;
2738
2739 VM_MAP_RANGE_CHECK(map, start, end);
2740 assert(page_aligned(start));
2741 assert(page_aligned(end));
2742 if (start == end) {
2743 /* We wired what the caller asked for, zero pages */
2744 vm_map_unlock(map);
2745 return KERN_SUCCESS;
2746 }
2747
2748 if (vm_map_lookup_entry(map, start, &first_entry)) {
2749 entry = first_entry;
2750 /* vm_map_clip_start will be done later. */
2751 } else {
2752 /* Start address is not in map */
2753 vm_map_unlock(map);
2754 return(KERN_INVALID_ADDRESS);
2755 }
2756
2757 s=start;
2758 need_wakeup = FALSE;
2759 cur_thread = current_thread();
2760 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
2761 /*
2762 * If another thread is wiring/unwiring this entry then
2763 * block after informing other thread to wake us up.
2764 */
2765 if (entry->in_transition) {
2766 wait_result_t wait_result;
2767
2768 /*
2769 * We have not clipped the entry. Make sure that
2770 * the start address is in range so that the lookup
2771 * below will succeed.
2772 */
2773 s = entry->vme_start < start? start: entry->vme_start;
2774
2775 entry->needs_wakeup = TRUE;
2776
2777 /*
2778 * wake up anybody waiting on entries that we have
2779 * already wired.
2780 */
2781 if (need_wakeup) {
2782 vm_map_entry_wakeup(map);
2783 need_wakeup = FALSE;
2784 }
2785 /*
2786 * User wiring is interruptible
2787 */
2788 wait_result = vm_map_entry_wait(map,
2789 (user_wire) ? THREAD_ABORTSAFE :
2790 THREAD_UNINT);
2791 if (user_wire && wait_result == THREAD_INTERRUPTED) {
2792 /*
2793 * undo the wirings we have done so far
2794 * We do not clear the needs_wakeup flag,
2795 * because we cannot tell if we were the
2796 * only one waiting.
2797 */
2798 vm_map_unlock(map);
2799 vm_map_unwire(map, start, s, user_wire);
2800 return(KERN_FAILURE);
2801 }
2802
2803 /*
2804 * Cannot avoid a lookup here. reset timestamp.
2805 */
2806 last_timestamp = map->timestamp;
2807
2808 /*
2809 * The entry could have been clipped, look it up again.
2810 * Worst that can happen is that it may not exist anymore.
2811 */
2812 if (!vm_map_lookup_entry(map, s, &first_entry)) {
2813 if (!user_wire)
2814 panic("vm_map_wire: re-lookup failed");
2815
2816 /*
2817 * User: undo everything up to the previous
2818 * entry. Let vm_map_unwire worry about
2819 * checking the validity of the range.
2820 */
2821 vm_map_unlock(map);
2822 vm_map_unwire(map, start, s, user_wire);
2823 return(KERN_FAILURE);
2824 }
2825 entry = first_entry;
2826 continue;
2827 }
2828
2829 if(entry->is_sub_map) {
2830 vm_map_offset_t sub_start;
2831 vm_map_offset_t sub_end;
2832 vm_map_offset_t local_start;
2833 vm_map_offset_t local_end;
2834 pmap_t pmap;
2835
2836 vm_map_clip_start(map, entry, start);
2837 vm_map_clip_end(map, entry, end);
2838
2839 sub_start = entry->offset;
2840 sub_end = entry->vme_end - entry->vme_start;
2841 sub_end += entry->offset;
2842
2843 local_end = entry->vme_end;
2844 if(map_pmap == NULL) {
2845 if(entry->use_pmap) {
2846 pmap = entry->object.sub_map->pmap;
2847 /* ppc implementation requires that */
2848 /* submaps pmap address ranges line */
2849 /* up with parent map */
2850 #ifdef notdef
2851 pmap_addr = sub_start;
2852 #endif
2853 pmap_addr = start;
2854 } else {
2855 pmap = map->pmap;
2856 pmap_addr = start;
2857 }
2858 if (entry->wired_count) {
2859 if (entry->wired_count
2860 >= MAX_WIRE_COUNT)
2861 panic("vm_map_wire: too many wirings");
2862
2863 if (user_wire &&
2864 entry->user_wired_count
2865 >= MAX_WIRE_COUNT) {
2866 vm_map_unlock(map);
2867 vm_map_unwire(map, start,
2868 entry->vme_start, user_wire);
2869 return(KERN_FAILURE);
2870 }
2871 if(user_wire)
2872 entry->user_wired_count++;
2873 if((!user_wire) ||
2874 (entry->user_wired_count == 0))
2875 entry->wired_count++;
2876 entry = entry->vme_next;
2877 continue;
2878
2879 } else {
2880 vm_object_t object;
2881 vm_map_offset_t offset_hi;
2882 vm_map_offset_t offset_lo;
2883 vm_object_offset_t offset;
2884 vm_prot_t prot;
2885 boolean_t wired;
2886 vm_behavior_t behavior;
2887 vm_map_entry_t local_entry;
2888 vm_map_version_t version;
2889 vm_map_t lookup_map;
2890
2891 /* call vm_map_lookup_locked to */
2892 /* cause any needs copy to be */
2893 /* evaluated */
2894 local_start = entry->vme_start;
2895 lookup_map = map;
2896 vm_map_lock_write_to_read(map);
2897 if(vm_map_lookup_locked(
2898 &lookup_map, local_start,
2899 access_type,
2900 &version, &object,
2901 &offset, &prot, &wired,
2902 &behavior, &offset_lo,
2903 &offset_hi, &real_map)) {
2904
2905 vm_map_unlock_read(lookup_map);
2906 vm_map_unwire(map, start,
2907 entry->vme_start, user_wire);
2908 return(KERN_FAILURE);
2909 }
2910 if(real_map != lookup_map)
2911 vm_map_unlock(real_map);
2912 vm_map_unlock_read(lookup_map);
2913 vm_map_lock(map);
2914 vm_object_unlock(object);
2915
2916 if (!vm_map_lookup_entry(map,
2917 local_start, &local_entry)) {
2918 vm_map_unlock(map);
2919 vm_map_unwire(map, start,
2920 entry->vme_start, user_wire);
2921 return(KERN_FAILURE);
2922 }
2923 /* did we have a change of type? */
2924 if (!local_entry->is_sub_map) {
2925 last_timestamp = map->timestamp;
2926 continue;
2927 }
2928 entry = local_entry;
2929 if (user_wire)
2930 entry->user_wired_count++;
2931 if((!user_wire) ||
2932 (entry->user_wired_count == 1))
2933 entry->wired_count++;
2934
2935 entry->in_transition = TRUE;
2936
2937 vm_map_unlock(map);
2938 rc = vm_map_wire_nested(
2939 entry->object.sub_map,
2940 sub_start, sub_end,
2941 access_type,
2942 user_wire, pmap, pmap_addr);
2943 vm_map_lock(map);
2944 }
2945 } else {
2946 local_start = entry->vme_start;
2947 if (user_wire)
2948 entry->user_wired_count++;
2949 if((!user_wire) ||
2950 (entry->user_wired_count == 1))
2951 entry->wired_count++;
2952 vm_map_unlock(map);
2953 rc = vm_map_wire_nested(entry->object.sub_map,
2954 sub_start, sub_end,
2955 access_type,
2956 user_wire, map_pmap, pmap_addr);
2957 vm_map_lock(map);
2958 }
2959 s = entry->vme_start;
2960 e = entry->vme_end;
2961
2962 /*
2963 * Find the entry again. It could have been clipped
2964 * after we unlocked the map.
2965 */
2966 if (!vm_map_lookup_entry(map, local_start,
2967 &first_entry))
2968 panic("vm_map_wire: re-lookup failed");
2969 entry = first_entry;
2970
2971 last_timestamp = map->timestamp;
2972 while ((entry != vm_map_to_entry(map)) &&
2973 (entry->vme_start < e)) {
2974 assert(entry->in_transition);
2975 entry->in_transition = FALSE;
2976 if (entry->needs_wakeup) {
2977 entry->needs_wakeup = FALSE;
2978 need_wakeup = TRUE;
2979 }
2980 if (rc != KERN_SUCCESS) {/* from vm_*_wire */
2981 if (user_wire)
2982 entry->user_wired_count--;
2983 if ((!user_wire) ||
2984 (entry->user_wired_count == 0))
2985 entry->wired_count--;
2986 }
2987 entry = entry->vme_next;
2988 }
2989 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
2990 vm_map_unlock(map);
2991 if (need_wakeup)
2992 vm_map_entry_wakeup(map);
2993 /*
2994 * undo everything up to the previous entry.
2995 */
2996 (void)vm_map_unwire(map, start, s, user_wire);
2997 return rc;
2998 }
2999 continue;
3000 }
3001
3002 /*
3003 * If this entry is already wired then increment
3004 * the appropriate wire reference count.
3005 */
3006 if (entry->wired_count) {
3007 /* sanity check: wired_count is a short */
3008 if (entry->wired_count >= MAX_WIRE_COUNT)
3009 panic("vm_map_wire: too many wirings");
3010
3011 if (user_wire &&
3012 entry->user_wired_count >= MAX_WIRE_COUNT) {
3013 vm_map_unlock(map);
3014 vm_map_unwire(map, start,
3015 entry->vme_start, user_wire);
3016 return(KERN_FAILURE);
3017 }
3018 /*
3019 * entry is already wired down, get our reference
3020 * after clipping to our range.
3021 */
3022 vm_map_clip_start(map, entry, start);
3023 vm_map_clip_end(map, entry, end);
3024 if (user_wire)
3025 entry->user_wired_count++;
3026 if ((!user_wire) || (entry->user_wired_count == 1))
3027 entry->wired_count++;
3028
3029 entry = entry->vme_next;
3030 continue;
3031 }
3032
3033 /*
3034 * Unwired entry or wire request transmitted via submap
3035 */
3036
3037
3038 /*
3039 * Perform actions of vm_map_lookup that need the write
3040 * lock on the map: create a shadow object for a
3041 * copy-on-write region, or an object for a zero-fill
3042 * region.
3043 */
3044 size = entry->vme_end - entry->vme_start;
3045 /*
3046 * If wiring a copy-on-write page, we need to copy it now
3047 * even if we're only (currently) requesting read access.
3048 * This is aggressive, but once it's wired we can't move it.
3049 */
3050 if (entry->needs_copy) {
3051 vm_object_shadow(&entry->object.vm_object,
3052 &entry->offset, size);
3053 entry->needs_copy = FALSE;
3054 } else if (entry->object.vm_object == VM_OBJECT_NULL) {
3055 entry->object.vm_object = vm_object_allocate(size);
3056 entry->offset = (vm_object_offset_t)0;
3057 }
3058
3059 vm_map_clip_start(map, entry, start);
3060 vm_map_clip_end(map, entry, end);
3061
3062 s = entry->vme_start;
3063 e = entry->vme_end;
3064
3065 /*
3066 * Check for holes and protection mismatch.
3067 * Holes: Next entry should be contiguous unless this
3068 * is the end of the region.
3069 * Protection: Access requested must be allowed, unless
3070 * wiring is by protection class
3071 */
3072 if ((((entry->vme_end < end) &&
3073 ((entry->vme_next == vm_map_to_entry(map)) ||
3074 (entry->vme_next->vme_start > entry->vme_end))) ||
3075 ((entry->protection & access_type) != access_type))) {
3076 /*
3077 * Found a hole or protection problem.
3078 * Unwire the region we wired so far.
3079 */
3080 if (start != entry->vme_start) {
3081 vm_map_unlock(map);
3082 vm_map_unwire(map, start, s, user_wire);
3083 } else {
3084 vm_map_unlock(map);
3085 }
3086 return((entry->protection&access_type) != access_type?
3087 KERN_PROTECTION_FAILURE: KERN_INVALID_ADDRESS);
3088 }
3089
3090 assert(entry->wired_count == 0 && entry->user_wired_count == 0);
3091
3092 if (user_wire)
3093 entry->user_wired_count++;
3094 if ((!user_wire) || (entry->user_wired_count == 1))
3095 entry->wired_count++;
3096
3097 entry->in_transition = TRUE;
3098
3099 /*
3100 * This entry might get split once we unlock the map.
3101 * In vm_fault_wire(), we need the current range as
3102 * defined by this entry. In order for this to work
3103 * along with a simultaneous clip operation, we make a
3104 * temporary copy of this entry and use that for the
3105 * wiring. Note that the underlying objects do not
3106 * change during a clip.
3107 */
3108 tmp_entry = *entry;
3109
3110 /*
3111 * The in_transition state guarantees that the entry
3112 * (or entries for this range, if a split occurred) will be
3113 * there when the map lock is acquired for the second time.
3114 */
3115 vm_map_unlock(map);
3116
3117 if (!user_wire && cur_thread != THREAD_NULL)
3118 interruptible_state = thread_interrupt_level(THREAD_UNINT);
3119 else
3120 interruptible_state = THREAD_UNINT;
3121
3122 if(map_pmap)
3123 rc = vm_fault_wire(map,
3124 &tmp_entry, map_pmap, pmap_addr);
3125 else
3126 rc = vm_fault_wire(map,
3127 &tmp_entry, map->pmap,
3128 tmp_entry.vme_start);
3129
3130 if (!user_wire && cur_thread != THREAD_NULL)
3131 thread_interrupt_level(interruptible_state);
3132
3133 vm_map_lock(map);
3134
3135 if (last_timestamp+1 != map->timestamp) {
3136 /*
3137 * Find the entry again. It could have been clipped
3138 * after we unlocked the map.
3139 */
3140 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
3141 &first_entry))
3142 panic("vm_map_wire: re-lookup failed");
3143
3144 entry = first_entry;
3145 }
3146
3147 last_timestamp = map->timestamp;
3148
3149 while ((entry != vm_map_to_entry(map)) &&
3150 (entry->vme_start < tmp_entry.vme_end)) {
3151 assert(entry->in_transition);
3152 entry->in_transition = FALSE;
3153 if (entry->needs_wakeup) {
3154 entry->needs_wakeup = FALSE;
3155 need_wakeup = TRUE;
3156 }
3157 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
3158 if (user_wire)
3159 entry->user_wired_count--;
3160 if ((!user_wire) ||
3161 (entry->user_wired_count == 0))
3162 entry->wired_count--;
3163 }
3164 entry = entry->vme_next;
3165 }
3166
3167 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
3168 vm_map_unlock(map);
3169 if (need_wakeup)
3170 vm_map_entry_wakeup(map);
3171 /*
3172 * undo everything up to the previous entry.
3173 */
3174 (void)vm_map_unwire(map, start, s, user_wire);
3175 return rc;
3176 }
3177 } /* end while loop through map entries */
3178 vm_map_unlock(map);
3179
3180 /*
3181 * wake up anybody waiting on entries we wired.
3182 */
3183 if (need_wakeup)
3184 vm_map_entry_wakeup(map);
3185
3186 return(KERN_SUCCESS);
3187
3188 }
3189
3190 kern_return_t
3191 vm_map_wire(
3192 register vm_map_t map,
3193 register vm_map_offset_t start,
3194 register vm_map_offset_t end,
3195 register vm_prot_t access_type,
3196 boolean_t user_wire)
3197 {
3198
3199 kern_return_t kret;
3200
3201 #ifdef ppc
3202 /*
3203 * the calls to mapping_prealloc and mapping_relpre
3204 * (along with the VM_MAP_RANGE_CHECK to ensure a
3205 * reasonable range was passed in) are
3206 * currently necessary because
3207 * we haven't enabled kernel pre-emption
3208 * and/or the pmap_enter cannot purge and re-use
3209 * existing mappings
3210 */
3211 VM_MAP_RANGE_CHECK(map, start, end);
3212 mapping_prealloc(end - start);
3213 #endif
3214 kret = vm_map_wire_nested(map, start, end, access_type,
3215 user_wire, (pmap_t)NULL, 0);
3216 #ifdef ppc
3217 mapping_relpre();
3218 #endif
3219 return kret;
3220 }
3221
3222 /*
3223 * vm_map_unwire:
3224 *
3225 * Sets the pageability of the specified address range in the target
3226 * map as pageable. Regions specified must have been wired previously.
3227 *
3228 * The map must not be locked, but a reference must remain to the map
3229 * throughout the call.
3230 *
3231 * Kernel will panic on failures. User unwire ignores holes and
3232 * unwired and in-transition entries to avoid losing memory by leaving
3233 * it unwired.
3234 */
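/*
 * Editor's note -- illustrative usage, not part of the original source.
 * The exported wrapper below undoes a prior vm_map_wire() of the same
 * range, with user_wire matching the flag used when wiring:
 *
 *	kr = vm_map_unwire(map, start, end, TRUE);
 */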
3235 static kern_return_t
3236 vm_map_unwire_nested(
3237 register vm_map_t map,
3238 register vm_map_offset_t start,
3239 register vm_map_offset_t end,
3240 boolean_t user_wire,
3241 pmap_t map_pmap,
3242 vm_map_offset_t pmap_addr)
3243 {
3244 register vm_map_entry_t entry;
3245 struct vm_map_entry *first_entry, tmp_entry;
3246 boolean_t need_wakeup;
3247 boolean_t main_map = FALSE;
3248 unsigned int last_timestamp;
3249
3250 vm_map_lock(map);
3251 if(map_pmap == NULL)
3252 main_map = TRUE;
3253 last_timestamp = map->timestamp;
3254
3255 VM_MAP_RANGE_CHECK(map, start, end);
3256 assert(page_aligned(start));
3257 assert(page_aligned(end));
3258
3259 if (vm_map_lookup_entry(map, start, &first_entry)) {
3260 entry = first_entry;
3261 /* vm_map_clip_start will be done later. */
3262 }
3263 else {
3264 /* Start address is not in map. */
3265 vm_map_unlock(map);
3266 return(KERN_INVALID_ADDRESS);
3267 }
3268
3269 need_wakeup = FALSE;
3270 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
3271 if (entry->in_transition) {
3272 /*
3273 * 1)
3274 * Another thread is wiring down this entry. Note
3275 * that if it is not for the other thread we would
3276 * be unwiring an unwired entry. This is not
3277 * permitted. If we wait, we will be unwiring memory
3278 * we did not wire.
3279 *
3280 * 2)
3281 * Another thread is unwiring this entry. We did not
3282 * have a reference to it, because if we did, this
3283 * entry will not be getting unwired now.
3284 */
3285 if (!user_wire)
3286 panic("vm_map_unwire: in_transition entry");
3287
3288 entry = entry->vme_next;
3289 continue;
3290 }
3291
3292 if(entry->is_sub_map) {
3293 vm_map_offset_t sub_start;
3294 vm_map_offset_t sub_end;
3295 vm_map_offset_t local_end;
3296 pmap_t pmap;
3297
3298
3299 vm_map_clip_start(map, entry, start);
3300 vm_map_clip_end(map, entry, end);
3301
3302 sub_start = entry->offset;
3303 sub_end = entry->vme_end - entry->vme_start;
3304 sub_end += entry->offset;
3305 local_end = entry->vme_end;
3306 if(map_pmap == NULL) {
3307 if(entry->use_pmap) {
3308 pmap = entry->object.sub_map->pmap;
3309 pmap_addr = sub_start;
3310 } else {
3311 pmap = map->pmap;
3312 pmap_addr = start;
3313 }
3314 if (entry->wired_count == 0 ||
3315 (user_wire && entry->user_wired_count == 0)) {
3316 if (!user_wire)
3317 panic("vm_map_unwire: entry is unwired");
3318 entry = entry->vme_next;
3319 continue;
3320 }
3321
3322 /*
3323 * Check for holes
3324 * Holes: Next entry should be contiguous unless
3325 * this is the end of the region.
3326 */
3327 if (((entry->vme_end < end) &&
3328 ((entry->vme_next == vm_map_to_entry(map)) ||
3329 (entry->vme_next->vme_start
3330 > entry->vme_end)))) {
3331 if (!user_wire)
3332 panic("vm_map_unwire: non-contiguous region");
3333 /*
3334 entry = entry->vme_next;
3335 continue;
3336 */
3337 }
3338
3339 if (!user_wire || (--entry->user_wired_count == 0))
3340 entry->wired_count--;
3341
3342 if (entry->wired_count != 0) {
3343 entry = entry->vme_next;
3344 continue;
3345 }
3346
3347 entry->in_transition = TRUE;
3348 tmp_entry = *entry;/* see comment in vm_map_wire() */
3349
3350 /*
3351 * We can unlock the map now. The in_transition state
3352 * guarantees existence of the entry.
3353 */
3354 vm_map_unlock(map);
3355 vm_map_unwire_nested(entry->object.sub_map,
3356 sub_start, sub_end, user_wire, pmap, pmap_addr);
3357 vm_map_lock(map);
3358
3359 if (last_timestamp+1 != map->timestamp) {
3360 /*
3361 * Find the entry again. It could have been
3362 * clipped or deleted after we unlocked the map.
3363 */
3364 if (!vm_map_lookup_entry(map,
3365 tmp_entry.vme_start,
3366 &first_entry)) {
3367 if (!user_wire)
3368 panic("vm_map_unwire: re-lookup failed");
3369 entry = first_entry->vme_next;
3370 } else
3371 entry = first_entry;
3372 }
3373 last_timestamp = map->timestamp;
3374
3375 /*
3376 * clear transition bit for all constituent entries
3377 * that were in the original entry (saved in
3378 * tmp_entry). Also check for waiters.
3379 */
3380 while ((entry != vm_map_to_entry(map)) &&
3381 (entry->vme_start < tmp_entry.vme_end)) {
3382 assert(entry->in_transition);
3383 entry->in_transition = FALSE;
3384 if (entry->needs_wakeup) {
3385 entry->needs_wakeup = FALSE;
3386 need_wakeup = TRUE;
3387 }
3388 entry = entry->vme_next;
3389 }
3390 continue;
3391 } else {
3392 vm_map_unlock(map);
3393 vm_map_unwire_nested(entry->object.sub_map,
3394 sub_start, sub_end, user_wire, map_pmap,
3395 pmap_addr);
3396 vm_map_lock(map);
3397
3398 if (last_timestamp+1 != map->timestamp) {
3399 /*
3400 * Find the entry again. It could have been
3401 * clipped or deleted after we unlocked the map.
3402 */
3403 if (!vm_map_lookup_entry(map,
3404 tmp_entry.vme_start,
3405 &first_entry)) {
3406 if (!user_wire)
3407 panic("vm_map_unwire: re-lookup failed");
3408 entry = first_entry->vme_next;
3409 } else
3410 entry = first_entry;
3411 }
3412 last_timestamp = map->timestamp;
3413 }
3414 }
3415
3416
3417 if ((entry->wired_count == 0) ||
3418 (user_wire && entry->user_wired_count == 0)) {
3419 if (!user_wire)
3420 panic("vm_map_unwire: entry is unwired");
3421
3422 entry = entry->vme_next;
3423 continue;
3424 }
3425
3426 assert(entry->wired_count > 0 &&
3427 (!user_wire || entry->user_wired_count > 0));
3428
3429 vm_map_clip_start(map, entry, start);
3430 vm_map_clip_end(map, entry, end);
3431
3432 /*
3433 * Check for holes
3434 * Holes: Next entry should be contiguous unless
3435 * this is the end of the region.
3436 */
3437 if (((entry->vme_end < end) &&
3438 ((entry->vme_next == vm_map_to_entry(map)) ||
3439 (entry->vme_next->vme_start > entry->vme_end)))) {
3440
3441 if (!user_wire)
3442 panic("vm_map_unwire: non-contiguous region");
3443 entry = entry->vme_next;
3444 continue;
3445 }
3446
3447 if (!user_wire || (--entry->user_wired_count == 0))
3448 entry->wired_count--;
3449
3450 if (entry->wired_count != 0) {
3451 entry = entry->vme_next;
3452 continue;
3453 }
3454
3455 entry->in_transition = TRUE;
3456 tmp_entry = *entry; /* see comment in vm_map_wire() */
3457
3458 /*
3459 * We can unlock the map now. The in_transition state
3460 * guarantees existence of the entry.
3461 */
3462 vm_map_unlock(map);
3463 if(map_pmap) {
3464 vm_fault_unwire(map,
3465 &tmp_entry, FALSE, map_pmap, pmap_addr);
3466 } else {
3467 vm_fault_unwire(map,
3468 &tmp_entry, FALSE, map->pmap,
3469 tmp_entry.vme_start);
3470 }
3471 vm_map_lock(map);
3472
3473 if (last_timestamp+1 != map->timestamp) {
3474 /*
3475 * Find the entry again. It could have been clipped
3476 * or deleted after we unlocked the map.
3477 */
3478 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
3479 &first_entry)) {
3480 if (!user_wire)
3481 panic("vm_map_unwire: re-lookup failed");
3482 entry = first_entry->vme_next;
3483 } else
3484 entry = first_entry;
3485 }
3486 last_timestamp = map->timestamp;
3487
3488 /*
3489 * clear transition bit for all constituent entries that
3490 * were in the original entry (saved in tmp_entry). Also
3491 * check for waiters.
3492 */
3493 while ((entry != vm_map_to_entry(map)) &&
3494 (entry->vme_start < tmp_entry.vme_end)) {
3495 assert(entry->in_transition);
3496 entry->in_transition = FALSE;
3497 if (entry->needs_wakeup) {
3498 entry->needs_wakeup = FALSE;
3499 need_wakeup = TRUE;
3500 }
3501 entry = entry->vme_next;
3502 }
3503 }
3504
3505 /*
3506 * We might have fragmented the address space when we wired this
3507 * range of addresses. Attempt to re-coalesce these VM map entries
3508 * with their neighbors now that they're no longer wired.
3509 * Under some circumstances, address space fragmentation can
3510 * prevent VM object shadow chain collapsing, which can cause
3511 * swap space leaks.
3512 */
3513 vm_map_simplify_range(map, start, end);
3514
3515 vm_map_unlock(map);
3516 /*
3517 * wake up anybody waiting on entries that we have unwired.
3518 */
3519 if (need_wakeup)
3520 vm_map_entry_wakeup(map);
3521 return(KERN_SUCCESS);
3522
3523 }
3524
3525 kern_return_t
3526 vm_map_unwire(
3527 register vm_map_t map,
3528 register vm_map_offset_t start,
3529 register vm_map_offset_t end,
3530 boolean_t user_wire)
3531 {
3532 return vm_map_unwire_nested(map, start, end,
3533 user_wire, (pmap_t)NULL, 0);
3534 }
3535
3536
3537 /*
3538 * vm_map_entry_delete: [ internal use only ]
3539 *
3540 * Deallocate the given entry from the target map.
3541 */
3542 static void
3543 vm_map_entry_delete(
3544 register vm_map_t map,
3545 register vm_map_entry_t entry)
3546 {
3547 register vm_map_offset_t s, e;
3548 register vm_object_t object;
3549 register vm_map_t submap;
3550
3551 s = entry->vme_start;
3552 e = entry->vme_end;
3553 assert(page_aligned(s));
3554 assert(page_aligned(e));
3555 assert(entry->wired_count == 0);
3556 assert(entry->user_wired_count == 0);
3557
3558 if (entry->is_sub_map) {
3559 object = NULL;
3560 submap = entry->object.sub_map;
3561 } else {
3562 submap = NULL;
3563 object = entry->object.vm_object;
3564 }
3565
3566 vm_map_entry_unlink(map, entry);
3567 map->size -= e - s;
3568
3569 vm_map_entry_dispose(map, entry);
3570
3571 vm_map_unlock(map);
3572 /*
3573 * Deallocate the object only after removing all
3574 * pmap entries pointing to its pages.
3575 */
3576 if (submap)
3577 vm_map_deallocate(submap);
3578 else
3579 vm_object_deallocate(object);
3580
3581 }
3582
3583
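/*
 * vm_map_submap_pmap_clean: [ internal use only ]
 *
 * (Editor's summary, not in the original source.)  Remove the physical
 * mappings established through the parent map for the portion of the
 * submap, starting at "offset", that corresponds to the parent range
 * [start, end), recursing into nested submaps as needed.
 */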
3584 void
3585 vm_map_submap_pmap_clean(
3586 vm_map_t map,
3587 vm_map_offset_t start,
3588 vm_map_offset_t end,
3589 vm_map_t sub_map,
3590 vm_map_offset_t offset)
3591 {
3592 vm_map_offset_t submap_start;
3593 vm_map_offset_t submap_end;
3594 vm_map_size_t remove_size;
3595 vm_map_entry_t entry;
3596
3597 submap_end = offset + (end - start);
3598 submap_start = offset;
3599 if(vm_map_lookup_entry(sub_map, offset, &entry)) {
3600
3601 remove_size = (entry->vme_end - entry->vme_start);
3602 if(offset > entry->vme_start)
3603 remove_size -= offset - entry->vme_start;
3604
3605
3606 if(submap_end < entry->vme_end) {
3607 remove_size -=
3608 entry->vme_end - submap_end;
3609 }
3610 if(entry->is_sub_map) {
3611 vm_map_submap_pmap_clean(
3612 sub_map,
3613 start,
3614 start + remove_size,
3615 entry->object.sub_map,
3616 entry->offset);
3617 } else {
3618
3619 if((map->mapped) && (map->ref_count)
3620 && (entry->object.vm_object != NULL)) {
3621 vm_object_pmap_protect(
3622 entry->object.vm_object,
3623 entry->offset,
3624 remove_size,
3625 PMAP_NULL,
3626 entry->vme_start,
3627 VM_PROT_NONE);
3628 } else {
3629 pmap_remove(map->pmap,
3630 (addr64_t)start,
3631 (addr64_t)(start + remove_size));
3632 }
3633 }
3634 }
3635
3636 entry = entry->vme_next;
3637
3638 while((entry != vm_map_to_entry(sub_map))
3639 && (entry->vme_start < submap_end)) {
3640 remove_size = (entry->vme_end - entry->vme_start);
3641 if(submap_end < entry->vme_end) {
3642 remove_size -= entry->vme_end - submap_end;
3643 }
3644 if(entry->is_sub_map) {
3645 vm_map_submap_pmap_clean(
3646 sub_map,
3647 (start + entry->vme_start) - offset,
3648 ((start + entry->vme_start) - offset) + remove_size,
3649 entry->object.sub_map,
3650 entry->offset);
3651 } else {
3652 if((map->mapped) && (map->ref_count)
3653 && (entry->object.vm_object != NULL)) {
3654 vm_object_pmap_protect(
3655 entry->object.vm_object,
3656 entry->offset,
3657 remove_size,
3658 PMAP_NULL,
3659 entry->vme_start,
3660 VM_PROT_NONE);
3661 } else {
3662 pmap_remove(map->pmap,
3663 (addr64_t)((start + entry->vme_start)
3664 - offset),
3665 (addr64_t)(((start + entry->vme_start)
3666 - offset) + remove_size));
3667 }
3668 }
3669 entry = entry->vme_next;
3670 }
3671 return;
3672 }
3673
3674 /*
3675 * vm_map_delete: [ internal use only ]
3676 *
3677 * Deallocates the given address range from the target map.
3678 * Removes all user wirings. Unwires one kernel wiring if
3679 * VM_MAP_REMOVE_KUNWIRE is set. Waits for kernel wirings to go
3680 * away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set. Sleeps
3681 * interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
3682 *
3683 * This routine is called with map locked and leaves map locked.
3684 */
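/*
 * Editor's note -- illustrative usage, not part of the original source.
 * A caller that also wants to drop one kernel wiring and save the
 * removed entries into a "zap" map might pass, roughly:
 *
 *	kr = vm_map_delete(map, start, end,
 *			   VM_MAP_REMOVE_KUNWIRE | VM_MAP_REMOVE_SAVE_ENTRIES,
 *			   zap_map);
 *
 * (VM_MAP_REMOVE_WAIT_FOR_KWIRE is forced on below in any case.)
 */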
3685 static kern_return_t
3686 vm_map_delete(
3687 vm_map_t map,
3688 vm_map_offset_t start,
3689 vm_map_offset_t end,
3690 int flags,
3691 vm_map_t zap_map)
3692 {
3693 vm_map_entry_t entry, next;
3694 struct vm_map_entry *first_entry, tmp_entry;
3695 register vm_map_offset_t s, e;
3696 register vm_object_t object;
3697 boolean_t need_wakeup;
3698 unsigned int last_timestamp = ~0; /* unlikely value */
3699 int interruptible;
3700
3701 interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
3702 THREAD_ABORTSAFE : THREAD_UNINT;
3703
3704 /*
3705 * All our DMA I/O operations in IOKit are currently done by
3706 * wiring through the map entries of the task requesting the I/O.
3707 * Because of this, we must always wait for kernel wirings
3708 * to go away on the entries before deleting them.
3709 *
3710 * Any caller who wants to actually remove a kernel wiring
3711 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
3712 * properly remove one wiring instead of blasting through
3713 * them all.
3714 */
3715 flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;
3716
3717 /*
3718 * Find the start of the region, and clip it
3719 */
3720 if (vm_map_lookup_entry(map, start, &first_entry)) {
3721 entry = first_entry;
3722 vm_map_clip_start(map, entry, start);
3723
3724 /*
3725 * Fix the lookup hint now, rather than each
3726 * time through the loop.
3727 */
3728 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
3729 } else {
3730 entry = first_entry->vme_next;
3731 }
3732
3733 need_wakeup = FALSE;
3734 /*
3735 * Step through all entries in this region
3736 */
3737 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
3738
3739 vm_map_clip_end(map, entry, end);
3740 if (entry->in_transition) {
3741 wait_result_t wait_result;
3742
3743 /*
3744 * Another thread is wiring/unwiring this entry.
3745 * Let the other thread know we are waiting.
3746 */
3747 s = entry->vme_start;
3748 entry->needs_wakeup = TRUE;
3749
3750 /*
3751 * wake up anybody waiting on entries that we have
3752 * already unwired/deleted.
3753 */
3754 if (need_wakeup) {
3755 vm_map_entry_wakeup(map);
3756 need_wakeup = FALSE;
3757 }
3758
3759 wait_result = vm_map_entry_wait(map, interruptible);
3760
3761 if (interruptible &&
3762 wait_result == THREAD_INTERRUPTED) {
3763 /*
3764 * We do not clear the needs_wakeup flag,
3765 * since we cannot tell if we were the only one.
3766 */
3767 vm_map_unlock(map);
3768 return KERN_ABORTED;
3769 }
3770
3771 /*
3772 * The entry could have been clipped or it
3773 * may not exist anymore. Look it up again.
3774 */
3775 if (!vm_map_lookup_entry(map, s, &first_entry)) {
3776 assert((map != kernel_map) &&
3777 (!entry->is_sub_map));
3778 /*
3779 * User: use the next entry
3780 */
3781 entry = first_entry->vme_next;
3782 } else {
3783 entry = first_entry;
3784 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
3785 }
3786 last_timestamp = map->timestamp;
3787 continue;
3788 } /* end in_transition */
3789
3790 if (entry->wired_count) {
3791 /*
3792 * Remove a kernel wiring if requested or if
3793 * there are user wirings.
3794 */
3795 if ((flags & VM_MAP_REMOVE_KUNWIRE) ||
3796 (entry->user_wired_count > 0))
3797 entry->wired_count--;
3798
3799 /* remove all user wire references */
3800 entry->user_wired_count = 0;
3801
3802 if (entry->wired_count != 0) {
3803 assert((map != kernel_map) &&
3804 (!entry->is_sub_map));
3805 /*
3806 * Cannot continue. Typical case is when
3807 * a user thread has physical I/O pending
3808 * on this page. Either wait for the
3809 * kernel wiring to go away or return an
3810 * error.
3811 */
3812 if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) {
3813 wait_result_t wait_result;
3814
3815 s = entry->vme_start;
3816 entry->needs_wakeup = TRUE;
3817 wait_result = vm_map_entry_wait(map,
3818 interruptible);
3819
3820 if (interruptible &&
3821 wait_result == THREAD_INTERRUPTED) {
3822 /*
3823 * We do not clear the
3824 * needs_wakeup flag, since we
3825 * cannot tell if we were the
3826 * only one.
3827 */
3828 vm_map_unlock(map);
3829 return KERN_ABORTED;
3830 }
3831
3832 /*
3833 * The entry could have been clipped or
3834 * it may not exist anymore. Look it
3835 * up again.
3836 */
3837 if (!vm_map_lookup_entry(map, s,
3838 &first_entry)) {
3839 assert((map != kernel_map) &&
3840 (!entry->is_sub_map));
3841 /*
3842 * User: use the next entry
3843 */
3844 entry = first_entry->vme_next;
3845 } else {
3846 entry = first_entry;
3847 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
3848 }
3849 last_timestamp = map->timestamp;
3850 continue;
3851 }
3852 else {
3853 return KERN_FAILURE;
3854 }
3855 }
3856
3857 entry->in_transition = TRUE;
3858 /*
3859 * copy current entry. see comment in vm_map_wire()
3860 */
3861 tmp_entry = *entry;
3862 s = entry->vme_start;
3863 e = entry->vme_end;
3864
3865 /*
3866 * We can unlock the map now. The in_transition
3867 * state guarantees existence of the entry.
3868 */
3869 vm_map_unlock(map);
3870 vm_fault_unwire(map, &tmp_entry,
3871 tmp_entry.object.vm_object == kernel_object,
3872 map->pmap, tmp_entry.vme_start);
3873 vm_map_lock(map);
3874
3875 if (last_timestamp+1 != map->timestamp) {
3876 /*
3877 * Find the entry again. It could have
3878 * been clipped after we unlocked the map.
3879 */
3880 if (!vm_map_lookup_entry(map, s, &first_entry)){
3881 assert((map != kernel_map) &&
3882 (!entry->is_sub_map));
3883 first_entry = first_entry->vme_next;
3884 } else {
3885 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
3886 }
3887 } else {
3888 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
3889 first_entry = entry;
3890 }
3891
3892 last_timestamp = map->timestamp;
3893
3894 entry = first_entry;
3895 while ((entry != vm_map_to_entry(map)) &&
3896 (entry->vme_start < tmp_entry.vme_end)) {
3897 assert(entry->in_transition);
3898 entry->in_transition = FALSE;
3899 if (entry->needs_wakeup) {
3900 entry->needs_wakeup = FALSE;
3901 need_wakeup = TRUE;
3902 }
3903 entry = entry->vme_next;
3904 }
3905 /*
3906 * We have unwired the entry(s). Go back and
3907 * delete them.
3908 */
3909 entry = first_entry;
3910 continue;
3911 }
3912
3913 /* entry is unwired */
3914 assert(entry->wired_count == 0);
3915 assert(entry->user_wired_count == 0);
3916
3917 if ((!entry->is_sub_map &&
3918 entry->object.vm_object != kernel_object) ||
3919 entry->is_sub_map) {
3920 if(entry->is_sub_map) {
3921 if(entry->use_pmap) {
3922 #ifndef NO_NESTED_PMAP
3923 pmap_unnest(map->pmap,
3924 (addr64_t)entry->vme_start);
3925 #endif /* NO_NESTED_PMAP */
3926 if((map->mapped) && (map->ref_count)) {
3927 /* clean up parent map/maps */
3928 vm_map_submap_pmap_clean(
3929 map, entry->vme_start,
3930 entry->vme_end,
3931 entry->object.sub_map,
3932 entry->offset);
3933 }
3934 } else {
3935 vm_map_submap_pmap_clean(
3936 map, entry->vme_start, entry->vme_end,
3937 entry->object.sub_map,
3938 entry->offset);
3939 }
3940 } else {
3941 object = entry->object.vm_object;
3942 if((map->mapped) && (map->ref_count)) {
3943 vm_object_pmap_protect(
3944 object, entry->offset,
3945 entry->vme_end - entry->vme_start,
3946 PMAP_NULL,
3947 entry->vme_start,
3948 VM_PROT_NONE);
3949 } else {
3950 pmap_remove(map->pmap,
3951 (addr64_t)entry->vme_start,
3952 (addr64_t)entry->vme_end);
3953 }
3954 }
3955 }
3956
3957 /*
3958 * All pmap mappings for this map entry must have been
3959 * cleared by now.
3960 */
3961 assert(vm_map_pmap_is_empty(map,
3962 entry->vme_start,
3963 entry->vme_end));
3964
3965 next = entry->vme_next;
3966 s = next->vme_start;
3967 last_timestamp = map->timestamp;
3968
3969 if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) &&
3970 zap_map != VM_MAP_NULL) {
3971 /*
3972 * The caller wants to save the affected VM map entries
3973 * into the "zap_map". The caller will take care of
3974 * these entries.
3975 */
3976 /* unlink the entry from "map" ... */
3977 vm_map_entry_unlink(map, entry);
3978 /* ... and add it to the end of the "zap_map" */
3979 vm_map_entry_link(zap_map,
3980 vm_map_last_entry(zap_map),
3981 entry);
3982 } else {
3983 vm_map_entry_delete(map, entry);
3984 /* vm_map_entry_delete unlocks the map */
3985 vm_map_lock(map);
3986 }
3987
3988 entry = next;
3989
3990 if(entry == vm_map_to_entry(map)) {
3991 break;
3992 }
3993 if (last_timestamp+1 != map->timestamp) {
3994 /*
3995 * we are responsible for deleting everything
3996 * from the given space; if someone has interfered,
3997 * we pick up where we left off. Back fills should
3998 * be all right for anyone except map_delete, and
3999 * we have to assume that the task has been fully
4000 * disabled before we get here.
4001 */
4002 if (!vm_map_lookup_entry(map, s, &entry)){
4003 entry = entry->vme_next;
4004 } else {
4005 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4006 }
4007 /*
4008 * Others can not only allocate behind us; we can
4009 * also see entries coalesce while we don't hold the map lock.
4010 */
4011 if(entry == vm_map_to_entry(map)) {
4012 break;
4013 }
4014 vm_map_clip_start(map, entry, s);
4015 }
4016 last_timestamp = map->timestamp;
4017 }
4018
4019 if (map->wait_for_space)
4020 thread_wakeup((event_t) map);
4021 /*
4022 * wake up anybody waiting on entries that we have already deleted.
4023 */
4024 if (need_wakeup)
4025 vm_map_entry_wakeup(map);
4026
4027 return KERN_SUCCESS;
4028 }
4029
4030
4031 /*
4032 * vm_map_remove:
4033 *
4034 * Remove the given address range from the target map.
4035 * This is the exported form of vm_map_delete.
4036 */
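/*
 * Illustrative sketch (not part of the original source): a typical
 * caller removes a page-rounded range and checks the result.  The
 * names "addr" and "size" are hypothetical.
 *
 *	kern_return_t kr;
 *
 *	kr = vm_map_remove(map,
 *			   vm_map_trunc_page(addr),
 *			   vm_map_round_page(addr + size),
 *			   VM_MAP_NO_FLAGS);
 *	if (kr != KERN_SUCCESS)
 *		return kr;
 */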
4037 kern_return_t
4038 vm_map_remove(
4039 register vm_map_t map,
4040 register vm_map_offset_t start,
4041 register vm_map_offset_t end,
4042 register boolean_t flags)
4043 {
4044 register kern_return_t result;
4045
4046 vm_map_lock(map);
4047 VM_MAP_RANGE_CHECK(map, start, end);
4048 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
4049 vm_map_unlock(map);
4050
4051 return(result);
4052 }
4053
4054
4055 /*
4056 * Routine: vm_map_copy_discard
4057 *
4058 * Description:
4059 * Dispose of a map copy object (returned by
4060 * vm_map_copyin).
4061 */
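/*
 * Illustrative sketch (not part of the original source): if a copy
 * object returned by vm_map_copyin ends up unused, for example
 * because a later step fails, the caller is responsible for
 * discarding it.  "some_later_step_failed" is hypothetical.
 *
 *	if (some_later_step_failed) {
 *		vm_map_copy_discard(copy);
 *		return kr;
 *	}
 */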
4062 void
4063 vm_map_copy_discard(
4064 vm_map_copy_t copy)
4065 {
4066 TR_DECL("vm_map_copy_discard");
4067
4068 /* tr3("enter: copy 0x%x type %d", copy, copy->type);*/
4069
4070 if (copy == VM_MAP_COPY_NULL)
4071 return;
4072
4073 switch (copy->type) {
4074 case VM_MAP_COPY_ENTRY_LIST:
4075 while (vm_map_copy_first_entry(copy) !=
4076 vm_map_copy_to_entry(copy)) {
4077 vm_map_entry_t entry = vm_map_copy_first_entry(copy);
4078
4079 vm_map_copy_entry_unlink(copy, entry);
4080 vm_object_deallocate(entry->object.vm_object);
4081 vm_map_copy_entry_dispose(copy, entry);
4082 }
4083 break;
4084 case VM_MAP_COPY_OBJECT:
4085 vm_object_deallocate(copy->cpy_object);
4086 break;
4087 case VM_MAP_COPY_KERNEL_BUFFER:
4088
4089 /*
4090 * The vm_map_copy_t and possibly the data buffer were
4091 * allocated by a single call to kalloc(), i.e. the
4092 * vm_map_copy_t was not allocated out of the zone.
4093 */
4094 kfree(copy, copy->cpy_kalloc_size);
4095 return;
4096 }
4097 zfree(vm_map_copy_zone, copy);
4098 }
4099
4100 /*
4101 * Routine: vm_map_copy_copy
4102 *
4103 * Description:
4104 * Move the information in a map copy object to
4105 * a new map copy object, leaving the old one
4106 * empty.
4107 *
4108 * This is used by kernel routines that need
4109 * to look at out-of-line data (in copyin form)
4110 * before deciding whether to return SUCCESS.
4111 * If the routine returns FAILURE, the original
4112 * copy object will be deallocated; therefore,
4113 * these routines must make a copy of the copy
4114 * object and leave the original empty so that
4115 * deallocation will not fail.
4116 */
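/*
 * Illustrative sketch (not part of the original source) of the
 * pattern described above: take a private handle on the copy chain
 * before doing work that may fail, so that deallocating the (now
 * empty) original cannot fail.  "examine_out_of_line_data" is a
 * hypothetical routine.
 *
 *	vm_map_copy_t my_copy;
 *
 *	my_copy = vm_map_copy_copy(copy);	// original left empty
 *	kr = examine_out_of_line_data(my_copy);	// hypothetical
 *	if (kr != KERN_SUCCESS) {
 *		vm_map_copy_discard(my_copy);
 *		return kr;	// caller may deallocate "copy" safely
 *	}
 */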
4117 vm_map_copy_t
4118 vm_map_copy_copy(
4119 vm_map_copy_t copy)
4120 {
4121 vm_map_copy_t new_copy;
4122
4123 if (copy == VM_MAP_COPY_NULL)
4124 return VM_MAP_COPY_NULL;
4125
4126 /*
4127 * Allocate a new copy object, and copy the information
4128 * from the old one into it.
4129 */
4130
4131 new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
4132 *new_copy = *copy;
4133
4134 if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
4135 /*
4136 * The links in the entry chain must be
4137 * changed to point to the new copy object.
4138 */
4139 vm_map_copy_first_entry(copy)->vme_prev
4140 = vm_map_copy_to_entry(new_copy);
4141 vm_map_copy_last_entry(copy)->vme_next
4142 = vm_map_copy_to_entry(new_copy);
4143 }
4144
4145 /*
4146 * Change the old copy object into one that contains
4147 * nothing to be deallocated.
4148 */
4149 copy->type = VM_MAP_COPY_OBJECT;
4150 copy->cpy_object = VM_OBJECT_NULL;
4151
4152 /*
4153 * Return the new object.
4154 */
4155 return new_copy;
4156 }
4157
4158 static kern_return_t
4159 vm_map_overwrite_submap_recurse(
4160 vm_map_t dst_map,
4161 vm_map_offset_t dst_addr,
4162 vm_map_size_t dst_size)
4163 {
4164 vm_map_offset_t dst_end;
4165 vm_map_entry_t tmp_entry;
4166 vm_map_entry_t entry;
4167 kern_return_t result;
4168 boolean_t encountered_sub_map = FALSE;
4169
4170
4171
4172 /*
4173 * Verify that the destination is all writeable
4174 * initially. We have to trunc the destination
4175 * address and round the copy size or we'll end up
4176 * splitting entries in strange ways.
4177 */
4178
4179 dst_end = vm_map_round_page(dst_addr + dst_size);
4180 vm_map_lock(dst_map);
4181
4182 start_pass_1:
4183 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
4184 vm_map_unlock(dst_map);
4185 return(KERN_INVALID_ADDRESS);
4186 }
4187
4188 vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(dst_addr));
4189
4190 for (entry = tmp_entry;;) {
4191 vm_map_entry_t next;
4192
4193 next = entry->vme_next;
4194 while(entry->is_sub_map) {
4195 vm_map_offset_t sub_start;
4196 vm_map_offset_t sub_end;
4197 vm_map_offset_t local_end;
4198
4199 if (entry->in_transition) {
4200 /*
4201 * Say that we are waiting, and wait for entry.
4202 */
4203 entry->needs_wakeup = TRUE;
4204 vm_map_entry_wait(dst_map, THREAD_UNINT);
4205
4206 goto start_pass_1;
4207 }
4208
4209 encountered_sub_map = TRUE;
4210 sub_start = entry->offset;
4211
4212 if(entry->vme_end < dst_end)
4213 sub_end = entry->vme_end;
4214 else
4215 sub_end = dst_end;
4216 sub_end -= entry->vme_start;
4217 sub_end += entry->offset;
4218 local_end = entry->vme_end;
4219 vm_map_unlock(dst_map);
4220
4221 result = vm_map_overwrite_submap_recurse(
4222 entry->object.sub_map,
4223 sub_start,
4224 sub_end - sub_start);
4225
4226 if(result != KERN_SUCCESS)
4227 return result;
4228 if (dst_end <= entry->vme_end)
4229 return KERN_SUCCESS;
4230 vm_map_lock(dst_map);
4231 if(!vm_map_lookup_entry(dst_map, local_end,
4232 &tmp_entry)) {
4233 vm_map_unlock(dst_map);
4234 return(KERN_INVALID_ADDRESS);
4235 }
4236 entry = tmp_entry;
4237 next = entry->vme_next;
4238 }
4239
4240 if ( ! (entry->protection & VM_PROT_WRITE)) {
4241 vm_map_unlock(dst_map);
4242 return(KERN_PROTECTION_FAILURE);
4243 }
4244
4245 /*
4246 * If the entry is in transition, we must wait
4247 * for it to exit that state. Anything could happen
4248 * when we unlock the map, so start over.
4249 */
4250 if (entry->in_transition) {
4251
4252 /*
4253 * Say that we are waiting, and wait for entry.
4254 */
4255 entry->needs_wakeup = TRUE;
4256 vm_map_entry_wait(dst_map, THREAD_UNINT);
4257
4258 goto start_pass_1;
4259 }
4260
4261 /*
4262 * our range is contained completely within this map entry
4263 */
4264 if (dst_end <= entry->vme_end) {
4265 vm_map_unlock(dst_map);
4266 return KERN_SUCCESS;
4267 }
4268 /*
4269 * check that range specified is contiguous region
4270 */
4271 if ((next == vm_map_to_entry(dst_map)) ||
4272 (next->vme_start != entry->vme_end)) {
4273 vm_map_unlock(dst_map);
4274 return(KERN_INVALID_ADDRESS);
4275 }
4276
4277 /*
4278 * Check for permanent objects in the destination.
4279 */
4280 if ((entry->object.vm_object != VM_OBJECT_NULL) &&
4281 ((!entry->object.vm_object->internal) ||
4282 (entry->object.vm_object->true_share))) {
4283 if(encountered_sub_map) {
4284 vm_map_unlock(dst_map);
4285 return(KERN_FAILURE);
4286 }
4287 }
4288
4289
4290 entry = next;
4291 }/* for */
4292 vm_map_unlock(dst_map);
4293 return(KERN_SUCCESS);
4294 }
4295
4296 /*
4297 * Routine: vm_map_copy_overwrite
4298 *
4299 * Description:
4300 * Copy the memory described by the map copy
4301 * object (copy; returned by vm_map_copyin) onto
4302 * the specified destination region (dst_map, dst_addr).
4303 * The destination must be writeable.
4304 *
4305 * Unlike vm_map_copyout, this routine actually
4306 * writes over previously-mapped memory. If the
4307 * previous mapping was to a permanent (user-supplied)
4308 * memory object, it is preserved.
4309 *
4310 * The attributes (protection and inheritance) of the
4311 * destination region are preserved.
4312 *
4313 * If successful, consumes the copy object.
4314 * Otherwise, the caller is responsible for it.
4315 *
4316 * Implementation notes:
4317 * To overwrite aligned temporary virtual memory, it is
4318 * sufficient to remove the previous mapping and insert
4319 * the new copy. This replacement is done either on
4320 * the whole region (if no permanent virtual memory
4321 * objects are embedded in the destination region) or
4322 * in individual map entries.
4323 *
4324 * To overwrite permanent virtual memory, it is necessary
4325 * to copy each page, as the external memory management
4326 * interface currently does not provide any optimizations.
4327 *
4328 * Unaligned memory also has to be copied. It is possible
4329 * to use 'vm_trickery' to copy the aligned data. This is
4330 * not currently done, but would not be hard to implement.
4331 *
4332 * Once a page of permanent memory has been overwritten,
4333 * it is impossible to interrupt this function; otherwise,
4334 * the call would be neither atomic nor location-independent.
4335 * The kernel-state portion of a user thread must be
4336 * interruptible.
4337 *
4338 * It may be expensive to forward all requests that might
4339 * overwrite permanent memory (vm_write, vm_copy) to
4340 * uninterruptible kernel threads. This routine may be
4341 * called by interruptible threads; however, success is
4342 * not guaranteed -- if the request cannot be performed
4343 * atomically and interruptibly, an error indication is
4344 * returned.
4345 */
4346
4347 static kern_return_t
4348 vm_map_copy_overwrite_nested(
4349 vm_map_t dst_map,
4350 vm_map_address_t dst_addr,
4351 vm_map_copy_t copy,
4352 boolean_t interruptible,
4353 pmap_t pmap)
4354 {
4355 vm_map_offset_t dst_end;
4356 vm_map_entry_t tmp_entry;
4357 vm_map_entry_t entry;
4358 kern_return_t kr;
4359 boolean_t aligned = TRUE;
4360 boolean_t contains_permanent_objects = FALSE;
4361 boolean_t encountered_sub_map = FALSE;
4362 vm_map_offset_t base_addr;
4363 vm_map_size_t copy_size;
4364 vm_map_size_t total_size;
4365
4366
4367 /*
4368 * Check for null copy object.
4369 */
4370
4371 if (copy == VM_MAP_COPY_NULL)
4372 return(KERN_SUCCESS);
4373
4374 /*
4375 * Check for special kernel buffer allocated
4376 * by new_ipc_kmsg_copyin.
4377 */
4378
4379 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
4380 return(vm_map_copyout_kernel_buffer(
4381 dst_map, &dst_addr,
4382 copy, TRUE));
4383 }
4384
4385 /*
4386 * Only works for entry lists at the moment. Will
4387 * support page lists later.
4388 */
4389
4390 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
4391
4392 if (copy->size == 0) {
4393 vm_map_copy_discard(copy);
4394 return(KERN_SUCCESS);
4395 }
4396
4397 /*
4398 * Verify that the destination is all writeable
4399 * initially. We have to trunc the destination
4400 * address and round the copy size or we'll end up
4401 * splitting entries in strange ways.
4402 */
4403
4404 if (!page_aligned(copy->size) ||
4405 !page_aligned (copy->offset) ||
4406 !page_aligned (dst_addr))
4407 {
4408 aligned = FALSE;
4409 dst_end = vm_map_round_page(dst_addr + copy->size);
4410 } else {
4411 dst_end = dst_addr + copy->size;
4412 }
4413
4414 vm_map_lock(dst_map);
4415
4416 /* LP64todo - remove this check when vm_map_commpage64()
4417 * no longer has to stuff in a map_entry for the commpage
4418 * above the map's max_offset.
4419 */
4420 if (dst_addr >= dst_map->max_offset) {
4421 vm_map_unlock(dst_map);
4422 return(KERN_INVALID_ADDRESS);
4423 }
4424
4425 start_pass_1:
4426 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
4427 vm_map_unlock(dst_map);
4428 return(KERN_INVALID_ADDRESS);
4429 }
4430 vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(dst_addr));
4431 for (entry = tmp_entry;;) {
4432 vm_map_entry_t next = entry->vme_next;
4433
4434 while(entry->is_sub_map) {
4435 vm_map_offset_t sub_start;
4436 vm_map_offset_t sub_end;
4437 vm_map_offset_t local_end;
4438
4439 if (entry->in_transition) {
4440
4441 /*
4442 * Say that we are waiting, and wait for entry.
4443 */
4444 entry->needs_wakeup = TRUE;
4445 vm_map_entry_wait(dst_map, THREAD_UNINT);
4446
4447 goto start_pass_1;
4448 }
4449
4450 local_end = entry->vme_end;
4451 if (!(entry->needs_copy)) {
4452 /* if needs_copy we are a COW submap */
4453 /* in such a case we just replace so */
4454 /* there is no need for the follow- */
4455 /* ing check. */
4456 encountered_sub_map = TRUE;
4457 sub_start = entry->offset;
4458
4459 if(entry->vme_end < dst_end)
4460 sub_end = entry->vme_end;
4461 else
4462 sub_end = dst_end;
4463 sub_end -= entry->vme_start;
4464 sub_end += entry->offset;
4465 vm_map_unlock(dst_map);
4466
4467 kr = vm_map_overwrite_submap_recurse(
4468 entry->object.sub_map,
4469 sub_start,
4470 sub_end - sub_start);
4471 if(kr != KERN_SUCCESS)
4472 return kr;
4473 vm_map_lock(dst_map);
4474 }
4475
4476 if (dst_end <= entry->vme_end)
4477 goto start_overwrite;
4478 if(!vm_map_lookup_entry(dst_map, local_end,
4479 &entry)) {
4480 vm_map_unlock(dst_map);
4481 return(KERN_INVALID_ADDRESS);
4482 }
4483 next = entry->vme_next;
4484 }
4485
4486 if ( ! (entry->protection & VM_PROT_WRITE)) {
4487 vm_map_unlock(dst_map);
4488 return(KERN_PROTECTION_FAILURE);
4489 }
4490
4491 /*
4492 * If the entry is in transition, we must wait
4493 * for it to exit that state. Anything could happen
4494 * when we unlock the map, so start over.
4495 */
4496 if (entry->in_transition) {
4497
4498 /*
4499 * Say that we are waiting, and wait for entry.
4500 */
4501 entry->needs_wakeup = TRUE;
4502 vm_map_entry_wait(dst_map, THREAD_UNINT);
4503
4504 goto start_pass_1;
4505 }
4506
4507 /*
4508 * our range is contained completely within this map entry
4509 */
4510 if (dst_end <= entry->vme_end)
4511 break;
4512 /*
4513 * check that range specified is contiguous region
4514 */
4515 if ((next == vm_map_to_entry(dst_map)) ||
4516 (next->vme_start != entry->vme_end)) {
4517 vm_map_unlock(dst_map);
4518 return(KERN_INVALID_ADDRESS);
4519 }
4520
4521
4522 /*
4523 * Check for permanent objects in the destination.
4524 */
4525 if ((entry->object.vm_object != VM_OBJECT_NULL) &&
4526 ((!entry->object.vm_object->internal) ||
4527 (entry->object.vm_object->true_share))) {
4528 contains_permanent_objects = TRUE;
4529 }
4530
4531 entry = next;
4532 }/* for */
4533
4534 start_overwrite:
4535 /*
4536 * If there are permanent objects in the destination, then
4537 * the copy cannot be interrupted.
4538 */
4539
4540 if (interruptible && contains_permanent_objects) {
4541 vm_map_unlock(dst_map);
4542 return(KERN_FAILURE); /* XXX */
4543 }
4544
4545 /*
4546 *
4547 * Make a second pass, overwriting the data.
4548 * At the beginning of each loop iteration,
4549 * the next entry to be overwritten is "tmp_entry"
4550 * (initially, the value returned from the lookup above),
4551 * and the starting address expected in that entry
4552 * is "start".
4553 */
4554
4555 total_size = copy->size;
4556 if(encountered_sub_map) {
4557 copy_size = 0;
4558 /* re-calculate tmp_entry since we've had the map */
4559 /* unlocked */
4560 if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
4561 vm_map_unlock(dst_map);
4562 return(KERN_INVALID_ADDRESS);
4563 }
4564 } else {
4565 copy_size = copy->size;
4566 }
4567
4568 base_addr = dst_addr;
4569 while(TRUE) {
4570 /* deconstruct the copy object and do it in parts, */
4571 /* but only in the sub_map, interruptible case */
4572 vm_map_entry_t copy_entry;
4573 vm_map_entry_t previous_prev = VM_MAP_ENTRY_NULL;
4574 vm_map_entry_t next_copy = VM_MAP_ENTRY_NULL;
4575 int nentries;
4576 int remaining_entries = 0;
4577 int new_offset = 0;
4578
4579 for (entry = tmp_entry; copy_size == 0;) {
4580 vm_map_entry_t next;
4581
4582 next = entry->vme_next;
4583
4584 /* tmp_entry and the base address are moved along */
4585 /* each time we encounter a sub-map. Otherwise, */
4586 /* entry can outpace tmp_entry, and copy_size */
4587 /* may reflect the distance between them. */
4588 /* If the current entry is found to be in transition, */
4589 /* we start over at the beginning or at the last */
4590 /* encountered submap, as dictated by base_addr, */
4591 /* and zero copy_size accordingly. */
4592 if (entry->in_transition) {
4593 /*
4594 * Say that we are waiting, and wait for entry.
4595 */
4596 entry->needs_wakeup = TRUE;
4597 vm_map_entry_wait(dst_map, THREAD_UNINT);
4598
4599 if(!vm_map_lookup_entry(dst_map, base_addr,
4600 &tmp_entry)) {
4601 vm_map_unlock(dst_map);
4602 return(KERN_INVALID_ADDRESS);
4603 }
4604 copy_size = 0;
4605 entry = tmp_entry;
4606 continue;
4607 }
4608 if(entry->is_sub_map) {
4609 vm_map_offset_t sub_start;
4610 vm_map_offset_t sub_end;
4611 vm_map_offset_t local_end;
4612
4613 if (entry->needs_copy) {
4614 /* if this is a COW submap, */
4615 /* just back the range with an */
4616 /* anonymous entry */
4617 if(entry->vme_end < dst_end)
4618 sub_end = entry->vme_end;
4619 else
4620 sub_end = dst_end;
4621 if(entry->vme_start < base_addr)
4622 sub_start = base_addr;
4623 else
4624 sub_start = entry->vme_start;
4625 vm_map_clip_end(
4626 dst_map, entry, sub_end);
4627 vm_map_clip_start(
4628 dst_map, entry, sub_start);
4629 entry->is_sub_map = FALSE;
4630 vm_map_deallocate(
4631 entry->object.sub_map);
4632 entry->object.sub_map = NULL;
4633 entry->is_shared = FALSE;
4634 entry->needs_copy = FALSE;
4635 entry->offset = 0;
4636 entry->protection = VM_PROT_ALL;
4637 entry->max_protection = VM_PROT_ALL;
4638 entry->wired_count = 0;
4639 entry->user_wired_count = 0;
4640 if(entry->inheritance
4641 == VM_INHERIT_SHARE)
4642 entry->inheritance = VM_INHERIT_COPY;
4643 continue;
4644 }
4645 /* first take care of any non-sub_map */
4646 /* entries to send */
4647 if(base_addr < entry->vme_start) {
4648 /* stuff to send */
4649 copy_size =
4650 entry->vme_start - base_addr;
4651 break;
4652 }
4653 sub_start = entry->offset;
4654
4655 if(entry->vme_end < dst_end)
4656 sub_end = entry->vme_end;
4657 else
4658 sub_end = dst_end;
4659 sub_end -= entry->vme_start;
4660 sub_end += entry->offset;
4661 local_end = entry->vme_end;
4662 vm_map_unlock(dst_map);
4663 copy_size = sub_end - sub_start;
4664
4665 /* adjust the copy object */
4666 if (total_size > copy_size) {
4667 vm_map_size_t local_size = 0;
4668 vm_map_size_t entry_size;
4669
4670 nentries = 1;
4671 new_offset = copy->offset;
4672 copy_entry = vm_map_copy_first_entry(copy);
4673 while(copy_entry !=
4674 vm_map_copy_to_entry(copy)){
4675 entry_size = copy_entry->vme_end -
4676 copy_entry->vme_start;
4677 if((local_size < copy_size) &&
4678 ((local_size + entry_size)
4679 >= copy_size)) {
4680 vm_map_copy_clip_end(copy,
4681 copy_entry,
4682 copy_entry->vme_start +
4683 (copy_size - local_size));
4684 entry_size = copy_entry->vme_end -
4685 copy_entry->vme_start;
4686 local_size += entry_size;
4687 new_offset += entry_size;
4688 }
4689 if(local_size >= copy_size) {
4690 next_copy = copy_entry->vme_next;
4691 copy_entry->vme_next =
4692 vm_map_copy_to_entry(copy);
4693 previous_prev =
4694 copy->cpy_hdr.links.prev;
4695 copy->cpy_hdr.links.prev = copy_entry;
4696 copy->size = copy_size;
4697 remaining_entries =
4698 copy->cpy_hdr.nentries;
4699 remaining_entries -= nentries;
4700 copy->cpy_hdr.nentries = nentries;
4701 break;
4702 } else {
4703 local_size += entry_size;
4704 new_offset += entry_size;
4705 nentries++;
4706 }
4707 copy_entry = copy_entry->vme_next;
4708 }
4709 }
4710
4711 if((entry->use_pmap) && (pmap == NULL)) {
4712 kr = vm_map_copy_overwrite_nested(
4713 entry->object.sub_map,
4714 sub_start,
4715 copy,
4716 interruptible,
4717 entry->object.sub_map->pmap);
4718 } else if (pmap != NULL) {
4719 kr = vm_map_copy_overwrite_nested(
4720 entry->object.sub_map,
4721 sub_start,
4722 copy,
4723 interruptible, pmap);
4724 } else {
4725 kr = vm_map_copy_overwrite_nested(
4726 entry->object.sub_map,
4727 sub_start,
4728 copy,
4729 interruptible,
4730 dst_map->pmap);
4731 }
4732 if(kr != KERN_SUCCESS) {
4733 if(next_copy != NULL) {
4734 copy->cpy_hdr.nentries +=
4735 remaining_entries;
4736 copy->cpy_hdr.links.prev->vme_next =
4737 next_copy;
4738 copy->cpy_hdr.links.prev
4739 = previous_prev;
4740 copy->size = total_size;
4741 }
4742 return kr;
4743 }
4744 if (dst_end <= local_end) {
4745 return(KERN_SUCCESS);
4746 }
4747 /* otherwise the copy no longer exists; it was */
4748 /* destroyed after a successful copy_overwrite */
4749 copy = (vm_map_copy_t)
4750 zalloc(vm_map_copy_zone);
4751 vm_map_copy_first_entry(copy) =
4752 vm_map_copy_last_entry(copy) =
4753 vm_map_copy_to_entry(copy);
4754 copy->type = VM_MAP_COPY_ENTRY_LIST;
4755 copy->offset = new_offset;
4756
4757 total_size -= copy_size;
4758 copy_size = 0;
4759 /* put back remainder of copy in container */
4760 if(next_copy != NULL) {
4761 copy->cpy_hdr.nentries = remaining_entries;
4762 copy->cpy_hdr.links.next = next_copy;
4763 copy->cpy_hdr.links.prev = previous_prev;
4764 copy->size = total_size;
4765 next_copy->vme_prev =
4766 vm_map_copy_to_entry(copy);
4767 next_copy = NULL;
4768 }
4769 base_addr = local_end;
4770 vm_map_lock(dst_map);
4771 if(!vm_map_lookup_entry(dst_map,
4772 local_end, &tmp_entry)) {
4773 vm_map_unlock(dst_map);
4774 return(KERN_INVALID_ADDRESS);
4775 }
4776 entry = tmp_entry;
4777 continue;
4778 }
4779 if (dst_end <= entry->vme_end) {
4780 copy_size = dst_end - base_addr;
4781 break;
4782 }
4783
4784 if ((next == vm_map_to_entry(dst_map)) ||
4785 (next->vme_start != entry->vme_end)) {
4786 vm_map_unlock(dst_map);
4787 return(KERN_INVALID_ADDRESS);
4788 }
4789
4790 entry = next;
4791 }/* for */
4792
4793 next_copy = NULL;
4794 nentries = 1;
4795
4796 /* adjust the copy object */
4797 if (total_size > copy_size) {
4798 vm_map_size_t local_size = 0;
4799 vm_map_size_t entry_size;
4800
4801 new_offset = copy->offset;
4802 copy_entry = vm_map_copy_first_entry(copy);
4803 while(copy_entry != vm_map_copy_to_entry(copy)) {
4804 entry_size = copy_entry->vme_end -
4805 copy_entry->vme_start;
4806 if((local_size < copy_size) &&
4807 ((local_size + entry_size)
4808 >= copy_size)) {
4809 vm_map_copy_clip_end(copy, copy_entry,
4810 copy_entry->vme_start +
4811 (copy_size - local_size));
4812 entry_size = copy_entry->vme_end -
4813 copy_entry->vme_start;
4814 local_size += entry_size;
4815 new_offset += entry_size;
4816 }
4817 if(local_size >= copy_size) {
4818 next_copy = copy_entry->vme_next;
4819 copy_entry->vme_next =
4820 vm_map_copy_to_entry(copy);
4821 previous_prev =
4822 copy->cpy_hdr.links.prev;
4823 copy->cpy_hdr.links.prev = copy_entry;
4824 copy->size = copy_size;
4825 remaining_entries =
4826 copy->cpy_hdr.nentries;
4827 remaining_entries -= nentries;
4828 copy->cpy_hdr.nentries = nentries;
4829 break;
4830 } else {
4831 local_size += entry_size;
4832 new_offset += entry_size;
4833 nentries++;
4834 }
4835 copy_entry = copy_entry->vme_next;
4836 }
4837 }
4838
4839 if (aligned) {
4840 pmap_t local_pmap;
4841
4842 if(pmap)
4843 local_pmap = pmap;
4844 else
4845 local_pmap = dst_map->pmap;
4846
4847 if ((kr = vm_map_copy_overwrite_aligned(
4848 dst_map, tmp_entry, copy,
4849 base_addr, local_pmap)) != KERN_SUCCESS) {
4850 if(next_copy != NULL) {
4851 copy->cpy_hdr.nentries +=
4852 remaining_entries;
4853 copy->cpy_hdr.links.prev->vme_next =
4854 next_copy;
4855 copy->cpy_hdr.links.prev =
4856 previous_prev;
4857 copy->size += copy_size;
4858 }
4859 return kr;
4860 }
4861 vm_map_unlock(dst_map);
4862 } else {
4863 /*
4864 * Performance gain:
4865 *
4866 * if the copy and dst address are misaligned but share the same
4867 * offset within the page, we can copy the misaligned
4868 * parts unaligned and copy the rest aligned. If they are
4869 * aligned but len is unaligned, we simply need to copy
4870 * the trailing bit unaligned. We'd need to split the misaligned
4871 * bits of the region in that case!
4872 */
4873 /* ALWAYS UNLOCKS THE dst_map MAP */
4874 if ((kr = vm_map_copy_overwrite_unaligned( dst_map,
4875 tmp_entry, copy, base_addr)) != KERN_SUCCESS) {
4876 if(next_copy != NULL) {
4877 copy->cpy_hdr.nentries +=
4878 remaining_entries;
4879 copy->cpy_hdr.links.prev->vme_next =
4880 next_copy;
4881 copy->cpy_hdr.links.prev =
4882 previous_prev;
4883 copy->size += copy_size;
4884 }
4885 return kr;
4886 }
4887 }
4888 total_size -= copy_size;
4889 if(total_size == 0)
4890 break;
4891 base_addr += copy_size;
4892 copy_size = 0;
4893 copy->offset = new_offset;
4894 if(next_copy != NULL) {
4895 copy->cpy_hdr.nentries = remaining_entries;
4896 copy->cpy_hdr.links.next = next_copy;
4897 copy->cpy_hdr.links.prev = previous_prev;
4898 next_copy->vme_prev = vm_map_copy_to_entry(copy);
4899 copy->size = total_size;
4900 }
4901 vm_map_lock(dst_map);
4902 while(TRUE) {
4903 if (!vm_map_lookup_entry(dst_map,
4904 base_addr, &tmp_entry)) {
4905 vm_map_unlock(dst_map);
4906 return(KERN_INVALID_ADDRESS);
4907 }
4908 if (tmp_entry->in_transition) {
4909 entry->needs_wakeup = TRUE;
4910 vm_map_entry_wait(dst_map, THREAD_UNINT);
4911 } else {
4912 break;
4913 }
4914 }
4915 vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(base_addr));
4916
4917 entry = tmp_entry;
4918 } /* while */
4919
4920 /*
4921 * Throw away the vm_map_copy object
4922 */
4923 vm_map_copy_discard(copy);
4924
4925 return(KERN_SUCCESS);
4926 }/* vm_map_copy_overwrite */
4927
4928 kern_return_t
4929 vm_map_copy_overwrite(
4930 vm_map_t dst_map,
4931 vm_map_offset_t dst_addr,
4932 vm_map_copy_t copy,
4933 boolean_t interruptible)
4934 {
4935 return vm_map_copy_overwrite_nested(
4936 dst_map, dst_addr, copy, interruptible, (pmap_t) NULL);
4937 }
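/*
 * Illustrative sketch (not part of the original source): overwriting
 * an existing, writeable destination region with data captured by
 * vm_map_copyin_common().  On success the copy is consumed; on
 * failure the caller still owns it.  "src_map", "src_addr", "len",
 * "dst_map" and "dst_addr" are hypothetical.
 *
 *	vm_map_copy_t copy;
 *	kern_return_t kr;
 *
 *	kr = vm_map_copyin_common(src_map, src_addr, len,
 *				  FALSE, FALSE, &copy, FALSE);
 *	if (kr != KERN_SUCCESS)
 *		return kr;
 *	kr = vm_map_copy_overwrite(dst_map, dst_addr, copy, FALSE);
 *	if (kr != KERN_SUCCESS)
 *		vm_map_copy_discard(copy);	// not consumed on failure
 */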
4938
4939
4940 /*
4941 * Routine: vm_map_copy_overwrite_unaligned [internal use only]
4942 *
4943 * Description:
4944 * Physically copy unaligned data
4945 *
4946 * Implementation:
4947 * Unaligned parts of pages have to be physically copied. We use
4948 * a modified form of vm_fault_copy (which understands non-aligned
4949 * page offsets and sizes) to do the copy. We attempt to copy as
4950 * much memory in one go as possible; however, vm_fault_copy copies
4951 * within one memory object, so we have to find the smallest of "amount left",
4952 * "source object data size" and "target object data size". With
4953 * unaligned data we don't need to split regions; therefore the source
4954 * (copy) object should be one map entry, though the target range may be split
4955 * over multiple map entries. In any event we are pessimistic
4956 * about these assumptions.
4957 *
4958 * Assumptions:
4959 * dst_map is locked on entry and is return locked on success,
4960 * unlocked on error.
4961 */
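/*
 * Illustrative sketch (not part of the original source) of the size
 * choice made on each iteration below: the amount moved per call to
 * vm_fault_copy() is the smallest of the bytes left overall, the
 * bytes left in the current source copy entry, and the bytes left in
 * the current destination entry.
 *
 *	copy_size = MIN(amount_left,
 *			MIN(copy_entry->vme_end -
 *				(copy_entry->vme_start + src_offset),
 *			    entry->vme_end - start));
 */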
4962
4963 static kern_return_t
4964 vm_map_copy_overwrite_unaligned(
4965 vm_map_t dst_map,
4966 vm_map_entry_t entry,
4967 vm_map_copy_t copy,
4968 vm_map_offset_t start)
4969 {
4970 vm_map_entry_t copy_entry = vm_map_copy_first_entry(copy);
4971 vm_map_version_t version;
4972 vm_object_t dst_object;
4973 vm_object_offset_t dst_offset;
4974 vm_object_offset_t src_offset;
4975 vm_object_offset_t entry_offset;
4976 vm_map_offset_t entry_end;
4977 vm_map_size_t src_size,
4978 dst_size,
4979 copy_size,
4980 amount_left;
4981 kern_return_t kr = KERN_SUCCESS;
4982
4983 vm_map_lock_write_to_read(dst_map);
4984
4985 src_offset = copy->offset - vm_object_trunc_page(copy->offset);
4986 amount_left = copy->size;
4987 /*
4988 * unaligned, so we never clipped this entry; we need the offset into
4989 * the vm_object, not just the data.
4990 */
4991 while (amount_left > 0) {
4992
4993 if (entry == vm_map_to_entry(dst_map)) {
4994 vm_map_unlock_read(dst_map);
4995 return KERN_INVALID_ADDRESS;
4996 }
4997
4998 /* "start" must be within the current map entry */
4999 assert ((start>=entry->vme_start) && (start<entry->vme_end));
5000
5001 dst_offset = start - entry->vme_start;
5002
5003 dst_size = entry->vme_end - start;
5004
5005 src_size = copy_entry->vme_end -
5006 (copy_entry->vme_start + src_offset);
5007
5008 if (dst_size < src_size) {
5009 /*
5010 * we can only copy dst_size bytes before
5011 * we have to get the next destination entry
5012 */
5013 copy_size = dst_size;
5014 } else {
5015 /*
5016 * we can only copy src_size bytes before
5017 * we have to get the next source copy entry
5018 */
5019 copy_size = src_size;
5020 }
5021
5022 if (copy_size > amount_left) {
5023 copy_size = amount_left;
5024 }
5025 /*
5026 * Entry needs copy: create a shadow object for the
5027 * copy-on-write region.
5028 */
5029 if (entry->needs_copy &&
5030 ((entry->protection & VM_PROT_WRITE) != 0))
5031 {
5032 if (vm_map_lock_read_to_write(dst_map)) {
5033 vm_map_lock_read(dst_map);
5034 goto RetryLookup;
5035 }
5036 vm_object_shadow(&entry->object.vm_object,
5037 &entry->offset,
5038 (vm_map_size_t)(entry->vme_end
5039 - entry->vme_start));
5040 entry->needs_copy = FALSE;
5041 vm_map_lock_write_to_read(dst_map);
5042 }
5043 dst_object = entry->object.vm_object;
5044 /*
5045 * unlike with the virtual (aligned) copy, we're going
5046 * to fault on this one, so we need a target object.
5047 */
5048 if (dst_object == VM_OBJECT_NULL) {
5049 if (vm_map_lock_read_to_write(dst_map)) {
5050 vm_map_lock_read(dst_map);
5051 goto RetryLookup;
5052 }
5053 dst_object = vm_object_allocate((vm_map_size_t)
5054 entry->vme_end - entry->vme_start);
5055 entry->object.vm_object = dst_object;
5056 entry->offset = 0;
5057 vm_map_lock_write_to_read(dst_map);
5058 }
5059 /*
5060 * Take an object reference and unlock map. The "entry" may
5061 * disappear or change when the map is unlocked.
5062 */
5063 vm_object_reference(dst_object);
5064 version.main_timestamp = dst_map->timestamp;
5065 entry_offset = entry->offset;
5066 entry_end = entry->vme_end;
5067 vm_map_unlock_read(dst_map);
5068 /*
5069 * Copy as much as possible in one pass
5070 */
5071 kr = vm_fault_copy(
5072 copy_entry->object.vm_object,
5073 copy_entry->offset + src_offset,
5074 &copy_size,
5075 dst_object,
5076 entry_offset + dst_offset,
5077 dst_map,
5078 &version,
5079 THREAD_UNINT );
5080
5081 start += copy_size;
5082 src_offset += copy_size;
5083 amount_left -= copy_size;
5084 /*
5085 * Release the object reference
5086 */
5087 vm_object_deallocate(dst_object);
5088 /*
5089 * If a hard error occurred, return it now
5090 */
5091 if (kr != KERN_SUCCESS)
5092 return kr;
5093
5094 if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
5095 || amount_left == 0)
5096 {
5097 /*
5098 * all done with this copy entry, dispose.
5099 */
5100 vm_map_copy_entry_unlink(copy, copy_entry);
5101 vm_object_deallocate(copy_entry->object.vm_object);
5102 vm_map_copy_entry_dispose(copy, copy_entry);
5103
5104 if ((copy_entry = vm_map_copy_first_entry(copy))
5105 == vm_map_copy_to_entry(copy) && amount_left) {
5106 /*
5107 * not finished copying but ran out of source
5108 */
5109 return KERN_INVALID_ADDRESS;
5110 }
5111 src_offset = 0;
5112 }
5113
5114 if (amount_left == 0)
5115 return KERN_SUCCESS;
5116
5117 vm_map_lock_read(dst_map);
5118 if (version.main_timestamp == dst_map->timestamp) {
5119 if (start == entry_end) {
5120 /*
5121 * destination region is split. Use the version
5122 * information to avoid a lookup in the normal
5123 * case.
5124 */
5125 entry = entry->vme_next;
5126 /*
5127 * should be contiguous. Fail if we encounter
5128 * a hole in the destination.
5129 */
5130 if (start != entry->vme_start) {
5131 vm_map_unlock_read(dst_map);
5132 return KERN_INVALID_ADDRESS ;
5133 }
5134 }
5135 } else {
5136 /*
5137 * Map version check failed.
5138 * we must look up the entry because somebody
5139 * might have changed the map behind our backs.
5140 */
5141 RetryLookup:
5142 if (!vm_map_lookup_entry(dst_map, start, &entry))
5143 {
5144 vm_map_unlock_read(dst_map);
5145 return KERN_INVALID_ADDRESS ;
5146 }
5147 }
5148 }/* while */
5149
5150 return KERN_SUCCESS;
5151 }/* vm_map_copy_overwrite_unaligned */
5152
5153 /*
5154 * Routine: vm_map_copy_overwrite_aligned [internal use only]
5155 *
5156 * Description:
5157 * Does all the vm_trickery possible for whole pages.
5158 *
5159 * Implementation:
5160 *
5161 * If there are no permanent objects in the destination,
5162 * and the source and destination map entry zones match,
5163 * and the destination map entry is not shared,
5164 * then the map entries can be deleted and replaced
5165 * with those from the copy. The following code is the
5166 * basic idea of what to do, but there are lots of annoying
5167 * little details about getting protection and inheritance
5168 * right. We should add protection, inheritance, and sharing checks
5169 * to the above pass and make sure that no wiring is involved.
5170 */
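/*
 * Illustrative sketch (not part of the original source) of the
 * fast-path test used below: a destination entry can simply be
 * replaced when it is not shared and is backed either by no object
 * or by temporary (internal, not truly shared) memory; otherwise we
 * fall back to vm_fault_copy().
 *
 *	if ((!entry->is_shared &&
 *	     ((object == VM_OBJECT_NULL) ||
 *	      (object->internal && !object->true_share))) ||
 *	    entry->needs_copy)
 *		... replace the entry with the one from the copy ...
 *	else
 *		... physically copy with vm_fault_copy() ...
 */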
5171
5172 static kern_return_t
5173 vm_map_copy_overwrite_aligned(
5174 vm_map_t dst_map,
5175 vm_map_entry_t tmp_entry,
5176 vm_map_copy_t copy,
5177 vm_map_offset_t start,
5178 #if !BAD_OPTIMIZATION
5179 __unused
5180 #endif /* !BAD_OPTIMIZATION */
5181 pmap_t pmap)
5182 {
5183 vm_object_t object;
5184 vm_map_entry_t copy_entry;
5185 vm_map_size_t copy_size;
5186 vm_map_size_t size;
5187 vm_map_entry_t entry;
5188
5189 while ((copy_entry = vm_map_copy_first_entry(copy))
5190 != vm_map_copy_to_entry(copy))
5191 {
5192 copy_size = (copy_entry->vme_end - copy_entry->vme_start);
5193
5194 entry = tmp_entry;
5195 if (entry == vm_map_to_entry(dst_map)) {
5196 vm_map_unlock(dst_map);
5197 return KERN_INVALID_ADDRESS;
5198 }
5199 size = (entry->vme_end - entry->vme_start);
5200 /*
5201 * Make sure that no holes popped up in the
5202 * address map, and that the protection is
5203 * still valid, in case the map was unlocked
5204 * earlier.
5205 */
5206
5207 if ((entry->vme_start != start) || ((entry->is_sub_map)
5208 && !entry->needs_copy)) {
5209 vm_map_unlock(dst_map);
5210 return(KERN_INVALID_ADDRESS);
5211 }
5212 assert(entry != vm_map_to_entry(dst_map));
5213
5214 /*
5215 * Check protection again
5216 */
5217
5218 if ( ! (entry->protection & VM_PROT_WRITE)) {
5219 vm_map_unlock(dst_map);
5220 return(KERN_PROTECTION_FAILURE);
5221 }
5222
5223 /*
5224 * Adjust to source size first
5225 */
5226
5227 if (copy_size < size) {
5228 vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
5229 size = copy_size;
5230 }
5231
5232 /*
5233 * Adjust to destination size
5234 */
5235
5236 if (size < copy_size) {
5237 vm_map_copy_clip_end(copy, copy_entry,
5238 copy_entry->vme_start + size);
5239 copy_size = size;
5240 }
5241
5242 assert((entry->vme_end - entry->vme_start) == size);
5243 assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
5244 assert((copy_entry->vme_end - copy_entry->vme_start) == size);
5245
5246 /*
5247 * If the destination contains temporary unshared memory,
5248 * we can perform the copy by throwing it away and
5249 * installing the source data.
5250 */
5251
5252 object = entry->object.vm_object;
5253 if ((!entry->is_shared &&
5254 ((object == VM_OBJECT_NULL) ||
5255 (object->internal && !object->true_share))) ||
5256 entry->needs_copy) {
5257 vm_object_t old_object = entry->object.vm_object;
5258 vm_object_offset_t old_offset = entry->offset;
5259 vm_object_offset_t offset;
5260
5261 /*
5262 * Ensure that the source and destination aren't
5263 * identical
5264 */
5265 if (old_object == copy_entry->object.vm_object &&
5266 old_offset == copy_entry->offset) {
5267 vm_map_copy_entry_unlink(copy, copy_entry);
5268 vm_map_copy_entry_dispose(copy, copy_entry);
5269
5270 if (old_object != VM_OBJECT_NULL)
5271 vm_object_deallocate(old_object);
5272
5273 start = tmp_entry->vme_end;
5274 tmp_entry = tmp_entry->vme_next;
5275 continue;
5276 }
5277
5278 if (old_object != VM_OBJECT_NULL) {
5279 if(entry->is_sub_map) {
5280 if(entry->use_pmap) {
5281 #ifndef NO_NESTED_PMAP
5282 pmap_unnest(dst_map->pmap,
5283 (addr64_t)entry->vme_start);
5284 #endif /* NO_NESTED_PMAP */
5285 if(dst_map->mapped) {
5286 /* clean up parent */
5287 /* map/maps */
5288 vm_map_submap_pmap_clean(
5289 dst_map, entry->vme_start,
5290 entry->vme_end,
5291 entry->object.sub_map,
5292 entry->offset);
5293 }
5294 } else {
5295 vm_map_submap_pmap_clean(
5296 dst_map, entry->vme_start,
5297 entry->vme_end,
5298 entry->object.sub_map,
5299 entry->offset);
5300 }
5301 vm_map_deallocate(
5302 entry->object.sub_map);
5303 } else {
5304 if(dst_map->mapped) {
5305 vm_object_pmap_protect(
5306 entry->object.vm_object,
5307 entry->offset,
5308 entry->vme_end
5309 - entry->vme_start,
5310 PMAP_NULL,
5311 entry->vme_start,
5312 VM_PROT_NONE);
5313 } else {
5314 pmap_remove(dst_map->pmap,
5315 (addr64_t)(entry->vme_start),
5316 (addr64_t)(entry->vme_end));
5317 }
5318 vm_object_deallocate(old_object);
5319 }
5320 }
5321
5322 entry->is_sub_map = FALSE;
5323 entry->object = copy_entry->object;
5324 object = entry->object.vm_object;
5325 entry->needs_copy = copy_entry->needs_copy;
5326 entry->wired_count = 0;
5327 entry->user_wired_count = 0;
5328 offset = entry->offset = copy_entry->offset;
5329
5330 vm_map_copy_entry_unlink(copy, copy_entry);
5331 vm_map_copy_entry_dispose(copy, copy_entry);
5332 #if BAD_OPTIMIZATION
5333 /*
5334 * If we turn this optimization back on,
5335 * we need to revisit our use of pmap mappings;
5336 * large copies will cause us to run out and panic.
5337 * This optimization only saved on average 2 us per page if ALL
5338 * the pages in the source were currently mapped
5339 * and ALL the pages in the dest were touched. If fewer
5340 * than 2/3 of the pages were touched, this optimization actually cost more cycles.
5341 */
5342
5343 /*
5344 * Try to aggressively enter physical mappings
5345 * (but avoid uninstantiated objects)
5346 */
5347 if (object != VM_OBJECT_NULL) {
5348 vm_map_offset_t va = entry->vme_start;
5349
5350 while (va < entry->vme_end) {
5351 register vm_page_t m;
5352 vm_prot_t prot;
5353
5354 /*
5355 * Look for the page in the top object
5356 */
5357 prot = entry->protection;
5358 vm_object_lock(object);
5359 vm_object_paging_begin(object);
5360
5361 /*
5362 * ENCRYPTED SWAP:
5363 * If the page is encrypted, skip it:
5364 * we can't let the user see the encrypted
5365 * contents. The page will get decrypted
5366 * on demand when the user generates a
5367 * soft-fault when trying to access it.
5368 */
5369 if ((m = vm_page_lookup(object,offset)) !=
5370 VM_PAGE_NULL && !m->busy &&
5371 !m->fictitious && !m->encrypted &&
5372 (!m->unusual || (!m->error &&
5373 !m->restart && !m->absent &&
5374 (prot & m->page_lock) == 0))) {
5375
5376 m->busy = TRUE;
5377 vm_object_unlock(object);
5378
5379 /*
5380 * Honor COW obligations
5381 */
5382 if (entry->needs_copy)
5383 prot &= ~VM_PROT_WRITE;
5384 #ifdef STACK_ONLY_NX
5385 if (entry->alias != VM_MEMORY_STACK && prot)
5386 prot |= VM_PROT_EXECUTE;
5387 #endif
5388 /* It is our policy to require */
5389 /* explicit sync from anyone */
5390 /* writing code and then */
5391 /* a pc to execute it. */
5392 /* No isync here */
5393
5394 PMAP_ENTER(pmap, va, m, prot,
5395 ((unsigned int)
5396 (m->object->wimg_bits))
5397 & VM_WIMG_MASK,
5398 FALSE);
5399
5400 vm_object_lock(object);
5401 vm_page_lock_queues();
5402 if (!m->active && !m->inactive)
5403 vm_page_activate(m);
5404 vm_page_unlock_queues();
5405 PAGE_WAKEUP_DONE(m);
5406 }
5407 vm_object_paging_end(object);
5408 vm_object_unlock(object);
5409
5410 offset += PAGE_SIZE_64;
5411 va += PAGE_SIZE;
5412 } /* end while (va < entry->vme_end) */
5413 } /* end if (object) */
5414 #endif
5415 /*
5416 * Set up for the next iteration. The map
5417 * has not been unlocked, so the next
5418 * address should be at the end of this
5419 * entry, and the next map entry should be
5420 * the one following it.
5421 */
5422
5423 start = tmp_entry->vme_end;
5424 tmp_entry = tmp_entry->vme_next;
5425 } else {
5426 vm_map_version_t version;
5427 vm_object_t dst_object = entry->object.vm_object;
5428 vm_object_offset_t dst_offset = entry->offset;
5429 kern_return_t r;
5430
5431 /*
5432 * Take an object reference, and record
5433 * the map version information so that the
5434 * map can be safely unlocked.
5435 */
5436
5437 vm_object_reference(dst_object);
5438
5439 /* account for unlock bumping up timestamp */
5440 version.main_timestamp = dst_map->timestamp + 1;
5441
5442 vm_map_unlock(dst_map);
5443
5444 /*
5445 * Copy as much as possible in one pass
5446 */
5447
5448 copy_size = size;
5449 r = vm_fault_copy(
5450 copy_entry->object.vm_object,
5451 copy_entry->offset,
5452 &copy_size,
5453 dst_object,
5454 dst_offset,
5455 dst_map,
5456 &version,
5457 THREAD_UNINT );
5458
5459 /*
5460 * Release the object reference
5461 */
5462
5463 vm_object_deallocate(dst_object);
5464
5465 /*
5466 * If a hard error occurred, return it now
5467 */
5468
5469 if (r != KERN_SUCCESS)
5470 return(r);
5471
5472 if (copy_size != 0) {
5473 /*
5474 * Dispose of the copied region
5475 */
5476
5477 vm_map_copy_clip_end(copy, copy_entry,
5478 copy_entry->vme_start + copy_size);
5479 vm_map_copy_entry_unlink(copy, copy_entry);
5480 vm_object_deallocate(copy_entry->object.vm_object);
5481 vm_map_copy_entry_dispose(copy, copy_entry);
5482 }
5483
5484 /*
5485 * Pick up in the destination map where we left off.
5486 *
5487 * Use the version information to avoid a lookup
5488 * in the normal case.
5489 */
5490
5491 start += copy_size;
5492 vm_map_lock(dst_map);
5493 if (version.main_timestamp == dst_map->timestamp) {
5494 /* We can safely use saved tmp_entry value */
5495
5496 vm_map_clip_end(dst_map, tmp_entry, start);
5497 tmp_entry = tmp_entry->vme_next;
5498 } else {
5499 /* Must do lookup of tmp_entry */
5500
5501 if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
5502 vm_map_unlock(dst_map);
5503 return(KERN_INVALID_ADDRESS);
5504 }
5505 vm_map_clip_start(dst_map, tmp_entry, start);
5506 }
5507 }
5508 }/* while */
5509
5510 return(KERN_SUCCESS);
5511 }/* vm_map_copy_overwrite_aligned */
5512
5513 /*
5514 * Routine: vm_map_copyin_kernel_buffer [internal use only]
5515 *
5516 * Description:
5517 * Copy in data to a kernel buffer from space in the
5518 * source map. The original space may be optionally
5519 * deallocated.
5520 *
5521 * If successful, returns a new copy object.
5522 */
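/*
 * Illustrative sketch (not part of the original source) of the
 * resulting object: the copy header and its data share one kalloc()
 * block, so a single kfree() releases both.
 *
 *	+---------------------+--------------------------+
 *	| struct vm_map_copy  |  len bytes of copied data|
 *	+---------------------+--------------------------+
 *	^copy                  ^copy->cpy_kdata == (void *)(copy + 1)
 */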
5523 static kern_return_t
5524 vm_map_copyin_kernel_buffer(
5525 vm_map_t src_map,
5526 vm_map_offset_t src_addr,
5527 vm_map_size_t len,
5528 boolean_t src_destroy,
5529 vm_map_copy_t *copy_result)
5530 {
5531 kern_return_t kr;
5532 vm_map_copy_t copy;
5533 vm_map_size_t kalloc_size = sizeof(struct vm_map_copy) + len;
5534
5535 copy = (vm_map_copy_t) kalloc(kalloc_size);
5536 if (copy == VM_MAP_COPY_NULL) {
5537 return KERN_RESOURCE_SHORTAGE;
5538 }
5539 copy->type = VM_MAP_COPY_KERNEL_BUFFER;
5540 copy->size = len;
5541 copy->offset = 0;
5542 copy->cpy_kdata = (void *) (copy + 1);
5543 copy->cpy_kalloc_size = kalloc_size;
5544
5545 kr = copyinmap(src_map, src_addr, copy->cpy_kdata, len);
5546 if (kr != KERN_SUCCESS) {
5547 kfree(copy, kalloc_size);
5548 return kr;
5549 }
5550 if (src_destroy) {
5551 (void) vm_map_remove(src_map, vm_map_trunc_page(src_addr),
5552 vm_map_round_page(src_addr + len),
5553 VM_MAP_REMOVE_INTERRUPTIBLE |
5554 VM_MAP_REMOVE_WAIT_FOR_KWIRE |
5555 (src_map == kernel_map) ?
5556 VM_MAP_REMOVE_KUNWIRE : 0);
5557 }
5558 *copy_result = copy;
5559 return KERN_SUCCESS;
5560 }
5561
5562 /*
5563 * Routine: vm_map_copyout_kernel_buffer [internal use only]
5564 *
5565 * Description:
5566 * Copy out data from a kernel buffer into space in the
5567 * destination map. The space may be optionally dynamically
5568 * allocated.
5569 *
5570 * If successful, consumes the copy object.
5571 * Otherwise, the caller is responsible for it.
5572 */
5573 static int vm_map_copyout_kernel_buffer_failures = 0;
5574 static kern_return_t
5575 vm_map_copyout_kernel_buffer(
5576 vm_map_t map,
5577 vm_map_address_t *addr, /* IN/OUT */
5578 vm_map_copy_t copy,
5579 boolean_t overwrite)
5580 {
5581 kern_return_t kr = KERN_SUCCESS;
5582 thread_t thread = current_thread();
5583
5584 if (!overwrite) {
5585
5586 /*
5587 * Allocate space in the target map for the data
5588 */
5589 *addr = 0;
5590 kr = vm_map_enter(map,
5591 addr,
5592 vm_map_round_page(copy->size),
5593 (vm_map_offset_t) 0,
5594 VM_FLAGS_ANYWHERE,
5595 VM_OBJECT_NULL,
5596 (vm_object_offset_t) 0,
5597 FALSE,
5598 VM_PROT_DEFAULT,
5599 VM_PROT_ALL,
5600 VM_INHERIT_DEFAULT);
5601 if (kr != KERN_SUCCESS)
5602 return kr;
5603 }
5604
5605 /*
5606 * Copyout the data from the kernel buffer to the target map.
5607 */
5608 if (thread->map == map) {
5609
5610 /*
5611 * If the target map is the current map, just do
5612 * the copy.
5613 */
5614 if (copyout(copy->cpy_kdata, *addr, copy->size)) {
5615 kr = KERN_INVALID_ADDRESS;
5616 }
5617 }
5618 else {
5619 vm_map_t oldmap;
5620
5621 /*
5622 * If the target map is another map, assume the
5623 * target's address space identity for the duration
5624 * of the copy.
5625 */
5626 vm_map_reference(map);
5627 oldmap = vm_map_switch(map);
5628
5629 if (copyout(copy->cpy_kdata, *addr, copy->size)) {
5630 vm_map_copyout_kernel_buffer_failures++;
5631 kr = KERN_INVALID_ADDRESS;
5632 }
5633
5634 (void) vm_map_switch(oldmap);
5635 vm_map_deallocate(map);
5636 }
5637
5638 if (kr != KERN_SUCCESS) {
5639 /* the copy failed, clean up */
5640 if (!overwrite) {
5641 /*
5642 * Deallocate the space we allocated in the target map.
5643 */
5644 (void) vm_map_remove(map,
5645 vm_map_trunc_page(*addr),
5646 vm_map_round_page(*addr +
5647 vm_map_round_page(copy->size)),
5648 VM_MAP_NO_FLAGS);
5649 *addr = 0;
5650 }
5651 } else {
5652 /* copy was successful, discard the copy structure */
5653 kfree(copy, copy->cpy_kalloc_size);
5654 }
5655
5656 return kr;
5657 }
5658
5659 /*
5660 * Macro: vm_map_copy_insert
5661 *
5662 * Description:
5663 * Link a copy chain ("copy") into a map at the
5664 * specified location (after "where").
5665 * Side effects:
5666 * The copy chain is destroyed.
5667 * Warning:
5668 * The arguments are evaluated multiple times.
5669 */
5670 #define vm_map_copy_insert(map, where, copy) \
5671 MACRO_BEGIN \
5672 vm_map_t VMCI_map; \
5673 vm_map_entry_t VMCI_where; \
5674 vm_map_copy_t VMCI_copy; \
5675 VMCI_map = (map); \
5676 VMCI_where = (where); \
5677 VMCI_copy = (copy); \
5678 ((VMCI_where->vme_next)->vme_prev = vm_map_copy_last_entry(VMCI_copy))\
5679 ->vme_next = (VMCI_where->vme_next); \
5680 ((VMCI_where)->vme_next = vm_map_copy_first_entry(VMCI_copy)) \
5681 ->vme_prev = VMCI_where; \
5682 VMCI_map->hdr.nentries += VMCI_copy->cpy_hdr.nentries; \
5683 UPDATE_FIRST_FREE(VMCI_map, VMCI_map->first_free); \
5684 zfree(vm_map_copy_zone, VMCI_copy); \
5685 MACRO_END
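/*
 * Illustrative sketch (not part of the original source): because the
 * macro evaluates its arguments more than once, callers pass plain
 * variables, as vm_map_copyout() does below.
 *
 *	vm_map_copy_insert(dst_map, last, copy);
 *	// "copy" must not be used after this point:
 *	// its entries now belong to dst_map and its header was zfree'd
 */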
5686
5687 /*
5688 * Routine: vm_map_copyout
5689 *
5690 * Description:
5691 * Copy out a copy chain ("copy") into newly-allocated
5692 * space in the destination map.
5693 *
5694 * If successful, consumes the copy object.
5695 * Otherwise, the caller is responsible for it.
5696 */
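/*
 * Illustrative sketch (not part of the original source): the typical
 * out-of-line data path captures a region from one map and lands it
 * at a newly allocated address in another map.  "src_map",
 * "src_addr", "len" and "dst_map" are hypothetical.
 *
 *	vm_map_copy_t copy;
 *	vm_map_address_t dst_addr;
 *	kern_return_t kr;
 *
 *	kr = vm_map_copyin_common(src_map, src_addr, len,
 *				  FALSE, FALSE, &copy, FALSE);
 *	if (kr != KERN_SUCCESS)
 *		return kr;
 *	kr = vm_map_copyout(dst_map, &dst_addr, copy);
 *	if (kr != KERN_SUCCESS)
 *		vm_map_copy_discard(copy);	// not consumed on failure
 */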
5697 kern_return_t
5698 vm_map_copyout(
5699 vm_map_t dst_map,
5700 vm_map_address_t *dst_addr, /* OUT */
5701 vm_map_copy_t copy)
5702 {
5703 vm_map_size_t size;
5704 vm_map_size_t adjustment;
5705 vm_map_offset_t start;
5706 vm_object_offset_t vm_copy_start;
5707 vm_map_entry_t last;
5708 register
5709 vm_map_entry_t entry;
5710
5711 /*
5712 * Check for null copy object.
5713 */
5714
5715 if (copy == VM_MAP_COPY_NULL) {
5716 *dst_addr = 0;
5717 return(KERN_SUCCESS);
5718 }
5719
5720 /*
5721 * Check for special copy object, created
5722 * by vm_map_copyin_object.
5723 */
5724
5725 if (copy->type == VM_MAP_COPY_OBJECT) {
5726 vm_object_t object = copy->cpy_object;
5727 kern_return_t kr;
5728 vm_object_offset_t offset;
5729
5730 offset = vm_object_trunc_page(copy->offset);
5731 size = vm_map_round_page(copy->size +
5732 (vm_map_size_t)(copy->offset - offset));
5733 *dst_addr = 0;
5734 kr = vm_map_enter(dst_map, dst_addr, size,
5735 (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
5736 object, offset, FALSE,
5737 VM_PROT_DEFAULT, VM_PROT_ALL,
5738 VM_INHERIT_DEFAULT);
5739 if (kr != KERN_SUCCESS)
5740 return(kr);
5741 /* Account for non-pagealigned copy object */
5742 *dst_addr += (vm_map_offset_t)(copy->offset - offset);
5743 zfree(vm_map_copy_zone, copy);
5744 return(KERN_SUCCESS);
5745 }
5746
5747 /*
5748 * Check for special kernel buffer allocated
5749 * by new_ipc_kmsg_copyin.
5750 */
5751
5752 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
5753 return(vm_map_copyout_kernel_buffer(dst_map, dst_addr,
5754 copy, FALSE));
5755 }
5756
5757 /*
5758 * Find space for the data
5759 */
5760
5761 vm_copy_start = vm_object_trunc_page(copy->offset);
5762 size = vm_map_round_page((vm_map_size_t)copy->offset + copy->size)
5763 - vm_copy_start;
5764
5765 StartAgain: ;
5766
5767 vm_map_lock(dst_map);
5768 assert(first_free_is_valid(dst_map));
5769 start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
5770 vm_map_min(dst_map) : last->vme_end;
5771
5772 while (TRUE) {
5773 vm_map_entry_t next = last->vme_next;
5774 vm_map_offset_t end = start + size;
5775
5776 if ((end > dst_map->max_offset) || (end < start)) {
5777 if (dst_map->wait_for_space) {
5778 if (size <= (dst_map->max_offset - dst_map->min_offset)) {
5779 assert_wait((event_t) dst_map,
5780 THREAD_INTERRUPTIBLE);
5781 vm_map_unlock(dst_map);
5782 thread_block(THREAD_CONTINUE_NULL);
5783 goto StartAgain;
5784 }
5785 }
5786 vm_map_unlock(dst_map);
5787 return(KERN_NO_SPACE);
5788 }
5789
5790 if ((next == vm_map_to_entry(dst_map)) ||
5791 (next->vme_start >= end))
5792 break;
5793
5794 last = next;
5795 start = last->vme_end;
5796 }
5797
5798 /*
5799 * Since we're going to just drop the map
5800 * entries from the copy into the destination
5801 * map, they must come from the same pool.
5802 */
5803
5804 if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
5805 /*
5806 * Mismatches occur when dealing with the default
5807 * pager.
5808 */
5809 zone_t old_zone;
5810 vm_map_entry_t next, new;
5811
5812 /*
5813 * Find the zone that the copies were allocated from
5814 */
5815 old_zone = (copy->cpy_hdr.entries_pageable)
5816 ? vm_map_entry_zone
5817 : vm_map_kentry_zone;
5818 entry = vm_map_copy_first_entry(copy);
5819
5820 /*
5821 * Reinitialize the copy so that vm_map_copy_entry_link
5822 * will work.
5823 */
5824 copy->cpy_hdr.nentries = 0;
5825 copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;
5826 vm_map_copy_first_entry(copy) =
5827 vm_map_copy_last_entry(copy) =
5828 vm_map_copy_to_entry(copy);
5829
5830 /*
5831 * Copy each entry.
5832 */
5833 while (entry != vm_map_copy_to_entry(copy)) {
5834 new = vm_map_copy_entry_create(copy);
5835 vm_map_entry_copy_full(new, entry);
5836 new->use_pmap = FALSE; /* clr address space specifics */
5837 vm_map_copy_entry_link(copy,
5838 vm_map_copy_last_entry(copy),
5839 new);
5840 next = entry->vme_next;
5841 zfree(old_zone, entry);
5842 entry = next;
5843 }
5844 }
5845
5846 /*
5847 * Adjust the addresses in the copy chain, and
5848 * reset the region attributes.
5849 */
5850
5851 adjustment = start - vm_copy_start;
5852 for (entry = vm_map_copy_first_entry(copy);
5853 entry != vm_map_copy_to_entry(copy);
5854 entry = entry->vme_next) {
5855 entry->vme_start += adjustment;
5856 entry->vme_end += adjustment;
5857
5858 entry->inheritance = VM_INHERIT_DEFAULT;
5859 entry->protection = VM_PROT_DEFAULT;
5860 entry->max_protection = VM_PROT_ALL;
5861 entry->behavior = VM_BEHAVIOR_DEFAULT;
5862
5863 /*
5864 * If the entry is now wired,
5865 * map the pages into the destination map.
5866 */
5867 if (entry->wired_count != 0) {
5868 register vm_map_offset_t va;
5869 vm_object_offset_t offset;
5870 register vm_object_t object;
5871 vm_prot_t prot;
5872
5873 object = entry->object.vm_object;
5874 offset = entry->offset;
5875 va = entry->vme_start;
5876
5877 pmap_pageable(dst_map->pmap,
5878 entry->vme_start,
5879 entry->vme_end,
5880 TRUE);
5881
5882 while (va < entry->vme_end) {
5883 register vm_page_t m;
5884
5885 /*
5886 * Look up the page in the object.
5887 * Assert that the page will be found in the
5888 * top object:
5889 * either
5890 * the object was newly created by
5891 * vm_object_copy_slowly, and has
5892 * copies of all of the pages from
5893 * the source object
5894 * or
5895 * the object was moved from the old
5896 * map entry; because the old map
5897 * entry was wired, all of the pages
5898 * were in the top-level object.
5899 * (XXX not true if we wire pages for
5900 * reading)
5901 */
5902 vm_object_lock(object);
5903 vm_object_paging_begin(object);
5904
5905 m = vm_page_lookup(object, offset);
5906 if (m == VM_PAGE_NULL || m->wire_count == 0 ||
5907 m->absent)
5908 panic("vm_map_copyout: wiring 0x%x", m);
5909
5910 /*
5911 * ENCRYPTED SWAP:
5912 * The page is assumed to be wired here, so it
5913 * shouldn't be encrypted. Otherwise, we
5914 * couldn't enter it in the page table, since
5915 * we don't want the user to see the encrypted
5916 * data.
5917 */
5918 ASSERT_PAGE_DECRYPTED(m);
5919
5920 m->busy = TRUE;
5921 vm_object_unlock(object);
5922 prot = entry->protection;
5923 #ifdef STACK_ONLY_NX
5924 if (entry->alias != VM_MEMORY_STACK && prot)
5925 prot |= VM_PROT_EXECUTE;
5926 #endif
5927 PMAP_ENTER(dst_map->pmap, va, m, prot,
5928 ((unsigned int)
5929 (m->object->wimg_bits))
5930 & VM_WIMG_MASK,
5931 TRUE);
5932
5933 vm_object_lock(object);
5934 PAGE_WAKEUP_DONE(m);
5935 /* the page is wired, so we don't have to activate */
5936 vm_object_paging_end(object);
5937 vm_object_unlock(object);
5938
5939 offset += PAGE_SIZE_64;
5940 va += PAGE_SIZE;
5941 }
5942 }
5943 else if (size <= vm_map_aggressive_enter_max) {
5944
5945 register vm_map_offset_t va;
5946 vm_object_offset_t offset;
5947 register vm_object_t object;
5948 vm_prot_t prot;
5949
5950 object = entry->object.vm_object;
5951 if (object != VM_OBJECT_NULL) {
5952
5953 offset = entry->offset;
5954 va = entry->vme_start;
5955 while (va < entry->vme_end) {
5956 register vm_page_t m;
5957
5958 /*
5959 * Look up the page in the object.
5960 * Assert that the page will be found
5961 * in the top object if at all...
5962 */
5963 vm_object_lock(object);
5964 vm_object_paging_begin(object);
5965
5966 /*
5967 * ENCRYPTED SWAP:
5968 * If the page is encrypted, skip it:
5969 * we can't let the user see the
5970 * encrypted contents. The page will
5971 * get decrypted on demand when the
5972 * user generates a soft-fault when
5973 * trying to access it.
5974 */
5975 if (((m = vm_page_lookup(object,
5976 offset))
5977 != VM_PAGE_NULL) &&
5978 !m->busy && !m->fictitious &&
5979 !m->encrypted &&
5980 !m->absent && !m->error) {
5981 m->busy = TRUE;
5982 vm_object_unlock(object);
5983
5984 /* honor cow obligations */
5985 prot = entry->protection;
5986 if (entry->needs_copy)
5987 prot &= ~VM_PROT_WRITE;
5988 #ifdef STACK_ONLY_NX
5989 if (entry->alias != VM_MEMORY_STACK && prot)
5990 prot |= VM_PROT_EXECUTE;
5991 #endif
5992 PMAP_ENTER(dst_map->pmap, va,
5993 m, prot,
5994 ((unsigned int)
5995 (m->object->wimg_bits))
5996 & VM_WIMG_MASK,
5997 FALSE);
5998
5999 vm_object_lock(object);
6000 vm_page_lock_queues();
6001 if (!m->active && !m->inactive)
6002 vm_page_activate(m);
6003 vm_page_unlock_queues();
6004 PAGE_WAKEUP_DONE(m);
6005 }
6006 vm_object_paging_end(object);
6007 vm_object_unlock(object);
6008
6009 offset += PAGE_SIZE_64;
6010 va += PAGE_SIZE;
6011 }
6012 }
6013 }
6014 }
6015
6016 /*
6017 * Correct the page alignment for the result
6018 */
6019
6020 *dst_addr = start + (copy->offset - vm_copy_start);
6021
6022 /*
6023 * Update the hints and the map size
6024 */
6025
6026 SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy));
6027
6028 dst_map->size += size;
6029
6030 /*
6031 * Link in the copy
6032 */
6033
6034 vm_map_copy_insert(dst_map, last, copy);
6035
6036 vm_map_unlock(dst_map);
6037
6038 /*
6039 * XXX If wiring_required, call vm_map_pageable
6040 */
6041
6042 return(KERN_SUCCESS);
6043 }
6044
6045 /*
6046 * Routine: vm_map_copyin
6047 *
6048 * Description:
6049 * Copy the specified region (src_addr, len) from the
6050 * source address space (src_map), possibly removing
6051 * the region from the source address space (src_destroy).
6052 *
6053 * Returns:
6054 * A vm_map_copy_t object (copy_result), suitable for
6055 * insertion into another address space (using vm_map_copyout),
6056 * copying over another address space region (using
6057 * vm_map_copy_overwrite). If the copy is unused, it
6058 * should be destroyed (using vm_map_copy_discard).
6059 *
6060 * In/out conditions:
6061 * The source map should not be locked on entry.
6062 */
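/*
 * Illustrative sketch (not part of the original source): passing
 * src_destroy expresses a "move" rather than a "copy", removing the
 * source region once it has been captured.  "src_map", "src_addr"
 * and "len" are hypothetical.
 *
 *	vm_map_copy_t copy;
 *	kern_return_t kr;
 *
 *	kr = vm_map_copyin_common(src_map, src_addr, len,
 *				  TRUE,		// src_destroy
 *				  FALSE,	// src_volatile (unused)
 *				  &copy,
 *				  FALSE);	// use_maxprot
 *	if (kr != KERN_SUCCESS)
 *		return kr;
 *	// "copy" can now go to vm_map_copyout() or
 *	// vm_map_copy_overwrite(), or be released with
 *	// vm_map_copy_discard().
 */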
6063
6064 typedef struct submap_map {
6065 vm_map_t parent_map;
6066 vm_map_offset_t base_start;
6067 vm_map_offset_t base_end;
6068 struct submap_map *next;
6069 } submap_map_t;
6070
6071 kern_return_t
6072 vm_map_copyin_common(
6073 vm_map_t src_map,
6074 vm_map_address_t src_addr,
6075 vm_map_size_t len,
6076 boolean_t src_destroy,
6077 __unused boolean_t src_volatile,
6078 vm_map_copy_t *copy_result, /* OUT */
6079 boolean_t use_maxprot)
6080 {
6081 vm_map_entry_t tmp_entry; /* Result of last map lookup --
6082 * in multi-level lookup, this
6083 * entry contains the actual
6084 * vm_object/offset.
6085 */
6086 register
6087 vm_map_entry_t new_entry = VM_MAP_ENTRY_NULL; /* Map entry for copy */
6088
6089 vm_map_offset_t src_start; /* Start of current entry --
6090 * where copy is taking place now
6091 */
6092 vm_map_offset_t src_end; /* End of entire region to be
6093 * copied */
6094 vm_map_t base_map = src_map;
6095 boolean_t map_share=FALSE;
6096 submap_map_t *parent_maps = NULL;
6097
6098 register
6099 vm_map_copy_t copy; /* Resulting copy */
6100 vm_map_address_t copy_addr;
6101
6102 /*
6103 * Check for copies of zero bytes.
6104 */
6105
6106 if (len == 0) {
6107 *copy_result = VM_MAP_COPY_NULL;
6108 return(KERN_SUCCESS);
6109 }
6110
6111 /*
6112 * Check that the end address doesn't overflow
6113 */
6114 src_end = src_addr + len;
6115 if (src_end < src_addr)
6116 return KERN_INVALID_ADDRESS;
6117
6118 /*
6119 * If the copy is sufficiently small, use a kernel buffer instead
6120 * of making a virtual copy. The theory is that the cost of
6121 * setting up VM (and taking C-O-W faults) dominates the copy costs
6122 * for small regions.
6123 */
6124 if ((len < msg_ool_size_small) && !use_maxprot)
6125 return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
6126 src_destroy, copy_result);
6127
6128 /*
6129 * Compute (page aligned) start and end of region
6130 */
6131 src_start = vm_map_trunc_page(src_addr);
6132 src_end = vm_map_round_page(src_end);
6133
6134 XPR(XPR_VM_MAP, "vm_map_copyin_common map 0x%x addr 0x%x len 0x%x dest %d\n", (natural_t)src_map, src_addr, len, src_destroy, 0);
6135
6136 /*
6137 * Allocate a header element for the list.
6138 *
6139 * Use the start and end in the header to
6140 * remember the endpoints prior to rounding.
6141 */
6142
6143 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
6144 vm_map_copy_first_entry(copy) =
6145 vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
6146 copy->type = VM_MAP_COPY_ENTRY_LIST;
6147 copy->cpy_hdr.nentries = 0;
6148 copy->cpy_hdr.entries_pageable = TRUE;
6149
6150 copy->offset = src_addr;
6151 copy->size = len;
6152
6153 new_entry = vm_map_copy_entry_create(copy);
6154
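/*
 * RETURN(x) is the common error-exit path for the copy loop below:
 * unlock the current source map, drop the reference taken on any
 * submap we descended into, dispose of a half-built entry, discard
 * the partial copy, and free the saved parent-map chain before
 * returning x.
 */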
6155 #define RETURN(x) \
6156 MACRO_BEGIN \
6157 vm_map_unlock(src_map); \
6158 if(src_map != base_map) \
6159 vm_map_deallocate(src_map); \
6160 if (new_entry != VM_MAP_ENTRY_NULL) \
6161 vm_map_copy_entry_dispose(copy,new_entry); \
6162 vm_map_copy_discard(copy); \
6163 { \
6164 submap_map_t *_ptr; \
6165 \
6166 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
6167 parent_maps=parent_maps->next; \
6168 if (_ptr->parent_map != base_map) \
6169 vm_map_deallocate(_ptr->parent_map); \
6170 kfree(_ptr, sizeof(submap_map_t)); \
6171 } \
6172 } \
6173 MACRO_RETURN(x); \
6174 MACRO_END
6175
6176 /*
6177 * Find the beginning of the region.
6178 */
6179
6180 vm_map_lock(src_map);
6181
6182 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry))
6183 RETURN(KERN_INVALID_ADDRESS);
6184 if(!tmp_entry->is_sub_map) {
6185 vm_map_clip_start(src_map, tmp_entry, src_start);
6186 }
6187 /* set for later submap fix-up */
6188 copy_addr = src_start;
6189
6190 /*
6191 * Go through entries until we get to the end.
6192 */
6193
6194 while (TRUE) {
6195 register
6196 vm_map_entry_t src_entry = tmp_entry; /* Top-level entry */
6197 vm_map_size_t src_size; /* Size of source
6198 * map entry (in both
6199 * maps)
6200 */
6201
6202 register
6203 vm_object_t src_object; /* Object to copy */
6204 vm_object_offset_t src_offset;
6205
6206 boolean_t src_needs_copy; /* Should source map
6207 * be made read-only
6208 * for copy-on-write?
6209 */
6210
6211 boolean_t new_entry_needs_copy; /* Will new entry be COW? */
6212
6213 boolean_t was_wired; /* Was source wired? */
6214 vm_map_version_t version; /* Version before locks
6215 * dropped to make copy
6216 */
6217 kern_return_t result; /* Return value from
6218 * copy_strategically.
6219 */
6220 while(tmp_entry->is_sub_map) {
6221 vm_map_size_t submap_len;
6222 submap_map_t *ptr;
6223
6224 ptr = (submap_map_t *)kalloc(sizeof(submap_map_t));
6225 ptr->next = parent_maps;
6226 parent_maps = ptr;
6227 ptr->parent_map = src_map;
6228 ptr->base_start = src_start;
6229 ptr->base_end = src_end;
6230 submap_len = tmp_entry->vme_end - src_start;
6231 if(submap_len > (src_end-src_start))
6232 submap_len = src_end-src_start;
6233 ptr->base_start += submap_len;
6234
6235 src_start -= tmp_entry->vme_start;
6236 src_start += tmp_entry->offset;
6237 src_end = src_start + submap_len;
6238 src_map = tmp_entry->object.sub_map;
6239 vm_map_lock(src_map);
6240 /* keep an outstanding reference for all maps in */
6241 /* the parent tree except the base map */
6242 vm_map_reference(src_map);
6243 vm_map_unlock(ptr->parent_map);
6244 if (!vm_map_lookup_entry(
6245 src_map, src_start, &tmp_entry))
6246 RETURN(KERN_INVALID_ADDRESS);
6247 map_share = TRUE;
6248 if(!tmp_entry->is_sub_map)
6249 vm_map_clip_start(src_map, tmp_entry, src_start);
6250 src_entry = tmp_entry;
6251 }
6252 if ((tmp_entry->object.vm_object != VM_OBJECT_NULL) &&
6253 (tmp_entry->object.vm_object->phys_contiguous)) {
6254 /* This is not supported for now. In the future */
6255 /* we will need to detect the phys_contig */
6256 /* condition and then upgrade copy_slowly */
6257 /* to do physical copy from the device mem */
6258 /* based object. We can piggy-back off of */
6259 /* the was_wired boolean to set up the */
6260 /* proper handling */
6261 RETURN(KERN_PROTECTION_FAILURE);
6262 }
6263 /*
6264 * Create a new address map entry to hold the result.
6265 * Fill in the fields from the appropriate source entries.
6266 * We must unlock the source map to do this if we need
6267 * to allocate a map entry.
6268 */
6269 if (new_entry == VM_MAP_ENTRY_NULL) {
6270 version.main_timestamp = src_map->timestamp;
6271 vm_map_unlock(src_map);
6272
6273 new_entry = vm_map_copy_entry_create(copy);
6274
6275 vm_map_lock(src_map);
6276 if ((version.main_timestamp + 1) != src_map->timestamp) {
6277 if (!vm_map_lookup_entry(src_map, src_start,
6278 &tmp_entry)) {
6279 RETURN(KERN_INVALID_ADDRESS);
6280 }
6281 vm_map_clip_start(src_map, tmp_entry, src_start);
6282 continue; /* restart w/ new tmp_entry */
6283 }
6284 }
6285
6286 /*
6287 * Verify that the region can be read.
6288 */
6289 if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE &&
6290 !use_maxprot) ||
6291 (src_entry->max_protection & VM_PROT_READ) == 0)
6292 RETURN(KERN_PROTECTION_FAILURE);
6293
6294 /*
6295 * Clip against the endpoints of the entire region.
6296 */
6297
6298 vm_map_clip_end(src_map, src_entry, src_end);
6299
6300 src_size = src_entry->vme_end - src_start;
6301 src_object = src_entry->object.vm_object;
6302 src_offset = src_entry->offset;
6303 was_wired = (src_entry->wired_count != 0);
6304
6305 vm_map_entry_copy(new_entry, src_entry);
6306 new_entry->use_pmap = FALSE; /* clr address space specifics */
6307
6308 /*
6309 * Attempt non-blocking copy-on-write optimizations.
6310 */
6311
6312 if (src_destroy &&
6313 (src_object == VM_OBJECT_NULL ||
6314 (src_object->internal && !src_object->true_share
6315 && !map_share))) {
6316 /*
6317 * If we are destroying the source, and the object
6318 * is internal, we can move the object reference
6319 * from the source to the copy. The copy is
6320 * copy-on-write only if the source is.
6321 * We make another reference to the object, because
6322 * destroying the source entry will deallocate it.
6323 */
6324 vm_object_reference(src_object);
6325
6326 /*
6327 * Copy is always unwired. vm_map_copy_entry
6328 * sets its wired count to zero.
6329 */
6330
6331 goto CopySuccessful;
6332 }
6333
6334
6335 RestartCopy:
6336 XPR(XPR_VM_MAP, "vm_map_copyin_common src_obj 0x%x ent 0x%x obj 0x%x was_wired %d\n",
6337 src_object, new_entry, new_entry->object.vm_object,
6338 was_wired, 0);
6339 if ((src_object == VM_OBJECT_NULL ||
6340 (!was_wired && !map_share && !tmp_entry->is_shared)) &&
6341 vm_object_copy_quickly(
6342 &new_entry->object.vm_object,
6343 src_offset,
6344 src_size,
6345 &src_needs_copy,
6346 &new_entry_needs_copy)) {
6347
6348 new_entry->needs_copy = new_entry_needs_copy;
6349
6350 /*
6351 * Handle copy-on-write obligations
6352 */
6353
6354 if (src_needs_copy && !tmp_entry->needs_copy) {
6355 vm_prot_t prot;
6356
6357 prot = src_entry->protection & ~VM_PROT_WRITE;
6358 #ifdef STACK_ONLY_NX
6359 if (src_entry->alias != VM_MEMORY_STACK && prot)
6360 prot |= VM_PROT_EXECUTE;
6361 #endif
6362 vm_object_pmap_protect(
6363 src_object,
6364 src_offset,
6365 src_size,
6366 (src_entry->is_shared ?
6367 PMAP_NULL
6368 : src_map->pmap),
6369 src_entry->vme_start,
6370 prot);
6371
6372 tmp_entry->needs_copy = TRUE;
6373 }
6374
6375 /*
6376 * The map has never been unlocked, so it's safe
6377 * to move to the next entry rather than doing
6378 * another lookup.
6379 */
6380
6381 goto CopySuccessful;
6382 }
6383
6384 /*
6385 * Take an object reference, so that we may
6386 * release the map lock(s).
6387 */
6388
6389 assert(src_object != VM_OBJECT_NULL);
6390 vm_object_reference(src_object);
6391
6392 /*
6393 * Record the timestamp for later verification.
6394 * Unlock the map.
6395 */
6396
6397 version.main_timestamp = src_map->timestamp;
6398 vm_map_unlock(src_map); /* Increments timestamp once! */
6399
6400 /*
6401 * Perform the copy
6402 */
6403
6404 if (was_wired) {
6405 CopySlowly:
6406 vm_object_lock(src_object);
6407 result = vm_object_copy_slowly(
6408 src_object,
6409 src_offset,
6410 src_size,
6411 THREAD_UNINT,
6412 &new_entry->object.vm_object);
6413 new_entry->offset = 0;
6414 new_entry->needs_copy = FALSE;
6415
6416 }
6417 else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
6418 (tmp_entry->is_shared || map_share)) {
6419 vm_object_t new_object;
6420
6421 vm_object_lock(src_object);
6422 new_object = vm_object_copy_delayed(
6423 src_object,
6424 src_offset,
6425 src_size);
6426 if (new_object == VM_OBJECT_NULL)
6427 goto CopySlowly;
6428
6429 new_entry->object.vm_object = new_object;
6430 new_entry->needs_copy = TRUE;
6431 result = KERN_SUCCESS;
6432
6433 } else {
6434 result = vm_object_copy_strategically(src_object,
6435 src_offset,
6436 src_size,
6437 &new_entry->object.vm_object,
6438 &new_entry->offset,
6439 &new_entry_needs_copy);
6440
6441 new_entry->needs_copy = new_entry_needs_copy;
6442 }
6443
6444 if (result != KERN_SUCCESS &&
6445 result != KERN_MEMORY_RESTART_COPY) {
6446 vm_map_lock(src_map);
6447 RETURN(result);
6448 }
6449
6450 /*
6451 * Throw away the extra reference
6452 */
6453
6454 vm_object_deallocate(src_object);
6455
6456 /*
6457 * Verify that the map has not substantially
6458 * changed while the copy was being made.
6459 */
6460
6461 vm_map_lock(src_map);
6462
6463 if ((version.main_timestamp + 1) == src_map->timestamp)
6464 goto VerificationSuccessful;
6465
6466 /*
6467 * Simple version comparison failed.
6468 *
6469 * Retry the lookup and verify that the
6470 * same object/offset are still present.
6471 *
6472 * [Note: a memory manager that colludes with
6473 * the calling task can detect that we have
6474 * cheated. While the map was unlocked, the
6475 * mapping could have been changed and restored.]
6476 */
6477
6478 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
6479 RETURN(KERN_INVALID_ADDRESS);
6480 }
6481
6482 src_entry = tmp_entry;
6483 vm_map_clip_start(src_map, src_entry, src_start);
6484
6485 if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) &&
6486 !use_maxprot) ||
6487 ((src_entry->max_protection & VM_PROT_READ) == 0))
6488 goto VerificationFailed;
6489
6490 if (src_entry->vme_end < new_entry->vme_end)
6491 src_size = (new_entry->vme_end = src_entry->vme_end) - src_start;
6492
6493 if ((src_entry->object.vm_object != src_object) ||
6494 (src_entry->offset != src_offset) ) {
6495
6496 /*
6497 * Verification failed.
6498 *
6499 * Start over with this top-level entry.
6500 */
6501
6502 VerificationFailed: ;
6503
6504 vm_object_deallocate(new_entry->object.vm_object);
6505 tmp_entry = src_entry;
6506 continue;
6507 }
6508
6509 /*
6510 * Verification succeeded.
6511 */
6512
6513 VerificationSuccessful: ;
6514
6515 if (result == KERN_MEMORY_RESTART_COPY)
6516 goto RestartCopy;
6517
6518 /*
6519 * Copy succeeded.
6520 */
6521
6522 CopySuccessful: ;
6523
6524 /*
6525 * Link in the new copy entry.
6526 */
6527
6528 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
6529 new_entry);
6530
6531 /*
6532 * Determine whether the entire region
6533 * has been copied.
6534 */
6535 src_start = new_entry->vme_end;
6536 new_entry = VM_MAP_ENTRY_NULL;
6537 while ((src_start >= src_end) && (src_end != 0)) {
6538 if (src_map != base_map) {
6539 submap_map_t *ptr;
6540
6541 ptr = parent_maps;
6542 assert(ptr != NULL);
6543 parent_maps = parent_maps->next;
6544 vm_map_unlock(src_map);
6545 vm_map_deallocate(src_map);
6546 vm_map_lock(ptr->parent_map);
6547 src_map = ptr->parent_map;
6548 src_start = ptr->base_start;
6549 src_end = ptr->base_end;
6550 if ((src_end > src_start) &&
6551 !vm_map_lookup_entry(
6552 src_map, src_start, &tmp_entry))
6553 RETURN(KERN_INVALID_ADDRESS);
6554 kfree(ptr, sizeof(submap_map_t));
6555 if(parent_maps == NULL)
6556 map_share = FALSE;
6557 src_entry = tmp_entry->vme_prev;
6558 } else
6559 break;
6560 }
6561 if ((src_start >= src_end) && (src_end != 0))
6562 break;
6563
6564 /*
6565 * Verify that there are no gaps in the region
6566 */
6567
6568 tmp_entry = src_entry->vme_next;
6569 if ((tmp_entry->vme_start != src_start) ||
6570 (tmp_entry == vm_map_to_entry(src_map)))
6571 RETURN(KERN_INVALID_ADDRESS);
6572 }
6573
6574 /*
6575 * If the source should be destroyed, do it now, since the
6576 * copy was successful.
6577 */
6578 if (src_destroy) {
6579 (void) vm_map_delete(src_map,
6580 vm_map_trunc_page(src_addr),
6581 src_end,
6582 (src_map == kernel_map) ?
6583 VM_MAP_REMOVE_KUNWIRE :
6584 VM_MAP_NO_FLAGS,
6585 VM_MAP_NULL);
6586 }
6587
6588 vm_map_unlock(src_map);
6589
6590 /* Fix-up start and end points in copy. This is necessary */
6591 /* when the various entries in the copy object were picked */
6592 /* up from different sub-maps */
6593
6594 tmp_entry = vm_map_copy_first_entry(copy);
6595 while (tmp_entry != vm_map_copy_to_entry(copy)) {
6596 tmp_entry->vme_end = copy_addr +
6597 (tmp_entry->vme_end - tmp_entry->vme_start);
6598 tmp_entry->vme_start = copy_addr;
6599 copy_addr += tmp_entry->vme_end - tmp_entry->vme_start;
6600 tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next;
6601 }
6602
6603 *copy_result = copy;
6604 return(KERN_SUCCESS);
6605
6606 #undef RETURN
6607 }
6608
6609 /*
6610 * vm_map_copyin_object:
6611 *
6612 * Create a copy object from an object.
6613 * Our caller donates an object reference.
6614 */
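/*
 * Illustrative sketch (hypothetical caller, error handling omitted):
 * because the reference is donated, a caller that wants to keep
 * using "object" afterwards takes an extra reference first.
 *
 *	vm_object_reference(object);
 *	kr = vm_map_copyin_object(object, offset, size, &copy);
 */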
6615
6616 kern_return_t
6617 vm_map_copyin_object(
6618 vm_object_t object,
6619 vm_object_offset_t offset, /* offset of region in object */
6620 vm_object_size_t size, /* size of region in object */
6621 vm_map_copy_t *copy_result) /* OUT */
6622 {
6623 vm_map_copy_t copy; /* Resulting copy */
6624
6625 /*
6626 * We drop the object into a special copy object
6627 * that contains the object directly.
6628 */
6629
6630 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
6631 copy->type = VM_MAP_COPY_OBJECT;
6632 copy->cpy_object = object;
6633 copy->offset = offset;
6634 copy->size = size;
6635
6636 *copy_result = copy;
6637 return(KERN_SUCCESS);
6638 }
6639
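/*
 * vm_map_fork_share:
 *
 * Handle a VM_INHERIT_SHARE entry during vm_map_fork: make sure the
 * entry has a suitable object (allocating one, or creating a shadow,
 * if needed), switch a symmetric object to the delayed copy strategy,
 * and link a clone of the entry into the new map with both entries
 * marked as shared.
 */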
6640 static void
6641 vm_map_fork_share(
6642 vm_map_t old_map,
6643 vm_map_entry_t old_entry,
6644 vm_map_t new_map)
6645 {
6646 vm_object_t object;
6647 vm_map_entry_t new_entry;
6648
6649 /*
6650 * New sharing code. New map entry
6651 * references original object. Internal
6652 * objects use asynchronous copy algorithm for
6653 * future copies. First make sure we have
6654 * the right object. If we need a shadow,
6655 * or someone else already has one, then
6656 * make a new shadow and share it.
6657 */
6658
6659 object = old_entry->object.vm_object;
6660 if (old_entry->is_sub_map) {
6661 assert(old_entry->wired_count == 0);
6662 #ifndef NO_NESTED_PMAP
6663 if(old_entry->use_pmap) {
6664 kern_return_t result;
6665
6666 result = pmap_nest(new_map->pmap,
6667 (old_entry->object.sub_map)->pmap,
6668 (addr64_t)old_entry->vme_start,
6669 (addr64_t)old_entry->vme_start,
6670 (uint64_t)(old_entry->vme_end - old_entry->vme_start));
6671 if(result)
6672 panic("vm_map_fork_share: pmap_nest failed!");
6673 }
6674 #endif /* NO_NESTED_PMAP */
6675 } else if (object == VM_OBJECT_NULL) {
6676 object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
6677 old_entry->vme_start));
6678 old_entry->offset = 0;
6679 old_entry->object.vm_object = object;
6680 assert(!old_entry->needs_copy);
6681 } else if (object->copy_strategy !=
6682 MEMORY_OBJECT_COPY_SYMMETRIC) {
6683
6684 /*
6685 * We are already using an asymmetric
6686 * copy, and therefore we already have
6687 * the right object.
6688 */
6689
6690 assert(! old_entry->needs_copy);
6691 }
6692 else if (old_entry->needs_copy || /* case 1 */
6693 object->shadowed || /* case 2 */
6694 (!object->true_share && /* case 3 */
6695 !old_entry->is_shared &&
6696 (object->size >
6697 (vm_map_size_t)(old_entry->vme_end -
6698 old_entry->vme_start)))) {
6699
6700 /*
6701 * We need to create a shadow.
6702 * There are three cases here.
6703 * In the first case, we need to
6704 * complete a deferred symmetrical
6705 * copy that we participated in.
6706 * In the second and third cases,
6707 * we need to create the shadow so
6708 * that changes that we make to the
6709 * object do not interfere with
6710 * any symmetrical copies which
6711 * have occurred (case 2) or which
6712 * might occur (case 3).
6713 *
6714 * The first case is when we had
6715 * deferred shadow object creation
6716 * via the entry->needs_copy mechanism.
6717 * This mechanism only works when
6718 * only one entry points to the source
6719 * object, and we are about to create
6720 * a second entry pointing to the
6721 * same object. The problem is that
6722 * there is no way of mapping from
6723 * an object to the entries pointing
6724 * to it. (Deferred shadow creation
6725 * works with one entry because it occurs
6726 * at fault time, and we walk from the
6727 * entry to the object when handling
6728 * the fault.)
6729 *
6730 * The second case is when the object
6731 * to be shared has already been copied
6732 * with a symmetric copy, but we point
6733 * directly to the object without
6734 * needs_copy set in our entry. (This
6735 * can happen because different ranges
6736 * of an object can be pointed to by
6737 * different entries. In particular,
6738 * a single entry pointing to an object
6739 * can be split by a call to vm_inherit,
6740 * which, combined with task_create, can
6741 * result in the different entries
6742 * having different needs_copy values.)
6743 * The shadowed flag in the object allows
6744 * us to detect this case. The problem
6745 * with this case is that if this object
6746 * has or will have shadows, then we
6747 * must not perform an asymmetric copy
6748 * of this object, since such a copy
6749 * allows the object to be changed, which
6750 * will break the previous symmetrical
6751 * copies (which rely upon the object
6752 * not changing). In a sense, the shadowed
6753 * flag says "don't change this object".
6754 * We fix this by creating a shadow
6755 * object for this object, and sharing
6756 * that. This works because we are free
6757 * to change the shadow object (and thus
6758 * to use an asymmetric copy strategy);
6759 * this is also semantically correct,
6760 * since this object is temporary, and
6761 * therefore a copy of the object is
6762 * as good as the object itself. (This
6763 * is not true for permanent objects,
6764 * since the pager needs to see changes,
6765 * which won't happen if the changes
6766 * are made to a copy.)
6767 *
6768 * The third case is when the object
6769 * to be shared has parts sticking
6770 * outside of the entry we're working
6771 * with, and thus may in the future
6772 * be subject to a symmetrical copy.
6773 * (This is a preemptive version of
6774 * case 2.)
6775 */
6776
6777 assert(!(object->shadowed && old_entry->is_shared));
6778 vm_object_shadow(&old_entry->object.vm_object,
6779 &old_entry->offset,
6780 (vm_map_size_t) (old_entry->vme_end -
6781 old_entry->vme_start));
6782
6783 /*
6784 * If we're making a shadow for other than
6785 * copy on write reasons, then we have
6786 * to remove write permission.
6787 */
6788
6789 if (!old_entry->needs_copy &&
6790 (old_entry->protection & VM_PROT_WRITE)) {
6791 vm_prot_t prot;
6792
6793 prot = old_entry->protection & ~VM_PROT_WRITE;
6794 #ifdef STACK_ONLY_NX
6795 if (old_entry->alias != VM_MEMORY_STACK && prot)
6796 prot |= VM_PROT_EXECUTE;
6797 #endif
6798 if (old_map->mapped) {
6799 vm_object_pmap_protect(
6800 old_entry->object.vm_object,
6801 old_entry->offset,
6802 (old_entry->vme_end -
6803 old_entry->vme_start),
6804 PMAP_NULL,
6805 old_entry->vme_start,
6806 prot);
6807 } else {
6808 pmap_protect(old_map->pmap,
6809 old_entry->vme_start,
6810 old_entry->vme_end,
6811 prot);
6812 }
6813 }
6814
6815 old_entry->needs_copy = FALSE;
6816 object = old_entry->object.vm_object;
6817 }
6818
6819 /*
6820 * If object was using a symmetric copy strategy,
6821 * change its copy strategy to the default
6822 * asymmetric copy strategy, which is copy_delay
6823 * in the non-norma case and copy_call in the
6824 * norma case. Bump the reference count for the
6825 * new entry.
6826 */
6827
6828 if(old_entry->is_sub_map) {
6829 vm_map_lock(old_entry->object.sub_map);
6830 vm_map_reference(old_entry->object.sub_map);
6831 vm_map_unlock(old_entry->object.sub_map);
6832 } else {
6833 vm_object_lock(object);
6834 object->ref_count++;
6835 vm_object_res_reference(object);
6836 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
6837 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
6838 }
6839 vm_object_unlock(object);
6840 }
6841
6842 /*
6843 * Clone the entry, using object ref from above.
6844 * Mark both entries as shared.
6845 */
6846
6847 new_entry = vm_map_entry_create(new_map);
6848 vm_map_entry_copy(new_entry, old_entry);
6849 old_entry->is_shared = TRUE;
6850 new_entry->is_shared = TRUE;
6851
6852 /*
6853 * Insert the entry into the new map -- we
6854 * know we're inserting at the end of the new
6855 * map.
6856 */
6857
6858 vm_map_entry_link(new_map, vm_map_last_entry(new_map), new_entry);
6859
6860 /*
6861 * Update the physical map
6862 */
6863
6864 if (old_entry->is_sub_map) {
6865 /* Bill Angell pmap support goes here */
6866 } else {
6867 pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
6868 old_entry->vme_end - old_entry->vme_start,
6869 old_entry->vme_start);
6870 }
6871 }
6872
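/*
 * vm_map_fork_copy:
 *
 * Slow path for a VM_INHERIT_COPY entry during vm_map_fork: copy the
 * region with vm_map_copyin_maxprot and insert the result into the
 * new map.  Returns TRUE if the copy was inserted, FALSE if the
 * region could not be copied and should simply be skipped; in both
 * cases *old_entry_p is updated so the caller can continue its
 * traversal.
 */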
6873 static boolean_t
6874 vm_map_fork_copy(
6875 vm_map_t old_map,
6876 vm_map_entry_t *old_entry_p,
6877 vm_map_t new_map)
6878 {
6879 vm_map_entry_t old_entry = *old_entry_p;
6880 vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start;
6881 vm_map_offset_t start = old_entry->vme_start;
6882 vm_map_copy_t copy;
6883 vm_map_entry_t last = vm_map_last_entry(new_map);
6884
6885 vm_map_unlock(old_map);
6886 /*
6887 * Use maxprot version of copyin because we
6888 * care about whether this memory can ever
6889 * be accessed, not just whether it's accessible
6890 * right now.
6891 */
6892 if (vm_map_copyin_maxprot(old_map, start, entry_size, FALSE, &copy)
6893 != KERN_SUCCESS) {
6894 /*
6895 * The map might have changed while it
6896 * was unlocked; check it again. Skip
6897 * any blank space or permanently
6898 * unreadable region.
6899 */
6900 vm_map_lock(old_map);
6901 if (!vm_map_lookup_entry(old_map, start, &last) ||
6902 (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) {
6903 last = last->vme_next;
6904 }
6905 *old_entry_p = last;
6906
6907 /*
6908 * XXX For some error returns, want to
6909 * XXX skip to the next element. Note
6910 * that INVALID_ADDRESS and
6911 * PROTECTION_FAILURE are handled above.
6912 */
6913
6914 return FALSE;
6915 }
6916
6917 /*
6918 * Insert the copy into the new map
6919 */
6920
6921 vm_map_copy_insert(new_map, last, copy);
6922
6923 /*
6924 * Pick up the traversal at the end of
6925 * the copied region.
6926 */
6927
6928 vm_map_lock(old_map);
6929 start += entry_size;
6930 if (! vm_map_lookup_entry(old_map, start, &last)) {
6931 last = last->vme_next;
6932 } else {
6933 vm_map_clip_start(old_map, last, start);
6934 }
6935 *old_entry_p = last;
6936
6937 return TRUE;
6938 }
6939
6940 /*
6941 * vm_map_fork:
6942 *
6943 * Create and return a new map based on the old
6944 * map, according to the inheritance values on the
6945 * regions in that map.
6946 *
6947 * The source map must not be locked.
6948 */
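/*
 * Illustrative sketch (hypothetical caller): a child address space is
 * typically built at task-creation time from the parent's map, e.g.
 *
 *	new_map = vm_map_fork(parent_task->map);
 *
 * What ends up in new_map depends on each region's inheritance
 * attribute (NONE, SHARE or COPY), as handled below.
 */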
6949 vm_map_t
6950 vm_map_fork(
6951 vm_map_t old_map)
6952 {
6953 pmap_t new_pmap = pmap_create(
6954 (vm_map_size_t) 0,
6955 task_has_64BitAddr(current_task()));
6956 vm_map_t new_map;
6957 vm_map_entry_t old_entry;
6958 vm_map_size_t new_size = 0, entry_size;
6959 vm_map_entry_t new_entry;
6960 boolean_t src_needs_copy;
6961 boolean_t new_entry_needs_copy;
6962
6963 vm_map_reference_swap(old_map);
6964 vm_map_lock(old_map);
6965
6966 new_map = vm_map_create(new_pmap,
6967 old_map->min_offset,
6968 old_map->max_offset,
6969 old_map->hdr.entries_pageable);
6970
6971 for (
6972 old_entry = vm_map_first_entry(old_map);
6973 old_entry != vm_map_to_entry(old_map);
6974 ) {
6975
6976 entry_size = old_entry->vme_end - old_entry->vme_start;
6977
6978 switch (old_entry->inheritance) {
6979 case VM_INHERIT_NONE:
6980 break;
6981
6982 case VM_INHERIT_SHARE:
6983 vm_map_fork_share(old_map, old_entry, new_map);
6984 new_size += entry_size;
6985 break;
6986
6987 case VM_INHERIT_COPY:
6988
6989 /*
6990 * Inline the copy_quickly case;
6991 * upon failure, fall back on call
6992 * to vm_map_fork_copy.
6993 */
6994
6995 if(old_entry->is_sub_map)
6996 break;
6997 if ((old_entry->wired_count != 0) ||
6998 ((old_entry->object.vm_object != NULL) &&
6999 (old_entry->object.vm_object->true_share))) {
7000 goto slow_vm_map_fork_copy;
7001 }
7002
7003 new_entry = vm_map_entry_create(new_map);
7004 vm_map_entry_copy(new_entry, old_entry);
7005 /* clear address space specifics */
7006 new_entry->use_pmap = FALSE;
7007
7008 if (! vm_object_copy_quickly(
7009 &new_entry->object.vm_object,
7010 old_entry->offset,
7011 (old_entry->vme_end -
7012 old_entry->vme_start),
7013 &src_needs_copy,
7014 &new_entry_needs_copy)) {
7015 vm_map_entry_dispose(new_map, new_entry);
7016 goto slow_vm_map_fork_copy;
7017 }
7018
7019 /*
7020 * Handle copy-on-write obligations
7021 */
7022
7023 if (src_needs_copy && !old_entry->needs_copy) {
7024 vm_prot_t prot;
7025
7026 prot = old_entry->protection & ~VM_PROT_WRITE;
7027 #ifdef STACK_ONLY_NX
7028 if (old_entry->alias != VM_MEMORY_STACK && prot)
7029 prot |= VM_PROT_EXECUTE;
7030 #endif
7031 vm_object_pmap_protect(
7032 old_entry->object.vm_object,
7033 old_entry->offset,
7034 (old_entry->vme_end -
7035 old_entry->vme_start),
7036 ((old_entry->is_shared
7037 || old_map->mapped)
7038 ? PMAP_NULL :
7039 old_map->pmap),
7040 old_entry->vme_start,
7041 prot);
7042
7043 old_entry->needs_copy = TRUE;
7044 }
7045 new_entry->needs_copy = new_entry_needs_copy;
7046
7047 /*
7048 * Insert the entry at the end
7049 * of the map.
7050 */
7051
7052 vm_map_entry_link(new_map, vm_map_last_entry(new_map),
7053 new_entry);
7054 new_size += entry_size;
7055 break;
7056
7057 slow_vm_map_fork_copy:
7058 if (vm_map_fork_copy(old_map, &old_entry, new_map)) {
7059 new_size += entry_size;
7060 }
7061 continue;
7062 }
7063 old_entry = old_entry->vme_next;
7064 }
7065
7066 new_map->size = new_size;
7067 vm_map_unlock(old_map);
7068 vm_map_deallocate(old_map);
7069
7070 return(new_map);
7071 }
7072
7073
7074 /*
7075 * vm_map_lookup_locked:
7076 *
7077 * Finds the VM object, offset, and
7078 * protection for a given virtual address in the
7079 * specified map, assuming a page fault of the
7080 * type specified.
7081 *
7082 * Returns the (object, offset, protection) for
7083 * this address, whether it is wired down, and whether
7084 * this map has the only reference to the data in question.
7085 * In order to later verify this lookup, a "version"
7086 * is returned.
7087 *
7088 * The map MUST be locked by the caller and WILL be
7089 * locked on exit. In order to guarantee the
7090 * existence of the returned object, it is returned
7091 * locked.
7092 *
7093 * If a lookup is requested with "write protection"
7094 * specified, the map may be changed to perform virtual
7095 * copying operations, although the data referenced will
7096 * remain the same.
7097 */
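/*
 * Illustrative sketch (hypothetical caller, simplified from a fault
 * handler; error handling omitted): the caller read-locks the map,
 * performs the lookup, and is then responsible for unlocking the map
 * (and real_map, if different) as well as the returned, locked
 * object when it is finished with them.
 *
 *	vm_map_lock_read(map);
 *	kr = vm_map_lookup_locked(&map, vaddr, fault_type,
 *				  &version, &object, &offset,
 *				  &prot, &wired, &behavior,
 *				  &lo_offset, &hi_offset, &real_map);
 */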
7098 kern_return_t
7099 vm_map_lookup_locked(
7100 vm_map_t *var_map, /* IN/OUT */
7101 vm_map_offset_t vaddr,
7102 vm_prot_t fault_type,
7103 vm_map_version_t *out_version, /* OUT */
7104 vm_object_t *object, /* OUT */
7105 vm_object_offset_t *offset, /* OUT */
7106 vm_prot_t *out_prot, /* OUT */
7107 boolean_t *wired, /* OUT */
7108 int *behavior, /* OUT */
7109 vm_map_offset_t *lo_offset, /* OUT */
7110 vm_map_offset_t *hi_offset, /* OUT */
7111 vm_map_t *real_map)
7112 {
7113 vm_map_entry_t entry;
7114 register vm_map_t map = *var_map;
7115 vm_map_t old_map = *var_map;
7116 vm_map_t cow_sub_map_parent = VM_MAP_NULL;
7117 vm_map_offset_t cow_parent_vaddr = 0;
7118 vm_map_offset_t old_start = 0;
7119 vm_map_offset_t old_end = 0;
7120 register vm_prot_t prot;
7121
7122 *real_map = map;
7123 RetryLookup: ;
7124
7125 /*
7126 * If the map has an interesting hint, try it before calling
7127 * full blown lookup routine.
7128 */
7129 entry = map->hint;
7130
7131 if ((entry == vm_map_to_entry(map)) ||
7132 (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
7133 vm_map_entry_t tmp_entry;
7134
7135 /*
7136 * Entry was either not a valid hint, or the vaddr
7137 * was not contained in the entry, so do a full lookup.
7138 */
7139 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
7140 if((cow_sub_map_parent) && (cow_sub_map_parent != map))
7141 vm_map_unlock(cow_sub_map_parent);
7142 if((*real_map != map)
7143 && (*real_map != cow_sub_map_parent))
7144 vm_map_unlock(*real_map);
7145 return KERN_INVALID_ADDRESS;
7146 }
7147
7148 entry = tmp_entry;
7149 }
7150 if(map == old_map) {
7151 old_start = entry->vme_start;
7152 old_end = entry->vme_end;
7153 }
7154
7155 /*
7156 * Handle submaps. Drop lock on upper map, submap is
7157 * returned locked.
7158 */
7159
7160 submap_recurse:
7161 if (entry->is_sub_map) {
7162 vm_map_offset_t local_vaddr;
7163 vm_map_offset_t end_delta;
7164 vm_map_offset_t start_delta;
7165 vm_map_entry_t submap_entry;
7166 boolean_t mapped_needs_copy=FALSE;
7167
7168 local_vaddr = vaddr;
7169
7170 if ((!entry->needs_copy) && (entry->use_pmap)) {
7171 /* if real_map equals map we unlock below */
7172 if ((*real_map != map) &&
7173 (*real_map != cow_sub_map_parent))
7174 vm_map_unlock(*real_map);
7175 *real_map = entry->object.sub_map;
7176 }
7177
7178 if(entry->needs_copy) {
7179 if (!mapped_needs_copy) {
7180 if (vm_map_lock_read_to_write(map)) {
7181 vm_map_lock_read(map);
7182 if(*real_map == entry->object.sub_map)
7183 *real_map = map;
7184 goto RetryLookup;
7185 }
7186 vm_map_lock_read(entry->object.sub_map);
7187 cow_sub_map_parent = map;
7188 /* reset base to map before cow object */
7189 /* this is the map which will accept */
7190 /* the new cow object */
7191 old_start = entry->vme_start;
7192 old_end = entry->vme_end;
7193 cow_parent_vaddr = vaddr;
7194 mapped_needs_copy = TRUE;
7195 } else {
7196 vm_map_lock_read(entry->object.sub_map);
7197 if((cow_sub_map_parent != map) &&
7198 (*real_map != map))
7199 vm_map_unlock(map);
7200 }
7201 } else {
7202 vm_map_lock_read(entry->object.sub_map);
7203 /* leave the map locked if it is a target */
7204 /* cow sub_map above; otherwise, just */
7205 /* follow the maps down to the object. */
7206 /* Here we unlock knowing we are not */
7207 /* revisiting the map. */
7208 if((*real_map != map) && (map != cow_sub_map_parent))
7209 vm_map_unlock_read(map);
7210 }
7211
7212 *var_map = map = entry->object.sub_map;
7213
7214 /* calculate the offset in the submap for vaddr */
7215 local_vaddr = (local_vaddr - entry->vme_start) + entry->offset;
7216
7217 RetrySubMap:
7218 if(!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
7219 if((cow_sub_map_parent) && (cow_sub_map_parent != map)){
7220 vm_map_unlock(cow_sub_map_parent);
7221 }
7222 if((*real_map != map)
7223 && (*real_map != cow_sub_map_parent)) {
7224 vm_map_unlock(*real_map);
7225 }
7226 *real_map = map;
7227 return KERN_INVALID_ADDRESS;
7228 }
7229 /* find the attenuated shadow of the underlying object */
7230 /* on our target map */
7231
7232 /* In English: the submap object may extend beyond the */
7233 /* region mapped by the entry or, may only fill a portion */
7234 /* of it. For our purposes, we only care if the object */
7235 /* doesn't fill. In this case the area which will */
7236 /* ultimately be clipped in the top map will only need */
7237 /* to be as big as the portion of the underlying entry */
7238 /* which is mapped */
7239 start_delta = submap_entry->vme_start > entry->offset ?
7240 submap_entry->vme_start - entry->offset : 0;
7241
7242 end_delta =
7243 (entry->offset + start_delta + (old_end - old_start)) <=
7244 submap_entry->vme_end ?
7245 0 : (entry->offset +
7246 (old_end - old_start))
7247 - submap_entry->vme_end;
7248
7249 old_start += start_delta;
7250 old_end -= end_delta;
7251
7252 if(submap_entry->is_sub_map) {
7253 entry = submap_entry;
7254 vaddr = local_vaddr;
7255 goto submap_recurse;
7256 }
7257
7258 if(((fault_type & VM_PROT_WRITE) && cow_sub_map_parent)) {
7259
7260 vm_object_t copy_object;
7261 vm_map_offset_t local_start;
7262 vm_map_offset_t local_end;
7263 boolean_t copied_slowly = FALSE;
7264
7265 if (vm_map_lock_read_to_write(map)) {
7266 vm_map_lock_read(map);
7267 old_start -= start_delta;
7268 old_end += end_delta;
7269 goto RetrySubMap;
7270 }
7271
7272
7273 if (submap_entry->object.vm_object == VM_OBJECT_NULL) {
7274 submap_entry->object.vm_object =
7275 vm_object_allocate(
7276 (vm_map_size_t)
7277 (submap_entry->vme_end
7278 - submap_entry->vme_start));
7279 submap_entry->offset = 0;
7280 }
7281 local_start = local_vaddr -
7282 (cow_parent_vaddr - old_start);
7283 local_end = local_vaddr +
7284 (old_end - cow_parent_vaddr);
7285 vm_map_clip_start(map, submap_entry, local_start);
7286 vm_map_clip_end(map, submap_entry, local_end);
7287
7288 /* This is the COW case; let's connect */
7289 /* an entry in our space to the underlying */
7290 /* object in the submap, bypassing the */
7291 /* submap. */
7292
7293
7294 if(submap_entry->wired_count != 0) {
7295 vm_object_lock(
7296 submap_entry->object.vm_object);
7297 vm_object_copy_slowly(
7298 submap_entry->object.vm_object,
7299 submap_entry->offset,
7300 submap_entry->vme_end -
7301 submap_entry->vme_start,
7302 FALSE,
7303 &copy_object);
7304 copied_slowly = TRUE;
7305 } else {
7306 /* set up shadow object */
7307 copy_object = submap_entry->object.vm_object;
7308 vm_object_reference(copy_object);
7309 submap_entry->object.vm_object->shadowed = TRUE;
7310 submap_entry->needs_copy = TRUE;
7311
7312 prot = submap_entry->protection & ~VM_PROT_WRITE;
7313 #ifdef STACK_ONLY_NX
7314 if (submap_entry->alias != VM_MEMORY_STACK && prot)
7315 prot |= VM_PROT_EXECUTE;
7316 #endif
7317 vm_object_pmap_protect(
7318 submap_entry->object.vm_object,
7319 submap_entry->offset,
7320 submap_entry->vme_end -
7321 submap_entry->vme_start,
7322 (submap_entry->is_shared
7323 || map->mapped) ?
7324 PMAP_NULL : map->pmap,
7325 submap_entry->vme_start,
7326 prot);
7327 }
7328
7329
7330 /* This works differently than the */
7331 /* normal submap case. We go back */
7332 /* to the parent of the cow map and */
7333 /* clip out the target portion of */
7334 /* the sub_map, substituting the */
7335 /* new copy object. */
7336
7337 vm_map_unlock(map);
7338 local_start = old_start;
7339 local_end = old_end;
7340 map = cow_sub_map_parent;
7341 *var_map = cow_sub_map_parent;
7342 vaddr = cow_parent_vaddr;
7343 cow_sub_map_parent = NULL;
7344
7345 if(!vm_map_lookup_entry(map,
7346 vaddr, &entry)) {
7347 vm_object_deallocate(
7348 copy_object);
7349 vm_map_lock_write_to_read(map);
7350 return KERN_INVALID_ADDRESS;
7351 }
7352
7353 /* clip out the portion of space */
7354 /* mapped by the sub map which */
7355 /* corresponds to the underlying */
7356 /* object */
7357 vm_map_clip_start(map, entry, local_start);
7358 vm_map_clip_end(map, entry, local_end);
7359
7360
7361 /* substitute copy object for */
7362 /* shared map entry */
7363 vm_map_deallocate(entry->object.sub_map);
7364 entry->is_sub_map = FALSE;
7365 entry->object.vm_object = copy_object;
7366
7367 entry->protection |= VM_PROT_WRITE;
7368 entry->max_protection |= VM_PROT_WRITE;
7369 if(copied_slowly) {
7370 entry->offset = 0;
7371 entry->needs_copy = FALSE;
7372 entry->is_shared = FALSE;
7373 } else {
7374 entry->offset = submap_entry->offset;
7375 entry->needs_copy = TRUE;
7376 if(entry->inheritance == VM_INHERIT_SHARE)
7377 entry->inheritance = VM_INHERIT_COPY;
7378 if (map != old_map)
7379 entry->is_shared = TRUE;
7380 }
7381 if(entry->inheritance == VM_INHERIT_SHARE)
7382 entry->inheritance = VM_INHERIT_COPY;
7383
7384 vm_map_lock_write_to_read(map);
7385 } else {
7386 if((cow_sub_map_parent)
7387 && (cow_sub_map_parent != *real_map)
7388 && (cow_sub_map_parent != map)) {
7389 vm_map_unlock(cow_sub_map_parent);
7390 }
7391 entry = submap_entry;
7392 vaddr = local_vaddr;
7393 }
7394 }
7395
7396 /*
7397 * Check whether this task is allowed to have
7398 * this page.
7399 */
7400 prot = entry->protection;
7401
7402 #ifdef STACK_ONLY_NX
7403 if (entry->alias != VM_MEMORY_STACK && prot)
7404 /*
7405 * HACK -- if not a stack, then allow execution
7406 */
7407 prot |= VM_PROT_EXECUTE;
7408 #endif
7409 if ((fault_type & (prot)) != fault_type) {
7410 if (*real_map != map) {
7411 vm_map_unlock(*real_map);
7412 }
7413 *real_map = map;
7414
7415 if ((fault_type & VM_PROT_EXECUTE) && prot)
7416 log_nx_failure((addr64_t)vaddr, prot);
7417
7418 return KERN_PROTECTION_FAILURE;
7419 }
7420
7421 /*
7422 * If this page is not pageable, we have to get
7423 * it for all possible accesses.
7424 */
7425
7426 *wired = (entry->wired_count != 0);
7427 if (*wired)
7428 fault_type = prot;
7429
7430 /*
7431 * If the entry was copy-on-write, we either ...
7432 */
7433
7434 if (entry->needs_copy) {
7435 /*
7436 * If we want to write the page, we may as well
7437 * handle that now since we've got the map locked.
7438 *
7439 * If we don't need to write the page, we just
7440 * demote the permissions allowed.
7441 */
7442
7443 if ((fault_type & VM_PROT_WRITE) || *wired) {
7444 /*
7445 * Make a new object, and place it in the
7446 * object chain. Note that no new references
7447 * have appeared -- one just moved from the
7448 * map to the new object.
7449 */
7450
7451 if (vm_map_lock_read_to_write(map)) {
7452 vm_map_lock_read(map);
7453 goto RetryLookup;
7454 }
7455 vm_object_shadow(&entry->object.vm_object,
7456 &entry->offset,
7457 (vm_map_size_t) (entry->vme_end -
7458 entry->vme_start));
7459
7460 entry->object.vm_object->shadowed = TRUE;
7461 entry->needs_copy = FALSE;
7462 vm_map_lock_write_to_read(map);
7463 }
7464 else {
7465 /*
7466 * We're attempting to read a copy-on-write
7467 * page -- don't allow writes.
7468 */
7469
7470 prot &= (~VM_PROT_WRITE);
7471 }
7472 }
7473
7474 /*
7475 * Create an object if necessary.
7476 */
7477 if (entry->object.vm_object == VM_OBJECT_NULL) {
7478
7479 if (vm_map_lock_read_to_write(map)) {
7480 vm_map_lock_read(map);
7481 goto RetryLookup;
7482 }
7483
7484 entry->object.vm_object = vm_object_allocate(
7485 (vm_map_size_t)(entry->vme_end - entry->vme_start));
7486 entry->offset = 0;
7487 vm_map_lock_write_to_read(map);
7488 }
7489
7490 /*
7491 * Return the object/offset from this entry. If the entry
7492 * was copy-on-write or empty, it has been fixed up. Also
7493 * return the protection.
7494 */
7495
7496 *offset = (vaddr - entry->vme_start) + entry->offset;
7497 *object = entry->object.vm_object;
7498 *out_prot = prot;
7499 *behavior = entry->behavior;
7500 *lo_offset = entry->offset;
7501 *hi_offset = (entry->vme_end - entry->vme_start) + entry->offset;
7502
7503 /*
7504 * Lock the object to prevent it from disappearing
7505 */
7506
7507 vm_object_lock(*object);
7508
7509 /*
7510 * Save the version number
7511 */
7512
7513 out_version->main_timestamp = map->timestamp;
7514
7515 return KERN_SUCCESS;
7516 }
7517
7518
7519 /*
7520 * vm_map_verify:
7521 *
7522 * Verifies that the map in question has not changed
7523 * since the given version. If successful, the map
7524 * will not change until vm_map_verify_done() is called.
7525 */
7526 boolean_t
7527 vm_map_verify(
7528 register vm_map_t map,
7529 register vm_map_version_t *version) /* REF */
7530 {
7531 boolean_t result;
7532
7533 vm_map_lock_read(map);
7534 result = (map->timestamp == version->main_timestamp);
7535
7536 if (!result)
7537 vm_map_unlock_read(map);
7538
7539 return(result);
7540 }
7541
7542 /*
7543 * vm_map_verify_done:
7544 *
7545 * Releases locks acquired by a vm_map_verify.
7546 *
7547 * This is now a macro in vm/vm_map.h. It does a
7548 * vm_map_unlock_read on the map.
7549 */
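/*
 * Illustrative sketch (hypothetical caller): the two calls bracket a
 * stretch of code that relies on the map not having changed since
 * "version" was recorded by an earlier lookup.
 *
 *	if (vm_map_verify(map, &version)) {
 *		... use the map, knowing it still matches "version" ...
 *		vm_map_verify_done(map, &version);
 *	} else {
 *		... the map has changed: redo the lookup ...
 *	}
 */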
7550
7551
7552 /*
7553 * TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
7554 * Goes away after regular vm_region_recurse function migrates to
7555 * 64 bits
7556 * vm_region_recurse: A form of vm_region which follows the
7557 * submaps in a target map
7558 *
7559 */
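/*
 * Illustrative sketch (hypothetical caller; MAX_DEPTH and the locals
 * are made up): on input *nesting_depth is the maximum submap depth
 * to descend, on output it is the depth actually reached; advancing
 * "address" by the returned size steps to the following region.
 *
 *	depth = MAX_DEPTH;
 *	count = VM_REGION_SUBMAP_INFO_COUNT_64;
 *	kr = vm_map_region_recurse_64(map, &address, &size, &depth,
 *				      (vm_region_submap_info_64_t) &info,
 *				      &count);
 *	if (kr == KERN_SUCCESS)
 *		address += size;
 */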
7560
7561 kern_return_t
7562 vm_map_region_recurse_64(
7563 vm_map_t map,
7564 vm_map_offset_t *address, /* IN/OUT */
7565 vm_map_size_t *size, /* OUT */
7566 natural_t *nesting_depth, /* IN/OUT */
7567 vm_region_submap_info_64_t submap_info, /* IN/OUT */
7568 mach_msg_type_number_t *count) /* IN/OUT */
7569 {
7570 vm_region_extended_info_data_t extended;
7571 vm_map_entry_t tmp_entry;
7572 vm_map_offset_t user_address;
7573 unsigned int user_max_depth;
7574
7575 /*
7576 * "curr_entry" is the VM map entry preceding or including the
7577 * address we're looking for.
7578 * "curr_map" is the map or sub-map containing "curr_entry".
7579 * "curr_offset" is the cumulated offset of "curr_map" in the
7580 * target task's address space.
7581 * "curr_depth" is the depth of "curr_map" in the chain of
7582 * sub-maps.
7583 * "curr_max_offset" is the maximum offset we should take into
7584 * account in the current map. It may be smaller than the current
7585 * map's "max_offset" because we might not have mapped it all in
7586 * the upper level map.
7587 */
7588 vm_map_entry_t curr_entry;
7589 vm_map_offset_t curr_offset;
7590 vm_map_t curr_map;
7591 unsigned int curr_depth;
7592 vm_map_offset_t curr_max_offset;
7593
7594 /*
7595 * "next_" is the same as "curr_" but for the VM region immediately
7596 * after the address we're looking for. We need to keep track of this
7597 * too because we want to return info about that region if the
7598 * address we're looking for is not mapped.
7599 */
7600 vm_map_entry_t next_entry;
7601 vm_map_offset_t next_offset;
7602 vm_map_t next_map;
7603 unsigned int next_depth;
7604 vm_map_offset_t next_max_offset;
7605
7606 if (map == VM_MAP_NULL) {
7607 /* no address space to work on */
7608 return KERN_INVALID_ARGUMENT;
7609 }
7610
7611 if (*count < VM_REGION_SUBMAP_INFO_COUNT_64) {
7612 /* "info" structure is not big enough and would overflow */
7613 return KERN_INVALID_ARGUMENT;
7614 }
7615
7616 *count = VM_REGION_SUBMAP_INFO_COUNT_64;
7617
7618 user_address = *address;
7619 user_max_depth = *nesting_depth;
7620
7621 curr_entry = NULL;
7622 curr_map = map;
7623 curr_offset = 0;
7624 curr_depth = 0;
7625 curr_max_offset = curr_map->max_offset;
7626
7627 next_entry = NULL;
7628 next_map = NULL;
7629 next_offset = 0;
7630 next_depth = 0;
7631 next_max_offset = curr_max_offset;
7632
7633 if (not_in_kdp) {
7634 vm_map_lock_read(curr_map);
7635 }
7636
7637 for (;;) {
7638 if (vm_map_lookup_entry(curr_map,
7639 user_address - curr_offset,
7640 &tmp_entry)) {
7641 /* tmp_entry contains the address we're looking for */
7642 curr_entry = tmp_entry;
7643 } else {
7644 /*
7645 * The address is not mapped. "tmp_entry" is the
7646 * map entry preceding the address. We want the next
7647 * one, if it exists.
7648 */
7649 curr_entry = tmp_entry->vme_next;
7650 if (curr_entry == vm_map_to_entry(curr_map) ||
7651 curr_entry->vme_start >= curr_max_offset) {
7652 /* no next entry at this level: stop looking */
7653 if (not_in_kdp) {
7654 vm_map_unlock_read(curr_map);
7655 }
7656 curr_entry = NULL;
7657 curr_map = NULL;
7658 curr_offset = 0;
7659 curr_depth = 0;
7660 curr_max_offset = 0;
7661 break;
7662 }
7663 }
7664
7665 /*
7666 * Is the next entry at this level closer to the address (or
7667 * deeper in the submap chain) than the one we had
7668 * so far ?
7669 */
7670 tmp_entry = curr_entry->vme_next;
7671 if (tmp_entry == vm_map_to_entry(curr_map)) {
7672 /* no next entry at this level */
7673 } else if (tmp_entry->vme_start >= curr_max_offset) {
7674 /*
7675 * tmp_entry is beyond the scope of what we mapped of
7676 * this submap in the upper level: ignore it.
7677 */
7678 } else if ((next_entry == NULL) ||
7679 (tmp_entry->vme_start + curr_offset <=
7680 next_entry->vme_start + next_offset)) {
7681 /*
7682 * We didn't have a "next_entry" or this one is
7683 * closer to the address we're looking for:
7684 * use this "tmp_entry" as the new "next_entry".
7685 */
7686 if (next_entry != NULL) {
7687 /* unlock the last "next_map" */
7688 if (next_map != curr_map && not_in_kdp) {
7689 vm_map_unlock_read(next_map);
7690 }
7691 }
7692 next_entry = tmp_entry;
7693 next_map = curr_map;
7694 next_offset = curr_offset;
7695 next_depth = curr_depth;
7696 next_max_offset = curr_max_offset;
7697 }
7698
7699 if (!curr_entry->is_sub_map ||
7700 curr_depth >= user_max_depth) {
7701 /*
7702 * We hit a leaf map or we reached the maximum depth
7703 * we could, so stop looking. Keep the current map
7704 * locked.
7705 */
7706 break;
7707 }
7708
7709 /*
7710 * Get down to the next submap level.
7711 */
7712
7713 /*
7714 * Lock the next level and unlock the current level,
7715 * unless we need to keep it locked to access the "next_entry"
7716 * later.
7717 */
7718 if (not_in_kdp) {
7719 vm_map_lock_read(curr_entry->object.sub_map);
7720 }
7721 if (curr_map == next_map) {
7722 /* keep "next_map" locked in case we need it */
7723 } else {
7724 /* release this map */
7725 vm_map_unlock_read(curr_map);
7726 }
7727
7728 /*
7729 * Adjust the offset. "curr_entry" maps the submap
7730 * at relative address "curr_entry->vme_start" in the
7731 * curr_map but skips the first "curr_entry->offset"
7732 * bytes of the submap.
7733 * "curr_offset" always represents the offset of a virtual
7734 * address in the curr_map relative to the absolute address
7735 * space (i.e. the top-level VM map).
7736 */
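/*
 * Worked example (hypothetical numbers): if the submap entry is
 * mapped at vme_start 0x40000000 with offset 0x1000, curr_offset
 * grows by 0x40000000 - 0x1000, so a parent address such as
 * 0x40002000 yields (user_address - curr_offset) =
 * 0x40002000 - 0x3FFFF000 = 0x3000, the corresponding submap address.
 */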
7737 curr_offset +=
7738 (curr_entry->vme_start - curr_entry->offset);
7739 /* switch to the submap */
7740 curr_map = curr_entry->object.sub_map;
7741 curr_depth++;
7742 /*
7743 * "curr_max_offset" allows us to keep track of the
7744 * portion of the submap that is actually mapped at this level:
7745 * the rest of that submap is irrelevant to us, since it's not
7746 * mapped here.
7747 * The relevant portion of the map starts at
7748 * "curr_entry->offset" up to the size of "curr_entry".
7749 */
7750 curr_max_offset =
7751 curr_entry->vme_end - curr_entry->vme_start +
7752 curr_entry->offset;
7753 curr_entry = NULL;
7754 }
7755
7756 if (curr_entry == NULL) {
7757 /* no VM region contains the address... */
7758 if (next_entry == NULL) {
7759 /* ... and no VM region follows it either */
7760 return KERN_INVALID_ADDRESS;
7761 }
7762 /* ... gather info about the next VM region */
7763 curr_entry = next_entry;
7764 curr_map = next_map; /* still locked ... */
7765 curr_offset = next_offset;
7766 curr_depth = next_depth;
7767 curr_max_offset = next_max_offset;
7768 } else {
7769 /* we won't need "next_entry" after all */
7770 if (next_entry != NULL) {
7771 /* release "next_map" */
7772 if (next_map != curr_map && not_in_kdp) {
7773 vm_map_unlock_read(next_map);
7774 }
7775 }
7776 }
7777 next_entry = NULL;
7778 next_map = NULL;
7779 next_offset = 0;
7780 next_depth = 0;
7781 next_max_offset = 0;
7782
7783 *nesting_depth = curr_depth;
7784 *size = curr_entry->vme_end - curr_entry->vme_start;
7785 *address = curr_entry->vme_start + curr_offset;
7786
7787 submap_info->user_tag = curr_entry->alias;
7788 submap_info->offset = curr_entry->offset;
7789 submap_info->protection = curr_entry->protection;
7790 submap_info->inheritance = curr_entry->inheritance;
7791 submap_info->max_protection = curr_entry->max_protection;
7792 submap_info->behavior = curr_entry->behavior;
7793 submap_info->user_wired_count = curr_entry->user_wired_count;
7794 submap_info->is_submap = curr_entry->is_sub_map;
7795 submap_info->object_id = (uint32_t) curr_entry->object.vm_object;
7796
7797 extended.pages_resident = 0;
7798 extended.pages_swapped_out = 0;
7799 extended.pages_shared_now_private = 0;
7800 extended.pages_dirtied = 0;
7801 extended.external_pager = 0;
7802 extended.shadow_depth = 0;
7803
7804 if (not_in_kdp) {
7805 if (!curr_entry->is_sub_map) {
7806 vm_map_region_walk(curr_map,
7807 curr_entry->vme_start,
7808 curr_entry,
7809 curr_entry->offset,
7810 (curr_entry->vme_end -
7811 curr_entry->vme_start),
7812 &extended);
7813 submap_info->share_mode = extended.share_mode;
7814 if (extended.external_pager &&
7815 extended.ref_count == 2 &&
7816 extended.share_mode == SM_SHARED) {
7817 submap_info->share_mode = SM_PRIVATE;
7818 }
7819 submap_info->ref_count = extended.ref_count;
7820 } else {
7821 if (curr_entry->use_pmap) {
7822 submap_info->share_mode = SM_TRUESHARED;
7823 } else {
7824 submap_info->share_mode = SM_PRIVATE;
7825 }
7826 submap_info->ref_count =
7827 curr_entry->object.sub_map->ref_count;
7828 }
7829 }
7830
7831 submap_info->pages_resident = extended.pages_resident;
7832 submap_info->pages_swapped_out = extended.pages_swapped_out;
7833 submap_info->pages_shared_now_private =
7834 extended.pages_shared_now_private;
7835 submap_info->pages_dirtied = extended.pages_dirtied;
7836 submap_info->external_pager = extended.external_pager;
7837 submap_info->shadow_depth = extended.shadow_depth;
7838
7839 if (not_in_kdp) {
7840 vm_map_unlock_read(curr_map);
7841 }
7842
7843 return KERN_SUCCESS;
7844 }
7845
7846 /*
7847 * vm_region:
7848 *
7849 * User call to obtain information about a region in
7850 * a task's address map. Currently, only one flavor is
7851 * supported.
7852 *
7853 * XXX The reserved and behavior fields cannot be filled
7854 * in until the vm merge from the IK is completed, and
7855 * vm_reserve is implemented.
7856 */
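/*
 * Illustrative sketch (hypothetical caller): asking for the 64-bit
 * basic flavor about the region containing, or following, "address".
 *
 *	vm_region_basic_info_data_64_t	info;
 *	mach_msg_type_number_t		count;
 *
 *	count = VM_REGION_BASIC_INFO_COUNT_64;
 *	kr = vm_map_region(map, &address, &size,
 *			   VM_REGION_BASIC_INFO_64,
 *			   (vm_region_info_t) &info,
 *			   &count, &object_name);
 */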
7857
7858 kern_return_t
7859 vm_map_region(
7860 vm_map_t map,
7861 vm_map_offset_t *address, /* IN/OUT */
7862 vm_map_size_t *size, /* OUT */
7863 vm_region_flavor_t flavor, /* IN */
7864 vm_region_info_t info, /* OUT */
7865 mach_msg_type_number_t *count, /* IN/OUT */
7866 mach_port_t *object_name) /* OUT */
7867 {
7868 vm_map_entry_t tmp_entry;
7869 vm_map_entry_t entry;
7870 vm_map_offset_t start;
7871
7872 if (map == VM_MAP_NULL)
7873 return(KERN_INVALID_ARGUMENT);
7874
7875 switch (flavor) {
7876
7877 case VM_REGION_BASIC_INFO:
7878 /* legacy for old 32-bit objects info */
7879 {
7880 vm_region_basic_info_t basic;
7881
7882 if (*count < VM_REGION_BASIC_INFO_COUNT)
7883 return(KERN_INVALID_ARGUMENT);
7884
7885 basic = (vm_region_basic_info_t) info;
7886 *count = VM_REGION_BASIC_INFO_COUNT;
7887
7888 vm_map_lock_read(map);
7889
7890 start = *address;
7891 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
7892 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
7893 vm_map_unlock_read(map);
7894 return(KERN_INVALID_ADDRESS);
7895 }
7896 } else {
7897 entry = tmp_entry;
7898 }
7899
7900 start = entry->vme_start;
7901
7902 basic->offset = (uint32_t)entry->offset;
7903 basic->protection = entry->protection;
7904 basic->inheritance = entry->inheritance;
7905 basic->max_protection = entry->max_protection;
7906 basic->behavior = entry->behavior;
7907 basic->user_wired_count = entry->user_wired_count;
7908 basic->reserved = entry->is_sub_map;
7909 *address = start;
7910 *size = (entry->vme_end - start);
7911
7912 if (object_name) *object_name = IP_NULL;
7913 if (entry->is_sub_map) {
7914 basic->shared = FALSE;
7915 } else {
7916 basic->shared = entry->is_shared;
7917 }
7918
7919 vm_map_unlock_read(map);
7920 return(KERN_SUCCESS);
7921 }
7922
7923 case VM_REGION_BASIC_INFO_64:
7924 {
7925 vm_region_basic_info_64_t basic;
7926
7927 if (*count < VM_REGION_BASIC_INFO_COUNT_64)
7928 return(KERN_INVALID_ARGUMENT);
7929
7930 basic = (vm_region_basic_info_64_t) info;
7931 *count = VM_REGION_BASIC_INFO_COUNT_64;
7932
7933 vm_map_lock_read(map);
7934
7935 start = *address;
7936 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
7937 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
7938 vm_map_unlock_read(map);
7939 return(KERN_INVALID_ADDRESS);
7940 }
7941 } else {
7942 entry = tmp_entry;
7943 }
7944
7945 start = entry->vme_start;
7946
7947 basic->offset = entry->offset;
7948 basic->protection = entry->protection;
7949 basic->inheritance = entry->inheritance;
7950 basic->max_protection = entry->max_protection;
7951 basic->behavior = entry->behavior;
7952 basic->user_wired_count = entry->user_wired_count;
7953 basic->reserved = entry->is_sub_map;
7954 *address = start;
7955 *size = (entry->vme_end - start);
7956
7957 if (object_name) *object_name = IP_NULL;
7958 if (entry->is_sub_map) {
7959 basic->shared = FALSE;
7960 } else {
7961 basic->shared = entry->is_shared;
7962 }
7963
7964 vm_map_unlock_read(map);
7965 return(KERN_SUCCESS);
7966 }
7967 case VM_REGION_EXTENDED_INFO:
7968 {
7969 vm_region_extended_info_t extended;
7970
7971 if (*count < VM_REGION_EXTENDED_INFO_COUNT)
7972 return(KERN_INVALID_ARGUMENT);
7973
7974 extended = (vm_region_extended_info_t) info;
7975 *count = VM_REGION_EXTENDED_INFO_COUNT;
7976
7977 vm_map_lock_read(map);
7978
7979 start = *address;
7980 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
7981 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
7982 vm_map_unlock_read(map);
7983 return(KERN_INVALID_ADDRESS);
7984 }
7985 } else {
7986 entry = tmp_entry;
7987 }
7988 start = entry->vme_start;
7989
7990 extended->protection = entry->protection;
7991 extended->user_tag = entry->alias;
7992 extended->pages_resident = 0;
7993 extended->pages_swapped_out = 0;
7994 extended->pages_shared_now_private = 0;
7995 extended->pages_dirtied = 0;
7996 extended->external_pager = 0;
7997 extended->shadow_depth = 0;
7998
7999 vm_map_region_walk(map, start, entry, entry->offset, entry->vme_end - start, extended);
8000
8001 if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED)
8002 extended->share_mode = SM_PRIVATE;
8003
8004 if (object_name)
8005 *object_name = IP_NULL;
8006 *address = start;
8007 *size = (entry->vme_end - start);
8008
8009 vm_map_unlock_read(map);
8010 return(KERN_SUCCESS);
8011 }
8012 case VM_REGION_TOP_INFO:
8013 {
8014 vm_region_top_info_t top;
8015
8016 if (*count < VM_REGION_TOP_INFO_COUNT)
8017 return(KERN_INVALID_ARGUMENT);
8018
8019 top = (vm_region_top_info_t) info;
8020 *count = VM_REGION_TOP_INFO_COUNT;
8021
8022 vm_map_lock_read(map);
8023
8024 start = *address;
8025 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
8026 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
8027 vm_map_unlock_read(map);
8028 return(KERN_INVALID_ADDRESS);
8029 }
8030 } else {
8031 entry = tmp_entry;
8032
8033 }
8034 start = entry->vme_start;
8035
8036 top->private_pages_resident = 0;
8037 top->shared_pages_resident = 0;
8038
8039 vm_map_region_top_walk(entry, top);
8040
8041 if (object_name)
8042 *object_name = IP_NULL;
8043 *address = start;
8044 *size = (entry->vme_end - start);
8045
8046 vm_map_unlock_read(map);
8047 return(KERN_SUCCESS);
8048 }
8049 default:
8050 return(KERN_INVALID_ARGUMENT);
8051 }
8052 }
8053
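/*
 * vm_map_region_top_walk:
 *
 * Fill in a vm_region_top_info_t for a single map entry: classify
 * the entry's sharing mode (SM_EMPTY, SM_COW, SM_PRIVATE or
 * SM_SHARED) and count its resident pages as private or shared,
 * walking the object's shadow chain when one exists.
 */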
8054 void
8055 vm_map_region_top_walk(
8056 vm_map_entry_t entry,
8057 vm_region_top_info_t top)
8058 {
8059 register struct vm_object *obj, *tmp_obj;
8060 register int ref_count;
8061
8062 if (entry->object.vm_object == 0 || entry->is_sub_map) {
8063 top->share_mode = SM_EMPTY;
8064 top->ref_count = 0;
8065 top->obj_id = 0;
8066 return;
8067 }
8068 {
8069 obj = entry->object.vm_object;
8070
8071 vm_object_lock(obj);
8072
8073 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
8074 ref_count--;
8075
8076 if (obj->shadow) {
8077 if (ref_count == 1)
8078 top->private_pages_resident = obj->resident_page_count;
8079 else
8080 top->shared_pages_resident = obj->resident_page_count;
8081 top->ref_count = ref_count;
8082 top->share_mode = SM_COW;
8083
8084 while ((tmp_obj = obj->shadow)) {
8085 vm_object_lock(tmp_obj);
8086 vm_object_unlock(obj);
8087 obj = tmp_obj;
8088
8089 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
8090 ref_count--;
8091
8092 top->shared_pages_resident += obj->resident_page_count;
8093 top->ref_count += ref_count - 1;
8094 }
8095 } else {
8096 if (entry->needs_copy) {
8097 top->share_mode = SM_COW;
8098 top->shared_pages_resident = obj->resident_page_count;
8099 } else {
8100 if (ref_count == 1 ||
8101 (ref_count == 2 && !(obj->pager_trusted) && !(obj->internal))) {
8102 top->share_mode = SM_PRIVATE;
8103 top->private_pages_resident = obj->resident_page_count;
8104 } else {
8105 top->share_mode = SM_SHARED;
8106 top->shared_pages_resident = obj->resident_page_count;
8107 }
8108 }
8109 top->ref_count = ref_count;
8110 }
8111 top->obj_id = (int)obj;
8112
8113 vm_object_unlock(obj);
8114 }
8115 }
8116
8117 void
8118 vm_map_region_walk(
8119 vm_map_t map,
8120 vm_map_offset_t va,
8121 vm_map_entry_t entry,
8122 vm_object_offset_t offset,
8123 vm_object_size_t range,
8124 vm_region_extended_info_t extended)
8125 {
8126 register struct vm_object *obj, *tmp_obj;
8127 register vm_map_offset_t last_offset;
8128 register int i;
8129 register int ref_count;
8130 struct vm_object *shadow_object;
8131 int shadow_depth;
8132
8133 if ((entry->object.vm_object == 0) ||
8134 (entry->is_sub_map) ||
8135 (entry->object.vm_object->phys_contiguous)) {
8136 extended->share_mode = SM_EMPTY;
8137 extended->ref_count = 0;
8138 return;
8139 }
8140 {
8141 obj = entry->object.vm_object;
8142
8143 vm_object_lock(obj);
8144
8145 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
8146 ref_count--;
8147
8148 for (last_offset = offset + range; offset < last_offset; offset += PAGE_SIZE_64, va += PAGE_SIZE)
8149 vm_map_region_look_for_page(map, va, obj, offset, ref_count, 0, extended);
8150
8151 shadow_object = obj->shadow;
8152 shadow_depth = 0;
8153 if (shadow_object != VM_OBJECT_NULL) {
8154 vm_object_lock(shadow_object);
8155 for (;
8156 shadow_object != VM_OBJECT_NULL;
8157 shadow_depth++) {
8158 vm_object_t next_shadow;
8159
8160 next_shadow = shadow_object->shadow;
8161 if (next_shadow) {
8162 vm_object_lock(next_shadow);
8163 }
8164 vm_object_unlock(shadow_object);
8165 shadow_object = next_shadow;
8166 }
8167 }
8168 extended->shadow_depth = shadow_depth;
8169
8170 if (extended->shadow_depth || entry->needs_copy)
8171 extended->share_mode = SM_COW;
8172 else {
8173 if (ref_count == 1)
8174 extended->share_mode = SM_PRIVATE;
8175 else {
8176 if (obj->true_share)
8177 extended->share_mode = SM_TRUESHARED;
8178 else
8179 extended->share_mode = SM_SHARED;
8180 }
8181 }
8182 extended->ref_count = ref_count - extended->shadow_depth;
8183
8184 for (i = 0; i < extended->shadow_depth; i++) {
8185 if ((tmp_obj = obj->shadow) == 0)
8186 break;
8187 vm_object_lock(tmp_obj);
8188 vm_object_unlock(obj);
8189
8190 if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress)
8191 ref_count--;
8192
8193 extended->ref_count += ref_count;
8194 obj = tmp_obj;
8195 }
8196 vm_object_unlock(obj);
8197
8198 if (extended->share_mode == SM_SHARED) {
8199 register vm_map_entry_t cur;
8200 register vm_map_entry_t last;
8201 int my_refs;
8202
8203 obj = entry->object.vm_object;
8204 last = vm_map_to_entry(map);
8205 my_refs = 0;
8206
8207 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
8208 ref_count--;
8209 for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next)
8210 my_refs += vm_map_region_count_obj_refs(cur, obj);
8211
8212 if (my_refs == ref_count)
8213 extended->share_mode = SM_PRIVATE_ALIASED;
8214 else if (my_refs > 1)
8215 extended->share_mode = SM_SHARED_ALIASED;
8216 }
8217 }
8218 }
8219
8220
8221 /* object is locked on entry and locked on return */
8222
8223
8224 static void
8225 vm_map_region_look_for_page(
8226 __unused vm_map_t map,
8227 __unused vm_map_offset_t va,
8228 vm_object_t object,
8229 vm_object_offset_t offset,
8230 int max_refcnt,
8231 int depth,
8232 vm_region_extended_info_t extended)
8233 {
8234 register vm_page_t p;
8235 register vm_object_t shadow;
8236 register int ref_count;
8237 vm_object_t caller_object;
8238
8239 shadow = object->shadow;
8240 caller_object = object;
8241
8242
8243 while (TRUE) {
8244
8245 if ( !(object->pager_trusted) && !(object->internal))
8246 extended->external_pager = 1;
8247
8248 if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
8249 if (shadow && (max_refcnt == 1))
8250 extended->pages_shared_now_private++;
8251
8252 if (!p->fictitious &&
8253 (p->dirty || pmap_is_modified(p->phys_page)))
8254 extended->pages_dirtied++;
8255
8256 extended->pages_resident++;
8257
8258 if(object != caller_object)
8259 vm_object_unlock(object);
8260
8261 return;
8262 }
8263 if (object->existence_map) {
8264 if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_EXISTS) {
8265
8266 extended->pages_swapped_out++;
8267
8268 if(object != caller_object)
8269 vm_object_unlock(object);
8270
8271 return;
8272 }
8273 }
8274 if (shadow) {
8275 vm_object_lock(shadow);
8276
8277 if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress)
8278 ref_count--;
8279
8280 if (++depth > extended->shadow_depth)
8281 extended->shadow_depth = depth;
8282
8283 if (ref_count > max_refcnt)
8284 max_refcnt = ref_count;
8285
8286 if(object != caller_object)
8287 vm_object_unlock(object);
8288
8289 offset = offset + object->shadow_offset;
8290 object = shadow;
8291 shadow = object->shadow;
8292 continue;
8293 }
8294 if(object != caller_object)
8295 vm_object_unlock(object);
8296 break;
8297 }
8298 }
8299
8300 static int
8301 vm_map_region_count_obj_refs(
8302 vm_map_entry_t entry,
8303 vm_object_t object)
8304 {
8305 register int ref_count;
8306 register vm_object_t chk_obj;
8307 register vm_object_t tmp_obj;
8308
8309 if (entry->object.vm_object == 0)
8310 return(0);
8311
8312 if (entry->is_sub_map)
8313 return(0);
8314 else {
8315 ref_count = 0;
8316
8317 chk_obj = entry->object.vm_object;
8318 vm_object_lock(chk_obj);
8319
8320 while (chk_obj) {
8321 if (chk_obj == object)
8322 ref_count++;
8323 tmp_obj = chk_obj->shadow;
8324 if (tmp_obj)
8325 vm_object_lock(tmp_obj);
8326 vm_object_unlock(chk_obj);
8327
8328 chk_obj = tmp_obj;
8329 }
8330 }
8331 return(ref_count);
8332 }
8333
8334
8335 /*
8336 * Routine: vm_map_simplify
8337 *
8338 * Description:
8339 * Attempt to simplify the map representation in
8340 * the vicinity of the given starting address.
8341 * Note:
8342 * This routine is intended primarily to keep the
8343 * kernel maps more compact -- they generally don't
8344 * benefit from the "expand a map entry" technology
8345 * at allocation time because the adjacent entry
8346 * is often wired down.
8347 */
8348 void
8349 vm_map_simplify_entry(
8350 vm_map_t map,
8351 vm_map_entry_t this_entry)
8352 {
8353 vm_map_entry_t prev_entry;
8354
8355 counter(c_vm_map_simplify_entry_called++);
8356
8357 prev_entry = this_entry->vme_prev;
8358
8359 if ((this_entry != vm_map_to_entry(map)) &&
8360 (prev_entry != vm_map_to_entry(map)) &&
8361
8362 (prev_entry->vme_end == this_entry->vme_start) &&
8363
8364 (prev_entry->is_sub_map == FALSE) &&
8365 (this_entry->is_sub_map == FALSE) &&
8366
8367 (prev_entry->object.vm_object == this_entry->object.vm_object) &&
8368 ((prev_entry->offset + (prev_entry->vme_end -
8369 prev_entry->vme_start))
8370 == this_entry->offset) &&
8371
8372 (prev_entry->inheritance == this_entry->inheritance) &&
8373 (prev_entry->protection == this_entry->protection) &&
8374 (prev_entry->max_protection == this_entry->max_protection) &&
8375 (prev_entry->behavior == this_entry->behavior) &&
8376 (prev_entry->alias == this_entry->alias) &&
8377 (prev_entry->wired_count == this_entry->wired_count) &&
8378 (prev_entry->user_wired_count == this_entry->user_wired_count) &&
8379
8380 (prev_entry->needs_copy == this_entry->needs_copy) &&
8381
8382 (prev_entry->use_pmap == FALSE) &&
8383 (this_entry->use_pmap == FALSE) &&
8384 (prev_entry->in_transition == FALSE) &&
8385 (this_entry->in_transition == FALSE) &&
8386 (prev_entry->needs_wakeup == FALSE) &&
8387 (this_entry->needs_wakeup == FALSE) &&
8388 (prev_entry->is_shared == FALSE) &&
8389 (this_entry->is_shared == FALSE)
8390 ) {
8391 _vm_map_entry_unlink(&map->hdr, prev_entry);
8392 this_entry->vme_start = prev_entry->vme_start;
8393 this_entry->offset = prev_entry->offset;
8394 vm_object_deallocate(prev_entry->object.vm_object);
8395 vm_map_entry_dispose(map, prev_entry);
8396 SAVE_HINT_MAP_WRITE(map, this_entry);
8397 counter(c_vm_map_simplified++);
8398 }
8399 }
8400
8401 void
8402 vm_map_simplify(
8403 vm_map_t map,
8404 vm_map_offset_t start)
8405 {
8406 vm_map_entry_t this_entry;
8407
8408 vm_map_lock(map);
8409 if (vm_map_lookup_entry(map, start, &this_entry)) {
8410 vm_map_simplify_entry(map, this_entry);
8411 vm_map_simplify_entry(map, this_entry->vme_next);
8412 }
8413 counter(c_vm_map_simplify_called++);
8414 vm_map_unlock(map);
8415 }
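/*
 * Illustrative sketch, not part of the original source: a hypothetical
 * caller that has just built several adjacent, identically protected
 * entries backed by one object at contiguous offsets could ask the map
 * to coalesce them around one address.  "map" and "addr" are assumptions
 * describing an existing mapping.
 */
static __unused void
example_coalesce_around(
	vm_map_t	map,
	vm_map_offset_t	addr)
{
	/*
	 * vm_map_simplify() takes the map lock itself and tries to merge
	 * the entry containing "addr" with both of its neighbors, using
	 * the attribute checks in vm_map_simplify_entry() above.
	 */
	vm_map_simplify(map, addr);
}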
8416
8417 static void
8418 vm_map_simplify_range(
8419 vm_map_t map,
8420 vm_map_offset_t start,
8421 vm_map_offset_t end)
8422 {
8423 vm_map_entry_t entry;
8424
8425 /*
8426 * The map should be locked (for "write") by the caller.
8427 */
8428
8429 if (start >= end) {
8430 /* invalid address range */
8431 return;
8432 }
8433
8434 if (!vm_map_lookup_entry(map, start, &entry)) {
8435 /* "start" is not mapped and "entry" ends before "start" */
8436 if (entry == vm_map_to_entry(map)) {
8437 /* start with first entry in the map */
8438 entry = vm_map_first_entry(map);
8439 } else {
8440 /* start with next entry */
8441 entry = entry->vme_next;
8442 }
8443 }
8444
8445 while (entry != vm_map_to_entry(map) &&
8446 entry->vme_start <= end) {
8447 /* try and coalesce "entry" with its previous entry */
8448 vm_map_simplify_entry(map, entry);
8449 entry = entry->vme_next;
8450 }
8451 }
8452
8453
8454 /*
8455 * Routine: vm_map_machine_attribute
8456 * Purpose:
8457 * Provide machine-specific attributes to mappings,
8458 * such as cacheability, etc., for machines that provide
8459 * them. NUMA architectures and machines with big/strange
8460 * caches will use this.
8461 * Note:
8462 * Responsibilities for locking and checking are handled here;
8463 * everything else is handled in the pmap module. If any non-volatile
8464 * information must be kept, the pmap module should handle
8465 * it itself. [This assumes that attributes do not
8466 * need to be inherited, which seems ok to me]
8467 */
8468 kern_return_t
8469 vm_map_machine_attribute(
8470 vm_map_t map,
8471 vm_map_offset_t start,
8472 vm_map_offset_t end,
8473 vm_machine_attribute_t attribute,
8474 vm_machine_attribute_val_t* value) /* IN/OUT */
8475 {
8476 kern_return_t ret;
8477 vm_map_size_t sync_size;
8478 vm_map_entry_t entry;
8479
8480 if (start < vm_map_min(map) || end > vm_map_max(map))
8481 return KERN_INVALID_ADDRESS;
8482
8483 /* Figure how much memory we need to flush (in page increments) */
8484 sync_size = end - start;
8485
8486 vm_map_lock(map);
8487
8488 if (attribute != MATTR_CACHE) {
8489 /* If we don't have to find physical addresses, we */
8490 /* don't have to do an explicit traversal here. */
8491 ret = pmap_attribute(map->pmap, start, end-start,
8492 attribute, value);
8493 vm_map_unlock(map);
8494 return ret;
8495 }
8496
8497 ret = KERN_SUCCESS; /* Assume it all worked */
8498
8499 while(sync_size) {
8500 if (vm_map_lookup_entry(map, start, &entry)) {
8501 vm_map_size_t sub_size;
8502 if((entry->vme_end - start) > sync_size) {
8503 sub_size = sync_size;
8504 sync_size = 0;
8505 } else {
8506 sub_size = entry->vme_end - start;
8507 sync_size -= sub_size;
8508 }
8509 if(entry->is_sub_map) {
8510 vm_map_offset_t sub_start;
8511 vm_map_offset_t sub_end;
8512
8513 sub_start = (start - entry->vme_start)
8514 + entry->offset;
8515 sub_end = sub_start + sub_size;
8516 vm_map_machine_attribute(
8517 entry->object.sub_map,
8518 sub_start,
8519 sub_end,
8520 attribute, value);
8521 } else {
8522 if(entry->object.vm_object) {
8523 vm_page_t m;
8524 vm_object_t object;
8525 vm_object_t base_object;
8526 vm_object_t last_object;
8527 vm_object_offset_t offset;
8528 vm_object_offset_t base_offset;
8529 vm_map_size_t range;
8530 range = sub_size;
8531 offset = (start - entry->vme_start)
8532 + entry->offset;
8533 base_offset = offset;
8534 object = entry->object.vm_object;
8535 base_object = object;
8536 last_object = NULL;
8537
8538 vm_object_lock(object);
8539
8540 while (range) {
8541 m = vm_page_lookup(
8542 object, offset);
8543
8544 if (m && !m->fictitious) {
8545 ret =
8546 pmap_attribute_cache_sync(
8547 m->phys_page,
8548 PAGE_SIZE,
8549 attribute, value);
8550
8551 } else if (object->shadow) {
8552 offset = offset + object->shadow_offset;
8553 last_object = object;
8554 object = object->shadow;
8555 vm_object_lock(last_object->shadow);
8556 vm_object_unlock(last_object);
8557 continue;
8558 }
8559 range -= PAGE_SIZE;
8560
8561 if (base_object != object) {
8562 vm_object_unlock(object);
8563 vm_object_lock(base_object);
8564 object = base_object;
8565 }
8566 /* Bump to the next page */
8567 base_offset += PAGE_SIZE;
8568 offset = base_offset;
8569 }
8570 vm_object_unlock(object);
8571 }
8572 }
8573 start += sub_size;
8574 } else {
8575 vm_map_unlock(map);
8576 return KERN_FAILURE;
8577 }
8578
8579 }
8580
8581 vm_map_unlock(map);
8582
8583 return ret;
8584 }
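/*
 * Illustrative sketch, not part of the original source: a hypothetical
 * in-kernel caller that needs the cache flushed for a mapped range (e.g.
 * after patching instructions) might use the routine above roughly as
 * follows.  "map", "start" and "len" are assumptions describing an
 * existing, page-aligned mapping.
 */
static __unused kern_return_t
example_flush_cache_range(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_size_t	len)
{
	vm_machine_attribute_val_t	val = MATTR_VAL_CACHE_FLUSH;

	/*
	 * MATTR_CACHE requests walk the map entries (and their shadow
	 * chains) above so each resident physical page can be synced.
	 */
	return vm_map_machine_attribute(map, start, start + len,
					MATTR_CACHE, &val);
}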
8585
8586 /*
8587 * vm_map_behavior_set:
8588 *
8589 * Sets the paging reference behavior of the specified address
8590 * range in the target map. Paging reference behavior affects
8591 * how pagein operations resulting from faults on the map will be
8592 * clustered.
8593 */
8594 kern_return_t
8595 vm_map_behavior_set(
8596 vm_map_t map,
8597 vm_map_offset_t start,
8598 vm_map_offset_t end,
8599 vm_behavior_t new_behavior)
8600 {
8601 register vm_map_entry_t entry;
8602 vm_map_entry_t temp_entry;
8603
8604 XPR(XPR_VM_MAP,
8605 "vm_map_behavior_set, 0x%X start 0x%X end 0x%X behavior %d",
8606 (integer_t)map, start, end, new_behavior, 0);
8607
8608 switch (new_behavior) {
8609 case VM_BEHAVIOR_DEFAULT:
8610 case VM_BEHAVIOR_RANDOM:
8611 case VM_BEHAVIOR_SEQUENTIAL:
8612 case VM_BEHAVIOR_RSEQNTL:
8613 break;
8614 case VM_BEHAVIOR_WILLNEED:
8615 case VM_BEHAVIOR_DONTNEED:
8616 new_behavior = VM_BEHAVIOR_DEFAULT;
8617 break;
8618 default:
8619 return(KERN_INVALID_ARGUMENT);
8620 }
8621
8622 vm_map_lock(map);
8623
8624 /*
8625 * The entire address range must be valid for the map.
8626 * Note that vm_map_range_check() does a
8627 * vm_map_lookup_entry() internally and returns the
8628 * entry containing the start of the address range if
8629 * the entire range is valid.
8630 */
8631 if (vm_map_range_check(map, start, end, &temp_entry)) {
8632 entry = temp_entry;
8633 vm_map_clip_start(map, entry, start);
8634 }
8635 else {
8636 vm_map_unlock(map);
8637 return(KERN_INVALID_ADDRESS);
8638 }
8639
8640 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
8641 vm_map_clip_end(map, entry, end);
8642
8643 entry->behavior = new_behavior;
8644
8645 entry = entry->vme_next;
8646 }
8647
8648 vm_map_unlock(map);
8649 return(KERN_SUCCESS);
8650 }
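/*
 * Illustrative sketch, not part of the original source: a hypothetical
 * caller about to stream sequentially through a large mapped range could
 * hint the fault-clustering code as follows and restore the default when
 * done.  "map", "start" and "len" are assumptions describing an existing,
 * valid range.
 */
static __unused void
example_advise_sequential(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_size_t	len)
{
	kern_return_t	kr;

	/* ask for larger, forward-looking pagein clusters over the range */
	kr = vm_map_behavior_set(map, start, start + len,
				 VM_BEHAVIOR_SEQUENTIAL);
	if (kr != KERN_SUCCESS)
		return;

	/* ... touch the pages in ascending order ... */

	/* return to the default clustering policy */
	(void) vm_map_behavior_set(map, start, start + len,
				   VM_BEHAVIOR_DEFAULT);
}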
8651
8652
8653 #include <mach_kdb.h>
8654 #if MACH_KDB
8655 #include <ddb/db_output.h>
8656 #include <vm/vm_print.h>
8657
8658 #define printf db_printf
8659
8660 /*
8661 * Forward declarations for internal functions.
8662 */
8663 extern void vm_map_links_print(
8664 struct vm_map_links *links);
8665
8666 extern void vm_map_header_print(
8667 struct vm_map_header *header);
8668
8669 extern void vm_map_entry_print(
8670 vm_map_entry_t entry);
8671
8672 extern void vm_follow_entry(
8673 vm_map_entry_t entry);
8674
8675 extern void vm_follow_map(
8676 vm_map_t map);
8677
8678 /*
8679 * vm_map_links_print: [ debug ]
8680 */
8681 void
8682 vm_map_links_print(
8683 struct vm_map_links *links)
8684 {
8685 iprintf("prev = %08X next = %08X start = %016llX end = %016llX\n",
8686 links->prev,
8687 links->next,
8688 (unsigned long long)links->start,
8689 (unsigned long long)links->end);
8690 }
8691
8692 /*
8693 * vm_map_header_print: [ debug ]
8694 */
8695 void
8696 vm_map_header_print(
8697 struct vm_map_header *header)
8698 {
8699 vm_map_links_print(&header->links);
8700 iprintf("nentries = %08X, %sentries_pageable\n",
8701 header->nentries,
8702 (header->entries_pageable ? "" : "!"));
8703 }
8704
8705 /*
8706 * vm_follow_entry: [ debug ]
8707 */
8708 void
8709 vm_follow_entry(
8710 vm_map_entry_t entry)
8711 {
8712 int shadows;
8713
8714 iprintf("map entry %08X\n", entry);
8715
8716 db_indent += 2;
8717
8718 shadows = vm_follow_object(entry->object.vm_object);
8719 iprintf("Total objects : %d\n",shadows);
8720
8721 db_indent -= 2;
8722 }
8723
8724 /*
8725 * vm_map_entry_print: [ debug ]
8726 */
8727 void
8728 vm_map_entry_print(
8729 register vm_map_entry_t entry)
8730 {
8731 static const char *inheritance_name[4] =
8732 { "share", "copy", "none", "?"};
8733 static const char *behavior_name[4] =
8734 { "dflt", "rand", "seqtl", "rseqntl" };
8735
8736 iprintf("map entry %08X - prev = %08X next = %08X\n", entry, entry->vme_prev, entry->vme_next);
8737
8738 db_indent += 2;
8739
8740 vm_map_links_print(&entry->links);
8741
8742 iprintf("start = %016llX end = %016llX - prot=%x/%x/%s\n",
8743 (unsigned long long)entry->vme_start,
8744 (unsigned long long)entry->vme_end,
8745 entry->protection,
8746 entry->max_protection,
8747 inheritance_name[(entry->inheritance & 0x3)]);
8748
8749 iprintf("behavior = %s, wired_count = %d, user_wired_count = %d\n",
8750 behavior_name[(entry->behavior & 0x3)],
8751 entry->wired_count,
8752 entry->user_wired_count);
8753 iprintf("%sin_transition, %sneeds_wakeup\n",
8754 (entry->in_transition ? "" : "!"),
8755 (entry->needs_wakeup ? "" : "!"));
8756
8757 if (entry->is_sub_map) {
8758 iprintf("submap = %08X - offset = %016llX\n",
8759 entry->object.sub_map,
8760 (unsigned long long)entry->offset);
8761 } else {
8762 iprintf("object = %08X offset = %016llX - ",
8763 entry->object.vm_object,
8764 (unsigned long long)entry->offset);
8765 printf("%sis_shared, %sneeds_copy\n",
8766 (entry->is_shared ? "" : "!"),
8767 (entry->needs_copy ? "" : "!"));
8768 }
8769
8770 db_indent -= 2;
8771 }
8772
8773 /*
8774 * vm_follow_map: [ debug ]
8775 */
8776 void
8777 vm_follow_map(
8778 vm_map_t map)
8779 {
8780 register vm_map_entry_t entry;
8781
8782 iprintf("task map %08X\n", map);
8783
8784 db_indent += 2;
8785
8786 for (entry = vm_map_first_entry(map);
8787 entry && entry != vm_map_to_entry(map);
8788 entry = entry->vme_next) {
8789 vm_follow_entry(entry);
8790 }
8791
8792 db_indent -= 2;
8793 }
8794
8795 /*
8796 * vm_map_print: [ debug ]
8797 */
8798 void
8799 vm_map_print(
8800 db_addr_t inmap)
8801 {
8802 register vm_map_entry_t entry;
8803 vm_map_t map;
8804 #if TASK_SWAPPER
8805 char *swstate;
8806 #endif /* TASK_SWAPPER */
8807
8808 map = (vm_map_t)(long)
8809 inmap; /* Make sure we have the right type */
8810
8811 iprintf("task map %08X\n", map);
8812
8813 db_indent += 2;
8814
8815 vm_map_header_print(&map->hdr);
8816
8817 iprintf("pmap = %08X size = %08X ref = %d hint = %08X first_free = %08X\n",
8818 map->pmap,
8819 map->size,
8820 map->ref_count,
8821 map->hint,
8822 map->first_free);
8823
8824 iprintf("%swait_for_space, %swiring_required, timestamp = %d\n",
8825 (map->wait_for_space ? "" : "!"),
8826 (map->wiring_required ? "" : "!"),
8827 map->timestamp);
8828
8829 #if TASK_SWAPPER
8830 switch (map->sw_state) {
8831 case MAP_SW_IN:
8832 swstate = "SW_IN";
8833 break;
8834 case MAP_SW_OUT:
8835 swstate = "SW_OUT";
8836 break;
8837 default:
8838 swstate = "????";
8839 break;
8840 }
8841 iprintf("res = %d, sw_state = %s\n", map->res_count, swstate);
8842 #endif /* TASK_SWAPPER */
8843
8844 for (entry = vm_map_first_entry(map);
8845 entry && entry != vm_map_to_entry(map);
8846 entry = entry->vme_next) {
8847 vm_map_entry_print(entry);
8848 }
8849
8850 db_indent -= 2;
8851 }
8852
8853 /*
8854 * Routine: vm_map_copy_print
8855 * Purpose:
8856 * Pretty-print a copy object for ddb.
8857 */
8858
8859 void
8860 vm_map_copy_print(
8861 db_addr_t incopy)
8862 {
8863 vm_map_copy_t copy;
8864 vm_map_entry_t entry;
8865
8866 copy = (vm_map_copy_t)(long)
8867 incopy; /* Make sure we have the right type */
8868
8869 printf("copy object 0x%x\n", copy);
8870
8871 db_indent += 2;
8872
8873 iprintf("type=%d", copy->type);
8874 switch (copy->type) {
8875 case VM_MAP_COPY_ENTRY_LIST:
8876 printf("[entry_list]");
8877 break;
8878
8879 case VM_MAP_COPY_OBJECT:
8880 printf("[object]");
8881 break;
8882
8883 case VM_MAP_COPY_KERNEL_BUFFER:
8884 printf("[kernel_buffer]");
8885 break;
8886
8887 default:
8888 printf("[bad type]");
8889 break;
8890 }
8891 printf(", offset=0x%llx", (unsigned long long)copy->offset);
8892 printf(", size=0x%x\n", copy->size);
8893
8894 switch (copy->type) {
8895 case VM_MAP_COPY_ENTRY_LIST:
8896 vm_map_header_print(&copy->cpy_hdr);
8897 for (entry = vm_map_copy_first_entry(copy);
8898 entry && entry != vm_map_copy_to_entry(copy);
8899 entry = entry->vme_next) {
8900 vm_map_entry_print(entry);
8901 }
8902 break;
8903
8904 case VM_MAP_COPY_OBJECT:
8905 iprintf("object=0x%x\n", copy->cpy_object);
8906 break;
8907
8908 case VM_MAP_COPY_KERNEL_BUFFER:
8909 iprintf("kernel buffer=0x%x", copy->cpy_kdata);
8910 printf(", kalloc_size=0x%x\n", copy->cpy_kalloc_size);
8911 break;
8912
8913 }
8914
8915 db_indent -=2;
8916 }
8917
8918 /*
8919 * db_vm_map_total_size(map) [ debug ]
8920 *
8921 * return the total virtual size (in bytes) of the map
8922 */
8923 vm_map_size_t
8924 db_vm_map_total_size(
8925 db_addr_t inmap)
8926 {
8927 vm_map_entry_t entry;
8928 vm_map_size_t total;
8929 vm_map_t map;
8930
8931 map = (vm_map_t)(long)
8932 inmap; /* Make sure we have the right type */
8933
8934 total = 0;
8935 for (entry = vm_map_first_entry(map);
8936 entry != vm_map_to_entry(map);
8937 entry = entry->vme_next) {
8938 total += entry->vme_end - entry->vme_start;
8939 }
8940
8941 return total;
8942 }
8943
8944 #endif /* MACH_KDB */
8945
8946 /*
8947 * Routine: vm_map_entry_insert
8948 *
8949 * Description: This routine inserts a new vm_map_entry in a locked map.
8950 */
8951 vm_map_entry_t
8952 vm_map_entry_insert(
8953 vm_map_t map,
8954 vm_map_entry_t insp_entry,
8955 vm_map_offset_t start,
8956 vm_map_offset_t end,
8957 vm_object_t object,
8958 vm_object_offset_t offset,
8959 boolean_t needs_copy,
8960 boolean_t is_shared,
8961 boolean_t in_transition,
8962 vm_prot_t cur_protection,
8963 vm_prot_t max_protection,
8964 vm_behavior_t behavior,
8965 vm_inherit_t inheritance,
8966 unsigned wired_count)
8967 {
8968 vm_map_entry_t new_entry;
8969
8970 assert(insp_entry != (vm_map_entry_t)0);
8971
8972 new_entry = vm_map_entry_create(map);
8973
8974 new_entry->vme_start = start;
8975 new_entry->vme_end = end;
8976 assert(page_aligned(new_entry->vme_start));
8977 assert(page_aligned(new_entry->vme_end));
8978
8979 new_entry->object.vm_object = object;
8980 new_entry->offset = offset;
8981 new_entry->is_shared = is_shared;
8982 new_entry->is_sub_map = FALSE;
8983 new_entry->needs_copy = needs_copy;
8984 new_entry->in_transition = in_transition;
8985 new_entry->needs_wakeup = FALSE;
8986 new_entry->inheritance = inheritance;
8987 new_entry->protection = cur_protection;
8988 new_entry->max_protection = max_protection;
8989 new_entry->behavior = behavior;
8990 new_entry->wired_count = wired_count;
8991 new_entry->user_wired_count = 0;
8992 new_entry->use_pmap = FALSE;
8993 new_entry->alias = 0;
8994
8995 /*
8996 * Insert the new entry into the list.
8997 */
8998
8999 vm_map_entry_link(map, insp_entry, new_entry);
9000 map->size += end - start;
9001
9002 /*
9003 * Update the free space hint and the lookup hint.
9004 */
9005
9006 SAVE_HINT_MAP_WRITE(map, new_entry);
9007 return new_entry;
9008 }
9009
9010 /*
9011 * Routine: vm_map_remap_extract
9012 *
9013 * Description: This routine returns a vm_map_entry list from a map.
9014 */
9015 static kern_return_t
9016 vm_map_remap_extract(
9017 vm_map_t map,
9018 vm_map_offset_t addr,
9019 vm_map_size_t size,
9020 boolean_t copy,
9021 struct vm_map_header *map_header,
9022 vm_prot_t *cur_protection,
9023 vm_prot_t *max_protection,
9024 /* What, no behavior? */
9025 vm_inherit_t inheritance,
9026 boolean_t pageable)
9027 {
9028 kern_return_t result;
9029 vm_map_size_t mapped_size;
9030 vm_map_size_t tmp_size;
9031 vm_map_entry_t src_entry; /* result of last map lookup */
9032 vm_map_entry_t new_entry;
9033 vm_object_offset_t offset;
9034 vm_map_offset_t map_address;
9035 vm_map_offset_t src_start; /* start of entry to map */
9036 vm_map_offset_t src_end; /* end of region to be mapped */
9037 vm_object_t object;
9038 vm_map_version_t version;
9039 boolean_t src_needs_copy;
9040 boolean_t new_entry_needs_copy;
9041
9042 assert(map != VM_MAP_NULL);
9043 assert(size != 0 && size == vm_map_round_page(size));
9044 assert(inheritance == VM_INHERIT_NONE ||
9045 inheritance == VM_INHERIT_COPY ||
9046 inheritance == VM_INHERIT_SHARE);
9047
9048 /*
9049 * Compute start and end of region.
9050 */
9051 src_start = vm_map_trunc_page(addr);
9052 src_end = vm_map_round_page(src_start + size);
9053
9054 /*
9055 * Initialize map_header.
9056 */
9057 map_header->links.next = (struct vm_map_entry *)&map_header->links;
9058 map_header->links.prev = (struct vm_map_entry *)&map_header->links;
9059 map_header->nentries = 0;
9060 map_header->entries_pageable = pageable;
9061
9062 *cur_protection = VM_PROT_ALL;
9063 *max_protection = VM_PROT_ALL;
9064
9065 map_address = 0;
9066 mapped_size = 0;
9067 result = KERN_SUCCESS;
9068
9069 /*
9070 * The specified source virtual space might correspond to
9071 * multiple map entries, need to loop on them.
9072 */
9073 vm_map_lock(map);
9074 while (mapped_size != size) {
9075 vm_map_size_t entry_size;
9076
9077 /*
9078 * Find the beginning of the region.
9079 */
9080 if (! vm_map_lookup_entry(map, src_start, &src_entry)) {
9081 result = KERN_INVALID_ADDRESS;
9082 break;
9083 }
9084
9085 if (src_start < src_entry->vme_start ||
9086 (mapped_size && src_start != src_entry->vme_start)) {
9087 result = KERN_INVALID_ADDRESS;
9088 break;
9089 }
9090
9091 if(src_entry->is_sub_map) {
9092 result = KERN_INVALID_ADDRESS;
9093 break;
9094 }
9095
9096 tmp_size = size - mapped_size;
9097 if (src_end > src_entry->vme_end)
9098 tmp_size -= (src_end - src_entry->vme_end);
9099
9100 entry_size = (vm_map_size_t)(src_entry->vme_end -
9101 src_entry->vme_start);
9102
9103 if(src_entry->is_sub_map) {
9104 vm_map_reference(src_entry->object.sub_map);
9105 object = VM_OBJECT_NULL;
9106 } else {
9107 object = src_entry->object.vm_object;
9108
9109 if (object == VM_OBJECT_NULL) {
9110 object = vm_object_allocate(entry_size);
9111 src_entry->offset = 0;
9112 src_entry->object.vm_object = object;
9113 } else if (object->copy_strategy !=
9114 MEMORY_OBJECT_COPY_SYMMETRIC) {
9115 /*
9116 * We are already using an asymmetric
9117 * copy, and therefore we already have
9118 * the right object.
9119 */
9120 assert(!src_entry->needs_copy);
9121 } else if (src_entry->needs_copy || object->shadowed ||
9122 (object->internal && !object->true_share &&
9123 !src_entry->is_shared &&
9124 object->size > entry_size)) {
9125
9126 vm_object_shadow(&src_entry->object.vm_object,
9127 &src_entry->offset,
9128 entry_size);
9129
9130 if (!src_entry->needs_copy &&
9131 (src_entry->protection & VM_PROT_WRITE)) {
9132 vm_prot_t prot;
9133
9134 prot = src_entry->protection & ~VM_PROT_WRITE;
9135 #ifdef STACK_ONLY_NX
9136 if (src_entry->alias != VM_MEMORY_STACK && prot)
9137 prot |= VM_PROT_EXECUTE;
9138 #endif
9139 if(map->mapped) {
9140 vm_object_pmap_protect(
9141 src_entry->object.vm_object,
9142 src_entry->offset,
9143 entry_size,
9144 PMAP_NULL,
9145 src_entry->vme_start,
9146 prot);
9147 } else {
9148 pmap_protect(vm_map_pmap(map),
9149 src_entry->vme_start,
9150 src_entry->vme_end,
9151 prot);
9152 }
9153 }
9154
9155 object = src_entry->object.vm_object;
9156 src_entry->needs_copy = FALSE;
9157 }
9158
9159
9160 vm_object_lock(object);
9161 object->ref_count++; /* object ref. for new entry */
9162 VM_OBJ_RES_INCR(object);
9163 if (object->copy_strategy ==
9164 MEMORY_OBJECT_COPY_SYMMETRIC) {
9165 object->copy_strategy =
9166 MEMORY_OBJECT_COPY_DELAY;
9167 }
9168 vm_object_unlock(object);
9169 }
9170
9171 offset = src_entry->offset + (src_start - src_entry->vme_start);
9172
9173 new_entry = _vm_map_entry_create(map_header);
9174 vm_map_entry_copy(new_entry, src_entry);
9175 new_entry->use_pmap = FALSE; /* clr address space specifics */
9176
9177 new_entry->vme_start = map_address;
9178 new_entry->vme_end = map_address + tmp_size;
9179 new_entry->inheritance = inheritance;
9180 new_entry->offset = offset;
9181
9182 /*
9183 * The new region has to be copied now if required.
9184 */
9185 RestartCopy:
9186 if (!copy) {
9187 src_entry->is_shared = TRUE;
9188 new_entry->is_shared = TRUE;
9189 if (!(new_entry->is_sub_map))
9190 new_entry->needs_copy = FALSE;
9191
9192 } else if (src_entry->is_sub_map) {
9193 /* make this a COW sub_map if not already */
9194 new_entry->needs_copy = TRUE;
9195 object = VM_OBJECT_NULL;
9196 } else if (src_entry->wired_count == 0 &&
9197 vm_object_copy_quickly(&new_entry->object.vm_object,
9198 new_entry->offset,
9199 (new_entry->vme_end -
9200 new_entry->vme_start),
9201 &src_needs_copy,
9202 &new_entry_needs_copy)) {
9203
9204 new_entry->needs_copy = new_entry_needs_copy;
9205 new_entry->is_shared = FALSE;
9206
9207 /*
9208 * Handle copy_on_write semantics.
9209 */
9210 if (src_needs_copy && !src_entry->needs_copy) {
9211 vm_prot_t prot;
9212
9213 prot = src_entry->protection & ~VM_PROT_WRITE;
9214 #ifdef STACK_ONLY_NX
9215 if (src_entry->alias != VM_MEMORY_STACK && prot)
9216 prot |= VM_PROT_EXECUTE;
9217 #endif
9218 vm_object_pmap_protect(object,
9219 offset,
9220 entry_size,
9221 ((src_entry->is_shared
9222 || map->mapped) ?
9223 PMAP_NULL : map->pmap),
9224 src_entry->vme_start,
9225 prot);
9226
9227 src_entry->needs_copy = TRUE;
9228 }
9229 /*
9230 * Throw away the old object reference of the new entry.
9231 */
9232 vm_object_deallocate(object);
9233
9234 } else {
9235 new_entry->is_shared = FALSE;
9236
9237 /*
9238 * The map can be safely unlocked since we
9239 * already hold a reference on the object.
9240 *
9241 * Record the timestamp of the map for later
9242 * verification, and unlock the map.
9243 */
9244 version.main_timestamp = map->timestamp;
9245 vm_map_unlock(map); /* Increments timestamp once! */
9246
9247 /*
9248 * Perform the copy.
9249 */
9250 if (src_entry->wired_count > 0) {
9251 vm_object_lock(object);
9252 result = vm_object_copy_slowly(
9253 object,
9254 offset,
9255 entry_size,
9256 THREAD_UNINT,
9257 &new_entry->object.vm_object);
9258
9259 new_entry->offset = 0;
9260 new_entry->needs_copy = FALSE;
9261 } else {
9262 result = vm_object_copy_strategically(
9263 object,
9264 offset,
9265 entry_size,
9266 &new_entry->object.vm_object,
9267 &new_entry->offset,
9268 &new_entry_needs_copy);
9269
9270 new_entry->needs_copy = new_entry_needs_copy;
9271 }
9272
9273 /*
9274 * Throw away the old object reference of the new entry.
9275 */
9276 vm_object_deallocate(object);
9277
9278 if (result != KERN_SUCCESS &&
9279 result != KERN_MEMORY_RESTART_COPY) {
9280 _vm_map_entry_dispose(map_header, new_entry);
9281 break;
9282 }
9283
9284 /*
9285 * Verify that the map has not substantially
9286 * changed while the copy was being made.
9287 */
9288
9289 vm_map_lock(map);
9290 if (version.main_timestamp + 1 != map->timestamp) {
9291 /*
9292 * Simple version comparison failed.
9293 *
9294 * Retry the lookup and verify that the
9295 * same object/offset are still present.
9296 */
9297 vm_object_deallocate(new_entry->
9298 object.vm_object);
9299 _vm_map_entry_dispose(map_header, new_entry);
9300 if (result == KERN_MEMORY_RESTART_COPY)
9301 result = KERN_SUCCESS;
9302 continue;
9303 }
9304
9305 if (result == KERN_MEMORY_RESTART_COPY) {
9306 vm_object_reference(object);
9307 goto RestartCopy;
9308 }
9309 }
9310
9311 _vm_map_entry_link(map_header,
9312 map_header->links.prev, new_entry);
9313
9314 *cur_protection &= src_entry->protection;
9315 *max_protection &= src_entry->max_protection;
9316
9317 map_address += tmp_size;
9318 mapped_size += tmp_size;
9319 src_start += tmp_size;
9320
9321 } /* end while */
9322
9323 vm_map_unlock(map);
9324 if (result != KERN_SUCCESS) {
9325 /*
9326 * Free all allocated elements.
9327 */
9328 for (src_entry = map_header->links.next;
9329 src_entry != (struct vm_map_entry *)&map_header->links;
9330 src_entry = new_entry) {
9331 new_entry = src_entry->vme_next;
9332 _vm_map_entry_unlink(map_header, src_entry);
9333 vm_object_deallocate(src_entry->object.vm_object);
9334 _vm_map_entry_dispose(map_header, src_entry);
9335 }
9336 }
9337 return result;
9338 }
9339
9340 /*
9341 * Routine: vm_remap
9342 *
9343 * Map portion of a task's address space.
9344 * Mapped region must not overlap more than
9345 * one vm memory object. Protections and
9346 * inheritance attributes remain the same
9347 * as in the original task and are out parameters.
9348 * Source and Target task can be identical.
9349 * Other attributes are identical to those for vm_map().
9350 */
9351 kern_return_t
9352 vm_map_remap(
9353 vm_map_t target_map,
9354 vm_map_address_t *address,
9355 vm_map_size_t size,
9356 vm_map_offset_t mask,
9357 boolean_t anywhere,
9358 vm_map_t src_map,
9359 vm_map_offset_t memory_address,
9360 boolean_t copy,
9361 vm_prot_t *cur_protection,
9362 vm_prot_t *max_protection,
9363 vm_inherit_t inheritance)
9364 {
9365 kern_return_t result;
9366 vm_map_entry_t entry;
9367 vm_map_entry_t insp_entry = VM_MAP_ENTRY_NULL;
9368 vm_map_entry_t new_entry;
9369 struct vm_map_header map_header;
9370
9371 if (target_map == VM_MAP_NULL)
9372 return KERN_INVALID_ARGUMENT;
9373
9374 switch (inheritance) {
9375 case VM_INHERIT_NONE:
9376 case VM_INHERIT_COPY:
9377 case VM_INHERIT_SHARE:
9378 if (size != 0 && src_map != VM_MAP_NULL)
9379 break;
9380 /*FALL THRU*/
9381 default:
9382 return KERN_INVALID_ARGUMENT;
9383 }
9384
9385 size = vm_map_round_page(size);
9386
9387 result = vm_map_remap_extract(src_map, memory_address,
9388 size, copy, &map_header,
9389 cur_protection,
9390 max_protection,
9391 inheritance,
9392 target_map->hdr.
9393 entries_pageable);
9394
9395 if (result != KERN_SUCCESS) {
9396 return result;
9397 }
9398
9399 /*
9400 * Allocate/check a range of free virtual address
9401 * space for the target
9402 */
9403 *address = vm_map_trunc_page(*address);
9404 vm_map_lock(target_map);
9405 result = vm_map_remap_range_allocate(target_map, address, size,
9406 mask, anywhere, &insp_entry);
9407
9408 for (entry = map_header.links.next;
9409 entry != (struct vm_map_entry *)&map_header.links;
9410 entry = new_entry) {
9411 new_entry = entry->vme_next;
9412 _vm_map_entry_unlink(&map_header, entry);
9413 if (result == KERN_SUCCESS) {
9414 entry->vme_start += *address;
9415 entry->vme_end += *address;
9416 vm_map_entry_link(target_map, insp_entry, entry);
9417 insp_entry = entry;
9418 } else {
9419 if (!entry->is_sub_map) {
9420 vm_object_deallocate(entry->object.vm_object);
9421 } else {
9422 vm_map_deallocate(entry->object.sub_map);
9423 }
9424 _vm_map_entry_dispose(&map_header, entry);
9425 }
9426 }
9427
9428 if (result == KERN_SUCCESS) {
9429 target_map->size += size;
9430 SAVE_HINT_MAP_WRITE(target_map, insp_entry);
9431 }
9432 vm_map_unlock(target_map);
9433
9434 if (result == KERN_SUCCESS && target_map->wiring_required)
9435 result = vm_map_wire(target_map, *address,
9436 *address + size, *cur_protection, TRUE);
9437 return result;
9438 }
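/*
 * Illustrative sketch, not part of the original source: a hypothetical
 * caller could share a page-aligned region of one map into another,
 * letting the kernel choose the destination address.  "dst_map",
 * "src_map", "src_addr" and "len" are assumptions; the returned
 * protections are the intersection of the source entries' protections.
 */
static __unused kern_return_t
example_share_region(
	vm_map_t		dst_map,
	vm_map_t		src_map,
	vm_map_offset_t		src_addr,
	vm_map_size_t		len,
	vm_map_address_t	*dst_addr)	/* OUT */
{
	vm_prot_t	cur_prot, max_prot;

	*dst_addr = 0;		/* hint only: search from the bottom of dst_map */
	return vm_map_remap(dst_map, dst_addr, len,
			    (vm_map_offset_t)0,	/* no alignment mask */
			    TRUE,		/* anywhere */
			    src_map, src_addr,
			    FALSE,		/* share rather than copy */
			    &cur_prot, &max_prot,
			    VM_INHERIT_SHARE);
}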
9439
9440 /*
9441 * Routine: vm_map_remap_range_allocate
9442 *
9443 * Description:
9444 * Allocate a range in the specified virtual address map.
9445 * Returns the address and the map entry just before the allocated
9446 * range.
9447 *
9448 * Map must be locked.
9449 */
9450
9451 static kern_return_t
9452 vm_map_remap_range_allocate(
9453 vm_map_t map,
9454 vm_map_address_t *address, /* IN/OUT */
9455 vm_map_size_t size,
9456 vm_map_offset_t mask,
9457 boolean_t anywhere,
9458 vm_map_entry_t *map_entry) /* OUT */
9459 {
9460 register vm_map_entry_t entry;
9461 register vm_map_offset_t start;
9462 register vm_map_offset_t end;
9463
9464 StartAgain: ;
9465
9466 start = *address;
9467
9468 if (anywhere)
9469 {
9470 /*
9471 * Calculate the first possible address.
9472 */
9473
9474 if (start < map->min_offset)
9475 start = map->min_offset;
9476 if (start > map->max_offset)
9477 return(KERN_NO_SPACE);
9478
9479 /*
9480 * Look for the first possible address;
9481 * if there's already something at this
9482 * address, we have to start after it.
9483 */
9484
9485 assert(first_free_is_valid(map));
9486 if (start == map->min_offset) {
9487 if ((entry = map->first_free) != vm_map_to_entry(map))
9488 start = entry->vme_end;
9489 } else {
9490 vm_map_entry_t tmp_entry;
9491 if (vm_map_lookup_entry(map, start, &tmp_entry))
9492 start = tmp_entry->vme_end;
9493 entry = tmp_entry;
9494 }
9495
9496 /*
9497 * In any case, the "entry" always precedes
9498 * the proposed new region throughout the
9499 * loop:
9500 */
9501
9502 while (TRUE) {
9503 register vm_map_entry_t next;
9504
9505 /*
9506 * Find the end of the proposed new region.
9507 * Be sure we didn't go beyond the end, or
9508 * wrap around the address.
9509 */
9510
9511 end = ((start + mask) & ~mask);
9512 if (end < start)
9513 return(KERN_NO_SPACE);
9514 start = end;
9515 end += size;
9516
9517 if ((end > map->max_offset) || (end < start)) {
9518 if (map->wait_for_space) {
9519 if (size <= (map->max_offset -
9520 map->min_offset)) {
9521 assert_wait((event_t) map, THREAD_INTERRUPTIBLE);
9522 vm_map_unlock(map);
9523 thread_block(THREAD_CONTINUE_NULL);
9524 vm_map_lock(map);
9525 goto StartAgain;
9526 }
9527 }
9528
9529 return(KERN_NO_SPACE);
9530 }
9531
9532 /*
9533 * If there are no more entries, we must win.
9534 */
9535
9536 next = entry->vme_next;
9537 if (next == vm_map_to_entry(map))
9538 break;
9539
9540 /*
9541 * If there is another entry, it must be
9542 * after the end of the potential new region.
9543 */
9544
9545 if (next->vme_start >= end)
9546 break;
9547
9548 /*
9549 * Didn't fit -- move to the next entry.
9550 */
9551
9552 entry = next;
9553 start = entry->vme_end;
9554 }
9555 *address = start;
9556 } else {
9557 vm_map_entry_t temp_entry;
9558
9559 /*
9560 * Verify that:
9561 * the address doesn't itself violate
9562 * the mask requirement.
9563 */
9564
9565 if ((start & mask) != 0)
9566 return(KERN_NO_SPACE);
9567
9568
9569 /*
9570 * ... the address is within bounds
9571 */
9572
9573 end = start + size;
9574
9575 if ((start < map->min_offset) ||
9576 (end > map->max_offset) ||
9577 (start >= end)) {
9578 return(KERN_INVALID_ADDRESS);
9579 }
9580
9581 /*
9582 * ... the starting address isn't allocated
9583 */
9584
9585 if (vm_map_lookup_entry(map, start, &temp_entry))
9586 return(KERN_NO_SPACE);
9587
9588 entry = temp_entry;
9589
9590 /*
9591 * ... the next region doesn't overlap the
9592 * end point.
9593 */
9594
9595 if ((entry->vme_next != vm_map_to_entry(map)) &&
9596 (entry->vme_next->vme_start < end))
9597 return(KERN_NO_SPACE);
9598 }
9599 *map_entry = entry;
9600 return(KERN_SUCCESS);
9601 }
9602
9603 /*
9604 * vm_map_switch:
9605 *
9606 * Set the address map for the current thread to the specified map
9607 */
9608
9609 vm_map_t
9610 vm_map_switch(
9611 vm_map_t map)
9612 {
9613 int mycpu;
9614 thread_t thread = current_thread();
9615 vm_map_t oldmap = thread->map;
9616
9617 mp_disable_preemption();
9618 mycpu = cpu_number();
9619
9620 /*
9621 * Deactivate the current map and activate the requested map
9622 */
9623 PMAP_SWITCH_USER(thread, map, mycpu);
9624
9625 mp_enable_preemption();
9626 return(oldmap);
9627 }
9628
9629
9630 /*
9631 * Routine: vm_map_write_user
9632 *
9633 * Description:
9634 * Copy out data from a kernel space into space in the
9635 * destination map. The space must already exist in the
9636 * destination map.
9637 * NOTE: This routine should only be called by threads
9638 * which can block on a page fault, i.e., kernel-mode user
9639 * threads.
9640 *
9641 */
9642 kern_return_t
9643 vm_map_write_user(
9644 vm_map_t map,
9645 void *src_p,
9646 vm_map_address_t dst_addr,
9647 vm_size_t size)
9648 {
9649 kern_return_t kr = KERN_SUCCESS;
9650
9651 if(current_map() == map) {
9652 if (copyout(src_p, dst_addr, size)) {
9653 kr = KERN_INVALID_ADDRESS;
9654 }
9655 } else {
9656 vm_map_t oldmap;
9657
9658 /* take on the identity of the target map while doing */
9659 /* the transfer */
9660
9661 vm_map_reference(map);
9662 oldmap = vm_map_switch(map);
9663 if (copyout(src_p, dst_addr, size)) {
9664 kr = KERN_INVALID_ADDRESS;
9665 }
9666 vm_map_switch(oldmap);
9667 vm_map_deallocate(map);
9668 }
9669 return kr;
9670 }
9671
9672 /*
9673 * Routine: vm_map_read_user
9674 *
9675 * Description:
9676 * Copy in data from a user space source map into the
9677 * kernel map. The space must already exist in the
9678 * kernel map.
9679 * NOTE: This routine should only be called by threads
9680 * which can block on a page fault, i.e., kernel-mode user
9681 * threads.
9682 *
9683 */
9684 kern_return_t
9685 vm_map_read_user(
9686 vm_map_t map,
9687 vm_map_address_t src_addr,
9688 void *dst_p,
9689 vm_size_t size)
9690 {
9691 kern_return_t kr = KERN_SUCCESS;
9692
9693 if(current_map() == map) {
9694 if (copyin(src_addr, dst_p, size)) {
9695 kr = KERN_INVALID_ADDRESS;
9696 }
9697 } else {
9698 vm_map_t oldmap;
9699
9700 /* take on the identity of the target map while doing */
9701 /* the transfer */
9702
9703 vm_map_reference(map);
9704 oldmap = vm_map_switch(map);
9705 if (copyin(src_addr, dst_p, size)) {
9706 kr = KERN_INVALID_ADDRESS;
9707 }
9708 vm_map_switch(oldmap);
9709 vm_map_deallocate(map);
9710 }
9711 return kr;
9712 }
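/*
 * Illustrative sketch, not part of the original source: a hypothetical
 * kernel-mode routine, running in a context that may take page faults,
 * could move a small structure to and from a (possibly non-current) user
 * map with the two helpers above.  "usr_map", "usr_addr" and
 * "example_blob_t" are assumptions.
 */
typedef struct { int version; int flags; } example_blob_t;

static __unused kern_return_t
example_exchange_blob(
	vm_map_t		usr_map,
	vm_map_address_t	usr_addr)
{
	example_blob_t	blob;
	kern_return_t	kr;

	/* pull the user copy in (switches maps if usr_map isn't current) */
	kr = vm_map_read_user(usr_map, usr_addr, &blob, sizeof (blob));
	if (kr != KERN_SUCCESS)
		return kr;

	blob.flags |= 0x1;	/* arbitrary update */

	/* push the updated copy back out to the same user address */
	return vm_map_write_user(usr_map, &blob, usr_addr, sizeof (blob));
}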
9713
9714
9715 /*
9716 * vm_map_check_protection:
9717 *
9718 * Assert that the target map allows the specified
9719 * privilege on the entire address region given.
9720 * The entire region must be allocated.
9721 */
9722 boolean_t vm_map_check_protection(map, start, end, protection)
9723 register vm_map_t map;
9724 register vm_map_offset_t start;
9725 register vm_map_offset_t end;
9726 register vm_prot_t protection;
9727 {
9728 register vm_map_entry_t entry;
9729 vm_map_entry_t tmp_entry;
9730
9731 vm_map_lock(map);
9732
9733 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
9734 {
9735 vm_map_unlock(map);
9736 return (FALSE);
9737 }
9738
9739 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
9740 vm_map_unlock(map);
9741 return(FALSE);
9742 }
9743
9744 entry = tmp_entry;
9745
9746 while (start < end) {
9747 if (entry == vm_map_to_entry(map)) {
9748 vm_map_unlock(map);
9749 return(FALSE);
9750 }
9751
9752 /*
9753 * No holes allowed!
9754 */
9755
9756 if (start < entry->vme_start) {
9757 vm_map_unlock(map);
9758 return(FALSE);
9759 }
9760
9761 /*
9762 * Check protection associated with entry.
9763 */
9764
9765 if ((entry->protection & protection) != protection) {
9766 vm_map_unlock(map);
9767 return(FALSE);
9768 }
9769
9770 /* go to next entry */
9771
9772 start = entry->vme_end;
9773 entry = entry->vme_next;
9774 }
9775 vm_map_unlock(map);
9776 return(TRUE);
9777 }
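/*
 * Illustrative sketch, not part of the original source: before operating
 * directly on a caller-supplied range, a hypothetical routine could verify
 * that the whole range is mapped read/write.  "map", "start" and "len"
 * are assumptions.
 */
static __unused kern_return_t
example_require_rw(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_size_t	len)
{
	/* fails on any hole or any entry lacking the requested access */
	if (!vm_map_check_protection(map, start, start + len,
				     VM_PROT_READ | VM_PROT_WRITE))
		return KERN_PROTECTION_FAILURE;
	return KERN_SUCCESS;
}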
9778
9779 kern_return_t
9780 vm_map_purgable_control(
9781 vm_map_t map,
9782 vm_map_offset_t address,
9783 vm_purgable_t control,
9784 int *state)
9785 {
9786 vm_map_entry_t entry;
9787 vm_object_t object;
9788 kern_return_t kr;
9789
9790 /*
9791 * Vet all the input parameters and current type and state of the
9792 * underlying object. Return with an error if anything is amiss.
9793 */
9794 if (map == VM_MAP_NULL)
9795 return(KERN_INVALID_ARGUMENT);
9796
9797 if (control != VM_PURGABLE_SET_STATE &&
9798 control != VM_PURGABLE_GET_STATE)
9799 return(KERN_INVALID_ARGUMENT);
9800
9801 if (control == VM_PURGABLE_SET_STATE &&
9802 (*state < VM_PURGABLE_STATE_MIN ||
9803 *state > VM_PURGABLE_STATE_MAX))
9804 return(KERN_INVALID_ARGUMENT);
9805
9806 vm_map_lock(map);
9807
9808 if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {
9809
9810 /*
9811 * Must pass a valid non-submap address.
9812 */
9813 vm_map_unlock(map);
9814 return(KERN_INVALID_ADDRESS);
9815 }
9816
9817 if ((entry->protection & VM_PROT_WRITE) == 0) {
9818 /*
9819 * Can't apply purgable controls to something you can't write.
9820 */
9821 vm_map_unlock(map);
9822 return(KERN_PROTECTION_FAILURE);
9823 }
9824
9825 object = entry->object.vm_object;
9826 if (object == VM_OBJECT_NULL) {
9827 /*
9828 * Object must already be present or it can't be purgable.
9829 */
9830 vm_map_unlock(map);
9831 return KERN_INVALID_ARGUMENT;
9832 }
9833
9834 vm_object_lock(object);
9835
9836 if (entry->offset != 0 ||
9837 entry->vme_end - entry->vme_start != object->size) {
9838 /*
9839 * Can only apply purgable controls to the whole (existing)
9840 * object at once.
9841 */
9842 vm_map_unlock(map);
9843 vm_object_unlock(object);
9844 return KERN_INVALID_ARGUMENT;
9845 }
9846
9847 vm_map_unlock(map);
9848
9849 kr = vm_object_purgable_control(object, control, state);
9850
9851 vm_object_unlock(object);
9852
9853 return kr;
9854 }
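/*
 * Illustrative sketch, not part of the original source: a hypothetical
 * owner of a purgeable allocation that exactly covers one VM object could
 * mark it volatile while it is idle.  "map" and "addr" are assumptions;
 * the address must fall in a writable entry that maps the whole object at
 * offset 0, as enforced above.
 */
static __unused kern_return_t
example_mark_volatile(
	vm_map_t	map,
	vm_map_offset_t	addr)
{
	int	state = VM_PURGABLE_VOLATILE;

	/* the VM system may now reclaim the object's pages under pressure */
	return vm_map_purgable_control(map, addr,
				       VM_PURGABLE_SET_STATE, &state);
}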
9855
9856 kern_return_t
9857 vm_map_page_info(
9858 vm_map_t target_map,
9859 vm_map_offset_t offset,
9860 int *disposition,
9861 int *ref_count)
9862 {
9863 vm_map_entry_t map_entry;
9864 vm_object_t object;
9865 vm_page_t m;
9866
9867 restart_page_query:
9868 *disposition = 0;
9869 *ref_count = 0;
9870 vm_map_lock(target_map);
9871 if(!vm_map_lookup_entry(target_map, offset, &map_entry)) {
9872 vm_map_unlock(target_map);
9873 return KERN_FAILURE;
9874 }
9875 offset -= map_entry->vme_start; /* adjust to offset within entry */
9876 offset += map_entry->offset; /* adjust to target object offset */
9877 if(map_entry->object.vm_object != VM_OBJECT_NULL) {
9878 if(!map_entry->is_sub_map) {
9879 object = map_entry->object.vm_object;
9880 } else {
9881 vm_map_unlock(target_map);
9882 target_map = map_entry->object.sub_map;
9883 goto restart_page_query;
9884 }
9885 } else {
9886 vm_map_unlock(target_map);
9887 return KERN_FAILURE;
9888 }
9889 vm_object_lock(object);
9890 vm_map_unlock(target_map);
9891 while(TRUE) {
9892 m = vm_page_lookup(object, offset);
9893 if (m != VM_PAGE_NULL) {
9894 *disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
9895 break;
9896 } else {
9897 if(object->shadow) {
9898 offset += object->shadow_offset;
9899 vm_object_unlock(object);
9900 object = object->shadow;
9901 vm_object_lock(object);
9902 continue;
9903 }
9904 vm_object_unlock(object);
9905 return KERN_FAILURE;
9906 }
9907 }
9908
9909 /* The ref_count is not strictly accurate; it measures the number */
9910 /* of entities holding a ref on the object. They may not be mapping */
9911 /* the object, or may not be mapping the section holding the */
9912 /* target page, but it's still a ballpark number and, though an */
9913 /* overcount, it picks up the copy-on-write cases. */
9914
9915 /* We could also get a picture of page sharing from pmap_attributes */
9916 /* but this would undercount, as only faulted-in mappings would */
9917 /* show up. */
9918
9919 *ref_count = object->ref_count;
9920
9921 if (m->fictitious) {
9922 *disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
9923 vm_object_unlock(object);
9924 return KERN_SUCCESS;
9925 }
9926
9927 if (m->dirty)
9928 *disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
9929 else if(pmap_is_modified(m->phys_page))
9930 *disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
9931
9932 if (m->reference)
9933 *disposition |= VM_PAGE_QUERY_PAGE_REF;
9934 else if(pmap_is_referenced(m->phys_page))
9935 *disposition |= VM_PAGE_QUERY_PAGE_REF;
9936
9937 vm_object_unlock(object);
9938 return KERN_SUCCESS;
9939
9940 }
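/*
 * Illustrative sketch, not part of the original source: a hypothetical
 * diagnostic could use the routine above to ask whether the page backing
 * a given address is resident and dirty.  "map" and "addr" are
 * assumptions.
 */
static __unused boolean_t
example_page_is_dirty(
	vm_map_t	map,
	vm_map_offset_t	addr)
{
	int	disposition, ref_count;

	if (vm_map_page_info(map, addr, &disposition, &ref_count) !=
	    KERN_SUCCESS)
		return FALSE;

	/* present somewhere in the shadow chain and modified */
	return (disposition & VM_PAGE_QUERY_PAGE_PRESENT) &&
	       (disposition & VM_PAGE_QUERY_PAGE_DIRTY) ? TRUE : FALSE;
}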
9941
9942
9943 /* For a given range, check all map entries. If the entry corresponds to */
9944 /* the old vm_region/map provided on the call, replace it with the */
9945 /* corresponding range in the new vm_region/map */
9946 kern_return_t vm_map_region_replace(
9947 vm_map_t target_map,
9948 ipc_port_t old_region,
9949 ipc_port_t new_region,
9950 vm_map_offset_t start,
9951 vm_map_offset_t end)
9952 {
9953 vm_named_entry_t old_object;
9954 vm_named_entry_t new_object;
9955 vm_map_t old_submap;
9956 vm_map_t new_submap;
9957 vm_map_offset_t addr;
9958 vm_map_entry_t entry;
9959 int nested_pmap = 0;
9960
9961
9962 vm_map_lock(target_map);
9963 old_object = (vm_named_entry_t)old_region->ip_kobject;
9964 new_object = (vm_named_entry_t)new_region->ip_kobject;
9965 if((!old_object->is_sub_map) || (!new_object->is_sub_map)) {
9966 vm_map_unlock(target_map);
9967 return KERN_INVALID_ARGUMENT;
9968 }
9969 old_submap = (vm_map_t)old_object->backing.map;
9970 new_submap = (vm_map_t)new_object->backing.map;
9971 vm_map_lock(old_submap);
9972 if((old_submap->min_offset != new_submap->min_offset) ||
9973 (old_submap->max_offset != new_submap->max_offset)) {
9974 vm_map_unlock(old_submap);
9975 vm_map_unlock(target_map);
9976 return KERN_INVALID_ARGUMENT;
9977 }
9978 if(!vm_map_lookup_entry(target_map, start, &entry)) {
9979 /* if the src is not contained, the entry precedes */
9980 /* our range */
9981 addr = entry->vme_start;
9982 if(entry == vm_map_to_entry(target_map)) {
9983 vm_map_unlock(old_submap);
9984 vm_map_unlock(target_map);
9985 return KERN_SUCCESS;
9986 }
9987 }
9988 if ((entry->use_pmap) &&
9989 (new_submap->pmap == NULL)) {
9990 new_submap->pmap = pmap_create((vm_map_size_t) 0, FALSE);
9991 if(new_submap->pmap == PMAP_NULL) {
9992 vm_map_unlock(old_submap);
9993 vm_map_unlock(target_map);
9994 return(KERN_NO_SPACE);
9995 }
9996 }
9997
9998 /*
9999 * Mark the new submap as "mapped", so that we get proper
10000 * cleanup of the sub-pmap when we unmap it.
10001 */
10002 new_submap->mapped = TRUE;
10003
10004 addr = entry->vme_start;
10005 vm_map_reference(old_submap);
10006 while((entry != vm_map_to_entry(target_map)) &&
10007 (entry->vme_start < end)) {
10008 if((entry->is_sub_map) &&
10009 (entry->object.sub_map == old_submap)) {
10010 if(entry->use_pmap) {
10011 if((start & 0x0fffffff) ||
10012 ((end - start) != 0x10000000)) {
10013 vm_map_unlock(old_submap);
10014 vm_map_deallocate(old_submap);
10015 vm_map_unlock(target_map);
10016 return KERN_INVALID_ARGUMENT;
10017 }
10018 nested_pmap = 1;
10019 }
10020 entry->object.sub_map = new_submap;
10021 vm_map_reference(new_submap);
10022 vm_map_deallocate(old_submap);
10023 }
10024 entry = entry->vme_next;
10025 addr = entry->vme_start;
10026 }
10027 if(nested_pmap) {
10028 #ifndef NO_NESTED_PMAP
10029 pmap_unnest(target_map->pmap, (addr64_t)start);
10030 if(target_map->mapped) {
10031 vm_map_submap_pmap_clean(target_map,
10032 start, end, old_submap, 0);
10033 }
10034 pmap_nest(target_map->pmap, new_submap->pmap,
10035 (addr64_t)start, (addr64_t)start,
10036 (uint64_t)(end - start));
10037 #endif /* NO_NESTED_PMAP */
10038 } else {
10039 vm_map_submap_pmap_clean(target_map,
10040 start, end, old_submap, 0);
10041 }
10042 vm_map_unlock(old_submap);
10043 vm_map_deallocate(old_submap);
10044 vm_map_unlock(target_map);
10045 return KERN_SUCCESS;
10046 }
10047
10048 /*
10049 * vm_map_msync
10050 *
10051 * Synchronises the specified memory range with its backing store
10052 * image by either flushing or cleaning the contents to the appropriate
10053 * memory manager, engaging in a memory_object_synchronize dialog with
10054 * the manager. The client doesn't return until the manager issues an
10055 * m_o_s_completed message. MIG magically converts the user task parameter
10056 * to the task's address map.
10057 *
10058 * interpretation of sync_flags
10059 * VM_SYNC_INVALIDATE - discard pages, only return precious
10060 * pages to manager.
10061 *
10062 * VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
10063 * - discard pages, write dirty or precious
10064 * pages back to memory manager.
10065 *
10066 * VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
10067 * - write dirty or precious pages back to
10068 * the memory manager.
10069 *
10070 * VM_SYNC_CONTIGUOUS - does everything normally, but if there
10071 * is a hole in the region, and we would
10072 * have returned KERN_SUCCESS, return
10073 * KERN_INVALID_ADDRESS instead.
10074 *
10075 * NOTE
10076 * The memory object attributes have not yet been implemented; this
10077 * function will eventually have to deal with the invalidate attribute.
10078 *
10079 * RETURNS
10080 * KERN_INVALID_TASK Bad task parameter
10081 * KERN_INVALID_ARGUMENT both sync and async were specified.
10082 * KERN_SUCCESS The usual.
10083 * KERN_INVALID_ADDRESS There was a hole in the region.
10084 */
10085
10086 kern_return_t
10087 vm_map_msync(
10088 vm_map_t map,
10089 vm_map_address_t address,
10090 vm_map_size_t size,
10091 vm_sync_t sync_flags)
10092 {
10093 msync_req_t msr;
10094 msync_req_t new_msr;
10095 queue_chain_t req_q; /* queue of requests for this msync */
10096 vm_map_entry_t entry;
10097 vm_map_size_t amount_left;
10098 vm_object_offset_t offset;
10099 boolean_t do_sync_req;
10100 boolean_t modifiable;
10101 boolean_t had_hole = FALSE;
10102
10103 if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
10104 (sync_flags & VM_SYNC_SYNCHRONOUS))
10105 return(KERN_INVALID_ARGUMENT);
10106
10107 /*
10108 * align address and size on page boundaries
10109 */
10110 size = vm_map_round_page(address + size) - vm_map_trunc_page(address);
10111 address = vm_map_trunc_page(address);
10112
10113 if (map == VM_MAP_NULL)
10114 return(KERN_INVALID_TASK);
10115
10116 if (size == 0)
10117 return(KERN_SUCCESS);
10118
10119 queue_init(&req_q);
10120 amount_left = size;
10121
10122 while (amount_left > 0) {
10123 vm_object_size_t flush_size;
10124 vm_object_t object;
10125
10126 vm_map_lock(map);
10127 if (!vm_map_lookup_entry(map,
10128 vm_map_trunc_page(address), &entry)) {
10129
10130 vm_size_t skip;
10131
10132 /*
10133 * hole in the address map.
10134 */
10135 had_hole = TRUE;
10136
10137 /*
10138 * Check for empty map.
10139 */
10140 if (entry == vm_map_to_entry(map) &&
10141 entry->vme_next == entry) {
10142 vm_map_unlock(map);
10143 break;
10144 }
10145 /*
10146 * Check that we don't wrap and that
10147 * we have at least one real map entry.
10148 */
10149 if ((map->hdr.nentries == 0) ||
10150 (entry->vme_next->vme_start < address)) {
10151 vm_map_unlock(map);
10152 break;
10153 }
10154 /*
10155 * Move up to the next entry if needed
10156 */
10157 skip = (entry->vme_next->vme_start - address);
10158 if (skip >= amount_left)
10159 amount_left = 0;
10160 else
10161 amount_left -= skip;
10162 address = entry->vme_next->vme_start;
10163 vm_map_unlock(map);
10164 continue;
10165 }
10166
10167 offset = address - entry->vme_start;
10168
10169 /*
10170 * do we have more to flush than is contained in this
10171 * entry ?
10172 */
10173 if (amount_left + entry->vme_start + offset > entry->vme_end) {
10174 flush_size = entry->vme_end -
10175 (entry->vme_start + offset);
10176 } else {
10177 flush_size = amount_left;
10178 }
10179 amount_left -= flush_size;
10180 address += flush_size;
10181
10182 if (entry->is_sub_map == TRUE) {
10183 vm_map_t local_map;
10184 vm_map_offset_t local_offset;
10185
10186 local_map = entry->object.sub_map;
10187 local_offset = entry->offset;
10188 vm_map_unlock(map);
10189 if (vm_map_msync(
10190 local_map,
10191 local_offset,
10192 flush_size,
10193 sync_flags) == KERN_INVALID_ADDRESS) {
10194 had_hole = TRUE;
10195 }
10196 continue;
10197 }
10198 object = entry->object.vm_object;
10199
10200 /*
10201 * We can't sync this object if the object has not been
10202 * created yet
10203 */
10204 if (object == VM_OBJECT_NULL) {
10205 vm_map_unlock(map);
10206 continue;
10207 }
10208 offset += entry->offset;
10209 modifiable = (entry->protection & VM_PROT_WRITE)
10210 != VM_PROT_NONE;
10211
10212 vm_object_lock(object);
10213
10214 if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
10215 boolean_t kill_pages = 0;
10216
10217 if (sync_flags & VM_SYNC_KILLPAGES) {
10218 if (object->ref_count == 1 && !entry->needs_copy && !object->shadow)
10219 kill_pages = 1;
10220 else
10221 kill_pages = -1;
10222 }
10223 if (kill_pages != -1)
10224 vm_object_deactivate_pages(object, offset,
10225 (vm_object_size_t)flush_size, kill_pages);
10226 vm_object_unlock(object);
10227 vm_map_unlock(map);
10228 continue;
10229 }
10230 /*
10231 * We can't sync this object if there isn't a pager.
10232 * Don't bother to sync internal objects, since there can't
10233 * be any "permanent" storage for these objects anyway.
10234 */
10235 if ((object->pager == MEMORY_OBJECT_NULL) ||
10236 (object->internal) || (object->private)) {
10237 vm_object_unlock(object);
10238 vm_map_unlock(map);
10239 continue;
10240 }
10241 /*
10242 * keep reference on the object until syncing is done
10243 */
10244 assert(object->ref_count > 0);
10245 object->ref_count++;
10246 vm_object_res_reference(object);
10247 vm_object_unlock(object);
10248
10249 vm_map_unlock(map);
10250
10251 do_sync_req = vm_object_sync(object,
10252 offset,
10253 flush_size,
10254 sync_flags & VM_SYNC_INVALIDATE,
10255 (modifiable &&
10256 (sync_flags & VM_SYNC_SYNCHRONOUS ||
10257 sync_flags & VM_SYNC_ASYNCHRONOUS)),
10258 sync_flags & VM_SYNC_SYNCHRONOUS);
10259 /*
10260 * only send a memory_object_synchronize if we returned pages or if the entry
10261 * is writable (i.e. dirty pages may have already been sent back)
10262 */
10263 if (!do_sync_req && !modifiable) {
10264 vm_object_deallocate(object);
10265 continue;
10266 }
10267 msync_req_alloc(new_msr);
10268
10269 vm_object_lock(object);
10270 offset += object->paging_offset;
10271
10272 new_msr->offset = offset;
10273 new_msr->length = flush_size;
10274 new_msr->object = object;
10275 new_msr->flag = VM_MSYNC_SYNCHRONIZING;
10276 re_iterate:
10277 queue_iterate(&object->msr_q, msr, msync_req_t, msr_q) {
10278 /*
10279 * need to check for an overlapping entry; if one is found, wait
10280 * for that overlapping msr to be done, then reiterate
10281 */
10282 msr_lock(msr);
10283 if (msr->flag == VM_MSYNC_SYNCHRONIZING &&
10284 ((offset >= msr->offset &&
10285 offset < (msr->offset + msr->length)) ||
10286 (msr->offset >= offset &&
10287 msr->offset < (offset + flush_size))))
10288 {
10289 assert_wait((event_t) msr,THREAD_INTERRUPTIBLE);
10290 msr_unlock(msr);
10291 vm_object_unlock(object);
10292 thread_block(THREAD_CONTINUE_NULL);
10293 vm_object_lock(object);
10294 goto re_iterate;
10295 }
10296 msr_unlock(msr);
10297 }/* queue_iterate */
10298
10299 queue_enter(&object->msr_q, new_msr, msync_req_t, msr_q);
10300 vm_object_unlock(object);
10301
10302 queue_enter(&req_q, new_msr, msync_req_t, req_q);
10303
10304 (void) memory_object_synchronize(
10305 object->pager,
10306 offset,
10307 flush_size,
10308 sync_flags & ~VM_SYNC_CONTIGUOUS);
10309 }/* while */
10310
10311 /*
10312 * wait for memory_object_synchronize_completed messages from pager(s)
10313 */
10314
10315 while (!queue_empty(&req_q)) {
10316 msr = (msync_req_t)queue_first(&req_q);
10317 msr_lock(msr);
10318 while(msr->flag != VM_MSYNC_DONE) {
10319 assert_wait((event_t) msr, THREAD_INTERRUPTIBLE);
10320 msr_unlock(msr);
10321 thread_block(THREAD_CONTINUE_NULL);
10322 msr_lock(msr);
10323 }/* while */
10324 queue_remove(&req_q, msr, msync_req_t, req_q);
10325 msr_unlock(msr);
10326 vm_object_deallocate(msr->object);
10327 msync_req_free(msr);
10328 }/* while */
10329
10330 /* for proper msync() behaviour */
10331 if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS))
10332 return(KERN_INVALID_ADDRESS);
10333
10334 return(KERN_SUCCESS);
10335 }/* vm_msync */
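/*
 * Illustrative sketch (not compiled): a minimal in-kernel caller of
 * vm_map_msync() as implemented above.  The map handle, address and
 * size are hypothetical parameters; the flag combinations and the
 * return-code handling follow the behaviour of this routine, including
 * the KERN_INVALID_ADDRESS result when VM_SYNC_CONTIGUOUS is set and a
 * hole was encountered in the range.
 */
#if 0	/* example only */
static kern_return_t
example_flush_range(
	vm_map_t		map,	/* hypothetical target map */
	vm_map_offset_t		addr,	/* hypothetical start address */
	vm_map_size_t		size)	/* hypothetical length */
{
	kern_return_t	kr;

	/* push dirty pages back to their pagers and wait for completion */
	kr = vm_map_msync(map, addr, size,
			  VM_SYNC_SYNCHRONOUS | VM_SYNC_CONTIGUOUS);
	if (kr == KERN_INVALID_ADDRESS) {
		/* a hole was found somewhere in [addr, addr + size) */
		return kr;
	}

	/* then discard the cached copies that were just pushed out */
	return vm_map_msync(map, addr, size, VM_SYNC_INVALIDATE);
}
#endif	/* example only */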
10336
10337 /* Takes existing source and destination sub-maps and clones the contents of */
10338 /* the source map */
10339 kern_return_t
10340 vm_region_clone(
10341 ipc_port_t src_region,
10342 ipc_port_t dst_region)
10343 {
10344 vm_named_entry_t src_object;
10345 vm_named_entry_t dst_object;
10346 vm_map_t src_map;
10347 vm_map_t dst_map;
10348 vm_map_offset_t addr;
10349 vm_map_offset_t max_off;
10350 vm_map_entry_t entry;
10351 vm_map_entry_t new_entry;
10352 vm_map_entry_t insert_point;
10353
10354 src_object = (vm_named_entry_t)src_region->ip_kobject;
10355 dst_object = (vm_named_entry_t)dst_region->ip_kobject;
10356 if((!src_object->is_sub_map) || (!dst_object->is_sub_map)) {
10357 return KERN_INVALID_ARGUMENT;
10358 }
10359 src_map = (vm_map_t)src_object->backing.map;
10360 dst_map = (vm_map_t)dst_object->backing.map;
10361 /* destination map is assumed to be unavailable to any other */
10362 /* activity. i.e. it is new */
10363 vm_map_lock(src_map);
10364 if((src_map->min_offset != dst_map->min_offset)
10365 || (src_map->max_offset != dst_map->max_offset)) {
10366 vm_map_unlock(src_map);
10367 return KERN_INVALID_ARGUMENT;
10368 }
10369 addr = src_map->min_offset;
10370 vm_map_lookup_entry(dst_map, addr, &entry);
10371 if(entry == vm_map_to_entry(dst_map)) {
10372 entry = entry->vme_next;
10373 }
10374 if(entry == vm_map_to_entry(dst_map)) {
10375 max_off = src_map->max_offset;
10376 } else {
10377 max_off = entry->vme_start;
10378 }
10379 vm_map_lookup_entry(src_map, addr, &entry);
10380 if(entry == vm_map_to_entry(src_map)) {
10381 entry = entry->vme_next;
10382 }
10383 vm_map_lookup_entry(dst_map, addr, &insert_point);
10384 while((entry != vm_map_to_entry(src_map)) &&
10385 (entry->vme_end <= max_off)) {
10386 addr = entry->vme_start;
10387 new_entry = vm_map_entry_create(dst_map);
10388 vm_map_entry_copy(new_entry, entry);
10389 vm_map_entry_link(dst_map, insert_point, new_entry);
10390 insert_point = new_entry;
10391 if (entry->object.vm_object != VM_OBJECT_NULL) {
10392 if (new_entry->is_sub_map) {
10393 vm_map_reference(new_entry->object.sub_map);
10394 } else {
10395 vm_object_reference(
10396 new_entry->object.vm_object);
10397 }
10398 }
10399 dst_map->size += new_entry->vme_end - new_entry->vme_start;
10400 entry = entry->vme_next;
10401 }
10402 vm_map_unlock(src_map);
10403 return KERN_SUCCESS;
10404 }
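/*
 * Illustrative sketch (not compiled): calling vm_region_clone() with two
 * ports that are assumed to already name sub-map backed named entries of
 * identical extent.  The port arguments are hypothetical; the error
 * handling mirrors the KERN_INVALID_ARGUMENT cases above.
 */
#if 0	/* example only */
static kern_return_t
example_clone_region(
	ipc_port_t	src_region,	/* hypothetical sub-map named entry */
	ipc_port_t	dst_region)	/* hypothetical, freshly created destination */
{
	kern_return_t	kr;

	kr = vm_region_clone(src_region, dst_region);
	if (kr == KERN_INVALID_ARGUMENT) {
		/* not sub-maps, or their min/max offsets do not match */
		return kr;
	}
	return KERN_SUCCESS;
}
#endif	/* example only */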
10405
10406 /*
10407 * Routine: convert_port_entry_to_map
10408 * Purpose:
10409 * Convert from a port specifying an entry or a task
10410 * to a map. Doesn't consume the port ref; produces a map ref,
10411 * which may be null. Unlike convert_port_to_map, the
10412 * port may be either task backed or named-entry backed.
10413 * Conditions:
10414 * Nothing locked.
10415 */
10416
10417
10418 vm_map_t
10419 convert_port_entry_to_map(
10420 ipc_port_t port)
10421 {
10422 vm_map_t map;
10423 vm_named_entry_t named_entry;
10424
10425 if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
10426 while(TRUE) {
10427 ip_lock(port);
10428 if(ip_active(port) && (ip_kotype(port)
10429 == IKOT_NAMED_ENTRY)) {
10430 named_entry =
10431 (vm_named_entry_t)port->ip_kobject;
10432 if (!(mutex_try(&(named_entry)->Lock))) {
10433 ip_unlock(port);
10434 mutex_pause();
10435 continue;
10436 }
10437 named_entry->ref_count++;
10438 mutex_unlock(&(named_entry)->Lock);
10439 ip_unlock(port);
10440 if ((named_entry->is_sub_map) &&
10441 (named_entry->protection
10442 & VM_PROT_WRITE)) {
10443 map = named_entry->backing.map;
10444 } else {
10445 mach_destroy_memory_entry(port);
10446 return VM_MAP_NULL;
10447 }
10448 vm_map_reference_swap(map);
10449 mach_destroy_memory_entry(port);
10450 break;
10451 }
10452 else
10453 return VM_MAP_NULL;
10454 }
10455 }
10456 else
10457 map = convert_port_to_map(port);
10458
10459 return map;
10460 }
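/*
 * Illustrative sketch (not compiled): the map reference produced by
 * convert_port_entry_to_map() must be dropped with vm_map_deallocate()
 * once the caller is done with it.  The port argument and helper name
 * are hypothetical.
 */
#if 0	/* example only */
static void
example_use_port_as_map(
	ipc_port_t	port)	/* hypothetical task or named-entry port */
{
	vm_map_t	map;

	map = convert_port_entry_to_map(port);
	if (map == VM_MAP_NULL)
		return;

	/* ... operate on the map here ... */

	vm_map_deallocate(map);	/* balance the reference produced above */
}
#endif	/* example only */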
10461
10462 /*
10463 * Routine: convert_port_entry_to_object
10464 * Purpose:
10465 * Convert from a port specifying a named entry to an
10466 * object. Doesn't consume the port ref; produces an object ref,
10467 * which may be null.
10468 * Conditions:
10469 * Nothing locked.
10470 */
10471
10472
10473 vm_object_t
10474 convert_port_entry_to_object(
10475 ipc_port_t port)
10476 {
10477 vm_object_t object;
10478 vm_named_entry_t named_entry;
10479
10480 if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
10481 while(TRUE) {
10482 ip_lock(port);
10483 if(ip_active(port) && (ip_kotype(port)
10484 == IKOT_NAMED_ENTRY)) {
10485 named_entry =
10486 (vm_named_entry_t)port->ip_kobject;
10487 if (!(mutex_try(&(named_entry)->Lock))) {
10488 ip_unlock(port);
10489 mutex_pause();
10490 continue;
10491 }
10492 named_entry->ref_count++;
10493 mutex_unlock(&(named_entry)->Lock);
10494 ip_unlock(port);
10495 if ((!named_entry->is_sub_map) &&
10496 (!named_entry->is_pager) &&
10497 (named_entry->protection
10498 & VM_PROT_WRITE)) {
10499 object = named_entry->backing.object;
10500 } else {
10501 mach_destroy_memory_entry(port);
10502 return (vm_object_t)NULL;
10503 }
10504 vm_object_reference(named_entry->backing.object);
10505 mach_destroy_memory_entry(port);
10506 break;
10507 }
10508 else
10509 return (vm_object_t)NULL;
10510 }
10511 } else {
10512 return (vm_object_t)NULL;
10513 }
10514
10515 return object;
10516 }
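/*
 * Illustrative sketch (not compiled): the object reference produced by
 * convert_port_entry_to_object() is balanced with vm_object_deallocate().
 * The port argument and helper name are hypothetical.
 */
#if 0	/* example only */
static void
example_use_port_as_object(
	ipc_port_t	port)	/* hypothetical named-entry port */
{
	vm_object_t	object;

	object = convert_port_entry_to_object(port);
	if (object == VM_OBJECT_NULL)
		return;

	/* ... operate on the object here ... */

	vm_object_deallocate(object);	/* balance the reference produced above */
}
#endif	/* example only */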
10517
10518 /*
10519 * Export routines to other components for the things we access locally through
10520 * macros.
10521 */
10522 #undef current_map
10523 vm_map_t
10524 current_map(void)
10525 {
10526 return (current_map_fast());
10527 }
10528
10529 /*
10530 * vm_map_reference:
10531 *
10532 * Most code internal to the osfmk will go through a
10533 * macro defining this. This is always here for the
10534 * use of other kernel components.
10535 */
10536 #undef vm_map_reference
10537 void
10538 vm_map_reference(
10539 register vm_map_t map)
10540 {
10541 if (map == VM_MAP_NULL)
10542 return;
10543
10544 mutex_lock(&map->s_lock);
10545 #if TASK_SWAPPER
10546 assert(map->res_count > 0);
10547 assert(map->ref_count >= map->res_count);
10548 map->res_count++;
10549 #endif
10550 map->ref_count++;
10551 mutex_unlock(&map->s_lock);
10552 }
10553
10554 /*
10555 * vm_map_deallocate:
10556 *
10557 * Removes a reference from the specified map,
10558 * destroying it if no references remain.
10559 * The map should not be locked.
10560 */
10561 void
10562 vm_map_deallocate(
10563 register vm_map_t map)
10564 {
10565 unsigned int ref;
10566
10567 if (map == VM_MAP_NULL)
10568 return;
10569
10570 mutex_lock(&map->s_lock);
10571 ref = --map->ref_count;
10572 if (ref > 0) {
10573 vm_map_res_deallocate(map);
10574 mutex_unlock(&map->s_lock);
10575 return;
10576 }
10577 assert(map->ref_count == 0);
10578 mutex_unlock(&map->s_lock);
10579
10580 #if TASK_SWAPPER
10581 /*
10582 * The map residence count isn't decremented here because
10583 * the vm_map_delete below will traverse the entire map,
10584 * deleting entries, and the residence counts on objects
10585 * and sharing maps will go away then.
10586 */
10587 #endif
10588
10589 vm_map_destroy(map);
10590 }
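/*
 * Illustrative sketch (not compiled): the exported vm_map_reference() and
 * vm_map_deallocate() above are used in matched pairs by components
 * outside osfmk that need to keep a map alive across an operation.  The
 * helper below is hypothetical.
 */
#if 0	/* example only */
static void
example_hold_map(
	vm_map_t	map)	/* hypothetical map being held */
{
	if (map == VM_MAP_NULL)
		return;

	vm_map_reference(map);		/* take an extra reference */

	/* ... use the map without fear of it being destroyed ... */

	vm_map_deallocate(map);		/* drop it; destroys the map if last */
}
#endif	/* example only */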
10591
10592
10593 /* LP64todo - this whole mechanism is temporary. It should be redone when
10594 * the pmap layer can handle 64-bit address spaces. Until then, we trump
10595 * up a map entry for the 64-bit commpage above the map's max_offset.
10596 */
10597 extern vm_map_t com_region_map64; /* the submap for 64-bit commpage */
10598 extern vm_map_t com_region_map32; /* the submap for 32-bit commpage */
10599
10600
10601 static void
10602 vm_map_commpage(
10603 vm_map_t user_map,
10604 vm_map_t com_region_map, /* com_region_map32 or com_region_map64 */
10605 vm_map_offset_t base_address,
10606 vm_map_size_t size)
10607 {
10608 vm_map_entry_t entry;
10609 vm_object_t object;
10610
10611 vm_map_lock(user_map);
10612
10613 /* The commpage is necessarily the last entry in the map.
10614 * See if one is already there (it is unclear whether this can happen).
10615 */
10616 entry = vm_map_last_entry(user_map);
10617 if (entry != vm_map_to_entry(user_map)) {
10618 if (entry->vme_end >= base_address) {
10619 vm_map_unlock(user_map);
10620 return;
10621 }
10622 }
10623
10624 entry = vm_map_first_entry(com_region_map);
10625 object = entry->object.vm_object;
10626 vm_object_reference(object);
10627
10628 /* We bypass vm_map_enter() because we are adding the entry past the
10629 * map's max_offset.
10630 */
10631 entry = vm_map_entry_insert(
10632 user_map,
10633 vm_map_last_entry(user_map), /* insert after last entry */
10634 base_address,
10635 base_address + size,
10636 object,
10637 0, /* offset */
10638 FALSE, /* needs_copy */
10639 FALSE, /* is_shared */
10640 FALSE, /* in_transition */
10641 VM_PROT_READ|VM_PROT_EXECUTE,
10642 VM_PROT_READ|VM_PROT_EXECUTE,
10643 VM_BEHAVIOR_DEFAULT,
10644 VM_INHERIT_NONE,
10645 1 ); /* wired_count */
10646
10647 vm_map_unlock(user_map);
10648 }
10649
10650 #ifdef __i386__
10651 void
10652 vm_map_commpage32(
10653 vm_map_t map)
10654 {
10655 vm_map_commpage(map,
10656 com_region_map32,
10657 (vm_map_offset_t) (unsigned) _COMM_PAGE32_BASE_ADDRESS,
10658 (vm_map_size_t) (unsigned) _COMM_PAGE32_AREA_USED);
10659 }
10660 #endif /* __i386__ */
10661
10662
10663
10664 void
10665 vm_map_commpage64(
10666 vm_map_t map)
10667 {
10668
10669 vm_map_commpage(map,
10670 com_region_map64,
10671 (vm_map_offset_t) _COMM_PAGE64_BASE_ADDRESS,
10672 (vm_map_size_t) _COMM_PAGE64_AREA_USED);
10673 }
10674
10675 void
10676 vm_map_remove_commpage(
10677 vm_map_t map )
10678 {
10679 vm_map_entry_t entry;
10680
10681 while( 1 ) {
10682 vm_map_lock(map);
10683
10684 entry = vm_map_last_entry(map);
10685
10686 if ((entry == vm_map_to_entry(map)) ||
10687 (entry->vme_start < map->max_offset))
10688 break;
10689
10690 /* clearing the wired count isn't strictly correct */
10691 entry->wired_count = 0;
10692 vm_map_entry_delete(map,entry);
10693 }
10694
10695 vm_map_unlock(map);
10696 }
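/*
 * Illustrative sketch (not compiled): pairing vm_map_commpage64() at
 * address-space setup with vm_map_remove_commpage() at teardown.  The
 * 32-bit variant exists only on __i386__, as above.  The helper names
 * are hypothetical.
 */
#if 0	/* example only */
static void
example_setup_commpage(
	vm_map_t	map)	/* hypothetical 64-bit user map */
{
	vm_map_commpage64(map);		/* map the 64-bit commpage above max_offset */
}

static void
example_teardown_commpage(
	vm_map_t	map)
{
	vm_map_remove_commpage(map);	/* delete the entries above max_offset */
}
#endif	/* example only */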
10697
10698 void
10699 vm_map_disable_NX(vm_map_t map)
10700 {
10701 if (map == NULL)
10702 return;
10703 if (map->pmap == NULL)
10704 return;
10705
10706 pmap_disable_NX(map->pmap);
10707 }
10708
10709 /* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
10710 * more descriptive.
10711 */
10712 void
10713 vm_map_set_32bit(vm_map_t map)
10714 {
10715 map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
10716 }
10717
10718
10719 void
10720 vm_map_set_64bit(vm_map_t map)
10721 {
10722 map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
10723 }
10724
10725 vm_map_offset_t
10726 vm_compute_max_offset(unsigned is64)
10727 {
10728 return (is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS);
10729 }
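/*
 * Illustrative sketch (not compiled): selecting a map's maximum offset
 * from a word-size flag, either by adjusting an existing map or by
 * computing the value up front.  The task_is_64bit flag and helper name
 * are hypothetical.
 */
#if 0	/* example only */
static void
example_size_map(
	vm_map_t	map,		/* hypothetical map being configured */
	boolean_t	task_is_64bit)	/* hypothetical word-size flag */
{
	if (task_is_64bit)
		vm_map_set_64bit(map);	/* max_offset = MACH_VM_MAX_ADDRESS */
	else
		vm_map_set_32bit(map);	/* max_offset = VM_MAX_ADDRESS */

	/* the same value, computed without touching an existing map */
	assert(map->max_offset == vm_compute_max_offset(task_is_64bit));
}
#endif	/* example only */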
10730
10731 boolean_t
10732 vm_map_has_4GB_pagezero(vm_map_t map)
10733 {
10734 /*
10735 * XXX FBDP
10736 * We should lock the VM map (for read) here but we can get away
10737 * with it for now because there can't really be any race condition:
10738 * the VM map's min_offset is changed only when the VM map is created
10739 * and when the zero page is established (when the binary gets loaded),
10740 * and this routine gets called only when the task terminates and the
10741 * VM map is being torn down, and when a new map is created via
10742 * load_machfile()/execve().
10743 */
10744 return (map->min_offset >= 0x100000000ULL);
10745 }
10746
10747 void
10748 vm_map_set_4GB_pagezero(vm_map_t map)
10749 {
10750 pmap_set_4GB_pagezero(map->pmap);
10751 }
10752
10753 void
10754 vm_map_clear_4GB_pagezero(vm_map_t map)
10755 {
10756 pmap_clear_4GB_pagezero(map->pmap);
10757 }
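/*
 * Illustrative sketch (not compiled): guarding the pmap-level pagezero
 * teardown with vm_map_has_4GB_pagezero(), per the locking comment above.
 * The helper name is hypothetical.
 */
#if 0	/* example only */
static void
example_teardown_pagezero(
	vm_map_t	map)	/* hypothetical map being torn down */
{
	if (vm_map_has_4GB_pagezero(map))
		vm_map_clear_4GB_pagezero(map);	/* undo pmap_set_4GB_pagezero() */
}
#endif	/* example only */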
10758
10759 /*
10760 * Raise a VM map's minimum offset.
10761 * To strictly enforce "page zero" reservation.
10762 */
10763 kern_return_t
10764 vm_map_raise_min_offset(
10765 vm_map_t map,
10766 vm_map_offset_t new_min_offset)
10767 {
10768 vm_map_entry_t first_entry;
10769
10770 new_min_offset = vm_map_round_page(new_min_offset);
10771
10772 vm_map_lock(map);
10773
10774 if (new_min_offset < map->min_offset) {
10775 /*
10776 * Can't move min_offset backwards, as that would expose
10777 * a part of the address space that was previously, and for
10778 * possibly good reasons, inaccessible.
10779 */
10780 vm_map_unlock(map);
10781 return KERN_INVALID_ADDRESS;
10782 }
10783
10784 first_entry = vm_map_first_entry(map);
10785 if (first_entry != vm_map_to_entry(map) &&
10786 first_entry->vme_start < new_min_offset) {
10787 /*
10788 * Some memory was already allocated below the new
10789 * minimum offset. It's too late to change it now...
10790 */
10791 vm_map_unlock(map);
10792 return KERN_NO_SPACE;
10793 }
10794
10795 map->min_offset = new_min_offset;
10796
10797 vm_map_unlock(map);
10798
10799 return KERN_SUCCESS;
10800 }
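/*
 * Illustrative sketch (not compiled): reserving a strict page-zero region
 * by raising the map's minimum offset and handling the documented failure
 * cases.  The 0x1000 reservation size and helper name are hypothetical.
 */
#if 0	/* example only */
static kern_return_t
example_reserve_pagezero(
	vm_map_t	map)	/* hypothetical freshly created map */
{
	kern_return_t	kr;

	kr = vm_map_raise_min_offset(map, (vm_map_offset_t)0x1000);
	if (kr == KERN_NO_SPACE) {
		/* something is already mapped below the new minimum */
		return kr;
	}
	if (kr == KERN_INVALID_ADDRESS) {
		/* the requested minimum is below the current one */
		return kr;
	}
	return KERN_SUCCESS;
}
#endif	/* example only */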