1 /*
2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_map.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * Virtual memory mapping module.
64 */
65
66 #include <task_swapper.h>
67 #include <mach_assert.h>
68 #include <libkern/OSAtomic.h>
69
70 #include <mach/kern_return.h>
71 #include <mach/port.h>
72 #include <mach/vm_attributes.h>
73 #include <mach/vm_param.h>
74 #include <mach/vm_behavior.h>
75 #include <mach/vm_statistics.h>
76 #include <mach/memory_object.h>
77 #include <mach/mach_vm.h>
78 #include <machine/cpu_capabilities.h>
79 #include <mach/sdt.h>
80
81 #include <kern/assert.h>
82 #include <kern/counters.h>
83 #include <kern/kalloc.h>
84 #include <kern/zalloc.h>
85
86 #include <vm/cpm.h>
87 #include <vm/vm_init.h>
88 #include <vm/vm_fault.h>
89 #include <vm/vm_map.h>
90 #include <vm/vm_object.h>
91 #include <vm/vm_page.h>
92 #include <vm/vm_kern.h>
93 #include <ipc/ipc_port.h>
94 #include <kern/sched_prim.h>
95 #include <kern/misc_protos.h>
96 #include <ddb/tr.h>
97 #include <machine/db_machdep.h>
98 #include <kern/xpr.h>
99
100 #include <mach/vm_map_server.h>
101 #include <mach/mach_host_server.h>
102 #include <vm/vm_protos.h>
103
104 #ifdef ppc
105 #include <ppc/mappings.h>
106 #endif /* ppc */
107
108 #include <vm/vm_protos.h>
109 #include <vm/vm_shared_region.h>
110
111 /* Internal prototypes
112 */
113
114 static void vm_map_simplify_range(
115 vm_map_t map,
116 vm_map_offset_t start,
117 vm_map_offset_t end); /* forward */
118
119 static boolean_t vm_map_range_check(
120 vm_map_t map,
121 vm_map_offset_t start,
122 vm_map_offset_t end,
123 vm_map_entry_t *entry);
124
125 static vm_map_entry_t _vm_map_entry_create(
126 struct vm_map_header *map_header);
127
128 static void _vm_map_entry_dispose(
129 struct vm_map_header *map_header,
130 vm_map_entry_t entry);
131
132 static void vm_map_pmap_enter(
133 vm_map_t map,
134 vm_map_offset_t addr,
135 vm_map_offset_t end_addr,
136 vm_object_t object,
137 vm_object_offset_t offset,
138 vm_prot_t protection);
139
140 static void _vm_map_clip_end(
141 struct vm_map_header *map_header,
142 vm_map_entry_t entry,
143 vm_map_offset_t end);
144
145 static void _vm_map_clip_start(
146 struct vm_map_header *map_header,
147 vm_map_entry_t entry,
148 vm_map_offset_t start);
149
150 static void vm_map_entry_delete(
151 vm_map_t map,
152 vm_map_entry_t entry);
153
154 static kern_return_t vm_map_delete(
155 vm_map_t map,
156 vm_map_offset_t start,
157 vm_map_offset_t end,
158 int flags,
159 vm_map_t zap_map);
160
161 static kern_return_t vm_map_copy_overwrite_unaligned(
162 vm_map_t dst_map,
163 vm_map_entry_t entry,
164 vm_map_copy_t copy,
165 vm_map_address_t start);
166
167 static kern_return_t vm_map_copy_overwrite_aligned(
168 vm_map_t dst_map,
169 vm_map_entry_t tmp_entry,
170 vm_map_copy_t copy,
171 vm_map_offset_t start,
172 pmap_t pmap);
173
174 static kern_return_t vm_map_copyin_kernel_buffer(
175 vm_map_t src_map,
176 vm_map_address_t src_addr,
177 vm_map_size_t len,
178 boolean_t src_destroy,
179 vm_map_copy_t *copy_result); /* OUT */
180
181 static kern_return_t vm_map_copyout_kernel_buffer(
182 vm_map_t map,
183 vm_map_address_t *addr, /* IN/OUT */
184 vm_map_copy_t copy,
185 boolean_t overwrite);
186
187 static void vm_map_fork_share(
188 vm_map_t old_map,
189 vm_map_entry_t old_entry,
190 vm_map_t new_map);
191
192 static boolean_t vm_map_fork_copy(
193 vm_map_t old_map,
194 vm_map_entry_t *old_entry_p,
195 vm_map_t new_map);
196
197 void vm_map_region_top_walk(
198 vm_map_entry_t entry,
199 vm_region_top_info_t top);
200
201 void vm_map_region_walk(
202 vm_map_t map,
203 vm_map_offset_t va,
204 vm_map_entry_t entry,
205 vm_object_offset_t offset,
206 vm_object_size_t range,
207 vm_region_extended_info_t extended,
208 boolean_t look_for_pages);
209
210 static kern_return_t vm_map_wire_nested(
211 vm_map_t map,
212 vm_map_offset_t start,
213 vm_map_offset_t end,
214 vm_prot_t access_type,
215 boolean_t user_wire,
216 pmap_t map_pmap,
217 vm_map_offset_t pmap_addr);
218
219 static kern_return_t vm_map_unwire_nested(
220 vm_map_t map,
221 vm_map_offset_t start,
222 vm_map_offset_t end,
223 boolean_t user_wire,
224 pmap_t map_pmap,
225 vm_map_offset_t pmap_addr);
226
227 static kern_return_t vm_map_overwrite_submap_recurse(
228 vm_map_t dst_map,
229 vm_map_offset_t dst_addr,
230 vm_map_size_t dst_size);
231
232 static kern_return_t vm_map_copy_overwrite_nested(
233 vm_map_t dst_map,
234 vm_map_offset_t dst_addr,
235 vm_map_copy_t copy,
236 boolean_t interruptible,
237 pmap_t pmap);
238
239 static kern_return_t vm_map_remap_extract(
240 vm_map_t map,
241 vm_map_offset_t addr,
242 vm_map_size_t size,
243 boolean_t copy,
244 struct vm_map_header *map_header,
245 vm_prot_t *cur_protection,
246 vm_prot_t *max_protection,
247 vm_inherit_t inheritance,
248 boolean_t pageable);
249
250 static kern_return_t vm_map_remap_range_allocate(
251 vm_map_t map,
252 vm_map_address_t *address,
253 vm_map_size_t size,
254 vm_map_offset_t mask,
255 boolean_t anywhere,
256 vm_map_entry_t *map_entry);
257
258 static void vm_map_region_look_for_page(
259 vm_map_t map,
260 vm_map_offset_t va,
261 vm_object_t object,
262 vm_object_offset_t offset,
263 int max_refcnt,
264 int depth,
265 vm_region_extended_info_t extended);
266
267 static int vm_map_region_count_obj_refs(
268 vm_map_entry_t entry,
269 vm_object_t object);
270
271 /*
272 * Macros to copy a vm_map_entry. We must be careful to correctly
273 * manage the wired page count. vm_map_entry_copy() creates a new
274 * map entry to the same memory - the wired count in the new entry
275 * must be set to zero. vm_map_entry_copy_full() creates a new
276 * entry that is identical to the old entry. This preserves the
277 * wire count; it's used for map splitting and zone changing in
278 * vm_map_copyout.
279 */
280 #define vm_map_entry_copy(NEW,OLD) \
281 MACRO_BEGIN \
282 *(NEW) = *(OLD); \
283 (NEW)->is_shared = FALSE; \
284 (NEW)->needs_wakeup = FALSE; \
285 (NEW)->in_transition = FALSE; \
286 (NEW)->wired_count = 0; \
287 (NEW)->user_wired_count = 0; \
288 MACRO_END
289
290 #define vm_map_entry_copy_full(NEW,OLD) (*(NEW) = *(OLD))
291
292 /*
293 * Decide if we want to allow processes to execute from their data or stack areas.
294 * override_nx() returns true if we do. Data/stack execution can be enabled independently
295 * for 32 and 64 bit processes. Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
296 * or allow_stack_exec to enable data execution for that type of data area for that particular
297 * ABI (or both by or'ing the flags together). These are initialized in the architecture
298 * specific pmap files since the default behavior varies according to architecture. The
299 * main reason it varies is because of the need to provide binary compatibility with old
300 * applications that were written before these restrictions came into being. In the old
301 * days, an app could execute anything it could read, but this has slowly been tightened
302 * up over time. The default behavior is:
303 *
304 * 32-bit PPC apps may execute from both stack and data areas
305 * 32-bit Intel apps may execute from data areas but not stack
306 * 64-bit PPC/Intel apps may not execute from either data or stack
307 *
308 * An application on any architecture may override these defaults by explicitly
309 * adding PROT_EXEC permission to the page in question with the mprotect(2)
310 * system call. This code here just determines what happens when an app tries to
311 * execute from a page that lacks execute permission.
312 *
313 * Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
314 * default behavior for both 32 and 64 bit apps on a system-wide basis.
315 */
316
317 extern int allow_data_exec, allow_stack_exec;
318
319 int
320 override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
321 {
322 int current_abi;
323
324 /*
325 * Determine if the app is running in 32 or 64 bit mode.
326 */
327
328 if (vm_map_is_64bit(map))
329 current_abi = VM_ABI_64;
330 else
331 current_abi = VM_ABI_32;
332
333 /*
334 * Determine if we should allow the execution based on whether it's a
335 * stack or data area and the current architecture.
336 */
337
338 if (user_tag == VM_MEMORY_STACK)
339 return allow_stack_exec & current_abi;
340
341 return allow_data_exec & current_abi;
342 }
343
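/*
 * Illustrative note (not part of the original source): from user space, an
 * application can opt a non-executable page back into executability with
 * mprotect(2); "buf" and "len" below are placeholders, e.g.:
 *
 *	mprotect(buf, len, PROT_READ | PROT_WRITE | PROT_EXEC);
 *
 * override_nx() only governs the kernel's decision when a fault occurs on a
 * page that was not explicitly granted execute permission.
 */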
344
345 /*
346 * Virtual memory maps provide for the mapping, protection,
347 * and sharing of virtual memory objects. In addition,
348 * this module provides for an efficient virtual copy of
349 * memory from one map to another.
350 *
351 * Synchronization is required prior to most operations.
352 *
353 * Maps consist of an ordered doubly-linked list of simple
354 * entries; a single hint is used to speed up lookups.
355 *
356 * Sharing maps have been deleted from this version of Mach.
357 * All shared objects are now mapped directly into the respective
358 * maps. This requires a change in the copy on write strategy;
359 * the asymmetric (delayed) strategy is used for shared temporary
360 * objects instead of the symmetric (shadow) strategy. All maps
361 * are now "top level" maps (either task map, kernel map or submap
362 * of the kernel map).
363 *
364 * Since portions of maps are specified by start/end addresses,
365 * which may not align with existing map entries, all
366 * routines merely "clip" entries to these start/end values.
367 * [That is, an entry is split into two, bordering at a
368 * start or end value.] Note that these clippings may not
369 * always be necessary (as the two resulting entries are then
370 * not changed); however, the clipping is done for convenience.
371 * No attempt is currently made to "glue back together" two
372 * abutting entries.
373 *
374 * The symmetric (shadow) copy strategy implements virtual copy
375 * by copying VM object references from one map to
376 * another, and then marking both regions as copy-on-write.
377 * It is important to note that only one writeable reference
378 * to a VM object region exists in any map when this strategy
379 * is used -- this means that shadow object creation can be
380 * delayed until a write operation occurs. The asymmetric (delayed)
381 * strategy allows multiple maps to have writeable references to
382 * the same region of a vm object, and hence cannot delay creating
383 * its copy objects. See vm_object_copy_quickly() in vm_object.c.
384 * Copying of permanent objects is completely different; see
385 * vm_object_copy_strategically() in vm_object.c.
386 */
387
388 static zone_t vm_map_zone; /* zone for vm_map structures */
389 static zone_t vm_map_entry_zone; /* zone for vm_map_entry structures */
390 static zone_t vm_map_kentry_zone; /* zone for kernel entry structures */
391 static zone_t vm_map_copy_zone; /* zone for vm_map_copy structures */
392
393
394 /*
395 * Placeholder object for submap operations. This object is dropped
396 * into the range by a call to vm_map_find, and removed when
397 * vm_map_submap creates the submap.
398 */
399
400 vm_object_t vm_submap_object;
401
402 /*
403 * vm_map_init:
404 *
405 * Initialize the vm_map module. Must be called before
406 * any other vm_map routines.
407 *
408 * Map and entry structures are allocated from zones -- we must
409 * initialize those zones.
410 *
411 * There are three zones of interest:
412 *
413 * vm_map_zone: used to allocate maps.
414 * vm_map_entry_zone: used to allocate map entries.
415 * vm_map_kentry_zone: used to allocate map entries for the kernel.
416 *
417 * The kernel allocates map entries from a special zone that is initially
418 * "crammed" with memory. It would be difficult (perhaps impossible) for
419 * the kernel to allocate more memory to an entry zone when it became
420 * empty since the very act of allocating memory implies the creation
421 * of a new entry.
422 */
423
424 static void *map_data;
425 static vm_map_size_t map_data_size;
426 static void *kentry_data;
427 static vm_map_size_t kentry_data_size;
428 static int kentry_count = 2048; /* to init kentry_data_size */
429
430 #define NO_COALESCE_LIMIT (1024 * 128)
431
432
433 /* Skip acquiring locks if we're in the midst of a kernel core dump */
434 extern unsigned int not_in_kdp;
435
436 #ifdef __i386__
437 kern_return_t
438 vm_map_apple_protected(
439 vm_map_t map,
440 vm_map_offset_t start,
441 vm_map_offset_t end)
442 {
443 boolean_t map_locked;
444 kern_return_t kr;
445 vm_map_entry_t map_entry;
446 memory_object_t protected_mem_obj;
447 vm_object_t protected_object;
448 vm_map_offset_t map_addr;
449
450 vm_map_lock_read(map);
451 map_locked = TRUE;
452
453 /* lookup the protected VM object */
454 if (!vm_map_lookup_entry(map,
455 start,
456 &map_entry) ||
457 map_entry->vme_end != end ||
458 map_entry->is_sub_map) {
459 /* that memory is not properly mapped */
460 kr = KERN_INVALID_ARGUMENT;
461 goto done;
462 }
463 protected_object = map_entry->object.vm_object;
464 if (protected_object == VM_OBJECT_NULL) {
465 /* there should be a VM object here at this point */
466 kr = KERN_INVALID_ARGUMENT;
467 goto done;
468 }
469
470 /*
471 * Lookup (and create if necessary) the protected memory object
472 * matching that VM object.
473 * If successful, this also grabs a reference on the memory object,
474 * to guarantee that it doesn't go away before we get a chance to map
475 * it.
476 */
477
478 protected_mem_obj = apple_protect_pager_setup(protected_object);
479 if (protected_mem_obj == NULL) {
480 kr = KERN_FAILURE;
481 goto done;
482 }
483
484 vm_map_unlock_read(map);
485 map_locked = FALSE;
486
487 /* map this memory object in place of the current one */
488 map_addr = start;
489 kr = vm_map_enter_mem_object(map,
490 &map_addr,
491 end - start,
492 (mach_vm_offset_t) 0,
493 VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
494 (ipc_port_t) protected_mem_obj,
495 (map_entry->offset +
496 (start - map_entry->vme_start)),
497 TRUE,
498 map_entry->protection,
499 map_entry->max_protection,
500 map_entry->inheritance);
501 assert(map_addr == start);
502 if (kr == KERN_SUCCESS) {
503 /* let the pager know that this mem_obj is mapped */
504 apple_protect_pager_map(protected_mem_obj);
505 }
506 /*
507 * Release the reference obtained by apple_protect_pager_setup().
508 * The mapping (if it succeeded) is now holding a reference on the
509 * memory object.
510 */
511 memory_object_deallocate(protected_mem_obj);
512
513 done:
514 if (map_locked) {
515 vm_map_unlock_read(map);
516 }
517 return kr;
518 }
519 #endif /* __i386__ */
520
521
522 void
523 vm_map_init(
524 void)
525 {
526 vm_map_zone = zinit((vm_map_size_t) sizeof(struct _vm_map), 40*1024,
527 PAGE_SIZE, "maps");
528
529 vm_map_entry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
530 1024*1024, PAGE_SIZE*5,
531 "non-kernel map entries");
532
533 vm_map_kentry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
534 kentry_data_size, kentry_data_size,
535 "kernel map entries");
536
537 vm_map_copy_zone = zinit((vm_map_size_t) sizeof(struct vm_map_copy),
538 16*1024, PAGE_SIZE, "map copies");
539
540 /*
541 * Cram the map and kentry zones with initial data.
542 * Both zones are marked non-collectible to aid zone_gc(); the kentry zone is also made non-expandable.
543 */
544 zone_change(vm_map_zone, Z_COLLECT, FALSE);
545 zone_change(vm_map_kentry_zone, Z_COLLECT, FALSE);
546 zone_change(vm_map_kentry_zone, Z_EXPAND, FALSE);
547 zcram(vm_map_zone, map_data, map_data_size);
548 zcram(vm_map_kentry_zone, kentry_data, kentry_data_size);
549 }
550
551 void
552 vm_map_steal_memory(
553 void)
554 {
555 map_data_size = vm_map_round_page(10 * sizeof(struct _vm_map));
556 map_data = pmap_steal_memory(map_data_size);
557
558 #if 0
559 /*
560 * Limiting worst case: vm_map_kentry_zone needs to map each "available"
561 * physical page (i.e. that beyond the kernel image and page tables)
562 * individually; we guess at most one entry per eight pages in the
563 * real world. This works out to roughly .1 of 1% of physical memory,
564 * or roughly 1900 entries (64K) for a 64M machine with 4K pages.
565 */
566 #endif
567 kentry_count = pmap_free_pages() / 8;
568
569
570 kentry_data_size =
571 vm_map_round_page(kentry_count * sizeof(struct vm_map_entry));
572 kentry_data = pmap_steal_memory(kentry_data_size);
573 }
574
575 /*
576 * vm_map_create:
577 *
578 * Creates and returns a new empty VM map with
579 * the given physical map structure, and having
580 * the given lower and upper address bounds.
581 */
582 vm_map_t
583 vm_map_create(
584 pmap_t pmap,
585 vm_map_offset_t min,
586 vm_map_offset_t max,
587 boolean_t pageable)
588 {
589 static int color_seed = 0;
590 register vm_map_t result;
591
592 result = (vm_map_t) zalloc(vm_map_zone);
593 if (result == VM_MAP_NULL)
594 panic("vm_map_create");
595
596 vm_map_first_entry(result) = vm_map_to_entry(result);
597 vm_map_last_entry(result) = vm_map_to_entry(result);
598 result->hdr.nentries = 0;
599 result->hdr.entries_pageable = pageable;
600
601 result->size = 0;
602 result->user_wire_limit = MACH_VM_MAX_ADDRESS; /* default limit is unlimited */
603 result->user_wire_size = 0;
604 result->ref_count = 1;
605 #if TASK_SWAPPER
606 result->res_count = 1;
607 result->sw_state = MAP_SW_IN;
608 #endif /* TASK_SWAPPER */
609 result->pmap = pmap;
610 result->min_offset = min;
611 result->max_offset = max;
612 result->wiring_required = FALSE;
613 result->no_zero_fill = FALSE;
614 result->mapped = FALSE;
615 result->wait_for_space = FALSE;
616 result->first_free = vm_map_to_entry(result);
617 result->hint = vm_map_to_entry(result);
618 result->color_rr = (color_seed++) & vm_color_mask;
619 vm_map_lock_init(result);
620 mutex_init(&result->s_lock, 0);
621
622 return(result);
623 }
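/*
 * Illustrative sketch (an assumption based on how this file itself uses
 * vm_map_create() for its temporary "zap" maps below): create a pmap-less,
 * pageable map covering an address range; "start" and "size" are
 * placeholders for the caller's range:
 *
 *	vm_map_t zap_map;
 *
 *	zap_map = vm_map_create(PMAP_NULL, start, start + size, TRUE);
 */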
624
625 /*
626 * vm_map_entry_create: [ internal use only ]
627 *
628 * Allocates a VM map entry for insertion in the
629 * given map (or map copy). No fields are filled.
630 */
631 #define vm_map_entry_create(map) \
632 _vm_map_entry_create(&(map)->hdr)
633
634 #define vm_map_copy_entry_create(copy) \
635 _vm_map_entry_create(&(copy)->cpy_hdr)
636
637 static vm_map_entry_t
638 _vm_map_entry_create(
639 register struct vm_map_header *map_header)
640 {
641 register zone_t zone;
642 register vm_map_entry_t entry;
643
644 if (map_header->entries_pageable)
645 zone = vm_map_entry_zone;
646 else
647 zone = vm_map_kentry_zone;
648
649 entry = (vm_map_entry_t) zalloc(zone);
650 if (entry == VM_MAP_ENTRY_NULL)
651 panic("vm_map_entry_create");
652
653 return(entry);
654 }
655
656 /*
657 * vm_map_entry_dispose: [ internal use only ]
658 *
659 * Inverse of vm_map_entry_create.
660 *
661 * write map lock held so no need to
662 * do anything special to ensure correctness
663 * of the stores
664 */
665 #define vm_map_entry_dispose(map, entry) \
666 MACRO_BEGIN \
667 if((entry) == (map)->first_free) \
668 (map)->first_free = vm_map_to_entry(map); \
669 if((entry) == (map)->hint) \
670 (map)->hint = vm_map_to_entry(map); \
671 _vm_map_entry_dispose(&(map)->hdr, (entry)); \
672 MACRO_END
673
674 #define vm_map_copy_entry_dispose(copy, entry) \
675 _vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
676
677 static void
678 _vm_map_entry_dispose(
679 register struct vm_map_header *map_header,
680 register vm_map_entry_t entry)
681 {
682 register zone_t zone;
683
684 if (map_header->entries_pageable)
685 zone = vm_map_entry_zone;
686 else
687 zone = vm_map_kentry_zone;
688
689 zfree(zone, entry);
690 }
691
692 #if MACH_ASSERT
693 static boolean_t first_free_is_valid(vm_map_t map); /* forward */
694 static boolean_t first_free_check = FALSE;
695 static boolean_t
696 first_free_is_valid(
697 vm_map_t map)
698 {
699 vm_map_entry_t entry, next;
700
701 if (!first_free_check)
702 return TRUE;
703
704 entry = vm_map_to_entry(map);
705 next = entry->vme_next;
706 while (vm_map_trunc_page(next->vme_start) == vm_map_trunc_page(entry->vme_end) ||
707 (vm_map_trunc_page(next->vme_start) == vm_map_trunc_page(entry->vme_start) &&
708 next != vm_map_to_entry(map))) {
709 entry = next;
710 next = entry->vme_next;
711 if (entry == vm_map_to_entry(map))
712 break;
713 }
714 if (map->first_free != entry) {
715 printf("Bad first_free for map %p: %p should be %p\n",
716 map, map->first_free, entry);
717 return FALSE;
718 }
719 return TRUE;
720 }
721 #endif /* MACH_ASSERT */
722
723 /*
724 * UPDATE_FIRST_FREE:
725 *
726 * Updates the map->first_free pointer to the
727 * entry immediately before the first hole in the map.
728 * The map should be locked.
729 */
730 #define UPDATE_FIRST_FREE(map, new_first_free) \
731 MACRO_BEGIN \
732 vm_map_t UFF_map; \
733 vm_map_entry_t UFF_first_free; \
734 vm_map_entry_t UFF_next_entry; \
735 UFF_map = (map); \
736 UFF_first_free = (new_first_free); \
737 UFF_next_entry = UFF_first_free->vme_next; \
738 while (vm_map_trunc_page(UFF_next_entry->vme_start) == \
739 vm_map_trunc_page(UFF_first_free->vme_end) || \
740 (vm_map_trunc_page(UFF_next_entry->vme_start) == \
741 vm_map_trunc_page(UFF_first_free->vme_start) && \
742 UFF_next_entry != vm_map_to_entry(UFF_map))) { \
743 UFF_first_free = UFF_next_entry; \
744 UFF_next_entry = UFF_first_free->vme_next; \
745 if (UFF_first_free == vm_map_to_entry(UFF_map)) \
746 break; \
747 } \
748 UFF_map->first_free = UFF_first_free; \
749 assert(first_free_is_valid(UFF_map)); \
750 MACRO_END
751
752 /*
753 * vm_map_entry_{un,}link:
754 *
755 * Insert/remove entries from maps (or map copies).
756 */
757 #define vm_map_entry_link(map, after_where, entry) \
758 MACRO_BEGIN \
759 vm_map_t VMEL_map; \
760 vm_map_entry_t VMEL_entry; \
761 VMEL_map = (map); \
762 VMEL_entry = (entry); \
763 _vm_map_entry_link(&VMEL_map->hdr, after_where, VMEL_entry); \
764 UPDATE_FIRST_FREE(VMEL_map, VMEL_map->first_free); \
765 MACRO_END
766
767
768 #define vm_map_copy_entry_link(copy, after_where, entry) \
769 _vm_map_entry_link(&(copy)->cpy_hdr, after_where, (entry))
770
771 #define _vm_map_entry_link(hdr, after_where, entry) \
772 MACRO_BEGIN \
773 (hdr)->nentries++; \
774 (entry)->vme_prev = (after_where); \
775 (entry)->vme_next = (after_where)->vme_next; \
776 (entry)->vme_prev->vme_next = (entry)->vme_next->vme_prev = (entry); \
777 MACRO_END
778
779 #define vm_map_entry_unlink(map, entry) \
780 MACRO_BEGIN \
781 vm_map_t VMEU_map; \
782 vm_map_entry_t VMEU_entry; \
783 vm_map_entry_t VMEU_first_free; \
784 VMEU_map = (map); \
785 VMEU_entry = (entry); \
786 if (VMEU_entry->vme_start <= VMEU_map->first_free->vme_start) \
787 VMEU_first_free = VMEU_entry->vme_prev; \
788 else \
789 VMEU_first_free = VMEU_map->first_free; \
790 _vm_map_entry_unlink(&VMEU_map->hdr, VMEU_entry); \
791 UPDATE_FIRST_FREE(VMEU_map, VMEU_first_free); \
792 MACRO_END
793
794 #define vm_map_copy_entry_unlink(copy, entry) \
795 _vm_map_entry_unlink(&(copy)->cpy_hdr, (entry))
796
797 #define _vm_map_entry_unlink(hdr, entry) \
798 MACRO_BEGIN \
799 (hdr)->nentries--; \
800 (entry)->vme_next->vme_prev = (entry)->vme_prev; \
801 (entry)->vme_prev->vme_next = (entry)->vme_next; \
802 MACRO_END
803
804 #if MACH_ASSERT && TASK_SWAPPER
805 /*
806 * vm_map_res_reference:
807 *
808 * Adds another valid residence count to the given map.
809 *
810 * Map is locked so this function can be called from
811 * vm_map_swapin.
812 *
813 */
814 void vm_map_res_reference(register vm_map_t map)
815 {
816 /* assert map is locked */
817 assert(map->res_count >= 0);
818 assert(map->ref_count >= map->res_count);
819 if (map->res_count == 0) {
820 mutex_unlock(&map->s_lock);
821 vm_map_lock(map);
822 vm_map_swapin(map);
823 mutex_lock(&map->s_lock);
824 ++map->res_count;
825 vm_map_unlock(map);
826 } else
827 ++map->res_count;
828 }
829
830 /*
831 * vm_map_reference_swap:
832 *
833 * Adds valid reference and residence counts to the given map.
834 *
835 * The map may not be in memory (i.e. zero residence count).
836 *
837 */
838 void vm_map_reference_swap(register vm_map_t map)
839 {
840 assert(map != VM_MAP_NULL);
841 mutex_lock(&map->s_lock);
842 assert(map->res_count >= 0);
843 assert(map->ref_count >= map->res_count);
844 map->ref_count++;
845 vm_map_res_reference(map);
846 mutex_unlock(&map->s_lock);
847 }
848
849 /*
850 * vm_map_res_deallocate:
851 *
852 * Decrement residence count on a map; possibly causing swapout.
853 *
854 * The map must be in memory (i.e. non-zero residence count).
855 *
856 * The map is locked, so this function is callable from vm_map_deallocate.
857 *
858 */
859 void vm_map_res_deallocate(register vm_map_t map)
860 {
861 assert(map->res_count > 0);
862 if (--map->res_count == 0) {
863 mutex_unlock(&map->s_lock);
864 vm_map_lock(map);
865 vm_map_swapout(map);
866 vm_map_unlock(map);
867 mutex_lock(&map->s_lock);
868 }
869 assert(map->ref_count >= map->res_count);
870 }
871 #endif /* MACH_ASSERT && TASK_SWAPPER */
872
873 /*
874 * vm_map_destroy:
875 *
876 * Actually destroy a map.
877 */
878 void
879 vm_map_destroy(
880 vm_map_t map,
881 int flags)
882 {
883 vm_map_lock(map);
884
885 /* clean up regular map entries */
886 (void) vm_map_delete(map, map->min_offset, map->max_offset,
887 flags, VM_MAP_NULL);
888 /* clean up leftover special mappings (commpage, etc...) */
889 #ifdef __ppc__
890 /*
891 * PPC51: ppc64 is limited to 51-bit addresses.
892 * Memory beyond this 51-bit limit is mapped specially at the
893 * pmap level, so do not interfere.
894 * On PPC64, the commpage is mapped beyond the addressable range
895 * via a special pmap hack, so ask pmap to clean it explicitly...
896 */
897 if (map->pmap) {
898 pmap_unmap_sharedpage(map->pmap);
899 }
900 /* ... and do not let regular pmap cleanup apply here */
901 flags |= VM_MAP_REMOVE_NO_PMAP_CLEANUP;
902 #endif /* __ppc__ */
903 (void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL,
904 flags, VM_MAP_NULL);
905 vm_map_unlock(map);
906
907 assert(map->hdr.nentries == 0);
908
909 if(map->pmap)
910 pmap_destroy(map->pmap);
911
912 zfree(vm_map_zone, map);
913 }
914
915 #if TASK_SWAPPER
916 /*
917 * vm_map_swapin/vm_map_swapout
918 *
919 * Swap a map in and out, either referencing or releasing its resources.
920 * These functions are for internal use only; however, they must be exported
921 * because they may be called from macros, which are exported.
922 *
923 * In the case of swapout, there could be races on the residence count,
924 * so if the residence count is up, we return, assuming that a
925 * vm_map_deallocate() call in the near future will bring us back.
926 *
927 * Locking:
928 * -- We use the map write lock for synchronization among races.
929 * -- The map write lock, and not the simple s_lock, protects the
930 * swap state of the map.
931 * -- If a map entry is a share map, then we hold both locks, in
932 * hierarchical order.
933 *
934 * Synchronization Notes:
935 * 1) If a vm_map_swapin() call happens while a swapout is in progress, it
936 * will block on the map lock and proceed when swapout is through.
937 * 2) A vm_map_reference() call at this time is illegal, and will
938 * cause a panic. vm_map_reference() is only allowed on resident
939 * maps, since it refuses to block.
940 * 3) A vm_map_swapin() call during a swapin will block, and
941 * proceed when the first swapin is done, turning into a nop.
942 * This is the reason the res_count is not incremented until
943 * after the swapin is complete.
944 * 4) There is a timing hole after the checks of the res_count, before
945 * the map lock is taken, during which a swapin may get the lock
946 * before a swapout about to happen. If this happens, the swapin
947 * will detect the state and increment the reference count, causing
948 * the swapout to be a nop, thereby delaying it until a later
949 * vm_map_deallocate. If the swapout gets the lock first, then
950 * the swapin will simply block until the swapout is done, and
951 * then proceed.
952 *
953 * Because vm_map_swapin() is potentially an expensive operation, it
954 * should be used with caution.
955 *
956 * Invariants:
957 * 1) A map with a residence count of zero is either swapped, or
958 * being swapped.
959 * 2) A map with a non-zero residence count is either resident,
960 * or being swapped in.
961 */
962
963 int vm_map_swap_enable = 1;
964
965 void vm_map_swapin (vm_map_t map)
966 {
967 register vm_map_entry_t entry;
968
969 if (!vm_map_swap_enable) /* debug */
970 return;
971
972 /*
973 * Map is locked
974 * First deal with various races.
975 */
976 if (map->sw_state == MAP_SW_IN)
977 /*
978 * we raced with swapout and won. Returning will incr.
979 * the res_count, turning the swapout into a nop.
980 */
981 return;
982
983 /*
984 * The residence count must be zero. If we raced with another
985 * swapin, the state would have been IN; if we raced with a
986 * swapout (after another competing swapin), we must have lost
987 * the race to get here (see above comment), in which case
988 * res_count is still 0.
989 */
990 assert(map->res_count == 0);
991
992 /*
993 * There are no intermediate states of a map going out or
994 * coming in, since the map is locked during the transition.
995 */
996 assert(map->sw_state == MAP_SW_OUT);
997
998 /*
999 * We now operate upon each map entry. If the entry is a sub-
1000 * or share-map, we call vm_map_res_reference upon it.
1001 * If the entry is an object, we call vm_object_res_reference
1002 * (this may iterate through the shadow chain).
1003 * Note that we hold the map locked the entire time,
1004 * even if we get back here via a recursive call in
1005 * vm_map_res_reference.
1006 */
1007 entry = vm_map_first_entry(map);
1008
1009 while (entry != vm_map_to_entry(map)) {
1010 if (entry->object.vm_object != VM_OBJECT_NULL) {
1011 if (entry->is_sub_map) {
1012 vm_map_t lmap = entry->object.sub_map;
1013 mutex_lock(&lmap->s_lock);
1014 vm_map_res_reference(lmap);
1015 mutex_unlock(&lmap->s_lock);
1016 } else {
1017 vm_object_t object = entry->object.vm_object;
1018 vm_object_lock(object);
1019 /*
1020 * This call may iterate through the
1021 * shadow chain.
1022 */
1023 vm_object_res_reference(object);
1024 vm_object_unlock(object);
1025 }
1026 }
1027 entry = entry->vme_next;
1028 }
1029 assert(map->sw_state == MAP_SW_OUT);
1030 map->sw_state = MAP_SW_IN;
1031 }
1032
1033 void vm_map_swapout(vm_map_t map)
1034 {
1035 register vm_map_entry_t entry;
1036
1037 /*
1038 * Map is locked
1039 * First deal with various races.
1040 * If we raced with a swapin and lost, the residence count
1041 * will have been incremented to 1, and we simply return.
1042 */
1043 mutex_lock(&map->s_lock);
1044 if (map->res_count != 0) {
1045 mutex_unlock(&map->s_lock);
1046 return;
1047 }
1048 mutex_unlock(&map->s_lock);
1049
1050 /*
1051 * There are no intermediate states of a map going out or
1052 * coming in, since the map is locked during the transition.
1053 */
1054 assert(map->sw_state == MAP_SW_IN);
1055
1056 if (!vm_map_swap_enable)
1057 return;
1058
1059 /*
1060 * We now operate upon each map entry. If the entry is a sub-
1061 * or share-map, we call vm_map_res_deallocate upon it.
1062 * If the entry is an object, we call vm_object_res_deallocate
1063 * (this may iterate through the shadow chain).
1064 * Note that we hold the map locked the entire time,
1065 * even if we get back here via a recursive call in
1066 * vm_map_res_deallocate.
1067 */
1068 entry = vm_map_first_entry(map);
1069
1070 while (entry != vm_map_to_entry(map)) {
1071 if (entry->object.vm_object != VM_OBJECT_NULL) {
1072 if (entry->is_sub_map) {
1073 vm_map_t lmap = entry->object.sub_map;
1074 mutex_lock(&lmap->s_lock);
1075 vm_map_res_deallocate(lmap);
1076 mutex_unlock(&lmap->s_lock);
1077 } else {
1078 vm_object_t object = entry->object.vm_object;
1079 vm_object_lock(object);
1080 /*
1081 * This call may take a long time,
1082 * since it could actively push
1083 * out pages (if we implement it
1084 * that way).
1085 */
1086 vm_object_res_deallocate(object);
1087 vm_object_unlock(object);
1088 }
1089 }
1090 entry = entry->vme_next;
1091 }
1092 assert(map->sw_state == MAP_SW_IN);
1093 map->sw_state = MAP_SW_OUT;
1094 }
1095
1096 #endif /* TASK_SWAPPER */
1097
1098
1099 /*
1100 * SAVE_HINT_MAP_READ:
1101 *
1102 * Saves the specified entry as the hint for
1103 * future lookups. Only a read lock is held on the map,
1104 * so make sure the store is atomic... OSCompareAndSwap
1105 * guarantees this... also, we don't care if we collide
1106 * and someone else wins and stores their 'hint'
1107 */
1108 #define SAVE_HINT_MAP_READ(map,value) \
1109 MACRO_BEGIN \
1110 OSCompareAndSwap((UInt32)((map)->hint), (UInt32)value, (UInt32 *)(&(map)->hint)); \
1111 MACRO_END
1112
1113
1114 /*
1115 * SAVE_HINT_MAP_WRITE:
1116 *
1117 * Saves the specified entry as the hint for
1118 * future lookups. write lock held on map,
1119 * so no one else can be writing or looking
1120 * until the lock is dropped, so it's safe
1121 * to just do an assignment
1122 */
1123 #define SAVE_HINT_MAP_WRITE(map,value) \
1124 MACRO_BEGIN \
1125 (map)->hint = (value); \
1126 MACRO_END
1127
1128 /*
1129 * vm_map_lookup_entry: [ internal use only ]
1130 *
1131 * Finds the map entry containing (or
1132 * immediately preceding) the specified address
1133 * in the given map; the entry is returned
1134 * in the "entry" parameter. The boolean
1135 * result indicates whether the address is
1136 * actually contained in the map.
1137 */
1138 boolean_t
1139 vm_map_lookup_entry(
1140 register vm_map_t map,
1141 register vm_map_offset_t address,
1142 vm_map_entry_t *entry) /* OUT */
1143 {
1144 register vm_map_entry_t cur;
1145 register vm_map_entry_t last;
1146
1147 /*
1148 * Start looking either from the head of the
1149 * list, or from the hint.
1150 */
1151 cur = map->hint;
1152
1153 if (cur == vm_map_to_entry(map))
1154 cur = cur->vme_next;
1155
1156 if (address >= cur->vme_start) {
1157 /*
1158 * Go from hint to end of list.
1159 *
1160 * But first, make a quick check to see if
1161 * we are already looking at the entry we
1162 * want (which is usually the case).
1163 * Note also that we don't need to save the hint
1164 * here... it is the same hint (unless we are
1165 * at the header, in which case the hint didn't
1166 * buy us anything anyway).
1167 */
1168 last = vm_map_to_entry(map);
1169 if ((cur != last) && (cur->vme_end > address)) {
1170 *entry = cur;
1171 return(TRUE);
1172 }
1173 }
1174 else {
1175 /*
1176 * Go from start to hint, *inclusively*
1177 */
1178 last = cur->vme_next;
1179 cur = vm_map_first_entry(map);
1180 }
1181
1182 /*
1183 * Search linearly
1184 */
1185
1186 while (cur != last) {
1187 if (cur->vme_end > address) {
1188 if (address >= cur->vme_start) {
1189 /*
1190 * Save this lookup for future
1191 * hints, and return
1192 */
1193
1194 *entry = cur;
1195 SAVE_HINT_MAP_READ(map, cur);
1196
1197 return(TRUE);
1198 }
1199 break;
1200 }
1201 cur = cur->vme_next;
1202 }
1203 *entry = cur->vme_prev;
1204 SAVE_HINT_MAP_READ(map, *entry);
1205
1206 return(FALSE);
1207 }
1208
1209 /*
1210 * Routine: vm_map_find_space
1211 * Purpose:
1212 * Allocate a range in the specified virtual address map,
1213 * returning the entry allocated for that range.
1214 * Used by kmem_alloc, etc.
1215 *
1216 * The map must NOT be locked. It will be returned locked
1217 * on KERN_SUCCESS, unlocked on failure.
1218 *
1219 * If an entry is allocated, the object/offset fields
1220 * are initialized to zero.
1221 */
1222 kern_return_t
1223 vm_map_find_space(
1224 register vm_map_t map,
1225 vm_map_offset_t *address, /* OUT */
1226 vm_map_size_t size,
1227 vm_map_offset_t mask,
1228 int flags,
1229 vm_map_entry_t *o_entry) /* OUT */
1230 {
1231 register vm_map_entry_t entry, new_entry;
1232 register vm_map_offset_t start;
1233 register vm_map_offset_t end;
1234
1235 if (size == 0) {
1236 *address = 0;
1237 return KERN_INVALID_ARGUMENT;
1238 }
1239
1240 if (flags & VM_FLAGS_GUARD_AFTER) {
1241 /* account for the back guard page in the size */
1242 size += PAGE_SIZE_64;
1243 }
1244
1245 new_entry = vm_map_entry_create(map);
1246
1247 /*
1248 * Look for the first possible address; if there's already
1249 * something at this address, we have to start after it.
1250 */
1251
1252 vm_map_lock(map);
1253
1254 assert(first_free_is_valid(map));
1255 if ((entry = map->first_free) == vm_map_to_entry(map))
1256 start = map->min_offset;
1257 else
1258 start = entry->vme_end;
1259
1260 /*
1261 * In any case, the "entry" always precedes
1262 * the proposed new region throughout the loop:
1263 */
1264
1265 while (TRUE) {
1266 register vm_map_entry_t next;
1267
1268 /*
1269 * Find the end of the proposed new region.
1270 * Be sure we didn't go beyond the end, or
1271 * wrap around the address.
1272 */
1273
1274 if (flags & VM_FLAGS_GUARD_BEFORE) {
1275 /* reserve space for the front guard page */
1276 start += PAGE_SIZE_64;
1277 }
1278 end = ((start + mask) & ~mask);
1279
1280 if (end < start) {
1281 vm_map_entry_dispose(map, new_entry);
1282 vm_map_unlock(map);
1283 return(KERN_NO_SPACE);
1284 }
1285 start = end;
1286 end += size;
1287
1288 if ((end > map->max_offset) || (end < start)) {
1289 vm_map_entry_dispose(map, new_entry);
1290 vm_map_unlock(map);
1291 return(KERN_NO_SPACE);
1292 }
1293
1294 /*
1295 * If there are no more entries, we must win.
1296 */
1297
1298 next = entry->vme_next;
1299 if (next == vm_map_to_entry(map))
1300 break;
1301
1302 /*
1303 * If there is another entry, it must be
1304 * after the end of the potential new region.
1305 */
1306
1307 if (next->vme_start >= end)
1308 break;
1309
1310 /*
1311 * Didn't fit -- move to the next entry.
1312 */
1313
1314 entry = next;
1315 start = entry->vme_end;
1316 }
1317
1318 /*
1319 * At this point,
1320 * "start" and "end" should define the endpoints of the
1321 * available new range, and
1322 * "entry" should refer to the region before the new
1323 * range, and
1324 *
1325 * the map should be locked.
1326 */
1327
1328 if (flags & VM_FLAGS_GUARD_BEFORE) {
1329 /* go back for the front guard page */
1330 start -= PAGE_SIZE_64;
1331 }
1332 *address = start;
1333
1334 new_entry->vme_start = start;
1335 new_entry->vme_end = end;
1336 assert(page_aligned(new_entry->vme_start));
1337 assert(page_aligned(new_entry->vme_end));
1338
1339 new_entry->is_shared = FALSE;
1340 new_entry->is_sub_map = FALSE;
1341 new_entry->use_pmap = FALSE;
1342 new_entry->object.vm_object = VM_OBJECT_NULL;
1343 new_entry->offset = (vm_object_offset_t) 0;
1344
1345 new_entry->needs_copy = FALSE;
1346
1347 new_entry->inheritance = VM_INHERIT_DEFAULT;
1348 new_entry->protection = VM_PROT_DEFAULT;
1349 new_entry->max_protection = VM_PROT_ALL;
1350 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
1351 new_entry->wired_count = 0;
1352 new_entry->user_wired_count = 0;
1353
1354 new_entry->in_transition = FALSE;
1355 new_entry->needs_wakeup = FALSE;
1356 new_entry->no_cache = FALSE;
1357
1358 new_entry->alias = 0;
1359
1360 VM_GET_FLAGS_ALIAS(flags, new_entry->alias);
1361
1362 /*
1363 * Insert the new entry into the list
1364 */
1365
1366 vm_map_entry_link(map, entry, new_entry);
1367
1368 map->size += size;
1369
1370 /*
1371 * Update the lookup hint
1372 */
1373 SAVE_HINT_MAP_WRITE(map, new_entry);
1374
1375 *o_entry = new_entry;
1376 return(KERN_SUCCESS);
1377 }
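/*
 * Illustrative caller sketch (an assumption, modeled on kmem_alloc-style use,
 * not a verbatim excerpt): on KERN_SUCCESS the map comes back locked and the
 * new entry's object/offset are zeroed, so a caller typically fills them in
 * and then unlocks. "size" and "object" are placeholders supplied by the
 * caller:
 *
 *	vm_map_offset_t	addr;
 *	vm_map_entry_t	entry;
 *
 *	if (vm_map_find_space(kernel_map, &addr, size, 0, 0, &entry)
 *	    == KERN_SUCCESS) {
 *		entry->object.vm_object = object;
 *		entry->offset = (vm_object_offset_t) 0;
 *		vm_map_unlock(kernel_map);
 *	}
 */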
1378
1379 int vm_map_pmap_enter_print = FALSE;
1380 int vm_map_pmap_enter_enable = FALSE;
1381
1382 /*
1383 * Routine: vm_map_pmap_enter [internal only]
1384 *
1385 * Description:
1386 * Force pages from the specified object to be entered into
1387 * the pmap at the specified address if they are present.
1388 * The scan ends as soon as a page is not found in the object.
1389 *
1390 * Returns:
1391 * Nothing.
1392 *
1393 * In/out conditions:
1394 * The source map should not be locked on entry.
1395 */
1396 static void
1397 vm_map_pmap_enter(
1398 vm_map_t map,
1399 register vm_map_offset_t addr,
1400 register vm_map_offset_t end_addr,
1401 register vm_object_t object,
1402 vm_object_offset_t offset,
1403 vm_prot_t protection)
1404 {
1405 int type_of_fault;
1406 kern_return_t kr;
1407
1408 if(map->pmap == 0)
1409 return;
1410
1411 while (addr < end_addr) {
1412 register vm_page_t m;
1413
1414 vm_object_lock(object);
1415
1416 m = vm_page_lookup(object, offset);
1417 /*
1418 * ENCRYPTED SWAP:
1419 * The user should never see encrypted data, so do not
1420 * enter an encrypted page in the page table.
1421 */
1422 if (m == VM_PAGE_NULL || m->busy || m->encrypted ||
1423 m->fictitious ||
1424 (m->unusual && ( m->error || m->restart || m->absent))) {
1425 vm_object_unlock(object);
1426 return;
1427 }
1428
1429 if (vm_map_pmap_enter_print) {
1430 printf("vm_map_pmap_enter:");
1431 printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
1432 map, (unsigned long long)addr, object, (unsigned long long)offset);
1433 }
1434 type_of_fault = DBG_CACHE_HIT_FAULT;
1435 kr = vm_fault_enter(m, map->pmap, addr, protection,
1436 m->wire_count != 0, FALSE, FALSE,
1437 &type_of_fault);
1438
1439 vm_object_unlock(object);
1440
1441 offset += PAGE_SIZE_64;
1442 addr += PAGE_SIZE;
1443 }
1444 }
1445
1446 boolean_t vm_map_pmap_is_empty(
1447 vm_map_t map,
1448 vm_map_offset_t start,
1449 vm_map_offset_t end);
1450 boolean_t vm_map_pmap_is_empty(
1451 vm_map_t map,
1452 vm_map_offset_t start,
1453 vm_map_offset_t end)
1454 {
1455 #ifdef MACHINE_PMAP_IS_EMPTY
1456 return pmap_is_empty(map->pmap, start, end);
1457 #else /* MACHINE_PMAP_IS_EMPTY */
1458 vm_map_offset_t offset;
1459 ppnum_t phys_page;
1460
1461 if (map->pmap == NULL) {
1462 return TRUE;
1463 }
1464
1465 for (offset = start;
1466 offset < end;
1467 offset += PAGE_SIZE) {
1468 phys_page = pmap_find_phys(map->pmap, offset);
1469 if (phys_page) {
1470 kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
1471 "page %d at 0x%llx\n",
1472 map, (long long)start, (long long)end,
1473 phys_page, (long long)offset);
1474 return FALSE;
1475 }
1476 }
1477 return TRUE;
1478 #endif /* MACHINE_PMAP_IS_EMPTY */
1479 }
1480
1481 /*
1482 * Routine: vm_map_enter
1483 *
1484 * Description:
1485 * Allocate a range in the specified virtual address map.
1486 * The resulting range will refer to memory defined by
1487 * the given memory object and offset into that object.
1488 *
1489 * Arguments are as defined in the vm_map call.
1490 */
1491 int _map_enter_debug = 0;
1492 static unsigned int vm_map_enter_restore_successes = 0;
1493 static unsigned int vm_map_enter_restore_failures = 0;
1494 kern_return_t
1495 vm_map_enter(
1496 vm_map_t map,
1497 vm_map_offset_t *address, /* IN/OUT */
1498 vm_map_size_t size,
1499 vm_map_offset_t mask,
1500 int flags,
1501 vm_object_t object,
1502 vm_object_offset_t offset,
1503 boolean_t needs_copy,
1504 vm_prot_t cur_protection,
1505 vm_prot_t max_protection,
1506 vm_inherit_t inheritance)
1507 {
1508 vm_map_entry_t entry, new_entry;
1509 vm_map_offset_t start, tmp_start, tmp_offset;
1510 vm_map_offset_t end, tmp_end;
1511 kern_return_t result = KERN_SUCCESS;
1512 vm_map_t zap_old_map = VM_MAP_NULL;
1513 vm_map_t zap_new_map = VM_MAP_NULL;
1514 boolean_t map_locked = FALSE;
1515 boolean_t pmap_empty = TRUE;
1516 boolean_t new_mapping_established = FALSE;
1517 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
1518 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
1519 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
1520 boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
1521 boolean_t is_submap = ((flags & VM_FLAGS_SUBMAP) != 0);
1522 char alias;
1523 vm_map_offset_t effective_min_offset, effective_max_offset;
1524
1525 if (is_submap) {
1526 if (purgable) {
1527 /* submaps can not be purgeable */
1528 return KERN_INVALID_ARGUMENT;
1529 }
1530 if (object == VM_OBJECT_NULL) {
1531 /* submaps can not be created lazily */
1532 return KERN_INVALID_ARGUMENT;
1533 }
1534 }
1535 if (flags & VM_FLAGS_ALREADY) {
1536 /*
1537 * VM_FLAGS_ALREADY says that it's OK if the same mapping
1538 * is already present. For it to be meaningful, the requested
1539 * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
1540 * we shouldn't try to remove what was mapped there first
1541 * (!VM_FLAGS_OVERWRITE).
1542 */
1543 if ((flags & VM_FLAGS_ANYWHERE) ||
1544 (flags & VM_FLAGS_OVERWRITE)) {
1545 return KERN_INVALID_ARGUMENT;
1546 }
1547 }
1548
1549 effective_min_offset = map->min_offset;
1550 if (flags & VM_FLAGS_BEYOND_MAX) {
1551 /*
1552 * Allow an insertion beyond the map's official top boundary.
1553 */
1554 if (vm_map_is_64bit(map))
1555 effective_max_offset = 0xFFFFFFFFFFFFF000ULL;
1556 else
1557 effective_max_offset = 0x00000000FFFFF000ULL;
1558 } else {
1559 effective_max_offset = map->max_offset;
1560 }
1561
1562 if (size == 0 ||
1563 (offset & PAGE_MASK_64) != 0) {
1564 *address = 0;
1565 return KERN_INVALID_ARGUMENT;
1566 }
1567
1568 VM_GET_FLAGS_ALIAS(flags, alias);
1569
1570 #define RETURN(value) { result = value; goto BailOut; }
1571
1572 assert(page_aligned(*address));
1573 assert(page_aligned(size));
1574
1575 /*
1576 * Only zero-fill objects are allowed to be purgable.
1577 * LP64todo - limit purgable objects to 32-bits for now
1578 */
1579 if (purgable &&
1580 (offset != 0 ||
1581 (object != VM_OBJECT_NULL &&
1582 (object->size != size ||
1583 object->purgable == VM_PURGABLE_DENY))
1584 || size > VM_MAX_ADDRESS)) /* LP64todo: remove when dp capable */
1585 return KERN_INVALID_ARGUMENT;
1586
1587 if (!anywhere && overwrite) {
1588 /*
1589 * Create a temporary VM map to hold the old mappings in the
1590 * affected area while we create the new one.
1591 * This avoids releasing the VM map lock in
1592 * vm_map_entry_delete() and allows atomicity
1593 * when we want to replace some mappings with a new one.
1594 * It also allows us to restore the old VM mappings if the
1595 * new mapping fails.
1596 */
1597 zap_old_map = vm_map_create(PMAP_NULL,
1598 *address,
1599 *address + size,
1600 TRUE);
1601 }
1602
1603 StartAgain: ;
1604
1605 start = *address;
1606
1607 if (anywhere) {
1608 vm_map_lock(map);
1609 map_locked = TRUE;
1610
1611 /*
1612 * Calculate the first possible address.
1613 */
1614
1615 if (start < effective_min_offset)
1616 start = effective_min_offset;
1617 if (start > effective_max_offset)
1618 RETURN(KERN_NO_SPACE);
1619
1620 /*
1621 * Look for the first possible address;
1622 * if there's already something at this
1623 * address, we have to start after it.
1624 */
1625
1626 assert(first_free_is_valid(map));
1627 if (start == effective_min_offset) {
1628 if ((entry = map->first_free) != vm_map_to_entry(map))
1629 start = entry->vme_end;
1630 } else {
1631 vm_map_entry_t tmp_entry;
1632 if (vm_map_lookup_entry(map, start, &tmp_entry))
1633 start = tmp_entry->vme_end;
1634 entry = tmp_entry;
1635 }
1636
1637 /*
1638 * In any case, the "entry" always precedes
1639 * the proposed new region throughout the
1640 * loop:
1641 */
1642
1643 while (TRUE) {
1644 register vm_map_entry_t next;
1645
1646 /*
1647 * Find the end of the proposed new region.
1648 * Be sure we didn't go beyond the end, or
1649 * wrap around the address.
1650 */
1651
1652 end = ((start + mask) & ~mask);
1653 if (end < start)
1654 RETURN(KERN_NO_SPACE);
1655 start = end;
1656 end += size;
1657
1658 if ((end > effective_max_offset) || (end < start)) {
1659 if (map->wait_for_space) {
1660 if (size <= (effective_max_offset -
1661 effective_min_offset)) {
1662 assert_wait((event_t)map,
1663 THREAD_ABORTSAFE);
1664 vm_map_unlock(map);
1665 map_locked = FALSE;
1666 thread_block(THREAD_CONTINUE_NULL);
1667 goto StartAgain;
1668 }
1669 }
1670 RETURN(KERN_NO_SPACE);
1671 }
1672
1673 /*
1674 * If there are no more entries, we must win.
1675 */
1676
1677 next = entry->vme_next;
1678 if (next == vm_map_to_entry(map))
1679 break;
1680
1681 /*
1682 * If there is another entry, it must be
1683 * after the end of the potential new region.
1684 */
1685
1686 if (next->vme_start >= end)
1687 break;
1688
1689 /*
1690 * Didn't fit -- move to the next entry.
1691 */
1692
1693 entry = next;
1694 start = entry->vme_end;
1695 }
1696 *address = start;
1697 } else {
1698 /*
1699 * Verify that:
1700 * the address doesn't itself violate
1701 * the mask requirement.
1702 */
1703
1704 vm_map_lock(map);
1705 map_locked = TRUE;
1706 if ((start & mask) != 0)
1707 RETURN(KERN_NO_SPACE);
1708
1709 /*
1710 * ... the address is within bounds
1711 */
1712
1713 end = start + size;
1714
1715 if ((start < effective_min_offset) ||
1716 (end > effective_max_offset) ||
1717 (start >= end)) {
1718 RETURN(KERN_INVALID_ADDRESS);
1719 }
1720
1721 if (overwrite && zap_old_map != VM_MAP_NULL) {
1722 /*
1723 * Fixed mapping and "overwrite" flag: attempt to
1724 * remove all existing mappings in the specified
1725 * address range, saving them in our "zap_old_map".
1726 */
1727 (void) vm_map_delete(map, start, end,
1728 VM_MAP_REMOVE_SAVE_ENTRIES,
1729 zap_old_map);
1730 }
1731
1732 /*
1733 * ... the starting address isn't allocated
1734 */
1735
1736 if (vm_map_lookup_entry(map, start, &entry)) {
1737 if (! (flags & VM_FLAGS_ALREADY)) {
1738 RETURN(KERN_NO_SPACE);
1739 }
1740 /*
1741 * Check if what's already there is what we want.
1742 */
1743 tmp_start = start;
1744 tmp_offset = offset;
1745 if (entry->vme_start < start) {
1746 tmp_start -= start - entry->vme_start;
1747 tmp_offset -= start - entry->vme_start;
1748
1749 }
1750 for (; entry->vme_start < end;
1751 entry = entry->vme_next) {
1752 if (entry == vm_map_to_entry(map) ||
1753 entry->vme_start != tmp_start ||
1754 entry->is_sub_map != is_submap ||
1755 entry->object.vm_object != object ||
1756 entry->offset != tmp_offset ||
1757 entry->needs_copy != needs_copy ||
1758 entry->protection != cur_protection ||
1759 entry->max_protection != max_protection ||
1760 entry->inheritance != inheritance ||
1761 entry->alias != alias) {
1762 /* not the same mapping ! */
1763 RETURN(KERN_NO_SPACE);
1764 }
1765 tmp_offset += entry->vme_end - entry->vme_start;
1766 tmp_start += entry->vme_end - entry->vme_start;
1767 if (entry->vme_end >= end) {
1768 /* reached the end of our mapping */
1769 break;
1770 }
1771 }
1772 /* it all matches: let's use what's already there ! */
1773 RETURN(KERN_MEMORY_PRESENT);
1774 }
1775
1776 /*
1777 * ... the next region doesn't overlap the
1778 * end point.
1779 */
1780
1781 if ((entry->vme_next != vm_map_to_entry(map)) &&
1782 (entry->vme_next->vme_start < end))
1783 RETURN(KERN_NO_SPACE);
1784 }
1785
1786 /*
1787 * At this point,
1788 * "start" and "end" should define the endpoints of the
1789 * available new range, and
1790 * "entry" should refer to the region before the new
1791 * range, and
1792 *
1793 * the map should be locked.
1794 */
1795
1796 /*
1797 * See whether we can avoid creating a new entry (and object) by
1798 * extending one of our neighbors. [So far, we only attempt to
1799 * extend from below.] Note that we can never extend/join
1800 * purgable objects because they need to remain distinct
1801 * entities in order to implement their "volatile object"
1802 * semantics.
1803 */
1804
1805 if (purgable) {
1806 if (object == VM_OBJECT_NULL) {
1807 object = vm_object_allocate(size);
1808 object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
1809 object->purgable = VM_PURGABLE_NONVOLATILE;
1810 offset = (vm_object_offset_t)0;
1811 }
1812 } else if ((is_submap == FALSE) &&
1813 (object == VM_OBJECT_NULL) &&
1814 (entry != vm_map_to_entry(map)) &&
1815 (entry->vme_end == start) &&
1816 (!entry->is_shared) &&
1817 (!entry->is_sub_map) &&
1818 (entry->alias == alias) &&
1819 (entry->inheritance == inheritance) &&
1820 (entry->protection == cur_protection) &&
1821 (entry->max_protection == max_protection) &&
1822 (entry->behavior == VM_BEHAVIOR_DEFAULT) &&
1823 (entry->in_transition == 0) &&
1824 (entry->no_cache == no_cache) &&
1825 ((alias == VM_MEMORY_REALLOC) ||
1826 ((entry->vme_end - entry->vme_start) + size < NO_COALESCE_LIMIT)) &&
1827 (entry->wired_count == 0)) { /* implies user_wired_count == 0 */
1828 if (vm_object_coalesce(entry->object.vm_object,
1829 VM_OBJECT_NULL,
1830 entry->offset,
1831 (vm_object_offset_t) 0,
1832 (vm_map_size_t)(entry->vme_end - entry->vme_start),
1833 (vm_map_size_t)(end - entry->vme_end))) {
1834
1835 /*
1836 * Coalesced the two objects - can extend
1837 * the previous map entry to include the
1838 * new range.
1839 */
1840 map->size += (end - entry->vme_end);
1841 entry->vme_end = end;
1842 UPDATE_FIRST_FREE(map, map->first_free);
1843 RETURN(KERN_SUCCESS);
1844 }
1845 }
1846
1847 /*
1848 * Create a new entry
1849 * LP64todo - for now, we can only allocate 4GB internal objects
1850 * because the default pager can't page bigger ones. Remove this
1851 * when it can.
1852 *
1853 * XXX FBDP
1854 * The reserved "page zero" in each process's address space can
1855 * be arbitrarily large. Splitting it into separate 4GB objects and
1856 * therefore different VM map entries serves no purpose and just
1857 * slows down operations on the VM map, so let's not split the
1858 * allocation into 4GB chunks if the max protection is NONE. That
1859 * memory should never be accessible, so it will never get to the
1860 * default pager.
1861 */
1862 tmp_start = start;
1863 if (object == VM_OBJECT_NULL &&
1864 size > (vm_map_size_t)VM_MAX_ADDRESS &&
1865 max_protection != VM_PROT_NONE)
1866 tmp_end = tmp_start + (vm_map_size_t)VM_MAX_ADDRESS;
1867 else
1868 tmp_end = end;
1869 do {
1870 new_entry = vm_map_entry_insert(map, entry, tmp_start, tmp_end,
1871 object, offset, needs_copy,
1872 FALSE, FALSE,
1873 cur_protection, max_protection,
1874 VM_BEHAVIOR_DEFAULT,
1875 inheritance, 0, no_cache);
1876 new_entry->alias = alias;
1877 if (is_submap) {
1878 vm_map_t submap;
1879 boolean_t submap_is_64bit;
1880 boolean_t use_pmap;
1881
1882 new_entry->is_sub_map = TRUE;
1883 submap = (vm_map_t) object;
1884 submap_is_64bit = vm_map_is_64bit(submap);
1885 use_pmap = (alias == VM_MEMORY_SHARED_PMAP);
1886 #ifndef NO_NESTED_PMAP
1887 if (use_pmap && submap->pmap == NULL) {
1888 /* we need a sub pmap to nest... */
1889 submap->pmap = pmap_create(0, submap_is_64bit);
1890 if (submap->pmap == NULL) {
1891 /* let's proceed without nesting... */
1892 }
1893 }
1894 if (use_pmap && submap->pmap != NULL) {
1895 kern_return_t kr;
1896
1897 kr = pmap_nest(map->pmap,
1898 submap->pmap,
1899 tmp_start,
1900 tmp_start,
1901 tmp_end - tmp_start);
1902 if (kr != KERN_SUCCESS) {
1903 printf("vm_map_enter: "
1904 "pmap_nest(0x%llx,0x%llx) "
1905 "error 0x%x\n",
1906 (long long)tmp_start,
1907 (long long)tmp_end,
1908 kr);
1909 } else {
1910 /* we're now nested ! */
1911 new_entry->use_pmap = TRUE;
1912 pmap_empty = FALSE;
1913 }
1914 }
1915 #endif /* NO_NESTED_PMAP */
1916 }
1917 entry = new_entry;
1918 } while (tmp_end != end &&
1919 (tmp_start = tmp_end) &&
1920 (tmp_end = (end - tmp_end > (vm_map_size_t)VM_MAX_ADDRESS) ?
1921 tmp_end + (vm_map_size_t)VM_MAX_ADDRESS : end));
1922
1923 vm_map_unlock(map);
1924 map_locked = FALSE;
1925
1926 new_mapping_established = TRUE;
1927
1928 /* Wire down the new entry if the user
1929 * requested all new map entries be wired.
1930 */
1931 if (map->wiring_required) {
1932 pmap_empty = FALSE; /* pmap won't be empty */
1933 result = vm_map_wire(map, start, end,
1934 new_entry->protection, TRUE);
1935 RETURN(result);
1936 }
1937
1938 if ((object != VM_OBJECT_NULL) &&
1939 (vm_map_pmap_enter_enable) &&
1940 (!anywhere) &&
1941 (!needs_copy) &&
1942 (size < (128*1024))) {
1943 pmap_empty = FALSE; /* pmap won't be empty */
1944
1945 if (override_nx(map, alias) && cur_protection)
1946 cur_protection |= VM_PROT_EXECUTE;
1947
1948 vm_map_pmap_enter(map, start, end,
1949 object, offset, cur_protection);
1950 }
1951
1952 BailOut: ;
1953 if (result == KERN_SUCCESS &&
1954 pmap_empty &&
1955 !(flags & VM_FLAGS_NO_PMAP_CHECK)) {
1956 assert(vm_map_pmap_is_empty(map, *address, *address+size));
1957 }
1958
1959 if (result != KERN_SUCCESS) {
1960 if (new_mapping_established) {
1961 /*
1962 * We have to get rid of the new mappings since we
1963 * won't make them available to the user.
1964 * Try to do that atomically, to minimize the risk
1965 * that someone else creates new mappings in that range.
1966 */
1967 zap_new_map = vm_map_create(PMAP_NULL,
1968 *address,
1969 *address + size,
1970 TRUE);
1971 if (!map_locked) {
1972 vm_map_lock(map);
1973 map_locked = TRUE;
1974 }
1975 (void) vm_map_delete(map, *address, *address+size,
1976 VM_MAP_REMOVE_SAVE_ENTRIES,
1977 zap_new_map);
1978 }
1979 if (zap_old_map != VM_MAP_NULL &&
1980 zap_old_map->hdr.nentries != 0) {
1981 vm_map_entry_t entry1, entry2;
1982
1983 /*
1984 * The new mapping failed. Attempt to restore
1985 * the old mappings, saved in the "zap_old_map".
1986 */
1987 if (!map_locked) {
1988 vm_map_lock(map);
1989 map_locked = TRUE;
1990 }
1991
1992 /* first check if the coast is still clear */
1993 start = vm_map_first_entry(zap_old_map)->vme_start;
1994 end = vm_map_last_entry(zap_old_map)->vme_end;
1995 if (vm_map_lookup_entry(map, start, &entry1) ||
1996 vm_map_lookup_entry(map, end, &entry2) ||
1997 entry1 != entry2) {
1998 /*
1999 * Part of that range has already been
2000 * re-mapped: we can't restore the old
2001 * mappings...
2002 */
2003 vm_map_enter_restore_failures++;
2004 } else {
2005 /*
2006 * Transfer the saved map entries from
2007 * "zap_old_map" to the original "map",
2008 * inserting them all after "entry1".
2009 */
2010 for (entry2 = vm_map_first_entry(zap_old_map);
2011 entry2 != vm_map_to_entry(zap_old_map);
2012 entry2 = vm_map_first_entry(zap_old_map)) {
2013 vm_map_size_t entry_size;
2014
2015 entry_size = (entry2->vme_end -
2016 entry2->vme_start);
2017 vm_map_entry_unlink(zap_old_map,
2018 entry2);
2019 zap_old_map->size -= entry_size;
2020 vm_map_entry_link(map, entry1, entry2);
2021 map->size += entry_size;
2022 entry1 = entry2;
2023 }
2024 if (map->wiring_required) {
2025 /*
2026 * XXX TODO: we should rewire the
2027 * old pages here...
2028 */
2029 }
2030 vm_map_enter_restore_successes++;
2031 }
2032 }
2033 }
2034
2035 if (map_locked) {
2036 vm_map_unlock(map);
2037 }
2038
2039 /*
2040 * Get rid of the "zap_maps" and all the map entries that
2041 * they may still contain.
2042 */
2043 if (zap_old_map != VM_MAP_NULL) {
2044 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
2045 zap_old_map = VM_MAP_NULL;
2046 }
2047 if (zap_new_map != VM_MAP_NULL) {
2048 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
2049 zap_new_map = VM_MAP_NULL;
2050 }
2051
2052 return result;
2053
2054 #undef RETURN
2055 }
2056
2057 kern_return_t
2058 vm_map_enter_mem_object(
2059 vm_map_t target_map,
2060 vm_map_offset_t *address,
2061 vm_map_size_t initial_size,
2062 vm_map_offset_t mask,
2063 int flags,
2064 ipc_port_t port,
2065 vm_object_offset_t offset,
2066 boolean_t copy,
2067 vm_prot_t cur_protection,
2068 vm_prot_t max_protection,
2069 vm_inherit_t inheritance)
2070 {
2071 vm_map_address_t map_addr;
2072 vm_map_size_t map_size;
2073 vm_object_t object;
2074 vm_object_size_t size;
2075 kern_return_t result;
2076
2077 /*
2078 * Check arguments for validity
2079 */
2080 if ((target_map == VM_MAP_NULL) ||
2081 (cur_protection & ~VM_PROT_ALL) ||
2082 (max_protection & ~VM_PROT_ALL) ||
2083 (inheritance > VM_INHERIT_LAST_VALID) ||
2084 initial_size == 0)
2085 return KERN_INVALID_ARGUMENT;
2086
2087 map_addr = vm_map_trunc_page(*address);
2088 map_size = vm_map_round_page(initial_size);
2089 size = vm_object_round_page(initial_size);
2090
2091 /*
2092 * Find the vm object (if any) corresponding to this port.
2093 */
2094 if (!IP_VALID(port)) {
2095 object = VM_OBJECT_NULL;
2096 offset = 0;
2097 copy = FALSE;
2098 } else if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
2099 vm_named_entry_t named_entry;
2100
2101 named_entry = (vm_named_entry_t) port->ip_kobject;
2102 /* a few checks to make sure user is obeying rules */
2103 if (size == 0) {
2104 if (offset >= named_entry->size)
2105 return KERN_INVALID_RIGHT;
2106 size = named_entry->size - offset;
2107 }
2108 if ((named_entry->protection & max_protection) !=
2109 max_protection)
2110 return KERN_INVALID_RIGHT;
2111 if ((named_entry->protection & cur_protection) !=
2112 cur_protection)
2113 return KERN_INVALID_RIGHT;
2114 if (named_entry->size < (offset + size))
2115 return KERN_INVALID_ARGUMENT;
2116
2117 /* the caller's parameter "offset" is relative to the start of */
2118 /* the named entry; convert it to an offset within the object */
2119 offset = offset + named_entry->offset;
2120
2121 named_entry_lock(named_entry);
2122 if (named_entry->is_sub_map) {
2123 vm_map_t submap;
2124
2125 submap = named_entry->backing.map;
2126 vm_map_lock(submap);
2127 vm_map_reference(submap);
2128 vm_map_unlock(submap);
2129 named_entry_unlock(named_entry);
2130
2131 result = vm_map_enter(target_map,
2132 &map_addr,
2133 map_size,
2134 mask,
2135 flags | VM_FLAGS_SUBMAP,
2136 (vm_object_t) submap,
2137 offset,
2138 copy,
2139 cur_protection,
2140 max_protection,
2141 inheritance);
2142 if (result != KERN_SUCCESS) {
2143 vm_map_deallocate(submap);
2144 } else {
2145 /*
2146 * No need to lock "submap" just to check its
2147 * "mapped" flag: that flag is never reset
2148 * once it's been set and if we race, we'll
2149 * just end up setting it twice, which is OK.
2150 */
2151 if (submap->mapped == FALSE) {
2152 /*
2153 * This submap has never been mapped.
2154 * Set its "mapped" flag now that it
2155 * has been mapped.
2156 * This happens only for the first ever
2157 * mapping of a "submap".
2158 */
2159 vm_map_lock(submap);
2160 submap->mapped = TRUE;
2161 vm_map_unlock(submap);
2162 }
2163 *address = map_addr;
2164 }
2165 return result;
2166
2167 } else if (named_entry->is_pager) {
2168 unsigned int access;
2169 vm_prot_t protections;
2170 unsigned int wimg_mode;
2171 boolean_t cache_attr;
2172
2173 protections = named_entry->protection & VM_PROT_ALL;
2174 access = GET_MAP_MEM(named_entry->protection);
2175
2176 object = vm_object_enter(named_entry->backing.pager,
2177 named_entry->size,
2178 named_entry->internal,
2179 FALSE,
2180 FALSE);
2181 if (object == VM_OBJECT_NULL) {
2182 named_entry_unlock(named_entry);
2183 return KERN_INVALID_OBJECT;
2184 }
2185
2186 /* JMM - drop reference on pager here */
2187
2188 /* create an extra ref for the named entry */
2189 vm_object_lock(object);
2190 vm_object_reference_locked(object);
2191 named_entry->backing.object = object;
2192 named_entry->is_pager = FALSE;
2193 named_entry_unlock(named_entry);
2194
2195 wimg_mode = object->wimg_bits;
2196 if (access == MAP_MEM_IO) {
2197 wimg_mode = VM_WIMG_IO;
2198 } else if (access == MAP_MEM_COPYBACK) {
2199 wimg_mode = VM_WIMG_USE_DEFAULT;
2200 } else if (access == MAP_MEM_WTHRU) {
2201 wimg_mode = VM_WIMG_WTHRU;
2202 } else if (access == MAP_MEM_WCOMB) {
2203 wimg_mode = VM_WIMG_WCOMB;
2204 }
2205 if (wimg_mode == VM_WIMG_IO ||
2206 wimg_mode == VM_WIMG_WCOMB)
2207 cache_attr = TRUE;
2208 else
2209 cache_attr = FALSE;
2210
2211 /* wait for object (if any) to be ready */
2212 if (!named_entry->internal) {
2213 while (!object->pager_ready) {
2214 vm_object_wait(
2215 object,
2216 VM_OBJECT_EVENT_PAGER_READY,
2217 THREAD_UNINT);
2218 vm_object_lock(object);
2219 }
2220 }
2221
2222 if (object->wimg_bits != wimg_mode) {
2223 vm_page_t p;
2224
2225 vm_object_paging_wait(object, THREAD_UNINT);
2226
2227 object->wimg_bits = wimg_mode;
2228 queue_iterate(&object->memq, p, vm_page_t, listq) {
2229 if (!p->fictitious) {
2230 if (p->pmapped)
2231 pmap_disconnect(p->phys_page);
2232 if (cache_attr)
2233 pmap_sync_page_attributes_phys(p->phys_page);
2234 }
2235 }
2236 }
2237 object->true_share = TRUE;
2238 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
2239 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
2240 vm_object_unlock(object);
2241 } else {
2242 /* This is the case where we are going to map */
2243 /* an already mapped object. If the object is */
2244 /* not ready, it is internal. An external */
2245 /* object cannot be mapped until it is ready, */
2246 /* so we can avoid the ready check */
2247 /* in this case. */
2248 object = named_entry->backing.object;
2249 assert(object != VM_OBJECT_NULL);
2250 named_entry_unlock(named_entry);
2251 vm_object_reference(object);
2252 }
2253 } else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
2254 /*
2255 * JMM - This is temporary until we unify named entries
2256 * and raw memory objects.
2257 *
2258 * Detected fake ip_kotype for a memory object. In
2259 * this case, the port isn't really a port at all, but
2260 * instead is just a raw memory object.
2261 */
2262
2263 object = vm_object_enter((memory_object_t)port,
2264 size, FALSE, FALSE, FALSE);
2265 if (object == VM_OBJECT_NULL)
2266 return KERN_INVALID_OBJECT;
2267
2268 /* wait for object (if any) to be ready */
2269 if (object != VM_OBJECT_NULL) {
2270 if (object == kernel_object) {
2271 printf("Warning: Attempt to map kernel object"
2272 " by a non-private kernel entity\n");
2273 return KERN_INVALID_OBJECT;
2274 }
2275 vm_object_lock(object);
2276 while (!object->pager_ready) {
2277 vm_object_wait(object,
2278 VM_OBJECT_EVENT_PAGER_READY,
2279 THREAD_UNINT);
2280 vm_object_lock(object);
2281 }
2282 vm_object_unlock(object);
2283 }
2284 } else {
2285 return KERN_INVALID_OBJECT;
2286 }
2287
2288 /*
2289 * Perform the copy if requested
2290 */
2291
2292 if (copy) {
2293 vm_object_t new_object;
2294 vm_object_offset_t new_offset;
2295
2296 result = vm_object_copy_strategically(object, offset, size,
2297 &new_object, &new_offset,
2298 &copy);
2299
2300
2301 if (result == KERN_MEMORY_RESTART_COPY) {
2302 boolean_t success;
2303 boolean_t src_needs_copy;
2304
2305 /*
2306 * XXX
2307 * We currently ignore src_needs_copy.
2308 * This really is the issue of how to make
2309 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
2310 * non-kernel users to use. Solution forthcoming.
2311 * In the meantime, since we don't allow non-kernel
2312 * memory managers to specify symmetric copy,
2313 * we won't run into problems here.
2314 */
2315 new_object = object;
2316 new_offset = offset;
2317 success = vm_object_copy_quickly(&new_object,
2318 new_offset, size,
2319 &src_needs_copy,
2320 &copy);
2321 assert(success);
2322 result = KERN_SUCCESS;
2323 }
2324 /*
2325 * Throw away the reference to the
2326 * original object, as it won't be mapped.
2327 */
2328
2329 vm_object_deallocate(object);
2330
2331 if (result != KERN_SUCCESS)
2332 return result;
2333
2334 object = new_object;
2335 offset = new_offset;
2336 }
2337
2338 result = vm_map_enter(target_map,
2339 &map_addr, map_size,
2340 (vm_map_offset_t)mask,
2341 flags,
2342 object, offset,
2343 copy,
2344 cur_protection, max_protection, inheritance);
2345 if (result != KERN_SUCCESS)
2346 vm_object_deallocate(object);
2347 *address = map_addr;
2348 return result;
2349 }
2350
2351 #if VM_CPM
2352
2353 #ifdef MACH_ASSERT
2354 extern pmap_paddr_t avail_start, avail_end;
2355 #endif
2356
2357 /*
2358 * Allocate memory in the specified map, with the caveat that
2359 * the memory is physically contiguous. This call may fail
2360 * if the system can't find sufficient contiguous memory.
2361 * This call may cause or lead to heart-stopping amounts of
2362 * paging activity.
2363 *
2364 * Memory obtained from this call should be freed in the
2365 * normal way, viz., via vm_deallocate.
2366 */
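/*
 * Illustrative sketch, not part of the original source (the choice of
 * kernel_map, the size and the error handling are assumptions): a
 * caller needing 8 physically contiguous pages might do
 *
 *      vm_map_offset_t addr = 0;
 *      kern_return_t kr;
 *
 *      kr = vm_map_enter_cpm(kernel_map, &addr, 8 * PAGE_SIZE,
 *                            VM_FLAGS_ANYWHERE);
 *      ...
 *      (void) vm_deallocate(kernel_map, addr, 8 * PAGE_SIZE);
 *
 * freeing the memory with vm_deallocate as the comment above notes.
 */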
2367 kern_return_t
2368 vm_map_enter_cpm(
2369 vm_map_t map,
2370 vm_map_offset_t *addr,
2371 vm_map_size_t size,
2372 int flags)
2373 {
2374 vm_object_t cpm_obj;
2375 pmap_t pmap;
2376 vm_page_t m, pages;
2377 kern_return_t kr;
2378 vm_map_offset_t va, start, end, offset;
2379 #if MACH_ASSERT
2380 vm_map_offset_t prev_addr;
2381 #endif /* MACH_ASSERT */
2382
2383 boolean_t anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
2384
2385 if (!vm_allocate_cpm_enabled)
2386 return KERN_FAILURE;
2387
2388 if (size == 0) {
2389 *addr = 0;
2390 return KERN_SUCCESS;
2391 }
2392 if (anywhere)
2393 *addr = vm_map_min(map);
2394 else
2395 *addr = vm_map_trunc_page(*addr);
2396 size = vm_map_round_page(size);
2397
2398 /*
2399 * LP64todo - cpm_allocate should probably allow
2400 * allocations of >4GB, but not with the current
2401 * algorithm, so just cast down the size for now.
2402 */
2403 if (size > VM_MAX_ADDRESS)
2404 return KERN_RESOURCE_SHORTAGE;
2405 if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
2406 &pages, 0, TRUE)) != KERN_SUCCESS)
2407 return kr;
2408
2409 cpm_obj = vm_object_allocate((vm_object_size_t)size);
2410 assert(cpm_obj != VM_OBJECT_NULL);
2411 assert(cpm_obj->internal);
2412 assert(cpm_obj->size == (vm_object_size_t)size);
2413 assert(cpm_obj->can_persist == FALSE);
2414 assert(cpm_obj->pager_created == FALSE);
2415 assert(cpm_obj->pageout == FALSE);
2416 assert(cpm_obj->shadow == VM_OBJECT_NULL);
2417
2418 /*
2419 * Insert pages into object.
2420 */
2421
2422 vm_object_lock(cpm_obj);
2423 for (offset = 0; offset < size; offset += PAGE_SIZE) {
2424 m = pages;
2425 pages = NEXT_PAGE(m);
2426 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
2427
2428 assert(!m->gobbled);
2429 assert(!m->wanted);
2430 assert(!m->pageout);
2431 assert(!m->tabled);
2432 assert(m->wire_count);
2433 /*
2434 * ENCRYPTED SWAP:
2435 * "m" is not supposed to be pageable, so it
2436 * should not be encrypted. It wouldn't be safe
2437 * to enter it in a new VM object while encrypted.
2438 */
2439 ASSERT_PAGE_DECRYPTED(m);
2440 assert(m->busy);
2441 assert(m->phys_page>=(avail_start>>PAGE_SHIFT) && m->phys_page<=(avail_end>>PAGE_SHIFT));
2442
2443 m->busy = FALSE;
2444 vm_page_insert(m, cpm_obj, offset);
2445 }
2446 assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
2447 vm_object_unlock(cpm_obj);
2448
2449 /*
2450 * Hang onto a reference on the object in case a
2451 * multi-threaded application for some reason decides
2452 * to deallocate the portion of the address space into
2453 * which we will insert this object.
2454 *
2455 * Unfortunately, we must insert the object now before
2456 * we can talk to the pmap module about which addresses
2457 * must be wired down. Hence, the race with a multi-
2458 * threaded app.
2459 */
2460 vm_object_reference(cpm_obj);
2461
2462 /*
2463 * Insert object into map.
2464 */
2465
2466 kr = vm_map_enter(
2467 map,
2468 addr,
2469 size,
2470 (vm_map_offset_t)0,
2471 flags,
2472 cpm_obj,
2473 (vm_object_offset_t)0,
2474 FALSE,
2475 VM_PROT_ALL,
2476 VM_PROT_ALL,
2477 VM_INHERIT_DEFAULT);
2478
2479 if (kr != KERN_SUCCESS) {
2480 /*
2481 * A CPM object doesn't have can_persist set,
2482 * so all we have to do is deallocate it to
2483 * free up these pages.
2484 */
2485 assert(cpm_obj->pager_created == FALSE);
2486 assert(cpm_obj->can_persist == FALSE);
2487 assert(cpm_obj->pageout == FALSE);
2488 assert(cpm_obj->shadow == VM_OBJECT_NULL);
2489 vm_object_deallocate(cpm_obj); /* kill acquired ref */
2490 vm_object_deallocate(cpm_obj); /* kill creation ref */
2491 }
2492
2493 /*
2494 * Inform the physical mapping system that the
2495 * range of addresses may not fault, so that
2496 * page tables and such can be locked down as well.
2497 */
2498 start = *addr;
2499 end = start + size;
2500 pmap = vm_map_pmap(map);
2501 pmap_pageable(pmap, start, end, FALSE);
2502
2503 /*
2504 * Enter each page into the pmap, to avoid faults.
2505 * Note that this loop could be coded more efficiently,
2506 * if the need arose, rather than looking up each page
2507 * again.
2508 */
2509 for (offset = 0, va = start; offset < size;
2510 va += PAGE_SIZE, offset += PAGE_SIZE) {
2511 int type_of_fault;
2512
2513 vm_object_lock(cpm_obj);
2514 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
2515 assert(m != VM_PAGE_NULL);
2516
2517 vm_page_zero_fill(m);
2518
2519 type_of_fault = DBG_ZERO_FILL_FAULT;
2520
2521 vm_fault_enter(m, pmap, va, VM_PROT_ALL,
2522 m->wire_count != 0, FALSE, FALSE,
2523 &type_of_fault);
2524
2525 vm_object_unlock(cpm_obj);
2526 }
2527
2528 #if MACH_ASSERT
2529 /*
2530 * Verify ordering in address space.
2531 */
2532 for (offset = 0; offset < size; offset += PAGE_SIZE) {
2533 vm_object_lock(cpm_obj);
2534 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
2535 vm_object_unlock(cpm_obj);
2536 if (m == VM_PAGE_NULL)
2537 panic("vm_allocate_cpm: obj 0x%x off 0x%x no page",
2538 cpm_obj, offset);
2539 assert(m->tabled);
2540 assert(!m->busy);
2541 assert(!m->wanted);
2542 assert(!m->fictitious);
2543 assert(!m->private);
2544 assert(!m->absent);
2545 assert(!m->error);
2546 assert(!m->cleaning);
2547 assert(!m->precious);
2548 assert(!m->clustered);
2549 if (offset != 0) {
2550 if (m->phys_page != prev_addr + 1) {
2551 printf("start 0x%x end 0x%x va 0x%x\n",
2552 start, end, va);
2553 printf("obj 0x%x off 0x%x\n", cpm_obj, offset);
2554 printf("m 0x%x prev_address 0x%x\n", m,
2555 prev_addr);
2556 panic("vm_allocate_cpm: pages not contig!");
2557 }
2558 }
2559 prev_addr = m->phys_page;
2560 }
2561 #endif /* MACH_ASSERT */
2562
2563 vm_object_deallocate(cpm_obj); /* kill extra ref */
2564
2565 return kr;
2566 }
2567
2568
2569 #else /* VM_CPM */
2570
2571 /*
2572 * Interface is defined in all cases, but unless the kernel
2573 * is built explicitly for this option, the interface does
2574 * nothing.
2575 */
2576
2577 kern_return_t
2578 vm_map_enter_cpm(
2579 __unused vm_map_t map,
2580 __unused vm_map_offset_t *addr,
2581 __unused vm_map_size_t size,
2582 __unused int flags)
2583 {
2584 return KERN_FAILURE;
2585 }
2586 #endif /* VM_CPM */
2587
2588 /*
2589 * Clip and unnest a portion of a nested submap mapping.
2590 */
2591 static void
2592 vm_map_clip_unnest(
2593 vm_map_t map,
2594 vm_map_entry_t entry,
2595 vm_map_offset_t start_unnest,
2596 vm_map_offset_t end_unnest)
2597 {
2598 assert(entry->is_sub_map);
2599 assert(entry->object.sub_map != NULL);
2600
2601 if (entry->vme_start > start_unnest ||
2602 entry->vme_end < end_unnest) {
2603 panic("vm_map_clip_unnest(0x%llx,0x%llx): "
2604 "bad nested entry: start=0x%llx end=0x%llx\n",
2605 (long long)start_unnest, (long long)end_unnest,
2606 (long long)entry->vme_start, (long long)entry->vme_end);
2607 }
2608 if (start_unnest > entry->vme_start) {
2609 _vm_map_clip_start(&map->hdr,
2610 entry,
2611 start_unnest);
2612 UPDATE_FIRST_FREE(map, map->first_free);
2613 }
2614 if (entry->vme_end > end_unnest) {
2615 _vm_map_clip_end(&map->hdr,
2616 entry,
2617 end_unnest);
2618 UPDATE_FIRST_FREE(map, map->first_free);
2619 }
2620
2621 pmap_unnest(map->pmap,
2622 entry->vme_start,
2623 entry->vme_end - entry->vme_start);
2624 if ((map->mapped) && (map->ref_count)) {
2625 /* clean up parent map/maps */
2626 vm_map_submap_pmap_clean(
2627 map, entry->vme_start,
2628 entry->vme_end,
2629 entry->object.sub_map,
2630 entry->offset);
2631 }
2632 entry->use_pmap = FALSE;
2633 }
2634
2635 /*
2636 * vm_map_clip_start: [ internal use only ]
2637 *
2638 * Asserts that the given entry begins at or after
2639 * the specified address; if necessary,
2640 * it splits the entry into two.
2641 */
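/*
 * For illustration only (hypothetical addresses): clipping an entry
 * that covers [0x1000, 0x5000) at startaddr 0x3000 inserts a new
 * entry for [0x1000, 0x3000) in front of it and shrinks this entry
 * to [0x3000, 0x5000), advancing the entry's object offset by 0x2000
 * so it still refers to the same backing pages.
 */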
2642 static void
2643 vm_map_clip_start(
2644 vm_map_t map,
2645 vm_map_entry_t entry,
2646 vm_map_offset_t startaddr)
2647 {
2648 #ifndef NO_NESTED_PMAP
2649 if (entry->use_pmap &&
2650 startaddr >= entry->vme_start) {
2651 vm_map_offset_t start_unnest, end_unnest;
2652
2653 /*
2654 * Make sure "startaddr" is no longer in a nested range
2655 * before we clip. Unnest only the minimum range the platform
2656 * can handle.
2657 */
2658 start_unnest = startaddr & ~(pmap_nesting_size_min - 1);
2659 end_unnest = start_unnest + pmap_nesting_size_min;
2660 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
2661 }
2662 #endif /* NO_NESTED_PMAP */
2663 if (startaddr > entry->vme_start) {
2664 if (entry->object.vm_object &&
2665 !entry->is_sub_map &&
2666 entry->object.vm_object->phys_contiguous) {
2667 pmap_remove(map->pmap,
2668 (addr64_t)(entry->vme_start),
2669 (addr64_t)(entry->vme_end));
2670 }
2671 _vm_map_clip_start(&map->hdr, entry, startaddr);
2672 UPDATE_FIRST_FREE(map, map->first_free);
2673 }
2674 }
2675
2676
2677 #define vm_map_copy_clip_start(copy, entry, startaddr) \
2678 MACRO_BEGIN \
2679 if ((startaddr) > (entry)->vme_start) \
2680 _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \
2681 MACRO_END
2682
2683 /*
2684 * This routine is called only when it is known that
2685 * the entry must be split.
2686 */
2687 static void
2688 _vm_map_clip_start(
2689 register struct vm_map_header *map_header,
2690 register vm_map_entry_t entry,
2691 register vm_map_offset_t start)
2692 {
2693 register vm_map_entry_t new_entry;
2694
2695 /*
2696 * Split off the front portion --
2697 * note that we must insert the new
2698 * entry BEFORE this one, so that
2699 * this entry has the specified starting
2700 * address.
2701 */
2702
2703 new_entry = _vm_map_entry_create(map_header);
2704 vm_map_entry_copy_full(new_entry, entry);
2705
2706 new_entry->vme_end = start;
2707 entry->offset += (start - entry->vme_start);
2708 entry->vme_start = start;
2709
2710 _vm_map_entry_link(map_header, entry->vme_prev, new_entry);
2711
2712 if (entry->is_sub_map)
2713 vm_map_reference(new_entry->object.sub_map);
2714 else
2715 vm_object_reference(new_entry->object.vm_object);
2716 }
2717
2718
2719 /*
2720 * vm_map_clip_end: [ internal use only ]
2721 *
2722 * Asserts that the given entry ends at or before
2723 * the specified address; if necessary,
2724 * it splits the entry into two.
2725 */
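/*
 * For illustration only (hypothetical addresses): clipping an entry
 * that covers [0x1000, 0x5000) at endaddr 0x3000 shrinks this entry
 * to [0x1000, 0x3000) and inserts a new entry for [0x3000, 0x5000)
 * after it, with the new entry's object offset advanced by 0x2000.
 */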
2726 static void
2727 vm_map_clip_end(
2728 vm_map_t map,
2729 vm_map_entry_t entry,
2730 vm_map_offset_t endaddr)
2731 {
2732 if (endaddr > entry->vme_end) {
2733 /*
2734 * Within the scope of this clipping, limit "endaddr" to
2735 * the end of this map entry...
2736 */
2737 endaddr = entry->vme_end;
2738 }
2739 #ifndef NO_NESTED_PMAP
2740 if (entry->use_pmap) {
2741 vm_map_offset_t start_unnest, end_unnest;
2742
2743 /*
2744 * Make sure the range between the start of this entry and
2745 * the new "endaddr" is no longer nested before we clip.
2746 * Unnest only the minimum range the platform can handle.
2747 */
2748 start_unnest = entry->vme_start;
2749 end_unnest =
2750 (endaddr + pmap_nesting_size_min - 1) &
2751 ~(pmap_nesting_size_min - 1);
2752 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
2753 }
2754 #endif /* NO_NESTED_PMAP */
2755 if (endaddr < entry->vme_end) {
2756 if (entry->object.vm_object &&
2757 !entry->is_sub_map &&
2758 entry->object.vm_object->phys_contiguous) {
2759 pmap_remove(map->pmap,
2760 (addr64_t)(entry->vme_start),
2761 (addr64_t)(entry->vme_end));
2762 }
2763 _vm_map_clip_end(&map->hdr, entry, endaddr);
2764 UPDATE_FIRST_FREE(map, map->first_free);
2765 }
2766 }
2767
2768
2769 #define vm_map_copy_clip_end(copy, entry, endaddr) \
2770 MACRO_BEGIN \
2771 if ((endaddr) < (entry)->vme_end) \
2772 _vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \
2773 MACRO_END
2774
2775 /*
2776 * This routine is called only when it is known that
2777 * the entry must be split.
2778 */
2779 static void
2780 _vm_map_clip_end(
2781 register struct vm_map_header *map_header,
2782 register vm_map_entry_t entry,
2783 register vm_map_offset_t end)
2784 {
2785 register vm_map_entry_t new_entry;
2786
2787 /*
2788 * Create a new entry and insert it
2789 * AFTER the specified entry
2790 */
2791
2792 new_entry = _vm_map_entry_create(map_header);
2793 vm_map_entry_copy_full(new_entry, entry);
2794
2795 new_entry->vme_start = entry->vme_end = end;
2796 new_entry->offset += (end - entry->vme_start);
2797
2798 _vm_map_entry_link(map_header, entry, new_entry);
2799
2800 if (entry->is_sub_map)
2801 vm_map_reference(new_entry->object.sub_map);
2802 else
2803 vm_object_reference(new_entry->object.vm_object);
2804 }
2805
2806
2807 /*
2808 * VM_MAP_RANGE_CHECK: [ internal use only ]
2809 *
2810 * Asserts that the starting and ending region
2811 * addresses fall within the valid range of the map.
2812 */
2813 #define VM_MAP_RANGE_CHECK(map, start, end) \
2814 MACRO_BEGIN \
2815 if (start < vm_map_min(map)) \
2816 start = vm_map_min(map); \
2817 if (end > vm_map_max(map)) \
2818 end = vm_map_max(map); \
2819 if (start > end) \
2820 start = end; \
2821 MACRO_END
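/*
 * For illustration only (hypothetical values): in a map whose valid
 * range is [0x1000, 0x80000000), a request with start = 0x500 and
 * end = 0x90000000 is clamped to [0x1000, 0x80000000); if clamping
 * ever leaves start beyond end, the range collapses to the empty
 * range [end, end).
 */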
2822
2823 /*
2824 * vm_map_range_check: [ internal use only ]
2825 *
2826 * Check that the region defined by the specified start and
2827 * end addresses is wholly contained within a single map
2828 * entry or set of adjacent map entries of the specified map,
2829 * i.e. the specified region contains no unmapped space.
2830 * If any or all of the region is unmapped, FALSE is returned.
2831 * Otherwise, TRUE is returned and if the output argument 'entry'
2832 * is not NULL it points to the map entry containing the start
2833 * of the region.
2834 *
2835 * The map is locked for reading on entry and is left locked.
2836 */
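/*
 * Illustrative sketch, not from the original source (the caller and
 * its error handling are hypothetical; the map is assumed to already
 * be locked, as required above):
 *
 *      vm_map_entry_t first;
 *
 *      if (!vm_map_range_check(map, start, end, &first))
 *              return KERN_INVALID_ADDRESS;
 *      ...     ("first" now refers to the entry containing "start")
 */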
2837 static boolean_t
2838 vm_map_range_check(
2839 register vm_map_t map,
2840 register vm_map_offset_t start,
2841 register vm_map_offset_t end,
2842 vm_map_entry_t *entry)
2843 {
2844 vm_map_entry_t cur;
2845 register vm_map_offset_t prev;
2846
2847 /*
2848 * Basic sanity checks first
2849 */
2850 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
2851 return (FALSE);
2852
2853 /*
2854 * Check first if the region starts within a valid
2855 * mapping for the map.
2856 */
2857 if (!vm_map_lookup_entry(map, start, &cur))
2858 return (FALSE);
2859
2860 /*
2861 * Optimize for the case that the region is contained
2862 * in a single map entry.
2863 */
2864 if (entry != (vm_map_entry_t *) NULL)
2865 *entry = cur;
2866 if (end <= cur->vme_end)
2867 return (TRUE);
2868
2869 /*
2870 * If the region is not wholly contained within a
2871 * single entry, walk the entries looking for holes.
2872 */
2873 prev = cur->vme_end;
2874 cur = cur->vme_next;
2875 while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
2876 if (end <= cur->vme_end)
2877 return (TRUE);
2878 prev = cur->vme_end;
2879 cur = cur->vme_next;
2880 }
2881 return (FALSE);
2882 }
2883
2884 /*
2885 * vm_map_submap: [ kernel use only ]
2886 *
2887 * Mark the given range as handled by a subordinate map.
2888 *
2889 * This range must have been created with vm_map_find using
2890 * the vm_submap_object, and no other operations may have been
2891 * performed on this range prior to calling vm_map_submap.
2892 *
2893 * Only a limited number of operations can be performed
2894 * within this range after calling vm_map_submap:
2895 * vm_fault
2896 * [Don't try vm_map_copyin!]
2897 *
2898 * To remove a submapping, one must first remove the
2899 * range from the superior map, and then destroy the
2900 * submap (if desired). [Better yet, don't try it.]
2901 */
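/*
 * Illustrative sketch, not from the original source (parent_map,
 * child_map, addr and size are hypothetical; error handling omitted).
 * The range is first created against vm_submap_object and then turned
 * into a submapping:
 *
 *      kr = vm_map_enter(parent_map, &addr, size,
 *                        (vm_map_offset_t)0, VM_FLAGS_ANYWHERE,
 *                        vm_submap_object, (vm_object_offset_t)0, FALSE,
 *                        VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
 *      ...
 *      kr = vm_map_submap(parent_map, addr, addr + size,
 *                         child_map, (vm_map_offset_t)0, FALSE);
 */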
2902 kern_return_t
2903 vm_map_submap(
2904 vm_map_t map,
2905 vm_map_offset_t start,
2906 vm_map_offset_t end,
2907 vm_map_t submap,
2908 vm_map_offset_t offset,
2909 #ifdef NO_NESTED_PMAP
2910 __unused
2911 #endif /* NO_NESTED_PMAP */
2912 boolean_t use_pmap)
2913 {
2914 vm_map_entry_t entry;
2915 register kern_return_t result = KERN_INVALID_ARGUMENT;
2916 register vm_object_t object;
2917
2918 vm_map_lock(map);
2919
2920 if (! vm_map_lookup_entry(map, start, &entry)) {
2921 entry = entry->vme_next;
2922 }
2923
2924 if (entry == vm_map_to_entry(map) ||
2925 entry->is_sub_map) {
2926 vm_map_unlock(map);
2927 return KERN_INVALID_ARGUMENT;
2928 }
2929
2930 assert(!entry->use_pmap); /* we don't want to unnest anything here */
2931 vm_map_clip_start(map, entry, start);
2932 vm_map_clip_end(map, entry, end);
2933
2934 if ((entry->vme_start == start) && (entry->vme_end == end) &&
2935 (!entry->is_sub_map) &&
2936 ((object = entry->object.vm_object) == vm_submap_object) &&
2937 (object->resident_page_count == 0) &&
2938 (object->copy == VM_OBJECT_NULL) &&
2939 (object->shadow == VM_OBJECT_NULL) &&
2940 (!object->pager_created)) {
2941 entry->offset = (vm_object_offset_t)offset;
2942 entry->object.vm_object = VM_OBJECT_NULL;
2943 vm_object_deallocate(object);
2944 entry->is_sub_map = TRUE;
2945 entry->object.sub_map = submap;
2946 vm_map_reference(submap);
2947 submap->mapped = TRUE;
2948
2949 #ifndef NO_NESTED_PMAP
2950 if (use_pmap) {
2951 /* nest if platform code will allow */
2952 if(submap->pmap == NULL) {
2953 submap->pmap = pmap_create((vm_map_size_t) 0, FALSE);
2954 if(submap->pmap == PMAP_NULL) {
2955 vm_map_unlock(map);
2956 return(KERN_NO_SPACE);
2957 }
2958 }
2959 result = pmap_nest(map->pmap,
2960 (entry->object.sub_map)->pmap,
2961 (addr64_t)start,
2962 (addr64_t)start,
2963 (uint64_t)(end - start));
2964 if(result)
2965 panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result);
2966 entry->use_pmap = TRUE;
2967 }
2968 #else /* NO_NESTED_PMAP */
2969 pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end);
2970 #endif /* NO_NESTED_PMAP */
2971 result = KERN_SUCCESS;
2972 }
2973 vm_map_unlock(map);
2974
2975 return(result);
2976 }
2977
2978 /*
2979 * vm_map_protect:
2980 *
2981 * Sets the protection of the specified address
2982 * region in the target map. If "set_max" is
2983 * specified, the maximum protection is to be set;
2984 * otherwise, only the current protection is affected.
2985 */
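/*
 * Illustrative sketch (the range is hypothetical): make
 * [addr, addr + size) read-only without changing the maximum
 * protection:
 *
 *      kr = vm_map_protect(map, addr, addr + size,
 *                          VM_PROT_READ, FALSE);
 */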
2986 kern_return_t
2987 vm_map_protect(
2988 register vm_map_t map,
2989 register vm_map_offset_t start,
2990 register vm_map_offset_t end,
2991 register vm_prot_t new_prot,
2992 register boolean_t set_max)
2993 {
2994 register vm_map_entry_t current;
2995 register vm_map_offset_t prev;
2996 vm_map_entry_t entry;
2997 vm_prot_t new_max;
2998
2999 XPR(XPR_VM_MAP,
3000 "vm_map_protect, 0x%X start 0x%X end 0x%X, new 0x%X %d",
3001 (integer_t)map, start, end, new_prot, set_max);
3002
3003 vm_map_lock(map);
3004
3005 /* LP64todo - remove this check when vm_map_commpage64()
3006 * no longer has to stuff in a map_entry for the commpage
3007 * above the map's max_offset.
3008 */
3009 if (start >= map->max_offset) {
3010 vm_map_unlock(map);
3011 return(KERN_INVALID_ADDRESS);
3012 }
3013
3014 /*
3015 * Lookup the entry. If it doesn't start in a valid
3016 * entry, return an error.
3017 */
3018 if (! vm_map_lookup_entry(map, start, &entry)) {
3019 vm_map_unlock(map);
3020 return(KERN_INVALID_ADDRESS);
3021 }
3022
3023 /*
3024 * Make a first pass to check for protection and address
3025 * violations.
3026 */
3027
3028 current = entry;
3029 prev = current->vme_start;
3030 while ((current != vm_map_to_entry(map)) &&
3031 (current->vme_start < end)) {
3032
3033 /*
3034 * If there is a hole, return an error.
3035 */
3036 if (current->vme_start != prev) {
3037 vm_map_unlock(map);
3038 return(KERN_INVALID_ADDRESS);
3039 }
3040
3041 new_max = current->max_protection;
3042 if(new_prot & VM_PROT_COPY) {
3043 new_max |= VM_PROT_WRITE;
3044 if ((new_prot & (new_max | VM_PROT_COPY)) != new_prot) {
3045 vm_map_unlock(map);
3046 return(KERN_PROTECTION_FAILURE);
3047 }
3048 } else {
3049 if ((new_prot & new_max) != new_prot) {
3050 vm_map_unlock(map);
3051 return(KERN_PROTECTION_FAILURE);
3052 }
3053 }
3054
3055 prev = current->vme_end;
3056 current = current->vme_next;
3057 }
3058 if (end > prev) {
3059 vm_map_unlock(map);
3060 return(KERN_INVALID_ADDRESS);
3061 }
3062
3063 /*
3064 * Go back and fix up protections.
3065 * Clip to start here if the range starts within
3066 * the entry.
3067 */
3068
3069 current = entry;
3070 if (current != vm_map_to_entry(map)) {
3071 /* clip and unnest if necessary */
3072 vm_map_clip_start(map, current, start);
3073 }
3074
3075 while ((current != vm_map_to_entry(map)) &&
3076 (current->vme_start < end)) {
3077
3078 vm_prot_t old_prot;
3079
3080 vm_map_clip_end(map, current, end);
3081
3082 assert(!current->use_pmap); /* clipping did unnest if needed */
3083
3084 old_prot = current->protection;
3085
3086 if(new_prot & VM_PROT_COPY) {
3087 /* caller is asking specifically to copy the */
3088 /* mapped data, this implies that max protection */
3089 /* will include write. Caller must be prepared */
3090 /* for loss of shared memory communication in the */
3091 /* target area after taking this step */
3092 current->needs_copy = TRUE;
3093 current->max_protection |= VM_PROT_WRITE;
3094 }
3095
3096 if (set_max)
3097 current->protection =
3098 (current->max_protection =
3099 new_prot & ~VM_PROT_COPY) &
3100 old_prot;
3101 else
3102 current->protection = new_prot & ~VM_PROT_COPY;
3103
3104 /*
3105 * Update physical map if necessary.
3106 * If the request is to turn off write protection,
3107 * we won't do it for real (in pmap). This is because
3108 * it would cause copy-on-write to fail. We've already
3109 * set the new protection in the map, so if a
3110 * write-protect fault occurs, it will be fixed up
3111 * properly, COW or not.
3112 */
3113 if (current->protection != old_prot) {
3114 /* Look one level in: we support nested pmaps */
3115 /* from mapped submaps which are direct entries */
3116 /* in our map */
3117
3118 vm_prot_t prot;
3119
3120 prot = current->protection & ~VM_PROT_WRITE;
3121
3122 if (override_nx(map, current->alias) && prot)
3123 prot |= VM_PROT_EXECUTE;
3124
3125 if (current->is_sub_map && current->use_pmap) {
3126 pmap_protect(current->object.sub_map->pmap,
3127 current->vme_start,
3128 current->vme_end,
3129 prot);
3130 } else {
3131 pmap_protect(map->pmap,
3132 current->vme_start,
3133 current->vme_end,
3134 prot);
3135 }
3136 }
3137 current = current->vme_next;
3138 }
3139
3140 current = entry;
3141 while ((current != vm_map_to_entry(map)) &&
3142 (current->vme_start <= end)) {
3143 vm_map_simplify_entry(map, current);
3144 current = current->vme_next;
3145 }
3146
3147 vm_map_unlock(map);
3148 return(KERN_SUCCESS);
3149 }
3150
3151 /*
3152 * vm_map_inherit:
3153 *
3154 * Sets the inheritance of the specified address
3155 * range in the target map. Inheritance
3156 * affects how the map will be shared with
3157 * child maps at the time of vm_map_fork.
3158 */
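/*
 * Illustrative sketch (the range is hypothetical): have
 * [addr, addr + size) be shared with, rather than copied into,
 * children created at vm_map_fork time:
 *
 *      kr = vm_map_inherit(map, addr, addr + size, VM_INHERIT_SHARE);
 */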
3159 kern_return_t
3160 vm_map_inherit(
3161 register vm_map_t map,
3162 register vm_map_offset_t start,
3163 register vm_map_offset_t end,
3164 register vm_inherit_t new_inheritance)
3165 {
3166 register vm_map_entry_t entry;
3167 vm_map_entry_t temp_entry;
3168
3169 vm_map_lock(map);
3170
3171 VM_MAP_RANGE_CHECK(map, start, end);
3172
3173 if (vm_map_lookup_entry(map, start, &temp_entry)) {
3174 entry = temp_entry;
3175 }
3176 else {
3177 temp_entry = temp_entry->vme_next;
3178 entry = temp_entry;
3179 }
3180
3181 /* first check entire range for submaps which can't support the */
3182 /* given inheritance. */
3183 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
3184 if(entry->is_sub_map) {
3185 if(new_inheritance == VM_INHERIT_COPY) {
3186 vm_map_unlock(map);
3187 return(KERN_INVALID_ARGUMENT);
3188 }
3189 }
3190
3191 entry = entry->vme_next;
3192 }
3193
3194 entry = temp_entry;
3195 if (entry != vm_map_to_entry(map)) {
3196 /* clip and unnest if necessary */
3197 vm_map_clip_start(map, entry, start);
3198 }
3199
3200 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
3201 vm_map_clip_end(map, entry, end);
3202 assert(!entry->use_pmap); /* clip did unnest if needed */
3203
3204 entry->inheritance = new_inheritance;
3205
3206 entry = entry->vme_next;
3207 }
3208
3209 vm_map_unlock(map);
3210 return(KERN_SUCCESS);
3211 }
3212
3213 /*
3214 * Update the accounting for the amount of wired memory in this map. If the user has
3215 * exceeded the defined limits, then we fail. Wiring on behalf of the kernel never fails.
3216 */
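/*
 * For illustration only (hypothetical numbers, and assuming the
 * system-wide limit is not the constraint): with an effective per-map
 * user wire limit of 16MB and 14MB already accounted in
 * map->user_wire_size, a first-time user wiring of a 4MB entry fails
 * with KERN_RESOURCE_SHORTAGE, while wiring a 1MB entry succeeds and
 * raises user_wire_size to 15MB.
 */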
3217
3218 static kern_return_t
3219 add_wire_counts(
3220 vm_map_t map,
3221 vm_map_entry_t entry,
3222 boolean_t user_wire)
3223 {
3224 vm_map_size_t size;
3225
3226 if (user_wire) {
3227
3228 /*
3229 * We're wiring memory at the request of the user. Check if this is the first time the user is wiring
3230 * this map entry.
3231 */
3232
3233 if (entry->user_wired_count == 0) {
3234 size = entry->vme_end - entry->vme_start;
3235
3236 /*
3237 * Since this is the first time the user is wiring this map entry, check to see if we're
3238 * exceeding the user wire limits. There is a per-map limit, which is the smaller of
3239 * the process's rlimit and the global vm_user_wire_limit, which caps this value. There is also
3240 * a system-wide limit on the amount of memory all users can wire. If the user is over either
3241 * limit, then we fail.
3242 */
3243
3244 if(size + map->user_wire_size > MIN(map->user_wire_limit, vm_user_wire_limit) ||
3245 size + ptoa_64(vm_page_wire_count) > vm_global_user_wire_limit)
3246 return KERN_RESOURCE_SHORTAGE;
3247
3248 /*
3249 * The first time the user wires an entry, we also increment the wired_count and add this to
3250 * the total that has been wired in the map.
3251 */
3252
3253 if (entry->wired_count >= MAX_WIRE_COUNT)
3254 return KERN_FAILURE;
3255
3256 entry->wired_count++;
3257 map->user_wire_size += size;
3258 }
3259
3260 if (entry->user_wired_count >= MAX_WIRE_COUNT)
3261 return KERN_FAILURE;
3262
3263 entry->user_wired_count++;
3264
3265 } else {
3266
3267 /*
3268 * The kernel's wiring the memory. Just bump the count and continue.
3269 */
3270
3271 if (entry->wired_count >= MAX_WIRE_COUNT)
3272 panic("vm_map_wire: too many wirings");
3273
3274 entry->wired_count++;
3275 }
3276
3277 return KERN_SUCCESS;
3278 }
3279
3280 /*
3281 * Update the memory wiring accounting now that the given map entry is being unwired.
3282 */
3283
3284 static void
3285 subtract_wire_counts(
3286 vm_map_t map,
3287 vm_map_entry_t entry,
3288 boolean_t user_wire)
3289 {
3290
3291 if (user_wire) {
3292
3293 /*
3294 * We're unwiring memory at the request of the user. See if we're removing the last user wire reference.
3295 */
3296
3297 if (entry->user_wired_count == 1) {
3298
3299 /*
3300 * We're removing the last user wire reference. Decrement the wired_count and the total
3301 * user wired memory for this map.
3302 */
3303
3304 assert(entry->wired_count >= 1);
3305 entry->wired_count--;
3306 map->user_wire_size -= entry->vme_end - entry->vme_start;
3307 }
3308
3309 assert(entry->user_wired_count >= 1);
3310 entry->user_wired_count--;
3311
3312 } else {
3313
3314 /*
3315 * The kernel is unwiring the memory. Just update the count.
3316 */
3317
3318 assert(entry->wired_count >= 1);
3319 entry->wired_count--;
3320 }
3321 }
3322
3323 /*
3324 * vm_map_wire:
3325 *
3326 * Sets the pageability of the specified address range in the
3327 * target map as wired. Regions specified as not pageable require
3328 * locked-down physical memory and physical page maps. The
3329 * access_type variable indicates types of accesses that must not
3330 * generate page faults. This is checked against protection of
3331 * memory being locked-down.
3332 *
3333 * The map must not be locked, but a reference must remain to the
3334 * map throughout the call.
3335 */
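/*
 * Illustrative sketch (the range is hypothetical): wire
 * [addr, addr + size) on behalf of a user request so that read or
 * write access never faults:
 *
 *      kr = vm_map_wire(map, addr, addr + size,
 *                       VM_PROT_READ | VM_PROT_WRITE, TRUE);
 */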
3336 static kern_return_t
3337 vm_map_wire_nested(
3338 register vm_map_t map,
3339 register vm_map_offset_t start,
3340 register vm_map_offset_t end,
3341 register vm_prot_t access_type,
3342 boolean_t user_wire,
3343 pmap_t map_pmap,
3344 vm_map_offset_t pmap_addr)
3345 {
3346 register vm_map_entry_t entry;
3347 struct vm_map_entry *first_entry, tmp_entry;
3348 vm_map_t real_map;
3349 register vm_map_offset_t s,e;
3350 kern_return_t rc;
3351 boolean_t need_wakeup;
3352 boolean_t main_map = FALSE;
3353 wait_interrupt_t interruptible_state;
3354 thread_t cur_thread;
3355 unsigned int last_timestamp;
3356 vm_map_size_t size;
3357
3358 vm_map_lock(map);
3359 if(map_pmap == NULL)
3360 main_map = TRUE;
3361 last_timestamp = map->timestamp;
3362
3363 VM_MAP_RANGE_CHECK(map, start, end);
3364 assert(page_aligned(start));
3365 assert(page_aligned(end));
3366 if (start == end) {
3367 /* We wired what the caller asked for, zero pages */
3368 vm_map_unlock(map);
3369 return KERN_SUCCESS;
3370 }
3371
3372 need_wakeup = FALSE;
3373 cur_thread = current_thread();
3374
3375 s = start;
3376 rc = KERN_SUCCESS;
3377
3378 if (vm_map_lookup_entry(map, s, &first_entry)) {
3379 entry = first_entry;
3380 /*
3381 * vm_map_clip_start will be done later.
3382 * We don't want to unnest any nested submaps here !
3383 */
3384 } else {
3385 /* Start address is not in map */
3386 rc = KERN_INVALID_ADDRESS;
3387 goto done;
3388 }
3389
3390 while ((entry != vm_map_to_entry(map)) && (s < end)) {
3391 /*
3392 * At this point, we have wired from "start" to "s".
3393 * We still need to wire from "s" to "end".
3394 *
3395 * "entry" hasn't been clipped, so it could start before "s"
3396 * and/or end after "end".
3397 */
3398
3399 /* "e" is how far we want to wire in this entry */
3400 e = entry->vme_end;
3401 if (e > end)
3402 e = end;
3403
3404 /*
3405 * If another thread is wiring/unwiring this entry then
3406 * block after informing other thread to wake us up.
3407 */
3408 if (entry->in_transition) {
3409 wait_result_t wait_result;
3410
3411 /*
3412 * We have not clipped the entry. Make sure that
3413 * the start address is in range so that the lookup
3414 * below will succeed.
3415 * "s" is the current starting point: we've already
3416 * wired from "start" to "s" and we still have
3417 * to wire from "s" to "end".
3418 */
3419
3420 entry->needs_wakeup = TRUE;
3421
3422 /*
3423 * wake up anybody waiting on entries that we have
3424 * already wired.
3425 */
3426 if (need_wakeup) {
3427 vm_map_entry_wakeup(map);
3428 need_wakeup = FALSE;
3429 }
3430 /*
3431 * User wiring is interruptible
3432 */
3433 wait_result = vm_map_entry_wait(map,
3434 (user_wire) ? THREAD_ABORTSAFE :
3435 THREAD_UNINT);
3436 if (user_wire && wait_result == THREAD_INTERRUPTED) {
3437 /*
3438 * undo the wirings we have done so far
3439 * We do not clear the needs_wakeup flag,
3440 * because we cannot tell if we were the
3441 * only one waiting.
3442 */
3443 rc = KERN_FAILURE;
3444 goto done;
3445 }
3446
3447 /*
3448 * Cannot avoid a lookup here. reset timestamp.
3449 */
3450 last_timestamp = map->timestamp;
3451
3452 /*
3453 * The entry could have been clipped, look it up again.
3454 * The worst that can happen is that it may not exist anymore.
3455 */
3456 if (!vm_map_lookup_entry(map, s, &first_entry)) {
3457 if (!user_wire)
3458 panic("vm_map_wire: re-lookup failed");
3459
3460 /*
3461 * User: undo everything up to the previous
3462 * entry. Let vm_map_unwire worry about
3463 * checking the validity of the range.
3464 */
3465 rc = KERN_FAILURE;
3466 goto done;
3467 }
3468 entry = first_entry;
3469 continue;
3470 }
3471
3472 if (entry->is_sub_map) {
3473 vm_map_offset_t sub_start;
3474 vm_map_offset_t sub_end;
3475 vm_map_offset_t local_start;
3476 vm_map_offset_t local_end;
3477 pmap_t pmap;
3478
3479 vm_map_clip_start(map, entry, s);
3480 vm_map_clip_end(map, entry, end);
3481
3482 sub_start = entry->offset;
3483 sub_end = entry->vme_end;
3484 sub_end += entry->offset - entry->vme_start;
3485
3486 local_end = entry->vme_end;
3487 if(map_pmap == NULL) {
3488 vm_object_t object;
3489 vm_object_offset_t offset;
3490 vm_prot_t prot;
3491 boolean_t wired;
3492 vm_map_entry_t local_entry;
3493 vm_map_version_t version;
3494 vm_map_t lookup_map;
3495
3496 if(entry->use_pmap) {
3497 pmap = entry->object.sub_map->pmap;
3498 /* ppc implementation requires that */
3499 /* the submap's pmap address ranges line */
3500 /* up with the parent map */
3501 #ifdef notdef
3502 pmap_addr = sub_start;
3503 #endif
3504 pmap_addr = s;
3505 } else {
3506 pmap = map->pmap;
3507 pmap_addr = s;
3508 }
3509
3510 if (entry->wired_count) {
3511 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
3512 goto done;
3513
3514 /*
3515 * The map was not unlocked:
3516 * no need to goto re-lookup.
3517 * Just go directly to next entry.
3518 */
3519 entry = entry->vme_next;
3520 s = entry->vme_start;
3521 continue;
3522
3523 }
3524
3525 /* call vm_map_lookup_locked to */
3526 /* cause any needs copy to be */
3527 /* evaluated */
3528 local_start = entry->vme_start;
3529 lookup_map = map;
3530 vm_map_lock_write_to_read(map);
3531 if(vm_map_lookup_locked(
3532 &lookup_map, local_start,
3533 access_type,
3534 OBJECT_LOCK_EXCLUSIVE,
3535 &version, &object,
3536 &offset, &prot, &wired,
3537 NULL,
3538 &real_map)) {
3539
3540 vm_map_unlock_read(lookup_map);
3541 vm_map_unwire(map, start,
3542 s, user_wire);
3543 return(KERN_FAILURE);
3544 }
3545 if(real_map != lookup_map)
3546 vm_map_unlock(real_map);
3547 vm_map_unlock_read(lookup_map);
3548 vm_map_lock(map);
3549 vm_object_unlock(object);
3550
3551 /* we unlocked, so must re-lookup */
3552 if (!vm_map_lookup_entry(map,
3553 local_start,
3554 &local_entry)) {
3555 rc = KERN_FAILURE;
3556 goto done;
3557 }
3558
3559 /*
3560 * entry could have been "simplified",
3561 * so re-clip
3562 */
3563 entry = local_entry;
3564 assert(s == local_start);
3565 vm_map_clip_start(map, entry, s);
3566 vm_map_clip_end(map, entry, end);
3567 /* re-compute "e" */
3568 e = entry->vme_end;
3569 if (e > end)
3570 e = end;
3571
3572 /* did we have a change of type? */
3573 if (!entry->is_sub_map) {
3574 last_timestamp = map->timestamp;
3575 continue;
3576 }
3577 } else {
3578 local_start = entry->vme_start;
3579 pmap = map_pmap;
3580 }
3581
3582 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
3583 goto done;
3584
3585 entry->in_transition = TRUE;
3586
3587 vm_map_unlock(map);
3588 rc = vm_map_wire_nested(entry->object.sub_map,
3589 sub_start, sub_end,
3590 access_type,
3591 user_wire, pmap, pmap_addr);
3592 vm_map_lock(map);
3593
3594 /*
3595 * Find the entry again. It could have been clipped
3596 * after we unlocked the map.
3597 */
3598 if (!vm_map_lookup_entry(map, local_start,
3599 &first_entry))
3600 panic("vm_map_wire: re-lookup failed");
3601 entry = first_entry;
3602
3603 assert(local_start == s);
3604 /* re-compute "e" */
3605 e = entry->vme_end;
3606 if (e > end)
3607 e = end;
3608
3609 last_timestamp = map->timestamp;
3610 while ((entry != vm_map_to_entry(map)) &&
3611 (entry->vme_start < e)) {
3612 assert(entry->in_transition);
3613 entry->in_transition = FALSE;
3614 if (entry->needs_wakeup) {
3615 entry->needs_wakeup = FALSE;
3616 need_wakeup = TRUE;
3617 }
3618 if (rc != KERN_SUCCESS) {/* from vm_*_wire */
3619 subtract_wire_counts(map, entry, user_wire);
3620 }
3621 entry = entry->vme_next;
3622 }
3623 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
3624 goto done;
3625 }
3626
3627 /* no need to relookup again */
3628 s = entry->vme_start;
3629 continue;
3630 }
3631
3632 /*
3633 * If this entry is already wired then increment
3634 * the appropriate wire reference count.
3635 */
3636 if (entry->wired_count) {
3637 /*
3638 * entry is already wired down, get our reference
3639 * after clipping to our range.
3640 */
3641 vm_map_clip_start(map, entry, s);
3642 vm_map_clip_end(map, entry, end);
3643
3644 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
3645 goto done;
3646
3647 /* map was not unlocked: no need to relookup */
3648 entry = entry->vme_next;
3649 s = entry->vme_start;
3650 continue;
3651 }
3652
3653 /*
3654 * Unwired entry or wire request transmitted via submap
3655 */
3656
3657
3658 /*
3659 * Perform actions of vm_map_lookup that need the write
3660 * lock on the map: create a shadow object for a
3661 * copy-on-write region, or an object for a zero-fill
3662 * region.
3663 */
3664 size = entry->vme_end - entry->vme_start;
3665 /*
3666 * If wiring a copy-on-write page, we need to copy it now
3667 * even if we're only (currently) requesting read access.
3668 * This is aggressive, but once it's wired we can't move it.
3669 */
3670 if (entry->needs_copy) {
3671 vm_object_shadow(&entry->object.vm_object,
3672 &entry->offset, size);
3673 entry->needs_copy = FALSE;
3674 } else if (entry->object.vm_object == VM_OBJECT_NULL) {
3675 entry->object.vm_object = vm_object_allocate(size);
3676 entry->offset = (vm_object_offset_t)0;
3677 }
3678
3679 vm_map_clip_start(map, entry, s);
3680 vm_map_clip_end(map, entry, end);
3681
3682 /* re-compute "e" */
3683 e = entry->vme_end;
3684 if (e > end)
3685 e = end;
3686
3687 /*
3688 * Check for holes and protection mismatch.
3689 * Holes: Next entry should be contiguous unless this
3690 * is the end of the region.
3691 * Protection: Access requested must be allowed, unless
3692 * wiring is by protection class
3693 */
3694 if ((entry->vme_end < end) &&
3695 ((entry->vme_next == vm_map_to_entry(map)) ||
3696 (entry->vme_next->vme_start > entry->vme_end))) {
3697 /* found a hole */
3698 rc = KERN_INVALID_ADDRESS;
3699 goto done;
3700 }
3701 if ((entry->protection & access_type) != access_type) {
3702 /* found a protection problem */
3703 rc = KERN_PROTECTION_FAILURE;
3704 goto done;
3705 }
3706
3707 assert(entry->wired_count == 0 && entry->user_wired_count == 0);
3708
3709 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
3710 goto done;
3711
3712 entry->in_transition = TRUE;
3713
3714 /*
3715 * This entry might get split once we unlock the map.
3716 * In vm_fault_wire(), we need the current range as
3717 * defined by this entry. In order for this to work
3718 * along with a simultaneous clip operation, we make a
3719 * temporary copy of this entry and use that for the
3720 * wiring. Note that the underlying objects do not
3721 * change during a clip.
3722 */
3723 tmp_entry = *entry;
3724
3725 /*
3726 * The in_transition state guarantees that the entry
3727 * (or entries for this range, if a split occurred) will be
3728 * there when the map lock is acquired for the second time.
3729 */
3730 vm_map_unlock(map);
3731
3732 if (!user_wire && cur_thread != THREAD_NULL)
3733 interruptible_state = thread_interrupt_level(THREAD_UNINT);
3734 else
3735 interruptible_state = THREAD_UNINT;
3736
3737 if(map_pmap)
3738 rc = vm_fault_wire(map,
3739 &tmp_entry, map_pmap, pmap_addr);
3740 else
3741 rc = vm_fault_wire(map,
3742 &tmp_entry, map->pmap,
3743 tmp_entry.vme_start);
3744
3745 if (!user_wire && cur_thread != THREAD_NULL)
3746 thread_interrupt_level(interruptible_state);
3747
3748 vm_map_lock(map);
3749
3750 if (last_timestamp+1 != map->timestamp) {
3751 /*
3752 * Find the entry again. It could have been clipped
3753 * after we unlocked the map.
3754 */
3755 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
3756 &first_entry))
3757 panic("vm_map_wire: re-lookup failed");
3758
3759 entry = first_entry;
3760 }
3761
3762 last_timestamp = map->timestamp;
3763
3764 while ((entry != vm_map_to_entry(map)) &&
3765 (entry->vme_start < tmp_entry.vme_end)) {
3766 assert(entry->in_transition);
3767 entry->in_transition = FALSE;
3768 if (entry->needs_wakeup) {
3769 entry->needs_wakeup = FALSE;
3770 need_wakeup = TRUE;
3771 }
3772 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
3773 subtract_wire_counts(map, entry, user_wire);
3774 }
3775 entry = entry->vme_next;
3776 }
3777
3778 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
3779 goto done;
3780 }
3781
3782 s = entry->vme_start;
3783 } /* end while loop through map entries */
3784
3785 done:
3786 if (rc == KERN_SUCCESS) {
3787 /* repair any damage we may have made to the VM map */
3788 vm_map_simplify_range(map, start, end);
3789 }
3790
3791 vm_map_unlock(map);
3792
3793 /*
3794 * wake up anybody waiting on entries we wired.
3795 */
3796 if (need_wakeup)
3797 vm_map_entry_wakeup(map);
3798
3799 if (rc != KERN_SUCCESS) {
3800 /* undo what has been wired so far */
3801 vm_map_unwire(map, start, s, user_wire);
3802 }
3803
3804 return rc;
3805
3806 }
3807
3808 kern_return_t
3809 vm_map_wire(
3810 register vm_map_t map,
3811 register vm_map_offset_t start,
3812 register vm_map_offset_t end,
3813 register vm_prot_t access_type,
3814 boolean_t user_wire)
3815 {
3816
3817 kern_return_t kret;
3818
3819 #ifdef ppc
3820 /*
3821 * the calls to mapping_prealloc and mapping_relpre
3822 * (along with the VM_MAP_RANGE_CHECK to ensure a
3823 * reasonable range was passed in) are
3824 * currently necessary because
3825 * we haven't enabled kernel pre-emption
3826 * and/or the pmap_enter cannot purge and re-use
3827 * existing mappings
3828 */
3829 VM_MAP_RANGE_CHECK(map, start, end);
3830 mapping_prealloc(end - start);
3831 #endif
3832 kret = vm_map_wire_nested(map, start, end, access_type,
3833 user_wire, (pmap_t)NULL, 0);
3834 #ifdef ppc
3835 mapping_relpre();
3836 #endif
3837 return kret;
3838 }
3839
3840 /*
3841 * vm_map_unwire:
3842 *
3843 * Sets the pageability of the specified address range in the target
3844 * as pageable. Regions specified must have been wired previously.
3845 *
3846 * The map must not be locked, but a reference must remain to the map
3847 * throughout the call.
3848 *
3849 * Kernel will panic on failures. User unwire ignores holes and
3850 * unwired and in-transition entries to avoid losing memory by leaving
3851 * it unwired.
3852 */
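/*
 * Illustrative sketch (the range is hypothetical), undoing a user
 * wiring established earlier with vm_map_wire:
 *
 *      kr = vm_map_unwire(map, addr, addr + size, TRUE);
 */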
3853 static kern_return_t
3854 vm_map_unwire_nested(
3855 register vm_map_t map,
3856 register vm_map_offset_t start,
3857 register vm_map_offset_t end,
3858 boolean_t user_wire,
3859 pmap_t map_pmap,
3860 vm_map_offset_t pmap_addr)
3861 {
3862 register vm_map_entry_t entry;
3863 struct vm_map_entry *first_entry, tmp_entry;
3864 boolean_t need_wakeup;
3865 boolean_t main_map = FALSE;
3866 unsigned int last_timestamp;
3867
3868 vm_map_lock(map);
3869 if(map_pmap == NULL)
3870 main_map = TRUE;
3871 last_timestamp = map->timestamp;
3872
3873 VM_MAP_RANGE_CHECK(map, start, end);
3874 assert(page_aligned(start));
3875 assert(page_aligned(end));
3876
3877 if (start == end) {
3878 /* We unwired what the caller asked for: zero pages */
3879 vm_map_unlock(map);
3880 return KERN_SUCCESS;
3881 }
3882
3883 if (vm_map_lookup_entry(map, start, &first_entry)) {
3884 entry = first_entry;
3885 /*
3886 * vm_map_clip_start will be done later.
3887 * We don't want to unnest any nested sub maps here !
3888 */
3889 }
3890 else {
3891 if (!user_wire) {
3892 panic("vm_map_unwire: start not found");
3893 }
3894 /* Start address is not in map. */
3895 vm_map_unlock(map);
3896 return(KERN_INVALID_ADDRESS);
3897 }
3898
3899 need_wakeup = FALSE;
3900 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
3901 if (entry->in_transition) {
3902 /*
3903 * 1)
3904 * Another thread is wiring down this entry. Note
3905 * that if it were not for the other thread, we would
3906 * be unwiring an unwired entry. This is not
3907 * permitted. If we wait, we will be unwiring memory
3908 * we did not wire.
3909 *
3910 * 2)
3911 * Another thread is unwiring this entry. We did not
3912 * have a reference to it, because if we did, this
3913 * entry would not be getting unwired now.
3914 */
3915 if (!user_wire) {
3916 /*
3917 * XXX FBDP
3918 * This could happen: there could be some
3919 * overlapping vslock/vsunlock operations
3920 * going on.
3921 * We should probably just wait and retry,
3922 * but then we have to be careful that this
3923 * entry could get "simplified" after
3924 * "in_transition" gets unset and before
3925 * we re-lookup the entry, so we would
3926 * have to re-clip the entry to avoid
3927 * re-unwiring what we have already unwired...
3928 * See vm_map_wire_nested().
3929 *
3930 * Or we could just ignore "in_transition"
3931 * here and proceed to decrement the wired
3932 * count(s) on this entry. That should be fine
3933 * as long as "wired_count" doesn't drop all
3934 * the way to 0 (and we should panic if THAT
3935 * happens).
3936 */
3937 panic("vm_map_unwire: in_transition entry");
3938 }
3939
3940 entry = entry->vme_next;
3941 continue;
3942 }
3943
3944 if (entry->is_sub_map) {
3945 vm_map_offset_t sub_start;
3946 vm_map_offset_t sub_end;
3947 vm_map_offset_t local_end;
3948 pmap_t pmap;
3949
3950 vm_map_clip_start(map, entry, start);
3951 vm_map_clip_end(map, entry, end);
3952
3953 sub_start = entry->offset;
3954 sub_end = entry->vme_end - entry->vme_start;
3955 sub_end += entry->offset;
3956 local_end = entry->vme_end;
3957 if(map_pmap == NULL) {
3958 if(entry->use_pmap) {
3959 pmap = entry->object.sub_map->pmap;
3960 pmap_addr = sub_start;
3961 } else {
3962 pmap = map->pmap;
3963 pmap_addr = start;
3964 }
3965 if (entry->wired_count == 0 ||
3966 (user_wire && entry->user_wired_count == 0)) {
3967 if (!user_wire)
3968 panic("vm_map_unwire: entry is unwired");
3969 entry = entry->vme_next;
3970 continue;
3971 }
3972
3973 /*
3974 * Check for holes
3975 * Holes: Next entry should be contiguous unless
3976 * this is the end of the region.
3977 */
3978 if (((entry->vme_end < end) &&
3979 ((entry->vme_next == vm_map_to_entry(map)) ||
3980 (entry->vme_next->vme_start
3981 > entry->vme_end)))) {
3982 if (!user_wire)
3983 panic("vm_map_unwire: non-contiguous region");
3984 /*
3985 entry = entry->vme_next;
3986 continue;
3987 */
3988 }
3989
3990 subtract_wire_counts(map, entry, user_wire);
3991
3992 if (entry->wired_count != 0) {
3993 entry = entry->vme_next;
3994 continue;
3995 }
3996
3997 entry->in_transition = TRUE;
3998 tmp_entry = *entry;/* see comment in vm_map_wire() */
3999
4000 /*
4001 * We can unlock the map now. The in_transition state
4002 * guarantees existence of the entry.
4003 */
4004 vm_map_unlock(map);
4005 vm_map_unwire_nested(entry->object.sub_map,
4006 sub_start, sub_end, user_wire, pmap, pmap_addr);
4007 vm_map_lock(map);
4008
4009 if (last_timestamp+1 != map->timestamp) {
4010 /*
4011 * Find the entry again. It could have been
4012 * clipped or deleted after we unlocked the map.
4013 */
4014 if (!vm_map_lookup_entry(map,
4015 tmp_entry.vme_start,
4016 &first_entry)) {
4017 if (!user_wire)
4018 panic("vm_map_unwire: re-lookup failed");
4019 entry = first_entry->vme_next;
4020 } else
4021 entry = first_entry;
4022 }
4023 last_timestamp = map->timestamp;
4024
4025 /*
4026 * clear transition bit for all constituent entries
4027 * that were in the original entry (saved in
4028 * tmp_entry). Also check for waiters.
4029 */
4030 while ((entry != vm_map_to_entry(map)) &&
4031 (entry->vme_start < tmp_entry.vme_end)) {
4032 assert(entry->in_transition);
4033 entry->in_transition = FALSE;
4034 if (entry->needs_wakeup) {
4035 entry->needs_wakeup = FALSE;
4036 need_wakeup = TRUE;
4037 }
4038 entry = entry->vme_next;
4039 }
4040 continue;
4041 } else {
4042 vm_map_unlock(map);
4043 vm_map_unwire_nested(entry->object.sub_map,
4044 sub_start, sub_end, user_wire, map_pmap,
4045 pmap_addr);
4046 vm_map_lock(map);
4047
4048 if (last_timestamp+1 != map->timestamp) {
4049 /*
4050 * Find the entry again. It could have been
4051 * clipped or deleted after we unlocked the map.
4052 */
4053 if (!vm_map_lookup_entry(map,
4054 tmp_entry.vme_start,
4055 &first_entry)) {
4056 if (!user_wire)
4057 panic("vm_map_unwire: re-lookup failed");
4058 entry = first_entry->vme_next;
4059 } else
4060 entry = first_entry;
4061 }
4062 last_timestamp = map->timestamp;
4063 }
4064 }
4065
4066
4067 if ((entry->wired_count == 0) ||
4068 (user_wire && entry->user_wired_count == 0)) {
4069 if (!user_wire)
4070 panic("vm_map_unwire: entry is unwired");
4071
4072 entry = entry->vme_next;
4073 continue;
4074 }
4075
4076 assert(entry->wired_count > 0 &&
4077 (!user_wire || entry->user_wired_count > 0));
4078
4079 vm_map_clip_start(map, entry, start);
4080 vm_map_clip_end(map, entry, end);
4081
4082 /*
4083 * Check for holes
4084 * Holes: Next entry should be contiguous unless
4085 * this is the end of the region.
4086 */
4087 if (((entry->vme_end < end) &&
4088 ((entry->vme_next == vm_map_to_entry(map)) ||
4089 (entry->vme_next->vme_start > entry->vme_end)))) {
4090
4091 if (!user_wire)
4092 panic("vm_map_unwire: non-contiguous region");
4093 entry = entry->vme_next;
4094 continue;
4095 }
4096
4097 subtract_wire_counts(map, entry, user_wire);
4098
4099 if (entry->wired_count != 0) {
4100 entry = entry->vme_next;
4101 continue;
4102 }
4103
4104 entry->in_transition = TRUE;
4105 tmp_entry = *entry; /* see comment in vm_map_wire() */
4106
4107 /*
4108 * We can unlock the map now. The in_transition state
4109 * guarantees existence of the entry.
4110 */
4111 vm_map_unlock(map);
4112 if(map_pmap) {
4113 vm_fault_unwire(map,
4114 &tmp_entry, FALSE, map_pmap, pmap_addr);
4115 } else {
4116 vm_fault_unwire(map,
4117 &tmp_entry, FALSE, map->pmap,
4118 tmp_entry.vme_start);
4119 }
4120 vm_map_lock(map);
4121
4122 if (last_timestamp+1 != map->timestamp) {
4123 /*
4124 * Find the entry again. It could have been clipped
4125 * or deleted after we unlocked the map.
4126 */
4127 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
4128 &first_entry)) {
4129 if (!user_wire)
4130 panic("vm_map_unwire: re-lookup failed");
4131 entry = first_entry->vme_next;
4132 } else
4133 entry = first_entry;
4134 }
4135 last_timestamp = map->timestamp;
4136
4137 /*
4138 * clear transition bit for all constituent entries that
4139 * were in the original entry (saved in tmp_entry). Also
4140 * check for waiters.
4141 */
4142 while ((entry != vm_map_to_entry(map)) &&
4143 (entry->vme_start < tmp_entry.vme_end)) {
4144 assert(entry->in_transition);
4145 entry->in_transition = FALSE;
4146 if (entry->needs_wakeup) {
4147 entry->needs_wakeup = FALSE;
4148 need_wakeup = TRUE;
4149 }
4150 entry = entry->vme_next;
4151 }
4152 }
4153
4154 /*
4155 * We might have fragmented the address space when we wired this
4156 * range of addresses. Attempt to re-coalesce these VM map entries
4157 * with their neighbors now that they're no longer wired.
4158 * Under some circumstances, address space fragmentation can
4159 * prevent VM object shadow chain collapsing, which can cause
4160 * swap space leaks.
4161 */
4162 vm_map_simplify_range(map, start, end);
4163
4164 vm_map_unlock(map);
4165 /*
4166 * wake up anybody waiting on entries that we have unwired.
4167 */
4168 if (need_wakeup)
4169 vm_map_entry_wakeup(map);
4170 return(KERN_SUCCESS);
4171
4172 }
4173
4174 kern_return_t
4175 vm_map_unwire(
4176 register vm_map_t map,
4177 register vm_map_offset_t start,
4178 register vm_map_offset_t end,
4179 boolean_t user_wire)
4180 {
4181 return vm_map_unwire_nested(map, start, end,
4182 user_wire, (pmap_t)NULL, 0);
4183 }
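
/*
 * Editor's note: a minimal usage sketch (not part of the original
 * source) showing how a caller that previously wired a user range
 * might unwire it again via vm_map_unwire() above.  The names "map",
 * "addr" and "size" are hypothetical; the range is assumed to be
 * page aligned and to have been wired earlier, as the header comment
 * for vm_map_unwire_nested() requires.
 */
#if 0	/* illustrative sketch only */
static kern_return_t
example_unwire_user_range(
	vm_map_t	map,	/* e.g. current_map() */
	vm_map_offset_t	addr,
	vm_map_size_t	size)
{
	/* user_wire == TRUE: decrement the user wired count */
	return vm_map_unwire(map, addr, addr + size, TRUE);
}
#endif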
4184
4185
4186 /*
4187 * vm_map_entry_delete: [ internal use only ]
4188 *
4189 * Deallocate the given entry from the target map.
4190 */
4191 static void
4192 vm_map_entry_delete(
4193 register vm_map_t map,
4194 register vm_map_entry_t entry)
4195 {
4196 register vm_map_offset_t s, e;
4197 register vm_object_t object;
4198 register vm_map_t submap;
4199
4200 s = entry->vme_start;
4201 e = entry->vme_end;
4202 assert(page_aligned(s));
4203 assert(page_aligned(e));
4204 assert(entry->wired_count == 0);
4205 assert(entry->user_wired_count == 0);
4206
4207 if (entry->is_sub_map) {
4208 object = NULL;
4209 submap = entry->object.sub_map;
4210 } else {
4211 submap = NULL;
4212 object = entry->object.vm_object;
4213 }
4214
4215 vm_map_entry_unlink(map, entry);
4216 map->size -= e - s;
4217
4218 vm_map_entry_dispose(map, entry);
4219
4220 vm_map_unlock(map);
4221 /*
4222 * Deallocate the object only after removing all
4223 * pmap entries pointing to its pages.
4224 */
4225 if (submap)
4226 vm_map_deallocate(submap);
4227 else
4228 vm_object_deallocate(object);
4229
4230 }
4231
4232 void
4233 vm_map_submap_pmap_clean(
4234 vm_map_t map,
4235 vm_map_offset_t start,
4236 vm_map_offset_t end,
4237 vm_map_t sub_map,
4238 vm_map_offset_t offset)
4239 {
4240 vm_map_offset_t submap_start;
4241 vm_map_offset_t submap_end;
4242 vm_map_size_t remove_size;
4243 vm_map_entry_t entry;
4244
4245 submap_end = offset + (end - start);
4246 submap_start = offset;
4247 if(vm_map_lookup_entry(sub_map, offset, &entry)) {
4248
4249 remove_size = (entry->vme_end - entry->vme_start);
4250 if(offset > entry->vme_start)
4251 remove_size -= offset - entry->vme_start;
4252
4253
4254 if(submap_end < entry->vme_end) {
4255 remove_size -=
4256 entry->vme_end - submap_end;
4257 }
4258 if(entry->is_sub_map) {
4259 vm_map_submap_pmap_clean(
4260 sub_map,
4261 start,
4262 start + remove_size,
4263 entry->object.sub_map,
4264 entry->offset);
4265 } else {
4266
4267 if((map->mapped) && (map->ref_count)
4268 && (entry->object.vm_object != NULL)) {
4269 vm_object_pmap_protect(
4270 entry->object.vm_object,
4271 entry->offset,
4272 remove_size,
4273 PMAP_NULL,
4274 entry->vme_start,
4275 VM_PROT_NONE);
4276 } else {
4277 pmap_remove(map->pmap,
4278 (addr64_t)start,
4279 (addr64_t)(start + remove_size));
4280 }
4281 }
4282 }
4283
4284 entry = entry->vme_next;
4285
4286 while((entry != vm_map_to_entry(sub_map))
4287 && (entry->vme_start < submap_end)) {
4288 remove_size = (entry->vme_end - entry->vme_start);
4289 if(submap_end < entry->vme_end) {
4290 remove_size -= entry->vme_end - submap_end;
4291 }
4292 if(entry->is_sub_map) {
4293 vm_map_submap_pmap_clean(
4294 sub_map,
4295 (start + entry->vme_start) - offset,
4296 ((start + entry->vme_start) - offset) + remove_size,
4297 entry->object.sub_map,
4298 entry->offset);
4299 } else {
4300 if((map->mapped) && (map->ref_count)
4301 && (entry->object.vm_object != NULL)) {
4302 vm_object_pmap_protect(
4303 entry->object.vm_object,
4304 entry->offset,
4305 remove_size,
4306 PMAP_NULL,
4307 entry->vme_start,
4308 VM_PROT_NONE);
4309 } else {
4310 pmap_remove(map->pmap,
4311 (addr64_t)((start + entry->vme_start)
4312 - offset),
4313 (addr64_t)(((start + entry->vme_start)
4314 - offset) + remove_size));
4315 }
4316 }
4317 entry = entry->vme_next;
4318 }
4319 return;
4320 }
4321
4322 /*
4323 * vm_map_delete: [ internal use only ]
4324 *
4325 * Deallocates the given address range from the target map.
4326 * Removes all user wirings. Unwires one kernel wiring if
4327 * VM_MAP_REMOVE_KUNWIRE is set. Waits for kernel wirings to go
4328 * away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set. Sleeps
4329 * interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
4330 *
4331 * This routine is called with map locked and leaves map locked.
4332 */
4333 static kern_return_t
4334 vm_map_delete(
4335 vm_map_t map,
4336 vm_map_offset_t start,
4337 vm_map_offset_t end,
4338 int flags,
4339 vm_map_t zap_map)
4340 {
4341 vm_map_entry_t entry, next;
4342 struct vm_map_entry *first_entry, tmp_entry;
4343 register vm_map_offset_t s;
4344 register vm_object_t object;
4345 boolean_t need_wakeup;
4346 unsigned int last_timestamp = ~0; /* unlikely value */
4347 int interruptible;
4348
4349 interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
4350 THREAD_ABORTSAFE : THREAD_UNINT;
4351
4352 /*
4353 * All our DMA I/O operations in IOKit are currently done by
4354 * wiring through the map entries of the task requesting the I/O.
4355 * Because of this, we must always wait for kernel wirings
4356 * to go away on the entries before deleting them.
4357 *
4358 * Any caller who wants to actually remove a kernel wiring
4359 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
4360 * properly remove one wiring instead of blasting through
4361 * them all.
4362 */
4363 flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;
4364
4365 /*
4366 * Find the start of the region, and clip it
4367 */
4368 if (vm_map_lookup_entry(map, start, &first_entry)) {
4369 entry = first_entry;
4370 if (start == entry->vme_start) {
4371 /*
4372 * No need to clip. We don't want to cause
4373 * any unnecessary unnesting in this case...
4374 */
4375 } else {
4376 vm_map_clip_start(map, entry, start);
4377 }
4378
4379 /*
4380 * Fix the lookup hint now, rather than each
4381 * time through the loop.
4382 */
4383 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4384 } else {
4385 entry = first_entry->vme_next;
4386 }
4387
4388 need_wakeup = FALSE;
4389 /*
4390 * Step through all entries in this region
4391 */
4392 s = entry->vme_start;
4393 while ((entry != vm_map_to_entry(map)) && (s < end)) {
4394 /*
4395 * At this point, we have deleted all the memory entries
4396 * between "start" and "s". We still need to delete
4397 * all memory entries between "s" and "end".
4398 * While we were blocked and the map was unlocked, some
4399 * new memory entries could have been re-allocated between
4400 * "start" and "s" and we don't want to mess with those.
4401 * Some of those entries could even have been re-assembled
4402 * with an entry after "s" (in vm_map_simplify_entry()), so
4403 * we may have to vm_map_clip_start() again.
4404 */
4405
4406 if (entry->vme_start >= s) {
4407 /*
4408 * This entry starts on or after "s"
4409 * so no need to clip its start.
4410 */
4411 } else {
4412 /*
4413 * This entry has been re-assembled by a
4414 * vm_map_simplify_entry(). We need to
4415 * re-clip its start.
4416 */
4417 vm_map_clip_start(map, entry, s);
4418 }
4419 if (entry->vme_end <= end) {
4420 /*
4421 * This entry is going away completely, so no need
4422 * to clip and possibly cause an unnecessary unnesting.
4423 */
4424 } else {
4425 vm_map_clip_end(map, entry, end);
4426 }
4427 if (entry->in_transition) {
4428 wait_result_t wait_result;
4429
4430 /*
4431 * Another thread is wiring/unwiring this entry.
4432 * Let the other thread know we are waiting.
4433 */
4434 assert(s == entry->vme_start);
4435 entry->needs_wakeup = TRUE;
4436
4437 /*
4438 * wake up anybody waiting on entries that we have
4439 * already unwired/deleted.
4440 */
4441 if (need_wakeup) {
4442 vm_map_entry_wakeup(map);
4443 need_wakeup = FALSE;
4444 }
4445
4446 wait_result = vm_map_entry_wait(map, interruptible);
4447
4448 if (interruptible &&
4449 wait_result == THREAD_INTERRUPTED) {
4450 /*
4451 * We do not clear the needs_wakeup flag,
4452 * since we cannot tell if we were the only one.
4453 */
4454 vm_map_unlock(map);
4455 return KERN_ABORTED;
4456 }
4457
4458 /*
4459 * The entry could have been clipped or it
4460 * may not exist anymore. Look it up again.
4461 */
4462 if (!vm_map_lookup_entry(map, s, &first_entry)) {
4463 assert((map != kernel_map) &&
4464 (!entry->is_sub_map));
4465 /*
4466 * User: use the next entry
4467 */
4468 entry = first_entry->vme_next;
4469 s = entry->vme_start;
4470 } else {
4471 entry = first_entry;
4472 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4473 }
4474 last_timestamp = map->timestamp;
4475 continue;
4476 } /* end in_transition */
4477
4478 if (entry->wired_count) {
4479 boolean_t user_wire;
4480
4481 user_wire = entry->user_wired_count > 0;
4482
4483 /*
4484 * Remove a kernel wiring if requested or if
4485 * there are user wirings.
4486 */
4487 if ((flags & VM_MAP_REMOVE_KUNWIRE) ||
4488 (entry->user_wired_count > 0))
4489 entry->wired_count--;
4490
4491 /* remove all user wire references */
4492 entry->user_wired_count = 0;
4493
4494 if (entry->wired_count != 0) {
4495 assert(map != kernel_map);
4496 /*
4497 * Cannot continue. Typical case is when
4498 * a user thread has physical io pending on
4499 * this page. Either wait for the
4500 * kernel wiring to go away or return an
4501 * error.
4502 */
4503 if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) {
4504 wait_result_t wait_result;
4505
4506 assert(s == entry->vme_start);
4507 entry->needs_wakeup = TRUE;
4508 wait_result = vm_map_entry_wait(map,
4509 interruptible);
4510
4511 if (interruptible &&
4512 wait_result == THREAD_INTERRUPTED) {
4513 /*
4514 * We do not clear the
4515 * needs_wakeup flag, since we
4516 * cannot tell if we were the
4517 * only one.
4518 */
4519 vm_map_unlock(map);
4520 return KERN_ABORTED;
4521 }
4522
4523 /*
4524 * The entry could have been clipped or
4525 * it may not exist anymore. Look it
4526 * up again.
4527 */
4528 if (!vm_map_lookup_entry(map, s,
4529 &first_entry)) {
4530 assert(map != kernel_map);
4531 /*
4532 * User: use the next entry
4533 */
4534 entry = first_entry->vme_next;
4535 s = entry->vme_start;
4536 } else {
4537 entry = first_entry;
4538 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4539 }
4540 last_timestamp = map->timestamp;
4541 continue;
4542 }
4543 else {
4544 return KERN_FAILURE;
4545 }
4546 }
4547
4548 entry->in_transition = TRUE;
4549 /*
4550 * copy current entry. see comment in vm_map_wire()
4551 */
4552 tmp_entry = *entry;
4553 assert(s == entry->vme_start);
4554
4555 /*
4556 * We can unlock the map now. The in_transition
4557 * state guarantees existence of the entry.
4558 */
4559 vm_map_unlock(map);
4560
4561 if (tmp_entry.is_sub_map) {
4562 vm_map_t sub_map;
4563 vm_map_offset_t sub_start, sub_end;
4564 pmap_t pmap;
4565 vm_map_offset_t pmap_addr;
4566
4567
4568 sub_map = tmp_entry.object.sub_map;
4569 sub_start = tmp_entry.offset;
4570 sub_end = sub_start + (tmp_entry.vme_end -
4571 tmp_entry.vme_start);
4572 if (tmp_entry.use_pmap) {
4573 pmap = sub_map->pmap;
4574 pmap_addr = tmp_entry.vme_start;
4575 } else {
4576 pmap = map->pmap;
4577 pmap_addr = tmp_entry.vme_start;
4578 }
4579 (void) vm_map_unwire_nested(sub_map,
4580 sub_start, sub_end,
4581 user_wire,
4582 pmap, pmap_addr);
4583 } else {
4584
4585 vm_fault_unwire(map, &tmp_entry,
4586 tmp_entry.object.vm_object == kernel_object,
4587 map->pmap, tmp_entry.vme_start);
4588 }
4589
4590 vm_map_lock(map);
4591
4592 if (last_timestamp+1 != map->timestamp) {
4593 /*
4594 * Find the entry again. It could have
4595 * been clipped after we unlocked the map.
4596 */
4597 if (!vm_map_lookup_entry(map, s, &first_entry)){
4598 assert((map != kernel_map) &&
4599 (!entry->is_sub_map));
4600 first_entry = first_entry->vme_next;
4601 s = first_entry->vme_start;
4602 } else {
4603 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4604 }
4605 } else {
4606 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4607 first_entry = entry;
4608 }
4609
4610 last_timestamp = map->timestamp;
4611
4612 entry = first_entry;
4613 while ((entry != vm_map_to_entry(map)) &&
4614 (entry->vme_start < tmp_entry.vme_end)) {
4615 assert(entry->in_transition);
4616 entry->in_transition = FALSE;
4617 if (entry->needs_wakeup) {
4618 entry->needs_wakeup = FALSE;
4619 need_wakeup = TRUE;
4620 }
4621 entry = entry->vme_next;
4622 }
4623 /*
4624 * We have unwired the entry(s). Go back and
4625 * delete them.
4626 */
4627 entry = first_entry;
4628 continue;
4629 }
4630
4631 /* entry is unwired */
4632 assert(entry->wired_count == 0);
4633 assert(entry->user_wired_count == 0);
4634
4635 assert(s == entry->vme_start);
4636
4637 if (flags & VM_MAP_REMOVE_NO_PMAP_CLEANUP) {
4638 /*
4639 * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
4640 * vm_map_delete(), some map entries might have been
4641 * transferred to a "zap_map", which doesn't have a
4642 * pmap. The original pmap has already been flushed
4643 * in the vm_map_delete() call targeting the original
4644 * map, but when we get to destroying the "zap_map",
4645 * we don't have any pmap to flush, so let's just skip
4646 * all this.
4647 */
4648 } else if (entry->is_sub_map) {
4649 if (entry->use_pmap) {
4650 #ifndef NO_NESTED_PMAP
4651 pmap_unnest(map->pmap,
4652 (addr64_t)entry->vme_start,
4653 entry->vme_end - entry->vme_start);
4654 #endif /* NO_NESTED_PMAP */
4655 if ((map->mapped) && (map->ref_count)) {
4656 /* clean up parent map/maps */
4657 vm_map_submap_pmap_clean(
4658 map, entry->vme_start,
4659 entry->vme_end,
4660 entry->object.sub_map,
4661 entry->offset);
4662 }
4663 } else {
4664 vm_map_submap_pmap_clean(
4665 map, entry->vme_start, entry->vme_end,
4666 entry->object.sub_map,
4667 entry->offset);
4668 }
4669 } else if (entry->object.vm_object != kernel_object) {
4670 object = entry->object.vm_object;
4671 if((map->mapped) && (map->ref_count)) {
4672 vm_object_pmap_protect(
4673 object, entry->offset,
4674 entry->vme_end - entry->vme_start,
4675 PMAP_NULL,
4676 entry->vme_start,
4677 VM_PROT_NONE);
4678 } else {
4679 pmap_remove(map->pmap,
4680 (addr64_t)entry->vme_start,
4681 (addr64_t)entry->vme_end);
4682 }
4683 }
4684
4685 /*
4686 * All pmap mappings for this map entry must have been
4687 * cleared by now.
4688 */
4689 assert(vm_map_pmap_is_empty(map,
4690 entry->vme_start,
4691 entry->vme_end));
4692
4693 next = entry->vme_next;
4694 s = next->vme_start;
4695 last_timestamp = map->timestamp;
4696
4697 if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) &&
4698 zap_map != VM_MAP_NULL) {
4699 vm_map_size_t entry_size;
4700 /*
4701 * The caller wants to save the affected VM map entries
4702 * into the "zap_map". The caller will take care of
4703 * these entries.
4704 */
4705 /* unlink the entry from "map" ... */
4706 vm_map_entry_unlink(map, entry);
4707 /* ... and add it to the end of the "zap_map" */
4708 vm_map_entry_link(zap_map,
4709 vm_map_last_entry(zap_map),
4710 entry);
4711 entry_size = entry->vme_end - entry->vme_start;
4712 map->size -= entry_size;
4713 zap_map->size += entry_size;
4714 /* we didn't unlock the map, so no timestamp increase */
4715 last_timestamp--;
4716 } else {
4717 vm_map_entry_delete(map, entry);
4718 /* vm_map_entry_delete unlocks the map */
4719 vm_map_lock(map);
4720 }
4721
4722 entry = next;
4723
4724 if(entry == vm_map_to_entry(map)) {
4725 break;
4726 }
4727 if (last_timestamp+1 != map->timestamp) {
4728 /*
4729 * we are responsible for deleting everything
4730 * from the given space; if someone has interfered,
4731 * we pick up where we left off. Back-fills should
4732 * be all right for anyone except vm_map_delete, and
4733 * we have to assume that the task has been fully
4734 * disabled before we get here
4735 */
4736 if (!vm_map_lookup_entry(map, s, &entry)){
4737 entry = entry->vme_next;
4738 s = entry->vme_start;
4739 } else {
4740 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4741 }
4742 /*
4743 * others can not only allocate behind us; we can
4744 * also see entries coalesce while we don't hold the map lock
4745 */
4746 if(entry == vm_map_to_entry(map)) {
4747 break;
4748 }
4749 }
4750 last_timestamp = map->timestamp;
4751 }
4752
4753 if (map->wait_for_space)
4754 thread_wakeup((event_t) map);
4755 /*
4756 * wake up anybody waiting on entries that we have already deleted.
4757 */
4758 if (need_wakeup)
4759 vm_map_entry_wakeup(map);
4760
4761 return KERN_SUCCESS;
4762 }
4763
4764 /*
4765 * vm_map_remove:
4766 *
4767 * Remove the given address range from the target map.
4768 * This is the exported form of vm_map_delete.
4769 */
4770 kern_return_t
4771 vm_map_remove(
4772 register vm_map_t map,
4773 register vm_map_offset_t start,
4774 register vm_map_offset_t end,
4775 register boolean_t flags)
4776 {
4777 register kern_return_t result;
4778
4779 vm_map_lock(map);
4780 VM_MAP_RANGE_CHECK(map, start, end);
4781 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
4782 vm_map_unlock(map);
4783
4784 return(result);
4785 }
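
/*
 * Editor's note: an illustrative sketch (not in the original source)
 * of calling vm_map_remove() above.  Passing VM_MAP_REMOVE_KUNWIRE
 * asks vm_map_delete() to drop one kernel wiring per entry, as its
 * header comment describes; passing 0 requests a plain removal.
 * "map", "start" and "end" are hypothetical, page-aligned values.
 */
#if 0	/* illustrative sketch only */
static kern_return_t
example_remove_kernel_wired_range(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	/* drop our kernel wiring and delete the range in one call */
	return vm_map_remove(map, start, end, VM_MAP_REMOVE_KUNWIRE);
}
#endif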
4786
4787
4788 /*
4789 * Routine: vm_map_copy_discard
4790 *
4791 * Description:
4792 * Dispose of a map copy object (returned by
4793 * vm_map_copyin).
4794 */
4795 void
4796 vm_map_copy_discard(
4797 vm_map_copy_t copy)
4798 {
4799 TR_DECL("vm_map_copy_discard");
4800
4801 /* tr3("enter: copy 0x%x type %d", copy, copy->type);*/
4802
4803 if (copy == VM_MAP_COPY_NULL)
4804 return;
4805
4806 switch (copy->type) {
4807 case VM_MAP_COPY_ENTRY_LIST:
4808 while (vm_map_copy_first_entry(copy) !=
4809 vm_map_copy_to_entry(copy)) {
4810 vm_map_entry_t entry = vm_map_copy_first_entry(copy);
4811
4812 vm_map_copy_entry_unlink(copy, entry);
4813 vm_object_deallocate(entry->object.vm_object);
4814 vm_map_copy_entry_dispose(copy, entry);
4815 }
4816 break;
4817 case VM_MAP_COPY_OBJECT:
4818 vm_object_deallocate(copy->cpy_object);
4819 break;
4820 case VM_MAP_COPY_KERNEL_BUFFER:
4821
4822 /*
4823 * The vm_map_copy_t and possibly the data buffer were
4824 * allocated by a single call to kalloc(), i.e. the
4825 * vm_map_copy_t was not allocated out of the zone.
4826 */
4827 kfree(copy, copy->cpy_kalloc_size);
4828 return;
4829 }
4830 zfree(vm_map_copy_zone, copy);
4831 }
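
/*
 * Editor's note: a hedged sketch (not in the original source) of the
 * usual ownership rule for vm_map_copy_t objects: if the copy made by
 * vm_map_copyin() ends up not being consumed (for instance an error
 * occurs before it is handed off), the caller discards it here.
 * "src_map", "src_addr" and "len" are hypothetical.
 */
#if 0	/* illustrative sketch only */
static kern_return_t
example_copyin_then_abandon(
	vm_map_t		src_map,
	vm_map_address_t	src_addr,
	vm_map_size_t		len)
{
	vm_map_copy_t	copy;
	kern_return_t	kr;

	kr = vm_map_copyin(src_map, src_addr, len,
			   FALSE,	/* src_destroy */
			   &copy);
	if (kr != KERN_SUCCESS)
		return kr;

	/* ... some later failure means "copy" is never consumed ... */
	vm_map_copy_discard(copy);
	return KERN_FAILURE;
}
#endif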
4832
4833 /*
4834 * Routine: vm_map_copy_copy
4835 *
4836 * Description:
4837 * Move the information in a map copy object to
4838 * a new map copy object, leaving the old one
4839 * empty.
4840 *
4841 * This is used by kernel routines that need
4842 * to look at out-of-line data (in copyin form)
4843 * before deciding whether to return SUCCESS.
4844 * If the routine returns FAILURE, the original
4845 * copy object will be deallocated; therefore,
4846 * these routines must make a copy of the copy
4847 * object and leave the original empty so that
4848 * deallocation will not fail.
4849 */
4850 vm_map_copy_t
4851 vm_map_copy_copy(
4852 vm_map_copy_t copy)
4853 {
4854 vm_map_copy_t new_copy;
4855
4856 if (copy == VM_MAP_COPY_NULL)
4857 return VM_MAP_COPY_NULL;
4858
4859 /*
4860 * Allocate a new copy object, and copy the information
4861 * from the old one into it.
4862 */
4863
4864 new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
4865 *new_copy = *copy;
4866
4867 if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
4868 /*
4869 * The links in the entry chain must be
4870 * changed to point to the new copy object.
4871 */
4872 vm_map_copy_first_entry(copy)->vme_prev
4873 = vm_map_copy_to_entry(new_copy);
4874 vm_map_copy_last_entry(copy)->vme_next
4875 = vm_map_copy_to_entry(new_copy);
4876 }
4877
4878 /*
4879 * Change the old copy object into one that contains
4880 * nothing to be deallocated.
4881 */
4882 copy->type = VM_MAP_COPY_OBJECT;
4883 copy->cpy_object = VM_OBJECT_NULL;
4884
4885 /*
4886 * Return the new object.
4887 */
4888 return new_copy;
4889 }
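
/*
 * Editor's note: an illustrative sketch (not in the original source)
 * of the pattern described above.  A routine handed a copyin-form
 * "copy" that it may consume only on success first moves the contents
 * into a private copy object; if it later fails, the (now empty)
 * original can still be deallocated safely by its caller.  The
 * validation step "example_validate" is hypothetical.
 */
#if 0	/* illustrative sketch only */
static kern_return_t
example_consume_copy_on_success(
	vm_map_copy_t	copy)
{
	vm_map_copy_t	private_copy;

	private_copy = vm_map_copy_copy(copy);	/* original left empty */

	if (!example_validate(private_copy)) {
		/* give up our private copy; caller discards "copy" */
		vm_map_copy_discard(private_copy);
		return KERN_FAILURE;
	}

	/* ... consume private_copy (e.g. via vm_map_copyout()) ... */
	return KERN_SUCCESS;
}
#endif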
4890
4891 static kern_return_t
4892 vm_map_overwrite_submap_recurse(
4893 vm_map_t dst_map,
4894 vm_map_offset_t dst_addr,
4895 vm_map_size_t dst_size)
4896 {
4897 vm_map_offset_t dst_end;
4898 vm_map_entry_t tmp_entry;
4899 vm_map_entry_t entry;
4900 kern_return_t result;
4901 boolean_t encountered_sub_map = FALSE;
4902
4903
4904
4905 /*
4906 * Verify that the destination is all writeable
4907 * initially. We have to trunc the destination
4908 * address and round the copy size or we'll end up
4909 * splitting entries in strange ways.
4910 */
4911
4912 dst_end = vm_map_round_page(dst_addr + dst_size);
4913 vm_map_lock(dst_map);
4914
4915 start_pass_1:
4916 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
4917 vm_map_unlock(dst_map);
4918 return(KERN_INVALID_ADDRESS);
4919 }
4920
4921 vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(dst_addr));
4922 assert(!tmp_entry->use_pmap); /* clipping did unnest if needed */
4923
4924 for (entry = tmp_entry;;) {
4925 vm_map_entry_t next;
4926
4927 next = entry->vme_next;
4928 while(entry->is_sub_map) {
4929 vm_map_offset_t sub_start;
4930 vm_map_offset_t sub_end;
4931 vm_map_offset_t local_end;
4932
4933 if (entry->in_transition) {
4934 /*
4935 * Say that we are waiting, and wait for entry.
4936 */
4937 entry->needs_wakeup = TRUE;
4938 vm_map_entry_wait(dst_map, THREAD_UNINT);
4939
4940 goto start_pass_1;
4941 }
4942
4943 encountered_sub_map = TRUE;
4944 sub_start = entry->offset;
4945
4946 if(entry->vme_end < dst_end)
4947 sub_end = entry->vme_end;
4948 else
4949 sub_end = dst_end;
4950 sub_end -= entry->vme_start;
4951 sub_end += entry->offset;
4952 local_end = entry->vme_end;
4953 vm_map_unlock(dst_map);
4954
4955 result = vm_map_overwrite_submap_recurse(
4956 entry->object.sub_map,
4957 sub_start,
4958 sub_end - sub_start);
4959
4960 if(result != KERN_SUCCESS)
4961 return result;
4962 if (dst_end <= entry->vme_end)
4963 return KERN_SUCCESS;
4964 vm_map_lock(dst_map);
4965 if(!vm_map_lookup_entry(dst_map, local_end,
4966 &tmp_entry)) {
4967 vm_map_unlock(dst_map);
4968 return(KERN_INVALID_ADDRESS);
4969 }
4970 entry = tmp_entry;
4971 next = entry->vme_next;
4972 }
4973
4974 if ( ! (entry->protection & VM_PROT_WRITE)) {
4975 vm_map_unlock(dst_map);
4976 return(KERN_PROTECTION_FAILURE);
4977 }
4978
4979 /*
4980 * If the entry is in transition, we must wait
4981 * for it to exit that state. Anything could happen
4982 * when we unlock the map, so start over.
4983 */
4984 if (entry->in_transition) {
4985
4986 /*
4987 * Say that we are waiting, and wait for entry.
4988 */
4989 entry->needs_wakeup = TRUE;
4990 vm_map_entry_wait(dst_map, THREAD_UNINT);
4991
4992 goto start_pass_1;
4993 }
4994
4995 /*
4996 * our range is contained completely within this map entry
4997 */
4998 if (dst_end <= entry->vme_end) {
4999 vm_map_unlock(dst_map);
5000 return KERN_SUCCESS;
5001 }
5002 /*
5003 * check that range specified is contiguous region
5004 */
5005 if ((next == vm_map_to_entry(dst_map)) ||
5006 (next->vme_start != entry->vme_end)) {
5007 vm_map_unlock(dst_map);
5008 return(KERN_INVALID_ADDRESS);
5009 }
5010
5011 /*
5012 * Check for permanent objects in the destination.
5013 */
5014 if ((entry->object.vm_object != VM_OBJECT_NULL) &&
5015 ((!entry->object.vm_object->internal) ||
5016 (entry->object.vm_object->true_share))) {
5017 if(encountered_sub_map) {
5018 vm_map_unlock(dst_map);
5019 return(KERN_FAILURE);
5020 }
5021 }
5022
5023
5024 entry = next;
5025 }/* for */
5026 vm_map_unlock(dst_map);
5027 return(KERN_SUCCESS);
5028 }
5029
5030 /*
5031 * Routine: vm_map_copy_overwrite
5032 *
5033 * Description:
5034 * Copy the memory described by the map copy
5035 * object (copy; returned by vm_map_copyin) onto
5036 * the specified destination region (dst_map, dst_addr).
5037 * The destination must be writeable.
5038 *
5039 * Unlike vm_map_copyout, this routine actually
5040 * writes over previously-mapped memory. If the
5041 * previous mapping was to a permanent (user-supplied)
5042 * memory object, it is preserved.
5043 *
5044 * The attributes (protection and inheritance) of the
5045 * destination region are preserved.
5046 *
5047 * If successful, consumes the copy object.
5048 * Otherwise, the caller is responsible for it.
5049 *
5050 * Implementation notes:
5051 * To overwrite aligned temporary virtual memory, it is
5052 * sufficient to remove the previous mapping and insert
5053 * the new copy. This replacement is done either on
5054 * the whole region (if no permanent virtual memory
5055 * objects are embedded in the destination region) or
5056 * in individual map entries.
5057 *
5058 * To overwrite permanent virtual memory, it is necessary
5059 * to copy each page, as the external memory management
5060 * interface currently does not provide any optimizations.
5061 *
5062 * Unaligned memory also has to be copied. It is possible
5063 * to use 'vm_trickery' to copy the aligned data. This is
5064 * not done but not hard to implement.
5065 *
5066 * Once a page of permanent memory has been overwritten,
5067 * it is impossible to interrupt this function; otherwise,
5068 * the call would be neither atomic nor location-independent.
5069 * The kernel-state portion of a user thread must be
5070 * interruptible.
5071 *
5072 * It may be expensive to forward all requests that might
5073 * overwrite permanent memory (vm_write, vm_copy) to
5074 * uninterruptible kernel threads. This routine may be
5075 * called by interruptible threads; however, success is
5076 * not guaranteed -- if the request cannot be performed
5077 * atomically and interruptibly, an error indication is
5078 * returned.
5079 */
5080
5081 static kern_return_t
5082 vm_map_copy_overwrite_nested(
5083 vm_map_t dst_map,
5084 vm_map_address_t dst_addr,
5085 vm_map_copy_t copy,
5086 boolean_t interruptible,
5087 pmap_t pmap)
5088 {
5089 vm_map_offset_t dst_end;
5090 vm_map_entry_t tmp_entry;
5091 vm_map_entry_t entry;
5092 kern_return_t kr;
5093 boolean_t aligned = TRUE;
5094 boolean_t contains_permanent_objects = FALSE;
5095 boolean_t encountered_sub_map = FALSE;
5096 vm_map_offset_t base_addr;
5097 vm_map_size_t copy_size;
5098 vm_map_size_t total_size;
5099
5100
5101 /*
5102 * Check for null copy object.
5103 */
5104
5105 if (copy == VM_MAP_COPY_NULL)
5106 return(KERN_SUCCESS);
5107
5108 /*
5109 * Check for special kernel buffer allocated
5110 * by new_ipc_kmsg_copyin.
5111 */
5112
5113 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
5114 return(vm_map_copyout_kernel_buffer(
5115 dst_map, &dst_addr,
5116 copy, TRUE));
5117 }
5118
5119 /*
5120 * Only works for entry lists at the moment. Will
5121 * support page lists later.
5122 */
5123
5124 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
5125
5126 if (copy->size == 0) {
5127 vm_map_copy_discard(copy);
5128 return(KERN_SUCCESS);
5129 }
5130
5131 /*
5132 * Verify that the destination is all writeable
5133 * initially. We have to trunc the destination
5134 * address and round the copy size or we'll end up
5135 * splitting entries in strange ways.
5136 */
5137
5138 if (!page_aligned(copy->size) ||
5139 !page_aligned (copy->offset) ||
5140 !page_aligned (dst_addr))
5141 {
5142 aligned = FALSE;
5143 dst_end = vm_map_round_page(dst_addr + copy->size);
5144 } else {
5145 dst_end = dst_addr + copy->size;
5146 }
5147
5148 vm_map_lock(dst_map);
5149
5150 /* LP64todo - remove this check when vm_map_commpage64()
5151 * no longer has to stuff in a map_entry for the commpage
5152 * above the map's max_offset.
5153 */
5154 if (dst_addr >= dst_map->max_offset) {
5155 vm_map_unlock(dst_map);
5156 return(KERN_INVALID_ADDRESS);
5157 }
5158
5159 start_pass_1:
5160 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
5161 vm_map_unlock(dst_map);
5162 return(KERN_INVALID_ADDRESS);
5163 }
5164 vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(dst_addr));
5165 for (entry = tmp_entry;;) {
5166 vm_map_entry_t next = entry->vme_next;
5167
5168 while(entry->is_sub_map) {
5169 vm_map_offset_t sub_start;
5170 vm_map_offset_t sub_end;
5171 vm_map_offset_t local_end;
5172
5173 if (entry->in_transition) {
5174
5175 /*
5176 * Say that we are waiting, and wait for entry.
5177 */
5178 entry->needs_wakeup = TRUE;
5179 vm_map_entry_wait(dst_map, THREAD_UNINT);
5180
5181 goto start_pass_1;
5182 }
5183
5184 local_end = entry->vme_end;
5185 if (!(entry->needs_copy)) {
5186 /* if needs_copy we are a COW submap */
5187 /* in such a case we just replace so */
5188 /* there is no need for the follow- */
5189 /* ing check. */
5190 encountered_sub_map = TRUE;
5191 sub_start = entry->offset;
5192
5193 if(entry->vme_end < dst_end)
5194 sub_end = entry->vme_end;
5195 else
5196 sub_end = dst_end;
5197 sub_end -= entry->vme_start;
5198 sub_end += entry->offset;
5199 vm_map_unlock(dst_map);
5200
5201 kr = vm_map_overwrite_submap_recurse(
5202 entry->object.sub_map,
5203 sub_start,
5204 sub_end - sub_start);
5205 if(kr != KERN_SUCCESS)
5206 return kr;
5207 vm_map_lock(dst_map);
5208 }
5209
5210 if (dst_end <= entry->vme_end)
5211 goto start_overwrite;
5212 if(!vm_map_lookup_entry(dst_map, local_end,
5213 &entry)) {
5214 vm_map_unlock(dst_map);
5215 return(KERN_INVALID_ADDRESS);
5216 }
5217 next = entry->vme_next;
5218 }
5219
5220 if ( ! (entry->protection & VM_PROT_WRITE)) {
5221 vm_map_unlock(dst_map);
5222 return(KERN_PROTECTION_FAILURE);
5223 }
5224
5225 /*
5226 * If the entry is in transition, we must wait
5227 * for it to exit that state. Anything could happen
5228 * when we unlock the map, so start over.
5229 */
5230 if (entry->in_transition) {
5231
5232 /*
5233 * Say that we are waiting, and wait for entry.
5234 */
5235 entry->needs_wakeup = TRUE;
5236 vm_map_entry_wait(dst_map, THREAD_UNINT);
5237
5238 goto start_pass_1;
5239 }
5240
5241 /*
5242 * our range is contained completely within this map entry
5243 */
5244 if (dst_end <= entry->vme_end)
5245 break;
5246 /*
5247 * check that range specified is contiguous region
5248 */
5249 if ((next == vm_map_to_entry(dst_map)) ||
5250 (next->vme_start != entry->vme_end)) {
5251 vm_map_unlock(dst_map);
5252 return(KERN_INVALID_ADDRESS);
5253 }
5254
5255
5256 /*
5257 * Check for permanent objects in the destination.
5258 */
5259 if ((entry->object.vm_object != VM_OBJECT_NULL) &&
5260 ((!entry->object.vm_object->internal) ||
5261 (entry->object.vm_object->true_share))) {
5262 contains_permanent_objects = TRUE;
5263 }
5264
5265 entry = next;
5266 }/* for */
5267
5268 start_overwrite:
5269 /*
5270 * If there are permanent objects in the destination, then
5271 * the copy cannot be interrupted.
5272 */
5273
5274 if (interruptible && contains_permanent_objects) {
5275 vm_map_unlock(dst_map);
5276 return(KERN_FAILURE); /* XXX */
5277 }
5278
5279 /*
5280 *
5281 * Make a second pass, overwriting the data
5282 * At the beginning of each loop iteration,
5283 * the next entry to be overwritten is "tmp_entry"
5284 * (initially, the value returned from the lookup above),
5285 * and the starting address expected in that entry
5286 * is "start".
5287 */
5288
5289 total_size = copy->size;
5290 if(encountered_sub_map) {
5291 copy_size = 0;
5292 /* re-calculate tmp_entry since we've had the map */
5293 /* unlocked */
5294 if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
5295 vm_map_unlock(dst_map);
5296 return(KERN_INVALID_ADDRESS);
5297 }
5298 } else {
5299 copy_size = copy->size;
5300 }
5301
5302 base_addr = dst_addr;
5303 while(TRUE) {
5304 /* deconstruct the copy object and do in parts */
5305 /* only in sub_map, interruptible case */
5306 vm_map_entry_t copy_entry;
5307 vm_map_entry_t previous_prev = VM_MAP_ENTRY_NULL;
5308 vm_map_entry_t next_copy = VM_MAP_ENTRY_NULL;
5309 int nentries;
5310 int remaining_entries = 0;
5311 int new_offset = 0;
5312
5313 for (entry = tmp_entry; copy_size == 0;) {
5314 vm_map_entry_t next;
5315
5316 next = entry->vme_next;
5317
5318 /* tmp_entry and base address are moved along */
5319 /* each time we encounter a sub-map. Otherwise */
5320 /* entry can outpace tmp_entry, and the copy_size */
5321 /* may reflect the distance between them */
5322 /* if the current entry is found to be in transition */
5323 /* we will start over at the beginning or the last */
5324 /* encounter of a submap as dictated by base_addr */
5325 /* we will zero copy_size accordingly. */
5326 if (entry->in_transition) {
5327 /*
5328 * Say that we are waiting, and wait for entry.
5329 */
5330 entry->needs_wakeup = TRUE;
5331 vm_map_entry_wait(dst_map, THREAD_UNINT);
5332
5333 if(!vm_map_lookup_entry(dst_map, base_addr,
5334 &tmp_entry)) {
5335 vm_map_unlock(dst_map);
5336 return(KERN_INVALID_ADDRESS);
5337 }
5338 copy_size = 0;
5339 entry = tmp_entry;
5340 continue;
5341 }
5342 if(entry->is_sub_map) {
5343 vm_map_offset_t sub_start;
5344 vm_map_offset_t sub_end;
5345 vm_map_offset_t local_end;
5346
5347 if (entry->needs_copy) {
5348 /* if this is a COW submap */
5349 /* just back the range with an */
5350 /* anonymous entry */
5351 if(entry->vme_end < dst_end)
5352 sub_end = entry->vme_end;
5353 else
5354 sub_end = dst_end;
5355 if(entry->vme_start < base_addr)
5356 sub_start = base_addr;
5357 else
5358 sub_start = entry->vme_start;
5359 vm_map_clip_end(
5360 dst_map, entry, sub_end);
5361 vm_map_clip_start(
5362 dst_map, entry, sub_start);
5363 assert(!entry->use_pmap);
5364 entry->is_sub_map = FALSE;
5365 vm_map_deallocate(
5366 entry->object.sub_map);
5367 entry->object.sub_map = NULL;
5368 entry->is_shared = FALSE;
5369 entry->needs_copy = FALSE;
5370 entry->offset = 0;
5371 /*
5372 * XXX FBDP
5373 * We should propagate the protections
5374 * of the submap entry here instead
5375 * of forcing them to VM_PROT_ALL...
5376 * Or better yet, we should inherit
5377 * the protection of the copy_entry.
5378 */
5379 entry->protection = VM_PROT_ALL;
5380 entry->max_protection = VM_PROT_ALL;
5381 entry->wired_count = 0;
5382 entry->user_wired_count = 0;
5383 if(entry->inheritance
5384 == VM_INHERIT_SHARE)
5385 entry->inheritance = VM_INHERIT_COPY;
5386 continue;
5387 }
5388 /* first take care of any non-sub_map */
5389 /* entries to send */
5390 if(base_addr < entry->vme_start) {
5391 /* stuff to send */
5392 copy_size =
5393 entry->vme_start - base_addr;
5394 break;
5395 }
5396 sub_start = entry->offset;
5397
5398 if(entry->vme_end < dst_end)
5399 sub_end = entry->vme_end;
5400 else
5401 sub_end = dst_end;
5402 sub_end -= entry->vme_start;
5403 sub_end += entry->offset;
5404 local_end = entry->vme_end;
5405 vm_map_unlock(dst_map);
5406 copy_size = sub_end - sub_start;
5407
5408 /* adjust the copy object */
5409 if (total_size > copy_size) {
5410 vm_map_size_t local_size = 0;
5411 vm_map_size_t entry_size;
5412
5413 nentries = 1;
5414 new_offset = copy->offset;
5415 copy_entry = vm_map_copy_first_entry(copy);
5416 while(copy_entry !=
5417 vm_map_copy_to_entry(copy)){
5418 entry_size = copy_entry->vme_end -
5419 copy_entry->vme_start;
5420 if((local_size < copy_size) &&
5421 ((local_size + entry_size)
5422 >= copy_size)) {
5423 vm_map_copy_clip_end(copy,
5424 copy_entry,
5425 copy_entry->vme_start +
5426 (copy_size - local_size));
5427 entry_size = copy_entry->vme_end -
5428 copy_entry->vme_start;
5429 local_size += entry_size;
5430 new_offset += entry_size;
5431 }
5432 if(local_size >= copy_size) {
5433 next_copy = copy_entry->vme_next;
5434 copy_entry->vme_next =
5435 vm_map_copy_to_entry(copy);
5436 previous_prev =
5437 copy->cpy_hdr.links.prev;
5438 copy->cpy_hdr.links.prev = copy_entry;
5439 copy->size = copy_size;
5440 remaining_entries =
5441 copy->cpy_hdr.nentries;
5442 remaining_entries -= nentries;
5443 copy->cpy_hdr.nentries = nentries;
5444 break;
5445 } else {
5446 local_size += entry_size;
5447 new_offset += entry_size;
5448 nentries++;
5449 }
5450 copy_entry = copy_entry->vme_next;
5451 }
5452 }
5453
5454 if((entry->use_pmap) && (pmap == NULL)) {
5455 kr = vm_map_copy_overwrite_nested(
5456 entry->object.sub_map,
5457 sub_start,
5458 copy,
5459 interruptible,
5460 entry->object.sub_map->pmap);
5461 } else if (pmap != NULL) {
5462 kr = vm_map_copy_overwrite_nested(
5463 entry->object.sub_map,
5464 sub_start,
5465 copy,
5466 interruptible, pmap);
5467 } else {
5468 kr = vm_map_copy_overwrite_nested(
5469 entry->object.sub_map,
5470 sub_start,
5471 copy,
5472 interruptible,
5473 dst_map->pmap);
5474 }
5475 if(kr != KERN_SUCCESS) {
5476 if(next_copy != NULL) {
5477 copy->cpy_hdr.nentries +=
5478 remaining_entries;
5479 copy->cpy_hdr.links.prev->vme_next =
5480 next_copy;
5481 copy->cpy_hdr.links.prev
5482 = previous_prev;
5483 copy->size = total_size;
5484 }
5485 return kr;
5486 }
5487 if (dst_end <= local_end) {
5488 return(KERN_SUCCESS);
5489 }
5490 /* otherwise copy no longer exists, it was */
5491 /* destroyed after successful copy_overwrite */
5492 copy = (vm_map_copy_t)
5493 zalloc(vm_map_copy_zone);
5494 vm_map_copy_first_entry(copy) =
5495 vm_map_copy_last_entry(copy) =
5496 vm_map_copy_to_entry(copy);
5497 copy->type = VM_MAP_COPY_ENTRY_LIST;
5498 copy->offset = new_offset;
5499
5500 total_size -= copy_size;
5501 copy_size = 0;
5502 /* put back remainder of copy in container */
5503 if(next_copy != NULL) {
5504 copy->cpy_hdr.nentries = remaining_entries;
5505 copy->cpy_hdr.links.next = next_copy;
5506 copy->cpy_hdr.links.prev = previous_prev;
5507 copy->size = total_size;
5508 next_copy->vme_prev =
5509 vm_map_copy_to_entry(copy);
5510 next_copy = NULL;
5511 }
5512 base_addr = local_end;
5513 vm_map_lock(dst_map);
5514 if(!vm_map_lookup_entry(dst_map,
5515 local_end, &tmp_entry)) {
5516 vm_map_unlock(dst_map);
5517 return(KERN_INVALID_ADDRESS);
5518 }
5519 entry = tmp_entry;
5520 continue;
5521 }
5522 if (dst_end <= entry->vme_end) {
5523 copy_size = dst_end - base_addr;
5524 break;
5525 }
5526
5527 if ((next == vm_map_to_entry(dst_map)) ||
5528 (next->vme_start != entry->vme_end)) {
5529 vm_map_unlock(dst_map);
5530 return(KERN_INVALID_ADDRESS);
5531 }
5532
5533 entry = next;
5534 }/* for */
5535
5536 next_copy = NULL;
5537 nentries = 1;
5538
5539 /* adjust the copy object */
5540 if (total_size > copy_size) {
5541 vm_map_size_t local_size = 0;
5542 vm_map_size_t entry_size;
5543
5544 new_offset = copy->offset;
5545 copy_entry = vm_map_copy_first_entry(copy);
5546 while(copy_entry != vm_map_copy_to_entry(copy)) {
5547 entry_size = copy_entry->vme_end -
5548 copy_entry->vme_start;
5549 if((local_size < copy_size) &&
5550 ((local_size + entry_size)
5551 >= copy_size)) {
5552 vm_map_copy_clip_end(copy, copy_entry,
5553 copy_entry->vme_start +
5554 (copy_size - local_size));
5555 entry_size = copy_entry->vme_end -
5556 copy_entry->vme_start;
5557 local_size += entry_size;
5558 new_offset += entry_size;
5559 }
5560 if(local_size >= copy_size) {
5561 next_copy = copy_entry->vme_next;
5562 copy_entry->vme_next =
5563 vm_map_copy_to_entry(copy);
5564 previous_prev =
5565 copy->cpy_hdr.links.prev;
5566 copy->cpy_hdr.links.prev = copy_entry;
5567 copy->size = copy_size;
5568 remaining_entries =
5569 copy->cpy_hdr.nentries;
5570 remaining_entries -= nentries;
5571 copy->cpy_hdr.nentries = nentries;
5572 break;
5573 } else {
5574 local_size += entry_size;
5575 new_offset += entry_size;
5576 nentries++;
5577 }
5578 copy_entry = copy_entry->vme_next;
5579 }
5580 }
5581
5582 if (aligned) {
5583 pmap_t local_pmap;
5584
5585 if(pmap)
5586 local_pmap = pmap;
5587 else
5588 local_pmap = dst_map->pmap;
5589
5590 if ((kr = vm_map_copy_overwrite_aligned(
5591 dst_map, tmp_entry, copy,
5592 base_addr, local_pmap)) != KERN_SUCCESS) {
5593 if(next_copy != NULL) {
5594 copy->cpy_hdr.nentries +=
5595 remaining_entries;
5596 copy->cpy_hdr.links.prev->vme_next =
5597 next_copy;
5598 copy->cpy_hdr.links.prev =
5599 previous_prev;
5600 copy->size += copy_size;
5601 }
5602 return kr;
5603 }
5604 vm_map_unlock(dst_map);
5605 } else {
5606 /*
5607 * Performance gain:
5608 *
5609 * if the copy and dst address are misaligned but the same
5610 * offset within the page we can copy_not_aligned the
5611 * misaligned parts and copy aligned the rest. If they are
5612 * aligned but len is unaligned we simply need to copy
5613 * the end bit unaligned. We'll need to split the misaligned
5614 * bits of the region in this case!
5615 */
5616 /* ALWAYS UNLOCKS THE dst_map MAP */
5617 if ((kr = vm_map_copy_overwrite_unaligned( dst_map,
5618 tmp_entry, copy, base_addr)) != KERN_SUCCESS) {
5619 if(next_copy != NULL) {
5620 copy->cpy_hdr.nentries +=
5621 remaining_entries;
5622 copy->cpy_hdr.links.prev->vme_next =
5623 next_copy;
5624 copy->cpy_hdr.links.prev =
5625 previous_prev;
5626 copy->size += copy_size;
5627 }
5628 return kr;
5629 }
5630 }
5631 total_size -= copy_size;
5632 if(total_size == 0)
5633 break;
5634 base_addr += copy_size;
5635 copy_size = 0;
5636 copy->offset = new_offset;
5637 if(next_copy != NULL) {
5638 copy->cpy_hdr.nentries = remaining_entries;
5639 copy->cpy_hdr.links.next = next_copy;
5640 copy->cpy_hdr.links.prev = previous_prev;
5641 next_copy->vme_prev = vm_map_copy_to_entry(copy);
5642 copy->size = total_size;
5643 }
5644 vm_map_lock(dst_map);
5645 while(TRUE) {
5646 if (!vm_map_lookup_entry(dst_map,
5647 base_addr, &tmp_entry)) {
5648 vm_map_unlock(dst_map);
5649 return(KERN_INVALID_ADDRESS);
5650 }
5651 if (tmp_entry->in_transition) {
5652 entry->needs_wakeup = TRUE;
5653 vm_map_entry_wait(dst_map, THREAD_UNINT);
5654 } else {
5655 break;
5656 }
5657 }
5658 vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(base_addr));
5659
5660 entry = tmp_entry;
5661 } /* while */
5662
5663 /*
5664 * Throw away the vm_map_copy object
5665 */
5666 vm_map_copy_discard(copy);
5667
5668 return(KERN_SUCCESS);
5669 }/* vm_map_copy_overwrite */
5670
5671 kern_return_t
5672 vm_map_copy_overwrite(
5673 vm_map_t dst_map,
5674 vm_map_offset_t dst_addr,
5675 vm_map_copy_t copy,
5676 boolean_t interruptible)
5677 {
5678 return vm_map_copy_overwrite_nested(
5679 dst_map, dst_addr, copy, interruptible, (pmap_t) NULL);
5680 }
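
/*
 * Editor's note: a minimal, hedged sketch (not in the original source)
 * combining vm_map_copyin() with vm_map_copy_overwrite() above to copy
 * "len" bytes over an existing, writeable mapping at "dst_addr".  On
 * success the copy object is consumed; on failure the caller must
 * discard it.  All names are hypothetical and the addresses and length
 * are assumed page aligned.
 */
#if 0	/* illustrative sketch only */
static kern_return_t
example_overwrite_range(
	vm_map_t		src_map,
	vm_map_address_t	src_addr,
	vm_map_t		dst_map,
	vm_map_address_t	dst_addr,
	vm_map_size_t		len)
{
	vm_map_copy_t	copy;
	kern_return_t	kr;

	kr = vm_map_copyin(src_map, src_addr, len,
			   FALSE,	/* don't destroy the source */
			   &copy);
	if (kr != KERN_SUCCESS)
		return kr;

	kr = vm_map_copy_overwrite(dst_map, dst_addr, copy,
				   FALSE);	/* not interruptible */
	if (kr != KERN_SUCCESS)
		vm_map_copy_discard(copy);	/* not consumed on failure */

	return kr;
}
#endif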
5681
5682
5683 /*
5684 * Routine: vm_map_copy_overwrite_unaligned [internal use only]
5685 *
5686 * Description:
5687 * Physically copy unaligned data
5688 *
5689 * Implementation:
5690 * Unaligned parts of pages have to be physically copied. We use
5691 * a modified form of vm_fault_copy (which understands non-aligned
5692 * page offsets and sizes) to do the copy. We attempt to copy as
5693 * much memory in one go as possible; however, vm_fault_copy copies
5694 * within one memory object, so we have to find the smallest of "amount left",
5695 * "source object data size" and "target object data size". With
5696 * unaligned data we don't need to split regions; therefore the source
5697 * (copy) object should be one map entry, while the target range may be split
5698 * over multiple map entries. In any event we are pessimistic
5699 * about these assumptions.
5700 *
5701 * Assumptions:
5702 * dst_map is locked on entry and is returned locked on success,
5703 * unlocked on error.
5704 */
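
/*
 * Editor's note: an illustrative helper (not in the original source)
 * spelling out the chunk-size rule used by the loop below: each pass
 * of vm_fault_copy() is bounded by the bytes left in the destination
 * entry, the bytes left in the source copy entry, and the total amount
 * still to be copied.
 */
#if 0	/* illustrative sketch only */
static vm_map_size_t
example_unaligned_chunk_size(
	vm_map_size_t	dst_size,	/* entry->vme_end - start */
	vm_map_size_t	src_size,	/* copy entry bytes past src_offset */
	vm_map_size_t	amount_left)
{
	vm_map_size_t	copy_size;

	copy_size = (dst_size < src_size) ? dst_size : src_size;
	if (copy_size > amount_left)
		copy_size = amount_left;
	return copy_size;
}
#endif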
5705
5706 static kern_return_t
5707 vm_map_copy_overwrite_unaligned(
5708 vm_map_t dst_map,
5709 vm_map_entry_t entry,
5710 vm_map_copy_t copy,
5711 vm_map_offset_t start)
5712 {
5713 vm_map_entry_t copy_entry = vm_map_copy_first_entry(copy);
5714 vm_map_version_t version;
5715 vm_object_t dst_object;
5716 vm_object_offset_t dst_offset;
5717 vm_object_offset_t src_offset;
5718 vm_object_offset_t entry_offset;
5719 vm_map_offset_t entry_end;
5720 vm_map_size_t src_size,
5721 dst_size,
5722 copy_size,
5723 amount_left;
5724 kern_return_t kr = KERN_SUCCESS;
5725
5726 vm_map_lock_write_to_read(dst_map);
5727
5728 src_offset = copy->offset - vm_object_trunc_page(copy->offset);
5729 amount_left = copy->size;
5730 /*
5731 * unaligned so we never clipped this entry, we need the offset into
5732 * the vm_object not just the data.
5733 */
5734 while (amount_left > 0) {
5735
5736 if (entry == vm_map_to_entry(dst_map)) {
5737 vm_map_unlock_read(dst_map);
5738 return KERN_INVALID_ADDRESS;
5739 }
5740
5741 /* "start" must be within the current map entry */
5742 assert ((start>=entry->vme_start) && (start<entry->vme_end));
5743
5744 dst_offset = start - entry->vme_start;
5745
5746 dst_size = entry->vme_end - start;
5747
5748 src_size = copy_entry->vme_end -
5749 (copy_entry->vme_start + src_offset);
5750
5751 if (dst_size < src_size) {
5752 /*
5753 * we can only copy dst_size bytes before
5754 * we have to get the next destination entry
5755 */
5756 copy_size = dst_size;
5757 } else {
5758 /*
5759 * we can only copy src_size bytes before
5760 * we have to get the next source copy entry
5761 */
5762 copy_size = src_size;
5763 }
5764
5765 if (copy_size > amount_left) {
5766 copy_size = amount_left;
5767 }
5768 /*
5769 * Entry needs copy: create a shadow object for the
5770 * copy-on-write region.
5771 */
5772 if (entry->needs_copy &&
5773 ((entry->protection & VM_PROT_WRITE) != 0))
5774 {
5775 if (vm_map_lock_read_to_write(dst_map)) {
5776 vm_map_lock_read(dst_map);
5777 goto RetryLookup;
5778 }
5779 vm_object_shadow(&entry->object.vm_object,
5780 &entry->offset,
5781 (vm_map_size_t)(entry->vme_end
5782 - entry->vme_start));
5783 entry->needs_copy = FALSE;
5784 vm_map_lock_write_to_read(dst_map);
5785 }
5786 dst_object = entry->object.vm_object;
5787 /*
5788 * unlike with the virtual (aligned) copy, we're going
5789 * to fault on it; therefore we need a target object.
5790 */
5791 if (dst_object == VM_OBJECT_NULL) {
5792 if (vm_map_lock_read_to_write(dst_map)) {
5793 vm_map_lock_read(dst_map);
5794 goto RetryLookup;
5795 }
5796 dst_object = vm_object_allocate((vm_map_size_t)
5797 entry->vme_end - entry->vme_start);
5798 entry->object.vm_object = dst_object;
5799 entry->offset = 0;
5800 vm_map_lock_write_to_read(dst_map);
5801 }
5802 /*
5803 * Take an object reference and unlock map. The "entry" may
5804 * disappear or change when the map is unlocked.
5805 */
5806 vm_object_reference(dst_object);
5807 version.main_timestamp = dst_map->timestamp;
5808 entry_offset = entry->offset;
5809 entry_end = entry->vme_end;
5810 vm_map_unlock_read(dst_map);
5811 /*
5812 * Copy as much as possible in one pass
5813 */
5814 kr = vm_fault_copy(
5815 copy_entry->object.vm_object,
5816 copy_entry->offset + src_offset,
5817 &copy_size,
5818 dst_object,
5819 entry_offset + dst_offset,
5820 dst_map,
5821 &version,
5822 THREAD_UNINT );
5823
5824 start += copy_size;
5825 src_offset += copy_size;
5826 amount_left -= copy_size;
5827 /*
5828 * Release the object reference
5829 */
5830 vm_object_deallocate(dst_object);
5831 /*
5832 * If a hard error occurred, return it now
5833 */
5834 if (kr != KERN_SUCCESS)
5835 return kr;
5836
5837 if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
5838 || amount_left == 0)
5839 {
5840 /*
5841 * all done with this copy entry, dispose.
5842 */
5843 vm_map_copy_entry_unlink(copy, copy_entry);
5844 vm_object_deallocate(copy_entry->object.vm_object);
5845 vm_map_copy_entry_dispose(copy, copy_entry);
5846
5847 if ((copy_entry = vm_map_copy_first_entry(copy))
5848 == vm_map_copy_to_entry(copy) && amount_left) {
5849 /*
5850 * not finished copying but run out of source
5851 */
5852 return KERN_INVALID_ADDRESS;
5853 }
5854 src_offset = 0;
5855 }
5856
5857 if (amount_left == 0)
5858 return KERN_SUCCESS;
5859
5860 vm_map_lock_read(dst_map);
5861 if (version.main_timestamp == dst_map->timestamp) {
5862 if (start == entry_end) {
5863 /*
5864 * destination region is split. Use the version
5865 * information to avoid a lookup in the normal
5866 * case.
5867 */
5868 entry = entry->vme_next;
5869 /*
5870 * should be contiguous. Fail if we encounter
5871 * a hole in the destination.
5872 */
5873 if (start != entry->vme_start) {
5874 vm_map_unlock_read(dst_map);
5875 return KERN_INVALID_ADDRESS;
5876 }
5877 }
5878 } else {
5879 /*
5880 * Map version check failed.
5881 * we must lookup the entry because somebody
5882 * might have changed the map behind our backs.
5883 */
5884 RetryLookup:
5885 if (!vm_map_lookup_entry(dst_map, start, &entry))
5886 {
5887 vm_map_unlock_read(dst_map);
5888 return KERN_INVALID_ADDRESS;
5889 }
5890 }
5891 }/* while */
5892
5893 return KERN_SUCCESS;
5894 }/* vm_map_copy_overwrite_unaligned */
5895
5896 /*
5897 * Routine: vm_map_copy_overwrite_aligned [internal use only]
5898 *
5899 * Description:
5900 * Does all the vm_trickery possible for whole pages.
5901 *
5902 * Implementation:
5903 *
5904 * If there are no permanent objects in the destination,
5905 * and the source and destination map entry zones match,
5906 * and the destination map entry is not shared,
5907 * then the map entries can be deleted and replaced
5908 * with those from the copy. The following code is the
5909 * basic idea of what to do, but there are lots of annoying
5910 * little details about getting protection and inheritance
5911 * right. Should add protection, inheritance, and sharing checks
5912 * to the above pass and make sure that no wiring is involved.
5913 */
5914
5915 static kern_return_t
5916 vm_map_copy_overwrite_aligned(
5917 vm_map_t dst_map,
5918 vm_map_entry_t tmp_entry,
5919 vm_map_copy_t copy,
5920 vm_map_offset_t start,
5921 __unused pmap_t pmap)
5922 {
5923 vm_object_t object;
5924 vm_map_entry_t copy_entry;
5925 vm_map_size_t copy_size;
5926 vm_map_size_t size;
5927 vm_map_entry_t entry;
5928
5929 while ((copy_entry = vm_map_copy_first_entry(copy))
5930 != vm_map_copy_to_entry(copy))
5931 {
5932 copy_size = (copy_entry->vme_end - copy_entry->vme_start);
5933
5934 entry = tmp_entry;
5935 assert(!entry->use_pmap); /* unnested when clipped earlier */
5936 if (entry == vm_map_to_entry(dst_map)) {
5937 vm_map_unlock(dst_map);
5938 return KERN_INVALID_ADDRESS;
5939 }
5940 size = (entry->vme_end - entry->vme_start);
5941 /*
5942 * Make sure that no holes popped up in the
5943 * address map, and that the protection is
5944 * still valid, in case the map was unlocked
5945 * earlier.
5946 */
5947
5948 if ((entry->vme_start != start) || ((entry->is_sub_map)
5949 && !entry->needs_copy)) {
5950 vm_map_unlock(dst_map);
5951 return(KERN_INVALID_ADDRESS);
5952 }
5953 assert(entry != vm_map_to_entry(dst_map));
5954
5955 /*
5956 * Check protection again
5957 */
5958
5959 if ( ! (entry->protection & VM_PROT_WRITE)) {
5960 vm_map_unlock(dst_map);
5961 return(KERN_PROTECTION_FAILURE);
5962 }
5963
5964 /*
5965 * Adjust to source size first
5966 */
5967
5968 if (copy_size < size) {
5969 vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
5970 size = copy_size;
5971 }
5972
5973 /*
5974 * Adjust to destination size
5975 */
5976
5977 if (size < copy_size) {
5978 vm_map_copy_clip_end(copy, copy_entry,
5979 copy_entry->vme_start + size);
5980 copy_size = size;
5981 }
5982
5983 assert((entry->vme_end - entry->vme_start) == size);
5984 assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
5985 assert((copy_entry->vme_end - copy_entry->vme_start) == size);
5986
5987 /*
5988 * If the destination contains temporary unshared memory,
5989 * we can perform the copy by throwing it away and
5990 * installing the source data.
5991 */
5992
5993 object = entry->object.vm_object;
5994 if ((!entry->is_shared &&
5995 ((object == VM_OBJECT_NULL) ||
5996 (object->internal && !object->true_share))) ||
5997 entry->needs_copy) {
5998 vm_object_t old_object = entry->object.vm_object;
5999 vm_object_offset_t old_offset = entry->offset;
6000 vm_object_offset_t offset;
6001
6002 /*
6003 * Ensure that the source and destination aren't
6004 * identical
6005 */
6006 if (old_object == copy_entry->object.vm_object &&
6007 old_offset == copy_entry->offset) {
6008 vm_map_copy_entry_unlink(copy, copy_entry);
6009 vm_map_copy_entry_dispose(copy, copy_entry);
6010
6011 if (old_object != VM_OBJECT_NULL)
6012 vm_object_deallocate(old_object);
6013
6014 start = tmp_entry->vme_end;
6015 tmp_entry = tmp_entry->vme_next;
6016 continue;
6017 }
6018
6019 if (old_object != VM_OBJECT_NULL) {
6020 if(entry->is_sub_map) {
6021 if(entry->use_pmap) {
6022 #ifndef NO_NESTED_PMAP
6023 pmap_unnest(dst_map->pmap,
6024 (addr64_t)entry->vme_start,
6025 entry->vme_end - entry->vme_start);
6026 #endif /* NO_NESTED_PMAP */
6027 if(dst_map->mapped) {
6028 /* clean up parent */
6029 /* map/maps */
6030 vm_map_submap_pmap_clean(
6031 dst_map, entry->vme_start,
6032 entry->vme_end,
6033 entry->object.sub_map,
6034 entry->offset);
6035 }
6036 } else {
6037 vm_map_submap_pmap_clean(
6038 dst_map, entry->vme_start,
6039 entry->vme_end,
6040 entry->object.sub_map,
6041 entry->offset);
6042 }
6043 vm_map_deallocate(
6044 entry->object.sub_map);
6045 } else {
6046 if(dst_map->mapped) {
6047 vm_object_pmap_protect(
6048 entry->object.vm_object,
6049 entry->offset,
6050 entry->vme_end
6051 - entry->vme_start,
6052 PMAP_NULL,
6053 entry->vme_start,
6054 VM_PROT_NONE);
6055 } else {
6056 pmap_remove(dst_map->pmap,
6057 (addr64_t)(entry->vme_start),
6058 (addr64_t)(entry->vme_end));
6059 }
6060 vm_object_deallocate(old_object);
6061 }
6062 }
6063
6064 entry->is_sub_map = FALSE;
6065 entry->object = copy_entry->object;
6066 object = entry->object.vm_object;
6067 entry->needs_copy = copy_entry->needs_copy;
6068 entry->wired_count = 0;
6069 entry->user_wired_count = 0;
6070 offset = entry->offset = copy_entry->offset;
6071 /*
6072 * XXX FBDP
6073 * We should propagate the submap entry's protections
6074 * here instead of forcing VM_PROT_ALL.
6075 * Or better yet, we should inherit the protection
6076 * of the copy_entry.
6077 */
6078 entry->protection = VM_PROT_ALL;
6079 entry->max_protection = VM_PROT_ALL;
6080
6081 vm_map_copy_entry_unlink(copy, copy_entry);
6082 vm_map_copy_entry_dispose(copy, copy_entry);
6083
6084 /*
6085 * we could try to push pages into the pmap at this point, BUT
6086 * this optimization only saved on average 2 us per page if ALL
6087 * the pages in the source were currently mapped and ALL the pages
6088 * in the dest were touched.  If fewer than 2/3 of the pages were
6089 * touched, this optimization actually cost more cycles.  It also puts
6090 * a lot of pressure on the pmap layer with respect to mapping structures.
6091 */
6092
6093 /*
6094 * Set up for the next iteration. The map
6095 * has not been unlocked, so the next
6096 * address should be at the end of this
6097 * entry, and the next map entry should be
6098 * the one following it.
6099 */
6100
6101 start = tmp_entry->vme_end;
6102 tmp_entry = tmp_entry->vme_next;
6103 } else {
6104 vm_map_version_t version;
6105 vm_object_t dst_object = entry->object.vm_object;
6106 vm_object_offset_t dst_offset = entry->offset;
6107 kern_return_t r;
6108
6109 /*
6110 * Take an object reference, and record
6111 * the map version information so that the
6112 * map can be safely unlocked.
6113 */
6114
6115 vm_object_reference(dst_object);
6116
6117 /* account for unlock bumping up timestamp */
6118 version.main_timestamp = dst_map->timestamp + 1;
6119
6120 vm_map_unlock(dst_map);
6121
6122 /*
6123 * Copy as much as possible in one pass
6124 */
6125
6126 copy_size = size;
6127 r = vm_fault_copy(
6128 copy_entry->object.vm_object,
6129 copy_entry->offset,
6130 &copy_size,
6131 dst_object,
6132 dst_offset,
6133 dst_map,
6134 &version,
6135 THREAD_UNINT );
6136
6137 /*
6138 * Release the object reference
6139 */
6140
6141 vm_object_deallocate(dst_object);
6142
6143 /*
6144 * If a hard error occurred, return it now
6145 */
6146
6147 if (r != KERN_SUCCESS)
6148 return(r);
6149
6150 if (copy_size != 0) {
6151 /*
6152 * Dispose of the copied region
6153 */
6154
6155 vm_map_copy_clip_end(copy, copy_entry,
6156 copy_entry->vme_start + copy_size);
6157 vm_map_copy_entry_unlink(copy, copy_entry);
6158 vm_object_deallocate(copy_entry->object.vm_object);
6159 vm_map_copy_entry_dispose(copy, copy_entry);
6160 }
6161
6162 /*
6163 * Pick up in the destination map where we left off.
6164 *
6165 * Use the version information to avoid a lookup
6166 * in the normal case.
6167 */
6168
6169 start += copy_size;
6170 vm_map_lock(dst_map);
6171 if (version.main_timestamp == dst_map->timestamp) {
6172 /* We can safely use saved tmp_entry value */
6173
6174 vm_map_clip_end(dst_map, tmp_entry, start);
6175 tmp_entry = tmp_entry->vme_next;
6176 } else {
6177 /* Must do lookup of tmp_entry */
6178
6179 if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
6180 vm_map_unlock(dst_map);
6181 return(KERN_INVALID_ADDRESS);
6182 }
6183 vm_map_clip_start(dst_map, tmp_entry, start);
6184 }
6185 }
6186 }/* while */
6187
6188 return(KERN_SUCCESS);
6189 }/* vm_map_copy_overwrite_aligned */
6190
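/*
 * The aligned-overwrite path above is normally reached through
 * vm_map_copy_overwrite().  The sketch below shows how a kernel client
 * might overwrite an existing, writable mapping with data captured by
 * vm_map_copyin().  It is illustrative only: the name
 * example_overwrite_mapping() is hypothetical, and the four-argument
 * vm_map_copy_overwrite() signature and its consume-on-success
 * convention are assumed here rather than shown in this excerpt.
 */
#if 0 /* illustrative sketch, not compiled */
static kern_return_t
example_overwrite_mapping(
	vm_map_t	src_map,
	vm_map_offset_t	src_addr,
	vm_map_t	dst_map,
	vm_map_offset_t	dst_addr,
	vm_map_size_t	len)
{
	vm_map_copy_t	copy;
	kern_return_t	kr;

	/* capture the source region; FALSE: do not destroy the source */
	kr = vm_map_copyin(src_map, src_addr, len, FALSE, &copy);
	if (kr != KERN_SUCCESS)
		return kr;

	/* overwrite the existing destination mapping in place */
	kr = vm_map_copy_overwrite(dst_map, dst_addr, copy, FALSE);
	if (kr != KERN_SUCCESS) {
		/* assumed: the copy is consumed only on success */
		vm_map_copy_discard(copy);
	}
	return kr;
}
#endif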
6191 /*
6192 * Routine: vm_map_copyin_kernel_buffer [internal use only]
6193 *
6194 * Description:
6195 * Copy in data to a kernel buffer from space in the
6196 * source map. The original space may be optionally
6197 * deallocated.
6198 *
6199 * If successful, returns a new copy object.
6200 */
6201 static kern_return_t
6202 vm_map_copyin_kernel_buffer(
6203 vm_map_t src_map,
6204 vm_map_offset_t src_addr,
6205 vm_map_size_t len,
6206 boolean_t src_destroy,
6207 vm_map_copy_t *copy_result)
6208 {
6209 kern_return_t kr;
6210 vm_map_copy_t copy;
6211 vm_map_size_t kalloc_size = sizeof(struct vm_map_copy) + len;
6212
6213 copy = (vm_map_copy_t) kalloc(kalloc_size);
6214 if (copy == VM_MAP_COPY_NULL) {
6215 return KERN_RESOURCE_SHORTAGE;
6216 }
6217 copy->type = VM_MAP_COPY_KERNEL_BUFFER;
6218 copy->size = len;
6219 copy->offset = 0;
6220 copy->cpy_kdata = (void *) (copy + 1);
6221 copy->cpy_kalloc_size = kalloc_size;
6222
6223 kr = copyinmap(src_map, src_addr, copy->cpy_kdata, len);
6224 if (kr != KERN_SUCCESS) {
6225 kfree(copy, kalloc_size);
6226 return kr;
6227 }
6228 if (src_destroy) {
6229 (void) vm_map_remove(src_map, vm_map_trunc_page(src_addr),
6230 vm_map_round_page(src_addr + len),
6231 VM_MAP_REMOVE_INTERRUPTIBLE |
6232 VM_MAP_REMOVE_WAIT_FOR_KWIRE |
6233 (src_map == kernel_map) ?
6234 VM_MAP_REMOVE_KUNWIRE : 0);
6235 }
6236 *copy_result = copy;
6237 return KERN_SUCCESS;
6238 }
6239
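/*
 * vm_map_copyin_kernel_buffer() above carves its data area out of the
 * same kalloc() block as the vm_map_copy header: cpy_kdata points just
 * past the header, so a single kfree(copy, kalloc_size) releases both.
 * A minimal standalone sketch of that header-plus-payload layout; the
 * example_buf type and helper names are hypothetical.
 */
#if 0 /* illustrative sketch, not compiled */
struct example_buf {
	vm_size_t	len;	/* header field, like vm_map_copy.size */
	/* payload bytes follow immediately after the header */
};

static struct example_buf *
example_buf_alloc(vm_size_t len)
{
	/* one allocation covers header + payload, like kalloc_size above */
	struct example_buf *hdr = kalloc(sizeof (*hdr) + len);

	if (hdr == NULL)
		return NULL;
	hdr->len = len;
	return hdr;
}

static void *
example_buf_data(struct example_buf *hdr)
{
	/* data area starts right after the header, like cpy_kdata */
	return (void *)(hdr + 1);
}
#endif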
6240 /*
6241 * Routine: vm_map_copyout_kernel_buffer [internal use only]
6242 *
6243 * Description:
6244 * Copy out data from a kernel buffer into space in the
6245 * destination map.  The space may optionally be dynamically
6246 * allocated.
6247 *
6248 * If successful, consumes the copy object.
6249 * Otherwise, the caller is responsible for it.
6250 */
6251 static int vm_map_copyout_kernel_buffer_failures = 0;
6252 static kern_return_t
6253 vm_map_copyout_kernel_buffer(
6254 vm_map_t map,
6255 vm_map_address_t *addr, /* IN/OUT */
6256 vm_map_copy_t copy,
6257 boolean_t overwrite)
6258 {
6259 kern_return_t kr = KERN_SUCCESS;
6260 thread_t thread = current_thread();
6261
6262 if (!overwrite) {
6263
6264 /*
6265 * Allocate space in the target map for the data
6266 */
6267 *addr = 0;
6268 kr = vm_map_enter(map,
6269 addr,
6270 vm_map_round_page(copy->size),
6271 (vm_map_offset_t) 0,
6272 VM_FLAGS_ANYWHERE,
6273 VM_OBJECT_NULL,
6274 (vm_object_offset_t) 0,
6275 FALSE,
6276 VM_PROT_DEFAULT,
6277 VM_PROT_ALL,
6278 VM_INHERIT_DEFAULT);
6279 if (kr != KERN_SUCCESS)
6280 return kr;
6281 }
6282
6283 /*
6284 * Copyout the data from the kernel buffer to the target map.
6285 */
6286 if (thread->map == map) {
6287
6288 /*
6289 * If the target map is the current map, just do
6290 * the copy.
6291 */
6292 if (copyout(copy->cpy_kdata, *addr, copy->size)) {
6293 kr = KERN_INVALID_ADDRESS;
6294 }
6295 }
6296 else {
6297 vm_map_t oldmap;
6298
6299 /*
6300 * If the target map is another map, assume the
6301 * target's address space identity for the duration
6302 * of the copy.
6303 */
6304 vm_map_reference(map);
6305 oldmap = vm_map_switch(map);
6306
6307 if (copyout(copy->cpy_kdata, *addr, copy->size)) {
6308 vm_map_copyout_kernel_buffer_failures++;
6309 kr = KERN_INVALID_ADDRESS;
6310 }
6311
6312 (void) vm_map_switch(oldmap);
6313 vm_map_deallocate(map);
6314 }
6315
6316 if (kr != KERN_SUCCESS) {
6317 /* the copy failed, clean up */
6318 if (!overwrite) {
6319 /*
6320 * Deallocate the space we allocated in the target map.
6321 */
6322 (void) vm_map_remove(map,
6323 vm_map_trunc_page(*addr),
6324 vm_map_round_page(*addr +
6325 vm_map_round_page(copy->size)),
6326 VM_MAP_NO_FLAGS);
6327 *addr = 0;
6328 }
6329 } else {
6330 /* copy was successful, discard the copy structure */
6331 kfree(copy, copy->cpy_kalloc_size);
6332 }
6333
6334 return kr;
6335 }
6336
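/*
 * The routine above illustrates a reusable pattern: to copyout() into a
 * map other than the current thread's, temporarily assume that map's
 * address-space identity with vm_map_switch() and restore the old map
 * afterwards.  A hedged sketch of just that pattern; the name
 * example_copyout_to_map() is hypothetical.
 */
#if 0 /* illustrative sketch, not compiled */
static kern_return_t
example_copyout_to_map(
	vm_map_t	dst_map,
	const void	*kernel_src,
	vm_map_address_t user_dst,
	vm_size_t	nbytes)
{
	vm_map_t	oldmap;
	kern_return_t	kr = KERN_SUCCESS;

	/* keep the map alive while we are switched into it */
	vm_map_reference(dst_map);
	oldmap = vm_map_switch(dst_map);

	if (copyout(kernel_src, user_dst, nbytes))
		kr = KERN_INVALID_ADDRESS;

	/* restore the previous address space and drop our reference */
	(void) vm_map_switch(oldmap);
	vm_map_deallocate(dst_map);

	return kr;
}
#endif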
6337 /*
6338 * Macro: vm_map_copy_insert
6339 *
6340 * Description:
6341 * Link a copy chain ("copy") into a map at the
6342 * specified location (after "where").
6343 * Side effects:
6344 * The copy chain is destroyed.
6345 * Warning:
6346 * The arguments are evaluated multiple times.
6347 */
6348 #define vm_map_copy_insert(map, where, copy) \
6349 MACRO_BEGIN \
6350 vm_map_t VMCI_map; \
6351 vm_map_entry_t VMCI_where; \
6352 vm_map_copy_t VMCI_copy; \
6353 VMCI_map = (map); \
6354 VMCI_where = (where); \
6355 VMCI_copy = (copy); \
6356 ((VMCI_where->vme_next)->vme_prev = vm_map_copy_last_entry(VMCI_copy))\
6357 ->vme_next = (VMCI_where->vme_next); \
6358 ((VMCI_where)->vme_next = vm_map_copy_first_entry(VMCI_copy)) \
6359 ->vme_prev = VMCI_where; \
6360 VMCI_map->hdr.nentries += VMCI_copy->cpy_hdr.nentries; \
6361 UPDATE_FIRST_FREE(VMCI_map, VMCI_map->first_free); \
6362 zfree(vm_map_copy_zone, VMCI_copy); \
6363 MACRO_END
6364
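/*
 * vm_map_copy_insert() splices the copy's circular, doubly-linked entry
 * list into the map's list right after "where".  A generic sketch of
 * that splice on a hypothetical node type; the next/prev fields mirror
 * vme_next/vme_prev, and [first .. last] stands in for
 * vm_map_copy_first_entry()..vm_map_copy_last_entry().
 */
#if 0 /* illustrative sketch, not compiled */
struct example_node {
	struct example_node	*next;
	struct example_node	*prev;
};

static void
example_splice_after(
	struct example_node	*where,
	struct example_node	*first,
	struct example_node	*last)
{
	/* tail of the chain takes over where's old successor */
	last->next = where->next;
	where->next->prev = last;

	/* head of the chain is linked in right after where */
	where->next = first;
	first->prev = where;
}
#endif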
6365 /*
6366 * Routine: vm_map_copyout
6367 *
6368 * Description:
6369 * Copy out a copy chain ("copy") into newly-allocated
6370 * space in the destination map.
6371 *
6372 * If successful, consumes the copy object.
6373 * Otherwise, the caller is responsible for it.
6374 */
6375 kern_return_t
6376 vm_map_copyout(
6377 vm_map_t dst_map,
6378 vm_map_address_t *dst_addr, /* OUT */
6379 vm_map_copy_t copy)
6380 {
6381 vm_map_size_t size;
6382 vm_map_size_t adjustment;
6383 vm_map_offset_t start;
6384 vm_object_offset_t vm_copy_start;
6385 vm_map_entry_t last;
6386 register
6387 vm_map_entry_t entry;
6388
6389 /*
6390 * Check for null copy object.
6391 */
6392
6393 if (copy == VM_MAP_COPY_NULL) {
6394 *dst_addr = 0;
6395 return(KERN_SUCCESS);
6396 }
6397
6398 /*
6399 * Check for special copy object, created
6400 * by vm_map_copyin_object.
6401 */
6402
6403 if (copy->type == VM_MAP_COPY_OBJECT) {
6404 vm_object_t object = copy->cpy_object;
6405 kern_return_t kr;
6406 vm_object_offset_t offset;
6407
6408 offset = vm_object_trunc_page(copy->offset);
6409 size = vm_map_round_page(copy->size +
6410 (vm_map_size_t)(copy->offset - offset));
6411 *dst_addr = 0;
6412 kr = vm_map_enter(dst_map, dst_addr, size,
6413 (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
6414 object, offset, FALSE,
6415 VM_PROT_DEFAULT, VM_PROT_ALL,
6416 VM_INHERIT_DEFAULT);
6417 if (kr != KERN_SUCCESS)
6418 return(kr);
6419 /* Account for non-pagealigned copy object */
6420 *dst_addr += (vm_map_offset_t)(copy->offset - offset);
6421 zfree(vm_map_copy_zone, copy);
6422 return(KERN_SUCCESS);
6423 }
6424
6425 /*
6426 * Check for special kernel buffer allocated
6427 * by new_ipc_kmsg_copyin.
6428 */
6429
6430 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
6431 return(vm_map_copyout_kernel_buffer(dst_map, dst_addr,
6432 copy, FALSE));
6433 }
6434
6435 /*
6436 * Find space for the data
6437 */
6438
6439 vm_copy_start = vm_object_trunc_page(copy->offset);
6440 size = vm_map_round_page((vm_map_size_t)copy->offset + copy->size)
6441 - vm_copy_start;
6442
6443 StartAgain: ;
6444
6445 vm_map_lock(dst_map);
6446 assert(first_free_is_valid(dst_map));
6447 start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
6448 vm_map_min(dst_map) : last->vme_end;
6449
6450 while (TRUE) {
6451 vm_map_entry_t next = last->vme_next;
6452 vm_map_offset_t end = start + size;
6453
6454 if ((end > dst_map->max_offset) || (end < start)) {
6455 if (dst_map->wait_for_space) {
6456 if (size <= (dst_map->max_offset - dst_map->min_offset)) {
6457 assert_wait((event_t) dst_map,
6458 THREAD_INTERRUPTIBLE);
6459 vm_map_unlock(dst_map);
6460 thread_block(THREAD_CONTINUE_NULL);
6461 goto StartAgain;
6462 }
6463 }
6464 vm_map_unlock(dst_map);
6465 return(KERN_NO_SPACE);
6466 }
6467
6468 if ((next == vm_map_to_entry(dst_map)) ||
6469 (next->vme_start >= end))
6470 break;
6471
6472 last = next;
6473 start = last->vme_end;
6474 }
6475
6476 /*
6477 * Since we're going to just drop the map
6478 * entries from the copy into the destination
6479 * map, they must come from the same pool.
6480 */
6481
6482 if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
6483 /*
6484 * Mismatches occur when dealing with the default
6485 * pager.
6486 */
6487 zone_t old_zone;
6488 vm_map_entry_t next, new;
6489
6490 /*
6491 * Find the zone that the copies were allocated from
6492 */
6493 old_zone = (copy->cpy_hdr.entries_pageable)
6494 ? vm_map_entry_zone
6495 : vm_map_kentry_zone;
6496 entry = vm_map_copy_first_entry(copy);
6497
6498 /*
6499 * Reinitialize the copy so that vm_map_copy_entry_link
6500 * will work.
6501 */
6502 copy->cpy_hdr.nentries = 0;
6503 copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;
6504 vm_map_copy_first_entry(copy) =
6505 vm_map_copy_last_entry(copy) =
6506 vm_map_copy_to_entry(copy);
6507
6508 /*
6509 * Copy each entry.
6510 */
6511 while (entry != vm_map_copy_to_entry(copy)) {
6512 new = vm_map_copy_entry_create(copy);
6513 vm_map_entry_copy_full(new, entry);
6514 new->use_pmap = FALSE; /* clr address space specifics */
6515 vm_map_copy_entry_link(copy,
6516 vm_map_copy_last_entry(copy),
6517 new);
6518 next = entry->vme_next;
6519 zfree(old_zone, entry);
6520 entry = next;
6521 }
6522 }
6523
6524 /*
6525 * Adjust the addresses in the copy chain, and
6526 * reset the region attributes.
6527 */
6528
6529 adjustment = start - vm_copy_start;
6530 for (entry = vm_map_copy_first_entry(copy);
6531 entry != vm_map_copy_to_entry(copy);
6532 entry = entry->vme_next) {
6533 entry->vme_start += adjustment;
6534 entry->vme_end += adjustment;
6535
6536 entry->inheritance = VM_INHERIT_DEFAULT;
6537 entry->protection = VM_PROT_DEFAULT;
6538 entry->max_protection = VM_PROT_ALL;
6539 entry->behavior = VM_BEHAVIOR_DEFAULT;
6540
6541 /*
6542 * If the entry is now wired,
6543 * map the pages into the destination map.
6544 */
6545 if (entry->wired_count != 0) {
6546 register vm_map_offset_t va;
6547 vm_object_offset_t offset;
6548 register vm_object_t object;
6549 vm_prot_t prot;
6550 int type_of_fault;
6551
6552 object = entry->object.vm_object;
6553 offset = entry->offset;
6554 va = entry->vme_start;
6555
6556 pmap_pageable(dst_map->pmap,
6557 entry->vme_start,
6558 entry->vme_end,
6559 TRUE);
6560
6561 while (va < entry->vme_end) {
6562 register vm_page_t m;
6563
6564 /*
6565 * Look up the page in the object.
6566 * Assert that the page will be found in the
6567 * top object:
6568 * either
6569 * the object was newly created by
6570 * vm_object_copy_slowly, and has
6571 * copies of all of the pages from
6572 * the source object
6573 * or
6574 * the object was moved from the old
6575 * map entry; because the old map
6576 * entry was wired, all of the pages
6577 * were in the top-level object.
6578 * (XXX not true if we wire pages for
6579 * reading)
6580 */
6581 vm_object_lock(object);
6582
6583 m = vm_page_lookup(object, offset);
6584 if (m == VM_PAGE_NULL || m->wire_count == 0 ||
6585 m->absent)
6586 panic("vm_map_copyout: wiring %p", m);
6587
6588 /*
6589 * ENCRYPTED SWAP:
6590 * The page is assumed to be wired here, so it
6591 * shouldn't be encrypted. Otherwise, we
6592 * couldn't enter it in the page table, since
6593 * we don't want the user to see the encrypted
6594 * data.
6595 */
6596 ASSERT_PAGE_DECRYPTED(m);
6597
6598 prot = entry->protection;
6599
6600 if (override_nx(dst_map, entry->alias) && prot)
6601 prot |= VM_PROT_EXECUTE;
6602
6603 type_of_fault = DBG_CACHE_HIT_FAULT;
6604
6605 vm_fault_enter(m, dst_map->pmap, va, prot,
6606 m->wire_count != 0, FALSE, FALSE,
6607 &type_of_fault);
6608
6609 vm_object_unlock(object);
6610
6611 offset += PAGE_SIZE_64;
6612 va += PAGE_SIZE;
6613 }
6614 }
6615 }
6616
6617 /*
6618 * Correct the page alignment for the result
6619 */
6620
6621 *dst_addr = start + (copy->offset - vm_copy_start);
6622
6623 /*
6624 * Update the hints and the map size
6625 */
6626
6627 SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy));
6628
6629 dst_map->size += size;
6630
6631 /*
6632 * Link in the copy
6633 */
6634
6635 vm_map_copy_insert(dst_map, last, copy);
6636
6637 vm_map_unlock(dst_map);
6638
6639 /*
6640 * XXX If wiring_required, call vm_map_pageable
6641 */
6642
6643 return(KERN_SUCCESS);
6644 }
6645
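/*
 * vm_map_copyout() above is the destination half of an out-of-line data
 * move: vm_map_copyin() captures a region from one map, and
 * vm_map_copyout() drops it into newly allocated space in another.  A
 * hedged sketch of that pairing, using only the semantics stated in the
 * descriptions above (copyout consumes the copy on success, otherwise
 * the caller must discard it); example_move_region() is a hypothetical
 * name.
 */
#if 0 /* illustrative sketch, not compiled */
static kern_return_t
example_move_region(
	vm_map_t	src_map,
	vm_map_offset_t	src_addr,
	vm_map_size_t	len,
	vm_map_t	dst_map,
	vm_map_address_t *dst_addr)	/* OUT */
{
	vm_map_copy_t	copy;
	kern_return_t	kr;

	/* TRUE: deallocate the source region once it has been captured */
	kr = vm_map_copyin(src_map, src_addr, len, TRUE, &copy);
	if (kr != KERN_SUCCESS)
		return kr;

	/* copyout consumes "copy" on success; on failure we still own it */
	kr = vm_map_copyout(dst_map, dst_addr, copy);
	if (kr != KERN_SUCCESS)
		vm_map_copy_discard(copy);
	return kr;
}
#endif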
6646 /*
6647 * Routine: vm_map_copyin
6648 *
6649 * Description:
6650 * see vm_map_copyin_common. Exported via Unsupported.exports.
6651 *
6652 */
6653
6654 #undef vm_map_copyin
6655
6656 kern_return_t
6657 vm_map_copyin(
6658 vm_map_t src_map,
6659 vm_map_address_t src_addr,
6660 vm_map_size_t len,
6661 boolean_t src_destroy,
6662 vm_map_copy_t *copy_result) /* OUT */
6663 {
6664 return(vm_map_copyin_common(src_map, src_addr, len, src_destroy,
6665 FALSE, copy_result, FALSE));
6666 }
6667
6668 /*
6669 * Routine: vm_map_copyin_common
6670 *
6671 * Description:
6672 * Copy the specified region (src_addr, len) from the
6673 * source address space (src_map), possibly removing
6674 * the region from the source address space (src_destroy).
6675 *
6676 * Returns:
6677 * A vm_map_copy_t object (copy_result), suitable for
6678 * insertion into another address space (using vm_map_copyout),
6679 * copying over another address space region (using
6680 * vm_map_copy_overwrite). If the copy is unused, it
6681 * should be destroyed (using vm_map_copy_discard).
6682 *
6683 * In/out conditions:
6684 * The source map should not be locked on entry.
6685 */
6686
6687 typedef struct submap_map {
6688 vm_map_t parent_map;
6689 vm_map_offset_t base_start;
6690 vm_map_offset_t base_end;
6691 vm_map_size_t base_len;
6692 struct submap_map *next;
6693 } submap_map_t;
6694
6695 kern_return_t
6696 vm_map_copyin_common(
6697 vm_map_t src_map,
6698 vm_map_address_t src_addr,
6699 vm_map_size_t len,
6700 boolean_t src_destroy,
6701 __unused boolean_t src_volatile,
6702 vm_map_copy_t *copy_result, /* OUT */
6703 boolean_t use_maxprot)
6704 {
6705 vm_map_entry_t tmp_entry; /* Result of last map lookup --
6706 * in multi-level lookup, this
6707 * entry contains the actual
6708 * vm_object/offset.
6709 */
6710 register
6711 vm_map_entry_t new_entry = VM_MAP_ENTRY_NULL; /* Map entry for copy */
6712
6713 vm_map_offset_t src_start; /* Start of current entry --
6714 * where copy is taking place now
6715 */
6716 vm_map_offset_t src_end; /* End of entire region to be
6717 * copied */
6718 vm_map_offset_t src_base;
6719 vm_map_t base_map = src_map;
6720 boolean_t map_share=FALSE;
6721 submap_map_t *parent_maps = NULL;
6722
6723 register
6724 vm_map_copy_t copy; /* Resulting copy */
6725 vm_map_address_t copy_addr;
6726
6727 /*
6728 * Check for copies of zero bytes.
6729 */
6730
6731 if (len == 0) {
6732 *copy_result = VM_MAP_COPY_NULL;
6733 return(KERN_SUCCESS);
6734 }
6735
6736 /*
6737 * Check that the end address doesn't overflow
6738 */
6739 src_end = src_addr + len;
6740 if (src_end < src_addr)
6741 return KERN_INVALID_ADDRESS;
6742
6743 /*
6744 * If the copy is sufficiently small, use a kernel buffer instead
6745 * of making a virtual copy.  The theory is that the cost of
6746 * setting up VM (and taking C-O-W faults) dominates the copy costs
6747 * for small regions.
6748 */
6749 if ((len < msg_ool_size_small) && !use_maxprot)
6750 return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
6751 src_destroy, copy_result);
6752
6753 /*
6754 * Compute (page aligned) start and end of region
6755 */
6756 src_start = vm_map_trunc_page(src_addr);
6757 src_end = vm_map_round_page(src_end);
6758
6759 XPR(XPR_VM_MAP, "vm_map_copyin_common map 0x%x addr 0x%x len 0x%x dest %d\n", (natural_t)src_map, src_addr, len, src_destroy, 0);
6760
6761 /*
6762 * Allocate a header element for the list.
6763 *
6764 * Use the start and end in the header to
6765 * remember the endpoints prior to rounding.
6766 */
6767
6768 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
6769 vm_map_copy_first_entry(copy) =
6770 vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
6771 copy->type = VM_MAP_COPY_ENTRY_LIST;
6772 copy->cpy_hdr.nentries = 0;
6773 copy->cpy_hdr.entries_pageable = TRUE;
6774
6775 copy->offset = src_addr;
6776 copy->size = len;
6777
6778 new_entry = vm_map_copy_entry_create(copy);
6779
6780 #define RETURN(x) \
6781 MACRO_BEGIN \
6782 vm_map_unlock(src_map); \
6783 if(src_map != base_map) \
6784 vm_map_deallocate(src_map); \
6785 if (new_entry != VM_MAP_ENTRY_NULL) \
6786 vm_map_copy_entry_dispose(copy,new_entry); \
6787 vm_map_copy_discard(copy); \
6788 { \
6789 submap_map_t *_ptr; \
6790 \
6791 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
6792 parent_maps=parent_maps->next; \
6793 if (_ptr->parent_map != base_map) \
6794 vm_map_deallocate(_ptr->parent_map); \
6795 kfree(_ptr, sizeof(submap_map_t)); \
6796 } \
6797 } \
6798 MACRO_RETURN(x); \
6799 MACRO_END
6800
6801 /*
6802 * Find the beginning of the region.
6803 */
6804
6805 vm_map_lock(src_map);
6806
6807 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry))
6808 RETURN(KERN_INVALID_ADDRESS);
6809 if(!tmp_entry->is_sub_map) {
6810 vm_map_clip_start(src_map, tmp_entry, src_start);
6811 }
6812 /* set for later submap fix-up */
6813 copy_addr = src_start;
6814
6815 /*
6816 * Go through entries until we get to the end.
6817 */
6818
6819 while (TRUE) {
6820 register
6821 vm_map_entry_t src_entry = tmp_entry; /* Top-level entry */
6822 vm_map_size_t src_size; /* Size of source
6823 * map entry (in both
6824 * maps)
6825 */
6826
6827 register
6828 vm_object_t src_object; /* Object to copy */
6829 vm_object_offset_t src_offset;
6830
6831 boolean_t src_needs_copy; /* Should source map
6832 * be made read-only
6833 * for copy-on-write?
6834 */
6835
6836 boolean_t new_entry_needs_copy; /* Will new entry be COW? */
6837
6838 boolean_t was_wired; /* Was source wired? */
6839 vm_map_version_t version; /* Version before locks
6840 * dropped to make copy
6841 */
6842 kern_return_t result; /* Return value from
6843 * copy_strategically.
6844 */
6845 while(tmp_entry->is_sub_map) {
6846 vm_map_size_t submap_len;
6847 submap_map_t *ptr;
6848
6849 ptr = (submap_map_t *)kalloc(sizeof(submap_map_t));
6850 ptr->next = parent_maps;
6851 parent_maps = ptr;
6852 ptr->parent_map = src_map;
6853 ptr->base_start = src_start;
6854 ptr->base_end = src_end;
6855 submap_len = tmp_entry->vme_end - src_start;
6856 if(submap_len > (src_end-src_start))
6857 submap_len = src_end-src_start;
6858 ptr->base_len = submap_len;
6859
6860 src_start -= tmp_entry->vme_start;
6861 src_start += tmp_entry->offset;
6862 src_end = src_start + submap_len;
6863 src_map = tmp_entry->object.sub_map;
6864 vm_map_lock(src_map);
6865 /* keep an outstanding reference for all maps in */
6866 /* the parents' tree except the base map */
6867 vm_map_reference(src_map);
6868 vm_map_unlock(ptr->parent_map);
6869 if (!vm_map_lookup_entry(
6870 src_map, src_start, &tmp_entry))
6871 RETURN(KERN_INVALID_ADDRESS);
6872 map_share = TRUE;
6873 if(!tmp_entry->is_sub_map)
6874 vm_map_clip_start(src_map, tmp_entry, src_start);
6875 src_entry = tmp_entry;
6876 }
6877 /* we are now in the lowest level submap... */
6878
6879 if ((tmp_entry->object.vm_object != VM_OBJECT_NULL) &&
6880 (tmp_entry->object.vm_object->phys_contiguous)) {
6881 /* This is not supported for now.  In the future */
6882 /* we will need to detect the phys_contig */
6883 /* condition and then upgrade copy_slowly */
6884 /* to do a physical copy from the device-memory- */
6885 /* based object.  We can piggy-back off of */
6886 /* the was_wired boolean to set up the */
6887 /* proper handling */
6888 RETURN(KERN_PROTECTION_FAILURE);
6889 }
6890 /*
6891 * Create a new address map entry to hold the result.
6892 * Fill in the fields from the appropriate source entries.
6893 * We must unlock the source map to do this if we need
6894 * to allocate a map entry.
6895 */
6896 if (new_entry == VM_MAP_ENTRY_NULL) {
6897 version.main_timestamp = src_map->timestamp;
6898 vm_map_unlock(src_map);
6899
6900 new_entry = vm_map_copy_entry_create(copy);
6901
6902 vm_map_lock(src_map);
6903 if ((version.main_timestamp + 1) != src_map->timestamp) {
6904 if (!vm_map_lookup_entry(src_map, src_start,
6905 &tmp_entry)) {
6906 RETURN(KERN_INVALID_ADDRESS);
6907 }
6908 if (!tmp_entry->is_sub_map)
6909 vm_map_clip_start(src_map, tmp_entry, src_start);
6910 continue; /* restart w/ new tmp_entry */
6911 }
6912 }
6913
6914 /*
6915 * Verify that the region can be read.
6916 */
6917 if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE &&
6918 !use_maxprot) ||
6919 (src_entry->max_protection & VM_PROT_READ) == 0)
6920 RETURN(KERN_PROTECTION_FAILURE);
6921
6922 /*
6923 * Clip against the endpoints of the entire region.
6924 */
6925
6926 vm_map_clip_end(src_map, src_entry, src_end);
6927
6928 src_size = src_entry->vme_end - src_start;
6929 src_object = src_entry->object.vm_object;
6930 src_offset = src_entry->offset;
6931 was_wired = (src_entry->wired_count != 0);
6932
6933 vm_map_entry_copy(new_entry, src_entry);
6934 new_entry->use_pmap = FALSE; /* clr address space specifics */
6935
6936 /*
6937 * Attempt non-blocking copy-on-write optimizations.
6938 */
6939
6940 if (src_destroy &&
6941 (src_object == VM_OBJECT_NULL ||
6942 (src_object->internal && !src_object->true_share
6943 && !map_share))) {
6944 /*
6945 * If we are destroying the source, and the object
6946 * is internal, we can move the object reference
6947 * from the source to the copy. The copy is
6948 * copy-on-write only if the source is.
6949 * We make another reference to the object, because
6950 * destroying the source entry will deallocate it.
6951 */
6952 vm_object_reference(src_object);
6953
6954 /*
6955 * Copy is always unwired. vm_map_copy_entry
6956 * set its wired count to zero.
6957 */
6958
6959 goto CopySuccessful;
6960 }
6961
6962
6963 RestartCopy:
6964 XPR(XPR_VM_MAP, "vm_map_copyin_common src_obj 0x%x ent 0x%x obj 0x%x was_wired %d\n",
6965 src_object, new_entry, new_entry->object.vm_object,
6966 was_wired, 0);
6967 if ((src_object == VM_OBJECT_NULL ||
6968 (!was_wired && !map_share && !tmp_entry->is_shared)) &&
6969 vm_object_copy_quickly(
6970 &new_entry->object.vm_object,
6971 src_offset,
6972 src_size,
6973 &src_needs_copy,
6974 &new_entry_needs_copy)) {
6975
6976 new_entry->needs_copy = new_entry_needs_copy;
6977
6978 /*
6979 * Handle copy-on-write obligations
6980 */
6981
6982 if (src_needs_copy && !tmp_entry->needs_copy) {
6983 vm_prot_t prot;
6984
6985 prot = src_entry->protection & ~VM_PROT_WRITE;
6986
6987 if (override_nx(src_map, src_entry->alias) && prot)
6988 prot |= VM_PROT_EXECUTE;
6989
6990 vm_object_pmap_protect(
6991 src_object,
6992 src_offset,
6993 src_size,
6994 (src_entry->is_shared ?
6995 PMAP_NULL
6996 : src_map->pmap),
6997 src_entry->vme_start,
6998 prot);
6999
7000 tmp_entry->needs_copy = TRUE;
7001 }
7002
7003 /*
7004 * The map has never been unlocked, so it's safe
7005 * to move to the next entry rather than doing
7006 * another lookup.
7007 */
7008
7009 goto CopySuccessful;
7010 }
7011
7012 /*
7013 * Take an object reference, so that we may
7014 * release the map lock(s).
7015 */
7016
7017 assert(src_object != VM_OBJECT_NULL);
7018 vm_object_reference(src_object);
7019
7020 /*
7021 * Record the timestamp for later verification.
7022 * Unlock the map.
7023 */
7024
7025 version.main_timestamp = src_map->timestamp;
7026 vm_map_unlock(src_map); /* Increments timestamp once! */
7027
7028 /*
7029 * Perform the copy
7030 */
7031
7032 if (was_wired) {
7033 CopySlowly:
7034 vm_object_lock(src_object);
7035 result = vm_object_copy_slowly(
7036 src_object,
7037 src_offset,
7038 src_size,
7039 THREAD_UNINT,
7040 &new_entry->object.vm_object);
7041 new_entry->offset = 0;
7042 new_entry->needs_copy = FALSE;
7043
7044 }
7045 else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
7046 (tmp_entry->is_shared || map_share)) {
7047 vm_object_t new_object;
7048
7049 vm_object_lock_shared(src_object);
7050 new_object = vm_object_copy_delayed(
7051 src_object,
7052 src_offset,
7053 src_size,
7054 TRUE);
7055 if (new_object == VM_OBJECT_NULL)
7056 goto CopySlowly;
7057
7058 new_entry->object.vm_object = new_object;
7059 new_entry->needs_copy = TRUE;
7060 result = KERN_SUCCESS;
7061
7062 } else {
7063 result = vm_object_copy_strategically(src_object,
7064 src_offset,
7065 src_size,
7066 &new_entry->object.vm_object,
7067 &new_entry->offset,
7068 &new_entry_needs_copy);
7069
7070 new_entry->needs_copy = new_entry_needs_copy;
7071 }
7072
7073 if (result != KERN_SUCCESS &&
7074 result != KERN_MEMORY_RESTART_COPY) {
7075 vm_map_lock(src_map);
7076 RETURN(result);
7077 }
7078
7079 /*
7080 * Throw away the extra reference
7081 */
7082
7083 vm_object_deallocate(src_object);
7084
7085 /*
7086 * Verify that the map has not substantially
7087 * changed while the copy was being made.
7088 */
7089
7090 vm_map_lock(src_map);
7091
7092 if ((version.main_timestamp + 1) == src_map->timestamp)
7093 goto VerificationSuccessful;
7094
7095 /*
7096 * Simple version comparison failed.
7097 *
7098 * Retry the lookup and verify that the
7099 * same object/offset are still present.
7100 *
7101 * [Note: a memory manager that colludes with
7102 * the calling task can detect that we have
7103 * cheated. While the map was unlocked, the
7104 * mapping could have been changed and restored.]
7105 */
7106
7107 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
7108 RETURN(KERN_INVALID_ADDRESS);
7109 }
7110
7111 src_entry = tmp_entry;
7112 vm_map_clip_start(src_map, src_entry, src_start);
7113
7114 if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) &&
7115 !use_maxprot) ||
7116 ((src_entry->max_protection & VM_PROT_READ) == 0))
7117 goto VerificationFailed;
7118
7119 if (src_entry->vme_end < new_entry->vme_end)
7120 src_size = (new_entry->vme_end = src_entry->vme_end) - src_start;
7121
7122 if ((src_entry->object.vm_object != src_object) ||
7123 (src_entry->offset != src_offset) ) {
7124
7125 /*
7126 * Verification failed.
7127 *
7128 * Start over with this top-level entry.
7129 */
7130
7131 VerificationFailed: ;
7132
7133 vm_object_deallocate(new_entry->object.vm_object);
7134 tmp_entry = src_entry;
7135 continue;
7136 }
7137
7138 /*
7139 * Verification succeeded.
7140 */
7141
7142 VerificationSuccessful: ;
7143
7144 if (result == KERN_MEMORY_RESTART_COPY)
7145 goto RestartCopy;
7146
7147 /*
7148 * Copy succeeded.
7149 */
7150
7151 CopySuccessful: ;
7152
7153 /*
7154 * Link in the new copy entry.
7155 */
7156
7157 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
7158 new_entry);
7159
7160 /*
7161 * Determine whether the entire region
7162 * has been copied.
7163 */
7164 src_base = src_start;
7165 src_start = new_entry->vme_end;
7166 new_entry = VM_MAP_ENTRY_NULL;
7167 while ((src_start >= src_end) && (src_end != 0)) {
7168 if (src_map != base_map) {
7169 submap_map_t *ptr;
7170
7171 ptr = parent_maps;
7172 assert(ptr != NULL);
7173 parent_maps = parent_maps->next;
7174
7175 /* fix up the damage we did in that submap */
7176 vm_map_simplify_range(src_map,
7177 src_base,
7178 src_end);
7179
7180 vm_map_unlock(src_map);
7181 vm_map_deallocate(src_map);
7182 vm_map_lock(ptr->parent_map);
7183 src_map = ptr->parent_map;
7184 src_base = ptr->base_start;
7185 src_start = ptr->base_start + ptr->base_len;
7186 src_end = ptr->base_end;
7187 if ((src_end > src_start) &&
7188 !vm_map_lookup_entry(
7189 src_map, src_start, &tmp_entry))
7190 RETURN(KERN_INVALID_ADDRESS);
7191 kfree(ptr, sizeof(submap_map_t));
7192 if(parent_maps == NULL)
7193 map_share = FALSE;
7194 src_entry = tmp_entry->vme_prev;
7195 } else
7196 break;
7197 }
7198 if ((src_start >= src_end) && (src_end != 0))
7199 break;
7200
7201 /*
7202 * Verify that there are no gaps in the region
7203 */
7204
7205 tmp_entry = src_entry->vme_next;
7206 if ((tmp_entry->vme_start != src_start) ||
7207 (tmp_entry == vm_map_to_entry(src_map)))
7208 RETURN(KERN_INVALID_ADDRESS);
7209 }
7210
7211 /*
7212 * If the source should be destroyed, do it now, since the
7213 * copy was successful.
7214 */
7215 if (src_destroy) {
7216 (void) vm_map_delete(src_map,
7217 vm_map_trunc_page(src_addr),
7218 src_end,
7219 (src_map == kernel_map) ?
7220 VM_MAP_REMOVE_KUNWIRE :
7221 VM_MAP_NO_FLAGS,
7222 VM_MAP_NULL);
7223 } else {
7224 /* fix up the damage we did in the base map */
7225 vm_map_simplify_range(src_map,
7226 vm_map_trunc_page(src_addr),
7227 vm_map_round_page(src_end));
7228 }
7229
7230 vm_map_unlock(src_map);
7231
7232 /* Fix-up start and end points in copy. This is necessary */
7233 /* when the various entries in the copy object were picked */
7234 /* up from different sub-maps */
7235
7236 tmp_entry = vm_map_copy_first_entry(copy);
7237 while (tmp_entry != vm_map_copy_to_entry(copy)) {
7238 tmp_entry->vme_end = copy_addr +
7239 (tmp_entry->vme_end - tmp_entry->vme_start);
7240 tmp_entry->vme_start = copy_addr;
7241 copy_addr += tmp_entry->vme_end - tmp_entry->vme_start;
7242 tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next;
7243 }
7244
7245 *copy_result = copy;
7246 return(KERN_SUCCESS);
7247
7248 #undef RETURN
7249 }
7250
7251 /*
7252 * vm_map_copyin_object:
7253 *
7254 * Create a copy object from an object.
7255 * Our caller donates an object reference.
7256 */
7257
7258 kern_return_t
7259 vm_map_copyin_object(
7260 vm_object_t object,
7261 vm_object_offset_t offset, /* offset of region in object */
7262 vm_object_size_t size, /* size of region in object */
7263 vm_map_copy_t *copy_result) /* OUT */
7264 {
7265 vm_map_copy_t copy; /* Resulting copy */
7266
7267 /*
7268 * We drop the object into a special copy object
7269 * that contains the object directly.
7270 */
7271
7272 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
7273 copy->type = VM_MAP_COPY_OBJECT;
7274 copy->cpy_object = object;
7275 copy->offset = offset;
7276 copy->size = size;
7277
7278 *copy_result = copy;
7279 return(KERN_SUCCESS);
7280 }
7281
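/*
 * vm_map_copyin_object() above wraps a donated object reference in a
 * VM_MAP_COPY_OBJECT copy, which vm_map_copyout() recognizes (see its
 * VM_MAP_COPY_OBJECT case) and maps with vm_map_enter().  A hedged
 * sketch of that use; example_map_new_object() is a hypothetical name.
 */
#if 0 /* illustrative sketch, not compiled */
static kern_return_t
example_map_new_object(
	vm_map_t	dst_map,
	vm_map_size_t	size,
	vm_map_address_t *dst_addr)	/* OUT */
{
	vm_object_t	object;
	vm_map_copy_t	copy;
	kern_return_t	kr;

	/* a fresh internal object; its reference is donated to the copy */
	object = vm_object_allocate(size);

	kr = vm_map_copyin_object(object, 0, size, &copy);
	if (kr != KERN_SUCCESS) {
		vm_object_deallocate(object);
		return kr;
	}

	/* map it; the copy object is consumed on success */
	kr = vm_map_copyout(dst_map, dst_addr, copy);
	if (kr != KERN_SUCCESS)
		vm_map_copy_discard(copy);
	return kr;
}
#endif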
7282 static void
7283 vm_map_fork_share(
7284 vm_map_t old_map,
7285 vm_map_entry_t old_entry,
7286 vm_map_t new_map)
7287 {
7288 vm_object_t object;
7289 vm_map_entry_t new_entry;
7290
7291 /*
7292 * New sharing code. New map entry
7293 * references original object. Internal
7294 * objects use asynchronous copy algorithm for
7295 * future copies. First make sure we have
7296 * the right object. If we need a shadow,
7297 * or someone else already has one, then
7298 * make a new shadow and share it.
7299 */
7300
7301 object = old_entry->object.vm_object;
7302 if (old_entry->is_sub_map) {
7303 assert(old_entry->wired_count == 0);
7304 #ifndef NO_NESTED_PMAP
7305 if(old_entry->use_pmap) {
7306 kern_return_t result;
7307
7308 result = pmap_nest(new_map->pmap,
7309 (old_entry->object.sub_map)->pmap,
7310 (addr64_t)old_entry->vme_start,
7311 (addr64_t)old_entry->vme_start,
7312 (uint64_t)(old_entry->vme_end - old_entry->vme_start));
7313 if(result)
7314 panic("vm_map_fork_share: pmap_nest failed!");
7315 }
7316 #endif /* NO_NESTED_PMAP */
7317 } else if (object == VM_OBJECT_NULL) {
7318 object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
7319 old_entry->vme_start));
7320 old_entry->offset = 0;
7321 old_entry->object.vm_object = object;
7322 assert(!old_entry->needs_copy);
7323 } else if (object->copy_strategy !=
7324 MEMORY_OBJECT_COPY_SYMMETRIC) {
7325
7326 /*
7327 * We are already using an asymmetric
7328 * copy, and therefore we already have
7329 * the right object.
7330 */
7331
7332 assert(! old_entry->needs_copy);
7333 }
7334 else if (old_entry->needs_copy || /* case 1 */
7335 object->shadowed || /* case 2 */
7336 (!object->true_share && /* case 3 */
7337 !old_entry->is_shared &&
7338 (object->size >
7339 (vm_map_size_t)(old_entry->vme_end -
7340 old_entry->vme_start)))) {
7341
7342 /*
7343 * We need to create a shadow.
7344 * There are three cases here.
7345 * In the first case, we need to
7346 * complete a deferred symmetrical
7347 * copy that we participated in.
7348 * In the second and third cases,
7349 * we need to create the shadow so
7350 * that changes that we make to the
7351 * object do not interfere with
7352 * any symmetrical copies which
7353 * have occurred (case 2) or which
7354 * might occur (case 3).
7355 *
7356 * The first case is when we had
7357 * deferred shadow object creation
7358 * via the entry->needs_copy mechanism.
7359 * This mechanism only works when
7360 * only one entry points to the source
7361 * object, and we are about to create
7362 * a second entry pointing to the
7363 * same object. The problem is that
7364 * there is no way of mapping from
7365 * an object to the entries pointing
7366 * to it. (Deferred shadow creation
7367 * works with one entry because it occurs
7368 * at fault time, and we walk from the
7369 * entry to the object when handling
7370 * the fault.)
7371 *
7372 * The second case is when the object
7373 * to be shared has already been copied
7374 * with a symmetric copy, but we point
7375 * directly to the object without
7376 * needs_copy set in our entry. (This
7377 * can happen because different ranges
7378 * of an object can be pointed to by
7379 * different entries. In particular,
7380 * a single entry pointing to an object
7381 * can be split by a call to vm_inherit,
7382 * which, combined with task_create, can
7383 * result in the different entries
7384 * having different needs_copy values.)
7385 * The shadowed flag in the object allows
7386 * us to detect this case. The problem
7387 * with this case is that if this object
7388 * has or will have shadows, then we
7389 * must not perform an asymmetric copy
7390 * of this object, since such a copy
7391 * allows the object to be changed, which
7392 * will break the previous symmetrical
7393 * copies (which rely upon the object
7394 * not changing). In a sense, the shadowed
7395 * flag says "don't change this object".
7396 * We fix this by creating a shadow
7397 * object for this object, and sharing
7398 * that. This works because we are free
7399 * to change the shadow object (and thus
7400 * to use an asymmetric copy strategy);
7401 * this is also semantically correct,
7402 * since this object is temporary, and
7403 * therefore a copy of the object is
7404 * as good as the object itself. (This
7405 * is not true for permanent objects,
7406 * since the pager needs to see changes,
7407 * which won't happen if the changes
7408 * are made to a copy.)
7409 *
7410 * The third case is when the object
7411 * to be shared has parts sticking
7412 * outside of the entry we're working
7413 * with, and thus may in the future
7414 * be subject to a symmetrical copy.
7415 * (This is a preemptive version of
7416 * case 2.)
7417 */
7418
7419 vm_object_shadow(&old_entry->object.vm_object,
7420 &old_entry->offset,
7421 (vm_map_size_t) (old_entry->vme_end -
7422 old_entry->vme_start));
7423
7424 /*
7425 * If we're making a shadow for other than
7426 * copy on write reasons, then we have
7427 * to remove write permission.
7428 */
7429
7430 if (!old_entry->needs_copy &&
7431 (old_entry->protection & VM_PROT_WRITE)) {
7432 vm_prot_t prot;
7433
7434 prot = old_entry->protection & ~VM_PROT_WRITE;
7435
7436 if (override_nx(old_map, old_entry->alias) && prot)
7437 prot |= VM_PROT_EXECUTE;
7438
7439 if (old_map->mapped) {
7440 vm_object_pmap_protect(
7441 old_entry->object.vm_object,
7442 old_entry->offset,
7443 (old_entry->vme_end -
7444 old_entry->vme_start),
7445 PMAP_NULL,
7446 old_entry->vme_start,
7447 prot);
7448 } else {
7449 pmap_protect(old_map->pmap,
7450 old_entry->vme_start,
7451 old_entry->vme_end,
7452 prot);
7453 }
7454 }
7455
7456 old_entry->needs_copy = FALSE;
7457 object = old_entry->object.vm_object;
7458 }
7459
7460 /*
7461 * If object was using a symmetric copy strategy,
7462 * change its copy strategy to the default
7463 * asymmetric copy strategy, which is copy_delay
7464 * in the non-norma case and copy_call in the
7465 * norma case. Bump the reference count for the
7466 * new entry.
7467 */
7468
7469 if(old_entry->is_sub_map) {
7470 vm_map_lock(old_entry->object.sub_map);
7471 vm_map_reference(old_entry->object.sub_map);
7472 vm_map_unlock(old_entry->object.sub_map);
7473 } else {
7474 vm_object_lock(object);
7475 vm_object_reference_locked(object);
7476 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
7477 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
7478 }
7479 vm_object_unlock(object);
7480 }
7481
7482 /*
7483 * Clone the entry, using object ref from above.
7484 * Mark both entries as shared.
7485 */
7486
7487 new_entry = vm_map_entry_create(new_map);
7488 vm_map_entry_copy(new_entry, old_entry);
7489 old_entry->is_shared = TRUE;
7490 new_entry->is_shared = TRUE;
7491
7492 /*
7493 * Insert the entry into the new map -- we
7494 * know we're inserting at the end of the new
7495 * map.
7496 */
7497
7498 vm_map_entry_link(new_map, vm_map_last_entry(new_map), new_entry);
7499
7500 /*
7501 * Update the physical map
7502 */
7503
7504 if (old_entry->is_sub_map) {
7505 /* Bill Angell pmap support goes here */
7506 } else {
7507 pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
7508 old_entry->vme_end - old_entry->vme_start,
7509 old_entry->vme_start);
7510 }
7511 }
7512
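/*
 * The long comment in vm_map_fork_share() above describes three cases
 * in which a shadow object must be created before an entry can be
 * shared.  A hedged restatement of that test as a standalone boolean
 * helper, mirroring the if-condition in the function above;
 * example_needs_new_shadow() is a hypothetical name.
 */
#if 0 /* illustrative sketch, not compiled */
static boolean_t
example_needs_new_shadow(
	vm_map_entry_t	entry,
	vm_object_t	object)
{
	vm_map_size_t	entry_size;

	entry_size = (vm_map_size_t)(entry->vme_end - entry->vme_start);

	return (entry->needs_copy ||	/* case 1: deferred symmetric copy */
		object->shadowed ||	/* case 2: symmetric copies already exist */
		(!object->true_share &&	/* case 3: object extends past this entry */
		 !entry->is_shared &&	/*         and may be copied symmetrically later */
		 object->size > entry_size));
}
#endif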
7513 static boolean_t
7514 vm_map_fork_copy(
7515 vm_map_t old_map,
7516 vm_map_entry_t *old_entry_p,
7517 vm_map_t new_map)
7518 {
7519 vm_map_entry_t old_entry = *old_entry_p;
7520 vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start;
7521 vm_map_offset_t start = old_entry->vme_start;
7522 vm_map_copy_t copy;
7523 vm_map_entry_t last = vm_map_last_entry(new_map);
7524
7525 vm_map_unlock(old_map);
7526 /*
7527 * Use maxprot version of copyin because we
7528 * care about whether this memory can ever
7529 * be accessed, not just whether it's accessible
7530 * right now.
7531 */
7532 if (vm_map_copyin_maxprot(old_map, start, entry_size, FALSE, &copy)
7533 != KERN_SUCCESS) {
7534 /*
7535 * The map might have changed while it
7536 * was unlocked, check it again. Skip
7537 * any blank space or permanently
7538 * unreadable region.
7539 */
7540 vm_map_lock(old_map);
7541 if (!vm_map_lookup_entry(old_map, start, &last) ||
7542 (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) {
7543 last = last->vme_next;
7544 }
7545 *old_entry_p = last;
7546
7547 /*
7548 * XXX For some error returns, want to
7549 * XXX skip to the next element. Note
7550 * that INVALID_ADDRESS and
7551 * PROTECTION_FAILURE are handled above.
7552 */
7553
7554 return FALSE;
7555 }
7556
7557 /*
7558 * Insert the copy into the new map
7559 */
7560
7561 vm_map_copy_insert(new_map, last, copy);
7562
7563 /*
7564 * Pick up the traversal at the end of
7565 * the copied region.
7566 */
7567
7568 vm_map_lock(old_map);
7569 start += entry_size;
7570 if (! vm_map_lookup_entry(old_map, start, &last)) {
7571 last = last->vme_next;
7572 } else {
7573 if (last->vme_start == start) {
7574 /*
7575 * No need to clip here and we don't
7576 * want to cause any unnecessary
7577 * unnesting...
7578 */
7579 } else {
7580 vm_map_clip_start(old_map, last, start);
7581 }
7582 }
7583 *old_entry_p = last;
7584
7585 return TRUE;
7586 }
7587
7588 /*
7589 * vm_map_fork:
7590 *
7591 * Create and return a new map based on the old
7592 * map, according to the inheritance values on the
7593 * regions in that map.
7594 *
7595 * The source map must not be locked.
7596 */
7597 vm_map_t
7598 vm_map_fork(
7599 vm_map_t old_map)
7600 {
7601 pmap_t new_pmap;
7602 vm_map_t new_map;
7603 vm_map_entry_t old_entry;
7604 vm_map_size_t new_size = 0, entry_size;
7605 vm_map_entry_t new_entry;
7606 boolean_t src_needs_copy;
7607 boolean_t new_entry_needs_copy;
7608
7609 #ifdef __i386__
7610 new_pmap = pmap_create((vm_map_size_t) 0,
7611 old_map->pmap->pm_task_map != TASK_MAP_32BIT);
7612 if (old_map->pmap->pm_task_map == TASK_MAP_64BIT_SHARED)
7613 pmap_set_4GB_pagezero(new_pmap);
7614 #else
7615 new_pmap = pmap_create((vm_map_size_t) 0, 0);
7616 #endif
7617
7618 vm_map_reference_swap(old_map);
7619 vm_map_lock(old_map);
7620
7621 new_map = vm_map_create(new_pmap,
7622 old_map->min_offset,
7623 old_map->max_offset,
7624 old_map->hdr.entries_pageable);
7625
7626 for (
7627 old_entry = vm_map_first_entry(old_map);
7628 old_entry != vm_map_to_entry(old_map);
7629 ) {
7630
7631 entry_size = old_entry->vme_end - old_entry->vme_start;
7632
7633 switch (old_entry->inheritance) {
7634 case VM_INHERIT_NONE:
7635 break;
7636
7637 case VM_INHERIT_SHARE:
7638 vm_map_fork_share(old_map, old_entry, new_map);
7639 new_size += entry_size;
7640 break;
7641
7642 case VM_INHERIT_COPY:
7643
7644 /*
7645 * Inline the copy_quickly case;
7646 * upon failure, fall back on call
7647 * to vm_map_fork_copy.
7648 */
7649
7650 if(old_entry->is_sub_map)
7651 break;
7652 if ((old_entry->wired_count != 0) ||
7653 ((old_entry->object.vm_object != NULL) &&
7654 (old_entry->object.vm_object->true_share))) {
7655 goto slow_vm_map_fork_copy;
7656 }
7657
7658 new_entry = vm_map_entry_create(new_map);
7659 vm_map_entry_copy(new_entry, old_entry);
7660 /* clear address space specifics */
7661 new_entry->use_pmap = FALSE;
7662
7663 if (! vm_object_copy_quickly(
7664 &new_entry->object.vm_object,
7665 old_entry->offset,
7666 (old_entry->vme_end -
7667 old_entry->vme_start),
7668 &src_needs_copy,
7669 &new_entry_needs_copy)) {
7670 vm_map_entry_dispose(new_map, new_entry);
7671 goto slow_vm_map_fork_copy;
7672 }
7673
7674 /*
7675 * Handle copy-on-write obligations
7676 */
7677
7678 if (src_needs_copy && !old_entry->needs_copy) {
7679 vm_prot_t prot;
7680
7681 prot = old_entry->protection & ~VM_PROT_WRITE;
7682
7683 if (override_nx(old_map, old_entry->alias) && prot)
7684 prot |= VM_PROT_EXECUTE;
7685
7686 vm_object_pmap_protect(
7687 old_entry->object.vm_object,
7688 old_entry->offset,
7689 (old_entry->vme_end -
7690 old_entry->vme_start),
7691 ((old_entry->is_shared
7692 || old_map->mapped)
7693 ? PMAP_NULL :
7694 old_map->pmap),
7695 old_entry->vme_start,
7696 prot);
7697
7698 old_entry->needs_copy = TRUE;
7699 }
7700 new_entry->needs_copy = new_entry_needs_copy;
7701
7702 /*
7703 * Insert the entry at the end
7704 * of the map.
7705 */
7706
7707 vm_map_entry_link(new_map, vm_map_last_entry(new_map),
7708 new_entry);
7709 new_size += entry_size;
7710 break;
7711
7712 slow_vm_map_fork_copy:
7713 if (vm_map_fork_copy(old_map, &old_entry, new_map)) {
7714 new_size += entry_size;
7715 }
7716 continue;
7717 }
7718 old_entry = old_entry->vme_next;
7719 }
7720
7721 new_map->size = new_size;
7722 vm_map_unlock(old_map);
7723 vm_map_deallocate(old_map);
7724
7725 return(new_map);
7726 }
7727
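/*
 * vm_map_fork() above walks the parent's entries and dispatches on each
 * entry's inheritance value: VM_INHERIT_NONE entries are skipped,
 * VM_INHERIT_SHARE entries go through vm_map_fork_share(), and
 * VM_INHERIT_COPY entries are copied (copy-on-write when possible).  A
 * hedged sketch of arranging for a region to be shared across a fork by
 * setting its inheritance first; example_share_across_fork() is a
 * hypothetical name, and the vm_map_inherit() signature is assumed from
 * its usual declaration elsewhere in this file.
 */
#if 0 /* illustrative sketch, not compiled */
static vm_map_t
example_share_across_fork(
	vm_map_t	old_map,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	/*
	 * Mark the region VM_INHERIT_SHARE so that vm_map_fork() routes
	 * its entries through vm_map_fork_share() instead of copying them.
	 */
	(void) vm_map_inherit(old_map, start, end, VM_INHERIT_SHARE);

	/* the child map shares the region with old_map */
	return vm_map_fork(old_map);
}
#endif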
7728 /*
7729 * vm_map_exec:
7730 *
7731 * Setup the "new_map" with the proper execution environment according
7732 * to the type of executable (platform, 64bit, chroot environment).
7733 * Map the comm page and shared region, etc...
7734 */
7735 kern_return_t
7736 vm_map_exec(
7737 vm_map_t new_map,
7738 task_t task,
7739 void *fsroot,
7740 cpu_type_t cpu)
7741 {
7742 SHARED_REGION_TRACE_DEBUG(
7743 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): ->\n",
7744 current_task(), new_map, task, fsroot, cpu));
7745 (void) vm_commpage_enter(new_map, task);
7746 (void) vm_shared_region_enter(new_map, task, fsroot, cpu);
7747 SHARED_REGION_TRACE_DEBUG(
7748 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): <-\n",
7749 current_task(), new_map, task, fsroot, cpu));
7750 return KERN_SUCCESS;
7751 }
7752
7753 /*
7754 * vm_map_lookup_locked:
7755 *
7756 * Finds the VM object, offset, and
7757 * protection for a given virtual address in the
7758 * specified map, assuming a page fault of the
7759 * type specified.
7760 *
7761 * Returns the (object, offset, protection) for
7762 * this address, whether it is wired down, and whether
7763 * this map has the only reference to the data in question.
7764 * In order to later verify this lookup, a "version"
7765 * is returned.
7766 *
7767 * The map MUST be locked by the caller and WILL be
7768 * locked on exit. In order to guarantee the
7769 * existence of the returned object, it is returned
7770 * locked.
7771 *
7772 * If a lookup is requested with "write protection"
7773 * specified, the map may be changed to perform virtual
7774 * copying operations, although the data referenced will
7775 * remain the same.
7776 */
7777 kern_return_t
7778 vm_map_lookup_locked(
7779 vm_map_t *var_map, /* IN/OUT */
7780 vm_map_offset_t vaddr,
7781 vm_prot_t fault_type,
7782 int object_lock_type,
7783 vm_map_version_t *out_version, /* OUT */
7784 vm_object_t *object, /* OUT */
7785 vm_object_offset_t *offset, /* OUT */
7786 vm_prot_t *out_prot, /* OUT */
7787 boolean_t *wired, /* OUT */
7788 vm_object_fault_info_t fault_info, /* OUT */
7789 vm_map_t *real_map)
7790 {
7791 vm_map_entry_t entry;
7792 register vm_map_t map = *var_map;
7793 vm_map_t old_map = *var_map;
7794 vm_map_t cow_sub_map_parent = VM_MAP_NULL;
7795 vm_map_offset_t cow_parent_vaddr = 0;
7796 vm_map_offset_t old_start = 0;
7797 vm_map_offset_t old_end = 0;
7798 register vm_prot_t prot;
7799
7800 *real_map = map;
7801 RetryLookup: ;
7802
7803 /*
7804 * If the map has an interesting hint, try it before calling the
7805 * full-blown lookup routine.
7806 */
7807 entry = map->hint;
7808
7809 if ((entry == vm_map_to_entry(map)) ||
7810 (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
7811 vm_map_entry_t tmp_entry;
7812
7813 /*
7814 * Entry was either not a valid hint, or the vaddr
7815 * was not contained in the entry, so do a full lookup.
7816 */
7817 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
7818 if((cow_sub_map_parent) && (cow_sub_map_parent != map))
7819 vm_map_unlock(cow_sub_map_parent);
7820 if((*real_map != map)
7821 && (*real_map != cow_sub_map_parent))
7822 vm_map_unlock(*real_map);
7823 return KERN_INVALID_ADDRESS;
7824 }
7825
7826 entry = tmp_entry;
7827 }
7828 if(map == old_map) {
7829 old_start = entry->vme_start;
7830 old_end = entry->vme_end;
7831 }
7832
7833 /*
7834 * Handle submaps. Drop lock on upper map, submap is
7835 * returned locked.
7836 */
7837
7838 submap_recurse:
7839 if (entry->is_sub_map) {
7840 vm_map_offset_t local_vaddr;
7841 vm_map_offset_t end_delta;
7842 vm_map_offset_t start_delta;
7843 vm_map_entry_t submap_entry;
7844 boolean_t mapped_needs_copy=FALSE;
7845
7846 local_vaddr = vaddr;
7847
7848 if ((entry->use_pmap && !(fault_type & VM_PROT_WRITE))) {
7849 /* if real_map equals map we unlock below */
7850 if ((*real_map != map) &&
7851 (*real_map != cow_sub_map_parent))
7852 vm_map_unlock(*real_map);
7853 *real_map = entry->object.sub_map;
7854 }
7855
7856 if(entry->needs_copy && (fault_type & VM_PROT_WRITE)) {
7857 if (!mapped_needs_copy) {
7858 if (vm_map_lock_read_to_write(map)) {
7859 vm_map_lock_read(map);
7860 /* XXX FBDP: entry still valid ? */
7861 if(*real_map == entry->object.sub_map)
7862 *real_map = map;
7863 goto RetryLookup;
7864 }
7865 vm_map_lock_read(entry->object.sub_map);
7866 cow_sub_map_parent = map;
7867 /* reset base to map before cow object */
7868 /* this is the map which will accept */
7869 /* the new cow object */
7870 old_start = entry->vme_start;
7871 old_end = entry->vme_end;
7872 cow_parent_vaddr = vaddr;
7873 mapped_needs_copy = TRUE;
7874 } else {
7875 vm_map_lock_read(entry->object.sub_map);
7876 if((cow_sub_map_parent != map) &&
7877 (*real_map != map))
7878 vm_map_unlock(map);
7879 }
7880 } else {
7881 vm_map_lock_read(entry->object.sub_map);
7882 /* leave the map locked if it is a target */
7883 /* cow sub_map above; otherwise, just */
7884 /* follow the maps down to the object. */
7885 /* Here we unlock knowing we are not */
7886 /* revisiting the map. */
7887 if((*real_map != map) && (map != cow_sub_map_parent))
7888 vm_map_unlock_read(map);
7889 }
7890
7891 /* XXX FBDP: map has been unlocked, what protects "entry" !? */
7892 *var_map = map = entry->object.sub_map;
7893
7894 /* calculate the offset in the submap for vaddr */
7895 local_vaddr = (local_vaddr - entry->vme_start) + entry->offset;
7896
7897 RetrySubMap:
7898 if(!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
7899 if((cow_sub_map_parent) && (cow_sub_map_parent != map)){
7900 vm_map_unlock(cow_sub_map_parent);
7901 }
7902 if((*real_map != map)
7903 && (*real_map != cow_sub_map_parent)) {
7904 vm_map_unlock(*real_map);
7905 }
7906 *real_map = map;
7907 return KERN_INVALID_ADDRESS;
7908 }
7909
7910 /* find the attenuated shadow of the underlying object */
7911 /* on our target map */
7912
7913 /* In plain English: the submap object may extend beyond the */
7914 /* region mapped by the entry, or may only fill a portion */
7915 /* of it.  For our purposes, we only care if the object */
7916 /* doesn't fill it.  In that case the area which will */
7917 /* ultimately be clipped in the top map will only need */
7918 /* to be as big as the portion of the underlying entry */
7919 /* which is mapped */
7920 start_delta = submap_entry->vme_start > entry->offset ?
7921 submap_entry->vme_start - entry->offset : 0;
7922
7923 end_delta =
7924 (entry->offset + start_delta + (old_end - old_start)) <=
7925 submap_entry->vme_end ?
7926 0 : (entry->offset +
7927 (old_end - old_start))
7928 - submap_entry->vme_end;
7929
7930 old_start += start_delta;
7931 old_end -= end_delta;
7932
7933 if(submap_entry->is_sub_map) {
7934 entry = submap_entry;
7935 vaddr = local_vaddr;
7936 goto submap_recurse;
7937 }
7938
7939 if(((fault_type & VM_PROT_WRITE) && cow_sub_map_parent)) {
7940
7941 vm_object_t sub_object, copy_object;
7942 vm_object_offset_t copy_offset;
7943 vm_map_offset_t local_start;
7944 vm_map_offset_t local_end;
7945 boolean_t copied_slowly = FALSE;
7946
7947 if (vm_map_lock_read_to_write(map)) {
7948 vm_map_lock_read(map);
7949 old_start -= start_delta;
7950 old_end += end_delta;
7951 goto RetrySubMap;
7952 }
7953
7954
7955 sub_object = submap_entry->object.vm_object;
7956 if (sub_object == VM_OBJECT_NULL) {
7957 sub_object =
7958 vm_object_allocate(
7959 (vm_map_size_t)
7960 (submap_entry->vme_end -
7961 submap_entry->vme_start));
7962 submap_entry->object.vm_object = sub_object;
7963 submap_entry->offset = 0;
7964 }
7965 local_start = local_vaddr -
7966 (cow_parent_vaddr - old_start);
7967 local_end = local_vaddr +
7968 (old_end - cow_parent_vaddr);
7969 vm_map_clip_start(map, submap_entry, local_start);
7970 vm_map_clip_end(map, submap_entry, local_end);
7971 /* unnesting was done in vm_map_clip_start/end() */
7972 assert(!submap_entry->use_pmap);
7973
7974 /* This is the COW case; let's connect */
7975 /* an entry in our space to the underlying */
7976 /* object in the submap, bypassing the */
7977 /* submap. */
7978
7979
7980 if(submap_entry->wired_count != 0 ||
7981 (sub_object->copy_strategy !=
7982 MEMORY_OBJECT_COPY_SYMMETRIC)) {
7983 vm_object_lock(sub_object);
7984 vm_object_copy_slowly(sub_object,
7985 submap_entry->offset,
7986 (submap_entry->vme_end -
7987 submap_entry->vme_start),
7988 FALSE,
7989 &copy_object);
7990 copied_slowly = TRUE;
7991 } else {
7992
7993 /* set up shadow object */
7994 copy_object = sub_object;
7995 vm_object_reference(copy_object);
7996 sub_object->shadowed = TRUE;
7997 submap_entry->needs_copy = TRUE;
7998
7999 prot = submap_entry->protection & ~VM_PROT_WRITE;
8000
8001 if (override_nx(map, submap_entry->alias) && prot)
8002 prot |= VM_PROT_EXECUTE;
8003
8004 vm_object_pmap_protect(
8005 sub_object,
8006 submap_entry->offset,
8007 submap_entry->vme_end -
8008 submap_entry->vme_start,
8009 (submap_entry->is_shared
8010 || map->mapped) ?
8011 PMAP_NULL : map->pmap,
8012 submap_entry->vme_start,
8013 prot);
8014 }
8015
8016 /*
8017 * Adjust the fault offset to the submap entry.
8018 */
8019 copy_offset = (local_vaddr -
8020 submap_entry->vme_start +
8021 submap_entry->offset);
8022
8023 /* This works differently from the */
8024 /* normal submap case.  We go back */
8025 /* to the parent of the cow map and */
8026 /* clip out the target portion of */
8027 /* the sub_map, substituting the */
8028 /* new copy object. */
8029
8030 vm_map_unlock(map);
8031 local_start = old_start;
8032 local_end = old_end;
8033 map = cow_sub_map_parent;
8034 *var_map = cow_sub_map_parent;
8035 vaddr = cow_parent_vaddr;
8036 cow_sub_map_parent = NULL;
8037
8038 if(!vm_map_lookup_entry(map,
8039 vaddr, &entry)) {
8040 vm_object_deallocate(
8041 copy_object);
8042 vm_map_lock_write_to_read(map);
8043 return KERN_INVALID_ADDRESS;
8044 }
8045
8046 /* clip out the portion of space */
8047 /* mapped by the sub map which */
8048 /* corresponds to the underlying */
8049 /* object */
8050
8051 /*
8052 * Clip (and unnest) the smallest nested chunk
8053 * possible around the faulting address...
8054 */
8055 local_start = vaddr & ~(pmap_nesting_size_min - 1);
8056 local_end = local_start + pmap_nesting_size_min;
8057 /*
8058 * ... but don't go beyond the "old_start" to "old_end"
8059 * range, to avoid spanning over another VM region
8060 * with a possibly different VM object and/or offset.
8061 */
8062 if (local_start < old_start) {
8063 local_start = old_start;
8064 }
8065 if (local_end > old_end) {
8066 local_end = old_end;
8067 }
8068 /*
8069 * Adjust copy_offset to the start of the range.
8070 */
8071 copy_offset -= (vaddr - local_start);
8072
8073 vm_map_clip_start(map, entry, local_start);
8074 vm_map_clip_end(map, entry, local_end);
8075 /* unnesting was done in vm_map_clip_start/end() */
8076 assert(!entry->use_pmap);
8077
8078 /* substitute copy object for */
8079 /* shared map entry */
8080 vm_map_deallocate(entry->object.sub_map);
8081 entry->is_sub_map = FALSE;
8082 entry->object.vm_object = copy_object;
8083
8084 /* propagate the submap entry's protections */
8085 entry->protection |= submap_entry->protection;
8086 entry->max_protection |= submap_entry->max_protection;
8087
8088 if(copied_slowly) {
8089 entry->offset = 0;
8090 entry->needs_copy = FALSE;
8091 entry->is_shared = FALSE;
8092 } else {
8093 entry->offset = copy_offset;
8094 entry->needs_copy = TRUE;
8095 if(entry->inheritance == VM_INHERIT_SHARE)
8096 entry->inheritance = VM_INHERIT_COPY;
8097 if (map != old_map)
8098 entry->is_shared = TRUE;
8099 }
8100 if(entry->inheritance == VM_INHERIT_SHARE)
8101 entry->inheritance = VM_INHERIT_COPY;
8102
8103 vm_map_lock_write_to_read(map);
8104 } else {
8105 if((cow_sub_map_parent)
8106 && (cow_sub_map_parent != *real_map)
8107 && (cow_sub_map_parent != map)) {
8108 vm_map_unlock(cow_sub_map_parent);
8109 }
8110 entry = submap_entry;
8111 vaddr = local_vaddr;
8112 }
8113 }
8114
8115 /*
8116 * Check whether this task is allowed to have
8117 * this page.
8118 */
8119
8120 prot = entry->protection;
8121
8122 if (override_nx(map, entry->alias) && prot) {
8123 /*
8124 * HACK -- if not a stack, then allow execution
8125 */
8126 prot |= VM_PROT_EXECUTE;
8127 }
8128
8129 if ((fault_type & (prot)) != fault_type) {
8130 if (*real_map != map) {
8131 vm_map_unlock(*real_map);
8132 }
8133 *real_map = map;
8134
8135 if ((fault_type & VM_PROT_EXECUTE) && prot)
8136 log_stack_execution_failure((addr64_t)vaddr, prot);
8137
8138 DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL);
8139 return KERN_PROTECTION_FAILURE;
8140 }
8141
8142 /*
8143 * If this page is not pageable, we have to get
8144 * it for all possible accesses.
8145 */
8146
8147 *wired = (entry->wired_count != 0);
8148 if (*wired)
8149 fault_type = prot;
8150
8151 /*
8152 * If the entry was copy-on-write, we either shadow it now (write) or demote the permissions (read).
8153 */
8154
8155 if (entry->needs_copy) {
8156 /*
8157 * If we want to write the page, we may as well
8158 * handle that now since we've got the map locked.
8159 *
8160 * If we don't need to write the page, we just
8161 * demote the permissions allowed.
8162 */
8163
8164 if ((fault_type & VM_PROT_WRITE) || *wired) {
8165 /*
8166 * Make a new object, and place it in the
8167 * object chain. Note that no new references
8168 * have appeared -- one just moved from the
8169 * map to the new object.
8170 */
8171
8172 if (vm_map_lock_read_to_write(map)) {
8173 vm_map_lock_read(map);
8174 goto RetryLookup;
8175 }
8176 vm_object_shadow(&entry->object.vm_object,
8177 &entry->offset,
8178 (vm_map_size_t) (entry->vme_end -
8179 entry->vme_start));
8180
8181 entry->object.vm_object->shadowed = TRUE;
8182 entry->needs_copy = FALSE;
8183 vm_map_lock_write_to_read(map);
8184 }
8185 else {
8186 /*
8187 * We're attempting to read a copy-on-write
8188 * page -- don't allow writes.
8189 */
8190
8191 prot &= (~VM_PROT_WRITE);
8192 }
8193 }
8194
8195 /*
8196 * Create an object if necessary.
8197 */
8198 if (entry->object.vm_object == VM_OBJECT_NULL) {
8199
8200 if (vm_map_lock_read_to_write(map)) {
8201 vm_map_lock_read(map);
8202 goto RetryLookup;
8203 }
8204
8205 entry->object.vm_object = vm_object_allocate(
8206 (vm_map_size_t)(entry->vme_end - entry->vme_start));
8207 entry->offset = 0;
8208 vm_map_lock_write_to_read(map);
8209 }
8210
8211 /*
8212 * Return the object/offset from this entry. If the entry
8213 * was copy-on-write or empty, it has been fixed up. Also
8214 * return the protection.
8215 */
8216
8217 *offset = (vaddr - entry->vme_start) + entry->offset;
8218 *object = entry->object.vm_object;
8219 *out_prot = prot;
8220
8221 if (fault_info) {
8222 fault_info->interruptible = THREAD_UNINT; /* for now... */
8223 /* ... the caller will change "interruptible" if needed */
8224 fault_info->cluster_size = 0;
8225 fault_info->user_tag = entry->alias;
8226 fault_info->behavior = entry->behavior;
8227 fault_info->lo_offset = entry->offset;
8228 fault_info->hi_offset = (entry->vme_end - entry->vme_start) + entry->offset;
8229 fault_info->no_cache = entry->no_cache;
8230 }
8231
8232 /*
8233 * Lock the object to prevent it from disappearing
8234 */
8235 if (object_lock_type == OBJECT_LOCK_EXCLUSIVE)
8236 vm_object_lock(*object);
8237 else
8238 vm_object_lock_shared(*object);
8239
8240 /*
8241 * Save the version number
8242 */
8243
8244 out_version->main_timestamp = map->timestamp;
8245
8246 return KERN_SUCCESS;
8247 }
8248
8249
8250 /*
8251 * vm_map_verify:
8252 *
8253 * Verifies that the map in question has not changed
8254 * since the given version. If successful, the map
8255 * will not change until vm_map_verify_done() is called.
8256 */
8257 boolean_t
8258 vm_map_verify(
8259 register vm_map_t map,
8260 register vm_map_version_t *version) /* REF */
8261 {
8262 boolean_t result;
8263
8264 vm_map_lock_read(map);
8265 result = (map->timestamp == version->main_timestamp);
8266
8267 if (!result)
8268 vm_map_unlock_read(map);
8269
8270 return(result);
8271 }
8272
8273 /*
8274 * vm_map_verify_done:
8275 *
8276 * Releases locks acquired by a vm_map_verify.
8277 *
8278 * This is now a macro in vm/vm_map.h. It does a
8279 * vm_map_unlock_read on the map.
8280 */
8281
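/*
 * Illustrative usage sketch (added for exposition; not part of the
 * original xnu source): the timestamp/verify pattern that
 * vm_map_verify() and vm_map_verify_done() support. The names
 * prefixed "example_" are hypothetical.
 */
static boolean_t
example_verify_pattern(vm_map_t a_map)
{
	vm_map_version_t	version;

	/* record the map's version while it is read-locked */
	vm_map_lock_read(a_map);
	version.main_timestamp = a_map->timestamp;
	vm_map_unlock_read(a_map);

	/* ... work that does not require the map to stay unchanged ... */

	if (!vm_map_verify(a_map, &version)) {
		/* the map changed underneath us; the caller must redo its lookup */
		return FALSE;
	}
	/* unchanged: the map is now read-locked again */
	vm_map_verify_done(a_map, &version);	/* drops the read lock */
	return TRUE;
}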
8282
8283 /*
8284 * TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
8285 * Goes away after regular vm_region_recurse function migrates to
8286 * 64 bits
8287 * vm_region_recurse: A form of vm_region which follows the
8288 * submaps in a target map
8289 *
8290 */
8291
8292 kern_return_t
8293 vm_map_region_recurse_64(
8294 vm_map_t map,
8295 vm_map_offset_t *address, /* IN/OUT */
8296 vm_map_size_t *size, /* OUT */
8297 natural_t *nesting_depth, /* IN/OUT */
8298 vm_region_submap_info_64_t submap_info, /* IN/OUT */
8299 mach_msg_type_number_t *count) /* IN/OUT */
8300 {
8301 vm_region_extended_info_data_t extended;
8302 vm_map_entry_t tmp_entry;
8303 vm_map_offset_t user_address;
8304 unsigned int user_max_depth;
8305
8306 /*
8307 * "curr_entry" is the VM map entry preceding or including the
8308 * address we're looking for.
8309 * "curr_map" is the map or sub-map containing "curr_entry".
8310 * "curr_offset" is the cumulated offset of "curr_map" in the
8311 * target task's address space.
8312 * "curr_depth" is the depth of "curr_map" in the chain of
8313 * sub-maps.
8314 * "curr_max_offset" is the maximum offset we should take into
8315 * account in the current map. It may be smaller than the current
8316 * map's "max_offset" because we might not have mapped it all in
8317 * the upper level map.
8318 */
8319 vm_map_entry_t curr_entry;
8320 vm_map_offset_t curr_offset;
8321 vm_map_t curr_map;
8322 unsigned int curr_depth;
8323 vm_map_offset_t curr_max_offset;
8324
8325 /*
8326 * "next_" is the same as "curr_" but for the VM region immediately
8327 * after the address we're looking for. We need to keep track of this
8328 * too because we want to return info about that region if the
8329 * address we're looking for is not mapped.
8330 */
8331 vm_map_entry_t next_entry;
8332 vm_map_offset_t next_offset;
8333 vm_map_t next_map;
8334 unsigned int next_depth;
8335 vm_map_offset_t next_max_offset;
8336
8337 boolean_t look_for_pages;
8338 vm_region_submap_short_info_64_t short_info;
8339
8340 if (map == VM_MAP_NULL) {
8341 /* no address space to work on */
8342 return KERN_INVALID_ARGUMENT;
8343 }
8344
8345 if (*count < VM_REGION_SUBMAP_INFO_COUNT_64) {
8346 if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) {
8347 /*
8348 * "info" structure is not big enough and
8349 * would overflow
8350 */
8351 return KERN_INVALID_ARGUMENT;
8352 } else {
8353 look_for_pages = FALSE;
8354 *count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
8355 short_info = (vm_region_submap_short_info_64_t) submap_info;
8356 submap_info = NULL;
8357 }
8358 } else {
8359 look_for_pages = TRUE;
8360 *count = VM_REGION_SUBMAP_INFO_COUNT_64;
8361 short_info = NULL;
8362 }
8363
8364
8365 user_address = *address;
8366 user_max_depth = *nesting_depth;
8367
8368 curr_entry = NULL;
8369 curr_map = map;
8370 curr_offset = 0;
8371 curr_depth = 0;
8372 curr_max_offset = curr_map->max_offset;
8373
8374 next_entry = NULL;
8375 next_map = NULL;
8376 next_offset = 0;
8377 next_depth = 0;
8378 next_max_offset = curr_max_offset;
8379
8380 if (not_in_kdp) {
8381 vm_map_lock_read(curr_map);
8382 }
8383
8384 for (;;) {
8385 if (vm_map_lookup_entry(curr_map,
8386 user_address - curr_offset,
8387 &tmp_entry)) {
8388 /* tmp_entry contains the address we're looking for */
8389 curr_entry = tmp_entry;
8390 } else {
8391 /*
8392 * The address is not mapped. "tmp_entry" is the
8393 * map entry preceding the address. We want the next
8394 * one, if it exists.
8395 */
8396 curr_entry = tmp_entry->vme_next;
8397 if (curr_entry == vm_map_to_entry(curr_map) ||
8398 curr_entry->vme_start >= curr_max_offset) {
8399 /* no next entry at this level: stop looking */
8400 if (not_in_kdp) {
8401 vm_map_unlock_read(curr_map);
8402 }
8403 curr_entry = NULL;
8404 curr_map = NULL;
8405 curr_offset = 0;
8406 curr_depth = 0;
8407 curr_max_offset = 0;
8408 break;
8409 }
8410 }
8411
8412 /*
8413 * Is the next entry at this level closer to the address (or
8414 * deeper in the submap chain) than the one we had
8415 * so far?
8416 */
8417 tmp_entry = curr_entry->vme_next;
8418 if (tmp_entry == vm_map_to_entry(curr_map)) {
8419 /* no next entry at this level */
8420 } else if (tmp_entry->vme_start >= curr_max_offset) {
8421 /*
8422 * tmp_entry is beyond the scope of what we mapped of
8423 * this submap in the upper level: ignore it.
8424 */
8425 } else if ((next_entry == NULL) ||
8426 (tmp_entry->vme_start + curr_offset <=
8427 next_entry->vme_start + next_offset)) {
8428 /*
8429 * We didn't have a "next_entry" or this one is
8430 * closer to the address we're looking for:
8431 * use this "tmp_entry" as the new "next_entry".
8432 */
8433 if (next_entry != NULL) {
8434 /* unlock the last "next_map" */
8435 if (next_map != curr_map && not_in_kdp) {
8436 vm_map_unlock_read(next_map);
8437 }
8438 }
8439 next_entry = tmp_entry;
8440 next_map = curr_map;
8441 next_offset = curr_offset;
8442 next_depth = curr_depth;
8443 next_max_offset = curr_max_offset;
8444 }
8445
8446 if (!curr_entry->is_sub_map ||
8447 curr_depth >= user_max_depth) {
8448 /*
8449 * We hit a leaf map or we reached the maximum depth
8450 * we could, so stop looking. Keep the current map
8451 * locked.
8452 */
8453 break;
8454 }
8455
8456 /*
8457 * Get down to the next submap level.
8458 */
8459
8460 /*
8461 * Lock the next level and unlock the current level,
8462 * unless we need to keep it locked to access the "next_entry"
8463 * later.
8464 */
8465 if (not_in_kdp) {
8466 vm_map_lock_read(curr_entry->object.sub_map);
8467 }
8468 if (curr_map == next_map) {
8469 /* keep "next_map" locked in case we need it */
8470 } else {
8471 /* release this map */
8472 vm_map_unlock_read(curr_map);
8473 }
8474
8475 /*
8476 * Adjust the offset. "curr_entry" maps the submap
8477 * at relative address "curr_entry->vme_start" in the
8478 * curr_map but skips the first "curr_entry->offset"
8479 * bytes of the submap.
8480 * "curr_offset" always represents the offset of a virtual
8481 * address in the curr_map relative to the absolute address
8482 * space (i.e. the top-level VM map).
8483 */
8484 curr_offset +=
8485 (curr_entry->vme_start - curr_entry->offset);
8486 /* switch to the submap */
8487 curr_map = curr_entry->object.sub_map;
8488 curr_depth++;
8489 /*
8490 * "curr_max_offset" allows us to keep track of the
8491 * portion of the submap that is actually mapped at this level:
8492 * the rest of that submap is irrelevant to us, since it's not
8493 * mapped here.
8494 * The relevant portion of the submap starts at
8495 * "curr_entry->offset" and extends for the size of "curr_entry".
8496 */
8497 curr_max_offset =
8498 curr_entry->vme_end - curr_entry->vme_start +
8499 curr_entry->offset;
8500 curr_entry = NULL;
8501 }
8502
8503 if (curr_entry == NULL) {
8504 /* no VM region contains the address... */
8505 if (next_entry == NULL) {
8506 /* ... and no VM region follows it either */
8507 return KERN_INVALID_ADDRESS;
8508 }
8509 /* ... gather info about the next VM region */
8510 curr_entry = next_entry;
8511 curr_map = next_map; /* still locked ... */
8512 curr_offset = next_offset;
8513 curr_depth = next_depth;
8514 curr_max_offset = next_max_offset;
8515 } else {
8516 /* we won't need "next_entry" after all */
8517 if (next_entry != NULL) {
8518 /* release "next_map" */
8519 if (next_map != curr_map && not_in_kdp) {
8520 vm_map_unlock_read(next_map);
8521 }
8522 }
8523 }
8524 next_entry = NULL;
8525 next_map = NULL;
8526 next_offset = 0;
8527 next_depth = 0;
8528 next_max_offset = 0;
8529
8530 *nesting_depth = curr_depth;
8531 *size = curr_entry->vme_end - curr_entry->vme_start;
8532 *address = curr_entry->vme_start + curr_offset;
8533
8534 if (look_for_pages) {
8535 submap_info->user_tag = curr_entry->alias;
8536 submap_info->offset = curr_entry->offset;
8537 submap_info->protection = curr_entry->protection;
8538 submap_info->inheritance = curr_entry->inheritance;
8539 submap_info->max_protection = curr_entry->max_protection;
8540 submap_info->behavior = curr_entry->behavior;
8541 submap_info->user_wired_count = curr_entry->user_wired_count;
8542 submap_info->is_submap = curr_entry->is_sub_map;
8543 submap_info->object_id = (uint32_t) curr_entry->object.vm_object;
8544 } else {
8545 short_info->user_tag = curr_entry->alias;
8546 short_info->offset = curr_entry->offset;
8547 short_info->protection = curr_entry->protection;
8548 short_info->inheritance = curr_entry->inheritance;
8549 short_info->max_protection = curr_entry->max_protection;
8550 short_info->behavior = curr_entry->behavior;
8551 short_info->user_wired_count = curr_entry->user_wired_count;
8552 short_info->is_submap = curr_entry->is_sub_map;
8553 short_info->object_id = (uint32_t) curr_entry->object.vm_object;
8554 }
8555
8556 extended.pages_resident = 0;
8557 extended.pages_swapped_out = 0;
8558 extended.pages_shared_now_private = 0;
8559 extended.pages_dirtied = 0;
8560 extended.external_pager = 0;
8561 extended.shadow_depth = 0;
8562
8563 if (not_in_kdp) {
8564 if (!curr_entry->is_sub_map) {
8565 vm_map_region_walk(curr_map,
8566 curr_entry->vme_start,
8567 curr_entry,
8568 curr_entry->offset,
8569 (curr_entry->vme_end -
8570 curr_entry->vme_start),
8571 &extended,
8572 look_for_pages);
8573 if (extended.external_pager &&
8574 extended.ref_count == 2 &&
8575 extended.share_mode == SM_SHARED) {
8576 extended.share_mode = SM_PRIVATE;
8577 }
8578 } else {
8579 if (curr_entry->use_pmap) {
8580 extended.share_mode = SM_TRUESHARED;
8581 } else {
8582 extended.share_mode = SM_PRIVATE;
8583 }
8584 extended.ref_count =
8585 curr_entry->object.sub_map->ref_count;
8586 }
8587 }
8588
8589 if (look_for_pages) {
8590 submap_info->pages_resident = extended.pages_resident;
8591 submap_info->pages_swapped_out = extended.pages_swapped_out;
8592 submap_info->pages_shared_now_private =
8593 extended.pages_shared_now_private;
8594 submap_info->pages_dirtied = extended.pages_dirtied;
8595 submap_info->external_pager = extended.external_pager;
8596 submap_info->shadow_depth = extended.shadow_depth;
8597 submap_info->share_mode = extended.share_mode;
8598 submap_info->ref_count = extended.ref_count;
8599 } else {
8600 short_info->external_pager = extended.external_pager;
8601 short_info->shadow_depth = extended.shadow_depth;
8602 short_info->share_mode = extended.share_mode;
8603 short_info->ref_count = extended.ref_count;
8604 }
8605
8606 if (not_in_kdp) {
8607 vm_map_unlock_read(curr_map);
8608 }
8609
8610 return KERN_SUCCESS;
8611 }
8612
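/*
 * Illustrative usage sketch (added for exposition; not part of the
 * original xnu source): walking every top-level region of a map with
 * vm_map_region_recurse_64(). The info types come from
 * <mach/vm_region.h>; names prefixed "example_" are hypothetical.
 */
static void
example_walk_regions(vm_map_t a_map)
{
	vm_map_offset_t			addr = 0;
	vm_map_size_t			size;
	natural_t			depth;
	vm_region_submap_info_data_64_t	info;
	mach_msg_type_number_t		count;

	for (;;) {
		depth = 0;	/* do not descend into submaps */
		count = VM_REGION_SUBMAP_INFO_COUNT_64;
		if (vm_map_region_recurse_64(a_map, &addr, &size, &depth,
					     &info, &count) != KERN_SUCCESS)
			break;	/* no region at or above "addr" */
		/* [addr, addr + size) now describes one region; "info" is filled in */
		addr += size;	/* advance past it */
	}
}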
8613 /*
8614 * vm_map_region:
8615 *
8616 * User call to obtain information about a region in
8617 * a task's address map. The basic (32- and 64-bit),
8618 * extended and top info flavors are supported.
8619 *
8620 * XXX The reserved and behavior fields cannot be filled
8621 * in until the vm merge from the IK is completed, and
8622 * vm_reserve is implemented.
8623 */
8624
8625 kern_return_t
8626 vm_map_region(
8627 vm_map_t map,
8628 vm_map_offset_t *address, /* IN/OUT */
8629 vm_map_size_t *size, /* OUT */
8630 vm_region_flavor_t flavor, /* IN */
8631 vm_region_info_t info, /* OUT */
8632 mach_msg_type_number_t *count, /* IN/OUT */
8633 mach_port_t *object_name) /* OUT */
8634 {
8635 vm_map_entry_t tmp_entry;
8636 vm_map_entry_t entry;
8637 vm_map_offset_t start;
8638
8639 if (map == VM_MAP_NULL)
8640 return(KERN_INVALID_ARGUMENT);
8641
8642 switch (flavor) {
8643
8644 case VM_REGION_BASIC_INFO:
8645 /* legacy for old 32-bit objects info */
8646 {
8647 vm_region_basic_info_t basic;
8648
8649 if (*count < VM_REGION_BASIC_INFO_COUNT)
8650 return(KERN_INVALID_ARGUMENT);
8651
8652 basic = (vm_region_basic_info_t) info;
8653 *count = VM_REGION_BASIC_INFO_COUNT;
8654
8655 vm_map_lock_read(map);
8656
8657 start = *address;
8658 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
8659 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
8660 vm_map_unlock_read(map);
8661 return(KERN_INVALID_ADDRESS);
8662 }
8663 } else {
8664 entry = tmp_entry;
8665 }
8666
8667 start = entry->vme_start;
8668
8669 basic->offset = (uint32_t)entry->offset;
8670 basic->protection = entry->protection;
8671 basic->inheritance = entry->inheritance;
8672 basic->max_protection = entry->max_protection;
8673 basic->behavior = entry->behavior;
8674 basic->user_wired_count = entry->user_wired_count;
8675 basic->reserved = entry->is_sub_map;
8676 *address = start;
8677 *size = (entry->vme_end - start);
8678
8679 if (object_name) *object_name = IP_NULL;
8680 if (entry->is_sub_map) {
8681 basic->shared = FALSE;
8682 } else {
8683 basic->shared = entry->is_shared;
8684 }
8685
8686 vm_map_unlock_read(map);
8687 return(KERN_SUCCESS);
8688 }
8689
8690 case VM_REGION_BASIC_INFO_64:
8691 {
8692 vm_region_basic_info_64_t basic;
8693
8694 if (*count < VM_REGION_BASIC_INFO_COUNT_64)
8695 return(KERN_INVALID_ARGUMENT);
8696
8697 basic = (vm_region_basic_info_64_t) info;
8698 *count = VM_REGION_BASIC_INFO_COUNT_64;
8699
8700 vm_map_lock_read(map);
8701
8702 start = *address;
8703 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
8704 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
8705 vm_map_unlock_read(map);
8706 return(KERN_INVALID_ADDRESS);
8707 }
8708 } else {
8709 entry = tmp_entry;
8710 }
8711
8712 start = entry->vme_start;
8713
8714 basic->offset = entry->offset;
8715 basic->protection = entry->protection;
8716 basic->inheritance = entry->inheritance;
8717 basic->max_protection = entry->max_protection;
8718 basic->behavior = entry->behavior;
8719 basic->user_wired_count = entry->user_wired_count;
8720 basic->reserved = entry->is_sub_map;
8721 *address = start;
8722 *size = (entry->vme_end - start);
8723
8724 if (object_name) *object_name = IP_NULL;
8725 if (entry->is_sub_map) {
8726 basic->shared = FALSE;
8727 } else {
8728 basic->shared = entry->is_shared;
8729 }
8730
8731 vm_map_unlock_read(map);
8732 return(KERN_SUCCESS);
8733 }
8734 case VM_REGION_EXTENDED_INFO:
8735 {
8736 vm_region_extended_info_t extended;
8737
8738 if (*count < VM_REGION_EXTENDED_INFO_COUNT)
8739 return(KERN_INVALID_ARGUMENT);
8740
8741 extended = (vm_region_extended_info_t) info;
8742 *count = VM_REGION_EXTENDED_INFO_COUNT;
8743
8744 vm_map_lock_read(map);
8745
8746 start = *address;
8747 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
8748 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
8749 vm_map_unlock_read(map);
8750 return(KERN_INVALID_ADDRESS);
8751 }
8752 } else {
8753 entry = tmp_entry;
8754 }
8755 start = entry->vme_start;
8756
8757 extended->protection = entry->protection;
8758 extended->user_tag = entry->alias;
8759 extended->pages_resident = 0;
8760 extended->pages_swapped_out = 0;
8761 extended->pages_shared_now_private = 0;
8762 extended->pages_dirtied = 0;
8763 extended->external_pager = 0;
8764 extended->shadow_depth = 0;
8765
8766 vm_map_region_walk(map, start, entry, entry->offset, entry->vme_end - start, extended, TRUE);
8767
8768 if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED)
8769 extended->share_mode = SM_PRIVATE;
8770
8771 if (object_name)
8772 *object_name = IP_NULL;
8773 *address = start;
8774 *size = (entry->vme_end - start);
8775
8776 vm_map_unlock_read(map);
8777 return(KERN_SUCCESS);
8778 }
8779 case VM_REGION_TOP_INFO:
8780 {
8781 vm_region_top_info_t top;
8782
8783 if (*count < VM_REGION_TOP_INFO_COUNT)
8784 return(KERN_INVALID_ARGUMENT);
8785
8786 top = (vm_region_top_info_t) info;
8787 *count = VM_REGION_TOP_INFO_COUNT;
8788
8789 vm_map_lock_read(map);
8790
8791 start = *address;
8792 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
8793 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
8794 vm_map_unlock_read(map);
8795 return(KERN_INVALID_ADDRESS);
8796 }
8797 } else {
8798 entry = tmp_entry;
8799
8800 }
8801 start = entry->vme_start;
8802
8803 top->private_pages_resident = 0;
8804 top->shared_pages_resident = 0;
8805
8806 vm_map_region_top_walk(entry, top);
8807
8808 if (object_name)
8809 *object_name = IP_NULL;
8810 *address = start;
8811 *size = (entry->vme_end - start);
8812
8813 vm_map_unlock_read(map);
8814 return(KERN_SUCCESS);
8815 }
8816 default:
8817 return(KERN_INVALID_ARGUMENT);
8818 }
8819 }
8820
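/*
 * Illustrative usage sketch (added for exposition; not part of the
 * original xnu source): fetching the 64-bit basic info for the region
 * containing (or following) "some_addr". Names prefixed "example_" are
 * hypothetical.
 */
static kern_return_t
example_region_basic_64(vm_map_t a_map, vm_map_offset_t some_addr)
{
	vm_map_offset_t			addr = some_addr;
	vm_map_size_t			size;
	vm_region_basic_info_data_64_t	basic;
	mach_msg_type_number_t		count = VM_REGION_BASIC_INFO_COUNT_64;
	mach_port_t			name;

	return vm_map_region(a_map, &addr, &size,
			     VM_REGION_BASIC_INFO_64,
			     (vm_region_info_t) &basic,
			     &count, &name);
}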
8821 #define min(a, b) (((a) < (b)) ? (a) : (b))
8822
8823 void
8824 vm_map_region_top_walk(
8825 vm_map_entry_t entry,
8826 vm_region_top_info_t top)
8827 {
8828
8829 if (entry->object.vm_object == 0 || entry->is_sub_map) {
8830 top->share_mode = SM_EMPTY;
8831 top->ref_count = 0;
8832 top->obj_id = 0;
8833 return;
8834 }
8835
8836 {
8837 struct vm_object *obj, *tmp_obj;
8838 int ref_count;
8839 uint32_t entry_size;
8840
8841 entry_size = (entry->vme_end - entry->vme_start) / PAGE_SIZE;
8842
8843 obj = entry->object.vm_object;
8844
8845 vm_object_lock(obj);
8846
8847 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
8848 ref_count--;
8849
8850 if (obj->shadow) {
8851 if (ref_count == 1)
8852 top->private_pages_resident = min(obj->resident_page_count, entry_size);
8853 else
8854 top->shared_pages_resident = min(obj->resident_page_count, entry_size);
8855 top->ref_count = ref_count;
8856 top->share_mode = SM_COW;
8857
8858 while ((tmp_obj = obj->shadow)) {
8859 vm_object_lock(tmp_obj);
8860 vm_object_unlock(obj);
8861 obj = tmp_obj;
8862
8863 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
8864 ref_count--;
8865
8866 top->shared_pages_resident += min(obj->resident_page_count, entry_size);
8867 top->ref_count += ref_count - 1;
8868 }
8869 } else {
8870 if (entry->needs_copy) {
8871 top->share_mode = SM_COW;
8872 top->shared_pages_resident = min(obj->resident_page_count, entry_size);
8873 } else {
8874 if (ref_count == 1 ||
8875 (ref_count == 2 && !(obj->pager_trusted) && !(obj->internal))) {
8876 top->share_mode = SM_PRIVATE;
8877 top->private_pages_resident = min(obj->resident_page_count, entry_size);
8878 } else {
8879 top->share_mode = SM_SHARED;
8880 top->shared_pages_resident = min(obj->resident_page_count, entry_size);
8881 }
8882 }
8883 top->ref_count = ref_count;
8884 }
8885 top->obj_id = (int)obj;
8886
8887 vm_object_unlock(obj);
8888 }
8889 }
8890
8891 void
8892 vm_map_region_walk(
8893 vm_map_t map,
8894 vm_map_offset_t va,
8895 vm_map_entry_t entry,
8896 vm_object_offset_t offset,
8897 vm_object_size_t range,
8898 vm_region_extended_info_t extended,
8899 boolean_t look_for_pages)
8900 {
8901 register struct vm_object *obj, *tmp_obj;
8902 register vm_map_offset_t last_offset;
8903 register int i;
8904 register int ref_count;
8905 struct vm_object *shadow_object;
8906 int shadow_depth;
8907
8908 if ((entry->object.vm_object == 0) ||
8909 (entry->is_sub_map) ||
8910 (entry->object.vm_object->phys_contiguous)) {
8911 extended->share_mode = SM_EMPTY;
8912 extended->ref_count = 0;
8913 return;
8914 }
8915 {
8916 obj = entry->object.vm_object;
8917
8918 vm_object_lock(obj);
8919
8920 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
8921 ref_count--;
8922
8923 if (look_for_pages) {
8924 for (last_offset = offset + range;
8925 offset < last_offset;
8926 offset += PAGE_SIZE_64, va += PAGE_SIZE)
8927 vm_map_region_look_for_page(map, va, obj,
8928 offset, ref_count,
8929 0, extended);
8930 }
8931
8932 shadow_object = obj->shadow;
8933 shadow_depth = 0;
8934 if (shadow_object != VM_OBJECT_NULL) {
8935 vm_object_lock(shadow_object);
8936 for (;
8937 shadow_object != VM_OBJECT_NULL;
8938 shadow_depth++) {
8939 vm_object_t next_shadow;
8940
8941 next_shadow = shadow_object->shadow;
8942 if (next_shadow) {
8943 vm_object_lock(next_shadow);
8944 }
8945 vm_object_unlock(shadow_object);
8946 shadow_object = next_shadow;
8947 }
8948 }
8949 extended->shadow_depth = shadow_depth;
8950
8951 if (extended->shadow_depth || entry->needs_copy)
8952 extended->share_mode = SM_COW;
8953 else {
8954 if (ref_count == 1)
8955 extended->share_mode = SM_PRIVATE;
8956 else {
8957 if (obj->true_share)
8958 extended->share_mode = SM_TRUESHARED;
8959 else
8960 extended->share_mode = SM_SHARED;
8961 }
8962 }
8963 extended->ref_count = ref_count - extended->shadow_depth;
8964
8965 for (i = 0; i < extended->shadow_depth; i++) {
8966 if ((tmp_obj = obj->shadow) == 0)
8967 break;
8968 vm_object_lock(tmp_obj);
8969 vm_object_unlock(obj);
8970
8971 if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress)
8972 ref_count--;
8973
8974 extended->ref_count += ref_count;
8975 obj = tmp_obj;
8976 }
8977 vm_object_unlock(obj);
8978
8979 if (extended->share_mode == SM_SHARED) {
8980 register vm_map_entry_t cur;
8981 register vm_map_entry_t last;
8982 int my_refs;
8983
8984 obj = entry->object.vm_object;
8985 last = vm_map_to_entry(map);
8986 my_refs = 0;
8987
8988 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
8989 ref_count--;
8990 for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next)
8991 my_refs += vm_map_region_count_obj_refs(cur, obj);
8992
8993 if (my_refs == ref_count)
8994 extended->share_mode = SM_PRIVATE_ALIASED;
8995 else if (my_refs > 1)
8996 extended->share_mode = SM_SHARED_ALIASED;
8997 }
8998 }
8999 }
9000
9001
9002 /* object is locked on entry and locked on return */
9003
9004
9005 static void
9006 vm_map_region_look_for_page(
9007 __unused vm_map_t map,
9008 __unused vm_map_offset_t va,
9009 vm_object_t object,
9010 vm_object_offset_t offset,
9011 int max_refcnt,
9012 int depth,
9013 vm_region_extended_info_t extended)
9014 {
9015 register vm_page_t p;
9016 register vm_object_t shadow;
9017 register int ref_count;
9018 vm_object_t caller_object;
9019 #if MACH_PAGEMAP
9020 kern_return_t kr;
9021 #endif
9022 shadow = object->shadow;
9023 caller_object = object;
9024
9025
9026 while (TRUE) {
9027
9028 if ( !(object->pager_trusted) && !(object->internal))
9029 extended->external_pager = 1;
9030
9031 if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
9032 if (shadow && (max_refcnt == 1))
9033 extended->pages_shared_now_private++;
9034
9035 if (!p->fictitious &&
9036 (p->dirty || pmap_is_modified(p->phys_page)))
9037 extended->pages_dirtied++;
9038
9039 extended->pages_resident++;
9040
9041 if(object != caller_object)
9042 vm_object_unlock(object);
9043
9044 return;
9045 }
9046 #if MACH_PAGEMAP
9047 if (object->existence_map) {
9048 if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_EXISTS) {
9049
9050 extended->pages_swapped_out++;
9051
9052 if(object != caller_object)
9053 vm_object_unlock(object);
9054
9055 return;
9056 }
9057 } else if (object->internal &&
9058 object->alive &&
9059 !object->terminating &&
9060 object->pager_ready) {
9061
9062 memory_object_t pager;
9063
9064 vm_object_paging_begin(object);
9065 pager = object->pager;
9066 vm_object_unlock(object);
9067
9068 kr = memory_object_data_request(
9069 pager,
9070 offset + object->paging_offset,
9071 0, /* just poke the pager */
9072 VM_PROT_READ,
9073 NULL);
9074
9075 vm_object_lock(object);
9076 vm_object_paging_end(object);
9077
9078 if (kr == KERN_SUCCESS) {
9079 /* the pager has that page */
9080 extended->pages_swapped_out++;
9081 if (object != caller_object)
9082 vm_object_unlock(object);
9083 return;
9084 }
9085 }
9086 #endif /* MACH_PAGEMAP */
9087
9088 if (shadow) {
9089 vm_object_lock(shadow);
9090
9091 if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress)
9092 ref_count--;
9093
9094 if (++depth > extended->shadow_depth)
9095 extended->shadow_depth = depth;
9096
9097 if (ref_count > max_refcnt)
9098 max_refcnt = ref_count;
9099
9100 if(object != caller_object)
9101 vm_object_unlock(object);
9102
9103 offset = offset + object->shadow_offset;
9104 object = shadow;
9105 shadow = object->shadow;
9106 continue;
9107 }
9108 if(object != caller_object)
9109 vm_object_unlock(object);
9110 break;
9111 }
9112 }
9113
9114 static int
9115 vm_map_region_count_obj_refs(
9116 vm_map_entry_t entry,
9117 vm_object_t object)
9118 {
9119 register int ref_count;
9120 register vm_object_t chk_obj;
9121 register vm_object_t tmp_obj;
9122
9123 if (entry->object.vm_object == 0)
9124 return(0);
9125
9126 if (entry->is_sub_map)
9127 return(0);
9128 else {
9129 ref_count = 0;
9130
9131 chk_obj = entry->object.vm_object;
9132 vm_object_lock(chk_obj);
9133
9134 while (chk_obj) {
9135 if (chk_obj == object)
9136 ref_count++;
9137 tmp_obj = chk_obj->shadow;
9138 if (tmp_obj)
9139 vm_object_lock(tmp_obj);
9140 vm_object_unlock(chk_obj);
9141
9142 chk_obj = tmp_obj;
9143 }
9144 }
9145 return(ref_count);
9146 }
9147
9148
9149 /*
9150 * Routine: vm_map_simplify
9151 *
9152 * Description:
9153 * Attempt to simplify the map representation in
9154 * the vicinity of the given starting address.
9155 * Note:
9156 * This routine is intended primarily to keep the
9157 * kernel maps more compact -- they generally don't
9158 * benefit from the "expand a map entry" technology
9159 * at allocation time because the adjacent entry
9160 * is often wired down.
9161 */
9162 void
9163 vm_map_simplify_entry(
9164 vm_map_t map,
9165 vm_map_entry_t this_entry)
9166 {
9167 vm_map_entry_t prev_entry;
9168
9169 counter(c_vm_map_simplify_entry_called++);
9170
9171 prev_entry = this_entry->vme_prev;
9172
9173 if ((this_entry != vm_map_to_entry(map)) &&
9174 (prev_entry != vm_map_to_entry(map)) &&
9175
9176 (prev_entry->vme_end == this_entry->vme_start) &&
9177
9178 (prev_entry->is_sub_map == this_entry->is_sub_map) &&
9179
9180 (prev_entry->object.vm_object == this_entry->object.vm_object) &&
9181 ((prev_entry->offset + (prev_entry->vme_end -
9182 prev_entry->vme_start))
9183 == this_entry->offset) &&
9184
9185 (prev_entry->inheritance == this_entry->inheritance) &&
9186 (prev_entry->protection == this_entry->protection) &&
9187 (prev_entry->max_protection == this_entry->max_protection) &&
9188 (prev_entry->behavior == this_entry->behavior) &&
9189 (prev_entry->alias == this_entry->alias) &&
9190 (prev_entry->no_cache == this_entry->no_cache) &&
9191 (prev_entry->wired_count == this_entry->wired_count) &&
9192 (prev_entry->user_wired_count == this_entry->user_wired_count) &&
9193
9194 (prev_entry->needs_copy == this_entry->needs_copy) &&
9195
9196 (prev_entry->use_pmap == FALSE) &&
9197 (this_entry->use_pmap == FALSE) &&
9198 (prev_entry->in_transition == FALSE) &&
9199 (this_entry->in_transition == FALSE) &&
9200 (prev_entry->needs_wakeup == FALSE) &&
9201 (this_entry->needs_wakeup == FALSE) &&
9202 (prev_entry->is_shared == FALSE) &&
9203 (this_entry->is_shared == FALSE)
9204 ) {
9205 _vm_map_entry_unlink(&map->hdr, prev_entry);
9206 this_entry->vme_start = prev_entry->vme_start;
9207 this_entry->offset = prev_entry->offset;
9208 if (prev_entry->is_sub_map) {
9209 vm_map_deallocate(prev_entry->object.sub_map);
9210 } else {
9211 vm_object_deallocate(prev_entry->object.vm_object);
9212 }
9213 vm_map_entry_dispose(map, prev_entry);
9214 SAVE_HINT_MAP_WRITE(map, this_entry);
9215 counter(c_vm_map_simplified++);
9216 }
9217 }
9218
9219 void
9220 vm_map_simplify(
9221 vm_map_t map,
9222 vm_map_offset_t start)
9223 {
9224 vm_map_entry_t this_entry;
9225
9226 vm_map_lock(map);
9227 if (vm_map_lookup_entry(map, start, &this_entry)) {
9228 vm_map_simplify_entry(map, this_entry);
9229 vm_map_simplify_entry(map, this_entry->vme_next);
9230 }
9231 counter(c_vm_map_simplify_called++);
9232 vm_map_unlock(map);
9233 }
9234
9235 static void
9236 vm_map_simplify_range(
9237 vm_map_t map,
9238 vm_map_offset_t start,
9239 vm_map_offset_t end)
9240 {
9241 vm_map_entry_t entry;
9242
9243 /*
9244 * The map should be locked (for "write") by the caller.
9245 */
9246
9247 if (start >= end) {
9248 /* invalid address range */
9249 return;
9250 }
9251
9252 start = vm_map_trunc_page(start);
9253 end = vm_map_round_page(end);
9254
9255 if (!vm_map_lookup_entry(map, start, &entry)) {
9256 /* "start" is not mapped and "entry" ends before "start" */
9257 if (entry == vm_map_to_entry(map)) {
9258 /* start with first entry in the map */
9259 entry = vm_map_first_entry(map);
9260 } else {
9261 /* start with next entry */
9262 entry = entry->vme_next;
9263 }
9264 }
9265
9266 while (entry != vm_map_to_entry(map) &&
9267 entry->vme_start <= end) {
9268 /* try and coalesce "entry" with its previous entry */
9269 vm_map_simplify_entry(map, entry);
9270 entry = entry->vme_next;
9271 }
9272 }
9273
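/*
 * Illustrative usage sketch (added for exposition; not part of the
 * original xnu source): vm_map_simplify() takes the map lock itself,
 * while vm_map_simplify_range() expects the caller to already hold the
 * map write-locked. Names prefixed "example_" are hypothetical.
 */
static void
example_simplify(
	vm_map_t	a_map,
	vm_map_offset_t	a_start,
	vm_map_offset_t	a_end)
{
	/* unlocked caller: let vm_map_simplify() do the locking */
	vm_map_simplify(a_map, a_start);

	/* caller that already needs the write lock for other work */
	vm_map_lock(a_map);
	vm_map_simplify_range(a_map, a_start, a_end);
	vm_map_unlock(a_map);
}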
9274
9275 /*
9276 * Routine: vm_map_machine_attribute
9277 * Purpose:
9278 * Provide machine-specific attributes to mappings,
9279 * such as cacheability etc. for machines that provide
9280 * them. NUMA architectures and machines with big/strange
9281 * caches will use this.
9282 * Note:
9283 * Responsibilities for locking and checking are handled here;
9284 * everything else is handled in the pmap module. If any non-volatile
9285 * information must be kept, the pmap module should handle
9286 * it itself. [This assumes that attributes do not
9287 * need to be inherited, which seems ok to me]
9288 */
9289 kern_return_t
9290 vm_map_machine_attribute(
9291 vm_map_t map,
9292 vm_map_offset_t start,
9293 vm_map_offset_t end,
9294 vm_machine_attribute_t attribute,
9295 vm_machine_attribute_val_t* value) /* IN/OUT */
9296 {
9297 kern_return_t ret;
9298 vm_map_size_t sync_size;
9299 vm_map_entry_t entry;
9300
9301 if (start < vm_map_min(map) || end > vm_map_max(map))
9302 return KERN_INVALID_ADDRESS;
9303
9304 /* Figure how much memory we need to flush (in page increments) */
9305 sync_size = end - start;
9306
9307 vm_map_lock(map);
9308
9309 if (attribute != MATTR_CACHE) {
9310 /* If we don't have to find physical addresses, we */
9311 /* don't have to do an explicit traversal here. */
9312 ret = pmap_attribute(map->pmap, start, end-start,
9313 attribute, value);
9314 vm_map_unlock(map);
9315 return ret;
9316 }
9317
9318 ret = KERN_SUCCESS; /* Assume it all worked */
9319
9320 while(sync_size) {
9321 if (vm_map_lookup_entry(map, start, &entry)) {
9322 vm_map_size_t sub_size;
9323 if((entry->vme_end - start) > sync_size) {
9324 sub_size = sync_size;
9325 sync_size = 0;
9326 } else {
9327 sub_size = entry->vme_end - start;
9328 sync_size -= sub_size;
9329 }
9330 if(entry->is_sub_map) {
9331 vm_map_offset_t sub_start;
9332 vm_map_offset_t sub_end;
9333
9334 sub_start = (start - entry->vme_start)
9335 + entry->offset;
9336 sub_end = sub_start + sub_size;
9337 vm_map_machine_attribute(
9338 entry->object.sub_map,
9339 sub_start,
9340 sub_end,
9341 attribute, value);
9342 } else {
9343 if(entry->object.vm_object) {
9344 vm_page_t m;
9345 vm_object_t object;
9346 vm_object_t base_object;
9347 vm_object_t last_object;
9348 vm_object_offset_t offset;
9349 vm_object_offset_t base_offset;
9350 vm_map_size_t range;
9351 range = sub_size;
9352 offset = (start - entry->vme_start)
9353 + entry->offset;
9354 base_offset = offset;
9355 object = entry->object.vm_object;
9356 base_object = object;
9357 last_object = NULL;
9358
9359 vm_object_lock(object);
9360
9361 while (range) {
9362 m = vm_page_lookup(
9363 object, offset);
9364
9365 if (m && !m->fictitious) {
9366 ret =
9367 pmap_attribute_cache_sync(
9368 m->phys_page,
9369 PAGE_SIZE,
9370 attribute, value);
9371
9372 } else if (object->shadow) {
9373 offset = offset + object->shadow_offset;
9374 last_object = object;
9375 object = object->shadow;
9376 vm_object_lock(last_object->shadow);
9377 vm_object_unlock(last_object);
9378 continue;
9379 }
9380 range -= PAGE_SIZE;
9381
9382 if (base_object != object) {
9383 vm_object_unlock(object);
9384 vm_object_lock(base_object);
9385 object = base_object;
9386 }
9387 /* Bump to the next page */
9388 base_offset += PAGE_SIZE;
9389 offset = base_offset;
9390 }
9391 vm_object_unlock(object);
9392 }
9393 }
9394 start += sub_size;
9395 } else {
9396 vm_map_unlock(map);
9397 return KERN_FAILURE;
9398 }
9399
9400 }
9401
9402 vm_map_unlock(map);
9403
9404 return ret;
9405 }
9406
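/*
 * Illustrative usage sketch (added for exposition; not part of the
 * original xnu source): flushing the cache for a range through the
 * MATTR_CACHE attribute. This assumes MATTR_VAL_CACHE_FLUSH from
 * <mach/vm_attributes.h>; names prefixed "example_" are hypothetical.
 */
static kern_return_t
example_cache_flush(
	vm_map_t	a_map,
	vm_map_offset_t	a_start,
	vm_map_offset_t	a_end)
{
	vm_machine_attribute_val_t value = MATTR_VAL_CACHE_FLUSH;

	return vm_map_machine_attribute(a_map, a_start, a_end,
					MATTR_CACHE, &value);
}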
9407 /*
9408 * vm_map_behavior_set:
9409 *
9410 * Sets the paging reference behavior of the specified address
9411 * range in the target map. Paging reference behavior affects
9412 * how pagein operations resulting from faults on the map will be
9413 * clustered.
9414 */
9415 kern_return_t
9416 vm_map_behavior_set(
9417 vm_map_t map,
9418 vm_map_offset_t start,
9419 vm_map_offset_t end,
9420 vm_behavior_t new_behavior)
9421 {
9422 register vm_map_entry_t entry;
9423 vm_map_entry_t temp_entry;
9424
9425 XPR(XPR_VM_MAP,
9426 "vm_map_behavior_set, 0x%X start 0x%X end 0x%X behavior %d",
9427 (integer_t)map, start, end, new_behavior, 0);
9428
9429 switch (new_behavior) {
9430 case VM_BEHAVIOR_DEFAULT:
9431 case VM_BEHAVIOR_RANDOM:
9432 case VM_BEHAVIOR_SEQUENTIAL:
9433 case VM_BEHAVIOR_RSEQNTL:
9434 break;
9435 case VM_BEHAVIOR_WILLNEED:
9436 case VM_BEHAVIOR_DONTNEED:
9437 new_behavior = VM_BEHAVIOR_DEFAULT;
9438 break;
9439 default:
9440 return(KERN_INVALID_ARGUMENT);
9441 }
9442
9443 vm_map_lock(map);
9444
9445 /*
9446 * The entire address range must be valid for the map.
9447 * Note that vm_map_range_check() does a
9448 * vm_map_lookup_entry() internally and returns the
9449 * entry containing the start of the address range if
9450 * the entire range is valid.
9451 */
9452 if (vm_map_range_check(map, start, end, &temp_entry)) {
9453 entry = temp_entry;
9454 vm_map_clip_start(map, entry, start);
9455 }
9456 else {
9457 vm_map_unlock(map);
9458 return(KERN_INVALID_ADDRESS);
9459 }
9460
9461 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
9462 vm_map_clip_end(map, entry, end);
9463 assert(!entry->use_pmap);
9464
9465 entry->behavior = new_behavior;
9466
9467 entry = entry->vme_next;
9468 }
9469
9470 vm_map_unlock(map);
9471 return(KERN_SUCCESS);
9472 }
9473
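/*
 * Illustrative usage sketch (added for exposition; not part of the
 * original xnu source): advising that a range will be read
 * sequentially, so pagein clustering reads ahead. Names prefixed
 * "example_" are hypothetical.
 */
static kern_return_t
example_advise_sequential(
	vm_map_t	a_map,
	vm_map_offset_t	a_start,
	vm_map_size_t	a_size)
{
	return vm_map_behavior_set(a_map, a_start, a_start + a_size,
				   VM_BEHAVIOR_SEQUENTIAL);
}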
9474
9475 #include <mach_kdb.h>
9476 #if MACH_KDB
9477 #include <ddb/db_output.h>
9478 #include <vm/vm_print.h>
9479
9480 #define printf db_printf
9481
9482 /*
9483 * Forward declarations for internal functions.
9484 */
9485 extern void vm_map_links_print(
9486 struct vm_map_links *links);
9487
9488 extern void vm_map_header_print(
9489 struct vm_map_header *header);
9490
9491 extern void vm_map_entry_print(
9492 vm_map_entry_t entry);
9493
9494 extern void vm_follow_entry(
9495 vm_map_entry_t entry);
9496
9497 extern void vm_follow_map(
9498 vm_map_t map);
9499
9500 /*
9501 * vm_map_links_print: [ debug ]
9502 */
9503 void
9504 vm_map_links_print(
9505 struct vm_map_links *links)
9506 {
9507 iprintf("prev = %08X next = %08X start = %016llX end = %016llX\n",
9508 links->prev,
9509 links->next,
9510 (unsigned long long)links->start,
9511 (unsigned long long)links->end);
9512 }
9513
9514 /*
9515 * vm_map_header_print: [ debug ]
9516 */
9517 void
9518 vm_map_header_print(
9519 struct vm_map_header *header)
9520 {
9521 vm_map_links_print(&header->links);
9522 iprintf("nentries = %08X, %sentries_pageable\n",
9523 header->nentries,
9524 (header->entries_pageable ? "" : "!"));
9525 }
9526
9527 /*
9528 * vm_follow_entry: [ debug ]
9529 */
9530 void
9531 vm_follow_entry(
9532 vm_map_entry_t entry)
9533 {
9534 int shadows;
9535
9536 iprintf("map entry %08X\n", entry);
9537
9538 db_indent += 2;
9539
9540 shadows = vm_follow_object(entry->object.vm_object);
9541 iprintf("Total objects : %d\n",shadows);
9542
9543 db_indent -= 2;
9544 }
9545
9546 /*
9547 * vm_map_entry_print: [ debug ]
9548 */
9549 void
9550 vm_map_entry_print(
9551 register vm_map_entry_t entry)
9552 {
9553 static const char *inheritance_name[4] =
9554 { "share", "copy", "none", "?"};
9555 static const char *behavior_name[4] =
9556 { "dflt", "rand", "seqtl", "rseqntl" };
9557
9558 iprintf("map entry %08X - prev = %08X next = %08X\n", entry, entry->vme_prev, entry->vme_next);
9559
9560 db_indent += 2;
9561
9562 vm_map_links_print(&entry->links);
9563
9564 iprintf("start = %016llX end = %016llX - prot=%x/%x/%s\n",
9565 (unsigned long long)entry->vme_start,
9566 (unsigned long long)entry->vme_end,
9567 entry->protection,
9568 entry->max_protection,
9569 inheritance_name[(entry->inheritance & 0x3)]);
9570
9571 iprintf("behavior = %s, wired_count = %d, user_wired_count = %d\n",
9572 behavior_name[(entry->behavior & 0x3)],
9573 entry->wired_count,
9574 entry->user_wired_count);
9575 iprintf("%sin_transition, %sneeds_wakeup\n",
9576 (entry->in_transition ? "" : "!"),
9577 (entry->needs_wakeup ? "" : "!"));
9578
9579 if (entry->is_sub_map) {
9580 iprintf("submap = %08X - offset = %016llX\n",
9581 entry->object.sub_map,
9582 (unsigned long long)entry->offset);
9583 } else {
9584 iprintf("object = %08X offset = %016llX - ",
9585 entry->object.vm_object,
9586 (unsigned long long)entry->offset);
9587 printf("%sis_shared, %sneeds_copy\n",
9588 (entry->is_shared ? "" : "!"),
9589 (entry->needs_copy ? "" : "!"));
9590 }
9591
9592 db_indent -= 2;
9593 }
9594
9595 /*
9596 * vm_follow_map: [ debug ]
9597 */
9598 void
9599 vm_follow_map(
9600 vm_map_t map)
9601 {
9602 register vm_map_entry_t entry;
9603
9604 iprintf("task map %08X\n", map);
9605
9606 db_indent += 2;
9607
9608 for (entry = vm_map_first_entry(map);
9609 entry && entry != vm_map_to_entry(map);
9610 entry = entry->vme_next) {
9611 vm_follow_entry(entry);
9612 }
9613
9614 db_indent -= 2;
9615 }
9616
9617 /*
9618 * vm_map_print: [ debug ]
9619 */
9620 void
9621 vm_map_print(
9622 db_addr_t inmap)
9623 {
9624 register vm_map_entry_t entry;
9625 vm_map_t map;
9626 #if TASK_SWAPPER
9627 char *swstate;
9628 #endif /* TASK_SWAPPER */
9629
9630 map = (vm_map_t)(long)
9631 inmap; /* Make sure we have the right type */
9632
9633 iprintf("task map %08X\n", map);
9634
9635 db_indent += 2;
9636
9637 vm_map_header_print(&map->hdr);
9638
9639 iprintf("pmap = %08X size = %08X ref = %d hint = %08X first_free = %08X\n",
9640 map->pmap,
9641 map->size,
9642 map->ref_count,
9643 map->hint,
9644 map->first_free);
9645
9646 iprintf("%swait_for_space, %swiring_required, timestamp = %d\n",
9647 (map->wait_for_space ? "" : "!"),
9648 (map->wiring_required ? "" : "!"),
9649 map->timestamp);
9650
9651 #if TASK_SWAPPER
9652 switch (map->sw_state) {
9653 case MAP_SW_IN:
9654 swstate = "SW_IN";
9655 break;
9656 case MAP_SW_OUT:
9657 swstate = "SW_OUT";
9658 break;
9659 default:
9660 swstate = "????";
9661 break;
9662 }
9663 iprintf("res = %d, sw_state = %s\n", map->res_count, swstate);
9664 #endif /* TASK_SWAPPER */
9665
9666 for (entry = vm_map_first_entry(map);
9667 entry && entry != vm_map_to_entry(map);
9668 entry = entry->vme_next) {
9669 vm_map_entry_print(entry);
9670 }
9671
9672 db_indent -= 2;
9673 }
9674
9675 /*
9676 * Routine: vm_map_copy_print
9677 * Purpose:
9678 * Pretty-print a copy object for ddb.
9679 */
9680
9681 void
9682 vm_map_copy_print(
9683 db_addr_t incopy)
9684 {
9685 vm_map_copy_t copy;
9686 vm_map_entry_t entry;
9687
9688 copy = (vm_map_copy_t)(long)
9689 incopy; /* Make sure we have the right type */
9690
9691 printf("copy object 0x%x\n", copy);
9692
9693 db_indent += 2;
9694
9695 iprintf("type=%d", copy->type);
9696 switch (copy->type) {
9697 case VM_MAP_COPY_ENTRY_LIST:
9698 printf("[entry_list]");
9699 break;
9700
9701 case VM_MAP_COPY_OBJECT:
9702 printf("[object]");
9703 break;
9704
9705 case VM_MAP_COPY_KERNEL_BUFFER:
9706 printf("[kernel_buffer]");
9707 break;
9708
9709 default:
9710 printf("[bad type]");
9711 break;
9712 }
9713 printf(", offset=0x%llx", (unsigned long long)copy->offset);
9714 printf(", size=0x%x\n", copy->size);
9715
9716 switch (copy->type) {
9717 case VM_MAP_COPY_ENTRY_LIST:
9718 vm_map_header_print(&copy->cpy_hdr);
9719 for (entry = vm_map_copy_first_entry(copy);
9720 entry && entry != vm_map_copy_to_entry(copy);
9721 entry = entry->vme_next) {
9722 vm_map_entry_print(entry);
9723 }
9724 break;
9725
9726 case VM_MAP_COPY_OBJECT:
9727 iprintf("object=0x%x\n", copy->cpy_object);
9728 break;
9729
9730 case VM_MAP_COPY_KERNEL_BUFFER:
9731 iprintf("kernel buffer=0x%x", copy->cpy_kdata);
9732 printf(", kalloc_size=0x%x\n", copy->cpy_kalloc_size);
9733 break;
9734
9735 }
9736
9737 db_indent -=2;
9738 }
9739
9740 /*
9741 * db_vm_map_total_size(map) [ debug ]
9742 *
9743 * Return the total virtual size (in bytes) of the map.
9744 */
9745 vm_map_size_t
9746 db_vm_map_total_size(
9747 db_addr_t inmap)
9748 {
9749 vm_map_entry_t entry;
9750 vm_map_size_t total;
9751 vm_map_t map;
9752
9753 map = (vm_map_t)(long)
9754 inmap; /* Make sure we have the right type */
9755
9756 total = 0;
9757 for (entry = vm_map_first_entry(map);
9758 entry != vm_map_to_entry(map);
9759 entry = entry->vme_next) {
9760 total += entry->vme_end - entry->vme_start;
9761 }
9762
9763 return total;
9764 }
9765
9766 #endif /* MACH_KDB */
9767
9768 /*
9769 * Routine: vm_map_entry_insert
9770 *
9771 * Description: This routine inserts a new vm_map entry in a locked map.
9772 */
9773 vm_map_entry_t
9774 vm_map_entry_insert(
9775 vm_map_t map,
9776 vm_map_entry_t insp_entry,
9777 vm_map_offset_t start,
9778 vm_map_offset_t end,
9779 vm_object_t object,
9780 vm_object_offset_t offset,
9781 boolean_t needs_copy,
9782 boolean_t is_shared,
9783 boolean_t in_transition,
9784 vm_prot_t cur_protection,
9785 vm_prot_t max_protection,
9786 vm_behavior_t behavior,
9787 vm_inherit_t inheritance,
9788 unsigned wired_count,
9789 boolean_t no_cache)
9790 {
9791 vm_map_entry_t new_entry;
9792
9793 assert(insp_entry != (vm_map_entry_t)0);
9794
9795 new_entry = vm_map_entry_create(map);
9796
9797 new_entry->vme_start = start;
9798 new_entry->vme_end = end;
9799 assert(page_aligned(new_entry->vme_start));
9800 assert(page_aligned(new_entry->vme_end));
9801
9802 new_entry->object.vm_object = object;
9803 new_entry->offset = offset;
9804 new_entry->is_shared = is_shared;
9805 new_entry->is_sub_map = FALSE;
9806 new_entry->needs_copy = needs_copy;
9807 new_entry->in_transition = in_transition;
9808 new_entry->needs_wakeup = FALSE;
9809 new_entry->inheritance = inheritance;
9810 new_entry->protection = cur_protection;
9811 new_entry->max_protection = max_protection;
9812 new_entry->behavior = behavior;
9813 new_entry->wired_count = wired_count;
9814 new_entry->user_wired_count = 0;
9815 new_entry->use_pmap = FALSE;
9816 new_entry->alias = 0;
9817 new_entry->no_cache = no_cache;
9818
9819 /*
9820 * Insert the new entry into the list.
9821 */
9822
9823 vm_map_entry_link(map, insp_entry, new_entry);
9824 map->size += end - start;
9825
9826 /*
9827 * Update the free space hint and the lookup hint.
9828 */
9829
9830 SAVE_HINT_MAP_WRITE(map, new_entry);
9831 return new_entry;
9832 }
9833
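/*
 * Illustrative usage sketch (added for exposition; not part of the
 * original xnu source): how an internal caller, holding "a_map"
 * write-locked and having located "where" with vm_map_lookup_entry(),
 * could insert a fresh anonymous entry. Names prefixed "example_" are
 * hypothetical.
 */
static void
example_insert_anonymous(
	vm_map_t	a_map,
	vm_map_entry_t	where,
	vm_map_offset_t	a_start,
	vm_map_offset_t	a_end)
{
	(void) vm_map_entry_insert(a_map, where, a_start, a_end,
				   VM_OBJECT_NULL,
				   (vm_object_offset_t) 0,
				   FALSE,		/* needs_copy */
				   FALSE,		/* is_shared */
				   FALSE,		/* in_transition */
				   VM_PROT_DEFAULT,
				   VM_PROT_ALL,
				   VM_BEHAVIOR_DEFAULT,
				   VM_INHERIT_DEFAULT,
				   0,			/* wired_count */
				   FALSE);		/* no_cache */
}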
9834 /*
9835 * Routine: vm_map_remap_extract
9836 *
9837 * Description: This routine extracts a list of vm_map entries from a map.
9838 */
9839 static kern_return_t
9840 vm_map_remap_extract(
9841 vm_map_t map,
9842 vm_map_offset_t addr,
9843 vm_map_size_t size,
9844 boolean_t copy,
9845 struct vm_map_header *map_header,
9846 vm_prot_t *cur_protection,
9847 vm_prot_t *max_protection,
9848 /* What, no behavior? */
9849 vm_inherit_t inheritance,
9850 boolean_t pageable)
9851 {
9852 kern_return_t result;
9853 vm_map_size_t mapped_size;
9854 vm_map_size_t tmp_size;
9855 vm_map_entry_t src_entry; /* result of last map lookup */
9856 vm_map_entry_t new_entry;
9857 vm_object_offset_t offset;
9858 vm_map_offset_t map_address;
9859 vm_map_offset_t src_start; /* start of entry to map */
9860 vm_map_offset_t src_end; /* end of region to be mapped */
9861 vm_object_t object;
9862 vm_map_version_t version;
9863 boolean_t src_needs_copy;
9864 boolean_t new_entry_needs_copy;
9865
9866 assert(map != VM_MAP_NULL);
9867 assert(size != 0 && size == vm_map_round_page(size));
9868 assert(inheritance == VM_INHERIT_NONE ||
9869 inheritance == VM_INHERIT_COPY ||
9870 inheritance == VM_INHERIT_SHARE);
9871
9872 /*
9873 * Compute start and end of region.
9874 */
9875 src_start = vm_map_trunc_page(addr);
9876 src_end = vm_map_round_page(src_start + size);
9877
9878 /*
9879 * Initialize map_header.
9880 */
9881 map_header->links.next = (struct vm_map_entry *)&map_header->links;
9882 map_header->links.prev = (struct vm_map_entry *)&map_header->links;
9883 map_header->nentries = 0;
9884 map_header->entries_pageable = pageable;
9885
9886 *cur_protection = VM_PROT_ALL;
9887 *max_protection = VM_PROT_ALL;
9888
9889 map_address = 0;
9890 mapped_size = 0;
9891 result = KERN_SUCCESS;
9892
9893 /*
9894 * The specified source virtual space might correspond to
9895 * multiple map entries; we need to loop over them.
9896 */
9897 vm_map_lock(map);
9898 while (mapped_size != size) {
9899 vm_map_size_t entry_size;
9900
9901 /*
9902 * Find the beginning of the region.
9903 */
9904 if (! vm_map_lookup_entry(map, src_start, &src_entry)) {
9905 result = KERN_INVALID_ADDRESS;
9906 break;
9907 }
9908
9909 if (src_start < src_entry->vme_start ||
9910 (mapped_size && src_start != src_entry->vme_start)) {
9911 result = KERN_INVALID_ADDRESS;
9912 break;
9913 }
9914
9915 if(src_entry->is_sub_map) {
9916 result = KERN_INVALID_ADDRESS;
9917 break;
9918 }
9919
9920 tmp_size = size - mapped_size;
9921 if (src_end > src_entry->vme_end)
9922 tmp_size -= (src_end - src_entry->vme_end);
9923
9924 entry_size = (vm_map_size_t)(src_entry->vme_end -
9925 src_entry->vme_start);
9926
9927 if(src_entry->is_sub_map) {
9928 vm_map_reference(src_entry->object.sub_map);
9929 object = VM_OBJECT_NULL;
9930 } else {
9931 object = src_entry->object.vm_object;
9932
9933 if (object == VM_OBJECT_NULL) {
9934 object = vm_object_allocate(entry_size);
9935 src_entry->offset = 0;
9936 src_entry->object.vm_object = object;
9937 } else if (object->copy_strategy !=
9938 MEMORY_OBJECT_COPY_SYMMETRIC) {
9939 /*
9940 * We are already using an asymmetric
9941 * copy, and therefore we already have
9942 * the right object.
9943 */
9944 assert(!src_entry->needs_copy);
9945 } else if (src_entry->needs_copy || object->shadowed ||
9946 (object->internal && !object->true_share &&
9947 !src_entry->is_shared &&
9948 object->size > entry_size)) {
9949
9950 vm_object_shadow(&src_entry->object.vm_object,
9951 &src_entry->offset,
9952 entry_size);
9953
9954 if (!src_entry->needs_copy &&
9955 (src_entry->protection & VM_PROT_WRITE)) {
9956 vm_prot_t prot;
9957
9958 prot = src_entry->protection & ~VM_PROT_WRITE;
9959
9960 if (override_nx(map, src_entry->alias) && prot)
9961 prot |= VM_PROT_EXECUTE;
9962
9963 if(map->mapped) {
9964 vm_object_pmap_protect(
9965 src_entry->object.vm_object,
9966 src_entry->offset,
9967 entry_size,
9968 PMAP_NULL,
9969 src_entry->vme_start,
9970 prot);
9971 } else {
9972 pmap_protect(vm_map_pmap(map),
9973 src_entry->vme_start,
9974 src_entry->vme_end,
9975 prot);
9976 }
9977 }
9978
9979 object = src_entry->object.vm_object;
9980 src_entry->needs_copy = FALSE;
9981 }
9982
9983
9984 vm_object_lock(object);
9985 vm_object_reference_locked(object); /* object ref. for new entry */
9986 if (object->copy_strategy ==
9987 MEMORY_OBJECT_COPY_SYMMETRIC) {
9988 object->copy_strategy =
9989 MEMORY_OBJECT_COPY_DELAY;
9990 }
9991 vm_object_unlock(object);
9992 }
9993
9994 offset = src_entry->offset + (src_start - src_entry->vme_start);
9995
9996 new_entry = _vm_map_entry_create(map_header);
9997 vm_map_entry_copy(new_entry, src_entry);
9998 new_entry->use_pmap = FALSE; /* clr address space specifics */
9999
10000 new_entry->vme_start = map_address;
10001 new_entry->vme_end = map_address + tmp_size;
10002 new_entry->inheritance = inheritance;
10003 new_entry->offset = offset;
10004
10005 /*
10006 * The new region has to be copied now if required.
10007 */
10008 RestartCopy:
10009 if (!copy) {
10010 src_entry->is_shared = TRUE;
10011 new_entry->is_shared = TRUE;
10012 if (!(new_entry->is_sub_map))
10013 new_entry->needs_copy = FALSE;
10014
10015 } else if (src_entry->is_sub_map) {
10016 /* make this a COW sub_map if not already */
10017 new_entry->needs_copy = TRUE;
10018 object = VM_OBJECT_NULL;
10019 } else if (src_entry->wired_count == 0 &&
10020 vm_object_copy_quickly(&new_entry->object.vm_object,
10021 new_entry->offset,
10022 (new_entry->vme_end -
10023 new_entry->vme_start),
10024 &src_needs_copy,
10025 &new_entry_needs_copy)) {
10026
10027 new_entry->needs_copy = new_entry_needs_copy;
10028 new_entry->is_shared = FALSE;
10029
10030 /*
10031 * Handle copy_on_write semantics.
10032 */
10033 if (src_needs_copy && !src_entry->needs_copy) {
10034 vm_prot_t prot;
10035
10036 prot = src_entry->protection & ~VM_PROT_WRITE;
10037
10038 if (override_nx(map, src_entry->alias) && prot)
10039 prot |= VM_PROT_EXECUTE;
10040
10041 vm_object_pmap_protect(object,
10042 offset,
10043 entry_size,
10044 ((src_entry->is_shared
10045 || map->mapped) ?
10046 PMAP_NULL : map->pmap),
10047 src_entry->vme_start,
10048 prot);
10049
10050 src_entry->needs_copy = TRUE;
10051 }
10052 /*
10053 * Throw away the old object reference of the new entry.
10054 */
10055 vm_object_deallocate(object);
10056
10057 } else {
10058 new_entry->is_shared = FALSE;
10059
10060 /*
10061 * The map can be safely unlocked since we
10062 * already hold a reference on the object.
10063 *
10064 * Record the timestamp of the map for later
10065 * verification, and unlock the map.
10066 */
10067 version.main_timestamp = map->timestamp;
10068 vm_map_unlock(map); /* Increments timestamp once! */
10069
10070 /*
10071 * Perform the copy.
10072 */
10073 if (src_entry->wired_count > 0) {
10074 vm_object_lock(object);
10075 result = vm_object_copy_slowly(
10076 object,
10077 offset,
10078 entry_size,
10079 THREAD_UNINT,
10080 &new_entry->object.vm_object);
10081
10082 new_entry->offset = 0;
10083 new_entry->needs_copy = FALSE;
10084 } else {
10085 result = vm_object_copy_strategically(
10086 object,
10087 offset,
10088 entry_size,
10089 &new_entry->object.vm_object,
10090 &new_entry->offset,
10091 &new_entry_needs_copy);
10092
10093 new_entry->needs_copy = new_entry_needs_copy;
10094 }
10095
10096 /*
10097 * Throw away the old object reference of the new entry.
10098 */
10099 vm_object_deallocate(object);
10100
10101 if (result != KERN_SUCCESS &&
10102 result != KERN_MEMORY_RESTART_COPY) {
10103 _vm_map_entry_dispose(map_header, new_entry);
10104 break;
10105 }
10106
10107 /*
10108 * Verify that the map has not substantially
10109 * changed while the copy was being made.
10110 */
10111
10112 vm_map_lock(map);
10113 if (version.main_timestamp + 1 != map->timestamp) {
10114 /*
10115 * Simple version comparison failed.
10116 *
10117 * Retry the lookup and verify that the
10118 * same object/offset are still present.
10119 */
10120 vm_object_deallocate(new_entry->
10121 object.vm_object);
10122 _vm_map_entry_dispose(map_header, new_entry);
10123 if (result == KERN_MEMORY_RESTART_COPY)
10124 result = KERN_SUCCESS;
10125 continue;
10126 }
10127
10128 if (result == KERN_MEMORY_RESTART_COPY) {
10129 vm_object_reference(object);
10130 goto RestartCopy;
10131 }
10132 }
10133
10134 _vm_map_entry_link(map_header,
10135 map_header->links.prev, new_entry);
10136
10137 *cur_protection &= src_entry->protection;
10138 *max_protection &= src_entry->max_protection;
10139
10140 map_address += tmp_size;
10141 mapped_size += tmp_size;
10142 src_start += tmp_size;
10143
10144 } /* end while */
10145
10146 vm_map_unlock(map);
10147 if (result != KERN_SUCCESS) {
10148 /*
10149 * Free all allocated elements.
10150 */
10151 for (src_entry = map_header->links.next;
10152 src_entry != (struct vm_map_entry *)&map_header->links;
10153 src_entry = new_entry) {
10154 new_entry = src_entry->vme_next;
10155 _vm_map_entry_unlink(map_header, src_entry);
10156 vm_object_deallocate(src_entry->object.vm_object);
10157 _vm_map_entry_dispose(map_header, src_entry);
10158 }
10159 }
10160 return result;
10161 }
10162
10163 /*
10164 * Routine: vm_map_remap
10165 *
10166 * Map a portion of a task's address space.
10167 * The mapped region must not overlap more than
10168 * one VM memory object. Protections and
10169 * inheritance attributes remain the same as in
10170 * the original task and are returned as out
10171 * parameters. Source and target task can be
10172 * identical. Other attributes are the same as for vm_map().
10173 */
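/*
 * Illustrative sketch (hypothetical caller, not part of the build):
 * mirror "size" bytes at "src_addr" of "src_map" into "target_map"
 * at a kernel-chosen address, sharing rather than copying the pages.
 * All of the names below are assumed to be set up by the caller.
 *
 *	vm_map_address_t	target_addr = 0;
 *	vm_prot_t		cur_prot, max_prot;
 *	kern_return_t		kr;
 *
 *	kr = vm_map_remap(target_map, &target_addr, size,
 *			  (vm_map_offset_t) 0,	-- mask: no extra alignment
 *			  TRUE,			-- anywhere
 *			  src_map, src_addr,
 *			  FALSE,		-- copy: share, don't copy
 *			  &cur_prot, &max_prot,
 *			  VM_INHERIT_NONE);
 */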
10174 kern_return_t
10175 vm_map_remap(
10176 vm_map_t target_map,
10177 vm_map_address_t *address,
10178 vm_map_size_t size,
10179 vm_map_offset_t mask,
10180 boolean_t anywhere,
10181 vm_map_t src_map,
10182 vm_map_offset_t memory_address,
10183 boolean_t copy,
10184 vm_prot_t *cur_protection,
10185 vm_prot_t *max_protection,
10186 vm_inherit_t inheritance)
10187 {
10188 kern_return_t result;
10189 vm_map_entry_t entry;
10190 vm_map_entry_t insp_entry = VM_MAP_ENTRY_NULL;
10191 vm_map_entry_t new_entry;
10192 struct vm_map_header map_header;
10193
10194 if (target_map == VM_MAP_NULL)
10195 return KERN_INVALID_ARGUMENT;
10196
10197 switch (inheritance) {
10198 case VM_INHERIT_NONE:
10199 case VM_INHERIT_COPY:
10200 case VM_INHERIT_SHARE:
10201 if (size != 0 && src_map != VM_MAP_NULL)
10202 break;
10203 /*FALL THRU*/
10204 default:
10205 return KERN_INVALID_ARGUMENT;
10206 }
10207
10208 size = vm_map_round_page(size);
10209
10210 result = vm_map_remap_extract(src_map, memory_address,
10211 size, copy, &map_header,
10212 cur_protection,
10213 max_protection,
10214 inheritance,
10215 target_map->hdr.
10216 entries_pageable);
10217
10218 if (result != KERN_SUCCESS) {
10219 return result;
10220 }
10221
10222 /*
10223 * Allocate/check a range of free virtual address
10224 * space for the target
10225 */
10226 *address = vm_map_trunc_page(*address);
10227 vm_map_lock(target_map);
10228 result = vm_map_remap_range_allocate(target_map, address, size,
10229 mask, anywhere, &insp_entry);
10230
10231 for (entry = map_header.links.next;
10232 entry != (struct vm_map_entry *)&map_header.links;
10233 entry = new_entry) {
10234 new_entry = entry->vme_next;
10235 _vm_map_entry_unlink(&map_header, entry);
10236 if (result == KERN_SUCCESS) {
10237 entry->vme_start += *address;
10238 entry->vme_end += *address;
10239 vm_map_entry_link(target_map, insp_entry, entry);
10240 insp_entry = entry;
10241 } else {
10242 if (!entry->is_sub_map) {
10243 vm_object_deallocate(entry->object.vm_object);
10244 } else {
10245 vm_map_deallocate(entry->object.sub_map);
10246 }
10247 _vm_map_entry_dispose(&map_header, entry);
10248 }
10249 }
10250
10251 if (result == KERN_SUCCESS) {
10252 target_map->size += size;
10253 SAVE_HINT_MAP_WRITE(target_map, insp_entry);
10254 }
10255 vm_map_unlock(target_map);
10256
10257 if (result == KERN_SUCCESS && target_map->wiring_required)
10258 result = vm_map_wire(target_map, *address,
10259 *address + size, *cur_protection, TRUE);
10260 return result;
10261 }
10262
10263 /*
10264 * Routine: vm_map_remap_range_allocate
10265 *
10266 * Description:
10267 * Allocate a range in the specified virtual address map.
10268 * Returns the address and the map entry just before the allocated
10269 * range.
10270 *
10271 * Map must be locked.
10272 */
10273
10274 static kern_return_t
10275 vm_map_remap_range_allocate(
10276 vm_map_t map,
10277 vm_map_address_t *address, /* IN/OUT */
10278 vm_map_size_t size,
10279 vm_map_offset_t mask,
10280 boolean_t anywhere,
10281 vm_map_entry_t *map_entry) /* OUT */
10282 {
10283 register vm_map_entry_t entry;
10284 register vm_map_offset_t start;
10285 register vm_map_offset_t end;
10286
10287 StartAgain: ;
10288
10289 start = *address;
10290
10291 if (anywhere)
10292 {
10293 /*
10294 * Calculate the first possible address.
10295 */
10296
10297 if (start < map->min_offset)
10298 start = map->min_offset;
10299 if (start > map->max_offset)
10300 return(KERN_NO_SPACE);
10301
10302 /*
10303 * Look for the first possible address;
10304 * if there's already something at this
10305 * address, we have to start after it.
10306 */
10307
10308 assert(first_free_is_valid(map));
10309 if (start == map->min_offset) {
10310 if ((entry = map->first_free) != vm_map_to_entry(map))
10311 start = entry->vme_end;
10312 } else {
10313 vm_map_entry_t tmp_entry;
10314 if (vm_map_lookup_entry(map, start, &tmp_entry))
10315 start = tmp_entry->vme_end;
10316 entry = tmp_entry;
10317 }
10318
10319 /*
10320 * In any case, the "entry" always precedes
10321 * the proposed new region throughout the
10322 * loop:
10323 */
10324
10325 while (TRUE) {
10326 register vm_map_entry_t next;
10327
10328 /*
10329 * Find the end of the proposed new region.
10330 * Be sure we didn't go beyond the end, or
10331 * wrap around the address.
10332 */
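/*
 * ("mask" is an alignment mask: (start + mask) & ~mask rounds
 * start up to the next (mask + 1) boundary.  For example, with
 * mask == 0xFFF, a start of 0x1001 becomes 0x2000.)
 */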
10333
10334 end = ((start + mask) & ~mask);
10335 if (end < start)
10336 return(KERN_NO_SPACE);
10337 start = end;
10338 end += size;
10339
10340 if ((end > map->max_offset) || (end < start)) {
10341 if (map->wait_for_space) {
10342 if (size <= (map->max_offset -
10343 map->min_offset)) {
10344 assert_wait((event_t) map, THREAD_INTERRUPTIBLE);
10345 vm_map_unlock(map);
10346 thread_block(THREAD_CONTINUE_NULL);
10347 vm_map_lock(map);
10348 goto StartAgain;
10349 }
10350 }
10351
10352 return(KERN_NO_SPACE);
10353 }
10354
10355 /*
10356 * If there are no more entries, we must win.
10357 */
10358
10359 next = entry->vme_next;
10360 if (next == vm_map_to_entry(map))
10361 break;
10362
10363 /*
10364 * If there is another entry, it must be
10365 * after the end of the potential new region.
10366 */
10367
10368 if (next->vme_start >= end)
10369 break;
10370
10371 /*
10372 * Didn't fit -- move to the next entry.
10373 */
10374
10375 entry = next;
10376 start = entry->vme_end;
10377 }
10378 *address = start;
10379 } else {
10380 vm_map_entry_t temp_entry;
10381
10382 /*
10383 * Verify that:
10384 * the address doesn't itself violate
10385 * the mask requirement.
10386 */
10387
10388 if ((start & mask) != 0)
10389 return(KERN_NO_SPACE);
10390
10391
10392 /*
10393 * ... the address is within bounds
10394 */
10395
10396 end = start + size;
10397
10398 if ((start < map->min_offset) ||
10399 (end > map->max_offset) ||
10400 (start >= end)) {
10401 return(KERN_INVALID_ADDRESS);
10402 }
10403
10404 /*
10405 * ... the starting address isn't allocated
10406 */
10407
10408 if (vm_map_lookup_entry(map, start, &temp_entry))
10409 return(KERN_NO_SPACE);
10410
10411 entry = temp_entry;
10412
10413 /*
10414 * ... the next region doesn't overlap the
10415 * end point.
10416 */
10417
10418 if ((entry->vme_next != vm_map_to_entry(map)) &&
10419 (entry->vme_next->vme_start < end))
10420 return(KERN_NO_SPACE);
10421 }
10422 *map_entry = entry;
10423 return(KERN_SUCCESS);
10424 }
10425
10426 /*
10427 * vm_map_switch:
10428 *
10429 * Set the address map for the current thread to the specified map
10430 */
10431
10432 vm_map_t
10433 vm_map_switch(
10434 vm_map_t map)
10435 {
10436 int mycpu;
10437 thread_t thread = current_thread();
10438 vm_map_t oldmap = thread->map;
10439
10440 mp_disable_preemption();
10441 mycpu = cpu_number();
10442
10443 /*
10444 * Deactivate the current map and activate the requested map
10445 */
10446 PMAP_SWITCH_USER(thread, map, mycpu);
10447
10448 mp_enable_preemption();
10449 return(oldmap);
10450 }
10451
10452
10453 /*
10454 * Routine: vm_map_write_user
10455 *
10456 * Description:
10457 * Copy out data from a kernel space into space in the
10458 * destination map. The space must already exist in the
10459 * destination map.
10460 * NOTE: This routine should only be called by threads
10461 * which can block on a page fault, i.e. kernel-mode user
10462 * threads.
10463 *
10464 */
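/*
 * Illustrative sketch (hypothetical caller, not part of the build):
 * push a kernel structure out to a user address in another task's
 * map.  "task", "uaddr" and "kdata" are assumptions of this example.
 *
 *	kr = vm_map_write_user(task->map, &kdata,
 *			       (vm_map_address_t) uaddr, sizeof (kdata));
 *	-- kr is KERN_INVALID_ADDRESS if uaddr isn't valid in that map.
 */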
10465 kern_return_t
10466 vm_map_write_user(
10467 vm_map_t map,
10468 void *src_p,
10469 vm_map_address_t dst_addr,
10470 vm_size_t size)
10471 {
10472 kern_return_t kr = KERN_SUCCESS;
10473
10474 if(current_map() == map) {
10475 if (copyout(src_p, dst_addr, size)) {
10476 kr = KERN_INVALID_ADDRESS;
10477 }
10478 } else {
10479 vm_map_t oldmap;
10480
10481 /* take on the identity of the target map while doing */
10482 /* the transfer */
10483
10484 vm_map_reference(map);
10485 oldmap = vm_map_switch(map);
10486 if (copyout(src_p, dst_addr, size)) {
10487 kr = KERN_INVALID_ADDRESS;
10488 }
10489 vm_map_switch(oldmap);
10490 vm_map_deallocate(map);
10491 }
10492 return kr;
10493 }
10494
10495 /*
10496 * Routine: vm_map_read_user
10497 *
10498 * Description:
10499 * Copy in data from a user space source map into the
10500 * kernel map. The space must already exist in the
10501 * kernel map.
10502 * NOTE: This routine should only be called by threads
10503 * which can block on a page fault, i.e. kernel-mode user
10504 * threads.
10505 *
10506 */
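/*
 * Illustrative sketch (hypothetical caller, not part of the build),
 * mirroring the vm_map_write_user() example above but in the other
 * direction; "task", "uaddr" and "kdata" are again assumptions.
 *
 *	kr = vm_map_read_user(task->map, (vm_map_address_t) uaddr,
 *			      &kdata, sizeof (kdata));
 */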
10507 kern_return_t
10508 vm_map_read_user(
10509 vm_map_t map,
10510 vm_map_address_t src_addr,
10511 void *dst_p,
10512 vm_size_t size)
10513 {
10514 kern_return_t kr = KERN_SUCCESS;
10515
10516 if(current_map() == map) {
10517 if (copyin(src_addr, dst_p, size)) {
10518 kr = KERN_INVALID_ADDRESS;
10519 }
10520 } else {
10521 vm_map_t oldmap;
10522
10523 /* take on the identity of the target map while doing */
10524 /* the transfer */
10525
10526 vm_map_reference(map);
10527 oldmap = vm_map_switch(map);
10528 if (copyin(src_addr, dst_p, size)) {
10529 kr = KERN_INVALID_ADDRESS;
10530 }
10531 vm_map_switch(oldmap);
10532 vm_map_deallocate(map);
10533 }
10534 return kr;
10535 }
10536
10537
10538 /*
10539 * vm_map_check_protection:
10540 *
10541 * Assert that the target map allows the specified
10542 * privilege on the entire address region given.
10543 * The entire region must be allocated.
10544 */
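/*
 * Illustrative sketch (hypothetical caller, not part of the build):
 * verify that an entire user buffer is mapped read/write before
 * operating on it.  "map", "buf_start" and "buf_len" are assumptions.
 *
 *	if (!vm_map_check_protection(map, vm_map_trunc_page(buf_start),
 *				     vm_map_round_page(buf_start + buf_len),
 *				     VM_PROT_READ | VM_PROT_WRITE))
 *		return KERN_PROTECTION_FAILURE;
 */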
10545 boolean_t
10546 vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
10547 vm_map_offset_t end, vm_prot_t protection)
10548 {
10549 vm_map_entry_t entry;
10550 vm_map_entry_t tmp_entry;
10551
10552 vm_map_lock(map);
10553
10554 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
10555 {
10556 vm_map_unlock(map);
10557 return (FALSE);
10558 }
10559
10560 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
10561 vm_map_unlock(map);
10562 return(FALSE);
10563 }
10564
10565 entry = tmp_entry;
10566
10567 while (start < end) {
10568 if (entry == vm_map_to_entry(map)) {
10569 vm_map_unlock(map);
10570 return(FALSE);
10571 }
10572
10573 /*
10574 * No holes allowed!
10575 */
10576
10577 if (start < entry->vme_start) {
10578 vm_map_unlock(map);
10579 return(FALSE);
10580 }
10581
10582 /*
10583 * Check protection associated with entry.
10584 */
10585
10586 if ((entry->protection & protection) != protection) {
10587 vm_map_unlock(map);
10588 return(FALSE);
10589 }
10590
10591 /* go to next entry */
10592
10593 start = entry->vme_end;
10594 entry = entry->vme_next;
10595 }
10596 vm_map_unlock(map);
10597 return(TRUE);
10598 }
10599
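/*
 * Illustrative sketch (hypothetical caller, not part of the build):
 * mark an entire anonymous object volatile so the VM may discard its
 * pages under memory pressure.  "map" and "addr" are assumptions; the
 * mapping must cover the whole purgeable object (see the checks below).
 *
 *	int state = VM_PURGABLE_VOLATILE;
 *	kr = vm_map_purgable_control(map, addr,
 *				     VM_PURGABLE_SET_STATE, &state);
 */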
10600 kern_return_t
10601 vm_map_purgable_control(
10602 vm_map_t map,
10603 vm_map_offset_t address,
10604 vm_purgable_t control,
10605 int *state)
10606 {
10607 vm_map_entry_t entry;
10608 vm_object_t object;
10609 kern_return_t kr;
10610
10611 /*
10612 * Vet all the input parameters and current type and state of the
10613 * underlying object. Return with an error if anything is amiss.
10614 */
10615 if (map == VM_MAP_NULL)
10616 return(KERN_INVALID_ARGUMENT);
10617
10618 if (control != VM_PURGABLE_SET_STATE &&
10619 control != VM_PURGABLE_GET_STATE)
10620 return(KERN_INVALID_ARGUMENT);
10621
10622 if (control == VM_PURGABLE_SET_STATE &&
10623 (((*state & ~(VM_PURGABLE_STATE_MASK|VM_VOLATILE_ORDER_MASK|VM_PURGABLE_ORDERING_MASK|VM_PURGABLE_BEHAVIOR_MASK|VM_VOLATILE_GROUP_MASK)) != 0) ||
10624 ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK)))
10625 return(KERN_INVALID_ARGUMENT);
10626
10627 vm_map_lock(map);
10628
10629 if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {
10630
10631 /*
10632 * Must pass a valid non-submap address.
10633 */
10634 vm_map_unlock(map);
10635 return(KERN_INVALID_ADDRESS);
10636 }
10637
10638 if ((entry->protection & VM_PROT_WRITE) == 0) {
10639 /*
10640 * Can't apply purgable controls to something you can't write.
10641 */
10642 vm_map_unlock(map);
10643 return(KERN_PROTECTION_FAILURE);
10644 }
10645
10646 object = entry->object.vm_object;
10647 if (object == VM_OBJECT_NULL) {
10648 /*
10649 * Object must already be present or it can't be purgable.
10650 */
10651 vm_map_unlock(map);
10652 return KERN_INVALID_ARGUMENT;
10653 }
10654
10655 vm_object_lock(object);
10656
10657 if (entry->offset != 0 ||
10658 entry->vme_end - entry->vme_start != object->size) {
10659 /*
10660 * Can only apply purgable controls to the whole (existing)
10661 * object at once.
10662 */
10663 vm_map_unlock(map);
10664 vm_object_unlock(object);
10665 return KERN_INVALID_ARGUMENT;
10666 }
10667
10668 vm_map_unlock(map);
10669
10670 kr = vm_object_purgable_control(object, control, state);
10671
10672 vm_object_unlock(object);
10673
10674 return kr;
10675 }
10676
10677 kern_return_t
10678 vm_map_page_info(
10679 vm_map_t target_map,
10680 vm_map_offset_t offset,
10681 int *disposition,
10682 int *ref_count)
10683 {
10684 vm_map_entry_t map_entry;
10685 vm_object_t object;
10686 vm_page_t m;
10687 kern_return_t kr;
10688 kern_return_t retval = KERN_SUCCESS;
10689 boolean_t top_object = TRUE;
10690
10691 *disposition = 0;
10692 *ref_count = 0;
10693
10694 vm_map_lock_read(target_map);
10695
10696 restart_page_query:
10697 if (!vm_map_lookup_entry(target_map, offset, &map_entry)) {
10698 vm_map_unlock_read(target_map);
10699 return KERN_FAILURE;
10700 }
10701 offset -= map_entry->vme_start; /* adjust to offset within entry */
10702 offset += map_entry->offset; /* adjust to target object offset */
10703
10704 if (map_entry->object.vm_object != VM_OBJECT_NULL) {
10705 if (!map_entry->is_sub_map) {
10706 object = map_entry->object.vm_object;
10707 } else {
10708 vm_map_t sub_map;
10709
10710 sub_map = map_entry->object.sub_map;
10711 vm_map_lock_read(sub_map);
10712 vm_map_unlock_read(target_map);
10713
10714 target_map = sub_map;
10715 goto restart_page_query;
10716 }
10717 } else {
10718 vm_map_unlock_read(target_map);
10719 return KERN_SUCCESS;
10720 }
10721 vm_object_lock(object);
10722 vm_map_unlock_read(target_map);
10723
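	/*
	 * Walk the object's shadow chain looking for the page: at each
	 * level, if the page is neither resident nor known to be paged
	 * out, drop down to object->shadow (adjusting the offset by
	 * shadow_offset) and look again.
	 */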
10724 while (TRUE) {
10725 m = vm_page_lookup(object, offset);
10726
10727 if (m != VM_PAGE_NULL) {
10728 *disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
10729 break;
10730 } else {
10731 #if MACH_PAGEMAP
10732 if (object->existence_map) {
10733 if (vm_external_state_get(object->existence_map, offset)
10734 == VM_EXTERNAL_STATE_EXISTS) {
10735 /*
10736 * this page has been paged out
10737 */
10738 *disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
10739 break;
10740 }
10741 } else
10742 #endif
10743 if (object->internal &&
10744 object->alive &&
10745 !object->terminating &&
10746 object->pager_ready) {
10747
10748 memory_object_t pager;
10749
10750 vm_object_paging_begin(object);
10751 pager = object->pager;
10752 vm_object_unlock(object);
10753
10754 kr = memory_object_data_request(
10755 pager,
10756 offset + object->paging_offset,
10757 0, /* just poke the pager */
10758 VM_PROT_READ,
10759 NULL);
10760
10761 vm_object_lock(object);
10762 vm_object_paging_end(object);
10763
10764 if (kr == KERN_SUCCESS) {
10765 /*
10766 * the pager has this page
10767 */
10768 *disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
10769 break;
10770 }
10771 }
10772 if (object->shadow != VM_OBJECT_NULL) {
10773 vm_object_t shadow;
10774
10775 offset += object->shadow_offset;
10776 shadow = object->shadow;
10777
10778 vm_object_lock(shadow);
10779 vm_object_unlock(object);
10780
10781 object = shadow;
10782 top_object = FALSE;
10783 } else {
10784 if (!object->internal)
10785 break;
10786
10787 retval = KERN_FAILURE;
10788 goto page_query_done;
10789 }
10790 }
10791 }
10792 /* The ref_count is not strictly accurate: it measures the number */
10793 /* of entities holding a ref on the object, and they may not be */
10794 /* mapping the object, or the section holding the target page. */
10795 /* It's still a ballpark number, though, and while an over-count, */
10796 /* it picks up the copy-on-write cases. */
10797 
10798 /* We could also get a picture of page sharing from pmap_attributes */
10799 /* but this would undercount, since only faulted-in mappings would */
10800 /* show up. */
10801
10802 *ref_count = object->ref_count;
10803
10804 if (top_object == TRUE && object->shadow)
10805 *disposition |= VM_PAGE_QUERY_PAGE_COPIED;
10806
10807 if (m == VM_PAGE_NULL)
10808 goto page_query_done;
10809
10810 if (m->fictitious) {
10811 *disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
10812 goto page_query_done;
10813 }
10814 if (m->dirty || pmap_is_modified(m->phys_page))
10815 *disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
10816
10817 if (m->reference || pmap_is_referenced(m->phys_page))
10818 *disposition |= VM_PAGE_QUERY_PAGE_REF;
10819
10820 if (m->speculative)
10821 *disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE;
10822
10823 page_query_done:
10824 vm_object_unlock(object);
10825
10826 return retval;
10827 }
10828
10829 /*
10830 * vm_map_msync
10831 *
10832 * Synchronises the memory range specified with its backing store
10833 * image by either flushing or cleaning the contents to the appropriate
10834 * memory manager, engaging in a memory_object_synchronize dialog with
10835 * that manager. The client doesn't return until the manager issues the
10836 * m_o_s_completed message. MIG magically converts the user task parameter
10837 * to the task's address map.
10838 *
10839 * interpretation of sync_flags
10840 * VM_SYNC_INVALIDATE - discard pages, only return precious
10841 * pages to manager.
10842 *
10843 * VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
10844 * - discard pages, write dirty or precious
10845 * pages back to memory manager.
10846 *
10847 * VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
10848 * - write dirty or precious pages back to
10849 * the memory manager.
10850 *
10851 * VM_SYNC_CONTIGUOUS - does everything normally, but if there
10852 * is a hole in the region, and we would
10853 * have returned KERN_SUCCESS, return
10854 * KERN_INVALID_ADDRESS instead.
10855 *
10856 * NOTE
10857 * The memory object attributes have not yet been implemented, this
10858 * function will have to deal with the invalidate attribute
10859 *
10860 * RETURNS
10861 * KERN_INVALID_TASK Bad task parameter
10862 * KERN_INVALID_ARGUMENT both sync and async were specified.
10863 * KERN_SUCCESS The usual.
10864 * KERN_INVALID_ADDRESS There was a hole in the region.
10865 */
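/*
 * Illustrative sketch (hypothetical caller, not part of the build):
 * synchronously flush dirty pages in [addr, addr + len) back to their
 * pager, failing if the range contains a hole.  "map", "addr" and
 * "len" are assumptions of this example.
 *
 *	kr = vm_map_msync(map, vm_map_trunc_page(addr), len,
 *			  VM_SYNC_SYNCHRONOUS | VM_SYNC_CONTIGUOUS);
 */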
10866
10867 kern_return_t
10868 vm_map_msync(
10869 vm_map_t map,
10870 vm_map_address_t address,
10871 vm_map_size_t size,
10872 vm_sync_t sync_flags)
10873 {
10874 msync_req_t msr;
10875 msync_req_t new_msr;
10876 queue_chain_t req_q; /* queue of requests for this msync */
10877 vm_map_entry_t entry;
10878 vm_map_size_t amount_left;
10879 vm_object_offset_t offset;
10880 boolean_t do_sync_req;
10881 boolean_t modifiable;
10882 boolean_t had_hole = FALSE;
10883 memory_object_t pager;
10884
10885 if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
10886 (sync_flags & VM_SYNC_SYNCHRONOUS))
10887 return(KERN_INVALID_ARGUMENT);
10888
10889 /*
10890 * align address and size on page boundaries
10891 */
10892 size = vm_map_round_page(address + size) - vm_map_trunc_page(address);
10893 address = vm_map_trunc_page(address);
10894
10895 if (map == VM_MAP_NULL)
10896 return(KERN_INVALID_TASK);
10897
10898 if (size == 0)
10899 return(KERN_SUCCESS);
10900
10901 queue_init(&req_q);
10902 amount_left = size;
10903
10904 while (amount_left > 0) {
10905 vm_object_size_t flush_size;
10906 vm_object_t object;
10907
10908 vm_map_lock(map);
10909 if (!vm_map_lookup_entry(map,
10910 vm_map_trunc_page(address), &entry)) {
10911
10912 vm_map_size_t skip;
10913
10914 /*
10915 * hole in the address map.
10916 */
10917 had_hole = TRUE;
10918
10919 /*
10920 * Check for empty map.
10921 */
10922 if (entry == vm_map_to_entry(map) &&
10923 entry->vme_next == entry) {
10924 vm_map_unlock(map);
10925 break;
10926 }
10927 /*
10928 * Check that we don't wrap and that
10929 * we have at least one real map entry.
10930 */
10931 if ((map->hdr.nentries == 0) ||
10932 (entry->vme_next->vme_start < address)) {
10933 vm_map_unlock(map);
10934 break;
10935 }
10936 /*
10937 * Move up to the next entry if needed
10938 */
10939 skip = (entry->vme_next->vme_start - address);
10940 if (skip >= amount_left)
10941 amount_left = 0;
10942 else
10943 amount_left -= skip;
10944 address = entry->vme_next->vme_start;
10945 vm_map_unlock(map);
10946 continue;
10947 }
10948
10949 offset = address - entry->vme_start;
10950
10951 /*
10952 * do we have more to flush than is contained in this
10953 * entry ?
10954 */
10955 if (amount_left + entry->vme_start + offset > entry->vme_end) {
10956 flush_size = entry->vme_end -
10957 (entry->vme_start + offset);
10958 } else {
10959 flush_size = amount_left;
10960 }
10961 amount_left -= flush_size;
10962 address += flush_size;
10963
10964 if (entry->is_sub_map == TRUE) {
10965 vm_map_t local_map;
10966 vm_map_offset_t local_offset;
10967
10968 local_map = entry->object.sub_map;
10969 local_offset = entry->offset;
10970 vm_map_unlock(map);
10971 if (vm_map_msync(
10972 local_map,
10973 local_offset,
10974 flush_size,
10975 sync_flags) == KERN_INVALID_ADDRESS) {
10976 had_hole = TRUE;
10977 }
10978 continue;
10979 }
10980 object = entry->object.vm_object;
10981
10982 /*
10983 * We can't sync this object if the object has not been
10984 * created yet
10985 */
10986 if (object == VM_OBJECT_NULL) {
10987 vm_map_unlock(map);
10988 continue;
10989 }
10990 offset += entry->offset;
10991 modifiable = (entry->protection & VM_PROT_WRITE)
10992 != VM_PROT_NONE;
10993
10994 vm_object_lock(object);
10995
10996 if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
10997 boolean_t kill_pages = 0;
10998
10999 if (sync_flags & VM_SYNC_KILLPAGES) {
11000 if (object->ref_count == 1 && !entry->needs_copy && !object->shadow)
11001 kill_pages = 1;
11002 else
11003 kill_pages = -1;
11004 }
11005 if (kill_pages != -1)
11006 vm_object_deactivate_pages(object, offset,
11007 (vm_object_size_t)flush_size, kill_pages);
11008 vm_object_unlock(object);
11009 vm_map_unlock(map);
11010 continue;
11011 }
11012 /*
11013 * We can't sync this object if there isn't a pager.
11014 * Don't bother to sync internal objects, since there can't
11015 * be any "permanent" storage for these objects anyway.
11016 */
11017 if ((object->pager == MEMORY_OBJECT_NULL) ||
11018 (object->internal) || (object->private)) {
11019 vm_object_unlock(object);
11020 vm_map_unlock(map);
11021 continue;
11022 }
11023 /*
11024 * keep a reference on the object until syncing is done
11025 */
11026 vm_object_reference_locked(object);
11027 vm_object_unlock(object);
11028
11029 vm_map_unlock(map);
11030
11031 do_sync_req = vm_object_sync(object,
11032 offset,
11033 flush_size,
11034 sync_flags & VM_SYNC_INVALIDATE,
11035 (modifiable &&
11036 (sync_flags & VM_SYNC_SYNCHRONOUS ||
11037 sync_flags & VM_SYNC_ASYNCHRONOUS)),
11038 sync_flags & VM_SYNC_SYNCHRONOUS);
11039 /*
11040 * only send an m_o_s if we returned pages or if the entry
11041 * is writable (i.e. dirty pages may have already been sent back)
11042 */
11043 if (!do_sync_req && !modifiable) {
11044 if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) {
11045 /*
11046 * clear out the clustering and read-ahead hints
11047 */
11048 vm_object_lock(object);
11049
11050 object->pages_created = 0;
11051 object->pages_used = 0;
11052 object->sequential = 0;
11053 object->last_alloc = 0;
11054
11055 vm_object_unlock(object);
11056 }
11057 vm_object_deallocate(object);
11058 continue;
11059 }
11060 msync_req_alloc(new_msr);
11061
11062 vm_object_lock(object);
11063 offset += object->paging_offset;
11064
11065 new_msr->offset = offset;
11066 new_msr->length = flush_size;
11067 new_msr->object = object;
11068 new_msr->flag = VM_MSYNC_SYNCHRONIZING;
11069 re_iterate:
11070
11071 /*
11072 * We can't sync this object if there isn't a pager. The
11073 * pager can disappear anytime we're not holding the object
11074 * lock. So this has to be checked anytime we goto re_iterate.
11075 */
11076
11077 pager = object->pager;
11078
11079 if (pager == MEMORY_OBJECT_NULL) {
11080 vm_object_unlock(object);
11081 vm_object_deallocate(object);
11082 continue;
11083 }
11084
11085 queue_iterate(&object->msr_q, msr, msync_req_t, msr_q) {
11086 /*
11087 * need to check for an overlapping entry; if one is found,
11088 * wait on the overlapping msr to be done, then reiterate
11089 */
11090 msr_lock(msr);
11091 if (msr->flag == VM_MSYNC_SYNCHRONIZING &&
11092 ((offset >= msr->offset &&
11093 offset < (msr->offset + msr->length)) ||
11094 (msr->offset >= offset &&
11095 msr->offset < (offset + flush_size))))
11096 {
11097 assert_wait((event_t) msr,THREAD_INTERRUPTIBLE);
11098 msr_unlock(msr);
11099 vm_object_unlock(object);
11100 thread_block(THREAD_CONTINUE_NULL);
11101 vm_object_lock(object);
11102 goto re_iterate;
11103 }
11104 msr_unlock(msr);
11105 }/* queue_iterate */
11106
11107 queue_enter(&object->msr_q, new_msr, msync_req_t, msr_q);
11108
11109 vm_object_paging_begin(object);
11110 vm_object_unlock(object);
11111
11112 queue_enter(&req_q, new_msr, msync_req_t, req_q);
11113
11114 (void) memory_object_synchronize(
11115 pager,
11116 offset,
11117 flush_size,
11118 sync_flags & ~VM_SYNC_CONTIGUOUS);
11119
11120 vm_object_lock(object);
11121 vm_object_paging_end(object);
11122 vm_object_unlock(object);
11123 }/* while */
11124
11125 /*
11126 * wait for memory_object_synchronize_completed messages from pager(s)
11127 */
11128
11129 while (!queue_empty(&req_q)) {
11130 msr = (msync_req_t)queue_first(&req_q);
11131 msr_lock(msr);
11132 while(msr->flag != VM_MSYNC_DONE) {
11133 assert_wait((event_t) msr, THREAD_INTERRUPTIBLE);
11134 msr_unlock(msr);
11135 thread_block(THREAD_CONTINUE_NULL);
11136 msr_lock(msr);
11137 }/* while */
11138 queue_remove(&req_q, msr, msync_req_t, req_q);
11139 msr_unlock(msr);
11140 vm_object_deallocate(msr->object);
11141 msync_req_free(msr);
11142 }/* queue_iterate */
11143
11144 /* for proper msync() behaviour */
11145 if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS))
11146 return(KERN_INVALID_ADDRESS);
11147
11148 return(KERN_SUCCESS);
11149 }/* vm_msync */
11150
11151 /*
11152 * Routine: convert_port_entry_to_map
11153 * Purpose:
11154 * Convert from a port specifying an entry or a task
11155 * to a map. Doesn't consume the port ref; produces a map ref,
11156 * which may be null. Unlike convert_port_to_map, the
11157 * port may be task or a named entry backed.
11158 * Conditions:
11159 * Nothing locked.
11160 */
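/*
 * Illustrative sketch (hypothetical caller, not part of the build):
 * the caller keeps its send right on "port" and must release the map
 * reference this routine produces.
 *
 *	map = convert_port_entry_to_map(port);
 *	if (map == VM_MAP_NULL)
 *		return KERN_INVALID_ARGUMENT;
 *	... use map ...
 *	vm_map_deallocate(map);
 */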
11161
11162
11163 vm_map_t
11164 convert_port_entry_to_map(
11165 ipc_port_t port)
11166 {
11167 vm_map_t map;
11168 vm_named_entry_t named_entry;
11169 uint32_t try_failed_count = 0;
11170
11171 if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
11172 while(TRUE) {
11173 ip_lock(port);
11174 if(ip_active(port) && (ip_kotype(port)
11175 == IKOT_NAMED_ENTRY)) {
11176 named_entry =
11177 (vm_named_entry_t)port->ip_kobject;
11178 if (!(mutex_try(&(named_entry)->Lock))) {
11179 ip_unlock(port);
11180
11181 try_failed_count++;
11182 mutex_pause(try_failed_count);
11183 continue;
11184 }
11185 named_entry->ref_count++;
11186 mutex_unlock(&(named_entry)->Lock);
11187 ip_unlock(port);
11188 if ((named_entry->is_sub_map) &&
11189 (named_entry->protection
11190 & VM_PROT_WRITE)) {
11191 map = named_entry->backing.map;
11192 } else {
11193 mach_destroy_memory_entry(port);
11194 return VM_MAP_NULL;
11195 }
11196 vm_map_reference_swap(map);
11197 mach_destroy_memory_entry(port);
11198 break;
11199 }
11200 else
11201 return VM_MAP_NULL;
11202 }
11203 }
11204 else
11205 map = convert_port_to_map(port);
11206
11207 return map;
11208 }
11209
11210 /*
11211 * Routine: convert_port_entry_to_object
11212 * Purpose:
11213 * Convert from a port specifying a named entry to an
11214 * object. Doesn't consume the port ref; produces an object ref,
11215 * which may be null.
11216 * Conditions:
11217 * Nothing locked.
11218 */
11219
11220
11221 vm_object_t
11222 convert_port_entry_to_object(
11223 ipc_port_t port)
11224 {
11225 vm_object_t object;
11226 vm_named_entry_t named_entry;
11227 uint32_t try_failed_count = 0;
11228
11229 if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
11230 while(TRUE) {
11231 ip_lock(port);
11232 if(ip_active(port) && (ip_kotype(port)
11233 == IKOT_NAMED_ENTRY)) {
11234 named_entry =
11235 (vm_named_entry_t)port->ip_kobject;
11236 if (!(mutex_try(&(named_entry)->Lock))) {
11237 ip_unlock(port);
11238
11239 try_failed_count++;
11240 mutex_pause(try_failed_count);
11241 continue;
11242 }
11243 named_entry->ref_count++;
11244 mutex_unlock(&(named_entry)->Lock);
11245 ip_unlock(port);
11246 if ((!named_entry->is_sub_map) &&
11247 (!named_entry->is_pager) &&
11248 (named_entry->protection
11249 & VM_PROT_WRITE)) {
11250 object = named_entry->backing.object;
11251 } else {
11252 mach_destroy_memory_entry(port);
11253 return (vm_object_t)NULL;
11254 }
11255 vm_object_reference(named_entry->backing.object);
11256 mach_destroy_memory_entry(port);
11257 break;
11258 }
11259 else
11260 return (vm_object_t)NULL;
11261 }
11262 } else {
11263 return (vm_object_t)NULL;
11264 }
11265
11266 return object;
11267 }
11268
11269 /*
11270 * Export routines to other components for the things we access locally through
11271 * macros.
11272 */
11273 #undef current_map
11274 vm_map_t
11275 current_map(void)
11276 {
11277 return (current_map_fast());
11278 }
11279
11280 /*
11281 * vm_map_reference:
11282 *
11283 * Most code internal to the osfmk will go through a
11284 * macro defining this. This is always here for the
11285 * use of other kernel components.
11286 */
11287 #undef vm_map_reference
11288 void
11289 vm_map_reference(
11290 register vm_map_t map)
11291 {
11292 if (map == VM_MAP_NULL)
11293 return;
11294
11295 mutex_lock(&map->s_lock);
11296 #if TASK_SWAPPER
11297 assert(map->res_count > 0);
11298 assert(map->ref_count >= map->res_count);
11299 map->res_count++;
11300 #endif
11301 map->ref_count++;
11302 mutex_unlock(&map->s_lock);
11303 }
11304
11305 /*
11306 * vm_map_deallocate:
11307 *
11308 * Removes a reference from the specified map,
11309 * destroying it if no references remain.
11310 * The map should not be locked.
11311 */
11312 void
11313 vm_map_deallocate(
11314 register vm_map_t map)
11315 {
11316 unsigned int ref;
11317
11318 if (map == VM_MAP_NULL)
11319 return;
11320
11321 mutex_lock(&map->s_lock);
11322 ref = --map->ref_count;
11323 if (ref > 0) {
11324 vm_map_res_deallocate(map);
11325 mutex_unlock(&map->s_lock);
11326 return;
11327 }
11328 assert(map->ref_count == 0);
11329 mutex_unlock(&map->s_lock);
11330
11331 #if TASK_SWAPPER
11332 /*
11333 * The map residence count isn't decremented here because
11334 * the vm_map_delete below will traverse the entire map,
11335 * deleting entries, and the residence counts on objects
11336 * and sharing maps will go away then.
11337 */
11338 #endif
11339
11340 vm_map_destroy(map, VM_MAP_NO_FLAGS);
11341 }
11342
11343
11344 void
11345 vm_map_disable_NX(vm_map_t map)
11346 {
11347 if (map == NULL)
11348 return;
11349 if (map->pmap == NULL)
11350 return;
11351
11352 pmap_disable_NX(map->pmap);
11353 }
11354
11355 /* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
11356 * more descriptive.
11357 */
11358 void
11359 vm_map_set_32bit(vm_map_t map)
11360 {
11361 map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
11362 }
11363
11364
11365 void
11366 vm_map_set_64bit(vm_map_t map)
11367 {
11368 map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
11369 }
11370
11371 vm_map_offset_t
11372 vm_compute_max_offset(unsigned is64)
11373 {
11374 return (is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS);
11375 }
11376
11377 boolean_t
11378 vm_map_is_64bit(
11379 vm_map_t map)
11380 {
11381 return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
11382 }
11383
11384 boolean_t
11385 vm_map_has_4GB_pagezero(
11386 vm_map_t map)
11387 {
11388 /*
11389 * XXX FBDP
11390 * We should lock the VM map (for read) here but we can get away
11391 * with it for now because there can't really be any race condition:
11392 * the VM map's min_offset is changed only when the VM map is created
11393 * and when the zero page is established (when the binary gets loaded),
11394 * and this routine gets called only when the task terminates and the
11395 * VM map is being torn down, and when a new map is created via
11396 * load_machfile()/execve().
11397 */
11398 return (map->min_offset >= 0x100000000ULL);
11399 }
11400
11401 void
11402 vm_map_set_4GB_pagezero(vm_map_t map)
11403 {
11404 pmap_set_4GB_pagezero(map->pmap);
11405 }
11406
11407 void
11408 vm_map_clear_4GB_pagezero(vm_map_t map)
11409 {
11410 pmap_clear_4GB_pagezero(map->pmap);
11411 }
11412
11413 /*
11414 * Raise a VM map's minimum offset.
11415 * To strictly enforce "page zero" reservation.
11416 */
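/*
 * Illustrative sketch (hypothetical caller, not part of the build):
 * reserve the low 4GB of a 64-bit task's address space as a
 * page-zero region.
 *
 *	kr = vm_map_raise_min_offset(map, (vm_map_offset_t) 0x100000000ULL);
 */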
11417 kern_return_t
11418 vm_map_raise_min_offset(
11419 vm_map_t map,
11420 vm_map_offset_t new_min_offset)
11421 {
11422 vm_map_entry_t first_entry;
11423
11424 new_min_offset = vm_map_round_page(new_min_offset);
11425
11426 vm_map_lock(map);
11427
11428 if (new_min_offset < map->min_offset) {
11429 /*
11430 * Can't move min_offset backwards, as that would expose
11431 * a part of the address space that was previously, and for
11432 * possibly good reasons, inaccessible.
11433 */
11434 vm_map_unlock(map);
11435 return KERN_INVALID_ADDRESS;
11436 }
11437
11438 first_entry = vm_map_first_entry(map);
11439 if (first_entry != vm_map_to_entry(map) &&
11440 first_entry->vme_start < new_min_offset) {
11441 /*
11442 * Some memory was already allocated below the new
11443 * minimum offset. It's too late to change it now...
11444 */
11445 vm_map_unlock(map);
11446 return KERN_NO_SPACE;
11447 }
11448
11449 map->min_offset = new_min_offset;
11450
11451 vm_map_unlock(map);
11452
11453 return KERN_SUCCESS;
11454 }
11455
11456 /*
11457 * Set the limit on the maximum amount of user wired memory allowed for this map.
11458 * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side of
11459 * the kernel. The limits are checked on the Mach VM side, so we keep a copy here
11460 * to avoid reaching over to the BSD data structures.
11461 */
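/*
 * Illustrative sketch (hypothetical caller, not part of the build):
 * the BSD side would push its RLIMIT_MEMLOCK value down with
 * something like
 *
 *	vm_map_set_user_wire_limit(map, (vm_size_t) rlim_cur);
 *
 * where "rlim_cur" stands in for the process's current MEMLOCK limit.
 */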
11462
11463 void
11464 vm_map_set_user_wire_limit(vm_map_t map,
11465 vm_size_t limit)
11466 {
11467 map->user_wire_limit = limit;
11468 }