apple/xnu (xnu-1228.7.58) - osfmk/vm/vm_map.c
1 /*
2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_map.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * Virtual memory mapping module.
64 */
65
66 #include <task_swapper.h>
67 #include <mach_assert.h>
68 #include <libkern/OSAtomic.h>
69
70 #include <mach/kern_return.h>
71 #include <mach/port.h>
72 #include <mach/vm_attributes.h>
73 #include <mach/vm_param.h>
74 #include <mach/vm_behavior.h>
75 #include <mach/vm_statistics.h>
76 #include <mach/memory_object.h>
77 #include <mach/mach_vm.h>
78 #include <machine/cpu_capabilities.h>
79 #include <mach/sdt.h>
80
81 #include <kern/assert.h>
82 #include <kern/counters.h>
83 #include <kern/kalloc.h>
84 #include <kern/zalloc.h>
85
86 #include <vm/cpm.h>
87 #include <vm/vm_init.h>
88 #include <vm/vm_fault.h>
89 #include <vm/vm_map.h>
90 #include <vm/vm_object.h>
91 #include <vm/vm_page.h>
92 #include <vm/vm_kern.h>
93 #include <ipc/ipc_port.h>
94 #include <kern/sched_prim.h>
95 #include <kern/misc_protos.h>
96 #include <ddb/tr.h>
97 #include <machine/db_machdep.h>
98 #include <kern/xpr.h>
99
100 #include <mach/vm_map_server.h>
101 #include <mach/mach_host_server.h>
102 #include <vm/vm_protos.h>
103
104 #ifdef ppc
105 #include <ppc/mappings.h>
106 #endif /* ppc */
107
108 #include <vm/vm_protos.h>
109 #include <vm/vm_shared_region.h>
110
111 /* Internal prototypes
112 */
113
114 static void vm_map_simplify_range(
115 vm_map_t map,
116 vm_map_offset_t start,
117 vm_map_offset_t end); /* forward */
118
119 static boolean_t vm_map_range_check(
120 vm_map_t map,
121 vm_map_offset_t start,
122 vm_map_offset_t end,
123 vm_map_entry_t *entry);
124
125 static vm_map_entry_t _vm_map_entry_create(
126 struct vm_map_header *map_header);
127
128 static void _vm_map_entry_dispose(
129 struct vm_map_header *map_header,
130 vm_map_entry_t entry);
131
132 static void vm_map_pmap_enter(
133 vm_map_t map,
134 vm_map_offset_t addr,
135 vm_map_offset_t end_addr,
136 vm_object_t object,
137 vm_object_offset_t offset,
138 vm_prot_t protection);
139
140 static void _vm_map_clip_end(
141 struct vm_map_header *map_header,
142 vm_map_entry_t entry,
143 vm_map_offset_t end);
144
145 static void _vm_map_clip_start(
146 struct vm_map_header *map_header,
147 vm_map_entry_t entry,
148 vm_map_offset_t start);
149
150 static void vm_map_entry_delete(
151 vm_map_t map,
152 vm_map_entry_t entry);
153
154 static kern_return_t vm_map_delete(
155 vm_map_t map,
156 vm_map_offset_t start,
157 vm_map_offset_t end,
158 int flags,
159 vm_map_t zap_map);
160
161 static kern_return_t vm_map_copy_overwrite_unaligned(
162 vm_map_t dst_map,
163 vm_map_entry_t entry,
164 vm_map_copy_t copy,
165 vm_map_address_t start);
166
167 static kern_return_t vm_map_copy_overwrite_aligned(
168 vm_map_t dst_map,
169 vm_map_entry_t tmp_entry,
170 vm_map_copy_t copy,
171 vm_map_offset_t start,
172 pmap_t pmap);
173
174 static kern_return_t vm_map_copyin_kernel_buffer(
175 vm_map_t src_map,
176 vm_map_address_t src_addr,
177 vm_map_size_t len,
178 boolean_t src_destroy,
179 vm_map_copy_t *copy_result); /* OUT */
180
181 static kern_return_t vm_map_copyout_kernel_buffer(
182 vm_map_t map,
183 vm_map_address_t *addr, /* IN/OUT */
184 vm_map_copy_t copy,
185 boolean_t overwrite);
186
187 static void vm_map_fork_share(
188 vm_map_t old_map,
189 vm_map_entry_t old_entry,
190 vm_map_t new_map);
191
192 static boolean_t vm_map_fork_copy(
193 vm_map_t old_map,
194 vm_map_entry_t *old_entry_p,
195 vm_map_t new_map);
196
197 void vm_map_region_top_walk(
198 vm_map_entry_t entry,
199 vm_region_top_info_t top);
200
201 void vm_map_region_walk(
202 vm_map_t map,
203 vm_map_offset_t va,
204 vm_map_entry_t entry,
205 vm_object_offset_t offset,
206 vm_object_size_t range,
207 vm_region_extended_info_t extended,
208 boolean_t look_for_pages);
209
210 static kern_return_t vm_map_wire_nested(
211 vm_map_t map,
212 vm_map_offset_t start,
213 vm_map_offset_t end,
214 vm_prot_t access_type,
215 boolean_t user_wire,
216 pmap_t map_pmap,
217 vm_map_offset_t pmap_addr);
218
219 static kern_return_t vm_map_unwire_nested(
220 vm_map_t map,
221 vm_map_offset_t start,
222 vm_map_offset_t end,
223 boolean_t user_wire,
224 pmap_t map_pmap,
225 vm_map_offset_t pmap_addr);
226
227 static kern_return_t vm_map_overwrite_submap_recurse(
228 vm_map_t dst_map,
229 vm_map_offset_t dst_addr,
230 vm_map_size_t dst_size);
231
232 static kern_return_t vm_map_copy_overwrite_nested(
233 vm_map_t dst_map,
234 vm_map_offset_t dst_addr,
235 vm_map_copy_t copy,
236 boolean_t interruptible,
237 pmap_t pmap);
238
239 static kern_return_t vm_map_remap_extract(
240 vm_map_t map,
241 vm_map_offset_t addr,
242 vm_map_size_t size,
243 boolean_t copy,
244 struct vm_map_header *map_header,
245 vm_prot_t *cur_protection,
246 vm_prot_t *max_protection,
247 vm_inherit_t inheritance,
248 boolean_t pageable);
249
250 static kern_return_t vm_map_remap_range_allocate(
251 vm_map_t map,
252 vm_map_address_t *address,
253 vm_map_size_t size,
254 vm_map_offset_t mask,
255 boolean_t anywhere,
256 vm_map_entry_t *map_entry);
257
258 static void vm_map_region_look_for_page(
259 vm_map_t map,
260 vm_map_offset_t va,
261 vm_object_t object,
262 vm_object_offset_t offset,
263 int max_refcnt,
264 int depth,
265 vm_region_extended_info_t extended);
266
267 static int vm_map_region_count_obj_refs(
268 vm_map_entry_t entry,
269 vm_object_t object);
270
271 /*
272 * Macros to copy a vm_map_entry. We must be careful to correctly
273 * manage the wired page count. vm_map_entry_copy() creates a new
274 * map entry to the same memory - the wired count in the new entry
275 * must be set to zero. vm_map_entry_copy_full() creates a new
276 * entry that is identical to the old entry. This preserves the
277 * wire count; it's used for map splitting and zone changing in
278 * vm_map_copyout.
279 */
280 #define vm_map_entry_copy(NEW,OLD) \
281 MACRO_BEGIN \
282 *(NEW) = *(OLD); \
283 (NEW)->is_shared = FALSE; \
284 (NEW)->needs_wakeup = FALSE; \
285 (NEW)->in_transition = FALSE; \
286 (NEW)->wired_count = 0; \
287 (NEW)->user_wired_count = 0; \
288 MACRO_END
289
290 #define vm_map_entry_copy_full(NEW,OLD) (*(NEW) = *(OLD))
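/*
 * Illustrative sketch (hypothetical caller, shown only to make the
 * wired-count rule above concrete; "dst_map", "src" and "dst" are not
 * from this file). Given an existing entry "src":
 *
 *	vm_map_entry_t dst;
 *
 *	dst = vm_map_entry_create(dst_map);
 *	vm_map_entry_copy(dst, src);
 *
 * leaves dst->wired_count == 0 and dst->user_wired_count == 0, while
 *
 *	vm_map_entry_copy_full(dst, src);
 *
 * duplicates src exactly, wiring included (as vm_map_copyout relies on).
 */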
291
292 /*
293 * Decide if we want to allow processes to execute from their data or stack areas.
294 * override_nx() returns true if we do. Data/stack execution can be enabled independently
295 * for 32 and 64 bit processes. Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
296 * or allow_stack_exec to enable data execution for that type of data area for that particular
297 * ABI (or both by or'ing the flags together). These are initialized in the architecture
298 * specific pmap files since the default behavior varies according to architecture. The
299 * main reason it varies is because of the need to provide binary compatibility with old
300 * applications that were written before these restrictions came into being. In the old
301 * days, an app could execute anything it could read, but this has slowly been tightened
302 * up over time. The default behavior is:
303 *
304 * 32-bit PPC apps may execute from both stack and data areas
305 * 32-bit Intel apps may execute from data areas but not stack
306 * 64-bit PPC/Intel apps may not execute from either data or stack
307 *
308 * An application on any architecture may override these defaults by explicitly
309 * adding PROT_EXEC permission to the page in question with the mprotect(2)
310 * system call. This code here just determines what happens when an app tries to
311 * execute from a page that lacks execute permission.
312 *
313 * Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
314 * default behavior for both 32 and 64 bit apps on a system-wide basis.
315 */
316
317 extern int allow_data_exec, allow_stack_exec;
318
319 int
320 override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
321 {
322 int current_abi;
323
324 /*
325 * Determine if the app is running in 32 or 64 bit mode.
326 */
327
328 if (vm_map_is_64bit(map))
329 current_abi = VM_ABI_64;
330 else
331 current_abi = VM_ABI_32;
332
333 /*
334 * Determine if we should allow the execution based on whether it's a
335 * stack or data area and the current architecture.
336 */
337
338 if (user_tag == VM_MEMORY_STACK)
339 return allow_stack_exec & current_abi;
340
341 return allow_data_exec & current_abi;
342 }
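/*
 * Usage sketch (this mirrors the check made further down in
 * vm_map_enter()): a caller about to enter pages can consult
 * override_nx() to decide whether a page that lacks execute
 * permission should get it anyway:
 *
 *	if (override_nx(map, alias) && cur_protection)
 *		cur_protection |= VM_PROT_EXECUTE;
 */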
343
344
345 /*
346 * Virtual memory maps provide for the mapping, protection,
347 * and sharing of virtual memory objects. In addition,
348 * this module provides for an efficient virtual copy of
349 * memory from one map to another.
350 *
351 * Synchronization is required prior to most operations.
352 *
353 * Maps consist of an ordered doubly-linked list of simple
354 * entries; a single hint is used to speed up lookups.
355 *
356 * Sharing maps have been deleted from this version of Mach.
357 * All shared objects are now mapped directly into the respective
358 * maps. This requires a change in the copy on write strategy;
359 * the asymmetric (delayed) strategy is used for shared temporary
360 * objects instead of the symmetric (shadow) strategy. All maps
361 * are now "top level" maps (either task map, kernel map or submap
362 * of the kernel map).
363 *
364 * Since portions of maps are specified by start/end addresses,
365 * which may not align with existing map entries, all
366 * routines merely "clip" entries to these start/end values.
367 * [That is, an entry is split into two, bordering at a
368 * start or end value.] Note that these clippings may not
369 * always be necessary (as the two resulting entries are then
370 * not changed); however, the clipping is done for convenience.
371 * No attempt is currently made to "glue back together" two
372 * abutting entries.
373 *
374 * The symmetric (shadow) copy strategy implements virtual copy
375 * by copying VM object references from one map to
376 * another, and then marking both regions as copy-on-write.
377 * It is important to note that only one writeable reference
378 * to a VM object region exists in any map when this strategy
379 * is used -- this means that shadow object creation can be
380 * delayed until a write operation occurs. The asymmetric (delayed)
381 * strategy allows multiple maps to have writeable references to
382 * the same region of a vm object, and hence cannot delay creating
383 * its copy objects. See vm_object_copy_quickly() in vm_object.c.
384 * Copying of permanent objects is completely different; see
385 * vm_object_copy_strategically() in vm_object.c.
386 */
387
388 static zone_t vm_map_zone; /* zone for vm_map structures */
389 static zone_t vm_map_entry_zone; /* zone for vm_map_entry structures */
390 static zone_t vm_map_kentry_zone; /* zone for kernel entry structures */
391 static zone_t vm_map_copy_zone; /* zone for vm_map_copy structures */
392
393
394 /*
395 * Placeholder object for submap operations. This object is dropped
396 * into the range by a call to vm_map_find, and removed when
397 * vm_map_submap creates the submap.
398 */
399
400 vm_object_t vm_submap_object;
401
402 /*
403 * vm_map_init:
404 *
405 * Initialize the vm_map module. Must be called before
406 * any other vm_map routines.
407 *
408 * Map and entry structures are allocated from zones -- we must
409 * initialize those zones.
410 *
411 * There are three zones of interest:
412 *
413 * vm_map_zone: used to allocate maps.
414 * vm_map_entry_zone: used to allocate map entries.
415 * vm_map_kentry_zone: used to allocate map entries for the kernel.
416 *
417 * The kernel allocates map entries from a special zone that is initially
418 * "crammed" with memory. It would be difficult (perhaps impossible) for
419 * the kernel to allocate more memory to an entry zone when it became
420 * empty since the very act of allocating memory implies the creation
421 * of a new entry.
422 */
423
424 static void *map_data;
425 static vm_map_size_t map_data_size;
426 static void *kentry_data;
427 static vm_map_size_t kentry_data_size;
428 static int kentry_count = 2048; /* to init kentry_data_size */
429
430 #define NO_COALESCE_LIMIT (1024 * 128)
431
432
433 /* Skip acquiring locks if we're in the midst of a kernel core dump */
434 extern unsigned int not_in_kdp;
435
436 #ifdef __i386__
437 kern_return_t
438 vm_map_apple_protected(
439 vm_map_t map,
440 vm_map_offset_t start,
441 vm_map_offset_t end)
442 {
443 boolean_t map_locked;
444 kern_return_t kr;
445 vm_map_entry_t map_entry;
446 memory_object_t protected_mem_obj;
447 vm_object_t protected_object;
448 vm_map_offset_t map_addr;
449
450 vm_map_lock_read(map);
451 map_locked = TRUE;
452
453 /* lookup the protected VM object */
454 if (!vm_map_lookup_entry(map,
455 start,
456 &map_entry) ||
457 map_entry->vme_end != end ||
458 map_entry->is_sub_map) {
459 /* that memory is not properly mapped */
460 kr = KERN_INVALID_ARGUMENT;
461 goto done;
462 }
463 protected_object = map_entry->object.vm_object;
464 if (protected_object == VM_OBJECT_NULL) {
465 /* there should be a VM object here at this point */
466 kr = KERN_INVALID_ARGUMENT;
467 goto done;
468 }
469
470 /*
471 * Lookup (and create if necessary) the protected memory object
472 * matching that VM object.
473 * If successful, this also grabs a reference on the memory object,
474 * to guarantee that it doesn't go away before we get a chance to map
475 * it.
476 */
477
478 protected_mem_obj = apple_protect_pager_setup(protected_object);
479 if (protected_mem_obj == NULL) {
480 kr = KERN_FAILURE;
481 goto done;
482 }
483
484 vm_map_unlock_read(map);
485 map_locked = FALSE;
486
487 /* map this memory object in place of the current one */
488 map_addr = start;
489 kr = vm_map_enter_mem_object(map,
490 &map_addr,
491 end - start,
492 (mach_vm_offset_t) 0,
493 VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
494 (ipc_port_t) protected_mem_obj,
495 (map_entry->offset +
496 (start - map_entry->vme_start)),
497 TRUE,
498 map_entry->protection,
499 map_entry->max_protection,
500 map_entry->inheritance);
501 assert(map_addr == start);
502 if (kr == KERN_SUCCESS) {
503 /* let the pager know that this mem_obj is mapped */
504 apple_protect_pager_map(protected_mem_obj);
505 }
506 /*
507 * Release the reference obtained by apple_protect_pager_setup().
508 * The mapping (if it succeeded) is now holding a reference on the
509 * memory object.
510 */
511 memory_object_deallocate(protected_mem_obj);
512
513 done:
514 if (map_locked) {
515 vm_map_unlock_read(map);
516 }
517 return kr;
518 }
519 #endif /* __i386__ */
520
521
522 void
523 vm_map_init(
524 void)
525 {
526 vm_map_zone = zinit((vm_map_size_t) sizeof(struct _vm_map), 40*1024,
527 PAGE_SIZE, "maps");
528
529 vm_map_entry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
530 1024*1024, PAGE_SIZE*5,
531 "non-kernel map entries");
532
533 vm_map_kentry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
534 kentry_data_size, kentry_data_size,
535 "kernel map entries");
536
537 vm_map_copy_zone = zinit((vm_map_size_t) sizeof(struct vm_map_copy),
538 16*1024, PAGE_SIZE, "map copies");
539
540 /*
541 * Cram the map and kentry zones with initial data.
542 * Set kentry_zone non-collectible to aid zone_gc().
543 */
544 zone_change(vm_map_zone, Z_COLLECT, FALSE);
545 zone_change(vm_map_kentry_zone, Z_COLLECT, FALSE);
546 zone_change(vm_map_kentry_zone, Z_EXPAND, FALSE);
547 zcram(vm_map_zone, map_data, map_data_size);
548 zcram(vm_map_kentry_zone, kentry_data, kentry_data_size);
549 }
550
551 void
552 vm_map_steal_memory(
553 void)
554 {
555 map_data_size = vm_map_round_page(10 * sizeof(struct _vm_map));
556 map_data = pmap_steal_memory(map_data_size);
557
558 #if 0
559 /*
560 * Limiting worst case: vm_map_kentry_zone needs to map each "available"
561 * physical page (i.e. that beyond the kernel image and page tables)
562 * individually; we guess at most one entry per eight pages in the
563 * real world. This works out to roughly .1 of 1% of physical memory,
564 * or roughly 1900 entries (64K) for a 64M machine with 4K pages.
565 */
566 #endif
567 kentry_count = pmap_free_pages() / 8;
568
569
570 kentry_data_size =
571 vm_map_round_page(kentry_count * sizeof(struct vm_map_entry));
572 kentry_data = pmap_steal_memory(kentry_data_size);
573 }
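/*
 * Assumed bootstrap ordering (sketch; the actual call sites live in the
 * VM startup code, not in this file): the backing store has to be
 * stolen from the pmap before the zone package is usable, and only
 * then crammed into the zones by vm_map_init():
 *
 *	vm_map_steal_memory();	sets up map_data / kentry_data
 *	...
 *	vm_map_init();		zinit() + zcram() the stolen memory
 */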
574
575 /*
576 * vm_map_create:
577 *
578 * Creates and returns a new empty VM map with
579 * the given physical map structure, and having
580 * the given lower and upper address bounds.
581 */
582 vm_map_t
583 vm_map_create(
584 pmap_t pmap,
585 vm_map_offset_t min,
586 vm_map_offset_t max,
587 boolean_t pageable)
588 {
589 static int color_seed = 0;
590 register vm_map_t result;
591
592 result = (vm_map_t) zalloc(vm_map_zone);
593 if (result == VM_MAP_NULL)
594 panic("vm_map_create");
595
596 vm_map_first_entry(result) = vm_map_to_entry(result);
597 vm_map_last_entry(result) = vm_map_to_entry(result);
598 result->hdr.nentries = 0;
599 result->hdr.entries_pageable = pageable;
600
601 result->size = 0;
602 result->user_wire_limit = MACH_VM_MAX_ADDRESS; /* default limit is unlimited */
603 result->user_wire_size = 0;
604 result->ref_count = 1;
605 #if TASK_SWAPPER
606 result->res_count = 1;
607 result->sw_state = MAP_SW_IN;
608 #endif /* TASK_SWAPPER */
609 result->pmap = pmap;
610 result->min_offset = min;
611 result->max_offset = max;
612 result->wiring_required = FALSE;
613 result->no_zero_fill = FALSE;
614 result->mapped = FALSE;
615 result->wait_for_space = FALSE;
616 result->first_free = vm_map_to_entry(result);
617 result->hint = vm_map_to_entry(result);
618 result->color_rr = (color_seed++) & vm_color_mask;
619 vm_map_lock_init(result);
620 mutex_init(&result->s_lock, 0);
621
622 return(result);
623 }
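/*
 * Minimal usage sketch (hypothetical caller; the pmap_create()
 * arguments follow the style used later in this file, and the
 * VM_MIN_ADDRESS / VM_MAX_ADDRESS bounds are just an example):
 * create an empty, pageable 32-bit map.
 *
 *	vm_map_t map;
 *
 *	map = vm_map_create(pmap_create(0, FALSE),
 *			    (vm_map_offset_t) VM_MIN_ADDRESS,
 *			    (vm_map_offset_t) VM_MAX_ADDRESS,
 *			    TRUE);
 */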
624
625 /*
626 * vm_map_entry_create: [ internal use only ]
627 *
628 * Allocates a VM map entry for insertion in the
629 * given map (or map copy). No fields are filled.
630 */
631 #define vm_map_entry_create(map) \
632 _vm_map_entry_create(&(map)->hdr)
633
634 #define vm_map_copy_entry_create(copy) \
635 _vm_map_entry_create(&(copy)->cpy_hdr)
636
637 static vm_map_entry_t
638 _vm_map_entry_create(
639 register struct vm_map_header *map_header)
640 {
641 register zone_t zone;
642 register vm_map_entry_t entry;
643
644 if (map_header->entries_pageable)
645 zone = vm_map_entry_zone;
646 else
647 zone = vm_map_kentry_zone;
648
649 entry = (vm_map_entry_t) zalloc(zone);
650 if (entry == VM_MAP_ENTRY_NULL)
651 panic("vm_map_entry_create");
652
653 return(entry);
654 }
655
656 /*
657 * vm_map_entry_dispose: [ internal use only ]
658 *
659 * Inverse of vm_map_entry_create.
660 *
661 * write map lock held so no need to
662 * do anything special to ensure correctness
663 * of the stores
664 */
665 #define vm_map_entry_dispose(map, entry) \
666 MACRO_BEGIN \
667 if((entry) == (map)->first_free) \
668 (map)->first_free = vm_map_to_entry(map); \
669 if((entry) == (map)->hint) \
670 (map)->hint = vm_map_to_entry(map); \
671 _vm_map_entry_dispose(&(map)->hdr, (entry)); \
672 MACRO_END
673
674 #define vm_map_copy_entry_dispose(copy, entry) \
675 _vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
676
677 static void
678 _vm_map_entry_dispose(
679 register struct vm_map_header *map_header,
680 register vm_map_entry_t entry)
681 {
682 register zone_t zone;
683
684 if (map_header->entries_pageable)
685 zone = vm_map_entry_zone;
686 else
687 zone = vm_map_kentry_zone;
688
689 zfree(zone, entry);
690 }
691
692 #if MACH_ASSERT
693 static boolean_t first_free_is_valid(vm_map_t map); /* forward */
694 static boolean_t first_free_check = FALSE;
695 static boolean_t
696 first_free_is_valid(
697 vm_map_t map)
698 {
699 vm_map_entry_t entry, next;
700
701 if (!first_free_check)
702 return TRUE;
703
704 entry = vm_map_to_entry(map);
705 next = entry->vme_next;
706 while (vm_map_trunc_page(next->vme_start) == vm_map_trunc_page(entry->vme_end) ||
707 (vm_map_trunc_page(next->vme_start) == vm_map_trunc_page(entry->vme_start) &&
708 next != vm_map_to_entry(map))) {
709 entry = next;
710 next = entry->vme_next;
711 if (entry == vm_map_to_entry(map))
712 break;
713 }
714 if (map->first_free != entry) {
715 printf("Bad first_free for map %p: %p should be %p\n",
716 map, map->first_free, entry);
717 return FALSE;
718 }
719 return TRUE;
720 }
721 #endif /* MACH_ASSERT */
722
723 /*
724 * UPDATE_FIRST_FREE:
725 *
726 * Updates the map->first_free pointer to the
727 * entry immediately before the first hole in the map.
728 * The map should be locked.
729 */
730 #define UPDATE_FIRST_FREE(map, new_first_free) \
731 MACRO_BEGIN \
732 vm_map_t UFF_map; \
733 vm_map_entry_t UFF_first_free; \
734 vm_map_entry_t UFF_next_entry; \
735 UFF_map = (map); \
736 UFF_first_free = (new_first_free); \
737 UFF_next_entry = UFF_first_free->vme_next; \
738 while (vm_map_trunc_page(UFF_next_entry->vme_start) == \
739 vm_map_trunc_page(UFF_first_free->vme_end) || \
740 (vm_map_trunc_page(UFF_next_entry->vme_start) == \
741 vm_map_trunc_page(UFF_first_free->vme_start) && \
742 UFF_next_entry != vm_map_to_entry(UFF_map))) { \
743 UFF_first_free = UFF_next_entry; \
744 UFF_next_entry = UFF_first_free->vme_next; \
745 if (UFF_first_free == vm_map_to_entry(UFF_map)) \
746 break; \
747 } \
748 UFF_map->first_free = UFF_first_free; \
749 assert(first_free_is_valid(UFF_map)); \
750 MACRO_END
751
752 /*
753 * vm_map_entry_{un,}link:
754 *
755 * Insert/remove entries from maps (or map copies).
756 */
757 #define vm_map_entry_link(map, after_where, entry) \
758 MACRO_BEGIN \
759 vm_map_t VMEL_map; \
760 vm_map_entry_t VMEL_entry; \
761 VMEL_map = (map); \
762 VMEL_entry = (entry); \
763 _vm_map_entry_link(&VMEL_map->hdr, after_where, VMEL_entry); \
764 UPDATE_FIRST_FREE(VMEL_map, VMEL_map->first_free); \
765 MACRO_END
766
767
768 #define vm_map_copy_entry_link(copy, after_where, entry) \
769 _vm_map_entry_link(&(copy)->cpy_hdr, after_where, (entry))
770
771 #define _vm_map_entry_link(hdr, after_where, entry) \
772 MACRO_BEGIN \
773 (hdr)->nentries++; \
774 (entry)->vme_prev = (after_where); \
775 (entry)->vme_next = (after_where)->vme_next; \
776 (entry)->vme_prev->vme_next = (entry)->vme_next->vme_prev = (entry); \
777 MACRO_END
778
779 #define vm_map_entry_unlink(map, entry) \
780 MACRO_BEGIN \
781 vm_map_t VMEU_map; \
782 vm_map_entry_t VMEU_entry; \
783 vm_map_entry_t VMEU_first_free; \
784 VMEU_map = (map); \
785 VMEU_entry = (entry); \
786 if (VMEU_entry->vme_start <= VMEU_map->first_free->vme_start) \
787 VMEU_first_free = VMEU_entry->vme_prev; \
788 else \
789 VMEU_first_free = VMEU_map->first_free; \
790 _vm_map_entry_unlink(&VMEU_map->hdr, VMEU_entry); \
791 UPDATE_FIRST_FREE(VMEU_map, VMEU_first_free); \
792 MACRO_END
793
794 #define vm_map_copy_entry_unlink(copy, entry) \
795 _vm_map_entry_unlink(&(copy)->cpy_hdr, (entry))
796
797 #define _vm_map_entry_unlink(hdr, entry) \
798 MACRO_BEGIN \
799 (hdr)->nentries--; \
800 (entry)->vme_next->vme_prev = (entry)->vme_prev; \
801 (entry)->vme_prev->vme_next = (entry)->vme_next; \
802 MACRO_END
803
804 #if MACH_ASSERT && TASK_SWAPPER
805 /*
806 * vm_map_res_reference:
807 *
808 * Adds another valid residence count to the given map.
809 *
810 * Map is locked so this function can be called from
811 * vm_map_swapin.
812 *
813 */
814 void vm_map_res_reference(register vm_map_t map)
815 {
816 /* assert map is locked */
817 assert(map->res_count >= 0);
818 assert(map->ref_count >= map->res_count);
819 if (map->res_count == 0) {
820 mutex_unlock(&map->s_lock);
821 vm_map_lock(map);
822 vm_map_swapin(map);
823 mutex_lock(&map->s_lock);
824 ++map->res_count;
825 vm_map_unlock(map);
826 } else
827 ++map->res_count;
828 }
829
830 /*
831 * vm_map_reference_swap:
832 *
833 * Adds valid reference and residence counts to the given map.
834 *
835 * The map may not be in memory (i.e. zero residence count).
836 *
837 */
838 void vm_map_reference_swap(register vm_map_t map)
839 {
840 assert(map != VM_MAP_NULL);
841 mutex_lock(&map->s_lock);
842 assert(map->res_count >= 0);
843 assert(map->ref_count >= map->res_count);
844 map->ref_count++;
845 vm_map_res_reference(map);
846 mutex_unlock(&map->s_lock);
847 }
848
849 /*
850 * vm_map_res_deallocate:
851 *
852 * Decrement residence count on a map; possibly causing swapout.
853 *
854 * The map must be in memory (i.e. non-zero residence count).
855 *
856 * The map is locked, so this function is callable from vm_map_deallocate.
857 *
858 */
859 void vm_map_res_deallocate(register vm_map_t map)
860 {
861 assert(map->res_count > 0);
862 if (--map->res_count == 0) {
863 mutex_unlock(&map->s_lock);
864 vm_map_lock(map);
865 vm_map_swapout(map);
866 vm_map_unlock(map);
867 mutex_lock(&map->s_lock);
868 }
869 assert(map->ref_count >= map->res_count);
870 }
871 #endif /* MACH_ASSERT && TASK_SWAPPER */
872
873 /*
874 * vm_map_destroy:
875 *
876 * Actually destroy a map.
877 */
878 void
879 vm_map_destroy(
880 vm_map_t map,
881 int flags)
882 {
883 vm_map_lock(map);
884
885 /* clean up regular map entries */
886 (void) vm_map_delete(map, map->min_offset, map->max_offset,
887 flags, VM_MAP_NULL);
888 /* clean up leftover special mappings (commpage, etc...) */
889 #ifdef __ppc__
890 /*
891 * PPC51: ppc64 is limited to 51-bit addresses.
892 * Memory beyond this 51-bit limit is mapped specially at the
893 * pmap level, so do not interfere.
894 * On PPC64, the commpage is mapped beyond the addressable range
895 * via a special pmap hack, so ask pmap to clean it explicitly...
896 */
897 if (map->pmap) {
898 pmap_unmap_sharedpage(map->pmap);
899 }
900 /* ... and do not let regular pmap cleanup apply here */
901 flags |= VM_MAP_REMOVE_NO_PMAP_CLEANUP;
902 #endif /* __ppc__ */
903 (void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL,
904 flags, VM_MAP_NULL);
905 vm_map_unlock(map);
906
907 assert(map->hdr.nentries == 0);
908
909 if(map->pmap)
910 pmap_destroy(map->pmap);
911
912 zfree(vm_map_zone, map);
913 }
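/*
 * Usage sketch (hypothetical; in practice this is reached through
 * vm_map_deallocate() once the last reference on the map is dropped):
 *
 *	vm_map_destroy(map, VM_MAP_NO_FLAGS);
 */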
914
915 #if TASK_SWAPPER
916 /*
917 * vm_map_swapin/vm_map_swapout
918 *
919 * Swap a map in and out, either referencing or releasing its resources.
920 * These functions are internal use only; however, they must be exported
921 * because they may be called from macros, which are exported.
922 *
923 * In the case of swapout, there could be races on the residence count,
924 * so if the residence count is up, we return, assuming that a
925 * vm_map_deallocate() call in the near future will bring us back.
926 *
927 * Locking:
928 * -- We use the map write lock for synchronization among races.
929 * -- The map write lock, and not the simple s_lock, protects the
930 * swap state of the map.
931 * -- If a map entry is a share map, then we hold both locks, in
932 * hierarchical order.
933 *
934 * Synchronization Notes:
935 * 1) If a vm_map_swapin() call happens while swapout in progress, it
936 * will block on the map lock and proceed when swapout is through.
937 * 2) A vm_map_reference() call at this time is illegal, and will
938 * cause a panic. vm_map_reference() is only allowed on resident
939 * maps, since it refuses to block.
940 * 3) A vm_map_swapin() call during a swapin will block, and
941 * proceed when the first swapin is done, turning into a nop.
942 * This is the reason the res_count is not incremented until
943 * after the swapin is complete.
944 * 4) There is a timing hole after the checks of the res_count, before
945 * the map lock is taken, during which a swapin may get the lock
946 * before a swapout about to happen. If this happens, the swapin
947 * will detect the state and increment the reference count, causing
948 * the swapout to be a nop, thereby delaying it until a later
949 * vm_map_deallocate. If the swapout gets the lock first, then
950 * the swapin will simply block until the swapout is done, and
951 * then proceed.
952 *
953 * Because vm_map_swapin() is potentially an expensive operation, it
954 * should be used with caution.
955 *
956 * Invariants:
957 * 1) A map with a residence count of zero is either swapped, or
958 * being swapped.
959 * 2) A map with a non-zero residence count is either resident,
960 * or being swapped in.
961 */
962
963 int vm_map_swap_enable = 1;
964
965 void vm_map_swapin (vm_map_t map)
966 {
967 register vm_map_entry_t entry;
968
969 if (!vm_map_swap_enable) /* debug */
970 return;
971
972 /*
973 * Map is locked
974 * First deal with various races.
975 */
976 if (map->sw_state == MAP_SW_IN)
977 /*
978 * we raced with swapout and won. Returning will incr.
979 * the res_count, turning the swapout into a nop.
980 */
981 return;
982
983 /*
984 * The residence count must be zero. If we raced with another
985 * swapin, the state would have been IN; if we raced with a
986 * swapout (after another competing swapin), we must have lost
987 * the race to get here (see above comment), in which case
988 * res_count is still 0.
989 */
990 assert(map->res_count == 0);
991
992 /*
993 * There are no intermediate states of a map going out or
994 * coming in, since the map is locked during the transition.
995 */
996 assert(map->sw_state == MAP_SW_OUT);
997
998 /*
999 * We now operate upon each map entry. If the entry is a sub-
1000 * or share-map, we call vm_map_res_reference upon it.
1001 * If the entry is an object, we call vm_object_res_reference
1002 * (this may iterate through the shadow chain).
1003 * Note that we hold the map locked the entire time,
1004 * even if we get back here via a recursive call in
1005 * vm_map_res_reference.
1006 */
1007 entry = vm_map_first_entry(map);
1008
1009 while (entry != vm_map_to_entry(map)) {
1010 if (entry->object.vm_object != VM_OBJECT_NULL) {
1011 if (entry->is_sub_map) {
1012 vm_map_t lmap = entry->object.sub_map;
1013 mutex_lock(&lmap->s_lock);
1014 vm_map_res_reference(lmap);
1015 mutex_unlock(&lmap->s_lock);
1016 } else {
1017 vm_object_t object = entry->object.vm_object;
1018 vm_object_lock(object);
1019 /*
1020 * This call may iterate through the
1021 * shadow chain.
1022 */
1023 vm_object_res_reference(object);
1024 vm_object_unlock(object);
1025 }
1026 }
1027 entry = entry->vme_next;
1028 }
1029 assert(map->sw_state == MAP_SW_OUT);
1030 map->sw_state = MAP_SW_IN;
1031 }
1032
1033 void vm_map_swapout(vm_map_t map)
1034 {
1035 register vm_map_entry_t entry;
1036
1037 /*
1038 * Map is locked
1039 * First deal with various races.
1040 * If we raced with a swapin and lost, the residence count
1041 * will have been incremented to 1, and we simply return.
1042 */
1043 mutex_lock(&map->s_lock);
1044 if (map->res_count != 0) {
1045 mutex_unlock(&map->s_lock);
1046 return;
1047 }
1048 mutex_unlock(&map->s_lock);
1049
1050 /*
1051 * There are no intermediate states of a map going out or
1052 * coming in, since the map is locked during the transition.
1053 */
1054 assert(map->sw_state == MAP_SW_IN);
1055
1056 if (!vm_map_swap_enable)
1057 return;
1058
1059 /*
1060 * We now operate upon each map entry. If the entry is a sub-
1061 * or share-map, we call vm_map_res_deallocate upon it.
1062 * If the entry is an object, we call vm_object_res_deallocate
1063 * (this may iterate through the shadow chain).
1064 * Note that we hold the map locked the entire time,
1065 * even if we get back here via a recursive call in
1066 * vm_map_res_deallocate.
1067 */
1068 entry = vm_map_first_entry(map);
1069
1070 while (entry != vm_map_to_entry(map)) {
1071 if (entry->object.vm_object != VM_OBJECT_NULL) {
1072 if (entry->is_sub_map) {
1073 vm_map_t lmap = entry->object.sub_map;
1074 mutex_lock(&lmap->s_lock);
1075 vm_map_res_deallocate(lmap);
1076 mutex_unlock(&lmap->s_lock);
1077 } else {
1078 vm_object_t object = entry->object.vm_object;
1079 vm_object_lock(object);
1080 /*
1081 * This call may take a long time,
1082 * since it could actively push
1083 * out pages (if we implement it
1084 * that way).
1085 */
1086 vm_object_res_deallocate(object);
1087 vm_object_unlock(object);
1088 }
1089 }
1090 entry = entry->vme_next;
1091 }
1092 assert(map->sw_state == MAP_SW_IN);
1093 map->sw_state = MAP_SW_OUT;
1094 }
1095
1096 #endif /* TASK_SWAPPER */
1097
1098
1099 /*
1100 * SAVE_HINT_MAP_READ:
1101 *
1102 * Saves the specified entry as the hint for
1103 * future lookups. only a read lock is held on map,
1104 * so make sure the store is atomic... OSCompareAndSwap
1105 * guarantees this... also, we don't care if we collide
1106 * and someone else wins and stores their 'hint'
1107 */
1108 #define SAVE_HINT_MAP_READ(map,value) \
1109 MACRO_BEGIN \
1110 OSCompareAndSwap((UInt32)((map)->hint), (UInt32)value, (UInt32 *)(&(map)->hint)); \
1111 MACRO_END
1112
1113
1114 /*
1115 * SAVE_HINT_MAP_WRITE:
1116 *
1117 * Saves the specified entry as the hint for
1118 * future lookups. write lock held on map,
1119 * so no one else can be writing or looking
1120 * until the lock is dropped, so it's safe
1121 * to just do an assignment
1122 */
1123 #define SAVE_HINT_MAP_WRITE(map,value) \
1124 MACRO_BEGIN \
1125 (map)->hint = (value); \
1126 MACRO_END
1127
1128 /*
1129 * vm_map_lookup_entry: [ internal use only ]
1130 *
1131 * Finds the map entry containing (or
1132 * immediately preceding) the specified address
1133 * in the given map; the entry is returned
1134 * in the "entry" parameter. The boolean
1135 * result indicates whether the address is
1136 * actually contained in the map.
1137 */
1138 boolean_t
1139 vm_map_lookup_entry(
1140 register vm_map_t map,
1141 register vm_map_offset_t address,
1142 vm_map_entry_t *entry) /* OUT */
1143 {
1144 register vm_map_entry_t cur;
1145 register vm_map_entry_t last;
1146
1147 /*
1148 * Start looking either from the head of the
1149 * list, or from the hint.
1150 */
1151 cur = map->hint;
1152
1153 if (cur == vm_map_to_entry(map))
1154 cur = cur->vme_next;
1155
1156 if (address >= cur->vme_start) {
1157 /*
1158 * Go from hint to end of list.
1159 *
1160 * But first, make a quick check to see if
1161 * we are already looking at the entry we
1162 * want (which is usually the case).
1163 * Note also that we don't need to save the hint
1164 * here... it is the same hint (unless we are
1165 * at the header, in which case the hint didn't
1166 * buy us anything anyway).
1167 */
1168 last = vm_map_to_entry(map);
1169 if ((cur != last) && (cur->vme_end > address)) {
1170 *entry = cur;
1171 return(TRUE);
1172 }
1173 }
1174 else {
1175 /*
1176 * Go from start to hint, *inclusively*
1177 */
1178 last = cur->vme_next;
1179 cur = vm_map_first_entry(map);
1180 }
1181
1182 /*
1183 * Search linearly
1184 */
1185
1186 while (cur != last) {
1187 if (cur->vme_end > address) {
1188 if (address >= cur->vme_start) {
1189 /*
1190 * Save this lookup for future
1191 * hints, and return
1192 */
1193
1194 *entry = cur;
1195 SAVE_HINT_MAP_READ(map, cur);
1196
1197 return(TRUE);
1198 }
1199 break;
1200 }
1201 cur = cur->vme_next;
1202 }
1203 *entry = cur->vme_prev;
1204 SAVE_HINT_MAP_READ(map, *entry);
1205
1206 return(FALSE);
1207 }
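/*
 * Typical usage sketch (matches the callers in this file): the map
 * must be held locked (a read lock is enough) across the call, and
 * the returned entry is only a predecessor when the result is FALSE.
 *
 *	vm_map_entry_t entry;
 *
 *	vm_map_lock_read(map);
 *	if (vm_map_lookup_entry(map, addr, &entry))
 *		...	addr lies in [entry->vme_start, entry->vme_end)
 *	else
 *		...	entry precedes the unmapped gap containing addr
 *	vm_map_unlock_read(map);
 */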
1208
1209 /*
1210 * Routine: vm_map_find_space
1211 * Purpose:
1212 * Allocate a range in the specified virtual address map,
1213 * returning the entry allocated for that range.
1214 * Used by kmem_alloc, etc.
1215 *
1216 * The map must NOT be locked. It will be returned locked
1217 * on KERN_SUCCESS, unlocked on failure.
1218 *
1219 * If an entry is allocated, the object/offset fields
1220 * are initialized to zero.
1221 */
1222 kern_return_t
1223 vm_map_find_space(
1224 register vm_map_t map,
1225 vm_map_offset_t *address, /* OUT */
1226 vm_map_size_t size,
1227 vm_map_offset_t mask,
1228 int flags,
1229 vm_map_entry_t *o_entry) /* OUT */
1230 {
1231 register vm_map_entry_t entry, new_entry;
1232 register vm_map_offset_t start;
1233 register vm_map_offset_t end;
1234
1235 if (size == 0) {
1236 *address = 0;
1237 return KERN_INVALID_ARGUMENT;
1238 }
1239
1240 if (flags & VM_FLAGS_GUARD_AFTER) {
1241 /* account for the back guard page in the size */
1242 size += PAGE_SIZE_64;
1243 }
1244
1245 new_entry = vm_map_entry_create(map);
1246
1247 /*
1248 * Look for the first possible address; if there's already
1249 * something at this address, we have to start after it.
1250 */
1251
1252 vm_map_lock(map);
1253
1254 assert(first_free_is_valid(map));
1255 if ((entry = map->first_free) == vm_map_to_entry(map))
1256 start = map->min_offset;
1257 else
1258 start = entry->vme_end;
1259
1260 /*
1261 * In any case, the "entry" always precedes
1262 * the proposed new region throughout the loop:
1263 */
1264
1265 while (TRUE) {
1266 register vm_map_entry_t next;
1267
1268 /*
1269 * Find the end of the proposed new region.
1270 * Be sure we didn't go beyond the end, or
1271 * wrap around the address.
1272 */
1273
1274 if (flags & VM_FLAGS_GUARD_BEFORE) {
1275 /* reserve space for the front guard page */
1276 start += PAGE_SIZE_64;
1277 }
1278 end = ((start + mask) & ~mask);
1279
1280 if (end < start) {
1281 vm_map_entry_dispose(map, new_entry);
1282 vm_map_unlock(map);
1283 return(KERN_NO_SPACE);
1284 }
1285 start = end;
1286 end += size;
1287
1288 if ((end > map->max_offset) || (end < start)) {
1289 vm_map_entry_dispose(map, new_entry);
1290 vm_map_unlock(map);
1291 return(KERN_NO_SPACE);
1292 }
1293
1294 /*
1295 * If there are no more entries, we must win.
1296 */
1297
1298 next = entry->vme_next;
1299 if (next == vm_map_to_entry(map))
1300 break;
1301
1302 /*
1303 * If there is another entry, it must be
1304 * after the end of the potential new region.
1305 */
1306
1307 if (next->vme_start >= end)
1308 break;
1309
1310 /*
1311 * Didn't fit -- move to the next entry.
1312 */
1313
1314 entry = next;
1315 start = entry->vme_end;
1316 }
1317
1318 /*
1319 * At this point,
1320 * "start" and "end" should define the endpoints of the
1321 * available new range, and
1322 * "entry" should refer to the region before the new
1323 * range, and
1324 *
1325 * the map should be locked.
1326 */
1327
1328 if (flags & VM_FLAGS_GUARD_BEFORE) {
1329 /* go back for the front guard page */
1330 start -= PAGE_SIZE_64;
1331 }
1332 *address = start;
1333
1334 new_entry->vme_start = start;
1335 new_entry->vme_end = end;
1336 assert(page_aligned(new_entry->vme_start));
1337 assert(page_aligned(new_entry->vme_end));
1338
1339 new_entry->is_shared = FALSE;
1340 new_entry->is_sub_map = FALSE;
1341 new_entry->use_pmap = FALSE;
1342 new_entry->object.vm_object = VM_OBJECT_NULL;
1343 new_entry->offset = (vm_object_offset_t) 0;
1344
1345 new_entry->needs_copy = FALSE;
1346
1347 new_entry->inheritance = VM_INHERIT_DEFAULT;
1348 new_entry->protection = VM_PROT_DEFAULT;
1349 new_entry->max_protection = VM_PROT_ALL;
1350 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
1351 new_entry->wired_count = 0;
1352 new_entry->user_wired_count = 0;
1353
1354 new_entry->in_transition = FALSE;
1355 new_entry->needs_wakeup = FALSE;
1356 new_entry->no_cache = FALSE;
1357
1358 new_entry->alias = 0;
1359
1360 VM_GET_FLAGS_ALIAS(flags, new_entry->alias);
1361
1362 /*
1363 * Insert the new entry into the list
1364 */
1365
1366 vm_map_entry_link(map, entry, new_entry);
1367
1368 map->size += size;
1369
1370 /*
1371 * Update the lookup hint
1372 */
1373 SAVE_HINT_MAP_WRITE(map, new_entry);
1374
1375 *o_entry = new_entry;
1376 return(KERN_SUCCESS);
1377 }
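/*
 * Minimal usage sketch (kmem_alloc-style caller; "size" and "object"
 * are hypothetical): on KERN_SUCCESS the map comes back locked and the
 * new entry's object/offset are zeroed, so the caller fills them in
 * and unlocks.
 *
 *	vm_map_offset_t addr;
 *	vm_map_entry_t entry;
 *
 *	if (vm_map_find_space(kernel_map, &addr, size,
 *			      (vm_map_offset_t) 0, 0, &entry) == KERN_SUCCESS) {
 *		entry->object.vm_object = object;
 *		entry->offset = (vm_object_offset_t) 0;
 *		vm_map_unlock(kernel_map);
 *	}
 */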
1378
1379 int vm_map_pmap_enter_print = FALSE;
1380 int vm_map_pmap_enter_enable = FALSE;
1381
1382 /*
1383 * Routine: vm_map_pmap_enter [internal only]
1384 *
1385 * Description:
1386 * Force pages from the specified object to be entered into
1387 * the pmap at the specified address if they are present.
1388 * As soon as a page is not found in the object, the scan ends.
1389 *
1390 * Returns:
1391 * Nothing.
1392 *
1393 * In/out conditions:
1394 * The source map should not be locked on entry.
1395 */
1396 static void
1397 vm_map_pmap_enter(
1398 vm_map_t map,
1399 register vm_map_offset_t addr,
1400 register vm_map_offset_t end_addr,
1401 register vm_object_t object,
1402 vm_object_offset_t offset,
1403 vm_prot_t protection)
1404 {
1405 int type_of_fault;
1406 kern_return_t kr;
1407
1408 if(map->pmap == 0)
1409 return;
1410
1411 while (addr < end_addr) {
1412 register vm_page_t m;
1413
1414 vm_object_lock(object);
1415
1416 m = vm_page_lookup(object, offset);
1417 /*
1418 * ENCRYPTED SWAP:
1419 * The user should never see encrypted data, so do not
1420 * enter an encrypted page in the page table.
1421 */
1422 if (m == VM_PAGE_NULL || m->busy || m->encrypted ||
1423 m->fictitious ||
1424 (m->unusual && ( m->error || m->restart || m->absent))) {
1425 vm_object_unlock(object);
1426 return;
1427 }
1428
1429 if (vm_map_pmap_enter_print) {
1430 printf("vm_map_pmap_enter:");
1431 printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
1432 map, (unsigned long long)addr, object, (unsigned long long)offset);
1433 }
1434 type_of_fault = DBG_CACHE_HIT_FAULT;
1435 kr = vm_fault_enter(m, map->pmap, addr, protection,
1436 m->wire_count != 0, FALSE, FALSE,
1437 &type_of_fault);
1438
1439 vm_object_unlock(object);
1440
1441 offset += PAGE_SIZE_64;
1442 addr += PAGE_SIZE;
1443 }
1444 }
1445
1446 boolean_t vm_map_pmap_is_empty(
1447 vm_map_t map,
1448 vm_map_offset_t start,
1449 vm_map_offset_t end);
1450 boolean_t vm_map_pmap_is_empty(
1451 vm_map_t map,
1452 vm_map_offset_t start,
1453 vm_map_offset_t end)
1454 {
1455 #ifdef MACHINE_PMAP_IS_EMPTY
1456 return pmap_is_empty(map->pmap, start, end);
1457 #else /* MACHINE_PMAP_IS_EMPTY */
1458 vm_map_offset_t offset;
1459 ppnum_t phys_page;
1460
1461 if (map->pmap == NULL) {
1462 return TRUE;
1463 }
1464
1465 for (offset = start;
1466 offset < end;
1467 offset += PAGE_SIZE) {
1468 phys_page = pmap_find_phys(map->pmap, offset);
1469 if (phys_page) {
1470 kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
1471 "page %d at 0x%llx\n",
1472 map, (long long)start, (long long)end,
1473 phys_page, (long long)offset);
1474 return FALSE;
1475 }
1476 }
1477 return TRUE;
1478 #endif /* MACHINE_PMAP_IS_EMPTY */
1479 }
1480
1481 /*
1482 * Routine: vm_map_enter
1483 *
1484 * Description:
1485 * Allocate a range in the specified virtual address map.
1486 * The resulting range will refer to memory defined by
1487 * the given memory object and offset into that object.
1488 *
1489 * Arguments are as defined in the vm_map call.
1490 */
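/*
 * Minimal caller sketch (hypothetical values): map "size" bytes of
 * anonymous zero-fill memory anywhere in "map".
 *
 *	vm_map_offset_t addr = 0;
 *	kern_return_t kr;
 *
 *	kr = vm_map_enter(map, &addr, size, (vm_map_offset_t) 0,
 *			  VM_FLAGS_ANYWHERE,
 *			  VM_OBJECT_NULL, (vm_object_offset_t) 0,
 *			  FALSE,
 *			  VM_PROT_DEFAULT, VM_PROT_ALL,
 *			  VM_INHERIT_DEFAULT);
 */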
1491 int _map_enter_debug = 0;
1492 static unsigned int vm_map_enter_restore_successes = 0;
1493 static unsigned int vm_map_enter_restore_failures = 0;
1494 kern_return_t
1495 vm_map_enter(
1496 vm_map_t map,
1497 vm_map_offset_t *address, /* IN/OUT */
1498 vm_map_size_t size,
1499 vm_map_offset_t mask,
1500 int flags,
1501 vm_object_t object,
1502 vm_object_offset_t offset,
1503 boolean_t needs_copy,
1504 vm_prot_t cur_protection,
1505 vm_prot_t max_protection,
1506 vm_inherit_t inheritance)
1507 {
1508 vm_map_entry_t entry, new_entry;
1509 vm_map_offset_t start, tmp_start, tmp_offset;
1510 vm_map_offset_t end, tmp_end;
1511 kern_return_t result = KERN_SUCCESS;
1512 vm_map_t zap_old_map = VM_MAP_NULL;
1513 vm_map_t zap_new_map = VM_MAP_NULL;
1514 boolean_t map_locked = FALSE;
1515 boolean_t pmap_empty = TRUE;
1516 boolean_t new_mapping_established = FALSE;
1517 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
1518 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
1519 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
1520 boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
1521 boolean_t is_submap = ((flags & VM_FLAGS_SUBMAP) != 0);
1522 char alias;
1523 vm_map_offset_t effective_min_offset, effective_max_offset;
1524
1525 if (is_submap) {
1526 if (purgable) {
1527 /* submaps can not be purgeable */
1528 return KERN_INVALID_ARGUMENT;
1529 }
1530 if (object == VM_OBJECT_NULL) {
1531 /* submaps can not be created lazily */
1532 return KERN_INVALID_ARGUMENT;
1533 }
1534 }
1535 if (flags & VM_FLAGS_ALREADY) {
1536 /*
1537 * VM_FLAGS_ALREADY says that it's OK if the same mapping
1538 * is already present. For it to be meaningful, the requested
1539 * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
1540 * we shouldn't try and remove what was mapped there first
1541 * (!VM_FLAGS_OVERWRITE).
1542 */
1543 if ((flags & VM_FLAGS_ANYWHERE) ||
1544 (flags & VM_FLAGS_OVERWRITE)) {
1545 return KERN_INVALID_ARGUMENT;
1546 }
1547 }
1548
1549 effective_min_offset = map->min_offset;
1550 if (flags & VM_FLAGS_BEYOND_MAX) {
1551 /*
1552 * Allow an insertion beyond the map's official top boundary.
1553 */
1554 if (vm_map_is_64bit(map))
1555 effective_max_offset = 0xFFFFFFFFFFFFF000ULL;
1556 else
1557 effective_max_offset = 0x00000000FFFFF000ULL;
1558 } else {
1559 effective_max_offset = map->max_offset;
1560 }
1561
1562 if (size == 0 ||
1563 (offset & PAGE_MASK_64) != 0) {
1564 *address = 0;
1565 return KERN_INVALID_ARGUMENT;
1566 }
1567
1568 VM_GET_FLAGS_ALIAS(flags, alias);
1569
1570 #define RETURN(value) { result = value; goto BailOut; }
1571
1572 assert(page_aligned(*address));
1573 assert(page_aligned(size));
1574
1575 /*
1576 * Only zero-fill objects are allowed to be purgable.
1577 * LP64todo - limit purgable objects to 32-bits for now
1578 */
1579 if (purgable &&
1580 (offset != 0 ||
1581 (object != VM_OBJECT_NULL &&
1582 (object->size != size ||
1583 object->purgable == VM_PURGABLE_DENY))
1584 || size > VM_MAX_ADDRESS)) /* LP64todo: remove when dp capable */
1585 return KERN_INVALID_ARGUMENT;
1586
1587 if (!anywhere && overwrite) {
1588 /*
1589 * Create a temporary VM map to hold the old mappings in the
1590 * affected area while we create the new one.
1591 * This avoids releasing the VM map lock in
1592 * vm_map_entry_delete() and allows atomicity
1593 * when we want to replace some mappings with a new one.
1594 * It also allows us to restore the old VM mappings if the
1595 * new mapping fails.
1596 */
1597 zap_old_map = vm_map_create(PMAP_NULL,
1598 *address,
1599 *address + size,
1600 TRUE);
1601 }
1602
1603 StartAgain: ;
1604
1605 start = *address;
1606
1607 if (anywhere) {
1608 vm_map_lock(map);
1609 map_locked = TRUE;
1610
1611 /*
1612 * Calculate the first possible address.
1613 */
1614
1615 if (start < effective_min_offset)
1616 start = effective_min_offset;
1617 if (start > effective_max_offset)
1618 RETURN(KERN_NO_SPACE);
1619
1620 /*
1621 * Look for the first possible address;
1622 * if there's already something at this
1623 * address, we have to start after it.
1624 */
1625
1626 assert(first_free_is_valid(map));
1627 if (start == effective_min_offset) {
1628 if ((entry = map->first_free) != vm_map_to_entry(map))
1629 start = entry->vme_end;
1630 } else {
1631 vm_map_entry_t tmp_entry;
1632 if (vm_map_lookup_entry(map, start, &tmp_entry))
1633 start = tmp_entry->vme_end;
1634 entry = tmp_entry;
1635 }
1636
1637 /*
1638 * In any case, the "entry" always precedes
1639 * the proposed new region throughout the
1640 * loop:
1641 */
1642
1643 while (TRUE) {
1644 register vm_map_entry_t next;
1645
1646 /*
1647 * Find the end of the proposed new region.
1648 * Be sure we didn't go beyond the end, or
1649 * wrap around the address.
1650 */
1651
1652 end = ((start + mask) & ~mask);
1653 if (end < start)
1654 RETURN(KERN_NO_SPACE);
1655 start = end;
1656 end += size;
1657
1658 if ((end > effective_max_offset) || (end < start)) {
1659 if (map->wait_for_space) {
1660 if (size <= (effective_max_offset -
1661 effective_min_offset)) {
1662 assert_wait((event_t)map,
1663 THREAD_ABORTSAFE);
1664 vm_map_unlock(map);
1665 map_locked = FALSE;
1666 thread_block(THREAD_CONTINUE_NULL);
1667 goto StartAgain;
1668 }
1669 }
1670 RETURN(KERN_NO_SPACE);
1671 }
1672
1673 /*
1674 * If there are no more entries, we must win.
1675 */
1676
1677 next = entry->vme_next;
1678 if (next == vm_map_to_entry(map))
1679 break;
1680
1681 /*
1682 * If there is another entry, it must be
1683 * after the end of the potential new region.
1684 */
1685
1686 if (next->vme_start >= end)
1687 break;
1688
1689 /*
1690 * Didn't fit -- move to the next entry.
1691 */
1692
1693 entry = next;
1694 start = entry->vme_end;
1695 }
1696 *address = start;
1697 } else {
1698 /*
1699 * Verify that:
1700 * the address doesn't itself violate
1701 * the mask requirement.
1702 */
1703
1704 vm_map_lock(map);
1705 map_locked = TRUE;
1706 if ((start & mask) != 0)
1707 RETURN(KERN_NO_SPACE);
1708
1709 /*
1710 * ... the address is within bounds
1711 */
1712
1713 end = start + size;
1714
1715 if ((start < effective_min_offset) ||
1716 (end > effective_max_offset) ||
1717 (start >= end)) {
1718 RETURN(KERN_INVALID_ADDRESS);
1719 }
1720
1721 if (overwrite && zap_old_map != VM_MAP_NULL) {
1722 /*
1723 * Fixed mapping and "overwrite" flag: attempt to
1724 * remove all existing mappings in the specified
1725 * address range, saving them in our "zap_old_map".
1726 */
1727 (void) vm_map_delete(map, start, end,
1728 VM_MAP_REMOVE_SAVE_ENTRIES,
1729 zap_old_map);
1730 }
1731
1732 /*
1733 * ... the starting address isn't allocated
1734 */
1735
1736 if (vm_map_lookup_entry(map, start, &entry)) {
1737 if (! (flags & VM_FLAGS_ALREADY)) {
1738 RETURN(KERN_NO_SPACE);
1739 }
1740 /*
1741 * Check if what's already there is what we want.
1742 */
1743 tmp_start = start;
1744 tmp_offset = offset;
1745 if (entry->vme_start < start) {
1746 tmp_start -= start - entry->vme_start;
1747 tmp_offset -= start - entry->vme_start;
1748
1749 }
1750 for (; entry->vme_start < end;
1751 entry = entry->vme_next) {
1752 /*
1753 * Check if the mapping's attributes
1754 * match the existing map entry.
1755 */
1756 if (entry == vm_map_to_entry(map) ||
1757 entry->vme_start != tmp_start ||
1758 entry->is_sub_map != is_submap ||
1759 entry->offset != tmp_offset ||
1760 entry->needs_copy != needs_copy ||
1761 entry->protection != cur_protection ||
1762 entry->max_protection != max_protection ||
1763 entry->inheritance != inheritance ||
1764 entry->alias != alias) {
1765 /* not the same mapping ! */
1766 RETURN(KERN_NO_SPACE);
1767 }
1768 /*
1769 * Check if the same object is being mapped.
1770 */
1771 if (is_submap) {
1772 if (entry->object.sub_map !=
1773 (vm_map_t) object) {
1774 /* not the same submap */
1775 RETURN(KERN_NO_SPACE);
1776 }
1777 } else {
1778 if (entry->object.vm_object != object) {
1779 /* not the same VM object... */
1780 vm_object_t obj2;
1781
1782 obj2 = entry->object.vm_object;
1783 if ((obj2 == VM_OBJECT_NULL ||
1784 obj2->internal) &&
1785 (object == VM_OBJECT_NULL ||
1786 object->internal)) {
1787 /*
1788 * ... but both are
1789 * anonymous memory,
1790 * so equivalent.
1791 */
1792 } else {
1793 RETURN(KERN_NO_SPACE);
1794 }
1795 }
1796 }
1797
1798 tmp_offset += entry->vme_end - entry->vme_start;
1799 tmp_start += entry->vme_end - entry->vme_start;
1800 if (entry->vme_end >= end) {
1801 /* reached the end of our mapping */
1802 break;
1803 }
1804 }
1805 /* it all matches: let's use what's already there ! */
1806 RETURN(KERN_MEMORY_PRESENT);
1807 }
1808
1809 /*
1810 * ... the next region doesn't overlap the
1811 * end point.
1812 */
1813
1814 if ((entry->vme_next != vm_map_to_entry(map)) &&
1815 (entry->vme_next->vme_start < end))
1816 RETURN(KERN_NO_SPACE);
1817 }
1818
1819 /*
1820 * At this point,
1821 * "start" and "end" should define the endpoints of the
1822 * available new range, and
1823 * "entry" should refer to the region before the new
1824 * range, and
1825 *
1826 * the map should be locked.
1827 */
1828
1829 /*
1830 * See whether we can avoid creating a new entry (and object) by
1831 * extending one of our neighbors. [So far, we only attempt to
1832 * extend from below.] Note that we can never extend/join
1833 * purgable objects because they need to remain distinct
1834 * entities in order to implement their "volatile object"
1835 * semantics.
1836 */
1837
1838 if (purgable) {
1839 if (object == VM_OBJECT_NULL) {
1840 object = vm_object_allocate(size);
1841 object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
1842 object->purgable = VM_PURGABLE_NONVOLATILE;
1843 offset = (vm_object_offset_t)0;
1844 }
1845 } else if ((is_submap == FALSE) &&
1846 (object == VM_OBJECT_NULL) &&
1847 (entry != vm_map_to_entry(map)) &&
1848 (entry->vme_end == start) &&
1849 (!entry->is_shared) &&
1850 (!entry->is_sub_map) &&
1851 (entry->alias == alias) &&
1852 (entry->inheritance == inheritance) &&
1853 (entry->protection == cur_protection) &&
1854 (entry->max_protection == max_protection) &&
1855 (entry->behavior == VM_BEHAVIOR_DEFAULT) &&
1856 (entry->in_transition == 0) &&
1857 (entry->no_cache == no_cache) &&
1858 ((alias == VM_MEMORY_REALLOC) ||
1859 ((entry->vme_end - entry->vme_start) + size < NO_COALESCE_LIMIT)) &&
1860 (entry->wired_count == 0)) { /* implies user_wired_count == 0 */
1861 if (vm_object_coalesce(entry->object.vm_object,
1862 VM_OBJECT_NULL,
1863 entry->offset,
1864 (vm_object_offset_t) 0,
1865 (vm_map_size_t)(entry->vme_end - entry->vme_start),
1866 (vm_map_size_t)(end - entry->vme_end))) {
1867
1868 /*
1869 * Coalesced the two objects - can extend
1870 * the previous map entry to include the
1871 * new range.
1872 */
1873 map->size += (end - entry->vme_end);
1874 entry->vme_end = end;
1875 UPDATE_FIRST_FREE(map, map->first_free);
1876 RETURN(KERN_SUCCESS);
1877 }
1878 }
1879
1880 /*
1881 * Create a new entry
1882 * LP64todo - for now, we can only allocate 4GB internal objects
1883 * because the default pager can't page bigger ones. Remove this
1884 * when it can.
1885 *
1886 * XXX FBDP
1887 * The reserved "page zero" in each process's address space can
1888 * be arbitrarily large. Splitting it into separate 4GB objects and
1889 * therefore different VM map entries serves no purpose and just
1890 * slows down operations on the VM map, so let's not split the
1891 * allocation into 4GB chunks if the max protection is NONE. That
1892 * memory should never be accessible, so it will never get to the
1893 * default pager.
1894 */
1895 tmp_start = start;
1896 if (object == VM_OBJECT_NULL &&
1897 size > (vm_map_size_t)VM_MAX_ADDRESS &&
1898 max_protection != VM_PROT_NONE)
1899 tmp_end = tmp_start + (vm_map_size_t)VM_MAX_ADDRESS;
1900 else
1901 tmp_end = end;
1902 do {
1903 new_entry = vm_map_entry_insert(map, entry, tmp_start, tmp_end,
1904 object, offset, needs_copy,
1905 FALSE, FALSE,
1906 cur_protection, max_protection,
1907 VM_BEHAVIOR_DEFAULT,
1908 inheritance, 0, no_cache);
1909 new_entry->alias = alias;
1910 if (is_submap) {
1911 vm_map_t submap;
1912 boolean_t submap_is_64bit;
1913 boolean_t use_pmap;
1914
1915 new_entry->is_sub_map = TRUE;
1916 submap = (vm_map_t) object;
1917 submap_is_64bit = vm_map_is_64bit(submap);
1918 use_pmap = (alias == VM_MEMORY_SHARED_PMAP);
1919 #ifndef NO_NESTED_PMAP
1920 if (use_pmap && submap->pmap == NULL) {
1921 /* we need a sub pmap to nest... */
1922 submap->pmap = pmap_create(0, submap_is_64bit);
1923 if (submap->pmap == NULL) {
1924 /* let's proceed without nesting... */
1925 }
1926 }
1927 if (use_pmap && submap->pmap != NULL) {
1928 kern_return_t kr;
1929
1930 kr = pmap_nest(map->pmap,
1931 submap->pmap,
1932 tmp_start,
1933 tmp_start,
1934 tmp_end - tmp_start);
1935 if (kr != KERN_SUCCESS) {
1936 printf("vm_map_enter: "
1937 "pmap_nest(0x%llx,0x%llx) "
1938 "error 0x%x\n",
1939 (long long)tmp_start,
1940 (long long)tmp_end,
1941 kr);
1942 } else {
1943 /* we're now nested! */
1944 new_entry->use_pmap = TRUE;
1945 pmap_empty = FALSE;
1946 }
1947 }
1948 #endif /* NO_NESTED_PMAP */
1949 }
1950 entry = new_entry;
1951 } while (tmp_end != end &&
1952 (tmp_start = tmp_end) &&
1953 (tmp_end = (end - tmp_end > (vm_map_size_t)VM_MAX_ADDRESS) ?
1954 tmp_end + (vm_map_size_t)VM_MAX_ADDRESS : end));
1955
1956 vm_map_unlock(map);
1957 map_locked = FALSE;
1958
1959 new_mapping_established = TRUE;
1960
1961 /* Wire down the new entry if the user
1962 * requested all new map entries be wired.
1963 */
1964 if (map->wiring_required) {
1965 pmap_empty = FALSE; /* pmap won't be empty */
1966 result = vm_map_wire(map, start, end,
1967 new_entry->protection, TRUE);
1968 RETURN(result);
1969 }
1970
1971 if ((object != VM_OBJECT_NULL) &&
1972 (vm_map_pmap_enter_enable) &&
1973 (!anywhere) &&
1974 (!needs_copy) &&
1975 (size < (128*1024))) {
1976 pmap_empty = FALSE; /* pmap won't be empty */
1977
1978 if (override_nx(map, alias) && cur_protection)
1979 cur_protection |= VM_PROT_EXECUTE;
1980
1981 vm_map_pmap_enter(map, start, end,
1982 object, offset, cur_protection);
1983 }
1984
1985 BailOut: ;
1986 if (result == KERN_SUCCESS &&
1987 pmap_empty &&
1988 !(flags & VM_FLAGS_NO_PMAP_CHECK)) {
1989 assert(vm_map_pmap_is_empty(map, *address, *address+size));
1990 }
1991
1992 if (result != KERN_SUCCESS) {
1993 if (new_mapping_established) {
1994 /*
1995 * We have to get rid of the new mappings since we
1996 * won't make them available to the user.
1997 * Try to do that atomically, to minimize the risk
1998 * that someone else creates new mappings in that range.
1999 */
2000 zap_new_map = vm_map_create(PMAP_NULL,
2001 *address,
2002 *address + size,
2003 TRUE);
2004 if (!map_locked) {
2005 vm_map_lock(map);
2006 map_locked = TRUE;
2007 }
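/*
 * VM_MAP_REMOVE_SAVE_ENTRIES makes vm_map_delete() move the
 * removed entries into "zap_new_map" instead of freeing them
 * here; that zap map is destroyed further below, after the
 * map lock has been dropped.
 */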
2008 (void) vm_map_delete(map, *address, *address+size,
2009 VM_MAP_REMOVE_SAVE_ENTRIES,
2010 zap_new_map);
2011 }
2012 if (zap_old_map != VM_MAP_NULL &&
2013 zap_old_map->hdr.nentries != 0) {
2014 vm_map_entry_t entry1, entry2;
2015
2016 /*
2017 * The new mapping failed. Attempt to restore
2018 * the old mappings, saved in the "zap_old_map".
2019 */
2020 if (!map_locked) {
2021 vm_map_lock(map);
2022 map_locked = TRUE;
2023 }
2024
2025 /* first check if the coast is still clear */
2026 start = vm_map_first_entry(zap_old_map)->vme_start;
2027 end = vm_map_last_entry(zap_old_map)->vme_end;
2028 if (vm_map_lookup_entry(map, start, &entry1) ||
2029 vm_map_lookup_entry(map, end, &entry2) ||
2030 entry1 != entry2) {
2031 /*
2032 * Part of that range has already been
2033 * re-mapped: we can't restore the old
2034 * mappings...
2035 */
2036 vm_map_enter_restore_failures++;
2037 } else {
2038 /*
2039 * Transfer the saved map entries from
2040 * "zap_old_map" to the original "map",
2041 * inserting them all after "entry1".
2042 */
2043 for (entry2 = vm_map_first_entry(zap_old_map);
2044 entry2 != vm_map_to_entry(zap_old_map);
2045 entry2 = vm_map_first_entry(zap_old_map)) {
2046 vm_map_size_t entry_size;
2047
2048 entry_size = (entry2->vme_end -
2049 entry2->vme_start);
2050 vm_map_entry_unlink(zap_old_map,
2051 entry2);
2052 zap_old_map->size -= entry_size;
2053 vm_map_entry_link(map, entry1, entry2);
2054 map->size += entry_size;
2055 entry1 = entry2;
2056 }
2057 if (map->wiring_required) {
2058 /*
2059 * XXX TODO: we should rewire the
2060 * old pages here...
2061 */
2062 }
2063 vm_map_enter_restore_successes++;
2064 }
2065 }
2066 }
2067
2068 if (map_locked) {
2069 vm_map_unlock(map);
2070 }
2071
2072 /*
2073 * Get rid of the "zap_maps" and all the map entries that
2074 * they may still contain.
2075 */
2076 if (zap_old_map != VM_MAP_NULL) {
2077 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
2078 zap_old_map = VM_MAP_NULL;
2079 }
2080 if (zap_new_map != VM_MAP_NULL) {
2081 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
2082 zap_new_map = VM_MAP_NULL;
2083 }
2084
2085 return result;
2086
2087 #undef RETURN
2088 }
2089
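/*
 * vm_map_enter_mem_object:
 *
 * Resolve the memory object or named entry denoted by "port" and map
 * it into "target_map" (optionally mapping a copy of it), by way of
 * vm_map_enter().
 */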
2090 kern_return_t
2091 vm_map_enter_mem_object(
2092 vm_map_t target_map,
2093 vm_map_offset_t *address,
2094 vm_map_size_t initial_size,
2095 vm_map_offset_t mask,
2096 int flags,
2097 ipc_port_t port,
2098 vm_object_offset_t offset,
2099 boolean_t copy,
2100 vm_prot_t cur_protection,
2101 vm_prot_t max_protection,
2102 vm_inherit_t inheritance)
2103 {
2104 vm_map_address_t map_addr;
2105 vm_map_size_t map_size;
2106 vm_object_t object;
2107 vm_object_size_t size;
2108 kern_return_t result;
2109
2110 /*
2111 * Check arguments for validity
2112 */
2113 if ((target_map == VM_MAP_NULL) ||
2114 (cur_protection & ~VM_PROT_ALL) ||
2115 (max_protection & ~VM_PROT_ALL) ||
2116 (inheritance > VM_INHERIT_LAST_VALID) ||
2117 initial_size == 0)
2118 return KERN_INVALID_ARGUMENT;
2119
2120 map_addr = vm_map_trunc_page(*address);
2121 map_size = vm_map_round_page(initial_size);
2122 size = vm_object_round_page(initial_size);
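/*
 * Example (assuming 4KB pages): a request for 0x22 bytes at address
 * 0x1003 becomes a one-page mapping at 0x1000, with "map_size" and
 * "size" both rounded up to 0x1000.
 */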
2123
2124 /*
2125 * Find the vm object (if any) corresponding to this port.
2126 */
2127 if (!IP_VALID(port)) {
2128 object = VM_OBJECT_NULL;
2129 offset = 0;
2130 copy = FALSE;
2131 } else if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
2132 vm_named_entry_t named_entry;
2133
2134 named_entry = (vm_named_entry_t) port->ip_kobject;
2135 /* a few checks to make sure user is obeying rules */
2136 if (size == 0) {
2137 if (offset >= named_entry->size)
2138 return KERN_INVALID_RIGHT;
2139 size = named_entry->size - offset;
2140 }
2141 if ((named_entry->protection & max_protection) !=
2142 max_protection)
2143 return KERN_INVALID_RIGHT;
2144 if ((named_entry->protection & cur_protection) !=
2145 cur_protection)
2146 return KERN_INVALID_RIGHT;
2147 if (named_entry->size < (offset + size))
2148 return KERN_INVALID_ARGUMENT;
2149
2150 /* the caller's "offset" is relative to the start of the named */
2151 /* entry; convert it to an offset within the backing object */
2152 offset = offset + named_entry->offset;
2153
2154 named_entry_lock(named_entry);
2155 if (named_entry->is_sub_map) {
2156 vm_map_t submap;
2157
2158 submap = named_entry->backing.map;
2159 vm_map_lock(submap);
2160 vm_map_reference(submap);
2161 vm_map_unlock(submap);
2162 named_entry_unlock(named_entry);
2163
2164 result = vm_map_enter(target_map,
2165 &map_addr,
2166 map_size,
2167 mask,
2168 flags | VM_FLAGS_SUBMAP,
2169 (vm_object_t) submap,
2170 offset,
2171 copy,
2172 cur_protection,
2173 max_protection,
2174 inheritance);
2175 if (result != KERN_SUCCESS) {
2176 vm_map_deallocate(submap);
2177 } else {
2178 /*
2179 * No need to lock "submap" just to check its
2180 * "mapped" flag: that flag is never reset
2181 * once it's been set and if we race, we'll
2182 * just end up setting it twice, which is OK.
2183 */
2184 if (submap->mapped == FALSE) {
2185 /*
2186 * This submap has never been mapped.
2187 * Set its "mapped" flag now that it
2188 * has been mapped.
2189 * This happens only for the first ever
2190 * mapping of a "submap".
2191 */
2192 vm_map_lock(submap);
2193 submap->mapped = TRUE;
2194 vm_map_unlock(submap);
2195 }
2196 *address = map_addr;
2197 }
2198 return result;
2199
2200 } else if (named_entry->is_pager) {
2201 unsigned int access;
2202 vm_prot_t protections;
2203 unsigned int wimg_mode;
2204 boolean_t cache_attr;
2205
2206 protections = named_entry->protection & VM_PROT_ALL;
2207 access = GET_MAP_MEM(named_entry->protection);
2208
2209 object = vm_object_enter(named_entry->backing.pager,
2210 named_entry->size,
2211 named_entry->internal,
2212 FALSE,
2213 FALSE);
2214 if (object == VM_OBJECT_NULL) {
2215 named_entry_unlock(named_entry);
2216 return KERN_INVALID_OBJECT;
2217 }
2218
2219 /* JMM - drop reference on pager here */
2220
2221 /* create an extra ref for the named entry */
2222 vm_object_lock(object);
2223 vm_object_reference_locked(object);
2224 named_entry->backing.object = object;
2225 named_entry->is_pager = FALSE;
2226 named_entry_unlock(named_entry);
2227
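/*
 * Translate the MAP_MEM_* access mode recorded in the named entry
 * into the object's cache attributes (wimg bits); I/O and
 * write-combined modes also require the physical pages' attributes
 * to be synced below.
 */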
2228 wimg_mode = object->wimg_bits;
2229 if (access == MAP_MEM_IO) {
2230 wimg_mode = VM_WIMG_IO;
2231 } else if (access == MAP_MEM_COPYBACK) {
2232 wimg_mode = VM_WIMG_USE_DEFAULT;
2233 } else if (access == MAP_MEM_WTHRU) {
2234 wimg_mode = VM_WIMG_WTHRU;
2235 } else if (access == MAP_MEM_WCOMB) {
2236 wimg_mode = VM_WIMG_WCOMB;
2237 }
2238 if (wimg_mode == VM_WIMG_IO ||
2239 wimg_mode == VM_WIMG_WCOMB)
2240 cache_attr = TRUE;
2241 else
2242 cache_attr = FALSE;
2243
2244 /* wait for object (if any) to be ready */
2245 if (!named_entry->internal) {
2246 while (!object->pager_ready) {
2247 vm_object_wait(
2248 object,
2249 VM_OBJECT_EVENT_PAGER_READY,
2250 THREAD_UNINT);
2251 vm_object_lock(object);
2252 }
2253 }
2254
2255 if (object->wimg_bits != wimg_mode) {
2256 vm_page_t p;
2257
2258 vm_object_paging_wait(object, THREAD_UNINT);
2259
2260 object->wimg_bits = wimg_mode;
2261 queue_iterate(&object->memq, p, vm_page_t, listq) {
2262 if (!p->fictitious) {
2263 if (p->pmapped)
2264 pmap_disconnect(p->phys_page);
2265 if (cache_attr)
2266 pmap_sync_page_attributes_phys(p->phys_page);
2267 }
2268 }
2269 }
2270 object->true_share = TRUE;
2271 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
2272 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
2273 vm_object_unlock(object);
2274 } else {
2275 /* This is the case where we are going to map */
2276 /* an already mapped object. If the object is */
2277 /* not ready, it is internal. An external */
2278 /* object cannot be mapped until it is ready, */
2279 /* so we can avoid the ready check */
2280 /* in this case. */
2281 object = named_entry->backing.object;
2282 assert(object != VM_OBJECT_NULL);
2283 named_entry_unlock(named_entry);
2284 vm_object_reference(object);
2285 }
2286 } else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
2287 /*
2288 * JMM - This is temporary until we unify named entries
2289 * and raw memory objects.
2290 *
2291 * Detected fake ip_kotype for a memory object. In
2292 * this case, the port isn't really a port at all, but
2293 * instead is just a raw memory object.
2294 */
2295
2296 object = vm_object_enter((memory_object_t)port,
2297 size, FALSE, FALSE, FALSE);
2298 if (object == VM_OBJECT_NULL)
2299 return KERN_INVALID_OBJECT;
2300
2301 /* wait for object (if any) to be ready */
2302 if (object != VM_OBJECT_NULL) {
2303 if (object == kernel_object) {
2304 printf("Warning: Attempt to map kernel object"
2305 " by a non-private kernel entity\n");
2306 return KERN_INVALID_OBJECT;
2307 }
2308 vm_object_lock(object);
2309 while (!object->pager_ready) {
2310 vm_object_wait(object,
2311 VM_OBJECT_EVENT_PAGER_READY,
2312 THREAD_UNINT);
2313 vm_object_lock(object);
2314 }
2315 vm_object_unlock(object);
2316 }
2317 } else {
2318 return KERN_INVALID_OBJECT;
2319 }
2320
2321 /*
2322 * Perform the copy if requested
2323 */
2324
2325 if (copy) {
2326 vm_object_t new_object;
2327 vm_object_offset_t new_offset;
2328
2329 result = vm_object_copy_strategically(object, offset, size,
2330 &new_object, &new_offset,
2331 &copy);
2332
2333
2334 if (result == KERN_MEMORY_RESTART_COPY) {
2335 boolean_t success;
2336 boolean_t src_needs_copy;
2337
2338 /*
2339 * XXX
2340 * We currently ignore src_needs_copy.
2341 * This really is the issue of how to make
2342 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
2343 * non-kernel users to use. Solution forthcoming.
2344 * In the meantime, since we don't allow non-kernel
2345 * memory managers to specify symmetric copy,
2346 * we won't run into problems here.
2347 */
2348 new_object = object;
2349 new_offset = offset;
2350 success = vm_object_copy_quickly(&new_object,
2351 new_offset, size,
2352 &src_needs_copy,
2353 &copy);
2354 assert(success);
2355 result = KERN_SUCCESS;
2356 }
2357 /*
2358 * Throw away the reference to the
2359 * original object, as it won't be mapped.
2360 */
2361
2362 vm_object_deallocate(object);
2363
2364 if (result != KERN_SUCCESS)
2365 return result;
2366
2367 object = new_object;
2368 offset = new_offset;
2369 }
2370
2371 result = vm_map_enter(target_map,
2372 &map_addr, map_size,
2373 (vm_map_offset_t)mask,
2374 flags,
2375 object, offset,
2376 copy,
2377 cur_protection, max_protection, inheritance);
2378 if (result != KERN_SUCCESS)
2379 vm_object_deallocate(object);
2380 *address = map_addr;
2381 return result;
2382 }
2383
2384 #if VM_CPM
2385
2386 #ifdef MACH_ASSERT
2387 extern pmap_paddr_t avail_start, avail_end;
2388 #endif
2389
2390 /*
2391 * Allocate memory in the specified map, with the caveat that
2392 * the memory is physically contiguous. This call may fail
2393 * if the system can't find sufficient contiguous memory.
2394 * This call may cause or lead to heart-stopping amounts of
2395 * paging activity.
2396 *
2397 * Memory obtained from this call should be freed in the
2398 * normal way, viz., via vm_deallocate.
2399 */
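/*
 * Illustrative only -- a minimal, hypothetical calling sequence
 * (not taken from any in-tree caller):
 *
 *	vm_map_offset_t	addr = 0;
 *	kern_return_t	kr;
 *
 *	kr = vm_map_enter_cpm(kernel_map, &addr,
 *			      (vm_map_size_t)(16 * PAGE_SIZE),
 *			      VM_FLAGS_ANYWHERE);
 *	if (kr == KERN_SUCCESS) {
 *		... use the physically contiguous range ...
 *		vm_deallocate(kernel_map, addr, 16 * PAGE_SIZE);
 *	}
 */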
2400 kern_return_t
2401 vm_map_enter_cpm(
2402 vm_map_t map,
2403 vm_map_offset_t *addr,
2404 vm_map_size_t size,
2405 int flags)
2406 {
2407 vm_object_t cpm_obj;
2408 pmap_t pmap;
2409 vm_page_t m, pages;
2410 kern_return_t kr;
2411 vm_map_offset_t va, start, end, offset;
2412 #if MACH_ASSERT
2413 vm_map_offset_t prev_addr;
2414 #endif /* MACH_ASSERT */
2415
2416 boolean_t anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
2417
2418 if (!vm_allocate_cpm_enabled)
2419 return KERN_FAILURE;
2420
2421 if (size == 0) {
2422 *addr = 0;
2423 return KERN_SUCCESS;
2424 }
2425 if (anywhere)
2426 *addr = vm_map_min(map);
2427 else
2428 *addr = vm_map_trunc_page(*addr);
2429 size = vm_map_round_page(size);
2430
2431 /*
2432 * LP64todo - cpm_allocate should probably allow
2433 * allocations of >4GB, but not with the current
2434 * algorithm, so just cast down the size for now.
2435 */
2436 if (size > VM_MAX_ADDRESS)
2437 return KERN_RESOURCE_SHORTAGE;
2438 if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
2439 &pages, 0, TRUE)) != KERN_SUCCESS)
2440 return kr;
2441
2442 cpm_obj = vm_object_allocate((vm_object_size_t)size);
2443 assert(cpm_obj != VM_OBJECT_NULL);
2444 assert(cpm_obj->internal);
2445 assert(cpm_obj->size == (vm_object_size_t)size);
2446 assert(cpm_obj->can_persist == FALSE);
2447 assert(cpm_obj->pager_created == FALSE);
2448 assert(cpm_obj->pageout == FALSE);
2449 assert(cpm_obj->shadow == VM_OBJECT_NULL);
2450
2451 /*
2452 * Insert pages into object.
2453 */
2454
2455 vm_object_lock(cpm_obj);
2456 for (offset = 0; offset < size; offset += PAGE_SIZE) {
2457 m = pages;
2458 pages = NEXT_PAGE(m);
2459 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
2460
2461 assert(!m->gobbled);
2462 assert(!m->wanted);
2463 assert(!m->pageout);
2464 assert(!m->tabled);
2465 assert(m->wire_count);
2466 /*
2467 * ENCRYPTED SWAP:
2468 * "m" is not supposed to be pageable, so it
2469 * should not be encrypted. It wouldn't be safe
2470 * to enter it in a new VM object while encrypted.
2471 */
2472 ASSERT_PAGE_DECRYPTED(m);
2473 assert(m->busy);
2474 assert(m->phys_page>=(avail_start>>PAGE_SHIFT) && m->phys_page<=(avail_end>>PAGE_SHIFT));
2475
2476 m->busy = FALSE;
2477 vm_page_insert(m, cpm_obj, offset);
2478 }
2479 assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
2480 vm_object_unlock(cpm_obj);
2481
2482 /*
2483 * Hang onto a reference on the object in case a
2484 * multi-threaded application for some reason decides
2485 * to deallocate the portion of the address space into
2486 * which we will insert this object.
2487 *
2488 * Unfortunately, we must insert the object now before
2489 * we can talk to the pmap module about which addresses
2490 * must be wired down. Hence, the race with a multi-
2491 * threaded app.
2492 */
2493 vm_object_reference(cpm_obj);
2494
2495 /*
2496 * Insert object into map.
2497 */
2498
2499 kr = vm_map_enter(
2500 map,
2501 addr,
2502 size,
2503 (vm_map_offset_t)0,
2504 flags,
2505 cpm_obj,
2506 (vm_object_offset_t)0,
2507 FALSE,
2508 VM_PROT_ALL,
2509 VM_PROT_ALL,
2510 VM_INHERIT_DEFAULT);
2511
2512 if (kr != KERN_SUCCESS) {
2513 /*
2514 * A CPM object doesn't have can_persist set,
2515 * so all we have to do is deallocate it to
2516 * free up these pages.
2517 */
2518 assert(cpm_obj->pager_created == FALSE);
2519 assert(cpm_obj->can_persist == FALSE);
2520 assert(cpm_obj->pageout == FALSE);
2521 assert(cpm_obj->shadow == VM_OBJECT_NULL);
2522 vm_object_deallocate(cpm_obj); /* kill acquired ref */
2523 vm_object_deallocate(cpm_obj); /* kill creation ref */
2524 }
2525
2526 /*
2527 * Inform the physical mapping system that the
2528 * range of addresses may not fault, so that
2529 * page tables and such can be locked down as well.
2530 */
2531 start = *addr;
2532 end = start + size;
2533 pmap = vm_map_pmap(map);
2534 pmap_pageable(pmap, start, end, FALSE);
2535
2536 /*
2537 * Enter each page into the pmap, to avoid faults.
2538 * Note that this loop could be coded more efficiently,
2539 * if the need arose, rather than looking up each page
2540 * again.
2541 */
2542 for (offset = 0, va = start; offset < size;
2543 va += PAGE_SIZE, offset += PAGE_SIZE) {
2544 int type_of_fault;
2545
2546 vm_object_lock(cpm_obj);
2547 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
2548 assert(m != VM_PAGE_NULL);
2549
2550 vm_page_zero_fill(m);
2551
2552 type_of_fault = DBG_ZERO_FILL_FAULT;
2553
2554 vm_fault_enter(m, pmap, va, VM_PROT_ALL,
2555 m->wire_count != 0, FALSE, FALSE,
2556 &type_of_fault);
2557
2558 vm_object_unlock(cpm_obj);
2559 }
2560
2561 #if MACH_ASSERT
2562 /*
2563 * Verify ordering in address space.
2564 */
2565 for (offset = 0; offset < size; offset += PAGE_SIZE) {
2566 vm_object_lock(cpm_obj);
2567 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
2568 vm_object_unlock(cpm_obj);
2569 if (m == VM_PAGE_NULL)
2570 panic("vm_allocate_cpm: obj 0x%x off 0x%x no page",
2571 cpm_obj, offset);
2572 assert(m->tabled);
2573 assert(!m->busy);
2574 assert(!m->wanted);
2575 assert(!m->fictitious);
2576 assert(!m->private);
2577 assert(!m->absent);
2578 assert(!m->error);
2579 assert(!m->cleaning);
2580 assert(!m->precious);
2581 assert(!m->clustered);
2582 if (offset != 0) {
2583 if (m->phys_page != prev_addr + 1) {
2584 printf("start 0x%x end 0x%x va 0x%x\n",
2585 start, end, va);
2586 printf("obj 0x%x off 0x%x\n", cpm_obj, offset);
2587 printf("m 0x%x prev_address 0x%x\n", m,
2588 prev_addr);
2589 panic("vm_allocate_cpm: pages not contig!");
2590 }
2591 }
2592 prev_addr = m->phys_page;
2593 }
2594 #endif /* MACH_ASSERT */
2595
2596 vm_object_deallocate(cpm_obj); /* kill extra ref */
2597
2598 return kr;
2599 }
2600
2601
2602 #else /* VM_CPM */
2603
2604 /*
2605 * Interface is defined in all cases, but unless the kernel
2606 * is built explicitly for this option, the interface does
2607 * nothing.
2608 */
2609
2610 kern_return_t
2611 vm_map_enter_cpm(
2612 __unused vm_map_t map,
2613 __unused vm_map_offset_t *addr,
2614 __unused vm_map_size_t size,
2615 __unused int flags)
2616 {
2617 return KERN_FAILURE;
2618 }
2619 #endif /* VM_CPM */
2620
2621 /*
2622 * Clip and unnest a portion of a nested submap mapping.
2623 */
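/*
 * The entry is first clipped so that it covers exactly the range being
 * unnested; the nested page tables are then removed from the parent
 * map's pmap and, if the map is in use, any mappings the parent picked
 * up from the submap are cleaned out as well.
 */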
2624 static void
2625 vm_map_clip_unnest(
2626 vm_map_t map,
2627 vm_map_entry_t entry,
2628 vm_map_offset_t start_unnest,
2629 vm_map_offset_t end_unnest)
2630 {
2631 assert(entry->is_sub_map);
2632 assert(entry->object.sub_map != NULL);
2633
2634 if (entry->vme_start > start_unnest ||
2635 entry->vme_end < end_unnest) {
2636 panic("vm_map_clip_unnest(0x%llx,0x%llx): "
2637 "bad nested entry: start=0x%llx end=0x%llx\n",
2638 (long long)start_unnest, (long long)end_unnest,
2639 (long long)entry->vme_start, (long long)entry->vme_end);
2640 }
2641 if (start_unnest > entry->vme_start) {
2642 _vm_map_clip_start(&map->hdr,
2643 entry,
2644 start_unnest);
2645 UPDATE_FIRST_FREE(map, map->first_free);
2646 }
2647 if (entry->vme_end > end_unnest) {
2648 _vm_map_clip_end(&map->hdr,
2649 entry,
2650 end_unnest);
2651 UPDATE_FIRST_FREE(map, map->first_free);
2652 }
2653
2654 pmap_unnest(map->pmap,
2655 entry->vme_start,
2656 entry->vme_end - entry->vme_start);
2657 if ((map->mapped) && (map->ref_count)) {
2658 /* clean up parent map/maps */
2659 vm_map_submap_pmap_clean(
2660 map, entry->vme_start,
2661 entry->vme_end,
2662 entry->object.sub_map,
2663 entry->offset);
2664 }
2665 entry->use_pmap = FALSE;
2666 }
2667
2668 /*
2669 * vm_map_clip_start: [ internal use only ]
2670 *
2671 * Asserts that the given entry begins at or after
2672 * the specified address; if necessary,
2673 * it splits the entry into two.
2674 */
2675 static void
2676 vm_map_clip_start(
2677 vm_map_t map,
2678 vm_map_entry_t entry,
2679 vm_map_offset_t startaddr)
2680 {
2681 #ifndef NO_NESTED_PMAP
2682 if (entry->use_pmap &&
2683 startaddr >= entry->vme_start) {
2684 vm_map_offset_t start_unnest, end_unnest;
2685
2686 /*
2687 * Make sure "startaddr" is no longer in a nested range
2688 * before we clip. Unnest only the minimum range the platform
2689 * can handle.
2690 */
2691 start_unnest = startaddr & ~(pmap_nesting_size_min - 1);
2692 end_unnest = start_unnest + pmap_nesting_size_min;
2693 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
2694 }
2695 #endif /* NO_NESTED_PMAP */
2696 if (startaddr > entry->vme_start) {
2697 if (entry->object.vm_object &&
2698 !entry->is_sub_map &&
2699 entry->object.vm_object->phys_contiguous) {
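/*
 * Physically contiguous objects get their pmap mappings removed
 * across the whole entry before it is clipped; the pages will be
 * re-faulted as needed.
 */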
2700 pmap_remove(map->pmap,
2701 (addr64_t)(entry->vme_start),
2702 (addr64_t)(entry->vme_end));
2703 }
2704 _vm_map_clip_start(&map->hdr, entry, startaddr);
2705 UPDATE_FIRST_FREE(map, map->first_free);
2706 }
2707 }
2708
2709
2710 #define vm_map_copy_clip_start(copy, entry, startaddr) \
2711 MACRO_BEGIN \
2712 if ((startaddr) > (entry)->vme_start) \
2713 _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \
2714 MACRO_END
2715
2716 /*
2717 * This routine is called only when it is known that
2718 * the entry must be split.
2719 */
2720 static void
2721 _vm_map_clip_start(
2722 register struct vm_map_header *map_header,
2723 register vm_map_entry_t entry,
2724 register vm_map_offset_t start)
2725 {
2726 register vm_map_entry_t new_entry;
2727
2728 /*
2729 * Split off the front portion --
2730 * note that we must insert the new
2731 * entry BEFORE this one, so that
2732 * this entry has the specified starting
2733 * address.
2734 */
2735
2736 new_entry = _vm_map_entry_create(map_header);
2737 vm_map_entry_copy_full(new_entry, entry);
2738
2739 new_entry->vme_end = start;
2740 entry->offset += (start - entry->vme_start);
2741 entry->vme_start = start;
2742
2743 _vm_map_entry_link(map_header, entry->vme_prev, new_entry);
2744
2745 if (entry->is_sub_map)
2746 vm_map_reference(new_entry->object.sub_map);
2747 else
2748 vm_object_reference(new_entry->object.vm_object);
2749 }
2750
2751
2752 /*
2753 * vm_map_clip_end: [ internal use only ]
2754 *
2755 * Asserts that the given entry ends at or before
2756 * the specified address; if necessary,
2757 * it splits the entry into two.
2758 */
2759 static void
2760 vm_map_clip_end(
2761 vm_map_t map,
2762 vm_map_entry_t entry,
2763 vm_map_offset_t endaddr)
2764 {
2765 if (endaddr > entry->vme_end) {
2766 /*
2767 * Within the scope of this clipping, limit "endaddr" to
2768 * the end of this map entry...
2769 */
2770 endaddr = entry->vme_end;
2771 }
2772 #ifndef NO_NESTED_PMAP
2773 if (entry->use_pmap) {
2774 vm_map_offset_t start_unnest, end_unnest;
2775
2776 /*
2777 * Make sure the range between the start of this entry and
2778 * the new "endaddr" is no longer nested before we clip.
2779 * Unnest only the minimum range the platform can handle.
2780 */
2781 start_unnest = entry->vme_start;
2782 end_unnest =
2783 (endaddr + pmap_nesting_size_min - 1) &
2784 ~(pmap_nesting_size_min - 1);
2785 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
2786 }
2787 #endif /* NO_NESTED_PMAP */
2788 if (endaddr < entry->vme_end) {
2789 if (entry->object.vm_object &&
2790 !entry->is_sub_map &&
2791 entry->object.vm_object->phys_contiguous) {
2792 pmap_remove(map->pmap,
2793 (addr64_t)(entry->vme_start),
2794 (addr64_t)(entry->vme_end));
2795 }
2796 _vm_map_clip_end(&map->hdr, entry, endaddr);
2797 UPDATE_FIRST_FREE(map, map->first_free);
2798 }
2799 }
2800
2801
2802 #define vm_map_copy_clip_end(copy, entry, endaddr) \
2803 MACRO_BEGIN \
2804 if ((endaddr) < (entry)->vme_end) \
2805 _vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \
2806 MACRO_END
2807
2808 /*
2809 * This routine is called only when it is known that
2810 * the entry must be split.
2811 */
2812 static void
2813 _vm_map_clip_end(
2814 register struct vm_map_header *map_header,
2815 register vm_map_entry_t entry,
2816 register vm_map_offset_t end)
2817 {
2818 register vm_map_entry_t new_entry;
2819
2820 /*
2821 * Create a new entry and insert it
2822 * AFTER the specified entry
2823 */
2824
2825 new_entry = _vm_map_entry_create(map_header);
2826 vm_map_entry_copy_full(new_entry, entry);
2827
2828 new_entry->vme_start = entry->vme_end = end;
2829 new_entry->offset += (end - entry->vme_start);
2830
2831 _vm_map_entry_link(map_header, entry, new_entry);
2832
2833 if (entry->is_sub_map)
2834 vm_map_reference(new_entry->object.sub_map);
2835 else
2836 vm_object_reference(new_entry->object.vm_object);
2837 }
2838
2839
2840 /*
2841 * VM_MAP_RANGE_CHECK: [ internal use only ]
2842 *
2843 * Asserts that the starting and ending region
2844 * addresses fall within the valid range of the map.
2845 */
2846 #define VM_MAP_RANGE_CHECK(map, start, end) \
2847 MACRO_BEGIN \
2848 if (start < vm_map_min(map)) \
2849 start = vm_map_min(map); \
2850 if (end > vm_map_max(map)) \
2851 end = vm_map_max(map); \
2852 if (start > end) \
2853 start = end; \
2854 MACRO_END
2855
2856 /*
2857 * vm_map_range_check: [ internal use only ]
2858 *
2859 * Check that the region defined by the specified start and
2860 * end addresses is wholly contained within a single map
2861 * entry or set of adjacent map entries of the specified map,
2862 * i.e. the specified region contains no unmapped space.
2863 * If any or all of the region is unmapped, FALSE is returned.
2864 * Otherwise, TRUE is returned and if the output argument 'entry'
2865 * is not NULL it points to the map entry containing the start
2866 * of the region.
2867 *
2868 * The map is locked for reading on entry and is left locked.
2869 */
2870 static boolean_t
2871 vm_map_range_check(
2872 register vm_map_t map,
2873 register vm_map_offset_t start,
2874 register vm_map_offset_t end,
2875 vm_map_entry_t *entry)
2876 {
2877 vm_map_entry_t cur;
2878 register vm_map_offset_t prev;
2879
2880 /*
2881 * Basic sanity checks first
2882 */
2883 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
2884 return (FALSE);
2885
2886 /*
2887 * Check first if the region starts within a valid
2888 * mapping for the map.
2889 */
2890 if (!vm_map_lookup_entry(map, start, &cur))
2891 return (FALSE);
2892
2893 /*
2894 * Optimize for the case that the region is contained
2895 * in a single map entry.
2896 */
2897 if (entry != (vm_map_entry_t *) NULL)
2898 *entry = cur;
2899 if (end <= cur->vme_end)
2900 return (TRUE);
2901
2902 /*
2903 * If the region is not wholly contained within a
2904 * single entry, walk the entries looking for holes.
2905 */
2906 prev = cur->vme_end;
2907 cur = cur->vme_next;
2908 while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
2909 if (end <= cur->vme_end)
2910 return (TRUE);
2911 prev = cur->vme_end;
2912 cur = cur->vme_next;
2913 }
2914 return (FALSE);
2915 }
2916
2917 /*
2918 * vm_map_submap: [ kernel use only ]
2919 *
2920 * Mark the given range as handled by a subordinate map.
2921 *
2922 * This range must have been created with vm_map_find using
2923 * the vm_submap_object, and no other operations may have been
2924 * performed on this range prior to calling vm_map_submap.
2925 *
2926 * Only a limited number of operations can be performed
2927 * within this range after calling vm_map_submap:
2928 * vm_fault
2929 * [Don't try vm_map_copyin!]
2930 *
2931 * To remove a submapping, one must first remove the
2932 * range from the superior map, and then destroy the
2933 * submap (if desired). [Better yet, don't try it.]
2934 */
2935 kern_return_t
2936 vm_map_submap(
2937 vm_map_t map,
2938 vm_map_offset_t start,
2939 vm_map_offset_t end,
2940 vm_map_t submap,
2941 vm_map_offset_t offset,
2942 #ifdef NO_NESTED_PMAP
2943 __unused
2944 #endif /* NO_NESTED_PMAP */
2945 boolean_t use_pmap)
2946 {
2947 vm_map_entry_t entry;
2948 register kern_return_t result = KERN_INVALID_ARGUMENT;
2949 register vm_object_t object;
2950
2951 vm_map_lock(map);
2952
2953 if (! vm_map_lookup_entry(map, start, &entry)) {
2954 entry = entry->vme_next;
2955 }
2956
2957 if (entry == vm_map_to_entry(map) ||
2958 entry->is_sub_map) {
2959 vm_map_unlock(map);
2960 return KERN_INVALID_ARGUMENT;
2961 }
2962
2963 assert(!entry->use_pmap); /* we don't want to unnest anything here */
2964 vm_map_clip_start(map, entry, start);
2965 vm_map_clip_end(map, entry, end);
2966
2967 if ((entry->vme_start == start) && (entry->vme_end == end) &&
2968 (!entry->is_sub_map) &&
2969 ((object = entry->object.vm_object) == vm_submap_object) &&
2970 (object->resident_page_count == 0) &&
2971 (object->copy == VM_OBJECT_NULL) &&
2972 (object->shadow == VM_OBJECT_NULL) &&
2973 (!object->pager_created)) {
2974 entry->offset = (vm_object_offset_t)offset;
2975 entry->object.vm_object = VM_OBJECT_NULL;
2976 vm_object_deallocate(object);
2977 entry->is_sub_map = TRUE;
2978 entry->object.sub_map = submap;
2979 vm_map_reference(submap);
2980 submap->mapped = TRUE;
2981
2982 #ifndef NO_NESTED_PMAP
2983 if (use_pmap) {
2984 /* nest if platform code will allow */
2985 if(submap->pmap == NULL) {
2986 submap->pmap = pmap_create((vm_map_size_t) 0, FALSE);
2987 if(submap->pmap == PMAP_NULL) {
2988 vm_map_unlock(map);
2989 return(KERN_NO_SPACE);
2990 }
2991 }
2992 result = pmap_nest(map->pmap,
2993 (entry->object.sub_map)->pmap,
2994 (addr64_t)start,
2995 (addr64_t)start,
2996 (uint64_t)(end - start));
2997 if(result)
2998 panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result);
2999 entry->use_pmap = TRUE;
3000 }
3001 #else /* NO_NESTED_PMAP */
3002 pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end);
3003 #endif /* NO_NESTED_PMAP */
3004 result = KERN_SUCCESS;
3005 }
3006 vm_map_unlock(map);
3007
3008 return(result);
3009 }
3010
3011 /*
3012 * vm_map_protect:
3013 *
3014 * Sets the protection of the specified address
3015 * region in the target map. If "set_max" is
3016 * specified, the maximum protection is to be set;
3017 * otherwise, only the current protection is affected.
3018 */
3019 kern_return_t
3020 vm_map_protect(
3021 register vm_map_t map,
3022 register vm_map_offset_t start,
3023 register vm_map_offset_t end,
3024 register vm_prot_t new_prot,
3025 register boolean_t set_max)
3026 {
3027 register vm_map_entry_t current;
3028 register vm_map_offset_t prev;
3029 vm_map_entry_t entry;
3030 vm_prot_t new_max;
3031
3032 XPR(XPR_VM_MAP,
3033 "vm_map_protect, 0x%X start 0x%X end 0x%X, new 0x%X %d",
3034 (integer_t)map, start, end, new_prot, set_max);
3035
3036 vm_map_lock(map);
3037
3038 /* LP64todo - remove this check when vm_map_commpage64()
3039 * no longer has to stuff in a map_entry for the commpage
3040 * above the map's max_offset.
3041 */
3042 if (start >= map->max_offset) {
3043 vm_map_unlock(map);
3044 return(KERN_INVALID_ADDRESS);
3045 }
3046
3047 /*
3048 * Lookup the entry. If it doesn't start in a valid
3049 * entry, return an error.
3050 */
3051 if (! vm_map_lookup_entry(map, start, &entry)) {
3052 vm_map_unlock(map);
3053 return(KERN_INVALID_ADDRESS);
3054 }
3055
3056 /*
3057 * Make a first pass to check for protection and address
3058 * violations.
3059 */
3060
3061 current = entry;
3062 prev = current->vme_start;
3063 while ((current != vm_map_to_entry(map)) &&
3064 (current->vme_start < end)) {
3065
3066 /*
3067 * If there is a hole, return an error.
3068 */
3069 if (current->vme_start != prev) {
3070 vm_map_unlock(map);
3071 return(KERN_INVALID_ADDRESS);
3072 }
3073
3074 new_max = current->max_protection;
3075 if(new_prot & VM_PROT_COPY) {
3076 new_max |= VM_PROT_WRITE;
3077 if ((new_prot & (new_max | VM_PROT_COPY)) != new_prot) {
3078 vm_map_unlock(map);
3079 return(KERN_PROTECTION_FAILURE);
3080 }
3081 } else {
3082 if ((new_prot & new_max) != new_prot) {
3083 vm_map_unlock(map);
3084 return(KERN_PROTECTION_FAILURE);
3085 }
3086 }
3087
3088 prev = current->vme_end;
3089 current = current->vme_next;
3090 }
3091 if (end > prev) {
3092 vm_map_unlock(map);
3093 return(KERN_INVALID_ADDRESS);
3094 }
3095
3096 /*
3097 * Go back and fix up protections.
3098 * Clip to start here if the range starts within
3099 * the entry.
3100 */
3101
3102 current = entry;
3103 if (current != vm_map_to_entry(map)) {
3104 /* clip and unnest if necessary */
3105 vm_map_clip_start(map, current, start);
3106 }
3107
3108 while ((current != vm_map_to_entry(map)) &&
3109 (current->vme_start < end)) {
3110
3111 vm_prot_t old_prot;
3112
3113 vm_map_clip_end(map, current, end);
3114
3115 assert(!current->use_pmap); /* clipping did unnest if needed */
3116
3117 old_prot = current->protection;
3118
3119 if(new_prot & VM_PROT_COPY) {
3120 /* caller is asking specifically to copy the */
3121 /* mapped data; this implies that max protection */
3122 /* will include write. Caller must be prepared */
3123 /* for loss of shared memory communication in the */
3124 /* target area after taking this step */
3125 current->needs_copy = TRUE;
3126 current->max_protection |= VM_PROT_WRITE;
3127 }
3128
3129 if (set_max)
3130 current->protection =
3131 (current->max_protection =
3132 new_prot & ~VM_PROT_COPY) &
3133 old_prot;
3134 else
3135 current->protection = new_prot & ~VM_PROT_COPY;
3136
3137 /*
3138 * Update physical map if necessary.
3139 * If the request is to turn off write protection,
3140 * we won't do it for real (in pmap). This is because
3141 * it would cause copy-on-write to fail. We've already
3142 * set the new protection in the map, so if a
3143 * write-protect fault occurred, it will be fixed up
3144 * properly, COW or not.
3145 */
3146 if (current->protection != old_prot) {
3147 /* Look one level in: we support nested pmaps */
3148 /* from mapped submaps which are direct entries */
3149 /* in our map */
3150
3151 vm_prot_t prot;
3152
3153 prot = current->protection & ~VM_PROT_WRITE;
3154
3155 if (override_nx(map, current->alias) && prot)
3156 prot |= VM_PROT_EXECUTE;
3157
3158 if (current->is_sub_map && current->use_pmap) {
3159 pmap_protect(current->object.sub_map->pmap,
3160 current->vme_start,
3161 current->vme_end,
3162 prot);
3163 } else {
3164 pmap_protect(map->pmap,
3165 current->vme_start,
3166 current->vme_end,
3167 prot);
3168 }
3169 }
3170 current = current->vme_next;
3171 }
3172
3173 current = entry;
3174 while ((current != vm_map_to_entry(map)) &&
3175 (current->vme_start <= end)) {
3176 vm_map_simplify_entry(map, current);
3177 current = current->vme_next;
3178 }
3179
3180 vm_map_unlock(map);
3181 return(KERN_SUCCESS);
3182 }
3183
3184 /*
3185 * vm_map_inherit:
3186 *
3187 * Sets the inheritance of the specified address
3188 * range in the target map. Inheritance
3189 * affects how the map will be shared with
3190 * child maps at the time of vm_map_fork.
3191 */
3192 kern_return_t
3193 vm_map_inherit(
3194 register vm_map_t map,
3195 register vm_map_offset_t start,
3196 register vm_map_offset_t end,
3197 register vm_inherit_t new_inheritance)
3198 {
3199 register vm_map_entry_t entry;
3200 vm_map_entry_t temp_entry;
3201
3202 vm_map_lock(map);
3203
3204 VM_MAP_RANGE_CHECK(map, start, end);
3205
3206 if (vm_map_lookup_entry(map, start, &temp_entry)) {
3207 entry = temp_entry;
3208 }
3209 else {
3210 temp_entry = temp_entry->vme_next;
3211 entry = temp_entry;
3212 }
3213
3214 /* first check entire range for submaps which can't support the */
3215 /* given inheritance. */
3216 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
3217 if(entry->is_sub_map) {
3218 if(new_inheritance == VM_INHERIT_COPY) {
3219 vm_map_unlock(map);
3220 return(KERN_INVALID_ARGUMENT);
3221 }
3222 }
3223
3224 entry = entry->vme_next;
3225 }
3226
3227 entry = temp_entry;
3228 if (entry != vm_map_to_entry(map)) {
3229 /* clip and unnest if necessary */
3230 vm_map_clip_start(map, entry, start);
3231 }
3232
3233 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
3234 vm_map_clip_end(map, entry, end);
3235 assert(!entry->use_pmap); /* clip did unnest if needed */
3236
3237 entry->inheritance = new_inheritance;
3238
3239 entry = entry->vme_next;
3240 }
3241
3242 vm_map_unlock(map);
3243 return(KERN_SUCCESS);
3244 }
3245
3246 /*
3247 * Update the accounting for the amount of wired memory in this map. If the user has
3248 * exceeded the defined limits, then we fail. Wiring on behalf of the kernel never fails.
3249 */
3250
3251 static kern_return_t
3252 add_wire_counts(
3253 vm_map_t map,
3254 vm_map_entry_t entry,
3255 boolean_t user_wire)
3256 {
3257 vm_map_size_t size;
3258
3259 if (user_wire) {
3260
3261 /*
3262 * We're wiring memory at the request of the user. Check if this is the first time the user is wiring
3263 * this map entry.
3264 */
3265
3266 if (entry->user_wired_count == 0) {
3267 size = entry->vme_end - entry->vme_start;
3268
3269 /*
3270 * Since this is the first time the user is wiring this map entry, check to see if we're
3271 * exceeding the user wire limits. There is a per map limit which is the smaller of either
3272 * the process's rlimit or the global vm_user_wire_limit which caps this value. There is also
3273 * a system-wide limit on the amount of memory all users can wire. If the user is over either
3274 * limit, then we fail.
3275 */
3276
3277 if(size + map->user_wire_size > MIN(map->user_wire_limit, vm_user_wire_limit) ||
3278 size + ptoa_64(vm_page_wire_count) > vm_global_user_wire_limit)
3279 return KERN_RESOURCE_SHORTAGE;
3280
3281 /*
3282 * The first time the user wires an entry, we also increment the wired_count and add this to
3283 * the total that has been wired in the map.
3284 */
3285
3286 if (entry->wired_count >= MAX_WIRE_COUNT)
3287 return KERN_FAILURE;
3288
3289 entry->wired_count++;
3290 map->user_wire_size += size;
3291 }
3292
3293 if (entry->user_wired_count >= MAX_WIRE_COUNT)
3294 return KERN_FAILURE;
3295
3296 entry->user_wired_count++;
3297
3298 } else {
3299
3300 /*
3301 * The kernel's wiring the memory. Just bump the count and continue.
3302 */
3303
3304 if (entry->wired_count >= MAX_WIRE_COUNT)
3305 panic("vm_map_wire: too many wirings");
3306
3307 entry->wired_count++;
3308 }
3309
3310 return KERN_SUCCESS;
3311 }
3312
3313 /*
3314 * Update the memory wiring accounting now that the given map entry is being unwired.
3315 */
3316
3317 static void
3318 subtract_wire_counts(
3319 vm_map_t map,
3320 vm_map_entry_t entry,
3321 boolean_t user_wire)
3322 {
3323
3324 if (user_wire) {
3325
3326 /*
3327 * We're unwiring memory at the request of the user. See if we're removing the last user wire reference.
3328 */
3329
3330 if (entry->user_wired_count == 1) {
3331
3332 /*
3333 * We're removing the last user wire reference. Decrement the wired_count and the total
3334 * user wired memory for this map.
3335 */
3336
3337 assert(entry->wired_count >= 1);
3338 entry->wired_count--;
3339 map->user_wire_size -= entry->vme_end - entry->vme_start;
3340 }
3341
3342 assert(entry->user_wired_count >= 1);
3343 entry->user_wired_count--;
3344
3345 } else {
3346
3347 /*
3348 * The kernel is unwiring the memory. Just update the count.
3349 */
3350
3351 assert(entry->wired_count >= 1);
3352 entry->wired_count--;
3353 }
3354 }
3355
3356 /*
3357 * vm_map_wire:
3358 *
3359 * Sets the pageability of the specified address range in the
3360 * target map as wired. Regions specified as not pageable require
3361 * locked-down physical memory and physical page maps. The
3362 * access_type variable indicates types of accesses that must not
3363 * generate page faults. This is checked against protection of
3364 * memory being locked-down.
3365 *
3366 * The map must not be locked, but a reference must remain to the
3367 * map throughout the call.
3368 */
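/*
 * For vm_map_wire_nested(), "map_pmap" and "pmap_addr" identify the
 * physical map (and the corresponding start address within it) that
 * the pages are actually wired into; they are supplied when recursing
 * into a nested submap and are NULL/0 for a top-level vm_map_wire().
 */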
3369 static kern_return_t
3370 vm_map_wire_nested(
3371 register vm_map_t map,
3372 register vm_map_offset_t start,
3373 register vm_map_offset_t end,
3374 register vm_prot_t access_type,
3375 boolean_t user_wire,
3376 pmap_t map_pmap,
3377 vm_map_offset_t pmap_addr)
3378 {
3379 register vm_map_entry_t entry;
3380 struct vm_map_entry *first_entry, tmp_entry;
3381 vm_map_t real_map;
3382 register vm_map_offset_t s,e;
3383 kern_return_t rc;
3384 boolean_t need_wakeup;
3385 boolean_t main_map = FALSE;
3386 wait_interrupt_t interruptible_state;
3387 thread_t cur_thread;
3388 unsigned int last_timestamp;
3389 vm_map_size_t size;
3390
3391 vm_map_lock(map);
3392 if(map_pmap == NULL)
3393 main_map = TRUE;
3394 last_timestamp = map->timestamp;
3395
3396 VM_MAP_RANGE_CHECK(map, start, end);
3397 assert(page_aligned(start));
3398 assert(page_aligned(end));
3399 if (start == end) {
3400 /* We wired what the caller asked for: zero pages */
3401 vm_map_unlock(map);
3402 return KERN_SUCCESS;
3403 }
3404
3405 need_wakeup = FALSE;
3406 cur_thread = current_thread();
3407
3408 s = start;
3409 rc = KERN_SUCCESS;
3410
3411 if (vm_map_lookup_entry(map, s, &first_entry)) {
3412 entry = first_entry;
3413 /*
3414 * vm_map_clip_start will be done later.
3415 * We don't want to unnest any nested submaps here !
3416 */
3417 } else {
3418 /* Start address is not in map */
3419 rc = KERN_INVALID_ADDRESS;
3420 goto done;
3421 }
3422
3423 while ((entry != vm_map_to_entry(map)) && (s < end)) {
3424 /*
3425 * At this point, we have wired from "start" to "s".
3426 * We still need to wire from "s" to "end".
3427 *
3428 * "entry" hasn't been clipped, so it could start before "s"
3429 * and/or end after "end".
3430 */
3431
3432 /* "e" is how far we want to wire in this entry */
3433 e = entry->vme_end;
3434 if (e > end)
3435 e = end;
3436
3437 /*
3438 * If another thread is wiring/unwiring this entry then
3439 * block after informing other thread to wake us up.
3440 */
3441 if (entry->in_transition) {
3442 wait_result_t wait_result;
3443
3444 /*
3445 * We have not clipped the entry. Make sure that
3446 * the start address is in range so that the lookup
3447 * below will succeed.
3448 * "s" is the current starting point: we've already
3449 * wired from "start" to "s" and we still have
3450 * to wire from "s" to "end".
3451 */
3452
3453 entry->needs_wakeup = TRUE;
3454
3455 /*
3456 * wake up anybody waiting on entries that we have
3457 * already wired.
3458 */
3459 if (need_wakeup) {
3460 vm_map_entry_wakeup(map);
3461 need_wakeup = FALSE;
3462 }
3463 /*
3464 * User wiring is interruptible
3465 */
3466 wait_result = vm_map_entry_wait(map,
3467 (user_wire) ? THREAD_ABORTSAFE :
3468 THREAD_UNINT);
3469 if (user_wire && wait_result == THREAD_INTERRUPTED) {
3470 /*
3471 * undo the wirings we have done so far
3472 * We do not clear the needs_wakeup flag,
3473 * because we cannot tell if we were the
3474 * only one waiting.
3475 */
3476 rc = KERN_FAILURE;
3477 goto done;
3478 }
3479
3480 /*
3481 * Cannot avoid a lookup here. Reset the timestamp.
3482 */
3483 last_timestamp = map->timestamp;
3484
3485 /*
3486 * The entry could have been clipped, look it up again.
3487 * The worst that can happen is that it may not exist anymore.
3488 */
3489 if (!vm_map_lookup_entry(map, s, &first_entry)) {
3490 if (!user_wire)
3491 panic("vm_map_wire: re-lookup failed");
3492
3493 /*
3494 * User: undo everything up to the previous
3495 * entry. Let vm_map_unwire worry about
3496 * checking the validity of the range.
3497 */
3498 rc = KERN_FAILURE;
3499 goto done;
3500 }
3501 entry = first_entry;
3502 continue;
3503 }
3504
3505 if (entry->is_sub_map) {
3506 vm_map_offset_t sub_start;
3507 vm_map_offset_t sub_end;
3508 vm_map_offset_t local_start;
3509 vm_map_offset_t local_end;
3510 pmap_t pmap;
3511
3512 vm_map_clip_start(map, entry, s);
3513 vm_map_clip_end(map, entry, end);
3514
3515 sub_start = entry->offset;
3516 sub_end = entry->vme_end;
3517 sub_end += entry->offset - entry->vme_start;
3518
3519 local_end = entry->vme_end;
3520 if(map_pmap == NULL) {
3521 vm_object_t object;
3522 vm_object_offset_t offset;
3523 vm_prot_t prot;
3524 boolean_t wired;
3525 vm_map_entry_t local_entry;
3526 vm_map_version_t version;
3527 vm_map_t lookup_map;
3528
3529 if(entry->use_pmap) {
3530 pmap = entry->object.sub_map->pmap;
3531 /* the ppc implementation requires that */
3532 /* a submap's pmap address ranges line */
3533 /* up with the parent map */
3534 #ifdef notdef
3535 pmap_addr = sub_start;
3536 #endif
3537 pmap_addr = s;
3538 } else {
3539 pmap = map->pmap;
3540 pmap_addr = s;
3541 }
3542
3543 if (entry->wired_count) {
3544 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
3545 goto done;
3546
3547 /*
3548 * The map was not unlocked:
3549 * no need to goto re-lookup.
3550 * Just go directly to next entry.
3551 */
3552 entry = entry->vme_next;
3553 s = entry->vme_start;
3554 continue;
3555
3556 }
3557
3558 /* call vm_map_lookup_locked to */
3559 /* cause any needs_copy to be */
3560 /* evaluated */
3561 local_start = entry->vme_start;
3562 lookup_map = map;
3563 vm_map_lock_write_to_read(map);
3564 if(vm_map_lookup_locked(
3565 &lookup_map, local_start,
3566 access_type,
3567 OBJECT_LOCK_EXCLUSIVE,
3568 &version, &object,
3569 &offset, &prot, &wired,
3570 NULL,
3571 &real_map)) {
3572
3573 vm_map_unlock_read(lookup_map);
3574 vm_map_unwire(map, start,
3575 s, user_wire);
3576 return(KERN_FAILURE);
3577 }
3578 if(real_map != lookup_map)
3579 vm_map_unlock(real_map);
3580 vm_map_unlock_read(lookup_map);
3581 vm_map_lock(map);
3582 vm_object_unlock(object);
3583
3584 /* we unlocked, so must re-lookup */
3585 if (!vm_map_lookup_entry(map,
3586 local_start,
3587 &local_entry)) {
3588 rc = KERN_FAILURE;
3589 goto done;
3590 }
3591
3592 /*
3593 * entry could have been "simplified",
3594 * so re-clip
3595 */
3596 entry = local_entry;
3597 assert(s == local_start);
3598 vm_map_clip_start(map, entry, s);
3599 vm_map_clip_end(map, entry, end);
3600 /* re-compute "e" */
3601 e = entry->vme_end;
3602 if (e > end)
3603 e = end;
3604
3605 /* did we have a change of type? */
3606 if (!entry->is_sub_map) {
3607 last_timestamp = map->timestamp;
3608 continue;
3609 }
3610 } else {
3611 local_start = entry->vme_start;
3612 pmap = map_pmap;
3613 }
3614
3615 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
3616 goto done;
3617
3618 entry->in_transition = TRUE;
3619
3620 vm_map_unlock(map);
3621 rc = vm_map_wire_nested(entry->object.sub_map,
3622 sub_start, sub_end,
3623 access_type,
3624 user_wire, pmap, pmap_addr);
3625 vm_map_lock(map);
3626
3627 /*
3628 * Find the entry again. It could have been clipped
3629 * after we unlocked the map.
3630 */
3631 if (!vm_map_lookup_entry(map, local_start,
3632 &first_entry))
3633 panic("vm_map_wire: re-lookup failed");
3634 entry = first_entry;
3635
3636 assert(local_start == s);
3637 /* re-compute "e" */
3638 e = entry->vme_end;
3639 if (e > end)
3640 e = end;
3641
3642 last_timestamp = map->timestamp;
3643 while ((entry != vm_map_to_entry(map)) &&
3644 (entry->vme_start < e)) {
3645 assert(entry->in_transition);
3646 entry->in_transition = FALSE;
3647 if (entry->needs_wakeup) {
3648 entry->needs_wakeup = FALSE;
3649 need_wakeup = TRUE;
3650 }
3651 if (rc != KERN_SUCCESS) {/* from vm_*_wire */
3652 subtract_wire_counts(map, entry, user_wire);
3653 }
3654 entry = entry->vme_next;
3655 }
3656 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
3657 goto done;
3658 }
3659
3660 /* no need to relookup again */
3661 s = entry->vme_start;
3662 continue;
3663 }
3664
3665 /*
3666 * If this entry is already wired then increment
3667 * the appropriate wire reference count.
3668 */
3669 if (entry->wired_count) {
3670 /*
3671 * entry is already wired down, get our reference
3672 * after clipping to our range.
3673 */
3674 vm_map_clip_start(map, entry, s);
3675 vm_map_clip_end(map, entry, end);
3676
3677 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
3678 goto done;
3679
3680 /* map was not unlocked: no need to relookup */
3681 entry = entry->vme_next;
3682 s = entry->vme_start;
3683 continue;
3684 }
3685
3686 /*
3687 * Unwired entry or wire request transmitted via submap
3688 */
3689
3690
3691 /*
3692 * Perform actions of vm_map_lookup that need the write
3693 * lock on the map: create a shadow object for a
3694 * copy-on-write region, or an object for a zero-fill
3695 * region.
3696 */
3697 size = entry->vme_end - entry->vme_start;
3698 /*
3699 * If wiring a copy-on-write page, we need to copy it now
3700 * even if we're only (currently) requesting read access.
3701 * This is aggressive, but once it's wired we can't move it.
3702 */
3703 if (entry->needs_copy) {
3704 vm_object_shadow(&entry->object.vm_object,
3705 &entry->offset, size);
3706 entry->needs_copy = FALSE;
3707 } else if (entry->object.vm_object == VM_OBJECT_NULL) {
3708 entry->object.vm_object = vm_object_allocate(size);
3709 entry->offset = (vm_object_offset_t)0;
3710 }
3711
3712 vm_map_clip_start(map, entry, s);
3713 vm_map_clip_end(map, entry, end);
3714
3715 /* re-compute "e" */
3716 e = entry->vme_end;
3717 if (e > end)
3718 e = end;
3719
3720 /*
3721 * Check for holes and protection mismatch.
3722 * Holes: Next entry should be contiguous unless this
3723 * is the end of the region.
3724 * Protection: Access requested must be allowed, unless
3725 * wiring is by protection class
3726 */
3727 if ((entry->vme_end < end) &&
3728 ((entry->vme_next == vm_map_to_entry(map)) ||
3729 (entry->vme_next->vme_start > entry->vme_end))) {
3730 /* found a hole */
3731 rc = KERN_INVALID_ADDRESS;
3732 goto done;
3733 }
3734 if ((entry->protection & access_type) != access_type) {
3735 /* found a protection problem */
3736 rc = KERN_PROTECTION_FAILURE;
3737 goto done;
3738 }
3739
3740 assert(entry->wired_count == 0 && entry->user_wired_count == 0);
3741
3742 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
3743 goto done;
3744
3745 entry->in_transition = TRUE;
3746
3747 /*
3748 * This entry might get split once we unlock the map.
3749 * In vm_fault_wire(), we need the current range as
3750 * defined by this entry. In order for this to work
3751 * along with a simultaneous clip operation, we make a
3752 * temporary copy of this entry and use that for the
3753 * wiring. Note that the underlying objects do not
3754 * change during a clip.
3755 */
3756 tmp_entry = *entry;
3757
3758 /*
3759 * The in_transition state guarantees that the entry
3760 * (or entries for this range, if a split occurred) will be
3761 * there when the map lock is acquired for the second time.
3762 */
3763 vm_map_unlock(map);
3764
3765 if (!user_wire && cur_thread != THREAD_NULL)
3766 interruptible_state = thread_interrupt_level(THREAD_UNINT);
3767 else
3768 interruptible_state = THREAD_UNINT;
3769
3770 if(map_pmap)
3771 rc = vm_fault_wire(map,
3772 &tmp_entry, map_pmap, pmap_addr);
3773 else
3774 rc = vm_fault_wire(map,
3775 &tmp_entry, map->pmap,
3776 tmp_entry.vme_start);
3777
3778 if (!user_wire && cur_thread != THREAD_NULL)
3779 thread_interrupt_level(interruptible_state);
3780
3781 vm_map_lock(map);
3782
3783 if (last_timestamp+1 != map->timestamp) {
3784 /*
3785 * Find the entry again. It could have been clipped
3786 * after we unlocked the map.
3787 */
3788 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
3789 &first_entry))
3790 panic("vm_map_wire: re-lookup failed");
3791
3792 entry = first_entry;
3793 }
3794
3795 last_timestamp = map->timestamp;
3796
3797 while ((entry != vm_map_to_entry(map)) &&
3798 (entry->vme_start < tmp_entry.vme_end)) {
3799 assert(entry->in_transition);
3800 entry->in_transition = FALSE;
3801 if (entry->needs_wakeup) {
3802 entry->needs_wakeup = FALSE;
3803 need_wakeup = TRUE;
3804 }
3805 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
3806 subtract_wire_counts(map, entry, user_wire);
3807 }
3808 entry = entry->vme_next;
3809 }
3810
3811 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
3812 goto done;
3813 }
3814
3815 s = entry->vme_start;
3816 } /* end while loop through map entries */
3817
3818 done:
3819 if (rc == KERN_SUCCESS) {
3820 /* repair any damage we may have made to the VM map */
3821 vm_map_simplify_range(map, start, end);
3822 }
3823
3824 vm_map_unlock(map);
3825
3826 /*
3827 * wake up anybody waiting on entries we wired.
3828 */
3829 if (need_wakeup)
3830 vm_map_entry_wakeup(map);
3831
3832 if (rc != KERN_SUCCESS) {
3833 /* undo what has been wired so far */
3834 vm_map_unwire(map, start, s, user_wire);
3835 }
3836
3837 return rc;
3838
3839 }
3840
3841 kern_return_t
3842 vm_map_wire(
3843 register vm_map_t map,
3844 register vm_map_offset_t start,
3845 register vm_map_offset_t end,
3846 register vm_prot_t access_type,
3847 boolean_t user_wire)
3848 {
3849
3850 kern_return_t kret;
3851
3852 #ifdef ppc
3853 /*
3854 * the calls to mapping_prealloc and mapping_relpre
3855 * (along with the VM_MAP_RANGE_CHECK to ensure a
3856 * reasonable range was passed in) are
3857 * currently necessary because
3858 * we haven't enabled kernel pre-emption
3859 * and/or the pmap_enter cannot purge and re-use
3860 * existing mappings
3861 */
3862 VM_MAP_RANGE_CHECK(map, start, end);
3863 mapping_prealloc(end - start);
3864 #endif
3865 kret = vm_map_wire_nested(map, start, end, access_type,
3866 user_wire, (pmap_t)NULL, 0);
3867 #ifdef ppc
3868 mapping_relpre();
3869 #endif
3870 return kret;
3871 }
3872
3873 /*
3874 * vm_map_unwire:
3875 *
3876 * Sets the pageability of the specified address range in the target
3877 * as pageable. Regions specified must have been wired previously.
3878 *
3879 * The map must not be locked, but a reference must remain to the map
3880 * throughout the call.
3881 *
3882 * Kernel will panic on failures. User unwire ignores holes and
3883 * unwired and in-transition entries to avoid losing memory by leaving
3884 * it unwired.
3885 */
3886 static kern_return_t
3887 vm_map_unwire_nested(
3888 register vm_map_t map,
3889 register vm_map_offset_t start,
3890 register vm_map_offset_t end,
3891 boolean_t user_wire,
3892 pmap_t map_pmap,
3893 vm_map_offset_t pmap_addr)
3894 {
3895 register vm_map_entry_t entry;
3896 struct vm_map_entry *first_entry, tmp_entry;
3897 boolean_t need_wakeup;
3898 boolean_t main_map = FALSE;
3899 unsigned int last_timestamp;
3900
3901 vm_map_lock(map);
3902 if(map_pmap == NULL)
3903 main_map = TRUE;
3904 last_timestamp = map->timestamp;
3905
3906 VM_MAP_RANGE_CHECK(map, start, end);
3907 assert(page_aligned(start));
3908 assert(page_aligned(end));
3909
3910 if (start == end) {
3911 /* We unwired what the caller asked for: zero pages */
3912 vm_map_unlock(map);
3913 return KERN_SUCCESS;
3914 }
3915
3916 if (vm_map_lookup_entry(map, start, &first_entry)) {
3917 entry = first_entry;
3918 /*
3919 * vm_map_clip_start will be done later.
3920 * We don't want to unnest any nested sub maps here !
3921 */
3922 }
3923 else {
3924 if (!user_wire) {
3925 panic("vm_map_unwire: start not found");
3926 }
3927 /* Start address is not in map. */
3928 vm_map_unlock(map);
3929 return(KERN_INVALID_ADDRESS);
3930 }
3931
3932 need_wakeup = FALSE;
3933 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
3934 if (entry->in_transition) {
3935 /*
3936 * 1)
3937 * Another thread is wiring down this entry. Note
3938 * that if it were not for the other thread, we would
3939 * be unwiring an unwired entry. This is not
3940 * permitted. If we wait, we will be unwiring memory
3941 * we did not wire.
3942 *
3943 * 2)
3944 * Another thread is unwiring this entry. We did not
3945 * have a reference to it, because if we did, this
3946 * entry will not be getting unwired now.
3947 */
3948 if (!user_wire) {
3949 /*
3950 * XXX FBDP
3951 * This could happen: there could be some
3952 * overlapping vslock/vsunlock operations
3953 * going on.
3954 * We should probably just wait and retry,
3955 * but then we have to be careful that this
3956 * entry could get "simplified" after
3957 * "in_transition" gets unset and before
3958 * we re-lookup the entry, so we would
3959 * have to re-clip the entry to avoid
3960 * re-unwiring what we have already unwired...
3961 * See vm_map_wire_nested().
3962 *
3963 * Or we could just ignore "in_transition"
3964 * here and proceed to decrement the wired
3965 * count(s) on this entry. That should be fine
3966 * as long as "wired_count" doesn't drop all
3967 * the way to 0 (and we should panic if THAT
3968 * happens).
3969 */
3970 panic("vm_map_unwire: in_transition entry");
3971 }
3972
3973 entry = entry->vme_next;
3974 continue;
3975 }
3976
3977 if (entry->is_sub_map) {
3978 vm_map_offset_t sub_start;
3979 vm_map_offset_t sub_end;
3980 vm_map_offset_t local_end;
3981 pmap_t pmap;
3982
3983 vm_map_clip_start(map, entry, start);
3984 vm_map_clip_end(map, entry, end);
3985
3986 sub_start = entry->offset;
3987 sub_end = entry->vme_end - entry->vme_start;
3988 sub_end += entry->offset;
3989 local_end = entry->vme_end;
3990 if(map_pmap == NULL) {
3991 if(entry->use_pmap) {
3992 pmap = entry->object.sub_map->pmap;
3993 pmap_addr = sub_start;
3994 } else {
3995 pmap = map->pmap;
3996 pmap_addr = start;
3997 }
3998 if (entry->wired_count == 0 ||
3999 (user_wire && entry->user_wired_count == 0)) {
4000 if (!user_wire)
4001 panic("vm_map_unwire: entry is unwired");
4002 entry = entry->vme_next;
4003 continue;
4004 }
4005
4006 /*
4007 * Check for holes
4008 * Holes: Next entry should be contiguous unless
4009 * this is the end of the region.
4010 */
4011 if (((entry->vme_end < end) &&
4012 ((entry->vme_next == vm_map_to_entry(map)) ||
4013 (entry->vme_next->vme_start
4014 > entry->vme_end)))) {
4015 if (!user_wire)
4016 panic("vm_map_unwire: non-contiguous region");
4017 /*
4018 entry = entry->vme_next;
4019 continue;
4020 */
4021 }
4022
4023 subtract_wire_counts(map, entry, user_wire);
4024
4025 if (entry->wired_count != 0) {
4026 entry = entry->vme_next;
4027 continue;
4028 }
4029
4030 entry->in_transition = TRUE;
4031 tmp_entry = *entry;/* see comment in vm_map_wire() */
4032
4033 /*
4034 * We can unlock the map now. The in_transition state
4035 * guarantees existence of the entry.
4036 */
4037 vm_map_unlock(map);
4038 vm_map_unwire_nested(entry->object.sub_map,
4039 sub_start, sub_end, user_wire, pmap, pmap_addr);
4040 vm_map_lock(map);
4041
4042 if (last_timestamp+1 != map->timestamp) {
4043 /*
4044 * Find the entry again. It could have been
4045 * clipped or deleted after we unlocked the map.
4046 */
4047 if (!vm_map_lookup_entry(map,
4048 tmp_entry.vme_start,
4049 &first_entry)) {
4050 if (!user_wire)
4051 panic("vm_map_unwire: re-lookup failed");
4052 entry = first_entry->vme_next;
4053 } else
4054 entry = first_entry;
4055 }
4056 last_timestamp = map->timestamp;
4057
4058 /*
4059 * clear transition bit for all constituent entries
4060 * that were in the original entry (saved in
4061 * tmp_entry). Also check for waiters.
4062 */
4063 while ((entry != vm_map_to_entry(map)) &&
4064 (entry->vme_start < tmp_entry.vme_end)) {
4065 assert(entry->in_transition);
4066 entry->in_transition = FALSE;
4067 if (entry->needs_wakeup) {
4068 entry->needs_wakeup = FALSE;
4069 need_wakeup = TRUE;
4070 }
4071 entry = entry->vme_next;
4072 }
4073 continue;
4074 } else {
4075 vm_map_unlock(map);
4076 vm_map_unwire_nested(entry->object.sub_map,
4077 sub_start, sub_end, user_wire, map_pmap,
4078 pmap_addr);
4079 vm_map_lock(map);
4080
4081 if (last_timestamp+1 != map->timestamp) {
4082 /*
4083 * Find the entry again. It could have been
4084 * clipped or deleted after we unlocked the map.
4085 */
4086 if (!vm_map_lookup_entry(map,
4087 tmp_entry.vme_start,
4088 &first_entry)) {
4089 if (!user_wire)
4090 panic("vm_map_unwire: re-lookup failed");
4091 entry = first_entry->vme_next;
4092 } else
4093 entry = first_entry;
4094 }
4095 last_timestamp = map->timestamp;
4096 }
4097 }
4098
4099
4100 if ((entry->wired_count == 0) ||
4101 (user_wire && entry->user_wired_count == 0)) {
4102 if (!user_wire)
4103 panic("vm_map_unwire: entry is unwired");
4104
4105 entry = entry->vme_next;
4106 continue;
4107 }
4108
4109 assert(entry->wired_count > 0 &&
4110 (!user_wire || entry->user_wired_count > 0));
4111
4112 vm_map_clip_start(map, entry, start);
4113 vm_map_clip_end(map, entry, end);
4114
4115 /*
4116 * Check for holes
4117 * Holes: Next entry should be contiguous unless
4118 * this is the end of the region.
4119 */
4120 if (((entry->vme_end < end) &&
4121 ((entry->vme_next == vm_map_to_entry(map)) ||
4122 (entry->vme_next->vme_start > entry->vme_end)))) {
4123
4124 if (!user_wire)
4125 panic("vm_map_unwire: non-contiguous region");
4126 entry = entry->vme_next;
4127 continue;
4128 }
4129
4130 subtract_wire_counts(map, entry, user_wire);
4131
4132 if (entry->wired_count != 0) {
4133 entry = entry->vme_next;
4134 continue;
4135 }
4136
4137 entry->in_transition = TRUE;
4138 tmp_entry = *entry; /* see comment in vm_map_wire() */
4139
4140 /*
4141 * We can unlock the map now. The in_transition state
4142 * guarantees existence of the entry.
4143 */
4144 vm_map_unlock(map);
4145 if(map_pmap) {
4146 vm_fault_unwire(map,
4147 &tmp_entry, FALSE, map_pmap, pmap_addr);
4148 } else {
4149 vm_fault_unwire(map,
4150 &tmp_entry, FALSE, map->pmap,
4151 tmp_entry.vme_start);
4152 }
4153 vm_map_lock(map);
4154
4155 if (last_timestamp+1 != map->timestamp) {
4156 /*
4157 * Find the entry again. It could have been clipped
4158 * or deleted after we unlocked the map.
4159 */
4160 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
4161 &first_entry)) {
4162 if (!user_wire)
4163 panic("vm_map_unwire: re-lookup failed");
4164 entry = first_entry->vme_next;
4165 } else
4166 entry = first_entry;
4167 }
4168 last_timestamp = map->timestamp;
4169
4170 /*
4171 * clear transition bit for all constituent entries that
4172 * were in the original entry (saved in tmp_entry). Also
4173 * check for waiters.
4174 */
4175 while ((entry != vm_map_to_entry(map)) &&
4176 (entry->vme_start < tmp_entry.vme_end)) {
4177 assert(entry->in_transition);
4178 entry->in_transition = FALSE;
4179 if (entry->needs_wakeup) {
4180 entry->needs_wakeup = FALSE;
4181 need_wakeup = TRUE;
4182 }
4183 entry = entry->vme_next;
4184 }
4185 }
4186
4187 /*
4188 * We might have fragmented the address space when we wired this
4189 * range of addresses. Attempt to re-coalesce these VM map entries
4190 * with their neighbors now that they're no longer wired.
4191 * Under some circumstances, address space fragmentation can
4192 * prevent VM object shadow chain collapsing, which can cause
4193 * swap space leaks.
4194 */
4195 vm_map_simplify_range(map, start, end);
4196
4197 vm_map_unlock(map);
4198 /*
4199 * wake up anybody waiting on entries that we have unwired.
4200 */
4201 if (need_wakeup)
4202 vm_map_entry_wakeup(map);
4203 return(KERN_SUCCESS);
4204
4205 }
4206
4207 kern_return_t
4208 vm_map_unwire(
4209 register vm_map_t map,
4210 register vm_map_offset_t start,
4211 register vm_map_offset_t end,
4212 boolean_t user_wire)
4213 {
4214 return vm_map_unwire_nested(map, start, end,
4215 user_wire, (pmap_t)NULL, 0);
4216 }
4217
4218
4219 /*
4220 * vm_map_entry_delete: [ internal use only ]
4221 *
4222 * Deallocate the given entry from the target map.
4223 */
4224 static void
4225 vm_map_entry_delete(
4226 register vm_map_t map,
4227 register vm_map_entry_t entry)
4228 {
4229 register vm_map_offset_t s, e;
4230 register vm_object_t object;
4231 register vm_map_t submap;
4232
4233 s = entry->vme_start;
4234 e = entry->vme_end;
4235 assert(page_aligned(s));
4236 assert(page_aligned(e));
4237 assert(entry->wired_count == 0);
4238 assert(entry->user_wired_count == 0);
4239
4240 if (entry->is_sub_map) {
4241 object = NULL;
4242 submap = entry->object.sub_map;
4243 } else {
4244 submap = NULL;
4245 object = entry->object.vm_object;
4246 }
4247
4248 vm_map_entry_unlink(map, entry);
4249 map->size -= e - s;
4250
4251 vm_map_entry_dispose(map, entry);
4252
4253 vm_map_unlock(map);
4254 /*
4255 * Deallocate the object only after removing all
4256 * pmap entries pointing to its pages.
4257 */
4258 if (submap)
4259 vm_map_deallocate(submap);
4260 else
4261 vm_object_deallocate(object);
4262
4263 }
4264
4265 void
4266 vm_map_submap_pmap_clean(
4267 vm_map_t map,
4268 vm_map_offset_t start,
4269 vm_map_offset_t end,
4270 vm_map_t sub_map,
4271 vm_map_offset_t offset)
4272 {
4273 vm_map_offset_t submap_start;
4274 vm_map_offset_t submap_end;
4275 vm_map_size_t remove_size;
4276 vm_map_entry_t entry;
4277
4278 submap_end = offset + (end - start);
4279 submap_start = offset;
4280 if(vm_map_lookup_entry(sub_map, offset, &entry)) {
4281
4282 remove_size = (entry->vme_end - entry->vme_start);
4283 if(offset > entry->vme_start)
4284 remove_size -= offset - entry->vme_start;
4285
4286
4287 if(submap_end < entry->vme_end) {
4288 remove_size -=
4289 entry->vme_end - submap_end;
4290 }
4291 if(entry->is_sub_map) {
4292 vm_map_submap_pmap_clean(
4293 sub_map,
4294 start,
4295 start + remove_size,
4296 entry->object.sub_map,
4297 entry->offset);
4298 } else {
4299
4300 if((map->mapped) && (map->ref_count)
4301 && (entry->object.vm_object != NULL)) {
4302 vm_object_pmap_protect(
4303 entry->object.vm_object,
4304 entry->offset,
4305 remove_size,
4306 PMAP_NULL,
4307 entry->vme_start,
4308 VM_PROT_NONE);
4309 } else {
4310 pmap_remove(map->pmap,
4311 (addr64_t)start,
4312 (addr64_t)(start + remove_size));
4313 }
4314 }
4315 }
4316
4317 entry = entry->vme_next;
4318
4319 while((entry != vm_map_to_entry(sub_map))
4320 && (entry->vme_start < submap_end)) {
4321 remove_size = (entry->vme_end - entry->vme_start);
4322 if(submap_end < entry->vme_end) {
4323 remove_size -= entry->vme_end - submap_end;
4324 }
4325 if(entry->is_sub_map) {
4326 vm_map_submap_pmap_clean(
4327 sub_map,
4328 (start + entry->vme_start) - offset,
4329 ((start + entry->vme_start) - offset) + remove_size,
4330 entry->object.sub_map,
4331 entry->offset);
4332 } else {
4333 if((map->mapped) && (map->ref_count)
4334 && (entry->object.vm_object != NULL)) {
4335 vm_object_pmap_protect(
4336 entry->object.vm_object,
4337 entry->offset,
4338 remove_size,
4339 PMAP_NULL,
4340 entry->vme_start,
4341 VM_PROT_NONE);
4342 } else {
4343 pmap_remove(map->pmap,
4344 (addr64_t)((start + entry->vme_start)
4345 - offset),
4346 (addr64_t)(((start + entry->vme_start)
4347 - offset) + remove_size));
4348 }
4349 }
4350 entry = entry->vme_next;
4351 }
4352 return;
4353 }
4354
4355 /*
4356 * vm_map_delete: [ internal use only ]
4357 *
4358 * Deallocates the given address range from the target map.
4359 * Removes all user wirings. Unwires one kernel wiring if
4360 * VM_MAP_REMOVE_KUNWIRE is set. Waits for kernel wirings to go
4361 * away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set. Sleeps
4362 * interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
4363 *
4364 * This routine is called with map locked and leaves map locked.
4365 */
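/*
 * A minimal sketch of how an internal caller might reach this routine,
 * assuming it already holds the map lock (the exported path is
 * vm_map_remove() below); the flag combination shown is hypothetical:
 *
 *	vm_map_lock(map);
 *	kr = vm_map_delete(map, start, end,
 *			   VM_MAP_REMOVE_KUNWIRE | VM_MAP_REMOVE_INTERRUPTIBLE,
 *			   VM_MAP_NULL);
 *	vm_map_unlock(map);
 *
 * With VM_MAP_REMOVE_INTERRUPTIBLE set, the routine may return
 * KERN_ABORTED if it is interrupted while sleeping on wired or
 * in-transition entries.
 */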
4366 static kern_return_t
4367 vm_map_delete(
4368 vm_map_t map,
4369 vm_map_offset_t start,
4370 vm_map_offset_t end,
4371 int flags,
4372 vm_map_t zap_map)
4373 {
4374 vm_map_entry_t entry, next;
4375 struct vm_map_entry *first_entry, tmp_entry;
4376 register vm_map_offset_t s;
4377 register vm_object_t object;
4378 boolean_t need_wakeup;
4379 unsigned int last_timestamp = ~0; /* unlikely value */
4380 int interruptible;
4381
4382 interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
4383 THREAD_ABORTSAFE : THREAD_UNINT;
4384
4385 /*
4386 * All our DMA I/O operations in IOKit are currently done by
4387 * wiring through the map entries of the task requesting the I/O.
4388 * Because of this, we must always wait for kernel wirings
4389 * to go away on the entries before deleting them.
4390 *
4391 * Any caller who wants to actually remove a kernel wiring
4392 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
4393 * properly remove one wiring instead of blasting through
4394 * them all.
4395 */
4396 flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;
4397
4398 /*
4399 * Find the start of the region, and clip it
4400 */
4401 if (vm_map_lookup_entry(map, start, &first_entry)) {
4402 entry = first_entry;
4403 if (start == entry->vme_start) {
4404 /*
4405 * No need to clip. We don't want to cause
4406 * any unnecessary unnesting in this case...
4407 */
4408 } else {
4409 vm_map_clip_start(map, entry, start);
4410 }
4411
4412 /*
4413 * Fix the lookup hint now, rather than each
4414 * time through the loop.
4415 */
4416 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4417 } else {
4418 entry = first_entry->vme_next;
4419 }
4420
4421 need_wakeup = FALSE;
4422 /*
4423 * Step through all entries in this region
4424 */
4425 s = entry->vme_start;
4426 while ((entry != vm_map_to_entry(map)) && (s < end)) {
4427 /*
4428 * At this point, we have deleted all the memory entries
4429 * between "start" and "s". We still need to delete
4430 * all memory entries between "s" and "end".
4431 * While we were blocked and the map was unlocked, some
4432 * new memory entries could have been re-allocated between
4433 * "start" and "s" and we don't want to mess with those.
4434 * Some of those entries could even have been re-assembled
4435 * with an entry after "s" (in vm_map_simplify_entry()), so
4436 * we may have to vm_map_clip_start() again.
4437 */
4438
4439 if (entry->vme_start >= s) {
4440 /*
4441 * This entry starts on or after "s"
4442 * so no need to clip its start.
4443 */
4444 } else {
4445 /*
4446 * This entry has been re-assembled by a
4447 * vm_map_simplify_entry(). We need to
4448 * re-clip its start.
4449 */
4450 vm_map_clip_start(map, entry, s);
4451 }
4452 if (entry->vme_end <= end) {
4453 /*
4454 * This entry is going away completely, so no need
4455 * to clip and possibly cause an unnecessary unnesting.
4456 */
4457 } else {
4458 vm_map_clip_end(map, entry, end);
4459 }
4460 if (entry->in_transition) {
4461 wait_result_t wait_result;
4462
4463 /*
4464 * Another thread is wiring/unwiring this entry.
4465 * Let the other thread know we are waiting.
4466 */
4467 assert(s == entry->vme_start);
4468 entry->needs_wakeup = TRUE;
4469
4470 /*
4471 * wake up anybody waiting on entries that we have
4472 * already unwired/deleted.
4473 */
4474 if (need_wakeup) {
4475 vm_map_entry_wakeup(map);
4476 need_wakeup = FALSE;
4477 }
4478
4479 wait_result = vm_map_entry_wait(map, interruptible);
4480
4481 if (interruptible &&
4482 wait_result == THREAD_INTERRUPTED) {
4483 /*
4484 * We do not clear the needs_wakeup flag,
4485 * since we cannot tell if we were the only one.
4486 */
4487 vm_map_unlock(map);
4488 return KERN_ABORTED;
4489 }
4490
4491 /*
4492 * The entry could have been clipped or it
4493 * may not exist anymore. Look it up again.
4494 */
4495 if (!vm_map_lookup_entry(map, s, &first_entry)) {
4496 assert((map != kernel_map) &&
4497 (!entry->is_sub_map));
4498 /*
4499 * User: use the next entry
4500 */
4501 entry = first_entry->vme_next;
4502 s = entry->vme_start;
4503 } else {
4504 entry = first_entry;
4505 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4506 }
4507 last_timestamp = map->timestamp;
4508 continue;
4509 } /* end in_transition */
4510
4511 if (entry->wired_count) {
4512 boolean_t user_wire;
4513
4514 user_wire = entry->user_wired_count > 0;
4515
4516 /*
4517 * Remove a kernel wiring if requested or if
4518 * there are user wirings.
4519 */
4520 if ((flags & VM_MAP_REMOVE_KUNWIRE) ||
4521 (entry->user_wired_count > 0))
4522 entry->wired_count--;
4523
4524 /* remove all user wire references */
4525 entry->user_wired_count = 0;
4526
4527 if (entry->wired_count != 0) {
4528 assert(map != kernel_map);
4529 /*
4530 * Cannot continue. Typical case is when
4531 * a user thread has physical io pending
4532 * on this page. Either wait for the
4533 * kernel wiring to go away or return an
4534 * error.
4535 */
4536 if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) {
4537 wait_result_t wait_result;
4538
4539 assert(s == entry->vme_start);
4540 entry->needs_wakeup = TRUE;
4541 wait_result = vm_map_entry_wait(map,
4542 interruptible);
4543
4544 if (interruptible &&
4545 wait_result == THREAD_INTERRUPTED) {
4546 /*
4547 * We do not clear the
4548 * needs_wakeup flag, since we
4549 * cannot tell if we were the
4550 * only one.
4551 */
4552 vm_map_unlock(map);
4553 return KERN_ABORTED;
4554 }
4555
4556 /*
4557 * The entry could have been clipped or
4558 * it may not exist anymore. Look it
4559 * up again.
4560 */
4561 if (!vm_map_lookup_entry(map, s,
4562 &first_entry)) {
4563 assert(map != kernel_map);
4564 /*
4565 * User: use the next entry
4566 */
4567 entry = first_entry->vme_next;
4568 s = entry->vme_start;
4569 } else {
4570 entry = first_entry;
4571 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4572 }
4573 last_timestamp = map->timestamp;
4574 continue;
4575 }
4576 else {
4577 return KERN_FAILURE;
4578 }
4579 }
4580
4581 entry->in_transition = TRUE;
4582 /*
4583 * copy current entry. see comment in vm_map_wire()
4584 */
4585 tmp_entry = *entry;
4586 assert(s == entry->vme_start);
4587
4588 /*
4589 * We can unlock the map now. The in_transition
4590 * state guarantees existence of the entry.
4591 */
4592 vm_map_unlock(map);
4593
4594 if (tmp_entry.is_sub_map) {
4595 vm_map_t sub_map;
4596 vm_map_offset_t sub_start, sub_end;
4597 pmap_t pmap;
4598 vm_map_offset_t pmap_addr;
4599
4600
4601 sub_map = tmp_entry.object.sub_map;
4602 sub_start = tmp_entry.offset;
4603 sub_end = sub_start + (tmp_entry.vme_end -
4604 tmp_entry.vme_start);
4605 if (tmp_entry.use_pmap) {
4606 pmap = sub_map->pmap;
4607 pmap_addr = tmp_entry.vme_start;
4608 } else {
4609 pmap = map->pmap;
4610 pmap_addr = tmp_entry.vme_start;
4611 }
4612 (void) vm_map_unwire_nested(sub_map,
4613 sub_start, sub_end,
4614 user_wire,
4615 pmap, pmap_addr);
4616 } else {
4617
4618 vm_fault_unwire(map, &tmp_entry,
4619 tmp_entry.object.vm_object == kernel_object,
4620 map->pmap, tmp_entry.vme_start);
4621 }
4622
4623 vm_map_lock(map);
4624
4625 if (last_timestamp+1 != map->timestamp) {
4626 /*
4627 * Find the entry again. It could have
4628 * been clipped after we unlocked the map.
4629 */
4630 if (!vm_map_lookup_entry(map, s, &first_entry)){
4631 assert((map != kernel_map) &&
4632 (!entry->is_sub_map));
4633 first_entry = first_entry->vme_next;
4634 s = first_entry->vme_start;
4635 } else {
4636 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4637 }
4638 } else {
4639 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4640 first_entry = entry;
4641 }
4642
4643 last_timestamp = map->timestamp;
4644
4645 entry = first_entry;
4646 while ((entry != vm_map_to_entry(map)) &&
4647 (entry->vme_start < tmp_entry.vme_end)) {
4648 assert(entry->in_transition);
4649 entry->in_transition = FALSE;
4650 if (entry->needs_wakeup) {
4651 entry->needs_wakeup = FALSE;
4652 need_wakeup = TRUE;
4653 }
4654 entry = entry->vme_next;
4655 }
4656 /*
4657 * We have unwired the entry(s). Go back and
4658 * delete them.
4659 */
4660 entry = first_entry;
4661 continue;
4662 }
4663
4664 /* entry is unwired */
4665 assert(entry->wired_count == 0);
4666 assert(entry->user_wired_count == 0);
4667
4668 assert(s == entry->vme_start);
4669
4670 if (flags & VM_MAP_REMOVE_NO_PMAP_CLEANUP) {
4671 /*
4672 * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
4673 * vm_map_delete(), some map entries might have been
4674 * transferred to a "zap_map", which doesn't have a
4675 * pmap. The original pmap has already been flushed
4676 * in the vm_map_delete() call targeting the original
4677 * map, but when we get to destroying the "zap_map",
4678 * we don't have any pmap to flush, so let's just skip
4679 * all this.
4680 */
4681 } else if (entry->is_sub_map) {
4682 if (entry->use_pmap) {
4683 #ifndef NO_NESTED_PMAP
4684 pmap_unnest(map->pmap,
4685 (addr64_t)entry->vme_start,
4686 entry->vme_end - entry->vme_start);
4687 #endif /* NO_NESTED_PMAP */
4688 if ((map->mapped) && (map->ref_count)) {
4689 /* clean up parent map/maps */
4690 vm_map_submap_pmap_clean(
4691 map, entry->vme_start,
4692 entry->vme_end,
4693 entry->object.sub_map,
4694 entry->offset);
4695 }
4696 } else {
4697 vm_map_submap_pmap_clean(
4698 map, entry->vme_start, entry->vme_end,
4699 entry->object.sub_map,
4700 entry->offset);
4701 }
4702 } else if (entry->object.vm_object != kernel_object) {
4703 object = entry->object.vm_object;
4704 if((map->mapped) && (map->ref_count)) {
4705 vm_object_pmap_protect(
4706 object, entry->offset,
4707 entry->vme_end - entry->vme_start,
4708 PMAP_NULL,
4709 entry->vme_start,
4710 VM_PROT_NONE);
4711 } else {
4712 pmap_remove(map->pmap,
4713 (addr64_t)entry->vme_start,
4714 (addr64_t)entry->vme_end);
4715 }
4716 }
4717
4718 /*
4719 * All pmap mappings for this map entry must have been
4720 * cleared by now.
4721 */
4722 assert(vm_map_pmap_is_empty(map,
4723 entry->vme_start,
4724 entry->vme_end));
4725
4726 next = entry->vme_next;
4727 s = next->vme_start;
4728 last_timestamp = map->timestamp;
4729
4730 if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) &&
4731 zap_map != VM_MAP_NULL) {
4732 vm_map_size_t entry_size;
4733 /*
4734 * The caller wants to save the affected VM map entries
4735 * into the "zap_map". The caller will take care of
4736 * these entries.
4737 */
4738 /* unlink the entry from "map" ... */
4739 vm_map_entry_unlink(map, entry);
4740 /* ... and add it to the end of the "zap_map" */
4741 vm_map_entry_link(zap_map,
4742 vm_map_last_entry(zap_map),
4743 entry);
4744 entry_size = entry->vme_end - entry->vme_start;
4745 map->size -= entry_size;
4746 zap_map->size += entry_size;
4747 /* we didn't unlock the map, so no timestamp increase */
4748 last_timestamp--;
4749 } else {
4750 vm_map_entry_delete(map, entry);
4751 /* vm_map_entry_delete unlocks the map */
4752 vm_map_lock(map);
4753 }
4754
4755 entry = next;
4756
4757 if(entry == vm_map_to_entry(map)) {
4758 break;
4759 }
4760 if (last_timestamp+1 != map->timestamp) {
4761 /*
4762 * we are responsible for deleting everything
4763 * from the give space, if someone has interfered
4764 * we pick up where we left off, back fills should
4765 * be all right for anyone except map_delete and
4766 * we have to assume that the task has been fully
4767 * disabled before we get here
4768 */
4769 if (!vm_map_lookup_entry(map, s, &entry)){
4770 entry = entry->vme_next;
4771 s = entry->vme_start;
4772 } else {
4773 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4774 }
4775 /*
4776 * others can not only allocate behind us, we can
4777 * also see coalescing while we don't hold the map lock
4778 */
4779 if(entry == vm_map_to_entry(map)) {
4780 break;
4781 }
4782 }
4783 last_timestamp = map->timestamp;
4784 }
4785
4786 if (map->wait_for_space)
4787 thread_wakeup((event_t) map);
4788 /*
4789 * wake up anybody waiting on entries that we have already deleted.
4790 */
4791 if (need_wakeup)
4792 vm_map_entry_wakeup(map);
4793
4794 return KERN_SUCCESS;
4795 }
4796
4797 /*
4798 * vm_map_remove:
4799 *
4800 * Remove the given address range from the target map.
4801 * This is the exported form of vm_map_delete.
4802 */
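/*
 * A minimal usage sketch: unlike vm_map_delete(), this routine takes the
 * map lock itself, so a caller passes an unlocked map.  The flag value
 * shown is hypothetical:
 *
 *	kr = vm_map_remove(map, start, end, VM_MAP_REMOVE_KUNWIRE);
 *
 * The range is first passed through VM_MAP_RANGE_CHECK before
 * vm_map_delete() does the actual work.
 */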
4803 kern_return_t
4804 vm_map_remove(
4805 register vm_map_t map,
4806 register vm_map_offset_t start,
4807 register vm_map_offset_t end,
4808 register boolean_t flags)
4809 {
4810 register kern_return_t result;
4811
4812 vm_map_lock(map);
4813 VM_MAP_RANGE_CHECK(map, start, end);
4814 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
4815 vm_map_unlock(map);
4816
4817 return(result);
4818 }
4819
4820
4821 /*
4822 * Routine: vm_map_copy_discard
4823 *
4824 * Description:
4825 * Dispose of a map copy object (returned by
4826 * vm_map_copyin).
4827 */
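/*
 * A minimal usage sketch: a copy object is discarded when it will not be
 * consumed, e.g. on an error path after vm_map_copyin().  "copy" and
 * "kr" are hypothetical locals:
 *
 *	if (kr != KERN_SUCCESS) {
 *		vm_map_copy_discard(copy);
 *		copy = VM_MAP_COPY_NULL;
 *	}
 */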
4828 void
4829 vm_map_copy_discard(
4830 vm_map_copy_t copy)
4831 {
4832 TR_DECL("vm_map_copy_discard");
4833
4834 /* tr3("enter: copy 0x%x type %d", copy, copy->type);*/
4835
4836 if (copy == VM_MAP_COPY_NULL)
4837 return;
4838
4839 switch (copy->type) {
4840 case VM_MAP_COPY_ENTRY_LIST:
4841 while (vm_map_copy_first_entry(copy) !=
4842 vm_map_copy_to_entry(copy)) {
4843 vm_map_entry_t entry = vm_map_copy_first_entry(copy);
4844
4845 vm_map_copy_entry_unlink(copy, entry);
4846 vm_object_deallocate(entry->object.vm_object);
4847 vm_map_copy_entry_dispose(copy, entry);
4848 }
4849 break;
4850 case VM_MAP_COPY_OBJECT:
4851 vm_object_deallocate(copy->cpy_object);
4852 break;
4853 case VM_MAP_COPY_KERNEL_BUFFER:
4854
4855 /*
4856 * The vm_map_copy_t and possibly the data buffer were
4857 * allocated by a single call to kalloc(), i.e. the
4858 * vm_map_copy_t was not allocated out of the zone.
4859 */
4860 kfree(copy, copy->cpy_kalloc_size);
4861 return;
4862 }
4863 zfree(vm_map_copy_zone, copy);
4864 }
4865
4866 /*
4867 * Routine: vm_map_copy_copy
4868 *
4869 * Description:
4870 * Move the information in a map copy object to
4871 * a new map copy object, leaving the old one
4872 * empty.
4873 *
4874 * This is used by kernel routines that need
4875 * to look at out-of-line data (in copyin form)
4876 * before deciding whether to return SUCCESS.
4877 * If the routine returns FAILURE, the original
4878 * copy object will be deallocated; therefore,
4879 * these routines must make a copy of the copy
4880 * object and leave the original empty so that
4881 * deallocation will not fail.
4882 */
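/*
 * A minimal sketch of the pattern described above; "validate_payload"
 * is a hypothetical check on the out-of-line data:
 *
 *	vm_map_copy_t my_copy = vm_map_copy_copy(copy);
 *	kr = validate_payload(my_copy);
 *	if (kr != KERN_SUCCESS) {
 *		vm_map_copy_discard(my_copy);
 *		return kr;
 *	}
 *
 * If failure is returned, the caller's deallocation of the now-empty
 * original copy object is harmless.
 */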
4883 vm_map_copy_t
4884 vm_map_copy_copy(
4885 vm_map_copy_t copy)
4886 {
4887 vm_map_copy_t new_copy;
4888
4889 if (copy == VM_MAP_COPY_NULL)
4890 return VM_MAP_COPY_NULL;
4891
4892 /*
4893 * Allocate a new copy object, and copy the information
4894 * from the old one into it.
4895 */
4896
4897 new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
4898 *new_copy = *copy;
4899
4900 if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
4901 /*
4902 * The links in the entry chain must be
4903 * changed to point to the new copy object.
4904 */
4905 vm_map_copy_first_entry(copy)->vme_prev
4906 = vm_map_copy_to_entry(new_copy);
4907 vm_map_copy_last_entry(copy)->vme_next
4908 = vm_map_copy_to_entry(new_copy);
4909 }
4910
4911 /*
4912 * Change the old copy object into one that contains
4913 * nothing to be deallocated.
4914 */
4915 copy->type = VM_MAP_COPY_OBJECT;
4916 copy->cpy_object = VM_OBJECT_NULL;
4917
4918 /*
4919 * Return the new object.
4920 */
4921 return new_copy;
4922 }
4923
4924 static kern_return_t
4925 vm_map_overwrite_submap_recurse(
4926 vm_map_t dst_map,
4927 vm_map_offset_t dst_addr,
4928 vm_map_size_t dst_size)
4929 {
4930 vm_map_offset_t dst_end;
4931 vm_map_entry_t tmp_entry;
4932 vm_map_entry_t entry;
4933 kern_return_t result;
4934 boolean_t encountered_sub_map = FALSE;
4935
4936
4937
4938 /*
4939 * Verify that the destination is all writeable
4940 * initially. We have to trunc the destination
4941 * address and round the copy size or we'll end up
4942 * splitting entries in strange ways.
4943 */
4944
4945 dst_end = vm_map_round_page(dst_addr + dst_size);
4946 vm_map_lock(dst_map);
4947
4948 start_pass_1:
4949 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
4950 vm_map_unlock(dst_map);
4951 return(KERN_INVALID_ADDRESS);
4952 }
4953
4954 vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(dst_addr));
4955 assert(!tmp_entry->use_pmap); /* clipping did unnest if needed */
4956
4957 for (entry = tmp_entry;;) {
4958 vm_map_entry_t next;
4959
4960 next = entry->vme_next;
4961 while(entry->is_sub_map) {
4962 vm_map_offset_t sub_start;
4963 vm_map_offset_t sub_end;
4964 vm_map_offset_t local_end;
4965
4966 if (entry->in_transition) {
4967 /*
4968 * Say that we are waiting, and wait for entry.
4969 */
4970 entry->needs_wakeup = TRUE;
4971 vm_map_entry_wait(dst_map, THREAD_UNINT);
4972
4973 goto start_pass_1;
4974 }
4975
4976 encountered_sub_map = TRUE;
4977 sub_start = entry->offset;
4978
4979 if(entry->vme_end < dst_end)
4980 sub_end = entry->vme_end;
4981 else
4982 sub_end = dst_end;
4983 sub_end -= entry->vme_start;
4984 sub_end += entry->offset;
4985 local_end = entry->vme_end;
4986 vm_map_unlock(dst_map);
4987
4988 result = vm_map_overwrite_submap_recurse(
4989 entry->object.sub_map,
4990 sub_start,
4991 sub_end - sub_start);
4992
4993 if(result != KERN_SUCCESS)
4994 return result;
4995 if (dst_end <= entry->vme_end)
4996 return KERN_SUCCESS;
4997 vm_map_lock(dst_map);
4998 if(!vm_map_lookup_entry(dst_map, local_end,
4999 &tmp_entry)) {
5000 vm_map_unlock(dst_map);
5001 return(KERN_INVALID_ADDRESS);
5002 }
5003 entry = tmp_entry;
5004 next = entry->vme_next;
5005 }
5006
5007 if ( ! (entry->protection & VM_PROT_WRITE)) {
5008 vm_map_unlock(dst_map);
5009 return(KERN_PROTECTION_FAILURE);
5010 }
5011
5012 /*
5013 * If the entry is in transition, we must wait
5014 * for it to exit that state. Anything could happen
5015 * when we unlock the map, so start over.
5016 */
5017 if (entry->in_transition) {
5018
5019 /*
5020 * Say that we are waiting, and wait for entry.
5021 */
5022 entry->needs_wakeup = TRUE;
5023 vm_map_entry_wait(dst_map, THREAD_UNINT);
5024
5025 goto start_pass_1;
5026 }
5027
5028 /*
5029 * our range is contained completely within this map entry
5030 */
5031 if (dst_end <= entry->vme_end) {
5032 vm_map_unlock(dst_map);
5033 return KERN_SUCCESS;
5034 }
5035 /*
5036 * check that range specified is contiguous region
5037 */
5038 if ((next == vm_map_to_entry(dst_map)) ||
5039 (next->vme_start != entry->vme_end)) {
5040 vm_map_unlock(dst_map);
5041 return(KERN_INVALID_ADDRESS);
5042 }
5043
5044 /*
5045 * Check for permanent objects in the destination.
5046 */
5047 if ((entry->object.vm_object != VM_OBJECT_NULL) &&
5048 ((!entry->object.vm_object->internal) ||
5049 (entry->object.vm_object->true_share))) {
5050 if(encountered_sub_map) {
5051 vm_map_unlock(dst_map);
5052 return(KERN_FAILURE);
5053 }
5054 }
5055
5056
5057 entry = next;
5058 }/* for */
5059 vm_map_unlock(dst_map);
5060 return(KERN_SUCCESS);
5061 }
5062
5063 /*
5064 * Routine: vm_map_copy_overwrite
5065 *
5066 * Description:
5067 * Copy the memory described by the map copy
5068 * object (copy; returned by vm_map_copyin) onto
5069 * the specified destination region (dst_map, dst_addr).
5070 * The destination must be writeable.
5071 *
5072 * Unlike vm_map_copyout, this routine actually
5073 * writes over previously-mapped memory. If the
5074 * previous mapping was to a permanent (user-supplied)
5075 * memory object, it is preserved.
5076 *
5077 * The attributes (protection and inheritance) of the
5078 * destination region are preserved.
5079 *
5080 * If successful, consumes the copy object.
5081 * Otherwise, the caller is responsible for it.
5082 *
5083 * Implementation notes:
5084 * To overwrite aligned temporary virtual memory, it is
5085 * sufficient to remove the previous mapping and insert
5086 * the new copy. This replacement is done either on
5087 * the whole region (if no permanent virtual memory
5088 * objects are embedded in the destination region) or
5089 * in individual map entries.
5090 *
5091 * To overwrite permanent virtual memory, it is necessary
5092 * to copy each page, as the external memory management
5093 * interface currently does not provide any optimizations.
5094 *
5095 * Unaligned memory also has to be copied. It is possible
5096 * to use 'vm_trickery' to copy the aligned data. This is
5097 * not done but not hard to implement.
5098 *
5099 * Once a page of permanent memory has been overwritten,
5100 * it is impossible to interrupt this function; otherwise,
5101 * the call would be neither atomic nor location-independent.
5102 * The kernel-state portion of a user thread must be
5103 * interruptible.
5104 *
5105 * It may be expensive to forward all requests that might
5106 * overwrite permanent memory (vm_write, vm_copy) to
5107 * uninterruptible kernel threads. This routine may be
5108 * called by interruptible threads; however, success is
5109 * not guaranteed -- if the request cannot be performed
5110 * atomically and interruptibly, an error indication is
5111 * returned.
5112 */
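/*
 * A minimal sketch of a vm_write()-style caller, assuming hypothetical
 * "src_map"/"dst_map" and addresses:
 *
 *	kr = vm_map_copyin(src_map, src_addr, size, FALSE, &copy);
 *	if (kr == KERN_SUCCESS) {
 *		kr = vm_map_copy_overwrite(dst_map, dst_addr, copy, FALSE);
 *		if (kr != KERN_SUCCESS)
 *			vm_map_copy_discard(copy);
 *	}
 *
 * On success the copy object is consumed; on failure the caller still
 * owns it and must discard it, as noted above.
 */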
5113
5114 static kern_return_t
5115 vm_map_copy_overwrite_nested(
5116 vm_map_t dst_map,
5117 vm_map_address_t dst_addr,
5118 vm_map_copy_t copy,
5119 boolean_t interruptible,
5120 pmap_t pmap)
5121 {
5122 vm_map_offset_t dst_end;
5123 vm_map_entry_t tmp_entry;
5124 vm_map_entry_t entry;
5125 kern_return_t kr;
5126 boolean_t aligned = TRUE;
5127 boolean_t contains_permanent_objects = FALSE;
5128 boolean_t encountered_sub_map = FALSE;
5129 vm_map_offset_t base_addr;
5130 vm_map_size_t copy_size;
5131 vm_map_size_t total_size;
5132
5133
5134 /*
5135 * Check for null copy object.
5136 */
5137
5138 if (copy == VM_MAP_COPY_NULL)
5139 return(KERN_SUCCESS);
5140
5141 /*
5142 * Check for special kernel buffer allocated
5143 * by new_ipc_kmsg_copyin.
5144 */
5145
5146 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
5147 return(vm_map_copyout_kernel_buffer(
5148 dst_map, &dst_addr,
5149 copy, TRUE));
5150 }
5151
5152 /*
5153 * Only works for entry lists at the moment. Will
5154 * support page lists later.
5155 */
5156
5157 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
5158
5159 if (copy->size == 0) {
5160 vm_map_copy_discard(copy);
5161 return(KERN_SUCCESS);
5162 }
5163
5164 /*
5165 * Verify that the destination is all writeable
5166 * initially. We have to trunc the destination
5167 * address and round the copy size or we'll end up
5168 * splitting entries in strange ways.
5169 */
5170
5171 if (!page_aligned(copy->size) ||
5172 !page_aligned (copy->offset) ||
5173 !page_aligned (dst_addr))
5174 {
5175 aligned = FALSE;
5176 dst_end = vm_map_round_page(dst_addr + copy->size);
5177 } else {
5178 dst_end = dst_addr + copy->size;
5179 }
5180
5181 vm_map_lock(dst_map);
5182
5183 /* LP64todo - remove this check when vm_map_commpage64()
5184 * no longer has to stuff in a map_entry for the commpage
5185 * above the map's max_offset.
5186 */
5187 if (dst_addr >= dst_map->max_offset) {
5188 vm_map_unlock(dst_map);
5189 return(KERN_INVALID_ADDRESS);
5190 }
5191
5192 start_pass_1:
5193 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
5194 vm_map_unlock(dst_map);
5195 return(KERN_INVALID_ADDRESS);
5196 }
5197 vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(dst_addr));
5198 for (entry = tmp_entry;;) {
5199 vm_map_entry_t next = entry->vme_next;
5200
5201 while(entry->is_sub_map) {
5202 vm_map_offset_t sub_start;
5203 vm_map_offset_t sub_end;
5204 vm_map_offset_t local_end;
5205
5206 if (entry->in_transition) {
5207
5208 /*
5209 * Say that we are waiting, and wait for entry.
5210 */
5211 entry->needs_wakeup = TRUE;
5212 vm_map_entry_wait(dst_map, THREAD_UNINT);
5213
5214 goto start_pass_1;
5215 }
5216
5217 local_end = entry->vme_end;
5218 if (!(entry->needs_copy)) {
5219 /* if needs_copy we are a COW submap */
5220 /* in such a case we just replace so */
5221 /* there is no need for the follow- */
5222 /* ing check. */
5223 encountered_sub_map = TRUE;
5224 sub_start = entry->offset;
5225
5226 if(entry->vme_end < dst_end)
5227 sub_end = entry->vme_end;
5228 else
5229 sub_end = dst_end;
5230 sub_end -= entry->vme_start;
5231 sub_end += entry->offset;
5232 vm_map_unlock(dst_map);
5233
5234 kr = vm_map_overwrite_submap_recurse(
5235 entry->object.sub_map,
5236 sub_start,
5237 sub_end - sub_start);
5238 if(kr != KERN_SUCCESS)
5239 return kr;
5240 vm_map_lock(dst_map);
5241 }
5242
5243 if (dst_end <= entry->vme_end)
5244 goto start_overwrite;
5245 if(!vm_map_lookup_entry(dst_map, local_end,
5246 &entry)) {
5247 vm_map_unlock(dst_map);
5248 return(KERN_INVALID_ADDRESS);
5249 }
5250 next = entry->vme_next;
5251 }
5252
5253 if ( ! (entry->protection & VM_PROT_WRITE)) {
5254 vm_map_unlock(dst_map);
5255 return(KERN_PROTECTION_FAILURE);
5256 }
5257
5258 /*
5259 * If the entry is in transition, we must wait
5260 * for it to exit that state. Anything could happen
5261 * when we unlock the map, so start over.
5262 */
5263 if (entry->in_transition) {
5264
5265 /*
5266 * Say that we are waiting, and wait for entry.
5267 */
5268 entry->needs_wakeup = TRUE;
5269 vm_map_entry_wait(dst_map, THREAD_UNINT);
5270
5271 goto start_pass_1;
5272 }
5273
5274 /*
5275 * our range is contained completely within this map entry
5276 */
5277 if (dst_end <= entry->vme_end)
5278 break;
5279 /*
5280 * check that range specified is contiguous region
5281 */
5282 if ((next == vm_map_to_entry(dst_map)) ||
5283 (next->vme_start != entry->vme_end)) {
5284 vm_map_unlock(dst_map);
5285 return(KERN_INVALID_ADDRESS);
5286 }
5287
5288
5289 /*
5290 * Check for permanent objects in the destination.
5291 */
5292 if ((entry->object.vm_object != VM_OBJECT_NULL) &&
5293 ((!entry->object.vm_object->internal) ||
5294 (entry->object.vm_object->true_share))) {
5295 contains_permanent_objects = TRUE;
5296 }
5297
5298 entry = next;
5299 }/* for */
5300
5301 start_overwrite:
5302 /*
5303 * If there are permanent objects in the destination, then
5304 * the copy cannot be interrupted.
5305 */
5306
5307 if (interruptible && contains_permanent_objects) {
5308 vm_map_unlock(dst_map);
5309 return(KERN_FAILURE); /* XXX */
5310 }
5311
5312 /*
5313 *
5314 * Make a second pass, overwriting the data
5315 * At the beginning of each loop iteration,
5316 * the next entry to be overwritten is "tmp_entry"
5317 * (initially, the value returned from the lookup above),
5318 * and the starting address expected in that entry
5319 * is "start".
5320 */
5321
5322 total_size = copy->size;
5323 if(encountered_sub_map) {
5324 copy_size = 0;
5325 /* re-calculate tmp_entry since we've had the map */
5326 /* unlocked */
5327 if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
5328 vm_map_unlock(dst_map);
5329 return(KERN_INVALID_ADDRESS);
5330 }
5331 } else {
5332 copy_size = copy->size;
5333 }
5334
5335 base_addr = dst_addr;
5336 while(TRUE) {
5337 /* deconstruct the copy object and do in parts */
5338 /* only in sub_map, interruptible case */
5339 vm_map_entry_t copy_entry;
5340 vm_map_entry_t previous_prev = VM_MAP_ENTRY_NULL;
5341 vm_map_entry_t next_copy = VM_MAP_ENTRY_NULL;
5342 int nentries;
5343 int remaining_entries = 0;
5344 int new_offset = 0;
5345
5346 for (entry = tmp_entry; copy_size == 0;) {
5347 vm_map_entry_t next;
5348
5349 next = entry->vme_next;
5350
5351 /* tmp_entry and base address are moved along */
5352 /* each time we encounter a sub-map. Otherwise */
5353 /* entry can outpace tmp_entry, and the copy_size */
5354 /* may reflect the distance between them */
5355 /* if the current entry is found to be in transition */
5356 /* we will start over at the beginning or the last */
5357 /* encounter of a submap as dictated by base_addr */
5358 /* we will zero copy_size accordingly. */
5359 if (entry->in_transition) {
5360 /*
5361 * Say that we are waiting, and wait for entry.
5362 */
5363 entry->needs_wakeup = TRUE;
5364 vm_map_entry_wait(dst_map, THREAD_UNINT);
5365
5366 if(!vm_map_lookup_entry(dst_map, base_addr,
5367 &tmp_entry)) {
5368 vm_map_unlock(dst_map);
5369 return(KERN_INVALID_ADDRESS);
5370 }
5371 copy_size = 0;
5372 entry = tmp_entry;
5373 continue;
5374 }
5375 if(entry->is_sub_map) {
5376 vm_map_offset_t sub_start;
5377 vm_map_offset_t sub_end;
5378 vm_map_offset_t local_end;
5379
5380 if (entry->needs_copy) {
5381 /* if this is a COW submap */
5382 /* just back the range with an */
5383 /* anonymous entry */
5384 if(entry->vme_end < dst_end)
5385 sub_end = entry->vme_end;
5386 else
5387 sub_end = dst_end;
5388 if(entry->vme_start < base_addr)
5389 sub_start = base_addr;
5390 else
5391 sub_start = entry->vme_start;
5392 vm_map_clip_end(
5393 dst_map, entry, sub_end);
5394 vm_map_clip_start(
5395 dst_map, entry, sub_start);
5396 assert(!entry->use_pmap);
5397 entry->is_sub_map = FALSE;
5398 vm_map_deallocate(
5399 entry->object.sub_map);
5400 entry->object.sub_map = NULL;
5401 entry->is_shared = FALSE;
5402 entry->needs_copy = FALSE;
5403 entry->offset = 0;
5404 /*
5405 * XXX FBDP
5406 * We should propagate the protections
5407 * of the submap entry here instead
5408 * of forcing them to VM_PROT_ALL...
5409 * Or better yet, we should inherit
5410 * the protection of the copy_entry.
5411 */
5412 entry->protection = VM_PROT_ALL;
5413 entry->max_protection = VM_PROT_ALL;
5414 entry->wired_count = 0;
5415 entry->user_wired_count = 0;
5416 if(entry->inheritance
5417 == VM_INHERIT_SHARE)
5418 entry->inheritance = VM_INHERIT_COPY;
5419 continue;
5420 }
5421 /* first take care of any non-sub_map */
5422 /* entries to send */
5423 if(base_addr < entry->vme_start) {
5424 /* stuff to send */
5425 copy_size =
5426 entry->vme_start - base_addr;
5427 break;
5428 }
5429 sub_start = entry->offset;
5430
5431 if(entry->vme_end < dst_end)
5432 sub_end = entry->vme_end;
5433 else
5434 sub_end = dst_end;
5435 sub_end -= entry->vme_start;
5436 sub_end += entry->offset;
5437 local_end = entry->vme_end;
5438 vm_map_unlock(dst_map);
5439 copy_size = sub_end - sub_start;
5440
5441 /* adjust the copy object */
5442 if (total_size > copy_size) {
5443 vm_map_size_t local_size = 0;
5444 vm_map_size_t entry_size;
5445
5446 nentries = 1;
5447 new_offset = copy->offset;
5448 copy_entry = vm_map_copy_first_entry(copy);
5449 while(copy_entry !=
5450 vm_map_copy_to_entry(copy)){
5451 entry_size = copy_entry->vme_end -
5452 copy_entry->vme_start;
5453 if((local_size < copy_size) &&
5454 ((local_size + entry_size)
5455 >= copy_size)) {
5456 vm_map_copy_clip_end(copy,
5457 copy_entry,
5458 copy_entry->vme_start +
5459 (copy_size - local_size));
5460 entry_size = copy_entry->vme_end -
5461 copy_entry->vme_start;
5462 local_size += entry_size;
5463 new_offset += entry_size;
5464 }
5465 if(local_size >= copy_size) {
5466 next_copy = copy_entry->vme_next;
5467 copy_entry->vme_next =
5468 vm_map_copy_to_entry(copy);
5469 previous_prev =
5470 copy->cpy_hdr.links.prev;
5471 copy->cpy_hdr.links.prev = copy_entry;
5472 copy->size = copy_size;
5473 remaining_entries =
5474 copy->cpy_hdr.nentries;
5475 remaining_entries -= nentries;
5476 copy->cpy_hdr.nentries = nentries;
5477 break;
5478 } else {
5479 local_size += entry_size;
5480 new_offset += entry_size;
5481 nentries++;
5482 }
5483 copy_entry = copy_entry->vme_next;
5484 }
5485 }
5486
5487 if((entry->use_pmap) && (pmap == NULL)) {
5488 kr = vm_map_copy_overwrite_nested(
5489 entry->object.sub_map,
5490 sub_start,
5491 copy,
5492 interruptible,
5493 entry->object.sub_map->pmap);
5494 } else if (pmap != NULL) {
5495 kr = vm_map_copy_overwrite_nested(
5496 entry->object.sub_map,
5497 sub_start,
5498 copy,
5499 interruptible, pmap);
5500 } else {
5501 kr = vm_map_copy_overwrite_nested(
5502 entry->object.sub_map,
5503 sub_start,
5504 copy,
5505 interruptible,
5506 dst_map->pmap);
5507 }
5508 if(kr != KERN_SUCCESS) {
5509 if(next_copy != NULL) {
5510 copy->cpy_hdr.nentries +=
5511 remaining_entries;
5512 copy->cpy_hdr.links.prev->vme_next =
5513 next_copy;
5514 copy->cpy_hdr.links.prev
5515 = previous_prev;
5516 copy->size = total_size;
5517 }
5518 return kr;
5519 }
5520 if (dst_end <= local_end) {
5521 return(KERN_SUCCESS);
5522 }
5523 /* otherwise copy no longer exists, it was */
5524 /* destroyed after successful copy_overwrite */
5525 copy = (vm_map_copy_t)
5526 zalloc(vm_map_copy_zone);
5527 vm_map_copy_first_entry(copy) =
5528 vm_map_copy_last_entry(copy) =
5529 vm_map_copy_to_entry(copy);
5530 copy->type = VM_MAP_COPY_ENTRY_LIST;
5531 copy->offset = new_offset;
5532
5533 total_size -= copy_size;
5534 copy_size = 0;
5535 /* put back remainder of copy in container */
5536 if(next_copy != NULL) {
5537 copy->cpy_hdr.nentries = remaining_entries;
5538 copy->cpy_hdr.links.next = next_copy;
5539 copy->cpy_hdr.links.prev = previous_prev;
5540 copy->size = total_size;
5541 next_copy->vme_prev =
5542 vm_map_copy_to_entry(copy);
5543 next_copy = NULL;
5544 }
5545 base_addr = local_end;
5546 vm_map_lock(dst_map);
5547 if(!vm_map_lookup_entry(dst_map,
5548 local_end, &tmp_entry)) {
5549 vm_map_unlock(dst_map);
5550 return(KERN_INVALID_ADDRESS);
5551 }
5552 entry = tmp_entry;
5553 continue;
5554 }
5555 if (dst_end <= entry->vme_end) {
5556 copy_size = dst_end - base_addr;
5557 break;
5558 }
5559
5560 if ((next == vm_map_to_entry(dst_map)) ||
5561 (next->vme_start != entry->vme_end)) {
5562 vm_map_unlock(dst_map);
5563 return(KERN_INVALID_ADDRESS);
5564 }
5565
5566 entry = next;
5567 }/* for */
5568
5569 next_copy = NULL;
5570 nentries = 1;
5571
5572 /* adjust the copy object */
5573 if (total_size > copy_size) {
5574 vm_map_size_t local_size = 0;
5575 vm_map_size_t entry_size;
5576
5577 new_offset = copy->offset;
5578 copy_entry = vm_map_copy_first_entry(copy);
5579 while(copy_entry != vm_map_copy_to_entry(copy)) {
5580 entry_size = copy_entry->vme_end -
5581 copy_entry->vme_start;
5582 if((local_size < copy_size) &&
5583 ((local_size + entry_size)
5584 >= copy_size)) {
5585 vm_map_copy_clip_end(copy, copy_entry,
5586 copy_entry->vme_start +
5587 (copy_size - local_size));
5588 entry_size = copy_entry->vme_end -
5589 copy_entry->vme_start;
5590 local_size += entry_size;
5591 new_offset += entry_size;
5592 }
5593 if(local_size >= copy_size) {
5594 next_copy = copy_entry->vme_next;
5595 copy_entry->vme_next =
5596 vm_map_copy_to_entry(copy);
5597 previous_prev =
5598 copy->cpy_hdr.links.prev;
5599 copy->cpy_hdr.links.prev = copy_entry;
5600 copy->size = copy_size;
5601 remaining_entries =
5602 copy->cpy_hdr.nentries;
5603 remaining_entries -= nentries;
5604 copy->cpy_hdr.nentries = nentries;
5605 break;
5606 } else {
5607 local_size += entry_size;
5608 new_offset += entry_size;
5609 nentries++;
5610 }
5611 copy_entry = copy_entry->vme_next;
5612 }
5613 }
5614
5615 if (aligned) {
5616 pmap_t local_pmap;
5617
5618 if(pmap)
5619 local_pmap = pmap;
5620 else
5621 local_pmap = dst_map->pmap;
5622
5623 if ((kr = vm_map_copy_overwrite_aligned(
5624 dst_map, tmp_entry, copy,
5625 base_addr, local_pmap)) != KERN_SUCCESS) {
5626 if(next_copy != NULL) {
5627 copy->cpy_hdr.nentries +=
5628 remaining_entries;
5629 copy->cpy_hdr.links.prev->vme_next =
5630 next_copy;
5631 copy->cpy_hdr.links.prev =
5632 previous_prev;
5633 copy->size += copy_size;
5634 }
5635 return kr;
5636 }
5637 vm_map_unlock(dst_map);
5638 } else {
5639 /*
5640 * Performance gain:
5641 *
5642 * if the copy and dst address are misaligned but at the same
5643 * offset within the page, we can copy_not_aligned the
5644 * misaligned parts and copy aligned the rest. If they are
5645 * aligned but len is unaligned, we simply need to copy
5646 * the end bit unaligned. We'll need to split the misaligned
5647 * bits of the region in this case!
5648 */
5649 /* ALWAYS UNLOCKS THE dst_map MAP */
5650 if ((kr = vm_map_copy_overwrite_unaligned( dst_map,
5651 tmp_entry, copy, base_addr)) != KERN_SUCCESS) {
5652 if(next_copy != NULL) {
5653 copy->cpy_hdr.nentries +=
5654 remaining_entries;
5655 copy->cpy_hdr.links.prev->vme_next =
5656 next_copy;
5657 copy->cpy_hdr.links.prev =
5658 previous_prev;
5659 copy->size += copy_size;
5660 }
5661 return kr;
5662 }
5663 }
5664 total_size -= copy_size;
5665 if(total_size == 0)
5666 break;
5667 base_addr += copy_size;
5668 copy_size = 0;
5669 copy->offset = new_offset;
5670 if(next_copy != NULL) {
5671 copy->cpy_hdr.nentries = remaining_entries;
5672 copy->cpy_hdr.links.next = next_copy;
5673 copy->cpy_hdr.links.prev = previous_prev;
5674 next_copy->vme_prev = vm_map_copy_to_entry(copy);
5675 copy->size = total_size;
5676 }
5677 vm_map_lock(dst_map);
5678 while(TRUE) {
5679 if (!vm_map_lookup_entry(dst_map,
5680 base_addr, &tmp_entry)) {
5681 vm_map_unlock(dst_map);
5682 return(KERN_INVALID_ADDRESS);
5683 }
5684 if (tmp_entry->in_transition) {
5685 entry->needs_wakeup = TRUE;
5686 vm_map_entry_wait(dst_map, THREAD_UNINT);
5687 } else {
5688 break;
5689 }
5690 }
5691 vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(base_addr));
5692
5693 entry = tmp_entry;
5694 } /* while */
5695
5696 /*
5697 * Throw away the vm_map_copy object
5698 */
5699 vm_map_copy_discard(copy);
5700
5701 return(KERN_SUCCESS);
5702 }/* vm_map_copy_overwrite */
5703
5704 kern_return_t
5705 vm_map_copy_overwrite(
5706 vm_map_t dst_map,
5707 vm_map_offset_t dst_addr,
5708 vm_map_copy_t copy,
5709 boolean_t interruptible)
5710 {
5711 return vm_map_copy_overwrite_nested(
5712 dst_map, dst_addr, copy, interruptible, (pmap_t) NULL);
5713 }
5714
5715
5716 /*
5717 * Routine: vm_map_copy_overwrite_unaligned [internal use only]
5718 *
5719 * Description:
5720 * Physically copy unaligned data
5721 *
5722 * Implementation:
5723 * Unaligned parts of pages have to be physically copied. We use
5724 * a modified form of vm_fault_copy (which understands non-aligned
5725 * page offsets and sizes) to do the copy. We attempt to copy as
5726 * much memory in one go as possible; however, vm_fault_copy copies
5727 * within one memory object, so we have to find the smallest of "amount left",
5728 * "source object data size" and "target object data size". With
5729 * unaligned data we don't need to split regions, therefore the source
5730 * (copy) object should be one map entry, the target range may be split
5731 * over multiple map entries however. In any event we are pessimistic
5732 * about these assumptions.
5733 *
5734 * Assumptions:
5735 * dst_map is locked on entry and is returned locked on success,
5736 * unlocked on error.
5737 */
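/*
 * A minimal sketch of the chunk-size computation performed in the loop
 * below: each pass copies the minimum of the bytes left overall, the
 * bytes left in the current source copy entry, and the bytes left in the
 * current destination entry, roughly:
 *
 *	copy_size = MIN(amount_left,
 *			MIN(copy_entry->vme_end -
 *				(copy_entry->vme_start + src_offset),
 *			    entry->vme_end - start));
 */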
5738
5739 static kern_return_t
5740 vm_map_copy_overwrite_unaligned(
5741 vm_map_t dst_map,
5742 vm_map_entry_t entry,
5743 vm_map_copy_t copy,
5744 vm_map_offset_t start)
5745 {
5746 vm_map_entry_t copy_entry = vm_map_copy_first_entry(copy);
5747 vm_map_version_t version;
5748 vm_object_t dst_object;
5749 vm_object_offset_t dst_offset;
5750 vm_object_offset_t src_offset;
5751 vm_object_offset_t entry_offset;
5752 vm_map_offset_t entry_end;
5753 vm_map_size_t src_size,
5754 dst_size,
5755 copy_size,
5756 amount_left;
5757 kern_return_t kr = KERN_SUCCESS;
5758
5759 vm_map_lock_write_to_read(dst_map);
5760
5761 src_offset = copy->offset - vm_object_trunc_page(copy->offset);
5762 amount_left = copy->size;
5763 /*
5764 * unaligned so we never clipped this entry, we need the offset into
5765 * the vm_object not just the data.
5766 */
5767 while (amount_left > 0) {
5768
5769 if (entry == vm_map_to_entry(dst_map)) {
5770 vm_map_unlock_read(dst_map);
5771 return KERN_INVALID_ADDRESS;
5772 }
5773
5774 /* "start" must be within the current map entry */
5775 assert ((start>=entry->vme_start) && (start<entry->vme_end));
5776
5777 dst_offset = start - entry->vme_start;
5778
5779 dst_size = entry->vme_end - start;
5780
5781 src_size = copy_entry->vme_end -
5782 (copy_entry->vme_start + src_offset);
5783
5784 if (dst_size < src_size) {
5785 /*
5786 * we can only copy dst_size bytes before
5787 * we have to get the next destination entry
5788 */
5789 copy_size = dst_size;
5790 } else {
5791 /*
5792 * we can only copy src_size bytes before
5793 * we have to get the next source copy entry
5794 */
5795 copy_size = src_size;
5796 }
5797
5798 if (copy_size > amount_left) {
5799 copy_size = amount_left;
5800 }
5801 /*
5802 * Entry needs copy: create a shadow object for the
5803 * copy-on-write region.
5804 */
5805 if (entry->needs_copy &&
5806 ((entry->protection & VM_PROT_WRITE) != 0))
5807 {
5808 if (vm_map_lock_read_to_write(dst_map)) {
5809 vm_map_lock_read(dst_map);
5810 goto RetryLookup;
5811 }
5812 vm_object_shadow(&entry->object.vm_object,
5813 &entry->offset,
5814 (vm_map_size_t)(entry->vme_end
5815 - entry->vme_start));
5816 entry->needs_copy = FALSE;
5817 vm_map_lock_write_to_read(dst_map);
5818 }
5819 dst_object = entry->object.vm_object;
5820 /*
5821 * unlike with the virtual (aligned) copy we're going
5822 * to fault on it therefore we need a target object.
5823 */
5824 if (dst_object == VM_OBJECT_NULL) {
5825 if (vm_map_lock_read_to_write(dst_map)) {
5826 vm_map_lock_read(dst_map);
5827 goto RetryLookup;
5828 }
5829 dst_object = vm_object_allocate((vm_map_size_t)
5830 entry->vme_end - entry->vme_start);
5831 entry->object.vm_object = dst_object;
5832 entry->offset = 0;
5833 vm_map_lock_write_to_read(dst_map);
5834 }
5835 /*
5836 * Take an object reference and unlock map. The "entry" may
5837 * disappear or change when the map is unlocked.
5838 */
5839 vm_object_reference(dst_object);
5840 version.main_timestamp = dst_map->timestamp;
5841 entry_offset = entry->offset;
5842 entry_end = entry->vme_end;
5843 vm_map_unlock_read(dst_map);
5844 /*
5845 * Copy as much as possible in one pass
5846 */
5847 kr = vm_fault_copy(
5848 copy_entry->object.vm_object,
5849 copy_entry->offset + src_offset,
5850 &copy_size,
5851 dst_object,
5852 entry_offset + dst_offset,
5853 dst_map,
5854 &version,
5855 THREAD_UNINT );
5856
5857 start += copy_size;
5858 src_offset += copy_size;
5859 amount_left -= copy_size;
5860 /*
5861 * Release the object reference
5862 */
5863 vm_object_deallocate(dst_object);
5864 /*
5865 * If a hard error occurred, return it now
5866 */
5867 if (kr != KERN_SUCCESS)
5868 return kr;
5869
5870 if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
5871 || amount_left == 0)
5872 {
5873 /*
5874 * all done with this copy entry, dispose.
5875 */
5876 vm_map_copy_entry_unlink(copy, copy_entry);
5877 vm_object_deallocate(copy_entry->object.vm_object);
5878 vm_map_copy_entry_dispose(copy, copy_entry);
5879
5880 if ((copy_entry = vm_map_copy_first_entry(copy))
5881 == vm_map_copy_to_entry(copy) && amount_left) {
5882 /*
5883 * not finished copying but ran out of source
5884 */
5885 return KERN_INVALID_ADDRESS;
5886 }
5887 src_offset = 0;
5888 }
5889
5890 if (amount_left == 0)
5891 return KERN_SUCCESS;
5892
5893 vm_map_lock_read(dst_map);
5894 if (version.main_timestamp == dst_map->timestamp) {
5895 if (start == entry_end) {
5896 /*
5897 * destination region is split. Use the version
5898 * information to avoid a lookup in the normal
5899 * case.
5900 */
5901 entry = entry->vme_next;
5902 /*
5903 * should be contiguous. Fail if we encounter
5904 * a hole in the destination.
5905 */
5906 if (start != entry->vme_start) {
5907 vm_map_unlock_read(dst_map);
5908 return KERN_INVALID_ADDRESS ;
5909 }
5910 }
5911 } else {
5912 /*
5913 * Map version check failed.
5914 * we must lookup the entry because somebody
5915 * might have changed the map behind our backs.
5916 */
5917 RetryLookup:
5918 if (!vm_map_lookup_entry(dst_map, start, &entry))
5919 {
5920 vm_map_unlock_read(dst_map);
5921 return KERN_INVALID_ADDRESS ;
5922 }
5923 }
5924 }/* while */
5925
5926 return KERN_SUCCESS;
5927 }/* vm_map_copy_overwrite_unaligned */
5928
5929 /*
5930 * Routine: vm_map_copy_overwrite_aligned [internal use only]
5931 *
5932 * Description:
5933 * Does all the vm_trickery possible for whole pages.
5934 *
5935 * Implementation:
5936 *
5937 * If there are no permanent objects in the destination,
5938 * and the source and destination map entry zones match,
5939 * and the destination map entry is not shared,
5940 * then the map entries can be deleted and replaced
5941 * with those from the copy. The following code is the
5942 * basic idea of what to do, but there are lots of annoying
5943 * little details about getting protection and inheritance
5944 * right. Should add protection, inheritance, and sharing checks
5945 * to the above pass and make sure that no wiring is involved.
5946 */
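/*
 * A rough sketch of the fast-path test used below: the destination
 * entry's object is simply replaced with the copy's only when it holds
 * temporary, unshared memory, approximately:
 *
 *	if (!entry->is_shared &&
 *	    (object == VM_OBJECT_NULL ||
 *	     (object->internal && !object->true_share)))
 *		... swap in the copy entry's object ...
 *	else
 *		... fall back to a physical copy ...
 */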
5947
5948 static kern_return_t
5949 vm_map_copy_overwrite_aligned(
5950 vm_map_t dst_map,
5951 vm_map_entry_t tmp_entry,
5952 vm_map_copy_t copy,
5953 vm_map_offset_t start,
5954 __unused pmap_t pmap)
5955 {
5956 vm_object_t object;
5957 vm_map_entry_t copy_entry;
5958 vm_map_size_t copy_size;
5959 vm_map_size_t size;
5960 vm_map_entry_t entry;
5961
5962 while ((copy_entry = vm_map_copy_first_entry(copy))
5963 != vm_map_copy_to_entry(copy))
5964 {
5965 copy_size = (copy_entry->vme_end - copy_entry->vme_start);
5966
5967 entry = tmp_entry;
5968 assert(!entry->use_pmap); /* unnested when clipped earlier */
5969 if (entry == vm_map_to_entry(dst_map)) {
5970 vm_map_unlock(dst_map);
5971 return KERN_INVALID_ADDRESS;
5972 }
5973 size = (entry->vme_end - entry->vme_start);
5974 /*
5975 * Make sure that no holes popped up in the
5976 * address map, and that the protection is
5977 * still valid, in case the map was unlocked
5978 * earlier.
5979 */
5980
5981 if ((entry->vme_start != start) || ((entry->is_sub_map)
5982 && !entry->needs_copy)) {
5983 vm_map_unlock(dst_map);
5984 return(KERN_INVALID_ADDRESS);
5985 }
5986 assert(entry != vm_map_to_entry(dst_map));
5987
5988 /*
5989 * Check protection again
5990 */
5991
5992 if ( ! (entry->protection & VM_PROT_WRITE)) {
5993 vm_map_unlock(dst_map);
5994 return(KERN_PROTECTION_FAILURE);
5995 }
5996
5997 /*
5998 * Adjust to source size first
5999 */
6000
6001 if (copy_size < size) {
6002 vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
6003 size = copy_size;
6004 }
6005
6006 /*
6007 * Adjust to destination size
6008 */
6009
6010 if (size < copy_size) {
6011 vm_map_copy_clip_end(copy, copy_entry,
6012 copy_entry->vme_start + size);
6013 copy_size = size;
6014 }
6015
6016 assert((entry->vme_end - entry->vme_start) == size);
6017 assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
6018 assert((copy_entry->vme_end - copy_entry->vme_start) == size);
6019
6020 /*
6021 * If the destination contains temporary unshared memory,
6022 * we can perform the copy by throwing it away and
6023 * installing the source data.
6024 */
6025
6026 object = entry->object.vm_object;
6027 if ((!entry->is_shared &&
6028 ((object == VM_OBJECT_NULL) ||
6029 (object->internal && !object->true_share))) ||
6030 entry->needs_copy) {
6031 vm_object_t old_object = entry->object.vm_object;
6032 vm_object_offset_t old_offset = entry->offset;
6033 vm_object_offset_t offset;
6034
6035 /*
6036 * Ensure that the source and destination aren't
6037 * identical
6038 */
6039 if (old_object == copy_entry->object.vm_object &&
6040 old_offset == copy_entry->offset) {
6041 vm_map_copy_entry_unlink(copy, copy_entry);
6042 vm_map_copy_entry_dispose(copy, copy_entry);
6043
6044 if (old_object != VM_OBJECT_NULL)
6045 vm_object_deallocate(old_object);
6046
6047 start = tmp_entry->vme_end;
6048 tmp_entry = tmp_entry->vme_next;
6049 continue;
6050 }
6051
6052 if (old_object != VM_OBJECT_NULL) {
6053 if(entry->is_sub_map) {
6054 if(entry->use_pmap) {
6055 #ifndef NO_NESTED_PMAP
6056 pmap_unnest(dst_map->pmap,
6057 (addr64_t)entry->vme_start,
6058 entry->vme_end - entry->vme_start);
6059 #endif /* NO_NESTED_PMAP */
6060 if(dst_map->mapped) {
6061 /* clean up parent */
6062 /* map/maps */
6063 vm_map_submap_pmap_clean(
6064 dst_map, entry->vme_start,
6065 entry->vme_end,
6066 entry->object.sub_map,
6067 entry->offset);
6068 }
6069 } else {
6070 vm_map_submap_pmap_clean(
6071 dst_map, entry->vme_start,
6072 entry->vme_end,
6073 entry->object.sub_map,
6074 entry->offset);
6075 }
6076 vm_map_deallocate(
6077 entry->object.sub_map);
6078 } else {
6079 if(dst_map->mapped) {
6080 vm_object_pmap_protect(
6081 entry->object.vm_object,
6082 entry->offset,
6083 entry->vme_end
6084 - entry->vme_start,
6085 PMAP_NULL,
6086 entry->vme_start,
6087 VM_PROT_NONE);
6088 } else {
6089 pmap_remove(dst_map->pmap,
6090 (addr64_t)(entry->vme_start),
6091 (addr64_t)(entry->vme_end));
6092 }
6093 vm_object_deallocate(old_object);
6094 }
6095 }
6096
6097 entry->is_sub_map = FALSE;
6098 entry->object = copy_entry->object;
6099 object = entry->object.vm_object;
6100 entry->needs_copy = copy_entry->needs_copy;
6101 entry->wired_count = 0;
6102 entry->user_wired_count = 0;
6103 offset = entry->offset = copy_entry->offset;
6104 /*
6105 * XXX FBDP
6106 * We should propagate the submap entry's protections
6107 * here instead of forcing VM_PROT_ALL.
6108 * Or better yet, we should inherit the protection
6109 * of the copy_entry.
6110 */
6111 entry->protection = VM_PROT_ALL;
6112 entry->max_protection = VM_PROT_ALL;
6113
6114 vm_map_copy_entry_unlink(copy, copy_entry);
6115 vm_map_copy_entry_dispose(copy, copy_entry);
6116
6117 /*
6118 * we could try to push pages into the pmap at this point, BUT
6119 * this optimization only saved on average 2 us per page if ALL
6120 * the pages in the source were currently mapped
6121 * and ALL the pages in the dest were touched; if fewer
6122 * than 2/3 of the pages were touched, this optimization actually cost more cycles.
6123 * It also puts a lot of pressure on the pmap layer w/r/t mapping structures.
6124 */
6125
6126 /*
6127 * Set up for the next iteration. The map
6128 * has not been unlocked, so the next
6129 * address should be at the end of this
6130 * entry, and the next map entry should be
6131 * the one following it.
6132 */
6133
6134 start = tmp_entry->vme_end;
6135 tmp_entry = tmp_entry->vme_next;
6136 } else {
6137 vm_map_version_t version;
6138 vm_object_t dst_object = entry->object.vm_object;
6139 vm_object_offset_t dst_offset = entry->offset;
6140 kern_return_t r;
6141
6142 /*
6143 * Take an object reference, and record
6144 * the map version information so that the
6145 * map can be safely unlocked.
6146 */
6147
6148 vm_object_reference(dst_object);
6149
6150 /* account for unlock bumping up timestamp */
6151 version.main_timestamp = dst_map->timestamp + 1;
6152
6153 vm_map_unlock(dst_map);
6154
6155 /*
6156 * Copy as much as possible in one pass
6157 */
6158
6159 copy_size = size;
6160 r = vm_fault_copy(
6161 copy_entry->object.vm_object,
6162 copy_entry->offset,
6163 &copy_size,
6164 dst_object,
6165 dst_offset,
6166 dst_map,
6167 &version,
6168 THREAD_UNINT );
6169
6170 /*
6171 * Release the object reference
6172 */
6173
6174 vm_object_deallocate(dst_object);
6175
6176 /*
6177 * If a hard error occurred, return it now
6178 */
6179
6180 if (r != KERN_SUCCESS)
6181 return(r);
6182
6183 if (copy_size != 0) {
6184 /*
6185 * Dispose of the copied region
6186 */
6187
6188 vm_map_copy_clip_end(copy, copy_entry,
6189 copy_entry->vme_start + copy_size);
6190 vm_map_copy_entry_unlink(copy, copy_entry);
6191 vm_object_deallocate(copy_entry->object.vm_object);
6192 vm_map_copy_entry_dispose(copy, copy_entry);
6193 }
6194
6195 /*
6196 * Pick up in the destination map where we left off.
6197 *
6198 * Use the version information to avoid a lookup
6199 * in the normal case.
6200 */
6201
6202 start += copy_size;
6203 vm_map_lock(dst_map);
6204 if (version.main_timestamp == dst_map->timestamp) {
6205 /* We can safely use saved tmp_entry value */
6206
6207 vm_map_clip_end(dst_map, tmp_entry, start);
6208 tmp_entry = tmp_entry->vme_next;
6209 } else {
6210 /* Must do lookup of tmp_entry */
6211
6212 if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
6213 vm_map_unlock(dst_map);
6214 return(KERN_INVALID_ADDRESS);
6215 }
6216 vm_map_clip_start(dst_map, tmp_entry, start);
6217 }
6218 }
6219 }/* while */
6220
6221 return(KERN_SUCCESS);
6222 }/* vm_map_copy_overwrite_aligned */
6223
6224 /*
6225 * Routine: vm_map_copyin_kernel_buffer [internal use only]
6226 *
6227 * Description:
6228 * Copy in data to a kernel buffer from space in the
6229 * source map. The original space may be optionally
6230 * deallocated.
6231 *
6232 * If successful, returns a new copy object.
6233 */
6234 static kern_return_t
6235 vm_map_copyin_kernel_buffer(
6236 vm_map_t src_map,
6237 vm_map_offset_t src_addr,
6238 vm_map_size_t len,
6239 boolean_t src_destroy,
6240 vm_map_copy_t *copy_result)
6241 {
6242 kern_return_t kr;
6243 vm_map_copy_t copy;
6244 vm_map_size_t kalloc_size = sizeof(struct vm_map_copy) + len;
6245
6246 copy = (vm_map_copy_t) kalloc(kalloc_size);
6247 if (copy == VM_MAP_COPY_NULL) {
6248 return KERN_RESOURCE_SHORTAGE;
6249 }
6250 copy->type = VM_MAP_COPY_KERNEL_BUFFER;
6251 copy->size = len;
6252 copy->offset = 0;
6253 copy->cpy_kdata = (void *) (copy + 1);
6254 copy->cpy_kalloc_size = kalloc_size;
6255
6256 kr = copyinmap(src_map, src_addr, copy->cpy_kdata, len);
6257 if (kr != KERN_SUCCESS) {
6258 kfree(copy, kalloc_size);
6259 return kr;
6260 }
6261 if (src_destroy) {
6262 (void) vm_map_remove(src_map, vm_map_trunc_page(src_addr),
6263 vm_map_round_page(src_addr + len),
6264 VM_MAP_REMOVE_INTERRUPTIBLE |
6265 VM_MAP_REMOVE_WAIT_FOR_KWIRE |
6266 (src_map == kernel_map) ?
6267 VM_MAP_REMOVE_KUNWIRE : 0);
6268 }
6269 *copy_result = copy;
6270 return KERN_SUCCESS;
6271 }
6272
6273 /*
6274 * Routine: vm_map_copyout_kernel_buffer [internal use only]
6275 *
6276 * Description:
6277 * Copy out data from a kernel buffer into space in the
6278 * destination map. The space may be optionally dynamically
6279 * allocated.
6280 *
6281 * If successful, consumes the copy object.
6282 * Otherwise, the caller is responsible for it.
6283 */
6284 static int vm_map_copyout_kernel_buffer_failures = 0;
6285 static kern_return_t
6286 vm_map_copyout_kernel_buffer(
6287 vm_map_t map,
6288 vm_map_address_t *addr, /* IN/OUT */
6289 vm_map_copy_t copy,
6290 boolean_t overwrite)
6291 {
6292 kern_return_t kr = KERN_SUCCESS;
6293 thread_t thread = current_thread();
6294
6295 if (!overwrite) {
6296
6297 /*
6298 * Allocate space in the target map for the data
6299 */
6300 *addr = 0;
6301 kr = vm_map_enter(map,
6302 addr,
6303 vm_map_round_page(copy->size),
6304 (vm_map_offset_t) 0,
6305 VM_FLAGS_ANYWHERE,
6306 VM_OBJECT_NULL,
6307 (vm_object_offset_t) 0,
6308 FALSE,
6309 VM_PROT_DEFAULT,
6310 VM_PROT_ALL,
6311 VM_INHERIT_DEFAULT);
6312 if (kr != KERN_SUCCESS)
6313 return kr;
6314 }
6315
6316 /*
6317 * Copyout the data from the kernel buffer to the target map.
6318 */
6319 if (thread->map == map) {
6320
6321 /*
6322 * If the target map is the current map, just do
6323 * the copy.
6324 */
6325 if (copyout(copy->cpy_kdata, *addr, copy->size)) {
6326 kr = KERN_INVALID_ADDRESS;
6327 }
6328 }
6329 else {
6330 vm_map_t oldmap;
6331
6332 /*
6333 * If the target map is another map, assume the
6334 * target's address space identity for the duration
6335 * of the copy.
6336 */
6337 vm_map_reference(map);
6338 oldmap = vm_map_switch(map);
6339
6340 if (copyout(copy->cpy_kdata, *addr, copy->size)) {
6341 vm_map_copyout_kernel_buffer_failures++;
6342 kr = KERN_INVALID_ADDRESS;
6343 }
6344
6345 (void) vm_map_switch(oldmap);
6346 vm_map_deallocate(map);
6347 }
6348
6349 if (kr != KERN_SUCCESS) {
6350 /* the copy failed, clean up */
6351 if (!overwrite) {
6352 /*
6353 * Deallocate the space we allocated in the target map.
6354 */
6355 (void) vm_map_remove(map,
6356 vm_map_trunc_page(*addr),
6357 vm_map_round_page(*addr +
6358 vm_map_round_page(copy->size)),
6359 VM_MAP_NO_FLAGS);
6360 *addr = 0;
6361 }
6362 } else {
6363 /* copy was successful, discard the copy structure */
6364 kfree(copy, copy->cpy_kalloc_size);
6365 }
6366
6367 return kr;
6368 }
6369
6370 /*
6371 * Macro: vm_map_copy_insert
6372 *
6373 * Description:
6374 * Link a copy chain ("copy") into a map at the
6375 * specified location (after "where").
6376 * Side effects:
6377 * The copy chain is destroyed.
6378 * Warning:
6379 * The arguments are evaluated multiple times.
6380 */
6381 #define vm_map_copy_insert(map, where, copy) \
6382 MACRO_BEGIN \
6383 vm_map_t VMCI_map; \
6384 vm_map_entry_t VMCI_where; \
6385 vm_map_copy_t VMCI_copy; \
6386 VMCI_map = (map); \
6387 VMCI_where = (where); \
6388 VMCI_copy = (copy); \
6389 ((VMCI_where->vme_next)->vme_prev = vm_map_copy_last_entry(VMCI_copy))\
6390 ->vme_next = (VMCI_where->vme_next); \
6391 ((VMCI_where)->vme_next = vm_map_copy_first_entry(VMCI_copy)) \
6392 ->vme_prev = VMCI_where; \
6393 VMCI_map->hdr.nentries += VMCI_copy->cpy_hdr.nentries; \
6394 UPDATE_FIRST_FREE(VMCI_map, VMCI_map->first_free); \
6395 zfree(vm_map_copy_zone, VMCI_copy); \
6396 MACRO_END
6397
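/*
 * Illustrative sketch (for exposition only): the splice performed by
 * vm_map_copy_insert written out as a function, to make the pointer
 * manipulation in the macro above easier to follow.  The name
 * "vm_map_copy_insert_sketch" is hypothetical; the fields and helpers
 * it touches are exactly the ones used by the macro.
 */
static __unused void
vm_map_copy_insert_sketch(
        vm_map_t        map,
        vm_map_entry_t  where,
        vm_map_copy_t   copy)
{
        /* splice the copy's entry chain in right after "where" */
        vm_map_copy_last_entry(copy)->vme_next = where->vme_next;
        where->vme_next->vme_prev = vm_map_copy_last_entry(copy);
        vm_map_copy_first_entry(copy)->vme_prev = where;
        where->vme_next = vm_map_copy_first_entry(copy);

        /* the destination map inherits the copy's entries */
        map->hdr.nentries += copy->cpy_hdr.nentries;
        UPDATE_FIRST_FREE(map, map->first_free);

        /* the copy header itself is no longer needed */
        zfree(vm_map_copy_zone, copy);
}
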
6398 /*
6399 * Routine: vm_map_copyout
6400 *
6401 * Description:
6402 * Copy out a copy chain ("copy") into newly-allocated
6403 * space in the destination map.
6404 *
6405 * If successful, consumes the copy object.
6406 * Otherwise, the caller is responsible for it.
6407 */
6408 kern_return_t
6409 vm_map_copyout(
6410 vm_map_t dst_map,
6411 vm_map_address_t *dst_addr, /* OUT */
6412 vm_map_copy_t copy)
6413 {
6414 vm_map_size_t size;
6415 vm_map_size_t adjustment;
6416 vm_map_offset_t start;
6417 vm_object_offset_t vm_copy_start;
6418 vm_map_entry_t last;
6419 register
6420 vm_map_entry_t entry;
6421
6422 /*
6423 * Check for null copy object.
6424 */
6425
6426 if (copy == VM_MAP_COPY_NULL) {
6427 *dst_addr = 0;
6428 return(KERN_SUCCESS);
6429 }
6430
6431 /*
6432 * Check for special copy object, created
6433 * by vm_map_copyin_object.
6434 */
6435
6436 if (copy->type == VM_MAP_COPY_OBJECT) {
6437 vm_object_t object = copy->cpy_object;
6438 kern_return_t kr;
6439 vm_object_offset_t offset;
6440
6441 offset = vm_object_trunc_page(copy->offset);
6442 size = vm_map_round_page(copy->size +
6443 (vm_map_size_t)(copy->offset - offset));
6444 *dst_addr = 0;
6445 kr = vm_map_enter(dst_map, dst_addr, size,
6446 (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
6447 object, offset, FALSE,
6448 VM_PROT_DEFAULT, VM_PROT_ALL,
6449 VM_INHERIT_DEFAULT);
6450 if (kr != KERN_SUCCESS)
6451 return(kr);
6452 /* Account for non-page-aligned copy object */
6453 *dst_addr += (vm_map_offset_t)(copy->offset - offset);
6454 zfree(vm_map_copy_zone, copy);
6455 return(KERN_SUCCESS);
6456 }
6457
6458 /*
6459 * Check for special kernel buffer allocated
6460 * by new_ipc_kmsg_copyin.
6461 */
6462
6463 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
6464 return(vm_map_copyout_kernel_buffer(dst_map, dst_addr,
6465 copy, FALSE));
6466 }
6467
6468 /*
6469 * Find space for the data
6470 */
6471
6472 vm_copy_start = vm_object_trunc_page(copy->offset);
6473 size = vm_map_round_page((vm_map_size_t)copy->offset + copy->size)
6474 - vm_copy_start;
6475
6476 StartAgain: ;
6477
6478 vm_map_lock(dst_map);
6479 assert(first_free_is_valid(dst_map));
6480 start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
6481 vm_map_min(dst_map) : last->vme_end;
6482
6483 while (TRUE) {
6484 vm_map_entry_t next = last->vme_next;
6485 vm_map_offset_t end = start + size;
6486
6487 if ((end > dst_map->max_offset) || (end < start)) {
6488 if (dst_map->wait_for_space) {
6489 if (size <= (dst_map->max_offset - dst_map->min_offset)) {
6490 assert_wait((event_t) dst_map,
6491 THREAD_INTERRUPTIBLE);
6492 vm_map_unlock(dst_map);
6493 thread_block(THREAD_CONTINUE_NULL);
6494 goto StartAgain;
6495 }
6496 }
6497 vm_map_unlock(dst_map);
6498 return(KERN_NO_SPACE);
6499 }
6500
6501 if ((next == vm_map_to_entry(dst_map)) ||
6502 (next->vme_start >= end))
6503 break;
6504
6505 last = next;
6506 start = last->vme_end;
6507 }
6508
6509 /*
6510 * Since we're going to just drop the map
6511 * entries from the copy into the destination
6512 * map, they must come from the same pool.
6513 */
6514
6515 if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
6516 /*
6517 * Mismatches occur when dealing with the default
6518 * pager.
6519 */
6520 zone_t old_zone;
6521 vm_map_entry_t next, new;
6522
6523 /*
6524 * Find the zone that the copies were allocated from
6525 */
6526 old_zone = (copy->cpy_hdr.entries_pageable)
6527 ? vm_map_entry_zone
6528 : vm_map_kentry_zone;
6529 entry = vm_map_copy_first_entry(copy);
6530
6531 /*
6532 * Reinitialize the copy so that vm_map_copy_entry_link
6533 * will work.
6534 */
6535 copy->cpy_hdr.nentries = 0;
6536 copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;
6537 vm_map_copy_first_entry(copy) =
6538 vm_map_copy_last_entry(copy) =
6539 vm_map_copy_to_entry(copy);
6540
6541 /*
6542 * Copy each entry.
6543 */
6544 while (entry != vm_map_copy_to_entry(copy)) {
6545 new = vm_map_copy_entry_create(copy);
6546 vm_map_entry_copy_full(new, entry);
6547 new->use_pmap = FALSE; /* clr address space specifics */
6548 vm_map_copy_entry_link(copy,
6549 vm_map_copy_last_entry(copy),
6550 new);
6551 next = entry->vme_next;
6552 zfree(old_zone, entry);
6553 entry = next;
6554 }
6555 }
6556
6557 /*
6558 * Adjust the addresses in the copy chain, and
6559 * reset the region attributes.
6560 */
6561
6562 adjustment = start - vm_copy_start;
6563 for (entry = vm_map_copy_first_entry(copy);
6564 entry != vm_map_copy_to_entry(copy);
6565 entry = entry->vme_next) {
6566 entry->vme_start += adjustment;
6567 entry->vme_end += adjustment;
6568
6569 entry->inheritance = VM_INHERIT_DEFAULT;
6570 entry->protection = VM_PROT_DEFAULT;
6571 entry->max_protection = VM_PROT_ALL;
6572 entry->behavior = VM_BEHAVIOR_DEFAULT;
6573
6574 /*
6575 * If the entry is now wired,
6576 * map the pages into the destination map.
6577 */
6578 if (entry->wired_count != 0) {
6579 register vm_map_offset_t va;
6580 vm_object_offset_t offset;
6581 register vm_object_t object;
6582 vm_prot_t prot;
6583 int type_of_fault;
6584
6585 object = entry->object.vm_object;
6586 offset = entry->offset;
6587 va = entry->vme_start;
6588
6589 pmap_pageable(dst_map->pmap,
6590 entry->vme_start,
6591 entry->vme_end,
6592 TRUE);
6593
6594 while (va < entry->vme_end) {
6595 register vm_page_t m;
6596
6597 /*
6598 * Look up the page in the object.
6599 * Assert that the page will be found in the
6600 * top object:
6601 * either
6602 * the object was newly created by
6603 * vm_object_copy_slowly, and has
6604 * copies of all of the pages from
6605 * the source object
6606 * or
6607 * the object was moved from the old
6608 * map entry; because the old map
6609 * entry was wired, all of the pages
6610 * were in the top-level object.
6611 * (XXX not true if we wire pages for
6612 * reading)
6613 */
6614 vm_object_lock(object);
6615
6616 m = vm_page_lookup(object, offset);
6617 if (m == VM_PAGE_NULL || m->wire_count == 0 ||
6618 m->absent)
6619 panic("vm_map_copyout: wiring %p", m);
6620
6621 /*
6622 * ENCRYPTED SWAP:
6623 * The page is assumed to be wired here, so it
6624 * shouldn't be encrypted. Otherwise, we
6625 * couldn't enter it in the page table, since
6626 * we don't want the user to see the encrypted
6627 * data.
6628 */
6629 ASSERT_PAGE_DECRYPTED(m);
6630
6631 prot = entry->protection;
6632
6633 if (override_nx(dst_map, entry->alias) && prot)
6634 prot |= VM_PROT_EXECUTE;
6635
6636 type_of_fault = DBG_CACHE_HIT_FAULT;
6637
6638 vm_fault_enter(m, dst_map->pmap, va, prot,
6639 m->wire_count != 0, FALSE, FALSE,
6640 &type_of_fault);
6641
6642 vm_object_unlock(object);
6643
6644 offset += PAGE_SIZE_64;
6645 va += PAGE_SIZE;
6646 }
6647 }
6648 }
6649
6650 /*
6651 * Correct the page alignment for the result
6652 */
6653
6654 *dst_addr = start + (copy->offset - vm_copy_start);
6655
6656 /*
6657 * Update the hints and the map size
6658 */
6659
6660 SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy));
6661
6662 dst_map->size += size;
6663
6664 /*
6665 * Link in the copy
6666 */
6667
6668 vm_map_copy_insert(dst_map, last, copy);
6669
6670 vm_map_unlock(dst_map);
6671
6672 /*
6673 * XXX If wiring_required, call vm_map_pageable
6674 */
6675
6676 return(KERN_SUCCESS);
6677 }
6678
6679 /*
6680 * Routine: vm_map_copyin
6681 *
6682 * Description:
6683 * see vm_map_copyin_common. Exported via Unsupported.exports.
6684 *
6685 */
6686
6687 #undef vm_map_copyin
6688
6689 kern_return_t
6690 vm_map_copyin(
6691 vm_map_t src_map,
6692 vm_map_address_t src_addr,
6693 vm_map_size_t len,
6694 boolean_t src_destroy,
6695 vm_map_copy_t *copy_result) /* OUT */
6696 {
6697 return(vm_map_copyin_common(src_map, src_addr, len, src_destroy,
6698 FALSE, copy_result, FALSE));
6699 }
6700
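/*
 * Illustrative sketch (for exposition only): a typical use of
 * vm_map_copyin() and vm_map_copyout() to move a region from one map
 * into newly allocated space in another.  The routine name
 * "vm_map_copy_between_maps_example" is hypothetical; the maps,
 * address and length are assumed to be supplied by the caller.
 */
static __unused kern_return_t
vm_map_copy_between_maps_example(
        vm_map_t                src_map,
        vm_map_address_t        src_addr,
        vm_map_size_t           len,
        vm_map_t                dst_map,
        vm_map_address_t        *dst_addr)      /* OUT */
{
        vm_map_copy_t   copy;
        kern_return_t   kr;

        /* capture the source region as a copy object */
        kr = vm_map_copyin(src_map, src_addr, len,
                           FALSE,               /* don't destroy the source */
                           &copy);
        if (kr != KERN_SUCCESS)
                return kr;

        /* enter it into newly allocated space in the destination map */
        kr = vm_map_copyout(dst_map, dst_addr, copy);
        if (kr != KERN_SUCCESS) {
                /*
                 * vm_map_copyout() only consumes the copy on success,
                 * so it must be destroyed here.
                 */
                vm_map_copy_discard(copy);
        }
        return kr;
}
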
6701 /*
6702 * Routine: vm_map_copyin_common
6703 *
6704 * Description:
6705 * Copy the specified region (src_addr, len) from the
6706 * source address space (src_map), possibly removing
6707 * the region from the source address space (src_destroy).
6708 *
6709 * Returns:
6710 * A vm_map_copy_t object (copy_result), suitable for
6711 * insertion into another address space (using vm_map_copyout),
6712 * copying over another address space region (using
6713 * vm_map_copy_overwrite). If the copy is unused, it
6714 * should be destroyed (using vm_map_copy_discard).
6715 *
6716 * In/out conditions:
6717 * The source map should not be locked on entry.
6718 */
6719
6720 typedef struct submap_map {
6721 vm_map_t parent_map;
6722 vm_map_offset_t base_start;
6723 vm_map_offset_t base_end;
6724 vm_map_size_t base_len;
6725 struct submap_map *next;
6726 } submap_map_t;
6727
6728 kern_return_t
6729 vm_map_copyin_common(
6730 vm_map_t src_map,
6731 vm_map_address_t src_addr,
6732 vm_map_size_t len,
6733 boolean_t src_destroy,
6734 __unused boolean_t src_volatile,
6735 vm_map_copy_t *copy_result, /* OUT */
6736 boolean_t use_maxprot)
6737 {
6738 vm_map_entry_t tmp_entry; /* Result of last map lookup --
6739 * in multi-level lookup, this
6740 * entry contains the actual
6741 * vm_object/offset.
6742 */
6743 register
6744 vm_map_entry_t new_entry = VM_MAP_ENTRY_NULL; /* Map entry for copy */
6745
6746 vm_map_offset_t src_start; /* Start of current entry --
6747 * where copy is taking place now
6748 */
6749 vm_map_offset_t src_end; /* End of entire region to be
6750 * copied */
6751 vm_map_offset_t src_base;
6752 vm_map_t base_map = src_map;
6753 boolean_t map_share=FALSE;
6754 submap_map_t *parent_maps = NULL;
6755
6756 register
6757 vm_map_copy_t copy; /* Resulting copy */
6758 vm_map_address_t copy_addr;
6759
6760 /*
6761 * Check for copies of zero bytes.
6762 */
6763
6764 if (len == 0) {
6765 *copy_result = VM_MAP_COPY_NULL;
6766 return(KERN_SUCCESS);
6767 }
6768
6769 /*
6770 * Check that the end address doesn't overflow
6771 */
6772 src_end = src_addr + len;
6773 if (src_end < src_addr)
6774 return KERN_INVALID_ADDRESS;
6775
6776 /*
6777 * If the copy is sufficiently small, use a kernel buffer instead
6778 * of making a virtual copy. The theory being that the cost of
6779 * setting up VM (and taking C-O-W faults) dominates the copy costs
6780 * for small regions.
6781 */
6782 if ((len < msg_ool_size_small) && !use_maxprot)
6783 return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
6784 src_destroy, copy_result);
6785
6786 /*
6787 * Compute (page aligned) start and end of region
6788 */
6789 src_start = vm_map_trunc_page(src_addr);
6790 src_end = vm_map_round_page(src_end);
6791
6792 XPR(XPR_VM_MAP, "vm_map_copyin_common map 0x%x addr 0x%x len 0x%x dest %d\n", (natural_t)src_map, src_addr, len, src_destroy, 0);
6793
6794 /*
6795 * Allocate a header element for the list.
6796 *
6797 * Use the start and end in the header to
6798 * remember the endpoints prior to rounding.
6799 */
6800
6801 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
6802 vm_map_copy_first_entry(copy) =
6803 vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
6804 copy->type = VM_MAP_COPY_ENTRY_LIST;
6805 copy->cpy_hdr.nentries = 0;
6806 copy->cpy_hdr.entries_pageable = TRUE;
6807
6808 copy->offset = src_addr;
6809 copy->size = len;
6810
6811 new_entry = vm_map_copy_entry_create(copy);
6812
6813 #define RETURN(x) \
6814 MACRO_BEGIN \
6815 vm_map_unlock(src_map); \
6816 if(src_map != base_map) \
6817 vm_map_deallocate(src_map); \
6818 if (new_entry != VM_MAP_ENTRY_NULL) \
6819 vm_map_copy_entry_dispose(copy,new_entry); \
6820 vm_map_copy_discard(copy); \
6821 { \
6822 submap_map_t *_ptr; \
6823 \
6824 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
6825 parent_maps=parent_maps->next; \
6826 if (_ptr->parent_map != base_map) \
6827 vm_map_deallocate(_ptr->parent_map); \
6828 kfree(_ptr, sizeof(submap_map_t)); \
6829 } \
6830 } \
6831 MACRO_RETURN(x); \
6832 MACRO_END
6833
6834 /*
6835 * Find the beginning of the region.
6836 */
6837
6838 vm_map_lock(src_map);
6839
6840 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry))
6841 RETURN(KERN_INVALID_ADDRESS);
6842 if(!tmp_entry->is_sub_map) {
6843 vm_map_clip_start(src_map, tmp_entry, src_start);
6844 }
6845 /* set for later submap fix-up */
6846 copy_addr = src_start;
6847
6848 /*
6849 * Go through entries until we get to the end.
6850 */
6851
6852 while (TRUE) {
6853 register
6854 vm_map_entry_t src_entry = tmp_entry; /* Top-level entry */
6855 vm_map_size_t src_size; /* Size of source
6856 * map entry (in both
6857 * maps)
6858 */
6859
6860 register
6861 vm_object_t src_object; /* Object to copy */
6862 vm_object_offset_t src_offset;
6863
6864 boolean_t src_needs_copy; /* Should source map
6865 * be made read-only
6866 * for copy-on-write?
6867 */
6868
6869 boolean_t new_entry_needs_copy; /* Will new entry be COW? */
6870
6871 boolean_t was_wired; /* Was source wired? */
6872 vm_map_version_t version; /* Version before locks
6873 * dropped to make copy
6874 */
6875 kern_return_t result; /* Return value from
6876 * copy_strategically.
6877 */
6878 while(tmp_entry->is_sub_map) {
6879 vm_map_size_t submap_len;
6880 submap_map_t *ptr;
6881
6882 ptr = (submap_map_t *)kalloc(sizeof(submap_map_t));
6883 ptr->next = parent_maps;
6884 parent_maps = ptr;
6885 ptr->parent_map = src_map;
6886 ptr->base_start = src_start;
6887 ptr->base_end = src_end;
6888 submap_len = tmp_entry->vme_end - src_start;
6889 if(submap_len > (src_end-src_start))
6890 submap_len = src_end-src_start;
6891 ptr->base_len = submap_len;
6892
6893 src_start -= tmp_entry->vme_start;
6894 src_start += tmp_entry->offset;
6895 src_end = src_start + submap_len;
6896 src_map = tmp_entry->object.sub_map;
6897 vm_map_lock(src_map);
6898 /* keep an outstanding reference for all maps in */
6899 /* the parent's tree except the base map */
6900 vm_map_reference(src_map);
6901 vm_map_unlock(ptr->parent_map);
6902 if (!vm_map_lookup_entry(
6903 src_map, src_start, &tmp_entry))
6904 RETURN(KERN_INVALID_ADDRESS);
6905 map_share = TRUE;
6906 if(!tmp_entry->is_sub_map)
6907 vm_map_clip_start(src_map, tmp_entry, src_start);
6908 src_entry = tmp_entry;
6909 }
6910 /* we are now in the lowest level submap... */
6911
6912 if ((tmp_entry->object.vm_object != VM_OBJECT_NULL) &&
6913 (tmp_entry->object.vm_object->phys_contiguous)) {
6914 /* This is not supported for now. In future */
6915 /* we will need to detect the phys_contig */
6916 /* condition and then upgrade copy_slowly */
6917 /* to do physical copy from the device mem */
6918 /* based object. We can piggy-back off of */
6919 /* the was_wired boolean to set up the */
6920 /* proper handling */
6921 RETURN(KERN_PROTECTION_FAILURE);
6922 }
6923 /*
6924 * Create a new address map entry to hold the result.
6925 * Fill in the fields from the appropriate source entries.
6926 * We must unlock the source map to do this if we need
6927 * to allocate a map entry.
6928 */
6929 if (new_entry == VM_MAP_ENTRY_NULL) {
6930 version.main_timestamp = src_map->timestamp;
6931 vm_map_unlock(src_map);
6932
6933 new_entry = vm_map_copy_entry_create(copy);
6934
6935 vm_map_lock(src_map);
6936 if ((version.main_timestamp + 1) != src_map->timestamp) {
6937 if (!vm_map_lookup_entry(src_map, src_start,
6938 &tmp_entry)) {
6939 RETURN(KERN_INVALID_ADDRESS);
6940 }
6941 if (!tmp_entry->is_sub_map)
6942 vm_map_clip_start(src_map, tmp_entry, src_start);
6943 continue; /* restart w/ new tmp_entry */
6944 }
6945 }
6946
6947 /*
6948 * Verify that the region can be read.
6949 */
6950 if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE &&
6951 !use_maxprot) ||
6952 (src_entry->max_protection & VM_PROT_READ) == 0)
6953 RETURN(KERN_PROTECTION_FAILURE);
6954
6955 /*
6956 * Clip against the endpoints of the entire region.
6957 */
6958
6959 vm_map_clip_end(src_map, src_entry, src_end);
6960
6961 src_size = src_entry->vme_end - src_start;
6962 src_object = src_entry->object.vm_object;
6963 src_offset = src_entry->offset;
6964 was_wired = (src_entry->wired_count != 0);
6965
6966 vm_map_entry_copy(new_entry, src_entry);
6967 new_entry->use_pmap = FALSE; /* clr address space specifics */
6968
6969 /*
6970 * Attempt non-blocking copy-on-write optimizations.
6971 */
6972
6973 if (src_destroy &&
6974 (src_object == VM_OBJECT_NULL ||
6975 (src_object->internal && !src_object->true_share
6976 && !map_share))) {
6977 /*
6978 * If we are destroying the source, and the object
6979 * is internal, we can move the object reference
6980 * from the source to the copy. The copy is
6981 * copy-on-write only if the source is.
6982 * We make another reference to the object, because
6983 * destroying the source entry will deallocate it.
6984 */
6985 vm_object_reference(src_object);
6986
6987 /*
6988 * Copy is always unwired. vm_map_copy_entry
6989 * set its wired count to zero.
6990 */
6991
6992 goto CopySuccessful;
6993 }
6994
6995
6996 RestartCopy:
6997 XPR(XPR_VM_MAP, "vm_map_copyin_common src_obj 0x%x ent 0x%x obj 0x%x was_wired %d\n",
6998 src_object, new_entry, new_entry->object.vm_object,
6999 was_wired, 0);
7000 if ((src_object == VM_OBJECT_NULL ||
7001 (!was_wired && !map_share && !tmp_entry->is_shared)) &&
7002 vm_object_copy_quickly(
7003 &new_entry->object.vm_object,
7004 src_offset,
7005 src_size,
7006 &src_needs_copy,
7007 &new_entry_needs_copy)) {
7008
7009 new_entry->needs_copy = new_entry_needs_copy;
7010
7011 /*
7012 * Handle copy-on-write obligations
7013 */
7014
7015 if (src_needs_copy && !tmp_entry->needs_copy) {
7016 vm_prot_t prot;
7017
7018 prot = src_entry->protection & ~VM_PROT_WRITE;
7019
7020 if (override_nx(src_map, src_entry->alias) && prot)
7021 prot |= VM_PROT_EXECUTE;
7022
7023 vm_object_pmap_protect(
7024 src_object,
7025 src_offset,
7026 src_size,
7027 (src_entry->is_shared ?
7028 PMAP_NULL
7029 : src_map->pmap),
7030 src_entry->vme_start,
7031 prot);
7032
7033 tmp_entry->needs_copy = TRUE;
7034 }
7035
7036 /*
7037 * The map has never been unlocked, so it's safe
7038 * to move to the next entry rather than doing
7039 * another lookup.
7040 */
7041
7042 goto CopySuccessful;
7043 }
7044
7045 /*
7046 * Take an object reference, so that we may
7047 * release the map lock(s).
7048 */
7049
7050 assert(src_object != VM_OBJECT_NULL);
7051 vm_object_reference(src_object);
7052
7053 /*
7054 * Record the timestamp for later verification.
7055 * Unlock the map.
7056 */
7057
7058 version.main_timestamp = src_map->timestamp;
7059 vm_map_unlock(src_map); /* Increments timestamp once! */
7060
7061 /*
7062 * Perform the copy
7063 */
7064
7065 if (was_wired) {
7066 CopySlowly:
7067 vm_object_lock(src_object);
7068 result = vm_object_copy_slowly(
7069 src_object,
7070 src_offset,
7071 src_size,
7072 THREAD_UNINT,
7073 &new_entry->object.vm_object);
7074 new_entry->offset = 0;
7075 new_entry->needs_copy = FALSE;
7076
7077 }
7078 else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
7079 (tmp_entry->is_shared || map_share)) {
7080 vm_object_t new_object;
7081
7082 vm_object_lock_shared(src_object);
7083 new_object = vm_object_copy_delayed(
7084 src_object,
7085 src_offset,
7086 src_size,
7087 TRUE);
7088 if (new_object == VM_OBJECT_NULL)
7089 goto CopySlowly;
7090
7091 new_entry->object.vm_object = new_object;
7092 new_entry->needs_copy = TRUE;
7093 result = KERN_SUCCESS;
7094
7095 } else {
7096 result = vm_object_copy_strategically(src_object,
7097 src_offset,
7098 src_size,
7099 &new_entry->object.vm_object,
7100 &new_entry->offset,
7101 &new_entry_needs_copy);
7102
7103 new_entry->needs_copy = new_entry_needs_copy;
7104 }
7105
7106 if (result != KERN_SUCCESS &&
7107 result != KERN_MEMORY_RESTART_COPY) {
7108 vm_map_lock(src_map);
7109 RETURN(result);
7110 }
7111
7112 /*
7113 * Throw away the extra reference
7114 */
7115
7116 vm_object_deallocate(src_object);
7117
7118 /*
7119 * Verify that the map has not substantially
7120 * changed while the copy was being made.
7121 */
7122
7123 vm_map_lock(src_map);
7124
7125 if ((version.main_timestamp + 1) == src_map->timestamp)
7126 goto VerificationSuccessful;
7127
7128 /*
7129 * Simple version comparison failed.
7130 *
7131 * Retry the lookup and verify that the
7132 * same object/offset are still present.
7133 *
7134 * [Note: a memory manager that colludes with
7135 * the calling task can detect that we have
7136 * cheated. While the map was unlocked, the
7137 * mapping could have been changed and restored.]
7138 */
7139
7140 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
7141 RETURN(KERN_INVALID_ADDRESS);
7142 }
7143
7144 src_entry = tmp_entry;
7145 vm_map_clip_start(src_map, src_entry, src_start);
7146
7147 if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) &&
7148 !use_maxprot) ||
7149 ((src_entry->max_protection & VM_PROT_READ) == 0))
7150 goto VerificationFailed;
7151
7152 if (src_entry->vme_end < new_entry->vme_end)
7153 src_size = (new_entry->vme_end = src_entry->vme_end) - src_start;
7154
7155 if ((src_entry->object.vm_object != src_object) ||
7156 (src_entry->offset != src_offset) ) {
7157
7158 /*
7159 * Verification failed.
7160 *
7161 * Start over with this top-level entry.
7162 */
7163
7164 VerificationFailed: ;
7165
7166 vm_object_deallocate(new_entry->object.vm_object);
7167 tmp_entry = src_entry;
7168 continue;
7169 }
7170
7171 /*
7172 * Verification succeeded.
7173 */
7174
7175 VerificationSuccessful: ;
7176
7177 if (result == KERN_MEMORY_RESTART_COPY)
7178 goto RestartCopy;
7179
7180 /*
7181 * Copy succeeded.
7182 */
7183
7184 CopySuccessful: ;
7185
7186 /*
7187 * Link in the new copy entry.
7188 */
7189
7190 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
7191 new_entry);
7192
7193 /*
7194 * Determine whether the entire region
7195 * has been copied.
7196 */
7197 src_base = src_start;
7198 src_start = new_entry->vme_end;
7199 new_entry = VM_MAP_ENTRY_NULL;
7200 while ((src_start >= src_end) && (src_end != 0)) {
7201 if (src_map != base_map) {
7202 submap_map_t *ptr;
7203
7204 ptr = parent_maps;
7205 assert(ptr != NULL);
7206 parent_maps = parent_maps->next;
7207
7208 /* fix up the damage we did in that submap */
7209 vm_map_simplify_range(src_map,
7210 src_base,
7211 src_end);
7212
7213 vm_map_unlock(src_map);
7214 vm_map_deallocate(src_map);
7215 vm_map_lock(ptr->parent_map);
7216 src_map = ptr->parent_map;
7217 src_base = ptr->base_start;
7218 src_start = ptr->base_start + ptr->base_len;
7219 src_end = ptr->base_end;
7220 if ((src_end > src_start) &&
7221 !vm_map_lookup_entry(
7222 src_map, src_start, &tmp_entry))
7223 RETURN(KERN_INVALID_ADDRESS);
7224 kfree(ptr, sizeof(submap_map_t));
7225 if(parent_maps == NULL)
7226 map_share = FALSE;
7227 src_entry = tmp_entry->vme_prev;
7228 } else
7229 break;
7230 }
7231 if ((src_start >= src_end) && (src_end != 0))
7232 break;
7233
7234 /*
7235 * Verify that there are no gaps in the region
7236 */
7237
7238 tmp_entry = src_entry->vme_next;
7239 if ((tmp_entry->vme_start != src_start) ||
7240 (tmp_entry == vm_map_to_entry(src_map)))
7241 RETURN(KERN_INVALID_ADDRESS);
7242 }
7243
7244 /*
7245 * If the source should be destroyed, do it now, since the
7246 * copy was successful.
7247 */
7248 if (src_destroy) {
7249 (void) vm_map_delete(src_map,
7250 vm_map_trunc_page(src_addr),
7251 src_end,
7252 (src_map == kernel_map) ?
7253 VM_MAP_REMOVE_KUNWIRE :
7254 VM_MAP_NO_FLAGS,
7255 VM_MAP_NULL);
7256 } else {
7257 /* fix up the damage we did in the base map */
7258 vm_map_simplify_range(src_map,
7259 vm_map_trunc_page(src_addr),
7260 vm_map_round_page(src_end));
7261 }
7262
7263 vm_map_unlock(src_map);
7264
7265 /* Fix-up start and end points in copy. This is necessary */
7266 /* when the various entries in the copy object were picked */
7267 /* up from different sub-maps */
7268
7269 tmp_entry = vm_map_copy_first_entry(copy);
7270 while (tmp_entry != vm_map_copy_to_entry(copy)) {
7271 tmp_entry->vme_end = copy_addr +
7272 (tmp_entry->vme_end - tmp_entry->vme_start);
7273 tmp_entry->vme_start = copy_addr;
7274 copy_addr += tmp_entry->vme_end - tmp_entry->vme_start;
7275 tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next;
7276 }
7277
7278 *copy_result = copy;
7279 return(KERN_SUCCESS);
7280
7281 #undef RETURN
7282 }
7283
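/*
 * Illustrative sketch (for exposition only): using the copy object
 * produced by vm_map_copyin() to overwrite an existing, already
 * mapped region instead of allocating new space.  This assumes the
 * vm_map_copy_overwrite() routine declared in vm/vm_map.h; the
 * routine name "vm_map_overwrite_example" is hypothetical.
 */
static __unused kern_return_t
vm_map_overwrite_example(
        vm_map_t                src_map,
        vm_map_address_t        src_addr,
        vm_map_t                dst_map,
        vm_map_address_t        dst_addr,
        vm_map_size_t           len)
{
        vm_map_copy_t   copy;
        kern_return_t   kr;

        kr = vm_map_copyin(src_map, src_addr, len, FALSE, &copy);
        if (kr != KERN_SUCCESS)
                return kr;

        /* overwrite the destination region in place */
        kr = vm_map_copy_overwrite(dst_map, dst_addr, copy,
                                   FALSE);      /* not interruptible */
        if (kr != KERN_SUCCESS) {
                /* on failure the copy is assumed to remain the caller's */
                vm_map_copy_discard(copy);
        }
        return kr;
}
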
7284 /*
7285 * vm_map_copyin_object:
7286 *
7287 * Create a copy object from an object.
7288 * Our caller donates an object reference.
7289 */
7290
7291 kern_return_t
7292 vm_map_copyin_object(
7293 vm_object_t object,
7294 vm_object_offset_t offset, /* offset of region in object */
7295 vm_object_size_t size, /* size of region in object */
7296 vm_map_copy_t *copy_result) /* OUT */
7297 {
7298 vm_map_copy_t copy; /* Resulting copy */
7299
7300 /*
7301 * We drop the object into a special copy object
7302 * that contains the object directly.
7303 */
7304
7305 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
7306 copy->type = VM_MAP_COPY_OBJECT;
7307 copy->cpy_object = object;
7308 copy->offset = offset;
7309 copy->size = size;
7310
7311 *copy_result = copy;
7312 return(KERN_SUCCESS);
7313 }
7314
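/*
 * Illustrative sketch (for exposition only): wrapping an existing VM
 * object in a copy object and handing it to vm_map_copyout().  The
 * object reference is donated to vm_map_copyin_object(), as required
 * above; the routine name "vm_map_copyin_object_example" is
 * hypothetical.
 */
static __unused kern_return_t
vm_map_copyin_object_example(
        vm_map_t                dst_map,
        vm_object_t             object,         /* reference donated */
        vm_object_offset_t      offset,
        vm_object_size_t        size,
        vm_map_address_t        *dst_addr)      /* OUT */
{
        vm_map_copy_t   copy;
        kern_return_t   kr;

        kr = vm_map_copyin_object(object, offset, size, &copy);
        if (kr != KERN_SUCCESS)
                return kr;

        /*
         * vm_map_copyout() recognizes VM_MAP_COPY_OBJECT copies and
         * simply enters the object into the destination map.
         */
        kr = vm_map_copyout(dst_map, dst_addr, copy);
        if (kr != KERN_SUCCESS)
                vm_map_copy_discard(copy);
        return kr;
}
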
7315 static void
7316 vm_map_fork_share(
7317 vm_map_t old_map,
7318 vm_map_entry_t old_entry,
7319 vm_map_t new_map)
7320 {
7321 vm_object_t object;
7322 vm_map_entry_t new_entry;
7323
7324 /*
7325 * New sharing code. New map entry
7326 * references original object. Internal
7327 * objects use asynchronous copy algorithm for
7328 * future copies. First make sure we have
7329 * the right object. If we need a shadow,
7330 * or someone else already has one, then
7331 * make a new shadow and share it.
7332 */
7333
7334 object = old_entry->object.vm_object;
7335 if (old_entry->is_sub_map) {
7336 assert(old_entry->wired_count == 0);
7337 #ifndef NO_NESTED_PMAP
7338 if(old_entry->use_pmap) {
7339 kern_return_t result;
7340
7341 result = pmap_nest(new_map->pmap,
7342 (old_entry->object.sub_map)->pmap,
7343 (addr64_t)old_entry->vme_start,
7344 (addr64_t)old_entry->vme_start,
7345 (uint64_t)(old_entry->vme_end - old_entry->vme_start));
7346 if(result)
7347 panic("vm_map_fork_share: pmap_nest failed!");
7348 }
7349 #endif /* NO_NESTED_PMAP */
7350 } else if (object == VM_OBJECT_NULL) {
7351 object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
7352 old_entry->vme_start));
7353 old_entry->offset = 0;
7354 old_entry->object.vm_object = object;
7355 assert(!old_entry->needs_copy);
7356 } else if (object->copy_strategy !=
7357 MEMORY_OBJECT_COPY_SYMMETRIC) {
7358
7359 /*
7360 * We are already using an asymmetric
7361 * copy, and therefore we already have
7362 * the right object.
7363 */
7364
7365 assert(! old_entry->needs_copy);
7366 }
7367 else if (old_entry->needs_copy || /* case 1 */
7368 object->shadowed || /* case 2 */
7369 (!object->true_share && /* case 3 */
7370 !old_entry->is_shared &&
7371 (object->size >
7372 (vm_map_size_t)(old_entry->vme_end -
7373 old_entry->vme_start)))) {
7374
7375 /*
7376 * We need to create a shadow.
7377 * There are three cases here.
7378 * In the first case, we need to
7379 * complete a deferred symmetrical
7380 * copy that we participated in.
7381 * In the second and third cases,
7382 * we need to create the shadow so
7383 * that changes that we make to the
7384 * object do not interfere with
7385 * any symmetrical copies which
7386 * have occurred (case 2) or which
7387 * might occur (case 3).
7388 *
7389 * The first case is when we had
7390 * deferred shadow object creation
7391 * via the entry->needs_copy mechanism.
7392 * This mechanism only works when
7393 * only one entry points to the source
7394 * object, and we are about to create
7395 * a second entry pointing to the
7396 * same object. The problem is that
7397 * there is no way of mapping from
7398 * an object to the entries pointing
7399 * to it. (Deferred shadow creation
7400 * works with one entry because it occurs
7401 * at fault time, and we walk from the
7402 * entry to the object when handling
7403 * the fault.)
7404 *
7405 * The second case is when the object
7406 * to be shared has already been copied
7407 * with a symmetric copy, but we point
7408 * directly to the object without
7409 * needs_copy set in our entry. (This
7410 * can happen because different ranges
7411 * of an object can be pointed to by
7412 * different entries. In particular,
7413 * a single entry pointing to an object
7414 * can be split by a call to vm_inherit,
7415 * which, combined with task_create, can
7416 * result in the different entries
7417 * having different needs_copy values.)
7418 * The shadowed flag in the object allows
7419 * us to detect this case. The problem
7420 * with this case is that if this object
7421 * has or will have shadows, then we
7422 * must not perform an asymmetric copy
7423 * of this object, since such a copy
7424 * allows the object to be changed, which
7425 * will break the previous symmetrical
7426 * copies (which rely upon the object
7427 * not changing). In a sense, the shadowed
7428 * flag says "don't change this object".
7429 * We fix this by creating a shadow
7430 * object for this object, and sharing
7431 * that. This works because we are free
7432 * to change the shadow object (and thus
7433 * to use an asymmetric copy strategy);
7434 * this is also semantically correct,
7435 * since this object is temporary, and
7436 * therefore a copy of the object is
7437 * as good as the object itself. (This
7438 * is not true for permanent objects,
7439 * since the pager needs to see changes,
7440 * which won't happen if the changes
7441 * are made to a copy.)
7442 *
7443 * The third case is when the object
7444 * to be shared has parts sticking
7445 * outside of the entry we're working
7446 * with, and thus may in the future
7447 * be subject to a symmetrical copy.
7448 * (This is a preemptive version of
7449 * case 2.)
7450 */
7451
7452 vm_object_shadow(&old_entry->object.vm_object,
7453 &old_entry->offset,
7454 (vm_map_size_t) (old_entry->vme_end -
7455 old_entry->vme_start));
7456
7457 /*
7458 * If we're making a shadow for other than
7459 * copy on write reasons, then we have
7460 * to remove write permission.
7461 */
7462
7463 if (!old_entry->needs_copy &&
7464 (old_entry->protection & VM_PROT_WRITE)) {
7465 vm_prot_t prot;
7466
7467 prot = old_entry->protection & ~VM_PROT_WRITE;
7468
7469 if (override_nx(old_map, old_entry->alias) && prot)
7470 prot |= VM_PROT_EXECUTE;
7471
7472 if (old_map->mapped) {
7473 vm_object_pmap_protect(
7474 old_entry->object.vm_object,
7475 old_entry->offset,
7476 (old_entry->vme_end -
7477 old_entry->vme_start),
7478 PMAP_NULL,
7479 old_entry->vme_start,
7480 prot);
7481 } else {
7482 pmap_protect(old_map->pmap,
7483 old_entry->vme_start,
7484 old_entry->vme_end,
7485 prot);
7486 }
7487 }
7488
7489 old_entry->needs_copy = FALSE;
7490 object = old_entry->object.vm_object;
7491 }
7492
7493 /*
7494 * If object was using a symmetric copy strategy,
7495 * change its copy strategy to the default
7496 * asymmetric copy strategy, which is copy_delay
7497 * in the non-norma case and copy_call in the
7498 * norma case. Bump the reference count for the
7499 * new entry.
7500 */
7501
7502 if(old_entry->is_sub_map) {
7503 vm_map_lock(old_entry->object.sub_map);
7504 vm_map_reference(old_entry->object.sub_map);
7505 vm_map_unlock(old_entry->object.sub_map);
7506 } else {
7507 vm_object_lock(object);
7508 vm_object_reference_locked(object);
7509 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
7510 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
7511 }
7512 vm_object_unlock(object);
7513 }
7514
7515 /*
7516 * Clone the entry, using object ref from above.
7517 * Mark both entries as shared.
7518 */
7519
7520 new_entry = vm_map_entry_create(new_map);
7521 vm_map_entry_copy(new_entry, old_entry);
7522 old_entry->is_shared = TRUE;
7523 new_entry->is_shared = TRUE;
7524
7525 /*
7526 * Insert the entry into the new map -- we
7527 * know we're inserting at the end of the new
7528 * map.
7529 */
7530
7531 vm_map_entry_link(new_map, vm_map_last_entry(new_map), new_entry);
7532
7533 /*
7534 * Update the physical map
7535 */
7536
7537 if (old_entry->is_sub_map) {
7538 /* Bill Angell pmap support goes here */
7539 } else {
7540 pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
7541 old_entry->vme_end - old_entry->vme_start,
7542 old_entry->vme_start);
7543 }
7544 }
7545
7546 static boolean_t
7547 vm_map_fork_copy(
7548 vm_map_t old_map,
7549 vm_map_entry_t *old_entry_p,
7550 vm_map_t new_map)
7551 {
7552 vm_map_entry_t old_entry = *old_entry_p;
7553 vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start;
7554 vm_map_offset_t start = old_entry->vme_start;
7555 vm_map_copy_t copy;
7556 vm_map_entry_t last = vm_map_last_entry(new_map);
7557
7558 vm_map_unlock(old_map);
7559 /*
7560 * Use maxprot version of copyin because we
7561 * care about whether this memory can ever
7562 * be accessed, not just whether it's accessible
7563 * right now.
7564 */
7565 if (vm_map_copyin_maxprot(old_map, start, entry_size, FALSE, &copy)
7566 != KERN_SUCCESS) {
7567 /*
7568 * The map might have changed while it
7569 * was unlocked; check it again. Skip
7570 * any blank space or permanently
7571 * unreadable region.
7572 */
7573 vm_map_lock(old_map);
7574 if (!vm_map_lookup_entry(old_map, start, &last) ||
7575 (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) {
7576 last = last->vme_next;
7577 }
7578 *old_entry_p = last;
7579
7580 /*
7581 * XXX For some error returns, want to
7582 * XXX skip to the next element. Note
7583 * that INVALID_ADDRESS and
7584 * PROTECTION_FAILURE are handled above.
7585 */
7586
7587 return FALSE;
7588 }
7589
7590 /*
7591 * Insert the copy into the new map
7592 */
7593
7594 vm_map_copy_insert(new_map, last, copy);
7595
7596 /*
7597 * Pick up the traversal at the end of
7598 * the copied region.
7599 */
7600
7601 vm_map_lock(old_map);
7602 start += entry_size;
7603 if (! vm_map_lookup_entry(old_map, start, &last)) {
7604 last = last->vme_next;
7605 } else {
7606 if (last->vme_start == start) {
7607 /*
7608 * No need to clip here and we don't
7609 * want to cause any unnecessary
7610 * unnesting...
7611 */
7612 } else {
7613 vm_map_clip_start(old_map, last, start);
7614 }
7615 }
7616 *old_entry_p = last;
7617
7618 return TRUE;
7619 }
7620
7621 /*
7622 * vm_map_fork:
7623 *
7624 * Create and return a new map based on the old
7625 * map, according to the inheritance values on the
7626 * regions in that map.
7627 *
7628 * The source map must not be locked.
7629 */
7630 vm_map_t
7631 vm_map_fork(
7632 vm_map_t old_map)
7633 {
7634 pmap_t new_pmap;
7635 vm_map_t new_map;
7636 vm_map_entry_t old_entry;
7637 vm_map_size_t new_size = 0, entry_size;
7638 vm_map_entry_t new_entry;
7639 boolean_t src_needs_copy;
7640 boolean_t new_entry_needs_copy;
7641
7642 #ifdef __i386__
7643 new_pmap = pmap_create((vm_map_size_t) 0,
7644 old_map->pmap->pm_task_map != TASK_MAP_32BIT);
7645 if (old_map->pmap->pm_task_map == TASK_MAP_64BIT_SHARED)
7646 pmap_set_4GB_pagezero(new_pmap);
7647 #else
7648 new_pmap = pmap_create((vm_map_size_t) 0, 0);
7649 #endif
7650
7651 vm_map_reference_swap(old_map);
7652 vm_map_lock(old_map);
7653
7654 new_map = vm_map_create(new_pmap,
7655 old_map->min_offset,
7656 old_map->max_offset,
7657 old_map->hdr.entries_pageable);
7658
7659 for (
7660 old_entry = vm_map_first_entry(old_map);
7661 old_entry != vm_map_to_entry(old_map);
7662 ) {
7663
7664 entry_size = old_entry->vme_end - old_entry->vme_start;
7665
7666 switch (old_entry->inheritance) {
7667 case VM_INHERIT_NONE:
7668 break;
7669
7670 case VM_INHERIT_SHARE:
7671 vm_map_fork_share(old_map, old_entry, new_map);
7672 new_size += entry_size;
7673 break;
7674
7675 case VM_INHERIT_COPY:
7676
7677 /*
7678 * Inline the copy_quickly case;
7679 * upon failure, fall back on call
7680 * to vm_map_fork_copy.
7681 */
7682
7683 if(old_entry->is_sub_map)
7684 break;
7685 if ((old_entry->wired_count != 0) ||
7686 ((old_entry->object.vm_object != NULL) &&
7687 (old_entry->object.vm_object->true_share))) {
7688 goto slow_vm_map_fork_copy;
7689 }
7690
7691 new_entry = vm_map_entry_create(new_map);
7692 vm_map_entry_copy(new_entry, old_entry);
7693 /* clear address space specifics */
7694 new_entry->use_pmap = FALSE;
7695
7696 if (! vm_object_copy_quickly(
7697 &new_entry->object.vm_object,
7698 old_entry->offset,
7699 (old_entry->vme_end -
7700 old_entry->vme_start),
7701 &src_needs_copy,
7702 &new_entry_needs_copy)) {
7703 vm_map_entry_dispose(new_map, new_entry);
7704 goto slow_vm_map_fork_copy;
7705 }
7706
7707 /*
7708 * Handle copy-on-write obligations
7709 */
7710
7711 if (src_needs_copy && !old_entry->needs_copy) {
7712 vm_prot_t prot;
7713
7714 prot = old_entry->protection & ~VM_PROT_WRITE;
7715
7716 if (override_nx(old_map, old_entry->alias) && prot)
7717 prot |= VM_PROT_EXECUTE;
7718
7719 vm_object_pmap_protect(
7720 old_entry->object.vm_object,
7721 old_entry->offset,
7722 (old_entry->vme_end -
7723 old_entry->vme_start),
7724 ((old_entry->is_shared
7725 || old_map->mapped)
7726 ? PMAP_NULL :
7727 old_map->pmap),
7728 old_entry->vme_start,
7729 prot);
7730
7731 old_entry->needs_copy = TRUE;
7732 }
7733 new_entry->needs_copy = new_entry_needs_copy;
7734
7735 /*
7736 * Insert the entry at the end
7737 * of the map.
7738 */
7739
7740 vm_map_entry_link(new_map, vm_map_last_entry(new_map),
7741 new_entry);
7742 new_size += entry_size;
7743 break;
7744
7745 slow_vm_map_fork_copy:
7746 if (vm_map_fork_copy(old_map, &old_entry, new_map)) {
7747 new_size += entry_size;
7748 }
7749 continue;
7750 }
7751 old_entry = old_entry->vme_next;
7752 }
7753
7754 new_map->size = new_size;
7755 vm_map_unlock(old_map);
7756 vm_map_deallocate(old_map);
7757
7758 return(new_map);
7759 }
7760
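/*
 * Illustrative sketch (for exposition only): how the inheritance
 * attribute drives vm_map_fork().  This assumes the vm_map_inherit()
 * routine declared in vm/vm_map.h; the routine name
 * "vm_map_fork_example" and the address range are hypothetical.
 */
static __unused vm_map_t
vm_map_fork_example(
        vm_map_t        parent_map,
        vm_map_offset_t start,
        vm_map_offset_t end)
{
        /*
         * VM_INHERIT_SHARE regions are shared with the child
         * (vm_map_fork_share), VM_INHERIT_COPY regions are copied,
         * copy-on-write where possible (vm_map_fork_copy), and
         * VM_INHERIT_NONE regions are omitted from the child.
         */
        (void) vm_map_inherit(parent_map, start, end, VM_INHERIT_SHARE);

        return vm_map_fork(parent_map);
}
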
7761 /*
7762 * vm_map_exec:
7763 *
7764 * Setup the "new_map" with the proper execution environment according
7765 * to the type of executable (platform, 64bit, chroot environment).
7766 * Map the comm page and shared region, etc...
7767 */
7768 kern_return_t
7769 vm_map_exec(
7770 vm_map_t new_map,
7771 task_t task,
7772 void *fsroot,
7773 cpu_type_t cpu)
7774 {
7775 SHARED_REGION_TRACE_DEBUG(
7776 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): ->\n",
7777 current_task(), new_map, task, fsroot, cpu));
7778 (void) vm_commpage_enter(new_map, task);
7779 (void) vm_shared_region_enter(new_map, task, fsroot, cpu);
7780 SHARED_REGION_TRACE_DEBUG(
7781 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): <-\n",
7782 current_task(), new_map, task, fsroot, cpu));
7783 return KERN_SUCCESS;
7784 }
7785
7786 /*
7787 * vm_map_lookup_locked:
7788 *
7789 * Finds the VM object, offset, and
7790 * protection for a given virtual address in the
7791 * specified map, assuming a page fault of the
7792 * type specified.
7793 *
7794 * Returns the (object, offset, protection) for
7795 * this address, whether it is wired down, and whether
7796 * this map has the only reference to the data in question.
7797 * In order to later verify this lookup, a "version"
7798 * is returned.
7799 *
7800 * The map MUST be locked by the caller and WILL be
7801 * locked on exit. In order to guarantee the
7802 * existence of the returned object, it is returned
7803 * locked.
7804 *
7805 * If a lookup is requested with "write protection"
7806 * specified, the map may be changed to perform virtual
7807 * copying operations, although the data referenced will
7808 * remain the same.
7809 */
7810 kern_return_t
7811 vm_map_lookup_locked(
7812 vm_map_t *var_map, /* IN/OUT */
7813 vm_map_offset_t vaddr,
7814 vm_prot_t fault_type,
7815 int object_lock_type,
7816 vm_map_version_t *out_version, /* OUT */
7817 vm_object_t *object, /* OUT */
7818 vm_object_offset_t *offset, /* OUT */
7819 vm_prot_t *out_prot, /* OUT */
7820 boolean_t *wired, /* OUT */
7821 vm_object_fault_info_t fault_info, /* OUT */
7822 vm_map_t *real_map)
7823 {
7824 vm_map_entry_t entry;
7825 register vm_map_t map = *var_map;
7826 vm_map_t old_map = *var_map;
7827 vm_map_t cow_sub_map_parent = VM_MAP_NULL;
7828 vm_map_offset_t cow_parent_vaddr = 0;
7829 vm_map_offset_t old_start = 0;
7830 vm_map_offset_t old_end = 0;
7831 register vm_prot_t prot;
7832
7833 *real_map = map;
7834 RetryLookup: ;
7835
7836 /*
7837 * If the map has an interesting hint, try it before calling
7838 * full blown lookup routine.
7839 */
7840 entry = map->hint;
7841
7842 if ((entry == vm_map_to_entry(map)) ||
7843 (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
7844 vm_map_entry_t tmp_entry;
7845
7846 /*
7847 * Entry was either not a valid hint, or the vaddr
7848 * was not contained in the entry, so do a full lookup.
7849 */
7850 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
7851 if((cow_sub_map_parent) && (cow_sub_map_parent != map))
7852 vm_map_unlock(cow_sub_map_parent);
7853 if((*real_map != map)
7854 && (*real_map != cow_sub_map_parent))
7855 vm_map_unlock(*real_map);
7856 return KERN_INVALID_ADDRESS;
7857 }
7858
7859 entry = tmp_entry;
7860 }
7861 if(map == old_map) {
7862 old_start = entry->vme_start;
7863 old_end = entry->vme_end;
7864 }
7865
7866 /*
7867 * Handle submaps. Drop lock on upper map, submap is
7868 * returned locked.
7869 */
7870
7871 submap_recurse:
7872 if (entry->is_sub_map) {
7873 vm_map_offset_t local_vaddr;
7874 vm_map_offset_t end_delta;
7875 vm_map_offset_t start_delta;
7876 vm_map_entry_t submap_entry;
7877 boolean_t mapped_needs_copy=FALSE;
7878
7879 local_vaddr = vaddr;
7880
7881 if ((entry->use_pmap && !(fault_type & VM_PROT_WRITE))) {
7882 /* if real_map equals map we unlock below */
7883 if ((*real_map != map) &&
7884 (*real_map != cow_sub_map_parent))
7885 vm_map_unlock(*real_map);
7886 *real_map = entry->object.sub_map;
7887 }
7888
7889 if(entry->needs_copy && (fault_type & VM_PROT_WRITE)) {
7890 if (!mapped_needs_copy) {
7891 if (vm_map_lock_read_to_write(map)) {
7892 vm_map_lock_read(map);
7893 /* XXX FBDP: entry still valid ? */
7894 if(*real_map == entry->object.sub_map)
7895 *real_map = map;
7896 goto RetryLookup;
7897 }
7898 vm_map_lock_read(entry->object.sub_map);
7899 cow_sub_map_parent = map;
7900 /* reset base to map before cow object */
7901 /* this is the map which will accept */
7902 /* the new cow object */
7903 old_start = entry->vme_start;
7904 old_end = entry->vme_end;
7905 cow_parent_vaddr = vaddr;
7906 mapped_needs_copy = TRUE;
7907 } else {
7908 vm_map_lock_read(entry->object.sub_map);
7909 if((cow_sub_map_parent != map) &&
7910 (*real_map != map))
7911 vm_map_unlock(map);
7912 }
7913 } else {
7914 vm_map_lock_read(entry->object.sub_map);
7915 /* leave map locked if it is a target */
7916 /* cow sub_map above otherwise, just */
7917 /* follow the maps down to the object */
7918 /* here we unlock knowing we are not */
7919 /* revisiting the map. */
7920 if((*real_map != map) && (map != cow_sub_map_parent))
7921 vm_map_unlock_read(map);
7922 }
7923
7924 /* XXX FBDP: map has been unlocked, what protects "entry" !? */
7925 *var_map = map = entry->object.sub_map;
7926
7927 /* calculate the offset in the submap for vaddr */
7928 local_vaddr = (local_vaddr - entry->vme_start) + entry->offset;
7929
7930 RetrySubMap:
7931 if(!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
7932 if((cow_sub_map_parent) && (cow_sub_map_parent != map)){
7933 vm_map_unlock(cow_sub_map_parent);
7934 }
7935 if((*real_map != map)
7936 && (*real_map != cow_sub_map_parent)) {
7937 vm_map_unlock(*real_map);
7938 }
7939 *real_map = map;
7940 return KERN_INVALID_ADDRESS;
7941 }
7942
7943 /* find the attenuated shadow of the underlying object */
7944 /* on our target map */
7945
7946 /* in English, the submap object may extend beyond the */
7947 /* region mapped by the entry or, may only fill a portion */
7948 /* of it. For our purposes, we only care if the object */
7949 /* doesn't fill. In this case the area which will */
7950 /* ultimately be clipped in the top map will only need */
7951 /* to be as big as the portion of the underlying entry */
7952 /* which is mapped */
7953 start_delta = submap_entry->vme_start > entry->offset ?
7954 submap_entry->vme_start - entry->offset : 0;
7955
7956 end_delta =
7957 (entry->offset + start_delta + (old_end - old_start)) <=
7958 submap_entry->vme_end ?
7959 0 : (entry->offset +
7960 (old_end - old_start))
7961 - submap_entry->vme_end;
7962
7963 old_start += start_delta;
7964 old_end -= end_delta;
7965
7966 if(submap_entry->is_sub_map) {
7967 entry = submap_entry;
7968 vaddr = local_vaddr;
7969 goto submap_recurse;
7970 }
7971
7972 if(((fault_type & VM_PROT_WRITE) && cow_sub_map_parent)) {
7973
7974 vm_object_t sub_object, copy_object;
7975 vm_object_offset_t copy_offset;
7976 vm_map_offset_t local_start;
7977 vm_map_offset_t local_end;
7978 boolean_t copied_slowly = FALSE;
7979
7980 if (vm_map_lock_read_to_write(map)) {
7981 vm_map_lock_read(map);
7982 old_start -= start_delta;
7983 old_end += end_delta;
7984 goto RetrySubMap;
7985 }
7986
7987
7988 sub_object = submap_entry->object.vm_object;
7989 if (sub_object == VM_OBJECT_NULL) {
7990 sub_object =
7991 vm_object_allocate(
7992 (vm_map_size_t)
7993 (submap_entry->vme_end -
7994 submap_entry->vme_start));
7995 submap_entry->object.vm_object = sub_object;
7996 submap_entry->offset = 0;
7997 }
7998 local_start = local_vaddr -
7999 (cow_parent_vaddr - old_start);
8000 local_end = local_vaddr +
8001 (old_end - cow_parent_vaddr);
8002 vm_map_clip_start(map, submap_entry, local_start);
8003 vm_map_clip_end(map, submap_entry, local_end);
8004 /* unnesting was done in vm_map_clip_start/end() */
8005 assert(!submap_entry->use_pmap);
8006
8007 /* This is the COW case, let's connect */
8008 /* an entry in our space to the underlying */
8009 /* object in the submap, bypassing the */
8010 /* submap. */
8011
8012
8013 if(submap_entry->wired_count != 0 ||
8014 (sub_object->copy_strategy ==
8015 MEMORY_OBJECT_COPY_NONE)) {
8016 vm_object_lock(sub_object);
8017 vm_object_copy_slowly(sub_object,
8018 submap_entry->offset,
8019 (submap_entry->vme_end -
8020 submap_entry->vme_start),
8021 FALSE,
8022 &copy_object);
8023 copied_slowly = TRUE;
8024 } else {
8025
8026 /* set up shadow object */
8027 copy_object = sub_object;
8028 vm_object_reference(copy_object);
8029 sub_object->shadowed = TRUE;
8030 submap_entry->needs_copy = TRUE;
8031
8032 prot = submap_entry->protection & ~VM_PROT_WRITE;
8033
8034 if (override_nx(map, submap_entry->alias) && prot)
8035 prot |= VM_PROT_EXECUTE;
8036
8037 vm_object_pmap_protect(
8038 sub_object,
8039 submap_entry->offset,
8040 submap_entry->vme_end -
8041 submap_entry->vme_start,
8042 (submap_entry->is_shared
8043 || map->mapped) ?
8044 PMAP_NULL : map->pmap,
8045 submap_entry->vme_start,
8046 prot);
8047 }
8048
8049 /*
8050 * Adjust the fault offset to the submap entry.
8051 */
8052 copy_offset = (local_vaddr -
8053 submap_entry->vme_start +
8054 submap_entry->offset);
8055
8056 /* This works differently from the */
8057 /* normal submap case. We go back */
8058 /* to the parent of the cow map and*/
8059 /* clip out the target portion of */
8060 /* the sub_map, substituting the */
8061 /* new copy object, */
8062
8063 vm_map_unlock(map);
8064 local_start = old_start;
8065 local_end = old_end;
8066 map = cow_sub_map_parent;
8067 *var_map = cow_sub_map_parent;
8068 vaddr = cow_parent_vaddr;
8069 cow_sub_map_parent = NULL;
8070
8071 if(!vm_map_lookup_entry(map,
8072 vaddr, &entry)) {
8073 vm_object_deallocate(
8074 copy_object);
8075 vm_map_lock_write_to_read(map);
8076 return KERN_INVALID_ADDRESS;
8077 }
8078
8079 /* clip out the portion of space */
8080 /* mapped by the sub map which */
8081 /* corresponds to the underlying */
8082 /* object */
8083
8084 /*
8085 * Clip (and unnest) the smallest nested chunk
8086 * possible around the faulting address...
8087 */
8088 local_start = vaddr & ~(pmap_nesting_size_min - 1);
8089 local_end = local_start + pmap_nesting_size_min;
8090 /*
8091 * ... but don't go beyond the "old_start" to "old_end"
8092 * range, to avoid spanning over another VM region
8093 * with a possibly different VM object and/or offset.
8094 */
8095 if (local_start < old_start) {
8096 local_start = old_start;
8097 }
8098 if (local_end > old_end) {
8099 local_end = old_end;
8100 }
8101 /*
8102 * Adjust copy_offset to the start of the range.
8103 */
8104 copy_offset -= (vaddr - local_start);
8105
8106 vm_map_clip_start(map, entry, local_start);
8107 vm_map_clip_end(map, entry, local_end);
8108 /* unnesting was done in vm_map_clip_start/end() */
8109 assert(!entry->use_pmap);
8110
8111 /* substitute copy object for */
8112 /* shared map entry */
8113 vm_map_deallocate(entry->object.sub_map);
8114 entry->is_sub_map = FALSE;
8115 entry->object.vm_object = copy_object;
8116
8117 /* propagate the submap entry's protections */
8118 entry->protection |= submap_entry->protection;
8119 entry->max_protection |= submap_entry->max_protection;
8120
8121 if(copied_slowly) {
8122 entry->offset = local_start - old_start;
8123 entry->needs_copy = FALSE;
8124 entry->is_shared = FALSE;
8125 } else {
8126 entry->offset = copy_offset;
8127 entry->needs_copy = TRUE;
8128 if(entry->inheritance == VM_INHERIT_SHARE)
8129 entry->inheritance = VM_INHERIT_COPY;
8130 if (map != old_map)
8131 entry->is_shared = TRUE;
8132 }
8133 if(entry->inheritance == VM_INHERIT_SHARE)
8134 entry->inheritance = VM_INHERIT_COPY;
8135
8136 vm_map_lock_write_to_read(map);
8137 } else {
8138 if((cow_sub_map_parent)
8139 && (cow_sub_map_parent != *real_map)
8140 && (cow_sub_map_parent != map)) {
8141 vm_map_unlock(cow_sub_map_parent);
8142 }
8143 entry = submap_entry;
8144 vaddr = local_vaddr;
8145 }
8146 }
8147
8148 /*
8149 * Check whether this task is allowed to have
8150 * this page.
8151 */
8152
8153 prot = entry->protection;
8154
8155 if (override_nx(map, entry->alias) && prot) {
8156 /*
8157 * HACK -- if not a stack, then allow execution
8158 */
8159 prot |= VM_PROT_EXECUTE;
8160 }
8161
8162 if ((fault_type & (prot)) != fault_type) {
8163 if (*real_map != map) {
8164 vm_map_unlock(*real_map);
8165 }
8166 *real_map = map;
8167
8168 if ((fault_type & VM_PROT_EXECUTE) && prot)
8169 log_stack_execution_failure((addr64_t)vaddr, prot);
8170
8171 DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL);
8172 return KERN_PROTECTION_FAILURE;
8173 }
8174
8175 /*
8176 * If this page is not pageable, we have to get
8177 * it for all possible accesses.
8178 */
8179
8180 *wired = (entry->wired_count != 0);
8181 if (*wired)
8182 fault_type = prot;
8183
8184 /*
8185 * If the entry was copy-on-write, we either perform the copy now or demote the permissions allowed.
8186 */
8187
8188 if (entry->needs_copy) {
8189 /*
8190 * If we want to write the page, we may as well
8191 * handle that now since we've got the map locked.
8192 *
8193 * If we don't need to write the page, we just
8194 * demote the permissions allowed.
8195 */
8196
8197 if ((fault_type & VM_PROT_WRITE) || *wired) {
8198 /*
8199 * Make a new object, and place it in the
8200 * object chain. Note that no new references
8201 * have appeared -- one just moved from the
8202 * map to the new object.
8203 */
8204
8205 if (vm_map_lock_read_to_write(map)) {
8206 vm_map_lock_read(map);
8207 goto RetryLookup;
8208 }
8209 vm_object_shadow(&entry->object.vm_object,
8210 &entry->offset,
8211 (vm_map_size_t) (entry->vme_end -
8212 entry->vme_start));
8213
8214 entry->object.vm_object->shadowed = TRUE;
8215 entry->needs_copy = FALSE;
8216 vm_map_lock_write_to_read(map);
8217 }
8218 else {
8219 /*
8220 * We're attempting to read a copy-on-write
8221 * page -- don't allow writes.
8222 */
8223
8224 prot &= (~VM_PROT_WRITE);
8225 }
8226 }
8227
8228 /*
8229 * Create an object if necessary.
8230 */
8231 if (entry->object.vm_object == VM_OBJECT_NULL) {
8232
8233 if (vm_map_lock_read_to_write(map)) {
8234 vm_map_lock_read(map);
8235 goto RetryLookup;
8236 }
8237
8238 entry->object.vm_object = vm_object_allocate(
8239 (vm_map_size_t)(entry->vme_end - entry->vme_start));
8240 entry->offset = 0;
8241 vm_map_lock_write_to_read(map);
8242 }
8243
8244 /*
8245 * Return the object/offset from this entry. If the entry
8246 * was copy-on-write or empty, it has been fixed up. Also
8247 * return the protection.
8248 */
8249
8250 *offset = (vaddr - entry->vme_start) + entry->offset;
8251 *object = entry->object.vm_object;
8252 *out_prot = prot;
8253
8254 if (fault_info) {
8255 fault_info->interruptible = THREAD_UNINT; /* for now... */
8256 /* ... the caller will change "interruptible" if needed */
8257 fault_info->cluster_size = 0;
8258 fault_info->user_tag = entry->alias;
8259 fault_info->behavior = entry->behavior;
8260 fault_info->lo_offset = entry->offset;
8261 fault_info->hi_offset = (entry->vme_end - entry->vme_start) + entry->offset;
8262 fault_info->no_cache = entry->no_cache;
8263 }
8264
8265 /*
8266 * Lock the object to prevent it from disappearing
8267 */
8268 if (object_lock_type == OBJECT_LOCK_EXCLUSIVE)
8269 vm_object_lock(*object);
8270 else
8271 vm_object_lock_shared(*object);
8272
8273 /*
8274 * Save the version number
8275 */
8276
8277 out_version->main_timestamp = map->timestamp;
8278
8279 return KERN_SUCCESS;
8280 }
8281
8282
8283 /*
8284 * vm_map_verify:
8285 *
8286 * Verifies that the map in question has not changed
8287 * since the given version. If successful, the map
8288 * will not change until vm_map_verify_done() is called.
8289 */
8290 boolean_t
8291 vm_map_verify(
8292 register vm_map_t map,
8293 register vm_map_version_t *version) /* REF */
8294 {
8295 boolean_t result;
8296
8297 vm_map_lock_read(map);
8298 result = (map->timestamp == version->main_timestamp);
8299
8300 if (!result)
8301 vm_map_unlock_read(map);
8302
8303 return(result);
8304 }
8305
8306 /*
8307 * vm_map_verify_done:
8308 *
8309 * Releases locks acquired by a vm_map_verify.
8310 *
8311 * This is now a macro in vm/vm_map.h. It does a
8312 * vm_map_unlock_read on the map.
8313 */
8314
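#if 0	/* Illustrative sketch only -- not part of this file. */
/*
 * Minimal sketch of the verify pattern, assuming "version" was filled
 * in by an earlier vm_map_lookup_locked() call and the map read lock
 * was dropped while the returned object was being worked on.  The
 * "example_" name is a placeholder, not an xnu routine.  Returns TRUE
 * if the cached lookup results may still be used.
 */
static boolean_t
example_use_lookup_results(vm_map_t map, vm_map_version_t *version)
{
	if (!vm_map_verify(map, version)) {
		/* map changed behind our back: lookup results are stale */
		return FALSE;
	}
	/* ... safe to use the object/offset from the lookup here ... */
	vm_map_verify_done(map, version);	/* drops the read lock */
	return TRUE;
}
#endif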
8315
8316 /*
8317 * TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
8318 * Goes away after regular vm_region_recurse function migrates to
8319 * 64 bits
8320 * vm_region_recurse: A form of vm_region which follows the
8321 * submaps in a target map
8322 *
8323 */
8324
8325 kern_return_t
8326 vm_map_region_recurse_64(
8327 vm_map_t map,
8328 vm_map_offset_t *address, /* IN/OUT */
8329 vm_map_size_t *size, /* OUT */
8330 natural_t *nesting_depth, /* IN/OUT */
8331 vm_region_submap_info_64_t submap_info, /* IN/OUT */
8332 mach_msg_type_number_t *count) /* IN/OUT */
8333 {
8334 vm_region_extended_info_data_t extended;
8335 vm_map_entry_t tmp_entry;
8336 vm_map_offset_t user_address;
8337 unsigned int user_max_depth;
8338
8339 /*
8340 * "curr_entry" is the VM map entry preceding or including the
8341 * address we're looking for.
8342 * "curr_map" is the map or sub-map containing "curr_entry".
8343 * "curr_offset" is the cumulated offset of "curr_map" in the
8344 * target task's address space.
8345 * "curr_depth" is the depth of "curr_map" in the chain of
8346 * sub-maps.
8347 * "curr_max_offset" is the maximum offset we should take into
8348 * account in the current map. It may be smaller than the current
8349 * map's "max_offset" because we might not have mapped it all in
8350 * the upper level map.
8351 */
8352 vm_map_entry_t curr_entry;
8353 vm_map_offset_t curr_offset;
8354 vm_map_t curr_map;
8355 unsigned int curr_depth;
8356 vm_map_offset_t curr_max_offset;
8357
8358 /*
8359 * "next_" is the same as "curr_" but for the VM region immediately
8360 * after the address we're looking for. We need to keep track of this
8361 * too because we want to return info about that region if the
8362 * address we're looking for is not mapped.
8363 */
8364 vm_map_entry_t next_entry;
8365 vm_map_offset_t next_offset;
8366 vm_map_t next_map;
8367 unsigned int next_depth;
8368 vm_map_offset_t next_max_offset;
8369
8370 boolean_t look_for_pages;
8371 vm_region_submap_short_info_64_t short_info;
8372
8373 if (map == VM_MAP_NULL) {
8374 /* no address space to work on */
8375 return KERN_INVALID_ARGUMENT;
8376 }
8377
8378 if (*count < VM_REGION_SUBMAP_INFO_COUNT_64) {
8379 if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) {
8380 /*
8381 * "info" structure is not big enough and
8382 * would overflow
8383 */
8384 return KERN_INVALID_ARGUMENT;
8385 } else {
8386 look_for_pages = FALSE;
8387 *count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
8388 short_info = (vm_region_submap_short_info_64_t) submap_info;
8389 submap_info = NULL;
8390 }
8391 } else {
8392 look_for_pages = TRUE;
8393 *count = VM_REGION_SUBMAP_INFO_COUNT_64;
8394 short_info = NULL;
8395 }
8396
8397
8398 user_address = *address;
8399 user_max_depth = *nesting_depth;
8400
8401 curr_entry = NULL;
8402 curr_map = map;
8403 curr_offset = 0;
8404 curr_depth = 0;
8405 curr_max_offset = curr_map->max_offset;
8406
8407 next_entry = NULL;
8408 next_map = NULL;
8409 next_offset = 0;
8410 next_depth = 0;
8411 next_max_offset = curr_max_offset;
8412
8413 if (not_in_kdp) {
8414 vm_map_lock_read(curr_map);
8415 }
8416
8417 for (;;) {
8418 if (vm_map_lookup_entry(curr_map,
8419 user_address - curr_offset,
8420 &tmp_entry)) {
8421 /* tmp_entry contains the address we're looking for */
8422 curr_entry = tmp_entry;
8423 } else {
8424 /*
8425 * The address is not mapped. "tmp_entry" is the
8426 * map entry preceding the address. We want the next
8427 * one, if it exists.
8428 */
8429 curr_entry = tmp_entry->vme_next;
8430 if (curr_entry == vm_map_to_entry(curr_map) ||
8431 curr_entry->vme_start >= curr_max_offset) {
8432 /* no next entry at this level: stop looking */
8433 if (not_in_kdp) {
8434 vm_map_unlock_read(curr_map);
8435 }
8436 curr_entry = NULL;
8437 curr_map = NULL;
8438 curr_offset = 0;
8439 curr_depth = 0;
8440 curr_max_offset = 0;
8441 break;
8442 }
8443 }
8444
8445 /*
8446 * Is the next entry at this level closer to the address (or
8447 * deeper in the submap chain) than the one we had
8448 * so far?
8449 */
8450 tmp_entry = curr_entry->vme_next;
8451 if (tmp_entry == vm_map_to_entry(curr_map)) {
8452 /* no next entry at this level */
8453 } else if (tmp_entry->vme_start >= curr_max_offset) {
8454 /*
8455 * tmp_entry is beyond the scope of what we mapped of
8456 * this submap in the upper level: ignore it.
8457 */
8458 } else if ((next_entry == NULL) ||
8459 (tmp_entry->vme_start + curr_offset <=
8460 next_entry->vme_start + next_offset)) {
8461 /*
8462 * We didn't have a "next_entry" or this one is
8463 * closer to the address we're looking for:
8464 * use this "tmp_entry" as the new "next_entry".
8465 */
8466 if (next_entry != NULL) {
8467 /* unlock the last "next_map" */
8468 if (next_map != curr_map && not_in_kdp) {
8469 vm_map_unlock_read(next_map);
8470 }
8471 }
8472 next_entry = tmp_entry;
8473 next_map = curr_map;
8474 next_offset = curr_offset;
8475 next_depth = curr_depth;
8476 next_max_offset = curr_max_offset;
8477 }
8478
8479 if (!curr_entry->is_sub_map ||
8480 curr_depth >= user_max_depth) {
8481 /*
8482 * We hit a leaf map or we reached the maximum depth
8483 * we could, so stop looking. Keep the current map
8484 * locked.
8485 */
8486 break;
8487 }
8488
8489 /*
8490 * Get down to the next submap level.
8491 */
8492
8493 /*
8494 * Lock the next level and unlock the current level,
8495 * unless we need to keep it locked to access the "next_entry"
8496 * later.
8497 */
8498 if (not_in_kdp) {
8499 vm_map_lock_read(curr_entry->object.sub_map);
8500 }
8501 if (curr_map == next_map) {
8502 /* keep "next_map" locked in case we need it */
8503 } else {
8504 /* release this map */
8505 vm_map_unlock_read(curr_map);
8506 }
8507
8508 /*
8509 * Adjust the offset. "curr_entry" maps the submap
8510 * at relative address "curr_entry->vme_start" in the
8511 * curr_map but skips the first "curr_entry->offset"
8512 * bytes of the submap.
8513 * "curr_offset" always represents the offset of a virtual
8514 * address in the curr_map relative to the absolute address
8515 * space (i.e. the top-level VM map).
8516 */
8517 curr_offset +=
8518 (curr_entry->vme_start - curr_entry->offset);
8519 /* switch to the submap */
8520 curr_map = curr_entry->object.sub_map;
8521 curr_depth++;
8522 /*
8523 * "curr_max_offset" allows us to keep track of the
8524 * portion of the submap that is actually mapped at this level:
8525 * the rest of that submap is irrelevant to us, since it's not
8526 * mapped here.
8527 * The relevant portion of the map starts at
8528 * "curr_entry->offset" up to the size of "curr_entry".
8529 */
8530 curr_max_offset =
8531 curr_entry->vme_end - curr_entry->vme_start +
8532 curr_entry->offset;
8533 curr_entry = NULL;
8534 }
8535
8536 if (curr_entry == NULL) {
8537 /* no VM region contains the address... */
8538 if (next_entry == NULL) {
8539 /* ... and no VM region follows it either */
8540 return KERN_INVALID_ADDRESS;
8541 }
8542 /* ... gather info about the next VM region */
8543 curr_entry = next_entry;
8544 curr_map = next_map; /* still locked ... */
8545 curr_offset = next_offset;
8546 curr_depth = next_depth;
8547 curr_max_offset = next_max_offset;
8548 } else {
8549 /* we won't need "next_entry" after all */
8550 if (next_entry != NULL) {
8551 /* release "next_map" */
8552 if (next_map != curr_map && not_in_kdp) {
8553 vm_map_unlock_read(next_map);
8554 }
8555 }
8556 }
8557 next_entry = NULL;
8558 next_map = NULL;
8559 next_offset = 0;
8560 next_depth = 0;
8561 next_max_offset = 0;
8562
8563 *nesting_depth = curr_depth;
8564 *size = curr_entry->vme_end - curr_entry->vme_start;
8565 *address = curr_entry->vme_start + curr_offset;
8566
8567 if (look_for_pages) {
8568 submap_info->user_tag = curr_entry->alias;
8569 submap_info->offset = curr_entry->offset;
8570 submap_info->protection = curr_entry->protection;
8571 submap_info->inheritance = curr_entry->inheritance;
8572 submap_info->max_protection = curr_entry->max_protection;
8573 submap_info->behavior = curr_entry->behavior;
8574 submap_info->user_wired_count = curr_entry->user_wired_count;
8575 submap_info->is_submap = curr_entry->is_sub_map;
8576 submap_info->object_id = (uint32_t) curr_entry->object.vm_object;
8577 } else {
8578 short_info->user_tag = curr_entry->alias;
8579 short_info->offset = curr_entry->offset;
8580 short_info->protection = curr_entry->protection;
8581 short_info->inheritance = curr_entry->inheritance;
8582 short_info->max_protection = curr_entry->max_protection;
8583 short_info->behavior = curr_entry->behavior;
8584 short_info->user_wired_count = curr_entry->user_wired_count;
8585 short_info->is_submap = curr_entry->is_sub_map;
8586 short_info->object_id = (uint32_t) curr_entry->object.vm_object;
8587 }
8588
8589 extended.pages_resident = 0;
8590 extended.pages_swapped_out = 0;
8591 extended.pages_shared_now_private = 0;
8592 extended.pages_dirtied = 0;
8593 extended.external_pager = 0;
8594 extended.shadow_depth = 0;
8595
8596 if (not_in_kdp) {
8597 if (!curr_entry->is_sub_map) {
8598 vm_map_region_walk(curr_map,
8599 curr_entry->vme_start,
8600 curr_entry,
8601 curr_entry->offset,
8602 (curr_entry->vme_end -
8603 curr_entry->vme_start),
8604 &extended,
8605 look_for_pages);
8606 if (extended.external_pager &&
8607 extended.ref_count == 2 &&
8608 extended.share_mode == SM_SHARED) {
8609 extended.share_mode = SM_PRIVATE;
8610 }
8611 } else {
8612 if (curr_entry->use_pmap) {
8613 extended.share_mode = SM_TRUESHARED;
8614 } else {
8615 extended.share_mode = SM_PRIVATE;
8616 }
8617 extended.ref_count =
8618 curr_entry->object.sub_map->ref_count;
8619 }
8620 }
8621
8622 if (look_for_pages) {
8623 submap_info->pages_resident = extended.pages_resident;
8624 submap_info->pages_swapped_out = extended.pages_swapped_out;
8625 submap_info->pages_shared_now_private =
8626 extended.pages_shared_now_private;
8627 submap_info->pages_dirtied = extended.pages_dirtied;
8628 submap_info->external_pager = extended.external_pager;
8629 submap_info->shadow_depth = extended.shadow_depth;
8630 submap_info->share_mode = extended.share_mode;
8631 submap_info->ref_count = extended.ref_count;
8632 } else {
8633 short_info->external_pager = extended.external_pager;
8634 short_info->shadow_depth = extended.shadow_depth;
8635 short_info->share_mode = extended.share_mode;
8636 short_info->ref_count = extended.ref_count;
8637 }
8638
8639 if (not_in_kdp) {
8640 vm_map_unlock_read(curr_map);
8641 }
8642
8643 return KERN_SUCCESS;
8644 }
8645
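#if 0	/* Illustrative sketch only -- not part of this file. */
/*
 * Minimal sketch of a caller, assuming "map" is a valid, referenced
 * vm_map_t.  The depth argument is IN/OUT: on input it bounds how far
 * into nested submaps the walk may descend, and on output it reports
 * the depth of the region actually described.  The "example_" name is
 * a placeholder.
 */
static kern_return_t
example_region_recurse(vm_map_t map, vm_map_offset_t where)
{
	vm_map_offset_t			address = where;
	vm_map_size_t			size;
	natural_t			depth = 0;
	vm_region_submap_info_data_64_t	info;
	mach_msg_type_number_t		count = VM_REGION_SUBMAP_INFO_COUNT_64;

	return vm_map_region_recurse_64(map, &address, &size,
					&depth, &info, &count);
}
#endif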
8646 /*
8647 * vm_region:
8648 *
8649 * User call to obtain information about a region in
8650 * a task's address map. Currently, only one flavor is
8651 * supported.
8652 *
8653 * XXX The reserved and behavior fields cannot be filled
8654 * in until the vm merge from the IK is completed, and
8655 * vm_reserve is implemented.
8656 */
8657
8658 kern_return_t
8659 vm_map_region(
8660 vm_map_t map,
8661 vm_map_offset_t *address, /* IN/OUT */
8662 vm_map_size_t *size, /* OUT */
8663 vm_region_flavor_t flavor, /* IN */
8664 vm_region_info_t info, /* OUT */
8665 mach_msg_type_number_t *count, /* IN/OUT */
8666 mach_port_t *object_name) /* OUT */
8667 {
8668 vm_map_entry_t tmp_entry;
8669 vm_map_entry_t entry;
8670 vm_map_offset_t start;
8671
8672 if (map == VM_MAP_NULL)
8673 return(KERN_INVALID_ARGUMENT);
8674
8675 switch (flavor) {
8676
8677 case VM_REGION_BASIC_INFO:
8678 /* legacy for old 32-bit objects info */
8679 {
8680 vm_region_basic_info_t basic;
8681
8682 if (*count < VM_REGION_BASIC_INFO_COUNT)
8683 return(KERN_INVALID_ARGUMENT);
8684
8685 basic = (vm_region_basic_info_t) info;
8686 *count = VM_REGION_BASIC_INFO_COUNT;
8687
8688 vm_map_lock_read(map);
8689
8690 start = *address;
8691 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
8692 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
8693 vm_map_unlock_read(map);
8694 return(KERN_INVALID_ADDRESS);
8695 }
8696 } else {
8697 entry = tmp_entry;
8698 }
8699
8700 start = entry->vme_start;
8701
8702 basic->offset = (uint32_t)entry->offset;
8703 basic->protection = entry->protection;
8704 basic->inheritance = entry->inheritance;
8705 basic->max_protection = entry->max_protection;
8706 basic->behavior = entry->behavior;
8707 basic->user_wired_count = entry->user_wired_count;
8708 basic->reserved = entry->is_sub_map;
8709 *address = start;
8710 *size = (entry->vme_end - start);
8711
8712 if (object_name) *object_name = IP_NULL;
8713 if (entry->is_sub_map) {
8714 basic->shared = FALSE;
8715 } else {
8716 basic->shared = entry->is_shared;
8717 }
8718
8719 vm_map_unlock_read(map);
8720 return(KERN_SUCCESS);
8721 }
8722
8723 case VM_REGION_BASIC_INFO_64:
8724 {
8725 vm_region_basic_info_64_t basic;
8726
8727 if (*count < VM_REGION_BASIC_INFO_COUNT_64)
8728 return(KERN_INVALID_ARGUMENT);
8729
8730 basic = (vm_region_basic_info_64_t) info;
8731 *count = VM_REGION_BASIC_INFO_COUNT_64;
8732
8733 vm_map_lock_read(map);
8734
8735 start = *address;
8736 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
8737 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
8738 vm_map_unlock_read(map);
8739 return(KERN_INVALID_ADDRESS);
8740 }
8741 } else {
8742 entry = tmp_entry;
8743 }
8744
8745 start = entry->vme_start;
8746
8747 basic->offset = entry->offset;
8748 basic->protection = entry->protection;
8749 basic->inheritance = entry->inheritance;
8750 basic->max_protection = entry->max_protection;
8751 basic->behavior = entry->behavior;
8752 basic->user_wired_count = entry->user_wired_count;
8753 basic->reserved = entry->is_sub_map;
8754 *address = start;
8755 *size = (entry->vme_end - start);
8756
8757 if (object_name) *object_name = IP_NULL;
8758 if (entry->is_sub_map) {
8759 basic->shared = FALSE;
8760 } else {
8761 basic->shared = entry->is_shared;
8762 }
8763
8764 vm_map_unlock_read(map);
8765 return(KERN_SUCCESS);
8766 }
8767 case VM_REGION_EXTENDED_INFO:
8768 {
8769 vm_region_extended_info_t extended;
8770
8771 if (*count < VM_REGION_EXTENDED_INFO_COUNT)
8772 return(KERN_INVALID_ARGUMENT);
8773
8774 extended = (vm_region_extended_info_t) info;
8775 *count = VM_REGION_EXTENDED_INFO_COUNT;
8776
8777 vm_map_lock_read(map);
8778
8779 start = *address;
8780 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
8781 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
8782 vm_map_unlock_read(map);
8783 return(KERN_INVALID_ADDRESS);
8784 }
8785 } else {
8786 entry = tmp_entry;
8787 }
8788 start = entry->vme_start;
8789
8790 extended->protection = entry->protection;
8791 extended->user_tag = entry->alias;
8792 extended->pages_resident = 0;
8793 extended->pages_swapped_out = 0;
8794 extended->pages_shared_now_private = 0;
8795 extended->pages_dirtied = 0;
8796 extended->external_pager = 0;
8797 extended->shadow_depth = 0;
8798
8799 vm_map_region_walk(map, start, entry, entry->offset, entry->vme_end - start, extended, TRUE);
8800
8801 if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED)
8802 extended->share_mode = SM_PRIVATE;
8803
8804 if (object_name)
8805 *object_name = IP_NULL;
8806 *address = start;
8807 *size = (entry->vme_end - start);
8808
8809 vm_map_unlock_read(map);
8810 return(KERN_SUCCESS);
8811 }
8812 case VM_REGION_TOP_INFO:
8813 {
8814 vm_region_top_info_t top;
8815
8816 if (*count < VM_REGION_TOP_INFO_COUNT)
8817 return(KERN_INVALID_ARGUMENT);
8818
8819 top = (vm_region_top_info_t) info;
8820 *count = VM_REGION_TOP_INFO_COUNT;
8821
8822 vm_map_lock_read(map);
8823
8824 start = *address;
8825 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
8826 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
8827 vm_map_unlock_read(map);
8828 return(KERN_INVALID_ADDRESS);
8829 }
8830 } else {
8831 entry = tmp_entry;
8832
8833 }
8834 start = entry->vme_start;
8835
8836 top->private_pages_resident = 0;
8837 top->shared_pages_resident = 0;
8838
8839 vm_map_region_top_walk(entry, top);
8840
8841 if (object_name)
8842 *object_name = IP_NULL;
8843 *address = start;
8844 *size = (entry->vme_end - start);
8845
8846 vm_map_unlock_read(map);
8847 return(KERN_SUCCESS);
8848 }
8849 default:
8850 return(KERN_INVALID_ARGUMENT);
8851 }
8852 }
8853
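#if 0	/* Illustrative sketch only -- not part of this file. */
/*
 * Minimal sketch of a VM_REGION_BASIC_INFO_64 query, assuming "map" is
 * a valid vm_map_t.  On success, address/size describe the region at
 * or after the requested address.  The "example_" name is a
 * placeholder.
 */
static kern_return_t
example_region_basic_info(vm_map_t map, vm_map_offset_t where)
{
	vm_map_offset_t			address = where;
	vm_map_size_t			size;
	vm_region_basic_info_data_64_t	info;
	mach_msg_type_number_t		count = VM_REGION_BASIC_INFO_COUNT_64;
	mach_port_t			object_name;

	return vm_map_region(map, &address, &size,
			     VM_REGION_BASIC_INFO_64,
			     (vm_region_info_t) &info,
			     &count, &object_name);
}
#endif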
8854 #define min(a, b) (((a) < (b)) ? (a) : (b))
8855
8856 void
8857 vm_map_region_top_walk(
8858 vm_map_entry_t entry,
8859 vm_region_top_info_t top)
8860 {
8861
8862 if (entry->object.vm_object == 0 || entry->is_sub_map) {
8863 top->share_mode = SM_EMPTY;
8864 top->ref_count = 0;
8865 top->obj_id = 0;
8866 return;
8867 }
8868
8869 {
8870 struct vm_object *obj, *tmp_obj;
8871 int ref_count;
8872 uint32_t entry_size;
8873
8874 entry_size = (entry->vme_end - entry->vme_start) / PAGE_SIZE;
8875
8876 obj = entry->object.vm_object;
8877
8878 vm_object_lock(obj);
8879
8880 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
8881 ref_count--;
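/*
 * The decrement above discounts the reference that accompanies an
 * in-progress paging operation, so transient pager activity does not
 * make a private object appear shared.  The same adjustment is
 * repeated wherever ref_count is sampled in the region-walk code
 * below.
 */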
8882
8883 if (obj->shadow) {
8884 if (ref_count == 1)
8885 top->private_pages_resident = min(obj->resident_page_count, entry_size);
8886 else
8887 top->shared_pages_resident = min(obj->resident_page_count, entry_size);
8888 top->ref_count = ref_count;
8889 top->share_mode = SM_COW;
8890
8891 while ((tmp_obj = obj->shadow)) {
8892 vm_object_lock(tmp_obj);
8893 vm_object_unlock(obj);
8894 obj = tmp_obj;
8895
8896 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
8897 ref_count--;
8898
8899 top->shared_pages_resident += min(obj->resident_page_count, entry_size);
8900 top->ref_count += ref_count - 1;
8901 }
8902 } else {
8903 if (entry->needs_copy) {
8904 top->share_mode = SM_COW;
8905 top->shared_pages_resident = min(obj->resident_page_count, entry_size);
8906 } else {
8907 if (ref_count == 1 ||
8908 (ref_count == 2 && !(obj->pager_trusted) && !(obj->internal))) {
8909 top->share_mode = SM_PRIVATE;
8910 top->private_pages_resident = min(obj->resident_page_count, entry_size);
8911 } else {
8912 top->share_mode = SM_SHARED;
8913 top->shared_pages_resident = min(obj->resident_page_count, entry_size);
8914 }
8915 }
8916 top->ref_count = ref_count;
8917 }
8918 top->obj_id = (int)obj;
8919
8920 vm_object_unlock(obj);
8921 }
8922 }
8923
8924 void
8925 vm_map_region_walk(
8926 vm_map_t map,
8927 vm_map_offset_t va,
8928 vm_map_entry_t entry,
8929 vm_object_offset_t offset,
8930 vm_object_size_t range,
8931 vm_region_extended_info_t extended,
8932 boolean_t look_for_pages)
8933 {
8934 register struct vm_object *obj, *tmp_obj;
8935 register vm_map_offset_t last_offset;
8936 register int i;
8937 register int ref_count;
8938 struct vm_object *shadow_object;
8939 int shadow_depth;
8940
8941 if ((entry->object.vm_object == 0) ||
8942 (entry->is_sub_map) ||
8943 (entry->object.vm_object->phys_contiguous)) {
8944 extended->share_mode = SM_EMPTY;
8945 extended->ref_count = 0;
8946 return;
8947 }
8948 {
8949 obj = entry->object.vm_object;
8950
8951 vm_object_lock(obj);
8952
8953 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
8954 ref_count--;
8955
8956 if (look_for_pages) {
8957 for (last_offset = offset + range;
8958 offset < last_offset;
8959 offset += PAGE_SIZE_64, va += PAGE_SIZE)
8960 vm_map_region_look_for_page(map, va, obj,
8961 offset, ref_count,
8962 0, extended);
8963 }
8964
8965 shadow_object = obj->shadow;
8966 shadow_depth = 0;
8967 if (shadow_object != VM_OBJECT_NULL) {
8968 vm_object_lock(shadow_object);
8969 for (;
8970 shadow_object != VM_OBJECT_NULL;
8971 shadow_depth++) {
8972 vm_object_t next_shadow;
8973
8974 next_shadow = shadow_object->shadow;
8975 if (next_shadow) {
8976 vm_object_lock(next_shadow);
8977 }
8978 vm_object_unlock(shadow_object);
8979 shadow_object = next_shadow;
8980 }
8981 }
8982 extended->shadow_depth = shadow_depth;
8983
8984 if (extended->shadow_depth || entry->needs_copy)
8985 extended->share_mode = SM_COW;
8986 else {
8987 if (ref_count == 1)
8988 extended->share_mode = SM_PRIVATE;
8989 else {
8990 if (obj->true_share)
8991 extended->share_mode = SM_TRUESHARED;
8992 else
8993 extended->share_mode = SM_SHARED;
8994 }
8995 }
8996 extended->ref_count = ref_count - extended->shadow_depth;
8997
8998 for (i = 0; i < extended->shadow_depth; i++) {
8999 if ((tmp_obj = obj->shadow) == 0)
9000 break;
9001 vm_object_lock(tmp_obj);
9002 vm_object_unlock(obj);
9003
9004 if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress)
9005 ref_count--;
9006
9007 extended->ref_count += ref_count;
9008 obj = tmp_obj;
9009 }
9010 vm_object_unlock(obj);
9011
9012 if (extended->share_mode == SM_SHARED) {
9013 register vm_map_entry_t cur;
9014 register vm_map_entry_t last;
9015 int my_refs;
9016
9017 obj = entry->object.vm_object;
9018 last = vm_map_to_entry(map);
9019 my_refs = 0;
9020
9021 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
9022 ref_count--;
9023 for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next)
9024 my_refs += vm_map_region_count_obj_refs(cur, obj);
9025
9026 if (my_refs == ref_count)
9027 extended->share_mode = SM_PRIVATE_ALIASED;
9028 else if (my_refs > 1)
9029 extended->share_mode = SM_SHARED_ALIASED;
9030 }
9031 }
9032 }
9033
9034
9035 /* object is locked on entry and locked on return */
9036
9037
9038 static void
9039 vm_map_region_look_for_page(
9040 __unused vm_map_t map,
9041 __unused vm_map_offset_t va,
9042 vm_object_t object,
9043 vm_object_offset_t offset,
9044 int max_refcnt,
9045 int depth,
9046 vm_region_extended_info_t extended)
9047 {
9048 register vm_page_t p;
9049 register vm_object_t shadow;
9050 register int ref_count;
9051 vm_object_t caller_object;
9052 #if MACH_PAGEMAP
9053 kern_return_t kr;
9054 #endif
9055 shadow = object->shadow;
9056 caller_object = object;
9057
9058
9059 while (TRUE) {
9060
9061 if ( !(object->pager_trusted) && !(object->internal))
9062 extended->external_pager = 1;
9063
9064 if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
9065 if (shadow && (max_refcnt == 1))
9066 extended->pages_shared_now_private++;
9067
9068 if (!p->fictitious &&
9069 (p->dirty || pmap_is_modified(p->phys_page)))
9070 extended->pages_dirtied++;
9071
9072 extended->pages_resident++;
9073
9074 if(object != caller_object)
9075 vm_object_unlock(object);
9076
9077 return;
9078 }
9079 #if MACH_PAGEMAP
9080 if (object->existence_map) {
9081 if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_EXISTS) {
9082
9083 extended->pages_swapped_out++;
9084
9085 if(object != caller_object)
9086 vm_object_unlock(object);
9087
9088 return;
9089 }
9090 } else if (object->internal &&
9091 object->alive &&
9092 !object->terminating &&
9093 object->pager_ready) {
9094
9095 memory_object_t pager;
9096
9097 vm_object_paging_begin(object);
9098 pager = object->pager;
9099 vm_object_unlock(object);
9100
9101 kr = memory_object_data_request(
9102 pager,
9103 offset + object->paging_offset,
9104 0, /* just poke the pager */
9105 VM_PROT_READ,
9106 NULL);
9107
9108 vm_object_lock(object);
9109 vm_object_paging_end(object);
9110
9111 if (kr == KERN_SUCCESS) {
9112 /* the pager has that page */
9113 extended->pages_swapped_out++;
9114 if (object != caller_object)
9115 vm_object_unlock(object);
9116 return;
9117 }
9118 }
9119 #endif /* MACH_PAGEMAP */
9120
9121 if (shadow) {
9122 vm_object_lock(shadow);
9123
9124 if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress)
9125 ref_count--;
9126
9127 if (++depth > extended->shadow_depth)
9128 extended->shadow_depth = depth;
9129
9130 if (ref_count > max_refcnt)
9131 max_refcnt = ref_count;
9132
9133 if(object != caller_object)
9134 vm_object_unlock(object);
9135
9136 offset = offset + object->shadow_offset;
9137 object = shadow;
9138 shadow = object->shadow;
9139 continue;
9140 }
9141 if(object != caller_object)
9142 vm_object_unlock(object);
9143 break;
9144 }
9145 }
9146
9147 static int
9148 vm_map_region_count_obj_refs(
9149 vm_map_entry_t entry,
9150 vm_object_t object)
9151 {
9152 register int ref_count;
9153 register vm_object_t chk_obj;
9154 register vm_object_t tmp_obj;
9155
9156 if (entry->object.vm_object == 0)
9157 return(0);
9158
9159 if (entry->is_sub_map)
9160 return(0);
9161 else {
9162 ref_count = 0;
9163
9164 chk_obj = entry->object.vm_object;
9165 vm_object_lock(chk_obj);
9166
9167 while (chk_obj) {
9168 if (chk_obj == object)
9169 ref_count++;
9170 tmp_obj = chk_obj->shadow;
9171 if (tmp_obj)
9172 vm_object_lock(tmp_obj);
9173 vm_object_unlock(chk_obj);
9174
9175 chk_obj = tmp_obj;
9176 }
9177 }
9178 return(ref_count);
9179 }
9180
9181
9182 /*
9183 * Routine: vm_map_simplify
9184 *
9185 * Description:
9186 * Attempt to simplify the map representation in
9187 * the vicinity of the given starting address.
9188 * Note:
9189 * This routine is intended primarily to keep the
9190 * kernel maps more compact -- they generally don't
9191 * benefit from the "expand a map entry" technology
9192 * at allocation time because the adjacent entry
9193 * is often wired down.
9194 */
9195 void
9196 vm_map_simplify_entry(
9197 vm_map_t map,
9198 vm_map_entry_t this_entry)
9199 {
9200 vm_map_entry_t prev_entry;
9201
9202 counter(c_vm_map_simplify_entry_called++);
9203
9204 prev_entry = this_entry->vme_prev;
9205
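	/*
	 * Coalesce only if the two entries are adjacent, reference the
	 * same object at contiguous offsets, and agree on every attribute
	 * checked below (protections, inheritance, behavior, alias,
	 * wiring, caching).  Entries that use a nested pmap, entries in
	 * transition or awaiting a wakeup, and shared entries are never
	 * merged.
	 */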
9206 if ((this_entry != vm_map_to_entry(map)) &&
9207 (prev_entry != vm_map_to_entry(map)) &&
9208
9209 (prev_entry->vme_end == this_entry->vme_start) &&
9210
9211 (prev_entry->is_sub_map == this_entry->is_sub_map) &&
9212
9213 (prev_entry->object.vm_object == this_entry->object.vm_object) &&
9214 ((prev_entry->offset + (prev_entry->vme_end -
9215 prev_entry->vme_start))
9216 == this_entry->offset) &&
9217
9218 (prev_entry->inheritance == this_entry->inheritance) &&
9219 (prev_entry->protection == this_entry->protection) &&
9220 (prev_entry->max_protection == this_entry->max_protection) &&
9221 (prev_entry->behavior == this_entry->behavior) &&
9222 (prev_entry->alias == this_entry->alias) &&
9223 (prev_entry->no_cache == this_entry->no_cache) &&
9224 (prev_entry->wired_count == this_entry->wired_count) &&
9225 (prev_entry->user_wired_count == this_entry->user_wired_count) &&
9226
9227 (prev_entry->needs_copy == this_entry->needs_copy) &&
9228
9229 (prev_entry->use_pmap == FALSE) &&
9230 (this_entry->use_pmap == FALSE) &&
9231 (prev_entry->in_transition == FALSE) &&
9232 (this_entry->in_transition == FALSE) &&
9233 (prev_entry->needs_wakeup == FALSE) &&
9234 (this_entry->needs_wakeup == FALSE) &&
9235 (prev_entry->is_shared == FALSE) &&
9236 (this_entry->is_shared == FALSE)
9237 ) {
9238 _vm_map_entry_unlink(&map->hdr, prev_entry);
9239 this_entry->vme_start = prev_entry->vme_start;
9240 this_entry->offset = prev_entry->offset;
9241 if (prev_entry->is_sub_map) {
9242 vm_map_deallocate(prev_entry->object.sub_map);
9243 } else {
9244 vm_object_deallocate(prev_entry->object.vm_object);
9245 }
9246 vm_map_entry_dispose(map, prev_entry);
9247 SAVE_HINT_MAP_WRITE(map, this_entry);
9248 counter(c_vm_map_simplified++);
9249 }
9250 }
9251
9252 void
9253 vm_map_simplify(
9254 vm_map_t map,
9255 vm_map_offset_t start)
9256 {
9257 vm_map_entry_t this_entry;
9258
9259 vm_map_lock(map);
9260 if (vm_map_lookup_entry(map, start, &this_entry)) {
9261 vm_map_simplify_entry(map, this_entry);
9262 vm_map_simplify_entry(map, this_entry->vme_next);
9263 }
9264 counter(c_vm_map_simplify_called++);
9265 vm_map_unlock(map);
9266 }
9267
9268 static void
9269 vm_map_simplify_range(
9270 vm_map_t map,
9271 vm_map_offset_t start,
9272 vm_map_offset_t end)
9273 {
9274 vm_map_entry_t entry;
9275
9276 /*
9277 * The map should be locked (for "write") by the caller.
9278 */
9279
9280 if (start >= end) {
9281 /* invalid address range */
9282 return;
9283 }
9284
9285 start = vm_map_trunc_page(start);
9286 end = vm_map_round_page(end);
9287
9288 if (!vm_map_lookup_entry(map, start, &entry)) {
9289 /* "start" is not mapped and "entry" ends before "start" */
9290 if (entry == vm_map_to_entry(map)) {
9291 /* start with first entry in the map */
9292 entry = vm_map_first_entry(map);
9293 } else {
9294 /* start with next entry */
9295 entry = entry->vme_next;
9296 }
9297 }
9298
9299 while (entry != vm_map_to_entry(map) &&
9300 entry->vme_start <= end) {
9301 /* try and coalesce "entry" with its previous entry */
9302 vm_map_simplify_entry(map, entry);
9303 entry = entry->vme_next;
9304 }
9305 }
9306
9307
9308 /*
9309 * Routine: vm_map_machine_attribute
9310 * Purpose:
9311 * Provide machine-specific attributes to mappings,
9312 * such as cacheability, etc., for machines that provide
9313 * them. NUMA architectures and machines with big/strange
9314 * caches will use this.
9315 * Note:
9316 * Responsibilities for locking and checking are handled here,
9317 * everything else in the pmap module. If any non-volatile
9318 * information must be kept, the pmap module should handle
9319 * it itself. [This assumes that attributes do not
9320 * need to be inherited, which seems ok to me]
9321 */
9322 kern_return_t
9323 vm_map_machine_attribute(
9324 vm_map_t map,
9325 vm_map_offset_t start,
9326 vm_map_offset_t end,
9327 vm_machine_attribute_t attribute,
9328 vm_machine_attribute_val_t* value) /* IN/OUT */
9329 {
9330 kern_return_t ret;
9331 vm_map_size_t sync_size;
9332 vm_map_entry_t entry;
9333
9334 if (start < vm_map_min(map) || end > vm_map_max(map))
9335 return KERN_INVALID_ADDRESS;
9336
9337 /* Figure how much memory we need to flush (in page increments) */
9338 sync_size = end - start;
9339
9340 vm_map_lock(map);
9341
9342 if (attribute != MATTR_CACHE) {
9343 /* If we don't have to find physical addresses, we */
9344 /* don't have to do an explicit traversal here. */
9345 ret = pmap_attribute(map->pmap, start, end-start,
9346 attribute, value);
9347 vm_map_unlock(map);
9348 return ret;
9349 }
9350
9351 ret = KERN_SUCCESS; /* Assume it all worked */
9352
9353 while(sync_size) {
9354 if (vm_map_lookup_entry(map, start, &entry)) {
9355 vm_map_size_t sub_size;
9356 if((entry->vme_end - start) > sync_size) {
9357 sub_size = sync_size;
9358 sync_size = 0;
9359 } else {
9360 sub_size = entry->vme_end - start;
9361 sync_size -= sub_size;
9362 }
9363 if(entry->is_sub_map) {
9364 vm_map_offset_t sub_start;
9365 vm_map_offset_t sub_end;
9366
9367 sub_start = (start - entry->vme_start)
9368 + entry->offset;
9369 sub_end = sub_start + sub_size;
9370 vm_map_machine_attribute(
9371 entry->object.sub_map,
9372 sub_start,
9373 sub_end,
9374 attribute, value);
9375 } else {
9376 if(entry->object.vm_object) {
9377 vm_page_t m;
9378 vm_object_t object;
9379 vm_object_t base_object;
9380 vm_object_t last_object;
9381 vm_object_offset_t offset;
9382 vm_object_offset_t base_offset;
9383 vm_map_size_t range;
9384 range = sub_size;
9385 offset = (start - entry->vme_start)
9386 + entry->offset;
9387 base_offset = offset;
9388 object = entry->object.vm_object;
9389 base_object = object;
9390 last_object = NULL;
9391
9392 vm_object_lock(object);
9393
9394 while (range) {
9395 m = vm_page_lookup(
9396 object, offset);
9397
9398 if (m && !m->fictitious) {
9399 ret =
9400 pmap_attribute_cache_sync(
9401 m->phys_page,
9402 PAGE_SIZE,
9403 attribute, value);
9404
9405 } else if (object->shadow) {
9406 offset = offset + object->shadow_offset;
9407 last_object = object;
9408 object = object->shadow;
9409 vm_object_lock(last_object->shadow);
9410 vm_object_unlock(last_object);
9411 continue;
9412 }
9413 range -= PAGE_SIZE;
9414
9415 if (base_object != object) {
9416 vm_object_unlock(object);
9417 vm_object_lock(base_object);
9418 object = base_object;
9419 }
9420 /* Bump to the next page */
9421 base_offset += PAGE_SIZE;
9422 offset = base_offset;
9423 }
9424 vm_object_unlock(object);
9425 }
9426 }
9427 start += sub_size;
9428 } else {
9429 vm_map_unlock(map);
9430 return KERN_FAILURE;
9431 }
9432
9433 }
9434
9435 vm_map_unlock(map);
9436
9437 return ret;
9438 }
9439
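#if 0	/* Illustrative sketch only -- not part of this file. */
/*
 * Minimal sketch of a cache-flush request over a range.  It assumes
 * MATTR_VAL_CACHE_FLUSH from <mach/vm_attributes.h>; whether the pmap
 * layer honours the request is machine-dependent.  The "example_"
 * name is a placeholder.
 */
static kern_return_t
example_cache_flush(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	vm_machine_attribute_val_t value = MATTR_VAL_CACHE_FLUSH;

	return vm_map_machine_attribute(map, start, end,
					MATTR_CACHE, &value);
}
#endif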
9440 /*
9441 * vm_map_behavior_set:
9442 *
9443 * Sets the paging reference behavior of the specified address
9444 * range in the target map. Paging reference behavior affects
9445 * how pagein operations resulting from faults on the map will be
9446 * clustered.
9447 */
9448 kern_return_t
9449 vm_map_behavior_set(
9450 vm_map_t map,
9451 vm_map_offset_t start,
9452 vm_map_offset_t end,
9453 vm_behavior_t new_behavior)
9454 {
9455 register vm_map_entry_t entry;
9456 vm_map_entry_t temp_entry;
9457
9458 XPR(XPR_VM_MAP,
9459 "vm_map_behavior_set, 0x%X start 0x%X end 0x%X behavior %d",
9460 (integer_t)map, start, end, new_behavior, 0);
9461
9462 switch (new_behavior) {
9463 case VM_BEHAVIOR_DEFAULT:
9464 case VM_BEHAVIOR_RANDOM:
9465 case VM_BEHAVIOR_SEQUENTIAL:
9466 case VM_BEHAVIOR_RSEQNTL:
9467 break;
9468 case VM_BEHAVIOR_WILLNEED:
9469 case VM_BEHAVIOR_DONTNEED:
9470 new_behavior = VM_BEHAVIOR_DEFAULT;
9471 break;
9472 default:
9473 return(KERN_INVALID_ARGUMENT);
9474 }
9475
9476 vm_map_lock(map);
9477
9478 /*
9479 * The entire address range must be valid for the map.
9480 * Note that vm_map_range_check() does a
9481 * vm_map_lookup_entry() internally and returns the
9482 * entry containing the start of the address range if
9483 * the entire range is valid.
9484 */
9485 if (vm_map_range_check(map, start, end, &temp_entry)) {
9486 entry = temp_entry;
9487 vm_map_clip_start(map, entry, start);
9488 }
9489 else {
9490 vm_map_unlock(map);
9491 return(KERN_INVALID_ADDRESS);
9492 }
9493
9494 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
9495 vm_map_clip_end(map, entry, end);
9496 assert(!entry->use_pmap);
9497
9498 entry->behavior = new_behavior;
9499
9500 entry = entry->vme_next;
9501 }
9502
9503 vm_map_unlock(map);
9504 return(KERN_SUCCESS);
9505 }
9506
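#if 0	/* Illustrative sketch only -- not part of this file. */
/*
 * Minimal sketch: advise the fault path that a range will be touched
 * sequentially so pagein clustering can read ahead.  "map", "start"
 * and "end" are assumed to describe a valid range; the "example_"
 * name is a placeholder.
 */
static kern_return_t
example_advise_sequential(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	return vm_map_behavior_set(map, start, end, VM_BEHAVIOR_SEQUENTIAL);
}
#endif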
9507
9508 #include <mach_kdb.h>
9509 #if MACH_KDB
9510 #include <ddb/db_output.h>
9511 #include <vm/vm_print.h>
9512
9513 #define printf db_printf
9514
9515 /*
9516 * Forward declarations for internal functions.
9517 */
9518 extern void vm_map_links_print(
9519 struct vm_map_links *links);
9520
9521 extern void vm_map_header_print(
9522 struct vm_map_header *header);
9523
9524 extern void vm_map_entry_print(
9525 vm_map_entry_t entry);
9526
9527 extern void vm_follow_entry(
9528 vm_map_entry_t entry);
9529
9530 extern void vm_follow_map(
9531 vm_map_t map);
9532
9533 /*
9534 * vm_map_links_print: [ debug ]
9535 */
9536 void
9537 vm_map_links_print(
9538 struct vm_map_links *links)
9539 {
9540 iprintf("prev = %08X next = %08X start = %016llX end = %016llX\n",
9541 links->prev,
9542 links->next,
9543 (unsigned long long)links->start,
9544 (unsigned long long)links->end);
9545 }
9546
9547 /*
9548 * vm_map_header_print: [ debug ]
9549 */
9550 void
9551 vm_map_header_print(
9552 struct vm_map_header *header)
9553 {
9554 vm_map_links_print(&header->links);
9555 iprintf("nentries = %08X, %sentries_pageable\n",
9556 header->nentries,
9557 (header->entries_pageable ? "" : "!"));
9558 }
9559
9560 /*
9561 * vm_follow_entry: [ debug ]
9562 */
9563 void
9564 vm_follow_entry(
9565 vm_map_entry_t entry)
9566 {
9567 int shadows;
9568
9569 iprintf("map entry %08X\n", entry);
9570
9571 db_indent += 2;
9572
9573 shadows = vm_follow_object(entry->object.vm_object);
9574 iprintf("Total objects : %d\n",shadows);
9575
9576 db_indent -= 2;
9577 }
9578
9579 /*
9580 * vm_map_entry_print: [ debug ]
9581 */
9582 void
9583 vm_map_entry_print(
9584 register vm_map_entry_t entry)
9585 {
9586 static const char *inheritance_name[4] =
9587 { "share", "copy", "none", "?"};
9588 static const char *behavior_name[4] =
9589 { "dflt", "rand", "seqtl", "rseqntl" };
9590
9591 iprintf("map entry %08X - prev = %08X next = %08X\n", entry, entry->vme_prev, entry->vme_next);
9592
9593 db_indent += 2;
9594
9595 vm_map_links_print(&entry->links);
9596
9597 iprintf("start = %016llX end = %016llX - prot=%x/%x/%s\n",
9598 (unsigned long long)entry->vme_start,
9599 (unsigned long long)entry->vme_end,
9600 entry->protection,
9601 entry->max_protection,
9602 inheritance_name[(entry->inheritance & 0x3)]);
9603
9604 iprintf("behavior = %s, wired_count = %d, user_wired_count = %d\n",
9605 behavior_name[(entry->behavior & 0x3)],
9606 entry->wired_count,
9607 entry->user_wired_count);
9608 iprintf("%sin_transition, %sneeds_wakeup\n",
9609 (entry->in_transition ? "" : "!"),
9610 (entry->needs_wakeup ? "" : "!"));
9611
9612 if (entry->is_sub_map) {
9613 iprintf("submap = %08X - offset = %016llX\n",
9614 entry->object.sub_map,
9615 (unsigned long long)entry->offset);
9616 } else {
9617 iprintf("object = %08X offset = %016llX - ",
9618 entry->object.vm_object,
9619 (unsigned long long)entry->offset);
9620 printf("%sis_shared, %sneeds_copy\n",
9621 (entry->is_shared ? "" : "!"),
9622 (entry->needs_copy ? "" : "!"));
9623 }
9624
9625 db_indent -= 2;
9626 }
9627
9628 /*
9629 * vm_follow_map: [ debug ]
9630 */
9631 void
9632 vm_follow_map(
9633 vm_map_t map)
9634 {
9635 register vm_map_entry_t entry;
9636
9637 iprintf("task map %08X\n", map);
9638
9639 db_indent += 2;
9640
9641 for (entry = vm_map_first_entry(map);
9642 entry && entry != vm_map_to_entry(map);
9643 entry = entry->vme_next) {
9644 vm_follow_entry(entry);
9645 }
9646
9647 db_indent -= 2;
9648 }
9649
9650 /*
9651 * vm_map_print: [ debug ]
9652 */
9653 void
9654 vm_map_print(
9655 db_addr_t inmap)
9656 {
9657 register vm_map_entry_t entry;
9658 vm_map_t map;
9659 #if TASK_SWAPPER
9660 char *swstate;
9661 #endif /* TASK_SWAPPER */
9662
9663 map = (vm_map_t)(long)
9664 inmap; /* Make sure we have the right type */
9665
9666 iprintf("task map %08X\n", map);
9667
9668 db_indent += 2;
9669
9670 vm_map_header_print(&map->hdr);
9671
9672 iprintf("pmap = %08X size = %08X ref = %d hint = %08X first_free = %08X\n",
9673 map->pmap,
9674 map->size,
9675 map->ref_count,
9676 map->hint,
9677 map->first_free);
9678
9679 iprintf("%swait_for_space, %swiring_required, timestamp = %d\n",
9680 (map->wait_for_space ? "" : "!"),
9681 (map->wiring_required ? "" : "!"),
9682 map->timestamp);
9683
9684 #if TASK_SWAPPER
9685 switch (map->sw_state) {
9686 case MAP_SW_IN:
9687 swstate = "SW_IN";
9688 break;
9689 case MAP_SW_OUT:
9690 swstate = "SW_OUT";
9691 break;
9692 default:
9693 swstate = "????";
9694 break;
9695 }
9696 iprintf("res = %d, sw_state = %s\n", map->res_count, swstate);
9697 #endif /* TASK_SWAPPER */
9698
9699 for (entry = vm_map_first_entry(map);
9700 entry && entry != vm_map_to_entry(map);
9701 entry = entry->vme_next) {
9702 vm_map_entry_print(entry);
9703 }
9704
9705 db_indent -= 2;
9706 }
9707
9708 /*
9709 * Routine: vm_map_copy_print
9710 * Purpose:
9711 * Pretty-print a copy object for ddb.
9712 */
9713
9714 void
9715 vm_map_copy_print(
9716 db_addr_t incopy)
9717 {
9718 vm_map_copy_t copy;
9719 vm_map_entry_t entry;
9720
9721 copy = (vm_map_copy_t)(long)
9722 incopy; /* Make sure we have the right type */
9723
9724 printf("copy object 0x%x\n", copy);
9725
9726 db_indent += 2;
9727
9728 iprintf("type=%d", copy->type);
9729 switch (copy->type) {
9730 case VM_MAP_COPY_ENTRY_LIST:
9731 printf("[entry_list]");
9732 break;
9733
9734 case VM_MAP_COPY_OBJECT:
9735 printf("[object]");
9736 break;
9737
9738 case VM_MAP_COPY_KERNEL_BUFFER:
9739 printf("[kernel_buffer]");
9740 break;
9741
9742 default:
9743 printf("[bad type]");
9744 break;
9745 }
9746 printf(", offset=0x%llx", (unsigned long long)copy->offset);
9747 printf(", size=0x%x\n", copy->size);
9748
9749 switch (copy->type) {
9750 case VM_MAP_COPY_ENTRY_LIST:
9751 vm_map_header_print(&copy->cpy_hdr);
9752 for (entry = vm_map_copy_first_entry(copy);
9753 entry && entry != vm_map_copy_to_entry(copy);
9754 entry = entry->vme_next) {
9755 vm_map_entry_print(entry);
9756 }
9757 break;
9758
9759 case VM_MAP_COPY_OBJECT:
9760 iprintf("object=0x%x\n", copy->cpy_object);
9761 break;
9762
9763 case VM_MAP_COPY_KERNEL_BUFFER:
9764 iprintf("kernel buffer=0x%x", copy->cpy_kdata);
9765 printf(", kalloc_size=0x%x\n", copy->cpy_kalloc_size);
9766 break;
9767
9768 }
9769
9770 db_indent -=2;
9771 }
9772
9773 /*
9774 * db_vm_map_total_size(map) [ debug ]
9775 *
9776 * return the total virtual size (in bytes) of the map
9777 */
9778 vm_map_size_t
9779 db_vm_map_total_size(
9780 db_addr_t inmap)
9781 {
9782 vm_map_entry_t entry;
9783 vm_map_size_t total;
9784 vm_map_t map;
9785
9786 map = (vm_map_t)(long)
9787 inmap; /* Make sure we have the right type */
9788
9789 total = 0;
9790 for (entry = vm_map_first_entry(map);
9791 entry != vm_map_to_entry(map);
9792 entry = entry->vme_next) {
9793 total += entry->vme_end - entry->vme_start;
9794 }
9795
9796 return total;
9797 }
9798
9799 #endif /* MACH_KDB */
9800
9801 /*
9802 * Routine: vm_map_entry_insert
9803 *
9804 * Description: This routine inserts a new vm_map_entry in a locked map.
9805 */
9806 vm_map_entry_t
9807 vm_map_entry_insert(
9808 vm_map_t map,
9809 vm_map_entry_t insp_entry,
9810 vm_map_offset_t start,
9811 vm_map_offset_t end,
9812 vm_object_t object,
9813 vm_object_offset_t offset,
9814 boolean_t needs_copy,
9815 boolean_t is_shared,
9816 boolean_t in_transition,
9817 vm_prot_t cur_protection,
9818 vm_prot_t max_protection,
9819 vm_behavior_t behavior,
9820 vm_inherit_t inheritance,
9821 unsigned wired_count,
9822 boolean_t no_cache)
9823 {
9824 vm_map_entry_t new_entry;
9825
9826 assert(insp_entry != (vm_map_entry_t)0);
9827
9828 new_entry = vm_map_entry_create(map);
9829
9830 new_entry->vme_start = start;
9831 new_entry->vme_end = end;
9832 assert(page_aligned(new_entry->vme_start));
9833 assert(page_aligned(new_entry->vme_end));
9834
9835 new_entry->object.vm_object = object;
9836 new_entry->offset = offset;
9837 new_entry->is_shared = is_shared;
9838 new_entry->is_sub_map = FALSE;
9839 new_entry->needs_copy = needs_copy;
9840 new_entry->in_transition = in_transition;
9841 new_entry->needs_wakeup = FALSE;
9842 new_entry->inheritance = inheritance;
9843 new_entry->protection = cur_protection;
9844 new_entry->max_protection = max_protection;
9845 new_entry->behavior = behavior;
9846 new_entry->wired_count = wired_count;
9847 new_entry->user_wired_count = 0;
9848 new_entry->use_pmap = FALSE;
9849 new_entry->alias = 0;
9850 new_entry->no_cache = no_cache;
9851
9852 /*
9853 * Insert the new entry into the list.
9854 */
9855
9856 vm_map_entry_link(map, insp_entry, new_entry);
9857 map->size += end - start;
9858
9859 /*
9860 * Update the free space hint and the lookup hint.
9861 */
9862
9863 SAVE_HINT_MAP_WRITE(map, new_entry);
9864 return new_entry;
9865 }
9866
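#if 0	/* Illustrative sketch only -- not part of this file. */
/*
 * Minimal sketch of inserting an anonymous entry, assuming the caller
 * already holds the map lock for writing and "after" is the entry the
 * new range should follow (as vm_map_enter() does once it has found
 * free space).  The "example_" name is a placeholder.
 */
static void
example_entry_insert(
	vm_map_t	map,
	vm_map_entry_t	after,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	(void) vm_map_entry_insert(map, after, start, end,
				   VM_OBJECT_NULL,	/* object allocated lazily at fault time */
				   (vm_object_offset_t) 0,
				   FALSE,		/* needs_copy */
				   FALSE,		/* is_shared */
				   FALSE,		/* in_transition */
				   VM_PROT_DEFAULT,
				   VM_PROT_ALL,
				   VM_BEHAVIOR_DEFAULT,
				   VM_INHERIT_DEFAULT,
				   0,			/* wired_count */
				   FALSE);		/* no_cache */
}
#endif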
9867 /*
9868 * Routine: vm_map_remap_extract
9869 *
9870 * Description: This routine returns a vm_map_entry list extracted from a map.
9871 */
9872 static kern_return_t
9873 vm_map_remap_extract(
9874 vm_map_t map,
9875 vm_map_offset_t addr,
9876 vm_map_size_t size,
9877 boolean_t copy,
9878 struct vm_map_header *map_header,
9879 vm_prot_t *cur_protection,
9880 vm_prot_t *max_protection,
9881 /* What, no behavior? */
9882 vm_inherit_t inheritance,
9883 boolean_t pageable)
9884 {
9885 kern_return_t result;
9886 vm_map_size_t mapped_size;
9887 vm_map_size_t tmp_size;
9888 vm_map_entry_t src_entry; /* result of last map lookup */
9889 vm_map_entry_t new_entry;
9890 vm_object_offset_t offset;
9891 vm_map_offset_t map_address;
9892 vm_map_offset_t src_start; /* start of entry to map */
9893 vm_map_offset_t src_end; /* end of region to be mapped */
9894 vm_object_t object;
9895 vm_map_version_t version;
9896 boolean_t src_needs_copy;
9897 boolean_t new_entry_needs_copy;
9898
9899 assert(map != VM_MAP_NULL);
9900 assert(size != 0 && size == vm_map_round_page(size));
9901 assert(inheritance == VM_INHERIT_NONE ||
9902 inheritance == VM_INHERIT_COPY ||
9903 inheritance == VM_INHERIT_SHARE);
9904
9905 /*
9906 * Compute start and end of region.
9907 */
9908 src_start = vm_map_trunc_page(addr);
9909 src_end = vm_map_round_page(src_start + size);
9910
9911 /*
9912 * Initialize map_header.
9913 */
9914 map_header->links.next = (struct vm_map_entry *)&map_header->links;
9915 map_header->links.prev = (struct vm_map_entry *)&map_header->links;
9916 map_header->nentries = 0;
9917 map_header->entries_pageable = pageable;
9918
9919 *cur_protection = VM_PROT_ALL;
9920 *max_protection = VM_PROT_ALL;
9921
9922 map_address = 0;
9923 mapped_size = 0;
9924 result = KERN_SUCCESS;
9925
9926 /*
9927 * The specified source virtual space might correspond to
9928 * multiple map entries, need to loop on them.
9929 */
9930 vm_map_lock(map);
9931 while (mapped_size != size) {
9932 vm_map_size_t entry_size;
9933
9934 /*
9935 * Find the beginning of the region.
9936 */
9937 if (! vm_map_lookup_entry(map, src_start, &src_entry)) {
9938 result = KERN_INVALID_ADDRESS;
9939 break;
9940 }
9941
9942 if (src_start < src_entry->vme_start ||
9943 (mapped_size && src_start != src_entry->vme_start)) {
9944 result = KERN_INVALID_ADDRESS;
9945 break;
9946 }
9947
9948 if(src_entry->is_sub_map) {
9949 result = KERN_INVALID_ADDRESS;
9950 break;
9951 }
9952
9953 tmp_size = size - mapped_size;
9954 if (src_end > src_entry->vme_end)
9955 tmp_size -= (src_end - src_entry->vme_end);
9956
9957 entry_size = (vm_map_size_t)(src_entry->vme_end -
9958 src_entry->vme_start);
9959
9960 if(src_entry->is_sub_map) {
9961 vm_map_reference(src_entry->object.sub_map);
9962 object = VM_OBJECT_NULL;
9963 } else {
9964 object = src_entry->object.vm_object;
9965
9966 if (object == VM_OBJECT_NULL) {
9967 object = vm_object_allocate(entry_size);
9968 src_entry->offset = 0;
9969 src_entry->object.vm_object = object;
9970 } else if (object->copy_strategy !=
9971 MEMORY_OBJECT_COPY_SYMMETRIC) {
9972 /*
9973 * We are already using an asymmetric
9974 * copy, and therefore we already have
9975 * the right object.
9976 */
9977 assert(!src_entry->needs_copy);
9978 } else if (src_entry->needs_copy || object->shadowed ||
9979 (object->internal && !object->true_share &&
9980 !src_entry->is_shared &&
9981 object->size > entry_size)) {
9982
9983 vm_object_shadow(&src_entry->object.vm_object,
9984 &src_entry->offset,
9985 entry_size);
9986
9987 if (!src_entry->needs_copy &&
9988 (src_entry->protection & VM_PROT_WRITE)) {
9989 vm_prot_t prot;
9990
9991 prot = src_entry->protection & ~VM_PROT_WRITE;
9992
9993 if (override_nx(map, src_entry->alias) && prot)
9994 prot |= VM_PROT_EXECUTE;
9995
9996 if(map->mapped) {
9997 vm_object_pmap_protect(
9998 src_entry->object.vm_object,
9999 src_entry->offset,
10000 entry_size,
10001 PMAP_NULL,
10002 src_entry->vme_start,
10003 prot);
10004 } else {
10005 pmap_protect(vm_map_pmap(map),
10006 src_entry->vme_start,
10007 src_entry->vme_end,
10008 prot);
10009 }
10010 }
10011
10012 object = src_entry->object.vm_object;
10013 src_entry->needs_copy = FALSE;
10014 }
10015
10016
10017 vm_object_lock(object);
10018 vm_object_reference_locked(object); /* object ref. for new entry */
10019 if (object->copy_strategy ==
10020 MEMORY_OBJECT_COPY_SYMMETRIC) {
10021 object->copy_strategy =
10022 MEMORY_OBJECT_COPY_DELAY;
10023 }
10024 vm_object_unlock(object);
10025 }
10026
10027 offset = src_entry->offset + (src_start - src_entry->vme_start);
10028
10029 new_entry = _vm_map_entry_create(map_header);
10030 vm_map_entry_copy(new_entry, src_entry);
10031 new_entry->use_pmap = FALSE; /* clr address space specifics */
10032
10033 new_entry->vme_start = map_address;
10034 new_entry->vme_end = map_address + tmp_size;
10035 new_entry->inheritance = inheritance;
10036 new_entry->offset = offset;
10037
10038 /*
10039 * The new region has to be copied now if required.
10040 */
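		/*
		 * Three cases follow: (1) no copy requested -- the source
		 * and new entries simply share the object; (2) the source
		 * is a submap -- mark the new entry copy-on-write; or
		 * (3) a real copy -- try the cheap symmetric copy first
		 * and fall back to a slow or strategic copy (done with the
		 * map unlocked and re-verified afterwards) if that is not
		 * possible or the source is wired.
		 */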
10041 RestartCopy:
10042 if (!copy) {
10043 src_entry->is_shared = TRUE;
10044 new_entry->is_shared = TRUE;
10045 if (!(new_entry->is_sub_map))
10046 new_entry->needs_copy = FALSE;
10047
10048 } else if (src_entry->is_sub_map) {
10049 /* make this a COW sub_map if not already */
10050 new_entry->needs_copy = TRUE;
10051 object = VM_OBJECT_NULL;
10052 } else if (src_entry->wired_count == 0 &&
10053 vm_object_copy_quickly(&new_entry->object.vm_object,
10054 new_entry->offset,
10055 (new_entry->vme_end -
10056 new_entry->vme_start),
10057 &src_needs_copy,
10058 &new_entry_needs_copy)) {
10059
10060 new_entry->needs_copy = new_entry_needs_copy;
10061 new_entry->is_shared = FALSE;
10062
10063 /*
10064 * Handle copy_on_write semantics.
10065 */
10066 if (src_needs_copy && !src_entry->needs_copy) {
10067 vm_prot_t prot;
10068
10069 prot = src_entry->protection & ~VM_PROT_WRITE;
10070
10071 if (override_nx(map, src_entry->alias) && prot)
10072 prot |= VM_PROT_EXECUTE;
10073
10074 vm_object_pmap_protect(object,
10075 offset,
10076 entry_size,
10077 ((src_entry->is_shared
10078 || map->mapped) ?
10079 PMAP_NULL : map->pmap),
10080 src_entry->vme_start,
10081 prot);
10082
10083 src_entry->needs_copy = TRUE;
10084 }
10085 /*
10086 * Throw away the old object reference of the new entry.
10087 */
10088 vm_object_deallocate(object);
10089
10090 } else {
10091 new_entry->is_shared = FALSE;
10092
10093 /*
10094 * The map can be safely unlocked since we
10095 * already hold a reference on the object.
10096 *
10097 * Record the timestamp of the map for later
10098 * verification, and unlock the map.
10099 */
10100 version.main_timestamp = map->timestamp;
10101 vm_map_unlock(map); /* Increments timestamp once! */
10102
10103 /*
10104 * Perform the copy.
10105 */
10106 if (src_entry->wired_count > 0) {
10107 vm_object_lock(object);
10108 result = vm_object_copy_slowly(
10109 object,
10110 offset,
10111 entry_size,
10112 THREAD_UNINT,
10113 &new_entry->object.vm_object);
10114
10115 new_entry->offset = 0;
10116 new_entry->needs_copy = FALSE;
10117 } else {
10118 result = vm_object_copy_strategically(
10119 object,
10120 offset,
10121 entry_size,
10122 &new_entry->object.vm_object,
10123 &new_entry->offset,
10124 &new_entry_needs_copy);
10125
10126 new_entry->needs_copy = new_entry_needs_copy;
10127 }
10128
10129 /*
10130 * Throw away the old object reference of the new entry.
10131 */
10132 vm_object_deallocate(object);
10133
10134 if (result != KERN_SUCCESS &&
10135 result != KERN_MEMORY_RESTART_COPY) {
10136 _vm_map_entry_dispose(map_header, new_entry);
10137 break;
10138 }
10139
10140 /*
10141 * Verify that the map has not substantially
10142 * changed while the copy was being made.
10143 */
10144
10145 vm_map_lock(map);
10146 if (version.main_timestamp + 1 != map->timestamp) {
10147 /*
10148 * Simple version comparison failed.
10149 *
10150 * Retry the lookup and verify that the
10151 * same object/offset are still present.
10152 */
10153 vm_object_deallocate(new_entry->
10154 object.vm_object);
10155 _vm_map_entry_dispose(map_header, new_entry);
10156 if (result == KERN_MEMORY_RESTART_COPY)
10157 result = KERN_SUCCESS;
10158 continue;
10159 }
10160
10161 if (result == KERN_MEMORY_RESTART_COPY) {
10162 vm_object_reference(object);
10163 goto RestartCopy;
10164 }
10165 }
10166
10167 _vm_map_entry_link(map_header,
10168 map_header->links.prev, new_entry);
10169
10170 *cur_protection &= src_entry->protection;
10171 *max_protection &= src_entry->max_protection;
10172
10173 map_address += tmp_size;
10174 mapped_size += tmp_size;
10175 src_start += tmp_size;
10176
10177 } /* end while */
10178
10179 vm_map_unlock(map);
10180 if (result != KERN_SUCCESS) {
10181 /*
10182 * Free all allocated elements.
10183 */
10184 for (src_entry = map_header->links.next;
10185 src_entry != (struct vm_map_entry *)&map_header->links;
10186 src_entry = new_entry) {
10187 new_entry = src_entry->vme_next;
10188 _vm_map_entry_unlink(map_header, src_entry);
10189 vm_object_deallocate(src_entry->object.vm_object);
10190 _vm_map_entry_dispose(map_header, src_entry);
10191 }
10192 }
10193 return result;
10194 }
10195
10196 /*
10197 * Routine: vm_map_remap
10198 *
10199 * Map a portion of a task's address space.
10200 * The mapped region must not overlap more than
10201 * one vm memory object. Protections and
10202 * inheritance attributes remain the same
10203 * as in the original task and are out parameters.
10204 * Source and target task can be identical.
10205 * Other attributes are identical to those for vm_map().
10206 */
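/*
 * Illustrative usage sketch (not part of the original source): a typical
 * in-kernel caller might mirror a region of one map into another roughly
 * as follows. "dst_task_map", "src_task_map", "src_addr" and "len" are
 * hypothetical; mask 0 with anywhere == TRUE lets the kernel choose
 * dst_addr, and copy == FALSE shares the source pages rather than
 * copying them.
 *
 *	vm_map_address_t	dst_addr = 0;
 *	vm_prot_t		cur_prot, max_prot;
 *	kern_return_t		kr;
 *
 *	kr = vm_map_remap(dst_task_map, &dst_addr, len,
 *			  (vm_map_offset_t) 0, TRUE,
 *			  src_task_map, src_addr, FALSE,
 *			  &cur_prot, &max_prot,
 *			  VM_INHERIT_SHARE);
 *	if (kr != KERN_SUCCESS)
 *		return kr;
 */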
10207 kern_return_t
10208 vm_map_remap(
10209 vm_map_t target_map,
10210 vm_map_address_t *address,
10211 vm_map_size_t size,
10212 vm_map_offset_t mask,
10213 boolean_t anywhere,
10214 vm_map_t src_map,
10215 vm_map_offset_t memory_address,
10216 boolean_t copy,
10217 vm_prot_t *cur_protection,
10218 vm_prot_t *max_protection,
10219 vm_inherit_t inheritance)
10220 {
10221 kern_return_t result;
10222 vm_map_entry_t entry;
10223 vm_map_entry_t insp_entry = VM_MAP_ENTRY_NULL;
10224 vm_map_entry_t new_entry;
10225 struct vm_map_header map_header;
10226
10227 if (target_map == VM_MAP_NULL)
10228 return KERN_INVALID_ARGUMENT;
10229
10230 switch (inheritance) {
10231 case VM_INHERIT_NONE:
10232 case VM_INHERIT_COPY:
10233 case VM_INHERIT_SHARE:
10234 if (size != 0 && src_map != VM_MAP_NULL)
10235 break;
10236 /*FALL THRU*/
10237 default:
10238 return KERN_INVALID_ARGUMENT;
10239 }
10240
10241 size = vm_map_round_page(size);
10242
10243 result = vm_map_remap_extract(src_map, memory_address,
10244 size, copy, &map_header,
10245 cur_protection,
10246 max_protection,
10247 inheritance,
10248 target_map->hdr.
10249 entries_pageable);
10250
10251 if (result != KERN_SUCCESS) {
10252 return result;
10253 }
10254
10255 /*
10256 * Allocate/check a range of free virtual address
10257 * space for the target
10258 */
10259 *address = vm_map_trunc_page(*address);
10260 vm_map_lock(target_map);
10261 result = vm_map_remap_range_allocate(target_map, address, size,
10262 mask, anywhere, &insp_entry);
10263
10264 for (entry = map_header.links.next;
10265 entry != (struct vm_map_entry *)&map_header.links;
10266 entry = new_entry) {
10267 new_entry = entry->vme_next;
10268 _vm_map_entry_unlink(&map_header, entry);
10269 if (result == KERN_SUCCESS) {
10270 entry->vme_start += *address;
10271 entry->vme_end += *address;
10272 vm_map_entry_link(target_map, insp_entry, entry);
10273 insp_entry = entry;
10274 } else {
10275 if (!entry->is_sub_map) {
10276 vm_object_deallocate(entry->object.vm_object);
10277 } else {
10278 vm_map_deallocate(entry->object.sub_map);
10279 }
10280 _vm_map_entry_dispose(&map_header, entry);
10281 }
10282 }
10283
10284 if (result == KERN_SUCCESS) {
10285 target_map->size += size;
10286 SAVE_HINT_MAP_WRITE(target_map, insp_entry);
10287 }
10288 vm_map_unlock(target_map);
10289
10290 if (result == KERN_SUCCESS && target_map->wiring_required)
10291 result = vm_map_wire(target_map, *address,
10292 *address + size, *cur_protection, TRUE);
10293 return result;
10294 }
10295
10296 /*
10297 * Routine: vm_map_remap_range_allocate
10298 *
10299 * Description:
10300 * Allocate a range in the specified virtual address map.
10301 * Returns the address and the map entry just before the allocated
10302 * range.
10303 *
10304 * Map must be locked.
10305 */
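/*
 * Condensed sketch of the calling pattern used by vm_map_remap() above
 * (illustrative only): the map stays locked across the allocation and the
 * subsequent entry linking, and on success *map_entry is the entry
 * immediately preceding the allocated range.
 *
 *	vm_map_lock(target_map);
 *	result = vm_map_remap_range_allocate(target_map, &addr, size,
 *					     mask, anywhere, &insp_entry);
 *	if (result == KERN_SUCCESS) {
 *		... link the extracted entries after insp_entry ...
 *	}
 *	vm_map_unlock(target_map);
 */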
10306
10307 static kern_return_t
10308 vm_map_remap_range_allocate(
10309 vm_map_t map,
10310 vm_map_address_t *address, /* IN/OUT */
10311 vm_map_size_t size,
10312 vm_map_offset_t mask,
10313 boolean_t anywhere,
10314 vm_map_entry_t *map_entry) /* OUT */
10315 {
10316 register vm_map_entry_t entry;
10317 register vm_map_offset_t start;
10318 register vm_map_offset_t end;
10319
10320 StartAgain: ;
10321
10322 start = *address;
10323
10324 if (anywhere)
10325 {
10326 /*
10327 * Calculate the first possible address.
10328 */
10329
10330 if (start < map->min_offset)
10331 start = map->min_offset;
10332 if (start > map->max_offset)
10333 return(KERN_NO_SPACE);
10334
10335 /*
10336 * Look for the first possible address;
10337 * if there's already something at this
10338 * address, we have to start after it.
10339 */
10340
10341 assert(first_free_is_valid(map));
10342 if (start == map->min_offset) {
10343 if ((entry = map->first_free) != vm_map_to_entry(map))
10344 start = entry->vme_end;
10345 } else {
10346 vm_map_entry_t tmp_entry;
10347 if (vm_map_lookup_entry(map, start, &tmp_entry))
10348 start = tmp_entry->vme_end;
10349 entry = tmp_entry;
10350 }
10351
10352 /*
10353 * In any case, the "entry" always precedes
10354 * the proposed new region throughout the
10355 * loop:
10356 */
10357
10358 while (TRUE) {
10359 register vm_map_entry_t next;
10360
10361 /*
10362 * Find the end of the proposed new region.
10363 * Be sure we didn't go beyond the end, or
10364 * wrap around the address.
10365 */
10366
10367 end = ((start + mask) & ~mask);
10368 if (end < start)
10369 return(KERN_NO_SPACE);
10370 start = end;
10371 end += size;
10372
10373 if ((end > map->max_offset) || (end < start)) {
10374 if (map->wait_for_space) {
10375 if (size <= (map->max_offset -
10376 map->min_offset)) {
10377 assert_wait((event_t) map, THREAD_INTERRUPTIBLE);
10378 vm_map_unlock(map);
10379 thread_block(THREAD_CONTINUE_NULL);
10380 vm_map_lock(map);
10381 goto StartAgain;
10382 }
10383 }
10384
10385 return(KERN_NO_SPACE);
10386 }
10387
10388 /*
10389 * If there are no more entries, we must win.
10390 */
10391
10392 next = entry->vme_next;
10393 if (next == vm_map_to_entry(map))
10394 break;
10395
10396 /*
10397 * If there is another entry, it must be
10398 * after the end of the potential new region.
10399 */
10400
10401 if (next->vme_start >= end)
10402 break;
10403
10404 /*
10405 * Didn't fit -- move to the next entry.
10406 */
10407
10408 entry = next;
10409 start = entry->vme_end;
10410 }
10411 *address = start;
10412 } else {
10413 vm_map_entry_t temp_entry;
10414
10415 /*
10416 * Verify that:
10417 * the address doesn't itself violate
10418 * the mask requirement.
10419 */
10420
10421 if ((start & mask) != 0)
10422 return(KERN_NO_SPACE);
10423
10424
10425 /*
10426 * ... the address is within bounds
10427 */
10428
10429 end = start + size;
10430
10431 if ((start < map->min_offset) ||
10432 (end > map->max_offset) ||
10433 (start >= end)) {
10434 return(KERN_INVALID_ADDRESS);
10435 }
10436
10437 /*
10438 * ... the starting address isn't allocated
10439 */
10440
10441 if (vm_map_lookup_entry(map, start, &temp_entry))
10442 return(KERN_NO_SPACE);
10443
10444 entry = temp_entry;
10445
10446 /*
10447 * ... the next region doesn't overlap the
10448 * end point.
10449 */
10450
10451 if ((entry->vme_next != vm_map_to_entry(map)) &&
10452 (entry->vme_next->vme_start < end))
10453 return(KERN_NO_SPACE);
10454 }
10455 *map_entry = entry;
10456 return(KERN_SUCCESS);
10457 }
10458
10459 /*
10460 * vm_map_switch:
10461 *
10462 * Set the address map for the current thread to the specified map
10463 */
10464
10465 vm_map_t
10466 vm_map_switch(
10467 vm_map_t map)
10468 {
10469 int mycpu;
10470 thread_t thread = current_thread();
10471 vm_map_t oldmap = thread->map;
10472
10473 mp_disable_preemption();
10474 mycpu = cpu_number();
10475
10476 /*
10477 * Deactivate the current map and activate the requested map
10478 */
10479 PMAP_SWITCH_USER(thread, map, mycpu);
10480
10481 mp_enable_preemption();
10482 return(oldmap);
10483 }
10484
10485
10486 /*
10487 * Routine: vm_map_write_user
10488 *
10489 * Description:
10490 * Copy out data from kernel space into space in the
10491 * destination map. The space must already exist in the
10492 * destination map.
10493 * NOTE: This routine should only be called by threads
10494 * which can block on a page fault, i.e. kernel-mode user
10495 * threads.
10496 *
10497 */
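/*
 * Illustrative sketch (not part of the original source): copying a small
 * kernel structure out to a user address in another task's map. "user_map",
 * "user_addr" and "struct foo" are hypothetical; the calling thread must be
 * able to block on a page fault.
 *
 *	struct foo	kbuf;
 *	kern_return_t	kr;
 *
 *	kr = vm_map_write_user(user_map, &kbuf, user_addr, sizeof (kbuf));
 *	if (kr != KERN_SUCCESS)
 *		... the destination range was not addressable ...
 */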
10498 kern_return_t
10499 vm_map_write_user(
10500 vm_map_t map,
10501 void *src_p,
10502 vm_map_address_t dst_addr,
10503 vm_size_t size)
10504 {
10505 kern_return_t kr = KERN_SUCCESS;
10506
10507 if(current_map() == map) {
10508 if (copyout(src_p, dst_addr, size)) {
10509 kr = KERN_INVALID_ADDRESS;
10510 }
10511 } else {
10512 vm_map_t oldmap;
10513
10514 /* take on the identity of the target map while doing */
10515 /* the transfer */
10516
10517 vm_map_reference(map);
10518 oldmap = vm_map_switch(map);
10519 if (copyout(src_p, dst_addr, size)) {
10520 kr = KERN_INVALID_ADDRESS;
10521 }
10522 vm_map_switch(oldmap);
10523 vm_map_deallocate(map);
10524 }
10525 return kr;
10526 }
10527
10528 /*
10529 * Routine: vm_map_read_user
10530 *
10531 * Description:
10532 * Copy in data from a user space source map into the
10533 * kernel map. The space must already exist in the
10534 * kernel map.
10535 * NOTE: This routine should only be called by threads
10536 * which can block on a page fault, i.e. kernel-mode user
10537 * threads.
10538 *
10539 */
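/*
 * Illustrative sketch (not part of the original source), mirroring the
 * write case above: pulling a small structure in from a user address.
 * "user_map", "user_addr" and "struct foo" are hypothetical.
 *
 *	struct foo	kbuf;
 *
 *	if (vm_map_read_user(user_map, user_addr, &kbuf, sizeof (kbuf))
 *	    != KERN_SUCCESS)
 *		... handle the unreadable source address ...
 */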
10540 kern_return_t
10541 vm_map_read_user(
10542 vm_map_t map,
10543 vm_map_address_t src_addr,
10544 void *dst_p,
10545 vm_size_t size)
10546 {
10547 kern_return_t kr = KERN_SUCCESS;
10548
10549 if(current_map() == map) {
10550 if (copyin(src_addr, dst_p, size)) {
10551 kr = KERN_INVALID_ADDRESS;
10552 }
10553 } else {
10554 vm_map_t oldmap;
10555
10556 /* take on the identity of the target map while doing */
10557 /* the transfer */
10558
10559 vm_map_reference(map);
10560 oldmap = vm_map_switch(map);
10561 if (copyin(src_addr, dst_p, size)) {
10562 kr = KERN_INVALID_ADDRESS;
10563 }
10564 vm_map_switch(oldmap);
10565 vm_map_deallocate(map);
10566 }
10567 return kr;
10568 }
10569
10570
10571 /*
10572 * vm_map_check_protection:
10573 *
10574 * Assert that the target map allows the specified
10575 * privilege on the entire address region given.
10576 * The entire region must be allocated.
10577 */
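/*
 * Illustrative sketch (not part of the original source): asserting that a
 * caller-supplied range is fully mapped with read/write access before
 * operating on it. "addr" and "len" are hypothetical.
 *
 *	if (!vm_map_check_protection(map, addr, addr + len,
 *				     VM_PROT_READ | VM_PROT_WRITE))
 *		return KERN_PROTECTION_FAILURE;
 */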
10578 boolean_t
10579 vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
10580 vm_map_offset_t end, vm_prot_t protection)
10581 {
10582 vm_map_entry_t entry;
10583 vm_map_entry_t tmp_entry;
10584
10585 vm_map_lock(map);
10586
10587 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
10588 {
10589 vm_map_unlock(map);
10590 return (FALSE);
10591 }
10592
10593 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
10594 vm_map_unlock(map);
10595 return(FALSE);
10596 }
10597
10598 entry = tmp_entry;
10599
10600 while (start < end) {
10601 if (entry == vm_map_to_entry(map)) {
10602 vm_map_unlock(map);
10603 return(FALSE);
10604 }
10605
10606 /*
10607 * No holes allowed!
10608 */
10609
10610 if (start < entry->vme_start) {
10611 vm_map_unlock(map);
10612 return(FALSE);
10613 }
10614
10615 /*
10616 * Check protection associated with entry.
10617 */
10618
10619 if ((entry->protection & protection) != protection) {
10620 vm_map_unlock(map);
10621 return(FALSE);
10622 }
10623
10624 /* go to next entry */
10625
10626 start = entry->vme_end;
10627 entry = entry->vme_next;
10628 }
10629 vm_map_unlock(map);
10630 return(TRUE);
10631 }
10632
10633 kern_return_t
10634 vm_map_purgable_control(
10635 vm_map_t map,
10636 vm_map_offset_t address,
10637 vm_purgable_t control,
10638 int *state)
10639 {
10640 vm_map_entry_t entry;
10641 vm_object_t object;
10642 kern_return_t kr;
10643
10644 /*
10645 * Vet all the input parameters and current type and state of the
10646 * underlying object. Return with an error if anything is amiss.
10647 */
10648 if (map == VM_MAP_NULL)
10649 return(KERN_INVALID_ARGUMENT);
10650
10651 if (control != VM_PURGABLE_SET_STATE &&
10652 control != VM_PURGABLE_GET_STATE)
10653 return(KERN_INVALID_ARGUMENT);
10654
10655 if (control == VM_PURGABLE_SET_STATE &&
10656 (((*state & ~(VM_PURGABLE_STATE_MASK|VM_VOLATILE_ORDER_MASK|VM_PURGABLE_ORDERING_MASK|VM_PURGABLE_BEHAVIOR_MASK|VM_VOLATILE_GROUP_MASK)) != 0) ||
10657 ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK)))
10658 return(KERN_INVALID_ARGUMENT);
10659
10660 vm_map_lock(map);
10661
10662 if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {
10663
10664 /*
10665 * Must pass a valid non-submap address.
10666 */
10667 vm_map_unlock(map);
10668 return(KERN_INVALID_ADDRESS);
10669 }
10670
10671 if ((entry->protection & VM_PROT_WRITE) == 0) {
10672 /*
10673 * Can't apply purgable controls to something you can't write.
10674 */
10675 vm_map_unlock(map);
10676 return(KERN_PROTECTION_FAILURE);
10677 }
10678
10679 object = entry->object.vm_object;
10680 if (object == VM_OBJECT_NULL) {
10681 /*
10682 * Object must already be present or it can't be purgable.
10683 */
10684 vm_map_unlock(map);
10685 return KERN_INVALID_ARGUMENT;
10686 }
10687
10688 vm_object_lock(object);
10689
10690 if (entry->offset != 0 ||
10691 entry->vme_end - entry->vme_start != object->size) {
10692 /*
10693 * Can only apply purgable controls to the whole (existing)
10694 * object at once.
10695 */
10696 vm_map_unlock(map);
10697 vm_object_unlock(object);
10698 return KERN_INVALID_ARGUMENT;
10699 }
10700
10701 vm_map_unlock(map);
10702
10703 kr = vm_object_purgable_control(object, control, state);
10704
10705 vm_object_unlock(object);
10706
10707 return kr;
10708 }
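/*
 * Illustrative sketch (not part of the original source) of the routine
 * above: marking an entire purgeable allocation volatile and later checking
 * whether it was reclaimed. "map" and "addr" are hypothetical; addr must lie
 * in a writable entry covering a whole purgeable object, and the state
 * constants are assumed to be the usual ones from <mach/vm_purgable.h>.
 *
 *	int		state;
 *	kern_return_t	kr;
 *
 *	state = VM_PURGABLE_VOLATILE;
 *	kr = vm_map_purgable_control(map, addr, VM_PURGABLE_SET_STATE, &state);
 *
 *	state = 0;
 *	kr = vm_map_purgable_control(map, addr, VM_PURGABLE_GET_STATE, &state);
 *	if (kr == KERN_SUCCESS && state == VM_PURGABLE_EMPTY)
 *		... contents were reclaimed and must be regenerated ...
 */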
10709
10710 kern_return_t
10711 vm_map_page_info(
10712 vm_map_t target_map,
10713 vm_map_offset_t offset,
10714 int *disposition,
10715 int *ref_count)
10716 {
10717 vm_map_entry_t map_entry;
10718 vm_object_t object;
10719 vm_page_t m;
10720 kern_return_t kr;
10721 kern_return_t retval = KERN_SUCCESS;
10722 boolean_t top_object = TRUE;
10723
10724 *disposition = 0;
10725 *ref_count = 0;
10726
10727 vm_map_lock_read(target_map);
10728
10729 restart_page_query:
10730 if (!vm_map_lookup_entry(target_map, offset, &map_entry)) {
10731 vm_map_unlock_read(target_map);
10732 return KERN_FAILURE;
10733 }
10734 offset -= map_entry->vme_start; /* adjust to offset within entry */
10735 offset += map_entry->offset; /* adjust to target object offset */
10736
10737 if (map_entry->object.vm_object != VM_OBJECT_NULL) {
10738 if (!map_entry->is_sub_map) {
10739 object = map_entry->object.vm_object;
10740 } else {
10741 vm_map_t sub_map;
10742
10743 sub_map = map_entry->object.sub_map;
10744 vm_map_lock_read(sub_map);
10745 vm_map_unlock_read(target_map);
10746
10747 target_map = sub_map;
10748 goto restart_page_query;
10749 }
10750 } else {
10751 vm_map_unlock_read(target_map);
10752 return KERN_SUCCESS;
10753 }
10754 vm_object_lock(object);
10755 vm_map_unlock_read(target_map);
10756
10757 while (TRUE) {
10758 m = vm_page_lookup(object, offset);
10759
10760 if (m != VM_PAGE_NULL) {
10761 *disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
10762 break;
10763 } else {
10764 #if MACH_PAGEMAP
10765 if (object->existence_map) {
10766 if (vm_external_state_get(object->existence_map, offset)
10767 == VM_EXTERNAL_STATE_EXISTS) {
10768 /*
10769 * this page has been paged out
10770 */
10771 *disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
10772 break;
10773 }
10774 } else
10775 #endif
10776 if (object->internal &&
10777 object->alive &&
10778 !object->terminating &&
10779 object->pager_ready) {
10780
10781 memory_object_t pager;
10782
10783 vm_object_paging_begin(object);
10784 pager = object->pager;
10785 vm_object_unlock(object);
10786
10787 kr = memory_object_data_request(
10788 pager,
10789 offset + object->paging_offset,
10790 0, /* just poke the pager */
10791 VM_PROT_READ,
10792 NULL);
10793
10794 vm_object_lock(object);
10795 vm_object_paging_end(object);
10796
10797 if (kr == KERN_SUCCESS) {
10798 /*
10799 * the pager has this page
10800 */
10801 *disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
10802 break;
10803 }
10804 }
10805 if (object->shadow != VM_OBJECT_NULL) {
10806 vm_object_t shadow;
10807
10808 offset += object->shadow_offset;
10809 shadow = object->shadow;
10810
10811 vm_object_lock(shadow);
10812 vm_object_unlock(object);
10813
10814 object = shadow;
10815 top_object = FALSE;
10816 } else {
10817 if (!object->internal)
10818 break;
10819
10820 retval = KERN_FAILURE;
10821 goto page_query_done;
10822 }
10823 }
10824 }
10825 /* The ref_count is not strictly accurate; it measures the number */
10826 /* of entities holding a ref on the object. They may not be mapping */
10827 /* the object, or may not be mapping the section holding the */
10828 /* target page, but it is still a ballpark number and, though an over- */
10829 /* count, it picks up the copy-on-write cases. */
10830
10831 /* We could also get a picture of page sharing from pmap_attributes, */
10832 /* but this would undercount, as only faulted-in mappings would */
10833 /* show up. */
10834
10835 *ref_count = object->ref_count;
10836
10837 if (top_object == TRUE && object->shadow)
10838 *disposition |= VM_PAGE_QUERY_PAGE_COPIED;
10839
10840 if (m == VM_PAGE_NULL)
10841 goto page_query_done;
10842
10843 if (m->fictitious) {
10844 *disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
10845 goto page_query_done;
10846 }
10847 if (m->dirty || pmap_is_modified(m->phys_page))
10848 *disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
10849
10850 if (m->reference || pmap_is_referenced(m->phys_page))
10851 *disposition |= VM_PAGE_QUERY_PAGE_REF;
10852
10853 if (m->speculative)
10854 *disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE;
10855
10856 page_query_done:
10857 vm_object_unlock(object);
10858
10859 return retval;
10860 }
10861
10862 /*
10863 * vm_map_msync
10864 *
10865 * Synchronizes the specified memory range with its backing store
10866 * image by either flushing or cleaning the contents to the appropriate
10867 * memory manager, engaging in a memory object synchronize dialog with
10868 * the manager. The client doesn't return until the manager issues an
10869 * m_o_s_completed message. MIG magically converts the user task parameter
10870 * to the task's address map.
10871 *
10872 * interpretation of sync_flags
10873 * VM_SYNC_INVALIDATE - discard pages, only return precious
10874 * pages to manager.
10875 *
10876 * VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
10877 * - discard pages, write dirty or precious
10878 * pages back to memory manager.
10879 *
10880 * VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
10881 * - write dirty or precious pages back to
10882 * the memory manager.
10883 *
10884 * VM_SYNC_CONTIGUOUS - does everything normally, but if there
10885 * is a hole in the region, and we would
10886 * have returned KERN_SUCCESS, return
10887 * KERN_INVALID_ADDRESS instead.
10888 *
10889 * NOTE
10890 * The memory object attributes have not yet been implemented; this
10891 * function will have to deal with the invalidate attribute.
10892 *
10893 * RETURNS
10894 * KERN_INVALID_TASK Bad task parameter
10895 * KERN_INVALID_ARGUMENT both sync and async were specified.
10896 * KERN_SUCCESS The usual.
10897 * KERN_INVALID_ADDRESS There was a hole in the region.
10898 */
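/*
 * Illustrative sketch (not part of the original source): flushing a range
 * back to its pager synchronously and treating any hole in the range as an
 * error. "map", "addr" and "len" are hypothetical.
 *
 *	kr = vm_map_msync(map, addr, len,
 *			  VM_SYNC_SYNCHRONOUS | VM_SYNC_CONTIGUOUS);
 *	if (kr == KERN_INVALID_ADDRESS)
 *		... part of the range was unmapped ...
 */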
10899
10900 kern_return_t
10901 vm_map_msync(
10902 vm_map_t map,
10903 vm_map_address_t address,
10904 vm_map_size_t size,
10905 vm_sync_t sync_flags)
10906 {
10907 msync_req_t msr;
10908 msync_req_t new_msr;
10909 queue_chain_t req_q; /* queue of requests for this msync */
10910 vm_map_entry_t entry;
10911 vm_map_size_t amount_left;
10912 vm_object_offset_t offset;
10913 boolean_t do_sync_req;
10914 boolean_t modifiable;
10915 boolean_t had_hole = FALSE;
10916 memory_object_t pager;
10917
10918 if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
10919 (sync_flags & VM_SYNC_SYNCHRONOUS))
10920 return(KERN_INVALID_ARGUMENT);
10921
10922 /*
10923 * align address and size on page boundaries
10924 */
10925 size = vm_map_round_page(address + size) - vm_map_trunc_page(address);
10926 address = vm_map_trunc_page(address);
10927
10928 if (map == VM_MAP_NULL)
10929 return(KERN_INVALID_TASK);
10930
10931 if (size == 0)
10932 return(KERN_SUCCESS);
10933
10934 queue_init(&req_q);
10935 amount_left = size;
10936
10937 while (amount_left > 0) {
10938 vm_object_size_t flush_size;
10939 vm_object_t object;
10940
10941 vm_map_lock(map);
10942 if (!vm_map_lookup_entry(map,
10943 vm_map_trunc_page(address), &entry)) {
10944
10945 vm_map_size_t skip;
10946
10947 /*
10948 * hole in the address map.
10949 */
10950 had_hole = TRUE;
10951
10952 /*
10953 * Check for empty map.
10954 */
10955 if (entry == vm_map_to_entry(map) &&
10956 entry->vme_next == entry) {
10957 vm_map_unlock(map);
10958 break;
10959 }
10960 /*
10961 * Check that we don't wrap and that
10962 * we have at least one real map entry.
10963 */
10964 if ((map->hdr.nentries == 0) ||
10965 (entry->vme_next->vme_start < address)) {
10966 vm_map_unlock(map);
10967 break;
10968 }
10969 /*
10970 * Move up to the next entry if needed
10971 */
10972 skip = (entry->vme_next->vme_start - address);
10973 if (skip >= amount_left)
10974 amount_left = 0;
10975 else
10976 amount_left -= skip;
10977 address = entry->vme_next->vme_start;
10978 vm_map_unlock(map);
10979 continue;
10980 }
10981
10982 offset = address - entry->vme_start;
10983
10984 /*
10985 * do we have more to flush than is contained in this
10986 * entry ?
10987 */
10988 if (amount_left + entry->vme_start + offset > entry->vme_end) {
10989 flush_size = entry->vme_end -
10990 (entry->vme_start + offset);
10991 } else {
10992 flush_size = amount_left;
10993 }
10994 amount_left -= flush_size;
10995 address += flush_size;
10996
10997 if (entry->is_sub_map == TRUE) {
10998 vm_map_t local_map;
10999 vm_map_offset_t local_offset;
11000
11001 local_map = entry->object.sub_map;
11002 local_offset = entry->offset;
11003 vm_map_unlock(map);
11004 if (vm_map_msync(
11005 local_map,
11006 local_offset,
11007 flush_size,
11008 sync_flags) == KERN_INVALID_ADDRESS) {
11009 had_hole = TRUE;
11010 }
11011 continue;
11012 }
11013 object = entry->object.vm_object;
11014
11015 /*
11016 * We can't sync this object if the object has not been
11017 * created yet
11018 */
11019 if (object == VM_OBJECT_NULL) {
11020 vm_map_unlock(map);
11021 continue;
11022 }
11023 offset += entry->offset;
11024 modifiable = (entry->protection & VM_PROT_WRITE)
11025 != VM_PROT_NONE;
11026
11027 vm_object_lock(object);
11028
11029 if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
11030 boolean_t kill_pages = 0;
11031
11032 if (sync_flags & VM_SYNC_KILLPAGES) {
11033 if (object->ref_count == 1 && !entry->needs_copy && !object->shadow)
11034 kill_pages = 1;
11035 else
11036 kill_pages = -1;
11037 }
11038 if (kill_pages != -1)
11039 vm_object_deactivate_pages(object, offset,
11040 (vm_object_size_t)flush_size, kill_pages);
11041 vm_object_unlock(object);
11042 vm_map_unlock(map);
11043 continue;
11044 }
11045 /*
11046 * We can't sync this object if there isn't a pager.
11047 * Don't bother to sync internal objects, since there can't
11048 * be any "permanent" storage for these objects anyway.
11049 */
11050 if ((object->pager == MEMORY_OBJECT_NULL) ||
11051 (object->internal) || (object->private)) {
11052 vm_object_unlock(object);
11053 vm_map_unlock(map);
11054 continue;
11055 }
11056 /*
11057 * keep reference on the object until syncing is done
11058 */
11059 vm_object_reference_locked(object);
11060 vm_object_unlock(object);
11061
11062 vm_map_unlock(map);
11063
11064 do_sync_req = vm_object_sync(object,
11065 offset,
11066 flush_size,
11067 sync_flags & VM_SYNC_INVALIDATE,
11068 (modifiable &&
11069 (sync_flags & VM_SYNC_SYNCHRONOUS ||
11070 sync_flags & VM_SYNC_ASYNCHRONOUS)),
11071 sync_flags & VM_SYNC_SYNCHRONOUS);
11072 /*
11073 * only send an m_o_s if we returned pages or if the entry
11074 * is writable (i.e. dirty pages may have already been sent back)
11075 */
11076 if (!do_sync_req && !modifiable) {
11077 if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) {
11078 /*
11079 * clear out the clustering and read-ahead hints
11080 */
11081 vm_object_lock(object);
11082
11083 object->pages_created = 0;
11084 object->pages_used = 0;
11085 object->sequential = 0;
11086 object->last_alloc = 0;
11087
11088 vm_object_unlock(object);
11089 }
11090 vm_object_deallocate(object);
11091 continue;
11092 }
11093 msync_req_alloc(new_msr);
11094
11095 vm_object_lock(object);
11096 offset += object->paging_offset;
11097
11098 new_msr->offset = offset;
11099 new_msr->length = flush_size;
11100 new_msr->object = object;
11101 new_msr->flag = VM_MSYNC_SYNCHRONIZING;
11102 re_iterate:
11103
11104 /*
11105 * We can't sync this object if there isn't a pager. The
11106 * pager can disappear anytime we're not holding the object
11107 * lock. So this has to be checked anytime we goto re_iterate.
11108 */
11109
11110 pager = object->pager;
11111
11112 if (pager == MEMORY_OBJECT_NULL) {
11113 vm_object_unlock(object);
11114 vm_object_deallocate(object);
11115 continue;
11116 }
11117
11118 queue_iterate(&object->msr_q, msr, msync_req_t, msr_q) {
11119 /*
11120 * need to check for overlapping entry, if found, wait
11121 * on overlapping msr to be done, then reiterate
11122 */
11123 msr_lock(msr);
11124 if (msr->flag == VM_MSYNC_SYNCHRONIZING &&
11125 ((offset >= msr->offset &&
11126 offset < (msr->offset + msr->length)) ||
11127 (msr->offset >= offset &&
11128 msr->offset < (offset + flush_size))))
11129 {
11130 assert_wait((event_t) msr,THREAD_INTERRUPTIBLE);
11131 msr_unlock(msr);
11132 vm_object_unlock(object);
11133 thread_block(THREAD_CONTINUE_NULL);
11134 vm_object_lock(object);
11135 goto re_iterate;
11136 }
11137 msr_unlock(msr);
11138 }/* queue_iterate */
11139
11140 queue_enter(&object->msr_q, new_msr, msync_req_t, msr_q);
11141
11142 vm_object_paging_begin(object);
11143 vm_object_unlock(object);
11144
11145 queue_enter(&req_q, new_msr, msync_req_t, req_q);
11146
11147 (void) memory_object_synchronize(
11148 pager,
11149 offset,
11150 flush_size,
11151 sync_flags & ~VM_SYNC_CONTIGUOUS);
11152
11153 vm_object_lock(object);
11154 vm_object_paging_end(object);
11155 vm_object_unlock(object);
11156 }/* while */
11157
11158 /*
11159 * wait for memory_object_synchronize_completed messages from pager(s)
11160 */
11161
11162 while (!queue_empty(&req_q)) {
11163 msr = (msync_req_t)queue_first(&req_q);
11164 msr_lock(msr);
11165 while(msr->flag != VM_MSYNC_DONE) {
11166 assert_wait((event_t) msr, THREAD_INTERRUPTIBLE);
11167 msr_unlock(msr);
11168 thread_block(THREAD_CONTINUE_NULL);
11169 msr_lock(msr);
11170 }/* while */
11171 queue_remove(&req_q, msr, msync_req_t, req_q);
11172 msr_unlock(msr);
11173 vm_object_deallocate(msr->object);
11174 msync_req_free(msr);
11175 }/* queue_iterate */
11176
11177 /* for proper msync() behaviour */
11178 if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS))
11179 return(KERN_INVALID_ADDRESS);
11180
11181 return(KERN_SUCCESS);
11182 }/* vm_msync */
11183
11184 /*
11185 * Routine: convert_port_entry_to_map
11186 * Purpose:
11187 * Convert from a port specifying an entry or a task
11188 * to a map. Doesn't consume the port ref; produces a map ref,
11189 * which may be null. Unlike convert_port_to_map, the
11190 * port may be task-backed or named-entry-backed.
11191 * Conditions:
11192 * Nothing locked.
11193 */
11194
11195
11196 vm_map_t
11197 convert_port_entry_to_map(
11198 ipc_port_t port)
11199 {
11200 vm_map_t map;
11201 vm_named_entry_t named_entry;
11202 uint32_t try_failed_count = 0;
11203
11204 if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
11205 while(TRUE) {
11206 ip_lock(port);
11207 if(ip_active(port) && (ip_kotype(port)
11208 == IKOT_NAMED_ENTRY)) {
11209 named_entry =
11210 (vm_named_entry_t)port->ip_kobject;
11211 if (!(mutex_try(&(named_entry)->Lock))) {
11212 ip_unlock(port);
11213
11214 try_failed_count++;
11215 mutex_pause(try_failed_count);
11216 continue;
11217 }
11218 named_entry->ref_count++;
11219 mutex_unlock(&(named_entry)->Lock);
11220 ip_unlock(port);
11221 if ((named_entry->is_sub_map) &&
11222 (named_entry->protection
11223 & VM_PROT_WRITE)) {
11224 map = named_entry->backing.map;
11225 } else {
11226 mach_destroy_memory_entry(port);
11227 return VM_MAP_NULL;
11228 }
11229 vm_map_reference_swap(map);
11230 mach_destroy_memory_entry(port);
11231 break;
11232 }
11233 else
11234 return VM_MAP_NULL;
11235 }
11236 }
11237 else
11238 map = convert_port_to_map(port);
11239
11240 return map;
11241 }
11242
11243 /*
11244 * Routine: convert_port_entry_to_object
11245 * Purpose:
11246 * Convert from a port specifying a named entry to an
11247 * object. Doesn't consume the port ref; produces an object ref,
11248 * which may be null.
11249 * Conditions:
11250 * Nothing locked.
11251 */
11252
11253
11254 vm_object_t
11255 convert_port_entry_to_object(
11256 ipc_port_t port)
11257 {
11258 vm_object_t object;
11259 vm_named_entry_t named_entry;
11260 uint32_t try_failed_count = 0;
11261
11262 if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
11263 while(TRUE) {
11264 ip_lock(port);
11265 if(ip_active(port) && (ip_kotype(port)
11266 == IKOT_NAMED_ENTRY)) {
11267 named_entry =
11268 (vm_named_entry_t)port->ip_kobject;
11269 if (!(mutex_try(&(named_entry)->Lock))) {
11270 ip_unlock(port);
11271
11272 try_failed_count++;
11273 mutex_pause(try_failed_count);
11274 continue;
11275 }
11276 named_entry->ref_count++;
11277 mutex_unlock(&(named_entry)->Lock);
11278 ip_unlock(port);
11279 if ((!named_entry->is_sub_map) &&
11280 (!named_entry->is_pager) &&
11281 (named_entry->protection
11282 & VM_PROT_WRITE)) {
11283 object = named_entry->backing.object;
11284 } else {
11285 mach_destroy_memory_entry(port);
11286 return (vm_object_t)NULL;
11287 }
11288 vm_object_reference(named_entry->backing.object);
11289 mach_destroy_memory_entry(port);
11290 break;
11291 }
11292 else
11293 return (vm_object_t)NULL;
11294 }
11295 } else {
11296 return (vm_object_t)NULL;
11297 }
11298
11299 return object;
11300 }
11301
11302 /*
11303 * Export routines to other components for the things we access locally through
11304 * macros.
11305 */
11306 #undef current_map
11307 vm_map_t
11308 current_map(void)
11309 {
11310 return (current_map_fast());
11311 }
11312
11313 /*
11314 * vm_map_reference:
11315 *
11316 * Most code internal to the osfmk will go through a
11317 * macro defining this. This is always here for the
11318 * use of other kernel components.
11319 */
11320 #undef vm_map_reference
11321 void
11322 vm_map_reference(
11323 register vm_map_t map)
11324 {
11325 if (map == VM_MAP_NULL)
11326 return;
11327
11328 mutex_lock(&map->s_lock);
11329 #if TASK_SWAPPER
11330 assert(map->res_count > 0);
11331 assert(map->ref_count >= map->res_count);
11332 map->res_count++;
11333 #endif
11334 map->ref_count++;
11335 mutex_unlock(&map->s_lock);
11336 }
11337
11338 /*
11339 * vm_map_deallocate:
11340 *
11341 * Removes a reference from the specified map,
11342 * destroying it if no references remain.
11343 * The map should not be locked.
11344 */
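/*
 * Illustrative sketch (not part of the original source): references taken
 * with vm_map_reference() are dropped with vm_map_deallocate() once the
 * caller is done with the map, as vm_map_write_user() does above while
 * temporarily switching maps.
 *
 *	vm_map_reference(map);
 *	... use the map, possibly blocking ...
 *	vm_map_deallocate(map);
 */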
11345 void
11346 vm_map_deallocate(
11347 register vm_map_t map)
11348 {
11349 unsigned int ref;
11350
11351 if (map == VM_MAP_NULL)
11352 return;
11353
11354 mutex_lock(&map->s_lock);
11355 ref = --map->ref_count;
11356 if (ref > 0) {
11357 vm_map_res_deallocate(map);
11358 mutex_unlock(&map->s_lock);
11359 return;
11360 }
11361 assert(map->ref_count == 0);
11362 mutex_unlock(&map->s_lock);
11363
11364 #if TASK_SWAPPER
11365 /*
11366 * The map residence count isn't decremented here because
11367 * the vm_map_delete below will traverse the entire map,
11368 * deleting entries, and the residence counts on objects
11369 * and sharing maps will go away then.
11370 */
11371 #endif
11372
11373 vm_map_destroy(map, VM_MAP_NO_FLAGS);
11374 }
11375
11376
11377 void
11378 vm_map_disable_NX(vm_map_t map)
11379 {
11380 if (map == NULL)
11381 return;
11382 if (map->pmap == NULL)
11383 return;
11384
11385 pmap_disable_NX(map->pmap);
11386 }
11387
11388 /* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
11389 * more descriptive.
11390 */
11391 void
11392 vm_map_set_32bit(vm_map_t map)
11393 {
11394 map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
11395 }
11396
11397
11398 void
11399 vm_map_set_64bit(vm_map_t map)
11400 {
11401 map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
11402 }
11403
11404 vm_map_offset_t
11405 vm_compute_max_offset(unsigned is64)
11406 {
11407 return (is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS);
11408 }
11409
11410 boolean_t
11411 vm_map_is_64bit(
11412 vm_map_t map)
11413 {
11414 return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
11415 }
11416
11417 boolean_t
11418 vm_map_has_4GB_pagezero(
11419 vm_map_t map)
11420 {
11421 /*
11422 * XXX FBDP
11423 * We should lock the VM map (for read) here but we can get away
11424 * with it for now because there can't really be any race condition:
11425 * the VM map's min_offset is changed only when the VM map is created
11426 * and when the zero page is established (when the binary gets loaded),
11427 * and this routine gets called only when the task terminates and the
11428 * VM map is being torn down, and when a new map is created via
11429 * load_machfile()/execve().
11430 */
11431 return (map->min_offset >= 0x100000000ULL);
11432 }
11433
11434 void
11435 vm_map_set_4GB_pagezero(vm_map_t map)
11436 {
11437 pmap_set_4GB_pagezero(map->pmap);
11438 }
11439
11440 void
11441 vm_map_clear_4GB_pagezero(vm_map_t map)
11442 {
11443 pmap_clear_4GB_pagezero(map->pmap);
11444 }
11445
11446 /*
11447 * Raise a VM map's minimum offset,
11448 * to strictly enforce the "page zero" reservation.
11449 */
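/*
 * Illustrative sketch (not part of the original source): reserving a
 * single-page "page zero" in a freshly created map by raising its minimum
 * offset before anything is mapped. PAGE_SIZE is the usual page-size
 * constant.
 *
 *	kr = vm_map_raise_min_offset(map, (vm_map_offset_t) PAGE_SIZE);
 *	if (kr != KERN_SUCCESS)
 *		... the reservation could not be established ...
 */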
11450 kern_return_t
11451 vm_map_raise_min_offset(
11452 vm_map_t map,
11453 vm_map_offset_t new_min_offset)
11454 {
11455 vm_map_entry_t first_entry;
11456
11457 new_min_offset = vm_map_round_page(new_min_offset);
11458
11459 vm_map_lock(map);
11460
11461 if (new_min_offset < map->min_offset) {
11462 /*
11463 * Can't move min_offset backwards, as that would expose
11464 * a part of the address space that was previously, and for
11465 * possibly good reasons, inaccessible.
11466 */
11467 vm_map_unlock(map);
11468 return KERN_INVALID_ADDRESS;
11469 }
11470
11471 first_entry = vm_map_first_entry(map);
11472 if (first_entry != vm_map_to_entry(map) &&
11473 first_entry->vme_start < new_min_offset) {
11474 /*
11475 * Some memory was already allocated below the new
11476 * minimum offset. It's too late to change it now...
11477 */
11478 vm_map_unlock(map);
11479 return KERN_NO_SPACE;
11480 }
11481
11482 map->min_offset = new_min_offset;
11483
11484 vm_map_unlock(map);
11485
11486 return KERN_SUCCESS;
11487 }
11488
11489 /*
11490 * Set the limit on the maximum amount of user wired memory allowed for this map.
11491 * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side of
11492 * the kernel. The limits are checked on the Mach VM side, so we keep a copy here
11493 * to avoid reaching over to the BSD data structures.
11494 */
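/*
 * Illustrative sketch (not part of the original source): a hypothetical
 * BSD-side caller propagating a new MEMLOCK rlimit value into the current
 * task's map.
 *
 *	vm_map_set_user_wire_limit(current_map(),
 *				   (vm_size_t) new_memlock_limit);
 */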
11495
11496 void
11497 vm_map_set_user_wire_limit(vm_map_t map,
11498 vm_size_t limit)
11499 {
11500 map->user_wire_limit = limit;
11501 }