1 /*
2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_map.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * Virtual memory mapping module.
64 */
65
66 #include <task_swapper.h>
67 #include <mach_assert.h>
68 #include <libkern/OSAtomic.h>
69
70 #include <mach/kern_return.h>
71 #include <mach/port.h>
72 #include <mach/vm_attributes.h>
73 #include <mach/vm_param.h>
74 #include <mach/vm_behavior.h>
75 #include <mach/vm_statistics.h>
76 #include <mach/memory_object.h>
77 #include <mach/mach_vm.h>
78 #include <machine/cpu_capabilities.h>
79 #include <mach/sdt.h>
80
81 #include <kern/assert.h>
82 #include <kern/counters.h>
83 #include <kern/kalloc.h>
84 #include <kern/zalloc.h>
85
86 #include <vm/cpm.h>
87 #include <vm/vm_init.h>
88 #include <vm/vm_fault.h>
89 #include <vm/vm_map.h>
90 #include <vm/vm_object.h>
91 #include <vm/vm_page.h>
92 #include <vm/vm_kern.h>
93 #include <ipc/ipc_port.h>
94 #include <kern/sched_prim.h>
95 #include <kern/misc_protos.h>
96 #include <ddb/tr.h>
97 #include <machine/db_machdep.h>
98 #include <kern/xpr.h>
99
100 #include <mach/vm_map_server.h>
101 #include <mach/mach_host_server.h>
102 #include <vm/vm_protos.h>
103
104 #ifdef ppc
105 #include <ppc/mappings.h>
106 #endif /* ppc */
107
108 #include <vm/vm_protos.h>
109 #include <vm/vm_shared_region.h>
110
111 /* Internal prototypes
112 */
113
114 static void vm_map_simplify_range(
115 vm_map_t map,
116 vm_map_offset_t start,
117 vm_map_offset_t end); /* forward */
118
119 static boolean_t vm_map_range_check(
120 vm_map_t map,
121 vm_map_offset_t start,
122 vm_map_offset_t end,
123 vm_map_entry_t *entry);
124
125 static vm_map_entry_t _vm_map_entry_create(
126 struct vm_map_header *map_header);
127
128 static void _vm_map_entry_dispose(
129 struct vm_map_header *map_header,
130 vm_map_entry_t entry);
131
132 static void vm_map_pmap_enter(
133 vm_map_t map,
134 vm_map_offset_t addr,
135 vm_map_offset_t end_addr,
136 vm_object_t object,
137 vm_object_offset_t offset,
138 vm_prot_t protection);
139
140 static void _vm_map_clip_end(
141 struct vm_map_header *map_header,
142 vm_map_entry_t entry,
143 vm_map_offset_t end);
144
145 static void _vm_map_clip_start(
146 struct vm_map_header *map_header,
147 vm_map_entry_t entry,
148 vm_map_offset_t start);
149
150 static void vm_map_entry_delete(
151 vm_map_t map,
152 vm_map_entry_t entry);
153
154 static kern_return_t vm_map_delete(
155 vm_map_t map,
156 vm_map_offset_t start,
157 vm_map_offset_t end,
158 int flags,
159 vm_map_t zap_map);
160
161 static kern_return_t vm_map_copy_overwrite_unaligned(
162 vm_map_t dst_map,
163 vm_map_entry_t entry,
164 vm_map_copy_t copy,
165 vm_map_address_t start);
166
167 static kern_return_t vm_map_copy_overwrite_aligned(
168 vm_map_t dst_map,
169 vm_map_entry_t tmp_entry,
170 vm_map_copy_t copy,
171 vm_map_offset_t start,
172 pmap_t pmap);
173
174 static kern_return_t vm_map_copyin_kernel_buffer(
175 vm_map_t src_map,
176 vm_map_address_t src_addr,
177 vm_map_size_t len,
178 boolean_t src_destroy,
179 vm_map_copy_t *copy_result); /* OUT */
180
181 static kern_return_t vm_map_copyout_kernel_buffer(
182 vm_map_t map,
183 vm_map_address_t *addr, /* IN/OUT */
184 vm_map_copy_t copy,
185 boolean_t overwrite);
186
187 static void vm_map_fork_share(
188 vm_map_t old_map,
189 vm_map_entry_t old_entry,
190 vm_map_t new_map);
191
192 static boolean_t vm_map_fork_copy(
193 vm_map_t old_map,
194 vm_map_entry_t *old_entry_p,
195 vm_map_t new_map);
196
197 void vm_map_region_top_walk(
198 vm_map_entry_t entry,
199 vm_region_top_info_t top);
200
201 void vm_map_region_walk(
202 vm_map_t map,
203 vm_map_offset_t va,
204 vm_map_entry_t entry,
205 vm_object_offset_t offset,
206 vm_object_size_t range,
207 vm_region_extended_info_t extended,
208 boolean_t look_for_pages);
209
210 static kern_return_t vm_map_wire_nested(
211 vm_map_t map,
212 vm_map_offset_t start,
213 vm_map_offset_t end,
214 vm_prot_t access_type,
215 boolean_t user_wire,
216 pmap_t map_pmap,
217 vm_map_offset_t pmap_addr);
218
219 static kern_return_t vm_map_unwire_nested(
220 vm_map_t map,
221 vm_map_offset_t start,
222 vm_map_offset_t end,
223 boolean_t user_wire,
224 pmap_t map_pmap,
225 vm_map_offset_t pmap_addr);
226
227 static kern_return_t vm_map_overwrite_submap_recurse(
228 vm_map_t dst_map,
229 vm_map_offset_t dst_addr,
230 vm_map_size_t dst_size);
231
232 static kern_return_t vm_map_copy_overwrite_nested(
233 vm_map_t dst_map,
234 vm_map_offset_t dst_addr,
235 vm_map_copy_t copy,
236 boolean_t interruptible,
237 pmap_t pmap);
238
239 static kern_return_t vm_map_remap_extract(
240 vm_map_t map,
241 vm_map_offset_t addr,
242 vm_map_size_t size,
243 boolean_t copy,
244 struct vm_map_header *map_header,
245 vm_prot_t *cur_protection,
246 vm_prot_t *max_protection,
247 vm_inherit_t inheritance,
248 boolean_t pageable);
249
250 static kern_return_t vm_map_remap_range_allocate(
251 vm_map_t map,
252 vm_map_address_t *address,
253 vm_map_size_t size,
254 vm_map_offset_t mask,
255 boolean_t anywhere,
256 vm_map_entry_t *map_entry);
257
258 static void vm_map_region_look_for_page(
259 vm_map_t map,
260 vm_map_offset_t va,
261 vm_object_t object,
262 vm_object_offset_t offset,
263 int max_refcnt,
264 int depth,
265 vm_region_extended_info_t extended);
266
267 static int vm_map_region_count_obj_refs(
268 vm_map_entry_t entry,
269 vm_object_t object);
270
271 /*
272 * Macros to copy a vm_map_entry. We must be careful to correctly
273 * manage the wired page count. vm_map_entry_copy() creates a new
274 * map entry to the same memory - the wired count in the new entry
275 * must be set to zero. vm_map_entry_copy_full() creates a new
276 * entry that is identical to the old entry. This preserves the
277 * wire count; it's used for map splitting and zone changing in
278 * vm_map_copyout.
279 */
280 #define vm_map_entry_copy(NEW,OLD) \
281 MACRO_BEGIN \
282 *(NEW) = *(OLD); \
283 (NEW)->is_shared = FALSE; \
284 (NEW)->needs_wakeup = FALSE; \
285 (NEW)->in_transition = FALSE; \
286 (NEW)->wired_count = 0; \
287 (NEW)->user_wired_count = 0; \
288 MACRO_END
289
290 #define vm_map_entry_copy_full(NEW,OLD) (*(NEW) = *(OLD))
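/*
 * Illustrative sketch (editorial addition, not part of the original source):
 * the practical difference between the two copy macros shows up when the
 * source entry is wired.  A minimal, hypothetical usage:
 *
 *	vm_map_entry_t src;	// existing entry, possibly wired
 *	vm_map_entry_t dst;	// freshly allocated via vm_map_entry_create()
 *
 *	vm_map_entry_copy(dst, src);		// second mapping of the same memory:
 *						// dst->wired_count == 0,
 *						// dst->user_wired_count == 0
 *
 *	vm_map_entry_copy_full(dst, src);	// exact duplicate: wire counts are
 *						// preserved (map splitting, zone
 *						// changes in vm_map_copyout)
 */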
291
292 /*
293 * Decide if we want to allow processes to execute from their data or stack areas.
294 * override_nx() returns true if we do. Data/stack execution can be enabled independently
295 * for 32 and 64 bit processes. Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
296 * or allow_stack_exec to enable data execution for that type of data area for that particular
297 * ABI (or both by or'ing the flags together). These are initialized in the architecture
298 * specific pmap files since the default behavior varies according to architecture. The
299 * main reason it varies is because of the need to provide binary compatibility with old
300 * applications that were written before these restrictions came into being. In the old
301 * days, an app could execute anything it could read, but this has slowly been tightened
302 * up over time. The default behavior is:
303 *
304 * 32-bit PPC apps may execute from both stack and data areas
305 * 32-bit Intel apps may execute from data areas but not stack
306 * 64-bit PPC/Intel apps may not execute from either data or stack
307 *
308 * An application on any architecture may override these defaults by explicitly
309 * adding PROT_EXEC permission to the page in question with the mprotect(2)
310 * system call. This code here just determines what happens when an app tries to
311 * execute from a page that lacks execute permission.
312 *
313 * Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
314 * default behavior for both 32 and 64 bit apps on a system-wide basis.
315 */
316
317 extern int allow_data_exec, allow_stack_exec;
318
319 int
320 override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
321 {
322 int current_abi;
323
324 /*
325 * Determine if the app is running in 32 or 64 bit mode.
326 */
327
328 if (vm_map_is_64bit(map))
329 current_abi = VM_ABI_64;
330 else
331 current_abi = VM_ABI_32;
332
333 /*
334 * Determine if we should allow the execution based on whether it's a
335 * stack or data area and the current architecture.
336 */
337
338 if (user_tag == VM_MEMORY_STACK)
339 return allow_stack_exec & current_abi;
340
341 return allow_data_exec & current_abi;
342 }
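/*
 * Illustrative sketch (editorial addition): a typical caller consults
 * override_nx() when a mapping lacks execute permission and grants
 * VM_PROT_EXECUTE only if the policy for the current ABI allows it, e.g.:
 *
 *	if (override_nx(map, alias) && cur_protection)
 *		cur_protection |= VM_PROT_EXECUTE;
 *
 * This is the pattern used by vm_map_enter() later in this file; "alias" is
 * the user tag associated with the mapping (e.g. VM_MEMORY_STACK).
 */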
343
344
345 /*
346 * Virtual memory maps provide for the mapping, protection,
347 * and sharing of virtual memory objects. In addition,
348 * this module provides for an efficient virtual copy of
349 * memory from one map to another.
350 *
351 * Synchronization is required prior to most operations.
352 *
353 * Maps consist of an ordered doubly-linked list of simple
354 * entries; a single hint is used to speed up lookups.
355 *
356 * Sharing maps have been deleted from this version of Mach.
357 * All shared objects are now mapped directly into the respective
358 * maps. This requires a change in the copy on write strategy;
359 * the asymmetric (delayed) strategy is used for shared temporary
360 * objects instead of the symmetric (shadow) strategy. All maps
361 * are now "top level" maps (either task map, kernel map or submap
362 * of the kernel map).
363 *
364 * Since portions of maps are specified by start/end addresses,
365 * which may not align with existing map entries, all
366 * routines merely "clip" entries to these start/end values.
367 * [That is, an entry is split into two, bordering at a
368 * start or end value.] Note that these clippings may not
369 * always be necessary (as the two resulting entries are then
370 * not changed); however, the clipping is done for convenience.
371 * No attempt is currently made to "glue back together" two
372 * abutting entries.
373 *
374 * The symmetric (shadow) copy strategy implements virtual copy
375 * by copying VM object references from one map to
376 * another, and then marking both regions as copy-on-write.
377 * It is important to note that only one writeable reference
378 * to a VM object region exists in any map when this strategy
379 * is used -- this means that shadow object creation can be
380 * delayed until a write operation occurs. The asymmetric (delayed)
381 * strategy allows multiple maps to have writeable references to
382 * the same region of a vm object, and hence cannot delay creating
383 * its copy objects. See vm_object_copy_quickly() in vm_object.c.
384 * Copying of permanent objects is completely different; see
385 * vm_object_copy_strategically() in vm_object.c.
386 */
387
388 static zone_t vm_map_zone; /* zone for vm_map structures */
389 static zone_t vm_map_entry_zone; /* zone for vm_map_entry structures */
390 static zone_t vm_map_kentry_zone; /* zone for kernel entry structures */
391 static zone_t vm_map_copy_zone; /* zone for vm_map_copy structures */
392
393
394 /*
395 * Placeholder object for submap operations. This object is dropped
396 * into the range by a call to vm_map_find, and removed when
397 * vm_map_submap creates the submap.
398 */
399
400 vm_object_t vm_submap_object;
401
402 static void *map_data;
403 static vm_map_size_t map_data_size;
404 static void *kentry_data;
405 static vm_map_size_t kentry_data_size;
406 static int kentry_count = 2048; /* to init kentry_data_size */
407
408 #define NO_COALESCE_LIMIT (1024 * 128)
409
410
411 /* Skip acquiring locks if we're in the midst of a kernel core dump */
412 extern unsigned int not_in_kdp;
413
414 #if CONFIG_CODE_DECRYPTION
415 /*
416 * vm_map_apple_protected:
417 * This remaps the requested part of the object with an object backed by
418 * the decrypting pager.
419 * crypt_info contains entry points and session data for the crypt module.
420 * The crypt_info block will be copied by vm_map_apple_protected. The data structures
421 * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
422 */
423 kern_return_t
424 vm_map_apple_protected(
425 vm_map_t map,
426 vm_map_offset_t start,
427 vm_map_offset_t end,
428 struct pager_crypt_info *crypt_info)
429 {
430 boolean_t map_locked;
431 kern_return_t kr;
432 vm_map_entry_t map_entry;
433 memory_object_t protected_mem_obj;
434 vm_object_t protected_object;
435 vm_map_offset_t map_addr;
436
437 vm_map_lock_read(map);
438 map_locked = TRUE;
439
440 /* lookup the protected VM object */
441 if (!vm_map_lookup_entry(map,
442 start,
443 &map_entry) ||
444 map_entry->vme_end < end ||
445 map_entry->is_sub_map) {
446 /* that memory is not properly mapped */
447 kr = KERN_INVALID_ARGUMENT;
448 goto done;
449 }
450 protected_object = map_entry->object.vm_object;
451 if (protected_object == VM_OBJECT_NULL) {
452 /* there should be a VM object here at this point */
453 kr = KERN_INVALID_ARGUMENT;
454 goto done;
455 }
456
457 /*
458 * Lookup (and create if necessary) the protected memory object
459 * matching that VM object.
460 * If successful, this also grabs a reference on the memory object,
461 * to guarantee that it doesn't go away before we get a chance to map
462 * it.
463 */
464
465 protected_mem_obj = apple_protect_pager_setup(protected_object, crypt_info);
466 if (protected_mem_obj == NULL) {
467 kr = KERN_FAILURE;
468 goto done;
469 }
470
471 vm_map_unlock_read(map);
472 map_locked = FALSE;
473
474 /* map this memory object in place of the current one */
475 map_addr = start;
476 kr = vm_map_enter_mem_object(map,
477 &map_addr,
478 end - start,
479 (mach_vm_offset_t) 0,
480 VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
481 (ipc_port_t) protected_mem_obj,
482 (map_entry->offset +
483 (start - map_entry->vme_start)),
484 TRUE,
485 map_entry->protection,
486 map_entry->max_protection,
487 map_entry->inheritance);
488 assert(map_addr == start);
489 /*
490 * Release the reference obtained by apple_protect_pager_setup().
491 * The mapping (if it succeeded) is now holding a reference on the
492 * memory object.
493 */
494 memory_object_deallocate(protected_mem_obj);
495
496 done:
497 if (map_locked) {
498 vm_map_unlock_read(map);
499 }
500 return kr;
501 }
502 #endif /* CONFIG_CODE_DECRYPTION */
503
504
505 /*
506 * vm_map_init:
507 *
508 * Initialize the vm_map module. Must be called before
509 * any other vm_map routines.
510 *
511 * Map and entry structures are allocated from zones -- we must
512 * initialize those zones.
513 *
514 * There are three zones of interest:
515 *
516 * vm_map_zone: used to allocate maps.
517 * vm_map_entry_zone: used to allocate map entries.
518 * vm_map_kentry_zone: used to allocate map entries for the kernel.
519 *
520 * The kernel allocates map entries from a special zone that is initially
521 * "crammed" with memory. It would be difficult (perhaps impossible) for
522 * the kernel to allocate more memory to an entry zone when it became
523 * empty since the very act of allocating memory implies the creation
524 * of a new entry.
525 */
526 void
527 vm_map_init(
528 void)
529 {
530 vm_map_zone = zinit((vm_map_size_t) sizeof(struct _vm_map), 40*1024,
531 PAGE_SIZE, "maps");
532
533 vm_map_entry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
534 1024*1024, PAGE_SIZE*5,
535 "non-kernel map entries");
536
537 vm_map_kentry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
538 kentry_data_size, kentry_data_size,
539 "kernel map entries");
540
541 vm_map_copy_zone = zinit((vm_map_size_t) sizeof(struct vm_map_copy),
542 16*1024, PAGE_SIZE, "map copies");
543
544 /*
545 * Cram the map and kentry zones with initial data.
546 * Set kentry_zone non-collectible to aid zone_gc().
547 */
548 zone_change(vm_map_zone, Z_COLLECT, FALSE);
549 zone_change(vm_map_kentry_zone, Z_COLLECT, FALSE);
550 zone_change(vm_map_kentry_zone, Z_EXPAND, FALSE);
551 zcram(vm_map_zone, map_data, map_data_size);
552 zcram(vm_map_kentry_zone, kentry_data, kentry_data_size);
553 }
554
555 void
556 vm_map_steal_memory(
557 void)
558 {
559 map_data_size = vm_map_round_page(10 * sizeof(struct _vm_map));
560 map_data = pmap_steal_memory(map_data_size);
561
562 #if 0
563 /*
564 * Limiting worst case: vm_map_kentry_zone needs to map each "available"
565 * physical page (i.e. that beyond the kernel image and page tables)
566 * individually; we guess at most one entry per eight pages in the
567 * real world. This works out to roughly .1 of 1% of physical memory,
568 * or roughly 1900 entries (64K) for a 64M machine with 4K pages.
569 */
570 #endif
571 kentry_count = pmap_free_pages() / 8;
572
573
574 kentry_data_size =
575 vm_map_round_page(kentry_count * sizeof(struct vm_map_entry));
576 kentry_data = pmap_steal_memory(kentry_data_size);
577 }
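/*
 * Editorial note (assumed bootstrap ordering, inferred from the code above):
 * vm_map_steal_memory() must run before vm_map_init(), because vm_map_init()
 * crams the stolen map_data / kentry_data into the freshly created zones:
 *
 *	vm_map_steal_memory();	// early boot: pmap_steal_memory() for map_data
 *				// and kentry_data (no zones exist yet)
 *	...			// pmap and zone bootstrap
 *	vm_map_init();		// zinit() the zones, then zcram() the stolen data
 */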
578
579 /*
580 * vm_map_create:
581 *
582 * Creates and returns a new empty VM map with
583 * the given physical map structure, and having
584 * the given lower and upper address bounds.
585 */
586 vm_map_t
587 vm_map_create(
588 pmap_t pmap,
589 vm_map_offset_t min,
590 vm_map_offset_t max,
591 boolean_t pageable)
592 {
593 static int color_seed = 0;
594 register vm_map_t result;
595
596 result = (vm_map_t) zalloc(vm_map_zone);
597 if (result == VM_MAP_NULL)
598 panic("vm_map_create");
599
600 vm_map_first_entry(result) = vm_map_to_entry(result);
601 vm_map_last_entry(result) = vm_map_to_entry(result);
602 result->hdr.nentries = 0;
603 result->hdr.entries_pageable = pageable;
604
605 result->size = 0;
606 result->user_wire_limit = MACH_VM_MAX_ADDRESS; /* default limit is unlimited */
607 result->user_wire_size = 0;
608 result->ref_count = 1;
609 #if TASK_SWAPPER
610 result->res_count = 1;
611 result->sw_state = MAP_SW_IN;
612 #endif /* TASK_SWAPPER */
613 result->pmap = pmap;
614 result->min_offset = min;
615 result->max_offset = max;
616 result->wiring_required = FALSE;
617 result->no_zero_fill = FALSE;
618 result->mapped = FALSE;
619 #if CONFIG_EMBEDDED
620 result->prot_copy_allow = FALSE;
621 #else
622 result->prot_copy_allow = TRUE;
623 #endif
624 result->wait_for_space = FALSE;
625 result->first_free = vm_map_to_entry(result);
626 result->hint = vm_map_to_entry(result);
627 result->color_rr = (color_seed++) & vm_color_mask;
628 vm_map_lock_init(result);
629 mutex_init(&result->s_lock, 0);
630
631 return(result);
632 }
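/*
 * Illustrative sketch (editorial addition): one in-file use of vm_map_create()
 * is building a temporary "zap" map, with no pmap behind it, to hold displaced
 * entries during an overwrite (see vm_map_enter() below):
 *
 *	vm_map_t zap_old_map;
 *
 *	zap_old_map = vm_map_create(PMAP_NULL,		// no physical map
 *				    *address,		// lower bound
 *				    *address + size,	// upper bound
 *				    TRUE);		// pageable entries
 */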
633
634 /*
635 * vm_map_entry_create: [ internal use only ]
636 *
637 * Allocates a VM map entry for insertion in the
638 * given map (or map copy). No fields are filled.
639 */
640 #define vm_map_entry_create(map) \
641 _vm_map_entry_create(&(map)->hdr)
642
643 #define vm_map_copy_entry_create(copy) \
644 _vm_map_entry_create(&(copy)->cpy_hdr)
645
646 static vm_map_entry_t
647 _vm_map_entry_create(
648 register struct vm_map_header *map_header)
649 {
650 register zone_t zone;
651 register vm_map_entry_t entry;
652
653 if (map_header->entries_pageable)
654 zone = vm_map_entry_zone;
655 else
656 zone = vm_map_kentry_zone;
657
658 entry = (vm_map_entry_t) zalloc(zone);
659 if (entry == VM_MAP_ENTRY_NULL)
660 panic("vm_map_entry_create");
661
662 return(entry);
663 }
664
665 /*
666 * vm_map_entry_dispose: [ internal use only ]
667 *
668 * Inverse of vm_map_entry_create.
669 *
670 * write map lock held so no need to
671 * do anything special to ensure correctness
672 * of the stores
673 */
674 #define vm_map_entry_dispose(map, entry) \
675 MACRO_BEGIN \
676 if((entry) == (map)->first_free) \
677 (map)->first_free = vm_map_to_entry(map); \
678 if((entry) == (map)->hint) \
679 (map)->hint = vm_map_to_entry(map); \
680 _vm_map_entry_dispose(&(map)->hdr, (entry)); \
681 MACRO_END
682
683 #define vm_map_copy_entry_dispose(map, entry) \
684 _vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
685
686 static void
687 _vm_map_entry_dispose(
688 register struct vm_map_header *map_header,
689 register vm_map_entry_t entry)
690 {
691 register zone_t zone;
692
693 if (map_header->entries_pageable)
694 zone = vm_map_entry_zone;
695 else
696 zone = vm_map_kentry_zone;
697
698 zfree(zone, entry);
699 }
700
701 #if MACH_ASSERT
702 static boolean_t first_free_is_valid(vm_map_t map); /* forward */
703 static boolean_t first_free_check = FALSE;
704 static boolean_t
705 first_free_is_valid(
706 vm_map_t map)
707 {
708 vm_map_entry_t entry, next;
709
710 if (!first_free_check)
711 return TRUE;
712
713 entry = vm_map_to_entry(map);
714 next = entry->vme_next;
715 while (vm_map_trunc_page(next->vme_start) == vm_map_trunc_page(entry->vme_end) ||
716 (vm_map_trunc_page(next->vme_start) == vm_map_trunc_page(entry->vme_start) &&
717 next != vm_map_to_entry(map))) {
718 entry = next;
719 next = entry->vme_next;
720 if (entry == vm_map_to_entry(map))
721 break;
722 }
723 if (map->first_free != entry) {
724 printf("Bad first_free for map %p: %p should be %p\n",
725 map, map->first_free, entry);
726 return FALSE;
727 }
728 return TRUE;
729 }
730 #endif /* MACH_ASSERT */
731
732 /*
733 * UPDATE_FIRST_FREE:
734 *
735 * Updates the map->first_free pointer to the
736 * entry immediately before the first hole in the map.
737 * The map should be locked.
738 */
739 #define UPDATE_FIRST_FREE(map, new_first_free) \
740 MACRO_BEGIN \
741 vm_map_t UFF_map; \
742 vm_map_entry_t UFF_first_free; \
743 vm_map_entry_t UFF_next_entry; \
744 UFF_map = (map); \
745 UFF_first_free = (new_first_free); \
746 UFF_next_entry = UFF_first_free->vme_next; \
747 while (vm_map_trunc_page(UFF_next_entry->vme_start) == \
748 vm_map_trunc_page(UFF_first_free->vme_end) || \
749 (vm_map_trunc_page(UFF_next_entry->vme_start) == \
750 vm_map_trunc_page(UFF_first_free->vme_start) && \
751 UFF_next_entry != vm_map_to_entry(UFF_map))) { \
752 UFF_first_free = UFF_next_entry; \
753 UFF_next_entry = UFF_first_free->vme_next; \
754 if (UFF_first_free == vm_map_to_entry(UFF_map)) \
755 break; \
756 } \
757 UFF_map->first_free = UFF_first_free; \
758 assert(first_free_is_valid(UFF_map)); \
759 MACRO_END
760
761 /*
762 * vm_map_entry_{un,}link:
763 *
764 * Insert/remove entries from maps (or map copies).
765 */
766 #define vm_map_entry_link(map, after_where, entry) \
767 MACRO_BEGIN \
768 vm_map_t VMEL_map; \
769 vm_map_entry_t VMEL_entry; \
770 VMEL_map = (map); \
771 VMEL_entry = (entry); \
772 _vm_map_entry_link(&VMEL_map->hdr, after_where, VMEL_entry); \
773 UPDATE_FIRST_FREE(VMEL_map, VMEL_map->first_free); \
774 MACRO_END
775
776
777 #define vm_map_copy_entry_link(copy, after_where, entry) \
778 _vm_map_entry_link(&(copy)->cpy_hdr, after_where, (entry))
779
780 #define _vm_map_entry_link(hdr, after_where, entry) \
781 MACRO_BEGIN \
782 (hdr)->nentries++; \
783 (entry)->vme_prev = (after_where); \
784 (entry)->vme_next = (after_where)->vme_next; \
785 (entry)->vme_prev->vme_next = (entry)->vme_next->vme_prev = (entry); \
786 MACRO_END
787
788 #define vm_map_entry_unlink(map, entry) \
789 MACRO_BEGIN \
790 vm_map_t VMEU_map; \
791 vm_map_entry_t VMEU_entry; \
792 vm_map_entry_t VMEU_first_free; \
793 VMEU_map = (map); \
794 VMEU_entry = (entry); \
795 if (VMEU_entry->vme_start <= VMEU_map->first_free->vme_start) \
796 VMEU_first_free = VMEU_entry->vme_prev; \
797 else \
798 VMEU_first_free = VMEU_map->first_free; \
799 _vm_map_entry_unlink(&VMEU_map->hdr, VMEU_entry); \
800 UPDATE_FIRST_FREE(VMEU_map, VMEU_first_free); \
801 MACRO_END
802
803 #define vm_map_copy_entry_unlink(copy, entry) \
804 _vm_map_entry_unlink(&(copy)->cpy_hdr, (entry))
805
806 #define _vm_map_entry_unlink(hdr, entry) \
807 MACRO_BEGIN \
808 (hdr)->nentries--; \
809 (entry)->vme_next->vme_prev = (entry)->vme_prev; \
810 (entry)->vme_prev->vme_next = (entry)->vme_next; \
811 MACRO_END
812
813 #if MACH_ASSERT && TASK_SWAPPER
814 /*
815 * vm_map_res_reference:
816 *
817 * Adds another valid residence count to the given map.
818 *
819 * Map is locked so this function can be called from
820 * vm_map_swapin.
821 *
822 */
823 void vm_map_res_reference(register vm_map_t map)
824 {
825 /* assert map is locked */
826 assert(map->res_count >= 0);
827 assert(map->ref_count >= map->res_count);
828 if (map->res_count == 0) {
829 mutex_unlock(&map->s_lock);
830 vm_map_lock(map);
831 vm_map_swapin(map);
832 mutex_lock(&map->s_lock);
833 ++map->res_count;
834 vm_map_unlock(map);
835 } else
836 ++map->res_count;
837 }
838
839 /*
840 * vm_map_reference_swap:
841 *
842 * Adds valid reference and residence counts to the given map.
843 *
844 * The map may not be in memory (i.e. zero residence count).
845 *
846 */
847 void vm_map_reference_swap(register vm_map_t map)
848 {
849 assert(map != VM_MAP_NULL);
850 mutex_lock(&map->s_lock);
851 assert(map->res_count >= 0);
852 assert(map->ref_count >= map->res_count);
853 map->ref_count++;
854 vm_map_res_reference(map);
855 mutex_unlock(&map->s_lock);
856 }
857
858 /*
859 * vm_map_res_deallocate:
860 *
861 * Decrement residence count on a map; possibly causing swapout.
862 *
863 * The map must be in memory (i.e. non-zero residence count).
864 *
865 * The map is locked, so this function is callable from vm_map_deallocate.
866 *
867 */
868 void vm_map_res_deallocate(register vm_map_t map)
869 {
870 assert(map->res_count > 0);
871 if (--map->res_count == 0) {
872 mutex_unlock(&map->s_lock);
873 vm_map_lock(map);
874 vm_map_swapout(map);
875 vm_map_unlock(map);
876 mutex_lock(&map->s_lock);
877 }
878 assert(map->ref_count >= map->res_count);
879 }
880 #endif /* MACH_ASSERT && TASK_SWAPPER */
881
882 /*
883 * vm_map_destroy:
884 *
885 * Actually destroy a map.
886 */
887 void
888 vm_map_destroy(
889 vm_map_t map,
890 int flags)
891 {
892 vm_map_lock(map);
893
894 /* clean up regular map entries */
895 (void) vm_map_delete(map, map->min_offset, map->max_offset,
896 flags, VM_MAP_NULL);
897 /* clean up leftover special mappings (commpage, etc...) */
898 #ifdef __ppc__
899 /*
900 * PPC51: ppc64 is limited to 51-bit addresses.
901 * Memory beyond this 51-bit limit is mapped specially at the
902 * pmap level, so do not interfere.
903 * On PPC64, the commpage is mapped beyond the addressable range
904 * via a special pmap hack, so ask pmap to clean it explicitly...
905 */
906 if (map->pmap) {
907 pmap_unmap_sharedpage(map->pmap);
908 }
909 /* ... and do not let regular pmap cleanup apply here */
910 flags |= VM_MAP_REMOVE_NO_PMAP_CLEANUP;
911 #endif /* __ppc__ */
912 (void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL,
913 flags, VM_MAP_NULL);
914 vm_map_unlock(map);
915
916 assert(map->hdr.nentries == 0);
917
918 if(map->pmap)
919 pmap_destroy(map->pmap);
920
921 zfree(vm_map_zone, map);
922 }
923
924 #if TASK_SWAPPER
925 /*
926 * vm_map_swapin/vm_map_swapout
927 *
928 * Swap a map in and out, either referencing or releasing its resources.
929 * These functions are internal use only; however, they must be exported
930 * because they may be called from macros, which are exported.
931 *
932 * In the case of swapout, there could be races on the residence count,
933 * so if the residence count is up, we return, assuming that a
934 * vm_map_deallocate() call in the near future will bring us back.
935 *
936 * Locking:
937 * -- We use the map write lock for synchronization among races.
938 * -- The map write lock, and not the simple s_lock, protects the
939 * swap state of the map.
940 * -- If a map entry is a share map, then we hold both locks, in
941 * hierarchical order.
942 *
943 * Synchronization Notes:
944 * 1) If a vm_map_swapin() call happens while swapout in progress, it
945 * will block on the map lock and proceed when swapout is through.
946 * 2) A vm_map_reference() call at this time is illegal, and will
947 * cause a panic. vm_map_reference() is only allowed on resident
948 * maps, since it refuses to block.
949 * 3) A vm_map_swapin() call during a swapin will block, and
950 * proceed when the first swapin is done, turning into a nop.
951 * This is the reason the res_count is not incremented until
952 * after the swapin is complete.
953 * 4) There is a timing hole after the checks of the res_count, before
954 * the map lock is taken, during which a swapin may get the lock
955 * before a swapout about to happen. If this happens, the swapin
956 * will detect the state and increment the reference count, causing
957 * the swapout to be a nop, thereby delaying it until a later
958 * vm_map_deallocate. If the swapout gets the lock first, then
959 * the swapin will simply block until the swapout is done, and
960 * then proceed.
961 *
962 * Because vm_map_swapin() is potentially an expensive operation, it
963 * should be used with caution.
964 *
965 * Invariants:
966 * 1) A map with a residence count of zero is either swapped, or
967 * being swapped.
968 * 2) A map with a non-zero residence count is either resident,
969 * or being swapped in.
970 */
971
972 int vm_map_swap_enable = 1;
973
974 void vm_map_swapin (vm_map_t map)
975 {
976 register vm_map_entry_t entry;
977
978 if (!vm_map_swap_enable) /* debug */
979 return;
980
981 /*
982 * Map is locked
983 * First deal with various races.
984 */
985 if (map->sw_state == MAP_SW_IN)
986 /*
987 * we raced with swapout and won. Returning will increment
988 * the res_count, turning the swapout into a nop.
989 */
990 return;
991
992 /*
993 * The residence count must be zero. If we raced with another
994 * swapin, the state would have been IN; if we raced with a
995 * swapout (after another competing swapin), we must have lost
996 * the race to get here (see above comment), in which case
997 * res_count is still 0.
998 */
999 assert(map->res_count == 0);
1000
1001 /*
1002 * There are no intermediate states of a map going out or
1003 * coming in, since the map is locked during the transition.
1004 */
1005 assert(map->sw_state == MAP_SW_OUT);
1006
1007 /*
1008 * We now operate upon each map entry. If the entry is a sub-
1009 * or share-map, we call vm_map_res_reference upon it.
1010 * If the entry is an object, we call vm_object_res_reference
1011 * (this may iterate through the shadow chain).
1012 * Note that we hold the map locked the entire time,
1013 * even if we get back here via a recursive call in
1014 * vm_map_res_reference.
1015 */
1016 entry = vm_map_first_entry(map);
1017
1018 while (entry != vm_map_to_entry(map)) {
1019 if (entry->object.vm_object != VM_OBJECT_NULL) {
1020 if (entry->is_sub_map) {
1021 vm_map_t lmap = entry->object.sub_map;
1022 mutex_lock(&lmap->s_lock);
1023 vm_map_res_reference(lmap);
1024 mutex_unlock(&lmap->s_lock);
1025 } else {
1026 vm_object_t object = entry->object.vm_object;
1027 vm_object_lock(object);
1028 /*
1029 * This call may iterate through the
1030 * shadow chain.
1031 */
1032 vm_object_res_reference(object);
1033 vm_object_unlock(object);
1034 }
1035 }
1036 entry = entry->vme_next;
1037 }
1038 assert(map->sw_state == MAP_SW_OUT);
1039 map->sw_state = MAP_SW_IN;
1040 }
1041
1042 void vm_map_swapout(vm_map_t map)
1043 {
1044 register vm_map_entry_t entry;
1045
1046 /*
1047 * Map is locked
1048 * First deal with various races.
1049 * If we raced with a swapin and lost, the residence count
1050 * will have been incremented to 1, and we simply return.
1051 */
1052 mutex_lock(&map->s_lock);
1053 if (map->res_count != 0) {
1054 mutex_unlock(&map->s_lock);
1055 return;
1056 }
1057 mutex_unlock(&map->s_lock);
1058
1059 /*
1060 * There are no intermediate states of a map going out or
1061 * coming in, since the map is locked during the transition.
1062 */
1063 assert(map->sw_state == MAP_SW_IN);
1064
1065 if (!vm_map_swap_enable)
1066 return;
1067
1068 /*
1069 * We now operate upon each map entry. If the entry is a sub-
1070 * or share-map, we call vm_map_res_deallocate upon it.
1071 * If the entry is an object, we call vm_object_res_deallocate
1072 * (this may iterate through the shadow chain).
1073 * Note that we hold the map locked the entire time,
1074 * even if we get back here via a recursive call in
1075 * vm_map_res_deallocate.
1076 */
1077 entry = vm_map_first_entry(map);
1078
1079 while (entry != vm_map_to_entry(map)) {
1080 if (entry->object.vm_object != VM_OBJECT_NULL) {
1081 if (entry->is_sub_map) {
1082 vm_map_t lmap = entry->object.sub_map;
1083 mutex_lock(&lmap->s_lock);
1084 vm_map_res_deallocate(lmap);
1085 mutex_unlock(&lmap->s_lock);
1086 } else {
1087 vm_object_t object = entry->object.vm_object;
1088 vm_object_lock(object);
1089 /*
1090 * This call may take a long time,
1091 * since it could actively push
1092 * out pages (if we implement it
1093 * that way).
1094 */
1095 vm_object_res_deallocate(object);
1096 vm_object_unlock(object);
1097 }
1098 }
1099 entry = entry->vme_next;
1100 }
1101 assert(map->sw_state == MAP_SW_IN);
1102 map->sw_state = MAP_SW_OUT;
1103 }
1104
1105 #endif /* TASK_SWAPPER */
1106
1107
1108 /*
1109 * SAVE_HINT_MAP_READ:
1110 *
1111 * Saves the specified entry as the hint for
1112 * future lookups. only a read lock is held on map,
1113 * so make sure the store is atomic... OSCompareAndSwap
1114 * guarantees this... also, we don't care if we collide
1115 * and someone else wins and stores their 'hint'
1116 */
1117 #define SAVE_HINT_MAP_READ(map,value) \
1118 MACRO_BEGIN \
1119 OSCompareAndSwap((UInt32)((map)->hint), (UInt32)value, (UInt32 *)(&(map)->hint)); \
1120 MACRO_END
1121
1122
1123 /*
1124 * SAVE_HINT_MAP_WRITE:
1125 *
1126 * Saves the specified entry as the hint for
1127 * future lookups. write lock held on map,
1128 * so no one else can be writing or looking
1129 * until the lock is dropped, so it's safe
1130 * to just do an assignment
1131 */
1132 #define SAVE_HINT_MAP_WRITE(map,value) \
1133 MACRO_BEGIN \
1134 (map)->hint = (value); \
1135 MACRO_END
1136
1137 /*
1138 * vm_map_lookup_entry: [ internal use only ]
1139 *
1140 * Finds the map entry containing (or
1141 * immediately preceding) the specified address
1142 * in the given map; the entry is returned
1143 * in the "entry" parameter. The boolean
1144 * result indicates whether the address is
1145 * actually contained in the map.
1146 */
1147 boolean_t
1148 vm_map_lookup_entry(
1149 register vm_map_t map,
1150 register vm_map_offset_t address,
1151 vm_map_entry_t *entry) /* OUT */
1152 {
1153 register vm_map_entry_t cur;
1154 register vm_map_entry_t last;
1155
1156 /*
1157 * Start looking either from the head of the
1158 * list, or from the hint.
1159 */
1160 cur = map->hint;
1161
1162 if (cur == vm_map_to_entry(map))
1163 cur = cur->vme_next;
1164
1165 if (address >= cur->vme_start) {
1166 /*
1167 * Go from hint to end of list.
1168 *
1169 * But first, make a quick check to see if
1170 * we are already looking at the entry we
1171 * want (which is usually the case).
1172 * Note also that we don't need to save the hint
1173 * here... it is the same hint (unless we are
1174 * at the header, in which case the hint didn't
1175 * buy us anything anyway).
1176 */
1177 last = vm_map_to_entry(map);
1178 if ((cur != last) && (cur->vme_end > address)) {
1179 *entry = cur;
1180 return(TRUE);
1181 }
1182 }
1183 else {
1184 /*
1185 * Go from start to hint, *inclusively*
1186 */
1187 last = cur->vme_next;
1188 cur = vm_map_first_entry(map);
1189 }
1190
1191 /*
1192 * Search linearly
1193 */
1194
1195 while (cur != last) {
1196 if (cur->vme_end > address) {
1197 if (address >= cur->vme_start) {
1198 /*
1199 * Save this lookup for future
1200 * hints, and return
1201 */
1202
1203 *entry = cur;
1204 SAVE_HINT_MAP_READ(map, cur);
1205
1206 return(TRUE);
1207 }
1208 break;
1209 }
1210 cur = cur->vme_next;
1211 }
1212 *entry = cur->vme_prev;
1213 SAVE_HINT_MAP_READ(map, *entry);
1214
1215 return(FALSE);
1216 }
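/*
 * Illustrative sketch (editorial addition): callers distinguish the two
 * outcomes of vm_map_lookup_entry() -- address contained in *entry versus
 * *entry merely preceding an unmapped gap:
 *
 *	vm_map_entry_t entry;
 *
 *	if (vm_map_lookup_entry(map, addr, &entry)) {
 *		// addr lies within [entry->vme_start, entry->vme_end)
 *	} else {
 *		// addr is not mapped; "entry" immediately precedes the gap
 *		// (it may be vm_map_to_entry(map) if addr is below the
 *		//  first real entry)
 *	}
 */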
1217
1218 /*
1219 * Routine: vm_map_find_space
1220 * Purpose:
1221 * Allocate a range in the specified virtual address map,
1222 * returning the entry allocated for that range.
1223 * Used by kmem_alloc, etc.
1224 *
1225 * The map must NOT be locked. It will be returned locked
1226 * on KERN_SUCCESS, unlocked on failure.
1227 *
1228 * If an entry is allocated, the object/offset fields
1229 * are initialized to zero.
1230 */
1231 kern_return_t
1232 vm_map_find_space(
1233 register vm_map_t map,
1234 vm_map_offset_t *address, /* OUT */
1235 vm_map_size_t size,
1236 vm_map_offset_t mask,
1237 int flags,
1238 vm_map_entry_t *o_entry) /* OUT */
1239 {
1240 register vm_map_entry_t entry, new_entry;
1241 register vm_map_offset_t start;
1242 register vm_map_offset_t end;
1243
1244 if (size == 0) {
1245 *address = 0;
1246 return KERN_INVALID_ARGUMENT;
1247 }
1248
1249 if (flags & VM_FLAGS_GUARD_AFTER) {
1250 /* account for the back guard page in the size */
1251 size += PAGE_SIZE_64;
1252 }
1253
1254 new_entry = vm_map_entry_create(map);
1255
1256 /*
1257 * Look for the first possible address; if there's already
1258 * something at this address, we have to start after it.
1259 */
1260
1261 vm_map_lock(map);
1262
1263 assert(first_free_is_valid(map));
1264 if ((entry = map->first_free) == vm_map_to_entry(map))
1265 start = map->min_offset;
1266 else
1267 start = entry->vme_end;
1268
1269 /*
1270 * In any case, the "entry" always precedes
1271 * the proposed new region throughout the loop:
1272 */
1273
1274 while (TRUE) {
1275 register vm_map_entry_t next;
1276
1277 /*
1278 * Find the end of the proposed new region.
1279 * Be sure we didn't go beyond the end, or
1280 * wrap around the address.
1281 */
1282
1283 if (flags & VM_FLAGS_GUARD_BEFORE) {
1284 /* reserve space for the front guard page */
1285 start += PAGE_SIZE_64;
1286 }
1287 end = ((start + mask) & ~mask);
1288
1289 if (end < start) {
1290 vm_map_entry_dispose(map, new_entry);
1291 vm_map_unlock(map);
1292 return(KERN_NO_SPACE);
1293 }
1294 start = end;
1295 end += size;
1296
1297 if ((end > map->max_offset) || (end < start)) {
1298 vm_map_entry_dispose(map, new_entry);
1299 vm_map_unlock(map);
1300 return(KERN_NO_SPACE);
1301 }
1302
1303 /*
1304 * If there are no more entries, we must win.
1305 */
1306
1307 next = entry->vme_next;
1308 if (next == vm_map_to_entry(map))
1309 break;
1310
1311 /*
1312 * If there is another entry, it must be
1313 * after the end of the potential new region.
1314 */
1315
1316 if (next->vme_start >= end)
1317 break;
1318
1319 /*
1320 * Didn't fit -- move to the next entry.
1321 */
1322
1323 entry = next;
1324 start = entry->vme_end;
1325 }
1326
1327 /*
1328 * At this point,
1329 * "start" and "end" should define the endpoints of the
1330 * available new range, and
1331 * "entry" should refer to the region before the new
1332 * range, and
1333 *
1334 * the map should be locked.
1335 */
1336
1337 if (flags & VM_FLAGS_GUARD_BEFORE) {
1338 /* go back for the front guard page */
1339 start -= PAGE_SIZE_64;
1340 }
1341 *address = start;
1342
1343 new_entry->vme_start = start;
1344 new_entry->vme_end = end;
1345 assert(page_aligned(new_entry->vme_start));
1346 assert(page_aligned(new_entry->vme_end));
1347
1348 new_entry->is_shared = FALSE;
1349 new_entry->is_sub_map = FALSE;
1350 new_entry->use_pmap = FALSE;
1351 new_entry->object.vm_object = VM_OBJECT_NULL;
1352 new_entry->offset = (vm_object_offset_t) 0;
1353
1354 new_entry->needs_copy = FALSE;
1355
1356 new_entry->inheritance = VM_INHERIT_DEFAULT;
1357 new_entry->protection = VM_PROT_DEFAULT;
1358 new_entry->max_protection = VM_PROT_ALL;
1359 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
1360 new_entry->wired_count = 0;
1361 new_entry->user_wired_count = 0;
1362
1363 new_entry->in_transition = FALSE;
1364 new_entry->needs_wakeup = FALSE;
1365 new_entry->no_cache = FALSE;
1366
1367 new_entry->alias = 0;
1368
1369 VM_GET_FLAGS_ALIAS(flags, new_entry->alias);
1370
1371 /*
1372 * Insert the new entry into the list
1373 */
1374
1375 vm_map_entry_link(map, entry, new_entry);
1376
1377 map->size += size;
1378
1379 /*
1380 * Update the lookup hint
1381 */
1382 SAVE_HINT_MAP_WRITE(map, new_entry);
1383
1384 *o_entry = new_entry;
1385 return(KERN_SUCCESS);
1386 }
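/*
 * Illustrative sketch (editorial addition) of the locking contract described
 * above: the caller enters with the map unlocked and, on KERN_SUCCESS, must
 * fill in the new entry and drop the map lock itself.  A kmem-style caller
 * (hypothetical variable names) might look like:
 *
 *	vm_map_offset_t	addr;
 *	vm_map_entry_t	entry;
 *	kern_return_t	kr;
 *
 *	kr = vm_map_find_space(kernel_map, &addr, size, 0, 0, &entry);
 *	if (kr != KERN_SUCCESS)
 *		return kr;			// map is left unlocked on failure
 *	entry->object.vm_object = object;	// object/offset start out zero
 *	entry->offset = offset;
 *	vm_map_unlock(kernel_map);		// map was returned locked
 */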
1387
1388 int vm_map_pmap_enter_print = FALSE;
1389 int vm_map_pmap_enter_enable = FALSE;
1390
1391 /*
1392 * Routine: vm_map_pmap_enter [internal only]
1393 *
1394 * Description:
1395 * Force pages from the specified object to be entered into
1396 * the pmap at the specified address if they are present.
1397 * As soon as a page is not found in the object, the scan ends.
1398 *
1399 * Returns:
1400 * Nothing.
1401 *
1402 * In/out conditions:
1403 * The source map should not be locked on entry.
1404 */
1405 static void
1406 vm_map_pmap_enter(
1407 vm_map_t map,
1408 register vm_map_offset_t addr,
1409 register vm_map_offset_t end_addr,
1410 register vm_object_t object,
1411 vm_object_offset_t offset,
1412 vm_prot_t protection)
1413 {
1414 int type_of_fault;
1415 kern_return_t kr;
1416
1417 if(map->pmap == 0)
1418 return;
1419
1420 while (addr < end_addr) {
1421 register vm_page_t m;
1422
1423 vm_object_lock(object);
1424
1425 m = vm_page_lookup(object, offset);
1426 /*
1427 * ENCRYPTED SWAP:
1428 * The user should never see encrypted data, so do not
1429 * enter an encrypted page in the page table.
1430 */
1431 if (m == VM_PAGE_NULL || m->busy || m->encrypted ||
1432 m->fictitious ||
1433 (m->unusual && ( m->error || m->restart || m->absent))) {
1434 vm_object_unlock(object);
1435 return;
1436 }
1437
1438 if (vm_map_pmap_enter_print) {
1439 printf("vm_map_pmap_enter:");
1440 printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
1441 map, (unsigned long long)addr, object, (unsigned long long)offset);
1442 }
1443 type_of_fault = DBG_CACHE_HIT_FAULT;
1444 kr = vm_fault_enter(m, map->pmap, addr, protection,
1445 m->wire_count != 0, FALSE, FALSE,
1446 &type_of_fault);
1447
1448 vm_object_unlock(object);
1449
1450 offset += PAGE_SIZE_64;
1451 addr += PAGE_SIZE;
1452 }
1453 }
1454
1455 boolean_t vm_map_pmap_is_empty(
1456 vm_map_t map,
1457 vm_map_offset_t start,
1458 vm_map_offset_t end);
1459 boolean_t vm_map_pmap_is_empty(
1460 vm_map_t map,
1461 vm_map_offset_t start,
1462 vm_map_offset_t end)
1463 {
1464 #ifdef MACHINE_PMAP_IS_EMPTY
1465 return pmap_is_empty(map->pmap, start, end);
1466 #else /* MACHINE_PMAP_IS_EMPTY */
1467 vm_map_offset_t offset;
1468 ppnum_t phys_page;
1469
1470 if (map->pmap == NULL) {
1471 return TRUE;
1472 }
1473
1474 for (offset = start;
1475 offset < end;
1476 offset += PAGE_SIZE) {
1477 phys_page = pmap_find_phys(map->pmap, offset);
1478 if (phys_page) {
1479 kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
1480 "page %d at 0x%llx\n",
1481 map, (long long)start, (long long)end,
1482 phys_page, (long long)offset);
1483 return FALSE;
1484 }
1485 }
1486 return TRUE;
1487 #endif /* MACHINE_PMAP_IS_EMPTY */
1488 }
1489
1490 /*
1491 * Routine: vm_map_enter
1492 *
1493 * Description:
1494 * Allocate a range in the specified virtual address map.
1495 * The resulting range will refer to memory defined by
1496 * the given memory object and offset into that object.
1497 *
1498 * Arguments are as defined in the vm_map call.
1499 */
1500 int _map_enter_debug = 0;
1501 static unsigned int vm_map_enter_restore_successes = 0;
1502 static unsigned int vm_map_enter_restore_failures = 0;
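/*
 * Illustrative sketch (editorial addition): a minimal anonymous-memory
 * allocation through vm_map_enter(), letting the kernel choose the address
 * (VM_FLAGS_ANYWHERE) and deferring object creation (VM_OBJECT_NULL).
 * "map" and "size" are assumed to be supplied by the caller; the size and
 * the initial *address must be page aligned:
 *
 *	vm_map_offset_t	addr = 0;
 *	kern_return_t	kr;
 *
 *	kr = vm_map_enter(map,
 *			  &addr,		// IN/OUT: chosen address
 *			  size,			// page-aligned size
 *			  (vm_map_offset_t)0,	// no alignment mask
 *			  VM_FLAGS_ANYWHERE,
 *			  VM_OBJECT_NULL,	// zero-fill object, allocated lazily
 *			  (vm_object_offset_t)0,
 *			  FALSE,		// needs_copy
 *			  VM_PROT_DEFAULT,
 *			  VM_PROT_ALL,
 *			  VM_INHERIT_DEFAULT);
 */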
1503 kern_return_t
1504 vm_map_enter(
1505 vm_map_t map,
1506 vm_map_offset_t *address, /* IN/OUT */
1507 vm_map_size_t size,
1508 vm_map_offset_t mask,
1509 int flags,
1510 vm_object_t object,
1511 vm_object_offset_t offset,
1512 boolean_t needs_copy,
1513 vm_prot_t cur_protection,
1514 vm_prot_t max_protection,
1515 vm_inherit_t inheritance)
1516 {
1517 vm_map_entry_t entry, new_entry;
1518 vm_map_offset_t start, tmp_start, tmp_offset;
1519 vm_map_offset_t end, tmp_end;
1520 kern_return_t result = KERN_SUCCESS;
1521 vm_map_t zap_old_map = VM_MAP_NULL;
1522 vm_map_t zap_new_map = VM_MAP_NULL;
1523 boolean_t map_locked = FALSE;
1524 boolean_t pmap_empty = TRUE;
1525 boolean_t new_mapping_established = FALSE;
1526 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
1527 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
1528 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
1529 boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
1530 boolean_t is_submap = ((flags & VM_FLAGS_SUBMAP) != 0);
1531 char alias;
1532 vm_map_offset_t effective_min_offset, effective_max_offset;
1533 kern_return_t kr;
1534
1535 #if CONFIG_EMBEDDED
1536 if (cur_protection & VM_PROT_WRITE) {
1537 if (cur_protection & VM_PROT_EXECUTE) {
1538 printf("EMBEDDED: %s curprot cannot be write+execute. turning off execute\n", __PRETTY_FUNCTION__);
1539 cur_protection &= ~VM_PROT_EXECUTE;
1540 }
1541 }
1542 if (max_protection & VM_PROT_WRITE) {
1543 if (max_protection & VM_PROT_EXECUTE) {
1544 /* Right now all kinds of data segments are RWX. No point in logging that. */
1545 /* printf("EMBEDDED: %s maxprot cannot be write+execute. turning off execute\n", __PRETTY_FUNCTION__); */
1546
1547 /* Try to take a hint from curprot. If curprot is not writable,
1548 * make maxprot not writable. Otherwise make it not executable.
1549 */
1550 if((cur_protection & VM_PROT_WRITE) == 0) {
1551 max_protection &= ~VM_PROT_WRITE;
1552 } else {
1553 max_protection &= ~VM_PROT_EXECUTE;
1554 }
1555 }
1556 }
1557 assert ((cur_protection | max_protection) == max_protection);
1558 #endif /* CONFIG_EMBEDDED */
1559
1560 if (is_submap) {
1561 if (purgable) {
1562 /* submaps can not be purgeable */
1563 return KERN_INVALID_ARGUMENT;
1564 }
1565 if (object == VM_OBJECT_NULL) {
1566 /* submaps can not be created lazily */
1567 return KERN_INVALID_ARGUMENT;
1568 }
1569 }
1570 if (flags & VM_FLAGS_ALREADY) {
1571 /*
1572 * VM_FLAGS_ALREADY says that it's OK if the same mapping
1573 * is already present. For it to be meaningful, the requested
1574 * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
1575 * we shouldn't try to remove what was mapped there first
1576 * (!VM_FLAGS_OVERWRITE).
1577 */
1578 if ((flags & VM_FLAGS_ANYWHERE) ||
1579 (flags & VM_FLAGS_OVERWRITE)) {
1580 return KERN_INVALID_ARGUMENT;
1581 }
1582 }
1583
1584 effective_min_offset = map->min_offset;
1585 if (flags & VM_FLAGS_BEYOND_MAX) {
1586 /*
1587 * Allow an insertion beyond the map's official top boundary.
1588 */
1589 if (vm_map_is_64bit(map))
1590 effective_max_offset = 0xFFFFFFFFFFFFF000ULL;
1591 else
1592 effective_max_offset = 0x00000000FFFFF000ULL;
1593 } else {
1594 effective_max_offset = map->max_offset;
1595 }
1596
1597 if (size == 0 ||
1598 (offset & PAGE_MASK_64) != 0) {
1599 *address = 0;
1600 return KERN_INVALID_ARGUMENT;
1601 }
1602
1603 VM_GET_FLAGS_ALIAS(flags, alias);
1604
1605 #define RETURN(value) { result = value; goto BailOut; }
1606
1607 assert(page_aligned(*address));
1608 assert(page_aligned(size));
1609
1610 /*
1611 * Only zero-fill objects are allowed to be purgable.
1612 * LP64todo - limit purgable objects to 32-bits for now
1613 */
1614 if (purgable &&
1615 (offset != 0 ||
1616 (object != VM_OBJECT_NULL &&
1617 (object->size != size ||
1618 object->purgable == VM_PURGABLE_DENY))
1619 || size > VM_MAX_ADDRESS)) /* LP64todo: remove when dp capable */
1620 return KERN_INVALID_ARGUMENT;
1621
1622 if (!anywhere && overwrite) {
1623 /*
1624 * Create a temporary VM map to hold the old mappings in the
1625 * affected area while we create the new one.
1626 * This avoids releasing the VM map lock in
1627 * vm_map_entry_delete() and allows atomicity
1628 * when we want to replace some mappings with a new one.
1629 * It also allows us to restore the old VM mappings if the
1630 * new mapping fails.
1631 */
1632 zap_old_map = vm_map_create(PMAP_NULL,
1633 *address,
1634 *address + size,
1635 TRUE);
1636 }
1637
1638 StartAgain: ;
1639
1640 start = *address;
1641
1642 if (anywhere) {
1643 vm_map_lock(map);
1644 map_locked = TRUE;
1645
1646 /*
1647 * Calculate the first possible address.
1648 */
1649
1650 if (start < effective_min_offset)
1651 start = effective_min_offset;
1652 if (start > effective_max_offset)
1653 RETURN(KERN_NO_SPACE);
1654
1655 /*
1656 * Look for the first possible address;
1657 * if there's already something at this
1658 * address, we have to start after it.
1659 */
1660
1661 assert(first_free_is_valid(map));
1662 if (start == effective_min_offset) {
1663 if ((entry = map->first_free) != vm_map_to_entry(map))
1664 start = entry->vme_end;
1665 } else {
1666 vm_map_entry_t tmp_entry;
1667 if (vm_map_lookup_entry(map, start, &tmp_entry))
1668 start = tmp_entry->vme_end;
1669 entry = tmp_entry;
1670 }
1671
1672 /*
1673 * In any case, the "entry" always precedes
1674 * the proposed new region throughout the
1675 * loop:
1676 */
1677
1678 while (TRUE) {
1679 register vm_map_entry_t next;
1680
1681 /*
1682 * Find the end of the proposed new region.
1683 * Be sure we didn't go beyond the end, or
1684 * wrap around the address.
1685 */
1686
1687 end = ((start + mask) & ~mask);
1688 if (end < start)
1689 RETURN(KERN_NO_SPACE);
1690 start = end;
1691 end += size;
1692
1693 if ((end > effective_max_offset) || (end < start)) {
1694 if (map->wait_for_space) {
1695 if (size <= (effective_max_offset -
1696 effective_min_offset)) {
1697 assert_wait((event_t)map,
1698 THREAD_ABORTSAFE);
1699 vm_map_unlock(map);
1700 map_locked = FALSE;
1701 thread_block(THREAD_CONTINUE_NULL);
1702 goto StartAgain;
1703 }
1704 }
1705 RETURN(KERN_NO_SPACE);
1706 }
1707
1708 /*
1709 * If there are no more entries, we must win.
1710 */
1711
1712 next = entry->vme_next;
1713 if (next == vm_map_to_entry(map))
1714 break;
1715
1716 /*
1717 * If there is another entry, it must be
1718 * after the end of the potential new region.
1719 */
1720
1721 if (next->vme_start >= end)
1722 break;
1723
1724 /*
1725 * Didn't fit -- move to the next entry.
1726 */
1727
1728 entry = next;
1729 start = entry->vme_end;
1730 }
1731 *address = start;
1732 } else {
1733 /*
1734 * Verify that:
1735 * the address doesn't itself violate
1736 * the mask requirement.
1737 */
1738
1739 vm_map_lock(map);
1740 map_locked = TRUE;
1741 if ((start & mask) != 0)
1742 RETURN(KERN_NO_SPACE);
1743
1744 /*
1745 * ... the address is within bounds
1746 */
1747
1748 end = start + size;
1749
1750 if ((start < effective_min_offset) ||
1751 (end > effective_max_offset) ||
1752 (start >= end)) {
1753 RETURN(KERN_INVALID_ADDRESS);
1754 }
1755
1756 if (overwrite && zap_old_map != VM_MAP_NULL) {
1757 /*
1758 * Fixed mapping and "overwrite" flag: attempt to
1759 * remove all existing mappings in the specified
1760 * address range, saving them in our "zap_old_map".
1761 */
1762 (void) vm_map_delete(map, start, end,
1763 VM_MAP_REMOVE_SAVE_ENTRIES,
1764 zap_old_map);
1765 }
1766
1767 /*
1768 * ... the starting address isn't allocated
1769 */
1770
1771 if (vm_map_lookup_entry(map, start, &entry)) {
1772 if (! (flags & VM_FLAGS_ALREADY)) {
1773 RETURN(KERN_NO_SPACE);
1774 }
1775 /*
1776 * Check if what's already there is what we want.
1777 */
1778 tmp_start = start;
1779 tmp_offset = offset;
1780 if (entry->vme_start < start) {
1781 tmp_start -= start - entry->vme_start;
1782 tmp_offset -= start - entry->vme_start;
1783
1784 }
1785 for (; entry->vme_start < end;
1786 entry = entry->vme_next) {
1787 /*
1788 * Check if the mapping's attributes
1789 * match the existing map entry.
1790 */
1791 if (entry == vm_map_to_entry(map) ||
1792 entry->vme_start != tmp_start ||
1793 entry->is_sub_map != is_submap ||
1794 entry->offset != tmp_offset ||
1795 entry->needs_copy != needs_copy ||
1796 entry->protection != cur_protection ||
1797 entry->max_protection != max_protection ||
1798 entry->inheritance != inheritance ||
1799 entry->alias != alias) {
1800 /* not the same mapping ! */
1801 RETURN(KERN_NO_SPACE);
1802 }
1803 /*
1804 * Check if the same object is being mapped.
1805 */
1806 if (is_submap) {
1807 if (entry->object.sub_map !=
1808 (vm_map_t) object) {
1809 /* not the same submap */
1810 RETURN(KERN_NO_SPACE);
1811 }
1812 } else {
1813 if (entry->object.vm_object != object) {
1814 /* not the same VM object... */
1815 vm_object_t obj2;
1816
1817 obj2 = entry->object.vm_object;
1818 if ((obj2 == VM_OBJECT_NULL ||
1819 obj2->internal) &&
1820 (object == VM_OBJECT_NULL ||
1821 object->internal)) {
1822 /*
1823 * ... but both are
1824 * anonymous memory,
1825 * so equivalent.
1826 */
1827 } else {
1828 RETURN(KERN_NO_SPACE);
1829 }
1830 }
1831 }
1832
1833 tmp_offset += entry->vme_end - entry->vme_start;
1834 tmp_start += entry->vme_end - entry->vme_start;
1835 if (entry->vme_end >= end) {
1836 /* reached the end of our mapping */
1837 break;
1838 }
1839 }
1840 /* it all matches: let's use what's already there ! */
1841 RETURN(KERN_MEMORY_PRESENT);
1842 }
1843
1844 /*
1845 * ... the next region doesn't overlap the
1846 * end point.
1847 */
1848
1849 if ((entry->vme_next != vm_map_to_entry(map)) &&
1850 (entry->vme_next->vme_start < end))
1851 RETURN(KERN_NO_SPACE);
1852 }
1853
1854 /*
1855 * At this point,
1856 * "start" and "end" should define the endpoints of the
1857 * available new range, and
1858 * "entry" should refer to the region before the new
1859 * range, and
1860 *
1861 * the map should be locked.
1862 */
1863
1864 /*
1865 * See whether we can avoid creating a new entry (and object) by
1866 * extending one of our neighbors. [So far, we only attempt to
1867 * extend from below.] Note that we can never extend/join
1868 * purgable objects because they need to remain distinct
1869 * entities in order to implement their "volatile object"
1870 * semantics.
1871 */
1872
1873 if (purgable) {
1874 if (object == VM_OBJECT_NULL) {
1875 object = vm_object_allocate(size);
1876 object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
1877 object->purgable = VM_PURGABLE_NONVOLATILE;
1878 offset = (vm_object_offset_t)0;
1879 }
1880 } else if ((is_submap == FALSE) &&
1881 (object == VM_OBJECT_NULL) &&
1882 (entry != vm_map_to_entry(map)) &&
1883 (entry->vme_end == start) &&
1884 (!entry->is_shared) &&
1885 (!entry->is_sub_map) &&
1886 (entry->alias == alias) &&
1887 (entry->inheritance == inheritance) &&
1888 (entry->protection == cur_protection) &&
1889 (entry->max_protection == max_protection) &&
1890 (entry->behavior == VM_BEHAVIOR_DEFAULT) &&
1891 (entry->in_transition == 0) &&
1892 (entry->no_cache == no_cache) &&
1893 ((alias == VM_MEMORY_REALLOC) ||
1894 ((entry->vme_end - entry->vme_start) + size < NO_COALESCE_LIMIT)) &&
1895 (entry->wired_count == 0)) { /* implies user_wired_count == 0 */
1896 if (vm_object_coalesce(entry->object.vm_object,
1897 VM_OBJECT_NULL,
1898 entry->offset,
1899 (vm_object_offset_t) 0,
1900 (vm_map_size_t)(entry->vme_end - entry->vme_start),
1901 (vm_map_size_t)(end - entry->vme_end))) {
1902
1903 /*
1904 * Coalesced the two objects - can extend
1905 * the previous map entry to include the
1906 * new range.
1907 */
1908 map->size += (end - entry->vme_end);
1909 entry->vme_end = end;
1910 UPDATE_FIRST_FREE(map, map->first_free);
1911 RETURN(KERN_SUCCESS);
1912 }
1913 }
1914
1915 /*
1916 * Create a new entry
1917 * LP64todo - for now, we can only allocate 4GB internal objects
1918 * because the default pager can't page bigger ones. Remove this
1919 * when it can.
1920 *
1921 * XXX FBDP
1922 * The reserved "page zero" in each process's address space can
1923 * be arbitrarily large. Splitting it into separate 4GB objects and
1924 * therefore different VM map entries serves no purpose and just
1925 * slows down operations on the VM map, so let's not split the
1926 * allocation into 4GB chunks if the max protection is NONE. That
1927 * memory should never be accessible, so it will never get to the
1928 * default pager.
1929 */
1930 tmp_start = start;
1931 if (object == VM_OBJECT_NULL &&
1932 size > (vm_map_size_t)VM_MAX_ADDRESS &&
1933 max_protection != VM_PROT_NONE)
1934 tmp_end = tmp_start + (vm_map_size_t)VM_MAX_ADDRESS;
1935 else
1936 tmp_end = end;
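/*
 * Each pass of this loop inserts one map entry covering at most
 * VM_MAX_ADDRESS bytes.  The assignments in the "while" condition
 * below deliberately slide [tmp_start, tmp_end) forward until the
 * whole range has been covered; only the large anonymous case set
 * up above is actually chunked.
 */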
1937 do {
1938 new_entry = vm_map_entry_insert(map, entry, tmp_start, tmp_end,
1939 object, offset, needs_copy,
1940 FALSE, FALSE,
1941 cur_protection, max_protection,
1942 VM_BEHAVIOR_DEFAULT,
1943 inheritance, 0, no_cache);
1944 new_entry->alias = alias;
1945 if (is_submap) {
1946 vm_map_t submap;
1947 boolean_t submap_is_64bit;
1948 boolean_t use_pmap;
1949
1950 new_entry->is_sub_map = TRUE;
1951 submap = (vm_map_t) object;
1952 submap_is_64bit = vm_map_is_64bit(submap);
1953 use_pmap = (alias == VM_MEMORY_SHARED_PMAP);
1954 #ifndef NO_NESTED_PMAP
1955 if (use_pmap && submap->pmap == NULL) {
1956 /* we need a sub pmap to nest... */
1957 submap->pmap = pmap_create(0, submap_is_64bit);
1958 if (submap->pmap == NULL) {
1959 /* let's proceed without nesting... */
1960 }
1961 }
1962 if (use_pmap && submap->pmap != NULL) {
1963 kr = pmap_nest(map->pmap,
1964 submap->pmap,
1965 tmp_start,
1966 tmp_start,
1967 tmp_end - tmp_start);
1968 if (kr != KERN_SUCCESS) {
1969 printf("vm_map_enter: "
1970 "pmap_nest(0x%llx,0x%llx) "
1971 "error 0x%x\n",
1972 (long long)tmp_start,
1973 (long long)tmp_end,
1974 kr);
1975 } else {
1976 /* we're now nested ! */
1977 new_entry->use_pmap = TRUE;
1978 pmap_empty = FALSE;
1979 }
1980 }
1981 #endif /* NO_NESTED_PMAP */
1982 }
1983 entry = new_entry;
1984 } while (tmp_end != end &&
1985 (tmp_start = tmp_end) &&
1986 (tmp_end = (end - tmp_end > (vm_map_size_t)VM_MAX_ADDRESS) ?
1987 tmp_end + (vm_map_size_t)VM_MAX_ADDRESS : end));
1988
1989 vm_map_unlock(map);
1990 map_locked = FALSE;
1991
1992 new_mapping_established = TRUE;
1993
1994 /* Wire down the new entry if the user
1995 * requested all new map entries be wired.
1996 */
1997 if (map->wiring_required) {
1998 pmap_empty = FALSE; /* pmap won't be empty */
1999 result = vm_map_wire(map, start, end,
2000 new_entry->protection, TRUE);
2001 RETURN(result);
2002 }
2003
2004 if ((object != VM_OBJECT_NULL) &&
2005 (vm_map_pmap_enter_enable) &&
2006 (!anywhere) &&
2007 (!needs_copy) &&
2008 (size < (128*1024))) {
2009 pmap_empty = FALSE; /* pmap won't be empty */
2010
2011 if (override_nx(map, alias) && cur_protection)
2012 cur_protection |= VM_PROT_EXECUTE;
2013
2014 vm_map_pmap_enter(map, start, end,
2015 object, offset, cur_protection);
2016 }
2017
2018 BailOut: ;
2019 if (result == KERN_SUCCESS) {
2020 vm_prot_t pager_prot;
2021 memory_object_t pager;
2022
2023 if (pmap_empty &&
2024 !(flags & VM_FLAGS_NO_PMAP_CHECK)) {
2025 assert(vm_map_pmap_is_empty(map,
2026 *address,
2027 *address+size));
2028 }
2029
2030 /*
2031 * For "named" VM objects, let the pager know that the
2032 * memory object is being mapped. Some pagers need to keep
2033 * track of this, to know when they can reclaim the memory
2034 * object, for example.
2035 * VM calls memory_object_map() for each mapping (specifying
2036 * the protection of each mapping) and calls
2037 * memory_object_last_unmap() when all the mappings are gone.
2038 */
2039 pager_prot = max_protection;
2040 if (needs_copy) {
2041 /*
2042 * Copy-On-Write mapping: won't modify
2043 * the memory object.
2044 */
2045 pager_prot &= ~VM_PROT_WRITE;
2046 }
2047 if (!is_submap &&
2048 object != VM_OBJECT_NULL &&
2049 object->named &&
2050 object->pager != MEMORY_OBJECT_NULL) {
2051 vm_object_lock(object);
2052 pager = object->pager;
2053 if (object->named &&
2054 pager != MEMORY_OBJECT_NULL) {
2055 assert(object->pager_ready);
2056 vm_object_mapping_wait(object, THREAD_UNINT);
2057 vm_object_mapping_begin(object);
2058 vm_object_unlock(object);
2059
2060 kr = memory_object_map(pager, pager_prot);
2061 assert(kr == KERN_SUCCESS);
2062
2063 vm_object_lock(object);
2064 vm_object_mapping_end(object);
2065 }
2066 vm_object_unlock(object);
2067 }
2068 } else {
2069 if (new_mapping_established) {
2070 /*
2071 * We have to get rid of the new mappings since we
2072 * won't make them available to the user.
2073 * Try to do that atomically, to minimize the risk
2074 * that someone else creates new mappings in that range.
2075 */
2076 zap_new_map = vm_map_create(PMAP_NULL,
2077 *address,
2078 *address + size,
2079 TRUE);
2080 if (!map_locked) {
2081 vm_map_lock(map);
2082 map_locked = TRUE;
2083 }
2084 (void) vm_map_delete(map, *address, *address+size,
2085 VM_MAP_REMOVE_SAVE_ENTRIES,
2086 zap_new_map);
2087 }
2088 if (zap_old_map != VM_MAP_NULL &&
2089 zap_old_map->hdr.nentries != 0) {
2090 vm_map_entry_t entry1, entry2;
2091
2092 /*
2093 * The new mapping failed. Attempt to restore
2094 * the old mappings, saved in the "zap_old_map".
2095 */
2096 if (!map_locked) {
2097 vm_map_lock(map);
2098 map_locked = TRUE;
2099 }
2100
2101 /* first check if the coast is still clear */
2102 start = vm_map_first_entry(zap_old_map)->vme_start;
2103 end = vm_map_last_entry(zap_old_map)->vme_end;
2104 if (vm_map_lookup_entry(map, start, &entry1) ||
2105 vm_map_lookup_entry(map, end, &entry2) ||
2106 entry1 != entry2) {
2107 /*
2108 * Part of that range has already been
2109 * re-mapped: we can't restore the old
2110 * mappings...
2111 */
2112 vm_map_enter_restore_failures++;
2113 } else {
2114 /*
2115 * Transfer the saved map entries from
2116 * "zap_old_map" to the original "map",
2117 * inserting them all after "entry1".
2118 */
2119 for (entry2 = vm_map_first_entry(zap_old_map);
2120 entry2 != vm_map_to_entry(zap_old_map);
2121 entry2 = vm_map_first_entry(zap_old_map)) {
2122 vm_map_size_t entry_size;
2123
2124 entry_size = (entry2->vme_end -
2125 entry2->vme_start);
2126 vm_map_entry_unlink(zap_old_map,
2127 entry2);
2128 zap_old_map->size -= entry_size;
2129 vm_map_entry_link(map, entry1, entry2);
2130 map->size += entry_size;
2131 entry1 = entry2;
2132 }
2133 if (map->wiring_required) {
2134 /*
2135 * XXX TODO: we should rewire the
2136 * old pages here...
2137 */
2138 }
2139 vm_map_enter_restore_successes++;
2140 }
2141 }
2142 }
2143
2144 if (map_locked) {
2145 vm_map_unlock(map);
2146 }
2147
2148 /*
2149 * Get rid of the "zap_maps" and all the map entries that
2150 * they may still contain.
2151 */
2152 if (zap_old_map != VM_MAP_NULL) {
2153 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
2154 zap_old_map = VM_MAP_NULL;
2155 }
2156 if (zap_new_map != VM_MAP_NULL) {
2157 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
2158 zap_new_map = VM_MAP_NULL;
2159 }
2160
2161 return result;
2162
2163 #undef RETURN
2164 }
2165
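/*
 * vm_map_enter_mem_object:
 *
 * Map the memory object, named entry or submap designated by "port"
 * into "target_map": resolve the port, perform the optional copy, and
 * hand the resulting VM object off to vm_map_enter().
 */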
2166 kern_return_t
2167 vm_map_enter_mem_object(
2168 vm_map_t target_map,
2169 vm_map_offset_t *address,
2170 vm_map_size_t initial_size,
2171 vm_map_offset_t mask,
2172 int flags,
2173 ipc_port_t port,
2174 vm_object_offset_t offset,
2175 boolean_t copy,
2176 vm_prot_t cur_protection,
2177 vm_prot_t max_protection,
2178 vm_inherit_t inheritance)
2179 {
2180 vm_map_address_t map_addr;
2181 vm_map_size_t map_size;
2182 vm_object_t object;
2183 vm_object_size_t size;
2184 kern_return_t result;
2185
2186 /*
2187 * Check arguments for validity
2188 */
2189 if ((target_map == VM_MAP_NULL) ||
2190 (cur_protection & ~VM_PROT_ALL) ||
2191 (max_protection & ~VM_PROT_ALL) ||
2192 (inheritance > VM_INHERIT_LAST_VALID) ||
2193 initial_size == 0)
2194 return KERN_INVALID_ARGUMENT;
2195
2196 map_addr = vm_map_trunc_page(*address);
2197 map_size = vm_map_round_page(initial_size);
2198 size = vm_object_round_page(initial_size);
2199
2200 /*
2201 * Find the vm object (if any) corresponding to this port.
2202 */
2203 if (!IP_VALID(port)) {
2204 object = VM_OBJECT_NULL;
2205 offset = 0;
2206 copy = FALSE;
2207 } else if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
2208 vm_named_entry_t named_entry;
2209
2210 named_entry = (vm_named_entry_t) port->ip_kobject;
2211 /* a few checks to make sure user is obeying rules */
2212 if (size == 0) {
2213 if (offset >= named_entry->size)
2214 return KERN_INVALID_RIGHT;
2215 size = named_entry->size - offset;
2216 }
2217 if ((named_entry->protection & max_protection) !=
2218 max_protection)
2219 return KERN_INVALID_RIGHT;
2220 if ((named_entry->protection & cur_protection) !=
2221 cur_protection)
2222 return KERN_INVALID_RIGHT;
2223 if (named_entry->size < (offset + size))
2224 return KERN_INVALID_ARGUMENT;
2225
2226 /* the caller's "offset" parameter is relative to the start of the */
2227 /* named entry; convert it to an offset within the backing object */
2228 offset = offset + named_entry->offset;
2229
2230 named_entry_lock(named_entry);
2231 if (named_entry->is_sub_map) {
2232 vm_map_t submap;
2233
2234 submap = named_entry->backing.map;
2235 vm_map_lock(submap);
2236 vm_map_reference(submap);
2237 vm_map_unlock(submap);
2238 named_entry_unlock(named_entry);
2239
2240 result = vm_map_enter(target_map,
2241 &map_addr,
2242 map_size,
2243 mask,
2244 flags | VM_FLAGS_SUBMAP,
2245 (vm_object_t) submap,
2246 offset,
2247 copy,
2248 cur_protection,
2249 max_protection,
2250 inheritance);
2251 if (result != KERN_SUCCESS) {
2252 vm_map_deallocate(submap);
2253 } else {
2254 /*
2255 * No need to lock "submap" just to check its
2256 * "mapped" flag: that flag is never reset
2257 * once it's been set and if we race, we'll
2258 * just end up setting it twice, which is OK.
2259 */
2260 if (submap->mapped == FALSE) {
2261 /*
2262 * This submap has never been mapped.
2263 * Set its "mapped" flag now that it
2264 * has been mapped.
2265 * This happens only for the first ever
2266 * mapping of a "submap".
2267 */
2268 vm_map_lock(submap);
2269 submap->mapped = TRUE;
2270 vm_map_unlock(submap);
2271 }
2272 *address = map_addr;
2273 }
2274 return result;
2275
2276 } else if (named_entry->is_pager) {
2277 unsigned int access;
2278 vm_prot_t protections;
2279 unsigned int wimg_mode;
2280 boolean_t cache_attr;
2281
2282 protections = named_entry->protection & VM_PROT_ALL;
2283 access = GET_MAP_MEM(named_entry->protection);
2284
2285 object = vm_object_enter(named_entry->backing.pager,
2286 named_entry->size,
2287 named_entry->internal,
2288 FALSE,
2289 FALSE);
2290 if (object == VM_OBJECT_NULL) {
2291 named_entry_unlock(named_entry);
2292 return KERN_INVALID_OBJECT;
2293 }
2294
2295 /* JMM - drop reference on pager here */
2296
2297 /* create an extra ref for the named entry */
2298 vm_object_lock(object);
2299 vm_object_reference_locked(object);
2300 named_entry->backing.object = object;
2301 named_entry->is_pager = FALSE;
2302 named_entry_unlock(named_entry);
2303
2304 wimg_mode = object->wimg_bits;
2305 if (access == MAP_MEM_IO) {
2306 wimg_mode = VM_WIMG_IO;
2307 } else if (access == MAP_MEM_COPYBACK) {
2308 wimg_mode = VM_WIMG_USE_DEFAULT;
2309 } else if (access == MAP_MEM_WTHRU) {
2310 wimg_mode = VM_WIMG_WTHRU;
2311 } else if (access == MAP_MEM_WCOMB) {
2312 wimg_mode = VM_WIMG_WCOMB;
2313 }
2314 if (wimg_mode == VM_WIMG_IO ||
2315 wimg_mode == VM_WIMG_WCOMB)
2316 cache_attr = TRUE;
2317 else
2318 cache_attr = FALSE;
2319
2320 /* wait for object (if any) to be ready */
2321 if (!named_entry->internal) {
2322 while (!object->pager_ready) {
2323 vm_object_wait(
2324 object,
2325 VM_OBJECT_EVENT_PAGER_READY,
2326 THREAD_UNINT);
2327 vm_object_lock(object);
2328 }
2329 }
2330
2331 if (object->wimg_bits != wimg_mode) {
2332 vm_page_t p;
2333
2334 vm_object_paging_wait(object, THREAD_UNINT);
2335
2336 object->wimg_bits = wimg_mode;
2337 queue_iterate(&object->memq, p, vm_page_t, listq) {
2338 if (!p->fictitious) {
2339 if (p->pmapped)
2340 pmap_disconnect(p->phys_page);
2341 if (cache_attr)
2342 pmap_sync_page_attributes_phys(p->phys_page);
2343 }
2344 }
2345 }
2346 object->true_share = TRUE;
2347 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
2348 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
2349 vm_object_unlock(object);
2350 } else {
2351 /* This is the case where we are going to map */
2352 /* an already mapped object. If the object is */
2353 /* not ready, it is internal. An external */
2354 /* object cannot be mapped until it is ready, */
2355 /* so we can avoid the ready check */
2356 /* in this case. */
2357 object = named_entry->backing.object;
2358 assert(object != VM_OBJECT_NULL);
2359 named_entry_unlock(named_entry);
2360 vm_object_reference(object);
2361 }
2362 } else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
2363 /*
2364 * JMM - This is temporary until we unify named entries
2365 * and raw memory objects.
2366 *
2367 * Detected fake ip_kotype for a memory object. In
2368 * this case, the port isn't really a port at all, but
2369 * instead is just a raw memory object.
2370 */
2371
2372 object = vm_object_enter((memory_object_t)port,
2373 size, FALSE, FALSE, FALSE);
2374 if (object == VM_OBJECT_NULL)
2375 return KERN_INVALID_OBJECT;
2376
2377 /* wait for object (if any) to be ready */
2378 if (object != VM_OBJECT_NULL) {
2379 if (object == kernel_object) {
2380 printf("Warning: Attempt to map kernel object"
2381 " by a non-private kernel entity\n");
2382 return KERN_INVALID_OBJECT;
2383 }
2384 vm_object_lock(object);
2385 while (!object->pager_ready) {
2386 vm_object_wait(object,
2387 VM_OBJECT_EVENT_PAGER_READY,
2388 THREAD_UNINT);
2389 vm_object_lock(object);
2390 }
2391 vm_object_unlock(object);
2392 }
2393 } else {
2394 return KERN_INVALID_OBJECT;
2395 }
2396
2397 if (object != VM_OBJECT_NULL &&
2398 object->named &&
2399 object->pager != MEMORY_OBJECT_NULL &&
2400 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
2401 memory_object_t pager;
2402 vm_prot_t pager_prot;
2403 kern_return_t kr;
2404
2405 /*
2406 * For "named" VM objects, let the pager know that the
2407 * memory object is being mapped. Some pagers need to keep
2408 * track of this, to know when they can reclaim the memory
2409 * object, for example.
2410 * VM calls memory_object_map() for each mapping (specifying
2411 * the protection of each mapping) and calls
2412 * memory_object_last_unmap() when all the mappings are gone.
2413 */
2414 pager_prot = max_protection;
2415 if (copy) {
2416 /*
2417 * Copy-On-Write mapping: won't modify the
2418 * memory object.
2419 */
2420 pager_prot &= ~VM_PROT_WRITE;
2421 }
2422 vm_object_lock(object);
2423 pager = object->pager;
2424 if (object->named &&
2425 pager != MEMORY_OBJECT_NULL &&
2426 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
2427 assert(object->pager_ready);
2428 vm_object_mapping_wait(object, THREAD_UNINT);
2429 vm_object_mapping_begin(object);
2430 vm_object_unlock(object);
2431
2432 kr = memory_object_map(pager, pager_prot);
2433 assert(kr == KERN_SUCCESS);
2434
2435 vm_object_lock(object);
2436 vm_object_mapping_end(object);
2437 }
2438 vm_object_unlock(object);
2439 }
2440
2441 /*
2442 * Perform the copy if requested
2443 */
2444
2445 if (copy) {
2446 vm_object_t new_object;
2447 vm_object_offset_t new_offset;
2448
2449 result = vm_object_copy_strategically(object, offset, size,
2450 &new_object, &new_offset,
2451 &copy);
2452
2453
2454 if (result == KERN_MEMORY_RESTART_COPY) {
2455 boolean_t success;
2456 boolean_t src_needs_copy;
2457
2458 /*
2459 * XXX
2460 * We currently ignore src_needs_copy.
2461 * This really is the issue of how to make
2462 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
2463 * non-kernel users to use. Solution forthcoming.
2464 * In the meantime, since we don't allow non-kernel
2465 * memory managers to specify symmetric copy,
2466 * we won't run into problems here.
2467 */
2468 new_object = object;
2469 new_offset = offset;
2470 success = vm_object_copy_quickly(&new_object,
2471 new_offset, size,
2472 &src_needs_copy,
2473 &copy);
2474 assert(success);
2475 result = KERN_SUCCESS;
2476 }
2477 /*
2478 * Throw away the reference to the
2479 * original object, as it won't be mapped.
2480 */
2481
2482 vm_object_deallocate(object);
2483
2484 if (result != KERN_SUCCESS)
2485 return result;
2486
2487 object = new_object;
2488 offset = new_offset;
2489 }
2490
2491 result = vm_map_enter(target_map,
2492 &map_addr, map_size,
2493 (vm_map_offset_t)mask,
2494 flags,
2495 object, offset,
2496 copy,
2497 cur_protection, max_protection, inheritance);
2498 if (result != KERN_SUCCESS)
2499 vm_object_deallocate(object);
2500 *address = map_addr;
2501 return result;
2502 }
2503
2504 #if VM_CPM
2505
2506 #ifdef MACH_ASSERT
2507 extern pmap_paddr_t avail_start, avail_end;
2508 #endif
2509
2510 /*
2511 * Allocate memory in the specified map, with the caveat that
2512 * the memory is physically contiguous. This call may fail
2513 * if the system can't find sufficient contiguous memory.
2514 * This call may cause or lead to heart-stopping amounts of
2515 * paging activity.
2516 *
2517 * Memory obtained from this call should be freed in the
2518 * normal way, viz., via vm_deallocate.
2519 */
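/*
 * Hypothetical usage sketch (the map and size below are placeholders,
 * not taken from this file):
 *
 *	vm_map_offset_t addr = 0;
 *	kr = vm_map_enter_cpm(some_map, &addr, some_size, VM_FLAGS_ANYWHERE);
 *	...
 *	(void) vm_deallocate(some_map, addr, some_size);
 */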
2520 kern_return_t
2521 vm_map_enter_cpm(
2522 vm_map_t map,
2523 vm_map_offset_t *addr,
2524 vm_map_size_t size,
2525 int flags)
2526 {
2527 vm_object_t cpm_obj;
2528 pmap_t pmap;
2529 vm_page_t m, pages;
2530 kern_return_t kr;
2531 vm_map_offset_t va, start, end, offset;
2532 #if MACH_ASSERT
2533 vm_map_offset_t prev_addr;
2534 #endif /* MACH_ASSERT */
2535
2536 boolean_t anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
2537
2538 if (!vm_allocate_cpm_enabled)
2539 return KERN_FAILURE;
2540
2541 if (size == 0) {
2542 *addr = 0;
2543 return KERN_SUCCESS;
2544 }
2545 if (anywhere)
2546 *addr = vm_map_min(map);
2547 else
2548 *addr = vm_map_trunc_page(*addr);
2549 size = vm_map_round_page(size);
2550
2551 /*
2552 * LP64todo - cpm_allocate should probably allow
2553 * allocations of >4GB, but not with the current
2554 * algorithm, so just cast down the size for now.
2555 */
2556 if (size > VM_MAX_ADDRESS)
2557 return KERN_RESOURCE_SHORTAGE;
2558 if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
2559 &pages, 0, TRUE)) != KERN_SUCCESS)
2560 return kr;
2561
2562 cpm_obj = vm_object_allocate((vm_object_size_t)size);
2563 assert(cpm_obj != VM_OBJECT_NULL);
2564 assert(cpm_obj->internal);
2565 assert(cpm_obj->size == (vm_object_size_t)size);
2566 assert(cpm_obj->can_persist == FALSE);
2567 assert(cpm_obj->pager_created == FALSE);
2568 assert(cpm_obj->pageout == FALSE);
2569 assert(cpm_obj->shadow == VM_OBJECT_NULL);
2570
2571 /*
2572 * Insert pages into object.
2573 */
2574
2575 vm_object_lock(cpm_obj);
2576 for (offset = 0; offset < size; offset += PAGE_SIZE) {
2577 m = pages;
2578 pages = NEXT_PAGE(m);
2579 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
2580
2581 assert(!m->gobbled);
2582 assert(!m->wanted);
2583 assert(!m->pageout);
2584 assert(!m->tabled);
2585 assert(m->wire_count);
2586 /*
2587 * ENCRYPTED SWAP:
2588 * "m" is not supposed to be pageable, so it
2589 * should not be encrypted. It wouldn't be safe
2590 * to enter it in a new VM object while encrypted.
2591 */
2592 ASSERT_PAGE_DECRYPTED(m);
2593 assert(m->busy);
2594 assert(m->phys_page>=(avail_start>>PAGE_SHIFT) && m->phys_page<=(avail_end>>PAGE_SHIFT));
2595
2596 m->busy = FALSE;
2597 vm_page_insert(m, cpm_obj, offset);
2598 }
2599 assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
2600 vm_object_unlock(cpm_obj);
2601
2602 /*
2603 * Hang onto a reference on the object in case a
2604 * multi-threaded application for some reason decides
2605 * to deallocate the portion of the address space into
2606 * which we will insert this object.
2607 *
2608 * Unfortunately, we must insert the object now before
2609 * we can talk to the pmap module about which addresses
2610 * must be wired down. Hence, the race with a multi-
2611 * threaded app.
2612 */
2613 vm_object_reference(cpm_obj);
2614
2615 /*
2616 * Insert object into map.
2617 */
2618
2619 kr = vm_map_enter(
2620 map,
2621 addr,
2622 size,
2623 (vm_map_offset_t)0,
2624 flags,
2625 cpm_obj,
2626 (vm_object_offset_t)0,
2627 FALSE,
2628 VM_PROT_ALL,
2629 VM_PROT_ALL,
2630 VM_INHERIT_DEFAULT);
2631
2632 if (kr != KERN_SUCCESS) {
2633 /*
2634 * A CPM object doesn't have can_persist set,
2635 * so all we have to do is deallocate it to
2636 * free up these pages.
2637 */
2638 assert(cpm_obj->pager_created == FALSE);
2639 assert(cpm_obj->can_persist == FALSE);
2640 assert(cpm_obj->pageout == FALSE);
2641 assert(cpm_obj->shadow == VM_OBJECT_NULL);
2642 vm_object_deallocate(cpm_obj); /* kill acquired ref */
2643 vm_object_deallocate(cpm_obj); /* kill creation ref */
2644 }
2645
2646 /*
2647 * Inform the physical mapping system that the
2648 * range of addresses may not fault, so that
2649 * page tables and such can be locked down as well.
2650 */
2651 start = *addr;
2652 end = start + size;
2653 pmap = vm_map_pmap(map);
2654 pmap_pageable(pmap, start, end, FALSE);
2655
2656 /*
2657 * Enter each page into the pmap, to avoid faults.
2658 * Note that this loop could be coded more efficiently,
2659 * if the need arose, rather than looking up each page
2660 * again.
2661 */
2662 for (offset = 0, va = start; offset < size;
2663 va += PAGE_SIZE, offset += PAGE_SIZE) {
2664 int type_of_fault;
2665
2666 vm_object_lock(cpm_obj);
2667 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
2668 assert(m != VM_PAGE_NULL);
2669
2670 vm_page_zero_fill(m);
2671
2672 type_of_fault = DBG_ZERO_FILL_FAULT;
2673
2674 vm_fault_enter(m, pmap, va, VM_PROT_ALL,
2675 m->wire_count != 0, FALSE, FALSE,
2676 &type_of_fault);
2677
2678 vm_object_unlock(cpm_obj);
2679 }
2680
2681 #if MACH_ASSERT
2682 /*
2683 * Verify ordering in address space.
2684 */
2685 for (offset = 0; offset < size; offset += PAGE_SIZE) {
2686 vm_object_lock(cpm_obj);
2687 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
2688 vm_object_unlock(cpm_obj);
2689 if (m == VM_PAGE_NULL)
2690 panic("vm_allocate_cpm: obj 0x%x off 0x%x no page",
2691 cpm_obj, offset);
2692 assert(m->tabled);
2693 assert(!m->busy);
2694 assert(!m->wanted);
2695 assert(!m->fictitious);
2696 assert(!m->private);
2697 assert(!m->absent);
2698 assert(!m->error);
2699 assert(!m->cleaning);
2700 assert(!m->precious);
2701 assert(!m->clustered);
2702 if (offset != 0) {
2703 if (m->phys_page != prev_addr + 1) {
2704 printf("start 0x%x end 0x%x va 0x%x\n",
2705 start, end, va);
2706 printf("obj 0x%x off 0x%x\n", cpm_obj, offset);
2707 printf("m 0x%x prev_address 0x%x\n", m,
2708 prev_addr);
2709 panic("vm_allocate_cpm: pages not contig!");
2710 }
2711 }
2712 prev_addr = m->phys_page;
2713 }
2714 #endif /* MACH_ASSERT */
2715
2716 vm_object_deallocate(cpm_obj); /* kill extra ref */
2717
2718 return kr;
2719 }
2720
2721
2722 #else /* VM_CPM */
2723
2724 /*
2725 * Interface is defined in all cases, but unless the kernel
2726 * is built explicitly for this option, the interface does
2727 * nothing.
2728 */
2729
2730 kern_return_t
2731 vm_map_enter_cpm(
2732 __unused vm_map_t map,
2733 __unused vm_map_offset_t *addr,
2734 __unused vm_map_size_t size,
2735 __unused int flags)
2736 {
2737 return KERN_FAILURE;
2738 }
2739 #endif /* VM_CPM */
2740
2741 /*
2742 * Clip and unnest a portion of a nested submap mapping.
2743 */
2744 static void
2745 vm_map_clip_unnest(
2746 vm_map_t map,
2747 vm_map_entry_t entry,
2748 vm_map_offset_t start_unnest,
2749 vm_map_offset_t end_unnest)
2750 {
2751 assert(entry->is_sub_map);
2752 assert(entry->object.sub_map != NULL);
2753
2754 if (entry->vme_start > start_unnest ||
2755 entry->vme_end < end_unnest) {
2756 panic("vm_map_clip_unnest(0x%llx,0x%llx): "
2757 "bad nested entry: start=0x%llx end=0x%llx\n",
2758 (long long)start_unnest, (long long)end_unnest,
2759 (long long)entry->vme_start, (long long)entry->vme_end);
2760 }
2761 if (start_unnest > entry->vme_start) {
2762 _vm_map_clip_start(&map->hdr,
2763 entry,
2764 start_unnest);
2765 UPDATE_FIRST_FREE(map, map->first_free);
2766 }
2767 if (entry->vme_end > end_unnest) {
2768 _vm_map_clip_end(&map->hdr,
2769 entry,
2770 end_unnest);
2771 UPDATE_FIRST_FREE(map, map->first_free);
2772 }
2773
2774 pmap_unnest(map->pmap,
2775 entry->vme_start,
2776 entry->vme_end - entry->vme_start);
2777 if ((map->mapped) && (map->ref_count)) {
2778 /* clean up parent map/maps */
2779 vm_map_submap_pmap_clean(
2780 map, entry->vme_start,
2781 entry->vme_end,
2782 entry->object.sub_map,
2783 entry->offset);
2784 }
2785 entry->use_pmap = FALSE;
2786 }
2787
2788 /*
2789 * vm_map_clip_start: [ internal use only ]
2790 *
2791 * Asserts that the given entry begins at or after
2792 * the specified address; if necessary,
2793 * it splits the entry into two.
2794 */
2795 static void
2796 vm_map_clip_start(
2797 vm_map_t map,
2798 vm_map_entry_t entry,
2799 vm_map_offset_t startaddr)
2800 {
2801 #ifndef NO_NESTED_PMAP
2802 if (entry->use_pmap &&
2803 startaddr >= entry->vme_start) {
2804 vm_map_offset_t start_unnest, end_unnest;
2805
2806 /*
2807 * Make sure "startaddr" is no longer in a nested range
2808 * before we clip. Unnest only the minimum range the platform
2809 * can handle.
2810 */
2811 start_unnest = startaddr & ~(pmap_nesting_size_min - 1);
2812 end_unnest = start_unnest + pmap_nesting_size_min;
2813 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
2814 }
2815 #endif /* NO_NESTED_PMAP */
2816 if (startaddr > entry->vme_start) {
2817 if (entry->object.vm_object &&
2818 !entry->is_sub_map &&
2819 entry->object.vm_object->phys_contiguous) {
2820 pmap_remove(map->pmap,
2821 (addr64_t)(entry->vme_start),
2822 (addr64_t)(entry->vme_end));
2823 }
2824 _vm_map_clip_start(&map->hdr, entry, startaddr);
2825 UPDATE_FIRST_FREE(map, map->first_free);
2826 }
2827 }
2828
2829
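/*
 * vm_map_copy_clip_start: same splitting operation as vm_map_clip_start(),
 * but applied to an entry in a vm_map_copy's entry list, so no pmap
 * unnesting or first-free update is needed.
 */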
2830 #define vm_map_copy_clip_start(copy, entry, startaddr) \
2831 MACRO_BEGIN \
2832 if ((startaddr) > (entry)->vme_start) \
2833 _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \
2834 MACRO_END
2835
2836 /*
2837 * This routine is called only when it is known that
2838 * the entry must be split.
2839 */
2840 static void
2841 _vm_map_clip_start(
2842 register struct vm_map_header *map_header,
2843 register vm_map_entry_t entry,
2844 register vm_map_offset_t start)
2845 {
2846 register vm_map_entry_t new_entry;
2847
2848 /*
2849 * Split off the front portion --
2850 * note that we must insert the new
2851 * entry BEFORE this one, so that
2852 * this entry has the specified starting
2853 * address.
2854 */
2855
2856 new_entry = _vm_map_entry_create(map_header);
2857 vm_map_entry_copy_full(new_entry, entry);
2858
2859 new_entry->vme_end = start;
2860 entry->offset += (start - entry->vme_start);
2861 entry->vme_start = start;
2862
2863 _vm_map_entry_link(map_header, entry->vme_prev, new_entry);
2864
2865 if (entry->is_sub_map)
2866 vm_map_reference(new_entry->object.sub_map);
2867 else
2868 vm_object_reference(new_entry->object.vm_object);
2869 }
2870
2871
2872 /*
2873 * vm_map_clip_end: [ internal use only ]
2874 *
2875 * Asserts that the given entry ends at or before
2876 * the specified address; if necessary,
2877 * it splits the entry into two.
2878 */
2879 static void
2880 vm_map_clip_end(
2881 vm_map_t map,
2882 vm_map_entry_t entry,
2883 vm_map_offset_t endaddr)
2884 {
2885 if (endaddr > entry->vme_end) {
2886 /*
2887 * Within the scope of this clipping, limit "endaddr" to
2888 * the end of this map entry...
2889 */
2890 endaddr = entry->vme_end;
2891 }
2892 #ifndef NO_NESTED_PMAP
2893 if (entry->use_pmap) {
2894 vm_map_offset_t start_unnest, end_unnest;
2895
2896 /*
2897 * Make sure the range between the start of this entry and
2898 * the new "endaddr" is no longer nested before we clip.
2899 * Unnest only the minimum range the platform can handle.
2900 */
2901 start_unnest = entry->vme_start;
2902 end_unnest =
2903 (endaddr + pmap_nesting_size_min - 1) &
2904 ~(pmap_nesting_size_min - 1);
2905 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
2906 }
2907 #endif /* NO_NESTED_PMAP */
2908 if (endaddr < entry->vme_end) {
2909 if (entry->object.vm_object &&
2910 !entry->is_sub_map &&
2911 entry->object.vm_object->phys_contiguous) {
2912 pmap_remove(map->pmap,
2913 (addr64_t)(entry->vme_start),
2914 (addr64_t)(entry->vme_end));
2915 }
2916 _vm_map_clip_end(&map->hdr, entry, endaddr);
2917 UPDATE_FIRST_FREE(map, map->first_free);
2918 }
2919 }
2920
2921
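/*
 * vm_map_copy_clip_end: counterpart of vm_map_copy_clip_start() for the
 * end address of an entry in a vm_map_copy's entry list.
 */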
2922 #define vm_map_copy_clip_end(copy, entry, endaddr) \
2923 MACRO_BEGIN \
2924 if ((endaddr) < (entry)->vme_end) \
2925 _vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \
2926 MACRO_END
2927
2928 /*
2929 * This routine is called only when it is known that
2930 * the entry must be split.
2931 */
2932 static void
2933 _vm_map_clip_end(
2934 register struct vm_map_header *map_header,
2935 register vm_map_entry_t entry,
2936 register vm_map_offset_t end)
2937 {
2938 register vm_map_entry_t new_entry;
2939
2940 /*
2941 * Create a new entry and insert it
2942 * AFTER the specified entry
2943 */
2944
2945 new_entry = _vm_map_entry_create(map_header);
2946 vm_map_entry_copy_full(new_entry, entry);
2947
2948 new_entry->vme_start = entry->vme_end = end;
2949 new_entry->offset += (end - entry->vme_start);
2950
2951 _vm_map_entry_link(map_header, entry, new_entry);
2952
2953 if (entry->is_sub_map)
2954 vm_map_reference(new_entry->object.sub_map);
2955 else
2956 vm_object_reference(new_entry->object.vm_object);
2957 }
2958
2959
2960 /*
2961 * VM_MAP_RANGE_CHECK: [ internal use only ]
2962 *
2963 * Asserts that the starting and ending region
2964 * addresses fall within the valid range of the map.
2965 */
2966 #define VM_MAP_RANGE_CHECK(map, start, end) \
2967 MACRO_BEGIN \
2968 if (start < vm_map_min(map)) \
2969 start = vm_map_min(map); \
2970 if (end > vm_map_max(map)) \
2971 end = vm_map_max(map); \
2972 if (start > end) \
2973 start = end; \
2974 MACRO_END
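/*
 * Note: the macro clamps "start" and "end" in place and never reports
 * an error; callers that must reject an out-of-range request have to
 * check the bounds themselves (cf. vm_map_range_check() below).
 */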
2975
2976 /*
2977 * vm_map_range_check: [ internal use only ]
2978 *
2979 * Check that the region defined by the specified start and
2980 * end addresses is wholly contained within a single map
2981 * entry or set of adjacent map entries of the specified map,
2982 * i.e. the specified region contains no unmapped space.
2983 * If any or all of the region is unmapped, FALSE is returned.
2984 * Otherwise, TRUE is returned and if the output argument 'entry'
2985 * is not NULL it points to the map entry containing the start
2986 * of the region.
2987 *
2988 * The map is locked for reading on entry and is left locked.
2989 */
2990 static boolean_t
2991 vm_map_range_check(
2992 register vm_map_t map,
2993 register vm_map_offset_t start,
2994 register vm_map_offset_t end,
2995 vm_map_entry_t *entry)
2996 {
2997 vm_map_entry_t cur;
2998 register vm_map_offset_t prev;
2999
3000 /*
3001 * Basic sanity checks first
3002 */
3003 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
3004 return (FALSE);
3005
3006 /*
3007 * Check first if the region starts within a valid
3008 * mapping for the map.
3009 */
3010 if (!vm_map_lookup_entry(map, start, &cur))
3011 return (FALSE);
3012
3013 /*
3014 * Optimize for the case that the region is contained
3015 * in a single map entry.
3016 */
3017 if (entry != (vm_map_entry_t *) NULL)
3018 *entry = cur;
3019 if (end <= cur->vme_end)
3020 return (TRUE);
3021
3022 /*
3023 * If the region is not wholly contained within a
3024 * single entry, walk the entries looking for holes.
3025 */
3026 prev = cur->vme_end;
3027 cur = cur->vme_next;
3028 while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
3029 if (end <= cur->vme_end)
3030 return (TRUE);
3031 prev = cur->vme_end;
3032 cur = cur->vme_next;
3033 }
3034 return (FALSE);
3035 }
3036
3037 /*
3038 * vm_map_submap: [ kernel use only ]
3039 *
3040 * Mark the given range as handled by a subordinate map.
3041 *
3042 * This range must have been created with vm_map_find using
3043 * the vm_submap_object, and no other operations may have been
3044 * performed on this range prior to calling vm_map_submap.
3045 *
3046 * Only a limited number of operations can be performed
3047 * within this range after calling vm_map_submap:
3048 * vm_fault
3049 * [Don't try vm_map_copyin!]
3050 *
3051 * To remove a submapping, one must first remove the
3052 * range from the superior map, and then destroy the
3053 * submap (if desired). [Better yet, don't try it.]
3054 */
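/*
 * Hypothetical usage sketch ("parent_map" and "child_map" are placeholder
 * names; the range is assumed to have been set up as described above):
 *
 *	kr = vm_map_submap(parent_map, start, end, child_map, 0, TRUE);
 *
 * Passing TRUE for "use_pmap" requests a nested pmap where the platform
 * supports it.
 */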
3055 kern_return_t
3056 vm_map_submap(
3057 vm_map_t map,
3058 vm_map_offset_t start,
3059 vm_map_offset_t end,
3060 vm_map_t submap,
3061 vm_map_offset_t offset,
3062 #ifdef NO_NESTED_PMAP
3063 __unused
3064 #endif /* NO_NESTED_PMAP */
3065 boolean_t use_pmap)
3066 {
3067 vm_map_entry_t entry;
3068 register kern_return_t result = KERN_INVALID_ARGUMENT;
3069 register vm_object_t object;
3070
3071 vm_map_lock(map);
3072
3073 if (! vm_map_lookup_entry(map, start, &entry)) {
3074 entry = entry->vme_next;
3075 }
3076
3077 if (entry == vm_map_to_entry(map) ||
3078 entry->is_sub_map) {
3079 vm_map_unlock(map);
3080 return KERN_INVALID_ARGUMENT;
3081 }
3082
3083 assert(!entry->use_pmap); /* we don't want to unnest anything here */
3084 vm_map_clip_start(map, entry, start);
3085 vm_map_clip_end(map, entry, end);
3086
3087 if ((entry->vme_start == start) && (entry->vme_end == end) &&
3088 (!entry->is_sub_map) &&
3089 ((object = entry->object.vm_object) == vm_submap_object) &&
3090 (object->resident_page_count == 0) &&
3091 (object->copy == VM_OBJECT_NULL) &&
3092 (object->shadow == VM_OBJECT_NULL) &&
3093 (!object->pager_created)) {
3094 entry->offset = (vm_object_offset_t)offset;
3095 entry->object.vm_object = VM_OBJECT_NULL;
3096 vm_object_deallocate(object);
3097 entry->is_sub_map = TRUE;
3098 entry->object.sub_map = submap;
3099 vm_map_reference(submap);
3100 submap->mapped = TRUE;
3101
3102 #ifndef NO_NESTED_PMAP
3103 if (use_pmap) {
3104 /* nest if platform code will allow */
3105 if(submap->pmap == NULL) {
3106 submap->pmap = pmap_create((vm_map_size_t) 0, FALSE);
3107 if(submap->pmap == PMAP_NULL) {
3108 vm_map_unlock(map);
3109 return(KERN_NO_SPACE);
3110 }
3111 }
3112 result = pmap_nest(map->pmap,
3113 (entry->object.sub_map)->pmap,
3114 (addr64_t)start,
3115 (addr64_t)start,
3116 (uint64_t)(end - start));
3117 if(result)
3118 panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result);
3119 entry->use_pmap = TRUE;
3120 }
3121 #else /* NO_NESTED_PMAP */
3122 pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end);
3123 #endif /* NO_NESTED_PMAP */
3124 result = KERN_SUCCESS;
3125 }
3126 vm_map_unlock(map);
3127
3128 return(result);
3129 }
3130
3131 /*
3132 * vm_map_protect:
3133 *
3134 * Sets the protection of the specified address
3135 * region in the target map. If "set_max" is
3136 * specified, the maximum protection is to be set;
3137 * otherwise, only the current protection is affected.
3138 */
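/*
 * Hypothetical usage sketch: make an existing range read-only without
 * changing its maximum protection:
 *
 *	kr = vm_map_protect(map, start, end, VM_PROT_READ, FALSE);
 */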
3139 kern_return_t
3140 vm_map_protect(
3141 register vm_map_t map,
3142 register vm_map_offset_t start,
3143 register vm_map_offset_t end,
3144 register vm_prot_t new_prot,
3145 register boolean_t set_max)
3146 {
3147 register vm_map_entry_t current;
3148 register vm_map_offset_t prev;
3149 vm_map_entry_t entry;
3150 vm_prot_t new_max;
3151
3152 XPR(XPR_VM_MAP,
3153 "vm_map_protect, 0x%X start 0x%X end 0x%X, new 0x%X %d",
3154 (integer_t)map, start, end, new_prot, set_max);
3155
3156 vm_map_lock(map);
3157
3158 if ((new_prot & VM_PROT_COPY) && !map->prot_copy_allow) {
3159 vm_map_unlock(map);
3160 return(KERN_PROTECTION_FAILURE);
3161 }
3162
3163 /* LP64todo - remove this check when vm_map_commpage64()
3164 * no longer has to stuff in a map_entry for the commpage
3165 * above the map's max_offset.
3166 */
3167 if (start >= map->max_offset) {
3168 vm_map_unlock(map);
3169 return(KERN_INVALID_ADDRESS);
3170 }
3171
3172 /*
3173 * Lookup the entry. If it doesn't start in a valid
3174 * entry, return an error.
3175 */
3176 if (! vm_map_lookup_entry(map, start, &entry)) {
3177 vm_map_unlock(map);
3178 return(KERN_INVALID_ADDRESS);
3179 }
3180
3181 /*
3182 * Make a first pass to check for protection and address
3183 * violations.
3184 */
3185
3186 current = entry;
3187 prev = current->vme_start;
3188 while ((current != vm_map_to_entry(map)) &&
3189 (current->vme_start < end)) {
3190
3191 /*
3192 * If there is a hole, return an error.
3193 */
3194 if (current->vme_start != prev) {
3195 vm_map_unlock(map);
3196 return(KERN_INVALID_ADDRESS);
3197 }
3198
3199 new_max = current->max_protection;
3200 if(new_prot & VM_PROT_COPY) {
3201 new_max |= VM_PROT_WRITE;
3202 if ((new_prot & (new_max | VM_PROT_COPY)) != new_prot) {
3203 vm_map_unlock(map);
3204 return(KERN_PROTECTION_FAILURE);
3205 }
3206 } else {
3207 if ((new_prot & new_max) != new_prot) {
3208 vm_map_unlock(map);
3209 return(KERN_PROTECTION_FAILURE);
3210 }
3211 }
3212
3213 #if CONFIG_EMBEDDED
3214 if (new_prot & VM_PROT_WRITE) {
3215 if (new_prot & VM_PROT_EXECUTE) {
3216 printf("EMBEDDED: %s can't have both write and exec at the same time\n", __FUNCTION__);
3217 new_prot &= ~VM_PROT_EXECUTE;
3218 }
3219 }
3220 #endif
3221
3222 prev = current->vme_end;
3223 current = current->vme_next;
3224 }
3225 if (end > prev) {
3226 vm_map_unlock(map);
3227 return(KERN_INVALID_ADDRESS);
3228 }
3229
3230 /*
3231 * Go back and fix up protections.
3232 * Clip to start here if the range starts within
3233 * the entry.
3234 */
3235
3236 current = entry;
3237 if (current != vm_map_to_entry(map)) {
3238 /* clip and unnest if necessary */
3239 vm_map_clip_start(map, current, start);
3240 }
3241
3242 while ((current != vm_map_to_entry(map)) &&
3243 (current->vme_start < end)) {
3244
3245 vm_prot_t old_prot;
3246
3247 vm_map_clip_end(map, current, end);
3248
3249 assert(!current->use_pmap); /* clipping did unnest if needed */
3250
3251 old_prot = current->protection;
3252
3253 if(new_prot & VM_PROT_COPY) {
3254 /* caller is asking specifically to copy the */
3255 /* mapped data, this implies that max protection */
3256 /* will include write. Caller must be prepared */
3257 /* for loss of shared memory communication in the */
3258 /* target area after taking this step */
3259 current->needs_copy = TRUE;
3260 current->max_protection |= VM_PROT_WRITE;
3261 }
3262
3263 if (set_max)
3264 current->protection =
3265 (current->max_protection =
3266 new_prot & ~VM_PROT_COPY) &
3267 old_prot;
3268 else
3269 current->protection = new_prot & ~VM_PROT_COPY;
3270
3271 /*
3272 * Update physical map if necessary.
3273 * If the request is to turn off write protection,
3274 * we won't do it for real (in pmap). This is because
3275 * it would cause copy-on-write to fail. We've already
3276 * set the new protection in the map, so if a
3277 * write-protect fault occurs, it will be fixed up
3278 * properly, COW or not.
3279 */
3280 if (current->protection != old_prot) {
3281 /* Look one level in: we support nested pmaps */
3282 /* from mapped submaps which are direct entries */
3283 /* in our map */
3284
3285 vm_prot_t prot;
3286
3287 prot = current->protection & ~VM_PROT_WRITE;
3288
3289 if (override_nx(map, current->alias) && prot)
3290 prot |= VM_PROT_EXECUTE;
3291
3292 if (current->is_sub_map && current->use_pmap) {
3293 pmap_protect(current->object.sub_map->pmap,
3294 current->vme_start,
3295 current->vme_end,
3296 prot);
3297 } else {
3298 pmap_protect(map->pmap,
3299 current->vme_start,
3300 current->vme_end,
3301 prot);
3302 }
3303 }
3304 current = current->vme_next;
3305 }
3306
3307 current = entry;
3308 while ((current != vm_map_to_entry(map)) &&
3309 (current->vme_start <= end)) {
3310 vm_map_simplify_entry(map, current);
3311 current = current->vme_next;
3312 }
3313
3314 vm_map_unlock(map);
3315 return(KERN_SUCCESS);
3316 }
3317
3318 /*
3319 * vm_map_inherit:
3320 *
3321 * Sets the inheritance of the specified address
3322 * range in the target map. Inheritance
3323 * affects how the map will be shared with
3324 * child maps at the time of vm_map_fork.
3325 */
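/*
 * Hypothetical usage sketch: keep a range from being passed to forked
 * children:
 *
 *	kr = vm_map_inherit(map, start, end, VM_INHERIT_NONE);
 */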
3326 kern_return_t
3327 vm_map_inherit(
3328 register vm_map_t map,
3329 register vm_map_offset_t start,
3330 register vm_map_offset_t end,
3331 register vm_inherit_t new_inheritance)
3332 {
3333 register vm_map_entry_t entry;
3334 vm_map_entry_t temp_entry;
3335
3336 vm_map_lock(map);
3337
3338 VM_MAP_RANGE_CHECK(map, start, end);
3339
3340 if (vm_map_lookup_entry(map, start, &temp_entry)) {
3341 entry = temp_entry;
3342 }
3343 else {
3344 temp_entry = temp_entry->vme_next;
3345 entry = temp_entry;
3346 }
3347
3348 /* first check entire range for submaps which can't support the */
3349 /* given inheritance. */
3350 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
3351 if(entry->is_sub_map) {
3352 if(new_inheritance == VM_INHERIT_COPY) {
3353 vm_map_unlock(map);
3354 return(KERN_INVALID_ARGUMENT);
3355 }
3356 }
3357
3358 entry = entry->vme_next;
3359 }
3360
3361 entry = temp_entry;
3362 if (entry != vm_map_to_entry(map)) {
3363 /* clip and unnest if necessary */
3364 vm_map_clip_start(map, entry, start);
3365 }
3366
3367 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
3368 vm_map_clip_end(map, entry, end);
3369 assert(!entry->use_pmap); /* clip did unnest if needed */
3370
3371 entry->inheritance = new_inheritance;
3372
3373 entry = entry->vme_next;
3374 }
3375
3376 vm_map_unlock(map);
3377 return(KERN_SUCCESS);
3378 }
3379
3380 /*
3381 * Update the accounting for the amount of wired memory in this map. If the user has
3382 * exceeded the defined limits, then we fail. Wiring on behalf of the kernel never fails.
3383 */
3384
3385 static kern_return_t
3386 add_wire_counts(
3387 vm_map_t map,
3388 vm_map_entry_t entry,
3389 boolean_t user_wire)
3390 {
3391 vm_map_size_t size;
3392
3393 if (user_wire) {
3394
3395 /*
3396 * We're wiring memory at the request of the user. Check if this is the first time the user is wiring
3397 * this map entry.
3398 */
3399
3400 if (entry->user_wired_count == 0) {
3401 size = entry->vme_end - entry->vme_start;
3402
3403 /*
3404 * Since this is the first time the user is wiring this map entry, check to see if we're
3405 * exceeding the user wire limits. There is a per-map limit, which is the smaller of the
3406 * process's rlimit and the global vm_user_wire_limit that caps it. There is also
3407 * a system-wide limit on the amount of memory all users can wire. If the user is over either
3408 * limit, then we fail.
3409 */
3410
3411 if(size + map->user_wire_size > MIN(map->user_wire_limit, vm_user_wire_limit) ||
3412 size + ptoa_64(vm_page_wire_count) > vm_global_user_wire_limit)
3413 return KERN_RESOURCE_SHORTAGE;
3414
3415 /*
3416 * The first time the user wires an entry, we also increment the wired_count and add this to
3417 * the total that has been wired in the map.
3418 */
3419
3420 if (entry->wired_count >= MAX_WIRE_COUNT)
3421 return KERN_FAILURE;
3422
3423 entry->wired_count++;
3424 map->user_wire_size += size;
3425 }
3426
3427 if (entry->user_wired_count >= MAX_WIRE_COUNT)
3428 return KERN_FAILURE;
3429
3430 entry->user_wired_count++;
3431
3432 } else {
3433
3434 /*
3435 * The kernel is wiring the memory. Just bump the count and continue.
3436 */
3437
3438 if (entry->wired_count >= MAX_WIRE_COUNT)
3439 panic("vm_map_wire: too many wirings");
3440
3441 entry->wired_count++;
3442 }
3443
3444 return KERN_SUCCESS;
3445 }
3446
3447 /*
3448 * Update the memory wiring accounting now that the given map entry is being unwired.
3449 */
3450
3451 static void
3452 subtract_wire_counts(
3453 vm_map_t map,
3454 vm_map_entry_t entry,
3455 boolean_t user_wire)
3456 {
3457
3458 if (user_wire) {
3459
3460 /*
3461 * We're unwiring memory at the request of the user. See if we're removing the last user wire reference.
3462 */
3463
3464 if (entry->user_wired_count == 1) {
3465
3466 /*
3467 * We're removing the last user wire reference. Decrement the wired_count and the total
3468 * user wired memory for this map.
3469 */
3470
3471 assert(entry->wired_count >= 1);
3472 entry->wired_count--;
3473 map->user_wire_size -= entry->vme_end - entry->vme_start;
3474 }
3475
3476 assert(entry->user_wired_count >= 1);
3477 entry->user_wired_count--;
3478
3479 } else {
3480
3481 /*
3482 * The kernel is unwiring the memory. Just update the count.
3483 */
3484
3485 assert(entry->wired_count >= 1);
3486 entry->wired_count--;
3487 }
3488 }
3489
3490 /*
3491 * vm_map_wire:
3492 *
3493 * Sets the pageability of the specified address range in the
3494 * target map as wired. Regions specified as not pageable require
3495 * locked-down physical memory and physical page maps. The
3496 * access_type variable indicates types of accesses that must not
3497 * generate page faults. This is checked against protection of
3498 * memory being locked-down.
3499 *
3500 * The map must not be locked, but a reference must remain to the
3501 * map throughout the call.
3502 */
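/*
 * vm_map_wire_nested() is the internal workhorse behind vm_map_wire().
 * When "map_pmap" is non-NULL, the wiring was requested through a parent
 * map and the physical pages are entered into that pmap at "pmap_addr"
 * rather than into this map's own pmap.
 */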
3503 static kern_return_t
3504 vm_map_wire_nested(
3505 register vm_map_t map,
3506 register vm_map_offset_t start,
3507 register vm_map_offset_t end,
3508 register vm_prot_t access_type,
3509 boolean_t user_wire,
3510 pmap_t map_pmap,
3511 vm_map_offset_t pmap_addr)
3512 {
3513 register vm_map_entry_t entry;
3514 struct vm_map_entry *first_entry, tmp_entry;
3515 vm_map_t real_map;
3516 register vm_map_offset_t s,e;
3517 kern_return_t rc;
3518 boolean_t need_wakeup;
3519 boolean_t main_map = FALSE;
3520 wait_interrupt_t interruptible_state;
3521 thread_t cur_thread;
3522 unsigned int last_timestamp;
3523 vm_map_size_t size;
3524
3525 vm_map_lock(map);
3526 if(map_pmap == NULL)
3527 main_map = TRUE;
3528 last_timestamp = map->timestamp;
3529
3530 VM_MAP_RANGE_CHECK(map, start, end);
3531 assert(page_aligned(start));
3532 assert(page_aligned(end));
3533 if (start == end) {
3534 /* We wired what the caller asked for, zero pages */
3535 vm_map_unlock(map);
3536 return KERN_SUCCESS;
3537 }
3538
3539 need_wakeup = FALSE;
3540 cur_thread = current_thread();
3541
3542 s = start;
3543 rc = KERN_SUCCESS;
3544
3545 if (vm_map_lookup_entry(map, s, &first_entry)) {
3546 entry = first_entry;
3547 /*
3548 * vm_map_clip_start will be done later.
3549 * We don't want to unnest any nested submaps here !
3550 */
3551 } else {
3552 /* Start address is not in map */
3553 rc = KERN_INVALID_ADDRESS;
3554 goto done;
3555 }
3556
3557 while ((entry != vm_map_to_entry(map)) && (s < end)) {
3558 /*
3559 * At this point, we have wired from "start" to "s".
3560 * We still need to wire from "s" to "end".
3561 *
3562 * "entry" hasn't been clipped, so it could start before "s"
3563 * and/or end after "end".
3564 */
3565
3566 /* "e" is how far we want to wire in this entry */
3567 e = entry->vme_end;
3568 if (e > end)
3569 e = end;
3570
3571 /*
3572 * If another thread is wiring/unwiring this entry then
3573 * block after informing other thread to wake us up.
3574 */
3575 if (entry->in_transition) {
3576 wait_result_t wait_result;
3577
3578 /*
3579 * We have not clipped the entry. Make sure that
3580 * the start address is in range so that the lookup
3581 * below will succeed.
3582 * "s" is the current starting point: we've already
3583 * wired from "start" to "s" and we still have
3584 * to wire from "s" to "end".
3585 */
3586
3587 entry->needs_wakeup = TRUE;
3588
3589 /*
3590 * wake up anybody waiting on entries that we have
3591 * already wired.
3592 */
3593 if (need_wakeup) {
3594 vm_map_entry_wakeup(map);
3595 need_wakeup = FALSE;
3596 }
3597 /*
3598 * User wiring is interruptible
3599 */
3600 wait_result = vm_map_entry_wait(map,
3601 (user_wire) ? THREAD_ABORTSAFE :
3602 THREAD_UNINT);
3603 if (user_wire && wait_result == THREAD_INTERRUPTED) {
3604 /*
3605 * undo the wirings we have done so far
3606 * We do not clear the needs_wakeup flag,
3607 * because we cannot tell if we were the
3608 * only one waiting.
3609 */
3610 rc = KERN_FAILURE;
3611 goto done;
3612 }
3613
3614 /*
3615 * Cannot avoid a lookup here; reset the timestamp.
3616 */
3617 last_timestamp = map->timestamp;
3618
3619 /*
3620 * The entry could have been clipped, so look it up again.
3621 * The worst that can happen is that it may not exist anymore.
3622 */
3623 if (!vm_map_lookup_entry(map, s, &first_entry)) {
3624 if (!user_wire)
3625 panic("vm_map_wire: re-lookup failed");
3626
3627 /*
3628 * User: undo everything up to the previous
3629 * entry. Let vm_map_unwire worry about
3630 * checking the validity of the range.
3631 */
3632 rc = KERN_FAILURE;
3633 goto done;
3634 }
3635 entry = first_entry;
3636 continue;
3637 }
3638
3639 if (entry->is_sub_map) {
3640 vm_map_offset_t sub_start;
3641 vm_map_offset_t sub_end;
3642 vm_map_offset_t local_start;
3643 vm_map_offset_t local_end;
3644 pmap_t pmap;
3645
3646 vm_map_clip_start(map, entry, s);
3647 vm_map_clip_end(map, entry, end);
3648
3649 sub_start = entry->offset;
3650 sub_end = entry->vme_end;
3651 sub_end += entry->offset - entry->vme_start;
3652
3653 local_end = entry->vme_end;
3654 if(map_pmap == NULL) {
3655 vm_object_t object;
3656 vm_object_offset_t offset;
3657 vm_prot_t prot;
3658 boolean_t wired;
3659 vm_map_entry_t local_entry;
3660 vm_map_version_t version;
3661 vm_map_t lookup_map;
3662
3663 if(entry->use_pmap) {
3664 pmap = entry->object.sub_map->pmap;
3665 /* ppc implementation requires that */
3666 /* submaps' pmap address ranges line */
3667 /* up with parent map */
3668 #ifdef notdef
3669 pmap_addr = sub_start;
3670 #endif
3671 pmap_addr = s;
3672 } else {
3673 pmap = map->pmap;
3674 pmap_addr = s;
3675 }
3676
3677 if (entry->wired_count) {
3678 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
3679 goto done;
3680
3681 /*
3682 * The map was not unlocked:
3683 * no need to goto re-lookup.
3684 * Just go directly to next entry.
3685 */
3686 entry = entry->vme_next;
3687 s = entry->vme_start;
3688 continue;
3689
3690 }
3691
3692 /* call vm_map_lookup_locked to */
3693 /* cause any needs_copy to be */
3694 /* evaluated */
3695 local_start = entry->vme_start;
3696 lookup_map = map;
3697 vm_map_lock_write_to_read(map);
3698 if(vm_map_lookup_locked(
3699 &lookup_map, local_start,
3700 access_type,
3701 OBJECT_LOCK_EXCLUSIVE,
3702 &version, &object,
3703 &offset, &prot, &wired,
3704 NULL,
3705 &real_map)) {
3706
3707 vm_map_unlock_read(lookup_map);
3708 vm_map_unwire(map, start,
3709 s, user_wire);
3710 return(KERN_FAILURE);
3711 }
3712 if(real_map != lookup_map)
3713 vm_map_unlock(real_map);
3714 vm_map_unlock_read(lookup_map);
3715 vm_map_lock(map);
3716 vm_object_unlock(object);
3717
3718 /* we unlocked, so must re-lookup */
3719 if (!vm_map_lookup_entry(map,
3720 local_start,
3721 &local_entry)) {
3722 rc = KERN_FAILURE;
3723 goto done;
3724 }
3725
3726 /*
3727 * entry could have been "simplified",
3728 * so re-clip
3729 */
3730 entry = local_entry;
3731 assert(s == local_start);
3732 vm_map_clip_start(map, entry, s);
3733 vm_map_clip_end(map, entry, end);
3734 /* re-compute "e" */
3735 e = entry->vme_end;
3736 if (e > end)
3737 e = end;
3738
3739 /* did we have a change of type? */
3740 if (!entry->is_sub_map) {
3741 last_timestamp = map->timestamp;
3742 continue;
3743 }
3744 } else {
3745 local_start = entry->vme_start;
3746 pmap = map_pmap;
3747 }
3748
3749 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
3750 goto done;
3751
3752 entry->in_transition = TRUE;
3753
3754 vm_map_unlock(map);
3755 rc = vm_map_wire_nested(entry->object.sub_map,
3756 sub_start, sub_end,
3757 access_type,
3758 user_wire, pmap, pmap_addr);
3759 vm_map_lock(map);
3760
3761 /*
3762 * Find the entry again. It could have been clipped
3763 * after we unlocked the map.
3764 */
3765 if (!vm_map_lookup_entry(map, local_start,
3766 &first_entry))
3767 panic("vm_map_wire: re-lookup failed");
3768 entry = first_entry;
3769
3770 assert(local_start == s);
3771 /* re-compute "e" */
3772 e = entry->vme_end;
3773 if (e > end)
3774 e = end;
3775
3776 last_timestamp = map->timestamp;
3777 while ((entry != vm_map_to_entry(map)) &&
3778 (entry->vme_start < e)) {
3779 assert(entry->in_transition);
3780 entry->in_transition = FALSE;
3781 if (entry->needs_wakeup) {
3782 entry->needs_wakeup = FALSE;
3783 need_wakeup = TRUE;
3784 }
3785 if (rc != KERN_SUCCESS) {/* from vm_*_wire */
3786 subtract_wire_counts(map, entry, user_wire);
3787 }
3788 entry = entry->vme_next;
3789 }
3790 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
3791 goto done;
3792 }
3793
3794 /* no need to relookup again */
3795 s = entry->vme_start;
3796 continue;
3797 }
3798
3799 /*
3800 * If this entry is already wired then increment
3801 * the appropriate wire reference count.
3802 */
3803 if (entry->wired_count) {
3804 /*
3805 * entry is already wired down, get our reference
3806 * after clipping to our range.
3807 */
3808 vm_map_clip_start(map, entry, s);
3809 vm_map_clip_end(map, entry, end);
3810
3811 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
3812 goto done;
3813
3814 /* map was not unlocked: no need to relookup */
3815 entry = entry->vme_next;
3816 s = entry->vme_start;
3817 continue;
3818 }
3819
3820 /*
3821 * Unwired entry or wire request transmitted via submap
3822 */
3823
3824
3825 /*
3826 * Perform actions of vm_map_lookup that need the write
3827 * lock on the map: create a shadow object for a
3828 * copy-on-write region, or an object for a zero-fill
3829 * region.
3830 */
3831 size = entry->vme_end - entry->vme_start;
3832 /*
3833 * If wiring a copy-on-write page, we need to copy it now
3834 * even if we're only (currently) requesting read access.
3835 * This is aggressive, but once it's wired we can't move it.
3836 */
3837 if (entry->needs_copy) {
3838 vm_object_shadow(&entry->object.vm_object,
3839 &entry->offset, size);
3840 entry->needs_copy = FALSE;
3841 } else if (entry->object.vm_object == VM_OBJECT_NULL) {
3842 entry->object.vm_object = vm_object_allocate(size);
3843 entry->offset = (vm_object_offset_t)0;
3844 }
3845
3846 vm_map_clip_start(map, entry, s);
3847 vm_map_clip_end(map, entry, end);
3848
3849 /* re-compute "e" */
3850 e = entry->vme_end;
3851 if (e > end)
3852 e = end;
3853
3854 /*
3855 * Check for holes and protection mismatch.
3856 * Holes: Next entry should be contiguous unless this
3857 * is the end of the region.
3858 * Protection: Access requested must be allowed, unless
3859 * wiring is by protection class
3860 */
3861 if ((entry->vme_end < end) &&
3862 ((entry->vme_next == vm_map_to_entry(map)) ||
3863 (entry->vme_next->vme_start > entry->vme_end))) {
3864 /* found a hole */
3865 rc = KERN_INVALID_ADDRESS;
3866 goto done;
3867 }
3868 if ((entry->protection & access_type) != access_type) {
3869 /* found a protection problem */
3870 rc = KERN_PROTECTION_FAILURE;
3871 goto done;
3872 }
3873
3874 assert(entry->wired_count == 0 && entry->user_wired_count == 0);
3875
3876 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
3877 goto done;
3878
3879 entry->in_transition = TRUE;
3880
3881 /*
3882 * This entry might get split once we unlock the map.
3883 * In vm_fault_wire(), we need the current range as
3884 * defined by this entry. In order for this to work
3885 * along with a simultaneous clip operation, we make a
3886 * temporary copy of this entry and use that for the
3887 * wiring. Note that the underlying objects do not
3888 * change during a clip.
3889 */
3890 tmp_entry = *entry;
3891
3892 /*
3893 * The in_transition state guarantees that the entry
3894 * (or entries for this range, if a split occurred) will be
3895 * there when the map lock is acquired for the second time.
3896 */
3897 vm_map_unlock(map);
3898
3899 if (!user_wire && cur_thread != THREAD_NULL)
3900 interruptible_state = thread_interrupt_level(THREAD_UNINT);
3901 else
3902 interruptible_state = THREAD_UNINT;
3903
3904 if(map_pmap)
3905 rc = vm_fault_wire(map,
3906 &tmp_entry, map_pmap, pmap_addr);
3907 else
3908 rc = vm_fault_wire(map,
3909 &tmp_entry, map->pmap,
3910 tmp_entry.vme_start);
3911
3912 if (!user_wire && cur_thread != THREAD_NULL)
3913 thread_interrupt_level(interruptible_state);
3914
3915 vm_map_lock(map);
3916
3917 if (last_timestamp+1 != map->timestamp) {
3918 /*
3919 * Find the entry again. It could have been clipped
3920 * after we unlocked the map.
3921 */
3922 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
3923 &first_entry))
3924 panic("vm_map_wire: re-lookup failed");
3925
3926 entry = first_entry;
3927 }
3928
3929 last_timestamp = map->timestamp;
3930
3931 while ((entry != vm_map_to_entry(map)) &&
3932 (entry->vme_start < tmp_entry.vme_end)) {
3933 assert(entry->in_transition);
3934 entry->in_transition = FALSE;
3935 if (entry->needs_wakeup) {
3936 entry->needs_wakeup = FALSE;
3937 need_wakeup = TRUE;
3938 }
3939 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
3940 subtract_wire_counts(map, entry, user_wire);
3941 }
3942 entry = entry->vme_next;
3943 }
3944
3945 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
3946 goto done;
3947 }
3948
3949 s = entry->vme_start;
3950 } /* end while loop through map entries */
3951
3952 done:
3953 if (rc == KERN_SUCCESS) {
3954 /* repair any damage we may have made to the VM map */
3955 vm_map_simplify_range(map, start, end);
3956 }
3957
3958 vm_map_unlock(map);
3959
3960 /*
3961 * wake up anybody waiting on entries we wired.
3962 */
3963 if (need_wakeup)
3964 vm_map_entry_wakeup(map);
3965
3966 if (rc != KERN_SUCCESS) {
3967 /* undo what has been wired so far */
3968 vm_map_unwire(map, start, s, user_wire);
3969 }
3970
3971 return rc;
3972
3973 }
3974
3975 kern_return_t
3976 vm_map_wire(
3977 register vm_map_t map,
3978 register vm_map_offset_t start,
3979 register vm_map_offset_t end,
3980 register vm_prot_t access_type,
3981 boolean_t user_wire)
3982 {
3983
3984 kern_return_t kret;
3985
3986 #ifdef ppc
3987 /*
3988 * the calls to mapping_prealloc and mapping_relpre
3989 * (along with the VM_MAP_RANGE_CHECK to ensure a
3990 * reasonable range was passed in) are
3991 * currently necessary because
3992 * we haven't enabled kernel pre-emption
3993 * and/or the pmap_enter cannot purge and re-use
3994 * existing mappings
3995 */
3996 VM_MAP_RANGE_CHECK(map, start, end);
3997 mapping_prealloc(end - start);
3998 #endif
3999 kret = vm_map_wire_nested(map, start, end, access_type,
4000 user_wire, (pmap_t)NULL, 0);
4001 #ifdef ppc
4002 mapping_relpre();
4003 #endif
4004 return kret;
4005 }
4006
4007 /*
4008 * vm_map_unwire:
4009 *
4010 * Sets the pageability of the specified address range in the target
4011 * map as pageable. Regions specified must have been wired previously.
4012 *
4013 * The map must not be locked, but a reference must remain to the map
4014 * throughout the call.
4015 *
4016 * Kernel will panic on failures. User unwire ignores holes and
4017 * unwired and in-transition entries to avoid losing memory by leaving
4018 * it unwired.
4019 */
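/*
 * Illustrative sketch: how a hypothetical in-kernel caller might pair the
 * vm_map_wire() wrapper above with vm_map_unwire() on a page-aligned user
 * range, passing user_wire == TRUE so that holes and in-transition entries
 * are tolerated rather than causing a panic.  The helper name
 * "example_wire_then_unwire" is made up for illustration, and the block is
 * kept under #if 0 so it is never compiled.
 */
#if 0	/* illustrative sketch only */
static kern_return_t
example_wire_then_unwire(
	vm_map_t		map,
	vm_map_offset_t		addr,
	vm_map_size_t		size)
{
	vm_map_offset_t		start = vm_map_trunc_page(addr);
	vm_map_offset_t		end = vm_map_round_page(addr + size);
	kern_return_t		kr;

	/* wire for read/write access on behalf of a user request */
	kr = vm_map_wire(map, start, end,
			 VM_PROT_READ | VM_PROT_WRITE, TRUE);
	if (kr != KERN_SUCCESS)
		return kr;

	/* ... perform I/O against the wired range ... */

	/* user unwire: ignores holes and in-transition entries */
	return vm_map_unwire(map, start, end, TRUE);
}
#endif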
4020 static kern_return_t
4021 vm_map_unwire_nested(
4022 register vm_map_t map,
4023 register vm_map_offset_t start,
4024 register vm_map_offset_t end,
4025 boolean_t user_wire,
4026 pmap_t map_pmap,
4027 vm_map_offset_t pmap_addr)
4028 {
4029 register vm_map_entry_t entry;
4030 struct vm_map_entry *first_entry, tmp_entry;
4031 boolean_t need_wakeup;
4032 boolean_t main_map = FALSE;
4033 unsigned int last_timestamp;
4034
4035 vm_map_lock(map);
4036 if(map_pmap == NULL)
4037 main_map = TRUE;
4038 last_timestamp = map->timestamp;
4039
4040 VM_MAP_RANGE_CHECK(map, start, end);
4041 assert(page_aligned(start));
4042 assert(page_aligned(end));
4043
4044 if (start == end) {
4045 /* We unwired what the caller asked for: zero pages */
4046 vm_map_unlock(map);
4047 return KERN_SUCCESS;
4048 }
4049
4050 if (vm_map_lookup_entry(map, start, &first_entry)) {
4051 entry = first_entry;
4052 /*
4053 * vm_map_clip_start will be done later.
4054 * We don't want to unnest any nested sub maps here!
4055 */
4056 }
4057 else {
4058 if (!user_wire) {
4059 panic("vm_map_unwire: start not found");
4060 }
4061 /* Start address is not in map. */
4062 vm_map_unlock(map);
4063 return(KERN_INVALID_ADDRESS);
4064 }
4065
4066 need_wakeup = FALSE;
4067 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
4068 if (entry->in_transition) {
4069 /*
4070 * 1)
4071 * Another thread is wiring down this entry. Note
4072 * that, were it not for the other thread, we would
4073 * be unwiring an unwired entry. This is not
4074 * permitted. If we wait, we will be unwiring memory
4075 * we did not wire.
4076 *
4077 * 2)
4078 * Another thread is unwiring this entry. We did not
4079 * have a reference to it, because if we did, this
4080 * entry would not be getting unwired now.
4081 */
4082 if (!user_wire) {
4083 /*
4084 * XXX FBDP
4085 * This could happen: there could be some
4086 * overlapping vslock/vsunlock operations
4087 * going on.
4088 * We should probably just wait and retry,
4089 * but then we have to be careful that this
4090 * entry could get "simplified" after
4091 * "in_transition" gets unset and before
4092 * we re-lookup the entry, so we would
4093 * have to re-clip the entry to avoid
4094 * re-unwiring what we have already unwired...
4095 * See vm_map_wire_nested().
4096 *
4097 * Or we could just ignore "in_transition"
4098 * here and proceed to decrement the wired
4099 * count(s) on this entry. That should be fine
4100 * as long as "wired_count" doesn't drop all
4101 * the way to 0 (and we should panic if THAT
4102 * happens).
4103 */
4104 panic("vm_map_unwire: in_transition entry");
4105 }
4106
4107 entry = entry->vme_next;
4108 continue;
4109 }
4110
4111 if (entry->is_sub_map) {
4112 vm_map_offset_t sub_start;
4113 vm_map_offset_t sub_end;
4114 vm_map_offset_t local_end;
4115 pmap_t pmap;
4116
4117 vm_map_clip_start(map, entry, start);
4118 vm_map_clip_end(map, entry, end);
4119
4120 sub_start = entry->offset;
4121 sub_end = entry->vme_end - entry->vme_start;
4122 sub_end += entry->offset;
4123 local_end = entry->vme_end;
4124 if(map_pmap == NULL) {
4125 if(entry->use_pmap) {
4126 pmap = entry->object.sub_map->pmap;
4127 pmap_addr = sub_start;
4128 } else {
4129 pmap = map->pmap;
4130 pmap_addr = start;
4131 }
4132 if (entry->wired_count == 0 ||
4133 (user_wire && entry->user_wired_count == 0)) {
4134 if (!user_wire)
4135 panic("vm_map_unwire: entry is unwired");
4136 entry = entry->vme_next;
4137 continue;
4138 }
4139
4140 /*
4141 * Check for holes
4142 * Holes: Next entry should be contiguous unless
4143 * this is the end of the region.
4144 */
4145 if (((entry->vme_end < end) &&
4146 ((entry->vme_next == vm_map_to_entry(map)) ||
4147 (entry->vme_next->vme_start
4148 > entry->vme_end)))) {
4149 if (!user_wire)
4150 panic("vm_map_unwire: non-contiguous region");
4151 /*
4152 entry = entry->vme_next;
4153 continue;
4154 */
4155 }
4156
4157 subtract_wire_counts(map, entry, user_wire);
4158
4159 if (entry->wired_count != 0) {
4160 entry = entry->vme_next;
4161 continue;
4162 }
4163
4164 entry->in_transition = TRUE;
4165 tmp_entry = *entry;/* see comment in vm_map_wire() */
4166
4167 /*
4168 * We can unlock the map now. The in_transition state
4169 * guarantees existence of the entry.
4170 */
4171 vm_map_unlock(map);
4172 vm_map_unwire_nested(entry->object.sub_map,
4173 sub_start, sub_end, user_wire, pmap, pmap_addr);
4174 vm_map_lock(map);
4175
4176 if (last_timestamp+1 != map->timestamp) {
4177 /*
4178 * Find the entry again. It could have been
4179 * clipped or deleted after we unlocked the map.
4180 */
4181 if (!vm_map_lookup_entry(map,
4182 tmp_entry.vme_start,
4183 &first_entry)) {
4184 if (!user_wire)
4185 panic("vm_map_unwire: re-lookup failed");
4186 entry = first_entry->vme_next;
4187 } else
4188 entry = first_entry;
4189 }
4190 last_timestamp = map->timestamp;
4191
4192 /*
4193 * clear transition bit for all constituent entries
4194 * that were in the original entry (saved in
4195 * tmp_entry). Also check for waiters.
4196 */
4197 while ((entry != vm_map_to_entry(map)) &&
4198 (entry->vme_start < tmp_entry.vme_end)) {
4199 assert(entry->in_transition);
4200 entry->in_transition = FALSE;
4201 if (entry->needs_wakeup) {
4202 entry->needs_wakeup = FALSE;
4203 need_wakeup = TRUE;
4204 }
4205 entry = entry->vme_next;
4206 }
4207 continue;
4208 } else {
4209 vm_map_unlock(map);
4210 vm_map_unwire_nested(entry->object.sub_map,
4211 sub_start, sub_end, user_wire, map_pmap,
4212 pmap_addr);
4213 vm_map_lock(map);
4214
4215 if (last_timestamp+1 != map->timestamp) {
4216 /*
4217 * Find the entry again. It could have been
4218 * clipped or deleted after we unlocked the map.
4219 */
4220 if (!vm_map_lookup_entry(map,
4221 tmp_entry.vme_start,
4222 &first_entry)) {
4223 if (!user_wire)
4224 panic("vm_map_unwire: re-lookup failed");
4225 entry = first_entry->vme_next;
4226 } else
4227 entry = first_entry;
4228 }
4229 last_timestamp = map->timestamp;
4230 }
4231 }
4232
4233
4234 if ((entry->wired_count == 0) ||
4235 (user_wire && entry->user_wired_count == 0)) {
4236 if (!user_wire)
4237 panic("vm_map_unwire: entry is unwired");
4238
4239 entry = entry->vme_next;
4240 continue;
4241 }
4242
4243 assert(entry->wired_count > 0 &&
4244 (!user_wire || entry->user_wired_count > 0));
4245
4246 vm_map_clip_start(map, entry, start);
4247 vm_map_clip_end(map, entry, end);
4248
4249 /*
4250 * Check for holes
4251 * Holes: Next entry should be contiguous unless
4252 * this is the end of the region.
4253 */
4254 if (((entry->vme_end < end) &&
4255 ((entry->vme_next == vm_map_to_entry(map)) ||
4256 (entry->vme_next->vme_start > entry->vme_end)))) {
4257
4258 if (!user_wire)
4259 panic("vm_map_unwire: non-contiguous region");
4260 entry = entry->vme_next;
4261 continue;
4262 }
4263
4264 subtract_wire_counts(map, entry, user_wire);
4265
4266 if (entry->wired_count != 0) {
4267 entry = entry->vme_next;
4268 continue;
4269 }
4270
4271 entry->in_transition = TRUE;
4272 tmp_entry = *entry; /* see comment in vm_map_wire() */
4273
4274 /*
4275 * We can unlock the map now. The in_transition state
4276 * guarantees existence of the entry.
4277 */
4278 vm_map_unlock(map);
4279 if(map_pmap) {
4280 vm_fault_unwire(map,
4281 &tmp_entry, FALSE, map_pmap, pmap_addr);
4282 } else {
4283 vm_fault_unwire(map,
4284 &tmp_entry, FALSE, map->pmap,
4285 tmp_entry.vme_start);
4286 }
4287 vm_map_lock(map);
4288
4289 if (last_timestamp+1 != map->timestamp) {
4290 /*
4291 * Find the entry again. It could have been clipped
4292 * or deleted after we unlocked the map.
4293 */
4294 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
4295 &first_entry)) {
4296 if (!user_wire)
4297 panic("vm_map_unwire: re-lookup failed");
4298 entry = first_entry->vme_next;
4299 } else
4300 entry = first_entry;
4301 }
4302 last_timestamp = map->timestamp;
4303
4304 /*
4305 * clear transition bit for all constituent entries that
4306 * were in the original entry (saved in tmp_entry). Also
4307 * check for waiters.
4308 */
4309 while ((entry != vm_map_to_entry(map)) &&
4310 (entry->vme_start < tmp_entry.vme_end)) {
4311 assert(entry->in_transition);
4312 entry->in_transition = FALSE;
4313 if (entry->needs_wakeup) {
4314 entry->needs_wakeup = FALSE;
4315 need_wakeup = TRUE;
4316 }
4317 entry = entry->vme_next;
4318 }
4319 }
4320
4321 /*
4322 * We might have fragmented the address space when we wired this
4323 * range of addresses. Attempt to re-coalesce these VM map entries
4324 * with their neighbors now that they're no longer wired.
4325 * Under some circumstances, address space fragmentation can
4326 * prevent VM object shadow chain collapsing, which can cause
4327 * swap space leaks.
4328 */
4329 vm_map_simplify_range(map, start, end);
4330
4331 vm_map_unlock(map);
4332 /*
4333 * wake up anybody waiting on entries that we have unwired.
4334 */
4335 if (need_wakeup)
4336 vm_map_entry_wakeup(map);
4337 return(KERN_SUCCESS);
4338
4339 }
4340
4341 kern_return_t
4342 vm_map_unwire(
4343 register vm_map_t map,
4344 register vm_map_offset_t start,
4345 register vm_map_offset_t end,
4346 boolean_t user_wire)
4347 {
4348 return vm_map_unwire_nested(map, start, end,
4349 user_wire, (pmap_t)NULL, 0);
4350 }
4351
4352
4353 /*
4354 * vm_map_entry_delete: [ internal use only ]
4355 *
4356 * Deallocate the given entry from the target map.
4357 */
4358 static void
4359 vm_map_entry_delete(
4360 register vm_map_t map,
4361 register vm_map_entry_t entry)
4362 {
4363 register vm_map_offset_t s, e;
4364 register vm_object_t object;
4365 register vm_map_t submap;
4366
4367 s = entry->vme_start;
4368 e = entry->vme_end;
4369 assert(page_aligned(s));
4370 assert(page_aligned(e));
4371 assert(entry->wired_count == 0);
4372 assert(entry->user_wired_count == 0);
4373
4374 if (entry->is_sub_map) {
4375 object = NULL;
4376 submap = entry->object.sub_map;
4377 } else {
4378 submap = NULL;
4379 object = entry->object.vm_object;
4380 }
4381
4382 vm_map_entry_unlink(map, entry);
4383 map->size -= e - s;
4384
4385 vm_map_entry_dispose(map, entry);
4386
4387 vm_map_unlock(map);
4388 /*
4389 * Deallocate the object only after removing all
4390 * pmap entries pointing to its pages.
4391 */
4392 if (submap)
4393 vm_map_deallocate(submap);
4394 else
4395 vm_object_deallocate(object);
4396
4397 }
4398
4399 void
4400 vm_map_submap_pmap_clean(
4401 vm_map_t map,
4402 vm_map_offset_t start,
4403 vm_map_offset_t end,
4404 vm_map_t sub_map,
4405 vm_map_offset_t offset)
4406 {
4407 vm_map_offset_t submap_start;
4408 vm_map_offset_t submap_end;
4409 vm_map_size_t remove_size;
4410 vm_map_entry_t entry;
4411
4412 submap_end = offset + (end - start);
4413 submap_start = offset;
4414 if(vm_map_lookup_entry(sub_map, offset, &entry)) {
4415
4416 remove_size = (entry->vme_end - entry->vme_start);
4417 if(offset > entry->vme_start)
4418 remove_size -= offset - entry->vme_start;
4419
4420
4421 if(submap_end < entry->vme_end) {
4422 remove_size -=
4423 entry->vme_end - submap_end;
4424 }
4425 if(entry->is_sub_map) {
4426 vm_map_submap_pmap_clean(
4427 sub_map,
4428 start,
4429 start + remove_size,
4430 entry->object.sub_map,
4431 entry->offset);
4432 } else {
4433
4434 if((map->mapped) && (map->ref_count)
4435 && (entry->object.vm_object != NULL)) {
4436 vm_object_pmap_protect(
4437 entry->object.vm_object,
4438 entry->offset,
4439 remove_size,
4440 PMAP_NULL,
4441 entry->vme_start,
4442 VM_PROT_NONE);
4443 } else {
4444 pmap_remove(map->pmap,
4445 (addr64_t)start,
4446 (addr64_t)(start + remove_size));
4447 }
4448 }
4449 }
4450
4451 entry = entry->vme_next;
4452
4453 while((entry != vm_map_to_entry(sub_map))
4454 && (entry->vme_start < submap_end)) {
4455 remove_size = (entry->vme_end - entry->vme_start);
4456 if(submap_end < entry->vme_end) {
4457 remove_size -= entry->vme_end - submap_end;
4458 }
4459 if(entry->is_sub_map) {
4460 vm_map_submap_pmap_clean(
4461 sub_map,
4462 (start + entry->vme_start) - offset,
4463 ((start + entry->vme_start) - offset) + remove_size,
4464 entry->object.sub_map,
4465 entry->offset);
4466 } else {
4467 if((map->mapped) && (map->ref_count)
4468 && (entry->object.vm_object != NULL)) {
4469 vm_object_pmap_protect(
4470 entry->object.vm_object,
4471 entry->offset,
4472 remove_size,
4473 PMAP_NULL,
4474 entry->vme_start,
4475 VM_PROT_NONE);
4476 } else {
4477 pmap_remove(map->pmap,
4478 (addr64_t)((start + entry->vme_start)
4479 - offset),
4480 (addr64_t)(((start + entry->vme_start)
4481 - offset) + remove_size));
4482 }
4483 }
4484 entry = entry->vme_next;
4485 }
4486 return;
4487 }
4488
4489 /*
4490 * vm_map_delete: [ internal use only ]
4491 *
4492 * Deallocates the given address range from the target map.
4493 * Removes all user wirings. Unwires one kernel wiring if
4494 * VM_MAP_REMOVE_KUNWIRE is set. Waits for kernel wirings to go
4495 * away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set. Sleeps
4496 * interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
4497 *
4498 * This routine is called with map locked and leaves map locked.
4499 */
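/*
 * Illustrative sketch of the locking contract and flag usage described
 * above, modelled on the exported vm_map_remove() further below: the map
 * is locked by the caller, vm_map_delete() leaves it locked, and
 * VM_MAP_REMOVE_KUNWIRE releases exactly one kernel wiring per entry.
 * The helper name "example_remove_with_kernel_unwire" is made up for
 * illustration, and the block is kept under #if 0 so it is never compiled.
 */
#if 0	/* illustrative sketch only */
static kern_return_t
example_remove_with_kernel_unwire(
	vm_map_t		map,
	vm_map_offset_t		start,
	vm_map_offset_t		end)
{
	kern_return_t		kr;

	vm_map_lock(map);		/* vm_map_delete() expects the lock */
	VM_MAP_RANGE_CHECK(map, start, end);
	kr = vm_map_delete(map, start, end,
			   VM_MAP_REMOVE_KUNWIRE, VM_MAP_NULL);
	vm_map_unlock(map);		/* ... and leaves it held */

	return kr;
}
#endif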
4500 static kern_return_t
4501 vm_map_delete(
4502 vm_map_t map,
4503 vm_map_offset_t start,
4504 vm_map_offset_t end,
4505 int flags,
4506 vm_map_t zap_map)
4507 {
4508 vm_map_entry_t entry, next;
4509 struct vm_map_entry *first_entry, tmp_entry;
4510 register vm_map_offset_t s;
4511 register vm_object_t object;
4512 boolean_t need_wakeup;
4513 unsigned int last_timestamp = ~0; /* unlikely value */
4514 int interruptible;
4515
4516 interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
4517 THREAD_ABORTSAFE : THREAD_UNINT;
4518
4519 /*
4520 * All our DMA I/O operations in IOKit are currently done by
4521 * wiring through the map entries of the task requesting the I/O.
4522 * Because of this, we must always wait for kernel wirings
4523 * to go away on the entries before deleting them.
4524 *
4525 * Any caller who wants to actually remove a kernel wiring
4526 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
4527 * properly remove one wiring instead of blasting through
4528 * them all.
4529 */
4530 flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;
4531
4532 /*
4533 * Find the start of the region, and clip it
4534 */
4535 if (vm_map_lookup_entry(map, start, &first_entry)) {
4536 entry = first_entry;
4537 if (start == entry->vme_start) {
4538 /*
4539 * No need to clip. We don't want to cause
4540 * any unnecessary unnesting in this case...
4541 */
4542 } else {
4543 vm_map_clip_start(map, entry, start);
4544 }
4545
4546 /*
4547 * Fix the lookup hint now, rather than each
4548 * time through the loop.
4549 */
4550 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4551 } else {
4552 entry = first_entry->vme_next;
4553 }
4554
4555 need_wakeup = FALSE;
4556 /*
4557 * Step through all entries in this region
4558 */
4559 s = entry->vme_start;
4560 while ((entry != vm_map_to_entry(map)) && (s < end)) {
4561 /*
4562 * At this point, we have deleted all the memory entries
4563 * between "start" and "s". We still need to delete
4564 * all memory entries between "s" and "end".
4565 * While we were blocked and the map was unlocked, some
4566 * new memory entries could have been re-allocated between
4567 * "start" and "s" and we don't want to mess with those.
4568 * Some of those entries could even have been re-assembled
4569 * with an entry after "s" (in vm_map_simplify_entry()), so
4570 * we may have to vm_map_clip_start() again.
4571 */
4572
4573 if (entry->vme_start >= s) {
4574 /*
4575 * This entry starts on or after "s"
4576 * so no need to clip its start.
4577 */
4578 } else {
4579 /*
4580 * This entry has been re-assembled by a
4581 * vm_map_simplify_entry(). We need to
4582 * re-clip its start.
4583 */
4584 vm_map_clip_start(map, entry, s);
4585 }
4586 if (entry->vme_end <= end) {
4587 /*
4588 * This entry is going away completely, so no need
4589 * to clip and possibly cause an unnecessary unnesting.
4590 */
4591 } else {
4592 vm_map_clip_end(map, entry, end);
4593 }
4594 if (entry->in_transition) {
4595 wait_result_t wait_result;
4596
4597 /*
4598 * Another thread is wiring/unwiring this entry.
4599 * Let the other thread know we are waiting.
4600 */
4601 assert(s == entry->vme_start);
4602 entry->needs_wakeup = TRUE;
4603
4604 /*
4605 * wake up anybody waiting on entries that we have
4606 * already unwired/deleted.
4607 */
4608 if (need_wakeup) {
4609 vm_map_entry_wakeup(map);
4610 need_wakeup = FALSE;
4611 }
4612
4613 wait_result = vm_map_entry_wait(map, interruptible);
4614
4615 if (interruptible &&
4616 wait_result == THREAD_INTERRUPTED) {
4617 /*
4618 * We do not clear the needs_wakeup flag,
4619 * since we cannot tell if we were the only one.
4620 */
4621 vm_map_unlock(map);
4622 return KERN_ABORTED;
4623 }
4624
4625 /*
4626 * The entry could have been clipped or it
4627 * may not exist anymore. Look it up again.
4628 */
4629 if (!vm_map_lookup_entry(map, s, &first_entry)) {
4630 assert((map != kernel_map) &&
4631 (!entry->is_sub_map));
4632 /*
4633 * User: use the next entry
4634 */
4635 entry = first_entry->vme_next;
4636 s = entry->vme_start;
4637 } else {
4638 entry = first_entry;
4639 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4640 }
4641 last_timestamp = map->timestamp;
4642 continue;
4643 } /* end in_transition */
4644
4645 if (entry->wired_count) {
4646 boolean_t user_wire;
4647
4648 user_wire = entry->user_wired_count > 0;
4649
4650 /*
4651 * Remove a kernel wiring if requested or if
4652 * there are user wirings.
4653 */
4654 if ((flags & VM_MAP_REMOVE_KUNWIRE) ||
4655 (entry->user_wired_count > 0))
4656 entry->wired_count--;
4657
4658 /* remove all user wire references */
4659 entry->user_wired_count = 0;
4660
4661 if (entry->wired_count != 0) {
4662 assert(map != kernel_map);
4663 /*
4664 * Cannot continue. Typical case is when
4665 * a user thread has physical I/O pending
4666 * on this page. Either wait for the
4667 * kernel wiring to go away or return an
4668 * error.
4669 */
4670 if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) {
4671 wait_result_t wait_result;
4672
4673 assert(s == entry->vme_start);
4674 entry->needs_wakeup = TRUE;
4675 wait_result = vm_map_entry_wait(map,
4676 interruptible);
4677
4678 if (interruptible &&
4679 wait_result == THREAD_INTERRUPTED) {
4680 /*
4681 * We do not clear the
4682 * needs_wakeup flag, since we
4683 * cannot tell if we were the
4684 * only one.
4685 */
4686 vm_map_unlock(map);
4687 return KERN_ABORTED;
4688 }
4689
4690 /*
4691 * The entry could have been clipped or
4692 * it may not exist anymore. Look it
4693 * up again.
4694 */
4695 if (!vm_map_lookup_entry(map, s,
4696 &first_entry)) {
4697 assert(map != kernel_map);
4698 /*
4699 * User: use the next entry
4700 */
4701 entry = first_entry->vme_next;
4702 s = entry->vme_start;
4703 } else {
4704 entry = first_entry;
4705 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4706 }
4707 last_timestamp = map->timestamp;
4708 continue;
4709 }
4710 else {
4711 return KERN_FAILURE;
4712 }
4713 }
4714
4715 entry->in_transition = TRUE;
4716 /*
4717 * copy current entry. see comment in vm_map_wire()
4718 */
4719 tmp_entry = *entry;
4720 assert(s == entry->vme_start);
4721
4722 /*
4723 * We can unlock the map now. The in_transition
4724 * state guarantees existence of the entry.
4725 */
4726 vm_map_unlock(map);
4727
4728 if (tmp_entry.is_sub_map) {
4729 vm_map_t sub_map;
4730 vm_map_offset_t sub_start, sub_end;
4731 pmap_t pmap;
4732 vm_map_offset_t pmap_addr;
4733
4734
4735 sub_map = tmp_entry.object.sub_map;
4736 sub_start = tmp_entry.offset;
4737 sub_end = sub_start + (tmp_entry.vme_end -
4738 tmp_entry.vme_start);
4739 if (tmp_entry.use_pmap) {
4740 pmap = sub_map->pmap;
4741 pmap_addr = tmp_entry.vme_start;
4742 } else {
4743 pmap = map->pmap;
4744 pmap_addr = tmp_entry.vme_start;
4745 }
4746 (void) vm_map_unwire_nested(sub_map,
4747 sub_start, sub_end,
4748 user_wire,
4749 pmap, pmap_addr);
4750 } else {
4751
4752 vm_fault_unwire(map, &tmp_entry,
4753 tmp_entry.object.vm_object == kernel_object,
4754 map->pmap, tmp_entry.vme_start);
4755 }
4756
4757 vm_map_lock(map);
4758
4759 if (last_timestamp+1 != map->timestamp) {
4760 /*
4761 * Find the entry again. It could have
4762 * been clipped after we unlocked the map.
4763 */
4764 if (!vm_map_lookup_entry(map, s, &first_entry)){
4765 assert((map != kernel_map) &&
4766 (!entry->is_sub_map));
4767 first_entry = first_entry->vme_next;
4768 s = first_entry->vme_start;
4769 } else {
4770 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4771 }
4772 } else {
4773 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4774 first_entry = entry;
4775 }
4776
4777 last_timestamp = map->timestamp;
4778
4779 entry = first_entry;
4780 while ((entry != vm_map_to_entry(map)) &&
4781 (entry->vme_start < tmp_entry.vme_end)) {
4782 assert(entry->in_transition);
4783 entry->in_transition = FALSE;
4784 if (entry->needs_wakeup) {
4785 entry->needs_wakeup = FALSE;
4786 need_wakeup = TRUE;
4787 }
4788 entry = entry->vme_next;
4789 }
4790 /*
4791 * We have unwired the entry(s). Go back and
4792 * delete them.
4793 */
4794 entry = first_entry;
4795 continue;
4796 }
4797
4798 /* entry is unwired */
4799 assert(entry->wired_count == 0);
4800 assert(entry->user_wired_count == 0);
4801
4802 assert(s == entry->vme_start);
4803
4804 if (flags & VM_MAP_REMOVE_NO_PMAP_CLEANUP) {
4805 /*
4806 * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
4807 * vm_map_delete(), some map entries might have been
4808 * transferred to a "zap_map", which doesn't have a
4809 * pmap. The original pmap has already been flushed
4810 * in the vm_map_delete() call targeting the original
4811 * map, but when we get to destroying the "zap_map",
4812 * we don't have any pmap to flush, so let's just skip
4813 * all this.
4814 */
4815 } else if (entry->is_sub_map) {
4816 if (entry->use_pmap) {
4817 #ifndef NO_NESTED_PMAP
4818 pmap_unnest(map->pmap,
4819 (addr64_t)entry->vme_start,
4820 entry->vme_end - entry->vme_start);
4821 #endif /* NO_NESTED_PMAP */
4822 if ((map->mapped) && (map->ref_count)) {
4823 /* clean up parent map/maps */
4824 vm_map_submap_pmap_clean(
4825 map, entry->vme_start,
4826 entry->vme_end,
4827 entry->object.sub_map,
4828 entry->offset);
4829 }
4830 } else {
4831 vm_map_submap_pmap_clean(
4832 map, entry->vme_start, entry->vme_end,
4833 entry->object.sub_map,
4834 entry->offset);
4835 }
4836 } else if (entry->object.vm_object != kernel_object) {
4837 object = entry->object.vm_object;
4838 if((map->mapped) && (map->ref_count)) {
4839 vm_object_pmap_protect(
4840 object, entry->offset,
4841 entry->vme_end - entry->vme_start,
4842 PMAP_NULL,
4843 entry->vme_start,
4844 VM_PROT_NONE);
4845 } else {
4846 pmap_remove(map->pmap,
4847 (addr64_t)entry->vme_start,
4848 (addr64_t)entry->vme_end);
4849 }
4850 }
4851
4852 /*
4853 * All pmap mappings for this map entry must have been
4854 * cleared by now.
4855 */
4856 assert(vm_map_pmap_is_empty(map,
4857 entry->vme_start,
4858 entry->vme_end));
4859
4860 next = entry->vme_next;
4861 s = next->vme_start;
4862 last_timestamp = map->timestamp;
4863
4864 if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) &&
4865 zap_map != VM_MAP_NULL) {
4866 vm_map_size_t entry_size;
4867 /*
4868 * The caller wants to save the affected VM map entries
4869 * into the "zap_map". The caller will take care of
4870 * these entries.
4871 */
4872 /* unlink the entry from "map" ... */
4873 vm_map_entry_unlink(map, entry);
4874 /* ... and add it to the end of the "zap_map" */
4875 vm_map_entry_link(zap_map,
4876 vm_map_last_entry(zap_map),
4877 entry);
4878 entry_size = entry->vme_end - entry->vme_start;
4879 map->size -= entry_size;
4880 zap_map->size += entry_size;
4881 /* we didn't unlock the map, so no timestamp increase */
4882 last_timestamp--;
4883 } else {
4884 vm_map_entry_delete(map, entry);
4885 /* vm_map_entry_delete unlocks the map */
4886 vm_map_lock(map);
4887 }
4888
4889 entry = next;
4890
4891 if(entry == vm_map_to_entry(map)) {
4892 break;
4893 }
4894 if (last_timestamp+1 != map->timestamp) {
4895 /*
4896 * we are responsible for deleting everything
4897 * from the given space. If someone has interfered,
4898 * we pick up where we left off; back-fills should
4899 * be all right for anyone except map_delete, and
4900 * we have to assume that the task has been fully
4901 * disabled before we get here.
4902 */
4903 if (!vm_map_lookup_entry(map, s, &entry)){
4904 entry = entry->vme_next;
4905 s = entry->vme_start;
4906 } else {
4907 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4908 }
4909 /*
4910 * others can not only allocate behind us, we can
4911 * also see coalescing while we don't hold the map lock
4912 */
4913 if(entry == vm_map_to_entry(map)) {
4914 break;
4915 }
4916 }
4917 last_timestamp = map->timestamp;
4918 }
4919
4920 if (map->wait_for_space)
4921 thread_wakeup((event_t) map);
4922 /*
4923 * wake up anybody waiting on entries that we have already deleted.
4924 */
4925 if (need_wakeup)
4926 vm_map_entry_wakeup(map);
4927
4928 return KERN_SUCCESS;
4929 }
4930
4931 /*
4932 * vm_map_remove:
4933 *
4934 * Remove the given address range from the target map.
4935 * This is the exported form of vm_map_delete.
4936 */
4937 kern_return_t
4938 vm_map_remove(
4939 register vm_map_t map,
4940 register vm_map_offset_t start,
4941 register vm_map_offset_t end,
4942 register boolean_t flags)
4943 {
4944 register kern_return_t result;
4945
4946 vm_map_lock(map);
4947 VM_MAP_RANGE_CHECK(map, start, end);
4948 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
4949 vm_map_unlock(map);
4950
4951 return(result);
4952 }
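/*
 * Illustrative sketch: minimal use of the exported vm_map_remove() above
 * on a page-aligned range.  VM_MAP_NO_FLAGS is assumed here to be the
 * "no special behavior" flag value from vm_map.h, and the helper name is
 * made up for illustration; the block is kept under #if 0 so it is never
 * compiled.
 */
#if 0	/* illustrative sketch only */
static kern_return_t
example_remove_range(
	vm_map_t		map,
	vm_map_offset_t		addr,
	vm_map_size_t		size)
{
	return vm_map_remove(map,
			     vm_map_trunc_page(addr),
			     vm_map_round_page(addr + size),
			     VM_MAP_NO_FLAGS);
}
#endif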
4953
4954
4955 /*
4956 * Routine: vm_map_copy_discard
4957 *
4958 * Description:
4959 * Dispose of a map copy object (returned by
4960 * vm_map_copyin).
4961 */
4962 void
4963 vm_map_copy_discard(
4964 vm_map_copy_t copy)
4965 {
4966 TR_DECL("vm_map_copy_discard");
4967
4968 /* tr3("enter: copy 0x%x type %d", copy, copy->type);*/
4969
4970 if (copy == VM_MAP_COPY_NULL)
4971 return;
4972
4973 switch (copy->type) {
4974 case VM_MAP_COPY_ENTRY_LIST:
4975 while (vm_map_copy_first_entry(copy) !=
4976 vm_map_copy_to_entry(copy)) {
4977 vm_map_entry_t entry = vm_map_copy_first_entry(copy);
4978
4979 vm_map_copy_entry_unlink(copy, entry);
4980 vm_object_deallocate(entry->object.vm_object);
4981 vm_map_copy_entry_dispose(copy, entry);
4982 }
4983 break;
4984 case VM_MAP_COPY_OBJECT:
4985 vm_object_deallocate(copy->cpy_object);
4986 break;
4987 case VM_MAP_COPY_KERNEL_BUFFER:
4988
4989 /*
4990 * The vm_map_copy_t and possibly the data buffer were
4991 * allocated by a single call to kalloc(), i.e. the
4992 * vm_map_copy_t was not allocated out of the zone.
4993 */
4994 kfree(copy, copy->cpy_kalloc_size);
4995 return;
4996 }
4997 zfree(vm_map_copy_zone, copy);
4998 }
4999
5000 /*
5001 * Routine: vm_map_copy_copy
5002 *
5003 * Description:
5004 * Move the information in a map copy object to
5005 * a new map copy object, leaving the old one
5006 * empty.
5007 *
5008 * This is used by kernel routines that need
5009 * to look at out-of-line data (in copyin form)
5010 * before deciding whether to return SUCCESS.
5011 * If the routine returns FAILURE, the original
5012 * copy object will be deallocated; therefore,
5013 * these routines must make a copy of the copy
5014 * object and leave the original empty so that
5015 * deallocation will not fail.
5016 */
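/*
 * Illustrative sketch of the "inspect before committing" pattern described
 * above: the routine keeps a private copy of the copy object so that
 * returning failure (after which the caller discards the original, now
 * empty, copy) cannot free the data twice.  The names
 * "example_inspect_copy" and "example_data_is_valid" are made up for
 * illustration, and the block is kept under #if 0 so it is never compiled.
 */
#if 0	/* illustrative sketch only */
static kern_return_t
example_inspect_copy(
	vm_map_copy_t		copy)
{
	vm_map_copy_t		new_copy;

	new_copy = vm_map_copy_copy(copy);	/* original is now empty */

	if (!example_data_is_valid(new_copy)) {	/* hypothetical check */
		vm_map_copy_discard(new_copy);
		return KERN_FAILURE;	/* caller discards the empty original */
	}

	/* ... hand new_copy on, e.g. to vm_map_copyout() ... */
	return KERN_SUCCESS;
}
#endif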
5017 vm_map_copy_t
5018 vm_map_copy_copy(
5019 vm_map_copy_t copy)
5020 {
5021 vm_map_copy_t new_copy;
5022
5023 if (copy == VM_MAP_COPY_NULL)
5024 return VM_MAP_COPY_NULL;
5025
5026 /*
5027 * Allocate a new copy object, and copy the information
5028 * from the old one into it.
5029 */
5030
5031 new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
5032 *new_copy = *copy;
5033
5034 if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
5035 /*
5036 * The links in the entry chain must be
5037 * changed to point to the new copy object.
5038 */
5039 vm_map_copy_first_entry(copy)->vme_prev
5040 = vm_map_copy_to_entry(new_copy);
5041 vm_map_copy_last_entry(copy)->vme_next
5042 = vm_map_copy_to_entry(new_copy);
5043 }
5044
5045 /*
5046 * Change the old copy object into one that contains
5047 * nothing to be deallocated.
5048 */
5049 copy->type = VM_MAP_COPY_OBJECT;
5050 copy->cpy_object = VM_OBJECT_NULL;
5051
5052 /*
5053 * Return the new object.
5054 */
5055 return new_copy;
5056 }
5057
5058 static kern_return_t
5059 vm_map_overwrite_submap_recurse(
5060 vm_map_t dst_map,
5061 vm_map_offset_t dst_addr,
5062 vm_map_size_t dst_size)
5063 {
5064 vm_map_offset_t dst_end;
5065 vm_map_entry_t tmp_entry;
5066 vm_map_entry_t entry;
5067 kern_return_t result;
5068 boolean_t encountered_sub_map = FALSE;
5069
5070
5071
5072 /*
5073 * Verify that the destination is all writeable
5074 * initially. We have to trunc the destination
5075 * address and round the copy size or we'll end up
5076 * splitting entries in strange ways.
5077 */
5078
5079 dst_end = vm_map_round_page(dst_addr + dst_size);
5080 vm_map_lock(dst_map);
5081
5082 start_pass_1:
5083 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
5084 vm_map_unlock(dst_map);
5085 return(KERN_INVALID_ADDRESS);
5086 }
5087
5088 vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(dst_addr));
5089 assert(!tmp_entry->use_pmap); /* clipping did unnest if needed */
5090
5091 for (entry = tmp_entry;;) {
5092 vm_map_entry_t next;
5093
5094 next = entry->vme_next;
5095 while(entry->is_sub_map) {
5096 vm_map_offset_t sub_start;
5097 vm_map_offset_t sub_end;
5098 vm_map_offset_t local_end;
5099
5100 if (entry->in_transition) {
5101 /*
5102 * Say that we are waiting, and wait for entry.
5103 */
5104 entry->needs_wakeup = TRUE;
5105 vm_map_entry_wait(dst_map, THREAD_UNINT);
5106
5107 goto start_pass_1;
5108 }
5109
5110 encountered_sub_map = TRUE;
5111 sub_start = entry->offset;
5112
5113 if(entry->vme_end < dst_end)
5114 sub_end = entry->vme_end;
5115 else
5116 sub_end = dst_end;
5117 sub_end -= entry->vme_start;
5118 sub_end += entry->offset;
5119 local_end = entry->vme_end;
5120 vm_map_unlock(dst_map);
5121
5122 result = vm_map_overwrite_submap_recurse(
5123 entry->object.sub_map,
5124 sub_start,
5125 sub_end - sub_start);
5126
5127 if(result != KERN_SUCCESS)
5128 return result;
5129 if (dst_end <= entry->vme_end)
5130 return KERN_SUCCESS;
5131 vm_map_lock(dst_map);
5132 if(!vm_map_lookup_entry(dst_map, local_end,
5133 &tmp_entry)) {
5134 vm_map_unlock(dst_map);
5135 return(KERN_INVALID_ADDRESS);
5136 }
5137 entry = tmp_entry;
5138 next = entry->vme_next;
5139 }
5140
5141 if ( ! (entry->protection & VM_PROT_WRITE)) {
5142 vm_map_unlock(dst_map);
5143 return(KERN_PROTECTION_FAILURE);
5144 }
5145
5146 /*
5147 * If the entry is in transition, we must wait
5148 * for it to exit that state. Anything could happen
5149 * when we unlock the map, so start over.
5150 */
5151 if (entry->in_transition) {
5152
5153 /*
5154 * Say that we are waiting, and wait for entry.
5155 */
5156 entry->needs_wakeup = TRUE;
5157 vm_map_entry_wait(dst_map, THREAD_UNINT);
5158
5159 goto start_pass_1;
5160 }
5161
5162 /*
5163 * our range is contained completely within this map entry
5164 */
5165 if (dst_end <= entry->vme_end) {
5166 vm_map_unlock(dst_map);
5167 return KERN_SUCCESS;
5168 }
5169 /*
5170 * check that range specified is contiguous region
5171 */
5172 if ((next == vm_map_to_entry(dst_map)) ||
5173 (next->vme_start != entry->vme_end)) {
5174 vm_map_unlock(dst_map);
5175 return(KERN_INVALID_ADDRESS);
5176 }
5177
5178 /*
5179 * Check for permanent objects in the destination.
5180 */
5181 if ((entry->object.vm_object != VM_OBJECT_NULL) &&
5182 ((!entry->object.vm_object->internal) ||
5183 (entry->object.vm_object->true_share))) {
5184 if(encountered_sub_map) {
5185 vm_map_unlock(dst_map);
5186 return(KERN_FAILURE);
5187 }
5188 }
5189
5190
5191 entry = next;
5192 }/* for */
5193 vm_map_unlock(dst_map);
5194 return(KERN_SUCCESS);
5195 }
5196
5197 /*
5198 * Routine: vm_map_copy_overwrite
5199 *
5200 * Description:
5201 * Copy the memory described by the map copy
5202 * object (copy; returned by vm_map_copyin) onto
5203 * the specified destination region (dst_map, dst_addr).
5204 * The destination must be writeable.
5205 *
5206 * Unlike vm_map_copyout, this routine actually
5207 * writes over previously-mapped memory. If the
5208 * previous mapping was to a permanent (user-supplied)
5209 * memory object, it is preserved.
5210 *
5211 * The attributes (protection and inheritance) of the
5212 * destination region are preserved.
5213 *
5214 * If successful, consumes the copy object.
5215 * Otherwise, the caller is responsible for it.
5216 *
5217 * Implementation notes:
5218 * To overwrite aligned temporary virtual memory, it is
5219 * sufficient to remove the previous mapping and insert
5220 * the new copy. This replacement is done either on
5221 * the whole region (if no permanent virtual memory
5222 * objects are embedded in the destination region) or
5223 * in individual map entries.
5224 *
5225 * To overwrite permanent virtual memory, it is necessary
5226 * to copy each page, as the external memory management
5227 * interface currently does not provide any optimizations.
5228 *
5229 * Unaligned memory also has to be copied. It is possible
5230 * to use 'vm_trickery' to copy the aligned data. This is
5231 * not done but not hard to implement.
5232 *
5233 * Once a page of permanent memory has been overwritten,
5234 * it is impossible to interrupt this function; otherwise,
5235 * the call would be neither atomic nor location-independent.
5236 * The kernel-state portion of a user thread must be
5237 * interruptible.
5238 *
5239 * It may be expensive to forward all requests that might
5240 * overwrite permanent memory (vm_write, vm_copy) to
5241 * uninterruptible kernel threads. This routine may be
5242 * called by interruptible threads; however, success is
5243 * not guaranteed -- if the request cannot be performed
5244 * atomically and interruptibly, an error indication is
5245 * returned.
5246 */
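/*
 * Illustrative sketch of the copyin/overwrite pairing implied by the
 * description above, assuming the vm_map_copyin() entry point declared in
 * vm_map.h.  On success the copy object is consumed by the overwrite; on
 * failure it is still ours to discard.  The helper name
 * "example_copy_region_over" is made up for illustration, and the block is
 * kept under #if 0 so it is never compiled.
 */
#if 0	/* illustrative sketch only */
static kern_return_t
example_copy_region_over(
	vm_map_t		src_map,
	vm_map_offset_t		src_addr,
	vm_map_t		dst_map,
	vm_map_offset_t		dst_addr,
	vm_map_size_t		size)
{
	vm_map_copy_t		copy;
	kern_return_t		kr;

	kr = vm_map_copyin(src_map, src_addr, size,
			   FALSE,		/* don't destroy the source */
			   &copy);
	if (kr != KERN_SUCCESS)
		return kr;

	kr = vm_map_copy_overwrite(dst_map, dst_addr, copy, FALSE);
	if (kr != KERN_SUCCESS)
		vm_map_copy_discard(copy);

	return kr;
}
#endif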
5247
5248 static kern_return_t
5249 vm_map_copy_overwrite_nested(
5250 vm_map_t dst_map,
5251 vm_map_address_t dst_addr,
5252 vm_map_copy_t copy,
5253 boolean_t interruptible,
5254 pmap_t pmap)
5255 {
5256 vm_map_offset_t dst_end;
5257 vm_map_entry_t tmp_entry;
5258 vm_map_entry_t entry;
5259 kern_return_t kr;
5260 boolean_t aligned = TRUE;
5261 boolean_t contains_permanent_objects = FALSE;
5262 boolean_t encountered_sub_map = FALSE;
5263 vm_map_offset_t base_addr;
5264 vm_map_size_t copy_size;
5265 vm_map_size_t total_size;
5266
5267
5268 /*
5269 * Check for null copy object.
5270 */
5271
5272 if (copy == VM_MAP_COPY_NULL)
5273 return(KERN_SUCCESS);
5274
5275 /*
5276 * Check for special kernel buffer allocated
5277 * by new_ipc_kmsg_copyin.
5278 */
5279
5280 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
5281 return(vm_map_copyout_kernel_buffer(
5282 dst_map, &dst_addr,
5283 copy, TRUE));
5284 }
5285
5286 /*
5287 * Only works for entry lists at the moment. Will
5288 * support page lists later.
5289 */
5290
5291 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
5292
5293 if (copy->size == 0) {
5294 vm_map_copy_discard(copy);
5295 return(KERN_SUCCESS);
5296 }
5297
5298 /*
5299 * Verify that the destination is all writeable
5300 * initially. We have to trunc the destination
5301 * address and round the copy size or we'll end up
5302 * splitting entries in strange ways.
5303 */
5304
5305 if (!page_aligned(copy->size) ||
5306 !page_aligned (copy->offset) ||
5307 !page_aligned (dst_addr))
5308 {
5309 aligned = FALSE;
5310 dst_end = vm_map_round_page(dst_addr + copy->size);
5311 } else {
5312 dst_end = dst_addr + copy->size;
5313 }
5314
5315 vm_map_lock(dst_map);
5316
5317 /* LP64todo - remove this check when vm_map_commpage64()
5318 * no longer has to stuff in a map_entry for the commpage
5319 * above the map's max_offset.
5320 */
5321 if (dst_addr >= dst_map->max_offset) {
5322 vm_map_unlock(dst_map);
5323 return(KERN_INVALID_ADDRESS);
5324 }
5325
5326 start_pass_1:
5327 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
5328 vm_map_unlock(dst_map);
5329 return(KERN_INVALID_ADDRESS);
5330 }
5331 vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(dst_addr));
5332 for (entry = tmp_entry;;) {
5333 vm_map_entry_t next = entry->vme_next;
5334
5335 while(entry->is_sub_map) {
5336 vm_map_offset_t sub_start;
5337 vm_map_offset_t sub_end;
5338 vm_map_offset_t local_end;
5339
5340 if (entry->in_transition) {
5341
5342 /*
5343 * Say that we are waiting, and wait for entry.
5344 */
5345 entry->needs_wakeup = TRUE;
5346 vm_map_entry_wait(dst_map, THREAD_UNINT);
5347
5348 goto start_pass_1;
5349 }
5350
5351 local_end = entry->vme_end;
5352 if (!(entry->needs_copy)) {
5353 /* if needs_copy we are a COW submap */
5354 /* in such a case we just replace so */
5355 /* there is no need for the follow- */
5356 /* ing check. */
5357 encountered_sub_map = TRUE;
5358 sub_start = entry->offset;
5359
5360 if(entry->vme_end < dst_end)
5361 sub_end = entry->vme_end;
5362 else
5363 sub_end = dst_end;
5364 sub_end -= entry->vme_start;
5365 sub_end += entry->offset;
5366 vm_map_unlock(dst_map);
5367
5368 kr = vm_map_overwrite_submap_recurse(
5369 entry->object.sub_map,
5370 sub_start,
5371 sub_end - sub_start);
5372 if(kr != KERN_SUCCESS)
5373 return kr;
5374 vm_map_lock(dst_map);
5375 }
5376
5377 if (dst_end <= entry->vme_end)
5378 goto start_overwrite;
5379 if(!vm_map_lookup_entry(dst_map, local_end,
5380 &entry)) {
5381 vm_map_unlock(dst_map);
5382 return(KERN_INVALID_ADDRESS);
5383 }
5384 next = entry->vme_next;
5385 }
5386
5387 if ( ! (entry->protection & VM_PROT_WRITE)) {
5388 vm_map_unlock(dst_map);
5389 return(KERN_PROTECTION_FAILURE);
5390 }
5391
5392 /*
5393 * If the entry is in transition, we must wait
5394 * for it to exit that state. Anything could happen
5395 * when we unlock the map, so start over.
5396 */
5397 if (entry->in_transition) {
5398
5399 /*
5400 * Say that we are waiting, and wait for entry.
5401 */
5402 entry->needs_wakeup = TRUE;
5403 vm_map_entry_wait(dst_map, THREAD_UNINT);
5404
5405 goto start_pass_1;
5406 }
5407
5408 /*
5409 * our range is contained completely within this map entry
5410 */
5411 if (dst_end <= entry->vme_end)
5412 break;
5413 /*
5414 * check that range specified is contiguous region
5415 */
5416 if ((next == vm_map_to_entry(dst_map)) ||
5417 (next->vme_start != entry->vme_end)) {
5418 vm_map_unlock(dst_map);
5419 return(KERN_INVALID_ADDRESS);
5420 }
5421
5422
5423 /*
5424 * Check for permanent objects in the destination.
5425 */
5426 if ((entry->object.vm_object != VM_OBJECT_NULL) &&
5427 ((!entry->object.vm_object->internal) ||
5428 (entry->object.vm_object->true_share))) {
5429 contains_permanent_objects = TRUE;
5430 }
5431
5432 entry = next;
5433 }/* for */
5434
5435 start_overwrite:
5436 /*
5437 * If there are permanent objects in the destination, then
5438 * the copy cannot be interrupted.
5439 */
5440
5441 if (interruptible && contains_permanent_objects) {
5442 vm_map_unlock(dst_map);
5443 return(KERN_FAILURE); /* XXX */
5444 }
5445
5446 /*
5447 *
5448 * Make a second pass, overwriting the data
5449 * At the beginning of each loop iteration,
5450 * the next entry to be overwritten is "tmp_entry"
5451 * (initially, the value returned from the lookup above),
5452 * and the starting address expected in that entry
5453 * is "start".
5454 */
5455
5456 total_size = copy->size;
5457 if(encountered_sub_map) {
5458 copy_size = 0;
5459 /* re-calculate tmp_entry since we've had the map */
5460 /* unlocked */
5461 if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
5462 vm_map_unlock(dst_map);
5463 return(KERN_INVALID_ADDRESS);
5464 }
5465 } else {
5466 copy_size = copy->size;
5467 }
5468
5469 base_addr = dst_addr;
5470 while(TRUE) {
5471 /* deconstruct the copy object and do in parts */
5472 /* only in sub_map, interruptible case */
5473 vm_map_entry_t copy_entry;
5474 vm_map_entry_t previous_prev = VM_MAP_ENTRY_NULL;
5475 vm_map_entry_t next_copy = VM_MAP_ENTRY_NULL;
5476 int nentries;
5477 int remaining_entries = 0;
5478 int new_offset = 0;
5479
5480 for (entry = tmp_entry; copy_size == 0;) {
5481 vm_map_entry_t next;
5482
5483 next = entry->vme_next;
5484
5485 /* tmp_entry and the base address are moved along */
5486 /* each time we encounter a sub-map. Otherwise, */
5487 /* entry can outpace tmp_entry, and copy_size */
5488 /* may reflect the distance between them. */
5489 /* If the current entry is found to be in transition, */
5490 /* we will start over at the beginning or at the last */
5491 /* encounter of a submap, as dictated by base_addr, */
5492 /* and we will zero copy_size accordingly. */
5493 if (entry->in_transition) {
5494 /*
5495 * Say that we are waiting, and wait for entry.
5496 */
5497 entry->needs_wakeup = TRUE;
5498 vm_map_entry_wait(dst_map, THREAD_UNINT);
5499
5500 if(!vm_map_lookup_entry(dst_map, base_addr,
5501 &tmp_entry)) {
5502 vm_map_unlock(dst_map);
5503 return(KERN_INVALID_ADDRESS);
5504 }
5505 copy_size = 0;
5506 entry = tmp_entry;
5507 continue;
5508 }
5509 if(entry->is_sub_map) {
5510 vm_map_offset_t sub_start;
5511 vm_map_offset_t sub_end;
5512 vm_map_offset_t local_end;
5513
5514 if (entry->needs_copy) {
5515 /* if this is a COW submap */
5516 /* just back the range with an */
5517 /* anonymous entry */
5518 if(entry->vme_end < dst_end)
5519 sub_end = entry->vme_end;
5520 else
5521 sub_end = dst_end;
5522 if(entry->vme_start < base_addr)
5523 sub_start = base_addr;
5524 else
5525 sub_start = entry->vme_start;
5526 vm_map_clip_end(
5527 dst_map, entry, sub_end);
5528 vm_map_clip_start(
5529 dst_map, entry, sub_start);
5530 assert(!entry->use_pmap);
5531 entry->is_sub_map = FALSE;
5532 vm_map_deallocate(
5533 entry->object.sub_map);
5534 entry->object.sub_map = NULL;
5535 entry->is_shared = FALSE;
5536 entry->needs_copy = FALSE;
5537 entry->offset = 0;
5538 /*
5539 * XXX FBDP
5540 * We should propagate the protections
5541 * of the submap entry here instead
5542 * of forcing them to VM_PROT_ALL...
5543 * Or better yet, we should inherit
5544 * the protection of the copy_entry.
5545 */
5546 entry->protection = VM_PROT_ALL;
5547 entry->max_protection = VM_PROT_ALL;
5548 entry->wired_count = 0;
5549 entry->user_wired_count = 0;
5550 if(entry->inheritance
5551 == VM_INHERIT_SHARE)
5552 entry->inheritance = VM_INHERIT_COPY;
5553 continue;
5554 }
5555 /* first take care of any non-sub_map */
5556 /* entries to send */
5557 if(base_addr < entry->vme_start) {
5558 /* stuff to send */
5559 copy_size =
5560 entry->vme_start - base_addr;
5561 break;
5562 }
5563 sub_start = entry->offset;
5564
5565 if(entry->vme_end < dst_end)
5566 sub_end = entry->vme_end;
5567 else
5568 sub_end = dst_end;
5569 sub_end -= entry->vme_start;
5570 sub_end += entry->offset;
5571 local_end = entry->vme_end;
5572 vm_map_unlock(dst_map);
5573 copy_size = sub_end - sub_start;
5574
5575 /* adjust the copy object */
5576 if (total_size > copy_size) {
5577 vm_map_size_t local_size = 0;
5578 vm_map_size_t entry_size;
5579
5580 nentries = 1;
5581 new_offset = copy->offset;
5582 copy_entry = vm_map_copy_first_entry(copy);
5583 while(copy_entry !=
5584 vm_map_copy_to_entry(copy)){
5585 entry_size = copy_entry->vme_end -
5586 copy_entry->vme_start;
5587 if((local_size < copy_size) &&
5588 ((local_size + entry_size)
5589 >= copy_size)) {
5590 vm_map_copy_clip_end(copy,
5591 copy_entry,
5592 copy_entry->vme_start +
5593 (copy_size - local_size));
5594 entry_size = copy_entry->vme_end -
5595 copy_entry->vme_start;
5596 local_size += entry_size;
5597 new_offset += entry_size;
5598 }
5599 if(local_size >= copy_size) {
5600 next_copy = copy_entry->vme_next;
5601 copy_entry->vme_next =
5602 vm_map_copy_to_entry(copy);
5603 previous_prev =
5604 copy->cpy_hdr.links.prev;
5605 copy->cpy_hdr.links.prev = copy_entry;
5606 copy->size = copy_size;
5607 remaining_entries =
5608 copy->cpy_hdr.nentries;
5609 remaining_entries -= nentries;
5610 copy->cpy_hdr.nentries = nentries;
5611 break;
5612 } else {
5613 local_size += entry_size;
5614 new_offset += entry_size;
5615 nentries++;
5616 }
5617 copy_entry = copy_entry->vme_next;
5618 }
5619 }
5620
5621 if((entry->use_pmap) && (pmap == NULL)) {
5622 kr = vm_map_copy_overwrite_nested(
5623 entry->object.sub_map,
5624 sub_start,
5625 copy,
5626 interruptible,
5627 entry->object.sub_map->pmap);
5628 } else if (pmap != NULL) {
5629 kr = vm_map_copy_overwrite_nested(
5630 entry->object.sub_map,
5631 sub_start,
5632 copy,
5633 interruptible, pmap);
5634 } else {
5635 kr = vm_map_copy_overwrite_nested(
5636 entry->object.sub_map,
5637 sub_start,
5638 copy,
5639 interruptible,
5640 dst_map->pmap);
5641 }
5642 if(kr != KERN_SUCCESS) {
5643 if(next_copy != NULL) {
5644 copy->cpy_hdr.nentries +=
5645 remaining_entries;
5646 copy->cpy_hdr.links.prev->vme_next =
5647 next_copy;
5648 copy->cpy_hdr.links.prev
5649 = previous_prev;
5650 copy->size = total_size;
5651 }
5652 return kr;
5653 }
5654 if (dst_end <= local_end) {
5655 return(KERN_SUCCESS);
5656 }
5657 /* otherwise copy no longer exists, it was */
5658 /* destroyed after successful copy_overwrite */
5659 copy = (vm_map_copy_t)
5660 zalloc(vm_map_copy_zone);
5661 vm_map_copy_first_entry(copy) =
5662 vm_map_copy_last_entry(copy) =
5663 vm_map_copy_to_entry(copy);
5664 copy->type = VM_MAP_COPY_ENTRY_LIST;
5665 copy->offset = new_offset;
5666
5667 total_size -= copy_size;
5668 copy_size = 0;
5669 /* put back remainder of copy in container */
5670 if(next_copy != NULL) {
5671 copy->cpy_hdr.nentries = remaining_entries;
5672 copy->cpy_hdr.links.next = next_copy;
5673 copy->cpy_hdr.links.prev = previous_prev;
5674 copy->size = total_size;
5675 next_copy->vme_prev =
5676 vm_map_copy_to_entry(copy);
5677 next_copy = NULL;
5678 }
5679 base_addr = local_end;
5680 vm_map_lock(dst_map);
5681 if(!vm_map_lookup_entry(dst_map,
5682 local_end, &tmp_entry)) {
5683 vm_map_unlock(dst_map);
5684 return(KERN_INVALID_ADDRESS);
5685 }
5686 entry = tmp_entry;
5687 continue;
5688 }
5689 if (dst_end <= entry->vme_end) {
5690 copy_size = dst_end - base_addr;
5691 break;
5692 }
5693
5694 if ((next == vm_map_to_entry(dst_map)) ||
5695 (next->vme_start != entry->vme_end)) {
5696 vm_map_unlock(dst_map);
5697 return(KERN_INVALID_ADDRESS);
5698 }
5699
5700 entry = next;
5701 }/* for */
5702
5703 next_copy = NULL;
5704 nentries = 1;
5705
5706 /* adjust the copy object */
5707 if (total_size > copy_size) {
5708 vm_map_size_t local_size = 0;
5709 vm_map_size_t entry_size;
5710
5711 new_offset = copy->offset;
5712 copy_entry = vm_map_copy_first_entry(copy);
5713 while(copy_entry != vm_map_copy_to_entry(copy)) {
5714 entry_size = copy_entry->vme_end -
5715 copy_entry->vme_start;
5716 if((local_size < copy_size) &&
5717 ((local_size + entry_size)
5718 >= copy_size)) {
5719 vm_map_copy_clip_end(copy, copy_entry,
5720 copy_entry->vme_start +
5721 (copy_size - local_size));
5722 entry_size = copy_entry->vme_end -
5723 copy_entry->vme_start;
5724 local_size += entry_size;
5725 new_offset += entry_size;
5726 }
5727 if(local_size >= copy_size) {
5728 next_copy = copy_entry->vme_next;
5729 copy_entry->vme_next =
5730 vm_map_copy_to_entry(copy);
5731 previous_prev =
5732 copy->cpy_hdr.links.prev;
5733 copy->cpy_hdr.links.prev = copy_entry;
5734 copy->size = copy_size;
5735 remaining_entries =
5736 copy->cpy_hdr.nentries;
5737 remaining_entries -= nentries;
5738 copy->cpy_hdr.nentries = nentries;
5739 break;
5740 } else {
5741 local_size += entry_size;
5742 new_offset += entry_size;
5743 nentries++;
5744 }
5745 copy_entry = copy_entry->vme_next;
5746 }
5747 }
5748
5749 if (aligned) {
5750 pmap_t local_pmap;
5751
5752 if(pmap)
5753 local_pmap = pmap;
5754 else
5755 local_pmap = dst_map->pmap;
5756
5757 if ((kr = vm_map_copy_overwrite_aligned(
5758 dst_map, tmp_entry, copy,
5759 base_addr, local_pmap)) != KERN_SUCCESS) {
5760 if(next_copy != NULL) {
5761 copy->cpy_hdr.nentries +=
5762 remaining_entries;
5763 copy->cpy_hdr.links.prev->vme_next =
5764 next_copy;
5765 copy->cpy_hdr.links.prev =
5766 previous_prev;
5767 copy->size += copy_size;
5768 }
5769 return kr;
5770 }
5771 vm_map_unlock(dst_map);
5772 } else {
5773 /*
5774 * Performance gain:
5775 *
5776 * if the copy and dst addresses are misaligned but share the same
5777 * offset within the page, we can copy_not_aligned the
5778 * misaligned parts and copy aligned the rest. If they are
5779 * aligned but len is unaligned, we simply need to copy
5780 * the end bit unaligned. We'll need to split the misaligned
5781 * bits of the region in this case!
5782 */
5783 /* ALWAYS UNLOCKS THE dst_map MAP */
5784 if ((kr = vm_map_copy_overwrite_unaligned( dst_map,
5785 tmp_entry, copy, base_addr)) != KERN_SUCCESS) {
5786 if(next_copy != NULL) {
5787 copy->cpy_hdr.nentries +=
5788 remaining_entries;
5789 copy->cpy_hdr.links.prev->vme_next =
5790 next_copy;
5791 copy->cpy_hdr.links.prev =
5792 previous_prev;
5793 copy->size += copy_size;
5794 }
5795 return kr;
5796 }
5797 }
5798 total_size -= copy_size;
5799 if(total_size == 0)
5800 break;
5801 base_addr += copy_size;
5802 copy_size = 0;
5803 copy->offset = new_offset;
5804 if(next_copy != NULL) {
5805 copy->cpy_hdr.nentries = remaining_entries;
5806 copy->cpy_hdr.links.next = next_copy;
5807 copy->cpy_hdr.links.prev = previous_prev;
5808 next_copy->vme_prev = vm_map_copy_to_entry(copy);
5809 copy->size = total_size;
5810 }
5811 vm_map_lock(dst_map);
5812 while(TRUE) {
5813 if (!vm_map_lookup_entry(dst_map,
5814 base_addr, &tmp_entry)) {
5815 vm_map_unlock(dst_map);
5816 return(KERN_INVALID_ADDRESS);
5817 }
5818 if (tmp_entry->in_transition) {
5819 entry->needs_wakeup = TRUE;
5820 vm_map_entry_wait(dst_map, THREAD_UNINT);
5821 } else {
5822 break;
5823 }
5824 }
5825 vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(base_addr));
5826
5827 entry = tmp_entry;
5828 } /* while */
5829
5830 /*
5831 * Throw away the vm_map_copy object
5832 */
5833 vm_map_copy_discard(copy);
5834
5835 return(KERN_SUCCESS);
5836 }/* vm_map_copy_overwrite */
5837
5838 kern_return_t
5839 vm_map_copy_overwrite(
5840 vm_map_t dst_map,
5841 vm_map_offset_t dst_addr,
5842 vm_map_copy_t copy,
5843 boolean_t interruptible)
5844 {
5845 return vm_map_copy_overwrite_nested(
5846 dst_map, dst_addr, copy, interruptible, (pmap_t) NULL);
5847 }
5848
5849
5850 /*
5851 * Routine: vm_map_copy_overwrite_unaligned [internal use only]
5852 *
5853 * Description:
5854 * Physically copy unaligned data
5855 *
5856 * Implementation:
5857 * Unaligned parts of pages have to be physically copied. We use
5858 * a modified form of vm_fault_copy (which understands none-aligned
5859 * page offsets and sizes) to do the copy. We attempt to copy as
5860 * much memory in one go as possibly, however vm_fault_copy copies
5861 * within 1 memory object so we have to find the smaller of "amount left"
5862 * "source object data size" and "target object data size". With
5863 * unaligned data we don't need to split regions, therefore the source
5864 * (copy) object should be one map entry, the target range may be split
5865 * over multiple map entries however. In any event we are pessimistic
5866 * about these assumptions.
5867 *
5868 * Assumptions:
5869 * dst_map is locked on entry and is returned locked on success,
5870 * unlocked on error.
5871 */
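/*
 * Illustrative sketch of the "smallest of the three sizes" rule described
 * above: each pass of the loop below copies no more than the bytes left
 * overall, the bytes left in the current source copy entry, and the bytes
 * left in the current destination entry.  The helper name is made up for
 * illustration, and the block is kept under #if 0 so it is never compiled.
 */
#if 0	/* illustrative sketch only */
static vm_map_size_t
example_unaligned_chunk_size(
	vm_map_size_t		amount_left,	/* bytes still to copy	*/
	vm_map_size_t		src_size,	/* room left in source	*/
	vm_map_size_t		dst_size)	/* room left in target	*/
{
	vm_map_size_t		copy_size;

	copy_size = (dst_size < src_size) ? dst_size : src_size;
	if (copy_size > amount_left)
		copy_size = amount_left;
	return copy_size;
}
#endif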
5872
5873 static kern_return_t
5874 vm_map_copy_overwrite_unaligned(
5875 vm_map_t dst_map,
5876 vm_map_entry_t entry,
5877 vm_map_copy_t copy,
5878 vm_map_offset_t start)
5879 {
5880 vm_map_entry_t copy_entry = vm_map_copy_first_entry(copy);
5881 vm_map_version_t version;
5882 vm_object_t dst_object;
5883 vm_object_offset_t dst_offset;
5884 vm_object_offset_t src_offset;
5885 vm_object_offset_t entry_offset;
5886 vm_map_offset_t entry_end;
5887 vm_map_size_t src_size,
5888 dst_size,
5889 copy_size,
5890 amount_left;
5891 kern_return_t kr = KERN_SUCCESS;
5892
5893 vm_map_lock_write_to_read(dst_map);
5894
5895 src_offset = copy->offset - vm_object_trunc_page(copy->offset);
5896 amount_left = copy->size;
5897 /*
5898 * unaligned, so we never clipped this entry; we need the offset into
5899 * the vm_object, not just the data.
5900 */
5901 while (amount_left > 0) {
5902
5903 if (entry == vm_map_to_entry(dst_map)) {
5904 vm_map_unlock_read(dst_map);
5905 return KERN_INVALID_ADDRESS;
5906 }
5907
5908 /* "start" must be within the current map entry */
5909 assert ((start>=entry->vme_start) && (start<entry->vme_end));
5910
5911 dst_offset = start - entry->vme_start;
5912
5913 dst_size = entry->vme_end - start;
5914
5915 src_size = copy_entry->vme_end -
5916 (copy_entry->vme_start + src_offset);
5917
5918 if (dst_size < src_size) {
5919 /*
5920 * we can only copy dst_size bytes before
5921 * we have to get the next destination entry
5922 */
5923 copy_size = dst_size;
5924 } else {
5925 /*
5926 * we can only copy src_size bytes before
5927 * we have to get the next source copy entry
5928 */
5929 copy_size = src_size;
5930 }
5931
5932 if (copy_size > amount_left) {
5933 copy_size = amount_left;
5934 }
5935 /*
5936 * Entry needs copy: create a shadow object for the
5937 * copy-on-write region.
5938 */
5939 if (entry->needs_copy &&
5940 ((entry->protection & VM_PROT_WRITE) != 0))
5941 {
5942 if (vm_map_lock_read_to_write(dst_map)) {
5943 vm_map_lock_read(dst_map);
5944 goto RetryLookup;
5945 }
5946 vm_object_shadow(&entry->object.vm_object,
5947 &entry->offset,
5948 (vm_map_size_t)(entry->vme_end
5949 - entry->vme_start));
5950 entry->needs_copy = FALSE;
5951 vm_map_lock_write_to_read(dst_map);
5952 }
5953 dst_object = entry->object.vm_object;
5954 /*
5955 * unlike with the virtual (aligned) copy, we're going
5956 * to fault on it; therefore we need a target object.
5957 */
5958 if (dst_object == VM_OBJECT_NULL) {
5959 if (vm_map_lock_read_to_write(dst_map)) {
5960 vm_map_lock_read(dst_map);
5961 goto RetryLookup;
5962 }
5963 dst_object = vm_object_allocate((vm_map_size_t)
5964 entry->vme_end - entry->vme_start);
5965 entry->object.vm_object = dst_object;
5966 entry->offset = 0;
5967 vm_map_lock_write_to_read(dst_map);
5968 }
5969 /*
5970 * Take an object reference and unlock map. The "entry" may
5971 * disappear or change when the map is unlocked.
5972 */
5973 vm_object_reference(dst_object);
5974 version.main_timestamp = dst_map->timestamp;
5975 entry_offset = entry->offset;
5976 entry_end = entry->vme_end;
5977 vm_map_unlock_read(dst_map);
5978 /*
5979 * Copy as much as possible in one pass
5980 */
5981 kr = vm_fault_copy(
5982 copy_entry->object.vm_object,
5983 copy_entry->offset + src_offset,
5984 &copy_size,
5985 dst_object,
5986 entry_offset + dst_offset,
5987 dst_map,
5988 &version,
5989 THREAD_UNINT );
5990
5991 start += copy_size;
5992 src_offset += copy_size;
5993 amount_left -= copy_size;
5994 /*
5995 * Release the object reference
5996 */
5997 vm_object_deallocate(dst_object);
5998 /*
5999 * If a hard error occurred, return it now
6000 */
6001 if (kr != KERN_SUCCESS)
6002 return kr;
6003
6004 if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
6005 || amount_left == 0)
6006 {
6007 /*
6008 * all done with this copy entry, dispose.
6009 */
6010 vm_map_copy_entry_unlink(copy, copy_entry);
6011 vm_object_deallocate(copy_entry->object.vm_object);
6012 vm_map_copy_entry_dispose(copy, copy_entry);
6013
6014 if ((copy_entry = vm_map_copy_first_entry(copy))
6015 == vm_map_copy_to_entry(copy) && amount_left) {
6016 /*
6017 * not finished copying but we've run out of source
6018 */
6019 return KERN_INVALID_ADDRESS;
6020 }
6021 src_offset = 0;
6022 }
6023
6024 if (amount_left == 0)
6025 return KERN_SUCCESS;
6026
6027 vm_map_lock_read(dst_map);
6028 if (version.main_timestamp == dst_map->timestamp) {
6029 if (start == entry_end) {
6030 /*
6031 * destination region is split. Use the version
6032 * information to avoid a lookup in the normal
6033 * case.
6034 */
6035 entry = entry->vme_next;
6036 /*
6037 * should be contiguous. Fail if we encounter
6038 * a hole in the destination.
6039 */
6040 if (start != entry->vme_start) {
6041 vm_map_unlock_read(dst_map);
6042 return KERN_INVALID_ADDRESS ;
6043 }
6044 }
6045 } else {
6046 /*
6047 * Map version check failed.
6048 * We must look up the entry because somebody
6049 * might have changed the map behind our back.
6050 */
6051 RetryLookup:
6052 if (!vm_map_lookup_entry(dst_map, start, &entry))
6053 {
6054 vm_map_unlock_read(dst_map);
6055 return KERN_INVALID_ADDRESS ;
6056 }
6057 }
6058 }/* while */
6059
6060 return KERN_SUCCESS;
6061 }/* vm_map_copy_overwrite_unaligned */
6062
6063 /*
6064 * Routine: vm_map_copy_overwrite_aligned [internal use only]
6065 *
6066 * Description:
6067 * Does all the vm_trickery possible for whole pages.
6068 *
6069 * Implementation:
6070 *
6071 * If there are no permanent objects in the destination,
6072 * and the source and destination map entry zones match,
6073 * and the destination map entry is not shared,
6074 * then the map entries can be deleted and replaced
6075 * with those from the copy. The following code is the
6076 * basic idea of what to do, but there are lots of annoying
6077 * little details about getting protection and inheritance
6078 * right. Should add protection, inheritance, and sharing checks
6079 * to the above pass and make sure that no wiring is involved.
6080 */
6081
6082 static kern_return_t
6083 vm_map_copy_overwrite_aligned(
6084 vm_map_t dst_map,
6085 vm_map_entry_t tmp_entry,
6086 vm_map_copy_t copy,
6087 vm_map_offset_t start,
6088 __unused pmap_t pmap)
6089 {
6090 vm_object_t object;
6091 vm_map_entry_t copy_entry;
6092 vm_map_size_t copy_size;
6093 vm_map_size_t size;
6094 vm_map_entry_t entry;
6095
6096 while ((copy_entry = vm_map_copy_first_entry(copy))
6097 != vm_map_copy_to_entry(copy))
6098 {
6099 copy_size = (copy_entry->vme_end - copy_entry->vme_start);
6100
6101 entry = tmp_entry;
6102 assert(!entry->use_pmap); /* unnested when clipped earlier */
6103 if (entry == vm_map_to_entry(dst_map)) {
6104 vm_map_unlock(dst_map);
6105 return KERN_INVALID_ADDRESS;
6106 }
6107 size = (entry->vme_end - entry->vme_start);
6108 /*
6109 * Make sure that no holes popped up in the
6110 * address map, and that the protection is
6111 * still valid, in case the map was unlocked
6112 * earlier.
6113 */
6114
6115 if ((entry->vme_start != start) || ((entry->is_sub_map)
6116 && !entry->needs_copy)) {
6117 vm_map_unlock(dst_map);
6118 return(KERN_INVALID_ADDRESS);
6119 }
6120 assert(entry != vm_map_to_entry(dst_map));
6121
6122 /*
6123 * Check protection again
6124 */
6125
6126 if ( ! (entry->protection & VM_PROT_WRITE)) {
6127 vm_map_unlock(dst_map);
6128 return(KERN_PROTECTION_FAILURE);
6129 }
6130
6131 /*
6132 * Adjust to source size first
6133 */
6134
6135 if (copy_size < size) {
6136 vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
6137 size = copy_size;
6138 }
6139
6140 /*
6141 * Adjust to destination size
6142 */
6143
6144 if (size < copy_size) {
6145 vm_map_copy_clip_end(copy, copy_entry,
6146 copy_entry->vme_start + size);
6147 copy_size = size;
6148 }
6149
6150 assert((entry->vme_end - entry->vme_start) == size);
6151 assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
6152 assert((copy_entry->vme_end - copy_entry->vme_start) == size);
6153
6154 /*
6155 * If the destination contains temporary unshared memory,
6156 * we can perform the copy by throwing it away and
6157 * installing the source data.
6158 */
6159
6160 object = entry->object.vm_object;
6161 if ((!entry->is_shared &&
6162 ((object == VM_OBJECT_NULL) ||
6163 (object->internal && !object->true_share))) ||
6164 entry->needs_copy) {
6165 vm_object_t old_object = entry->object.vm_object;
6166 vm_object_offset_t old_offset = entry->offset;
6167 vm_object_offset_t offset;
6168
6169 /*
6170 * Ensure that the source and destination aren't
6171 * identical
6172 */
6173 if (old_object == copy_entry->object.vm_object &&
6174 old_offset == copy_entry->offset) {
6175 vm_map_copy_entry_unlink(copy, copy_entry);
6176 vm_map_copy_entry_dispose(copy, copy_entry);
6177
6178 if (old_object != VM_OBJECT_NULL)
6179 vm_object_deallocate(old_object);
6180
6181 start = tmp_entry->vme_end;
6182 tmp_entry = tmp_entry->vme_next;
6183 continue;
6184 }
6185
6186 if (old_object != VM_OBJECT_NULL) {
6187 if(entry->is_sub_map) {
6188 if(entry->use_pmap) {
6189 #ifndef NO_NESTED_PMAP
6190 pmap_unnest(dst_map->pmap,
6191 (addr64_t)entry->vme_start,
6192 entry->vme_end - entry->vme_start);
6193 #endif /* NO_NESTED_PMAP */
6194 if(dst_map->mapped) {
6195 /* clean up parent */
6196 /* map/maps */
6197 vm_map_submap_pmap_clean(
6198 dst_map, entry->vme_start,
6199 entry->vme_end,
6200 entry->object.sub_map,
6201 entry->offset);
6202 }
6203 } else {
6204 vm_map_submap_pmap_clean(
6205 dst_map, entry->vme_start,
6206 entry->vme_end,
6207 entry->object.sub_map,
6208 entry->offset);
6209 }
6210 vm_map_deallocate(
6211 entry->object.sub_map);
6212 } else {
6213 if(dst_map->mapped) {
6214 vm_object_pmap_protect(
6215 entry->object.vm_object,
6216 entry->offset,
6217 entry->vme_end
6218 - entry->vme_start,
6219 PMAP_NULL,
6220 entry->vme_start,
6221 VM_PROT_NONE);
6222 } else {
6223 pmap_remove(dst_map->pmap,
6224 (addr64_t)(entry->vme_start),
6225 (addr64_t)(entry->vme_end));
6226 }
6227 vm_object_deallocate(old_object);
6228 }
6229 }
6230
6231 entry->is_sub_map = FALSE;
6232 entry->object = copy_entry->object;
6233 object = entry->object.vm_object;
6234 entry->needs_copy = copy_entry->needs_copy;
6235 entry->wired_count = 0;
6236 entry->user_wired_count = 0;
6237 offset = entry->offset = copy_entry->offset;
6238
6239 vm_map_copy_entry_unlink(copy, copy_entry);
6240 vm_map_copy_entry_dispose(copy, copy_entry);
6241
6242 /*
6243 * we could try to push pages into the pmap at this point, BUT
6244 * this optimization only saved on average 2 us per page if ALL
6245 * the pages in the source were currently mapped
6246 * and ALL the pages in the dest were touched; if fewer than 2/3 of
6247 * the pages were touched, this optimization actually cost more cycles.
6248 * It also puts a lot of pressure on the pmap layer w.r.t. mapping structures.
6249 */
6250
6251 /*
6252 * Set up for the next iteration. The map
6253 * has not been unlocked, so the next
6254 * address should be at the end of this
6255 * entry, and the next map entry should be
6256 * the one following it.
6257 */
6258
6259 start = tmp_entry->vme_end;
6260 tmp_entry = tmp_entry->vme_next;
6261 } else {
6262 vm_map_version_t version;
6263 vm_object_t dst_object = entry->object.vm_object;
6264 vm_object_offset_t dst_offset = entry->offset;
6265 kern_return_t r;
6266
6267 /*
6268 * Take an object reference, and record
6269 * the map version information so that the
6270 * map can be safely unlocked.
6271 */
6272
6273 vm_object_reference(dst_object);
6274
6275 /* account for unlock bumping up timestamp */
6276 version.main_timestamp = dst_map->timestamp + 1;
6277
6278 vm_map_unlock(dst_map);
6279
6280 /*
6281 * Copy as much as possible in one pass
6282 */
6283
6284 copy_size = size;
6285 r = vm_fault_copy(
6286 copy_entry->object.vm_object,
6287 copy_entry->offset,
6288 &copy_size,
6289 dst_object,
6290 dst_offset,
6291 dst_map,
6292 &version,
6293 THREAD_UNINT );
6294
6295 /*
6296 * Release the object reference
6297 */
6298
6299 vm_object_deallocate(dst_object);
6300
6301 /*
6302 * If a hard error occurred, return it now
6303 */
6304
6305 if (r != KERN_SUCCESS)
6306 return(r);
6307
6308 if (copy_size != 0) {
6309 /*
6310 * Dispose of the copied region
6311 */
6312
6313 vm_map_copy_clip_end(copy, copy_entry,
6314 copy_entry->vme_start + copy_size);
6315 vm_map_copy_entry_unlink(copy, copy_entry);
6316 vm_object_deallocate(copy_entry->object.vm_object);
6317 vm_map_copy_entry_dispose(copy, copy_entry);
6318 }
6319
6320 /*
6321 * Pick up in the destination map where we left off.
6322 *
6323 * Use the version information to avoid a lookup
6324 * in the normal case.
6325 */
6326
6327 start += copy_size;
6328 vm_map_lock(dst_map);
6329 if (version.main_timestamp == dst_map->timestamp) {
6330 /* We can safely use saved tmp_entry value */
6331
6332 vm_map_clip_end(dst_map, tmp_entry, start);
6333 tmp_entry = tmp_entry->vme_next;
6334 } else {
6335 /* Must do lookup of tmp_entry */
6336
6337 if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
6338 vm_map_unlock(dst_map);
6339 return(KERN_INVALID_ADDRESS);
6340 }
6341 vm_map_clip_start(dst_map, tmp_entry, start);
6342 }
6343 }
6344 }/* while */
6345
6346 return(KERN_SUCCESS);
6347 }/* vm_map_copy_overwrite_aligned */
6348
6349 /*
6350 * Routine: vm_map_copyin_kernel_buffer [internal use only]
6351 *
6352 * Description:
6353 * Copy in data to a kernel buffer from space in the
6354 * source map. The original space may be optionally
6355 * deallocated.
6356 *
6357 * If successful, returns a new copy object.
6358 */
6359 static kern_return_t
6360 vm_map_copyin_kernel_buffer(
6361 vm_map_t src_map,
6362 vm_map_offset_t src_addr,
6363 vm_map_size_t len,
6364 boolean_t src_destroy,
6365 vm_map_copy_t *copy_result)
6366 {
6367 kern_return_t kr;
6368 vm_map_copy_t copy;
6369 vm_map_size_t kalloc_size = sizeof(struct vm_map_copy) + len;
6370
6371 copy = (vm_map_copy_t) kalloc(kalloc_size);
6372 if (copy == VM_MAP_COPY_NULL) {
6373 return KERN_RESOURCE_SHORTAGE;
6374 }
6375 copy->type = VM_MAP_COPY_KERNEL_BUFFER;
6376 copy->size = len;
6377 copy->offset = 0;
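/* the copied data lives inline, immediately after the vm_map_copy header
 * (kalloc_size above reserved sizeof(struct vm_map_copy) + len for it) */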
6378 copy->cpy_kdata = (void *) (copy + 1);
6379 copy->cpy_kalloc_size = kalloc_size;
6380
6381 kr = copyinmap(src_map, src_addr, copy->cpy_kdata, len);
6382 if (kr != KERN_SUCCESS) {
6383 kfree(copy, kalloc_size);
6384 return kr;
6385 }
6386 if (src_destroy) {
6387 (void) vm_map_remove(src_map, vm_map_trunc_page(src_addr),
6388 vm_map_round_page(src_addr + len),
6389 VM_MAP_REMOVE_INTERRUPTIBLE |
6390 VM_MAP_REMOVE_WAIT_FOR_KWIRE |
6391 (src_map == kernel_map) ?
6392 VM_MAP_REMOVE_KUNWIRE : 0);
6393 }
6394 *copy_result = copy;
6395 return KERN_SUCCESS;
6396 }
6397
6398 /*
6399 * Routine: vm_map_copyout_kernel_buffer [internal use only]
6400 *
6401 * Description:
6402 * Copy out data from a kernel buffer into space in the
6403 * destination map. The space may be optionally dynamically
6404 * allocated.
6405 *
6406 * If successful, consumes the copy object.
6407 * Otherwise, the caller is responsible for it.
6408 */
6409 static int vm_map_copyout_kernel_buffer_failures = 0;
6410 static kern_return_t
6411 vm_map_copyout_kernel_buffer(
6412 vm_map_t map,
6413 vm_map_address_t *addr, /* IN/OUT */
6414 vm_map_copy_t copy,
6415 boolean_t overwrite)
6416 {
6417 kern_return_t kr = KERN_SUCCESS;
6418 thread_t thread = current_thread();
6419
6420 if (!overwrite) {
6421
6422 /*
6423 * Allocate space in the target map for the data
6424 */
6425 *addr = 0;
6426 kr = vm_map_enter(map,
6427 addr,
6428 vm_map_round_page(copy->size),
6429 (vm_map_offset_t) 0,
6430 VM_FLAGS_ANYWHERE,
6431 VM_OBJECT_NULL,
6432 (vm_object_offset_t) 0,
6433 FALSE,
6434 VM_PROT_DEFAULT,
6435 VM_PROT_ALL,
6436 VM_INHERIT_DEFAULT);
6437 if (kr != KERN_SUCCESS)
6438 return kr;
6439 }
6440
6441 /*
6442 * Copyout the data from the kernel buffer to the target map.
6443 */
6444 if (thread->map == map) {
6445
6446 /*
6447 * If the target map is the current map, just do
6448 * the copy.
6449 */
6450 if (copyout(copy->cpy_kdata, *addr, copy->size)) {
6451 kr = KERN_INVALID_ADDRESS;
6452 }
6453 }
6454 else {
6455 vm_map_t oldmap;
6456
6457 /*
6458 * If the target map is another map, assume the
6459 * target's address space identity for the duration
6460 * of the copy.
6461 */
6462 vm_map_reference(map);
6463 oldmap = vm_map_switch(map);
6464
6465 if (copyout(copy->cpy_kdata, *addr, copy->size)) {
6466 vm_map_copyout_kernel_buffer_failures++;
6467 kr = KERN_INVALID_ADDRESS;
6468 }
6469
6470 (void) vm_map_switch(oldmap);
6471 vm_map_deallocate(map);
6472 }
6473
6474 if (kr != KERN_SUCCESS) {
6475 /* the copy failed, clean up */
6476 if (!overwrite) {
6477 /*
6478 * Deallocate the space we allocated in the target map.
6479 */
6480 (void) vm_map_remove(map,
6481 vm_map_trunc_page(*addr),
6482 vm_map_round_page(*addr +
6483 vm_map_round_page(copy->size)),
6484 VM_MAP_NO_FLAGS);
6485 *addr = 0;
6486 }
6487 } else {
6488 /* copy was successful, discard the copy structure */
6489 kfree(copy, copy->cpy_kalloc_size);
6490 }
6491
6492 return kr;
6493 }
6494
6495 /*
6496 * Macro: vm_map_copy_insert
6497 *
6498 * Description:
6499 * Link a copy chain ("copy") into a map at the
6500 * specified location (after "where").
6501 * Side effects:
6502 * The copy chain is destroyed.
6503 * Warning:
6504 * The arguments are evaluated multiple times.
6505 */
6506 #define vm_map_copy_insert(map, where, copy) \
6507 MACRO_BEGIN \
6508 vm_map_t VMCI_map; \
6509 vm_map_entry_t VMCI_where; \
6510 vm_map_copy_t VMCI_copy; \
6511 VMCI_map = (map); \
6512 VMCI_where = (where); \
6513 VMCI_copy = (copy); \
6514 ((VMCI_where->vme_next)->vme_prev = vm_map_copy_last_entry(VMCI_copy))\
6515 ->vme_next = (VMCI_where->vme_next); \
6516 ((VMCI_where)->vme_next = vm_map_copy_first_entry(VMCI_copy)) \
6517 ->vme_prev = VMCI_where; \
6518 VMCI_map->hdr.nentries += VMCI_copy->cpy_hdr.nentries; \
6519 UPDATE_FIRST_FREE(VMCI_map, VMCI_map->first_free); \
6520 zfree(vm_map_copy_zone, VMCI_copy); \
6521 MACRO_END
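/*
 * Typical use, as in vm_map_copyout() and vm_map_fork_copy() below:
 *
 *	vm_map_copy_insert(dst_map, last, copy);
 *
 * splices the copy's entries into "dst_map" right after "last" and frees
 * the (now empty) copy header.  Since the arguments are expanded more than
 * once, callers pass simple variables rather than expressions with side
 * effects.
 */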
6522
6523 /*
6524 * Routine: vm_map_copyout
6525 *
6526 * Description:
6527 * Copy out a copy chain ("copy") into newly-allocated
6528 * space in the destination map.
6529 *
6530 * If successful, consumes the copy object.
6531 * Otherwise, the caller is responsible for it.
6532 */
6533 kern_return_t
6534 vm_map_copyout(
6535 vm_map_t dst_map,
6536 vm_map_address_t *dst_addr, /* OUT */
6537 vm_map_copy_t copy)
6538 {
6539 vm_map_size_t size;
6540 vm_map_size_t adjustment;
6541 vm_map_offset_t start;
6542 vm_object_offset_t vm_copy_start;
6543 vm_map_entry_t last;
6544 register
6545 vm_map_entry_t entry;
6546
6547 /*
6548 * Check for null copy object.
6549 */
6550
6551 if (copy == VM_MAP_COPY_NULL) {
6552 *dst_addr = 0;
6553 return(KERN_SUCCESS);
6554 }
6555
6556 /*
6557 * Check for special copy object, created
6558 * by vm_map_copyin_object.
6559 */
6560
6561 if (copy->type == VM_MAP_COPY_OBJECT) {
6562 vm_object_t object = copy->cpy_object;
6563 kern_return_t kr;
6564 vm_object_offset_t offset;
6565
6566 offset = vm_object_trunc_page(copy->offset);
6567 size = vm_map_round_page(copy->size +
6568 (vm_map_size_t)(copy->offset - offset));
6569 *dst_addr = 0;
6570 kr = vm_map_enter(dst_map, dst_addr, size,
6571 (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
6572 object, offset, FALSE,
6573 VM_PROT_DEFAULT, VM_PROT_ALL,
6574 VM_INHERIT_DEFAULT);
6575 if (kr != KERN_SUCCESS)
6576 return(kr);
6577 /* Account for non-pagealigned copy object */
6578 *dst_addr += (vm_map_offset_t)(copy->offset - offset);
6579 zfree(vm_map_copy_zone, copy);
6580 return(KERN_SUCCESS);
6581 }
6582
6583 /*
6584 * Check for special kernel buffer allocated
6585 * by new_ipc_kmsg_copyin.
6586 */
6587
6588 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
6589 return(vm_map_copyout_kernel_buffer(dst_map, dst_addr,
6590 copy, FALSE));
6591 }
6592
6593 /*
6594 * Find space for the data
6595 */
6596
6597 vm_copy_start = vm_object_trunc_page(copy->offset);
6598 size = vm_map_round_page((vm_map_size_t)copy->offset + copy->size)
6599 - vm_copy_start;
6600
6601 StartAgain: ;
6602
6603 vm_map_lock(dst_map);
6604 assert(first_free_is_valid(dst_map));
6605 start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
6606 vm_map_min(dst_map) : last->vme_end;
6607
6608 while (TRUE) {
6609 vm_map_entry_t next = last->vme_next;
6610 vm_map_offset_t end = start + size;
6611
6612 if ((end > dst_map->max_offset) || (end < start)) {
6613 if (dst_map->wait_for_space) {
6614 if (size <= (dst_map->max_offset - dst_map->min_offset)) {
6615 assert_wait((event_t) dst_map,
6616 THREAD_INTERRUPTIBLE);
6617 vm_map_unlock(dst_map);
6618 thread_block(THREAD_CONTINUE_NULL);
6619 goto StartAgain;
6620 }
6621 }
6622 vm_map_unlock(dst_map);
6623 return(KERN_NO_SPACE);
6624 }
6625
6626 if ((next == vm_map_to_entry(dst_map)) ||
6627 (next->vme_start >= end))
6628 break;
6629
6630 last = next;
6631 start = last->vme_end;
6632 }
6633
6634 /*
6635 * Since we're going to just drop the map
6636 * entries from the copy into the destination
6637 * map, they must come from the same pool.
6638 */
6639
6640 if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
6641 /*
6642 * Mismatches occur when dealing with the default
6643 * pager.
6644 */
6645 zone_t old_zone;
6646 vm_map_entry_t next, new;
6647
6648 /*
6649 * Find the zone that the copies were allocated from
6650 */
6651 old_zone = (copy->cpy_hdr.entries_pageable)
6652 ? vm_map_entry_zone
6653 : vm_map_kentry_zone;
6654 entry = vm_map_copy_first_entry(copy);
6655
6656 /*
6657 * Reinitialize the copy so that vm_map_copy_entry_link
6658 * will work.
6659 */
6660 copy->cpy_hdr.nentries = 0;
6661 copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;
6662 vm_map_copy_first_entry(copy) =
6663 vm_map_copy_last_entry(copy) =
6664 vm_map_copy_to_entry(copy);
6665
6666 /*
6667 * Copy each entry.
6668 */
6669 while (entry != vm_map_copy_to_entry(copy)) {
6670 new = vm_map_copy_entry_create(copy);
6671 vm_map_entry_copy_full(new, entry);
6672 new->use_pmap = FALSE; /* clr address space specifics */
6673 vm_map_copy_entry_link(copy,
6674 vm_map_copy_last_entry(copy),
6675 new);
6676 next = entry->vme_next;
6677 zfree(old_zone, entry);
6678 entry = next;
6679 }
6680 }
6681
6682 /*
6683 * Adjust the addresses in the copy chain, and
6684 * reset the region attributes.
6685 */
6686
6687 adjustment = start - vm_copy_start;
6688 for (entry = vm_map_copy_first_entry(copy);
6689 entry != vm_map_copy_to_entry(copy);
6690 entry = entry->vme_next) {
6691 entry->vme_start += adjustment;
6692 entry->vme_end += adjustment;
6693
6694 entry->inheritance = VM_INHERIT_DEFAULT;
6695 entry->protection = VM_PROT_DEFAULT;
6696 entry->max_protection = VM_PROT_ALL;
6697 entry->behavior = VM_BEHAVIOR_DEFAULT;
6698
6699 /*
6700 * If the entry is now wired,
6701 * map the pages into the destination map.
6702 */
6703 if (entry->wired_count != 0) {
6704 register vm_map_offset_t va;
6705 vm_object_offset_t offset;
6706 register vm_object_t object;
6707 vm_prot_t prot;
6708 int type_of_fault;
6709
6710 object = entry->object.vm_object;
6711 offset = entry->offset;
6712 va = entry->vme_start;
6713
6714 pmap_pageable(dst_map->pmap,
6715 entry->vme_start,
6716 entry->vme_end,
6717 TRUE);
6718
6719 while (va < entry->vme_end) {
6720 register vm_page_t m;
6721
6722 /*
6723 * Look up the page in the object.
6724 * Assert that the page will be found in the
6725 * top object:
6726 * either
6727 * the object was newly created by
6728 * vm_object_copy_slowly, and has
6729 * copies of all of the pages from
6730 * the source object
6731 * or
6732 * the object was moved from the old
6733 * map entry; because the old map
6734 * entry was wired, all of the pages
6735 * were in the top-level object.
6736 * (XXX not true if we wire pages for
6737 * reading)
6738 */
6739 vm_object_lock(object);
6740
6741 m = vm_page_lookup(object, offset);
6742 if (m == VM_PAGE_NULL || m->wire_count == 0 ||
6743 m->absent)
6744 panic("vm_map_copyout: wiring %p", m);
6745
6746 /*
6747 * ENCRYPTED SWAP:
6748 * The page is assumed to be wired here, so it
6749 * shouldn't be encrypted. Otherwise, we
6750 * couldn't enter it in the page table, since
6751 * we don't want the user to see the encrypted
6752 * data.
6753 */
6754 ASSERT_PAGE_DECRYPTED(m);
6755
6756 prot = entry->protection;
6757
6758 if (override_nx(dst_map, entry->alias) && prot)
6759 prot |= VM_PROT_EXECUTE;
6760
6761 type_of_fault = DBG_CACHE_HIT_FAULT;
6762
6763 vm_fault_enter(m, dst_map->pmap, va, prot,
6764 m->wire_count != 0, FALSE, FALSE,
6765 &type_of_fault);
6766
6767 vm_object_unlock(object);
6768
6769 offset += PAGE_SIZE_64;
6770 va += PAGE_SIZE;
6771 }
6772 }
6773 }
6774
6775 /*
6776 * Correct the page alignment for the result
6777 */
6778
6779 *dst_addr = start + (copy->offset - vm_copy_start);
6780
6781 /*
6782 * Update the hints and the map size
6783 */
6784
6785 SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy));
6786
6787 dst_map->size += size;
6788
6789 /*
6790 * Link in the copy
6791 */
6792
6793 vm_map_copy_insert(dst_map, last, copy);
6794
6795 vm_map_unlock(dst_map);
6796
6797 /*
6798 * XXX If wiring_required, call vm_map_pageable
6799 */
6800
6801 return(KERN_SUCCESS);
6802 }
6803
6804 /*
6805 * Routine: vm_map_copyin
6806 *
6807 * Description:
6808 * see vm_map_copyin_common. Exported via Unsupported.exports.
6809 *
6810 */
6811
6812 #undef vm_map_copyin
6813
6814 kern_return_t
6815 vm_map_copyin(
6816 vm_map_t src_map,
6817 vm_map_address_t src_addr,
6818 vm_map_size_t len,
6819 boolean_t src_destroy,
6820 vm_map_copy_t *copy_result) /* OUT */
6821 {
6822 return(vm_map_copyin_common(src_map, src_addr, len, src_destroy,
6823 FALSE, copy_result, FALSE));
6824 }
6825
6826 /*
6827 * Routine: vm_map_copyin_common
6828 *
6829 * Description:
6830 * Copy the specified region (src_addr, len) from the
6831 * source address space (src_map), possibly removing
6832 * the region from the source address space (src_destroy).
6833 *
6834 * Returns:
6835 * A vm_map_copy_t object (copy_result), suitable for
6836 * insertion into another address space (using vm_map_copyout),
6837 * copying over another address space region (using
6838 * vm_map_copy_overwrite). If the copy is unused, it
6839 * should be destroyed (using vm_map_copy_discard).
6840 *
6841 * In/out conditions:
6842 * The source map should not be locked on entry.
6843 */
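/*
 * Illustrative sketch of that lifecycle (a hypothetical helper, using only
 * routines defined in this file): copy a region out of "src_map" and place
 * it at a newly allocated address in "dst_map", destroying the copy if it
 * cannot be inserted.
 */
#if 0	/* example only */
static kern_return_t
example_transfer_region(
	vm_map_t		src_map,
	vm_map_address_t	src_addr,
	vm_map_size_t		len,
	vm_map_t		dst_map,
	vm_map_address_t	*dst_addr)	/* OUT */
{
	vm_map_copy_t	copy;
	kern_return_t	kr;

	kr = vm_map_copyin(src_map, src_addr, len,
			   FALSE,		/* don't destroy the source */
			   &copy);
	if (kr != KERN_SUCCESS)
		return kr;

	kr = vm_map_copyout(dst_map, dst_addr, copy);
	if (kr != KERN_SUCCESS) {
		/* the copy was not consumed; an unused copy must be destroyed */
		vm_map_copy_discard(copy);
	}
	return kr;
}
#endif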
6844
6845 typedef struct submap_map {
6846 vm_map_t parent_map;
6847 vm_map_offset_t base_start;
6848 vm_map_offset_t base_end;
6849 vm_map_size_t base_len;
6850 struct submap_map *next;
6851 } submap_map_t;
6852
6853 kern_return_t
6854 vm_map_copyin_common(
6855 vm_map_t src_map,
6856 vm_map_address_t src_addr,
6857 vm_map_size_t len,
6858 boolean_t src_destroy,
6859 __unused boolean_t src_volatile,
6860 vm_map_copy_t *copy_result, /* OUT */
6861 boolean_t use_maxprot)
6862 {
6863 vm_map_entry_t tmp_entry; /* Result of last map lookup --
6864 * in multi-level lookup, this
6865 * entry contains the actual
6866 * vm_object/offset.
6867 */
6868 register
6869 vm_map_entry_t new_entry = VM_MAP_ENTRY_NULL; /* Map entry for copy */
6870
6871 vm_map_offset_t src_start; /* Start of current entry --
6872 * where copy is taking place now
6873 */
6874 vm_map_offset_t src_end; /* End of entire region to be
6875 * copied */
6876 vm_map_offset_t src_base;
6877 vm_map_t base_map = src_map;
6878 boolean_t map_share=FALSE;
6879 submap_map_t *parent_maps = NULL;
6880
6881 register
6882 vm_map_copy_t copy; /* Resulting copy */
6883 vm_map_address_t copy_addr;
6884
6885 /*
6886 * Check for copies of zero bytes.
6887 */
6888
6889 if (len == 0) {
6890 *copy_result = VM_MAP_COPY_NULL;
6891 return(KERN_SUCCESS);
6892 }
6893
6894 /*
6895 * Check that the end address doesn't overflow
6896 */
6897 src_end = src_addr + len;
6898 if (src_end < src_addr)
6899 return KERN_INVALID_ADDRESS;
6900
6901 /*
6902 * If the copy is sufficiently small, use a kernel buffer instead
6903 * of making a virtual copy. The theory being that the cost of
6904 * setting up VM (and taking C-O-W faults) dominates the copy costs
6905 * for small regions.
6906 */
6907 if ((len < msg_ool_size_small) && !use_maxprot)
6908 return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
6909 src_destroy, copy_result);
6910
6911 /*
6912 * Compute (page aligned) start and end of region
6913 */
6914 src_start = vm_map_trunc_page(src_addr);
6915 src_end = vm_map_round_page(src_end);
6916
6917 XPR(XPR_VM_MAP, "vm_map_copyin_common map 0x%x addr 0x%x len 0x%x dest %d\n", (natural_t)src_map, src_addr, len, src_destroy, 0);
6918
6919 /*
6920 * Allocate a header element for the list.
6921 *
6922 * Use the start and end in the header to
6923 * remember the endpoints prior to rounding.
6924 */
6925
6926 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
6927 vm_map_copy_first_entry(copy) =
6928 vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
6929 copy->type = VM_MAP_COPY_ENTRY_LIST;
6930 copy->cpy_hdr.nentries = 0;
6931 copy->cpy_hdr.entries_pageable = TRUE;
6932
6933 copy->offset = src_addr;
6934 copy->size = len;
6935
6936 new_entry = vm_map_copy_entry_create(copy);
6937
6938 #define RETURN(x) \
6939 MACRO_BEGIN \
6940 vm_map_unlock(src_map); \
6941 if(src_map != base_map) \
6942 vm_map_deallocate(src_map); \
6943 if (new_entry != VM_MAP_ENTRY_NULL) \
6944 vm_map_copy_entry_dispose(copy,new_entry); \
6945 vm_map_copy_discard(copy); \
6946 { \
6947 submap_map_t *_ptr; \
6948 \
6949 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
6950 parent_maps=parent_maps->next; \
6951 if (_ptr->parent_map != base_map) \
6952 vm_map_deallocate(_ptr->parent_map); \
6953 kfree(_ptr, sizeof(submap_map_t)); \
6954 } \
6955 } \
6956 MACRO_RETURN(x); \
6957 MACRO_END
6958
6959 /*
6960 * Find the beginning of the region.
6961 */
6962
6963 vm_map_lock(src_map);
6964
6965 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry))
6966 RETURN(KERN_INVALID_ADDRESS);
6967 if(!tmp_entry->is_sub_map) {
6968 vm_map_clip_start(src_map, tmp_entry, src_start);
6969 }
6970 /* set for later submap fix-up */
6971 copy_addr = src_start;
6972
6973 /*
6974 * Go through entries until we get to the end.
6975 */
6976
6977 while (TRUE) {
6978 register
6979 vm_map_entry_t src_entry = tmp_entry; /* Top-level entry */
6980 vm_map_size_t src_size; /* Size of source
6981 * map entry (in both
6982 * maps)
6983 */
6984
6985 register
6986 vm_object_t src_object; /* Object to copy */
6987 vm_object_offset_t src_offset;
6988
6989 boolean_t src_needs_copy; /* Should source map
6990 * be made read-only
6991 * for copy-on-write?
6992 */
6993
6994 boolean_t new_entry_needs_copy; /* Will new entry be COW? */
6995
6996 boolean_t was_wired; /* Was source wired? */
6997 vm_map_version_t version; /* Version before locks
6998 * dropped to make copy
6999 */
7000 kern_return_t result; /* Return value from
7001 * copy_strategically.
7002 */
7003 while(tmp_entry->is_sub_map) {
7004 vm_map_size_t submap_len;
7005 submap_map_t *ptr;
7006
7007 ptr = (submap_map_t *)kalloc(sizeof(submap_map_t));
7008 ptr->next = parent_maps;
7009 parent_maps = ptr;
7010 ptr->parent_map = src_map;
7011 ptr->base_start = src_start;
7012 ptr->base_end = src_end;
7013 submap_len = tmp_entry->vme_end - src_start;
7014 if(submap_len > (src_end-src_start))
7015 submap_len = src_end-src_start;
7016 ptr->base_len = submap_len;
7017
7018 src_start -= tmp_entry->vme_start;
7019 src_start += tmp_entry->offset;
7020 src_end = src_start + submap_len;
7021 src_map = tmp_entry->object.sub_map;
7022 vm_map_lock(src_map);
7023 /* keep an outstanding reference for all maps in */
7024 /* the parent tree except the base map */
7025 vm_map_reference(src_map);
7026 vm_map_unlock(ptr->parent_map);
7027 if (!vm_map_lookup_entry(
7028 src_map, src_start, &tmp_entry))
7029 RETURN(KERN_INVALID_ADDRESS);
7030 map_share = TRUE;
7031 if(!tmp_entry->is_sub_map)
7032 vm_map_clip_start(src_map, tmp_entry, src_start);
7033 src_entry = tmp_entry;
7034 }
7035 /* we are now in the lowest level submap... */
7036
7037 if ((tmp_entry->object.vm_object != VM_OBJECT_NULL) &&
7038 (tmp_entry->object.vm_object->phys_contiguous)) {
7039 /* This is not supported for now.  In the future */
7040 /* we will need to detect the phys_contig */
7041 /* condition and then upgrade copy_slowly */
7042 /* to do a physical copy from the device-memory- */
7043 /* based object.  We can piggy-back off of */
7044 /* the was_wired boolean to set up the */
7045 /* proper handling. */
7046 RETURN(KERN_PROTECTION_FAILURE);
7047 }
7048 /*
7049 * Create a new address map entry to hold the result.
7050 * Fill in the fields from the appropriate source entries.
7051 * We must unlock the source map to do this if we need
7052 * to allocate a map entry.
7053 */
7054 if (new_entry == VM_MAP_ENTRY_NULL) {
7055 version.main_timestamp = src_map->timestamp;
7056 vm_map_unlock(src_map);
7057
7058 new_entry = vm_map_copy_entry_create(copy);
7059
7060 vm_map_lock(src_map);
7061 if ((version.main_timestamp + 1) != src_map->timestamp) {
7062 if (!vm_map_lookup_entry(src_map, src_start,
7063 &tmp_entry)) {
7064 RETURN(KERN_INVALID_ADDRESS);
7065 }
7066 if (!tmp_entry->is_sub_map)
7067 vm_map_clip_start(src_map, tmp_entry, src_start);
7068 continue; /* restart w/ new tmp_entry */
7069 }
7070 }
7071
7072 /*
7073 * Verify that the region can be read.
7074 */
7075 if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE &&
7076 !use_maxprot) ||
7077 (src_entry->max_protection & VM_PROT_READ) == 0)
7078 RETURN(KERN_PROTECTION_FAILURE);
7079
7080 /*
7081 * Clip against the endpoints of the entire region.
7082 */
7083
7084 vm_map_clip_end(src_map, src_entry, src_end);
7085
7086 src_size = src_entry->vme_end - src_start;
7087 src_object = src_entry->object.vm_object;
7088 src_offset = src_entry->offset;
7089 was_wired = (src_entry->wired_count != 0);
7090
7091 vm_map_entry_copy(new_entry, src_entry);
7092 new_entry->use_pmap = FALSE; /* clr address space specifics */
7093
7094 /*
7095 * Attempt non-blocking copy-on-write optimizations.
7096 */
7097
7098 if (src_destroy &&
7099 (src_object == VM_OBJECT_NULL ||
7100 (src_object->internal && !src_object->true_share
7101 && !map_share))) {
7102 /*
7103 * If we are destroying the source, and the object
7104 * is internal, we can move the object reference
7105 * from the source to the copy. The copy is
7106 * copy-on-write only if the source is.
7107 * We make another reference to the object, because
7108 * destroying the source entry will deallocate it.
7109 */
7110 vm_object_reference(src_object);
7111
7112 /*
7113 * Copy is always unwired. vm_map_copy_entry
7114 * set its wired count to zero.
7115 */
7116
7117 goto CopySuccessful;
7118 }
7119
7120
7121 RestartCopy:
7122 XPR(XPR_VM_MAP, "vm_map_copyin_common src_obj 0x%x ent 0x%x obj 0x%x was_wired %d\n",
7123 src_object, new_entry, new_entry->object.vm_object,
7124 was_wired, 0);
7125 if ((src_object == VM_OBJECT_NULL ||
7126 (!was_wired && !map_share && !tmp_entry->is_shared)) &&
7127 vm_object_copy_quickly(
7128 &new_entry->object.vm_object,
7129 src_offset,
7130 src_size,
7131 &src_needs_copy,
7132 &new_entry_needs_copy)) {
7133
7134 new_entry->needs_copy = new_entry_needs_copy;
7135
7136 /*
7137 * Handle copy-on-write obligations
7138 */
7139
7140 if (src_needs_copy && !tmp_entry->needs_copy) {
7141 vm_prot_t prot;
7142
7143 prot = src_entry->protection & ~VM_PROT_WRITE;
7144
7145 if (override_nx(src_map, src_entry->alias) && prot)
7146 prot |= VM_PROT_EXECUTE;
7147
7148 vm_object_pmap_protect(
7149 src_object,
7150 src_offset,
7151 src_size,
7152 (src_entry->is_shared ?
7153 PMAP_NULL
7154 : src_map->pmap),
7155 src_entry->vme_start,
7156 prot);
7157
7158 tmp_entry->needs_copy = TRUE;
7159 }
7160
7161 /*
7162 * The map has never been unlocked, so it's safe
7163 * to move to the next entry rather than doing
7164 * another lookup.
7165 */
7166
7167 goto CopySuccessful;
7168 }
7169
7170 /*
7171 * Take an object reference, so that we may
7172 * release the map lock(s).
7173 */
7174
7175 assert(src_object != VM_OBJECT_NULL);
7176 vm_object_reference(src_object);
7177
7178 /*
7179 * Record the timestamp for later verification.
7180 * Unlock the map.
7181 */
7182
7183 version.main_timestamp = src_map->timestamp;
7184 vm_map_unlock(src_map); /* Increments timestamp once! */
7185
7186 /*
7187 * Perform the copy
7188 */
7189
7190 if (was_wired) {
7191 CopySlowly:
7192 vm_object_lock(src_object);
7193 result = vm_object_copy_slowly(
7194 src_object,
7195 src_offset,
7196 src_size,
7197 THREAD_UNINT,
7198 &new_entry->object.vm_object);
7199 new_entry->offset = 0;
7200 new_entry->needs_copy = FALSE;
7201
7202 }
7203 else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
7204 (tmp_entry->is_shared || map_share)) {
7205 vm_object_t new_object;
7206
7207 vm_object_lock_shared(src_object);
7208 new_object = vm_object_copy_delayed(
7209 src_object,
7210 src_offset,
7211 src_size,
7212 TRUE);
7213 if (new_object == VM_OBJECT_NULL)
7214 goto CopySlowly;
7215
7216 new_entry->object.vm_object = new_object;
7217 new_entry->needs_copy = TRUE;
7218 result = KERN_SUCCESS;
7219
7220 } else {
7221 result = vm_object_copy_strategically(src_object,
7222 src_offset,
7223 src_size,
7224 &new_entry->object.vm_object,
7225 &new_entry->offset,
7226 &new_entry_needs_copy);
7227
7228 new_entry->needs_copy = new_entry_needs_copy;
7229 }
7230
7231 if (result != KERN_SUCCESS &&
7232 result != KERN_MEMORY_RESTART_COPY) {
7233 vm_map_lock(src_map);
7234 RETURN(result);
7235 }
7236
7237 /*
7238 * Throw away the extra reference
7239 */
7240
7241 vm_object_deallocate(src_object);
7242
7243 /*
7244 * Verify that the map has not substantially
7245 * changed while the copy was being made.
7246 */
7247
7248 vm_map_lock(src_map);
7249
7250 if ((version.main_timestamp + 1) == src_map->timestamp)
7251 goto VerificationSuccessful;
7252
7253 /*
7254 * Simple version comparison failed.
7255 *
7256 * Retry the lookup and verify that the
7257 * same object/offset are still present.
7258 *
7259 * [Note: a memory manager that colludes with
7260 * the calling task can detect that we have
7261 * cheated. While the map was unlocked, the
7262 * mapping could have been changed and restored.]
7263 */
7264
7265 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
7266 RETURN(KERN_INVALID_ADDRESS);
7267 }
7268
7269 src_entry = tmp_entry;
7270 vm_map_clip_start(src_map, src_entry, src_start);
7271
7272 if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) &&
7273 !use_maxprot) ||
7274 ((src_entry->max_protection & VM_PROT_READ) == 0))
7275 goto VerificationFailed;
7276
7277 if (src_entry->vme_end < new_entry->vme_end)
7278 src_size = (new_entry->vme_end = src_entry->vme_end) - src_start;
7279
7280 if ((src_entry->object.vm_object != src_object) ||
7281 (src_entry->offset != src_offset) ) {
7282
7283 /*
7284 * Verification failed.
7285 *
7286 * Start over with this top-level entry.
7287 */
7288
7289 VerificationFailed: ;
7290
7291 vm_object_deallocate(new_entry->object.vm_object);
7292 tmp_entry = src_entry;
7293 continue;
7294 }
7295
7296 /*
7297 * Verification succeeded.
7298 */
7299
7300 VerificationSuccessful: ;
7301
7302 if (result == KERN_MEMORY_RESTART_COPY)
7303 goto RestartCopy;
7304
7305 /*
7306 * Copy succeeded.
7307 */
7308
7309 CopySuccessful: ;
7310
7311 /*
7312 * Link in the new copy entry.
7313 */
7314
7315 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
7316 new_entry);
7317
7318 /*
7319 * Determine whether the entire region
7320 * has been copied.
7321 */
7322 src_base = src_start;
7323 src_start = new_entry->vme_end;
7324 new_entry = VM_MAP_ENTRY_NULL;
7325 while ((src_start >= src_end) && (src_end != 0)) {
7326 if (src_map != base_map) {
7327 submap_map_t *ptr;
7328
7329 ptr = parent_maps;
7330 assert(ptr != NULL);
7331 parent_maps = parent_maps->next;
7332
7333 /* fix up the damage we did in that submap */
7334 vm_map_simplify_range(src_map,
7335 src_base,
7336 src_end);
7337
7338 vm_map_unlock(src_map);
7339 vm_map_deallocate(src_map);
7340 vm_map_lock(ptr->parent_map);
7341 src_map = ptr->parent_map;
7342 src_base = ptr->base_start;
7343 src_start = ptr->base_start + ptr->base_len;
7344 src_end = ptr->base_end;
7345 if ((src_end > src_start) &&
7346 !vm_map_lookup_entry(
7347 src_map, src_start, &tmp_entry))
7348 RETURN(KERN_INVALID_ADDRESS);
7349 kfree(ptr, sizeof(submap_map_t));
7350 if(parent_maps == NULL)
7351 map_share = FALSE;
7352 src_entry = tmp_entry->vme_prev;
7353 } else
7354 break;
7355 }
7356 if ((src_start >= src_end) && (src_end != 0))
7357 break;
7358
7359 /*
7360 * Verify that there are no gaps in the region
7361 */
7362
7363 tmp_entry = src_entry->vme_next;
7364 if ((tmp_entry->vme_start != src_start) ||
7365 (tmp_entry == vm_map_to_entry(src_map)))
7366 RETURN(KERN_INVALID_ADDRESS);
7367 }
7368
7369 /*
7370 * If the source should be destroyed, do it now, since the
7371 * copy was successful.
7372 */
7373 if (src_destroy) {
7374 (void) vm_map_delete(src_map,
7375 vm_map_trunc_page(src_addr),
7376 src_end,
7377 (src_map == kernel_map) ?
7378 VM_MAP_REMOVE_KUNWIRE :
7379 VM_MAP_NO_FLAGS,
7380 VM_MAP_NULL);
7381 } else {
7382 /* fix up the damage we did in the base map */
7383 vm_map_simplify_range(src_map,
7384 vm_map_trunc_page(src_addr),
7385 vm_map_round_page(src_end));
7386 }
7387
7388 vm_map_unlock(src_map);
7389
7390 /* Fix-up start and end points in copy. This is necessary */
7391 /* when the various entries in the copy object were picked */
7392 /* up from different sub-maps */
7393
7394 tmp_entry = vm_map_copy_first_entry(copy);
7395 while (tmp_entry != vm_map_copy_to_entry(copy)) {
7396 tmp_entry->vme_end = copy_addr +
7397 (tmp_entry->vme_end - tmp_entry->vme_start);
7398 tmp_entry->vme_start = copy_addr;
7399 copy_addr += tmp_entry->vme_end - tmp_entry->vme_start;
7400 tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next;
7401 }
7402
7403 *copy_result = copy;
7404 return(KERN_SUCCESS);
7405
7406 #undef RETURN
7407 }
7408
7409 /*
7410 * vm_map_copyin_object:
7411 *
7412 * Create a copy object from an object.
7413 * Our caller donates an object reference.
7414 */
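/*
 * Illustrative sketch (hypothetical caller): the object reference that this
 * routine consumes is typically taken explicitly just before the call, and
 * the resulting copy is later handed to vm_map_copyout() or discarded.
 */
#if 0	/* example only */
static kern_return_t
example_wrap_object(
	vm_object_t		object,
	vm_object_offset_t	offset,
	vm_object_size_t	size,
	vm_map_copy_t		*copy_result)	/* OUT */
{
	/* donate a reference along with the object */
	vm_object_reference(object);
	return vm_map_copyin_object(object, offset, size, copy_result);
}
#endif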
7415
7416 kern_return_t
7417 vm_map_copyin_object(
7418 vm_object_t object,
7419 vm_object_offset_t offset, /* offset of region in object */
7420 vm_object_size_t size, /* size of region in object */
7421 vm_map_copy_t *copy_result) /* OUT */
7422 {
7423 vm_map_copy_t copy; /* Resulting copy */
7424
7425 /*
7426 * We drop the object into a special copy object
7427 * that contains the object directly.
7428 */
7429
7430 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
7431 copy->type = VM_MAP_COPY_OBJECT;
7432 copy->cpy_object = object;
7433 copy->offset = offset;
7434 copy->size = size;
7435
7436 *copy_result = copy;
7437 return(KERN_SUCCESS);
7438 }
7439
7440 static void
7441 vm_map_fork_share(
7442 vm_map_t old_map,
7443 vm_map_entry_t old_entry,
7444 vm_map_t new_map)
7445 {
7446 vm_object_t object;
7447 vm_map_entry_t new_entry;
7448
7449 /*
7450 * New sharing code. New map entry
7451 * references original object. Internal
7452 * objects use an asynchronous copy algorithm for
7453 * future copies. First make sure we have
7454 * the right object. If we need a shadow,
7455 * or someone else already has one, then
7456 * make a new shadow and share it.
7457 */
7458
7459 object = old_entry->object.vm_object;
7460 if (old_entry->is_sub_map) {
7461 assert(old_entry->wired_count == 0);
7462 #ifndef NO_NESTED_PMAP
7463 if(old_entry->use_pmap) {
7464 kern_return_t result;
7465
7466 result = pmap_nest(new_map->pmap,
7467 (old_entry->object.sub_map)->pmap,
7468 (addr64_t)old_entry->vme_start,
7469 (addr64_t)old_entry->vme_start,
7470 (uint64_t)(old_entry->vme_end - old_entry->vme_start));
7471 if(result)
7472 panic("vm_map_fork_share: pmap_nest failed!");
7473 }
7474 #endif /* NO_NESTED_PMAP */
7475 } else if (object == VM_OBJECT_NULL) {
7476 object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
7477 old_entry->vme_start));
7478 old_entry->offset = 0;
7479 old_entry->object.vm_object = object;
7480 assert(!old_entry->needs_copy);
7481 } else if (object->copy_strategy !=
7482 MEMORY_OBJECT_COPY_SYMMETRIC) {
7483
7484 /*
7485 * We are already using an asymmetric
7486 * copy, and therefore we already have
7487 * the right object.
7488 */
7489
7490 assert(! old_entry->needs_copy);
7491 }
7492 else if (old_entry->needs_copy || /* case 1 */
7493 object->shadowed || /* case 2 */
7494 (!object->true_share && /* case 3 */
7495 !old_entry->is_shared &&
7496 (object->size >
7497 (vm_map_size_t)(old_entry->vme_end -
7498 old_entry->vme_start)))) {
7499
7500 /*
7501 * We need to create a shadow.
7502 * There are three cases here.
7503 * In the first case, we need to
7504 * complete a deferred symmetrical
7505 * copy that we participated in.
7506 * In the second and third cases,
7507 * we need to create the shadow so
7508 * that changes that we make to the
7509 * object do not interfere with
7510 * any symmetrical copies which
7511 * have occurred (case 2) or which
7512 * might occur (case 3).
7513 *
7514 * The first case is when we had
7515 * deferred shadow object creation
7516 * via the entry->needs_copy mechanism.
7517 * This mechanism only works when
7518 * only one entry points to the source
7519 * object, and we are about to create
7520 * a second entry pointing to the
7521 * same object. The problem is that
7522 * there is no way of mapping from
7523 * an object to the entries pointing
7524 * to it. (Deferred shadow creation
7525 * works with one entry because it occurs
7526 * at fault time, and we walk from the
7527 * entry to the object when handling
7528 * the fault.)
7529 *
7530 * The second case is when the object
7531 * to be shared has already been copied
7532 * with a symmetric copy, but we point
7533 * directly to the object without
7534 * needs_copy set in our entry. (This
7535 * can happen because different ranges
7536 * of an object can be pointed to by
7537 * different entries. In particular,
7538 * a single entry pointing to an object
7539 * can be split by a call to vm_inherit,
7540 * which, combined with task_create, can
7541 * result in the different entries
7542 * having different needs_copy values.)
7543 * The shadowed flag in the object allows
7544 * us to detect this case. The problem
7545 * with this case is that if this object
7546 * has or will have shadows, then we
7547 * must not perform an asymmetric copy
7548 * of this object, since such a copy
7549 * allows the object to be changed, which
7550 * will break the previous symmetrical
7551 * copies (which rely upon the object
7552 * not changing). In a sense, the shadowed
7553 * flag says "don't change this object".
7554 * We fix this by creating a shadow
7555 * object for this object, and sharing
7556 * that. This works because we are free
7557 * to change the shadow object (and thus
7558 * to use an asymmetric copy strategy);
7559 * this is also semantically correct,
7560 * since this object is temporary, and
7561 * therefore a copy of the object is
7562 * as good as the object itself. (This
7563 * is not true for permanent objects,
7564 * since the pager needs to see changes,
7565 * which won't happen if the changes
7566 * are made to a copy.)
7567 *
7568 * The third case is when the object
7569 * to be shared has parts sticking
7570 * outside of the entry we're working
7571 * with, and thus may in the future
7572 * be subject to a symmetrical copy.
7573 * (This is a preemptive version of
7574 * case 2.)
7575 */
7576
7577 vm_object_shadow(&old_entry->object.vm_object,
7578 &old_entry->offset,
7579 (vm_map_size_t) (old_entry->vme_end -
7580 old_entry->vme_start));
7581
7582 /*
7583 * If we're making a shadow for other than
7584 * copy on write reasons, then we have
7585 * to remove write permission.
7586 */
7587
7588 if (!old_entry->needs_copy &&
7589 (old_entry->protection & VM_PROT_WRITE)) {
7590 vm_prot_t prot;
7591
7592 prot = old_entry->protection & ~VM_PROT_WRITE;
7593
7594 if (override_nx(old_map, old_entry->alias) && prot)
7595 prot |= VM_PROT_EXECUTE;
7596
7597 if (old_map->mapped) {
7598 vm_object_pmap_protect(
7599 old_entry->object.vm_object,
7600 old_entry->offset,
7601 (old_entry->vme_end -
7602 old_entry->vme_start),
7603 PMAP_NULL,
7604 old_entry->vme_start,
7605 prot);
7606 } else {
7607 pmap_protect(old_map->pmap,
7608 old_entry->vme_start,
7609 old_entry->vme_end,
7610 prot);
7611 }
7612 }
7613
7614 old_entry->needs_copy = FALSE;
7615 object = old_entry->object.vm_object;
7616 }
7617
7618 /*
7619 * If object was using a symmetric copy strategy,
7620 * change its copy strategy to the default
7621 * asymmetric copy strategy, which is copy_delay
7622 * in the non-norma case and copy_call in the
7623 * norma case. Bump the reference count for the
7624 * new entry.
7625 */
7626
7627 if(old_entry->is_sub_map) {
7628 vm_map_lock(old_entry->object.sub_map);
7629 vm_map_reference(old_entry->object.sub_map);
7630 vm_map_unlock(old_entry->object.sub_map);
7631 } else {
7632 vm_object_lock(object);
7633 vm_object_reference_locked(object);
7634 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
7635 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
7636 }
7637 vm_object_unlock(object);
7638 }
7639
7640 /*
7641 * Clone the entry, using object ref from above.
7642 * Mark both entries as shared.
7643 */
7644
7645 new_entry = vm_map_entry_create(new_map);
7646 vm_map_entry_copy(new_entry, old_entry);
7647 old_entry->is_shared = TRUE;
7648 new_entry->is_shared = TRUE;
7649
7650 /*
7651 * Insert the entry into the new map -- we
7652 * know we're inserting at the end of the new
7653 * map.
7654 */
7655
7656 vm_map_entry_link(new_map, vm_map_last_entry(new_map), new_entry);
7657
7658 /*
7659 * Update the physical map
7660 */
7661
7662 if (old_entry->is_sub_map) {
7663 /* Bill Angell pmap support goes here */
7664 } else {
7665 pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
7666 old_entry->vme_end - old_entry->vme_start,
7667 old_entry->vme_start);
7668 }
7669 }
7670
7671 static boolean_t
7672 vm_map_fork_copy(
7673 vm_map_t old_map,
7674 vm_map_entry_t *old_entry_p,
7675 vm_map_t new_map)
7676 {
7677 vm_map_entry_t old_entry = *old_entry_p;
7678 vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start;
7679 vm_map_offset_t start = old_entry->vme_start;
7680 vm_map_copy_t copy;
7681 vm_map_entry_t last = vm_map_last_entry(new_map);
7682
7683 vm_map_unlock(old_map);
7684 /*
7685 * Use maxprot version of copyin because we
7686 * care about whether this memory can ever
7687 * be accessed, not just whether it's accessible
7688 * right now.
7689 */
7690 if (vm_map_copyin_maxprot(old_map, start, entry_size, FALSE, &copy)
7691 != KERN_SUCCESS) {
7692 /*
7693 * The map might have changed while it
7694 * was unlocked, check it again. Skip
7695 * any blank space or permanently
7696 * unreadable region.
7697 */
7698 vm_map_lock(old_map);
7699 if (!vm_map_lookup_entry(old_map, start, &last) ||
7700 (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) {
7701 last = last->vme_next;
7702 }
7703 *old_entry_p = last;
7704
7705 /*
7706 * XXX For some error returns, want to
7707 * XXX skip to the next element. Note
7708 * that INVALID_ADDRESS and
7709 * PROTECTION_FAILURE are handled above.
7710 */
7711
7712 return FALSE;
7713 }
7714
7715 /*
7716 * Insert the copy into the new map
7717 */
7718
7719 vm_map_copy_insert(new_map, last, copy);
7720
7721 /*
7722 * Pick up the traversal at the end of
7723 * the copied region.
7724 */
7725
7726 vm_map_lock(old_map);
7727 start += entry_size;
7728 if (! vm_map_lookup_entry(old_map, start, &last)) {
7729 last = last->vme_next;
7730 } else {
7731 if (last->vme_start == start) {
7732 /*
7733 * No need to clip here and we don't
7734 * want to cause any unnecessary
7735 * unnesting...
7736 */
7737 } else {
7738 vm_map_clip_start(old_map, last, start);
7739 }
7740 }
7741 *old_entry_p = last;
7742
7743 return TRUE;
7744 }
7745
7746 /*
7747 * vm_map_fork:
7748 *
7749 * Create and return a new map based on the old
7750 * map, according to the inheritance values on the
7751 * regions in that map.
7752 *
7753 * The source map must not be locked.
7754 */
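/*
 * Illustrative sketch (hypothetical call site): the per-entry inheritance
 * values consulted below are normally set up beforehand with
 * vm_map_inherit().  A region marked VM_INHERIT_SHARE is shared with the
 * child, VM_INHERIT_COPY is copied (copy-on-write where possible), and
 * VM_INHERIT_NONE is simply left out of the new map.
 */
#if 0	/* example only */
static vm_map_t
example_fork_with_shared_range(
	vm_map_t	parent_map,
	vm_map_offset_t	start,
	vm_map_size_t	size)
{
	/* share this range with the child instead of copying it */
	(void) vm_map_inherit(parent_map, start, start + size,
			      VM_INHERIT_SHARE);

	return vm_map_fork(parent_map);
}
#endif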
7755 vm_map_t
7756 vm_map_fork(
7757 vm_map_t old_map)
7758 {
7759 pmap_t new_pmap;
7760 vm_map_t new_map;
7761 vm_map_entry_t old_entry;
7762 vm_map_size_t new_size = 0, entry_size;
7763 vm_map_entry_t new_entry;
7764 boolean_t src_needs_copy;
7765 boolean_t new_entry_needs_copy;
7766
7767 #ifdef __i386__
7768 new_pmap = pmap_create((vm_map_size_t) 0,
7769 old_map->pmap->pm_task_map != TASK_MAP_32BIT);
7770 if (old_map->pmap->pm_task_map == TASK_MAP_64BIT_SHARED)
7771 pmap_set_4GB_pagezero(new_pmap);
7772 #else
7773 new_pmap = pmap_create((vm_map_size_t) 0, 0);
7774 #endif
7775
7776 vm_map_reference_swap(old_map);
7777 vm_map_lock(old_map);
7778
7779 new_map = vm_map_create(new_pmap,
7780 old_map->min_offset,
7781 old_map->max_offset,
7782 old_map->hdr.entries_pageable);
7783
7784 for (
7785 old_entry = vm_map_first_entry(old_map);
7786 old_entry != vm_map_to_entry(old_map);
7787 ) {
7788
7789 entry_size = old_entry->vme_end - old_entry->vme_start;
7790
7791 switch (old_entry->inheritance) {
7792 case VM_INHERIT_NONE:
7793 break;
7794
7795 case VM_INHERIT_SHARE:
7796 vm_map_fork_share(old_map, old_entry, new_map);
7797 new_size += entry_size;
7798 break;
7799
7800 case VM_INHERIT_COPY:
7801
7802 /*
7803 * Inline the copy_quickly case;
7804 * upon failure, fall back on call
7805 * to vm_map_fork_copy.
7806 */
7807
7808 if(old_entry->is_sub_map)
7809 break;
7810 if ((old_entry->wired_count != 0) ||
7811 ((old_entry->object.vm_object != NULL) &&
7812 (old_entry->object.vm_object->true_share))) {
7813 goto slow_vm_map_fork_copy;
7814 }
7815
7816 new_entry = vm_map_entry_create(new_map);
7817 vm_map_entry_copy(new_entry, old_entry);
7818 /* clear address space specifics */
7819 new_entry->use_pmap = FALSE;
7820
7821 if (! vm_object_copy_quickly(
7822 &new_entry->object.vm_object,
7823 old_entry->offset,
7824 (old_entry->vme_end -
7825 old_entry->vme_start),
7826 &src_needs_copy,
7827 &new_entry_needs_copy)) {
7828 vm_map_entry_dispose(new_map, new_entry);
7829 goto slow_vm_map_fork_copy;
7830 }
7831
7832 /*
7833 * Handle copy-on-write obligations
7834 */
7835
7836 if (src_needs_copy && !old_entry->needs_copy) {
7837 vm_prot_t prot;
7838
7839 prot = old_entry->protection & ~VM_PROT_WRITE;
7840
7841 if (override_nx(old_map, old_entry->alias) && prot)
7842 prot |= VM_PROT_EXECUTE;
7843
7844 vm_object_pmap_protect(
7845 old_entry->object.vm_object,
7846 old_entry->offset,
7847 (old_entry->vme_end -
7848 old_entry->vme_start),
7849 ((old_entry->is_shared
7850 || old_map->mapped)
7851 ? PMAP_NULL :
7852 old_map->pmap),
7853 old_entry->vme_start,
7854 prot);
7855
7856 old_entry->needs_copy = TRUE;
7857 }
7858 new_entry->needs_copy = new_entry_needs_copy;
7859
7860 /*
7861 * Insert the entry at the end
7862 * of the map.
7863 */
7864
7865 vm_map_entry_link(new_map, vm_map_last_entry(new_map),
7866 new_entry);
7867 new_size += entry_size;
7868 break;
7869
7870 slow_vm_map_fork_copy:
7871 if (vm_map_fork_copy(old_map, &old_entry, new_map)) {
7872 new_size += entry_size;
7873 }
7874 continue;
7875 }
7876 old_entry = old_entry->vme_next;
7877 }
7878
7879 new_map->size = new_size;
7880 vm_map_unlock(old_map);
7881 vm_map_deallocate(old_map);
7882
7883 return(new_map);
7884 }
7885
7886 /*
7887 * vm_map_exec:
7888 *
7889 * Setup the "new_map" with the proper execution environment according
7890 * to the type of executable (platform, 64bit, chroot environment).
7891 * Map the comm page and shared region, etc...
7892 */
7893 kern_return_t
7894 vm_map_exec(
7895 vm_map_t new_map,
7896 task_t task,
7897 void *fsroot,
7898 cpu_type_t cpu)
7899 {
7900 SHARED_REGION_TRACE_DEBUG(
7901 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): ->\n",
7902 current_task(), new_map, task, fsroot, cpu));
7903 (void) vm_commpage_enter(new_map, task);
7904 (void) vm_shared_region_enter(new_map, task, fsroot, cpu);
7905 SHARED_REGION_TRACE_DEBUG(
7906 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): <-\n",
7907 current_task(), new_map, task, fsroot, cpu));
7908 return KERN_SUCCESS;
7909 }
7910
7911 /*
7912 * vm_map_lookup_locked:
7913 *
7914 * Finds the VM object, offset, and
7915 * protection for a given virtual address in the
7916 * specified map, assuming a page fault of the
7917 * type specified.
7918 *
7919 * Returns the (object, offset, protection) for
7920 * this address, whether it is wired down, and whether
7921 * this map has the only reference to the data in question.
7922 * In order to later verify this lookup, a "version"
7923 * is returned.
7924 *
7925 * The map MUST be locked by the caller and WILL be
7926 * locked on exit. In order to guarantee the
7927 * existence of the returned object, it is returned
7928 * locked.
7929 *
7930 * If a lookup is requested with "write protection"
7931 * specified, the map may be changed to perform virtual
7932 * copying operations, although the data referenced will
7933 * remain the same.
7934 */
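/*
 * Illustrative sketch of the locking discipline described above
 * (hypothetical, modeled loosely on the page-fault path; it assumes the
 * OBJECT_LOCK_EXCLUSIVE request value and the struct behind
 * vm_object_fault_info_t from vm_object.h, plus the companion
 * vm_map_verify() interface for later re-validation).
 */
#if 0	/* example only */
static void
example_lookup(vm_map_t map, vm_map_offset_t vaddr)
{
	vm_map_version_t		version;
	vm_object_t			object;
	vm_object_offset_t		offset;
	vm_prot_t			prot;
	boolean_t			wired;
	struct vm_object_fault_info	fault_info;
	vm_map_t			real_map;
	kern_return_t			kr;

	vm_map_lock_read(map);		/* caller must hold the map lock */
	kr = vm_map_lookup_locked(&map, vaddr, VM_PROT_READ,
				  OBJECT_LOCK_EXCLUSIVE,
				  &version, &object, &offset, &prot,
				  &wired, &fault_info, &real_map);
	if (kr != KERN_SUCCESS) {
		vm_map_unlock_read(map);
		return;
	}
	/*
	 * "object" is returned locked and "version" records the map
	 * timestamp, so the caller can drop the locks, block, and then
	 * re-validate the lookup (e.g. with vm_map_verify()) before
	 * trusting the (object, offset) pair it obtained.
	 */
	vm_object_unlock(object);
	if (real_map != map)
		vm_map_unlock(real_map);
	vm_map_unlock_read(map);
}
#endif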
7935 kern_return_t
7936 vm_map_lookup_locked(
7937 vm_map_t *var_map, /* IN/OUT */
7938 vm_map_offset_t vaddr,
7939 vm_prot_t fault_type,
7940 int object_lock_type,
7941 vm_map_version_t *out_version, /* OUT */
7942 vm_object_t *object, /* OUT */
7943 vm_object_offset_t *offset, /* OUT */
7944 vm_prot_t *out_prot, /* OUT */
7945 boolean_t *wired, /* OUT */
7946 vm_object_fault_info_t fault_info, /* OUT */
7947 vm_map_t *real_map)
7948 {
7949 vm_map_entry_t entry;
7950 register vm_map_t map = *var_map;
7951 vm_map_t old_map = *var_map;
7952 vm_map_t cow_sub_map_parent = VM_MAP_NULL;
7953 vm_map_offset_t cow_parent_vaddr = 0;
7954 vm_map_offset_t old_start = 0;
7955 vm_map_offset_t old_end = 0;
7956 register vm_prot_t prot;
7957
7958 *real_map = map;
7959 RetryLookup: ;
7960
7961 /*
7962 * If the map has an interesting hint, try it before calling
7963 * full blown lookup routine.
7964 */
7965 entry = map->hint;
7966
7967 if ((entry == vm_map_to_entry(map)) ||
7968 (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
7969 vm_map_entry_t tmp_entry;
7970
7971 /*
7972 * Entry was either not a valid hint, or the vaddr
7973 * was not contained in the entry, so do a full lookup.
7974 */
7975 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
7976 if((cow_sub_map_parent) && (cow_sub_map_parent != map))
7977 vm_map_unlock(cow_sub_map_parent);
7978 if((*real_map != map)
7979 && (*real_map != cow_sub_map_parent))
7980 vm_map_unlock(*real_map);
7981 return KERN_INVALID_ADDRESS;
7982 }
7983
7984 entry = tmp_entry;
7985 }
7986 if(map == old_map) {
7987 old_start = entry->vme_start;
7988 old_end = entry->vme_end;
7989 }
7990
7991 /*
7992 * Handle submaps. Drop lock on upper map, submap is
7993 * returned locked.
7994 */
7995
7996 submap_recurse:
7997 if (entry->is_sub_map) {
7998 vm_map_offset_t local_vaddr;
7999 vm_map_offset_t end_delta;
8000 vm_map_offset_t start_delta;
8001 vm_map_entry_t submap_entry;
8002 boolean_t mapped_needs_copy=FALSE;
8003
8004 local_vaddr = vaddr;
8005
8006 if ((entry->use_pmap && !(fault_type & VM_PROT_WRITE))) {
8007 /* if real_map equals map we unlock below */
8008 if ((*real_map != map) &&
8009 (*real_map != cow_sub_map_parent))
8010 vm_map_unlock(*real_map);
8011 *real_map = entry->object.sub_map;
8012 }
8013
8014 if(entry->needs_copy && (fault_type & VM_PROT_WRITE)) {
8015 if (!mapped_needs_copy) {
8016 if (vm_map_lock_read_to_write(map)) {
8017 vm_map_lock_read(map);
8018 /* XXX FBDP: entry still valid ? */
8019 if(*real_map == entry->object.sub_map)
8020 *real_map = map;
8021 goto RetryLookup;
8022 }
8023 vm_map_lock_read(entry->object.sub_map);
8024 cow_sub_map_parent = map;
8025 /* reset base to map before cow object */
8026 /* this is the map which will accept */
8027 /* the new cow object */
8028 old_start = entry->vme_start;
8029 old_end = entry->vme_end;
8030 cow_parent_vaddr = vaddr;
8031 mapped_needs_copy = TRUE;
8032 } else {
8033 vm_map_lock_read(entry->object.sub_map);
8034 if((cow_sub_map_parent != map) &&
8035 (*real_map != map))
8036 vm_map_unlock(map);
8037 }
8038 } else {
8039 vm_map_lock_read(entry->object.sub_map);
8040 /* leave the map locked if it is a target */
8041 /* cow sub_map above; otherwise, just */
8042 /* follow the maps down to the object. */
8043 /* here we unlock, knowing we are not */
8044 /* revisiting the map. */
8045 if((*real_map != map) && (map != cow_sub_map_parent))
8046 vm_map_unlock_read(map);
8047 }
8048
8049 /* XXX FBDP: map has been unlocked, what protects "entry" !? */
8050 *var_map = map = entry->object.sub_map;
8051
8052 /* calculate the offset in the submap for vaddr */
8053 local_vaddr = (local_vaddr - entry->vme_start) + entry->offset;
8054
8055 RetrySubMap:
8056 if(!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
8057 if((cow_sub_map_parent) && (cow_sub_map_parent != map)){
8058 vm_map_unlock(cow_sub_map_parent);
8059 }
8060 if((*real_map != map)
8061 && (*real_map != cow_sub_map_parent)) {
8062 vm_map_unlock(*real_map);
8063 }
8064 *real_map = map;
8065 return KERN_INVALID_ADDRESS;
8066 }
8067
8068 /* find the attenuated shadow of the underlying object */
8069 /* on our target map */
8070
8071 /* In English: the submap object may extend beyond the */
8072 /* region mapped by the entry, or may only fill a portion */
8073 /* of it. For our purposes, we only care if the object */
8074 /* doesn't fill it. In that case the area which will */
8075 /* ultimately be clipped in the top map only needs */
8076 /* to be as big as the portion of the underlying entry */
8077 /* which is mapped. */
8078 start_delta = submap_entry->vme_start > entry->offset ?
8079 submap_entry->vme_start - entry->offset : 0;
8080
8081 end_delta =
8082 (entry->offset + start_delta + (old_end - old_start)) <=
8083 submap_entry->vme_end ?
8084 0 : (entry->offset +
8085 (old_end - old_start))
8086 - submap_entry->vme_end;
8087
8088 old_start += start_delta;
8089 old_end -= end_delta;
8090
8091 if(submap_entry->is_sub_map) {
8092 entry = submap_entry;
8093 vaddr = local_vaddr;
8094 goto submap_recurse;
8095 }
8096
8097 if(((fault_type & VM_PROT_WRITE) && cow_sub_map_parent)) {
8098
8099 vm_object_t sub_object, copy_object;
8100 vm_object_offset_t copy_offset;
8101 vm_map_offset_t local_start;
8102 vm_map_offset_t local_end;
8103 boolean_t copied_slowly = FALSE;
8104
8105 if (vm_map_lock_read_to_write(map)) {
8106 vm_map_lock_read(map);
8107 old_start -= start_delta;
8108 old_end += end_delta;
8109 goto RetrySubMap;
8110 }
8111
8112
8113 sub_object = submap_entry->object.vm_object;
8114 if (sub_object == VM_OBJECT_NULL) {
8115 sub_object =
8116 vm_object_allocate(
8117 (vm_map_size_t)
8118 (submap_entry->vme_end -
8119 submap_entry->vme_start));
8120 submap_entry->object.vm_object = sub_object;
8121 submap_entry->offset = 0;
8122 }
8123 local_start = local_vaddr -
8124 (cow_parent_vaddr - old_start);
8125 local_end = local_vaddr +
8126 (old_end - cow_parent_vaddr);
8127 vm_map_clip_start(map, submap_entry, local_start);
8128 vm_map_clip_end(map, submap_entry, local_end);
8129 /* unnesting was done in vm_map_clip_start/end() */
8130 assert(!submap_entry->use_pmap);
8131
8132 /* This is the COW case; let's connect */
8133 /* an entry in our space to the underlying */
8134 /* object in the submap, bypassing the */
8135 /* submap. */
8136
8137
8138 if(submap_entry->wired_count != 0 ||
8139 (sub_object->copy_strategy ==
8140 MEMORY_OBJECT_COPY_NONE)) {
8141 vm_object_lock(sub_object);
8142 vm_object_copy_slowly(sub_object,
8143 submap_entry->offset,
8144 (submap_entry->vme_end -
8145 submap_entry->vme_start),
8146 FALSE,
8147 &copy_object);
8148 copied_slowly = TRUE;
8149 } else {
8150
8151 /* set up shadow object */
8152 copy_object = sub_object;
8153 vm_object_reference(copy_object);
8154 sub_object->shadowed = TRUE;
8155 submap_entry->needs_copy = TRUE;
8156
8157 prot = submap_entry->protection & ~VM_PROT_WRITE;
8158
8159 if (override_nx(map, submap_entry->alias) && prot)
8160 prot |= VM_PROT_EXECUTE;
8161
8162 vm_object_pmap_protect(
8163 sub_object,
8164 submap_entry->offset,
8165 submap_entry->vme_end -
8166 submap_entry->vme_start,
8167 (submap_entry->is_shared
8168 || map->mapped) ?
8169 PMAP_NULL : map->pmap,
8170 submap_entry->vme_start,
8171 prot);
8172 }
8173
8174 /*
8175 * Adjust the fault offset to the submap entry.
8176 */
8177 copy_offset = (local_vaddr -
8178 submap_entry->vme_start +
8179 submap_entry->offset);
8180
8181 /* This works differently from the */
8182 /* normal submap case. We go back */
8183 /* to the parent of the cow map and */
8184 /* clip out the target portion of */
8185 /* the sub_map, substituting the */
8186 /* new copy object. */
8187
8188 vm_map_unlock(map);
8189 local_start = old_start;
8190 local_end = old_end;
8191 map = cow_sub_map_parent;
8192 *var_map = cow_sub_map_parent;
8193 vaddr = cow_parent_vaddr;
8194 cow_sub_map_parent = NULL;
8195
8196 if(!vm_map_lookup_entry(map,
8197 vaddr, &entry)) {
8198 vm_object_deallocate(
8199 copy_object);
8200 vm_map_lock_write_to_read(map);
8201 return KERN_INVALID_ADDRESS;
8202 }
8203
8204 /* clip out the portion of space */
8205 /* mapped by the sub map which */
8206 /* corresponds to the underlying */
8207 /* object */
8208
8209 /*
8210 * Clip (and unnest) the smallest nested chunk
8211 * possible around the faulting address...
8212 */
8213 local_start = vaddr & ~(pmap_nesting_size_min - 1);
8214 local_end = local_start + pmap_nesting_size_min;
8215 /*
8216 * ... but don't go beyond the "old_start" to "old_end"
8217 * range, to avoid spanning over another VM region
8218 * with a possibly different VM object and/or offset.
8219 */
8220 if (local_start < old_start) {
8221 local_start = old_start;
8222 }
8223 if (local_end > old_end) {
8224 local_end = old_end;
8225 }
8226 /*
8227 * Adjust copy_offset to the start of the range.
8228 */
8229 copy_offset -= (vaddr - local_start);
8230
8231 vm_map_clip_start(map, entry, local_start);
8232 vm_map_clip_end(map, entry, local_end);
8233 /* unnesting was done in vm_map_clip_start/end() */
8234 assert(!entry->use_pmap);
8235
8236 /* substitute copy object for */
8237 /* shared map entry */
8238 vm_map_deallocate(entry->object.sub_map);
8239 entry->is_sub_map = FALSE;
8240 entry->object.vm_object = copy_object;
8241
8242 /* propagate the submap entry's protections */
8243 entry->protection |= submap_entry->protection;
8244 entry->max_protection |= submap_entry->max_protection;
8245
8246 if(copied_slowly) {
8247 entry->offset = local_start - old_start;
8248 entry->needs_copy = FALSE;
8249 entry->is_shared = FALSE;
8250 } else {
8251 entry->offset = copy_offset;
8252 entry->needs_copy = TRUE;
8253 if(entry->inheritance == VM_INHERIT_SHARE)
8254 entry->inheritance = VM_INHERIT_COPY;
8255 if (map != old_map)
8256 entry->is_shared = TRUE;
8257 }
8258 if(entry->inheritance == VM_INHERIT_SHARE)
8259 entry->inheritance = VM_INHERIT_COPY;
8260
8261 vm_map_lock_write_to_read(map);
8262 } else {
8263 if((cow_sub_map_parent)
8264 && (cow_sub_map_parent != *real_map)
8265 && (cow_sub_map_parent != map)) {
8266 vm_map_unlock(cow_sub_map_parent);
8267 }
8268 entry = submap_entry;
8269 vaddr = local_vaddr;
8270 }
8271 }
8272
8273 /*
8274 * Check whether this task is allowed to have
8275 * this page.
8276 */
8277
8278 prot = entry->protection;
8279
8280 if (override_nx(map, entry->alias) && prot) {
8281 /*
8282 * HACK -- if not a stack, then allow execution
8283 */
8284 prot |= VM_PROT_EXECUTE;
8285 }
8286
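/*
 * Worked example (hypothetical values): a write fault, fault_type ==
 * VM_PROT_WRITE, against a read-only entry, prot == VM_PROT_READ,
 * gives (fault_type & prot) == 0, which differs from fault_type,
 * so the check below rejects it with KERN_PROTECTION_FAILURE.
 */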
8287 if ((fault_type & (prot)) != fault_type) {
8288 if (*real_map != map) {
8289 vm_map_unlock(*real_map);
8290 }
8291 *real_map = map;
8292
8293 if ((fault_type & VM_PROT_EXECUTE) && prot)
8294 log_stack_execution_failure((addr64_t)vaddr, prot);
8295
8296 DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL);
8297 return KERN_PROTECTION_FAILURE;
8298 }
8299
8300 /*
8301 * If this page is not pageable, we have to get
8302 * it for all possible accesses.
8303 */
8304
8305 *wired = (entry->wired_count != 0);
8306 if (*wired)
8307 fault_type = prot;
8308
8309 /*
8310 * If the entry was copy-on-write, we either shadow the object now (for a write) or demote the access allowed (for a read).
8311 */
8312
8313 if (entry->needs_copy) {
8314 /*
8315 * If we want to write the page, we may as well
8316 * handle that now since we've got the map locked.
8317 *
8318 * If we don't need to write the page, we just
8319 * demote the permissions allowed.
8320 */
8321
8322 if ((fault_type & VM_PROT_WRITE) || *wired) {
8323 /*
8324 * Make a new object, and place it in the
8325 * object chain. Note that no new references
8326 * have appeared -- one just moved from the
8327 * map to the new object.
8328 */
8329
8330 if (vm_map_lock_read_to_write(map)) {
8331 vm_map_lock_read(map);
8332 goto RetryLookup;
8333 }
8334 vm_object_shadow(&entry->object.vm_object,
8335 &entry->offset,
8336 (vm_map_size_t) (entry->vme_end -
8337 entry->vme_start));
8338
8339 entry->object.vm_object->shadowed = TRUE;
8340 entry->needs_copy = FALSE;
8341 vm_map_lock_write_to_read(map);
8342 }
8343 else {
8344 /*
8345 * We're attempting to read a copy-on-write
8346 * page -- don't allow writes.
8347 */
8348
8349 prot &= (~VM_PROT_WRITE);
8350 }
8351 }
8352
8353 /*
8354 * Create an object if necessary.
8355 */
8356 if (entry->object.vm_object == VM_OBJECT_NULL) {
8357
8358 if (vm_map_lock_read_to_write(map)) {
8359 vm_map_lock_read(map);
8360 goto RetryLookup;
8361 }
8362
8363 entry->object.vm_object = vm_object_allocate(
8364 (vm_map_size_t)(entry->vme_end - entry->vme_start));
8365 entry->offset = 0;
8366 vm_map_lock_write_to_read(map);
8367 }
8368
8369 /*
8370 * Return the object/offset from this entry. If the entry
8371 * was copy-on-write or empty, it has been fixed up. Also
8372 * return the protection.
8373 */
8374
8375 *offset = (vaddr - entry->vme_start) + entry->offset;
8376 *object = entry->object.vm_object;
8377 *out_prot = prot;
8378
8379 if (fault_info) {
8380 fault_info->interruptible = THREAD_UNINT; /* for now... */
8381 /* ... the caller will change "interruptible" if needed */
8382 fault_info->cluster_size = 0;
8383 fault_info->user_tag = entry->alias;
8384 fault_info->behavior = entry->behavior;
8385 fault_info->lo_offset = entry->offset;
8386 fault_info->hi_offset = (entry->vme_end - entry->vme_start) + entry->offset;
8387 fault_info->no_cache = entry->no_cache;
8388 }
8389
8390 /*
8391 * Lock the object to prevent it from disappearing
8392 */
8393 if (object_lock_type == OBJECT_LOCK_EXCLUSIVE)
8394 vm_object_lock(*object);
8395 else
8396 vm_object_lock_shared(*object);
8397
8398 /*
8399 * Save the version number
8400 */
8401
8402 out_version->main_timestamp = map->timestamp;
8403
8404 return KERN_SUCCESS;
8405 }
8406
8407
8408 /*
8409 * vm_map_verify:
8410 *
8411 * Verifies that the map in question has not changed
8412 * since the given version. If successful, the map
8413 * will not change until vm_map_verify_done() is called.
8414 */
8415 boolean_t
8416 vm_map_verify(
8417 register vm_map_t map,
8418 register vm_map_version_t *version) /* REF */
8419 {
8420 boolean_t result;
8421
8422 vm_map_lock_read(map);
8423 result = (map->timestamp == version->main_timestamp);
8424
8425 if (!result)
8426 vm_map_unlock_read(map);
8427
8428 return(result);
8429 }
8430
8431 /*
8432 * vm_map_verify_done:
8433 *
8434 * Releases locks acquired by a vm_map_verify.
8435 *
8436 * This is now a macro in vm/vm_map.h. It does a
8437 * vm_map_unlock_read on the map.
8438 */
8439
8440
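/*
 * Typical usage of the lookup/verify pair (a minimal sketch; the
 * local names below are hypothetical, not taken from any specific
 * caller): capture the version while the map is locked, drop the
 * lock to do blocking work, then re-validate before trusting the
 * earlier lookup.
 *
 *	vm_map_version_t	version;
 *
 *	version.main_timestamp = map->timestamp;
 *	vm_map_unlock_read(map);
 *	...				(blocking work, map unlocked)
 *	if (!vm_map_verify(map, &version)) {
 *		...			(map changed: redo the lookup)
 *	} else {
 *		...			(mapping still valid; the map is
 *					 read-locked again)
 *		vm_map_verify_done(map, &version);
 *	}
 */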
8441 /*
8442 * TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
8443 * Goes away after the regular vm_region_recurse function migrates to
8444 * 64 bits.
8445 * vm_region_recurse: A form of vm_region which follows the
8446 * submaps in a target map
8447 *
8448 */
8449
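/*
 * How user space typically reaches this routine (an illustrative
 * sketch only; the variable names are hypothetical): the
 * mach_vm_region_recurse() call walks a task's regions, descending
 * into nested submaps up to the requested depth.
 *
 *	mach_vm_address_t		addr = 0;
 *	mach_vm_size_t			size;
 *	natural_t			depth = 0;
 *	vm_region_submap_info_data_64_t	info;
 *	mach_msg_type_number_t		count = VM_REGION_SUBMAP_INFO_COUNT_64;
 *	kern_return_t			kr;
 *
 *	kr = mach_vm_region_recurse(task, &addr, &size, &depth,
 *				    (vm_region_recurse_info_t) &info, &count);
 */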
8450 kern_return_t
8451 vm_map_region_recurse_64(
8452 vm_map_t map,
8453 vm_map_offset_t *address, /* IN/OUT */
8454 vm_map_size_t *size, /* OUT */
8455 natural_t *nesting_depth, /* IN/OUT */
8456 vm_region_submap_info_64_t submap_info, /* IN/OUT */
8457 mach_msg_type_number_t *count) /* IN/OUT */
8458 {
8459 vm_region_extended_info_data_t extended;
8460 vm_map_entry_t tmp_entry;
8461 vm_map_offset_t user_address;
8462 unsigned int user_max_depth;
8463
8464 /*
8465 * "curr_entry" is the VM map entry preceding or including the
8466 * address we're looking for.
8467 * "curr_map" is the map or sub-map containing "curr_entry".
8468 * "curr_offset" is the cumulated offset of "curr_map" in the
8469 * target task's address space.
8470 * "curr_depth" is the depth of "curr_map" in the chain of
8471 * sub-maps.
8472 * "curr_max_offset" is the maximum offset we should take into
8473 * account in the current map. It may be smaller than the current
8474 * map's "max_offset" because we might not have mapped it all in
8475 * the upper level map.
8476 */
8477 vm_map_entry_t curr_entry;
8478 vm_map_offset_t curr_offset;
8479 vm_map_t curr_map;
8480 unsigned int curr_depth;
8481 vm_map_offset_t curr_max_offset;
8482
8483 /*
8484 * "next_" is the same as "curr_" but for the VM region immediately
8485 * after the address we're looking for. We need to keep track of this
8486 * too because we want to return info about that region if the
8487 * address we're looking for is not mapped.
8488 */
8489 vm_map_entry_t next_entry;
8490 vm_map_offset_t next_offset;
8491 vm_map_t next_map;
8492 unsigned int next_depth;
8493 vm_map_offset_t next_max_offset;
8494
8495 boolean_t look_for_pages;
8496 vm_region_submap_short_info_64_t short_info;
8497
8498 if (map == VM_MAP_NULL) {
8499 /* no address space to work on */
8500 return KERN_INVALID_ARGUMENT;
8501 }
8502
8503 if (*count < VM_REGION_SUBMAP_INFO_COUNT_64) {
8504 if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) {
8505 /*
8506 * "info" structure is not big enough and
8507 * would overflow
8508 */
8509 return KERN_INVALID_ARGUMENT;
8510 } else {
8511 look_for_pages = FALSE;
8512 *count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
8513 short_info = (vm_region_submap_short_info_64_t) submap_info;
8514 submap_info = NULL;
8515 }
8516 } else {
8517 look_for_pages = TRUE;
8518 *count = VM_REGION_SUBMAP_INFO_COUNT_64;
8519 short_info = NULL;
8520 }
8521
8522
8523 user_address = *address;
8524 user_max_depth = *nesting_depth;
8525
8526 curr_entry = NULL;
8527 curr_map = map;
8528 curr_offset = 0;
8529 curr_depth = 0;
8530 curr_max_offset = curr_map->max_offset;
8531
8532 next_entry = NULL;
8533 next_map = NULL;
8534 next_offset = 0;
8535 next_depth = 0;
8536 next_max_offset = curr_max_offset;
8537
8538 if (not_in_kdp) {
8539 vm_map_lock_read(curr_map);
8540 }
8541
8542 for (;;) {
8543 if (vm_map_lookup_entry(curr_map,
8544 user_address - curr_offset,
8545 &tmp_entry)) {
8546 /* tmp_entry contains the address we're looking for */
8547 curr_entry = tmp_entry;
8548 } else {
8549 /*
8550 * The address is not mapped. "tmp_entry" is the
8551 * map entry preceding the address. We want the next
8552 * one, if it exists.
8553 */
8554 curr_entry = tmp_entry->vme_next;
8555 if (curr_entry == vm_map_to_entry(curr_map) ||
8556 curr_entry->vme_start >= curr_max_offset) {
8557 /* no next entry at this level: stop looking */
8558 if (not_in_kdp) {
8559 vm_map_unlock_read(curr_map);
8560 }
8561 curr_entry = NULL;
8562 curr_map = NULL;
8563 curr_offset = 0;
8564 curr_depth = 0;
8565 curr_max_offset = 0;
8566 break;
8567 }
8568 }
8569
8570 /*
8571 * Is the next entry at this level closer to the address (or
8572 * deeper in the submap chain) than the one we had
8573 * so far ?
8574 */
8575 tmp_entry = curr_entry->vme_next;
8576 if (tmp_entry == vm_map_to_entry(curr_map)) {
8577 /* no next entry at this level */
8578 } else if (tmp_entry->vme_start >= curr_max_offset) {
8579 /*
8580 * tmp_entry is beyond the scope of what we mapped of
8581 * this submap in the upper level: ignore it.
8582 */
8583 } else if ((next_entry == NULL) ||
8584 (tmp_entry->vme_start + curr_offset <=
8585 next_entry->vme_start + next_offset)) {
8586 /*
8587 * We didn't have a "next_entry" or this one is
8588 * closer to the address we're looking for:
8589 * use this "tmp_entry" as the new "next_entry".
8590 */
8591 if (next_entry != NULL) {
8592 /* unlock the last "next_map" */
8593 if (next_map != curr_map && not_in_kdp) {
8594 vm_map_unlock_read(next_map);
8595 }
8596 }
8597 next_entry = tmp_entry;
8598 next_map = curr_map;
8599 next_offset = curr_offset;
8600 next_depth = curr_depth;
8601 next_max_offset = curr_max_offset;
8602 }
8603
8604 if (!curr_entry->is_sub_map ||
8605 curr_depth >= user_max_depth) {
8606 /*
8607 * We hit a leaf map or we reached the maximum depth
8608 * we could, so stop looking. Keep the current map
8609 * locked.
8610 */
8611 break;
8612 }
8613
8614 /*
8615 * Get down to the next submap level.
8616 */
8617
8618 /*
8619 * Lock the next level and unlock the current level,
8620 * unless we need to keep it locked to access the "next_entry"
8621 * later.
8622 */
8623 if (not_in_kdp) {
8624 vm_map_lock_read(curr_entry->object.sub_map);
8625 }
8626 if (curr_map == next_map) {
8627 /* keep "next_map" locked in case we need it */
8628 } else {
8629 /* release this map */
8630 vm_map_unlock_read(curr_map);
8631 }
8632
8633 /*
8634 * Adjust the offset. "curr_entry" maps the submap
8635 * at relative address "curr_entry->vme_start" in the
8636 * curr_map but skips the first "curr_entry->offset"
8637 * bytes of the submap.
8638 * "curr_offset" always represents the offset of a virtual
8639 * address in the curr_map relative to the absolute address
8640 * space (i.e. the top-level VM map).
8641 */
8642 curr_offset +=
8643 (curr_entry->vme_start - curr_entry->offset);
8644 /* switch to the submap */
8645 curr_map = curr_entry->object.sub_map;
8646 curr_depth++;
8647 /*
8648 * "curr_max_offset" allows us to keep track of the
8649 * portion of the submap that is actually mapped at this level:
8650 * the rest of that submap is irrelevant to us, since it's not
8651 * mapped here.
8652 * The relevant portion of the map starts at
8653 * "curr_entry->offset" up to the size of "curr_entry".
8654 */
8655 curr_max_offset =
8656 curr_entry->vme_end - curr_entry->vme_start +
8657 curr_entry->offset;
8658 curr_entry = NULL;
8659 }
8660
8661 if (curr_entry == NULL) {
8662 /* no VM region contains the address... */
8663 if (next_entry == NULL) {
8664 /* ... and no VM region follows it either */
8665 return KERN_INVALID_ADDRESS;
8666 }
8667 /* ... gather info about the next VM region */
8668 curr_entry = next_entry;
8669 curr_map = next_map; /* still locked ... */
8670 curr_offset = next_offset;
8671 curr_depth = next_depth;
8672 curr_max_offset = next_max_offset;
8673 } else {
8674 /* we won't need "next_entry" after all */
8675 if (next_entry != NULL) {
8676 /* release "next_map" */
8677 if (next_map != curr_map && not_in_kdp) {
8678 vm_map_unlock_read(next_map);
8679 }
8680 }
8681 }
8682 next_entry = NULL;
8683 next_map = NULL;
8684 next_offset = 0;
8685 next_depth = 0;
8686 next_max_offset = 0;
8687
8688 *nesting_depth = curr_depth;
8689 *size = curr_entry->vme_end - curr_entry->vme_start;
8690 *address = curr_entry->vme_start + curr_offset;
8691
8692 if (look_for_pages) {
8693 submap_info->user_tag = curr_entry->alias;
8694 submap_info->offset = curr_entry->offset;
8695 submap_info->protection = curr_entry->protection;
8696 submap_info->inheritance = curr_entry->inheritance;
8697 submap_info->max_protection = curr_entry->max_protection;
8698 submap_info->behavior = curr_entry->behavior;
8699 submap_info->user_wired_count = curr_entry->user_wired_count;
8700 submap_info->is_submap = curr_entry->is_sub_map;
8701 submap_info->object_id = (uint32_t) curr_entry->object.vm_object;
8702 } else {
8703 short_info->user_tag = curr_entry->alias;
8704 short_info->offset = curr_entry->offset;
8705 short_info->protection = curr_entry->protection;
8706 short_info->inheritance = curr_entry->inheritance;
8707 short_info->max_protection = curr_entry->max_protection;
8708 short_info->behavior = curr_entry->behavior;
8709 short_info->user_wired_count = curr_entry->user_wired_count;
8710 short_info->is_submap = curr_entry->is_sub_map;
8711 short_info->object_id = (uint32_t) curr_entry->object.vm_object;
8712 }
8713
8714 extended.pages_resident = 0;
8715 extended.pages_swapped_out = 0;
8716 extended.pages_shared_now_private = 0;
8717 extended.pages_dirtied = 0;
8718 extended.external_pager = 0;
8719 extended.shadow_depth = 0;
8720
8721 if (not_in_kdp) {
8722 if (!curr_entry->is_sub_map) {
8723 vm_map_region_walk(curr_map,
8724 curr_entry->vme_start,
8725 curr_entry,
8726 curr_entry->offset,
8727 (curr_entry->vme_end -
8728 curr_entry->vme_start),
8729 &extended,
8730 look_for_pages);
8731 if (extended.external_pager &&
8732 extended.ref_count == 2 &&
8733 extended.share_mode == SM_SHARED) {
8734 extended.share_mode = SM_PRIVATE;
8735 }
8736 } else {
8737 if (curr_entry->use_pmap) {
8738 extended.share_mode = SM_TRUESHARED;
8739 } else {
8740 extended.share_mode = SM_PRIVATE;
8741 }
8742 extended.ref_count =
8743 curr_entry->object.sub_map->ref_count;
8744 }
8745 }
8746
8747 if (look_for_pages) {
8748 submap_info->pages_resident = extended.pages_resident;
8749 submap_info->pages_swapped_out = extended.pages_swapped_out;
8750 submap_info->pages_shared_now_private =
8751 extended.pages_shared_now_private;
8752 submap_info->pages_dirtied = extended.pages_dirtied;
8753 submap_info->external_pager = extended.external_pager;
8754 submap_info->shadow_depth = extended.shadow_depth;
8755 submap_info->share_mode = extended.share_mode;
8756 submap_info->ref_count = extended.ref_count;
8757 } else {
8758 short_info->external_pager = extended.external_pager;
8759 short_info->shadow_depth = extended.shadow_depth;
8760 short_info->share_mode = extended.share_mode;
8761 short_info->ref_count = extended.ref_count;
8762 }
8763
8764 if (not_in_kdp) {
8765 vm_map_unlock_read(curr_map);
8766 }
8767
8768 return KERN_SUCCESS;
8769 }
8770
8771 /*
8772 * vm_region:
8773 *
8774 * User call to obtain information about a region in
8775 * a task's address map. Several info flavors are
8776 * supported (basic, 64-bit basic, extended and top info).
8777 *
8778 * XXX The reserved and behavior fields cannot be filled
8779 * in until the vm merge from the IK is completed, and
8780 * vm_reserve is implemented.
8781 */
8782
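/*
 * Illustrative user-level path into this routine (a sketch only;
 * variable names are hypothetical): mach_vm_region() asks for one
 * of the flavors handled by the switch below, e.g. the 64-bit
 * basic info.
 *
 *	mach_vm_address_t		addr = 0;
 *	mach_vm_size_t			size;
 *	vm_region_basic_info_data_64_t	info;
 *	mach_msg_type_number_t		count = VM_REGION_BASIC_INFO_COUNT_64;
 *	mach_port_t			object_name;
 *	kern_return_t			kr;
 *
 *	kr = mach_vm_region(task, &addr, &size, VM_REGION_BASIC_INFO_64,
 *			    (vm_region_info_t) &info, &count, &object_name);
 */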
8783 kern_return_t
8784 vm_map_region(
8785 vm_map_t map,
8786 vm_map_offset_t *address, /* IN/OUT */
8787 vm_map_size_t *size, /* OUT */
8788 vm_region_flavor_t flavor, /* IN */
8789 vm_region_info_t info, /* OUT */
8790 mach_msg_type_number_t *count, /* IN/OUT */
8791 mach_port_t *object_name) /* OUT */
8792 {
8793 vm_map_entry_t tmp_entry;
8794 vm_map_entry_t entry;
8795 vm_map_offset_t start;
8796
8797 if (map == VM_MAP_NULL)
8798 return(KERN_INVALID_ARGUMENT);
8799
8800 switch (flavor) {
8801
8802 case VM_REGION_BASIC_INFO:
8803 /* legacy for old 32-bit objects info */
8804 {
8805 vm_region_basic_info_t basic;
8806
8807 if (*count < VM_REGION_BASIC_INFO_COUNT)
8808 return(KERN_INVALID_ARGUMENT);
8809
8810 basic = (vm_region_basic_info_t) info;
8811 *count = VM_REGION_BASIC_INFO_COUNT;
8812
8813 vm_map_lock_read(map);
8814
8815 start = *address;
8816 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
8817 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
8818 vm_map_unlock_read(map);
8819 return(KERN_INVALID_ADDRESS);
8820 }
8821 } else {
8822 entry = tmp_entry;
8823 }
8824
8825 start = entry->vme_start;
8826
8827 basic->offset = (uint32_t)entry->offset;
8828 basic->protection = entry->protection;
8829 basic->inheritance = entry->inheritance;
8830 basic->max_protection = entry->max_protection;
8831 basic->behavior = entry->behavior;
8832 basic->user_wired_count = entry->user_wired_count;
8833 basic->reserved = entry->is_sub_map;
8834 *address = start;
8835 *size = (entry->vme_end - start);
8836
8837 if (object_name) *object_name = IP_NULL;
8838 if (entry->is_sub_map) {
8839 basic->shared = FALSE;
8840 } else {
8841 basic->shared = entry->is_shared;
8842 }
8843
8844 vm_map_unlock_read(map);
8845 return(KERN_SUCCESS);
8846 }
8847
8848 case VM_REGION_BASIC_INFO_64:
8849 {
8850 vm_region_basic_info_64_t basic;
8851
8852 if (*count < VM_REGION_BASIC_INFO_COUNT_64)
8853 return(KERN_INVALID_ARGUMENT);
8854
8855 basic = (vm_region_basic_info_64_t) info;
8856 *count = VM_REGION_BASIC_INFO_COUNT_64;
8857
8858 vm_map_lock_read(map);
8859
8860 start = *address;
8861 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
8862 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
8863 vm_map_unlock_read(map);
8864 return(KERN_INVALID_ADDRESS);
8865 }
8866 } else {
8867 entry = tmp_entry;
8868 }
8869
8870 start = entry->vme_start;
8871
8872 basic->offset = entry->offset;
8873 basic->protection = entry->protection;
8874 basic->inheritance = entry->inheritance;
8875 basic->max_protection = entry->max_protection;
8876 basic->behavior = entry->behavior;
8877 basic->user_wired_count = entry->user_wired_count;
8878 basic->reserved = entry->is_sub_map;
8879 *address = start;
8880 *size = (entry->vme_end - start);
8881
8882 if (object_name) *object_name = IP_NULL;
8883 if (entry->is_sub_map) {
8884 basic->shared = FALSE;
8885 } else {
8886 basic->shared = entry->is_shared;
8887 }
8888
8889 vm_map_unlock_read(map);
8890 return(KERN_SUCCESS);
8891 }
8892 case VM_REGION_EXTENDED_INFO:
8893 {
8894 vm_region_extended_info_t extended;
8895
8896 if (*count < VM_REGION_EXTENDED_INFO_COUNT)
8897 return(KERN_INVALID_ARGUMENT);
8898
8899 extended = (vm_region_extended_info_t) info;
8900 *count = VM_REGION_EXTENDED_INFO_COUNT;
8901
8902 vm_map_lock_read(map);
8903
8904 start = *address;
8905 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
8906 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
8907 vm_map_unlock_read(map);
8908 return(KERN_INVALID_ADDRESS);
8909 }
8910 } else {
8911 entry = tmp_entry;
8912 }
8913 start = entry->vme_start;
8914
8915 extended->protection = entry->protection;
8916 extended->user_tag = entry->alias;
8917 extended->pages_resident = 0;
8918 extended->pages_swapped_out = 0;
8919 extended->pages_shared_now_private = 0;
8920 extended->pages_dirtied = 0;
8921 extended->external_pager = 0;
8922 extended->shadow_depth = 0;
8923
8924 vm_map_region_walk(map, start, entry, entry->offset, entry->vme_end - start, extended, TRUE);
8925
8926 if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED)
8927 extended->share_mode = SM_PRIVATE;
8928
8929 if (object_name)
8930 *object_name = IP_NULL;
8931 *address = start;
8932 *size = (entry->vme_end - start);
8933
8934 vm_map_unlock_read(map);
8935 return(KERN_SUCCESS);
8936 }
8937 case VM_REGION_TOP_INFO:
8938 {
8939 vm_region_top_info_t top;
8940
8941 if (*count < VM_REGION_TOP_INFO_COUNT)
8942 return(KERN_INVALID_ARGUMENT);
8943
8944 top = (vm_region_top_info_t) info;
8945 *count = VM_REGION_TOP_INFO_COUNT;
8946
8947 vm_map_lock_read(map);
8948
8949 start = *address;
8950 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
8951 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
8952 vm_map_unlock_read(map);
8953 return(KERN_INVALID_ADDRESS);
8954 }
8955 } else {
8956 entry = tmp_entry;
8957
8958 }
8959 start = entry->vme_start;
8960
8961 top->private_pages_resident = 0;
8962 top->shared_pages_resident = 0;
8963
8964 vm_map_region_top_walk(entry, top);
8965
8966 if (object_name)
8967 *object_name = IP_NULL;
8968 *address = start;
8969 *size = (entry->vme_end - start);
8970
8971 vm_map_unlock_read(map);
8972 return(KERN_SUCCESS);
8973 }
8974 default:
8975 return(KERN_INVALID_ARGUMENT);
8976 }
8977 }
8978
8979 #define min(a, b) (((a) < (b)) ? (a) : (b))
8980
8981 void
8982 vm_map_region_top_walk(
8983 vm_map_entry_t entry,
8984 vm_region_top_info_t top)
8985 {
8986
8987 if (entry->object.vm_object == 0 || entry->is_sub_map) {
8988 top->share_mode = SM_EMPTY;
8989 top->ref_count = 0;
8990 top->obj_id = 0;
8991 return;
8992 }
8993
8994 {
8995 struct vm_object *obj, *tmp_obj;
8996 int ref_count;
8997 uint32_t entry_size;
8998
8999 entry_size = (entry->vme_end - entry->vme_start) / PAGE_SIZE;
9000
9001 obj = entry->object.vm_object;
9002
9003 vm_object_lock(obj);
9004
9005 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
9006 ref_count--;
9007
9008 if (obj->shadow) {
9009 if (ref_count == 1)
9010 top->private_pages_resident = min(obj->resident_page_count, entry_size);
9011 else
9012 top->shared_pages_resident = min(obj->resident_page_count, entry_size);
9013 top->ref_count = ref_count;
9014 top->share_mode = SM_COW;
9015
9016 while ((tmp_obj = obj->shadow)) {
9017 vm_object_lock(tmp_obj);
9018 vm_object_unlock(obj);
9019 obj = tmp_obj;
9020
9021 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
9022 ref_count--;
9023
9024 top->shared_pages_resident += min(obj->resident_page_count, entry_size);
9025 top->ref_count += ref_count - 1;
9026 }
9027 } else {
9028 if (entry->needs_copy) {
9029 top->share_mode = SM_COW;
9030 top->shared_pages_resident = min(obj->resident_page_count, entry_size);
9031 } else {
9032 if (ref_count == 1 ||
9033 (ref_count == 2 && !(obj->pager_trusted) && !(obj->internal))) {
9034 top->share_mode = SM_PRIVATE;
9035 top->private_pages_resident = min(obj->resident_page_count, entry_size);
9036 } else {
9037 top->share_mode = SM_SHARED;
9038 top->shared_pages_resident = min(obj->resident_page_count, entry_size);
9039 }
9040 }
9041 top->ref_count = ref_count;
9042 }
9043 top->obj_id = (int)obj;
9044
9045 vm_object_unlock(obj);
9046 }
9047 }
9048
9049 void
9050 vm_map_region_walk(
9051 vm_map_t map,
9052 vm_map_offset_t va,
9053 vm_map_entry_t entry,
9054 vm_object_offset_t offset,
9055 vm_object_size_t range,
9056 vm_region_extended_info_t extended,
9057 boolean_t look_for_pages)
9058 {
9059 register struct vm_object *obj, *tmp_obj;
9060 register vm_map_offset_t last_offset;
9061 register int i;
9062 register int ref_count;
9063 struct vm_object *shadow_object;
9064 int shadow_depth;
9065
9066 if ((entry->object.vm_object == 0) ||
9067 (entry->is_sub_map) ||
9068 (entry->object.vm_object->phys_contiguous)) {
9069 extended->share_mode = SM_EMPTY;
9070 extended->ref_count = 0;
9071 return;
9072 }
9073 {
9074 obj = entry->object.vm_object;
9075
9076 vm_object_lock(obj);
9077
9078 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
9079 ref_count--;
9080
9081 if (look_for_pages) {
9082 for (last_offset = offset + range;
9083 offset < last_offset;
9084 offset += PAGE_SIZE_64, va += PAGE_SIZE)
9085 vm_map_region_look_for_page(map, va, obj,
9086 offset, ref_count,
9087 0, extended);
9088 }
9089
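/*
 * Walk the shadow chain hand-over-hand (lock the next shadow
 * before dropping the current one) purely to count its depth;
 * the shadow objects themselves are not examined here.
 */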
9090 shadow_object = obj->shadow;
9091 shadow_depth = 0;
9092 if (shadow_object != VM_OBJECT_NULL) {
9093 vm_object_lock(shadow_object);
9094 for (;
9095 shadow_object != VM_OBJECT_NULL;
9096 shadow_depth++) {
9097 vm_object_t next_shadow;
9098
9099 next_shadow = shadow_object->shadow;
9100 if (next_shadow) {
9101 vm_object_lock(next_shadow);
9102 }
9103 vm_object_unlock(shadow_object);
9104 shadow_object = next_shadow;
9105 }
9106 }
9107 extended->shadow_depth = shadow_depth;
9108
9109 if (extended->shadow_depth || entry->needs_copy)
9110 extended->share_mode = SM_COW;
9111 else {
9112 if (ref_count == 1)
9113 extended->share_mode = SM_PRIVATE;
9114 else {
9115 if (obj->true_share)
9116 extended->share_mode = SM_TRUESHARED;
9117 else
9118 extended->share_mode = SM_SHARED;
9119 }
9120 }
9121 extended->ref_count = ref_count - extended->shadow_depth;
9122
9123 for (i = 0; i < extended->shadow_depth; i++) {
9124 if ((tmp_obj = obj->shadow) == 0)
9125 break;
9126 vm_object_lock(tmp_obj);
9127 vm_object_unlock(obj);
9128
9129 if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress)
9130 ref_count--;
9131
9132 extended->ref_count += ref_count;
9133 obj = tmp_obj;
9134 }
9135 vm_object_unlock(obj);
9136
9137 if (extended->share_mode == SM_SHARED) {
9138 register vm_map_entry_t cur;
9139 register vm_map_entry_t last;
9140 int my_refs;
9141
9142 obj = entry->object.vm_object;
9143 last = vm_map_to_entry(map);
9144 my_refs = 0;
9145
9146 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
9147 ref_count--;
9148 for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next)
9149 my_refs += vm_map_region_count_obj_refs(cur, obj);
9150
9151 if (my_refs == ref_count)
9152 extended->share_mode = SM_PRIVATE_ALIASED;
9153 else if (my_refs > 1)
9154 extended->share_mode = SM_SHARED_ALIASED;
9155 }
9156 }
9157 }
9158
9159
9160 /* object is locked on entry and locked on return */
9161
9162
9163 static void
9164 vm_map_region_look_for_page(
9165 __unused vm_map_t map,
9166 __unused vm_map_offset_t va,
9167 vm_object_t object,
9168 vm_object_offset_t offset,
9169 int max_refcnt,
9170 int depth,
9171 vm_region_extended_info_t extended)
9172 {
9173 register vm_page_t p;
9174 register vm_object_t shadow;
9175 register int ref_count;
9176 vm_object_t caller_object;
9177 #if MACH_PAGEMAP
9178 kern_return_t kr;
9179 #endif
9180 shadow = object->shadow;
9181 caller_object = object;
9182
9183
9184 while (TRUE) {
9185
9186 if ( !(object->pager_trusted) && !(object->internal))
9187 extended->external_pager = 1;
9188
9189 if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
9190 if (shadow && (max_refcnt == 1))
9191 extended->pages_shared_now_private++;
9192
9193 if (!p->fictitious &&
9194 (p->dirty || pmap_is_modified(p->phys_page)))
9195 extended->pages_dirtied++;
9196
9197 extended->pages_resident++;
9198
9199 if(object != caller_object)
9200 vm_object_unlock(object);
9201
9202 return;
9203 }
9204 #if MACH_PAGEMAP
9205 if (object->existence_map) {
9206 if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_EXISTS) {
9207
9208 extended->pages_swapped_out++;
9209
9210 if(object != caller_object)
9211 vm_object_unlock(object);
9212
9213 return;
9214 }
9215 } else if (object->internal &&
9216 object->alive &&
9217 !object->terminating &&
9218 object->pager_ready) {
9219
9220 memory_object_t pager;
9221
9222 vm_object_paging_begin(object);
9223 pager = object->pager;
9224 vm_object_unlock(object);
9225
9226 kr = memory_object_data_request(
9227 pager,
9228 offset + object->paging_offset,
9229 0, /* just poke the pager */
9230 VM_PROT_READ,
9231 NULL);
9232
9233 vm_object_lock(object);
9234 vm_object_paging_end(object);
9235
9236 if (kr == KERN_SUCCESS) {
9237 /* the pager has that page */
9238 extended->pages_swapped_out++;
9239 if (object != caller_object)
9240 vm_object_unlock(object);
9241 return;
9242 }
9243 }
9244 #endif /* MACH_PAGEMAP */
9245
9246 if (shadow) {
9247 vm_object_lock(shadow);
9248
9249 if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress)
9250 ref_count--;
9251
9252 if (++depth > extended->shadow_depth)
9253 extended->shadow_depth = depth;
9254
9255 if (ref_count > max_refcnt)
9256 max_refcnt = ref_count;
9257
9258 if(object != caller_object)
9259 vm_object_unlock(object);
9260
9261 offset = offset + object->shadow_offset;
9262 object = shadow;
9263 shadow = object->shadow;
9264 continue;
9265 }
9266 if(object != caller_object)
9267 vm_object_unlock(object);
9268 break;
9269 }
9270 }
9271
9272 static int
9273 vm_map_region_count_obj_refs(
9274 vm_map_entry_t entry,
9275 vm_object_t object)
9276 {
9277 register int ref_count;
9278 register vm_object_t chk_obj;
9279 register vm_object_t tmp_obj;
9280
9281 if (entry->object.vm_object == 0)
9282 return(0);
9283
9284 if (entry->is_sub_map)
9285 return(0);
9286 else {
9287 ref_count = 0;
9288
9289 chk_obj = entry->object.vm_object;
9290 vm_object_lock(chk_obj);
9291
9292 while (chk_obj) {
9293 if (chk_obj == object)
9294 ref_count++;
9295 tmp_obj = chk_obj->shadow;
9296 if (tmp_obj)
9297 vm_object_lock(tmp_obj);
9298 vm_object_unlock(chk_obj);
9299
9300 chk_obj = tmp_obj;
9301 }
9302 }
9303 return(ref_count);
9304 }
9305
9306
9307 /*
9308 * Routine: vm_map_simplify
9309 *
9310 * Description:
9311 * Attempt to simplify the map representation in
9312 * the vicinity of the given starting address.
9313 * Note:
9314 * This routine is intended primarily to keep the
9315 * kernel maps more compact -- they generally don't
9316 * benefit from the "expand a map entry" technology
9317 * at allocation time because the adjacent entry
9318 * is often wired down.
9319 */
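/*
 * Worked example (hypothetical addresses): given two neighbouring
 * entries backed by the same object with identical attributes,
 *
 *	prev_entry:  [0x1000, 0x2000)  offset 0x0
 *	this_entry:  [0x2000, 0x3000)  offset 0x1000
 *
 * the offsets line up (0x0 + 0x1000 == 0x1000), so prev_entry is
 * unlinked and this_entry grows to [0x1000, 0x3000) with offset 0x0.
 */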
9320 void
9321 vm_map_simplify_entry(
9322 vm_map_t map,
9323 vm_map_entry_t this_entry)
9324 {
9325 vm_map_entry_t prev_entry;
9326
9327 counter(c_vm_map_simplify_entry_called++);
9328
9329 prev_entry = this_entry->vme_prev;
9330
9331 if ((this_entry != vm_map_to_entry(map)) &&
9332 (prev_entry != vm_map_to_entry(map)) &&
9333
9334 (prev_entry->vme_end == this_entry->vme_start) &&
9335
9336 (prev_entry->is_sub_map == this_entry->is_sub_map) &&
9337
9338 (prev_entry->object.vm_object == this_entry->object.vm_object) &&
9339 ((prev_entry->offset + (prev_entry->vme_end -
9340 prev_entry->vme_start))
9341 == this_entry->offset) &&
9342
9343 (prev_entry->inheritance == this_entry->inheritance) &&
9344 (prev_entry->protection == this_entry->protection) &&
9345 (prev_entry->max_protection == this_entry->max_protection) &&
9346 (prev_entry->behavior == this_entry->behavior) &&
9347 (prev_entry->alias == this_entry->alias) &&
9348 (prev_entry->no_cache == this_entry->no_cache) &&
9349 (prev_entry->wired_count == this_entry->wired_count) &&
9350 (prev_entry->user_wired_count == this_entry->user_wired_count) &&
9351
9352 (prev_entry->needs_copy == this_entry->needs_copy) &&
9353
9354 (prev_entry->use_pmap == FALSE) &&
9355 (this_entry->use_pmap == FALSE) &&
9356 (prev_entry->in_transition == FALSE) &&
9357 (this_entry->in_transition == FALSE) &&
9358 (prev_entry->needs_wakeup == FALSE) &&
9359 (this_entry->needs_wakeup == FALSE) &&
9360 (prev_entry->is_shared == FALSE) &&
9361 (this_entry->is_shared == FALSE)
9362 ) {
9363 _vm_map_entry_unlink(&map->hdr, prev_entry);
9364 this_entry->vme_start = prev_entry->vme_start;
9365 this_entry->offset = prev_entry->offset;
9366 if (prev_entry->is_sub_map) {
9367 vm_map_deallocate(prev_entry->object.sub_map);
9368 } else {
9369 vm_object_deallocate(prev_entry->object.vm_object);
9370 }
9371 vm_map_entry_dispose(map, prev_entry);
9372 SAVE_HINT_MAP_WRITE(map, this_entry);
9373 counter(c_vm_map_simplified++);
9374 }
9375 }
9376
9377 void
9378 vm_map_simplify(
9379 vm_map_t map,
9380 vm_map_offset_t start)
9381 {
9382 vm_map_entry_t this_entry;
9383
9384 vm_map_lock(map);
9385 if (vm_map_lookup_entry(map, start, &this_entry)) {
9386 vm_map_simplify_entry(map, this_entry);
9387 vm_map_simplify_entry(map, this_entry->vme_next);
9388 }
9389 counter(c_vm_map_simplify_called++);
9390 vm_map_unlock(map);
9391 }
9392
9393 static void
9394 vm_map_simplify_range(
9395 vm_map_t map,
9396 vm_map_offset_t start,
9397 vm_map_offset_t end)
9398 {
9399 vm_map_entry_t entry;
9400
9401 /*
9402 * The map should be locked (for "write") by the caller.
9403 */
9404
9405 if (start >= end) {
9406 /* invalid address range */
9407 return;
9408 }
9409
9410 start = vm_map_trunc_page(start);
9411 end = vm_map_round_page(end);
9412
9413 if (!vm_map_lookup_entry(map, start, &entry)) {
9414 /* "start" is not mapped and "entry" ends before "start" */
9415 if (entry == vm_map_to_entry(map)) {
9416 /* start with first entry in the map */
9417 entry = vm_map_first_entry(map);
9418 } else {
9419 /* start with next entry */
9420 entry = entry->vme_next;
9421 }
9422 }
9423
9424 while (entry != vm_map_to_entry(map) &&
9425 entry->vme_start <= end) {
9426 /* try and coalesce "entry" with its previous entry */
9427 vm_map_simplify_entry(map, entry);
9428 entry = entry->vme_next;
9429 }
9430 }
9431
9432
9433 /*
9434 * Routine: vm_map_machine_attribute
9435 * Purpose:
9436 * Provide machine-specific attributes to mappings,
9437 * such as cacheability, etc., for machines that provide
9438 * them. NUMA architectures and machines with big/strange
9439 * caches will use this.
9440 * Note:
9441 * Responsibilities for locking and checking are handled here,
9442 * everything else in the pmap module. If any non-volatile
9443 * information must be kept, the pmap module should handle
9444 * it itself. [This assumes that attributes do not
9445 * need to be inherited, which seems ok to me]
9446 */
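/*
 * Illustrative user-level entry point (a sketch; "task", "addr" and
 * "size" are hypothetical): a task can request a cache sync over a
 * range with the vm_machine_attribute() call, which lands here with
 * attribute == MATTR_CACHE.
 *
 *	vm_machine_attribute_val_t	val = MATTR_VAL_CACHE_SYNC;
 *
 *	kr = vm_machine_attribute(task, addr, size, MATTR_CACHE, &val);
 */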
9447 kern_return_t
9448 vm_map_machine_attribute(
9449 vm_map_t map,
9450 vm_map_offset_t start,
9451 vm_map_offset_t end,
9452 vm_machine_attribute_t attribute,
9453 vm_machine_attribute_val_t* value) /* IN/OUT */
9454 {
9455 kern_return_t ret;
9456 vm_map_size_t sync_size;
9457 vm_map_entry_t entry;
9458
9459 if (start < vm_map_min(map) || end > vm_map_max(map))
9460 return KERN_INVALID_ADDRESS;
9461
9462 /* Figure how much memory we need to flush (in page increments) */
9463 sync_size = end - start;
9464
9465 vm_map_lock(map);
9466
9467 if (attribute != MATTR_CACHE) {
9468 /* If we don't have to find physical addresses, we */
9469 /* don't have to do an explicit traversal here. */
9470 ret = pmap_attribute(map->pmap, start, end-start,
9471 attribute, value);
9472 vm_map_unlock(map);
9473 return ret;
9474 }
9475
9476 ret = KERN_SUCCESS; /* Assume it all worked */
9477
9478 while(sync_size) {
9479 if (vm_map_lookup_entry(map, start, &entry)) {
9480 vm_map_size_t sub_size;
9481 if((entry->vme_end - start) > sync_size) {
9482 sub_size = sync_size;
9483 sync_size = 0;
9484 } else {
9485 sub_size = entry->vme_end - start;
9486 sync_size -= sub_size;
9487 }
9488 if(entry->is_sub_map) {
9489 vm_map_offset_t sub_start;
9490 vm_map_offset_t sub_end;
9491
9492 sub_start = (start - entry->vme_start)
9493 + entry->offset;
9494 sub_end = sub_start + sub_size;
9495 vm_map_machine_attribute(
9496 entry->object.sub_map,
9497 sub_start,
9498 sub_end,
9499 attribute, value);
9500 } else {
9501 if(entry->object.vm_object) {
9502 vm_page_t m;
9503 vm_object_t object;
9504 vm_object_t base_object;
9505 vm_object_t last_object;
9506 vm_object_offset_t offset;
9507 vm_object_offset_t base_offset;
9508 vm_map_size_t range;
9509 range = sub_size;
9510 offset = (start - entry->vme_start)
9511 + entry->offset;
9512 base_offset = offset;
9513 object = entry->object.vm_object;
9514 base_object = object;
9515 last_object = NULL;
9516
9517 vm_object_lock(object);
9518
9519 while (range) {
9520 m = vm_page_lookup(
9521 object, offset);
9522
9523 if (m && !m->fictitious) {
9524 ret =
9525 pmap_attribute_cache_sync(
9526 m->phys_page,
9527 PAGE_SIZE,
9528 attribute, value);
9529
9530 } else if (object->shadow) {
9531 offset = offset + object->shadow_offset;
9532 last_object = object;
9533 object = object->shadow;
9534 vm_object_lock(last_object->shadow);
9535 vm_object_unlock(last_object);
9536 continue;
9537 }
9538 range -= PAGE_SIZE;
9539
9540 if (base_object != object) {
9541 vm_object_unlock(object);
9542 vm_object_lock(base_object);
9543 object = base_object;
9544 }
9545 /* Bump to the next page */
9546 base_offset += PAGE_SIZE;
9547 offset = base_offset;
9548 }
9549 vm_object_unlock(object);
9550 }
9551 }
9552 start += sub_size;
9553 } else {
9554 vm_map_unlock(map);
9555 return KERN_FAILURE;
9556 }
9557
9558 }
9559
9560 vm_map_unlock(map);
9561
9562 return ret;
9563 }
9564
9565 /*
9566 * vm_map_behavior_set:
9567 *
9568 * Sets the paging reference behavior of the specified address
9569 * range in the target map. Paging reference behavior affects
9570 * how pagein operations resulting from faults on the map will be
9571 * clustered.
9572 */
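/*
 * Illustrative user-level entry point (a sketch; "task", "addr" and
 * "size" are hypothetical): advising the kernel that a range will be
 * read sequentially, which influences pagein clustering for faults
 * taken on it.
 *
 *	kr = vm_behavior_set(task, addr, size, VM_BEHAVIOR_SEQUENTIAL);
 */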
9573 kern_return_t
9574 vm_map_behavior_set(
9575 vm_map_t map,
9576 vm_map_offset_t start,
9577 vm_map_offset_t end,
9578 vm_behavior_t new_behavior)
9579 {
9580 register vm_map_entry_t entry;
9581 vm_map_entry_t temp_entry;
9582
9583 XPR(XPR_VM_MAP,
9584 "vm_map_behavior_set, 0x%X start 0x%X end 0x%X behavior %d",
9585 (integer_t)map, start, end, new_behavior, 0);
9586
9587 switch (new_behavior) {
9588 case VM_BEHAVIOR_DEFAULT:
9589 case VM_BEHAVIOR_RANDOM:
9590 case VM_BEHAVIOR_SEQUENTIAL:
9591 case VM_BEHAVIOR_RSEQNTL:
9592 break;
9593 case VM_BEHAVIOR_WILLNEED:
9594 case VM_BEHAVIOR_DONTNEED:
9595 new_behavior = VM_BEHAVIOR_DEFAULT;
9596 break;
9597 default:
9598 return(KERN_INVALID_ARGUMENT);
9599 }
9600
9601 vm_map_lock(map);
9602
9603 /*
9604 * The entire address range must be valid for the map.
9605 * Note that vm_map_range_check() does a
9606 * vm_map_lookup_entry() internally and returns the
9607 * entry containing the start of the address range if
9608 * the entire range is valid.
9609 */
9610 if (vm_map_range_check(map, start, end, &temp_entry)) {
9611 entry = temp_entry;
9612 vm_map_clip_start(map, entry, start);
9613 }
9614 else {
9615 vm_map_unlock(map);
9616 return(KERN_INVALID_ADDRESS);
9617 }
9618
9619 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
9620 vm_map_clip_end(map, entry, end);
9621 assert(!entry->use_pmap);
9622
9623 entry->behavior = new_behavior;
9624
9625 entry = entry->vme_next;
9626 }
9627
9628 vm_map_unlock(map);
9629 return(KERN_SUCCESS);
9630 }
9631
9632
9633 #include <mach_kdb.h>
9634 #if MACH_KDB
9635 #include <ddb/db_output.h>
9636 #include <vm/vm_print.h>
9637
9638 #define printf db_printf
9639
9640 /*
9641 * Forward declarations for internal functions.
9642 */
9643 extern void vm_map_links_print(
9644 struct vm_map_links *links);
9645
9646 extern void vm_map_header_print(
9647 struct vm_map_header *header);
9648
9649 extern void vm_map_entry_print(
9650 vm_map_entry_t entry);
9651
9652 extern void vm_follow_entry(
9653 vm_map_entry_t entry);
9654
9655 extern void vm_follow_map(
9656 vm_map_t map);
9657
9658 /*
9659 * vm_map_links_print: [ debug ]
9660 */
9661 void
9662 vm_map_links_print(
9663 struct vm_map_links *links)
9664 {
9665 iprintf("prev = %08X next = %08X start = %016llX end = %016llX\n",
9666 links->prev,
9667 links->next,
9668 (unsigned long long)links->start,
9669 (unsigned long long)links->end);
9670 }
9671
9672 /*
9673 * vm_map_header_print: [ debug ]
9674 */
9675 void
9676 vm_map_header_print(
9677 struct vm_map_header *header)
9678 {
9679 vm_map_links_print(&header->links);
9680 iprintf("nentries = %08X, %sentries_pageable\n",
9681 header->nentries,
9682 (header->entries_pageable ? "" : "!"));
9683 }
9684
9685 /*
9686 * vm_follow_entry: [ debug ]
9687 */
9688 void
9689 vm_follow_entry(
9690 vm_map_entry_t entry)
9691 {
9692 int shadows;
9693
9694 iprintf("map entry %08X\n", entry);
9695
9696 db_indent += 2;
9697
9698 shadows = vm_follow_object(entry->object.vm_object);
9699 iprintf("Total objects : %d\n",shadows);
9700
9701 db_indent -= 2;
9702 }
9703
9704 /*
9705 * vm_map_entry_print: [ debug ]
9706 */
9707 void
9708 vm_map_entry_print(
9709 register vm_map_entry_t entry)
9710 {
9711 static const char *inheritance_name[4] =
9712 { "share", "copy", "none", "?"};
9713 static const char *behavior_name[4] =
9714 { "dflt", "rand", "seqtl", "rseqntl" };
9715
9716 iprintf("map entry %08X - prev = %08X next = %08X\n", entry, entry->vme_prev, entry->vme_next);
9717
9718 db_indent += 2;
9719
9720 vm_map_links_print(&entry->links);
9721
9722 iprintf("start = %016llX end = %016llX - prot=%x/%x/%s\n",
9723 (unsigned long long)entry->vme_start,
9724 (unsigned long long)entry->vme_end,
9725 entry->protection,
9726 entry->max_protection,
9727 inheritance_name[(entry->inheritance & 0x3)]);
9728
9729 iprintf("behavior = %s, wired_count = %d, user_wired_count = %d\n",
9730 behavior_name[(entry->behavior & 0x3)],
9731 entry->wired_count,
9732 entry->user_wired_count);
9733 iprintf("%sin_transition, %sneeds_wakeup\n",
9734 (entry->in_transition ? "" : "!"),
9735 (entry->needs_wakeup ? "" : "!"));
9736
9737 if (entry->is_sub_map) {
9738 iprintf("submap = %08X - offset = %016llX\n",
9739 entry->object.sub_map,
9740 (unsigned long long)entry->offset);
9741 } else {
9742 iprintf("object = %08X offset = %016llX - ",
9743 entry->object.vm_object,
9744 (unsigned long long)entry->offset);
9745 printf("%sis_shared, %sneeds_copy\n",
9746 (entry->is_shared ? "" : "!"),
9747 (entry->needs_copy ? "" : "!"));
9748 }
9749
9750 db_indent -= 2;
9751 }
9752
9753 /*
9754 * vm_follow_map: [ debug ]
9755 */
9756 void
9757 vm_follow_map(
9758 vm_map_t map)
9759 {
9760 register vm_map_entry_t entry;
9761
9762 iprintf("task map %08X\n", map);
9763
9764 db_indent += 2;
9765
9766 for (entry = vm_map_first_entry(map);
9767 entry && entry != vm_map_to_entry(map);
9768 entry = entry->vme_next) {
9769 vm_follow_entry(entry);
9770 }
9771
9772 db_indent -= 2;
9773 }
9774
9775 /*
9776 * vm_map_print: [ debug ]
9777 */
9778 void
9779 vm_map_print(
9780 db_addr_t inmap)
9781 {
9782 register vm_map_entry_t entry;
9783 vm_map_t map;
9784 #if TASK_SWAPPER
9785 char *swstate;
9786 #endif /* TASK_SWAPPER */
9787
9788 map = (vm_map_t)(long)
9789 inmap; /* Make sure we have the right type */
9790
9791 iprintf("task map %08X\n", map);
9792
9793 db_indent += 2;
9794
9795 vm_map_header_print(&map->hdr);
9796
9797 iprintf("pmap = %08X size = %08X ref = %d hint = %08X first_free = %08X\n",
9798 map->pmap,
9799 map->size,
9800 map->ref_count,
9801 map->hint,
9802 map->first_free);
9803
9804 iprintf("%swait_for_space, %swiring_required, timestamp = %d\n",
9805 (map->wait_for_space ? "" : "!"),
9806 (map->wiring_required ? "" : "!"),
9807 map->timestamp);
9808
9809 #if TASK_SWAPPER
9810 switch (map->sw_state) {
9811 case MAP_SW_IN:
9812 swstate = "SW_IN";
9813 break;
9814 case MAP_SW_OUT:
9815 swstate = "SW_OUT";
9816 break;
9817 default:
9818 swstate = "????";
9819 break;
9820 }
9821 iprintf("res = %d, sw_state = %s\n", map->res_count, swstate);
9822 #endif /* TASK_SWAPPER */
9823
9824 for (entry = vm_map_first_entry(map);
9825 entry && entry != vm_map_to_entry(map);
9826 entry = entry->vme_next) {
9827 vm_map_entry_print(entry);
9828 }
9829
9830 db_indent -= 2;
9831 }
9832
9833 /*
9834 * Routine: vm_map_copy_print
9835 * Purpose:
9836 * Pretty-print a copy object for ddb.
9837 */
9838
9839 void
9840 vm_map_copy_print(
9841 db_addr_t incopy)
9842 {
9843 vm_map_copy_t copy;
9844 vm_map_entry_t entry;
9845
9846 copy = (vm_map_copy_t)(long)
9847 incopy; /* Make sure we have the right type */
9848
9849 printf("copy object 0x%x\n", copy);
9850
9851 db_indent += 2;
9852
9853 iprintf("type=%d", copy->type);
9854 switch (copy->type) {
9855 case VM_MAP_COPY_ENTRY_LIST:
9856 printf("[entry_list]");
9857 break;
9858
9859 case VM_MAP_COPY_OBJECT:
9860 printf("[object]");
9861 break;
9862
9863 case VM_MAP_COPY_KERNEL_BUFFER:
9864 printf("[kernel_buffer]");
9865 break;
9866
9867 default:
9868 printf("[bad type]");
9869 break;
9870 }
9871 printf(", offset=0x%llx", (unsigned long long)copy->offset);
9872 printf(", size=0x%x\n", copy->size);
9873
9874 switch (copy->type) {
9875 case VM_MAP_COPY_ENTRY_LIST:
9876 vm_map_header_print(&copy->cpy_hdr);
9877 for (entry = vm_map_copy_first_entry(copy);
9878 entry && entry != vm_map_copy_to_entry(copy);
9879 entry = entry->vme_next) {
9880 vm_map_entry_print(entry);
9881 }
9882 break;
9883
9884 case VM_MAP_COPY_OBJECT:
9885 iprintf("object=0x%x\n", copy->cpy_object);
9886 break;
9887
9888 case VM_MAP_COPY_KERNEL_BUFFER:
9889 iprintf("kernel buffer=0x%x", copy->cpy_kdata);
9890 printf(", kalloc_size=0x%x\n", copy->cpy_kalloc_size);
9891 break;
9892
9893 }
9894
9895 db_indent -=2;
9896 }
9897
9898 /*
9899 * db_vm_map_total_size(map) [ debug ]
9900 *
9901 * return the total virtual size (in bytes) of the map
9902 */
9903 vm_map_size_t
9904 db_vm_map_total_size(
9905 db_addr_t inmap)
9906 {
9907 vm_map_entry_t entry;
9908 vm_map_size_t total;
9909 vm_map_t map;
9910
9911 map = (vm_map_t)(long)
9912 inmap; /* Make sure we have the right type */
9913
9914 total = 0;
9915 for (entry = vm_map_first_entry(map);
9916 entry != vm_map_to_entry(map);
9917 entry = entry->vme_next) {
9918 total += entry->vme_end - entry->vme_start;
9919 }
9920
9921 return total;
9922 }
9923
9924 #endif /* MACH_KDB */
9925
9926 /*
9927 * Routine: vm_map_entry_insert
9928 *
9929 * Description: This routine inserts a new vm_entry in a locked map.
9930 */
9931 vm_map_entry_t
9932 vm_map_entry_insert(
9933 vm_map_t map,
9934 vm_map_entry_t insp_entry,
9935 vm_map_offset_t start,
9936 vm_map_offset_t end,
9937 vm_object_t object,
9938 vm_object_offset_t offset,
9939 boolean_t needs_copy,
9940 boolean_t is_shared,
9941 boolean_t in_transition,
9942 vm_prot_t cur_protection,
9943 vm_prot_t max_protection,
9944 vm_behavior_t behavior,
9945 vm_inherit_t inheritance,
9946 unsigned wired_count,
9947 boolean_t no_cache)
9948 {
9949 vm_map_entry_t new_entry;
9950
9951 assert(insp_entry != (vm_map_entry_t)0);
9952
9953 new_entry = vm_map_entry_create(map);
9954
9955 new_entry->vme_start = start;
9956 new_entry->vme_end = end;
9957 assert(page_aligned(new_entry->vme_start));
9958 assert(page_aligned(new_entry->vme_end));
9959
9960 new_entry->object.vm_object = object;
9961 new_entry->offset = offset;
9962 new_entry->is_shared = is_shared;
9963 new_entry->is_sub_map = FALSE;
9964 new_entry->needs_copy = needs_copy;
9965 new_entry->in_transition = in_transition;
9966 new_entry->needs_wakeup = FALSE;
9967 new_entry->inheritance = inheritance;
9968 new_entry->protection = cur_protection;
9969 new_entry->max_protection = max_protection;
9970 new_entry->behavior = behavior;
9971 new_entry->wired_count = wired_count;
9972 new_entry->user_wired_count = 0;
9973 new_entry->use_pmap = FALSE;
9974 new_entry->alias = 0;
9975 new_entry->no_cache = no_cache;
9976
9977 /*
9978 * Insert the new entry into the list.
9979 */
9980
9981 vm_map_entry_link(map, insp_entry, new_entry);
9982 map->size += end - start;
9983
9984 /*
9985 * Update the free space hint and the lookup hint.
9986 */
9987
9988 SAVE_HINT_MAP_WRITE(map, new_entry);
9989 return new_entry;
9990 }
9991
9992 /*
9993 * Routine: vm_map_remap_extract
9994 *
9995 * Description: This routine returns a vm_entry list from a map.
9996 */
9997 static kern_return_t
9998 vm_map_remap_extract(
9999 vm_map_t map,
10000 vm_map_offset_t addr,
10001 vm_map_size_t size,
10002 boolean_t copy,
10003 struct vm_map_header *map_header,
10004 vm_prot_t *cur_protection,
10005 vm_prot_t *max_protection,
10006 /* What, no behavior? */
10007 vm_inherit_t inheritance,
10008 boolean_t pageable)
10009 {
10010 kern_return_t result;
10011 vm_map_size_t mapped_size;
10012 vm_map_size_t tmp_size;
10013 vm_map_entry_t src_entry; /* result of last map lookup */
10014 vm_map_entry_t new_entry;
10015 vm_object_offset_t offset;
10016 vm_map_offset_t map_address;
10017 vm_map_offset_t src_start; /* start of entry to map */
10018 vm_map_offset_t src_end; /* end of region to be mapped */
10019 vm_object_t object;
10020 vm_map_version_t version;
10021 boolean_t src_needs_copy;
10022 boolean_t new_entry_needs_copy;
10023
10024 assert(map != VM_MAP_NULL);
10025 assert(size != 0 && size == vm_map_round_page(size));
10026 assert(inheritance == VM_INHERIT_NONE ||
10027 inheritance == VM_INHERIT_COPY ||
10028 inheritance == VM_INHERIT_SHARE);
10029
10030 /*
10031 * Compute start and end of region.
10032 */
10033 src_start = vm_map_trunc_page(addr);
10034 src_end = vm_map_round_page(src_start + size);
10035
10036 /*
10037 * Initialize map_header.
10038 */
10039 map_header->links.next = (struct vm_map_entry *)&map_header->links;
10040 map_header->links.prev = (struct vm_map_entry *)&map_header->links;
10041 map_header->nentries = 0;
10042 map_header->entries_pageable = pageable;
10043
10044 *cur_protection = VM_PROT_ALL;
10045 *max_protection = VM_PROT_ALL;
10046
10047 map_address = 0;
10048 mapped_size = 0;
10049 result = KERN_SUCCESS;
10050
10051 /*
10052 * The specified source virtual space might correspond to
10053 * multiple map entries; we need to loop over them.
10054 */
10055 vm_map_lock(map);
10056 while (mapped_size != size) {
10057 vm_map_size_t entry_size;
10058
10059 /*
10060 * Find the beginning of the region.
10061 */
10062 if (! vm_map_lookup_entry(map, src_start, &src_entry)) {
10063 result = KERN_INVALID_ADDRESS;
10064 break;
10065 }
10066
10067 if (src_start < src_entry->vme_start ||
10068 (mapped_size && src_start != src_entry->vme_start)) {
10069 result = KERN_INVALID_ADDRESS;
10070 break;
10071 }
10072
10073 if(src_entry->is_sub_map) {
10074 result = KERN_INVALID_ADDRESS;
10075 break;
10076 }
10077
10078 tmp_size = size - mapped_size;
10079 if (src_end > src_entry->vme_end)
10080 tmp_size -= (src_end - src_entry->vme_end);
10081
10082 entry_size = (vm_map_size_t)(src_entry->vme_end -
10083 src_entry->vme_start);
10084
10085 if(src_entry->is_sub_map) {
10086 vm_map_reference(src_entry->object.sub_map);
10087 object = VM_OBJECT_NULL;
10088 } else {
10089 object = src_entry->object.vm_object;
10090
10091 if (object == VM_OBJECT_NULL) {
10092 object = vm_object_allocate(entry_size);
10093 src_entry->offset = 0;
10094 src_entry->object.vm_object = object;
10095 } else if (object->copy_strategy !=
10096 MEMORY_OBJECT_COPY_SYMMETRIC) {
10097 /*
10098 * We are already using an asymmetric
10099 * copy, and therefore we already have
10100 * the right object.
10101 */
10102 assert(!src_entry->needs_copy);
10103 } else if (src_entry->needs_copy || object->shadowed ||
10104 (object->internal && !object->true_share &&
10105 !src_entry->is_shared &&
10106 object->size > entry_size)) {
10107
10108 vm_object_shadow(&src_entry->object.vm_object,
10109 &src_entry->offset,
10110 entry_size);
10111
10112 if (!src_entry->needs_copy &&
10113 (src_entry->protection & VM_PROT_WRITE)) {
10114 vm_prot_t prot;
10115
10116 prot = src_entry->protection & ~VM_PROT_WRITE;
10117
10118 if (override_nx(map, src_entry->alias) && prot)
10119 prot |= VM_PROT_EXECUTE;
10120
10121 if(map->mapped) {
10122 vm_object_pmap_protect(
10123 src_entry->object.vm_object,
10124 src_entry->offset,
10125 entry_size,
10126 PMAP_NULL,
10127 src_entry->vme_start,
10128 prot);
10129 } else {
10130 pmap_protect(vm_map_pmap(map),
10131 src_entry->vme_start,
10132 src_entry->vme_end,
10133 prot);
10134 }
10135 }
10136
10137 object = src_entry->object.vm_object;
10138 src_entry->needs_copy = FALSE;
10139 }
10140
10141
10142 vm_object_lock(object);
10143 vm_object_reference_locked(object); /* object ref. for new entry */
10144 if (object->copy_strategy ==
10145 MEMORY_OBJECT_COPY_SYMMETRIC) {
10146 object->copy_strategy =
10147 MEMORY_OBJECT_COPY_DELAY;
10148 }
10149 vm_object_unlock(object);
10150 }
10151
10152 offset = src_entry->offset + (src_start - src_entry->vme_start);
10153
10154 new_entry = _vm_map_entry_create(map_header);
10155 vm_map_entry_copy(new_entry, src_entry);
10156 new_entry->use_pmap = FALSE; /* clr address space specifics */
10157
10158 new_entry->vme_start = map_address;
10159 new_entry->vme_end = map_address + tmp_size;
10160 new_entry->inheritance = inheritance;
10161 new_entry->offset = offset;
10162
10163 /*
10164 * The new region has to be copied now if required.
10165 */
10166 RestartCopy:
10167 if (!copy) {
10168 src_entry->is_shared = TRUE;
10169 new_entry->is_shared = TRUE;
10170 if (!(new_entry->is_sub_map))
10171 new_entry->needs_copy = FALSE;
10172
10173 } else if (src_entry->is_sub_map) {
10174 /* make this a COW sub_map if not already */
10175 new_entry->needs_copy = TRUE;
10176 object = VM_OBJECT_NULL;
10177 } else if (src_entry->wired_count == 0 &&
10178 vm_object_copy_quickly(&new_entry->object.vm_object,
10179 new_entry->offset,
10180 (new_entry->vme_end -
10181 new_entry->vme_start),
10182 &src_needs_copy,
10183 &new_entry_needs_copy)) {
10184
10185 new_entry->needs_copy = new_entry_needs_copy;
10186 new_entry->is_shared = FALSE;
10187
10188 /*
10189 * Handle copy_on_write semantics.
10190 */
10191 if (src_needs_copy && !src_entry->needs_copy) {
10192 vm_prot_t prot;
10193
10194 prot = src_entry->protection & ~VM_PROT_WRITE;
10195
10196 if (override_nx(map, src_entry->alias) && prot)
10197 prot |= VM_PROT_EXECUTE;
10198
10199 vm_object_pmap_protect(object,
10200 offset,
10201 entry_size,
10202 ((src_entry->is_shared
10203 || map->mapped) ?
10204 PMAP_NULL : map->pmap),
10205 src_entry->vme_start,
10206 prot);
10207
10208 src_entry->needs_copy = TRUE;
10209 }
10210 /*
10211 * Throw away the old object reference of the new entry.
10212 */
10213 vm_object_deallocate(object);
10214
10215 } else {
10216 new_entry->is_shared = FALSE;
10217
10218 /*
10219 * The map can be safely unlocked since we
10220 * already hold a reference on the object.
10221 *
10222 * Record the timestamp of the map for later
10223 * verification, and unlock the map.
10224 */
10225 version.main_timestamp = map->timestamp;
10226 vm_map_unlock(map); /* Increments timestamp once! */
10227
10228 /*
10229 * Perform the copy.
10230 */
10231 if (src_entry->wired_count > 0) {
10232 vm_object_lock(object);
10233 result = vm_object_copy_slowly(
10234 object,
10235 offset,
10236 entry_size,
10237 THREAD_UNINT,
10238 &new_entry->object.vm_object);
10239
10240 new_entry->offset = 0;
10241 new_entry->needs_copy = FALSE;
10242 } else {
10243 result = vm_object_copy_strategically(
10244 object,
10245 offset,
10246 entry_size,
10247 &new_entry->object.vm_object,
10248 &new_entry->offset,
10249 &new_entry_needs_copy);
10250
10251 new_entry->needs_copy = new_entry_needs_copy;
10252 }
10253
10254 /*
10255 * Throw away the old object reference of the new entry.
10256 */
10257 vm_object_deallocate(object);
10258
10259 if (result != KERN_SUCCESS &&
10260 result != KERN_MEMORY_RESTART_COPY) {
10261 _vm_map_entry_dispose(map_header, new_entry);
10262 break;
10263 }
10264
10265 /*
10266 * Verify that the map has not substantially
10267 * changed while the copy was being made.
10268 */
10269
10270 vm_map_lock(map);
10271 if (version.main_timestamp + 1 != map->timestamp) {
10272 /*
10273 * Simple version comparison failed.
10274 *
10275 * Retry the lookup and verify that the
10276 * same object/offset are still present.
10277 */
10278 vm_object_deallocate(new_entry->
10279 object.vm_object);
10280 _vm_map_entry_dispose(map_header, new_entry);
10281 if (result == KERN_MEMORY_RESTART_COPY)
10282 result = KERN_SUCCESS;
10283 continue;
10284 }
10285
10286 if (result == KERN_MEMORY_RESTART_COPY) {
10287 vm_object_reference(object);
10288 goto RestartCopy;
10289 }
10290 }
10291
10292 _vm_map_entry_link(map_header,
10293 map_header->links.prev, new_entry);
10294
10295 *cur_protection &= src_entry->protection;
10296 *max_protection &= src_entry->max_protection;
10297
10298 map_address += tmp_size;
10299 mapped_size += tmp_size;
10300 src_start += tmp_size;
10301
10302 } /* end while */
10303
10304 vm_map_unlock(map);
10305 if (result != KERN_SUCCESS) {
10306 /*
10307 * Free all allocated elements.
10308 */
10309 for (src_entry = map_header->links.next;
10310 src_entry != (struct vm_map_entry *)&map_header->links;
10311 src_entry = new_entry) {
10312 new_entry = src_entry->vme_next;
10313 _vm_map_entry_unlink(map_header, src_entry);
10314 vm_object_deallocate(src_entry->object.vm_object);
10315 _vm_map_entry_dispose(map_header, src_entry);
10316 }
10317 }
10318 return result;
10319 }
10320
10321 /*
10322 * Routine: vm_remap
10323 *
10324 * Map a portion of a task's address space.
10325 * Mapped region must not overlap more than
10326 * one vm memory object. Protections and
10327 * inheritance attributes remain the same
10328 * as in the original task and are returned as out parameters.
10329 * Source and target task can be identical.
10330 * Other attributes are identical to those for vm_map().
10331 */
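/*
 * Illustrative call (a rough sketch; target_map, src_map, src_addr and
 * size are assumed to have been set up by the caller, and dst_addr,
 * cur_prot and max_prot are hypothetical locals).  A zero mask asks for
 * no extra alignment, TRUE for "anywhere" lets the kernel pick the
 * address, and FALSE for "copy" shares rather than copies:
 *
 *	vm_map_address_t	dst_addr = 0;
 *	vm_prot_t		cur_prot, max_prot;
 *
 *	kr = vm_map_remap(target_map, &dst_addr, size,
 *			  (vm_map_offset_t)0, TRUE,
 *			  src_map, src_addr, FALSE,
 *			  &cur_prot, &max_prot,
 *			  VM_INHERIT_SHARE);
 */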
10332 kern_return_t
10333 vm_map_remap(
10334 vm_map_t target_map,
10335 vm_map_address_t *address,
10336 vm_map_size_t size,
10337 vm_map_offset_t mask,
10338 boolean_t anywhere,
10339 vm_map_t src_map,
10340 vm_map_offset_t memory_address,
10341 boolean_t copy,
10342 vm_prot_t *cur_protection,
10343 vm_prot_t *max_protection,
10344 vm_inherit_t inheritance)
10345 {
10346 kern_return_t result;
10347 vm_map_entry_t entry;
10348 vm_map_entry_t insp_entry = VM_MAP_ENTRY_NULL;
10349 vm_map_entry_t new_entry;
10350 struct vm_map_header map_header;
10351
10352 if (target_map == VM_MAP_NULL)
10353 return KERN_INVALID_ARGUMENT;
10354
10355 switch (inheritance) {
10356 case VM_INHERIT_NONE:
10357 case VM_INHERIT_COPY:
10358 case VM_INHERIT_SHARE:
10359 if (size != 0 && src_map != VM_MAP_NULL)
10360 break;
10361 /*FALL THRU*/
10362 default:
10363 return KERN_INVALID_ARGUMENT;
10364 }
10365
10366 size = vm_map_round_page(size);
10367
10368 result = vm_map_remap_extract(src_map, memory_address,
10369 size, copy, &map_header,
10370 cur_protection,
10371 max_protection,
10372 inheritance,
10373 target_map->hdr.
10374 entries_pageable);
10375
10376 if (result != KERN_SUCCESS) {
10377 return result;
10378 }
10379
10380 /*
10381 * Allocate/check a range of free virtual address
10382 * space for the target
10383 */
10384 *address = vm_map_trunc_page(*address);
10385 vm_map_lock(target_map);
10386 result = vm_map_remap_range_allocate(target_map, address, size,
10387 mask, anywhere, &insp_entry);
10388
10389 for (entry = map_header.links.next;
10390 entry != (struct vm_map_entry *)&map_header.links;
10391 entry = new_entry) {
10392 new_entry = entry->vme_next;
10393 _vm_map_entry_unlink(&map_header, entry);
10394 if (result == KERN_SUCCESS) {
10395 entry->vme_start += *address;
10396 entry->vme_end += *address;
10397 vm_map_entry_link(target_map, insp_entry, entry);
10398 insp_entry = entry;
10399 } else {
10400 if (!entry->is_sub_map) {
10401 vm_object_deallocate(entry->object.vm_object);
10402 } else {
10403 vm_map_deallocate(entry->object.sub_map);
10404 }
10405 _vm_map_entry_dispose(&map_header, entry);
10406 }
10407 }
10408
10409 if (result == KERN_SUCCESS) {
10410 target_map->size += size;
10411 SAVE_HINT_MAP_WRITE(target_map, insp_entry);
10412 }
10413 vm_map_unlock(target_map);
10414
10415 if (result == KERN_SUCCESS && target_map->wiring_required)
10416 result = vm_map_wire(target_map, *address,
10417 *address + size, *cur_protection, TRUE);
10418 return result;
10419 }
10420
10421 /*
10422 * Routine: vm_map_remap_range_allocate
10423 *
10424 * Description:
10425 * Allocate a range in the specified virtual address map.
10426 * Returns the address and the map entry just before the allocated
10427 * range.
10428 *
10429 * Map must be locked.
10430 */
10431
10432 static kern_return_t
10433 vm_map_remap_range_allocate(
10434 vm_map_t map,
10435 vm_map_address_t *address, /* IN/OUT */
10436 vm_map_size_t size,
10437 vm_map_offset_t mask,
10438 boolean_t anywhere,
10439 vm_map_entry_t *map_entry) /* OUT */
10440 {
10441 register vm_map_entry_t entry;
10442 register vm_map_offset_t start;
10443 register vm_map_offset_t end;
10444
10445 StartAgain: ;
10446
10447 start = *address;
10448
10449 if (anywhere)
10450 {
10451 /*
10452 * Calculate the first possible address.
10453 */
10454
10455 if (start < map->min_offset)
10456 start = map->min_offset;
10457 if (start > map->max_offset)
10458 return(KERN_NO_SPACE);
10459
10460 /*
10461 * Look for the first possible address;
10462 * if there's already something at this
10463 * address, we have to start after it.
10464 */
10465
10466 assert(first_free_is_valid(map));
10467 if (start == map->min_offset) {
10468 if ((entry = map->first_free) != vm_map_to_entry(map))
10469 start = entry->vme_end;
10470 } else {
10471 vm_map_entry_t tmp_entry;
10472 if (vm_map_lookup_entry(map, start, &tmp_entry))
10473 start = tmp_entry->vme_end;
10474 entry = tmp_entry;
10475 }
10476
10477 /*
10478 * In any case, the "entry" always precedes
10479 * the proposed new region throughout the
10480 * loop:
10481 */
10482
10483 while (TRUE) {
10484 register vm_map_entry_t next;
10485
10486 /*
10487 * Find the end of the proposed new region.
10488 * Be sure we didn't go beyond the end, or
10489 * wrap around the address.
10490 */
10491
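/*
 * Rounding sketch for the line below, assuming "mask" is an alignment
 * mask of the form (2^n - 1): with mask = 0xFFF and start = 0x12345,
 * end = (0x12345 + 0xFFF) & ~0xFFF = 0x13000, i.e. start rounded up
 * to the next 4KB boundary.
 */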
10492 end = ((start + mask) & ~mask);
10493 if (end < start)
10494 return(KERN_NO_SPACE);
10495 start = end;
10496 end += size;
10497
10498 if ((end > map->max_offset) || (end < start)) {
10499 if (map->wait_for_space) {
10500 if (size <= (map->max_offset -
10501 map->min_offset)) {
10502 assert_wait((event_t) map, THREAD_INTERRUPTIBLE);
10503 vm_map_unlock(map);
10504 thread_block(THREAD_CONTINUE_NULL);
10505 vm_map_lock(map);
10506 goto StartAgain;
10507 }
10508 }
10509
10510 return(KERN_NO_SPACE);
10511 }
10512
10513 /*
10514 * If there are no more entries, we must win.
10515 */
10516
10517 next = entry->vme_next;
10518 if (next == vm_map_to_entry(map))
10519 break;
10520
10521 /*
10522 * If there is another entry, it must be
10523 * after the end of the potential new region.
10524 */
10525
10526 if (next->vme_start >= end)
10527 break;
10528
10529 /*
10530 * Didn't fit -- move to the next entry.
10531 */
10532
10533 entry = next;
10534 start = entry->vme_end;
10535 }
10536 *address = start;
10537 } else {
10538 vm_map_entry_t temp_entry;
10539
10540 /*
10541 * Verify that:
10542 * the address doesn't itself violate
10543 * the mask requirement.
10544 */
10545
10546 if ((start & mask) != 0)
10547 return(KERN_NO_SPACE);
10548
10549
10550 /*
10551 * ... the address is within bounds
10552 */
10553
10554 end = start + size;
10555
10556 if ((start < map->min_offset) ||
10557 (end > map->max_offset) ||
10558 (start >= end)) {
10559 return(KERN_INVALID_ADDRESS);
10560 }
10561
10562 /*
10563 * ... the starting address isn't allocated
10564 */
10565
10566 if (vm_map_lookup_entry(map, start, &temp_entry))
10567 return(KERN_NO_SPACE);
10568
10569 entry = temp_entry;
10570
10571 /*
10572 * ... the next region doesn't overlap the
10573 * end point.
10574 */
10575
10576 if ((entry->vme_next != vm_map_to_entry(map)) &&
10577 (entry->vme_next->vme_start < end))
10578 return(KERN_NO_SPACE);
10579 }
10580 *map_entry = entry;
10581 return(KERN_SUCCESS);
10582 }
10583
10584 /*
10585 * vm_map_switch:
10586 *
10587 * Set the address map for the current thread to the specified map
10588 */
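/*
 * A typical caller brackets a copyin()/copyout() against another
 * task's map with a switch/restore pair, as vm_map_write_user() and
 * vm_map_read_user() below do:
 *
 *	oldmap = vm_map_switch(map);
 *	... copyout()/copyin() against "map" ...
 *	(void) vm_map_switch(oldmap);
 */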
10589
10590 vm_map_t
10591 vm_map_switch(
10592 vm_map_t map)
10593 {
10594 int mycpu;
10595 thread_t thread = current_thread();
10596 vm_map_t oldmap = thread->map;
10597
10598 mp_disable_preemption();
10599 mycpu = cpu_number();
10600
10601 /*
10602 * Deactivate the current map and activate the requested map
10603 */
10604 PMAP_SWITCH_USER(thread, map, mycpu);
10605
10606 mp_enable_preemption();
10607 return(oldmap);
10608 }
10609
10610
10611 /*
10612 * Routine: vm_map_write_user
10613 *
10614 * Description:
10615 * Copy out data from a kernel space into space in the
10616 * destination map. The space must already exist in the
10617 * destination map.
10618 * NOTE: This routine should only be called by threads
10619 * which can block on a page fault, i.e. kernel-mode user
10620 * threads.
10621 *
10622 */
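/*
 * Illustrative use (a sketch; "user_map" and "user_dst" are assumed to
 * name an existing target map and a writable address within it):
 *
 *	char		kbuf[64];
 *	kern_return_t	kr;
 *
 *	... fill kbuf ...
 *	kr = vm_map_write_user(user_map, kbuf, user_dst, sizeof (kbuf));
 */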
10623 kern_return_t
10624 vm_map_write_user(
10625 vm_map_t map,
10626 void *src_p,
10627 vm_map_address_t dst_addr,
10628 vm_size_t size)
10629 {
10630 kern_return_t kr = KERN_SUCCESS;
10631
10632 if(current_map() == map) {
10633 if (copyout(src_p, dst_addr, size)) {
10634 kr = KERN_INVALID_ADDRESS;
10635 }
10636 } else {
10637 vm_map_t oldmap;
10638
10639 /* take on the identity of the target map while doing */
10640 /* the transfer */
10641
10642 vm_map_reference(map);
10643 oldmap = vm_map_switch(map);
10644 if (copyout(src_p, dst_addr, size)) {
10645 kr = KERN_INVALID_ADDRESS;
10646 }
10647 vm_map_switch(oldmap);
10648 vm_map_deallocate(map);
10649 }
10650 return kr;
10651 }
10652
10653 /*
10654 * Routine: vm_map_read_user
10655 *
10656 * Description:
10657 * Copy in data from a user space source map into the
10658 * kernel map. The space must already exist in the
10659 * kernel map.
10660 * NOTE: This routine should only be called by threads
10661 * which can block on a page fault, i.e. kernel-mode user
10662 * threads.
10663 *
10664 */
10665 kern_return_t
10666 vm_map_read_user(
10667 vm_map_t map,
10668 vm_map_address_t src_addr,
10669 void *dst_p,
10670 vm_size_t size)
10671 {
10672 kern_return_t kr = KERN_SUCCESS;
10673
10674 if(current_map() == map) {
10675 if (copyin(src_addr, dst_p, size)) {
10676 kr = KERN_INVALID_ADDRESS;
10677 }
10678 } else {
10679 vm_map_t oldmap;
10680
10681 /* take on the identity of the target map while doing */
10682 /* the transfer */
10683
10684 vm_map_reference(map);
10685 oldmap = vm_map_switch(map);
10686 if (copyin(src_addr, dst_p, size)) {
10687 kr = KERN_INVALID_ADDRESS;
10688 }
10689 vm_map_switch(oldmap);
10690 vm_map_deallocate(map);
10691 }
10692 return kr;
10693 }
10694
10695
10696 /*
10697 * vm_map_check_protection:
10698 *
10699 * Assert that the target map allows the specified
10700 * privilege on the entire address region given.
10701 * The entire region must be allocated.
10702 */
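/*
 * For example, a caller might verify read/write access to a range
 * before operating on it (a sketch; "start" and "size" are assumed to
 * be page aligned by the caller):
 *
 *	if (!vm_map_check_protection(map, start, start + size,
 *				     VM_PROT_READ | VM_PROT_WRITE))
 *		return KERN_PROTECTION_FAILURE;
 */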
10703 boolean_t
10704 vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
10705 vm_map_offset_t end, vm_prot_t protection)
10706 {
10707 vm_map_entry_t entry;
10708 vm_map_entry_t tmp_entry;
10709
10710 vm_map_lock(map);
10711
10712 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
10713 {
10714 vm_map_unlock(map);
10715 return (FALSE);
10716 }
10717
10718 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
10719 vm_map_unlock(map);
10720 return(FALSE);
10721 }
10722
10723 entry = tmp_entry;
10724
10725 while (start < end) {
10726 if (entry == vm_map_to_entry(map)) {
10727 vm_map_unlock(map);
10728 return(FALSE);
10729 }
10730
10731 /*
10732 * No holes allowed!
10733 */
10734
10735 if (start < entry->vme_start) {
10736 vm_map_unlock(map);
10737 return(FALSE);
10738 }
10739
10740 /*
10741 * Check protection associated with entry.
10742 */
10743
10744 if ((entry->protection & protection) != protection) {
10745 vm_map_unlock(map);
10746 return(FALSE);
10747 }
10748
10749 /* go to next entry */
10750
10751 start = entry->vme_end;
10752 entry = entry->vme_next;
10753 }
10754 vm_map_unlock(map);
10755 return(TRUE);
10756 }
10757
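/*
 * vm_map_purgable_control:
 *
 * Get or set the purgeable state of the object backing the mapping at
 * "address".  A minimal usage sketch, assuming "address" maps an
 * entire purgeable object that the caller created:
 *
 *	int state = VM_PURGABLE_VOLATILE;
 *
 *	kr = vm_map_purgable_control(map, address,
 *				     VM_PURGABLE_SET_STATE, &state);
 *	...
 *	kr = vm_map_purgable_control(map, address,
 *				     VM_PURGABLE_GET_STATE, &state);
 */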
10758 kern_return_t
10759 vm_map_purgable_control(
10760 vm_map_t map,
10761 vm_map_offset_t address,
10762 vm_purgable_t control,
10763 int *state)
10764 {
10765 vm_map_entry_t entry;
10766 vm_object_t object;
10767 kern_return_t kr;
10768
10769 /*
10770 * Vet all the input parameters and current type and state of the
10771 * underlying object. Return with an error if anything is amiss.
10772 */
10773 if (map == VM_MAP_NULL)
10774 return(KERN_INVALID_ARGUMENT);
10775
10776 if (control != VM_PURGABLE_SET_STATE &&
10777 control != VM_PURGABLE_GET_STATE)
10778 return(KERN_INVALID_ARGUMENT);
10779
10780 if (control == VM_PURGABLE_SET_STATE &&
10781 (((*state & ~(VM_PURGABLE_STATE_MASK|VM_VOLATILE_ORDER_MASK|VM_PURGABLE_ORDERING_MASK|VM_PURGABLE_BEHAVIOR_MASK|VM_VOLATILE_GROUP_MASK)) != 0) ||
10782 ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK)))
10783 return(KERN_INVALID_ARGUMENT);
10784
10785 vm_map_lock(map);
10786
10787 if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {
10788
10789 /*
10790 * Must pass a valid non-submap address.
10791 */
10792 vm_map_unlock(map);
10793 return(KERN_INVALID_ADDRESS);
10794 }
10795
10796 if ((entry->protection & VM_PROT_WRITE) == 0) {
10797 /*
10798 * Can't apply purgable controls to something you can't write.
10799 */
10800 vm_map_unlock(map);
10801 return(KERN_PROTECTION_FAILURE);
10802 }
10803
10804 object = entry->object.vm_object;
10805 if (object == VM_OBJECT_NULL) {
10806 /*
10807 * Object must already be present or it can't be purgable.
10808 */
10809 vm_map_unlock(map);
10810 return KERN_INVALID_ARGUMENT;
10811 }
10812
10813 vm_object_lock(object);
10814
10815 if (entry->offset != 0 ||
10816 entry->vme_end - entry->vme_start != object->size) {
10817 /*
10818 * Can only apply purgable controls to the whole (existing)
10819 * object at once.
10820 */
10821 vm_map_unlock(map);
10822 vm_object_unlock(object);
10823 return KERN_INVALID_ARGUMENT;
10824 }
10825
10826 vm_map_unlock(map);
10827
10828 kr = vm_object_purgable_control(object, control, state);
10829
10830 vm_object_unlock(object);
10831
10832 return kr;
10833 }
10834
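/*
 * vm_map_page_info:
 *
 * Report the disposition bits and object ref_count for the page at
 * "offset" in "target_map".  A usage sketch (assuming "addr" is an
 * address of interest in the map):
 *
 *	int disp, refs;
 *
 *	kr = vm_map_page_info(map, vm_map_trunc_page(addr),
 *			      &disp, &refs);
 *	if (kr == KERN_SUCCESS && (disp & VM_PAGE_QUERY_PAGE_PRESENT))
 *		... the page is resident ...
 */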
10835 kern_return_t
10836 vm_map_page_info(
10837 vm_map_t target_map,
10838 vm_map_offset_t offset,
10839 int *disposition,
10840 int *ref_count)
10841 {
10842 vm_map_entry_t map_entry;
10843 vm_object_t object;
10844 vm_page_t m;
10845 kern_return_t kr;
10846 kern_return_t retval = KERN_SUCCESS;
10847 boolean_t top_object = TRUE;
10848
10849 *disposition = 0;
10850 *ref_count = 0;
10851
10852 vm_map_lock_read(target_map);
10853
10854 restart_page_query:
10855 if (!vm_map_lookup_entry(target_map, offset, &map_entry)) {
10856 vm_map_unlock_read(target_map);
10857 return KERN_FAILURE;
10858 }
10859 offset -= map_entry->vme_start; /* adjust to offset within entry */
10860 offset += map_entry->offset; /* adjust to target object offset */
10861
10862 if (map_entry->object.vm_object != VM_OBJECT_NULL) {
10863 if (!map_entry->is_sub_map) {
10864 object = map_entry->object.vm_object;
10865 } else {
10866 vm_map_t sub_map;
10867
10868 sub_map = map_entry->object.sub_map;
10869 vm_map_lock_read(sub_map);
10870 vm_map_unlock_read(target_map);
10871
10872 target_map = sub_map;
10873 goto restart_page_query;
10874 }
10875 } else {
10876 vm_map_unlock_read(target_map);
10877 return KERN_SUCCESS;
10878 }
10879 vm_object_lock(object);
10880 vm_map_unlock_read(target_map);
10881
10882 while (TRUE) {
10883 m = vm_page_lookup(object, offset);
10884
10885 if (m != VM_PAGE_NULL) {
10886 *disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
10887 break;
10888 } else {
10889 #if MACH_PAGEMAP
10890 if (object->existence_map) {
10891 if (vm_external_state_get(object->existence_map, offset)
10892 == VM_EXTERNAL_STATE_EXISTS) {
10893 /*
10894 * this page has been paged out
10895 */
10896 *disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
10897 break;
10898 }
10899 } else
10900 #endif
10901 if (object->internal &&
10902 object->alive &&
10903 !object->terminating &&
10904 object->pager_ready) {
10905
10906 memory_object_t pager;
10907
10908 vm_object_paging_begin(object);
10909 pager = object->pager;
10910 vm_object_unlock(object);
10911
10912 kr = memory_object_data_request(
10913 pager,
10914 offset + object->paging_offset,
10915 0, /* just poke the pager */
10916 VM_PROT_READ,
10917 NULL);
10918
10919 vm_object_lock(object);
10920 vm_object_paging_end(object);
10921
10922 if (kr == KERN_SUCCESS) {
10923 /*
10924 * the pager has this page
10925 */
10926 *disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
10927 break;
10928 }
10929 }
10930 if (object->shadow != VM_OBJECT_NULL) {
10931 vm_object_t shadow;
10932
10933 offset += object->shadow_offset;
10934 shadow = object->shadow;
10935
10936 vm_object_lock(shadow);
10937 vm_object_unlock(object);
10938
10939 object = shadow;
10940 top_object = FALSE;
10941 } else {
10942 if (!object->internal)
10943 break;
10944
10945 retval = KERN_FAILURE;
10946 goto page_query_done;
10947 }
10948 }
10949 }
10950 /* The ref_count is not strictly accurate: it measures the number */
10951 /* of entities holding a ref on the object; they may not be mapping */
10952 /* the object, or may not be mapping the section holding the */
10953 /* target page, but it's still a ballpark number and, though an */
10954 /* overcount, it picks up the copy-on-write cases. */
10955
10956 /* We could also get a picture of page sharing from pmap_attributes, */
10957 /* but this would undercount, as only faulted-in mappings would */
10958 /* show up. */
10959
10960 *ref_count = object->ref_count;
10961
10962 if (top_object == TRUE && object->shadow)
10963 *disposition |= VM_PAGE_QUERY_PAGE_COPIED;
10964
10965 if (m == VM_PAGE_NULL)
10966 goto page_query_done;
10967
10968 if (m->fictitious) {
10969 *disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
10970 goto page_query_done;
10971 }
10972 if (m->dirty || pmap_is_modified(m->phys_page))
10973 *disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
10974
10975 if (m->reference || pmap_is_referenced(m->phys_page))
10976 *disposition |= VM_PAGE_QUERY_PAGE_REF;
10977
10978 if (m->speculative)
10979 *disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE;
10980
10981 if (m->cs_validated)
10982 *disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED;
10983 if (m->cs_tainted)
10984 *disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED;
10985
10986 page_query_done:
10987 vm_object_unlock(object);
10988
10989 return retval;
10990 }
10991
10992 /*
10993 * vm_map_msync
10994 *
10995 * Synchronizes the specified memory range with its backing store
10996 * image, by either flushing or cleaning the contents to the
10997 * appropriate memory manager and engaging in a memory object
10998 * synchronize dialog with that manager. The client does not return
10999 * until the manager issues an m_o_s_completed message. MIG magically
11000 * converts the user task parameter to the task's address map.
11001 *
11002 * interpretation of sync_flags
11003 * VM_SYNC_INVALIDATE - discard pages, only return precious
11004 * pages to manager.
11005 *
11006 * VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
11007 * - discard pages, write dirty or precious
11008 * pages back to memory manager.
11009 *
11010 * VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
11011 * - write dirty or precious pages back to
11012 * the memory manager.
11013 *
11014 * VM_SYNC_CONTIGUOUS - does everything normally, but if there
11015 * is a hole in the region, and we would
11016 * have returned KERN_SUCCESS, return
11017 * KERN_INVALID_ADDRESS instead.
11018 *
11019 * NOTE
11020 * The memory object attributes have not yet been implemented; this
11021 * function will have to deal with the invalidate attribute.
11022 *
11023 * RETURNS
11024 * KERN_INVALID_TASK Bad task parameter
11025 * KERN_INVALID_ARGUMENT both sync and async were specified.
11026 * KERN_SUCCESS The usual.
11027 * KERN_INVALID_ADDRESS There was a hole in the region.
11028 */
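/*
 * Usage sketch, with the flags interpreted as described above ("addr"
 * and "size" are assumed to describe a mapped region of interest):
 *
 *	kr = vm_map_msync(map, (vm_map_address_t)addr, size,
 *			  VM_SYNC_SYNCHRONOUS | VM_SYNC_CONTIGUOUS);
 */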
11029
11030 kern_return_t
11031 vm_map_msync(
11032 vm_map_t map,
11033 vm_map_address_t address,
11034 vm_map_size_t size,
11035 vm_sync_t sync_flags)
11036 {
11037 msync_req_t msr;
11038 msync_req_t new_msr;
11039 queue_chain_t req_q; /* queue of requests for this msync */
11040 vm_map_entry_t entry;
11041 vm_map_size_t amount_left;
11042 vm_object_offset_t offset;
11043 boolean_t do_sync_req;
11044 boolean_t modifiable;
11045 boolean_t had_hole = FALSE;
11046 memory_object_t pager;
11047
11048 if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
11049 (sync_flags & VM_SYNC_SYNCHRONOUS))
11050 return(KERN_INVALID_ARGUMENT);
11051
11052 /*
11053 * align address and size on page boundaries
11054 */
11055 size = vm_map_round_page(address + size) - vm_map_trunc_page(address);
11056 address = vm_map_trunc_page(address);
11057
11058 if (map == VM_MAP_NULL)
11059 return(KERN_INVALID_TASK);
11060
11061 if (size == 0)
11062 return(KERN_SUCCESS);
11063
11064 queue_init(&req_q);
11065 amount_left = size;
11066
11067 while (amount_left > 0) {
11068 vm_object_size_t flush_size;
11069 vm_object_t object;
11070
11071 vm_map_lock(map);
11072 if (!vm_map_lookup_entry(map,
11073 vm_map_trunc_page(address), &entry)) {
11074
11075 vm_map_size_t skip;
11076
11077 /*
11078 * hole in the address map.
11079 */
11080 had_hole = TRUE;
11081
11082 /*
11083 * Check for empty map.
11084 */
11085 if (entry == vm_map_to_entry(map) &&
11086 entry->vme_next == entry) {
11087 vm_map_unlock(map);
11088 break;
11089 }
11090 /*
11091 * Check that we don't wrap and that
11092 * we have at least one real map entry.
11093 */
11094 if ((map->hdr.nentries == 0) ||
11095 (entry->vme_next->vme_start < address)) {
11096 vm_map_unlock(map);
11097 break;
11098 }
11099 /*
11100 * Move up to the next entry if needed
11101 */
11102 skip = (entry->vme_next->vme_start - address);
11103 if (skip >= amount_left)
11104 amount_left = 0;
11105 else
11106 amount_left -= skip;
11107 address = entry->vme_next->vme_start;
11108 vm_map_unlock(map);
11109 continue;
11110 }
11111
11112 offset = address - entry->vme_start;
11113
11114 /*
11115 * do we have more to flush than is contained in this
11116 * entry ?
11117 */
11118 if (amount_left + entry->vme_start + offset > entry->vme_end) {
11119 flush_size = entry->vme_end -
11120 (entry->vme_start + offset);
11121 } else {
11122 flush_size = amount_left;
11123 }
11124 amount_left -= flush_size;
11125 address += flush_size;
11126
11127 if (entry->is_sub_map == TRUE) {
11128 vm_map_t local_map;
11129 vm_map_offset_t local_offset;
11130
11131 local_map = entry->object.sub_map;
11132 local_offset = entry->offset;
11133 vm_map_unlock(map);
11134 if (vm_map_msync(
11135 local_map,
11136 local_offset,
11137 flush_size,
11138 sync_flags) == KERN_INVALID_ADDRESS) {
11139 had_hole = TRUE;
11140 }
11141 continue;
11142 }
11143 object = entry->object.vm_object;
11144
11145 /*
11146 * We can't sync this object if the object has not been
11147 * created yet
11148 */
11149 if (object == VM_OBJECT_NULL) {
11150 vm_map_unlock(map);
11151 continue;
11152 }
11153 offset += entry->offset;
11154 modifiable = (entry->protection & VM_PROT_WRITE)
11155 != VM_PROT_NONE;
11156
11157 vm_object_lock(object);
11158
11159 if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
11160 boolean_t kill_pages = 0;
11161
11162 if (sync_flags & VM_SYNC_KILLPAGES) {
11163 if (object->ref_count == 1 && !entry->needs_copy && !object->shadow)
11164 kill_pages = 1;
11165 else
11166 kill_pages = -1;
11167 }
11168 if (kill_pages != -1)
11169 vm_object_deactivate_pages(object, offset,
11170 (vm_object_size_t)flush_size, kill_pages);
11171 vm_object_unlock(object);
11172 vm_map_unlock(map);
11173 continue;
11174 }
11175 /*
11176 * We can't sync this object if there isn't a pager.
11177 * Don't bother to sync internal objects, since there can't
11178 * be any "permanent" storage for these objects anyway.
11179 */
11180 if ((object->pager == MEMORY_OBJECT_NULL) ||
11181 (object->internal) || (object->private)) {
11182 vm_object_unlock(object);
11183 vm_map_unlock(map);
11184 continue;
11185 }
11186 /*
11187 * keep reference on the object until syncing is done
11188 */
11189 vm_object_reference_locked(object);
11190 vm_object_unlock(object);
11191
11192 vm_map_unlock(map);
11193
11194 do_sync_req = vm_object_sync(object,
11195 offset,
11196 flush_size,
11197 sync_flags & VM_SYNC_INVALIDATE,
11198 (modifiable &&
11199 (sync_flags & VM_SYNC_SYNCHRONOUS ||
11200 sync_flags & VM_SYNC_ASYNCHRONOUS)),
11201 sync_flags & VM_SYNC_SYNCHRONOUS);
11202 /*
11203 * only send an m_o_s if we returned pages or if the entry
11204 * is writable (i.e. dirty pages may have already been sent back)
11205 */
11206 if (!do_sync_req && !modifiable) {
11207 if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) {
11208 /*
11209 * clear out the clustering and read-ahead hints
11210 */
11211 vm_object_lock(object);
11212
11213 object->pages_created = 0;
11214 object->pages_used = 0;
11215 object->sequential = 0;
11216 object->last_alloc = 0;
11217
11218 vm_object_unlock(object);
11219 }
11220 vm_object_deallocate(object);
11221 continue;
11222 }
11223 msync_req_alloc(new_msr);
11224
11225 vm_object_lock(object);
11226 offset += object->paging_offset;
11227
11228 new_msr->offset = offset;
11229 new_msr->length = flush_size;
11230 new_msr->object = object;
11231 new_msr->flag = VM_MSYNC_SYNCHRONIZING;
11232 re_iterate:
11233
11234 /*
11235 * We can't sync this object if there isn't a pager. The
11236 * pager can disappear anytime we're not holding the object
11237 * lock. So this has to be checked anytime we goto re_iterate.
11238 */
11239
11240 pager = object->pager;
11241
11242 if (pager == MEMORY_OBJECT_NULL) {
11243 vm_object_unlock(object);
11244 vm_object_deallocate(object);
11245 continue;
11246 }
11247
11248 queue_iterate(&object->msr_q, msr, msync_req_t, msr_q) {
11249 /*
11250 * need to check for overlapping entry, if found, wait
11251 * on overlapping msr to be done, then reiterate
11252 */
11253 msr_lock(msr);
11254 if (msr->flag == VM_MSYNC_SYNCHRONIZING &&
11255 ((offset >= msr->offset &&
11256 offset < (msr->offset + msr->length)) ||
11257 (msr->offset >= offset &&
11258 msr->offset < (offset + flush_size))))
11259 {
11260 assert_wait((event_t) msr,THREAD_INTERRUPTIBLE);
11261 msr_unlock(msr);
11262 vm_object_unlock(object);
11263 thread_block(THREAD_CONTINUE_NULL);
11264 vm_object_lock(object);
11265 goto re_iterate;
11266 }
11267 msr_unlock(msr);
11268 }/* queue_iterate */
11269
11270 queue_enter(&object->msr_q, new_msr, msync_req_t, msr_q);
11271
11272 vm_object_paging_begin(object);
11273 vm_object_unlock(object);
11274
11275 queue_enter(&req_q, new_msr, msync_req_t, req_q);
11276
11277 (void) memory_object_synchronize(
11278 pager,
11279 offset,
11280 flush_size,
11281 sync_flags & ~VM_SYNC_CONTIGUOUS);
11282
11283 vm_object_lock(object);
11284 vm_object_paging_end(object);
11285 vm_object_unlock(object);
11286 }/* while */
11287
11288 /*
11289 * wait for memory_object_synchronize_completed messages from pager(s)
11290 */
11291
11292 while (!queue_empty(&req_q)) {
11293 msr = (msync_req_t)queue_first(&req_q);
11294 msr_lock(msr);
11295 while(msr->flag != VM_MSYNC_DONE) {
11296 assert_wait((event_t) msr, THREAD_INTERRUPTIBLE);
11297 msr_unlock(msr);
11298 thread_block(THREAD_CONTINUE_NULL);
11299 msr_lock(msr);
11300 }/* while */
11301 queue_remove(&req_q, msr, msync_req_t, req_q);
11302 msr_unlock(msr);
11303 vm_object_deallocate(msr->object);
11304 msync_req_free(msr);
11305 }/* while */
11306
11307 /* for proper msync() behaviour */
11308 if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS))
11309 return(KERN_INVALID_ADDRESS);
11310
11311 return(KERN_SUCCESS);
11312 }/* vm_map_msync */
11313
11314 /*
11315 * Routine: convert_port_entry_to_map
11316 * Purpose:
11317 * Convert from a port specifying an entry or a task
11318 * to a map. Doesn't consume the port ref; produces a map ref,
11319 * which may be null. Unlike convert_port_to_map, the
11320 * port may be task backed or named entry backed.
11321 * Conditions:
11322 * Nothing locked.
11323 */
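/*
 * Since this produces a map reference, a successful caller is expected
 * to drop that reference when done (a sketch):
 *
 *	map = convert_port_entry_to_map(port);
 *	if (map == VM_MAP_NULL)
 *		return KERN_INVALID_ARGUMENT;
 *	... use map ...
 *	vm_map_deallocate(map);
 */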
11324
11325
11326 vm_map_t
11327 convert_port_entry_to_map(
11328 ipc_port_t port)
11329 {
11330 vm_map_t map;
11331 vm_named_entry_t named_entry;
11332 uint32_t try_failed_count = 0;
11333
11334 if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
11335 while(TRUE) {
11336 ip_lock(port);
11337 if(ip_active(port) && (ip_kotype(port)
11338 == IKOT_NAMED_ENTRY)) {
11339 named_entry =
11340 (vm_named_entry_t)port->ip_kobject;
11341 if (!(mutex_try(&(named_entry)->Lock))) {
11342 ip_unlock(port);
11343
11344 try_failed_count++;
11345 mutex_pause(try_failed_count);
11346 continue;
11347 }
11348 named_entry->ref_count++;
11349 mutex_unlock(&(named_entry)->Lock);
11350 ip_unlock(port);
11351 if ((named_entry->is_sub_map) &&
11352 (named_entry->protection
11353 & VM_PROT_WRITE)) {
11354 map = named_entry->backing.map;
11355 } else {
11356 mach_destroy_memory_entry(port);
11357 return VM_MAP_NULL;
11358 }
11359 vm_map_reference_swap(map);
11360 mach_destroy_memory_entry(port);
11361 break;
11362 }
11363 else
11364 return VM_MAP_NULL;
11365 }
11366 }
11367 else
11368 map = convert_port_to_map(port);
11369
11370 return map;
11371 }
11372
11373 /*
11374 * Routine: convert_port_entry_to_object
11375 * Purpose:
11376 * Convert from a port specifying a named entry to an
11377 * object. Doesn't consume the port ref; produces an object ref,
11378 * which may be null.
11379 * Conditions:
11380 * Nothing locked.
11381 */
11382
11383
11384 vm_object_t
11385 convert_port_entry_to_object(
11386 ipc_port_t port)
11387 {
11388 vm_object_t object;
11389 vm_named_entry_t named_entry;
11390 uint32_t try_failed_count = 0;
11391
11392 if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
11393 while(TRUE) {
11394 ip_lock(port);
11395 if(ip_active(port) && (ip_kotype(port)
11396 == IKOT_NAMED_ENTRY)) {
11397 named_entry =
11398 (vm_named_entry_t)port->ip_kobject;
11399 if (!(mutex_try(&(named_entry)->Lock))) {
11400 ip_unlock(port);
11401
11402 try_failed_count++;
11403 mutex_pause(try_failed_count);
11404 continue;
11405 }
11406 named_entry->ref_count++;
11407 mutex_unlock(&(named_entry)->Lock);
11408 ip_unlock(port);
11409 if ((!named_entry->is_sub_map) &&
11410 (!named_entry->is_pager) &&
11411 (named_entry->protection
11412 & VM_PROT_WRITE)) {
11413 object = named_entry->backing.object;
11414 } else {
11415 mach_destroy_memory_entry(port);
11416 return (vm_object_t)NULL;
11417 }
11418 vm_object_reference(named_entry->backing.object);
11419 mach_destroy_memory_entry(port);
11420 break;
11421 }
11422 else
11423 return (vm_object_t)NULL;
11424 }
11425 } else {
11426 return (vm_object_t)NULL;
11427 }
11428
11429 return object;
11430 }
11431
11432 /*
11433 * Export routines to other components for the things we access locally through
11434 * macros.
11435 */
11436 #undef current_map
11437 vm_map_t
11438 current_map(void)
11439 {
11440 return (current_map_fast());
11441 }
11442
11443 /*
11444 * vm_map_reference:
11445 *
11446 * Most code internal to the osfmk will go through a
11447 * macro defining this. This is always here for the
11448 * use of other kernel components.
11449 */
11450 #undef vm_map_reference
11451 void
11452 vm_map_reference(
11453 register vm_map_t map)
11454 {
11455 if (map == VM_MAP_NULL)
11456 return;
11457
11458 mutex_lock(&map->s_lock);
11459 #if TASK_SWAPPER
11460 assert(map->res_count > 0);
11461 assert(map->ref_count >= map->res_count);
11462 map->res_count++;
11463 #endif
11464 map->ref_count++;
11465 mutex_unlock(&map->s_lock);
11466 }
11467
11468 /*
11469 * vm_map_deallocate:
11470 *
11471 * Removes a reference from the specified map,
11472 * destroying it if no references remain.
11473 * The map should not be locked.
11474 */
11475 void
11476 vm_map_deallocate(
11477 register vm_map_t map)
11478 {
11479 unsigned int ref;
11480
11481 if (map == VM_MAP_NULL)
11482 return;
11483
11484 mutex_lock(&map->s_lock);
11485 ref = --map->ref_count;
11486 if (ref > 0) {
11487 vm_map_res_deallocate(map);
11488 mutex_unlock(&map->s_lock);
11489 return;
11490 }
11491 assert(map->ref_count == 0);
11492 mutex_unlock(&map->s_lock);
11493
11494 #if TASK_SWAPPER
11495 /*
11496 * The map residence count isn't decremented here because
11497 * the vm_map_delete below will traverse the entire map,
11498 * deleting entries, and the residence counts on objects
11499 * and sharing maps will go away then.
11500 */
11501 #endif
11502
11503 vm_map_destroy(map, VM_MAP_NO_FLAGS);
11504 }
11505
11506
11507 void
11508 vm_map_disable_NX(vm_map_t map)
11509 {
11510 if (map == NULL)
11511 return;
11512 if (map->pmap == NULL)
11513 return;
11514
11515 pmap_disable_NX(map->pmap);
11516 }
11517
11518 /* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
11519 * more descriptive.
11520 */
11521 void
11522 vm_map_set_32bit(vm_map_t map)
11523 {
11524 map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
11525 }
11526
11527
11528 void
11529 vm_map_set_64bit(vm_map_t map)
11530 {
11531 map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
11532 }
11533
11534 vm_map_offset_t
11535 vm_compute_max_offset(unsigned is64)
11536 {
11537 return (is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS);
11538 }
11539
11540 boolean_t
11541 vm_map_is_64bit(
11542 vm_map_t map)
11543 {
11544 return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
11545 }
11546
11547 boolean_t
11548 vm_map_has_4GB_pagezero(
11549 vm_map_t map)
11550 {
11551 /*
11552 * XXX FBDP
11553 * We should lock the VM map (for read) here but we can get away
11554 * with it for now because there can't really be any race condition:
11555 * the VM map's min_offset is changed only when the VM map is created
11556 * and when the zero page is established (when the binary gets loaded),
11557 * and this routine gets called only when the task terminates and the
11558 * VM map is being torn down, and when a new map is created via
11559 * load_machfile()/execve().
11560 */
11561 return (map->min_offset >= 0x100000000ULL);
11562 }
11563
11564 void
11565 vm_map_set_4GB_pagezero(vm_map_t map)
11566 {
11567 pmap_set_4GB_pagezero(map->pmap);
11568 }
11569
11570 void
11571 vm_map_clear_4GB_pagezero(vm_map_t map)
11572 {
11573 pmap_clear_4GB_pagezero(map->pmap);
11574 }
11575
11576 /*
11577 * Raise a VM map's minimum offset.
11578 * To strictly enforce "page zero" reservation.
11579 */
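/*
 * For example, a binary loader reserving a 4GB page-zero region might
 * do the following (a sketch; the actual page-zero size comes from the
 * executable's load commands):
 *
 *	kr = vm_map_raise_min_offset(map,
 *				     (vm_map_offset_t)0x100000000ULL);
 */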
11580 kern_return_t
11581 vm_map_raise_min_offset(
11582 vm_map_t map,
11583 vm_map_offset_t new_min_offset)
11584 {
11585 vm_map_entry_t first_entry;
11586
11587 new_min_offset = vm_map_round_page(new_min_offset);
11588
11589 vm_map_lock(map);
11590
11591 if (new_min_offset < map->min_offset) {
11592 /*
11593 * Can't move min_offset backwards, as that would expose
11594 * a part of the address space that was previously, and for
11595 * possibly good reasons, inaccessible.
11596 */
11597 vm_map_unlock(map);
11598 return KERN_INVALID_ADDRESS;
11599 }
11600
11601 first_entry = vm_map_first_entry(map);
11602 if (first_entry != vm_map_to_entry(map) &&
11603 first_entry->vme_start < new_min_offset) {
11604 /*
11605 * Some memory was already allocated below the new
11606 * minimum offset. It's too late to change it now...
11607 */
11608 vm_map_unlock(map);
11609 return KERN_NO_SPACE;
11610 }
11611
11612 map->min_offset = new_min_offset;
11613
11614 vm_map_unlock(map);
11615
11616 return KERN_SUCCESS;
11617 }
11618
11619 /*
11620 * Set the limit on the maximum amount of user wired memory allowed for this map.
11621 * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side of
11622 * the kernel. The limit is checked on the Mach VM side, so we keep a copy here to avoid
11623 * having to reach over to the BSD data structures.
11624 */
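/*
 * For example, when the BSD side applies a new RLIMIT_MEMLOCK value it
 * can mirror it here (a sketch; "new_limit" is whatever value the
 * rlimit machinery computed):
 *
 *	vm_map_set_user_wire_limit(current_map(), (vm_size_t)new_limit);
 */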
11625
11626 void
11627 vm_map_set_user_wire_limit(vm_map_t map,
11628 vm_size_t limit)
11629 {
11630 map->user_wire_limit = limit;
11631 }
11632
11633 void vm_map_set_prot_copy_allow(vm_map_t map,
11634 boolean_t allow)
11635 {
11636 vm_map_lock(map);
11637 map->prot_copy_allow = allow;
11638 vm_map_unlock(map);
11639 }