1 /*
2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_map.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * Virtual memory mapping module.
64 */
65
66 #include <task_swapper.h>
67 #include <mach_assert.h>
68 #include <libkern/OSAtomic.h>
69
70 #include <mach/kern_return.h>
71 #include <mach/port.h>
72 #include <mach/vm_attributes.h>
73 #include <mach/vm_param.h>
74 #include <mach/vm_behavior.h>
75 #include <mach/vm_statistics.h>
76 #include <mach/memory_object.h>
77 #include <mach/mach_vm.h>
78 #include <machine/cpu_capabilities.h>
79 #include <mach/sdt.h>
80
81 #include <kern/assert.h>
82 #include <kern/counters.h>
83 #include <kern/kalloc.h>
84 #include <kern/zalloc.h>
85
86 #include <vm/cpm.h>
87 #include <vm/vm_init.h>
88 #include <vm/vm_fault.h>
89 #include <vm/vm_map.h>
90 #include <vm/vm_object.h>
91 #include <vm/vm_page.h>
92 #include <vm/vm_pageout.h>
93 #include <vm/vm_kern.h>
94 #include <ipc/ipc_port.h>
95 #include <kern/sched_prim.h>
96 #include <kern/misc_protos.h>
97 #include <machine/db_machdep.h>
98 #include <kern/xpr.h>
99
100 #include <mach/vm_map_server.h>
101 #include <mach/mach_host_server.h>
102 #include <vm/vm_protos.h>
103 #include <vm/vm_purgeable_internal.h>
104
105 #ifdef ppc
106 #include <ppc/mappings.h>
107 #endif /* ppc */
108
109 #include <vm/vm_protos.h>
110 #include <vm/vm_shared_region.h>
111
112 /* Internal prototypes
113 */
114
115 static void vm_map_simplify_range(
116 vm_map_t map,
117 vm_map_offset_t start,
118 vm_map_offset_t end); /* forward */
119
120 static boolean_t vm_map_range_check(
121 vm_map_t map,
122 vm_map_offset_t start,
123 vm_map_offset_t end,
124 vm_map_entry_t *entry);
125
126 static vm_map_entry_t _vm_map_entry_create(
127 struct vm_map_header *map_header);
128
129 static void _vm_map_entry_dispose(
130 struct vm_map_header *map_header,
131 vm_map_entry_t entry);
132
133 static void vm_map_pmap_enter(
134 vm_map_t map,
135 vm_map_offset_t addr,
136 vm_map_offset_t end_addr,
137 vm_object_t object,
138 vm_object_offset_t offset,
139 vm_prot_t protection);
140
141 static void _vm_map_clip_end(
142 struct vm_map_header *map_header,
143 vm_map_entry_t entry,
144 vm_map_offset_t end);
145
146 static void _vm_map_clip_start(
147 struct vm_map_header *map_header,
148 vm_map_entry_t entry,
149 vm_map_offset_t start);
150
151 static void vm_map_entry_delete(
152 vm_map_t map,
153 vm_map_entry_t entry);
154
155 static kern_return_t vm_map_delete(
156 vm_map_t map,
157 vm_map_offset_t start,
158 vm_map_offset_t end,
159 int flags,
160 vm_map_t zap_map);
161
162 static kern_return_t vm_map_copy_overwrite_unaligned(
163 vm_map_t dst_map,
164 vm_map_entry_t entry,
165 vm_map_copy_t copy,
166 vm_map_address_t start);
167
168 static kern_return_t vm_map_copy_overwrite_aligned(
169 vm_map_t dst_map,
170 vm_map_entry_t tmp_entry,
171 vm_map_copy_t copy,
172 vm_map_offset_t start,
173 pmap_t pmap);
174
175 static kern_return_t vm_map_copyin_kernel_buffer(
176 vm_map_t src_map,
177 vm_map_address_t src_addr,
178 vm_map_size_t len,
179 boolean_t src_destroy,
180 vm_map_copy_t *copy_result); /* OUT */
181
182 static kern_return_t vm_map_copyout_kernel_buffer(
183 vm_map_t map,
184 vm_map_address_t *addr, /* IN/OUT */
185 vm_map_copy_t copy,
186 boolean_t overwrite);
187
188 static void vm_map_fork_share(
189 vm_map_t old_map,
190 vm_map_entry_t old_entry,
191 vm_map_t new_map);
192
193 static boolean_t vm_map_fork_copy(
194 vm_map_t old_map,
195 vm_map_entry_t *old_entry_p,
196 vm_map_t new_map);
197
198 void vm_map_region_top_walk(
199 vm_map_entry_t entry,
200 vm_region_top_info_t top);
201
202 void vm_map_region_walk(
203 vm_map_t map,
204 vm_map_offset_t va,
205 vm_map_entry_t entry,
206 vm_object_offset_t offset,
207 vm_object_size_t range,
208 vm_region_extended_info_t extended,
209 boolean_t look_for_pages);
210
211 static kern_return_t vm_map_wire_nested(
212 vm_map_t map,
213 vm_map_offset_t start,
214 vm_map_offset_t end,
215 vm_prot_t access_type,
216 boolean_t user_wire,
217 pmap_t map_pmap,
218 vm_map_offset_t pmap_addr);
219
220 static kern_return_t vm_map_unwire_nested(
221 vm_map_t map,
222 vm_map_offset_t start,
223 vm_map_offset_t end,
224 boolean_t user_wire,
225 pmap_t map_pmap,
226 vm_map_offset_t pmap_addr);
227
228 static kern_return_t vm_map_overwrite_submap_recurse(
229 vm_map_t dst_map,
230 vm_map_offset_t dst_addr,
231 vm_map_size_t dst_size);
232
233 static kern_return_t vm_map_copy_overwrite_nested(
234 vm_map_t dst_map,
235 vm_map_offset_t dst_addr,
236 vm_map_copy_t copy,
237 boolean_t interruptible,
238 pmap_t pmap);
239
240 static kern_return_t vm_map_remap_extract(
241 vm_map_t map,
242 vm_map_offset_t addr,
243 vm_map_size_t size,
244 boolean_t copy,
245 struct vm_map_header *map_header,
246 vm_prot_t *cur_protection,
247 vm_prot_t *max_protection,
248 vm_inherit_t inheritance,
249 boolean_t pageable);
250
251 static kern_return_t vm_map_remap_range_allocate(
252 vm_map_t map,
253 vm_map_address_t *address,
254 vm_map_size_t size,
255 vm_map_offset_t mask,
256 boolean_t anywhere,
257 vm_map_entry_t *map_entry);
258
259 static void vm_map_region_look_for_page(
260 vm_map_t map,
261 vm_map_offset_t va,
262 vm_object_t object,
263 vm_object_offset_t offset,
264 int max_refcnt,
265 int depth,
266 vm_region_extended_info_t extended);
267
268 static int vm_map_region_count_obj_refs(
269 vm_map_entry_t entry,
270 vm_object_t object);
271
272
273 static kern_return_t vm_map_willneed(
274 vm_map_t map,
275 vm_map_offset_t start,
276 vm_map_offset_t end);
277
278 static kern_return_t vm_map_reuse_pages(
279 vm_map_t map,
280 vm_map_offset_t start,
281 vm_map_offset_t end);
282
283 static kern_return_t vm_map_reusable_pages(
284 vm_map_t map,
285 vm_map_offset_t start,
286 vm_map_offset_t end);
287
288 static kern_return_t vm_map_can_reuse(
289 vm_map_t map,
290 vm_map_offset_t start,
291 vm_map_offset_t end);
292
293 /*
294 * Macros to copy a vm_map_entry. We must be careful to correctly
295 * manage the wired page count. vm_map_entry_copy() creates a new
296 * map entry to the same memory - the wired count in the new entry
297 * must be set to zero. vm_map_entry_copy_full() creates a new
298 * entry that is identical to the old entry. This preserves the
299 * wire count; it's used for map splitting and zone changing in
300 * vm_map_copyout.
301 */
302 #define vm_map_entry_copy(NEW,OLD) \
303 MACRO_BEGIN \
304 *(NEW) = *(OLD); \
305 (NEW)->is_shared = FALSE; \
306 (NEW)->needs_wakeup = FALSE; \
307 (NEW)->in_transition = FALSE; \
308 (NEW)->wired_count = 0; \
309 (NEW)->user_wired_count = 0; \
310 (NEW)->permanent = FALSE; \
311 MACRO_END
312
313 #define vm_map_entry_copy_full(NEW,OLD) (*(NEW) = *(OLD))
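
/*
 * Illustrative sketch (editor's addition, not part of the original source):
 * the difference between the two copy macros matters when duplicating a
 * wired entry.  Assuming hypothetical locals "old" and "new":
 *
 *	vm_map_entry_copy(new, old);		new->wired_count == 0, so the
 *						wired pages remain accounted
 *						to "old" only.
 *	vm_map_entry_copy_full(new, old);	new->wired_count equals
 *						old->wired_count; used when
 *						"new" simply replaces "old"
 *						(e.g. the zone change in
 *						vm_map_copyout).
 */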
314
315 /*
316 * Decide if we want to allow processes to execute from their data or stack areas.
317 * override_nx() returns true if we do. Data/stack execution can be enabled independently
318 * for 32 and 64 bit processes. Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
319 * or allow_stack_exec to enable data execution for that type of data area for that particular
320 * ABI (or both by or'ing the flags together). These are initialized in the architecture
321 * specific pmap files since the default behavior varies according to architecture. The
322 * main reason it varies is because of the need to provide binary compatibility with old
323 * applications that were written before these restrictions came into being. In the old
324 * days, an app could execute anything it could read, but this has slowly been tightened
325 * up over time. The default behavior is:
326 *
327 * 32-bit PPC apps may execute from both stack and data areas
328  * 32-bit Intel apps may execute from data areas but not stack
329 * 64-bit PPC/Intel apps may not execute from either data or stack
330 *
331 * An application on any architecture may override these defaults by explicitly
332 * adding PROT_EXEC permission to the page in question with the mprotect(2)
333 * system call. This code here just determines what happens when an app tries to
334 * execute from a page that lacks execute permission.
335 *
336 * Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
337 * default behavior for both 32 and 64 bit apps on a system-wide basis.
338 */
339
340 extern int allow_data_exec, allow_stack_exec;
341
342 int
343 override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
344 {
345 int current_abi;
346
347 /*
348 * Determine if the app is running in 32 or 64 bit mode.
349 */
350
351 if (vm_map_is_64bit(map))
352 current_abi = VM_ABI_64;
353 else
354 current_abi = VM_ABI_32;
355
356 /*
357 * Determine if we should allow the execution based on whether it's a
358 * stack or data area and the current architecture.
359 */
360
361 if (user_tag == VM_MEMORY_STACK)
362 return allow_stack_exec & current_abi;
363
364 return allow_data_exec & current_abi;
365 }
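
/*
 * Illustrative sketch (editor's addition): a fault handler could consult
 * override_nx() when an execute fault hits a page whose protection lacks
 * VM_PROT_EXECUTE.  "fault_type" and "entry" below are hypothetical locals,
 * not the actual vm_fault code path:
 *
 *	if ((fault_type & VM_PROT_EXECUTE) &&
 *	    !(entry->protection & VM_PROT_EXECUTE) &&
 *	    !override_nx(map, entry->alias)) {
 *		return KERN_PROTECTION_FAILURE;		deny execution
 *	}
 */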
366
367
368 /*
369 * Virtual memory maps provide for the mapping, protection,
370 * and sharing of virtual memory objects. In addition,
371 * this module provides for an efficient virtual copy of
372 * memory from one map to another.
373 *
374 * Synchronization is required prior to most operations.
375 *
376 * Maps consist of an ordered doubly-linked list of simple
377 * entries; a single hint is used to speed up lookups.
378 *
379 * Sharing maps have been deleted from this version of Mach.
380 * All shared objects are now mapped directly into the respective
381 * maps. This requires a change in the copy on write strategy;
382 * the asymmetric (delayed) strategy is used for shared temporary
383 * objects instead of the symmetric (shadow) strategy. All maps
384 * are now "top level" maps (either task map, kernel map or submap
385 * of the kernel map).
386 *
387  *	Since portions of maps are specified by start/end addresses,
388 * which may not align with existing map entries, all
389 * routines merely "clip" entries to these start/end values.
390 * [That is, an entry is split into two, bordering at a
391 * start or end value.] Note that these clippings may not
392 * always be necessary (as the two resulting entries are then
393 * not changed); however, the clipping is done for convenience.
394 * No attempt is currently made to "glue back together" two
395 * abutting entries.
396 *
397 * The symmetric (shadow) copy strategy implements virtual copy
398 * by copying VM object references from one map to
399 * another, and then marking both regions as copy-on-write.
400 * It is important to note that only one writeable reference
401 * to a VM object region exists in any map when this strategy
402 * is used -- this means that shadow object creation can be
403  *	delayed until a write operation occurs.  The asymmetric (delayed)
404 * strategy allows multiple maps to have writeable references to
405 * the same region of a vm object, and hence cannot delay creating
406 * its copy objects. See vm_object_copy_quickly() in vm_object.c.
407 * Copying of permanent objects is completely different; see
408 * vm_object_copy_strategically() in vm_object.c.
409 */
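
/*
 *	Illustrative sketch (editor's addition): "clipping" as described above.
 *	Given an entry covering [0x1000, 0x4000) and an operation on
 *	[0x2000, 0x3000), the clip routines split the entry so the operation
 *	can work on whole entries:
 *
 *		before:	[0x1000 ---------------------------------- 0x4000)
 *		after:	[0x1000 - 0x2000)[0x2000 - 0x3000)[0x3000 - 0x4000)
 *
 *	The middle entry now exactly spans the requested range; the two
 *	neighbors are left as-is and are not glued back together afterwards.
 */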
410
411 static zone_t vm_map_zone; /* zone for vm_map structures */
412 static zone_t vm_map_entry_zone; /* zone for vm_map_entry structures */
413 static zone_t vm_map_kentry_zone; /* zone for kernel entry structures */
414 static zone_t vm_map_copy_zone; /* zone for vm_map_copy structures */
415
416
417 /*
418 * Placeholder object for submap operations. This object is dropped
419 * into the range by a call to vm_map_find, and removed when
420 * vm_map_submap creates the submap.
421 */
422
423 vm_object_t vm_submap_object;
424
425 static void *map_data;
426 static vm_size_t map_data_size;
427 static void *kentry_data;
428 static vm_size_t kentry_data_size;
429 static int kentry_count = 2048; /* to init kentry_data_size */
430
431 #define NO_COALESCE_LIMIT ((1024 * 128) - 1)
432
433
434 /* Skip acquiring locks if we're in the midst of a kernel core dump */
435 unsigned int not_in_kdp = 1;
436
437 #if CONFIG_CODE_DECRYPTION
438 /*
439 * vm_map_apple_protected:
440 * This remaps the requested part of the object with an object backed by
441 * the decrypting pager.
442 * crypt_info contains entry points and session data for the crypt module.
443 * The crypt_info block will be copied by vm_map_apple_protected. The data structures
444 * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
445 */
446 kern_return_t
447 vm_map_apple_protected(
448 vm_map_t map,
449 vm_map_offset_t start,
450 vm_map_offset_t end,
451 struct pager_crypt_info *crypt_info)
452 {
453 boolean_t map_locked;
454 kern_return_t kr;
455 vm_map_entry_t map_entry;
456 memory_object_t protected_mem_obj;
457 vm_object_t protected_object;
458 vm_map_offset_t map_addr;
459
460 vm_map_lock_read(map);
461 map_locked = TRUE;
462
463 /* lookup the protected VM object */
464 if (!vm_map_lookup_entry(map,
465 start,
466 &map_entry) ||
467 map_entry->vme_end < end ||
468 map_entry->is_sub_map) {
469 /* that memory is not properly mapped */
470 kr = KERN_INVALID_ARGUMENT;
471 goto done;
472 }
473 protected_object = map_entry->object.vm_object;
474 if (protected_object == VM_OBJECT_NULL) {
475 /* there should be a VM object here at this point */
476 kr = KERN_INVALID_ARGUMENT;
477 goto done;
478 }
479
480 /* make sure protected object stays alive while map is unlocked */
481 vm_object_reference(protected_object);
482
483 vm_map_unlock_read(map);
484 map_locked = FALSE;
485
486 /*
487 * Lookup (and create if necessary) the protected memory object
488 * matching that VM object.
489 * If successful, this also grabs a reference on the memory object,
490 * to guarantee that it doesn't go away before we get a chance to map
491 * it.
492 */
493 protected_mem_obj = apple_protect_pager_setup(protected_object, crypt_info);
494
495 /* release extra ref on protected object */
496 vm_object_deallocate(protected_object);
497
498 if (protected_mem_obj == NULL) {
499 kr = KERN_FAILURE;
500 goto done;
501 }
502
503 /* map this memory object in place of the current one */
504 map_addr = start;
505 kr = vm_map_enter_mem_object(map,
506 &map_addr,
507 end - start,
508 (mach_vm_offset_t) 0,
509 VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
510 (ipc_port_t) protected_mem_obj,
511 (map_entry->offset +
512 (start - map_entry->vme_start)),
513 TRUE,
514 map_entry->protection,
515 map_entry->max_protection,
516 map_entry->inheritance);
517 assert(map_addr == start);
518 /*
519 * Release the reference obtained by apple_protect_pager_setup().
520 * The mapping (if it succeeded) is now holding a reference on the
521 * memory object.
522 */
523 memory_object_deallocate(protected_mem_obj);
524
525 done:
526 if (map_locked) {
527 vm_map_unlock_read(map);
528 }
529 return kr;
530 }
531 #endif /* CONFIG_CODE_DECRYPTION */
532
533
534 lck_grp_t vm_map_lck_grp;
535 lck_grp_attr_t vm_map_lck_grp_attr;
536 lck_attr_t vm_map_lck_attr;
537
538
539 /*
540 * vm_map_init:
541 *
542 * Initialize the vm_map module. Must be called before
543 * any other vm_map routines.
544 *
545 * Map and entry structures are allocated from zones -- we must
546 * initialize those zones.
547 *
548 * There are three zones of interest:
549 *
550 * vm_map_zone: used to allocate maps.
551 * vm_map_entry_zone: used to allocate map entries.
552 * vm_map_kentry_zone: used to allocate map entries for the kernel.
553 *
554 * The kernel allocates map entries from a special zone that is initially
555 * "crammed" with memory. It would be difficult (perhaps impossible) for
556  *	the kernel to allocate more memory to an entry zone when it became
557 * empty since the very act of allocating memory implies the creation
558 * of a new entry.
559 */
560 void
561 vm_map_init(
562 void)
563 {
564 vm_map_zone = zinit((vm_map_size_t) sizeof(struct _vm_map), 40*1024,
565 PAGE_SIZE, "maps");
566
567 vm_map_entry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
568 1024*1024, PAGE_SIZE*5,
569 "non-kernel map entries");
570
571 vm_map_kentry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
572 kentry_data_size, kentry_data_size,
573 "kernel map entries");
574
575 vm_map_copy_zone = zinit((vm_map_size_t) sizeof(struct vm_map_copy),
576 16*1024, PAGE_SIZE, "map copies");
577
578 /*
579 * Cram the map and kentry zones with initial data.
580 * Set kentry_zone non-collectible to aid zone_gc().
581 */
582 zone_change(vm_map_zone, Z_COLLECT, FALSE);
583 zone_change(vm_map_kentry_zone, Z_COLLECT, FALSE);
584 zone_change(vm_map_kentry_zone, Z_EXPAND, FALSE);
585 zone_change(vm_map_kentry_zone, Z_FOREIGN, TRUE);
586 zcram(vm_map_zone, map_data, map_data_size);
587 zcram(vm_map_kentry_zone, kentry_data, kentry_data_size);
588
589 lck_grp_attr_setdefault(&vm_map_lck_grp_attr);
590 lck_grp_init(&vm_map_lck_grp, "vm_map", &vm_map_lck_grp_attr);
591 lck_attr_setdefault(&vm_map_lck_attr);
592 }
593
594 void
595 vm_map_steal_memory(
596 void)
597 {
598 map_data_size = round_page(10 * sizeof(struct _vm_map));
599 map_data = pmap_steal_memory(map_data_size);
600
601 #if 0
602 /*
603 * Limiting worst case: vm_map_kentry_zone needs to map each "available"
604 * physical page (i.e. that beyond the kernel image and page tables)
605 * individually; we guess at most one entry per eight pages in the
606 * real world. This works out to roughly .1 of 1% of physical memory,
607 * or roughly 1900 entries (64K) for a 64M machine with 4K pages.
608 */
609 #endif
610 kentry_count = pmap_free_pages() / 8;
611
612
613 kentry_data_size =
614 round_page(kentry_count * sizeof(struct vm_map_entry));
615 kentry_data = pmap_steal_memory(kentry_data_size);
616 }
617
618 /*
619 * vm_map_create:
620 *
621 * Creates and returns a new empty VM map with
622 * the given physical map structure, and having
623 * the given lower and upper address bounds.
624 */
625 vm_map_t
626 vm_map_create(
627 pmap_t pmap,
628 vm_map_offset_t min,
629 vm_map_offset_t max,
630 boolean_t pageable)
631 {
632 static int color_seed = 0;
633 register vm_map_t result;
634
635 result = (vm_map_t) zalloc(vm_map_zone);
636 if (result == VM_MAP_NULL)
637 panic("vm_map_create");
638
639 vm_map_first_entry(result) = vm_map_to_entry(result);
640 vm_map_last_entry(result) = vm_map_to_entry(result);
641 result->hdr.nentries = 0;
642 result->hdr.entries_pageable = pageable;
643
644 result->size = 0;
645 result->user_wire_limit = MACH_VM_MAX_ADDRESS; /* default limit is unlimited */
646 result->user_wire_size = 0;
647 result->ref_count = 1;
648 #if TASK_SWAPPER
649 result->res_count = 1;
650 result->sw_state = MAP_SW_IN;
651 #endif /* TASK_SWAPPER */
652 result->pmap = pmap;
653 result->min_offset = min;
654 result->max_offset = max;
655 result->wiring_required = FALSE;
656 result->no_zero_fill = FALSE;
657 result->mapped = FALSE;
658 result->wait_for_space = FALSE;
659 result->switch_protect = FALSE;
660 result->first_free = vm_map_to_entry(result);
661 result->hint = vm_map_to_entry(result);
662 result->color_rr = (color_seed++) & vm_color_mask;
663 vm_map_lock_init(result);
664 lck_mtx_init_ext(&result->s_lock, &result->s_lock_ext, &vm_map_lck_grp, &vm_map_lck_attr);
665
666 return(result);
667 }
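
/*
 * Illustrative sketch (editor's addition): a minimal use of vm_map_create(),
 * assuming a freshly created pmap and the generic user address bounds.
 * Real callers (e.g. task creation) choose their own pmap and bounds:
 *
 *	vm_map_t new_map;
 *
 *	new_map = vm_map_create(pmap_create(0, FALSE),
 *				MACH_VM_MIN_ADDRESS,
 *				MACH_VM_MAX_ADDRESS,
 *				TRUE);			pageable entries
 */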
668
669 /*
670 * vm_map_entry_create: [ internal use only ]
671 *
672 * Allocates a VM map entry for insertion in the
673 * given map (or map copy). No fields are filled.
674 */
675 #define vm_map_entry_create(map) \
676 _vm_map_entry_create(&(map)->hdr)
677
678 #define vm_map_copy_entry_create(copy) \
679 _vm_map_entry_create(&(copy)->cpy_hdr)
680
681 static vm_map_entry_t
682 _vm_map_entry_create(
683 register struct vm_map_header *map_header)
684 {
685 register zone_t zone;
686 register vm_map_entry_t entry;
687
688 if (map_header->entries_pageable)
689 zone = vm_map_entry_zone;
690 else
691 zone = vm_map_kentry_zone;
692
693 entry = (vm_map_entry_t) zalloc(zone);
694 if (entry == VM_MAP_ENTRY_NULL)
695 panic("vm_map_entry_create");
696
697 return(entry);
698 }
699
700 /*
701 * vm_map_entry_dispose: [ internal use only ]
702 *
703 * Inverse of vm_map_entry_create.
704 *
705 * write map lock held so no need to
706  *	do anything special to ensure correctness
707 * of the stores
708 */
709 #define vm_map_entry_dispose(map, entry) \
710 MACRO_BEGIN \
711 if((entry) == (map)->first_free) \
712 (map)->first_free = vm_map_to_entry(map); \
713 if((entry) == (map)->hint) \
714 (map)->hint = vm_map_to_entry(map); \
715 _vm_map_entry_dispose(&(map)->hdr, (entry)); \
716 MACRO_END
717
718 #define vm_map_copy_entry_dispose(copy, entry) \
719 _vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
720
721 static void
722 _vm_map_entry_dispose(
723 register struct vm_map_header *map_header,
724 register vm_map_entry_t entry)
725 {
726 register zone_t zone;
727
728 if (map_header->entries_pageable)
729 zone = vm_map_entry_zone;
730 else
731 zone = vm_map_kentry_zone;
732
733 zfree(zone, entry);
734 }
735
736 #if MACH_ASSERT
737 static boolean_t first_free_is_valid(vm_map_t map); /* forward */
738 static boolean_t first_free_check = FALSE;
739 static boolean_t
740 first_free_is_valid(
741 vm_map_t map)
742 {
743 vm_map_entry_t entry, next;
744
745 if (!first_free_check)
746 return TRUE;
747
748 entry = vm_map_to_entry(map);
749 next = entry->vme_next;
750 while (vm_map_trunc_page(next->vme_start) == vm_map_trunc_page(entry->vme_end) ||
751 (vm_map_trunc_page(next->vme_start) == vm_map_trunc_page(entry->vme_start) &&
752 next != vm_map_to_entry(map))) {
753 entry = next;
754 next = entry->vme_next;
755 if (entry == vm_map_to_entry(map))
756 break;
757 }
758 if (map->first_free != entry) {
759 printf("Bad first_free for map %p: %p should be %p\n",
760 map, map->first_free, entry);
761 return FALSE;
762 }
763 return TRUE;
764 }
765 #endif /* MACH_ASSERT */
766
767 /*
768 * UPDATE_FIRST_FREE:
769 *
770 * Updates the map->first_free pointer to the
771 * entry immediately before the first hole in the map.
772 * The map should be locked.
773 */
774 #define UPDATE_FIRST_FREE(map, new_first_free) \
775 MACRO_BEGIN \
776 vm_map_t UFF_map; \
777 vm_map_entry_t UFF_first_free; \
778 vm_map_entry_t UFF_next_entry; \
779 UFF_map = (map); \
780 UFF_first_free = (new_first_free); \
781 UFF_next_entry = UFF_first_free->vme_next; \
782 while (vm_map_trunc_page(UFF_next_entry->vme_start) == \
783 vm_map_trunc_page(UFF_first_free->vme_end) || \
784 (vm_map_trunc_page(UFF_next_entry->vme_start) == \
785 vm_map_trunc_page(UFF_first_free->vme_start) && \
786 UFF_next_entry != vm_map_to_entry(UFF_map))) { \
787 UFF_first_free = UFF_next_entry; \
788 UFF_next_entry = UFF_first_free->vme_next; \
789 if (UFF_first_free == vm_map_to_entry(UFF_map)) \
790 break; \
791 } \
792 UFF_map->first_free = UFF_first_free; \
793 assert(first_free_is_valid(UFF_map)); \
794 MACRO_END
795
796 /*
797 * vm_map_entry_{un,}link:
798 *
799 * Insert/remove entries from maps (or map copies).
800 */
801 #define vm_map_entry_link(map, after_where, entry) \
802 MACRO_BEGIN \
803 vm_map_t VMEL_map; \
804 vm_map_entry_t VMEL_entry; \
805 VMEL_map = (map); \
806 VMEL_entry = (entry); \
807 _vm_map_entry_link(&VMEL_map->hdr, after_where, VMEL_entry); \
808 UPDATE_FIRST_FREE(VMEL_map, VMEL_map->first_free); \
809 MACRO_END
810
811
812 #define vm_map_copy_entry_link(copy, after_where, entry) \
813 _vm_map_entry_link(&(copy)->cpy_hdr, after_where, (entry))
814
815 #define _vm_map_entry_link(hdr, after_where, entry) \
816 MACRO_BEGIN \
817 (hdr)->nentries++; \
818 (entry)->vme_prev = (after_where); \
819 (entry)->vme_next = (after_where)->vme_next; \
820 (entry)->vme_prev->vme_next = (entry)->vme_next->vme_prev = (entry); \
821 MACRO_END
822
823 #define vm_map_entry_unlink(map, entry) \
824 MACRO_BEGIN \
825 vm_map_t VMEU_map; \
826 vm_map_entry_t VMEU_entry; \
827 vm_map_entry_t VMEU_first_free; \
828 VMEU_map = (map); \
829 VMEU_entry = (entry); \
830 if (VMEU_entry->vme_start <= VMEU_map->first_free->vme_start) \
831 VMEU_first_free = VMEU_entry->vme_prev; \
832 else \
833 VMEU_first_free = VMEU_map->first_free; \
834 _vm_map_entry_unlink(&VMEU_map->hdr, VMEU_entry); \
835 UPDATE_FIRST_FREE(VMEU_map, VMEU_first_free); \
836 MACRO_END
837
838 #define vm_map_copy_entry_unlink(copy, entry) \
839 _vm_map_entry_unlink(&(copy)->cpy_hdr, (entry))
840
841 #define _vm_map_entry_unlink(hdr, entry) \
842 MACRO_BEGIN \
843 (hdr)->nentries--; \
844 (entry)->vme_next->vme_prev = (entry)->vme_prev; \
845 (entry)->vme_prev->vme_next = (entry)->vme_next; \
846 MACRO_END
847
848 #if MACH_ASSERT && TASK_SWAPPER
849 /*
850 * vm_map_res_reference:
851 *
852 * Adds another valid residence count to the given map.
853 *
854 * Map is locked so this function can be called from
855 * vm_map_swapin.
856 *
857 */
858 void vm_map_res_reference(register vm_map_t map)
859 {
860 /* assert map is locked */
861 assert(map->res_count >= 0);
862 assert(map->ref_count >= map->res_count);
863 if (map->res_count == 0) {
864 lck_mtx_unlock(&map->s_lock);
865 vm_map_lock(map);
866 vm_map_swapin(map);
867 lck_mtx_lock(&map->s_lock);
868 ++map->res_count;
869 vm_map_unlock(map);
870 } else
871 ++map->res_count;
872 }
873
874 /*
875 * vm_map_reference_swap:
876 *
877 * Adds valid reference and residence counts to the given map.
878 *
879 * The map may not be in memory (i.e. zero residence count).
880 *
881 */
882 void vm_map_reference_swap(register vm_map_t map)
883 {
884 assert(map != VM_MAP_NULL);
885 lck_mtx_lock(&map->s_lock);
886 assert(map->res_count >= 0);
887 assert(map->ref_count >= map->res_count);
888 map->ref_count++;
889 vm_map_res_reference(map);
890 lck_mtx_unlock(&map->s_lock);
891 }
892
893 /*
894 * vm_map_res_deallocate:
895 *
896 * Decrement residence count on a map; possibly causing swapout.
897 *
898 * The map must be in memory (i.e. non-zero residence count).
899 *
900 * The map is locked, so this function is callable from vm_map_deallocate.
901 *
902 */
903 void vm_map_res_deallocate(register vm_map_t map)
904 {
905 assert(map->res_count > 0);
906 if (--map->res_count == 0) {
907 lck_mtx_unlock(&map->s_lock);
908 vm_map_lock(map);
909 vm_map_swapout(map);
910 vm_map_unlock(map);
911 lck_mtx_lock(&map->s_lock);
912 }
913 assert(map->ref_count >= map->res_count);
914 }
915 #endif /* MACH_ASSERT && TASK_SWAPPER */
916
917 /*
918 * vm_map_destroy:
919 *
920 * Actually destroy a map.
921 */
922 void
923 vm_map_destroy(
924 vm_map_t map,
925 int flags)
926 {
927 vm_map_lock(map);
928
929 /* clean up regular map entries */
930 (void) vm_map_delete(map, map->min_offset, map->max_offset,
931 flags, VM_MAP_NULL);
932 /* clean up leftover special mappings (commpage, etc...) */
933 #ifdef __ppc__
934 /*
935 * PPC51: ppc64 is limited to 51-bit addresses.
936 * Memory beyond this 51-bit limit is mapped specially at the
937 * pmap level, so do not interfere.
938 * On PPC64, the commpage is mapped beyond the addressable range
939 * via a special pmap hack, so ask pmap to clean it explicitly...
940 */
941 if (map->pmap) {
942 pmap_unmap_sharedpage(map->pmap);
943 }
944 /* ... and do not let regular pmap cleanup apply here */
945 flags |= VM_MAP_REMOVE_NO_PMAP_CLEANUP;
946 #endif /* __ppc__ */
947 (void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL,
948 flags, VM_MAP_NULL);
949 vm_map_unlock(map);
950
951 assert(map->hdr.nentries == 0);
952
953 if(map->pmap)
954 pmap_destroy(map->pmap);
955
956 zfree(vm_map_zone, map);
957 }
958
959 #if TASK_SWAPPER
960 /*
961 * vm_map_swapin/vm_map_swapout
962 *
963 * Swap a map in and out, either referencing or releasing its resources.
964 * These functions are internal use only; however, they must be exported
965 * because they may be called from macros, which are exported.
966 *
967 * In the case of swapout, there could be races on the residence count,
968 * so if the residence count is up, we return, assuming that a
969 * vm_map_deallocate() call in the near future will bring us back.
970 *
971 * Locking:
972 * -- We use the map write lock for synchronization among races.
973 * -- The map write lock, and not the simple s_lock, protects the
974 * swap state of the map.
975 * -- If a map entry is a share map, then we hold both locks, in
976 * hierarchical order.
977 *
978 * Synchronization Notes:
979 * 1) If a vm_map_swapin() call happens while swapout in progress, it
980 * will block on the map lock and proceed when swapout is through.
981 * 2) A vm_map_reference() call at this time is illegal, and will
982 * cause a panic. vm_map_reference() is only allowed on resident
983 * maps, since it refuses to block.
984 * 3) A vm_map_swapin() call during a swapin will block, and
985  *	proceed when the first swapin is done, turning into a nop.
986 * This is the reason the res_count is not incremented until
987 * after the swapin is complete.
988 * 4) There is a timing hole after the checks of the res_count, before
989 * the map lock is taken, during which a swapin may get the lock
990 * before a swapout about to happen. If this happens, the swapin
991 * will detect the state and increment the reference count, causing
992 * the swapout to be a nop, thereby delaying it until a later
993 * vm_map_deallocate. If the swapout gets the lock first, then
994 * the swapin will simply block until the swapout is done, and
995 * then proceed.
996 *
997 * Because vm_map_swapin() is potentially an expensive operation, it
998 * should be used with caution.
999 *
1000 * Invariants:
1001 * 1) A map with a residence count of zero is either swapped, or
1002 * being swapped.
1003 * 2) A map with a non-zero residence count is either resident,
1004 * or being swapped in.
1005 */
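
/*
 * Illustrative sketch (editor's addition): the reference/residence pairing
 * described above, for a hypothetical caller that needs a map which may
 * currently be swapped out:
 *
 *	vm_map_reference_swap(map);	takes both ref_count and res_count,
 *					swapping the map in if necessary
 *	... use the map ...
 *	vm_map_deallocate(map);		drops the reference; under
 *					TASK_SWAPPER it also drops the
 *					residence count, which may trigger
 *					vm_map_swapout()
 */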
1006
1007 int vm_map_swap_enable = 1;
1008
1009 void vm_map_swapin (vm_map_t map)
1010 {
1011 register vm_map_entry_t entry;
1012
1013 if (!vm_map_swap_enable) /* debug */
1014 return;
1015
1016 /*
1017 * Map is locked
1018 * First deal with various races.
1019 */
1020 if (map->sw_state == MAP_SW_IN)
1021 /*
1022 * we raced with swapout and won. Returning will incr.
1023 * the res_count, turning the swapout into a nop.
1024 */
1025 return;
1026
1027 /*
1028 * The residence count must be zero. If we raced with another
1029 * swapin, the state would have been IN; if we raced with a
1030 * swapout (after another competing swapin), we must have lost
1031 * the race to get here (see above comment), in which case
1032 * res_count is still 0.
1033 */
1034 assert(map->res_count == 0);
1035
1036 /*
1037 * There are no intermediate states of a map going out or
1038 * coming in, since the map is locked during the transition.
1039 */
1040 assert(map->sw_state == MAP_SW_OUT);
1041
1042 /*
1043 * We now operate upon each map entry. If the entry is a sub-
1044 * or share-map, we call vm_map_res_reference upon it.
1045 * If the entry is an object, we call vm_object_res_reference
1046 * (this may iterate through the shadow chain).
1047 * Note that we hold the map locked the entire time,
1048 * even if we get back here via a recursive call in
1049 * vm_map_res_reference.
1050 */
1051 entry = vm_map_first_entry(map);
1052
1053 while (entry != vm_map_to_entry(map)) {
1054 if (entry->object.vm_object != VM_OBJECT_NULL) {
1055 if (entry->is_sub_map) {
1056 vm_map_t lmap = entry->object.sub_map;
1057 lck_mtx_lock(&lmap->s_lock);
1058 vm_map_res_reference(lmap);
1059 lck_mtx_unlock(&lmap->s_lock);
1060 } else {
1061 vm_object_t object = entry->object.vm_object;
1062 vm_object_lock(object);
1063 /*
1064 * This call may iterate through the
1065 * shadow chain.
1066 */
1067 vm_object_res_reference(object);
1068 vm_object_unlock(object);
1069 }
1070 }
1071 entry = entry->vme_next;
1072 }
1073 assert(map->sw_state == MAP_SW_OUT);
1074 map->sw_state = MAP_SW_IN;
1075 }
1076
1077 void vm_map_swapout(vm_map_t map)
1078 {
1079 register vm_map_entry_t entry;
1080
1081 /*
1082 * Map is locked
1083 * First deal with various races.
1084 * If we raced with a swapin and lost, the residence count
1085 * will have been incremented to 1, and we simply return.
1086 */
1087 lck_mtx_lock(&map->s_lock);
1088 if (map->res_count != 0) {
1089 lck_mtx_unlock(&map->s_lock);
1090 return;
1091 }
1092 lck_mtx_unlock(&map->s_lock);
1093
1094 /*
1095 * There are no intermediate states of a map going out or
1096 * coming in, since the map is locked during the transition.
1097 */
1098 assert(map->sw_state == MAP_SW_IN);
1099
1100 if (!vm_map_swap_enable)
1101 return;
1102
1103 /*
1104 * We now operate upon each map entry. If the entry is a sub-
1105 * or share-map, we call vm_map_res_deallocate upon it.
1106 * If the entry is an object, we call vm_object_res_deallocate
1107 * (this may iterate through the shadow chain).
1108 * Note that we hold the map locked the entire time,
1109 * even if we get back here via a recursive call in
1110 * vm_map_res_deallocate.
1111 */
1112 entry = vm_map_first_entry(map);
1113
1114 while (entry != vm_map_to_entry(map)) {
1115 if (entry->object.vm_object != VM_OBJECT_NULL) {
1116 if (entry->is_sub_map) {
1117 vm_map_t lmap = entry->object.sub_map;
1118 lck_mtx_lock(&lmap->s_lock);
1119 vm_map_res_deallocate(lmap);
1120 lck_mtx_unlock(&lmap->s_lock);
1121 } else {
1122 vm_object_t object = entry->object.vm_object;
1123 vm_object_lock(object);
1124 /*
1125 * This call may take a long time,
1126 * since it could actively push
1127 * out pages (if we implement it
1128 * that way).
1129 */
1130 vm_object_res_deallocate(object);
1131 vm_object_unlock(object);
1132 }
1133 }
1134 entry = entry->vme_next;
1135 }
1136 assert(map->sw_state == MAP_SW_IN);
1137 map->sw_state = MAP_SW_OUT;
1138 }
1139
1140 #endif /* TASK_SWAPPER */
1141
1142
1143 /*
1144 * SAVE_HINT_MAP_READ:
1145 *
1146 * Saves the specified entry as the hint for
1147  *	future lookups.  Only a read lock is held on the map,
1148 * so make sure the store is atomic... OSCompareAndSwap
1149 * guarantees this... also, we don't care if we collide
1150 * and someone else wins and stores their 'hint'
1151 */
1152 #define SAVE_HINT_MAP_READ(map,value) \
1153 MACRO_BEGIN \
1154 OSCompareAndSwapPtr((map)->hint, value, &(map)->hint); \
1155 MACRO_END
1156
1157
1158 /*
1159 * SAVE_HINT_MAP_WRITE:
1160 *
1161 * Saves the specified entry as the hint for
1162  *	future lookups.  The write lock is held on the map,
1163 * so no one else can be writing or looking
1164 * until the lock is dropped, so it's safe
1165 * to just do an assignment
1166 */
1167 #define SAVE_HINT_MAP_WRITE(map,value) \
1168 MACRO_BEGIN \
1169 (map)->hint = (value); \
1170 MACRO_END
1171
1172 /*
1173 * vm_map_lookup_entry: [ internal use only ]
1174 *
1175 * Finds the map entry containing (or
1176 * immediately preceding) the specified address
1177 * in the given map; the entry is returned
1178 * in the "entry" parameter. The boolean
1179 * result indicates whether the address is
1180 * actually contained in the map.
1181 */
1182 boolean_t
1183 vm_map_lookup_entry(
1184 register vm_map_t map,
1185 register vm_map_offset_t address,
1186 vm_map_entry_t *entry) /* OUT */
1187 {
1188 register vm_map_entry_t cur;
1189 register vm_map_entry_t last;
1190
1191 /*
1192 * Start looking either from the head of the
1193 * list, or from the hint.
1194 */
1195 cur = map->hint;
1196
1197 if (cur == vm_map_to_entry(map))
1198 cur = cur->vme_next;
1199
1200 if (address >= cur->vme_start) {
1201 /*
1202 * Go from hint to end of list.
1203 *
1204 * But first, make a quick check to see if
1205 * we are already looking at the entry we
1206 * want (which is usually the case).
1207 * Note also that we don't need to save the hint
1208 * here... it is the same hint (unless we are
1209 * at the header, in which case the hint didn't
1210 * buy us anything anyway).
1211 */
1212 last = vm_map_to_entry(map);
1213 if ((cur != last) && (cur->vme_end > address)) {
1214 *entry = cur;
1215 return(TRUE);
1216 }
1217 }
1218 else {
1219 /*
1220 * Go from start to hint, *inclusively*
1221 */
1222 last = cur->vme_next;
1223 cur = vm_map_first_entry(map);
1224 }
1225
1226 /*
1227 * Search linearly
1228 */
1229
1230 while (cur != last) {
1231 if (cur->vme_end > address) {
1232 if (address >= cur->vme_start) {
1233 /*
1234 * Save this lookup for future
1235 * hints, and return
1236 */
1237
1238 *entry = cur;
1239 SAVE_HINT_MAP_READ(map, cur);
1240
1241 return(TRUE);
1242 }
1243 break;
1244 }
1245 cur = cur->vme_next;
1246 }
1247 *entry = cur->vme_prev;
1248 SAVE_HINT_MAP_READ(map, *entry);
1249
1250 return(FALSE);
1251 }
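
/*
 * Illustrative sketch (editor's addition): typical use of
 * vm_map_lookup_entry() with the map lock held ("addr" and "entry" are
 * hypothetical locals):
 *
 *	vm_map_entry_t	entry;
 *
 *	if (vm_map_lookup_entry(map, addr, &entry)) {
 *		addr lies within [entry->vme_start, entry->vme_end)
 *	} else {
 *		"entry" precedes addr (possibly the map header); a new
 *		entry covering addr would be linked right after it
 *	}
 */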
1252
1253 /*
1254 * Routine: vm_map_find_space
1255 * Purpose:
1256 * Allocate a range in the specified virtual address map,
1257 * returning the entry allocated for that range.
1258 * Used by kmem_alloc, etc.
1259 *
1260  *	The map must NOT be locked. It will be returned locked
1261 * on KERN_SUCCESS, unlocked on failure.
1262 *
1263 * If an entry is allocated, the object/offset fields
1264 * are initialized to zero.
1265 */
1266 kern_return_t
1267 vm_map_find_space(
1268 register vm_map_t map,
1269 vm_map_offset_t *address, /* OUT */
1270 vm_map_size_t size,
1271 vm_map_offset_t mask,
1272 int flags,
1273 vm_map_entry_t *o_entry) /* OUT */
1274 {
1275 register vm_map_entry_t entry, new_entry;
1276 register vm_map_offset_t start;
1277 register vm_map_offset_t end;
1278
1279 if (size == 0) {
1280 *address = 0;
1281 return KERN_INVALID_ARGUMENT;
1282 }
1283
1284 if (flags & VM_FLAGS_GUARD_AFTER) {
1285 /* account for the back guard page in the size */
1286 size += PAGE_SIZE_64;
1287 }
1288
1289 new_entry = vm_map_entry_create(map);
1290
1291 /*
1292 * Look for the first possible address; if there's already
1293 * something at this address, we have to start after it.
1294 */
1295
1296 vm_map_lock(map);
1297
1298 assert(first_free_is_valid(map));
1299 if ((entry = map->first_free) == vm_map_to_entry(map))
1300 start = map->min_offset;
1301 else
1302 start = entry->vme_end;
1303
1304 /*
1305 * In any case, the "entry" always precedes
1306 * the proposed new region throughout the loop:
1307 */
1308
1309 while (TRUE) {
1310 register vm_map_entry_t next;
1311
1312 /*
1313 * Find the end of the proposed new region.
1314 * Be sure we didn't go beyond the end, or
1315 * wrap around the address.
1316 */
1317
1318 if (flags & VM_FLAGS_GUARD_BEFORE) {
1319 /* reserve space for the front guard page */
1320 start += PAGE_SIZE_64;
1321 }
1322 end = ((start + mask) & ~mask);
1323
1324 if (end < start) {
1325 vm_map_entry_dispose(map, new_entry);
1326 vm_map_unlock(map);
1327 return(KERN_NO_SPACE);
1328 }
1329 start = end;
1330 end += size;
1331
1332 if ((end > map->max_offset) || (end < start)) {
1333 vm_map_entry_dispose(map, new_entry);
1334 vm_map_unlock(map);
1335 return(KERN_NO_SPACE);
1336 }
1337
1338 /*
1339 * If there are no more entries, we must win.
1340 */
1341
1342 next = entry->vme_next;
1343 if (next == vm_map_to_entry(map))
1344 break;
1345
1346 /*
1347 * If there is another entry, it must be
1348 * after the end of the potential new region.
1349 */
1350
1351 if (next->vme_start >= end)
1352 break;
1353
1354 /*
1355 * Didn't fit -- move to the next entry.
1356 */
1357
1358 entry = next;
1359 start = entry->vme_end;
1360 }
1361
1362 /*
1363 * At this point,
1364 * "start" and "end" should define the endpoints of the
1365 * available new range, and
1366 * "entry" should refer to the region before the new
1367 * range, and
1368 *
1369 * the map should be locked.
1370 */
1371
1372 if (flags & VM_FLAGS_GUARD_BEFORE) {
1373 /* go back for the front guard page */
1374 start -= PAGE_SIZE_64;
1375 }
1376 *address = start;
1377
1378 new_entry->vme_start = start;
1379 new_entry->vme_end = end;
1380 assert(page_aligned(new_entry->vme_start));
1381 assert(page_aligned(new_entry->vme_end));
1382
1383 new_entry->is_shared = FALSE;
1384 new_entry->is_sub_map = FALSE;
1385 new_entry->use_pmap = FALSE;
1386 new_entry->object.vm_object = VM_OBJECT_NULL;
1387 new_entry->offset = (vm_object_offset_t) 0;
1388
1389 new_entry->needs_copy = FALSE;
1390
1391 new_entry->inheritance = VM_INHERIT_DEFAULT;
1392 new_entry->protection = VM_PROT_DEFAULT;
1393 new_entry->max_protection = VM_PROT_ALL;
1394 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
1395 new_entry->wired_count = 0;
1396 new_entry->user_wired_count = 0;
1397
1398 new_entry->in_transition = FALSE;
1399 new_entry->needs_wakeup = FALSE;
1400 new_entry->no_cache = FALSE;
1401 new_entry->permanent = FALSE;
1402 new_entry->superpage_size = 0;
1403
1404 new_entry->alias = 0;
1405 new_entry->zero_wired_pages = FALSE;
1406
1407 VM_GET_FLAGS_ALIAS(flags, new_entry->alias);
1408
1409 /*
1410 * Insert the new entry into the list
1411 */
1412
1413 vm_map_entry_link(map, entry, new_entry);
1414
1415 map->size += size;
1416
1417 /*
1418 * Update the lookup hint
1419 */
1420 SAVE_HINT_MAP_WRITE(map, new_entry);
1421
1422 *o_entry = new_entry;
1423 return(KERN_SUCCESS);
1424 }
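
/*
 * Illustrative sketch (editor's addition): the kmem_alloc-style calling
 * pattern, assuming hypothetical locals "size" and "object" to back the
 * new range.  Remember that the map comes back locked on KERN_SUCCESS:
 *
 *	vm_map_offset_t	addr;
 *	vm_map_entry_t	entry;
 *	kern_return_t	kr;
 *
 *	kr = vm_map_find_space(kernel_map, &addr, size, 0, 0, &entry);
 *	if (kr == KERN_SUCCESS) {
 *		entry->object.vm_object = object;	set backing object
 *		entry->offset = 0;			while still locked
 *		vm_map_unlock(kernel_map);
 *	}
 */
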
1425
1426 int vm_map_pmap_enter_print = FALSE;
1427 int vm_map_pmap_enter_enable = FALSE;
1428
1429 /*
1430 * Routine: vm_map_pmap_enter [internal only]
1431 *
1432 * Description:
1433 * Force pages from the specified object to be entered into
1434 * the pmap at the specified address if they are present.
1435  *	As soon as a page is not found in the object, the scan ends.
1436 *
1437 * Returns:
1438 * Nothing.
1439 *
1440 * In/out conditions:
1441 * The source map should not be locked on entry.
1442 */
1443 static void
1444 vm_map_pmap_enter(
1445 vm_map_t map,
1446 register vm_map_offset_t addr,
1447 register vm_map_offset_t end_addr,
1448 register vm_object_t object,
1449 vm_object_offset_t offset,
1450 vm_prot_t protection)
1451 {
1452 int type_of_fault;
1453 kern_return_t kr;
1454
1455 if(map->pmap == 0)
1456 return;
1457
1458 while (addr < end_addr) {
1459 register vm_page_t m;
1460
1461 vm_object_lock(object);
1462
1463 m = vm_page_lookup(object, offset);
1464 /*
1465 * ENCRYPTED SWAP:
1466 * The user should never see encrypted data, so do not
1467 * enter an encrypted page in the page table.
1468 */
1469 if (m == VM_PAGE_NULL || m->busy || m->encrypted ||
1470 m->fictitious ||
1471 (m->unusual && ( m->error || m->restart || m->absent))) {
1472 vm_object_unlock(object);
1473 return;
1474 }
1475
1476 if (vm_map_pmap_enter_print) {
1477 printf("vm_map_pmap_enter:");
1478 printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
1479 map, (unsigned long long)addr, object, (unsigned long long)offset);
1480 }
1481 type_of_fault = DBG_CACHE_HIT_FAULT;
1482 kr = vm_fault_enter(m, map->pmap, addr, protection,
1483 VM_PAGE_WIRED(m), FALSE, FALSE,
1484 &type_of_fault);
1485
1486 vm_object_unlock(object);
1487
1488 offset += PAGE_SIZE_64;
1489 addr += PAGE_SIZE;
1490 }
1491 }
1492
1493 boolean_t vm_map_pmap_is_empty(
1494 vm_map_t map,
1495 vm_map_offset_t start,
1496 vm_map_offset_t end);
1497 boolean_t vm_map_pmap_is_empty(
1498 vm_map_t map,
1499 vm_map_offset_t start,
1500 vm_map_offset_t end)
1501 {
1502 #ifdef MACHINE_PMAP_IS_EMPTY
1503 return pmap_is_empty(map->pmap, start, end);
1504 #else /* MACHINE_PMAP_IS_EMPTY */
1505 vm_map_offset_t offset;
1506 ppnum_t phys_page;
1507
1508 if (map->pmap == NULL) {
1509 return TRUE;
1510 }
1511
1512 for (offset = start;
1513 offset < end;
1514 offset += PAGE_SIZE) {
1515 phys_page = pmap_find_phys(map->pmap, offset);
1516 if (phys_page) {
1517 kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
1518 "page %d at 0x%llx\n",
1519 map, (long long)start, (long long)end,
1520 phys_page, (long long)offset);
1521 return FALSE;
1522 }
1523 }
1524 return TRUE;
1525 #endif /* MACHINE_PMAP_IS_EMPTY */
1526 }
1527
1528 /*
1529 * Routine: vm_map_enter
1530 *
1531 * Description:
1532 * Allocate a range in the specified virtual address map.
1533 * The resulting range will refer to memory defined by
1534 * the given memory object and offset into that object.
1535 *
1536 * Arguments are as defined in the vm_map call.
1537 */
1538 int _map_enter_debug = 0;
1539 static unsigned int vm_map_enter_restore_successes = 0;
1540 static unsigned int vm_map_enter_restore_failures = 0;
1541 kern_return_t
1542 vm_map_enter(
1543 vm_map_t map,
1544 vm_map_offset_t *address, /* IN/OUT */
1545 vm_map_size_t size,
1546 vm_map_offset_t mask,
1547 int flags,
1548 vm_object_t object,
1549 vm_object_offset_t offset,
1550 boolean_t needs_copy,
1551 vm_prot_t cur_protection,
1552 vm_prot_t max_protection,
1553 vm_inherit_t inheritance)
1554 {
1555 vm_map_entry_t entry, new_entry;
1556 vm_map_offset_t start, tmp_start, tmp_offset;
1557 vm_map_offset_t end, tmp_end;
1558 vm_map_offset_t tmp2_start, tmp2_end;
1559 vm_map_offset_t step;
1560 kern_return_t result = KERN_SUCCESS;
1561 vm_map_t zap_old_map = VM_MAP_NULL;
1562 vm_map_t zap_new_map = VM_MAP_NULL;
1563 boolean_t map_locked = FALSE;
1564 boolean_t pmap_empty = TRUE;
1565 boolean_t new_mapping_established = FALSE;
1566 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
1567 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
1568 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
1569 boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
1570 boolean_t is_submap = ((flags & VM_FLAGS_SUBMAP) != 0);
1571 boolean_t permanent = ((flags & VM_FLAGS_PERMANENT) != 0);
1572 unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
1573 char alias;
1574 vm_map_offset_t effective_min_offset, effective_max_offset;
1575 kern_return_t kr;
1576
1577 if (superpage_size) {
1578 switch (superpage_size) {
1579 /*
1580 * Note that the current implementation only supports
1581 * a single size for superpages, SUPERPAGE_SIZE, per
1582  *	architecture. As soon as more sizes need
1583 * to be supported, SUPERPAGE_SIZE has to be replaced
1584 * with a lookup of the size depending on superpage_size.
1585 */
1586 #ifdef __x86_64__
1587 case SUPERPAGE_SIZE_2MB:
1588 break;
1589 #endif
1590 default:
1591 return KERN_INVALID_ARGUMENT;
1592 }
1593 mask = SUPERPAGE_SIZE-1;
1594 if (size & (SUPERPAGE_SIZE-1))
1595 return KERN_INVALID_ARGUMENT;
1596 inheritance = VM_INHERIT_NONE; /* fork() children won't inherit superpages */
1597 }
1598
1599 #if CONFIG_EMBEDDED
1600 if (cur_protection & VM_PROT_WRITE) {
1601 if (cur_protection & VM_PROT_EXECUTE) {
1602 printf("EMBEDDED: %s curprot cannot be write+execute. turning off execute\n", __PRETTY_FUNCTION__);
1603 cur_protection &= ~VM_PROT_EXECUTE;
1604 }
1605 }
1606 #endif /* CONFIG_EMBEDDED */
1607
1608 if (is_submap) {
1609 if (purgable) {
1610 /* submaps can not be purgeable */
1611 return KERN_INVALID_ARGUMENT;
1612 }
1613 if (object == VM_OBJECT_NULL) {
1614 /* submaps can not be created lazily */
1615 return KERN_INVALID_ARGUMENT;
1616 }
1617 }
1618 if (flags & VM_FLAGS_ALREADY) {
1619 /*
1620 * VM_FLAGS_ALREADY says that it's OK if the same mapping
1621  *	is already present. For it to be meaningful, the requested
1622 * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
1623  *	we shouldn't try to remove what was mapped there first
1624 * (!VM_FLAGS_OVERWRITE).
1625 */
1626 if ((flags & VM_FLAGS_ANYWHERE) ||
1627 (flags & VM_FLAGS_OVERWRITE)) {
1628 return KERN_INVALID_ARGUMENT;
1629 }
1630 }
1631
1632 if (flags & VM_FLAGS_BELOW_MIN) {
1633 /*
1634 * Allow an insertion below the map's min offset.
1635 */
1636 effective_min_offset = 0ULL;
1637 } else {
1638 effective_min_offset = map->min_offset;
1639 }
1640
1641 if (flags & VM_FLAGS_BEYOND_MAX) {
1642 /*
1643 * Allow an insertion beyond the map's max offset.
1644 */
1645 if (vm_map_is_64bit(map))
1646 effective_max_offset = 0xFFFFFFFFFFFFF000ULL;
1647 else
1648 effective_max_offset = 0x00000000FFFFF000ULL;
1649 } else {
1650 effective_max_offset = map->max_offset;
1651 }
1652
1653 if (size == 0 ||
1654 (offset & PAGE_MASK_64) != 0) {
1655 *address = 0;
1656 return KERN_INVALID_ARGUMENT;
1657 }
1658
1659 VM_GET_FLAGS_ALIAS(flags, alias);
1660
1661 #define RETURN(value) { result = value; goto BailOut; }
1662
1663 assert(page_aligned(*address));
1664 assert(page_aligned(size));
1665
1666 /*
1667 * Only zero-fill objects are allowed to be purgable.
1668 * LP64todo - limit purgable objects to 32-bits for now
1669 */
1670 if (purgable &&
1671 (offset != 0 ||
1672 (object != VM_OBJECT_NULL &&
1673 (object->size != size ||
1674 object->purgable == VM_PURGABLE_DENY))
1675 || size > ANON_MAX_SIZE)) /* LP64todo: remove when dp capable */
1676 return KERN_INVALID_ARGUMENT;
1677
1678 if (!anywhere && overwrite) {
1679 /*
1680 * Create a temporary VM map to hold the old mappings in the
1681 * affected area while we create the new one.
1682 * This avoids releasing the VM map lock in
1683 * vm_map_entry_delete() and allows atomicity
1684 * when we want to replace some mappings with a new one.
1685 * It also allows us to restore the old VM mappings if the
1686 * new mapping fails.
1687 */
1688 zap_old_map = vm_map_create(PMAP_NULL,
1689 *address,
1690 *address + size,
1691 map->hdr.entries_pageable);
1692 }
1693
1694 StartAgain: ;
1695
1696 start = *address;
1697
1698 if (anywhere) {
1699 vm_map_lock(map);
1700 map_locked = TRUE;
1701
1702 /*
1703 * Calculate the first possible address.
1704 */
1705
1706 if (start < effective_min_offset)
1707 start = effective_min_offset;
1708 if (start > effective_max_offset)
1709 RETURN(KERN_NO_SPACE);
1710
1711 /*
1712 * Look for the first possible address;
1713 * if there's already something at this
1714 * address, we have to start after it.
1715 */
1716
1717 assert(first_free_is_valid(map));
1718 if (start == effective_min_offset) {
1719 if ((entry = map->first_free) != vm_map_to_entry(map))
1720 start = entry->vme_end;
1721 } else {
1722 vm_map_entry_t tmp_entry;
1723 if (vm_map_lookup_entry(map, start, &tmp_entry))
1724 start = tmp_entry->vme_end;
1725 entry = tmp_entry;
1726 }
1727
1728 /*
1729 * In any case, the "entry" always precedes
1730 * the proposed new region throughout the
1731 * loop:
1732 */
1733
1734 while (TRUE) {
1735 register vm_map_entry_t next;
1736
1737 /*
1738 * Find the end of the proposed new region.
1739 * Be sure we didn't go beyond the end, or
1740 * wrap around the address.
1741 */
1742
1743 end = ((start + mask) & ~mask);
1744 if (end < start)
1745 RETURN(KERN_NO_SPACE);
1746 start = end;
1747 end += size;
1748
1749 if ((end > effective_max_offset) || (end < start)) {
1750 if (map->wait_for_space) {
1751 if (size <= (effective_max_offset -
1752 effective_min_offset)) {
1753 assert_wait((event_t)map,
1754 THREAD_ABORTSAFE);
1755 vm_map_unlock(map);
1756 map_locked = FALSE;
1757 thread_block(THREAD_CONTINUE_NULL);
1758 goto StartAgain;
1759 }
1760 }
1761 RETURN(KERN_NO_SPACE);
1762 }
1763
1764 /*
1765 * If there are no more entries, we must win.
1766 */
1767
1768 next = entry->vme_next;
1769 if (next == vm_map_to_entry(map))
1770 break;
1771
1772 /*
1773 * If there is another entry, it must be
1774 * after the end of the potential new region.
1775 */
1776
1777 if (next->vme_start >= end)
1778 break;
1779
1780 /*
1781 * Didn't fit -- move to the next entry.
1782 */
1783
1784 entry = next;
1785 start = entry->vme_end;
1786 }
1787 *address = start;
1788 } else {
1789 /*
1790 * Verify that:
1791 * the address doesn't itself violate
1792 * the mask requirement.
1793 */
1794
1795 vm_map_lock(map);
1796 map_locked = TRUE;
1797 if ((start & mask) != 0)
1798 RETURN(KERN_NO_SPACE);
1799
1800 /*
1801 * ... the address is within bounds
1802 */
1803
1804 end = start + size;
1805
1806 if ((start < effective_min_offset) ||
1807 (end > effective_max_offset) ||
1808 (start >= end)) {
1809 RETURN(KERN_INVALID_ADDRESS);
1810 }
1811
1812 if (overwrite && zap_old_map != VM_MAP_NULL) {
1813 /*
1814 * Fixed mapping and "overwrite" flag: attempt to
1815 * remove all existing mappings in the specified
1816 * address range, saving them in our "zap_old_map".
1817 */
1818 (void) vm_map_delete(map, start, end,
1819 VM_MAP_REMOVE_SAVE_ENTRIES,
1820 zap_old_map);
1821 }
1822
1823 /*
1824 * ... the starting address isn't allocated
1825 */
1826
1827 if (vm_map_lookup_entry(map, start, &entry)) {
1828 if (! (flags & VM_FLAGS_ALREADY)) {
1829 RETURN(KERN_NO_SPACE);
1830 }
1831 /*
1832 * Check if what's already there is what we want.
1833 */
1834 tmp_start = start;
1835 tmp_offset = offset;
1836 if (entry->vme_start < start) {
1837 tmp_start -= start - entry->vme_start;
1838 tmp_offset -= start - entry->vme_start;
1839
1840 }
1841 for (; entry->vme_start < end;
1842 entry = entry->vme_next) {
1843 /*
1844 * Check if the mapping's attributes
1845 * match the existing map entry.
1846 */
1847 if (entry == vm_map_to_entry(map) ||
1848 entry->vme_start != tmp_start ||
1849 entry->is_sub_map != is_submap ||
1850 entry->offset != tmp_offset ||
1851 entry->needs_copy != needs_copy ||
1852 entry->protection != cur_protection ||
1853 entry->max_protection != max_protection ||
1854 entry->inheritance != inheritance ||
1855 entry->alias != alias) {
1856 /* not the same mapping ! */
1857 RETURN(KERN_NO_SPACE);
1858 }
1859 /*
1860 * Check if the same object is being mapped.
1861 */
1862 if (is_submap) {
1863 if (entry->object.sub_map !=
1864 (vm_map_t) object) {
1865 /* not the same submap */
1866 RETURN(KERN_NO_SPACE);
1867 }
1868 } else {
1869 if (entry->object.vm_object != object) {
1870 /* not the same VM object... */
1871 vm_object_t obj2;
1872
1873 obj2 = entry->object.vm_object;
1874 if ((obj2 == VM_OBJECT_NULL ||
1875 obj2->internal) &&
1876 (object == VM_OBJECT_NULL ||
1877 object->internal)) {
1878 /*
1879 * ... but both are
1880 * anonymous memory,
1881 * so equivalent.
1882 */
1883 } else {
1884 RETURN(KERN_NO_SPACE);
1885 }
1886 }
1887 }
1888
1889 tmp_offset += entry->vme_end - entry->vme_start;
1890 tmp_start += entry->vme_end - entry->vme_start;
1891 if (entry->vme_end >= end) {
1892 /* reached the end of our mapping */
1893 break;
1894 }
1895 }
1896 /* it all matches: let's use what's already there ! */
1897 RETURN(KERN_MEMORY_PRESENT);
1898 }
1899
1900 /*
1901 * ... the next region doesn't overlap the
1902 * end point.
1903 */
1904
1905 if ((entry->vme_next != vm_map_to_entry(map)) &&
1906 (entry->vme_next->vme_start < end))
1907 RETURN(KERN_NO_SPACE);
1908 }
1909
1910 /*
1911 * At this point,
1912 * "start" and "end" should define the endpoints of the
1913 * available new range, and
1914 * "entry" should refer to the region before the new
1915 * range, and
1916 *
1917 * the map should be locked.
1918 */
1919
1920 /*
1921 * See whether we can avoid creating a new entry (and object) by
1922 * extending one of our neighbors. [So far, we only attempt to
1923 * extend from below.] Note that we can never extend/join
1924 * purgable objects because they need to remain distinct
1925 * entities in order to implement their "volatile object"
1926 * semantics.
1927 */
1928
1929 if (purgable) {
1930 if (object == VM_OBJECT_NULL) {
1931 object = vm_object_allocate(size);
1932 object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
1933 object->purgable = VM_PURGABLE_NONVOLATILE;
1934 offset = (vm_object_offset_t)0;
1935 }
1936 } else if ((is_submap == FALSE) &&
1937 (object == VM_OBJECT_NULL) &&
1938 (entry != vm_map_to_entry(map)) &&
1939 (entry->vme_end == start) &&
1940 (!entry->is_shared) &&
1941 (!entry->is_sub_map) &&
1942 (entry->alias == alias) &&
1943 (entry->inheritance == inheritance) &&
1944 (entry->protection == cur_protection) &&
1945 (entry->max_protection == max_protection) &&
1946 (entry->behavior == VM_BEHAVIOR_DEFAULT) &&
1947 (entry->in_transition == 0) &&
1948 (entry->no_cache == no_cache) &&
1949 ((entry->vme_end - entry->vme_start) + size <=
1950 (alias == VM_MEMORY_REALLOC ?
1951 ANON_CHUNK_SIZE :
1952 NO_COALESCE_LIMIT)) &&
1953 (entry->wired_count == 0)) { /* implies user_wired_count == 0 */
1954 if (vm_object_coalesce(entry->object.vm_object,
1955 VM_OBJECT_NULL,
1956 entry->offset,
1957 (vm_object_offset_t) 0,
1958 (vm_map_size_t)(entry->vme_end - entry->vme_start),
1959 (vm_map_size_t)(end - entry->vme_end))) {
1960
1961 /*
1962 * Coalesced the two objects - can extend
1963 * the previous map entry to include the
1964 * new range.
1965 */
1966 map->size += (end - entry->vme_end);
1967 entry->vme_end = end;
1968 UPDATE_FIRST_FREE(map, map->first_free);
1969 RETURN(KERN_SUCCESS);
1970 }
1971 }
1972
1973 step = superpage_size ? SUPERPAGE_SIZE : (end - start);
1974 new_entry = NULL;
1975
1976 for (tmp2_start = start; tmp2_start<end; tmp2_start += step) {
1977 tmp2_end = tmp2_start + step;
1978 /*
1979 * Create a new entry
1980 * LP64todo - for now, we can only allocate 4GB internal objects
1981 * because the default pager can't page bigger ones. Remove this
1982 * when it can.
1983 *
1984 * XXX FBDP
1985 * The reserved "page zero" in each process's address space can
1986 * be arbitrarily large. Splitting it into separate 4GB objects and
1987 * therefore different VM map entries serves no purpose and just
1988 * slows down operations on the VM map, so let's not split the
1989 * allocation into 4GB chunks if the max protection is NONE. That
1990 * memory should never be accessible, so it will never get to the
1991 * default pager.
1992 */
1993 tmp_start = tmp2_start;
1994 if (object == VM_OBJECT_NULL &&
1995 size > (vm_map_size_t)ANON_CHUNK_SIZE &&
1996 max_protection != VM_PROT_NONE &&
1997 superpage_size == 0)
1998 tmp_end = tmp_start + (vm_map_size_t)ANON_CHUNK_SIZE;
1999 else
2000 tmp_end = tmp2_end;
2001 do {
2002 new_entry = vm_map_entry_insert(map, entry, tmp_start, tmp_end,
2003 object, offset, needs_copy,
2004 FALSE, FALSE,
2005 cur_protection, max_protection,
2006 VM_BEHAVIOR_DEFAULT,
2007 inheritance, 0, no_cache,
2008 permanent, superpage_size);
2009 new_entry->alias = alias;
2010 if (is_submap) {
2011 vm_map_t submap;
2012 boolean_t submap_is_64bit;
2013 boolean_t use_pmap;
2014
2015 new_entry->is_sub_map = TRUE;
2016 submap = (vm_map_t) object;
2017 submap_is_64bit = vm_map_is_64bit(submap);
2018 use_pmap = (alias == VM_MEMORY_SHARED_PMAP);
2019 #ifndef NO_NESTED_PMAP
2020 if (use_pmap && submap->pmap == NULL) {
2021 /* we need a sub pmap to nest... */
2022 submap->pmap = pmap_create(0, submap_is_64bit);
2023 if (submap->pmap == NULL) {
2024 /* let's proceed without nesting... */
2025 }
2026 }
2027 if (use_pmap && submap->pmap != NULL) {
2028 kr = pmap_nest(map->pmap,
2029 submap->pmap,
2030 tmp_start,
2031 tmp_start,
2032 tmp_end - tmp_start);
2033 if (kr != KERN_SUCCESS) {
2034 printf("vm_map_enter: "
2035 "pmap_nest(0x%llx,0x%llx) "
2036 "error 0x%x\n",
2037 (long long)tmp_start,
2038 (long long)tmp_end,
2039 kr);
2040 } else {
2041 /* we're now nested ! */
2042 new_entry->use_pmap = TRUE;
2043 pmap_empty = FALSE;
2044 }
2045 }
2046 #endif /* NO_NESTED_PMAP */
2047 }
2048 entry = new_entry;
2049
2050 if (superpage_size) {
2051 vm_page_t pages, m;
2052 vm_object_t sp_object;
2053
2054 entry->offset = 0;
2055
2056 /* allocate one superpage */
2057 kr = cpm_allocate(SUPERPAGE_SIZE, &pages, 0, SUPERPAGE_NBASEPAGES-1, TRUE, 0);
2058 if (kr != KERN_SUCCESS) {
2059 new_mapping_established = TRUE; /* will cause deallocation of whole range */
2060 RETURN(kr);
2061 }
2062
2063 /* create one vm_object per superpage */
2064 sp_object = vm_object_allocate((vm_map_size_t)(entry->vme_end - entry->vme_start));
2065 sp_object->phys_contiguous = TRUE;
2066 sp_object->shadow_offset = (vm_object_offset_t)pages->phys_page*PAGE_SIZE;
2067 entry->object.vm_object = sp_object;
2068
2069 /* enter the base pages into the object */
2070 vm_object_lock(sp_object);
2071 for (offset = 0; offset < SUPERPAGE_SIZE; offset += PAGE_SIZE) {
2072 m = pages;
2073 pmap_zero_page(m->phys_page);
2074 pages = NEXT_PAGE(m);
2075 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
2076 vm_page_insert(m, sp_object, offset);
2077 }
2078 vm_object_unlock(sp_object);
2079 }
2080 } while (tmp_end != tmp2_end &&
2081 (tmp_start = tmp_end) &&
2082 (tmp_end = (tmp2_end - tmp_end > (vm_map_size_t)ANON_CHUNK_SIZE) ?
2083 tmp_end + (vm_map_size_t)ANON_CHUNK_SIZE : tmp2_end));
2084 }
2085
2086 vm_map_unlock(map);
2087 map_locked = FALSE;
2088
2089 new_mapping_established = TRUE;
2090
2091 /* Wire down the new entry if the user
2092 * requested all new map entries be wired.
2093 */
2094 if ((map->wiring_required)||(superpage_size)) {
2095 pmap_empty = FALSE; /* pmap won't be empty */
2096 result = vm_map_wire(map, start, end,
2097 new_entry->protection, TRUE);
2098 RETURN(result);
2099 }
2100
2101 if ((object != VM_OBJECT_NULL) &&
2102 (vm_map_pmap_enter_enable) &&
2103 (!anywhere) &&
2104 (!needs_copy) &&
2105 (size < (128*1024))) {
2106 pmap_empty = FALSE; /* pmap won't be empty */
2107
2108 if (override_nx(map, alias) && cur_protection)
2109 cur_protection |= VM_PROT_EXECUTE;
2110
2111 vm_map_pmap_enter(map, start, end,
2112 object, offset, cur_protection);
2113 }
2114
2115 BailOut: ;
2116 if (result == KERN_SUCCESS) {
2117 vm_prot_t pager_prot;
2118 memory_object_t pager;
2119
2120 if (pmap_empty &&
2121 !(flags & VM_FLAGS_NO_PMAP_CHECK)) {
2122 assert(vm_map_pmap_is_empty(map,
2123 *address,
2124 *address+size));
2125 }
2126
2127 /*
2128 * For "named" VM objects, let the pager know that the
2129 * memory object is being mapped. Some pagers need to keep
2130 * track of this, to know when they can reclaim the memory
2131 * object, for example.
2132 * VM calls memory_object_map() for each mapping (specifying
2133 * the protection of each mapping) and calls
2134 * memory_object_last_unmap() when all the mappings are gone.
2135 */
2136 pager_prot = max_protection;
2137 if (needs_copy) {
2138 /*
2139 * Copy-On-Write mapping: won't modify
2140 * the memory object.
2141 */
2142 pager_prot &= ~VM_PROT_WRITE;
2143 }
2144 if (!is_submap &&
2145 object != VM_OBJECT_NULL &&
2146 object->named &&
2147 object->pager != MEMORY_OBJECT_NULL) {
2148 vm_object_lock(object);
2149 pager = object->pager;
2150 if (object->named &&
2151 pager != MEMORY_OBJECT_NULL) {
2152 assert(object->pager_ready);
2153 vm_object_mapping_wait(object, THREAD_UNINT);
2154 vm_object_mapping_begin(object);
2155 vm_object_unlock(object);
2156
2157 kr = memory_object_map(pager, pager_prot);
2158 assert(kr == KERN_SUCCESS);
2159
2160 vm_object_lock(object);
2161 vm_object_mapping_end(object);
2162 }
2163 vm_object_unlock(object);
2164 }
2165 } else {
2166 if (new_mapping_established) {
2167 /*
2168 * We have to get rid of the new mappings since we
2169 * won't make them available to the user.
2170 * Try to do that atomically, to minimize the risk
2171 * that someone else creates new mappings in that range.
2172 */
2173 zap_new_map = vm_map_create(PMAP_NULL,
2174 *address,
2175 *address + size,
2176 map->hdr.entries_pageable);
2177 if (!map_locked) {
2178 vm_map_lock(map);
2179 map_locked = TRUE;
2180 }
2181 (void) vm_map_delete(map, *address, *address+size,
2182 VM_MAP_REMOVE_SAVE_ENTRIES,
2183 zap_new_map);
2184 }
2185 if (zap_old_map != VM_MAP_NULL &&
2186 zap_old_map->hdr.nentries != 0) {
2187 vm_map_entry_t entry1, entry2;
2188
2189 /*
2190 * The new mapping failed. Attempt to restore
2191 * the old mappings, saved in the "zap_old_map".
2192 */
2193 if (!map_locked) {
2194 vm_map_lock(map);
2195 map_locked = TRUE;
2196 }
2197
2198 /* first check if the coast is still clear */
2199 start = vm_map_first_entry(zap_old_map)->vme_start;
2200 end = vm_map_last_entry(zap_old_map)->vme_end;
2201 if (vm_map_lookup_entry(map, start, &entry1) ||
2202 vm_map_lookup_entry(map, end, &entry2) ||
2203 entry1 != entry2) {
2204 /*
2205 * Part of that range has already been
2206 * re-mapped: we can't restore the old
2207 * mappings...
2208 */
2209 vm_map_enter_restore_failures++;
2210 } else {
2211 /*
2212 * Transfer the saved map entries from
2213 * "zap_old_map" to the original "map",
2214 * inserting them all after "entry1".
2215 */
2216 for (entry2 = vm_map_first_entry(zap_old_map);
2217 entry2 != vm_map_to_entry(zap_old_map);
2218 entry2 = vm_map_first_entry(zap_old_map)) {
2219 vm_map_size_t entry_size;
2220
2221 entry_size = (entry2->vme_end -
2222 entry2->vme_start);
2223 vm_map_entry_unlink(zap_old_map,
2224 entry2);
2225 zap_old_map->size -= entry_size;
2226 vm_map_entry_link(map, entry1, entry2);
2227 map->size += entry_size;
2228 entry1 = entry2;
2229 }
2230 if (map->wiring_required) {
2231 /*
2232 * XXX TODO: we should rewire the
2233 * old pages here...
2234 */
2235 }
2236 vm_map_enter_restore_successes++;
2237 }
2238 }
2239 }
2240
2241 if (map_locked) {
2242 vm_map_unlock(map);
2243 }
2244
2245 /*
2246 * Get rid of the "zap_maps" and all the map entries that
2247 * they may still contain.
2248 */
2249 if (zap_old_map != VM_MAP_NULL) {
2250 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
2251 zap_old_map = VM_MAP_NULL;
2252 }
2253 if (zap_new_map != VM_MAP_NULL) {
2254 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
2255 zap_new_map = VM_MAP_NULL;
2256 }
2257
2258 return result;
2259
2260 #undef RETURN
2261 }
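/*
 * A minimal usage sketch for vm_map_enter(), assuming a kernel-internal
 * caller that wants anonymous, pageable memory placed anywhere in "map".
 * The local names ("addr", "len", "requested_size", "kr") are placeholders;
 * the flags and protections shown are simply the common defaults used
 * elsewhere in this file.
 *
 *	vm_map_offset_t	addr = 0;
 *	vm_map_size_t	len = vm_map_round_page(requested_size);
 *	kern_return_t	kr;
 *
 *	kr = vm_map_enter(map, &addr, len,
 *			  (vm_map_offset_t) 0,		// no alignment mask
 *			  VM_FLAGS_ANYWHERE,		// let the map choose "addr"
 *			  VM_OBJECT_NULL,		// anonymous memory
 *			  (vm_object_offset_t) 0,
 *			  FALSE,			// needs_copy
 *			  VM_PROT_READ | VM_PROT_WRITE,	// cur_protection
 *			  VM_PROT_ALL,			// max_protection
 *			  VM_INHERIT_DEFAULT);
 *
 * On KERN_SUCCESS, "addr" holds the start of the new mapping.
 */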
2262
2263 kern_return_t
2264 vm_map_enter_mem_object(
2265 vm_map_t target_map,
2266 vm_map_offset_t *address,
2267 vm_map_size_t initial_size,
2268 vm_map_offset_t mask,
2269 int flags,
2270 ipc_port_t port,
2271 vm_object_offset_t offset,
2272 boolean_t copy,
2273 vm_prot_t cur_protection,
2274 vm_prot_t max_protection,
2275 vm_inherit_t inheritance)
2276 {
2277 vm_map_address_t map_addr;
2278 vm_map_size_t map_size;
2279 vm_object_t object;
2280 vm_object_size_t size;
2281 kern_return_t result;
2282
2283 /*
2284 * Check arguments for validity
2285 */
2286 if ((target_map == VM_MAP_NULL) ||
2287 (cur_protection & ~VM_PROT_ALL) ||
2288 (max_protection & ~VM_PROT_ALL) ||
2289 (inheritance > VM_INHERIT_LAST_VALID) ||
2290 initial_size == 0)
2291 return KERN_INVALID_ARGUMENT;
2292
2293 map_addr = vm_map_trunc_page(*address);
2294 map_size = vm_map_round_page(initial_size);
2295 size = vm_object_round_page(initial_size);
2296
2297 /*
2298 * Find the vm object (if any) corresponding to this port.
2299 */
2300 if (!IP_VALID(port)) {
2301 object = VM_OBJECT_NULL;
2302 offset = 0;
2303 copy = FALSE;
2304 } else if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
2305 vm_named_entry_t named_entry;
2306
2307 named_entry = (vm_named_entry_t) port->ip_kobject;
2308 /* a few checks to make sure user is obeying rules */
2309 if (size == 0) {
2310 if (offset >= named_entry->size)
2311 return KERN_INVALID_RIGHT;
2312 size = named_entry->size - offset;
2313 }
2314 if ((named_entry->protection & max_protection) !=
2315 max_protection)
2316 return KERN_INVALID_RIGHT;
2317 if ((named_entry->protection & cur_protection) !=
2318 cur_protection)
2319 return KERN_INVALID_RIGHT;
2320 if (named_entry->size < (offset + size))
2321 return KERN_INVALID_ARGUMENT;
2322
2323 /* the caller's "offset" parameter is relative to the start of the */
2324 /* named entry; convert it to an offset within the backing object */
2325 offset = offset + named_entry->offset;
2326
2327 named_entry_lock(named_entry);
2328 if (named_entry->is_sub_map) {
2329 vm_map_t submap;
2330
2331 submap = named_entry->backing.map;
2332 vm_map_lock(submap);
2333 vm_map_reference(submap);
2334 vm_map_unlock(submap);
2335 named_entry_unlock(named_entry);
2336
2337 result = vm_map_enter(target_map,
2338 &map_addr,
2339 map_size,
2340 mask,
2341 flags | VM_FLAGS_SUBMAP,
2342 (vm_object_t) submap,
2343 offset,
2344 copy,
2345 cur_protection,
2346 max_protection,
2347 inheritance);
2348 if (result != KERN_SUCCESS) {
2349 vm_map_deallocate(submap);
2350 } else {
2351 /*
2352 * No need to lock "submap" just to check its
2353 * "mapped" flag: that flag is never reset
2354 * once it's been set and if we race, we'll
2355 * just end up setting it twice, which is OK.
2356 */
2357 if (submap->mapped == FALSE) {
2358 /*
2359 * This submap has never been mapped.
2360 * Set its "mapped" flag now that it
2361 * has been mapped.
2362 * This happens only for the first ever
2363 * mapping of a "submap".
2364 */
2365 vm_map_lock(submap);
2366 submap->mapped = TRUE;
2367 vm_map_unlock(submap);
2368 }
2369 *address = map_addr;
2370 }
2371 return result;
2372
2373 } else if (named_entry->is_pager) {
2374 unsigned int access;
2375 vm_prot_t protections;
2376 unsigned int wimg_mode;
2377 boolean_t cache_attr;
2378
2379 protections = named_entry->protection & VM_PROT_ALL;
2380 access = GET_MAP_MEM(named_entry->protection);
2381
2382 object = vm_object_enter(named_entry->backing.pager,
2383 named_entry->size,
2384 named_entry->internal,
2385 FALSE,
2386 FALSE);
2387 if (object == VM_OBJECT_NULL) {
2388 named_entry_unlock(named_entry);
2389 return KERN_INVALID_OBJECT;
2390 }
2391
2392 /* JMM - drop reference on pager here */
2393
2394 /* create an extra ref for the named entry */
2395 vm_object_lock(object);
2396 vm_object_reference_locked(object);
2397 named_entry->backing.object = object;
2398 named_entry->is_pager = FALSE;
2399 named_entry_unlock(named_entry);
2400
2401 wimg_mode = object->wimg_bits;
2402 if (access == MAP_MEM_IO) {
2403 wimg_mode = VM_WIMG_IO;
2404 } else if (access == MAP_MEM_COPYBACK) {
2405 wimg_mode = VM_WIMG_USE_DEFAULT;
2406 } else if (access == MAP_MEM_WTHRU) {
2407 wimg_mode = VM_WIMG_WTHRU;
2408 } else if (access == MAP_MEM_WCOMB) {
2409 wimg_mode = VM_WIMG_WCOMB;
2410 }
2411 if (wimg_mode == VM_WIMG_IO ||
2412 wimg_mode == VM_WIMG_WCOMB)
2413 cache_attr = TRUE;
2414 else
2415 cache_attr = FALSE;
2416
2417 /* wait for object (if any) to be ready */
2418 if (!named_entry->internal) {
2419 while (!object->pager_ready) {
2420 vm_object_wait(
2421 object,
2422 VM_OBJECT_EVENT_PAGER_READY,
2423 THREAD_UNINT);
2424 vm_object_lock(object);
2425 }
2426 }
2427
2428 if (object->wimg_bits != wimg_mode) {
2429 vm_page_t p;
2430
2431 vm_object_paging_wait(object, THREAD_UNINT);
2432
2433 object->wimg_bits = wimg_mode;
2434 queue_iterate(&object->memq, p, vm_page_t, listq) {
2435 if (!p->fictitious) {
2436 if (p->pmapped)
2437 pmap_disconnect(p->phys_page);
2438 if (cache_attr)
2439 pmap_sync_page_attributes_phys(p->phys_page);
2440 }
2441 }
2442 }
2443 object->true_share = TRUE;
2444 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
2445 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
2446 vm_object_unlock(object);
2447 } else {
2448 /* This is the case where we are going to map */
2449 /* an already mapped object. If the object is */
2450 /* not ready it is internal. An external */
2451 /* object cannot be mapped until it is ready, */
2452 /* so we can avoid the ready check */
2453 /* in this case. */
2454 object = named_entry->backing.object;
2455 assert(object != VM_OBJECT_NULL);
2456 named_entry_unlock(named_entry);
2457 vm_object_reference(object);
2458 }
2459 } else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
2460 /*
2461 * JMM - This is temporary until we unify named entries
2462 * and raw memory objects.
2463 *
2464 * Detected fake ip_kotype for a memory object. In
2465 * this case, the port isn't really a port at all, but
2466 * instead is just a raw memory object.
2467 */
2468
2469 object = vm_object_enter((memory_object_t)port,
2470 size, FALSE, FALSE, FALSE);
2471 if (object == VM_OBJECT_NULL)
2472 return KERN_INVALID_OBJECT;
2473
2474 /* wait for object (if any) to be ready */
2475 if (object != VM_OBJECT_NULL) {
2476 if (object == kernel_object) {
2477 printf("Warning: Attempt to map kernel object"
2478 " by a non-private kernel entity\n");
2479 return KERN_INVALID_OBJECT;
2480 }
2481 if (!object->pager_ready) {
2482 vm_object_lock(object);
2483
2484 while (!object->pager_ready) {
2485 vm_object_wait(object,
2486 VM_OBJECT_EVENT_PAGER_READY,
2487 THREAD_UNINT);
2488 vm_object_lock(object);
2489 }
2490 vm_object_unlock(object);
2491 }
2492 }
2493 } else {
2494 return KERN_INVALID_OBJECT;
2495 }
2496
2497 if (object != VM_OBJECT_NULL &&
2498 object->named &&
2499 object->pager != MEMORY_OBJECT_NULL &&
2500 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
2501 memory_object_t pager;
2502 vm_prot_t pager_prot;
2503 kern_return_t kr;
2504
2505 /*
2506 * For "named" VM objects, let the pager know that the
2507 * memory object is being mapped. Some pagers need to keep
2508 * track of this, to know when they can reclaim the memory
2509 * object, for example.
2510 * VM calls memory_object_map() for each mapping (specifying
2511 * the protection of each mapping) and calls
2512 * memory_object_last_unmap() when all the mappings are gone.
2513 */
2514 pager_prot = max_protection;
2515 if (copy) {
2516 /*
2517 * Copy-On-Write mapping: won't modify the
2518 * memory object.
2519 */
2520 pager_prot &= ~VM_PROT_WRITE;
2521 }
2522 vm_object_lock(object);
2523 pager = object->pager;
2524 if (object->named &&
2525 pager != MEMORY_OBJECT_NULL &&
2526 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
2527 assert(object->pager_ready);
2528 vm_object_mapping_wait(object, THREAD_UNINT);
2529 vm_object_mapping_begin(object);
2530 vm_object_unlock(object);
2531
2532 kr = memory_object_map(pager, pager_prot);
2533 assert(kr == KERN_SUCCESS);
2534
2535 vm_object_lock(object);
2536 vm_object_mapping_end(object);
2537 }
2538 vm_object_unlock(object);
2539 }
2540
2541 /*
2542 * Perform the copy if requested
2543 */
2544
2545 if (copy) {
2546 vm_object_t new_object;
2547 vm_object_offset_t new_offset;
2548
2549 result = vm_object_copy_strategically(object, offset, size,
2550 &new_object, &new_offset,
2551 &copy);
2552
2553
2554 if (result == KERN_MEMORY_RESTART_COPY) {
2555 boolean_t success;
2556 boolean_t src_needs_copy;
2557
2558 /*
2559 * XXX
2560 * We currently ignore src_needs_copy.
2561 * This really is the issue of how to make
2562 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
2563 * non-kernel users to use. Solution forthcoming.
2564 * In the meantime, since we don't allow non-kernel
2565 * memory managers to specify symmetric copy,
2566 * we won't run into problems here.
2567 */
2568 new_object = object;
2569 new_offset = offset;
2570 success = vm_object_copy_quickly(&new_object,
2571 new_offset, size,
2572 &src_needs_copy,
2573 &copy);
2574 assert(success);
2575 result = KERN_SUCCESS;
2576 }
2577 /*
2578 * Throw away the reference to the
2579 * original object, as it won't be mapped.
2580 */
2581
2582 vm_object_deallocate(object);
2583
2584 if (result != KERN_SUCCESS)
2585 return result;
2586
2587 object = new_object;
2588 offset = new_offset;
2589 }
2590
2591 result = vm_map_enter(target_map,
2592 &map_addr, map_size,
2593 (vm_map_offset_t)mask,
2594 flags,
2595 object, offset,
2596 copy,
2597 cur_protection, max_protection, inheritance);
2598 if (result != KERN_SUCCESS)
2599 vm_object_deallocate(object);
2600 *address = map_addr;
2601 return result;
2602 }
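/*
 * A hedged usage sketch for vm_map_enter_mem_object(): mapping a named
 * entry port copy-on-write.  "mem_entry_port" is a placeholder for an
 * ipc_port_t the caller obtained elsewhere (e.g. via
 * mach_make_memory_entry_64); "addr", "size" and "kr" are placeholders
 * as well.
 *
 *	kr = vm_map_enter_mem_object(target_map, &addr, size,
 *				     (vm_map_offset_t) 0,
 *				     VM_FLAGS_ANYWHERE,
 *				     mem_entry_port,
 *				     (vm_object_offset_t) 0,
 *				     TRUE,		// copy (copy-on-write)
 *				     VM_PROT_READ,
 *				     VM_PROT_READ | VM_PROT_WRITE,
 *				     VM_INHERIT_DEFAULT);
 *
 * Note from the checks above that the named entry's protection must
 * cover both "cur_protection" and "max_protection", otherwise the call
 * fails with KERN_INVALID_RIGHT.
 */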
2603
2604
2605
2606
2607 kern_return_t
2608 vm_map_enter_mem_object_control(
2609 vm_map_t target_map,
2610 vm_map_offset_t *address,
2611 vm_map_size_t initial_size,
2612 vm_map_offset_t mask,
2613 int flags,
2614 memory_object_control_t control,
2615 vm_object_offset_t offset,
2616 boolean_t copy,
2617 vm_prot_t cur_protection,
2618 vm_prot_t max_protection,
2619 vm_inherit_t inheritance)
2620 {
2621 vm_map_address_t map_addr;
2622 vm_map_size_t map_size;
2623 vm_object_t object;
2624 vm_object_size_t size;
2625 kern_return_t result;
2626 memory_object_t pager;
2627 vm_prot_t pager_prot;
2628 kern_return_t kr;
2629
2630 /*
2631 * Check arguments for validity
2632 */
2633 if ((target_map == VM_MAP_NULL) ||
2634 (cur_protection & ~VM_PROT_ALL) ||
2635 (max_protection & ~VM_PROT_ALL) ||
2636 (inheritance > VM_INHERIT_LAST_VALID) ||
2637 initial_size == 0)
2638 return KERN_INVALID_ARGUMENT;
2639
2640 map_addr = vm_map_trunc_page(*address);
2641 map_size = vm_map_round_page(initial_size);
2642 size = vm_object_round_page(initial_size);
2643
2644 object = memory_object_control_to_vm_object(control);
2645
2646 if (object == VM_OBJECT_NULL)
2647 return KERN_INVALID_OBJECT;
2648
2649 if (object == kernel_object) {
2650 printf("Warning: Attempt to map kernel object"
2651 " by a non-private kernel entity\n");
2652 return KERN_INVALID_OBJECT;
2653 }
2654
2655 vm_object_lock(object);
2656 object->ref_count++;
2657 vm_object_res_reference(object);
2658
2659 /*
2660 * For "named" VM objects, let the pager know that the
2661 * memory object is being mapped. Some pagers need to keep
2662 * track of this, to know when they can reclaim the memory
2663 * object, for example.
2664 * VM calls memory_object_map() for each mapping (specifying
2665 * the protection of each mapping) and calls
2666 * memory_object_last_unmap() when all the mappings are gone.
2667 */
2668 pager_prot = max_protection;
2669 if (copy) {
2670 pager_prot &= ~VM_PROT_WRITE;
2671 }
2672 pager = object->pager;
2673 if (object->named &&
2674 pager != MEMORY_OBJECT_NULL &&
2675 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
2676 assert(object->pager_ready);
2677 vm_object_mapping_wait(object, THREAD_UNINT);
2678 vm_object_mapping_begin(object);
2679 vm_object_unlock(object);
2680
2681 kr = memory_object_map(pager, pager_prot);
2682 assert(kr == KERN_SUCCESS);
2683
2684 vm_object_lock(object);
2685 vm_object_mapping_end(object);
2686 }
2687 vm_object_unlock(object);
2688
2689 /*
2690 * Perform the copy if requested
2691 */
2692
2693 if (copy) {
2694 vm_object_t new_object;
2695 vm_object_offset_t new_offset;
2696
2697 result = vm_object_copy_strategically(object, offset, size,
2698 &new_object, &new_offset,
2699 &copy);
2700
2701
2702 if (result == KERN_MEMORY_RESTART_COPY) {
2703 boolean_t success;
2704 boolean_t src_needs_copy;
2705
2706 /*
2707 * XXX
2708 * We currently ignore src_needs_copy.
2709 * This really is the issue of how to make
2710 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
2711 * non-kernel users to use. Solution forthcoming.
2712 * In the meantime, since we don't allow non-kernel
2713 * memory managers to specify symmetric copy,
2714 * we won't run into problems here.
2715 */
2716 new_object = object;
2717 new_offset = offset;
2718 success = vm_object_copy_quickly(&new_object,
2719 new_offset, size,
2720 &src_needs_copy,
2721 &copy);
2722 assert(success);
2723 result = KERN_SUCCESS;
2724 }
2725 /*
2726 * Throw away the reference to the
2727 * original object, as it won't be mapped.
2728 */
2729
2730 vm_object_deallocate(object);
2731
2732 if (result != KERN_SUCCESS)
2733 return result;
2734
2735 object = new_object;
2736 offset = new_offset;
2737 }
2738
2739 result = vm_map_enter(target_map,
2740 &map_addr, map_size,
2741 (vm_map_offset_t)mask,
2742 flags,
2743 object, offset,
2744 copy,
2745 cur_protection, max_protection, inheritance);
2746 if (result != KERN_SUCCESS)
2747 vm_object_deallocate(object);
2748 *address = map_addr;
2749
2750 return result;
2751 }
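/*
 * Sketch of the control-based variant above, under the assumption that
 * the caller already holds a memory_object_control_t ("ctrl" is a
 * placeholder, as are "addr", "size" and "kr").  It behaves like
 * vm_map_enter_mem_object() but takes the control directly instead of
 * resolving a port:
 *
 *	kr = vm_map_enter_mem_object_control(target_map, &addr, size,
 *					     (vm_map_offset_t) 0,
 *					     VM_FLAGS_ANYWHERE,
 *					     ctrl,
 *					     (vm_object_offset_t) 0,
 *					     FALSE,		// no copy
 *					     VM_PROT_READ,
 *					     VM_PROT_READ,
 *					     VM_INHERIT_DEFAULT);
 */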
2752
2753
2754 #if VM_CPM
2755
2756 #ifdef MACH_ASSERT
2757 extern pmap_paddr_t avail_start, avail_end;
2758 #endif
2759
2760 /*
2761 * Allocate memory in the specified map, with the caveat that
2762 * the memory is physically contiguous. This call may fail
2763 * if the system can't find sufficient contiguous memory.
2764 * This call may cause or lead to heart-stopping amounts of
2765 * paging activity.
2766 *
2767 * Memory obtained from this call should be freed in the
2768 * normal way, viz., via vm_deallocate.
2769 */
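/*
 * A minimal usage sketch, assuming "map" and "size" come from the
 * caller and that VM_CPM is configured:
 *
 *	vm_map_offset_t	addr = 0;
 *	kern_return_t	kr;
 *
 *	kr = vm_map_enter_cpm(map, &addr, size, VM_FLAGS_ANYWHERE);
 *	if (kr == KERN_SUCCESS) {
 *		// "addr" now names "size" bytes of wired, physically
 *		// contiguous memory; free it through the normal
 *		// deallocation path on the same map, as noted above.
 *	}
 */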
2770 kern_return_t
2771 vm_map_enter_cpm(
2772 vm_map_t map,
2773 vm_map_offset_t *addr,
2774 vm_map_size_t size,
2775 int flags)
2776 {
2777 vm_object_t cpm_obj;
2778 pmap_t pmap;
2779 vm_page_t m, pages;
2780 kern_return_t kr;
2781 vm_map_offset_t va, start, end, offset;
2782 #if MACH_ASSERT
2783 vm_map_offset_t prev_addr;
2784 #endif /* MACH_ASSERT */
2785
2786 boolean_t anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
2787
2788 if (!vm_allocate_cpm_enabled)
2789 return KERN_FAILURE;
2790
2791 if (size == 0) {
2792 *addr = 0;
2793 return KERN_SUCCESS;
2794 }
2795 if (anywhere)
2796 *addr = vm_map_min(map);
2797 else
2798 *addr = vm_map_trunc_page(*addr);
2799 size = vm_map_round_page(size);
2800
2801 /*
2802 * LP64todo - cpm_allocate should probably allow
2803 * allocations of >4GB, but not with the current
2804 * algorithm, so just cast down the size for now.
2805 */
2806 if (size > VM_MAX_ADDRESS)
2807 return KERN_RESOURCE_SHORTAGE;
2808 if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
2809 &pages, 0, 0, TRUE, flags)) != KERN_SUCCESS)
2810 return kr;
2811
2812 cpm_obj = vm_object_allocate((vm_object_size_t)size);
2813 assert(cpm_obj != VM_OBJECT_NULL);
2814 assert(cpm_obj->internal);
2815 assert(cpm_obj->size == (vm_object_size_t)size);
2816 assert(cpm_obj->can_persist == FALSE);
2817 assert(cpm_obj->pager_created == FALSE);
2818 assert(cpm_obj->pageout == FALSE);
2819 assert(cpm_obj->shadow == VM_OBJECT_NULL);
2820
2821 /*
2822 * Insert pages into object.
2823 */
2824
2825 vm_object_lock(cpm_obj);
2826 for (offset = 0; offset < size; offset += PAGE_SIZE) {
2827 m = pages;
2828 pages = NEXT_PAGE(m);
2829 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
2830
2831 assert(!m->gobbled);
2832 assert(!m->wanted);
2833 assert(!m->pageout);
2834 assert(!m->tabled);
2835 assert(VM_PAGE_WIRED(m));
2836 /*
2837 * ENCRYPTED SWAP:
2838 * "m" is not supposed to be pageable, so it
2839 * should not be encrypted. It wouldn't be safe
2840 * to enter it in a new VM object while encrypted.
2841 */
2842 ASSERT_PAGE_DECRYPTED(m);
2843 assert(m->busy);
2844 assert(m->phys_page>=(avail_start>>PAGE_SHIFT) && m->phys_page<=(avail_end>>PAGE_SHIFT));
2845
2846 m->busy = FALSE;
2847 vm_page_insert(m, cpm_obj, offset);
2848 }
2849 assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
2850 vm_object_unlock(cpm_obj);
2851
2852 /*
2853 * Hang onto a reference on the object in case a
2854 * multi-threaded application for some reason decides
2855 * to deallocate the portion of the address space into
2856 * which we will insert this object.
2857 *
2858 * Unfortunately, we must insert the object now before
2859 * we can talk to the pmap module about which addresses
2860 * must be wired down. Hence, the race with a multi-
2861 * threaded app.
2862 */
2863 vm_object_reference(cpm_obj);
2864
2865 /*
2866 * Insert object into map.
2867 */
2868
2869 kr = vm_map_enter(
2870 map,
2871 addr,
2872 size,
2873 (vm_map_offset_t)0,
2874 flags,
2875 cpm_obj,
2876 (vm_object_offset_t)0,
2877 FALSE,
2878 VM_PROT_ALL,
2879 VM_PROT_ALL,
2880 VM_INHERIT_DEFAULT);
2881
2882 if (kr != KERN_SUCCESS) {
2883 /*
2884 * A CPM object doesn't have can_persist set,
2885 * so all we have to do is deallocate it to
2886 * free up these pages.
2887 */
2888 assert(cpm_obj->pager_created == FALSE);
2889 assert(cpm_obj->can_persist == FALSE);
2890 assert(cpm_obj->pageout == FALSE);
2891 assert(cpm_obj->shadow == VM_OBJECT_NULL);
2892 vm_object_deallocate(cpm_obj); /* kill acquired ref */
2893 vm_object_deallocate(cpm_obj); /* kill creation ref */
2894 }
2895
2896 /*
2897 * Inform the physical mapping system that the
2898 * range of addresses may not fault, so that
2899 * page tables and such can be locked down as well.
2900 */
2901 start = *addr;
2902 end = start + size;
2903 pmap = vm_map_pmap(map);
2904 pmap_pageable(pmap, start, end, FALSE);
2905
2906 /*
2907 * Enter each page into the pmap, to avoid faults.
2908 * Note that this loop could be coded more efficiently,
2909 * if the need arose, rather than looking up each page
2910 * again.
2911 */
2912 for (offset = 0, va = start; offset < size;
2913 va += PAGE_SIZE, offset += PAGE_SIZE) {
2914 int type_of_fault;
2915
2916 vm_object_lock(cpm_obj);
2917 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
2918 assert(m != VM_PAGE_NULL);
2919
2920 vm_page_zero_fill(m);
2921
2922 type_of_fault = DBG_ZERO_FILL_FAULT;
2923
2924 vm_fault_enter(m, pmap, va, VM_PROT_ALL,
2925 VM_PAGE_WIRED(m), FALSE, FALSE,
2926 &type_of_fault);
2927
2928 vm_object_unlock(cpm_obj);
2929 }
2930
2931 #if MACH_ASSERT
2932 /*
2933 * Verify ordering in address space.
2934 */
2935 for (offset = 0; offset < size; offset += PAGE_SIZE) {
2936 vm_object_lock(cpm_obj);
2937 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
2938 vm_object_unlock(cpm_obj);
2939 if (m == VM_PAGE_NULL)
2940 panic("vm_allocate_cpm: obj 0x%x off 0x%x no page",
2941 cpm_obj, offset);
2942 assert(m->tabled);
2943 assert(!m->busy);
2944 assert(!m->wanted);
2945 assert(!m->fictitious);
2946 assert(!m->private);
2947 assert(!m->absent);
2948 assert(!m->error);
2949 assert(!m->cleaning);
2950 assert(!m->precious);
2951 assert(!m->clustered);
2952 if (offset != 0) {
2953 if (m->phys_page != prev_addr + 1) {
2954 printf("start 0x%x end 0x%x va 0x%x\n",
2955 start, end, va);
2956 printf("obj 0x%x off 0x%x\n", cpm_obj, offset);
2957 printf("m 0x%x prev_address 0x%x\n", m,
2958 prev_addr);
2959 panic("vm_allocate_cpm: pages not contig!");
2960 }
2961 }
2962 prev_addr = m->phys_page;
2963 }
2964 #endif /* MACH_ASSERT */
2965
2966 vm_object_deallocate(cpm_obj); /* kill extra ref */
2967
2968 return kr;
2969 }
2970
2971
2972 #else /* VM_CPM */
2973
2974 /*
2975 * Interface is defined in all cases, but unless the kernel
2976 * is built explicitly for this option, the interface does
2977 * nothing.
2978 */
2979
2980 kern_return_t
2981 vm_map_enter_cpm(
2982 __unused vm_map_t map,
2983 __unused vm_map_offset_t *addr,
2984 __unused vm_map_size_t size,
2985 __unused int flags)
2986 {
2987 return KERN_FAILURE;
2988 }
2989 #endif /* VM_CPM */
2990
2991 /* Not used without nested pmaps */
2992 #ifndef NO_NESTED_PMAP
2993 /*
2994 * Clip and unnest a portion of a nested submap mapping.
2995 */
2996
2997
2998 static void
2999 vm_map_clip_unnest(
3000 vm_map_t map,
3001 vm_map_entry_t entry,
3002 vm_map_offset_t start_unnest,
3003 vm_map_offset_t end_unnest)
3004 {
3005 vm_map_offset_t old_start_unnest = start_unnest;
3006 vm_map_offset_t old_end_unnest = end_unnest;
3007
3008 assert(entry->is_sub_map);
3009 assert(entry->object.sub_map != NULL);
3010
3011 /*
3012 * Query the platform for the optimal unnest range.
3013 * DRK: There's some duplication of effort here, since
3014 * callers may have adjusted the range to some extent. This
3015 * routine was introduced to support 1GiB subtree nesting
3016 * for x86 platforms, which can also nest on 2MiB boundaries
3017 * depending on size/alignment.
3018 */
3019 if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) {
3020 log_unnest_badness(map, old_start_unnest, old_end_unnest);
3021 }
3022
3023 if (entry->vme_start > start_unnest ||
3024 entry->vme_end < end_unnest) {
3025 panic("vm_map_clip_unnest(0x%llx,0x%llx): "
3026 "bad nested entry: start=0x%llx end=0x%llx\n",
3027 (long long)start_unnest, (long long)end_unnest,
3028 (long long)entry->vme_start, (long long)entry->vme_end);
3029 }
3030
3031 if (start_unnest > entry->vme_start) {
3032 _vm_map_clip_start(&map->hdr,
3033 entry,
3034 start_unnest);
3035 UPDATE_FIRST_FREE(map, map->first_free);
3036 }
3037 if (entry->vme_end > end_unnest) {
3038 _vm_map_clip_end(&map->hdr,
3039 entry,
3040 end_unnest);
3041 UPDATE_FIRST_FREE(map, map->first_free);
3042 }
3043
3044 pmap_unnest(map->pmap,
3045 entry->vme_start,
3046 entry->vme_end - entry->vme_start);
3047 if ((map->mapped) && (map->ref_count)) {
3048 /* clean up parent map/maps */
3049 vm_map_submap_pmap_clean(
3050 map, entry->vme_start,
3051 entry->vme_end,
3052 entry->object.sub_map,
3053 entry->offset);
3054 }
3055 entry->use_pmap = FALSE;
3056 }
3057 #endif /* NO_NESTED_PMAP */
3058
3059 /*
3060 * vm_map_clip_start: [ internal use only ]
3061 *
3062 * Asserts that the given entry begins at or after
3063 * the specified address; if necessary,
3064 * it splits the entry into two.
3065 */
3066 static void
3067 vm_map_clip_start(
3068 vm_map_t map,
3069 vm_map_entry_t entry,
3070 vm_map_offset_t startaddr)
3071 {
3072 #ifndef NO_NESTED_PMAP
3073 if (entry->use_pmap &&
3074 startaddr >= entry->vme_start) {
3075 vm_map_offset_t start_unnest, end_unnest;
3076
3077 /*
3078 * Make sure "startaddr" is no longer in a nested range
3079 * before we clip. Unnest only the minimum range the platform
3080 * can handle.
3081 * vm_map_clip_unnest may perform additional adjustments to
3082 * the unnest range.
3083 */
3084 start_unnest = startaddr & ~(pmap_nesting_size_min - 1);
3085 end_unnest = start_unnest + pmap_nesting_size_min;
3086 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
3087 }
3088 #endif /* NO_NESTED_PMAP */
3089 if (startaddr > entry->vme_start) {
3090 if (entry->object.vm_object &&
3091 !entry->is_sub_map &&
3092 entry->object.vm_object->phys_contiguous) {
3093 pmap_remove(map->pmap,
3094 (addr64_t)(entry->vme_start),
3095 (addr64_t)(entry->vme_end));
3096 }
3097 _vm_map_clip_start(&map->hdr, entry, startaddr);
3098 UPDATE_FIRST_FREE(map, map->first_free);
3099 }
3100 }
3101
3102
3103 #define vm_map_copy_clip_start(copy, entry, startaddr) \
3104 MACRO_BEGIN \
3105 if ((startaddr) > (entry)->vme_start) \
3106 _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \
3107 MACRO_END
3108
3109 /*
3110 * This routine is called only when it is known that
3111 * the entry must be split.
3112 */
3113 static void
3114 _vm_map_clip_start(
3115 register struct vm_map_header *map_header,
3116 register vm_map_entry_t entry,
3117 register vm_map_offset_t start)
3118 {
3119 register vm_map_entry_t new_entry;
3120
3121 /*
3122 * Split off the front portion --
3123 * note that we must insert the new
3124 * entry BEFORE this one, so that
3125 * this entry has the specified starting
3126 * address.
3127 */
3128
3129 new_entry = _vm_map_entry_create(map_header);
3130 vm_map_entry_copy_full(new_entry, entry);
3131
3132 new_entry->vme_end = start;
3133 entry->offset += (start - entry->vme_start);
3134 entry->vme_start = start;
3135
3136 _vm_map_entry_link(map_header, entry->vme_prev, new_entry);
3137
3138 if (entry->is_sub_map)
3139 vm_map_reference(new_entry->object.sub_map);
3140 else
3141 vm_object_reference(new_entry->object.vm_object);
3142 }
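/*
 * Pictorially (hypothetical addresses): clipping an entry covering
 * [0x1000, 0x5000) at start address 0x3000 produces a new entry for
 * [0x1000, 0x3000), linked in front, while the original entry is
 * trimmed to [0x3000, 0x5000) and its object offset advanced by 0x2000.
 */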
3143
3144
3145 /*
3146 * vm_map_clip_end: [ internal use only ]
3147 *
3148 * Asserts that the given entry ends at or before
3149 * the specified address; if necessary,
3150 * it splits the entry into two.
3151 */
3152 static void
3153 vm_map_clip_end(
3154 vm_map_t map,
3155 vm_map_entry_t entry,
3156 vm_map_offset_t endaddr)
3157 {
3158 if (endaddr > entry->vme_end) {
3159 /*
3160 * Within the scope of this clipping, limit "endaddr" to
3161 * the end of this map entry...
3162 */
3163 endaddr = entry->vme_end;
3164 }
3165 #ifndef NO_NESTED_PMAP
3166 if (entry->use_pmap) {
3167 vm_map_offset_t start_unnest, end_unnest;
3168
3169 /*
3170 * Make sure the range between the start of this entry and
3171 * the new "endaddr" is no longer nested before we clip.
3172 * Unnest only the minimum range the platform can handle.
3173 * vm_map_clip_unnest may perform additional adjustments to
3174 * the unnest range.
3175 */
3176 start_unnest = entry->vme_start;
3177 end_unnest =
3178 (endaddr + pmap_nesting_size_min - 1) &
3179 ~(pmap_nesting_size_min - 1);
3180 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
3181 }
3182 #endif /* NO_NESTED_PMAP */
3183 if (endaddr < entry->vme_end) {
3184 if (entry->object.vm_object &&
3185 !entry->is_sub_map &&
3186 entry->object.vm_object->phys_contiguous) {
3187 pmap_remove(map->pmap,
3188 (addr64_t)(entry->vme_start),
3189 (addr64_t)(entry->vme_end));
3190 }
3191 _vm_map_clip_end(&map->hdr, entry, endaddr);
3192 UPDATE_FIRST_FREE(map, map->first_free);
3193 }
3194 }
3195
3196
3197 #define vm_map_copy_clip_end(copy, entry, endaddr) \
3198 MACRO_BEGIN \
3199 if ((endaddr) < (entry)->vme_end) \
3200 _vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \
3201 MACRO_END
3202
3203 /*
3204 * This routine is called only when it is known that
3205 * the entry must be split.
3206 */
3207 static void
3208 _vm_map_clip_end(
3209 register struct vm_map_header *map_header,
3210 register vm_map_entry_t entry,
3211 register vm_map_offset_t end)
3212 {
3213 register vm_map_entry_t new_entry;
3214
3215 /*
3216 * Create a new entry and insert it
3217 * AFTER the specified entry
3218 */
3219
3220 new_entry = _vm_map_entry_create(map_header);
3221 vm_map_entry_copy_full(new_entry, entry);
3222
3223 new_entry->vme_start = entry->vme_end = end;
3224 new_entry->offset += (end - entry->vme_start);
3225
3226 _vm_map_entry_link(map_header, entry, new_entry);
3227
3228 if (entry->is_sub_map)
3229 vm_map_reference(new_entry->object.sub_map);
3230 else
3231 vm_object_reference(new_entry->object.vm_object);
3232 }
3233
3234
3235 /*
3236 * VM_MAP_RANGE_CHECK: [ internal use only ]
3237 *
3238 * Asserts that the starting and ending region
3239 * addresses fall within the valid range of the map.
3240 */
3241 #define VM_MAP_RANGE_CHECK(map, start, end) \
3242 MACRO_BEGIN \
3243 if (start < vm_map_min(map)) \
3244 start = vm_map_min(map); \
3245 if (end > vm_map_max(map)) \
3246 end = vm_map_max(map); \
3247 if (start > end) \
3248 start = end; \
3249 MACRO_END
3250
3251 /*
3252 * vm_map_range_check: [ internal use only ]
3253 *
3254 * Check that the region defined by the specified start and
3255 * end addresses is wholly contained within a single map
3256 * entry or set of adjacent map entries of the specified map,
3257 * i.e. the specified region contains no unmapped space.
3258 * If any or all of the region is unmapped, FALSE is returned.
3259 * Otherwise, TRUE is returned and if the output argument 'entry'
3260 * is not NULL it points to the map entry containing the start
3261 * of the region.
3262 *
3263 * The map is locked for reading on entry and is left locked.
3264 */
3265 static boolean_t
3266 vm_map_range_check(
3267 register vm_map_t map,
3268 register vm_map_offset_t start,
3269 register vm_map_offset_t end,
3270 vm_map_entry_t *entry)
3271 {
3272 vm_map_entry_t cur;
3273 register vm_map_offset_t prev;
3274
3275 /*
3276 * Basic sanity checks first
3277 */
3278 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
3279 return (FALSE);
3280
3281 /*
3282 * Check first if the region starts within a valid
3283 * mapping for the map.
3284 */
3285 if (!vm_map_lookup_entry(map, start, &cur))
3286 return (FALSE);
3287
3288 /*
3289 * Optimize for the case that the region is contained
3290 * in a single map entry.
3291 */
3292 if (entry != (vm_map_entry_t *) NULL)
3293 *entry = cur;
3294 if (end <= cur->vme_end)
3295 return (TRUE);
3296
3297 /*
3298 * If the region is not wholly contained within a
3299 * single entry, walk the entries looking for holes.
3300 */
3301 prev = cur->vme_end;
3302 cur = cur->vme_next;
3303 while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
3304 if (end <= cur->vme_end)
3305 return (TRUE);
3306 prev = cur->vme_end;
3307 cur = cur->vme_next;
3308 }
3309 return (FALSE);
3310 }
3311
3312 /*
3313 * vm_map_submap: [ kernel use only ]
3314 *
3315 * Mark the given range as handled by a subordinate map.
3316 *
3317 * This range must have been created with vm_map_find using
3318 * the vm_submap_object, and no other operations may have been
3319 * performed on this range prior to calling vm_map_submap.
3320 *
3321 * Only a limited number of operations can be performed
3322 * within this range after calling vm_map_submap:
3323 * vm_fault
3324 * [Don't try vm_map_copyin!]
3325 *
3326 * To remove a submapping, one must first remove the
3327 * range from the superior map, and then destroy the
3328 * submap (if desired). [Better yet, don't try it.]
3329 */
3330 kern_return_t
3331 vm_map_submap(
3332 vm_map_t map,
3333 vm_map_offset_t start,
3334 vm_map_offset_t end,
3335 vm_map_t submap,
3336 vm_map_offset_t offset,
3337 #ifdef NO_NESTED_PMAP
3338 __unused
3339 #endif /* NO_NESTED_PMAP */
3340 boolean_t use_pmap)
3341 {
3342 vm_map_entry_t entry;
3343 register kern_return_t result = KERN_INVALID_ARGUMENT;
3344 register vm_object_t object;
3345
3346 vm_map_lock(map);
3347
3348 if (! vm_map_lookup_entry(map, start, &entry)) {
3349 entry = entry->vme_next;
3350 }
3351
3352 if (entry == vm_map_to_entry(map) ||
3353 entry->is_sub_map) {
3354 vm_map_unlock(map);
3355 return KERN_INVALID_ARGUMENT;
3356 }
3357
3358 assert(!entry->use_pmap); /* we don't want to unnest anything here */
3359 vm_map_clip_start(map, entry, start);
3360 vm_map_clip_end(map, entry, end);
3361
3362 if ((entry->vme_start == start) && (entry->vme_end == end) &&
3363 (!entry->is_sub_map) &&
3364 ((object = entry->object.vm_object) == vm_submap_object) &&
3365 (object->resident_page_count == 0) &&
3366 (object->copy == VM_OBJECT_NULL) &&
3367 (object->shadow == VM_OBJECT_NULL) &&
3368 (!object->pager_created)) {
3369 entry->offset = (vm_object_offset_t)offset;
3370 entry->object.vm_object = VM_OBJECT_NULL;
3371 vm_object_deallocate(object);
3372 entry->is_sub_map = TRUE;
3373 entry->object.sub_map = submap;
3374 vm_map_reference(submap);
3375 submap->mapped = TRUE;
3376
3377 #ifndef NO_NESTED_PMAP
3378 if (use_pmap) {
3379 /* nest if platform code will allow */
3380 if(submap->pmap == NULL) {
3381 submap->pmap = pmap_create((vm_map_size_t) 0, FALSE);
3382 if(submap->pmap == PMAP_NULL) {
3383 vm_map_unlock(map);
3384 return(KERN_NO_SPACE);
3385 }
3386 }
3387 result = pmap_nest(map->pmap,
3388 (entry->object.sub_map)->pmap,
3389 (addr64_t)start,
3390 (addr64_t)start,
3391 (uint64_t)(end - start));
3392 if(result)
3393 panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result);
3394 entry->use_pmap = TRUE;
3395 }
3396 #else /* NO_NESTED_PMAP */
3397 pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end);
3398 #endif /* NO_NESTED_PMAP */
3399 result = KERN_SUCCESS;
3400 }
3401 vm_map_unlock(map);
3402
3403 return(result);
3404 }
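/*
 * A hedged usage sketch, assuming the [start, end) range was previously
 * set up against vm_submap_object as required by the comment above;
 * "parent_map", "submap" and "kr" are placeholders supplied by the
 * caller:
 *
 *	kr = vm_map_submap(parent_map,
 *			   start, end,			// page-aligned range
 *			   submap,
 *			   (vm_map_offset_t) 0,		// offset into submap
 *			   FALSE);			// don't nest pmaps
 *
 * Unless the range matches exactly one entry backed by vm_submap_object,
 * the call returns KERN_INVALID_ARGUMENT.
 */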
3405
3406 /*
3407 * vm_map_protect:
3408 *
3409 * Sets the protection of the specified address
3410 * region in the target map. If "set_max" is
3411 * specified, the maximum protection is to be set;
3412 * otherwise, only the current protection is affected.
3413 */
3414 kern_return_t
3415 vm_map_protect(
3416 register vm_map_t map,
3417 register vm_map_offset_t start,
3418 register vm_map_offset_t end,
3419 register vm_prot_t new_prot,
3420 register boolean_t set_max)
3421 {
3422 register vm_map_entry_t current;
3423 register vm_map_offset_t prev;
3424 vm_map_entry_t entry;
3425 vm_prot_t new_max;
3426
3427 XPR(XPR_VM_MAP,
3428 "vm_map_protect, 0x%X start 0x%X end 0x%X, new 0x%X %d",
3429 map, start, end, new_prot, set_max);
3430
3431 vm_map_lock(map);
3432
3433 /* LP64todo - remove this check when vm_map_commpage64()
3434 * no longer has to stuff in a map_entry for the commpage
3435 * above the map's max_offset.
3436 */
3437 if (start >= map->max_offset) {
3438 vm_map_unlock(map);
3439 return(KERN_INVALID_ADDRESS);
3440 }
3441
3442 while(1) {
3443 /*
3444 * Lookup the entry. If it doesn't start in a valid
3445 * entry, return an error.
3446 */
3447 if (! vm_map_lookup_entry(map, start, &entry)) {
3448 vm_map_unlock(map);
3449 return(KERN_INVALID_ADDRESS);
3450 }
3451
3452 if (entry->superpage_size && (start & (SUPERPAGE_SIZE-1))) { /* extend request to whole entry */
3453 start = SUPERPAGE_ROUND_DOWN(start);
3454 continue;
3455 }
3456 break;
3457 }
3458 if (entry->superpage_size)
3459 end = SUPERPAGE_ROUND_UP(end);
3460
3461 /*
3462 * Make a first pass to check for protection and address
3463 * violations.
3464 */
3465
3466 current = entry;
3467 prev = current->vme_start;
3468 while ((current != vm_map_to_entry(map)) &&
3469 (current->vme_start < end)) {
3470
3471 /*
3472 * If there is a hole, return an error.
3473 */
3474 if (current->vme_start != prev) {
3475 vm_map_unlock(map);
3476 return(KERN_INVALID_ADDRESS);
3477 }
3478
3479 new_max = current->max_protection;
3480 if(new_prot & VM_PROT_COPY) {
3481 new_max |= VM_PROT_WRITE;
3482 if ((new_prot & (new_max | VM_PROT_COPY)) != new_prot) {
3483 vm_map_unlock(map);
3484 return(KERN_PROTECTION_FAILURE);
3485 }
3486 } else {
3487 if ((new_prot & new_max) != new_prot) {
3488 vm_map_unlock(map);
3489 return(KERN_PROTECTION_FAILURE);
3490 }
3491 }
3492
3493 #if CONFIG_EMBEDDED
3494 if (new_prot & VM_PROT_WRITE) {
3495 if (new_prot & VM_PROT_EXECUTE) {
3496 printf("EMBEDDED: %s can't have both write and exec at the same time\n", __FUNCTION__);
3497 new_prot &= ~VM_PROT_EXECUTE;
3498 }
3499 }
3500 #endif
3501
3502 prev = current->vme_end;
3503 current = current->vme_next;
3504 }
3505 if (end > prev) {
3506 vm_map_unlock(map);
3507 return(KERN_INVALID_ADDRESS);
3508 }
3509
3510 /*
3511 * Go back and fix up protections.
3512 * Clip to start here if the range starts within
3513 * the entry.
3514 */
3515
3516 current = entry;
3517 if (current != vm_map_to_entry(map)) {
3518 /* clip and unnest if necessary */
3519 vm_map_clip_start(map, current, start);
3520 }
3521
3522 while ((current != vm_map_to_entry(map)) &&
3523 (current->vme_start < end)) {
3524
3525 vm_prot_t old_prot;
3526
3527 vm_map_clip_end(map, current, end);
3528
3529 assert(!current->use_pmap); /* clipping did unnest if needed */
3530
3531 old_prot = current->protection;
3532
3533 if(new_prot & VM_PROT_COPY) {
3534 /* caller is asking specifically to copy the */
3535 /* mapped data, this implies that max protection */
3536 /* will include write. Caller must be prepared */
3537 /* for loss of shared memory communication in the */
3538 /* target area after taking this step */
3539 current->needs_copy = TRUE;
3540 current->max_protection |= VM_PROT_WRITE;
3541 }
3542
3543 if (set_max)
3544 current->protection =
3545 (current->max_protection =
3546 new_prot & ~VM_PROT_COPY) &
3547 old_prot;
3548 else
3549 current->protection = new_prot & ~VM_PROT_COPY;
3550
3551 /*
3552 * Update physical map if necessary.
3553 * If the request is to turn off write protection,
3554 * we won't do it for real (in pmap). This is because
3555 * it would cause copy-on-write to fail. We've already
3556 * set the new protection in the map, so if a
3557 * write-protect fault occurred, it will be fixed up
3558 * properly, COW or not.
3559 */
3560 if (current->protection != old_prot) {
3561 /* Look one level in; we support nested pmaps */
3562 /* from mapped submaps which are direct entries */
3563 /* in our map */
3564
3565 vm_prot_t prot;
3566
3567 prot = current->protection & ~VM_PROT_WRITE;
3568
3569 if (override_nx(map, current->alias) && prot)
3570 prot |= VM_PROT_EXECUTE;
3571
3572 if (current->is_sub_map && current->use_pmap) {
3573 pmap_protect(current->object.sub_map->pmap,
3574 current->vme_start,
3575 current->vme_end,
3576 prot);
3577 } else {
3578 pmap_protect(map->pmap,
3579 current->vme_start,
3580 current->vme_end,
3581 prot);
3582 }
3583 }
3584 current = current->vme_next;
3585 }
3586
3587 current = entry;
3588 while ((current != vm_map_to_entry(map)) &&
3589 (current->vme_start <= end)) {
3590 vm_map_simplify_entry(map, current);
3591 current = current->vme_next;
3592 }
3593
3594 vm_map_unlock(map);
3595 return(KERN_SUCCESS);
3596 }
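/*
 * An illustrative sketch, assuming "map", "start" and "end" describe an
 * existing page-aligned mapping: drop the current protection to
 * read-only without touching the maximum protection.
 *
 *	kr = vm_map_protect(map, start, end,
 *			    VM_PROT_READ,	// new current protection
 *			    FALSE);		// set_max
 *
 * Passing set_max == TRUE instead sets max_protection to the new value
 * and re-derives the current protection as its intersection with the
 * old one, as the code above shows.
 */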
3597
3598 /*
3599 * vm_map_inherit:
3600 *
3601 * Sets the inheritance of the specified address
3602 * range in the target map. Inheritance
3603 * affects how the map will be shared with
3604 * child maps at the time of vm_map_fork.
3605 */
3606 kern_return_t
3607 vm_map_inherit(
3608 register vm_map_t map,
3609 register vm_map_offset_t start,
3610 register vm_map_offset_t end,
3611 register vm_inherit_t new_inheritance)
3612 {
3613 register vm_map_entry_t entry;
3614 vm_map_entry_t temp_entry;
3615
3616 vm_map_lock(map);
3617
3618 VM_MAP_RANGE_CHECK(map, start, end);
3619
3620 if (vm_map_lookup_entry(map, start, &temp_entry)) {
3621 entry = temp_entry;
3622 }
3623 else {
3624 temp_entry = temp_entry->vme_next;
3625 entry = temp_entry;
3626 }
3627
3628 /* first check entire range for submaps which can't support the */
3629 /* given inheritance. */
3630 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
3631 if(entry->is_sub_map) {
3632 if(new_inheritance == VM_INHERIT_COPY) {
3633 vm_map_unlock(map);
3634 return(KERN_INVALID_ARGUMENT);
3635 }
3636 }
3637
3638 entry = entry->vme_next;
3639 }
3640
3641 entry = temp_entry;
3642 if (entry != vm_map_to_entry(map)) {
3643 /* clip and unnest if necessary */
3644 vm_map_clip_start(map, entry, start);
3645 }
3646
3647 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
3648 vm_map_clip_end(map, entry, end);
3649 assert(!entry->use_pmap); /* clip did unnest if needed */
3650
3651 entry->inheritance = new_inheritance;
3652
3653 entry = entry->vme_next;
3654 }
3655
3656 vm_map_unlock(map);
3657 return(KERN_SUCCESS);
3658 }
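/*
 * An illustrative sketch, assuming "map", "start" and "end" describe an
 * existing mapped range: mark it so that a child created by vm_map_fork
 * gets its own copy rather than sharing the parent's memory.
 *
 *	kr = vm_map_inherit(map, start, end, VM_INHERIT_COPY);
 *
 * As the first loop above shows, VM_INHERIT_COPY is rejected with
 * KERN_INVALID_ARGUMENT if any entry in the range is a submap.
 */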
3659
3660 /*
3661 * Update the accounting for the amount of wired memory in this map. If the user has
3662 * exceeded the defined limits, then we fail. Wiring on behalf of the kernel never fails.
3663 */
3664
3665 static kern_return_t
3666 add_wire_counts(
3667 vm_map_t map,
3668 vm_map_entry_t entry,
3669 boolean_t user_wire)
3670 {
3671 vm_map_size_t size;
3672
3673 if (user_wire) {
3674
3675 /*
3676 * We're wiring memory at the request of the user. Check if this is the first time the user is wiring
3677 * this map entry.
3678 */
3679
3680 if (entry->user_wired_count == 0) {
3681 size = entry->vme_end - entry->vme_start;
3682
3683 /*
3684 * Since this is the first time the user is wiring this map entry, check to see if we're
3685 * exceeding the user wire limits. There is a per map limit which is the smaller of either
3686 * the process's rlimit or the global vm_user_wire_limit which caps this value. There is also
3687 * a system-wide limit on the amount of memory all users can wire. If the user is over either
3688 * limit, then we fail.
3689 */
3690
3691 if(size + map->user_wire_size > MIN(map->user_wire_limit, vm_user_wire_limit) ||
3692 size + ptoa_64(vm_page_wire_count) > vm_global_user_wire_limit ||
3693 size + ptoa_64(vm_page_wire_count) > max_mem - vm_global_no_user_wire_amount)
3694 return KERN_RESOURCE_SHORTAGE;
3695
3696 /*
3697 * The first time the user wires an entry, we also increment the wired_count and add this to
3698 * the total that has been wired in the map.
3699 */
3700
3701 if (entry->wired_count >= MAX_WIRE_COUNT)
3702 return KERN_FAILURE;
3703
3704 entry->wired_count++;
3705 map->user_wire_size += size;
3706 }
3707
3708 if (entry->user_wired_count >= MAX_WIRE_COUNT)
3709 return KERN_FAILURE;
3710
3711 entry->user_wired_count++;
3712
3713 } else {
3714
3715 /*
3716 * The kernel's wiring the memory. Just bump the count and continue.
3717 */
3718
3719 if (entry->wired_count >= MAX_WIRE_COUNT)
3720 panic("vm_map_wire: too many wirings");
3721
3722 entry->wired_count++;
3723 }
3724
3725 return KERN_SUCCESS;
3726 }
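/*
 * Worked example of the per-map check above, with hypothetical numbers:
 * if map->user_wire_limit is 64MB, vm_user_wire_limit is 32MB and
 * map->user_wire_size is already 30MB, then wiring a 4MB entry fails
 * with KERN_RESOURCE_SHORTAGE, since 4MB + 30MB exceeds MIN(64MB, 32MB).
 * The two system-wide checks in the same condition are evaluated the
 * same way.
 */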
3727
3728 /*
3729 * Update the memory wiring accounting now that the given map entry is being unwired.
3730 */
3731
3732 static void
3733 subtract_wire_counts(
3734 vm_map_t map,
3735 vm_map_entry_t entry,
3736 boolean_t user_wire)
3737 {
3738
3739 if (user_wire) {
3740
3741 /*
3742 * We're unwiring memory at the request of the user. See if we're removing the last user wire reference.
3743 */
3744
3745 if (entry->user_wired_count == 1) {
3746
3747 /*
3748 * We're removing the last user wire reference. Decrement the wired_count and the total
3749 * user wired memory for this map.
3750 */
3751
3752 assert(entry->wired_count >= 1);
3753 entry->wired_count--;
3754 map->user_wire_size -= entry->vme_end - entry->vme_start;
3755 }
3756
3757 assert(entry->user_wired_count >= 1);
3758 entry->user_wired_count--;
3759
3760 } else {
3761
3762 /*
3763 * The kernel is unwiring the memory. Just update the count.
3764 */
3765
3766 assert(entry->wired_count >= 1);
3767 entry->wired_count--;
3768 }
3769 }
3770
3771 /*
3772 * vm_map_wire:
3773 *
3774 * Sets the pageability of the specified address range in the
3775 * target map as wired. Regions specified as not pageable require
3776 * locked-down physical memory and physical page maps. The
3777 * access_type variable indicates types of accesses that must not
3778 * generate page faults. This is checked against protection of
3779 * memory being locked-down.
3780 *
3781 * The map must not be locked, but a reference must remain to the
3782 * map throughout the call.
3783 */
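/*
 * A hedged sketch of the public vm_map_wire() interface described
 * above, assuming "map", "start" and "end" come from the caller:
 * wiring a range for read/write access on behalf of the kernel.
 *
 *	kr = vm_map_wire(map, start, end,
 *			 VM_PROT_READ | VM_PROT_WRITE,	// access_type
 *			 FALSE);			// user_wire
 */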
3784 static kern_return_t
3785 vm_map_wire_nested(
3786 register vm_map_t map,
3787 register vm_map_offset_t start,
3788 register vm_map_offset_t end,
3789 register vm_prot_t access_type,
3790 boolean_t user_wire,
3791 pmap_t map_pmap,
3792 vm_map_offset_t pmap_addr)
3793 {
3794 register vm_map_entry_t entry;
3795 struct vm_map_entry *first_entry, tmp_entry;
3796 vm_map_t real_map;
3797 register vm_map_offset_t s,e;
3798 kern_return_t rc;
3799 boolean_t need_wakeup;
3800 boolean_t main_map = FALSE;
3801 wait_interrupt_t interruptible_state;
3802 thread_t cur_thread;
3803 unsigned int last_timestamp;
3804 vm_map_size_t size;
3805
3806 vm_map_lock(map);
3807 if(map_pmap == NULL)
3808 main_map = TRUE;
3809 last_timestamp = map->timestamp;
3810
3811 VM_MAP_RANGE_CHECK(map, start, end);
3812 assert(page_aligned(start));
3813 assert(page_aligned(end));
3814 if (start == end) {
3815 /* We wired what the caller asked for, zero pages */
3816 vm_map_unlock(map);
3817 return KERN_SUCCESS;
3818 }
3819
3820 need_wakeup = FALSE;
3821 cur_thread = current_thread();
3822
3823 s = start;
3824 rc = KERN_SUCCESS;
3825
3826 if (vm_map_lookup_entry(map, s, &first_entry)) {
3827 entry = first_entry;
3828 /*
3829 * vm_map_clip_start will be done later.
3830 * We don't want to unnest any nested submaps here !
3831 */
3832 } else {
3833 /* Start address is not in map */
3834 rc = KERN_INVALID_ADDRESS;
3835 goto done;
3836 }
3837
3838 while ((entry != vm_map_to_entry(map)) && (s < end)) {
3839 /*
3840 * At this point, we have wired from "start" to "s".
3841 * We still need to wire from "s" to "end".
3842 *
3843 * "entry" hasn't been clipped, so it could start before "s"
3844 * and/or end after "end".
3845 */
3846
3847 /* "e" is how far we want to wire in this entry */
3848 e = entry->vme_end;
3849 if (e > end)
3850 e = end;
3851
3852 /*
3853 * If another thread is wiring/unwiring this entry then
3854 * block after informing the other thread to wake us up.
3855 */
3856 if (entry->in_transition) {
3857 wait_result_t wait_result;
3858
3859 /*
3860 * We have not clipped the entry. Make sure that
3861 * the start address is in range so that the lookup
3862 * below will succeed.
3863 * "s" is the current starting point: we've already
3864 * wired from "start" to "s" and we still have
3865 * to wire from "s" to "end".
3866 */
3867
3868 entry->needs_wakeup = TRUE;
3869
3870 /*
3871 * wake up anybody waiting on entries that we have
3872 * already wired.
3873 */
3874 if (need_wakeup) {
3875 vm_map_entry_wakeup(map);
3876 need_wakeup = FALSE;
3877 }
3878 /*
3879 * User wiring is interruptible
3880 */
3881 wait_result = vm_map_entry_wait(map,
3882 (user_wire) ? THREAD_ABORTSAFE :
3883 THREAD_UNINT);
3884 if (user_wire && wait_result == THREAD_INTERRUPTED) {
3885 /*
3886 * undo the wirings we have done so far
3887 * We do not clear the needs_wakeup flag,
3888 * because we cannot tell if we were the
3889 * only one waiting.
3890 */
3891 rc = KERN_FAILURE;
3892 goto done;
3893 }
3894
3895 /*
3896 * Cannot avoid a lookup here. reset timestamp.
3897 */
3898 last_timestamp = map->timestamp;
3899
3900 /*
3901 * The entry could have been clipped, look it up again.
3902 * Worst that can happen is that it may not exist anymore.
3903 */
3904 if (!vm_map_lookup_entry(map, s, &first_entry)) {
3905 if (!user_wire)
3906 panic("vm_map_wire: re-lookup failed");
3907
3908 /*
3909 * User: undo everything up to the previous
3910 * entry. Let vm_map_unwire worry about
3911 * checking the validity of the range.
3912 */
3913 rc = KERN_FAILURE;
3914 goto done;
3915 }
3916 entry = first_entry;
3917 continue;
3918 }
3919
3920 if (entry->is_sub_map) {
3921 vm_map_offset_t sub_start;
3922 vm_map_offset_t sub_end;
3923 vm_map_offset_t local_start;
3924 vm_map_offset_t local_end;
3925 pmap_t pmap;
3926
3927 vm_map_clip_start(map, entry, s);
3928 vm_map_clip_end(map, entry, end);
3929
3930 sub_start = entry->offset;
3931 sub_end = entry->vme_end;
3932 sub_end += entry->offset - entry->vme_start;
3933
3934 local_end = entry->vme_end;
3935 if(map_pmap == NULL) {
3936 vm_object_t object;
3937 vm_object_offset_t offset;
3938 vm_prot_t prot;
3939 boolean_t wired;
3940 vm_map_entry_t local_entry;
3941 vm_map_version_t version;
3942 vm_map_t lookup_map;
3943
3944 if(entry->use_pmap) {
3945 pmap = entry->object.sub_map->pmap;
3946 /* ppc implementation requires that */
3947 /* submap's pmap address ranges line */
3948 /* up with parent map */
3949 #ifdef notdef
3950 pmap_addr = sub_start;
3951 #endif
3952 pmap_addr = s;
3953 } else {
3954 pmap = map->pmap;
3955 pmap_addr = s;
3956 }
3957
3958 if (entry->wired_count) {
3959 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
3960 goto done;
3961
3962 /*
3963 * The map was not unlocked:
3964 * no need to goto re-lookup.
3965 * Just go directly to next entry.
3966 */
3967 entry = entry->vme_next;
3968 s = entry->vme_start;
3969 continue;
3970
3971 }
3972
3973 /* call vm_map_lookup_locked to */
3974 /* cause any needs copy to be */
3975 /* evaluated */
3976 local_start = entry->vme_start;
3977 lookup_map = map;
3978 vm_map_lock_write_to_read(map);
3979 if(vm_map_lookup_locked(
3980 &lookup_map, local_start,
3981 access_type,
3982 OBJECT_LOCK_EXCLUSIVE,
3983 &version, &object,
3984 &offset, &prot, &wired,
3985 NULL,
3986 &real_map)) {
3987
3988 vm_map_unlock_read(lookup_map);
3989 vm_map_unwire(map, start,
3990 s, user_wire);
3991 return(KERN_FAILURE);
3992 }
3993 if(real_map != lookup_map)
3994 vm_map_unlock(real_map);
3995 vm_map_unlock_read(lookup_map);
3996 vm_map_lock(map);
3997 vm_object_unlock(object);
3998
3999 /* we unlocked, so must re-lookup */
4000 if (!vm_map_lookup_entry(map,
4001 local_start,
4002 &local_entry)) {
4003 rc = KERN_FAILURE;
4004 goto done;
4005 }
4006
4007 /*
4008 * entry could have been "simplified",
4009 * so re-clip
4010 */
4011 entry = local_entry;
4012 assert(s == local_start);
4013 vm_map_clip_start(map, entry, s);
4014 vm_map_clip_end(map, entry, end);
4015 /* re-compute "e" */
4016 e = entry->vme_end;
4017 if (e > end)
4018 e = end;
4019
4020 /* did we have a change of type? */
4021 if (!entry->is_sub_map) {
4022 last_timestamp = map->timestamp;
4023 continue;
4024 }
4025 } else {
4026 local_start = entry->vme_start;
4027 pmap = map_pmap;
4028 }
4029
4030 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4031 goto done;
4032
4033 entry->in_transition = TRUE;
4034
4035 vm_map_unlock(map);
4036 rc = vm_map_wire_nested(entry->object.sub_map,
4037 sub_start, sub_end,
4038 access_type,
4039 user_wire, pmap, pmap_addr);
4040 vm_map_lock(map);
4041
4042 /*
4043 * Find the entry again. It could have been clipped
4044 * after we unlocked the map.
4045 */
4046 if (!vm_map_lookup_entry(map, local_start,
4047 &first_entry))
4048 panic("vm_map_wire: re-lookup failed");
4049 entry = first_entry;
4050
4051 assert(local_start == s);
4052 /* re-compute "e" */
4053 e = entry->vme_end;
4054 if (e > end)
4055 e = end;
4056
4057 last_timestamp = map->timestamp;
4058 while ((entry != vm_map_to_entry(map)) &&
4059 (entry->vme_start < e)) {
4060 assert(entry->in_transition);
4061 entry->in_transition = FALSE;
4062 if (entry->needs_wakeup) {
4063 entry->needs_wakeup = FALSE;
4064 need_wakeup = TRUE;
4065 }
4066 if (rc != KERN_SUCCESS) {/* from vm_*_wire */
4067 subtract_wire_counts(map, entry, user_wire);
4068 }
4069 entry = entry->vme_next;
4070 }
4071 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
4072 goto done;
4073 }
4074
4075 /* no need to relookup again */
4076 s = entry->vme_start;
4077 continue;
4078 }
4079
4080 /*
4081 * If this entry is already wired then increment
4082 * the appropriate wire reference count.
4083 */
4084 if (entry->wired_count) {
4085 /*
4086 * entry is already wired down, get our reference
4087 * after clipping to our range.
4088 */
4089 vm_map_clip_start(map, entry, s);
4090 vm_map_clip_end(map, entry, end);
4091
4092 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4093 goto done;
4094
4095 /* map was not unlocked: no need to relookup */
4096 entry = entry->vme_next;
4097 s = entry->vme_start;
4098 continue;
4099 }
4100
4101 /*
4102 * Unwired entry or wire request transmitted via submap
4103 */
4104
4105
4106 /*
4107 * Perform actions of vm_map_lookup that need the write
4108 * lock on the map: create a shadow object for a
4109 * copy-on-write region, or an object for a zero-fill
4110 * region.
4111 */
4112 size = entry->vme_end - entry->vme_start;
4113 /*
4114 * If wiring a copy-on-write page, we need to copy it now
4115 * even if we're only (currently) requesting read access.
4116 * This is aggressive, but once it's wired we can't move it.
4117 */
4118 if (entry->needs_copy) {
4119 vm_object_shadow(&entry->object.vm_object,
4120 &entry->offset, size);
4121 entry->needs_copy = FALSE;
4122 } else if (entry->object.vm_object == VM_OBJECT_NULL) {
4123 entry->object.vm_object = vm_object_allocate(size);
4124 entry->offset = (vm_object_offset_t)0;
4125 }
4126
4127 vm_map_clip_start(map, entry, s);
4128 vm_map_clip_end(map, entry, end);
4129
4130 /* re-compute "e" */
4131 e = entry->vme_end;
4132 if (e > end)
4133 e = end;
4134
4135 /*
4136 * Check for holes and protection mismatch.
4137 * Holes: Next entry should be contiguous unless this
4138 * is the end of the region.
4139 * Protection: Access requested must be allowed, unless
4140 * wiring is by protection class
4141 */
4142 if ((entry->vme_end < end) &&
4143 ((entry->vme_next == vm_map_to_entry(map)) ||
4144 (entry->vme_next->vme_start > entry->vme_end))) {
4145 /* found a hole */
4146 rc = KERN_INVALID_ADDRESS;
4147 goto done;
4148 }
4149 if ((entry->protection & access_type) != access_type) {
4150 /* found a protection problem */
4151 rc = KERN_PROTECTION_FAILURE;
4152 goto done;
4153 }
4154
4155 assert(entry->wired_count == 0 && entry->user_wired_count == 0);
4156
4157 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4158 goto done;
4159
4160 entry->in_transition = TRUE;
4161
4162 /*
4163 * This entry might get split once we unlock the map.
4164 * In vm_fault_wire(), we need the current range as
4165 * defined by this entry. In order for this to work
4166 * along with a simultaneous clip operation, we make a
4167 * temporary copy of this entry and use that for the
4168 * wiring. Note that the underlying objects do not
4169 * change during a clip.
4170 */
4171 tmp_entry = *entry;
4172
4173 /*
4174 * The in_transition state guarantees that the entry
4175 * (or entries for this range, if a split occurred) will be
4176 * there when the map lock is acquired for the second time.
4177 */
4178 vm_map_unlock(map);
4179
4180 if (!user_wire && cur_thread != THREAD_NULL)
4181 interruptible_state = thread_interrupt_level(THREAD_UNINT);
4182 else
4183 interruptible_state = THREAD_UNINT;
4184
4185 if(map_pmap)
4186 rc = vm_fault_wire(map,
4187 &tmp_entry, map_pmap, pmap_addr);
4188 else
4189 rc = vm_fault_wire(map,
4190 &tmp_entry, map->pmap,
4191 tmp_entry.vme_start);
4192
4193 if (!user_wire && cur_thread != THREAD_NULL)
4194 thread_interrupt_level(interruptible_state);
4195
4196 vm_map_lock(map);
4197
4198 if (last_timestamp+1 != map->timestamp) {
4199 /*
4200 * Find the entry again. It could have been clipped
4201 * after we unlocked the map.
4202 */
4203 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
4204 &first_entry))
4205 panic("vm_map_wire: re-lookup failed");
4206
4207 entry = first_entry;
4208 }
4209
4210 last_timestamp = map->timestamp;
4211
4212 while ((entry != vm_map_to_entry(map)) &&
4213 (entry->vme_start < tmp_entry.vme_end)) {
4214 assert(entry->in_transition);
4215 entry->in_transition = FALSE;
4216 if (entry->needs_wakeup) {
4217 entry->needs_wakeup = FALSE;
4218 need_wakeup = TRUE;
4219 }
4220 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
4221 subtract_wire_counts(map, entry, user_wire);
4222 }
4223 entry = entry->vme_next;
4224 }
4225
4226 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
4227 goto done;
4228 }
4229
4230 s = entry->vme_start;
4231 } /* end while loop through map entries */
4232
4233 done:
4234 if (rc == KERN_SUCCESS) {
4235 /* repair any damage we may have made to the VM map */
4236 vm_map_simplify_range(map, start, end);
4237 }
4238
4239 vm_map_unlock(map);
4240
4241 /*
4242 * wake up anybody waiting on entries we wired.
4243 */
4244 if (need_wakeup)
4245 vm_map_entry_wakeup(map);
4246
4247 if (rc != KERN_SUCCESS) {
4248 /* undo what has been wired so far */
4249 vm_map_unwire(map, start, s, user_wire);
4250 }
4251
4252 return rc;
4253
4254 }
4255
4256 kern_return_t
4257 vm_map_wire(
4258 register vm_map_t map,
4259 register vm_map_offset_t start,
4260 register vm_map_offset_t end,
4261 register vm_prot_t access_type,
4262 boolean_t user_wire)
4263 {
4264
4265 kern_return_t kret;
4266
4267 #ifdef ppc
4268 /*
4269 * the calls to mapping_prealloc and mapping_relpre
4270 * (along with the VM_MAP_RANGE_CHECK to ensure a
4271 * reasonable range was passed in) are
4272 * currently necessary because
4273 * we haven't enabled kernel pre-emption
4274 * and/or the pmap_enter cannot purge and re-use
4275 * existing mappings
4276 */
4277 VM_MAP_RANGE_CHECK(map, start, end);
4278 assert((unsigned int) (end - start) == (end - start));
4279 mapping_prealloc((unsigned int) (end - start));
4280 #endif
4281 kret = vm_map_wire_nested(map, start, end, access_type,
4282 user_wire, (pmap_t)NULL, 0);
4283 #ifdef ppc
4284 mapping_relpre();
4285 #endif
4286 return kret;
4287 }
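/*
 * Illustrative sketch only; the address variables below are assumptions
 * for the example and not part of this file.  A typical caller wires a
 * page-aligned range around an operation and unwires it afterwards with
 * the same user_wire setting:
 *
 *	kern_return_t	kr;
 *	vm_map_offset_t	wire_start = vm_map_trunc_page(addr);
 *	vm_map_offset_t	wire_end   = vm_map_round_page(addr + len);
 *
 *	kr = vm_map_wire(map, wire_start, wire_end,
 *			 VM_PROT_READ | VM_PROT_WRITE,
 *			 FALSE);	(kernel wiring, not a user mlock)
 *	if (kr == KERN_SUCCESS) {
 *		... operate on the wired pages ...
 *		(void) vm_map_unwire(map, wire_start, wire_end, FALSE);
 *	}
 */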
4288
4289 /*
4290 * vm_map_unwire:
4291 *
4292 * Sets the pageability of the specified address range in the target
4293 * map as pageable. Regions specified must have been wired previously.
4294 *
4295 * The map must not be locked, but a reference must remain to the map
4296 * throughout the call.
4297 *
4298 * The kernel will panic on failures. User unwire ignores holes and
4299 * unwired and in-transition entries to avoid losing memory by leaving
4300 * it wired.
4301 */
4302 static kern_return_t
4303 vm_map_unwire_nested(
4304 register vm_map_t map,
4305 register vm_map_offset_t start,
4306 register vm_map_offset_t end,
4307 boolean_t user_wire,
4308 pmap_t map_pmap,
4309 vm_map_offset_t pmap_addr)
4310 {
4311 register vm_map_entry_t entry;
4312 struct vm_map_entry *first_entry, tmp_entry;
4313 boolean_t need_wakeup;
4314 boolean_t main_map = FALSE;
4315 unsigned int last_timestamp;
4316
4317 vm_map_lock(map);
4318 if(map_pmap == NULL)
4319 main_map = TRUE;
4320 last_timestamp = map->timestamp;
4321
4322 VM_MAP_RANGE_CHECK(map, start, end);
4323 assert(page_aligned(start));
4324 assert(page_aligned(end));
4325
4326 if (start == end) {
4327 /* We unwired what the caller asked for: zero pages */
4328 vm_map_unlock(map);
4329 return KERN_SUCCESS;
4330 }
4331
4332 if (vm_map_lookup_entry(map, start, &first_entry)) {
4333 entry = first_entry;
4334 /*
4335 * vm_map_clip_start will be done later.
4336 * We don't want to unnest any nested sub maps here !
4337 */
4338 }
4339 else {
4340 if (!user_wire) {
4341 panic("vm_map_unwire: start not found");
4342 }
4343 /* Start address is not in map. */
4344 vm_map_unlock(map);
4345 return(KERN_INVALID_ADDRESS);
4346 }
4347
4348 if (entry->superpage_size) {
4349 /* superpages are always wired */
4350 vm_map_unlock(map);
4351 return KERN_INVALID_ADDRESS;
4352 }
4353
4354 need_wakeup = FALSE;
4355 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
4356 if (entry->in_transition) {
4357 /*
4358 * 1)
4359 * Another thread is wiring down this entry. Note
4360 * that if it were not for the other thread, we would
4361 * be unwiring an unwired entry. This is not
4362 * permitted. If we wait, we will be unwiring memory
4363 * we did not wire.
4364 *
4365 * 2)
4366 * Another thread is unwiring this entry. We did not
4367 * have a reference to it, because if we did, this
4368 * entry will not be getting unwired now.
4369 */
4370 if (!user_wire) {
4371 /*
4372 * XXX FBDP
4373 * This could happen: there could be some
4374 * overlapping vslock/vsunlock operations
4375 * going on.
4376 * We should probably just wait and retry,
4377 * but then we have to be careful that this
4378 * entry could get "simplified" after
4379 * "in_transition" gets unset and before
4380 * we re-lookup the entry, so we would
4381 * have to re-clip the entry to avoid
4382 * re-unwiring what we have already unwired...
4383 * See vm_map_wire_nested().
4384 *
4385 * Or we could just ignore "in_transition"
4386 * here and proceed to decrement the wired
4387 * count(s) on this entry. That should be fine
4388 * as long as "wired_count" doesn't drop all
4389 * the way to 0 (and we should panic if THAT
4390 * happens).
4391 */
4392 panic("vm_map_unwire: in_transition entry");
4393 }
4394
4395 entry = entry->vme_next;
4396 continue;
4397 }
4398
4399 if (entry->is_sub_map) {
4400 vm_map_offset_t sub_start;
4401 vm_map_offset_t sub_end;
4402 vm_map_offset_t local_end;
4403 pmap_t pmap;
4404
4405 vm_map_clip_start(map, entry, start);
4406 vm_map_clip_end(map, entry, end);
4407
4408 sub_start = entry->offset;
4409 sub_end = entry->vme_end - entry->vme_start;
4410 sub_end += entry->offset;
4411 local_end = entry->vme_end;
4412 if(map_pmap == NULL) {
4413 if(entry->use_pmap) {
4414 pmap = entry->object.sub_map->pmap;
4415 pmap_addr = sub_start;
4416 } else {
4417 pmap = map->pmap;
4418 pmap_addr = start;
4419 }
4420 if (entry->wired_count == 0 ||
4421 (user_wire && entry->user_wired_count == 0)) {
4422 if (!user_wire)
4423 panic("vm_map_unwire: entry is unwired");
4424 entry = entry->vme_next;
4425 continue;
4426 }
4427
4428 /*
4429 * Check for holes
4430 * Holes: Next entry should be contiguous unless
4431 * this is the end of the region.
4432 */
4433 if (((entry->vme_end < end) &&
4434 ((entry->vme_next == vm_map_to_entry(map)) ||
4435 (entry->vme_next->vme_start
4436 > entry->vme_end)))) {
4437 if (!user_wire)
4438 panic("vm_map_unwire: non-contiguous region");
4439 /*
4440 entry = entry->vme_next;
4441 continue;
4442 */
4443 }
4444
4445 subtract_wire_counts(map, entry, user_wire);
4446
4447 if (entry->wired_count != 0) {
4448 entry = entry->vme_next;
4449 continue;
4450 }
4451
4452 entry->in_transition = TRUE;
4453 tmp_entry = *entry;/* see comment in vm_map_wire() */
4454
4455 /*
4456 * We can unlock the map now. The in_transition state
4457 * guarantees the existence of the entry.
4458 */
4459 vm_map_unlock(map);
4460 vm_map_unwire_nested(entry->object.sub_map,
4461 sub_start, sub_end, user_wire, pmap, pmap_addr);
4462 vm_map_lock(map);
4463
4464 if (last_timestamp+1 != map->timestamp) {
4465 /*
4466 * Find the entry again. It could have been
4467 * clipped or deleted after we unlocked the map.
4468 */
4469 if (!vm_map_lookup_entry(map,
4470 tmp_entry.vme_start,
4471 &first_entry)) {
4472 if (!user_wire)
4473 panic("vm_map_unwire: re-lookup failed");
4474 entry = first_entry->vme_next;
4475 } else
4476 entry = first_entry;
4477 }
4478 last_timestamp = map->timestamp;
4479
4480 /*
4481 * clear transition bit for all constituent entries
4482 * that were in the original entry (saved in
4483 * tmp_entry). Also check for waiters.
4484 */
4485 while ((entry != vm_map_to_entry(map)) &&
4486 (entry->vme_start < tmp_entry.vme_end)) {
4487 assert(entry->in_transition);
4488 entry->in_transition = FALSE;
4489 if (entry->needs_wakeup) {
4490 entry->needs_wakeup = FALSE;
4491 need_wakeup = TRUE;
4492 }
4493 entry = entry->vme_next;
4494 }
4495 continue;
4496 } else {
4497 vm_map_unlock(map);
4498 vm_map_unwire_nested(entry->object.sub_map,
4499 sub_start, sub_end, user_wire, map_pmap,
4500 pmap_addr);
4501 vm_map_lock(map);
4502
4503 if (last_timestamp+1 != map->timestamp) {
4504 /*
4505 * Find the entry again. It could have been
4506 * clipped or deleted after we unlocked the map.
4507 */
4508 if (!vm_map_lookup_entry(map,
4509 tmp_entry.vme_start,
4510 &first_entry)) {
4511 if (!user_wire)
4512 panic("vm_map_unwire: re-lookup failed");
4513 entry = first_entry->vme_next;
4514 } else
4515 entry = first_entry;
4516 }
4517 last_timestamp = map->timestamp;
4518 }
4519 }
4520
4521
4522 if ((entry->wired_count == 0) ||
4523 (user_wire && entry->user_wired_count == 0)) {
4524 if (!user_wire)
4525 panic("vm_map_unwire: entry is unwired");
4526
4527 entry = entry->vme_next;
4528 continue;
4529 }
4530
4531 assert(entry->wired_count > 0 &&
4532 (!user_wire || entry->user_wired_count > 0));
4533
4534 vm_map_clip_start(map, entry, start);
4535 vm_map_clip_end(map, entry, end);
4536
4537 /*
4538 * Check for holes
4539 * Holes: Next entry should be contiguous unless
4540 * this is the end of the region.
4541 */
4542 if (((entry->vme_end < end) &&
4543 ((entry->vme_next == vm_map_to_entry(map)) ||
4544 (entry->vme_next->vme_start > entry->vme_end)))) {
4545
4546 if (!user_wire)
4547 panic("vm_map_unwire: non-contiguous region");
4548 entry = entry->vme_next;
4549 continue;
4550 }
4551
4552 subtract_wire_counts(map, entry, user_wire);
4553
4554 if (entry->wired_count != 0) {
4555 entry = entry->vme_next;
4556 continue;
4557 }
4558
4559 if(entry->zero_wired_pages) {
4560 entry->zero_wired_pages = FALSE;
4561 }
4562
4563 entry->in_transition = TRUE;
4564 tmp_entry = *entry; /* see comment in vm_map_wire() */
4565
4566 /*
4567 * We can unlock the map now. The in_transition state
4568 * guarantees the existence of the entry.
4569 */
4570 vm_map_unlock(map);
4571 if(map_pmap) {
4572 vm_fault_unwire(map,
4573 &tmp_entry, FALSE, map_pmap, pmap_addr);
4574 } else {
4575 vm_fault_unwire(map,
4576 &tmp_entry, FALSE, map->pmap,
4577 tmp_entry.vme_start);
4578 }
4579 vm_map_lock(map);
4580
4581 if (last_timestamp+1 != map->timestamp) {
4582 /*
4583 * Find the entry again. It could have been clipped
4584 * or deleted after we unlocked the map.
4585 */
4586 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
4587 &first_entry)) {
4588 if (!user_wire)
4589 panic("vm_map_unwire: re-lookup failed");
4590 entry = first_entry->vme_next;
4591 } else
4592 entry = first_entry;
4593 }
4594 last_timestamp = map->timestamp;
4595
4596 /*
4597 * clear transition bit for all constituent entries that
4598 * were in the original entry (saved in tmp_entry). Also
4599 * check for waiters.
4600 */
4601 while ((entry != vm_map_to_entry(map)) &&
4602 (entry->vme_start < tmp_entry.vme_end)) {
4603 assert(entry->in_transition);
4604 entry->in_transition = FALSE;
4605 if (entry->needs_wakeup) {
4606 entry->needs_wakeup = FALSE;
4607 need_wakeup = TRUE;
4608 }
4609 entry = entry->vme_next;
4610 }
4611 }
4612
4613 /*
4614 * We might have fragmented the address space when we wired this
4615 * range of addresses. Attempt to re-coalesce these VM map entries
4616 * with their neighbors now that they're no longer wired.
4617 * Under some circumstances, address space fragmentation can
4618 * prevent VM object shadow chain collapsing, which can cause
4619 * swap space leaks.
4620 */
4621 vm_map_simplify_range(map, start, end);
4622
4623 vm_map_unlock(map);
4624 /*
4625 * wake up anybody waiting on entries that we have unwired.
4626 */
4627 if (need_wakeup)
4628 vm_map_entry_wakeup(map);
4629 return(KERN_SUCCESS);
4630
4631 }
4632
4633 kern_return_t
4634 vm_map_unwire(
4635 register vm_map_t map,
4636 register vm_map_offset_t start,
4637 register vm_map_offset_t end,
4638 boolean_t user_wire)
4639 {
4640 return vm_map_unwire_nested(map, start, end,
4641 user_wire, (pmap_t)NULL, 0);
4642 }
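/*
 * Hedged sketch of the user-wire variant; the use of current_map() and
 * the start/end values are assumptions for illustration.  Wirings made
 * on behalf of a user request pass user_wire == TRUE, so the matching
 * unwire tolerates holes and already-unwired entries instead of
 * panicking, per the comment above vm_map_unwire_nested():
 *
 *	kr = vm_map_wire(current_map(), start, end, VM_PROT_READ, TRUE);
 *	...
 *	kr = vm_map_unwire(current_map(), start, end, TRUE);
 */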
4643
4644
4645 /*
4646 * vm_map_entry_delete: [ internal use only ]
4647 *
4648 * Deallocate the given entry from the target map.
4649 */
4650 static void
4651 vm_map_entry_delete(
4652 register vm_map_t map,
4653 register vm_map_entry_t entry)
4654 {
4655 register vm_map_offset_t s, e;
4656 register vm_object_t object;
4657 register vm_map_t submap;
4658
4659 s = entry->vme_start;
4660 e = entry->vme_end;
4661 assert(page_aligned(s));
4662 assert(page_aligned(e));
4663 assert(entry->wired_count == 0);
4664 assert(entry->user_wired_count == 0);
4665 assert(!entry->permanent);
4666
4667 if (entry->is_sub_map) {
4668 object = NULL;
4669 submap = entry->object.sub_map;
4670 } else {
4671 submap = NULL;
4672 object = entry->object.vm_object;
4673 }
4674
4675 vm_map_entry_unlink(map, entry);
4676 map->size -= e - s;
4677
4678 vm_map_entry_dispose(map, entry);
4679
4680 vm_map_unlock(map);
4681 /*
4682 * Deallocate the object only after removing all
4683 * pmap entries pointing to its pages.
4684 */
4685 if (submap)
4686 vm_map_deallocate(submap);
4687 else
4688 vm_object_deallocate(object);
4689
4690 }
4691
4692 void
4693 vm_map_submap_pmap_clean(
4694 vm_map_t map,
4695 vm_map_offset_t start,
4696 vm_map_offset_t end,
4697 vm_map_t sub_map,
4698 vm_map_offset_t offset)
4699 {
4700 vm_map_offset_t submap_start;
4701 vm_map_offset_t submap_end;
4702 vm_map_size_t remove_size;
4703 vm_map_entry_t entry;
4704
4705 submap_end = offset + (end - start);
4706 submap_start = offset;
4707 if(vm_map_lookup_entry(sub_map, offset, &entry)) {
4708
4709 remove_size = (entry->vme_end - entry->vme_start);
4710 if(offset > entry->vme_start)
4711 remove_size -= offset - entry->vme_start;
4712
4713
4714 if(submap_end < entry->vme_end) {
4715 remove_size -=
4716 entry->vme_end - submap_end;
4717 }
4718 if(entry->is_sub_map) {
4719 vm_map_submap_pmap_clean(
4720 sub_map,
4721 start,
4722 start + remove_size,
4723 entry->object.sub_map,
4724 entry->offset);
4725 } else {
4726
4727 if((map->mapped) && (map->ref_count)
4728 && (entry->object.vm_object != NULL)) {
4729 vm_object_pmap_protect(
4730 entry->object.vm_object,
4731 entry->offset,
4732 remove_size,
4733 PMAP_NULL,
4734 entry->vme_start,
4735 VM_PROT_NONE);
4736 } else {
4737 pmap_remove(map->pmap,
4738 (addr64_t)start,
4739 (addr64_t)(start + remove_size));
4740 }
4741 }
4742 }
4743
4744 entry = entry->vme_next;
4745
4746 while((entry != vm_map_to_entry(sub_map))
4747 && (entry->vme_start < submap_end)) {
4748 remove_size = (entry->vme_end - entry->vme_start);
4749 if(submap_end < entry->vme_end) {
4750 remove_size -= entry->vme_end - submap_end;
4751 }
4752 if(entry->is_sub_map) {
4753 vm_map_submap_pmap_clean(
4754 sub_map,
4755 (start + entry->vme_start) - offset,
4756 ((start + entry->vme_start) - offset) + remove_size,
4757 entry->object.sub_map,
4758 entry->offset);
4759 } else {
4760 if((map->mapped) && (map->ref_count)
4761 && (entry->object.vm_object != NULL)) {
4762 vm_object_pmap_protect(
4763 entry->object.vm_object,
4764 entry->offset,
4765 remove_size,
4766 PMAP_NULL,
4767 entry->vme_start,
4768 VM_PROT_NONE);
4769 } else {
4770 pmap_remove(map->pmap,
4771 (addr64_t)((start + entry->vme_start)
4772 - offset),
4773 (addr64_t)(((start + entry->vme_start)
4774 - offset) + remove_size));
4775 }
4776 }
4777 entry = entry->vme_next;
4778 }
4779 return;
4780 }
4781
4782 /*
4783 * vm_map_delete: [ internal use only ]
4784 *
4785 * Deallocates the given address range from the target map.
4786 * Removes all user wirings. Unwires one kernel wiring if
4787 * VM_MAP_REMOVE_KUNWIRE is set. Waits for kernel wirings to go
4788 * away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set. Sleeps
4789 * interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
4790 *
4791 * This routine is called with map locked and leaves map locked.
4792 */
4793 static kern_return_t
4794 vm_map_delete(
4795 vm_map_t map,
4796 vm_map_offset_t start,
4797 vm_map_offset_t end,
4798 int flags,
4799 vm_map_t zap_map)
4800 {
4801 vm_map_entry_t entry, next;
4802 struct vm_map_entry *first_entry, tmp_entry;
4803 register vm_map_offset_t s;
4804 register vm_object_t object;
4805 boolean_t need_wakeup;
4806 unsigned int last_timestamp = ~0; /* unlikely value */
4807 int interruptible;
4808
4809 interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
4810 THREAD_ABORTSAFE : THREAD_UNINT;
4811
4812 /*
4813 * All our DMA I/O operations in IOKit are currently done by
4814 * wiring through the map entries of the task requesting the I/O.
4815 * Because of this, we must always wait for kernel wirings
4816 * to go away on the entries before deleting them.
4817 *
4818 * Any caller who wants to actually remove a kernel wiring
4819 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
4820 * properly remove one wiring instead of blasting through
4821 * them all.
4822 */
4823 flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;
4824
4825 while(1) {
4826 /*
4827 * Find the start of the region, and clip it
4828 */
4829 if (vm_map_lookup_entry(map, start, &first_entry)) {
4830 entry = first_entry;
4831 if (entry->superpage_size && (start & ~SUPERPAGE_MASK)) { /* extend request to whole entry */
4832 start = SUPERPAGE_ROUND_DOWN(start);
4833 continue;
4834 }
4835 if (start == entry->vme_start) {
4836 /*
4837 * No need to clip. We don't want to cause
4838 * any unnecessary unnesting in this case...
4839 */
4840 } else {
4841 vm_map_clip_start(map, entry, start);
4842 }
4843
4844 /*
4845 * Fix the lookup hint now, rather than each
4846 * time through the loop.
4847 */
4848 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4849 } else {
4850 entry = first_entry->vme_next;
4851 }
4852 break;
4853 }
4854 if (entry->superpage_size)
4855 end = SUPERPAGE_ROUND_UP(end);
4856
4857 need_wakeup = FALSE;
4858 /*
4859 * Step through all entries in this region
4860 */
4861 s = entry->vme_start;
4862 while ((entry != vm_map_to_entry(map)) && (s < end)) {
4863 /*
4864 * At this point, we have deleted all the memory entries
4865 * between "start" and "s". We still need to delete
4866 * all memory entries between "s" and "end".
4867 * While we were blocked and the map was unlocked, some
4868 * new memory entries could have been re-allocated between
4869 * "start" and "s" and we don't want to mess with those.
4870 * Some of those entries could even have been re-assembled
4871 * with an entry after "s" (in vm_map_simplify_entry()), so
4872 * we may have to vm_map_clip_start() again.
4873 */
4874
4875 if (entry->vme_start >= s) {
4876 /*
4877 * This entry starts on or after "s"
4878 * so no need to clip its start.
4879 */
4880 } else {
4881 /*
4882 * This entry has been re-assembled by a
4883 * vm_map_simplify_entry(). We need to
4884 * re-clip its start.
4885 */
4886 vm_map_clip_start(map, entry, s);
4887 }
4888 if (entry->vme_end <= end) {
4889 /*
4890 * This entry is going away completely, so no need
4891 * to clip and possibly cause an unnecessary unnesting.
4892 */
4893 } else {
4894 vm_map_clip_end(map, entry, end);
4895 }
4896
4897 if (entry->permanent) {
4898 panic("attempt to remove permanent VM map entry "
4899 "%p [0x%llx:0x%llx]\n",
4900 entry, (uint64_t) s, (uint64_t) end);
4901 }
4902
4903
4904 if (entry->in_transition) {
4905 wait_result_t wait_result;
4906
4907 /*
4908 * Another thread is wiring/unwiring this entry.
4909 * Let the other thread know we are waiting.
4910 */
4911 assert(s == entry->vme_start);
4912 entry->needs_wakeup = TRUE;
4913
4914 /*
4915 * wake up anybody waiting on entries that we have
4916 * already unwired/deleted.
4917 */
4918 if (need_wakeup) {
4919 vm_map_entry_wakeup(map);
4920 need_wakeup = FALSE;
4921 }
4922
4923 wait_result = vm_map_entry_wait(map, interruptible);
4924
4925 if (interruptible &&
4926 wait_result == THREAD_INTERRUPTED) {
4927 /*
4928 * We do not clear the needs_wakeup flag,
4929 * since we cannot tell if we were the only one.
4930 */
4931 vm_map_unlock(map);
4932 return KERN_ABORTED;
4933 }
4934
4935 /*
4936 * The entry could have been clipped or it
4937 * may not exist anymore. Look it up again.
4938 */
4939 if (!vm_map_lookup_entry(map, s, &first_entry)) {
4940 assert((map != kernel_map) &&
4941 (!entry->is_sub_map));
4942 /*
4943 * User: use the next entry
4944 */
4945 entry = first_entry->vme_next;
4946 s = entry->vme_start;
4947 } else {
4948 entry = first_entry;
4949 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4950 }
4951 last_timestamp = map->timestamp;
4952 continue;
4953 } /* end in_transition */
4954
4955 if (entry->wired_count) {
4956 boolean_t user_wire;
4957
4958 user_wire = entry->user_wired_count > 0;
4959
4960 /*
4961 * Remove a kernel wiring if requested
4962 */
4963 if (flags & VM_MAP_REMOVE_KUNWIRE) {
4964 entry->wired_count--;
4965 }
4966
4967 /*
4968 * Remove all user wirings for proper accounting
4969 */
4970 if (entry->user_wired_count > 0) {
4971 while (entry->user_wired_count)
4972 subtract_wire_counts(map, entry, user_wire);
4973 }
4974
4975 if (entry->wired_count != 0) {
4976 assert(map != kernel_map);
4977 /*
4978 * Cannot continue. Typical case is when
4979 * a user thread has physical I/O pending
4980 * on this page. Either wait for the
4981 * kernel wiring to go away or return an
4982 * error.
4983 */
4984 if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) {
4985 wait_result_t wait_result;
4986
4987 assert(s == entry->vme_start);
4988 entry->needs_wakeup = TRUE;
4989 wait_result = vm_map_entry_wait(map,
4990 interruptible);
4991
4992 if (interruptible &&
4993 wait_result == THREAD_INTERRUPTED) {
4994 /*
4995 * We do not clear the
4996 * needs_wakeup flag, since we
4997 * cannot tell if we were the
4998 * only one.
4999 */
5000 vm_map_unlock(map);
5001 return KERN_ABORTED;
5002 }
5003
5004 /*
5005 * The entry could have been clipped or
5006 * it may not exist anymore. Look it
5007 * up again.
5008 */
5009 if (!vm_map_lookup_entry(map, s,
5010 &first_entry)) {
5011 assert(map != kernel_map);
5012 /*
5013 * User: use the next entry
5014 */
5015 entry = first_entry->vme_next;
5016 s = entry->vme_start;
5017 } else {
5018 entry = first_entry;
5019 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
5020 }
5021 last_timestamp = map->timestamp;
5022 continue;
5023 }
5024 else {
5025 return KERN_FAILURE;
5026 }
5027 }
5028
5029 entry->in_transition = TRUE;
5030 /*
5031 * copy current entry. see comment in vm_map_wire()
5032 */
5033 tmp_entry = *entry;
5034 assert(s == entry->vme_start);
5035
5036 /*
5037 * We can unlock the map now. The in_transition
5038 * state guarantees the existence of the entry.
5039 */
5040 vm_map_unlock(map);
5041
5042 if (tmp_entry.is_sub_map) {
5043 vm_map_t sub_map;
5044 vm_map_offset_t sub_start, sub_end;
5045 pmap_t pmap;
5046 vm_map_offset_t pmap_addr;
5047
5048
5049 sub_map = tmp_entry.object.sub_map;
5050 sub_start = tmp_entry.offset;
5051 sub_end = sub_start + (tmp_entry.vme_end -
5052 tmp_entry.vme_start);
5053 if (tmp_entry.use_pmap) {
5054 pmap = sub_map->pmap;
5055 pmap_addr = tmp_entry.vme_start;
5056 } else {
5057 pmap = map->pmap;
5058 pmap_addr = tmp_entry.vme_start;
5059 }
5060 (void) vm_map_unwire_nested(sub_map,
5061 sub_start, sub_end,
5062 user_wire,
5063 pmap, pmap_addr);
5064 } else {
5065
5066 vm_fault_unwire(map, &tmp_entry,
5067 tmp_entry.object.vm_object == kernel_object,
5068 map->pmap, tmp_entry.vme_start);
5069 }
5070
5071 vm_map_lock(map);
5072
5073 if (last_timestamp+1 != map->timestamp) {
5074 /*
5075 * Find the entry again. It could have
5076 * been clipped after we unlocked the map.
5077 */
5078 if (!vm_map_lookup_entry(map, s, &first_entry)){
5079 assert((map != kernel_map) &&
5080 (!entry->is_sub_map));
5081 first_entry = first_entry->vme_next;
5082 s = first_entry->vme_start;
5083 } else {
5084 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
5085 }
5086 } else {
5087 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
5088 first_entry = entry;
5089 }
5090
5091 last_timestamp = map->timestamp;
5092
5093 entry = first_entry;
5094 while ((entry != vm_map_to_entry(map)) &&
5095 (entry->vme_start < tmp_entry.vme_end)) {
5096 assert(entry->in_transition);
5097 entry->in_transition = FALSE;
5098 if (entry->needs_wakeup) {
5099 entry->needs_wakeup = FALSE;
5100 need_wakeup = TRUE;
5101 }
5102 entry = entry->vme_next;
5103 }
5104 /*
5105 * We have unwired the entry(s). Go back and
5106 * delete them.
5107 */
5108 entry = first_entry;
5109 continue;
5110 }
5111
5112 /* entry is unwired */
5113 assert(entry->wired_count == 0);
5114 assert(entry->user_wired_count == 0);
5115
5116 assert(s == entry->vme_start);
5117
5118 if (flags & VM_MAP_REMOVE_NO_PMAP_CLEANUP) {
5119 /*
5120 * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
5121 * vm_map_delete(), some map entries might have been
5122 * transferred to a "zap_map", which doesn't have a
5123 * pmap. The original pmap has already been flushed
5124 * in the vm_map_delete() call targeting the original
5125 * map, but when we get to destroying the "zap_map",
5126 * we don't have any pmap to flush, so let's just skip
5127 * all this.
5128 */
5129 } else if (entry->is_sub_map) {
5130 if (entry->use_pmap) {
5131 #ifndef NO_NESTED_PMAP
5132 pmap_unnest(map->pmap,
5133 (addr64_t)entry->vme_start,
5134 entry->vme_end - entry->vme_start);
5135 #endif /* NO_NESTED_PMAP */
5136 if ((map->mapped) && (map->ref_count)) {
5137 /* clean up parent map/maps */
5138 vm_map_submap_pmap_clean(
5139 map, entry->vme_start,
5140 entry->vme_end,
5141 entry->object.sub_map,
5142 entry->offset);
5143 }
5144 } else {
5145 vm_map_submap_pmap_clean(
5146 map, entry->vme_start, entry->vme_end,
5147 entry->object.sub_map,
5148 entry->offset);
5149 }
5150 } else if (entry->object.vm_object != kernel_object) {
5151 object = entry->object.vm_object;
5152 if((map->mapped) && (map->ref_count)) {
5153 vm_object_pmap_protect(
5154 object, entry->offset,
5155 entry->vme_end - entry->vme_start,
5156 PMAP_NULL,
5157 entry->vme_start,
5158 VM_PROT_NONE);
5159 } else {
5160 pmap_remove(map->pmap,
5161 (addr64_t)entry->vme_start,
5162 (addr64_t)entry->vme_end);
5163 }
5164 }
5165
5166 /*
5167 * All pmap mappings for this map entry must have been
5168 * cleared by now.
5169 */
5170 assert(vm_map_pmap_is_empty(map,
5171 entry->vme_start,
5172 entry->vme_end));
5173
5174 next = entry->vme_next;
5175 s = next->vme_start;
5176 last_timestamp = map->timestamp;
5177
5178 if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) &&
5179 zap_map != VM_MAP_NULL) {
5180 vm_map_size_t entry_size;
5181 /*
5182 * The caller wants to save the affected VM map entries
5183 * into the "zap_map". The caller will take care of
5184 * these entries.
5185 */
5186 /* unlink the entry from "map" ... */
5187 vm_map_entry_unlink(map, entry);
5188 /* ... and add it to the end of the "zap_map" */
5189 vm_map_entry_link(zap_map,
5190 vm_map_last_entry(zap_map),
5191 entry);
5192 entry_size = entry->vme_end - entry->vme_start;
5193 map->size -= entry_size;
5194 zap_map->size += entry_size;
5195 /* we didn't unlock the map, so no timestamp increase */
5196 last_timestamp--;
5197 } else {
5198 vm_map_entry_delete(map, entry);
5199 /* vm_map_entry_delete unlocks the map */
5200 vm_map_lock(map);
5201 }
5202
5203 entry = next;
5204
5205 if(entry == vm_map_to_entry(map)) {
5206 break;
5207 }
5208 if (last_timestamp+1 != map->timestamp) {
5209 /*
5210 * We are responsible for deleting everything
5211 * from the given space. If someone has interfered,
5212 * we pick up where we left off; back-fills should
5213 * be all right for anyone except map_delete, and
5214 * we have to assume that the task has been fully
5215 * disabled before we get here.
5216 */
5217 if (!vm_map_lookup_entry(map, s, &entry)){
5218 entry = entry->vme_next;
5219 s = entry->vme_start;
5220 } else {
5221 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
5222 }
5223 /*
5224 * Others can not only allocate behind us; we can also
5225 * see entries coalesce while we don't hold the map lock.
5226 */
5227 if(entry == vm_map_to_entry(map)) {
5228 break;
5229 }
5230 }
5231 last_timestamp = map->timestamp;
5232 }
5233
5234 if (map->wait_for_space)
5235 thread_wakeup((event_t) map);
5236 /*
5237 * wake up anybody waiting on entries that we have already deleted.
5238 */
5239 if (need_wakeup)
5240 vm_map_entry_wakeup(map);
5241
5242 return KERN_SUCCESS;
5243 }
5244
5245 /*
5246 * vm_map_remove:
5247 *
5248 * Remove the given address range from the target map.
5249 * This is the exported form of vm_map_delete.
5250 */
5251 kern_return_t
5252 vm_map_remove(
5253 register vm_map_t map,
5254 register vm_map_offset_t start,
5255 register vm_map_offset_t end,
5256 register boolean_t flags)
5257 {
5258 register kern_return_t result;
5259
5260 vm_map_lock(map);
5261 VM_MAP_RANGE_CHECK(map, start, end);
5262 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
5263 vm_map_unlock(map);
5264
5265 return(result);
5266 }
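/*
 * Illustrative sketch; the flag names come from vm_map.h and the range
 * is an assumption.  A plain removal passes VM_MAP_NO_FLAGS, while a
 * caller holding a kernel wiring on the range adds VM_MAP_REMOVE_KUNWIRE
 * so that vm_map_delete() drops that single wiring instead of waiting
 * for it to go away:
 *
 *	(void) vm_map_remove(map, start, end, VM_MAP_NO_FLAGS);
 *
 *	(void) vm_map_remove(map, start, end, VM_MAP_REMOVE_KUNWIRE);
 */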
5267
5268
5269 /*
5270 * Routine: vm_map_copy_discard
5271 *
5272 * Description:
5273 * Dispose of a map copy object (returned by
5274 * vm_map_copyin).
5275 */
5276 void
5277 vm_map_copy_discard(
5278 vm_map_copy_t copy)
5279 {
5280 if (copy == VM_MAP_COPY_NULL)
5281 return;
5282
5283 switch (copy->type) {
5284 case VM_MAP_COPY_ENTRY_LIST:
5285 while (vm_map_copy_first_entry(copy) !=
5286 vm_map_copy_to_entry(copy)) {
5287 vm_map_entry_t entry = vm_map_copy_first_entry(copy);
5288
5289 vm_map_copy_entry_unlink(copy, entry);
5290 vm_object_deallocate(entry->object.vm_object);
5291 vm_map_copy_entry_dispose(copy, entry);
5292 }
5293 break;
5294 case VM_MAP_COPY_OBJECT:
5295 vm_object_deallocate(copy->cpy_object);
5296 break;
5297 case VM_MAP_COPY_KERNEL_BUFFER:
5298
5299 /*
5300 * The vm_map_copy_t and possibly the data buffer were
5301 * allocated by a single call to kalloc(), i.e. the
5302 * vm_map_copy_t was not allocated out of the zone.
5303 */
5304 kfree(copy, copy->cpy_kalloc_size);
5305 return;
5306 }
5307 zfree(vm_map_copy_zone, copy);
5308 }
5309
5310 /*
5311 * Routine: vm_map_copy_copy
5312 *
5313 * Description:
5314 * Move the information in a map copy object to
5315 * a new map copy object, leaving the old one
5316 * empty.
5317 *
5318 * This is used by kernel routines that need
5319 * to look at out-of-line data (in copyin form)
5320 * before deciding whether to return SUCCESS.
5321 * If the routine returns FAILURE, the original
5322 * copy object will be deallocated; therefore,
5323 * these routines must make a copy of the copy
5324 * object and leave the original empty so that
5325 * deallocation will not fail.
5326 */
5327 vm_map_copy_t
5328 vm_map_copy_copy(
5329 vm_map_copy_t copy)
5330 {
5331 vm_map_copy_t new_copy;
5332
5333 if (copy == VM_MAP_COPY_NULL)
5334 return VM_MAP_COPY_NULL;
5335
5336 /*
5337 * Allocate a new copy object, and copy the information
5338 * from the old one into it.
5339 */
5340
5341 new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
5342 *new_copy = *copy;
5343
5344 if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
5345 /*
5346 * The links in the entry chain must be
5347 * changed to point to the new copy object.
5348 */
5349 vm_map_copy_first_entry(copy)->vme_prev
5350 = vm_map_copy_to_entry(new_copy);
5351 vm_map_copy_last_entry(copy)->vme_next
5352 = vm_map_copy_to_entry(new_copy);
5353 }
5354
5355 /*
5356 * Change the old copy object into one that contains
5357 * nothing to be deallocated.
5358 */
5359 copy->type = VM_MAP_COPY_OBJECT;
5360 copy->cpy_object = VM_OBJECT_NULL;
5361
5362 /*
5363 * Return the new object.
5364 */
5365 return new_copy;
5366 }
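/*
 * Sketch of the intended usage pattern; inspect_copy() is a hypothetical
 * helper.  A kernel routine that must examine out-of-line data before
 * committing takes over the contents with vm_map_copy_copy(); if the
 * routine then fails, its caller can still discard the original, now
 * empty, copy object without losing the data:
 *
 *	vm_map_copy_t	mine;
 *
 *	mine = vm_map_copy_copy(copy);
 *	if (inspect_copy(mine) != KERN_SUCCESS) {
 *		vm_map_copy_discard(mine);
 *		return KERN_FAILURE;	(caller discards the empty "copy")
 *	}
 */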
5367
5368 static kern_return_t
5369 vm_map_overwrite_submap_recurse(
5370 vm_map_t dst_map,
5371 vm_map_offset_t dst_addr,
5372 vm_map_size_t dst_size)
5373 {
5374 vm_map_offset_t dst_end;
5375 vm_map_entry_t tmp_entry;
5376 vm_map_entry_t entry;
5377 kern_return_t result;
5378 boolean_t encountered_sub_map = FALSE;
5379
5380
5381
5382 /*
5383 * Verify that the destination is all writeable
5384 * initially. We have to trunc the destination
5385 * address and round the copy size or we'll end up
5386 * splitting entries in strange ways.
5387 */
5388
5389 dst_end = vm_map_round_page(dst_addr + dst_size);
5390 vm_map_lock(dst_map);
5391
5392 start_pass_1:
5393 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
5394 vm_map_unlock(dst_map);
5395 return(KERN_INVALID_ADDRESS);
5396 }
5397
5398 vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(dst_addr));
5399 assert(!tmp_entry->use_pmap); /* clipping did unnest if needed */
5400
5401 for (entry = tmp_entry;;) {
5402 vm_map_entry_t next;
5403
5404 next = entry->vme_next;
5405 while(entry->is_sub_map) {
5406 vm_map_offset_t sub_start;
5407 vm_map_offset_t sub_end;
5408 vm_map_offset_t local_end;
5409
5410 if (entry->in_transition) {
5411 /*
5412 * Say that we are waiting, and wait for entry.
5413 */
5414 entry->needs_wakeup = TRUE;
5415 vm_map_entry_wait(dst_map, THREAD_UNINT);
5416
5417 goto start_pass_1;
5418 }
5419
5420 encountered_sub_map = TRUE;
5421 sub_start = entry->offset;
5422
5423 if(entry->vme_end < dst_end)
5424 sub_end = entry->vme_end;
5425 else
5426 sub_end = dst_end;
5427 sub_end -= entry->vme_start;
5428 sub_end += entry->offset;
5429 local_end = entry->vme_end;
5430 vm_map_unlock(dst_map);
5431
5432 result = vm_map_overwrite_submap_recurse(
5433 entry->object.sub_map,
5434 sub_start,
5435 sub_end - sub_start);
5436
5437 if(result != KERN_SUCCESS)
5438 return result;
5439 if (dst_end <= entry->vme_end)
5440 return KERN_SUCCESS;
5441 vm_map_lock(dst_map);
5442 if(!vm_map_lookup_entry(dst_map, local_end,
5443 &tmp_entry)) {
5444 vm_map_unlock(dst_map);
5445 return(KERN_INVALID_ADDRESS);
5446 }
5447 entry = tmp_entry;
5448 next = entry->vme_next;
5449 }
5450
5451 if ( ! (entry->protection & VM_PROT_WRITE)) {
5452 vm_map_unlock(dst_map);
5453 return(KERN_PROTECTION_FAILURE);
5454 }
5455
5456 /*
5457 * If the entry is in transition, we must wait
5458 * for it to exit that state. Anything could happen
5459 * when we unlock the map, so start over.
5460 */
5461 if (entry->in_transition) {
5462
5463 /*
5464 * Say that we are waiting, and wait for entry.
5465 */
5466 entry->needs_wakeup = TRUE;
5467 vm_map_entry_wait(dst_map, THREAD_UNINT);
5468
5469 goto start_pass_1;
5470 }
5471
5472 /*
5473 * our range is contained completely within this map entry
5474 */
5475 if (dst_end <= entry->vme_end) {
5476 vm_map_unlock(dst_map);
5477 return KERN_SUCCESS;
5478 }
5479 /*
5480 * check that range specified is contiguous region
5481 */
5482 if ((next == vm_map_to_entry(dst_map)) ||
5483 (next->vme_start != entry->vme_end)) {
5484 vm_map_unlock(dst_map);
5485 return(KERN_INVALID_ADDRESS);
5486 }
5487
5488 /*
5489 * Check for permanent objects in the destination.
5490 */
5491 if ((entry->object.vm_object != VM_OBJECT_NULL) &&
5492 ((!entry->object.vm_object->internal) ||
5493 (entry->object.vm_object->true_share))) {
5494 if(encountered_sub_map) {
5495 vm_map_unlock(dst_map);
5496 return(KERN_FAILURE);
5497 }
5498 }
5499
5500
5501 entry = next;
5502 }/* for */
5503 vm_map_unlock(dst_map);
5504 return(KERN_SUCCESS);
5505 }
5506
5507 /*
5508 * Routine: vm_map_copy_overwrite
5509 *
5510 * Description:
5511 * Copy the memory described by the map copy
5512 * object (copy; returned by vm_map_copyin) onto
5513 * the specified destination region (dst_map, dst_addr).
5514 * The destination must be writeable.
5515 *
5516 * Unlike vm_map_copyout, this routine actually
5517 * writes over previously-mapped memory. If the
5518 * previous mapping was to a permanent (user-supplied)
5519 * memory object, it is preserved.
5520 *
5521 * The attributes (protection and inheritance) of the
5522 * destination region are preserved.
5523 *
5524 * If successful, consumes the copy object.
5525 * Otherwise, the caller is responsible for it.
5526 *
5527 * Implementation notes:
5528 * To overwrite aligned temporary virtual memory, it is
5529 * sufficient to remove the previous mapping and insert
5530 * the new copy. This replacement is done either on
5531 * the whole region (if no permanent virtual memory
5532 * objects are embedded in the destination region) or
5533 * in individual map entries.
5534 *
5535 * To overwrite permanent virtual memory, it is necessary
5536 * to copy each page, as the external memory management
5537 * interface currently does not provide any optimizations.
5538 *
5539 * Unaligned memory also has to be copied. It is possible
5540 * to use 'vm_trickery' to copy the aligned data. This is
5541 * not done but not hard to implement.
5542 *
5543 * Once a page of permanent memory has been overwritten,
5544 * it is impossible to interrupt this function; otherwise,
5545 * the call would be neither atomic nor location-independent.
5546 * The kernel-state portion of a user thread must be
5547 * interruptible.
5548 *
5549 * It may be expensive to forward all requests that might
5550 * overwrite permanent memory (vm_write, vm_copy) to
5551 * uninterruptible kernel threads. This routine may be
5552 * called by interruptible threads; however, success is
5553 * not guaranteed -- if the request cannot be performed
5554 * atomically and interruptibly, an error indication is
5555 * returned.
5556 */
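/*
 * Hedged sketch of a typical caller; the source/destination variables
 * are assumptions.  Copy in a source range, then overwrite an existing
 * writeable destination with it; on success the copy object is
 * consumed, on failure it must be discarded explicitly:
 *
 *	kr = vm_map_copyin(src_map, src_addr, len, FALSE, &copy);
 *	if (kr == KERN_SUCCESS) {
 *		kr = vm_map_copy_overwrite(dst_map, dst_addr,
 *					   copy, interruptible);
 *		if (kr != KERN_SUCCESS)
 *			vm_map_copy_discard(copy);
 *	}
 */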
5557
5558 static kern_return_t
5559 vm_map_copy_overwrite_nested(
5560 vm_map_t dst_map,
5561 vm_map_address_t dst_addr,
5562 vm_map_copy_t copy,
5563 boolean_t interruptible,
5564 pmap_t pmap)
5565 {
5566 vm_map_offset_t dst_end;
5567 vm_map_entry_t tmp_entry;
5568 vm_map_entry_t entry;
5569 kern_return_t kr;
5570 boolean_t aligned = TRUE;
5571 boolean_t contains_permanent_objects = FALSE;
5572 boolean_t encountered_sub_map = FALSE;
5573 vm_map_offset_t base_addr;
5574 vm_map_size_t copy_size;
5575 vm_map_size_t total_size;
5576
5577
5578 /*
5579 * Check for null copy object.
5580 */
5581
5582 if (copy == VM_MAP_COPY_NULL)
5583 return(KERN_SUCCESS);
5584
5585 /*
5586 * Check for special kernel buffer allocated
5587 * by new_ipc_kmsg_copyin.
5588 */
5589
5590 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
5591 return(vm_map_copyout_kernel_buffer(
5592 dst_map, &dst_addr,
5593 copy, TRUE));
5594 }
5595
5596 /*
5597 * Only works for entry lists at the moment. Will
5598 * support page lists later.
5599 */
5600
5601 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
5602
5603 if (copy->size == 0) {
5604 vm_map_copy_discard(copy);
5605 return(KERN_SUCCESS);
5606 }
5607
5608 /*
5609 * Verify that the destination is all writeable
5610 * initially. We have to trunc the destination
5611 * address and round the copy size or we'll end up
5612 * splitting entries in strange ways.
5613 */
5614
5615 if (!page_aligned(copy->size) ||
5616 !page_aligned (copy->offset) ||
5617 !page_aligned (dst_addr))
5618 {
5619 aligned = FALSE;
5620 dst_end = vm_map_round_page(dst_addr + copy->size);
5621 } else {
5622 dst_end = dst_addr + copy->size;
5623 }
5624
5625 vm_map_lock(dst_map);
5626
5627 /* LP64todo - remove this check when vm_map_commpage64()
5628 * no longer has to stuff in a map_entry for the commpage
5629 * above the map's max_offset.
5630 */
5631 if (dst_addr >= dst_map->max_offset) {
5632 vm_map_unlock(dst_map);
5633 return(KERN_INVALID_ADDRESS);
5634 }
5635
5636 start_pass_1:
5637 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
5638 vm_map_unlock(dst_map);
5639 return(KERN_INVALID_ADDRESS);
5640 }
5641 vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(dst_addr));
5642 for (entry = tmp_entry;;) {
5643 vm_map_entry_t next = entry->vme_next;
5644
5645 while(entry->is_sub_map) {
5646 vm_map_offset_t sub_start;
5647 vm_map_offset_t sub_end;
5648 vm_map_offset_t local_end;
5649
5650 if (entry->in_transition) {
5651
5652 /*
5653 * Say that we are waiting, and wait for entry.
5654 */
5655 entry->needs_wakeup = TRUE;
5656 vm_map_entry_wait(dst_map, THREAD_UNINT);
5657
5658 goto start_pass_1;
5659 }
5660
5661 local_end = entry->vme_end;
5662 if (!(entry->needs_copy)) {
5663 /* if needs_copy we are a COW submap */
5664 /* in such a case we just replace so */
5665 /* there is no need for the follow- */
5666 /* ing check. */
5667 encountered_sub_map = TRUE;
5668 sub_start = entry->offset;
5669
5670 if(entry->vme_end < dst_end)
5671 sub_end = entry->vme_end;
5672 else
5673 sub_end = dst_end;
5674 sub_end -= entry->vme_start;
5675 sub_end += entry->offset;
5676 vm_map_unlock(dst_map);
5677
5678 kr = vm_map_overwrite_submap_recurse(
5679 entry->object.sub_map,
5680 sub_start,
5681 sub_end - sub_start);
5682 if(kr != KERN_SUCCESS)
5683 return kr;
5684 vm_map_lock(dst_map);
5685 }
5686
5687 if (dst_end <= entry->vme_end)
5688 goto start_overwrite;
5689 if(!vm_map_lookup_entry(dst_map, local_end,
5690 &entry)) {
5691 vm_map_unlock(dst_map);
5692 return(KERN_INVALID_ADDRESS);
5693 }
5694 next = entry->vme_next;
5695 }
5696
5697 if ( ! (entry->protection & VM_PROT_WRITE)) {
5698 vm_map_unlock(dst_map);
5699 return(KERN_PROTECTION_FAILURE);
5700 }
5701
5702 /*
5703 * If the entry is in transition, we must wait
5704 * for it to exit that state. Anything could happen
5705 * when we unlock the map, so start over.
5706 */
5707 if (entry->in_transition) {
5708
5709 /*
5710 * Say that we are waiting, and wait for entry.
5711 */
5712 entry->needs_wakeup = TRUE;
5713 vm_map_entry_wait(dst_map, THREAD_UNINT);
5714
5715 goto start_pass_1;
5716 }
5717
5718 /*
5719 * our range is contained completely within this map entry
5720 */
5721 if (dst_end <= entry->vme_end)
5722 break;
5723 /*
5724 * check that range specified is contiguous region
5725 */
5726 if ((next == vm_map_to_entry(dst_map)) ||
5727 (next->vme_start != entry->vme_end)) {
5728 vm_map_unlock(dst_map);
5729 return(KERN_INVALID_ADDRESS);
5730 }
5731
5732
5733 /*
5734 * Check for permanent objects in the destination.
5735 */
5736 if ((entry->object.vm_object != VM_OBJECT_NULL) &&
5737 ((!entry->object.vm_object->internal) ||
5738 (entry->object.vm_object->true_share))) {
5739 contains_permanent_objects = TRUE;
5740 }
5741
5742 entry = next;
5743 }/* for */
5744
5745 start_overwrite:
5746 /*
5747 * If there are permanent objects in the destination, then
5748 * the copy cannot be interrupted.
5749 */
5750
5751 if (interruptible && contains_permanent_objects) {
5752 vm_map_unlock(dst_map);
5753 return(KERN_FAILURE); /* XXX */
5754 }
5755
5756 /*
5757 *
5758 * Make a second pass, overwriting the data
5759 * At the beginning of each loop iteration,
5760 * the next entry to be overwritten is "tmp_entry"
5761 * (initially, the value returned from the lookup above),
5762 * and the starting address expected in that entry
5763 * is "start".
5764 */
5765
5766 total_size = copy->size;
5767 if(encountered_sub_map) {
5768 copy_size = 0;
5769 /* re-calculate tmp_entry since we've had the map */
5770 /* unlocked */
5771 if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
5772 vm_map_unlock(dst_map);
5773 return(KERN_INVALID_ADDRESS);
5774 }
5775 } else {
5776 copy_size = copy->size;
5777 }
5778
5779 base_addr = dst_addr;
5780 while(TRUE) {
5781 /* deconstruct the copy object and do in parts */
5782 /* only in the sub_map, interruptible case */
5783 vm_map_entry_t copy_entry;
5784 vm_map_entry_t previous_prev = VM_MAP_ENTRY_NULL;
5785 vm_map_entry_t next_copy = VM_MAP_ENTRY_NULL;
5786 int nentries;
5787 int remaining_entries = 0;
5788 vm_map_offset_t new_offset = 0;
5789
5790 for (entry = tmp_entry; copy_size == 0;) {
5791 vm_map_entry_t next;
5792
5793 next = entry->vme_next;
5794
5795 /* tmp_entry and base address are moved along */
5796 /* each time we encounter a sub-map. Otherwise */
5797 /* entry can outpace tmp_entry, and the copy_size */
5798 /* may reflect the distance between them. */
5799 /* If the current entry is found to be in transition, */
5800 /* we will start over at the beginning or at the last */
5801 /* encounter of a submap, as dictated by base_addr; */
5802 /* we will zero copy_size accordingly. */
5803 if (entry->in_transition) {
5804 /*
5805 * Say that we are waiting, and wait for entry.
5806 */
5807 entry->needs_wakeup = TRUE;
5808 vm_map_entry_wait(dst_map, THREAD_UNINT);
5809
5810 if(!vm_map_lookup_entry(dst_map, base_addr,
5811 &tmp_entry)) {
5812 vm_map_unlock(dst_map);
5813 return(KERN_INVALID_ADDRESS);
5814 }
5815 copy_size = 0;
5816 entry = tmp_entry;
5817 continue;
5818 }
5819 if(entry->is_sub_map) {
5820 vm_map_offset_t sub_start;
5821 vm_map_offset_t sub_end;
5822 vm_map_offset_t local_end;
5823
5824 if (entry->needs_copy) {
5825 /* if this is a COW submap */
5826 /* just back the range with an */
5827 /* anonymous entry */
5828 if(entry->vme_end < dst_end)
5829 sub_end = entry->vme_end;
5830 else
5831 sub_end = dst_end;
5832 if(entry->vme_start < base_addr)
5833 sub_start = base_addr;
5834 else
5835 sub_start = entry->vme_start;
5836 vm_map_clip_end(
5837 dst_map, entry, sub_end);
5838 vm_map_clip_start(
5839 dst_map, entry, sub_start);
5840 assert(!entry->use_pmap);
5841 entry->is_sub_map = FALSE;
5842 vm_map_deallocate(
5843 entry->object.sub_map);
5844 entry->object.sub_map = NULL;
5845 entry->is_shared = FALSE;
5846 entry->needs_copy = FALSE;
5847 entry->offset = 0;
5848 /*
5849 * XXX FBDP
5850 * We should propagate the protections
5851 * of the submap entry here instead
5852 * of forcing them to VM_PROT_ALL...
5853 * Or better yet, we should inherit
5854 * the protection of the copy_entry.
5855 */
5856 entry->protection = VM_PROT_ALL;
5857 entry->max_protection = VM_PROT_ALL;
5858 entry->wired_count = 0;
5859 entry->user_wired_count = 0;
5860 if(entry->inheritance
5861 == VM_INHERIT_SHARE)
5862 entry->inheritance = VM_INHERIT_COPY;
5863 continue;
5864 }
5865 /* first take care of any non-sub_map */
5866 /* entries to send */
5867 if(base_addr < entry->vme_start) {
5868 /* stuff to send */
5869 copy_size =
5870 entry->vme_start - base_addr;
5871 break;
5872 }
5873 sub_start = entry->offset;
5874
5875 if(entry->vme_end < dst_end)
5876 sub_end = entry->vme_end;
5877 else
5878 sub_end = dst_end;
5879 sub_end -= entry->vme_start;
5880 sub_end += entry->offset;
5881 local_end = entry->vme_end;
5882 vm_map_unlock(dst_map);
5883 copy_size = sub_end - sub_start;
5884
5885 /* adjust the copy object */
5886 if (total_size > copy_size) {
5887 vm_map_size_t local_size = 0;
5888 vm_map_size_t entry_size;
5889
5890 nentries = 1;
5891 new_offset = copy->offset;
5892 copy_entry = vm_map_copy_first_entry(copy);
5893 while(copy_entry !=
5894 vm_map_copy_to_entry(copy)){
5895 entry_size = copy_entry->vme_end -
5896 copy_entry->vme_start;
5897 if((local_size < copy_size) &&
5898 ((local_size + entry_size)
5899 >= copy_size)) {
5900 vm_map_copy_clip_end(copy,
5901 copy_entry,
5902 copy_entry->vme_start +
5903 (copy_size - local_size));
5904 entry_size = copy_entry->vme_end -
5905 copy_entry->vme_start;
5906 local_size += entry_size;
5907 new_offset += entry_size;
5908 }
5909 if(local_size >= copy_size) {
5910 next_copy = copy_entry->vme_next;
5911 copy_entry->vme_next =
5912 vm_map_copy_to_entry(copy);
5913 previous_prev =
5914 copy->cpy_hdr.links.prev;
5915 copy->cpy_hdr.links.prev = copy_entry;
5916 copy->size = copy_size;
5917 remaining_entries =
5918 copy->cpy_hdr.nentries;
5919 remaining_entries -= nentries;
5920 copy->cpy_hdr.nentries = nentries;
5921 break;
5922 } else {
5923 local_size += entry_size;
5924 new_offset += entry_size;
5925 nentries++;
5926 }
5927 copy_entry = copy_entry->vme_next;
5928 }
5929 }
5930
5931 if((entry->use_pmap) && (pmap == NULL)) {
5932 kr = vm_map_copy_overwrite_nested(
5933 entry->object.sub_map,
5934 sub_start,
5935 copy,
5936 interruptible,
5937 entry->object.sub_map->pmap);
5938 } else if (pmap != NULL) {
5939 kr = vm_map_copy_overwrite_nested(
5940 entry->object.sub_map,
5941 sub_start,
5942 copy,
5943 interruptible, pmap);
5944 } else {
5945 kr = vm_map_copy_overwrite_nested(
5946 entry->object.sub_map,
5947 sub_start,
5948 copy,
5949 interruptible,
5950 dst_map->pmap);
5951 }
5952 if(kr != KERN_SUCCESS) {
5953 if(next_copy != NULL) {
5954 copy->cpy_hdr.nentries +=
5955 remaining_entries;
5956 copy->cpy_hdr.links.prev->vme_next =
5957 next_copy;
5958 copy->cpy_hdr.links.prev
5959 = previous_prev;
5960 copy->size = total_size;
5961 }
5962 return kr;
5963 }
5964 if (dst_end <= local_end) {
5965 return(KERN_SUCCESS);
5966 }
5967 /* otherwise copy no longer exists, it was */
5968 /* destroyed after successful copy_overwrite */
5969 copy = (vm_map_copy_t)
5970 zalloc(vm_map_copy_zone);
5971 vm_map_copy_first_entry(copy) =
5972 vm_map_copy_last_entry(copy) =
5973 vm_map_copy_to_entry(copy);
5974 copy->type = VM_MAP_COPY_ENTRY_LIST;
5975 copy->offset = new_offset;
5976
5977 total_size -= copy_size;
5978 copy_size = 0;
5979 /* put back remainder of copy in container */
5980 if(next_copy != NULL) {
5981 copy->cpy_hdr.nentries = remaining_entries;
5982 copy->cpy_hdr.links.next = next_copy;
5983 copy->cpy_hdr.links.prev = previous_prev;
5984 copy->size = total_size;
5985 next_copy->vme_prev =
5986 vm_map_copy_to_entry(copy);
5987 next_copy = NULL;
5988 }
5989 base_addr = local_end;
5990 vm_map_lock(dst_map);
5991 if(!vm_map_lookup_entry(dst_map,
5992 local_end, &tmp_entry)) {
5993 vm_map_unlock(dst_map);
5994 return(KERN_INVALID_ADDRESS);
5995 }
5996 entry = tmp_entry;
5997 continue;
5998 }
5999 if (dst_end <= entry->vme_end) {
6000 copy_size = dst_end - base_addr;
6001 break;
6002 }
6003
6004 if ((next == vm_map_to_entry(dst_map)) ||
6005 (next->vme_start != entry->vme_end)) {
6006 vm_map_unlock(dst_map);
6007 return(KERN_INVALID_ADDRESS);
6008 }
6009
6010 entry = next;
6011 }/* for */
6012
6013 next_copy = NULL;
6014 nentries = 1;
6015
6016 /* adjust the copy object */
6017 if (total_size > copy_size) {
6018 vm_map_size_t local_size = 0;
6019 vm_map_size_t entry_size;
6020
6021 new_offset = copy->offset;
6022 copy_entry = vm_map_copy_first_entry(copy);
6023 while(copy_entry != vm_map_copy_to_entry(copy)) {
6024 entry_size = copy_entry->vme_end -
6025 copy_entry->vme_start;
6026 if((local_size < copy_size) &&
6027 ((local_size + entry_size)
6028 >= copy_size)) {
6029 vm_map_copy_clip_end(copy, copy_entry,
6030 copy_entry->vme_start +
6031 (copy_size - local_size));
6032 entry_size = copy_entry->vme_end -
6033 copy_entry->vme_start;
6034 local_size += entry_size;
6035 new_offset += entry_size;
6036 }
6037 if(local_size >= copy_size) {
6038 next_copy = copy_entry->vme_next;
6039 copy_entry->vme_next =
6040 vm_map_copy_to_entry(copy);
6041 previous_prev =
6042 copy->cpy_hdr.links.prev;
6043 copy->cpy_hdr.links.prev = copy_entry;
6044 copy->size = copy_size;
6045 remaining_entries =
6046 copy->cpy_hdr.nentries;
6047 remaining_entries -= nentries;
6048 copy->cpy_hdr.nentries = nentries;
6049 break;
6050 } else {
6051 local_size += entry_size;
6052 new_offset += entry_size;
6053 nentries++;
6054 }
6055 copy_entry = copy_entry->vme_next;
6056 }
6057 }
6058
6059 if (aligned) {
6060 pmap_t local_pmap;
6061
6062 if(pmap)
6063 local_pmap = pmap;
6064 else
6065 local_pmap = dst_map->pmap;
6066
6067 if ((kr = vm_map_copy_overwrite_aligned(
6068 dst_map, tmp_entry, copy,
6069 base_addr, local_pmap)) != KERN_SUCCESS) {
6070 if(next_copy != NULL) {
6071 copy->cpy_hdr.nentries +=
6072 remaining_entries;
6073 copy->cpy_hdr.links.prev->vme_next =
6074 next_copy;
6075 copy->cpy_hdr.links.prev =
6076 previous_prev;
6077 copy->size += copy_size;
6078 }
6079 return kr;
6080 }
6081 vm_map_unlock(dst_map);
6082 } else {
6083 /*
6084 * Performance gain:
6085 *
6086 * if the copy and dst address are misaligned but have the same
6087 * offset within the page, we can copy_not_aligned the
6088 * misaligned parts and copy aligned the rest. If they are
6089 * aligned but len is unaligned, we simply need to copy
6090 * the end bit unaligned. We'll need to split the misaligned
6091 * bits of the region in this case!
6092 */
6093 /* ALWAYS UNLOCKS THE dst_map MAP */
6094 if ((kr = vm_map_copy_overwrite_unaligned( dst_map,
6095 tmp_entry, copy, base_addr)) != KERN_SUCCESS) {
6096 if(next_copy != NULL) {
6097 copy->cpy_hdr.nentries +=
6098 remaining_entries;
6099 copy->cpy_hdr.links.prev->vme_next =
6100 next_copy;
6101 copy->cpy_hdr.links.prev =
6102 previous_prev;
6103 copy->size += copy_size;
6104 }
6105 return kr;
6106 }
6107 }
6108 total_size -= copy_size;
6109 if(total_size == 0)
6110 break;
6111 base_addr += copy_size;
6112 copy_size = 0;
6113 copy->offset = new_offset;
6114 if(next_copy != NULL) {
6115 copy->cpy_hdr.nentries = remaining_entries;
6116 copy->cpy_hdr.links.next = next_copy;
6117 copy->cpy_hdr.links.prev = previous_prev;
6118 next_copy->vme_prev = vm_map_copy_to_entry(copy);
6119 copy->size = total_size;
6120 }
6121 vm_map_lock(dst_map);
6122 while(TRUE) {
6123 if (!vm_map_lookup_entry(dst_map,
6124 base_addr, &tmp_entry)) {
6125 vm_map_unlock(dst_map);
6126 return(KERN_INVALID_ADDRESS);
6127 }
6128 if (tmp_entry->in_transition) {
6129 entry->needs_wakeup = TRUE;
6130 vm_map_entry_wait(dst_map, THREAD_UNINT);
6131 } else {
6132 break;
6133 }
6134 }
6135 vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(base_addr));
6136
6137 entry = tmp_entry;
6138 } /* while */
6139
6140 /*
6141 * Throw away the vm_map_copy object
6142 */
6143 vm_map_copy_discard(copy);
6144
6145 return(KERN_SUCCESS);
6146 }/* vm_map_copy_overwrite */
6147
6148 kern_return_t
6149 vm_map_copy_overwrite(
6150 vm_map_t dst_map,
6151 vm_map_offset_t dst_addr,
6152 vm_map_copy_t copy,
6153 boolean_t interruptible)
6154 {
6155 return vm_map_copy_overwrite_nested(
6156 dst_map, dst_addr, copy, interruptible, (pmap_t) NULL);
6157 }
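
/*
 * Illustrative sketch, not part of the original source: one way a caller
 * might pair vm_map_copyin() with the vm_map_copy_overwrite() wrapper
 * above to replace the contents of an existing, writable range in the
 * same map.  The helper name is hypothetical, and the assumption that a
 * failed overwrite leaves the copy object for the caller to discard
 * follows the convention stated for vm_map_copyout() below.
 */
static kern_return_t
example_overwrite_range(
	vm_map_t		map,
	vm_map_address_t	src_addr,
	vm_map_offset_t		dst_addr,
	vm_map_size_t		len)
{
	vm_map_copy_t	copy;
	kern_return_t	kr;

	/* capture the source range; do not destroy the original */
	kr = vm_map_copyin(map, src_addr, len, FALSE, &copy);
	if (kr != KERN_SUCCESS)
		return kr;

	/* overwrite the destination in place, non-interruptibly */
	kr = vm_map_copy_overwrite(map, dst_addr, copy, FALSE);
	if (kr != KERN_SUCCESS)
		vm_map_copy_discard(copy);	/* assumed still ours on failure */
	return kr;
}
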
6158
6159
6160 /*
6161 * Routine: vm_map_copy_overwrite_unaligned [internal use only]
6162 *
6163 * Description:
6164 * Physically copy unaligned data
6165 *
6166 * Implementation:
6167 * Unaligned parts of pages have to be physically copied. We use
6168 * a modified form of vm_fault_copy (which understands none-aligned
6169 * page offsets and sizes) to do the copy. We attempt to copy as
6170 * much memory in one go as possibly, however vm_fault_copy copies
6171 * within 1 memory object so we have to find the smaller of "amount left"
6172 * "source object data size" and "target object data size". With
6173 * unaligned data we don't need to split regions, therefore the source
6174 * (copy) object should be one map entry, the target range may be split
6175 * over multiple map entries however. In any event we are pessimistic
6176 * about these assumptions.
6177 *
6178 * Assumptions:
6179 * dst_map is locked on entry and is returned locked on success,
6180 * unlocked on error.
6181 */
6182
6183 static kern_return_t
6184 vm_map_copy_overwrite_unaligned(
6185 vm_map_t dst_map,
6186 vm_map_entry_t entry,
6187 vm_map_copy_t copy,
6188 vm_map_offset_t start)
6189 {
6190 vm_map_entry_t copy_entry = vm_map_copy_first_entry(copy);
6191 vm_map_version_t version;
6192 vm_object_t dst_object;
6193 vm_object_offset_t dst_offset;
6194 vm_object_offset_t src_offset;
6195 vm_object_offset_t entry_offset;
6196 vm_map_offset_t entry_end;
6197 vm_map_size_t src_size,
6198 dst_size,
6199 copy_size,
6200 amount_left;
6201 kern_return_t kr = KERN_SUCCESS;
6202
6203 vm_map_lock_write_to_read(dst_map);
6204
6205 src_offset = copy->offset - vm_object_trunc_page(copy->offset);
6206 amount_left = copy->size;
6207 /*
6208 * unaligned, so we never clipped this entry; we need the offset into
6209 * the vm_object, not just the data.
6210 */
6211 while (amount_left > 0) {
6212
6213 if (entry == vm_map_to_entry(dst_map)) {
6214 vm_map_unlock_read(dst_map);
6215 return KERN_INVALID_ADDRESS;
6216 }
6217
6218 /* "start" must be within the current map entry */
6219 assert ((start>=entry->vme_start) && (start<entry->vme_end));
6220
6221 dst_offset = start - entry->vme_start;
6222
6223 dst_size = entry->vme_end - start;
6224
6225 src_size = copy_entry->vme_end -
6226 (copy_entry->vme_start + src_offset);
6227
6228 if (dst_size < src_size) {
6229 /*
6230 * we can only copy dst_size bytes before
6231 * we have to get the next destination entry
6232 */
6233 copy_size = dst_size;
6234 } else {
6235 /*
6236 * we can only copy src_size bytes before
6237 * we have to get the next source copy entry
6238 */
6239 copy_size = src_size;
6240 }
6241
6242 if (copy_size > amount_left) {
6243 copy_size = amount_left;
6244 }
6245 /*
6246 * Entry needs copy: create a shadow object for the
6247 * copy-on-write region.
6248 */
6249 if (entry->needs_copy &&
6250 ((entry->protection & VM_PROT_WRITE) != 0))
6251 {
6252 if (vm_map_lock_read_to_write(dst_map)) {
6253 vm_map_lock_read(dst_map);
6254 goto RetryLookup;
6255 }
6256 vm_object_shadow(&entry->object.vm_object,
6257 &entry->offset,
6258 (vm_map_size_t)(entry->vme_end
6259 - entry->vme_start));
6260 entry->needs_copy = FALSE;
6261 vm_map_lock_write_to_read(dst_map);
6262 }
6263 dst_object = entry->object.vm_object;
6264 /*
6265 * Unlike with the virtual (aligned) copy, we're going
6266 * to fault on it; therefore we need a target object.
6267 */
6268 if (dst_object == VM_OBJECT_NULL) {
6269 if (vm_map_lock_read_to_write(dst_map)) {
6270 vm_map_lock_read(dst_map);
6271 goto RetryLookup;
6272 }
6273 dst_object = vm_object_allocate((vm_map_size_t)
6274 entry->vme_end - entry->vme_start);
6275 entry->object.vm_object = dst_object;
6276 entry->offset = 0;
6277 vm_map_lock_write_to_read(dst_map);
6278 }
6279 /*
6280 * Take an object reference and unlock map. The "entry" may
6281 * disappear or change when the map is unlocked.
6282 */
6283 vm_object_reference(dst_object);
6284 version.main_timestamp = dst_map->timestamp;
6285 entry_offset = entry->offset;
6286 entry_end = entry->vme_end;
6287 vm_map_unlock_read(dst_map);
6288 /*
6289 * Copy as much as possible in one pass
6290 */
6291 kr = vm_fault_copy(
6292 copy_entry->object.vm_object,
6293 copy_entry->offset + src_offset,
6294 &copy_size,
6295 dst_object,
6296 entry_offset + dst_offset,
6297 dst_map,
6298 &version,
6299 THREAD_UNINT );
6300
6301 start += copy_size;
6302 src_offset += copy_size;
6303 amount_left -= copy_size;
6304 /*
6305 * Release the object reference
6306 */
6307 vm_object_deallocate(dst_object);
6308 /*
6309 * If a hard error occurred, return it now
6310 */
6311 if (kr != KERN_SUCCESS)
6312 return kr;
6313
6314 if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
6315 || amount_left == 0)
6316 {
6317 /*
6318 * all done with this copy entry, dispose.
6319 */
6320 vm_map_copy_entry_unlink(copy, copy_entry);
6321 vm_object_deallocate(copy_entry->object.vm_object);
6322 vm_map_copy_entry_dispose(copy, copy_entry);
6323
6324 if ((copy_entry = vm_map_copy_first_entry(copy))
6325 == vm_map_copy_to_entry(copy) && amount_left) {
6326 /*
6327 * not finished copying but ran out of source
6328 */
6329 return KERN_INVALID_ADDRESS;
6330 }
6331 src_offset = 0;
6332 }
6333
6334 if (amount_left == 0)
6335 return KERN_SUCCESS;
6336
6337 vm_map_lock_read(dst_map);
6338 if (version.main_timestamp == dst_map->timestamp) {
6339 if (start == entry_end) {
6340 /*
6341 * destination region is split. Use the version
6342 * information to avoid a lookup in the normal
6343 * case.
6344 */
6345 entry = entry->vme_next;
6346 /*
6347 * should be contiguous. Fail if we encounter
6348 * a hole in the destination.
6349 */
6350 if (start != entry->vme_start) {
6351 vm_map_unlock_read(dst_map);
6352 return KERN_INVALID_ADDRESS ;
6353 }
6354 }
6355 } else {
6356 /*
6357 * Map version check failed.
6358 * We must look up the entry because somebody
6359 * might have changed the map behind our backs.
6360 */
6361 RetryLookup:
6362 if (!vm_map_lookup_entry(dst_map, start, &entry))
6363 {
6364 vm_map_unlock_read(dst_map);
6365 return KERN_INVALID_ADDRESS ;
6366 }
6367 }
6368 }/* while */
6369
6370 return KERN_SUCCESS;
6371 }/* vm_map_copy_overwrite_unaligned */
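
/*
 * Minimal sketch, not in the original source, of how the unaligned path
 * above sizes each pass: it copies the smallest of the room left in the
 * current destination entry, the data left in the current source copy
 * entry, and the total amount still to be copied.  The helper name is
 * hypothetical.
 */
static vm_map_size_t
example_unaligned_pass_size(
	vm_map_size_t	dst_size,	/* entry->vme_end - start */
	vm_map_size_t	src_size,	/* copy entry bytes past src_offset */
	vm_map_size_t	amount_left)
{
	vm_map_size_t	copy_size;

	copy_size = (dst_size < src_size) ? dst_size : src_size;
	if (copy_size > amount_left)
		copy_size = amount_left;
	return copy_size;
}
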
6372
6373 /*
6374 * Routine: vm_map_copy_overwrite_aligned [internal use only]
6375 *
6376 * Description:
6377 * Does all the vm_trickery possible for whole pages.
6378 *
6379 * Implementation:
6380 *
6381 * If there are no permanent objects in the destination,
6382 * and the source and destination map entry zones match,
6383 * and the destination map entry is not shared,
6384 * then the map entries can be deleted and replaced
6385 * with those from the copy. The following code is the
6386 * basic idea of what to do, but there are lots of annoying
6387 * little details about getting protection and inheritance
6388 * right. Should add protection, inheritance, and sharing checks
6389 * to the above pass and make sure that no wiring is involved.
6390 */
6391
6392 static kern_return_t
6393 vm_map_copy_overwrite_aligned(
6394 vm_map_t dst_map,
6395 vm_map_entry_t tmp_entry,
6396 vm_map_copy_t copy,
6397 vm_map_offset_t start,
6398 __unused pmap_t pmap)
6399 {
6400 vm_object_t object;
6401 vm_map_entry_t copy_entry;
6402 vm_map_size_t copy_size;
6403 vm_map_size_t size;
6404 vm_map_entry_t entry;
6405
6406 while ((copy_entry = vm_map_copy_first_entry(copy))
6407 != vm_map_copy_to_entry(copy))
6408 {
6409 copy_size = (copy_entry->vme_end - copy_entry->vme_start);
6410
6411 entry = tmp_entry;
6412 assert(!entry->use_pmap); /* unnested when clipped earlier */
6413 if (entry == vm_map_to_entry(dst_map)) {
6414 vm_map_unlock(dst_map);
6415 return KERN_INVALID_ADDRESS;
6416 }
6417 size = (entry->vme_end - entry->vme_start);
6418 /*
6419 * Make sure that no holes popped up in the
6420 * address map, and that the protection is
6421 * still valid, in case the map was unlocked
6422 * earlier.
6423 */
6424
6425 if ((entry->vme_start != start) || ((entry->is_sub_map)
6426 && !entry->needs_copy)) {
6427 vm_map_unlock(dst_map);
6428 return(KERN_INVALID_ADDRESS);
6429 }
6430 assert(entry != vm_map_to_entry(dst_map));
6431
6432 /*
6433 * Check protection again
6434 */
6435
6436 if ( ! (entry->protection & VM_PROT_WRITE)) {
6437 vm_map_unlock(dst_map);
6438 return(KERN_PROTECTION_FAILURE);
6439 }
6440
6441 /*
6442 * Adjust to source size first
6443 */
6444
6445 if (copy_size < size) {
6446 vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
6447 size = copy_size;
6448 }
6449
6450 /*
6451 * Adjust to destination size
6452 */
6453
6454 if (size < copy_size) {
6455 vm_map_copy_clip_end(copy, copy_entry,
6456 copy_entry->vme_start + size);
6457 copy_size = size;
6458 }
6459
6460 assert((entry->vme_end - entry->vme_start) == size);
6461 assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
6462 assert((copy_entry->vme_end - copy_entry->vme_start) == size);
6463
6464 /*
6465 * If the destination contains temporary unshared memory,
6466 * we can perform the copy by throwing it away and
6467 * installing the source data.
6468 */
6469
6470 object = entry->object.vm_object;
6471 if ((!entry->is_shared &&
6472 ((object == VM_OBJECT_NULL) ||
6473 (object->internal && !object->true_share))) ||
6474 entry->needs_copy) {
6475 vm_object_t old_object = entry->object.vm_object;
6476 vm_object_offset_t old_offset = entry->offset;
6477 vm_object_offset_t offset;
6478
6479 /*
6480 * Ensure that the source and destination aren't
6481 * identical
6482 */
6483 if (old_object == copy_entry->object.vm_object &&
6484 old_offset == copy_entry->offset) {
6485 vm_map_copy_entry_unlink(copy, copy_entry);
6486 vm_map_copy_entry_dispose(copy, copy_entry);
6487
6488 if (old_object != VM_OBJECT_NULL)
6489 vm_object_deallocate(old_object);
6490
6491 start = tmp_entry->vme_end;
6492 tmp_entry = tmp_entry->vme_next;
6493 continue;
6494 }
6495
6496 if (old_object != VM_OBJECT_NULL) {
6497 if(entry->is_sub_map) {
6498 if(entry->use_pmap) {
6499 #ifndef NO_NESTED_PMAP
6500 pmap_unnest(dst_map->pmap,
6501 (addr64_t)entry->vme_start,
6502 entry->vme_end - entry->vme_start);
6503 #endif /* NO_NESTED_PMAP */
6504 if(dst_map->mapped) {
6505 /* clean up parent */
6506 /* map/maps */
6507 vm_map_submap_pmap_clean(
6508 dst_map, entry->vme_start,
6509 entry->vme_end,
6510 entry->object.sub_map,
6511 entry->offset);
6512 }
6513 } else {
6514 vm_map_submap_pmap_clean(
6515 dst_map, entry->vme_start,
6516 entry->vme_end,
6517 entry->object.sub_map,
6518 entry->offset);
6519 }
6520 vm_map_deallocate(
6521 entry->object.sub_map);
6522 } else {
6523 if(dst_map->mapped) {
6524 vm_object_pmap_protect(
6525 entry->object.vm_object,
6526 entry->offset,
6527 entry->vme_end
6528 - entry->vme_start,
6529 PMAP_NULL,
6530 entry->vme_start,
6531 VM_PROT_NONE);
6532 } else {
6533 pmap_remove(dst_map->pmap,
6534 (addr64_t)(entry->vme_start),
6535 (addr64_t)(entry->vme_end));
6536 }
6537 vm_object_deallocate(old_object);
6538 }
6539 }
6540
6541 entry->is_sub_map = FALSE;
6542 entry->object = copy_entry->object;
6543 object = entry->object.vm_object;
6544 entry->needs_copy = copy_entry->needs_copy;
6545 entry->wired_count = 0;
6546 entry->user_wired_count = 0;
6547 offset = entry->offset = copy_entry->offset;
6548
6549 vm_map_copy_entry_unlink(copy, copy_entry);
6550 vm_map_copy_entry_dispose(copy, copy_entry);
6551
6552 /*
6553 * we could try to push pages into the pmap at this point, BUT
6554 * this optimization only saved on average 2 us per page if ALL
6555 * the pages in the source were currently mapped
6556 * and ALL the pages in the dest were touched; if fewer
6557 * than 2/3 of the pages were touched, this optimization actually cost more cycles.
6558 * It also puts a lot of pressure on the pmap layer w/r/t mapping structures.
6559 */
6560
6561 /*
6562 * Set up for the next iteration. The map
6563 * has not been unlocked, so the next
6564 * address should be at the end of this
6565 * entry, and the next map entry should be
6566 * the one following it.
6567 */
6568
6569 start = tmp_entry->vme_end;
6570 tmp_entry = tmp_entry->vme_next;
6571 } else {
6572 vm_map_version_t version;
6573 vm_object_t dst_object = entry->object.vm_object;
6574 vm_object_offset_t dst_offset = entry->offset;
6575 kern_return_t r;
6576
6577 /*
6578 * Take an object reference, and record
6579 * the map version information so that the
6580 * map can be safely unlocked.
6581 */
6582
6583 vm_object_reference(dst_object);
6584
6585 /* account for unlock bumping up timestamp */
6586 version.main_timestamp = dst_map->timestamp + 1;
6587
6588 vm_map_unlock(dst_map);
6589
6590 /*
6591 * Copy as much as possible in one pass
6592 */
6593
6594 copy_size = size;
6595 r = vm_fault_copy(
6596 copy_entry->object.vm_object,
6597 copy_entry->offset,
6598 &copy_size,
6599 dst_object,
6600 dst_offset,
6601 dst_map,
6602 &version,
6603 THREAD_UNINT );
6604
6605 /*
6606 * Release the object reference
6607 */
6608
6609 vm_object_deallocate(dst_object);
6610
6611 /*
6612 * If a hard error occurred, return it now
6613 */
6614
6615 if (r != KERN_SUCCESS)
6616 return(r);
6617
6618 if (copy_size != 0) {
6619 /*
6620 * Dispose of the copied region
6621 */
6622
6623 vm_map_copy_clip_end(copy, copy_entry,
6624 copy_entry->vme_start + copy_size);
6625 vm_map_copy_entry_unlink(copy, copy_entry);
6626 vm_object_deallocate(copy_entry->object.vm_object);
6627 vm_map_copy_entry_dispose(copy, copy_entry);
6628 }
6629
6630 /*
6631 * Pick up in the destination map where we left off.
6632 *
6633 * Use the version information to avoid a lookup
6634 * in the normal case.
6635 */
6636
6637 start += copy_size;
6638 vm_map_lock(dst_map);
6639 if (version.main_timestamp == dst_map->timestamp) {
6640 /* We can safely use saved tmp_entry value */
6641
6642 vm_map_clip_end(dst_map, tmp_entry, start);
6643 tmp_entry = tmp_entry->vme_next;
6644 } else {
6645 /* Must do lookup of tmp_entry */
6646
6647 if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
6648 vm_map_unlock(dst_map);
6649 return(KERN_INVALID_ADDRESS);
6650 }
6651 vm_map_clip_start(dst_map, tmp_entry, start);
6652 }
6653 }
6654 }/* while */
6655
6656 return(KERN_SUCCESS);
6657 }/* vm_map_copy_overwrite_aligned */
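
/*
 * Illustrative sketch, not part of the original source, of the test the
 * aligned path above uses to decide whether it may simply install the
 * source entry's object in place of the destination's (assuming, as the
 * code above already guarantees, that the entry is not a submap): the
 * destination must be unshared and backed by nothing or by temporary
 * internal memory that is not "true_share", or still be marked
 * needs_copy.  The helper name is hypothetical.
 */
static boolean_t
example_can_install_source_object(
	vm_map_entry_t	entry)
{
	vm_object_t	object = entry->object.vm_object;

	if (entry->needs_copy)
		return TRUE;
	if (entry->is_shared)
		return FALSE;
	return (object == VM_OBJECT_NULL ||
		(object->internal && !object->true_share));
}
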
6658
6659 /*
6660 * Routine: vm_map_copyin_kernel_buffer [internal use only]
6661 *
6662 * Description:
6663 * Copy in data to a kernel buffer from space in the
6664 * source map. The original space may be optionally
6665 * deallocated.
6666 *
6667 * If successful, returns a new copy object.
6668 */
6669 static kern_return_t
6670 vm_map_copyin_kernel_buffer(
6671 vm_map_t src_map,
6672 vm_map_offset_t src_addr,
6673 vm_map_size_t len,
6674 boolean_t src_destroy,
6675 vm_map_copy_t *copy_result)
6676 {
6677 kern_return_t kr;
6678 vm_map_copy_t copy;
6679 vm_size_t kalloc_size;
6680
6681 if ((vm_size_t) len != len) {
6682 /* "len" is too big and doesn't fit in a "vm_size_t" */
6683 return KERN_RESOURCE_SHORTAGE;
6684 }
6685 kalloc_size = (vm_size_t) (sizeof(struct vm_map_copy) + len);
6686 assert((vm_map_size_t) kalloc_size == sizeof (struct vm_map_copy) + len);
6687
6688 copy = (vm_map_copy_t) kalloc(kalloc_size);
6689 if (copy == VM_MAP_COPY_NULL) {
6690 return KERN_RESOURCE_SHORTAGE;
6691 }
6692 copy->type = VM_MAP_COPY_KERNEL_BUFFER;
6693 copy->size = len;
6694 copy->offset = 0;
6695 copy->cpy_kdata = (void *) (copy + 1);
6696 copy->cpy_kalloc_size = kalloc_size;
6697
6698 kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t) len);
6699 if (kr != KERN_SUCCESS) {
6700 kfree(copy, kalloc_size);
6701 return kr;
6702 }
6703 if (src_destroy) {
6704 (void) vm_map_remove(src_map, vm_map_trunc_page(src_addr),
6705 vm_map_round_page(src_addr + len),
6706 VM_MAP_REMOVE_INTERRUPTIBLE |
6707 VM_MAP_REMOVE_WAIT_FOR_KWIRE |
6708 (src_map == kernel_map) ?
6709 VM_MAP_REMOVE_KUNWIRE : 0);
6710 }
6711 *copy_result = copy;
6712 return KERN_SUCCESS;
6713 }
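
/*
 * Minimal sketch, not in the original source, of the sizing guard used by
 * vm_map_copyin_kernel_buffer() above: a 64-bit vm_map_size_t length must
 * survive truncation to vm_size_t before it is added to the header size
 * that will be handed to kalloc().  The helper name is hypothetical.
 */
static kern_return_t
example_kernel_buffer_alloc_size(
	vm_map_size_t	len,
	vm_size_t	*kalloc_size)	/* OUT */
{
	if ((vm_size_t) len != len) {
		/* "len" would be truncated; refuse, as the routine above does */
		return KERN_RESOURCE_SHORTAGE;
	}
	*kalloc_size = (vm_size_t) (sizeof(struct vm_map_copy) + len);
	return KERN_SUCCESS;
}
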
6714
6715 /*
6716 * Routine: vm_map_copyout_kernel_buffer [internal use only]
6717 *
6718 * Description:
6719 * Copy out data from a kernel buffer into space in the
6720 * destination map. The space may be optionally dynamically
6721 * allocated.
6722 *
6723 * If successful, consumes the copy object.
6724 * Otherwise, the caller is responsible for it.
6725 */
6726 static int vm_map_copyout_kernel_buffer_failures = 0;
6727 static kern_return_t
6728 vm_map_copyout_kernel_buffer(
6729 vm_map_t map,
6730 vm_map_address_t *addr, /* IN/OUT */
6731 vm_map_copy_t copy,
6732 boolean_t overwrite)
6733 {
6734 kern_return_t kr = KERN_SUCCESS;
6735 thread_t thread = current_thread();
6736
6737 if (!overwrite) {
6738
6739 /*
6740 * Allocate space in the target map for the data
6741 */
6742 *addr = 0;
6743 kr = vm_map_enter(map,
6744 addr,
6745 vm_map_round_page(copy->size),
6746 (vm_map_offset_t) 0,
6747 VM_FLAGS_ANYWHERE,
6748 VM_OBJECT_NULL,
6749 (vm_object_offset_t) 0,
6750 FALSE,
6751 VM_PROT_DEFAULT,
6752 VM_PROT_ALL,
6753 VM_INHERIT_DEFAULT);
6754 if (kr != KERN_SUCCESS)
6755 return kr;
6756 }
6757
6758 /*
6759 * Copyout the data from the kernel buffer to the target map.
6760 */
6761 if (thread->map == map) {
6762
6763 /*
6764 * If the target map is the current map, just do
6765 * the copy.
6766 */
6767 assert((vm_size_t) copy->size == copy->size);
6768 if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) {
6769 kr = KERN_INVALID_ADDRESS;
6770 }
6771 }
6772 else {
6773 vm_map_t oldmap;
6774
6775 /*
6776 * If the target map is another map, assume the
6777 * target's address space identity for the duration
6778 * of the copy.
6779 */
6780 vm_map_reference(map);
6781 oldmap = vm_map_switch(map);
6782
6783 assert((vm_size_t) copy->size == copy->size);
6784 if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) {
6785 vm_map_copyout_kernel_buffer_failures++;
6786 kr = KERN_INVALID_ADDRESS;
6787 }
6788
6789 (void) vm_map_switch(oldmap);
6790 vm_map_deallocate(map);
6791 }
6792
6793 if (kr != KERN_SUCCESS) {
6794 /* the copy failed, clean up */
6795 if (!overwrite) {
6796 /*
6797 * Deallocate the space we allocated in the target map.
6798 */
6799 (void) vm_map_remove(map,
6800 vm_map_trunc_page(*addr),
6801 vm_map_round_page(*addr +
6802 vm_map_round_page(copy->size)),
6803 VM_MAP_NO_FLAGS);
6804 *addr = 0;
6805 }
6806 } else {
6807 /* copy was successful, discard the copy structure */
6808 kfree(copy, copy->cpy_kalloc_size);
6809 }
6810
6811 return kr;
6812 }
6813
6814 /*
6815 * Macro: vm_map_copy_insert
6816 *
6817 * Description:
6818 * Link a copy chain ("copy") into a map at the
6819 * specified location (after "where").
6820 * Side effects:
6821 * The copy chain is destroyed.
6822 * Warning:
6823 * The arguments are evaluated multiple times.
6824 */
6825 #define vm_map_copy_insert(map, where, copy) \
6826 MACRO_BEGIN \
6827 vm_map_t VMCI_map; \
6828 vm_map_entry_t VMCI_where; \
6829 vm_map_copy_t VMCI_copy; \
6830 VMCI_map = (map); \
6831 VMCI_where = (where); \
6832 VMCI_copy = (copy); \
6833 ((VMCI_where->vme_next)->vme_prev = vm_map_copy_last_entry(VMCI_copy))\
6834 ->vme_next = (VMCI_where->vme_next); \
6835 ((VMCI_where)->vme_next = vm_map_copy_first_entry(VMCI_copy)) \
6836 ->vme_prev = VMCI_where; \
6837 VMCI_map->hdr.nentries += VMCI_copy->cpy_hdr.nentries; \
6838 UPDATE_FIRST_FREE(VMCI_map, VMCI_map->first_free); \
6839 zfree(vm_map_copy_zone, VMCI_copy); \
6840 MACRO_END
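
/*
 * Simplified sketch, not part of the original source, of the pointer
 * surgery vm_map_copy_insert() performs, written against a hypothetical
 * node type so the splice is easier to read: the whole chain
 * [first .. last] is linked in immediately after "where".
 */
struct example_node {
	struct example_node	*next;
	struct example_node	*prev;
};

static void
example_splice_chain_after(
	struct example_node	*where,
	struct example_node	*first,		/* head of the chain */
	struct example_node	*last)		/* tail of the chain */
{
	/* the tail of the chain picks up "where"'s old successor */
	last->next = where->next;
	where->next->prev = last;

	/* the head of the chain hangs off "where" itself */
	where->next = first;
	first->prev = where;
}
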
6841
6842 /*
6843 * Routine: vm_map_copyout
6844 *
6845 * Description:
6846 * Copy out a copy chain ("copy") into newly-allocated
6847 * space in the destination map.
6848 *
6849 * If successful, consumes the copy object.
6850 * Otherwise, the caller is responsible for it.
6851 */
6852 kern_return_t
6853 vm_map_copyout(
6854 vm_map_t dst_map,
6855 vm_map_address_t *dst_addr, /* OUT */
6856 vm_map_copy_t copy)
6857 {
6858 vm_map_size_t size;
6859 vm_map_size_t adjustment;
6860 vm_map_offset_t start;
6861 vm_object_offset_t vm_copy_start;
6862 vm_map_entry_t last;
6863 register
6864 vm_map_entry_t entry;
6865
6866 /*
6867 * Check for null copy object.
6868 */
6869
6870 if (copy == VM_MAP_COPY_NULL) {
6871 *dst_addr = 0;
6872 return(KERN_SUCCESS);
6873 }
6874
6875 /*
6876 * Check for special copy object, created
6877 * by vm_map_copyin_object.
6878 */
6879
6880 if (copy->type == VM_MAP_COPY_OBJECT) {
6881 vm_object_t object = copy->cpy_object;
6882 kern_return_t kr;
6883 vm_object_offset_t offset;
6884
6885 offset = vm_object_trunc_page(copy->offset);
6886 size = vm_map_round_page(copy->size +
6887 (vm_map_size_t)(copy->offset - offset));
6888 *dst_addr = 0;
6889 kr = vm_map_enter(dst_map, dst_addr, size,
6890 (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
6891 object, offset, FALSE,
6892 VM_PROT_DEFAULT, VM_PROT_ALL,
6893 VM_INHERIT_DEFAULT);
6894 if (kr != KERN_SUCCESS)
6895 return(kr);
6896 /* Account for non-pagealigned copy object */
6897 *dst_addr += (vm_map_offset_t)(copy->offset - offset);
6898 zfree(vm_map_copy_zone, copy);
6899 return(KERN_SUCCESS);
6900 }
6901
6902 /*
6903 * Check for special kernel buffer allocated
6904 * by new_ipc_kmsg_copyin.
6905 */
6906
6907 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
6908 return(vm_map_copyout_kernel_buffer(dst_map, dst_addr,
6909 copy, FALSE));
6910 }
6911
6912 /*
6913 * Find space for the data
6914 */
6915
6916 vm_copy_start = vm_object_trunc_page(copy->offset);
6917 size = vm_map_round_page((vm_map_size_t)copy->offset + copy->size)
6918 - vm_copy_start;
6919
6920 StartAgain: ;
6921
6922 vm_map_lock(dst_map);
6923 assert(first_free_is_valid(dst_map));
6924 start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
6925 vm_map_min(dst_map) : last->vme_end;
6926
6927 while (TRUE) {
6928 vm_map_entry_t next = last->vme_next;
6929 vm_map_offset_t end = start + size;
6930
6931 if ((end > dst_map->max_offset) || (end < start)) {
6932 if (dst_map->wait_for_space) {
6933 if (size <= (dst_map->max_offset - dst_map->min_offset)) {
6934 assert_wait((event_t) dst_map,
6935 THREAD_INTERRUPTIBLE);
6936 vm_map_unlock(dst_map);
6937 thread_block(THREAD_CONTINUE_NULL);
6938 goto StartAgain;
6939 }
6940 }
6941 vm_map_unlock(dst_map);
6942 return(KERN_NO_SPACE);
6943 }
6944
6945 if ((next == vm_map_to_entry(dst_map)) ||
6946 (next->vme_start >= end))
6947 break;
6948
6949 last = next;
6950 start = last->vme_end;
6951 }
6952
6953 /*
6954 * Since we're going to just drop the map
6955 * entries from the copy into the destination
6956 * map, they must come from the same pool.
6957 */
6958
6959 if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
6960 /*
6961 * Mismatches occur when dealing with the default
6962 * pager.
6963 */
6964 zone_t old_zone;
6965 vm_map_entry_t next, new;
6966
6967 /*
6968 * Find the zone that the copies were allocated from
6969 */
6970 old_zone = (copy->cpy_hdr.entries_pageable)
6971 ? vm_map_entry_zone
6972 : vm_map_kentry_zone;
6973 entry = vm_map_copy_first_entry(copy);
6974
6975 /*
6976 * Reinitialize the copy so that vm_map_copy_entry_link
6977 * will work.
6978 */
6979 copy->cpy_hdr.nentries = 0;
6980 copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;
6981 vm_map_copy_first_entry(copy) =
6982 vm_map_copy_last_entry(copy) =
6983 vm_map_copy_to_entry(copy);
6984
6985 /*
6986 * Copy each entry.
6987 */
6988 while (entry != vm_map_copy_to_entry(copy)) {
6989 new = vm_map_copy_entry_create(copy);
6990 vm_map_entry_copy_full(new, entry);
6991 new->use_pmap = FALSE; /* clr address space specifics */
6992 vm_map_copy_entry_link(copy,
6993 vm_map_copy_last_entry(copy),
6994 new);
6995 next = entry->vme_next;
6996 zfree(old_zone, entry);
6997 entry = next;
6998 }
6999 }
7000
7001 /*
7002 * Adjust the addresses in the copy chain, and
7003 * reset the region attributes.
7004 */
7005
7006 adjustment = start - vm_copy_start;
7007 for (entry = vm_map_copy_first_entry(copy);
7008 entry != vm_map_copy_to_entry(copy);
7009 entry = entry->vme_next) {
7010 entry->vme_start += adjustment;
7011 entry->vme_end += adjustment;
7012
7013 entry->inheritance = VM_INHERIT_DEFAULT;
7014 entry->protection = VM_PROT_DEFAULT;
7015 entry->max_protection = VM_PROT_ALL;
7016 entry->behavior = VM_BEHAVIOR_DEFAULT;
7017
7018 /*
7019 * If the entry is now wired,
7020 * map the pages into the destination map.
7021 */
7022 if (entry->wired_count != 0) {
7023 register vm_map_offset_t va;
7024 vm_object_offset_t offset;
7025 register vm_object_t object;
7026 vm_prot_t prot;
7027 int type_of_fault;
7028
7029 object = entry->object.vm_object;
7030 offset = entry->offset;
7031 va = entry->vme_start;
7032
7033 pmap_pageable(dst_map->pmap,
7034 entry->vme_start,
7035 entry->vme_end,
7036 TRUE);
7037
7038 while (va < entry->vme_end) {
7039 register vm_page_t m;
7040
7041 /*
7042 * Look up the page in the object.
7043 * Assert that the page will be found in the
7044 * top object:
7045 * either
7046 * the object was newly created by
7047 * vm_object_copy_slowly, and has
7048 * copies of all of the pages from
7049 * the source object
7050 * or
7051 * the object was moved from the old
7052 * map entry; because the old map
7053 * entry was wired, all of the pages
7054 * were in the top-level object.
7055 * (XXX not true if we wire pages for
7056 * reading)
7057 */
7058 vm_object_lock(object);
7059
7060 m = vm_page_lookup(object, offset);
7061 if (m == VM_PAGE_NULL || !VM_PAGE_WIRED(m) ||
7062 m->absent)
7063 panic("vm_map_copyout: wiring %p", m);
7064
7065 /*
7066 * ENCRYPTED SWAP:
7067 * The page is assumed to be wired here, so it
7068 * shouldn't be encrypted. Otherwise, we
7069 * couldn't enter it in the page table, since
7070 * we don't want the user to see the encrypted
7071 * data.
7072 */
7073 ASSERT_PAGE_DECRYPTED(m);
7074
7075 prot = entry->protection;
7076
7077 if (override_nx(dst_map, entry->alias) && prot)
7078 prot |= VM_PROT_EXECUTE;
7079
7080 type_of_fault = DBG_CACHE_HIT_FAULT;
7081
7082 vm_fault_enter(m, dst_map->pmap, va, prot,
7083 VM_PAGE_WIRED(m), FALSE, FALSE,
7084 &type_of_fault);
7085
7086 vm_object_unlock(object);
7087
7088 offset += PAGE_SIZE_64;
7089 va += PAGE_SIZE;
7090 }
7091 }
7092 }
7093
7094 /*
7095 * Correct the page alignment for the result
7096 */
7097
7098 *dst_addr = start + (copy->offset - vm_copy_start);
7099
7100 /*
7101 * Update the hints and the map size
7102 */
7103
7104 SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy));
7105
7106 dst_map->size += size;
7107
7108 /*
7109 * Link in the copy
7110 */
7111
7112 vm_map_copy_insert(dst_map, last, copy);
7113
7114 vm_map_unlock(dst_map);
7115
7116 /*
7117 * XXX If wiring_required, call vm_map_pageable
7118 */
7119
7120 return(KERN_SUCCESS);
7121 }
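
/*
 * Illustrative sketch, not part of the original source: the classic
 * out-of-line transfer built from the routines above, capturing a range
 * of one map with vm_map_copyin() and materializing it at a newly
 * allocated address in another map with vm_map_copyout().  The helper
 * name is hypothetical; on success the copy object is consumed by
 * vm_map_copyout(), on failure it is still the caller's to discard.
 */
static kern_return_t
example_transfer_range(
	vm_map_t		src_map,
	vm_map_address_t	src_addr,
	vm_map_size_t		len,
	vm_map_t		dst_map,
	vm_map_address_t	*dst_addr)	/* OUT */
{
	vm_map_copy_t	copy;
	kern_return_t	kr;

	kr = vm_map_copyin(src_map, src_addr, len, FALSE, &copy);
	if (kr != KERN_SUCCESS)
		return kr;

	kr = vm_map_copyout(dst_map, dst_addr, copy);
	if (kr != KERN_SUCCESS)
		vm_map_copy_discard(copy);
	return kr;
}
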
7122
7123 /*
7124 * Routine: vm_map_copyin
7125 *
7126 * Description:
7127 * see vm_map_copyin_common. Exported via Unsupported.exports.
7128 *
7129 */
7130
7131 #undef vm_map_copyin
7132
7133 kern_return_t
7134 vm_map_copyin(
7135 vm_map_t src_map,
7136 vm_map_address_t src_addr,
7137 vm_map_size_t len,
7138 boolean_t src_destroy,
7139 vm_map_copy_t *copy_result) /* OUT */
7140 {
7141 return(vm_map_copyin_common(src_map, src_addr, len, src_destroy,
7142 FALSE, copy_result, FALSE));
7143 }
7144
7145 /*
7146 * Routine: vm_map_copyin_common
7147 *
7148 * Description:
7149 * Copy the specified region (src_addr, len) from the
7150 * source address space (src_map), possibly removing
7151 * the region from the source address space (src_destroy).
7152 *
7153 * Returns:
7154 * A vm_map_copy_t object (copy_result), suitable for
7155 * insertion into another address space (using vm_map_copyout),
7156 * copying over another address space region (using
7157 * vm_map_copy_overwrite). If the copy is unused, it
7158 * should be destroyed (using vm_map_copy_discard).
7159 *
7160 * In/out conditions:
7161 * The source map should not be locked on entry.
7162 */
7163
7164 typedef struct submap_map {
7165 vm_map_t parent_map;
7166 vm_map_offset_t base_start;
7167 vm_map_offset_t base_end;
7168 vm_map_size_t base_len;
7169 struct submap_map *next;
7170 } submap_map_t;
7171
7172 kern_return_t
7173 vm_map_copyin_common(
7174 vm_map_t src_map,
7175 vm_map_address_t src_addr,
7176 vm_map_size_t len,
7177 boolean_t src_destroy,
7178 __unused boolean_t src_volatile,
7179 vm_map_copy_t *copy_result, /* OUT */
7180 boolean_t use_maxprot)
7181 {
7182 vm_map_entry_t tmp_entry; /* Result of last map lookup --
7183 * in multi-level lookup, this
7184 * entry contains the actual
7185 * vm_object/offset.
7186 */
7187 register
7188 vm_map_entry_t new_entry = VM_MAP_ENTRY_NULL; /* Map entry for copy */
7189
7190 vm_map_offset_t src_start; /* Start of current entry --
7191 * where copy is taking place now
7192 */
7193 vm_map_offset_t src_end; /* End of entire region to be
7194 * copied */
7195 vm_map_offset_t src_base;
7196 vm_map_t base_map = src_map;
7197 boolean_t map_share=FALSE;
7198 submap_map_t *parent_maps = NULL;
7199
7200 register
7201 vm_map_copy_t copy; /* Resulting copy */
7202 vm_map_address_t copy_addr;
7203
7204 /*
7205 * Check for copies of zero bytes.
7206 */
7207
7208 if (len == 0) {
7209 *copy_result = VM_MAP_COPY_NULL;
7210 return(KERN_SUCCESS);
7211 }
7212
7213 /*
7214 * Check that the end address doesn't overflow
7215 */
7216 src_end = src_addr + len;
7217 if (src_end < src_addr)
7218 return KERN_INVALID_ADDRESS;
7219
7220 /*
7221 * If the copy is sufficiently small, use a kernel buffer instead
7222 * of making a virtual copy. The theory being that the cost of
7223 * setting up VM (and taking C-O-W faults) dominates the copy costs
7224 * for small regions.
7225 */
7226 if ((len < msg_ool_size_small) && !use_maxprot)
7227 return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
7228 src_destroy, copy_result);
7229
7230 /*
7231 * Compute (page aligned) start and end of region
7232 */
7233 src_start = vm_map_trunc_page(src_addr);
7234 src_end = vm_map_round_page(src_end);
7235
7236 XPR(XPR_VM_MAP, "vm_map_copyin_common map 0x%x addr 0x%x len 0x%x dest %d\n", src_map, src_addr, len, src_destroy, 0);
7237
7238 /*
7239 * Allocate a header element for the list.
7240 *
7241 * Use the start and end in the header to
7242 * remember the endpoints prior to rounding.
7243 */
7244
7245 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
7246 vm_map_copy_first_entry(copy) =
7247 vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
7248 copy->type = VM_MAP_COPY_ENTRY_LIST;
7249 copy->cpy_hdr.nentries = 0;
7250 copy->cpy_hdr.entries_pageable = TRUE;
7251
7252 copy->offset = src_addr;
7253 copy->size = len;
7254
7255 new_entry = vm_map_copy_entry_create(copy);
7256
7257 #define RETURN(x) \
7258 MACRO_BEGIN \
7259 vm_map_unlock(src_map); \
7260 if(src_map != base_map) \
7261 vm_map_deallocate(src_map); \
7262 if (new_entry != VM_MAP_ENTRY_NULL) \
7263 vm_map_copy_entry_dispose(copy,new_entry); \
7264 vm_map_copy_discard(copy); \
7265 { \
7266 submap_map_t *_ptr; \
7267 \
7268 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
7269 parent_maps=parent_maps->next; \
7270 if (_ptr->parent_map != base_map) \
7271 vm_map_deallocate(_ptr->parent_map); \
7272 kfree(_ptr, sizeof(submap_map_t)); \
7273 } \
7274 } \
7275 MACRO_RETURN(x); \
7276 MACRO_END
7277
7278 /*
7279 * Find the beginning of the region.
7280 */
7281
7282 vm_map_lock(src_map);
7283
7284 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry))
7285 RETURN(KERN_INVALID_ADDRESS);
7286 if(!tmp_entry->is_sub_map) {
7287 vm_map_clip_start(src_map, tmp_entry, src_start);
7288 }
7289 /* set for later submap fix-up */
7290 copy_addr = src_start;
7291
7292 /*
7293 * Go through entries until we get to the end.
7294 */
7295
7296 while (TRUE) {
7297 register
7298 vm_map_entry_t src_entry = tmp_entry; /* Top-level entry */
7299 vm_map_size_t src_size; /* Size of source
7300 * map entry (in both
7301 * maps)
7302 */
7303
7304 register
7305 vm_object_t src_object; /* Object to copy */
7306 vm_object_offset_t src_offset;
7307
7308 boolean_t src_needs_copy; /* Should source map
7309 * be made read-only
7310 * for copy-on-write?
7311 */
7312
7313 boolean_t new_entry_needs_copy; /* Will new entry be COW? */
7314
7315 boolean_t was_wired; /* Was source wired? */
7316 vm_map_version_t version; /* Version before locks
7317 * dropped to make copy
7318 */
7319 kern_return_t result; /* Return value from
7320 * copy_strategically.
7321 */
7322 while(tmp_entry->is_sub_map) {
7323 vm_map_size_t submap_len;
7324 submap_map_t *ptr;
7325
7326 ptr = (submap_map_t *)kalloc(sizeof(submap_map_t));
7327 ptr->next = parent_maps;
7328 parent_maps = ptr;
7329 ptr->parent_map = src_map;
7330 ptr->base_start = src_start;
7331 ptr->base_end = src_end;
7332 submap_len = tmp_entry->vme_end - src_start;
7333 if(submap_len > (src_end-src_start))
7334 submap_len = src_end-src_start;
7335 ptr->base_len = submap_len;
7336
7337 src_start -= tmp_entry->vme_start;
7338 src_start += tmp_entry->offset;
7339 src_end = src_start + submap_len;
7340 src_map = tmp_entry->object.sub_map;
7341 vm_map_lock(src_map);
7342 /* keep an outstanding reference for all maps in */
7343 /* the parents tree except the base map */
7344 vm_map_reference(src_map);
7345 vm_map_unlock(ptr->parent_map);
7346 if (!vm_map_lookup_entry(
7347 src_map, src_start, &tmp_entry))
7348 RETURN(KERN_INVALID_ADDRESS);
7349 map_share = TRUE;
7350 if(!tmp_entry->is_sub_map)
7351 vm_map_clip_start(src_map, tmp_entry, src_start);
7352 src_entry = tmp_entry;
7353 }
7354 /* we are now in the lowest level submap... */
7355
7356 if ((tmp_entry->object.vm_object != VM_OBJECT_NULL) &&
7357 (tmp_entry->object.vm_object->phys_contiguous)) {
7358 /* This is not supported for now. In the future */
7359 /* we will need to detect the phys_contig */
7360 /* condition and then upgrade copy_slowly */
7361 /* to do physical copy from the device mem */
7362 /* based object. We can piggy-back off of */
7363 /* the was_wired boolean to set up the */
7364 /* proper handling */
7365 RETURN(KERN_PROTECTION_FAILURE);
7366 }
7367 /*
7368 * Create a new address map entry to hold the result.
7369 * Fill in the fields from the appropriate source entries.
7370 * We must unlock the source map to do this if we need
7371 * to allocate a map entry.
7372 */
7373 if (new_entry == VM_MAP_ENTRY_NULL) {
7374 version.main_timestamp = src_map->timestamp;
7375 vm_map_unlock(src_map);
7376
7377 new_entry = vm_map_copy_entry_create(copy);
7378
7379 vm_map_lock(src_map);
7380 if ((version.main_timestamp + 1) != src_map->timestamp) {
7381 if (!vm_map_lookup_entry(src_map, src_start,
7382 &tmp_entry)) {
7383 RETURN(KERN_INVALID_ADDRESS);
7384 }
7385 if (!tmp_entry->is_sub_map)
7386 vm_map_clip_start(src_map, tmp_entry, src_start);
7387 continue; /* restart w/ new tmp_entry */
7388 }
7389 }
7390
7391 /*
7392 * Verify that the region can be read.
7393 */
7394 if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE &&
7395 !use_maxprot) ||
7396 (src_entry->max_protection & VM_PROT_READ) == 0)
7397 RETURN(KERN_PROTECTION_FAILURE);
7398
7399 /*
7400 * Clip against the endpoints of the entire region.
7401 */
7402
7403 vm_map_clip_end(src_map, src_entry, src_end);
7404
7405 src_size = src_entry->vme_end - src_start;
7406 src_object = src_entry->object.vm_object;
7407 src_offset = src_entry->offset;
7408 was_wired = (src_entry->wired_count != 0);
7409
7410 vm_map_entry_copy(new_entry, src_entry);
7411 new_entry->use_pmap = FALSE; /* clr address space specifics */
7412
7413 /*
7414 * Attempt non-blocking copy-on-write optimizations.
7415 */
7416
7417 if (src_destroy &&
7418 (src_object == VM_OBJECT_NULL ||
7419 (src_object->internal && !src_object->true_share
7420 && !map_share))) {
7421 /*
7422 * If we are destroying the source, and the object
7423 * is internal, we can move the object reference
7424 * from the source to the copy. The copy is
7425 * copy-on-write only if the source is.
7426 * We make another reference to the object, because
7427 * destroying the source entry will deallocate it.
7428 */
7429 vm_object_reference(src_object);
7430
7431 /*
7432 * Copy is always unwired. vm_map_copy_entry
7433 * set its wired count to zero.
7434 */
7435
7436 goto CopySuccessful;
7437 }
7438
7439
7440 RestartCopy:
7441 XPR(XPR_VM_MAP, "vm_map_copyin_common src_obj 0x%x ent 0x%x obj 0x%x was_wired %d\n",
7442 src_object, new_entry, new_entry->object.vm_object,
7443 was_wired, 0);
7444 if ((src_object == VM_OBJECT_NULL ||
7445 (!was_wired && !map_share && !tmp_entry->is_shared)) &&
7446 vm_object_copy_quickly(
7447 &new_entry->object.vm_object,
7448 src_offset,
7449 src_size,
7450 &src_needs_copy,
7451 &new_entry_needs_copy)) {
7452
7453 new_entry->needs_copy = new_entry_needs_copy;
7454
7455 /*
7456 * Handle copy-on-write obligations
7457 */
7458
7459 if (src_needs_copy && !tmp_entry->needs_copy) {
7460 vm_prot_t prot;
7461
7462 prot = src_entry->protection & ~VM_PROT_WRITE;
7463
7464 if (override_nx(src_map, src_entry->alias) && prot)
7465 prot |= VM_PROT_EXECUTE;
7466
7467 vm_object_pmap_protect(
7468 src_object,
7469 src_offset,
7470 src_size,
7471 (src_entry->is_shared ?
7472 PMAP_NULL
7473 : src_map->pmap),
7474 src_entry->vme_start,
7475 prot);
7476
7477 tmp_entry->needs_copy = TRUE;
7478 }
7479
7480 /*
7481 * The map has never been unlocked, so it's safe
7482 * to move to the next entry rather than doing
7483 * another lookup.
7484 */
7485
7486 goto CopySuccessful;
7487 }
7488
7489 /*
7490 * Take an object reference, so that we may
7491 * release the map lock(s).
7492 */
7493
7494 assert(src_object != VM_OBJECT_NULL);
7495 vm_object_reference(src_object);
7496
7497 /*
7498 * Record the timestamp for later verification.
7499 * Unlock the map.
7500 */
7501
7502 version.main_timestamp = src_map->timestamp;
7503 vm_map_unlock(src_map); /* Increments timestamp once! */
7504
7505 /*
7506 * Perform the copy
7507 */
7508
7509 if (was_wired) {
7510 CopySlowly:
7511 vm_object_lock(src_object);
7512 result = vm_object_copy_slowly(
7513 src_object,
7514 src_offset,
7515 src_size,
7516 THREAD_UNINT,
7517 &new_entry->object.vm_object);
7518 new_entry->offset = 0;
7519 new_entry->needs_copy = FALSE;
7520
7521 }
7522 else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
7523 (tmp_entry->is_shared || map_share)) {
7524 vm_object_t new_object;
7525
7526 vm_object_lock_shared(src_object);
7527 new_object = vm_object_copy_delayed(
7528 src_object,
7529 src_offset,
7530 src_size,
7531 TRUE);
7532 if (new_object == VM_OBJECT_NULL)
7533 goto CopySlowly;
7534
7535 new_entry->object.vm_object = new_object;
7536 new_entry->needs_copy = TRUE;
7537 result = KERN_SUCCESS;
7538
7539 } else {
7540 result = vm_object_copy_strategically(src_object,
7541 src_offset,
7542 src_size,
7543 &new_entry->object.vm_object,
7544 &new_entry->offset,
7545 &new_entry_needs_copy);
7546
7547 new_entry->needs_copy = new_entry_needs_copy;
7548 }
7549
7550 if (result != KERN_SUCCESS &&
7551 result != KERN_MEMORY_RESTART_COPY) {
7552 vm_map_lock(src_map);
7553 RETURN(result);
7554 }
7555
7556 /*
7557 * Throw away the extra reference
7558 */
7559
7560 vm_object_deallocate(src_object);
7561
7562 /*
7563 * Verify that the map has not substantially
7564 * changed while the copy was being made.
7565 */
7566
7567 vm_map_lock(src_map);
7568
7569 if ((version.main_timestamp + 1) == src_map->timestamp)
7570 goto VerificationSuccessful;
7571
7572 /*
7573 * Simple version comparison failed.
7574 *
7575 * Retry the lookup and verify that the
7576 * same object/offset are still present.
7577 *
7578 * [Note: a memory manager that colludes with
7579 * the calling task can detect that we have
7580 * cheated. While the map was unlocked, the
7581 * mapping could have been changed and restored.]
7582 */
7583
7584 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
7585 RETURN(KERN_INVALID_ADDRESS);
7586 }
7587
7588 src_entry = tmp_entry;
7589 vm_map_clip_start(src_map, src_entry, src_start);
7590
7591 if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) &&
7592 !use_maxprot) ||
7593 ((src_entry->max_protection & VM_PROT_READ) == 0))
7594 goto VerificationFailed;
7595
7596 if (src_entry->vme_end < new_entry->vme_end)
7597 src_size = (new_entry->vme_end = src_entry->vme_end) - src_start;
7598
7599 if ((src_entry->object.vm_object != src_object) ||
7600 (src_entry->offset != src_offset) ) {
7601
7602 /*
7603 * Verification failed.
7604 *
7605 * Start over with this top-level entry.
7606 */
7607
7608 VerificationFailed: ;
7609
7610 vm_object_deallocate(new_entry->object.vm_object);
7611 tmp_entry = src_entry;
7612 continue;
7613 }
7614
7615 /*
7616 * Verification succeeded.
7617 */
7618
7619 VerificationSuccessful: ;
7620
7621 if (result == KERN_MEMORY_RESTART_COPY)
7622 goto RestartCopy;
7623
7624 /*
7625 * Copy succeeded.
7626 */
7627
7628 CopySuccessful: ;
7629
7630 /*
7631 * Link in the new copy entry.
7632 */
7633
7634 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
7635 new_entry);
7636
7637 /*
7638 * Determine whether the entire region
7639 * has been copied.
7640 */
7641 src_base = src_start;
7642 src_start = new_entry->vme_end;
7643 new_entry = VM_MAP_ENTRY_NULL;
7644 while ((src_start >= src_end) && (src_end != 0)) {
7645 if (src_map != base_map) {
7646 submap_map_t *ptr;
7647
7648 ptr = parent_maps;
7649 assert(ptr != NULL);
7650 parent_maps = parent_maps->next;
7651
7652 /* fix up the damage we did in that submap */
7653 vm_map_simplify_range(src_map,
7654 src_base,
7655 src_end);
7656
7657 vm_map_unlock(src_map);
7658 vm_map_deallocate(src_map);
7659 vm_map_lock(ptr->parent_map);
7660 src_map = ptr->parent_map;
7661 src_base = ptr->base_start;
7662 src_start = ptr->base_start + ptr->base_len;
7663 src_end = ptr->base_end;
7664 if ((src_end > src_start) &&
7665 !vm_map_lookup_entry(
7666 src_map, src_start, &tmp_entry))
7667 RETURN(KERN_INVALID_ADDRESS);
7668 kfree(ptr, sizeof(submap_map_t));
7669 if(parent_maps == NULL)
7670 map_share = FALSE;
7671 src_entry = tmp_entry->vme_prev;
7672 } else
7673 break;
7674 }
7675 if ((src_start >= src_end) && (src_end != 0))
7676 break;
7677
7678 /*
7679 * Verify that there are no gaps in the region
7680 */
7681
7682 tmp_entry = src_entry->vme_next;
7683 if ((tmp_entry->vme_start != src_start) ||
7684 (tmp_entry == vm_map_to_entry(src_map)))
7685 RETURN(KERN_INVALID_ADDRESS);
7686 }
7687
7688 /*
7689 * If the source should be destroyed, do it now, since the
7690 * copy was successful.
7691 */
7692 if (src_destroy) {
7693 (void) vm_map_delete(src_map,
7694 vm_map_trunc_page(src_addr),
7695 src_end,
7696 (src_map == kernel_map) ?
7697 VM_MAP_REMOVE_KUNWIRE :
7698 VM_MAP_NO_FLAGS,
7699 VM_MAP_NULL);
7700 } else {
7701 /* fix up the damage we did in the base map */
7702 vm_map_simplify_range(src_map,
7703 vm_map_trunc_page(src_addr),
7704 vm_map_round_page(src_end));
7705 }
7706
7707 vm_map_unlock(src_map);
7708
7709 /* Fix-up start and end points in copy. This is necessary */
7710 /* when the various entries in the copy object were picked */
7711 /* up from different sub-maps */
7712
7713 tmp_entry = vm_map_copy_first_entry(copy);
7714 while (tmp_entry != vm_map_copy_to_entry(copy)) {
7715 tmp_entry->vme_end = copy_addr +
7716 (tmp_entry->vme_end - tmp_entry->vme_start);
7717 tmp_entry->vme_start = copy_addr;
7718 copy_addr += tmp_entry->vme_end - tmp_entry->vme_start;
7719 tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next;
7720 }
7721
7722 *copy_result = copy;
7723 return(KERN_SUCCESS);
7724
7725 #undef RETURN
7726 }
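
/*
 * Illustrative sketch, not part of the original source: a "maxprot"
 * flavor of copyin expressed directly in terms of vm_map_copyin_common(),
 * mirroring how the vm_map_copyin() wrapper above passes use_maxprot as
 * FALSE.  That vm_map_copyin_maxprot(), used by vm_map_fork_copy() below,
 * reduces to exactly this call is an assumption; the helper name is
 * hypothetical.
 */
static kern_return_t
example_copyin_maxprot(
	vm_map_t		src_map,
	vm_map_address_t	src_addr,
	vm_map_size_t		len,
	boolean_t		src_destroy,
	vm_map_copy_t		*copy_result)	/* OUT */
{
	return vm_map_copyin_common(src_map, src_addr, len, src_destroy,
				    FALSE, copy_result, TRUE);
}
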
7727
7728 /*
7729 * vm_map_copyin_object:
7730 *
7731 * Create a copy object from an object.
7732 * Our caller donates an object reference.
7733 */
7734
7735 kern_return_t
7736 vm_map_copyin_object(
7737 vm_object_t object,
7738 vm_object_offset_t offset, /* offset of region in object */
7739 vm_object_size_t size, /* size of region in object */
7740 vm_map_copy_t *copy_result) /* OUT */
7741 {
7742 vm_map_copy_t copy; /* Resulting copy */
7743
7744 /*
7745 * We drop the object into a special copy object
7746 * that contains the object directly.
7747 */
7748
7749 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
7750 copy->type = VM_MAP_COPY_OBJECT;
7751 copy->cpy_object = object;
7752 copy->offset = offset;
7753 copy->size = size;
7754
7755 *copy_result = copy;
7756 return(KERN_SUCCESS);
7757 }
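
/*
 * Illustrative sketch, not part of the original source: donating a
 * freshly allocated VM object to vm_map_copyin_object() above and then
 * mapping the result with vm_map_copyout().  vm_object_allocate() is the
 * allocator used elsewhere in this file; the helper name is hypothetical,
 * and the failure handling assumes the copy object is not consumed on a
 * failed copyout, as documented above.
 */
static kern_return_t
example_map_new_object(
	vm_map_t		dst_map,
	vm_object_size_t	size,
	vm_map_address_t	*dst_addr)	/* OUT */
{
	vm_object_t	object;
	vm_map_copy_t	copy;
	kern_return_t	kr;

	object = vm_object_allocate((vm_map_size_t) size);

	/* our object reference is donated to the copy object */
	kr = vm_map_copyin_object(object, 0, size, &copy);
	if (kr != KERN_SUCCESS) {
		vm_object_deallocate(object);
		return kr;
	}

	kr = vm_map_copyout(dst_map, dst_addr, copy);
	if (kr != KERN_SUCCESS)
		vm_map_copy_discard(copy);
	return kr;
}
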
7758
7759 static void
7760 vm_map_fork_share(
7761 vm_map_t old_map,
7762 vm_map_entry_t old_entry,
7763 vm_map_t new_map)
7764 {
7765 vm_object_t object;
7766 vm_map_entry_t new_entry;
7767
7768 /*
7769 * New sharing code. New map entry
7770 * references original object. Internal
7771 * objects use asynchronous copy algorithm for
7772 * future copies. First make sure we have
7773 * the right object. If we need a shadow,
7774 * or someone else already has one, then
7775 * make a new shadow and share it.
7776 */
7777
7778 object = old_entry->object.vm_object;
7779 if (old_entry->is_sub_map) {
7780 assert(old_entry->wired_count == 0);
7781 #ifndef NO_NESTED_PMAP
7782 if(old_entry->use_pmap) {
7783 kern_return_t result;
7784
7785 result = pmap_nest(new_map->pmap,
7786 (old_entry->object.sub_map)->pmap,
7787 (addr64_t)old_entry->vme_start,
7788 (addr64_t)old_entry->vme_start,
7789 (uint64_t)(old_entry->vme_end - old_entry->vme_start));
7790 if(result)
7791 panic("vm_map_fork_share: pmap_nest failed!");
7792 }
7793 #endif /* NO_NESTED_PMAP */
7794 } else if (object == VM_OBJECT_NULL) {
7795 object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
7796 old_entry->vme_start));
7797 old_entry->offset = 0;
7798 old_entry->object.vm_object = object;
7799 assert(!old_entry->needs_copy);
7800 } else if (object->copy_strategy !=
7801 MEMORY_OBJECT_COPY_SYMMETRIC) {
7802
7803 /*
7804 * We are already using an asymmetric
7805 * copy, and therefore we already have
7806 * the right object.
7807 */
7808
7809 assert(! old_entry->needs_copy);
7810 }
7811 else if (old_entry->needs_copy || /* case 1 */
7812 object->shadowed || /* case 2 */
7813 (!object->true_share && /* case 3 */
7814 !old_entry->is_shared &&
7815 (object->size >
7816 (vm_map_size_t)(old_entry->vme_end -
7817 old_entry->vme_start)))) {
7818
7819 /*
7820 * We need to create a shadow.
7821 * There are three cases here.
7822 * In the first case, we need to
7823 * complete a deferred symmetrical
7824 * copy that we participated in.
7825 * In the second and third cases,
7826 * we need to create the shadow so
7827 * that changes that we make to the
7828 * object do not interfere with
7829 * any symmetrical copies which
7830 * have occurred (case 2) or which
7831 * might occur (case 3).
7832 *
7833 * The first case is when we had
7834 * deferred shadow object creation
7835 * via the entry->needs_copy mechanism.
7836 * This mechanism only works when
7837 * only one entry points to the source
7838 * object, and we are about to create
7839 * a second entry pointing to the
7840 * same object. The problem is that
7841 * there is no way of mapping from
7842 * an object to the entries pointing
7843 * to it. (Deferred shadow creation
7844 * works with one entry because it occurs
7845 * at fault time, and we walk from the
7846 * entry to the object when handling
7847 * the fault.)
7848 *
7849 * The second case is when the object
7850 * to be shared has already been copied
7851 * with a symmetric copy, but we point
7852 * directly to the object without
7853 * needs_copy set in our entry. (This
7854 * can happen because different ranges
7855 * of an object can be pointed to by
7856 * different entries. In particular,
7857 * a single entry pointing to an object
7858 * can be split by a call to vm_inherit,
7859 * which, combined with task_create, can
7860 * result in the different entries
7861 * having different needs_copy values.)
7862 * The shadowed flag in the object allows
7863 * us to detect this case. The problem
7864 * with this case is that if this object
7865 * has or will have shadows, then we
7866 * must not perform an asymmetric copy
7867 * of this object, since such a copy
7868 * allows the object to be changed, which
7869 * will break the previous symmetrical
7870 * copies (which rely upon the object
7871 * not changing). In a sense, the shadowed
7872 * flag says "don't change this object".
7873 * We fix this by creating a shadow
7874 * object for this object, and sharing
7875 * that. This works because we are free
7876 * to change the shadow object (and thus
7877 * to use an asymmetric copy strategy);
7878 * this is also semantically correct,
7879 * since this object is temporary, and
7880 * therefore a copy of the object is
7881 * as good as the object itself. (This
7882 * is not true for permanent objects,
7883 * since the pager needs to see changes,
7884 * which won't happen if the changes
7885 * are made to a copy.)
7886 *
7887 * The third case is when the object
7888 * to be shared has parts sticking
7889 * outside of the entry we're working
7890 * with, and thus may in the future
7891 * be subject to a symmetrical copy.
7892 * (This is a preemptive version of
7893 * case 2.)
7894 */
7895
7896 vm_object_shadow(&old_entry->object.vm_object,
7897 &old_entry->offset,
7898 (vm_map_size_t) (old_entry->vme_end -
7899 old_entry->vme_start));
7900
7901 /*
7902 * If we're making a shadow for other than
7903 * copy on write reasons, then we have
7904 * to remove write permission.
7905 */
7906
7907 if (!old_entry->needs_copy &&
7908 (old_entry->protection & VM_PROT_WRITE)) {
7909 vm_prot_t prot;
7910
7911 prot = old_entry->protection & ~VM_PROT_WRITE;
7912
7913 if (override_nx(old_map, old_entry->alias) && prot)
7914 prot |= VM_PROT_EXECUTE;
7915
7916 if (old_map->mapped) {
7917 vm_object_pmap_protect(
7918 old_entry->object.vm_object,
7919 old_entry->offset,
7920 (old_entry->vme_end -
7921 old_entry->vme_start),
7922 PMAP_NULL,
7923 old_entry->vme_start,
7924 prot);
7925 } else {
7926 pmap_protect(old_map->pmap,
7927 old_entry->vme_start,
7928 old_entry->vme_end,
7929 prot);
7930 }
7931 }
7932
7933 old_entry->needs_copy = FALSE;
7934 object = old_entry->object.vm_object;
7935 }
7936
7937 /*
7938 * If object was using a symmetric copy strategy,
7939 * change its copy strategy to the default
7940 * asymmetric copy strategy, which is copy_delay
7941 * in the non-norma case and copy_call in the
7942 * norma case. Bump the reference count for the
7943 * new entry.
7944 */
7945
7946 if(old_entry->is_sub_map) {
7947 vm_map_lock(old_entry->object.sub_map);
7948 vm_map_reference(old_entry->object.sub_map);
7949 vm_map_unlock(old_entry->object.sub_map);
7950 } else {
7951 vm_object_lock(object);
7952 vm_object_reference_locked(object);
7953 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
7954 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
7955 }
7956 vm_object_unlock(object);
7957 }
7958
7959 /*
7960 * Clone the entry, using object ref from above.
7961 * Mark both entries as shared.
7962 */
7963
7964 new_entry = vm_map_entry_create(new_map);
7965 vm_map_entry_copy(new_entry, old_entry);
7966 old_entry->is_shared = TRUE;
7967 new_entry->is_shared = TRUE;
7968
7969 /*
7970 * Insert the entry into the new map -- we
7971 * know we're inserting at the end of the new
7972 * map.
7973 */
7974
7975 vm_map_entry_link(new_map, vm_map_last_entry(new_map), new_entry);
7976
7977 /*
7978 * Update the physical map
7979 */
7980
7981 if (old_entry->is_sub_map) {
7982 /* Bill Angell pmap support goes here */
7983 } else {
7984 pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
7985 old_entry->vme_end - old_entry->vme_start,
7986 old_entry->vme_start);
7987 }
7988 }
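
/*
 * Illustrative sketch, not part of the original source, of the three-case
 * test in vm_map_fork_share() above (reached only after the earlier
 * branches have ruled out submaps, null objects and asymmetric copy
 * strategies): a shadow is needed to complete a deferred symmetric copy
 * (case 1), to protect existing symmetric copies of an already-shadowed
 * object (case 2), or to pre-empt future symmetric copies of an object
 * that extends beyond this entry (case 3).  The helper name is
 * hypothetical.
 */
static boolean_t
example_fork_share_needs_shadow(
	vm_map_entry_t	old_entry)
{
	vm_object_t	object = old_entry->object.vm_object;

	return (old_entry->needs_copy ||		/* case 1 */
		object->shadowed ||			/* case 2 */
		(!object->true_share &&			/* case 3 */
		 !old_entry->is_shared &&
		 (object->size >
		  (vm_map_size_t)(old_entry->vme_end -
				  old_entry->vme_start))));
}
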
7989
7990 static boolean_t
7991 vm_map_fork_copy(
7992 vm_map_t old_map,
7993 vm_map_entry_t *old_entry_p,
7994 vm_map_t new_map)
7995 {
7996 vm_map_entry_t old_entry = *old_entry_p;
7997 vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start;
7998 vm_map_offset_t start = old_entry->vme_start;
7999 vm_map_copy_t copy;
8000 vm_map_entry_t last = vm_map_last_entry(new_map);
8001
8002 vm_map_unlock(old_map);
8003 /*
8004 * Use maxprot version of copyin because we
8005 * care about whether this memory can ever
8006 * be accessed, not just whether it's accessible
8007 * right now.
8008 */
8009 if (vm_map_copyin_maxprot(old_map, start, entry_size, FALSE, &copy)
8010 != KERN_SUCCESS) {
8011 /*
8012 * The map might have changed while it
8013 * was unlocked, check it again. Skip
8014 * any blank space or permanently
8015 * unreadable region.
8016 */
8017 vm_map_lock(old_map);
8018 if (!vm_map_lookup_entry(old_map, start, &last) ||
8019 (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) {
8020 last = last->vme_next;
8021 }
8022 *old_entry_p = last;
8023
8024 /*
8025 * XXX For some error returns, want to
8026 * XXX skip to the next element. Note
8027 * that INVALID_ADDRESS and
8028 * PROTECTION_FAILURE are handled above.
8029 */
8030
8031 return FALSE;
8032 }
8033
8034 /*
8035 * Insert the copy into the new map
8036 */
8037
8038 vm_map_copy_insert(new_map, last, copy);
8039
8040 /*
8041 * Pick up the traversal at the end of
8042 * the copied region.
8043 */
8044
8045 vm_map_lock(old_map);
8046 start += entry_size;
8047 if (! vm_map_lookup_entry(old_map, start, &last)) {
8048 last = last->vme_next;
8049 } else {
8050 if (last->vme_start == start) {
8051 /*
8052 * No need to clip here and we don't
8053 * want to cause any unnecessary
8054 * unnesting...
8055 */
8056 } else {
8057 vm_map_clip_start(old_map, last, start);
8058 }
8059 }
8060 *old_entry_p = last;
8061
8062 return TRUE;
8063 }
8064
8065 /*
8066 * vm_map_fork:
8067 *
8068 * Create and return a new map based on the old
8069 * map, according to the inheritance values on the
8070 * regions in that map.
8071 *
8072 * The source map must not be locked.
8073 */
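/*
 * Editor's illustration (a simplified sketch, not part of the original
 * source): the typical caller is the fork()/task-creation path, which
 * hands the parent's map to this routine and installs the result in
 * the child (names simplified):
 *
 *	vm_map_t child_map;
 *
 *	child_map = vm_map_fork(parent_task->map);
 *	child_task->map = child_map;
 *
 * Regions marked VM_INHERIT_SHARE end up shared with the child,
 * VM_INHERIT_COPY regions are copied (copy-on-write where possible)
 * and VM_INHERIT_NONE regions are omitted from the child entirely.
 */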
8074 vm_map_t
8075 vm_map_fork(
8076 vm_map_t old_map)
8077 {
8078 pmap_t new_pmap;
8079 vm_map_t new_map;
8080 vm_map_entry_t old_entry;
8081 vm_map_size_t new_size = 0, entry_size;
8082 vm_map_entry_t new_entry;
8083 boolean_t src_needs_copy;
8084 boolean_t new_entry_needs_copy;
8085
8086 new_pmap = pmap_create((vm_map_size_t) 0,
8087 #if defined(__i386__) || defined(__x86_64__)
8088 old_map->pmap->pm_task_map != TASK_MAP_32BIT
8089 #else
8090 0
8091 #endif
8092 );
8093 #if defined(__i386__)
8094 if (old_map->pmap->pm_task_map == TASK_MAP_64BIT_SHARED)
8095 pmap_set_4GB_pagezero(new_pmap);
8096 #endif
8097
8098 vm_map_reference_swap(old_map);
8099 vm_map_lock(old_map);
8100
8101 new_map = vm_map_create(new_pmap,
8102 old_map->min_offset,
8103 old_map->max_offset,
8104 old_map->hdr.entries_pageable);
8105
8106 for (
8107 old_entry = vm_map_first_entry(old_map);
8108 old_entry != vm_map_to_entry(old_map);
8109 ) {
8110
8111 entry_size = old_entry->vme_end - old_entry->vme_start;
8112
8113 switch (old_entry->inheritance) {
8114 case VM_INHERIT_NONE:
8115 break;
8116
8117 case VM_INHERIT_SHARE:
8118 vm_map_fork_share(old_map, old_entry, new_map);
8119 new_size += entry_size;
8120 break;
8121
8122 case VM_INHERIT_COPY:
8123
8124 /*
8125 * Inline the copy_quickly case;
8126 * upon failure, fall back on call
8127 * to vm_map_fork_copy.
8128 */
8129
8130 if(old_entry->is_sub_map)
8131 break;
8132 if ((old_entry->wired_count != 0) ||
8133 ((old_entry->object.vm_object != NULL) &&
8134 (old_entry->object.vm_object->true_share))) {
8135 goto slow_vm_map_fork_copy;
8136 }
8137
8138 new_entry = vm_map_entry_create(new_map);
8139 vm_map_entry_copy(new_entry, old_entry);
8140 /* clear address space specifics */
8141 new_entry->use_pmap = FALSE;
8142
8143 if (! vm_object_copy_quickly(
8144 &new_entry->object.vm_object,
8145 old_entry->offset,
8146 (old_entry->vme_end -
8147 old_entry->vme_start),
8148 &src_needs_copy,
8149 &new_entry_needs_copy)) {
8150 vm_map_entry_dispose(new_map, new_entry);
8151 goto slow_vm_map_fork_copy;
8152 }
8153
8154 /*
8155 * Handle copy-on-write obligations
8156 */
8157
8158 if (src_needs_copy && !old_entry->needs_copy) {
8159 vm_prot_t prot;
8160
8161 prot = old_entry->protection & ~VM_PROT_WRITE;
8162
8163 if (override_nx(old_map, old_entry->alias) && prot)
8164 prot |= VM_PROT_EXECUTE;
8165
8166 vm_object_pmap_protect(
8167 old_entry->object.vm_object,
8168 old_entry->offset,
8169 (old_entry->vme_end -
8170 old_entry->vme_start),
8171 ((old_entry->is_shared
8172 || old_map->mapped)
8173 ? PMAP_NULL :
8174 old_map->pmap),
8175 old_entry->vme_start,
8176 prot);
8177
8178 old_entry->needs_copy = TRUE;
8179 }
8180 new_entry->needs_copy = new_entry_needs_copy;
8181
8182 /*
8183 * Insert the entry at the end
8184 * of the map.
8185 */
8186
8187 vm_map_entry_link(new_map, vm_map_last_entry(new_map),
8188 new_entry);
8189 new_size += entry_size;
8190 break;
8191
8192 slow_vm_map_fork_copy:
8193 if (vm_map_fork_copy(old_map, &old_entry, new_map)) {
8194 new_size += entry_size;
8195 }
8196 continue;
8197 }
8198 old_entry = old_entry->vme_next;
8199 }
8200
8201 new_map->size = new_size;
8202 vm_map_unlock(old_map);
8203 vm_map_deallocate(old_map);
8204
8205 return(new_map);
8206 }
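/*
 * Editor's note (a sketch, not part of the original source): the
 * inheritance values consumed by the switch above are set per region,
 * for example from user space via the Mach vm_inherit() interface
 * ("addr" and "size" are placeholders):
 *
 *	#include <mach/mach.h>
 *
 *	kern_return_t kr;
 *
 *	kr = vm_inherit(mach_task_self(), (vm_address_t)addr, size,
 *			VM_INHERIT_SHARE);
 *
 * after which the region is shared with, rather than copied into, any
 * subsequently forked child.
 */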
8207
8208 /*
8209 * vm_map_exec:
8210 *
8211 * Set up the "new_map" with the proper execution environment according
8212 * to the type of executable (platform, 64-bit, chroot environment).
8213 * Map the comm page and shared region, etc...
8214 */
8215 kern_return_t
8216 vm_map_exec(
8217 vm_map_t new_map,
8218 task_t task,
8219 void *fsroot,
8220 cpu_type_t cpu)
8221 {
8222 SHARED_REGION_TRACE_DEBUG(
8223 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): ->\n",
8224 current_task(), new_map, task, fsroot, cpu));
8225 (void) vm_commpage_enter(new_map, task);
8226 (void) vm_shared_region_enter(new_map, task, fsroot, cpu);
8227 SHARED_REGION_TRACE_DEBUG(
8228 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): <-\n",
8229 current_task(), new_map, task, fsroot, cpu));
8230 return KERN_SUCCESS;
8231 }
8232
8233 /*
8234 * vm_map_lookup_locked:
8235 *
8236 * Finds the VM object, offset, and
8237 * protection for a given virtual address in the
8238 * specified map, assuming a page fault of the
8239 * type specified.
8240 *
8241 * Returns the (object, offset, protection) for
8242 * this address, whether it is wired down, and whether
8243 * this map has the only reference to the data in question.
8244 * In order to later verify this lookup, a "version"
8245 * is returned.
8246 *
8247 * The map MUST be locked by the caller and WILL be
8248 * locked on exit. In order to guarantee the
8249 * existence of the returned object, it is returned
8250 * locked.
8251 *
8252 * If a lookup is requested with "write protection"
8253 * specified, the map may be changed to perform virtual
8254 * copying operations, although the data referenced will
8255 * remain the same.
8256 */
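/*
 * Editor's illustration (a simplified sketch, not part of the original
 * source): the page-fault path calls this with the map read-locked,
 * roughly as follows, and later re-validates the lookup with
 * vm_map_verify() (see further below):
 *
 *	vm_map_lock_read(map);
 *	kr = vm_map_lookup_locked(&map, vaddr, fault_type,
 *				  OBJECT_LOCK_EXCLUSIVE, &version,
 *				  &object, &offset, &prot, &wired,
 *				  &fault_info, &real_map);
 *	if (real_map != map)
 *		vm_map_unlock(real_map);
 *	vm_map_unlock_read(map);
 *	... resolve the fault against (object, offset) ...
 */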
8257 kern_return_t
8258 vm_map_lookup_locked(
8259 vm_map_t *var_map, /* IN/OUT */
8260 vm_map_offset_t vaddr,
8261 vm_prot_t fault_type,
8262 int object_lock_type,
8263 vm_map_version_t *out_version, /* OUT */
8264 vm_object_t *object, /* OUT */
8265 vm_object_offset_t *offset, /* OUT */
8266 vm_prot_t *out_prot, /* OUT */
8267 boolean_t *wired, /* OUT */
8268 vm_object_fault_info_t fault_info, /* OUT */
8269 vm_map_t *real_map)
8270 {
8271 vm_map_entry_t entry;
8272 register vm_map_t map = *var_map;
8273 vm_map_t old_map = *var_map;
8274 vm_map_t cow_sub_map_parent = VM_MAP_NULL;
8275 vm_map_offset_t cow_parent_vaddr = 0;
8276 vm_map_offset_t old_start = 0;
8277 vm_map_offset_t old_end = 0;
8278 register vm_prot_t prot;
8279
8280 *real_map = map;
8281 RetryLookup: ;
8282
8283 /*
8284 * If the map has an interesting hint, try it before calling
8285 * full blown lookup routine.
8286 */
8287 entry = map->hint;
8288
8289 if ((entry == vm_map_to_entry(map)) ||
8290 (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
8291 vm_map_entry_t tmp_entry;
8292
8293 /*
8294 * Entry was either not a valid hint, or the vaddr
8295 * was not contained in the entry, so do a full lookup.
8296 */
8297 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
8298 if((cow_sub_map_parent) && (cow_sub_map_parent != map))
8299 vm_map_unlock(cow_sub_map_parent);
8300 if((*real_map != map)
8301 && (*real_map != cow_sub_map_parent))
8302 vm_map_unlock(*real_map);
8303 return KERN_INVALID_ADDRESS;
8304 }
8305
8306 entry = tmp_entry;
8307 }
8308 if(map == old_map) {
8309 old_start = entry->vme_start;
8310 old_end = entry->vme_end;
8311 }
8312
8313 /*
8314 * Handle submaps. Drop lock on upper map, submap is
8315 * returned locked.
8316 */
8317
8318 submap_recurse:
8319 if (entry->is_sub_map) {
8320 vm_map_offset_t local_vaddr;
8321 vm_map_offset_t end_delta;
8322 vm_map_offset_t start_delta;
8323 vm_map_entry_t submap_entry;
8324 boolean_t mapped_needs_copy=FALSE;
8325
8326 local_vaddr = vaddr;
8327
8328 if ((entry->use_pmap && !(fault_type & VM_PROT_WRITE))) {
8329 /* if real_map equals map we unlock below */
8330 if ((*real_map != map) &&
8331 (*real_map != cow_sub_map_parent))
8332 vm_map_unlock(*real_map);
8333 *real_map = entry->object.sub_map;
8334 }
8335
8336 if(entry->needs_copy && (fault_type & VM_PROT_WRITE)) {
8337 if (!mapped_needs_copy) {
8338 if (vm_map_lock_read_to_write(map)) {
8339 vm_map_lock_read(map);
8340 /* XXX FBDP: entry still valid ? */
8341 if(*real_map == entry->object.sub_map)
8342 *real_map = map;
8343 goto RetryLookup;
8344 }
8345 vm_map_lock_read(entry->object.sub_map);
8346 cow_sub_map_parent = map;
8347 /* reset base to map before cow object */
8348 /* this is the map which will accept */
8349 /* the new cow object */
8350 old_start = entry->vme_start;
8351 old_end = entry->vme_end;
8352 cow_parent_vaddr = vaddr;
8353 mapped_needs_copy = TRUE;
8354 } else {
8355 vm_map_lock_read(entry->object.sub_map);
8356 if((cow_sub_map_parent != map) &&
8357 (*real_map != map))
8358 vm_map_unlock(map);
8359 }
8360 } else {
8361 vm_map_lock_read(entry->object.sub_map);
8362 /* leave map locked if it is a target */
8363 /* cow sub_map above otherwise, just */
8364 /* follow the maps down to the object */
8365 /* here we unlock knowing we are not */
8366 /* revisiting the map. */
8367 if((*real_map != map) && (map != cow_sub_map_parent))
8368 vm_map_unlock_read(map);
8369 }
8370
8371 /* XXX FBDP: map has been unlocked, what protects "entry" !? */
8372 *var_map = map = entry->object.sub_map;
8373
8374 /* calculate the offset in the submap for vaddr */
8375 local_vaddr = (local_vaddr - entry->vme_start) + entry->offset;
8376
8377 RetrySubMap:
8378 if(!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
8379 if((cow_sub_map_parent) && (cow_sub_map_parent != map)){
8380 vm_map_unlock(cow_sub_map_parent);
8381 }
8382 if((*real_map != map)
8383 && (*real_map != cow_sub_map_parent)) {
8384 vm_map_unlock(*real_map);
8385 }
8386 *real_map = map;
8387 return KERN_INVALID_ADDRESS;
8388 }
8389
8390 /* find the attenuated shadow of the underlying object */
8391 /* on our target map */
8392
8393 /* In plain English: the submap object may extend beyond the */
8394 /* region mapped by the entry, or may fill only a portion of */
8395 /* it. For our purposes, we only care about the case where */
8396 /* the object doesn't fill the entry. In that case the area */
8397 /* which will ultimately be clipped in the top map only needs */
8398 /* to be as big as the portion of the underlying entry which */
8399 /* is actually mapped. */
8400 start_delta = submap_entry->vme_start > entry->offset ?
8401 submap_entry->vme_start - entry->offset : 0;
8402
8403 end_delta =
8404 (entry->offset + start_delta + (old_end - old_start)) <=
8405 submap_entry->vme_end ?
8406 0 : (entry->offset +
8407 (old_end - old_start))
8408 - submap_entry->vme_end;
8409
8410 old_start += start_delta;
8411 old_end -= end_delta;
8412
8413 if(submap_entry->is_sub_map) {
8414 entry = submap_entry;
8415 vaddr = local_vaddr;
8416 goto submap_recurse;
8417 }
8418
8419 if(((fault_type & VM_PROT_WRITE) && cow_sub_map_parent)) {
8420
8421 vm_object_t sub_object, copy_object;
8422 vm_object_offset_t copy_offset;
8423 vm_map_offset_t local_start;
8424 vm_map_offset_t local_end;
8425 boolean_t copied_slowly = FALSE;
8426
8427 if (vm_map_lock_read_to_write(map)) {
8428 vm_map_lock_read(map);
8429 old_start -= start_delta;
8430 old_end += end_delta;
8431 goto RetrySubMap;
8432 }
8433
8434
8435 sub_object = submap_entry->object.vm_object;
8436 if (sub_object == VM_OBJECT_NULL) {
8437 sub_object =
8438 vm_object_allocate(
8439 (vm_map_size_t)
8440 (submap_entry->vme_end -
8441 submap_entry->vme_start));
8442 submap_entry->object.vm_object = sub_object;
8443 submap_entry->offset = 0;
8444 }
8445 local_start = local_vaddr -
8446 (cow_parent_vaddr - old_start);
8447 local_end = local_vaddr +
8448 (old_end - cow_parent_vaddr);
8449 vm_map_clip_start(map, submap_entry, local_start);
8450 vm_map_clip_end(map, submap_entry, local_end);
8451 /* unnesting was done in vm_map_clip_start/end() */
8452 assert(!submap_entry->use_pmap);
8453
8454 /* This is the COW case; let's connect */
8455 /* an entry in our space to the underlying */
8456 /* object in the submap, bypassing the */
8457 /* submap. */
8458
8459
8460 if(submap_entry->wired_count != 0 ||
8461 (sub_object->copy_strategy ==
8462 MEMORY_OBJECT_COPY_NONE)) {
8463 vm_object_lock(sub_object);
8464 vm_object_copy_slowly(sub_object,
8465 submap_entry->offset,
8466 (submap_entry->vme_end -
8467 submap_entry->vme_start),
8468 FALSE,
8469 &copy_object);
8470 copied_slowly = TRUE;
8471 } else {
8472
8473 /* set up shadow object */
8474 copy_object = sub_object;
8475 vm_object_reference(copy_object);
8476 sub_object->shadowed = TRUE;
8477 submap_entry->needs_copy = TRUE;
8478
8479 prot = submap_entry->protection & ~VM_PROT_WRITE;
8480
8481 if (override_nx(map, submap_entry->alias) && prot)
8482 prot |= VM_PROT_EXECUTE;
8483
8484 vm_object_pmap_protect(
8485 sub_object,
8486 submap_entry->offset,
8487 submap_entry->vme_end -
8488 submap_entry->vme_start,
8489 (submap_entry->is_shared
8490 || map->mapped) ?
8491 PMAP_NULL : map->pmap,
8492 submap_entry->vme_start,
8493 prot);
8494 }
8495
8496 /*
8497 * Adjust the fault offset to the submap entry.
8498 */
8499 copy_offset = (local_vaddr -
8500 submap_entry->vme_start +
8501 submap_entry->offset);
8502
8503 /* This works differently from the */
8504 /* normal submap case. We go back */
8505 /* to the parent of the cow map and */
8506 /* clip out the target portion of */
8507 /* the sub_map, substituting the */
8508 /* new copy object. */
8509
8510 vm_map_unlock(map);
8511 local_start = old_start;
8512 local_end = old_end;
8513 map = cow_sub_map_parent;
8514 *var_map = cow_sub_map_parent;
8515 vaddr = cow_parent_vaddr;
8516 cow_sub_map_parent = NULL;
8517
8518 if(!vm_map_lookup_entry(map,
8519 vaddr, &entry)) {
8520 vm_object_deallocate(
8521 copy_object);
8522 vm_map_lock_write_to_read(map);
8523 return KERN_INVALID_ADDRESS;
8524 }
8525
8526 /* clip out the portion of space */
8527 /* mapped by the sub map which */
8528 /* corresponds to the underlying */
8529 /* object */
8530
8531 /*
8532 * Clip (and unnest) the smallest nested chunk
8533 * possible around the faulting address...
8534 */
8535 local_start = vaddr & ~(pmap_nesting_size_min - 1);
8536 local_end = local_start + pmap_nesting_size_min;
8537 /*
8538 * ... but don't go beyond the "old_start" to "old_end"
8539 * range, to avoid spanning over another VM region
8540 * with a possibly different VM object and/or offset.
8541 */
8542 if (local_start < old_start) {
8543 local_start = old_start;
8544 }
8545 if (local_end > old_end) {
8546 local_end = old_end;
8547 }
8548 /*
8549 * Adjust copy_offset to the start of the range.
8550 */
8551 copy_offset -= (vaddr - local_start);
8552
8553 vm_map_clip_start(map, entry, local_start);
8554 vm_map_clip_end(map, entry, local_end);
8555 /* unnesting was done in vm_map_clip_start/end() */
8556 assert(!entry->use_pmap);
8557
8558 /* substitute copy object for */
8559 /* shared map entry */
8560 vm_map_deallocate(entry->object.sub_map);
8561 entry->is_sub_map = FALSE;
8562 entry->object.vm_object = copy_object;
8563
8564 /* propagate the submap entry's protections */
8565 entry->protection |= submap_entry->protection;
8566 entry->max_protection |= submap_entry->max_protection;
8567
8568 if(copied_slowly) {
8569 entry->offset = local_start - old_start;
8570 entry->needs_copy = FALSE;
8571 entry->is_shared = FALSE;
8572 } else {
8573 entry->offset = copy_offset;
8574 entry->needs_copy = TRUE;
8575 if(entry->inheritance == VM_INHERIT_SHARE)
8576 entry->inheritance = VM_INHERIT_COPY;
8577 if (map != old_map)
8578 entry->is_shared = TRUE;
8579 }
8580 if(entry->inheritance == VM_INHERIT_SHARE)
8581 entry->inheritance = VM_INHERIT_COPY;
8582
8583 vm_map_lock_write_to_read(map);
8584 } else {
8585 if((cow_sub_map_parent)
8586 && (cow_sub_map_parent != *real_map)
8587 && (cow_sub_map_parent != map)) {
8588 vm_map_unlock(cow_sub_map_parent);
8589 }
8590 entry = submap_entry;
8591 vaddr = local_vaddr;
8592 }
8593 }
8594
8595 /*
8596 * Check whether this task is allowed to have
8597 * this page.
8598 */
8599
8600 prot = entry->protection;
8601
8602 if (override_nx(map, entry->alias) && prot) {
8603 /*
8604 * HACK -- if not a stack, then allow execution
8605 */
8606 prot |= VM_PROT_EXECUTE;
8607 }
8608
8609 if ((fault_type & (prot)) != fault_type) {
8610 if (*real_map != map) {
8611 vm_map_unlock(*real_map);
8612 }
8613 *real_map = map;
8614
8615 if ((fault_type & VM_PROT_EXECUTE) && prot)
8616 log_stack_execution_failure((addr64_t)vaddr, prot);
8617
8618 DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL);
8619 return KERN_PROTECTION_FAILURE;
8620 }
8621
8622 /*
8623 * If this page is not pageable, we have to get
8624 * it for all possible accesses.
8625 */
8626
8627 *wired = (entry->wired_count != 0);
8628 if (*wired)
8629 fault_type = prot;
8630
8631 /*
8632 * If the entry was copy-on-write, we either shadow it or demote the permissions, as decided below.
8633 */
8634
8635 if (entry->needs_copy) {
8636 /*
8637 * If we want to write the page, we may as well
8638 * handle that now since we've got the map locked.
8639 *
8640 * If we don't need to write the page, we just
8641 * demote the permissions allowed.
8642 */
8643
8644 if ((fault_type & VM_PROT_WRITE) || *wired) {
8645 /*
8646 * Make a new object, and place it in the
8647 * object chain. Note that no new references
8648 * have appeared -- one just moved from the
8649 * map to the new object.
8650 */
8651
8652 if (vm_map_lock_read_to_write(map)) {
8653 vm_map_lock_read(map);
8654 goto RetryLookup;
8655 }
8656 vm_object_shadow(&entry->object.vm_object,
8657 &entry->offset,
8658 (vm_map_size_t) (entry->vme_end -
8659 entry->vme_start));
8660
8661 entry->object.vm_object->shadowed = TRUE;
8662 entry->needs_copy = FALSE;
8663 vm_map_lock_write_to_read(map);
8664 }
8665 else {
8666 /*
8667 * We're attempting to read a copy-on-write
8668 * page -- don't allow writes.
8669 */
8670
8671 prot &= (~VM_PROT_WRITE);
8672 }
8673 }
8674
8675 /*
8676 * Create an object if necessary.
8677 */
8678 if (entry->object.vm_object == VM_OBJECT_NULL) {
8679
8680 if (vm_map_lock_read_to_write(map)) {
8681 vm_map_lock_read(map);
8682 goto RetryLookup;
8683 }
8684
8685 entry->object.vm_object = vm_object_allocate(
8686 (vm_map_size_t)(entry->vme_end - entry->vme_start));
8687 entry->offset = 0;
8688 vm_map_lock_write_to_read(map);
8689 }
8690
8691 /*
8692 * Return the object/offset from this entry. If the entry
8693 * was copy-on-write or empty, it has been fixed up. Also
8694 * return the protection.
8695 */
8696
8697 *offset = (vaddr - entry->vme_start) + entry->offset;
8698 *object = entry->object.vm_object;
8699 *out_prot = prot;
8700
8701 if (fault_info) {
8702 fault_info->interruptible = THREAD_UNINT; /* for now... */
8703 /* ... the caller will change "interruptible" if needed */
8704 fault_info->cluster_size = 0;
8705 fault_info->user_tag = entry->alias;
8706 fault_info->behavior = entry->behavior;
8707 fault_info->lo_offset = entry->offset;
8708 fault_info->hi_offset = (entry->vme_end - entry->vme_start) + entry->offset;
8709 fault_info->no_cache = entry->no_cache;
8710 fault_info->stealth = FALSE;
8711 }
8712
8713 /*
8714 * Lock the object to prevent it from disappearing
8715 */
8716 if (object_lock_type == OBJECT_LOCK_EXCLUSIVE)
8717 vm_object_lock(*object);
8718 else
8719 vm_object_lock_shared(*object);
8720
8721 /*
8722 * Save the version number
8723 */
8724
8725 out_version->main_timestamp = map->timestamp;
8726
8727 return KERN_SUCCESS;
8728 }
8729
8730
8731 /*
8732 * vm_map_verify:
8733 *
8734 * Verifies that the map in question has not changed
8735 * since the given version. If successful, the map
8736 * will not change until vm_map_verify_done() is called.
8737 */
8738 boolean_t
8739 vm_map_verify(
8740 register vm_map_t map,
8741 register vm_map_version_t *version) /* REF */
8742 {
8743 boolean_t result;
8744
8745 vm_map_lock_read(map);
8746 result = (map->timestamp == version->main_timestamp);
8747
8748 if (!result)
8749 vm_map_unlock_read(map);
8750
8751 return(result);
8752 }
8753
8754 /*
8755 * vm_map_verify_done:
8756 *
8757 * Releases locks acquired by a vm_map_verify.
8758 *
8759 * This is now a macro in vm/vm_map.h. It does a
8760 * vm_map_unlock_read on the map.
8761 */
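/*
 * Editor's note (a sketch, not part of the original source): the usual
 * pairing with the lookup above is
 *
 *	if (vm_map_verify(map, &version)) {
 *		... map is read-locked and unchanged: safe to commit ...
 *		vm_map_verify_done(map, &version);
 *	} else {
 *		... timestamp changed: redo the lookup ...
 *	}
 */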
8762
8763
8764 /*
8765 * TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
8766 * Goes away after regular vm_region_recurse function migrates to
8767 * 64 bits
8768 * vm_region_recurse: A form of vm_region which follows the
8769 * submaps in a target map
8770 *
8771 */
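/*
 * Editor's illustration (a user-space sketch, not part of the original
 * source): this routine backs mach_vm_region_recurse(), which walks a
 * task's address space one region at a time, descending into submaps:
 *
 *	mach_vm_address_t addr = 0;
 *	mach_vm_size_t size;
 *	natural_t depth = 0;
 *	struct vm_region_submap_info_64 info;
 *	mach_msg_type_number_t count = VM_REGION_SUBMAP_INFO_COUNT_64;
 *
 *	kr = mach_vm_region_recurse(mach_task_self(), &addr, &size,
 *				    &depth, (vm_region_recurse_info_t)&info,
 *				    &count);
 *
 * On success, "addr"/"size" describe the region containing (or
 * following) the requested address and "depth" reports the submap
 * nesting level.
 */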
8772
8773 kern_return_t
8774 vm_map_region_recurse_64(
8775 vm_map_t map,
8776 vm_map_offset_t *address, /* IN/OUT */
8777 vm_map_size_t *size, /* OUT */
8778 natural_t *nesting_depth, /* IN/OUT */
8779 vm_region_submap_info_64_t submap_info, /* IN/OUT */
8780 mach_msg_type_number_t *count) /* IN/OUT */
8781 {
8782 vm_region_extended_info_data_t extended;
8783 vm_map_entry_t tmp_entry;
8784 vm_map_offset_t user_address;
8785 unsigned int user_max_depth;
8786
8787 /*
8788 * "curr_entry" is the VM map entry preceding or including the
8789 * address we're looking for.
8790 * "curr_map" is the map or sub-map containing "curr_entry".
8791 * "curr_offset" is the cumulated offset of "curr_map" in the
8792 * target task's address space.
8793 * "curr_depth" is the depth of "curr_map" in the chain of
8794 * sub-maps.
8795 * "curr_max_offset" is the maximum offset we should take into
8796 * account in the current map. It may be smaller than the current
8797 * map's "max_offset" because we might not have mapped it all in
8798 * the upper level map.
8799 */
8800 vm_map_entry_t curr_entry;
8801 vm_map_offset_t curr_offset;
8802 vm_map_t curr_map;
8803 unsigned int curr_depth;
8804 vm_map_offset_t curr_max_offset;
8805
8806 /*
8807 * "next_" is the same as "curr_" but for the VM region immediately
8808 * after the address we're looking for. We need to keep track of this
8809 * too because we want to return info about that region if the
8810 * address we're looking for is not mapped.
8811 */
8812 vm_map_entry_t next_entry;
8813 vm_map_offset_t next_offset;
8814 vm_map_t next_map;
8815 unsigned int next_depth;
8816 vm_map_offset_t next_max_offset;
8817
8818 boolean_t look_for_pages;
8819 vm_region_submap_short_info_64_t short_info;
8820
8821 if (map == VM_MAP_NULL) {
8822 /* no address space to work on */
8823 return KERN_INVALID_ARGUMENT;
8824 }
8825
8826 if (*count < VM_REGION_SUBMAP_INFO_COUNT_64) {
8827 if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) {
8828 /*
8829 * "info" structure is not big enough and
8830 * would overflow
8831 */
8832 return KERN_INVALID_ARGUMENT;
8833 } else {
8834 look_for_pages = FALSE;
8835 *count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
8836 short_info = (vm_region_submap_short_info_64_t) submap_info;
8837 submap_info = NULL;
8838 }
8839 } else {
8840 look_for_pages = TRUE;
8841 *count = VM_REGION_SUBMAP_INFO_COUNT_64;
8842 short_info = NULL;
8843 }
8844
8845
8846 user_address = *address;
8847 user_max_depth = *nesting_depth;
8848
8849 curr_entry = NULL;
8850 curr_map = map;
8851 curr_offset = 0;
8852 curr_depth = 0;
8853 curr_max_offset = curr_map->max_offset;
8854
8855 next_entry = NULL;
8856 next_map = NULL;
8857 next_offset = 0;
8858 next_depth = 0;
8859 next_max_offset = curr_max_offset;
8860
8861 if (not_in_kdp) {
8862 vm_map_lock_read(curr_map);
8863 }
8864
8865 for (;;) {
8866 if (vm_map_lookup_entry(curr_map,
8867 user_address - curr_offset,
8868 &tmp_entry)) {
8869 /* tmp_entry contains the address we're looking for */
8870 curr_entry = tmp_entry;
8871 } else {
8872 /*
8873 * The address is not mapped. "tmp_entry" is the
8874 * map entry preceding the address. We want the next
8875 * one, if it exists.
8876 */
8877 curr_entry = tmp_entry->vme_next;
8878 if (curr_entry == vm_map_to_entry(curr_map) ||
8879 curr_entry->vme_start >= curr_max_offset) {
8880 /* no next entry at this level: stop looking */
8881 if (not_in_kdp) {
8882 vm_map_unlock_read(curr_map);
8883 }
8884 curr_entry = NULL;
8885 curr_map = NULL;
8886 curr_offset = 0;
8887 curr_depth = 0;
8888 curr_max_offset = 0;
8889 break;
8890 }
8891 }
8892
8893 /*
8894 * Is the next entry at this level closer to the address (or
8895 * deeper in the submap chain) than the one we had
8896 * so far ?
8897 */
8898 tmp_entry = curr_entry->vme_next;
8899 if (tmp_entry == vm_map_to_entry(curr_map)) {
8900 /* no next entry at this level */
8901 } else if (tmp_entry->vme_start >= curr_max_offset) {
8902 /*
8903 * tmp_entry is beyond the scope of what we mapped of
8904 * this submap in the upper level: ignore it.
8905 */
8906 } else if ((next_entry == NULL) ||
8907 (tmp_entry->vme_start + curr_offset <=
8908 next_entry->vme_start + next_offset)) {
8909 /*
8910 * We didn't have a "next_entry" or this one is
8911 * closer to the address we're looking for:
8912 * use this "tmp_entry" as the new "next_entry".
8913 */
8914 if (next_entry != NULL) {
8915 /* unlock the last "next_map" */
8916 if (next_map != curr_map && not_in_kdp) {
8917 vm_map_unlock_read(next_map);
8918 }
8919 }
8920 next_entry = tmp_entry;
8921 next_map = curr_map;
8922 next_offset = curr_offset;
8923 next_depth = curr_depth;
8924 next_max_offset = curr_max_offset;
8925 }
8926
8927 if (!curr_entry->is_sub_map ||
8928 curr_depth >= user_max_depth) {
8929 /*
8930 * We hit a leaf map or we reached the maximum depth
8931 * we could, so stop looking. Keep the current map
8932 * locked.
8933 */
8934 break;
8935 }
8936
8937 /*
8938 * Get down to the next submap level.
8939 */
8940
8941 /*
8942 * Lock the next level and unlock the current level,
8943 * unless we need to keep it locked to access the "next_entry"
8944 * later.
8945 */
8946 if (not_in_kdp) {
8947 vm_map_lock_read(curr_entry->object.sub_map);
8948 }
8949 if (curr_map == next_map) {
8950 /* keep "next_map" locked in case we need it */
8951 } else {
8952 /* release this map */
8953 if (not_in_kdp)
8954 vm_map_unlock_read(curr_map);
8955 }
8956
8957 /*
8958 * Adjust the offset. "curr_entry" maps the submap
8959 * at relative address "curr_entry->vme_start" in the
8960 * curr_map but skips the first "curr_entry->offset"
8961 * bytes of the submap.
8962 * "curr_offset" always represents the offset of a virtual
8963 * address in the curr_map relative to the absolute address
8964 * space (i.e. the top-level VM map).
8965 */
8966 curr_offset +=
8967 (curr_entry->vme_start - curr_entry->offset);
8968 /* switch to the submap */
8969 curr_map = curr_entry->object.sub_map;
8970 curr_depth++;
8971 /*
8972 * "curr_max_offset" allows us to keep track of the
8973 * portion of the submap that is actually mapped at this level:
8974 * the rest of that submap is irrelevant to us, since it's not
8975 * mapped here.
8976 * The relevant portion of the map starts at
8977 * "curr_entry->offset" up to the size of "curr_entry".
8978 */
8979 curr_max_offset =
8980 curr_entry->vme_end - curr_entry->vme_start +
8981 curr_entry->offset;
8982 curr_entry = NULL;
8983 }
8984
8985 if (curr_entry == NULL) {
8986 /* no VM region contains the address... */
8987 if (next_entry == NULL) {
8988 /* ... and no VM region follows it either */
8989 return KERN_INVALID_ADDRESS;
8990 }
8991 /* ... gather info about the next VM region */
8992 curr_entry = next_entry;
8993 curr_map = next_map; /* still locked ... */
8994 curr_offset = next_offset;
8995 curr_depth = next_depth;
8996 curr_max_offset = next_max_offset;
8997 } else {
8998 /* we won't need "next_entry" after all */
8999 if (next_entry != NULL) {
9000 /* release "next_map" */
9001 if (next_map != curr_map && not_in_kdp) {
9002 vm_map_unlock_read(next_map);
9003 }
9004 }
9005 }
9006 next_entry = NULL;
9007 next_map = NULL;
9008 next_offset = 0;
9009 next_depth = 0;
9010 next_max_offset = 0;
9011
9012 *nesting_depth = curr_depth;
9013 *size = curr_entry->vme_end - curr_entry->vme_start;
9014 *address = curr_entry->vme_start + curr_offset;
9015
9016 // LP64todo: all the current tools are 32bit, obviously never worked for 64b
9017 // so probably should be a real 32b ID vs. ptr.
9018 // Current users just check for equality
9019 #define INFO_MAKE_OBJECT_ID(p) ((uint32_t)(uintptr_t)p)
9020
9021 if (look_for_pages) {
9022 submap_info->user_tag = curr_entry->alias;
9023 submap_info->offset = curr_entry->offset;
9024 submap_info->protection = curr_entry->protection;
9025 submap_info->inheritance = curr_entry->inheritance;
9026 submap_info->max_protection = curr_entry->max_protection;
9027 submap_info->behavior = curr_entry->behavior;
9028 submap_info->user_wired_count = curr_entry->user_wired_count;
9029 submap_info->is_submap = curr_entry->is_sub_map;
9030 submap_info->object_id = INFO_MAKE_OBJECT_ID(curr_entry->object.vm_object);
9031 } else {
9032 short_info->user_tag = curr_entry->alias;
9033 short_info->offset = curr_entry->offset;
9034 short_info->protection = curr_entry->protection;
9035 short_info->inheritance = curr_entry->inheritance;
9036 short_info->max_protection = curr_entry->max_protection;
9037 short_info->behavior = curr_entry->behavior;
9038 short_info->user_wired_count = curr_entry->user_wired_count;
9039 short_info->is_submap = curr_entry->is_sub_map;
9040 short_info->object_id = INFO_MAKE_OBJECT_ID(curr_entry->object.vm_object);
9041 }
9042
9043 extended.pages_resident = 0;
9044 extended.pages_swapped_out = 0;
9045 extended.pages_shared_now_private = 0;
9046 extended.pages_dirtied = 0;
9047 extended.external_pager = 0;
9048 extended.shadow_depth = 0;
9049
9050 if (not_in_kdp) {
9051 if (!curr_entry->is_sub_map) {
9052 vm_map_region_walk(curr_map,
9053 curr_entry->vme_start,
9054 curr_entry,
9055 curr_entry->offset,
9056 (curr_entry->vme_end -
9057 curr_entry->vme_start),
9058 &extended,
9059 look_for_pages);
9060 if (extended.external_pager &&
9061 extended.ref_count == 2 &&
9062 extended.share_mode == SM_SHARED) {
9063 extended.share_mode = SM_PRIVATE;
9064 }
9065 } else {
9066 if (curr_entry->use_pmap) {
9067 extended.share_mode = SM_TRUESHARED;
9068 } else {
9069 extended.share_mode = SM_PRIVATE;
9070 }
9071 extended.ref_count =
9072 curr_entry->object.sub_map->ref_count;
9073 }
9074 }
9075
9076 if (look_for_pages) {
9077 submap_info->pages_resident = extended.pages_resident;
9078 submap_info->pages_swapped_out = extended.pages_swapped_out;
9079 submap_info->pages_shared_now_private =
9080 extended.pages_shared_now_private;
9081 submap_info->pages_dirtied = extended.pages_dirtied;
9082 submap_info->external_pager = extended.external_pager;
9083 submap_info->shadow_depth = extended.shadow_depth;
9084 submap_info->share_mode = extended.share_mode;
9085 submap_info->ref_count = extended.ref_count;
9086 } else {
9087 short_info->external_pager = extended.external_pager;
9088 short_info->shadow_depth = extended.shadow_depth;
9089 short_info->share_mode = extended.share_mode;
9090 short_info->ref_count = extended.ref_count;
9091 }
9092
9093 if (not_in_kdp) {
9094 vm_map_unlock_read(curr_map);
9095 }
9096
9097 return KERN_SUCCESS;
9098 }
9099
9100 /*
9101 * vm_region:
9102 *
9103 * User call to obtain information about a region in
9104 * a task's address map. Several information flavors are
9105 * supported (basic, basic 64-bit, extended and top).
9106 *
9107 * XXX The reserved and behavior fields cannot be filled
9108 * in until the vm merge from the IK is completed, and
9109 * vm_reserve is implemented.
9110 */
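/*
 * Editor's illustration (a user-space sketch, not part of the original
 * source): the VM_REGION_BASIC_INFO_64 flavor handled below is what a
 * caller of mach_vm_region() typically asks for:
 *
 *	mach_vm_address_t addr = 0;
 *	mach_vm_size_t size;
 *	vm_region_basic_info_data_64_t info;
 *	mach_msg_type_number_t count = VM_REGION_BASIC_INFO_COUNT_64;
 *	mach_port_t object_name;
 *
 *	kr = mach_vm_region(mach_task_self(), &addr, &size,
 *			    VM_REGION_BASIC_INFO_64,
 *			    (vm_region_info_t)&info, &count, &object_name);
 */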
9111
9112 kern_return_t
9113 vm_map_region(
9114 vm_map_t map,
9115 vm_map_offset_t *address, /* IN/OUT */
9116 vm_map_size_t *size, /* OUT */
9117 vm_region_flavor_t flavor, /* IN */
9118 vm_region_info_t info, /* OUT */
9119 mach_msg_type_number_t *count, /* IN/OUT */
9120 mach_port_t *object_name) /* OUT */
9121 {
9122 vm_map_entry_t tmp_entry;
9123 vm_map_entry_t entry;
9124 vm_map_offset_t start;
9125
9126 if (map == VM_MAP_NULL)
9127 return(KERN_INVALID_ARGUMENT);
9128
9129 switch (flavor) {
9130
9131 case VM_REGION_BASIC_INFO:
9132 /* legacy for old 32-bit objects info */
9133 {
9134 vm_region_basic_info_t basic;
9135
9136 if (*count < VM_REGION_BASIC_INFO_COUNT)
9137 return(KERN_INVALID_ARGUMENT);
9138
9139 basic = (vm_region_basic_info_t) info;
9140 *count = VM_REGION_BASIC_INFO_COUNT;
9141
9142 vm_map_lock_read(map);
9143
9144 start = *address;
9145 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
9146 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
9147 vm_map_unlock_read(map);
9148 return(KERN_INVALID_ADDRESS);
9149 }
9150 } else {
9151 entry = tmp_entry;
9152 }
9153
9154 start = entry->vme_start;
9155
9156 basic->offset = (uint32_t)entry->offset;
9157 basic->protection = entry->protection;
9158 basic->inheritance = entry->inheritance;
9159 basic->max_protection = entry->max_protection;
9160 basic->behavior = entry->behavior;
9161 basic->user_wired_count = entry->user_wired_count;
9162 basic->reserved = entry->is_sub_map;
9163 *address = start;
9164 *size = (entry->vme_end - start);
9165
9166 if (object_name) *object_name = IP_NULL;
9167 if (entry->is_sub_map) {
9168 basic->shared = FALSE;
9169 } else {
9170 basic->shared = entry->is_shared;
9171 }
9172
9173 vm_map_unlock_read(map);
9174 return(KERN_SUCCESS);
9175 }
9176
9177 case VM_REGION_BASIC_INFO_64:
9178 {
9179 vm_region_basic_info_64_t basic;
9180
9181 if (*count < VM_REGION_BASIC_INFO_COUNT_64)
9182 return(KERN_INVALID_ARGUMENT);
9183
9184 basic = (vm_region_basic_info_64_t) info;
9185 *count = VM_REGION_BASIC_INFO_COUNT_64;
9186
9187 vm_map_lock_read(map);
9188
9189 start = *address;
9190 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
9191 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
9192 vm_map_unlock_read(map);
9193 return(KERN_INVALID_ADDRESS);
9194 }
9195 } else {
9196 entry = tmp_entry;
9197 }
9198
9199 start = entry->vme_start;
9200
9201 basic->offset = entry->offset;
9202 basic->protection = entry->protection;
9203 basic->inheritance = entry->inheritance;
9204 basic->max_protection = entry->max_protection;
9205 basic->behavior = entry->behavior;
9206 basic->user_wired_count = entry->user_wired_count;
9207 basic->reserved = entry->is_sub_map;
9208 *address = start;
9209 *size = (entry->vme_end - start);
9210
9211 if (object_name) *object_name = IP_NULL;
9212 if (entry->is_sub_map) {
9213 basic->shared = FALSE;
9214 } else {
9215 basic->shared = entry->is_shared;
9216 }
9217
9218 vm_map_unlock_read(map);
9219 return(KERN_SUCCESS);
9220 }
9221 case VM_REGION_EXTENDED_INFO:
9222 {
9223 vm_region_extended_info_t extended;
9224
9225 if (*count < VM_REGION_EXTENDED_INFO_COUNT)
9226 return(KERN_INVALID_ARGUMENT);
9227
9228 extended = (vm_region_extended_info_t) info;
9229 *count = VM_REGION_EXTENDED_INFO_COUNT;
9230
9231 vm_map_lock_read(map);
9232
9233 start = *address;
9234 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
9235 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
9236 vm_map_unlock_read(map);
9237 return(KERN_INVALID_ADDRESS);
9238 }
9239 } else {
9240 entry = tmp_entry;
9241 }
9242 start = entry->vme_start;
9243
9244 extended->protection = entry->protection;
9245 extended->user_tag = entry->alias;
9246 extended->pages_resident = 0;
9247 extended->pages_swapped_out = 0;
9248 extended->pages_shared_now_private = 0;
9249 extended->pages_dirtied = 0;
9250 extended->external_pager = 0;
9251 extended->shadow_depth = 0;
9252
9253 vm_map_region_walk(map, start, entry, entry->offset, entry->vme_end - start, extended, TRUE);
9254
9255 if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED)
9256 extended->share_mode = SM_PRIVATE;
9257
9258 if (object_name)
9259 *object_name = IP_NULL;
9260 *address = start;
9261 *size = (entry->vme_end - start);
9262
9263 vm_map_unlock_read(map);
9264 return(KERN_SUCCESS);
9265 }
9266 case VM_REGION_TOP_INFO:
9267 {
9268 vm_region_top_info_t top;
9269
9270 if (*count < VM_REGION_TOP_INFO_COUNT)
9271 return(KERN_INVALID_ARGUMENT);
9272
9273 top = (vm_region_top_info_t) info;
9274 *count = VM_REGION_TOP_INFO_COUNT;
9275
9276 vm_map_lock_read(map);
9277
9278 start = *address;
9279 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
9280 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
9281 vm_map_unlock_read(map);
9282 return(KERN_INVALID_ADDRESS);
9283 }
9284 } else {
9285 entry = tmp_entry;
9286
9287 }
9288 start = entry->vme_start;
9289
9290 top->private_pages_resident = 0;
9291 top->shared_pages_resident = 0;
9292
9293 vm_map_region_top_walk(entry, top);
9294
9295 if (object_name)
9296 *object_name = IP_NULL;
9297 *address = start;
9298 *size = (entry->vme_end - start);
9299
9300 vm_map_unlock_read(map);
9301 return(KERN_SUCCESS);
9302 }
9303 default:
9304 return(KERN_INVALID_ARGUMENT);
9305 }
9306 }
9307
9308 #define OBJ_RESIDENT_COUNT(obj, entry_size) \
9309 MIN((entry_size), \
9310 ((obj)->all_reusable ? \
9311 (obj)->wired_page_count : \
9312 (obj)->resident_page_count - (obj)->reusable_page_count))
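/*
 * Editor's note (not part of the original source): OBJ_RESIDENT_COUNT()
 * estimates how many of an object's resident pages to charge to a
 * mapping of "entry_size" pages: all-reusable objects are charged only
 * their wired pages, other objects their resident pages minus the
 * reusable ones, and the result is capped at the mapping size.
 */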
9313
9314 void
9315 vm_map_region_top_walk(
9316 vm_map_entry_t entry,
9317 vm_region_top_info_t top)
9318 {
9319
9320 if (entry->object.vm_object == 0 || entry->is_sub_map) {
9321 top->share_mode = SM_EMPTY;
9322 top->ref_count = 0;
9323 top->obj_id = 0;
9324 return;
9325 }
9326
9327 {
9328 struct vm_object *obj, *tmp_obj;
9329 int ref_count;
9330 uint32_t entry_size;
9331
9332 entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64);
9333
9334 obj = entry->object.vm_object;
9335
9336 vm_object_lock(obj);
9337
9338 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
9339 ref_count--;
9340
9341 assert(obj->reusable_page_count <= obj->resident_page_count);
9342 if (obj->shadow) {
9343 if (ref_count == 1)
9344 top->private_pages_resident =
9345 OBJ_RESIDENT_COUNT(obj, entry_size);
9346 else
9347 top->shared_pages_resident =
9348 OBJ_RESIDENT_COUNT(obj, entry_size);
9349 top->ref_count = ref_count;
9350 top->share_mode = SM_COW;
9351
9352 while ((tmp_obj = obj->shadow)) {
9353 vm_object_lock(tmp_obj);
9354 vm_object_unlock(obj);
9355 obj = tmp_obj;
9356
9357 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
9358 ref_count--;
9359
9360 assert(obj->reusable_page_count <= obj->resident_page_count);
9361 top->shared_pages_resident +=
9362 OBJ_RESIDENT_COUNT(obj, entry_size);
9363 top->ref_count += ref_count - 1;
9364 }
9365 } else {
9366 if (entry->needs_copy) {
9367 top->share_mode = SM_COW;
9368 top->shared_pages_resident =
9369 OBJ_RESIDENT_COUNT(obj, entry_size);
9370 } else {
9371 if (ref_count == 1 ||
9372 (ref_count == 2 && !(obj->pager_trusted) && !(obj->internal))) {
9373 top->share_mode = SM_PRIVATE;
9374 top->private_pages_resident =
9375 OBJ_RESIDENT_COUNT(obj,
9376 entry_size);
9377 } else {
9378 top->share_mode = SM_SHARED;
9379 top->shared_pages_resident =
9380 OBJ_RESIDENT_COUNT(obj,
9381 entry_size);
9382 }
9383 }
9384 top->ref_count = ref_count;
9385 }
9386 /* XXX K64: obj_id will be truncated */
9387 top->obj_id = (unsigned int) (uintptr_t)obj;
9388
9389 vm_object_unlock(obj);
9390 }
9391 }
9392
9393 void
9394 vm_map_region_walk(
9395 vm_map_t map,
9396 vm_map_offset_t va,
9397 vm_map_entry_t entry,
9398 vm_object_offset_t offset,
9399 vm_object_size_t range,
9400 vm_region_extended_info_t extended,
9401 boolean_t look_for_pages)
9402 {
9403 register struct vm_object *obj, *tmp_obj;
9404 register vm_map_offset_t last_offset;
9405 register int i;
9406 register int ref_count;
9407 struct vm_object *shadow_object;
9408 int shadow_depth;
9409
9410 if ((entry->object.vm_object == 0) ||
9411 (entry->is_sub_map) ||
9412 (entry->object.vm_object->phys_contiguous)) {
9413 extended->share_mode = SM_EMPTY;
9414 extended->ref_count = 0;
9415 return;
9416 }
9417 {
9418 obj = entry->object.vm_object;
9419
9420 vm_object_lock(obj);
9421
9422 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
9423 ref_count--;
9424
9425 if (look_for_pages) {
9426 for (last_offset = offset + range;
9427 offset < last_offset;
9428 offset += PAGE_SIZE_64, va += PAGE_SIZE)
9429 vm_map_region_look_for_page(map, va, obj,
9430 offset, ref_count,
9431 0, extended);
9432 } else {
9433 shadow_object = obj->shadow;
9434 shadow_depth = 0;
9435
9436 if ( !(obj->pager_trusted) && !(obj->internal))
9437 extended->external_pager = 1;
9438
9439 if (shadow_object != VM_OBJECT_NULL) {
9440 vm_object_lock(shadow_object);
9441 for (;
9442 shadow_object != VM_OBJECT_NULL;
9443 shadow_depth++) {
9444 vm_object_t next_shadow;
9445
9446 if ( !(shadow_object->pager_trusted) &&
9447 !(shadow_object->internal))
9448 extended->external_pager = 1;
9449
9450 next_shadow = shadow_object->shadow;
9451 if (next_shadow) {
9452 vm_object_lock(next_shadow);
9453 }
9454 vm_object_unlock(shadow_object);
9455 shadow_object = next_shadow;
9456 }
9457 }
9458 extended->shadow_depth = shadow_depth;
9459 }
9460
9461 if (extended->shadow_depth || entry->needs_copy)
9462 extended->share_mode = SM_COW;
9463 else {
9464 if (ref_count == 1)
9465 extended->share_mode = SM_PRIVATE;
9466 else {
9467 if (obj->true_share)
9468 extended->share_mode = SM_TRUESHARED;
9469 else
9470 extended->share_mode = SM_SHARED;
9471 }
9472 }
9473 extended->ref_count = ref_count - extended->shadow_depth;
9474
9475 for (i = 0; i < extended->shadow_depth; i++) {
9476 if ((tmp_obj = obj->shadow) == 0)
9477 break;
9478 vm_object_lock(tmp_obj);
9479 vm_object_unlock(obj);
9480
9481 if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress)
9482 ref_count--;
9483
9484 extended->ref_count += ref_count;
9485 obj = tmp_obj;
9486 }
9487 vm_object_unlock(obj);
9488
9489 if (extended->share_mode == SM_SHARED) {
9490 register vm_map_entry_t cur;
9491 register vm_map_entry_t last;
9492 int my_refs;
9493
9494 obj = entry->object.vm_object;
9495 last = vm_map_to_entry(map);
9496 my_refs = 0;
9497
9498 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
9499 ref_count--;
9500 for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next)
9501 my_refs += vm_map_region_count_obj_refs(cur, obj);
9502
9503 if (my_refs == ref_count)
9504 extended->share_mode = SM_PRIVATE_ALIASED;
9505 else if (my_refs > 1)
9506 extended->share_mode = SM_SHARED_ALIASED;
9507 }
9508 }
9509 }
9510
9511
9512 /* object is locked on entry and locked on return */
9513
9514
9515 static void
9516 vm_map_region_look_for_page(
9517 __unused vm_map_t map,
9518 __unused vm_map_offset_t va,
9519 vm_object_t object,
9520 vm_object_offset_t offset,
9521 int max_refcnt,
9522 int depth,
9523 vm_region_extended_info_t extended)
9524 {
9525 register vm_page_t p;
9526 register vm_object_t shadow;
9527 register int ref_count;
9528 vm_object_t caller_object;
9529 #if MACH_PAGEMAP
9530 kern_return_t kr;
9531 #endif
9532 shadow = object->shadow;
9533 caller_object = object;
9534
9535
9536 while (TRUE) {
9537
9538 if ( !(object->pager_trusted) && !(object->internal))
9539 extended->external_pager = 1;
9540
9541 if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
9542 if (shadow && (max_refcnt == 1))
9543 extended->pages_shared_now_private++;
9544
9545 if (!p->fictitious &&
9546 (p->dirty || pmap_is_modified(p->phys_page)))
9547 extended->pages_dirtied++;
9548
9549 extended->pages_resident++;
9550
9551 if(object != caller_object)
9552 vm_object_unlock(object);
9553
9554 return;
9555 }
9556 #if MACH_PAGEMAP
9557 if (object->existence_map) {
9558 if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_EXISTS) {
9559
9560 extended->pages_swapped_out++;
9561
9562 if(object != caller_object)
9563 vm_object_unlock(object);
9564
9565 return;
9566 }
9567 } else if (object->internal &&
9568 object->alive &&
9569 !object->terminating &&
9570 object->pager_ready) {
9571
9572 memory_object_t pager;
9573
9574 vm_object_paging_begin(object);
9575 pager = object->pager;
9576 vm_object_unlock(object);
9577
9578 kr = memory_object_data_request(
9579 pager,
9580 offset + object->paging_offset,
9581 0, /* just poke the pager */
9582 VM_PROT_READ,
9583 NULL);
9584
9585 vm_object_lock(object);
9586 vm_object_paging_end(object);
9587
9588 if (kr == KERN_SUCCESS) {
9589 /* the pager has that page */
9590 extended->pages_swapped_out++;
9591 if (object != caller_object)
9592 vm_object_unlock(object);
9593 return;
9594 }
9595 }
9596 #endif /* MACH_PAGEMAP */
9597
9598 if (shadow) {
9599 vm_object_lock(shadow);
9600
9601 if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress)
9602 ref_count--;
9603
9604 if (++depth > extended->shadow_depth)
9605 extended->shadow_depth = depth;
9606
9607 if (ref_count > max_refcnt)
9608 max_refcnt = ref_count;
9609
9610 if(object != caller_object)
9611 vm_object_unlock(object);
9612
9613 offset = offset + object->shadow_offset;
9614 object = shadow;
9615 shadow = object->shadow;
9616 continue;
9617 }
9618 if(object != caller_object)
9619 vm_object_unlock(object);
9620 break;
9621 }
9622 }
9623
9624 static int
9625 vm_map_region_count_obj_refs(
9626 vm_map_entry_t entry,
9627 vm_object_t object)
9628 {
9629 register int ref_count;
9630 register vm_object_t chk_obj;
9631 register vm_object_t tmp_obj;
9632
9633 if (entry->object.vm_object == 0)
9634 return(0);
9635
9636 if (entry->is_sub_map)
9637 return(0);
9638 else {
9639 ref_count = 0;
9640
9641 chk_obj = entry->object.vm_object;
9642 vm_object_lock(chk_obj);
9643
9644 while (chk_obj) {
9645 if (chk_obj == object)
9646 ref_count++;
9647 tmp_obj = chk_obj->shadow;
9648 if (tmp_obj)
9649 vm_object_lock(tmp_obj);
9650 vm_object_unlock(chk_obj);
9651
9652 chk_obj = tmp_obj;
9653 }
9654 }
9655 return(ref_count);
9656 }
9657
9658
9659 /*
9660 * Routine: vm_map_simplify
9661 *
9662 * Description:
9663 * Attempt to simplify the map representation in
9664 * the vicinity of the given starting address.
9665 * Note:
9666 * This routine is intended primarily to keep the
9667 * kernel maps more compact -- they generally don't
9668 * benefit from the "expand a map entry" technology
9669 * at allocation time because the adjacent entry
9670 * is often wired down.
9671 */
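/*
 * Editor's example (not part of the original source): two adjacent
 * entries such as [0x1000, 0x2000) at object offset 0 and
 * [0x2000, 0x3000) at object offset 0x1000, backed by the same object
 * with identical protections, inheritance and other attributes, are
 * merged by vm_map_simplify_entry() into a single [0x1000, 0x3000)
 * entry covering the whole range.
 */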
9672 void
9673 vm_map_simplify_entry(
9674 vm_map_t map,
9675 vm_map_entry_t this_entry)
9676 {
9677 vm_map_entry_t prev_entry;
9678
9679 counter(c_vm_map_simplify_entry_called++);
9680
9681 prev_entry = this_entry->vme_prev;
9682
9683 if ((this_entry != vm_map_to_entry(map)) &&
9684 (prev_entry != vm_map_to_entry(map)) &&
9685
9686 (prev_entry->vme_end == this_entry->vme_start) &&
9687
9688 (prev_entry->is_sub_map == this_entry->is_sub_map) &&
9689
9690 (prev_entry->object.vm_object == this_entry->object.vm_object) &&
9691 ((prev_entry->offset + (prev_entry->vme_end -
9692 prev_entry->vme_start))
9693 == this_entry->offset) &&
9694
9695 (prev_entry->inheritance == this_entry->inheritance) &&
9696 (prev_entry->protection == this_entry->protection) &&
9697 (prev_entry->max_protection == this_entry->max_protection) &&
9698 (prev_entry->behavior == this_entry->behavior) &&
9699 (prev_entry->alias == this_entry->alias) &&
9700 (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) &&
9701 (prev_entry->no_cache == this_entry->no_cache) &&
9702 (prev_entry->wired_count == this_entry->wired_count) &&
9703 (prev_entry->user_wired_count == this_entry->user_wired_count) &&
9704
9705 (prev_entry->needs_copy == this_entry->needs_copy) &&
9706 (prev_entry->permanent == this_entry->permanent) &&
9707
9708 (prev_entry->use_pmap == FALSE) &&
9709 (this_entry->use_pmap == FALSE) &&
9710 (prev_entry->in_transition == FALSE) &&
9711 (this_entry->in_transition == FALSE) &&
9712 (prev_entry->needs_wakeup == FALSE) &&
9713 (this_entry->needs_wakeup == FALSE) &&
9714 (prev_entry->is_shared == FALSE) &&
9715 (this_entry->is_shared == FALSE)
9716 ) {
9717 _vm_map_entry_unlink(&map->hdr, prev_entry);
9718 this_entry->vme_start = prev_entry->vme_start;
9719 this_entry->offset = prev_entry->offset;
9720 if (prev_entry->is_sub_map) {
9721 vm_map_deallocate(prev_entry->object.sub_map);
9722 } else {
9723 vm_object_deallocate(prev_entry->object.vm_object);
9724 }
9725 vm_map_entry_dispose(map, prev_entry);
9726 SAVE_HINT_MAP_WRITE(map, this_entry);
9727 counter(c_vm_map_simplified++);
9728 }
9729 }
9730
9731 void
9732 vm_map_simplify(
9733 vm_map_t map,
9734 vm_map_offset_t start)
9735 {
9736 vm_map_entry_t this_entry;
9737
9738 vm_map_lock(map);
9739 if (vm_map_lookup_entry(map, start, &this_entry)) {
9740 vm_map_simplify_entry(map, this_entry);
9741 vm_map_simplify_entry(map, this_entry->vme_next);
9742 }
9743 counter(c_vm_map_simplify_called++);
9744 vm_map_unlock(map);
9745 }
9746
9747 static void
9748 vm_map_simplify_range(
9749 vm_map_t map,
9750 vm_map_offset_t start,
9751 vm_map_offset_t end)
9752 {
9753 vm_map_entry_t entry;
9754
9755 /*
9756 * The map should be locked (for "write") by the caller.
9757 */
9758
9759 if (start >= end) {
9760 /* invalid address range */
9761 return;
9762 }
9763
9764 start = vm_map_trunc_page(start);
9765 end = vm_map_round_page(end);
9766
9767 if (!vm_map_lookup_entry(map, start, &entry)) {
9768 /* "start" is not mapped and "entry" ends before "start" */
9769 if (entry == vm_map_to_entry(map)) {
9770 /* start with first entry in the map */
9771 entry = vm_map_first_entry(map);
9772 } else {
9773 /* start with next entry */
9774 entry = entry->vme_next;
9775 }
9776 }
9777
9778 while (entry != vm_map_to_entry(map) &&
9779 entry->vme_start <= end) {
9780 /* try and coalesce "entry" with its previous entry */
9781 vm_map_simplify_entry(map, entry);
9782 entry = entry->vme_next;
9783 }
9784 }
9785
9786
9787 /*
9788 * Routine: vm_map_machine_attribute
9789 * Purpose:
9790 * Provide machine-specific attributes to mappings,
9791 * such as cacheability, etc., for machines that provide
9792 * them. NUMA architectures and machines with big/strange
9793 * caches will use this.
9794 * Note:
9795 * Responsibilities for locking and checking are handled here;
9796 * everything else is in the pmap module. If any non-volatile
9797 * information must be kept, the pmap module should handle
9798 * it itself. [This assumes that attributes do not
9799 * need to be inherited, which seems ok to me]
9800 */
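/*
 * Editor's sketch (not part of the original source; the exact user
 * interface shown is an assumption): this is reached from the Mach
 * vm_machine_attribute() call, e.g. to flush the data cache over a
 * range after generating code ("addr" and "size" are placeholders):
 *
 *	vm_machine_attribute_val_t val = MATTR_VAL_CACHE_FLUSH;
 *
 *	kr = vm_machine_attribute(mach_task_self(),
 *				  (vm_address_t)addr, size,
 *				  MATTR_CACHE, &val);
 */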
9801 kern_return_t
9802 vm_map_machine_attribute(
9803 vm_map_t map,
9804 vm_map_offset_t start,
9805 vm_map_offset_t end,
9806 vm_machine_attribute_t attribute,
9807 vm_machine_attribute_val_t* value) /* IN/OUT */
9808 {
9809 kern_return_t ret;
9810 vm_map_size_t sync_size;
9811 vm_map_entry_t entry;
9812
9813 if (start < vm_map_min(map) || end > vm_map_max(map))
9814 return KERN_INVALID_ADDRESS;
9815
9816 /* Figure how much memory we need to flush (in page increments) */
9817 sync_size = end - start;
9818
9819 vm_map_lock(map);
9820
9821 if (attribute != MATTR_CACHE) {
9822 /* If we don't have to find physical addresses, we */
9823 /* don't have to do an explicit traversal here. */
9824 ret = pmap_attribute(map->pmap, start, end-start,
9825 attribute, value);
9826 vm_map_unlock(map);
9827 return ret;
9828 }
9829
9830 ret = KERN_SUCCESS; /* Assume it all worked */
9831
9832 while(sync_size) {
9833 if (vm_map_lookup_entry(map, start, &entry)) {
9834 vm_map_size_t sub_size;
9835 if((entry->vme_end - start) > sync_size) {
9836 sub_size = sync_size;
9837 sync_size = 0;
9838 } else {
9839 sub_size = entry->vme_end - start;
9840 sync_size -= sub_size;
9841 }
9842 if(entry->is_sub_map) {
9843 vm_map_offset_t sub_start;
9844 vm_map_offset_t sub_end;
9845
9846 sub_start = (start - entry->vme_start)
9847 + entry->offset;
9848 sub_end = sub_start + sub_size;
9849 vm_map_machine_attribute(
9850 entry->object.sub_map,
9851 sub_start,
9852 sub_end,
9853 attribute, value);
9854 } else {
9855 if(entry->object.vm_object) {
9856 vm_page_t m;
9857 vm_object_t object;
9858 vm_object_t base_object;
9859 vm_object_t last_object;
9860 vm_object_offset_t offset;
9861 vm_object_offset_t base_offset;
9862 vm_map_size_t range;
9863 range = sub_size;
9864 offset = (start - entry->vme_start)
9865 + entry->offset;
9866 base_offset = offset;
9867 object = entry->object.vm_object;
9868 base_object = object;
9869 last_object = NULL;
9870
9871 vm_object_lock(object);
9872
9873 while (range) {
9874 m = vm_page_lookup(
9875 object, offset);
9876
9877 if (m && !m->fictitious) {
9878 ret =
9879 pmap_attribute_cache_sync(
9880 m->phys_page,
9881 PAGE_SIZE,
9882 attribute, value);
9883
9884 } else if (object->shadow) {
9885 offset = offset + object->shadow_offset;
9886 last_object = object;
9887 object = object->shadow;
9888 vm_object_lock(last_object->shadow);
9889 vm_object_unlock(last_object);
9890 continue;
9891 }
9892 range -= PAGE_SIZE;
9893
9894 if (base_object != object) {
9895 vm_object_unlock(object);
9896 vm_object_lock(base_object);
9897 object = base_object;
9898 }
9899 /* Bump to the next page */
9900 base_offset += PAGE_SIZE;
9901 offset = base_offset;
9902 }
9903 vm_object_unlock(object);
9904 }
9905 }
9906 start += sub_size;
9907 } else {
9908 vm_map_unlock(map);
9909 return KERN_FAILURE;
9910 }
9911
9912 }
9913
9914 vm_map_unlock(map);
9915
9916 return ret;
9917 }
9918
9919 /*
9920 * vm_map_behavior_set:
9921 *
9922 * Sets the paging reference behavior of the specified address
9923 * range in the target map. Paging reference behavior affects
9924 * how pagein operations resulting from faults on the map will be
9925 * clustered.
9926 */
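/*
 * Editor's note (not part of the original source): this is the kernel
 * side of madvise(2) and the Mach vm_behavior_set() interface; roughly,
 * MADV_SEQUENTIAL maps to VM_BEHAVIOR_SEQUENTIAL, MADV_WILLNEED to
 * VM_BEHAVIOR_WILLNEED, MADV_DONTNEED to VM_BEHAVIOR_DONTNEED and
 * MADV_FREE to VM_BEHAVIOR_FREE below.
 */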
9927 kern_return_t
9928 vm_map_behavior_set(
9929 vm_map_t map,
9930 vm_map_offset_t start,
9931 vm_map_offset_t end,
9932 vm_behavior_t new_behavior)
9933 {
9934 register vm_map_entry_t entry;
9935 vm_map_entry_t temp_entry;
9936
9937 XPR(XPR_VM_MAP,
9938 "vm_map_behavior_set, 0x%X start 0x%X end 0x%X behavior %d",
9939 map, start, end, new_behavior, 0);
9940
9941 switch (new_behavior) {
9942
9943 /*
9944 * This first block of behaviors all set a persistent state on the specified
9945 * memory range. All we have to do here is to record the desired behavior
9946 * in the vm_map_entry_t's.
9947 */
9948
9949 case VM_BEHAVIOR_DEFAULT:
9950 case VM_BEHAVIOR_RANDOM:
9951 case VM_BEHAVIOR_SEQUENTIAL:
9952 case VM_BEHAVIOR_RSEQNTL:
9953 case VM_BEHAVIOR_ZERO_WIRED_PAGES:
9954 vm_map_lock(map);
9955
9956 /*
9957 * The entire address range must be valid for the map.
9958 * Note that vm_map_range_check() does a
9959 * vm_map_lookup_entry() internally and returns the
9960 * entry containing the start of the address range if
9961 * the entire range is valid.
9962 */
9963 if (vm_map_range_check(map, start, end, &temp_entry)) {
9964 entry = temp_entry;
9965 vm_map_clip_start(map, entry, start);
9966 }
9967 else {
9968 vm_map_unlock(map);
9969 return(KERN_INVALID_ADDRESS);
9970 }
9971
9972 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
9973 vm_map_clip_end(map, entry, end);
9974 assert(!entry->use_pmap);
9975
9976 if( new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES ) {
9977 entry->zero_wired_pages = TRUE;
9978 } else {
9979 entry->behavior = new_behavior;
9980 }
9981 entry = entry->vme_next;
9982 }
9983
9984 vm_map_unlock(map);
9985 break;
9986
9987 /*
9988 * The rest of these are different from the above in that they cause
9989 * an immediate action to take place as opposed to setting a behavior that
9990 * affects future actions.
9991 */
9992
9993 case VM_BEHAVIOR_WILLNEED:
9994 return vm_map_willneed(map, start, end);
9995
9996 case VM_BEHAVIOR_DONTNEED:
9997 return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS);
9998
9999 case VM_BEHAVIOR_FREE:
10000 return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS);
10001
10002 case VM_BEHAVIOR_REUSABLE:
10003 return vm_map_reusable_pages(map, start, end);
10004
10005 case VM_BEHAVIOR_REUSE:
10006 return vm_map_reuse_pages(map, start, end);
10007
10008 case VM_BEHAVIOR_CAN_REUSE:
10009 return vm_map_can_reuse(map, start, end);
10010
10011 default:
10012 return(KERN_INVALID_ARGUMENT);
10013 }
10014
10015 return(KERN_SUCCESS);
10016 }
10017
10018
10019 /*
10020 * Internals for madvise(MADV_WILLNEED) system call.
10021 *
10022 * The present implementation is to do a read-ahead if the mapping corresponds
10023 * to a mapped regular file. If it's an anonymous mapping, then we do nothing
10024 * and basically ignore the "advice" (which we are always free to do).
10025 */
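/*
 * Editor's illustration (a user-space sketch, not part of the original
 * source): the advice handled here originates from madvise(2), e.g.:
 *
 *	#include <sys/mman.h>
 *
 *	void *p = mmap(NULL, len, PROT_READ, MAP_FILE | MAP_PRIVATE,
 *		       fd, 0);
 *	if (p != MAP_FAILED)
 *		(void) madvise(p, len, MADV_WILLNEED);
 *
 * For a file-backed mapping like this one, the code below issues an
 * asynchronous read-ahead; for anonymous memory the advice is ignored.
 */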
10026
10027
10028 static kern_return_t
10029 vm_map_willneed(
10030 vm_map_t map,
10031 vm_map_offset_t start,
10032 vm_map_offset_t end
10033 )
10034 {
10035 vm_map_entry_t entry;
10036 vm_object_t object;
10037 memory_object_t pager;
10038 struct vm_object_fault_info fault_info;
10039 kern_return_t kr;
10040 vm_object_size_t len;
10041 vm_object_offset_t offset;
10042
10043 /*
10044 * Fill in static values in fault_info. Several fields get ignored by the code
10045 * we call, but we'll fill them in anyway since uninitialized fields are bad
10046 * when it comes to future backwards compatibility.
10047 */
10048
10049 fault_info.interruptible = THREAD_UNINT; /* ignored value */
10050 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
10051 fault_info.no_cache = FALSE; /* ignored value */
10052 fault_info.stealth = TRUE;
10053
10054 /*
10055 * The MADV_WILLNEED operation doesn't require any changes to the
10056 * vm_map_entry_t's, so the read lock is sufficient.
10057 */
10058
10059 vm_map_lock_read(map);
10060
10061 /*
10062 * The madvise semantics require that the address range be fully
10063 * allocated with no holes. Otherwise, we're required to return
10064 * an error.
10065 */
10066
10067 if (vm_map_range_check(map, start, end, &entry)) {
10068
10069 /*
10070 * Examine each vm_map_entry_t in the range.
10071 */
10072
10073 for (; entry->vme_start < end; start += len, entry = entry->vme_next) {
10074
10075 /*
10076 * The first time through, the start address could be anywhere within the
10077 * vm_map_entry we found. So adjust the offset to correspond. After that,
10078 * the offset will always be zero to correspond to the beginning of the current
10079 * vm_map_entry.
10080 */
10081
10082 offset = (start - entry->vme_start) + entry->offset;
10083
10084 /*
10085 * Set the length so we don't go beyond the end of the map_entry or beyond the
10086 * end of the range we were given. This range could also span multiple map
10087 * entries all of which map different files, so make sure we only do the right
10088 * amount of I/O for each object. Note that it's possible for there to be
10089 * multiple map entries all referring to the same object but with different
10090 * page permissions, but it's not worth trying to optimize that case.
10091 */
10092
10093 len = MIN(entry->vme_end - start, end - start);
10094
10095 if ((vm_size_t) len != len) {
10096 /* 32-bit overflow */
10097 len = (vm_size_t) (0 - PAGE_SIZE);
10098 }
10099 fault_info.cluster_size = (vm_size_t) len;
10100 fault_info.lo_offset = offset;
10101 fault_info.hi_offset = offset + len;
10102 fault_info.user_tag = entry->alias;
10103
10104 /*
10105 * If there's no read permission to this mapping, then just skip it.
10106 */
10107
10108 if ((entry->protection & VM_PROT_READ) == 0) {
10109 continue;
10110 }
10111
10112 /*
10113 * Find the file object backing this map entry. If there is none,
10114 * then we simply ignore the "will need" advice for this entry and
10115 * go on to the next one.
10116 */
10117
10118 if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) {
10119 continue;
10120 }
10121
10122 vm_object_paging_begin(object);
10123 pager = object->pager;
10124 vm_object_unlock(object);
10125
10126 /*
10127 * Get the data from the object asynchronously.
10128 *
10129 * Note that memory_object_data_request() places limits on the amount
10130 * of I/O it will do. Regardless of the len we specified, it won't do
10131 * more than MAX_UPL_TRANSFER and it silently truncates the len to that
10132 * size. This isn't necessarily bad since madvise shouldn't really be
10133 * used to page in unlimited amounts of data. Other Unix variants limit
10134 * the willneed case as well. If this turns out to be an issue for
10135 * developers, then we can always adjust the policy here and still be
10136 * backwards compatible since this is all just "advice".
10137 */
10138
10139 kr = memory_object_data_request(
10140 pager,
10141 offset + object->paging_offset,
10142 0, /* ignored */
10143 VM_PROT_READ,
10144 (memory_object_fault_info_t)&fault_info);
10145
10146 vm_object_lock(object);
10147 vm_object_paging_end(object);
10148 vm_object_unlock(object);
10149
10150 /*
10151 * If we couldn't do the I/O for some reason, just give up on the
10152 * madvise. We still return success to the user since madvise isn't
10153 * supposed to fail when the advice can't be taken.
10154 */
10155
10156 if (kr != KERN_SUCCESS) {
10157 break;
10158 }
10159 }
10160
10161 kr = KERN_SUCCESS;
10162 } else
10163 kr = KERN_INVALID_ADDRESS;
10164
10165 vm_map_unlock_read(map);
10166 return kr;
10167 }
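
/*
 * Illustrative sketch (never compiled): the per-entry clamping arithmetic
 * used in the loop above, worked on made-up numbers. With a request of
 * [0x5000, 0xC000) against an entry covering [0x4000, 0x8000) whose
 * "offset" field is 0x1000, the first pass computes an object offset of
 * 0x2000 and an I/O length of 0x3000, i.e. it stops at the entry's end.
 */
#if 0 /* arithmetic sketch only, never compiled here */
static void
example_willneed_clamp(void)
{
	vm_map_offset_t		start = 0x5000, end = 0xC000;
	vm_map_offset_t		vme_start = 0x4000, vme_end = 0x8000;
	vm_object_offset_t	entry_offset = 0x1000;
	vm_object_offset_t	offset;
	vm_object_size_t	len;

	offset = (start - vme_start) + entry_offset;
	len = MIN(vme_end - start, end - start);

	assert(offset == 0x2000);	/* 0x1000 into the entry + 0x1000 */
	assert(len == 0x3000);		/* clipped at the entry's end */
}
#endif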
10168
10169 static boolean_t
10170 vm_map_entry_is_reusable(
10171 vm_map_entry_t entry)
10172 {
10173 vm_object_t object;
10174
10175 if (entry->is_shared ||
10176 entry->is_sub_map ||
10177 entry->in_transition ||
10178 entry->protection != VM_PROT_DEFAULT ||
10179 entry->max_protection != VM_PROT_ALL ||
10180 entry->inheritance != VM_INHERIT_DEFAULT ||
10181 entry->no_cache ||
10182 entry->permanent ||
10183 entry->superpage_size != 0 ||
10184 entry->zero_wired_pages ||
10185 entry->wired_count != 0 ||
10186 entry->user_wired_count != 0) {
10187 return FALSE;
10188 }
10189
10190 object = entry->object.vm_object;
10191 if (object == VM_OBJECT_NULL) {
10192 return TRUE;
10193 }
10194 if (object->ref_count == 1 &&
10195 object->wired_page_count == 0 &&
10196 object->copy == VM_OBJECT_NULL &&
10197 object->shadow == VM_OBJECT_NULL &&
10198 object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
10199 object->internal &&
10200 !object->true_share &&
10201 object->wimg_bits == VM_WIMG_DEFAULT &&
10202 !object->code_signed) {
10203 return TRUE;
10204 }
10205 return FALSE;
10206
10207
10208 }
10209
10210 static kern_return_t
10211 vm_map_reuse_pages(
10212 vm_map_t map,
10213 vm_map_offset_t start,
10214 vm_map_offset_t end)
10215 {
10216 vm_map_entry_t entry;
10217 vm_object_t object;
10218 vm_object_offset_t start_offset, end_offset;
10219
10220 /*
10221 * The MADV_REUSE operation doesn't require any changes to the
10222 * vm_map_entry_t's, so the read lock is sufficient.
10223 */
10224
10225 vm_map_lock_read(map);
10226
10227 /*
10228 * The madvise semantics require that the address range be fully
10229 * allocated with no holes. Otherwise, we're required to return
10230 * an error.
10231 */
10232
10233 if (!vm_map_range_check(map, start, end, &entry)) {
10234 vm_map_unlock_read(map);
10235 vm_page_stats_reusable.reuse_pages_failure++;
10236 return KERN_INVALID_ADDRESS;
10237 }
10238
10239 /*
10240 * Examine each vm_map_entry_t in the range.
10241 */
10242 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
10243 entry = entry->vme_next) {
10244 /*
10245 * Sanity check on the VM map entry.
10246 */
10247 if (! vm_map_entry_is_reusable(entry)) {
10248 vm_map_unlock_read(map);
10249 vm_page_stats_reusable.reuse_pages_failure++;
10250 return KERN_INVALID_ADDRESS;
10251 }
10252
10253 /*
10254 * The first time through, the start address could be anywhere
10255 * within the vm_map_entry we found. So adjust the offset to
10256 * correspond.
10257 */
10258 if (entry->vme_start < start) {
10259 start_offset = start - entry->vme_start;
10260 } else {
10261 start_offset = 0;
10262 }
10263 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
10264 start_offset += entry->offset;
10265 end_offset += entry->offset;
10266
10267 object = entry->object.vm_object;
10268 if (object != VM_OBJECT_NULL) {
10269 vm_object_lock(object);
10270 vm_object_reuse_pages(object, start_offset, end_offset,
10271 TRUE);
10272 vm_object_unlock(object);
10273 }
10274
10275 if (entry->alias == VM_MEMORY_MALLOC_LARGE_REUSABLE) {
10276 /*
10277 * XXX
10278 * We do not hold the VM map exclusively here.
10279 * The "alias" field is not that critical, so it's
10280 * safe to update it here, as long as it is the only
10281 * one that can be modified while holding the VM map
10282 * "shared".
10283 */
10284 entry->alias = VM_MEMORY_MALLOC_LARGE_REUSED;
10285 }
10286 }
10287
10288 vm_map_unlock_read(map);
10289 vm_page_stats_reusable.reuse_pages_success++;
10290 return KERN_SUCCESS;
10291 }
10292
10293
10294 static kern_return_t
10295 vm_map_reusable_pages(
10296 vm_map_t map,
10297 vm_map_offset_t start,
10298 vm_map_offset_t end)
10299 {
10300 vm_map_entry_t entry;
10301 vm_object_t object;
10302 vm_object_offset_t start_offset, end_offset;
10303
10304 /*
10305 * The MADV_REUSABLE operation doesn't require any changes to the
10306 * vm_map_entry_t's, so the read lock is sufficient.
10307 */
10308
10309 vm_map_lock_read(map);
10310
10311 /*
10312 * The madvise semantics require that the address range be fully
10313 * allocated with no holes. Otherwise, we're required to return
10314 * an error.
10315 */
10316
10317 if (!vm_map_range_check(map, start, end, &entry)) {
10318 vm_map_unlock_read(map);
10319 vm_page_stats_reusable.reusable_pages_failure++;
10320 return KERN_INVALID_ADDRESS;
10321 }
10322
10323 /*
10324 * Examine each vm_map_entry_t in the range.
10325 */
10326 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
10327 entry = entry->vme_next) {
10328 int kill_pages = 0;
10329
10330 /*
10331 * Sanity check on the VM map entry.
10332 */
10333 if (! vm_map_entry_is_reusable(entry)) {
10334 vm_map_unlock_read(map);
10335 vm_page_stats_reusable.reusable_pages_failure++;
10336 return KERN_INVALID_ADDRESS;
10337 }
10338
10339 /*
10340 * The first time through, the start address could be anywhere
10341 * within the vm_map_entry we found. So adjust the offset to
10342 * correspond.
10343 */
10344 if (entry->vme_start < start) {
10345 start_offset = start - entry->vme_start;
10346 } else {
10347 start_offset = 0;
10348 }
10349 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
10350 start_offset += entry->offset;
10351 end_offset += entry->offset;
10352
10353 object = entry->object.vm_object;
10354 if (object == VM_OBJECT_NULL)
10355 continue;
10356
10357
10358 vm_object_lock(object);
10359 if (object->ref_count == 1 && !object->shadow)
10360 kill_pages = 1;
10361 else
10362 kill_pages = -1;
10363 if (kill_pages != -1) {
10364 vm_object_deactivate_pages(object,
10365 start_offset,
10366 end_offset - start_offset,
10367 kill_pages,
10368 TRUE /*reusable_pages*/);
10369 } else {
10370 vm_page_stats_reusable.reusable_pages_shared++;
10371 }
10372 vm_object_unlock(object);
10373
10374 if (entry->alias == VM_MEMORY_MALLOC_LARGE ||
10375 entry->alias == VM_MEMORY_MALLOC_LARGE_REUSED) {
10376 /*
10377 * XXX
10378 * We do not hold the VM map exclusively here.
10379 * The "alias" field is not that critical, so it's
10380 * safe to update it here, as long as it is the only
10381 * one that can be modified while holding the VM map
10382 * "shared".
10383 */
10384 entry->alias = VM_MEMORY_MALLOC_LARGE_REUSABLE;
10385 }
10386 }
10387
10388 vm_map_unlock_read(map);
10389 vm_page_stats_reusable.reusable_pages_success++;
10390 return KERN_SUCCESS;
10391 }
10392
10393
10394 static kern_return_t
10395 vm_map_can_reuse(
10396 vm_map_t map,
10397 vm_map_offset_t start,
10398 vm_map_offset_t end)
10399 {
10400 vm_map_entry_t entry;
10401
10402 /*
10403 * The MADV_REUSABLE operation doesn't require any changes to the
10404 * vm_map_entry_t's, so the read lock is sufficient.
10405 */
10406
10407 vm_map_lock_read(map);
10408
10409 /*
10410 * The madvise semantics require that the address range be fully
10411 * allocated with no holes. Otherwise, we're required to return
10412 * an error.
10413 */
10414
10415 if (!vm_map_range_check(map, start, end, &entry)) {
10416 vm_map_unlock_read(map);
10417 vm_page_stats_reusable.can_reuse_failure++;
10418 return KERN_INVALID_ADDRESS;
10419 }
10420
10421 /*
10422 * Examine each vm_map_entry_t in the range.
10423 */
10424 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
10425 entry = entry->vme_next) {
10426 /*
10427 * Sanity check on the VM map entry.
10428 */
10429 if (! vm_map_entry_is_reusable(entry)) {
10430 vm_map_unlock_read(map);
10431 vm_page_stats_reusable.can_reuse_failure++;
10432 return KERN_INVALID_ADDRESS;
10433 }
10434 }
10435
10436 vm_map_unlock_read(map);
10437 vm_page_stats_reusable.can_reuse_success++;
10438 return KERN_SUCCESS;
10439 }
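
/*
 * Illustrative sketch (never compiled): the intended user-level protocol
 * behind vm_map_reusable_pages(), vm_map_reuse_pages() and
 * vm_map_can_reuse(), as a malloc-style allocator might use it. The
 * MADV_FREE_REUSABLE / MADV_FREE_REUSE advice values are assumed to be
 * the Darwin madvise(2) spellings that reach these routines; that
 * translation lives outside this file.
 */
#if 0 /* user-space example only, never compiled here */
#include <sys/mman.h>

static void
example_reusable_protocol(void *block, size_t len)
{
	/* the block is free: let the VM treat its pages as reusable */
	(void) madvise(block, len, MADV_FREE_REUSABLE);

	/* ... later, just before handing the block out again ... */
	(void) madvise(block, len, MADV_FREE_REUSE);
}
#endif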
10440
10441
10442
10443 #include <mach_kdb.h>
10444 #if MACH_KDB
10445 #include <ddb/db_output.h>
10446 #include <vm/vm_print.h>
10447
10448 #define printf db_printf
10449
10450 /*
10451 * Forward declarations for internal functions.
10452 */
10453 extern void vm_map_links_print(
10454 struct vm_map_links *links);
10455
10456 extern void vm_map_header_print(
10457 struct vm_map_header *header);
10458
10459 extern void vm_map_entry_print(
10460 vm_map_entry_t entry);
10461
10462 extern void vm_follow_entry(
10463 vm_map_entry_t entry);
10464
10465 extern void vm_follow_map(
10466 vm_map_t map);
10467
10468 /*
10469 * vm_map_links_print: [ debug ]
10470 */
10471 void
10472 vm_map_links_print(
10473 struct vm_map_links *links)
10474 {
10475 iprintf("prev = %08X next = %08X start = %016llX end = %016llX\n",
10476 links->prev,
10477 links->next,
10478 (unsigned long long)links->start,
10479 (unsigned long long)links->end);
10480 }
10481
10482 /*
10483 * vm_map_header_print: [ debug ]
10484 */
10485 void
10486 vm_map_header_print(
10487 struct vm_map_header *header)
10488 {
10489 vm_map_links_print(&header->links);
10490 iprintf("nentries = %08X, %sentries_pageable\n",
10491 header->nentries,
10492 (header->entries_pageable ? "" : "!"));
10493 }
10494
10495 /*
10496 * vm_follow_entry: [ debug ]
10497 */
10498 void
10499 vm_follow_entry(
10500 vm_map_entry_t entry)
10501 {
10502 int shadows;
10503
10504 iprintf("map entry %08X\n", entry);
10505
10506 db_indent += 2;
10507
10508 shadows = vm_follow_object(entry->object.vm_object);
10509 iprintf("Total objects : %d\n",shadows);
10510
10511 db_indent -= 2;
10512 }
10513
10514 /*
10515 * vm_map_entry_print: [ debug ]
10516 */
10517 void
10518 vm_map_entry_print(
10519 register vm_map_entry_t entry)
10520 {
10521 static const char *inheritance_name[4] =
10522 { "share", "copy", "none", "?"};
10523 static const char *behavior_name[4] =
10524 { "dflt", "rand", "seqtl", "rseqntl" };
10525
10526 iprintf("map entry %08X - prev = %08X next = %08X\n", entry, entry->vme_prev, entry->vme_next);
10527
10528 db_indent += 2;
10529
10530 vm_map_links_print(&entry->links);
10531
10532 iprintf("start = %016llX end = %016llX - prot=%x/%x/%s\n",
10533 (unsigned long long)entry->vme_start,
10534 (unsigned long long)entry->vme_end,
10535 entry->protection,
10536 entry->max_protection,
10537 inheritance_name[(entry->inheritance & 0x3)]);
10538
10539 iprintf("behavior = %s, wired_count = %d, user_wired_count = %d\n",
10540 behavior_name[(entry->behavior & 0x3)],
10541 entry->wired_count,
10542 entry->user_wired_count);
10543 iprintf("%sin_transition, %sneeds_wakeup\n",
10544 (entry->in_transition ? "" : "!"),
10545 (entry->needs_wakeup ? "" : "!"));
10546
10547 if (entry->is_sub_map) {
10548 iprintf("submap = %08X - offset = %016llX\n",
10549 entry->object.sub_map,
10550 (unsigned long long)entry->offset);
10551 } else {
10552 iprintf("object = %08X offset = %016llX - ",
10553 entry->object.vm_object,
10554 (unsigned long long)entry->offset);
10555 printf("%sis_shared, %sneeds_copy\n",
10556 (entry->is_shared ? "" : "!"),
10557 (entry->needs_copy ? "" : "!"));
10558 }
10559
10560 db_indent -= 2;
10561 }
10562
10563 /*
10564 * vm_follow_map: [ debug ]
10565 */
10566 void
10567 vm_follow_map(
10568 vm_map_t map)
10569 {
10570 register vm_map_entry_t entry;
10571
10572 iprintf("task map %08X\n", map);
10573
10574 db_indent += 2;
10575
10576 for (entry = vm_map_first_entry(map);
10577 entry && entry != vm_map_to_entry(map);
10578 entry = entry->vme_next) {
10579 vm_follow_entry(entry);
10580 }
10581
10582 db_indent -= 2;
10583 }
10584
10585 /*
10586 * vm_map_print: [ debug ]
10587 */
10588 void
10589 vm_map_print(
10590 db_addr_t inmap)
10591 {
10592 register vm_map_entry_t entry;
10593 vm_map_t map;
10594 #if TASK_SWAPPER
10595 char *swstate;
10596 #endif /* TASK_SWAPPER */
10597
10598 map = (vm_map_t)(long)
10599 inmap; /* Make sure we have the right type */
10600
10601 iprintf("task map %08X\n", map);
10602
10603 db_indent += 2;
10604
10605 vm_map_header_print(&map->hdr);
10606
10607 iprintf("pmap = %08X size = %08X ref = %d hint = %08X first_free = %08X\n",
10608 map->pmap,
10609 map->size,
10610 map->ref_count,
10611 map->hint,
10612 map->first_free);
10613
10614 iprintf("%swait_for_space, %swiring_required, timestamp = %d\n",
10615 (map->wait_for_space ? "" : "!"),
10616 (map->wiring_required ? "" : "!"),
10617 map->timestamp);
10618
10619 #if TASK_SWAPPER
10620 switch (map->sw_state) {
10621 case MAP_SW_IN:
10622 swstate = "SW_IN";
10623 break;
10624 case MAP_SW_OUT:
10625 swstate = "SW_OUT";
10626 break;
10627 default:
10628 swstate = "????";
10629 break;
10630 }
10631 iprintf("res = %d, sw_state = %s\n", map->res_count, swstate);
10632 #endif /* TASK_SWAPPER */
10633
10634 for (entry = vm_map_first_entry(map);
10635 entry && entry != vm_map_to_entry(map);
10636 entry = entry->vme_next) {
10637 vm_map_entry_print(entry);
10638 }
10639
10640 db_indent -= 2;
10641 }
10642
10643 /*
10644 * Routine: vm_map_copy_print
10645 * Purpose:
10646 * Pretty-print a copy object for ddb.
10647 */
10648
10649 void
10650 vm_map_copy_print(
10651 db_addr_t incopy)
10652 {
10653 vm_map_copy_t copy;
10654 vm_map_entry_t entry;
10655
10656 copy = (vm_map_copy_t)(long)
10657 incopy; /* Make sure we have the right type */
10658
10659 printf("copy object 0x%x\n", copy);
10660
10661 db_indent += 2;
10662
10663 iprintf("type=%d", copy->type);
10664 switch (copy->type) {
10665 case VM_MAP_COPY_ENTRY_LIST:
10666 printf("[entry_list]");
10667 break;
10668
10669 case VM_MAP_COPY_OBJECT:
10670 printf("[object]");
10671 break;
10672
10673 case VM_MAP_COPY_KERNEL_BUFFER:
10674 printf("[kernel_buffer]");
10675 break;
10676
10677 default:
10678 printf("[bad type]");
10679 break;
10680 }
10681 printf(", offset=0x%llx", (unsigned long long)copy->offset);
10682 printf(", size=0x%x\n", copy->size);
10683
10684 switch (copy->type) {
10685 case VM_MAP_COPY_ENTRY_LIST:
10686 vm_map_header_print(&copy->cpy_hdr);
10687 for (entry = vm_map_copy_first_entry(copy);
10688 entry && entry != vm_map_copy_to_entry(copy);
10689 entry = entry->vme_next) {
10690 vm_map_entry_print(entry);
10691 }
10692 break;
10693
10694 case VM_MAP_COPY_OBJECT:
10695 iprintf("object=0x%x\n", copy->cpy_object);
10696 break;
10697
10698 case VM_MAP_COPY_KERNEL_BUFFER:
10699 iprintf("kernel buffer=0x%x", copy->cpy_kdata);
10700 printf(", kalloc_size=0x%x\n", copy->cpy_kalloc_size);
10701 break;
10702
10703 }
10704
10705 db_indent -=2;
10706 }
10707
10708 /*
10709 * db_vm_map_total_size(map) [ debug ]
10710 *
10711 * return the total virtual size (in bytes) of the map
10712 */
10713 vm_map_size_t
10714 db_vm_map_total_size(
10715 db_addr_t inmap)
10716 {
10717 vm_map_entry_t entry;
10718 vm_map_size_t total;
10719 vm_map_t map;
10720
10721 map = (vm_map_t)(long)
10722 inmap; /* Make sure we have the right type */
10723
10724 total = 0;
10725 for (entry = vm_map_first_entry(map);
10726 entry != vm_map_to_entry(map);
10727 entry = entry->vme_next) {
10728 total += entry->vme_end - entry->vme_start;
10729 }
10730
10731 return total;
10732 }
10733
10734 #endif /* MACH_KDB */
10735
10736 /*
10737 * Routine: vm_map_entry_insert
10738 *
10739 * Description: This routine inserts a new vm_map_entry into a locked map.
10740 */
10741 vm_map_entry_t
10742 vm_map_entry_insert(
10743 vm_map_t map,
10744 vm_map_entry_t insp_entry,
10745 vm_map_offset_t start,
10746 vm_map_offset_t end,
10747 vm_object_t object,
10748 vm_object_offset_t offset,
10749 boolean_t needs_copy,
10750 boolean_t is_shared,
10751 boolean_t in_transition,
10752 vm_prot_t cur_protection,
10753 vm_prot_t max_protection,
10754 vm_behavior_t behavior,
10755 vm_inherit_t inheritance,
10756 unsigned wired_count,
10757 boolean_t no_cache,
10758 boolean_t permanent,
10759 unsigned int superpage_size)
10760 {
10761 vm_map_entry_t new_entry;
10762
10763 assert(insp_entry != (vm_map_entry_t)0);
10764
10765 new_entry = vm_map_entry_create(map);
10766
10767 new_entry->vme_start = start;
10768 new_entry->vme_end = end;
10769 assert(page_aligned(new_entry->vme_start));
10770 assert(page_aligned(new_entry->vme_end));
10771
10772 new_entry->object.vm_object = object;
10773 new_entry->offset = offset;
10774 new_entry->is_shared = is_shared;
10775 new_entry->is_sub_map = FALSE;
10776 new_entry->needs_copy = needs_copy;
10777 new_entry->in_transition = in_transition;
10778 new_entry->needs_wakeup = FALSE;
10779 new_entry->inheritance = inheritance;
10780 new_entry->protection = cur_protection;
10781 new_entry->max_protection = max_protection;
10782 new_entry->behavior = behavior;
10783 new_entry->wired_count = wired_count;
10784 new_entry->user_wired_count = 0;
10785 new_entry->use_pmap = FALSE;
10786 new_entry->alias = 0;
10787 new_entry->zero_wired_pages = FALSE;
10788 new_entry->no_cache = no_cache;
10789 new_entry->permanent = permanent;
10790 new_entry->superpage_size = superpage_size;
10791
10792 /*
10793 * Insert the new entry into the list.
10794 */
10795
10796 vm_map_entry_link(map, insp_entry, new_entry);
10797 map->size += end - start;
10798
10799 /*
10800 * Update the free space hint and the lookup hint.
10801 */
10802
10803 SAVE_HINT_MAP_WRITE(map, new_entry);
10804 return new_entry;
10805 }
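
/*
 * Illustrative sketch (never compiled): a minimal caller of
 * vm_map_entry_insert(), adding an anonymous, unwired, default-behavior
 * entry after the entry preceding "start". The map must already be locked
 * for writing and [start, end) is assumed not to overlap any existing
 * entry; argument order follows the definition above.
 */
#if 0 /* sketch only, never compiled here */
static void
example_entry_insert(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	vm_map_entry_t	insp_entry;

	vm_map_lock(map);
	/* on a miss, insp_entry is the entry preceding "start" */
	(void) vm_map_lookup_entry(map, start, &insp_entry);
	(void) vm_map_entry_insert(map, insp_entry, start, end,
				   VM_OBJECT_NULL, 0,
				   FALSE,		/* needs_copy */
				   FALSE,		/* is_shared */
				   FALSE,		/* in_transition */
				   VM_PROT_DEFAULT, VM_PROT_ALL,
				   VM_BEHAVIOR_DEFAULT,
				   VM_INHERIT_DEFAULT,
				   0,			/* wired_count */
				   FALSE,		/* no_cache */
				   FALSE,		/* permanent */
				   0);			/* superpage_size */
	vm_map_unlock(map);
}
#endif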
10806
10807 /*
10808 * Routine: vm_map_remap_extract
10809 *
10810 * Description: This routine returns a vm_map_entry list extracted from a map.
10811 */
10812 static kern_return_t
10813 vm_map_remap_extract(
10814 vm_map_t map,
10815 vm_map_offset_t addr,
10816 vm_map_size_t size,
10817 boolean_t copy,
10818 struct vm_map_header *map_header,
10819 vm_prot_t *cur_protection,
10820 vm_prot_t *max_protection,
10821 /* What, no behavior? */
10822 vm_inherit_t inheritance,
10823 boolean_t pageable)
10824 {
10825 kern_return_t result;
10826 vm_map_size_t mapped_size;
10827 vm_map_size_t tmp_size;
10828 vm_map_entry_t src_entry; /* result of last map lookup */
10829 vm_map_entry_t new_entry;
10830 vm_object_offset_t offset;
10831 vm_map_offset_t map_address;
10832 vm_map_offset_t src_start; /* start of entry to map */
10833 vm_map_offset_t src_end; /* end of region to be mapped */
10834 vm_object_t object;
10835 vm_map_version_t version;
10836 boolean_t src_needs_copy;
10837 boolean_t new_entry_needs_copy;
10838
10839 assert(map != VM_MAP_NULL);
10840 assert(size != 0 && size == vm_map_round_page(size));
10841 assert(inheritance == VM_INHERIT_NONE ||
10842 inheritance == VM_INHERIT_COPY ||
10843 inheritance == VM_INHERIT_SHARE);
10844
10845 /*
10846 * Compute start and end of region.
10847 */
10848 src_start = vm_map_trunc_page(addr);
10849 src_end = vm_map_round_page(src_start + size);
10850
10851 /*
10852 * Initialize map_header.
10853 */
10854 map_header->links.next = (struct vm_map_entry *)&map_header->links;
10855 map_header->links.prev = (struct vm_map_entry *)&map_header->links;
10856 map_header->nentries = 0;
10857 map_header->entries_pageable = pageable;
10858
10859 *cur_protection = VM_PROT_ALL;
10860 *max_protection = VM_PROT_ALL;
10861
10862 map_address = 0;
10863 mapped_size = 0;
10864 result = KERN_SUCCESS;
10865
10866 /*
10867 * The specified source virtual space might correspond to
10868 * multiple map entries, need to loop on them.
10869 */
10870 vm_map_lock(map);
10871 while (mapped_size != size) {
10872 vm_map_size_t entry_size;
10873
10874 /*
10875 * Find the beginning of the region.
10876 */
10877 if (! vm_map_lookup_entry(map, src_start, &src_entry)) {
10878 result = KERN_INVALID_ADDRESS;
10879 break;
10880 }
10881
10882 if (src_start < src_entry->vme_start ||
10883 (mapped_size && src_start != src_entry->vme_start)) {
10884 result = KERN_INVALID_ADDRESS;
10885 break;
10886 }
10887
10888 tmp_size = size - mapped_size;
10889 if (src_end > src_entry->vme_end)
10890 tmp_size -= (src_end - src_entry->vme_end);
10891
10892 entry_size = (vm_map_size_t)(src_entry->vme_end -
10893 src_entry->vme_start);
10894
10895 if(src_entry->is_sub_map) {
10896 vm_map_reference(src_entry->object.sub_map);
10897 object = VM_OBJECT_NULL;
10898 } else {
10899 object = src_entry->object.vm_object;
10900
10901 if (object == VM_OBJECT_NULL) {
10902 object = vm_object_allocate(entry_size);
10903 src_entry->offset = 0;
10904 src_entry->object.vm_object = object;
10905 } else if (object->copy_strategy !=
10906 MEMORY_OBJECT_COPY_SYMMETRIC) {
10907 /*
10908 * We are already using an asymmetric
10909 * copy, and therefore we already have
10910 * the right object.
10911 */
10912 assert(!src_entry->needs_copy);
10913 } else if (src_entry->needs_copy || object->shadowed ||
10914 (object->internal && !object->true_share &&
10915 !src_entry->is_shared &&
10916 object->size > entry_size)) {
10917
10918 vm_object_shadow(&src_entry->object.vm_object,
10919 &src_entry->offset,
10920 entry_size);
10921
10922 if (!src_entry->needs_copy &&
10923 (src_entry->protection & VM_PROT_WRITE)) {
10924 vm_prot_t prot;
10925
10926 prot = src_entry->protection & ~VM_PROT_WRITE;
10927
10928 if (override_nx(map, src_entry->alias) && prot)
10929 prot |= VM_PROT_EXECUTE;
10930
10931 if(map->mapped) {
10932 vm_object_pmap_protect(
10933 src_entry->object.vm_object,
10934 src_entry->offset,
10935 entry_size,
10936 PMAP_NULL,
10937 src_entry->vme_start,
10938 prot);
10939 } else {
10940 pmap_protect(vm_map_pmap(map),
10941 src_entry->vme_start,
10942 src_entry->vme_end,
10943 prot);
10944 }
10945 }
10946
10947 object = src_entry->object.vm_object;
10948 src_entry->needs_copy = FALSE;
10949 }
10950
10951
10952 vm_object_lock(object);
10953 vm_object_reference_locked(object); /* object ref. for new entry */
10954 if (object->copy_strategy ==
10955 MEMORY_OBJECT_COPY_SYMMETRIC) {
10956 object->copy_strategy =
10957 MEMORY_OBJECT_COPY_DELAY;
10958 }
10959 vm_object_unlock(object);
10960 }
10961
10962 offset = src_entry->offset + (src_start - src_entry->vme_start);
10963
10964 new_entry = _vm_map_entry_create(map_header);
10965 vm_map_entry_copy(new_entry, src_entry);
10966 new_entry->use_pmap = FALSE; /* clr address space specifics */
10967
10968 new_entry->vme_start = map_address;
10969 new_entry->vme_end = map_address + tmp_size;
10970 new_entry->inheritance = inheritance;
10971 new_entry->offset = offset;
10972
10973 /*
10974 * The new region has to be copied now if required.
10975 */
10976 RestartCopy:
10977 if (!copy) {
10978 src_entry->is_shared = TRUE;
10979 new_entry->is_shared = TRUE;
10980 if (!(new_entry->is_sub_map))
10981 new_entry->needs_copy = FALSE;
10982
10983 } else if (src_entry->is_sub_map) {
10984 /* make this a COW sub_map if not already */
10985 new_entry->needs_copy = TRUE;
10986 object = VM_OBJECT_NULL;
10987 } else if (src_entry->wired_count == 0 &&
10988 vm_object_copy_quickly(&new_entry->object.vm_object,
10989 new_entry->offset,
10990 (new_entry->vme_end -
10991 new_entry->vme_start),
10992 &src_needs_copy,
10993 &new_entry_needs_copy)) {
10994
10995 new_entry->needs_copy = new_entry_needs_copy;
10996 new_entry->is_shared = FALSE;
10997
10998 /*
10999 * Handle copy_on_write semantics.
11000 */
11001 if (src_needs_copy && !src_entry->needs_copy) {
11002 vm_prot_t prot;
11003
11004 prot = src_entry->protection & ~VM_PROT_WRITE;
11005
11006 if (override_nx(map, src_entry->alias) && prot)
11007 prot |= VM_PROT_EXECUTE;
11008
11009 vm_object_pmap_protect(object,
11010 offset,
11011 entry_size,
11012 ((src_entry->is_shared
11013 || map->mapped) ?
11014 PMAP_NULL : map->pmap),
11015 src_entry->vme_start,
11016 prot);
11017
11018 src_entry->needs_copy = TRUE;
11019 }
11020 /*
11021 * Throw away the old object reference of the new entry.
11022 */
11023 vm_object_deallocate(object);
11024
11025 } else {
11026 new_entry->is_shared = FALSE;
11027
11028 /*
11029 * The map can be safely unlocked since we
11030 * already hold a reference on the object.
11031 *
11032 * Record the timestamp of the map for later
11033 * verification, and unlock the map.
11034 */
11035 version.main_timestamp = map->timestamp;
11036 vm_map_unlock(map); /* Increments timestamp once! */
11037
11038 /*
11039 * Perform the copy.
11040 */
11041 if (src_entry->wired_count > 0) {
11042 vm_object_lock(object);
11043 result = vm_object_copy_slowly(
11044 object,
11045 offset,
11046 entry_size,
11047 THREAD_UNINT,
11048 &new_entry->object.vm_object);
11049
11050 new_entry->offset = 0;
11051 new_entry->needs_copy = FALSE;
11052 } else {
11053 result = vm_object_copy_strategically(
11054 object,
11055 offset,
11056 entry_size,
11057 &new_entry->object.vm_object,
11058 &new_entry->offset,
11059 &new_entry_needs_copy);
11060
11061 new_entry->needs_copy = new_entry_needs_copy;
11062 }
11063
11064 /*
11065 * Throw away the old object reference of the new entry.
11066 */
11067 vm_object_deallocate(object);
11068
11069 if (result != KERN_SUCCESS &&
11070 result != KERN_MEMORY_RESTART_COPY) {
11071 _vm_map_entry_dispose(map_header, new_entry);
11072 break;
11073 }
11074
11075 /*
11076 * Verify that the map has not substantially
11077 * changed while the copy was being made.
11078 */
11079
11080 vm_map_lock(map);
11081 if (version.main_timestamp + 1 != map->timestamp) {
11082 /*
11083 * Simple version comparison failed.
11084 *
11085 * Retry the lookup and verify that the
11086 * same object/offset are still present.
11087 */
11088 vm_object_deallocate(new_entry->
11089 object.vm_object);
11090 _vm_map_entry_dispose(map_header, new_entry);
11091 if (result == KERN_MEMORY_RESTART_COPY)
11092 result = KERN_SUCCESS;
11093 continue;
11094 }
11095
11096 if (result == KERN_MEMORY_RESTART_COPY) {
11097 vm_object_reference(object);
11098 goto RestartCopy;
11099 }
11100 }
11101
11102 _vm_map_entry_link(map_header,
11103 map_header->links.prev, new_entry);
11104
11105 *cur_protection &= src_entry->protection;
11106 *max_protection &= src_entry->max_protection;
11107
11108 map_address += tmp_size;
11109 mapped_size += tmp_size;
11110 src_start += tmp_size;
11111
11112 } /* end while */
11113
11114 vm_map_unlock(map);
11115 if (result != KERN_SUCCESS) {
11116 /*
11117 * Free all allocated elements.
11118 */
11119 for (src_entry = map_header->links.next;
11120 src_entry != (struct vm_map_entry *)&map_header->links;
11121 src_entry = new_entry) {
11122 new_entry = src_entry->vme_next;
11123 _vm_map_entry_unlink(map_header, src_entry);
11124 vm_object_deallocate(src_entry->object.vm_object);
11125 _vm_map_entry_dispose(map_header, src_entry);
11126 }
11127 }
11128 return result;
11129 }
11130
11131 /*
11132 * Routine: vm_remap
11133 *
11134 * Map portion of a task's address space.
11135 * Mapped region must not overlap more than
11136 * one vm memory object. Protections and
11137 * inheritance attributes remain the same
11138 * as in the original task and are out parameters.
11139 * Source and target tasks can be identical.
11140 * Other attributes are the same as for vm_map().
11141 */
11142 kern_return_t
11143 vm_map_remap(
11144 vm_map_t target_map,
11145 vm_map_address_t *address,
11146 vm_map_size_t size,
11147 vm_map_offset_t mask,
11148 boolean_t anywhere,
11149 vm_map_t src_map,
11150 vm_map_offset_t memory_address,
11151 boolean_t copy,
11152 vm_prot_t *cur_protection,
11153 vm_prot_t *max_protection,
11154 vm_inherit_t inheritance)
11155 {
11156 kern_return_t result;
11157 vm_map_entry_t entry;
11158 vm_map_entry_t insp_entry = VM_MAP_ENTRY_NULL;
11159 vm_map_entry_t new_entry;
11160 struct vm_map_header map_header;
11161
11162 if (target_map == VM_MAP_NULL)
11163 return KERN_INVALID_ARGUMENT;
11164
11165 switch (inheritance) {
11166 case VM_INHERIT_NONE:
11167 case VM_INHERIT_COPY:
11168 case VM_INHERIT_SHARE:
11169 if (size != 0 && src_map != VM_MAP_NULL)
11170 break;
11171 /*FALL THRU*/
11172 default:
11173 return KERN_INVALID_ARGUMENT;
11174 }
11175
11176 size = vm_map_round_page(size);
11177
11178 result = vm_map_remap_extract(src_map, memory_address,
11179 size, copy, &map_header,
11180 cur_protection,
11181 max_protection,
11182 inheritance,
11183 target_map->hdr.
11184 entries_pageable);
11185
11186 if (result != KERN_SUCCESS) {
11187 return result;
11188 }
11189
11190 /*
11191 * Allocate/check a range of free virtual address
11192 * space for the target
11193 */
11194 *address = vm_map_trunc_page(*address);
11195 vm_map_lock(target_map);
11196 result = vm_map_remap_range_allocate(target_map, address, size,
11197 mask, anywhere, &insp_entry);
11198
11199 for (entry = map_header.links.next;
11200 entry != (struct vm_map_entry *)&map_header.links;
11201 entry = new_entry) {
11202 new_entry = entry->vme_next;
11203 _vm_map_entry_unlink(&map_header, entry);
11204 if (result == KERN_SUCCESS) {
11205 entry->vme_start += *address;
11206 entry->vme_end += *address;
11207 vm_map_entry_link(target_map, insp_entry, entry);
11208 insp_entry = entry;
11209 } else {
11210 if (!entry->is_sub_map) {
11211 vm_object_deallocate(entry->object.vm_object);
11212 } else {
11213 vm_map_deallocate(entry->object.sub_map);
11214 }
11215 _vm_map_entry_dispose(&map_header, entry);
11216 }
11217 }
11218
11219 if (result == KERN_SUCCESS) {
11220 target_map->size += size;
11221 SAVE_HINT_MAP_WRITE(target_map, insp_entry);
11222 }
11223 vm_map_unlock(target_map);
11224
11225 if (result == KERN_SUCCESS && target_map->wiring_required)
11226 result = vm_map_wire(target_map, *address,
11227 *address + size, *cur_protection, TRUE);
11228 return result;
11229 }
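
/*
 * Illustrative sketch (never compiled): sharing a page-aligned region of
 * src_map into target_map with vm_map_remap(), letting the kernel pick
 * the target address ("anywhere") and reporting the resulting protections
 * through the out parameters.
 */
#if 0 /* sketch only, never compiled here */
static kern_return_t
example_remap_share(
	vm_map_t		target_map,
	vm_map_t		src_map,
	vm_map_offset_t		src_addr,
	vm_map_size_t		size,
	vm_map_address_t	*out_addr)
{
	vm_prot_t	cur_prot, max_prot;

	*out_addr = 0;			/* no preferred address */
	return vm_map_remap(target_map, out_addr, size,
			    0,		/* mask: no alignment constraint */
			    TRUE,	/* anywhere */
			    src_map, src_addr,
			    FALSE,	/* copy: share, don't copy */
			    &cur_prot, &max_prot,
			    VM_INHERIT_NONE);
}
#endif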
11230
11231 /*
11232 * Routine: vm_map_remap_range_allocate
11233 *
11234 * Description:
11235 * Allocate a range in the specified virtual address map.
11236 * Returns the address and the map entry just before the allocated
11237 * range
11238 *
11239 * Map must be locked.
11240 */
11241
11242 static kern_return_t
11243 vm_map_remap_range_allocate(
11244 vm_map_t map,
11245 vm_map_address_t *address, /* IN/OUT */
11246 vm_map_size_t size,
11247 vm_map_offset_t mask,
11248 boolean_t anywhere,
11249 vm_map_entry_t *map_entry) /* OUT */
11250 {
11251 register vm_map_entry_t entry;
11252 register vm_map_offset_t start;
11253 register vm_map_offset_t end;
11254
11255 StartAgain: ;
11256
11257 start = *address;
11258
11259 if (anywhere)
11260 {
11261 /*
11262 * Calculate the first possible address.
11263 */
11264
11265 if (start < map->min_offset)
11266 start = map->min_offset;
11267 if (start > map->max_offset)
11268 return(KERN_NO_SPACE);
11269
11270 /*
11271 * Look for the first possible address;
11272 * if there's already something at this
11273 * address, we have to start after it.
11274 */
11275
11276 assert(first_free_is_valid(map));
11277 if (start == map->min_offset) {
11278 if ((entry = map->first_free) != vm_map_to_entry(map))
11279 start = entry->vme_end;
11280 } else {
11281 vm_map_entry_t tmp_entry;
11282 if (vm_map_lookup_entry(map, start, &tmp_entry))
11283 start = tmp_entry->vme_end;
11284 entry = tmp_entry;
11285 }
11286
11287 /*
11288 * In any case, the "entry" always precedes
11289 * the proposed new region throughout the
11290 * loop:
11291 */
11292
11293 while (TRUE) {
11294 register vm_map_entry_t next;
11295
11296 /*
11297 * Find the end of the proposed new region.
11298 * Be sure we didn't go beyond the end, or
11299 * wrap around the address.
11300 */
11301
11302 end = ((start + mask) & ~mask);
11303 if (end < start)
11304 return(KERN_NO_SPACE);
11305 start = end;
11306 end += size;
11307
11308 if ((end > map->max_offset) || (end < start)) {
11309 if (map->wait_for_space) {
11310 if (size <= (map->max_offset -
11311 map->min_offset)) {
11312 assert_wait((event_t) map, THREAD_INTERRUPTIBLE);
11313 vm_map_unlock(map);
11314 thread_block(THREAD_CONTINUE_NULL);
11315 vm_map_lock(map);
11316 goto StartAgain;
11317 }
11318 }
11319
11320 return(KERN_NO_SPACE);
11321 }
11322
11323 /*
11324 * If there are no more entries, we must win.
11325 */
11326
11327 next = entry->vme_next;
11328 if (next == vm_map_to_entry(map))
11329 break;
11330
11331 /*
11332 * If there is another entry, it must be
11333 * after the end of the potential new region.
11334 */
11335
11336 if (next->vme_start >= end)
11337 break;
11338
11339 /*
11340 * Didn't fit -- move to the next entry.
11341 */
11342
11343 entry = next;
11344 start = entry->vme_end;
11345 }
11346 *address = start;
11347 } else {
11348 vm_map_entry_t temp_entry;
11349
11350 /*
11351 * Verify that:
11352 * the address doesn't itself violate
11353 * the mask requirement.
11354 */
11355
11356 if ((start & mask) != 0)
11357 return(KERN_NO_SPACE);
11358
11359
11360 /*
11361 * ... the address is within bounds
11362 */
11363
11364 end = start + size;
11365
11366 if ((start < map->min_offset) ||
11367 (end > map->max_offset) ||
11368 (start >= end)) {
11369 return(KERN_INVALID_ADDRESS);
11370 }
11371
11372 /*
11373 * ... the starting address isn't allocated
11374 */
11375
11376 if (vm_map_lookup_entry(map, start, &temp_entry))
11377 return(KERN_NO_SPACE);
11378
11379 entry = temp_entry;
11380
11381 /*
11382 * ... the next region doesn't overlap the
11383 * end point.
11384 */
11385
11386 if ((entry->vme_next != vm_map_to_entry(map)) &&
11387 (entry->vme_next->vme_start < end))
11388 return(KERN_NO_SPACE);
11389 }
11390 *map_entry = entry;
11391 return(KERN_SUCCESS);
11392 }
11393
11394 /*
11395 * vm_map_switch:
11396 *
11397 * Set the address map for the current thread to the specified map
11398 */
11399
11400 vm_map_t
11401 vm_map_switch(
11402 vm_map_t map)
11403 {
11404 int mycpu;
11405 thread_t thread = current_thread();
11406 vm_map_t oldmap = thread->map;
11407
11408 mp_disable_preemption();
11409 mycpu = cpu_number();
11410
11411 /*
11412 * Deactivate the current map and activate the requested map
11413 */
11414 PMAP_SWITCH_USER(thread, map, mycpu);
11415
11416 mp_enable_preemption();
11417 return(oldmap);
11418 }
11419
11420
11421 /*
11422 * Routine: vm_map_write_user
11423 *
11424 * Description:
11425 * Copy out data from a kernel space into space in the
11426 * destination map. The space must already exist in the
11427 * destination map.
11428 * NOTE: This routine should only be called by threads
11429 * which can block on a page fault, i.e. kernel mode user
11430 * threads.
11431 *
11432 */
11433 kern_return_t
11434 vm_map_write_user(
11435 vm_map_t map,
11436 void *src_p,
11437 vm_map_address_t dst_addr,
11438 vm_size_t size)
11439 {
11440 kern_return_t kr = KERN_SUCCESS;
11441
11442 if(current_map() == map) {
11443 if (copyout(src_p, dst_addr, size)) {
11444 kr = KERN_INVALID_ADDRESS;
11445 }
11446 } else {
11447 vm_map_t oldmap;
11448
11449 /* take on the identity of the target map while doing */
11450 /* the transfer */
11451
11452 vm_map_reference(map);
11453 oldmap = vm_map_switch(map);
11454 if (copyout(src_p, dst_addr, size)) {
11455 kr = KERN_INVALID_ADDRESS;
11456 }
11457 vm_map_switch(oldmap);
11458 vm_map_deallocate(map);
11459 }
11460 return kr;
11461 }
11462
11463 /*
11464 * Routine: vm_map_read_user
11465 *
11466 * Description:
11467 * Copy in data from a user space source map into the
11468 * kernel map. The space must already exist in the
11469 * kernel map.
11470 * NOTE: This routine should only be called by threads
11471 * which can block on a page fault, i.e. kernel mode user
11472 * threads.
11473 *
11474 */
11475 kern_return_t
11476 vm_map_read_user(
11477 vm_map_t map,
11478 vm_map_address_t src_addr,
11479 void *dst_p,
11480 vm_size_t size)
11481 {
11482 kern_return_t kr = KERN_SUCCESS;
11483
11484 if(current_map() == map) {
11485 if (copyin(src_addr, dst_p, size)) {
11486 kr = KERN_INVALID_ADDRESS;
11487 }
11488 } else {
11489 vm_map_t oldmap;
11490
11491 /* take on the identity of the target map while doing */
11492 /* the transfer */
11493
11494 vm_map_reference(map);
11495 oldmap = vm_map_switch(map);
11496 if (copyin(src_addr, dst_p, size)) {
11497 kr = KERN_INVALID_ADDRESS;
11498 }
11499 vm_map_switch(oldmap);
11500 vm_map_deallocate(map);
11501 }
11502 return kr;
11503 }
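
/*
 * Illustrative sketch (never compiled): round-tripping a small kernel
 * value through another task's address space with the two helpers above.
 * "user_addr" is assumed to already be mapped readable and writable in
 * "user_map".
 */
#if 0 /* sketch only, never compiled here */
static kern_return_t
example_user_copy(
	vm_map_t		user_map,
	vm_map_address_t	user_addr)
{
	int		value = 42;
	int		check = 0;
	kern_return_t	kr;

	kr = vm_map_write_user(user_map, &value, user_addr, sizeof(value));
	if (kr != KERN_SUCCESS)
		return kr;

	kr = vm_map_read_user(user_map, user_addr, &check, sizeof(check));
	if (kr == KERN_SUCCESS)
		assert(check == value);
	return kr;
}
#endif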
11504
11505
11506 /*
11507 * vm_map_check_protection:
11508 *
11509 * Assert that the target map allows the specified
11510 * privilege on the entire address region given.
11511 * The entire region must be allocated.
11512 */
11513 boolean_t
11514 vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
11515 vm_map_offset_t end, vm_prot_t protection)
11516 {
11517 vm_map_entry_t entry;
11518 vm_map_entry_t tmp_entry;
11519
11520 vm_map_lock(map);
11521
11522 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
11523 {
11524 vm_map_unlock(map);
11525 return (FALSE);
11526 }
11527
11528 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
11529 vm_map_unlock(map);
11530 return(FALSE);
11531 }
11532
11533 entry = tmp_entry;
11534
11535 while (start < end) {
11536 if (entry == vm_map_to_entry(map)) {
11537 vm_map_unlock(map);
11538 return(FALSE);
11539 }
11540
11541 /*
11542 * No holes allowed!
11543 */
11544
11545 if (start < entry->vme_start) {
11546 vm_map_unlock(map);
11547 return(FALSE);
11548 }
11549
11550 /*
11551 * Check protection associated with entry.
11552 */
11553
11554 if ((entry->protection & protection) != protection) {
11555 vm_map_unlock(map);
11556 return(FALSE);
11557 }
11558
11559 /* go to next entry */
11560
11561 start = entry->vme_end;
11562 entry = entry->vme_next;
11563 }
11564 vm_map_unlock(map);
11565 return(TRUE);
11566 }
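
/*
 * Illustrative sketch (never compiled): using vm_map_check_protection()
 * to verify that an entire user range is mapped read/write, with no
 * holes, before committing to an operation on it.
 */
#if 0 /* sketch only, never compiled here */
static kern_return_t
example_check_rw(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	if (!vm_map_check_protection(map, start, end,
				     VM_PROT_READ | VM_PROT_WRITE))
		return KERN_PROTECTION_FAILURE;
	return KERN_SUCCESS;
}
#endif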
11567
11568 kern_return_t
11569 vm_map_purgable_control(
11570 vm_map_t map,
11571 vm_map_offset_t address,
11572 vm_purgable_t control,
11573 int *state)
11574 {
11575 vm_map_entry_t entry;
11576 vm_object_t object;
11577 kern_return_t kr;
11578
11579 /*
11580 * Vet all the input parameters and current type and state of the
11581 * underlying object. Return with an error if anything is amiss.
11582 */
11583 if (map == VM_MAP_NULL)
11584 return(KERN_INVALID_ARGUMENT);
11585
11586 if (control != VM_PURGABLE_SET_STATE &&
11587 control != VM_PURGABLE_GET_STATE &&
11588 control != VM_PURGABLE_PURGE_ALL)
11589 return(KERN_INVALID_ARGUMENT);
11590
11591 if (control == VM_PURGABLE_PURGE_ALL) {
11592 vm_purgeable_object_purge_all();
11593 return KERN_SUCCESS;
11594 }
11595
11596 if (control == VM_PURGABLE_SET_STATE &&
11597 (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) ||
11598 ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK)))
11599 return(KERN_INVALID_ARGUMENT);
11600
11601 vm_map_lock_read(map);
11602
11603 if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {
11604
11605 /*
11606 * Must pass a valid non-submap address.
11607 */
11608 vm_map_unlock_read(map);
11609 return(KERN_INVALID_ADDRESS);
11610 }
11611
11612 if ((entry->protection & VM_PROT_WRITE) == 0) {
11613 /*
11614 * Can't apply purgable controls to something you can't write.
11615 */
11616 vm_map_unlock_read(map);
11617 return(KERN_PROTECTION_FAILURE);
11618 }
11619
11620 object = entry->object.vm_object;
11621 if (object == VM_OBJECT_NULL) {
11622 /*
11623 * Object must already be present or it can't be purgable.
11624 */
11625 vm_map_unlock_read(map);
11626 return KERN_INVALID_ARGUMENT;
11627 }
11628
11629 vm_object_lock(object);
11630
11631 if (entry->offset != 0 ||
11632 entry->vme_end - entry->vme_start != object->size) {
11633 /*
11634 * Can only apply purgable controls to the whole (existing)
11635 * object at once.
11636 */
11637 vm_map_unlock_read(map);
11638 vm_object_unlock(object);
11639 return KERN_INVALID_ARGUMENT;
11640 }
11641
11642 vm_map_unlock_read(map);
11643
11644 kr = vm_object_purgable_control(object, control, state);
11645
11646 vm_object_unlock(object);
11647
11648 return kr;
11649 }
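
/*
 * Illustrative sketch (never compiled): cycling a purgeable region
 * between volatile and non-volatile around a period of disuse, then
 * checking whether it was purged in between. VM_PURGABLE_VOLATILE,
 * VM_PURGABLE_NONVOLATILE and VM_PURGABLE_EMPTY are assumed to be the
 * usual <mach/vm_purgable.h> state values, and the previous state is
 * assumed to be reported back through "state" on a SET_STATE call.
 */
#if 0 /* sketch only, never compiled here */
static boolean_t
example_purgeable_cycle(
	vm_map_t	map,
	vm_map_offset_t	addr)
{
	int	state;

	state = VM_PURGABLE_VOLATILE;
	(void) vm_map_purgable_control(map, addr,
				       VM_PURGABLE_SET_STATE, &state);

	/* ... memory pressure may purge the object here ... */

	state = VM_PURGABLE_NONVOLATILE;
	(void) vm_map_purgable_control(map, addr,
				       VM_PURGABLE_SET_STATE, &state);

	/* "state" now holds the prior state: was the object emptied? */
	return (state == VM_PURGABLE_EMPTY);
}
#endif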
11650
11651 kern_return_t
11652 vm_map_page_query_internal(
11653 vm_map_t target_map,
11654 vm_map_offset_t offset,
11655 int *disposition,
11656 int *ref_count)
11657 {
11658 kern_return_t kr;
11659 vm_page_info_basic_data_t info;
11660 mach_msg_type_number_t count;
11661
11662 count = VM_PAGE_INFO_BASIC_COUNT;
11663 kr = vm_map_page_info(target_map,
11664 offset,
11665 VM_PAGE_INFO_BASIC,
11666 (vm_page_info_t) &info,
11667 &count);
11668 if (kr == KERN_SUCCESS) {
11669 *disposition = info.disposition;
11670 *ref_count = info.ref_count;
11671 } else {
11672 *disposition = 0;
11673 *ref_count = 0;
11674 }
11675
11676 return kr;
11677 }
11678
11679 kern_return_t
11680 vm_map_page_info(
11681 vm_map_t map,
11682 vm_map_offset_t offset,
11683 vm_page_info_flavor_t flavor,
11684 vm_page_info_t info,
11685 mach_msg_type_number_t *count)
11686 {
11687 vm_map_entry_t map_entry;
11688 vm_object_t object;
11689 vm_page_t m;
11690 kern_return_t kr;
11691 kern_return_t retval = KERN_SUCCESS;
11692 boolean_t top_object;
11693 int disposition;
11694 int ref_count;
11695 vm_object_id_t object_id;
11696 vm_page_info_basic_t basic_info;
11697 int depth;
11698
11699 switch (flavor) {
11700 case VM_PAGE_INFO_BASIC:
11701 if (*count != VM_PAGE_INFO_BASIC_COUNT) {
11702 return KERN_INVALID_ARGUMENT;
11703 }
11704 break;
11705 default:
11706 return KERN_INVALID_ARGUMENT;
11707 }
11708
11709 disposition = 0;
11710 ref_count = 0;
11711 object_id = 0;
11712 top_object = TRUE;
11713 depth = 0;
11714
11715 retval = KERN_SUCCESS;
11716 offset = vm_map_trunc_page(offset);
11717
11718 vm_map_lock_read(map);
11719
11720 /*
11721 * First, find the map entry covering "offset", going down
11722 * submaps if necessary.
11723 */
11724 for (;;) {
11725 if (!vm_map_lookup_entry(map, offset, &map_entry)) {
11726 vm_map_unlock_read(map);
11727 return KERN_INVALID_ADDRESS;
11728 }
11729 /* compute offset from this map entry's start */
11730 offset -= map_entry->vme_start;
11731 /* compute offset into this map entry's object (or submap) */
11732 offset += map_entry->offset;
11733
11734 if (map_entry->is_sub_map) {
11735 vm_map_t sub_map;
11736
11737 sub_map = map_entry->object.sub_map;
11738 vm_map_lock_read(sub_map);
11739 vm_map_unlock_read(map);
11740
11741 map = sub_map;
11742
11743 ref_count = MAX(ref_count, map->ref_count);
11744 continue;
11745 }
11746 break;
11747 }
11748
11749 object = map_entry->object.vm_object;
11750 if (object == VM_OBJECT_NULL) {
11751 /* no object -> no page */
11752 vm_map_unlock_read(map);
11753 goto done;
11754 }
11755
11756 vm_object_lock(object);
11757 vm_map_unlock_read(map);
11758
11759 /*
11760 * Go down the VM object shadow chain until we find the page
11761 * we're looking for.
11762 */
11763 for (;;) {
11764 ref_count = MAX(ref_count, object->ref_count);
11765
11766 m = vm_page_lookup(object, offset);
11767
11768 if (m != VM_PAGE_NULL) {
11769 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
11770 break;
11771 } else {
11772 #if MACH_PAGEMAP
11773 if (object->existence_map) {
11774 if (vm_external_state_get(object->existence_map,
11775 offset) ==
11776 VM_EXTERNAL_STATE_EXISTS) {
11777 /*
11778 * this page has been paged out
11779 */
11780 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
11781 break;
11782 }
11783 } else
11784 #endif
11785 {
11786 if (object->internal &&
11787 object->alive &&
11788 !object->terminating &&
11789 object->pager_ready) {
11790
11791 memory_object_t pager;
11792
11793 vm_object_paging_begin(object);
11794 pager = object->pager;
11795 vm_object_unlock(object);
11796
11797 /*
11798 * Ask the default pager if
11799 * it has this page.
11800 */
11801 kr = memory_object_data_request(
11802 pager,
11803 offset + object->paging_offset,
11804 0, /* just poke the pager */
11805 VM_PROT_READ,
11806 NULL);
11807
11808 vm_object_lock(object);
11809 vm_object_paging_end(object);
11810
11811 if (kr == KERN_SUCCESS) {
11812 /* the default pager has it */
11813 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
11814 break;
11815 }
11816 }
11817 }
11818
11819 if (object->shadow != VM_OBJECT_NULL) {
11820 vm_object_t shadow;
11821
11822 offset += object->shadow_offset;
11823 shadow = object->shadow;
11824
11825 vm_object_lock(shadow);
11826 vm_object_unlock(object);
11827
11828 object = shadow;
11829 top_object = FALSE;
11830 depth++;
11831 } else {
11832 // if (!object->internal)
11833 // break;
11834 // retval = KERN_FAILURE;
11835 // goto done_with_object;
11836 break;
11837 }
11838 }
11839 }
11840 /* The ref_count is not strictly accurate: it measures the number */
11841 /* of entities holding a ref on the object, and they may not be */
11842 /* mapping the object or the section holding the target page. But */
11843 /* it's still a ballpark number and, though an over-count, it picks */
11844 /* up the copy-on-write cases. */
11845
11846 /* We could also get a picture of page sharing from pmap_attributes */
11847 /* but this would under count as only faulted-in mappings would */
11848 /* show up. */
11849
11850 if (top_object == TRUE && object->shadow)
11851 disposition |= VM_PAGE_QUERY_PAGE_COPIED;
11852
11853 if (! object->internal)
11854 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
11855
11856 if (m == VM_PAGE_NULL)
11857 goto done_with_object;
11858
11859 if (m->fictitious) {
11860 disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
11861 goto done_with_object;
11862 }
11863 if (m->dirty || pmap_is_modified(m->phys_page))
11864 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
11865
11866 if (m->reference || pmap_is_referenced(m->phys_page))
11867 disposition |= VM_PAGE_QUERY_PAGE_REF;
11868
11869 if (m->speculative)
11870 disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE;
11871
11872 if (m->cs_validated)
11873 disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED;
11874 if (m->cs_tainted)
11875 disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED;
11876
11877 done_with_object:
11878 vm_object_unlock(object);
11879 done:
11880
11881 switch (flavor) {
11882 case VM_PAGE_INFO_BASIC:
11883 basic_info = (vm_page_info_basic_t) info;
11884 basic_info->disposition = disposition;
11885 basic_info->ref_count = ref_count;
11886 basic_info->object_id = (vm_object_id_t) (uintptr_t) object;
11887 basic_info->offset = (memory_object_offset_t) offset;
11888 basic_info->depth = depth;
11889 break;
11890 }
11891
11892 return retval;
11893 }
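
/*
 * Illustrative sketch (never compiled): querying a single page's
 * disposition through vm_map_page_query_internal() (the wrapper defined
 * above) and testing the VM_PAGE_QUERY_* bits assembled by
 * vm_map_page_info().
 */
#if 0 /* sketch only, never compiled here */
static boolean_t
example_page_is_dirty(
	vm_map_t	map,
	vm_map_offset_t	addr)
{
	int	disposition = 0;
	int	ref_count = 0;

	if (vm_map_page_query_internal(map, vm_map_trunc_page(addr),
				       &disposition,
				       &ref_count) != KERN_SUCCESS)
		return FALSE;

	if (!(disposition & VM_PAGE_QUERY_PAGE_PRESENT))
		return FALSE;

	return (disposition & VM_PAGE_QUERY_PAGE_DIRTY) ? TRUE : FALSE;
}
#endif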
11894
11895 /*
11896 * vm_map_msync
11897 *
11898 * Synchronises the memory range specified with its backing store
11899 * image by either flushing or cleaning the contents to the appropriate
11900 * memory manager, engaging in a memory object synchronize dialog with
11901 * the manager. The client doesn't return until the manager issues the
11902 * m_o_s_completed message. MIG magically converts the user task parameter
11903 * to the task's address map.
11904 *
11905 * interpretation of sync_flags
11906 * VM_SYNC_INVALIDATE - discard pages, only return precious
11907 * pages to manager.
11908 *
11909 * VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
11910 * - discard pages, write dirty or precious
11911 * pages back to memory manager.
11912 *
11913 * VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
11914 * - write dirty or precious pages back to
11915 * the memory manager.
11916 *
11917 * VM_SYNC_CONTIGUOUS - does everything normally, but if there
11918 * is a hole in the region, and we would
11919 * have returned KERN_SUCCESS, return
11920 * KERN_INVALID_ADDRESS instead.
11921 *
11922 * NOTE
11923 * The memory object attributes have not yet been implemented, this
11924 * function will have to deal with the invalidate attribute
11925 *
11926 * RETURNS
11927 * KERN_INVALID_TASK Bad task parameter
11928 * KERN_INVALID_ARGUMENT both sync and async were specified.
11929 * KERN_SUCCESS The usual.
11930 * KERN_INVALID_ADDRESS There was a hole in the region.
11931 */
11932
11933 kern_return_t
11934 vm_map_msync(
11935 vm_map_t map,
11936 vm_map_address_t address,
11937 vm_map_size_t size,
11938 vm_sync_t sync_flags)
11939 {
11940 msync_req_t msr;
11941 msync_req_t new_msr;
11942 queue_chain_t req_q; /* queue of requests for this msync */
11943 vm_map_entry_t entry;
11944 vm_map_size_t amount_left;
11945 vm_object_offset_t offset;
11946 boolean_t do_sync_req;
11947 boolean_t had_hole = FALSE;
11948 memory_object_t pager;
11949
11950 if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
11951 (sync_flags & VM_SYNC_SYNCHRONOUS))
11952 return(KERN_INVALID_ARGUMENT);
11953
11954 /*
11955 * align address and size on page boundaries
11956 */
11957 size = vm_map_round_page(address + size) - vm_map_trunc_page(address);
11958 address = vm_map_trunc_page(address);
11959
11960 if (map == VM_MAP_NULL)
11961 return(KERN_INVALID_TASK);
11962
11963 if (size == 0)
11964 return(KERN_SUCCESS);
11965
11966 queue_init(&req_q);
11967 amount_left = size;
11968
11969 while (amount_left > 0) {
11970 vm_object_size_t flush_size;
11971 vm_object_t object;
11972
11973 vm_map_lock(map);
11974 if (!vm_map_lookup_entry(map,
11975 vm_map_trunc_page(address), &entry)) {
11976
11977 vm_map_size_t skip;
11978
11979 /*
11980 * hole in the address map.
11981 */
11982 had_hole = TRUE;
11983
11984 /*
11985 * Check for empty map.
11986 */
11987 if (entry == vm_map_to_entry(map) &&
11988 entry->vme_next == entry) {
11989 vm_map_unlock(map);
11990 break;
11991 }
11992 /*
11993 * Check that we don't wrap and that
11994 * we have at least one real map entry.
11995 */
11996 if ((map->hdr.nentries == 0) ||
11997 (entry->vme_next->vme_start < address)) {
11998 vm_map_unlock(map);
11999 break;
12000 }
12001 /*
12002 * Move up to the next entry if needed
12003 */
12004 skip = (entry->vme_next->vme_start - address);
12005 if (skip >= amount_left)
12006 amount_left = 0;
12007 else
12008 amount_left -= skip;
12009 address = entry->vme_next->vme_start;
12010 vm_map_unlock(map);
12011 continue;
12012 }
12013
12014 offset = address - entry->vme_start;
12015
12016 /*
12017 * do we have more to flush than is contained in this
12018 * entry ?
12019 */
12020 if (amount_left + entry->vme_start + offset > entry->vme_end) {
12021 flush_size = entry->vme_end -
12022 (entry->vme_start + offset);
12023 } else {
12024 flush_size = amount_left;
12025 }
12026 amount_left -= flush_size;
12027 address += flush_size;
12028
12029 if (entry->is_sub_map == TRUE) {
12030 vm_map_t local_map;
12031 vm_map_offset_t local_offset;
12032
12033 local_map = entry->object.sub_map;
12034 local_offset = entry->offset;
12035 vm_map_unlock(map);
12036 if (vm_map_msync(
12037 local_map,
12038 local_offset,
12039 flush_size,
12040 sync_flags) == KERN_INVALID_ADDRESS) {
12041 had_hole = TRUE;
12042 }
12043 continue;
12044 }
12045 object = entry->object.vm_object;
12046
12047 /*
12048 * We can't sync this object if the object has not been
12049 * created yet
12050 */
12051 if (object == VM_OBJECT_NULL) {
12052 vm_map_unlock(map);
12053 continue;
12054 }
12055 offset += entry->offset;
12056
12057 vm_object_lock(object);
12058
12059 if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
12060 int kill_pages = 0;
12061 boolean_t reusable_pages = FALSE;
12062
12063 if (sync_flags & VM_SYNC_KILLPAGES) {
12064 if (object->ref_count == 1 && !object->shadow)
12065 kill_pages = 1;
12066 else
12067 kill_pages = -1;
12068 }
12069 if (kill_pages != -1)
12070 vm_object_deactivate_pages(object, offset,
12071 (vm_object_size_t)flush_size, kill_pages, reusable_pages);
12072 vm_object_unlock(object);
12073 vm_map_unlock(map);
12074 continue;
12075 }
12076 /*
12077 * We can't sync this object if there isn't a pager.
12078 * Don't bother to sync internal objects, since there can't
12079 * be any "permanent" storage for these objects anyway.
12080 */
12081 if ((object->pager == MEMORY_OBJECT_NULL) ||
12082 (object->internal) || (object->private)) {
12083 vm_object_unlock(object);
12084 vm_map_unlock(map);
12085 continue;
12086 }
12087 /*
12088 * keep reference on the object until syncing is done
12089 */
12090 vm_object_reference_locked(object);
12091 vm_object_unlock(object);
12092
12093 vm_map_unlock(map);
12094
12095 do_sync_req = vm_object_sync(object,
12096 offset,
12097 flush_size,
12098 sync_flags & VM_SYNC_INVALIDATE,
12099 ((sync_flags & VM_SYNC_SYNCHRONOUS) ||
12100 (sync_flags & VM_SYNC_ASYNCHRONOUS)),
12101 sync_flags & VM_SYNC_SYNCHRONOUS);
12102 /*
12103 * only send an m_o_s if we returned pages or if the entry
12104 * is writable (i.e. dirty pages may have already been sent back)
12105 */
12106 if (!do_sync_req) {
12107 if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) {
12108 /*
12109 * clear out the clustering and read-ahead hints
12110 */
12111 vm_object_lock(object);
12112
12113 object->pages_created = 0;
12114 object->pages_used = 0;
12115 object->sequential = 0;
12116 object->last_alloc = 0;
12117
12118 vm_object_unlock(object);
12119 }
12120 vm_object_deallocate(object);
12121 continue;
12122 }
12123 msync_req_alloc(new_msr);
12124
12125 vm_object_lock(object);
12126 offset += object->paging_offset;
12127
12128 new_msr->offset = offset;
12129 new_msr->length = flush_size;
12130 new_msr->object = object;
12131 new_msr->flag = VM_MSYNC_SYNCHRONIZING;
12132 re_iterate:
12133
12134 /*
12135 * We can't sync this object if there isn't a pager. The
12136 * pager can disappear anytime we're not holding the object
12137 * lock. So this has to be checked anytime we goto re_iterate.
12138 */
12139
12140 pager = object->pager;
12141
12142 if (pager == MEMORY_OBJECT_NULL) {
12143 vm_object_unlock(object);
12144 vm_object_deallocate(object);
12145 continue;
12146 }
12147
12148 queue_iterate(&object->msr_q, msr, msync_req_t, msr_q) {
12149 /*
12150 * need to check for overlapping entry, if found, wait
12151 * on overlapping msr to be done, then reiterate
12152 */
12153 msr_lock(msr);
12154 if (msr->flag == VM_MSYNC_SYNCHRONIZING &&
12155 ((offset >= msr->offset &&
12156 offset < (msr->offset + msr->length)) ||
12157 (msr->offset >= offset &&
12158 msr->offset < (offset + flush_size))))
12159 {
12160 assert_wait((event_t) msr,THREAD_INTERRUPTIBLE);
12161 msr_unlock(msr);
12162 vm_object_unlock(object);
12163 thread_block(THREAD_CONTINUE_NULL);
12164 vm_object_lock(object);
12165 goto re_iterate;
12166 }
12167 msr_unlock(msr);
12168 }/* queue_iterate */
12169
12170 queue_enter(&object->msr_q, new_msr, msync_req_t, msr_q);
12171
12172 vm_object_paging_begin(object);
12173 vm_object_unlock(object);
12174
12175 queue_enter(&req_q, new_msr, msync_req_t, req_q);
12176
12177 (void) memory_object_synchronize(
12178 pager,
12179 offset,
12180 flush_size,
12181 sync_flags & ~VM_SYNC_CONTIGUOUS);
12182
12183 vm_object_lock(object);
12184 vm_object_paging_end(object);
12185 vm_object_unlock(object);
12186 }/* while */
12187
12188 /*
12189 * wait for memory_object_synchronize_completed messages from pager(s)
12190 */
12191
12192 while (!queue_empty(&req_q)) {
12193 msr = (msync_req_t)queue_first(&req_q);
12194 msr_lock(msr);
12195 while(msr->flag != VM_MSYNC_DONE) {
12196 assert_wait((event_t) msr, THREAD_INTERRUPTIBLE);
12197 msr_unlock(msr);
12198 thread_block(THREAD_CONTINUE_NULL);
12199 msr_lock(msr);
12200 }/* while */
12201 queue_remove(&req_q, msr, msync_req_t, req_q);
12202 msr_unlock(msr);
12203 vm_object_deallocate(msr->object);
12204 msync_req_free(msr);
12205 }/* queue_iterate */
12206
12207 /* for proper msync() behaviour */
12208 if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS))
12209 return(KERN_INVALID_ADDRESS);
12210
12211 return(KERN_SUCCESS);
12212 }/* vm_msync */
12213
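/*
 * Illustrative sketch (not part of the original file): how a caller
 * might drive the routine above.  The entry-point name vm_map_msync()
 * and its exact signature are assumptions here; only the sync_flags
 * values and the VM_SYNC_CONTIGUOUS hole behaviour come from the code
 * above.
 */
#if 0	/* sketch only -- not compiled */
static kern_return_t
example_flush_range(
	vm_map_t	map,
	vm_map_offset_t	addr,
	vm_map_size_t	len)
{
	kern_return_t	kr;

	/* synchronous flush, and require the range to be fully mapped */
	kr = vm_map_msync(map, addr, len,
			  VM_SYNC_SYNCHRONOUS | VM_SYNC_CONTIGUOUS);
	/*
	 * KERN_INVALID_ADDRESS means a hole was found in the range
	 * (the "had_hole" case above); msync(2) typically reports
	 * that as ENOMEM.
	 */
	return kr;
}
#endif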
12214 /*
12215 * Routine: convert_port_entry_to_map
12216 * Purpose:
12217 * Convert from a port specifying an entry or a task
12218 * to a map. Doesn't consume the port ref; produces a map ref,
12219 * which may be null. Unlike convert_port_to_map, the
12220 * port may be either task backed or named-entry backed.
12221 * Conditions:
12222 * Nothing locked.
12223 */
12224
12225
12226 vm_map_t
12227 convert_port_entry_to_map(
12228 ipc_port_t port)
12229 {
12230 vm_map_t map;
12231 vm_named_entry_t named_entry;
12232 uint32_t try_failed_count = 0;
12233
12234 if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
12235 while(TRUE) {
12236 ip_lock(port);
12237 if(ip_active(port) && (ip_kotype(port)
12238 == IKOT_NAMED_ENTRY)) {
12239 named_entry =
12240 (vm_named_entry_t)port->ip_kobject;
12241 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
12242 ip_unlock(port);
12243
12244 try_failed_count++;
12245 mutex_pause(try_failed_count);
12246 continue;
12247 }
12248 named_entry->ref_count++;
12249 lck_mtx_unlock(&(named_entry)->Lock);
12250 ip_unlock(port);
12251 if ((named_entry->is_sub_map) &&
12252 (named_entry->protection
12253 & VM_PROT_WRITE)) {
12254 map = named_entry->backing.map;
12255 } else {
12256 mach_destroy_memory_entry(port);
12257 return VM_MAP_NULL;
12258 }
12259 vm_map_reference_swap(map);
12260 mach_destroy_memory_entry(port);
12261 break;
12262 }
12263 else
12264 return VM_MAP_NULL;
12265 }
12266 }
12267 else
12268 map = convert_port_to_map(port);
12269
12270 return map;
12271 }
12272
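/*
 * Minimal caller sketch (hypothetical, not from this file): the routine
 * above hands back a map reference that may be VM_MAP_NULL, so a caller
 * checks the result and drops the reference with vm_map_deallocate()
 * when it is done.
 */
#if 0	/* sketch only -- not compiled */
static kern_return_t
example_with_port_map(ipc_port_t port)
{
	vm_map_t	map;

	map = convert_port_entry_to_map(port);	/* takes a map ref */
	if (map == VM_MAP_NULL)
		return KERN_INVALID_ARGUMENT;

	/* ... operate on the map ... */

	vm_map_deallocate(map);		/* drop the ref taken above */
	return KERN_SUCCESS;
}
#endif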
12273 /*
12274 * Routine: convert_port_entry_to_object
12275 * Purpose:
12276 * Convert from a port specifying a named entry to an
12277 * object. Doesn't consume the port ref; produces an object ref,
12278 * which may be null.
12279 * Conditions:
12280 * Nothing locked.
12281 */
12282
12283
12284 vm_object_t
12285 convert_port_entry_to_object(
12286 ipc_port_t port)
12287 {
12288 vm_object_t object;
12289 vm_named_entry_t named_entry;
12290 uint32_t try_failed_count = 0;
12291
12292 if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
12293 while(TRUE) {
12294 ip_lock(port);
12295 if(ip_active(port) && (ip_kotype(port)
12296 == IKOT_NAMED_ENTRY)) {
12297 named_entry =
12298 (vm_named_entry_t)port->ip_kobject;
12299 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
12300 ip_unlock(port);
12301
12302 try_failed_count++;
12303 mutex_pause(try_failed_count);
12304 continue;
12305 }
12306 named_entry->ref_count++;
12307 lck_mtx_unlock(&(named_entry)->Lock);
12308 ip_unlock(port);
12309 if ((!named_entry->is_sub_map) &&
12310 (!named_entry->is_pager) &&
12311 (named_entry->protection
12312 & VM_PROT_WRITE)) {
12313 object = named_entry->backing.object;
12314 } else {
12315 mach_destroy_memory_entry(port);
12316 return (vm_object_t)NULL;
12317 }
12318 vm_object_reference(named_entry->backing.object);
12319 mach_destroy_memory_entry(port);
12320 break;
12321 }
12322 else
12323 return (vm_object_t)NULL;
12324 }
12325 } else {
12326 return (vm_object_t)NULL;
12327 }
12328
12329 return object;
12330 }
12331
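/*
 * Companion sketch for the object case (hypothetical caller): the
 * reference returned here was taken on named_entry->backing.object
 * above, so the caller releases it with vm_object_deallocate().
 */
#if 0	/* sketch only -- not compiled */
static void
example_with_port_object(ipc_port_t port)
{
	vm_object_t	object;

	object = convert_port_entry_to_object(port);	/* takes an object ref */
	if (object == VM_OBJECT_NULL)
		return;

	/* ... operate on the object ... */

	vm_object_deallocate(object);	/* drop the ref taken above */
}
#endif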
12332 /*
12333 * Export routines to other components for the things we access locally through
12334 * macros.
12335 */
12336 #undef current_map
12337 vm_map_t
12338 current_map(void)
12339 {
12340 return (current_map_fast());
12341 }
12342
12343 /*
12344 * vm_map_reference:
12345 *
12346 * Most code internal to osfmk will go through a
12347 * macro defining this. This is always here for the
12348 * use of other kernel components.
12349 */
12350 #undef vm_map_reference
12351 void
12352 vm_map_reference(
12353 register vm_map_t map)
12354 {
12355 if (map == VM_MAP_NULL)
12356 return;
12357
12358 lck_mtx_lock(&map->s_lock);
12359 #if TASK_SWAPPER
12360 assert(map->res_count > 0);
12361 assert(map->ref_count >= map->res_count);
12362 map->res_count++;
12363 #endif
12364 map->ref_count++;
12365 lck_mtx_unlock(&map->s_lock);
12366 }
12367
12368 /*
12369 * vm_map_deallocate:
12370 *
12371 * Removes a reference from the specified map,
12372 * destroying it if no references remain.
12373 * The map should not be locked.
12374 */
12375 void
12376 vm_map_deallocate(
12377 register vm_map_t map)
12378 {
12379 unsigned int ref;
12380
12381 if (map == VM_MAP_NULL)
12382 return;
12383
12384 lck_mtx_lock(&map->s_lock);
12385 ref = --map->ref_count;
12386 if (ref > 0) {
12387 vm_map_res_deallocate(map);
12388 lck_mtx_unlock(&map->s_lock);
12389 return;
12390 }
12391 assert(map->ref_count == 0);
12392 lck_mtx_unlock(&map->s_lock);
12393
12394 #if TASK_SWAPPER
12395 /*
12396 * The map residence count isn't decremented here because
12397 * the vm_map_destroy() below (via vm_map_delete()) will traverse
12398 * the entire map, deleting entries, and the residence counts on objects
12399 * and sharing maps will go away then.
12400 */
12401 #endif
12402
12403 vm_map_destroy(map, VM_MAP_NO_FLAGS);
12404 }
12405
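/*
 * Sketch of the intended pairing for other kernel components (the
 * helper name is made up for illustration): every vm_map_reference()
 * is balanced by a vm_map_deallocate(); the last deallocate tears the
 * map down via vm_map_destroy().
 */
#if 0	/* sketch only -- not compiled */
static void
example_keep_map_alive(vm_map_t map)
{
	vm_map_reference(map);		/* take an extra ref before stashing the map */

	/* ... hand the map to a longer-lived consumer ... */

	vm_map_deallocate(map);		/* release it when the consumer is done */
}
#endif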
12406
12407 void
12408 vm_map_disable_NX(vm_map_t map)
12409 {
12410 if (map == NULL)
12411 return;
12412 if (map->pmap == NULL)
12413 return;
12414
12415 pmap_disable_NX(map->pmap);
12416 }
12417
12418 /* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
12419 * more descriptive.
12420 */
12421 void
12422 vm_map_set_32bit(vm_map_t map)
12423 {
12424 map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
12425 }
12426
12427
12428 void
12429 vm_map_set_64bit(vm_map_t map)
12430 {
12431 map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
12432 }
12433
12434 vm_map_offset_t
12435 vm_compute_max_offset(unsigned is64)
12436 {
12437 return (is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS);
12438 }
12439
12440 boolean_t
12441 vm_map_is_64bit(
12442 vm_map_t map)
12443 {
12444 return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
12445 }
12446
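/*
 * Small sketch tying the sizing routines together (hypothetical
 * helper): a map's address-space limit can be chosen from a 64-bit
 * flag through the setters above, or computed directly with
 * vm_compute_max_offset().
 */
#if 0	/* sketch only -- not compiled */
static void
example_size_map(vm_map_t map, boolean_t is64)
{
	if (is64)
		vm_map_set_64bit(map);	/* max_offset = MACH_VM_MAX_ADDRESS */
	else
		vm_map_set_32bit(map);	/* max_offset = VM_MAX_ADDRESS */

	/* the same limit can be computed without touching the map */
	assert(map->max_offset == vm_compute_max_offset((unsigned)is64));
	assert(vm_map_is_64bit(map) == is64);
}
#endif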
12447 boolean_t
12448 vm_map_has_4GB_pagezero(
12449 vm_map_t map)
12450 {
12451 /*
12452 * XXX FBDP
12453 * We should lock the VM map (for read) here but we can get away
12454 * with it for now because there can't really be any race condition:
12455 * the VM map's min_offset is changed only when the VM map is created
12456 * and when the zero page is established (when the binary gets loaded),
12457 * and this routine gets called only when the task terminates and the
12458 * VM map is being torn down, and when a new map is created via
12459 * load_machfile()/execve().
12460 */
12461 return (map->min_offset >= 0x100000000ULL);
12462 }
12463
12464 void
12465 vm_map_set_4GB_pagezero(vm_map_t map)
12466 {
12467 #ifdef __i386__
12468 pmap_set_4GB_pagezero(map->pmap);
12469 #else
12470 #pragma unused(map)
12471 #endif
12472
12473 }
12474
12475 void
12476 vm_map_clear_4GB_pagezero(vm_map_t map)
12477 {
12478 #ifdef __i386__
12479 pmap_clear_4GB_pagezero(map->pmap);
12480 #else
12481 #pragma unused(map)
12482 #endif
12483 }
12484
12485 /*
12486 * Raise a VM map's minimum offset, to strictly enforce
12487 * the "page zero" reservation.
12488 */
12489 kern_return_t
12490 vm_map_raise_min_offset(
12491 vm_map_t map,
12492 vm_map_offset_t new_min_offset)
12493 {
12494 vm_map_entry_t first_entry;
12495
12496 new_min_offset = vm_map_round_page(new_min_offset);
12497
12498 vm_map_lock(map);
12499
12500 if (new_min_offset < map->min_offset) {
12501 /*
12502 * Can't move min_offset backwards, as that would expose
12503 * a part of the address space that was previously, and for
12504 * possibly good reasons, inaccessible.
12505 */
12506 vm_map_unlock(map);
12507 return KERN_INVALID_ADDRESS;
12508 }
12509
12510 first_entry = vm_map_first_entry(map);
12511 if (first_entry != vm_map_to_entry(map) &&
12512 first_entry->vme_start < new_min_offset) {
12513 /*
12514 * Some memory was already allocated below the new
12515 * minimum offset. It's too late to change it now...
12516 */
12517 vm_map_unlock(map);
12518 return KERN_NO_SPACE;
12519 }
12520
12521 map->min_offset = new_min_offset;
12522
12523 vm_map_unlock(map);
12524
12525 return KERN_SUCCESS;
12526 }
12527
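/*
 * Hypothetical caller sketch: a loader wanting a hard "page zero"
 * reservation would raise the minimum offset right after the map is
 * set up, before anything is mapped that low.  KERN_NO_SPACE means
 * memory is already allocated below the requested floor;
 * KERN_INVALID_ADDRESS means an attempt to lower min_offset.
 */
#if 0	/* sketch only -- not compiled */
static kern_return_t
example_reserve_pagezero(
	vm_map_t	map,
	vm_map_offset_t	pagezero_size)
{
	/* pagezero_size is rounded up to a page boundary internally */
	return vm_map_raise_min_offset(map, pagezero_size);
}
#endif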
12528 /*
12529 * Set the limit on the maximum amount of user wired memory allowed for this map.
12530 * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side of
12531 * the kernel. The limits are checked on the Mach VM side, so we keep a copy here
12532 * to avoid reaching over to the BSD data structures.
12533 */
12534
12535 void
12536 vm_map_set_user_wire_limit(vm_map_t map,
12537 vm_size_t limit)
12538 {
12539 map->user_wire_limit = limit;
12540 }
12541
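/*
 * Hedged sketch of how the BSD side might push an updated
 * RLIMIT_MEMLOCK value down to the Mach VM layer.  The call site and
 * helper name are hypothetical; only vm_map_set_user_wire_limit() and
 * current_map() come from this file.
 */
#if 0	/* sketch only -- not compiled */
static void
example_propagate_memlock_limit(uint64_t rlim_cur)
{
	/* mirror the BSD MEMLOCK rlimit into the current task's map */
	vm_map_set_user_wire_limit(current_map(), (vm_size_t)rlim_cur);
}
#endif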
12542
12543 void vm_map_switch_protect(vm_map_t map,
12544 boolean_t val)
12545 {
12546 vm_map_lock(map);
12547 map->switch_protect = val;
12548 vm_map_unlock(map);
12549 }