1 /*
2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_map.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * Virtual memory mapping module.
64 */
65
66 #include <task_swapper.h>
67 #include <mach_assert.h>
68
69 #include <vm/vm_options.h>
70
71 #include <libkern/OSAtomic.h>
72
73 #include <mach/kern_return.h>
74 #include <mach/port.h>
75 #include <mach/vm_attributes.h>
76 #include <mach/vm_param.h>
77 #include <mach/vm_behavior.h>
78 #include <mach/vm_statistics.h>
79 #include <mach/memory_object.h>
80 #include <mach/mach_vm.h>
81 #include <machine/cpu_capabilities.h>
82 #include <mach/sdt.h>
83
84 #include <kern/assert.h>
85 #include <kern/counters.h>
86 #include <kern/kalloc.h>
87 #include <kern/zalloc.h>
88
89 #include <vm/cpm.h>
90 #include <vm/vm_compressor_pager.h>
91 #include <vm/vm_init.h>
92 #include <vm/vm_fault.h>
93 #include <vm/vm_map.h>
94 #include <vm/vm_object.h>
95 #include <vm/vm_page.h>
96 #include <vm/vm_pageout.h>
97 #include <vm/vm_kern.h>
98 #include <ipc/ipc_port.h>
99 #include <kern/sched_prim.h>
100 #include <kern/misc_protos.h>
101 #include <kern/xpr.h>
102
103 #include <mach/vm_map_server.h>
104 #include <mach/mach_host_server.h>
105 #include <vm/vm_protos.h>
106 #include <vm/vm_purgeable_internal.h>
107
108 #include <vm/vm_protos.h>
109 #include <vm/vm_shared_region.h>
110 #include <vm/vm_map_store.h>
111
112
113 extern u_int32_t random(void); /* from <libkern/libkern.h> */
114 /* Internal prototypes */
115
116
117 static void vm_map_simplify_range(
118 vm_map_t map,
119 vm_map_offset_t start,
120 vm_map_offset_t end); /* forward */
121
122 static boolean_t vm_map_range_check(
123 vm_map_t map,
124 vm_map_offset_t start,
125 vm_map_offset_t end,
126 vm_map_entry_t *entry);
127
128 static vm_map_entry_t _vm_map_entry_create(
129 struct vm_map_header *map_header, boolean_t map_locked);
130
131 static void _vm_map_entry_dispose(
132 struct vm_map_header *map_header,
133 vm_map_entry_t entry);
134
135 static void vm_map_pmap_enter(
136 vm_map_t map,
137 vm_map_offset_t addr,
138 vm_map_offset_t end_addr,
139 vm_object_t object,
140 vm_object_offset_t offset,
141 vm_prot_t protection);
142
143 static void _vm_map_clip_end(
144 struct vm_map_header *map_header,
145 vm_map_entry_t entry,
146 vm_map_offset_t end);
147
148 static void _vm_map_clip_start(
149 struct vm_map_header *map_header,
150 vm_map_entry_t entry,
151 vm_map_offset_t start);
152
153 static void vm_map_entry_delete(
154 vm_map_t map,
155 vm_map_entry_t entry);
156
157 static kern_return_t vm_map_delete(
158 vm_map_t map,
159 vm_map_offset_t start,
160 vm_map_offset_t end,
161 int flags,
162 vm_map_t zap_map);
163
164 static kern_return_t vm_map_copy_overwrite_unaligned(
165 vm_map_t dst_map,
166 vm_map_entry_t entry,
167 vm_map_copy_t copy,
168 vm_map_address_t start,
169 boolean_t discard_on_success);
170
171 static kern_return_t vm_map_copy_overwrite_aligned(
172 vm_map_t dst_map,
173 vm_map_entry_t tmp_entry,
174 vm_map_copy_t copy,
175 vm_map_offset_t start,
176 pmap_t pmap);
177
178 static kern_return_t vm_map_copyin_kernel_buffer(
179 vm_map_t src_map,
180 vm_map_address_t src_addr,
181 vm_map_size_t len,
182 boolean_t src_destroy,
183 vm_map_copy_t *copy_result); /* OUT */
184
185 static kern_return_t vm_map_copyout_kernel_buffer(
186 vm_map_t map,
187 vm_map_address_t *addr, /* IN/OUT */
188 vm_map_copy_t copy,
189 boolean_t overwrite,
190 boolean_t consume_on_success);
191
192 static void vm_map_fork_share(
193 vm_map_t old_map,
194 vm_map_entry_t old_entry,
195 vm_map_t new_map);
196
197 static boolean_t vm_map_fork_copy(
198 vm_map_t old_map,
199 vm_map_entry_t *old_entry_p,
200 vm_map_t new_map);
201
202 void vm_map_region_top_walk(
203 vm_map_entry_t entry,
204 vm_region_top_info_t top);
205
206 void vm_map_region_walk(
207 vm_map_t map,
208 vm_map_offset_t va,
209 vm_map_entry_t entry,
210 vm_object_offset_t offset,
211 vm_object_size_t range,
212 vm_region_extended_info_t extended,
213 boolean_t look_for_pages,
214 mach_msg_type_number_t count);
215
216 static kern_return_t vm_map_wire_nested(
217 vm_map_t map,
218 vm_map_offset_t start,
219 vm_map_offset_t end,
220 vm_prot_t caller_prot,
221 boolean_t user_wire,
222 pmap_t map_pmap,
223 vm_map_offset_t pmap_addr,
224 ppnum_t *physpage_p);
225
226 static kern_return_t vm_map_unwire_nested(
227 vm_map_t map,
228 vm_map_offset_t start,
229 vm_map_offset_t end,
230 boolean_t user_wire,
231 pmap_t map_pmap,
232 vm_map_offset_t pmap_addr);
233
234 static kern_return_t vm_map_overwrite_submap_recurse(
235 vm_map_t dst_map,
236 vm_map_offset_t dst_addr,
237 vm_map_size_t dst_size);
238
239 static kern_return_t vm_map_copy_overwrite_nested(
240 vm_map_t dst_map,
241 vm_map_offset_t dst_addr,
242 vm_map_copy_t copy,
243 boolean_t interruptible,
244 pmap_t pmap,
245 boolean_t discard_on_success);
246
247 static kern_return_t vm_map_remap_extract(
248 vm_map_t map,
249 vm_map_offset_t addr,
250 vm_map_size_t size,
251 boolean_t copy,
252 struct vm_map_header *map_header,
253 vm_prot_t *cur_protection,
254 vm_prot_t *max_protection,
255 vm_inherit_t inheritance,
256 boolean_t pageable);
257
258 static kern_return_t vm_map_remap_range_allocate(
259 vm_map_t map,
260 vm_map_address_t *address,
261 vm_map_size_t size,
262 vm_map_offset_t mask,
263 int flags,
264 vm_map_entry_t *map_entry);
265
266 static void vm_map_region_look_for_page(
267 vm_map_t map,
268 vm_map_offset_t va,
269 vm_object_t object,
270 vm_object_offset_t offset,
271 int max_refcnt,
272 int depth,
273 vm_region_extended_info_t extended,
274 mach_msg_type_number_t count);
275
276 static int vm_map_region_count_obj_refs(
277 vm_map_entry_t entry,
278 vm_object_t object);
279
280
281 static kern_return_t vm_map_willneed(
282 vm_map_t map,
283 vm_map_offset_t start,
284 vm_map_offset_t end);
285
286 static kern_return_t vm_map_reuse_pages(
287 vm_map_t map,
288 vm_map_offset_t start,
289 vm_map_offset_t end);
290
291 static kern_return_t vm_map_reusable_pages(
292 vm_map_t map,
293 vm_map_offset_t start,
294 vm_map_offset_t end);
295
296 static kern_return_t vm_map_can_reuse(
297 vm_map_t map,
298 vm_map_offset_t start,
299 vm_map_offset_t end);
300
301 #if MACH_ASSERT
302 static kern_return_t vm_map_pageout(
303 vm_map_t map,
304 vm_map_offset_t start,
305 vm_map_offset_t end);
306 #endif /* MACH_ASSERT */
307
308 /*
309 * Macros to copy a vm_map_entry. We must be careful to correctly
310 * manage the wired page count. vm_map_entry_copy() creates a new
311 * map entry referring to the same memory - the wired count in the new entry
312 * must be set to zero. vm_map_entry_copy_full() creates a new
313 * entry that is identical to the old entry. This preserves the
314 * wire count; it's used for map splitting and zone changing in
315 * vm_map_copyout.
316 */
317
318 #define vm_map_entry_copy(NEW,OLD) \
319 MACRO_BEGIN \
320 boolean_t _vmec_reserved = (NEW)->from_reserved_zone; \
321 *(NEW) = *(OLD); \
322 (NEW)->is_shared = FALSE; \
323 (NEW)->needs_wakeup = FALSE; \
324 (NEW)->in_transition = FALSE; \
325 (NEW)->wired_count = 0; \
326 (NEW)->user_wired_count = 0; \
327 (NEW)->permanent = FALSE; \
328 (NEW)->used_for_jit = FALSE; \
329 (NEW)->from_reserved_zone = _vmec_reserved; \
330 (NEW)->iokit_acct = FALSE; \
331 (NEW)->vme_resilient_codesign = FALSE; \
332 (NEW)->vme_resilient_media = FALSE; \
333 MACRO_END
334
335 #define vm_map_entry_copy_full(NEW,OLD) \
336 MACRO_BEGIN \
337 boolean_t _vmecf_reserved = (NEW)->from_reserved_zone; \
338 (*(NEW) = *(OLD)); \
339 (NEW)->from_reserved_zone = _vmecf_reserved; \
340 MACRO_END
341
342 /*
343 * Decide if we want to allow processes to execute from their data or stack areas.
344 * override_nx() returns true if we do. Data/stack execution can be enabled independently
345 * for 32 and 64 bit processes. Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
346 * or allow_stack_exec to enable data execution for that type of data area for that particular
347 * ABI (or both by or'ing the flags together). These are initialized in the architecture
348 * specific pmap files since the default behavior varies according to architecture. The
349 * main reason it varies is because of the need to provide binary compatibility with old
350 * applications that were written before these restrictions came into being. In the old
351 * days, an app could execute anything it could read, but this has slowly been tightened
352 * up over time. The default behavior is:
353 *
354 * 32-bit PPC apps may execute from both stack and data areas
355 * 32-bit Intel apps may execute from data areas but not stack
356 * 64-bit PPC/Intel apps may not execute from either data or stack
357 *
358 * An application on any architecture may override these defaults by explicitly
359 * adding PROT_EXEC permission to the page in question with the mprotect(2)
360 * system call. This code here just determines what happens when an app tries to
361 * execute from a page that lacks execute permission.
362 *
363 * Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
364 * default behavior for both 32 and 64 bit apps on a system-wide basis. Furthermore,
365 * a Mach-O header flag bit (MH_NO_HEAP_EXECUTION) can be used to forcibly disallow
366 * execution from data areas for a particular binary even if the arch normally permits it. As
367 * a final wrinkle, a posix_spawn attribute flag can be used to negate this opt-in header bit
368 * to support some complicated use cases, notably browsers with out-of-process plugins that
369 * are not all NX-safe.
370 */
371
372 extern int allow_data_exec, allow_stack_exec;
373
374 int
375 override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
376 {
377 int current_abi;
378
379 if (map->pmap == kernel_pmap) return FALSE;
380
381 /*
382 * Determine if the app is running in 32 or 64 bit mode.
383 */
384
385 if (vm_map_is_64bit(map))
386 current_abi = VM_ABI_64;
387 else
388 current_abi = VM_ABI_32;
389
390 /*
391 * Determine if we should allow the execution based on whether it's a
392 * stack or data area and the current architecture.
393 */
394
395 if (user_tag == VM_MEMORY_STACK)
396 return allow_stack_exec & current_abi;
397
398 return (allow_data_exec & current_abi) && (map->map_disallow_data_exec == FALSE);
399 }
400
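
/*
 * Editor's example (not part of the original source): a sketch of how a
 * fault path might consult override_nx() when execute permission is
 * requested on a page that was not mapped executable.  The variables
 * "fault_map" and "fault_entry" are hypothetical.
 */
#if 0	/* illustrative only */
	uint32_t user_tag = VME_ALIAS(fault_entry);

	if (!(fault_entry->protection & VM_PROT_EXECUTE) &&
	    override_nx(fault_map, user_tag)) {
		/* data/stack execution is allowed for this ABI: grant it */
	}
#endif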
401
402 /*
403 * Virtual memory maps provide for the mapping, protection,
404 * and sharing of virtual memory objects. In addition,
405 * this module provides for an efficient virtual copy of
406 * memory from one map to another.
407 *
408 * Synchronization is required prior to most operations.
409 *
410 * Maps consist of an ordered doubly-linked list of simple
411 * entries; a single hint is used to speed up lookups.
412 *
413 * Sharing maps have been deleted from this version of Mach.
414 * All shared objects are now mapped directly into the respective
415 * maps. This requires a change in the copy on write strategy;
416 * the asymmetric (delayed) strategy is used for shared temporary
417 * objects instead of the symmetric (shadow) strategy. All maps
418 * are now "top level" maps (either task map, kernel map or submap
419 * of the kernel map).
420 *
421 * Since portions of maps are specified by start/end addresses,
422 * which may not align with existing map entries, all
423 * routines merely "clip" entries to these start/end values.
424 * [That is, an entry is split into two, bordering at a
425 * start or end value.] Note that these clippings may not
426 * always be necessary (as the two resulting entries are then
427 * not changed); however, the clipping is done for convenience.
428 * No attempt is currently made to "glue back together" two
429 * abutting entries.
430 *
431 * The symmetric (shadow) copy strategy implements virtual copy
432 * by copying VM object references from one map to
433 * another, and then marking both regions as copy-on-write.
434 * It is important to note that only one writeable reference
435 * to a VM object region exists in any map when this strategy
436 * is used -- this means that shadow object creation can be
437 * delayed until a write operation occurs. The asymmetric (delayed)
438 * strategy allows multiple maps to have writeable references to
439 * the same region of a vm object, and hence cannot delay creating
440 * its copy objects. See vm_object_copy_quickly() in vm_object.c.
441 * Copying of permanent objects is completely different; see
442 * vm_object_copy_strategically() in vm_object.c.
443 */
444
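/*
 * Editor's note -- a small worked example of the clipping described above
 * (illustrative only): if an entry spans [0x1000, 0x5000) and an operation
 * targets [0x2000, 0x4000), clipping at the start and end leaves three
 * abutting entries, [0x1000, 0x2000), [0x2000, 0x4000) and [0x4000, 0x5000),
 * with the middle entry exactly covering the requested range.  The two
 * outer entries are not glued back together afterwards.
 */
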
445 static zone_t vm_map_zone; /* zone for vm_map structures */
446 static zone_t vm_map_entry_zone; /* zone for vm_map_entry structures */
447 static zone_t vm_map_entry_reserved_zone; /* zone with reserve for non-blocking
448 * allocations */
449 static zone_t vm_map_copy_zone; /* zone for vm_map_copy structures */
450 zone_t vm_map_holes_zone; /* zone for vm map holes (vm_map_links) structures */
451
452
453 /*
454 * Placeholder object for submap operations. This object is dropped
455 * into the range by a call to vm_map_find, and removed when
456 * vm_map_submap creates the submap.
457 */
458
459 vm_object_t vm_submap_object;
460
461 static void *map_data;
462 static vm_size_t map_data_size;
463 static void *kentry_data;
464 static vm_size_t kentry_data_size;
465 static void *map_holes_data;
466 static vm_size_t map_holes_data_size;
467
468 #define NO_COALESCE_LIMIT ((1024 * 128) - 1)
469
470 /* Skip acquiring locks if we're in the midst of a kernel core dump */
471 unsigned int not_in_kdp = 1;
472
473 unsigned int vm_map_set_cache_attr_count = 0;
474
475 kern_return_t
476 vm_map_set_cache_attr(
477 vm_map_t map,
478 vm_map_offset_t va)
479 {
480 vm_map_entry_t map_entry;
481 vm_object_t object;
482 kern_return_t kr = KERN_SUCCESS;
483
484 vm_map_lock_read(map);
485
486 if (!vm_map_lookup_entry(map, va, &map_entry) ||
487 map_entry->is_sub_map) {
488 /*
489 * that memory is not properly mapped
490 */
491 kr = KERN_INVALID_ARGUMENT;
492 goto done;
493 }
494 object = VME_OBJECT(map_entry);
495
496 if (object == VM_OBJECT_NULL) {
497 /*
498 * there should be a VM object here at this point
499 */
500 kr = KERN_INVALID_ARGUMENT;
501 goto done;
502 }
503 vm_object_lock(object);
504 object->set_cache_attr = TRUE;
505 vm_object_unlock(object);
506
507 vm_map_set_cache_attr_count++;
508 done:
509 vm_map_unlock_read(map);
510
511 return kr;
512 }
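
/*
 * Editor's example (not part of the original source): a caller that has
 * just created a mapping at "va" in "map" could mark the backing VM object
 * so that cache attributes are honored.  Variable names are hypothetical.
 */
#if 0	/* illustrative only */
	kern_return_t kr;

	kr = vm_map_set_cache_attr(map, va);
	if (kr != KERN_SUCCESS) {
		/* "va" is not mapped, or is backed by a submap / no object */
	}
#endif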
513
514
515 #if CONFIG_CODE_DECRYPTION
516 /*
517 * vm_map_apple_protected:
518 * This remaps the requested part of the object with an object backed by
519 * the decrypting pager.
520 * crypt_info contains entry points and session data for the crypt module.
521 * The crypt_info block will be copied by vm_map_apple_protected. The data structures
522 * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
523 */
524 kern_return_t
525 vm_map_apple_protected(
526 vm_map_t map,
527 vm_map_offset_t start,
528 vm_map_offset_t end,
529 vm_object_offset_t crypto_backing_offset,
530 struct pager_crypt_info *crypt_info)
531 {
532 boolean_t map_locked;
533 kern_return_t kr;
534 vm_map_entry_t map_entry;
535 struct vm_map_entry tmp_entry;
536 memory_object_t unprotected_mem_obj;
537 vm_object_t protected_object;
538 vm_map_offset_t map_addr;
539 vm_map_offset_t start_aligned, end_aligned;
540 vm_object_offset_t crypto_start, crypto_end;
541 int vm_flags;
542
543 map_locked = FALSE;
544 unprotected_mem_obj = MEMORY_OBJECT_NULL;
545
546 start_aligned = vm_map_trunc_page(start, PAGE_MASK_64);
547 end_aligned = vm_map_round_page(end, PAGE_MASK_64);
548 start_aligned = vm_map_trunc_page(start_aligned, VM_MAP_PAGE_MASK(map));
549 end_aligned = vm_map_round_page(end_aligned, VM_MAP_PAGE_MASK(map));
550
551 assert(start_aligned == start);
552 assert(end_aligned == end);
553
554 map_addr = start_aligned;
555 for (map_addr = start_aligned;
556 map_addr < end;
557 map_addr = tmp_entry.vme_end) {
558 vm_map_lock(map);
559 map_locked = TRUE;
560
561 /* lookup the protected VM object */
562 if (!vm_map_lookup_entry(map,
563 map_addr,
564 &map_entry) ||
565 map_entry->is_sub_map ||
566 VME_OBJECT(map_entry) == VM_OBJECT_NULL ||
567 !(map_entry->protection & VM_PROT_EXECUTE)) {
568 /* that memory is not properly mapped */
569 kr = KERN_INVALID_ARGUMENT;
570 goto done;
571 }
572
573 /* get the protected object to be decrypted */
574 protected_object = VME_OBJECT(map_entry);
575 if (protected_object == VM_OBJECT_NULL) {
576 /* there should be a VM object here at this point */
577 kr = KERN_INVALID_ARGUMENT;
578 goto done;
579 }
580 /* ensure protected object stays alive while map is unlocked */
581 vm_object_reference(protected_object);
582
583 /* limit the map entry to the area we want to cover */
584 vm_map_clip_start(map, map_entry, start_aligned);
585 vm_map_clip_end(map, map_entry, end_aligned);
586
587 tmp_entry = *map_entry;
588 map_entry = VM_MAP_ENTRY_NULL; /* not valid after unlocking map */
589 vm_map_unlock(map);
590 map_locked = FALSE;
591
592 /*
593 * This map entry might be only partially encrypted
594 * (if not fully "page-aligned").
595 */
596 crypto_start = 0;
597 crypto_end = tmp_entry.vme_end - tmp_entry.vme_start;
598 if (tmp_entry.vme_start < start) {
599 if (tmp_entry.vme_start != start_aligned) {
600 kr = KERN_INVALID_ADDRESS;
601 }
602 crypto_start += (start - tmp_entry.vme_start);
603 }
604 if (tmp_entry.vme_end > end) {
605 if (tmp_entry.vme_end != end_aligned) {
606 kr = KERN_INVALID_ADDRESS;
607 }
608 crypto_end -= (tmp_entry.vme_end - end);
609 }
610
611 /*
612 * This "extra backing offset" is needed to get the decryption
613 * routine to use the right key. It adjusts for the possibly
614 * relative offset of an interposed "4K" pager...
615 */
616 if (crypto_backing_offset == (vm_object_offset_t) -1) {
617 crypto_backing_offset = VME_OFFSET(&tmp_entry);
618 }
619
620 /*
621 * Lookup (and create if necessary) the protected memory object
622 * matching that VM object.
623 * If successful, this also grabs a reference on the memory object,
624 * to guarantee that it doesn't go away before we get a chance to map
625 * it.
626 */
627 unprotected_mem_obj = apple_protect_pager_setup(
628 protected_object,
629 VME_OFFSET(&tmp_entry),
630 crypto_backing_offset,
631 crypt_info,
632 crypto_start,
633 crypto_end);
634
635 /* release extra ref on protected object */
636 vm_object_deallocate(protected_object);
637
638 if (unprotected_mem_obj == NULL) {
639 kr = KERN_FAILURE;
640 goto done;
641 }
642
643 vm_flags = VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
644
645 /* map this memory object in place of the current one */
646 map_addr = tmp_entry.vme_start;
647 kr = vm_map_enter_mem_object(map,
648 &map_addr,
649 (tmp_entry.vme_end -
650 tmp_entry.vme_start),
651 (mach_vm_offset_t) 0,
652 vm_flags,
653 (ipc_port_t) unprotected_mem_obj,
654 0,
655 TRUE,
656 tmp_entry.protection,
657 tmp_entry.max_protection,
658 tmp_entry.inheritance);
659 assert(kr == KERN_SUCCESS);
660 assert(map_addr == tmp_entry.vme_start);
661
662 #if VM_MAP_DEBUG_APPLE_PROTECT
663 printf("APPLE_PROTECT: map %p [0x%llx:0x%llx] pager %p: "
664 "backing:[object:%p,offset:0x%llx,"
665 "crypto_backing_offset:0x%llx,"
666 "crypto_start:0x%llx,crypto_end:0x%llx]\n",
667 map,
668 (uint64_t) map_addr,
669 (uint64_t) (map_addr + (tmp_entry.vme_end -
670 tmp_entry.vme_start)),
671 unprotected_mem_obj,
672 protected_object,
673 VME_OFFSET(&tmp_entry),
674 crypto_backing_offset,
675 crypto_start,
676 crypto_end);
677 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
678
679 /*
680 * Release the reference obtained by
681 * apple_protect_pager_setup().
682 * The mapping (if it succeeded) is now holding a reference on
683 * the memory object.
684 */
685 memory_object_deallocate(unprotected_mem_obj);
686 unprotected_mem_obj = MEMORY_OBJECT_NULL;
687
688 /* continue with next map entry */
689 crypto_backing_offset += (tmp_entry.vme_end -
690 tmp_entry.vme_start);
691 crypto_backing_offset -= crypto_start;
692 }
693 kr = KERN_SUCCESS;
694
695 done:
696 if (map_locked) {
697 vm_map_unlock(map);
698 }
699 return kr;
700 }
701 #endif /* CONFIG_CODE_DECRYPTION */
702
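/*
 * Editor's example (not part of the original source): a hypothetical
 * caller of vm_map_apple_protected().  It assumes the encrypted range has
 * already been mapped executable and that "crypt_info" was populated by
 * the crypt module beforehand (its entry points must stay valid until
 * crypt_end() is invoked, as noted above).
 */
#if 0	/* illustrative only */
	struct pager_crypt_info crypt_info;	/* filled in by the crypt module */
	kern_return_t kr;

	kr = vm_map_apple_protected(map,
				    start,	/* must be map-page aligned */
				    end,	/* must be map-page aligned */
				    (vm_object_offset_t) -1, /* use each entry's own offset */
				    &crypt_info);
#endif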
703
704 lck_grp_t vm_map_lck_grp;
705 lck_grp_attr_t vm_map_lck_grp_attr;
706 lck_attr_t vm_map_lck_attr;
707 lck_attr_t vm_map_lck_rw_attr;
708
709
710 /*
711 * vm_map_init:
712 *
713 * Initialize the vm_map module. Must be called before
714 * any other vm_map routines.
715 *
716 * Map and entry structures are allocated from zones -- we must
717 * initialize those zones.
718 *
719 * There are three zones of interest:
720 *
721 * vm_map_zone: used to allocate maps.
722 * vm_map_entry_zone: used to allocate map entries.
723 * vm_map_entry_reserved_zone: fallback zone for kernel map entries
724 *
725 * The kernel allocates map entries from a special zone that is initially
726 * "crammed" with memory. It would be difficult (perhaps impossible) for
727 * the kernel to allocate more memory to an entry zone when it became
728 * empty since the very act of allocating memory implies the creation
729 * of a new entry.
730 */
731 void
732 vm_map_init(
733 void)
734 {
735 vm_size_t entry_zone_alloc_size;
736 const char *mez_name = "VM map entries";
737
738 vm_map_zone = zinit((vm_map_size_t) sizeof(struct _vm_map), 40*1024,
739 PAGE_SIZE, "maps");
740 zone_change(vm_map_zone, Z_NOENCRYPT, TRUE);
741 #if defined(__LP64__)
742 entry_zone_alloc_size = PAGE_SIZE * 5;
743 #else
744 entry_zone_alloc_size = PAGE_SIZE * 6;
745 #endif
746 vm_map_entry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
747 1024*1024, entry_zone_alloc_size,
748 mez_name);
749 zone_change(vm_map_entry_zone, Z_NOENCRYPT, TRUE);
750 zone_change(vm_map_entry_zone, Z_NOCALLOUT, TRUE);
751 zone_change(vm_map_entry_zone, Z_GZALLOC_EXEMPT, TRUE);
752
753 vm_map_entry_reserved_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
754 kentry_data_size * 64, kentry_data_size,
755 "Reserved VM map entries");
756 zone_change(vm_map_entry_reserved_zone, Z_NOENCRYPT, TRUE);
757
758 vm_map_copy_zone = zinit((vm_map_size_t) sizeof(struct vm_map_copy),
759 16*1024, PAGE_SIZE, "VM map copies");
760 zone_change(vm_map_copy_zone, Z_NOENCRYPT, TRUE);
761
762 vm_map_holes_zone = zinit((vm_map_size_t) sizeof(struct vm_map_links),
763 16*1024, PAGE_SIZE, "VM map holes");
764 zone_change(vm_map_holes_zone, Z_NOENCRYPT, TRUE);
765
766 /*
767 * Cram the map and kentry zones with initial data.
768 * Set reserved_zone non-collectible to aid zone_gc().
769 */
770 zone_change(vm_map_zone, Z_COLLECT, FALSE);
771
772 zone_change(vm_map_entry_reserved_zone, Z_COLLECT, FALSE);
773 zone_change(vm_map_entry_reserved_zone, Z_EXPAND, FALSE);
774 zone_change(vm_map_entry_reserved_zone, Z_FOREIGN, TRUE);
775 zone_change(vm_map_entry_reserved_zone, Z_NOCALLOUT, TRUE);
776 zone_change(vm_map_entry_reserved_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
777 zone_change(vm_map_copy_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
778 zone_change(vm_map_entry_reserved_zone, Z_GZALLOC_EXEMPT, TRUE);
779
780 zone_change(vm_map_holes_zone, Z_COLLECT, TRUE);
781 zone_change(vm_map_holes_zone, Z_EXPAND, TRUE);
782 zone_change(vm_map_holes_zone, Z_FOREIGN, TRUE);
783 zone_change(vm_map_holes_zone, Z_NOCALLOUT, TRUE);
784 zone_change(vm_map_holes_zone, Z_CALLERACCT, TRUE);
785 zone_change(vm_map_holes_zone, Z_GZALLOC_EXEMPT, TRUE);
786
787 /*
788 * Add the stolen memory to zones, adjust zone size and stolen counts.
789 */
790 zcram(vm_map_zone, (vm_offset_t)map_data, map_data_size);
791 zcram(vm_map_entry_reserved_zone, (vm_offset_t)kentry_data, kentry_data_size);
792 zcram(vm_map_holes_zone, (vm_offset_t)map_holes_data, map_holes_data_size);
793 VM_PAGE_MOVE_STOLEN(atop_64(map_data_size) + atop_64(kentry_data_size) + atop_64(map_holes_data_size));
794
795 lck_grp_attr_setdefault(&vm_map_lck_grp_attr);
796 lck_grp_init(&vm_map_lck_grp, "vm_map", &vm_map_lck_grp_attr);
797 lck_attr_setdefault(&vm_map_lck_attr);
798
799 lck_attr_setdefault(&vm_map_lck_rw_attr);
800 lck_attr_cleardebug(&vm_map_lck_rw_attr);
801
802 #if CONFIG_FREEZE
803 default_freezer_init();
804 #endif /* CONFIG_FREEZE */
805 }
806
807 void
808 vm_map_steal_memory(
809 void)
810 {
811 uint32_t kentry_initial_pages;
812
813 map_data_size = round_page(10 * sizeof(struct _vm_map));
814 map_data = pmap_steal_memory(map_data_size);
815
816 /*
817 * kentry_initial_pages corresponds to the number of kernel map entries
818 * required during bootstrap until the asynchronous replenishment
819 * scheme is activated and/or entries are available from the general
820 * map entry pool.
821 */
822 #if defined(__LP64__)
823 kentry_initial_pages = 10;
824 #else
825 kentry_initial_pages = 6;
826 #endif
827
828 #if CONFIG_GZALLOC
829 /* If using the guard allocator, reserve more memory for the kernel
830 * reserved map entry pool.
831 */
832 if (gzalloc_enabled())
833 kentry_initial_pages *= 1024;
834 #endif
835
836 kentry_data_size = kentry_initial_pages * PAGE_SIZE;
837 kentry_data = pmap_steal_memory(kentry_data_size);
838
839 map_holes_data_size = kentry_data_size;
840 map_holes_data = pmap_steal_memory(map_holes_data_size);
841 }
842
843 void
844 vm_kernel_reserved_entry_init(void) {
845 zone_prio_refill_configure(vm_map_entry_reserved_zone, (6*PAGE_SIZE)/sizeof(struct vm_map_entry));
846 zone_prio_refill_configure(vm_map_holes_zone, (6*PAGE_SIZE)/sizeof(struct vm_map_links));
847 }
848
849 void
850 vm_map_disable_hole_optimization(vm_map_t map)
851 {
852 vm_map_entry_t head_entry, hole_entry, next_hole_entry;
853
854 if (map->holelistenabled) {
855
856 head_entry = hole_entry = (vm_map_entry_t) map->holes_list;
857
858 while (hole_entry != NULL) {
859
860 next_hole_entry = hole_entry->vme_next;
861
862 hole_entry->vme_next = NULL;
863 hole_entry->vme_prev = NULL;
864 zfree(vm_map_holes_zone, hole_entry);
865
866 if (next_hole_entry == head_entry) {
867 hole_entry = NULL;
868 } else {
869 hole_entry = next_hole_entry;
870 }
871 }
872
873 map->holes_list = NULL;
874 map->holelistenabled = FALSE;
875
876 map->first_free = vm_map_first_entry(map);
877 SAVE_HINT_HOLE_WRITE(map, NULL);
878 }
879 }
880
881 boolean_t
882 vm_kernel_map_is_kernel(vm_map_t map) {
883 return (map->pmap == kernel_pmap);
884 }
885
886 /*
887 * vm_map_create:
888 *
889 * Creates and returns a new empty VM map with
890 * the given physical map structure, and having
891 * the given lower and upper address bounds.
892 */
893
894 boolean_t vm_map_supports_hole_optimization = TRUE;
895
896 vm_map_t
897 vm_map_create(
898 pmap_t pmap,
899 vm_map_offset_t min,
900 vm_map_offset_t max,
901 boolean_t pageable)
902 {
903 static int color_seed = 0;
904 register vm_map_t result;
905 struct vm_map_links *hole_entry = NULL;
906
907 result = (vm_map_t) zalloc(vm_map_zone);
908 if (result == VM_MAP_NULL)
909 panic("vm_map_create");
910
911 vm_map_first_entry(result) = vm_map_to_entry(result);
912 vm_map_last_entry(result) = vm_map_to_entry(result);
913 result->hdr.nentries = 0;
914 result->hdr.entries_pageable = pageable;
915
916 vm_map_store_init( &(result->hdr) );
917
918 result->hdr.page_shift = PAGE_SHIFT;
919
920 result->size = 0;
921 result->user_wire_limit = MACH_VM_MAX_ADDRESS; /* default limit is unlimited */
922 result->user_wire_size = 0;
923 result->ref_count = 1;
924 #if TASK_SWAPPER
925 result->res_count = 1;
926 result->sw_state = MAP_SW_IN;
927 #endif /* TASK_SWAPPER */
928 result->pmap = pmap;
929 result->min_offset = min;
930 result->max_offset = max;
931 result->wiring_required = FALSE;
932 result->no_zero_fill = FALSE;
933 result->mapped_in_other_pmaps = FALSE;
934 result->wait_for_space = FALSE;
935 result->switch_protect = FALSE;
936 result->disable_vmentry_reuse = FALSE;
937 result->map_disallow_data_exec = FALSE;
938 result->highest_entry_end = 0;
939 result->first_free = vm_map_to_entry(result);
940 result->hint = vm_map_to_entry(result);
941 result->color_rr = (color_seed++) & vm_color_mask;
942 result->jit_entry_exists = FALSE;
943
944 if (vm_map_supports_hole_optimization && pmap != kernel_pmap) {
945 hole_entry = zalloc(vm_map_holes_zone);
946
947 hole_entry->start = min;
948 hole_entry->end = (max > (vm_map_offset_t)MACH_VM_MAX_ADDRESS) ? max : (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
949 result->holes_list = result->hole_hint = hole_entry;
950 hole_entry->prev = hole_entry->next = (vm_map_entry_t) hole_entry;
951 result->holelistenabled = TRUE;
952
953 } else {
954
955 result->holelistenabled = FALSE;
956 }
957
958 #if CONFIG_FREEZE
959 result->default_freezer_handle = NULL;
960 #endif
961 vm_map_lock_init(result);
962 lck_mtx_init_ext(&result->s_lock, &result->s_lock_ext, &vm_map_lck_grp, &vm_map_lck_attr);
963
964 return(result);
965 }
966
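/*
 * Editor's example (not part of the original source): creating a pageable
 * map for a new user address space.  The call to pmap_create() and the
 * exact bounds are hypothetical; real callers derive them from the task
 * being created.
 */
#if 0	/* illustrative only */
	vm_map_t new_map;

	new_map = vm_map_create(pmap_create(ledger, 0, TRUE),	/* 64-bit pmap */
				(vm_map_offset_t) MACH_VM_MIN_ADDRESS,
				(vm_map_offset_t) MACH_VM_MAX_ADDRESS,
				TRUE);	/* entries are pageable */
#endif
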
967 /*
968 * vm_map_entry_create: [ internal use only ]
969 *
970 * Allocates a VM map entry for insertion in the
971 * given map (or map copy). No fields are filled.
972 */
973 #define vm_map_entry_create(map, map_locked) _vm_map_entry_create(&(map)->hdr, map_locked)
974
975 #define vm_map_copy_entry_create(copy, map_locked) \
976 _vm_map_entry_create(&(copy)->cpy_hdr, map_locked)
977 unsigned reserved_zalloc_count, nonreserved_zalloc_count;
978
979 static vm_map_entry_t
980 _vm_map_entry_create(
981 struct vm_map_header *map_header, boolean_t __unused map_locked)
982 {
983 zone_t zone;
984 vm_map_entry_t entry;
985
986 zone = vm_map_entry_zone;
987
988 assert(map_header->entries_pageable ? !map_locked : TRUE);
989
990 if (map_header->entries_pageable) {
991 entry = (vm_map_entry_t) zalloc(zone);
992 }
993 else {
994 entry = (vm_map_entry_t) zalloc_canblock(zone, FALSE);
995
996 if (entry == VM_MAP_ENTRY_NULL) {
997 zone = vm_map_entry_reserved_zone;
998 entry = (vm_map_entry_t) zalloc(zone);
999 OSAddAtomic(1, &reserved_zalloc_count);
1000 } else
1001 OSAddAtomic(1, &nonreserved_zalloc_count);
1002 }
1003
1004 if (entry == VM_MAP_ENTRY_NULL)
1005 panic("vm_map_entry_create");
1006 entry->from_reserved_zone = (zone == vm_map_entry_reserved_zone);
1007
1008 vm_map_store_update( (vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE);
1009 #if MAP_ENTRY_CREATION_DEBUG
1010 entry->vme_creation_maphdr = map_header;
1011 fastbacktrace(&entry->vme_creation_bt[0],
1012 (sizeof(entry->vme_creation_bt)/sizeof(uintptr_t)));
1013 #endif
1014 return(entry);
1015 }
1016
1017 /*
1018 * vm_map_entry_dispose: [ internal use only ]
1019 *
1020 * Inverse of vm_map_entry_create.
1021 *
1022 * The write map lock is held, so nothing special is
1023 * needed to ensure the correctness
1024 * of the stores.
1025 */
1026 #define vm_map_entry_dispose(map, entry) \
1027 _vm_map_entry_dispose(&(map)->hdr, (entry))
1028
1029 #define vm_map_copy_entry_dispose(map, entry) \
1030 _vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
1031
1032 static void
1033 _vm_map_entry_dispose(
1034 register struct vm_map_header *map_header,
1035 register vm_map_entry_t entry)
1036 {
1037 register zone_t zone;
1038
1039 if (map_header->entries_pageable || !(entry->from_reserved_zone))
1040 zone = vm_map_entry_zone;
1041 else
1042 zone = vm_map_entry_reserved_zone;
1043
1044 if (!map_header->entries_pageable) {
1045 if (zone == vm_map_entry_zone)
1046 OSAddAtomic(-1, &nonreserved_zalloc_count);
1047 else
1048 OSAddAtomic(-1, &reserved_zalloc_count);
1049 }
1050
1051 zfree(zone, entry);
1052 }
1053
1054 #if MACH_ASSERT
1055 static boolean_t first_free_check = FALSE;
1056 boolean_t
1057 first_free_is_valid(
1058 vm_map_t map)
1059 {
1060 if (!first_free_check)
1061 return TRUE;
1062
1063 return( first_free_is_valid_store( map ));
1064 }
1065 #endif /* MACH_ASSERT */
1066
1067
1068 #define vm_map_copy_entry_link(copy, after_where, entry) \
1069 _vm_map_store_entry_link(&(copy)->cpy_hdr, after_where, (entry))
1070
1071 #define vm_map_copy_entry_unlink(copy, entry) \
1072 _vm_map_store_entry_unlink(&(copy)->cpy_hdr, (entry))
1073
1074 #if MACH_ASSERT && TASK_SWAPPER
1075 /*
1076 * vm_map_res_reference:
1077 *
1078 * Adds another valid residence count to the given map.
1079 *
1080 * Map is locked so this function can be called from
1081 * vm_map_swapin.
1082 *
1083 */
1084 void vm_map_res_reference(register vm_map_t map)
1085 {
1086 /* assert map is locked */
1087 assert(map->res_count >= 0);
1088 assert(map->ref_count >= map->res_count);
1089 if (map->res_count == 0) {
1090 lck_mtx_unlock(&map->s_lock);
1091 vm_map_lock(map);
1092 vm_map_swapin(map);
1093 lck_mtx_lock(&map->s_lock);
1094 ++map->res_count;
1095 vm_map_unlock(map);
1096 } else
1097 ++map->res_count;
1098 }
1099
1100 /*
1101 * vm_map_reference_swap:
1102 *
1103 * Adds valid reference and residence counts to the given map.
1104 *
1105 * The map may not be in memory (i.e. zero residence count).
1106 *
1107 */
1108 void vm_map_reference_swap(register vm_map_t map)
1109 {
1110 assert(map != VM_MAP_NULL);
1111 lck_mtx_lock(&map->s_lock);
1112 assert(map->res_count >= 0);
1113 assert(map->ref_count >= map->res_count);
1114 map->ref_count++;
1115 vm_map_res_reference(map);
1116 lck_mtx_unlock(&map->s_lock);
1117 }
1118
1119 /*
1120 * vm_map_res_deallocate:
1121 *
1122 * Decrement residence count on a map; possibly causing swapout.
1123 *
1124 * The map must be in memory (i.e. non-zero residence count).
1125 *
1126 * The map is locked, so this function is callable from vm_map_deallocate.
1127 *
1128 */
1129 void vm_map_res_deallocate(register vm_map_t map)
1130 {
1131 assert(map->res_count > 0);
1132 if (--map->res_count == 0) {
1133 lck_mtx_unlock(&map->s_lock);
1134 vm_map_lock(map);
1135 vm_map_swapout(map);
1136 vm_map_unlock(map);
1137 lck_mtx_lock(&map->s_lock);
1138 }
1139 assert(map->ref_count >= map->res_count);
1140 }
1141 #endif /* MACH_ASSERT && TASK_SWAPPER */
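
/*
 * Editor's example (not part of the original source): under TASK_SWAPPER,
 * the residence-count protocol above pairs a swap reference with a later
 * deallocation.  A sketch of the expected usage; the surrounding code is
 * hypothetical.
 */
#if 0	/* illustrative only */
	vm_map_reference_swap(map);	/* ref_count++, res_count++ (may swap the map in) */
	/* ... operate on the now-resident map ... */
	vm_map_deallocate(map);		/* drops the reference; may lead to swapout */
#endif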
1142
1143 /*
1144 * vm_map_destroy:
1145 *
1146 * Actually destroy a map.
1147 */
1148 void
1149 vm_map_destroy(
1150 vm_map_t map,
1151 int flags)
1152 {
1153 vm_map_lock(map);
1154
1155 /* final cleanup: no need to unnest shared region */
1156 flags |= VM_MAP_REMOVE_NO_UNNESTING;
1157
1158 /* clean up regular map entries */
1159 (void) vm_map_delete(map, map->min_offset, map->max_offset,
1160 flags, VM_MAP_NULL);
1161 /* clean up leftover special mappings (commpage, etc...) */
1162 (void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL,
1163 flags, VM_MAP_NULL);
1164
1165 #if CONFIG_FREEZE
1166 if (map->default_freezer_handle) {
1167 default_freezer_handle_deallocate(map->default_freezer_handle);
1168 map->default_freezer_handle = NULL;
1169 }
1170 #endif
1171 vm_map_disable_hole_optimization(map);
1172 vm_map_unlock(map);
1173
1174 assert(map->hdr.nentries == 0);
1175
1176 if(map->pmap)
1177 pmap_destroy(map->pmap);
1178
1179 zfree(vm_map_zone, map);
1180 }
1181
1182 #if TASK_SWAPPER
1183 /*
1184 * vm_map_swapin/vm_map_swapout
1185 *
1186 * Swap a map in and out, either referencing or releasing its resources.
1187 * These functions are internal use only; however, they must be exported
1188 * because they may be called from macros, which are exported.
1189 *
1190 * In the case of swapout, there could be races on the residence count,
1191 * so if the residence count is up, we return, assuming that a
1192 * vm_map_deallocate() call in the near future will bring us back.
1193 *
1194 * Locking:
1195 * -- We use the map write lock for synchronization among races.
1196 * -- The map write lock, and not the simple s_lock, protects the
1197 * swap state of the map.
1198 * -- If a map entry is a share map, then we hold both locks, in
1199 * hierarchical order.
1200 *
1201 * Synchronization Notes:
1202 * 1) If a vm_map_swapin() call happens while swapout in progress, it
1203 * will block on the map lock and proceed when swapout is through.
1204 * 2) A vm_map_reference() call at this time is illegal, and will
1205 * cause a panic. vm_map_reference() is only allowed on resident
1206 * maps, since it refuses to block.
1207 * 3) A vm_map_swapin() call during a swapin will block, and
1208 * proceed when the first swapin is done, turning into a nop.
1209 * This is the reason the res_count is not incremented until
1210 * after the swapin is complete.
1211 * 4) There is a timing hole after the checks of the res_count, before
1212 * the map lock is taken, during which a swapin may get the lock
1213 * before a swapout about to happen. If this happens, the swapin
1214 * will detect the state and increment the reference count, causing
1215 * the swapout to be a nop, thereby delaying it until a later
1216 * vm_map_deallocate. If the swapout gets the lock first, then
1217 * the swapin will simply block until the swapout is done, and
1218 * then proceed.
1219 *
1220 * Because vm_map_swapin() is potentially an expensive operation, it
1221 * should be used with caution.
1222 *
1223 * Invariants:
1224 * 1) A map with a residence count of zero is either swapped, or
1225 * being swapped.
1226 * 2) A map with a non-zero residence count is either resident,
1227 * or being swapped in.
1228 */
1229
1230 int vm_map_swap_enable = 1;
1231
1232 void vm_map_swapin (vm_map_t map)
1233 {
1234 register vm_map_entry_t entry;
1235
1236 if (!vm_map_swap_enable) /* debug */
1237 return;
1238
1239 /*
1240 * Map is locked
1241 * First deal with various races.
1242 */
1243 if (map->sw_state == MAP_SW_IN)
1244 /*
1245 * we raced with swapout and won. Returning will increment
1246 * the res_count, turning the swapout into a nop.
1247 */
1248 return;
1249
1250 /*
1251 * The residence count must be zero. If we raced with another
1252 * swapin, the state would have been IN; if we raced with a
1253 * swapout (after another competing swapin), we must have lost
1254 * the race to get here (see above comment), in which case
1255 * res_count is still 0.
1256 */
1257 assert(map->res_count == 0);
1258
1259 /*
1260 * There are no intermediate states of a map going out or
1261 * coming in, since the map is locked during the transition.
1262 */
1263 assert(map->sw_state == MAP_SW_OUT);
1264
1265 /*
1266 * We now operate upon each map entry. If the entry is a sub-
1267 * or share-map, we call vm_map_res_reference upon it.
1268 * If the entry is an object, we call vm_object_res_reference
1269 * (this may iterate through the shadow chain).
1270 * Note that we hold the map locked the entire time,
1271 * even if we get back here via a recursive call in
1272 * vm_map_res_reference.
1273 */
1274 entry = vm_map_first_entry(map);
1275
1276 while (entry != vm_map_to_entry(map)) {
1277 if (VME_OBJECT(entry) != VM_OBJECT_NULL) {
1278 if (entry->is_sub_map) {
1279 vm_map_t lmap = VME_SUBMAP(entry);
1280 lck_mtx_lock(&lmap->s_lock);
1281 vm_map_res_reference(lmap);
1282 lck_mtx_unlock(&lmap->s_lock);
1283 } else {
1284 vm_object_t object = VME_OBJECT(entry);
1285 vm_object_lock(object);
1286 /*
1287 * This call may iterate through the
1288 * shadow chain.
1289 */
1290 vm_object_res_reference(object);
1291 vm_object_unlock(object);
1292 }
1293 }
1294 entry = entry->vme_next;
1295 }
1296 assert(map->sw_state == MAP_SW_OUT);
1297 map->sw_state = MAP_SW_IN;
1298 }
1299
1300 void vm_map_swapout(vm_map_t map)
1301 {
1302 register vm_map_entry_t entry;
1303
1304 /*
1305 * Map is locked
1306 * First deal with various races.
1307 * If we raced with a swapin and lost, the residence count
1308 * will have been incremented to 1, and we simply return.
1309 */
1310 lck_mtx_lock(&map->s_lock);
1311 if (map->res_count != 0) {
1312 lck_mtx_unlock(&map->s_lock);
1313 return;
1314 }
1315 lck_mtx_unlock(&map->s_lock);
1316
1317 /*
1318 * There are no intermediate states of a map going out or
1319 * coming in, since the map is locked during the transition.
1320 */
1321 assert(map->sw_state == MAP_SW_IN);
1322
1323 if (!vm_map_swap_enable)
1324 return;
1325
1326 /*
1327 * We now operate upon each map entry. If the entry is a sub-
1328 * or share-map, we call vm_map_res_deallocate upon it.
1329 * If the entry is an object, we call vm_object_res_deallocate
1330 * (this may iterate through the shadow chain).
1331 * Note that we hold the map locked the entire time,
1332 * even if we get back here via a recursive call in
1333 * vm_map_res_deallocate.
1334 */
1335 entry = vm_map_first_entry(map);
1336
1337 while (entry != vm_map_to_entry(map)) {
1338 if (VME_OBJECT(entry) != VM_OBJECT_NULL) {
1339 if (entry->is_sub_map) {
1340 vm_map_t lmap = VME_SUBMAP(entry);
1341 lck_mtx_lock(&lmap->s_lock);
1342 vm_map_res_deallocate(lmap);
1343 lck_mtx_unlock(&lmap->s_lock);
1344 } else {
1345 vm_object_t object = VME_OBJECT(entry);
1346 vm_object_lock(object);
1347 /*
1348 * This call may take a long time,
1349 * since it could actively push
1350 * out pages (if we implement it
1351 * that way).
1352 */
1353 vm_object_res_deallocate(object);
1354 vm_object_unlock(object);
1355 }
1356 }
1357 entry = entry->vme_next;
1358 }
1359 assert(map->sw_state == MAP_SW_IN);
1360 map->sw_state = MAP_SW_OUT;
1361 }
1362
1363 #endif /* TASK_SWAPPER */
1364
1365 /*
1366 * vm_map_lookup_entry: [ internal use only ]
1367 *
1368 * Calls into the vm map store layer to find the map
1369 * entry containing (or immediately preceding) the
1370 * specified address in the given map; the entry is returned
1371 * in the "entry" parameter. The boolean
1372 * result indicates whether the address is
1373 * actually contained in the map.
1374 */
1375 boolean_t
1376 vm_map_lookup_entry(
1377 register vm_map_t map,
1378 register vm_map_offset_t address,
1379 vm_map_entry_t *entry) /* OUT */
1380 {
1381 return ( vm_map_store_lookup_entry( map, address, entry ));
1382 }
1383
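/*
 * Editor's example (not part of the original source): the common lookup
 * pattern used throughout this file.  A FALSE return means the address is
 * not mapped and "entry" refers to the entry immediately preceding the
 * hole.  Variable names are hypothetical.
 */
#if 0	/* illustrative only */
	vm_map_entry_t entry;

	vm_map_lock_read(map);
	if (vm_map_lookup_entry(map, addr, &entry)) {
		/* "addr" lies inside [entry->vme_start, entry->vme_end) */
	} else {
		/* "addr" is unmapped; "entry" precedes the hole containing it */
	}
	vm_map_unlock_read(map);
#endif
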
1384 /*
1385 * Routine: vm_map_find_space
1386 * Purpose:
1387 * Allocate a range in the specified virtual address map,
1388 * returning the entry allocated for that range.
1389 * Used by kmem_alloc, etc.
1390 *
1391 * The map must NOT be locked. It will be returned locked
1392 * on KERN_SUCCESS, unlocked on failure.
1393 *
1394 * If an entry is allocated, the object/offset fields
1395 * are initialized to zero.
1396 */
1397 kern_return_t
1398 vm_map_find_space(
1399 register vm_map_t map,
1400 vm_map_offset_t *address, /* OUT */
1401 vm_map_size_t size,
1402 vm_map_offset_t mask,
1403 int flags,
1404 vm_map_entry_t *o_entry) /* OUT */
1405 {
1406 vm_map_entry_t entry, new_entry;
1407 register vm_map_offset_t start;
1408 register vm_map_offset_t end;
1409 vm_map_entry_t hole_entry;
1410
1411 if (size == 0) {
1412 *address = 0;
1413 return KERN_INVALID_ARGUMENT;
1414 }
1415
1416 if (flags & VM_FLAGS_GUARD_AFTER) {
1417 /* account for the back guard page in the size */
1418 size += VM_MAP_PAGE_SIZE(map);
1419 }
1420
1421 new_entry = vm_map_entry_create(map, FALSE);
1422
1423 /*
1424 * Look for the first possible address; if there's already
1425 * something at this address, we have to start after it.
1426 */
1427
1428 vm_map_lock(map);
1429
1430 if( map->disable_vmentry_reuse == TRUE) {
1431 VM_MAP_HIGHEST_ENTRY(map, entry, start);
1432 } else {
1433 if (map->holelistenabled) {
1434 hole_entry = (vm_map_entry_t)map->holes_list;
1435
1436 if (hole_entry == NULL) {
1437 /*
1438 * No more space in the map?
1439 */
1440 vm_map_entry_dispose(map, new_entry);
1441 vm_map_unlock(map);
1442 return(KERN_NO_SPACE);
1443 }
1444
1445 entry = hole_entry;
1446 start = entry->vme_start;
1447 } else {
1448 assert(first_free_is_valid(map));
1449 if ((entry = map->first_free) == vm_map_to_entry(map))
1450 start = map->min_offset;
1451 else
1452 start = entry->vme_end;
1453 }
1454 }
1455
1456 /*
1457 * In any case, the "entry" always precedes
1458 * the proposed new region throughout the loop:
1459 */
1460
1461 while (TRUE) {
1462 register vm_map_entry_t next;
1463
1464 /*
1465 * Find the end of the proposed new region.
1466 * Be sure we didn't go beyond the end, or
1467 * wrap around the address.
1468 */
1469
1470 if (flags & VM_FLAGS_GUARD_BEFORE) {
1471 /* reserve space for the front guard page */
1472 start += VM_MAP_PAGE_SIZE(map);
1473 }
1474 end = ((start + mask) & ~mask);
1475
1476 if (end < start) {
1477 vm_map_entry_dispose(map, new_entry);
1478 vm_map_unlock(map);
1479 return(KERN_NO_SPACE);
1480 }
1481 start = end;
1482 end += size;
1483
1484 if ((end > map->max_offset) || (end < start)) {
1485 vm_map_entry_dispose(map, new_entry);
1486 vm_map_unlock(map);
1487 return(KERN_NO_SPACE);
1488 }
1489
1490 next = entry->vme_next;
1491
1492 if (map->holelistenabled) {
1493 if (entry->vme_end >= end)
1494 break;
1495 } else {
1496 /*
1497 * If there are no more entries, we must win.
1498 *
1499 * OR
1500 *
1501 * If there is another entry, it must be
1502 * after the end of the potential new region.
1503 */
1504
1505 if (next == vm_map_to_entry(map))
1506 break;
1507
1508 if (next->vme_start >= end)
1509 break;
1510 }
1511
1512 /*
1513 * Didn't fit -- move to the next entry.
1514 */
1515
1516 entry = next;
1517
1518 if (map->holelistenabled) {
1519 if (entry == (vm_map_entry_t) map->holes_list) {
1520 /*
1521 * Wrapped around
1522 */
1523 vm_map_entry_dispose(map, new_entry);
1524 vm_map_unlock(map);
1525 return(KERN_NO_SPACE);
1526 }
1527 start = entry->vme_start;
1528 } else {
1529 start = entry->vme_end;
1530 }
1531 }
1532
1533 if (map->holelistenabled) {
1534 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
1535 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
1536 }
1537 }
1538
1539 /*
1540 * At this point,
1541 * "start" and "end" should define the endpoints of the
1542 * available new range, and
1543 * "entry" should refer to the region before the new
1544 * range, and
1545 *
1546 * the map should be locked.
1547 */
1548
1549 if (flags & VM_FLAGS_GUARD_BEFORE) {
1550 /* go back for the front guard page */
1551 start -= VM_MAP_PAGE_SIZE(map);
1552 }
1553 *address = start;
1554
1555 assert(start < end);
1556 new_entry->vme_start = start;
1557 new_entry->vme_end = end;
1558 assert(page_aligned(new_entry->vme_start));
1559 assert(page_aligned(new_entry->vme_end));
1560 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
1561 VM_MAP_PAGE_MASK(map)));
1562 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
1563 VM_MAP_PAGE_MASK(map)));
1564
1565 new_entry->is_shared = FALSE;
1566 new_entry->is_sub_map = FALSE;
1567 new_entry->use_pmap = TRUE;
1568 VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
1569 VME_OFFSET_SET(new_entry, (vm_object_offset_t) 0);
1570
1571 new_entry->needs_copy = FALSE;
1572
1573 new_entry->inheritance = VM_INHERIT_DEFAULT;
1574 new_entry->protection = VM_PROT_DEFAULT;
1575 new_entry->max_protection = VM_PROT_ALL;
1576 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
1577 new_entry->wired_count = 0;
1578 new_entry->user_wired_count = 0;
1579
1580 new_entry->in_transition = FALSE;
1581 new_entry->needs_wakeup = FALSE;
1582 new_entry->no_cache = FALSE;
1583 new_entry->permanent = FALSE;
1584 new_entry->superpage_size = FALSE;
1585 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
1586 new_entry->map_aligned = TRUE;
1587 } else {
1588 new_entry->map_aligned = FALSE;
1589 }
1590
1591 new_entry->used_for_jit = FALSE;
1592 new_entry->zero_wired_pages = FALSE;
1593 new_entry->iokit_acct = FALSE;
1594 new_entry->vme_resilient_codesign = FALSE;
1595 new_entry->vme_resilient_media = FALSE;
1596
1597 int alias;
1598 VM_GET_FLAGS_ALIAS(flags, alias);
1599 VME_ALIAS_SET(new_entry, alias);
1600
1601 /*
1602 * Insert the new entry into the list
1603 */
1604
1605 vm_map_store_entry_link(map, entry, new_entry);
1606
1607 map->size += size;
1608
1609 /*
1610 * Update the lookup hint
1611 */
1612 SAVE_HINT_MAP_WRITE(map, new_entry);
1613
1614 *o_entry = new_entry;
1615 return(KERN_SUCCESS);
1616 }
1617
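/*
 * Editor's example (not part of the original source): a kmem_alloc()-style
 * caller.  On KERN_SUCCESS the map comes back locked and the new entry has
 * a null object, so the caller installs its own object and then unlocks.
 * Variable names are hypothetical.
 */
#if 0	/* illustrative only */
	vm_map_offset_t addr;
	vm_map_entry_t entry;
	kern_return_t kr;

	kr = vm_map_find_space(kernel_map, &addr, size,
			       (vm_map_offset_t) 0, 0, &entry);
	if (kr == KERN_SUCCESS) {
		VME_OBJECT_SET(entry, object);
		VME_OFFSET_SET(entry, offset);
		vm_map_unlock(kernel_map);
	}
#endif
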
1618 int vm_map_pmap_enter_print = FALSE;
1619 int vm_map_pmap_enter_enable = FALSE;
1620
1621 /*
1622 * Routine: vm_map_pmap_enter [internal only]
1623 *
1624 * Description:
1625 * Force pages from the specified object to be entered into
1626 * the pmap at the specified address if they are present.
1627 * As soon as a page is not found in the object, the scan ends.
1628 *
1629 * Returns:
1630 * Nothing.
1631 *
1632 * In/out conditions:
1633 * The source map should not be locked on entry.
1634 */
1635 __unused static void
1636 vm_map_pmap_enter(
1637 vm_map_t map,
1638 register vm_map_offset_t addr,
1639 register vm_map_offset_t end_addr,
1640 register vm_object_t object,
1641 vm_object_offset_t offset,
1642 vm_prot_t protection)
1643 {
1644 int type_of_fault;
1645 kern_return_t kr;
1646
1647 if(map->pmap == 0)
1648 return;
1649
1650 while (addr < end_addr) {
1651 register vm_page_t m;
1652
1653
1654 /*
1655 * TODO:
1656 * From vm_map_enter(), we come into this function without the map
1657 * lock held or the object lock held.
1658 * We haven't taken a reference on the object either.
1659 * We should do a proper lookup on the map to make sure
1660 * that things are sane before we go locking objects that
1661 * could have been deallocated from under us.
1662 */
1663
1664 vm_object_lock(object);
1665
1666 m = vm_page_lookup(object, offset);
1667 /*
1668 * ENCRYPTED SWAP:
1669 * The user should never see encrypted data, so do not
1670 * enter an encrypted page in the page table.
1671 */
1672 if (m == VM_PAGE_NULL || m->busy || m->encrypted ||
1673 m->fictitious ||
1674 (m->unusual && ( m->error || m->restart || m->absent))) {
1675 vm_object_unlock(object);
1676 return;
1677 }
1678
1679 if (vm_map_pmap_enter_print) {
1680 printf("vm_map_pmap_enter:");
1681 printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
1682 map, (unsigned long long)addr, object, (unsigned long long)offset);
1683 }
1684 type_of_fault = DBG_CACHE_HIT_FAULT;
1685 kr = vm_fault_enter(m, map->pmap, addr, protection, protection,
1686 VM_PAGE_WIRED(m), FALSE, FALSE, FALSE,
1687 0, /* XXX need user tag / alias? */
1688 0, /* alternate accounting? */
1689 NULL,
1690 &type_of_fault);
1691
1692 vm_object_unlock(object);
1693
1694 offset += PAGE_SIZE_64;
1695 addr += PAGE_SIZE;
1696 }
1697 }
1698
1699 boolean_t vm_map_pmap_is_empty(
1700 vm_map_t map,
1701 vm_map_offset_t start,
1702 vm_map_offset_t end);
1703 boolean_t vm_map_pmap_is_empty(
1704 vm_map_t map,
1705 vm_map_offset_t start,
1706 vm_map_offset_t end)
1707 {
1708 #ifdef MACHINE_PMAP_IS_EMPTY
1709 return pmap_is_empty(map->pmap, start, end);
1710 #else /* MACHINE_PMAP_IS_EMPTY */
1711 vm_map_offset_t offset;
1712 ppnum_t phys_page;
1713
1714 if (map->pmap == NULL) {
1715 return TRUE;
1716 }
1717
1718 for (offset = start;
1719 offset < end;
1720 offset += PAGE_SIZE) {
1721 phys_page = pmap_find_phys(map->pmap, offset);
1722 if (phys_page) {
1723 kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
1724 "page %d at 0x%llx\n",
1725 map, (long long)start, (long long)end,
1726 phys_page, (long long)offset);
1727 return FALSE;
1728 }
1729 }
1730 return TRUE;
1731 #endif /* MACHINE_PMAP_IS_EMPTY */
1732 }
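
/*
 * Editor's example (not part of the original source): vm_map_pmap_is_empty()
 * is mainly a debugging aid; a caller that has just reserved a fresh range
 * might assert that no pages are resident there yet.
 */
#if 0	/* illustrative only */
	assert(vm_map_pmap_is_empty(map, *address, *address + size));
#endif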
1733
1734 #define MAX_TRIES_TO_GET_RANDOM_ADDRESS 1000
1735 kern_return_t
1736 vm_map_random_address_for_size(
1737 vm_map_t map,
1738 vm_map_offset_t *address,
1739 vm_map_size_t size)
1740 {
1741 kern_return_t kr = KERN_SUCCESS;
1742 int tries = 0;
1743 vm_map_offset_t random_addr = 0;
1744 vm_map_offset_t hole_end;
1745
1746 vm_map_entry_t next_entry = VM_MAP_ENTRY_NULL;
1747 vm_map_entry_t prev_entry = VM_MAP_ENTRY_NULL;
1748 vm_map_size_t vm_hole_size = 0;
1749 vm_map_size_t addr_space_size;
1750
1751 addr_space_size = vm_map_max(map) - vm_map_min(map);
1752
1753 assert(page_aligned(size));
1754
1755 while (tries < MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
1756 random_addr = ((vm_map_offset_t)random()) << PAGE_SHIFT;
1757 random_addr = vm_map_trunc_page(
1758 vm_map_min(map) +(random_addr % addr_space_size),
1759 VM_MAP_PAGE_MASK(map));
1760
1761 if (vm_map_lookup_entry(map, random_addr, &prev_entry) == FALSE) {
1762 if (prev_entry == vm_map_to_entry(map)) {
1763 next_entry = vm_map_first_entry(map);
1764 } else {
1765 next_entry = prev_entry->vme_next;
1766 }
1767 if (next_entry == vm_map_to_entry(map)) {
1768 hole_end = vm_map_max(map);
1769 } else {
1770 hole_end = next_entry->vme_start;
1771 }
1772 vm_hole_size = hole_end - random_addr;
1773 if (vm_hole_size >= size) {
1774 *address = random_addr;
1775 break;
1776 }
1777 }
1778 tries++;
1779 }
1780
1781 if (tries == MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
1782 kr = KERN_NO_SPACE;
1783 }
1784 return kr;
1785 }
1786
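/*
 * Editor's example (not part of the original source): a sketch of how a
 * randomized placement request might use this helper.  The caller is
 * assumed to hold the map lock, since the helper walks map entries
 * directly; variable names are hypothetical.
 */
#if 0	/* illustrative only */
	vm_map_offset_t start;
	kern_return_t kr;

	kr = vm_map_random_address_for_size(map, &start, size);
	if (kr == KERN_NO_SPACE) {
		/* no hole of "size" bytes found after 1000 random tries */
	}
#endif
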
1787 /*
1788 * Routine: vm_map_enter
1789 *
1790 * Description:
1791 * Allocate a range in the specified virtual address map.
1792 * The resulting range will refer to memory defined by
1793 * the given memory object and offset into that object.
1794 *
1795 * Arguments are as defined in the vm_map call.
1796 */
1797 int _map_enter_debug = 0;
1798 static unsigned int vm_map_enter_restore_successes = 0;
1799 static unsigned int vm_map_enter_restore_failures = 0;
1800 kern_return_t
1801 vm_map_enter(
1802 vm_map_t map,
1803 vm_map_offset_t *address, /* IN/OUT */
1804 vm_map_size_t size,
1805 vm_map_offset_t mask,
1806 int flags,
1807 vm_object_t object,
1808 vm_object_offset_t offset,
1809 boolean_t needs_copy,
1810 vm_prot_t cur_protection,
1811 vm_prot_t max_protection,
1812 vm_inherit_t inheritance)
1813 {
1814 vm_map_entry_t entry, new_entry;
1815 vm_map_offset_t start, tmp_start, tmp_offset;
1816 vm_map_offset_t end, tmp_end;
1817 vm_map_offset_t tmp2_start, tmp2_end;
1818 vm_map_offset_t step;
1819 kern_return_t result = KERN_SUCCESS;
1820 vm_map_t zap_old_map = VM_MAP_NULL;
1821 vm_map_t zap_new_map = VM_MAP_NULL;
1822 boolean_t map_locked = FALSE;
1823 boolean_t pmap_empty = TRUE;
1824 boolean_t new_mapping_established = FALSE;
1825 boolean_t keep_map_locked = ((flags & VM_FLAGS_KEEP_MAP_LOCKED) != 0);
1826 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
1827 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
1828 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
1829 boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
1830 boolean_t is_submap = ((flags & VM_FLAGS_SUBMAP) != 0);
1831 boolean_t permanent = ((flags & VM_FLAGS_PERMANENT) != 0);
1832 boolean_t entry_for_jit = ((flags & VM_FLAGS_MAP_JIT) != 0);
1833 boolean_t iokit_acct = ((flags & VM_FLAGS_IOKIT_ACCT) != 0);
1834 boolean_t resilient_codesign = ((flags & VM_FLAGS_RESILIENT_CODESIGN) != 0);
1835 boolean_t resilient_media = ((flags & VM_FLAGS_RESILIENT_MEDIA) != 0);
1836 unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
1837 vm_tag_t alias, user_alias;
1838 vm_map_offset_t effective_min_offset, effective_max_offset;
1839 kern_return_t kr;
1840 boolean_t clear_map_aligned = FALSE;
1841 vm_map_entry_t hole_entry;
1842
1843 if (superpage_size) {
1844 switch (superpage_size) {
1845 /*
1846 * Note that the current implementation only supports
1847 * a single size for superpages, SUPERPAGE_SIZE, per
1848 * architecture. Once additional sizes need to be
1849 * supported, SUPERPAGE_SIZE will have to be replaced
1850 * with a lookup of the size based on superpage_size.
1851 */
1852 #ifdef __x86_64__
1853 case SUPERPAGE_SIZE_ANY:
1854 /* handle it like 2 MB and round up to page size */
1855 size = (size + 2*1024*1024 - 1) & ~(2*1024*1024 - 1);
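/* fall through: treat SUPERPAGE_SIZE_ANY as a 2MB superpage */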
1856 case SUPERPAGE_SIZE_2MB:
1857 break;
1858 #endif
1859 default:
1860 return KERN_INVALID_ARGUMENT;
1861 }
1862 mask = SUPERPAGE_SIZE-1;
1863 if (size & (SUPERPAGE_SIZE-1))
1864 return KERN_INVALID_ARGUMENT;
1865 inheritance = VM_INHERIT_NONE; /* fork() children won't inherit superpages */
1866 }
1867
1868
1869
1870 if (resilient_codesign || resilient_media) {
1871 if ((cur_protection & (VM_PROT_WRITE | VM_PROT_EXECUTE)) ||
1872 (max_protection & (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
1873 return KERN_PROTECTION_FAILURE;
1874 }
1875 }
1876
1877 if (is_submap) {
1878 if (purgable) {
1879 /* submaps can not be purgeable */
1880 return KERN_INVALID_ARGUMENT;
1881 }
1882 if (object == VM_OBJECT_NULL) {
1883 /* submaps can not be created lazily */
1884 return KERN_INVALID_ARGUMENT;
1885 }
1886 }
1887 if (flags & VM_FLAGS_ALREADY) {
1888 /*
1889 * VM_FLAGS_ALREADY says that it's OK if the same mapping
1890 * is already present. For it to be meaningful, the requested
1891 * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
1892 * we shouldn't try to remove what was mapped there first
1893 * (!VM_FLAGS_OVERWRITE).
1894 */
1895 if ((flags & VM_FLAGS_ANYWHERE) ||
1896 (flags & VM_FLAGS_OVERWRITE)) {
1897 return KERN_INVALID_ARGUMENT;
1898 }
1899 }
1900
1901 effective_min_offset = map->min_offset;
1902
1903 if (flags & VM_FLAGS_BEYOND_MAX) {
1904 /*
1905 * Allow an insertion beyond the map's max offset.
1906 */
1907 if (vm_map_is_64bit(map))
1908 effective_max_offset = 0xFFFFFFFFFFFFF000ULL;
1909 else
1910 effective_max_offset = 0x00000000FFFFF000ULL;
1911 } else {
1912 effective_max_offset = map->max_offset;
1913 }
1914
1915 if (size == 0 ||
1916 (offset & PAGE_MASK_64) != 0) {
1917 *address = 0;
1918 return KERN_INVALID_ARGUMENT;
1919 }
1920
1921 VM_GET_FLAGS_ALIAS(flags, alias);
1922 if (map->pmap == kernel_pmap) {
1923 user_alias = VM_KERN_MEMORY_NONE;
1924 } else {
1925 user_alias = alias;
1926 }
1927
1928 #define RETURN(value) { result = value; goto BailOut; }
1929
1930 assert(page_aligned(*address));
1931 assert(page_aligned(size));
1932
1933 if (!VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map))) {
1934 /*
1935 * In most cases, the caller rounds the size up to the
1936 * map's page size.
1937 * If we get a size that is explicitly not map-aligned here,
1938 * we'll have to respect the caller's wish and mark the
1939 * mapping as "not map-aligned" to avoid tripping the
1940 * map alignment checks later.
1941 */
1942 clear_map_aligned = TRUE;
1943 }
1944 if (!anywhere &&
1945 !VM_MAP_PAGE_ALIGNED(*address, VM_MAP_PAGE_MASK(map))) {
1946 /*
1947 * We've been asked to map at a fixed address and that
1948 * address is not aligned to the map's specific alignment.
1949 * The caller should know what it's doing (i.e. most likely
1950 * mapping some fragmented copy map, transferring memory from
1951 * a VM map with a different alignment), so clear map_aligned
1952 * for this new VM map entry and proceed.
1953 */
1954 clear_map_aligned = TRUE;
1955 }
1956
1957 /*
1958 * Only zero-fill objects are allowed to be purgable.
1959 * LP64todo - limit purgable objects to 32-bits for now
1960 */
1961 if (purgable &&
1962 (offset != 0 ||
1963 (object != VM_OBJECT_NULL &&
1964 (object->vo_size != size ||
1965 object->purgable == VM_PURGABLE_DENY))
1966 || size > ANON_MAX_SIZE)) /* LP64todo: remove when dp capable */
1967 return KERN_INVALID_ARGUMENT;
1968
1969 if (!anywhere && overwrite) {
1970 /*
1971 * Create a temporary VM map to hold the old mappings in the
1972 * affected area while we create the new one.
1973 * This avoids releasing the VM map lock in
1974 * vm_map_entry_delete() and allows atomicity
1975 * when we want to replace some mappings with a new one.
1976 * It also allows us to restore the old VM mappings if the
1977 * new mapping fails.
1978 */
1979 zap_old_map = vm_map_create(PMAP_NULL,
1980 *address,
1981 *address + size,
1982 map->hdr.entries_pageable);
1983 vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
1984 vm_map_disable_hole_optimization(zap_old_map);
1985 }
1986
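/*
 * Restart point: we come back here after dropping the map lock and
 * blocking, when the map was full and "wait_for_space" is set.
 */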
1987 StartAgain: ;
1988
1989 start = *address;
1990
1991 if (anywhere) {
1992 vm_map_lock(map);
1993 map_locked = TRUE;
1994
1995 if (entry_for_jit) {
1996 if (map->jit_entry_exists) {
1997 result = KERN_INVALID_ARGUMENT;
1998 goto BailOut;
1999 }
2000 /*
2001 * Get a random start address.
2002 */
2003 result = vm_map_random_address_for_size(map, address, size);
2004 if (result != KERN_SUCCESS) {
2005 goto BailOut;
2006 }
2007 start = *address;
2008 }
2009
2010
2011 /*
2012 * Calculate the first possible address.
2013 */
2014
2015 if (start < effective_min_offset)
2016 start = effective_min_offset;
2017 if (start > effective_max_offset)
2018 RETURN(KERN_NO_SPACE);
2019
2020 /*
2021 * Look for the first possible address;
2022 * if there's already something at this
2023 * address, we have to start after it.
2024 */
2025
2026 if (map->disable_vmentry_reuse == TRUE) {
2027 VM_MAP_HIGHEST_ENTRY(map, entry, start);
2028 } else {
2029
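/*
 * Two ways to find a free range: walk the map's explicit hole
 * list if it is enabled, otherwise start from the "first_free"
 * hint and walk the entry list.
 */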
2030 if (map->holelistenabled) {
2031 hole_entry = (vm_map_entry_t)map->holes_list;
2032
2033 if (hole_entry == NULL) {
2034 /*
2035 * No more space in the map?
2036 */
2037 result = KERN_NO_SPACE;
2038 goto BailOut;
2039 } else {
2040
2041 boolean_t found_hole = FALSE;
2042
2043 do {
2044 if (hole_entry->vme_start >= start) {
2045 start = hole_entry->vme_start;
2046 found_hole = TRUE;
2047 break;
2048 }
2049
2050 if (hole_entry->vme_end > start) {
2051 found_hole = TRUE;
2052 break;
2053 }
2054 hole_entry = hole_entry->vme_next;
2055
2056 } while (hole_entry != (vm_map_entry_t) map->holes_list);
2057
2058 if (found_hole == FALSE) {
2059 result = KERN_NO_SPACE;
2060 goto BailOut;
2061 }
2062
2063 entry = hole_entry;
2064
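/* don't hand out address 0: skip past the first page */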
2065 if (start == 0)
2066 start += PAGE_SIZE_64;
2067 }
2068 } else {
2069 assert(first_free_is_valid(map));
2070
2071 entry = map->first_free;
2072
2073 if (entry == vm_map_to_entry(map)) {
2074 entry = NULL;
2075 } else {
2076 if (entry->vme_next == vm_map_to_entry(map)){
2077 /*
2078 * Hole at the end of the map.
2079 */
2080 entry = NULL;
2081 } else {
2082 if (start < (entry->vme_next)->vme_start ) {
2083 start = entry->vme_end;
2084 start = vm_map_round_page(start,
2085 VM_MAP_PAGE_MASK(map));
2086 } else {
2087 /*
2088 * Need to do a lookup.
2089 */
2090 entry = NULL;
2091 }
2092 }
2093 }
2094
2095 if (entry == NULL) {
2096 vm_map_entry_t tmp_entry;
2097 if (vm_map_lookup_entry(map, start, &tmp_entry)) {
2098 assert(!entry_for_jit);
2099 start = tmp_entry->vme_end;
2100 start = vm_map_round_page(start,
2101 VM_MAP_PAGE_MASK(map));
2102 }
2103 entry = tmp_entry;
2104 }
2105 }
2106 }
2107
2108 /*
2109 * In any case, the "entry" always precedes
2110 * the proposed new region throughout the
2111 * loop:
2112 */
2113
2114 while (TRUE) {
2115 register vm_map_entry_t next;
2116
2117 /*
2118 * Find the end of the proposed new region.
2119 * Be sure we didn't go beyond the end, or
2120 * wrap around the address.
2121 */
2122
2123 end = ((start + mask) & ~mask);
2124 end = vm_map_round_page(end,
2125 VM_MAP_PAGE_MASK(map));
2126 if (end < start)
2127 RETURN(KERN_NO_SPACE);
2128 start = end;
2129 assert(VM_MAP_PAGE_ALIGNED(start,
2130 VM_MAP_PAGE_MASK(map)));
2131 end += size;
2132
2133 if ((end > effective_max_offset) || (end < start)) {
2134 if (map->wait_for_space) {
2135 assert(!keep_map_locked);
2136 if (size <= (effective_max_offset -
2137 effective_min_offset)) {
2138 assert_wait((event_t)map,
2139 THREAD_ABORTSAFE);
2140 vm_map_unlock(map);
2141 map_locked = FALSE;
2142 thread_block(THREAD_CONTINUE_NULL);
2143 goto StartAgain;
2144 }
2145 }
2146 RETURN(KERN_NO_SPACE);
2147 }
2148
2149 next = entry->vme_next;
2150
2151 if (map->holelistenabled) {
2152 if (entry->vme_end >= end)
2153 break;
2154 } else {
2155 /*
2156 * If there are no more entries, we must win.
2157 *
2158 * OR
2159 *
2160 * If there is another entry, it must be
2161 * after the end of the potential new region.
2162 */
2163
2164 if (next == vm_map_to_entry(map))
2165 break;
2166
2167 if (next->vme_start >= end)
2168 break;
2169 }
2170
2171 /*
2172 * Didn't fit -- move to the next entry.
2173 */
2174
2175 entry = next;
2176
2177 if (map->holelistenabled) {
2178 if (entry == (vm_map_entry_t) map->holes_list) {
2179 /*
2180 * Wrapped around
2181 */
2182 result = KERN_NO_SPACE;
2183 goto BailOut;
2184 }
2185 start = entry->vme_start;
2186 } else {
2187 start = entry->vme_end;
2188 }
2189
2190 start = vm_map_round_page(start,
2191 VM_MAP_PAGE_MASK(map));
2192 }
2193
2194 if (map->holelistenabled) {
2195 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
2196 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
2197 }
2198 }
2199
2200 *address = start;
2201 assert(VM_MAP_PAGE_ALIGNED(*address,
2202 VM_MAP_PAGE_MASK(map)));
2203 } else {
2204 /*
2205 * Verify that:
2206 * the address doesn't itself violate
2207 * the mask requirement.
2208 */
2209
2210 vm_map_lock(map);
2211 map_locked = TRUE;
2212 if ((start & mask) != 0)
2213 RETURN(KERN_NO_SPACE);
2214
2215 /*
2216 * ... the address is within bounds
2217 */
2218
2219 end = start + size;
2220
2221 if ((start < effective_min_offset) ||
2222 (end > effective_max_offset) ||
2223 (start >= end)) {
2224 RETURN(KERN_INVALID_ADDRESS);
2225 }
2226
2227 if (overwrite && zap_old_map != VM_MAP_NULL) {
2228 /*
2229 * Fixed mapping and "overwrite" flag: attempt to
2230 * remove all existing mappings in the specified
2231 * address range, saving them in our "zap_old_map".
2232 */
2233 (void) vm_map_delete(map, start, end,
2234 (VM_MAP_REMOVE_SAVE_ENTRIES |
2235 VM_MAP_REMOVE_NO_MAP_ALIGN),
2236 zap_old_map);
2237 }
2238
2239 /*
2240 * ... the starting address isn't allocated
2241 */
2242
2243 if (vm_map_lookup_entry(map, start, &entry)) {
2244 if (! (flags & VM_FLAGS_ALREADY)) {
2245 RETURN(KERN_NO_SPACE);
2246 }
2247 /*
2248 * Check if what's already there is what we want.
2249 */
2250 tmp_start = start;
2251 tmp_offset = offset;
2252 if (entry->vme_start < start) {
2253 tmp_start -= start - entry->vme_start;
2254 tmp_offset -= start - entry->vme_start;
2255
2256 }
2257 for (; entry->vme_start < end;
2258 entry = entry->vme_next) {
2259 /*
2260 * Check if the mapping's attributes
2261 * match the existing map entry.
2262 */
2263 if (entry == vm_map_to_entry(map) ||
2264 entry->vme_start != tmp_start ||
2265 entry->is_sub_map != is_submap ||
2266 VME_OFFSET(entry) != tmp_offset ||
2267 entry->needs_copy != needs_copy ||
2268 entry->protection != cur_protection ||
2269 entry->max_protection != max_protection ||
2270 entry->inheritance != inheritance ||
2271 entry->iokit_acct != iokit_acct ||
2272 VME_ALIAS(entry) != alias) {
2273 /* not the same mapping ! */
2274 RETURN(KERN_NO_SPACE);
2275 }
2276 /*
2277 * Check if the same object is being mapped.
2278 */
2279 if (is_submap) {
2280 if (VME_SUBMAP(entry) !=
2281 (vm_map_t) object) {
2282 /* not the same submap */
2283 RETURN(KERN_NO_SPACE);
2284 }
2285 } else {
2286 if (VME_OBJECT(entry) != object) {
2287 /* not the same VM object... */
2288 vm_object_t obj2;
2289
2290 obj2 = VME_OBJECT(entry);
2291 if ((obj2 == VM_OBJECT_NULL ||
2292 obj2->internal) &&
2293 (object == VM_OBJECT_NULL ||
2294 object->internal)) {
2295 /*
2296 * ... but both are
2297 * anonymous memory,
2298 * so equivalent.
2299 */
2300 } else {
2301 RETURN(KERN_NO_SPACE);
2302 }
2303 }
2304 }
2305
2306 tmp_offset += entry->vme_end - entry->vme_start;
2307 tmp_start += entry->vme_end - entry->vme_start;
2308 if (entry->vme_end >= end) {
2309 /* reached the end of our mapping */
2310 break;
2311 }
2312 }
2313 /* it all matches: let's use what's already there ! */
2314 RETURN(KERN_MEMORY_PRESENT);
2315 }
2316
2317 /*
2318 * ... the next region doesn't overlap the
2319 * end point.
2320 */
2321
2322 if ((entry->vme_next != vm_map_to_entry(map)) &&
2323 (entry->vme_next->vme_start < end))
2324 RETURN(KERN_NO_SPACE);
2325 }
2326
2327 /*
2328 * At this point,
2329 * "start" and "end" should define the endpoints of the
2330 * available new range, and
2331 * "entry" should refer to the region before the new
2332 * range, and
2333 *
2334 * the map should be locked.
2335 */
2336
2337 /*
2338 * See whether we can avoid creating a new entry (and object) by
2339 * extending one of our neighbors. [So far, we only attempt to
2340 * extend from below.] Note that we can never extend/join
2341 * purgable objects because they need to remain distinct
2342 * entities in order to implement their "volatile object"
2343 * semantics.
2344 */
2345
2346 if (purgable || entry_for_jit) {
2347 if (object == VM_OBJECT_NULL) {
2348
2349 object = vm_object_allocate(size);
2350 object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
2351 object->true_share = TRUE;
2352 if (purgable) {
2353 task_t owner;
2354 object->purgable = VM_PURGABLE_NONVOLATILE;
2355 if (map->pmap == kernel_pmap) {
2356 /*
2357 * Purgeable mappings made in a kernel
2358 * map are "owned" by the kernel itself
2359 * rather than the current user task
2360 * because they're likely to be used by
2361 * more than this user task (see
2362 * execargs_purgeable_allocate(), for
2363 * example).
2364 */
2365 owner = kernel_task;
2366 } else {
2367 owner = current_task();
2368 }
2369 assert(object->vo_purgeable_owner == NULL);
2370 assert(object->resident_page_count == 0);
2371 assert(object->wired_page_count == 0);
2372 vm_object_lock(object);
2373 vm_purgeable_nonvolatile_enqueue(object, owner);
2374 vm_object_unlock(object);
2375 }
2376 offset = (vm_object_offset_t)0;
2377 }
2378 } else if ((is_submap == FALSE) &&
2379 (object == VM_OBJECT_NULL) &&
2380 (entry != vm_map_to_entry(map)) &&
2381 (entry->vme_end == start) &&
2382 (!entry->is_shared) &&
2383 (!entry->is_sub_map) &&
2384 (!entry->in_transition) &&
2385 (!entry->needs_wakeup) &&
2386 (entry->behavior == VM_BEHAVIOR_DEFAULT) &&
2387 (entry->protection == cur_protection) &&
2388 (entry->max_protection == max_protection) &&
2389 (entry->inheritance == inheritance) &&
2390 ((user_alias == VM_MEMORY_REALLOC) ||
2391 (VME_ALIAS(entry) == alias)) &&
2392 (entry->no_cache == no_cache) &&
2393 (entry->permanent == permanent) &&
2394 (!entry->superpage_size && !superpage_size) &&
2395 /*
2396 * No coalescing if not map-aligned, to avoid propagating
2397 * that condition any further than needed:
2398 */
2399 (!entry->map_aligned || !clear_map_aligned) &&
2400 (!entry->zero_wired_pages) &&
2401 (!entry->used_for_jit && !entry_for_jit) &&
2402 (entry->iokit_acct == iokit_acct) &&
2403 (!entry->vme_resilient_codesign) &&
2404 (!entry->vme_resilient_media) &&
2405
2406 ((entry->vme_end - entry->vme_start) + size <=
2407 (user_alias == VM_MEMORY_REALLOC ?
2408 ANON_CHUNK_SIZE :
2409 NO_COALESCE_LIMIT)) &&
2410
2411 (entry->wired_count == 0)) { /* implies user_wired_count == 0 */
2412 if (vm_object_coalesce(VME_OBJECT(entry),
2413 VM_OBJECT_NULL,
2414 VME_OFFSET(entry),
2415 (vm_object_offset_t) 0,
2416 (vm_map_size_t)(entry->vme_end - entry->vme_start),
2417 (vm_map_size_t)(end - entry->vme_end))) {
2418
2419 /*
2420 * Coalesced the two objects - can extend
2421 * the previous map entry to include the
2422 * new range.
2423 */
2424 map->size += (end - entry->vme_end);
2425 assert(entry->vme_start < end);
2426 assert(VM_MAP_PAGE_ALIGNED(end,
2427 VM_MAP_PAGE_MASK(map)));
2428 if (__improbable(vm_debug_events))
2429 DTRACE_VM5(map_entry_extend, vm_map_t, map, vm_map_entry_t, entry, vm_address_t, entry->vme_start, vm_address_t, entry->vme_end, vm_address_t, end);
2430 entry->vme_end = end;
2431 if (map->holelistenabled) {
2432 vm_map_store_update_first_free(map, entry, TRUE);
2433 } else {
2434 vm_map_store_update_first_free(map, map->first_free, TRUE);
2435 }
2436 new_mapping_established = TRUE;
2437 RETURN(KERN_SUCCESS);
2438 }
2439 }
2440
2441 step = superpage_size ? SUPERPAGE_SIZE : (end - start);
2442 new_entry = NULL;
2443
2444 for (tmp2_start = start; tmp2_start<end; tmp2_start += step) {
2445 tmp2_end = tmp2_start + step;
2446 /*
2447 * Create a new entry
2448 * LP64todo - for now, we can only allocate 4GB internal objects
2449 * because the default pager can't page bigger ones. Remove this
2450 * when it can.
2451 *
2452 * XXX FBDP
2453 * The reserved "page zero" in each process's address space can
2454 * be arbitrarily large. Splitting it into separate 4GB objects and
2455 * therefore different VM map entries serves no purpose and just
2456 * slows down operations on the VM map, so let's not split the
2457 * allocation into 4GB chunks if the max protection is NONE. That
2458 * memory should never be accessible, so it will never get to the
2459 * default pager.
2460 */
2461 tmp_start = tmp2_start;
2462 if (object == VM_OBJECT_NULL &&
2463 size > (vm_map_size_t)ANON_CHUNK_SIZE &&
2464 max_protection != VM_PROT_NONE &&
2465 superpage_size == 0)
2466 tmp_end = tmp_start + (vm_map_size_t)ANON_CHUNK_SIZE;
2467 else
2468 tmp_end = tmp2_end;
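/*
 * Create the new entries. Large anonymous mappings (NULL object,
 * max protection not NONE) are carved into ANON_CHUNK_SIZE pieces,
 * one map entry per chunk; otherwise a single entry covers
 * [tmp2_start, tmp2_end).
 */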
2469 do {
2470 new_entry = vm_map_entry_insert(map, entry, tmp_start, tmp_end,
2471 object, offset, needs_copy,
2472 FALSE, FALSE,
2473 cur_protection, max_protection,
2474 VM_BEHAVIOR_DEFAULT,
2475 (entry_for_jit)? VM_INHERIT_NONE: inheritance,
2476 0, no_cache,
2477 permanent,
2478 superpage_size,
2479 clear_map_aligned,
2480 is_submap);
2481
2482 assert((object != kernel_object) || (VM_KERN_MEMORY_NONE != alias));
2483 VME_ALIAS_SET(new_entry, alias);
2484
2485 if (entry_for_jit){
2486 if (!(map->jit_entry_exists)){
2487 new_entry->used_for_jit = TRUE;
2488 map->jit_entry_exists = TRUE;
2489 }
2490 }
2491
2492 if (resilient_codesign &&
2493 ! ((cur_protection | max_protection) &
2494 (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
2495 new_entry->vme_resilient_codesign = TRUE;
2496 }
2497
2498 if (resilient_media &&
2499 ! ((cur_protection | max_protection) &
2500 (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
2501 new_entry->vme_resilient_media = TRUE;
2502 }
2503
2504 assert(!new_entry->iokit_acct);
2505 if (!is_submap &&
2506 object != VM_OBJECT_NULL &&
2507 object->purgable != VM_PURGABLE_DENY) {
2508 assert(new_entry->use_pmap);
2509 assert(!new_entry->iokit_acct);
2510 /*
2511 * Turn off pmap accounting since
2512 * purgeable objects have their
2513 * own ledgers.
2514 */
2515 new_entry->use_pmap = FALSE;
2516 } else if (!is_submap &&
2517 iokit_acct &&
2518 object != VM_OBJECT_NULL &&
2519 object->internal) {
2520 /* alternate accounting */
2521 assert(!new_entry->iokit_acct);
2522 assert(new_entry->use_pmap);
2523 new_entry->iokit_acct = TRUE;
2524 new_entry->use_pmap = FALSE;
2525 DTRACE_VM4(
2526 vm_map_iokit_mapped_region,
2527 vm_map_t, map,
2528 vm_map_offset_t, new_entry->vme_start,
2529 vm_map_offset_t, new_entry->vme_end,
2530 int, VME_ALIAS(new_entry));
2531 vm_map_iokit_mapped_region(
2532 map,
2533 (new_entry->vme_end -
2534 new_entry->vme_start));
2535 } else if (!is_submap) {
2536 assert(!new_entry->iokit_acct);
2537 assert(new_entry->use_pmap);
2538 }
2539
2540 if (is_submap) {
2541 vm_map_t submap;
2542 boolean_t submap_is_64bit;
2543 boolean_t use_pmap;
2544
2545 assert(new_entry->is_sub_map);
2546 assert(!new_entry->use_pmap);
2547 assert(!new_entry->iokit_acct);
2548 submap = (vm_map_t) object;
2549 submap_is_64bit = vm_map_is_64bit(submap);
2550 use_pmap = (user_alias == VM_MEMORY_SHARED_PMAP);
2551 #ifndef NO_NESTED_PMAP
2552 if (use_pmap && submap->pmap == NULL) {
2553 ledger_t ledger = map->pmap->ledger;
2554 /* we need a sub pmap to nest... */
2555 submap->pmap = pmap_create(ledger, 0,
2556 submap_is_64bit);
2557 if (submap->pmap == NULL) {
2558 /* let's proceed without nesting... */
2559 }
2560 }
2561 if (use_pmap && submap->pmap != NULL) {
2562 kr = pmap_nest(map->pmap,
2563 submap->pmap,
2564 tmp_start,
2565 tmp_start,
2566 tmp_end - tmp_start);
2567 if (kr != KERN_SUCCESS) {
2568 printf("vm_map_enter: "
2569 "pmap_nest(0x%llx,0x%llx) "
2570 "error 0x%x\n",
2571 (long long)tmp_start,
2572 (long long)tmp_end,
2573 kr);
2574 } else {
2575 /* we're now nested ! */
2576 new_entry->use_pmap = TRUE;
2577 pmap_empty = FALSE;
2578 }
2579 }
2580 #endif /* NO_NESTED_PMAP */
2581 }
2582 entry = new_entry;
2583
2584 if (superpage_size) {
2585 vm_page_t pages, m;
2586 vm_object_t sp_object;
2587
2588 VME_OFFSET_SET(entry, 0);
2589
2590 /* allocate one superpage */
2591 kr = cpm_allocate(SUPERPAGE_SIZE, &pages, 0, SUPERPAGE_NBASEPAGES-1, TRUE, 0);
2592 if (kr != KERN_SUCCESS) {
2593 /* deallocate whole range... */
2594 new_mapping_established = TRUE;
2595 /* ... but only up to "tmp_end" */
2596 size -= end - tmp_end;
2597 RETURN(kr);
2598 }
2599
2600 /* create one vm_object per superpage */
2601 sp_object = vm_object_allocate((vm_map_size_t)(entry->vme_end - entry->vme_start));
2602 sp_object->phys_contiguous = TRUE;
2603 sp_object->vo_shadow_offset = (vm_object_offset_t)pages->phys_page*PAGE_SIZE;
2604 VME_OBJECT_SET(entry, sp_object);
2605 assert(entry->use_pmap);
2606
2607 /* enter the base pages into the object */
2608 vm_object_lock(sp_object);
2609 for (offset = 0; offset < SUPERPAGE_SIZE; offset += PAGE_SIZE) {
2610 m = pages;
2611 pmap_zero_page(m->phys_page);
2612 pages = NEXT_PAGE(m);
2613 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
2614 vm_page_insert_wired(m, sp_object, offset, VM_KERN_MEMORY_OSFMK);
2615 }
2616 vm_object_unlock(sp_object);
2617 }
2618 } while (tmp_end != tmp2_end &&
2619 (tmp_start = tmp_end) &&
2620 (tmp_end = (tmp2_end - tmp_end > (vm_map_size_t)ANON_CHUNK_SIZE) ?
2621 tmp_end + (vm_map_size_t)ANON_CHUNK_SIZE : tmp2_end));
2622 }
2623
2624 new_mapping_established = TRUE;
2625
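/*
 * Common exit path. On failure, the code below tears down any
 * mappings we created and, if "overwrite" zapped old mappings,
 * attempts to restore them from "zap_old_map".
 */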
2626 BailOut:
2627 assert(map_locked == TRUE);
2628
2629 if (result == KERN_SUCCESS) {
2630 vm_prot_t pager_prot;
2631 memory_object_t pager;
2632
2633 #if DEBUG
2634 if (pmap_empty &&
2635 !(flags & VM_FLAGS_NO_PMAP_CHECK)) {
2636 assert(vm_map_pmap_is_empty(map,
2637 *address,
2638 *address+size));
2639 }
2640 #endif /* DEBUG */
2641
2642 /*
2643 * For "named" VM objects, let the pager know that the
2644 * memory object is being mapped. Some pagers need to keep
2645 * track of this, to know when they can reclaim the memory
2646 * object, for example.
2647 * VM calls memory_object_map() for each mapping (specifying
2648 * the protection of each mapping) and calls
2649 * memory_object_last_unmap() when all the mappings are gone.
2650 */
2651 pager_prot = max_protection;
2652 if (needs_copy) {
2653 /*
2654 * Copy-On-Write mapping: won't modify
2655 * the memory object.
2656 */
2657 pager_prot &= ~VM_PROT_WRITE;
2658 }
2659 if (!is_submap &&
2660 object != VM_OBJECT_NULL &&
2661 object->named &&
2662 object->pager != MEMORY_OBJECT_NULL) {
2663 vm_object_lock(object);
2664 pager = object->pager;
2665 if (object->named &&
2666 pager != MEMORY_OBJECT_NULL) {
2667 assert(object->pager_ready);
2668 vm_object_mapping_wait(object, THREAD_UNINT);
2669 vm_object_mapping_begin(object);
2670 vm_object_unlock(object);
2671
2672 kr = memory_object_map(pager, pager_prot);
2673 assert(kr == KERN_SUCCESS);
2674
2675 vm_object_lock(object);
2676 vm_object_mapping_end(object);
2677 }
2678 vm_object_unlock(object);
2679 }
2680 }
2681
2682 assert(map_locked == TRUE);
2683
2684 if (!keep_map_locked) {
2685 vm_map_unlock(map);
2686 map_locked = FALSE;
2687 }
2688
2689 /*
2690 * We can't hold the map lock if we enter this block.
2691 */
2692
2693 if (result == KERN_SUCCESS) {
2694
2695 /* Wire down the new entry if the user
2696 * requested all new map entries be wired.
2697 */
2698 if ((map->wiring_required)||(superpage_size)) {
2699 assert(!keep_map_locked);
2700 pmap_empty = FALSE; /* pmap won't be empty */
2701 kr = vm_map_wire(map, start, end,
2702 new_entry->protection | VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_MLOCK),
2703 TRUE);
2704 result = kr;
2705 }
2706
2707 }
2708
2709 if (result != KERN_SUCCESS) {
2710 if (new_mapping_established) {
2711 /*
2712 * We have to get rid of the new mappings since we
2713 * won't make them available to the user.
2714 * Try to do that atomically, to minimize the risk
2715 * that someone else creates new mappings in that range.
2716 */
2717 zap_new_map = vm_map_create(PMAP_NULL,
2718 *address,
2719 *address + size,
2720 map->hdr.entries_pageable);
2721 vm_map_set_page_shift(zap_new_map,
2722 VM_MAP_PAGE_SHIFT(map));
2723 vm_map_disable_hole_optimization(zap_new_map);
2724
2725 if (!map_locked) {
2726 vm_map_lock(map);
2727 map_locked = TRUE;
2728 }
2729 (void) vm_map_delete(map, *address, *address+size,
2730 (VM_MAP_REMOVE_SAVE_ENTRIES |
2731 VM_MAP_REMOVE_NO_MAP_ALIGN),
2732 zap_new_map);
2733 }
2734 if (zap_old_map != VM_MAP_NULL &&
2735 zap_old_map->hdr.nentries != 0) {
2736 vm_map_entry_t entry1, entry2;
2737
2738 /*
2739 * The new mapping failed. Attempt to restore
2740 * the old mappings, saved in the "zap_old_map".
2741 */
2742 if (!map_locked) {
2743 vm_map_lock(map);
2744 map_locked = TRUE;
2745 }
2746
2747 /* first check if the coast is still clear */
2748 start = vm_map_first_entry(zap_old_map)->vme_start;
2749 end = vm_map_last_entry(zap_old_map)->vme_end;
2750 if (vm_map_lookup_entry(map, start, &entry1) ||
2751 vm_map_lookup_entry(map, end, &entry2) ||
2752 entry1 != entry2) {
2753 /*
2754 * Part of that range has already been
2755 * re-mapped: we can't restore the old
2756 * mappings...
2757 */
2758 vm_map_enter_restore_failures++;
2759 } else {
2760 /*
2761 * Transfer the saved map entries from
2762 * "zap_old_map" to the original "map",
2763 * inserting them all after "entry1".
2764 */
2765 for (entry2 = vm_map_first_entry(zap_old_map);
2766 entry2 != vm_map_to_entry(zap_old_map);
2767 entry2 = vm_map_first_entry(zap_old_map)) {
2768 vm_map_size_t entry_size;
2769
2770 entry_size = (entry2->vme_end -
2771 entry2->vme_start);
2772 vm_map_store_entry_unlink(zap_old_map,
2773 entry2);
2774 zap_old_map->size -= entry_size;
2775 vm_map_store_entry_link(map, entry1, entry2);
2776 map->size += entry_size;
2777 entry1 = entry2;
2778 }
2779 if (map->wiring_required) {
2780 /*
2781 * XXX TODO: we should rewire the
2782 * old pages here...
2783 */
2784 }
2785 vm_map_enter_restore_successes++;
2786 }
2787 }
2788 }
2789
2790 /*
2791 * The caller is responsible for releasing the lock if it requested to
2792 * keep the map locked.
2793 */
2794 if (map_locked && !keep_map_locked) {
2795 vm_map_unlock(map);
2796 }
2797
2798 /*
2799 * Get rid of the "zap_maps" and all the map entries that
2800 * they may still contain.
2801 */
2802 if (zap_old_map != VM_MAP_NULL) {
2803 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
2804 zap_old_map = VM_MAP_NULL;
2805 }
2806 if (zap_new_map != VM_MAP_NULL) {
2807 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
2808 zap_new_map = VM_MAP_NULL;
2809 }
2810
2811 return result;
2812
2813 #undef RETURN
2814 }
2815
2816
2817 /*
2818 * Counters for the prefault optimization.
2819 */
2820 int64_t vm_prefault_nb_pages = 0;
2821 int64_t vm_prefault_nb_bailout = 0;
2822
2823 static kern_return_t
2824 vm_map_enter_mem_object_helper(
2825 vm_map_t target_map,
2826 vm_map_offset_t *address,
2827 vm_map_size_t initial_size,
2828 vm_map_offset_t mask,
2829 int flags,
2830 ipc_port_t port,
2831 vm_object_offset_t offset,
2832 boolean_t copy,
2833 vm_prot_t cur_protection,
2834 vm_prot_t max_protection,
2835 vm_inherit_t inheritance,
2836 upl_page_list_ptr_t page_list,
2837 unsigned int page_list_count)
2838 {
2839 vm_map_address_t map_addr;
2840 vm_map_size_t map_size;
2841 vm_object_t object;
2842 vm_object_size_t size;
2843 kern_return_t result;
2844 boolean_t mask_cur_protection, mask_max_protection;
2845 boolean_t try_prefault = (page_list_count != 0);
2846 vm_map_offset_t offset_in_mapping = 0;
2847
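/*
 * When VM_PROT_IS_MASK is set in a protection argument, that
 * protection is treated as a mask to be intersected with the named
 * entry's own protection rather than a strict requirement; note
 * that here and strip the flag bit.
 */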
2848 mask_cur_protection = cur_protection & VM_PROT_IS_MASK;
2849 mask_max_protection = max_protection & VM_PROT_IS_MASK;
2850 cur_protection &= ~VM_PROT_IS_MASK;
2851 max_protection &= ~VM_PROT_IS_MASK;
2852
2853 /*
2854 * Check arguments for validity
2855 */
2856 if ((target_map == VM_MAP_NULL) ||
2857 (cur_protection & ~VM_PROT_ALL) ||
2858 (max_protection & ~VM_PROT_ALL) ||
2859 (inheritance > VM_INHERIT_LAST_VALID) ||
2860 (try_prefault && (copy || !page_list)) ||
2861 initial_size == 0) {
2862 return KERN_INVALID_ARGUMENT;
2863 }
2864
2865 {
2866 map_addr = vm_map_trunc_page(*address,
2867 VM_MAP_PAGE_MASK(target_map));
2868 map_size = vm_map_round_page(initial_size,
2869 VM_MAP_PAGE_MASK(target_map));
2870 }
2871 size = vm_object_round_page(initial_size);
2872
2873 /*
2874 * Find the vm object (if any) corresponding to this port.
2875 */
2876 if (!IP_VALID(port)) {
2877 object = VM_OBJECT_NULL;
2878 offset = 0;
2879 copy = FALSE;
2880 } else if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
2881 vm_named_entry_t named_entry;
2882
2883 named_entry = (vm_named_entry_t) port->ip_kobject;
2884
2885 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
2886 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
2887 offset += named_entry->data_offset;
2888 }
2889
2890 /* a few checks to make sure user is obeying rules */
2891 if (size == 0) {
2892 if (offset >= named_entry->size)
2893 return KERN_INVALID_RIGHT;
2894 size = named_entry->size - offset;
2895 }
2896 if (mask_max_protection) {
2897 max_protection &= named_entry->protection;
2898 }
2899 if (mask_cur_protection) {
2900 cur_protection &= named_entry->protection;
2901 }
2902 if ((named_entry->protection & max_protection) !=
2903 max_protection)
2904 return KERN_INVALID_RIGHT;
2905 if ((named_entry->protection & cur_protection) !=
2906 cur_protection)
2907 return KERN_INVALID_RIGHT;
2908 if (offset + size < offset) {
2909 /* overflow */
2910 return KERN_INVALID_ARGUMENT;
2911 }
2912 if (named_entry->size < (offset + initial_size)) {
2913 return KERN_INVALID_ARGUMENT;
2914 }
2915
2916 if (named_entry->is_copy) {
2917 /* for a vm_map_copy, we can only map it whole */
2918 if ((size != named_entry->size) &&
2919 (vm_map_round_page(size,
2920 VM_MAP_PAGE_MASK(target_map)) ==
2921 named_entry->size)) {
2922 /* XXX FBDP use the rounded size... */
2923 size = vm_map_round_page(
2924 size,
2925 VM_MAP_PAGE_MASK(target_map));
2926 }
2927
2928 if (!(flags & VM_FLAGS_ANYWHERE) &&
2929 (offset != 0 ||
2930 size != named_entry->size)) {
2931 /*
2932 * XXX for a mapping at a "fixed" address,
2933 * we can't trim after mapping the whole
2934 * memory entry, so reject a request for a
2935 * partial mapping.
2936 */
2937 return KERN_INVALID_ARGUMENT;
2938 }
2939 }
2940
2941 /* the caller's "offset" is relative to the start of the named entry; */
2942 /* add the named entry's own offset to get the offset in the object */
2943 offset = offset + named_entry->offset;
2944
2945 if (! VM_MAP_PAGE_ALIGNED(size,
2946 VM_MAP_PAGE_MASK(target_map))) {
2947 /*
2948 * Let's not map more than requested;
2949 * vm_map_enter() will handle this "not map-aligned"
2950 * case.
2951 */
2952 map_size = size;
2953 }
2954
2955 named_entry_lock(named_entry);
2956 if (named_entry->is_sub_map) {
2957 vm_map_t submap;
2958
2959 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
2960 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
2961 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap.");
2962 }
2963
2964 submap = named_entry->backing.map;
2965 vm_map_lock(submap);
2966 vm_map_reference(submap);
2967 vm_map_unlock(submap);
2968 named_entry_unlock(named_entry);
2969
2970 result = vm_map_enter(target_map,
2971 &map_addr,
2972 map_size,
2973 mask,
2974 flags | VM_FLAGS_SUBMAP,
2975 (vm_object_t) submap,
2976 offset,
2977 copy,
2978 cur_protection,
2979 max_protection,
2980 inheritance);
2981 if (result != KERN_SUCCESS) {
2982 vm_map_deallocate(submap);
2983 } else {
2984 /*
2985 * No need to lock "submap" just to check its
2986 * "mapped" flag: that flag is never reset
2987 * once it's been set and if we race, we'll
2988 * just end up setting it twice, which is OK.
2989 */
2990 if (submap->mapped_in_other_pmaps == FALSE &&
2991 vm_map_pmap(submap) != PMAP_NULL &&
2992 vm_map_pmap(submap) !=
2993 vm_map_pmap(target_map)) {
2994 /*
2995 * This submap is being mapped in a map
2996 * that uses a different pmap.
2997 * Set its "mapped_in_other_pmaps" flag
2998 * to indicate that we now need to
2999 * remove mappings from all pmaps rather
3000 * than just the submap's pmap.
3001 */
3002 vm_map_lock(submap);
3003 submap->mapped_in_other_pmaps = TRUE;
3004 vm_map_unlock(submap);
3005 }
3006 *address = map_addr;
3007 }
3008 return result;
3009
3010 } else if (named_entry->is_pager) {
3011 unsigned int access;
3012 vm_prot_t protections;
3013 unsigned int wimg_mode;
3014
3015 protections = named_entry->protection & VM_PROT_ALL;
3016 access = GET_MAP_MEM(named_entry->protection);
3017
3018 if (flags & (VM_FLAGS_RETURN_DATA_ADDR|
3019 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
3020 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap.");
3021 }
3022
3023 object = vm_object_enter(named_entry->backing.pager,
3024 named_entry->size,
3025 named_entry->internal,
3026 FALSE,
3027 FALSE);
3028 if (object == VM_OBJECT_NULL) {
3029 named_entry_unlock(named_entry);
3030 return KERN_INVALID_OBJECT;
3031 }
3032
3033 /* JMM - drop reference on pager here */
3034
3035 /* create an extra ref for the named entry */
3036 vm_object_lock(object);
3037 vm_object_reference_locked(object);
3038 named_entry->backing.object = object;
3039 named_entry->is_pager = FALSE;
3040 named_entry_unlock(named_entry);
3041
3042 wimg_mode = object->wimg_bits;
3043
3044 if (access == MAP_MEM_IO) {
3045 wimg_mode = VM_WIMG_IO;
3046 } else if (access == MAP_MEM_COPYBACK) {
3047 wimg_mode = VM_WIMG_USE_DEFAULT;
3048 } else if (access == MAP_MEM_INNERWBACK) {
3049 wimg_mode = VM_WIMG_INNERWBACK;
3050 } else if (access == MAP_MEM_WTHRU) {
3051 wimg_mode = VM_WIMG_WTHRU;
3052 } else if (access == MAP_MEM_WCOMB) {
3053 wimg_mode = VM_WIMG_WCOMB;
3054 }
3055
3056 /* wait for object (if any) to be ready */
3057 if (!named_entry->internal) {
3058 while (!object->pager_ready) {
3059 vm_object_wait(
3060 object,
3061 VM_OBJECT_EVENT_PAGER_READY,
3062 THREAD_UNINT);
3063 vm_object_lock(object);
3064 }
3065 }
3066
3067 if (object->wimg_bits != wimg_mode)
3068 vm_object_change_wimg_mode(object, wimg_mode);
3069
3070 #if VM_OBJECT_TRACKING_OP_TRUESHARE
3071 if (!object->true_share &&
3072 vm_object_tracking_inited) {
3073 void *bt[VM_OBJECT_TRACKING_BTDEPTH];
3074 int num = 0;
3075
3076 num = OSBacktrace(bt,
3077 VM_OBJECT_TRACKING_BTDEPTH);
3078 btlog_add_entry(vm_object_tracking_btlog,
3079 object,
3080 VM_OBJECT_TRACKING_OP_TRUESHARE,
3081 bt,
3082 num);
3083 }
3084 #endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */
3085
3086 object->true_share = TRUE;
3087
3088 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
3089 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
3090 vm_object_unlock(object);
3091
3092 } else if (named_entry->is_copy) {
3093 kern_return_t kr;
3094 vm_map_copy_t copy_map;
3095 vm_map_entry_t copy_entry;
3096 vm_map_offset_t copy_addr;
3097
3098 if (flags & ~(VM_FLAGS_FIXED |
3099 VM_FLAGS_ANYWHERE |
3100 VM_FLAGS_OVERWRITE |
3101 VM_FLAGS_RETURN_4K_DATA_ADDR |
3102 VM_FLAGS_RETURN_DATA_ADDR)) {
3103 named_entry_unlock(named_entry);
3104 return KERN_INVALID_ARGUMENT;
3105 }
3106
3107 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
3108 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
3109 offset_in_mapping = offset - vm_object_trunc_page(offset);
3110 if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR)
3111 offset_in_mapping &= ~((signed)(0xFFF));
3112 offset = vm_object_trunc_page(offset);
3113 map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset;
3114 }
3115
3116 copy_map = named_entry->backing.copy;
3117 assert(copy_map->type == VM_MAP_COPY_ENTRY_LIST);
3118 if (copy_map->type != VM_MAP_COPY_ENTRY_LIST) {
3119 /* unsupported type; should not happen */
3120 printf("vm_map_enter_mem_object: "
3121 "memory_entry->backing.copy "
3122 "unsupported type 0x%x\n",
3123 copy_map->type);
3124 named_entry_unlock(named_entry);
3125 return KERN_INVALID_ARGUMENT;
3126 }
3127
3128 /* reserve a contiguous range */
3129 kr = vm_map_enter(target_map,
3130 &map_addr,
3131 /* map whole mem entry, trim later: */
3132 named_entry->size,
3133 mask,
3134 flags & (VM_FLAGS_ANYWHERE |
3135 VM_FLAGS_OVERWRITE |
3136 VM_FLAGS_RETURN_4K_DATA_ADDR |
3137 VM_FLAGS_RETURN_DATA_ADDR),
3138 VM_OBJECT_NULL,
3139 0,
3140 FALSE, /* copy */
3141 cur_protection,
3142 max_protection,
3143 inheritance);
3144 if (kr != KERN_SUCCESS) {
3145 named_entry_unlock(named_entry);
3146 return kr;
3147 }
3148
3149 copy_addr = map_addr;
3150
3151 for (copy_entry = vm_map_copy_first_entry(copy_map);
3152 copy_entry != vm_map_copy_to_entry(copy_map);
3153 copy_entry = copy_entry->vme_next) {
3154 int remap_flags = 0;
3155 vm_map_t copy_submap;
3156 vm_object_t copy_object;
3157 vm_map_size_t copy_size;
3158 vm_object_offset_t copy_offset;
3159
3160 copy_offset = VME_OFFSET(copy_entry);
3161 copy_size = (copy_entry->vme_end -
3162 copy_entry->vme_start);
3163
3164 /* sanity check */
3165 if ((copy_addr + copy_size) >
3166 (map_addr +
3167 named_entry->size /* XXX full size */ )) {
3168 /* over-mapping too much !? */
3169 kr = KERN_INVALID_ARGUMENT;
3170 /* abort */
3171 break;
3172 }
3173
3174 /* take a reference on the object */
3175 if (copy_entry->is_sub_map) {
3176 remap_flags |= VM_FLAGS_SUBMAP;
3177 copy_submap = VME_SUBMAP(copy_entry);
3178 vm_map_lock(copy_submap);
3179 vm_map_reference(copy_submap);
3180 vm_map_unlock(copy_submap);
3181 copy_object = (vm_object_t) copy_submap;
3182 } else {
3183 copy_object = VME_OBJECT(copy_entry);
3184 vm_object_reference(copy_object);
3185 }
3186
3187 /* over-map the object into destination */
3188 remap_flags |= flags;
3189 remap_flags |= VM_FLAGS_FIXED;
3190 remap_flags |= VM_FLAGS_OVERWRITE;
3191 remap_flags &= ~VM_FLAGS_ANYWHERE;
3192 kr = vm_map_enter(target_map,
3193 &copy_addr,
3194 copy_size,
3195 (vm_map_offset_t) 0,
3196 remap_flags,
3197 copy_object,
3198 copy_offset,
3199 copy,
3200 cur_protection,
3201 max_protection,
3202 inheritance);
3203 if (kr != KERN_SUCCESS) {
3204 if (copy_entry->is_sub_map) {
3205 vm_map_deallocate(copy_submap);
3206 } else {
3207 vm_object_deallocate(copy_object);
3208 }
3209 /* abort */
3210 break;
3211 }
3212
3213 /* next mapping */
3214 copy_addr += copy_size;
3215 }
3216
3217 if (kr == KERN_SUCCESS) {
3218 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
3219 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
3220 *address = map_addr + offset_in_mapping;
3221 } else {
3222 *address = map_addr;
3223 }
3224
3225 if (offset) {
3226 /*
3227 * Trim in front, from 0 to "offset".
3228 */
3229 vm_map_remove(target_map,
3230 map_addr,
3231 map_addr + offset,
3232 0);
3233 *address += offset;
3234 }
3235 if (offset + map_size < named_entry->size) {
3236 /*
3237 * Trim in back, from
3238 * "offset + map_size" to
3239 * "named_entry->size".
3240 */
3241 vm_map_remove(target_map,
3242 (map_addr +
3243 offset + map_size),
3244 (map_addr +
3245 named_entry->size),
3246 0);
3247 }
3248 }
3249 named_entry_unlock(named_entry);
3250
3251 if (kr != KERN_SUCCESS) {
3252 if (! (flags & VM_FLAGS_OVERWRITE)) {
3253 /* deallocate the contiguous range */
3254 (void) vm_deallocate(target_map,
3255 map_addr,
3256 map_size);
3257 }
3258 }
3259
3260 return kr;
3261
3262 } else {
3263 /* This is the case where we are going to map */
3264 /* an already mapped object. If the object is */
3265 /* not ready, it is internal. An external */
3266 /* object cannot be mapped until it is ready, */
3267 /* so we can skip the ready check */
3268 /* in this case. */
3269 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
3270 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
3271 offset_in_mapping = offset - vm_object_trunc_page(offset);
3272 if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR)
3273 offset_in_mapping &= ~((signed)(0xFFF));
3274 offset = vm_object_trunc_page(offset);
3275 map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset;
3276 }
3277
3278 object = named_entry->backing.object;
3279 assert(object != VM_OBJECT_NULL);
3280 named_entry_unlock(named_entry);
3281 vm_object_reference(object);
3282 }
3283 } else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
3284 /*
3285 * JMM - This is temporary until we unify named entries
3286 * and raw memory objects.
3287 *
3288 * Detected fake ip_kotype for a memory object. In
3289 * this case, the port isn't really a port at all, but
3290 * instead is just a raw memory object.
3291 */
3292 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
3293 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
3294 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for raw memory object.");
3295 }
3296
3297 object = vm_object_enter((memory_object_t)port,
3298 size, FALSE, FALSE, FALSE);
3299 if (object == VM_OBJECT_NULL)
3300 return KERN_INVALID_OBJECT;
3301
3302 /* wait for object (if any) to be ready */
3303 if (object != VM_OBJECT_NULL) {
3304 if (object == kernel_object) {
3305 printf("Warning: Attempt to map kernel object"
3306 " by a non-private kernel entity\n");
3307 return KERN_INVALID_OBJECT;
3308 }
3309 if (!object->pager_ready) {
3310 vm_object_lock(object);
3311
3312 while (!object->pager_ready) {
3313 vm_object_wait(object,
3314 VM_OBJECT_EVENT_PAGER_READY,
3315 THREAD_UNINT);
3316 vm_object_lock(object);
3317 }
3318 vm_object_unlock(object);
3319 }
3320 }
3321 } else {
3322 return KERN_INVALID_OBJECT;
3323 }
3324
3325 if (object != VM_OBJECT_NULL &&
3326 object->named &&
3327 object->pager != MEMORY_OBJECT_NULL &&
3328 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
3329 memory_object_t pager;
3330 vm_prot_t pager_prot;
3331 kern_return_t kr;
3332
3333 /*
3334 * For "named" VM objects, let the pager know that the
3335 * memory object is being mapped. Some pagers need to keep
3336 * track of this, to know when they can reclaim the memory
3337 * object, for example.
3338 * VM calls memory_object_map() for each mapping (specifying
3339 * the protection of each mapping) and calls
3340 * memory_object_last_unmap() when all the mappings are gone.
3341 */
3342 pager_prot = max_protection;
3343 if (copy) {
3344 /*
3345 * Copy-On-Write mapping: won't modify the
3346 * memory object.
3347 */
3348 pager_prot &= ~VM_PROT_WRITE;
3349 }
3350 vm_object_lock(object);
3351 pager = object->pager;
3352 if (object->named &&
3353 pager != MEMORY_OBJECT_NULL &&
3354 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
3355 assert(object->pager_ready);
3356 vm_object_mapping_wait(object, THREAD_UNINT);
3357 vm_object_mapping_begin(object);
3358 vm_object_unlock(object);
3359
3360 kr = memory_object_map(pager, pager_prot);
3361 assert(kr == KERN_SUCCESS);
3362
3363 vm_object_lock(object);
3364 vm_object_mapping_end(object);
3365 }
3366 vm_object_unlock(object);
3367 }
3368
3369 /*
3370 * Perform the copy if requested
3371 */
3372
3373 if (copy) {
3374 vm_object_t new_object;
3375 vm_object_offset_t new_offset;
3376
3377 result = vm_object_copy_strategically(object, offset,
3378 map_size,
3379 &new_object, &new_offset,
3380 &copy);
3381
3382
3383 if (result == KERN_MEMORY_RESTART_COPY) {
3384 boolean_t success;
3385 boolean_t src_needs_copy;
3386
3387 /*
3388 * XXX
3389 * We currently ignore src_needs_copy.
3390 * This really is the issue of how to make
3391 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
3392 * non-kernel users to use. Solution forthcoming.
3393 * In the meantime, since we don't allow non-kernel
3394 * memory managers to specify symmetric copy,
3395 * we won't run into problems here.
3396 */
3397 new_object = object;
3398 new_offset = offset;
3399 success = vm_object_copy_quickly(&new_object,
3400 new_offset,
3401 map_size,
3402 &src_needs_copy,
3403 &copy);
3404 assert(success);
3405 result = KERN_SUCCESS;
3406 }
3407 /*
3408 * Throw away the reference to the
3409 * original object, as it won't be mapped.
3410 */
3411
3412 vm_object_deallocate(object);
3413
3414 if (result != KERN_SUCCESS) {
3415 return result;
3416 }
3417
3418 object = new_object;
3419 offset = new_offset;
3420 }
3421
3422 /*
3423 * If users want to try to prefault pages, the mapping and prefault
3424 * needs to be atomic.
3425 */
3426 if (try_prefault)
3427 flags |= VM_FLAGS_KEEP_MAP_LOCKED;
3428
3429 {
3430 result = vm_map_enter(target_map,
3431 &map_addr, map_size,
3432 (vm_map_offset_t)mask,
3433 flags,
3434 object, offset,
3435 copy,
3436 cur_protection, max_protection,
3437 inheritance);
3438 }
3439 if (result != KERN_SUCCESS)
3440 vm_object_deallocate(object);
3441
3442 /*
3443 * Try to prefault, and do not forget to release the vm map lock.
3444 */
3445 if (result == KERN_SUCCESS && try_prefault) {
3446 mach_vm_address_t va = map_addr;
3447 kern_return_t kr = KERN_SUCCESS;
3448 unsigned int i = 0;
3449
3450 for (i = 0; i < page_list_count; ++i) {
3451 if (UPL_VALID_PAGE(page_list, i)) {
3452 /*
3453 * If this call fails, stop trying to
3454 * optimize: the remaining calls are likely
3455 * to fail too.
3456 *
3457 * We don't report an error for such a
3458 * failure, though: prefaulting is an
3459 * optimization, not something critical.
3460 */
3461 kr = pmap_enter_options(target_map->pmap,
3462 va, UPL_PHYS_PAGE(page_list, i),
3463 cur_protection, VM_PROT_NONE,
3464 0, TRUE, PMAP_OPTIONS_NOWAIT, NULL);
3465 if (kr != KERN_SUCCESS) {
3466 OSIncrementAtomic64(&vm_prefault_nb_bailout);
3467 break;
3468 }
3469 OSIncrementAtomic64(&vm_prefault_nb_pages);
3470 }
3471
3472 /* Next virtual address */
3473 va += PAGE_SIZE;
3474 }
3475 vm_map_unlock(target_map);
3476 }
3477
3478 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
3479 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
3480 *address = map_addr + offset_in_mapping;
3481 } else {
3482 *address = map_addr;
3483 }
3484 return result;
3485 }
3486
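/*
 * Map the memory object named by "port" into "target_map"; thin
 * wrapper around vm_map_enter_mem_object_helper() with no prefault
 * page list.
 */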
3487 kern_return_t
3488 vm_map_enter_mem_object(
3489 vm_map_t target_map,
3490 vm_map_offset_t *address,
3491 vm_map_size_t initial_size,
3492 vm_map_offset_t mask,
3493 int flags,
3494 ipc_port_t port,
3495 vm_object_offset_t offset,
3496 boolean_t copy,
3497 vm_prot_t cur_protection,
3498 vm_prot_t max_protection,
3499 vm_inherit_t inheritance)
3500 {
3501 return vm_map_enter_mem_object_helper(target_map, address, initial_size, mask, flags,
3502 port, offset, copy, cur_protection, max_protection,
3503 inheritance, NULL, 0);
3504 }
3505
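/*
 * Variant of vm_map_enter_mem_object() that also prefaults the pages
 * described by "page_list" while the map is still locked.
 */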
3506 kern_return_t
3507 vm_map_enter_mem_object_prefault(
3508 vm_map_t target_map,
3509 vm_map_offset_t *address,
3510 vm_map_size_t initial_size,
3511 vm_map_offset_t mask,
3512 int flags,
3513 ipc_port_t port,
3514 vm_object_offset_t offset,
3515 vm_prot_t cur_protection,
3516 vm_prot_t max_protection,
3517 upl_page_list_ptr_t page_list,
3518 unsigned int page_list_count)
3519 {
3520 return vm_map_enter_mem_object_helper(target_map, address, initial_size, mask, flags,
3521 port, offset, FALSE, cur_protection, max_protection,
3522 VM_INHERIT_DEFAULT, page_list, page_list_count);
3523 }
3524
3525
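/*
 * Like vm_map_enter_mem_object(), but the memory object is designated
 * by a memory_object_control_t instead of an IPC port.
 */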
3526 kern_return_t
3527 vm_map_enter_mem_object_control(
3528 vm_map_t target_map,
3529 vm_map_offset_t *address,
3530 vm_map_size_t initial_size,
3531 vm_map_offset_t mask,
3532 int flags,
3533 memory_object_control_t control,
3534 vm_object_offset_t offset,
3535 boolean_t copy,
3536 vm_prot_t cur_protection,
3537 vm_prot_t max_protection,
3538 vm_inherit_t inheritance)
3539 {
3540 vm_map_address_t map_addr;
3541 vm_map_size_t map_size;
3542 vm_object_t object;
3543 vm_object_size_t size;
3544 kern_return_t result;
3545 memory_object_t pager;
3546 vm_prot_t pager_prot;
3547 kern_return_t kr;
3548
3549 /*
3550 * Check arguments for validity
3551 */
3552 if ((target_map == VM_MAP_NULL) ||
3553 (cur_protection & ~VM_PROT_ALL) ||
3554 (max_protection & ~VM_PROT_ALL) ||
3555 (inheritance > VM_INHERIT_LAST_VALID) ||
3556 initial_size == 0) {
3557 return KERN_INVALID_ARGUMENT;
3558 }
3559
3560 {
3561 map_addr = vm_map_trunc_page(*address,
3562 VM_MAP_PAGE_MASK(target_map));
3563 map_size = vm_map_round_page(initial_size,
3564 VM_MAP_PAGE_MASK(target_map));
3565 }
3566 size = vm_object_round_page(initial_size);
3567
3568 object = memory_object_control_to_vm_object(control);
3569
3570 if (object == VM_OBJECT_NULL)
3571 return KERN_INVALID_OBJECT;
3572
3573 if (object == kernel_object) {
3574 printf("Warning: Attempt to map kernel object"
3575 " by a non-private kernel entity\n");
3576 return KERN_INVALID_OBJECT;
3577 }
3578
3579 vm_object_lock(object);
3580 object->ref_count++;
3581 vm_object_res_reference(object);
3582
3583 /*
3584 * For "named" VM objects, let the pager know that the
3585 * memory object is being mapped. Some pagers need to keep
3586 * track of this, to know when they can reclaim the memory
3587 * object, for example.
3588 * VM calls memory_object_map() for each mapping (specifying
3589 * the protection of each mapping) and calls
3590 * memory_object_last_unmap() when all the mappings are gone.
3591 */
3592 pager_prot = max_protection;
3593 if (copy) {
3594 pager_prot &= ~VM_PROT_WRITE;
3595 }
3596 pager = object->pager;
3597 if (object->named &&
3598 pager != MEMORY_OBJECT_NULL &&
3599 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
3600 assert(object->pager_ready);
3601 vm_object_mapping_wait(object, THREAD_UNINT);
3602 vm_object_mapping_begin(object);
3603 vm_object_unlock(object);
3604
3605 kr = memory_object_map(pager, pager_prot);
3606 assert(kr == KERN_SUCCESS);
3607
3608 vm_object_lock(object);
3609 vm_object_mapping_end(object);
3610 }
3611 vm_object_unlock(object);
3612
3613 /*
3614 * Perform the copy if requested
3615 */
3616
3617 if (copy) {
3618 vm_object_t new_object;
3619 vm_object_offset_t new_offset;
3620
3621 result = vm_object_copy_strategically(object, offset, size,
3622 &new_object, &new_offset,
3623 &copy);
3624
3625
3626 if (result == KERN_MEMORY_RESTART_COPY) {
3627 boolean_t success;
3628 boolean_t src_needs_copy;
3629
3630 /*
3631 * XXX
3632 * We currently ignore src_needs_copy.
3633 * This really is the issue of how to make
3634 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
3635 * non-kernel users to use. Solution forthcoming.
3636 * In the meantime, since we don't allow non-kernel
3637 * memory managers to specify symmetric copy,
3638 * we won't run into problems here.
3639 */
3640 new_object = object;
3641 new_offset = offset;
3642 success = vm_object_copy_quickly(&new_object,
3643 new_offset, size,
3644 &src_needs_copy,
3645 &copy);
3646 assert(success);
3647 result = KERN_SUCCESS;
3648 }
3649 /*
3650 * Throw away the reference to the
3651 * original object, as it won't be mapped.
3652 */
3653
3654 vm_object_deallocate(object);
3655
3656 if (result != KERN_SUCCESS) {
3657 return result;
3658 }
3659
3660 object = new_object;
3661 offset = new_offset;
3662 }
3663
3664 {
3665 result = vm_map_enter(target_map,
3666 &map_addr, map_size,
3667 (vm_map_offset_t)mask,
3668 flags,
3669 object, offset,
3670 copy,
3671 cur_protection, max_protection,
3672 inheritance);
3673 }
3674 if (result != KERN_SUCCESS)
3675 vm_object_deallocate(object);
3676 *address = map_addr;
3677
3678 return result;
3679 }
3680
3681
3682 #if VM_CPM
3683
3684 #ifdef MACH_ASSERT
3685 extern pmap_paddr_t avail_start, avail_end;
3686 #endif
3687
3688 /*
3689 * Allocate memory in the specified map, with the caveat that
3690 * the memory is physically contiguous. This call may fail
3691 * if the system can't find sufficient contiguous memory.
3692 * This call may cause or lead to heart-stopping amounts of
3693 * paging activity.
3694 *
3695 * Memory obtained from this call should be freed in the
3696 * normal way, viz., via vm_deallocate.
3697 */
3698 kern_return_t
3699 vm_map_enter_cpm(
3700 vm_map_t map,
3701 vm_map_offset_t *addr,
3702 vm_map_size_t size,
3703 int flags)
3704 {
3705 vm_object_t cpm_obj;
3706 pmap_t pmap;
3707 vm_page_t m, pages;
3708 kern_return_t kr;
3709 vm_map_offset_t va, start, end, offset;
3710 #if MACH_ASSERT
3711 vm_map_offset_t prev_addr = 0;
3712 #endif /* MACH_ASSERT */
3713
3714 boolean_t anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
3715 vm_tag_t tag;
3716
3717 VM_GET_FLAGS_ALIAS(flags, tag);
3718
3719 if (size == 0) {
3720 *addr = 0;
3721 return KERN_SUCCESS;
3722 }
3723 if (anywhere)
3724 *addr = vm_map_min(map);
3725 else
3726 *addr = vm_map_trunc_page(*addr,
3727 VM_MAP_PAGE_MASK(map));
3728 size = vm_map_round_page(size,
3729 VM_MAP_PAGE_MASK(map));
3730
3731 /*
3732 * LP64todo - cpm_allocate should probably allow
3733 * allocations of >4GB, but not with the current
3734 * algorithm, so just cast down the size for now.
3735 */
3736 if (size > VM_MAX_ADDRESS)
3737 return KERN_RESOURCE_SHORTAGE;
3738 if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
3739 &pages, 0, 0, TRUE, flags)) != KERN_SUCCESS)
3740 return kr;
3741
3742 cpm_obj = vm_object_allocate((vm_object_size_t)size);
3743 assert(cpm_obj != VM_OBJECT_NULL);
3744 assert(cpm_obj->internal);
3745 assert(cpm_obj->vo_size == (vm_object_size_t)size);
3746 assert(cpm_obj->can_persist == FALSE);
3747 assert(cpm_obj->pager_created == FALSE);
3748 assert(cpm_obj->pageout == FALSE);
3749 assert(cpm_obj->shadow == VM_OBJECT_NULL);
3750
3751 /*
3752 * Insert pages into object.
3753 */
3754
3755 vm_object_lock(cpm_obj);
3756 for (offset = 0; offset < size; offset += PAGE_SIZE) {
3757 m = pages;
3758 pages = NEXT_PAGE(m);
3759 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
3760
3761 assert(!m->gobbled);
3762 assert(!m->wanted);
3763 assert(!m->pageout);
3764 assert(!m->tabled);
3765 assert(VM_PAGE_WIRED(m));
3766 /*
3767 * ENCRYPTED SWAP:
3768 * "m" is not supposed to be pageable, so it
3769 * should not be encrypted. It wouldn't be safe
3770 * to enter it in a new VM object while encrypted.
3771 */
3772 ASSERT_PAGE_DECRYPTED(m);
3773 assert(m->busy);
3774 assert(m->phys_page>=(avail_start>>PAGE_SHIFT) && m->phys_page<=(avail_end>>PAGE_SHIFT));
3775
3776 m->busy = FALSE;
3777 vm_page_insert(m, cpm_obj, offset);
3778 }
3779 assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
3780 vm_object_unlock(cpm_obj);
3781
3782 /*
3783 * Hang onto a reference on the object in case a
3784 * multi-threaded application for some reason decides
3785 * to deallocate the portion of the address space into
3786 * which we will insert this object.
3787 *
3788 * Unfortunately, we must insert the object now before
3789 * we can talk to the pmap module about which addresses
3790 * must be wired down. Hence, the race with a multi-
3791 * threaded app.
3792 */
3793 vm_object_reference(cpm_obj);
3794
3795 /*
3796 * Insert object into map.
3797 */
3798
3799 kr = vm_map_enter(
3800 map,
3801 addr,
3802 size,
3803 (vm_map_offset_t)0,
3804 flags,
3805 cpm_obj,
3806 (vm_object_offset_t)0,
3807 FALSE,
3808 VM_PROT_ALL,
3809 VM_PROT_ALL,
3810 VM_INHERIT_DEFAULT);
3811
3812 if (kr != KERN_SUCCESS) {
3813 /*
3814 * A CPM object doesn't have can_persist set,
3815 * so all we have to do is deallocate it to
3816 * free up these pages.
3817 */
3818 assert(cpm_obj->pager_created == FALSE);
3819 assert(cpm_obj->can_persist == FALSE);
3820 assert(cpm_obj->pageout == FALSE);
3821 assert(cpm_obj->shadow == VM_OBJECT_NULL);
3822 vm_object_deallocate(cpm_obj); /* kill acquired ref */
3823 vm_object_deallocate(cpm_obj); /* kill creation ref */
3824 }
3825
3826 /*
3827 * Inform the physical mapping system that the
3828 * range of addresses may not fault, so that
3829 * page tables and such can be locked down as well.
3830 */
3831 start = *addr;
3832 end = start + size;
3833 pmap = vm_map_pmap(map);
3834 pmap_pageable(pmap, start, end, FALSE);
3835
3836 /*
3837 * Enter each page into the pmap, to avoid faults.
3838 * Note that this loop could be coded more efficiently,
3839 * if the need arose, rather than looking up each page
3840 * again.
3841 */
3842 for (offset = 0, va = start; offset < size;
3843 va += PAGE_SIZE, offset += PAGE_SIZE) {
3844 int type_of_fault;
3845
3846 vm_object_lock(cpm_obj);
3847 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
3848 assert(m != VM_PAGE_NULL);
3849
3850 vm_page_zero_fill(m);
3851
3852 type_of_fault = DBG_ZERO_FILL_FAULT;
3853
3854 vm_fault_enter(m, pmap, va, VM_PROT_ALL, VM_PROT_WRITE,
3855 VM_PAGE_WIRED(m), FALSE, FALSE, FALSE, 0, NULL,
3856 &type_of_fault);
3857
3858 vm_object_unlock(cpm_obj);
3859 }
3860
3861 #if MACH_ASSERT
3862 /*
3863 * Verify ordering in address space.
3864 */
3865 for (offset = 0; offset < size; offset += PAGE_SIZE) {
3866 vm_object_lock(cpm_obj);
3867 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
3868 vm_object_unlock(cpm_obj);
3869 if (m == VM_PAGE_NULL)
3870 panic("vm_allocate_cpm: obj %p off 0x%llx no page",
3871 cpm_obj, (uint64_t)offset);
3872 assert(m->tabled);
3873 assert(!m->busy);
3874 assert(!m->wanted);
3875 assert(!m->fictitious);
3876 assert(!m->private);
3877 assert(!m->absent);
3878 assert(!m->error);
3879 assert(!m->cleaning);
3880 assert(!m->laundry);
3881 assert(!m->precious);
3882 assert(!m->clustered);
3883 if (offset != 0) {
3884 if (m->phys_page != prev_addr + 1) {
3885 printf("start 0x%llx end 0x%llx va 0x%llx\n",
3886 (uint64_t)start, (uint64_t)end, (uint64_t)va);
3887 printf("obj %p off 0x%llx\n", cpm_obj, (uint64_t)offset);
3888 printf("m %p prev_address 0x%llx\n", m, (uint64_t)prev_addr);
3889 panic("vm_allocate_cpm: pages not contig!");
3890 }
3891 }
3892 prev_addr = m->phys_page;
3893 }
3894 #endif /* MACH_ASSERT */
3895
3896 vm_object_deallocate(cpm_obj); /* kill extra ref */
3897
3898 return kr;
3899 }
3900
3901
3902 #else /* VM_CPM */
3903
3904 /*
3905 * Interface is defined in all cases, but unless the kernel
3906 * is built explicitly for this option, the interface does
3907 * nothing.
3908 */
3909
3910 kern_return_t
3911 vm_map_enter_cpm(
3912 __unused vm_map_t map,
3913 __unused vm_map_offset_t *addr,
3914 __unused vm_map_size_t size,
3915 __unused int flags)
3916 {
3917 return KERN_FAILURE;
3918 }
3919 #endif /* VM_CPM */
3920
3921 /* Not used without nested pmaps */
3922 #ifndef NO_NESTED_PMAP
3923 /*
3924 * Clip and unnest a portion of a nested submap mapping.
3925 */
3926
3927
3928 static void
3929 vm_map_clip_unnest(
3930 vm_map_t map,
3931 vm_map_entry_t entry,
3932 vm_map_offset_t start_unnest,
3933 vm_map_offset_t end_unnest)
3934 {
3935 vm_map_offset_t old_start_unnest = start_unnest;
3936 vm_map_offset_t old_end_unnest = end_unnest;
3937
3938 assert(entry->is_sub_map);
3939 assert(VME_SUBMAP(entry) != NULL);
3940 assert(entry->use_pmap);
3941
3942 /*
3943 * Query the platform for the optimal unnest range.
3944 * DRK: There's some duplication of effort here, since
3945 * callers may have adjusted the range to some extent. This
3946 * routine was introduced to support 1GiB subtree nesting
3947 * for x86 platforms, which can also nest on 2MiB boundaries
3948 * depending on size/alignment.
3949 */
3950 if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) {
3951 log_unnest_badness(map, old_start_unnest, old_end_unnest);
3952 }
3953
3954 if (entry->vme_start > start_unnest ||
3955 entry->vme_end < end_unnest) {
3956 panic("vm_map_clip_unnest(0x%llx,0x%llx): "
3957 "bad nested entry: start=0x%llx end=0x%llx\n",
3958 (long long)start_unnest, (long long)end_unnest,
3959 (long long)entry->vme_start, (long long)entry->vme_end);
3960 }
3961
3962 if (start_unnest > entry->vme_start) {
3963 _vm_map_clip_start(&map->hdr,
3964 entry,
3965 start_unnest);
3966 if (map->holelistenabled) {
3967 vm_map_store_update_first_free(map, NULL, FALSE);
3968 } else {
3969 vm_map_store_update_first_free(map, map->first_free, FALSE);
3970 }
3971 }
3972 if (entry->vme_end > end_unnest) {
3973 _vm_map_clip_end(&map->hdr,
3974 entry,
3975 end_unnest);
3976 if (map->holelistenabled) {
3977 vm_map_store_update_first_free(map, NULL, FALSE);
3978 } else {
3979 vm_map_store_update_first_free(map, map->first_free, FALSE);
3980 }
3981 }
3982
3983 pmap_unnest(map->pmap,
3984 entry->vme_start,
3985 entry->vme_end - entry->vme_start);
3986 if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
3987 /* clean up parent map/maps */
3988 vm_map_submap_pmap_clean(
3989 map, entry->vme_start,
3990 entry->vme_end,
3991 VME_SUBMAP(entry),
3992 VME_OFFSET(entry));
3993 }
3994 entry->use_pmap = FALSE;
3995 if ((map->pmap != kernel_pmap) &&
3996 (VME_ALIAS(entry) == VM_MEMORY_SHARED_PMAP)) {
3997 VME_ALIAS_SET(entry, VM_MEMORY_UNSHARED_PMAP);
3998 }
3999 }
4000 #endif /* NO_NESTED_PMAP */
4001
4002 /*
4003 * vm_map_clip_start: [ internal use only ]
4004 *
4005 * Asserts that the given entry begins at or after
4006 * the specified address; if necessary,
4007 * it splits the entry into two.
4008 */
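/*
 * Editor's illustration (not part of the original source), using
 * hypothetical addresses: given an entry covering [0x1000, 0x5000)
 * and startaddr == 0x3000, the clip splits the entry so that
 *
 *	new_entry:  [0x1000, 0x3000)  -- keeps the original VME_OFFSET
 *	entry:      [0x3000, 0x5000)  -- VME_OFFSET advanced by 0x2000
 *
 * leaving "entry" starting exactly at the specified address; the
 * backing object (or submap) gains one reference for the extra entry.
 */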
4009 void
4010 vm_map_clip_start(
4011 vm_map_t map,
4012 vm_map_entry_t entry,
4013 vm_map_offset_t startaddr)
4014 {
4015 #ifndef NO_NESTED_PMAP
4016 if (entry->is_sub_map &&
4017 entry->use_pmap &&
4018 startaddr >= entry->vme_start) {
4019 vm_map_offset_t start_unnest, end_unnest;
4020
4021 /*
4022 * Make sure "startaddr" is no longer in a nested range
4023 * before we clip. Unnest only the minimum range the platform
4024 * can handle.
4025 * vm_map_clip_unnest may perform additional adjustments to
4026 * the unnest range.
4027 */
4028 start_unnest = startaddr & ~(pmap_nesting_size_min - 1);
4029 end_unnest = start_unnest + pmap_nesting_size_min;
4030 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
4031 }
4032 #endif /* NO_NESTED_PMAP */
4033 if (startaddr > entry->vme_start) {
4034 if (VME_OBJECT(entry) &&
4035 !entry->is_sub_map &&
4036 VME_OBJECT(entry)->phys_contiguous) {
4037 pmap_remove(map->pmap,
4038 (addr64_t)(entry->vme_start),
4039 (addr64_t)(entry->vme_end));
4040 }
4041 _vm_map_clip_start(&map->hdr, entry, startaddr);
4042 if (map->holelistenabled) {
4043 vm_map_store_update_first_free(map, NULL, FALSE);
4044 } else {
4045 vm_map_store_update_first_free(map, map->first_free, FALSE);
4046 }
4047 }
4048 }
4049
4050
4051 #define vm_map_copy_clip_start(copy, entry, startaddr) \
4052 MACRO_BEGIN \
4053 if ((startaddr) > (entry)->vme_start) \
4054 _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \
4055 MACRO_END
4056
4057 /*
4058 * This routine is called only when it is known that
4059 * the entry must be split.
4060 */
4061 static void
4062 _vm_map_clip_start(
4063 register struct vm_map_header *map_header,
4064 register vm_map_entry_t entry,
4065 register vm_map_offset_t start)
4066 {
4067 register vm_map_entry_t new_entry;
4068
4069 /*
4070 * Split off the front portion --
4071 * note that we must insert the new
4072 * entry BEFORE this one, so that
4073 * this entry has the specified starting
4074 * address.
4075 */
4076
4077 if (entry->map_aligned) {
4078 assert(VM_MAP_PAGE_ALIGNED(start,
4079 VM_MAP_HDR_PAGE_MASK(map_header)));
4080 }
4081
4082 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
4083 vm_map_entry_copy_full(new_entry, entry);
4084
4085 new_entry->vme_end = start;
4086 assert(new_entry->vme_start < new_entry->vme_end);
4087 VME_OFFSET_SET(entry, VME_OFFSET(entry) + (start - entry->vme_start));
4088 assert(start < entry->vme_end);
4089 entry->vme_start = start;
4090
4091 _vm_map_store_entry_link(map_header, entry->vme_prev, new_entry);
4092
4093 if (entry->is_sub_map)
4094 vm_map_reference(VME_SUBMAP(new_entry));
4095 else
4096 vm_object_reference(VME_OBJECT(new_entry));
4097 }
4098
4099
4100 /*
4101 * vm_map_clip_end: [ internal use only ]
4102 *
4103 * Asserts that the given entry ends at or before
4104 * the specified address; if necessary,
4105 * it splits the entry into two.
4106 */
4107 void
4108 vm_map_clip_end(
4109 vm_map_t map,
4110 vm_map_entry_t entry,
4111 vm_map_offset_t endaddr)
4112 {
4113 if (endaddr > entry->vme_end) {
4114 /*
4115 * Within the scope of this clipping, limit "endaddr" to
4116 * the end of this map entry...
4117 */
4118 endaddr = entry->vme_end;
4119 }
4120 #ifndef NO_NESTED_PMAP
4121 if (entry->is_sub_map && entry->use_pmap) {
4122 vm_map_offset_t start_unnest, end_unnest;
4123
4124 /*
4125 * Make sure the range between the start of this entry and
4126 * the new "endaddr" is no longer nested before we clip.
4127 * Unnest only the minimum range the platform can handle.
4128 * vm_map_clip_unnest may perform additional adjustments to
4129 * the unnest range.
4130 */
4131 start_unnest = entry->vme_start;
4132 end_unnest =
4133 (endaddr + pmap_nesting_size_min - 1) &
4134 ~(pmap_nesting_size_min - 1);
4135 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
4136 }
4137 #endif /* NO_NESTED_PMAP */
4138 if (endaddr < entry->vme_end) {
4139 if (VME_OBJECT(entry) &&
4140 !entry->is_sub_map &&
4141 VME_OBJECT(entry)->phys_contiguous) {
4142 pmap_remove(map->pmap,
4143 (addr64_t)(entry->vme_start),
4144 (addr64_t)(entry->vme_end));
4145 }
4146 _vm_map_clip_end(&map->hdr, entry, endaddr);
4147 if (map->holelistenabled) {
4148 vm_map_store_update_first_free(map, NULL, FALSE);
4149 } else {
4150 vm_map_store_update_first_free(map, map->first_free, FALSE);
4151 }
4152 }
4153 }
4154
4155
4156 #define vm_map_copy_clip_end(copy, entry, endaddr) \
4157 MACRO_BEGIN \
4158 if ((endaddr) < (entry)->vme_end) \
4159 _vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \
4160 MACRO_END
4161
4162 /*
4163 * This routine is called only when it is known that
4164 * the entry must be split.
4165 */
4166 static void
4167 _vm_map_clip_end(
4168 register struct vm_map_header *map_header,
4169 register vm_map_entry_t entry,
4170 register vm_map_offset_t end)
4171 {
4172 register vm_map_entry_t new_entry;
4173
4174 /*
4175 * Create a new entry and insert it
4176 * AFTER the specified entry
4177 */
4178
4179 if (entry->map_aligned) {
4180 assert(VM_MAP_PAGE_ALIGNED(end,
4181 VM_MAP_HDR_PAGE_MASK(map_header)));
4182 }
4183
4184 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
4185 vm_map_entry_copy_full(new_entry, entry);
4186
4187 assert(entry->vme_start < end);
4188 new_entry->vme_start = entry->vme_end = end;
4189 VME_OFFSET_SET(new_entry,
4190 VME_OFFSET(new_entry) + (end - entry->vme_start));
4191 assert(new_entry->vme_start < new_entry->vme_end);
4192
4193 _vm_map_store_entry_link(map_header, entry, new_entry);
4194
4195 if (entry->is_sub_map)
4196 vm_map_reference(VME_SUBMAP(new_entry));
4197 else
4198 vm_object_reference(VME_OBJECT(new_entry));
4199 }
4200
4201
4202 /*
4203 * VM_MAP_RANGE_CHECK: [ internal use only ]
4204 *
4205 * Asserts that the starting and ending region
4206 * addresses fall within the valid range of the map.
4207 */
4208 #define VM_MAP_RANGE_CHECK(map, start, end) \
4209 MACRO_BEGIN \
4210 if (start < vm_map_min(map)) \
4211 start = vm_map_min(map); \
4212 if (end > vm_map_max(map)) \
4213 end = vm_map_max(map); \
4214 if (start > end) \
4215 start = end; \
4216 MACRO_END
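
/*
 * Editor's note (illustrative, not in the original source): the macro
 * clamps silently rather than reporting an error. For a hypothetical
 * map whose valid range is [0x1000, 0x9000), a caller passing
 * start == 0x0 and end == 0xA000 continues with start == 0x1000 and
 * end == 0x9000; if the clamped start still exceeds the clamped end,
 * start is pulled down to end and the range becomes empty.
 */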
4217
4218 /*
4219 * vm_map_range_check: [ internal use only ]
4220 *
4221 * Check that the region defined by the specified start and
4222 * end addresses is wholly contained within a single map
4223 * entry or set of adjacent map entries of the specified map,
4224 * i.e. the specified region contains no unmapped space.
4225 * If any or all of the region is unmapped, FALSE is returned.
4226 * Otherwise, TRUE is returned and if the output argument 'entry'
4227 * is not NULL it points to the map entry containing the start
4228 * of the region.
4229 *
4230 * The map is locked for reading on entry and is left locked.
4231 */
4232 static boolean_t
4233 vm_map_range_check(
4234 register vm_map_t map,
4235 register vm_map_offset_t start,
4236 register vm_map_offset_t end,
4237 vm_map_entry_t *entry)
4238 {
4239 vm_map_entry_t cur;
4240 register vm_map_offset_t prev;
4241
4242 /*
4243 * Basic sanity checks first
4244 */
4245 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
4246 return (FALSE);
4247
4248 /*
4249 * Check first if the region starts within a valid
4250 * mapping for the map.
4251 */
4252 if (!vm_map_lookup_entry(map, start, &cur))
4253 return (FALSE);
4254
4255 /*
4256 * Optimize for the case that the region is contained
4257 * in a single map entry.
4258 */
4259 if (entry != (vm_map_entry_t *) NULL)
4260 *entry = cur;
4261 if (end <= cur->vme_end)
4262 return (TRUE);
4263
4264 /*
4265 * If the region is not wholly contained within a
4266 * single entry, walk the entries looking for holes.
4267 */
4268 prev = cur->vme_end;
4269 cur = cur->vme_next;
4270 while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
4271 if (end <= cur->vme_end)
4272 return (TRUE);
4273 prev = cur->vme_end;
4274 cur = cur->vme_next;
4275 }
4276 return (FALSE);
4277 }
4278
4279 /*
4280 * vm_map_submap: [ kernel use only ]
4281 *
4282 * Mark the given range as handled by a subordinate map.
4283 *
4284 * This range must have been created with vm_map_find using
4285 * the vm_submap_object, and no other operations may have been
4286 * performed on this range prior to calling vm_map_submap.
4287 *
4288 * Only a limited number of operations can be performed
4289 * within this range after calling vm_map_submap:
4290 * vm_fault
4291 * [Don't try vm_map_copyin!]
4292 *
4293 * To remove a submapping, one must first remove the
4294 * range from the superior map, and then destroy the
4295 * submap (if desired). [Better yet, don't try it.]
4296 */
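/*
 * Editor's sketch (hypothetical, not part of the original source):
 * the expected calling pattern, assuming the caller has already
 * reserved [start, end) in "parent_map" backed by vm_submap_object as
 * described above. "parent_map" and "my_submap" are placeholder names.
 *
 *	kern_return_t kr;
 *
 *	kr = vm_map_submap(parent_map,
 *			   start, end,
 *			   my_submap,
 *			   (vm_map_offset_t)0,	-- offset into the submap
 *			   TRUE);		-- use_pmap: nest if the platform allows
 *	if (kr != KERN_SUCCESS) {
 *		-- range was not a pristine vm_submap_object mapping
 *	}
 */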
4297 kern_return_t
4298 vm_map_submap(
4299 vm_map_t map,
4300 vm_map_offset_t start,
4301 vm_map_offset_t end,
4302 vm_map_t submap,
4303 vm_map_offset_t offset,
4304 #ifdef NO_NESTED_PMAP
4305 __unused
4306 #endif /* NO_NESTED_PMAP */
4307 boolean_t use_pmap)
4308 {
4309 vm_map_entry_t entry;
4310 register kern_return_t result = KERN_INVALID_ARGUMENT;
4311 register vm_object_t object;
4312
4313 vm_map_lock(map);
4314
4315 if (! vm_map_lookup_entry(map, start, &entry)) {
4316 entry = entry->vme_next;
4317 }
4318
4319 if (entry == vm_map_to_entry(map) ||
4320 entry->is_sub_map) {
4321 vm_map_unlock(map);
4322 return KERN_INVALID_ARGUMENT;
4323 }
4324
4325 vm_map_clip_start(map, entry, start);
4326 vm_map_clip_end(map, entry, end);
4327
4328 if ((entry->vme_start == start) && (entry->vme_end == end) &&
4329 (!entry->is_sub_map) &&
4330 ((object = VME_OBJECT(entry)) == vm_submap_object) &&
4331 (object->resident_page_count == 0) &&
4332 (object->copy == VM_OBJECT_NULL) &&
4333 (object->shadow == VM_OBJECT_NULL) &&
4334 (!object->pager_created)) {
4335 VME_OFFSET_SET(entry, (vm_object_offset_t)offset);
4336 VME_OBJECT_SET(entry, VM_OBJECT_NULL);
4337 vm_object_deallocate(object);
4338 entry->is_sub_map = TRUE;
4339 entry->use_pmap = FALSE;
4340 VME_SUBMAP_SET(entry, submap);
4341 vm_map_reference(submap);
4342 if (submap->mapped_in_other_pmaps == FALSE &&
4343 vm_map_pmap(submap) != PMAP_NULL &&
4344 vm_map_pmap(submap) != vm_map_pmap(map)) {
4345 /*
4346 * This submap is being mapped in a map
4347 * that uses a different pmap.
4348 * Set its "mapped_in_other_pmaps" flag
4349 * to indicate that we now need to
4350 * remove mappings from all pmaps rather
4351 * than just the submap's pmap.
4352 */
4353 submap->mapped_in_other_pmaps = TRUE;
4354 }
4355
4356 #ifndef NO_NESTED_PMAP
4357 if (use_pmap) {
4358 /* nest if platform code will allow */
4359 if(submap->pmap == NULL) {
4360 ledger_t ledger = map->pmap->ledger;
4361 submap->pmap = pmap_create(ledger,
4362 (vm_map_size_t) 0, FALSE);
4363 if(submap->pmap == PMAP_NULL) {
4364 vm_map_unlock(map);
4365 return(KERN_NO_SPACE);
4366 }
4367 }
4368 result = pmap_nest(map->pmap,
4369 (VME_SUBMAP(entry))->pmap,
4370 (addr64_t)start,
4371 (addr64_t)start,
4372 (uint64_t)(end - start));
4373 if(result)
4374 panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result);
4375 entry->use_pmap = TRUE;
4376 }
4377 #else /* NO_NESTED_PMAP */
4378 pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end);
4379 #endif /* NO_NESTED_PMAP */
4380 result = KERN_SUCCESS;
4381 }
4382 vm_map_unlock(map);
4383
4384 return(result);
4385 }
4386
4387
4388 /*
4389 * vm_map_protect:
4390 *
4391 * Sets the protection of the specified address
4392 * region in the target map. If "set_max" is
4393 * specified, the maximum protection is to be set;
4394 * otherwise, only the current protection is affected.
4395 */
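/*
 * Editor's sketch (hypothetical, not part of the original source):
 *
 *	-- drop write permission on [start, end) without touching the
 *	-- maximum protection:
 *	kr = vm_map_protect(map, start, end, VM_PROT_READ, FALSE);
 *
 *	-- also lower the maximum protection, so a later attempt to add
 *	-- write access fails the max-protection check below (unless the
 *	-- caller asks for VM_PROT_COPY):
 *	kr = vm_map_protect(map, start, end, VM_PROT_READ, TRUE);
 */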
4396 kern_return_t
4397 vm_map_protect(
4398 register vm_map_t map,
4399 register vm_map_offset_t start,
4400 register vm_map_offset_t end,
4401 register vm_prot_t new_prot,
4402 register boolean_t set_max)
4403 {
4404 register vm_map_entry_t current;
4405 register vm_map_offset_t prev;
4406 vm_map_entry_t entry;
4407 vm_prot_t new_max;
4408
4409 XPR(XPR_VM_MAP,
4410 "vm_map_protect, 0x%X start 0x%X end 0x%X, new 0x%X %d",
4411 map, start, end, new_prot, set_max);
4412
4413 vm_map_lock(map);
4414
4415 /* LP64todo - remove this check when vm_map_commpage64()
4416 * no longer has to stuff in a map_entry for the commpage
4417 * above the map's max_offset.
4418 */
4419 if (start >= map->max_offset) {
4420 vm_map_unlock(map);
4421 return(KERN_INVALID_ADDRESS);
4422 }
4423
4424 while(1) {
4425 /*
4426 * Lookup the entry. If it doesn't start in a valid
4427 * entry, return an error.
4428 */
4429 if (! vm_map_lookup_entry(map, start, &entry)) {
4430 vm_map_unlock(map);
4431 return(KERN_INVALID_ADDRESS);
4432 }
4433
4434 if (entry->superpage_size && (start & (SUPERPAGE_SIZE-1))) { /* extend request to whole entry */
4435 start = SUPERPAGE_ROUND_DOWN(start);
4436 continue;
4437 }
4438 break;
4439 }
4440 if (entry->superpage_size)
4441 end = SUPERPAGE_ROUND_UP(end);
4442
4443 /*
4444 * Make a first pass to check for protection and address
4445 * violations.
4446 */
4447
4448 current = entry;
4449 prev = current->vme_start;
4450 while ((current != vm_map_to_entry(map)) &&
4451 (current->vme_start < end)) {
4452
4453 /*
4454 * If there is a hole, return an error.
4455 */
4456 if (current->vme_start != prev) {
4457 vm_map_unlock(map);
4458 return(KERN_INVALID_ADDRESS);
4459 }
4460
4461 new_max = current->max_protection;
4462 if(new_prot & VM_PROT_COPY) {
4463 new_max |= VM_PROT_WRITE;
4464 if ((new_prot & (new_max | VM_PROT_COPY)) != new_prot) {
4465 vm_map_unlock(map);
4466 return(KERN_PROTECTION_FAILURE);
4467 }
4468 } else {
4469 if ((new_prot & new_max) != new_prot) {
4470 vm_map_unlock(map);
4471 return(KERN_PROTECTION_FAILURE);
4472 }
4473 }
4474
4475
4476 prev = current->vme_end;
4477 current = current->vme_next;
4478 }
4479 if (end > prev) {
4480 vm_map_unlock(map);
4481 return(KERN_INVALID_ADDRESS);
4482 }
4483
4484 /*
4485 * Go back and fix up protections.
4486 * Clip to start here if the range starts within
4487 * the entry.
4488 */
4489
4490 current = entry;
4491 if (current != vm_map_to_entry(map)) {
4492 /* clip and unnest if necessary */
4493 vm_map_clip_start(map, current, start);
4494 }
4495
4496 while ((current != vm_map_to_entry(map)) &&
4497 (current->vme_start < end)) {
4498
4499 vm_prot_t old_prot;
4500
4501 vm_map_clip_end(map, current, end);
4502
4503 if (current->is_sub_map) {
4504 /* clipping did unnest if needed */
4505 assert(!current->use_pmap);
4506 }
4507
4508 old_prot = current->protection;
4509
4510 if(new_prot & VM_PROT_COPY) {
4511 /* caller is asking specifically to copy the */
4512 /* mapped data; this implies that max protection */
4513 /* will include write. Caller must be prepared */
4514 /* for loss of shared memory communication in the */
4515 /* target area after taking this step */
4516
4517 if (current->is_sub_map == FALSE &&
4518 VME_OBJECT(current) == VM_OBJECT_NULL) {
4519 VME_OBJECT_SET(current,
4520 vm_object_allocate(
4521 (vm_map_size_t)
4522 (current->vme_end -
4523 current->vme_start)));
4524 VME_OFFSET_SET(current, 0);
4525 assert(current->use_pmap);
4526 }
4527 assert(current->wired_count == 0);
4528 current->needs_copy = TRUE;
4529 current->max_protection |= VM_PROT_WRITE;
4530 }
4531
4532 if (set_max)
4533 current->protection =
4534 (current->max_protection =
4535 new_prot & ~VM_PROT_COPY) &
4536 old_prot;
4537 else
4538 current->protection = new_prot & ~VM_PROT_COPY;
4539
4540 /*
4541 * Update physical map if necessary.
4542 * If the request is to turn off write protection,
4543 * we won't do it for real (in pmap). This is because
4544 * it would cause copy-on-write to fail. We've already
4545 * set the new protection in the map, so if a
4546 * write-protect fault occurred, it will be fixed up
4547 * properly, COW or not.
4548 */
4549 if (current->protection != old_prot) {
4550 /* Look one level in: we support nested pmaps */
4551 /* from mapped submaps which are direct entries */
4552 /* in our map */
4553
4554 vm_prot_t prot;
4555
4556 prot = current->protection & ~VM_PROT_WRITE;
4557
4558 if (override_nx(map, VME_ALIAS(current)) && prot)
4559 prot |= VM_PROT_EXECUTE;
4560
4561
4562 if (current->is_sub_map && current->use_pmap) {
4563 pmap_protect(VME_SUBMAP(current)->pmap,
4564 current->vme_start,
4565 current->vme_end,
4566 prot);
4567 } else {
4568 pmap_protect(map->pmap,
4569 current->vme_start,
4570 current->vme_end,
4571 prot);
4572 }
4573 }
4574 current = current->vme_next;
4575 }
4576
4577 current = entry;
4578 while ((current != vm_map_to_entry(map)) &&
4579 (current->vme_start <= end)) {
4580 vm_map_simplify_entry(map, current);
4581 current = current->vme_next;
4582 }
4583
4584 vm_map_unlock(map);
4585 return(KERN_SUCCESS);
4586 }
4587
4588 /*
4589 * vm_map_inherit:
4590 *
4591 * Sets the inheritance of the specified address
4592 * range in the target map. Inheritance
4593 * affects how the map will be shared with
4594 * child maps at the time of vm_map_fork.
4595 */
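/*
 * Editor's sketch (hypothetical, not part of the original source):
 * marking a range so that a child created at vm_map_fork time gets
 * its own copy of it instead of sharing it with the parent:
 *
 *	kr = vm_map_inherit(map, start, end, VM_INHERIT_COPY);
 *
 * As the code below enforces, VM_INHERIT_COPY is rejected with
 * KERN_INVALID_ARGUMENT if the range contains submap entries.
 */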
4596 kern_return_t
4597 vm_map_inherit(
4598 register vm_map_t map,
4599 register vm_map_offset_t start,
4600 register vm_map_offset_t end,
4601 register vm_inherit_t new_inheritance)
4602 {
4603 register vm_map_entry_t entry;
4604 vm_map_entry_t temp_entry;
4605
4606 vm_map_lock(map);
4607
4608 VM_MAP_RANGE_CHECK(map, start, end);
4609
4610 if (vm_map_lookup_entry(map, start, &temp_entry)) {
4611 entry = temp_entry;
4612 }
4613 else {
4614 temp_entry = temp_entry->vme_next;
4615 entry = temp_entry;
4616 }
4617
4618 /* first check entire range for submaps which can't support the */
4619 /* given inheritance. */
4620 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
4621 if(entry->is_sub_map) {
4622 if(new_inheritance == VM_INHERIT_COPY) {
4623 vm_map_unlock(map);
4624 return(KERN_INVALID_ARGUMENT);
4625 }
4626 }
4627
4628 entry = entry->vme_next;
4629 }
4630
4631 entry = temp_entry;
4632 if (entry != vm_map_to_entry(map)) {
4633 /* clip and unnest if necessary */
4634 vm_map_clip_start(map, entry, start);
4635 }
4636
4637 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
4638 vm_map_clip_end(map, entry, end);
4639 if (entry->is_sub_map) {
4640 /* clip did unnest if needed */
4641 assert(!entry->use_pmap);
4642 }
4643
4644 entry->inheritance = new_inheritance;
4645
4646 entry = entry->vme_next;
4647 }
4648
4649 vm_map_unlock(map);
4650 return(KERN_SUCCESS);
4651 }
4652
4653 /*
4654 * Update the accounting for the amount of wired memory in this map. If the user has
4655 * exceeded the defined limits, then we fail. Wiring on behalf of the kernel never fails.
4656 */
4657
4658 static kern_return_t
4659 add_wire_counts(
4660 vm_map_t map,
4661 vm_map_entry_t entry,
4662 boolean_t user_wire)
4663 {
4664 vm_map_size_t size;
4665
4666 if (user_wire) {
4667 unsigned int total_wire_count = vm_page_wire_count + vm_lopage_free_count;
4668
4669 /*
4670 * We're wiring memory at the request of the user. Check if this is the first time the user is wiring
4671 * this map entry.
4672 */
4673
4674 if (entry->user_wired_count == 0) {
4675 size = entry->vme_end - entry->vme_start;
4676
4677 /*
4678 * Since this is the first time the user is wiring this map entry, check to see if we're
4679 * exceeding the user wire limits. There is a per map limit which is the smaller of either
4680 * the process's rlimit or the global vm_user_wire_limit which caps this value. There is also
4681 * a system-wide limit on the amount of memory all users can wire. If the user is over either
4682 * limit, then we fail.
4683 */
4684
4685 if(size + map->user_wire_size > MIN(map->user_wire_limit, vm_user_wire_limit) ||
4686 size + ptoa_64(total_wire_count) > vm_global_user_wire_limit ||
4687 size + ptoa_64(total_wire_count) > max_mem - vm_global_no_user_wire_amount)
4688 return KERN_RESOURCE_SHORTAGE;
4689
4690 /*
4691 * The first time the user wires an entry, we also increment the wired_count and add this to
4692 * the total that has been wired in the map.
4693 */
4694
4695 if (entry->wired_count >= MAX_WIRE_COUNT)
4696 return KERN_FAILURE;
4697
4698 entry->wired_count++;
4699 map->user_wire_size += size;
4700 }
4701
4702 if (entry->user_wired_count >= MAX_WIRE_COUNT)
4703 return KERN_FAILURE;
4704
4705 entry->user_wired_count++;
4706
4707 } else {
4708
4709 /*
4710 * The kernel's wiring the memory. Just bump the count and continue.
4711 */
4712
4713 if (entry->wired_count >= MAX_WIRE_COUNT)
4714 panic("vm_map_wire: too many wirings");
4715
4716 entry->wired_count++;
4717 }
4718
4719 return KERN_SUCCESS;
4720 }
4721
4722 /*
4723 * Update the memory wiring accounting now that the given map entry is being unwired.
4724 */
4725
4726 static void
4727 subtract_wire_counts(
4728 vm_map_t map,
4729 vm_map_entry_t entry,
4730 boolean_t user_wire)
4731 {
4732
4733 if (user_wire) {
4734
4735 /*
4736 * We're unwiring memory at the request of the user. See if we're removing the last user wire reference.
4737 */
4738
4739 if (entry->user_wired_count == 1) {
4740
4741 /*
4742 * We're removing the last user wire reference. Decrement the wired_count and the total
4743 * user wired memory for this map.
4744 */
4745
4746 assert(entry->wired_count >= 1);
4747 entry->wired_count--;
4748 map->user_wire_size -= entry->vme_end - entry->vme_start;
4749 }
4750
4751 assert(entry->user_wired_count >= 1);
4752 entry->user_wired_count--;
4753
4754 } else {
4755
4756 /*
4757 * The kernel is unwiring the memory. Just update the count.
4758 */
4759
4760 assert(entry->wired_count >= 1);
4761 entry->wired_count--;
4762 }
4763 }
4764
4765 /*
4766 * vm_map_wire:
4767 *
4768 * Sets the pageability of the specified address range in the
4769 * target map as wired. Regions specified as not pageable require
4770 * locked-down physical memory and physical page maps. The
4771 * access_type variable indicates types of accesses that must not
4772 * generate page faults. This is checked against protection of
4773 * memory being locked-down.
4774 *
4775 * The map must not be locked, but a reference must remain to the
4776 * map throughout the call.
4777 */
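/*
 * Editor's sketch (hypothetical, not part of the original source): a
 * typical kernel caller wiring a page-aligned range for read/write
 * access. vm_map_wire_external(), defined further below, stamps the
 * caller's VM tag into the protection bits before calling the common
 * nested routine:
 *
 *	kr = vm_map_wire_external(map, start, end,
 *				  VM_PROT_READ | VM_PROT_WRITE,
 *				  FALSE);	-- kernel, not user, wiring
 */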
4778 static kern_return_t
4779 vm_map_wire_nested(
4780 register vm_map_t map,
4781 register vm_map_offset_t start,
4782 register vm_map_offset_t end,
4783 register vm_prot_t caller_prot,
4784 boolean_t user_wire,
4785 pmap_t map_pmap,
4786 vm_map_offset_t pmap_addr,
4787 ppnum_t *physpage_p)
4788 {
4789 register vm_map_entry_t entry;
4790 register vm_prot_t access_type;
4791 struct vm_map_entry *first_entry, tmp_entry;
4792 vm_map_t real_map;
4793 register vm_map_offset_t s,e;
4794 kern_return_t rc;
4795 boolean_t need_wakeup;
4796 boolean_t main_map = FALSE;
4797 wait_interrupt_t interruptible_state;
4798 thread_t cur_thread;
4799 unsigned int last_timestamp;
4800 vm_map_size_t size;
4801 boolean_t wire_and_extract;
4802
4803 access_type = (caller_prot & VM_PROT_ALL);
4804
4805 wire_and_extract = FALSE;
4806 if (physpage_p != NULL) {
4807 /*
4808 * The caller wants the physical page number of the
4809 * wired page. We return only one physical page number
4810 * so this works for only one page at a time.
4811 */
4812 if ((end - start) != PAGE_SIZE) {
4813 return KERN_INVALID_ARGUMENT;
4814 }
4815 wire_and_extract = TRUE;
4816 *physpage_p = 0;
4817 }
4818
4819 vm_map_lock(map);
4820 if(map_pmap == NULL)
4821 main_map = TRUE;
4822 last_timestamp = map->timestamp;
4823
4824 VM_MAP_RANGE_CHECK(map, start, end);
4825 assert(page_aligned(start));
4826 assert(page_aligned(end));
4827 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
4828 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
4829 if (start == end) {
4830 /* We wired what the caller asked for, zero pages */
4831 vm_map_unlock(map);
4832 return KERN_SUCCESS;
4833 }
4834
4835 need_wakeup = FALSE;
4836 cur_thread = current_thread();
4837
4838 s = start;
4839 rc = KERN_SUCCESS;
4840
4841 if (vm_map_lookup_entry(map, s, &first_entry)) {
4842 entry = first_entry;
4843 /*
4844 * vm_map_clip_start will be done later.
4845 * We don't want to unnest any nested submaps here !
4846 */
4847 } else {
4848 /* Start address is not in map */
4849 rc = KERN_INVALID_ADDRESS;
4850 goto done;
4851 }
4852
4853 while ((entry != vm_map_to_entry(map)) && (s < end)) {
4854 /*
4855 * At this point, we have wired from "start" to "s".
4856 * We still need to wire from "s" to "end".
4857 *
4858 * "entry" hasn't been clipped, so it could start before "s"
4859 * and/or end after "end".
4860 */
4861
4862 /* "e" is how far we want to wire in this entry */
4863 e = entry->vme_end;
4864 if (e > end)
4865 e = end;
4866
4867 /*
4868 * If another thread is wiring/unwiring this entry then
4869 * block after informing the other thread to wake us up.
4870 */
4871 if (entry->in_transition) {
4872 wait_result_t wait_result;
4873
4874 /*
4875 * We have not clipped the entry. Make sure that
4876 * the start address is in range so that the lookup
4877 * below will succeed.
4878 * "s" is the current starting point: we've already
4879 * wired from "start" to "s" and we still have
4880 * to wire from "s" to "end".
4881 */
4882
4883 entry->needs_wakeup = TRUE;
4884
4885 /*
4886 * wake up anybody waiting on entries that we have
4887 * already wired.
4888 */
4889 if (need_wakeup) {
4890 vm_map_entry_wakeup(map);
4891 need_wakeup = FALSE;
4892 }
4893 /*
4894 * User wiring is interruptible
4895 */
4896 wait_result = vm_map_entry_wait(map,
4897 (user_wire) ? THREAD_ABORTSAFE :
4898 THREAD_UNINT);
4899 if (user_wire && wait_result == THREAD_INTERRUPTED) {
4900 /*
4901 * undo the wirings we have done so far.
4902 * We do not clear the needs_wakeup flag,
4903 * because we cannot tell if we were the
4904 * only one waiting.
4905 */
4906 rc = KERN_FAILURE;
4907 goto done;
4908 }
4909
4910 /*
4911 * Cannot avoid a lookup here. Reset the timestamp.
4912 */
4913 last_timestamp = map->timestamp;
4914
4915 /*
4916 * The entry could have been clipped; look it up again.
4917 * The worst that can happen is that it no longer exists.
4918 */
4919 if (!vm_map_lookup_entry(map, s, &first_entry)) {
4920 /*
4921 * User: undo everything up to the previous
4922 * entry. Let vm_map_unwire worry about
4923 * checking the validity of the range.
4924 */
4925 rc = KERN_FAILURE;
4926 goto done;
4927 }
4928 entry = first_entry;
4929 continue;
4930 }
4931
4932 if (entry->is_sub_map) {
4933 vm_map_offset_t sub_start;
4934 vm_map_offset_t sub_end;
4935 vm_map_offset_t local_start;
4936 vm_map_offset_t local_end;
4937 pmap_t pmap;
4938
4939 if (wire_and_extract) {
4940 /*
4941 * Wiring would result in copy-on-write
4942 * which would not be compatible with
4943 * the sharing we have with the original
4944 * provider of this memory.
4945 */
4946 rc = KERN_INVALID_ARGUMENT;
4947 goto done;
4948 }
4949
4950 vm_map_clip_start(map, entry, s);
4951 vm_map_clip_end(map, entry, end);
4952
4953 sub_start = VME_OFFSET(entry);
4954 sub_end = entry->vme_end;
4955 sub_end += VME_OFFSET(entry) - entry->vme_start;
4956
4957 local_end = entry->vme_end;
4958 if(map_pmap == NULL) {
4959 vm_object_t object;
4960 vm_object_offset_t offset;
4961 vm_prot_t prot;
4962 boolean_t wired;
4963 vm_map_entry_t local_entry;
4964 vm_map_version_t version;
4965 vm_map_t lookup_map;
4966
4967 if(entry->use_pmap) {
4968 pmap = VME_SUBMAP(entry)->pmap;
4969 /* ppc implementation requires that */
4970 /* submaps' pmap address ranges line */
4971 /* up with the parent map */
4972 #ifdef notdef
4973 pmap_addr = sub_start;
4974 #endif
4975 pmap_addr = s;
4976 } else {
4977 pmap = map->pmap;
4978 pmap_addr = s;
4979 }
4980
4981 if (entry->wired_count) {
4982 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4983 goto done;
4984
4985 /*
4986 * The map was not unlocked:
4987 * no need to goto re-lookup.
4988 * Just go directly to next entry.
4989 */
4990 entry = entry->vme_next;
4991 s = entry->vme_start;
4992 continue;
4993
4994 }
4995
4996 /* call vm_map_lookup_locked to */
4997 /* cause any needs copy to be */
4998 /* evaluated */
4999 local_start = entry->vme_start;
5000 lookup_map = map;
5001 vm_map_lock_write_to_read(map);
5002 if(vm_map_lookup_locked(
5003 &lookup_map, local_start,
5004 access_type,
5005 OBJECT_LOCK_EXCLUSIVE,
5006 &version, &object,
5007 &offset, &prot, &wired,
5008 NULL,
5009 &real_map)) {
5010
5011 vm_map_unlock_read(lookup_map);
5012 assert(map_pmap == NULL);
5013 vm_map_unwire(map, start,
5014 s, user_wire);
5015 return(KERN_FAILURE);
5016 }
5017 vm_object_unlock(object);
5018 if(real_map != lookup_map)
5019 vm_map_unlock(real_map);
5020 vm_map_unlock_read(lookup_map);
5021 vm_map_lock(map);
5022
5023 /* we unlocked, so must re-lookup */
5024 if (!vm_map_lookup_entry(map,
5025 local_start,
5026 &local_entry)) {
5027 rc = KERN_FAILURE;
5028 goto done;
5029 }
5030
5031 /*
5032 * entry could have been "simplified",
5033 * so re-clip
5034 */
5035 entry = local_entry;
5036 assert(s == local_start);
5037 vm_map_clip_start(map, entry, s);
5038 vm_map_clip_end(map, entry, end);
5039 /* re-compute "e" */
5040 e = entry->vme_end;
5041 if (e > end)
5042 e = end;
5043
5044 /* did we have a change of type? */
5045 if (!entry->is_sub_map) {
5046 last_timestamp = map->timestamp;
5047 continue;
5048 }
5049 } else {
5050 local_start = entry->vme_start;
5051 pmap = map_pmap;
5052 }
5053
5054 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
5055 goto done;
5056
5057 entry->in_transition = TRUE;
5058
5059 vm_map_unlock(map);
5060 rc = vm_map_wire_nested(VME_SUBMAP(entry),
5061 sub_start, sub_end,
5062 caller_prot,
5063 user_wire, pmap, pmap_addr,
5064 NULL);
5065 vm_map_lock(map);
5066
5067 /*
5068 * Find the entry again. It could have been clipped
5069 * after we unlocked the map.
5070 */
5071 if (!vm_map_lookup_entry(map, local_start,
5072 &first_entry))
5073 panic("vm_map_wire: re-lookup failed");
5074 entry = first_entry;
5075
5076 assert(local_start == s);
5077 /* re-compute "e" */
5078 e = entry->vme_end;
5079 if (e > end)
5080 e = end;
5081
5082 last_timestamp = map->timestamp;
5083 while ((entry != vm_map_to_entry(map)) &&
5084 (entry->vme_start < e)) {
5085 assert(entry->in_transition);
5086 entry->in_transition = FALSE;
5087 if (entry->needs_wakeup) {
5088 entry->needs_wakeup = FALSE;
5089 need_wakeup = TRUE;
5090 }
5091 if (rc != KERN_SUCCESS) {/* from vm_*_wire */
5092 subtract_wire_counts(map, entry, user_wire);
5093 }
5094 entry = entry->vme_next;
5095 }
5096 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
5097 goto done;
5098 }
5099
5100 /* no need to relookup again */
5101 s = entry->vme_start;
5102 continue;
5103 }
5104
5105 /*
5106 * If this entry is already wired then increment
5107 * the appropriate wire reference count.
5108 */
5109 if (entry->wired_count) {
5110
5111 if ((entry->protection & access_type) != access_type) {
5112 /* found a protection problem */
5113
5114 /*
5115 * XXX FBDP
5116 * We should always return an error
5117 * in this case but since we didn't
5118 * enforce it before, let's do
5119 * it only for the new "wire_and_extract"
5120 * code path for now...
5121 */
5122 if (wire_and_extract) {
5123 rc = KERN_PROTECTION_FAILURE;
5124 goto done;
5125 }
5126 }
5127
5128 /*
5129 * entry is already wired down, get our reference
5130 * after clipping to our range.
5131 */
5132 vm_map_clip_start(map, entry, s);
5133 vm_map_clip_end(map, entry, end);
5134
5135 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
5136 goto done;
5137
5138 if (wire_and_extract) {
5139 vm_object_t object;
5140 vm_object_offset_t offset;
5141 vm_page_t m;
5142
5143 /*
5144 * We don't have to "wire" the page again
5145 * but we still have to "extract" its
5146 * physical page number, after some sanity
5147 * checks.
5148 */
5149 assert((entry->vme_end - entry->vme_start)
5150 == PAGE_SIZE);
5151 assert(!entry->needs_copy);
5152 assert(!entry->is_sub_map);
5153 assert(VME_OBJECT(entry));
5154 if (((entry->vme_end - entry->vme_start)
5155 != PAGE_SIZE) ||
5156 entry->needs_copy ||
5157 entry->is_sub_map ||
5158 VME_OBJECT(entry) == VM_OBJECT_NULL) {
5159 rc = KERN_INVALID_ARGUMENT;
5160 goto done;
5161 }
5162
5163 object = VME_OBJECT(entry);
5164 offset = VME_OFFSET(entry);
5165 /* need exclusive lock to update m->dirty */
5166 if (entry->protection & VM_PROT_WRITE) {
5167 vm_object_lock(object);
5168 } else {
5169 vm_object_lock_shared(object);
5170 }
5171 m = vm_page_lookup(object, offset);
5172 assert(m != VM_PAGE_NULL);
5173 assert(m->wire_count);
5174 if (m != VM_PAGE_NULL && m->wire_count) {
5175 *physpage_p = m->phys_page;
5176 if (entry->protection & VM_PROT_WRITE) {
5177 vm_object_lock_assert_exclusive(
5178 m->object);
5179 m->dirty = TRUE;
5180 }
5181 } else {
5182 /* not already wired !? */
5183 *physpage_p = 0;
5184 }
5185 vm_object_unlock(object);
5186 }
5187
5188 /* map was not unlocked: no need to relookup */
5189 entry = entry->vme_next;
5190 s = entry->vme_start;
5191 continue;
5192 }
5193
5194 /*
5195 * Unwired entry or wire request transmitted via submap
5196 */
5197
5198
5199 /*
5200 * Perform actions of vm_map_lookup that need the write
5201 * lock on the map: create a shadow object for a
5202 * copy-on-write region, or an object for a zero-fill
5203 * region.
5204 */
5205 size = entry->vme_end - entry->vme_start;
5206 /*
5207 * If wiring a copy-on-write page, we need to copy it now
5208 * even if we're only (currently) requesting read access.
5209 * This is aggressive, but once it's wired we can't move it.
5210 */
5211 if (entry->needs_copy) {
5212 if (wire_and_extract) {
5213 /*
5214 * We're supposed to share with the original
5215 * provider so should not be "needs_copy"
5216 */
5217 rc = KERN_INVALID_ARGUMENT;
5218 goto done;
5219 }
5220
5221 VME_OBJECT_SHADOW(entry, size);
5222 entry->needs_copy = FALSE;
5223 } else if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
5224 if (wire_and_extract) {
5225 /*
5226 * We're supposed to share with the original
5227 * provider so should already have an object.
5228 */
5229 rc = KERN_INVALID_ARGUMENT;
5230 goto done;
5231 }
5232 VME_OBJECT_SET(entry, vm_object_allocate(size));
5233 VME_OFFSET_SET(entry, (vm_object_offset_t)0);
5234 assert(entry->use_pmap);
5235 }
5236
5237 vm_map_clip_start(map, entry, s);
5238 vm_map_clip_end(map, entry, end);
5239
5240 /* re-compute "e" */
5241 e = entry->vme_end;
5242 if (e > end)
5243 e = end;
5244
5245 /*
5246 * Check for holes and protection mismatch.
5247 * Holes: Next entry should be contiguous unless this
5248 * is the end of the region.
5249 * Protection: Access requested must be allowed, unless
5250 * wiring is by protection class
5251 */
5252 if ((entry->vme_end < end) &&
5253 ((entry->vme_next == vm_map_to_entry(map)) ||
5254 (entry->vme_next->vme_start > entry->vme_end))) {
5255 /* found a hole */
5256 rc = KERN_INVALID_ADDRESS;
5257 goto done;
5258 }
5259 if ((entry->protection & access_type) != access_type) {
5260 /* found a protection problem */
5261 rc = KERN_PROTECTION_FAILURE;
5262 goto done;
5263 }
5264
5265 assert(entry->wired_count == 0 && entry->user_wired_count == 0);
5266
5267 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
5268 goto done;
5269
5270 entry->in_transition = TRUE;
5271
5272 /*
5273 * This entry might get split once we unlock the map.
5274 * In vm_fault_wire(), we need the current range as
5275 * defined by this entry. In order for this to work
5276 * along with a simultaneous clip operation, we make a
5277 * temporary copy of this entry and use that for the
5278 * wiring. Note that the underlying objects do not
5279 * change during a clip.
5280 */
5281 tmp_entry = *entry;
5282
5283 /*
5284 * The in_transition state guarantees that the entry
5285 * (or entries for this range, if a split occurred) will be
5286 * there when the map lock is acquired for the second time.
5287 */
5288 vm_map_unlock(map);
5289
5290 if (!user_wire && cur_thread != THREAD_NULL)
5291 interruptible_state = thread_interrupt_level(THREAD_UNINT);
5292 else
5293 interruptible_state = THREAD_UNINT;
5294
5295 if(map_pmap)
5296 rc = vm_fault_wire(map,
5297 &tmp_entry, caller_prot, map_pmap, pmap_addr,
5298 physpage_p);
5299 else
5300 rc = vm_fault_wire(map,
5301 &tmp_entry, caller_prot, map->pmap,
5302 tmp_entry.vme_start,
5303 physpage_p);
5304
5305 if (!user_wire && cur_thread != THREAD_NULL)
5306 thread_interrupt_level(interruptible_state);
5307
5308 vm_map_lock(map);
5309
5310 if (last_timestamp+1 != map->timestamp) {
5311 /*
5312 * Find the entry again. It could have been clipped
5313 * after we unlocked the map.
5314 */
5315 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
5316 &first_entry))
5317 panic("vm_map_wire: re-lookup failed");
5318
5319 entry = first_entry;
5320 }
5321
5322 last_timestamp = map->timestamp;
5323
5324 while ((entry != vm_map_to_entry(map)) &&
5325 (entry->vme_start < tmp_entry.vme_end)) {
5326 assert(entry->in_transition);
5327 entry->in_transition = FALSE;
5328 if (entry->needs_wakeup) {
5329 entry->needs_wakeup = FALSE;
5330 need_wakeup = TRUE;
5331 }
5332 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
5333 subtract_wire_counts(map, entry, user_wire);
5334 }
5335 entry = entry->vme_next;
5336 }
5337
5338 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
5339 goto done;
5340 }
5341
5342 s = entry->vme_start;
5343 } /* end while loop through map entries */
5344
5345 done:
5346 if (rc == KERN_SUCCESS) {
5347 /* repair any damage we may have made to the VM map */
5348 vm_map_simplify_range(map, start, end);
5349 }
5350
5351 vm_map_unlock(map);
5352
5353 /*
5354 * wake up anybody waiting on entries we wired.
5355 */
5356 if (need_wakeup)
5357 vm_map_entry_wakeup(map);
5358
5359 if (rc != KERN_SUCCESS) {
5360 /* undo what has been wired so far */
5361 vm_map_unwire_nested(map, start, s, user_wire,
5362 map_pmap, pmap_addr);
5363 if (physpage_p) {
5364 *physpage_p = 0;
5365 }
5366 }
5367
5368 return rc;
5369
5370 }
5371
5372 kern_return_t
5373 vm_map_wire_external(
5374 register vm_map_t map,
5375 register vm_map_offset_t start,
5376 register vm_map_offset_t end,
5377 register vm_prot_t caller_prot,
5378 boolean_t user_wire)
5379 {
5380 kern_return_t kret;
5381
5382 caller_prot &= ~VM_PROT_MEMORY_TAG_MASK;
5383 caller_prot |= VM_PROT_MEMORY_TAG_MAKE(vm_tag_bt());
5384 kret = vm_map_wire_nested(map, start, end, caller_prot,
5385 user_wire, (pmap_t)NULL, 0, NULL);
5386 return kret;
5387 }
5388
5389 kern_return_t
5390 vm_map_wire(
5391 register vm_map_t map,
5392 register vm_map_offset_t start,
5393 register vm_map_offset_t end,
5394 register vm_prot_t caller_prot,
5395 boolean_t user_wire)
5396 {
5397 kern_return_t kret;
5398
5399 kret = vm_map_wire_nested(map, start, end, caller_prot,
5400 user_wire, (pmap_t)NULL, 0, NULL);
5401 return kret;
5402 }
5403
5404 kern_return_t
5405 vm_map_wire_and_extract_external(
5406 vm_map_t map,
5407 vm_map_offset_t start,
5408 vm_prot_t caller_prot,
5409 boolean_t user_wire,
5410 ppnum_t *physpage_p)
5411 {
5412 kern_return_t kret;
5413
5414 caller_prot &= ~VM_PROT_MEMORY_TAG_MASK;
5415 caller_prot |= VM_PROT_MEMORY_TAG_MAKE(vm_tag_bt());
5416 kret = vm_map_wire_nested(map,
5417 start,
5418 start+VM_MAP_PAGE_SIZE(map),
5419 caller_prot,
5420 user_wire,
5421 (pmap_t)NULL,
5422 0,
5423 physpage_p);
5424 if (kret != KERN_SUCCESS &&
5425 physpage_p != NULL) {
5426 *physpage_p = 0;
5427 }
5428 return kret;
5429 }
5430
5431 kern_return_t
5432 vm_map_wire_and_extract(
5433 vm_map_t map,
5434 vm_map_offset_t start,
5435 vm_prot_t caller_prot,
5436 boolean_t user_wire,
5437 ppnum_t *physpage_p)
5438 {
5439 kern_return_t kret;
5440
5441 kret = vm_map_wire_nested(map,
5442 start,
5443 start+VM_MAP_PAGE_SIZE(map),
5444 caller_prot,
5445 user_wire,
5446 (pmap_t)NULL,
5447 0,
5448 physpage_p);
5449 if (kret != KERN_SUCCESS &&
5450 physpage_p != NULL) {
5451 *physpage_p = 0;
5452 }
5453 return kret;
5454 }
5455
5456 /*
5457 * vm_map_unwire:
5458 *
5459 * Sets the pageability of the specified address range in the target map
5460 * as pageable. Regions specified must have been wired previously.
5461 *
5462 * The map must not be locked, but a reference must remain to the map
5463 * throughout the call.
5464 *
5465 * The kernel will panic on failures. User unwire ignores holes and
5466 * unwired and in-transition entries to avoid losing memory by leaving
5467 * it unwired.
5468 */
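/*
 * Editor's note (illustrative, not part of the original source): the
 * user_wire argument selects the failure policy described above.
 *
 *	-- user request: holes and already-unwired or in-transition
 *	-- entries are tolerated:
 *	kr = vm_map_unwire(map, start, end, TRUE);
 *
 *	-- kernel request: the same conditions panic instead:
 *	kr = vm_map_unwire(map, start, end, FALSE);
 */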
5469 static kern_return_t
5470 vm_map_unwire_nested(
5471 register vm_map_t map,
5472 register vm_map_offset_t start,
5473 register vm_map_offset_t end,
5474 boolean_t user_wire,
5475 pmap_t map_pmap,
5476 vm_map_offset_t pmap_addr)
5477 {
5478 register vm_map_entry_t entry;
5479 struct vm_map_entry *first_entry, tmp_entry;
5480 boolean_t need_wakeup;
5481 boolean_t main_map = FALSE;
5482 unsigned int last_timestamp;
5483
5484 vm_map_lock(map);
5485 if(map_pmap == NULL)
5486 main_map = TRUE;
5487 last_timestamp = map->timestamp;
5488
5489 VM_MAP_RANGE_CHECK(map, start, end);
5490 assert(page_aligned(start));
5491 assert(page_aligned(end));
5492 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
5493 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
5494
5495 if (start == end) {
5496 /* We unwired what the caller asked for: zero pages */
5497 vm_map_unlock(map);
5498 return KERN_SUCCESS;
5499 }
5500
5501 if (vm_map_lookup_entry(map, start, &first_entry)) {
5502 entry = first_entry;
5503 /*
5504 * vm_map_clip_start will be done later.
5505 * We don't want to unnest any nested sub maps here !
5506 */
5507 }
5508 else {
5509 if (!user_wire) {
5510 panic("vm_map_unwire: start not found");
5511 }
5512 /* Start address is not in map. */
5513 vm_map_unlock(map);
5514 return(KERN_INVALID_ADDRESS);
5515 }
5516
5517 if (entry->superpage_size) {
5518 /* superpages are always wired */
5519 vm_map_unlock(map);
5520 return KERN_INVALID_ADDRESS;
5521 }
5522
5523 need_wakeup = FALSE;
5524 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
5525 if (entry->in_transition) {
5526 /*
5527 * 1)
5528 * Another thread is wiring down this entry. Note
5529 * that if it were not for the other thread, we would
5530 * be unwiring an unwired entry. This is not
5531 * permitted. If we wait, we will be unwiring memory
5532 * we did not wire.
5533 *
5534 * 2)
5535 * Another thread is unwiring this entry. We did not
5536 * have a reference to it, because if we did, this
5537 * entry will not be getting unwired now.
5538 */
5539 if (!user_wire) {
5540 /*
5541 * XXX FBDP
5542 * This could happen: there could be some
5543 * overlapping vslock/vsunlock operations
5544 * going on.
5545 * We should probably just wait and retry,
5546 * but then we have to be careful that this
5547 * entry could get "simplified" after
5548 * "in_transition" gets unset and before
5549 * we re-lookup the entry, so we would
5550 * have to re-clip the entry to avoid
5551 * re-unwiring what we have already unwired...
5552 * See vm_map_wire_nested().
5553 *
5554 * Or we could just ignore "in_transition"
5555 * here and proceed to decrement the wired
5556 * count(s) on this entry. That should be fine
5557 * as long as "wired_count" doesn't drop all
5558 * the way to 0 (and we should panic if THAT
5559 * happens).
5560 */
5561 panic("vm_map_unwire: in_transition entry");
5562 }
5563
5564 entry = entry->vme_next;
5565 continue;
5566 }
5567
5568 if (entry->is_sub_map) {
5569 vm_map_offset_t sub_start;
5570 vm_map_offset_t sub_end;
5571 vm_map_offset_t local_end;
5572 pmap_t pmap;
5573
5574 vm_map_clip_start(map, entry, start);
5575 vm_map_clip_end(map, entry, end);
5576
5577 sub_start = VME_OFFSET(entry);
5578 sub_end = entry->vme_end - entry->vme_start;
5579 sub_end += VME_OFFSET(entry);
5580 local_end = entry->vme_end;
5581 if(map_pmap == NULL) {
5582 if(entry->use_pmap) {
5583 pmap = VME_SUBMAP(entry)->pmap;
5584 pmap_addr = sub_start;
5585 } else {
5586 pmap = map->pmap;
5587 pmap_addr = start;
5588 }
5589 if (entry->wired_count == 0 ||
5590 (user_wire && entry->user_wired_count == 0)) {
5591 if (!user_wire)
5592 panic("vm_map_unwire: entry is unwired");
5593 entry = entry->vme_next;
5594 continue;
5595 }
5596
5597 /*
5598 * Check for holes
5599 * Holes: Next entry should be contiguous unless
5600 * this is the end of the region.
5601 */
5602 if (((entry->vme_end < end) &&
5603 ((entry->vme_next == vm_map_to_entry(map)) ||
5604 (entry->vme_next->vme_start
5605 > entry->vme_end)))) {
5606 if (!user_wire)
5607 panic("vm_map_unwire: non-contiguous region");
5608 /*
5609 entry = entry->vme_next;
5610 continue;
5611 */
5612 }
5613
5614 subtract_wire_counts(map, entry, user_wire);
5615
5616 if (entry->wired_count != 0) {
5617 entry = entry->vme_next;
5618 continue;
5619 }
5620
5621 entry->in_transition = TRUE;
5622 tmp_entry = *entry;/* see comment in vm_map_wire() */
5623
5624 /*
5625 * We can unlock the map now. The in_transition state
5626 * guarantees the existence of the entry.
5627 */
5628 vm_map_unlock(map);
5629 vm_map_unwire_nested(VME_SUBMAP(entry),
5630 sub_start, sub_end, user_wire, pmap, pmap_addr);
5631 vm_map_lock(map);
5632
5633 if (last_timestamp+1 != map->timestamp) {
5634 /*
5635 * Find the entry again. It could have been
5636 * clipped or deleted after we unlocked the map.
5637 */
5638 if (!vm_map_lookup_entry(map,
5639 tmp_entry.vme_start,
5640 &first_entry)) {
5641 if (!user_wire)
5642 panic("vm_map_unwire: re-lookup failed");
5643 entry = first_entry->vme_next;
5644 } else
5645 entry = first_entry;
5646 }
5647 last_timestamp = map->timestamp;
5648
5649 /*
5650 * clear transition bit for all constituent entries
5651 * that were in the original entry (saved in
5652 * tmp_entry). Also check for waiters.
5653 */
5654 while ((entry != vm_map_to_entry(map)) &&
5655 (entry->vme_start < tmp_entry.vme_end)) {
5656 assert(entry->in_transition);
5657 entry->in_transition = FALSE;
5658 if (entry->needs_wakeup) {
5659 entry->needs_wakeup = FALSE;
5660 need_wakeup = TRUE;
5661 }
5662 entry = entry->vme_next;
5663 }
5664 continue;
5665 } else {
5666 vm_map_unlock(map);
5667 vm_map_unwire_nested(VME_SUBMAP(entry),
5668 sub_start, sub_end, user_wire, map_pmap,
5669 pmap_addr);
5670 vm_map_lock(map);
5671
5672 if (last_timestamp+1 != map->timestamp) {
5673 /*
5674 * Find the entry again. It could have been
5675 * clipped or deleted after we unlocked the map.
5676 */
5677 if (!vm_map_lookup_entry(map,
5678 tmp_entry.vme_start,
5679 &first_entry)) {
5680 if (!user_wire)
5681 panic("vm_map_unwire: re-lookup failed");
5682 entry = first_entry->vme_next;
5683 } else
5684 entry = first_entry;
5685 }
5686 last_timestamp = map->timestamp;
5687 }
5688 }
5689
5690
5691 if ((entry->wired_count == 0) ||
5692 (user_wire && entry->user_wired_count == 0)) {
5693 if (!user_wire)
5694 panic("vm_map_unwire: entry is unwired");
5695
5696 entry = entry->vme_next;
5697 continue;
5698 }
5699
5700 assert(entry->wired_count > 0 &&
5701 (!user_wire || entry->user_wired_count > 0));
5702
5703 vm_map_clip_start(map, entry, start);
5704 vm_map_clip_end(map, entry, end);
5705
5706 /*
5707 * Check for holes
5708 * Holes: Next entry should be contiguous unless
5709 * this is the end of the region.
5710 */
5711 if (((entry->vme_end < end) &&
5712 ((entry->vme_next == vm_map_to_entry(map)) ||
5713 (entry->vme_next->vme_start > entry->vme_end)))) {
5714
5715 if (!user_wire)
5716 panic("vm_map_unwire: non-contiguous region");
5717 entry = entry->vme_next;
5718 continue;
5719 }
5720
5721 subtract_wire_counts(map, entry, user_wire);
5722
5723 if (entry->wired_count != 0) {
5724 entry = entry->vme_next;
5725 continue;
5726 }
5727
5728 if(entry->zero_wired_pages) {
5729 entry->zero_wired_pages = FALSE;
5730 }
5731
5732 entry->in_transition = TRUE;
5733 tmp_entry = *entry; /* see comment in vm_map_wire() */
5734
5735 /*
5736 * We can unlock the map now. The in_transition state
5737 * guarantees the existence of the entry.
5738 */
5739 vm_map_unlock(map);
5740 if(map_pmap) {
5741 vm_fault_unwire(map,
5742 &tmp_entry, FALSE, map_pmap, pmap_addr);
5743 } else {
5744 vm_fault_unwire(map,
5745 &tmp_entry, FALSE, map->pmap,
5746 tmp_entry.vme_start);
5747 }
5748 vm_map_lock(map);
5749
5750 if (last_timestamp+1 != map->timestamp) {
5751 /*
5752 * Find the entry again. It could have been clipped
5753 * or deleted after we unlocked the map.
5754 */
5755 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
5756 &first_entry)) {
5757 if (!user_wire)
5758 panic("vm_map_unwire: re-lookup failed");
5759 entry = first_entry->vme_next;
5760 } else
5761 entry = first_entry;
5762 }
5763 last_timestamp = map->timestamp;
5764
5765 /*
5766 * clear transition bit for all constituent entries that
5767 * were in the original entry (saved in tmp_entry). Also
5768 * check for waiters.
5769 */
5770 while ((entry != vm_map_to_entry(map)) &&
5771 (entry->vme_start < tmp_entry.vme_end)) {
5772 assert(entry->in_transition);
5773 entry->in_transition = FALSE;
5774 if (entry->needs_wakeup) {
5775 entry->needs_wakeup = FALSE;
5776 need_wakeup = TRUE;
5777 }
5778 entry = entry->vme_next;
5779 }
5780 }
5781
5782 /*
5783 * We might have fragmented the address space when we wired this
5784 * range of addresses. Attempt to re-coalesce these VM map entries
5785 * with their neighbors now that they're no longer wired.
5786 * Under some circumstances, address space fragmentation can
5787 * prevent VM object shadow chain collapsing, which can cause
5788 * swap space leaks.
5789 */
5790 vm_map_simplify_range(map, start, end);
5791
5792 vm_map_unlock(map);
5793 /*
5794 * wake up anybody waiting on entries that we have unwired.
5795 */
5796 if (need_wakeup)
5797 vm_map_entry_wakeup(map);
5798 return(KERN_SUCCESS);
5799
5800 }
5801
5802 kern_return_t
5803 vm_map_unwire(
5804 register vm_map_t map,
5805 register vm_map_offset_t start,
5806 register vm_map_offset_t end,
5807 boolean_t user_wire)
5808 {
5809 return vm_map_unwire_nested(map, start, end,
5810 user_wire, (pmap_t)NULL, 0);
5811 }
5812
5813
5814 /*
5815 * vm_map_entry_delete: [ internal use only ]
5816 *
5817 * Deallocate the given entry from the target map.
5818 */
5819 static void
5820 vm_map_entry_delete(
5821 register vm_map_t map,
5822 register vm_map_entry_t entry)
5823 {
5824 register vm_map_offset_t s, e;
5825 register vm_object_t object;
5826 register vm_map_t submap;
5827
5828 s = entry->vme_start;
5829 e = entry->vme_end;
5830 assert(page_aligned(s));
5831 assert(page_aligned(e));
5832 if (entry->map_aligned == TRUE) {
5833 assert(VM_MAP_PAGE_ALIGNED(s, VM_MAP_PAGE_MASK(map)));
5834 assert(VM_MAP_PAGE_ALIGNED(e, VM_MAP_PAGE_MASK(map)));
5835 }
5836 assert(entry->wired_count == 0);
5837 assert(entry->user_wired_count == 0);
5838 assert(!entry->permanent);
5839
5840 if (entry->is_sub_map) {
5841 object = NULL;
5842 submap = VME_SUBMAP(entry);
5843 } else {
5844 submap = NULL;
5845 object = VME_OBJECT(entry);
5846 }
5847
5848 vm_map_store_entry_unlink(map, entry);
5849 map->size -= e - s;
5850
5851 vm_map_entry_dispose(map, entry);
5852
5853 vm_map_unlock(map);
5854 /*
5855 * Deallocate the object only after removing all
5856 * pmap entries pointing to its pages.
5857 */
5858 if (submap)
5859 vm_map_deallocate(submap);
5860 else
5861 vm_object_deallocate(object);
5862
5863 }
5864
5865 void
5866 vm_map_submap_pmap_clean(
5867 vm_map_t map,
5868 vm_map_offset_t start,
5869 vm_map_offset_t end,
5870 vm_map_t sub_map,
5871 vm_map_offset_t offset)
5872 {
5873 vm_map_offset_t submap_start;
5874 vm_map_offset_t submap_end;
5875 vm_map_size_t remove_size;
5876 vm_map_entry_t entry;
5877
5878 submap_end = offset + (end - start);
5879 submap_start = offset;
5880
5881 vm_map_lock_read(sub_map);
5882 if(vm_map_lookup_entry(sub_map, offset, &entry)) {
5883
5884 remove_size = (entry->vme_end - entry->vme_start);
5885 if(offset > entry->vme_start)
5886 remove_size -= offset - entry->vme_start;
5887
5888
5889 if(submap_end < entry->vme_end) {
5890 remove_size -=
5891 entry->vme_end - submap_end;
5892 }
5893 if(entry->is_sub_map) {
5894 vm_map_submap_pmap_clean(
5895 sub_map,
5896 start,
5897 start + remove_size,
5898 VME_SUBMAP(entry),
5899 VME_OFFSET(entry));
5900 } else {
5901
5902 if((map->mapped_in_other_pmaps) && (map->ref_count)
5903 && (VME_OBJECT(entry) != NULL)) {
5904 vm_object_pmap_protect_options(
5905 VME_OBJECT(entry),
5906 (VME_OFFSET(entry) +
5907 offset -
5908 entry->vme_start),
5909 remove_size,
5910 PMAP_NULL,
5911 entry->vme_start,
5912 VM_PROT_NONE,
5913 PMAP_OPTIONS_REMOVE);
5914 } else {
5915 pmap_remove(map->pmap,
5916 (addr64_t)start,
5917 (addr64_t)(start + remove_size));
5918 }
5919 }
5920 }
5921
5922 entry = entry->vme_next;
5923
5924 while((entry != vm_map_to_entry(sub_map))
5925 && (entry->vme_start < submap_end)) {
5926 remove_size = (entry->vme_end - entry->vme_start);
5927 if(submap_end < entry->vme_end) {
5928 remove_size -= entry->vme_end - submap_end;
5929 }
5930 if(entry->is_sub_map) {
5931 vm_map_submap_pmap_clean(
5932 sub_map,
5933 (start + entry->vme_start) - offset,
5934 ((start + entry->vme_start) - offset) + remove_size,
5935 VME_SUBMAP(entry),
5936 VME_OFFSET(entry));
5937 } else {
5938 if((map->mapped_in_other_pmaps) && (map->ref_count)
5939 && (VME_OBJECT(entry) != NULL)) {
5940 vm_object_pmap_protect_options(
5941 VME_OBJECT(entry),
5942 VME_OFFSET(entry),
5943 remove_size,
5944 PMAP_NULL,
5945 entry->vme_start,
5946 VM_PROT_NONE,
5947 PMAP_OPTIONS_REMOVE);
5948 } else {
5949 pmap_remove(map->pmap,
5950 (addr64_t)((start + entry->vme_start)
5951 - offset),
5952 (addr64_t)(((start + entry->vme_start)
5953 - offset) + remove_size));
5954 }
5955 }
5956 entry = entry->vme_next;
5957 }
5958 vm_map_unlock_read(sub_map);
5959 return;
5960 }
5961
5962 /*
5963 * vm_map_delete: [ internal use only ]
5964 *
5965 * Deallocates the given address range from the target map.
5966 * Removes all user wirings. Unwires one kernel wiring if
5967 * VM_MAP_REMOVE_KUNWIRE is set. Waits for kernel wirings to go
5968 * away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set. Sleeps
5969 * interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
5970 *
5971 * This routine is called with map locked and leaves map locked.
5972 */
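/*
 * Illustrative sketch (not compiled): how vm_map_delete() is typically
 * driven with the VM_MAP_REMOVE_SAVE_ENTRIES flag.  "map", "start", "end"
 * and "zap_map" are assumed to be set up by the caller; "zap_map" is a
 * temporary map that receives the removed entries and is torn down
 * separately later (at which point VM_MAP_REMOVE_NO_PMAP_CLEANUP applies,
 * as noted in the body below).
 */
#if 0
kern_return_t kr;

vm_map_lock(map);		/* vm_map_delete() wants the map locked... */
kr = vm_map_delete(map, start, end,
    VM_MAP_REMOVE_SAVE_ENTRIES,	/* park the removed entries in zap_map */
    zap_map);
vm_map_unlock(map);		/* ...and it leaves the map locked for us to drop */
#endif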
5973 static kern_return_t
5974 vm_map_delete(
5975 vm_map_t map,
5976 vm_map_offset_t start,
5977 vm_map_offset_t end,
5978 int flags,
5979 vm_map_t zap_map)
5980 {
5981 vm_map_entry_t entry, next;
5982 struct vm_map_entry *first_entry, tmp_entry;
5983 register vm_map_offset_t s;
5984 register vm_object_t object;
5985 boolean_t need_wakeup;
5986 unsigned int last_timestamp = ~0; /* unlikely value */
5987 int interruptible;
5988
5989 interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
5990 THREAD_ABORTSAFE : THREAD_UNINT;
5991
5992 /*
5993 * All our DMA I/O operations in IOKit are currently done by
5994 * wiring through the map entries of the task requesting the I/O.
5995 * Because of this, we must always wait for kernel wirings
5996 * to go away on the entries before deleting them.
5997 *
5998 * Any caller who wants to actually remove a kernel wiring
5999 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
6000 * properly remove one wiring instead of blasting through
6001 * them all.
6002 */
6003 flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;
6004
6005 while(1) {
6006 /*
6007 * Find the start of the region, and clip it
6008 */
6009 if (vm_map_lookup_entry(map, start, &first_entry)) {
6010 entry = first_entry;
6011 if (map == kalloc_map &&
6012 (entry->vme_start != start ||
6013 entry->vme_end != end)) {
6014 panic("vm_map_delete(%p,0x%llx,0x%llx): "
6015 "mismatched entry %p [0x%llx:0x%llx]\n",
6016 map,
6017 (uint64_t)start,
6018 (uint64_t)end,
6019 entry,
6020 (uint64_t)entry->vme_start,
6021 (uint64_t)entry->vme_end);
6022 }
6023 if (entry->superpage_size && (start & ~SUPERPAGE_MASK)) { /* extend request to whole entry */
6024 start = SUPERPAGE_ROUND_DOWN(start);
6025 continue;
6026 }
6027 if (start == entry->vme_start) {
6028 /*
6029 * No need to clip. We don't want to cause
6030 * any unnecessary unnesting in this case...
6031 */
6032 } else {
6033 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
6034 entry->map_aligned &&
6035 !VM_MAP_PAGE_ALIGNED(
6036 start,
6037 VM_MAP_PAGE_MASK(map))) {
6038 /*
6039 * The entry will no longer be
6040 * map-aligned after clipping
6041 * and the caller said it's OK.
6042 */
6043 entry->map_aligned = FALSE;
6044 }
6045 if (map == kalloc_map) {
6046 panic("vm_map_delete(%p,0x%llx,0x%llx):"
6047 " clipping %p at 0x%llx\n",
6048 map,
6049 (uint64_t)start,
6050 (uint64_t)end,
6051 entry,
6052 (uint64_t)start);
6053 }
6054 vm_map_clip_start(map, entry, start);
6055 }
6056
6057 /*
6058 * Fix the lookup hint now, rather than each
6059 * time through the loop.
6060 */
6061 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
6062 } else {
6063 if (map->pmap == kernel_pmap &&
6064 map->ref_count != 0) {
6065 panic("vm_map_delete(%p,0x%llx,0x%llx): "
6066 "no map entry at 0x%llx\n",
6067 map,
6068 (uint64_t)start,
6069 (uint64_t)end,
6070 (uint64_t)start);
6071 }
6072 entry = first_entry->vme_next;
6073 }
6074 break;
6075 }
6076 if (entry->superpage_size)
6077 end = SUPERPAGE_ROUND_UP(end);
6078
6079 need_wakeup = FALSE;
6080 /*
6081 * Step through all entries in this region
6082 */
6083 s = entry->vme_start;
6084 while ((entry != vm_map_to_entry(map)) && (s < end)) {
6085 /*
6086 * At this point, we have deleted all the memory entries
6087 * between "start" and "s". We still need to delete
6088 * all memory entries between "s" and "end".
6089 * While we were blocked and the map was unlocked, some
6090 * new memory entries could have been re-allocated between
6091 * "start" and "s" and we don't want to mess with those.
6092 * Some of those entries could even have been re-assembled
6093 * with an entry after "s" (in vm_map_simplify_entry()), so
6094 * we may have to vm_map_clip_start() again.
6095 */
6096
6097 if (entry->vme_start >= s) {
6098 /*
6099 * This entry starts on or after "s"
6100 * so no need to clip its start.
6101 */
6102 } else {
6103 /*
6104 * This entry has been re-assembled by a
6105 * vm_map_simplify_entry(). We need to
6106 * re-clip its start.
6107 */
6108 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
6109 entry->map_aligned &&
6110 !VM_MAP_PAGE_ALIGNED(s,
6111 VM_MAP_PAGE_MASK(map))) {
6112 /*
6113 * The entry will no longer be map-aligned
6114 * after clipping and the caller said it's OK.
6115 */
6116 entry->map_aligned = FALSE;
6117 }
6118 if (map == kalloc_map) {
6119 panic("vm_map_delete(%p,0x%llx,0x%llx): "
6120 "clipping %p at 0x%llx\n",
6121 map,
6122 (uint64_t)start,
6123 (uint64_t)end,
6124 entry,
6125 (uint64_t)s);
6126 }
6127 vm_map_clip_start(map, entry, s);
6128 }
6129 if (entry->vme_end <= end) {
6130 /*
6131 * This entry is going away completely, so no need
6132 * to clip and possibly cause an unnecessary unnesting.
6133 */
6134 } else {
6135 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
6136 entry->map_aligned &&
6137 !VM_MAP_PAGE_ALIGNED(end,
6138 VM_MAP_PAGE_MASK(map))) {
6139 /*
6140 * The entry will no longer be map-aligned
6141 * after clipping and the caller said it's OK.
6142 */
6143 entry->map_aligned = FALSE;
6144 }
6145 if (map == kalloc_map) {
6146 panic("vm_map_delete(%p,0x%llx,0x%llx): "
6147 "clipping %p at 0x%llx\n",
6148 map,
6149 (uint64_t)start,
6150 (uint64_t)end,
6151 entry,
6152 (uint64_t)end);
6153 }
6154 vm_map_clip_end(map, entry, end);
6155 }
6156
6157 if (entry->permanent) {
6158 panic("attempt to remove permanent VM map entry "
6159 "%p [0x%llx:0x%llx]\n",
6160 entry, (uint64_t) s, (uint64_t) end);
6161 }
6162
6163
6164 if (entry->in_transition) {
6165 wait_result_t wait_result;
6166
6167 /*
6168 * Another thread is wiring/unwiring this entry.
6169 * Let the other thread know we are waiting.
6170 */
6171 assert(s == entry->vme_start);
6172 entry->needs_wakeup = TRUE;
6173
6174 /*
6175 * wake up anybody waiting on entries that we have
6176 * already unwired/deleted.
6177 */
6178 if (need_wakeup) {
6179 vm_map_entry_wakeup(map);
6180 need_wakeup = FALSE;
6181 }
6182
6183 wait_result = vm_map_entry_wait(map, interruptible);
6184
6185 if (interruptible &&
6186 wait_result == THREAD_INTERRUPTED) {
6187 /*
6188 * We do not clear the needs_wakeup flag,
6189 * since we cannot tell if we were the only one.
6190 */
6191 return KERN_ABORTED;
6192 }
6193
6194 /*
6195 * The entry could have been clipped or it
6196 * may not exist anymore. Look it up again.
6197 */
6198 if (!vm_map_lookup_entry(map, s, &first_entry)) {
6199 /*
6200 * User: use the next entry
6201 */
6202 entry = first_entry->vme_next;
6203 s = entry->vme_start;
6204 } else {
6205 entry = first_entry;
6206 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
6207 }
6208 last_timestamp = map->timestamp;
6209 continue;
6210 } /* end in_transition */
6211
6212 if (entry->wired_count) {
6213 boolean_t user_wire;
6214
6215 user_wire = entry->user_wired_count > 0;
6216
6217 /*
6218 * Remove a kernel wiring if requested
6219 */
6220 if (flags & VM_MAP_REMOVE_KUNWIRE) {
6221 entry->wired_count--;
6222 }
6223
6224 /*
6225 * Remove all user wirings for proper accounting
6226 */
6227 if (entry->user_wired_count > 0) {
6228 while (entry->user_wired_count)
6229 subtract_wire_counts(map, entry, user_wire);
6230 }
6231
6232 if (entry->wired_count != 0) {
6233 assert(map != kernel_map);
6234 /*
6235 * Cannot continue. Typical case is when
6236 * a user thread has physical I/O pending
6237 * on this page. Either wait for the
6238 * kernel wiring to go away or return an
6239 * error.
6240 */
6241 if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) {
6242 wait_result_t wait_result;
6243
6244 assert(s == entry->vme_start);
6245 entry->needs_wakeup = TRUE;
6246 wait_result = vm_map_entry_wait(map,
6247 interruptible);
6248
6249 if (interruptible &&
6250 wait_result == THREAD_INTERRUPTED) {
6251 /*
6252 * We do not clear the
6253 * needs_wakeup flag, since we
6254 * cannot tell if we were the
6255 * only one.
6256 */
6257 return KERN_ABORTED;
6258 }
6259
6260 /*
6261 * The entry could have been clipped or
6262 * it may not exist anymore. Look it
6263 * up again.
6264 */
6265 if (!vm_map_lookup_entry(map, s,
6266 &first_entry)) {
6267 assert(map != kernel_map);
6268 /*
6269 * User: use the next entry
6270 */
6271 entry = first_entry->vme_next;
6272 s = entry->vme_start;
6273 } else {
6274 entry = first_entry;
6275 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
6276 }
6277 last_timestamp = map->timestamp;
6278 continue;
6279 }
6280 else {
6281 return KERN_FAILURE;
6282 }
6283 }
6284
6285 entry->in_transition = TRUE;
6286 /*
6287 * copy current entry. see comment in vm_map_wire()
6288 */
6289 tmp_entry = *entry;
6290 assert(s == entry->vme_start);
6291
6292 /*
6293 * We can unlock the map now. The in_transition
6294 * state guarantees existence of the entry.
6295 */
6296 vm_map_unlock(map);
6297
6298 if (tmp_entry.is_sub_map) {
6299 vm_map_t sub_map;
6300 vm_map_offset_t sub_start, sub_end;
6301 pmap_t pmap;
6302 vm_map_offset_t pmap_addr;
6303
6304
6305 sub_map = VME_SUBMAP(&tmp_entry);
6306 sub_start = VME_OFFSET(&tmp_entry);
6307 sub_end = sub_start + (tmp_entry.vme_end -
6308 tmp_entry.vme_start);
6309 if (tmp_entry.use_pmap) {
6310 pmap = sub_map->pmap;
6311 pmap_addr = tmp_entry.vme_start;
6312 } else {
6313 pmap = map->pmap;
6314 pmap_addr = tmp_entry.vme_start;
6315 }
6316 (void) vm_map_unwire_nested(sub_map,
6317 sub_start, sub_end,
6318 user_wire,
6319 pmap, pmap_addr);
6320 } else {
6321
6322 if (VME_OBJECT(&tmp_entry) == kernel_object) {
6323 pmap_protect_options(
6324 map->pmap,
6325 tmp_entry.vme_start,
6326 tmp_entry.vme_end,
6327 VM_PROT_NONE,
6328 PMAP_OPTIONS_REMOVE,
6329 NULL);
6330 }
6331 vm_fault_unwire(map, &tmp_entry,
6332 VME_OBJECT(&tmp_entry) == kernel_object,
6333 map->pmap, tmp_entry.vme_start);
6334 }
6335
6336 vm_map_lock(map);
6337
6338 if (last_timestamp+1 != map->timestamp) {
6339 /*
6340 * Find the entry again. It could have
6341 * been clipped after we unlocked the map.
6342 */
6343 if (!vm_map_lookup_entry(map, s, &first_entry)){
6344 assert((map != kernel_map) &&
6345 (!entry->is_sub_map));
6346 first_entry = first_entry->vme_next;
6347 s = first_entry->vme_start;
6348 } else {
6349 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
6350 }
6351 } else {
6352 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
6353 first_entry = entry;
6354 }
6355
6356 last_timestamp = map->timestamp;
6357
6358 entry = first_entry;
6359 while ((entry != vm_map_to_entry(map)) &&
6360 (entry->vme_start < tmp_entry.vme_end)) {
6361 assert(entry->in_transition);
6362 entry->in_transition = FALSE;
6363 if (entry->needs_wakeup) {
6364 entry->needs_wakeup = FALSE;
6365 need_wakeup = TRUE;
6366 }
6367 entry = entry->vme_next;
6368 }
6369 /*
6370 * We have unwired the entry(s). Go back and
6371 * delete them.
6372 */
6373 entry = first_entry;
6374 continue;
6375 }
6376
6377 /* entry is unwired */
6378 assert(entry->wired_count == 0);
6379 assert(entry->user_wired_count == 0);
6380
6381 assert(s == entry->vme_start);
6382
6383 if (flags & VM_MAP_REMOVE_NO_PMAP_CLEANUP) {
6384 /*
6385 * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
6386 * vm_map_delete(), some map entries might have been
6387 * transferred to a "zap_map", which doesn't have a
6388 * pmap. The original pmap has already been flushed
6389 * in the vm_map_delete() call targeting the original
6390 * map, but when we get to destroying the "zap_map",
6391 * we don't have any pmap to flush, so let's just skip
6392 * all this.
6393 */
6394 } else if (entry->is_sub_map) {
6395 if (entry->use_pmap) {
6396 #ifndef NO_NESTED_PMAP
6397 int pmap_flags;
6398
6399 if (flags & VM_MAP_REMOVE_NO_UNNESTING) {
6400 /*
6401 * This is the final cleanup of the
6402 * address space being terminated.
6403 * No new mappings are expected and
6404 * we don't really need to unnest the
6405 * shared region (and lose the "global"
6406 * pmap mappings, if applicable).
6407 *
6408 * Tell the pmap layer that we're
6409 * "clean" wrt nesting.
6410 */
6411 pmap_flags = PMAP_UNNEST_CLEAN;
6412 } else {
6413 /*
6414 * We're unmapping part of the nested
6415 * shared region, so we can't keep the
6416 * nested pmap.
6417 */
6418 pmap_flags = 0;
6419 }
6420 pmap_unnest_options(
6421 map->pmap,
6422 (addr64_t)entry->vme_start,
6423 entry->vme_end - entry->vme_start,
6424 pmap_flags);
6425 #endif /* NO_NESTED_PMAP */
6426 if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
6427 /* clean up parent map/maps */
6428 vm_map_submap_pmap_clean(
6429 map, entry->vme_start,
6430 entry->vme_end,
6431 VME_SUBMAP(entry),
6432 VME_OFFSET(entry));
6433 }
6434 } else {
6435 vm_map_submap_pmap_clean(
6436 map, entry->vme_start, entry->vme_end,
6437 VME_SUBMAP(entry),
6438 VME_OFFSET(entry));
6439 }
6440 } else if (VME_OBJECT(entry) != kernel_object &&
6441 VME_OBJECT(entry) != compressor_object) {
6442 object = VME_OBJECT(entry);
6443 if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
6444 vm_object_pmap_protect_options(
6445 object, VME_OFFSET(entry),
6446 entry->vme_end - entry->vme_start,
6447 PMAP_NULL,
6448 entry->vme_start,
6449 VM_PROT_NONE,
6450 PMAP_OPTIONS_REMOVE);
6451 } else if ((VME_OBJECT(entry) != VM_OBJECT_NULL) ||
6452 (map->pmap == kernel_pmap)) {
6453 /* Remove translations associated
6454 * with this range unless the entry
6455 * does not have an object, or
6456 * it's the kernel map or a descendant
6457 * since the platform could potentially
6458 * create "backdoor" mappings invisible
6459 * to the VM. It is expected that
6460 * objectless, non-kernel ranges
6461 * do not have such VM invisible
6462 * translations.
6463 */
6464 pmap_remove_options(map->pmap,
6465 (addr64_t)entry->vme_start,
6466 (addr64_t)entry->vme_end,
6467 PMAP_OPTIONS_REMOVE);
6468 }
6469 }
6470
6471 if (entry->iokit_acct) {
6472 /* alternate accounting */
6473 DTRACE_VM4(vm_map_iokit_unmapped_region,
6474 vm_map_t, map,
6475 vm_map_offset_t, entry->vme_start,
6476 vm_map_offset_t, entry->vme_end,
6477 int, VME_ALIAS(entry));
6478 vm_map_iokit_unmapped_region(map,
6479 (entry->vme_end -
6480 entry->vme_start));
6481 entry->iokit_acct = FALSE;
6482 }
6483
6484 /*
6485 * All pmap mappings for this map entry must have been
6486 * cleared by now.
6487 */
6488 #if DEBUG
6489 assert(vm_map_pmap_is_empty(map,
6490 entry->vme_start,
6491 entry->vme_end));
6492 #endif /* DEBUG */
6493
6494 next = entry->vme_next;
6495
6496 if (map->pmap == kernel_pmap &&
6497 map->ref_count != 0 &&
6498 entry->vme_end < end &&
6499 (next == vm_map_to_entry(map) ||
6500 next->vme_start != entry->vme_end)) {
6501 panic("vm_map_delete(%p,0x%llx,0x%llx): "
6502 "hole after %p at 0x%llx\n",
6503 map,
6504 (uint64_t)start,
6505 (uint64_t)end,
6506 entry,
6507 (uint64_t)entry->vme_end);
6508 }
6509
6510 s = next->vme_start;
6511 last_timestamp = map->timestamp;
6512
6513 if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) &&
6514 zap_map != VM_MAP_NULL) {
6515 vm_map_size_t entry_size;
6516 /*
6517 * The caller wants to save the affected VM map entries
6518 * into the "zap_map". The caller will take care of
6519 * these entries.
6520 */
6521 /* unlink the entry from "map" ... */
6522 vm_map_store_entry_unlink(map, entry);
6523 /* ... and add it to the end of the "zap_map" */
6524 vm_map_store_entry_link(zap_map,
6525 vm_map_last_entry(zap_map),
6526 entry);
6527 entry_size = entry->vme_end - entry->vme_start;
6528 map->size -= entry_size;
6529 zap_map->size += entry_size;
6530 /* we didn't unlock the map, so no timestamp increase */
6531 last_timestamp--;
6532 } else {
6533 vm_map_entry_delete(map, entry);
6534 /* vm_map_entry_delete unlocks the map */
6535 vm_map_lock(map);
6536 }
6537
6538 entry = next;
6539
6540 if(entry == vm_map_to_entry(map)) {
6541 break;
6542 }
6543 if (last_timestamp+1 != map->timestamp) {
6544 /*
6545 * we are responsible for deleting everything
6546 * from the given space; if someone has interfered,
6547 * we pick up where we left off. Back-fills should
6548 * be all right for anyone except vm_map_delete, and
6549 * we have to assume that the task has been fully
6550 * disabled before we get here.
6551 */
6552 if (!vm_map_lookup_entry(map, s, &entry)){
6553 entry = entry->vme_next;
6554 s = entry->vme_start;
6555 } else {
6556 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
6557 }
6558 /*
6559 * others can not only allocate behind us, we can
6560 * also see coalescing while we don't hold the map lock
6561 */
6562 if(entry == vm_map_to_entry(map)) {
6563 break;
6564 }
6565 }
6566 last_timestamp = map->timestamp;
6567 }
6568
6569 if (map->wait_for_space)
6570 thread_wakeup((event_t) map);
6571 /*
6572 * wake up anybody waiting on entries that we have already deleted.
6573 */
6574 if (need_wakeup)
6575 vm_map_entry_wakeup(map);
6576
6577 return KERN_SUCCESS;
6578 }
6579
6580 /*
6581 * vm_map_remove:
6582 *
6583 * Remove the given address range from the target map.
6584 * This is the exported form of vm_map_delete.
6585 */
6586 kern_return_t
6587 vm_map_remove(
6588 register vm_map_t map,
6589 register vm_map_offset_t start,
6590 register vm_map_offset_t end,
6591 register boolean_t flags)
6592 {
6593 register kern_return_t result;
6594
6595 vm_map_lock(map);
6596 VM_MAP_RANGE_CHECK(map, start, end);
6597 /*
6598 * For the zone_map, the kernel controls the allocation/freeing of memory.
6599 * Any free to the zone_map should be within the bounds of the map and
6600 * should free up memory. If the VM_MAP_RANGE_CHECK() silently converts a
6601 * free to the zone_map into a no-op, there is a problem and we should
6602 * panic.
6603 */
6604 if ((map == zone_map) && (start == end))
6605 panic("Nothing being freed to the zone_map. start = end = %p\n", (void *)start);
6606 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
6607 vm_map_unlock(map);
6608
6609 return(result);
6610 }
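/*
 * Illustrative sketch (not compiled): a typical page-rounded removal.
 * "map", "addr" and "size" are assumed to come from the caller.  A
 * KERN_ABORTED return is only possible when VM_MAP_REMOVE_INTERRUPTIBLE
 * is passed, per the vm_map_delete() description above.
 */
#if 0
kern_return_t kr;

kr = vm_map_remove(map,
    vm_map_trunc_page(addr, VM_MAP_PAGE_MASK(map)),
    vm_map_round_page(addr + size, VM_MAP_PAGE_MASK(map)),
    VM_MAP_REMOVE_KUNWIRE);	/* also drop one kernel wiring, if any */
if (kr != KERN_SUCCESS) {
	/* e.g. KERN_ABORTED if VM_MAP_REMOVE_INTERRUPTIBLE had been set */
}
#endif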
6611
6612
6613 /*
6614 * Routine: vm_map_copy_discard
6615 *
6616 * Description:
6617 * Dispose of a map copy object (returned by
6618 * vm_map_copyin).
6619 */
6620 void
6621 vm_map_copy_discard(
6622 vm_map_copy_t copy)
6623 {
6624 if (copy == VM_MAP_COPY_NULL)
6625 return;
6626
6627 switch (copy->type) {
6628 case VM_MAP_COPY_ENTRY_LIST:
6629 while (vm_map_copy_first_entry(copy) !=
6630 vm_map_copy_to_entry(copy)) {
6631 vm_map_entry_t entry = vm_map_copy_first_entry(copy);
6632
6633 vm_map_copy_entry_unlink(copy, entry);
6634 if (entry->is_sub_map) {
6635 vm_map_deallocate(VME_SUBMAP(entry));
6636 } else {
6637 vm_object_deallocate(VME_OBJECT(entry));
6638 }
6639 vm_map_copy_entry_dispose(copy, entry);
6640 }
6641 break;
6642 case VM_MAP_COPY_OBJECT:
6643 vm_object_deallocate(copy->cpy_object);
6644 break;
6645 case VM_MAP_COPY_KERNEL_BUFFER:
6646
6647 /*
6648 * The vm_map_copy_t and possibly the data buffer were
6649 * allocated by a single call to kalloc(), i.e. the
6650 * vm_map_copy_t was not allocated out of the zone.
6651 */
6652 if (copy->size > msg_ool_size_small || copy->offset)
6653 panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
6654 (long long)copy->size, (long long)copy->offset);
6655 kfree(copy, copy->size + cpy_kdata_hdr_sz);
6656 return;
6657 }
6658 zfree(vm_map_copy_zone, copy);
6659 }
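/*
 * Illustrative sketch (not compiled): the ownership rule for copy objects.
 * A copy returned by vm_map_copyin() must either be consumed by a
 * successful copyout/overwrite or be discarded explicitly.  "src_map",
 * "src_addr", "len" and the failing check are assumptions of the sketch;
 * vm_map_copyin()'s signature is taken from vm_map.h.
 */
#if 0
vm_map_copy_t copy;
kern_return_t kr;

kr = vm_map_copyin(src_map, src_addr, len,
    FALSE,	/* src_destroy */
    &copy);
if (kr != KERN_SUCCESS)
	return kr;

if (downstream_check_fails) {	/* hypothetical failure condition */
	vm_map_copy_discard(copy);	/* we still own the copy: discard it */
	return KERN_FAILURE;
}
#endif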
6660
6661 /*
6662 * Routine: vm_map_copy_copy
6663 *
6664 * Description:
6665 * Move the information in a map copy object to
6666 * a new map copy object, leaving the old one
6667 * empty.
6668 *
6669 * This is used by kernel routines that need
6670 * to look at out-of-line data (in copyin form)
6671 * before deciding whether to return SUCCESS.
6672 * If the routine returns FAILURE, the original
6673 * copy object will be deallocated; therefore,
6674 * these routines must make a copy of the copy
6675 * object and leave the original empty so that
6676 * deallocation will not fail.
6677 */
6678 vm_map_copy_t
6679 vm_map_copy_copy(
6680 vm_map_copy_t copy)
6681 {
6682 vm_map_copy_t new_copy;
6683
6684 if (copy == VM_MAP_COPY_NULL)
6685 return VM_MAP_COPY_NULL;
6686
6687 /*
6688 * Allocate a new copy object, and copy the information
6689 * from the old one into it.
6690 */
6691
6692 new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
6693 new_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
6694 *new_copy = *copy;
6695
6696 if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
6697 /*
6698 * The links in the entry chain must be
6699 * changed to point to the new copy object.
6700 */
6701 vm_map_copy_first_entry(copy)->vme_prev
6702 = vm_map_copy_to_entry(new_copy);
6703 vm_map_copy_last_entry(copy)->vme_next
6704 = vm_map_copy_to_entry(new_copy);
6705 }
6706
6707 /*
6708 * Change the old copy object into one that contains
6709 * nothing to be deallocated.
6710 */
6711 copy->type = VM_MAP_COPY_OBJECT;
6712 copy->cpy_object = VM_OBJECT_NULL;
6713
6714 /*
6715 * Return the new object.
6716 */
6717 return new_copy;
6718 }
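/*
 * Illustrative sketch (not compiled) of the pattern described above:
 * take a private copy of the copy object, inspect the out-of-line data,
 * and leave the original empty so that a later deallocation of it is
 * harmless.  "inspect_ool_data" is a hypothetical examination routine.
 */
#if 0
vm_map_copy_t working_copy;

working_copy = vm_map_copy_copy(copy);	/* "copy" is now empty */
if (inspect_ool_data(working_copy) != KERN_SUCCESS) {
	vm_map_copy_discard(working_copy);
	return KERN_FAILURE;	/* the original "copy" can still be discarded safely */
}
/* on success, hand "working_copy" along in place of "copy" */
#endif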
6719
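/*
 * Routine:	vm_map_overwrite_submap_recurse	[internal use only]
 *
 * Description:
 *	Verify that the destination range [dst_addr, dst_addr + dst_size)
 *	of "dst_map" is writeable and contiguous, recursing into any
 *	submaps encountered and waiting for entries in transition.
 *	Returns KERN_FAILURE if a permanent object is found after a
 *	submap has been traversed; used by the copy-overwrite path
 *	before it commits to overwriting a destination with submaps.
 */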
6720 static kern_return_t
6721 vm_map_overwrite_submap_recurse(
6722 vm_map_t dst_map,
6723 vm_map_offset_t dst_addr,
6724 vm_map_size_t dst_size)
6725 {
6726 vm_map_offset_t dst_end;
6727 vm_map_entry_t tmp_entry;
6728 vm_map_entry_t entry;
6729 kern_return_t result;
6730 boolean_t encountered_sub_map = FALSE;
6731
6732
6733
6734 /*
6735 * Verify that the destination is all writeable
6736 * initially. We have to trunc the destination
6737 * address and round the copy size or we'll end up
6738 * splitting entries in strange ways.
6739 */
6740
6741 dst_end = vm_map_round_page(dst_addr + dst_size,
6742 VM_MAP_PAGE_MASK(dst_map));
6743 vm_map_lock(dst_map);
6744
6745 start_pass_1:
6746 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
6747 vm_map_unlock(dst_map);
6748 return(KERN_INVALID_ADDRESS);
6749 }
6750
6751 vm_map_clip_start(dst_map,
6752 tmp_entry,
6753 vm_map_trunc_page(dst_addr,
6754 VM_MAP_PAGE_MASK(dst_map)));
6755 if (tmp_entry->is_sub_map) {
6756 /* clipping did unnest if needed */
6757 assert(!tmp_entry->use_pmap);
6758 }
6759
6760 for (entry = tmp_entry;;) {
6761 vm_map_entry_t next;
6762
6763 next = entry->vme_next;
6764 while(entry->is_sub_map) {
6765 vm_map_offset_t sub_start;
6766 vm_map_offset_t sub_end;
6767 vm_map_offset_t local_end;
6768
6769 if (entry->in_transition) {
6770 /*
6771 * Say that we are waiting, and wait for entry.
6772 */
6773 entry->needs_wakeup = TRUE;
6774 vm_map_entry_wait(dst_map, THREAD_UNINT);
6775
6776 goto start_pass_1;
6777 }
6778
6779 encountered_sub_map = TRUE;
6780 sub_start = VME_OFFSET(entry);
6781
6782 if(entry->vme_end < dst_end)
6783 sub_end = entry->vme_end;
6784 else
6785 sub_end = dst_end;
6786 sub_end -= entry->vme_start;
6787 sub_end += VME_OFFSET(entry);
6788 local_end = entry->vme_end;
6789 vm_map_unlock(dst_map);
6790
6791 result = vm_map_overwrite_submap_recurse(
6792 VME_SUBMAP(entry),
6793 sub_start,
6794 sub_end - sub_start);
6795
6796 if(result != KERN_SUCCESS)
6797 return result;
6798 if (dst_end <= entry->vme_end)
6799 return KERN_SUCCESS;
6800 vm_map_lock(dst_map);
6801 if(!vm_map_lookup_entry(dst_map, local_end,
6802 &tmp_entry)) {
6803 vm_map_unlock(dst_map);
6804 return(KERN_INVALID_ADDRESS);
6805 }
6806 entry = tmp_entry;
6807 next = entry->vme_next;
6808 }
6809
6810 if ( ! (entry->protection & VM_PROT_WRITE)) {
6811 vm_map_unlock(dst_map);
6812 return(KERN_PROTECTION_FAILURE);
6813 }
6814
6815 /*
6816 * If the entry is in transition, we must wait
6817 * for it to exit that state. Anything could happen
6818 * when we unlock the map, so start over.
6819 */
6820 if (entry->in_transition) {
6821
6822 /*
6823 * Say that we are waiting, and wait for entry.
6824 */
6825 entry->needs_wakeup = TRUE;
6826 vm_map_entry_wait(dst_map, THREAD_UNINT);
6827
6828 goto start_pass_1;
6829 }
6830
6831 /*
6832 * our range is contained completely within this map entry
6833 */
6834 if (dst_end <= entry->vme_end) {
6835 vm_map_unlock(dst_map);
6836 return KERN_SUCCESS;
6837 }
6838 /*
6839 * check that range specified is contiguous region
6840 */
6841 if ((next == vm_map_to_entry(dst_map)) ||
6842 (next->vme_start != entry->vme_end)) {
6843 vm_map_unlock(dst_map);
6844 return(KERN_INVALID_ADDRESS);
6845 }
6846
6847 /*
6848 * Check for permanent objects in the destination.
6849 */
6850 if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
6851 ((!VME_OBJECT(entry)->internal) ||
6852 (VME_OBJECT(entry)->true_share))) {
6853 if(encountered_sub_map) {
6854 vm_map_unlock(dst_map);
6855 return(KERN_FAILURE);
6856 }
6857 }
6858
6859
6860 entry = next;
6861 }/* for */
6862 vm_map_unlock(dst_map);
6863 return(KERN_SUCCESS);
6864 }
6865
6866 /*
6867 * Routine: vm_map_copy_overwrite
6868 *
6869 * Description:
6870 * Copy the memory described by the map copy
6871 * object (copy; returned by vm_map_copyin) onto
6872 * the specified destination region (dst_map, dst_addr).
6873 * The destination must be writeable.
6874 *
6875 * Unlike vm_map_copyout, this routine actually
6876 * writes over previously-mapped memory. If the
6877 * previous mapping was to a permanent (user-supplied)
6878 * memory object, it is preserved.
6879 *
6880 * The attributes (protection and inheritance) of the
6881 * destination region are preserved.
6882 *
6883 * If successful, consumes the copy object.
6884 * Otherwise, the caller is responsible for it.
6885 *
6886 * Implementation notes:
6887 * To overwrite aligned temporary virtual memory, it is
6888 * sufficient to remove the previous mapping and insert
6889 * the new copy. This replacement is done either on
6890 * the whole region (if no permanent virtual memory
6891 * objects are embedded in the destination region) or
6892 * in individual map entries.
6893 *
6894 * To overwrite permanent virtual memory, it is necessary
6895 * to copy each page, as the external memory management
6896 * interface currently does not provide any optimizations.
6897 *
6898 * Unaligned memory also has to be copied. It is possible
6899 * to use 'vm_trickery' to copy the aligned data. This is
6900 * not done but not hard to implement.
6901 *
6902 * Once a page of permanent memory has been overwritten,
6903 * it is impossible to interrupt this function; otherwise,
6904 * the call would be neither atomic nor location-independent.
6905 * The kernel-state portion of a user thread must be
6906 * interruptible.
6907 *
6908 * It may be expensive to forward all requests that might
6909 * overwrite permanent memory (vm_write, vm_copy) to
6910 * uninterruptible kernel threads. This routine may be
6911 * called by interruptible threads; however, success is
6912 * not guaranteed -- if the request cannot be performed
6913 * atomically and interruptibly, an error indication is
6914 * returned.
6915 */
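/*
 * Illustrative sketch (not compiled): the contract above from a caller's
 * point of view.  "src_map", "dst_map", "src_addr", "dst_addr" and "size"
 * are assumed; vm_map_copyin()'s signature is taken from vm_map.h.  On
 * success the copy object is consumed; on failure the caller still owns
 * it and must discard it.
 */
#if 0
vm_map_copy_t copy;
kern_return_t kr;

kr = vm_map_copyin(src_map, src_addr, size, FALSE, &copy);
if (kr != KERN_SUCCESS)
	return kr;

kr = vm_map_copy_overwrite(dst_map, dst_addr, copy,
    FALSE);	/* interruptible */
if (kr != KERN_SUCCESS)
	vm_map_copy_discard(copy);	/* not consumed on failure */
return kr;
#endif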
6916
6917 static kern_return_t
6918 vm_map_copy_overwrite_nested(
6919 vm_map_t dst_map,
6920 vm_map_address_t dst_addr,
6921 vm_map_copy_t copy,
6922 boolean_t interruptible,
6923 pmap_t pmap,
6924 boolean_t discard_on_success)
6925 {
6926 vm_map_offset_t dst_end;
6927 vm_map_entry_t tmp_entry;
6928 vm_map_entry_t entry;
6929 kern_return_t kr;
6930 boolean_t aligned = TRUE;
6931 boolean_t contains_permanent_objects = FALSE;
6932 boolean_t encountered_sub_map = FALSE;
6933 vm_map_offset_t base_addr;
6934 vm_map_size_t copy_size;
6935 vm_map_size_t total_size;
6936
6937
6938 /*
6939 * Check for null copy object.
6940 */
6941
6942 if (copy == VM_MAP_COPY_NULL)
6943 return(KERN_SUCCESS);
6944
6945 /*
6946 * Check for special kernel buffer allocated
6947 * by new_ipc_kmsg_copyin.
6948 */
6949
6950 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
6951 return(vm_map_copyout_kernel_buffer(
6952 dst_map, &dst_addr,
6953 copy, TRUE, discard_on_success));
6954 }
6955
6956 /*
6957 * Only works for entry lists at the moment. Will
6958 * support page lists later.
6959 */
6960
6961 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
6962
6963 if (copy->size == 0) {
6964 if (discard_on_success)
6965 vm_map_copy_discard(copy);
6966 return(KERN_SUCCESS);
6967 }
6968
6969 /*
6970 * Verify that the destination is all writeable
6971 * initially. We have to trunc the destination
6972 * address and round the copy size or we'll end up
6973 * splitting entries in strange ways.
6974 */
6975
6976 if (!VM_MAP_PAGE_ALIGNED(copy->size,
6977 VM_MAP_PAGE_MASK(dst_map)) ||
6978 !VM_MAP_PAGE_ALIGNED(copy->offset,
6979 VM_MAP_PAGE_MASK(dst_map)) ||
6980 !VM_MAP_PAGE_ALIGNED(dst_addr,
6981 VM_MAP_PAGE_MASK(dst_map)))
6982 {
6983 aligned = FALSE;
6984 dst_end = vm_map_round_page(dst_addr + copy->size,
6985 VM_MAP_PAGE_MASK(dst_map));
6986 } else {
6987 dst_end = dst_addr + copy->size;
6988 }
6989
6990 vm_map_lock(dst_map);
6991
6992 /* LP64todo - remove this check when vm_map_commpage64()
6993 * no longer has to stuff in a map_entry for the commpage
6994 * above the map's max_offset.
6995 */
6996 if (dst_addr >= dst_map->max_offset) {
6997 vm_map_unlock(dst_map);
6998 return(KERN_INVALID_ADDRESS);
6999 }
7000
7001 start_pass_1:
7002 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
7003 vm_map_unlock(dst_map);
7004 return(KERN_INVALID_ADDRESS);
7005 }
7006 vm_map_clip_start(dst_map,
7007 tmp_entry,
7008 vm_map_trunc_page(dst_addr,
7009 VM_MAP_PAGE_MASK(dst_map)));
7010 for (entry = tmp_entry;;) {
7011 vm_map_entry_t next = entry->vme_next;
7012
7013 while(entry->is_sub_map) {
7014 vm_map_offset_t sub_start;
7015 vm_map_offset_t sub_end;
7016 vm_map_offset_t local_end;
7017
7018 if (entry->in_transition) {
7019
7020 /*
7021 * Say that we are waiting, and wait for entry.
7022 */
7023 entry->needs_wakeup = TRUE;
7024 vm_map_entry_wait(dst_map, THREAD_UNINT);
7025
7026 goto start_pass_1;
7027 }
7028
7029 local_end = entry->vme_end;
7030 if (!(entry->needs_copy)) {
7031 /* if needs_copy we are a COW submap */
7032 /* in such a case we just replace so */
7033 /* there is no need for the */
7034 /* following check. */
7035 encountered_sub_map = TRUE;
7036 sub_start = VME_OFFSET(entry);
7037
7038 if(entry->vme_end < dst_end)
7039 sub_end = entry->vme_end;
7040 else
7041 sub_end = dst_end;
7042 sub_end -= entry->vme_start;
7043 sub_end += VME_OFFSET(entry);
7044 vm_map_unlock(dst_map);
7045
7046 kr = vm_map_overwrite_submap_recurse(
7047 VME_SUBMAP(entry),
7048 sub_start,
7049 sub_end - sub_start);
7050 if(kr != KERN_SUCCESS)
7051 return kr;
7052 vm_map_lock(dst_map);
7053 }
7054
7055 if (dst_end <= entry->vme_end)
7056 goto start_overwrite;
7057 if(!vm_map_lookup_entry(dst_map, local_end,
7058 &entry)) {
7059 vm_map_unlock(dst_map);
7060 return(KERN_INVALID_ADDRESS);
7061 }
7062 next = entry->vme_next;
7063 }
7064
7065 if ( ! (entry->protection & VM_PROT_WRITE)) {
7066 vm_map_unlock(dst_map);
7067 return(KERN_PROTECTION_FAILURE);
7068 }
7069
7070 /*
7071 * If the entry is in transition, we must wait
7072 * for it to exit that state. Anything could happen
7073 * when we unlock the map, so start over.
7074 */
7075 if (entry->in_transition) {
7076
7077 /*
7078 * Say that we are waiting, and wait for entry.
7079 */
7080 entry->needs_wakeup = TRUE;
7081 vm_map_entry_wait(dst_map, THREAD_UNINT);
7082
7083 goto start_pass_1;
7084 }
7085
7086 /*
7087 * our range is contained completely within this map entry
7088 */
7089 if (dst_end <= entry->vme_end)
7090 break;
7091 /*
7092 * check that range specified is contiguous region
7093 */
7094 if ((next == vm_map_to_entry(dst_map)) ||
7095 (next->vme_start != entry->vme_end)) {
7096 vm_map_unlock(dst_map);
7097 return(KERN_INVALID_ADDRESS);
7098 }
7099
7100
7101 /*
7102 * Check for permanent objects in the destination.
7103 */
7104 if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
7105 ((!VME_OBJECT(entry)->internal) ||
7106 (VME_OBJECT(entry)->true_share))) {
7107 contains_permanent_objects = TRUE;
7108 }
7109
7110 entry = next;
7111 }/* for */
7112
7113 start_overwrite:
7114 /*
7115 * If there are permanent objects in the destination, then
7116 * the copy cannot be interrupted.
7117 */
7118
7119 if (interruptible && contains_permanent_objects) {
7120 vm_map_unlock(dst_map);
7121 return(KERN_FAILURE); /* XXX */
7122 }
7123
7124 /*
7125 *
7126 * Make a second pass, overwriting the data.
7127 * At the beginning of each loop iteration,
7128 * the next entry to be overwritten is "tmp_entry"
7129 * (initially, the value returned from the lookup above),
7130 * and the starting address expected in that entry
7131 * is "start".
7132 */
7133
7134 total_size = copy->size;
7135 if(encountered_sub_map) {
7136 copy_size = 0;
7137 /* re-calculate tmp_entry since we've had the map */
7138 /* unlocked */
7139 if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
7140 vm_map_unlock(dst_map);
7141 return(KERN_INVALID_ADDRESS);
7142 }
7143 } else {
7144 copy_size = copy->size;
7145 }
7146
7147 base_addr = dst_addr;
7148 while(TRUE) {
7149 /* deconstruct the copy object and do in parts */
7150 /* only in sub_map, interruptible case */
7151 vm_map_entry_t copy_entry;
7152 vm_map_entry_t previous_prev = VM_MAP_ENTRY_NULL;
7153 vm_map_entry_t next_copy = VM_MAP_ENTRY_NULL;
7154 int nentries;
7155 int remaining_entries = 0;
7156 vm_map_offset_t new_offset = 0;
7157
7158 for (entry = tmp_entry; copy_size == 0;) {
7159 vm_map_entry_t next;
7160
7161 next = entry->vme_next;
7162
7163 /* tmp_entry and base address are moved along */
7164 /* each time we encounter a sub-map. Otherwise */
7165 /* entry can outpace tmp_entry, and the copy_size */
7166 /* may reflect the distance between them */
7167 /* if the current entry is found to be in transition */
7168 /* we will start over at the beginning or at the last */
7169 /* submap encountered, as dictated by base_addr, and */
7170 /* we will zero copy_size accordingly. */
7171 if (entry->in_transition) {
7172 /*
7173 * Say that we are waiting, and wait for entry.
7174 */
7175 entry->needs_wakeup = TRUE;
7176 vm_map_entry_wait(dst_map, THREAD_UNINT);
7177
7178 if(!vm_map_lookup_entry(dst_map, base_addr,
7179 &tmp_entry)) {
7180 vm_map_unlock(dst_map);
7181 return(KERN_INVALID_ADDRESS);
7182 }
7183 copy_size = 0;
7184 entry = tmp_entry;
7185 continue;
7186 }
7187 if(entry->is_sub_map) {
7188 vm_map_offset_t sub_start;
7189 vm_map_offset_t sub_end;
7190 vm_map_offset_t local_end;
7191
7192 if (entry->needs_copy) {
7193 /* if this is a COW submap */
7194 /* just back the range with an */
7195 /* anonymous entry */
7196 if(entry->vme_end < dst_end)
7197 sub_end = entry->vme_end;
7198 else
7199 sub_end = dst_end;
7200 if(entry->vme_start < base_addr)
7201 sub_start = base_addr;
7202 else
7203 sub_start = entry->vme_start;
7204 vm_map_clip_end(
7205 dst_map, entry, sub_end);
7206 vm_map_clip_start(
7207 dst_map, entry, sub_start);
7208 assert(!entry->use_pmap);
7209 entry->is_sub_map = FALSE;
7210 vm_map_deallocate(
7211 VME_SUBMAP(entry));
7212 VME_SUBMAP_SET(entry, NULL);
7213 entry->is_shared = FALSE;
7214 entry->needs_copy = FALSE;
7215 VME_OFFSET_SET(entry, 0);
7216 /*
7217 * XXX FBDP
7218 * We should propagate the protections
7219 * of the submap entry here instead
7220 * of forcing them to VM_PROT_ALL...
7221 * Or better yet, we should inherit
7222 * the protection of the copy_entry.
7223 */
7224 entry->protection = VM_PROT_ALL;
7225 entry->max_protection = VM_PROT_ALL;
7226 entry->wired_count = 0;
7227 entry->user_wired_count = 0;
7228 if(entry->inheritance
7229 == VM_INHERIT_SHARE)
7230 entry->inheritance = VM_INHERIT_COPY;
7231 continue;
7232 }
7233 /* first take care of any non-sub_map */
7234 /* entries to send */
7235 if(base_addr < entry->vme_start) {
7236 /* stuff to send */
7237 copy_size =
7238 entry->vme_start - base_addr;
7239 break;
7240 }
7241 sub_start = VME_OFFSET(entry);
7242
7243 if(entry->vme_end < dst_end)
7244 sub_end = entry->vme_end;
7245 else
7246 sub_end = dst_end;
7247 sub_end -= entry->vme_start;
7248 sub_end += VME_OFFSET(entry);
7249 local_end = entry->vme_end;
7250 vm_map_unlock(dst_map);
7251 copy_size = sub_end - sub_start;
7252
7253 /* adjust the copy object */
7254 if (total_size > copy_size) {
7255 vm_map_size_t local_size = 0;
7256 vm_map_size_t entry_size;
7257
7258 nentries = 1;
7259 new_offset = copy->offset;
7260 copy_entry = vm_map_copy_first_entry(copy);
7261 while(copy_entry !=
7262 vm_map_copy_to_entry(copy)){
7263 entry_size = copy_entry->vme_end -
7264 copy_entry->vme_start;
7265 if((local_size < copy_size) &&
7266 ((local_size + entry_size)
7267 >= copy_size)) {
7268 vm_map_copy_clip_end(copy,
7269 copy_entry,
7270 copy_entry->vme_start +
7271 (copy_size - local_size));
7272 entry_size = copy_entry->vme_end -
7273 copy_entry->vme_start;
7274 local_size += entry_size;
7275 new_offset += entry_size;
7276 }
7277 if(local_size >= copy_size) {
7278 next_copy = copy_entry->vme_next;
7279 copy_entry->vme_next =
7280 vm_map_copy_to_entry(copy);
7281 previous_prev =
7282 copy->cpy_hdr.links.prev;
7283 copy->cpy_hdr.links.prev = copy_entry;
7284 copy->size = copy_size;
7285 remaining_entries =
7286 copy->cpy_hdr.nentries;
7287 remaining_entries -= nentries;
7288 copy->cpy_hdr.nentries = nentries;
7289 break;
7290 } else {
7291 local_size += entry_size;
7292 new_offset += entry_size;
7293 nentries++;
7294 }
7295 copy_entry = copy_entry->vme_next;
7296 }
7297 }
7298
7299 if((entry->use_pmap) && (pmap == NULL)) {
7300 kr = vm_map_copy_overwrite_nested(
7301 VME_SUBMAP(entry),
7302 sub_start,
7303 copy,
7304 interruptible,
7305 VME_SUBMAP(entry)->pmap,
7306 TRUE);
7307 } else if (pmap != NULL) {
7308 kr = vm_map_copy_overwrite_nested(
7309 VME_SUBMAP(entry),
7310 sub_start,
7311 copy,
7312 interruptible, pmap,
7313 TRUE);
7314 } else {
7315 kr = vm_map_copy_overwrite_nested(
7316 VME_SUBMAP(entry),
7317 sub_start,
7318 copy,
7319 interruptible,
7320 dst_map->pmap,
7321 TRUE);
7322 }
7323 if(kr != KERN_SUCCESS) {
7324 if(next_copy != NULL) {
7325 copy->cpy_hdr.nentries +=
7326 remaining_entries;
7327 copy->cpy_hdr.links.prev->vme_next =
7328 next_copy;
7329 copy->cpy_hdr.links.prev
7330 = previous_prev;
7331 copy->size = total_size;
7332 }
7333 return kr;
7334 }
7335 if (dst_end <= local_end) {
7336 return(KERN_SUCCESS);
7337 }
7338 /* otherwise copy no longer exists, it was */
7339 /* destroyed after successful copy_overwrite */
7340 copy = (vm_map_copy_t)
7341 zalloc(vm_map_copy_zone);
7342 copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
7343 vm_map_copy_first_entry(copy) =
7344 vm_map_copy_last_entry(copy) =
7345 vm_map_copy_to_entry(copy);
7346 copy->type = VM_MAP_COPY_ENTRY_LIST;
7347 copy->offset = new_offset;
7348
7349 /*
7350 * XXX FBDP
7351 * this does not seem to deal with
7352 * the VM map store (R&B tree)
7353 */
7354
7355 total_size -= copy_size;
7356 copy_size = 0;
7357 /* put back remainder of copy in container */
7358 if(next_copy != NULL) {
7359 copy->cpy_hdr.nentries = remaining_entries;
7360 copy->cpy_hdr.links.next = next_copy;
7361 copy->cpy_hdr.links.prev = previous_prev;
7362 copy->size = total_size;
7363 next_copy->vme_prev =
7364 vm_map_copy_to_entry(copy);
7365 next_copy = NULL;
7366 }
7367 base_addr = local_end;
7368 vm_map_lock(dst_map);
7369 if(!vm_map_lookup_entry(dst_map,
7370 local_end, &tmp_entry)) {
7371 vm_map_unlock(dst_map);
7372 return(KERN_INVALID_ADDRESS);
7373 }
7374 entry = tmp_entry;
7375 continue;
7376 }
7377 if (dst_end <= entry->vme_end) {
7378 copy_size = dst_end - base_addr;
7379 break;
7380 }
7381
7382 if ((next == vm_map_to_entry(dst_map)) ||
7383 (next->vme_start != entry->vme_end)) {
7384 vm_map_unlock(dst_map);
7385 return(KERN_INVALID_ADDRESS);
7386 }
7387
7388 entry = next;
7389 }/* for */
7390
7391 next_copy = NULL;
7392 nentries = 1;
7393
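/*
 * As in the submap case above: temporarily truncate the copy entry list
 * to the entries covering "copy_size" bytes.  The detached tail is
 * remembered in "next_copy"/"previous_prev"/"remaining_entries" and is
 * re-attached to the copy object after this chunk has been overwritten,
 * or before returning an error.
 */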
7394 /* adjust the copy object */
7395 if (total_size > copy_size) {
7396 vm_map_size_t local_size = 0;
7397 vm_map_size_t entry_size;
7398
7399 new_offset = copy->offset;
7400 copy_entry = vm_map_copy_first_entry(copy);
7401 while(copy_entry != vm_map_copy_to_entry(copy)) {
7402 entry_size = copy_entry->vme_end -
7403 copy_entry->vme_start;
7404 if((local_size < copy_size) &&
7405 ((local_size + entry_size)
7406 >= copy_size)) {
7407 vm_map_copy_clip_end(copy, copy_entry,
7408 copy_entry->vme_start +
7409 (copy_size - local_size));
7410 entry_size = copy_entry->vme_end -
7411 copy_entry->vme_start;
7412 local_size += entry_size;
7413 new_offset += entry_size;
7414 }
7415 if(local_size >= copy_size) {
7416 next_copy = copy_entry->vme_next;
7417 copy_entry->vme_next =
7418 vm_map_copy_to_entry(copy);
7419 previous_prev =
7420 copy->cpy_hdr.links.prev;
7421 copy->cpy_hdr.links.prev = copy_entry;
7422 copy->size = copy_size;
7423 remaining_entries =
7424 copy->cpy_hdr.nentries;
7425 remaining_entries -= nentries;
7426 copy->cpy_hdr.nentries = nentries;
7427 break;
7428 } else {
7429 local_size += entry_size;
7430 new_offset += entry_size;
7431 nentries++;
7432 }
7433 copy_entry = copy_entry->vme_next;
7434 }
7435 }
7436
7437 if (aligned) {
7438 pmap_t local_pmap;
7439
7440 if(pmap)
7441 local_pmap = pmap;
7442 else
7443 local_pmap = dst_map->pmap;
7444
7445 if ((kr = vm_map_copy_overwrite_aligned(
7446 dst_map, tmp_entry, copy,
7447 base_addr, local_pmap)) != KERN_SUCCESS) {
7448 if(next_copy != NULL) {
7449 copy->cpy_hdr.nentries +=
7450 remaining_entries;
7451 copy->cpy_hdr.links.prev->vme_next =
7452 next_copy;
7453 copy->cpy_hdr.links.prev =
7454 previous_prev;
7455 copy->size += copy_size;
7456 }
7457 return kr;
7458 }
7459 vm_map_unlock(dst_map);
7460 } else {
7461 /*
7462 * Performance gain:
7463 *
7464 * if the copy and dst address are misaligned but have the same
7465 * offset within the page, we can copy the misaligned parts
7466 * with the unaligned path and the rest with the aligned path. If they are
7467 * aligned but len is unaligned, we simply need to copy
7468 * the end bit unaligned. We'll need to split the misaligned
7469 * bits of the region in this case!
7470 */
7471 /* ALWAYS UNLOCKS THE dst_map MAP */
7472 kr = vm_map_copy_overwrite_unaligned(
7473 dst_map,
7474 tmp_entry,
7475 copy,
7476 base_addr,
7477 discard_on_success);
7478 if (kr != KERN_SUCCESS) {
7479 if(next_copy != NULL) {
7480 copy->cpy_hdr.nentries +=
7481 remaining_entries;
7482 copy->cpy_hdr.links.prev->vme_next =
7483 next_copy;
7484 copy->cpy_hdr.links.prev =
7485 previous_prev;
7486 copy->size += copy_size;
7487 }
7488 return kr;
7489 }
7490 }
7491 total_size -= copy_size;
7492 if(total_size == 0)
7493 break;
7494 base_addr += copy_size;
7495 copy_size = 0;
7496 copy->offset = new_offset;
7497 if(next_copy != NULL) {
7498 copy->cpy_hdr.nentries = remaining_entries;
7499 copy->cpy_hdr.links.next = next_copy;
7500 copy->cpy_hdr.links.prev = previous_prev;
7501 next_copy->vme_prev = vm_map_copy_to_entry(copy);
7502 copy->size = total_size;
7503 }
7504 vm_map_lock(dst_map);
7505 while(TRUE) {
7506 if (!vm_map_lookup_entry(dst_map,
7507 base_addr, &tmp_entry)) {
7508 vm_map_unlock(dst_map);
7509 return(KERN_INVALID_ADDRESS);
7510 }
7511 if (tmp_entry->in_transition) {
7512 entry->needs_wakeup = TRUE;
7513 vm_map_entry_wait(dst_map, THREAD_UNINT);
7514 } else {
7515 break;
7516 }
7517 }
7518 vm_map_clip_start(dst_map,
7519 tmp_entry,
7520 vm_map_trunc_page(base_addr,
7521 VM_MAP_PAGE_MASK(dst_map)));
7522
7523 entry = tmp_entry;
7524 } /* while */
7525
7526 /*
7527 * Throw away the vm_map_copy object
7528 */
7529 if (discard_on_success)
7530 vm_map_copy_discard(copy);
7531
7532 return(KERN_SUCCESS);
7533 }/* vm_map_copy_overwrite */
7534
7535 kern_return_t
7536 vm_map_copy_overwrite(
7537 vm_map_t dst_map,
7538 vm_map_offset_t dst_addr,
7539 vm_map_copy_t copy,
7540 boolean_t interruptible)
7541 {
7542 vm_map_size_t head_size, tail_size;
7543 vm_map_copy_t head_copy, tail_copy;
7544 vm_map_offset_t head_addr, tail_addr;
7545 vm_map_entry_t entry;
7546 kern_return_t kr;
7547
7548 head_size = 0;
7549 tail_size = 0;
7550 head_copy = NULL;
7551 tail_copy = NULL;
7552 head_addr = 0;
7553 tail_addr = 0;
7554
7555 if (interruptible ||
7556 copy == VM_MAP_COPY_NULL ||
7557 copy->type != VM_MAP_COPY_ENTRY_LIST) {
7558 /*
7559 * We can't split the "copy" map if we're interruptible
7560 * or if we don't have a "copy" map...
7561 */
7562 blunt_copy:
7563 return vm_map_copy_overwrite_nested(dst_map,
7564 dst_addr,
7565 copy,
7566 interruptible,
7567 (pmap_t) NULL,
7568 TRUE);
7569 }
7570
7571 if (copy->size < 3 * PAGE_SIZE) {
7572 /*
7573 * Too small to bother with optimizing...
7574 */
7575 goto blunt_copy;
7576 }
7577
7578 if ((dst_addr & VM_MAP_PAGE_MASK(dst_map)) !=
7579 (copy->offset & VM_MAP_PAGE_MASK(dst_map))) {
7580 /*
7581 * Incompatible mis-alignment of source and destination...
7582 */
7583 goto blunt_copy;
7584 }
7585
7586 /*
7587 * Proper alignment or identical mis-alignment at the beginning.
7588 * Let's try and do a small unaligned copy first (if needed)
7589 * and then an aligned copy for the rest.
7590 */
7591 if (!page_aligned(dst_addr)) {
7592 head_addr = dst_addr;
7593 head_size = (VM_MAP_PAGE_SIZE(dst_map) -
7594 (copy->offset & VM_MAP_PAGE_MASK(dst_map)));
7595 }
7596 if (!page_aligned(copy->offset + copy->size)) {
7597 /*
7598 * Mis-alignment at the end.
7599 * Do an aligned copy up to the last page and
7600 * then an unaligned copy for the remaining bytes.
7601 */
7602 tail_size = ((copy->offset + copy->size) &
7603 VM_MAP_PAGE_MASK(dst_map));
7604 tail_addr = dst_addr + copy->size - tail_size;
7605 }
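/*
 * Worked example (illustrative, assuming 4KB map pages): with
 * dst_addr = 0x7000f80, copy->offset mis-aligned by the same 0xf80 and
 * copy->size = 0x4100, we get head_size = 0x1000 - 0xf80 = 0x80
 * (reaching 0x7001000), tail_size = (0xf80 + 0x4100) & 0xfff = 0x80
 * with tail_addr = 0x7005000, and the middle 0x4000 bytes go through
 * the aligned path below.
 */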
7606
7607 if (head_size + tail_size == copy->size) {
7608 /*
7609 * It's all unaligned, no optimization possible...
7610 */
7611 goto blunt_copy;
7612 }
7613
7614 /*
7615 * Can't optimize if there are any submaps in the
7616 * destination due to the way we free the "copy" map
7617 * progressively in vm_map_copy_overwrite_nested()
7618 * in that case.
7619 */
7620 vm_map_lock_read(dst_map);
7621 if (! vm_map_lookup_entry(dst_map, dst_addr, &entry)) {
7622 vm_map_unlock_read(dst_map);
7623 goto blunt_copy;
7624 }
7625 for (;
7626 (entry != vm_map_copy_to_entry(copy) &&
7627 entry->vme_start < dst_addr + copy->size);
7628 entry = entry->vme_next) {
7629 if (entry->is_sub_map) {
7630 vm_map_unlock_read(dst_map);
7631 goto blunt_copy;
7632 }
7633 }
7634 vm_map_unlock_read(dst_map);
7635
7636 if (head_size) {
7637 /*
7638 * Unaligned copy of the first "head_size" bytes, to reach
7639 * a page boundary.
7640 */
7641
7642 /*
7643 * Extract "head_copy" out of "copy".
7644 */
7645 head_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
7646 head_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
7647 vm_map_copy_first_entry(head_copy) =
7648 vm_map_copy_to_entry(head_copy);
7649 vm_map_copy_last_entry(head_copy) =
7650 vm_map_copy_to_entry(head_copy);
7651 head_copy->type = VM_MAP_COPY_ENTRY_LIST;
7652 head_copy->cpy_hdr.nentries = 0;
7653 head_copy->cpy_hdr.entries_pageable =
7654 copy->cpy_hdr.entries_pageable;
7655 vm_map_store_init(&head_copy->cpy_hdr);
7656
7657 head_copy->offset = copy->offset;
7658 head_copy->size = head_size;
7659
7660 copy->offset += head_size;
7661 copy->size -= head_size;
7662
7663 entry = vm_map_copy_first_entry(copy);
7664 vm_map_copy_clip_end(copy, entry, copy->offset);
7665 vm_map_copy_entry_unlink(copy, entry);
7666 vm_map_copy_entry_link(head_copy,
7667 vm_map_copy_to_entry(head_copy),
7668 entry);
7669
7670 /*
7671 * Do the unaligned copy.
7672 */
7673 kr = vm_map_copy_overwrite_nested(dst_map,
7674 head_addr,
7675 head_copy,
7676 interruptible,
7677 (pmap_t) NULL,
7678 FALSE);
7679 if (kr != KERN_SUCCESS)
7680 goto done;
7681 }
7682
7683 if (tail_size) {
7684 /*
7685 * Extract "tail_copy" out of "copy".
7686 */
7687 tail_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
7688 tail_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
7689 vm_map_copy_first_entry(tail_copy) =
7690 vm_map_copy_to_entry(tail_copy);
7691 vm_map_copy_last_entry(tail_copy) =
7692 vm_map_copy_to_entry(tail_copy);
7693 tail_copy->type = VM_MAP_COPY_ENTRY_LIST;
7694 tail_copy->cpy_hdr.nentries = 0;
7695 tail_copy->cpy_hdr.entries_pageable =
7696 copy->cpy_hdr.entries_pageable;
7697 vm_map_store_init(&tail_copy->cpy_hdr);
7698
7699 tail_copy->offset = copy->offset + copy->size - tail_size;
7700 tail_copy->size = tail_size;
7701
7702 copy->size -= tail_size;
7703
7704 entry = vm_map_copy_last_entry(copy);
7705 vm_map_copy_clip_start(copy, entry, tail_copy->offset);
7706 entry = vm_map_copy_last_entry(copy);
7707 vm_map_copy_entry_unlink(copy, entry);
7708 vm_map_copy_entry_link(tail_copy,
7709 vm_map_copy_last_entry(tail_copy),
7710 entry);
7711 }
7712
7713 /*
7714 * Copy most (or possibly all) of the data.
7715 */
7716 kr = vm_map_copy_overwrite_nested(dst_map,
7717 dst_addr + head_size,
7718 copy,
7719 interruptible,
7720 (pmap_t) NULL,
7721 FALSE);
7722 if (kr != KERN_SUCCESS) {
7723 goto done;
7724 }
7725
7726 if (tail_size) {
7727 kr = vm_map_copy_overwrite_nested(dst_map,
7728 tail_addr,
7729 tail_copy,
7730 interruptible,
7731 (pmap_t) NULL,
7732 FALSE);
7733 }
7734
7735 done:
7736 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
7737 if (kr == KERN_SUCCESS) {
7738 /*
7739 * Discard all the copy maps.
7740 */
7741 if (head_copy) {
7742 vm_map_copy_discard(head_copy);
7743 head_copy = NULL;
7744 }
7745 vm_map_copy_discard(copy);
7746 if (tail_copy) {
7747 vm_map_copy_discard(tail_copy);
7748 tail_copy = NULL;
7749 }
7750 } else {
7751 /*
7752 * Re-assemble the original copy map.
7753 */
7754 if (head_copy) {
7755 entry = vm_map_copy_first_entry(head_copy);
7756 vm_map_copy_entry_unlink(head_copy, entry);
7757 vm_map_copy_entry_link(copy,
7758 vm_map_copy_to_entry(copy),
7759 entry);
7760 copy->offset -= head_size;
7761 copy->size += head_size;
7762 vm_map_copy_discard(head_copy);
7763 head_copy = NULL;
7764 }
7765 if (tail_copy) {
7766 entry = vm_map_copy_last_entry(tail_copy);
7767 vm_map_copy_entry_unlink(tail_copy, entry);
7768 vm_map_copy_entry_link(copy,
7769 vm_map_copy_last_entry(copy),
7770 entry);
7771 copy->size += tail_size;
7772 vm_map_copy_discard(tail_copy);
7773 tail_copy = NULL;
7774 }
7775 }
7776 return kr;
7777 }
7778
7779
7780 /*
7781 * Routine: vm_map_copy_overwrite_unaligned [internal use only]
7782 *
7783 * Description:
7784 * Physically copy unaligned data
7785 *
7786 * Implementation:
7787 * Unaligned parts of pages have to be physically copied. We use
7788 * a modified form of vm_fault_copy (which understands non-aligned
7789 * page offsets and sizes) to do the copy. We attempt to copy as
7790 * much memory in one go as possible; however, vm_fault_copy copies
7791 * within one memory object, so we have to find the smallest of "amount left",
7792 * "source object data size" and "target object data size". With
7793 * unaligned data we don't need to split regions; therefore the source
7794 * (copy) object should be one map entry, though the target range may be split
7795 * over multiple map entries. In any event we are pessimistic
7796 * about these assumptions.
7797 *
7798 * Assumptions:
7799 * dst_map is locked on entry and is returned locked on success,
7800 * unlocked on error.
7801 */
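/*
 * In effect, each pass through the loop below copies
 *	copy_size = MIN(amount_left,
 *			copy_entry->vme_end - (copy_entry->vme_start + src_offset),
 *			entry->vme_end - start)
 * bytes with vm_fault_copy() and then advances whichever of the source
 * copy entry and the destination entry has been exhausted (an
 * illustrative restatement of the selection logic in the body).
 */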
7802
7803 static kern_return_t
7804 vm_map_copy_overwrite_unaligned(
7805 vm_map_t dst_map,
7806 vm_map_entry_t entry,
7807 vm_map_copy_t copy,
7808 vm_map_offset_t start,
7809 boolean_t discard_on_success)
7810 {
7811 vm_map_entry_t copy_entry;
7812 vm_map_entry_t copy_entry_next;
7813 vm_map_version_t version;
7814 vm_object_t dst_object;
7815 vm_object_offset_t dst_offset;
7816 vm_object_offset_t src_offset;
7817 vm_object_offset_t entry_offset;
7818 vm_map_offset_t entry_end;
7819 vm_map_size_t src_size,
7820 dst_size,
7821 copy_size,
7822 amount_left;
7823 kern_return_t kr = KERN_SUCCESS;
7824
7825
7826 copy_entry = vm_map_copy_first_entry(copy);
7827
7828 vm_map_lock_write_to_read(dst_map);
7829
7830 src_offset = copy->offset - vm_object_trunc_page(copy->offset);
7831 amount_left = copy->size;
7832 /*
7833 * unaligned so we never clipped this entry, we need the offset into
7834 * the vm_object not just the data.
7835 */
7836 while (amount_left > 0) {
7837
7838 if (entry == vm_map_to_entry(dst_map)) {
7839 vm_map_unlock_read(dst_map);
7840 return KERN_INVALID_ADDRESS;
7841 }
7842
7843 /* "start" must be within the current map entry */
7844 assert ((start>=entry->vme_start) && (start<entry->vme_end));
7845
7846 dst_offset = start - entry->vme_start;
7847
7848 dst_size = entry->vme_end - start;
7849
7850 src_size = copy_entry->vme_end -
7851 (copy_entry->vme_start + src_offset);
7852
7853 if (dst_size < src_size) {
7854 /*
7855 * we can only copy dst_size bytes before
7856 * we have to get the next destination entry
7857 */
7858 copy_size = dst_size;
7859 } else {
7860 /*
7861 * we can only copy src_size bytes before
7862 * we have to get the next source copy entry
7863 */
7864 copy_size = src_size;
7865 }
7866
7867 if (copy_size > amount_left) {
7868 copy_size = amount_left;
7869 }
7870 /*
7871 * Entry needs copy: create a shadow object for the
7872 * copy-on-write region.
7873 */
7874 if (entry->needs_copy &&
7875 ((entry->protection & VM_PROT_WRITE) != 0))
7876 {
7877 if (vm_map_lock_read_to_write(dst_map)) {
7878 vm_map_lock_read(dst_map);
7879 goto RetryLookup;
7880 }
7881 VME_OBJECT_SHADOW(entry,
7882 (vm_map_size_t)(entry->vme_end
7883 - entry->vme_start));
7884 entry->needs_copy = FALSE;
7885 vm_map_lock_write_to_read(dst_map);
7886 }
7887 dst_object = VME_OBJECT(entry);
7888 /*
7889 * unlike with the virtual (aligned) copy, we're going
7890 * to fault on it; therefore we need a target object.
7891 */
7892 if (dst_object == VM_OBJECT_NULL) {
7893 if (vm_map_lock_read_to_write(dst_map)) {
7894 vm_map_lock_read(dst_map);
7895 goto RetryLookup;
7896 }
7897 dst_object = vm_object_allocate((vm_map_size_t)
7898 entry->vme_end - entry->vme_start);
7899 VME_OBJECT(entry) = dst_object;
7900 VME_OFFSET_SET(entry, 0);
7901 assert(entry->use_pmap);
7902 vm_map_lock_write_to_read(dst_map);
7903 }
7904 /*
7905 * Take an object reference and unlock map. The "entry" may
7906 * disappear or change when the map is unlocked.
7907 */
7908 vm_object_reference(dst_object);
7909 version.main_timestamp = dst_map->timestamp;
7910 entry_offset = VME_OFFSET(entry);
7911 entry_end = entry->vme_end;
7912 vm_map_unlock_read(dst_map);
7913 /*
7914 * Copy as much as possible in one pass
7915 */
7916 kr = vm_fault_copy(
7917 VME_OBJECT(copy_entry),
7918 VME_OFFSET(copy_entry) + src_offset,
7919 &copy_size,
7920 dst_object,
7921 entry_offset + dst_offset,
7922 dst_map,
7923 &version,
7924 THREAD_UNINT );
7925
7926 start += copy_size;
7927 src_offset += copy_size;
7928 amount_left -= copy_size;
7929 /*
7930 * Release the object reference
7931 */
7932 vm_object_deallocate(dst_object);
7933 /*
7934 * If a hard error occurred, return it now
7935 */
7936 if (kr != KERN_SUCCESS)
7937 return kr;
7938
7939 if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
7940 || amount_left == 0)
7941 {
7942 /*
7943 * all done with this copy entry, dispose.
7944 */
7945 copy_entry_next = copy_entry->vme_next;
7946
7947 if (discard_on_success) {
7948 vm_map_copy_entry_unlink(copy, copy_entry);
7949 assert(!copy_entry->is_sub_map);
7950 vm_object_deallocate(VME_OBJECT(copy_entry));
7951 vm_map_copy_entry_dispose(copy, copy_entry);
7952 }
7953
7954 if (copy_entry_next == vm_map_copy_to_entry(copy) &&
7955 amount_left) {
7956 /*
7957 * not finished copying but ran out of source
7958 */
7959 return KERN_INVALID_ADDRESS;
7960 }
7961
7962 copy_entry = copy_entry_next;
7963
7964 src_offset = 0;
7965 }
7966
7967 if (amount_left == 0)
7968 return KERN_SUCCESS;
7969
7970 vm_map_lock_read(dst_map);
7971 if (version.main_timestamp == dst_map->timestamp) {
7972 if (start == entry_end) {
7973 /*
7974 * destination region is split. Use the version
7975 * information to avoid a lookup in the normal
7976 * case.
7977 */
7978 entry = entry->vme_next;
7979 /*
7980 * should be contiguous. Fail if we encounter
7981 * a hole in the destination.
7982 */
7983 if (start != entry->vme_start) {
7984 vm_map_unlock_read(dst_map);
7985 return KERN_INVALID_ADDRESS ;
7986 }
7987 }
7988 } else {
7989 /*
7990 * Map version check failed.
7991 * we must look up the entry because somebody
7992 * might have changed the map behind our backs.
7993 */
7994 RetryLookup:
7995 if (!vm_map_lookup_entry(dst_map, start, &entry))
7996 {
7997 vm_map_unlock_read(dst_map);
7998 return KERN_INVALID_ADDRESS ;
7999 }
8000 }
8001 }/* while */
8002
8003 return KERN_SUCCESS;
8004 }/* vm_map_copy_overwrite_unaligned */
8005
8006 /*
8007 * Routine: vm_map_copy_overwrite_aligned [internal use only]
8008 *
8009 * Description:
8010 * Does all the vm_trickery possible for whole pages.
8011 *
8012 * Implementation:
8013 *
8014 * If there are no permanent objects in the destination,
8015 * and the source and destination map entry zones match,
8016 * and the destination map entry is not shared,
8017 * then the map entries can be deleted and replaced
8018 * with those from the copy. The following code is the
8019 * basic idea of what to do, but there are lots of annoying
8020 * little details about getting protection and inheritance
8021 * right. Should add protection, inheritance, and sharing checks
8022 * to the above pass and make sure that no wiring is involved.
8023 */
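/*
 * A minimal sketch of that basic idea, kept under "#if 0" because it is
 * illustrative only: splice each copy entry's object into the destination
 * entry it overwrites, ignoring the protection, inheritance, sharing and
 * wiring details that the real routine below has to handle.
 */
#if 0
	while ((copy_entry = vm_map_copy_first_entry(copy)) !=
	       vm_map_copy_to_entry(copy)) {
		entry = tmp_entry;			/* destination entry */
		/* drop the destination's old backing object... */
		vm_object_deallocate(VME_OBJECT(entry));
		/* ...and install the source object in its place */
		VME_OBJECT_SET(entry, VME_OBJECT(copy_entry));
		VME_OFFSET_SET(entry, VME_OFFSET(copy_entry));
		/* the copy entry has been consumed */
		vm_map_copy_entry_unlink(copy, copy_entry);
		vm_map_copy_entry_dispose(copy, copy_entry);
		tmp_entry = tmp_entry->vme_next;
	}
#endif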
8024
8025 int vm_map_copy_overwrite_aligned_src_not_internal = 0;
8026 int vm_map_copy_overwrite_aligned_src_not_symmetric = 0;
8027 int vm_map_copy_overwrite_aligned_src_large = 0;
8028
8029 static kern_return_t
8030 vm_map_copy_overwrite_aligned(
8031 vm_map_t dst_map,
8032 vm_map_entry_t tmp_entry,
8033 vm_map_copy_t copy,
8034 vm_map_offset_t start,
8035 __unused pmap_t pmap)
8036 {
8037 vm_object_t object;
8038 vm_map_entry_t copy_entry;
8039 vm_map_size_t copy_size;
8040 vm_map_size_t size;
8041 vm_map_entry_t entry;
8042
8043 while ((copy_entry = vm_map_copy_first_entry(copy))
8044 != vm_map_copy_to_entry(copy))
8045 {
8046 copy_size = (copy_entry->vme_end - copy_entry->vme_start);
8047
8048 entry = tmp_entry;
8049 if (entry->is_sub_map) {
8050 /* unnested when clipped earlier */
8051 assert(!entry->use_pmap);
8052 }
8053 if (entry == vm_map_to_entry(dst_map)) {
8054 vm_map_unlock(dst_map);
8055 return KERN_INVALID_ADDRESS;
8056 }
8057 size = (entry->vme_end - entry->vme_start);
8058 /*
8059 * Make sure that no holes popped up in the
8060 * address map, and that the protection is
8061 * still valid, in case the map was unlocked
8062 * earlier.
8063 */
8064
8065 if ((entry->vme_start != start) || ((entry->is_sub_map)
8066 && !entry->needs_copy)) {
8067 vm_map_unlock(dst_map);
8068 return(KERN_INVALID_ADDRESS);
8069 }
8070 assert(entry != vm_map_to_entry(dst_map));
8071
8072 /*
8073 * Check protection again
8074 */
8075
8076 if ( ! (entry->protection & VM_PROT_WRITE)) {
8077 vm_map_unlock(dst_map);
8078 return(KERN_PROTECTION_FAILURE);
8079 }
8080
8081 /*
8082 * Adjust to source size first
8083 */
8084
8085 if (copy_size < size) {
8086 if (entry->map_aligned &&
8087 !VM_MAP_PAGE_ALIGNED(entry->vme_start + copy_size,
8088 VM_MAP_PAGE_MASK(dst_map))) {
8089 /* no longer map-aligned */
8090 entry->map_aligned = FALSE;
8091 }
8092 vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
8093 size = copy_size;
8094 }
8095
8096 /*
8097 * Adjust to destination size
8098 */
8099
8100 if (size < copy_size) {
8101 vm_map_copy_clip_end(copy, copy_entry,
8102 copy_entry->vme_start + size);
8103 copy_size = size;
8104 }
8105
8106 assert((entry->vme_end - entry->vme_start) == size);
8107 assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
8108 assert((copy_entry->vme_end - copy_entry->vme_start) == size);
8109
8110 /*
8111 * If the destination contains temporary unshared memory,
8112 * we can perform the copy by throwing it away and
8113 * installing the source data.
8114 */
8115
8116 object = VME_OBJECT(entry);
8117 if ((!entry->is_shared &&
8118 ((object == VM_OBJECT_NULL) ||
8119 (object->internal && !object->true_share))) ||
8120 entry->needs_copy) {
8121 vm_object_t old_object = VME_OBJECT(entry);
8122 vm_object_offset_t old_offset = VME_OFFSET(entry);
8123 vm_object_offset_t offset;
8124
8125 /*
8126 * Ensure that the source and destination aren't
8127 * identical
8128 */
8129 if (old_object == VME_OBJECT(copy_entry) &&
8130 old_offset == VME_OFFSET(copy_entry)) {
8131 vm_map_copy_entry_unlink(copy, copy_entry);
8132 vm_map_copy_entry_dispose(copy, copy_entry);
8133
8134 if (old_object != VM_OBJECT_NULL)
8135 vm_object_deallocate(old_object);
8136
8137 start = tmp_entry->vme_end;
8138 tmp_entry = tmp_entry->vme_next;
8139 continue;
8140 }
8141
8142 #define __TRADEOFF1_OBJ_SIZE (64 * 1024 * 1024) /* 64 MB */
8143 #define __TRADEOFF1_COPY_SIZE (128 * 1024) /* 128 KB */
8144 if (VME_OBJECT(copy_entry) != VM_OBJECT_NULL &&
8145 VME_OBJECT(copy_entry)->vo_size >= __TRADEOFF1_OBJ_SIZE &&
8146 copy_size <= __TRADEOFF1_COPY_SIZE) {
8147 /*
8148 * Virtual vs. Physical copy tradeoff #1.
8149 *
8150 * Copying only a few pages out of a large
8151 * object: do a physical copy instead of
8152 * a virtual copy, to avoid possibly keeping
8153 * the entire large object alive because of
8154 * those few copy-on-write pages.
8155 */
8156 vm_map_copy_overwrite_aligned_src_large++;
8157 goto slow_copy;
8158 }
8159
8160 if ((dst_map->pmap != kernel_pmap) &&
8161 (VME_ALIAS(entry) >= VM_MEMORY_MALLOC) &&
8162 (VME_ALIAS(entry) <= VM_MEMORY_MALLOC_LARGE_REUSED)) {
8163 vm_object_t new_object, new_shadow;
8164
8165 /*
8166 * We're about to map something over a mapping
8167 * established by malloc()...
8168 */
8169 new_object = VME_OBJECT(copy_entry);
8170 if (new_object != VM_OBJECT_NULL) {
8171 vm_object_lock_shared(new_object);
8172 }
8173 while (new_object != VM_OBJECT_NULL &&
8174 !new_object->true_share &&
8175 new_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
8176 new_object->internal) {
8177 new_shadow = new_object->shadow;
8178 if (new_shadow == VM_OBJECT_NULL) {
8179 break;
8180 }
8181 vm_object_lock_shared(new_shadow);
8182 vm_object_unlock(new_object);
8183 new_object = new_shadow;
8184 }
8185 if (new_object != VM_OBJECT_NULL) {
8186 if (!new_object->internal) {
8187 /*
8188 * The new mapping is backed
8189 * by an external object. We
8190 * don't want malloc'ed memory
8191 * to be replaced with such a
8192 * non-anonymous mapping, so
8193 * let's go off the optimized
8194 * path...
8195 */
8196 vm_map_copy_overwrite_aligned_src_not_internal++;
8197 vm_object_unlock(new_object);
8198 goto slow_copy;
8199 }
8200 if (new_object->true_share ||
8201 new_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
8202 /*
8203 * Same if there's a "true_share"
8204 * object in the shadow chain, or
8205 * an object with a non-default
8206 * (SYMMETRIC) copy strategy.
8207 */
8208 vm_map_copy_overwrite_aligned_src_not_symmetric++;
8209 vm_object_unlock(new_object);
8210 goto slow_copy;
8211 }
8212 vm_object_unlock(new_object);
8213 }
8214 /*
8215 * The new mapping is still backed by
8216 * anonymous (internal) memory, so it's
8217 * OK to substitute it for the original
8218 * malloc() mapping.
8219 */
8220 }
8221
8222 if (old_object != VM_OBJECT_NULL) {
8223 if(entry->is_sub_map) {
8224 if(entry->use_pmap) {
8225 #ifndef NO_NESTED_PMAP
8226 pmap_unnest(dst_map->pmap,
8227 (addr64_t)entry->vme_start,
8228 entry->vme_end - entry->vme_start);
8229 #endif /* NO_NESTED_PMAP */
8230 if(dst_map->mapped_in_other_pmaps) {
8231 /* clean up parent */
8232 /* map/maps */
8233 vm_map_submap_pmap_clean(
8234 dst_map, entry->vme_start,
8235 entry->vme_end,
8236 VME_SUBMAP(entry),
8237 VME_OFFSET(entry));
8238 }
8239 } else {
8240 vm_map_submap_pmap_clean(
8241 dst_map, entry->vme_start,
8242 entry->vme_end,
8243 VME_SUBMAP(entry),
8244 VME_OFFSET(entry));
8245 }
8246 vm_map_deallocate(VME_SUBMAP(entry));
8247 } else {
8248 if(dst_map->mapped_in_other_pmaps) {
8249 vm_object_pmap_protect_options(
8250 VME_OBJECT(entry),
8251 VME_OFFSET(entry),
8252 entry->vme_end
8253 - entry->vme_start,
8254 PMAP_NULL,
8255 entry->vme_start,
8256 VM_PROT_NONE,
8257 PMAP_OPTIONS_REMOVE);
8258 } else {
8259 pmap_remove_options(
8260 dst_map->pmap,
8261 (addr64_t)(entry->vme_start),
8262 (addr64_t)(entry->vme_end),
8263 PMAP_OPTIONS_REMOVE);
8264 }
8265 vm_object_deallocate(old_object);
8266 }
8267 }
8268
8269 entry->is_sub_map = FALSE;
8270 VME_OBJECT_SET(entry, VME_OBJECT(copy_entry));
8271 object = VME_OBJECT(entry);
8272 entry->needs_copy = copy_entry->needs_copy;
8273 entry->wired_count = 0;
8274 entry->user_wired_count = 0;
8275 offset = VME_OFFSET(copy_entry);
8276 VME_OFFSET_SET(entry, offset);
8277
8278 vm_map_copy_entry_unlink(copy, copy_entry);
8279 vm_map_copy_entry_dispose(copy, copy_entry);
8280
8281 /*
8282 * We could try to push pages into the pmap at this point, BUT
8283 * that optimization only saved on average 2 us per page if ALL
8284 * the pages in the source were currently mapped
8285 * and ALL the pages in the dest were touched. If fewer than
8286 * 2/3 of the pages were touched, the optimization actually cost more cycles.
8287 * It also puts a lot of pressure on the pmap layer w.r.t. mapping structures.
8288 */
8289
8290 /*
8291 * Set up for the next iteration. The map
8292 * has not been unlocked, so the next
8293 * address should be at the end of this
8294 * entry, and the next map entry should be
8295 * the one following it.
8296 */
8297
8298 start = tmp_entry->vme_end;
8299 tmp_entry = tmp_entry->vme_next;
8300 } else {
8301 vm_map_version_t version;
8302 vm_object_t dst_object;
8303 vm_object_offset_t dst_offset;
8304 kern_return_t r;
8305
8306 slow_copy:
8307 if (entry->needs_copy) {
8308 VME_OBJECT_SHADOW(entry,
8309 (entry->vme_end -
8310 entry->vme_start));
8311 entry->needs_copy = FALSE;
8312 }
8313
8314 dst_object = VME_OBJECT(entry);
8315 dst_offset = VME_OFFSET(entry);
8316
8317 /*
8318 * Take an object reference, and record
8319 * the map version information so that the
8320 * map can be safely unlocked.
8321 */
8322
8323 if (dst_object == VM_OBJECT_NULL) {
8324 /*
8325 * We would usually have just taken the
8326 * optimized path above if the destination
8327 * object has not been allocated yet. But we
8328 * now disable that optimization if the copy
8329 * entry's object is not backed by anonymous
8330 * memory to avoid replacing malloc'ed
8331 * (i.e. re-usable) anonymous memory with a
8332 * not-so-anonymous mapping.
8333 * So we have to handle this case here and
8334 * allocate a new VM object for this map entry.
8335 */
8336 dst_object = vm_object_allocate(
8337 entry->vme_end - entry->vme_start);
8338 dst_offset = 0;
8339 VME_OBJECT_SET(entry, dst_object);
8340 VME_OFFSET_SET(entry, dst_offset);
8341 assert(entry->use_pmap);
8342
8343 }
8344
8345 vm_object_reference(dst_object);
8346
8347 /* account for unlock bumping up timestamp */
8348 version.main_timestamp = dst_map->timestamp + 1;
8349
8350 vm_map_unlock(dst_map);
8351
8352 /*
8353 * Copy as much as possible in one pass
8354 */
8355
8356 copy_size = size;
8357 r = vm_fault_copy(
8358 VME_OBJECT(copy_entry),
8359 VME_OFFSET(copy_entry),
8360 &copy_size,
8361 dst_object,
8362 dst_offset,
8363 dst_map,
8364 &version,
8365 THREAD_UNINT );
8366
8367 /*
8368 * Release the object reference
8369 */
8370
8371 vm_object_deallocate(dst_object);
8372
8373 /*
8374 * If a hard error occurred, return it now
8375 */
8376
8377 if (r != KERN_SUCCESS)
8378 return(r);
8379
8380 if (copy_size != 0) {
8381 /*
8382 * Dispose of the copied region
8383 */
8384
8385 vm_map_copy_clip_end(copy, copy_entry,
8386 copy_entry->vme_start + copy_size);
8387 vm_map_copy_entry_unlink(copy, copy_entry);
8388 vm_object_deallocate(VME_OBJECT(copy_entry));
8389 vm_map_copy_entry_dispose(copy, copy_entry);
8390 }
8391
8392 /*
8393 * Pick up in the destination map where we left off.
8394 *
8395 * Use the version information to avoid a lookup
8396 * in the normal case.
8397 */
8398
8399 start += copy_size;
8400 vm_map_lock(dst_map);
8401 if (version.main_timestamp == dst_map->timestamp &&
8402 copy_size != 0) {
8403 /* We can safely use saved tmp_entry value */
8404
8405 if (tmp_entry->map_aligned &&
8406 !VM_MAP_PAGE_ALIGNED(
8407 start,
8408 VM_MAP_PAGE_MASK(dst_map))) {
8409 /* no longer map-aligned */
8410 tmp_entry->map_aligned = FALSE;
8411 }
8412 vm_map_clip_end(dst_map, tmp_entry, start);
8413 tmp_entry = tmp_entry->vme_next;
8414 } else {
8415 /* Must do lookup of tmp_entry */
8416
8417 if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
8418 vm_map_unlock(dst_map);
8419 return(KERN_INVALID_ADDRESS);
8420 }
8421 if (tmp_entry->map_aligned &&
8422 !VM_MAP_PAGE_ALIGNED(
8423 start,
8424 VM_MAP_PAGE_MASK(dst_map))) {
8425 /* no longer map-aligned */
8426 tmp_entry->map_aligned = FALSE;
8427 }
8428 vm_map_clip_start(dst_map, tmp_entry, start);
8429 }
8430 }
8431 }/* while */
8432
8433 return(KERN_SUCCESS);
8434 }/* vm_map_copy_overwrite_aligned */
8435
8436 /*
8437 * Routine: vm_map_copyin_kernel_buffer [internal use only]
8438 *
8439 * Description:
8440 * Copy in data to a kernel buffer from space in the
8441 * source map. The original space may be optionally
8442 * deallocated.
8443 *
8444 * If successful, returns a new copy object.
8445 */
8446 static kern_return_t
8447 vm_map_copyin_kernel_buffer(
8448 vm_map_t src_map,
8449 vm_map_offset_t src_addr,
8450 vm_map_size_t len,
8451 boolean_t src_destroy,
8452 vm_map_copy_t *copy_result)
8453 {
8454 kern_return_t kr;
8455 vm_map_copy_t copy;
8456 vm_size_t kalloc_size;
8457
8458 if (len > msg_ool_size_small)
8459 return KERN_INVALID_ARGUMENT;
8460
8461 kalloc_size = (vm_size_t)(cpy_kdata_hdr_sz + len);
8462
8463 copy = (vm_map_copy_t)kalloc(kalloc_size);
8464 if (copy == VM_MAP_COPY_NULL)
8465 return KERN_RESOURCE_SHORTAGE;
8466 copy->type = VM_MAP_COPY_KERNEL_BUFFER;
8467 copy->size = len;
8468 copy->offset = 0;
8469
8470 kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t)len);
8471 if (kr != KERN_SUCCESS) {
8472 kfree(copy, kalloc_size);
8473 return kr;
8474 }
8475 if (src_destroy) {
8476 (void) vm_map_remove(
8477 src_map,
8478 vm_map_trunc_page(src_addr,
8479 VM_MAP_PAGE_MASK(src_map)),
8480 vm_map_round_page(src_addr + len,
8481 VM_MAP_PAGE_MASK(src_map)),
8482 (VM_MAP_REMOVE_INTERRUPTIBLE |
8483 VM_MAP_REMOVE_WAIT_FOR_KWIRE |
8484 ((src_map == kernel_map) ? VM_MAP_REMOVE_KUNWIRE : 0)));
8485 }
8486 *copy_result = copy;
8487 return KERN_SUCCESS;
8488 }
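/*
 * Sketch of how this path is typically reached (illustrative only;
 * "user_map", "user_addr" and "nbytes" are hypothetical): for regions
 * smaller than msg_ool_size_small, and when maximum protection is not
 * requested, vm_map_copyin_common() takes this kernel-buffer path and
 * physically copies the data into copy->cpy_kdata instead of building
 * an entry-list copy.
 */
#if 0
	vm_map_copy_t	copy;
	kern_return_t	kr;

	/* nbytes < msg_ool_size_small, so this ends up in
	 * vm_map_copyin_kernel_buffer() */
	kr = vm_map_copyin(user_map, user_addr, nbytes,
			   FALSE,	/* src_destroy */
			   &copy);
	if (kr == KERN_SUCCESS) {
		/* ... hand "copy" to vm_map_copyout() or discard it */
		vm_map_copy_discard(copy);
	}
#endif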
8489
8490 /*
8491 * Routine: vm_map_copyout_kernel_buffer [internal use only]
8492 *
8493 * Description:
8494 * Copy out data from a kernel buffer into space in the
8495 * destination map. The space may optionally be dynamically
8496 * allocated.
8497 *
8498 * If successful, consumes the copy object.
8499 * Otherwise, the caller is responsible for it.
8500 */
8501 static int vm_map_copyout_kernel_buffer_failures = 0;
8502 static kern_return_t
8503 vm_map_copyout_kernel_buffer(
8504 vm_map_t map,
8505 vm_map_address_t *addr, /* IN/OUT */
8506 vm_map_copy_t copy,
8507 boolean_t overwrite,
8508 boolean_t consume_on_success)
8509 {
8510 kern_return_t kr = KERN_SUCCESS;
8511 thread_t thread = current_thread();
8512
8513 /*
8514 * check for corrupted vm_map_copy structure
8515 */
8516 if (copy->size > msg_ool_size_small || copy->offset)
8517 panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
8518 (long long)copy->size, (long long)copy->offset);
8519
8520 if (!overwrite) {
8521
8522 /*
8523 * Allocate space in the target map for the data
8524 */
8525 *addr = 0;
8526 kr = vm_map_enter(map,
8527 addr,
8528 vm_map_round_page(copy->size,
8529 VM_MAP_PAGE_MASK(map)),
8530 (vm_map_offset_t) 0,
8531 VM_FLAGS_ANYWHERE,
8532 VM_OBJECT_NULL,
8533 (vm_object_offset_t) 0,
8534 FALSE,
8535 VM_PROT_DEFAULT,
8536 VM_PROT_ALL,
8537 VM_INHERIT_DEFAULT);
8538 if (kr != KERN_SUCCESS)
8539 return kr;
8540 }
8541
8542 /*
8543 * Copyout the data from the kernel buffer to the target map.
8544 */
8545 if (thread->map == map) {
8546
8547 /*
8548 * If the target map is the current map, just do
8549 * the copy.
8550 */
8551 assert((vm_size_t) copy->size == copy->size);
8552 if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) {
8553 kr = KERN_INVALID_ADDRESS;
8554 }
8555 }
8556 else {
8557 vm_map_t oldmap;
8558
8559 /*
8560 * If the target map is another map, assume the
8561 * target's address space identity for the duration
8562 * of the copy.
8563 */
8564 vm_map_reference(map);
8565 oldmap = vm_map_switch(map);
8566
8567 assert((vm_size_t) copy->size == copy->size);
8568 if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) {
8569 vm_map_copyout_kernel_buffer_failures++;
8570 kr = KERN_INVALID_ADDRESS;
8571 }
8572
8573 (void) vm_map_switch(oldmap);
8574 vm_map_deallocate(map);
8575 }
8576
8577 if (kr != KERN_SUCCESS) {
8578 /* the copy failed, clean up */
8579 if (!overwrite) {
8580 /*
8581 * Deallocate the space we allocated in the target map.
8582 */
8583 (void) vm_map_remove(
8584 map,
8585 vm_map_trunc_page(*addr,
8586 VM_MAP_PAGE_MASK(map)),
8587 vm_map_round_page((*addr +
8588 vm_map_round_page(copy->size,
8589 VM_MAP_PAGE_MASK(map))),
8590 VM_MAP_PAGE_MASK(map)),
8591 VM_MAP_NO_FLAGS);
8592 *addr = 0;
8593 }
8594 } else {
8595 /* copy was successful, discard the copy structure */
8596 if (consume_on_success) {
8597 kfree(copy, copy->size + cpy_kdata_hdr_sz);
8598 }
8599 }
8600
8601 return kr;
8602 }
8603
8604 /*
8605 * Macro: vm_map_copy_insert
8606 *
8607 * Description:
8608 * Link a copy chain ("copy") into a map at the
8609 * specified location (after "where").
8610 * Side effects:
8611 * The copy chain is destroyed.
8612 * Warning:
8613 * The arguments are evaluated multiple times.
8614 */
8615 #define vm_map_copy_insert(map, where, copy) \
8616 MACRO_BEGIN \
8617 vm_map_store_copy_insert(map, where, copy); \
8618 zfree(vm_map_copy_zone, copy); \
8619 MACRO_END
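/*
 * Since "copy" is expanded more than once by the macro, callers should
 * pass a simple lvalue; e.g. (hypothetical):
 *
 *	vm_map_copy_insert(dst_map, last, copy);	    fine
 *	vm_map_copy_insert(dst_map, last, grab_copy());	    grab_copy()
 *							    runs twice
 */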
8620
8621 void
8622 vm_map_copy_remap(
8623 vm_map_t map,
8624 vm_map_entry_t where,
8625 vm_map_copy_t copy,
8626 vm_map_offset_t adjustment,
8627 vm_prot_t cur_prot,
8628 vm_prot_t max_prot,
8629 vm_inherit_t inheritance)
8630 {
8631 vm_map_entry_t copy_entry, new_entry;
8632
8633 for (copy_entry = vm_map_copy_first_entry(copy);
8634 copy_entry != vm_map_copy_to_entry(copy);
8635 copy_entry = copy_entry->vme_next) {
8636 /* get a new VM map entry for the map */
8637 new_entry = vm_map_entry_create(map,
8638 !map->hdr.entries_pageable);
8639 /* copy the "copy entry" to the new entry */
8640 vm_map_entry_copy(new_entry, copy_entry);
8641 /* adjust "start" and "end" */
8642 new_entry->vme_start += adjustment;
8643 new_entry->vme_end += adjustment;
8644 /* clear some attributes */
8645 new_entry->inheritance = inheritance;
8646 new_entry->protection = cur_prot;
8647 new_entry->max_protection = max_prot;
8648 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
8649 /* take an extra reference on the entry's "object" */
8650 if (new_entry->is_sub_map) {
8651 assert(!new_entry->use_pmap); /* not nested */
8652 vm_map_lock(VME_SUBMAP(new_entry));
8653 vm_map_reference(VME_SUBMAP(new_entry));
8654 vm_map_unlock(VME_SUBMAP(new_entry));
8655 } else {
8656 vm_object_reference(VME_OBJECT(new_entry));
8657 }
8658 /* insert the new entry in the map */
8659 vm_map_store_entry_link(map, where, new_entry);
8660 /* continue inserting the "copy entries" after the new entry */
8661 where = new_entry;
8662 }
8663 }
8664
8665
8666 boolean_t
8667 vm_map_copy_validate_size(
8668 vm_map_t dst_map,
8669 vm_map_copy_t copy,
8670 vm_map_size_t size)
8671 {
8672 if (copy == VM_MAP_COPY_NULL)
8673 return FALSE;
8674 switch (copy->type) {
8675 case VM_MAP_COPY_OBJECT:
8676 case VM_MAP_COPY_KERNEL_BUFFER:
8677 if (size == copy->size)
8678 return TRUE;
8679 break;
8680 case VM_MAP_COPY_ENTRY_LIST:
8681 /*
8682 * potential page-size rounding prevents us from exactly
8683 * validating this flavor of vm_map_copy, but we can at least
8684 * assert that it's within a range.
8685 */
8686 if (copy->size >= size &&
8687 copy->size <= vm_map_round_page(size,
8688 VM_MAP_PAGE_MASK(dst_map)))
8689 return TRUE;
8690 break;
8691 default:
8692 break;
8693 }
8694 return FALSE;
8695 }
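/*
 * A minimal sketch of the intended use (illustrative only; "dst_map",
 * "dst_addr", "copy" and "expected_size" are hypothetical): reject a
 * copy object whose size does not match what the caller expects before
 * overwriting memory with it.
 */
#if 0
	if (!vm_map_copy_validate_size(dst_map, copy, expected_size)) {
		vm_map_copy_discard(copy);
		return KERN_INVALID_ARGUMENT;
	}
	kr = vm_map_copy_overwrite(dst_map, dst_addr, copy,
				   FALSE);	/* interruptible */
#endif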
8696
8697
8698 /*
8699 * Routine: vm_map_copyout
8700 *
8701 * Description:
8702 * Copy out a copy chain ("copy") into newly-allocated
8703 * space in the destination map.
8704 *
8705 * If successful, consumes the copy object.
8706 * Otherwise, the caller is responsible for it.
8707 */
8708
8709 kern_return_t
8710 vm_map_copyout(
8711 vm_map_t dst_map,
8712 vm_map_address_t *dst_addr, /* OUT */
8713 vm_map_copy_t copy)
8714 {
8715 return vm_map_copyout_internal(dst_map, dst_addr, copy,
8716 TRUE, /* consume_on_success */
8717 VM_PROT_DEFAULT,
8718 VM_PROT_ALL,
8719 VM_INHERIT_DEFAULT);
8720 }
8721
8722 kern_return_t
8723 vm_map_copyout_internal(
8724 vm_map_t dst_map,
8725 vm_map_address_t *dst_addr, /* OUT */
8726 vm_map_copy_t copy,
8727 boolean_t consume_on_success,
8728 vm_prot_t cur_protection,
8729 vm_prot_t max_protection,
8730 vm_inherit_t inheritance)
8731 {
8732 vm_map_size_t size;
8733 vm_map_size_t adjustment;
8734 vm_map_offset_t start;
8735 vm_object_offset_t vm_copy_start;
8736 vm_map_entry_t last;
8737 vm_map_entry_t entry;
8738 vm_map_entry_t hole_entry;
8739
8740 /*
8741 * Check for null copy object.
8742 */
8743
8744 if (copy == VM_MAP_COPY_NULL) {
8745 *dst_addr = 0;
8746 return(KERN_SUCCESS);
8747 }
8748
8749 /*
8750 * Check for special copy object, created
8751 * by vm_map_copyin_object.
8752 */
8753
8754 if (copy->type == VM_MAP_COPY_OBJECT) {
8755 vm_object_t object = copy->cpy_object;
8756 kern_return_t kr;
8757 vm_object_offset_t offset;
8758
8759 offset = vm_object_trunc_page(copy->offset);
8760 size = vm_map_round_page((copy->size +
8761 (vm_map_size_t)(copy->offset -
8762 offset)),
8763 VM_MAP_PAGE_MASK(dst_map));
8764 *dst_addr = 0;
8765 kr = vm_map_enter(dst_map, dst_addr, size,
8766 (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
8767 object, offset, FALSE,
8768 VM_PROT_DEFAULT, VM_PROT_ALL,
8769 VM_INHERIT_DEFAULT);
8770 if (kr != KERN_SUCCESS)
8771 return(kr);
8772 /* Account for non-pagealigned copy object */
8773 *dst_addr += (vm_map_offset_t)(copy->offset - offset);
8774 if (consume_on_success)
8775 zfree(vm_map_copy_zone, copy);
8776 return(KERN_SUCCESS);
8777 }
8778
8779 /*
8780 * Check for special kernel buffer allocated
8781 * by new_ipc_kmsg_copyin.
8782 */
8783
8784 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
8785 return vm_map_copyout_kernel_buffer(dst_map, dst_addr,
8786 copy, FALSE,
8787 consume_on_success);
8788 }
8789
8790
8791 /*
8792 * Find space for the data
8793 */
8794
8795 vm_copy_start = vm_map_trunc_page((vm_map_size_t)copy->offset,
8796 VM_MAP_COPY_PAGE_MASK(copy));
8797 size = vm_map_round_page((vm_map_size_t)copy->offset + copy->size,
8798 VM_MAP_COPY_PAGE_MASK(copy))
8799 - vm_copy_start;
8800
8801
8802 StartAgain: ;
8803
8804 vm_map_lock(dst_map);
8805 if( dst_map->disable_vmentry_reuse == TRUE) {
8806 VM_MAP_HIGHEST_ENTRY(dst_map, entry, start);
8807 last = entry;
8808 } else {
8809 if (dst_map->holelistenabled) {
8810 hole_entry = (vm_map_entry_t)dst_map->holes_list;
8811
8812 if (hole_entry == NULL) {
8813 /*
8814 * No more space in the map?
8815 */
8816 vm_map_unlock(dst_map);
8817 return(KERN_NO_SPACE);
8818 }
8819
8820 last = hole_entry;
8821 start = last->vme_start;
8822 } else {
8823 assert(first_free_is_valid(dst_map));
8824 start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
8825 vm_map_min(dst_map) : last->vme_end;
8826 }
8827 start = vm_map_round_page(start,
8828 VM_MAP_PAGE_MASK(dst_map));
8829 }
8830
8831 while (TRUE) {
8832 vm_map_entry_t next = last->vme_next;
8833 vm_map_offset_t end = start + size;
8834
8835 if ((end > dst_map->max_offset) || (end < start)) {
8836 if (dst_map->wait_for_space) {
8837 if (size <= (dst_map->max_offset - dst_map->min_offset)) {
8838 assert_wait((event_t) dst_map,
8839 THREAD_INTERRUPTIBLE);
8840 vm_map_unlock(dst_map);
8841 thread_block(THREAD_CONTINUE_NULL);
8842 goto StartAgain;
8843 }
8844 }
8845 vm_map_unlock(dst_map);
8846 return(KERN_NO_SPACE);
8847 }
8848
8849 if (dst_map->holelistenabled) {
8850 if (last->vme_end >= end)
8851 break;
8852 } else {
8853 /*
8854 * If there are no more entries, we must win.
8855 *
8856 * OR
8857 *
8858 * If there is another entry, it must be
8859 * after the end of the potential new region.
8860 */
8861
8862 if (next == vm_map_to_entry(dst_map))
8863 break;
8864
8865 if (next->vme_start >= end)
8866 break;
8867 }
8868
8869 last = next;
8870
8871 if (dst_map->holelistenabled) {
8872 if (last == (vm_map_entry_t) dst_map->holes_list) {
8873 /*
8874 * Wrapped around
8875 */
8876 vm_map_unlock(dst_map);
8877 return(KERN_NO_SPACE);
8878 }
8879 start = last->vme_start;
8880 } else {
8881 start = last->vme_end;
8882 }
8883 start = vm_map_round_page(start,
8884 VM_MAP_PAGE_MASK(dst_map));
8885 }
8886
8887 if (dst_map->holelistenabled) {
8888 if (vm_map_lookup_entry(dst_map, last->vme_start, &last)) {
8889 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", last, (unsigned long long)last->vme_start);
8890 }
8891 }
8892
8893
8894 adjustment = start - vm_copy_start;
8895 if (! consume_on_success) {
8896 /*
8897 * We're not allowed to consume "copy", so we'll have to
8898 * copy its map entries into the destination map below.
8899 * No need to re-allocate map entries from the correct
8900 * (pageable or not) zone, since we'll get new map entries
8901 * during the transfer.
8902 * We'll also adjust the map entries' "start" and "end"
8903 * during the transfer, to keep "copy"'s entries consistent
8904 * with its "offset".
8905 */
8906 goto after_adjustments;
8907 }
8908
8909 /*
8910 * Since we're going to just drop the map
8911 * entries from the copy into the destination
8912 * map, they must come from the same pool.
8913 */
8914
8915 if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
8916 /*
8917 * Mismatches occur when dealing with the default
8918 * pager.
8919 */
8920 zone_t old_zone;
8921 vm_map_entry_t next, new;
8922
8923 /*
8924 * Find the zone that the copies were allocated from
8925 */
8926
8927 entry = vm_map_copy_first_entry(copy);
8928
8929 /*
8930 * Reinitialize the copy so that vm_map_copy_entry_link
8931 * will work.
8932 */
8933 vm_map_store_copy_reset(copy, entry);
8934 copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;
8935
8936 /*
8937 * Copy each entry.
8938 */
8939 while (entry != vm_map_copy_to_entry(copy)) {
8940 new = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
8941 vm_map_entry_copy_full(new, entry);
8942 assert(!new->iokit_acct);
8943 if (new->is_sub_map) {
8944 /* clr address space specifics */
8945 new->use_pmap = FALSE;
8946 }
8947 vm_map_copy_entry_link(copy,
8948 vm_map_copy_last_entry(copy),
8949 new);
8950 next = entry->vme_next;
8951 old_zone = entry->from_reserved_zone ? vm_map_entry_reserved_zone : vm_map_entry_zone;
8952 zfree(old_zone, entry);
8953 entry = next;
8954 }
8955 }
8956
8957 /*
8958 * Adjust the addresses in the copy chain, and
8959 * reset the region attributes.
8960 */
8961
8962 for (entry = vm_map_copy_first_entry(copy);
8963 entry != vm_map_copy_to_entry(copy);
8964 entry = entry->vme_next) {
8965 if (VM_MAP_PAGE_SHIFT(dst_map) == PAGE_SHIFT) {
8966 /*
8967 * We're injecting this copy entry into a map that
8968 * has the standard page alignment, so clear
8969 * "map_aligned" (which might have been inherited
8970 * from the original map entry).
8971 */
8972 entry->map_aligned = FALSE;
8973 }
8974
8975 entry->vme_start += adjustment;
8976 entry->vme_end += adjustment;
8977
8978 if (entry->map_aligned) {
8979 assert(VM_MAP_PAGE_ALIGNED(entry->vme_start,
8980 VM_MAP_PAGE_MASK(dst_map)));
8981 assert(VM_MAP_PAGE_ALIGNED(entry->vme_end,
8982 VM_MAP_PAGE_MASK(dst_map)));
8983 }
8984
8985 entry->inheritance = VM_INHERIT_DEFAULT;
8986 entry->protection = VM_PROT_DEFAULT;
8987 entry->max_protection = VM_PROT_ALL;
8988 entry->behavior = VM_BEHAVIOR_DEFAULT;
8989
8990 /*
8991 * If the entry is now wired,
8992 * map the pages into the destination map.
8993 */
8994 if (entry->wired_count != 0) {
8995 register vm_map_offset_t va;
8996 vm_object_offset_t offset;
8997 register vm_object_t object;
8998 vm_prot_t prot;
8999 int type_of_fault;
9000
9001 object = VME_OBJECT(entry);
9002 offset = VME_OFFSET(entry);
9003 va = entry->vme_start;
9004
9005 pmap_pageable(dst_map->pmap,
9006 entry->vme_start,
9007 entry->vme_end,
9008 TRUE);
9009
9010 while (va < entry->vme_end) {
9011 register vm_page_t m;
9012
9013 /*
9014 * Look up the page in the object.
9015 * Assert that the page will be found in the
9016 * top object:
9017 * either
9018 * the object was newly created by
9019 * vm_object_copy_slowly, and has
9020 * copies of all of the pages from
9021 * the source object
9022 * or
9023 * the object was moved from the old
9024 * map entry; because the old map
9025 * entry was wired, all of the pages
9026 * were in the top-level object.
9027 * (XXX not true if we wire pages for
9028 * reading)
9029 */
9030 vm_object_lock(object);
9031
9032 m = vm_page_lookup(object, offset);
9033 if (m == VM_PAGE_NULL || !VM_PAGE_WIRED(m) ||
9034 m->absent)
9035 panic("vm_map_copyout: wiring %p", m);
9036
9037 /*
9038 * ENCRYPTED SWAP:
9039 * The page is assumed to be wired here, so it
9040 * shouldn't be encrypted. Otherwise, we
9041 * couldn't enter it in the page table, since
9042 * we don't want the user to see the encrypted
9043 * data.
9044 */
9045 ASSERT_PAGE_DECRYPTED(m);
9046
9047 prot = entry->protection;
9048
9049 if (override_nx(dst_map, VME_ALIAS(entry)) &&
9050 prot)
9051 prot |= VM_PROT_EXECUTE;
9052
9053 type_of_fault = DBG_CACHE_HIT_FAULT;
9054
9055 vm_fault_enter(m, dst_map->pmap, va, prot, prot,
9056 VM_PAGE_WIRED(m), FALSE, FALSE,
9057 FALSE, VME_ALIAS(entry),
9058 ((entry->iokit_acct ||
9059 (!entry->is_sub_map &&
9060 !entry->use_pmap))
9061 ? PMAP_OPTIONS_ALT_ACCT
9062 : 0),
9063 NULL, &type_of_fault);
9064
9065 vm_object_unlock(object);
9066
9067 offset += PAGE_SIZE_64;
9068 va += PAGE_SIZE;
9069 }
9070 }
9071 }
9072
9073 after_adjustments:
9074
9075 /*
9076 * Correct the page alignment for the result
9077 */
9078
9079 *dst_addr = start + (copy->offset - vm_copy_start);
9080
9081 /*
9082 * Update the hints and the map size
9083 */
9084
9085 if (consume_on_success) {
9086 SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy));
9087 } else {
9088 SAVE_HINT_MAP_WRITE(dst_map, last);
9089 }
9090
9091 dst_map->size += size;
9092
9093 /*
9094 * Link in the copy
9095 */
9096
9097 if (consume_on_success) {
9098 vm_map_copy_insert(dst_map, last, copy);
9099 } else {
9100 vm_map_copy_remap(dst_map, last, copy, adjustment,
9101 cur_protection, max_protection,
9102 inheritance);
9103 }
9104
9105 vm_map_unlock(dst_map);
9106
9107 /*
9108 * XXX If wiring_required, call vm_map_pageable
9109 */
9110
9111 return(KERN_SUCCESS);
9112 }
9113
9114 /*
9115 * Routine: vm_map_copyin
9116 *
9117 * Description:
9118 * see vm_map_copyin_common. Exported via Unsupported.exports.
9119 *
9120 */
9121
9122 #undef vm_map_copyin
9123
9124 kern_return_t
9125 vm_map_copyin(
9126 vm_map_t src_map,
9127 vm_map_address_t src_addr,
9128 vm_map_size_t len,
9129 boolean_t src_destroy,
9130 vm_map_copy_t *copy_result) /* OUT */
9131 {
9132 return(vm_map_copyin_common(src_map, src_addr, len, src_destroy,
9133 FALSE, copy_result, FALSE));
9134 }
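/*
 * A minimal copyin/copyout round trip (illustrative only; "src_map",
 * "dst_map", "src_addr" and "nbytes" are hypothetical): move a region
 * from one address space into newly allocated space in another, the
 * pattern used for out-of-line Mach message data.
 */
#if 0
	vm_map_copy_t		copy;
	vm_map_address_t	dst_addr;
	kern_return_t		kr;

	kr = vm_map_copyin(src_map, src_addr, nbytes,
			   FALSE,	/* src_destroy */
			   &copy);
	if (kr != KERN_SUCCESS)
		return kr;

	kr = vm_map_copyout(dst_map, &dst_addr, copy);
	if (kr != KERN_SUCCESS) {
		/* on failure the caller still owns the copy object */
		vm_map_copy_discard(copy);
		return kr;
	}
	/* on success "copy" has been consumed; the data is at dst_addr */
#endif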
9135
9136 /*
9137 * Routine: vm_map_copyin_common
9138 *
9139 * Description:
9140 * Copy the specified region (src_addr, len) from the
9141 * source address space (src_map), possibly removing
9142 * the region from the source address space (src_destroy).
9143 *
9144 * Returns:
9145 * A vm_map_copy_t object (copy_result), suitable for
9146 * insertion into another address space (using vm_map_copyout),
9147 * copying over another address space region (using
9148 * vm_map_copy_overwrite). If the copy is unused, it
9149 * should be destroyed (using vm_map_copy_discard).
9150 *
9151 * In/out conditions:
9152 * The source map should not be locked on entry.
9153 */
9154
9155 typedef struct submap_map {
9156 vm_map_t parent_map;
9157 vm_map_offset_t base_start;
9158 vm_map_offset_t base_end;
9159 vm_map_size_t base_len;
9160 struct submap_map *next;
9161 } submap_map_t;
9162
9163 kern_return_t
9164 vm_map_copyin_common(
9165 vm_map_t src_map,
9166 vm_map_address_t src_addr,
9167 vm_map_size_t len,
9168 boolean_t src_destroy,
9169 __unused boolean_t src_volatile,
9170 vm_map_copy_t *copy_result, /* OUT */
9171 boolean_t use_maxprot)
9172 {
9173 int flags;
9174
9175 flags = 0;
9176 if (src_destroy) {
9177 flags |= VM_MAP_COPYIN_SRC_DESTROY;
9178 }
9179 if (use_maxprot) {
9180 flags |= VM_MAP_COPYIN_USE_MAXPROT;
9181 }
9182 return vm_map_copyin_internal(src_map,
9183 src_addr,
9184 len,
9185 flags,
9186 copy_result);
9187 }
9188 kern_return_t
9189 vm_map_copyin_internal(
9190 vm_map_t src_map,
9191 vm_map_address_t src_addr,
9192 vm_map_size_t len,
9193 int flags,
9194 vm_map_copy_t *copy_result) /* OUT */
9195 {
9196 vm_map_entry_t tmp_entry; /* Result of last map lookup --
9197 * in multi-level lookup, this
9198 * entry contains the actual
9199 * vm_object/offset.
9200 */
9201 vm_map_entry_t new_entry = VM_MAP_ENTRY_NULL; /* Map entry for copy */
9202
9203 vm_map_offset_t src_start; /* Start of current entry --
9204 * where copy is taking place now
9205 */
9206 vm_map_offset_t src_end; /* End of entire region to be
9207 * copied */
9208 vm_map_offset_t src_base;
9209 vm_map_t base_map = src_map;
9210 boolean_t map_share=FALSE;
9211 submap_map_t *parent_maps = NULL;
9212
9213 vm_map_copy_t copy; /* Resulting copy */
9214 vm_map_address_t copy_addr;
9215 vm_map_size_t copy_size;
9216 boolean_t src_destroy;
9217 boolean_t use_maxprot;
9218
9219 if (flags & ~VM_MAP_COPYIN_ALL_FLAGS) {
9220 return KERN_INVALID_ARGUMENT;
9221 }
9222
9223 src_destroy = (flags & VM_MAP_COPYIN_SRC_DESTROY) ? TRUE : FALSE;
9224 use_maxprot = (flags & VM_MAP_COPYIN_USE_MAXPROT) ? TRUE : FALSE;
9225
9226 /*
9227 * Check for copies of zero bytes.
9228 */
9229
9230 if (len == 0) {
9231 *copy_result = VM_MAP_COPY_NULL;
9232 return(KERN_SUCCESS);
9233 }
9234
9235 /*
9236 * Check that the end address doesn't overflow
9237 */
9238 src_end = src_addr + len;
9239 if (src_end < src_addr)
9240 return KERN_INVALID_ADDRESS;
9241
9242 /*
9243 * If the copy is sufficiently small, use a kernel buffer instead
9244 * of making a virtual copy. The theory being that the cost of
9245 * setting up VM (and taking C-O-W faults) dominates the copy costs
9246 * for small regions.
9247 */
9248 if ((len < msg_ool_size_small) &&
9249 !use_maxprot &&
9250 !(flags & VM_MAP_COPYIN_ENTRY_LIST))
9251 return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
9252 src_destroy, copy_result);
9253
9254 /*
9255 * Compute (page aligned) start and end of region
9256 */
9257 src_start = vm_map_trunc_page(src_addr,
9258 VM_MAP_PAGE_MASK(src_map));
9259 src_end = vm_map_round_page(src_end,
9260 VM_MAP_PAGE_MASK(src_map));
9261
9262 XPR(XPR_VM_MAP, "vm_map_copyin_common map 0x%x addr 0x%x len 0x%x dest %d\n", src_map, src_addr, len, src_destroy, 0);
9263
9264 /*
9265 * Allocate a header element for the list.
9266 *
9267 * Use the start and end in the header to
9268 * remember the endpoints prior to rounding.
9269 */
9270
9271 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
9272 copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
9273 vm_map_copy_first_entry(copy) =
9274 vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
9275 copy->type = VM_MAP_COPY_ENTRY_LIST;
9276 copy->cpy_hdr.nentries = 0;
9277 copy->cpy_hdr.entries_pageable = TRUE;
9278 #if 00
9279 copy->cpy_hdr.page_shift = src_map->hdr.page_shift;
9280 #else
9281 /*
9282 * The copy entries can be broken down for a variety of reasons,
9283 * so we can't guarantee that they will remain map-aligned...
9284 * Will need to adjust the first copy_entry's "vme_start" and
9285 * the last copy_entry's "vme_end" to be rounded to PAGE_MASK
9286 * rather than the original map's alignment.
9287 */
9288 copy->cpy_hdr.page_shift = PAGE_SHIFT;
9289 #endif
9290
9291 vm_map_store_init( &(copy->cpy_hdr) );
9292
9293 copy->offset = src_addr;
9294 copy->size = len;
9295
9296 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
9297
9298 #define RETURN(x) \
9299 MACRO_BEGIN \
9300 vm_map_unlock(src_map); \
9301 if(src_map != base_map) \
9302 vm_map_deallocate(src_map); \
9303 if (new_entry != VM_MAP_ENTRY_NULL) \
9304 vm_map_copy_entry_dispose(copy,new_entry); \
9305 vm_map_copy_discard(copy); \
9306 { \
9307 submap_map_t *_ptr; \
9308 \
9309 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
9310 parent_maps=parent_maps->next; \
9311 if (_ptr->parent_map != base_map) \
9312 vm_map_deallocate(_ptr->parent_map); \
9313 kfree(_ptr, sizeof(submap_map_t)); \
9314 } \
9315 } \
9316 MACRO_RETURN(x); \
9317 MACRO_END
9318
9319 /*
9320 * Find the beginning of the region.
9321 */
9322
9323 vm_map_lock(src_map);
9324
9325 /*
9326 * Lookup the original "src_addr" rather than the truncated
9327 * "src_start", in case "src_start" falls in a non-map-aligned
9328 * map entry *before* the map entry that contains "src_addr"...
9329 */
9330 if (!vm_map_lookup_entry(src_map, src_addr, &tmp_entry))
9331 RETURN(KERN_INVALID_ADDRESS);
9332 if(!tmp_entry->is_sub_map) {
9333 /*
9334 * ... but clip to the map-rounded "src_start" rather than
9335 * "src_addr" to preserve map-alignment. We'll adjust the
9336 * first copy entry at the end, if needed.
9337 */
9338 vm_map_clip_start(src_map, tmp_entry, src_start);
9339 }
9340 if (src_start < tmp_entry->vme_start) {
9341 /*
9342 * Move "src_start" up to the start of the
9343 * first map entry to copy.
9344 */
9345 src_start = tmp_entry->vme_start;
9346 }
9347 /* set for later submap fix-up */
9348 copy_addr = src_start;
9349
9350 /*
9351 * Go through entries until we get to the end.
9352 */
9353
9354 while (TRUE) {
9355 register
9356 vm_map_entry_t src_entry = tmp_entry; /* Top-level entry */
9357 vm_map_size_t src_size; /* Size of source
9358 * map entry (in both
9359 * maps)
9360 */
9361
9362 register
9363 vm_object_t src_object; /* Object to copy */
9364 vm_object_offset_t src_offset;
9365
9366 boolean_t src_needs_copy; /* Should source map
9367 * be made read-only
9368 * for copy-on-write?
9369 */
9370
9371 boolean_t new_entry_needs_copy; /* Will new entry be COW? */
9372
9373 boolean_t was_wired; /* Was source wired? */
9374 vm_map_version_t version; /* Version before locks
9375 * dropped to make copy
9376 */
9377 kern_return_t result; /* Return value from
9378 * copy_strategically.
9379 */
9380 while(tmp_entry->is_sub_map) {
9381 vm_map_size_t submap_len;
9382 submap_map_t *ptr;
9383
9384 ptr = (submap_map_t *)kalloc(sizeof(submap_map_t));
9385 ptr->next = parent_maps;
9386 parent_maps = ptr;
9387 ptr->parent_map = src_map;
9388 ptr->base_start = src_start;
9389 ptr->base_end = src_end;
9390 submap_len = tmp_entry->vme_end - src_start;
9391 if(submap_len > (src_end-src_start))
9392 submap_len = src_end-src_start;
9393 ptr->base_len = submap_len;
9394
9395 src_start -= tmp_entry->vme_start;
9396 src_start += VME_OFFSET(tmp_entry);
9397 src_end = src_start + submap_len;
9398 src_map = VME_SUBMAP(tmp_entry);
9399 vm_map_lock(src_map);
9400 /* keep an outstanding reference for all maps in */
9401 /* the parents' tree except the base map */
9402 vm_map_reference(src_map);
9403 vm_map_unlock(ptr->parent_map);
9404 if (!vm_map_lookup_entry(
9405 src_map, src_start, &tmp_entry))
9406 RETURN(KERN_INVALID_ADDRESS);
9407 map_share = TRUE;
9408 if(!tmp_entry->is_sub_map)
9409 vm_map_clip_start(src_map, tmp_entry, src_start);
9410 src_entry = tmp_entry;
9411 }
9412 /* we are now in the lowest level submap... */
9413
9414 if ((VME_OBJECT(tmp_entry) != VM_OBJECT_NULL) &&
9415 (VME_OBJECT(tmp_entry)->phys_contiguous)) {
9416 /* This is not supported for now. In the future */
9417 /* we will need to detect the phys_contiguous */
9418 /* condition and then upgrade copy_slowly */
9419 /* to do a physical copy from the device-memory- */
9420 /* based object. We can piggy-back off of */
9421 /* the was_wired boolean to set up the */
9422 /* proper handling. */
9423 RETURN(KERN_PROTECTION_FAILURE);
9424 }
9425 /*
9426 * Create a new address map entry to hold the result.
9427 * Fill in the fields from the appropriate source entries.
9428 * We must unlock the source map to do this if we need
9429 * to allocate a map entry.
9430 */
9431 if (new_entry == VM_MAP_ENTRY_NULL) {
9432 version.main_timestamp = src_map->timestamp;
9433 vm_map_unlock(src_map);
9434
9435 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
9436
9437 vm_map_lock(src_map);
9438 if ((version.main_timestamp + 1) != src_map->timestamp) {
9439 if (!vm_map_lookup_entry(src_map, src_start,
9440 &tmp_entry)) {
9441 RETURN(KERN_INVALID_ADDRESS);
9442 }
9443 if (!tmp_entry->is_sub_map)
9444 vm_map_clip_start(src_map, tmp_entry, src_start);
9445 continue; /* restart w/ new tmp_entry */
9446 }
9447 }
9448
9449 /*
9450 * Verify that the region can be read.
9451 */
9452 if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE &&
9453 !use_maxprot) ||
9454 (src_entry->max_protection & VM_PROT_READ) == 0)
9455 RETURN(KERN_PROTECTION_FAILURE);
9456
9457 /*
9458 * Clip against the endpoints of the entire region.
9459 */
9460
9461 vm_map_clip_end(src_map, src_entry, src_end);
9462
9463 src_size = src_entry->vme_end - src_start;
9464 src_object = VME_OBJECT(src_entry);
9465 src_offset = VME_OFFSET(src_entry);
9466 was_wired = (src_entry->wired_count != 0);
9467
9468 vm_map_entry_copy(new_entry, src_entry);
9469 if (new_entry->is_sub_map) {
9470 /* clr address space specifics */
9471 new_entry->use_pmap = FALSE;
9472 }
9473
9474 /*
9475 * Attempt non-blocking copy-on-write optimizations.
9476 */
9477
9478 if (src_destroy &&
9479 (src_object == VM_OBJECT_NULL ||
9480 (src_object->internal && !src_object->true_share
9481 && !map_share))) {
9482 /*
9483 * If we are destroying the source, and the object
9484 * is internal, we can move the object reference
9485 * from the source to the copy. The copy is
9486 * copy-on-write only if the source is.
9487 * We make another reference to the object, because
9488 * destroying the source entry will deallocate it.
9489 */
9490 vm_object_reference(src_object);
9491
9492 /*
9493 * The copy is always unwired; vm_map_entry_copy
9494 * set its wired count to zero.
9495 */
9496
9497 goto CopySuccessful;
9498 }
9499
9500
9501 RestartCopy:
9502 XPR(XPR_VM_MAP, "vm_map_copyin_common src_obj 0x%x ent 0x%x obj 0x%x was_wired %d\n",
9503 src_object, new_entry, VME_OBJECT(new_entry),
9504 was_wired, 0);
9505 if ((src_object == VM_OBJECT_NULL ||
9506 (!was_wired && !map_share && !tmp_entry->is_shared)) &&
9507 vm_object_copy_quickly(
9508 &VME_OBJECT(new_entry),
9509 src_offset,
9510 src_size,
9511 &src_needs_copy,
9512 &new_entry_needs_copy)) {
9513
9514 new_entry->needs_copy = new_entry_needs_copy;
9515
9516 /*
9517 * Handle copy-on-write obligations
9518 */
9519
9520 if (src_needs_copy && !tmp_entry->needs_copy) {
9521 vm_prot_t prot;
9522
9523 prot = src_entry->protection & ~VM_PROT_WRITE;
9524
9525 if (override_nx(src_map, VME_ALIAS(src_entry))
9526 && prot)
9527 prot |= VM_PROT_EXECUTE;
9528
9529 vm_object_pmap_protect(
9530 src_object,
9531 src_offset,
9532 src_size,
9533 (src_entry->is_shared ?
9534 PMAP_NULL
9535 : src_map->pmap),
9536 src_entry->vme_start,
9537 prot);
9538
9539 assert(tmp_entry->wired_count == 0);
9540 tmp_entry->needs_copy = TRUE;
9541 }
9542
9543 /*
9544 * The map has never been unlocked, so it's safe
9545 * to move to the next entry rather than doing
9546 * another lookup.
9547 */
9548
9549 goto CopySuccessful;
9550 }
9551
9552 /*
9553 * Take an object reference, so that we may
9554 * release the map lock(s).
9555 */
9556
9557 assert(src_object != VM_OBJECT_NULL);
9558 vm_object_reference(src_object);
9559
9560 /*
9561 * Record the timestamp for later verification.
9562 * Unlock the map.
9563 */
9564
9565 version.main_timestamp = src_map->timestamp;
9566 vm_map_unlock(src_map); /* Increments timestamp once! */
9567
9568 /*
9569 * Perform the copy
9570 */
9571
9572 if (was_wired) {
9573 CopySlowly:
9574 vm_object_lock(src_object);
9575 result = vm_object_copy_slowly(
9576 src_object,
9577 src_offset,
9578 src_size,
9579 THREAD_UNINT,
9580 &VME_OBJECT(new_entry));
9581 VME_OFFSET_SET(new_entry, 0);
9582 new_entry->needs_copy = FALSE;
9583
9584 }
9585 else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
9586 (tmp_entry->is_shared || map_share)) {
9587 vm_object_t new_object;
9588
9589 vm_object_lock_shared(src_object);
9590 new_object = vm_object_copy_delayed(
9591 src_object,
9592 src_offset,
9593 src_size,
9594 TRUE);
9595 if (new_object == VM_OBJECT_NULL)
9596 goto CopySlowly;
9597
9598 VME_OBJECT_SET(new_entry, new_object);
9599 assert(new_entry->wired_count == 0);
9600 new_entry->needs_copy = TRUE;
9601 assert(!new_entry->iokit_acct);
9602 assert(new_object->purgable == VM_PURGABLE_DENY);
9603 new_entry->use_pmap = TRUE;
9604 result = KERN_SUCCESS;
9605
9606 } else {
9607 vm_object_offset_t new_offset;
9608 new_offset = VME_OFFSET(new_entry);
9609 result = vm_object_copy_strategically(src_object,
9610 src_offset,
9611 src_size,
9612 &VME_OBJECT(new_entry),
9613 &new_offset,
9614 &new_entry_needs_copy);
9615 if (new_offset != VME_OFFSET(new_entry)) {
9616 VME_OFFSET_SET(new_entry, new_offset);
9617 }
9618
9619 new_entry->needs_copy = new_entry_needs_copy;
9620 }
9621
9622 if (result != KERN_SUCCESS &&
9623 result != KERN_MEMORY_RESTART_COPY) {
9624 vm_map_lock(src_map);
9625 RETURN(result);
9626 }
9627
9628 /*
9629 * Throw away the extra reference
9630 */
9631
9632 vm_object_deallocate(src_object);
9633
9634 /*
9635 * Verify that the map has not substantially
9636 * changed while the copy was being made.
9637 */
9638
9639 vm_map_lock(src_map);
9640
9641 if ((version.main_timestamp + 1) == src_map->timestamp)
9642 goto VerificationSuccessful;
9643
9644 /*
9645 * Simple version comparison failed.
9646 *
9647 * Retry the lookup and verify that the
9648 * same object/offset are still present.
9649 *
9650 * [Note: a memory manager that colludes with
9651 * the calling task can detect that we have
9652 * cheated. While the map was unlocked, the
9653 * mapping could have been changed and restored.]
9654 */
9655
9656 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
9657 if (result != KERN_MEMORY_RESTART_COPY) {
9658 vm_object_deallocate(VME_OBJECT(new_entry));
9659 VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
9660 assert(!new_entry->iokit_acct);
9661 new_entry->use_pmap = TRUE;
9662 }
9663 RETURN(KERN_INVALID_ADDRESS);
9664 }
9665
9666 src_entry = tmp_entry;
9667 vm_map_clip_start(src_map, src_entry, src_start);
9668
9669 if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) &&
9670 !use_maxprot) ||
9671 ((src_entry->max_protection & VM_PROT_READ) == 0))
9672 goto VerificationFailed;
9673
9674 if (src_entry->vme_end < new_entry->vme_end) {
9675 assert(VM_MAP_PAGE_ALIGNED(src_entry->vme_end,
9676 VM_MAP_COPY_PAGE_MASK(copy)));
9677 new_entry->vme_end = src_entry->vme_end;
9678 src_size = new_entry->vme_end - src_start;
9679 }
9680
9681 if ((VME_OBJECT(src_entry) != src_object) ||
9682 (VME_OFFSET(src_entry) != src_offset) ) {
9683
9684 /*
9685 * Verification failed.
9686 *
9687 * Start over with this top-level entry.
9688 */
9689
9690 VerificationFailed: ;
9691
9692 vm_object_deallocate(VME_OBJECT(new_entry));
9693 tmp_entry = src_entry;
9694 continue;
9695 }
9696
9697 /*
9698 * Verification succeeded.
9699 */
9700
9701 VerificationSuccessful: ;
9702
9703 if (result == KERN_MEMORY_RESTART_COPY)
9704 goto RestartCopy;
9705
9706 /*
9707 * Copy succeeded.
9708 */
9709
9710 CopySuccessful: ;
9711
9712 /*
9713 * Link in the new copy entry.
9714 */
9715
9716 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
9717 new_entry);
9718
9719 /*
9720 * Determine whether the entire region
9721 * has been copied.
9722 */
9723 src_base = src_start;
9724 src_start = new_entry->vme_end;
9725 new_entry = VM_MAP_ENTRY_NULL;
9726 while ((src_start >= src_end) && (src_end != 0)) {
9727 submap_map_t *ptr;
9728
9729 if (src_map == base_map) {
9730 /* back to the top */
9731 break;
9732 }
9733
9734 ptr = parent_maps;
9735 assert(ptr != NULL);
9736 parent_maps = parent_maps->next;
9737
9738 /* fix up the damage we did in that submap */
9739 vm_map_simplify_range(src_map,
9740 src_base,
9741 src_end);
9742
9743 vm_map_unlock(src_map);
9744 vm_map_deallocate(src_map);
9745 vm_map_lock(ptr->parent_map);
9746 src_map = ptr->parent_map;
9747 src_base = ptr->base_start;
9748 src_start = ptr->base_start + ptr->base_len;
9749 src_end = ptr->base_end;
9750 if (!vm_map_lookup_entry(src_map,
9751 src_start,
9752 &tmp_entry) &&
9753 (src_end > src_start)) {
9754 RETURN(KERN_INVALID_ADDRESS);
9755 }
9756 kfree(ptr, sizeof(submap_map_t));
9757 if (parent_maps == NULL)
9758 map_share = FALSE;
9759 src_entry = tmp_entry->vme_prev;
9760 }
9761
9762 if ((VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) &&
9763 (src_start >= src_addr + len) &&
9764 (src_addr + len != 0)) {
9765 /*
9766 * Stop copying now, even though we haven't reached
9767 * "src_end". We'll adjust the end of the last copy
9768 * entry at the end, if needed.
9769 *
9770 * If src_map's alignment is different from the
9771 * system's page-alignment, there could be
9772 * extra non-map-aligned map entries between
9773 * the original (non-rounded) "src_addr + len"
9774 * and the rounded "src_end".
9775 * We do not want to copy those map entries since
9776 * they're not part of the copied range.
9777 */
9778 break;
9779 }
9780
9781 if ((src_start >= src_end) && (src_end != 0))
9782 break;
9783
9784 /*
9785 * Verify that there are no gaps in the region
9786 */
9787
9788 tmp_entry = src_entry->vme_next;
9789 if ((tmp_entry->vme_start != src_start) ||
9790 (tmp_entry == vm_map_to_entry(src_map))) {
9791 RETURN(KERN_INVALID_ADDRESS);
9792 }
9793 }
9794
9795 /*
9796 * If the source should be destroyed, do it now, since the
9797 * copy was successful.
9798 */
9799 if (src_destroy) {
9800 (void) vm_map_delete(
9801 src_map,
9802 vm_map_trunc_page(src_addr,
9803 VM_MAP_PAGE_MASK(src_map)),
9804 src_end,
9805 ((src_map == kernel_map) ?
9806 VM_MAP_REMOVE_KUNWIRE :
9807 VM_MAP_NO_FLAGS),
9808 VM_MAP_NULL);
9809 } else {
9810 /* fix up the damage we did in the base map */
9811 vm_map_simplify_range(
9812 src_map,
9813 vm_map_trunc_page(src_addr,
9814 VM_MAP_PAGE_MASK(src_map)),
9815 vm_map_round_page(src_end,
9816 VM_MAP_PAGE_MASK(src_map)));
9817 }
9818
9819 vm_map_unlock(src_map);
9820
9821 if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) {
9822 vm_map_offset_t original_start, original_offset, original_end;
9823
9824 assert(VM_MAP_COPY_PAGE_MASK(copy) == PAGE_MASK);
9825
9826 /* adjust alignment of first copy_entry's "vme_start" */
9827 tmp_entry = vm_map_copy_first_entry(copy);
9828 if (tmp_entry != vm_map_copy_to_entry(copy)) {
9829 vm_map_offset_t adjustment;
9830
9831 original_start = tmp_entry->vme_start;
9832 original_offset = VME_OFFSET(tmp_entry);
9833
9834 /* map-align the start of the first copy entry... */
9835 adjustment = (tmp_entry->vme_start -
9836 vm_map_trunc_page(
9837 tmp_entry->vme_start,
9838 VM_MAP_PAGE_MASK(src_map)));
9839 tmp_entry->vme_start -= adjustment;
9840 VME_OFFSET_SET(tmp_entry,
9841 VME_OFFSET(tmp_entry) - adjustment);
9842 copy_addr -= adjustment;
9843 assert(tmp_entry->vme_start < tmp_entry->vme_end);
9844 /* ... adjust for mis-aligned start of copy range */
9845 adjustment =
9846 (vm_map_trunc_page(copy->offset,
9847 PAGE_MASK) -
9848 vm_map_trunc_page(copy->offset,
9849 VM_MAP_PAGE_MASK(src_map)));
9850 if (adjustment) {
9851 assert(page_aligned(adjustment));
9852 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
9853 tmp_entry->vme_start += adjustment;
9854 VME_OFFSET_SET(tmp_entry,
9855 (VME_OFFSET(tmp_entry) +
9856 adjustment));
9857 copy_addr += adjustment;
9858 assert(tmp_entry->vme_start < tmp_entry->vme_end);
9859 }
9860
9861 /*
9862 * Assert that the adjustments haven't exposed
9863 * more than was originally copied...
9864 */
9865 assert(tmp_entry->vme_start >= original_start);
9866 assert(VME_OFFSET(tmp_entry) >= original_offset);
9867 /*
9868 * ... and that it did not adjust outside of
9869 * a single 16K page.
9870 */
9871 assert(vm_map_trunc_page(tmp_entry->vme_start,
9872 VM_MAP_PAGE_MASK(src_map)) ==
9873 vm_map_trunc_page(original_start,
9874 VM_MAP_PAGE_MASK(src_map)));
9875 }
9876
9877 /* adjust alignment of last copy_entry's "vme_end" */
9878 tmp_entry = vm_map_copy_last_entry(copy);
9879 if (tmp_entry != vm_map_copy_to_entry(copy)) {
9880 vm_map_offset_t adjustment;
9881
9882 original_end = tmp_entry->vme_end;
9883
9884 /* map-align the end of the last copy entry... */
9885 tmp_entry->vme_end =
9886 vm_map_round_page(tmp_entry->vme_end,
9887 VM_MAP_PAGE_MASK(src_map));
9888 /* ... adjust for mis-aligned end of copy range */
9889 adjustment =
9890 (vm_map_round_page((copy->offset +
9891 copy->size),
9892 VM_MAP_PAGE_MASK(src_map)) -
9893 vm_map_round_page((copy->offset +
9894 copy->size),
9895 PAGE_MASK));
9896 if (adjustment) {
9897 assert(page_aligned(adjustment));
9898 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
9899 tmp_entry->vme_end -= adjustment;
9900 assert(tmp_entry->vme_start < tmp_entry->vme_end);
9901 }
9902
9903 /*
9904 * Assert that the adjustments haven't exposed
9905 * more than was originally copied...
9906 */
9907 assert(tmp_entry->vme_end <= original_end);
9908 /*
9909 * ... and that it did not adjust outside of
9910 * a single 16K page.
9911 */
9912 assert(vm_map_round_page(tmp_entry->vme_end,
9913 VM_MAP_PAGE_MASK(src_map)) ==
9914 vm_map_round_page(original_end,
9915 VM_MAP_PAGE_MASK(src_map)));
9916 }
9917 }
9918
9919 /* Fix up start and end points in copy. This is necessary */
9920 /* when the various entries in the copy object were picked */
9921 /* up from different sub-maps */
9922
9923 tmp_entry = vm_map_copy_first_entry(copy);
9924 copy_size = 0; /* compute actual size */
9925 while (tmp_entry != vm_map_copy_to_entry(copy)) {
9926 assert(VM_MAP_PAGE_ALIGNED(
9927 copy_addr + (tmp_entry->vme_end -
9928 tmp_entry->vme_start),
9929 VM_MAP_COPY_PAGE_MASK(copy)));
9930 assert(VM_MAP_PAGE_ALIGNED(
9931 copy_addr,
9932 VM_MAP_COPY_PAGE_MASK(copy)));
9933
9934 /*
9935 * The copy_entries will be injected directly into the
9936 * destination map and might not be "map aligned" there...
9937 */
9938 tmp_entry->map_aligned = FALSE;
9939
9940 tmp_entry->vme_end = copy_addr +
9941 (tmp_entry->vme_end - tmp_entry->vme_start);
9942 tmp_entry->vme_start = copy_addr;
9943 assert(tmp_entry->vme_start < tmp_entry->vme_end);
9944 copy_addr += tmp_entry->vme_end - tmp_entry->vme_start;
9945 copy_size += tmp_entry->vme_end - tmp_entry->vme_start;
9946 tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next;
9947 }
9948
9949 if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT &&
9950 copy_size < copy->size) {
9951 /*
9952 * The actual size of the VM map copy is smaller than what
9953 * was requested by the caller. This must be because some
9954 * PAGE_SIZE-sized pages are missing at the end of the last
9955 * VM_MAP_PAGE_SIZE(src_map)-sized chunk of the range.
9956 * The caller might not have been aware of those missing
9957 * pages and might not want to be aware of it, which is
9958 * fine as long as they don't try to access (and crash on)
9959 * those missing pages.
9960 * Let's adjust the size of the "copy", to avoid failing
9961 * in vm_map_copyout() or vm_map_copy_overwrite().
9962 */
9963 assert(vm_map_round_page(copy_size,
9964 VM_MAP_PAGE_MASK(src_map)) ==
9965 vm_map_round_page(copy->size,
9966 VM_MAP_PAGE_MASK(src_map)));
9967 copy->size = copy_size;
9968 }
9969
9970 *copy_result = copy;
9971 return(KERN_SUCCESS);
9972
9973 #undef RETURN
9974 }
9975
9976 kern_return_t
9977 vm_map_copy_extract(
9978 vm_map_t src_map,
9979 vm_map_address_t src_addr,
9980 vm_map_size_t len,
9981 vm_map_copy_t *copy_result, /* OUT */
9982 vm_prot_t *cur_prot, /* OUT */
9983 vm_prot_t *max_prot)
9984 {
9985 vm_map_offset_t src_start, src_end;
9986 vm_map_copy_t copy;
9987 kern_return_t kr;
9988
9989 /*
9990 * Check for copies of zero bytes.
9991 */
9992
9993 if (len == 0) {
9994 *copy_result = VM_MAP_COPY_NULL;
9995 return(KERN_SUCCESS);
9996 }
9997
9998 /*
9999 * Check that the end address doesn't overflow
10000 */
10001 src_end = src_addr + len;
10002 if (src_end < src_addr)
10003 return KERN_INVALID_ADDRESS;
10004
10005 /*
10006 * Compute (page aligned) start and end of region
10007 */
10008 src_start = vm_map_trunc_page(src_addr, PAGE_MASK);
10009 src_end = vm_map_round_page(src_end, PAGE_MASK);
10010
10011 /*
10012 * Allocate a header element for the list.
10013 *
10014 * Use the start and end in the header to
10015 * remember the endpoints prior to rounding.
10016 */
10017
10018 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
10019 copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
10020 vm_map_copy_first_entry(copy) =
10021 vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
10022 copy->type = VM_MAP_COPY_ENTRY_LIST;
10023 copy->cpy_hdr.nentries = 0;
10024 copy->cpy_hdr.entries_pageable = TRUE;
10025
10026 vm_map_store_init(&copy->cpy_hdr);
10027
10028 copy->offset = 0;
10029 copy->size = len;
10030
10031 kr = vm_map_remap_extract(src_map,
10032 src_addr,
10033 len,
10034 FALSE, /* copy */
10035 &copy->cpy_hdr,
10036 cur_prot,
10037 max_prot,
10038 VM_INHERIT_SHARE,
10039 TRUE); /* pageable */
10040 if (kr != KERN_SUCCESS) {
10041 vm_map_copy_discard(copy);
10042 return kr;
10043 }
10044
10045 *copy_result = copy;
10046 return KERN_SUCCESS;
10047 }
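/*
 * Illustrative usage sketch (not part of the original source; local
 * variable names are assumptions): a caller that wants a pageable,
 * entry-list copy of [src_addr, src_addr + len) that still shares the
 * source objects might do roughly:
 *
 *	vm_map_copy_t	copy;
 *	vm_prot_t	cur_prot, max_prot;
 *	kern_return_t	kr;
 *
 *	kr = vm_map_copy_extract(src_map, src_addr, len,
 *				 &copy, &cur_prot, &max_prot);
 *	if (kr == KERN_SUCCESS) {
 *		kr = vm_map_copyout(dst_map, &dst_addr, copy);
 *		if (kr != KERN_SUCCESS)
 *			vm_map_copy_discard(copy);
 *	}
 */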
10048
10049 /*
10050 * vm_map_copyin_object:
10051 *
10052 * Create a copy object from an object.
10053 * Our caller donates an object reference.
10054 */
10055
10056 kern_return_t
10057 vm_map_copyin_object(
10058 vm_object_t object,
10059 vm_object_offset_t offset, /* offset of region in object */
10060 vm_object_size_t size, /* size of region in object */
10061 vm_map_copy_t *copy_result) /* OUT */
10062 {
10063 vm_map_copy_t copy; /* Resulting copy */
10064
10065 /*
10066 * We drop the object into a special copy object
10067 * that contains the object directly.
10068 */
10069
10070 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
10071 copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
10072 copy->type = VM_MAP_COPY_OBJECT;
10073 copy->cpy_object = object;
10074 copy->offset = offset;
10075 copy->size = size;
10076
10077 *copy_result = copy;
10078 return(KERN_SUCCESS);
10079 }
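/*
 * Illustrative sketch (assumption, not in the original source): because the
 * caller donates its object reference, a typical call site takes an extra
 * reference and lets the copy consume it:
 *
 *	vm_object_reference(object);
 *	(void) vm_map_copyin_object(object, 0, size, &copy);
 *
 * The donated reference is released when the copy is later discarded or
 * copied out.
 */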
10080
10081 static void
10082 vm_map_fork_share(
10083 vm_map_t old_map,
10084 vm_map_entry_t old_entry,
10085 vm_map_t new_map)
10086 {
10087 vm_object_t object;
10088 vm_map_entry_t new_entry;
10089
10090 /*
10091 * New sharing code. New map entry
10092 * references original object. Internal
10093 * objects use asynchronous copy algorithm for
10094 * future copies. First make sure we have
10095 * the right object. If we need a shadow,
10096 * or someone else already has one, then
10097 * make a new shadow and share it.
10098 */
10099
10100 object = VME_OBJECT(old_entry);
10101 if (old_entry->is_sub_map) {
10102 assert(old_entry->wired_count == 0);
10103 #ifndef NO_NESTED_PMAP
10104 if(old_entry->use_pmap) {
10105 kern_return_t result;
10106
10107 result = pmap_nest(new_map->pmap,
10108 (VME_SUBMAP(old_entry))->pmap,
10109 (addr64_t)old_entry->vme_start,
10110 (addr64_t)old_entry->vme_start,
10111 (uint64_t)(old_entry->vme_end - old_entry->vme_start));
10112 if(result)
10113 panic("vm_map_fork_share: pmap_nest failed!");
10114 }
10115 #endif /* NO_NESTED_PMAP */
10116 } else if (object == VM_OBJECT_NULL) {
10117 object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
10118 old_entry->vme_start));
10119 VME_OFFSET_SET(old_entry, 0);
10120 VME_OBJECT_SET(old_entry, object);
10121 old_entry->use_pmap = TRUE;
10122 assert(!old_entry->needs_copy);
10123 } else if (object->copy_strategy !=
10124 MEMORY_OBJECT_COPY_SYMMETRIC) {
10125
10126 /*
10127 * We are already using an asymmetric
10128 * copy, and therefore we already have
10129 * the right object.
10130 */
10131
10132 assert(! old_entry->needs_copy);
10133 }
10134 else if (old_entry->needs_copy || /* case 1 */
10135 object->shadowed || /* case 2 */
10136 (!object->true_share && /* case 3 */
10137 !old_entry->is_shared &&
10138 (object->vo_size >
10139 (vm_map_size_t)(old_entry->vme_end -
10140 old_entry->vme_start)))) {
10141
10142 /*
10143 * We need to create a shadow.
10144 * There are three cases here.
10145 * In the first case, we need to
10146 * complete a deferred symmetrical
10147 * copy that we participated in.
10148 * In the second and third cases,
10149 * we need to create the shadow so
10150 * that changes that we make to the
10151 * object do not interfere with
10152 * any symmetrical copies which
10153 * have occurred (case 2) or which
10154 * might occur (case 3).
10155 *
10156 * The first case is when we had
10157 * deferred shadow object creation
10158 * via the entry->needs_copy mechanism.
10159 * This mechanism only works when
10160 * only one entry points to the source
10161 * object, and we are about to create
10162 * a second entry pointing to the
10163 * same object. The problem is that
10164 * there is no way of mapping from
10165 * an object to the entries pointing
10166 * to it. (Deferred shadow creation
10167 * works with one entry because it occurs
10168 * at fault time, and we walk from the
10169 * entry to the object when handling
10170 * the fault.)
10171 *
10172 * The second case is when the object
10173 * to be shared has already been copied
10174 * with a symmetric copy, but we point
10175 * directly to the object without
10176 * needs_copy set in our entry. (This
10177 * can happen because different ranges
10178 * of an object can be pointed to by
10179 * different entries. In particular,
10180 * a single entry pointing to an object
10181 * can be split by a call to vm_inherit,
10182 * which, combined with task_create, can
10183 * result in the different entries
10184 * having different needs_copy values.)
10185 * The shadowed flag in the object allows
10186 * us to detect this case. The problem
10187 * with this case is that if this object
10188 * has or will have shadows, then we
10189 * must not perform an asymmetric copy
10190 * of this object, since such a copy
10191 * allows the object to be changed, which
10192 * will break the previous symmetrical
10193 * copies (which rely upon the object
10194 * not changing). In a sense, the shadowed
10195 * flag says "don't change this object".
10196 * We fix this by creating a shadow
10197 * object for this object, and sharing
10198 * that. This works because we are free
10199 * to change the shadow object (and thus
10200 * to use an asymmetric copy strategy);
10201 * this is also semantically correct,
10202 * since this object is temporary, and
10203 * therefore a copy of the object is
10204 * as good as the object itself. (This
10205 * is not true for permanent objects,
10206 * since the pager needs to see changes,
10207 * which won't happen if the changes
10208 * are made to a copy.)
10209 *
10210 * The third case is when the object
10211 * to be shared has parts sticking
10212 * outside of the entry we're working
10213 * with, and thus may in the future
10214 * be subject to a symmetrical copy.
10215 * (This is a preemptive version of
10216 * case 2.)
10217 */
10218 VME_OBJECT_SHADOW(old_entry,
10219 (vm_map_size_t) (old_entry->vme_end -
10220 old_entry->vme_start));
10221
10222 /*
10223 * If we're making a shadow for other than
10224 * copy on write reasons, then we have
10225 * to remove write permission.
10226 */
10227
10228 if (!old_entry->needs_copy &&
10229 (old_entry->protection & VM_PROT_WRITE)) {
10230 vm_prot_t prot;
10231
10232 prot = old_entry->protection & ~VM_PROT_WRITE;
10233
10234 if (override_nx(old_map, VME_ALIAS(old_entry)) && prot)
10235 prot |= VM_PROT_EXECUTE;
10236
10237 if (old_map->mapped_in_other_pmaps) {
10238 vm_object_pmap_protect(
10239 VME_OBJECT(old_entry),
10240 VME_OFFSET(old_entry),
10241 (old_entry->vme_end -
10242 old_entry->vme_start),
10243 PMAP_NULL,
10244 old_entry->vme_start,
10245 prot);
10246 } else {
10247 pmap_protect(old_map->pmap,
10248 old_entry->vme_start,
10249 old_entry->vme_end,
10250 prot);
10251 }
10252 }
10253
10254 old_entry->needs_copy = FALSE;
10255 object = VME_OBJECT(old_entry);
10256 }
10257
10258
10259 /*
10260 * If object was using a symmetric copy strategy,
10261 * change its copy strategy to the default
10262 * asymmetric copy strategy, which is copy_delay
10263 * in the non-norma case and copy_call in the
10264 * norma case. Bump the reference count for the
10265 * new entry.
10266 */
10267
10268 if(old_entry->is_sub_map) {
10269 vm_map_lock(VME_SUBMAP(old_entry));
10270 vm_map_reference(VME_SUBMAP(old_entry));
10271 vm_map_unlock(VME_SUBMAP(old_entry));
10272 } else {
10273 vm_object_lock(object);
10274 vm_object_reference_locked(object);
10275 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
10276 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
10277 }
10278 vm_object_unlock(object);
10279 }
10280
10281 /*
10282 * Clone the entry, using object ref from above.
10283 * Mark both entries as shared.
10284 */
10285
10286 new_entry = vm_map_entry_create(new_map, FALSE); /* Never the kernel
10287 * map or descendants */
10288 vm_map_entry_copy(new_entry, old_entry);
10289 old_entry->is_shared = TRUE;
10290 new_entry->is_shared = TRUE;
10291
10292 /*
10293 * Insert the entry into the new map -- we
10294 * know we're inserting at the end of the new
10295 * map.
10296 */
10297
10298 vm_map_store_entry_link(new_map, vm_map_last_entry(new_map), new_entry);
10299
10300 /*
10301 * Update the physical map
10302 */
10303
10304 if (old_entry->is_sub_map) {
10305 /* Bill Angell pmap support goes here */
10306 } else {
10307 pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
10308 old_entry->vme_end - old_entry->vme_start,
10309 old_entry->vme_start);
10310 }
10311 }
10312
10313 static boolean_t
10314 vm_map_fork_copy(
10315 vm_map_t old_map,
10316 vm_map_entry_t *old_entry_p,
10317 vm_map_t new_map)
10318 {
10319 vm_map_entry_t old_entry = *old_entry_p;
10320 vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start;
10321 vm_map_offset_t start = old_entry->vme_start;
10322 vm_map_copy_t copy;
10323 vm_map_entry_t last = vm_map_last_entry(new_map);
10324
10325 vm_map_unlock(old_map);
10326 /*
10327 * Use maxprot version of copyin because we
10328 * care about whether this memory can ever
10329 * be accessed, not just whether it's accessible
10330 * right now.
10331 */
10332 if (vm_map_copyin_maxprot(old_map, start, entry_size, FALSE, &copy)
10333 != KERN_SUCCESS) {
10334 /*
10335 * The map might have changed while it
10336 * was unlocked, check it again. Skip
10337 * any blank space or permanently
10338 * unreadable region.
10339 */
10340 vm_map_lock(old_map);
10341 if (!vm_map_lookup_entry(old_map, start, &last) ||
10342 (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) {
10343 last = last->vme_next;
10344 }
10345 *old_entry_p = last;
10346
10347 /*
10348 * XXX For some error returns, want to
10349 * XXX skip to the next element. Note
10350 * that INVALID_ADDRESS and
10351 * PROTECTION_FAILURE are handled above.
10352 */
10353
10354 return FALSE;
10355 }
10356
10357 /*
10358 * Insert the copy into the new map
10359 */
10360
10361 vm_map_copy_insert(new_map, last, copy);
10362
10363 /*
10364 * Pick up the traversal at the end of
10365 * the copied region.
10366 */
10367
10368 vm_map_lock(old_map);
10369 start += entry_size;
10370 if (! vm_map_lookup_entry(old_map, start, &last)) {
10371 last = last->vme_next;
10372 } else {
10373 if (last->vme_start == start) {
10374 /*
10375 * No need to clip here and we don't
10376 * want to cause any unnecessary
10377 * unnesting...
10378 */
10379 } else {
10380 vm_map_clip_start(old_map, last, start);
10381 }
10382 }
10383 *old_entry_p = last;
10384
10385 return TRUE;
10386 }
10387
10388 /*
10389 * vm_map_fork:
10390 *
10391 * Create and return a new map based on the old
10392 * map, according to the inheritance values on the
10393 * regions in that map.
10394 *
10395 * The source map must not be locked.
10396 */
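/*
 * Illustrative sketch (assumption, not in the original source): the task
 * layer is expected to use this on the fork() path roughly as
 *
 *	new_map = vm_map_fork(ledger, old_task->map);
 *
 * after which each region of the child map is either absent
 * (VM_INHERIT_NONE), shared with the parent (VM_INHERIT_SHARE), or set up
 * for copy-on-write (VM_INHERIT_COPY), as implemented below.
 */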
10397 vm_map_t
10398 vm_map_fork(
10399 ledger_t ledger,
10400 vm_map_t old_map)
10401 {
10402 pmap_t new_pmap;
10403 vm_map_t new_map;
10404 vm_map_entry_t old_entry;
10405 vm_map_size_t new_size = 0, entry_size;
10406 vm_map_entry_t new_entry;
10407 boolean_t src_needs_copy;
10408 boolean_t new_entry_needs_copy;
10409 boolean_t pmap_is64bit;
10410
10411 pmap_is64bit =
10412 #if defined(__i386__) || defined(__x86_64__)
10413 old_map->pmap->pm_task_map != TASK_MAP_32BIT;
10414 #else
10415 #error Unknown architecture.
10416 #endif
10417
10418 new_pmap = pmap_create(ledger, (vm_map_size_t) 0, pmap_is64bit);
10419
10420 vm_map_reference_swap(old_map);
10421 vm_map_lock(old_map);
10422
10423 new_map = vm_map_create(new_pmap,
10424 old_map->min_offset,
10425 old_map->max_offset,
10426 old_map->hdr.entries_pageable);
10427 /* inherit the parent map's page size */
10428 vm_map_set_page_shift(new_map, VM_MAP_PAGE_SHIFT(old_map));
10429 for (
10430 old_entry = vm_map_first_entry(old_map);
10431 old_entry != vm_map_to_entry(old_map);
10432 ) {
10433
10434 entry_size = old_entry->vme_end - old_entry->vme_start;
10435
10436 switch (old_entry->inheritance) {
10437 case VM_INHERIT_NONE:
10438 break;
10439
10440 case VM_INHERIT_SHARE:
10441 vm_map_fork_share(old_map, old_entry, new_map);
10442 new_size += entry_size;
10443 break;
10444
10445 case VM_INHERIT_COPY:
10446
10447 /*
10448 * Inline the copy_quickly case;
10449 * upon failure, fall back on call
10450 * to vm_map_fork_copy.
10451 */
10452
10453 if(old_entry->is_sub_map)
10454 break;
10455 if ((old_entry->wired_count != 0) ||
10456 ((VME_OBJECT(old_entry) != NULL) &&
10457 (VME_OBJECT(old_entry)->true_share))) {
10458 goto slow_vm_map_fork_copy;
10459 }
10460
10461 new_entry = vm_map_entry_create(new_map, FALSE); /* never the kernel map or descendants */
10462 vm_map_entry_copy(new_entry, old_entry);
10463 if (new_entry->is_sub_map) {
10464 /* clear address space specifics */
10465 new_entry->use_pmap = FALSE;
10466 }
10467
10468 if (! vm_object_copy_quickly(
10469 &VME_OBJECT(new_entry),
10470 VME_OFFSET(old_entry),
10471 (old_entry->vme_end -
10472 old_entry->vme_start),
10473 &src_needs_copy,
10474 &new_entry_needs_copy)) {
10475 vm_map_entry_dispose(new_map, new_entry);
10476 goto slow_vm_map_fork_copy;
10477 }
10478
10479 /*
10480 * Handle copy-on-write obligations
10481 */
10482
10483 if (src_needs_copy && !old_entry->needs_copy) {
10484 vm_prot_t prot;
10485
10486 prot = old_entry->protection & ~VM_PROT_WRITE;
10487
10488 if (override_nx(old_map, VME_ALIAS(old_entry))
10489 && prot)
10490 prot |= VM_PROT_EXECUTE;
10491
10492 vm_object_pmap_protect(
10493 VME_OBJECT(old_entry),
10494 VME_OFFSET(old_entry),
10495 (old_entry->vme_end -
10496 old_entry->vme_start),
10497 ((old_entry->is_shared
10498 || old_map->mapped_in_other_pmaps)
10499 ? PMAP_NULL :
10500 old_map->pmap),
10501 old_entry->vme_start,
10502 prot);
10503
10504 assert(old_entry->wired_count == 0);
10505 old_entry->needs_copy = TRUE;
10506 }
10507 new_entry->needs_copy = new_entry_needs_copy;
10508
10509 /*
10510 * Insert the entry at the end
10511 * of the map.
10512 */
10513
10514 vm_map_store_entry_link(new_map, vm_map_last_entry(new_map),
10515 new_entry);
10516 new_size += entry_size;
10517 break;
10518
10519 slow_vm_map_fork_copy:
10520 if (vm_map_fork_copy(old_map, &old_entry, new_map)) {
10521 new_size += entry_size;
10522 }
10523 continue;
10524 }
10525 old_entry = old_entry->vme_next;
10526 }
10527
10528
10529 new_map->size = new_size;
10530 vm_map_unlock(old_map);
10531 vm_map_deallocate(old_map);
10532
10533 return(new_map);
10534 }
10535
10536 /*
10537 * vm_map_exec:
10538 *
10539 * Setup the "new_map" with the proper execution environment according
10540 * to the type of executable (platform, 64bit, chroot environment).
10541 * Map the comm page and shared region, etc...
10542 */
10543 kern_return_t
10544 vm_map_exec(
10545 vm_map_t new_map,
10546 task_t task,
10547 void *fsroot,
10548 cpu_type_t cpu)
10549 {
10550 SHARED_REGION_TRACE_DEBUG(
10551 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): ->\n",
10552 (void *)VM_KERNEL_ADDRPERM(current_task()),
10553 (void *)VM_KERNEL_ADDRPERM(new_map),
10554 (void *)VM_KERNEL_ADDRPERM(task),
10555 (void *)VM_KERNEL_ADDRPERM(fsroot),
10556 cpu));
10557 (void) vm_commpage_enter(new_map, task);
10558 (void) vm_shared_region_enter(new_map, task, fsroot, cpu);
10559 SHARED_REGION_TRACE_DEBUG(
10560 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): <-\n",
10561 (void *)VM_KERNEL_ADDRPERM(current_task()),
10562 (void *)VM_KERNEL_ADDRPERM(new_map),
10563 (void *)VM_KERNEL_ADDRPERM(task),
10564 (void *)VM_KERNEL_ADDRPERM(fsroot),
10565 cpu));
10566 return KERN_SUCCESS;
10567 }
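/*
 * Illustrative sketch (assumption, not in the original source): the Mach-O
 * loader is expected to call this once the fresh address space exists,
 * roughly as
 *
 *	(void) vm_map_exec(new_map, task, (void *)fsroot, cpu_type());
 *
 * so that the commpage and shared region are in place before the image's
 * segments are mapped.
 */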
10568
10569 /*
10570 * vm_map_lookup_locked:
10571 *
10572 * Finds the VM object, offset, and
10573 * protection for a given virtual address in the
10574 * specified map, assuming a page fault of the
10575 * type specified.
10576 *
10577 * Returns the (object, offset, protection) for
10578 * this address, whether it is wired down, and whether
10579 * this map has the only reference to the data in question.
10580 * In order to later verify this lookup, a "version"
10581 * is returned.
10582 *
10583 * The map MUST be locked by the caller and WILL be
10584 * locked on exit. In order to guarantee the
10585 * existence of the returned object, it is returned
10586 * locked.
10587 *
10588 * If a lookup is requested with "write protection"
10589 * specified, the map may be changed to perform virtual
10590 * copying operations, although the data referenced will
10591 * remain the same.
10592 */
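/*
 * Illustrative sketch (assumption, not in the original source; variable
 * names are illustrative): a fault handler typically pairs this call with
 * vm_map_verify():
 *
 *	vm_map_lock_read(map);
 *	kr = vm_map_lookup_locked(&map, vaddr, fault_type,
 *				  OBJECT_LOCK_EXCLUSIVE, &version,
 *				  &object, &offset, &prot, &wired,
 *				  &fault_info, &real_map);
 *	if (kr == KERN_SUCCESS) {
 *		if (real_map != map)
 *			vm_map_unlock(real_map);
 *		vm_map_unlock_read(map);
 *		...resolve the fault against the locked (object, offset)...
 *		if (!vm_map_verify(map, &version)) {
 *			...the map changed while unlocked: redo the lookup...
 *		}
 *	}
 */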
10593 kern_return_t
10594 vm_map_lookup_locked(
10595 vm_map_t *var_map, /* IN/OUT */
10596 vm_map_offset_t vaddr,
10597 vm_prot_t fault_type,
10598 int object_lock_type,
10599 vm_map_version_t *out_version, /* OUT */
10600 vm_object_t *object, /* OUT */
10601 vm_object_offset_t *offset, /* OUT */
10602 vm_prot_t *out_prot, /* OUT */
10603 boolean_t *wired, /* OUT */
10604 vm_object_fault_info_t fault_info, /* OUT */
10605 vm_map_t *real_map)
10606 {
10607 vm_map_entry_t entry;
10608 register vm_map_t map = *var_map;
10609 vm_map_t old_map = *var_map;
10610 vm_map_t cow_sub_map_parent = VM_MAP_NULL;
10611 vm_map_offset_t cow_parent_vaddr = 0;
10612 vm_map_offset_t old_start = 0;
10613 vm_map_offset_t old_end = 0;
10614 register vm_prot_t prot;
10615 boolean_t mask_protections;
10616 boolean_t force_copy;
10617 vm_prot_t original_fault_type;
10618
10619 /*
10620 * VM_PROT_MASK means that the caller wants us to use "fault_type"
10621 * as a mask against the mapping's actual protections, not as an
10622 * absolute value.
10623 */
10624 mask_protections = (fault_type & VM_PROT_IS_MASK) ? TRUE : FALSE;
10625 force_copy = (fault_type & VM_PROT_COPY) ? TRUE : FALSE;
10626 fault_type &= VM_PROT_ALL;
10627 original_fault_type = fault_type;
10628
10629 *real_map = map;
10630
10631 RetryLookup:
10632 fault_type = original_fault_type;
10633
10634 /*
10635 * If the map has an interesting hint, try it before calling
10636 * full blown lookup routine.
10637 */
10638 entry = map->hint;
10639
10640 if ((entry == vm_map_to_entry(map)) ||
10641 (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
10642 vm_map_entry_t tmp_entry;
10643
10644 /*
10645 * Entry was either not a valid hint, or the vaddr
10646 * was not contained in the entry, so do a full lookup.
10647 */
10648 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
10649 if((cow_sub_map_parent) && (cow_sub_map_parent != map))
10650 vm_map_unlock(cow_sub_map_parent);
10651 if((*real_map != map)
10652 && (*real_map != cow_sub_map_parent))
10653 vm_map_unlock(*real_map);
10654 return KERN_INVALID_ADDRESS;
10655 }
10656
10657 entry = tmp_entry;
10658 }
10659 if(map == old_map) {
10660 old_start = entry->vme_start;
10661 old_end = entry->vme_end;
10662 }
10663
10664 /*
10665 * Handle submaps. Drop lock on upper map, submap is
10666 * returned locked.
10667 */
10668
10669 submap_recurse:
10670 if (entry->is_sub_map) {
10671 vm_map_offset_t local_vaddr;
10672 vm_map_offset_t end_delta;
10673 vm_map_offset_t start_delta;
10674 vm_map_entry_t submap_entry;
10675 boolean_t mapped_needs_copy=FALSE;
10676
10677 local_vaddr = vaddr;
10678
10679 if ((entry->use_pmap && !(fault_type & VM_PROT_WRITE))) {
10680 /* if real_map equals map we unlock below */
10681 if ((*real_map != map) &&
10682 (*real_map != cow_sub_map_parent))
10683 vm_map_unlock(*real_map);
10684 *real_map = VME_SUBMAP(entry);
10685 }
10686
10687 if(entry->needs_copy && (fault_type & VM_PROT_WRITE)) {
10688 if (!mapped_needs_copy) {
10689 if (vm_map_lock_read_to_write(map)) {
10690 vm_map_lock_read(map);
10691 *real_map = map;
10692 goto RetryLookup;
10693 }
10694 vm_map_lock_read(VME_SUBMAP(entry));
10695 *var_map = VME_SUBMAP(entry);
10696 cow_sub_map_parent = map;
10697 /* reset base to map before cow object */
10698 /* this is the map which will accept */
10699 /* the new cow object */
10700 old_start = entry->vme_start;
10701 old_end = entry->vme_end;
10702 cow_parent_vaddr = vaddr;
10703 mapped_needs_copy = TRUE;
10704 } else {
10705 vm_map_lock_read(VME_SUBMAP(entry));
10706 *var_map = VME_SUBMAP(entry);
10707 if((cow_sub_map_parent != map) &&
10708 (*real_map != map))
10709 vm_map_unlock(map);
10710 }
10711 } else {
10712 vm_map_lock_read(VME_SUBMAP(entry));
10713 *var_map = VME_SUBMAP(entry);
10714 /* leave the map locked if it is a target */
10715 /* cow sub_map (see above); otherwise, just */
10716 /* follow the maps down to the object. */
10717 /* Here we unlock knowing we are not */
10718 /* revisiting the map. */
10719 if((*real_map != map) && (map != cow_sub_map_parent))
10720 vm_map_unlock_read(map);
10721 }
10722
10723 map = *var_map;
10724
10725 /* calculate the offset in the submap for vaddr */
10726 local_vaddr = (local_vaddr - entry->vme_start) + VME_OFFSET(entry);
10727
10728 RetrySubMap:
10729 if(!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
10730 if((cow_sub_map_parent) && (cow_sub_map_parent != map)){
10731 vm_map_unlock(cow_sub_map_parent);
10732 }
10733 if((*real_map != map)
10734 && (*real_map != cow_sub_map_parent)) {
10735 vm_map_unlock(*real_map);
10736 }
10737 *real_map = map;
10738 return KERN_INVALID_ADDRESS;
10739 }
10740
10741 /* find the attenuated shadow of the underlying object */
10742 /* on our target map */
10743
10744 /* In English: the submap object may extend beyond the */
10745 /* region mapped by the entry, or may only fill a portion */
10746 /* of it. For our purposes, we only care if the object */
10747 /* doesn't fill it completely. In that case the area which */
10748 /* will ultimately be clipped in the top map only needs */
10749 /* to be as big as the portion of the underlying entry */
10750 /* which is mapped. */
10751 start_delta = submap_entry->vme_start > VME_OFFSET(entry) ?
10752 submap_entry->vme_start - VME_OFFSET(entry) : 0;
10753
10754 end_delta =
10755 (VME_OFFSET(entry) + start_delta + (old_end - old_start)) <=
10756 submap_entry->vme_end ?
10757 0 : (VME_OFFSET(entry) +
10758 (old_end - old_start))
10759 - submap_entry->vme_end;
10760
10761 old_start += start_delta;
10762 old_end -= end_delta;
10763
10764 if(submap_entry->is_sub_map) {
10765 entry = submap_entry;
10766 vaddr = local_vaddr;
10767 goto submap_recurse;
10768 }
10769
10770 if(((fault_type & VM_PROT_WRITE) && cow_sub_map_parent)) {
10771
10772 vm_object_t sub_object, copy_object;
10773 vm_object_offset_t copy_offset;
10774 vm_map_offset_t local_start;
10775 vm_map_offset_t local_end;
10776 boolean_t copied_slowly = FALSE;
10777
10778 if (vm_map_lock_read_to_write(map)) {
10779 vm_map_lock_read(map);
10780 old_start -= start_delta;
10781 old_end += end_delta;
10782 goto RetrySubMap;
10783 }
10784
10785
10786 sub_object = VME_OBJECT(submap_entry);
10787 if (sub_object == VM_OBJECT_NULL) {
10788 sub_object =
10789 vm_object_allocate(
10790 (vm_map_size_t)
10791 (submap_entry->vme_end -
10792 submap_entry->vme_start));
10793 VME_OBJECT_SET(submap_entry, sub_object);
10794 VME_OFFSET_SET(submap_entry, 0);
10795 }
10796 local_start = local_vaddr -
10797 (cow_parent_vaddr - old_start);
10798 local_end = local_vaddr +
10799 (old_end - cow_parent_vaddr);
10800 vm_map_clip_start(map, submap_entry, local_start);
10801 vm_map_clip_end(map, submap_entry, local_end);
10802 if (submap_entry->is_sub_map) {
10803 /* unnesting was done when clipping */
10804 assert(!submap_entry->use_pmap);
10805 }
10806
10807 /* This is the COW case; let's connect */
10808 /* an entry in our space to the underlying */
10809 /* object in the submap, bypassing the */
10810 /* submap. */
10811
10812
10813 if(submap_entry->wired_count != 0 ||
10814 (sub_object->copy_strategy ==
10815 MEMORY_OBJECT_COPY_NONE)) {
10816 vm_object_lock(sub_object);
10817 vm_object_copy_slowly(sub_object,
10818 VME_OFFSET(submap_entry),
10819 (submap_entry->vme_end -
10820 submap_entry->vme_start),
10821 FALSE,
10822 &copy_object);
10823 copied_slowly = TRUE;
10824 } else {
10825
10826 /* set up shadow object */
10827 copy_object = sub_object;
10828 vm_object_reference(copy_object);
10829 sub_object->shadowed = TRUE;
10830 assert(submap_entry->wired_count == 0);
10831 submap_entry->needs_copy = TRUE;
10832
10833 prot = submap_entry->protection & ~VM_PROT_WRITE;
10834
10835 if (override_nx(old_map,
10836 VME_ALIAS(submap_entry))
10837 && prot)
10838 prot |= VM_PROT_EXECUTE;
10839
10840 vm_object_pmap_protect(
10841 sub_object,
10842 VME_OFFSET(submap_entry),
10843 submap_entry->vme_end -
10844 submap_entry->vme_start,
10845 (submap_entry->is_shared
10846 || map->mapped_in_other_pmaps) ?
10847 PMAP_NULL : map->pmap,
10848 submap_entry->vme_start,
10849 prot);
10850 }
10851
10852 /*
10853 * Adjust the fault offset to the submap entry.
10854 */
10855 copy_offset = (local_vaddr -
10856 submap_entry->vme_start +
10857 VME_OFFSET(submap_entry));
10858
10859 /* This works differently from the */
10860 /* normal submap case. We go back */
10861 /* to the parent of the cow map and */
10862 /* clip out the target portion of */
10863 /* the sub_map, substituting the */
10864 /* new copy object. */
10865
10866 vm_map_unlock(map);
10867 local_start = old_start;
10868 local_end = old_end;
10869 map = cow_sub_map_parent;
10870 *var_map = cow_sub_map_parent;
10871 vaddr = cow_parent_vaddr;
10872 cow_sub_map_parent = NULL;
10873
10874 if(!vm_map_lookup_entry(map,
10875 vaddr, &entry)) {
10876 vm_object_deallocate(
10877 copy_object);
10878 vm_map_lock_write_to_read(map);
10879 return KERN_INVALID_ADDRESS;
10880 }
10881
10882 /* clip out the portion of space */
10883 /* mapped by the sub map which */
10884 /* corresponds to the underlying */
10885 /* object */
10886
10887 /*
10888 * Clip (and unnest) the smallest nested chunk
10889 * possible around the faulting address...
10890 */
10891 local_start = vaddr & ~(pmap_nesting_size_min - 1);
10892 local_end = local_start + pmap_nesting_size_min;
10893 /*
10894 * ... but don't go beyond the "old_start" to "old_end"
10895 * range, to avoid spanning over another VM region
10896 * with a possibly different VM object and/or offset.
10897 */
10898 if (local_start < old_start) {
10899 local_start = old_start;
10900 }
10901 if (local_end > old_end) {
10902 local_end = old_end;
10903 }
10904 /*
10905 * Adjust copy_offset to the start of the range.
10906 */
10907 copy_offset -= (vaddr - local_start);
10908
10909 vm_map_clip_start(map, entry, local_start);
10910 vm_map_clip_end(map, entry, local_end);
10911 if (entry->is_sub_map) {
10912 /* unnesting was done when clipping */
10913 assert(!entry->use_pmap);
10914 }
10915
10916 /* substitute copy object for */
10917 /* shared map entry */
10918 vm_map_deallocate(VME_SUBMAP(entry));
10919 assert(!entry->iokit_acct);
10920 entry->is_sub_map = FALSE;
10921 entry->use_pmap = TRUE;
10922 VME_OBJECT_SET(entry, copy_object);
10923
10924 /* propagate the submap entry's protections */
10925 entry->protection |= submap_entry->protection;
10926 entry->max_protection |= submap_entry->max_protection;
10927
10928 if(copied_slowly) {
10929 VME_OFFSET_SET(entry, local_start - old_start);
10930 entry->needs_copy = FALSE;
10931 entry->is_shared = FALSE;
10932 } else {
10933 VME_OFFSET_SET(entry, copy_offset);
10934 assert(entry->wired_count == 0);
10935 entry->needs_copy = TRUE;
10936 if(entry->inheritance == VM_INHERIT_SHARE)
10937 entry->inheritance = VM_INHERIT_COPY;
10938 if (map != old_map)
10939 entry->is_shared = TRUE;
10940 }
10941 if(entry->inheritance == VM_INHERIT_SHARE)
10942 entry->inheritance = VM_INHERIT_COPY;
10943
10944 vm_map_lock_write_to_read(map);
10945 } else {
10946 if((cow_sub_map_parent)
10947 && (cow_sub_map_parent != *real_map)
10948 && (cow_sub_map_parent != map)) {
10949 vm_map_unlock(cow_sub_map_parent);
10950 }
10951 entry = submap_entry;
10952 vaddr = local_vaddr;
10953 }
10954 }
10955
10956 /*
10957 * Check whether this task is allowed to have
10958 * this page.
10959 */
10960
10961 prot = entry->protection;
10962
10963 if (override_nx(old_map, VME_ALIAS(entry)) && prot) {
10964 /*
10965 * HACK -- if not a stack, then allow execution
10966 */
10967 prot |= VM_PROT_EXECUTE;
10968 }
10969
10970 if (mask_protections) {
10971 fault_type &= prot;
10972 if (fault_type == VM_PROT_NONE) {
10973 goto protection_failure;
10974 }
10975 }
10976 if ((fault_type & (prot)) != fault_type) {
10977 protection_failure:
10978 if (*real_map != map) {
10979 vm_map_unlock(*real_map);
10980 }
10981 *real_map = map;
10982
10983 if ((fault_type & VM_PROT_EXECUTE) && prot)
10984 log_stack_execution_failure((addr64_t)vaddr, prot);
10985
10986 DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL);
10987 return KERN_PROTECTION_FAILURE;
10988 }
10989
10990 /*
10991 * If this page is not pageable, we have to get
10992 * it for all possible accesses.
10993 */
10994
10995 *wired = (entry->wired_count != 0);
10996 if (*wired)
10997 fault_type = prot;
10998
10999 /*
11000 * If the entry was copy-on-write, we either ...
11001 */
11002
11003 if (entry->needs_copy) {
11004 /*
11005 * If we want to write the page, we may as well
11006 * handle that now since we've got the map locked.
11007 *
11008 * If we don't need to write the page, we just
11009 * demote the permissions allowed.
11010 */
11011
11012 if ((fault_type & VM_PROT_WRITE) || *wired || force_copy) {
11013 /*
11014 * Make a new object, and place it in the
11015 * object chain. Note that no new references
11016 * have appeared -- one just moved from the
11017 * map to the new object.
11018 */
11019
11020 if (vm_map_lock_read_to_write(map)) {
11021 vm_map_lock_read(map);
11022 goto RetryLookup;
11023 }
11024 VME_OBJECT_SHADOW(entry,
11025 (vm_map_size_t) (entry->vme_end -
11026 entry->vme_start));
11027
11028 VME_OBJECT(entry)->shadowed = TRUE;
11029 entry->needs_copy = FALSE;
11030 vm_map_lock_write_to_read(map);
11031 }
11032 else {
11033 /*
11034 * We're attempting to read a copy-on-write
11035 * page -- don't allow writes.
11036 */
11037
11038 prot &= (~VM_PROT_WRITE);
11039 }
11040 }
11041
11042 /*
11043 * Create an object if necessary.
11044 */
11045 if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
11046
11047 if (vm_map_lock_read_to_write(map)) {
11048 vm_map_lock_read(map);
11049 goto RetryLookup;
11050 }
11051
11052 VME_OBJECT_SET(entry,
11053 vm_object_allocate(
11054 (vm_map_size_t)(entry->vme_end -
11055 entry->vme_start)));
11056 VME_OFFSET_SET(entry, 0);
11057 vm_map_lock_write_to_read(map);
11058 }
11059
11060 /*
11061 * Return the object/offset from this entry. If the entry
11062 * was copy-on-write or empty, it has been fixed up. Also
11063 * return the protection.
11064 */
11065
11066 *offset = (vaddr - entry->vme_start) + VME_OFFSET(entry);
11067 *object = VME_OBJECT(entry);
11068 *out_prot = prot;
11069
11070 if (fault_info) {
11071 fault_info->interruptible = THREAD_UNINT; /* for now... */
11072 /* ... the caller will change "interruptible" if needed */
11073 fault_info->cluster_size = 0;
11074 fault_info->user_tag = VME_ALIAS(entry);
11075 fault_info->pmap_options = 0;
11076 if (entry->iokit_acct ||
11077 (!entry->is_sub_map && !entry->use_pmap)) {
11078 fault_info->pmap_options |= PMAP_OPTIONS_ALT_ACCT;
11079 }
11080 fault_info->behavior = entry->behavior;
11081 fault_info->lo_offset = VME_OFFSET(entry);
11082 fault_info->hi_offset =
11083 (entry->vme_end - entry->vme_start) + VME_OFFSET(entry);
11084 fault_info->no_cache = entry->no_cache;
11085 fault_info->stealth = FALSE;
11086 fault_info->io_sync = FALSE;
11087 if (entry->used_for_jit ||
11088 entry->vme_resilient_codesign) {
11089 fault_info->cs_bypass = TRUE;
11090 } else {
11091 fault_info->cs_bypass = FALSE;
11092 }
11093 fault_info->mark_zf_absent = FALSE;
11094 fault_info->batch_pmap_op = FALSE;
11095 }
11096
11097 /*
11098 * Lock the object to prevent it from disappearing
11099 */
11100 if (object_lock_type == OBJECT_LOCK_EXCLUSIVE)
11101 vm_object_lock(*object);
11102 else
11103 vm_object_lock_shared(*object);
11104
11105 /*
11106 * Save the version number
11107 */
11108
11109 out_version->main_timestamp = map->timestamp;
11110
11111 return KERN_SUCCESS;
11112 }
11113
11114
11115 /*
11116 * vm_map_verify:
11117 *
11118 * Verifies that the map in question has not changed
11119 * since the given version. If successful, the map
11120 * will not change until vm_map_verify_done() is called.
11121 */
11122 boolean_t
11123 vm_map_verify(
11124 register vm_map_t map,
11125 register vm_map_version_t *version) /* REF */
11126 {
11127 boolean_t result;
11128
11129 vm_map_lock_read(map);
11130 result = (map->timestamp == version->main_timestamp);
11131
11132 if (!result)
11133 vm_map_unlock_read(map);
11134
11135 return(result);
11136 }
11137
11138 /*
11139 * vm_map_verify_done:
11140 *
11141 * Releases locks acquired by a vm_map_verify.
11142 *
11143 * This is now a macro in vm/vm_map.h. It does a
11144 * vm_map_unlock_read on the map.
11145 */
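/*
 * Illustrative pairing (assumption, not in the original source):
 *
 *	if (vm_map_verify(map, &version)) {
 *		...the map is read-locked and unchanged since the lookup,
 *		   so cached results may be used safely...
 *		vm_map_verify_done(map, &version);	(drops the read lock)
 *	} else {
 *		...redo the lookup...
 *	}
 */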
11146
11147
11148 /*
11149 * TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
11150 * Goes away after regular vm_region_recurse function migrates to
11151 * 64 bits
11152 * vm_region_recurse: A form of vm_region which follows the
11153 * submaps in a target map
11154 *
11155 */
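/*
 * Illustrative walk (assumption, not in the original source): a client of
 * the recursing region call typically iterates an address space like this:
 *
 *	vm_map_offset_t			addr = 0;
 *	vm_map_size_t			size;
 *	natural_t			depth = 0;
 *	vm_region_submap_info_data_64_t	info;
 *	mach_msg_type_number_t		count;
 *
 *	for (;;) {
 *		count = VM_REGION_SUBMAP_INFO_COUNT_64;
 *		if (vm_map_region_recurse_64(map, &addr, &size, &depth,
 *					     (vm_region_submap_info_64_t)&info,
 *					     &count) != KERN_SUCCESS)
 *			break;
 *		if (info.is_submap) {
 *			depth++;	(descend and re-query the same address)
 *			continue;
 *		}
 *		...consume [addr, addr + size)...
 *		addr += size;
 *	}
 */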
11156
11157 kern_return_t
11158 vm_map_region_recurse_64(
11159 vm_map_t map,
11160 vm_map_offset_t *address, /* IN/OUT */
11161 vm_map_size_t *size, /* OUT */
11162 natural_t *nesting_depth, /* IN/OUT */
11163 vm_region_submap_info_64_t submap_info, /* IN/OUT */
11164 mach_msg_type_number_t *count) /* IN/OUT */
11165 {
11166 mach_msg_type_number_t original_count;
11167 vm_region_extended_info_data_t extended;
11168 vm_map_entry_t tmp_entry;
11169 vm_map_offset_t user_address;
11170 unsigned int user_max_depth;
11171
11172 /*
11173 * "curr_entry" is the VM map entry preceding or including the
11174 * address we're looking for.
11175 * "curr_map" is the map or sub-map containing "curr_entry".
11176 * "curr_address" is the equivalent of the top map's "user_address"
11177 * in the current map.
11178 * "curr_offset" is the cumulated offset of "curr_map" in the
11179 * target task's address space.
11180 * "curr_depth" is the depth of "curr_map" in the chain of
11181 * sub-maps.
11182 *
11183 * "curr_max_below" and "curr_max_above" limit the range (around
11184 * "curr_address") we should take into account in the current (sub)map.
11185 * They limit the range to what's visible through the map entries
11186 * we've traversed from the top map to the current map.
11187 *
11188 */
11189 vm_map_entry_t curr_entry;
11190 vm_map_address_t curr_address;
11191 vm_map_offset_t curr_offset;
11192 vm_map_t curr_map;
11193 unsigned int curr_depth;
11194 vm_map_offset_t curr_max_below, curr_max_above;
11195 vm_map_offset_t curr_skip;
11196
11197 /*
11198 * "next_" is the same as "curr_" but for the VM region immediately
11199 * after the address we're looking for. We need to keep track of this
11200 * too because we want to return info about that region if the
11201 * address we're looking for is not mapped.
11202 */
11203 vm_map_entry_t next_entry;
11204 vm_map_offset_t next_offset;
11205 vm_map_offset_t next_address;
11206 vm_map_t next_map;
11207 unsigned int next_depth;
11208 vm_map_offset_t next_max_below, next_max_above;
11209 vm_map_offset_t next_skip;
11210
11211 boolean_t look_for_pages;
11212 vm_region_submap_short_info_64_t short_info;
11213
11214 if (map == VM_MAP_NULL) {
11215 /* no address space to work on */
11216 return KERN_INVALID_ARGUMENT;
11217 }
11218
11219
11220 if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) {
11221 /*
11222 * "info" structure is not big enough and
11223 * would overflow
11224 */
11225 return KERN_INVALID_ARGUMENT;
11226 }
11227
11228 original_count = *count;
11229
11230 if (original_count < VM_REGION_SUBMAP_INFO_V0_COUNT_64) {
11231 *count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
11232 look_for_pages = FALSE;
11233 short_info = (vm_region_submap_short_info_64_t) submap_info;
11234 submap_info = NULL;
11235 } else {
11236 look_for_pages = TRUE;
11237 *count = VM_REGION_SUBMAP_INFO_V0_COUNT_64;
11238 short_info = NULL;
11239
11240 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
11241 *count = VM_REGION_SUBMAP_INFO_V1_COUNT_64;
11242 }
11243 }
11244
11245 user_address = *address;
11246 user_max_depth = *nesting_depth;
11247
11248 if (not_in_kdp) {
11249 vm_map_lock_read(map);
11250 }
11251
11252 recurse_again:
11253 curr_entry = NULL;
11254 curr_map = map;
11255 curr_address = user_address;
11256 curr_offset = 0;
11257 curr_skip = 0;
11258 curr_depth = 0;
11259 curr_max_above = ((vm_map_offset_t) -1) - curr_address;
11260 curr_max_below = curr_address;
11261
11262 next_entry = NULL;
11263 next_map = NULL;
11264 next_address = 0;
11265 next_offset = 0;
11266 next_skip = 0;
11267 next_depth = 0;
11268 next_max_above = (vm_map_offset_t) -1;
11269 next_max_below = (vm_map_offset_t) -1;
11270
11271 for (;;) {
11272 if (vm_map_lookup_entry(curr_map,
11273 curr_address,
11274 &tmp_entry)) {
11275 /* tmp_entry contains the address we're looking for */
11276 curr_entry = tmp_entry;
11277 } else {
11278 vm_map_offset_t skip;
11279 /*
11280 * The address is not mapped. "tmp_entry" is the
11281 * map entry preceding the address. We want the next
11282 * one, if it exists.
11283 */
11284 curr_entry = tmp_entry->vme_next;
11285
11286 if (curr_entry == vm_map_to_entry(curr_map) ||
11287 (curr_entry->vme_start >=
11288 curr_address + curr_max_above)) {
11289 /* no next entry at this level: stop looking */
11290 if (not_in_kdp) {
11291 vm_map_unlock_read(curr_map);
11292 }
11293 curr_entry = NULL;
11294 curr_map = NULL;
11295 curr_skip = 0;
11296 curr_offset = 0;
11297 curr_depth = 0;
11298 curr_max_above = 0;
11299 curr_max_below = 0;
11300 break;
11301 }
11302
11303 /* adjust current address and offset */
11304 skip = curr_entry->vme_start - curr_address;
11305 curr_address = curr_entry->vme_start;
11306 curr_skip += skip;
11307 curr_offset += skip;
11308 curr_max_above -= skip;
11309 curr_max_below = 0;
11310 }
11311
11312 /*
11313 * Is the next entry at this level closer to the address (or
11314 * deeper in the submap chain) than the one we had
11315 * so far ?
11316 */
11317 tmp_entry = curr_entry->vme_next;
11318 if (tmp_entry == vm_map_to_entry(curr_map)) {
11319 /* no next entry at this level */
11320 } else if (tmp_entry->vme_start >=
11321 curr_address + curr_max_above) {
11322 /*
11323 * tmp_entry is beyond the scope of what we mapped of
11324 * this submap in the upper level: ignore it.
11325 */
11326 } else if ((next_entry == NULL) ||
11327 (tmp_entry->vme_start + curr_offset <=
11328 next_entry->vme_start + next_offset)) {
11329 /*
11330 * We didn't have a "next_entry" or this one is
11331 * closer to the address we're looking for:
11332 * use this "tmp_entry" as the new "next_entry".
11333 */
11334 if (next_entry != NULL) {
11335 /* unlock the last "next_map" */
11336 if (next_map != curr_map && not_in_kdp) {
11337 vm_map_unlock_read(next_map);
11338 }
11339 }
11340 next_entry = tmp_entry;
11341 next_map = curr_map;
11342 next_depth = curr_depth;
11343 next_address = next_entry->vme_start;
11344 next_skip = curr_skip;
11345 next_skip += (next_address - curr_address);
11346 next_offset = curr_offset;
11347 next_offset += (next_address - curr_address);
11348 next_max_above = MIN(next_max_above, curr_max_above);
11349 next_max_above = MIN(next_max_above,
11350 next_entry->vme_end - next_address);
11351 next_max_below = MIN(next_max_below, curr_max_below);
11352 next_max_below = MIN(next_max_below,
11353 next_address - next_entry->vme_start);
11354 }
11355
11356 /*
11357 * "curr_max_{above,below}" allow us to keep track of the
11358 * portion of the submap that is actually mapped at this level:
11359 * the rest of that submap is irrelevant to us, since it's not
11360 * mapped here.
11361 * The relevant portion of the map starts at
11362 * "VME_OFFSET(curr_entry)" up to the size of "curr_entry".
11363 */
11364 curr_max_above = MIN(curr_max_above,
11365 curr_entry->vme_end - curr_address);
11366 curr_max_below = MIN(curr_max_below,
11367 curr_address - curr_entry->vme_start);
11368
11369 if (!curr_entry->is_sub_map ||
11370 curr_depth >= user_max_depth) {
11371 /*
11372 * We hit a leaf map or we reached the maximum depth
11373 * we could, so stop looking. Keep the current map
11374 * locked.
11375 */
11376 break;
11377 }
11378
11379 /*
11380 * Get down to the next submap level.
11381 */
11382
11383 /*
11384 * Lock the next level and unlock the current level,
11385 * unless we need to keep it locked to access the "next_entry"
11386 * later.
11387 */
11388 if (not_in_kdp) {
11389 vm_map_lock_read(VME_SUBMAP(curr_entry));
11390 }
11391 if (curr_map == next_map) {
11392 /* keep "next_map" locked in case we need it */
11393 } else {
11394 /* release this map */
11395 if (not_in_kdp)
11396 vm_map_unlock_read(curr_map);
11397 }
11398
11399 /*
11400 * Adjust the offset. "curr_entry" maps the submap
11401 * at relative address "curr_entry->vme_start" in the
11402 * curr_map but skips the first "VME_OFFSET(curr_entry)"
11403 * bytes of the submap.
11404 * "curr_offset" always represents the offset of a virtual
11405 * address in the curr_map relative to the absolute address
11406 * space (i.e. the top-level VM map).
11407 */
11408 curr_offset +=
11409 (VME_OFFSET(curr_entry) - curr_entry->vme_start);
11410 curr_address = user_address + curr_offset;
11411 /* switch to the submap */
11412 curr_map = VME_SUBMAP(curr_entry);
11413 curr_depth++;
11414 curr_entry = NULL;
11415 }
11416
11417 if (curr_entry == NULL) {
11418 /* no VM region contains the address... */
11419 if (next_entry == NULL) {
11420 /* ... and no VM region follows it either */
11421 return KERN_INVALID_ADDRESS;
11422 }
11423 /* ... gather info about the next VM region */
11424 curr_entry = next_entry;
11425 curr_map = next_map; /* still locked ... */
11426 curr_address = next_address;
11427 curr_skip = next_skip;
11428 curr_offset = next_offset;
11429 curr_depth = next_depth;
11430 curr_max_above = next_max_above;
11431 curr_max_below = next_max_below;
11432 } else {
11433 /* we won't need "next_entry" after all */
11434 if (next_entry != NULL) {
11435 /* release "next_map" */
11436 if (next_map != curr_map && not_in_kdp) {
11437 vm_map_unlock_read(next_map);
11438 }
11439 }
11440 }
11441 next_entry = NULL;
11442 next_map = NULL;
11443 next_offset = 0;
11444 next_skip = 0;
11445 next_depth = 0;
11446 next_max_below = -1;
11447 next_max_above = -1;
11448
11449 if (curr_entry->is_sub_map &&
11450 curr_depth < user_max_depth) {
11451 /*
11452 * We're not as deep as we could be: we must have
11453 * gone back up after not finding anything mapped
11454 * below the original top-level map entry.
11455 * Let's move "curr_address" forward and recurse again.
11456 */
11457 user_address = curr_address;
11458 goto recurse_again;
11459 }
11460
11461 *nesting_depth = curr_depth;
11462 *size = curr_max_above + curr_max_below;
11463 *address = user_address + curr_skip - curr_max_below;
11464
11465 // LP64todo: all the current tools are 32-bit, so this obviously never worked for 64-bit
11466 // and should probably be a real 32-bit ID rather than a pointer.
11467 // Current users just check for equality.
11468 #define INFO_MAKE_OBJECT_ID(p) ((uint32_t)(uintptr_t)VM_KERNEL_ADDRPERM(p))
11469
11470 if (look_for_pages) {
11471 submap_info->user_tag = VME_ALIAS(curr_entry);
11472 submap_info->offset = VME_OFFSET(curr_entry);
11473 submap_info->protection = curr_entry->protection;
11474 submap_info->inheritance = curr_entry->inheritance;
11475 submap_info->max_protection = curr_entry->max_protection;
11476 submap_info->behavior = curr_entry->behavior;
11477 submap_info->user_wired_count = curr_entry->user_wired_count;
11478 submap_info->is_submap = curr_entry->is_sub_map;
11479 submap_info->object_id = INFO_MAKE_OBJECT_ID(VME_OBJECT(curr_entry));
11480 } else {
11481 short_info->user_tag = VME_ALIAS(curr_entry);
11482 short_info->offset = VME_OFFSET(curr_entry);
11483 short_info->protection = curr_entry->protection;
11484 short_info->inheritance = curr_entry->inheritance;
11485 short_info->max_protection = curr_entry->max_protection;
11486 short_info->behavior = curr_entry->behavior;
11487 short_info->user_wired_count = curr_entry->user_wired_count;
11488 short_info->is_submap = curr_entry->is_sub_map;
11489 short_info->object_id = INFO_MAKE_OBJECT_ID(VME_OBJECT(curr_entry));
11490 }
11491
11492 extended.pages_resident = 0;
11493 extended.pages_swapped_out = 0;
11494 extended.pages_shared_now_private = 0;
11495 extended.pages_dirtied = 0;
11496 extended.pages_reusable = 0;
11497 extended.external_pager = 0;
11498 extended.shadow_depth = 0;
11499 extended.share_mode = SM_EMPTY;
11500 extended.ref_count = 0;
11501
11502 if (not_in_kdp) {
11503 if (!curr_entry->is_sub_map) {
11504 vm_map_offset_t range_start, range_end;
11505 range_start = MAX((curr_address - curr_max_below),
11506 curr_entry->vme_start);
11507 range_end = MIN((curr_address + curr_max_above),
11508 curr_entry->vme_end);
11509 vm_map_region_walk(curr_map,
11510 range_start,
11511 curr_entry,
11512 (VME_OFFSET(curr_entry) +
11513 (range_start -
11514 curr_entry->vme_start)),
11515 range_end - range_start,
11516 &extended,
11517 look_for_pages, VM_REGION_EXTENDED_INFO_COUNT);
11518 if (extended.external_pager &&
11519 extended.ref_count == 2 &&
11520 extended.share_mode == SM_SHARED) {
11521 extended.share_mode = SM_PRIVATE;
11522 }
11523 } else {
11524 if (curr_entry->use_pmap) {
11525 extended.share_mode = SM_TRUESHARED;
11526 } else {
11527 extended.share_mode = SM_PRIVATE;
11528 }
11529 extended.ref_count = VME_SUBMAP(curr_entry)->ref_count;
11530 }
11531 }
11532
11533 if (look_for_pages) {
11534 submap_info->pages_resident = extended.pages_resident;
11535 submap_info->pages_swapped_out = extended.pages_swapped_out;
11536 submap_info->pages_shared_now_private =
11537 extended.pages_shared_now_private;
11538 submap_info->pages_dirtied = extended.pages_dirtied;
11539 submap_info->external_pager = extended.external_pager;
11540 submap_info->shadow_depth = extended.shadow_depth;
11541 submap_info->share_mode = extended.share_mode;
11542 submap_info->ref_count = extended.ref_count;
11543
11544 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
11545 submap_info->pages_reusable = extended.pages_reusable;
11546 }
11547 } else {
11548 short_info->external_pager = extended.external_pager;
11549 short_info->shadow_depth = extended.shadow_depth;
11550 short_info->share_mode = extended.share_mode;
11551 short_info->ref_count = extended.ref_count;
11552 }
11553
11554 if (not_in_kdp) {
11555 vm_map_unlock_read(curr_map);
11556 }
11557
11558 return KERN_SUCCESS;
11559 }
11560
11561 /*
11562 * vm_region:
11563 *
11564 * User call to obtain information about a region in
11565 * a task's address map. The basic, 64-bit basic,
11566 * extended and top info flavors are supported.
11567 *
11568 * XXX The reserved and behavior fields cannot be filled
11569 * in until the vm merge from the IK is completed, and
11570 * vm_reserve is implemented.
11571 */
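/*
 * Illustrative sketch (assumption, not in the original source): callers
 * choose a flavor and size "count" to match; e.g. for the 64-bit basic
 * info:
 *
 *	vm_region_basic_info_data_64_t	info;
 *	mach_msg_type_number_t		count = VM_REGION_BASIC_INFO_COUNT_64;
 *	mach_port_t			object_name;
 *
 *	kr = vm_map_region(map, &addr, &size, VM_REGION_BASIC_INFO_64,
 *			   (vm_region_info_t)&info, &count, &object_name);
 */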
11572
11573 kern_return_t
11574 vm_map_region(
11575 vm_map_t map,
11576 vm_map_offset_t *address, /* IN/OUT */
11577 vm_map_size_t *size, /* OUT */
11578 vm_region_flavor_t flavor, /* IN */
11579 vm_region_info_t info, /* OUT */
11580 mach_msg_type_number_t *count, /* IN/OUT */
11581 mach_port_t *object_name) /* OUT */
11582 {
11583 vm_map_entry_t tmp_entry;
11584 vm_map_entry_t entry;
11585 vm_map_offset_t start;
11586
11587 if (map == VM_MAP_NULL)
11588 return(KERN_INVALID_ARGUMENT);
11589
11590 switch (flavor) {
11591
11592 case VM_REGION_BASIC_INFO:
11593 /* legacy for old 32-bit objects info */
11594 {
11595 vm_region_basic_info_t basic;
11596
11597 if (*count < VM_REGION_BASIC_INFO_COUNT)
11598 return(KERN_INVALID_ARGUMENT);
11599
11600 basic = (vm_region_basic_info_t) info;
11601 *count = VM_REGION_BASIC_INFO_COUNT;
11602
11603 vm_map_lock_read(map);
11604
11605 start = *address;
11606 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
11607 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
11608 vm_map_unlock_read(map);
11609 return(KERN_INVALID_ADDRESS);
11610 }
11611 } else {
11612 entry = tmp_entry;
11613 }
11614
11615 start = entry->vme_start;
11616
11617 basic->offset = (uint32_t)VME_OFFSET(entry);
11618 basic->protection = entry->protection;
11619 basic->inheritance = entry->inheritance;
11620 basic->max_protection = entry->max_protection;
11621 basic->behavior = entry->behavior;
11622 basic->user_wired_count = entry->user_wired_count;
11623 basic->reserved = entry->is_sub_map;
11624 *address = start;
11625 *size = (entry->vme_end - start);
11626
11627 if (object_name) *object_name = IP_NULL;
11628 if (entry->is_sub_map) {
11629 basic->shared = FALSE;
11630 } else {
11631 basic->shared = entry->is_shared;
11632 }
11633
11634 vm_map_unlock_read(map);
11635 return(KERN_SUCCESS);
11636 }
11637
11638 case VM_REGION_BASIC_INFO_64:
11639 {
11640 vm_region_basic_info_64_t basic;
11641
11642 if (*count < VM_REGION_BASIC_INFO_COUNT_64)
11643 return(KERN_INVALID_ARGUMENT);
11644
11645 basic = (vm_region_basic_info_64_t) info;
11646 *count = VM_REGION_BASIC_INFO_COUNT_64;
11647
11648 vm_map_lock_read(map);
11649
11650 start = *address;
11651 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
11652 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
11653 vm_map_unlock_read(map);
11654 return(KERN_INVALID_ADDRESS);
11655 }
11656 } else {
11657 entry = tmp_entry;
11658 }
11659
11660 start = entry->vme_start;
11661
11662 basic->offset = VME_OFFSET(entry);
11663 basic->protection = entry->protection;
11664 basic->inheritance = entry->inheritance;
11665 basic->max_protection = entry->max_protection;
11666 basic->behavior = entry->behavior;
11667 basic->user_wired_count = entry->user_wired_count;
11668 basic->reserved = entry->is_sub_map;
11669 *address = start;
11670 *size = (entry->vme_end - start);
11671
11672 if (object_name) *object_name = IP_NULL;
11673 if (entry->is_sub_map) {
11674 basic->shared = FALSE;
11675 } else {
11676 basic->shared = entry->is_shared;
11677 }
11678
11679 vm_map_unlock_read(map);
11680 return(KERN_SUCCESS);
11681 }
11682 case VM_REGION_EXTENDED_INFO:
11683 if (*count < VM_REGION_EXTENDED_INFO_COUNT)
11684 return(KERN_INVALID_ARGUMENT);
11685 /*fallthru*/
11686 case VM_REGION_EXTENDED_INFO__legacy:
11687 if (*count < VM_REGION_EXTENDED_INFO_COUNT__legacy)
11688 return KERN_INVALID_ARGUMENT;
11689
11690 {
11691 vm_region_extended_info_t extended;
11692 mach_msg_type_number_t original_count;
11693
11694 extended = (vm_region_extended_info_t) info;
11695
11696 vm_map_lock_read(map);
11697
11698 start = *address;
11699 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
11700 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
11701 vm_map_unlock_read(map);
11702 return(KERN_INVALID_ADDRESS);
11703 }
11704 } else {
11705 entry = tmp_entry;
11706 }
11707 start = entry->vme_start;
11708
11709 extended->protection = entry->protection;
11710 extended->user_tag = VME_ALIAS(entry);
11711 extended->pages_resident = 0;
11712 extended->pages_swapped_out = 0;
11713 extended->pages_shared_now_private = 0;
11714 extended->pages_dirtied = 0;
11715 extended->external_pager = 0;
11716 extended->shadow_depth = 0;
11717
11718 original_count = *count;
11719 if (flavor == VM_REGION_EXTENDED_INFO__legacy) {
11720 *count = VM_REGION_EXTENDED_INFO_COUNT__legacy;
11721 } else {
11722 extended->pages_reusable = 0;
11723 *count = VM_REGION_EXTENDED_INFO_COUNT;
11724 }
11725
11726 vm_map_region_walk(map, start, entry, VME_OFFSET(entry), entry->vme_end - start, extended, TRUE, *count);
11727
11728 if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED)
11729 extended->share_mode = SM_PRIVATE;
11730
11731 if (object_name)
11732 *object_name = IP_NULL;
11733 *address = start;
11734 *size = (entry->vme_end - start);
11735
11736 vm_map_unlock_read(map);
11737 return(KERN_SUCCESS);
11738 }
11739 case VM_REGION_TOP_INFO:
11740 {
11741 vm_region_top_info_t top;
11742
11743 if (*count < VM_REGION_TOP_INFO_COUNT)
11744 return(KERN_INVALID_ARGUMENT);
11745
11746 top = (vm_region_top_info_t) info;
11747 *count = VM_REGION_TOP_INFO_COUNT;
11748
11749 vm_map_lock_read(map);
11750
11751 start = *address;
11752 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
11753 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
11754 vm_map_unlock_read(map);
11755 return(KERN_INVALID_ADDRESS);
11756 }
11757 } else {
11758 entry = tmp_entry;
11759
11760 }
11761 start = entry->vme_start;
11762
11763 top->private_pages_resident = 0;
11764 top->shared_pages_resident = 0;
11765
11766 vm_map_region_top_walk(entry, top);
11767
11768 if (object_name)
11769 *object_name = IP_NULL;
11770 *address = start;
11771 *size = (entry->vme_end - start);
11772
11773 vm_map_unlock_read(map);
11774 return(KERN_SUCCESS);
11775 }
11776 default:
11777 return(KERN_INVALID_ARGUMENT);
11778 }
11779 }
11780
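/*
 * (Explanatory note, not in the original source.)
 * Number of resident pages to charge to a mapping of "entry_size" pages of
 * "obj": for an all-reusable object only the wired pages are counted,
 * otherwise reusable pages are excluded; the result is clamped to the size
 * of the mapping.
 */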
11781 #define OBJ_RESIDENT_COUNT(obj, entry_size) \
11782 MIN((entry_size), \
11783 ((obj)->all_reusable ? \
11784 (obj)->wired_page_count : \
11785 (obj)->resident_page_count - (obj)->reusable_page_count))
11786
11787 void
11788 vm_map_region_top_walk(
11789 vm_map_entry_t entry,
11790 vm_region_top_info_t top)
11791 {
11792
11793 if (VME_OBJECT(entry) == 0 || entry->is_sub_map) {
11794 top->share_mode = SM_EMPTY;
11795 top->ref_count = 0;
11796 top->obj_id = 0;
11797 return;
11798 }
11799
11800 {
11801 struct vm_object *obj, *tmp_obj;
11802 int ref_count;
11803 uint32_t entry_size;
11804
11805 entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64);
11806
11807 obj = VME_OBJECT(entry);
11808
11809 vm_object_lock(obj);
11810
11811 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
11812 ref_count--;
11813
11814 assert(obj->reusable_page_count <= obj->resident_page_count);
11815 if (obj->shadow) {
11816 if (ref_count == 1)
11817 top->private_pages_resident =
11818 OBJ_RESIDENT_COUNT(obj, entry_size);
11819 else
11820 top->shared_pages_resident =
11821 OBJ_RESIDENT_COUNT(obj, entry_size);
11822 top->ref_count = ref_count;
11823 top->share_mode = SM_COW;
11824
11825 while ((tmp_obj = obj->shadow)) {
11826 vm_object_lock(tmp_obj);
11827 vm_object_unlock(obj);
11828 obj = tmp_obj;
11829
11830 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
11831 ref_count--;
11832
11833 assert(obj->reusable_page_count <= obj->resident_page_count);
11834 top->shared_pages_resident +=
11835 OBJ_RESIDENT_COUNT(obj, entry_size);
11836 top->ref_count += ref_count - 1;
11837 }
11838 } else {
11839 if (entry->superpage_size) {
11840 top->share_mode = SM_LARGE_PAGE;
11841 top->shared_pages_resident = 0;
11842 top->private_pages_resident = entry_size;
11843 } else if (entry->needs_copy) {
11844 top->share_mode = SM_COW;
11845 top->shared_pages_resident =
11846 OBJ_RESIDENT_COUNT(obj, entry_size);
11847 } else {
11848 if (ref_count == 1 ||
11849 (ref_count == 2 && !(obj->pager_trusted) && !(obj->internal))) {
11850 top->share_mode = SM_PRIVATE;
11851 top->private_pages_resident =
11852 OBJ_RESIDENT_COUNT(obj,
11853 entry_size);
11854 } else {
11855 top->share_mode = SM_SHARED;
11856 top->shared_pages_resident =
11857 OBJ_RESIDENT_COUNT(obj,
11858 entry_size);
11859 }
11860 }
11861 top->ref_count = ref_count;
11862 }
11863 /* XXX K64: obj_id will be truncated */
11864 top->obj_id = (unsigned int) (uintptr_t)VM_KERNEL_ADDRPERM(obj);
11865
11866 vm_object_unlock(obj);
11867 }
11868 }
11869
11870 void
11871 vm_map_region_walk(
11872 vm_map_t map,
11873 vm_map_offset_t va,
11874 vm_map_entry_t entry,
11875 vm_object_offset_t offset,
11876 vm_object_size_t range,
11877 vm_region_extended_info_t extended,
11878 boolean_t look_for_pages,
11879 mach_msg_type_number_t count)
11880 {
11881 register struct vm_object *obj, *tmp_obj;
11882 register vm_map_offset_t last_offset;
11883 register int i;
11884 register int ref_count;
11885 struct vm_object *shadow_object;
11886 int shadow_depth;
11887
11888 if ((VME_OBJECT(entry) == 0) ||
11889 (entry->is_sub_map) ||
11890 (VME_OBJECT(entry)->phys_contiguous &&
11891 !entry->superpage_size)) {
11892 extended->share_mode = SM_EMPTY;
11893 extended->ref_count = 0;
11894 return;
11895 }
11896
11897 if (entry->superpage_size) {
11898 extended->shadow_depth = 0;
11899 extended->share_mode = SM_LARGE_PAGE;
11900 extended->ref_count = 1;
11901 extended->external_pager = 0;
11902 extended->pages_resident = (unsigned int)(range >> PAGE_SHIFT);
11903 extended->shadow_depth = 0;
11904 return;
11905 }
11906
11907 {
11908 obj = VME_OBJECT(entry);
11909
11910 vm_object_lock(obj);
11911
11912 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
11913 ref_count--;
11914
11915 if (look_for_pages) {
11916 for (last_offset = offset + range;
11917 offset < last_offset;
11918 offset += PAGE_SIZE_64, va += PAGE_SIZE) {
11919 vm_map_region_look_for_page(map, va, obj,
11920 offset, ref_count,
11921 0, extended, count);
11922 }
11923 } else {
11924 shadow_object = obj->shadow;
11925 shadow_depth = 0;
11926
11927 if ( !(obj->pager_trusted) && !(obj->internal))
11928 extended->external_pager = 1;
11929
11930 if (shadow_object != VM_OBJECT_NULL) {
11931 vm_object_lock(shadow_object);
11932 for (;
11933 shadow_object != VM_OBJECT_NULL;
11934 shadow_depth++) {
11935 vm_object_t next_shadow;
11936
11937 if ( !(shadow_object->pager_trusted) &&
11938 !(shadow_object->internal))
11939 extended->external_pager = 1;
11940
11941 next_shadow = shadow_object->shadow;
11942 if (next_shadow) {
11943 vm_object_lock(next_shadow);
11944 }
11945 vm_object_unlock(shadow_object);
11946 shadow_object = next_shadow;
11947 }
11948 }
11949 extended->shadow_depth = shadow_depth;
11950 }
11951
11952 if (extended->shadow_depth || entry->needs_copy)
11953 extended->share_mode = SM_COW;
11954 else {
11955 if (ref_count == 1)
11956 extended->share_mode = SM_PRIVATE;
11957 else {
11958 if (obj->true_share)
11959 extended->share_mode = SM_TRUESHARED;
11960 else
11961 extended->share_mode = SM_SHARED;
11962 }
11963 }
11964 extended->ref_count = ref_count - extended->shadow_depth;
11965
11966 for (i = 0; i < extended->shadow_depth; i++) {
11967 if ((tmp_obj = obj->shadow) == 0)
11968 break;
11969 vm_object_lock(tmp_obj);
11970 vm_object_unlock(obj);
11971
11972 if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress)
11973 ref_count--;
11974
11975 extended->ref_count += ref_count;
11976 obj = tmp_obj;
11977 }
11978 vm_object_unlock(obj);
11979
11980 if (extended->share_mode == SM_SHARED) {
11981 register vm_map_entry_t cur;
11982 register vm_map_entry_t last;
11983 int my_refs;
11984
11985 obj = VME_OBJECT(entry);
11986 last = vm_map_to_entry(map);
11987 my_refs = 0;
11988
11989 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
11990 ref_count--;
11991 for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next)
11992 my_refs += vm_map_region_count_obj_refs(cur, obj);
11993
11994 if (my_refs == ref_count)
11995 extended->share_mode = SM_PRIVATE_ALIASED;
11996 else if (my_refs > 1)
11997 extended->share_mode = SM_SHARED_ALIASED;
11998 }
11999 }
12000 }
12001
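/*
 * Illustrative sketch (editorial): vm_map_region_walk() above is the routine
 * behind the VM_REGION_EXTENDED_INFO flavor of mach_vm_region(), which
 * requests "look_for_pages", so the per-page loop above is the expensive
 * part.  Assuming the same setup as the TOP_INFO example earlier, only the
 * flavor, info structure and count change:
 *
 *	vm_region_extended_info_data_t einfo;
 *	mach_msg_type_number_t count = VM_REGION_EXTENDED_INFO_COUNT;
 *
 *	kern_return_t kr = mach_vm_region(mach_task_self(), &addr, &size,
 *	    VM_REGION_EXTENDED_INFO, (vm_region_info_t)&einfo,
 *	    &count, &obj_name);
 *
 * Note that "pages_reusable" is only reported when the caller passes the
 * full VM_REGION_EXTENDED_INFO_COUNT (see the "count >=" check in
 * vm_map_region_look_for_page() below).
 */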
12002
12003 /* object is locked on entry and locked on return */
12004
12005
12006 static void
12007 vm_map_region_look_for_page(
12008 __unused vm_map_t map,
12009 __unused vm_map_offset_t va,
12010 vm_object_t object,
12011 vm_object_offset_t offset,
12012 int max_refcnt,
12013 int depth,
12014 vm_region_extended_info_t extended,
12015 mach_msg_type_number_t count)
12016 {
12017 register vm_page_t p;
12018 register vm_object_t shadow;
12019 register int ref_count;
12020 vm_object_t caller_object;
12021 kern_return_t kr;
12022 shadow = object->shadow;
12023 caller_object = object;
12024
12025
12026 while (TRUE) {
12027
12028 if ( !(object->pager_trusted) && !(object->internal))
12029 extended->external_pager = 1;
12030
12031 if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
12032 if (shadow && (max_refcnt == 1))
12033 extended->pages_shared_now_private++;
12034
12035 if (!p->fictitious &&
12036 (p->dirty || pmap_is_modified(p->phys_page)))
12037 extended->pages_dirtied++;
12038 else if (count >= VM_REGION_EXTENDED_INFO_COUNT) {
12039 if (p->reusable || p->object->all_reusable) {
12040 extended->pages_reusable++;
12041 }
12042 }
12043
12044 extended->pages_resident++;
12045
12046 if(object != caller_object)
12047 vm_object_unlock(object);
12048
12049 return;
12050 }
12051 #if MACH_PAGEMAP
12052 if (object->existence_map) {
12053 if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_EXISTS) {
12054
12055 extended->pages_swapped_out++;
12056
12057 if(object != caller_object)
12058 vm_object_unlock(object);
12059
12060 return;
12061 }
12062 } else
12063 #endif /* MACH_PAGEMAP */
12064 if (object->internal &&
12065 object->alive &&
12066 !object->terminating &&
12067 object->pager_ready) {
12068
12069 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
12070 if (VM_COMPRESSOR_PAGER_STATE_GET(object,
12071 offset)
12072 == VM_EXTERNAL_STATE_EXISTS) {
12073 /* the pager has that page */
12074 extended->pages_swapped_out++;
12075 if (object != caller_object)
12076 vm_object_unlock(object);
12077 return;
12078 }
12079 } else {
12080 memory_object_t pager;
12081
12082 vm_object_paging_begin(object);
12083 pager = object->pager;
12084 vm_object_unlock(object);
12085
12086 kr = memory_object_data_request(
12087 pager,
12088 offset + object->paging_offset,
12089 0, /* just poke the pager */
12090 VM_PROT_READ,
12091 NULL);
12092
12093 vm_object_lock(object);
12094 vm_object_paging_end(object);
12095
12096 if (kr == KERN_SUCCESS) {
12097 /* the pager has that page */
12098 extended->pages_swapped_out++;
12099 if (object != caller_object)
12100 vm_object_unlock(object);
12101 return;
12102 }
12103 }
12104 }
12105
12106 if (shadow) {
12107 vm_object_lock(shadow);
12108
12109 if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress)
12110 ref_count--;
12111
12112 if (++depth > extended->shadow_depth)
12113 extended->shadow_depth = depth;
12114
12115 if (ref_count > max_refcnt)
12116 max_refcnt = ref_count;
12117
12118 if(object != caller_object)
12119 vm_object_unlock(object);
12120
12121 offset = offset + object->vo_shadow_offset;
12122 object = shadow;
12123 shadow = object->shadow;
12124 continue;
12125 }
12126 if(object != caller_object)
12127 vm_object_unlock(object);
12128 break;
12129 }
12130 }
12131
12132 static int
12133 vm_map_region_count_obj_refs(
12134 vm_map_entry_t entry,
12135 vm_object_t object)
12136 {
12137 register int ref_count;
12138 register vm_object_t chk_obj;
12139 register vm_object_t tmp_obj;
12140
12141 if (VME_OBJECT(entry) == 0)
12142 return(0);
12143
12144 if (entry->is_sub_map)
12145 return(0);
12146 else {
12147 ref_count = 0;
12148
12149 chk_obj = VME_OBJECT(entry);
12150 vm_object_lock(chk_obj);
12151
12152 while (chk_obj) {
12153 if (chk_obj == object)
12154 ref_count++;
12155 tmp_obj = chk_obj->shadow;
12156 if (tmp_obj)
12157 vm_object_lock(tmp_obj);
12158 vm_object_unlock(chk_obj);
12159
12160 chk_obj = tmp_obj;
12161 }
12162 }
12163 return(ref_count);
12164 }
12165
12166
12167 /*
12168 * Routine: vm_map_simplify
12169 *
12170 * Description:
12171 * Attempt to simplify the map representation in
12172 * the vicinity of the given starting address.
12173 * Note:
12174 * This routine is intended primarily to keep the
12175 * kernel maps more compact -- they generally don't
12176 * benefit from the "expand a map entry" technology
12177 * at allocation time because the adjacent entry
12178 * is often wired down.
12179 */
12180 void
12181 vm_map_simplify_entry(
12182 vm_map_t map,
12183 vm_map_entry_t this_entry)
12184 {
12185 vm_map_entry_t prev_entry;
12186
12187 counter(c_vm_map_simplify_entry_called++);
12188
12189 prev_entry = this_entry->vme_prev;
12190
12191 if ((this_entry != vm_map_to_entry(map)) &&
12192 (prev_entry != vm_map_to_entry(map)) &&
12193
12194 (prev_entry->vme_end == this_entry->vme_start) &&
12195
12196 (prev_entry->is_sub_map == this_entry->is_sub_map) &&
12197 (VME_OBJECT(prev_entry) == VME_OBJECT(this_entry)) &&
12198 ((VME_OFFSET(prev_entry) + (prev_entry->vme_end -
12199 prev_entry->vme_start))
12200 == VME_OFFSET(this_entry)) &&
12201
12202 (prev_entry->behavior == this_entry->behavior) &&
12203 (prev_entry->needs_copy == this_entry->needs_copy) &&
12204 (prev_entry->protection == this_entry->protection) &&
12205 (prev_entry->max_protection == this_entry->max_protection) &&
12206 (prev_entry->inheritance == this_entry->inheritance) &&
12207 (prev_entry->use_pmap == this_entry->use_pmap) &&
12208 (VME_ALIAS(prev_entry) == VME_ALIAS(this_entry)) &&
12209 (prev_entry->no_cache == this_entry->no_cache) &&
12210 (prev_entry->permanent == this_entry->permanent) &&
12211 (prev_entry->map_aligned == this_entry->map_aligned) &&
12212 (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) &&
12213 (prev_entry->used_for_jit == this_entry->used_for_jit) &&
12214 /* from_reserved_zone: OK if that field doesn't match */
12215 (prev_entry->iokit_acct == this_entry->iokit_acct) &&
12216 (prev_entry->vme_resilient_codesign ==
12217 this_entry->vme_resilient_codesign) &&
12218 (prev_entry->vme_resilient_media ==
12219 this_entry->vme_resilient_media) &&
12220
12221 (prev_entry->wired_count == this_entry->wired_count) &&
12222 (prev_entry->user_wired_count == this_entry->user_wired_count) &&
12223
12224 (prev_entry->in_transition == FALSE) &&
12225 (this_entry->in_transition == FALSE) &&
12226 (prev_entry->needs_wakeup == FALSE) &&
12227 (this_entry->needs_wakeup == FALSE) &&
12228 (prev_entry->is_shared == FALSE) &&
12229 (this_entry->is_shared == FALSE) &&
12230 (prev_entry->superpage_size == FALSE) &&
12231 (this_entry->superpage_size == FALSE)
12232 ) {
12233 vm_map_store_entry_unlink(map, prev_entry);
12234 assert(prev_entry->vme_start < this_entry->vme_end);
12235 if (prev_entry->map_aligned)
12236 assert(VM_MAP_PAGE_ALIGNED(prev_entry->vme_start,
12237 VM_MAP_PAGE_MASK(map)));
12238 this_entry->vme_start = prev_entry->vme_start;
12239 VME_OFFSET_SET(this_entry, VME_OFFSET(prev_entry));
12240
12241 if (map->holelistenabled) {
12242 vm_map_store_update_first_free(map, this_entry, TRUE);
12243 }
12244
12245 if (prev_entry->is_sub_map) {
12246 vm_map_deallocate(VME_SUBMAP(prev_entry));
12247 } else {
12248 vm_object_deallocate(VME_OBJECT(prev_entry));
12249 }
12250 vm_map_entry_dispose(map, prev_entry);
12251 SAVE_HINT_MAP_WRITE(map, this_entry);
12252 counter(c_vm_map_simplified++);
12253 }
12254 }
12255
12256 void
12257 vm_map_simplify(
12258 vm_map_t map,
12259 vm_map_offset_t start)
12260 {
12261 vm_map_entry_t this_entry;
12262
12263 vm_map_lock(map);
12264 if (vm_map_lookup_entry(map, start, &this_entry)) {
12265 vm_map_simplify_entry(map, this_entry);
12266 vm_map_simplify_entry(map, this_entry->vme_next);
12267 }
12268 counter(c_vm_map_simplify_called++);
12269 vm_map_unlock(map);
12270 }
12271
12272 static void
12273 vm_map_simplify_range(
12274 vm_map_t map,
12275 vm_map_offset_t start,
12276 vm_map_offset_t end)
12277 {
12278 vm_map_entry_t entry;
12279
12280 /*
12281 * The map should be locked (for "write") by the caller.
12282 */
12283
12284 if (start >= end) {
12285 /* invalid address range */
12286 return;
12287 }
12288
12289 start = vm_map_trunc_page(start,
12290 VM_MAP_PAGE_MASK(map));
12291 end = vm_map_round_page(end,
12292 VM_MAP_PAGE_MASK(map));
12293
12294 if (!vm_map_lookup_entry(map, start, &entry)) {
12295 /* "start" is not mapped and "entry" ends before "start" */
12296 if (entry == vm_map_to_entry(map)) {
12297 /* start with first entry in the map */
12298 entry = vm_map_first_entry(map);
12299 } else {
12300 /* start with next entry */
12301 entry = entry->vme_next;
12302 }
12303 }
12304
12305 while (entry != vm_map_to_entry(map) &&
12306 entry->vme_start <= end) {
12307 /* try and coalesce "entry" with its previous entry */
12308 vm_map_simplify_entry(map, entry);
12309 entry = entry->vme_next;
12310 }
12311 }
12312
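/*
 * Illustrative trace (editorial): with 4K pages, if the map contains two
 * adjacent entries backed by the same object with contiguous offsets and
 * identical attributes, e.g.
 *
 *	[0x1000, 0x3000)  object O, offset 0x0
 *	[0x3000, 0x5000)  object O, offset 0x2000
 *
 * then vm_map_simplify_entry(map, <entry at 0x3000>) unlinks the first
 * entry, extends the second one to
 *
 *	[0x1000, 0x5000)  object O, offset 0x0
 *
 * and drops the extra reference that the first entry held on O.  Any
 * mismatch in the long list of attribute checks above (protections,
 * wiring, aliases, etc.) leaves the two entries untouched.
 */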
12313
12314 /*
12315 * Routine: vm_map_machine_attribute
12316 * Purpose:
12317 * Provide machine-specific attributes to mappings,
12318 * such as cacheability, etc., for machines that provide
12319 * them. NUMA architectures and machines with big/strange
12320 * caches will use this.
12321 * Note:
12322 * Responsibilities for locking and checking are handled here,
12323 * everything else in the pmap module. If any non-volatile
12324 * information must be kept, the pmap module should handle
12325 * it itself. [This assumes that attributes do not
12326 * need to be inherited, which seems ok to me]
12327 */
12328 kern_return_t
12329 vm_map_machine_attribute(
12330 vm_map_t map,
12331 vm_map_offset_t start,
12332 vm_map_offset_t end,
12333 vm_machine_attribute_t attribute,
12334 vm_machine_attribute_val_t* value) /* IN/OUT */
12335 {
12336 kern_return_t ret;
12337 vm_map_size_t sync_size;
12338 vm_map_entry_t entry;
12339
12340 if (start < vm_map_min(map) || end > vm_map_max(map))
12341 return KERN_INVALID_ADDRESS;
12342
12343 /* Figure how much memory we need to flush (in page increments) */
12344 sync_size = end - start;
12345
12346 vm_map_lock(map);
12347
12348 if (attribute != MATTR_CACHE) {
12349 /* If we don't have to find physical addresses, we */
12350 /* don't have to do an explicit traversal here. */
12351 ret = pmap_attribute(map->pmap, start, end-start,
12352 attribute, value);
12353 vm_map_unlock(map);
12354 return ret;
12355 }
12356
12357 ret = KERN_SUCCESS; /* Assume it all worked */
12358
12359 while(sync_size) {
12360 if (vm_map_lookup_entry(map, start, &entry)) {
12361 vm_map_size_t sub_size;
12362 if((entry->vme_end - start) > sync_size) {
12363 sub_size = sync_size;
12364 sync_size = 0;
12365 } else {
12366 sub_size = entry->vme_end - start;
12367 sync_size -= sub_size;
12368 }
12369 if(entry->is_sub_map) {
12370 vm_map_offset_t sub_start;
12371 vm_map_offset_t sub_end;
12372
12373 sub_start = (start - entry->vme_start)
12374 + VME_OFFSET(entry);
12375 sub_end = sub_start + sub_size;
12376 vm_map_machine_attribute(
12377 VME_SUBMAP(entry),
12378 sub_start,
12379 sub_end,
12380 attribute, value);
12381 } else {
12382 if (VME_OBJECT(entry)) {
12383 vm_page_t m;
12384 vm_object_t object;
12385 vm_object_t base_object;
12386 vm_object_t last_object;
12387 vm_object_offset_t offset;
12388 vm_object_offset_t base_offset;
12389 vm_map_size_t range;
12390 range = sub_size;
12391 offset = (start - entry->vme_start)
12392 + VME_OFFSET(entry);
12393 base_offset = offset;
12394 object = VME_OBJECT(entry);
12395 base_object = object;
12396 last_object = NULL;
12397
12398 vm_object_lock(object);
12399
12400 while (range) {
12401 m = vm_page_lookup(
12402 object, offset);
12403
12404 if (m && !m->fictitious) {
12405 ret =
12406 pmap_attribute_cache_sync(
12407 m->phys_page,
12408 PAGE_SIZE,
12409 attribute, value);
12410
12411 } else if (object->shadow) {
12412 offset = offset + object->vo_shadow_offset;
12413 last_object = object;
12414 object = object->shadow;
12415 vm_object_lock(last_object->shadow);
12416 vm_object_unlock(last_object);
12417 continue;
12418 }
12419 range -= PAGE_SIZE;
12420
12421 if (base_object != object) {
12422 vm_object_unlock(object);
12423 vm_object_lock(base_object);
12424 object = base_object;
12425 }
12426 /* Bump to the next page */
12427 base_offset += PAGE_SIZE;
12428 offset = base_offset;
12429 }
12430 vm_object_unlock(object);
12431 }
12432 }
12433 start += sub_size;
12434 } else {
12435 vm_map_unlock(map);
12436 return KERN_FAILURE;
12437 }
12438
12439 }
12440
12441 vm_map_unlock(map);
12442
12443 return ret;
12444 }
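/*
 * Illustrative sketch (editorial): the MATTR_CACHE path above walks the
 * resident pages and calls pmap_attribute_cache_sync() on each one.  From
 * user space the same path can be reached through the vm_machine_attribute()
 * MIG call; a hedged example (error handling omitted, "code_buf"/"code_len"
 * assumed to describe a freshly written code region):
 *
 *	#include <mach/mach.h>
 *	#include <mach/vm_attributes.h>
 *
 *	vm_machine_attribute_val_t val = MATTR_VAL_CACHE_FLUSH;
 *	kern_return_t kr;
 *
 *	kr = vm_machine_attribute(mach_task_self(),
 *	    (vm_address_t)code_buf, code_len,
 *	    MATTR_CACHE, &val);
 *
 * In practice sys_icache_invalidate() is the usual user-level interface for
 * cache maintenance; this is only meant to illustrate the plumbing into
 * vm_map_machine_attribute().
 */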
12445
12446 /*
12447 * vm_map_behavior_set:
12448 *
12449 * Sets the paging reference behavior of the specified address
12450 * range in the target map. Paging reference behavior affects
12451 * how pagein operations resulting from faults on the map will be
12452 * clustered.
12453 */
12454 kern_return_t
12455 vm_map_behavior_set(
12456 vm_map_t map,
12457 vm_map_offset_t start,
12458 vm_map_offset_t end,
12459 vm_behavior_t new_behavior)
12460 {
12461 register vm_map_entry_t entry;
12462 vm_map_entry_t temp_entry;
12463
12464 XPR(XPR_VM_MAP,
12465 "vm_map_behavior_set, 0x%X start 0x%X end 0x%X behavior %d",
12466 map, start, end, new_behavior, 0);
12467
12468 if (start > end ||
12469 start < vm_map_min(map) ||
12470 end > vm_map_max(map)) {
12471 return KERN_NO_SPACE;
12472 }
12473
12474 switch (new_behavior) {
12475
12476 /*
12477 * This first block of behaviors all set a persistent state on the specified
12478 * memory range. All we have to do here is to record the desired behavior
12479 * in the vm_map_entry_t's.
12480 */
12481
12482 case VM_BEHAVIOR_DEFAULT:
12483 case VM_BEHAVIOR_RANDOM:
12484 case VM_BEHAVIOR_SEQUENTIAL:
12485 case VM_BEHAVIOR_RSEQNTL:
12486 case VM_BEHAVIOR_ZERO_WIRED_PAGES:
12487 vm_map_lock(map);
12488
12489 /*
12490 * The entire address range must be valid for the map.
12491 * Note that vm_map_range_check() does a
12492 * vm_map_lookup_entry() internally and returns the
12493 * entry containing the start of the address range if
12494 * the entire range is valid.
12495 */
12496 if (vm_map_range_check(map, start, end, &temp_entry)) {
12497 entry = temp_entry;
12498 vm_map_clip_start(map, entry, start);
12499 }
12500 else {
12501 vm_map_unlock(map);
12502 return(KERN_INVALID_ADDRESS);
12503 }
12504
12505 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
12506 vm_map_clip_end(map, entry, end);
12507 if (entry->is_sub_map) {
12508 assert(!entry->use_pmap);
12509 }
12510
12511 if( new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES ) {
12512 entry->zero_wired_pages = TRUE;
12513 } else {
12514 entry->behavior = new_behavior;
12515 }
12516 entry = entry->vme_next;
12517 }
12518
12519 vm_map_unlock(map);
12520 break;
12521
12522 /*
12523 * The rest of these are different from the above in that they cause
12524 * an immediate action to take place as opposed to setting a behavior that
12525 * affects future actions.
12526 */
12527
12528 case VM_BEHAVIOR_WILLNEED:
12529 return vm_map_willneed(map, start, end);
12530
12531 case VM_BEHAVIOR_DONTNEED:
12532 return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS);
12533
12534 case VM_BEHAVIOR_FREE:
12535 return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS);
12536
12537 case VM_BEHAVIOR_REUSABLE:
12538 return vm_map_reusable_pages(map, start, end);
12539
12540 case VM_BEHAVIOR_REUSE:
12541 return vm_map_reuse_pages(map, start, end);
12542
12543 case VM_BEHAVIOR_CAN_REUSE:
12544 return vm_map_can_reuse(map, start, end);
12545
12546 #if MACH_ASSERT
12547 case VM_BEHAVIOR_PAGEOUT:
12548 return vm_map_pageout(map, start, end);
12549 #endif /* MACH_ASSERT */
12550
12551 default:
12552 return(KERN_INVALID_ARGUMENT);
12553 }
12554
12555 return(KERN_SUCCESS);
12556 }
12557
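/*
 * Illustrative sketch (editorial): the behaviors handled above normally
 * arrive from the BSD madvise(2) path, which translates the MADV_* flags
 * before calling into the Mach VM layer.  The correspondence, roughly
 * (the exact plumbing lives in the BSD layer and is an assumption here):
 *
 *	#include <sys/mman.h>
 *
 *	madvise(addr, len, MADV_SEQUENTIAL);     // VM_BEHAVIOR_SEQUENTIAL
 *	madvise(addr, len, MADV_RANDOM);         // VM_BEHAVIOR_RANDOM
 *	madvise(addr, len, MADV_WILLNEED);       // VM_BEHAVIOR_WILLNEED
 *	madvise(addr, len, MADV_DONTNEED);       // VM_BEHAVIOR_DONTNEED
 *	madvise(addr, len, MADV_FREE);           // VM_BEHAVIOR_FREE
 *	madvise(addr, len, MADV_FREE_REUSABLE);  // VM_BEHAVIOR_REUSABLE
 *	madvise(addr, len, MADV_FREE_REUSE);     // VM_BEHAVIOR_REUSE
 *	madvise(addr, len, MADV_CAN_REUSE);      // VM_BEHAVIOR_CAN_REUSE
 *
 * "addr"/"len" are assumed to describe a page-aligned range owned by the
 * caller.  The first block of cases above only records the behavior in the
 * affected entries; the later ones act immediately, as noted in the switch.
 */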
12558
12559 /*
12560 * Internals for madvise(MADV_WILLNEED) system call.
12561 *
12562 * The present implementation is to do a read-ahead if the mapping corresponds
12563 * to a mapped regular file. If it's an anonymous mapping, then we do nothing
12564 * and basically ignore the "advice" (which we are always free to do).
12565 */
12566
12567
12568 static kern_return_t
12569 vm_map_willneed(
12570 vm_map_t map,
12571 vm_map_offset_t start,
12572 vm_map_offset_t end
12573 )
12574 {
12575 vm_map_entry_t entry;
12576 vm_object_t object;
12577 memory_object_t pager;
12578 struct vm_object_fault_info fault_info;
12579 kern_return_t kr;
12580 vm_object_size_t len;
12581 vm_object_offset_t offset;
12582
12583 /*
12584 * Fill in static values in fault_info. Several fields get ignored by the code
12585 * we call, but we'll fill them in anyway since uninitialized fields are bad
12586 * when it comes to future backwards compatibility.
12587 */
12588
12589 fault_info.interruptible = THREAD_UNINT; /* ignored value */
12590 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
12591 fault_info.no_cache = FALSE; /* ignored value */
12592 fault_info.stealth = TRUE;
12593 fault_info.io_sync = FALSE;
12594 fault_info.cs_bypass = FALSE;
12595 fault_info.mark_zf_absent = FALSE;
12596 fault_info.batch_pmap_op = FALSE;
12597
12598 /*
12599 * The MADV_WILLNEED operation doesn't require any changes to the
12600 * vm_map_entry_t's, so the read lock is sufficient.
12601 */
12602
12603 vm_map_lock_read(map);
12604
12605 /*
12606 * The madvise semantics require that the address range be fully
12607 * allocated with no holes. Otherwise, we're required to return
12608 * an error.
12609 */
12610
12611 if (! vm_map_range_check(map, start, end, &entry)) {
12612 vm_map_unlock_read(map);
12613 return KERN_INVALID_ADDRESS;
12614 }
12615
12616 /*
12617 * Examine each vm_map_entry_t in the range.
12618 */
12619 for (; entry != vm_map_to_entry(map) && start < end; ) {
12620
12621 /*
12622 * The first time through, the start address could be anywhere
12623 * within the vm_map_entry we found. So adjust the offset to
12624 * correspond. After that, the offset will always be zero to
12625 * correspond to the beginning of the current vm_map_entry.
12626 */
12627 offset = (start - entry->vme_start) + VME_OFFSET(entry);
12628
12629 /*
12630 * Set the length so we don't go beyond the end of the
12631 * map_entry or beyond the end of the range we were given.
12632 * This range could also span multiple map entries, all of which
12633 * map different files, so make sure we only do the right amount
12634 * of I/O for each object. Note that it's possible for there
12635 * to be multiple map entries all referring to the same object
12636 * but with different page permissions, but it's not worth
12637 * trying to optimize that case.
12638 */
12639 len = MIN(entry->vme_end - start, end - start);
12640
12641 if ((vm_size_t) len != len) {
12642 /* 32-bit overflow */
12643 len = (vm_size_t) (0 - PAGE_SIZE);
12644 }
12645 fault_info.cluster_size = (vm_size_t) len;
12646 fault_info.lo_offset = offset;
12647 fault_info.hi_offset = offset + len;
12648 fault_info.user_tag = VME_ALIAS(entry);
12649 fault_info.pmap_options = 0;
12650 if (entry->iokit_acct ||
12651 (!entry->is_sub_map && !entry->use_pmap)) {
12652 fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
12653 }
12654
12655 /*
12656 * If there's no read permission to this mapping, then just
12657 * skip it.
12658 */
12659 if ((entry->protection & VM_PROT_READ) == 0) {
12660 entry = entry->vme_next;
12661 start = entry->vme_start;
12662 continue;
12663 }
12664
12665 /*
12666 * Find the file object backing this map entry. If there is
12667 * none, then we simply ignore the "will need" advice for this
12668 * entry and go on to the next one.
12669 */
12670 if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) {
12671 entry = entry->vme_next;
12672 start = entry->vme_start;
12673 continue;
12674 }
12675
12676 /*
12677 * The data_request() could take a long time, so let's
12678 * release the map lock to avoid blocking other threads.
12679 */
12680 vm_map_unlock_read(map);
12681
12682 vm_object_paging_begin(object);
12683 pager = object->pager;
12684 vm_object_unlock(object);
12685
12686 /*
12687 * Get the data from the object asynchronously.
12688 *
12689 * Note that memory_object_data_request() places limits on the
12690 * amount of I/O it will do. Regardless of the len we
12691 * specified, it won't do more than MAX_UPL_TRANSFER_BYTES and it
12692 * silently truncates the len to that size. This isn't
12693 * necessarily bad since madvise shouldn't really be used to
12694 * page in unlimited amounts of data. Other Unix variants
12695 * limit the willneed case as well. If this turns out to be an
12696 * issue for developers, then we can always adjust the policy
12697 * here and still be backwards compatible since this is all
12698 * just "advice".
12699 */
12700 kr = memory_object_data_request(
12701 pager,
12702 offset + object->paging_offset,
12703 0, /* ignored */
12704 VM_PROT_READ,
12705 (memory_object_fault_info_t)&fault_info);
12706
12707 vm_object_lock(object);
12708 vm_object_paging_end(object);
12709 vm_object_unlock(object);
12710
12711 /*
12712 * If we couldn't do the I/O for some reason, just give up on
12713 * the madvise. We still return success to the user since
12714 * madvise isn't supposed to fail when the advice can't be
12715 * taken.
12716 */
12717 if (kr != KERN_SUCCESS) {
12718 return KERN_SUCCESS;
12719 }
12720
12721 start += len;
12722 if (start >= end) {
12723 /* done */
12724 return KERN_SUCCESS;
12725 }
12726
12727 /* look up next entry */
12728 vm_map_lock_read(map);
12729 if (! vm_map_lookup_entry(map, start, &entry)) {
12730 /*
12731 * There's a new hole in the address range.
12732 */
12733 vm_map_unlock_read(map);
12734 return KERN_INVALID_ADDRESS;
12735 }
12736 }
12737
12738 vm_map_unlock_read(map);
12739 return KERN_SUCCESS;
12740 }
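/*
 * Illustrative sketch (editorial): a typical user of the read-ahead
 * implemented above, assuming "/some/large/file" stands in for a real
 * regular file at least this large:
 *
 *	#include <sys/mman.h>
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int fd = open("/some/large/file", O_RDONLY);
 *	size_t len = 16 * 1024 * 1024;
 *	void *p = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
 *
 *	if (p != MAP_FAILED)
 *		(void)madvise(p, len, MADV_WILLNEED);  // advisory, may be a no-op
 *
 * As the comments above note, the request is asynchronous, each entry's
 * I/O is silently capped (MAX_UPL_TRANSFER_BYTES), anonymous memory is
 * skipped, and an I/O failure is still reported as success to the caller.
 */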
12741
12742 static boolean_t
12743 vm_map_entry_is_reusable(
12744 vm_map_entry_t entry)
12745 {
12746 /* Only user map entries */
12747
12748 vm_object_t object;
12749
12750 if (entry->is_sub_map) {
12751 return FALSE;
12752 }
12753
12754 switch (VME_ALIAS(entry)) {
12755 case VM_MEMORY_MALLOC:
12756 case VM_MEMORY_MALLOC_SMALL:
12757 case VM_MEMORY_MALLOC_LARGE:
12758 case VM_MEMORY_REALLOC:
12759 case VM_MEMORY_MALLOC_TINY:
12760 case VM_MEMORY_MALLOC_LARGE_REUSABLE:
12761 case VM_MEMORY_MALLOC_LARGE_REUSED:
12762 /*
12763 * This is a malloc() memory region: check if it's still
12764 * in its original state and can be re-used for more
12765 * malloc() allocations.
12766 */
12767 break;
12768 default:
12769 /*
12770 * Not a malloc() memory region: let the caller decide if
12771 * it's re-usable.
12772 */
12773 return TRUE;
12774 }
12775
12776 if (entry->is_shared ||
12777 entry->is_sub_map ||
12778 entry->in_transition ||
12779 entry->protection != VM_PROT_DEFAULT ||
12780 entry->max_protection != VM_PROT_ALL ||
12781 entry->inheritance != VM_INHERIT_DEFAULT ||
12782 entry->no_cache ||
12783 entry->permanent ||
12784 entry->superpage_size != FALSE ||
12785 entry->zero_wired_pages ||
12786 entry->wired_count != 0 ||
12787 entry->user_wired_count != 0) {
12788 return FALSE;
12789 }
12790
12791 object = VME_OBJECT(entry);
12792 if (object == VM_OBJECT_NULL) {
12793 return TRUE;
12794 }
12795 if (
12796 #if 0
12797 /*
12798 * Let's proceed even if the VM object is potentially
12799 * shared.
12800 * We check for this later when processing the actual
12801 * VM pages, so the contents will be safe if shared.
12802 *
12803 * But we can still mark this memory region as "reusable" to
12804 * acknowledge that the caller did let us know that the memory
12805 * could be re-used and should not be penalized for holding
12806 * on to it. This allows its "resident size" to not include
12807 * the reusable range.
12808 */
12809 object->ref_count == 1 &&
12810 #endif
12811 object->wired_page_count == 0 &&
12812 object->copy == VM_OBJECT_NULL &&
12813 object->shadow == VM_OBJECT_NULL &&
12814 object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
12815 object->internal &&
12816 !object->true_share &&
12817 object->wimg_bits == VM_WIMG_USE_DEFAULT &&
12818 !object->code_signed) {
12819 return TRUE;
12820 }
12821 return FALSE;
12822
12823
12824 }
12825
12826 static kern_return_t
12827 vm_map_reuse_pages(
12828 vm_map_t map,
12829 vm_map_offset_t start,
12830 vm_map_offset_t end)
12831 {
12832 vm_map_entry_t entry;
12833 vm_object_t object;
12834 vm_object_offset_t start_offset, end_offset;
12835
12836 /*
12837 * The MADV_REUSE operation doesn't require any changes to the
12838 * vm_map_entry_t's, so the read lock is sufficient.
12839 */
12840
12841 vm_map_lock_read(map);
12842 assert(map->pmap != kernel_pmap); /* protect alias access */
12843
12844 /*
12845 * The madvise semantics require that the address range be fully
12846 * allocated with no holes. Otherwise, we're required to return
12847 * an error.
12848 */
12849
12850 if (!vm_map_range_check(map, start, end, &entry)) {
12851 vm_map_unlock_read(map);
12852 vm_page_stats_reusable.reuse_pages_failure++;
12853 return KERN_INVALID_ADDRESS;
12854 }
12855
12856 /*
12857 * Examine each vm_map_entry_t in the range.
12858 */
12859 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
12860 entry = entry->vme_next) {
12861 /*
12862 * Sanity check on the VM map entry.
12863 */
12864 if (! vm_map_entry_is_reusable(entry)) {
12865 vm_map_unlock_read(map);
12866 vm_page_stats_reusable.reuse_pages_failure++;
12867 return KERN_INVALID_ADDRESS;
12868 }
12869
12870 /*
12871 * The first time through, the start address could be anywhere
12872 * within the vm_map_entry we found. So adjust the offset to
12873 * correspond.
12874 */
12875 if (entry->vme_start < start) {
12876 start_offset = start - entry->vme_start;
12877 } else {
12878 start_offset = 0;
12879 }
12880 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
12881 start_offset += VME_OFFSET(entry);
12882 end_offset += VME_OFFSET(entry);
12883
12884 assert(!entry->is_sub_map);
12885 object = VME_OBJECT(entry);
12886 if (object != VM_OBJECT_NULL) {
12887 vm_object_lock(object);
12888 vm_object_reuse_pages(object, start_offset, end_offset,
12889 TRUE);
12890 vm_object_unlock(object);
12891 }
12892
12893 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSABLE) {
12894 /*
12895 * XXX
12896 * We do not hold the VM map exclusively here.
12897 * The "alias" field is not that critical, so it's
12898 * safe to update it here, as long as it is the only
12899 * one that can be modified while holding the VM map
12900 * "shared".
12901 */
12902 VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSED);
12903 }
12904 }
12905
12906 vm_map_unlock_read(map);
12907 vm_page_stats_reusable.reuse_pages_success++;
12908 return KERN_SUCCESS;
12909 }
12910
12911
12912 static kern_return_t
12913 vm_map_reusable_pages(
12914 vm_map_t map,
12915 vm_map_offset_t start,
12916 vm_map_offset_t end)
12917 {
12918 vm_map_entry_t entry;
12919 vm_object_t object;
12920 vm_object_offset_t start_offset, end_offset;
12921 vm_map_offset_t pmap_offset;
12922
12923 /*
12924 * The MADV_REUSABLE operation doesn't require any changes to the
12925 * vm_map_entry_t's, so the read lock is sufficient.
12926 */
12927
12928 vm_map_lock_read(map);
12929 assert(map->pmap != kernel_pmap); /* protect alias access */
12930
12931 /*
12932 * The madvise semantics require that the address range be fully
12933 * allocated with no holes. Otherwise, we're required to return
12934 * an error.
12935 */
12936
12937 if (!vm_map_range_check(map, start, end, &entry)) {
12938 vm_map_unlock_read(map);
12939 vm_page_stats_reusable.reusable_pages_failure++;
12940 return KERN_INVALID_ADDRESS;
12941 }
12942
12943 /*
12944 * Examine each vm_map_entry_t in the range.
12945 */
12946 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
12947 entry = entry->vme_next) {
12948 int kill_pages = 0;
12949
12950 /*
12951 * Sanity check on the VM map entry.
12952 */
12953 if (! vm_map_entry_is_reusable(entry)) {
12954 vm_map_unlock_read(map);
12955 vm_page_stats_reusable.reusable_pages_failure++;
12956 return KERN_INVALID_ADDRESS;
12957 }
12958
12959 /*
12960 * The first time through, the start address could be anywhere
12961 * within the vm_map_entry we found. So adjust the offset to
12962 * correspond.
12963 */
12964 if (entry->vme_start < start) {
12965 start_offset = start - entry->vme_start;
12966 pmap_offset = start;
12967 } else {
12968 start_offset = 0;
12969 pmap_offset = entry->vme_start;
12970 }
12971 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
12972 start_offset += VME_OFFSET(entry);
12973 end_offset += VME_OFFSET(entry);
12974
12975 assert(!entry->is_sub_map);
12976 object = VME_OBJECT(entry);
12977 if (object == VM_OBJECT_NULL)
12978 continue;
12979
12980
12981 vm_object_lock(object);
12982 if (object->ref_count == 1 &&
12983 !object->shadow &&
12984 /*
12985 * "iokit_acct" entries are billed for their virtual size
12986 * (rather than for their resident pages only), so they
12987 * wouldn't benefit from making pages reusable, and it
12988 * would be hard to keep track of pages that are both
12989 * "iokit_acct" and "reusable" in the pmap stats and ledgers.
12990 */
12991 !(entry->iokit_acct ||
12992 (!entry->is_sub_map && !entry->use_pmap)))
12993 kill_pages = 1;
12994 else
12995 kill_pages = -1;
12996 if (kill_pages != -1) {
12997 vm_object_deactivate_pages(object,
12998 start_offset,
12999 end_offset - start_offset,
13000 kill_pages,
13001 TRUE /*reusable_pages*/,
13002 map->pmap,
13003 pmap_offset);
13004 } else {
13005 vm_page_stats_reusable.reusable_pages_shared++;
13006 }
13007 vm_object_unlock(object);
13008
13009 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE ||
13010 VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSED) {
13011 /*
13012 * XXX
13013 * We do not hold the VM map exclusively here.
13014 * The "alias" field is not that critical, so it's
13015 * safe to update it here, as long as it is the only
13016 * field that can be modified while the VM map is held
13017 * "shared".
13018 */
13019 VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSABLE);
13020 }
13021 }
13022
13023 vm_map_unlock_read(map);
13024 vm_page_stats_reusable.reusable_pages_success++;
13025 return KERN_SUCCESS;
13026 }
13027
13028
13029 static kern_return_t
13030 vm_map_can_reuse(
13031 vm_map_t map,
13032 vm_map_offset_t start,
13033 vm_map_offset_t end)
13034 {
13035 vm_map_entry_t entry;
13036
13037 /*
13038 * The MADV_REUSABLE operation doesn't require any changes to the
13039 * vm_map_entry_t's, so the read lock is sufficient.
13040 */
13041
13042 vm_map_lock_read(map);
13043 assert(map->pmap != kernel_pmap); /* protect alias access */
13044
13045 /*
13046 * The madvise semantics require that the address range be fully
13047 * allocated with no holes. Otherwise, we're required to return
13048 * an error.
13049 */
13050
13051 if (!vm_map_range_check(map, start, end, &entry)) {
13052 vm_map_unlock_read(map);
13053 vm_page_stats_reusable.can_reuse_failure++;
13054 return KERN_INVALID_ADDRESS;
13055 }
13056
13057 /*
13058 * Examine each vm_map_entry_t in the range.
13059 */
13060 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
13061 entry = entry->vme_next) {
13062 /*
13063 * Sanity check on the VM map entry.
13064 */
13065 if (! vm_map_entry_is_reusable(entry)) {
13066 vm_map_unlock_read(map);
13067 vm_page_stats_reusable.can_reuse_failure++;
13068 return KERN_INVALID_ADDRESS;
13069 }
13070 }
13071
13072 vm_map_unlock_read(map);
13073 vm_page_stats_reusable.can_reuse_success++;
13074 return KERN_SUCCESS;
13075 }
13076
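/*
 * Illustrative sketch (editorial): vm_map_reusable_pages(),
 * vm_map_reuse_pages() and vm_map_can_reuse() back the malloc-style page
 * recycling protocol.  A free-list style allocator might use it roughly
 * like this (a sketch, not the actual libmalloc code; "block"/"block_size"
 * are assumed to describe a page-aligned allocation it owns):
 *
 *	#include <sys/mman.h>
 *
 *	// block goes onto a free list: its contents are disposable
 *	(void)madvise(block, block_size, MADV_FREE_REUSABLE);
 *
 *	// ...later, the block is handed out again: take the pages back
 *	// out of the reusable state before reusing their contents
 *	(void)madvise(block, block_size, MADV_FREE_REUSE);
 *
 * MADV_CAN_REUSE (vm_map_can_reuse() above) only probes whether the range
 * would qualify.  While marked reusable, the pages stay mapped but can be
 * discarded by the system under pressure instead of being swapped, and
 * they are tracked in the vm_page_stats_reusable counters updated above.
 */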
13077
13078 #if MACH_ASSERT
13079 static kern_return_t
13080 vm_map_pageout(
13081 vm_map_t map,
13082 vm_map_offset_t start,
13083 vm_map_offset_t end)
13084 {
13085 vm_map_entry_t entry;
13086
13087 /*
13088 * The MADV_PAGEOUT operation doesn't require any changes to the
13089 * vm_map_entry_t's, so the read lock is sufficient.
13090 */
13091
13092 vm_map_lock_read(map);
13093
13094 /*
13095 * The madvise semantics require that the address range be fully
13096 * allocated with no holes. Otherwise, we're required to return
13097 * an error.
13098 */
13099
13100 if (!vm_map_range_check(map, start, end, &entry)) {
13101 vm_map_unlock_read(map);
13102 return KERN_INVALID_ADDRESS;
13103 }
13104
13105 /*
13106 * Examine each vm_map_entry_t in the range.
13107 */
13108 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
13109 entry = entry->vme_next) {
13110 vm_object_t object;
13111
13112 /*
13113 * Sanity check on the VM map entry.
13114 */
13115 if (entry->is_sub_map) {
13116 vm_map_t submap;
13117 vm_map_offset_t submap_start;
13118 vm_map_offset_t submap_end;
13119 vm_map_entry_t submap_entry;
13120
13121 submap = VME_SUBMAP(entry);
13122 submap_start = VME_OFFSET(entry);
13123 submap_end = submap_start + (entry->vme_end -
13124 entry->vme_start);
13125
13126 vm_map_lock_read(submap);
13127
13128 if (! vm_map_range_check(submap,
13129 submap_start,
13130 submap_end,
13131 &submap_entry)) {
13132 vm_map_unlock_read(submap);
13133 vm_map_unlock_read(map);
13134 return KERN_INVALID_ADDRESS;
13135 }
13136
13137 object = VME_OBJECT(submap_entry);
13138 if (submap_entry->is_sub_map ||
13139 object == VM_OBJECT_NULL ||
13140 !object->internal) {
13141 vm_map_unlock_read(submap);
13142 continue;
13143 }
13144
13145 vm_object_pageout(object);
13146
13147 vm_map_unlock_read(submap);
13148 submap = VM_MAP_NULL;
13149 submap_entry = VM_MAP_ENTRY_NULL;
13150 continue;
13151 }
13152
13153 object = VME_OBJECT(entry);
13154 if (entry->is_sub_map ||
13155 object == VM_OBJECT_NULL ||
13156 !object->internal) {
13157 continue;
13158 }
13159
13160 vm_object_pageout(object);
13161 }
13162
13163 vm_map_unlock_read(map);
13164 return KERN_SUCCESS;
13165 }
13166 #endif /* MACH_ASSERT */
13167
13168
13169 /*
13170 * Routine: vm_map_entry_insert
13171 *
13172 * Description: This routine inserts a new vm_map_entry into a locked map.
13173 */
13174 vm_map_entry_t
13175 vm_map_entry_insert(
13176 vm_map_t map,
13177 vm_map_entry_t insp_entry,
13178 vm_map_offset_t start,
13179 vm_map_offset_t end,
13180 vm_object_t object,
13181 vm_object_offset_t offset,
13182 boolean_t needs_copy,
13183 boolean_t is_shared,
13184 boolean_t in_transition,
13185 vm_prot_t cur_protection,
13186 vm_prot_t max_protection,
13187 vm_behavior_t behavior,
13188 vm_inherit_t inheritance,
13189 unsigned wired_count,
13190 boolean_t no_cache,
13191 boolean_t permanent,
13192 unsigned int superpage_size,
13193 boolean_t clear_map_aligned,
13194 boolean_t is_submap)
13195 {
13196 vm_map_entry_t new_entry;
13197
13198 assert(insp_entry != (vm_map_entry_t)0);
13199
13200 new_entry = vm_map_entry_create(map, !map->hdr.entries_pageable);
13201
13202 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
13203 new_entry->map_aligned = TRUE;
13204 } else {
13205 new_entry->map_aligned = FALSE;
13206 }
13207 if (clear_map_aligned &&
13208 (! VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)) ||
13209 ! VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)))) {
13210 new_entry->map_aligned = FALSE;
13211 }
13212
13213 new_entry->vme_start = start;
13214 new_entry->vme_end = end;
13215 assert(page_aligned(new_entry->vme_start));
13216 assert(page_aligned(new_entry->vme_end));
13217 if (new_entry->map_aligned) {
13218 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
13219 VM_MAP_PAGE_MASK(map)));
13220 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
13221 VM_MAP_PAGE_MASK(map)));
13222 }
13223 assert(new_entry->vme_start < new_entry->vme_end);
13224
13225 VME_OBJECT_SET(new_entry, object);
13226 VME_OFFSET_SET(new_entry, offset);
13227 new_entry->is_shared = is_shared;
13228 new_entry->is_sub_map = is_submap;
13229 new_entry->needs_copy = needs_copy;
13230 new_entry->in_transition = in_transition;
13231 new_entry->needs_wakeup = FALSE;
13232 new_entry->inheritance = inheritance;
13233 new_entry->protection = cur_protection;
13234 new_entry->max_protection = max_protection;
13235 new_entry->behavior = behavior;
13236 new_entry->wired_count = wired_count;
13237 new_entry->user_wired_count = 0;
13238 if (is_submap) {
13239 /*
13240 * submap: "use_pmap" means "nested".
13241 * default: false.
13242 */
13243 new_entry->use_pmap = FALSE;
13244 } else {
13245 /*
13246 * object: "use_pmap" means "use pmap accounting" for footprint.
13247 * default: true.
13248 */
13249 new_entry->use_pmap = TRUE;
13250 }
13251 VME_ALIAS_SET(new_entry, 0);
13252 new_entry->zero_wired_pages = FALSE;
13253 new_entry->no_cache = no_cache;
13254 new_entry->permanent = permanent;
13255 if (superpage_size)
13256 new_entry->superpage_size = TRUE;
13257 else
13258 new_entry->superpage_size = FALSE;
13259 new_entry->used_for_jit = FALSE;
13260 new_entry->iokit_acct = FALSE;
13261 new_entry->vme_resilient_codesign = FALSE;
13262 new_entry->vme_resilient_media = FALSE;
13263
13264 /*
13265 * Insert the new entry into the list.
13266 */
13267
13268 vm_map_store_entry_link(map, insp_entry, new_entry);
13269 map->size += end - start;
13270
13271 /*
13272 * Update the free space hint and the lookup hint.
13273 */
13274
13275 SAVE_HINT_MAP_WRITE(map, new_entry);
13276 return new_entry;
13277 }
13278
13279 /*
13280 * Routine: vm_map_remap_extract
13281 *
13282 * Description: This routine returns a vm_map_entry list extracted from a map.
13283 */
13284 static kern_return_t
13285 vm_map_remap_extract(
13286 vm_map_t map,
13287 vm_map_offset_t addr,
13288 vm_map_size_t size,
13289 boolean_t copy,
13290 struct vm_map_header *map_header,
13291 vm_prot_t *cur_protection,
13292 vm_prot_t *max_protection,
13293 /* What, no behavior? */
13294 vm_inherit_t inheritance,
13295 boolean_t pageable)
13296 {
13297 kern_return_t result;
13298 vm_map_size_t mapped_size;
13299 vm_map_size_t tmp_size;
13300 vm_map_entry_t src_entry; /* result of last map lookup */
13301 vm_map_entry_t new_entry;
13302 vm_object_offset_t offset;
13303 vm_map_offset_t map_address;
13304 vm_map_offset_t src_start; /* start of entry to map */
13305 vm_map_offset_t src_end; /* end of region to be mapped */
13306 vm_object_t object;
13307 vm_map_version_t version;
13308 boolean_t src_needs_copy;
13309 boolean_t new_entry_needs_copy;
13310
13311 assert(map != VM_MAP_NULL);
13312 assert(size != 0);
13313 assert(size == vm_map_round_page(size, PAGE_MASK));
13314 assert(inheritance == VM_INHERIT_NONE ||
13315 inheritance == VM_INHERIT_COPY ||
13316 inheritance == VM_INHERIT_SHARE);
13317
13318 /*
13319 * Compute start and end of region.
13320 */
13321 src_start = vm_map_trunc_page(addr, PAGE_MASK);
13322 src_end = vm_map_round_page(src_start + size, PAGE_MASK);
13323
13324
13325 /*
13326 * Initialize map_header.
13327 */
13328 map_header->links.next = (struct vm_map_entry *)&map_header->links;
13329 map_header->links.prev = (struct vm_map_entry *)&map_header->links;
13330 map_header->nentries = 0;
13331 map_header->entries_pageable = pageable;
13332 map_header->page_shift = PAGE_SHIFT;
13333
13334 vm_map_store_init( map_header );
13335
13336 *cur_protection = VM_PROT_ALL;
13337 *max_protection = VM_PROT_ALL;
13338
13339 map_address = 0;
13340 mapped_size = 0;
13341 result = KERN_SUCCESS;
13342
13343 /*
13344 * The specified source virtual space might correspond to
13345 * multiple map entries, need to loop on them.
13346 */
13347 vm_map_lock(map);
13348 while (mapped_size != size) {
13349 vm_map_size_t entry_size;
13350
13351 /*
13352 * Find the beginning of the region.
13353 */
13354 if (! vm_map_lookup_entry(map, src_start, &src_entry)) {
13355 result = KERN_INVALID_ADDRESS;
13356 break;
13357 }
13358
13359 if (src_start < src_entry->vme_start ||
13360 (mapped_size && src_start != src_entry->vme_start)) {
13361 result = KERN_INVALID_ADDRESS;
13362 break;
13363 }
13364
13365 tmp_size = size - mapped_size;
13366 if (src_end > src_entry->vme_end)
13367 tmp_size -= (src_end - src_entry->vme_end);
13368
13369 entry_size = (vm_map_size_t)(src_entry->vme_end -
13370 src_entry->vme_start);
13371
13372 if(src_entry->is_sub_map) {
13373 vm_map_reference(VME_SUBMAP(src_entry));
13374 object = VM_OBJECT_NULL;
13375 } else {
13376 object = VME_OBJECT(src_entry);
13377 if (src_entry->iokit_acct) {
13378 /*
13379 * This entry uses "IOKit accounting".
13380 */
13381 } else if (object != VM_OBJECT_NULL &&
13382 object->purgable != VM_PURGABLE_DENY) {
13383 /*
13384 * Purgeable objects have their own accounting:
13385 * no pmap accounting for them.
13386 */
13387 assert(!src_entry->use_pmap);
13388 } else {
13389 /*
13390 * Not IOKit or purgeable:
13391 * must be accounted by pmap stats.
13392 */
13393 assert(src_entry->use_pmap);
13394 }
13395
13396 if (object == VM_OBJECT_NULL) {
13397 object = vm_object_allocate(entry_size);
13398 VME_OFFSET_SET(src_entry, 0);
13399 VME_OBJECT_SET(src_entry, object);
13400 } else if (object->copy_strategy !=
13401 MEMORY_OBJECT_COPY_SYMMETRIC) {
13402 /*
13403 * We are already using an asymmetric
13404 * copy, and therefore we already have
13405 * the right object.
13406 */
13407 assert(!src_entry->needs_copy);
13408 } else if (src_entry->needs_copy || object->shadowed ||
13409 (object->internal && !object->true_share &&
13410 !src_entry->is_shared &&
13411 object->vo_size > entry_size)) {
13412
13413 VME_OBJECT_SHADOW(src_entry, entry_size);
13414
13415 if (!src_entry->needs_copy &&
13416 (src_entry->protection & VM_PROT_WRITE)) {
13417 vm_prot_t prot;
13418
13419 prot = src_entry->protection & ~VM_PROT_WRITE;
13420
13421 if (override_nx(map,
13422 VME_ALIAS(src_entry))
13423 && prot)
13424 prot |= VM_PROT_EXECUTE;
13425
13426 if(map->mapped_in_other_pmaps) {
13427 vm_object_pmap_protect(
13428 VME_OBJECT(src_entry),
13429 VME_OFFSET(src_entry),
13430 entry_size,
13431 PMAP_NULL,
13432 src_entry->vme_start,
13433 prot);
13434 } else {
13435 pmap_protect(vm_map_pmap(map),
13436 src_entry->vme_start,
13437 src_entry->vme_end,
13438 prot);
13439 }
13440 }
13441
13442 object = VME_OBJECT(src_entry);
13443 src_entry->needs_copy = FALSE;
13444 }
13445
13446
13447 vm_object_lock(object);
13448 vm_object_reference_locked(object); /* object ref. for new entry */
13449 if (object->copy_strategy ==
13450 MEMORY_OBJECT_COPY_SYMMETRIC) {
13451 object->copy_strategy =
13452 MEMORY_OBJECT_COPY_DELAY;
13453 }
13454 vm_object_unlock(object);
13455 }
13456
13457 offset = (VME_OFFSET(src_entry) +
13458 (src_start - src_entry->vme_start));
13459
13460 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
13461 vm_map_entry_copy(new_entry, src_entry);
13462 if (new_entry->is_sub_map) {
13463 /* clr address space specifics */
13464 new_entry->use_pmap = FALSE;
13465 }
13466
13467 new_entry->map_aligned = FALSE;
13468
13469 new_entry->vme_start = map_address;
13470 new_entry->vme_end = map_address + tmp_size;
13471 assert(new_entry->vme_start < new_entry->vme_end);
13472 new_entry->inheritance = inheritance;
13473 VME_OFFSET_SET(new_entry, offset);
13474
13475 /*
13476 * The new region has to be copied now if required.
13477 */
13478 RestartCopy:
13479 if (!copy) {
13480 /*
13481 * Cannot allow an entry describing a JIT
13482 * region to be shared across address spaces.
13483 */
13484 if (src_entry->used_for_jit == TRUE) {
13485 result = KERN_INVALID_ARGUMENT;
13486 break;
13487 }
13488 src_entry->is_shared = TRUE;
13489 new_entry->is_shared = TRUE;
13490 if (!(new_entry->is_sub_map))
13491 new_entry->needs_copy = FALSE;
13492
13493 } else if (src_entry->is_sub_map) {
13494 /* make this a COW sub_map if not already */
13495 assert(new_entry->wired_count == 0);
13496 new_entry->needs_copy = TRUE;
13497 object = VM_OBJECT_NULL;
13498 } else if (src_entry->wired_count == 0 &&
13499 vm_object_copy_quickly(&VME_OBJECT(new_entry),
13500 VME_OFFSET(new_entry),
13501 (new_entry->vme_end -
13502 new_entry->vme_start),
13503 &src_needs_copy,
13504 &new_entry_needs_copy)) {
13505
13506 new_entry->needs_copy = new_entry_needs_copy;
13507 new_entry->is_shared = FALSE;
13508
13509 /*
13510 * Handle copy_on_write semantics.
13511 */
13512 if (src_needs_copy && !src_entry->needs_copy) {
13513 vm_prot_t prot;
13514
13515 prot = src_entry->protection & ~VM_PROT_WRITE;
13516
13517 if (override_nx(map,
13518 VME_ALIAS(src_entry))
13519 && prot)
13520 prot |= VM_PROT_EXECUTE;
13521
13522 vm_object_pmap_protect(object,
13523 offset,
13524 entry_size,
13525 ((src_entry->is_shared
13526 || map->mapped_in_other_pmaps) ?
13527 PMAP_NULL : map->pmap),
13528 src_entry->vme_start,
13529 prot);
13530
13531 assert(src_entry->wired_count == 0);
13532 src_entry->needs_copy = TRUE;
13533 }
13534 /*
13535 * Throw away the old object reference of the new entry.
13536 */
13537 vm_object_deallocate(object);
13538
13539 } else {
13540 new_entry->is_shared = FALSE;
13541
13542 /*
13543 * The map can be safely unlocked since we
13544 * already hold a reference on the object.
13545 *
13546 * Record the timestamp of the map for later
13547 * verification, and unlock the map.
13548 */
13549 version.main_timestamp = map->timestamp;
13550 vm_map_unlock(map); /* Increments timestamp once! */
13551
13552 /*
13553 * Perform the copy.
13554 */
13555 if (src_entry->wired_count > 0) {
13556 vm_object_lock(object);
13557 result = vm_object_copy_slowly(
13558 object,
13559 offset,
13560 entry_size,
13561 THREAD_UNINT,
13562 &VME_OBJECT(new_entry));
13563
13564 VME_OFFSET_SET(new_entry, 0);
13565 new_entry->needs_copy = FALSE;
13566 } else {
13567 vm_object_offset_t new_offset;
13568
13569 new_offset = VME_OFFSET(new_entry);
13570 result = vm_object_copy_strategically(
13571 object,
13572 offset,
13573 entry_size,
13574 &VME_OBJECT(new_entry),
13575 &new_offset,
13576 &new_entry_needs_copy);
13577 if (new_offset != VME_OFFSET(new_entry)) {
13578 VME_OFFSET_SET(new_entry, new_offset);
13579 }
13580
13581 new_entry->needs_copy = new_entry_needs_copy;
13582 }
13583
13584 /*
13585 * Throw away the old object reference of the new entry.
13586 */
13587 vm_object_deallocate(object);
13588
13589 if (result != KERN_SUCCESS &&
13590 result != KERN_MEMORY_RESTART_COPY) {
13591 _vm_map_entry_dispose(map_header, new_entry);
13592 break;
13593 }
13594
13595 /*
13596 * Verify that the map has not substantially
13597 * changed while the copy was being made.
13598 */
13599
13600 vm_map_lock(map);
13601 if (version.main_timestamp + 1 != map->timestamp) {
13602 /*
13603 * Simple version comparison failed.
13604 *
13605 * Retry the lookup and verify that the
13606 * same object/offset are still present.
13607 */
13608 vm_object_deallocate(VME_OBJECT(new_entry));
13609 _vm_map_entry_dispose(map_header, new_entry);
13610 if (result == KERN_MEMORY_RESTART_COPY)
13611 result = KERN_SUCCESS;
13612 continue;
13613 }
13614
13615 if (result == KERN_MEMORY_RESTART_COPY) {
13616 vm_object_reference(object);
13617 goto RestartCopy;
13618 }
13619 }
13620
13621 _vm_map_store_entry_link(map_header,
13622 map_header->links.prev, new_entry);
13623
13624 /* Protections for submap mapping are irrelevant here */
13625 if( !src_entry->is_sub_map ) {
13626 *cur_protection &= src_entry->protection;
13627 *max_protection &= src_entry->max_protection;
13628 }
13629 map_address += tmp_size;
13630 mapped_size += tmp_size;
13631 src_start += tmp_size;
13632
13633 } /* end while */
13634
13635 vm_map_unlock(map);
13636 if (result != KERN_SUCCESS) {
13637 /*
13638 * Free all allocated elements.
13639 */
13640 for (src_entry = map_header->links.next;
13641 src_entry != (struct vm_map_entry *)&map_header->links;
13642 src_entry = new_entry) {
13643 new_entry = src_entry->vme_next;
13644 _vm_map_store_entry_unlink(map_header, src_entry);
13645 if (src_entry->is_sub_map) {
13646 vm_map_deallocate(VME_SUBMAP(src_entry));
13647 } else {
13648 vm_object_deallocate(VME_OBJECT(src_entry));
13649 }
13650 _vm_map_entry_dispose(map_header, src_entry);
13651 }
13652 }
13653 return result;
13654 }
13655
13656 /*
13657 * Routine: vm_remap
13658 *
13659 * Map portion of a task's address space.
13660 * Mapped region must not overlap more than
13661 * one vm memory object. Protections and
13662 * inheritance attributes remain the same
13663 * as in the original task and are out parameters.
13664 * Source and target task can be identical.
13665 * Other attributes are the same as for vm_map().
13666 */
13667 kern_return_t
13668 vm_map_remap(
13669 vm_map_t target_map,
13670 vm_map_address_t *address,
13671 vm_map_size_t size,
13672 vm_map_offset_t mask,
13673 int flags,
13674 vm_map_t src_map,
13675 vm_map_offset_t memory_address,
13676 boolean_t copy,
13677 vm_prot_t *cur_protection,
13678 vm_prot_t *max_protection,
13679 vm_inherit_t inheritance)
13680 {
13681 kern_return_t result;
13682 vm_map_entry_t entry;
13683 vm_map_entry_t insp_entry = VM_MAP_ENTRY_NULL;
13684 vm_map_entry_t new_entry;
13685 struct vm_map_header map_header;
13686 vm_map_offset_t offset_in_mapping;
13687
13688 if (target_map == VM_MAP_NULL)
13689 return KERN_INVALID_ARGUMENT;
13690
13691 switch (inheritance) {
13692 case VM_INHERIT_NONE:
13693 case VM_INHERIT_COPY:
13694 case VM_INHERIT_SHARE:
13695 if (size != 0 && src_map != VM_MAP_NULL)
13696 break;
13697 /*FALL THRU*/
13698 default:
13699 return KERN_INVALID_ARGUMENT;
13700 }
13701
13702 /*
13703 * If the user is requesting that we return the address of the
13704 * first byte of the data (rather than the base of the page),
13705 * then we use different rounding semantics: specifically,
13706 * we assume that (memory_address, size) describes a region
13707 * all of whose pages we must cover, rather than a base to be truncated
13708 * down and a size to be added to that base. So we figure out
13709 * the highest page that the requested region includes and make
13710 * sure that the size will cover it.
13711 *
13712 * The key example we're worried about is of the form:
13713 *
13714 * memory_address = 0x1ff0, size = 0x20
13715 *
13716 * With the old semantics, we round down the memory_address to 0x1000
13717 * and round up the size to 0x1000, resulting in our covering *only*
13718 * page 0x1000. With the new semantics, we'd realize that the region covers
13719 * 0x1ff0-0x2010, and compute a size of 0x2000. Thus, we cover both page
13720 * 0x1000 and page 0x2000 in the region we remap.
13721 */
13722 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
13723 offset_in_mapping = memory_address - vm_map_trunc_page(memory_address, PAGE_MASK);
13724 size = vm_map_round_page(memory_address + size - vm_map_trunc_page(memory_address, PAGE_MASK), PAGE_MASK);
13725 } else {
13726 size = vm_map_round_page(size, PAGE_MASK);
13727 }
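/*
 * Editorial trace of the example in the comment above, with 4K pages:
 * memory_address = 0x1ff0, size = 0x20 gives
 * offset_in_mapping = 0x1ff0 - 0x1000 = 0xff0 and
 * size = round_page(0x1ff0 + 0x20 - 0x1000) = round_page(0x1010) = 0x2000,
 * so both pages are covered and the caller eventually gets back
 * *address + 0xff0 rather than the page base.
 */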
13728
13729 result = vm_map_remap_extract(src_map, memory_address,
13730 size, copy, &map_header,
13731 cur_protection,
13732 max_protection,
13733 inheritance,
13734 target_map->hdr.entries_pageable);
13735
13736 if (result != KERN_SUCCESS) {
13737 return result;
13738 }
13739
13740 /*
13741 * Allocate/check a range of free virtual address
13742 * space for the target
13743 */
13744 *address = vm_map_trunc_page(*address,
13745 VM_MAP_PAGE_MASK(target_map));
13746 vm_map_lock(target_map);
13747 result = vm_map_remap_range_allocate(target_map, address, size,
13748 mask, flags, &insp_entry);
13749
13750 for (entry = map_header.links.next;
13751 entry != (struct vm_map_entry *)&map_header.links;
13752 entry = new_entry) {
13753 new_entry = entry->vme_next;
13754 _vm_map_store_entry_unlink(&map_header, entry);
13755 if (result == KERN_SUCCESS) {
13756 if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
13757 /* no codesigning -> read-only access */
13758 assert(!entry->used_for_jit);
13759 entry->max_protection = VM_PROT_READ;
13760 entry->protection = VM_PROT_READ;
13761 entry->vme_resilient_codesign = TRUE;
13762 }
13763 entry->vme_start += *address;
13764 entry->vme_end += *address;
13765 assert(!entry->map_aligned);
13766 vm_map_store_entry_link(target_map, insp_entry, entry);
13767 insp_entry = entry;
13768 } else {
13769 if (!entry->is_sub_map) {
13770 vm_object_deallocate(VME_OBJECT(entry));
13771 } else {
13772 vm_map_deallocate(VME_SUBMAP(entry));
13773 }
13774 _vm_map_entry_dispose(&map_header, entry);
13775 }
13776 }
13777
13778 if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
13779 *cur_protection = VM_PROT_READ;
13780 *max_protection = VM_PROT_READ;
13781 }
13782
13783 if( target_map->disable_vmentry_reuse == TRUE) {
13784 if( target_map->highest_entry_end < insp_entry->vme_end ){
13785 target_map->highest_entry_end = insp_entry->vme_end;
13786 }
13787 }
13788
13789 if (result == KERN_SUCCESS) {
13790 target_map->size += size;
13791 SAVE_HINT_MAP_WRITE(target_map, insp_entry);
13792 }
13793 vm_map_unlock(target_map);
13794
13795 if (result == KERN_SUCCESS && target_map->wiring_required)
13796 result = vm_map_wire(target_map, *address,
13797 *address + size, *cur_protection | VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_MLOCK),
13798 TRUE);
13799
13800 /*
13801 * If requested, return the address of the data pointed to by the
13802 * request, rather than the base of the resulting page.
13803 */
13804 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
13805 *address += offset_in_mapping;
13806 }
13807
13808 return result;
13809 }
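/*
 * Illustrative sketch (editorial): a user-space caller reaches this routine
 * through the mach_vm_remap() MIG call.  Assuming "src_task", "src_addr"
 * and "size" are provided by the caller, sharing (copy == FALSE) a remote
 * range and asking for the data address back might look like:
 *
 *	#include <mach/mach.h>
 *	#include <mach/mach_vm.h>
 *
 *	mach_vm_address_t target = 0;
 *	vm_prot_t cur_prot, max_prot;
 *	kern_return_t kr;
 *
 *	kr = mach_vm_remap(mach_task_self(), &target, size, 0,
 *	    VM_FLAGS_ANYWHERE | VM_FLAGS_RETURN_DATA_ADDR,
 *	    src_task, src_addr, FALSE,
 *	    &cur_prot, &max_prot, VM_INHERIT_NONE);
 *
 * On success "target" points at the byte corresponding to "src_addr"
 * (offset_in_mapping is added back at the end of vm_map_remap() above),
 * and cur_prot/max_prot report the protections inherited from the source
 * entries.
 */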
13810
13811 /*
13812 * Routine: vm_map_remap_range_allocate
13813 *
13814 * Description:
13815 * Allocate a range in the specified virtual address map.
13816 * Returns the address and the map entry just before the allocated
13817 * range.
13818 *
13819 * Map must be locked.
13820 */
13821
13822 static kern_return_t
13823 vm_map_remap_range_allocate(
13824 vm_map_t map,
13825 vm_map_address_t *address, /* IN/OUT */
13826 vm_map_size_t size,
13827 vm_map_offset_t mask,
13828 int flags,
13829 vm_map_entry_t *map_entry) /* OUT */
13830 {
13831 vm_map_entry_t entry;
13832 vm_map_offset_t start;
13833 vm_map_offset_t end;
13834 kern_return_t kr;
13835 vm_map_entry_t hole_entry;
13836
13837 StartAgain: ;
13838
13839 start = *address;
13840
13841 if (flags & VM_FLAGS_ANYWHERE)
13842 {
13843 /*
13844 * Calculate the first possible address.
13845 */
13846
13847 if (start < map->min_offset)
13848 start = map->min_offset;
13849 if (start > map->max_offset)
13850 return(KERN_NO_SPACE);
13851
13852 /*
13853 * Look for the first possible address;
13854 * if there's already something at this
13855 * address, we have to start after it.
13856 */
13857
13858 if( map->disable_vmentry_reuse == TRUE) {
13859 VM_MAP_HIGHEST_ENTRY(map, entry, start);
13860 } else {
13861
13862 if (map->holelistenabled) {
13863 hole_entry = (vm_map_entry_t)map->holes_list;
13864
13865 if (hole_entry == NULL) {
13866 /*
13867 * No more space in the map?
13868 */
13869 return(KERN_NO_SPACE);
13870 } else {
13871
13872 boolean_t found_hole = FALSE;
13873
13874 do {
13875 if (hole_entry->vme_start >= start) {
13876 start = hole_entry->vme_start;
13877 found_hole = TRUE;
13878 break;
13879 }
13880
13881 if (hole_entry->vme_end > start) {
13882 found_hole = TRUE;
13883 break;
13884 }
13885 hole_entry = hole_entry->vme_next;
13886
13887 } while (hole_entry != (vm_map_entry_t) map->holes_list);
13888
13889 if (found_hole == FALSE) {
13890 return (KERN_NO_SPACE);
13891 }
13892
13893 entry = hole_entry;
13894 }
13895 } else {
13896 assert(first_free_is_valid(map));
13897 if (start == map->min_offset) {
13898 if ((entry = map->first_free) != vm_map_to_entry(map))
13899 start = entry->vme_end;
13900 } else {
13901 vm_map_entry_t tmp_entry;
13902 if (vm_map_lookup_entry(map, start, &tmp_entry))
13903 start = tmp_entry->vme_end;
13904 entry = tmp_entry;
13905 }
13906 }
13907 start = vm_map_round_page(start,
13908 VM_MAP_PAGE_MASK(map));
13909 }
13910
13911 /*
13912 * In any case, the "entry" always precedes
13913 * the proposed new region throughout the
13914 * loop:
13915 */
13916
13917 while (TRUE) {
13918 register vm_map_entry_t next;
13919
13920 /*
13921 * Find the end of the proposed new region.
13922 * Be sure we didn't go beyond the end, or
13923 * wrap around the address.
13924 */
13925
13926 end = ((start + mask) & ~mask);
13927 end = vm_map_round_page(end,
13928 VM_MAP_PAGE_MASK(map));
13929 if (end < start)
13930 return(KERN_NO_SPACE);
13931 start = end;
13932 end += size;
13933
13934 if ((end > map->max_offset) || (end < start)) {
13935 if (map->wait_for_space) {
13936 if (size <= (map->max_offset -
13937 map->min_offset)) {
13938 assert_wait((event_t) map, THREAD_INTERRUPTIBLE);
13939 vm_map_unlock(map);
13940 thread_block(THREAD_CONTINUE_NULL);
13941 vm_map_lock(map);
13942 goto StartAgain;
13943 }
13944 }
13945
13946 return(KERN_NO_SPACE);
13947 }
13948
13949 next = entry->vme_next;
13950
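/*
 * With the hole list enabled, "entry" denotes a hole rather
 * than an allocated entry, so the candidate region fits as
 * long as it ends within the current hole.
 */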
13951 if (map->holelistenabled) {
13952 if (entry->vme_end >= end)
13953 break;
13954 } else {
13955 /*
13956 * If there are no more entries, we must win.
13957 *
13958 * OR
13959 *
13960 * If there is another entry, it must be
13961 * after the end of the potential new region.
13962 */
13963
13964 if (next == vm_map_to_entry(map))
13965 break;
13966
13967 if (next->vme_start >= end)
13968 break;
13969 }
13970
13971 /*
13972 * Didn't fit -- move to the next entry.
13973 */
13974
13975 entry = next;
13976
13977 if (map->holelistenabled) {
13978 if (entry == (vm_map_entry_t) map->holes_list) {
13979 /*
13980 * Wrapped around
13981 */
13982 return(KERN_NO_SPACE);
13983 }
13984 start = entry->vme_start;
13985 } else {
13986 start = entry->vme_end;
13987 }
13988 }
13989
13990 if (map->holelistenabled) {
13991
13992 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
13993 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
13994 }
13995 }
13996
13997 *address = start;
13998
13999 } else {
14000 vm_map_entry_t temp_entry;
14001
14002 /*
14003 * Verify that:
14004 * the address doesn't itself violate
14005 * the mask requirement.
14006 */
14007
14008 if ((start & mask) != 0)
14009 return(KERN_NO_SPACE);
14010
14011
14012 /*
14013 * ... the address is within bounds
14014 */
14015
14016 end = start + size;
14017
14018 if ((start < map->min_offset) ||
14019 (end > map->max_offset) ||
14020 (start >= end)) {
14021 return(KERN_INVALID_ADDRESS);
14022 }
14023
14024 /*
14025 * If we're asked to overwrite whatever was mapped in that
14026 * range, first deallocate that range.
14027 */
14028 if (flags & VM_FLAGS_OVERWRITE) {
14029 vm_map_t zap_map;
14030
14031 /*
14032 * We use a "zap_map" to avoid having to unlock
14033 * the "map" in vm_map_delete(), which would compromise
14034 * the atomicity of the "deallocate" and then "remap"
14035 * combination.
14036 */
14037 zap_map = vm_map_create(PMAP_NULL,
14038 start,
14039 end,
14040 map->hdr.entries_pageable);
14041 if (zap_map == VM_MAP_NULL) {
14042 return KERN_RESOURCE_SHORTAGE;
14043 }
14044 vm_map_set_page_shift(zap_map, VM_MAP_PAGE_SHIFT(map));
14045 vm_map_disable_hole_optimization(zap_map);
14046
14047 kr = vm_map_delete(map, start, end,
14048 (VM_MAP_REMOVE_SAVE_ENTRIES |
14049 VM_MAP_REMOVE_NO_MAP_ALIGN),
14050 zap_map);
14051 if (kr == KERN_SUCCESS) {
14052 vm_map_destroy(zap_map,
14053 VM_MAP_REMOVE_NO_PMAP_CLEANUP);
14054 zap_map = VM_MAP_NULL;
14055 }
14056 }
14057
14058 /*
14059 * ... the starting address isn't allocated
14060 */
14061
14062 if (vm_map_lookup_entry(map, start, &temp_entry))
14063 return(KERN_NO_SPACE);
14064
14065 entry = temp_entry;
14066
14067 /*
14068 * ... the next region doesn't overlap the
14069 * end point.
14070 */
14071
14072 if ((entry->vme_next != vm_map_to_entry(map)) &&
14073 (entry->vme_next->vme_start < end))
14074 return(KERN_NO_SPACE);
14075 }
14076 *map_entry = entry;
14077 return(KERN_SUCCESS);
14078 }
14079
14080 /*
14081 * vm_map_switch:
14082 *
14083 * Set the address map for the current thread to the specified map
14084 */
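/*
 * Callers are expected to hold a reference on the new map for the
 * duration of the switch and to restore the returned previous map when
 * done, as vm_map_write_user() and vm_map_read_user() below do.
 */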
14085
14086 vm_map_t
14087 vm_map_switch(
14088 vm_map_t map)
14089 {
14090 int mycpu;
14091 thread_t thread = current_thread();
14092 vm_map_t oldmap = thread->map;
14093
14094 mp_disable_preemption();
14095 mycpu = cpu_number();
14096
14097 /*
14098 * Deactivate the current map and activate the requested map
14099 */
14100 PMAP_SWITCH_USER(thread, map, mycpu);
14101
14102 mp_enable_preemption();
14103 return(oldmap);
14104 }
14105
14106
14107 /*
14108 * Routine: vm_map_write_user
14109 *
14110 * Description:
14111 * Copy out data from kernel space into space in the
14112 * destination map. The space must already exist in the
14113 * destination map.
14114 * NOTE: This routine should only be called by threads
14115 * which can block on a page fault, i.e. kernel-mode
14116 * user threads.
14117 *
14118 */
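/*
 * Illustrative usage sketch ("task", "user_addr" and "result" are
 * hypothetical): copying a small kernel buffer out to a target task's
 * address space:
 *
 *	struct foo result;
 *	...
 *	kr = vm_map_write_user(task->map,
 *			       &result,
 *			       (vm_map_address_t)user_addr,
 *			       sizeof (result));
 *
 * The destination range must already be mapped and writable in the
 * target map, and the calling thread must be able to block on a fault.
 */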
14119 kern_return_t
14120 vm_map_write_user(
14121 vm_map_t map,
14122 void *src_p,
14123 vm_map_address_t dst_addr,
14124 vm_size_t size)
14125 {
14126 kern_return_t kr = KERN_SUCCESS;
14127
14128 if(current_map() == map) {
14129 if (copyout(src_p, dst_addr, size)) {
14130 kr = KERN_INVALID_ADDRESS;
14131 }
14132 } else {
14133 vm_map_t oldmap;
14134
14135 /* take on the identity of the target map while doing */
14136 /* the transfer */
14137
14138 vm_map_reference(map);
14139 oldmap = vm_map_switch(map);
14140 if (copyout(src_p, dst_addr, size)) {
14141 kr = KERN_INVALID_ADDRESS;
14142 }
14143 vm_map_switch(oldmap);
14144 vm_map_deallocate(map);
14145 }
14146 return kr;
14147 }
14148
14149 /*
14150 * Routine: vm_map_read_user
14151 *
14152 * Description:
14153 * Copy in data from a user space source map into the
14154 * kernel map. The space must already exist in the
14155 * kernel map.
14156 * NOTE: This routine should only be called by threads
14157 * which can block on a page fault, i.e. kernel-mode
14158 * user threads.
14159 *
14160 */
14161 kern_return_t
14162 vm_map_read_user(
14163 vm_map_t map,
14164 vm_map_address_t src_addr,
14165 void *dst_p,
14166 vm_size_t size)
14167 {
14168 kern_return_t kr = KERN_SUCCESS;
14169
14170 if(current_map() == map) {
14171 if (copyin(src_addr, dst_p, size)) {
14172 kr = KERN_INVALID_ADDRESS;
14173 }
14174 } else {
14175 vm_map_t oldmap;
14176
14177 /* take on the identity of the target map while doing */
14178 /* the transfer */
14179
14180 vm_map_reference(map);
14181 oldmap = vm_map_switch(map);
14182 if (copyin(src_addr, dst_p, size)) {
14183 kr = KERN_INVALID_ADDRESS;
14184 }
14185 vm_map_switch(oldmap);
14186 vm_map_deallocate(map);
14187 }
14188 return kr;
14189 }
14190
14191
14192 /*
14193 * vm_map_check_protection:
14194 *
14195 * Assert that the target map allows the specified
14196 * privilege on the entire address region given.
14197 * The entire region must be allocated.
14198 */
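/*
 * Illustrative usage sketch ("addr" and "len" are hypothetical): a caller
 * might verify that a range is readable before operating on it:
 *
 *	if (!vm_map_check_protection(map,
 *		vm_map_trunc_page(addr, VM_MAP_PAGE_MASK(map)),
 *		vm_map_round_page(addr + len, VM_MAP_PAGE_MASK(map)),
 *		VM_PROT_READ))
 *		return KERN_PROTECTION_FAILURE;
 *
 * Note the answer is only valid at the time of the check: the map is
 * unlocked on return, so protections may change before the caller acts.
 */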
14199 boolean_t
14200 vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
14201 vm_map_offset_t end, vm_prot_t protection)
14202 {
14203 vm_map_entry_t entry;
14204 vm_map_entry_t tmp_entry;
14205
14206 vm_map_lock(map);
14207
14208 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
14209 {
14210 vm_map_unlock(map);
14211 return (FALSE);
14212 }
14213
14214 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
14215 vm_map_unlock(map);
14216 return(FALSE);
14217 }
14218
14219 entry = tmp_entry;
14220
14221 while (start < end) {
14222 if (entry == vm_map_to_entry(map)) {
14223 vm_map_unlock(map);
14224 return(FALSE);
14225 }
14226
14227 /*
14228 * No holes allowed!
14229 */
14230
14231 if (start < entry->vme_start) {
14232 vm_map_unlock(map);
14233 return(FALSE);
14234 }
14235
14236 /*
14237 * Check protection associated with entry.
14238 */
14239
14240 if ((entry->protection & protection) != protection) {
14241 vm_map_unlock(map);
14242 return(FALSE);
14243 }
14244
14245 /* go to next entry */
14246
14247 start = entry->vme_end;
14248 entry = entry->vme_next;
14249 }
14250 vm_map_unlock(map);
14251 return(TRUE);
14252 }
14253
14254 kern_return_t
14255 vm_map_purgable_control(
14256 vm_map_t map,
14257 vm_map_offset_t address,
14258 vm_purgable_t control,
14259 int *state)
14260 {
14261 vm_map_entry_t entry;
14262 vm_object_t object;
14263 kern_return_t kr;
14264 boolean_t was_nonvolatile;
14265
14266 /*
14267 * Vet all the input parameters and current type and state of the
14268 * underlying object. Return with an error if anything is amiss.
14269 */
14270 if (map == VM_MAP_NULL)
14271 return(KERN_INVALID_ARGUMENT);
14272
14273 if (control != VM_PURGABLE_SET_STATE &&
14274 control != VM_PURGABLE_GET_STATE &&
14275 control != VM_PURGABLE_PURGE_ALL)
14276 return(KERN_INVALID_ARGUMENT);
14277
14278 if (control == VM_PURGABLE_PURGE_ALL) {
14279 vm_purgeable_object_purge_all();
14280 return KERN_SUCCESS;
14281 }
14282
14283 if (control == VM_PURGABLE_SET_STATE &&
14284 (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) ||
14285 ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK)))
14286 return(KERN_INVALID_ARGUMENT);
14287
14288 vm_map_lock_read(map);
14289
14290 if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {
14291
14292 /*
14293 * Must pass a valid non-submap address.
14294 */
14295 vm_map_unlock_read(map);
14296 return(KERN_INVALID_ADDRESS);
14297 }
14298
14299 if ((entry->protection & VM_PROT_WRITE) == 0) {
14300 /*
14301 * Can't apply purgable controls to something you can't write.
14302 */
14303 vm_map_unlock_read(map);
14304 return(KERN_PROTECTION_FAILURE);
14305 }
14306
14307 object = VME_OBJECT(entry);
14308 if (object == VM_OBJECT_NULL ||
14309 object->purgable == VM_PURGABLE_DENY) {
14310 /*
14311 * Object must already be present and be purgeable.
14312 */
14313 vm_map_unlock_read(map);
14314 return KERN_INVALID_ARGUMENT;
14315 }
14316
14317 vm_object_lock(object);
14318
14319 #if 00
14320 if (VME_OFFSET(entry) != 0 ||
14321 entry->vme_end - entry->vme_start != object->vo_size) {
14322 /*
14323 * Can only apply purgable controls to the whole (existing)
14324 * object at once.
14325 */
14326 vm_map_unlock_read(map);
14327 vm_object_unlock(object);
14328 return KERN_INVALID_ARGUMENT;
14329 }
14330 #endif
14331
14332 assert(!entry->is_sub_map);
14333 assert(!entry->use_pmap); /* purgeable has its own accounting */
14334
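/*
 * The object lock taken above is sufficient to carry out the
 * purgeable-state change, so the map read lock can be dropped here.
 */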
14335 vm_map_unlock_read(map);
14336
14337 was_nonvolatile = (object->purgable == VM_PURGABLE_NONVOLATILE);
14338
14339 kr = vm_object_purgable_control(object, control, state);
14340
14341 if (was_nonvolatile &&
14342 object->purgable != VM_PURGABLE_NONVOLATILE &&
14343 map->pmap == kernel_pmap) {
14344 #if DEBUG
14345 object->vo_purgeable_volatilizer = kernel_task;
14346 #endif /* DEBUG */
14347 }
14348
14349 vm_object_unlock(object);
14350
14351 return kr;
14352 }
14353
14354 kern_return_t
14355 vm_map_page_query_internal(
14356 vm_map_t target_map,
14357 vm_map_offset_t offset,
14358 int *disposition,
14359 int *ref_count)
14360 {
14361 kern_return_t kr;
14362 vm_page_info_basic_data_t info;
14363 mach_msg_type_number_t count;
14364
14365 count = VM_PAGE_INFO_BASIC_COUNT;
14366 kr = vm_map_page_info(target_map,
14367 offset,
14368 VM_PAGE_INFO_BASIC,
14369 (vm_page_info_t) &info,
14370 &count);
14371 if (kr == KERN_SUCCESS) {
14372 *disposition = info.disposition;
14373 *ref_count = info.ref_count;
14374 } else {
14375 *disposition = 0;
14376 *ref_count = 0;
14377 }
14378
14379 return kr;
14380 }
14381
14382 kern_return_t
14383 vm_map_page_info(
14384 vm_map_t map,
14385 vm_map_offset_t offset,
14386 vm_page_info_flavor_t flavor,
14387 vm_page_info_t info,
14388 mach_msg_type_number_t *count)
14389 {
14390 vm_map_entry_t map_entry;
14391 vm_object_t object;
14392 vm_page_t m;
14393 kern_return_t kr;
14394 kern_return_t retval = KERN_SUCCESS;
14395 boolean_t top_object;
14396 int disposition;
14397 int ref_count;
14398 vm_page_info_basic_t basic_info;
14399 int depth;
14400 vm_map_offset_t offset_in_page;
14401
14402 switch (flavor) {
14403 case VM_PAGE_INFO_BASIC:
14404 if (*count != VM_PAGE_INFO_BASIC_COUNT) {
14405 /*
14406 * The "vm_page_info_basic_data" structure was not
14407 * properly padded, so allow the size to be off by
14408 * one to maintain backwards binary compatibility...
14409 */
14410 if (*count != VM_PAGE_INFO_BASIC_COUNT - 1)
14411 return KERN_INVALID_ARGUMENT;
14412 }
14413 break;
14414 default:
14415 return KERN_INVALID_ARGUMENT;
14416 }
14417
14418 disposition = 0;
14419 ref_count = 0;
14420 top_object = TRUE;
14421 depth = 0;
14422
14423 retval = KERN_SUCCESS;
14424 offset_in_page = offset & PAGE_MASK;
14425 offset = vm_map_trunc_page(offset, PAGE_MASK);
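/* Look up the containing page; the sub-page offset is added back into
 * the returned offset below. */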
14426
14427 vm_map_lock_read(map);
14428
14429 /*
14430 * First, find the map entry covering "offset", going down
14431 * submaps if necessary.
14432 */
14433 for (;;) {
14434 if (!vm_map_lookup_entry(map, offset, &map_entry)) {
14435 vm_map_unlock_read(map);
14436 return KERN_INVALID_ADDRESS;
14437 }
14438 /* compute offset from this map entry's start */
14439 offset -= map_entry->vme_start;
14440 /* compute offset into this map entry's object (or submap) */
14441 offset += VME_OFFSET(map_entry);
14442
14443 if (map_entry->is_sub_map) {
14444 vm_map_t sub_map;
14445
14446 sub_map = VME_SUBMAP(map_entry);
14447 vm_map_lock_read(sub_map);
14448 vm_map_unlock_read(map);
14449
14450 map = sub_map;
14451
14452 ref_count = MAX(ref_count, map->ref_count);
14453 continue;
14454 }
14455 break;
14456 }
14457
14458 object = VME_OBJECT(map_entry);
14459 if (object == VM_OBJECT_NULL) {
14460 /* no object -> no page */
14461 vm_map_unlock_read(map);
14462 goto done;
14463 }
14464
14465 vm_object_lock(object);
14466 vm_map_unlock_read(map);
14467
14468 /*
14469 * Go down the VM object shadow chain until we find the page
14470 * we're looking for.
14471 */
14472 for (;;) {
14473 ref_count = MAX(ref_count, object->ref_count);
14474
14475 m = vm_page_lookup(object, offset);
14476
14477 if (m != VM_PAGE_NULL) {
14478 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
14479 break;
14480 } else {
14481 #if MACH_PAGEMAP
14482 if (object->existence_map) {
14483 if (vm_external_state_get(object->existence_map,
14484 offset) ==
14485 VM_EXTERNAL_STATE_EXISTS) {
14486 /*
14487 * this page has been paged out
14488 */
14489 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
14490 break;
14491 }
14492 } else
14493 #endif
14494 if (object->internal &&
14495 object->alive &&
14496 !object->terminating &&
14497 object->pager_ready) {
14498
14499 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
14500 if (VM_COMPRESSOR_PAGER_STATE_GET(
14501 object,
14502 offset)
14503 == VM_EXTERNAL_STATE_EXISTS) {
14504 /* the pager has that page */
14505 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
14506 break;
14507 }
14508 } else {
14509 memory_object_t pager;
14510
14511 vm_object_paging_begin(object);
14512 pager = object->pager;
14513 vm_object_unlock(object);
14514
14515 /*
14516 * Ask the default pager if
14517 * it has this page.
14518 */
14519 kr = memory_object_data_request(
14520 pager,
14521 offset + object->paging_offset,
14522 0, /* just poke the pager */
14523 VM_PROT_READ,
14524 NULL);
14525
14526 vm_object_lock(object);
14527 vm_object_paging_end(object);
14528
14529 if (kr == KERN_SUCCESS) {
14530 /* the default pager has it */
14531 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
14532 break;
14533 }
14534 }
14535 }
14536
14537 if (object->shadow != VM_OBJECT_NULL) {
14538 vm_object_t shadow;
14539
14540 offset += object->vo_shadow_offset;
14541 shadow = object->shadow;
14542
14543 vm_object_lock(shadow);
14544 vm_object_unlock(object);
14545
14546 object = shadow;
14547 top_object = FALSE;
14548 depth++;
14549 } else {
14550 // if (!object->internal)
14551 // break;
14552 // retval = KERN_FAILURE;
14553 // goto done_with_object;
14554 break;
14555 }
14556 }
14557 }
14558 /* The ref_count is not strictly accurate: it measures the number */
14559 /* of entities holding a ref on the object, and they may not be */
14560 /* mapping the object or the section holding the target page. */
14561 /* It is still a ballpark number and, though an overcount, it */
14562 /* picks up the copy-on-write cases. */
14563
14564 /* We could also get a picture of page sharing from pmap_attributes, */
14565 /* but this would undercount since only faulted-in mappings would */
14566 /* show up. */
14567
14568 if (top_object == TRUE && object->shadow)
14569 disposition |= VM_PAGE_QUERY_PAGE_COPIED;
14570
14571 if (! object->internal)
14572 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
14573
14574 if (m == VM_PAGE_NULL)
14575 goto done_with_object;
14576
14577 if (m->fictitious) {
14578 disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
14579 goto done_with_object;
14580 }
14581 if (m->dirty || pmap_is_modified(m->phys_page))
14582 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
14583
14584 if (m->reference || pmap_is_referenced(m->phys_page))
14585 disposition |= VM_PAGE_QUERY_PAGE_REF;
14586
14587 if (m->speculative)
14588 disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE;
14589
14590 if (m->cs_validated)
14591 disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED;
14592 if (m->cs_tainted)
14593 disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED;
14594 if (m->cs_nx)
14595 disposition |= VM_PAGE_QUERY_PAGE_CS_NX;
14596
14597 done_with_object:
14598 vm_object_unlock(object);
14599 done:
14600
14601 switch (flavor) {
14602 case VM_PAGE_INFO_BASIC:
14603 basic_info = (vm_page_info_basic_t) info;
14604 basic_info->disposition = disposition;
14605 basic_info->ref_count = ref_count;
14606 basic_info->object_id = (vm_object_id_t) (uintptr_t)
14607 VM_KERNEL_ADDRPERM(object);
14608 basic_info->offset =
14609 (memory_object_offset_t) offset + offset_in_page;
14610 basic_info->depth = depth;
14611 break;
14612 }
14613
14614 return retval;
14615 }
14616
14617 /*
14618 * vm_map_msync
14619 *
14620 * Synchronizes the specified memory range with its backing store
14621 * image by either flushing or cleaning the contents to the appropriate
14622 * memory manager, engaging in a memory object synchronize dialog with
14623 * the manager. The client doesn't return until the manager issues an
14624 * m_o_s_completed message. MIG magically converts the user task
14625 * parameter to the task's address map.
14626 *
14627 * interpretation of sync_flags
14628 * VM_SYNC_INVALIDATE - discard pages, only return precious
14629 * pages to manager.
14630 *
14631 * VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
14632 * - discard pages, write dirty or precious
14633 * pages back to memory manager.
14634 *
14635 * VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
14636 * - write dirty or precious pages back to
14637 * the memory manager.
14638 *
14639 * VM_SYNC_CONTIGUOUS - does everything normally, but if there
14640 * is a hole in the region, and we would
14641 * have returned KERN_SUCCESS, return
14642 * KERN_INVALID_ADDRESS instead.
14643 *
14644 * NOTE
14645 * The memory object attributes have not yet been implemented; this
14646 * function will have to deal with the invalidate attribute.
14647 *
14648 * RETURNS
14649 * KERN_INVALID_TASK Bad task parameter
14650 * KERN_INVALID_ARGUMENT both sync and async were specified.
14651 * KERN_SUCCESS The usual.
14652 * KERN_INVALID_ADDRESS There was a hole in the region.
14653 */
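/*
 * Illustrative usage sketch ("addr" and "len" are hypothetical): a
 * synchronous, msync(2)-style flush of a range in the current map:
 *
 *	kr = vm_map_msync(current_map(),
 *			  (vm_map_address_t)addr,
 *			  (vm_map_size_t)len,
 *			  VM_SYNC_SYNCHRONOUS | VM_SYNC_CONTIGUOUS);
 *
 * With VM_SYNC_CONTIGUOUS set, a hole in the range turns an otherwise
 * successful return into KERN_INVALID_ADDRESS, as described above.
 */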
14654
14655 kern_return_t
14656 vm_map_msync(
14657 vm_map_t map,
14658 vm_map_address_t address,
14659 vm_map_size_t size,
14660 vm_sync_t sync_flags)
14661 {
14662 msync_req_t msr;
14663 msync_req_t new_msr;
14664 queue_chain_t req_q; /* queue of requests for this msync */
14665 vm_map_entry_t entry;
14666 vm_map_size_t amount_left;
14667 vm_object_offset_t offset;
14668 boolean_t do_sync_req;
14669 boolean_t had_hole = FALSE;
14670 memory_object_t pager;
14671 vm_map_offset_t pmap_offset;
14672
14673 if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
14674 (sync_flags & VM_SYNC_SYNCHRONOUS))
14675 return(KERN_INVALID_ARGUMENT);
14676
14677 /*
14678 * align address and size on page boundaries
14679 */
14680 size = (vm_map_round_page(address + size,
14681 VM_MAP_PAGE_MASK(map)) -
14682 vm_map_trunc_page(address,
14683 VM_MAP_PAGE_MASK(map)));
14684 address = vm_map_trunc_page(address,
14685 VM_MAP_PAGE_MASK(map));
14686
14687 if (map == VM_MAP_NULL)
14688 return(KERN_INVALID_TASK);
14689
14690 if (size == 0)
14691 return(KERN_SUCCESS);
14692
14693 queue_init(&req_q);
14694 amount_left = size;
14695
14696 while (amount_left > 0) {
14697 vm_object_size_t flush_size;
14698 vm_object_t object;
14699
14700 vm_map_lock(map);
14701 if (!vm_map_lookup_entry(map,
14702 address,
14703 &entry)) {
14704
14705 vm_map_size_t skip;
14706
14707 /*
14708 * hole in the address map.
14709 */
14710 had_hole = TRUE;
14711
14712 /*
14713 * Check for empty map.
14714 */
14715 if (entry == vm_map_to_entry(map) &&
14716 entry->vme_next == entry) {
14717 vm_map_unlock(map);
14718 break;
14719 }
14720 /*
14721 * Check that we don't wrap and that
14722 * we have at least one real map entry.
14723 */
14724 if ((map->hdr.nentries == 0) ||
14725 (entry->vme_next->vme_start < address)) {
14726 vm_map_unlock(map);
14727 break;
14728 }
14729 /*
14730 * Move up to the next entry if needed
14731 */
14732 skip = (entry->vme_next->vme_start - address);
14733 if (skip >= amount_left)
14734 amount_left = 0;
14735 else
14736 amount_left -= skip;
14737 address = entry->vme_next->vme_start;
14738 vm_map_unlock(map);
14739 continue;
14740 }
14741
14742 offset = address - entry->vme_start;
14743 pmap_offset = address;
14744
14745 /*
14746 * Do we have more to flush than is contained in this
14747 * entry?
14748 */
14749 if (amount_left + entry->vme_start + offset > entry->vme_end) {
14750 flush_size = entry->vme_end -
14751 (entry->vme_start + offset);
14752 } else {
14753 flush_size = amount_left;
14754 }
14755 amount_left -= flush_size;
14756 address += flush_size;
14757
14758 if (entry->is_sub_map == TRUE) {
14759 vm_map_t local_map;
14760 vm_map_offset_t local_offset;
14761
14762 local_map = VME_SUBMAP(entry);
14763 local_offset = VME_OFFSET(entry);
14764 vm_map_unlock(map);
14765 if (vm_map_msync(
14766 local_map,
14767 local_offset,
14768 flush_size,
14769 sync_flags) == KERN_INVALID_ADDRESS) {
14770 had_hole = TRUE;
14771 }
14772 continue;
14773 }
14774 object = VME_OBJECT(entry);
14775
14776 /*
14777 * We can't sync this object if the object has not been
14778 * created yet
14779 */
14780 if (object == VM_OBJECT_NULL) {
14781 vm_map_unlock(map);
14782 continue;
14783 }
14784 offset += VME_OFFSET(entry);
14785
14786 vm_object_lock(object);
14787
14788 if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
14789 int kill_pages = 0;
14790 boolean_t reusable_pages = FALSE;
14791
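/*
 * With VM_SYNC_KILLPAGES, pages may be discarded outright only
 * when this object is unshared (single reference, no shadow);
 * otherwise kill_pages is -1 and the request is skipped below.
 */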
14792 if (sync_flags & VM_SYNC_KILLPAGES) {
14793 if (object->ref_count == 1 && !object->shadow)
14794 kill_pages = 1;
14795 else
14796 kill_pages = -1;
14797 }
14798 if (kill_pages != -1)
14799 vm_object_deactivate_pages(
14800 object,
14801 offset,
14802 (vm_object_size_t) flush_size,
14803 kill_pages,
14804 reusable_pages,
14805 map->pmap,
14806 pmap_offset);
14807 vm_object_unlock(object);
14808 vm_map_unlock(map);
14809 continue;
14810 }
14811 /*
14812 * We can't sync this object if there isn't a pager.
14813 * Don't bother to sync internal objects, since there can't
14814 * be any "permanent" storage for these objects anyway.
14815 */
14816 if ((object->pager == MEMORY_OBJECT_NULL) ||
14817 (object->internal) || (object->private)) {
14818 vm_object_unlock(object);
14819 vm_map_unlock(map);
14820 continue;
14821 }
14822 /*
14823 * keep reference on the object until syncing is done
14824 */
14825 vm_object_reference_locked(object);
14826 vm_object_unlock(object);
14827
14828 vm_map_unlock(map);
14829
14830 do_sync_req = vm_object_sync(object,
14831 offset,
14832 flush_size,
14833 sync_flags & VM_SYNC_INVALIDATE,
14834 ((sync_flags & VM_SYNC_SYNCHRONOUS) ||
14835 (sync_flags & VM_SYNC_ASYNCHRONOUS)),
14836 sync_flags & VM_SYNC_SYNCHRONOUS);
14837 /*
14838 * Only send an m_o_s if we returned pages or if the entry
14839 * is writable (i.e. dirty pages may have already been sent back).
14840 */
14841 if (!do_sync_req) {
14842 if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) {
14843 /*
14844 * clear out the clustering and read-ahead hints
14845 */
14846 vm_object_lock(object);
14847
14848 object->pages_created = 0;
14849 object->pages_used = 0;
14850 object->sequential = 0;
14851 object->last_alloc = 0;
14852
14853 vm_object_unlock(object);
14854 }
14855 vm_object_deallocate(object);
14856 continue;
14857 }
14858 msync_req_alloc(new_msr);
14859
14860 vm_object_lock(object);
14861 offset += object->paging_offset;
14862
14863 new_msr->offset = offset;
14864 new_msr->length = flush_size;
14865 new_msr->object = object;
14866 new_msr->flag = VM_MSYNC_SYNCHRONIZING;
14867 re_iterate:
14868
14869 /*
14870 * We can't sync this object if there isn't a pager. The
14871 * pager can disappear anytime we're not holding the object
14872 * lock. So this has to be checked anytime we goto re_iterate.
14873 */
14874
14875 pager = object->pager;
14876
14877 if (pager == MEMORY_OBJECT_NULL) {
14878 vm_object_unlock(object);
14879 vm_object_deallocate(object);
14880 msync_req_free(new_msr);
14881 new_msr = NULL;
14882 continue;
14883 }
14884
14885 queue_iterate(&object->msr_q, msr, msync_req_t, msr_q) {
14886 /*
14887 * Need to check for an overlapping entry; if one is found, wait
14888 * for the overlapping msr to be done, then reiterate.
14889 */
14890 msr_lock(msr);
14891 if (msr->flag == VM_MSYNC_SYNCHRONIZING &&
14892 ((offset >= msr->offset &&
14893 offset < (msr->offset + msr->length)) ||
14894 (msr->offset >= offset &&
14895 msr->offset < (offset + flush_size))))
14896 {
14897 assert_wait((event_t) msr,THREAD_INTERRUPTIBLE);
14898 msr_unlock(msr);
14899 vm_object_unlock(object);
14900 thread_block(THREAD_CONTINUE_NULL);
14901 vm_object_lock(object);
14902 goto re_iterate;
14903 }
14904 msr_unlock(msr);
14905 }/* queue_iterate */
14906
14907 queue_enter(&object->msr_q, new_msr, msync_req_t, msr_q);
14908
14909 vm_object_paging_begin(object);
14910 vm_object_unlock(object);
14911
14912 queue_enter(&req_q, new_msr, msync_req_t, req_q);
14913
14914 (void) memory_object_synchronize(
14915 pager,
14916 offset,
14917 flush_size,
14918 sync_flags & ~VM_SYNC_CONTIGUOUS);
14919
14920 vm_object_lock(object);
14921 vm_object_paging_end(object);
14922 vm_object_unlock(object);
14923 }/* while */
14924
14925 /*
14926 * wait for memory_object_synchronize_completed messages from pager(s)
14927 */
14928
14929 while (!queue_empty(&req_q)) {
14930 msr = (msync_req_t)queue_first(&req_q);
14931 msr_lock(msr);
14932 while(msr->flag != VM_MSYNC_DONE) {
14933 assert_wait((event_t) msr, THREAD_INTERRUPTIBLE);
14934 msr_unlock(msr);
14935 thread_block(THREAD_CONTINUE_NULL);
14936 msr_lock(msr);
14937 }/* while */
14938 queue_remove(&req_q, msr, msync_req_t, req_q);
14939 msr_unlock(msr);
14940 vm_object_deallocate(msr->object);
14941 msync_req_free(msr);
14942 }/* while */
14943
14944 /* for proper msync() behaviour */
14945 if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS))
14946 return(KERN_INVALID_ADDRESS);
14947
14948 return(KERN_SUCCESS);
14949 }/* vm_msync */
14950
14951 /*
14952 * Routine: convert_port_entry_to_map
14953 * Purpose:
14954 * Convert from a port specifying an entry or a task
14955 * to a map. Doesn't consume the port ref; produces a map ref,
14956 * which may be null. Unlike convert_port_to_map, the
14957 * port may be backed by a task or a named entry.
14958 * Conditions:
14959 * Nothing locked.
14960 */
14961
14962
14963 vm_map_t
14964 convert_port_entry_to_map(
14965 ipc_port_t port)
14966 {
14967 vm_map_t map;
14968 vm_named_entry_t named_entry;
14969 uint32_t try_failed_count = 0;
14970
14971 if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
14972 while(TRUE) {
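/*
 * If the named entry's lock can't be taken while the port is
 * locked, drop the port lock, back off with mutex_pause(),
 * and retry.
 */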
14973 ip_lock(port);
14974 if(ip_active(port) && (ip_kotype(port)
14975 == IKOT_NAMED_ENTRY)) {
14976 named_entry =
14977 (vm_named_entry_t)port->ip_kobject;
14978 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
14979 ip_unlock(port);
14980
14981 try_failed_count++;
14982 mutex_pause(try_failed_count);
14983 continue;
14984 }
14985 named_entry->ref_count++;
14986 lck_mtx_unlock(&(named_entry)->Lock);
14987 ip_unlock(port);
14988 if ((named_entry->is_sub_map) &&
14989 (named_entry->protection
14990 & VM_PROT_WRITE)) {
14991 map = named_entry->backing.map;
14992 } else {
14993 mach_destroy_memory_entry(port);
14994 return VM_MAP_NULL;
14995 }
14996 vm_map_reference_swap(map);
14997 mach_destroy_memory_entry(port);
14998 break;
14999 }
15000 else
15001 return VM_MAP_NULL;
15002 }
15003 }
15004 else
15005 map = convert_port_to_map(port);
15006
15007 return map;
15008 }
15009
15010 /*
15011 * Routine: convert_port_entry_to_object
15012 * Purpose:
15013 * Convert from a port specifying a named entry to an
15014 * object. Doesn't consume the port ref; produces an object ref,
15015 * which may be null.
15016 * Conditions:
15017 * Nothing locked.
15018 */
15019
15020
15021 vm_object_t
15022 convert_port_entry_to_object(
15023 ipc_port_t port)
15024 {
15025 vm_object_t object = VM_OBJECT_NULL;
15026 vm_named_entry_t named_entry;
15027 uint32_t try_failed_count = 0;
15028
15029 if (IP_VALID(port) &&
15030 (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
15031 try_again:
15032 ip_lock(port);
15033 if (ip_active(port) &&
15034 (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
15035 named_entry = (vm_named_entry_t)port->ip_kobject;
15036 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
15037 ip_unlock(port);
15038 try_failed_count++;
15039 mutex_pause(try_failed_count);
15040 goto try_again;
15041 }
15042 named_entry->ref_count++;
15043 lck_mtx_unlock(&(named_entry)->Lock);
15044 ip_unlock(port);
15045 if (!(named_entry->is_sub_map) &&
15046 !(named_entry->is_pager) &&
15047 !(named_entry->is_copy) &&
15048 (named_entry->protection & VM_PROT_WRITE)) {
15049 object = named_entry->backing.object;
15050 vm_object_reference(object);
15051 }
15052 mach_destroy_memory_entry(port);
15053 }
15054 }
15055
15056 return object;
15057 }
15058
15059 /*
15060 * Export routines to other components for the things we access locally through
15061 * macros.
15062 */
15063 #undef current_map
15064 vm_map_t
15065 current_map(void)
15066 {
15067 return (current_map_fast());
15068 }
15069
15070 /*
15071 * vm_map_reference:
15072 *
15073 * Most code internal to the osfmk will go through a
15074 * macro defining this. This is always here for the
15075 * use of other kernel components.
15076 */
15077 #undef vm_map_reference
15078 void
15079 vm_map_reference(
15080 register vm_map_t map)
15081 {
15082 if (map == VM_MAP_NULL)
15083 return;
15084
15085 lck_mtx_lock(&map->s_lock);
15086 #if TASK_SWAPPER
15087 assert(map->res_count > 0);
15088 assert(map->ref_count >= map->res_count);
15089 map->res_count++;
15090 #endif
15091 map->ref_count++;
15092 lck_mtx_unlock(&map->s_lock);
15093 }
15094
15095 /*
15096 * vm_map_deallocate:
15097 *
15098 * Removes a reference from the specified map,
15099 * destroying it if no references remain.
15100 * The map should not be locked.
15101 */
15102 void
15103 vm_map_deallocate(
15104 register vm_map_t map)
15105 {
15106 unsigned int ref;
15107
15108 if (map == VM_MAP_NULL)
15109 return;
15110
15111 lck_mtx_lock(&map->s_lock);
15112 ref = --map->ref_count;
15113 if (ref > 0) {
15114 vm_map_res_deallocate(map);
15115 lck_mtx_unlock(&map->s_lock);
15116 return;
15117 }
15118 assert(map->ref_count == 0);
15119 lck_mtx_unlock(&map->s_lock);
15120
15121 #if TASK_SWAPPER
15122 /*
15123 * The map residence count isn't decremented here because
15124 * the vm_map_delete below will traverse the entire map,
15125 * deleting entries, and the residence counts on objects
15126 * and sharing maps will go away then.
15127 */
15128 #endif
15129
15130 vm_map_destroy(map, VM_MAP_NO_FLAGS);
15131 }
15132
15133
15134 void
15135 vm_map_disable_NX(vm_map_t map)
15136 {
15137 if (map == NULL)
15138 return;
15139 if (map->pmap == NULL)
15140 return;
15141
15142 pmap_disable_NX(map->pmap);
15143 }
15144
15145 void
15146 vm_map_disallow_data_exec(vm_map_t map)
15147 {
15148 if (map == NULL)
15149 return;
15150
15151 map->map_disallow_data_exec = TRUE;
15152 }
15153
15154 /* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
15155 * more descriptive.
15156 */
15157 void
15158 vm_map_set_32bit(vm_map_t map)
15159 {
15160 map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
15161 }
15162
15163
15164 void
15165 vm_map_set_64bit(vm_map_t map)
15166 {
15167 map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
15168 }
15169
15170 vm_map_offset_t
15171 vm_compute_max_offset(boolean_t is64)
15172 {
15173 return (is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS);
15174 }
15175
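/* Maximum ASLR slide, in pages: 1 << 16 for 64-bit maps, 1 << 8 for 32-bit maps. */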
15176 uint64_t
15177 vm_map_get_max_aslr_slide_pages(vm_map_t map)
15178 {
15179 return (1 << (vm_map_is_64bit(map) ? 16 : 8));
15180 }
15181
15182 boolean_t
15183 vm_map_is_64bit(
15184 vm_map_t map)
15185 {
15186 return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
15187 }
15188
15189 boolean_t
15190 vm_map_has_hard_pagezero(
15191 vm_map_t map,
15192 vm_map_offset_t pagezero_size)
15193 {
15194 /*
15195 * XXX FBDP
15196 * We should lock the VM map (for read) here but we can get away
15197 * with it for now because there can't really be any race condition:
15198 * the VM map's min_offset is changed only when the VM map is created
15199 * and when the zero page is established (when the binary gets loaded),
15200 * and this routine gets called only when the task terminates and the
15201 * VM map is being torn down, and when a new map is created via
15202 * load_machfile()/execve().
15203 */
15204 return (map->min_offset >= pagezero_size);
15205 }
15206
15207 /*
15208 * Raise a VM map's maximum offset.
15209 */
15210 kern_return_t
15211 vm_map_raise_max_offset(
15212 vm_map_t map,
15213 vm_map_offset_t new_max_offset)
15214 {
15215 kern_return_t ret;
15216
15217 vm_map_lock(map);
15218 ret = KERN_INVALID_ADDRESS;
15219
15220 if (new_max_offset >= map->max_offset) {
15221 if (!vm_map_is_64bit(map)) {
15222 if (new_max_offset <= (vm_map_offset_t)VM_MAX_ADDRESS) {
15223 map->max_offset = new_max_offset;
15224 ret = KERN_SUCCESS;
15225 }
15226 } else {
15227 if (new_max_offset <= (vm_map_offset_t)MACH_VM_MAX_ADDRESS) {
15228 map->max_offset = new_max_offset;
15229 ret = KERN_SUCCESS;
15230 }
15231 }
15232 }
15233
15234 vm_map_unlock(map);
15235 return ret;
15236 }
15237
15238
15239 /*
15240 * Raise a VM map's minimum offset.
15241 * To strictly enforce "page zero" reservation.
15242 */
15243 kern_return_t
15244 vm_map_raise_min_offset(
15245 vm_map_t map,
15246 vm_map_offset_t new_min_offset)
15247 {
15248 vm_map_entry_t first_entry;
15249
15250 new_min_offset = vm_map_round_page(new_min_offset,
15251 VM_MAP_PAGE_MASK(map));
15252
15253 vm_map_lock(map);
15254
15255 if (new_min_offset < map->min_offset) {
15256 /*
15257 * Can't move min_offset backwards, as that would expose
15258 * a part of the address space that was previously, and for
15259 * possibly good reasons, inaccessible.
15260 */
15261 vm_map_unlock(map);
15262 return KERN_INVALID_ADDRESS;
15263 }
15264 if (new_min_offset >= map->max_offset) {
15265 /* can't go beyond the end of the address space */
15266 vm_map_unlock(map);
15267 return KERN_INVALID_ADDRESS;
15268 }
15269
15270 first_entry = vm_map_first_entry(map);
15271 if (first_entry != vm_map_to_entry(map) &&
15272 first_entry->vme_start < new_min_offset) {
15273 /*
15274 * Some memory was already allocated below the new
15275 * minimum offset. It's too late to change it now...
15276 */
15277 vm_map_unlock(map);
15278 return KERN_NO_SPACE;
15279 }
15280
15281 map->min_offset = new_min_offset;
15282
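/* Keep the hole list consistent: the leading hole now starts at the new minimum. */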
15283 assert(map->holes_list);
15284 map->holes_list->start = new_min_offset;
15285 assert(new_min_offset < map->holes_list->end);
15286
15287 vm_map_unlock(map);
15288
15289 return KERN_SUCCESS;
15290 }
15291
15292 /*
15293 * Set the limit on the maximum amount of user wired memory allowed for this map.
15294 * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side of
15295 * the kernel. The limits are checked on the Mach VM side, so we keep a copy here
15296 * to avoid having to reach over to the BSD data structures.
15297 */
15298
15299 void
15300 vm_map_set_user_wire_limit(vm_map_t map,
15301 vm_size_t limit)
15302 {
15303 map->user_wire_limit = limit;
15304 }
15305
15306
15307 void vm_map_switch_protect(vm_map_t map,
15308 boolean_t val)
15309 {
15310 vm_map_lock(map);
15311 map->switch_protect=val;
15312 vm_map_unlock(map);
15313 }
15314
15315 /*
15316 * IOKit has mapped a region into this map; adjust the pmap's ledgers appropriately.
15317 * phys_footprint is a composite limit consisting of iokit + physmem, so we need to
15318 * bump both counters.
15319 */
15320 void
15321 vm_map_iokit_mapped_region(vm_map_t map, vm_size_t bytes)
15322 {
15323 pmap_t pmap = vm_map_pmap(map);
15324
15325 ledger_credit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
15326 ledger_credit(pmap->ledger, task_ledgers.phys_footprint, bytes);
15327 }
15328
15329 void
15330 vm_map_iokit_unmapped_region(vm_map_t map, vm_size_t bytes)
15331 {
15332 pmap_t pmap = vm_map_pmap(map);
15333
15334 ledger_debit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
15335 ledger_debit(pmap->ledger, task_ledgers.phys_footprint, bytes);
15336 }
15337
15338 /* Add (generate) code signature for memory range */
15339 #if CONFIG_DYNAMIC_CODE_SIGNING
15340 kern_return_t vm_map_sign(vm_map_t map,
15341 vm_map_offset_t start,
15342 vm_map_offset_t end)
15343 {
15344 vm_map_entry_t entry;
15345 vm_page_t m;
15346 vm_object_t object;
15347
15348 /*
15349 * Vet all the input parameters and current type and state of the
15350 * underlying object. Return with an error if anything is amiss.
15351 */
15352 if (map == VM_MAP_NULL)
15353 return(KERN_INVALID_ARGUMENT);
15354
15355 vm_map_lock_read(map);
15356
15357 if (!vm_map_lookup_entry(map, start, &entry) || entry->is_sub_map) {
15358 /*
15359 * Must pass a valid non-submap address.
15360 */
15361 vm_map_unlock_read(map);
15362 return(KERN_INVALID_ADDRESS);
15363 }
15364
15365 if((entry->vme_start > start) || (entry->vme_end < end)) {
15366 /*
15367 * Map entry doesn't cover the requested range. Not handling
15368 * this situation currently.
15369 */
15370 vm_map_unlock_read(map);
15371 return(KERN_INVALID_ARGUMENT);
15372 }
15373
15374 object = VME_OBJECT(entry);
15375 if (object == VM_OBJECT_NULL) {
15376 /*
15377 * Object must already be present or we can't sign.
15378 */
15379 vm_map_unlock_read(map);
15380 return KERN_INVALID_ARGUMENT;
15381 }
15382
15383 vm_object_lock(object);
15384 vm_map_unlock_read(map);
15385
15386 while(start < end) {
15387 uint32_t refmod;
15388
15389 m = vm_page_lookup(object,
15390 start - entry->vme_start + VME_OFFSET(entry));
15391 if (m==VM_PAGE_NULL) {
15392 /* Should we try to fault a page in here? We can probably
15393 * demand that it exists and is locked for this request. */
15394 vm_object_unlock(object);
15395 return KERN_FAILURE;
15396 }
15397 /* deal with special page status */
15398 if (m->busy ||
15399 (m->unusual && (m->error || m->restart || m->private || m->absent))) {
15400 vm_object_unlock(object);
15401 return KERN_FAILURE;
15402 }
15403
15404 /* Page is OK... now "validate" it */
15405 /* This is the place where we'll call out to create a code
15406 * directory, later */
15407 m->cs_validated = TRUE;
15408
15409 /* The page is now "clean" for codesigning purposes. That means
15410 * we don't consider it as modified (wpmapped) anymore. But
15411 * we'll disconnect the page so we note any future modification
15412 * attempts. */
15413 m->wpmapped = FALSE;
15414 refmod = pmap_disconnect(m->phys_page);
15415
15416 /* Pull the dirty status from the pmap, since we cleared the
15417 * wpmapped bit */
15418 if ((refmod & VM_MEM_MODIFIED) && !m->dirty) {
15419 SET_PAGE_DIRTY(m, FALSE);
15420 }
15421
15422 /* On to the next page */
15423 start += PAGE_SIZE;
15424 }
15425 vm_object_unlock(object);
15426
15427 return KERN_SUCCESS;
15428 }
15429 #endif
15430
15431 kern_return_t vm_map_partial_reap(vm_map_t map, unsigned int *reclaimed_resident, unsigned int *reclaimed_compressed)
15432 {
15433 vm_map_entry_t entry = VM_MAP_ENTRY_NULL;
15434 vm_map_entry_t next_entry;
15435 kern_return_t kr = KERN_SUCCESS;
15436 vm_map_t zap_map;
15437
15438 vm_map_lock(map);
15439
15440 /*
15441 * We use a "zap_map" to avoid having to unlock
15442 * the "map" in vm_map_delete().
15443 */
15444 zap_map = vm_map_create(PMAP_NULL,
15445 map->min_offset,
15446 map->max_offset,
15447 map->hdr.entries_pageable);
15448
15449 if (zap_map == VM_MAP_NULL) {
15450 return KERN_RESOURCE_SHORTAGE;
15451 }
15452
15453 vm_map_set_page_shift(zap_map,
15454 VM_MAP_PAGE_SHIFT(map));
15455 vm_map_disable_hole_optimization(zap_map);
15456
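/*
 * Reap only unshared anonymous memory: entries whose internal object
 * has a single reference are deleted into the zap map, and their
 * resident and compressed page counts are reported back to the caller.
 */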
15457 for (entry = vm_map_first_entry(map);
15458 entry != vm_map_to_entry(map);
15459 entry = next_entry) {
15460 next_entry = entry->vme_next;
15461
15462 if (VME_OBJECT(entry) &&
15463 !entry->is_sub_map &&
15464 (VME_OBJECT(entry)->internal == TRUE) &&
15465 (VME_OBJECT(entry)->ref_count == 1)) {
15466
15467 *reclaimed_resident += VME_OBJECT(entry)->resident_page_count;
15468 *reclaimed_compressed += vm_compressor_pager_get_count(VME_OBJECT(entry)->pager);
15469
15470 (void)vm_map_delete(map,
15471 entry->vme_start,
15472 entry->vme_end,
15473 VM_MAP_REMOVE_SAVE_ENTRIES,
15474 zap_map);
15475 }
15476 }
15477
15478 vm_map_unlock(map);
15479
15480 /*
15481 * Get rid of the "zap_maps" and all the map entries that
15482 * they may still contain.
15483 */
15484 if (zap_map != VM_MAP_NULL) {
15485 vm_map_destroy(zap_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
15486 zap_map = VM_MAP_NULL;
15487 }
15488
15489 return kr;
15490 }
15491
15492 #if CONFIG_FREEZE
15493
15494 kern_return_t vm_map_freeze_walk(
15495 vm_map_t map,
15496 unsigned int *purgeable_count,
15497 unsigned int *wired_count,
15498 unsigned int *clean_count,
15499 unsigned int *dirty_count,
15500 unsigned int dirty_budget,
15501 boolean_t *has_shared)
15502 {
15503 vm_map_entry_t entry;
15504
15505 vm_map_lock_read(map);
15506
15507 *purgeable_count = *wired_count = *clean_count = *dirty_count = 0;
15508 *has_shared = FALSE;
15509
15510 for (entry = vm_map_first_entry(map);
15511 entry != vm_map_to_entry(map);
15512 entry = entry->vme_next) {
15513 unsigned int purgeable, clean, dirty, wired;
15514 boolean_t shared;
15515
15516 if ((VME_OBJECT(entry) == 0) ||
15517 (entry->is_sub_map) ||
15518 (VME_OBJECT(entry)->phys_contiguous)) {
15519 continue;
15520 }
15521
15522 default_freezer_pack(&purgeable, &wired, &clean, &dirty, dirty_budget, &shared, VME_OBJECT(entry), NULL);
15523
15524 *purgeable_count += purgeable;
15525 *wired_count += wired;
15526 *clean_count += clean;
15527 *dirty_count += dirty;
15528
15529 if (shared) {
15530 *has_shared = TRUE;
15531 }
15532
15533 /* Adjust pageout budget and finish up if reached */
15534 if (dirty_budget) {
15535 dirty_budget -= dirty;
15536 if (dirty_budget == 0) {
15537 break;
15538 }
15539 }
15540 }
15541
15542 vm_map_unlock_read(map);
15543
15544 return KERN_SUCCESS;
15545 }
15546
15547 int c_freezer_swapout_count;
15548 int c_freezer_compression_count = 0;
15549 AbsoluteTime c_freezer_last_yield_ts = 0;
15550
15551 kern_return_t vm_map_freeze(
15552 vm_map_t map,
15553 unsigned int *purgeable_count,
15554 unsigned int *wired_count,
15555 unsigned int *clean_count,
15556 unsigned int *dirty_count,
15557 unsigned int dirty_budget,
15558 boolean_t *has_shared)
15559 {
15560 vm_map_entry_t entry2 = VM_MAP_ENTRY_NULL;
15561 kern_return_t kr = KERN_SUCCESS;
15562 boolean_t default_freezer_active = TRUE;
15563
15564 *purgeable_count = *wired_count = *clean_count = *dirty_count = 0;
15565 *has_shared = FALSE;
15566
15567 /*
15568 * We need the exclusive lock here so that we can
15569 * block any page faults or lookups while we are
15570 * in the middle of freezing this vm map.
15571 */
15572 vm_map_lock(map);
15573
15574 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
15575 default_freezer_active = FALSE;
15576
15577 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
15578 kr = KERN_NO_SPACE;
15579 goto done;
15580 }
15581 }
15582 assert(default_freezer_active == FALSE);
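/*
 * The assertion above documents that this configuration is expected to
 * run with the compressed pager active, in which case the loop below
 * pages objects out via vm_object_compressed_freezer_pageout() rather
 * than through a default-freezer handle.
 */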
15583
15584 if (default_freezer_active) {
15585 if (map->default_freezer_handle == NULL) {
15586 map->default_freezer_handle = default_freezer_handle_allocate();
15587 }
15588
15589 if ((kr = default_freezer_handle_init(map->default_freezer_handle)) != KERN_SUCCESS) {
15590 /*
15591 * Can happen if the default_freezer_handle passed in is NULL,
15592 * or if a table has already been allocated and associated
15593 * with this handle, i.e. the map is already frozen.
15594 */
15595 goto done;
15596 }
15597 }
15598 c_freezer_compression_count = 0;
15599 clock_get_uptime(&c_freezer_last_yield_ts);
15600
15601 for (entry2 = vm_map_first_entry(map);
15602 entry2 != vm_map_to_entry(map);
15603 entry2 = entry2->vme_next) {
15604
15605 vm_object_t src_object = VME_OBJECT(entry2);
15606
15607 if (VME_OBJECT(entry2) &&
15608 !entry2->is_sub_map &&
15609 !VME_OBJECT(entry2)->phys_contiguous) {
15610 /* If eligible, scan the entry, moving eligible pages over to our parent object */
15611 if (default_freezer_active) {
15612 unsigned int purgeable, clean, dirty, wired;
15613 boolean_t shared;
15614
15615 default_freezer_pack(&purgeable, &wired, &clean, &dirty, dirty_budget, &shared,
15616 src_object, map->default_freezer_handle);
15617
15618 *purgeable_count += purgeable;
15619 *wired_count += wired;
15620 *clean_count += clean;
15621 *dirty_count += dirty;
15622
15623 /* Adjust pageout budget and finish up if reached */
15624 if (dirty_budget) {
15625 dirty_budget -= dirty;
15626 if (dirty_budget == 0) {
15627 break;
15628 }
15629 }
15630
15631 if (shared) {
15632 *has_shared = TRUE;
15633 }
15634 } else {
15635 if (VME_OBJECT(entry2)->internal == TRUE) {
15636
15637 if (DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPBACKED) {
15638 /*
15639 * Pages belonging to this object could be swapped to disk.
15640 * Make sure it's not a shared object because we could end
15641 * up just bringing it back in again.
15642 */
15643 if (VME_OBJECT(entry2)->ref_count > 1) {
15644 continue;
15645 }
15646 }
15647 vm_object_compressed_freezer_pageout(VME_OBJECT(entry2));
15648 }
15649
15650 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
15651 kr = KERN_NO_SPACE;
15652 break;
15653 }
15654 }
15655 }
15656 }
15657
15658 if (default_freezer_active) {
15659 /* Finally, throw out the pages to swap */
15660 default_freezer_pageout(map->default_freezer_handle);
15661 }
15662
15663 done:
15664 vm_map_unlock(map);
15665
15666 if (!default_freezer_active) {
15667 vm_object_compressed_freezer_done();
15668 }
15669 if (DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPBACKED) {
15670 /*
15671 * reset the counter tracking the # of swapped c_segs
15672 * because we are now done with this freeze session and task.
15673 */
15674 c_freezer_swapout_count = 0;
15675 }
15676 return kr;
15677 }
15678
15679 kern_return_t
15680 vm_map_thaw(
15681 vm_map_t map)
15682 {
15683 kern_return_t kr = KERN_SUCCESS;
15684
15685 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
15686 /*
15687 * We thaw on demand in the presence of the compressed pager.
15688 */
15689 return kr;
15690 }
15691
15692 vm_map_lock(map);
15693
15694 if (map->default_freezer_handle == NULL) {
15695 /*
15696 * This map is not in a frozen state.
15697 */
15698 kr = KERN_FAILURE;
15699 goto out;
15700 }
15701
15702 kr = default_freezer_unpack(map->default_freezer_handle);
15703 out:
15704 vm_map_unlock(map);
15705
15706 return kr;
15707 }
15708 #endif
15709
15710 /*
15711 * vm_map_entry_should_cow_for_true_share:
15712 *
15713 * Determines if the map entry should be clipped and setup for copy-on-write
15714 * to avoid applying "true_share" to a large VM object when only a subset is
15715 * targeted.
15716 *
15717 * For now, we target only the map entries created for the Objective C
15718 * Garbage Collector, which initially have the following properties:
15719 * - alias == VM_MEMORY_MALLOC
15720 * - wired_count == 0
15721 * - !needs_copy
15722 * and a VM object with:
15723 * - internal
15724 * - copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC
15725 * - !true_share
15726 * - vo_size == ANON_CHUNK_SIZE
15727 *
15728 * Only non-kernel map entries.
15729 */
15730 boolean_t
15731 vm_map_entry_should_cow_for_true_share(
15732 vm_map_entry_t entry)
15733 {
15734 vm_object_t object;
15735
15736 if (entry->is_sub_map) {
15737 /* entry does not point at a VM object */
15738 return FALSE;
15739 }
15740
15741 if (entry->needs_copy) {
15742 /* already set for copy_on_write: done! */
15743 return FALSE;
15744 }
15745
15746 if (VME_ALIAS(entry) != VM_MEMORY_MALLOC &&
15747 VME_ALIAS(entry) != VM_MEMORY_MALLOC_SMALL) {
15748 /* not a malloc heap or Obj-C Garbage Collector heap */
15749 return FALSE;
15750 }
15751
15752 if (entry->wired_count) {
15753 /* wired: can't change the map entry... */
15754 vm_counters.should_cow_but_wired++;
15755 return FALSE;
15756 }
15757
15758 object = VME_OBJECT(entry);
15759
15760 if (object == VM_OBJECT_NULL) {
15761 /* no object yet... */
15762 return FALSE;
15763 }
15764
15765 if (!object->internal) {
15766 /* not an internal object */
15767 return FALSE;
15768 }
15769
15770 if (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
15771 /* not the default copy strategy */
15772 return FALSE;
15773 }
15774
15775 if (object->true_share) {
15776 /* already true_share: too late to avoid it */
15777 return FALSE;
15778 }
15779
15780 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC &&
15781 object->vo_size != ANON_CHUNK_SIZE) {
15782 /* ... not an object created for the ObjC Garbage Collector */
15783 return FALSE;
15784 }
15785
15786 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_SMALL &&
15787 object->vo_size != 2048 * 4096) {
15788 /* ... not a "MALLOC_SMALL" heap */
15789 return FALSE;
15790 }
15791
15792 /*
15793 * All the criteria match: we have a large object being targeted for "true_share".
15794 * To limit the adverse side-effects linked with "true_share", tell the caller to
15795 * try and avoid setting up the entire object for "true_share" by clipping the
15796 * targeted range and setting it up for copy-on-write.
15797 */
15798 return TRUE;
15799 }
15800
15801 vm_map_offset_t
15802 vm_map_round_page_mask(
15803 vm_map_offset_t offset,
15804 vm_map_offset_t mask)
15805 {
15806 return VM_MAP_ROUND_PAGE(offset, mask);
15807 }
15808
15809 vm_map_offset_t
15810 vm_map_trunc_page_mask(
15811 vm_map_offset_t offset,
15812 vm_map_offset_t mask)
15813 {
15814 return VM_MAP_TRUNC_PAGE(offset, mask);
15815 }
15816
15817 boolean_t
15818 vm_map_page_aligned(
15819 vm_map_offset_t offset,
15820 vm_map_offset_t mask)
15821 {
15822 return ((offset) & mask) == 0;
15823 }
15824
15825 int
15826 vm_map_page_shift(
15827 vm_map_t map)
15828 {
15829 return VM_MAP_PAGE_SHIFT(map);
15830 }
15831
15832 int
15833 vm_map_page_size(
15834 vm_map_t map)
15835 {
15836 return VM_MAP_PAGE_SIZE(map);
15837 }
15838
15839 vm_map_offset_t
15840 vm_map_page_mask(
15841 vm_map_t map)
15842 {
15843 return VM_MAP_PAGE_MASK(map);
15844 }
15845
15846 kern_return_t
15847 vm_map_set_page_shift(
15848 vm_map_t map,
15849 int pageshift)
15850 {
15851 if (map->hdr.nentries != 0) {
15852 /* too late to change page size */
15853 return KERN_FAILURE;
15854 }
15855
15856 map->hdr.page_shift = pageshift;
15857
15858 return KERN_SUCCESS;
15859 }
15860
15861 int
15862 vm_map_purge(
15863 vm_map_t map)
15864 {
15865 int num_object_purged;
15866 vm_map_entry_t entry;
15867 vm_map_offset_t next_address;
15868 vm_object_t object;
15869 int state;
15870 kern_return_t kr;
15871
15872 num_object_purged = 0;
15873
15874 vm_map_lock_read(map);
15875 entry = vm_map_first_entry(map);
15876 while (entry != vm_map_to_entry(map)) {
15877 if (entry->is_sub_map) {
15878 goto next;
15879 }
15880 if (! (entry->protection & VM_PROT_WRITE)) {
15881 goto next;
15882 }
15883 object = VME_OBJECT(entry);
15884 if (object == VM_OBJECT_NULL) {
15885 goto next;
15886 }
15887 if (object->purgable != VM_PURGABLE_VOLATILE) {
15888 goto next;
15889 }
15890
15891 vm_object_lock(object);
15892 #if 00
15893 if (VME_OFFSET(entry) != 0 ||
15894 (entry->vme_end - entry->vme_start) != object->vo_size) {
15895 vm_object_unlock(object);
15896 goto next;
15897 }
15898 #endif
15899 next_address = entry->vme_end;
15900 vm_map_unlock_read(map);
15901 state = VM_PURGABLE_EMPTY;
15902 kr = vm_object_purgable_control(object,
15903 VM_PURGABLE_SET_STATE,
15904 &state);
15905 if (kr == KERN_SUCCESS) {
15906 num_object_purged++;
15907 }
15908 vm_object_unlock(object);
15909
15910 vm_map_lock_read(map);
15911 if (vm_map_lookup_entry(map, next_address, &entry)) {
15912 continue;
15913 }
15914 next:
15915 entry = entry->vme_next;
15916 }
15917 vm_map_unlock_read(map);
15918
15919 return num_object_purged;
15920 }
15921
15922 kern_return_t
15923 vm_map_query_volatile(
15924 vm_map_t map,
15925 mach_vm_size_t *volatile_virtual_size_p,
15926 mach_vm_size_t *volatile_resident_size_p,
15927 mach_vm_size_t *volatile_compressed_size_p,
15928 mach_vm_size_t *volatile_pmap_size_p,
15929 mach_vm_size_t *volatile_compressed_pmap_size_p)
15930 {
15931 mach_vm_size_t volatile_virtual_size;
15932 mach_vm_size_t volatile_resident_count;
15933 mach_vm_size_t volatile_compressed_count;
15934 mach_vm_size_t volatile_pmap_count;
15935 mach_vm_size_t volatile_compressed_pmap_count;
15936 mach_vm_size_t resident_count;
15937 vm_map_entry_t entry;
15938 vm_object_t object;
15939
15940 /* map should be locked by caller */
15941
15942 volatile_virtual_size = 0;
15943 volatile_resident_count = 0;
15944 volatile_compressed_count = 0;
15945 volatile_pmap_count = 0;
15946 volatile_compressed_pmap_count = 0;
15947
15948 for (entry = vm_map_first_entry(map);
15949 entry != vm_map_to_entry(map);
15950 entry = entry->vme_next) {
15951 mach_vm_size_t pmap_resident_bytes, pmap_compressed_bytes;
15952
15953 if (entry->is_sub_map) {
15954 continue;
15955 }
15956 if (! (entry->protection & VM_PROT_WRITE)) {
15957 continue;
15958 }
15959 object = VME_OBJECT(entry);
15960 if (object == VM_OBJECT_NULL) {
15961 continue;
15962 }
15963 if (object->purgable != VM_PURGABLE_VOLATILE &&
15964 object->purgable != VM_PURGABLE_EMPTY) {
15965 continue;
15966 }
15967 if (VME_OFFSET(entry)) {
15968 /*
15969 * If the map entry has been split and the object now
15970 * appears several times in the VM map, we don't want
15971 * to count the object's resident_page_count more than
15972 * once. We count it only for the first one, starting
15973 * at offset 0 and ignore the other VM map entries.
15974 */
15975 continue;
15976 }
15977 resident_count = object->resident_page_count;
15978 if ((VME_OFFSET(entry) / PAGE_SIZE) >= resident_count) {
15979 resident_count = 0;
15980 } else {
15981 resident_count -= (VME_OFFSET(entry) / PAGE_SIZE);
15982 }
15983
15984 volatile_virtual_size += entry->vme_end - entry->vme_start;
15985 volatile_resident_count += resident_count;
15986 if (object->pager) {
15987 volatile_compressed_count +=
15988 vm_compressor_pager_get_count(object->pager);
15989 }
15990 pmap_compressed_bytes = 0;
15991 pmap_resident_bytes =
15992 pmap_query_resident(map->pmap,
15993 entry->vme_start,
15994 entry->vme_end,
15995 &pmap_compressed_bytes);
15996 volatile_pmap_count += (pmap_resident_bytes / PAGE_SIZE);
15997 volatile_compressed_pmap_count += (pmap_compressed_bytes
15998 / PAGE_SIZE);
15999 }
16000
16001 /* map is still locked on return */
16002
16003 *volatile_virtual_size_p = volatile_virtual_size;
16004 *volatile_resident_size_p = volatile_resident_count * PAGE_SIZE;
16005 *volatile_compressed_size_p = volatile_compressed_count * PAGE_SIZE;
16006 *volatile_pmap_size_p = volatile_pmap_count * PAGE_SIZE;
16007 *volatile_compressed_pmap_size_p = volatile_compressed_pmap_count * PAGE_SIZE;
16008
16009 return KERN_SUCCESS;
16010 }
16011
16012 void
16013 vm_map_sizes(vm_map_t map,
16014 vm_map_size_t * psize,
16015 vm_map_size_t * pfree,
16016 vm_map_size_t * plargest_free)
16017 {
16018 vm_map_entry_t entry;
16019 vm_map_offset_t prev;
16020 vm_map_size_t free, total_free, largest_free;
16021 boolean_t end;
16022
16023 total_free = largest_free = 0;
16024
16025 vm_map_lock_read(map);
16026 if (psize) *psize = map->max_offset - map->min_offset;
16027
16028 prev = map->min_offset;
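/*
 * Walk the entries in address order, measuring the gap before each
 * entry (and after the last one, up to max_offset) to accumulate the
 * total and largest free sizes.
 */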
16029 for (entry = vm_map_first_entry(map);; entry = entry->vme_next)
16030 {
16031 end = (entry == vm_map_to_entry(map));
16032
16033 if (end) free = entry->vme_end - prev;
16034 else free = entry->vme_start - prev;
16035
16036 total_free += free;
16037 if (free > largest_free) largest_free = free;
16038
16039 if (end) break;
16040 prev = entry->vme_end;
16041 }
16042 vm_map_unlock_read(map);
16043 if (pfree) *pfree = total_free;
16044 if (plargest_free) *plargest_free = largest_free;
16045 }
16046
16047 #if VM_SCAN_FOR_SHADOW_CHAIN
16048 int vm_map_shadow_max(vm_map_t map);
16049 int vm_map_shadow_max(
16050 vm_map_t map)
16051 {
16052 int shadows, shadows_max;
16053 vm_map_entry_t entry;
16054 vm_object_t object, next_object;
16055
16056 if (map == NULL)
16057 return 0;
16058
16059 shadows_max = 0;
16060
16061 vm_map_lock_read(map);
16062
16063 for (entry = vm_map_first_entry(map);
16064 entry != vm_map_to_entry(map);
16065 entry = entry->vme_next) {
16066 if (entry->is_sub_map) {
16067 continue;
16068 }
16069 object = VME_OBJECT(entry);
16070 if (object == NULL) {
16071 continue;
16072 }
16073 vm_object_lock_shared(object);
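/* Hand-over-hand shared locking down the shadow chain, counting its depth. */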
16074 for (shadows = 0;
16075 object->shadow != NULL;
16076 shadows++, object = next_object) {
16077 next_object = object->shadow;
16078 vm_object_lock_shared(next_object);
16079 vm_object_unlock(object);
16080 }
16081 vm_object_unlock(object);
16082 if (shadows > shadows_max) {
16083 shadows_max = shadows;
16084 }
16085 }
16086
16087 vm_map_unlock_read(map);
16088
16089 return shadows_max;
16090 }
16091 #endif /* VM_SCAN_FOR_SHADOW_CHAIN */