]> git.saurik.com Git - apple/xnu.git/blob - osfmk/vm/vm_map.c
f6a8e5e5c1dd4ca5efe3f110b6c87a963acb3349
[apple/xnu.git] / osfmk / vm / vm_map.c
1 /*
2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_map.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * Virtual memory mapping module.
64 */
65
66 #include <task_swapper.h>
67 #include <mach_assert.h>
68
69 #include <vm/vm_options.h>
70
71 #include <libkern/OSAtomic.h>
72
73 #include <mach/kern_return.h>
74 #include <mach/port.h>
75 #include <mach/vm_attributes.h>
76 #include <mach/vm_param.h>
77 #include <mach/vm_behavior.h>
78 #include <mach/vm_statistics.h>
79 #include <mach/memory_object.h>
80 #include <mach/mach_vm.h>
81 #include <machine/cpu_capabilities.h>
82 #include <mach/sdt.h>
83
84 #include <kern/assert.h>
85 #include <kern/backtrace.h>
86 #include <kern/counters.h>
87 #include <kern/kalloc.h>
88 #include <kern/zalloc.h>
89
90 #include <vm/cpm.h>
91 #include <vm/vm_compressor_pager.h>
92 #include <vm/vm_init.h>
93 #include <vm/vm_fault.h>
94 #include <vm/vm_map.h>
95 #include <vm/vm_object.h>
96 #include <vm/vm_page.h>
97 #include <vm/vm_pageout.h>
98 #include <vm/vm_kern.h>
99 #include <ipc/ipc_port.h>
100 #include <kern/sched_prim.h>
101 #include <kern/misc_protos.h>
102 #include <kern/xpr.h>
103
104 #include <mach/vm_map_server.h>
105 #include <mach/mach_host_server.h>
106 #include <vm/vm_protos.h>
107 #include <vm/vm_purgeable_internal.h>
108
109 #include <vm/vm_protos.h>
110 #include <vm/vm_shared_region.h>
111 #include <vm/vm_map_store.h>
112
113 #include <san/kasan.h>
114
115 #if __arm64__
116 extern int fourk_binary_compatibility_unsafe;
117 extern int fourk_binary_compatibility_allow_wx;
118 #endif /* __arm64__ */
119 extern int proc_selfpid(void);
120 extern char *proc_name_address(void *p);
121
122 #if VM_MAP_DEBUG_APPLE_PROTECT
123 int vm_map_debug_apple_protect = 0;
124 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
125 #if VM_MAP_DEBUG_FOURK
126 int vm_map_debug_fourk = 0;
127 #endif /* VM_MAP_DEBUG_FOURK */
128
129 int vm_map_executable_immutable = 0;
130 int vm_map_executable_immutable_no_log = 0;
131
132 extern u_int32_t random(void); /* from <libkern/libkern.h> */
133 /* Internal prototypes
134 */
135
136 static void vm_map_simplify_range(
137 vm_map_t map,
138 vm_map_offset_t start,
139 vm_map_offset_t end); /* forward */
140
141 static boolean_t vm_map_range_check(
142 vm_map_t map,
143 vm_map_offset_t start,
144 vm_map_offset_t end,
145 vm_map_entry_t *entry);
146
147 static vm_map_entry_t _vm_map_entry_create(
148 struct vm_map_header *map_header, boolean_t map_locked);
149
150 static void _vm_map_entry_dispose(
151 struct vm_map_header *map_header,
152 vm_map_entry_t entry);
153
154 static void vm_map_pmap_enter(
155 vm_map_t map,
156 vm_map_offset_t addr,
157 vm_map_offset_t end_addr,
158 vm_object_t object,
159 vm_object_offset_t offset,
160 vm_prot_t protection);
161
162 static void _vm_map_clip_end(
163 struct vm_map_header *map_header,
164 vm_map_entry_t entry,
165 vm_map_offset_t end);
166
167 static void _vm_map_clip_start(
168 struct vm_map_header *map_header,
169 vm_map_entry_t entry,
170 vm_map_offset_t start);
171
172 static void vm_map_entry_delete(
173 vm_map_t map,
174 vm_map_entry_t entry);
175
176 static kern_return_t vm_map_delete(
177 vm_map_t map,
178 vm_map_offset_t start,
179 vm_map_offset_t end,
180 int flags,
181 vm_map_t zap_map);
182
183 static kern_return_t vm_map_copy_overwrite_unaligned(
184 vm_map_t dst_map,
185 vm_map_entry_t entry,
186 vm_map_copy_t copy,
187 vm_map_address_t start,
188 boolean_t discard_on_success);
189
190 static kern_return_t vm_map_copy_overwrite_aligned(
191 vm_map_t dst_map,
192 vm_map_entry_t tmp_entry,
193 vm_map_copy_t copy,
194 vm_map_offset_t start,
195 pmap_t pmap);
196
197 static kern_return_t vm_map_copyin_kernel_buffer(
198 vm_map_t src_map,
199 vm_map_address_t src_addr,
200 vm_map_size_t len,
201 boolean_t src_destroy,
202 vm_map_copy_t *copy_result); /* OUT */
203
204 static kern_return_t vm_map_copyout_kernel_buffer(
205 vm_map_t map,
206 vm_map_address_t *addr, /* IN/OUT */
207 vm_map_copy_t copy,
208 vm_map_size_t copy_size,
209 boolean_t overwrite,
210 boolean_t consume_on_success);
211
212 static void vm_map_fork_share(
213 vm_map_t old_map,
214 vm_map_entry_t old_entry,
215 vm_map_t new_map);
216
217 static boolean_t vm_map_fork_copy(
218 vm_map_t old_map,
219 vm_map_entry_t *old_entry_p,
220 vm_map_t new_map,
221 int vm_map_copyin_flags);
222
223 static kern_return_t vm_map_wire_nested(
224 vm_map_t map,
225 vm_map_offset_t start,
226 vm_map_offset_t end,
227 vm_prot_t caller_prot,
228 vm_tag_t tag,
229 boolean_t user_wire,
230 pmap_t map_pmap,
231 vm_map_offset_t pmap_addr,
232 ppnum_t *physpage_p);
233
234 static kern_return_t vm_map_unwire_nested(
235 vm_map_t map,
236 vm_map_offset_t start,
237 vm_map_offset_t end,
238 boolean_t user_wire,
239 pmap_t map_pmap,
240 vm_map_offset_t pmap_addr);
241
242 static kern_return_t vm_map_overwrite_submap_recurse(
243 vm_map_t dst_map,
244 vm_map_offset_t dst_addr,
245 vm_map_size_t dst_size);
246
247 static kern_return_t vm_map_copy_overwrite_nested(
248 vm_map_t dst_map,
249 vm_map_offset_t dst_addr,
250 vm_map_copy_t copy,
251 boolean_t interruptible,
252 pmap_t pmap,
253 boolean_t discard_on_success);
254
255 static kern_return_t vm_map_remap_extract(
256 vm_map_t map,
257 vm_map_offset_t addr,
258 vm_map_size_t size,
259 boolean_t copy,
260 struct vm_map_header *map_header,
261 vm_prot_t *cur_protection,
262 vm_prot_t *max_protection,
263 vm_inherit_t inheritance,
264 boolean_t pageable,
265 boolean_t same_map,
266 vm_map_kernel_flags_t vmk_flags);
267
268 static kern_return_t vm_map_remap_range_allocate(
269 vm_map_t map,
270 vm_map_address_t *address,
271 vm_map_size_t size,
272 vm_map_offset_t mask,
273 int flags,
274 vm_map_kernel_flags_t vmk_flags,
275 vm_tag_t tag,
276 vm_map_entry_t *map_entry);
277
278 static void vm_map_region_look_for_page(
279 vm_map_t map,
280 vm_map_offset_t va,
281 vm_object_t object,
282 vm_object_offset_t offset,
283 int max_refcnt,
284 int depth,
285 vm_region_extended_info_t extended,
286 mach_msg_type_number_t count);
287
288 static int vm_map_region_count_obj_refs(
289 vm_map_entry_t entry,
290 vm_object_t object);
291
292
293 static kern_return_t vm_map_willneed(
294 vm_map_t map,
295 vm_map_offset_t start,
296 vm_map_offset_t end);
297
298 static kern_return_t vm_map_reuse_pages(
299 vm_map_t map,
300 vm_map_offset_t start,
301 vm_map_offset_t end);
302
303 static kern_return_t vm_map_reusable_pages(
304 vm_map_t map,
305 vm_map_offset_t start,
306 vm_map_offset_t end);
307
308 static kern_return_t vm_map_can_reuse(
309 vm_map_t map,
310 vm_map_offset_t start,
311 vm_map_offset_t end);
312
313 #if MACH_ASSERT
314 static kern_return_t vm_map_pageout(
315 vm_map_t map,
316 vm_map_offset_t start,
317 vm_map_offset_t end);
318 #endif /* MACH_ASSERT */
319
320 pid_t find_largest_process_vm_map_entries(void);
321
322 /*
323 * Macros to copy a vm_map_entry. We must be careful to correctly
324 * manage the wired page count. vm_map_entry_copy() creates a new
325 * map entry to the same memory - the wired count in the new entry
326 * must be set to zero. vm_map_entry_copy_full() creates a new
327 * entry that is identical to the old entry. This preserves the
328 * wire count; it's used for map splitting and zone changing in
329 * vm_map_copyout.
330 */
331
/*
 * vm_map_entry_copy(NEW, OLD):
 *	Make *NEW a fresh mapping of the memory described by *OLD.
 *	Everything is copied from OLD, then the per-entry state that must
 *	not be inherited is reset: sharing/transition flags, both wire
 *	counts, "permanent", "used_for_jit", the resilient/atomic flags,
 *	and IOKit accounting (which is converted to use_pmap accounting).
 *	NEW's own "from_reserved_zone" value is preserved across the copy.
 *	Both arguments are pointers and are evaluated more than once.
 */
#define vm_map_entry_copy(NEW,OLD)					\
MACRO_BEGIN								\
	/* remember where NEW was allocated before it is overwritten */	\
	boolean_t _vmec_from_reserved = (NEW)->from_reserved_zone;	\
									\
	*(NEW) = *(OLD);						\
	(NEW)->from_reserved_zone = _vmec_from_reserved;		\
									\
	/* the new entry starts out unwired */				\
	(NEW)->wired_count = 0;						\
	(NEW)->user_wired_count = 0;					\
									\
	/* transient / per-mapping attributes are not inherited */	\
	(NEW)->is_shared = FALSE;					\
	(NEW)->needs_wakeup = FALSE;					\
	(NEW)->in_transition = FALSE;					\
	(NEW)->permanent = FALSE;					\
	(NEW)->used_for_jit = FALSE;					\
	(NEW)->vme_resilient_codesign = FALSE;				\
	(NEW)->vme_resilient_media = FALSE;				\
	(NEW)->vme_atomic = FALSE;					\
									\
	/* IOKit accounting is replaced by use_pmap in the copy */	\
	if ((NEW)->iokit_acct) {					\
		assertf(!(NEW)->use_pmap, "old %p new %p\n", (OLD), (NEW)); \
		(NEW)->iokit_acct = FALSE;				\
		(NEW)->use_pmap = TRUE;					\
	}								\
MACRO_END
353
/*
 * vm_map_entry_copy_full(NEW, OLD):
 *	Make *NEW an exact duplicate of *OLD (wire counts included), except
 *	that NEW keeps its own "from_reserved_zone" value.
 *	Both arguments are pointers and are evaluated more than once.
 */
#define vm_map_entry_copy_full(NEW,OLD)					\
MACRO_BEGIN								\
	/* saved first: the struct assignment below would clobber it */	\
	boolean_t _vmecf_from_reserved = (NEW)->from_reserved_zone;	\
									\
	*(NEW) = *(OLD);						\
	(NEW)->from_reserved_zone = _vmecf_from_reserved;		\
MACRO_END
360
361 /*
362 * Decide if we want to allow processes to execute from their data or stack areas.
363 * override_nx() returns true if we do. Data/stack execution can be enabled independently
364 * for 32 and 64 bit processes. Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
365 * or allow_stack_exec to enable data execution for that type of data area for that particular
366 * ABI (or both by or'ing the flags together). These are initialized in the architecture
367 * specific pmap files since the default behavior varies according to architecture. The
368 * main reason it varies is because of the need to provide binary compatibility with old
369 * applications that were written before these restrictions came into being. In the old
370 * days, an app could execute anything it could read, but this has slowly been tightened
371 * up over time. The default behavior is:
372 *
373 * 32-bit PPC apps may execute from both stack and data areas
374 * 32-bit Intel apps may execute from data areas but not stack
375 * 64-bit PPC/Intel apps may not execute from either data or stack
376 *
377 * An application on any architecture may override these defaults by explicitly
378 * adding PROT_EXEC permission to the page in question with the mprotect(2)
379 * system call. This code here just determines what happens when an app tries to
380 * execute from a page that lacks execute permission.
381 *
382 * Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
383 * default behavior for both 32 and 64 bit apps on a system-wide basis. Furthermore,
384 * a Mach-O header flag bit (MH_NO_HEAP_EXECUTION) can be used to forcibly disallow
385 * execution from data areas for a particular binary even if the arch normally permits it. As
386 * a final wrinkle, a posix_spawn attribute flag can be used to negate this opt-in header bit
387 * to support some complicated use cases, notably browsers with out-of-process plugins that
388 * are not all NX-safe.
389 */
390
391 extern int allow_data_exec, allow_stack_exec;
392
393 int
394 override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
395 {
396 int current_abi;
397
398 if (map->pmap == kernel_pmap) return FALSE;
399
400 /*
401 * Determine if the app is running in 32 or 64 bit mode.
402 */
403
404 if (vm_map_is_64bit(map))
405 current_abi = VM_ABI_64;
406 else
407 current_abi = VM_ABI_32;
408
409 /*
410 * Determine if we should allow the execution based on whether it's a
411 * stack or data area and the current architecture.
412 */
413
414 if (user_tag == VM_MEMORY_STACK)
415 return allow_stack_exec & current_abi;
416
417 return (allow_data_exec & current_abi) && (map->map_disallow_data_exec == FALSE);
418 }
419
420
421 /*
422 * Virtual memory maps provide for the mapping, protection,
423 * and sharing of virtual memory objects. In addition,
424 * this module provides for an efficient virtual copy of
425 * memory from one map to another.
426 *
427 * Synchronization is required prior to most operations.
428 *
429 * Maps consist of an ordered doubly-linked list of simple
430 * entries; a single hint is used to speed up lookups.
431 *
432 * Sharing maps have been deleted from this version of Mach.
433 * All shared objects are now mapped directly into the respective
434 * maps. This requires a change in the copy on write strategy;
435 * the asymmetric (delayed) strategy is used for shared temporary
436 * objects instead of the symmetric (shadow) strategy. All maps
437 * are now "top level" maps (either task map, kernel map or submap
438 * of the kernel map).
439 *
440 * Since portions of maps are specified by start/end addresses,
441 * which may not align with existing map entries, all
442 * routines merely "clip" entries to these start/end values.
443 * [That is, an entry is split into two, bordering at a
444 * start or end value.] Note that these clippings may not
445 * always be necessary (as the two resulting entries are then
446 * not changed); however, the clipping is done for convenience.
447 * No attempt is currently made to "glue back together" two
448 * abutting entries.
449 *
450 * The symmetric (shadow) copy strategy implements virtual copy
451 * by copying VM object references from one map to
452 * another, and then marking both regions as copy-on-write.
453 * It is important to note that only one writeable reference
454 * to a VM object region exists in any map when this strategy
455 * is used -- this means that shadow object creation can be
456 * delayed until a write operation occurs. The asymmetric (delayed)
457 * strategy allows multiple maps to have writeable references to
458 * the same region of a vm object, and hence cannot delay creating
459 * its copy objects. See vm_object_copy_quickly() in vm_object.c.
460 * Copying of permanent objects is completely different; see
461 * vm_object_copy_strategically() in vm_object.c.
462 */
463
464 static zone_t vm_map_zone; /* zone for vm_map structures */
465 zone_t vm_map_entry_zone; /* zone for vm_map_entry structures */
466 static zone_t vm_map_entry_reserved_zone; /* zone with reserve for non-blocking allocations */
467 static zone_t vm_map_copy_zone; /* zone for vm_map_copy structures */
468 zone_t vm_map_holes_zone; /* zone for vm map holes (vm_map_links) structures */
469
470
471 /*
472 * Placeholder object for submap operations. This object is dropped
473 * into the range by a call to vm_map_find, and removed when
474 * vm_map_submap creates the submap.
475 */
476
477 vm_object_t vm_submap_object;
478
479 static void *map_data;
480 static vm_size_t map_data_size;
481 static void *kentry_data;
482 static vm_size_t kentry_data_size;
483 static void *map_holes_data;
484 static vm_size_t map_holes_data_size;
485
486 #if CONFIG_EMBEDDED
487 #define NO_COALESCE_LIMIT 0
488 #else
489 #define NO_COALESCE_LIMIT ((1024 * 128) - 1)
490 #endif
491
492 /* Skip acquiring locks if we're in the midst of a kernel core dump */
493 unsigned int not_in_kdp = 1;
494
495 unsigned int vm_map_set_cache_attr_count = 0;
496
497 kern_return_t
498 vm_map_set_cache_attr(
499 vm_map_t map,
500 vm_map_offset_t va)
501 {
502 vm_map_entry_t map_entry;
503 vm_object_t object;
504 kern_return_t kr = KERN_SUCCESS;
505
506 vm_map_lock_read(map);
507
508 if (!vm_map_lookup_entry(map, va, &map_entry) ||
509 map_entry->is_sub_map) {
510 /*
511 * that memory is not properly mapped
512 */
513 kr = KERN_INVALID_ARGUMENT;
514 goto done;
515 }
516 object = VME_OBJECT(map_entry);
517
518 if (object == VM_OBJECT_NULL) {
519 /*
520 * there should be a VM object here at this point
521 */
522 kr = KERN_INVALID_ARGUMENT;
523 goto done;
524 }
525 vm_object_lock(object);
526 object->set_cache_attr = TRUE;
527 vm_object_unlock(object);
528
529 vm_map_set_cache_attr_count++;
530 done:
531 vm_map_unlock_read(map);
532
533 return kr;
534 }
535
536
537 #if CONFIG_CODE_DECRYPTION
538 /*
539 * vm_map_apple_protected:
540 * This remaps the requested part of the object with an object backed by
541 * the decrypting pager.
542 * crypt_info contains entry points and session data for the crypt module.
543 * The crypt_info block will be copied by vm_map_apple_protected. The data structures
544 * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
545 */
546 kern_return_t
547 vm_map_apple_protected(
548 vm_map_t map,
549 vm_map_offset_t start,
550 vm_map_offset_t end,
551 vm_object_offset_t crypto_backing_offset,
552 struct pager_crypt_info *crypt_info)
553 {
554 boolean_t map_locked;
555 kern_return_t kr;
556 vm_map_entry_t map_entry;
557 struct vm_map_entry tmp_entry;
558 memory_object_t unprotected_mem_obj;
559 vm_object_t protected_object;
560 vm_map_offset_t map_addr;
561 vm_map_offset_t start_aligned, end_aligned;
562 vm_object_offset_t crypto_start, crypto_end;
563 int vm_flags;
564 vm_map_kernel_flags_t vmk_flags;
565
566 vm_flags = 0;
567 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
568
569 map_locked = FALSE;
570 unprotected_mem_obj = MEMORY_OBJECT_NULL;
571
572 start_aligned = vm_map_trunc_page(start, PAGE_MASK_64);
573 end_aligned = vm_map_round_page(end, PAGE_MASK_64);
574 start_aligned = vm_map_trunc_page(start_aligned, VM_MAP_PAGE_MASK(map));
575 end_aligned = vm_map_round_page(end_aligned, VM_MAP_PAGE_MASK(map));
576
577 #if __arm64__
578 /*
579 * "start" and "end" might be 4K-aligned but not 16K-aligned,
580 * so we might have to loop and establish up to 3 mappings:
581 *
582 * + the first 16K-page, which might overlap with the previous
583 * 4K-aligned mapping,
584 * + the center,
585 * + the last 16K-page, which might overlap with the next
586 * 4K-aligned mapping.
587 * Each of these mapping might be backed by a vnode pager (if
588 * properly page-aligned) or a "fourk_pager", itself backed by a
589 * vnode pager (if 4K-aligned but not page-aligned).
590 */
591 #else /* __arm64__ */
592 assert(start_aligned == start);
593 assert(end_aligned == end);
594 #endif /* __arm64__ */
595
596 map_addr = start_aligned;
597 for (map_addr = start_aligned;
598 map_addr < end;
599 map_addr = tmp_entry.vme_end) {
600 vm_map_lock(map);
601 map_locked = TRUE;
602
603 /* lookup the protected VM object */
604 if (!vm_map_lookup_entry(map,
605 map_addr,
606 &map_entry) ||
607 map_entry->is_sub_map ||
608 VME_OBJECT(map_entry) == VM_OBJECT_NULL ||
609 !(map_entry->protection & VM_PROT_EXECUTE)) {
610 /* that memory is not properly mapped */
611 kr = KERN_INVALID_ARGUMENT;
612 goto done;
613 }
614
615 /* get the protected object to be decrypted */
616 protected_object = VME_OBJECT(map_entry);
617 if (protected_object == VM_OBJECT_NULL) {
618 /* there should be a VM object here at this point */
619 kr = KERN_INVALID_ARGUMENT;
620 goto done;
621 }
622 /* ensure protected object stays alive while map is unlocked */
623 vm_object_reference(protected_object);
624
625 /* limit the map entry to the area we want to cover */
626 vm_map_clip_start(map, map_entry, start_aligned);
627 vm_map_clip_end(map, map_entry, end_aligned);
628
629 tmp_entry = *map_entry;
630 map_entry = VM_MAP_ENTRY_NULL; /* not valid after unlocking map */
631 vm_map_unlock(map);
632 map_locked = FALSE;
633
634 /*
635 * This map entry might be only partially encrypted
636 * (if not fully "page-aligned").
637 */
638 crypto_start = 0;
639 crypto_end = tmp_entry.vme_end - tmp_entry.vme_start;
640 if (tmp_entry.vme_start < start) {
641 if (tmp_entry.vme_start != start_aligned) {
642 kr = KERN_INVALID_ADDRESS;
643 }
644 crypto_start += (start - tmp_entry.vme_start);
645 }
646 if (tmp_entry.vme_end > end) {
647 if (tmp_entry.vme_end != end_aligned) {
648 kr = KERN_INVALID_ADDRESS;
649 }
650 crypto_end -= (tmp_entry.vme_end - end);
651 }
652
653 /*
654 * This "extra backing offset" is needed to get the decryption
655 * routine to use the right key. It adjusts for the possibly
656 * relative offset of an interposed "4K" pager...
657 */
658 if (crypto_backing_offset == (vm_object_offset_t) -1) {
659 crypto_backing_offset = VME_OFFSET(&tmp_entry);
660 }
661
662 /*
663 * Lookup (and create if necessary) the protected memory object
664 * matching that VM object.
665 * If successful, this also grabs a reference on the memory object,
666 * to guarantee that it doesn't go away before we get a chance to map
667 * it.
668 */
669 unprotected_mem_obj = apple_protect_pager_setup(
670 protected_object,
671 VME_OFFSET(&tmp_entry),
672 crypto_backing_offset,
673 crypt_info,
674 crypto_start,
675 crypto_end);
676
677 /* release extra ref on protected object */
678 vm_object_deallocate(protected_object);
679
680 if (unprotected_mem_obj == NULL) {
681 kr = KERN_FAILURE;
682 goto done;
683 }
684
685 vm_flags = VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
686 /* can overwrite an immutable mapping */
687 vmk_flags.vmkf_overwrite_immutable = TRUE;
688 #if __arm64__
689 if (tmp_entry.used_for_jit &&
690 (VM_MAP_PAGE_SHIFT(map) != FOURK_PAGE_SHIFT ||
691 PAGE_SHIFT != FOURK_PAGE_SHIFT) &&
692 fourk_binary_compatibility_unsafe &&
693 fourk_binary_compatibility_allow_wx) {
694 printf("** FOURK_COMPAT [%d]: "
695 "allowing write+execute at 0x%llx\n",
696 proc_selfpid(), tmp_entry.vme_start);
697 vmk_flags.vmkf_map_jit = TRUE;
698 }
699 #endif /* __arm64__ */
700
701 /* map this memory object in place of the current one */
702 map_addr = tmp_entry.vme_start;
703 kr = vm_map_enter_mem_object(map,
704 &map_addr,
705 (tmp_entry.vme_end -
706 tmp_entry.vme_start),
707 (mach_vm_offset_t) 0,
708 vm_flags,
709 vmk_flags,
710 VM_KERN_MEMORY_NONE,
711 (ipc_port_t) unprotected_mem_obj,
712 0,
713 TRUE,
714 tmp_entry.protection,
715 tmp_entry.max_protection,
716 tmp_entry.inheritance);
717 assertf(kr == KERN_SUCCESS,
718 "kr = 0x%x\n", kr);
719 assertf(map_addr == tmp_entry.vme_start,
720 "map_addr=0x%llx vme_start=0x%llx tmp_entry=%p\n",
721 (uint64_t)map_addr,
722 (uint64_t) tmp_entry.vme_start,
723 &tmp_entry);
724
725 #if VM_MAP_DEBUG_APPLE_PROTECT
726 if (vm_map_debug_apple_protect) {
727 printf("APPLE_PROTECT: map %p [0x%llx:0x%llx] pager %p:"
728 " backing:[object:%p,offset:0x%llx,"
729 "crypto_backing_offset:0x%llx,"
730 "crypto_start:0x%llx,crypto_end:0x%llx]\n",
731 map,
732 (uint64_t) map_addr,
733 (uint64_t) (map_addr + (tmp_entry.vme_end -
734 tmp_entry.vme_start)),
735 unprotected_mem_obj,
736 protected_object,
737 VME_OFFSET(&tmp_entry),
738 crypto_backing_offset,
739 crypto_start,
740 crypto_end);
741 }
742 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
743
744 /*
745 * Release the reference obtained by
746 * apple_protect_pager_setup().
747 * The mapping (if it succeeded) is now holding a reference on
748 * the memory object.
749 */
750 memory_object_deallocate(unprotected_mem_obj);
751 unprotected_mem_obj = MEMORY_OBJECT_NULL;
752
753 /* continue with next map entry */
754 crypto_backing_offset += (tmp_entry.vme_end -
755 tmp_entry.vme_start);
756 crypto_backing_offset -= crypto_start;
757 }
758 kr = KERN_SUCCESS;
759
760 done:
761 if (map_locked) {
762 vm_map_unlock(map);
763 }
764 return kr;
765 }
766 #endif /* CONFIG_CODE_DECRYPTION */
767
768
769 lck_grp_t vm_map_lck_grp;
770 lck_grp_attr_t vm_map_lck_grp_attr;
771 lck_attr_t vm_map_lck_attr;
772 lck_attr_t vm_map_lck_rw_attr;
773
774
775 /*
776 * vm_map_init:
777 *
778 * Initialize the vm_map module. Must be called before
779 * any other vm_map routines.
780 *
781 * Map and entry structures are allocated from zones -- we must
782 * initialize those zones.
783 *
784 * There are three zones of interest:
785 *
786 * vm_map_zone: used to allocate maps.
787 * vm_map_entry_zone: used to allocate map entries.
788 * vm_map_entry_reserved_zone: fallback zone for kernel map entries
789 *
790 * The kernel allocates map entries from a special zone that is initially
791 * "crammed" with memory. It would be difficult (perhaps impossible) for
792 * the kernel to allocate more memory to an entry zone when it became
793 * empty since the very act of allocating memory implies the creation
794 * of a new entry.
795 */
796 void
797 vm_map_init(
798 void)
799 {
800 vm_size_t entry_zone_alloc_size;
801 const char *mez_name = "VM map entries";
802
803 vm_map_zone = zinit((vm_map_size_t) sizeof(struct _vm_map), 40*1024,
804 PAGE_SIZE, "maps");
805 zone_change(vm_map_zone, Z_NOENCRYPT, TRUE);
806 #if defined(__LP64__)
807 entry_zone_alloc_size = PAGE_SIZE * 5;
808 #else
809 entry_zone_alloc_size = PAGE_SIZE * 6;
810 #endif
811 vm_map_entry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
812 1024*1024, entry_zone_alloc_size,
813 mez_name);
814 zone_change(vm_map_entry_zone, Z_NOENCRYPT, TRUE);
815 zone_change(vm_map_entry_zone, Z_NOCALLOUT, TRUE);
816 zone_change(vm_map_entry_zone, Z_GZALLOC_EXEMPT, TRUE);
817
818 vm_map_entry_reserved_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
819 kentry_data_size * 64, kentry_data_size,
820 "Reserved VM map entries");
821 zone_change(vm_map_entry_reserved_zone, Z_NOENCRYPT, TRUE);
822 /* Don't quarantine because we always need elements available */
823 zone_change(vm_map_entry_reserved_zone, Z_KASAN_QUARANTINE, FALSE);
824
825 vm_map_copy_zone = zinit((vm_map_size_t) sizeof(struct vm_map_copy),
826 16*1024, PAGE_SIZE, "VM map copies");
827 zone_change(vm_map_copy_zone, Z_NOENCRYPT, TRUE);
828
829 vm_map_holes_zone = zinit((vm_map_size_t) sizeof(struct vm_map_links),
830 16*1024, PAGE_SIZE, "VM map holes");
831 zone_change(vm_map_holes_zone, Z_NOENCRYPT, TRUE);
832
833 /*
834 * Cram the map and kentry zones with initial data.
835 * Set reserved_zone non-collectible to aid zone_gc().
836 */
837 zone_change(vm_map_zone, Z_COLLECT, FALSE);
838 zone_change(vm_map_zone, Z_FOREIGN, TRUE);
839 zone_change(vm_map_zone, Z_GZALLOC_EXEMPT, TRUE);
840
841 zone_change(vm_map_entry_reserved_zone, Z_COLLECT, FALSE);
842 zone_change(vm_map_entry_reserved_zone, Z_EXPAND, FALSE);
843 zone_change(vm_map_entry_reserved_zone, Z_FOREIGN, TRUE);
844 zone_change(vm_map_entry_reserved_zone, Z_NOCALLOUT, TRUE);
845 zone_change(vm_map_entry_reserved_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
846 zone_change(vm_map_copy_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
847 zone_change(vm_map_entry_reserved_zone, Z_GZALLOC_EXEMPT, TRUE);
848
849 zone_change(vm_map_holes_zone, Z_COLLECT, TRUE);
850 zone_change(vm_map_holes_zone, Z_EXPAND, TRUE);
851 zone_change(vm_map_holes_zone, Z_FOREIGN, TRUE);
852 zone_change(vm_map_holes_zone, Z_NOCALLOUT, TRUE);
853 zone_change(vm_map_holes_zone, Z_CALLERACCT, TRUE);
854 zone_change(vm_map_holes_zone, Z_GZALLOC_EXEMPT, TRUE);
855
856 /*
857 * Add the stolen memory to zones, adjust zone size and stolen counts.
858 * zcram only up to the maximum number of pages for each zone chunk.
859 */
860 zcram(vm_map_zone, (vm_offset_t)map_data, map_data_size);
861
862 const vm_size_t stride = ZONE_CHUNK_MAXPAGES * PAGE_SIZE;
863 for (vm_offset_t off = 0; off < kentry_data_size; off += stride) {
864 zcram(vm_map_entry_reserved_zone,
865 (vm_offset_t)kentry_data + off,
866 MIN(kentry_data_size - off, stride));
867 }
868 for (vm_offset_t off = 0; off < map_holes_data_size; off += stride) {
869 zcram(vm_map_holes_zone,
870 (vm_offset_t)map_holes_data + off,
871 MIN(map_holes_data_size - off, stride));
872 }
873
874 VM_PAGE_MOVE_STOLEN(atop_64(map_data_size) + atop_64(kentry_data_size) + atop_64(map_holes_data_size));
875
876 lck_grp_attr_setdefault(&vm_map_lck_grp_attr);
877 lck_grp_init(&vm_map_lck_grp, "vm_map", &vm_map_lck_grp_attr);
878 lck_attr_setdefault(&vm_map_lck_attr);
879
880 lck_attr_setdefault(&vm_map_lck_rw_attr);
881 lck_attr_cleardebug(&vm_map_lck_rw_attr);
882
883 #if VM_MAP_DEBUG_APPLE_PROTECT
884 PE_parse_boot_argn("vm_map_debug_apple_protect",
885 &vm_map_debug_apple_protect,
886 sizeof(vm_map_debug_apple_protect));
887 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
888 #if VM_MAP_DEBUG_APPLE_FOURK
889 PE_parse_boot_argn("vm_map_debug_fourk",
890 &vm_map_debug_fourk,
891 sizeof(vm_map_debug_fourk));
892 #endif /* VM_MAP_DEBUG_FOURK */
893 PE_parse_boot_argn("vm_map_executable_immutable",
894 &vm_map_executable_immutable,
895 sizeof(vm_map_executable_immutable));
896 PE_parse_boot_argn("vm_map_executable_immutable_no_log",
897 &vm_map_executable_immutable_no_log,
898 sizeof(vm_map_executable_immutable_no_log));
899 }
900
901 void
902 vm_map_steal_memory(
903 void)
904 {
905 uint32_t kentry_initial_pages;
906
907 map_data_size = round_page(10 * sizeof(struct _vm_map));
908 map_data = pmap_steal_memory(map_data_size);
909
910 /*
911 * kentry_initial_pages corresponds to the number of kernel map entries
912 * required during bootstrap until the asynchronous replenishment
913 * scheme is activated and/or entries are available from the general
914 * map entry pool.
915 */
916 #if defined(__LP64__)
917 kentry_initial_pages = 10;
918 #else
919 kentry_initial_pages = 6;
920 #endif
921
922 #if CONFIG_GZALLOC
923 /* If using the guard allocator, reserve more memory for the kernel
924 * reserved map entry pool.
925 */
926 if (gzalloc_enabled())
927 kentry_initial_pages *= 1024;
928 #endif
929
930 kentry_data_size = kentry_initial_pages * PAGE_SIZE;
931 kentry_data = pmap_steal_memory(kentry_data_size);
932
933 map_holes_data_size = kentry_data_size;
934 map_holes_data = pmap_steal_memory(map_holes_data_size);
935 }
936
937 boolean_t vm_map_supports_hole_optimization = FALSE;
938
939 void
940 vm_kernel_reserved_entry_init(void) {
941 zone_prio_refill_configure(vm_map_entry_reserved_zone, (6*PAGE_SIZE)/sizeof(struct vm_map_entry));
942
943 /*
944 * Once we have our replenish thread set up, we can start using the vm_map_holes zone.
945 */
946 zone_prio_refill_configure(vm_map_holes_zone, (6*PAGE_SIZE)/sizeof(struct vm_map_links));
947 vm_map_supports_hole_optimization = TRUE;
948 }
949
950 void
951 vm_map_disable_hole_optimization(vm_map_t map)
952 {
953 vm_map_entry_t head_entry, hole_entry, next_hole_entry;
954
955 if (map->holelistenabled) {
956
957 head_entry = hole_entry = (vm_map_entry_t) map->holes_list;
958
959 while (hole_entry != NULL) {
960
961 next_hole_entry = hole_entry->vme_next;
962
963 hole_entry->vme_next = NULL;
964 hole_entry->vme_prev = NULL;
965 zfree(vm_map_holes_zone, hole_entry);
966
967 if (next_hole_entry == head_entry) {
968 hole_entry = NULL;
969 } else {
970 hole_entry = next_hole_entry;
971 }
972 }
973
974 map->holes_list = NULL;
975 map->holelistenabled = FALSE;
976
977 map->first_free = vm_map_first_entry(map);
978 SAVE_HINT_HOLE_WRITE(map, NULL);
979 }
980 }
981
982 boolean_t
983 vm_kernel_map_is_kernel(vm_map_t map) {
984 return (map->pmap == kernel_pmap);
985 }
986
987 /*
988 * vm_map_create:
989 *
990 * Creates and returns a new empty VM map with
991 * the given physical map structure, and having
992 * the given lower and upper address bounds.
993 */
994
995 vm_map_t
996 vm_map_create(
997 pmap_t pmap,
998 vm_map_offset_t min,
999 vm_map_offset_t max,
1000 boolean_t pageable)
1001 {
1002 static int color_seed = 0;
1003 vm_map_t result;
1004 struct vm_map_links *hole_entry = NULL;
1005
1006 result = (vm_map_t) zalloc(vm_map_zone);
1007 if (result == VM_MAP_NULL)
1008 panic("vm_map_create");
1009
1010 vm_map_first_entry(result) = vm_map_to_entry(result);
1011 vm_map_last_entry(result) = vm_map_to_entry(result);
1012 result->hdr.nentries = 0;
1013 result->hdr.entries_pageable = pageable;
1014
1015 vm_map_store_init( &(result->hdr) );
1016
1017 result->hdr.page_shift = PAGE_SHIFT;
1018
1019 result->size = 0;
1020 result->user_wire_limit = MACH_VM_MAX_ADDRESS; /* default limit is unlimited */
1021 result->user_wire_size = 0;
1022 #if __x86_64__
1023 result->vmmap_high_start = 0;
1024 #endif /* __x86_64__ */
1025 result->ref_count = 1;
1026 #if TASK_SWAPPER
1027 result->res_count = 1;
1028 result->sw_state = MAP_SW_IN;
1029 #endif /* TASK_SWAPPER */
1030 result->pmap = pmap;
1031 result->min_offset = min;
1032 result->max_offset = max;
1033 result->wiring_required = FALSE;
1034 result->no_zero_fill = FALSE;
1035 result->mapped_in_other_pmaps = FALSE;
1036 result->wait_for_space = FALSE;
1037 result->switch_protect = FALSE;
1038 result->disable_vmentry_reuse = FALSE;
1039 result->map_disallow_data_exec = FALSE;
1040 result->is_nested_map = FALSE;
1041 result->map_disallow_new_exec = FALSE;
1042 result->highest_entry_end = 0;
1043 result->first_free = vm_map_to_entry(result);
1044 result->hint = vm_map_to_entry(result);
1045 result->color_rr = (color_seed++) & vm_color_mask;
1046 result->jit_entry_exists = FALSE;
1047
1048 if (vm_map_supports_hole_optimization) {
1049 hole_entry = zalloc(vm_map_holes_zone);
1050
1051 hole_entry->start = min;
1052 #if defined(__arm__) || defined(__arm64__)
1053 hole_entry->end = result->max_offset;
1054 #else
1055 hole_entry->end = (max > (vm_map_offset_t)MACH_VM_MAX_ADDRESS) ? max : (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
1056 #endif
1057 result->holes_list = result->hole_hint = hole_entry;
1058 hole_entry->prev = hole_entry->next = (vm_map_entry_t) hole_entry;
1059 result->holelistenabled = TRUE;
1060
1061 } else {
1062
1063 result->holelistenabled = FALSE;
1064 }
1065
1066 vm_map_lock_init(result);
1067 lck_mtx_init_ext(&result->s_lock, &result->s_lock_ext, &vm_map_lck_grp, &vm_map_lck_attr);
1068
1069 return(result);
1070 }
1071
1072 /*
1073 * vm_map_entry_create: [ internal use only ]
1074 *
1075 * Allocates a VM map entry for insertion in the
1076 * given map (or map copy). No fields are filled.
1077 */
1078 #define vm_map_entry_create(map, map_locked) _vm_map_entry_create(&(map)->hdr, map_locked)
1079
1080 #define vm_map_copy_entry_create(copy, map_locked) \
1081 _vm_map_entry_create(&(copy)->cpy_hdr, map_locked)
1082 unsigned reserved_zalloc_count, nonreserved_zalloc_count;
1083
1084 static vm_map_entry_t
1085 _vm_map_entry_create(
1086 struct vm_map_header *map_header, boolean_t __unused map_locked)
1087 {
1088 zone_t zone;
1089 vm_map_entry_t entry;
1090
1091 zone = vm_map_entry_zone;
1092
1093 assert(map_header->entries_pageable ? !map_locked : TRUE);
1094
1095 if (map_header->entries_pageable) {
1096 entry = (vm_map_entry_t) zalloc(zone);
1097 }
1098 else {
1099 entry = (vm_map_entry_t) zalloc_canblock(zone, FALSE);
1100
1101 if (entry == VM_MAP_ENTRY_NULL) {
1102 zone = vm_map_entry_reserved_zone;
1103 entry = (vm_map_entry_t) zalloc(zone);
1104 OSAddAtomic(1, &reserved_zalloc_count);
1105 } else
1106 OSAddAtomic(1, &nonreserved_zalloc_count);
1107 }
1108
1109 if (entry == VM_MAP_ENTRY_NULL)
1110 panic("vm_map_entry_create");
1111 entry->from_reserved_zone = (zone == vm_map_entry_reserved_zone);
1112
1113 vm_map_store_update( (vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE);
1114 #if MAP_ENTRY_CREATION_DEBUG
1115 entry->vme_creation_maphdr = map_header;
1116 backtrace(&entry->vme_creation_bt[0],
1117 (sizeof(entry->vme_creation_bt)/sizeof(uintptr_t)));
1118 #endif
1119 return(entry);
1120 }
1121
1122 /*
1123 * vm_map_entry_dispose: [ internal use only ]
1124 *
1125 * Inverse of vm_map_entry_create.
1126 *
1127 * write map lock held so no need to
1128 * do anything special to insure correctness
1129 * of the stores
1130 */
1131 #define vm_map_entry_dispose(map, entry) \
1132 _vm_map_entry_dispose(&(map)->hdr, (entry))
1133
1134 #define vm_map_copy_entry_dispose(map, entry) \
1135 _vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
1136
1137 static void
1138 _vm_map_entry_dispose(
1139 struct vm_map_header *map_header,
1140 vm_map_entry_t entry)
1141 {
1142 zone_t zone;
1143
1144 if (map_header->entries_pageable || !(entry->from_reserved_zone))
1145 zone = vm_map_entry_zone;
1146 else
1147 zone = vm_map_entry_reserved_zone;
1148
1149 if (!map_header->entries_pageable) {
1150 if (zone == vm_map_entry_zone)
1151 OSAddAtomic(-1, &nonreserved_zalloc_count);
1152 else
1153 OSAddAtomic(-1, &reserved_zalloc_count);
1154 }
1155
1156 zfree(zone, entry);
1157 }
1158
#if	MACH_ASSERT
/* When FALSE, first_free_is_valid() reports success without checking. */
static boolean_t first_free_check = FALSE;

/*
 * Validate the map's first_free hint through the map store layer,
 * but only when first_free_check has been switched on.
 */
boolean_t
first_free_is_valid(
	vm_map_t	map)
{
	return first_free_check ? first_free_is_valid_store(map) : TRUE;
}
#endif /* MACH_ASSERT */
1171
1172
/*
 * Link an entry into / unlink an entry from the entry list of a map
 * copy, by way of the store layer operating on the copy's header.
 */
#define	vm_map_copy_entry_link(copy, after_where, entry)	\
	_vm_map_store_entry_link(&(copy)->cpy_hdr, (after_where), (entry))

#define	vm_map_copy_entry_unlink(copy, entry)	\
	_vm_map_store_entry_unlink(&(copy)->cpy_hdr, (entry))
1178
1179 #if MACH_ASSERT && TASK_SWAPPER
1180 /*
1181 * vm_map_res_reference:
1182 *
1183 * Adds another valid residence count to the given map.
1184 *
1185 * Map is locked so this function can be called from
1186 * vm_map_swapin.
1187 *
1188 */
1189 void vm_map_res_reference(vm_map_t map)
1190 {
1191 /* assert map is locked */
1192 assert(map->res_count >= 0);
1193 assert(map->ref_count >= map->res_count);
1194 if (map->res_count == 0) {
1195 lck_mtx_unlock(&map->s_lock);
1196 vm_map_lock(map);
1197 vm_map_swapin(map);
1198 lck_mtx_lock(&map->s_lock);
1199 ++map->res_count;
1200 vm_map_unlock(map);
1201 } else
1202 ++map->res_count;
1203 }
1204
1205 /*
1206 * vm_map_reference_swap:
1207 *
1208 * Adds valid reference and residence counts to the given map.
1209 *
1210 * The map may not be in memory (i.e. zero residence count).
1211 *
1212 */
1213 void vm_map_reference_swap(vm_map_t map)
1214 {
1215 assert(map != VM_MAP_NULL);
1216 lck_mtx_lock(&map->s_lock);
1217 assert(map->res_count >= 0);
1218 assert(map->ref_count >= map->res_count);
1219 map->ref_count++;
1220 vm_map_res_reference(map);
1221 lck_mtx_unlock(&map->s_lock);
1222 }
1223
1224 /*
1225 * vm_map_res_deallocate:
1226 *
1227 * Decrement residence count on a map; possibly causing swapout.
1228 *
1229 * The map must be in memory (i.e. non-zero residence count).
1230 *
1231 * The map is locked, so this function is callable from vm_map_deallocate.
1232 *
1233 */
1234 void vm_map_res_deallocate(vm_map_t map)
1235 {
1236 assert(map->res_count > 0);
1237 if (--map->res_count == 0) {
1238 lck_mtx_unlock(&map->s_lock);
1239 vm_map_lock(map);
1240 vm_map_swapout(map);
1241 vm_map_unlock(map);
1242 lck_mtx_lock(&map->s_lock);
1243 }
1244 assert(map->ref_count >= map->res_count);
1245 }
1246 #endif /* MACH_ASSERT && TASK_SWAPPER */
1247
1248 /*
1249 * vm_map_destroy:
1250 *
1251 * Actually destroy a map.
1252 */
1253 void
1254 vm_map_destroy(
1255 vm_map_t map,
1256 int flags)
1257 {
1258 vm_map_lock(map);
1259
1260 /* final cleanup: no need to unnest shared region */
1261 flags |= VM_MAP_REMOVE_NO_UNNESTING;
1262 /* final cleanup: ok to remove immutable mappings */
1263 flags |= VM_MAP_REMOVE_IMMUTABLE;
1264
1265 /* clean up regular map entries */
1266 (void) vm_map_delete(map, map->min_offset, map->max_offset,
1267 flags, VM_MAP_NULL);
1268 /* clean up leftover special mappings (commpage, etc...) */
1269 #if !defined(__arm__) && !defined(__arm64__)
1270 (void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL,
1271 flags, VM_MAP_NULL);
1272 #endif /* !__arm__ && !__arm64__ */
1273
1274 vm_map_disable_hole_optimization(map);
1275 vm_map_unlock(map);
1276
1277 assert(map->hdr.nentries == 0);
1278
1279 if(map->pmap)
1280 pmap_destroy(map->pmap);
1281
1282 if (vm_map_lck_attr.lck_attr_val & LCK_ATTR_DEBUG) {
1283 /*
1284 * If lock debugging is enabled the mutexes get tagged as LCK_MTX_TAG_INDIRECT.
1285 * And this is regardless of whether the lck_mtx_ext_t is embedded in the
1286 * structure or kalloc'ed via lck_mtx_init.
1287 * An example is s_lock_ext within struct _vm_map.
1288 *
1289 * A lck_mtx_destroy on such a mutex will attempt a kfree and panic. We
1290 * can add another tag to detect embedded vs alloc'ed indirect external
1291 * mutexes but that'll be additional checks in the lock path and require
1292 * updating dependencies for the old vs new tag.
1293 *
1294 * Since the kfree() is for LCK_MTX_TAG_INDIRECT mutexes and that tag is applied
1295 * just when lock debugging is ON, we choose to forego explicitly destroying
1296 * the vm_map mutex and rw lock and, as a consequence, will overflow the reference
1297 * count on vm_map_lck_grp, which has no serious side-effect.
1298 */
1299 } else {
1300 lck_rw_destroy(&(map)->lock, &vm_map_lck_grp);
1301 lck_mtx_destroy(&(map)->s_lock, &vm_map_lck_grp);
1302 }
1303
1304 zfree(vm_map_zone, map);
1305 }
1306
1307 /*
1308 * Returns pid of the task with the largest number of VM map entries.
1309 * Used in the zone-map-exhaustion jetsam path.
1310 */
1311 pid_t
1312 find_largest_process_vm_map_entries(void)
1313 {
1314 pid_t victim_pid = -1;
1315 int max_vm_map_entries = 0;
1316 task_t task = TASK_NULL;
1317 queue_head_t *task_list = &tasks;
1318
1319 lck_mtx_lock(&tasks_threads_lock);
1320 queue_iterate(task_list, task, task_t, tasks) {
1321 if (task == kernel_task || !task->active)
1322 continue;
1323
1324 vm_map_t task_map = task->map;
1325 if (task_map != VM_MAP_NULL) {
1326 int task_vm_map_entries = task_map->hdr.nentries;
1327 if (task_vm_map_entries > max_vm_map_entries) {
1328 max_vm_map_entries = task_vm_map_entries;
1329 victim_pid = pid_from_task(task);
1330 }
1331 }
1332 }
1333 lck_mtx_unlock(&tasks_threads_lock);
1334
1335 printf("zone_map_exhaustion: victim pid %d, vm region count: %d\n", victim_pid, max_vm_map_entries);
1336 return victim_pid;
1337 }
1338
1339 #if TASK_SWAPPER
1340 /*
1341 * vm_map_swapin/vm_map_swapout
1342 *
1343 * Swap a map in and out, either referencing or releasing its resources.
1344 * These functions are internal use only; however, they must be exported
1345 * because they may be called from macros, which are exported.
1346 *
1347 * In the case of swapout, there could be races on the residence count,
1348 * so if the residence count is up, we return, assuming that a
1349 * vm_map_deallocate() call in the near future will bring us back.
1350 *
1351 * Locking:
1352 * -- We use the map write lock for synchronization among races.
1353 * -- The map write lock, and not the simple s_lock, protects the
1354 * swap state of the map.
1355 * -- If a map entry is a share map, then we hold both locks, in
1356 * hierarchical order.
1357 *
1358 * Synchronization Notes:
1359 * 1) If a vm_map_swapin() call happens while swapout in progress, it
1360 * will block on the map lock and proceed when swapout is through.
1361 * 2) A vm_map_reference() call at this time is illegal, and will
1362 * cause a panic. vm_map_reference() is only allowed on resident
1363 * maps, since it refuses to block.
1364 * 3) A vm_map_swapin() call during a swapin will block, and
1365 * proceed when the first swapin is done, turning into a nop.
1366 * This is the reason the res_count is not incremented until
1367 * after the swapin is complete.
1368 * 4) There is a timing hole after the checks of the res_count, before
1369 * the map lock is taken, during which a swapin may get the lock
1370 * before a swapout about to happen. If this happens, the swapin
1371 * will detect the state and increment the reference count, causing
1372 * the swapout to be a nop, thereby delaying it until a later
1373 * vm_map_deallocate. If the swapout gets the lock first, then
1374 * the swapin will simply block until the swapout is done, and
1375 * then proceed.
1376 *
1377 * Because vm_map_swapin() is potentially an expensive operation, it
1378 * should be used with caution.
1379 *
1380 * Invariants:
1381 * 1) A map with a residence count of zero is either swapped, or
1382 * being swapped.
1383 * 2) A map with a non-zero residence count is either resident,
1384 * or being swapped in.
1385 */
1386
1387 int vm_map_swap_enable = 1;
1388
1389 void vm_map_swapin (vm_map_t map)
1390 {
1391 vm_map_entry_t entry;
1392
1393 if (!vm_map_swap_enable) /* debug */
1394 return;
1395
1396 /*
1397 * Map is locked
1398 * First deal with various races.
1399 */
1400 if (map->sw_state == MAP_SW_IN)
1401 /*
1402 * we raced with swapout and won. Returning will incr.
1403 * the res_count, turning the swapout into a nop.
1404 */
1405 return;
1406
1407 /*
1408 * The residence count must be zero. If we raced with another
1409 * swapin, the state would have been IN; if we raced with a
1410 * swapout (after another competing swapin), we must have lost
1411 * the race to get here (see above comment), in which case
1412 * res_count is still 0.
1413 */
1414 assert(map->res_count == 0);
1415
1416 /*
1417 * There are no intermediate states of a map going out or
1418 * coming in, since the map is locked during the transition.
1419 */
1420 assert(map->sw_state == MAP_SW_OUT);
1421
1422 /*
1423 * We now operate upon each map entry. If the entry is a sub-
1424 * or share-map, we call vm_map_res_reference upon it.
1425 * If the entry is an object, we call vm_object_res_reference
1426 * (this may iterate through the shadow chain).
1427 * Note that we hold the map locked the entire time,
1428 * even if we get back here via a recursive call in
1429 * vm_map_res_reference.
1430 */
1431 entry = vm_map_first_entry(map);
1432
1433 while (entry != vm_map_to_entry(map)) {
1434 if (VME_OBJECT(entry) != VM_OBJECT_NULL) {
1435 if (entry->is_sub_map) {
1436 vm_map_t lmap = VME_SUBMAP(entry);
1437 lck_mtx_lock(&lmap->s_lock);
1438 vm_map_res_reference(lmap);
1439 lck_mtx_unlock(&lmap->s_lock);
1440 } else {
1441 vm_object_t object = VME_OBEJCT(entry);
1442 vm_object_lock(object);
1443 /*
1444 * This call may iterate through the
1445 * shadow chain.
1446 */
1447 vm_object_res_reference(object);
1448 vm_object_unlock(object);
1449 }
1450 }
1451 entry = entry->vme_next;
1452 }
1453 assert(map->sw_state == MAP_SW_OUT);
1454 map->sw_state = MAP_SW_IN;
1455 }
1456
1457 void vm_map_swapout(vm_map_t map)
1458 {
1459 vm_map_entry_t entry;
1460
1461 /*
1462 * Map is locked
1463 * First deal with various races.
1464 * If we raced with a swapin and lost, the residence count
1465 * will have been incremented to 1, and we simply return.
1466 */
1467 lck_mtx_lock(&map->s_lock);
1468 if (map->res_count != 0) {
1469 lck_mtx_unlock(&map->s_lock);
1470 return;
1471 }
1472 lck_mtx_unlock(&map->s_lock);
1473
1474 /*
1475 * There are no intermediate states of a map going out or
1476 * coming in, since the map is locked during the transition.
1477 */
1478 assert(map->sw_state == MAP_SW_IN);
1479
1480 if (!vm_map_swap_enable)
1481 return;
1482
1483 /*
1484 * We now operate upon each map entry. If the entry is a sub-
1485 * or share-map, we call vm_map_res_deallocate upon it.
1486 * If the entry is an object, we call vm_object_res_deallocate
1487 * (this may iterate through the shadow chain).
1488 * Note that we hold the map locked the entire time,
1489 * even if we get back here via a recursive call in
1490 * vm_map_res_deallocate.
1491 */
1492 entry = vm_map_first_entry(map);
1493
1494 while (entry != vm_map_to_entry(map)) {
1495 if (VME_OBJECT(entry) != VM_OBJECT_NULL) {
1496 if (entry->is_sub_map) {
1497 vm_map_t lmap = VME_SUBMAP(entry);
1498 lck_mtx_lock(&lmap->s_lock);
1499 vm_map_res_deallocate(lmap);
1500 lck_mtx_unlock(&lmap->s_lock);
1501 } else {
1502 vm_object_t object = VME_OBJECT(entry);
1503 vm_object_lock(object);
1504 /*
1505 * This call may take a long time,
1506 * since it could actively push
1507 * out pages (if we implement it
1508 * that way).
1509 */
1510 vm_object_res_deallocate(object);
1511 vm_object_unlock(object);
1512 }
1513 }
1514 entry = entry->vme_next;
1515 }
1516 assert(map->sw_state == MAP_SW_IN);
1517 map->sw_state = MAP_SW_OUT;
1518 }
1519
1520 #endif /* TASK_SWAPPER */
1521
1522 /*
1523 * vm_map_lookup_entry: [ internal use only ]
1524 *
1525 * Calls into the vm map store layer to find the map
1526 * entry containing (or immediately preceding) the
1527 * specified address in the given map; the entry is returned
1528 * in the "entry" parameter. The boolean
1529 * result indicates whether the address is
1530 * actually contained in the map.
1531 */
1532 boolean_t
1533 vm_map_lookup_entry(
1534 vm_map_t map,
1535 vm_map_offset_t address,
1536 vm_map_entry_t *entry) /* OUT */
1537 {
1538 return ( vm_map_store_lookup_entry( map, address, entry ));
1539 }
1540
1541 /*
1542 * Routine: vm_map_find_space
1543 * Purpose:
1544 * Allocate a range in the specified virtual address map,
1545 * returning the entry allocated for that range.
1546 * Used by kmem_alloc, etc.
1547 *
1548 * The map must NOT be locked. It will be returned locked
1549 * on KERN_SUCCESS, unlocked on failure.
1550 *
1551 * If an entry is allocated, the object/offset fields
1552 * are initialized to zero.
1553 */
1554 kern_return_t
1555 vm_map_find_space(
1556 vm_map_t map,
1557 vm_map_offset_t *address, /* OUT */
1558 vm_map_size_t size,
1559 vm_map_offset_t mask,
1560 int flags __unused,
1561 vm_map_kernel_flags_t vmk_flags,
1562 vm_tag_t tag,
1563 vm_map_entry_t *o_entry) /* OUT */
1564 {
1565 vm_map_entry_t entry, new_entry;
1566 vm_map_offset_t start;
1567 vm_map_offset_t end;
1568 vm_map_entry_t hole_entry;
1569
1570 if (size == 0) {
1571 *address = 0;
1572 return KERN_INVALID_ARGUMENT;
1573 }
1574
1575 if (vmk_flags.vmkf_guard_after) {
1576 /* account for the back guard page in the size */
1577 size += VM_MAP_PAGE_SIZE(map);
1578 }
1579
1580 new_entry = vm_map_entry_create(map, FALSE);
1581
1582 /*
1583 * Look for the first possible address; if there's already
1584 * something at this address, we have to start after it.
1585 */
1586
1587 vm_map_lock(map);
1588
1589 if( map->disable_vmentry_reuse == TRUE) {
1590 VM_MAP_HIGHEST_ENTRY(map, entry, start);
1591 } else {
1592 if (map->holelistenabled) {
1593 hole_entry = (vm_map_entry_t)map->holes_list;
1594
1595 if (hole_entry == NULL) {
1596 /*
1597 * No more space in the map?
1598 */
1599 vm_map_entry_dispose(map, new_entry);
1600 vm_map_unlock(map);
1601 return(KERN_NO_SPACE);
1602 }
1603
1604 entry = hole_entry;
1605 start = entry->vme_start;
1606 } else {
1607 assert(first_free_is_valid(map));
1608 if ((entry = map->first_free) == vm_map_to_entry(map))
1609 start = map->min_offset;
1610 else
1611 start = entry->vme_end;
1612 }
1613 }
1614
1615 /*
1616 * In any case, the "entry" always precedes
1617 * the proposed new region throughout the loop:
1618 */
1619
1620 while (TRUE) {
1621 vm_map_entry_t next;
1622
1623 /*
1624 * Find the end of the proposed new region.
1625 * Be sure we didn't go beyond the end, or
1626 * wrap around the address.
1627 */
1628
1629 if (vmk_flags.vmkf_guard_before) {
1630 /* reserve space for the front guard page */
1631 start += VM_MAP_PAGE_SIZE(map);
1632 }
1633 end = ((start + mask) & ~mask);
1634
1635 if (end < start) {
1636 vm_map_entry_dispose(map, new_entry);
1637 vm_map_unlock(map);
1638 return(KERN_NO_SPACE);
1639 }
1640 start = end;
1641 end += size;
1642
1643 if ((end > map->max_offset) || (end < start)) {
1644 vm_map_entry_dispose(map, new_entry);
1645 vm_map_unlock(map);
1646 return(KERN_NO_SPACE);
1647 }
1648
1649 next = entry->vme_next;
1650
1651 if (map->holelistenabled) {
1652 if (entry->vme_end >= end)
1653 break;
1654 } else {
1655 /*
1656 * If there are no more entries, we must win.
1657 *
1658 * OR
1659 *
1660 * If there is another entry, it must be
1661 * after the end of the potential new region.
1662 */
1663
1664 if (next == vm_map_to_entry(map))
1665 break;
1666
1667 if (next->vme_start >= end)
1668 break;
1669 }
1670
1671 /*
1672 * Didn't fit -- move to the next entry.
1673 */
1674
1675 entry = next;
1676
1677 if (map->holelistenabled) {
1678 if (entry == (vm_map_entry_t) map->holes_list) {
1679 /*
1680 * Wrapped around
1681 */
1682 vm_map_entry_dispose(map, new_entry);
1683 vm_map_unlock(map);
1684 return(KERN_NO_SPACE);
1685 }
1686 start = entry->vme_start;
1687 } else {
1688 start = entry->vme_end;
1689 }
1690 }
1691
1692 if (map->holelistenabled) {
1693 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
1694 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
1695 }
1696 }
1697
1698 /*
1699 * At this point,
1700 * "start" and "end" should define the endpoints of the
1701 * available new range, and
1702 * "entry" should refer to the region before the new
1703 * range, and
1704 *
1705 * the map should be locked.
1706 */
1707
1708 if (vmk_flags.vmkf_guard_before) {
1709 /* go back for the front guard page */
1710 start -= VM_MAP_PAGE_SIZE(map);
1711 }
1712 *address = start;
1713
1714 assert(start < end);
1715 new_entry->vme_start = start;
1716 new_entry->vme_end = end;
1717 assert(page_aligned(new_entry->vme_start));
1718 assert(page_aligned(new_entry->vme_end));
1719 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
1720 VM_MAP_PAGE_MASK(map)));
1721 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
1722 VM_MAP_PAGE_MASK(map)));
1723
1724 new_entry->is_shared = FALSE;
1725 new_entry->is_sub_map = FALSE;
1726 new_entry->use_pmap = TRUE;
1727 VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
1728 VME_OFFSET_SET(new_entry, (vm_object_offset_t) 0);
1729
1730 new_entry->needs_copy = FALSE;
1731
1732 new_entry->inheritance = VM_INHERIT_DEFAULT;
1733 new_entry->protection = VM_PROT_DEFAULT;
1734 new_entry->max_protection = VM_PROT_ALL;
1735 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
1736 new_entry->wired_count = 0;
1737 new_entry->user_wired_count = 0;
1738
1739 new_entry->in_transition = FALSE;
1740 new_entry->needs_wakeup = FALSE;
1741 new_entry->no_cache = FALSE;
1742 new_entry->permanent = FALSE;
1743 new_entry->superpage_size = FALSE;
1744 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
1745 new_entry->map_aligned = TRUE;
1746 } else {
1747 new_entry->map_aligned = FALSE;
1748 }
1749
1750 new_entry->used_for_jit = FALSE;
1751 new_entry->zero_wired_pages = FALSE;
1752 new_entry->iokit_acct = FALSE;
1753 new_entry->vme_resilient_codesign = FALSE;
1754 new_entry->vme_resilient_media = FALSE;
1755 if (vmk_flags.vmkf_atomic_entry)
1756 new_entry->vme_atomic = TRUE;
1757 else
1758 new_entry->vme_atomic = FALSE;
1759
1760 VME_ALIAS_SET(new_entry, tag);
1761
1762 /*
1763 * Insert the new entry into the list
1764 */
1765
1766 vm_map_store_entry_link(map, entry, new_entry);
1767
1768 map->size += size;
1769
1770 /*
1771 * Update the lookup hint
1772 */
1773 SAVE_HINT_MAP_WRITE(map, new_entry);
1774
1775 *o_entry = new_entry;
1776 return(KERN_SUCCESS);
1777 }
1778
1779 int vm_map_pmap_enter_print = FALSE;
1780 int vm_map_pmap_enter_enable = FALSE;
1781
1782 /*
1783 * Routine: vm_map_pmap_enter [internal only]
1784 *
1785 * Description:
1786 * Force pages from the specified object to be entered into
1787 * the pmap at the specified address if they are present.
1788 * As soon as a page not found in the object the scan ends.
1789 *
1790 * Returns:
1791 * Nothing.
1792 *
1793 * In/out conditions:
1794 * The source map should not be locked on entry.
1795 */
1796 __unused static void
1797 vm_map_pmap_enter(
1798 vm_map_t map,
1799 vm_map_offset_t addr,
1800 vm_map_offset_t end_addr,
1801 vm_object_t object,
1802 vm_object_offset_t offset,
1803 vm_prot_t protection)
1804 {
1805 int type_of_fault;
1806 kern_return_t kr;
1807
1808 if(map->pmap == 0)
1809 return;
1810
1811 while (addr < end_addr) {
1812 vm_page_t m;
1813
1814
1815 /*
1816 * TODO:
1817 * From vm_map_enter(), we come into this function without the map
1818 * lock held or the object lock held.
1819 * We haven't taken a reference on the object either.
1820 * We should do a proper lookup on the map to make sure
1821 * that things are sane before we go locking objects that
1822 * could have been deallocated from under us.
1823 */
1824
1825 vm_object_lock(object);
1826
1827 m = vm_page_lookup(object, offset);
1828
1829 if (m == VM_PAGE_NULL || m->busy || m->fictitious ||
1830 (m->unusual && ( m->error || m->restart || m->absent))) {
1831 vm_object_unlock(object);
1832 return;
1833 }
1834
1835 if (vm_map_pmap_enter_print) {
1836 printf("vm_map_pmap_enter:");
1837 printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
1838 map, (unsigned long long)addr, object, (unsigned long long)offset);
1839 }
1840 type_of_fault = DBG_CACHE_HIT_FAULT;
1841 kr = vm_fault_enter(m, map->pmap, addr, protection, protection,
1842 VM_PAGE_WIRED(m),
1843 FALSE, /* change_wiring */
1844 VM_KERN_MEMORY_NONE, /* tag - not wiring */
1845 FALSE, /* no_cache */
1846 FALSE, /* cs_bypass */
1847 0, /* XXX need user tag / alias? */
1848 0, /* pmap_options */
1849 NULL, /* need_retry */
1850 &type_of_fault);
1851
1852 vm_object_unlock(object);
1853
1854 offset += PAGE_SIZE_64;
1855 addr += PAGE_SIZE;
1856 }
1857 }
1858
1859 boolean_t vm_map_pmap_is_empty(
1860 vm_map_t map,
1861 vm_map_offset_t start,
1862 vm_map_offset_t end);
1863 boolean_t vm_map_pmap_is_empty(
1864 vm_map_t map,
1865 vm_map_offset_t start,
1866 vm_map_offset_t end)
1867 {
1868 #ifdef MACHINE_PMAP_IS_EMPTY
1869 return pmap_is_empty(map->pmap, start, end);
1870 #else /* MACHINE_PMAP_IS_EMPTY */
1871 vm_map_offset_t offset;
1872 ppnum_t phys_page;
1873
1874 if (map->pmap == NULL) {
1875 return TRUE;
1876 }
1877
1878 for (offset = start;
1879 offset < end;
1880 offset += PAGE_SIZE) {
1881 phys_page = pmap_find_phys(map->pmap, offset);
1882 if (phys_page) {
1883 kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
1884 "page %d at 0x%llx\n",
1885 map, (long long)start, (long long)end,
1886 phys_page, (long long)offset);
1887 return FALSE;
1888 }
1889 }
1890 return TRUE;
1891 #endif /* MACHINE_PMAP_IS_EMPTY */
1892 }
1893
1894 #define MAX_TRIES_TO_GET_RANDOM_ADDRESS 1000
1895 kern_return_t
1896 vm_map_random_address_for_size(
1897 vm_map_t map,
1898 vm_map_offset_t *address,
1899 vm_map_size_t size)
1900 {
1901 kern_return_t kr = KERN_SUCCESS;
1902 int tries = 0;
1903 vm_map_offset_t random_addr = 0;
1904 vm_map_offset_t hole_end;
1905
1906 vm_map_entry_t next_entry = VM_MAP_ENTRY_NULL;
1907 vm_map_entry_t prev_entry = VM_MAP_ENTRY_NULL;
1908 vm_map_size_t vm_hole_size = 0;
1909 vm_map_size_t addr_space_size;
1910
1911 addr_space_size = vm_map_max(map) - vm_map_min(map);
1912
1913 assert(page_aligned(size));
1914
1915 while (tries < MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
1916 random_addr = ((vm_map_offset_t)random()) << PAGE_SHIFT;
1917 random_addr = vm_map_trunc_page(
1918 vm_map_min(map) +(random_addr % addr_space_size),
1919 VM_MAP_PAGE_MASK(map));
1920
1921 if (vm_map_lookup_entry(map, random_addr, &prev_entry) == FALSE) {
1922 if (prev_entry == vm_map_to_entry(map)) {
1923 next_entry = vm_map_first_entry(map);
1924 } else {
1925 next_entry = prev_entry->vme_next;
1926 }
1927 if (next_entry == vm_map_to_entry(map)) {
1928 hole_end = vm_map_max(map);
1929 } else {
1930 hole_end = next_entry->vme_start;
1931 }
1932 vm_hole_size = hole_end - random_addr;
1933 if (vm_hole_size >= size) {
1934 *address = random_addr;
1935 break;
1936 }
1937 }
1938 tries++;
1939 }
1940
1941 if (tries == MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
1942 kr = KERN_NO_SPACE;
1943 }
1944 return kr;
1945 }
1946
1947 /*
1948 * Routine: vm_map_enter
1949 *
1950 * Description:
1951 * Allocate a range in the specified virtual address map.
1952 * The resulting range will refer to memory defined by
1953 * the given memory object and offset into that object.
1954 *
1955 * Arguments are as defined in the vm_map call.
1956 */
1957 int _map_enter_debug = 0;
1958 static unsigned int vm_map_enter_restore_successes = 0;
1959 static unsigned int vm_map_enter_restore_failures = 0;
1960 kern_return_t
1961 vm_map_enter(
1962 vm_map_t map,
1963 vm_map_offset_t *address, /* IN/OUT */
1964 vm_map_size_t size,
1965 vm_map_offset_t mask,
1966 int flags,
1967 vm_map_kernel_flags_t vmk_flags,
1968 vm_tag_t alias,
1969 vm_object_t object,
1970 vm_object_offset_t offset,
1971 boolean_t needs_copy,
1972 vm_prot_t cur_protection,
1973 vm_prot_t max_protection,
1974 vm_inherit_t inheritance)
1975 {
1976 vm_map_entry_t entry, new_entry;
1977 vm_map_offset_t start, tmp_start, tmp_offset;
1978 vm_map_offset_t end, tmp_end;
1979 vm_map_offset_t tmp2_start, tmp2_end;
1980 vm_map_offset_t step;
1981 kern_return_t result = KERN_SUCCESS;
1982 vm_map_t zap_old_map = VM_MAP_NULL;
1983 vm_map_t zap_new_map = VM_MAP_NULL;
1984 boolean_t map_locked = FALSE;
1985 boolean_t pmap_empty = TRUE;
1986 boolean_t new_mapping_established = FALSE;
1987 boolean_t keep_map_locked = vmk_flags.vmkf_keep_map_locked;
1988 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
1989 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
1990 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
1991 boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
1992 boolean_t is_submap = vmk_flags.vmkf_submap;
1993 boolean_t permanent = vmk_flags.vmkf_permanent;
1994 boolean_t entry_for_jit = vmk_flags.vmkf_map_jit;
1995 boolean_t iokit_acct = vmk_flags.vmkf_iokit_acct;
1996 boolean_t resilient_codesign = ((flags & VM_FLAGS_RESILIENT_CODESIGN) != 0);
1997 boolean_t resilient_media = ((flags & VM_FLAGS_RESILIENT_MEDIA) != 0);
1998 boolean_t random_address = ((flags & VM_FLAGS_RANDOM_ADDR) != 0);
1999 unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
2000 vm_tag_t user_alias;
2001 vm_map_offset_t effective_min_offset, effective_max_offset;
2002 kern_return_t kr;
2003 boolean_t clear_map_aligned = FALSE;
2004 vm_map_entry_t hole_entry;
2005 vm_map_size_t chunk_size = 0;
2006
2007 assertf(vmk_flags.__vmkf_unused == 0, "vmk_flags unused=0x%x\n", vmk_flags.__vmkf_unused);
2008
2009 if (flags & VM_FLAGS_4GB_CHUNK) {
2010 #if defined(__LP64__)
2011 chunk_size = (4ULL * 1024 * 1024 * 1024); /* max. 4GB chunks for the new allocation */
2012 #else /* __LP64__ */
2013 chunk_size = ANON_CHUNK_SIZE;
2014 #endif /* __LP64__ */
2015 } else {
2016 chunk_size = ANON_CHUNK_SIZE;
2017 }
2018
2019 if (superpage_size) {
2020 switch (superpage_size) {
2021 /*
2022 * Note that the current implementation only supports
2023 * a single size for superpages, SUPERPAGE_SIZE, per
2024 * architecture. As soon as more sizes are supposed
2025 * to be supported, SUPERPAGE_SIZE has to be replaced
2026 * with a lookup of the size depending on superpage_size.
2027 */
2028 #ifdef __x86_64__
2029 case SUPERPAGE_SIZE_ANY:
2030 /* handle it like 2 MB and round up to page size */
2031 size = (size + 2*1024*1024 - 1) & ~(2*1024*1024 - 1);
2032 case SUPERPAGE_SIZE_2MB:
2033 break;
2034 #endif
2035 default:
2036 return KERN_INVALID_ARGUMENT;
2037 }
2038 mask = SUPERPAGE_SIZE-1;
2039 if (size & (SUPERPAGE_SIZE-1))
2040 return KERN_INVALID_ARGUMENT;
2041 inheritance = VM_INHERIT_NONE; /* fork() children won't inherit superpages */
2042 }
2043
2044
2045 #if CONFIG_EMBEDDED
2046 if (cur_protection & VM_PROT_WRITE){
2047 if ((cur_protection & VM_PROT_EXECUTE) && !entry_for_jit){
2048 printf("EMBEDDED: %s: curprot cannot be write+execute. "
2049 "turning off execute\n",
2050 __FUNCTION__);
2051 cur_protection &= ~VM_PROT_EXECUTE;
2052 }
2053 }
2054 #endif /* CONFIG_EMBEDDED */
2055
2056 /*
2057 * If the task has requested executable lockdown,
2058 * deny any new executable mapping.
2059 */
2060 if (map->map_disallow_new_exec == TRUE) {
2061 if (cur_protection & VM_PROT_EXECUTE) {
2062 return KERN_PROTECTION_FAILURE;
2063 }
2064 }
2065
2066 if (resilient_codesign || resilient_media) {
2067 if ((cur_protection & (VM_PROT_WRITE | VM_PROT_EXECUTE)) ||
2068 (max_protection & (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
2069 return KERN_PROTECTION_FAILURE;
2070 }
2071 }
2072
2073 if (is_submap) {
2074 if (purgable) {
2075 /* submaps can not be purgeable */
2076 return KERN_INVALID_ARGUMENT;
2077 }
2078 if (object == VM_OBJECT_NULL) {
2079 /* submaps can not be created lazily */
2080 return KERN_INVALID_ARGUMENT;
2081 }
2082 }
2083 if (vmk_flags.vmkf_already) {
2084 /*
2085 * VM_FLAGS_ALREADY says that it's OK if the same mapping
2086 * is already present. For it to be meaningful, the requested
2087 * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
2088 * we shouldn't try and remove what was mapped there first
2089 * (!VM_FLAGS_OVERWRITE).
2090 */
2091 if ((flags & VM_FLAGS_ANYWHERE) ||
2092 (flags & VM_FLAGS_OVERWRITE)) {
2093 return KERN_INVALID_ARGUMENT;
2094 }
2095 }
2096
2097 effective_min_offset = map->min_offset;
2098
2099 if (vmk_flags.vmkf_beyond_max) {
2100 /*
2101 * Allow an insertion beyond the map's max offset.
2102 */
2103 #if !defined(__arm__) && !defined(__arm64__)
2104 if (vm_map_is_64bit(map))
2105 effective_max_offset = 0xFFFFFFFFFFFFF000ULL;
2106 else
2107 #endif /* __arm__ */
2108 effective_max_offset = 0x00000000FFFFF000ULL;
2109 } else {
2110 effective_max_offset = map->max_offset;
2111 }
2112
2113 if (size == 0 ||
2114 (offset & PAGE_MASK_64) != 0) {
2115 *address = 0;
2116 return KERN_INVALID_ARGUMENT;
2117 }
2118
2119 if (map->pmap == kernel_pmap) {
2120 user_alias = VM_KERN_MEMORY_NONE;
2121 } else {
2122 user_alias = alias;
2123 }
2124
2125 #define RETURN(value) { result = value; goto BailOut; }
2126
2127 assert(page_aligned(*address));
2128 assert(page_aligned(size));
2129
2130 if (!VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map))) {
2131 /*
2132 * In most cases, the caller rounds the size up to the
2133 * map's page size.
2134 * If we get a size that is explicitly not map-aligned here,
2135 * we'll have to respect the caller's wish and mark the
2136 * mapping as "not map-aligned" to avoid tripping the
2137 * map alignment checks later.
2138 */
2139 clear_map_aligned = TRUE;
2140 }
2141 if (!anywhere &&
2142 !VM_MAP_PAGE_ALIGNED(*address, VM_MAP_PAGE_MASK(map))) {
2143 /*
2144 * We've been asked to map at a fixed address and that
2145 * address is not aligned to the map's specific alignment.
2146 * The caller should know what it's doing (i.e. most likely
2147 * mapping some fragmented copy map, transferring memory from
2148 * a VM map with a different alignment), so clear map_aligned
2149 * for this new VM map entry and proceed.
2150 */
2151 clear_map_aligned = TRUE;
2152 }
2153
2154 /*
2155 * Only zero-fill objects are allowed to be purgable.
2156 * LP64todo - limit purgable objects to 32-bits for now
2157 */
2158 if (purgable &&
2159 (offset != 0 ||
2160 (object != VM_OBJECT_NULL &&
2161 (object->vo_size != size ||
2162 object->purgable == VM_PURGABLE_DENY))
2163 || size > ANON_MAX_SIZE)) /* LP64todo: remove when dp capable */
2164 return KERN_INVALID_ARGUMENT;
2165
2166 if (!anywhere && overwrite) {
2167 /*
2168 * Create a temporary VM map to hold the old mappings in the
2169 * affected area while we create the new one.
2170 * This avoids releasing the VM map lock in
2171 * vm_map_entry_delete() and allows atomicity
2172 * when we want to replace some mappings with a new one.
2173 * It also allows us to restore the old VM mappings if the
2174 * new mapping fails.
2175 */
2176 zap_old_map = vm_map_create(PMAP_NULL,
2177 *address,
2178 *address + size,
2179 map->hdr.entries_pageable);
2180 vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
2181 vm_map_disable_hole_optimization(zap_old_map);
2182 }
2183
2184 StartAgain: ;
2185
2186 start = *address;
2187
2188 if (anywhere) {
2189 vm_map_lock(map);
2190 map_locked = TRUE;
2191
2192 if (entry_for_jit) {
2193 if (map->jit_entry_exists) {
2194 result = KERN_INVALID_ARGUMENT;
2195 goto BailOut;
2196 }
2197 random_address = TRUE;
2198 }
2199
2200 if (random_address) {
2201 /*
2202 * Get a random start address.
2203 */
2204 result = vm_map_random_address_for_size(map, address, size);
2205 if (result != KERN_SUCCESS) {
2206 goto BailOut;
2207 }
2208 start = *address;
2209 }
2210 #if __x86_64__
2211 else if ((start == 0 || start == vm_map_min(map)) &&
2212 !map->disable_vmentry_reuse &&
2213 map->vmmap_high_start != 0) {
2214 start = map->vmmap_high_start;
2215 }
2216 #endif /* __x86_64__ */
2217
2218
2219 /*
2220 * Calculate the first possible address.
2221 */
2222
2223 if (start < effective_min_offset)
2224 start = effective_min_offset;
2225 if (start > effective_max_offset)
2226 RETURN(KERN_NO_SPACE);
2227
2228 /*
2229 * Look for the first possible address;
2230 * if there's already something at this
2231 * address, we have to start after it.
2232 */
2233
2234 if( map->disable_vmentry_reuse == TRUE) {
2235 VM_MAP_HIGHEST_ENTRY(map, entry, start);
2236 } else {
2237
2238 if (map->holelistenabled) {
2239 hole_entry = (vm_map_entry_t)map->holes_list;
2240
2241 if (hole_entry == NULL) {
2242 /*
2243 * No more space in the map?
2244 */
2245 result = KERN_NO_SPACE;
2246 goto BailOut;
2247 } else {
2248
2249 boolean_t found_hole = FALSE;
2250
2251 do {
2252 if (hole_entry->vme_start >= start) {
2253 start = hole_entry->vme_start;
2254 found_hole = TRUE;
2255 break;
2256 }
2257
2258 if (hole_entry->vme_end > start) {
2259 found_hole = TRUE;
2260 break;
2261 }
2262 hole_entry = hole_entry->vme_next;
2263
2264 } while (hole_entry != (vm_map_entry_t) map->holes_list);
2265
2266 if (found_hole == FALSE) {
2267 result = KERN_NO_SPACE;
2268 goto BailOut;
2269 }
2270
2271 entry = hole_entry;
2272
2273 if (start == 0)
2274 start += PAGE_SIZE_64;
2275 }
2276 } else {
2277 assert(first_free_is_valid(map));
2278
2279 entry = map->first_free;
2280
2281 if (entry == vm_map_to_entry(map)) {
2282 entry = NULL;
2283 } else {
2284 if (entry->vme_next == vm_map_to_entry(map)){
2285 /*
2286 * Hole at the end of the map.
2287 */
2288 entry = NULL;
2289 } else {
2290 if (start < (entry->vme_next)->vme_start ) {
2291 start = entry->vme_end;
2292 start = vm_map_round_page(start,
2293 VM_MAP_PAGE_MASK(map));
2294 } else {
2295 /*
2296 * Need to do a lookup.
2297 */
2298 entry = NULL;
2299 }
2300 }
2301 }
2302
2303 if (entry == NULL) {
2304 vm_map_entry_t tmp_entry;
2305 if (vm_map_lookup_entry(map, start, &tmp_entry)) {
2306 assert(!entry_for_jit);
2307 start = tmp_entry->vme_end;
2308 start = vm_map_round_page(start,
2309 VM_MAP_PAGE_MASK(map));
2310 }
2311 entry = tmp_entry;
2312 }
2313 }
2314 }
2315
2316 /*
2317 * In any case, the "entry" always precedes
2318 * the proposed new region throughout the
2319 * loop:
2320 */
2321
2322 while (TRUE) {
2323 vm_map_entry_t next;
2324
2325 /*
2326 * Find the end of the proposed new region.
2327 * Be sure we didn't go beyond the end, or
2328 * wrap around the address.
2329 */
2330
2331 end = ((start + mask) & ~mask);
2332 end = vm_map_round_page(end,
2333 VM_MAP_PAGE_MASK(map));
2334 if (end < start)
2335 RETURN(KERN_NO_SPACE);
2336 start = end;
2337 assert(VM_MAP_PAGE_ALIGNED(start,
2338 VM_MAP_PAGE_MASK(map)));
2339 end += size;
2340
2341 if ((end > effective_max_offset) || (end < start)) {
2342 if (map->wait_for_space) {
2343 assert(!keep_map_locked);
2344 if (size <= (effective_max_offset -
2345 effective_min_offset)) {
2346 assert_wait((event_t)map,
2347 THREAD_ABORTSAFE);
2348 vm_map_unlock(map);
2349 map_locked = FALSE;
2350 thread_block(THREAD_CONTINUE_NULL);
2351 goto StartAgain;
2352 }
2353 }
2354 RETURN(KERN_NO_SPACE);
2355 }
2356
2357 next = entry->vme_next;
2358
2359 if (map->holelistenabled) {
2360 if (entry->vme_end >= end)
2361 break;
2362 } else {
2363 /*
2364 * If there are no more entries, we must win.
2365 *
2366 * OR
2367 *
2368 * If there is another entry, it must be
2369 * after the end of the potential new region.
2370 */
2371
2372 if (next == vm_map_to_entry(map))
2373 break;
2374
2375 if (next->vme_start >= end)
2376 break;
2377 }
2378
2379 /*
2380 * Didn't fit -- move to the next entry.
2381 */
2382
2383 entry = next;
2384
2385 if (map->holelistenabled) {
2386 if (entry == (vm_map_entry_t) map->holes_list) {
2387 /*
2388 * Wrapped around
2389 */
2390 result = KERN_NO_SPACE;
2391 goto BailOut;
2392 }
2393 start = entry->vme_start;
2394 } else {
2395 start = entry->vme_end;
2396 }
2397
2398 start = vm_map_round_page(start,
2399 VM_MAP_PAGE_MASK(map));
2400 }
2401
2402 if (map->holelistenabled) {
2403 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
2404 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
2405 }
2406 }
2407
2408 *address = start;
2409 assert(VM_MAP_PAGE_ALIGNED(*address,
2410 VM_MAP_PAGE_MASK(map)));
2411 } else {
2412 /*
2413 * Verify that:
2414 * the address doesn't itself violate
2415 * the mask requirement.
2416 */
2417
2418 vm_map_lock(map);
2419 map_locked = TRUE;
2420 if ((start & mask) != 0)
2421 RETURN(KERN_NO_SPACE);
2422
2423 /*
2424 * ... the address is within bounds
2425 */
2426
2427 end = start + size;
2428
2429 if ((start < effective_min_offset) ||
2430 (end > effective_max_offset) ||
2431 (start >= end)) {
2432 RETURN(KERN_INVALID_ADDRESS);
2433 }
2434
2435 if (overwrite && zap_old_map != VM_MAP_NULL) {
2436 int remove_flags;
2437 /*
2438 * Fixed mapping and "overwrite" flag: attempt to
2439 * remove all existing mappings in the specified
2440 * address range, saving them in our "zap_old_map".
2441 */
2442 remove_flags = VM_MAP_REMOVE_SAVE_ENTRIES;
2443 remove_flags |= VM_MAP_REMOVE_NO_MAP_ALIGN;
2444 if (vmk_flags.vmkf_overwrite_immutable) {
2445 /* we can overwrite immutable mappings */
2446 remove_flags |= VM_MAP_REMOVE_IMMUTABLE;
2447 }
2448 (void) vm_map_delete(map, start, end,
2449 remove_flags,
2450 zap_old_map);
2451 }
2452
2453 /*
2454 * ... the starting address isn't allocated
2455 */
2456
2457 if (vm_map_lookup_entry(map, start, &entry)) {
2458 if (! (vmk_flags.vmkf_already)) {
2459 RETURN(KERN_NO_SPACE);
2460 }
2461 /*
2462 * Check if what's already there is what we want.
2463 */
2464 tmp_start = start;
2465 tmp_offset = offset;
2466 if (entry->vme_start < start) {
2467 tmp_start -= start - entry->vme_start;
2468 tmp_offset -= start - entry->vme_start;
2469
2470 }
2471 for (; entry->vme_start < end;
2472 entry = entry->vme_next) {
2473 /*
2474 * Check if the mapping's attributes
2475 * match the existing map entry.
2476 */
2477 if (entry == vm_map_to_entry(map) ||
2478 entry->vme_start != tmp_start ||
2479 entry->is_sub_map != is_submap ||
2480 VME_OFFSET(entry) != tmp_offset ||
2481 entry->needs_copy != needs_copy ||
2482 entry->protection != cur_protection ||
2483 entry->max_protection != max_protection ||
2484 entry->inheritance != inheritance ||
2485 entry->iokit_acct != iokit_acct ||
2486 VME_ALIAS(entry) != alias) {
2487 /* not the same mapping ! */
2488 RETURN(KERN_NO_SPACE);
2489 }
2490 /*
2491 * Check if the same object is being mapped.
2492 */
2493 if (is_submap) {
2494 if (VME_SUBMAP(entry) !=
2495 (vm_map_t) object) {
2496 /* not the same submap */
2497 RETURN(KERN_NO_SPACE);
2498 }
2499 } else {
2500 if (VME_OBJECT(entry) != object) {
2501 /* not the same VM object... */
2502 vm_object_t obj2;
2503
2504 obj2 = VME_OBJECT(entry);
2505 if ((obj2 == VM_OBJECT_NULL ||
2506 obj2->internal) &&
2507 (object == VM_OBJECT_NULL ||
2508 object->internal)) {
2509 /*
2510 * ... but both are
2511 * anonymous memory,
2512 * so equivalent.
2513 */
2514 } else {
2515 RETURN(KERN_NO_SPACE);
2516 }
2517 }
2518 }
2519
2520 tmp_offset += entry->vme_end - entry->vme_start;
2521 tmp_start += entry->vme_end - entry->vme_start;
2522 if (entry->vme_end >= end) {
2523 /* reached the end of our mapping */
2524 break;
2525 }
2526 }
2527 /* it all matches: let's use what's already there ! */
2528 RETURN(KERN_MEMORY_PRESENT);
2529 }
2530
2531 /*
2532 * ... the next region doesn't overlap the
2533 * end point.
2534 */
2535
2536 if ((entry->vme_next != vm_map_to_entry(map)) &&
2537 (entry->vme_next->vme_start < end))
2538 RETURN(KERN_NO_SPACE);
2539 }
2540
2541 /*
2542 * At this point,
2543 * "start" and "end" should define the endpoints of the
2544 * available new range, and
2545 * "entry" should refer to the region before the new
2546 * range, and
2547 *
2548 * the map should be locked.
2549 */
2550
2551 /*
2552 * See whether we can avoid creating a new entry (and object) by
2553 * extending one of our neighbors. [So far, we only attempt to
2554 * extend from below.] Note that we can never extend/join
2555 * purgable objects because they need to remain distinct
2556 * entities in order to implement their "volatile object"
2557 * semantics.
2558 */
2559
2560 if (purgable || entry_for_jit) {
2561 if (object == VM_OBJECT_NULL) {
2562
2563 object = vm_object_allocate(size);
2564 object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
2565 object->true_share = TRUE;
2566 if (purgable) {
2567 task_t owner;
2568 object->purgable = VM_PURGABLE_NONVOLATILE;
2569 if (map->pmap == kernel_pmap) {
2570 /*
2571 * Purgeable mappings made in a kernel
2572 * map are "owned" by the kernel itself
2573 * rather than the current user task
2574 * because they're likely to be used by
2575 * more than this user task (see
2576 * execargs_purgeable_allocate(), for
2577 * example).
2578 */
2579 owner = kernel_task;
2580 } else {
2581 owner = current_task();
2582 }
2583 assert(object->vo_purgeable_owner == NULL);
2584 assert(object->resident_page_count == 0);
2585 assert(object->wired_page_count == 0);
2586 vm_object_lock(object);
2587 vm_purgeable_nonvolatile_enqueue(object, owner);
2588 vm_object_unlock(object);
2589 }
2590 offset = (vm_object_offset_t)0;
2591 }
2592 } else if ((is_submap == FALSE) &&
2593 (object == VM_OBJECT_NULL) &&
2594 (entry != vm_map_to_entry(map)) &&
2595 (entry->vme_end == start) &&
2596 (!entry->is_shared) &&
2597 (!entry->is_sub_map) &&
2598 (!entry->in_transition) &&
2599 (!entry->needs_wakeup) &&
2600 (entry->behavior == VM_BEHAVIOR_DEFAULT) &&
2601 (entry->protection == cur_protection) &&
2602 (entry->max_protection == max_protection) &&
2603 (entry->inheritance == inheritance) &&
2604 ((user_alias == VM_MEMORY_REALLOC) ||
2605 (VME_ALIAS(entry) == alias)) &&
2606 (entry->no_cache == no_cache) &&
2607 (entry->permanent == permanent) &&
2608 /* no coalescing for immutable executable mappings */
2609 !((entry->protection & VM_PROT_EXECUTE) &&
2610 entry->permanent) &&
2611 (!entry->superpage_size && !superpage_size) &&
2612 /*
2613 * No coalescing if not map-aligned, to avoid propagating
2614 * that condition any further than needed:
2615 */
2616 (!entry->map_aligned || !clear_map_aligned) &&
2617 (!entry->zero_wired_pages) &&
2618 (!entry->used_for_jit && !entry_for_jit) &&
2619 (entry->iokit_acct == iokit_acct) &&
2620 (!entry->vme_resilient_codesign) &&
2621 (!entry->vme_resilient_media) &&
2622 (!entry->vme_atomic) &&
2623
2624 ((entry->vme_end - entry->vme_start) + size <=
2625 (user_alias == VM_MEMORY_REALLOC ?
2626 ANON_CHUNK_SIZE :
2627 NO_COALESCE_LIMIT)) &&
2628
2629 (entry->wired_count == 0)) { /* implies user_wired_count == 0 */
2630 if (vm_object_coalesce(VME_OBJECT(entry),
2631 VM_OBJECT_NULL,
2632 VME_OFFSET(entry),
2633 (vm_object_offset_t) 0,
2634 (vm_map_size_t)(entry->vme_end - entry->vme_start),
2635 (vm_map_size_t)(end - entry->vme_end))) {
2636
2637 /*
2638 * Coalesced the two objects - can extend
2639 * the previous map entry to include the
2640 * new range.
2641 */
2642 map->size += (end - entry->vme_end);
2643 assert(entry->vme_start < end);
2644 assert(VM_MAP_PAGE_ALIGNED(end,
2645 VM_MAP_PAGE_MASK(map)));
2646 if (__improbable(vm_debug_events))
2647 DTRACE_VM5(map_entry_extend, vm_map_t, map, vm_map_entry_t, entry, vm_address_t, entry->vme_start, vm_address_t, entry->vme_end, vm_address_t, end);
2648 entry->vme_end = end;
2649 if (map->holelistenabled) {
2650 vm_map_store_update_first_free(map, entry, TRUE);
2651 } else {
2652 vm_map_store_update_first_free(map, map->first_free, TRUE);
2653 }
2654 new_mapping_established = TRUE;
2655 RETURN(KERN_SUCCESS);
2656 }
2657 }
2658
2659 step = superpage_size ? SUPERPAGE_SIZE : (end - start);
2660 new_entry = NULL;
2661
2662 for (tmp2_start = start; tmp2_start<end; tmp2_start += step) {
2663 tmp2_end = tmp2_start + step;
2664 /*
2665 * Create a new entry
2666 *
2667 * XXX FBDP
2668 * The reserved "page zero" in each process's address space can
2669 * be arbitrarily large. Splitting it into separate objects and
2670 * therefore different VM map entries serves no purpose and just
2671 * slows down operations on the VM map, so let's not split the
2672 * allocation into chunks if the max protection is NONE. That
2673 * memory should never be accessible, so it will never get to the
2674 * default pager.
2675 */
2676 tmp_start = tmp2_start;
2677 if (object == VM_OBJECT_NULL &&
2678 size > chunk_size &&
2679 max_protection != VM_PROT_NONE &&
2680 superpage_size == 0)
2681 tmp_end = tmp_start + chunk_size;
2682 else
2683 tmp_end = tmp2_end;
2684 do {
2685 new_entry = vm_map_entry_insert(
2686 map, entry, tmp_start, tmp_end,
2687 object, offset, needs_copy,
2688 FALSE, FALSE,
2689 cur_protection, max_protection,
2690 VM_BEHAVIOR_DEFAULT,
2691 (entry_for_jit)? VM_INHERIT_NONE: inheritance,
2692 0,
2693 no_cache,
2694 permanent,
2695 superpage_size,
2696 clear_map_aligned,
2697 is_submap,
2698 entry_for_jit,
2699 alias);
2700
2701 assert((object != kernel_object) || (VM_KERN_MEMORY_NONE != alias));
2702
2703 if (resilient_codesign &&
2704 ! ((cur_protection | max_protection) &
2705 (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
2706 new_entry->vme_resilient_codesign = TRUE;
2707 }
2708
2709 if (resilient_media &&
2710 ! ((cur_protection | max_protection) &
2711 (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
2712 new_entry->vme_resilient_media = TRUE;
2713 }
2714
2715 assert(!new_entry->iokit_acct);
2716 if (!is_submap &&
2717 object != VM_OBJECT_NULL &&
2718 object->purgable != VM_PURGABLE_DENY) {
2719 assert(new_entry->use_pmap);
2720 assert(!new_entry->iokit_acct);
2721 /*
2722 * Turn off pmap accounting since
2723 * purgeable objects have their
2724 * own ledgers.
2725 */
2726 new_entry->use_pmap = FALSE;
2727 } else if (!is_submap &&
2728 iokit_acct &&
2729 object != VM_OBJECT_NULL &&
2730 object->internal) {
2731 /* alternate accounting */
2732 assert(!new_entry->iokit_acct);
2733 assert(new_entry->use_pmap);
2734 new_entry->iokit_acct = TRUE;
2735 new_entry->use_pmap = FALSE;
2736 DTRACE_VM4(
2737 vm_map_iokit_mapped_region,
2738 vm_map_t, map,
2739 vm_map_offset_t, new_entry->vme_start,
2740 vm_map_offset_t, new_entry->vme_end,
2741 int, VME_ALIAS(new_entry));
2742 vm_map_iokit_mapped_region(
2743 map,
2744 (new_entry->vme_end -
2745 new_entry->vme_start));
2746 } else if (!is_submap) {
2747 assert(!new_entry->iokit_acct);
2748 assert(new_entry->use_pmap);
2749 }
2750
2751 if (is_submap) {
2752 vm_map_t submap;
2753 boolean_t submap_is_64bit;
2754 boolean_t use_pmap;
2755
2756 assert(new_entry->is_sub_map);
2757 assert(!new_entry->use_pmap);
2758 assert(!new_entry->iokit_acct);
2759 submap = (vm_map_t) object;
2760 submap_is_64bit = vm_map_is_64bit(submap);
2761 use_pmap = (user_alias == VM_MEMORY_SHARED_PMAP);
2762 #ifndef NO_NESTED_PMAP
2763 if (use_pmap && submap->pmap == NULL) {
2764 ledger_t ledger = map->pmap->ledger;
2765 /* we need a sub pmap to nest... */
2766 submap->pmap = pmap_create(ledger, 0,
2767 submap_is_64bit);
2768 if (submap->pmap == NULL) {
2769 /* let's proceed without nesting... */
2770 }
2771 #if defined(__arm__) || defined(__arm64__)
2772 else {
2773 pmap_set_nested(submap->pmap);
2774 }
2775 #endif
2776 }
2777 if (use_pmap && submap->pmap != NULL) {
2778 kr = pmap_nest(map->pmap,
2779 submap->pmap,
2780 tmp_start,
2781 tmp_start,
2782 tmp_end - tmp_start);
2783 if (kr != KERN_SUCCESS) {
2784 printf("vm_map_enter: "
2785 "pmap_nest(0x%llx,0x%llx) "
2786 "error 0x%x\n",
2787 (long long)tmp_start,
2788 (long long)tmp_end,
2789 kr);
2790 } else {
2791 /* we're now nested ! */
2792 new_entry->use_pmap = TRUE;
2793 pmap_empty = FALSE;
2794 }
2795 }
2796 #endif /* NO_NESTED_PMAP */
2797 }
2798 entry = new_entry;
2799
2800 if (superpage_size) {
2801 vm_page_t pages, m;
2802 vm_object_t sp_object;
2803 vm_object_offset_t sp_offset;
2804
2805 VME_OFFSET_SET(entry, 0);
2806
2807 /* allocate one superpage */
2808 kr = cpm_allocate(SUPERPAGE_SIZE, &pages, 0, SUPERPAGE_NBASEPAGES-1, TRUE, 0);
2809 if (kr != KERN_SUCCESS) {
2810 /* deallocate whole range... */
2811 new_mapping_established = TRUE;
2812 /* ... but only up to "tmp_end" */
2813 size -= end - tmp_end;
2814 RETURN(kr);
2815 }
2816
2817 /* create one vm_object per superpage */
2818 sp_object = vm_object_allocate((vm_map_size_t)(entry->vme_end - entry->vme_start));
2819 sp_object->phys_contiguous = TRUE;
2820 sp_object->vo_shadow_offset = (vm_object_offset_t)VM_PAGE_GET_PHYS_PAGE(pages)*PAGE_SIZE;
2821 VME_OBJECT_SET(entry, sp_object);
2822 assert(entry->use_pmap);
2823
2824 /* enter the base pages into the object */
2825 vm_object_lock(sp_object);
2826 for (sp_offset = 0;
2827 sp_offset < SUPERPAGE_SIZE;
2828 sp_offset += PAGE_SIZE) {
2829 m = pages;
2830 pmap_zero_page(VM_PAGE_GET_PHYS_PAGE(m));
2831 pages = NEXT_PAGE(m);
2832 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
2833 vm_page_insert_wired(m, sp_object, sp_offset, VM_KERN_MEMORY_OSFMK);
2834 }
2835 vm_object_unlock(sp_object);
2836 }
2837 } while (tmp_end != tmp2_end &&
2838 (tmp_start = tmp_end) &&
2839 (tmp_end = (tmp2_end - tmp_end > chunk_size) ?
2840 tmp_end + chunk_size : tmp2_end));
2841 }
2842
2843 new_mapping_established = TRUE;
2844
2845 BailOut:
2846 assert(map_locked == TRUE);
2847
2848 if (result == KERN_SUCCESS) {
2849 vm_prot_t pager_prot;
2850 memory_object_t pager;
2851
2852 #if DEBUG
2853 if (pmap_empty &&
2854 !(vmk_flags.vmkf_no_pmap_check)) {
2855 assert(vm_map_pmap_is_empty(map,
2856 *address,
2857 *address+size));
2858 }
2859 #endif /* DEBUG */
2860
2861 /*
2862 * For "named" VM objects, let the pager know that the
2863 * memory object is being mapped. Some pagers need to keep
2864 * track of this, to know when they can reclaim the memory
2865 * object, for example.
2866 * VM calls memory_object_map() for each mapping (specifying
2867 * the protection of each mapping) and calls
2868 * memory_object_last_unmap() when all the mappings are gone.
2869 */
2870 pager_prot = max_protection;
2871 if (needs_copy) {
2872 /*
2873 * Copy-On-Write mapping: won't modify
2874 * the memory object.
2875 */
2876 pager_prot &= ~VM_PROT_WRITE;
2877 }
2878 if (!is_submap &&
2879 object != VM_OBJECT_NULL &&
2880 object->named &&
2881 object->pager != MEMORY_OBJECT_NULL) {
2882 vm_object_lock(object);
2883 pager = object->pager;
2884 if (object->named &&
2885 pager != MEMORY_OBJECT_NULL) {
2886 assert(object->pager_ready);
2887 vm_object_mapping_wait(object, THREAD_UNINT);
2888 vm_object_mapping_begin(object);
2889 vm_object_unlock(object);
2890
2891 kr = memory_object_map(pager, pager_prot);
2892 assert(kr == KERN_SUCCESS);
2893
2894 vm_object_lock(object);
2895 vm_object_mapping_end(object);
2896 }
2897 vm_object_unlock(object);
2898 }
2899 }
2900
2901 assert(map_locked == TRUE);
2902
2903 if (!keep_map_locked) {
2904 vm_map_unlock(map);
2905 map_locked = FALSE;
2906 }
2907
2908 /*
2909 * We can't hold the map lock if we enter this block.
2910 */
2911
2912 if (result == KERN_SUCCESS) {
2913
2914 /* Wire down the new entry if the user
2915 * requested all new map entries be wired.
2916 */
2917 if ((map->wiring_required)||(superpage_size)) {
2918 assert(!keep_map_locked);
2919 pmap_empty = FALSE; /* pmap won't be empty */
2920 kr = vm_map_wire_kernel(map, start, end,
2921 new_entry->protection, VM_KERN_MEMORY_MLOCK,
2922 TRUE);
2923 result = kr;
2924 }
2925
2926 }
2927
2928 if (result != KERN_SUCCESS) {
2929 if (new_mapping_established) {
2930 /*
2931 * We have to get rid of the new mappings since we
2932 * won't make them available to the user.
2933 * Try and do that atomically, to minimize the risk
2934 * that someone else creates new mappings in that range.
2935 */
2936 zap_new_map = vm_map_create(PMAP_NULL,
2937 *address,
2938 *address + size,
2939 map->hdr.entries_pageable);
2940 vm_map_set_page_shift(zap_new_map,
2941 VM_MAP_PAGE_SHIFT(map));
2942 vm_map_disable_hole_optimization(zap_new_map);
2943
2944 if (!map_locked) {
2945 vm_map_lock(map);
2946 map_locked = TRUE;
2947 }
2948 (void) vm_map_delete(map, *address, *address+size,
2949 (VM_MAP_REMOVE_SAVE_ENTRIES |
2950 VM_MAP_REMOVE_NO_MAP_ALIGN),
2951 zap_new_map);
2952 }
2953 if (zap_old_map != VM_MAP_NULL &&
2954 zap_old_map->hdr.nentries != 0) {
2955 vm_map_entry_t entry1, entry2;
2956
2957 /*
2958 * The new mapping failed. Attempt to restore
2959 * the old mappings, saved in the "zap_old_map".
2960 */
2961 if (!map_locked) {
2962 vm_map_lock(map);
2963 map_locked = TRUE;
2964 }
2965
2966 /* first check if the coast is still clear */
2967 start = vm_map_first_entry(zap_old_map)->vme_start;
2968 end = vm_map_last_entry(zap_old_map)->vme_end;
2969 if (vm_map_lookup_entry(map, start, &entry1) ||
2970 vm_map_lookup_entry(map, end, &entry2) ||
2971 entry1 != entry2) {
2972 /*
2973 * Part of that range has already been
2974 * re-mapped: we can't restore the old
2975 * mappings...
2976 */
2977 vm_map_enter_restore_failures++;
2978 } else {
2979 /*
2980 * Transfer the saved map entries from
2981 * "zap_old_map" to the original "map",
2982 * inserting them all after "entry1".
2983 */
2984 for (entry2 = vm_map_first_entry(zap_old_map);
2985 entry2 != vm_map_to_entry(zap_old_map);
2986 entry2 = vm_map_first_entry(zap_old_map)) {
2987 vm_map_size_t entry_size;
2988
2989 entry_size = (entry2->vme_end -
2990 entry2->vme_start);
2991 vm_map_store_entry_unlink(zap_old_map,
2992 entry2);
2993 zap_old_map->size -= entry_size;
2994 vm_map_store_entry_link(map, entry1, entry2);
2995 map->size += entry_size;
2996 entry1 = entry2;
2997 }
2998 if (map->wiring_required) {
2999 /*
3000 * XXX TODO: we should rewire the
3001 * old pages here...
3002 */
3003 }
3004 vm_map_enter_restore_successes++;
3005 }
3006 }
3007 }
3008
3009 /*
3010 * The caller is responsible for releasing the lock if it requested to
3011 * keep the map locked.
3012 */
3013 if (map_locked && !keep_map_locked) {
3014 vm_map_unlock(map);
3015 }
3016
3017 /*
3018 * Get rid of the "zap_maps" and all the map entries that
3019 * they may still contain.
3020 */
3021 if (zap_old_map != VM_MAP_NULL) {
3022 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3023 zap_old_map = VM_MAP_NULL;
3024 }
3025 if (zap_new_map != VM_MAP_NULL) {
3026 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3027 zap_new_map = VM_MAP_NULL;
3028 }
3029
3030 return result;
3031
3032 #undef RETURN
3033 }
3034
3035 #if __arm64__
3036 extern const struct memory_object_pager_ops fourk_pager_ops; /* pager ops table defined outside this file section; vm_map_enter_fourk() compares an object's pager->mo_pager_ops against its address */
3037 kern_return_t
3038 vm_map_enter_fourk(
3039 vm_map_t map,
3040 vm_map_offset_t *address, /* IN/OUT */
3041 vm_map_size_t size,
3042 vm_map_offset_t mask,
3043 int flags,
3044 vm_map_kernel_flags_t vmk_flags,
3045 vm_tag_t alias,
3046 vm_object_t object,
3047 vm_object_offset_t offset,
3048 boolean_t needs_copy,
3049 vm_prot_t cur_protection,
3050 vm_prot_t max_protection,
3051 vm_inherit_t inheritance)
3052 {
3053 vm_map_entry_t entry, new_entry;
3054 vm_map_offset_t start, fourk_start;
3055 vm_map_offset_t end, fourk_end;
3056 vm_map_size_t fourk_size;
3057 kern_return_t result = KERN_SUCCESS;
3058 vm_map_t zap_old_map = VM_MAP_NULL;
3059 vm_map_t zap_new_map = VM_MAP_NULL;
3060 boolean_t map_locked = FALSE;
3061 boolean_t pmap_empty = TRUE;
3062 boolean_t new_mapping_established = FALSE;
3063 boolean_t keep_map_locked = vmk_flags.vmkf_keep_map_locked;
3064 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
3065 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
3066 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
3067 boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
3068 boolean_t is_submap = vmk_flags.vmkf_submap;
3069 boolean_t permanent = vmk_flags.vmkf_permanent;
3070 boolean_t entry_for_jit = vmk_flags.vmkf_map_jit;
3071 // boolean_t iokit_acct = vmk_flags.vmkf_iokit_acct;
3072 unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
3073 vm_map_offset_t effective_min_offset, effective_max_offset;
3074 kern_return_t kr;
3075 boolean_t clear_map_aligned = FALSE;
3076 memory_object_t fourk_mem_obj;
3077 vm_object_t fourk_object;
3078 vm_map_offset_t fourk_pager_offset;
3079 int fourk_pager_index_start, fourk_pager_index_num;
3080 int cur_idx;
3081 boolean_t fourk_copy;
3082 vm_object_t copy_object;
3083 vm_object_offset_t copy_offset;
3084
3085 fourk_mem_obj = MEMORY_OBJECT_NULL;
3086 fourk_object = VM_OBJECT_NULL;
3087
3088 if (superpage_size) {
3089 return KERN_NOT_SUPPORTED;
3090 }
3091
3092 #if CONFIG_EMBEDDED
3093 if (cur_protection & VM_PROT_WRITE) {
3094 if ((cur_protection & VM_PROT_EXECUTE) &&
3095 !entry_for_jit) {
3096 printf("EMBEDDED: %s: curprot cannot be write+execute. "
3097 "turning off execute\n",
3098 __FUNCTION__);
3099 cur_protection &= ~VM_PROT_EXECUTE;
3100 }
3101 }
3102 #endif /* CONFIG_EMBEDDED */
3103
3104 /*
3105 * If the task has requested executable lockdown,
3106 * deny any new executable mapping.
3107 */
3108 if (map->map_disallow_new_exec == TRUE) {
3109 if (cur_protection & VM_PROT_EXECUTE) {
3110 return KERN_PROTECTION_FAILURE;
3111 }
3112 }
3113
3114 if (is_submap) {
3115 return KERN_NOT_SUPPORTED;
3116 }
3117 if (vmk_flags.vmkf_already) {
3118 return KERN_NOT_SUPPORTED;
3119 }
3120 if (purgable || entry_for_jit) {
3121 return KERN_NOT_SUPPORTED;
3122 }
3123
3124 effective_min_offset = map->min_offset;
3125
3126 if (vmk_flags.vmkf_beyond_max) {
3127 return KERN_NOT_SUPPORTED;
3128 } else {
3129 effective_max_offset = map->max_offset;
3130 }
3131
3132 if (size == 0 ||
3133 (offset & FOURK_PAGE_MASK) != 0) {
3134 *address = 0;
3135 return KERN_INVALID_ARGUMENT;
3136 }
3137
3138 #define RETURN(value) { result = value; goto BailOut; }
3139
3140 assert(VM_MAP_PAGE_ALIGNED(*address, FOURK_PAGE_MASK));
3141 assert(VM_MAP_PAGE_ALIGNED(size, FOURK_PAGE_MASK));
3142
3143 if (!anywhere && overwrite) {
3144 return KERN_NOT_SUPPORTED;
3145 }
3146 if (!anywhere && overwrite) {
3147 /*
3148 * Create a temporary VM map to hold the old mappings in the
3149 * affected area while we create the new one.
3150 * This avoids releasing the VM map lock in
3151 * vm_map_entry_delete() and allows atomicity
3152 * when we want to replace some mappings with a new one.
3153 * It also allows us to restore the old VM mappings if the
3154 * new mapping fails.
3155 */
3156 zap_old_map = vm_map_create(PMAP_NULL,
3157 *address,
3158 *address + size,
3159 map->hdr.entries_pageable);
3160 vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
3161 vm_map_disable_hole_optimization(zap_old_map);
3162 }
3163
3164 fourk_start = *address;
3165 fourk_size = size;
3166 fourk_end = fourk_start + fourk_size;
3167
3168 start = vm_map_trunc_page(*address, VM_MAP_PAGE_MASK(map));
3169 end = vm_map_round_page(fourk_end, VM_MAP_PAGE_MASK(map));
3170 size = end - start;
3171
3172 if (anywhere) {
3173 return KERN_NOT_SUPPORTED;
3174 } else {
3175 /*
3176 * Verify that:
3177 * the address doesn't itself violate
3178 * the mask requirement.
3179 */
3180
3181 vm_map_lock(map);
3182 map_locked = TRUE;
3183 if ((start & mask) != 0) {
3184 RETURN(KERN_NO_SPACE);
3185 }
3186
3187 /*
3188 * ... the address is within bounds
3189 */
3190
3191 end = start + size;
3192
3193 if ((start < effective_min_offset) ||
3194 (end > effective_max_offset) ||
3195 (start >= end)) {
3196 RETURN(KERN_INVALID_ADDRESS);
3197 }
3198
3199 if (overwrite && zap_old_map != VM_MAP_NULL) {
3200 /*
3201 * Fixed mapping and "overwrite" flag: attempt to
3202 * remove all existing mappings in the specified
3203 * address range, saving them in our "zap_old_map".
3204 */
3205 (void) vm_map_delete(map, start, end,
3206 (VM_MAP_REMOVE_SAVE_ENTRIES |
3207 VM_MAP_REMOVE_NO_MAP_ALIGN),
3208 zap_old_map);
3209 }
3210
3211 /*
3212 * ... the starting address isn't allocated
3213 */
3214 if (vm_map_lookup_entry(map, start, &entry)) {
3215 vm_object_t cur_object, shadow_object;
3216
3217 /*
3218 * We might already have some 4K mappings
3219 * in a 16K page here.
3220 */
3221
3222 if (entry->vme_end - entry->vme_start
3223 != SIXTEENK_PAGE_SIZE) {
3224 RETURN(KERN_NO_SPACE);
3225 }
3226 if (entry->is_sub_map) {
3227 RETURN(KERN_NO_SPACE);
3228 }
3229 if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
3230 RETURN(KERN_NO_SPACE);
3231 }
3232
3233 /* go all the way down the shadow chain */
3234 cur_object = VME_OBJECT(entry);
3235 vm_object_lock(cur_object);
3236 while (cur_object->shadow != VM_OBJECT_NULL) {
3237 shadow_object = cur_object->shadow;
3238 vm_object_lock(shadow_object);
3239 vm_object_unlock(cur_object);
3240 cur_object = shadow_object;
3241 shadow_object = VM_OBJECT_NULL;
3242 }
3243 if (cur_object->internal ||
3244 cur_object->pager == NULL) {
3245 vm_object_unlock(cur_object);
3246 RETURN(KERN_NO_SPACE);
3247 }
3248 if (cur_object->pager->mo_pager_ops
3249 != &fourk_pager_ops) {
3250 vm_object_unlock(cur_object);
3251 RETURN(KERN_NO_SPACE);
3252 }
3253 fourk_object = cur_object;
3254 fourk_mem_obj = fourk_object->pager;
3255
3256 /* keep the "4K" object alive */
3257 vm_object_reference_locked(fourk_object);
3258 vm_object_unlock(fourk_object);
3259
3260 /* merge permissions */
3261 entry->protection |= cur_protection;
3262 entry->max_protection |= max_protection;
3263 if ((entry->protection & (VM_PROT_WRITE |
3264 VM_PROT_EXECUTE)) ==
3265 (VM_PROT_WRITE | VM_PROT_EXECUTE) &&
3266 fourk_binary_compatibility_unsafe &&
3267 fourk_binary_compatibility_allow_wx) {
3268 /* write+execute: need to be "jit" */
3269 entry->used_for_jit = TRUE;
3270 }
3271
3272 goto map_in_fourk_pager;
3273 }
3274
3275 /*
3276 * ... the next region doesn't overlap the
3277 * end point.
3278 */
3279
3280 if ((entry->vme_next != vm_map_to_entry(map)) &&
3281 (entry->vme_next->vme_start < end)) {
3282 RETURN(KERN_NO_SPACE);
3283 }
3284 }
3285
3286 /*
3287 * At this point,
3288 * "start" and "end" should define the endpoints of the
3289 * available new range, and
3290 * "entry" should refer to the region before the new
3291 * range, and
3292 *
3293 * the map should be locked.
3294 */
3295
3296 /* create a new "4K" pager */
3297 fourk_mem_obj = fourk_pager_create();
3298 fourk_object = fourk_pager_to_vm_object(fourk_mem_obj);
3299 assert(fourk_object);
3300
3301 /* keep the "4K" object alive */
3302 vm_object_reference(fourk_object);
3303
3304 /* create a "copy" object, to map the "4K" object copy-on-write */
3305 fourk_copy = TRUE;
3306 result = vm_object_copy_strategically(fourk_object,
3307 0,
3308 end - start,
3309 &copy_object,
3310 &copy_offset,
3311 &fourk_copy);
3312 assert(result == KERN_SUCCESS);
3313 assert(copy_object != VM_OBJECT_NULL);
3314 assert(copy_offset == 0);
3315
3316 /* take a reference on the copy object, for this mapping */
3317 vm_object_reference(copy_object);
3318
3319 /* map the "4K" pager's copy object */
3320 new_entry =
3321 vm_map_entry_insert(map, entry,
3322 vm_map_trunc_page(start,
3323 VM_MAP_PAGE_MASK(map)),
3324 vm_map_round_page(end,
3325 VM_MAP_PAGE_MASK(map)),
3326 copy_object,
3327 0, /* offset */
3328 FALSE, /* needs_copy */
3329 FALSE, FALSE,
3330 cur_protection, max_protection,
3331 VM_BEHAVIOR_DEFAULT,
3332 ((entry_for_jit)
3333 ? VM_INHERIT_NONE
3334 : inheritance),
3335 0,
3336 no_cache,
3337 permanent,
3338 superpage_size,
3339 clear_map_aligned,
3340 is_submap,
3341 FALSE, /* jit */
3342 alias);
3343 entry = new_entry;
3344
3345 #if VM_MAP_DEBUG_FOURK
3346 if (vm_map_debug_fourk) {
3347 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] new pager %p\n",
3348 map,
3349 (uint64_t) entry->vme_start,
3350 (uint64_t) entry->vme_end,
3351 fourk_mem_obj);
3352 }
3353 #endif /* VM_MAP_DEBUG_FOURK */
3354
3355 new_mapping_established = TRUE;
3356
3357 map_in_fourk_pager:
3358 /* "map" the original "object" where it belongs in the "4K" pager */
3359 fourk_pager_offset = (fourk_start & SIXTEENK_PAGE_MASK);
3360 fourk_pager_index_start = (int) (fourk_pager_offset / FOURK_PAGE_SIZE);
3361 if (fourk_size > SIXTEENK_PAGE_SIZE) {
3362 fourk_pager_index_num = 4;
3363 } else {
3364 fourk_pager_index_num = (int) (fourk_size / FOURK_PAGE_SIZE);
3365 }
3366 if (fourk_pager_index_start + fourk_pager_index_num > 4) {
3367 fourk_pager_index_num = 4 - fourk_pager_index_start;
3368 }
3369 for (cur_idx = 0;
3370 cur_idx < fourk_pager_index_num;
3371 cur_idx++) {
3372 vm_object_t old_object;
3373 vm_object_offset_t old_offset;
3374
3375 kr = fourk_pager_populate(fourk_mem_obj,
3376 TRUE, /* overwrite */
3377 fourk_pager_index_start + cur_idx,
3378 object,
3379 (object
3380 ? (offset +
3381 (cur_idx * FOURK_PAGE_SIZE))
3382 : 0),
3383 &old_object,
3384 &old_offset);
3385 #if VM_MAP_DEBUG_FOURK
3386 if (vm_map_debug_fourk) {
3387 if (old_object == (vm_object_t) -1 &&
3388 old_offset == (vm_object_offset_t) -1) {
3389 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] "
3390 "pager [%p:0x%llx] "
3391 "populate[%d] "
3392 "[object:%p,offset:0x%llx]\n",
3393 map,
3394 (uint64_t) entry->vme_start,
3395 (uint64_t) entry->vme_end,
3396 fourk_mem_obj,
3397 VME_OFFSET(entry),
3398 fourk_pager_index_start + cur_idx,
3399 object,
3400 (object
3401 ? (offset + (cur_idx * FOURK_PAGE_SIZE))
3402 : 0));
3403 } else {
3404 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] "
3405 "pager [%p:0x%llx] "
3406 "populate[%d] [object:%p,offset:0x%llx] "
3407 "old [%p:0x%llx]\n",
3408 map,
3409 (uint64_t) entry->vme_start,
3410 (uint64_t) entry->vme_end,
3411 fourk_mem_obj,
3412 VME_OFFSET(entry),
3413 fourk_pager_index_start + cur_idx,
3414 object,
3415 (object
3416 ? (offset + (cur_idx * FOURK_PAGE_SIZE))
3417 : 0),
3418 old_object,
3419 old_offset);
3420 }
3421 }
3422 #endif /* VM_MAP_DEBUG_FOURK */
3423
3424 assert(kr == KERN_SUCCESS);
3425 if (object != old_object &&
3426 object != VM_OBJECT_NULL &&
3427 object != (vm_object_t) -1) {
3428 vm_object_reference(object);
3429 }
3430 if (object != old_object &&
3431 old_object != VM_OBJECT_NULL &&
3432 old_object != (vm_object_t) -1) {
3433 vm_object_deallocate(old_object);
3434 }
3435 }
3436
3437 BailOut:
3438 assert(map_locked == TRUE);
3439
3440 if (fourk_object != VM_OBJECT_NULL) {
3441 vm_object_deallocate(fourk_object);
3442 fourk_object = VM_OBJECT_NULL;
3443 fourk_mem_obj = MEMORY_OBJECT_NULL;
3444 }
3445
3446 if (result == KERN_SUCCESS) {
3447 vm_prot_t pager_prot;
3448 memory_object_t pager;
3449
3450 #if DEBUG
3451 if (pmap_empty &&
3452 !(vmk_flags.vmkf_no_pmap_check)) {
3453 assert(vm_map_pmap_is_empty(map,
3454 *address,
3455 *address+size));
3456 }
3457 #endif /* DEBUG */
3458
3459 /*
3460 * For "named" VM objects, let the pager know that the
3461 * memory object is being mapped. Some pagers need to keep
3462 * track of this, to know when they can reclaim the memory
3463 * object, for example.
3464 * VM calls memory_object_map() for each mapping (specifying
3465 * the protection of each mapping) and calls
3466 * memory_object_last_unmap() when all the mappings are gone.
3467 */
3468 pager_prot = max_protection;
3469 if (needs_copy) {
3470 /*
3471 * Copy-On-Write mapping: won't modify
3472 * the memory object.
3473 */
3474 pager_prot &= ~VM_PROT_WRITE;
3475 }
3476 if (!is_submap &&
3477 object != VM_OBJECT_NULL &&
3478 object->named &&
3479 object->pager != MEMORY_OBJECT_NULL) {
3480 vm_object_lock(object);
3481 pager = object->pager;
3482 if (object->named &&
3483 pager != MEMORY_OBJECT_NULL) {
3484 assert(object->pager_ready);
3485 vm_object_mapping_wait(object, THREAD_UNINT);
3486 vm_object_mapping_begin(object);
3487 vm_object_unlock(object);
3488
3489 kr = memory_object_map(pager, pager_prot);
3490 assert(kr == KERN_SUCCESS);
3491
3492 vm_object_lock(object);
3493 vm_object_mapping_end(object);
3494 }
3495 vm_object_unlock(object);
3496 }
3497 if (!is_submap &&
3498 fourk_object != VM_OBJECT_NULL &&
3499 fourk_object->named &&
3500 fourk_object->pager != MEMORY_OBJECT_NULL) {
3501 vm_object_lock(fourk_object);
3502 pager = fourk_object->pager;
3503 if (fourk_object->named &&
3504 pager != MEMORY_OBJECT_NULL) {
3505 assert(fourk_object->pager_ready);
3506 vm_object_mapping_wait(fourk_object,
3507 THREAD_UNINT);
3508 vm_object_mapping_begin(fourk_object);
3509 vm_object_unlock(fourk_object);
3510
3511 kr = memory_object_map(pager, VM_PROT_READ);
3512 assert(kr == KERN_SUCCESS);
3513
3514 vm_object_lock(fourk_object);
3515 vm_object_mapping_end(fourk_object);
3516 }
3517 vm_object_unlock(fourk_object);
3518 }
3519 }
3520
3521 assert(map_locked == TRUE);
3522
3523 if (!keep_map_locked) {
3524 vm_map_unlock(map);
3525 map_locked = FALSE;
3526 }
3527
3528 /*
3529 * We can't hold the map lock if we enter this block.
3530 */
3531
3532 if (result == KERN_SUCCESS) {
3533
3534 /* Wire down the new entry if the user
3535 * requested all new map entries be wired.
3536 */
3537 if ((map->wiring_required)||(superpage_size)) {
3538 assert(!keep_map_locked);
3539 pmap_empty = FALSE; /* pmap won't be empty */
3540 kr = vm_map_wire_kernel(map, start, end,
3541 new_entry->protection, VM_KERN_MEMORY_MLOCK,
3542 TRUE);
3543 result = kr;
3544 }
3545
3546 }
3547
3548 if (result != KERN_SUCCESS) {
3549 if (new_mapping_established) {
3550 /*
3551 * We have to get rid of the new mappings since we
3552 * won't make them available to the user.
3553 * Try and do that atomically, to minimize the risk
3554 * that someone else creates new mappings in that range.
3555 */
3556 zap_new_map = vm_map_create(PMAP_NULL,
3557 *address,
3558 *address + size,
3559 map->hdr.entries_pageable);
3560 vm_map_set_page_shift(zap_new_map,
3561 VM_MAP_PAGE_SHIFT(map));
3562 vm_map_disable_hole_optimization(zap_new_map);
3563
3564 if (!map_locked) {
3565 vm_map_lock(map);
3566 map_locked = TRUE;
3567 }
3568 (void) vm_map_delete(map, *address, *address+size,
3569 (VM_MAP_REMOVE_SAVE_ENTRIES |
3570 VM_MAP_REMOVE_NO_MAP_ALIGN),
3571 zap_new_map);
3572 }
3573 if (zap_old_map != VM_MAP_NULL &&
3574 zap_old_map->hdr.nentries != 0) {
3575 vm_map_entry_t entry1, entry2;
3576
3577 /*
3578 * The new mapping failed. Attempt to restore
3579 * the old mappings, saved in the "zap_old_map".
3580 */
3581 if (!map_locked) {
3582 vm_map_lock(map);
3583 map_locked = TRUE;
3584 }
3585
3586 /* first check if the coast is still clear */
3587 start = vm_map_first_entry(zap_old_map)->vme_start;
3588 end = vm_map_last_entry(zap_old_map)->vme_end;
3589 if (vm_map_lookup_entry(map, start, &entry1) ||
3590 vm_map_lookup_entry(map, end, &entry2) ||
3591 entry1 != entry2) {
3592 /*
3593 * Part of that range has already been
3594 * re-mapped: we can't restore the old
3595 * mappings...
3596 */
3597 vm_map_enter_restore_failures++;
3598 } else {
3599 /*
3600 * Transfer the saved map entries from
3601 * "zap_old_map" to the original "map",
3602 * inserting them all after "entry1".
3603 */
3604 for (entry2 = vm_map_first_entry(zap_old_map);
3605 entry2 != vm_map_to_entry(zap_old_map);
3606 entry2 = vm_map_first_entry(zap_old_map)) {
3607 vm_map_size_t entry_size;
3608
3609 entry_size = (entry2->vme_end -
3610 entry2->vme_start);
3611 vm_map_store_entry_unlink(zap_old_map,
3612 entry2);
3613 zap_old_map->size -= entry_size;
3614 vm_map_store_entry_link(map, entry1, entry2);
3615 map->size += entry_size;
3616 entry1 = entry2;
3617 }
3618 if (map->wiring_required) {
3619 /*
3620 * XXX TODO: we should rewire the
3621 * old pages here...
3622 */
3623 }
3624 vm_map_enter_restore_successes++;
3625 }
3626 }
3627 }
3628
3629 /*
3630 * The caller is responsible for releasing the lock if it requested to
3631 * keep the map locked.
3632 */
3633 if (map_locked && !keep_map_locked) {
3634 vm_map_unlock(map);
3635 }
3636
3637 /*
3638 * Get rid of the "zap_maps" and all the map entries that
3639 * they may still contain.
3640 */
3641 if (zap_old_map != VM_MAP_NULL) {
3642 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3643 zap_old_map = VM_MAP_NULL;
3644 }
3645 if (zap_new_map != VM_MAP_NULL) {
3646 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3647 zap_new_map = VM_MAP_NULL;
3648 }
3649
3650 return result;
3651
3652 #undef RETURN
3653 }
3654 #endif /* __arm64__ */
3655
3656 /*
3657 * Counters for the prefault optimization performed by vm_map_enter_mem_object_helper().
3658 */
3659 int64_t vm_prefault_nb_pages = 0; /* pages successfully entered in the pmap while prefaulting */
3660 int64_t vm_prefault_nb_bailout = 0; /* prefault loops abandoned after pmap_enter_options() failed */
3661
/*
 * vm_map_enter_mem_object_helper:
 *
 * Map the memory designated by "port" into "target_map" and, when a
 * page list is supplied, try to pre-enter those pages in the map's pmap.
 *
 * "port" is interpreted as follows:
 *   - not IP_VALID():      no backing object (VM_OBJECT_NULL, offset 0, no copy);
 *   - IKOT_NAMED_ENTRY:    the named entry's submap, vm_map_copy or VM object
 *                          is mapped (the first two cases return from within
 *                          their own branch);
 *   - IKOT_MEMORY_OBJECT:  "port" is treated as a raw memory object;
 *   - anything else:       KERN_INVALID_OBJECT.
 *
 * Parameters:
 *   target_map        map to enter the mapping in; must not be VM_MAP_NULL.
 *   address           in: requested address (truncated to a page boundary);
 *                     out: address of the mapping, plus the offset within the
 *                     page when VM_FLAGS_RETURN_DATA_ADDR or
 *                     VM_FLAGS_RETURN_4K_DATA_ADDR is set.
 *   initial_size      requested size in bytes; must not be 0.
 *   mask              passed through to vm_map_enter()/vm_map_enter_fourk().
 *   flags             VM_FLAGS_* user flags.
 *   vmk_flags         kernel flags; vmkf_fourk is read (arm64 only) and
 *                     vmkf_submap / vmkf_keep_map_locked are set here.
 *   tag               passed through to vm_map_enter().
 *   port              see above.
 *   offset            offset of the mapping; for a named entry it is relative
 *                     to the start of the named entry.
 *   copy              TRUE to map a copy of the object instead of sharing it.
 *   cur_protection,
 *   max_protection    requested protections; VM_PROT_IS_MASK may be set in
 *                     either one (see below).
 *   inheritance       must not exceed VM_INHERIT_LAST_VALID.
 *   page_list,
 *   page_list_count   pages to prefault; a non-zero count requires a non-NULL
 *                     "page_list" and copy == FALSE.
 *
 * Returns:
 *   KERN_INVALID_ARGUMENT, KERN_INVALID_RIGHT or KERN_INVALID_OBJECT when the
 *   checks below fail; otherwise the result of the object copy or of
 *   vm_map_enter()/vm_map_enter_fourk(), possibly replaced by a prefault
 *   error when prefaulting into a kernel map.
 */
3662 static kern_return_t
3663 vm_map_enter_mem_object_helper(
3664 vm_map_t target_map,
3665 vm_map_offset_t *address,
3666 vm_map_size_t initial_size,
3667 vm_map_offset_t mask,
3668 int flags,
3669 vm_map_kernel_flags_t vmk_flags,
3670 vm_tag_t tag,
3671 ipc_port_t port,
3672 vm_object_offset_t offset,
3673 boolean_t copy,
3674 vm_prot_t cur_protection,
3675 vm_prot_t max_protection,
3676 vm_inherit_t inheritance,
3677 upl_page_list_ptr_t page_list,
3678 unsigned int page_list_count)
3679 {
3680 vm_map_address_t map_addr;
3681 vm_map_size_t map_size;
3682 vm_object_t object;
3683 vm_object_size_t size;
3684 kern_return_t result;
3685 boolean_t mask_cur_protection, mask_max_protection;
3686 boolean_t kernel_prefault, try_prefault = (page_list_count != 0); /* prefault only if a page count was given */
3687 vm_map_offset_t offset_in_mapping = 0;
3688 #if __arm64__
3689 boolean_t fourk = vmk_flags.vmkf_fourk;
3690 #endif /* __arm64__ */
3691
3692 assertf(vmk_flags.__vmkf_unused == 0, "vmk_flags unused=0x%x\n", vmk_flags.__vmkf_unused);
3693
/*
 * VM_PROT_IS_MASK in a protection asks for that protection to be
 * intersected with the named entry's protection before the permission
 * checks below; remember the request and strip the flag.
 */
3694 mask_cur_protection = cur_protection & VM_PROT_IS_MASK;
3695 mask_max_protection = max_protection & VM_PROT_IS_MASK;
3696 cur_protection &= ~VM_PROT_IS_MASK;
3697 max_protection &= ~VM_PROT_IS_MASK;
3698
3699 /*
3700 * Check arguments for validity
3701 */
3702 if ((target_map == VM_MAP_NULL) ||
3703 (cur_protection & ~VM_PROT_ALL) ||
3704 (max_protection & ~VM_PROT_ALL) ||
3705 (inheritance > VM_INHERIT_LAST_VALID) ||
3706 (try_prefault && (copy || !page_list)) ||
3707 initial_size == 0) {
3708 return KERN_INVALID_ARGUMENT;
3709 }
3710
/*
 * Page-align the requested address and size: on a 4K boundary for a
 * "fourk" mapping (arm64 only), on the target map's page size otherwise.
 */
3711 #if __arm64__
3712 if (fourk) {
3713 map_addr = vm_map_trunc_page(*address, FOURK_PAGE_MASK);
3714 map_size = vm_map_round_page(initial_size, FOURK_PAGE_MASK);
3715 } else
3716 #endif /* __arm64__ */
3717 {
3718 map_addr = vm_map_trunc_page(*address,
3719 VM_MAP_PAGE_MASK(target_map));
3720 map_size = vm_map_round_page(initial_size,
3721 VM_MAP_PAGE_MASK(target_map));
3722 }
3723 size = vm_object_round_page(initial_size);
3724
3725 /*
3726 * Find the vm object (if any) corresponding to this port.
3727 */
3728 if (!IP_VALID(port)) {
3729 object = VM_OBJECT_NULL;
3730 offset = 0;
3731 copy = FALSE;
3732 } else if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
3733 vm_named_entry_t named_entry;
3734
3735 named_entry = (vm_named_entry_t) port->ip_kobject;
3736
3737 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
3738 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
3739 offset += named_entry->data_offset;
3740 }
3741
3742 /* a few checks to make sure user is obeying rules */
/*
 * NOTE(review): initial_size == 0 was rejected above, so size == 0 here
 * presumably only happens if vm_object_round_page() wrapped - confirm.
 */
3743 if (size == 0) {
3744 if (offset >= named_entry->size)
3745 return KERN_INVALID_RIGHT;
3746 size = named_entry->size - offset;
3747 }
3748 if (mask_max_protection) {
3749 max_protection &= named_entry->protection;
3750 }
3751 if (mask_cur_protection) {
3752 cur_protection &= named_entry->protection;
3753 }
/* the named entry must grant every protection still being requested */
3754 if ((named_entry->protection & max_protection) !=
3755 max_protection)
3756 return KERN_INVALID_RIGHT;
3757 if ((named_entry->protection & cur_protection) !=
3758 cur_protection)
3759 return KERN_INVALID_RIGHT;
3760 if (offset + size < offset) {
3761 /* overflow */
3762 return KERN_INVALID_ARGUMENT;
3763 }
/* the requested (unrounded) range must fit within the named entry */
3764 if (named_entry->size < (offset + initial_size)) {
3765 return KERN_INVALID_ARGUMENT;
3766 }
3767
3768 if (named_entry->is_copy) {
3769 /* for a vm_map_copy, we can only map it whole */
3770 if ((size != named_entry->size) &&
3771 (vm_map_round_page(size,
3772 VM_MAP_PAGE_MASK(target_map)) ==
3773 named_entry->size)) {
3774 /* XXX FBDP use the rounded size... */
3775 size = vm_map_round_page(
3776 size,
3777 VM_MAP_PAGE_MASK(target_map));
3778 }
3779
3780 if (!(flags & VM_FLAGS_ANYWHERE) &&
3781 (offset != 0 ||
3782 size != named_entry->size)) {
3783 /*
3784 * XXX for a mapping at a "fixed" address,
3785 * we can't trim after mapping the whole
3786 * memory entry, so reject a request for a
3787 * partial mapping.
3788 */
3789 return KERN_INVALID_ARGUMENT;
3790 }
3791 }
3792
3793 /* the caller's "offset" parameter is relative to the start of the */
3794 /* named entry: add the named entry's own offset to it */
3795 offset = offset + named_entry->offset;
3796
3797 if (! VM_MAP_PAGE_ALIGNED(size,
3798 VM_MAP_PAGE_MASK(target_map))) {
3799 /*
3800 * Let's not map more than requested;
3801 * vm_map_enter() will handle this "not map-aligned"
3802 * case.
3803 */
3804 map_size = size;
3805 }
3806
/*
 * The named entry stays locked while its backing store is examined;
 * each of the three branches below unlocks it.
 */
3807 named_entry_lock(named_entry);
3808 if (named_entry->is_sub_map) {
3809 vm_map_t submap;
3810
3811 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
3812 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
3813 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap.");
3814 }
3815
/* take a reference on the submap for the new mapping */
3816 submap = named_entry->backing.map;
3817 vm_map_lock(submap);
3818 vm_map_reference(submap);
3819 vm_map_unlock(submap);
3820 named_entry_unlock(named_entry);
3821
3822 vmk_flags.vmkf_submap = TRUE;
3823
3824 result = vm_map_enter(target_map,
3825 &map_addr,
3826 map_size,
3827 mask,
3828 flags,
3829 vmk_flags,
3830 tag,
3831 (vm_object_t) submap,
3832 offset,
3833 copy,
3834 cur_protection,
3835 max_protection,
3836 inheritance);
3837 if (result != KERN_SUCCESS) {
/* mapping failed: drop the submap reference taken above */
3838 vm_map_deallocate(submap);
3839 } else {
3840 /*
3841 * No need to lock "submap" just to check its
3842 * "mapped" flag: that flag is never reset
3843 * once it's been set and if we race, we'll
3844 * just end up setting it twice, which is OK.
3845 */
3846 if (submap->mapped_in_other_pmaps == FALSE &&
3847 vm_map_pmap(submap) != PMAP_NULL &&
3848 vm_map_pmap(submap) !=
3849 vm_map_pmap(target_map)) {
3850 /*
3851 * This submap is being mapped in a map
3852 * that uses a different pmap.
3853 * Set its "mapped_in_other_pmaps" flag
3854 * to indicate that we now need to
3855 * remove mappings from all pmaps rather
3856 * than just the submap's pmap.
3857 */
3858 vm_map_lock(submap);
3859 submap->mapped_in_other_pmaps = TRUE;
3860 vm_map_unlock(submap);
3861 }
3862 *address = map_addr;
3863 }
3864 return result;
3865
3866 } else if (named_entry->is_copy) {
3867 kern_return_t kr;
3868 vm_map_copy_t copy_map;
3869 vm_map_entry_t copy_entry;
3870 vm_map_offset_t copy_addr;
3871
/*
 * Named entry backed by a vm_map_copy: reserve a range covering the
 * whole named entry, over-map each copy entry into it, then trim the
 * parts that were not requested.
 */
/* only this set of user flags is accepted for a vm_map_copy entry */
3872 if (flags & ~(VM_FLAGS_FIXED |
3873 VM_FLAGS_ANYWHERE |
3874 VM_FLAGS_OVERWRITE |
3875 VM_FLAGS_RETURN_4K_DATA_ADDR |
3876 VM_FLAGS_RETURN_DATA_ADDR |
3877 VM_FLAGS_ALIAS_MASK)) {
3878 named_entry_unlock(named_entry);
3879 return KERN_INVALID_ARGUMENT;
3880 }
3881
/*
 * Caller wants the address of the data rather than of the mapping:
 * split "offset" into a page-aligned offset and an offset within the
 * page, and recompute the size to map accordingly.
 */
3882 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
3883 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
3884 offset_in_mapping = offset - vm_object_trunc_page(offset);
/* 4K variant: clear the low 12 bits of the offset within the page */
3885 if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR)
3886 offset_in_mapping &= ~((signed)(0xFFF));
3887 offset = vm_object_trunc_page(offset);
3888 map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset;
3889 }
3890
3891 copy_map = named_entry->backing.copy;
3892 assert(copy_map->type == VM_MAP_COPY_ENTRY_LIST);
3893 if (copy_map->type != VM_MAP_COPY_ENTRY_LIST) {
3894 /* unsupported type; should not happen */
3895 printf("vm_map_enter_mem_object: "
3896 "memory_entry->backing.copy "
3897 "unsupported type 0x%x\n",
3898 copy_map->type);
3899 named_entry_unlock(named_entry);
3900 return KERN_INVALID_ARGUMENT;
3901 }
3902
3903 /* reserve a contiguous range */
3904 kr = vm_map_enter(target_map,
3905 &map_addr,
3906 /* map whole mem entry, trim later: */
3907 named_entry->size,
3908 mask,
3909 flags & (VM_FLAGS_ANYWHERE |
3910 VM_FLAGS_OVERWRITE |
3911 VM_FLAGS_RETURN_4K_DATA_ADDR |
3912 VM_FLAGS_RETURN_DATA_ADDR),
3913 vmk_flags,
3914 tag,
3915 VM_OBJECT_NULL,
3916 0,
3917 FALSE, /* copy */
3918 cur_protection,
3919 max_protection,
3920 inheritance);
3921 if (kr != KERN_SUCCESS) {
3922 named_entry_unlock(named_entry);
3923 return kr;
3924 }
3925
3926 copy_addr = map_addr;
3927
/* map each entry of the vm_map_copy at consecutive addresses in the reserved range */
3928 for (copy_entry = vm_map_copy_first_entry(copy_map);
3929 copy_entry != vm_map_copy_to_entry(copy_map);
3930 copy_entry = copy_entry->vme_next) {
3931 int remap_flags;
3932 vm_map_kernel_flags_t vmk_remap_flags;
3933 vm_map_t copy_submap;
3934 vm_object_t copy_object;
3935 vm_map_size_t copy_size;
3936 vm_object_offset_t copy_offset;
3937 int copy_vm_alias;
3938
3939 remap_flags = 0;
3940 vmk_remap_flags = VM_MAP_KERNEL_FLAGS_NONE;
3941
3942 copy_object = VME_OBJECT(copy_entry);
3943 copy_offset = VME_OFFSET(copy_entry);
3944 copy_size = (copy_entry->vme_end -
3945 copy_entry->vme_start);
3946 VM_GET_FLAGS_ALIAS(flags, copy_vm_alias);
3947 if (copy_vm_alias == 0) {
3948 /*
3949 * Caller does not want a specific
3950 * alias for this new mapping: use
3951 * the alias of the original mapping.
3952 */
3953 copy_vm_alias = VME_ALIAS(copy_entry);
3954 }
3955
3956 /* sanity check */
3957 if ((copy_addr + copy_size) >
3958 (map_addr +
3959 named_entry->size /* XXX full size */ )) {
3960 /* over-mapping too much !? */
3961 kr = KERN_INVALID_ARGUMENT;
3962 /* abort */
3963 break;
3964 }
3965
3966 /* take a reference on the object */
3967 if (copy_entry->is_sub_map) {
3968 vmk_remap_flags.vmkf_submap = TRUE;
3969 copy_submap = VME_SUBMAP(copy_entry);
3970 vm_map_lock(copy_submap);
3971 vm_map_reference(copy_submap);
3972 vm_map_unlock(copy_submap);
3973 copy_object = (vm_object_t) copy_submap;
3974 } else if (!copy &&
3975 copy_object != VM_OBJECT_NULL &&
3976 (copy_entry->needs_copy ||
3977 copy_object->shadowed ||
3978 (!copy_object->true_share &&
3979 !copy_entry->is_shared &&
3980 copy_object->vo_size > copy_size))) {
3981 /*
3982 * We need to resolve our side of this
3983 * "symmetric" copy-on-write now; we
3984 * need a new object to map and share,
3985 * instead of the current one which
3986 * might still be shared with the
3987 * original mapping.
3988 *
3989 * Note: A "vm_map_copy_t" does not
3990 * have a lock but we're protected by
3991 * the named entry's lock here.
3992 */
3993 // assert(copy_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);
3994 VME_OBJECT_SHADOW(copy_entry, copy_size);
3995 if (!copy_entry->needs_copy &&
3996 copy_entry->protection & VM_PROT_WRITE) {
3997 vm_prot_t prot;
3998
/* remove write access from the original object's existing mappings */
3999 prot = copy_entry->protection & ~VM_PROT_WRITE;
4000 vm_object_pmap_protect(copy_object,
4001 copy_offset,
4002 copy_size,
4003 PMAP_NULL,
4004 0,
4005 prot);
4006 }
4007
4008 copy_entry->needs_copy = FALSE;
4009 copy_entry->is_shared = TRUE;
/* re-read object and offset: VME_OBJECT_SHADOW() updated the entry */
4010 copy_object = VME_OBJECT(copy_entry);
4011 copy_offset = VME_OFFSET(copy_entry);
4012 vm_object_lock(copy_object);
4013 vm_object_reference_locked(copy_object);
4014 if (copy_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
4015 /* we're about to make a shared mapping of this object */
4016 copy_object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
4017 copy_object->true_share = TRUE;
4018 }
4019 vm_object_unlock(copy_object);
4020 } else {
4021 /*
4022 * We already have the right object
4023 * to map.
4024 */
4025 copy_object = VME_OBJECT(copy_entry);
4026 vm_object_reference(copy_object);
4027 }
4028
4029 /* over-map the object into destination */
4030 remap_flags |= flags;
4031 remap_flags |= VM_FLAGS_FIXED;
4032 remap_flags |= VM_FLAGS_OVERWRITE;
4033 remap_flags &= ~VM_FLAGS_ANYWHERE;
4034 if (!copy && !copy_entry->is_sub_map) {
4035 /*
4036 * copy-on-write should have been
4037 * resolved at this point, or we would
4038 * end up sharing instead of copying.
4039 */
4040 assert(!copy_entry->needs_copy);
4041 }
4042 kr = vm_map_enter(target_map,
4043 &copy_addr,
4044 copy_size,
4045 (vm_map_offset_t) 0,
4046 remap_flags,
4047 vmk_remap_flags,
4048 copy_vm_alias,
4049 copy_object,
4050 copy_offset,
4051 copy,
4052 cur_protection,
4053 max_protection,
4054 inheritance);
4055 if (kr != KERN_SUCCESS) {
/* drop the reference taken above for this entry */
4056 if (copy_entry->is_sub_map) {
4057 vm_map_deallocate(copy_submap);
4058 } else {
4059 vm_object_deallocate(copy_object);
4060 }
4061 /* abort */
4062 break;
4063 }
4064
4065 /* next mapping */
4066 copy_addr += copy_size;
4067 }
4068
4069 if (kr == KERN_SUCCESS) {
4070 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4071 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4072 *address = map_addr + offset_in_mapping;
4073 } else {
4074 *address = map_addr;
4075 }
4076
4077 if (offset) {
4078 /*
4079 * Trim in front, from 0 to "offset".
4080 */
4081 vm_map_remove(target_map,
4082 map_addr,
4083 map_addr + offset,
4084 0);
4085 *address += offset;
4086 }
4087 if (offset + map_size < named_entry->size) {
4088 /*
4089 * Trim in back, from
4090 * "offset + map_size" to
4091 * "named_entry->size".
4092 */
4093 vm_map_remove(target_map,
4094 (map_addr +
4095 offset + map_size),
4096 (map_addr +
4097 named_entry->size),
4098 0);
4099 }
4100 }
4101 named_entry_unlock(named_entry);
4102
4103 if (kr != KERN_SUCCESS) {
4104 if (! (flags & VM_FLAGS_OVERWRITE)) {
4105 /* deallocate the contiguous range */
/*
 * NOTE(review): "named_entry->size" bytes were reserved above but only
 * "map_size" bytes are deallocated here - confirm this is intended.
 */
4106 (void) vm_deallocate(target_map,
4107 map_addr,
4108 map_size);
4109 }
4110 }
4111
4112 return kr;
4113
4114 } else {
4115 unsigned int access;
4116 vm_prot_t protections;
4117 unsigned int wimg_mode;
4118
4119 /* we are mapping a VM object */
4120
4121 protections = named_entry->protection & VM_PROT_ALL;
4122 access = GET_MAP_MEM(named_entry->protection);
4123
/* same data-address handling as in the vm_map_copy case above */
4124 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4125 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4126 offset_in_mapping = offset - vm_object_trunc_page(offset);
4127 if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR)
4128 offset_in_mapping &= ~((signed)(0xFFF));
4129 offset = vm_object_trunc_page(offset);
4130 map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset;
4131 }
4132
/* lock the object before unlocking the named entry, then take a reference on it */
4133 object = named_entry->backing.object;
4134 assert(object != VM_OBJECT_NULL);
4135 vm_object_lock(object);
4136 named_entry_unlock(named_entry);
4137
4138 vm_object_reference_locked(object);
4139
/* update the object's WIMG bits if the named entry's access mode calls for different ones */
4140 wimg_mode = object->wimg_bits;
4141 vm_prot_to_wimg(access, &wimg_mode);
4142 if (object->wimg_bits != wimg_mode)
4143 vm_object_change_wimg_mode(object, wimg_mode);
4144
4145 vm_object_unlock(object);
4146 }
4147 } else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
4148 /*
4149 * JMM - This is temporary until we unify named entries
4150 * and raw memory objects.
4151 *
4152 * Detected fake ip_kotype for a memory object. In
4153 * this case, the port isn't really a port at all, but
4154 * instead is just a raw memory object.
4155 */
4156 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4157 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4158 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for raw memory object.");
4159 }
4160
4161 object = memory_object_to_vm_object((memory_object_t)port);
4162 if (object == VM_OBJECT_NULL)
4163 return KERN_INVALID_OBJECT;
4164 vm_object_reference(object);
4165
4166 /* wait for object (if any) to be ready */
4167 if (object != VM_OBJECT_NULL) {
4168 if (object == kernel_object) {
4169 printf("Warning: Attempt to map kernel object"
4170 " by a non-private kernel entity\n");
/*
 * NOTE(review): the reference taken just above is not dropped on this
 * error return - looks like a leaked reference on kernel_object; confirm.
 */
4171 return KERN_INVALID_OBJECT;
4172 }
4173 if (!object->pager_ready) {
4174 vm_object_lock(object);
4175
4176 while (!object->pager_ready) {
4177 vm_object_wait(object,
4178 VM_OBJECT_EVENT_PAGER_READY,
4179 THREAD_UNINT);
/* the lock is taken again after each wait - presumably vm_object_wait() returns with the object unlocked; confirm */
4180 vm_object_lock(object);
4181 }
4182 vm_object_unlock(object);
4183 }
4184 }
4185 } else {
4186 return KERN_INVALID_OBJECT;
4187 }
4188
/*
 * From here on, "object" (possibly VM_OBJECT_NULL) is what gets mapped;
 * when not NULL, this function holds the reference taken above.
 */
4189 if (object != VM_OBJECT_NULL &&
4190 object->named &&
4191 object->pager != MEMORY_OBJECT_NULL &&
4192 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4193 memory_object_t pager;
4194 vm_prot_t pager_prot;
4195 kern_return_t kr;
4196
4197 /*
4198 * For "named" VM objects, let the pager know that the
4199 * memory object is being mapped. Some pagers need to keep
4200 * track of this, to know when they can reclaim the memory
4201 * object, for example.
4202 * VM calls memory_object_map() for each mapping (specifying
4203 * the protection of each mapping) and calls
4204 * memory_object_last_unmap() when all the mappings are gone.
4205 */
4206 pager_prot = max_protection;
4207 if (copy) {
4208 /*
4209 * Copy-On-Write mapping: won't modify the
4210 * memory object.
4211 */
4212 pager_prot &= ~VM_PROT_WRITE;
4213 }
4214 vm_object_lock(object);
4215 pager = object->pager;
/* check again now that the object is locked: the test above was made without the lock */
4216 if (object->named &&
4217 pager != MEMORY_OBJECT_NULL &&
4218 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4219 assert(object->pager_ready);
4220 vm_object_mapping_wait(object, THREAD_UNINT);
4221 vm_object_mapping_begin(object);
/* the object lock is not held across the call to the pager */
4222 vm_object_unlock(object);
4223
4224 kr = memory_object_map(pager, pager_prot);
4225 assert(kr == KERN_SUCCESS);
4226
4227 vm_object_lock(object);
4228 vm_object_mapping_end(object);
4229 }
4230 vm_object_unlock(object);
4231 }
4232
4233 /*
4234 * Perform the copy if requested
4235 */
4236
4237 if (copy) {
4238 vm_object_t new_object;
4239 vm_object_offset_t new_offset;
4240
4241 result = vm_object_copy_strategically(object, offset,
4242 map_size,
4243 &new_object, &new_offset,
4244 &copy);
4245
4246
4247 if (result == KERN_MEMORY_RESTART_COPY) {
4248 boolean_t success;
4249 boolean_t src_needs_copy;
4250
4251 /*
4252 * XXX
4253 * We currently ignore src_needs_copy.
4254 * This really is the issue of how to make
4255 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
4256 * non-kernel users to use. Solution forthcoming.
4257 * In the meantime, since we don't allow non-kernel
4258 * memory managers to specify symmetric copy,
4259 * we won't run into problems here.
4260 */
4261 new_object = object;
4262 new_offset = offset;
4263 success = vm_object_copy_quickly(&new_object,
4264 new_offset,
4265 map_size,
4266 &src_needs_copy,
4267 &copy);
4268 assert(success);
4269 result = KERN_SUCCESS;
4270 }
4271 /*
4272 * Throw away the reference to the
4273 * original object, as it won't be mapped.
4274 */
4275
4276 vm_object_deallocate(object);
4277
4278 if (result != KERN_SUCCESS) {
4279 return result;
4280 }
4281
/* map the copy instead of the original object */
4282 object = new_object;
4283 offset = new_offset;
4284 }
4285
4286 /*
4287 * If non-kernel users want to try to prefault pages, the mapping and prefault
4288 * needs to be atomic.
4289 */
4290 kernel_prefault = (try_prefault && vm_kernel_map_is_kernel(target_map));
4291 vmk_flags.vmkf_keep_map_locked = (try_prefault && !kernel_prefault);
4292
4293 #if __arm64__
4294 if (fourk) {
4295 /* map this object in a "4K" pager */
4296 result = vm_map_enter_fourk(target_map,
4297 &map_addr,
4298 map_size,
4299 (vm_map_offset_t) mask,
4300 flags,
4301 vmk_flags,
4302 tag,
4303 object,
4304 offset,
4305 copy,
4306 cur_protection,
4307 max_protection,
4308 inheritance);
4309 } else
4310 #endif /* __arm64__ */
4311 {
4312 result = vm_map_enter(target_map,
4313 &map_addr, map_size,
4314 (vm_map_offset_t)mask,
4315 flags,
4316 vmk_flags,
4317 tag,
4318 object, offset,
4319 copy,
4320 cur_protection, max_protection,
4321 inheritance);
4322 }
/* the mapping failed: drop the object reference held by this function */
4323 if (result != KERN_SUCCESS)
4324 vm_object_deallocate(object);
4325
4326 /*
4327 * Try to prefault, and do not forget to release the vm map lock.
4328 */
/*
 * NOTE(review): the map is only unlocked below on the success path; if the
 * map-enter call can fail while leaving the map locked because of
 * vmkf_keep_map_locked, the lock would not be released here - confirm.
 */
4329 if (result == KERN_SUCCESS && try_prefault) {
4330 mach_vm_address_t va = map_addr;
4331 kern_return_t kr = KERN_SUCCESS;
4332 unsigned int i = 0;
4333 int pmap_options;
4334
4335 pmap_options = kernel_prefault ? 0 : PMAP_OPTIONS_NOWAIT;
/*
 * NOTE(review): "object" is VM_OBJECT_NULL when "port" was not valid;
 * this dereference assumes prefault callers always pass a backing
 * object - confirm.
 */
4336 if (object->internal) {
4337 pmap_options |= PMAP_OPTIONS_INTERNAL;
4338 }
4339
4340 for (i = 0; i < page_list_count; ++i) {
4341 if (!UPL_VALID_PAGE(page_list, i)) {
/* an invalid page is an error for a kernel prefault; otherwise it is just skipped */
4342 if (kernel_prefault) {
4343 assertf(FALSE, "kernel_prefault && !UPL_VALID_PAGE");
4344 result = KERN_MEMORY_ERROR;
4345 break;
4346 }
4347 } else {
4348 /*
4349 * If this function call failed, we should stop
4350 * trying to optimize, other calls are likely
4351 * going to fail too.
4352 *
4353 * We are not gonna report an error for such
4354 * failure though. That's an optimization, not
4355 * something critical.
4356 */
4357 kr = pmap_enter_options(target_map->pmap,
4358 va, UPL_PHYS_PAGE(page_list, i),
4359 cur_protection, VM_PROT_NONE,
4360 0, TRUE, pmap_options, NULL);
4361 if (kr != KERN_SUCCESS) {
4362 OSIncrementAtomic64(&vm_prefault_nb_bailout);
/* only a kernel prefault reports the failure to the caller */
4363 if (kernel_prefault) {
4364 result = kr;
4365 }
4366 break;
4367 }
4368 OSIncrementAtomic64(&vm_prefault_nb_pages);
4369 }
4370
4371 /* Next virtual address */
4372 va += PAGE_SIZE;
4373 }
4374 if (vmk_flags.vmkf_keep_map_locked) {
4375 vm_map_unlock(target_map);
4376 }
4377 }
4378
/* note: "*address" is updated here whether or not the mapping succeeded */
4379 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4380 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4381 *address = map_addr + offset_in_mapping;
4382 } else {
4383 *address = map_addr;
4384 }
4385 return result;
4386 }
4387
4388 kern_return_t
4389 vm_map_enter_mem_object(
4390 vm_map_t target_map,
4391 vm_map_offset_t *address,
4392 vm_map_size_t initial_size,
4393 vm_map_offset_t mask,
4394 int flags,
4395 vm_map_kernel_flags_t vmk_flags,
4396 vm_tag_t tag,
4397 ipc_port_t port,
4398 vm_object_offset_t offset,
4399 boolean_t copy,
4400 vm_prot_t cur_protection,
4401 vm_prot_t max_protection,
4402 vm_inherit_t inheritance)
4403 {
4404 kern_return_t ret;
4405
4406 ret = vm_map_enter_mem_object_helper(target_map,
4407 address,
4408 initial_size,
4409 mask,
4410 flags,
4411 vmk_flags,
4412 tag,
4413 port,
4414 offset,
4415 copy,
4416 cur_protection,
4417 max_protection,
4418 inheritance,
4419 NULL,
4420 0);
4421
4422 #if KASAN
4423 if (ret == KERN_SUCCESS && address && target_map->pmap == kernel_pmap) {
4424 kasan_notify_address(*address, initial_size);
4425 }
4426 #endif
4427
4428 return ret;
4429 }
4430
4431 kern_return_t
4432 vm_map_enter_mem_object_prefault(
4433 vm_map_t target_map,
4434 vm_map_offset_t *address,
4435 vm_map_size_t initial_size,
4436 vm_map_offset_t mask,
4437 int flags,
4438 vm_map_kernel_flags_t vmk_flags,
4439 vm_tag_t tag,
4440 ipc_port_t port,
4441 vm_object_offset_t offset,
4442 vm_prot_t cur_protection,
4443 vm_prot_t max_protection,
4444 upl_page_list_ptr_t page_list,
4445 unsigned int page_list_count)
4446 {
4447 kern_return_t ret;
4448
4449 ret = vm_map_enter_mem_object_helper(target_map,
4450 address,
4451 initial_size,
4452 mask,
4453 flags,
4454 vmk_flags,
4455 tag,
4456 port,
4457 offset,
4458 FALSE,
4459 cur_protection,
4460 max_protection,
4461 VM_INHERIT_DEFAULT,
4462 page_list,
4463 page_list_count);
4464
4465 #if KASAN
4466 if (ret == KERN_SUCCESS && address && target_map->pmap == kernel_pmap) {
4467 kasan_notify_address(*address, initial_size);
4468 }
4469 #endif
4470
4471 return ret;
4472 }
4473
4474
4475 kern_return_t
4476 vm_map_enter_mem_object_control(
4477 vm_map_t target_map,
4478 vm_map_offset_t *address,
4479 vm_map_size_t initial_size,
4480 vm_map_offset_t mask,
4481 int flags,
4482 vm_map_kernel_flags_t vmk_flags,
4483 vm_tag_t tag,
4484 memory_object_control_t control,
4485 vm_object_offset_t offset,
4486 boolean_t copy,
4487 vm_prot_t cur_protection,
4488 vm_prot_t max_protection,
4489 vm_inherit_t inheritance)
4490 {
4491 vm_map_address_t map_addr;
4492 vm_map_size_t map_size;
4493 vm_object_t object;
4494 vm_object_size_t size;
4495 kern_return_t result;
4496 memory_object_t pager;
4497 vm_prot_t pager_prot;
4498 kern_return_t kr;
4499 #if __arm64__
4500 boolean_t fourk = vmk_flags.vmkf_fourk;
4501 #endif /* __arm64__ */
4502
4503 /*
4504 * Check arguments for validity
4505 */
4506 if ((target_map == VM_MAP_NULL) ||
4507 (cur_protection & ~VM_PROT_ALL) ||
4508 (max_protection & ~VM_PROT_ALL) ||
4509 (inheritance > VM_INHERIT_LAST_VALID) ||
4510 initial_size == 0) {
4511 return KERN_INVALID_ARGUMENT;
4512 }
4513
4514 #if __arm64__
4515 if (fourk) {
4516 map_addr = vm_map_trunc_page(*address,
4517 FOURK_PAGE_MASK);
4518 map_size = vm_map_round_page(initial_size,
4519 FOURK_PAGE_MASK);
4520 } else
4521 #endif /* __arm64__ */
4522 {
4523 map_addr = vm_map_trunc_page(*address,
4524 VM_MAP_PAGE_MASK(target_map));
4525 map_size = vm_map_round_page(initial_size,
4526 VM_MAP_PAGE_MASK(target_map));
4527 }
4528 size = vm_object_round_page(initial_size);
4529
4530 object = memory_object_control_to_vm_object(control);
4531
4532 if (object == VM_OBJECT_NULL)
4533 return KERN_INVALID_OBJECT;
4534
4535 if (object == kernel_object) {
4536 printf("Warning: Attempt to map kernel object"
4537 " by a non-private kernel entity\n");
4538 return KERN_INVALID_OBJECT;
4539 }
4540
4541 vm_object_lock(object);
4542 object->ref_count++;
4543 vm_object_res_reference(object);
4544
4545 /*
4546 * For "named" VM objects, let the pager know that the
4547 * memory object is being mapped. Some pagers need to keep
4548 * track of this, to know when they can reclaim the memory
4549 * object, for example.
4550 * VM calls memory_object_map() for each mapping (specifying
4551 * the protection of each mapping) and calls
4552 * memory_object_last_unmap() when all the mappings are gone.
4553 */
4554 pager_prot = max_protection;
4555 if (copy) {
4556 pager_prot &= ~VM_PROT_WRITE;
4557 }
4558 pager = object->pager;
4559 if (object->named &&
4560 pager != MEMORY_OBJECT_NULL &&
4561 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4562 assert(object->pager_ready);
4563 vm_object_mapping_wait(object, THREAD_UNINT);
4564 vm_object_mapping_begin(object);
4565 vm_object_unlock(object);
4566
4567 kr = memory_object_map(pager, pager_prot);
4568 assert(kr == KERN_SUCCESS);
4569
4570 vm_object_lock(object);
4571 vm_object_mapping_end(object);
4572 }
4573 vm_object_unlock(object);
4574
4575 /*
4576 * Perform the copy if requested
4577 */
4578
4579 if (copy) {
4580 vm_object_t new_object;
4581 vm_object_offset_t new_offset;
4582
4583 result = vm_object_copy_strategically(object, offset, size,
4584 &new_object, &new_offset,
4585 &copy);
4586
4587
4588 if (result == KERN_MEMORY_RESTART_COPY) {
4589 boolean_t success;
4590 boolean_t src_needs_copy;
4591
4592 /*
4593 * XXX
4594 * We currently ignore src_needs_copy.
4595 * This really is the issue of how to make
4596 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
4597 * non-kernel users to use. Solution forthcoming.
4598 * In the meantime, since we don't allow non-kernel
4599 * memory managers to specify symmetric copy,
4600 * we won't run into problems here.
4601 */
4602 new_object = object;
4603 new_offset = offset;
4604 success = vm_object_copy_quickly(&new_object,
4605 new_offset, size,
4606 &src_needs_copy,
4607 &copy);
4608 assert(success);
4609 result = KERN_SUCCESS;
4610 }
4611 /*
4612 * Throw away the reference to the
4613 * original object, as it won't be mapped.
4614 */
4615
4616 vm_object_deallocate(object);
4617
4618 if (result != KERN_SUCCESS) {
4619 return result;
4620 }
4621
4622 object = new_object;
4623 offset = new_offset;
4624 }
4625
4626 #if __arm64__
4627 if (fourk) {
4628 result = vm_map_enter_fourk(target_map,
4629 &map_addr,
4630 map_size,
4631 (vm_map_offset_t)mask,
4632 flags,
4633 vmk_flags,
4634 tag,
4635 object, offset,
4636 copy,
4637 cur_protection, max_protection,
4638 inheritance);
4639 } else
4640 #endif /* __arm64__ */
4641 {
4642 result = vm_map_enter(target_map,
4643 &map_addr, map_size,
4644 (vm_map_offset_t)mask,
4645 flags,
4646 vmk_flags,
4647 tag,
4648 object, offset,
4649 copy,
4650 cur_protection, max_protection,
4651 inheritance);
4652 }
4653 if (result != KERN_SUCCESS)
4654 vm_object_deallocate(object);
4655 *address = map_addr;
4656
4657 return result;
4658 }
4659
4660
4661 #if VM_CPM
4662
4663 #ifdef MACH_ASSERT
4664 extern pmap_paddr_t avail_start, avail_end;
4665 #endif
4666
4667 /*
4668 * Allocate memory in the specified map, with the caveat that
4669 * the memory is physically contiguous. This call may fail
4670 * if the system can't find sufficient contiguous memory.
4671 * This call may cause or lead to heart-stopping amounts of
4672 * paging activity.
4673 *
4674 * Memory obtained from this call should be freed in the
4675 * normal way, viz., via vm_deallocate.
4676 */
4677 kern_return_t
4678 vm_map_enter_cpm(
4679 vm_map_t map,
4680 vm_map_offset_t *addr,
4681 vm_map_size_t size,
4682 int flags)
4683 {
4684 vm_object_t cpm_obj;
4685 pmap_t pmap;
4686 vm_page_t m, pages;
4687 kern_return_t kr;
4688 vm_map_offset_t va, start, end, offset;
4689 #if MACH_ASSERT
4690 vm_map_offset_t prev_addr = 0;
4691 #endif /* MACH_ASSERT */
4692
4693 boolean_t anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
4694 vm_tag_t tag;
4695
4696 VM_GET_FLAGS_ALIAS(flags, tag);
4697
4698 if (size == 0) {
4699 *addr = 0;
4700 return KERN_SUCCESS;
4701 }
4702 if (anywhere)
4703 *addr = vm_map_min(map);
4704 else
4705 *addr = vm_map_trunc_page(*addr,
4706 VM_MAP_PAGE_MASK(map));
4707 size = vm_map_round_page(size,
4708 VM_MAP_PAGE_MASK(map));
4709
4710 /*
4711 * LP64todo - cpm_allocate should probably allow
4712 * allocations of >4GB, but not with the current
4713 * algorithm, so just cast down the size for now.
4714 */
4715 if (size > VM_MAX_ADDRESS)
4716 return KERN_RESOURCE_SHORTAGE;
4717 if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
4718 &pages, 0, 0, TRUE, flags)) != KERN_SUCCESS)
4719 return kr;
4720
4721 cpm_obj = vm_object_allocate((vm_object_size_t)size);
4722 assert(cpm_obj != VM_OBJECT_NULL);
4723 assert(cpm_obj->internal);
4724 assert(cpm_obj->vo_size == (vm_object_size_t)size);
4725 assert(cpm_obj->can_persist == FALSE);
4726 assert(cpm_obj->pager_created == FALSE);
4727 assert(cpm_obj->pageout == FALSE);
4728 assert(cpm_obj->shadow == VM_OBJECT_NULL);
4729
4730 /*
4731 * Insert pages into object.
4732 */
4733
4734 vm_object_lock(cpm_obj);
4735 for (offset = 0; offset < size; offset += PAGE_SIZE) {
4736 m = pages;
4737 pages = NEXT_PAGE(m);
4738 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
4739
4740 assert(!m->gobbled);
4741 assert(!m->wanted);
4742 assert(!m->pageout);
4743 assert(!m->tabled);
4744 assert(VM_PAGE_WIRED(m));
4745 assert(m->busy);
4746 assert(VM_PAGE_GET_PHYS_PAGE(m)>=(avail_start>>PAGE_SHIFT) && VM_PAGE_GET_PHYS_PAGE(m)<=(avail_end>>PAGE_SHIFT));
4747
4748 m->busy = FALSE;
4749 vm_page_insert(m, cpm_obj, offset);
4750 }
4751 assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
4752 vm_object_unlock(cpm_obj);
4753
4754 /*
4755 * Hang onto a reference on the object in case a
4756 * multi-threaded application for some reason decides
4757 * to deallocate the portion of the address space into
4758 * which we will insert this object.
4759 *
4760 * Unfortunately, we must insert the object now before
4761 * we can talk to the pmap module about which addresses
4762 * must be wired down. Hence, the race with a multi-
4763 * threaded app.
4764 */
4765 vm_object_reference(cpm_obj);
4766
4767 /*
4768 * Insert object into map.
4769 */
4770
4771 kr = vm_map_enter(
4772 map,
4773 addr,
4774 size,
4775 (vm_map_offset_t)0,
4776 flags,
4777 VM_MAP_KERNEL_FLAGS_NONE,
4778 cpm_obj,
4779 (vm_object_offset_t)0,
4780 FALSE,
4781 VM_PROT_ALL,
4782 VM_PROT_ALL,
4783 VM_INHERIT_DEFAULT);
4784
4785 if (kr != KERN_SUCCESS) {
4786 /*
4787 * A CPM object doesn't have can_persist set,
4788 * so all we have to do is deallocate it to
4789 * free up these pages.
4790 */
4791 assert(cpm_obj->pager_created == FALSE);
4792 assert(cpm_obj->can_persist == FALSE);
4793 assert(cpm_obj->pageout == FALSE);
4794 assert(cpm_obj->shadow == VM_OBJECT_NULL);
4795 vm_object_deallocate(cpm_obj); /* kill acquired ref */
4796 vm_object_deallocate(cpm_obj); /* kill creation ref */
4797 }
4798
4799 /*
4800 * Inform the physical mapping system that the
4801 * range of addresses may not fault, so that
4802 * page tables and such can be locked down as well.
4803 */
4804 start = *addr;
4805 end = start + size;
4806 pmap = vm_map_pmap(map);
4807 pmap_pageable(pmap, start, end, FALSE);
4808
4809 /*
4810 * Enter each page into the pmap, to avoid faults.
4811 * Note that this loop could be coded more efficiently,
4812 * if the need arose, rather than looking up each page
4813 * again.
4814 */
4815 for (offset = 0, va = start; offset < size;
4816 va += PAGE_SIZE, offset += PAGE_SIZE) {
4817 int type_of_fault;
4818
4819 vm_object_lock(cpm_obj);
4820 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
4821 assert(m != VM_PAGE_NULL);
4822
4823 vm_page_zero_fill(m);
4824
4825 type_of_fault = DBG_ZERO_FILL_FAULT;
4826
4827 vm_fault_enter(m, pmap, va, VM_PROT_ALL, VM_PROT_WRITE,
4828 VM_PAGE_WIRED(m),
4829 FALSE, /* change_wiring */
4830 VM_KERN_MEMORY_NONE, /* tag - not wiring */
4831 FALSE, /* no_cache */
4832 FALSE, /* cs_bypass */
4833 0, /* user_tag */
4834 0, /* pmap_options */
4835 NULL, /* need_retry */
4836 &type_of_fault);
4837
4838 vm_object_unlock(cpm_obj);
4839 }
4840
4841 #if MACH_ASSERT
4842 /*
4843 * Verify ordering in address space.
4844 */
4845 for (offset = 0; offset < size; offset += PAGE_SIZE) {
4846 vm_object_lock(cpm_obj);
4847 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
4848 vm_object_unlock(cpm_obj);
4849 if (m == VM_PAGE_NULL)
4850 panic("vm_allocate_cpm: obj %p off 0x%llx no page",
4851 cpm_obj, (uint64_t)offset);
4852 assert(m->tabled);
4853 assert(!m->busy);
4854 assert(!m->wanted);
4855 assert(!m->fictitious);
4856 assert(!m->private);
4857 assert(!m->absent);
4858 assert(!m->error);
4859 assert(!m->cleaning);
4860 assert(!m->laundry);
4861 assert(!m->precious);
4862 assert(!m->clustered);
4863 if (offset != 0) {
4864 if (VM_PAGE_GET_PHYS_PAGE(m) != prev_addr + 1) {
4865 printf("start 0x%llx end 0x%llx va 0x%llx\n",
4866 (uint64_t)start, (uint64_t)end, (uint64_t)va);
4867 printf("obj %p off 0x%llx\n", cpm_obj, (uint64_t)offset);
4868 printf("m %p prev_address 0x%llx\n", m, (uint64_t)prev_addr);
4869 panic("vm_allocate_cpm: pages not contig!");
4870 }
4871 }
4872 prev_addr = VM_PAGE_GET_PHYS_PAGE(m);
4873 }
4874 #endif /* MACH_ASSERT */
4875
4876 vm_object_deallocate(cpm_obj); /* kill extra ref */
4877
4878 return kr;
4879 }
4880
4881
4882 #else /* VM_CPM */
4883
4884 /*
4885 * Interface is defined in all cases, but unless the kernel
4886 * is built explicitly for this option, the interface does
4887 * nothing.
4888 */
4889
4890 kern_return_t
4891 vm_map_enter_cpm(
4892 __unused vm_map_t map,
4893 __unused vm_map_offset_t *addr,
4894 __unused vm_map_size_t size,
4895 __unused int flags)
4896 {
4897 return KERN_FAILURE;
4898 }
4899 #endif /* VM_CPM */
4900
4901 /* Not used without nested pmaps */
4902 #ifndef NO_NESTED_PMAP
4903 /*
4904 * Clip and unnest a portion of a nested submap mapping.
4905 */
4906
4907
4908 static void
4909 vm_map_clip_unnest(
4910 vm_map_t map,
4911 vm_map_entry_t entry,
4912 vm_map_offset_t start_unnest,
4913 vm_map_offset_t end_unnest)
4914 {
4915 vm_map_offset_t old_start_unnest = start_unnest;
4916 vm_map_offset_t old_end_unnest = end_unnest;
4917
4918 assert(entry->is_sub_map);
4919 assert(VME_SUBMAP(entry) != NULL);
4920 assert(entry->use_pmap);
4921
4922 /*
4923 * Query the platform for the optimal unnest range.
4924 * DRK: There's some duplication of effort here, since
4925 * callers may have adjusted the range to some extent. This
4926 * routine was introduced to support 1GiB subtree nesting
4927 * for x86 platforms, which can also nest on 2MiB boundaries
4928 * depending on size/alignment.
4929 */
4930 if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) {
4931 assert(VME_SUBMAP(entry)->is_nested_map);
4932 assert(!VME_SUBMAP(entry)->disable_vmentry_reuse);
4933 log_unnest_badness(map,
4934 old_start_unnest,
4935 old_end_unnest,
4936 VME_SUBMAP(entry)->is_nested_map,
4937 (entry->vme_start +
4938 VME_SUBMAP(entry)->lowest_unnestable_start -
4939 VME_OFFSET(entry)));
4940 }
4941
4942 if (entry->vme_start > start_unnest ||
4943 entry->vme_end < end_unnest) {
4944 panic("vm_map_clip_unnest(0x%llx,0x%llx): "
4945 "bad nested entry: start=0x%llx end=0x%llx\n",
4946 (long long)start_unnest, (long long)end_unnest,
4947 (long long)entry->vme_start, (long long)entry->vme_end);
4948 }
4949
4950 if (start_unnest > entry->vme_start) {
4951 _vm_map_clip_start(&map->hdr,
4952 entry,
4953 start_unnest);
4954 if (map->holelistenabled) {
4955 vm_map_store_update_first_free(map, NULL, FALSE);
4956 } else {
4957 vm_map_store_update_first_free(map, map->first_free, FALSE);
4958 }
4959 }
4960 if (entry->vme_end > end_unnest) {
4961 _vm_map_clip_end(&map->hdr,
4962 entry,
4963 end_unnest);
4964 if (map->holelistenabled) {
4965 vm_map_store_update_first_free(map, NULL, FALSE);
4966 } else {
4967 vm_map_store_update_first_free(map, map->first_free, FALSE);
4968 }
4969 }
4970
4971 pmap_unnest(map->pmap,
4972 entry->vme_start,
4973 entry->vme_end - entry->vme_start);
4974 if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
4975 /* clean up parent map/maps */
4976 vm_map_submap_pmap_clean(
4977 map, entry->vme_start,
4978 entry->vme_end,
4979 VME_SUBMAP(entry),
4980 VME_OFFSET(entry));
4981 }
4982 entry->use_pmap = FALSE;
4983 if ((map->pmap != kernel_pmap) &&
4984 (VME_ALIAS(entry) == VM_MEMORY_SHARED_PMAP)) {
4985 VME_ALIAS_SET(entry, VM_MEMORY_UNSHARED_PMAP);
4986 }
4987 }
4988 #endif /* NO_NESTED_PMAP */
4989
4990 /*
4991 * vm_map_clip_start: [ internal use only ]
4992 *
4993 * Asserts that the given entry begins at or after
4994 * the specified address; if necessary,
4995 * it splits the entry into two.
4996 */
4997 void
4998 vm_map_clip_start(
4999 vm_map_t map,
5000 vm_map_entry_t entry,
5001 vm_map_offset_t startaddr)
5002 {
5003 #ifndef NO_NESTED_PMAP
5004 if (entry->is_sub_map &&
5005 entry->use_pmap &&
5006 startaddr >= entry->vme_start) {
5007 vm_map_offset_t start_unnest, end_unnest;
5008
5009 /*
5010 * Make sure "startaddr" is no longer in a nested range
5011 * before we clip. Unnest only the minimum range the platform
5012 * can handle.
5013 * vm_map_clip_unnest may perform additional adjustments to
5014 * the unnest range.
5015 */
5016 start_unnest = startaddr & ~(pmap_nesting_size_min - 1);
5017 end_unnest = start_unnest + pmap_nesting_size_min;
5018 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
5019 }
5020 #endif /* NO_NESTED_PMAP */
5021 if (startaddr > entry->vme_start) {
5022 if (VME_OBJECT(entry) &&
5023 !entry->is_sub_map &&
5024 VME_OBJECT(entry)->phys_contiguous) {
5025 pmap_remove(map->pmap,
5026 (addr64_t)(entry->vme_start),
5027 (addr64_t)(entry->vme_end));
5028 }
5029 if (entry->vme_atomic) {
5030 panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)\n", map, entry);
5031 }
5032 _vm_map_clip_start(&map->hdr, entry, startaddr);
5033 if (map->holelistenabled) {
5034 vm_map_store_update_first_free(map, NULL, FALSE);
5035 } else {
5036 vm_map_store_update_first_free(map, map->first_free, FALSE);
5037 }
5038 }
5039 }
5040
5041
/*
 *	vm_map_copy_clip_start:
 *
 *	Same idea as vm_map_clip_start(), but operating on the entry
 *	list of a vm_map_copy (its "cpy_hdr"): splits "entry" only if
 *	it starts below "startaddr".
 */
#define vm_map_copy_clip_start(copy, entry, startaddr)			\
	MACRO_BEGIN							\
	if ((startaddr) > (entry)->vme_start) {				\
		_vm_map_clip_start(&(copy)->cpy_hdr, (entry), (startaddr)); \
	}								\
	MACRO_END
5047
5048 /*
5049 * This routine is called only when it is known that
5050 * the entry must be split.
5051 */
5052 static void
5053 _vm_map_clip_start(
5054 struct vm_map_header *map_header,
5055 vm_map_entry_t entry,
5056 vm_map_offset_t start)
5057 {
5058 vm_map_entry_t new_entry;
5059
5060 /*
5061 * Split off the front portion --
5062 * note that we must insert the new
5063 * entry BEFORE this one, so that
5064 * this entry has the specified starting
5065 * address.
5066 */
5067
5068 if (entry->map_aligned) {
5069 assert(VM_MAP_PAGE_ALIGNED(start,
5070 VM_MAP_HDR_PAGE_MASK(map_header)));
5071 }
5072
5073 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
5074 vm_map_entry_copy_full(new_entry, entry);
5075
5076 new_entry->vme_end = start;
5077 assert(new_entry->vme_start < new_entry->vme_end);
5078 VME_OFFSET_SET(entry, VME_OFFSET(entry) + (start - entry->vme_start));
5079 assert(start < entry->vme_end);
5080 entry->vme_start = start;
5081
5082 _vm_map_store_entry_link(map_header, entry->vme_prev, new_entry);
5083
5084 if (entry->is_sub_map)
5085 vm_map_reference(VME_SUBMAP(new_entry));
5086 else
5087 vm_object_reference(VME_OBJECT(new_entry));
5088 }
5089
5090
5091 /*
5092 * vm_map_clip_end: [ internal use only ]
5093 *
5094 * Asserts that the given entry ends at or before
5095 * the specified address; if necessary,
5096 * it splits the entry into two.
5097 */
5098 void
5099 vm_map_clip_end(
5100 vm_map_t map,
5101 vm_map_entry_t entry,
5102 vm_map_offset_t endaddr)
5103 {
5104 if (endaddr > entry->vme_end) {
5105 /*
5106 * Within the scope of this clipping, limit "endaddr" to
5107 * the end of this map entry...
5108 */
5109 endaddr = entry->vme_end;
5110 }
5111 #ifndef NO_NESTED_PMAP
5112 if (entry->is_sub_map && entry->use_pmap) {
5113 vm_map_offset_t start_unnest, end_unnest;
5114
5115 /*
5116 * Make sure the range between the start of this entry and
5117 * the new "endaddr" is no longer nested before we clip.
5118 * Unnest only the minimum range the platform can handle.
5119 * vm_map_clip_unnest may perform additional adjustments to
5120 * the unnest range.
5121 */
5122 start_unnest = entry->vme_start;
5123 end_unnest =
5124 (endaddr + pmap_nesting_size_min - 1) &
5125 ~(pmap_nesting_size_min - 1);
5126 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
5127 }
5128 #endif /* NO_NESTED_PMAP */
5129 if (endaddr < entry->vme_end) {
5130 if (VME_OBJECT(entry) &&
5131 !entry->is_sub_map &&
5132 VME_OBJECT(entry)->phys_contiguous) {
5133 pmap_remove(map->pmap,
5134 (addr64_t)(entry->vme_start),
5135 (addr64_t)(entry->vme_end));
5136 }
5137 if (entry->vme_atomic) {
5138 panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)\n", map, entry);
5139 }
5140 _vm_map_clip_end(&map->hdr, entry, endaddr);
5141 if (map->holelistenabled) {
5142 vm_map_store_update_first_free(map, NULL, FALSE);
5143 } else {
5144 vm_map_store_update_first_free(map, map->first_free, FALSE);
5145 }
5146 }
5147 }
5148
5149
/*
 *	vm_map_copy_clip_end:
 *
 *	Same idea as vm_map_clip_end(), but operating on the entry
 *	list of a vm_map_copy (its "cpy_hdr"): splits "entry" only if
 *	it extends past "endaddr".
 */
#define vm_map_copy_clip_end(copy, entry, endaddr)			\
	MACRO_BEGIN							\
	if ((endaddr) < (entry)->vme_end) {				\
		_vm_map_clip_end(&(copy)->cpy_hdr, (entry), (endaddr));	\
	}								\
	MACRO_END
5155
5156 /*
5157 * This routine is called only when it is known that
5158 * the entry must be split.
5159 */
5160 static void
5161 _vm_map_clip_end(
5162 struct vm_map_header *map_header,
5163 vm_map_entry_t entry,
5164 vm_map_offset_t end)
5165 {
5166 vm_map_entry_t new_entry;
5167
5168 /*
5169 * Create a new entry and insert it
5170 * AFTER the specified entry
5171 */
5172
5173 if (entry->map_aligned) {
5174 assert(VM_MAP_PAGE_ALIGNED(end,
5175 VM_MAP_HDR_PAGE_MASK(map_header)));
5176 }
5177
5178 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
5179 vm_map_entry_copy_full(new_entry, entry);
5180
5181 assert(entry->vme_start < end);
5182 new_entry->vme_start = entry->vme_end = end;
5183 VME_OFFSET_SET(new_entry,
5184 VME_OFFSET(new_entry) + (end - entry->vme_start));
5185 assert(new_entry->vme_start < new_entry->vme_end);
5186
5187 _vm_map_store_entry_link(map_header, entry, new_entry);
5188
5189 if (entry->is_sub_map)
5190 vm_map_reference(VME_SUBMAP(new_entry));
5191 else
5192 vm_object_reference(VME_OBJECT(new_entry));
5193 }
5194
5195
/*
 *	VM_MAP_RANGE_CHECK:	[ internal use only ]
 *
 *	Clamps the starting and ending region addresses to the
 *	valid range of the map.  Despite the name, nothing is
 *	asserted or reported: "start" and "end" are modified in
 *	place (so both must be lvalues):
 *	  - "start" is raised to vm_map_min(map) if below it;
 *	  - "end" is lowered to vm_map_max(map) if above it;
 *	  - if "start" still exceeds "end", "start" is set to "end",
 *	    yielding an empty range.
 */
#define	VM_MAP_RANGE_CHECK(map, start, end)	\
	MACRO_BEGIN				\
	if ((start) < vm_map_min(map)) {	\
		(start) = vm_map_min(map);	\
	}					\
	if ((end) > vm_map_max(map)) {		\
		(end) = vm_map_max(map);	\
	}					\
	if ((start) > (end)) {			\
		(start) = (end);		\
	}					\
	MACRO_END
5211
5212 /*
5213 * vm_map_range_check: [ internal use only ]
5214 *
5215 * Check that the region defined by the specified start and
5216 * end addresses are wholly contained within a single map
5217 * entry or set of adjacent map entries of the spacified map,
5218 * i.e. the specified region contains no unmapped space.
5219 * If any or all of the region is unmapped, FALSE is returned.
5220 * Otherwise, TRUE is returned and if the output argument 'entry'
5221 * is not NULL it points to the map entry containing the start
5222 * of the region.
5223 *
5224 * The map is locked for reading on entry and is left locked.
5225 */
5226 static boolean_t
5227 vm_map_range_check(
5228 vm_map_t map,
5229 vm_map_offset_t start,
5230 vm_map_offset_t end,
5231 vm_map_entry_t *entry)
5232 {
5233 vm_map_entry_t cur;
5234 vm_map_offset_t prev;
5235
5236 /*
5237 * Basic sanity checks first
5238 */
5239 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
5240 return (FALSE);
5241
5242 /*
5243 * Check first if the region starts within a valid
5244 * mapping for the map.
5245 */
5246 if (!vm_map_lookup_entry(map, start, &cur))
5247 return (FALSE);
5248
5249 /*
5250 * Optimize for the case that the region is contained
5251 * in a single map entry.
5252 */
5253 if (entry != (vm_map_entry_t *) NULL)
5254 *entry = cur;
5255 if (end <= cur->vme_end)
5256 return (TRUE);
5257
5258 /*
5259 * If the region is not wholly contained within a
5260 * single entry, walk the entries looking for holes.
5261 */
5262 prev = cur->vme_end;
5263 cur = cur->vme_next;
5264 while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
5265 if (end <= cur->vme_end)
5266 return (TRUE);
5267 prev = cur->vme_end;
5268 cur = cur->vme_next;
5269 }
5270 return (FALSE);
5271 }
5272
5273 /*
5274 * vm_map_submap: [ kernel use only ]
5275 *
5276 * Mark the given range as handled by a subordinate map.
5277 *
5278 * This range must have been created with vm_map_find using
5279 * the vm_submap_object, and no other operations may have been
5280 * performed on this range prior to calling vm_map_submap.
5281 *
5282 * Only a limited number of operations can be performed
5283 * within this rage after calling vm_map_submap:
5284 * vm_fault
5285 * [Don't try vm_map_copyin!]
5286 *
5287 * To remove a submapping, one must first remove the
5288 * range from the superior map, and then destroy the
5289 * submap (if desired). [Better yet, don't try it.]
5290 */
5291 kern_return_t
5292 vm_map_submap(
5293 vm_map_t map,
5294 vm_map_offset_t start,
5295 vm_map_offset_t end,
5296 vm_map_t submap,
5297 vm_map_offset_t offset,
5298 #ifdef NO_NESTED_PMAP
5299 __unused
5300 #endif /* NO_NESTED_PMAP */
5301 boolean_t use_pmap)
5302 {
5303 vm_map_entry_t entry;
5304 kern_return_t result = KERN_INVALID_ARGUMENT;
5305 vm_object_t object;
5306
5307 vm_map_lock(map);
5308
5309 if (! vm_map_lookup_entry(map, start, &entry)) {
5310 entry = entry->vme_next;
5311 }
5312
5313 if (entry == vm_map_to_entry(map) ||
5314 entry->is_sub_map) {
5315 vm_map_unlock(map);
5316 return KERN_INVALID_ARGUMENT;
5317 }
5318
5319 vm_map_clip_start(map, entry, start);
5320 vm_map_clip_end(map, entry, end);
5321
5322 if ((entry->vme_start == start) && (entry->vme_end == end) &&
5323 (!entry->is_sub_map) &&
5324 ((object = VME_OBJECT(entry)) == vm_submap_object) &&
5325 (object->resident_page_count == 0) &&
5326 (object->copy == VM_OBJECT_NULL) &&
5327 (object->shadow == VM_OBJECT_NULL) &&
5328 (!object->pager_created)) {
5329 VME_OFFSET_SET(entry, (vm_object_offset_t)offset);
5330 VME_OBJECT_SET(entry, VM_OBJECT_NULL);
5331 vm_object_deallocate(object);
5332 entry->is_sub_map = TRUE;
5333 entry->use_pmap = FALSE;
5334 VME_SUBMAP_SET(entry, submap);
5335 vm_map_reference(submap);
5336 if (submap->mapped_in_other_pmaps == FALSE &&
5337 vm_map_pmap(submap) != PMAP_NULL &&
5338 vm_map_pmap(submap) != vm_map_pmap(map)) {
5339 /*
5340 * This submap is being mapped in a map
5341 * that uses a different pmap.
5342 * Set its "mapped_in_other_pmaps" flag
5343 * to indicate that we now need to
5344 * remove mappings from all pmaps rather
5345 * than just the submap's pmap.
5346 */
5347 submap->mapped_in_other_pmaps = TRUE;
5348 }
5349
5350 #ifndef NO_NESTED_PMAP
5351 if (use_pmap) {
5352 /* nest if platform code will allow */
5353 if(submap->pmap == NULL) {
5354 ledger_t ledger = map->pmap->ledger;
5355 submap->pmap = pmap_create(ledger,
5356 (vm_map_size_t) 0, FALSE);
5357 if(submap->pmap == PMAP_NULL) {
5358 vm_map_unlock(map);
5359 return(KERN_NO_SPACE);
5360 }
5361 #if defined(__arm__) || defined(__arm64__)
5362 pmap_set_nested(submap->pmap);
5363 #endif
5364 }
5365 result = pmap_nest(map->pmap,
5366 (VME_SUBMAP(entry))->pmap,
5367 (addr64_t)start,
5368 (addr64_t)start,
5369 (uint64_t)(end - start));
5370 if(result)
5371 panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result);
5372 entry->use_pmap = TRUE;
5373 }
5374 #else /* NO_NESTED_PMAP */
5375 pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end);
5376 #endif /* NO_NESTED_PMAP */
5377 result = KERN_SUCCESS;
5378 }
5379 vm_map_unlock(map);
5380
5381 return(result);
5382 }
5383
5384 #if CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG)
5385 #include <sys/codesign.h>
5386 extern int proc_selfcsflags(void);
5387 extern int panic_on_unsigned_execute;
5388 #endif /* CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG) */
5389
5390 /*
5391 * vm_map_protect:
5392 *
5393 * Sets the protection of the specified address
5394 * region in the target map. If "set_max" is
5395 * specified, the maximum protection is to be set;
5396 * otherwise, only the current protection is affected.
5397 */
5398 kern_return_t
5399 vm_map_protect(
5400 vm_map_t map,
5401 vm_map_offset_t start,
5402 vm_map_offset_t end,
5403 vm_prot_t new_prot,
5404 boolean_t set_max)
5405 {
5406 vm_map_entry_t current;
5407 vm_map_offset_t prev;
5408 vm_map_entry_t entry;
5409 vm_prot_t new_max;
5410 int pmap_options = 0;
5411 kern_return_t kr;
5412
5413 XPR(XPR_VM_MAP,
5414 "vm_map_protect, 0x%X start 0x%X end 0x%X, new 0x%X %d",
5415 map, start, end, new_prot, set_max);
5416
5417 if (new_prot & VM_PROT_COPY) {
5418 vm_map_offset_t new_start;
5419 vm_prot_t cur_prot, max_prot;
5420 vm_map_kernel_flags_t kflags;
5421
5422 /* LP64todo - see below */
5423 if (start >= map->max_offset) {
5424 return KERN_INVALID_ADDRESS;
5425 }
5426
5427 kflags = VM_MAP_KERNEL_FLAGS_NONE;
5428 kflags.vmkf_remap_prot_copy = TRUE;
5429 new_start = start;
5430 kr = vm_map_remap(map,
5431 &new_start,
5432 end - start,
5433 0, /* mask */
5434 VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
5435 kflags,
5436 0,
5437 map,
5438 start,
5439 TRUE, /* copy-on-write remapping! */
5440 &cur_prot,
5441 &max_prot,
5442 VM_INHERIT_DEFAULT);
5443 if (kr != KERN_SUCCESS) {
5444 return kr;
5445 }
5446 new_prot &= ~VM_PROT_COPY;
5447 }
5448
5449 vm_map_lock(map);
5450
5451 /* LP64todo - remove this check when vm_map_commpage64()
5452 * no longer has to stuff in a map_entry for the commpage
5453 * above the map's max_offset.
5454 */
5455 if (start >= map->max_offset) {
5456 vm_map_unlock(map);
5457 return(KERN_INVALID_ADDRESS);
5458 }
5459
5460 while(1) {
5461 /*
5462 * Lookup the entry. If it doesn't start in a valid
5463 * entry, return an error.
5464 */
5465 if (! vm_map_lookup_entry(map, start, &entry)) {
5466 vm_map_unlock(map);
5467 return(KERN_INVALID_ADDRESS);
5468 }
5469
5470 if (entry->superpage_size && (start & (SUPERPAGE_SIZE-1))) { /* extend request to whole entry */
5471 start = SUPERPAGE_ROUND_DOWN(start);
5472 continue;
5473 }
5474 break;
5475 }
5476 if (entry->superpage_size)
5477 end = SUPERPAGE_ROUND_UP(end);
5478
5479 /*
5480 * Make a first pass to check for protection and address
5481 * violations.
5482 */
5483
5484 current = entry;
5485 prev = current->vme_start;
5486 while ((current != vm_map_to_entry(map)) &&
5487 (current->vme_start < end)) {
5488
5489 /*
5490 * If there is a hole, return an error.
5491 */
5492 if (current->vme_start != prev) {
5493 vm_map_unlock(map);
5494 return(KERN_INVALID_ADDRESS);
5495 }
5496
5497 new_max = current->max_protection;
5498 if ((new_prot & new_max) != new_prot) {
5499 vm_map_unlock(map);
5500 return(KERN_PROTECTION_FAILURE);
5501 }
5502
5503 #if CONFIG_EMBEDDED
5504 if (new_prot & VM_PROT_WRITE) {
5505 if ((new_prot & VM_PROT_EXECUTE) && !(current->used_for_jit)) {
5506 printf("EMBEDDED: %s can't have both write and exec at the same time\n", __FUNCTION__);
5507 new_prot &= ~VM_PROT_EXECUTE;
5508 }
5509 }
5510 #endif
5511
5512 /*
5513 * If the task has requested executable lockdown,
5514 * deny both:
5515 * - adding executable protections OR
5516 * - adding write protections to an existing executable mapping.
5517 */
5518 if (map->map_disallow_new_exec == TRUE) {
5519 if ((new_prot & VM_PROT_EXECUTE) ||
5520 ((current->protection & VM_PROT_EXECUTE) && (new_prot & VM_PROT_WRITE))) {
5521 vm_map_unlock(map);
5522 return(KERN_PROTECTION_FAILURE);
5523 }
5524 }
5525
5526 prev = current->vme_end;
5527 current = current->vme_next;
5528 }
5529
5530 #if __arm64__
5531 if (end > prev &&
5532 end == vm_map_round_page(prev, VM_MAP_PAGE_MASK(map))) {
5533 vm_map_entry_t prev_entry;
5534
5535 prev_entry = current->vme_prev;
5536 if (prev_entry != vm_map_to_entry(map) &&
5537 !prev_entry->map_aligned &&
5538 (vm_map_round_page(prev_entry->vme_end,
5539 VM_MAP_PAGE_MASK(map))
5540 == end)) {
5541 /*
5542 * The last entry in our range is not "map-aligned"
5543 * but it would have reached all the way to "end"
5544 * if it had been map-aligned, so this is not really
5545 * a hole in the range and we can proceed.
5546 */
5547 prev = end;
5548 }
5549 }
5550 #endif /* __arm64__ */
5551
5552 if (end > prev) {
5553 vm_map_unlock(map);
5554 return(KERN_INVALID_ADDRESS);
5555 }
5556
5557 /*
5558 * Go back and fix up protections.
5559 * Clip to start here if the range starts within
5560 * the entry.
5561 */
5562
5563 current = entry;
5564 if (current != vm_map_to_entry(map)) {
5565 /* clip and unnest if necessary */
5566 vm_map_clip_start(map, current, start);
5567 }
5568
5569 while ((current != vm_map_to_entry(map)) &&
5570 (current->vme_start < end)) {
5571
5572 vm_prot_t old_prot;
5573
5574 vm_map_clip_end(map, current, end);
5575
5576 if (current->is_sub_map) {
5577 /* clipping did unnest if needed */
5578 assert(!current->use_pmap);
5579 }
5580
5581 old_prot = current->protection;
5582
5583 if (set_max) {
5584 current->max_protection = new_prot;
5585 current->protection = new_prot & old_prot;
5586 } else {
5587 current->protection = new_prot;
5588 }
5589
5590 /*
5591 * Update physical map if necessary.
5592 * If the request is to turn off write protection,
5593 * we won't do it for real (in pmap). This is because
5594 * it would cause copy-on-write to fail. We've already
5595 * set, the new protection in the map, so if a
5596 * write-protect fault occurred, it will be fixed up
5597 * properly, COW or not.
5598 */
5599 if (current->protection != old_prot) {
5600 /* Look one level in we support nested pmaps */
5601 /* from mapped submaps which are direct entries */
5602 /* in our map */
5603
5604 vm_prot_t prot;
5605
5606 prot = current->protection;
5607 if (current->is_sub_map || (VME_OBJECT(current) == NULL) || (VME_OBJECT(current) != compressor_object)) {
5608 prot &= ~VM_PROT_WRITE;
5609 } else {
5610 assert(!VME_OBJECT(current)->code_signed);
5611 assert(VME_OBJECT(current)->copy_strategy == MEMORY_OBJECT_COPY_NONE);
5612 }
5613
5614 if (override_nx(map, VME_ALIAS(current)) && prot)
5615 prot |= VM_PROT_EXECUTE;
5616
5617 #if CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG)
5618 if (!(old_prot & VM_PROT_EXECUTE) &&
5619 (prot & VM_PROT_EXECUTE) &&
5620 (proc_selfcsflags() & CS_KILL) &&
5621 panic_on_unsigned_execute) {
5622 panic("vm_map_protect(%p,0x%llx,0x%llx) old=0x%x new=0x%x - <rdar://23770418> code-signing bypass?\n", map, (uint64_t)current->vme_start, (uint64_t)current->vme_end, old_prot, prot);
5623 }
5624 #endif /* CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG) */
5625
5626 if (pmap_has_prot_policy(prot)) {
5627 if (current->wired_count) {
5628 panic("vm_map_protect(%p,0x%llx,0x%llx) new=0x%x wired=%x\n",
5629 map, (uint64_t)current->vme_start, (uint64_t)current->vme_end, prot, current->wired_count);
5630 }
5631
5632 /* If the pmap layer cares about this
5633 * protection type, force a fault for
5634 * each page so that vm_fault will
5635 * repopulate the page with the full
5636 * set of protections.
5637 */
5638 /*
5639 * TODO: We don't seem to need this,
5640 * but this is due to an internal
5641 * implementation detail of
5642 * pmap_protect. Do we want to rely
5643 * on this?
5644 */
5645 prot = VM_PROT_NONE;
5646 }
5647
5648 if (current->is_sub_map && current->use_pmap) {
5649 pmap_protect(VME_SUBMAP(current)->pmap,
5650 current->vme_start,
5651 current->vme_end,
5652 prot);
5653 } else {
5654 if (prot & VM_PROT_WRITE) {
5655 if (VME_OBJECT(current) == compressor_object) {
5656 /*
5657 * For write requests on the
5658 * compressor, we will ask the
5659 * pmap layer to prevent us from
5660 * taking a write fault when we
5661 * attempt to access the mapping
5662 * next.
5663 */
5664 pmap_options |= PMAP_OPTIONS_PROTECT_IMMEDIATE;
5665 }
5666 }
5667
5668 pmap_protect_options(map->pmap,
5669 current->vme_start,
5670 current->vme_end,
5671 prot,
5672 pmap_options,
5673 NULL);
5674 }
5675 }
5676 current = current->vme_next;
5677 }
5678
5679 current = entry;
5680 while ((current != vm_map_to_entry(map)) &&
5681 (current->vme_start <= end)) {
5682 vm_map_simplify_entry(map, current);
5683 current = current->vme_next;
5684 }
5685
5686 vm_map_unlock(map);
5687 return(KERN_SUCCESS);
5688 }
5689
5690 /*
5691 * vm_map_inherit:
5692 *
5693 * Sets the inheritance of the specified address
5694 * range in the target map. Inheritance
5695 * affects how the map will be shared with
5696 * child maps at the time of vm_map_fork.
5697 */
5698 kern_return_t
5699 vm_map_inherit(
5700 vm_map_t map,
5701 vm_map_offset_t start,
5702 vm_map_offset_t end,
5703 vm_inherit_t new_inheritance)
5704 {
5705 vm_map_entry_t entry;
5706 vm_map_entry_t temp_entry;
5707
5708 vm_map_lock(map);
5709
5710 VM_MAP_RANGE_CHECK(map, start, end);
5711
5712 if (vm_map_lookup_entry(map, start, &temp_entry)) {
5713 entry = temp_entry;
5714 }
5715 else {
5716 temp_entry = temp_entry->vme_next;
5717 entry = temp_entry;
5718 }
5719
5720 /* first check entire range for submaps which can't support the */
5721 /* given inheritance. */
5722 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
5723 if(entry->is_sub_map) {
5724 if(new_inheritance == VM_INHERIT_COPY) {
5725 vm_map_unlock(map);
5726 return(KERN_INVALID_ARGUMENT);
5727 }
5728 }
5729
5730 entry = entry->vme_next;
5731 }
5732
5733 entry = temp_entry;
5734 if (entry != vm_map_to_entry(map)) {
5735 /* clip and unnest if necessary */
5736 vm_map_clip_start(map, entry, start);
5737 }
5738
5739 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
5740 vm_map_clip_end(map, entry, end);
5741 if (entry->is_sub_map) {
5742 /* clip did unnest if needed */
5743 assert(!entry->use_pmap);
5744 }
5745
5746 entry->inheritance = new_inheritance;
5747
5748 entry = entry->vme_next;
5749 }
5750
5751 vm_map_unlock(map);
5752 return(KERN_SUCCESS);
5753 }
5754
5755 /*
5756 * Update the accounting for the amount of wired memory in this map. If the user has
5757 * exceeded the defined limits, then we fail. Wiring on behalf of the kernel never fails.
5758 */
5759
5760 static kern_return_t
5761 add_wire_counts(
5762 vm_map_t map,
5763 vm_map_entry_t entry,
5764 boolean_t user_wire)
5765 {
5766 vm_map_size_t size;
5767
5768 if (user_wire) {
5769 unsigned int total_wire_count = vm_page_wire_count + vm_lopage_free_count;
5770
5771 /*
5772 * We're wiring memory at the request of the user. Check if this is the first time the user is wiring
5773 * this map entry.
5774 */
5775
5776 if (entry->user_wired_count == 0) {
5777 size = entry->vme_end - entry->vme_start;
5778
5779 /*
5780 * Since this is the first time the user is wiring this map entry, check to see if we're
5781 * exceeding the user wire limits. There is a per map limit which is the smaller of either
5782 * the process's rlimit or the global vm_user_wire_limit which caps this value. There is also
5783 * a system-wide limit on the amount of memory all users can wire. If the user is over either
5784 * limit, then we fail.
5785 */
5786
5787 if(size + map->user_wire_size > MIN(map->user_wire_limit, vm_user_wire_limit) ||
5788 size + ptoa_64(total_wire_count) > vm_global_user_wire_limit ||
5789 size + ptoa_64(total_wire_count) > max_mem - vm_global_no_user_wire_amount)
5790 return KERN_RESOURCE_SHORTAGE;
5791
5792 /*
5793 * The first time the user wires an entry, we also increment the wired_count and add this to
5794 * the total that has been wired in the map.
5795 */
5796
5797 if (entry->wired_count >= MAX_WIRE_COUNT)
5798 return KERN_FAILURE;
5799
5800 entry->wired_count++;
5801 map->user_wire_size += size;
5802 }
5803
5804 if (entry->user_wired_count >= MAX_WIRE_COUNT)
5805 return KERN_FAILURE;
5806
5807 entry->user_wired_count++;
5808
5809 } else {
5810
5811 /*
5812 * The kernel's wiring the memory. Just bump the count and continue.
5813 */
5814
5815 if (entry->wired_count >= MAX_WIRE_COUNT)
5816 panic("vm_map_wire: too many wirings");
5817
5818 entry->wired_count++;
5819 }
5820
5821 return KERN_SUCCESS;
5822 }
5823
5824 /*
5825 * Update the memory wiring accounting now that the given map entry is being unwired.
5826 */
5827
5828 static void
5829 subtract_wire_counts(
5830 vm_map_t map,
5831 vm_map_entry_t entry,
5832 boolean_t user_wire)
5833 {
5834
5835 if (user_wire) {
5836
5837 /*
5838 * We're unwiring memory at the request of the user. See if we're removing the last user wire reference.
5839 */
5840
5841 if (entry->user_wired_count == 1) {
5842
5843 /*
5844 * We're removing the last user wire reference. Decrement the wired_count and the total
5845 * user wired memory for this map.
5846 */
5847
5848 assert(entry->wired_count >= 1);
5849 entry->wired_count--;
5850 map->user_wire_size -= entry->vme_end - entry->vme_start;
5851 }
5852
5853 assert(entry->user_wired_count >= 1);
5854 entry->user_wired_count--;
5855
5856 } else {
5857
5858 /*
5859 * The kernel is unwiring the memory. Just update the count.
5860 */
5861
5862 assert(entry->wired_count >= 1);
5863 entry->wired_count--;
5864 }
5865 }
5866
/* Incremented each time vm_map_wire_nested() refuses to wire an executable range. */
5867 #if CONFIG_EMBEDDED
5868 int cs_executable_wire = 0;
5869 #endif /* CONFIG_EMBEDDED */
5870
5871 /*
5872 * vm_map_wire:
5873 *
5874 * Sets the pageability of the specified address range in the
5875 * target map as wired. Regions specified as not pageable require
5876 * locked-down physical memory and physical page maps. The
5877 * access_type variable indicates types of accesses that must not
5878 * generate page faults. This is checked against protection of
5879 * memory being locked-down.
5880 *
5881 * The map must not be locked, but a reference must remain to the
5882 * map throughout the call.
 *
 * Parameters (vm_map_wire_nested):
 *   map, start, end  range to wire; both bounds are asserted to be
 *                    page aligned and map-page aligned.  An empty
 *                    range (start == end) succeeds immediately.
 *   caller_prot      masked with VM_PROT_ALL to form the access type
 *                    checked against each entry's protection; also
 *                    passed unmodified to vm_fault_wire().
 *   tag              passed through to vm_fault_wire() and to the
 *                    recursive call made for submap entries.
 *   user_wire        selects user vs. kernel accounting in
 *                    add_wire_counts()/subtract_wire_counts().  When
 *                    TRUE, the wait on an in-transition entry is
 *                    THREAD_ABORTSAFE and an interrupted wait fails
 *                    with KERN_FAILURE.
 *   map_pmap,
 *   pmap_addr        when map_pmap is non-NULL these are handed to
 *                    vm_fault_wire(); when NULL, map->pmap and the
 *                    entry's start address are used instead.
 *   physpage_p       optional.  When non-NULL, the range must be
 *                    exactly PAGE_SIZE long (KERN_INVALID_ARGUMENT
 *                    otherwise); submap entries, needs_copy entries
 *                    and entries without an object are rejected with
 *                    KERN_INVALID_ARGUMENT.  *physpage_p is zeroed up
 *                    front and again on failure.
 *
 * On any failure that reaches the "done" label, the part of the range
 * processed so far, [start, s), is unwired again with
 * vm_map_unwire_nested() before returning the error.
5883 */
5884 static kern_return_t
5885 vm_map_wire_nested(
5886 vm_map_t map,
5887 vm_map_offset_t start,
5888 vm_map_offset_t end,
5889 vm_prot_t caller_prot,
5890 vm_tag_t tag,
5891 boolean_t user_wire,
5892 pmap_t map_pmap,
5893 vm_map_offset_t pmap_addr,
5894 ppnum_t *physpage_p)
5895 {
5896 vm_map_entry_t entry;
5897 vm_prot_t access_type;
5898 struct vm_map_entry *first_entry, tmp_entry;
5899 vm_map_t real_map;
5900 vm_map_offset_t s,e;
5901 kern_return_t rc;
5902 boolean_t need_wakeup;
5903 boolean_t main_map = FALSE;
5904 wait_interrupt_t interruptible_state;
5905 thread_t cur_thread;
5906 unsigned int last_timestamp;
5907 vm_map_size_t size;
5908 boolean_t wire_and_extract;
5909
5910 access_type = (caller_prot & VM_PROT_ALL);
5911
5912 wire_and_extract = FALSE;
5913 if (physpage_p != NULL) {
5914 /*
5915 * The caller wants the physical page number of the
5916 * wired page. We return only one physical page number
5917 * so this works for only one page at a time.
5918 */
5919 if ((end - start) != PAGE_SIZE) {
5920 return KERN_INVALID_ARGUMENT;
5921 }
5922 wire_and_extract = TRUE;
5923 *physpage_p = 0;
5924 }
5925
5926 vm_map_lock(map);
5927 if(map_pmap == NULL)
5928 main_map = TRUE;
/* NOTE(review): main_map is only assigned here; nothing in this function reads it. */
5929 last_timestamp = map->timestamp;
5930
5931 VM_MAP_RANGE_CHECK(map, start, end);
5932 assert(page_aligned(start));
5933 assert(page_aligned(end));
5934 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
5935 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
5936 if (start == end) {
5937 /* We wired what the caller asked for, zero pages */
5938 vm_map_unlock(map);
5939 return KERN_SUCCESS;
5940 }
5941
5942 need_wakeup = FALSE;
5943 cur_thread = current_thread();
5944
5945 s = start;
5946 rc = KERN_SUCCESS;
5947
5948 if (vm_map_lookup_entry(map, s, &first_entry)) {
5949 entry = first_entry;
5950 /*
5951 * vm_map_clip_start will be done later.
5952 * We don't want to unnest any nested submaps here !
5953 */
5954 } else {
5955 /* Start address is not in map */
5956 rc = KERN_INVALID_ADDRESS;
5957 goto done;
5958 }
5959
5960 while ((entry != vm_map_to_entry(map)) && (s < end)) {
5961 /*
5962 * At this point, we have wired from "start" to "s".
5963 * We still need to wire from "s" to "end".
5964 *
5965 * "entry" hasn't been clipped, so it could start before "s"
5966 * and/or end after "end".
5967 */
5968
5969 /* "e" is how far we want to wire in this entry */
5970 e = entry->vme_end;
5971 if (e > end)
5972 e = end;
5973
5974 /*
5975 * If another thread is wiring/unwiring this entry then
5976 * block after informing other thread to wake us up.
5977 */
5978 if (entry->in_transition) {
5979 wait_result_t wait_result;
5980
5981 /*
5982 * We have not clipped the entry. Make sure that
5983 * the start address is in range so that the lookup
5984 * below will succeed.
5985 * "s" is the current starting point: we've already
5986 * wired from "start" to "s" and we still have
5987 * to wire from "s" to "end".
5988 */
5989
5990 entry->needs_wakeup = TRUE;
5991
5992 /*
5993 * wake up anybody waiting on entries that we have
5994 * already wired.
5995 */
5996 if (need_wakeup) {
5997 vm_map_entry_wakeup(map);
5998 need_wakeup = FALSE;
5999 }
6000 /*
6001 * User wiring is interruptible
6002 */
6003 wait_result = vm_map_entry_wait(map,
6004 (user_wire) ? THREAD_ABORTSAFE :
6005 THREAD_UNINT);
6006 if (user_wire && wait_result == THREAD_INTERRUPTED) {
6007 /*
6008 * undo the wirings we have done so far
6009 * We do not clear the needs_wakeup flag,
6010 * because we cannot tell if we were the
6011 * only one waiting.
6012 */
6013 rc = KERN_FAILURE;
6014 goto done;
6015 }
6016
6017 /*
6018 * Cannot avoid a lookup here. reset timestamp.
6019 */
6020 last_timestamp = map->timestamp;
6021
6022 /*
6023 * The entry could have been clipped, look it up again.
6024 * Worst that can happen is, it may not exist anymore.
6025 */
6026 if (!vm_map_lookup_entry(map, s, &first_entry)) {
6027 /*
6028 * User: undo everything up to the previous
6029 * entry. let vm_map_unwire worry about
6030 * checking the validity of the range.
6031 */
6032 rc = KERN_FAILURE;
6033 goto done;
6034 }
6035 entry = first_entry;
6036 continue;
6037 }
6038
6039 if (entry->is_sub_map) {
6040 vm_map_offset_t sub_start;
6041 vm_map_offset_t sub_end;
6042 vm_map_offset_t local_start;
6043 vm_map_offset_t local_end;
6044 pmap_t pmap;
6045
6046 if (wire_and_extract) {
6047 /*
6048 * Wiring would result in copy-on-write
6049 * which would not be compatible with
6050 * the sharing we have with the original
6051 * provider of this memory.
6052 */
6053 rc = KERN_INVALID_ARGUMENT;
6054 goto done;
6055 }
6056
6057 vm_map_clip_start(map, entry, s);
6058 vm_map_clip_end(map, entry, end);
6059
/* Translate the clipped entry's range into the submap's address space. */
6060 sub_start = VME_OFFSET(entry);
6061 sub_end = entry->vme_end;
6062 sub_end += VME_OFFSET(entry) - entry->vme_start;
6063
/* NOTE(review): local_end is assigned here but not read afterwards in this function. */
6064 local_end = entry->vme_end;
6065 if(map_pmap == NULL) {
6066 vm_object_t object;
6067 vm_object_offset_t offset;
6068 vm_prot_t prot;
6069 boolean_t wired;
6070 vm_map_entry_t local_entry;
6071 vm_map_version_t version;
6072 vm_map_t lookup_map;
6073
6074 if(entry->use_pmap) {
6075 pmap = VME_SUBMAP(entry)->pmap;
6076 /* ppc implementation requires that */
6077 /* submaps pmap address ranges line */
6078 /* up with parent map */
6079 #ifdef notdef
6080 pmap_addr = sub_start;
6081 #endif
6082 pmap_addr = s;
6083 } else {
6084 pmap = map->pmap;
6085 pmap_addr = s;
6086 }
6087
6088 if (entry->wired_count) {
6089 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
6090 goto done;
6091
6092 /*
6093 * The map was not unlocked:
6094 * no need to goto re-lookup.
6095 * Just go directly to next entry.
6096 */
6097 entry = entry->vme_next;
6098 s = entry->vme_start;
6099 continue;
6100
6101 }
6102
6103 /* call vm_map_lookup_locked to */
6104 /* cause any needs copy to be */
6105 /* evaluated */
6106 local_start = entry->vme_start;
6107 lookup_map = map;
6108 vm_map_lock_write_to_read(map);
6109 if(vm_map_lookup_locked(
6110 &lookup_map, local_start,
6111 access_type | VM_PROT_COPY,
6112 OBJECT_LOCK_EXCLUSIVE,
6113 &version, &object,
6114 &offset, &prot, &wired,
6115 NULL,
6116 &real_map)) {
6117
/*
 * This failure path returns directly rather than going
 * through "done": the read lock is dropped and the range
 * wired so far is undone with vm_map_unwire().
 */
6118 vm_map_unlock_read(lookup_map);
6119 assert(map_pmap == NULL);
6120 vm_map_unwire(map, start,
6121 s, user_wire);
6122 return(KERN_FAILURE);
6123 }
6124 vm_object_unlock(object);
6125 if(real_map != lookup_map)
6126 vm_map_unlock(real_map);
6127 vm_map_unlock_read(lookup_map);
6128 vm_map_lock(map);
6129
6130 /* we unlocked, so must re-lookup */
6131 if (!vm_map_lookup_entry(map,
6132 local_start,
6133 &local_entry)) {
6134 rc = KERN_FAILURE;
6135 goto done;
6136 }
6137
6138 /*
6139 * entry could have been "simplified",
6140 * so re-clip
6141 */
6142 entry = local_entry;
6143 assert(s == local_start);
6144 vm_map_clip_start(map, entry, s);
6145 vm_map_clip_end(map, entry, end);
6146 /* re-compute "e" */
6147 e = entry->vme_end;
6148 if (e > end)
6149 e = end;
6150
6151 /* did we have a change of type? */
6152 if (!entry->is_sub_map) {
6153 last_timestamp = map->timestamp;
6154 continue;
6155 }
6156 } else {
6157 local_start = entry->vme_start;
6158 pmap = map_pmap;
6159 }
6160
6161 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
6162 goto done;
6163
6164 entry->in_transition = TRUE;
6165
6166 vm_map_unlock(map);
6167 rc = vm_map_wire_nested(VME_SUBMAP(entry),
6168 sub_start, sub_end,
6169 caller_prot, tag,
6170 user_wire, pmap, pmap_addr,
6171 NULL);
6172 vm_map_lock(map);
6173
6174 /*
6175 * Find the entry again. It could have been clipped
6176 * after we unlocked the map.
6177 */
6178 if (!vm_map_lookup_entry(map, local_start,
6179 &first_entry))
6180 panic("vm_map_wire: re-lookup failed");
6181 entry = first_entry;
6182
6183 assert(local_start == s);
6184 /* re-compute "e" */
6185 e = entry->vme_end;
6186 if (e > end)
6187 e = end;
6188
6189 last_timestamp = map->timestamp;
/*
 * Clear in_transition on every entry covering [s, e) and, if the
 * nested wire failed, give back the wire count taken above.
 */
6190 while ((entry != vm_map_to_entry(map)) &&
6191 (entry->vme_start < e)) {
6192 assert(entry->in_transition);
6193 entry->in_transition = FALSE;
6194 if (entry->needs_wakeup) {
6195 entry->needs_wakeup = FALSE;
6196 need_wakeup = TRUE;
6197 }
6198 if (rc != KERN_SUCCESS) {/* from vm_*_wire */
6199 subtract_wire_counts(map, entry, user_wire);
6200 }
6201 entry = entry->vme_next;
6202 }
6203 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
6204 goto done;
6205 }
6206
6207 /* no need to relookup again */
6208 s = entry->vme_start;
6209 continue;
6210 }
6211
6212 /*
6213 * If this entry is already wired then increment
6214 * the appropriate wire reference count.
6215 */
6216 if (entry->wired_count) {
6217
6218 if ((entry->protection & access_type) != access_type) {
6219 /* found a protection problem */
6220
6221 /*
6222 * XXX FBDP
6223 * We should always return an error
6224 * in this case but since we didn't
6225 * enforce it before, let's do
6226 * it only for the new "wire_and_extract"
6227 * code path for now...
6228 */
6229 if (wire_and_extract) {
6230 rc = KERN_PROTECTION_FAILURE;
6231 goto done;
6232 }
6233 }
6234
6235 /*
6236 * entry is already wired down, get our reference
6237 * after clipping to our range.
6238 */
6239 vm_map_clip_start(map, entry, s);
6240 vm_map_clip_end(map, entry, end);
6241
6242 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
6243 goto done;
6244
6245 if (wire_and_extract) {
6246 vm_object_t object;
6247 vm_object_offset_t offset;
6248 vm_page_t m;
6249
6250 /*
6251 * We don't have to "wire" the page again
6252 * but we still have to "extract" its
6253 * physical page number, after some sanity
6254 * checks.
6255 */
6256 assert((entry->vme_end - entry->vme_start)
6257 == PAGE_SIZE);
6258 assert(!entry->needs_copy);
6259 assert(!entry->is_sub_map);
6260 assert(VME_OBJECT(entry));
6261 if (((entry->vme_end - entry->vme_start)
6262 != PAGE_SIZE) ||
6263 entry->needs_copy ||
6264 entry->is_sub_map ||
6265 VME_OBJECT(entry) == VM_OBJECT_NULL) {
6266 rc = KERN_INVALID_ARGUMENT;
6267 goto done;
6268 }
6269
6270 object = VME_OBJECT(entry);
6271 offset = VME_OFFSET(entry);
6272 /* need exclusive lock to update m->dirty */
6273 if (entry->protection & VM_PROT_WRITE) {
6274 vm_object_lock(object);
6275 } else {
6276 vm_object_lock_shared(object);
6277 }
6278 m = vm_page_lookup(object, offset);
6279 assert(m != VM_PAGE_NULL);
6280 assert(VM_PAGE_WIRED(m));
6281 if (m != VM_PAGE_NULL && VM_PAGE_WIRED(m)) {
6282 *physpage_p = VM_PAGE_GET_PHYS_PAGE(m);
6283 if (entry->protection & VM_PROT_WRITE) {
6284 vm_object_lock_assert_exclusive(
6285 object);
6286 m->dirty = TRUE;
6287 }
6288 } else {
6289 /* not already wired !? */
6290 *physpage_p = 0;
6291 }
6292 vm_object_unlock(object);
6293 }
6294
6295 /* map was not unlocked: no need to relookup */
6296 entry = entry->vme_next;
6297 s = entry->vme_start;
6298 continue;
6299 }
6300
6301 /*
6302 * Unwired entry or wire request transmitted via submap
6303 */
6304
6305 #if CONFIG_EMBEDDED
6306 /*
6307 * Wiring would copy the pages to the shadow object.
6308 * The shadow object would not be code-signed so
6309 * attempting to execute code from these copied pages
6310 * would trigger a code-signing violation.
6311 */
6312 if (entry->protection & VM_PROT_EXECUTE) {
6313 #if MACH_ASSERT
6314 printf("pid %d[%s] wiring executable range from "
6315 "0x%llx to 0x%llx: rejected to preserve "
6316 "code-signing\n",
6317 proc_selfpid(),
6318 (current_task()->bsd_info
6319 ? proc_name_address(current_task()->bsd_info)
6320 : "?"),
6321 (uint64_t) entry->vme_start,
6322 (uint64_t) entry->vme_end);
6323 #endif /* MACH_ASSERT */
6324 DTRACE_VM2(cs_executable_wire,
6325 uint64_t, (uint64_t)entry->vme_start,
6326 uint64_t, (uint64_t)entry->vme_end);
6327 cs_executable_wire++;
6328 rc = KERN_PROTECTION_FAILURE;
6329 goto done;
6330 }
6331 #endif /* CONFIG_EMBEDDED */
6332
6333
6334 /*
6335 * Perform actions of vm_map_lookup that need the write
6336 * lock on the map: create a shadow object for a
6337 * copy-on-write region, or an object for a zero-fill
6338 * region.
6339 */
6340 size = entry->vme_end - entry->vme_start;
6341 /*
6342 * If wiring a copy-on-write page, we need to copy it now
6343 * even if we're only (currently) requesting read access.
6344 * This is aggressive, but once it's wired we can't move it.
6345 */
6346 if (entry->needs_copy) {
6347 if (wire_and_extract) {
6348 /*
6349 * We're supposed to share with the original
6350 * provider so should not be "needs_copy"
6351 */
6352 rc = KERN_INVALID_ARGUMENT;
6353 goto done;
6354 }
6355
6356 VME_OBJECT_SHADOW(entry, size);
6357 entry->needs_copy = FALSE;
6358 } else if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
6359 if (wire_and_extract) {
6360 /*
6361 * We're supposed to share with the original
6362 * provider so should already have an object.
6363 */
6364 rc = KERN_INVALID_ARGUMENT;
6365 goto done;
6366 }
6367 VME_OBJECT_SET(entry, vm_object_allocate(size));
6368 VME_OFFSET_SET(entry, (vm_object_offset_t)0);
6369 assert(entry->use_pmap);
6370 }
6371
6372 vm_map_clip_start(map, entry, s);
6373 vm_map_clip_end(map, entry, end);
6374
6375 /* re-compute "e" */
6376 e = entry->vme_end;
6377 if (e > end)
6378 e = end;
6379
6380 /*
6381 * Check for holes and protection mismatch.
6382 * Holes: Next entry should be contiguous unless this
6383 * is the end of the region.
6384 * Protection: Access requested must be allowed, unless
6385 * wiring is by protection class
6386 */
6387 if ((entry->vme_end < end) &&
6388 ((entry->vme_next == vm_map_to_entry(map)) ||
6389 (entry->vme_next->vme_start > entry->vme_end))) {
6390 /* found a hole */
6391 rc = KERN_INVALID_ADDRESS;
6392 goto done;
6393 }
6394 if ((entry->protection & access_type) != access_type) {
6395 /* found a protection problem */
6396 rc = KERN_PROTECTION_FAILURE;
6397 goto done;
6398 }
6399
6400 assert(entry->wired_count == 0 && entry->user_wired_count == 0);
6401
6402 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
6403 goto done;
6404
6405 entry->in_transition = TRUE;
6406
6407 /*
6408 * This entry might get split once we unlock the map.
6409 * In vm_fault_wire(), we need the current range as
6410 * defined by this entry. In order for this to work
6411 * along with a simultaneous clip operation, we make a
6412 * temporary copy of this entry and use that for the
6413 * wiring. Note that the underlying objects do not
6414 * change during a clip.
6415 */
6416 tmp_entry = *entry;
6417
6418 /*
6419 * The in_transition state guarantees that the entry
6420 * (or entries for this range, if split occurred) will be
6421 * there when the map lock is acquired for the second time.
6422 */
6423 vm_map_unlock(map);
6424
6425 if (!user_wire && cur_thread != THREAD_NULL)
6426 interruptible_state = thread_interrupt_level(THREAD_UNINT);
6427 else
6428 interruptible_state = THREAD_UNINT;
6429
6430 if(map_pmap)
6431 rc = vm_fault_wire(map,
6432 &tmp_entry, caller_prot, tag, map_pmap, pmap_addr,
6433 physpage_p);
6434 else
6435 rc = vm_fault_wire(map,
6436 &tmp_entry, caller_prot, tag, map->pmap,
6437 tmp_entry.vme_start,
6438 physpage_p);
6439
6440 if (!user_wire && cur_thread != THREAD_NULL)
6441 thread_interrupt_level(interruptible_state);
6442
6443 vm_map_lock(map);
6444
/*
 * NOTE(review): presumably map->timestamp advances by exactly one for
 * our own unlock above, so any other value means the map was changed
 * while unlocked and "entry" must be looked up again -- confirm against
 * the map lock implementation.
 */
6445 if (last_timestamp+1 != map->timestamp) {
6446 /*
6447 * Find the entry again. It could have been clipped
6448 * after we unlocked the map.
6449 */
6450 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
6451 &first_entry))
6452 panic("vm_map_wire: re-lookup failed");
6453
6454 entry = first_entry;
6455 }
6456
6457 last_timestamp = map->timestamp;
6458
6459 while ((entry != vm_map_to_entry(map)) &&
6460 (entry->vme_start < tmp_entry.vme_end)) {
6461 assert(entry->in_transition);
6462 entry->in_transition = FALSE;
6463 if (entry->needs_wakeup) {
6464 entry->needs_wakeup = FALSE;
6465 need_wakeup = TRUE;
6466 }
6467 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
6468 subtract_wire_counts(map, entry, user_wire);
6469 }
6470 entry = entry->vme_next;
6471 }
6472
6473 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
6474 goto done;
6475 }
6476
6477 if ((entry != vm_map_to_entry(map)) && /* we still have entries in the map */
6478 (tmp_entry.vme_end != end) && /* AND, we are not at the end of the requested range */
6479 (entry->vme_start != tmp_entry.vme_end)) { /* AND, the next entry is not contiguous. */
6480 /* found a "new" hole */
6481 s = tmp_entry.vme_end;
6482 rc = KERN_INVALID_ADDRESS;
6483 goto done;
6484 }
6485
6486 s = entry->vme_start;
6487
6488 } /* end while loop through map entries */
6489
/* Common exit: the map is still locked when control reaches this label. */
6490 done:
6491 if (rc == KERN_SUCCESS) {
6492 /* repair any damage we may have made to the VM map */
6493 vm_map_simplify_range(map, start, end);
6494 }
6495
6496 vm_map_unlock(map);
6497
6498 /*
6499 * wake up anybody waiting on entries we wired.
6500 */
6501 if (need_wakeup)
6502 vm_map_entry_wakeup(map);
6503
6504 if (rc != KERN_SUCCESS) {
6505 /* undo what has been wired so far */
6506 vm_map_unwire_nested(map, start, s, user_wire,
6507 map_pmap, pmap_addr);
6508 if (physpage_p) {
6509 *physpage_p = 0;
6510 }
6511 }
6512
6513 return rc;
6514
6515 }
6516
6517 kern_return_t
6518 vm_map_wire_external(
6519 vm_map_t map,
6520 vm_map_offset_t start,
6521 vm_map_offset_t end,
6522 vm_prot_t caller_prot,
6523 boolean_t user_wire)
6524 {
6525 kern_return_t kret;
6526
6527 kret = vm_map_wire_nested(map, start, end, caller_prot, vm_tag_bt(),
6528 user_wire, (pmap_t)NULL, 0, NULL);
6529 return kret;
6530 }
6531
6532 kern_return_t
6533 vm_map_wire_kernel(
6534 vm_map_t map,
6535 vm_map_offset_t start,
6536 vm_map_offset_t end,
6537 vm_prot_t caller_prot,
6538 vm_tag_t tag,
6539 boolean_t user_wire)
6540 {
6541 kern_return_t kret;
6542
6543 kret = vm_map_wire_nested(map, start, end, caller_prot, tag,
6544 user_wire, (pmap_t)NULL, 0, NULL);
6545 return kret;
6546 }
6547
6548 kern_return_t
6549 vm_map_wire_and_extract_external(
6550 vm_map_t map,
6551 vm_map_offset_t start,
6552 vm_prot_t caller_prot,
6553 boolean_t user_wire,
6554 ppnum_t *physpage_p)
6555 {
6556 kern_return_t kret;
6557
6558 kret = vm_map_wire_nested(map,
6559 start,
6560 start+VM_MAP_PAGE_SIZE(map),
6561 caller_prot,
6562 vm_tag_bt(),
6563 user_wire,
6564 (pmap_t)NULL,
6565 0,
6566 physpage_p);
6567 if (kret != KERN_SUCCESS &&
6568 physpage_p != NULL) {
6569 *physpage_p = 0;
6570 }
6571 return kret;
6572 }
6573
6574 kern_return_t
6575 vm_map_wire_and_extract_kernel(
6576 vm_map_t map,
6577 vm_map_offset_t start,
6578 vm_prot_t caller_prot,
6579 vm_tag_t tag,
6580 boolean_t user_wire,
6581 ppnum_t *physpage_p)
6582 {
6583 kern_return_t kret;
6584
6585 kret = vm_map_wire_nested(map,
6586 start,
6587 start+VM_MAP_PAGE_SIZE(map),
6588 caller_prot,
6589 tag,
6590 user_wire,
6591 (pmap_t)NULL,
6592 0,
6593 physpage_p);
6594 if (kret != KERN_SUCCESS &&
6595 physpage_p != NULL) {
6596 *physpage_p = 0;
6597 }
6598 return kret;
6599 }
6600
6601 /*
6602 * vm_map_unwire:
6603 *
6604 * Sets the pageability of the specified address range in the target
6605 * as pageable. Regions specified must have been wired previously.
6606 *
6607 * The map must not be locked, but a reference must remain to the map
6608 * throughout the call.
6609 *
6610 * Kernel will panic on failures. User unwire ignores holes and
6611 * unwired and in-transition entries to avoid losing memory by leaving
6612 * it unwired.
6613 */
6614 static kern_return_t
6615 vm_map_unwire_nested(
6616 vm_map_t map,
6617 vm_map_offset_t start,
6618 vm_map_offset_t end,
6619 boolean_t user_wire,
6620 pmap_t map_pmap,
6621 vm_map_offset_t pmap_addr)
6622 {
6623 vm_map_entry_t entry;
6624 struct vm_map_entry *first_entry, tmp_entry;
6625 boolean_t need_wakeup;
6626 boolean_t main_map = FALSE;
6627 unsigned int last_timestamp;
6628
6629 vm_map_lock(map);
6630 if(map_pmap == NULL)
6631 main_map = TRUE;
6632 last_timestamp = map->timestamp;
6633
6634 VM_MAP_RANGE_CHECK(map, start, end);
6635 assert(page_aligned(start));
6636 assert(page_aligned(end));
6637 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
6638 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
6639
6640 if (start == end) {
6641 /* We unwired what the caller asked for: zero pages */
6642 vm_map_unlock(map);
6643 return KERN_SUCCESS;
6644 }
6645
6646 if (vm_map_lookup_entry(map, start, &first_entry)) {
6647 entry = first_entry;
6648 /*
6649 * vm_map_clip_start will be done later.
6650 * We don't want to unnest any nested sub maps here !
6651 */
6652 }
6653 else {
6654 if (!user_wire) {
6655 panic("vm_map_unwire: start not found");
6656 }
6657 /* Start address is not in map. */
6658 vm_map_unlock(map);
6659 return(KERN_INVALID_ADDRESS);
6660 }
6661
6662 if (entry->superpage_size) {
6663 /* superpages are always wired */
6664 vm_map_unlock(map);
6665 return KERN_INVALID_ADDRESS;
6666 }
6667
6668 need_wakeup = FALSE;
6669 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
6670 if (entry->in_transition) {
6671 /*
6672 * 1)
6673 * Another thread is wiring down this entry. Note
6674 * that if it is not for the other thread we would
6675 * be unwiring an unwired entry. This is not
6676 * permitted. If we wait, we will be unwiring memory
6677 * we did not wire.
6678 *
6679 * 2)
6680 * Another thread is unwiring this entry. We did not
6681 * have a reference to it, because if we did, this
6682 * entry will not be getting unwired now.
6683 */
6684 if (!user_wire) {
6685 /*
6686 * XXX FBDP
6687 * This could happen: there could be some
6688 * overlapping vslock/vsunlock operations
6689 * going on.
6690 * We should probably just wait and retry,
6691 * but then we have to be careful that this
6692 * entry could get "simplified" after
6693 * "in_transition" gets unset and before
6694 * we re-lookup the entry, so we would
6695 * have to re-clip the entry to avoid
6696 * re-unwiring what we have already unwired...
6697 * See vm_map_wire_nested().
6698 *
6699 * Or we could just ignore "in_transition"
6700 * here and proceed to decement the wired
6701 * count(s) on this entry. That should be fine
6702 * as long as "wired_count" doesn't drop all
6703 * the way to 0 (and we should panic if THAT
6704 * happens).
6705 */
6706 panic("vm_map_unwire: in_transition entry");
6707 }
6708
6709 entry = entry->vme_next;
6710 continue;
6711 }
6712
6713 if (entry->is_sub_map) {
6714 vm_map_offset_t sub_start;
6715 vm_map_offset_t sub_end;
6716 vm_map_offset_t local_end;
6717 pmap_t pmap;
6718
6719 vm_map_clip_start(map, entry, start);
6720 vm_map_clip_end(map, entry, end);
6721
6722 sub_start = VME_OFFSET(entry);
6723 sub_end = entry->vme_end - entry->vme_start;
6724 sub_end += VME_OFFSET(entry);
6725 local_end = entry->vme_end;
6726 if(map_pmap == NULL) {
6727 if(entry->use_pmap) {
6728 pmap = VME_SUBMAP(entry)->pmap;
6729 pmap_addr = sub_start;
6730 } else {
6731 pmap = map->pmap;
6732 pmap_addr = start;
6733 }
6734 if (entry->wired_count == 0 ||
6735 (user_wire && entry->user_wired_count == 0)) {
6736 if (!user_wire)
6737 panic("vm_map_unwire: entry is unwired");
6738 entry = entry->vme_next;
6739 continue;
6740 }
6741
6742 /*
6743 * Check for holes
6744 * Holes: Next entry should be contiguous unless
6745 * this is the end of the region.
6746 */
6747 if (((entry->vme_end < end) &&
6748 ((entry->vme_next == vm_map_to_entry(map)) ||
6749 (entry->vme_next->vme_start
6750 > entry->vme_end)))) {
6751 if (!user_wire)
6752 panic("vm_map_unwire: non-contiguous region");
6753 /*
6754 entry = entry->vme_next;
6755 continue;
6756 */
6757 }
6758
6759 subtract_wire_counts(map, entry, user_wire);
6760
6761 if (entry->wired_count != 0) {
6762 entry = entry->vme_next;
6763 continue;
6764 }
6765
6766 entry->in_transition = TRUE;
6767 tmp_entry = *entry;/* see comment in vm_map_wire() */
6768
6769 /*
6770 * We can unlock the map now. The in_transition state
6771 * guarantees existance of the entry.
6772 */
6773 vm_map_unlock(map);
6774 vm_map_unwire_nested(VME_SUBMAP(entry),
6775 sub_start, sub_end, user_wire, pmap, pmap_addr);
6776 vm_map_lock(map);
6777
6778 if (last_timestamp+1 != map->timestamp) {
6779 /*
6780 * Find the entry again. It could have been
6781 * clipped or deleted after we unlocked the map.
6782 */
6783 if (!vm_map_lookup_entry(map,
6784 tmp_entry.vme_start,
6785 &first_entry)) {
6786 if (!user_wire)
6787 panic("vm_map_unwire: re-lookup failed");
6788 entry = first_entry->vme_next;
6789 } else
6790 entry = first_entry;
6791 }
6792 last_timestamp = map->timestamp;
6793
6794 /*
6795 * clear transition bit for all constituent entries
6796 * that were in the original entry (saved in
6797 * tmp_entry). Also check for waiters.
6798 */
6799 while ((entry != vm_map_to_entry(map)) &&
6800 (entry->vme_start < tmp_entry.vme_end)) {
6801 assert(entry->in_transition);
6802 entry->in_transition = FALSE;
6803 if (entry->needs_wakeup) {
6804 entry->needs_wakeup = FALSE;
6805 need_wakeup = TRUE;
6806 }
6807 entry = entry->vme_next;
6808 }
6809 continue;
6810 } else {
6811 vm_map_unlock(map);
6812 vm_map_unwire_nested(VME_SUBMAP(entry),
6813 sub_start, sub_end, user_wire, map_pmap,
6814 pmap_addr);
6815 vm_map_lock(map);
6816
6817 if (last_timestamp+1 != map->timestamp) {
6818 /*
6819 * Find the entry again. It could have been
6820 * clipped or deleted after we unlocked the map.
6821 */
6822 if (!vm_map_lookup_entry(map,
6823 tmp_entry.vme_start,
6824 &first_entry)) {
6825 if (!user_wire)
6826 panic("vm_map_unwire: re-lookup failed");
6827 entry = first_entry->vme_next;
6828 } else
6829 entry = first_entry;
6830 }
6831 last_timestamp = map->timestamp;
6832 }
6833 }
6834
6835
6836 if ((entry->wired_count == 0) ||
6837 (user_wire && entry->user_wired_count == 0)) {
6838 if (!user_wire)
6839 panic("vm_map_unwire: entry is unwired");
6840
6841 entry = entry->vme_next;
6842 continue;
6843 }
6844
6845 assert(entry->wired_count > 0 &&
6846 (!user_wire || entry->user_wired_count > 0));
6847
6848 vm_map_clip_start(map, entry, start);
6849 vm_map_clip_end(map, entry, end);
6850
6851 /*
6852 * Check for holes
6853 * Holes: Next entry should be contiguous unless
6854 * this is the end of the region.
6855 */
6856 if (((entry->vme_end < end) &&
6857 ((entry->vme_next == vm_map_to_entry(map)) ||
6858 (entry->vme_next->vme_start > entry->vme_end)))) {
6859
6860 if (!user_wire)
6861 panic("vm_map_unwire: non-contiguous region");
6862 entry = entry->vme_next;
6863 continue;
6864 }
6865
6866 subtract_wire_counts(map, entry, user_wire);
6867
6868 if (entry->wired_count != 0) {
6869 entry = entry->vme_next;
6870 continue;
6871 }
6872
6873 if(entry->zero_wired_pages) {
6874 entry->zero_wired_pages = FALSE;
6875 }
6876
6877 entry->in_transition = TRUE;
6878 tmp_entry = *entry; /* see comment in vm_map_wire() */
6879
6880 /*
6881 * We can unlock the map now. The in_transition state
6882 * guarantees existance of the entry.
6883 */
6884 vm_map_unlock(map);
6885 if(map_pmap) {
6886 vm_fault_unwire(map,
6887 &tmp_entry, FALSE, map_pmap, pmap_addr);
6888 } else {
6889 vm_fault_unwire(map,
6890 &tmp_entry, FALSE, map->pmap,
6891 tmp_entry.vme_start);
6892 }
6893 vm_map_lock(map);
6894
6895 if (last_timestamp+1 != map->timestamp) {
6896 /*
6897 * Find the entry again. It could have been clipped
6898 * or deleted after we unlocked the map.
6899 */
6900 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
6901 &first_entry)) {
6902 if (!user_wire)
6903 panic("vm_map_unwire: re-lookup failed");
6904 entry = first_entry->vme_next;
6905 } else
6906 entry = first_entry;
6907 }
6908 last_timestamp = map->timestamp;
6909
6910 /*
6911 * clear transition bit for all constituent entries that
6912 * were in the original entry (saved in tmp_entry). Also
6913 * check for waiters.
6914 */
6915 while ((entry != vm_map_to_entry(map)) &&
6916 (entry->vme_start < tmp_entry.vme_end)) {
6917 assert(entry->in_transition);
6918 entry->in_transition = FALSE;
6919 if (entry->needs_wakeup) {
6920 entry->needs_wakeup = FALSE;
6921 need_wakeup = TRUE;
6922 }
6923 entry = entry->vme_next;
6924 }
6925 }
6926
6927 /*
6928 * We might have fragmented the address space when we wired this
6929 * range of addresses. Attempt to re-coalesce these VM map entries
6930 * with their neighbors now that they're no longer wired.
6931 * Under some circumstances, address space fragmentation can
6932 * prevent VM object shadow chain collapsing, which can cause
6933 * swap space leaks.
6934 */
6935 vm_map_simplify_range(map, start, end);
6936
6937 vm_map_unlock(map);
6938 /*
6939 * wake up anybody waiting on entries that we have unwired.
6940 */
6941 if (need_wakeup)
6942 vm_map_entry_wakeup(map);
6943 return(KERN_SUCCESS);
6944
6945 }
6946
6947 kern_return_t
6948 vm_map_unwire(
6949 vm_map_t map,
6950 vm_map_offset_t start,
6951 vm_map_offset_t end,
6952 boolean_t user_wire)
6953 {
6954 return vm_map_unwire_nested(map, start, end,
6955 user_wire, (pmap_t)NULL, 0);
6956 }
6957
6958
6959 /*
6960 * vm_map_entry_delete: [ internal use only ]
6961 *
6962 * Deallocate the given entry from the target map.
6963 */
6964 static void
6965 vm_map_entry_delete(
6966 vm_map_t map,
6967 vm_map_entry_t entry)
6968 {
6969 vm_map_offset_t s, e;
6970 vm_object_t object;
6971 vm_map_t submap;
6972
6973 s = entry->vme_start;
6974 e = entry->vme_end;
6975 assert(page_aligned(s));
6976 assert(page_aligned(e));
6977 if (entry->map_aligned == TRUE) {
6978 assert(VM_MAP_PAGE_ALIGNED(s, VM_MAP_PAGE_MASK(map)));
6979 assert(VM_MAP_PAGE_ALIGNED(e, VM_MAP_PAGE_MASK(map)));
6980 }
6981 assert(entry->wired_count == 0);
6982 assert(entry->user_wired_count == 0);
6983 assert(!entry->permanent);
6984
6985 if (entry->is_sub_map) {
6986 object = NULL;
6987 submap = VME_SUBMAP(entry);
6988 } else {
6989 submap = NULL;
6990 object = VME_OBJECT(entry);
6991 }
6992
6993 vm_map_store_entry_unlink(map, entry);
6994 map->size -= e - s;
6995
6996 vm_map_entry_dispose(map, entry);
6997
6998 vm_map_unlock(map);
6999 /*
7000 * Deallocate the object only after removing all
7001 * pmap entries pointing to its pages.
7002 */
7003 if (submap)
7004 vm_map_deallocate(submap);
7005 else
7006 vm_object_deallocate(object);
7007
7008 }
7009
7010 void
7011 vm_map_submap_pmap_clean(
7012 vm_map_t map,
7013 vm_map_offset_t start,
7014 vm_map_offset_t end,
7015 vm_map_t sub_map,
7016 vm_map_offset_t offset)
7017 {
7018 vm_map_offset_t submap_start;
7019 vm_map_offset_t submap_end;
7020 vm_map_size_t remove_size;
7021 vm_map_entry_t entry;
7022
7023 submap_end = offset + (end - start);
7024 submap_start = offset;
7025
7026 vm_map_lock_read(sub_map);
7027 if(vm_map_lookup_entry(sub_map, offset, &entry)) {
7028
7029 remove_size = (entry->vme_end - entry->vme_start);
7030 if(offset > entry->vme_start)
7031 remove_size -= offset - entry->vme_start;
7032
7033
7034 if(submap_end < entry->vme_end) {
7035 remove_size -=
7036 entry->vme_end - submap_end;
7037 }
7038 if(entry->is_sub_map) {
7039 vm_map_submap_pmap_clean(
7040 sub_map,
7041 start,
7042 start + remove_size,
7043 VME_SUBMAP(entry),
7044 VME_OFFSET(entry));
7045 } else {
7046
7047 if((map->mapped_in_other_pmaps) && (map->ref_count)
7048 && (VME_OBJECT(entry) != NULL)) {
7049 vm_object_pmap_protect_options(
7050 VME_OBJECT(entry),
7051 (VME_OFFSET(entry) +
7052 offset -
7053 entry->vme_start),
7054 remove_size,
7055 PMAP_NULL,
7056 entry->vme_start,
7057 VM_PROT_NONE,
7058 PMAP_OPTIONS_REMOVE);
7059 } else {
7060 pmap_remove(map->pmap,
7061 (addr64_t)start,
7062 (addr64_t)(start + remove_size));
7063 }
7064 }
7065 }
7066
7067 entry = entry->vme_next;
7068
7069 while((entry != vm_map_to_entry(sub_map))
7070 && (entry->vme_start < submap_end)) {
7071 remove_size = (entry->vme_end - entry->vme_start);
7072 if(submap_end < entry->vme_end) {
7073 remove_size -= entry->vme_end - submap_end;
7074 }
7075 if(entry->is_sub_map) {
7076 vm_map_submap_pmap_clean(
7077 sub_map,
7078 (start + entry->vme_start) - offset,
7079 ((start + entry->vme_start) - offset) + remove_size,
7080 VME_SUBMAP(entry),
7081 VME_OFFSET(entry));
7082 } else {
7083 if((map->mapped_in_other_pmaps) && (map->ref_count)
7084 && (VME_OBJECT(entry) != NULL)) {
7085 vm_object_pmap_protect_options(
7086 VME_OBJECT(entry),
7087 VME_OFFSET(entry),
7088 remove_size,
7089 PMAP_NULL,
7090 entry->vme_start,
7091 VM_PROT_NONE,
7092 PMAP_OPTIONS_REMOVE);
7093 } else {
7094 pmap_remove(map->pmap,
7095 (addr64_t)((start + entry->vme_start)
7096 - offset),
7097 (addr64_t)(((start + entry->vme_start)
7098 - offset) + remove_size));
7099 }
7100 }
7101 entry = entry->vme_next;
7102 }
7103 vm_map_unlock_read(sub_map);
7104 return;
7105 }
7106
7107 /*
7108 * vm_map_delete: [ internal use only ]
7109 *
7110 * Deallocates the given address range from the target map.
7111 * Removes all user wirings. Unwires one kernel wiring if
7112 * VM_MAP_REMOVE_KUNWIRE is set. Waits for kernel wirings to go
7113 * away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set. Sleeps
7114 * interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
7115 *
7116 * This routine is called with map locked and leaves map locked.
 * The lock is dropped and re-taken internally (around unwiring and
 * around vm_map_entry_delete()), so the map can change while this
 * routine runs; map->timestamp is used to notice that.
 *
 * flags:   VM_MAP_REMOVE_* bits. VM_MAP_REMOVE_WAIT_FOR_KWIRE is always
 *          forced on by this routine.
 * zap_map: when VM_MAP_REMOVE_SAVE_ENTRIES is set and zap_map is not
 *          VM_MAP_NULL, removed entries are moved to the end of zap_map
 *          instead of being deleted.
 *
 * Permanent entries are not removed: they stay in the map with their
 * protections set to VM_PROT_NONE (unless VM_MAP_REMOVE_IMMUTABLE is
 * set, in which case the permanent flag is cleared first; on a map
 * using kernel_pmap this panics instead).
 *
 * Returns KERN_SUCCESS, or KERN_ABORTED when an interruptible wait on
 * an entry was interrupted.
7117 */
7118 static kern_return_t
7119 vm_map_delete(
7120 vm_map_t map,
7121 vm_map_offset_t start,
7122 vm_map_offset_t end,
7123 int flags,
7124 vm_map_t zap_map)
7125 {
7126 vm_map_entry_t entry, next;
7127 struct vm_map_entry *first_entry, tmp_entry;
7128 vm_map_offset_t s;
7129 vm_object_t object;
7130 boolean_t need_wakeup;
7131 unsigned int last_timestamp = ~0; /* unlikely value */
7132 int interruptible;
7133
7134 interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
7135 THREAD_ABORTSAFE : THREAD_UNINT;
7136
7137 /*
7138 * All our DMA I/O operations in IOKit are currently done by
7139 * wiring through the map entries of the task requesting the I/O.
7140 * Because of this, we must always wait for kernel wirings
7141 * to go away on the entries before deleting them.
7142 *
7143 * Any caller who wants to actually remove a kernel wiring
7144 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
7145 * properly remove one wiring instead of blasting through
7146 * them all.
7147 */
7148 flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;
7149
/*
 * This loop only repeats when "start" had to be rounded down for a
 * superpage entry (the single "continue" below); every other path
 * reaches the "break" at the bottom.
 */
7150 while(1) {
7151 /*
7152 * Find the start of the region, and clip it
7153 */
7154 if (vm_map_lookup_entry(map, start, &first_entry)) {
7155 entry = first_entry;
/* kalloc_map: the request must match one whole entry exactly. */
7156 if (map == kalloc_map &&
7157 (entry->vme_start != start ||
7158 entry->vme_end != end)) {
7159 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7160 "mismatched entry %p [0x%llx:0x%llx]\n",
7161 map,
7162 (uint64_t)start,
7163 (uint64_t)end,
7164 entry,
7165 (uint64_t)entry->vme_start,
7166 (uint64_t)entry->vme_end);
7167 }
7168 if (entry->superpage_size && (start & ~SUPERPAGE_MASK)) { /* extend request to whole entry */ start = SUPERPAGE_ROUND_DOWN(start);
/* NOTE(review): this repeats the round-down already done at the end of the previous line; redundant but harmless. */
7169 start = SUPERPAGE_ROUND_DOWN(start);
7170 continue;
7171 }
7172 if (start == entry->vme_start) {
7173 /*
7174 * No need to clip. We don't want to cause
7175 * any unnecessary unnesting in this case...
7176 */
7177 } else {
7178 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
7179 entry->map_aligned &&
7180 !VM_MAP_PAGE_ALIGNED(
7181 start,
7182 VM_MAP_PAGE_MASK(map))) {
7183 /*
7184 * The entry will no longer be
7185 * map-aligned after clipping
7186 * and the caller said it's OK.
7187 */
7188 entry->map_aligned = FALSE;
7189 }
7190 if (map == kalloc_map) {
7191 panic("vm_map_delete(%p,0x%llx,0x%llx):"
7192 " clipping %p at 0x%llx\n",
7193 map,
7194 (uint64_t)start,
7195 (uint64_t)end,
7196 entry,
7197 (uint64_t)start);
7198 }
7199 vm_map_clip_start(map, entry, start);
7200 }
7201
7202 /*
7203 * Fix the lookup hint now, rather than each
7204 * time through the loop.
7205 */
7206 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
7207 } else {
/* Nothing is mapped at "start": fatal for an in-use kernel-pmap map. */
7208 if (map->pmap == kernel_pmap &&
7209 map->ref_count != 0) {
7210 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7211 "no map entry at 0x%llx\n",
7212 map,
7213 (uint64_t)start,
7214 (uint64_t)end,
7215 (uint64_t)start);
7216 }
/* presumably first_entry is the entry preceding "start" here, so begin with the one after it -- confirm against vm_map_lookup_entry() */
7217 entry = first_entry->vme_next;
7218 }
7219 break;
7220 }
/* A superpage entry is removed as a whole: widen "end" as well. */
7221 if (entry->superpage_size)
7222 end = SUPERPAGE_ROUND_UP(end);
7223
7224 need_wakeup = FALSE;
7225 /*
7226 * Step through all entries in this region
7227 */
7228 s = entry->vme_start;
7229 while ((entry != vm_map_to_entry(map)) && (s < end)) {
7230 /*
7231 * At this point, we have deleted all the memory entries
7232 * between "start" and "s". We still need to delete
7233 * all memory entries between "s" and "end".
7234 * While we were blocked and the map was unlocked, some
7235 * new memory entries could have been re-allocated between
7236 * "start" and "s" and we don't want to mess with those.
7237 * Some of those entries could even have been re-assembled
7238 * with an entry after "s" (in vm_map_simplify_entry()), so
7239 * we may have to vm_map_clip_start() again.
7240 */
7241
7242 if (entry->vme_start >= s) {
7243 /*
7244 * This entry starts on or after "s"
7245 * so no need to clip its start.
7246 */
7247 } else {
7248 /*
7249 * This entry has been re-assembled by a
7250 * vm_map_simplify_entry(). We need to
7251 * re-clip its start.
7252 */
7253 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
7254 entry->map_aligned &&
7255 !VM_MAP_PAGE_ALIGNED(s,
7256 VM_MAP_PAGE_MASK(map))) {
7257 /*
7258 * The entry will no longer be map-aligned
7259 * after clipping and the caller said it's OK.
7260 */
7261 entry->map_aligned = FALSE;
7262 }
7263 if (map == kalloc_map) {
7264 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7265 "clipping %p at 0x%llx\n",
7266 map,
7267 (uint64_t)start,
7268 (uint64_t)end,
7269 entry,
7270 (uint64_t)s);
7271 }
7272 vm_map_clip_start(map, entry, s);
7273 }
7274 if (entry->vme_end <= end) {
7275 /*
7276 * This entry is going away completely, so no need
7277 * to clip and possibly cause an unnecessary unnesting.
7278 */
7279 } else {
7280 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
7281 entry->map_aligned &&
7282 !VM_MAP_PAGE_ALIGNED(end,
7283 VM_MAP_PAGE_MASK(map))) {
7284 /*
7285 * The entry will no longer be map-aligned
7286 * after clipping and the caller said it's OK.
7287 */
7288 entry->map_aligned = FALSE;
7289 }
7290 if (map == kalloc_map) {
7291 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7292 "clipping %p at 0x%llx\n",
7293 map,
7294 (uint64_t)start,
7295 (uint64_t)end,
7296 entry,
7297 (uint64_t)end);
7298 }
7299 vm_map_clip_end(map, entry, end);
7300 }
7301
/*
 * Permanent entries: panic on a kernel-pmap map; clear the flag when
 * the caller passed VM_MAP_REMOVE_IMMUTABLE; otherwise log (unless
 * logging is disabled) and fire a DTrace probe. In that last case the
 * entry stays permanent and is only stripped of its protections
 * further down.
 */
7302 if (entry->permanent) {
7303 if (map->pmap == kernel_pmap) {
7304 panic("%s(%p,0x%llx,0x%llx): "
7305 "attempt to remove permanent "
7306 "VM map entry "
7307 "%p [0x%llx:0x%llx]\n",
7308 __FUNCTION__,
7309 map,
7310 (uint64_t) start,
7311 (uint64_t) end,
7312 entry,
7313 (uint64_t) entry->vme_start,
7314 (uint64_t) entry->vme_end);
7315 } else if (flags & VM_MAP_REMOVE_IMMUTABLE) {
7316 // printf("FBDP %d[%s] removing permanent entry %p [0x%llx:0x%llx] prot 0x%x/0x%x\n", proc_selfpid(), (current_task()->bsd_info ? proc_name_address(current_task()->bsd_info) : "?"), entry, (uint64_t)entry->vme_start, (uint64_t)entry->vme_end, entry->protection, entry->max_protection);
7317 entry->permanent = FALSE;
7318 } else {
7319 if (!vm_map_executable_immutable_no_log) {
7320 printf("%d[%s] %s(0x%llx,0x%llx): "
7321 "permanent entry [0x%llx:0x%llx] "
7322 "prot 0x%x/0x%x\n",
7323 proc_selfpid(),
7324 (current_task()->bsd_info
7325 ? proc_name_address(current_task()->bsd_info)
7326 : "?"),
7327 __FUNCTION__,
7328 (uint64_t) start,
7329 (uint64_t) end,
7330 (uint64_t)entry->vme_start,
7331 (uint64_t)entry->vme_end,
7332 entry->protection,
7333 entry->max_protection);
7334 }
7335 /*
7336 * dtrace -n 'vm_map_delete_permanent { print("start=0x%llx end=0x%llx prot=0x%x/0x%x\n", arg0, arg1, arg2, arg3); stack(); ustack(); }'
7337 */
7338 DTRACE_VM5(vm_map_delete_permanent,
7339 vm_map_offset_t, entry->vme_start,
7340 vm_map_offset_t, entry->vme_end,
7341 vm_prot_t, entry->protection,
7342 vm_prot_t, entry->max_protection,
7343 int, VME_ALIAS(entry));
7344 }
7345 }
7346
7347
7348 if (entry->in_transition) {
7349 wait_result_t wait_result;
7350
7351 /*
7352 * Another thread is wiring/unwiring this entry.
7353 * Let the other thread know we are waiting.
7354 */
7355 assert(s == entry->vme_start);
7356 entry->needs_wakeup = TRUE;
7357
7358 /*
7359 * wake up anybody waiting on entries that we have
7360 * already unwired/deleted.
7361 */
7362 if (need_wakeup) {
7363 vm_map_entry_wakeup(map);
7364 need_wakeup = FALSE;
7365 }
7366
7367 wait_result = vm_map_entry_wait(map, interruptible);
7368
7369 if (interruptible &&
7370 wait_result == THREAD_INTERRUPTED) {
7371 /*
7372 * We do not clear the needs_wakeup flag,
7373 * since we cannot tell if we were the only one.
7374 */
7375 return KERN_ABORTED;
7376 }
7377
7378 /*
7379 * The entry could have been clipped or it
7380 * may not exist anymore. Look it up again.
7381 */
7382 if (!vm_map_lookup_entry(map, s, &first_entry)) {
7383 /*
7384 * User: use the next entry
7385 */
7386 entry = first_entry->vme_next;
7387 s = entry->vme_start;
7388 } else {
7389 entry = first_entry;
7390 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
7391 }
7392 last_timestamp = map->timestamp;
7393 continue;
7394 } /* end in_transition */
7395
7396 if (entry->wired_count) {
7397 boolean_t user_wire;
7398
/* Remember whether any user wirings existed; passed on to the unwire calls below. */
7399 user_wire = entry->user_wired_count > 0;
7400
7401 /*
7402 * Remove a kernel wiring if requested
7403 */
7404 if (flags & VM_MAP_REMOVE_KUNWIRE) {
7405 entry->wired_count--;
7406 }
7407
7408 /*
7409 * Remove all user wirings for proper accounting
7410 */
7411 if (entry->user_wired_count > 0) {
7412 while (entry->user_wired_count)
7413 subtract_wire_counts(map, entry, user_wire);
7414 }
7415
7416 if (entry->wired_count != 0) {
7417 assert(map != kernel_map);
7418 /*
7419 * Cannot continue. Typical case is when
7420 * a user thread has physical io pending
7421 * on this page. Either wait for the
7422 * kernel wiring to go away or return an
7423 * error.
7424 */
7425 if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) {
7426 wait_result_t wait_result;
7427
7428 assert(s == entry->vme_start);
7429 entry->needs_wakeup = TRUE;
7430 wait_result = vm_map_entry_wait(map,
7431 interruptible);
7432
7433 if (interruptible &&
7434 wait_result == THREAD_INTERRUPTED) {
7435 /*
7436 * We do not clear the
7437 * needs_wakeup flag, since we
7438 * cannot tell if we were the
7439 * only one.
7440 */
7441 return KERN_ABORTED;
7442 }
7443
7444 /*
7445 * The entry could have been clipped or
7446 * it may not exist anymore. Look it
7447 * up again.
7448 */
7449 if (!vm_map_lookup_entry(map, s,
7450 &first_entry)) {
7451 assert(map != kernel_map);
7452 /*
7453 * User: use the next entry
7454 */
7455 entry = first_entry->vme_next;
7456 s = entry->vme_start;
7457 } else {
7458 entry = first_entry;
7459 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
7460 }
7461 last_timestamp = map->timestamp;
7462 continue;
7463 }
7464 else {
/* Not reachable as written: VM_MAP_REMOVE_WAIT_FOR_KWIRE is forced on at the top of this routine. */
7465 return KERN_FAILURE;
7466 }
7467 }
7468
/* wired_count is now zero: unwire the pages with the map unlocked. */
7469 entry->in_transition = TRUE;
7470 /*
7471 * copy current entry. see comment in vm_map_wire()
7472 */
7473 tmp_entry = *entry;
7474 assert(s == entry->vme_start);
7475
7476 /*
7477 * We can unlock the map now. The in_transition
7478 * state guarantees existence of the entry.
7479 */
7480 vm_map_unlock(map);
7481
7482 if (tmp_entry.is_sub_map) {
7483 vm_map_t sub_map;
7484 vm_map_offset_t sub_start, sub_end;
7485 pmap_t pmap;
7486 vm_map_offset_t pmap_addr;
7487
7488
7489 sub_map = VME_SUBMAP(&tmp_entry);
7490 sub_start = VME_OFFSET(&tmp_entry);
7491 sub_end = sub_start + (tmp_entry.vme_end -
7492 tmp_entry.vme_start);
/* Both branches use the same pmap_addr; only the pmap differs. */
7493 if (tmp_entry.use_pmap) {
7494 pmap = sub_map->pmap;
7495 pmap_addr = tmp_entry.vme_start;
7496 } else {
7497 pmap = map->pmap;
7498 pmap_addr = tmp_entry.vme_start;
7499 }
/* The result of the nested unwire is deliberately ignored. */
7500 (void) vm_map_unwire_nested(sub_map,
7501 sub_start, sub_end,
7502 user_wire,
7503 pmap, pmap_addr);
7504 } else {
7505
/* kernel_object entries get their pmap mappings removed before the unwire. */
7506 if (VME_OBJECT(&tmp_entry) == kernel_object) {
7507 pmap_protect_options(
7508 map->pmap,
7509 tmp_entry.vme_start,
7510 tmp_entry.vme_end,
7511 VM_PROT_NONE,
7512 PMAP_OPTIONS_REMOVE,
7513 NULL);
7514 }
7515 vm_fault_unwire(map, &tmp_entry,
7516 VME_OBJECT(&tmp_entry) == kernel_object,
7517 map->pmap, tmp_entry.vme_start);
7518 }
7519
7520 vm_map_lock(map);
7521
7522 if (last_timestamp+1 != map->timestamp) {
7523 /*
7524 * Find the entry again. It could have
7525 * been clipped after we unlocked the map.
7526 */
7527 if (!vm_map_lookup_entry(map, s, &first_entry)){
7528 assert((map != kernel_map) &&
7529 (!entry->is_sub_map));
7530 first_entry = first_entry->vme_next;
7531 s = first_entry->vme_start;
7532 } else {
/* NOTE(review): the hint is derived from the pre-unlock "entry", not from the freshly looked-up "first_entry" -- confirm this is intended. */
7533 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
7534 }
7535 } else {
7536 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
7537 first_entry = entry;
7538 }
7539
7540 last_timestamp = map->timestamp;
7541
/*
 * Clear in_transition on every entry that came from the original one
 * (saved in tmp_entry) and note whether anybody is waiting on them.
 */
7542 entry = first_entry;
7543 while ((entry != vm_map_to_entry(map)) &&
7544 (entry->vme_start < tmp_entry.vme_end)) {
7545 assert(entry->in_transition);
7546 entry->in_transition = FALSE;
7547 if (entry->needs_wakeup) {
7548 entry->needs_wakeup = FALSE;
7549 need_wakeup = TRUE;
7550 }
7551 entry = entry->vme_next;
7552 }
7553 /*
7554 * We have unwired the entry(s). Go back and
7555 * delete them.
7556 */
7557 entry = first_entry;
7558 continue;
7559 }
7560
7561 /* entry is unwired */
7562 assert(entry->wired_count == 0);
7563 assert(entry->user_wired_count == 0);
7564
7565 assert(s == entry->vme_start);
7566
7567 if (flags & VM_MAP_REMOVE_NO_PMAP_CLEANUP) {
7568 /*
7569 * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
7570 * vm_map_delete(), some map entries might have been
7571 * transferred to a "zap_map", which doesn't have a
7572 * pmap. The original pmap has already been flushed
7573 * in the vm_map_delete() call targeting the original
7574 * map, but when we get to destroying the "zap_map",
7575 * we don't have any pmap to flush, so let's just skip
7576 * all this.
7577 */
7578 } else if (entry->is_sub_map) {
7579 if (entry->use_pmap) {
7580 #ifndef NO_NESTED_PMAP
7581 int pmap_flags;
7582
7583 if (flags & VM_MAP_REMOVE_NO_UNNESTING) {
7584 /*
7585 * This is the final cleanup of the
7586 * address space being terminated.
7587 * No new mappings are expected and
7588 * we don't really need to unnest the
7589 * shared region (and lose the "global"
7590 * pmap mappings, if applicable).
7591 *
7592 * Tell the pmap layer that we're
7593 * "clean" wrt nesting.
7594 */
7595 pmap_flags = PMAP_UNNEST_CLEAN;
7596 } else {
7597 /*
7598 * We're unmapping part of the nested
7599 * shared region, so we can't keep the
7600 * nested pmap.
7601 */
7602 pmap_flags = 0;
7603 }
7604 pmap_unnest_options(
7605 map->pmap,
7606 (addr64_t)entry->vme_start,
7607 entry->vme_end - entry->vme_start,
7608 pmap_flags);
7609 #endif /* NO_NESTED_PMAP */
7610 if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
7611 /* clean up parent map/maps */
7612 vm_map_submap_pmap_clean(
7613 map, entry->vme_start,
7614 entry->vme_end,
7615 VME_SUBMAP(entry),
7616 VME_OFFSET(entry));
7617 }
7618 } else {
7619 vm_map_submap_pmap_clean(
7620 map, entry->vme_start, entry->vme_end,
7621 VME_SUBMAP(entry),
7622 VME_OFFSET(entry));
7623 }
7624 } else if (VME_OBJECT(entry) != kernel_object &&
7625 VME_OBJECT(entry) != compressor_object) {
7626 object = VME_OBJECT(entry);
7627 if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
7628 vm_object_pmap_protect_options(
7629 object, VME_OFFSET(entry),
7630 entry->vme_end - entry->vme_start,
7631 PMAP_NULL,
7632 entry->vme_start,
7633 VM_PROT_NONE,
7634 PMAP_OPTIONS_REMOVE);
7635 } else if ((VME_OBJECT(entry) != VM_OBJECT_NULL) ||
7636 (map->pmap == kernel_pmap)) {
7637 /* Remove translations associated
7638 * with this range unless the entry
7639 * does not have an object, or
7640 * it's the kernel map or a descendant
7641 * since the platform could potentially
7642 * create "backdoor" mappings invisible
7643 * to the VM. It is expected that
7644 * objectless, non-kernel ranges
7645 * do not have such VM invisible
7646 * translations.
7647 */
7648 pmap_remove_options(map->pmap,
7649 (addr64_t)entry->vme_start,
7650 (addr64_t)entry->vme_end,
7651 PMAP_OPTIONS_REMOVE);
7652 }
7653 }
7654
7655 if (entry->iokit_acct) {
7656 /* alternate accounting */
7657 DTRACE_VM4(vm_map_iokit_unmapped_region,
7658 vm_map_t, map,
7659 vm_map_offset_t, entry->vme_start,
7660 vm_map_offset_t, entry->vme_end,
7661 int, VME_ALIAS(entry));
7662 vm_map_iokit_unmapped_region(map,
7663 (entry->vme_end -
7664 entry->vme_start));
7665 entry->iokit_acct = FALSE;
7666 entry->use_pmap = FALSE;
7667 }
7668
7669 /*
7670 * All pmap mappings for this map entry must have been
7671 * cleared by now.
7672 */
7673 #if DEBUG
7674 assert(vm_map_pmap_is_empty(map,
7675 entry->vme_start,
7676 entry->vme_end));
7677 #endif /* DEBUG */
7678
7679 next = entry->vme_next;
7680
/* An in-use kernel-pmap map must not have a hole inside the range being deleted. */
7681 if (map->pmap == kernel_pmap &&
7682 map->ref_count != 0 &&
7683 entry->vme_end < end &&
7684 (next == vm_map_to_entry(map) ||
7685 next->vme_start != entry->vme_end)) {
7686 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7687 "hole after %p at 0x%llx\n",
7688 map,
7689 (uint64_t)start,
7690 (uint64_t)end,
7691 entry,
7692 (uint64_t)entry->vme_end);
7693 }
7694
/* Advance "s" past this entry before the entry goes away. */
7695 s = next->vme_start;
7696 last_timestamp = map->timestamp;
7697
7698 if (entry->permanent) {
7699 /*
7700 * A permanent entry can not be removed, so leave it
7701 * in place but remove all access permissions.
7702 */
7703 entry->protection = VM_PROT_NONE;
7704 entry->max_protection = VM_PROT_NONE;
7705 } else if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) &&
7706 zap_map != VM_MAP_NULL) {
7707 vm_map_size_t entry_size;
7708 /*
7709 * The caller wants to save the affected VM map entries
7710 * into the "zap_map". The caller will take care of
7711 * these entries.
7712 */
7713 /* unlink the entry from "map" ... */
7714 vm_map_store_entry_unlink(map, entry);
7715 /* ... and add it to the end of the "zap_map" */
7716 vm_map_store_entry_link(zap_map,
7717 vm_map_last_entry(zap_map),
7718 entry);
7719 entry_size = entry->vme_end - entry->vme_start;
7720 map->size -= entry_size;
7721 zap_map->size += entry_size;
7722 /* we didn't unlock the map, so no timestamp increase */
/* (the decrement keeps the "last_timestamp+1 != map->timestamp" test below from triggering a re-lookup) */
7723 last_timestamp--;
7724 } else {
7725 vm_map_entry_delete(map, entry);
7726 /* vm_map_entry_delete unlocks the map */
7727 vm_map_lock(map);
7728 }
7729
7730 entry = next;
7731
7732 if(entry == vm_map_to_entry(map)) {
7733 break;
7734 }
7735 if (last_timestamp+1 != map->timestamp) {
7736 /*
7737 * we are responsible for deleting everything
7738 * from the given space, if someone has interfered
7739 * we pick up where we left off, back fills should
7740 * be all right for anyone except map_delete and
7741 * we have to assume that the task has been fully
7742 * disabled before we get here
7743 */
7744 if (!vm_map_lookup_entry(map, s, &entry)){
7745 entry = entry->vme_next;
7746 s = entry->vme_start;
7747 } else {
7748 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
7749 }
7750 /*
7751 * others can not only allocate behind us, we can
7752 * also see coalesce while we don't have the map lock
7753 */
7754 if(entry == vm_map_to_entry(map)) {
7755 break;
7756 }
7757 }
7758 last_timestamp = map->timestamp;
7759 }
7760
/* Address space was freed: wake threads waiting for space in this map. */
7761 if (map->wait_for_space)
7762 thread_wakeup((event_t) map);
7763 /*
7764 * wake up anybody waiting on entries that we have already deleted.
7765 */
7766 if (need_wakeup)
7767 vm_map_entry_wakeup(map);
7768
7769 return KERN_SUCCESS;
7770 }
7771
7772 /*
7773 * vm_map_remove:
7774 *
7775 * Remove the given address range from the target map.
7776 * This is the exported form of vm_map_delete.
7777 */
7778 kern_return_t
7779 vm_map_remove(
7780 vm_map_t map,
7781 vm_map_offset_t start,
7782 vm_map_offset_t end,
7783 boolean_t flags)
7784 {
7785 kern_return_t result;
7786
7787 vm_map_lock(map);
7788 VM_MAP_RANGE_CHECK(map, start, end);
7789 /*
7790 * For the zone_map, the kernel controls the allocation/freeing of memory.
7791 * Any free to the zone_map should be within the bounds of the map and
7792 * should free up memory. If the VM_MAP_RANGE_CHECK() silently converts a
7793 * free to the zone_map into a no-op, there is a problem and we should
7794 * panic.
7795 */
7796 if ((map == zone_map) && (start == end))
7797 panic("Nothing being freed to the zone_map. start = end = %p\n", (void *)start);
7798 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
7799 vm_map_unlock(map);
7800
7801 return(result);
7802 }
7803
7804 /*
7805 * vm_map_remove_locked:
7806 *
7807 * Remove the given address range from the target locked map.
7808 * This is the exported form of vm_map_delete.
7809 */
7810 kern_return_t
7811 vm_map_remove_locked(
7812 vm_map_t map,
7813 vm_map_offset_t start,
7814 vm_map_offset_t end,
7815 boolean_t flags)
7816 {
7817 kern_return_t result;
7818
7819 VM_MAP_RANGE_CHECK(map, start, end);
7820 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
7821 return(result);
7822 }
7823
7824
7825 /*
7826 * Routine: vm_map_copy_discard
7827 *
7828 * Description:
7829 * Dispose of a map copy object (returned by
7830 * vm_map_copyin).
7831 */
7832 void
7833 vm_map_copy_discard(
7834 vm_map_copy_t copy)
7835 {
7836 if (copy == VM_MAP_COPY_NULL)
7837 return;
7838
7839 switch (copy->type) {
7840 case VM_MAP_COPY_ENTRY_LIST:
7841 while (vm_map_copy_first_entry(copy) !=
7842 vm_map_copy_to_entry(copy)) {
7843 vm_map_entry_t entry = vm_map_copy_first_entry(copy);
7844
7845 vm_map_copy_entry_unlink(copy, entry);
7846 if (entry->is_sub_map) {
7847 vm_map_deallocate(VME_SUBMAP(entry));
7848 } else {
7849 vm_object_deallocate(VME_OBJECT(entry));
7850 }
7851 vm_map_copy_entry_dispose(copy, entry);
7852 }
7853 break;
7854 case VM_MAP_COPY_OBJECT:
7855 vm_object_deallocate(copy->cpy_object);
7856 break;
7857 case VM_MAP_COPY_KERNEL_BUFFER:
7858
7859 /*
7860 * The vm_map_copy_t and possibly the data buffer were
7861 * allocated by a single call to kalloc(), i.e. the
7862 * vm_map_copy_t was not allocated out of the zone.
7863 */
7864 if (copy->size > msg_ool_size_small || copy->offset)
7865 panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
7866 (long long)copy->size, (long long)copy->offset);
7867 kfree(copy, copy->size + cpy_kdata_hdr_sz);
7868 return;
7869 }
7870 zfree(vm_map_copy_zone, copy);
7871 }
7872
7873 /*
7874 * Routine: vm_map_copy_copy
7875 *
7876 * Description:
7877 * Move the information in a map copy object to
7878 * a new map copy object, leaving the old one
7879 * empty.
7880 *
7881 * This is used by kernel routines that need
7882 * to look at out-of-line data (in copyin form)
7883 * before deciding whether to return SUCCESS.
7884 * If the routine returns FAILURE, the original
7885 * copy object will be deallocated; therefore,
7886 * these routines must make a copy of the copy
7887 * object and leave the original empty so that
7888 * deallocation will not fail.
7889 */
7890 vm_map_copy_t
7891 vm_map_copy_copy(
7892 vm_map_copy_t copy)
7893 {
7894 vm_map_copy_t new_copy;
7895
7896 if (copy == VM_MAP_COPY_NULL)
7897 return VM_MAP_COPY_NULL;
7898
7899 /*
7900 * Allocate a new copy object, and copy the information
7901 * from the old one into it.
7902 */
7903
7904 new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
7905 new_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
7906 *new_copy = *copy;
7907
7908 if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
7909 /*
7910 * The links in the entry chain must be
7911 * changed to point to the new copy object.
7912 */
7913 vm_map_copy_first_entry(copy)->vme_prev
7914 = vm_map_copy_to_entry(new_copy);
7915 vm_map_copy_last_entry(copy)->vme_next
7916 = vm_map_copy_to_entry(new_copy);
7917 }
7918
7919 /*
7920 * Change the old copy object into one that contains
7921 * nothing to be deallocated.
7922 */
7923 copy->type = VM_MAP_COPY_OBJECT;
7924 copy->cpy_object = VM_OBJECT_NULL;
7925
7926 /*
7927 * Return the new object.
7928 */
7929 return new_copy;
7930 }
7931
7932 static kern_return_t
7933 vm_map_overwrite_submap_recurse(
7934 vm_map_t dst_map,
7935 vm_map_offset_t dst_addr,
7936 vm_map_size_t dst_size)
7937 {
7938 vm_map_offset_t dst_end;
7939 vm_map_entry_t tmp_entry;
7940 vm_map_entry_t entry;
7941 kern_return_t result;
7942 boolean_t encountered_sub_map = FALSE;
7943
7944
7945
7946 /*
7947 * Verify that the destination is all writeable
7948 * initially. We have to trunc the destination
7949 * address and round the copy size or we'll end up
7950 * splitting entries in strange ways.
7951 */
7952
7953 dst_end = vm_map_round_page(dst_addr + dst_size,
7954 VM_MAP_PAGE_MASK(dst_map));
7955 vm_map_lock(dst_map);
7956
7957 start_pass_1:
7958 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
7959 vm_map_unlock(dst_map);
7960 return(KERN_INVALID_ADDRESS);
7961 }
7962
7963 vm_map_clip_start(dst_map,
7964 tmp_entry,
7965 vm_map_trunc_page(dst_addr,
7966 VM_MAP_PAGE_MASK(dst_map)));
7967 if (tmp_entry->is_sub_map) {
7968 /* clipping did unnest if needed */
7969 assert(!tmp_entry->use_pmap);
7970 }
7971
7972 for (entry = tmp_entry;;) {
7973 vm_map_entry_t next;
7974
7975 next = entry->vme_next;
7976 while(entry->is_sub_map) {
7977 vm_map_offset_t sub_start;
7978 vm_map_offset_t sub_end;
7979 vm_map_offset_t local_end;
7980
7981 if (entry->in_transition) {
7982 /*
7983 * Say that we are waiting, and wait for entry.
7984 */
7985 entry->needs_wakeup = TRUE;
7986 vm_map_entry_wait(dst_map, THREAD_UNINT);
7987
7988 goto start_pass_1;
7989 }
7990
7991 encountered_sub_map = TRUE;
7992 sub_start = VME_OFFSET(entry);
7993
7994 if(entry->vme_end < dst_end)
7995 sub_end = entry->vme_end;
7996 else
7997 sub_end = dst_end;
7998 sub_end -= entry->vme_start;
7999 sub_end += VME_OFFSET(entry);
8000 local_end = entry->vme_end;
8001 vm_map_unlock(dst_map);
8002
8003 result = vm_map_overwrite_submap_recurse(
8004 VME_SUBMAP(entry),
8005 sub_start,
8006 sub_end - sub_start);
8007
8008 if(result != KERN_SUCCESS)
8009 return result;
8010 if (dst_end <= entry->vme_end)
8011 return KERN_SUCCESS;
8012 vm_map_lock(dst_map);
8013 if(!vm_map_lookup_entry(dst_map, local_end,
8014 &tmp_entry)) {
8015 vm_map_unlock(dst_map);
8016 return(KERN_INVALID_ADDRESS);
8017 }
8018 entry = tmp_entry;
8019 next = entry->vme_next;
8020 }
8021
8022 if ( ! (entry->protection & VM_PROT_WRITE)) {
8023 vm_map_unlock(dst_map);
8024 return(KERN_PROTECTION_FAILURE);
8025 }
8026
8027 /*
8028 * If the entry is in transition, we must wait
8029 * for it to exit that state. Anything could happen
8030 * when we unlock the map, so start over.
8031 */
8032 if (entry->in_transition) {
8033
8034 /*
8035 * Say that we are waiting, and wait for entry.
8036 */
8037 entry->needs_wakeup = TRUE;
8038 vm_map_entry_wait(dst_map, THREAD_UNINT);
8039
8040 goto start_pass_1;
8041 }
8042
8043 /*
8044 * our range is contained completely within this map entry
8045 */
8046 if (dst_end <= entry->vme_end) {
8047 vm_map_unlock(dst_map);
8048 return KERN_SUCCESS;
8049 }
8050 /*
8051 * check that range specified is contiguous region
8052 */
8053 if ((next == vm_map_to_entry(dst_map)) ||
8054 (next->vme_start != entry->vme_end)) {
8055 vm_map_unlock(dst_map);
8056 return(KERN_INVALID_ADDRESS);
8057 }
8058
8059 /*
8060 * Check for permanent objects in the destination.
8061 */
8062 if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
8063 ((!VME_OBJECT(entry)->internal) ||
8064 (VME_OBJECT(entry)->true_share))) {
8065 if(encountered_sub_map) {
8066 vm_map_unlock(dst_map);
8067 return(KERN_FAILURE);
8068 }
8069 }
8070
8071
8072 entry = next;
8073 }/* for */
8074 vm_map_unlock(dst_map);
8075 return(KERN_SUCCESS);
8076 }
8077
8078 /*
8079 * Routine: vm_map_copy_overwrite
8080 *
8081 * Description:
8082 * Copy the memory described by the map copy
8083 * object (copy; returned by vm_map_copyin) onto
8084 * the specified destination region (dst_map, dst_addr).
8085 * The destination must be writeable.
8086 *
8087 * Unlike vm_map_copyout, this routine actually
8088 * writes over previously-mapped memory. If the
8089 * previous mapping was to a permanent (user-supplied)
8090 * memory object, it is preserved.
8091 *
8092 * The attributes (protection and inheritance) of the
8093 * destination region are preserved.
8094 *
8095 * If successful, consumes the copy object.
8096 * Otherwise, the caller is responsible for it.
8097 *
8098 * Implementation notes:
8099 * To overwrite aligned temporary virtual memory, it is
8100 * sufficient to remove the previous mapping and insert
8101 * the new copy. This replacement is done either on
8102 * the whole region (if no permanent virtual memory
8103 * objects are embedded in the destination region) or
8104 * in individual map entries.
8105 *
8106 * To overwrite permanent virtual memory , it is necessary
8107 * to copy each page, as the external memory management
8108 * interface currently does not provide any optimizations.
8109 *
8110 * Unaligned memory also has to be copied. It is possible
8111 * to use 'vm_trickery' to copy the aligned data. This is
8112 * not done but not hard to implement.
8113 *
8114 * Once a page of permanent memory has been overwritten,
8115 * it is impossible to interrupt this function; otherwise,
8116 * the call would be neither atomic nor location-independent.
8117 * The kernel-state portion of a user thread must be
8118 * interruptible.
8119 *
8120 * It may be expensive to forward all requests that might
8121 * overwrite permanent memory (vm_write, vm_copy) to
8122 * uninterruptible kernel threads. This routine may be
8123 * called by interruptible threads; however, success is
8124 * not guaranteed -- if the request cannot be performed
8125 * atomically and interruptibly, an error indication is
8126 * returned.
8127 */
8128
8129 static kern_return_t
8130 vm_map_copy_overwrite_nested(
8131 vm_map_t dst_map,
8132 vm_map_address_t dst_addr,
8133 vm_map_copy_t copy,
8134 boolean_t interruptible,
8135 pmap_t pmap,
8136 boolean_t discard_on_success)
8137 {
8138 vm_map_offset_t dst_end;
8139 vm_map_entry_t tmp_entry;
8140 vm_map_entry_t entry;
8141 kern_return_t kr;
8142 boolean_t aligned = TRUE;
8143 boolean_t contains_permanent_objects = FALSE;
8144 boolean_t encountered_sub_map = FALSE;
8145 vm_map_offset_t base_addr;
8146 vm_map_size_t copy_size;
8147 vm_map_size_t total_size;
8148
8149
8150 /*
8151 * Check for null copy object.
8152 */
8153
8154 if (copy == VM_MAP_COPY_NULL)
8155 return(KERN_SUCCESS);
8156
8157 /*
8158 * Check for special kernel buffer allocated
8159 * by new_ipc_kmsg_copyin.
8160 */
8161
8162 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
8163 return(vm_map_copyout_kernel_buffer(
8164 dst_map, &dst_addr,
8165 copy, copy->size, TRUE, discard_on_success));
8166 }
8167
8168 /*
8169 * Only works for entry lists at the moment. Will
8170 * support page lists later.
8171 */
8172
8173 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
8174
8175 if (copy->size == 0) {
8176 if (discard_on_success)
8177 vm_map_copy_discard(copy);
8178 return(KERN_SUCCESS);
8179 }
8180
8181 /*
8182 * Verify that the destination is all writeable
8183 * initially. We have to trunc the destination
8184 * address and round the copy size or we'll end up
8185 * splitting entries in strange ways.
8186 */
8187
8188 if (!VM_MAP_PAGE_ALIGNED(copy->size,
8189 VM_MAP_PAGE_MASK(dst_map)) ||
8190 !VM_MAP_PAGE_ALIGNED(copy->offset,
8191 VM_MAP_PAGE_MASK(dst_map)) ||
8192 !VM_MAP_PAGE_ALIGNED(dst_addr,
8193 VM_MAP_PAGE_MASK(dst_map)))
8194 {
8195 aligned = FALSE;
8196 dst_end = vm_map_round_page(dst_addr + copy->size,
8197 VM_MAP_PAGE_MASK(dst_map));
8198 } else {
8199 dst_end = dst_addr + copy->size;
8200 }
8201
8202 vm_map_lock(dst_map);
8203
8204 /* LP64todo - remove this check when vm_map_commpage64()
8205 * no longer has to stuff in a map_entry for the commpage
8206 * above the map's max_offset.
8207 */
8208 if (dst_addr >= dst_map->max_offset) {
8209 vm_map_unlock(dst_map);
8210 return(KERN_INVALID_ADDRESS);
8211 }
8212
8213 start_pass_1:
8214 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
8215 vm_map_unlock(dst_map);
8216 return(KERN_INVALID_ADDRESS);
8217 }
8218 vm_map_clip_start(dst_map,
8219 tmp_entry,
8220 vm_map_trunc_page(dst_addr,
8221 VM_MAP_PAGE_MASK(dst_map)));
8222 for (entry = tmp_entry;;) {
8223 vm_map_entry_t next = entry->vme_next;
8224
8225 while(entry->is_sub_map) {
8226 vm_map_offset_t sub_start;
8227 vm_map_offset_t sub_end;
8228 vm_map_offset_t local_end;
8229
8230 if (entry->in_transition) {
8231
8232 /*
8233 * Say that we are waiting, and wait for entry.
8234 */
8235 entry->needs_wakeup = TRUE;
8236 vm_map_entry_wait(dst_map, THREAD_UNINT);
8237
8238 goto start_pass_1;
8239 }
8240
8241 local_end = entry->vme_end;
8242 if (!(entry->needs_copy)) {
8243 /* if needs_copy we are a COW submap */
8244 /* in such a case we just replace so */
8245 /* there is no need for the follow- */
8246 /* ing check. */
8247 encountered_sub_map = TRUE;
8248 sub_start = VME_OFFSET(entry);
8249
8250 if(entry->vme_end < dst_end)
8251 sub_end = entry->vme_end;
8252 else
8253 sub_end = dst_end;
8254 sub_end -= entry->vme_start;
8255 sub_end += VME_OFFSET(entry);
8256 vm_map_unlock(dst_map);
8257
8258 kr = vm_map_overwrite_submap_recurse(
8259 VME_SUBMAP(entry),
8260 sub_start,
8261 sub_end - sub_start);
8262 if(kr != KERN_SUCCESS)
8263 return kr;
8264 vm_map_lock(dst_map);
8265 }
8266
8267 if (dst_end <= entry->vme_end)
8268 goto start_overwrite;
8269 if(!vm_map_lookup_entry(dst_map, local_end,
8270 &entry)) {
8271 vm_map_unlock(dst_map);
8272 return(KERN_INVALID_ADDRESS);
8273 }
8274 next = entry->vme_next;
8275 }
8276
8277 if ( ! (entry->protection & VM_PROT_WRITE)) {
8278 vm_map_unlock(dst_map);
8279 return(KERN_PROTECTION_FAILURE);
8280 }
8281
8282 /*
8283 * If the entry is in transition, we must wait
8284 * for it to exit that state. Anything could happen
8285 * when we unlock the map, so start over.
8286 */
8287 if (entry->in_transition) {
8288
8289 /*
8290 * Say that we are waiting, and wait for entry.
8291 */
8292 entry->needs_wakeup = TRUE;
8293 vm_map_entry_wait(dst_map, THREAD_UNINT);
8294
8295 goto start_pass_1;
8296 }
8297
8298 /*
8299 * our range is contained completely within this map entry
8300 */
8301 if (dst_end <= entry->vme_end)
8302 break;
8303 /*
8304 * check that range specified is contiguous region
8305 */
8306 if ((next == vm_map_to_entry(dst_map)) ||
8307 (next->vme_start != entry->vme_end)) {
8308 vm_map_unlock(dst_map);
8309 return(KERN_INVALID_ADDRESS);
8310 }
8311
8312
8313 /*
8314 * Check for permanent objects in the destination.
8315 */
8316 if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
8317 ((!VME_OBJECT(entry)->internal) ||
8318 (VME_OBJECT(entry)->true_share))) {
8319 contains_permanent_objects = TRUE;
8320 }
8321
8322 entry = next;
8323 }/* for */
8324
8325 start_overwrite:
8326 /*
8327 * If there are permanent objects in the destination, then
8328 * the copy cannot be interrupted.
8329 */
8330
8331 if (interruptible && contains_permanent_objects) {
8332 vm_map_unlock(dst_map);
8333 return(KERN_FAILURE); /* XXX */
8334 }
8335
8336 /*
8337 *
8338 * Make a second pass, overwriting the data
8339 * At the beginning of each loop iteration,
8340 * the next entry to be overwritten is "tmp_entry"
8341 * (initially, the value returned from the lookup above),
8342 * and the starting address expected in that entry
8343 * is "start".
8344 */
8345
8346 total_size = copy->size;
8347 if(encountered_sub_map) {
8348 copy_size = 0;
8349 /* re-calculate tmp_entry since we've had the map */
8350 /* unlocked */
8351 if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
8352 vm_map_unlock(dst_map);
8353 return(KERN_INVALID_ADDRESS);
8354 }
8355 } else {
8356 copy_size = copy->size;
8357 }
8358
8359 base_addr = dst_addr;
8360 while(TRUE) {
8361 /* deconstruct the copy object and do in parts */
8362 /* only in sub_map, interruptable case */
8363 vm_map_entry_t copy_entry;
8364 vm_map_entry_t previous_prev = VM_MAP_ENTRY_NULL;
8365 vm_map_entry_t next_copy = VM_MAP_ENTRY_NULL;
8366 int nentries;
8367 int remaining_entries = 0;
8368 vm_map_offset_t new_offset = 0;
8369
8370 for (entry = tmp_entry; copy_size == 0;) {
8371 vm_map_entry_t next;
8372
8373 next = entry->vme_next;
8374
8375 /* tmp_entry and base address are moved along */
8376 /* each time we encounter a sub-map. Otherwise */
8377 /* entry can outpase tmp_entry, and the copy_size */
8378 /* may reflect the distance between them */
8379 /* if the current entry is found to be in transition */
8380 /* we will start over at the beginning or the last */
8381 /* encounter of a submap as dictated by base_addr */
8382 /* we will zero copy_size accordingly. */
8383 if (entry->in_transition) {
8384 /*
8385 * Say that we are waiting, and wait for entry.
8386 */
8387 entry->needs_wakeup = TRUE;
8388 vm_map_entry_wait(dst_map, THREAD_UNINT);
8389
8390 if(!vm_map_lookup_entry(dst_map, base_addr,
8391 &tmp_entry)) {
8392 vm_map_unlock(dst_map);
8393 return(KERN_INVALID_ADDRESS);
8394 }
8395 copy_size = 0;
8396 entry = tmp_entry;
8397 continue;
8398 }
8399 if (entry->is_sub_map) {
8400 vm_map_offset_t sub_start;
8401 vm_map_offset_t sub_end;
8402 vm_map_offset_t local_end;
8403
8404 if (entry->needs_copy) {
8405 /* if this is a COW submap */
8406 /* just back the range with a */
8407 /* anonymous entry */
8408 if(entry->vme_end < dst_end)
8409 sub_end = entry->vme_end;
8410 else
8411 sub_end = dst_end;
8412 if(entry->vme_start < base_addr)
8413 sub_start = base_addr;
8414 else
8415 sub_start = entry->vme_start;
8416 vm_map_clip_end(
8417 dst_map, entry, sub_end);
8418 vm_map_clip_start(
8419 dst_map, entry, sub_start);
8420 assert(!entry->use_pmap);
8421 assert(!entry->iokit_acct);
8422 entry->use_pmap = TRUE;
8423 entry->is_sub_map = FALSE;
8424 vm_map_deallocate(
8425 VME_SUBMAP(entry));
8426 VME_OBJECT_SET(entry, NULL);
8427 VME_OFFSET_SET(entry, 0);
8428 entry->is_shared = FALSE;
8429 entry->needs_copy = FALSE;
8430 entry->protection = VM_PROT_DEFAULT;
8431 entry->max_protection = VM_PROT_ALL;
8432 entry->wired_count = 0;
8433 entry->user_wired_count = 0;
8434 if(entry->inheritance
8435 == VM_INHERIT_SHARE)
8436 entry->inheritance = VM_INHERIT_COPY;
8437 continue;
8438 }
8439 /* first take care of any non-sub_map */
8440 /* entries to send */
8441 if(base_addr < entry->vme_start) {
8442 /* stuff to send */
8443 copy_size =
8444 entry->vme_start - base_addr;
8445 break;
8446 }
8447 sub_start = VME_OFFSET(entry);
8448
8449 if(entry->vme_end < dst_end)
8450 sub_end = entry->vme_end;
8451 else
8452 sub_end = dst_end;
8453 sub_end -= entry->vme_start;
8454 sub_end += VME_OFFSET(entry);
8455 local_end = entry->vme_end;
8456 vm_map_unlock(dst_map);
8457 copy_size = sub_end - sub_start;
8458
8459 /* adjust the copy object */
8460 if (total_size > copy_size) {
8461 vm_map_size_t local_size = 0;
8462 vm_map_size_t entry_size;
8463
8464 nentries = 1;
8465 new_offset = copy->offset;
8466 copy_entry = vm_map_copy_first_entry(copy);
8467 while(copy_entry !=
8468 vm_map_copy_to_entry(copy)){
8469 entry_size = copy_entry->vme_end -
8470 copy_entry->vme_start;
8471 if((local_size < copy_size) &&
8472 ((local_size + entry_size)
8473 >= copy_size)) {
8474 vm_map_copy_clip_end(copy,
8475 copy_entry,
8476 copy_entry->vme_start +
8477 (copy_size - local_size));
8478 entry_size = copy_entry->vme_end -
8479 copy_entry->vme_start;
8480 local_size += entry_size;
8481 new_offset += entry_size;
8482 }
8483 if(local_size >= copy_size) {
8484 next_copy = copy_entry->vme_next;
8485 copy_entry->vme_next =
8486 vm_map_copy_to_entry(copy);
8487 previous_prev =
8488 copy->cpy_hdr.links.prev;
8489 copy->cpy_hdr.links.prev = copy_entry;
8490 copy->size = copy_size;
8491 remaining_entries =
8492 copy->cpy_hdr.nentries;
8493 remaining_entries -= nentries;
8494 copy->cpy_hdr.nentries = nentries;
8495 break;
8496 } else {
8497 local_size += entry_size;
8498 new_offset += entry_size;
8499 nentries++;
8500 }
8501 copy_entry = copy_entry->vme_next;
8502 }
8503 }
8504
8505 if((entry->use_pmap) && (pmap == NULL)) {
8506 kr = vm_map_copy_overwrite_nested(
8507 VME_SUBMAP(entry),
8508 sub_start,
8509 copy,
8510 interruptible,
8511 VME_SUBMAP(entry)->pmap,
8512 TRUE);
8513 } else if (pmap != NULL) {
8514 kr = vm_map_copy_overwrite_nested(
8515 VME_SUBMAP(entry),
8516 sub_start,
8517 copy,
8518 interruptible, pmap,
8519 TRUE);
8520 } else {
8521 kr = vm_map_copy_overwrite_nested(
8522 VME_SUBMAP(entry),
8523 sub_start,
8524 copy,
8525 interruptible,
8526 dst_map->pmap,
8527 TRUE);
8528 }
8529 if(kr != KERN_SUCCESS) {
8530 if(next_copy != NULL) {
8531 copy->cpy_hdr.nentries +=
8532 remaining_entries;
8533 copy->cpy_hdr.links.prev->vme_next =
8534 next_copy;
8535 copy->cpy_hdr.links.prev
8536 = previous_prev;
8537 copy->size = total_size;
8538 }
8539 return kr;
8540 }
8541 if (dst_end <= local_end) {
8542 return(KERN_SUCCESS);
8543 }
8544 /* otherwise copy no longer exists, it was */
8545 /* destroyed after successful copy_overwrite */
8546 copy = (vm_map_copy_t)
8547 zalloc(vm_map_copy_zone);
8548 copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
8549 vm_map_copy_first_entry(copy) =
8550 vm_map_copy_last_entry(copy) =
8551 vm_map_copy_to_entry(copy);
8552 copy->type = VM_MAP_COPY_ENTRY_LIST;
8553 copy->offset = new_offset;
8554
8555 /*
8556 * XXX FBDP
8557 * this does not seem to deal with
8558 * the VM map store (R&B tree)
8559 */
8560
8561 total_size -= copy_size;
8562 copy_size = 0;
8563 /* put back remainder of copy in container */
8564 if(next_copy != NULL) {
8565 copy->cpy_hdr.nentries = remaining_entries;
8566 copy->cpy_hdr.links.next = next_copy;
8567 copy->cpy_hdr.links.prev = previous_prev;
8568 copy->size = total_size;
8569 next_copy->vme_prev =
8570 vm_map_copy_to_entry(copy);
8571 next_copy = NULL;
8572 }
8573 base_addr = local_end;
8574 vm_map_lock(dst_map);
8575 if(!vm_map_lookup_entry(dst_map,
8576 local_end, &tmp_entry)) {
8577 vm_map_unlock(dst_map);
8578 return(KERN_INVALID_ADDRESS);
8579 }
8580 entry = tmp_entry;
8581 continue;
8582 }
8583 if (dst_end <= entry->vme_end) {
8584 copy_size = dst_end - base_addr;
8585 break;
8586 }
8587
8588 if ((next == vm_map_to_entry(dst_map)) ||
8589 (next->vme_start != entry->vme_end)) {
8590 vm_map_unlock(dst_map);
8591 return(KERN_INVALID_ADDRESS);
8592 }
8593
8594 entry = next;
8595 }/* for */
8596
8597 next_copy = NULL;
8598 nentries = 1;
8599
8600 /* adjust the copy object */
8601 if (total_size > copy_size) {
8602 vm_map_size_t local_size = 0;
8603 vm_map_size_t entry_size;
8604
8605 new_offset = copy->offset;
8606 copy_entry = vm_map_copy_first_entry(copy);
8607 while(copy_entry != vm_map_copy_to_entry(copy)) {
8608 entry_size = copy_entry->vme_end -
8609 copy_entry->vme_start;
8610 if((local_size < copy_size) &&
8611 ((local_size + entry_size)
8612 >= copy_size)) {
8613 vm_map_copy_clip_end(copy, copy_entry,
8614 copy_entry->vme_start +
8615 (copy_size - local_size));
8616 entry_size = copy_entry->vme_end -
8617 copy_entry->vme_start;
8618 local_size += entry_size;
8619 new_offset += entry_size;
8620 }
8621 if(local_size >= copy_size) {
8622 next_copy = copy_entry->vme_next;
8623 copy_entry->vme_next =
8624 vm_map_copy_to_entry(copy);
8625 previous_prev =
8626 copy->cpy_hdr.links.prev;
8627 copy->cpy_hdr.links.prev = copy_entry;
8628 copy->size = copy_size;
8629 remaining_entries =
8630 copy->cpy_hdr.nentries;
8631 remaining_entries -= nentries;
8632 copy->cpy_hdr.nentries = nentries;
8633 break;
8634 } else {
8635 local_size += entry_size;
8636 new_offset += entry_size;
8637 nentries++;
8638 }
8639 copy_entry = copy_entry->vme_next;
8640 }
8641 }
8642
8643 if (aligned) {
8644 pmap_t local_pmap;
8645
8646 if(pmap)
8647 local_pmap = pmap;
8648 else
8649 local_pmap = dst_map->pmap;
8650
8651 if ((kr = vm_map_copy_overwrite_aligned(
8652 dst_map, tmp_entry, copy,
8653 base_addr, local_pmap)) != KERN_SUCCESS) {
8654 if(next_copy != NULL) {
8655 copy->cpy_hdr.nentries +=
8656 remaining_entries;
8657 copy->cpy_hdr.links.prev->vme_next =
8658 next_copy;
8659 copy->cpy_hdr.links.prev =
8660 previous_prev;
8661 copy->size += copy_size;
8662 }
8663 return kr;
8664 }
8665 vm_map_unlock(dst_map);
8666 } else {
8667 /*
8668 * Performance gain:
8669 *
8670 * if the copy and dst address are misaligned but the same
8671 * offset within the page we can copy_not_aligned the
8672 * misaligned parts and copy aligned the rest. If they are
8673 * aligned but len is unaligned we simply need to copy
8674 * the end bit unaligned. We'll need to split the misaligned
8675 * bits of the region in this case !
8676 */
8677 /* ALWAYS UNLOCKS THE dst_map MAP */
8678 kr = vm_map_copy_overwrite_unaligned(
8679 dst_map,
8680 tmp_entry,
8681 copy,
8682 base_addr,
8683 discard_on_success);
8684 if (kr != KERN_SUCCESS) {
8685 if(next_copy != NULL) {
8686 copy->cpy_hdr.nentries +=
8687 remaining_entries;
8688 copy->cpy_hdr.links.prev->vme_next =
8689 next_copy;
8690 copy->cpy_hdr.links.prev =
8691 previous_prev;
8692 copy->size += copy_size;
8693 }
8694 return kr;
8695 }
8696 }
8697 total_size -= copy_size;
8698 if(total_size == 0)
8699 break;
8700 base_addr += copy_size;
8701 copy_size = 0;
8702 copy->offset = new_offset;
8703 if(next_copy != NULL) {
8704 copy->cpy_hdr.nentries = remaining_entries;
8705 copy->cpy_hdr.links.next = next_copy;
8706 copy->cpy_hdr.links.prev = previous_prev;
8707 next_copy->vme_prev = vm_map_copy_to_entry(copy);
8708 copy->size = total_size;
8709 }
8710 vm_map_lock(dst_map);
8711 while(TRUE) {
8712 if (!vm_map_lookup_entry(dst_map,
8713 base_addr, &tmp_entry)) {
8714 vm_map_unlock(dst_map);
8715 return(KERN_INVALID_ADDRESS);
8716 }
8717 if (tmp_entry->in_transition) {
8718 entry->needs_wakeup = TRUE;
8719 vm_map_entry_wait(dst_map, THREAD_UNINT);
8720 } else {
8721 break;
8722 }
8723 }
8724 vm_map_clip_start(dst_map,
8725 tmp_entry,
8726 vm_map_trunc_page(base_addr,
8727 VM_MAP_PAGE_MASK(dst_map)));
8728
8729 entry = tmp_entry;
8730 } /* while */
8731
8732 /*
8733 * Throw away the vm_map_copy object
8734 */
8735 if (discard_on_success)
8736 vm_map_copy_discard(copy);
8737
8738 return(KERN_SUCCESS);
8739 }/* vm_map_copy_overwrite */
8740
8741 kern_return_t
8742 vm_map_copy_overwrite(
8743 vm_map_t dst_map,
8744 vm_map_offset_t dst_addr,
8745 vm_map_copy_t copy,
8746 boolean_t interruptible)
8747 {
8748 vm_map_size_t head_size, tail_size;
8749 vm_map_copy_t head_copy, tail_copy;
8750 vm_map_offset_t head_addr, tail_addr;
8751 vm_map_entry_t entry;
8752 kern_return_t kr;
8753 vm_map_offset_t effective_page_mask, effective_page_size;
8754
8755 head_size = 0;
8756 tail_size = 0;
8757 head_copy = NULL;
8758 tail_copy = NULL;
8759 head_addr = 0;
8760 tail_addr = 0;
8761
8762 if (interruptible ||
8763 copy == VM_MAP_COPY_NULL ||
8764 copy->type != VM_MAP_COPY_ENTRY_LIST) {
8765 /*
8766 * We can't split the "copy" map if we're interruptible
8767 * or if we don't have a "copy" map...
8768 */
8769 blunt_copy:
8770 return vm_map_copy_overwrite_nested(dst_map,
8771 dst_addr,
8772 copy,
8773 interruptible,
8774 (pmap_t) NULL,
8775 TRUE);
8776 }
8777
8778 effective_page_mask = MAX(VM_MAP_PAGE_MASK(dst_map), PAGE_MASK);
8779 effective_page_mask = MAX(VM_MAP_COPY_PAGE_MASK(copy),
8780 effective_page_mask);
8781 effective_page_size = effective_page_mask + 1;
8782
8783 if (copy->size < 3 * effective_page_size) {
8784 /*
8785 * Too small to bother with optimizing...
8786 */
8787 goto blunt_copy;
8788 }
8789
8790 if ((dst_addr & effective_page_mask) !=
8791 (copy->offset & effective_page_mask)) {
8792 /*
8793 * Incompatible mis-alignment of source and destination...
8794 */
8795 goto blunt_copy;
8796 }
8797
8798 /*
8799 * Proper alignment or identical mis-alignment at the beginning.
8800 * Let's try and do a small unaligned copy first (if needed)
8801 * and then an aligned copy for the rest.
8802 */
8803 if (!vm_map_page_aligned(dst_addr, effective_page_mask)) {
8804 head_addr = dst_addr;
8805 head_size = (effective_page_size -
8806 (copy->offset & effective_page_mask));
8807 head_size = MIN(head_size, copy->size);
8808 }
8809 if (!vm_map_page_aligned(copy->offset + copy->size,
8810 effective_page_mask)) {
8811 /*
8812 * Mis-alignment at the end.
8813 * Do an aligned copy up to the last page and
8814 * then an unaligned copy for the remaining bytes.
8815 */
8816 tail_size = ((copy->offset + copy->size) &
8817 effective_page_mask);
8818 tail_size = MIN(tail_size, copy->size);
8819 tail_addr = dst_addr + copy->size - tail_size;
8820 assert(tail_addr >= head_addr + head_size);
8821 }
8822 assert(head_size + tail_size <= copy->size);
8823
8824 if (head_size + tail_size == copy->size) {
8825 /*
8826 * It's all unaligned, no optimization possible...
8827 */
8828 goto blunt_copy;
8829 }
8830
8831 /*
8832 * Can't optimize if there are any submaps in the
8833 * destination due to the way we free the "copy" map
8834 * progressively in vm_map_copy_overwrite_nested()
8835 * in that case.
8836 */
8837 vm_map_lock_read(dst_map);
8838 if (! vm_map_lookup_entry(dst_map, dst_addr, &entry)) {
8839 vm_map_unlock_read(dst_map);
8840 goto blunt_copy;
8841 }
8842 for (;
8843 (entry != vm_map_copy_to_entry(copy) &&
8844 entry->vme_start < dst_addr + copy->size);
8845 entry = entry->vme_next) {
8846 if (entry->is_sub_map) {
8847 vm_map_unlock_read(dst_map);
8848 goto blunt_copy;
8849 }
8850 }
8851 vm_map_unlock_read(dst_map);
8852
8853 if (head_size) {
8854 /*
8855 * Unaligned copy of the first "head_size" bytes, to reach
8856 * a page boundary.
8857 */
8858
8859 /*
8860 * Extract "head_copy" out of "copy".
8861 */
8862 head_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
8863 head_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
8864 vm_map_copy_first_entry(head_copy) =
8865 vm_map_copy_to_entry(head_copy);
8866 vm_map_copy_last_entry(head_copy) =
8867 vm_map_copy_to_entry(head_copy);
8868 head_copy->type = VM_MAP_COPY_ENTRY_LIST;
8869 head_copy->cpy_hdr.nentries = 0;
8870 head_copy->cpy_hdr.entries_pageable =
8871 copy->cpy_hdr.entries_pageable;
8872 vm_map_store_init(&head_copy->cpy_hdr);
8873
8874 entry = vm_map_copy_first_entry(copy);
8875 if (entry->vme_end < copy->offset + head_size) {
8876 head_size = entry->vme_end - copy->offset;
8877 }
8878
8879 head_copy->offset = copy->offset;
8880 head_copy->size = head_size;
8881 copy->offset += head_size;
8882 copy->size -= head_size;
8883
8884 vm_map_copy_clip_end(copy, entry, copy->offset);
8885 vm_map_copy_entry_unlink(copy, entry);
8886 vm_map_copy_entry_link(head_copy,
8887 vm_map_copy_to_entry(head_copy),
8888 entry);
8889
8890 /*
8891 * Do the unaligned copy.
8892 */
8893 kr = vm_map_copy_overwrite_nested(dst_map,
8894 head_addr,
8895 head_copy,
8896 interruptible,
8897 (pmap_t) NULL,
8898 FALSE);
8899 if (kr != KERN_SUCCESS)
8900 goto done;
8901 }
8902
8903 if (tail_size) {
8904 /*
8905 * Extract "tail_copy" out of "copy".
8906 */
8907 tail_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
8908 tail_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
8909 vm_map_copy_first_entry(tail_copy) =
8910 vm_map_copy_to_entry(tail_copy);
8911 vm_map_copy_last_entry(tail_copy) =
8912 vm_map_copy_to_entry(tail_copy);
8913 tail_copy->type = VM_MAP_COPY_ENTRY_LIST;
8914 tail_copy->cpy_hdr.nentries = 0;
8915 tail_copy->cpy_hdr.entries_pageable =
8916 copy->cpy_hdr.entries_pageable;
8917 vm_map_store_init(&tail_copy->cpy_hdr);
8918
8919 tail_copy->offset = copy->offset + copy->size - tail_size;
8920 tail_copy->size = tail_size;
8921
8922 copy->size -= tail_size;
8923
8924 entry = vm_map_copy_last_entry(copy);
8925 vm_map_copy_clip_start(copy, entry, tail_copy->offset);
8926 entry = vm_map_copy_last_entry(copy);
8927 vm_map_copy_entry_unlink(copy, entry);
8928 vm_map_copy_entry_link(tail_copy,
8929 vm_map_copy_last_entry(tail_copy),
8930 entry);
8931 }
8932
8933 /*
8934 * Copy most (or possibly all) of the data.
8935 */
8936 kr = vm_map_copy_overwrite_nested(dst_map,
8937 dst_addr + head_size,
8938 copy,
8939 interruptible,
8940 (pmap_t) NULL,
8941 FALSE);
8942 if (kr != KERN_SUCCESS) {
8943 goto done;
8944 }
8945
8946 if (tail_size) {
8947 kr = vm_map_copy_overwrite_nested(dst_map,
8948 tail_addr,
8949 tail_copy,
8950 interruptible,
8951 (pmap_t) NULL,
8952 FALSE);
8953 }
8954
8955 done:
8956 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
8957 if (kr == KERN_SUCCESS) {
8958 /*
8959 * Discard all the copy maps.
8960 */
8961 if (head_copy) {
8962 vm_map_copy_discard(head_copy);
8963 head_copy = NULL;
8964 }
8965 vm_map_copy_discard(copy);
8966 if (tail_copy) {
8967 vm_map_copy_discard(tail_copy);
8968 tail_copy = NULL;
8969 }
8970 } else {
8971 /*
8972 * Re-assemble the original copy map.
8973 */
8974 if (head_copy) {
8975 entry = vm_map_copy_first_entry(head_copy);
8976 vm_map_copy_entry_unlink(head_copy, entry);
8977 vm_map_copy_entry_link(copy,
8978 vm_map_copy_to_entry(copy),
8979 entry);
8980 copy->offset -= head_size;
8981 copy->size += head_size;
8982 vm_map_copy_discard(head_copy);
8983 head_copy = NULL;
8984 }
8985 if (tail_copy) {
8986 entry = vm_map_copy_last_entry(tail_copy);
8987 vm_map_copy_entry_unlink(tail_copy, entry);
8988 vm_map_copy_entry_link(copy,
8989 vm_map_copy_last_entry(copy),
8990 entry);
8991 copy->size += tail_size;
8992 vm_map_copy_discard(tail_copy);
8993 tail_copy = NULL;
8994 }
8995 }
8996 return kr;
8997 }
8998
8999
9000 /*
9001 * Routine: vm_map_copy_overwrite_unaligned [internal use only]
9002 *
9003 * Decription:
9004 * Physically copy unaligned data
9005 *
9006 * Implementation:
9007 * Unaligned parts of pages have to be physically copied. We use
9008 * a modified form of vm_fault_copy (which understands none-aligned
9009 * page offsets and sizes) to do the copy. We attempt to copy as
9010 * much memory in one go as possibly, however vm_fault_copy copies
9011 * within 1 memory object so we have to find the smaller of "amount left"
9012 * "source object data size" and "target object data size". With
9013 * unaligned data we don't need to split regions, therefore the source
9014 * (copy) object should be one map entry, the target range may be split
9015 * over multiple map entries however. In any event we are pessimistic
9016 * about these assumptions.
9017 *
9018 * Assumptions:
9019 * dst_map is locked on entry and is return locked on success,
9020 * unlocked on error.
9021 */
9022
9023 static kern_return_t
9024 vm_map_copy_overwrite_unaligned(
9025 vm_map_t dst_map,
9026 vm_map_entry_t entry,
9027 vm_map_copy_t copy,
9028 vm_map_offset_t start,
9029 boolean_t discard_on_success)
9030 {
9031 vm_map_entry_t copy_entry;
9032 vm_map_entry_t copy_entry_next;
9033 vm_map_version_t version;
9034 vm_object_t dst_object;
9035 vm_object_offset_t dst_offset;
9036 vm_object_offset_t src_offset;
9037 vm_object_offset_t entry_offset;
9038 vm_map_offset_t entry_end;
9039 vm_map_size_t src_size,
9040 dst_size,
9041 copy_size,
9042 amount_left;
9043 kern_return_t kr = KERN_SUCCESS;
9044
9045
9046 copy_entry = vm_map_copy_first_entry(copy);
9047
9048 vm_map_lock_write_to_read(dst_map);
9049
9050 src_offset = copy->offset - vm_object_trunc_page(copy->offset);
9051 amount_left = copy->size;
9052 /*
9053 * unaligned so we never clipped this entry, we need the offset into
9054 * the vm_object not just the data.
9055 */
9056 while (amount_left > 0) {
9057
9058 if (entry == vm_map_to_entry(dst_map)) {
9059 vm_map_unlock_read(dst_map);
9060 return KERN_INVALID_ADDRESS;
9061 }
9062
9063 /* "start" must be within the current map entry */
9064 assert ((start>=entry->vme_start) && (start<entry->vme_end));
9065
9066 dst_offset = start - entry->vme_start;
9067
9068 dst_size = entry->vme_end - start;
9069
9070 src_size = copy_entry->vme_end -
9071 (copy_entry->vme_start + src_offset);
9072
9073 if (dst_size < src_size) {
9074 /*
9075 * we can only copy dst_size bytes before
9076 * we have to get the next destination entry
9077 */
9078 copy_size = dst_size;
9079 } else {
9080 /*
9081 * we can only copy src_size bytes before
9082 * we have to get the next source copy entry
9083 */
9084 copy_size = src_size;
9085 }
9086
9087 if (copy_size > amount_left) {
9088 copy_size = amount_left;
9089 }
9090 /*
9091 * Entry needs copy, create a shadow shadow object for
9092 * Copy on write region.
9093 */
9094 if (entry->needs_copy &&
9095 ((entry->protection & VM_PROT_WRITE) != 0))
9096 {
9097 if (vm_map_lock_read_to_write(dst_map)) {
9098 vm_map_lock_read(dst_map);
9099 goto RetryLookup;
9100 }
9101 VME_OBJECT_SHADOW(entry,
9102 (vm_map_size_t)(entry->vme_end
9103 - entry->vme_start));
9104 entry->needs_copy = FALSE;
9105 vm_map_lock_write_to_read(dst_map);
9106 }
9107 dst_object = VME_OBJECT(entry);
9108 /*
9109 * unlike with the virtual (aligned) copy we're going
9110 * to fault on it therefore we need a target object.
9111 */
9112 if (dst_object == VM_OBJECT_NULL) {
9113 if (vm_map_lock_read_to_write(dst_map)) {
9114 vm_map_lock_read(dst_map);
9115 goto RetryLookup;
9116 }
9117 dst_object = vm_object_allocate((vm_map_size_t)
9118 entry->vme_end - entry->vme_start);
9119 VME_OBJECT(entry) = dst_object;
9120 VME_OFFSET_SET(entry, 0);
9121 assert(entry->use_pmap);
9122 vm_map_lock_write_to_read(dst_map);
9123 }
9124 /*
9125 * Take an object reference and unlock map. The "entry" may
9126 * disappear or change when the map is unlocked.
9127 */
9128 vm_object_reference(dst_object);
9129 version.main_timestamp = dst_map->timestamp;
9130 entry_offset = VME_OFFSET(entry);
9131 entry_end = entry->vme_end;
9132 vm_map_unlock_read(dst_map);
9133 /*
9134 * Copy as much as possible in one pass
9135 */
9136 kr = vm_fault_copy(
9137 VME_OBJECT(copy_entry),
9138 VME_OFFSET(copy_entry) + src_offset,
9139 &copy_size,
9140 dst_object,
9141 entry_offset + dst_offset,
9142 dst_map,
9143 &version,
9144 THREAD_UNINT );
9145
9146 start += copy_size;
9147 src_offset += copy_size;
9148 amount_left -= copy_size;
9149 /*
9150 * Release the object reference
9151 */
9152 vm_object_deallocate(dst_object);
9153 /*
9154 * If a hard error occurred, return it now
9155 */
9156 if (kr != KERN_SUCCESS)
9157 return kr;
9158
9159 if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
9160 || amount_left == 0)
9161 {
9162 /*
9163 * all done with this copy entry, dispose.
9164 */
9165 copy_entry_next = copy_entry->vme_next;
9166
9167 if (discard_on_success) {
9168 vm_map_copy_entry_unlink(copy, copy_entry);
9169 assert(!copy_entry->is_sub_map);
9170 vm_object_deallocate(VME_OBJECT(copy_entry));
9171 vm_map_copy_entry_dispose(copy, copy_entry);
9172 }
9173
9174 if (copy_entry_next == vm_map_copy_to_entry(copy) &&
9175 amount_left) {
9176 /*
9177 * not finished copying but run out of source
9178 */
9179 return KERN_INVALID_ADDRESS;
9180 }
9181
9182 copy_entry = copy_entry_next;
9183
9184 src_offset = 0;
9185 }
9186
9187 if (amount_left == 0)
9188 return KERN_SUCCESS;
9189
9190 vm_map_lock_read(dst_map);
9191 if (version.main_timestamp == dst_map->timestamp) {
9192 if (start == entry_end) {
9193 /*
9194 * destination region is split. Use the version
9195 * information to avoid a lookup in the normal
9196 * case.
9197 */
9198 entry = entry->vme_next;
9199 /*
9200 * should be contiguous. Fail if we encounter
9201 * a hole in the destination.
9202 */
9203 if (start != entry->vme_start) {
9204 vm_map_unlock_read(dst_map);
9205 return KERN_INVALID_ADDRESS ;
9206 }
9207 }
9208 } else {
9209 /*
9210 * Map version check failed.
9211 * we must lookup the entry because somebody
9212 * might have changed the map behind our backs.
9213 */
9214 RetryLookup:
9215 if (!vm_map_lookup_entry(dst_map, start, &entry))
9216 {
9217 vm_map_unlock_read(dst_map);
9218 return KERN_INVALID_ADDRESS ;
9219 }
9220 }
9221 }/* while */
9222
9223 return KERN_SUCCESS;
9224 }/* vm_map_copy_overwrite_unaligned */
9225
9226 /*
9227 * Routine: vm_map_copy_overwrite_aligned [internal use only]
9228 *
9229 * Description:
9230 * Does all the vm_trickery possible for whole pages.
9231 *
9232 * Implementation:
9233 *
9234 * If there are no permanent objects in the destination,
9235 * and the source and destination map entry zones match,
9236 * and the destination map entry is not shared,
9237 * then the map entries can be deleted and replaced
9238 * with those from the copy. The following code is the
9239 * basic idea of what to do, but there are lots of annoying
9240 * little details about getting protection and inheritance
9241 * right. Should add protection, inheritance, and sharing checks
9242 * to the above pass and make sure that no wiring is involved.
9243 */
9244
9245 int vm_map_copy_overwrite_aligned_src_not_internal = 0;
9246 int vm_map_copy_overwrite_aligned_src_not_symmetric = 0;
9247 int vm_map_copy_overwrite_aligned_src_large = 0;
9248
9249 static kern_return_t
9250 vm_map_copy_overwrite_aligned(
9251 vm_map_t dst_map,
9252 vm_map_entry_t tmp_entry,
9253 vm_map_copy_t copy,
9254 vm_map_offset_t start,
9255 __unused pmap_t pmap)
9256 {
9257 vm_object_t object;
9258 vm_map_entry_t copy_entry;
9259 vm_map_size_t copy_size;
9260 vm_map_size_t size;
9261 vm_map_entry_t entry;
9262
9263 while ((copy_entry = vm_map_copy_first_entry(copy))
9264 != vm_map_copy_to_entry(copy))
9265 {
9266 copy_size = (copy_entry->vme_end - copy_entry->vme_start);
9267
9268 entry = tmp_entry;
9269 if (entry->is_sub_map) {
9270 /* unnested when clipped earlier */
9271 assert(!entry->use_pmap);
9272 }
9273 if (entry == vm_map_to_entry(dst_map)) {
9274 vm_map_unlock(dst_map);
9275 return KERN_INVALID_ADDRESS;
9276 }
9277 size = (entry->vme_end - entry->vme_start);
9278 /*
9279 * Make sure that no holes popped up in the
9280 * address map, and that the protection is
9281 * still valid, in case the map was unlocked
9282 * earlier.
9283 */
9284
9285 if ((entry->vme_start != start) || ((entry->is_sub_map)
9286 && !entry->needs_copy)) {
9287 vm_map_unlock(dst_map);
9288 return(KERN_INVALID_ADDRESS);
9289 }
9290 assert(entry != vm_map_to_entry(dst_map));
9291
9292 /*
9293 * Check protection again
9294 */
9295
9296 if ( ! (entry->protection & VM_PROT_WRITE)) {
9297 vm_map_unlock(dst_map);
9298 return(KERN_PROTECTION_FAILURE);
9299 }
9300
9301 /*
9302 * Adjust to source size first
9303 */
9304
9305 if (copy_size < size) {
9306 if (entry->map_aligned &&
9307 !VM_MAP_PAGE_ALIGNED(entry->vme_start + copy_size,
9308 VM_MAP_PAGE_MASK(dst_map))) {
9309 /* no longer map-aligned */
9310 entry->map_aligned = FALSE;
9311 }
9312 vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
9313 size = copy_size;
9314 }
9315
9316 /*
9317 * Adjust to destination size
9318 */
9319
9320 if (size < copy_size) {
9321 vm_map_copy_clip_end(copy, copy_entry,
9322 copy_entry->vme_start + size);
9323 copy_size = size;
9324 }
9325
9326 assert((entry->vme_end - entry->vme_start) == size);
9327 assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
9328 assert((copy_entry->vme_end - copy_entry->vme_start) == size);
9329
9330 /*
9331 * If the destination contains temporary unshared memory,
9332 * we can perform the copy by throwing it away and
9333 * installing the source data.
9334 */
9335
9336 object = VME_OBJECT(entry);
9337 if ((!entry->is_shared &&
9338 ((object == VM_OBJECT_NULL) ||
9339 (object->internal && !object->true_share))) ||
9340 entry->needs_copy) {
9341 vm_object_t old_object = VME_OBJECT(entry);
9342 vm_object_offset_t old_offset = VME_OFFSET(entry);
9343 vm_object_offset_t offset;
9344
9345 /*
9346 * Ensure that the source and destination aren't
9347 * identical
9348 */
9349 if (old_object == VME_OBJECT(copy_entry) &&
9350 old_offset == VME_OFFSET(copy_entry)) {
9351 vm_map_copy_entry_unlink(copy, copy_entry);
9352 vm_map_copy_entry_dispose(copy, copy_entry);
9353
9354 if (old_object != VM_OBJECT_NULL)
9355 vm_object_deallocate(old_object);
9356
9357 start = tmp_entry->vme_end;
9358 tmp_entry = tmp_entry->vme_next;
9359 continue;
9360 }
9361
9362 #if !CONFIG_EMBEDDED
9363 #define __TRADEOFF1_OBJ_SIZE (64 * 1024 * 1024) /* 64 MB */
9364 #define __TRADEOFF1_COPY_SIZE (128 * 1024) /* 128 KB */
9365 if (VME_OBJECT(copy_entry) != VM_OBJECT_NULL &&
9366 VME_OBJECT(copy_entry)->vo_size >= __TRADEOFF1_OBJ_SIZE &&
9367 copy_size <= __TRADEOFF1_COPY_SIZE) {
9368 /*
9369 * Virtual vs. Physical copy tradeoff #1.
9370 *
9371 * Copying only a few pages out of a large
9372 * object: do a physical copy instead of
9373 * a virtual copy, to avoid possibly keeping
9374 * the entire large object alive because of
9375 * those few copy-on-write pages.
9376 */
9377 vm_map_copy_overwrite_aligned_src_large++;
9378 goto slow_copy;
9379 }
9380 #endif /* !CONFIG_EMBEDDED */
9381
9382 if ((dst_map->pmap != kernel_pmap) &&
9383 (VME_ALIAS(entry) >= VM_MEMORY_MALLOC) &&
9384 (VME_ALIAS(entry) <= VM_MEMORY_MALLOC_LARGE_REUSED)) {
9385 vm_object_t new_object, new_shadow;
9386
9387 /*
9388 * We're about to map something over a mapping
9389 * established by malloc()...
9390 */
9391 new_object = VME_OBJECT(copy_entry);
9392 if (new_object != VM_OBJECT_NULL) {
9393 vm_object_lock_shared(new_object);
9394 }
9395 while (new_object != VM_OBJECT_NULL &&
9396 #if !CONFIG_EMBEDDED
9397 !new_object->true_share &&
9398 new_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
9399 #endif /* !CONFIG_EMBEDDED */
9400 new_object->internal) {
9401 new_shadow = new_object->shadow;
9402 if (new_shadow == VM_OBJECT_NULL) {
9403 break;
9404 }
9405 vm_object_lock_shared(new_shadow);
9406 vm_object_unlock(new_object);
9407 new_object = new_shadow;
9408 }
9409 if (new_object != VM_OBJECT_NULL) {
9410 if (!new_object->internal) {
9411 /*
9412 * The new mapping is backed
9413 * by an external object. We
9414 * don't want malloc'ed memory
9415 * to be replaced with such a
9416 * non-anonymous mapping, so
9417 * let's go off the optimized
9418 * path...
9419 */
9420 vm_map_copy_overwrite_aligned_src_not_internal++;
9421 vm_object_unlock(new_object);
9422 goto slow_copy;
9423 }
9424 #if !CONFIG_EMBEDDED
9425 if (new_object->true_share ||
9426 new_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
9427 /*
9428 * Same if there's a "true_share"
9429 * object in the shadow chain, or
9430 * an object with a non-default
9431 * (SYMMETRIC) copy strategy.
9432 */
9433 vm_map_copy_overwrite_aligned_src_not_symmetric++;
9434 vm_object_unlock(new_object);
9435 goto slow_copy;
9436 }
9437 #endif /* !CONFIG_EMBEDDED */
9438 vm_object_unlock(new_object);
9439 }
9440 /*
9441 * The new mapping is still backed by
9442 * anonymous (internal) memory, so it's
9443 * OK to substitute it for the original
9444 * malloc() mapping.
9445 */
9446 }
9447
9448 if (old_object != VM_OBJECT_NULL) {
9449 if(entry->is_sub_map) {
9450 if(entry->use_pmap) {
9451 #ifndef NO_NESTED_PMAP
9452 pmap_unnest(dst_map->pmap,
9453 (addr64_t)entry->vme_start,
9454 entry->vme_end - entry->vme_start);
9455 #endif /* NO_NESTED_PMAP */
9456 if(dst_map->mapped_in_other_pmaps) {
9457 /* clean up parent */
9458 /* map/maps */
9459 vm_map_submap_pmap_clean(
9460 dst_map, entry->vme_start,
9461 entry->vme_end,
9462 VME_SUBMAP(entry),
9463 VME_OFFSET(entry));
9464 }
9465 } else {
9466 vm_map_submap_pmap_clean(
9467 dst_map, entry->vme_start,
9468 entry->vme_end,
9469 VME_SUBMAP(entry),
9470 VME_OFFSET(entry));
9471 }
9472 vm_map_deallocate(VME_SUBMAP(entry));
9473 } else {
9474 if(dst_map->mapped_in_other_pmaps) {
9475 vm_object_pmap_protect_options(
9476 VME_OBJECT(entry),
9477 VME_OFFSET(entry),
9478 entry->vme_end
9479 - entry->vme_start,
9480 PMAP_NULL,
9481 entry->vme_start,
9482 VM_PROT_NONE,
9483 PMAP_OPTIONS_REMOVE);
9484 } else {
9485 pmap_remove_options(
9486 dst_map->pmap,
9487 (addr64_t)(entry->vme_start),
9488 (addr64_t)(entry->vme_end),
9489 PMAP_OPTIONS_REMOVE);
9490 }
9491 vm_object_deallocate(old_object);
9492 }
9493 }
9494
9495 if (entry->iokit_acct) {
9496 /* keep using iokit accounting */
9497 entry->use_pmap = FALSE;
9498 } else {
9499 /* use pmap accounting */
9500 entry->use_pmap = TRUE;
9501 }
9502 entry->is_sub_map = FALSE;
9503 VME_OBJECT_SET(entry, VME_OBJECT(copy_entry));
9504 object = VME_OBJECT(entry);
9505 entry->needs_copy = copy_entry->needs_copy;
9506 entry->wired_count = 0;
9507 entry->user_wired_count = 0;
9508 offset = VME_OFFSET(copy_entry);
9509 VME_OFFSET_SET(entry, offset);
9510
9511 vm_map_copy_entry_unlink(copy, copy_entry);
9512 vm_map_copy_entry_dispose(copy, copy_entry);
9513
9514 /*
9515 * we could try to push pages into the pmap at this point, BUT
9516 * this optimization only saved on average 2 us per page if ALL
9517 * the pages in the source were currently mapped
9518 * and ALL the pages in the dest were touched, if there were fewer
9519 * than 2/3 of the pages touched, this optimization actually cost more cycles
9520 * it also puts a lot of pressure on the pmap layer w/r to mapping structures
9521 */
9522
9523 /*
9524 * Set up for the next iteration. The map
9525 * has not been unlocked, so the next
9526 * address should be at the end of this
9527 * entry, and the next map entry should be
9528 * the one following it.
9529 */
9530
9531 start = tmp_entry->vme_end;
9532 tmp_entry = tmp_entry->vme_next;
9533 } else {
9534 vm_map_version_t version;
9535 vm_object_t dst_object;
9536 vm_object_offset_t dst_offset;
9537 kern_return_t r;
9538
9539 slow_copy:
9540 if (entry->needs_copy) {
9541 VME_OBJECT_SHADOW(entry,
9542 (entry->vme_end -
9543 entry->vme_start));
9544 entry->needs_copy = FALSE;
9545 }
9546
9547 dst_object = VME_OBJECT(entry);
9548 dst_offset = VME_OFFSET(entry);
9549
9550 /*
9551 * Take an object reference, and record
9552 * the map version information so that the
9553 * map can be safely unlocked.
9554 */
9555
9556 if (dst_object == VM_OBJECT_NULL) {
9557 /*
9558 * We would usually have just taken the
9559 * optimized path above if the destination
9560 * object has not been allocated yet. But we
9561 * now disable that optimization if the copy
9562 * entry's object is not backed by anonymous
9563 * memory to avoid replacing malloc'ed
9564 * (i.e. re-usable) anonymous memory with a
9565 * not-so-anonymous mapping.
9566 * So we have to handle this case here and
9567 * allocate a new VM object for this map entry.
9568 */
9569 dst_object = vm_object_allocate(
9570 entry->vme_end - entry->vme_start);
9571 dst_offset = 0;
9572 VME_OBJECT_SET(entry, dst_object);
9573 VME_OFFSET_SET(entry, dst_offset);
9574 assert(entry->use_pmap);
9575
9576 }
9577
9578 vm_object_reference(dst_object);
9579
9580 /* account for unlock bumping up timestamp */
9581 version.main_timestamp = dst_map->timestamp + 1;
9582
9583 vm_map_unlock(dst_map);
9584
9585 /*
9586 * Copy as much as possible in one pass
9587 */
9588
9589 copy_size = size;
9590 r = vm_fault_copy(
9591 VME_OBJECT(copy_entry),
9592 VME_OFFSET(copy_entry),
9593 &copy_size,
9594 dst_object,
9595 dst_offset,
9596 dst_map,
9597 &version,
9598 THREAD_UNINT );
9599
9600 /*
9601 * Release the object reference
9602 */
9603
9604 vm_object_deallocate(dst_object);
9605
9606 /*
9607 * If a hard error occurred, return it now
9608 */
9609
9610 if (r != KERN_SUCCESS)
9611 return(r);
9612
9613 if (copy_size != 0) {
9614 /*
9615 * Dispose of the copied region
9616 */
9617
9618 vm_map_copy_clip_end(copy, copy_entry,
9619 copy_entry->vme_start + copy_size);
9620 vm_map_copy_entry_unlink(copy, copy_entry);
9621 vm_object_deallocate(VME_OBJECT(copy_entry));
9622 vm_map_copy_entry_dispose(copy, copy_entry);
9623 }
9624
9625 /*
9626 * Pick up in the destination map where we left off.
9627 *
9628 * Use the version information to avoid a lookup
9629 * in the normal case.
9630 */
9631
9632 start += copy_size;
9633 vm_map_lock(dst_map);
9634 if (version.main_timestamp == dst_map->timestamp &&
9635 copy_size != 0) {
9636 /* We can safely use saved tmp_entry value */
9637
9638 if (tmp_entry->map_aligned &&
9639 !VM_MAP_PAGE_ALIGNED(
9640 start,
9641 VM_MAP_PAGE_MASK(dst_map))) {
9642 /* no longer map-aligned */
9643 tmp_entry->map_aligned = FALSE;
9644 }
9645 vm_map_clip_end(dst_map, tmp_entry, start);
9646 tmp_entry = tmp_entry->vme_next;
9647 } else {
9648 /* Must do lookup of tmp_entry */
9649
9650 if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
9651 vm_map_unlock(dst_map);
9652 return(KERN_INVALID_ADDRESS);
9653 }
9654 if (tmp_entry->map_aligned &&
9655 !VM_MAP_PAGE_ALIGNED(
9656 start,
9657 VM_MAP_PAGE_MASK(dst_map))) {
9658 /* no longer map-aligned */
9659 tmp_entry->map_aligned = FALSE;
9660 }
9661 vm_map_clip_start(dst_map, tmp_entry, start);
9662 }
9663 }
9664 }/* while */
9665
9666 return(KERN_SUCCESS);
9667 }/* vm_map_copy_overwrite_aligned */
9668
9669 /*
9670 * Routine: vm_map_copyin_kernel_buffer [internal use only]
9671 *
9672 * Description:
9673 * Copy in data to a kernel buffer from space in the
9674 * source map. The original space may be optionally
9675 * deallocated.
9676 *
9677 * If successful, returns a new copy object.
9678 */
9679 static kern_return_t
9680 vm_map_copyin_kernel_buffer(
9681 vm_map_t src_map,
9682 vm_map_offset_t src_addr,
9683 vm_map_size_t len,
9684 boolean_t src_destroy,
9685 vm_map_copy_t *copy_result)
9686 {
9687 kern_return_t kr;
9688 vm_map_copy_t copy;
9689 vm_size_t kalloc_size;
9690
9691 if (len > msg_ool_size_small)
9692 return KERN_INVALID_ARGUMENT;
9693
9694 kalloc_size = (vm_size_t)(cpy_kdata_hdr_sz + len);
9695
9696 copy = (vm_map_copy_t)kalloc(kalloc_size);
9697 if (copy == VM_MAP_COPY_NULL)
9698 return KERN_RESOURCE_SHORTAGE;
9699 copy->type = VM_MAP_COPY_KERNEL_BUFFER;
9700 copy->size = len;
9701 copy->offset = 0;
9702
9703 kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t)len);
9704 if (kr != KERN_SUCCESS) {
9705 kfree(copy, kalloc_size);
9706 return kr;
9707 }
9708 if (src_destroy) {
9709 (void) vm_map_remove(
9710 src_map,
9711 vm_map_trunc_page(src_addr,
9712 VM_MAP_PAGE_MASK(src_map)),
9713 vm_map_round_page(src_addr + len,
9714 VM_MAP_PAGE_MASK(src_map)),
9715 (VM_MAP_REMOVE_INTERRUPTIBLE |
9716 VM_MAP_REMOVE_WAIT_FOR_KWIRE |
9717 ((src_map == kernel_map) ? VM_MAP_REMOVE_KUNWIRE : 0)));
9718 }
9719 *copy_result = copy;
9720 return KERN_SUCCESS;
9721 }
9722
9723 /*
9724 * Routine: vm_map_copyout_kernel_buffer [internal use only]
9725 *
9726 * Description:
9727 * Copy out data from a kernel buffer into space in the
9728 * destination map. The space may be otpionally dynamically
9729 * allocated.
9730 *
9731 * If successful, consumes the copy object.
9732 * Otherwise, the caller is responsible for it.
9733 */
9734 static int vm_map_copyout_kernel_buffer_failures = 0;
9735 static kern_return_t
9736 vm_map_copyout_kernel_buffer(
9737 vm_map_t map,
9738 vm_map_address_t *addr, /* IN/OUT */
9739 vm_map_copy_t copy,
9740 vm_map_size_t copy_size,
9741 boolean_t overwrite,
9742 boolean_t consume_on_success)
9743 {
9744 kern_return_t kr = KERN_SUCCESS;
9745 thread_t thread = current_thread();
9746
9747 assert(copy->size == copy_size);
9748
9749 /*
9750 * check for corrupted vm_map_copy structure
9751 */
9752 if (copy_size > msg_ool_size_small || copy->offset)
9753 panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
9754 (long long)copy->size, (long long)copy->offset);
9755
9756 if (!overwrite) {
9757
9758 /*
9759 * Allocate space in the target map for the data
9760 */
9761 *addr = 0;
9762 kr = vm_map_enter(map,
9763 addr,
9764 vm_map_round_page(copy_size,
9765 VM_MAP_PAGE_MASK(map)),
9766 (vm_map_offset_t) 0,
9767 VM_FLAGS_ANYWHERE,
9768 VM_MAP_KERNEL_FLAGS_NONE,
9769 VM_KERN_MEMORY_NONE,
9770 VM_OBJECT_NULL,
9771 (vm_object_offset_t) 0,
9772 FALSE,
9773 VM_PROT_DEFAULT,
9774 VM_PROT_ALL,
9775 VM_INHERIT_DEFAULT);
9776 if (kr != KERN_SUCCESS)
9777 return kr;
9778 #if KASAN
9779 if (map->pmap == kernel_pmap) {
9780 kasan_notify_address(*addr, copy->size);
9781 }
9782 #endif
9783 }
9784
9785 /*
9786 * Copyout the data from the kernel buffer to the target map.
9787 */
9788 if (thread->map == map) {
9789
9790 /*
9791 * If the target map is the current map, just do
9792 * the copy.
9793 */
9794 assert((vm_size_t)copy_size == copy_size);
9795 if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
9796 kr = KERN_INVALID_ADDRESS;
9797 }
9798 }
9799 else {
9800 vm_map_t oldmap;
9801
9802 /*
9803 * If the target map is another map, assume the
9804 * target's address space identity for the duration
9805 * of the copy.
9806 */
9807 vm_map_reference(map);
9808 oldmap = vm_map_switch(map);
9809
9810 assert((vm_size_t)copy_size == copy_size);
9811 if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
9812 vm_map_copyout_kernel_buffer_failures++;
9813 kr = KERN_INVALID_ADDRESS;
9814 }
9815
9816 (void) vm_map_switch(oldmap);
9817 vm_map_deallocate(map);
9818 }
9819
9820 if (kr != KERN_SUCCESS) {
9821 /* the copy failed, clean up */
9822 if (!overwrite) {
9823 /*
9824 * Deallocate the space we allocated in the target map.
9825 */
9826 (void) vm_map_remove(
9827 map,
9828 vm_map_trunc_page(*addr,
9829 VM_MAP_PAGE_MASK(map)),
9830 vm_map_round_page((*addr +
9831 vm_map_round_page(copy_size,
9832 VM_MAP_PAGE_MASK(map))),
9833 VM_MAP_PAGE_MASK(map)),
9834 VM_MAP_NO_FLAGS);
9835 *addr = 0;
9836 }
9837 } else {
9838 /* copy was successful, dicard the copy structure */
9839 if (consume_on_success) {
9840 kfree(copy, copy_size + cpy_kdata_hdr_sz);
9841 }
9842 }
9843
9844 return kr;
9845 }
9846
/*
 *	Macro:		vm_map_copy_insert
 *
 *	Description:
 *		Link a copy chain ("copy") into a map at the
 *		specified location (after "where").
 *	Side effects:
 *		The copy chain is destroyed: after its entries have been
 *		handed to vm_map_store_copy_insert, the copy header is
 *		returned to vm_map_copy_zone.
 *	Note:
 *		"copy" is captured in a local so that it is evaluated
 *		exactly once even though it is used twice; "map" and
 *		"where" are each used (and evaluated) once.
 */
#define	vm_map_copy_insert(map, where, copy)				\
MACRO_BEGIN								\
	vm_map_copy_t	__vmci_copy = (copy);				\
	vm_map_store_copy_insert((map), (where), __vmci_copy);		\
	zfree(vm_map_copy_zone, __vmci_copy);				\
MACRO_END
9863
9864 void
9865 vm_map_copy_remap(
9866 vm_map_t map,
9867 vm_map_entry_t where,
9868 vm_map_copy_t copy,
9869 vm_map_offset_t adjustment,
9870 vm_prot_t cur_prot,
9871 vm_prot_t max_prot,
9872 vm_inherit_t inheritance)
9873 {
9874 vm_map_entry_t copy_entry, new_entry;
9875
9876 for (copy_entry = vm_map_copy_first_entry(copy);
9877 copy_entry != vm_map_copy_to_entry(copy);
9878 copy_entry = copy_entry->vme_next) {
9879 /* get a new VM map entry for the map */
9880 new_entry = vm_map_entry_create(map,
9881 !map->hdr.entries_pageable);
9882 /* copy the "copy entry" to the new entry */
9883 vm_map_entry_copy(new_entry, copy_entry);
9884 /* adjust "start" and "end" */
9885 new_entry->vme_start += adjustment;
9886 new_entry->vme_end += adjustment;
9887 /* clear some attributes */
9888 new_entry->inheritance = inheritance;
9889 new_entry->protection = cur_prot;
9890 new_entry->max_protection = max_prot;
9891 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
9892 /* take an extra reference on the entry's "object" */
9893 if (new_entry->is_sub_map) {
9894 assert(!new_entry->use_pmap); /* not nested */
9895 vm_map_lock(VME_SUBMAP(new_entry));
9896 vm_map_reference(VME_SUBMAP(new_entry));
9897 vm_map_unlock(VME_SUBMAP(new_entry));
9898 } else {
9899 vm_object_reference(VME_OBJECT(new_entry));
9900 }
9901 /* insert the new entry in the map */
9902 vm_map_store_entry_link(map, where, new_entry);
9903 /* continue inserting the "copy entries" after the new entry */
9904 where = new_entry;
9905 }
9906 }
9907
9908
9909 /*
9910 * Returns true if *size matches (or is in the range of) copy->size.
9911 * Upon returning true, the *size field is updated with the actual size of the
9912 * copy object (may be different for VM_MAP_COPY_ENTRY_LIST types)
9913 */
9914 boolean_t
9915 vm_map_copy_validate_size(
9916 vm_map_t dst_map,
9917 vm_map_copy_t copy,
9918 vm_map_size_t *size)
9919 {
9920 if (copy == VM_MAP_COPY_NULL)
9921 return FALSE;
9922 vm_map_size_t copy_sz = copy->size;
9923 vm_map_size_t sz = *size;
9924 switch (copy->type) {
9925 case VM_MAP_COPY_OBJECT:
9926 case VM_MAP_COPY_KERNEL_BUFFER:
9927 if (sz == copy_sz)
9928 return TRUE;
9929 break;
9930 case VM_MAP_COPY_ENTRY_LIST:
9931 /*
9932 * potential page-size rounding prevents us from exactly
9933 * validating this flavor of vm_map_copy, but we can at least
9934 * assert that it's within a range.
9935 */
9936 if (copy_sz >= sz &&
9937 copy_sz <= vm_map_round_page(sz, VM_MAP_PAGE_MASK(dst_map))) {
9938 *size = copy_sz;
9939 return TRUE;
9940 }
9941 break;
9942 default:
9943 break;
9944 }
9945 return FALSE;
9946 }
9947
9948 /*
9949 * Routine: vm_map_copyout_size
9950 *
9951 * Description:
9952 * Copy out a copy chain ("copy") into newly-allocated
9953 * space in the destination map. Uses a prevalidated
9954 * size for the copy object (vm_map_copy_validate_size).
9955 *
9956 * If successful, consumes the copy object.
9957 * Otherwise, the caller is responsible for it.
9958 */
9959 kern_return_t
9960 vm_map_copyout_size(
9961 vm_map_t dst_map,
9962 vm_map_address_t *dst_addr, /* OUT */
9963 vm_map_copy_t copy,
9964 vm_map_size_t copy_size)
9965 {
9966 return vm_map_copyout_internal(dst_map, dst_addr, copy, copy_size,
9967 TRUE, /* consume_on_success */
9968 VM_PROT_DEFAULT,
9969 VM_PROT_ALL,
9970 VM_INHERIT_DEFAULT);
9971 }
9972
9973 /*
9974 * Routine: vm_map_copyout
9975 *
9976 * Description:
9977 * Copy out a copy chain ("copy") into newly-allocated
9978 * space in the destination map.
9979 *
9980 * If successful, consumes the copy object.
9981 * Otherwise, the caller is responsible for it.
9982 */
9983 kern_return_t
9984 vm_map_copyout(
9985 vm_map_t dst_map,
9986 vm_map_address_t *dst_addr, /* OUT */
9987 vm_map_copy_t copy)
9988 {
9989 return vm_map_copyout_internal(dst_map, dst_addr, copy, copy ? copy->size : 0,
9990 TRUE, /* consume_on_success */
9991 VM_PROT_DEFAULT,
9992 VM_PROT_ALL,
9993 VM_INHERIT_DEFAULT);
9994 }
9995
9996 kern_return_t
9997 vm_map_copyout_internal(
9998 vm_map_t dst_map,
9999 vm_map_address_t *dst_addr, /* OUT */
10000 vm_map_copy_t copy,
10001 vm_map_size_t copy_size,
10002 boolean_t consume_on_success,
10003 vm_prot_t cur_protection,
10004 vm_prot_t max_protection,
10005 vm_inherit_t inheritance)
10006 {
10007 vm_map_size_t size;
10008 vm_map_size_t adjustment;
10009 vm_map_offset_t start;
10010 vm_object_offset_t vm_copy_start;
10011 vm_map_entry_t last;
10012 vm_map_entry_t entry;
10013 vm_map_entry_t hole_entry;
10014
10015 /*
10016 * Check for null copy object.
10017 */
10018
10019 if (copy == VM_MAP_COPY_NULL) {
10020 *dst_addr = 0;
10021 return(KERN_SUCCESS);
10022 }
10023
10024 if (copy->size != copy_size) {
10025 *dst_addr = 0;
10026 return KERN_FAILURE;
10027 }
10028
10029 /*
10030 * Check for special copy object, created
10031 * by vm_map_copyin_object.
10032 */
10033
10034 if (copy->type == VM_MAP_COPY_OBJECT) {
10035 vm_object_t object = copy->cpy_object;
10036 kern_return_t kr;
10037 vm_object_offset_t offset;
10038
10039 offset = vm_object_trunc_page(copy->offset);
10040 size = vm_map_round_page((copy_size +
10041 (vm_map_size_t)(copy->offset -
10042 offset)),
10043 VM_MAP_PAGE_MASK(dst_map));
10044 *dst_addr = 0;
10045 kr = vm_map_enter(dst_map, dst_addr, size,
10046 (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
10047 VM_MAP_KERNEL_FLAGS_NONE,
10048 VM_KERN_MEMORY_NONE,
10049 object, offset, FALSE,
10050 VM_PROT_DEFAULT, VM_PROT_ALL,
10051 VM_INHERIT_DEFAULT);
10052 if (kr != KERN_SUCCESS)
10053 return(kr);
10054 /* Account for non-pagealigned copy object */
10055 *dst_addr += (vm_map_offset_t)(copy->offset - offset);
10056 if (consume_on_success)
10057 zfree(vm_map_copy_zone, copy);
10058 return(KERN_SUCCESS);
10059 }
10060
10061 /*
10062 * Check for special kernel buffer allocated
10063 * by new_ipc_kmsg_copyin.
10064 */
10065
10066 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
10067 return vm_map_copyout_kernel_buffer(dst_map, dst_addr,
10068 copy, copy_size, FALSE,
10069 consume_on_success);
10070 }
10071
10072
10073 /*
10074 * Find space for the data
10075 */
10076
10077 vm_copy_start = vm_map_trunc_page((vm_map_size_t)copy->offset,
10078 VM_MAP_COPY_PAGE_MASK(copy));
10079 size = vm_map_round_page((vm_map_size_t)copy->offset + copy_size,
10080 VM_MAP_COPY_PAGE_MASK(copy))
10081 - vm_copy_start;
10082
10083
10084 StartAgain: ;
10085
10086 vm_map_lock(dst_map);
10087 if( dst_map->disable_vmentry_reuse == TRUE) {
10088 VM_MAP_HIGHEST_ENTRY(dst_map, entry, start);
10089 last = entry;
10090 } else {
10091 if (dst_map->holelistenabled) {
10092 hole_entry = (vm_map_entry_t)dst_map->holes_list;
10093
10094 if (hole_entry == NULL) {
10095 /*
10096 * No more space in the map?
10097 */
10098 vm_map_unlock(dst_map);
10099 return(KERN_NO_SPACE);
10100 }
10101
10102 last = hole_entry;
10103 start = last->vme_start;
10104 } else {
10105 assert(first_free_is_valid(dst_map));
10106 start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
10107 vm_map_min(dst_map) : last->vme_end;
10108 }
10109 start = vm_map_round_page(start,
10110 VM_MAP_PAGE_MASK(dst_map));
10111 }
10112
10113 while (TRUE) {
10114 vm_map_entry_t next = last->vme_next;
10115 vm_map_offset_t end = start + size;
10116
10117 if ((end > dst_map->max_offset) || (end < start)) {
10118 if (dst_map->wait_for_space) {
10119 if (size <= (dst_map->max_offset - dst_map->min_offset)) {
10120 assert_wait((event_t) dst_map,
10121 THREAD_INTERRUPTIBLE);
10122 vm_map_unlock(dst_map);
10123 thread_block(THREAD_CONTINUE_NULL);
10124 goto StartAgain;
10125 }
10126 }
10127 vm_map_unlock(dst_map);
10128 return(KERN_NO_SPACE);
10129 }
10130
10131 if (dst_map->holelistenabled) {
10132 if (last->vme_end >= end)
10133 break;
10134 } else {
10135 /*
10136 * If there are no more entries, we must win.
10137 *
10138 * OR
10139 *
10140 * If there is another entry, it must be
10141 * after the end of the potential new region.
10142 */
10143
10144 if (next == vm_map_to_entry(dst_map))
10145 break;
10146
10147 if (next->vme_start >= end)
10148 break;
10149 }
10150
10151 last = next;
10152
10153 if (dst_map->holelistenabled) {
10154 if (last == (vm_map_entry_t) dst_map->holes_list) {
10155 /*
10156 * Wrapped around
10157 */
10158 vm_map_unlock(dst_map);
10159 return(KERN_NO_SPACE);
10160 }
10161 start = last->vme_start;
10162 } else {
10163 start = last->vme_end;
10164 }
10165 start = vm_map_round_page(start,
10166 VM_MAP_PAGE_MASK(dst_map));
10167 }
10168
10169 if (dst_map->holelistenabled) {
10170 if (vm_map_lookup_entry(dst_map, last->vme_start, &last)) {
10171 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", last, (unsigned long long)last->vme_start);
10172 }
10173 }
10174
10175
10176 adjustment = start - vm_copy_start;
10177 if (! consume_on_success) {
10178 /*
10179 * We're not allowed to consume "copy", so we'll have to
10180 * copy its map entries into the destination map below.
10181 * No need to re-allocate map entries from the correct
10182 * (pageable or not) zone, since we'll get new map entries
10183 * during the transfer.
10184 * We'll also adjust the map entries's "start" and "end"
10185 * during the transfer, to keep "copy"'s entries consistent
10186 * with its "offset".
10187 */
10188 goto after_adjustments;
10189 }
10190
10191 /*
10192 * Since we're going to just drop the map
10193 * entries from the copy into the destination
10194 * map, they must come from the same pool.
10195 */
10196
10197 if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
10198 /*
10199 * Mismatches occur when dealing with the default
10200 * pager.
10201 */
10202 zone_t old_zone;
10203 vm_map_entry_t next, new;
10204
10205 /*
10206 * Find the zone that the copies were allocated from
10207 */
10208
10209 entry = vm_map_copy_first_entry(copy);
10210
10211 /*
10212 * Reinitialize the copy so that vm_map_copy_entry_link
10213 * will work.
10214 */
10215 vm_map_store_copy_reset(copy, entry);
10216 copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;
10217
10218 /*
10219 * Copy each entry.
10220 */
10221 while (entry != vm_map_copy_to_entry(copy)) {
10222 new = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
10223 vm_map_entry_copy_full(new, entry);
10224 assert(!new->iokit_acct);
10225 if (new->is_sub_map) {
10226 /* clr address space specifics */
10227 new->use_pmap = FALSE;
10228 }
10229 vm_map_copy_entry_link(copy,
10230 vm_map_copy_last_entry(copy),
10231 new);
10232 next = entry->vme_next;
10233 old_zone = entry->from_reserved_zone ? vm_map_entry_reserved_zone : vm_map_entry_zone;
10234 zfree(old_zone, entry);
10235 entry = next;
10236 }
10237 }
10238
10239 /*
10240 * Adjust the addresses in the copy chain, and
10241 * reset the region attributes.
10242 */
10243
10244 for (entry = vm_map_copy_first_entry(copy);
10245 entry != vm_map_copy_to_entry(copy);
10246 entry = entry->vme_next) {
10247 if (VM_MAP_PAGE_SHIFT(dst_map) == PAGE_SHIFT) {
10248 /*
10249 * We're injecting this copy entry into a map that
10250 * has the standard page alignment, so clear
10251 * "map_aligned" (which might have been inherited
10252 * from the original map entry).
10253 */
10254 entry->map_aligned = FALSE;
10255 }
10256
10257 entry->vme_start += adjustment;
10258 entry->vme_end += adjustment;
10259
10260 if (entry->map_aligned) {
10261 assert(VM_MAP_PAGE_ALIGNED(entry->vme_start,
10262 VM_MAP_PAGE_MASK(dst_map)));
10263 assert(VM_MAP_PAGE_ALIGNED(entry->vme_end,
10264 VM_MAP_PAGE_MASK(dst_map)));
10265 }
10266
10267 entry->inheritance = VM_INHERIT_DEFAULT;
10268 entry->protection = VM_PROT_DEFAULT;
10269 entry->max_protection = VM_PROT_ALL;
10270 entry->behavior = VM_BEHAVIOR_DEFAULT;
10271
10272 /*
10273 * If the entry is now wired,
10274 * map the pages into the destination map.
10275 */
10276 if (entry->wired_count != 0) {
10277 vm_map_offset_t va;
10278 vm_object_offset_t offset;
10279 vm_object_t object;
10280 vm_prot_t prot;
10281 int type_of_fault;
10282
10283 object = VME_OBJECT(entry);
10284 offset = VME_OFFSET(entry);
10285 va = entry->vme_start;
10286
10287 pmap_pageable(dst_map->pmap,
10288 entry->vme_start,
10289 entry->vme_end,
10290 TRUE);
10291
10292 while (va < entry->vme_end) {
10293 vm_page_t m;
10294
10295 /*
10296 * Look up the page in the object.
10297 * Assert that the page will be found in the
10298 * top object:
10299 * either
10300 * the object was newly created by
10301 * vm_object_copy_slowly, and has
10302 * copies of all of the pages from
10303 * the source object
10304 * or
10305 * the object was moved from the old
10306 * map entry; because the old map
10307 * entry was wired, all of the pages
10308 * were in the top-level object.
10309 * (XXX not true if we wire pages for
10310 * reading)
10311 */
10312 vm_object_lock(object);
10313
10314 m = vm_page_lookup(object, offset);
10315 if (m == VM_PAGE_NULL || !VM_PAGE_WIRED(m) ||
10316 m->absent)
10317 panic("vm_map_copyout: wiring %p", m);
10318
10319 prot = entry->protection;
10320
10321 if (override_nx(dst_map, VME_ALIAS(entry)) &&
10322 prot)
10323 prot |= VM_PROT_EXECUTE;
10324
10325 type_of_fault = DBG_CACHE_HIT_FAULT;
10326
10327 vm_fault_enter(m, dst_map->pmap, va, prot, prot,
10328 VM_PAGE_WIRED(m),
10329 FALSE, /* change_wiring */
10330 VM_KERN_MEMORY_NONE, /* tag - not wiring */
10331 FALSE, /* no_cache */
10332 FALSE, /* cs_bypass */
10333 VME_ALIAS(entry),
10334 ((entry->iokit_acct ||
10335 (!entry->is_sub_map &&
10336 !entry->use_pmap))
10337 ? PMAP_OPTIONS_ALT_ACCT
10338 : 0), /* pmap_options */
10339 NULL, /* need_retry */
10340 &type_of_fault);
10341
10342 vm_object_unlock(object);
10343
10344 offset += PAGE_SIZE_64;
10345 va += PAGE_SIZE;
10346 }
10347 }
10348 }
10349
10350 after_adjustments:
10351
10352 /*
10353 * Correct the page alignment for the result
10354 */
10355
10356 *dst_addr = start + (copy->offset - vm_copy_start);
10357
10358 #if KASAN
10359 kasan_notify_address(*dst_addr, size);
10360 #endif
10361
10362 /*
10363 * Update the hints and the map size
10364 */
10365
10366 if (consume_on_success) {
10367 SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy));
10368 } else {
10369 SAVE_HINT_MAP_WRITE(dst_map, last);
10370 }
10371
10372 dst_map->size += size;
10373
10374 /*
10375 * Link in the copy
10376 */
10377
10378 if (consume_on_success) {
10379 vm_map_copy_insert(dst_map, last, copy);
10380 } else {
10381 vm_map_copy_remap(dst_map, last, copy, adjustment,
10382 cur_protection, max_protection,
10383 inheritance);
10384 }
10385
10386 vm_map_unlock(dst_map);
10387
10388 /*
10389 * XXX If wiring_required, call vm_map_pageable
10390 */
10391
10392 return(KERN_SUCCESS);
10393 }
10394
10395 /*
10396 * Routine: vm_map_copyin
10397 *
10398 * Description:
10399 * see vm_map_copyin_common. Exported via Unsupported.exports.
10400 *
10401 */
10402
10403 #undef vm_map_copyin
10404
10405 kern_return_t
10406 vm_map_copyin(
10407 vm_map_t src_map,
10408 vm_map_address_t src_addr,
10409 vm_map_size_t len,
10410 boolean_t src_destroy,
10411 vm_map_copy_t *copy_result) /* OUT */
10412 {
10413 return(vm_map_copyin_common(src_map, src_addr, len, src_destroy,
10414 FALSE, copy_result, FALSE));
10415 }
10416
10417 /*
10418 * Routine: vm_map_copyin_common
10419 *
10420 * Description:
10421 * Copy the specified region (src_addr, len) from the
10422 * source address space (src_map), possibly removing
10423 * the region from the source address space (src_destroy).
10424 *
10425 * Returns:
10426 * A vm_map_copy_t object (copy_result), suitable for
10427 * insertion into another address space (using vm_map_copyout),
10428 * or for copying over another address space region (using
10429 * vm_map_copy_overwrite). If the copy is unused, it
10430 * should be destroyed (using vm_map_copy_discard).
10431 *
10432 * In/out conditions:
10433 * The source map should not be locked on entry.
10434 */
10435
10436 typedef struct submap_map {
10437 vm_map_t parent_map;
10438 vm_map_offset_t base_start;
10439 vm_map_offset_t base_end;
10440 vm_map_size_t base_len;
10441 struct submap_map *next;
10442 } submap_map_t;
10443
10444 kern_return_t
10445 vm_map_copyin_common(
10446 vm_map_t src_map,
10447 vm_map_address_t src_addr,
10448 vm_map_size_t len,
10449 boolean_t src_destroy,
10450 __unused boolean_t src_volatile,
10451 vm_map_copy_t *copy_result, /* OUT */
10452 boolean_t use_maxprot)
10453 {
10454 int flags;
10455
10456 flags = 0;
10457 if (src_destroy) {
10458 flags |= VM_MAP_COPYIN_SRC_DESTROY;
10459 }
10460 if (use_maxprot) {
10461 flags |= VM_MAP_COPYIN_USE_MAXPROT;
10462 }
10463 return vm_map_copyin_internal(src_map,
10464 src_addr,
10465 len,
10466 flags,
10467 copy_result);
10468 }
10469 kern_return_t
10470 vm_map_copyin_internal(
10471 vm_map_t src_map,
10472 vm_map_address_t src_addr,
10473 vm_map_size_t len,
10474 int flags,
10475 vm_map_copy_t *copy_result) /* OUT */
10476 {
10477 vm_map_entry_t tmp_entry; /* Result of last map lookup --
10478 * in multi-level lookup, this
10479 * entry contains the actual
10480 * vm_object/offset.
10481 */
10482 vm_map_entry_t new_entry = VM_MAP_ENTRY_NULL; /* Map entry for copy */
10483
10484 vm_map_offset_t src_start; /* Start of current entry --
10485 * where copy is taking place now
10486 */
10487 vm_map_offset_t src_end; /* End of entire region to be
10488 * copied */
10489 vm_map_offset_t src_base;
10490 vm_map_t base_map = src_map;
10491 boolean_t map_share=FALSE;
10492 submap_map_t *parent_maps = NULL;
10493
10494 vm_map_copy_t copy; /* Resulting copy */
10495 vm_map_address_t copy_addr;
10496 vm_map_size_t copy_size;
10497 boolean_t src_destroy;
10498 boolean_t use_maxprot;
10499 boolean_t preserve_purgeable;
10500 boolean_t entry_was_shared;
10501 vm_map_entry_t saved_src_entry;
10502
10503 if (flags & ~VM_MAP_COPYIN_ALL_FLAGS) {
10504 return KERN_INVALID_ARGUMENT;
10505 }
10506
10507 src_destroy = (flags & VM_MAP_COPYIN_SRC_DESTROY) ? TRUE : FALSE;
10508 use_maxprot = (flags & VM_MAP_COPYIN_USE_MAXPROT) ? TRUE : FALSE;
10509 preserve_purgeable =
10510 (flags & VM_MAP_COPYIN_PRESERVE_PURGEABLE) ? TRUE : FALSE;
10511
10512 /*
10513 * Check for copies of zero bytes.
10514 */
10515
10516 if (len == 0) {
10517 *copy_result = VM_MAP_COPY_NULL;
10518 return(KERN_SUCCESS);
10519 }
10520
10521 /*
10522 * Check that the end address doesn't overflow
10523 */
10524 src_end = src_addr + len;
10525 if (src_end < src_addr)
10526 return KERN_INVALID_ADDRESS;
10527
10528 /*
10529 * Compute (page aligned) start and end of region
10530 */
10531 src_start = vm_map_trunc_page(src_addr,
10532 VM_MAP_PAGE_MASK(src_map));
10533 src_end = vm_map_round_page(src_end,
10534 VM_MAP_PAGE_MASK(src_map));
10535
10536 /*
10537 * If the copy is sufficiently small, use a kernel buffer instead
10538 * of making a virtual copy. The theory being that the cost of
10539 * setting up VM (and taking C-O-W faults) dominates the copy costs
10540 * for small regions.
10541 */
10542 if ((len < msg_ool_size_small) &&
10543 !use_maxprot &&
10544 !preserve_purgeable &&
10545 !(flags & VM_MAP_COPYIN_ENTRY_LIST) &&
10546 /*
10547 * Since the "msg_ool_size_small" threshold was increased and
10548 * vm_map_copyin_kernel_buffer() doesn't handle accesses beyond the
10549 * address space limits, we revert to doing a virtual copy if the
10550 * copied range goes beyond those limits. Otherwise, mach_vm_read()
10551 * of the commpage would now fail when it used to work.
10552 */
10553 (src_start >= vm_map_min(src_map) &&
10554 src_start < vm_map_max(src_map) &&
10555 src_end >= vm_map_min(src_map) &&
10556 src_end < vm_map_max(src_map)))
10557 return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
10558 src_destroy, copy_result);
10559
10560 XPR(XPR_VM_MAP, "vm_map_copyin_common map 0x%x addr 0x%x len 0x%x dest %d\n", src_map, src_addr, len, src_destroy, 0);
10561
10562 /*
10563 * Allocate a header element for the list.
10564 *
10565 * Use the start and end in the header to
10566 * remember the endpoints prior to rounding.
10567 */
10568
10569 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
10570 copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
10571 vm_map_copy_first_entry(copy) =
10572 vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
10573 copy->type = VM_MAP_COPY_ENTRY_LIST;
10574 copy->cpy_hdr.nentries = 0;
10575 copy->cpy_hdr.entries_pageable = TRUE;
10576 #if 00
10577 copy->cpy_hdr.page_shift = src_map->hdr.page_shift;
10578 #else
10579 /*
10580 * The copy entries can be broken down for a variety of reasons,
10581 * so we can't guarantee that they will remain map-aligned...
10582 * Will need to adjust the first copy_entry's "vme_start" and
10583 * the last copy_entry's "vme_end" to be rounded to PAGE_MASK
10584 * rather than the original map's alignment.
10585 */
10586 copy->cpy_hdr.page_shift = PAGE_SHIFT;
10587 #endif
10588
10589 vm_map_store_init( &(copy->cpy_hdr) );
10590
10591 copy->offset = src_addr;
10592 copy->size = len;
10593
10594 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
10595
10596 #define RETURN(x) \
10597 MACRO_BEGIN \
10598 vm_map_unlock(src_map); \
10599 if(src_map != base_map) \
10600 vm_map_deallocate(src_map); \
10601 if (new_entry != VM_MAP_ENTRY_NULL) \
10602 vm_map_copy_entry_dispose(copy,new_entry); \
10603 vm_map_copy_discard(copy); \
10604 { \
10605 submap_map_t *_ptr; \
10606 \
10607 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
10608 parent_maps=parent_maps->next; \
10609 if (_ptr->parent_map != base_map) \
10610 vm_map_deallocate(_ptr->parent_map); \
10611 kfree(_ptr, sizeof(submap_map_t)); \
10612 } \
10613 } \
10614 MACRO_RETURN(x); \
10615 MACRO_END
10616
10617 /*
10618 * Find the beginning of the region.
10619 */
10620
10621 vm_map_lock(src_map);
10622
10623 /*
10624 * Lookup the original "src_addr" rather than the truncated
10625 * "src_start", in case "src_start" falls in a non-map-aligned
10626 * map entry *before* the map entry that contains "src_addr"...
10627 */
10628 if (!vm_map_lookup_entry(src_map, src_addr, &tmp_entry))
10629 RETURN(KERN_INVALID_ADDRESS);
10630 if(!tmp_entry->is_sub_map) {
10631 /*
10632 * ... but clip to the map-rounded "src_start" rather than
10633 * "src_addr" to preserve map-alignment. We'll adjust the
10634 * first copy entry at the end, if needed.
10635 */
10636 vm_map_clip_start(src_map, tmp_entry, src_start);
10637 }
10638 if (src_start < tmp_entry->vme_start) {
10639 /*
10640 * Move "src_start" up to the start of the
10641 * first map entry to copy.
10642 */
10643 src_start = tmp_entry->vme_start;
10644 }
10645 /* set for later submap fix-up */
10646 copy_addr = src_start;
10647
10648 /*
10649 * Go through entries until we get to the end.
10650 */
10651
10652 while (TRUE) {
10653 vm_map_entry_t src_entry = tmp_entry; /* Top-level entry */
10654 vm_map_size_t src_size; /* Size of source
10655 * map entry (in both
10656 * maps)
10657 */
10658
10659 vm_object_t src_object; /* Object to copy */
10660 vm_object_offset_t src_offset;
10661
10662 boolean_t src_needs_copy; /* Should source map
10663 * be made read-only
10664 * for copy-on-write?
10665 */
10666
10667 boolean_t new_entry_needs_copy; /* Will new entry be COW? */
10668
10669 boolean_t was_wired; /* Was source wired? */
10670 vm_map_version_t version; /* Version before locks
10671 * dropped to make copy
10672 */
10673 kern_return_t result; /* Return value from
10674 * copy_strategically.
10675 */
10676 while(tmp_entry->is_sub_map) {
10677 vm_map_size_t submap_len;
10678 submap_map_t *ptr;
10679
10680 ptr = (submap_map_t *)kalloc(sizeof(submap_map_t));
10681 ptr->next = parent_maps;
10682 parent_maps = ptr;
10683 ptr->parent_map = src_map;
10684 ptr->base_start = src_start;
10685 ptr->base_end = src_end;
10686 submap_len = tmp_entry->vme_end - src_start;
10687 if(submap_len > (src_end-src_start))
10688 submap_len = src_end-src_start;
10689 ptr->base_len = submap_len;
10690
10691 src_start -= tmp_entry->vme_start;
10692 src_start += VME_OFFSET(tmp_entry);
10693 src_end = src_start + submap_len;
10694 src_map = VME_SUBMAP(tmp_entry);
10695 vm_map_lock(src_map);
10696 /* keep an outstanding reference for all maps in */
10697 /* the parents tree except the base map */
10698 vm_map_reference(src_map);
10699 vm_map_unlock(ptr->parent_map);
10700 if (!vm_map_lookup_entry(
10701 src_map, src_start, &tmp_entry))
10702 RETURN(KERN_INVALID_ADDRESS);
10703 map_share = TRUE;
10704 if(!tmp_entry->is_sub_map)
10705 vm_map_clip_start(src_map, tmp_entry, src_start);
10706 src_entry = tmp_entry;
10707 }
10708 /* we are now in the lowest level submap... */
10709
10710 if ((VME_OBJECT(tmp_entry) != VM_OBJECT_NULL) &&
10711 (VME_OBJECT(tmp_entry)->phys_contiguous)) {
10712 /* This is not, supported for now.In future */
10713 /* we will need to detect the phys_contig */
10714 /* condition and then upgrade copy_slowly */
10715 /* to do physical copy from the device mem */
10716 /* based object. We can piggy-back off of */
10717 /* the was wired boolean to set-up the */
10718 /* proper handling */
10719 RETURN(KERN_PROTECTION_FAILURE);
10720 }
10721 /*
10722 * Create a new address map entry to hold the result.
10723 * Fill in the fields from the appropriate source entries.
10724 * We must unlock the source map to do this if we need
10725 * to allocate a map entry.
10726 */
10727 if (new_entry == VM_MAP_ENTRY_NULL) {
10728 version.main_timestamp = src_map->timestamp;
10729 vm_map_unlock(src_map);
10730
10731 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
10732
10733 vm_map_lock(src_map);
10734 if ((version.main_timestamp + 1) != src_map->timestamp) {
10735 if (!vm_map_lookup_entry(src_map, src_start,
10736 &tmp_entry)) {
10737 RETURN(KERN_INVALID_ADDRESS);
10738 }
10739 if (!tmp_entry->is_sub_map)
10740 vm_map_clip_start(src_map, tmp_entry, src_start);
10741 continue; /* restart w/ new tmp_entry */
10742 }
10743 }
10744
10745 /*
10746 * Verify that the region can be read.
10747 */
10748 if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE &&
10749 !use_maxprot) ||
10750 (src_entry->max_protection & VM_PROT_READ) == 0)
10751 RETURN(KERN_PROTECTION_FAILURE);
10752
10753 /*
10754 * Clip against the endpoints of the entire region.
10755 */
10756
10757 vm_map_clip_end(src_map, src_entry, src_end);
10758
10759 src_size = src_entry->vme_end - src_start;
10760 src_object = VME_OBJECT(src_entry);
10761 src_offset = VME_OFFSET(src_entry);
10762 was_wired = (src_entry->wired_count != 0);
10763
10764 vm_map_entry_copy(new_entry, src_entry);
10765 if (new_entry->is_sub_map) {
10766 /* clr address space specifics */
10767 new_entry->use_pmap = FALSE;
10768 } else {
10769 /*
10770 * We're dealing with a copy-on-write operation,
10771 * so the resulting mapping should not inherit the
10772 * original mapping's accounting settings.
10773 * "iokit_acct" should have been cleared in
10774 * vm_map_entry_copy().
10775 * "use_pmap" should be reset to its default (TRUE)
10776 * so that the new mapping gets accounted for in
10777 * the task's memory footprint.
10778 */
10779 assert(!new_entry->iokit_acct);
10780 new_entry->use_pmap = TRUE;
10781 }
10782
10783 /*
10784 * Attempt non-blocking copy-on-write optimizations.
10785 */
10786
10787 if (src_destroy &&
10788 (src_object == VM_OBJECT_NULL ||
10789 (src_object->internal &&
10790 src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
10791 !map_share))) {
10792 /*
10793 * If we are destroying the source, and the object
10794 * is internal, we can move the object reference
10795 * from the source to the copy. The copy is
10796 * copy-on-write only if the source is.
10797 * We make another reference to the object, because
10798 * destroying the source entry will deallocate it.
10799 */
10800 vm_object_reference(src_object);
10801
10802 /*
10803 * Copy is always unwired. vm_map_copy_entry
10804 * set its wired count to zero.
10805 */
10806
10807 goto CopySuccessful;
10808 }
10809
10810
10811 RestartCopy:
10812 XPR(XPR_VM_MAP, "vm_map_copyin_common src_obj 0x%x ent 0x%x obj 0x%x was_wired %d\n",
10813 src_object, new_entry, VME_OBJECT(new_entry),
10814 was_wired, 0);
10815 if ((src_object == VM_OBJECT_NULL ||
10816 (!was_wired && !map_share && !tmp_entry->is_shared)) &&
10817 vm_object_copy_quickly(
10818 &VME_OBJECT(new_entry),
10819 src_offset,
10820 src_size,
10821 &src_needs_copy,
10822 &new_entry_needs_copy)) {
10823
10824 new_entry->needs_copy = new_entry_needs_copy;
10825
10826 /*
10827 * Handle copy-on-write obligations
10828 */
10829
10830 if (src_needs_copy && !tmp_entry->needs_copy) {
10831 vm_prot_t prot;
10832
10833 prot = src_entry->protection & ~VM_PROT_WRITE;
10834
10835 if (override_nx(src_map, VME_ALIAS(src_entry))
10836 && prot)
10837 prot |= VM_PROT_EXECUTE;
10838
10839 vm_object_pmap_protect(
10840 src_object,
10841 src_offset,
10842 src_size,
10843 (src_entry->is_shared ?
10844 PMAP_NULL
10845 : src_map->pmap),
10846 src_entry->vme_start,
10847 prot);
10848
10849 assert(tmp_entry->wired_count == 0);
10850 tmp_entry->needs_copy = TRUE;
10851 }
10852
10853 /*
10854 * The map has never been unlocked, so it's safe
10855 * to move to the next entry rather than doing
10856 * another lookup.
10857 */
10858
10859 goto CopySuccessful;
10860 }
10861
10862 entry_was_shared = tmp_entry->is_shared;
10863
10864 /*
10865 * Take an object reference, so that we may
10866 * release the map lock(s).
10867 */
10868
10869 assert(src_object != VM_OBJECT_NULL);
10870 vm_object_reference(src_object);
10871
10872 /*
10873 * Record the timestamp for later verification.
10874 * Unlock the map.
10875 */
10876
10877 version.main_timestamp = src_map->timestamp;
10878 vm_map_unlock(src_map); /* Increments timestamp once! */
10879 saved_src_entry = src_entry;
10880 tmp_entry = VM_MAP_ENTRY_NULL;
10881 src_entry = VM_MAP_ENTRY_NULL;
10882
10883 /*
10884 * Perform the copy
10885 */
10886
10887 if (was_wired) {
10888 CopySlowly:
10889 vm_object_lock(src_object);
10890 result = vm_object_copy_slowly(
10891 src_object,
10892 src_offset,
10893 src_size,
10894 THREAD_UNINT,
10895 &VME_OBJECT(new_entry));
10896 VME_OFFSET_SET(new_entry, 0);
10897 new_entry->needs_copy = FALSE;
10898 }
10899 else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
10900 (entry_was_shared || map_share)) {
10901 vm_object_t new_object;
10902
10903 vm_object_lock_shared(src_object);
10904 new_object = vm_object_copy_delayed(
10905 src_object,
10906 src_offset,
10907 src_size,
10908 TRUE);
10909 if (new_object == VM_OBJECT_NULL)
10910 goto CopySlowly;
10911
10912 VME_OBJECT_SET(new_entry, new_object);
10913 assert(new_entry->wired_count == 0);
10914 new_entry->needs_copy = TRUE;
10915 assert(!new_entry->iokit_acct);
10916 assert(new_object->purgable == VM_PURGABLE_DENY);
10917 assertf(new_entry->use_pmap, "src_map %p new_entry %p\n", src_map, new_entry);
10918 result = KERN_SUCCESS;
10919
10920 } else {
10921 vm_object_offset_t new_offset;
10922 new_offset = VME_OFFSET(new_entry);
10923 result = vm_object_copy_strategically(src_object,
10924 src_offset,
10925 src_size,
10926 &VME_OBJECT(new_entry),
10927 &new_offset,
10928 &new_entry_needs_copy);
10929 if (new_offset != VME_OFFSET(new_entry)) {
10930 VME_OFFSET_SET(new_entry, new_offset);
10931 }
10932
10933 new_entry->needs_copy = new_entry_needs_copy;
10934 }
10935
10936 if (result == KERN_SUCCESS &&
10937 preserve_purgeable &&
10938 src_object->purgable != VM_PURGABLE_DENY) {
10939 vm_object_t new_object;
10940
10941 new_object = VME_OBJECT(new_entry);
10942 assert(new_object != src_object);
10943 vm_object_lock(new_object);
10944 assert(new_object->ref_count == 1);
10945 assert(new_object->shadow == VM_OBJECT_NULL);
10946 assert(new_object->copy == VM_OBJECT_NULL);
10947 assert(new_object->vo_purgeable_owner == NULL);
10948
10949 new_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
10950 new_object->true_share = TRUE;
10951 /* start as non-volatile with no owner... */
10952 new_object->purgable = VM_PURGABLE_NONVOLATILE;
10953 vm_purgeable_nonvolatile_enqueue(new_object, NULL);
10954 /* ... and move to src_object's purgeable state */
10955 if (src_object->purgable != VM_PURGABLE_NONVOLATILE) {
10956 int state;
10957 state = src_object->purgable;
10958 vm_object_purgable_control(
10959 new_object,
10960 VM_PURGABLE_SET_STATE_FROM_KERNEL,
10961 &state);
10962 }
10963 vm_object_unlock(new_object);
10964 new_object = VM_OBJECT_NULL;
10965 /* no pmap accounting for purgeable objects */
10966 new_entry->use_pmap = FALSE;
10967 }
10968
10969 if (result != KERN_SUCCESS &&
10970 result != KERN_MEMORY_RESTART_COPY) {
10971 vm_map_lock(src_map);
10972 RETURN(result);
10973 }
10974
10975 /*
10976 * Throw away the extra reference
10977 */
10978
10979 vm_object_deallocate(src_object);
10980
10981 /*
10982 * Verify that the map has not substantially
10983 * changed while the copy was being made.
10984 */
10985
10986 vm_map_lock(src_map);
10987
10988 if ((version.main_timestamp + 1) == src_map->timestamp) {
10989 /* src_map hasn't changed: src_entry is still valid */
10990 src_entry = saved_src_entry;
10991 goto VerificationSuccessful;
10992 }
10993
10994 /*
10995 * Simple version comparison failed.
10996 *
10997 * Retry the lookup and verify that the
10998 * same object/offset are still present.
10999 *
11000 * [Note: a memory manager that colludes with
11001 * the calling task can detect that we have
11002 * cheated. While the map was unlocked, the
11003 * mapping could have been changed and restored.]
11004 */
11005
11006 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
11007 if (result != KERN_MEMORY_RESTART_COPY) {
11008 vm_object_deallocate(VME_OBJECT(new_entry));
11009 VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
11010 /* reset accounting state */
11011 new_entry->iokit_acct = FALSE;
11012 new_entry->use_pmap = TRUE;
11013 }
11014 RETURN(KERN_INVALID_ADDRESS);
11015 }
11016
11017 src_entry = tmp_entry;
11018 vm_map_clip_start(src_map, src_entry, src_start);
11019
11020 if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) &&
11021 !use_maxprot) ||
11022 ((src_entry->max_protection & VM_PROT_READ) == 0))
11023 goto VerificationFailed;
11024
11025 if (src_entry->vme_end < new_entry->vme_end) {
11026 /*
11027 * This entry might have been shortened
11028 * (vm_map_clip_end) or been replaced with
11029 * an entry that ends closer to "src_start"
11030 * than before.
11031 * Adjust "new_entry" accordingly; copying
11032 * less memory would be correct but we also
11033 * redo the copy (see below) if the new entry
11034 * no longer points at the same object/offset.
11035 */
11036 assert(VM_MAP_PAGE_ALIGNED(src_entry->vme_end,
11037 VM_MAP_COPY_PAGE_MASK(copy)));
11038 new_entry->vme_end = src_entry->vme_end;
11039 src_size = new_entry->vme_end - src_start;
11040 } else if (src_entry->vme_end > new_entry->vme_end) {
11041 /*
11042 * This entry might have been extended
11043 * (vm_map_entry_simplify() or coalesce)
11044 * or been replaced with an entry that ends farther
11045 * from "src_start" than before.
11046 *
11047 * We've called vm_object_copy_*() only on
11048 * the previous <start:end> range, so we can't
11049 * just extend new_entry. We have to re-do
11050 * the copy based on the new entry as if it was
11051 * pointing at a different object/offset (see
11052 * "Verification failed" below).
11053 */
11054 }
11055
11056 if ((VME_OBJECT(src_entry) != src_object) ||
11057 (VME_OFFSET(src_entry) != src_offset) ||
11058 (src_entry->vme_end > new_entry->vme_end)) {
11059
11060 /*
11061 * Verification failed.
11062 *
11063 * Start over with this top-level entry.
11064 */
11065
11066 VerificationFailed: ;
11067
11068 vm_object_deallocate(VME_OBJECT(new_entry));
11069 tmp_entry = src_entry;
11070 continue;
11071 }
11072
11073 /*
11074 * Verification succeeded.
11075 */
11076
11077 VerificationSuccessful: ;
11078
11079 if (result == KERN_MEMORY_RESTART_COPY)
11080 goto RestartCopy;
11081
11082 /*
11083 * Copy succeeded.
11084 */
11085
11086 CopySuccessful: ;
11087
11088 /*
11089 * Link in the new copy entry.
11090 */
11091
11092 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
11093 new_entry);
11094
11095 /*
11096 * Determine whether the entire region
11097 * has been copied.
11098 */
11099 src_base = src_start;
11100 src_start = new_entry->vme_end;
11101 new_entry = VM_MAP_ENTRY_NULL;
11102 while ((src_start >= src_end) && (src_end != 0)) {
11103 submap_map_t *ptr;
11104
11105 if (src_map == base_map) {
11106 /* back to the top */
11107 break;
11108 }
11109
11110 ptr = parent_maps;
11111 assert(ptr != NULL);
11112 parent_maps = parent_maps->next;
11113
11114 /* fix up the damage we did in that submap */
11115 vm_map_simplify_range(src_map,
11116 src_base,
11117 src_end);
11118
11119 vm_map_unlock(src_map);
11120 vm_map_deallocate(src_map);
11121 vm_map_lock(ptr->parent_map);
11122 src_map = ptr->parent_map;
11123 src_base = ptr->base_start;
11124 src_start = ptr->base_start + ptr->base_len;
11125 src_end = ptr->base_end;
11126 if (!vm_map_lookup_entry(src_map,
11127 src_start,
11128 &tmp_entry) &&
11129 (src_end > src_start)) {
11130 RETURN(KERN_INVALID_ADDRESS);
11131 }
11132 kfree(ptr, sizeof(submap_map_t));
11133 if (parent_maps == NULL)
11134 map_share = FALSE;
11135 src_entry = tmp_entry->vme_prev;
11136 }
11137
11138 if ((VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) &&
11139 (src_start >= src_addr + len) &&
11140 (src_addr + len != 0)) {
11141 /*
11142 * Stop copying now, even though we haven't reached
11143 * "src_end". We'll adjust the end of the last copy
11144 * entry at the end, if needed.
11145 *
11146 * If src_map's aligment is different from the
11147 * system's page-alignment, there could be
11148 * extra non-map-aligned map entries between
11149 * the original (non-rounded) "src_addr + len"
11150 * and the rounded "src_end".
11151 * We do not want to copy those map entries since
11152 * they're not part of the copied range.
11153 */
11154 break;
11155 }
11156
11157 if ((src_start >= src_end) && (src_end != 0))
11158 break;
11159
11160 /*
11161 * Verify that there are no gaps in the region
11162 */
11163
11164 tmp_entry = src_entry->vme_next;
11165 if ((tmp_entry->vme_start != src_start) ||
11166 (tmp_entry == vm_map_to_entry(src_map))) {
11167 RETURN(KERN_INVALID_ADDRESS);
11168 }
11169 }
11170
11171 /*
11172 * If the source should be destroyed, do it now, since the
11173 * copy was successful.
11174 */
11175 if (src_destroy) {
11176 (void) vm_map_delete(
11177 src_map,
11178 vm_map_trunc_page(src_addr,
11179 VM_MAP_PAGE_MASK(src_map)),
11180 src_end,
11181 ((src_map == kernel_map) ?
11182 VM_MAP_REMOVE_KUNWIRE :
11183 VM_MAP_NO_FLAGS),
11184 VM_MAP_NULL);
11185 } else {
11186 /* fix up the damage we did in the base map */
11187 vm_map_simplify_range(
11188 src_map,
11189 vm_map_trunc_page(src_addr,
11190 VM_MAP_PAGE_MASK(src_map)),
11191 vm_map_round_page(src_end,
11192 VM_MAP_PAGE_MASK(src_map)));
11193 }
11194
11195 vm_map_unlock(src_map);
11196 tmp_entry = VM_MAP_ENTRY_NULL;
11197
11198 if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) {
11199 vm_map_offset_t original_start, original_offset, original_end;
11200
11201 assert(VM_MAP_COPY_PAGE_MASK(copy) == PAGE_MASK);
11202
11203 /* adjust alignment of first copy_entry's "vme_start" */
11204 tmp_entry = vm_map_copy_first_entry(copy);
11205 if (tmp_entry != vm_map_copy_to_entry(copy)) {
11206 vm_map_offset_t adjustment;
11207
11208 original_start = tmp_entry->vme_start;
11209 original_offset = VME_OFFSET(tmp_entry);
11210
11211 /* map-align the start of the first copy entry... */
11212 adjustment = (tmp_entry->vme_start -
11213 vm_map_trunc_page(
11214 tmp_entry->vme_start,
11215 VM_MAP_PAGE_MASK(src_map)));
11216 tmp_entry->vme_start -= adjustment;
11217 VME_OFFSET_SET(tmp_entry,
11218 VME_OFFSET(tmp_entry) - adjustment);
11219 copy_addr -= adjustment;
11220 assert(tmp_entry->vme_start < tmp_entry->vme_end);
11221 /* ... adjust for mis-aligned start of copy range */
11222 adjustment =
11223 (vm_map_trunc_page(copy->offset,
11224 PAGE_MASK) -
11225 vm_map_trunc_page(copy->offset,
11226 VM_MAP_PAGE_MASK(src_map)));
11227 if (adjustment) {
11228 assert(page_aligned(adjustment));
11229 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
11230 tmp_entry->vme_start += adjustment;
11231 VME_OFFSET_SET(tmp_entry,
11232 (VME_OFFSET(tmp_entry) +
11233 adjustment));
11234 copy_addr += adjustment;
11235 assert(tmp_entry->vme_start < tmp_entry->vme_end);
11236 }
11237
11238 /*
11239 * Assert that the adjustments haven't exposed
11240 * more than was originally copied...
11241 */
11242 assert(tmp_entry->vme_start >= original_start);
11243 assert(VME_OFFSET(tmp_entry) >= original_offset);
11244 /*
11245 * ... and that it did not adjust outside of a
11246 * a single 16K page.
11247 */
11248 assert(vm_map_trunc_page(tmp_entry->vme_start,
11249 VM_MAP_PAGE_MASK(src_map)) ==
11250 vm_map_trunc_page(original_start,
11251 VM_MAP_PAGE_MASK(src_map)));
11252 }
11253
11254 /* adjust alignment of last copy_entry's "vme_end" */
11255 tmp_entry = vm_map_copy_last_entry(copy);
11256 if (tmp_entry != vm_map_copy_to_entry(copy)) {
11257 vm_map_offset_t adjustment;
11258
11259 original_end = tmp_entry->vme_end;
11260
11261 /* map-align the end of the last copy entry... */
11262 tmp_entry->vme_end =
11263 vm_map_round_page(tmp_entry->vme_end,
11264 VM_MAP_PAGE_MASK(src_map));
11265 /* ... adjust for mis-aligned end of copy range */
11266 adjustment =
11267 (vm_map_round_page((copy->offset +
11268 copy->size),
11269 VM_MAP_PAGE_MASK(src_map)) -
11270 vm_map_round_page((copy->offset +
11271 copy->size),
11272 PAGE_MASK));
11273 if (adjustment) {
11274 assert(page_aligned(adjustment));
11275 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
11276 tmp_entry->vme_end -= adjustment;
11277 assert(tmp_entry->vme_start < tmp_entry->vme_end);
11278 }
11279
11280 /*
11281 * Assert that the adjustments haven't exposed
11282 * more than was originally copied...
11283 */
11284 assert(tmp_entry->vme_end <= original_end);
11285 /*
11286 * ... and that it did not adjust outside of a
11287 * a single 16K page.
11288 */
11289 assert(vm_map_round_page(tmp_entry->vme_end,
11290 VM_MAP_PAGE_MASK(src_map)) ==
11291 vm_map_round_page(original_end,
11292 VM_MAP_PAGE_MASK(src_map)));
11293 }
11294 }
11295
11296 /* Fix-up start and end points in copy. This is necessary */
11297 /* when the various entries in the copy object were picked */
11298 /* up from different sub-maps */
11299
11300 tmp_entry = vm_map_copy_first_entry(copy);
11301 copy_size = 0; /* compute actual size */
11302 while (tmp_entry != vm_map_copy_to_entry(copy)) {
11303 assert(VM_MAP_PAGE_ALIGNED(
11304 copy_addr + (tmp_entry->vme_end -
11305 tmp_entry->vme_start),
11306 VM_MAP_COPY_PAGE_MASK(copy)));
11307 assert(VM_MAP_PAGE_ALIGNED(
11308 copy_addr,
11309 VM_MAP_COPY_PAGE_MASK(copy)));
11310
11311 /*
11312 * The copy_entries will be injected directly into the
11313 * destination map and might not be "map aligned" there...
11314 */
11315 tmp_entry->map_aligned = FALSE;
11316
11317 tmp_entry->vme_end = copy_addr +
11318 (tmp_entry->vme_end - tmp_entry->vme_start);
11319 tmp_entry->vme_start = copy_addr;
11320 assert(tmp_entry->vme_start < tmp_entry->vme_end);
11321 copy_addr += tmp_entry->vme_end - tmp_entry->vme_start;
11322 copy_size += tmp_entry->vme_end - tmp_entry->vme_start;
11323 tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next;
11324 }
11325
11326 if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT &&
11327 copy_size < copy->size) {
11328 /*
11329 * The actual size of the VM map copy is smaller than what
11330 * was requested by the caller. This must be because some
11331 * PAGE_SIZE-sized pages are missing at the end of the last
11332 * VM_MAP_PAGE_SIZE(src_map)-sized chunk of the range.
11333 * The caller might not have been aware of those missing
11334 * pages and might not want to be aware of it, which is
11335 * fine as long as they don't try to access (and crash on)
11336 * those missing pages.
11337 * Let's adjust the size of the "copy", to avoid failing
11338 * in vm_map_copyout() or vm_map_copy_overwrite().
11339 */
11340 assert(vm_map_round_page(copy_size,
11341 VM_MAP_PAGE_MASK(src_map)) ==
11342 vm_map_round_page(copy->size,
11343 VM_MAP_PAGE_MASK(src_map)));
11344 copy->size = copy_size;
11345 }
11346
11347 *copy_result = copy;
11348 return(KERN_SUCCESS);
11349
11350 #undef RETURN
11351 }
11352
11353 kern_return_t
11354 vm_map_copy_extract(
11355 vm_map_t src_map,
11356 vm_map_address_t src_addr,
11357 vm_map_size_t len,
11358 vm_map_copy_t *copy_result, /* OUT */
11359 vm_prot_t *cur_prot, /* OUT */
11360 vm_prot_t *max_prot)
11361 {
11362 vm_map_offset_t src_start, src_end;
11363 vm_map_copy_t copy;
11364 kern_return_t kr;
11365
11366 /*
11367 * Check for copies of zero bytes.
11368 */
11369
11370 if (len == 0) {
11371 *copy_result = VM_MAP_COPY_NULL;
11372 return(KERN_SUCCESS);
11373 }
11374
11375 /*
11376 * Check that the end address doesn't overflow
11377 */
11378 src_end = src_addr + len;
11379 if (src_end < src_addr)
11380 return KERN_INVALID_ADDRESS;
11381
11382 /*
11383 * Compute (page aligned) start and end of region
11384 */
11385 src_start = vm_map_trunc_page(src_addr, PAGE_MASK);
11386 src_end = vm_map_round_page(src_end, PAGE_MASK);
11387
11388 /*
11389 * Allocate a header element for the list.
11390 *
11391 * Use the start and end in the header to
11392 * remember the endpoints prior to rounding.
11393 */
11394
11395 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
11396 copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
11397 vm_map_copy_first_entry(copy) =
11398 vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
11399 copy->type = VM_MAP_COPY_ENTRY_LIST;
11400 copy->cpy_hdr.nentries = 0;
11401 copy->cpy_hdr.entries_pageable = TRUE;
11402
11403 vm_map_store_init(&copy->cpy_hdr);
11404
11405 copy->offset = 0;
11406 copy->size = len;
11407
11408 kr = vm_map_remap_extract(src_map,
11409 src_addr,
11410 len,
11411 FALSE, /* copy */
11412 &copy->cpy_hdr,
11413 cur_prot,
11414 max_prot,
11415 VM_INHERIT_SHARE,
11416 TRUE, /* pageable */
11417 FALSE, /* same_map */
11418 VM_MAP_KERNEL_FLAGS_NONE);
11419 if (kr != KERN_SUCCESS) {
11420 vm_map_copy_discard(copy);
11421 return kr;
11422 }
11423
11424 *copy_result = copy;
11425 return KERN_SUCCESS;
11426 }
11427
11428 /*
11429 * vm_map_copyin_object:
11430 *
11431 * Create a copy object from an object.
11432 * Our caller donates an object reference.
11433 */
11434
11435 kern_return_t
11436 vm_map_copyin_object(
11437 vm_object_t object,
11438 vm_object_offset_t offset, /* offset of region in object */
11439 vm_object_size_t size, /* size of region in object */
11440 vm_map_copy_t *copy_result) /* OUT */
11441 {
11442 vm_map_copy_t copy; /* Resulting copy */
11443
11444 /*
11445 * We drop the object into a special copy object
11446 * that contains the object directly.
11447 */
11448
11449 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
11450 copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
11451 copy->type = VM_MAP_COPY_OBJECT;
11452 copy->cpy_object = object;
11453 copy->offset = offset;
11454 copy->size = size;
11455
11456 *copy_result = copy;
11457 return(KERN_SUCCESS);
11458 }
11459
11460 static void
11461 vm_map_fork_share(
11462 vm_map_t old_map,
11463 vm_map_entry_t old_entry,
11464 vm_map_t new_map)
11465 {
11466 vm_object_t object;
11467 vm_map_entry_t new_entry;
11468
11469 /*
11470 * New sharing code. New map entry
11471 * references original object. Internal
11472 * objects use asynchronous copy algorithm for
11473 * future copies. First make sure we have
11474 * the right object. If we need a shadow,
11475 * or someone else already has one, then
11476 * make a new shadow and share it.
11477 */
11478
11479 object = VME_OBJECT(old_entry);
11480 if (old_entry->is_sub_map) {
11481 assert(old_entry->wired_count == 0);
11482 #ifndef NO_NESTED_PMAP
11483 if(old_entry->use_pmap) {
11484 kern_return_t result;
11485
11486 result = pmap_nest(new_map->pmap,
11487 (VME_SUBMAP(old_entry))->pmap,
11488 (addr64_t)old_entry->vme_start,
11489 (addr64_t)old_entry->vme_start,
11490 (uint64_t)(old_entry->vme_end - old_entry->vme_start));
11491 if(result)
11492 panic("vm_map_fork_share: pmap_nest failed!");
11493 }
11494 #endif /* NO_NESTED_PMAP */
11495 } else if (object == VM_OBJECT_NULL) {
11496 object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
11497 old_entry->vme_start));
11498 VME_OFFSET_SET(old_entry, 0);
11499 VME_OBJECT_SET(old_entry, object);
11500 old_entry->use_pmap = TRUE;
11501 // assert(!old_entry->needs_copy);
11502 } else if (object->copy_strategy !=
11503 MEMORY_OBJECT_COPY_SYMMETRIC) {
11504
11505 /*
11506 * We are already using an asymmetric
11507 * copy, and therefore we already have
11508 * the right object.
11509 */
11510
11511 assert(! old_entry->needs_copy);
11512 }
11513 else if (old_entry->needs_copy || /* case 1 */
11514 object->shadowed || /* case 2 */
11515 (!object->true_share && /* case 3 */
11516 !old_entry->is_shared &&
11517 (object->vo_size >
11518 (vm_map_size_t)(old_entry->vme_end -
11519 old_entry->vme_start)))) {
11520
11521 /*
11522 * We need to create a shadow.
11523 * There are three cases here.
11524 * In the first case, we need to
11525 * complete a deferred symmetrical
11526 * copy that we participated in.
11527 * In the second and third cases,
11528 * we need to create the shadow so
11529 * that changes that we make to the
11530 * object do not interfere with
11531 * any symmetrical copies which
11532 * have occured (case 2) or which
11533 * might occur (case 3).
11534 *
11535 * The first case is when we had
11536 * deferred shadow object creation
11537 * via the entry->needs_copy mechanism.
11538 * This mechanism only works when
11539 * only one entry points to the source
11540 * object, and we are about to create
11541 * a second entry pointing to the
11542 * same object. The problem is that
11543 * there is no way of mapping from
11544 * an object to the entries pointing
11545 * to it. (Deferred shadow creation
11546 * works with one entry because occurs
11547 * at fault time, and we walk from the
11548 * entry to the object when handling
11549 * the fault.)
11550 *
11551 * The second case is when the object
11552 * to be shared has already been copied
11553 * with a symmetric copy, but we point
11554 * directly to the object without
11555 * needs_copy set in our entry. (This
11556 * can happen because different ranges
11557 * of an object can be pointed to by
11558 * different entries. In particular,
11559 * a single entry pointing to an object
11560 * can be split by a call to vm_inherit,
11561 * which, combined with task_create, can
11562 * result in the different entries
11563 * having different needs_copy values.)
11564 * The shadowed flag in the object allows
11565 * us to detect this case. The problem
11566 * with this case is that if this object
11567 * has or will have shadows, then we
11568 * must not perform an asymmetric copy
11569 * of this object, since such a copy
11570 * allows the object to be changed, which
11571 * will break the previous symmetrical
11572 * copies (which rely upon the object
11573 * not changing). In a sense, the shadowed
11574 * flag says "don't change this object".
11575 * We fix this by creating a shadow
11576 * object for this object, and sharing
11577 * that. This works because we are free
11578 * to change the shadow object (and thus
11579 * to use an asymmetric copy strategy);
11580 * this is also semantically correct,
11581 * since this object is temporary, and
11582 * therefore a copy of the object is
11583 * as good as the object itself. (This
11584 * is not true for permanent objects,
11585 * since the pager needs to see changes,
11586 * which won't happen if the changes
11587 * are made to a copy.)
11588 *
11589 * The third case is when the object
11590 * to be shared has parts sticking
11591 * outside of the entry we're working
11592 * with, and thus may in the future
11593 * be subject to a symmetrical copy.
11594 * (This is a preemptive version of
11595 * case 2.)
11596 */
11597 VME_OBJECT_SHADOW(old_entry,
11598 (vm_map_size_t) (old_entry->vme_end -
11599 old_entry->vme_start));
11600
11601 /*
11602 * If we're making a shadow for other than
11603 * copy on write reasons, then we have
11604 * to remove write permission.
11605 */
11606
11607 if (!old_entry->needs_copy &&
11608 (old_entry->protection & VM_PROT_WRITE)) {
11609 vm_prot_t prot;
11610
11611 assert(!pmap_has_prot_policy(old_entry->protection));
11612
11613 prot = old_entry->protection & ~VM_PROT_WRITE;
11614
11615 assert(!pmap_has_prot_policy(prot));
11616
11617 if (override_nx(old_map, VME_ALIAS(old_entry)) && prot)
11618 prot |= VM_PROT_EXECUTE;
11619
11620
11621 if (old_map->mapped_in_other_pmaps) {
11622 vm_object_pmap_protect(
11623 VME_OBJECT(old_entry),
11624 VME_OFFSET(old_entry),
11625 (old_entry->vme_end -
11626 old_entry->vme_start),
11627 PMAP_NULL,
11628 old_entry->vme_start,
11629 prot);
11630 } else {
11631 pmap_protect(old_map->pmap,
11632 old_entry->vme_start,
11633 old_entry->vme_end,
11634 prot);
11635 }
11636 }
11637
11638 old_entry->needs_copy = FALSE;
11639 object = VME_OBJECT(old_entry);
11640 }
11641
11642
11643 /*
11644 * If object was using a symmetric copy strategy,
11645 * change its copy strategy to the default
11646 * asymmetric copy strategy, which is copy_delay
11647 * in the non-norma case and copy_call in the
11648 * norma case. Bump the reference count for the
11649 * new entry.
11650 */
11651
11652 if(old_entry->is_sub_map) {
11653 vm_map_lock(VME_SUBMAP(old_entry));
11654 vm_map_reference(VME_SUBMAP(old_entry));
11655 vm_map_unlock(VME_SUBMAP(old_entry));
11656 } else {
11657 vm_object_lock(object);
11658 vm_object_reference_locked(object);
11659 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
11660 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
11661 }
11662 vm_object_unlock(object);
11663 }
11664
11665 /*
11666 * Clone the entry, using object ref from above.
11667 * Mark both entries as shared.
11668 */
11669
11670 new_entry = vm_map_entry_create(new_map, FALSE); /* Never the kernel
11671 * map or descendants */
11672 vm_map_entry_copy(new_entry, old_entry);
11673 old_entry->is_shared = TRUE;
11674 new_entry->is_shared = TRUE;
11675
11676 /*
11677 * We're dealing with a shared mapping, so the resulting mapping
11678 * should inherit some of the original mapping's accounting settings.
11679 * "iokit_acct" should have been cleared in vm_map_entry_copy().
11680 * "use_pmap" should stay the same as before (if it hasn't been reset
11681 * to TRUE when we cleared "iokit_acct").
11682 */
11683 assert(!new_entry->iokit_acct);
11684
11685 /*
11686 * If old entry's inheritence is VM_INHERIT_NONE,
11687 * the new entry is for corpse fork, remove the
11688 * write permission from the new entry.
11689 */
11690 if (old_entry->inheritance == VM_INHERIT_NONE) {
11691
11692 new_entry->protection &= ~VM_PROT_WRITE;
11693 new_entry->max_protection &= ~VM_PROT_WRITE;
11694 }
11695
11696 /*
11697 * Insert the entry into the new map -- we
11698 * know we're inserting at the end of the new
11699 * map.
11700 */
11701
11702 vm_map_store_entry_link(new_map, vm_map_last_entry(new_map), new_entry);
11703
11704 /*
11705 * Update the physical map
11706 */
11707
11708 if (old_entry->is_sub_map) {
11709 /* Bill Angell pmap support goes here */
11710 } else {
11711 pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
11712 old_entry->vme_end - old_entry->vme_start,
11713 old_entry->vme_start);
11714 }
11715 }
11716
11717 static boolean_t
11718 vm_map_fork_copy(
11719 vm_map_t old_map,
11720 vm_map_entry_t *old_entry_p,
11721 vm_map_t new_map,
11722 int vm_map_copyin_flags)
11723 {
11724 vm_map_entry_t old_entry = *old_entry_p;
11725 vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start;
11726 vm_map_offset_t start = old_entry->vme_start;
11727 vm_map_copy_t copy;
11728 vm_map_entry_t last = vm_map_last_entry(new_map);
11729
11730 vm_map_unlock(old_map);
11731 /*
11732 * Use maxprot version of copyin because we
11733 * care about whether this memory can ever
11734 * be accessed, not just whether it's accessible
11735 * right now.
11736 */
11737 vm_map_copyin_flags |= VM_MAP_COPYIN_USE_MAXPROT;
11738 if (vm_map_copyin_internal(old_map, start, entry_size,
11739 vm_map_copyin_flags, &copy)
11740 != KERN_SUCCESS) {
11741 /*
11742 * The map might have changed while it
11743 * was unlocked, check it again. Skip
11744 * any blank space or permanently
11745 * unreadable region.
11746 */
11747 vm_map_lock(old_map);
11748 if (!vm_map_lookup_entry(old_map, start, &last) ||
11749 (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) {
11750 last = last->vme_next;
11751 }
11752 *old_entry_p = last;
11753
11754 /*
11755 * XXX For some error returns, want to
11756 * XXX skip to the next element. Note
11757 * that INVALID_ADDRESS and
11758 * PROTECTION_FAILURE are handled above.
11759 */
11760
11761 return FALSE;
11762 }
11763
11764 /*
11765 * Insert the copy into the new map
11766 */
11767
11768 vm_map_copy_insert(new_map, last, copy);
11769
11770 /*
11771 * Pick up the traversal at the end of
11772 * the copied region.
11773 */
11774
11775 vm_map_lock(old_map);
11776 start += entry_size;
11777 if (! vm_map_lookup_entry(old_map, start, &last)) {
11778 last = last->vme_next;
11779 } else {
11780 if (last->vme_start == start) {
11781 /*
11782 * No need to clip here and we don't
11783 * want to cause any unnecessary
11784 * unnesting...
11785 */
11786 } else {
11787 vm_map_clip_start(old_map, last, start);
11788 }
11789 }
11790 *old_entry_p = last;
11791
11792 return TRUE;
11793 }
11794
11795 /*
11796 * vm_map_fork:
11797 *
11798 * Create and return a new map based on the old
11799 * map, according to the inheritance values on the
11800 * regions in that map and the options.
11801 *
11802 * The source map must not be locked.
11803 */
11804 vm_map_t
11805 vm_map_fork(
11806 ledger_t ledger,
11807 vm_map_t old_map,
11808 int options)
11809 {
11810 pmap_t new_pmap;
11811 vm_map_t new_map;
11812 vm_map_entry_t old_entry;
11813 vm_map_size_t new_size = 0, entry_size;
11814 vm_map_entry_t new_entry;
11815 boolean_t src_needs_copy;
11816 boolean_t new_entry_needs_copy;
11817 boolean_t pmap_is64bit;
11818 int vm_map_copyin_flags;
11819
11820 if (options & ~(VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
11821 VM_MAP_FORK_PRESERVE_PURGEABLE)) {
11822 /* unsupported option */
11823 return VM_MAP_NULL;
11824 }
11825
11826 pmap_is64bit =
11827 #if defined(__i386__) || defined(__x86_64__)
11828 old_map->pmap->pm_task_map != TASK_MAP_32BIT;
11829 #elif defined(__arm64__)
11830 old_map->pmap->max == MACH_VM_MAX_ADDRESS;
11831 #elif defined(__arm__)
11832 FALSE;
11833 #else
11834 #error Unknown architecture.
11835 #endif
11836
11837 new_pmap = pmap_create(ledger, (vm_map_size_t) 0, pmap_is64bit);
11838
11839 vm_map_reference_swap(old_map);
11840 vm_map_lock(old_map);
11841
11842 new_map = vm_map_create(new_pmap,
11843 old_map->min_offset,
11844 old_map->max_offset,
11845 old_map->hdr.entries_pageable);
11846 vm_map_lock(new_map);
11847 vm_commit_pagezero_status(new_map);
11848 /* inherit the parent map's page size */
11849 vm_map_set_page_shift(new_map, VM_MAP_PAGE_SHIFT(old_map));
11850 for (
11851 old_entry = vm_map_first_entry(old_map);
11852 old_entry != vm_map_to_entry(old_map);
11853 ) {
11854
11855 entry_size = old_entry->vme_end - old_entry->vme_start;
11856
11857 switch (old_entry->inheritance) {
11858 case VM_INHERIT_NONE:
11859 /*
11860 * Skip making a share entry if VM_MAP_FORK_SHARE_IF_INHERIT_NONE
11861 * is not passed or it is backed by a device pager.
11862 */
11863 if ((!(options & VM_MAP_FORK_SHARE_IF_INHERIT_NONE)) ||
11864 (!old_entry->is_sub_map &&
11865 VME_OBJECT(old_entry) != NULL &&
11866 VME_OBJECT(old_entry)->pager != NULL &&
11867 is_device_pager_ops(VME_OBJECT(old_entry)->pager->mo_pager_ops))) {
11868 break;
11869 }
11870 /* FALLTHROUGH */
11871
11872 case VM_INHERIT_SHARE:
11873 vm_map_fork_share(old_map, old_entry, new_map);
11874 new_size += entry_size;
11875 break;
11876
11877 case VM_INHERIT_COPY:
11878
11879 /*
11880 * Inline the copy_quickly case;
11881 * upon failure, fall back on call
11882 * to vm_map_fork_copy.
11883 */
11884
11885 if(old_entry->is_sub_map)
11886 break;
11887 if ((old_entry->wired_count != 0) ||
11888 ((VME_OBJECT(old_entry) != NULL) &&
11889 (VME_OBJECT(old_entry)->true_share))) {
11890 goto slow_vm_map_fork_copy;
11891 }
11892
11893 new_entry = vm_map_entry_create(new_map, FALSE); /* never the kernel map or descendants */
11894 vm_map_entry_copy(new_entry, old_entry);
11895 if (new_entry->is_sub_map) {
11896 /* clear address space specifics */
11897 new_entry->use_pmap = FALSE;
11898 } else {
11899 /*
11900 * We're dealing with a copy-on-write operation,
11901 * so the resulting mapping should not inherit
11902 * the original mapping's accounting settings.
11903 * "iokit_acct" should have been cleared in
11904 * vm_map_entry_copy().
11905 * "use_pmap" should be reset to its default
11906 * (TRUE) so that the new mapping gets
11907 * accounted for in the task's memory footprint.
11908 */
11909 assert(!new_entry->iokit_acct);
11910 new_entry->use_pmap = TRUE;
11911 }
11912
11913 if (! vm_object_copy_quickly(
11914 &VME_OBJECT(new_entry),
11915 VME_OFFSET(old_entry),
11916 (old_entry->vme_end -
11917 old_entry->vme_start),
11918 &src_needs_copy,
11919 &new_entry_needs_copy)) {
11920 vm_map_entry_dispose(new_map, new_entry);
11921 goto slow_vm_map_fork_copy;
11922 }
11923
11924 /*
11925 * Handle copy-on-write obligations
11926 */
11927
11928 if (src_needs_copy && !old_entry->needs_copy) {
11929 vm_prot_t prot;
11930
11931 assert(!pmap_has_prot_policy(old_entry->protection));
11932
11933 prot = old_entry->protection & ~VM_PROT_WRITE;
11934
11935 if (override_nx(old_map, VME_ALIAS(old_entry))
11936 && prot)
11937 prot |= VM_PROT_EXECUTE;
11938
11939 assert(!pmap_has_prot_policy(prot));
11940
11941 vm_object_pmap_protect(
11942 VME_OBJECT(old_entry),
11943 VME_OFFSET(old_entry),
11944 (old_entry->vme_end -
11945 old_entry->vme_start),
11946 ((old_entry->is_shared
11947 || old_map->mapped_in_other_pmaps)
11948 ? PMAP_NULL :
11949 old_map->pmap),
11950 old_entry->vme_start,
11951 prot);
11952
11953 assert(old_entry->wired_count == 0);
11954 old_entry->needs_copy = TRUE;
11955 }
11956 new_entry->needs_copy = new_entry_needs_copy;
11957
11958 /*
11959 * Insert the entry at the end
11960 * of the map.
11961 */
11962
11963 vm_map_store_entry_link(new_map, vm_map_last_entry(new_map),
11964 new_entry);
11965 new_size += entry_size;
11966 break;
11967
11968 slow_vm_map_fork_copy:
11969 vm_map_copyin_flags = 0;
11970 if (options & VM_MAP_FORK_PRESERVE_PURGEABLE) {
11971 vm_map_copyin_flags |=
11972 VM_MAP_COPYIN_PRESERVE_PURGEABLE;
11973 }
11974 if (vm_map_fork_copy(old_map,
11975 &old_entry,
11976 new_map,
11977 vm_map_copyin_flags)) {
11978 new_size += entry_size;
11979 }
11980 continue;
11981 }
11982 old_entry = old_entry->vme_next;
11983 }
11984
11985 #if defined(__arm64__)
11986 pmap_insert_sharedpage(new_map->pmap);
11987 #endif
11988
11989 new_map->size = new_size;
11990 vm_map_unlock(new_map);
11991 vm_map_unlock(old_map);
11992 vm_map_deallocate(old_map);
11993
11994 return(new_map);
11995 }
11996
11997 /*
11998 * vm_map_exec:
11999 *
12000 * Setup the "new_map" with the proper execution environment according
12001 * to the type of executable (platform, 64bit, chroot environment).
12002 * Map the comm page and shared region, etc...
12003 */
12004 kern_return_t
12005 vm_map_exec(
12006 vm_map_t new_map,
12007 task_t task,
12008 boolean_t is64bit,
12009 void *fsroot,
12010 cpu_type_t cpu)
12011 {
12012 SHARED_REGION_TRACE_DEBUG(
12013 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): ->\n",
12014 (void *)VM_KERNEL_ADDRPERM(current_task()),
12015 (void *)VM_KERNEL_ADDRPERM(new_map),
12016 (void *)VM_KERNEL_ADDRPERM(task),
12017 (void *)VM_KERNEL_ADDRPERM(fsroot),
12018 cpu));
12019 (void) vm_commpage_enter(new_map, task, is64bit);
12020 (void) vm_shared_region_enter(new_map, task, is64bit, fsroot, cpu);
12021 SHARED_REGION_TRACE_DEBUG(
12022 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): <-\n",
12023 (void *)VM_KERNEL_ADDRPERM(current_task()),
12024 (void *)VM_KERNEL_ADDRPERM(new_map),
12025 (void *)VM_KERNEL_ADDRPERM(task),
12026 (void *)VM_KERNEL_ADDRPERM(fsroot),
12027 cpu));
12028 return KERN_SUCCESS;
12029 }
12030
12031 /*
12032 * vm_map_lookup_locked:
12033 *
12034 * Finds the VM object, offset, and
12035 * protection for a given virtual address in the
12036 * specified map, assuming a page fault of the
12037 * type specified.
12038 *
12039 * Returns the (object, offset, protection) for
12040 * this address, whether it is wired down, and whether
12041 * this map has the only reference to the data in question.
12042 * In order to later verify this lookup, a "version"
12043 * is returned.
12044 *
12045 * The map MUST be locked by the caller and WILL be
12046 * locked on exit. In order to guarantee the
12047 * existence of the returned object, it is returned
12048 * locked.
12049 *
12050 * If a lookup is requested with "write protection"
12051 * specified, the map may be changed to perform virtual
12052 * copying operations, although the data referenced will
12053 * remain the same.
12054 */
12055 kern_return_t
12056 vm_map_lookup_locked(
12057 vm_map_t *var_map, /* IN/OUT */
12058 vm_map_offset_t vaddr,
12059 vm_prot_t fault_type,
12060 int object_lock_type,
12061 vm_map_version_t *out_version, /* OUT */
12062 vm_object_t *object, /* OUT */
12063 vm_object_offset_t *offset, /* OUT */
12064 vm_prot_t *out_prot, /* OUT */
12065 boolean_t *wired, /* OUT */
12066 vm_object_fault_info_t fault_info, /* OUT */
12067 vm_map_t *real_map)
12068 {
12069 vm_map_entry_t entry;
12070 vm_map_t map = *var_map;
12071 vm_map_t old_map = *var_map;
12072 vm_map_t cow_sub_map_parent = VM_MAP_NULL;
12073 vm_map_offset_t cow_parent_vaddr = 0;
12074 vm_map_offset_t old_start = 0;
12075 vm_map_offset_t old_end = 0;
12076 vm_prot_t prot;
12077 boolean_t mask_protections;
12078 boolean_t force_copy;
12079 vm_prot_t original_fault_type;
12080
12081 /*
12082 * VM_PROT_MASK means that the caller wants us to use "fault_type"
12083 * as a mask against the mapping's actual protections, not as an
12084 * absolute value.
12085 */
12086 mask_protections = (fault_type & VM_PROT_IS_MASK) ? TRUE : FALSE;
12087 force_copy = (fault_type & VM_PROT_COPY) ? TRUE : FALSE;
12088 fault_type &= VM_PROT_ALL;
12089 original_fault_type = fault_type;
12090
12091 *real_map = map;
12092
12093 RetryLookup:
12094 fault_type = original_fault_type;
12095
12096 /*
12097 * If the map has an interesting hint, try it before calling
12098 * full blown lookup routine.
12099 */
12100 entry = map->hint;
12101
12102 if ((entry == vm_map_to_entry(map)) ||
12103 (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
12104 vm_map_entry_t tmp_entry;
12105
12106 /*
12107 * Entry was either not a valid hint, or the vaddr
12108 * was not contained in the entry, so do a full lookup.
12109 */
12110 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
12111 if((cow_sub_map_parent) && (cow_sub_map_parent != map))
12112 vm_map_unlock(cow_sub_map_parent);
12113 if((*real_map != map)
12114 && (*real_map != cow_sub_map_parent))
12115 vm_map_unlock(*real_map);
12116 return KERN_INVALID_ADDRESS;
12117 }
12118
12119 entry = tmp_entry;
12120 }
12121 if(map == old_map) {
12122 old_start = entry->vme_start;
12123 old_end = entry->vme_end;
12124 }
12125
12126 /*
12127 * Handle submaps. Drop lock on upper map, submap is
12128 * returned locked.
12129 */
12130
12131 submap_recurse:
12132 if (entry->is_sub_map) {
12133 vm_map_offset_t local_vaddr;
12134 vm_map_offset_t end_delta;
12135 vm_map_offset_t start_delta;
12136 vm_map_entry_t submap_entry;
12137 vm_prot_t subentry_protection;
12138 vm_prot_t subentry_max_protection;
12139 boolean_t mapped_needs_copy=FALSE;
12140
12141 local_vaddr = vaddr;
12142
12143 if ((entry->use_pmap &&
12144 ! ((fault_type & VM_PROT_WRITE) ||
12145 force_copy))) {
12146 /* if real_map equals map we unlock below */
12147 if ((*real_map != map) &&
12148 (*real_map != cow_sub_map_parent))
12149 vm_map_unlock(*real_map);
12150 *real_map = VME_SUBMAP(entry);
12151 }
12152
12153 if(entry->needs_copy &&
12154 ((fault_type & VM_PROT_WRITE) ||
12155 force_copy)) {
12156 if (!mapped_needs_copy) {
12157 if (vm_map_lock_read_to_write(map)) {
12158 vm_map_lock_read(map);
12159 *real_map = map;
12160 goto RetryLookup;
12161 }
12162 vm_map_lock_read(VME_SUBMAP(entry));
12163 *var_map = VME_SUBMAP(entry);
12164 cow_sub_map_parent = map;
12165 /* reset base to map before cow object */
12166 /* this is the map which will accept */
12167 /* the new cow object */
12168 old_start = entry->vme_start;
12169 old_end = entry->vme_end;
12170 cow_parent_vaddr = vaddr;
12171 mapped_needs_copy = TRUE;
12172 } else {
12173 vm_map_lock_read(VME_SUBMAP(entry));
12174 *var_map = VME_SUBMAP(entry);
12175 if((cow_sub_map_parent != map) &&
12176 (*real_map != map))
12177 vm_map_unlock(map);
12178 }
12179 } else {
12180 vm_map_lock_read(VME_SUBMAP(entry));
12181 *var_map = VME_SUBMAP(entry);
12182 /* leave map locked if it is a target */
12183 /* cow sub_map above otherwise, just */
12184 /* follow the maps down to the object */
12185 /* here we unlock knowing we are not */
12186 /* revisiting the map. */
12187 if((*real_map != map) && (map != cow_sub_map_parent))
12188 vm_map_unlock_read(map);
12189 }
12190
12191 map = *var_map;
12192
12193 /* calculate the offset in the submap for vaddr */
12194 local_vaddr = (local_vaddr - entry->vme_start) + VME_OFFSET(entry);
12195
12196 RetrySubMap:
12197 if(!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
12198 if((cow_sub_map_parent) && (cow_sub_map_parent != map)){
12199 vm_map_unlock(cow_sub_map_parent);
12200 }
12201 if((*real_map != map)
12202 && (*real_map != cow_sub_map_parent)) {
12203 vm_map_unlock(*real_map);
12204 }
12205 *real_map = map;
12206 return KERN_INVALID_ADDRESS;
12207 }
12208
12209 /* find the attenuated shadow of the underlying object */
12210 /* on our target map */
12211
12212 /* in english the submap object may extend beyond the */
12213 /* region mapped by the entry or, may only fill a portion */
12214 /* of it. For our purposes, we only care if the object */
12215 /* doesn't fill. In this case the area which will */
12216 /* ultimately be clipped in the top map will only need */
12217 /* to be as big as the portion of the underlying entry */
12218 /* which is mapped */
12219 start_delta = submap_entry->vme_start > VME_OFFSET(entry) ?
12220 submap_entry->vme_start - VME_OFFSET(entry) : 0;
12221
12222 end_delta =
12223 (VME_OFFSET(entry) + start_delta + (old_end - old_start)) <=
12224 submap_entry->vme_end ?
12225 0 : (VME_OFFSET(entry) +
12226 (old_end - old_start))
12227 - submap_entry->vme_end;
12228
12229 old_start += start_delta;
12230 old_end -= end_delta;
12231
12232 if(submap_entry->is_sub_map) {
12233 entry = submap_entry;
12234 vaddr = local_vaddr;
12235 goto submap_recurse;
12236 }
12237
12238 if (((fault_type & VM_PROT_WRITE) ||
12239 force_copy)
12240 && cow_sub_map_parent) {
12241
12242 vm_object_t sub_object, copy_object;
12243 vm_object_offset_t copy_offset;
12244 vm_map_offset_t local_start;
12245 vm_map_offset_t local_end;
12246 boolean_t copied_slowly = FALSE;
12247
12248 if (vm_map_lock_read_to_write(map)) {
12249 vm_map_lock_read(map);
12250 old_start -= start_delta;
12251 old_end += end_delta;
12252 goto RetrySubMap;
12253 }
12254
12255
12256 sub_object = VME_OBJECT(submap_entry);
12257 if (sub_object == VM_OBJECT_NULL) {
12258 sub_object =
12259 vm_object_allocate(
12260 (vm_map_size_t)
12261 (submap_entry->vme_end -
12262 submap_entry->vme_start));
12263 VME_OBJECT_SET(submap_entry, sub_object);
12264 VME_OFFSET_SET(submap_entry, 0);
12265 assert(!submap_entry->is_sub_map);
12266 assert(submap_entry->use_pmap);
12267 }
12268 local_start = local_vaddr -
12269 (cow_parent_vaddr - old_start);
12270 local_end = local_vaddr +
12271 (old_end - cow_parent_vaddr);
12272 vm_map_clip_start(map, submap_entry, local_start);
12273 vm_map_clip_end(map, submap_entry, local_end);
12274 if (submap_entry->is_sub_map) {
12275 /* unnesting was done when clipping */
12276 assert(!submap_entry->use_pmap);
12277 }
12278
12279 /* This is the COW case, lets connect */
12280 /* an entry in our space to the underlying */
12281 /* object in the submap, bypassing the */
12282 /* submap. */
12283
12284
12285 if(submap_entry->wired_count != 0 ||
12286 (sub_object->copy_strategy ==
12287 MEMORY_OBJECT_COPY_NONE)) {
12288 vm_object_lock(sub_object);
12289 vm_object_copy_slowly(sub_object,
12290 VME_OFFSET(submap_entry),
12291 (submap_entry->vme_end -
12292 submap_entry->vme_start),
12293 FALSE,
12294 &copy_object);
12295 copied_slowly = TRUE;
12296 } else {
12297
12298 /* set up shadow object */
12299 copy_object = sub_object;
12300 vm_object_lock(sub_object);
12301 vm_object_reference_locked(sub_object);
12302 sub_object->shadowed = TRUE;
12303 vm_object_unlock(sub_object);
12304
12305 assert(submap_entry->wired_count == 0);
12306 submap_entry->needs_copy = TRUE;
12307
12308 prot = submap_entry->protection;
12309 assert(!pmap_has_prot_policy(prot));
12310 prot = prot & ~VM_PROT_WRITE;
12311 assert(!pmap_has_prot_policy(prot));
12312
12313 if (override_nx(old_map,
12314 VME_ALIAS(submap_entry))
12315 && prot)
12316 prot |= VM_PROT_EXECUTE;
12317
12318 vm_object_pmap_protect(
12319 sub_object,
12320 VME_OFFSET(submap_entry),
12321 submap_entry->vme_end -
12322 submap_entry->vme_start,
12323 (submap_entry->is_shared
12324 || map->mapped_in_other_pmaps) ?
12325 PMAP_NULL : map->pmap,
12326 submap_entry->vme_start,
12327 prot);
12328 }
12329
12330 /*
12331 * Adjust the fault offset to the submap entry.
12332 */
12333 copy_offset = (local_vaddr -
12334 submap_entry->vme_start +
12335 VME_OFFSET(submap_entry));
12336
12337 /* This works differently than the */
12338 /* normal submap case. We go back */
12339 /* to the parent of the cow map and*/
12340 /* clip out the target portion of */
12341 /* the sub_map, substituting the */
12342 /* new copy object, */
12343
12344 subentry_protection = submap_entry->protection;
12345 subentry_max_protection = submap_entry->max_protection;
12346 vm_map_unlock(map);
12347 submap_entry = NULL; /* not valid after map unlock */
12348
12349 local_start = old_start;
12350 local_end = old_end;
12351 map = cow_sub_map_parent;
12352 *var_map = cow_sub_map_parent;
12353 vaddr = cow_parent_vaddr;
12354 cow_sub_map_parent = NULL;
12355
12356 if(!vm_map_lookup_entry(map,
12357 vaddr, &entry)) {
12358 vm_object_deallocate(
12359 copy_object);
12360 vm_map_lock_write_to_read(map);
12361 return KERN_INVALID_ADDRESS;
12362 }
12363
12364 /* clip out the portion of space */
12365 /* mapped by the sub map which */
12366 /* corresponds to the underlying */
12367 /* object */
12368
12369 /*
12370 * Clip (and unnest) the smallest nested chunk
12371 * possible around the faulting address...
12372 */
12373 local_start = vaddr & ~(pmap_nesting_size_min - 1);
12374 local_end = local_start + pmap_nesting_size_min;
12375 /*
12376 * ... but don't go beyond the "old_start" to "old_end"
12377 * range, to avoid spanning over another VM region
12378 * with a possibly different VM object and/or offset.
12379 */
12380 if (local_start < old_start) {
12381 local_start = old_start;
12382 }
12383 if (local_end > old_end) {
12384 local_end = old_end;
12385 }
12386 /*
12387 * Adjust copy_offset to the start of the range.
12388 */
12389 copy_offset -= (vaddr - local_start);
12390
12391 vm_map_clip_start(map, entry, local_start);
12392 vm_map_clip_end(map, entry, local_end);
12393 if (entry->is_sub_map) {
12394 /* unnesting was done when clipping */
12395 assert(!entry->use_pmap);
12396 }
12397
12398 /* substitute copy object for */
12399 /* shared map entry */
12400 vm_map_deallocate(VME_SUBMAP(entry));
12401 assert(!entry->iokit_acct);
12402 entry->is_sub_map = FALSE;
12403 entry->use_pmap = TRUE;
12404 VME_OBJECT_SET(entry, copy_object);
12405
12406 /* propagate the submap entry's protections */
12407 entry->protection |= subentry_protection;
12408 entry->max_protection |= subentry_max_protection;
12409
12410 #if CONFIG_EMBEDDED
12411 if (entry->protection & VM_PROT_WRITE) {
12412 if ((entry->protection & VM_PROT_EXECUTE) && !(entry->used_for_jit)) {
12413 printf("EMBEDDED: %s can't have both write and exec at the same time\n", __FUNCTION__);
12414 entry->protection &= ~VM_PROT_EXECUTE;
12415 }
12416 }
12417 #endif
12418
12419 if(copied_slowly) {
12420 VME_OFFSET_SET(entry, local_start - old_start);
12421 entry->needs_copy = FALSE;
12422 entry->is_shared = FALSE;
12423 } else {
12424 VME_OFFSET_SET(entry, copy_offset);
12425 assert(entry->wired_count == 0);
12426 entry->needs_copy = TRUE;
12427 if(entry->inheritance == VM_INHERIT_SHARE)
12428 entry->inheritance = VM_INHERIT_COPY;
12429 if (map != old_map)
12430 entry->is_shared = TRUE;
12431 }
12432 if(entry->inheritance == VM_INHERIT_SHARE)
12433 entry->inheritance = VM_INHERIT_COPY;
12434
12435 vm_map_lock_write_to_read(map);
12436 } else {
12437 if((cow_sub_map_parent)
12438 && (cow_sub_map_parent != *real_map)
12439 && (cow_sub_map_parent != map)) {
12440 vm_map_unlock(cow_sub_map_parent);
12441 }
12442 entry = submap_entry;
12443 vaddr = local_vaddr;
12444 }
12445 }
12446
12447 /*
12448 * Check whether this task is allowed to have
12449 * this page.
12450 */
12451
12452 prot = entry->protection;
12453
12454 if (override_nx(old_map, VME_ALIAS(entry)) && prot) {
12455 /*
12456 * HACK -- if not a stack, then allow execution
12457 */
12458 prot |= VM_PROT_EXECUTE;
12459 }
12460
12461 if (mask_protections) {
12462 fault_type &= prot;
12463 if (fault_type == VM_PROT_NONE) {
12464 goto protection_failure;
12465 }
12466 }
12467 if (((fault_type & prot) != fault_type)
12468 #if __arm64__
12469 /* prefetch abort in execute-only page */
12470 && !(prot == VM_PROT_EXECUTE && fault_type == (VM_PROT_READ | VM_PROT_EXECUTE))
12471 #endif
12472 ) {
12473 protection_failure:
12474 if (*real_map != map) {
12475 vm_map_unlock(*real_map);
12476 }
12477 *real_map = map;
12478
12479 if ((fault_type & VM_PROT_EXECUTE) && prot)
12480 log_stack_execution_failure((addr64_t)vaddr, prot);
12481
12482 DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL);
12483 return KERN_PROTECTION_FAILURE;
12484 }
12485
12486 /*
12487 * If this page is not pageable, we have to get
12488 * it for all possible accesses.
12489 */
12490
12491 *wired = (entry->wired_count != 0);
12492 if (*wired)
12493 fault_type = prot;
12494
12495 /*
12496 * If the entry was copy-on-write, we either ...
12497 */
12498
12499 if (entry->needs_copy) {
12500 /*
12501 * If we want to write the page, we may as well
12502 * handle that now since we've got the map locked.
12503 *
12504 * If we don't need to write the page, we just
12505 * demote the permissions allowed.
12506 */
12507
12508 if ((fault_type & VM_PROT_WRITE) || *wired || force_copy) {
12509 /*
12510 * Make a new object, and place it in the
12511 * object chain. Note that no new references
12512 * have appeared -- one just moved from the
12513 * map to the new object.
12514 */
12515
12516 if (vm_map_lock_read_to_write(map)) {
12517 vm_map_lock_read(map);
12518 goto RetryLookup;
12519 }
12520
12521 if (VME_OBJECT(entry)->shadowed == FALSE) {
12522 vm_object_lock(VME_OBJECT(entry));
12523 VME_OBJECT(entry)->shadowed = TRUE;
12524 vm_object_unlock(VME_OBJECT(entry));
12525 }
12526 VME_OBJECT_SHADOW(entry,
12527 (vm_map_size_t) (entry->vme_end -
12528 entry->vme_start));
12529 entry->needs_copy = FALSE;
12530
12531 vm_map_lock_write_to_read(map);
12532 }
12533 if ((fault_type & VM_PROT_WRITE) == 0 && *wired == 0) {
12534 /*
12535 * We're attempting to read a copy-on-write
12536 * page -- don't allow writes.
12537 */
12538
12539 prot &= (~VM_PROT_WRITE);
12540 }
12541 }
12542
12543 /*
12544 * Create an object if necessary.
12545 */
12546 if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
12547
12548 if (vm_map_lock_read_to_write(map)) {
12549 vm_map_lock_read(map);
12550 goto RetryLookup;
12551 }
12552
12553 VME_OBJECT_SET(entry,
12554 vm_object_allocate(
12555 (vm_map_size_t)(entry->vme_end -
12556 entry->vme_start)));
12557 VME_OFFSET_SET(entry, 0);
12558 assert(entry->use_pmap);
12559 vm_map_lock_write_to_read(map);
12560 }
12561
12562 /*
12563 * Return the object/offset from this entry. If the entry
12564 * was copy-on-write or empty, it has been fixed up. Also
12565 * return the protection.
12566 */
12567
12568 *offset = (vaddr - entry->vme_start) + VME_OFFSET(entry);
12569 *object = VME_OBJECT(entry);
12570 *out_prot = prot;
12571
12572 if (fault_info) {
12573 fault_info->interruptible = THREAD_UNINT; /* for now... */
12574 /* ... the caller will change "interruptible" if needed */
12575 fault_info->cluster_size = 0;
12576 fault_info->user_tag = VME_ALIAS(entry);
12577 fault_info->pmap_options = 0;
12578 if (entry->iokit_acct ||
12579 (!entry->is_sub_map && !entry->use_pmap)) {
12580 fault_info->pmap_options |= PMAP_OPTIONS_ALT_ACCT;
12581 }
12582 fault_info->behavior = entry->behavior;
12583 fault_info->lo_offset = VME_OFFSET(entry);
12584 fault_info->hi_offset =
12585 (entry->vme_end - entry->vme_start) + VME_OFFSET(entry);
12586 fault_info->no_cache = entry->no_cache;
12587 fault_info->stealth = FALSE;
12588 fault_info->io_sync = FALSE;
12589 if (entry->used_for_jit ||
12590 entry->vme_resilient_codesign) {
12591 fault_info->cs_bypass = TRUE;
12592 } else {
12593 fault_info->cs_bypass = FALSE;
12594 }
12595 fault_info->mark_zf_absent = FALSE;
12596 fault_info->batch_pmap_op = FALSE;
12597 }
12598
12599 /*
12600 * Lock the object to prevent it from disappearing
12601 */
12602 if (object_lock_type == OBJECT_LOCK_EXCLUSIVE)
12603 vm_object_lock(*object);
12604 else
12605 vm_object_lock_shared(*object);
12606
12607 /*
12608 * Save the version number
12609 */
12610
12611 out_version->main_timestamp = map->timestamp;
12612
12613 return KERN_SUCCESS;
12614 }
12615
12616
12617 /*
12618 * vm_map_verify:
12619 *
12620 * Verifies that the map in question has not changed
12621 * since the given version. The map has to be locked
12622 * ("shared" mode is fine) before calling this function
12623 * and it will be returned locked too.
12624 */
12625 boolean_t
12626 vm_map_verify(
12627 vm_map_t map,
12628 vm_map_version_t *version) /* REF */
12629 {
12630 boolean_t result;
12631
12632 vm_map_lock_assert_held(map);
12633 result = (map->timestamp == version->main_timestamp);
12634
12635 return(result);
12636 }
12637
12638 /*
12639 * TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
12640 * Goes away after regular vm_region_recurse function migrates to
12641 * 64 bits
12642 * vm_region_recurse: A form of vm_region which follows the
12643 * submaps in a target map
12644 *
12645 */
12646
12647 kern_return_t
12648 vm_map_region_recurse_64(
12649 vm_map_t map,
12650 vm_map_offset_t *address, /* IN/OUT */
12651 vm_map_size_t *size, /* OUT */
12652 natural_t *nesting_depth, /* IN/OUT */
12653 vm_region_submap_info_64_t submap_info, /* IN/OUT */
12654 mach_msg_type_number_t *count) /* IN/OUT */
12655 {
12656 mach_msg_type_number_t original_count;
12657 vm_region_extended_info_data_t extended;
12658 vm_map_entry_t tmp_entry;
12659 vm_map_offset_t user_address;
12660 unsigned int user_max_depth;
12661
12662 /*
12663 * "curr_entry" is the VM map entry preceding or including the
12664 * address we're looking for.
12665 * "curr_map" is the map or sub-map containing "curr_entry".
12666 * "curr_address" is the equivalent of the top map's "user_address"
12667 * in the current map.
12668 * "curr_offset" is the cumulated offset of "curr_map" in the
12669 * target task's address space.
12670 * "curr_depth" is the depth of "curr_map" in the chain of
12671 * sub-maps.
12672 *
12673 * "curr_max_below" and "curr_max_above" limit the range (around
12674 * "curr_address") we should take into account in the current (sub)map.
12675 * They limit the range to what's visible through the map entries
12676 * we've traversed from the top map to the current map.
12677
12678 */
12679 vm_map_entry_t curr_entry;
12680 vm_map_address_t curr_address;
12681 vm_map_offset_t curr_offset;
12682 vm_map_t curr_map;
12683 unsigned int curr_depth;
12684 vm_map_offset_t curr_max_below, curr_max_above;
12685 vm_map_offset_t curr_skip;
12686
12687 /*
12688 * "next_" is the same as "curr_" but for the VM region immediately
12689 * after the address we're looking for. We need to keep track of this
12690 * too because we want to return info about that region if the
12691 * address we're looking for is not mapped.
12692 */
12693 vm_map_entry_t next_entry;
12694 vm_map_offset_t next_offset;
12695 vm_map_offset_t next_address;
12696 vm_map_t next_map;
12697 unsigned int next_depth;
12698 vm_map_offset_t next_max_below, next_max_above;
12699 vm_map_offset_t next_skip;
12700
12701 boolean_t look_for_pages;
12702 vm_region_submap_short_info_64_t short_info;
12703 boolean_t do_region_footprint;
12704
12705 if (map == VM_MAP_NULL) {
12706 /* no address space to work on */
12707 return KERN_INVALID_ARGUMENT;
12708 }
12709
12710
12711 if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) {
12712 /*
12713 * "info" structure is not big enough and
12714 * would overflow
12715 */
12716 return KERN_INVALID_ARGUMENT;
12717 }
12718
12719 do_region_footprint = task_self_region_footprint();
12720 original_count = *count;
12721
12722 if (original_count < VM_REGION_SUBMAP_INFO_V0_COUNT_64) {
12723 *count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
12724 look_for_pages = FALSE;
12725 short_info = (vm_region_submap_short_info_64_t) submap_info;
12726 submap_info = NULL;
12727 } else {
12728 look_for_pages = TRUE;
12729 *count = VM_REGION_SUBMAP_INFO_V0_COUNT_64;
12730 short_info = NULL;
12731
12732 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
12733 *count = VM_REGION_SUBMAP_INFO_V1_COUNT_64;
12734 }
12735 }
12736
12737 user_address = *address;
12738 user_max_depth = *nesting_depth;
12739
12740 if (not_in_kdp) {
12741 vm_map_lock_read(map);
12742 }
12743
12744 recurse_again:
12745 curr_entry = NULL;
12746 curr_map = map;
12747 curr_address = user_address;
12748 curr_offset = 0;
12749 curr_skip = 0;
12750 curr_depth = 0;
12751 curr_max_above = ((vm_map_offset_t) -1) - curr_address;
12752 curr_max_below = curr_address;
12753
12754 next_entry = NULL;
12755 next_map = NULL;
12756 next_address = 0;
12757 next_offset = 0;
12758 next_skip = 0;
12759 next_depth = 0;
12760 next_max_above = (vm_map_offset_t) -1;
12761 next_max_below = (vm_map_offset_t) -1;
12762
12763 for (;;) {
12764 if (vm_map_lookup_entry(curr_map,
12765 curr_address,
12766 &tmp_entry)) {
12767 /* tmp_entry contains the address we're looking for */
12768 curr_entry = tmp_entry;
12769 } else {
12770 vm_map_offset_t skip;
12771 /*
12772 * The address is not mapped. "tmp_entry" is the
12773 * map entry preceding the address. We want the next
12774 * one, if it exists.
12775 */
12776 curr_entry = tmp_entry->vme_next;
12777
12778 if (curr_entry == vm_map_to_entry(curr_map) ||
12779 (curr_entry->vme_start >=
12780 curr_address + curr_max_above)) {
12781 /* no next entry at this level: stop looking */
12782 if (not_in_kdp) {
12783 vm_map_unlock_read(curr_map);
12784 }
12785 curr_entry = NULL;
12786 curr_map = NULL;
12787 curr_skip = 0;
12788 curr_offset = 0;
12789 curr_depth = 0;
12790 curr_max_above = 0;
12791 curr_max_below = 0;
12792 break;
12793 }
12794
12795 /* adjust current address and offset */
12796 skip = curr_entry->vme_start - curr_address;
12797 curr_address = curr_entry->vme_start;
12798 curr_skip += skip;
12799 curr_offset += skip;
12800 curr_max_above -= skip;
12801 curr_max_below = 0;
12802 }
12803
12804 /*
12805 * Is the next entry at this level closer to the address (or
12806 * deeper in the submap chain) than the one we had
12807 * so far ?
12808 */
12809 tmp_entry = curr_entry->vme_next;
12810 if (tmp_entry == vm_map_to_entry(curr_map)) {
12811 /* no next entry at this level */
12812 } else if (tmp_entry->vme_start >=
12813 curr_address + curr_max_above) {
12814 /*
12815 * tmp_entry is beyond the scope of what we mapped of
12816 * this submap in the upper level: ignore it.
12817 */
12818 } else if ((next_entry == NULL) ||
12819 (tmp_entry->vme_start + curr_offset <=
12820 next_entry->vme_start + next_offset)) {
12821 /*
12822 * We didn't have a "next_entry" or this one is
12823 * closer to the address we're looking for:
12824 * use this "tmp_entry" as the new "next_entry".
12825 */
12826 if (next_entry != NULL) {
12827 /* unlock the last "next_map" */
12828 if (next_map != curr_map && not_in_kdp) {
12829 vm_map_unlock_read(next_map);
12830 }
12831 }
12832 next_entry = tmp_entry;
12833 next_map = curr_map;
12834 next_depth = curr_depth;
12835 next_address = next_entry->vme_start;
12836 next_skip = curr_skip;
12837 next_skip += (next_address - curr_address);
12838 next_offset = curr_offset;
12839 next_offset += (next_address - curr_address);
12840 next_max_above = MIN(next_max_above, curr_max_above);
12841 next_max_above = MIN(next_max_above,
12842 next_entry->vme_end - next_address);
12843 next_max_below = MIN(next_max_below, curr_max_below);
12844 next_max_below = MIN(next_max_below,
12845 next_address - next_entry->vme_start);
12846 }
12847
12848 /*
12849 * "curr_max_{above,below}" allow us to keep track of the
12850 * portion of the submap that is actually mapped at this level:
12851 * the rest of that submap is irrelevant to us, since it's not
12852 * mapped here.
12853 * The relevant portion of the map starts at
12854 * "VME_OFFSET(curr_entry)" up to the size of "curr_entry".
12855 */
12856 curr_max_above = MIN(curr_max_above,
12857 curr_entry->vme_end - curr_address);
12858 curr_max_below = MIN(curr_max_below,
12859 curr_address - curr_entry->vme_start);
12860
12861 if (!curr_entry->is_sub_map ||
12862 curr_depth >= user_max_depth) {
12863 /*
12864 * We hit a leaf map or we reached the maximum depth
12865 * we could, so stop looking. Keep the current map
12866 * locked.
12867 */
12868 break;
12869 }
12870
12871 /*
12872 * Get down to the next submap level.
12873 */
12874
12875 /*
12876 * Lock the next level and unlock the current level,
12877 * unless we need to keep it locked to access the "next_entry"
12878 * later.
12879 */
12880 if (not_in_kdp) {
12881 vm_map_lock_read(VME_SUBMAP(curr_entry));
12882 }
12883 if (curr_map == next_map) {
12884 /* keep "next_map" locked in case we need it */
12885 } else {
12886 /* release this map */
12887 if (not_in_kdp)
12888 vm_map_unlock_read(curr_map);
12889 }
12890
12891 /*
12892 * Adjust the offset. "curr_entry" maps the submap
12893 * at relative address "curr_entry->vme_start" in the
12894 * curr_map but skips the first "VME_OFFSET(curr_entry)"
12895 * bytes of the submap.
12896 * "curr_offset" always represents the offset of a virtual
12897 * address in the curr_map relative to the absolute address
12898 * space (i.e. the top-level VM map).
12899 */
12900 curr_offset +=
12901 (VME_OFFSET(curr_entry) - curr_entry->vme_start);
12902 curr_address = user_address + curr_offset;
12903 /* switch to the submap */
12904 curr_map = VME_SUBMAP(curr_entry);
12905 curr_depth++;
12906 curr_entry = NULL;
12907 }
12908
12909 // LP64todo: all the current tools are 32bit, obviously never worked for 64b
12910 // so probably should be a real 32b ID vs. ptr.
12911 // Current users just check for equality
12912
12913 if (curr_entry == NULL) {
12914 /* no VM region contains the address... */
12915
12916 if (do_region_footprint && /* we want footprint numbers */
12917 next_entry == NULL && /* & there are no more regions */
12918 /* & we haven't already provided our fake region: */
12919 user_address <= vm_map_last_entry(map)->vme_end) {
12920 ledger_amount_t nonvol, nonvol_compressed;
12921 /*
12922 * Add a fake memory region to account for
12923 * purgeable memory that counts towards this
12924 * task's memory footprint, i.e. the resident
12925 * compressed pages of non-volatile objects
12926 * owned by that task.
12927 */
12928 ledger_get_balance(
12929 map->pmap->ledger,
12930 task_ledgers.purgeable_nonvolatile,
12931 &nonvol);
12932 ledger_get_balance(
12933 map->pmap->ledger,
12934 task_ledgers.purgeable_nonvolatile_compressed,
12935 &nonvol_compressed);
12936 if (nonvol + nonvol_compressed == 0) {
12937 /* no purgeable memory usage to report */
12938 return KERN_INVALID_ADDRESS;
12939 }
12940 /* fake region to show nonvolatile footprint */
12941 if (look_for_pages) {
12942 submap_info->protection = VM_PROT_DEFAULT;
12943 submap_info->max_protection = VM_PROT_DEFAULT;
12944 submap_info->inheritance = VM_INHERIT_DEFAULT;
12945 submap_info->offset = 0;
12946 submap_info->user_tag = -1;
12947 submap_info->pages_resident = (unsigned int) (nonvol / PAGE_SIZE);
12948 submap_info->pages_shared_now_private = 0;
12949 submap_info->pages_swapped_out = (unsigned int) (nonvol_compressed / PAGE_SIZE);
12950 submap_info->pages_dirtied = submap_info->pages_resident;
12951 submap_info->ref_count = 1;
12952 submap_info->shadow_depth = 0;
12953 submap_info->external_pager = 0;
12954 submap_info->share_mode = SM_PRIVATE;
12955 submap_info->is_submap = 0;
12956 submap_info->behavior = VM_BEHAVIOR_DEFAULT;
12957 submap_info->object_id = INFO_MAKE_FAKE_OBJECT_ID(map, task_ledgers.purgeable_nonvolatile);
12958 submap_info->user_wired_count = 0;
12959 submap_info->pages_reusable = 0;
12960 } else {
12961 short_info->user_tag = -1;
12962 short_info->offset = 0;
12963 short_info->protection = VM_PROT_DEFAULT;
12964 short_info->inheritance = VM_INHERIT_DEFAULT;
12965 short_info->max_protection = VM_PROT_DEFAULT;
12966 short_info->behavior = VM_BEHAVIOR_DEFAULT;
12967 short_info->user_wired_count = 0;
12968 short_info->is_submap = 0;
12969 short_info->object_id = INFO_MAKE_FAKE_OBJECT_ID(map, task_ledgers.purgeable_nonvolatile);
12970 short_info->external_pager = 0;
12971 short_info->shadow_depth = 0;
12972 short_info->share_mode = SM_PRIVATE;
12973 short_info->ref_count = 1;
12974 }
12975 *nesting_depth = 0;
12976 *size = (vm_map_size_t) (nonvol + nonvol_compressed);
12977 // *address = user_address;
12978 *address = vm_map_last_entry(map)->vme_end;
12979 return KERN_SUCCESS;
12980 }
12981
12982 if (next_entry == NULL) {
12983 /* ... and no VM region follows it either */
12984 return KERN_INVALID_ADDRESS;
12985 }
12986 /* ... gather info about the next VM region */
12987 curr_entry = next_entry;
12988 curr_map = next_map; /* still locked ... */
12989 curr_address = next_address;
12990 curr_skip = next_skip;
12991 curr_offset = next_offset;
12992 curr_depth = next_depth;
12993 curr_max_above = next_max_above;
12994 curr_max_below = next_max_below;
12995 } else {
12996 /* we won't need "next_entry" after all */
12997 if (next_entry != NULL) {
12998 /* release "next_map" */
12999 if (next_map != curr_map && not_in_kdp) {
13000 vm_map_unlock_read(next_map);
13001 }
13002 }
13003 }
13004 next_entry = NULL;
13005 next_map = NULL;
13006 next_offset = 0;
13007 next_skip = 0;
13008 next_depth = 0;
13009 next_max_below = -1;
13010 next_max_above = -1;
13011
13012 if (curr_entry->is_sub_map &&
13013 curr_depth < user_max_depth) {
13014 /*
13015 * We're not as deep as we could be: we must have
13016 * gone back up after not finding anything mapped
13017 * below the original top-level map entry's.
13018 * Let's move "curr_address" forward and recurse again.
13019 */
13020 user_address = curr_address;
13021 goto recurse_again;
13022 }
13023
13024 *nesting_depth = curr_depth;
13025 *size = curr_max_above + curr_max_below;
13026 *address = user_address + curr_skip - curr_max_below;
13027
13028 // LP64todo: all the current tools are 32bit, obviously never worked for 64b
13029 // so probably should be a real 32b ID vs. ptr.
13030 // Current users just check for equality
13031 #define INFO_MAKE_OBJECT_ID(p) ((uint32_t)(uintptr_t)VM_KERNEL_ADDRPERM(p))
13032
13033 if (look_for_pages) {
13034 submap_info->user_tag = VME_ALIAS(curr_entry);
13035 submap_info->offset = VME_OFFSET(curr_entry);
13036 submap_info->protection = curr_entry->protection;
13037 submap_info->inheritance = curr_entry->inheritance;
13038 submap_info->max_protection = curr_entry->max_protection;
13039 submap_info->behavior = curr_entry->behavior;
13040 submap_info->user_wired_count = curr_entry->user_wired_count;
13041 submap_info->is_submap = curr_entry->is_sub_map;
13042 submap_info->object_id = INFO_MAKE_OBJECT_ID(VME_OBJECT(curr_entry));
13043 } else {
13044 short_info->user_tag = VME_ALIAS(curr_entry);
13045 short_info->offset = VME_OFFSET(curr_entry);
13046 short_info->protection = curr_entry->protection;
13047 short_info->inheritance = curr_entry->inheritance;
13048 short_info->max_protection = curr_entry->max_protection;
13049 short_info->behavior = curr_entry->behavior;
13050 short_info->user_wired_count = curr_entry->user_wired_count;
13051 short_info->is_submap = curr_entry->is_sub_map;
13052 short_info->object_id = INFO_MAKE_OBJECT_ID(VME_OBJECT(curr_entry));
13053 }
13054
13055 extended.pages_resident = 0;
13056 extended.pages_swapped_out = 0;
13057 extended.pages_shared_now_private = 0;
13058 extended.pages_dirtied = 0;
13059 extended.pages_reusable = 0;
13060 extended.external_pager = 0;
13061 extended.shadow_depth = 0;
13062 extended.share_mode = SM_EMPTY;
13063 extended.ref_count = 0;
13064
13065 if (not_in_kdp) {
13066 if (!curr_entry->is_sub_map) {
13067 vm_map_offset_t range_start, range_end;
13068 range_start = MAX((curr_address - curr_max_below),
13069 curr_entry->vme_start);
13070 range_end = MIN((curr_address + curr_max_above),
13071 curr_entry->vme_end);
13072 vm_map_region_walk(curr_map,
13073 range_start,
13074 curr_entry,
13075 (VME_OFFSET(curr_entry) +
13076 (range_start -
13077 curr_entry->vme_start)),
13078 range_end - range_start,
13079 &extended,
13080 look_for_pages, VM_REGION_EXTENDED_INFO_COUNT);
13081 if (extended.external_pager &&
13082 extended.ref_count == 2 &&
13083 extended.share_mode == SM_SHARED) {
13084 extended.share_mode = SM_PRIVATE;
13085 }
13086 } else {
13087 if (curr_entry->use_pmap) {
13088 extended.share_mode = SM_TRUESHARED;
13089 } else {
13090 extended.share_mode = SM_PRIVATE;
13091 }
13092 extended.ref_count = VME_SUBMAP(curr_entry)->ref_count;
13093 }
13094 }
13095
13096 if (look_for_pages) {
13097 submap_info->pages_resident = extended.pages_resident;
13098 submap_info->pages_swapped_out = extended.pages_swapped_out;
13099 submap_info->pages_shared_now_private =
13100 extended.pages_shared_now_private;
13101 submap_info->pages_dirtied = extended.pages_dirtied;
13102 submap_info->external_pager = extended.external_pager;
13103 submap_info->shadow_depth = extended.shadow_depth;
13104 submap_info->share_mode = extended.share_mode;
13105 submap_info->ref_count = extended.ref_count;
13106
13107 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
13108 submap_info->pages_reusable = extended.pages_reusable;
13109 }
13110 } else {
13111 short_info->external_pager = extended.external_pager;
13112 short_info->shadow_depth = extended.shadow_depth;
13113 short_info->share_mode = extended.share_mode;
13114 short_info->ref_count = extended.ref_count;
13115 }
13116
13117 if (not_in_kdp) {
13118 vm_map_unlock_read(curr_map);
13119 }
13120
13121 return KERN_SUCCESS;
13122 }
13123
13124 /*
13125 * vm_region:
13126 *
13127 * User call to obtain information about a region in
13128 * a task's address map. Currently, only one flavor is
13129 * supported.
13130 *
13131 * XXX The reserved and behavior fields cannot be filled
13132 * in until the vm merge from the IK is completed, and
13133 * vm_reserve is implemented.
13134 */
13135
13136 kern_return_t
13137 vm_map_region(
13138 vm_map_t map,
13139 vm_map_offset_t *address, /* IN/OUT */
13140 vm_map_size_t *size, /* OUT */
13141 vm_region_flavor_t flavor, /* IN */
13142 vm_region_info_t info, /* OUT */
13143 mach_msg_type_number_t *count, /* IN/OUT */
13144 mach_port_t *object_name) /* OUT */
13145 {
13146 vm_map_entry_t tmp_entry;
13147 vm_map_entry_t entry;
13148 vm_map_offset_t start;
13149
13150 if (map == VM_MAP_NULL)
13151 return(KERN_INVALID_ARGUMENT);
13152
13153 switch (flavor) {
13154
13155 case VM_REGION_BASIC_INFO:
13156 /* legacy for old 32-bit objects info */
13157 {
13158 vm_region_basic_info_t basic;
13159
13160 if (*count < VM_REGION_BASIC_INFO_COUNT)
13161 return(KERN_INVALID_ARGUMENT);
13162
13163 basic = (vm_region_basic_info_t) info;
13164 *count = VM_REGION_BASIC_INFO_COUNT;
13165
13166 vm_map_lock_read(map);
13167
13168 start = *address;
13169 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
13170 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
13171 vm_map_unlock_read(map);
13172 return(KERN_INVALID_ADDRESS);
13173 }
13174 } else {
13175 entry = tmp_entry;
13176 }
13177
13178 start = entry->vme_start;
13179
13180 basic->offset = (uint32_t)VME_OFFSET(entry);
13181 basic->protection = entry->protection;
13182 basic->inheritance = entry->inheritance;
13183 basic->max_protection = entry->max_protection;
13184 basic->behavior = entry->behavior;
13185 basic->user_wired_count = entry->user_wired_count;
13186 basic->reserved = entry->is_sub_map;
13187 *address = start;
13188 *size = (entry->vme_end - start);
13189
13190 if (object_name) *object_name = IP_NULL;
13191 if (entry->is_sub_map) {
13192 basic->shared = FALSE;
13193 } else {
13194 basic->shared = entry->is_shared;
13195 }
13196
13197 vm_map_unlock_read(map);
13198 return(KERN_SUCCESS);
13199 }
13200
13201 case VM_REGION_BASIC_INFO_64:
13202 {
13203 vm_region_basic_info_64_t basic;
13204
13205 if (*count < VM_REGION_BASIC_INFO_COUNT_64)
13206 return(KERN_INVALID_ARGUMENT);
13207
13208 basic = (vm_region_basic_info_64_t) info;
13209 *count = VM_REGION_BASIC_INFO_COUNT_64;
13210
13211 vm_map_lock_read(map);
13212
13213 start = *address;
13214 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
13215 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
13216 vm_map_unlock_read(map);
13217 return(KERN_INVALID_ADDRESS);
13218 }
13219 } else {
13220 entry = tmp_entry;
13221 }
13222
13223 start = entry->vme_start;
13224
13225 basic->offset = VME_OFFSET(entry);
13226 basic->protection = entry->protection;
13227 basic->inheritance = entry->inheritance;
13228 basic->max_protection = entry->max_protection;
13229 basic->behavior = entry->behavior;
13230 basic->user_wired_count = entry->user_wired_count;
13231 basic->reserved = entry->is_sub_map;
13232 *address = start;
13233 *size = (entry->vme_end - start);
13234
13235 if (object_name) *object_name = IP_NULL;
13236 if (entry->is_sub_map) {
13237 basic->shared = FALSE;
13238 } else {
13239 basic->shared = entry->is_shared;
13240 }
13241
13242 vm_map_unlock_read(map);
13243 return(KERN_SUCCESS);
13244 }
13245 case VM_REGION_EXTENDED_INFO:
13246 if (*count < VM_REGION_EXTENDED_INFO_COUNT)
13247 return(KERN_INVALID_ARGUMENT);
13248 /*fallthru*/
13249 case VM_REGION_EXTENDED_INFO__legacy:
13250 if (*count < VM_REGION_EXTENDED_INFO_COUNT__legacy)
13251 return KERN_INVALID_ARGUMENT;
13252
13253 {
13254 vm_region_extended_info_t extended;
13255 mach_msg_type_number_t original_count;
13256
13257 extended = (vm_region_extended_info_t) info;
13258
13259 vm_map_lock_read(map);
13260
13261 start = *address;
13262 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
13263 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
13264 vm_map_unlock_read(map);
13265 return(KERN_INVALID_ADDRESS);
13266 }
13267 } else {
13268 entry = tmp_entry;
13269 }
13270 start = entry->vme_start;
13271
13272 extended->protection = entry->protection;
13273 extended->user_tag = VME_ALIAS(entry);
13274 extended->pages_resident = 0;
13275 extended->pages_swapped_out = 0;
13276 extended->pages_shared_now_private = 0;
13277 extended->pages_dirtied = 0;
13278 extended->external_pager = 0;
13279 extended->shadow_depth = 0;
13280
13281 original_count = *count;
13282 if (flavor == VM_REGION_EXTENDED_INFO__legacy) {
13283 *count = VM_REGION_EXTENDED_INFO_COUNT__legacy;
13284 } else {
13285 extended->pages_reusable = 0;
13286 *count = VM_REGION_EXTENDED_INFO_COUNT;
13287 }
13288
13289 vm_map_region_walk(map, start, entry, VME_OFFSET(entry), entry->vme_end - start, extended, TRUE, *count);
13290
13291 if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED)
13292 extended->share_mode = SM_PRIVATE;
13293
13294 if (object_name)
13295 *object_name = IP_NULL;
13296 *address = start;
13297 *size = (entry->vme_end - start);
13298
13299 vm_map_unlock_read(map);
13300 return(KERN_SUCCESS);
13301 }
13302 case VM_REGION_TOP_INFO:
13303 {
13304 vm_region_top_info_t top;
13305
13306 if (*count < VM_REGION_TOP_INFO_COUNT)
13307 return(KERN_INVALID_ARGUMENT);
13308
13309 top = (vm_region_top_info_t) info;
13310 *count = VM_REGION_TOP_INFO_COUNT;
13311
13312 vm_map_lock_read(map);
13313
13314 start = *address;
13315 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
13316 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
13317 vm_map_unlock_read(map);
13318 return(KERN_INVALID_ADDRESS);
13319 }
13320 } else {
13321 entry = tmp_entry;
13322
13323 }
13324 start = entry->vme_start;
13325
13326 top->private_pages_resident = 0;
13327 top->shared_pages_resident = 0;
13328
13329 vm_map_region_top_walk(entry, top);
13330
13331 if (object_name)
13332 *object_name = IP_NULL;
13333 *address = start;
13334 *size = (entry->vme_end - start);
13335
13336 vm_map_unlock_read(map);
13337 return(KERN_SUCCESS);
13338 }
13339 default:
13340 return(KERN_INVALID_ARGUMENT);
13341 }
13342 }
13343
/*
 * OBJ_RESIDENT_COUNT(obj, entry_size):
 *	Resident page count to report for "obj", capped at "entry_size".
 *	If the object is marked all_reusable, only its wired pages are
 *	counted; otherwise its reusable pages are subtracted from its
 *	resident pages.
 *	Both arguments may be evaluated more than once.
 */
#define OBJ_RESIDENT_COUNT(obj, entry_size)				\
	MIN((entry_size),						\
	    (!(obj)->all_reusable					\
	     ? (obj)->resident_page_count - (obj)->reusable_page_count	\
	     : (obj)->wired_page_count))
13349
13350 void
13351 vm_map_region_top_walk(
13352 vm_map_entry_t entry,
13353 vm_region_top_info_t top)
13354 {
13355
13356 if (VME_OBJECT(entry) == 0 || entry->is_sub_map) {
13357 top->share_mode = SM_EMPTY;
13358 top->ref_count = 0;
13359 top->obj_id = 0;
13360 return;
13361 }
13362
13363 {
13364 struct vm_object *obj, *tmp_obj;
13365 int ref_count;
13366 uint32_t entry_size;
13367
13368 entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64);
13369
13370 obj = VME_OBJECT(entry);
13371
13372 vm_object_lock(obj);
13373
13374 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
13375 ref_count--;
13376
13377 assert(obj->reusable_page_count <= obj->resident_page_count);
13378 if (obj->shadow) {
13379 if (ref_count == 1)
13380 top->private_pages_resident =
13381 OBJ_RESIDENT_COUNT(obj, entry_size);
13382 else
13383 top->shared_pages_resident =
13384 OBJ_RESIDENT_COUNT(obj, entry_size);
13385 top->ref_count = ref_count;
13386 top->share_mode = SM_COW;
13387
13388 while ((tmp_obj = obj->shadow)) {
13389 vm_object_lock(tmp_obj);
13390 vm_object_unlock(obj);
13391 obj = tmp_obj;
13392
13393 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
13394 ref_count--;
13395
13396 assert(obj->reusable_page_count <= obj->resident_page_count);
13397 top->shared_pages_resident +=
13398 OBJ_RESIDENT_COUNT(obj, entry_size);
13399 top->ref_count += ref_count - 1;
13400 }
13401 } else {
13402 if (entry->superpage_size) {
13403 top->share_mode = SM_LARGE_PAGE;
13404 top->shared_pages_resident = 0;
13405 top->private_pages_resident = entry_size;
13406 } else if (entry->needs_copy) {
13407 top->share_mode = SM_COW;
13408 top->shared_pages_resident =
13409 OBJ_RESIDENT_COUNT(obj, entry_size);
13410 } else {
13411 if (ref_count == 1 ||
13412 (ref_count == 2 && !(obj->pager_trusted) && !(obj->internal))) {
13413 top->share_mode = SM_PRIVATE;
13414 top->private_pages_resident =
13415 OBJ_RESIDENT_COUNT(obj,
13416 entry_size);
13417 } else {
13418 top->share_mode = SM_SHARED;
13419 top->shared_pages_resident =
13420 OBJ_RESIDENT_COUNT(obj,
13421 entry_size);
13422 }
13423 }
13424 top->ref_count = ref_count;
13425 }
13426 /* XXX K64: obj_id will be truncated */
13427 top->obj_id = (unsigned int) (uintptr_t)VM_KERNEL_ADDRPERM(obj);
13428
13429 vm_object_unlock(obj);
13430 }
13431 }
13432
13433 void
13434 vm_map_region_walk(
13435 vm_map_t map,
13436 vm_map_offset_t va,
13437 vm_map_entry_t entry,
13438 vm_object_offset_t offset,
13439 vm_object_size_t range,
13440 vm_region_extended_info_t extended,
13441 boolean_t look_for_pages,
13442 mach_msg_type_number_t count)
13443 {
13444 struct vm_object *obj, *tmp_obj;
13445 vm_map_offset_t last_offset;
13446 int i;
13447 int ref_count;
13448 struct vm_object *shadow_object;
13449 int shadow_depth;
13450 boolean_t do_region_footprint;
13451
13452 do_region_footprint = task_self_region_footprint();
13453
13454 if ((VME_OBJECT(entry) == 0) ||
13455 (entry->is_sub_map) ||
13456 (VME_OBJECT(entry)->phys_contiguous &&
13457 !entry->superpage_size)) {
13458 extended->share_mode = SM_EMPTY;
13459 extended->ref_count = 0;
13460 return;
13461 }
13462
13463 if (entry->superpage_size) {
13464 extended->shadow_depth = 0;
13465 extended->share_mode = SM_LARGE_PAGE;
13466 extended->ref_count = 1;
13467 extended->external_pager = 0;
13468 extended->pages_resident = (unsigned int)(range >> PAGE_SHIFT);
13469 extended->shadow_depth = 0;
13470 return;
13471 }
13472
13473 obj = VME_OBJECT(entry);
13474
13475 vm_object_lock(obj);
13476
13477 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
13478 ref_count--;
13479
13480 if (look_for_pages) {
13481 for (last_offset = offset + range;
13482 offset < last_offset;
13483 offset += PAGE_SIZE_64, va += PAGE_SIZE) {
13484
13485 if (do_region_footprint) {
13486 int disp;
13487
13488 disp = 0;
13489 pmap_query_page_info(map->pmap, va, &disp);
13490 if (disp & PMAP_QUERY_PAGE_PRESENT) {
13491 extended->pages_resident++;
13492 if (disp & PMAP_QUERY_PAGE_REUSABLE) {
13493 extended->pages_reusable++;
13494 } else if (!(disp & PMAP_QUERY_PAGE_INTERNAL) ||
13495 (disp & PMAP_QUERY_PAGE_ALTACCT)) {
13496 /* alternate accounting */
13497 } else {
13498 extended->pages_dirtied++;
13499 }
13500 } else if (disp & PMAP_QUERY_PAGE_COMPRESSED) {
13501 if (disp & PMAP_QUERY_PAGE_COMPRESSED_ALTACCT) {
13502 /* alternate accounting */
13503 } else {
13504 extended->pages_swapped_out++;
13505 }
13506 }
13507 /* deal with alternate accounting */
13508 if (obj->purgable != VM_PURGABLE_DENY) {
13509 /*
13510 * Pages from purgeable objects
13511 * will be reported as dirty
13512 * appropriately in an extra
13513 * fake memory region at the end of
13514 * the address space.
13515 */
13516 } else if (entry->iokit_acct) {
13517 /*
13518 * IOKit mappings are considered
13519 * as fully dirty for footprint's
13520 * sake.
13521 */
13522 extended->pages_dirtied++;
13523 }
13524 continue;
13525 }
13526
13527 vm_map_region_look_for_page(map, va, obj,
13528 offset, ref_count,
13529 0, extended, count);
13530 }
13531
13532 if (do_region_footprint) {
13533 goto collect_object_info;
13534 }
13535
13536 } else {
13537 collect_object_info:
13538 shadow_object = obj->shadow;
13539 shadow_depth = 0;
13540
13541 if ( !(obj->pager_trusted) && !(obj->internal))
13542 extended->external_pager = 1;
13543
13544 if (shadow_object != VM_OBJECT_NULL) {
13545 vm_object_lock(shadow_object);
13546 for (;
13547 shadow_object != VM_OBJECT_NULL;
13548 shadow_depth++) {
13549 vm_object_t next_shadow;
13550
13551 if ( !(shadow_object->pager_trusted) &&
13552 !(shadow_object->internal))
13553 extended->external_pager = 1;
13554
13555 next_shadow = shadow_object->shadow;
13556 if (next_shadow) {
13557 vm_object_lock(next_shadow);
13558 }
13559 vm_object_unlock(shadow_object);
13560 shadow_object = next_shadow;
13561 }
13562 }
13563 extended->shadow_depth = shadow_depth;
13564 }
13565
13566 if (extended->shadow_depth || entry->needs_copy)
13567 extended->share_mode = SM_COW;
13568 else {
13569 if (ref_count == 1)
13570 extended->share_mode = SM_PRIVATE;
13571 else {
13572 if (obj->true_share)
13573 extended->share_mode = SM_TRUESHARED;
13574 else
13575 extended->share_mode = SM_SHARED;
13576 }
13577 }
13578 extended->ref_count = ref_count - extended->shadow_depth;
13579
13580 for (i = 0; i < extended->shadow_depth; i++) {
13581 if ((tmp_obj = obj->shadow) == 0)
13582 break;
13583 vm_object_lock(tmp_obj);
13584 vm_object_unlock(obj);
13585
13586 if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress)
13587 ref_count--;
13588
13589 extended->ref_count += ref_count;
13590 obj = tmp_obj;
13591 }
13592 vm_object_unlock(obj);
13593
13594 if (extended->share_mode == SM_SHARED) {
13595 vm_map_entry_t cur;
13596 vm_map_entry_t last;
13597 int my_refs;
13598
13599 obj = VME_OBJECT(entry);
13600 last = vm_map_to_entry(map);
13601 my_refs = 0;
13602
13603 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
13604 ref_count--;
13605 for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next)
13606 my_refs += vm_map_region_count_obj_refs(cur, obj);
13607
13608 if (my_refs == ref_count)
13609 extended->share_mode = SM_PRIVATE_ALIASED;
13610 else if (my_refs > 1)
13611 extended->share_mode = SM_SHARED_ALIASED;
13612 }
13613 }
13614
13615
13616 /* object is locked on entry and locked on return */
13617
13618
13619 static void
13620 vm_map_region_look_for_page(
13621 __unused vm_map_t map,
13622 __unused vm_map_offset_t va,
13623 vm_object_t object,
13624 vm_object_offset_t offset,
13625 int max_refcnt,
13626 int depth,
13627 vm_region_extended_info_t extended,
13628 mach_msg_type_number_t count)
13629 {
13630 vm_page_t p;
13631 vm_object_t shadow;
13632 int ref_count;
13633 vm_object_t caller_object;
13634
13635 shadow = object->shadow;
13636 caller_object = object;
13637
13638
13639 while (TRUE) {
13640
13641 if ( !(object->pager_trusted) && !(object->internal))
13642 extended->external_pager = 1;
13643
13644 if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
13645 if (shadow && (max_refcnt == 1))
13646 extended->pages_shared_now_private++;
13647
13648 if (!p->fictitious &&
13649 (p->dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(p))))
13650 extended->pages_dirtied++;
13651 else if (count >= VM_REGION_EXTENDED_INFO_COUNT) {
13652 if (p->reusable || object->all_reusable) {
13653 extended->pages_reusable++;
13654 }
13655 }
13656
13657 extended->pages_resident++;
13658
13659 if(object != caller_object)
13660 vm_object_unlock(object);
13661
13662 return;
13663 }
13664 if (object->internal &&
13665 object->alive &&
13666 !object->terminating &&
13667 object->pager_ready) {
13668
13669 if (VM_COMPRESSOR_PAGER_STATE_GET(object, offset)
13670 == VM_EXTERNAL_STATE_EXISTS) {
13671 /* the pager has that page */
13672 extended->pages_swapped_out++;
13673 if (object != caller_object)
13674 vm_object_unlock(object);
13675 return;
13676 }
13677 }
13678
13679 if (shadow) {
13680 vm_object_lock(shadow);
13681
13682 if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress)
13683 ref_count--;
13684
13685 if (++depth > extended->shadow_depth)
13686 extended->shadow_depth = depth;
13687
13688 if (ref_count > max_refcnt)
13689 max_refcnt = ref_count;
13690
13691 if(object != caller_object)
13692 vm_object_unlock(object);
13693
13694 offset = offset + object->vo_shadow_offset;
13695 object = shadow;
13696 shadow = object->shadow;
13697 continue;
13698 }
13699 if(object != caller_object)
13700 vm_object_unlock(object);
13701 break;
13702 }
13703 }
13704
13705 static int
13706 vm_map_region_count_obj_refs(
13707 vm_map_entry_t entry,
13708 vm_object_t object)
13709 {
13710 int ref_count;
13711 vm_object_t chk_obj;
13712 vm_object_t tmp_obj;
13713
13714 if (VME_OBJECT(entry) == 0)
13715 return(0);
13716
13717 if (entry->is_sub_map)
13718 return(0);
13719 else {
13720 ref_count = 0;
13721
13722 chk_obj = VME_OBJECT(entry);
13723 vm_object_lock(chk_obj);
13724
13725 while (chk_obj) {
13726 if (chk_obj == object)
13727 ref_count++;
13728 tmp_obj = chk_obj->shadow;
13729 if (tmp_obj)
13730 vm_object_lock(tmp_obj);
13731 vm_object_unlock(chk_obj);
13732
13733 chk_obj = tmp_obj;
13734 }
13735 }
13736 return(ref_count);
13737 }
13738
13739
13740 /*
13741 * Routine: vm_map_simplify
13742 *
13743 * Description:
13744 * Attempt to simplify the map representation in
13745 * the vicinity of the given starting address.
13746 * Note:
13747 * This routine is intended primarily to keep the
13748 * kernel maps more compact -- they generally don't
13749 * benefit from the "expand a map entry" technology
13750 * at allocation time because the adjacent entry
13751 * is often wired down.
13752 */
13753 void
13754 vm_map_simplify_entry(
13755 vm_map_t map,
13756 vm_map_entry_t this_entry)
13757 {
13758 vm_map_entry_t prev_entry;
13759
13760 counter(c_vm_map_simplify_entry_called++);
13761
13762 prev_entry = this_entry->vme_prev;
13763
13764 if ((this_entry != vm_map_to_entry(map)) &&
13765 (prev_entry != vm_map_to_entry(map)) &&
13766
13767 (prev_entry->vme_end == this_entry->vme_start) &&
13768
13769 (prev_entry->is_sub_map == this_entry->is_sub_map) &&
13770 (VME_OBJECT(prev_entry) == VME_OBJECT(this_entry)) &&
13771 ((VME_OFFSET(prev_entry) + (prev_entry->vme_end -
13772 prev_entry->vme_start))
13773 == VME_OFFSET(this_entry)) &&
13774
13775 (prev_entry->behavior == this_entry->behavior) &&
13776 (prev_entry->needs_copy == this_entry->needs_copy) &&
13777 (prev_entry->protection == this_entry->protection) &&
13778 (prev_entry->max_protection == this_entry->max_protection) &&
13779 (prev_entry->inheritance == this_entry->inheritance) &&
13780 (prev_entry->use_pmap == this_entry->use_pmap) &&
13781 (VME_ALIAS(prev_entry) == VME_ALIAS(this_entry)) &&
13782 (prev_entry->no_cache == this_entry->no_cache) &&
13783 (prev_entry->permanent == this_entry->permanent) &&
13784 (prev_entry->map_aligned == this_entry->map_aligned) &&
13785 (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) &&
13786 (prev_entry->used_for_jit == this_entry->used_for_jit) &&
13787 /* from_reserved_zone: OK if that field doesn't match */
13788 (prev_entry->iokit_acct == this_entry->iokit_acct) &&
13789 (prev_entry->vme_resilient_codesign ==
13790 this_entry->vme_resilient_codesign) &&
13791 (prev_entry->vme_resilient_media ==
13792 this_entry->vme_resilient_media) &&
13793
13794 (prev_entry->wired_count == this_entry->wired_count) &&
13795 (prev_entry->user_wired_count == this_entry->user_wired_count) &&
13796
13797 ((prev_entry->vme_atomic == FALSE) && (this_entry->vme_atomic == FALSE)) &&
13798 (prev_entry->in_transition == FALSE) &&
13799 (this_entry->in_transition == FALSE) &&
13800 (prev_entry->needs_wakeup == FALSE) &&
13801 (this_entry->needs_wakeup == FALSE) &&
13802 (prev_entry->is_shared == FALSE) &&
13803 (this_entry->is_shared == FALSE) &&
13804 (prev_entry->superpage_size == FALSE) &&
13805 (this_entry->superpage_size == FALSE)
13806 ) {
13807 vm_map_store_entry_unlink(map, prev_entry);
13808 assert(prev_entry->vme_start < this_entry->vme_end);
13809 if (prev_entry->map_aligned)
13810 assert(VM_MAP_PAGE_ALIGNED(prev_entry->vme_start,
13811 VM_MAP_PAGE_MASK(map)));
13812 this_entry->vme_start = prev_entry->vme_start;
13813 VME_OFFSET_SET(this_entry, VME_OFFSET(prev_entry));
13814
13815 if (map->holelistenabled) {
13816 vm_map_store_update_first_free(map, this_entry, TRUE);
13817 }
13818
13819 if (prev_entry->is_sub_map) {
13820 vm_map_deallocate(VME_SUBMAP(prev_entry));
13821 } else {
13822 vm_object_deallocate(VME_OBJECT(prev_entry));
13823 }
13824 vm_map_entry_dispose(map, prev_entry);
13825 SAVE_HINT_MAP_WRITE(map, this_entry);
13826 counter(c_vm_map_simplified++);
13827 }
13828 }
13829
13830 void
13831 vm_map_simplify(
13832 vm_map_t map,
13833 vm_map_offset_t start)
13834 {
13835 vm_map_entry_t this_entry;
13836
13837 vm_map_lock(map);
13838 if (vm_map_lookup_entry(map, start, &this_entry)) {
13839 vm_map_simplify_entry(map, this_entry);
13840 vm_map_simplify_entry(map, this_entry->vme_next);
13841 }
13842 counter(c_vm_map_simplify_called++);
13843 vm_map_unlock(map);
13844 }
13845
13846 static void
13847 vm_map_simplify_range(
13848 vm_map_t map,
13849 vm_map_offset_t start,
13850 vm_map_offset_t end)
13851 {
13852 vm_map_entry_t entry;
13853
13854 /*
13855 * The map should be locked (for "write") by the caller.
13856 */
13857
13858 if (start >= end) {
13859 /* invalid address range */
13860 return;
13861 }
13862
13863 start = vm_map_trunc_page(start,
13864 VM_MAP_PAGE_MASK(map));
13865 end = vm_map_round_page(end,
13866 VM_MAP_PAGE_MASK(map));
13867
13868 if (!vm_map_lookup_entry(map, start, &entry)) {
13869 /* "start" is not mapped and "entry" ends before "start" */
13870 if (entry == vm_map_to_entry(map)) {
13871 /* start with first entry in the map */
13872 entry = vm_map_first_entry(map);
13873 } else {
13874 /* start with next entry */
13875 entry = entry->vme_next;
13876 }
13877 }
13878
13879 while (entry != vm_map_to_entry(map) &&
13880 entry->vme_start <= end) {
13881 /* try and coalesce "entry" with its previous entry */
13882 vm_map_simplify_entry(map, entry);
13883 entry = entry->vme_next;
13884 }
13885 }
13886
13887
13888 /*
13889 * Routine: vm_map_machine_attribute
13890 * Purpose:
13891 * Provide machine-specific attributes to mappings,
13892 * such as cachability etc. for machines that provide
13893 * them. NUMA architectures and machines with big/strange
13894 * caches will use this.
13895 * Note:
13896 * Responsibilities for locking and checking are handled here,
13897 * everything else in the pmap module. If any non-volatile
13898 * information must be kept, the pmap module should handle
13899 * it itself. [This assumes that attributes do not
13900 * need to be inherited, which seems ok to me]
13901 */
13902 kern_return_t
13903 vm_map_machine_attribute(
13904 vm_map_t map,
13905 vm_map_offset_t start,
13906 vm_map_offset_t end,
13907 vm_machine_attribute_t attribute,
13908 vm_machine_attribute_val_t* value) /* IN/OUT */
13909 {
13910 kern_return_t ret;
13911 vm_map_size_t sync_size;
13912 vm_map_entry_t entry;
13913
13914 if (start < vm_map_min(map) || end > vm_map_max(map))
13915 return KERN_INVALID_ADDRESS;
13916
13917 /* Figure how much memory we need to flush (in page increments) */
13918 sync_size = end - start;
13919
13920 vm_map_lock(map);
13921
13922 if (attribute != MATTR_CACHE) {
13923 /* If we don't have to find physical addresses, we */
13924 /* don't have to do an explicit traversal here. */
13925 ret = pmap_attribute(map->pmap, start, end-start,
13926 attribute, value);
13927 vm_map_unlock(map);
13928 return ret;
13929 }
13930
13931 ret = KERN_SUCCESS; /* Assume it all worked */
13932
13933 while(sync_size) {
13934 if (vm_map_lookup_entry(map, start, &entry)) {
13935 vm_map_size_t sub_size;
13936 if((entry->vme_end - start) > sync_size) {
13937 sub_size = sync_size;
13938 sync_size = 0;
13939 } else {
13940 sub_size = entry->vme_end - start;
13941 sync_size -= sub_size;
13942 }
13943 if(entry->is_sub_map) {
13944 vm_map_offset_t sub_start;
13945 vm_map_offset_t sub_end;
13946
13947 sub_start = (start - entry->vme_start)
13948 + VME_OFFSET(entry);
13949 sub_end = sub_start + sub_size;
13950 vm_map_machine_attribute(
13951 VME_SUBMAP(entry),
13952 sub_start,
13953 sub_end,
13954 attribute, value);
13955 } else {
13956 if (VME_OBJECT(entry)) {
13957 vm_page_t m;
13958 vm_object_t object;
13959 vm_object_t base_object;
13960 vm_object_t last_object;
13961 vm_object_offset_t offset;
13962 vm_object_offset_t base_offset;
13963 vm_map_size_t range;
13964 range = sub_size;
13965 offset = (start - entry->vme_start)
13966 + VME_OFFSET(entry);
13967 base_offset = offset;
13968 object = VME_OBJECT(entry);
13969 base_object = object;
13970 last_object = NULL;
13971
13972 vm_object_lock(object);
13973
13974 while (range) {
13975 m = vm_page_lookup(
13976 object, offset);
13977
13978 if (m && !m->fictitious) {
13979 ret =
13980 pmap_attribute_cache_sync(
13981 VM_PAGE_GET_PHYS_PAGE(m),
13982 PAGE_SIZE,
13983 attribute, value);
13984
13985 } else if (object->shadow) {
13986 offset = offset + object->vo_shadow_offset;
13987 last_object = object;
13988 object = object->shadow;
13989 vm_object_lock(last_object->shadow);
13990 vm_object_unlock(last_object);
13991 continue;
13992 }
13993 range -= PAGE_SIZE;
13994
13995 if (base_object != object) {
13996 vm_object_unlock(object);
13997 vm_object_lock(base_object);
13998 object = base_object;
13999 }
14000 /* Bump to the next page */
14001 base_offset += PAGE_SIZE;
14002 offset = base_offset;
14003 }
14004 vm_object_unlock(object);
14005 }
14006 }
14007 start += sub_size;
14008 } else {
14009 vm_map_unlock(map);
14010 return KERN_FAILURE;
14011 }
14012
14013 }
14014
14015 vm_map_unlock(map);
14016
14017 return ret;
14018 }
14019
14020 /*
14021 * vm_map_behavior_set:
14022 *
14023 * Sets the paging reference behavior of the specified address
14024 * range in the target map. Paging reference behavior affects
14025 * how pagein operations resulting from faults on the map will be
14026 * clustered.
14027 */
14028 kern_return_t
14029 vm_map_behavior_set(
14030 vm_map_t map,
14031 vm_map_offset_t start,
14032 vm_map_offset_t end,
14033 vm_behavior_t new_behavior)
14034 {
14035 vm_map_entry_t entry;
14036 vm_map_entry_t temp_entry;
14037
14038 XPR(XPR_VM_MAP,
14039 "vm_map_behavior_set, 0x%X start 0x%X end 0x%X behavior %d",
14040 map, start, end, new_behavior, 0);
14041
14042 if (start > end ||
14043 start < vm_map_min(map) ||
14044 end > vm_map_max(map)) {
14045 return KERN_NO_SPACE;
14046 }
14047
14048 switch (new_behavior) {
14049
14050 /*
14051 * This first block of behaviors all set a persistent state on the specified
14052 * memory range. All we have to do here is to record the desired behavior
14053 * in the vm_map_entry_t's.
14054 */
14055
14056 case VM_BEHAVIOR_DEFAULT:
14057 case VM_BEHAVIOR_RANDOM:
14058 case VM_BEHAVIOR_SEQUENTIAL:
14059 case VM_BEHAVIOR_RSEQNTL:
14060 case VM_BEHAVIOR_ZERO_WIRED_PAGES:
14061 vm_map_lock(map);
14062
14063 /*
14064 * The entire address range must be valid for the map.
14065 * Note that vm_map_range_check() does a
14066 * vm_map_lookup_entry() internally and returns the
14067 * entry containing the start of the address range if
14068 * the entire range is valid.
14069 */
14070 if (vm_map_range_check(map, start, end, &temp_entry)) {
14071 entry = temp_entry;
14072 vm_map_clip_start(map, entry, start);
14073 }
14074 else {
14075 vm_map_unlock(map);
14076 return(KERN_INVALID_ADDRESS);
14077 }
14078
14079 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
14080 vm_map_clip_end(map, entry, end);
14081 if (entry->is_sub_map) {
14082 assert(!entry->use_pmap);
14083 }
14084
14085 if( new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES ) {
14086 entry->zero_wired_pages = TRUE;
14087 } else {
14088 entry->behavior = new_behavior;
14089 }
14090 entry = entry->vme_next;
14091 }
14092
14093 vm_map_unlock(map);
14094 break;
14095
14096 /*
14097 * The rest of these are different from the above in that they cause
14098 * an immediate action to take place as opposed to setting a behavior that
14099 * affects future actions.
14100 */
14101
14102 case VM_BEHAVIOR_WILLNEED:
14103 return vm_map_willneed(map, start, end);
14104
14105 case VM_BEHAVIOR_DONTNEED:
14106 return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS);
14107
14108 case VM_BEHAVIOR_FREE:
14109 return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS);
14110
14111 case VM_BEHAVIOR_REUSABLE:
14112 return vm_map_reusable_pages(map, start, end);
14113
14114 case VM_BEHAVIOR_REUSE:
14115 return vm_map_reuse_pages(map, start, end);
14116
14117 case VM_BEHAVIOR_CAN_REUSE:
14118 return vm_map_can_reuse(map, start, end);
14119
14120 #if MACH_ASSERT
14121 case VM_BEHAVIOR_PAGEOUT:
14122 return vm_map_pageout(map, start, end);
14123 #endif /* MACH_ASSERT */
14124
14125 default:
14126 return(KERN_INVALID_ARGUMENT);
14127 }
14128
14129 return(KERN_SUCCESS);
14130 }
14131
14132
14133 /*
14134 * Internals for madvise(MADV_WILLNEED) system call.
14135 *
14136 * The present implementation is to do a read-ahead if the mapping corresponds
14137 * to a mapped regular file. If it's an anonymous mapping, then we do nothing
14138 * and basically ignore the "advice" (which we are always free to do).
14139 */
14140
14141
14142 static kern_return_t
14143 vm_map_willneed(
14144 vm_map_t map,
14145 vm_map_offset_t start,
14146 vm_map_offset_t end
14147 )
14148 {
14149 vm_map_entry_t entry;
14150 vm_object_t object;
14151 memory_object_t pager;
14152 struct vm_object_fault_info fault_info;
14153 kern_return_t kr;
14154 vm_object_size_t len;
14155 vm_object_offset_t offset;
14156
14157 /*
14158 * Fill in static values in fault_info. Several fields get ignored by the code
14159 * we call, but we'll fill them in anyway since uninitialized fields are bad
14160 * when it comes to future backwards compatibility.
14161 */
14162
14163 fault_info.interruptible = THREAD_UNINT; /* ignored value */
14164 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
14165 fault_info.no_cache = FALSE; /* ignored value */
14166 fault_info.stealth = TRUE;
14167 fault_info.io_sync = FALSE;
14168 fault_info.cs_bypass = FALSE;
14169 fault_info.mark_zf_absent = FALSE;
14170 fault_info.batch_pmap_op = FALSE;
14171
14172 /*
14173 * The MADV_WILLNEED operation doesn't require any changes to the
14174 * vm_map_entry_t's, so the read lock is sufficient.
14175 */
14176
14177 vm_map_lock_read(map);
14178
14179 /*
14180 * The madvise semantics require that the address range be fully
14181 * allocated with no holes. Otherwise, we're required to return
14182 * an error.
14183 */
14184
14185 if (! vm_map_range_check(map, start, end, &entry)) {
14186 vm_map_unlock_read(map);
14187 return KERN_INVALID_ADDRESS;
14188 }
14189
14190 /*
14191 * Examine each vm_map_entry_t in the range.
14192 */
14193 for (; entry != vm_map_to_entry(map) && start < end; ) {
14194
14195 /*
14196 * The first time through, the start address could be anywhere
14197 * within the vm_map_entry we found. So adjust the offset to
14198 * correspond. After that, the offset will always be zero to
14199 * correspond to the beginning of the current vm_map_entry.
14200 */
14201 offset = (start - entry->vme_start) + VME_OFFSET(entry);
14202
14203 /*
14204 * Set the length so we don't go beyond the end of the
14205 * map_entry or beyond the end of the range we were given.
14206 * This range could span also multiple map entries all of which
14207 * map different files, so make sure we only do the right amount
14208 * of I/O for each object. Note that it's possible for there
14209 * to be multiple map entries all referring to the same object
14210 * but with different page permissions, but it's not worth
14211 * trying to optimize that case.
14212 */
14213 len = MIN(entry->vme_end - start, end - start);
14214
14215 if ((vm_size_t) len != len) {
14216 /* 32-bit overflow */
14217 len = (vm_size_t) (0 - PAGE_SIZE);
14218 }
14219 fault_info.cluster_size = (vm_size_t) len;
14220 fault_info.lo_offset = offset;
14221 fault_info.hi_offset = offset + len;
14222 fault_info.user_tag = VME_ALIAS(entry);
14223 fault_info.pmap_options = 0;
14224 if (entry->iokit_acct ||
14225 (!entry->is_sub_map && !entry->use_pmap)) {
14226 fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
14227 }
14228
14229 /*
14230 * If there's no read permission to this mapping, then just
14231 * skip it.
14232 */
14233 if ((entry->protection & VM_PROT_READ) == 0) {
14234 entry = entry->vme_next;
14235 start = entry->vme_start;
14236 continue;
14237 }
14238
14239 /*
14240 * Find the file object backing this map entry. If there is
14241 * none, then we simply ignore the "will need" advice for this
14242 * entry and go on to the next one.
14243 */
14244 if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) {
14245 entry = entry->vme_next;
14246 start = entry->vme_start;
14247 continue;
14248 }
14249
14250 /*
14251 * The data_request() could take a long time, so let's
14252 * release the map lock to avoid blocking other threads.
14253 */
14254 vm_map_unlock_read(map);
14255
14256 vm_object_paging_begin(object);
14257 pager = object->pager;
14258 vm_object_unlock(object);
14259
14260 /*
14261 * Get the data from the object asynchronously.
14262 *
14263 * Note that memory_object_data_request() places limits on the
14264 * amount of I/O it will do. Regardless of the len we
14265 * specified, it won't do more than MAX_UPL_TRANSFER_BYTES and it
14266 * silently truncates the len to that size. This isn't
14267 * necessarily bad since madvise shouldn't really be used to
14268 * page in unlimited amounts of data. Other Unix variants
14269 * limit the willneed case as well. If this turns out to be an
14270 * issue for developers, then we can always adjust the policy
14271 * here and still be backwards compatible since this is all
14272 * just "advice".
14273 */
14274 kr = memory_object_data_request(
14275 pager,
14276 offset + object->paging_offset,
14277 0, /* ignored */
14278 VM_PROT_READ,
14279 (memory_object_fault_info_t)&fault_info);
14280
14281 vm_object_lock(object);
14282 vm_object_paging_end(object);
14283 vm_object_unlock(object);
14284
14285 /*
14286 * If we couldn't do the I/O for some reason, just give up on
14287 * the madvise. We still return success to the user since
14288 * madvise isn't supposed to fail when the advice can't be
14289 * taken.
14290 */
14291 if (kr != KERN_SUCCESS) {
14292 return KERN_SUCCESS;
14293 }
14294
14295 start += len;
14296 if (start >= end) {
14297 /* done */
14298 return KERN_SUCCESS;
14299 }
14300
14301 /* look up next entry */
14302 vm_map_lock_read(map);
14303 if (! vm_map_lookup_entry(map, start, &entry)) {
14304 /*
14305 * There's a new hole in the address range.
14306 */
14307 vm_map_unlock_read(map);
14308 return KERN_INVALID_ADDRESS;
14309 }
14310 }
14311
14312 vm_map_unlock_read(map);
14313 return KERN_SUCCESS;
14314 }
14315
14316 static boolean_t
14317 vm_map_entry_is_reusable(
14318 vm_map_entry_t entry)
14319 {
14320 /* Only user map entries */
14321
14322 vm_object_t object;
14323
14324 if (entry->is_sub_map) {
14325 return FALSE;
14326 }
14327
14328 switch (VME_ALIAS(entry)) {
14329 case VM_MEMORY_MALLOC:
14330 case VM_MEMORY_MALLOC_SMALL:
14331 case VM_MEMORY_MALLOC_LARGE:
14332 case VM_MEMORY_REALLOC:
14333 case VM_MEMORY_MALLOC_TINY:
14334 case VM_MEMORY_MALLOC_LARGE_REUSABLE:
14335 case VM_MEMORY_MALLOC_LARGE_REUSED:
14336 /*
14337 * This is a malloc() memory region: check if it's still
14338 * in its original state and can be re-used for more
14339 * malloc() allocations.
14340 */
14341 break;
14342 default:
14343 /*
14344 * Not a malloc() memory region: let the caller decide if
14345 * it's re-usable.
14346 */
14347 return TRUE;
14348 }
14349
14350 if (entry->is_shared ||
14351 entry->is_sub_map ||
14352 entry->in_transition ||
14353 entry->protection != VM_PROT_DEFAULT ||
14354 entry->max_protection != VM_PROT_ALL ||
14355 entry->inheritance != VM_INHERIT_DEFAULT ||
14356 entry->no_cache ||
14357 entry->permanent ||
14358 entry->superpage_size != FALSE ||
14359 entry->zero_wired_pages ||
14360 entry->wired_count != 0 ||
14361 entry->user_wired_count != 0) {
14362 return FALSE;
14363 }
14364
14365 object = VME_OBJECT(entry);
14366 if (object == VM_OBJECT_NULL) {
14367 return TRUE;
14368 }
14369 if (
14370 #if 0
14371 /*
14372 * Let's proceed even if the VM object is potentially
14373 * shared.
14374 * We check for this later when processing the actual
14375 * VM pages, so the contents will be safe if shared.
14376 *
14377 * But we can still mark this memory region as "reusable" to
14378 * acknowledge that the caller did let us know that the memory
14379 * could be re-used and should not be penalized for holding
14380 * on to it. This allows its "resident size" to not include
14381 * the reusable range.
14382 */
14383 object->ref_count == 1 &&
14384 #endif
14385 object->wired_page_count == 0 &&
14386 object->copy == VM_OBJECT_NULL &&
14387 object->shadow == VM_OBJECT_NULL &&
14388 object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
14389 object->internal &&
14390 !object->true_share &&
14391 object->wimg_bits == VM_WIMG_USE_DEFAULT &&
14392 !object->code_signed) {
14393 return TRUE;
14394 }
14395 return FALSE;
14396
14397
14398 }
14399
14400 static kern_return_t
14401 vm_map_reuse_pages(
14402 vm_map_t map,
14403 vm_map_offset_t start,
14404 vm_map_offset_t end)
14405 {
14406 vm_map_entry_t entry;
14407 vm_object_t object;
14408 vm_object_offset_t start_offset, end_offset;
14409
14410 /*
14411 * The MADV_REUSE operation doesn't require any changes to the
14412 * vm_map_entry_t's, so the read lock is sufficient.
14413 */
14414
14415 vm_map_lock_read(map);
14416 assert(map->pmap != kernel_pmap); /* protect alias access */
14417
14418 /*
14419 * The madvise semantics require that the address range be fully
14420 * allocated with no holes. Otherwise, we're required to return
14421 * an error.
14422 */
14423
14424 if (!vm_map_range_check(map, start, end, &entry)) {
14425 vm_map_unlock_read(map);
14426 vm_page_stats_reusable.reuse_pages_failure++;
14427 return KERN_INVALID_ADDRESS;
14428 }
14429
14430 /*
14431 * Examine each vm_map_entry_t in the range.
14432 */
14433 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
14434 entry = entry->vme_next) {
14435 /*
14436 * Sanity check on the VM map entry.
14437 */
14438 if (! vm_map_entry_is_reusable(entry)) {
14439 vm_map_unlock_read(map);
14440 vm_page_stats_reusable.reuse_pages_failure++;
14441 return KERN_INVALID_ADDRESS;
14442 }
14443
14444 /*
14445 * The first time through, the start address could be anywhere
14446 * within the vm_map_entry we found. So adjust the offset to
14447 * correspond.
14448 */
14449 if (entry->vme_start < start) {
14450 start_offset = start - entry->vme_start;
14451 } else {
14452 start_offset = 0;
14453 }
14454 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
14455 start_offset += VME_OFFSET(entry);
14456 end_offset += VME_OFFSET(entry);
14457
14458 assert(!entry->is_sub_map);
14459 object = VME_OBJECT(entry);
14460 if (object != VM_OBJECT_NULL) {
14461 vm_object_lock(object);
14462 vm_object_reuse_pages(object, start_offset, end_offset,
14463 TRUE);
14464 vm_object_unlock(object);
14465 }
14466
14467 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSABLE) {
14468 /*
14469 * XXX
14470 * We do not hold the VM map exclusively here.
14471 * The "alias" field is not that critical, so it's
14472 * safe to update it here, as long as it is the only
14473 * one that can be modified while holding the VM map
14474 * "shared".
14475 */
14476 VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSED);
14477 }
14478 }
14479
14480 vm_map_unlock_read(map);
14481 vm_page_stats_reusable.reuse_pages_success++;
14482 return KERN_SUCCESS;
14483 }
14484
14485
14486 static kern_return_t
14487 vm_map_reusable_pages(
14488 vm_map_t map,
14489 vm_map_offset_t start,
14490 vm_map_offset_t end)
14491 {
14492 vm_map_entry_t entry;
14493 vm_object_t object;
14494 vm_object_offset_t start_offset, end_offset;
14495 vm_map_offset_t pmap_offset;
14496
14497 /*
14498 * The MADV_REUSABLE operation doesn't require any changes to the
14499 * vm_map_entry_t's, so the read lock is sufficient.
14500 */
14501
14502 vm_map_lock_read(map);
14503 assert(map->pmap != kernel_pmap); /* protect alias access */
14504
14505 /*
14506 * The madvise semantics require that the address range be fully
14507 * allocated with no holes. Otherwise, we're required to return
14508 * an error.
14509 */
14510
14511 if (!vm_map_range_check(map, start, end, &entry)) {
14512 vm_map_unlock_read(map);
14513 vm_page_stats_reusable.reusable_pages_failure++;
14514 return KERN_INVALID_ADDRESS;
14515 }
14516
14517 /*
14518 * Examine each vm_map_entry_t in the range.
14519 */
14520 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
14521 entry = entry->vme_next) {
14522 int kill_pages = 0;
14523
14524 /*
14525 * Sanity check on the VM map entry.
14526 */
14527 if (! vm_map_entry_is_reusable(entry)) {
14528 vm_map_unlock_read(map);
14529 vm_page_stats_reusable.reusable_pages_failure++;
14530 return KERN_INVALID_ADDRESS;
14531 }
14532
14533 if (! (entry->protection & VM_PROT_WRITE) && !entry->used_for_jit) {
14534 /* not writable: can't discard contents */
14535 vm_map_unlock_read(map);
14536 vm_page_stats_reusable.reusable_nonwritable++;
14537 vm_page_stats_reusable.reusable_pages_failure++;
14538 return KERN_PROTECTION_FAILURE;
14539 }
14540
14541 /*
14542 * The first time through, the start address could be anywhere
14543 * within the vm_map_entry we found. So adjust the offset to
14544 * correspond.
14545 */
14546 if (entry->vme_start < start) {
14547 start_offset = start - entry->vme_start;
14548 pmap_offset = start;
14549 } else {
14550 start_offset = 0;
14551 pmap_offset = entry->vme_start;
14552 }
14553 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
14554 start_offset += VME_OFFSET(entry);
14555 end_offset += VME_OFFSET(entry);
14556
14557 assert(!entry->is_sub_map);
14558 object = VME_OBJECT(entry);
14559 if (object == VM_OBJECT_NULL)
14560 continue;
14561
14562
14563 vm_object_lock(object);
14564 if (((object->ref_count == 1) ||
14565 (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC &&
14566 object->copy == VM_OBJECT_NULL)) &&
14567 object->shadow == VM_OBJECT_NULL &&
14568 /*
14569 * "iokit_acct" entries are billed for their virtual size
14570 * (rather than for their resident pages only), so they
14571 * wouldn't benefit from making pages reusable, and it
14572 * would be hard to keep track of pages that are both
14573 * "iokit_acct" and "reusable" in the pmap stats and
14574 * ledgers.
14575 */
14576 !(entry->iokit_acct ||
14577 (!entry->is_sub_map && !entry->use_pmap))) {
14578 if (object->ref_count != 1) {
14579 vm_page_stats_reusable.reusable_shared++;
14580 }
14581 kill_pages = 1;
14582 } else {
14583 kill_pages = -1;
14584 }
14585 if (kill_pages != -1) {
14586 vm_object_deactivate_pages(object,
14587 start_offset,
14588 end_offset - start_offset,
14589 kill_pages,
14590 TRUE /*reusable_pages*/,
14591 map->pmap,
14592 pmap_offset);
14593 } else {
14594 vm_page_stats_reusable.reusable_pages_shared++;
14595 }
14596 vm_object_unlock(object);
14597
14598 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE ||
14599 VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSED) {
14600 /*
14601 * XXX
14602 * We do not hold the VM map exclusively here.
14603 * The "alias" field is not that critical, so it's
14604 * safe to update it here, as long as it is the only
14605 * one that can be modified while holding the VM map
14606 * "shared".
14607 */
14608 VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSABLE);
14609 }
14610 }
14611
14612 vm_map_unlock_read(map);
14613 vm_page_stats_reusable.reusable_pages_success++;
14614 return KERN_SUCCESS;
14615 }
14616
14617
14618 static kern_return_t
14619 vm_map_can_reuse(
14620 vm_map_t map,
14621 vm_map_offset_t start,
14622 vm_map_offset_t end)
14623 {
14624 vm_map_entry_t entry;
14625
14626 /*
14627 * The MADV_REUSABLE operation doesn't require any changes to the
14628 * vm_map_entry_t's, so the read lock is sufficient.
14629 */
14630
14631 vm_map_lock_read(map);
14632 assert(map->pmap != kernel_pmap); /* protect alias access */
14633
14634 /*
14635 * The madvise semantics require that the address range be fully
14636 * allocated with no holes. Otherwise, we're required to return
14637 * an error.
14638 */
14639
14640 if (!vm_map_range_check(map, start, end, &entry)) {
14641 vm_map_unlock_read(map);
14642 vm_page_stats_reusable.can_reuse_failure++;
14643 return KERN_INVALID_ADDRESS;
14644 }
14645
14646 /*
14647 * Examine each vm_map_entry_t in the range.
14648 */
14649 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
14650 entry = entry->vme_next) {
14651 /*
14652 * Sanity check on the VM map entry.
14653 */
14654 if (! vm_map_entry_is_reusable(entry)) {
14655 vm_map_unlock_read(map);
14656 vm_page_stats_reusable.can_reuse_failure++;
14657 return KERN_INVALID_ADDRESS;
14658 }
14659 }
14660
14661 vm_map_unlock_read(map);
14662 vm_page_stats_reusable.can_reuse_success++;
14663 return KERN_SUCCESS;
14664 }
14665
14666
#if MACH_ASSERT
/*
 *	Routine:	vm_map_pageout
 *
 *	Description:
 *		MADV_PAGEOUT handler (only built with MACH_ASSERT): for
 *		every map entry overlapping [start, end), call
 *		vm_object_pageout() on its VM object when that object
 *		exists and is internal.  For a submap entry, the range
 *		it maps is checked in the submap and only the first
 *		submap entry returned by that check is considered.
 *
 *	Returns:
 *		KERN_SUCCESS, or KERN_INVALID_ADDRESS if the range check
 *		fails in the map or in a submap.
 */
static kern_return_t
vm_map_pageout(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	vm_map_entry_t	cur;

	/*
	 * No vm_map_entry_t needs to change for MADV_PAGEOUT,
	 * so a read lock on the map is enough.
	 */
	vm_map_lock_read(map);

	/*
	 * madvise semantics: the whole range must be allocated,
	 * without holes, or we must fail.
	 */
	if (!vm_map_range_check(map, start, end, &cur)) {
		vm_map_unlock_read(map);
		return KERN_INVALID_ADDRESS;
	}

	/*
	 * Walk every entry that overlaps the range.
	 */
	for (; cur != vm_map_to_entry(map) && cur->vme_start < end;
	    cur = cur->vme_next) {
		vm_map_t	nested = VM_MAP_NULL;
		vm_map_entry_t	leaf = cur;	/* entry whose object we use */
		vm_object_t	obj;

		if (cur->is_sub_map) {
			vm_map_offset_t	nested_start;
			vm_map_offset_t	nested_end;

			/*
			 * Look up, in the submap, the range that this
			 * entry maps.
			 */
			nested = VME_SUBMAP(cur);
			nested_start = VME_OFFSET(cur);
			nested_end = nested_start +
			    (cur->vme_end - cur->vme_start);

			vm_map_lock_read(nested);

			if (!vm_map_range_check(nested,
			    nested_start,
			    nested_end,
			    &leaf)) {
				vm_map_unlock_read(nested);
				vm_map_unlock_read(map);
				return KERN_INVALID_ADDRESS;
			}
		}

		/*
		 * Only internal objects of non-submap entries are
		 * paged out; anything else is silently skipped.
		 */
		obj = VME_OBJECT(leaf);
		if (!leaf->is_sub_map &&
		    obj != VM_OBJECT_NULL &&
		    obj->internal) {
			vm_object_pageout(obj);
		}

		if (nested != VM_MAP_NULL) {
			vm_map_unlock_read(nested);
		}
	}

	vm_map_unlock_read(map);
	return KERN_SUCCESS;
}
#endif /* MACH_ASSERT */
14756
14757
14758 /*
14759 * Routine: vm_map_entry_insert
14760 *
14761 * Descritpion: This routine inserts a new vm_entry in a locked map.
14762 */
14763 vm_map_entry_t
14764 vm_map_entry_insert(
14765 vm_map_t map,
14766 vm_map_entry_t insp_entry,
14767 vm_map_offset_t start,
14768 vm_map_offset_t end,
14769 vm_object_t object,
14770 vm_object_offset_t offset,
14771 boolean_t needs_copy,
14772 boolean_t is_shared,
14773 boolean_t in_transition,
14774 vm_prot_t cur_protection,
14775 vm_prot_t max_protection,
14776 vm_behavior_t behavior,
14777 vm_inherit_t inheritance,
14778 unsigned wired_count,
14779 boolean_t no_cache,
14780 boolean_t permanent,
14781 unsigned int superpage_size,
14782 boolean_t clear_map_aligned,
14783 boolean_t is_submap,
14784 boolean_t used_for_jit,
14785 int alias)
14786 {
14787 vm_map_entry_t new_entry;
14788
14789 assert(insp_entry != (vm_map_entry_t)0);
14790
14791 #if DEVELOPMENT || DEBUG
14792 vm_object_offset_t end_offset = 0;
14793 assertf(!os_add_overflow(end - start, offset, &end_offset), "size 0x%llx, offset 0x%llx caused overflow", (uint64_t)(end - start), offset);
14794 #endif /* DEVELOPMENT || DEBUG */
14795
14796 new_entry = vm_map_entry_create(map, !map->hdr.entries_pageable);
14797
14798 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
14799 new_entry->map_aligned = TRUE;
14800 } else {
14801 new_entry->map_aligned = FALSE;
14802 }
14803 if (clear_map_aligned &&
14804 (! VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)) ||
14805 ! VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)))) {
14806 new_entry->map_aligned = FALSE;
14807 }
14808
14809 new_entry->vme_start = start;
14810 new_entry->vme_end = end;
14811 assert(page_aligned(new_entry->vme_start));
14812 assert(page_aligned(new_entry->vme_end));
14813 if (new_entry->map_aligned) {
14814 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
14815 VM_MAP_PAGE_MASK(map)));
14816 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
14817 VM_MAP_PAGE_MASK(map)));
14818 }
14819 assert(new_entry->vme_start < new_entry->vme_end);
14820
14821 VME_OBJECT_SET(new_entry, object);
14822 VME_OFFSET_SET(new_entry, offset);
14823 new_entry->is_shared = is_shared;
14824 new_entry->is_sub_map = is_submap;
14825 new_entry->needs_copy = needs_copy;
14826 new_entry->in_transition = in_transition;
14827 new_entry->needs_wakeup = FALSE;
14828 new_entry->inheritance = inheritance;
14829 new_entry->protection = cur_protection;
14830 new_entry->max_protection = max_protection;
14831 new_entry->behavior = behavior;
14832 new_entry->wired_count = wired_count;
14833 new_entry->user_wired_count = 0;
14834 if (is_submap) {
14835 /*
14836 * submap: "use_pmap" means "nested".
14837 * default: false.
14838 */
14839 new_entry->use_pmap = FALSE;
14840 } else {
14841 /*
14842 * object: "use_pmap" means "use pmap accounting" for footprint.
14843 * default: true.
14844 */
14845 new_entry->use_pmap = TRUE;
14846 }
14847 VME_ALIAS_SET(new_entry, alias);
14848 new_entry->zero_wired_pages = FALSE;
14849 new_entry->no_cache = no_cache;
14850 new_entry->permanent = permanent;
14851 if (superpage_size)
14852 new_entry->superpage_size = TRUE;
14853 else
14854 new_entry->superpage_size = FALSE;
14855 if (used_for_jit){
14856 if (!(map->jit_entry_exists)){
14857 new_entry->used_for_jit = TRUE;
14858 map->jit_entry_exists = TRUE;
14859
14860 /* Tell the pmap that it supports JIT. */
14861 pmap_set_jit_entitled(map->pmap);
14862 }
14863 } else {
14864 new_entry->used_for_jit = FALSE;
14865 }
14866 new_entry->iokit_acct = FALSE;
14867 new_entry->vme_resilient_codesign = FALSE;
14868 new_entry->vme_resilient_media = FALSE;
14869 new_entry->vme_atomic = FALSE;
14870
14871 /*
14872 * Insert the new entry into the list.
14873 */
14874
14875 vm_map_store_entry_link(map, insp_entry, new_entry);
14876 map->size += end - start;
14877
14878 /*
14879 * Update the free space hint and the lookup hint.
14880 */
14881
14882 SAVE_HINT_MAP_WRITE(map, new_entry);
14883 return new_entry;
14884 }
14885
14886 /*
14887 * Routine: vm_map_remap_extract
14888 *
14889 * Descritpion: This routine returns a vm_entry list from a map.
14890 */
14891 static kern_return_t
14892 vm_map_remap_extract(
14893 vm_map_t map,
14894 vm_map_offset_t addr,
14895 vm_map_size_t size,
14896 boolean_t copy,
14897 struct vm_map_header *map_header,
14898 vm_prot_t *cur_protection,
14899 vm_prot_t *max_protection,
14900 /* What, no behavior? */
14901 vm_inherit_t inheritance,
14902 boolean_t pageable,
14903 boolean_t same_map,
14904 vm_map_kernel_flags_t vmk_flags)
14905 {
14906 kern_return_t result;
14907 vm_map_size_t mapped_size;
14908 vm_map_size_t tmp_size;
14909 vm_map_entry_t src_entry; /* result of last map lookup */
14910 vm_map_entry_t new_entry;
14911 vm_object_offset_t offset;
14912 vm_map_offset_t map_address;
14913 vm_map_offset_t src_start; /* start of entry to map */
14914 vm_map_offset_t src_end; /* end of region to be mapped */
14915 vm_object_t object;
14916 vm_map_version_t version;
14917 boolean_t src_needs_copy;
14918 boolean_t new_entry_needs_copy;
14919 vm_map_entry_t saved_src_entry;
14920 boolean_t src_entry_was_wired;
14921
14922 assert(map != VM_MAP_NULL);
14923 assert(size != 0);
14924 assert(size == vm_map_round_page(size, PAGE_MASK));
14925 assert(inheritance == VM_INHERIT_NONE ||
14926 inheritance == VM_INHERIT_COPY ||
14927 inheritance == VM_INHERIT_SHARE);
14928
14929 /*
14930 * Compute start and end of region.
14931 */
14932 src_start = vm_map_trunc_page(addr, PAGE_MASK);
14933 src_end = vm_map_round_page(src_start + size, PAGE_MASK);
14934
14935
14936 /*
14937 * Initialize map_header.
14938 */
14939 map_header->links.next = (struct vm_map_entry *)&map_header->links;
14940 map_header->links.prev = (struct vm_map_entry *)&map_header->links;
14941 map_header->nentries = 0;
14942 map_header->entries_pageable = pageable;
14943 map_header->page_shift = PAGE_SHIFT;
14944
14945 vm_map_store_init( map_header );
14946
14947 *cur_protection = VM_PROT_ALL;
14948 *max_protection = VM_PROT_ALL;
14949
14950 map_address = 0;
14951 mapped_size = 0;
14952 result = KERN_SUCCESS;
14953
14954 /*
14955 * The specified source virtual space might correspond to
14956 * multiple map entries, need to loop on them.
14957 */
14958 vm_map_lock(map);
14959 while (mapped_size != size) {
14960 vm_map_size_t entry_size;
14961
14962 /*
14963 * Find the beginning of the region.
14964 */
14965 if (! vm_map_lookup_entry(map, src_start, &src_entry)) {
14966 result = KERN_INVALID_ADDRESS;
14967 break;
14968 }
14969
14970 if (src_start < src_entry->vme_start ||
14971 (mapped_size && src_start != src_entry->vme_start)) {
14972 result = KERN_INVALID_ADDRESS;
14973 break;
14974 }
14975
14976 tmp_size = size - mapped_size;
14977 if (src_end > src_entry->vme_end)
14978 tmp_size -= (src_end - src_entry->vme_end);
14979
14980 entry_size = (vm_map_size_t)(src_entry->vme_end -
14981 src_entry->vme_start);
14982
14983 if(src_entry->is_sub_map) {
14984 vm_map_reference(VME_SUBMAP(src_entry));
14985 object = VM_OBJECT_NULL;
14986 } else {
14987 object = VME_OBJECT(src_entry);
14988 if (src_entry->iokit_acct) {
14989 /*
14990 * This entry uses "IOKit accounting".
14991 */
14992 } else if (object != VM_OBJECT_NULL &&
14993 object->purgable != VM_PURGABLE_DENY) {
14994 /*
14995 * Purgeable objects have their own accounting:
14996 * no pmap accounting for them.
14997 */
14998 assertf(!src_entry->use_pmap,
14999 "map=%p src_entry=%p [0x%llx:0x%llx] 0x%x/0x%x %d",
15000 map,
15001 src_entry,
15002 (uint64_t)src_entry->vme_start,
15003 (uint64_t)src_entry->vme_end,
15004 src_entry->protection,
15005 src_entry->max_protection,
15006 VME_ALIAS(src_entry));
15007 } else {
15008 /*
15009 * Not IOKit or purgeable:
15010 * must be accounted by pmap stats.
15011 */
15012 assertf(src_entry->use_pmap,
15013 "map=%p src_entry=%p [0x%llx:0x%llx] 0x%x/0x%x %d",
15014 map,
15015 src_entry,
15016 (uint64_t)src_entry->vme_start,
15017 (uint64_t)src_entry->vme_end,
15018 src_entry->protection,
15019 src_entry->max_protection,
15020 VME_ALIAS(src_entry));
15021 }
15022
15023 if (object == VM_OBJECT_NULL) {
15024 object = vm_object_allocate(entry_size);
15025 VME_OFFSET_SET(src_entry, 0);
15026 VME_OBJECT_SET(src_entry, object);
15027 assert(src_entry->use_pmap);
15028 } else if (object->copy_strategy !=
15029 MEMORY_OBJECT_COPY_SYMMETRIC) {
15030 /*
15031 * We are already using an asymmetric
15032 * copy, and therefore we already have
15033 * the right object.
15034 */
15035 assert(!src_entry->needs_copy);
15036 } else if (src_entry->needs_copy || object->shadowed ||
15037 (object->internal && !object->true_share &&
15038 !src_entry->is_shared &&
15039 object->vo_size > entry_size)) {
15040
15041 VME_OBJECT_SHADOW(src_entry, entry_size);
15042 assert(src_entry->use_pmap);
15043
15044 if (!src_entry->needs_copy &&
15045 (src_entry->protection & VM_PROT_WRITE)) {
15046 vm_prot_t prot;
15047
15048 assert(!pmap_has_prot_policy(src_entry->protection));
15049
15050 prot = src_entry->protection & ~VM_PROT_WRITE;
15051
15052 if (override_nx(map,
15053 VME_ALIAS(src_entry))
15054 && prot)
15055 prot |= VM_PROT_EXECUTE;
15056
15057 assert(!pmap_has_prot_policy(prot));
15058
15059 if(map->mapped_in_other_pmaps) {
15060 vm_object_pmap_protect(
15061 VME_OBJECT(src_entry),
15062 VME_OFFSET(src_entry),
15063 entry_size,
15064 PMAP_NULL,
15065 src_entry->vme_start,
15066 prot);
15067 } else {
15068 pmap_protect(vm_map_pmap(map),
15069 src_entry->vme_start,
15070 src_entry->vme_end,
15071 prot);
15072 }
15073 }
15074
15075 object = VME_OBJECT(src_entry);
15076 src_entry->needs_copy = FALSE;
15077 }
15078
15079
15080 vm_object_lock(object);
15081 vm_object_reference_locked(object); /* object ref. for new entry */
15082 if (object->copy_strategy ==
15083 MEMORY_OBJECT_COPY_SYMMETRIC) {
15084 object->copy_strategy =
15085 MEMORY_OBJECT_COPY_DELAY;
15086 }
15087 vm_object_unlock(object);
15088 }
15089
15090 offset = (VME_OFFSET(src_entry) +
15091 (src_start - src_entry->vme_start));
15092
15093 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
15094 vm_map_entry_copy(new_entry, src_entry);
15095 if (new_entry->is_sub_map) {
15096 /* clr address space specifics */
15097 new_entry->use_pmap = FALSE;
15098 } else if (copy) {
15099 /*
15100 * We're dealing with a copy-on-write operation,
15101 * so the resulting mapping should not inherit the
15102 * original mapping's accounting settings.
15103 * "use_pmap" should be reset to its default (TRUE)
15104 * so that the new mapping gets accounted for in
15105 * the task's memory footprint.
15106 */
15107 new_entry->use_pmap = TRUE;
15108 }
15109 /* "iokit_acct" was cleared in vm_map_entry_copy() */
15110 assert(!new_entry->iokit_acct);
15111
15112 new_entry->map_aligned = FALSE;
15113
15114 new_entry->vme_start = map_address;
15115 new_entry->vme_end = map_address + tmp_size;
15116 assert(new_entry->vme_start < new_entry->vme_end);
15117 if (copy && vmk_flags.vmkf_remap_prot_copy) {
15118 /*
15119 * Remapping for vm_map_protect(VM_PROT_COPY)
15120 * to convert a read-only mapping into a
15121 * copy-on-write version of itself but
15122 * with write access:
15123 * keep the original inheritance and add
15124 * VM_PROT_WRITE to the max protection.
15125 */
15126 new_entry->inheritance = src_entry->inheritance;
15127 new_entry->max_protection |= VM_PROT_WRITE;
15128 } else {
15129 new_entry->inheritance = inheritance;
15130 }
15131 VME_OFFSET_SET(new_entry, offset);
15132
15133 /*
15134 * The new region has to be copied now if required.
15135 */
15136 RestartCopy:
15137 if (!copy) {
15138 /*
15139 * Cannot allow an entry describing a JIT
15140 * region to be shared across address spaces.
15141 */
15142 if (src_entry->used_for_jit == TRUE && !same_map) {
15143 result = KERN_INVALID_ARGUMENT;
15144 break;
15145 }
15146 src_entry->is_shared = TRUE;
15147 new_entry->is_shared = TRUE;
15148 if (!(new_entry->is_sub_map))
15149 new_entry->needs_copy = FALSE;
15150
15151 } else if (src_entry->is_sub_map) {
15152 /* make this a COW sub_map if not already */
15153 assert(new_entry->wired_count == 0);
15154 new_entry->needs_copy = TRUE;
15155 object = VM_OBJECT_NULL;
15156 } else if (src_entry->wired_count == 0 &&
15157 vm_object_copy_quickly(&VME_OBJECT(new_entry),
15158 VME_OFFSET(new_entry),
15159 (new_entry->vme_end -
15160 new_entry->vme_start),
15161 &src_needs_copy,
15162 &new_entry_needs_copy)) {
15163
15164 new_entry->needs_copy = new_entry_needs_copy;
15165 new_entry->is_shared = FALSE;
15166 assertf(new_entry->use_pmap, "map %p new_entry %p\n", map, new_entry);
15167
15168 /*
15169 * Handle copy_on_write semantics.
15170 */
15171 if (src_needs_copy && !src_entry->needs_copy) {
15172 vm_prot_t prot;
15173
15174 assert(!pmap_has_prot_policy(src_entry->protection));
15175
15176 prot = src_entry->protection & ~VM_PROT_WRITE;
15177
15178 if (override_nx(map,
15179 VME_ALIAS(src_entry))
15180 && prot)
15181 prot |= VM_PROT_EXECUTE;
15182
15183 assert(!pmap_has_prot_policy(prot));
15184
15185 vm_object_pmap_protect(object,
15186 offset,
15187 entry_size,
15188 ((src_entry->is_shared
15189 || map->mapped_in_other_pmaps) ?
15190 PMAP_NULL : map->pmap),
15191 src_entry->vme_start,
15192 prot);
15193
15194 assert(src_entry->wired_count == 0);
15195 src_entry->needs_copy = TRUE;
15196 }
15197 /*
15198 * Throw away the old object reference of the new entry.
15199 */
15200 vm_object_deallocate(object);
15201
15202 } else {
15203 new_entry->is_shared = FALSE;
15204 assertf(new_entry->use_pmap, "map %p new_entry %p\n", map, new_entry);
15205
15206 src_entry_was_wired = (src_entry->wired_count > 0);
15207 saved_src_entry = src_entry;
15208 src_entry = VM_MAP_ENTRY_NULL;
15209
15210 /*
15211 * The map can be safely unlocked since we
15212 * already hold a reference on the object.
15213 *
15214 * Record the timestamp of the map for later
15215 * verification, and unlock the map.
15216 */
15217 version.main_timestamp = map->timestamp;
15218 vm_map_unlock(map); /* Increments timestamp once! */
15219
15220 /*
15221 * Perform the copy.
15222 */
15223 if (src_entry_was_wired > 0) {
15224 vm_object_lock(object);
15225 result = vm_object_copy_slowly(
15226 object,
15227 offset,
15228 (new_entry->vme_end -
15229 new_entry->vme_start),
15230 THREAD_UNINT,
15231 &VME_OBJECT(new_entry));
15232
15233 VME_OFFSET_SET(new_entry, 0);
15234 new_entry->needs_copy = FALSE;
15235 } else {
15236 vm_object_offset_t new_offset;
15237
15238 new_offset = VME_OFFSET(new_entry);
15239 result = vm_object_copy_strategically(
15240 object,
15241 offset,
15242 (new_entry->vme_end -
15243 new_entry->vme_start),
15244 &VME_OBJECT(new_entry),
15245 &new_offset,
15246 &new_entry_needs_copy);
15247 if (new_offset != VME_OFFSET(new_entry)) {
15248 VME_OFFSET_SET(new_entry, new_offset);
15249 }
15250
15251 new_entry->needs_copy = new_entry_needs_copy;
15252 }
15253
15254 /*
15255 * Throw away the old object reference of the new entry.
15256 */
15257 vm_object_deallocate(object);
15258
15259 if (result != KERN_SUCCESS &&
15260 result != KERN_MEMORY_RESTART_COPY) {
15261 _vm_map_entry_dispose(map_header, new_entry);
15262 vm_map_lock(map);
15263 break;
15264 }
15265
15266 /*
15267 * Verify that the map has not substantially
15268 * changed while the copy was being made.
15269 */
15270
15271 vm_map_lock(map);
15272 if (version.main_timestamp + 1 != map->timestamp) {
15273 /*
15274 * Simple version comparison failed.
15275 *
15276 * Retry the lookup and verify that the
15277 * same object/offset are still present.
15278 */
15279 saved_src_entry = VM_MAP_ENTRY_NULL;
15280 vm_object_deallocate(VME_OBJECT(new_entry));
15281 _vm_map_entry_dispose(map_header, new_entry);
15282 if (result == KERN_MEMORY_RESTART_COPY)
15283 result = KERN_SUCCESS;
15284 continue;
15285 }
15286 /* map hasn't changed: src_entry is still valid */
15287 src_entry = saved_src_entry;
15288 saved_src_entry = VM_MAP_ENTRY_NULL;
15289
15290 if (result == KERN_MEMORY_RESTART_COPY) {
15291 vm_object_reference(object);
15292 goto RestartCopy;
15293 }
15294 }
15295
15296 _vm_map_store_entry_link(map_header,
15297 map_header->links.prev, new_entry);
15298
15299 /*Protections for submap mapping are irrelevant here*/
15300 if( !src_entry->is_sub_map ) {
15301 *cur_protection &= src_entry->protection;
15302 *max_protection &= src_entry->max_protection;
15303 }
15304 map_address += tmp_size;
15305 mapped_size += tmp_size;
15306 src_start += tmp_size;
15307
15308 } /* end while */
15309
15310 vm_map_unlock(map);
15311 if (result != KERN_SUCCESS) {
15312 /*
15313 * Free all allocated elements.
15314 */
15315 for (src_entry = map_header->links.next;
15316 src_entry != (struct vm_map_entry *)&map_header->links;
15317 src_entry = new_entry) {
15318 new_entry = src_entry->vme_next;
15319 _vm_map_store_entry_unlink(map_header, src_entry);
15320 if (src_entry->is_sub_map) {
15321 vm_map_deallocate(VME_SUBMAP(src_entry));
15322 } else {
15323 vm_object_deallocate(VME_OBJECT(src_entry));
15324 }
15325 _vm_map_entry_dispose(map_header, src_entry);
15326 }
15327 }
15328 return result;
15329 }
15330
15331 /*
15332 * Routine: vm_remap
15333 *
15334 * Map portion of a task's address space.
15335 * Mapped region must not overlap more than
15336 * one vm memory object. Protections and
15337 * inheritance attributes remain the same
15338 * as in the original task and are out parameters.
15339 * Source and Target task can be identical
15340 * Other attributes are identical as for vm_map()
15341 */
15342 kern_return_t
15343 vm_map_remap(
15344 vm_map_t target_map,
15345 vm_map_address_t *address,
15346 vm_map_size_t size,
15347 vm_map_offset_t mask,
15348 int flags,
15349 vm_map_kernel_flags_t vmk_flags,
15350 vm_tag_t tag,
15351 vm_map_t src_map,
15352 vm_map_offset_t memory_address,
15353 boolean_t copy,
15354 vm_prot_t *cur_protection,
15355 vm_prot_t *max_protection,
15356 vm_inherit_t inheritance)
15357 {
15358 kern_return_t result;
15359 vm_map_entry_t entry;
15360 vm_map_entry_t insp_entry = VM_MAP_ENTRY_NULL;
15361 vm_map_entry_t new_entry;
15362 struct vm_map_header map_header;
15363 vm_map_offset_t offset_in_mapping;
15364
15365 if (target_map == VM_MAP_NULL)
15366 return KERN_INVALID_ARGUMENT;
15367
15368 switch (inheritance) {
15369 case VM_INHERIT_NONE:
15370 case VM_INHERIT_COPY:
15371 case VM_INHERIT_SHARE:
15372 if (size != 0 && src_map != VM_MAP_NULL)
15373 break;
15374 /*FALL THRU*/
15375 default:
15376 return KERN_INVALID_ARGUMENT;
15377 }
15378
15379 /*
15380 * If the user is requesting that we return the address of the
15381 * first byte of the data (rather than the base of the page),
15382 * then we use different rounding semantics: specifically,
15383 * we assume that (memory_address, size) describes a region
15384 * all of whose pages we must cover, rather than a base to be truncated
15385 * down and a size to be added to that base. So we figure out
15386 * the highest page that the requested region includes and make
15387 * sure that the size will cover it.
15388 *
15389 * The key example we're worried about it is of the form:
15390 *
15391 * memory_address = 0x1ff0, size = 0x20
15392 *
15393 * With the old semantics, we round down the memory_address to 0x1000
15394 * and round up the size to 0x1000, resulting in our covering *only*
15395 * page 0x1000. With the new semantics, we'd realize that the region covers
15396 * 0x1ff0-0x2010, and compute a size of 0x2000. Thus, we cover both page
15397 * 0x1000 and page 0x2000 in the region we remap.
15398 */
15399 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
15400 offset_in_mapping = memory_address - vm_map_trunc_page(memory_address, PAGE_MASK);
15401 size = vm_map_round_page(memory_address + size - vm_map_trunc_page(memory_address, PAGE_MASK), PAGE_MASK);
15402 } else {
15403 size = vm_map_round_page(size, PAGE_MASK);
15404 }
15405 if (size == 0) {
15406 return KERN_INVALID_ARGUMENT;
15407 }
15408
15409 result = vm_map_remap_extract(src_map, memory_address,
15410 size, copy, &map_header,
15411 cur_protection,
15412 max_protection,
15413 inheritance,
15414 target_map->hdr.entries_pageable,
15415 src_map == target_map,
15416 vmk_flags);
15417
15418 if (result != KERN_SUCCESS) {
15419 return result;
15420 }
15421
15422 /*
15423 * Allocate/check a range of free virtual address
15424 * space for the target
15425 */
15426 *address = vm_map_trunc_page(*address,
15427 VM_MAP_PAGE_MASK(target_map));
15428 vm_map_lock(target_map);
15429 result = vm_map_remap_range_allocate(target_map, address, size,
15430 mask, flags, vmk_flags, tag,
15431 &insp_entry);
15432
15433 for (entry = map_header.links.next;
15434 entry != (struct vm_map_entry *)&map_header.links;
15435 entry = new_entry) {
15436 new_entry = entry->vme_next;
15437 _vm_map_store_entry_unlink(&map_header, entry);
15438 if (result == KERN_SUCCESS) {
15439 if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
15440 /* no codesigning -> read-only access */
15441 assert(!entry->used_for_jit);
15442 entry->max_protection = VM_PROT_READ;
15443 entry->protection = VM_PROT_READ;
15444 entry->vme_resilient_codesign = TRUE;
15445 }
15446 entry->vme_start += *address;
15447 entry->vme_end += *address;
15448 assert(!entry->map_aligned);
15449 vm_map_store_entry_link(target_map, insp_entry, entry);
15450 insp_entry = entry;
15451 } else {
15452 if (!entry->is_sub_map) {
15453 vm_object_deallocate(VME_OBJECT(entry));
15454 } else {
15455 vm_map_deallocate(VME_SUBMAP(entry));
15456 }
15457 _vm_map_entry_dispose(&map_header, entry);
15458 }
15459 }
15460
15461 if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
15462 *cur_protection = VM_PROT_READ;
15463 *max_protection = VM_PROT_READ;
15464 }
15465
15466 if( target_map->disable_vmentry_reuse == TRUE) {
15467 assert(!target_map->is_nested_map);
15468 if( target_map->highest_entry_end < insp_entry->vme_end ){
15469 target_map->highest_entry_end = insp_entry->vme_end;
15470 }
15471 }
15472
15473 if (result == KERN_SUCCESS) {
15474 target_map->size += size;
15475 SAVE_HINT_MAP_WRITE(target_map, insp_entry);
15476
15477 }
15478 vm_map_unlock(target_map);
15479
15480 if (result == KERN_SUCCESS && target_map->wiring_required)
15481 result = vm_map_wire_kernel(target_map, *address,
15482 *address + size, *cur_protection, VM_KERN_MEMORY_MLOCK,
15483 TRUE);
15484
15485 /*
15486 * If requested, return the address of the data pointed to by the
15487 * request, rather than the base of the resulting page.
15488 */
15489 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
15490 *address += offset_in_mapping;
15491 }
15492
15493 return result;
15494 }
15495
15496 /*
15497 * Routine: vm_map_remap_range_allocate
15498 *
15499 * Description:
15500 * Allocate a range in the specified virtual address map.
15501 * returns the address and the map entry just before the allocated
15502 * range
15503 *
15504 * Map must be locked.
15505 */
/*
 * Parameters, as used by the code below:
 *   map        map to search.  The wait_for_space path unlocks it,
 *              blocks, re-locks it and restarts the search.
 *   address    IN: requested (or hint) start address.
 *              OUT: the chosen start; only rewritten on the
 *              VM_FLAGS_ANYWHERE path.
 *   size       length of the range to place.
 *   mask       alignment mask.  The ANYWHERE path rounds candidates up
 *              with ((start + mask) & ~mask); the fixed-address path
 *              rejects a start with (start & mask) != 0.
 *   flags      VM_FLAGS_ANYWHERE, VM_FLAGS_RANDOM_ADDR and
 *              VM_FLAGS_OVERWRITE are the bits examined here.
 *   vmk_flags, tag   unused.
 *   map_entry  OUT: entry after which the caller should link the new
 *              range (written only on success).
 *
 * Returns KERN_SUCCESS, KERN_NO_SPACE, KERN_INVALID_ADDRESS,
 * KERN_RESOURCE_SHORTAGE, or whatever error
 * vm_map_random_address_for_size() reported.
 */
15506
15507 static kern_return_t
15508 vm_map_remap_range_allocate(
15509 vm_map_t map,
15510 vm_map_address_t *address, /* IN/OUT */
15511 vm_map_size_t size,
15512 vm_map_offset_t mask,
15513 int flags,
15514 __unused vm_map_kernel_flags_t vmk_flags,
15515 __unused vm_tag_t tag,
15516 vm_map_entry_t *map_entry) /* OUT */
15517 {
15518 vm_map_entry_t entry;
15519 vm_map_offset_t start;
15520 vm_map_offset_t end;
15521 kern_return_t kr;
15522 vm_map_entry_t hole_entry;
15523
/* Restart point: the wait_for_space path below jumps back here after blocking. */
15524 StartAgain: ;
15525
15526 start = *address;
15527
15528 if (flags & VM_FLAGS_ANYWHERE)
15529 {
15530 if (flags & VM_FLAGS_RANDOM_ADDR)
15531 {
15532 /*
15533 * Get a random start address.
15534 */
15535 kr = vm_map_random_address_for_size(map, address, size);
15536 if (kr != KERN_SUCCESS) {
15537 return(kr);
15538 }
15539 start = *address;
15540 }
15541
15542 /*
15543 * Calculate the first possible address.
15544 */
15545
15546 if (start < map->min_offset)
15547 start = map->min_offset;
15548 if (start > map->max_offset)
15549 return(KERN_NO_SPACE);
15550
15551 /*
15552 * Look for the first possible address;
15553 * if there's already something at this
15554 * address, we have to start after it.
15555 */
15556
15557 if( map->disable_vmentry_reuse == TRUE) {
15558 VM_MAP_HIGHEST_ENTRY(map, entry, start);
15559 } else {
15560
/*
 * Two search strategies: with the hole list enabled, "entry" walks the
 * list of holes; otherwise it walks the real map entries starting from
 * first_free or from the entry found at "start".
 */
15561 if (map->holelistenabled) {
15562 hole_entry = (vm_map_entry_t)map->holes_list;
15563
15564 if (hole_entry == NULL) {
15565 /*
15566 * No more space in the map?
15567 */
15568 return(KERN_NO_SPACE);
15569 } else {
15570
15571 boolean_t found_hole = FALSE;
15572
/*
 * Walk the holes list (the loop stops when it comes back around to
 * map->holes_list) looking for the first hole that either begins at or
 * after "start", or that ends beyond "start" (i.e. contains it).
 */
15573 do {
15574 if (hole_entry->vme_start >= start) {
15575 start = hole_entry->vme_start;
15576 found_hole = TRUE;
15577 break;
15578 }
15579
15580 if (hole_entry->vme_end > start) {
15581 found_hole = TRUE;
15582 break;
15583 }
15584 hole_entry = hole_entry->vme_next;
15585
15586 } while (hole_entry != (vm_map_entry_t) map->holes_list);
15587
15588 if (found_hole == FALSE) {
15589 return (KERN_NO_SPACE);
15590 }
15591
15592 entry = hole_entry;
15593 }
15594 } else {
15595 assert(first_free_is_valid(map));
15596 if (start == map->min_offset) {
15597 if ((entry = map->first_free) != vm_map_to_entry(map))
15598 start = entry->vme_end;
15599 } else {
15600 vm_map_entry_t tmp_entry;
/* If "start" falls inside an existing entry, begin the search just past it. */
15601 if (vm_map_lookup_entry(map, start, &tmp_entry))
15602 start = tmp_entry->vme_end;
15603 entry = tmp_entry;
15604 }
15605 }
15606 start = vm_map_round_page(start,
15607 VM_MAP_PAGE_MASK(map));
15608 }
15609
15610 /*
15611 * In any case, the "entry" always precedes
15612 * the proposed new region throughout the
15613 * loop:
15614 */
15615
15616 while (TRUE) {
15617 vm_map_entry_t next;
15618
15619 /*
15620 * Find the end of the proposed new region.
15621 * Be sure we didn't go beyond the end, or
15622 * wrap around the address.
15623 */
15624
/* Round the candidate up to the requested alignment, then to the map's page size. */
15625 end = ((start + mask) & ~mask);
15626 end = vm_map_round_page(end,
15627 VM_MAP_PAGE_MASK(map));
15628 if (end < start)
15629 return(KERN_NO_SPACE);
15630 start = end;
15631 end += size;
15632
15633 if ((end > map->max_offset) || (end < start)) {
/*
 * Out of room.  If the map is marked wait_for_space and the request
 * could fit in the map at all, sleep on the map (interruptibly) with
 * the map unlocked and retry from the top once woken.
 */
15634 if (map->wait_for_space) {
15635 if (size <= (map->max_offset -
15636 map->min_offset)) {
15637 assert_wait((event_t) map, THREAD_INTERRUPTIBLE);
15638 vm_map_unlock(map);
15639 thread_block(THREAD_CONTINUE_NULL);
15640 vm_map_lock(map);
15641 goto StartAgain;
15642 }
15643 }
15644
15645 return(KERN_NO_SPACE);
15646 }
15647
15648 next = entry->vme_next;
15649
15650 if (map->holelistenabled) {
/* "entry" is a hole here: the range fits if the hole extends to "end" or beyond. */
15651 if (entry->vme_end >= end)
15652 break;
15653 } else {
15654 /*
15655 * If there are no more entries, we must win.
15656 *
15657 * OR
15658 *
15659 * If there is another entry, it must be
15660 * after the end of the potential new region.
15661 */
15662
15663 if (next == vm_map_to_entry(map))
15664 break;
15665
15666 if (next->vme_start >= end)
15667 break;
15668 }
15669
15670 /*
15671 * Didn't fit -- move to the next entry.
15672 */
15673
15674 entry = next;
15675
15676 if (map->holelistenabled) {
15677 if (entry == (vm_map_entry_t) map->holes_list) {
15678 /*
15679 * Wrapped around
15680 */
15681 return(KERN_NO_SPACE);
15682 }
/* Next candidate begins at the start of the next hole ... */
15683 start = entry->vme_start;
15684 } else {
/* ... or, without the hole list, right after the next real entry. */
15685 start = entry->vme_end;
15686 }
15687 }
15688
15689 if (map->holelistenabled) {
15690
/*
 * "entry" still refers to a hole.  Look up the hole's start address in
 * the real map so "entry" becomes the lookup's out value; the lookup is
 * expected to fail, and a hit is treated as fatal.
 * NOTE(review): the panic arguments are evaluated after the lookup has
 * overwritten "entry", so they describe the entry that was found.
 */
15691 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
15692 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
15693 }
15694 }
15695
15696 *address = start;
15697
15698 } else {
15699 vm_map_entry_t temp_entry;
15700
15701 /*
15702 * Verify that:
15703 * the address doesn't itself violate
15704 * the mask requirement.
15705 */
15706
15707 if ((start & mask) != 0)
15708 return(KERN_NO_SPACE);
15709
15710
15711 /*
15712 * ... the address is within bounds
15713 */
15714
15715 end = start + size;
15716
/* "start >= end" also catches a zero size and address wrap-around. */
15717 if ((start < map->min_offset) ||
15718 (end > map->max_offset) ||
15719 (start >= end)) {
15720 return(KERN_INVALID_ADDRESS);
15721 }
15722
15723 /*
15724 * If we're asked to overwrite whatever was mapped in that
15725 * range, first deallocate that range.
15726 */
15727 if (flags & VM_FLAGS_OVERWRITE) {
15728 vm_map_t zap_map;
15729
15730 /*
15731 * We use a "zap_map" to avoid having to unlock
15732 * the "map" in vm_map_delete(), which would compromise
15733 * the atomicity of the "deallocate" and then "remap"
15734 * combination.
15735 */
15736 zap_map = vm_map_create(PMAP_NULL,
15737 start,
15738 end,
15739 map->hdr.entries_pageable);
15740 if (zap_map == VM_MAP_NULL) {
15741 return KERN_RESOURCE_SHORTAGE;
15742 }
15743 vm_map_set_page_shift(zap_map, VM_MAP_PAGE_SHIFT(map));
15744 vm_map_disable_hole_optimization(zap_map);
15745
15746 kr = vm_map_delete(map, start, end,
15747 (VM_MAP_REMOVE_SAVE_ENTRIES |
15748 VM_MAP_REMOVE_NO_MAP_ALIGN),
15749 zap_map);
/*
 * NOTE(review): zap_map is only destroyed when vm_map_delete() reports
 * success, and the failure code in "kr" is not returned.  If the delete
 * can fail here, zap_map looks like it is leaked -- confirm.
 */
15750 if (kr == KERN_SUCCESS) {
15751 vm_map_destroy(zap_map,
15752 VM_MAP_REMOVE_NO_PMAP_CLEANUP);
15753 zap_map = VM_MAP_NULL;
15754 }
15755 }
15756
15757 /*
15758 * ... the starting address isn't allocated
15759 */
15760
15761 if (vm_map_lookup_entry(map, start, &temp_entry))
15762 return(KERN_NO_SPACE);
15763
15764 entry = temp_entry;
15765
15766 /*
15767 * ... the next region doesn't overlap the
15768 * end point.
15769 */
15770
15771 if ((entry->vme_next != vm_map_to_entry(map)) &&
15772 (entry->vme_next->vme_start < end))
15773 return(KERN_NO_SPACE);
15774 }
/* Success: report the entry that will precede the new range. */
15775 *map_entry = entry;
15776 return(KERN_SUCCESS);
15777 }
15778
15779 /*
15780 * vm_map_switch:
15781 *
15782 * Set the address map for the current thread to the specified map
15783 */
15784
15785 vm_map_t
15786 vm_map_switch(
15787 vm_map_t map)
15788 {
15789 int mycpu;
15790 thread_t thread = current_thread();
15791 vm_map_t oldmap = thread->map;
15792
15793 mp_disable_preemption();
15794 mycpu = cpu_number();
15795
15796 /*
15797 * Deactivate the current map and activate the requested map
15798 */
15799 PMAP_SWITCH_USER(thread, map, mycpu);
15800
15801 mp_enable_preemption();
15802 return(oldmap);
15803 }
15804
15805
15806 /*
15807 * Routine: vm_map_write_user
15808 *
15809 * Description:
15810 * Copy out data from a kernel space into space in the
15811 * destination map. The space must already exist in the
15812 * destination map.
15813 * NOTE: This routine should only be called by threads
15814 * which can block on a page fault. i.e. kernel mode user
15815 * threads.
15816 *
15817 */
15818 kern_return_t
15819 vm_map_write_user(
15820 vm_map_t map,
15821 void *src_p,
15822 vm_map_address_t dst_addr,
15823 vm_size_t size)
15824 {
15825 kern_return_t kr = KERN_SUCCESS;
15826
15827 if(current_map() == map) {
15828 if (copyout(src_p, dst_addr, size)) {
15829 kr = KERN_INVALID_ADDRESS;
15830 }
15831 } else {
15832 vm_map_t oldmap;
15833
15834 /* take on the identity of the target map while doing */
15835 /* the transfer */
15836
15837 vm_map_reference(map);
15838 oldmap = vm_map_switch(map);
15839 if (copyout(src_p, dst_addr, size)) {
15840 kr = KERN_INVALID_ADDRESS;
15841 }
15842 vm_map_switch(oldmap);
15843 vm_map_deallocate(map);
15844 }
15845 return kr;
15846 }
15847
15848 /*
15849 * Routine: vm_map_read_user
15850 *
15851 * Description:
15852 * Copy in data from a user space source map into the
15853 * kernel map. The space must already exist in the
15854 * kernel map.
15855 * NOTE: This routine should only be called by threads
15856 * which can block on a page fault. i.e. kernel mode user
15857 * threads.
15858 *
15859 */
15860 kern_return_t
15861 vm_map_read_user(
15862 vm_map_t map,
15863 vm_map_address_t src_addr,
15864 void *dst_p,
15865 vm_size_t size)
15866 {
15867 kern_return_t kr = KERN_SUCCESS;
15868
15869 if(current_map() == map) {
15870 if (copyin(src_addr, dst_p, size)) {
15871 kr = KERN_INVALID_ADDRESS;
15872 }
15873 } else {
15874 vm_map_t oldmap;
15875
15876 /* take on the identity of the target map while doing */
15877 /* the transfer */
15878
15879 vm_map_reference(map);
15880 oldmap = vm_map_switch(map);
15881 if (copyin(src_addr, dst_p, size)) {
15882 kr = KERN_INVALID_ADDRESS;
15883 }
15884 vm_map_switch(oldmap);
15885 vm_map_deallocate(map);
15886 }
15887 return kr;
15888 }
15889
15890
15891 /*
15892 * vm_map_check_protection:
15893 *
15894 * Assert that the target map allows the specified
15895 * privilege on the entire address region given.
15896 * The entire region must be allocated.
15897 */
15898 boolean_t
15899 vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
15900 vm_map_offset_t end, vm_prot_t protection)
15901 {
15902 vm_map_entry_t entry;
15903 vm_map_entry_t tmp_entry;
15904
15905 vm_map_lock(map);
15906
15907 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
15908 {
15909 vm_map_unlock(map);
15910 return (FALSE);
15911 }
15912
15913 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
15914 vm_map_unlock(map);
15915 return(FALSE);
15916 }
15917
15918 entry = tmp_entry;
15919
15920 while (start < end) {
15921 if (entry == vm_map_to_entry(map)) {
15922 vm_map_unlock(map);
15923 return(FALSE);
15924 }
15925
15926 /*
15927 * No holes allowed!
15928 */
15929
15930 if (start < entry->vme_start) {
15931 vm_map_unlock(map);
15932 return(FALSE);
15933 }
15934
15935 /*
15936 * Check protection associated with entry.
15937 */
15938
15939 if ((entry->protection & protection) != protection) {
15940 vm_map_unlock(map);
15941 return(FALSE);
15942 }
15943
15944 /* go to next entry */
15945
15946 start = entry->vme_end;
15947 entry = entry->vme_next;
15948 }
15949 vm_map_unlock(map);
15950 return(TRUE);
15951 }
15952
15953 kern_return_t
15954 vm_map_purgable_control(
15955 vm_map_t map,
15956 vm_map_offset_t address,
15957 vm_purgable_t control,
15958 int *state)
15959 {
15960 vm_map_entry_t entry;
15961 vm_object_t object;
15962 kern_return_t kr;
15963 boolean_t was_nonvolatile;
15964
15965 /*
15966 * Vet all the input parameters and current type and state of the
15967 * underlaying object. Return with an error if anything is amiss.
15968 */
15969 if (map == VM_MAP_NULL)
15970 return(KERN_INVALID_ARGUMENT);
15971
15972 if (control != VM_PURGABLE_SET_STATE &&
15973 control != VM_PURGABLE_GET_STATE &&
15974 control != VM_PURGABLE_PURGE_ALL &&
15975 control != VM_PURGABLE_SET_STATE_FROM_KERNEL)
15976 return(KERN_INVALID_ARGUMENT);
15977
15978 if (control == VM_PURGABLE_PURGE_ALL) {
15979 vm_purgeable_object_purge_all();
15980 return KERN_SUCCESS;
15981 }
15982
15983 if ((control == VM_PURGABLE_SET_STATE ||
15984 control == VM_PURGABLE_SET_STATE_FROM_KERNEL) &&
15985 (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) ||
15986 ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK)))
15987 return(KERN_INVALID_ARGUMENT);
15988
15989 vm_map_lock_read(map);
15990
15991 if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {
15992
15993 /*
15994 * Must pass a valid non-submap address.
15995 */
15996 vm_map_unlock_read(map);
15997 return(KERN_INVALID_ADDRESS);
15998 }
15999
16000 if ((entry->protection & VM_PROT_WRITE) == 0) {
16001 /*
16002 * Can't apply purgable controls to something you can't write.
16003 */
16004 vm_map_unlock_read(map);
16005 return(KERN_PROTECTION_FAILURE);
16006 }
16007
16008 object = VME_OBJECT(entry);
16009 if (object == VM_OBJECT_NULL ||
16010 object->purgable == VM_PURGABLE_DENY) {
16011 /*
16012 * Object must already be present and be purgeable.
16013 */
16014 vm_map_unlock_read(map);
16015 return KERN_INVALID_ARGUMENT;
16016 }
16017
16018 vm_object_lock(object);
16019
16020 #if 00
16021 if (VME_OFFSET(entry) != 0 ||
16022 entry->vme_end - entry->vme_start != object->vo_size) {
16023 /*
16024 * Can only apply purgable controls to the whole (existing)
16025 * object at once.
16026 */
16027 vm_map_unlock_read(map);
16028 vm_object_unlock(object);
16029 return KERN_INVALID_ARGUMENT;
16030 }
16031 #endif
16032
16033 assert(!entry->is_sub_map);
16034 assert(!entry->use_pmap); /* purgeable has its own accounting */
16035
16036 vm_map_unlock_read(map);
16037
16038 was_nonvolatile = (object->purgable == VM_PURGABLE_NONVOLATILE);
16039
16040 kr = vm_object_purgable_control(object, control, state);
16041
16042 if (was_nonvolatile &&
16043 object->purgable != VM_PURGABLE_NONVOLATILE &&
16044 map->pmap == kernel_pmap) {
16045 #if DEBUG
16046 object->vo_purgeable_volatilizer = kernel_task;
16047 #endif /* DEBUG */
16048 }
16049
16050 vm_object_unlock(object);
16051
16052 return kr;
16053 }
16054
16055 kern_return_t
16056 vm_map_page_query_internal(
16057 vm_map_t target_map,
16058 vm_map_offset_t offset,
16059 int *disposition,
16060 int *ref_count)
16061 {
16062 kern_return_t kr;
16063 vm_page_info_basic_data_t info;
16064 mach_msg_type_number_t count;
16065
16066 count = VM_PAGE_INFO_BASIC_COUNT;
16067 kr = vm_map_page_info(target_map,
16068 offset,
16069 VM_PAGE_INFO_BASIC,
16070 (vm_page_info_t) &info,
16071 &count);
16072 if (kr == KERN_SUCCESS) {
16073 *disposition = info.disposition;
16074 *ref_count = info.ref_count;
16075 } else {
16076 *disposition = 0;
16077 *ref_count = 0;
16078 }
16079
16080 return kr;
16081 }
16082
16083 kern_return_t
16084 vm_map_page_info(
16085 vm_map_t map,
16086 vm_map_offset_t offset,
16087 vm_page_info_flavor_t flavor,
16088 vm_page_info_t info,
16089 mach_msg_type_number_t *count)
16090 {
16091 return (vm_map_page_range_info_internal(map,
16092 offset, /* start of range */
16093 (offset + 1), /* this will get rounded in the call to the page boundary */
16094 flavor,
16095 info,
16096 count));
16097 }
16098
/*
 * vm_map_page_range_info_internal:
 *
 * Fill "info" with one struct vm_page_info_basic per page of the range
 * [start_offset truncated to a page, end_offset rounded up to a page)
 * in "map".
 *
 * Only the VM_PAGE_INFO_BASIC flavor is accepted, and *count must be
 * VM_PAGE_INFO_BASIC_COUNT (or one less, for binary compatibility);
 * anything else yields KERN_INVALID_ARGUMENT.
 *
 * For each stretch of the range the code does one of the following:
 *   - unmapped addresses and entries without an object: the matching
 *     records are zero-filled;
 *   - submap entries: recurse into the submap with this map's read
 *     lock dropped for the duration of the recursive call;
 *   - when task_self_region_footprint() is true: build the disposition
 *     from pmap_query_page_info() and ledger balances instead of from
 *     the VM object;
 *   - otherwise: look the page up in the entry's object, following the
 *     shadow chain, and report what was found.
 *
 * "info" is indexed by page with no bounds check in this function, so
 * presumably the caller sizes it for the whole range; the only guard is
 * the assert against MAX_PAGE_RANGE_QUERY -- TODO confirm with callers.
 *
 * Returns "retval": KERN_SUCCESS unless a recursive submap query set it
 * to something else.
 */
16099 kern_return_t
16100 vm_map_page_range_info_internal(
16101 vm_map_t map,
16102 vm_map_offset_t start_offset,
16103 vm_map_offset_t end_offset,
16104 vm_page_info_flavor_t flavor,
16105 vm_page_info_t info,
16106 mach_msg_type_number_t *count)
16107 {
16108 vm_map_entry_t map_entry = VM_MAP_ENTRY_NULL;
16109 vm_object_t object = VM_OBJECT_NULL, curr_object = VM_OBJECT_NULL;
16110 vm_page_t m = VM_PAGE_NULL;
16111 kern_return_t retval = KERN_SUCCESS;
16112 int disposition = 0;
16113 int ref_count = 0;
16114 int depth = 0, info_idx = 0;
16115 vm_page_info_basic_t basic_info = 0;
16116 vm_map_offset_t offset_in_page = 0, offset_in_object = 0, curr_offset_in_object = 0;
16117 vm_map_offset_t start = 0, end = 0, curr_s_offset = 0, curr_e_offset = 0;
16118 boolean_t do_region_footprint;
16119
16120 switch (flavor) {
16121 case VM_PAGE_INFO_BASIC:
16122 if (*count != VM_PAGE_INFO_BASIC_COUNT) {
16123 /*
16124 * The "vm_page_info_basic_data" structure was not
16125 * properly padded, so allow the size to be off by
16126 * one to maintain backwards binary compatibility...
16127 */
16128 if (*count != VM_PAGE_INFO_BASIC_COUNT - 1)
16129 return KERN_INVALID_ARGUMENT;
16130 }
16131 break;
16132 default:
16133 return KERN_INVALID_ARGUMENT;
16134 }
16135
16136 do_region_footprint = task_self_region_footprint();
16137 disposition = 0;
16138 ref_count = 0;
16139 depth = 0;
16140 info_idx = 0; /* Tracks the next index within the info structure to be filled.*/
16141 retval = KERN_SUCCESS;
16142
/* Remember the sub-page part of the start; it is added only to the first reported offset. */
16143 offset_in_page = start_offset & PAGE_MASK;
16144 start = vm_map_trunc_page(start_offset, PAGE_MASK);
16145 end = vm_map_round_page(end_offset, PAGE_MASK);
16146
16147 assert ((end - start) <= MAX_PAGE_RANGE_QUERY);
16148
16149 vm_map_lock_read(map);
16150
/*
 * Outer loop: each iteration handles one stretch of the range and
 * advances curr_s_offset itself.  The map read lock is held at the top
 * of every iteration.
 */
16151 for (curr_s_offset = start; curr_s_offset < end;) {
16152 /*
16153 * New lookup needs reset of these variables.
16154 */
16155 curr_object = object = VM_OBJECT_NULL;
16156 offset_in_object = 0;
16157 ref_count = 0;
16158 depth = 0;
16159
16160 if (do_region_footprint &&
16161 curr_s_offset >= vm_map_last_entry(map)->vme_end) {
16162 ledger_amount_t nonvol_compressed;
16163
16164 /*
16165 * Request for "footprint" info about a page beyond
16166 * the end of address space: this must be for
16167 * the fake region vm_map_region_recurse_64()
16168 * reported to account for non-volatile purgeable
16169 * memory owned by this task.
16170 */
16171 disposition = 0;
16172 nonvol_compressed = 0;
16173 ledger_get_balance(
16174 map->pmap->ledger,
16175 task_ledgers.purgeable_nonvolatile_compressed,
16176 &nonvol_compressed);
/*
 * NOTE(review): the left side is a byte distance past the last entry,
 * the right side is the ledger balance cast to unsigned; confirm both
 * are in the same units.
 */
16177 if (curr_s_offset - vm_map_last_entry(map)->vme_end <=
16178 (unsigned) nonvol_compressed) {
16179 /*
16180 * We haven't reported all the "non-volatile
16181 * compressed" pages yet, so report this fake
16182 * page as "compressed".
16183 */
16184 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
16185 } else {
16186 /*
16187 * We've reported all the non-volatile
16188 * compressed page but not all the non-volatile
16189 * pages , so report this fake page as
16190 * "resident dirty".
16191 */
16192 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
16193 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
16194 disposition |= VM_PAGE_QUERY_PAGE_REF;
16195 }
16196 switch (flavor) {
16197 case VM_PAGE_INFO_BASIC:
16198 basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
16199 basic_info->disposition = disposition;
16200 basic_info->ref_count = 1;
16201 basic_info->object_id = INFO_MAKE_FAKE_OBJECT_ID(map, task_ledgers.purgeable_nonvolatile);
16202 basic_info->offset = 0;
16203 basic_info->depth = 0;
16204
16205 info_idx++;
16206 break;
16207 }
16208 curr_s_offset += PAGE_SIZE;
16209 continue;
16210 }
16211
16212 /*
16213 * First, find the map entry covering "curr_s_offset", going down
16214 * submaps if necessary.
16215 */
16216 if (!vm_map_lookup_entry(map, curr_s_offset, &map_entry)) {
16217 /* no entry -> no object -> no page */
16218
/* Work out where this unmapped stretch ends (curr_e_offset), capped at "end". */
16219 if (curr_s_offset < vm_map_min(map)) {
16220 /*
16221 * Illegal address that falls below map min.
16222 */
16223 curr_e_offset = MIN(end, vm_map_min(map));
16224
16225 } else if (curr_s_offset >= vm_map_max(map)) {
16226 /*
16227 * Illegal address that falls on/after map max.
16228 */
16229 curr_e_offset = end;
16230
16231 } else if (map_entry == vm_map_to_entry(map)) {
16232 /*
16233 * Hit a hole.
16234 */
16235 if (map_entry->vme_next == vm_map_to_entry(map)) {
16236 /*
16237 * Empty map.
16238 */
16239 curr_e_offset = MIN(map->max_offset, end);
16240 } else {
16241 /*
16242 * Hole at start of the map.
16243 */
16244 curr_e_offset = MIN(map_entry->vme_next->vme_start, end);
16245 }
16246 } else {
16247 if (map_entry->vme_next == vm_map_to_entry(map)) {
16248 /*
16249 * Hole at the end of the map.
16250 */
16251 curr_e_offset = MIN(map->max_offset, end);
16252 } else {
16253 curr_e_offset = MIN(map_entry->vme_next->vme_start, end);
16254 }
16255 }
16256
16257 assert(curr_e_offset >= curr_s_offset);
16258
16259 uint64_t num_pages = (curr_e_offset - curr_s_offset) >> PAGE_SHIFT;
16260
16261 void *info_ptr = (void*) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
16262
/* Unmapped pages are reported as all-zero records. */
16263 bzero(info_ptr, num_pages * sizeof(struct vm_page_info_basic));
16264
16265 curr_s_offset = curr_e_offset;
16266
16267 info_idx += num_pages;
16268
16269 continue;
16270 }
16271
16272 /* compute offset from this map entry's start */
16273 offset_in_object = curr_s_offset - map_entry->vme_start;
16274
16275 /* compute offset into this map entry's object (or submap) */
16276 offset_in_object += VME_OFFSET(map_entry);
16277
16278 if (map_entry->is_sub_map) {
16279 vm_map_t sub_map = VM_MAP_NULL;
16280 vm_page_info_t submap_info = 0;
16281 vm_map_offset_t submap_s_offset = 0, submap_e_offset = 0, range_len = 0;
16282
16283 range_len = MIN(map_entry->vme_end, end) - curr_s_offset;
16284
16285 submap_s_offset = offset_in_object;
16286 submap_e_offset = submap_s_offset + range_len;
16287
16288 sub_map = VME_SUBMAP(map_entry);
16289
/* Hold a reference on the submap so it can be used after this map's lock is dropped. */
16290 vm_map_reference(sub_map);
16291 vm_map_unlock_read(map);
16292
16293 submap_info = (vm_page_info_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
16294
/* Recurse: the submap fills the records for this stretch directly into "info". */
16295 retval = vm_map_page_range_info_internal(sub_map,
16296 submap_s_offset,
16297 submap_e_offset,
16298 VM_PAGE_INFO_BASIC,
16299 (vm_page_info_t) submap_info,
16300 count);
16301
16302 assert(retval == KERN_SUCCESS);
16303
16304 vm_map_lock_read(map);
16305 vm_map_deallocate(sub_map);
16306
16307 /* Move the "info" index by the number of pages we inspected.*/
16308 info_idx += range_len >> PAGE_SHIFT;
16309
16310 /* Move our current offset by the size of the range we inspected.*/
16311 curr_s_offset += range_len;
16312
16313 continue;
16314 }
16315
16316 object = VME_OBJECT(map_entry);
16317 if (object == VM_OBJECT_NULL) {
16318
16319 /*
16320 * We don't have an object here and, hence,
16321 * no pages to inspect. We'll fill up the
16322 * info structure appropriately.
16323 */
16324
16325 curr_e_offset = MIN(map_entry->vme_end, end);
16326
16327 uint64_t num_pages = (curr_e_offset - curr_s_offset) >> PAGE_SHIFT;
16328
16329 void *info_ptr = (void*) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
16330
16331 bzero(info_ptr, num_pages * sizeof(struct vm_page_info_basic));
16332
16333 curr_s_offset = curr_e_offset;
16334
16335 info_idx += num_pages;
16336
16337 continue;
16338 }
16339
/*
 * Footprint mode: one page per iteration, with the disposition built
 * from the pmap's view of the page rather than from the VM object.
 */
16340 if (do_region_footprint) {
16341 int pmap_disp;
16342
16343 disposition = 0;
16344 pmap_disp = 0;
16345 pmap_query_page_info(map->pmap, curr_s_offset, &pmap_disp);
16346 if (map_entry->iokit_acct &&
16347 object->internal &&
16348 object->purgable == VM_PURGABLE_DENY) {
16349 /*
16350 * Non-purgeable IOKit memory: phys_footprint
16351 * includes the entire virtual mapping.
16352 */
16353 assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
16354 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
16355 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
16356 } else if (pmap_disp & (PMAP_QUERY_PAGE_ALTACCT |
16357 PMAP_QUERY_PAGE_COMPRESSED_ALTACCT)) {
16358 /* alternate accounting */
16359 assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
16360 pmap_disp = 0;
16361 } else {
16362 if (pmap_disp & PMAP_QUERY_PAGE_PRESENT) {
16363 assertf(map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
16364 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
16365 disposition |= VM_PAGE_QUERY_PAGE_REF;
16366 if (pmap_disp & PMAP_QUERY_PAGE_INTERNAL) {
16367 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
16368 } else {
16369 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
16370 }
16371 } else if (pmap_disp & PMAP_QUERY_PAGE_COMPRESSED) {
16372 assertf(map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
16373 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
16374 }
16375 }
16376 switch (flavor) {
16377 case VM_PAGE_INFO_BASIC:
16378 basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
16379 basic_info->disposition = disposition;
16380 basic_info->ref_count = 1;
16381 basic_info->object_id = INFO_MAKE_FAKE_OBJECT_ID(map, task_ledgers.purgeable_nonvolatile);
16382 basic_info->offset = 0;
16383 basic_info->depth = 0;
16384
16385 info_idx++;
16386 break;
16387 }
16388 curr_s_offset += PAGE_SIZE;
16389 continue;
16390 }
16391
/*
 * Regular path: take a reference and a shared lock on the entry's
 * object, record where this entry's part of the range ends, and only
 * then drop the map lock.
 */
16392 vm_object_reference(object);
16393 /*
16394 * Shared mode -- so we can allow other readers
16395 * to grab the lock too.
16396 */
16397 vm_object_lock_shared(object);
16398
16399 curr_e_offset = MIN(map_entry->vme_end, end);
16400
16401 vm_map_unlock_read(map);
16402
16403 map_entry = NULL; /* map is unlocked, the entry is no longer valid. */
16404
16405 curr_object = object;
16406
/* Inner loop: one page per iteration; curr_object is back at "object" (and locked) at the top. */
16407 for (; curr_s_offset < curr_e_offset;) {
16408
16409 if (object == curr_object) {
16410 ref_count = curr_object->ref_count - 1; /* account for our object reference above. */
16411 } else {
16412 ref_count = curr_object->ref_count;
16413 }
16414
16415 curr_offset_in_object = offset_in_object;
16416
/*
 * Search for the page: stop when it is resident in curr_object, when
 * the compressor pager reports it exists, or when the shadow chain
 * runs out.
 */
16417 for (;;) {
16418 m = vm_page_lookup(curr_object, curr_offset_in_object);
16419
16420 if (m != VM_PAGE_NULL) {
16421
16422 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
16423 break;
16424
16425 } else {
16426 if (curr_object->internal &&
16427 curr_object->alive &&
16428 !curr_object->terminating &&
16429 curr_object->pager_ready) {
16430
16431 if (VM_COMPRESSOR_PAGER_STATE_GET(curr_object, curr_offset_in_object)
16432 == VM_EXTERNAL_STATE_EXISTS) {
16433 /* the pager has that page */
16434 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
16435 break;
16436 }
16437 }
16438
16439 /*
16440 * Go down the VM object shadow chain until we find the page
16441 * we're looking for.
16442 */
16443
16444 if (curr_object->shadow != VM_OBJECT_NULL) {
16445 vm_object_t shadow = VM_OBJECT_NULL;
16446
16447 curr_offset_in_object += curr_object->vo_shadow_offset;
16448 shadow = curr_object->shadow;
16449
/* Lock the shadow before releasing the object we came from. */
16450 vm_object_lock_shared(shadow);
16451 vm_object_unlock(curr_object);
16452
16453 curr_object = shadow;
16454 depth++;
16455 continue;
16456 } else {
16457
16458 break;
16459 }
16460 }
16461 }
16462
16463 /* The ref_count is not strictly accurate, it measures the number */
16464 /* of entities holding a ref on the object, they may not be mapping */
16465 /* the object or may not be mapping the section holding the */
16466 /* target page but its still a ball park number and though an over- */
16467 /* count, it picks up the copy-on-write cases */
16468
16469 /* We could also get a picture of page sharing from pmap_attributes */
16470 /* but this would under count as only faulted-in mappings would */
16471 /* show up. */
16472
/* Search ended in the top object although it has a shadow: flag the page as copied. */
16473 if ((curr_object == object) && curr_object->shadow)
16474 disposition |= VM_PAGE_QUERY_PAGE_COPIED;
16475
16476 if (! curr_object->internal)
16477 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
16478
16479 if (m != VM_PAGE_NULL) {
16480
16481 if (m->fictitious) {
16482
16483 disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
16484
16485 } else {
16486 if (m->dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(m)))
16487 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
16488
16489 if (m->reference || pmap_is_referenced(VM_PAGE_GET_PHYS_PAGE(m)))
16490 disposition |= VM_PAGE_QUERY_PAGE_REF;
16491
16492 if (m->vm_page_q_state == VM_PAGE_ON_SPECULATIVE_Q)
16493 disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE;
16494
16495 if (m->cs_validated)
16496 disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED;
16497 if (m->cs_tainted)
16498 disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED;
16499 if (m->cs_nx)
16500 disposition |= VM_PAGE_QUERY_PAGE_CS_NX;
16501 }
16502 }
16503
/* Emit the record for this page; object_id and offset describe the object where the search stopped. */
16504 switch (flavor) {
16505 case VM_PAGE_INFO_BASIC:
16506 basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
16507 basic_info->disposition = disposition;
16508 basic_info->ref_count = ref_count;
16509 basic_info->object_id = (vm_object_id_t) (uintptr_t)
16510 VM_KERNEL_ADDRPERM(curr_object);
16511 basic_info->offset =
16512 (memory_object_offset_t) curr_offset_in_object + offset_in_page;
16513 basic_info->depth = depth;
16514
16515 info_idx++;
16516 break;
16517 }
16518
16519 disposition = 0;
16520 offset_in_page = 0; // This doesn't really make sense for any offset other than the starting offset.
16521
16522 /*
16523 * Move to next offset in the range and in our object.
16524 */
16525 curr_s_offset += PAGE_SIZE;
16526 offset_in_object += PAGE_SIZE;
16527 curr_offset_in_object = offset_in_object;
16528
/*
 * If the search descended into a shadow, go back to the top object for
 * the next page; otherwise just offer the shared lock to other waiters.
 * NOTE(review): "depth" is not reset here, only at the top of the outer
 * loop -- confirm that carrying it across pages is intended.
 */
16529 if (curr_object != object) {
16530
16531 vm_object_unlock(curr_object);
16532
16533 curr_object = object;
16534
16535 vm_object_lock_shared(curr_object);
16536 } else {
16537
16538 vm_object_lock_yield_shared(curr_object);
16539 }
16540 }
16541
/* Done with this entry's object: drop its lock and the reference taken above, then re-lock the map. */
16542 vm_object_unlock(curr_object);
16543 vm_object_deallocate(curr_object);
16544
16545 vm_map_lock_read(map);
16546 }
16547
16548 vm_map_unlock_read(map);
16549 return retval;
16550 }
16551
16552 /*
16553 * vm_map_msync
16554 *
16555 * Synchronises the memory range specified with its backing store
16556 * image by either flushing or cleaning the contents to the appropriate
16557 * memory manager engaging in a memory object synchronize dialog with
16558 * the manager. The client doesn't return until the manager issues
16559 * m_o_s_completed message. MIG Magically converts user task parameter
16560 * to the task's address map.
16561 *
16562 * interpretation of sync_flags
16563 * VM_SYNC_INVALIDATE - discard pages, only return precious
16564 * pages to manager.
16565 *
16566 * VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
16567 * - discard pages, write dirty or precious
16568 * pages back to memory manager.
16569 *
16570 * VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
16571 * - write dirty or precious pages back to
16572 * the memory manager.
16573 *
16574 * VM_SYNC_CONTIGUOUS - does everything normally, but if there
16575 * is a hole in the region, and we would
16576 * have returned KERN_SUCCESS, return
16577 * KERN_INVALID_ADDRESS instead.
16578 *
16579 * NOTE
16580 * The memory object attributes have not yet been implemented, this
16581 * function will have to deal with the invalidate attribute
16582 *
16583 * RETURNS
16584 * KERN_INVALID_TASK Bad task parameter
16585 * KERN_INVALID_ARGUMENT both sync and async were specified.
16586 * KERN_SUCCESS The usual.
16587 * KERN_INVALID_ADDRESS There was a hole in the region.
16588 */
16589
16590 kern_return_t
16591 vm_map_msync(
16592 vm_map_t map,
16593 vm_map_address_t address,
16594 vm_map_size_t size,
16595 vm_sync_t sync_flags)
16596 {
16597 vm_map_entry_t entry;
16598 vm_map_size_t amount_left;
16599 vm_object_offset_t offset;
16600 boolean_t do_sync_req;
16601 boolean_t had_hole = FALSE;
16602 vm_map_offset_t pmap_offset;
16603
16604 if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
16605 (sync_flags & VM_SYNC_SYNCHRONOUS))
16606 return(KERN_INVALID_ARGUMENT);
16607
16608 /*
16609 * align address and size on page boundaries
16610 */
16611 size = (vm_map_round_page(address + size,
16612 VM_MAP_PAGE_MASK(map)) -
16613 vm_map_trunc_page(address,
16614 VM_MAP_PAGE_MASK(map)));
16615 address = vm_map_trunc_page(address,
16616 VM_MAP_PAGE_MASK(map));
16617
16618 if (map == VM_MAP_NULL)
16619 return(KERN_INVALID_TASK);
16620
16621 if (size == 0)
16622 return(KERN_SUCCESS);
16623
16624 amount_left = size;
16625
16626 while (amount_left > 0) {
16627 vm_object_size_t flush_size;
16628 vm_object_t object;
16629
16630 vm_map_lock(map);
16631 if (!vm_map_lookup_entry(map,
16632 address,
16633 &entry)) {
16634
16635 vm_map_size_t skip;
16636
16637 /*
16638 * hole in the address map.
16639 */
16640 had_hole = TRUE;
16641
16642 if (sync_flags & VM_SYNC_KILLPAGES) {
16643 /*
16644 * For VM_SYNC_KILLPAGES, there should be
16645 * no holes in the range, since we couldn't
16646 * prevent someone else from allocating in
16647 * that hole and we wouldn't want to "kill"
16648 * their pages.
16649 */
16650 vm_map_unlock(map);
16651 break;
16652 }
16653
16654 /*
16655 * Check for empty map.
16656 */
16657 if (entry == vm_map_to_entry(map) &&
16658 entry->vme_next == entry) {
16659 vm_map_unlock(map);
16660 break;
16661 }
16662 /*
16663 * Check that we don't wrap and that
16664 * we have at least one real map entry.
16665 */
16666 if ((map->hdr.nentries == 0) ||
16667 (entry->vme_next->vme_start < address)) {
16668 vm_map_unlock(map);
16669 break;
16670 }
16671 /*
16672 * Move up to the next entry if needed
16673 */
16674 skip = (entry->vme_next->vme_start - address);
16675 if (skip >= amount_left)
16676 amount_left = 0;
16677 else
16678 amount_left -= skip;
16679 address = entry->vme_next->vme_start;
16680 vm_map_unlock(map);
16681 continue;
16682 }
16683
16684 offset = address - entry->vme_start;
16685 pmap_offset = address;
16686
16687 /*
16688 * do we have more to flush than is contained in this
16689 * entry ?
16690 */
16691 if (amount_left + entry->vme_start + offset > entry->vme_end) {
16692 flush_size = entry->vme_end -
16693 (entry->vme_start + offset);
16694 } else {
16695 flush_size = amount_left;
16696 }
16697 amount_left -= flush_size;
16698 address += flush_size;
16699
16700 if (entry->is_sub_map == TRUE) {
16701 vm_map_t local_map;
16702 vm_map_offset_t local_offset;
16703
16704 local_map = VME_SUBMAP(entry);
16705 local_offset = VME_OFFSET(entry);
16706 vm_map_unlock(map);
16707 if (vm_map_msync(
16708 local_map,
16709 local_offset,
16710 flush_size,
16711 sync_flags) == KERN_INVALID_ADDRESS) {
16712 had_hole = TRUE;
16713 }
16714 continue;
16715 }
16716 object = VME_OBJECT(entry);
16717
16718 /*
16719 * We can't sync this object if the object has not been
16720 * created yet
16721 */
16722 if (object == VM_OBJECT_NULL) {
16723 vm_map_unlock(map);
16724 continue;
16725 }
16726 offset += VME_OFFSET(entry);
16727
16728 vm_object_lock(object);
16729
16730 if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
16731 int kill_pages = 0;
16732 boolean_t reusable_pages = FALSE;
16733
16734 if (sync_flags & VM_SYNC_KILLPAGES) {
16735 if (((object->ref_count == 1) ||
16736 ((object->copy_strategy !=
16737 MEMORY_OBJECT_COPY_SYMMETRIC) &&
16738 (object->copy == VM_OBJECT_NULL))) &&
16739 (object->shadow == VM_OBJECT_NULL)) {
16740 if (object->ref_count != 1) {
16741 vm_page_stats_reusable.free_shared++;
16742 }
16743 kill_pages = 1;
16744 } else {
16745 kill_pages = -1;
16746 }
16747 }
16748 if (kill_pages != -1)
16749 vm_object_deactivate_pages(
16750 object,
16751 offset,
16752 (vm_object_size_t) flush_size,
16753 kill_pages,
16754 reusable_pages,
16755 map->pmap,
16756 pmap_offset);
16757 vm_object_unlock(object);
16758 vm_map_unlock(map);
16759 continue;
16760 }
16761 /*
16762 * We can't sync this object if there isn't a pager.
16763 * Don't bother to sync internal objects, since there can't
16764 * be any "permanent" storage for these objects anyway.
16765 */
16766 if ((object->pager == MEMORY_OBJECT_NULL) ||
16767 (object->internal) || (object->private)) {
16768 vm_object_unlock(object);
16769 vm_map_unlock(map);
16770 continue;
16771 }
16772 /*
16773 * keep reference on the object until syncing is done
16774 */
16775 vm_object_reference_locked(object);
16776 vm_object_unlock(object);
16777
16778 vm_map_unlock(map);
16779
16780 do_sync_req = vm_object_sync(object,
16781 offset,
16782 flush_size,
16783 sync_flags & VM_SYNC_INVALIDATE,
16784 ((sync_flags & VM_SYNC_SYNCHRONOUS) ||
16785 (sync_flags & VM_SYNC_ASYNCHRONOUS)),
16786 sync_flags & VM_SYNC_SYNCHRONOUS);
16787
16788 if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) {
16789 /*
16790 * clear out the clustering and read-ahead hints
16791 */
16792 vm_object_lock(object);
16793
16794 object->pages_created = 0;
16795 object->pages_used = 0;
16796 object->sequential = 0;
16797 object->last_alloc = 0;
16798
16799 vm_object_unlock(object);
16800 }
16801 vm_object_deallocate(object);
16802 } /* while */
16803
16804 /* for proper msync() behaviour */
16805 if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS))
16806 return(KERN_INVALID_ADDRESS);
16807
16808 return(KERN_SUCCESS);
16809 }/* vm_msync */
16810
16811 /*
16812 * Routine: convert_port_entry_to_map
16813 * Purpose:
16814 * Convert from a port specifying an entry or a task
16815 * to a map. Doesn't consume the port ref; produces a map ref,
16816 * which may be null. Unlike convert_port_to_map, the
16817 * port may be task or a named entry backed.
16818 * Conditions:
16819 * Nothing locked.
16820 */
16821
16822
16823 vm_map_t
16824 convert_port_entry_to_map(
16825 ipc_port_t port)
16826 {
16827 vm_map_t map;
16828 vm_named_entry_t named_entry;
16829 uint32_t try_failed_count = 0;
16830
16831 if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
16832 while(TRUE) {
16833 ip_lock(port);
16834 if(ip_active(port) && (ip_kotype(port)
16835 == IKOT_NAMED_ENTRY)) {
16836 named_entry =
16837 (vm_named_entry_t)port->ip_kobject;
16838 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
16839 ip_unlock(port);
16840
16841 try_failed_count++;
16842 mutex_pause(try_failed_count);
16843 continue;
16844 }
16845 named_entry->ref_count++;
16846 lck_mtx_unlock(&(named_entry)->Lock);
16847 ip_unlock(port);
16848 if ((named_entry->is_sub_map) &&
16849 (named_entry->protection
16850 & VM_PROT_WRITE)) {
16851 map = named_entry->backing.map;
16852 } else {
16853 mach_destroy_memory_entry(port);
16854 return VM_MAP_NULL;
16855 }
16856 vm_map_reference_swap(map);
16857 mach_destroy_memory_entry(port);
16858 break;
16859 }
16860 else
16861 return VM_MAP_NULL;
16862 }
16863 }
16864 else
16865 map = convert_port_to_map(port);
16866
16867 return map;
16868 }
16869
16870 /*
16871 * Routine: convert_port_entry_to_object
16872 * Purpose:
16873 * Convert from a port specifying a named entry to an
16874 * object. Doesn't consume the port ref; produces a map ref,
16875 * which may be null.
16876 * Conditions:
16877 * Nothing locked.
16878 */
16879
16880
16881 vm_object_t
16882 convert_port_entry_to_object(
16883 ipc_port_t port)
16884 {
16885 vm_object_t object = VM_OBJECT_NULL;
16886 vm_named_entry_t named_entry;
16887 uint32_t try_failed_count = 0;
16888
16889 if (IP_VALID(port) &&
16890 (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
16891 try_again:
16892 ip_lock(port);
16893 if (ip_active(port) &&
16894 (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
16895 named_entry = (vm_named_entry_t)port->ip_kobject;
16896 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
16897 ip_unlock(port);
16898 try_failed_count++;
16899 mutex_pause(try_failed_count);
16900 goto try_again;
16901 }
16902 named_entry->ref_count++;
16903 lck_mtx_unlock(&(named_entry)->Lock);
16904 ip_unlock(port);
16905 if (!(named_entry->is_sub_map) &&
16906 !(named_entry->is_copy) &&
16907 (named_entry->protection & VM_PROT_WRITE)) {
16908 object = named_entry->backing.object;
16909 vm_object_reference(object);
16910 }
16911 mach_destroy_memory_entry(port);
16912 }
16913 }
16914
16915 return object;
16916 }
16917
16918 /*
16919 * Export routines to other components for the things we access locally through
16920 * macros.
16921 */
16922 #undef current_map
16923 vm_map_t
16924 current_map(void)
16925 {
16926 return (current_map_fast());
16927 }
16928
16929 /*
16930 * vm_map_reference:
16931 *
16932 * Most code internal to the osfmk will go through a
16933 * macro defining this. This is always here for the
16934 * use of other kernel components.
16935 */
16936 #undef vm_map_reference
16937 void
16938 vm_map_reference(
16939 vm_map_t map)
16940 {
16941 if (map == VM_MAP_NULL)
16942 return;
16943
16944 lck_mtx_lock(&map->s_lock);
16945 #if TASK_SWAPPER
16946 assert(map->res_count > 0);
16947 assert(map->ref_count >= map->res_count);
16948 map->res_count++;
16949 #endif
16950 map->ref_count++;
16951 lck_mtx_unlock(&map->s_lock);
16952 }
16953
16954 /*
16955 * vm_map_deallocate:
16956 *
16957 * Removes a reference from the specified map,
16958 * destroying it if no references remain.
16959 * The map should not be locked.
16960 */
16961 void
16962 vm_map_deallocate(
16963 vm_map_t map)
16964 {
16965 unsigned int ref;
16966
16967 if (map == VM_MAP_NULL)
16968 return;
16969
16970 lck_mtx_lock(&map->s_lock);
16971 ref = --map->ref_count;
16972 if (ref > 0) {
16973 vm_map_res_deallocate(map);
16974 lck_mtx_unlock(&map->s_lock);
16975 return;
16976 }
16977 assert(map->ref_count == 0);
16978 lck_mtx_unlock(&map->s_lock);
16979
16980 #if TASK_SWAPPER
16981 /*
16982 * The map residence count isn't decremented here because
16983 * the vm_map_delete below will traverse the entire map,
16984 * deleting entries, and the residence counts on objects
16985 * and sharing maps will go away then.
16986 */
16987 #endif
16988
16989 vm_map_destroy(map, VM_MAP_NO_FLAGS);
16990 }
16991
16992
16993 void
16994 vm_map_disable_NX(vm_map_t map)
16995 {
16996 if (map == NULL)
16997 return;
16998 if (map->pmap == NULL)
16999 return;
17000
17001 pmap_disable_NX(map->pmap);
17002 }
17003
17004 void
17005 vm_map_disallow_data_exec(vm_map_t map)
17006 {
17007 if (map == NULL)
17008 return;
17009
17010 map->map_disallow_data_exec = TRUE;
17011 }
17012
17013 /* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
17014 * more descriptive.
17015 */
17016 void
17017 vm_map_set_32bit(vm_map_t map)
17018 {
17019 #if defined(__arm__) || defined(__arm64__)
17020 map->max_offset = pmap_max_offset(FALSE, ARM_PMAP_MAX_OFFSET_DEVICE);
17021 #else
17022 map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
17023 #endif
17024 }
17025
17026
17027 void
17028 vm_map_set_64bit(vm_map_t map)
17029 {
17030 #if defined(__arm__) || defined(__arm64__)
17031 map->max_offset = pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_DEVICE);
17032 #else
17033 map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
17034 #endif
17035 }
17036
17037 /*
17038 * Expand the maximum size of an existing map.
17039 */
17040 void
17041 vm_map_set_jumbo(vm_map_t map)
17042 {
17043 #if defined (__arm64__)
17044 vm_map_offset_t old_max_offset = map->max_offset;
17045 map->max_offset = pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_JUMBO);
17046 if (map->holes_list->prev->vme_end == pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_DEVICE)) {
17047 /*
17048 * There is already a hole at the end of the map; simply make it bigger.
17049 */
17050 map->holes_list->prev->vme_end = map->max_offset;
17051 } else {
17052 /*
17053 * There is no hole at the end, so we need to create a new hole
17054 * for the new empty space we're creating.
17055 */
17056 struct vm_map_links *new_hole = zalloc(vm_map_holes_zone);
17057 new_hole->start = old_max_offset;
17058 new_hole->end = map->max_offset;
17059 new_hole->prev = map->holes_list->prev;
17060 new_hole->next = (struct vm_map_entry *)map->holes_list;
17061 map->holes_list->prev->links.next = (struct vm_map_entry *)new_hole;
17062 map->holes_list->prev = (struct vm_map_entry *)new_hole;
17063 }
17064 #else /* arm64 */
17065 (void) map;
17066 #endif
17067 }
17068
17069 vm_map_offset_t
17070 vm_compute_max_offset(boolean_t is64)
17071 {
17072 #if defined(__arm__) || defined(__arm64__)
17073 return (pmap_max_offset(is64, ARM_PMAP_MAX_OFFSET_DEVICE));
17074 #else
17075 return (is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS);
17076 #endif
17077 }
17078
17079 void
17080 vm_map_get_max_aslr_slide_section(
17081 vm_map_t map __unused,
17082 int64_t *max_sections,
17083 int64_t *section_size)
17084 {
17085 #if defined(__arm64__)
17086 *max_sections = 3;
17087 *section_size = ARM_TT_TWIG_SIZE;
17088 #else
17089 *max_sections = 1;
17090 *section_size = 0;
17091 #endif
17092 }
17093
17094 uint64_t
17095 vm_map_get_max_aslr_slide_pages(vm_map_t map)
17096 {
17097 #if defined(__arm64__)
17098 /* Limit arm64 slide to 16MB to conserve contiguous VA space in the more
17099 * limited embedded address space; this is also meant to minimize pmap
17100 * memory usage on 16KB page systems.
17101 */
17102 return (1 << (24 - VM_MAP_PAGE_SHIFT(map)));
17103 #else
17104 return (1 << (vm_map_is_64bit(map) ? 16 : 8));
17105 #endif
17106 }
17107
17108 uint64_t
17109 vm_map_get_max_loader_aslr_slide_pages(vm_map_t map)
17110 {
17111 #if defined(__arm64__)
17112 /* We limit the loader slide to 4MB, in order to ensure at least 8 bits
17113 * of independent entropy on 16KB page systems.
17114 */
17115 return (1 << (22 - VM_MAP_PAGE_SHIFT(map)));
17116 #else
17117 return (1 << (vm_map_is_64bit(map) ? 16 : 8));
17118 #endif
17119 }
17120
17121 #ifndef __arm__
17122 boolean_t
17123 vm_map_is_64bit(
17124 vm_map_t map)
17125 {
17126 return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
17127 }
17128 #endif
17129
17130 boolean_t
17131 vm_map_has_hard_pagezero(
17132 vm_map_t map,
17133 vm_map_offset_t pagezero_size)
17134 {
17135 /*
17136 * XXX FBDP
17137 * We should lock the VM map (for read) here but we can get away
17138 * with it for now because there can't really be any race condition:
17139 * the VM map's min_offset is changed only when the VM map is created
17140 * and when the zero page is established (when the binary gets loaded),
17141 * and this routine gets called only when the task terminates and the
17142 * VM map is being torn down, and when a new map is created via
17143 * load_machfile()/execve().
17144 */
17145 return (map->min_offset >= pagezero_size);
17146 }
17147
17148 /*
17149 * Raise a VM map's maximun offset.
17150 */
17151 kern_return_t
17152 vm_map_raise_max_offset(
17153 vm_map_t map,
17154 vm_map_offset_t new_max_offset)
17155 {
17156 kern_return_t ret;
17157
17158 vm_map_lock(map);
17159 ret = KERN_INVALID_ADDRESS;
17160
17161 if (new_max_offset >= map->max_offset) {
17162 if (!vm_map_is_64bit(map)) {
17163 if (new_max_offset <= (vm_map_offset_t)VM_MAX_ADDRESS) {
17164 map->max_offset = new_max_offset;
17165 ret = KERN_SUCCESS;
17166 }
17167 } else {
17168 if (new_max_offset <= (vm_map_offset_t)MACH_VM_MAX_ADDRESS) {
17169 map->max_offset = new_max_offset;
17170 ret = KERN_SUCCESS;
17171 }
17172 }
17173 }
17174
17175 vm_map_unlock(map);
17176 return ret;
17177 }
17178
17179
17180 /*
17181 * Raise a VM map's minimum offset.
17182 * To strictly enforce "page zero" reservation.
17183 */
17184 kern_return_t
17185 vm_map_raise_min_offset(
17186 vm_map_t map,
17187 vm_map_offset_t new_min_offset)
17188 {
17189 vm_map_entry_t first_entry;
17190
17191 new_min_offset = vm_map_round_page(new_min_offset,
17192 VM_MAP_PAGE_MASK(map));
17193
17194 vm_map_lock(map);
17195
17196 if (new_min_offset < map->min_offset) {
17197 /*
17198 * Can't move min_offset backwards, as that would expose
17199 * a part of the address space that was previously, and for
17200 * possibly good reasons, inaccessible.
17201 */
17202 vm_map_unlock(map);
17203 return KERN_INVALID_ADDRESS;
17204 }
17205 if (new_min_offset >= map->max_offset) {
17206 /* can't go beyond the end of the address space */
17207 vm_map_unlock(map);
17208 return KERN_INVALID_ADDRESS;
17209 }
17210
17211 first_entry = vm_map_first_entry(map);
17212 if (first_entry != vm_map_to_entry(map) &&
17213 first_entry->vme_start < new_min_offset) {
17214 /*
17215 * Some memory was already allocated below the new
17216 * minimun offset. It's too late to change it now...
17217 */
17218 vm_map_unlock(map);
17219 return KERN_NO_SPACE;
17220 }
17221
17222 map->min_offset = new_min_offset;
17223
17224 assert(map->holes_list);
17225 map->holes_list->start = new_min_offset;
17226 assert(new_min_offset < map->holes_list->end);
17227
17228 vm_map_unlock(map);
17229
17230 return KERN_SUCCESS;
17231 }
17232
17233 /*
17234 * Set the limit on the maximum amount of user wired memory allowed for this map.
17235 * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side of
17236 * the kernel. The limits are checked in the mach VM side, so we keep a copy so we
17237 * don't have to reach over to the BSD data structures.
17238 */
17239
17240 void
17241 vm_map_set_user_wire_limit(vm_map_t map,
17242 vm_size_t limit)
17243 {
17244 map->user_wire_limit = limit;
17245 }
17246
17247
17248 void vm_map_switch_protect(vm_map_t map,
17249 boolean_t val)
17250 {
17251 vm_map_lock(map);
17252 map->switch_protect=val;
17253 vm_map_unlock(map);
17254 }
17255
17256 /*
17257 * IOKit has mapped a region into this map; adjust the pmap's ledgers appropriately.
17258 * phys_footprint is a composite limit consisting of iokit + physmem, so we need to
17259 * bump both counters.
17260 */
17261 void
17262 vm_map_iokit_mapped_region(vm_map_t map, vm_size_t bytes)
17263 {
17264 pmap_t pmap = vm_map_pmap(map);
17265
17266 ledger_credit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
17267 ledger_credit(pmap->ledger, task_ledgers.phys_footprint, bytes);
17268 }
17269
17270 void
17271 vm_map_iokit_unmapped_region(vm_map_t map, vm_size_t bytes)
17272 {
17273 pmap_t pmap = vm_map_pmap(map);
17274
17275 ledger_debit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
17276 ledger_debit(pmap->ledger, task_ledgers.phys_footprint, bytes);
17277 }
17278
/*
 * Add (generate) code signature for memory range.
 *
 * Marks every resident page of [start, end) as code-signing validated.
 * The range must lie inside a single non-submap entry that already has
 * a VM object, and every page of the range must be resident and in a
 * normal state.
 *
 * Returns:
 *	KERN_INVALID_ARGUMENT	null map, range not covered by one entry,
 *				or the entry has no object
 *	KERN_INVALID_ADDRESS	start is unmapped or maps a submap
 *	KERN_FAILURE		a page is missing, busy, or in an error /
 *				restart / private / absent state
 *	KERN_SUCCESS		all pages were marked
 */
#if CONFIG_DYNAMIC_CODE_SIGNING
kern_return_t vm_map_sign(vm_map_t map,
		 vm_map_offset_t start,
		 vm_map_offset_t end)
{
	vm_map_entry_t		entry;
	vm_page_t		m;
	vm_object_t		object;
	vm_object_offset_t	offset;

	/*
	 * Vet all the input parameters and current type and state of the
	 * underlaying object.  Return with an error if anything is amiss.
	 */
	if (map == VM_MAP_NULL)
		return(KERN_INVALID_ARGUMENT);

	vm_map_lock_read(map);

	if (!vm_map_lookup_entry(map, start, &entry) || entry->is_sub_map) {
		/*
		 * Must pass a valid non-submap address.
		 */
		vm_map_unlock_read(map);
		return(KERN_INVALID_ADDRESS);
	}

	if((entry->vme_start > start) || (entry->vme_end < end)) {
		/*
		 * Map entry doesn't cover the requested range. Not handling
		 * this situation currently.
		 */
		vm_map_unlock_read(map);
		return(KERN_INVALID_ARGUMENT);
	}

	object = VME_OBJECT(entry);
	if (object == VM_OBJECT_NULL) {
		/*
		 * Object must already be present or we can't sign.
		 */
		vm_map_unlock_read(map);
		return KERN_INVALID_ARGUMENT;
	}

	/*
	 * Translate "start" into an object offset while the map is still
	 * locked; the entry must not be looked at once the map lock has
	 * been dropped.
	 */
	offset = start - entry->vme_start + VME_OFFSET(entry);

	vm_object_lock(object);
	vm_map_unlock_read(map);

	while(start < end) {
		uint32_t refmod;

		m = vm_page_lookup(object, offset);
		if (m==VM_PAGE_NULL) {
			/* should we try to fault a page here? we can probably
			 * demand it exists and is locked for this request */
			vm_object_unlock(object);
			return KERN_FAILURE;
		}
		/* deal with special page status */
		if (m->busy ||
		    (m->unusual && (m->error || m->restart || m->private || m->absent))) {
			vm_object_unlock(object);
			return KERN_FAILURE;
		}

		/* Page is OK... now "validate" it */
		/* This is the place where we'll call out to create a code
		 * directory, later */
		m->cs_validated = TRUE;

		/* The page is now "clean" for codesigning purposes. That means
		 * we don't consider it as modified (wpmapped) anymore. But
		 * we'll disconnect the page so we note any future modification
		 * attempts. */
		m->wpmapped = FALSE;
		refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));

		/* Pull the dirty status from the pmap, since we cleared the
		 * wpmapped bit */
		if ((refmod & VM_MEM_MODIFIED) && !m->dirty) {
			SET_PAGE_DIRTY(m, FALSE);
		}

		/* On to the next page */
		start += PAGE_SIZE;
		offset += PAGE_SIZE;
	}
	vm_object_unlock(object);

	return KERN_SUCCESS;
}
#endif
17371
17372 kern_return_t vm_map_partial_reap(vm_map_t map, unsigned int *reclaimed_resident, unsigned int *reclaimed_compressed)
17373 {
17374 vm_map_entry_t entry = VM_MAP_ENTRY_NULL;
17375 vm_map_entry_t next_entry;
17376 kern_return_t kr = KERN_SUCCESS;
17377 vm_map_t zap_map;
17378
17379 vm_map_lock(map);
17380
17381 /*
17382 * We use a "zap_map" to avoid having to unlock
17383 * the "map" in vm_map_delete().
17384 */
17385 zap_map = vm_map_create(PMAP_NULL,
17386 map->min_offset,
17387 map->max_offset,
17388 map->hdr.entries_pageable);
17389
17390 if (zap_map == VM_MAP_NULL) {
17391 return KERN_RESOURCE_SHORTAGE;
17392 }
17393
17394 vm_map_set_page_shift(zap_map,
17395 VM_MAP_PAGE_SHIFT(map));
17396 vm_map_disable_hole_optimization(zap_map);
17397
17398 for (entry = vm_map_first_entry(map);
17399 entry != vm_map_to_entry(map);
17400 entry = next_entry) {
17401 next_entry = entry->vme_next;
17402
17403 if (VME_OBJECT(entry) &&
17404 !entry->is_sub_map &&
17405 (VME_OBJECT(entry)->internal == TRUE) &&
17406 (VME_OBJECT(entry)->ref_count == 1)) {
17407
17408 *reclaimed_resident += VME_OBJECT(entry)->resident_page_count;
17409 *reclaimed_compressed += vm_compressor_pager_get_count(VME_OBJECT(entry)->pager);
17410
17411 (void)vm_map_delete(map,
17412 entry->vme_start,
17413 entry->vme_end,
17414 VM_MAP_REMOVE_SAVE_ENTRIES,
17415 zap_map);
17416 }
17417 }
17418
17419 vm_map_unlock(map);
17420
17421 /*
17422 * Get rid of the "zap_maps" and all the map entries that
17423 * they may still contain.
17424 */
17425 if (zap_map != VM_MAP_NULL) {
17426 vm_map_destroy(zap_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
17427 zap_map = VM_MAP_NULL;
17428 }
17429
17430 return kr;
17431 }
17432
17433
#if DEVELOPMENT || DEBUG

/*
 * Removes the pmap mappings for every entry of the map, skipping
 * non-submap entries that have no object or a physically contiguous one.
 * When do_unnest is TRUE (and nested pmaps are compiled in), nested
 * submap entries are un-nested first.
 *
 * Returns the pmap's resident page count sampled before the mappings
 * are removed.
 */
int
vm_map_disconnect_page_mappings(
	vm_map_t	map,
	boolean_t	do_unnest)
{
	vm_map_entry_t	cur;
	int		resident_before = 0;

	if (do_unnest == TRUE) {
#ifndef NO_NESTED_PMAP
		vm_map_lock(map);

		cur = vm_map_first_entry(map);
		while (cur != vm_map_to_entry(map)) {
			if (cur->is_sub_map && cur->use_pmap) {
				/*
				 * Un-nest the whole range of this entry so
				 * that the removal below only touches
				 * mappings in the pmap used by this task.
				 */
				vm_map_clip_unnest(map, cur, cur->vme_start, cur->vme_end);
			}
			cur = cur->vme_next;
		}
		vm_map_unlock(map);
#endif
	}
	vm_map_lock_read(map);

	resident_before = map->pmap->stats.resident_count;

	cur = vm_map_first_entry(map);
	while (cur != vm_map_to_entry(map)) {
		if (cur->is_sub_map) {
			assert(!cur->use_pmap);
			pmap_remove_options(map->pmap, cur->vme_start, cur->vme_end, 0);
		} else if (!((VME_OBJECT(cur) == 0) ||
			     (VME_OBJECT(cur)->phys_contiguous))) {
			pmap_remove_options(map->pmap, cur->vme_start, cur->vme_end, 0);
		}
		cur = cur->vme_next;
	}
	vm_map_unlock_read(map);

	return resident_before;
}

#endif
17488
17489
#if CONFIG_FREEZE


/* freezer bookkeeping; reset by vm_map_freeze() */
int c_freezer_swapout_count;
int c_freezer_compression_count = 0;
AbsoluteTime c_freezer_last_yield_ts = 0;

/*
 * Walks the map and hands every eligible internal VM object to
 * vm_object_compressed_freezer_pageout().  An entry is eligible when it
 * is not a submap and has an object that is internal and not physically
 * contiguous; when freezer swap is active, objects with more than one
 * reference are skipped as well.
 *
 * All five out parameters are reset on entry (the counts to 0,
 * *has_shared to FALSE) and are not otherwise updated here;
 * dirty_budget is unused.
 *
 * Returns KERN_NO_SPACE as soon as the compressor or swap reports being
 * low on space, KERN_SUCCESS otherwise.
 */
kern_return_t vm_map_freeze(
		vm_map_t map,
		unsigned int *purgeable_count,
		unsigned int *wired_count,
		unsigned int *clean_count,
		unsigned int *dirty_count,
		__unused unsigned int dirty_budget,
		boolean_t *has_shared)
{
	vm_map_entry_t	cur = VM_MAP_ENTRY_NULL;
	kern_return_t	result = KERN_SUCCESS;

	*dirty_count = 0;
	*clean_count = 0;
	*wired_count = 0;
	*purgeable_count = 0;
	*has_shared = FALSE;

	/*
	 * Take the map lock exclusively so that page faults and
	 * lookups are held off while this map is being frozen.
	 */
	vm_map_lock(map);

	assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);

	if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
		result = KERN_NO_SPACE;
	} else {
		c_freezer_compression_count = 0;
		clock_get_uptime(&c_freezer_last_yield_ts);

		cur = vm_map_first_entry(map);
		while (cur != vm_map_to_entry(map)) {
			vm_object_t	obj = VME_OBJECT(cur);

			if (obj &&
			    !cur->is_sub_map &&
			    !obj->phys_contiguous &&
			    obj->internal == TRUE &&
			    /*
			     * With freezer swap active the pages could end
			     * up on disk; leave shared objects alone since
			     * they could just get brought back in again.
			     */
			    !((VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) &&
			      obj->ref_count > 1)) {

				vm_object_compressed_freezer_pageout(obj);

				if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
					result = KERN_NO_SPACE;
					break;
				}
			}
			cur = cur->vme_next;
		}
	}

	vm_map_unlock(map);

	vm_object_compressed_freezer_done();

	if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
		/*
		 * This freeze session and task are finished: reset the
		 * counter that tracks the number of swapped c_segs.
		 */
		c_freezer_swapout_count = 0;
	}
	return result;
}

#endif
17577
17578 /*
17579 * vm_map_entry_should_cow_for_true_share:
17580 *
17581 * Determines if the map entry should be clipped and setup for copy-on-write
17582 * to avoid applying "true_share" to a large VM object when only a subset is
17583 * targeted.
17584 *
17585 * For now, we target only the map entries created for the Objective C
17586 * Garbage Collector, which initially have the following properties:
17587 * - alias == VM_MEMORY_MALLOC
17588 * - wired_count == 0
17589 * - !needs_copy
17590 * and a VM object with:
17591 * - internal
17592 * - copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC
17593 * - !true_share
17594 * - vo_size == ANON_CHUNK_SIZE
17595 *
17596 * Only non-kernel map entries.
17597 */
17598 boolean_t
17599 vm_map_entry_should_cow_for_true_share(
17600 vm_map_entry_t entry)
17601 {
17602 vm_object_t object;
17603
17604 if (entry->is_sub_map) {
17605 /* entry does not point at a VM object */
17606 return FALSE;
17607 }
17608
17609 if (entry->needs_copy) {
17610 /* already set for copy_on_write: done! */
17611 return FALSE;
17612 }
17613
17614 if (VME_ALIAS(entry) != VM_MEMORY_MALLOC &&
17615 VME_ALIAS(entry) != VM_MEMORY_MALLOC_SMALL) {
17616 /* not a malloc heap or Obj-C Garbage Collector heap */
17617 return FALSE;
17618 }
17619
17620 if (entry->wired_count) {
17621 /* wired: can't change the map entry... */
17622 vm_counters.should_cow_but_wired++;
17623 return FALSE;
17624 }
17625
17626 object = VME_OBJECT(entry);
17627
17628 if (object == VM_OBJECT_NULL) {
17629 /* no object yet... */
17630 return FALSE;
17631 }
17632
17633 if (!object->internal) {
17634 /* not an internal object */
17635 return FALSE;
17636 }
17637
17638 if (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
17639 /* not the default copy strategy */
17640 return FALSE;
17641 }
17642
17643 if (object->true_share) {
17644 /* already true_share: too late to avoid it */
17645 return FALSE;
17646 }
17647
17648 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC &&
17649 object->vo_size != ANON_CHUNK_SIZE) {
17650 /* ... not an object created for the ObjC Garbage Collector */
17651 return FALSE;
17652 }
17653
17654 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_SMALL &&
17655 object->vo_size != 2048 * 4096) {
17656 /* ... not a "MALLOC_SMALL" heap */
17657 return FALSE;
17658 }
17659
17660 /*
17661 * All the criteria match: we have a large object being targeted for "true_share".
17662 * To limit the adverse side-effects linked with "true_share", tell the caller to
17663 * try and avoid setting up the entire object for "true_share" by clipping the
17664 * targeted range and setting it up for copy-on-write.
17665 */
17666 return TRUE;
17667 }
17668
17669 vm_map_offset_t
17670 vm_map_round_page_mask(
17671 vm_map_offset_t offset,
17672 vm_map_offset_t mask)
17673 {
17674 return VM_MAP_ROUND_PAGE(offset, mask);
17675 }
17676
17677 vm_map_offset_t
17678 vm_map_trunc_page_mask(
17679 vm_map_offset_t offset,
17680 vm_map_offset_t mask)
17681 {
17682 return VM_MAP_TRUNC_PAGE(offset, mask);
17683 }
17684
17685 boolean_t
17686 vm_map_page_aligned(
17687 vm_map_offset_t offset,
17688 vm_map_offset_t mask)
17689 {
17690 return ((offset) & mask) == 0;
17691 }
17692
17693 int
17694 vm_map_page_shift(
17695 vm_map_t map)
17696 {
17697 return VM_MAP_PAGE_SHIFT(map);
17698 }
17699
17700 int
17701 vm_map_page_size(
17702 vm_map_t map)
17703 {
17704 return VM_MAP_PAGE_SIZE(map);
17705 }
17706
17707 vm_map_offset_t
17708 vm_map_page_mask(
17709 vm_map_t map)
17710 {
17711 return VM_MAP_PAGE_MASK(map);
17712 }
17713
17714 kern_return_t
17715 vm_map_set_page_shift(
17716 vm_map_t map,
17717 int pageshift)
17718 {
17719 if (map->hdr.nentries != 0) {
17720 /* too late to change page size */
17721 return KERN_FAILURE;
17722 }
17723
17724 map->hdr.page_shift = pageshift;
17725
17726 return KERN_SUCCESS;
17727 }
17728
17729 kern_return_t
17730 vm_map_query_volatile(
17731 vm_map_t map,
17732 mach_vm_size_t *volatile_virtual_size_p,
17733 mach_vm_size_t *volatile_resident_size_p,
17734 mach_vm_size_t *volatile_compressed_size_p,
17735 mach_vm_size_t *volatile_pmap_size_p,
17736 mach_vm_size_t *volatile_compressed_pmap_size_p)
17737 {
17738 mach_vm_size_t volatile_virtual_size;
17739 mach_vm_size_t volatile_resident_count;
17740 mach_vm_size_t volatile_compressed_count;
17741 mach_vm_size_t volatile_pmap_count;
17742 mach_vm_size_t volatile_compressed_pmap_count;
17743 mach_vm_size_t resident_count;
17744 vm_map_entry_t entry;
17745 vm_object_t object;
17746
17747 /* map should be locked by caller */
17748
17749 volatile_virtual_size = 0;
17750 volatile_resident_count = 0;
17751 volatile_compressed_count = 0;
17752 volatile_pmap_count = 0;
17753 volatile_compressed_pmap_count = 0;
17754
17755 for (entry = vm_map_first_entry(map);
17756 entry != vm_map_to_entry(map);
17757 entry = entry->vme_next) {
17758 mach_vm_size_t pmap_resident_bytes, pmap_compressed_bytes;
17759
17760 if (entry->is_sub_map) {
17761 continue;
17762 }
17763 if (! (entry->protection & VM_PROT_WRITE)) {
17764 continue;
17765 }
17766 object = VME_OBJECT(entry);
17767 if (object == VM_OBJECT_NULL) {
17768 continue;
17769 }
17770 if (object->purgable != VM_PURGABLE_VOLATILE &&
17771 object->purgable != VM_PURGABLE_EMPTY) {
17772 continue;
17773 }
17774 if (VME_OFFSET(entry)) {
17775 /*
17776 * If the map entry has been split and the object now
17777 * appears several times in the VM map, we don't want
17778 * to count the object's resident_page_count more than
17779 * once. We count it only for the first one, starting
17780 * at offset 0 and ignore the other VM map entries.
17781 */
17782 continue;
17783 }
17784 resident_count = object->resident_page_count;
17785 if ((VME_OFFSET(entry) / PAGE_SIZE) >= resident_count) {
17786 resident_count = 0;
17787 } else {
17788 resident_count -= (VME_OFFSET(entry) / PAGE_SIZE);
17789 }
17790
17791 volatile_virtual_size += entry->vme_end - entry->vme_start;
17792 volatile_resident_count += resident_count;
17793 if (object->pager) {
17794 volatile_compressed_count +=
17795 vm_compressor_pager_get_count(object->pager);
17796 }
17797 pmap_compressed_bytes = 0;
17798 pmap_resident_bytes =
17799 pmap_query_resident(map->pmap,
17800 entry->vme_start,
17801 entry->vme_end,
17802 &pmap_compressed_bytes);
17803 volatile_pmap_count += (pmap_resident_bytes / PAGE_SIZE);
17804 volatile_compressed_pmap_count += (pmap_compressed_bytes
17805 / PAGE_SIZE);
17806 }
17807
17808 /* map is still locked on return */
17809
17810 *volatile_virtual_size_p = volatile_virtual_size;
17811 *volatile_resident_size_p = volatile_resident_count * PAGE_SIZE;
17812 *volatile_compressed_size_p = volatile_compressed_count * PAGE_SIZE;
17813 *volatile_pmap_size_p = volatile_pmap_count * PAGE_SIZE;
17814 *volatile_compressed_pmap_size_p = volatile_compressed_pmap_count * PAGE_SIZE;
17815
17816 return KERN_SUCCESS;
17817 }
17818
17819 void
17820 vm_map_sizes(vm_map_t map,
17821 vm_map_size_t * psize,
17822 vm_map_size_t * pfree,
17823 vm_map_size_t * plargest_free)
17824 {
17825 vm_map_entry_t entry;
17826 vm_map_offset_t prev;
17827 vm_map_size_t free, total_free, largest_free;
17828 boolean_t end;
17829
17830 if (!map)
17831 {
17832 *psize = *pfree = *plargest_free = 0;
17833 return;
17834 }
17835 total_free = largest_free = 0;
17836
17837 vm_map_lock_read(map);
17838 if (psize) *psize = map->max_offset - map->min_offset;
17839
17840 prev = map->min_offset;
17841 for (entry = vm_map_first_entry(map);; entry = entry->vme_next)
17842 {
17843 end = (entry == vm_map_to_entry(map));
17844
17845 if (end) free = entry->vme_end - prev;
17846 else free = entry->vme_start - prev;
17847
17848 total_free += free;
17849 if (free > largest_free) largest_free = free;
17850
17851 if (end) break;
17852 prev = entry->vme_end;
17853 }
17854 vm_map_unlock_read(map);
17855 if (pfree) *pfree = total_free;
17856 if (plargest_free) *plargest_free = largest_free;
17857 }
17858
#if VM_SCAN_FOR_SHADOW_CHAIN
int vm_map_shadow_max(vm_map_t map);

/*
 *	vm_map_shadow_max:
 *
 *	Return the length of the longest chain of "shadow" links found
 *	behind the object of any non-submap entry in "map".  Returns 0
 *	for a NULL map or when no object has a shadow.
 *
 *	The map's read lock is held for the whole scan.  Each chain is
 *	walked with shared object locks taken hand over hand: the next
 *	object is locked before the current one is released.
 */
int
vm_map_shadow_max(
	vm_map_t	map)
{
	int		depth, deepest;
	vm_map_entry_t	entry;
	vm_object_t	object, next_object;

	if (map == NULL) {
		return 0;
	}

	deepest = 0;

	vm_map_lock_read(map);

	for (entry = vm_map_first_entry(map);
	     entry != vm_map_to_entry(map);
	     entry = entry->vme_next) {
		if (entry->is_sub_map) {
			continue;
		}
		object = VME_OBJECT(entry);
		if (object == NULL) {
			continue;
		}

		vm_object_lock_shared(object);
		depth = 0;
		while (object->shadow != NULL) {
			next_object = object->shadow;
			/* lock the next link before letting go of this one */
			vm_object_lock_shared(next_object);
			vm_object_unlock(object);
			object = next_object;
			depth++;
		}
		/* "object" is now the locked end of the chain */
		vm_object_unlock(object);

		if (depth > deepest) {
			deepest = depth;
		}
	}

	vm_map_unlock_read(map);

	return deepest;
}
#endif /* VM_SCAN_FOR_SHADOW_CHAIN */
17904
17905 void vm_commit_pagezero_status(vm_map_t lmap) {
17906 pmap_advise_pagezero_range(lmap->pmap, lmap->min_offset);
17907 }
17908
17909 #if __x86_64__
17910 void
17911 vm_map_set_high_start(
17912 vm_map_t map,
17913 vm_map_offset_t high_start)
17914 {
17915 map->vmmap_high_start = high_start;
17916 }
17917 #endif /* __x86_64__ */