]> git.saurik.com Git - apple/xnu.git/blob - osfmk/vm/vm_map.c
xnu-6153.101.6.tar.gz
[apple/xnu.git] / osfmk / vm / vm_map.c
1 /*
2 * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_map.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * Virtual memory mapping module.
64 */
65
66 #include <task_swapper.h>
67 #include <mach_assert.h>
68
69 #include <vm/vm_options.h>
70
71 #include <libkern/OSAtomic.h>
72
73 #include <mach/kern_return.h>
74 #include <mach/port.h>
75 #include <mach/vm_attributes.h>
76 #include <mach/vm_param.h>
77 #include <mach/vm_behavior.h>
78 #include <mach/vm_statistics.h>
79 #include <mach/memory_object.h>
80 #include <mach/mach_vm.h>
81 #include <machine/cpu_capabilities.h>
82 #include <mach/sdt.h>
83
84 #include <kern/assert.h>
85 #include <kern/backtrace.h>
86 #include <kern/counters.h>
87 #include <kern/exc_guard.h>
88 #include <kern/kalloc.h>
89 #include <kern/zalloc.h>
90
91 #include <vm/cpm.h>
92 #include <vm/vm_compressor.h>
93 #include <vm/vm_compressor_pager.h>
94 #include <vm/vm_init.h>
95 #include <vm/vm_fault.h>
96 #include <vm/vm_map.h>
97 #include <vm/vm_object.h>
98 #include <vm/vm_page.h>
99 #include <vm/vm_pageout.h>
100 #include <vm/pmap.h>
101 #include <vm/vm_kern.h>
102 #include <ipc/ipc_port.h>
103 #include <kern/sched_prim.h>
104 #include <kern/misc_protos.h>
105
106 #include <mach/vm_map_server.h>
107 #include <mach/mach_host_server.h>
108 #include <vm/vm_protos.h>
109 #include <vm/vm_purgeable_internal.h>
110
111 #include <vm/vm_protos.h>
112 #include <vm/vm_shared_region.h>
113 #include <vm/vm_map_store.h>
114
115 #include <san/kasan.h>
116
117 #include <sys/codesign.h>
118 #include <libkern/section_keywords.h>
119 #if DEVELOPMENT || DEBUG
120 extern int proc_selfcsflags(void);
121 #if CONFIG_EMBEDDED
122 extern int panic_on_unsigned_execute;
123 #endif /* CONFIG_EMBEDDED */
124 #endif /* DEVELOPMENT || DEBUG */
125
126 #if __arm64__
127 extern const int fourk_binary_compatibility_unsafe;
128 extern const int fourk_binary_compatibility_allow_wx;
129 #endif /* __arm64__ */
130 extern int proc_selfpid(void);
131 extern char *proc_name_address(void *p);
132
133 #if VM_MAP_DEBUG_APPLE_PROTECT
134 int vm_map_debug_apple_protect = 0;
135 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
136 #if VM_MAP_DEBUG_FOURK
137 int vm_map_debug_fourk = 0;
138 #endif /* VM_MAP_DEBUG_FOURK */
139
140 SECURITY_READ_ONLY_LATE(int) vm_map_executable_immutable = 1;
141 int vm_map_executable_immutable_verbose = 0;
142
143 os_refgrp_decl(static, map_refgrp, "vm_map", NULL);
144
145 extern u_int32_t random(void); /* from <libkern/libkern.h> */
146 /* Internal prototypes
147 */
148
149 static void vm_map_simplify_range(
150 vm_map_t map,
151 vm_map_offset_t start,
152 vm_map_offset_t end); /* forward */
153
154 static boolean_t vm_map_range_check(
155 vm_map_t map,
156 vm_map_offset_t start,
157 vm_map_offset_t end,
158 vm_map_entry_t *entry);
159
160 static vm_map_entry_t _vm_map_entry_create(
161 struct vm_map_header *map_header, boolean_t map_locked);
162
163 static void _vm_map_entry_dispose(
164 struct vm_map_header *map_header,
165 vm_map_entry_t entry);
166
167 static void vm_map_pmap_enter(
168 vm_map_t map,
169 vm_map_offset_t addr,
170 vm_map_offset_t end_addr,
171 vm_object_t object,
172 vm_object_offset_t offset,
173 vm_prot_t protection);
174
175 static void _vm_map_clip_end(
176 struct vm_map_header *map_header,
177 vm_map_entry_t entry,
178 vm_map_offset_t end);
179
180 static void _vm_map_clip_start(
181 struct vm_map_header *map_header,
182 vm_map_entry_t entry,
183 vm_map_offset_t start);
184
185 static void vm_map_entry_delete(
186 vm_map_t map,
187 vm_map_entry_t entry);
188
189 static kern_return_t vm_map_delete(
190 vm_map_t map,
191 vm_map_offset_t start,
192 vm_map_offset_t end,
193 int flags,
194 vm_map_t zap_map);
195
196 static void vm_map_copy_insert(
197 vm_map_t map,
198 vm_map_entry_t after_where,
199 vm_map_copy_t copy);
200
201 static kern_return_t vm_map_copy_overwrite_unaligned(
202 vm_map_t dst_map,
203 vm_map_entry_t entry,
204 vm_map_copy_t copy,
205 vm_map_address_t start,
206 boolean_t discard_on_success);
207
208 static kern_return_t vm_map_copy_overwrite_aligned(
209 vm_map_t dst_map,
210 vm_map_entry_t tmp_entry,
211 vm_map_copy_t copy,
212 vm_map_offset_t start,
213 pmap_t pmap);
214
215 static kern_return_t vm_map_copyin_kernel_buffer(
216 vm_map_t src_map,
217 vm_map_address_t src_addr,
218 vm_map_size_t len,
219 boolean_t src_destroy,
220 vm_map_copy_t *copy_result); /* OUT */
221
222 static kern_return_t vm_map_copyout_kernel_buffer(
223 vm_map_t map,
224 vm_map_address_t *addr, /* IN/OUT */
225 vm_map_copy_t copy,
226 vm_map_size_t copy_size,
227 boolean_t overwrite,
228 boolean_t consume_on_success);
229
230 static void vm_map_fork_share(
231 vm_map_t old_map,
232 vm_map_entry_t old_entry,
233 vm_map_t new_map);
234
235 static boolean_t vm_map_fork_copy(
236 vm_map_t old_map,
237 vm_map_entry_t *old_entry_p,
238 vm_map_t new_map,
239 int vm_map_copyin_flags);
240
241 static kern_return_t vm_map_wire_nested(
242 vm_map_t map,
243 vm_map_offset_t start,
244 vm_map_offset_t end,
245 vm_prot_t caller_prot,
246 vm_tag_t tag,
247 boolean_t user_wire,
248 pmap_t map_pmap,
249 vm_map_offset_t pmap_addr,
250 ppnum_t *physpage_p);
251
252 static kern_return_t vm_map_unwire_nested(
253 vm_map_t map,
254 vm_map_offset_t start,
255 vm_map_offset_t end,
256 boolean_t user_wire,
257 pmap_t map_pmap,
258 vm_map_offset_t pmap_addr);
259
260 static kern_return_t vm_map_overwrite_submap_recurse(
261 vm_map_t dst_map,
262 vm_map_offset_t dst_addr,
263 vm_map_size_t dst_size);
264
265 static kern_return_t vm_map_copy_overwrite_nested(
266 vm_map_t dst_map,
267 vm_map_offset_t dst_addr,
268 vm_map_copy_t copy,
269 boolean_t interruptible,
270 pmap_t pmap,
271 boolean_t discard_on_success);
272
273 static kern_return_t vm_map_remap_extract(
274 vm_map_t map,
275 vm_map_offset_t addr,
276 vm_map_size_t size,
277 boolean_t copy,
278 struct vm_map_header *map_header,
279 vm_prot_t *cur_protection,
280 vm_prot_t *max_protection,
281 vm_inherit_t inheritance,
282 boolean_t pageable,
283 boolean_t same_map,
284 vm_map_kernel_flags_t vmk_flags);
285
286 static kern_return_t vm_map_remap_range_allocate(
287 vm_map_t map,
288 vm_map_address_t *address,
289 vm_map_size_t size,
290 vm_map_offset_t mask,
291 int flags,
292 vm_map_kernel_flags_t vmk_flags,
293 vm_tag_t tag,
294 vm_map_entry_t *map_entry);
295
296 static void vm_map_region_look_for_page(
297 vm_map_t map,
298 vm_map_offset_t va,
299 vm_object_t object,
300 vm_object_offset_t offset,
301 int max_refcnt,
302 int depth,
303 vm_region_extended_info_t extended,
304 mach_msg_type_number_t count);
305
306 static int vm_map_region_count_obj_refs(
307 vm_map_entry_t entry,
308 vm_object_t object);
309
310
311 static kern_return_t vm_map_willneed(
312 vm_map_t map,
313 vm_map_offset_t start,
314 vm_map_offset_t end);
315
316 static kern_return_t vm_map_reuse_pages(
317 vm_map_t map,
318 vm_map_offset_t start,
319 vm_map_offset_t end);
320
321 static kern_return_t vm_map_reusable_pages(
322 vm_map_t map,
323 vm_map_offset_t start,
324 vm_map_offset_t end);
325
326 static kern_return_t vm_map_can_reuse(
327 vm_map_t map,
328 vm_map_offset_t start,
329 vm_map_offset_t end);
330
331 #if MACH_ASSERT
332 static kern_return_t vm_map_pageout(
333 vm_map_t map,
334 vm_map_offset_t start,
335 vm_map_offset_t end);
336 #endif /* MACH_ASSERT */
337
338 static void vm_map_corpse_footprint_destroy(
339 vm_map_t map);
340
341 pid_t find_largest_process_vm_map_entries(void);
342
343 /*
344 * Macros to copy a vm_map_entry. We must be careful to correctly
345 * manage the wired page count. vm_map_entry_copy() creates a new
346 * map entry to the same memory - the wired count in the new entry
347 * must be set to zero. vm_map_entry_copy_full() creates a new
348 * entry that is identical to the old entry. This preserves the
349 * wire count; it's used for map splitting and zone changing in
350 * vm_map_copyout.
351 */
352
#if CONFIG_EMBEDDED

/*
 * vm_map_entry_copy() has just copied every field of OLD into NEW,
 * including "used_for_jit" and "pmap_cs_associated".
 * For security reasons on embedded platforms the copy must never be
 * "used for jit" nor associated with pmap_cs, so both flags are
 * unconditionally cleared in NEW.  OLD is not examined.
 */
#define VM_MAP_ENTRY_COPY_CODE_SIGNING(NEW, OLD)        \
MACRO_BEGIN                                             \
	(NEW)->pmap_cs_associated = FALSE;              \
	(NEW)->used_for_jit = FALSE;                    \
MACRO_END

#else /* CONFIG_EMBEDDED */

/*
 * vm_map_entry_copy() has just copied every field of OLD into NEW,
 * including "used_for_jit".
 * On macOS the copy is allowed to stay "used for jit", so nothing is
 * modified; the macro only asserts that the flag still matches OLD and
 * that NEW is not "pmap_cs_associated".
 */
#define VM_MAP_ENTRY_COPY_CODE_SIGNING(NEW, OLD)                \
MACRO_BEGIN                                                     \
	assert((NEW)->used_for_jit == (OLD)->used_for_jit);     \
	assert((NEW)->pmap_cs_associated == FALSE);             \
MACRO_END

#endif /* CONFIG_EMBEDDED */
380
/*
 * vm_map_entry_copy(NEW, OLD):
 *	Make NEW describe the same memory as OLD, as a fresh mapping.
 *	All of OLD is copied, then the per-mapping state is reset:
 *	  - NEW keeps its own "from_reserved_zone" flag (saved before the
 *	    structure copy and restored afterwards);
 *	  - sharing, wakeup, in-transition and permanent flags are cleared;
 *	  - both wire counts are zeroed;
 *	  - an "iokit_acct" entry is converted to a "use_pmap" entry;
 *	  - resilient-codesign, resilient-media, atomic and
 *	    no-copy-on-read flags are cleared;
 *	  - VM_MAP_ENTRY_COPY_CODE_SIGNING() applies the platform policy
 *	    for the code-signing related flags.
 *	NEW and OLD are evaluated more than once.
 */
#define vm_map_entry_copy(NEW, OLD)                                     \
MACRO_BEGIN                                                             \
	boolean_t _vmec_from_reserved = (NEW)->from_reserved_zone;      \
	*(NEW) = *(OLD);                                                \
	(NEW)->from_reserved_zone = _vmec_from_reserved;                \
	/* state that belongs to the old mapping only */                \
	(NEW)->wired_count = 0;                                         \
	(NEW)->user_wired_count = 0;                                    \
	(NEW)->is_shared = FALSE;                                       \
	(NEW)->needs_wakeup = FALSE;                                    \
	(NEW)->in_transition = FALSE;                                   \
	(NEW)->permanent = FALSE;                                       \
	(NEW)->vme_resilient_codesign = FALSE;                          \
	(NEW)->vme_resilient_media = FALSE;                             \
	(NEW)->vme_atomic = FALSE;                                      \
	(NEW)->vme_no_copy_on_read = FALSE;                             \
	VM_MAP_ENTRY_COPY_CODE_SIGNING((NEW),(OLD));                    \
	if ((NEW)->iokit_acct) {                                        \
	        assertf(!(NEW)->use_pmap, "old %p new %p\n", (OLD), (NEW)); \
	        (NEW)->iokit_acct = FALSE;                              \
	        (NEW)->use_pmap = TRUE;                                 \
	}                                                               \
MACRO_END
403
/*
 * vm_map_entry_copy_full(NEW, OLD):
 *	Make NEW an exact copy of OLD (wire counts included), except that
 *	NEW keeps its own "from_reserved_zone" flag, which is saved before
 *	the structure copy and put back afterwards.
 *	NEW is evaluated more than once.
 */
#define vm_map_entry_copy_full(NEW, OLD)                                \
MACRO_BEGIN                                                             \
	boolean_t _vmecf_from_reserved = (NEW)->from_reserved_zone;     \
	*(NEW) = *(OLD);                                                \
	(NEW)->from_reserved_zone = _vmecf_from_reserved;               \
MACRO_END
410
411 /*
412 * Normal lock_read_to_write() returns FALSE/0 on failure.
413 * These functions evaluate to zero on success and non-zero value on failure.
414 */
415 __attribute__((always_inline))
416 int
417 vm_map_lock_read_to_write(vm_map_t map)
418 {
419 if (lck_rw_lock_shared_to_exclusive(&(map)->lock)) {
420 DTRACE_VM(vm_map_lock_upgrade);
421 return 0;
422 }
423 return 1;
424 }
425
426 __attribute__((always_inline))
427 boolean_t
428 vm_map_try_lock(vm_map_t map)
429 {
430 if (lck_rw_try_lock_exclusive(&(map)->lock)) {
431 DTRACE_VM(vm_map_lock_w);
432 return TRUE;
433 }
434 return FALSE;
435 }
436
437 __attribute__((always_inline))
438 boolean_t
439 vm_map_try_lock_read(vm_map_t map)
440 {
441 if (lck_rw_try_lock_shared(&(map)->lock)) {
442 DTRACE_VM(vm_map_lock_r);
443 return TRUE;
444 }
445 return FALSE;
446 }
447
448 /*
449 * Decide if we want to allow processes to execute from their data or stack areas.
450 * override_nx() returns true if we do. Data/stack execution can be enabled independently
451 * for 32 and 64 bit processes. Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
452 * or allow_stack_exec to enable data execution for that type of data area for that particular
453 * ABI (or both by or'ing the flags together). These are initialized in the architecture
454 * specific pmap files since the default behavior varies according to architecture. The
455 * main reason it varies is because of the need to provide binary compatibility with old
456 * applications that were written before these restrictions came into being. In the old
457 * days, an app could execute anything it could read, but this has slowly been tightened
458 * up over time. The default behavior is:
459 *
460 * 32-bit PPC apps may execute from both stack and data areas
461 * 32-bit Intel apps may execute from data areas but not stack
462 * 64-bit PPC/Intel apps may not execute from either data or stack
463 *
464 * An application on any architecture may override these defaults by explicitly
465 * adding PROT_EXEC permission to the page in question with the mprotect(2)
466 * system call. This code here just determines what happens when an app tries to
467 * execute from a page that lacks execute permission.
468 *
469 * Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
470 * default behavior for both 32 and 64 bit apps on a system-wide basis. Furthermore,
471 * a Mach-O header flag bit (MH_NO_HEAP_EXECUTION) can be used to forcibly disallow
472 * execution from data areas for a particular binary even if the arch normally permits it. As
473 * a final wrinkle, a posix_spawn attribute flag can be used to negate this opt-in header bit
474 * to support some complicated use cases, notably browsers with out-of-process plugins that
475 * are not all NX-safe.
476 */
477
478 extern int allow_data_exec, allow_stack_exec;
479
480 int
481 override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
482 {
483 int current_abi;
484
485 if (map->pmap == kernel_pmap) {
486 return FALSE;
487 }
488
489 /*
490 * Determine if the app is running in 32 or 64 bit mode.
491 */
492
493 if (vm_map_is_64bit(map)) {
494 current_abi = VM_ABI_64;
495 } else {
496 current_abi = VM_ABI_32;
497 }
498
499 /*
500 * Determine if we should allow the execution based on whether it's a
501 * stack or data area and the current architecture.
502 */
503
504 if (user_tag == VM_MEMORY_STACK) {
505 return allow_stack_exec & current_abi;
506 }
507
508 return (allow_data_exec & current_abi) && (map->map_disallow_data_exec == FALSE);
509 }
510
511
512 /*
513 * Virtual memory maps provide for the mapping, protection,
514 * and sharing of virtual memory objects. In addition,
515 * this module provides for an efficient virtual copy of
516 * memory from one map to another.
517 *
518 * Synchronization is required prior to most operations.
519 *
520 * Maps consist of an ordered doubly-linked list of simple
521 * entries; a single hint is used to speed up lookups.
522 *
523 * Sharing maps have been deleted from this version of Mach.
524 * All shared objects are now mapped directly into the respective
525 * maps. This requires a change in the copy on write strategy;
526 * the asymmetric (delayed) strategy is used for shared temporary
527 * objects instead of the symmetric (shadow) strategy. All maps
528 * are now "top level" maps (either task map, kernel map or submap
529 * of the kernel map).
530 *
531 * Since portions of maps are specified by start/end addresses,
532 * which may not align with existing map entries, all
533 * routines merely "clip" entries to these start/end values.
534 * [That is, an entry is split into two, bordering at a
535 * start or end value.] Note that these clippings may not
536 * always be necessary (as the two resulting entries are then
537 * not changed); however, the clipping is done for convenience.
538 * No attempt is currently made to "glue back together" two
539 * abutting entries.
540 *
541 * The symmetric (shadow) copy strategy implements virtual copy
542 * by copying VM object references from one map to
543 * another, and then marking both regions as copy-on-write.
544 * It is important to note that only one writeable reference
545 * to a VM object region exists in any map when this strategy
546 * is used -- this means that shadow object creation can be
547 * delayed until a write operation occurs. The asymmetric (delayed)
548 * strategy allows multiple maps to have writeable references to
549 * the same region of a vm object, and hence cannot delay creating
550 * its copy objects. See vm_object_copy_quickly() in vm_object.c.
551 * Copying of permanent objects is completely different; see
552 * vm_object_copy_strategically() in vm_object.c.
553 */
554
555 static zone_t vm_map_zone; /* zone for vm_map structures */
556 zone_t vm_map_entry_zone; /* zone for vm_map_entry structures */
557 static zone_t vm_map_entry_reserved_zone; /* zone with reserve for non-blocking allocations */
558 static zone_t vm_map_copy_zone; /* zone for vm_map_copy structures */
559 zone_t vm_map_holes_zone; /* zone for vm map holes (vm_map_links) structures */
560
561
562 /*
563 * Placeholder object for submap operations. This object is dropped
564 * into the range by a call to vm_map_find, and removed when
565 * vm_map_submap creates the submap.
566 */
567
568 vm_object_t vm_submap_object;
569
570 static void *map_data;
571 static vm_size_t map_data_size;
572 static void *kentry_data;
573 static vm_size_t kentry_data_size;
574 static void *map_holes_data;
575 static vm_size_t map_holes_data_size;
576
577 #if CONFIG_EMBEDDED
578 #define NO_COALESCE_LIMIT 0
579 #else
580 #define NO_COALESCE_LIMIT ((1024 * 128) - 1)
581 #endif
582
583 /* Skip acquiring locks if we're in the midst of a kernel core dump */
584 unsigned int not_in_kdp = 1;
585
586 unsigned int vm_map_set_cache_attr_count = 0;
587
588 kern_return_t
589 vm_map_set_cache_attr(
590 vm_map_t map,
591 vm_map_offset_t va)
592 {
593 vm_map_entry_t map_entry;
594 vm_object_t object;
595 kern_return_t kr = KERN_SUCCESS;
596
597 vm_map_lock_read(map);
598
599 if (!vm_map_lookup_entry(map, va, &map_entry) ||
600 map_entry->is_sub_map) {
601 /*
602 * that memory is not properly mapped
603 */
604 kr = KERN_INVALID_ARGUMENT;
605 goto done;
606 }
607 object = VME_OBJECT(map_entry);
608
609 if (object == VM_OBJECT_NULL) {
610 /*
611 * there should be a VM object here at this point
612 */
613 kr = KERN_INVALID_ARGUMENT;
614 goto done;
615 }
616 vm_object_lock(object);
617 object->set_cache_attr = TRUE;
618 vm_object_unlock(object);
619
620 vm_map_set_cache_attr_count++;
621 done:
622 vm_map_unlock_read(map);
623
624 return kr;
625 }
626
627
#if CONFIG_CODE_DECRYPTION
/*
 *	vm_map_apple_protected:
 *
 *	Re-map the executable mappings covering [start, end) in "map" so
 *	that they are backed by the decrypting ("apple protect") pager
 *	layered over the VM object that was originally mapped there.
 *
 *	The range is widened to page boundaries (kernel page size, then the
 *	map's page size) and processed one map entry at a time.  For each
 *	entry, apple_protect_pager_setup() supplies a memory object which is
 *	mapped over the entry with VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
 *	keeping the entry's protection, max protection and inheritance.
 *
 *	Parameters:
 *	  map			map to operate on
 *	  start, end		range to cover; need not be page-aligned
 *	  crypto_backing_offset	offset passed to the pager for the first
 *				entry, or (vm_object_offset_t)-1 to use that
 *				entry's own object offset; it is advanced
 *				from one entry to the next
 *	  crypt_info		entry points and session data for the crypt
 *				module.  The crypt_info block will be copied;
 *				the data structures referenced in crypt_info
 *				must remain valid until
 *				crypt_info->crypt_end() is called.
 *
 *	Returns:
 *	  KERN_SUCCESS		every entry in the range was re-mapped
 *	  KERN_INVALID_ARGUMENT	an address in the range is not mapped, is
 *				mapped by a submap, has no VM object or is
 *				not executable
 *	  KERN_INVALID_ADDRESS	an entry extends beyond [start, end) without
 *				lining up with the aligned range
 *	  KERN_FAILURE		no memory object could be set up
 *	  (other)		error returned by vm_map_enter_mem_object()
 *
 *	Entries that were re-mapped before a failure stay re-mapped.
 */
kern_return_t
vm_map_apple_protected(
	vm_map_t                map,
	vm_map_offset_t         start,
	vm_map_offset_t         end,
	vm_object_offset_t      crypto_backing_offset,
	struct pager_crypt_info *crypt_info)
{
	boolean_t               map_locked;
	kern_return_t           kr;
	vm_map_entry_t          map_entry;
	struct vm_map_entry     tmp_entry;
	memory_object_t         unprotected_mem_obj;
	vm_object_t             protected_object;
	vm_map_offset_t         map_addr;
	vm_map_offset_t         start_aligned, end_aligned;
	vm_object_offset_t      crypto_start, crypto_end;
	int                     vm_flags;
	vm_map_kernel_flags_t   vmk_flags;

	vm_flags = 0;
	vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;

	map_locked = FALSE;
	unprotected_mem_obj = MEMORY_OBJECT_NULL;

	start_aligned = vm_map_trunc_page(start, PAGE_MASK_64);
	end_aligned = vm_map_round_page(end, PAGE_MASK_64);
	start_aligned = vm_map_trunc_page(start_aligned, VM_MAP_PAGE_MASK(map));
	end_aligned = vm_map_round_page(end_aligned, VM_MAP_PAGE_MASK(map));

#if __arm64__
	/*
	 * "start" and "end" might be 4K-aligned but not 16K-aligned,
	 * so we might have to loop and establish up to 3 mappings:
	 *
	 * + the first 16K-page, which might overlap with the previous
	 *   4K-aligned mapping,
	 * + the center,
	 * + the last 16K-page, which might overlap with the next
	 *   4K-aligned mapping.
	 * Each of these mapping might be backed by a vnode pager (if
	 * properly page-aligned) or a "fourk_pager", itself backed by a
	 * vnode pager (if 4K-aligned but not page-aligned).
	 */
#endif /* __arm64__ */

	for (map_addr = start_aligned;
	    map_addr < end;
	    map_addr = tmp_entry.vme_end) {
		vm_map_lock(map);
		map_locked = TRUE;

		/* lookup the protected VM object */
		if (!vm_map_lookup_entry(map,
		    map_addr,
		    &map_entry) ||
		    map_entry->is_sub_map ||
		    VME_OBJECT(map_entry) == VM_OBJECT_NULL ||
		    !(map_entry->protection & VM_PROT_EXECUTE)) {
			/* that memory is not properly mapped */
			kr = KERN_INVALID_ARGUMENT;
			goto done;
		}

		/*
		 * get the protected object to be decrypted
		 * (known to be non-NULL from the check above)
		 */
		protected_object = VME_OBJECT(map_entry);
		/* ensure protected object stays alive while map is unlocked */
		vm_object_reference(protected_object);

		/* limit the map entry to the area we want to cover */
		vm_map_clip_start(map, map_entry, start_aligned);
		vm_map_clip_end(map, map_entry, end_aligned);

		tmp_entry = *map_entry;
		map_entry = VM_MAP_ENTRY_NULL; /* not valid after unlocking map */
		vm_map_unlock(map);
		map_locked = FALSE;

		/*
		 * This map entry might be only partially encrypted
		 * (if not fully "page-aligned").
		 * An entry that sticks out of [start, end) is only
		 * acceptable when it lines up with the aligned range;
		 * otherwise give up instead of mapping a pager with a
		 * bogus crypto range.
		 */
		crypto_start = 0;
		crypto_end = tmp_entry.vme_end - tmp_entry.vme_start;
		if (tmp_entry.vme_start < start) {
			if (tmp_entry.vme_start != start_aligned) {
				kr = KERN_INVALID_ADDRESS;
				/* release extra ref on protected object */
				vm_object_deallocate(protected_object);
				goto done;
			}
			crypto_start += (start - tmp_entry.vme_start);
		}
		if (tmp_entry.vme_end > end) {
			if (tmp_entry.vme_end != end_aligned) {
				kr = KERN_INVALID_ADDRESS;
				/* release extra ref on protected object */
				vm_object_deallocate(protected_object);
				goto done;
			}
			crypto_end -= (tmp_entry.vme_end - end);
		}

		/*
		 * This "extra backing offset" is needed to get the decryption
		 * routine to use the right key.  It adjusts for the possibly
		 * relative offset of an interposed "4K" pager...
		 */
		if (crypto_backing_offset == (vm_object_offset_t) -1) {
			crypto_backing_offset = VME_OFFSET(&tmp_entry);
		}

		/*
		 * Lookup (and create if necessary) the protected memory object
		 * matching that VM object.
		 * If successful, this also grabs a reference on the memory object,
		 * to guarantee that it doesn't go away before we get a chance to map
		 * it.
		 */
		unprotected_mem_obj = apple_protect_pager_setup(
			protected_object,
			VME_OFFSET(&tmp_entry),
			crypto_backing_offset,
			crypt_info,
			crypto_start,
			crypto_end);

		/* release extra ref on protected object */
		vm_object_deallocate(protected_object);

		if (unprotected_mem_obj == NULL) {
			kr = KERN_FAILURE;
			goto done;
		}

		vm_flags = VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
		/* can overwrite an immutable mapping */
		vmk_flags.vmkf_overwrite_immutable = TRUE;
#if __arm64__
		if (tmp_entry.used_for_jit &&
		    (VM_MAP_PAGE_SHIFT(map) != FOURK_PAGE_SHIFT ||
		    PAGE_SHIFT != FOURK_PAGE_SHIFT) &&
		    fourk_binary_compatibility_unsafe &&
		    fourk_binary_compatibility_allow_wx) {
			printf("** FOURK_COMPAT [%d]: "
			    "allowing write+execute at 0x%llx\n",
			    proc_selfpid(), tmp_entry.vme_start);
			vmk_flags.vmkf_map_jit = TRUE;
		}
#endif /* __arm64__ */

		/* map this memory object in place of the current one */
		map_addr = tmp_entry.vme_start;
		kr = vm_map_enter_mem_object(map,
		    &map_addr,
		    (tmp_entry.vme_end -
		    tmp_entry.vme_start),
		    (mach_vm_offset_t) 0,
		    vm_flags,
		    vmk_flags,
		    VM_KERN_MEMORY_NONE,
		    (ipc_port_t)(uintptr_t) unprotected_mem_obj,
		    0,
		    TRUE,
		    tmp_entry.protection,
		    tmp_entry.max_protection,
		    tmp_entry.inheritance);
		assertf(kr == KERN_SUCCESS,
		    "kr = 0x%x\n", kr);
		assertf(map_addr == tmp_entry.vme_start,
		    "map_addr=0x%llx vme_start=0x%llx tmp_entry=%p\n",
		    (uint64_t)map_addr,
		    (uint64_t) tmp_entry.vme_start,
		    &tmp_entry);

#if VM_MAP_DEBUG_APPLE_PROTECT
		if (vm_map_debug_apple_protect) {
			printf("APPLE_PROTECT: map %p [0x%llx:0x%llx] pager %p:"
			    " backing:[object:%p,offset:0x%llx,"
			    "crypto_backing_offset:0x%llx,"
			    "crypto_start:0x%llx,crypto_end:0x%llx]\n",
			    map,
			    (uint64_t) map_addr,
			    (uint64_t) (map_addr + (tmp_entry.vme_end -
			    tmp_entry.vme_start)),
			    unprotected_mem_obj,
			    protected_object,
			    VME_OFFSET(&tmp_entry),
			    crypto_backing_offset,
			    crypto_start,
			    crypto_end);
		}
#endif /* VM_MAP_DEBUG_APPLE_PROTECT */

		/*
		 * Release the reference obtained by
		 * apple_protect_pager_setup().
		 * The mapping (if it succeeded) is now holding a reference on
		 * the memory object.
		 */
		memory_object_deallocate(unprotected_mem_obj);
		unprotected_mem_obj = MEMORY_OBJECT_NULL;

		/*
		 * The assertions above are compiled out of release kernels:
		 * report a failed mapping to the caller rather than carrying
		 * on and claiming success for a range that was not re-mapped.
		 */
		if (kr != KERN_SUCCESS) {
			goto done;
		}

		/* continue with next map entry */
		crypto_backing_offset += (tmp_entry.vme_end -
		    tmp_entry.vme_start);
		crypto_backing_offset -= crypto_start;
	}
	kr = KERN_SUCCESS;

done:
	if (map_locked) {
		vm_map_unlock(map);
	}
	return kr;
}
#endif  /* CONFIG_CODE_DECRYPTION */
855
856
857 lck_grp_t vm_map_lck_grp;
858 lck_grp_attr_t vm_map_lck_grp_attr;
859 lck_attr_t vm_map_lck_attr;
860 lck_attr_t vm_map_lck_rw_attr;
861
862 #if CONFIG_EMBEDDED
863 int malloc_no_cow = 1;
864 #define VM_PROTECT_WX_FAIL 0
865 #else /* CONFIG_EMBEDDED */
866 int malloc_no_cow = 0;
867 #define VM_PROTECT_WX_FAIL 1
868 #endif /* CONFIG_EMBEDDED */
869 uint64_t vm_memory_malloc_no_cow_mask = 0ULL;
870 #if DEBUG
871 int vm_check_map_sanity = 0;
872 #endif
873
874 /*
875 * vm_map_init:
876 *
877 * Initialize the vm_map module. Must be called before
878 * any other vm_map routines.
879 *
880 * Map and entry structures are allocated from zones -- we must
881 * initialize those zones.
882 *
883 * There are three zones of interest:
884 *
885 * vm_map_zone: used to allocate maps.
886 * vm_map_entry_zone: used to allocate map entries.
887 * vm_map_entry_reserved_zone: fallback zone for kernel map entries
888 *
889 * The kernel allocates map entries from a special zone that is initially
890 * "crammed" with memory. It would be difficult (perhaps impossible) for
891 * the kernel to allocate more memory to an entry zone when it became
892 * empty since the very act of allocating memory implies the creation
893 * of a new entry.
894 */
895 void
896 vm_map_init(
897 void)
898 {
899 vm_size_t entry_zone_alloc_size;
900 const char *mez_name = "VM map entries";
901
902 vm_map_zone = zinit((vm_map_size_t) sizeof(struct _vm_map), 40 * 1024,
903 PAGE_SIZE, "maps");
904 zone_change(vm_map_zone, Z_NOENCRYPT, TRUE);
905 #if defined(__LP64__)
906 entry_zone_alloc_size = PAGE_SIZE * 5;
907 #else
908 entry_zone_alloc_size = PAGE_SIZE * 6;
909 #endif
910 vm_map_entry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
911 1024 * 1024, entry_zone_alloc_size,
912 mez_name);
913 zone_change(vm_map_entry_zone, Z_NOENCRYPT, TRUE);
914 zone_change(vm_map_entry_zone, Z_NOCALLOUT, TRUE);
915 zone_change(vm_map_entry_zone, Z_GZALLOC_EXEMPT, TRUE);
916
917 vm_map_entry_reserved_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
918 kentry_data_size * 64, kentry_data_size,
919 "Reserved VM map entries");
920 zone_change(vm_map_entry_reserved_zone, Z_NOENCRYPT, TRUE);
921 /* Don't quarantine because we always need elements available */
922 zone_change(vm_map_entry_reserved_zone, Z_KASAN_QUARANTINE, FALSE);
923
924 vm_map_copy_zone = zinit((vm_map_size_t) sizeof(struct vm_map_copy),
925 16 * 1024, PAGE_SIZE, "VM map copies");
926 zone_change(vm_map_copy_zone, Z_NOENCRYPT, TRUE);
927
928 vm_map_holes_zone = zinit((vm_map_size_t) sizeof(struct vm_map_links),
929 16 * 1024, PAGE_SIZE, "VM map holes");
930 zone_change(vm_map_holes_zone, Z_NOENCRYPT, TRUE);
931
932 /*
933 * Cram the map and kentry zones with initial data.
934 * Set reserved_zone non-collectible to aid zone_gc().
935 */
936 zone_change(vm_map_zone, Z_COLLECT, FALSE);
937 zone_change(vm_map_zone, Z_FOREIGN, TRUE);
938 zone_change(vm_map_zone, Z_GZALLOC_EXEMPT, TRUE);
939
940 zone_change(vm_map_entry_reserved_zone, Z_COLLECT, FALSE);
941 zone_change(vm_map_entry_reserved_zone, Z_EXPAND, FALSE);
942 zone_change(vm_map_entry_reserved_zone, Z_FOREIGN, TRUE);
943 zone_change(vm_map_entry_reserved_zone, Z_NOCALLOUT, TRUE);
944 zone_change(vm_map_entry_reserved_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
945 zone_change(vm_map_copy_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
946 zone_change(vm_map_entry_reserved_zone, Z_GZALLOC_EXEMPT, TRUE);
947
948 zone_change(vm_map_holes_zone, Z_COLLECT, TRUE);
949 zone_change(vm_map_holes_zone, Z_EXPAND, TRUE);
950 zone_change(vm_map_holes_zone, Z_FOREIGN, TRUE);
951 zone_change(vm_map_holes_zone, Z_NOCALLOUT, TRUE);
952 zone_change(vm_map_holes_zone, Z_CALLERACCT, TRUE);
953 zone_change(vm_map_holes_zone, Z_GZALLOC_EXEMPT, TRUE);
954
955 /*
956 * Add the stolen memory to zones, adjust zone size and stolen counts.
957 * zcram only up to the maximum number of pages for each zone chunk.
958 */
959 zcram(vm_map_zone, (vm_offset_t)map_data, map_data_size);
960
961 const vm_size_t stride = ZONE_CHUNK_MAXPAGES * PAGE_SIZE;
962 for (vm_offset_t off = 0; off < kentry_data_size; off += stride) {
963 zcram(vm_map_entry_reserved_zone,
964 (vm_offset_t)kentry_data + off,
965 MIN(kentry_data_size - off, stride));
966 }
967 for (vm_offset_t off = 0; off < map_holes_data_size; off += stride) {
968 zcram(vm_map_holes_zone,
969 (vm_offset_t)map_holes_data + off,
970 MIN(map_holes_data_size - off, stride));
971 }
972
973 /*
974 * Since these are covered by zones, remove them from stolen page accounting.
975 */
976 VM_PAGE_MOVE_STOLEN(atop_64(map_data_size) + atop_64(kentry_data_size) + atop_64(map_holes_data_size));
977
978 lck_grp_attr_setdefault(&vm_map_lck_grp_attr);
979 lck_grp_init(&vm_map_lck_grp, "vm_map", &vm_map_lck_grp_attr);
980 lck_attr_setdefault(&vm_map_lck_attr);
981
982 lck_attr_setdefault(&vm_map_lck_rw_attr);
983 lck_attr_cleardebug(&vm_map_lck_rw_attr);
984
985 #if VM_MAP_DEBUG_APPLE_PROTECT
986 PE_parse_boot_argn("vm_map_debug_apple_protect",
987 &vm_map_debug_apple_protect,
988 sizeof(vm_map_debug_apple_protect));
989 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
990 #if VM_MAP_DEBUG_APPLE_FOURK
991 PE_parse_boot_argn("vm_map_debug_fourk",
992 &vm_map_debug_fourk,
993 sizeof(vm_map_debug_fourk));
994 #endif /* VM_MAP_DEBUG_FOURK */
995 PE_parse_boot_argn("vm_map_executable_immutable",
996 &vm_map_executable_immutable,
997 sizeof(vm_map_executable_immutable));
998 PE_parse_boot_argn("vm_map_executable_immutable_verbose",
999 &vm_map_executable_immutable_verbose,
1000 sizeof(vm_map_executable_immutable_verbose));
1001
1002 PE_parse_boot_argn("malloc_no_cow",
1003 &malloc_no_cow,
1004 sizeof(malloc_no_cow));
1005 if (malloc_no_cow) {
1006 vm_memory_malloc_no_cow_mask = 0ULL;
1007 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC;
1008 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_SMALL;
1009 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_MEDIUM;
1010 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE;
1011 // vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_HUGE;
1012 // vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_REALLOC;
1013 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_TINY;
1014 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE_REUSABLE;
1015 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE_REUSED;
1016 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_NANO;
1017 // vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_TCMALLOC;
1018 PE_parse_boot_argn("vm_memory_malloc_no_cow_mask",
1019 &vm_memory_malloc_no_cow_mask,
1020 sizeof(vm_memory_malloc_no_cow_mask));
1021 }
1022
1023 #if DEBUG
1024 PE_parse_boot_argn("vm_check_map_sanity", &vm_check_map_sanity, sizeof(vm_check_map_sanity));
1025 if (vm_check_map_sanity) {
1026 kprintf("VM sanity checking enabled\n");
1027 } else {
1028 kprintf("VM sanity checking disabled. Set bootarg vm_check_map_sanity=1 to enable\n");
1029 }
1030 #endif /* DEBUG */
1031 }
1032
1033 void
1034 vm_map_steal_memory(
1035 void)
1036 {
1037 uint32_t kentry_initial_pages;
1038
1039 map_data_size = round_page(10 * sizeof(struct _vm_map));
1040 map_data = pmap_steal_memory(map_data_size);
1041
1042 /*
1043 * kentry_initial_pages corresponds to the number of kernel map entries
1044 * required during bootstrap until the asynchronous replenishment
1045 * scheme is activated and/or entries are available from the general
1046 * map entry pool.
1047 */
1048 #if defined(__LP64__)
1049 kentry_initial_pages = 10;
1050 #else
1051 kentry_initial_pages = 6;
1052 #endif
1053
1054 #if CONFIG_GZALLOC
1055 /* If using the guard allocator, reserve more memory for the kernel
1056 * reserved map entry pool.
1057 */
1058 if (gzalloc_enabled()) {
1059 kentry_initial_pages *= 1024;
1060 }
1061 #endif
1062
1063 kentry_data_size = kentry_initial_pages * PAGE_SIZE;
1064 kentry_data = pmap_steal_memory(kentry_data_size);
1065
1066 map_holes_data_size = kentry_data_size;
1067 map_holes_data = pmap_steal_memory(map_holes_data_size);
1068 }
1069
1070 boolean_t vm_map_supports_hole_optimization = FALSE;
1071
1072 void
1073 vm_kernel_reserved_entry_init(void)
1074 {
1075 zone_prio_refill_configure(vm_map_entry_reserved_zone, (6 * PAGE_SIZE) / sizeof(struct vm_map_entry));
1076
1077 /*
1078 * Once we have our replenish thread set up, we can start using the vm_map_holes zone.
1079 */
1080 zone_prio_refill_configure(vm_map_holes_zone, (6 * PAGE_SIZE) / sizeof(struct vm_map_links));
1081 vm_map_supports_hole_optimization = TRUE;
1082 }
1083
1084 void
1085 vm_map_disable_hole_optimization(vm_map_t map)
1086 {
1087 vm_map_entry_t head_entry, hole_entry, next_hole_entry;
1088
1089 if (map->holelistenabled) {
1090 head_entry = hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
1091
1092 while (hole_entry != NULL) {
1093 next_hole_entry = hole_entry->vme_next;
1094
1095 hole_entry->vme_next = NULL;
1096 hole_entry->vme_prev = NULL;
1097 zfree(vm_map_holes_zone, hole_entry);
1098
1099 if (next_hole_entry == head_entry) {
1100 hole_entry = NULL;
1101 } else {
1102 hole_entry = next_hole_entry;
1103 }
1104 }
1105
1106 map->holes_list = NULL;
1107 map->holelistenabled = FALSE;
1108
1109 map->first_free = vm_map_first_entry(map);
1110 SAVE_HINT_HOLE_WRITE(map, NULL);
1111 }
1112 }
1113
1114 boolean_t
1115 vm_kernel_map_is_kernel(vm_map_t map)
1116 {
1117 return map->pmap == kernel_pmap;
1118 }
1119
1120 /*
1121 * vm_map_create:
1122 *
1123 * Creates and returns a new empty VM map with
1124 * the given physical map structure, and having
1125 * the given lower and upper address bounds.
1126 */
1127
1128 vm_map_t
1129 vm_map_create(
1130 pmap_t pmap,
1131 vm_map_offset_t min,
1132 vm_map_offset_t max,
1133 boolean_t pageable)
1134 {
1135 int options;
1136
1137 options = 0;
1138 if (pageable) {
1139 options |= VM_MAP_CREATE_PAGEABLE;
1140 }
1141 return vm_map_create_options(pmap, min, max, options);
1142 }
1143
1144 vm_map_t
1145 vm_map_create_options(
1146 pmap_t pmap,
1147 vm_map_offset_t min,
1148 vm_map_offset_t max,
1149 int options)
1150 {
1151 vm_map_t result;
1152 struct vm_map_links *hole_entry = NULL;
1153
1154 if (options & ~(VM_MAP_CREATE_ALL_OPTIONS)) {
1155 /* unknown option */
1156 return VM_MAP_NULL;
1157 }
1158
1159 result = (vm_map_t) zalloc(vm_map_zone);
1160 if (result == VM_MAP_NULL) {
1161 panic("vm_map_create");
1162 }
1163
1164 vm_map_first_entry(result) = vm_map_to_entry(result);
1165 vm_map_last_entry(result) = vm_map_to_entry(result);
1166 result->hdr.nentries = 0;
1167 if (options & VM_MAP_CREATE_PAGEABLE) {
1168 result->hdr.entries_pageable = TRUE;
1169 } else {
1170 result->hdr.entries_pageable = FALSE;
1171 }
1172
1173 vm_map_store_init( &(result->hdr));
1174
1175 result->hdr.page_shift = PAGE_SHIFT;
1176
1177 result->size = 0;
1178 result->user_wire_limit = MACH_VM_MAX_ADDRESS; /* default limit is unlimited */
1179 result->user_wire_size = 0;
1180 #if !CONFIG_EMBEDDED
1181 result->vmmap_high_start = 0;
1182 #endif
1183 os_ref_init_count(&result->map_refcnt, &map_refgrp, 1);
1184 #if TASK_SWAPPER
1185 result->res_count = 1;
1186 result->sw_state = MAP_SW_IN;
1187 #endif /* TASK_SWAPPER */
1188 result->pmap = pmap;
1189 result->min_offset = min;
1190 result->max_offset = max;
1191 result->wiring_required = FALSE;
1192 result->no_zero_fill = FALSE;
1193 result->mapped_in_other_pmaps = FALSE;
1194 result->wait_for_space = FALSE;
1195 result->switch_protect = FALSE;
1196 result->disable_vmentry_reuse = FALSE;
1197 result->map_disallow_data_exec = FALSE;
1198 result->is_nested_map = FALSE;
1199 result->map_disallow_new_exec = FALSE;
1200 result->terminated = FALSE;
1201 result->highest_entry_end = 0;
1202 result->first_free = vm_map_to_entry(result);
1203 result->hint = vm_map_to_entry(result);
1204 result->jit_entry_exists = FALSE;
1205
1206 /* "has_corpse_footprint" and "holelistenabled" are mutually exclusive */
1207 if (options & VM_MAP_CREATE_CORPSE_FOOTPRINT) {
1208 result->has_corpse_footprint = TRUE;
1209 result->holelistenabled = FALSE;
1210 result->vmmap_corpse_footprint = NULL;
1211 } else {
1212 result->has_corpse_footprint = FALSE;
1213 if (vm_map_supports_hole_optimization) {
1214 hole_entry = zalloc(vm_map_holes_zone);
1215
1216 hole_entry->start = min;
1217 #if defined(__arm__) || defined(__arm64__)
1218 hole_entry->end = result->max_offset;
1219 #else
1220 hole_entry->end = (max > (vm_map_offset_t)MACH_VM_MAX_ADDRESS) ? max : (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
1221 #endif
1222 result->holes_list = result->hole_hint = hole_entry;
1223 hole_entry->prev = hole_entry->next = CAST_TO_VM_MAP_ENTRY(hole_entry);
1224 result->holelistenabled = TRUE;
1225 } else {
1226 result->holelistenabled = FALSE;
1227 }
1228 }
1229
1230 vm_map_lock_init(result);
1231 lck_mtx_init_ext(&result->s_lock, &result->s_lock_ext, &vm_map_lck_grp, &vm_map_lck_attr);
1232
1233 return result;
1234 }
1235
1236 /*
1237 * vm_map_entry_create: [ internal use only ]
1238 *
1239 * Allocates a VM map entry for insertion in the
1240 * given map (or map copy). No fields are filled.
1241 */
1242 #define vm_map_entry_create(map, map_locked) _vm_map_entry_create(&(map)->hdr, map_locked)
1243
1244 #define vm_map_copy_entry_create(copy, map_locked) \
1245 _vm_map_entry_create(&(copy)->cpy_hdr, map_locked)
1246 unsigned reserved_zalloc_count, nonreserved_zalloc_count;
1247
1248 static vm_map_entry_t
1249 _vm_map_entry_create(
1250 struct vm_map_header *map_header, boolean_t __unused map_locked)
1251 {
1252 zone_t zone;
1253 vm_map_entry_t entry;
1254
1255 zone = vm_map_entry_zone;
1256
1257 assert(map_header->entries_pageable ? !map_locked : TRUE);
1258
1259 if (map_header->entries_pageable) {
1260 entry = (vm_map_entry_t) zalloc(zone);
1261 } else {
1262 entry = (vm_map_entry_t) zalloc_canblock(zone, FALSE);
1263
1264 if (entry == VM_MAP_ENTRY_NULL) {
1265 zone = vm_map_entry_reserved_zone;
1266 entry = (vm_map_entry_t) zalloc(zone);
1267 OSAddAtomic(1, &reserved_zalloc_count);
1268 } else {
1269 OSAddAtomic(1, &nonreserved_zalloc_count);
1270 }
1271 }
1272
1273 if (entry == VM_MAP_ENTRY_NULL) {
1274 panic("vm_map_entry_create");
1275 }
1276 entry->from_reserved_zone = (zone == vm_map_entry_reserved_zone);
1277
1278 vm_map_store_update((vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE);
1279 #if MAP_ENTRY_CREATION_DEBUG
1280 entry->vme_creation_maphdr = map_header;
1281 backtrace(&entry->vme_creation_bt[0],
1282 (sizeof(entry->vme_creation_bt) / sizeof(uintptr_t)), NULL);
1283 #endif
1284 return entry;
1285 }
1286
/*
 *	vm_map_entry_dispose:	[ internal use only ]
 *
 *	Inverse of vm_map_entry_create.
 *
 *	write map lock held so no need to
 *	do anything special to ensure correctness
 *	of the stores
 *
 *	Forwards to _vm_map_entry_dispose() with the header embedded
 *	in "map".
 */
#define vm_map_entry_dispose(map, entry)                        \
	_vm_map_entry_dispose(&(map)->hdr, (entry))

/*
 *	vm_map_copy_entry_dispose:
 *
 *	Same as above for an entry that belongs to a map copy: forwards
 *	to _vm_map_entry_dispose() with the header embedded in "copy".
 *
 *	The first parameter must be named "copy" because that is the name
 *	the expansion uses; with any other parameter name the macro would
 *	ignore its argument and bind to whatever "copy" happens to be in
 *	scope at the call site.
 */
#define vm_map_copy_entry_dispose(copy, entry) \
	_vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
1301
1302 static void
1303 _vm_map_entry_dispose(
1304 struct vm_map_header *map_header,
1305 vm_map_entry_t entry)
1306 {
1307 zone_t zone;
1308
1309 if (map_header->entries_pageable || !(entry->from_reserved_zone)) {
1310 zone = vm_map_entry_zone;
1311 } else {
1312 zone = vm_map_entry_reserved_zone;
1313 }
1314
1315 if (!map_header->entries_pageable) {
1316 if (zone == vm_map_entry_zone) {
1317 OSAddAtomic(-1, &nonreserved_zalloc_count);
1318 } else {
1319 OSAddAtomic(-1, &reserved_zalloc_count);
1320 }
1321 }
1322
1323 zfree(zone, entry);
1324 }
1325
#if     MACH_ASSERT
/* The check below is a no-op (always TRUE) while this stays FALSE. */
static boolean_t first_free_check = FALSE;

/*
 *	first_free_is_valid:
 *
 *	When first_free_check is set, returns what the map store layer
 *	reports through first_free_is_valid_store(); otherwise returns
 *	TRUE without looking at the map.
 */
boolean_t
first_free_is_valid(
	vm_map_t        map)
{
	return first_free_check ? first_free_is_valid_store( map ) : TRUE;
}
#endif /* MACH_ASSERT */
1339
1340
/*
 * Link / unlink an entry in a map copy: both forward to the vm map
 * store layer, handing it the header embedded in the copy (cpy_hdr).
 */
#define vm_map_copy_entry_link(cpy, where, ent) \
	_vm_map_store_entry_link(&(cpy)->cpy_hdr, where, (ent))

#define vm_map_copy_entry_unlink(cpy, ent) \
	_vm_map_store_entry_unlink(&(cpy)->cpy_hdr, (ent))
1346
1347 #if MACH_ASSERT && TASK_SWAPPER
1348 /*
1349 * vm_map_res_reference:
1350 *
1351 * Adds another valid residence count to the given map.
1352 *
1353 * Map is locked so this function can be called from
1354 * vm_map_swapin.
1355 *
1356 */
1357 void
1358 vm_map_res_reference(vm_map_t map)
1359 {
1360 /* assert map is locked */
1361 assert(map->res_count >= 0);
1362 assert(os_ref_get_count(&map->map_refcnt) >= map->res_count);
1363 if (map->res_count == 0) {
1364 lck_mtx_unlock(&map->s_lock);
1365 vm_map_lock(map);
1366 vm_map_swapin(map);
1367 lck_mtx_lock(&map->s_lock);
1368 ++map->res_count;
1369 vm_map_unlock(map);
1370 } else {
1371 ++map->res_count;
1372 }
1373 }
1374
1375 /*
1376 * vm_map_reference_swap:
1377 *
1378 * Adds valid reference and residence counts to the given map.
1379 *
1380 * The map may not be in memory (i.e. zero residence count).
1381 *
1382 */
1383 void
1384 vm_map_reference_swap(vm_map_t map)
1385 {
1386 assert(map != VM_MAP_NULL);
1387 lck_mtx_lock(&map->s_lock);
1388 assert(map->res_count >= 0);
1389 assert(os_ref_get_count(&map->map_refcnt) >= map->res_count);
1390 os_ref_retain_locked(&map->map_refcnt);
1391 vm_map_res_reference(map);
1392 lck_mtx_unlock(&map->s_lock);
1393 }
1394
1395 /*
1396 * vm_map_res_deallocate:
1397 *
1398 * Decrement residence count on a map; possibly causing swapout.
1399 *
1400 * The map must be in memory (i.e. non-zero residence count).
1401 *
1402 * The map is locked, so this function is callable from vm_map_deallocate.
1403 *
1404 */
1405 void
1406 vm_map_res_deallocate(vm_map_t map)
1407 {
1408 assert(map->res_count > 0);
1409 if (--map->res_count == 0) {
1410 lck_mtx_unlock(&map->s_lock);
1411 vm_map_lock(map);
1412 vm_map_swapout(map);
1413 vm_map_unlock(map);
1414 lck_mtx_lock(&map->s_lock);
1415 }
1416 assert(os_ref_get_count(&map->map_refcnt) >= map->res_count);
1417 }
1418 #endif /* MACH_ASSERT && TASK_SWAPPER */
1419
1420 /*
1421 * vm_map_destroy:
1422 *
1423 * Actually destroy a map.
1424 */
1425 void
1426 vm_map_destroy(
1427 vm_map_t map,
1428 int flags)
1429 {
1430 vm_map_lock(map);
1431
1432 /* final cleanup: no need to unnest shared region */
1433 flags |= VM_MAP_REMOVE_NO_UNNESTING;
1434 /* final cleanup: ok to remove immutable mappings */
1435 flags |= VM_MAP_REMOVE_IMMUTABLE;
1436 /* final cleanup: allow gaps in range */
1437 flags |= VM_MAP_REMOVE_GAPS_OK;
1438
1439 /* clean up regular map entries */
1440 (void) vm_map_delete(map, map->min_offset, map->max_offset,
1441 flags, VM_MAP_NULL);
1442 /* clean up leftover special mappings (commpage, etc...) */
1443 #if !defined(__arm__) && !defined(__arm64__)
1444 (void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL,
1445 flags, VM_MAP_NULL);
1446 #endif /* !__arm__ && !__arm64__ */
1447
1448 vm_map_disable_hole_optimization(map);
1449 vm_map_corpse_footprint_destroy(map);
1450
1451 vm_map_unlock(map);
1452
1453 assert(map->hdr.nentries == 0);
1454
1455 if (map->pmap) {
1456 pmap_destroy(map->pmap);
1457 }
1458
1459 if (vm_map_lck_attr.lck_attr_val & LCK_ATTR_DEBUG) {
1460 /*
1461 * If lock debugging is enabled the mutexes get tagged as LCK_MTX_TAG_INDIRECT.
1462 * And this is regardless of whether the lck_mtx_ext_t is embedded in the
1463 * structure or kalloc'ed via lck_mtx_init.
1464 * An example is s_lock_ext within struct _vm_map.
1465 *
1466 * A lck_mtx_destroy on such a mutex will attempt a kfree and panic. We
1467 * can add another tag to detect embedded vs alloc'ed indirect external
1468 * mutexes but that'll be additional checks in the lock path and require
1469 * updating dependencies for the old vs new tag.
1470 *
1471 * Since the kfree() is for LCK_MTX_TAG_INDIRECT mutexes and that tag is applied
1472 * just when lock debugging is ON, we choose to forego explicitly destroying
1473 * the vm_map mutex and rw lock and, as a consequence, will overflow the reference
1474 * count on vm_map_lck_grp, which has no serious side-effect.
1475 */
1476 } else {
1477 lck_rw_destroy(&(map)->lock, &vm_map_lck_grp);
1478 lck_mtx_destroy(&(map)->s_lock, &vm_map_lck_grp);
1479 }
1480
1481 zfree(vm_map_zone, map);
1482 }
1483
1484 /*
1485 * Returns pid of the task with the largest number of VM map entries.
1486 * Used in the zone-map-exhaustion jetsam path.
1487 */
1488 pid_t
1489 find_largest_process_vm_map_entries(void)
1490 {
1491 pid_t victim_pid = -1;
1492 int max_vm_map_entries = 0;
1493 task_t task = TASK_NULL;
1494 queue_head_t *task_list = &tasks;
1495
1496 lck_mtx_lock(&tasks_threads_lock);
1497 queue_iterate(task_list, task, task_t, tasks) {
1498 if (task == kernel_task || !task->active) {
1499 continue;
1500 }
1501
1502 vm_map_t task_map = task->map;
1503 if (task_map != VM_MAP_NULL) {
1504 int task_vm_map_entries = task_map->hdr.nentries;
1505 if (task_vm_map_entries > max_vm_map_entries) {
1506 max_vm_map_entries = task_vm_map_entries;
1507 victim_pid = pid_from_task(task);
1508 }
1509 }
1510 }
1511 lck_mtx_unlock(&tasks_threads_lock);
1512
1513 printf("zone_map_exhaustion: victim pid %d, vm region count: %d\n", victim_pid, max_vm_map_entries);
1514 return victim_pid;
1515 }
1516
1517 #if TASK_SWAPPER
1518 /*
1519 * vm_map_swapin/vm_map_swapout
1520 *
1521 * Swap a map in and out, either referencing or releasing its resources.
1522 * These functions are internal use only; however, they must be exported
1523 * because they may be called from macros, which are exported.
1524 *
1525 * In the case of swapout, there could be races on the residence count,
1526 * so if the residence count is up, we return, assuming that a
1527 * vm_map_deallocate() call in the near future will bring us back.
1528 *
1529 * Locking:
1530 * -- We use the map write lock for synchronization among races.
1531 * -- The map write lock, and not the simple s_lock, protects the
1532 * swap state of the map.
1533 * -- If a map entry is a share map, then we hold both locks, in
1534 * hierarchical order.
1535 *
1536 * Synchronization Notes:
1537 * 1) If a vm_map_swapin() call happens while swapout in progress, it
1538 * will block on the map lock and proceed when swapout is through.
1539 * 2) A vm_map_reference() call at this time is illegal, and will
1540 * cause a panic. vm_map_reference() is only allowed on resident
1541 * maps, since it refuses to block.
1542 * 3) A vm_map_swapin() call during a swapin will block, and
1543 * proceed when the first swapin is done, turning into a nop.
1544 * This is the reason the res_count is not incremented until
1545 * after the swapin is complete.
1546 * 4) There is a timing hole after the checks of the res_count, before
1547 * the map lock is taken, during which a swapin may get the lock
1548 * before a swapout about to happen. If this happens, the swapin
1549 * will detect the state and increment the reference count, causing
1550 * the swapout to be a nop, thereby delaying it until a later
1551 * vm_map_deallocate. If the swapout gets the lock first, then
1552 * the swapin will simply block until the swapout is done, and
1553 * then proceed.
1554 *
1555 * Because vm_map_swapin() is potentially an expensive operation, it
1556 * should be used with caution.
1557 *
1558 * Invariants:
1559 * 1) A map with a residence count of zero is either swapped, or
1560 * being swapped.
1561 * 2) A map with a non-zero residence count is either resident,
1562 * or being swapped in.
1563 */
1564
1565 int vm_map_swap_enable = 1;
1566
1567 void
1568 vm_map_swapin(vm_map_t map)
1569 {
1570 vm_map_entry_t entry;
1571
1572 if (!vm_map_swap_enable) { /* debug */
1573 return;
1574 }
1575
1576 /*
1577 * Map is locked
1578 * First deal with various races.
1579 */
1580 if (map->sw_state == MAP_SW_IN) {
1581 /*
1582 * we raced with swapout and won. Returning will incr.
1583 * the res_count, turning the swapout into a nop.
1584 */
1585 return;
1586 }
1587
1588 /*
1589 * The residence count must be zero. If we raced with another
1590 * swapin, the state would have been IN; if we raced with a
1591 * swapout (after another competing swapin), we must have lost
1592 * the race to get here (see above comment), in which case
1593 * res_count is still 0.
1594 */
1595 assert(map->res_count == 0);
1596
1597 /*
1598 * There are no intermediate states of a map going out or
1599 * coming in, since the map is locked during the transition.
1600 */
1601 assert(map->sw_state == MAP_SW_OUT);
1602
1603 /*
1604 * We now operate upon each map entry. If the entry is a sub-
1605 * or share-map, we call vm_map_res_reference upon it.
1606 * If the entry is an object, we call vm_object_res_reference
1607 * (this may iterate through the shadow chain).
1608 * Note that we hold the map locked the entire time,
1609 * even if we get back here via a recursive call in
1610 * vm_map_res_reference.
1611 */
1612 entry = vm_map_first_entry(map);
1613
1614 while (entry != vm_map_to_entry(map)) {
1615 if (VME_OBJECT(entry) != VM_OBJECT_NULL) {
1616 if (entry->is_sub_map) {
1617 vm_map_t lmap = VME_SUBMAP(entry);
1618 lck_mtx_lock(&lmap->s_lock);
1619 vm_map_res_reference(lmap);
1620 lck_mtx_unlock(&lmap->s_lock);
1621 } else {
1622 vm_object_t object = VME_OBEJCT(entry);
1623 vm_object_lock(object);
1624 /*
1625 * This call may iterate through the
1626 * shadow chain.
1627 */
1628 vm_object_res_reference(object);
1629 vm_object_unlock(object);
1630 }
1631 }
1632 entry = entry->vme_next;
1633 }
1634 assert(map->sw_state == MAP_SW_OUT);
1635 map->sw_state = MAP_SW_IN;
1636 }
1637
1638 void
1639 vm_map_swapout(vm_map_t map)
1640 {
1641 vm_map_entry_t entry;
1642
1643 /*
1644 * Map is locked
1645 * First deal with various races.
1646 * If we raced with a swapin and lost, the residence count
1647 * will have been incremented to 1, and we simply return.
1648 */
1649 lck_mtx_lock(&map->s_lock);
1650 if (map->res_count != 0) {
1651 lck_mtx_unlock(&map->s_lock);
1652 return;
1653 }
1654 lck_mtx_unlock(&map->s_lock);
1655
1656 /*
1657 * There are no intermediate states of a map going out or
1658 * coming in, since the map is locked during the transition.
1659 */
1660 assert(map->sw_state == MAP_SW_IN);
1661
1662 if (!vm_map_swap_enable) {
1663 return;
1664 }
1665
1666 /*
1667 * We now operate upon each map entry. If the entry is a sub-
1668 * or share-map, we call vm_map_res_deallocate upon it.
1669 * If the entry is an object, we call vm_object_res_deallocate
1670 * (this may iterate through the shadow chain).
1671 * Note that we hold the map locked the entire time,
1672 * even if we get back here via a recursive call in
1673 * vm_map_res_deallocate.
1674 */
1675 entry = vm_map_first_entry(map);
1676
1677 while (entry != vm_map_to_entry(map)) {
1678 if (VME_OBJECT(entry) != VM_OBJECT_NULL) {
1679 if (entry->is_sub_map) {
1680 vm_map_t lmap = VME_SUBMAP(entry);
1681 lck_mtx_lock(&lmap->s_lock);
1682 vm_map_res_deallocate(lmap);
1683 lck_mtx_unlock(&lmap->s_lock);
1684 } else {
1685 vm_object_t object = VME_OBJECT(entry);
1686 vm_object_lock(object);
1687 /*
1688 * This call may take a long time,
1689 * since it could actively push
1690 * out pages (if we implement it
1691 * that way).
1692 */
1693 vm_object_res_deallocate(object);
1694 vm_object_unlock(object);
1695 }
1696 }
1697 entry = entry->vme_next;
1698 }
1699 assert(map->sw_state == MAP_SW_IN);
1700 map->sw_state = MAP_SW_OUT;
1701 }
1702
1703 #endif /* TASK_SWAPPER */
1704
1705 /*
1706 * vm_map_lookup_entry: [ internal use only ]
1707 *
1708 * Calls into the vm map store layer to find the map
1709 * entry containing (or immediately preceding) the
1710 * specified address in the given map; the entry is returned
1711 * in the "entry" parameter. The boolean
1712 * result indicates whether the address is
1713 * actually contained in the map.
1714 */
1715 boolean_t
1716 vm_map_lookup_entry(
1717 vm_map_t map,
1718 vm_map_offset_t address,
1719 vm_map_entry_t *entry) /* OUT */
1720 {
1721 return vm_map_store_lookup_entry( map, address, entry );
1722 }
1723
1724 /*
1725 * Routine: vm_map_find_space
1726 * Purpose:
1727 * Allocate a range in the specified virtual address map,
1728 * returning the entry allocated for that range.
1729 * Used by kmem_alloc, etc.
1730 *
1731 * The map must be NOT be locked. It will be returned locked
1732 * on KERN_SUCCESS, unlocked on failure.
1733 *
1734 * If an entry is allocated, the object/offset fields
1735 * are initialized to zero.
1736 */
1737 kern_return_t
1738 vm_map_find_space(
1739 vm_map_t map,
1740 vm_map_offset_t *address, /* OUT */
1741 vm_map_size_t size,
1742 vm_map_offset_t mask,
1743 int flags __unused,
1744 vm_map_kernel_flags_t vmk_flags,
1745 vm_tag_t tag,
1746 vm_map_entry_t *o_entry) /* OUT */
1747 {
1748 vm_map_entry_t entry, new_entry;
1749 vm_map_offset_t start;
1750 vm_map_offset_t end;
1751 vm_map_entry_t hole_entry;
1752
1753 if (size == 0) {
1754 *address = 0;
1755 return KERN_INVALID_ARGUMENT;
1756 }
1757
1758 if (vmk_flags.vmkf_guard_after) {
1759 /* account for the back guard page in the size */
1760 size += VM_MAP_PAGE_SIZE(map);
1761 }
1762
1763 new_entry = vm_map_entry_create(map, FALSE);
1764
1765 /*
1766 * Look for the first possible address; if there's already
1767 * something at this address, we have to start after it.
1768 */
1769
1770 vm_map_lock(map);
1771
1772 if (map->disable_vmentry_reuse == TRUE) {
1773 VM_MAP_HIGHEST_ENTRY(map, entry, start);
1774 } else {
1775 if (map->holelistenabled) {
1776 hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
1777
1778 if (hole_entry == NULL) {
1779 /*
1780 * No more space in the map?
1781 */
1782 vm_map_entry_dispose(map, new_entry);
1783 vm_map_unlock(map);
1784 return KERN_NO_SPACE;
1785 }
1786
1787 entry = hole_entry;
1788 start = entry->vme_start;
1789 } else {
1790 assert(first_free_is_valid(map));
1791 if ((entry = map->first_free) == vm_map_to_entry(map)) {
1792 start = map->min_offset;
1793 } else {
1794 start = entry->vme_end;
1795 }
1796 }
1797 }
1798
1799 /*
1800 * In any case, the "entry" always precedes
1801 * the proposed new region throughout the loop:
1802 */
1803
1804 while (TRUE) {
1805 vm_map_entry_t next;
1806
1807 /*
1808 * Find the end of the proposed new region.
1809 * Be sure we didn't go beyond the end, or
1810 * wrap around the address.
1811 */
1812
1813 if (vmk_flags.vmkf_guard_before) {
1814 /* reserve space for the front guard page */
1815 start += VM_MAP_PAGE_SIZE(map);
1816 }
1817 end = ((start + mask) & ~mask);
1818
1819 if (end < start) {
1820 vm_map_entry_dispose(map, new_entry);
1821 vm_map_unlock(map);
1822 return KERN_NO_SPACE;
1823 }
1824 start = end;
1825 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
1826 end += size;
1827 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
1828
1829 if ((end > map->max_offset) || (end < start)) {
1830 vm_map_entry_dispose(map, new_entry);
1831 vm_map_unlock(map);
1832 return KERN_NO_SPACE;
1833 }
1834
1835 next = entry->vme_next;
1836
1837 if (map->holelistenabled) {
1838 if (entry->vme_end >= end) {
1839 break;
1840 }
1841 } else {
1842 /*
1843 * If there are no more entries, we must win.
1844 *
1845 * OR
1846 *
1847 * If there is another entry, it must be
1848 * after the end of the potential new region.
1849 */
1850
1851 if (next == vm_map_to_entry(map)) {
1852 break;
1853 }
1854
1855 if (next->vme_start >= end) {
1856 break;
1857 }
1858 }
1859
1860 /*
1861 * Didn't fit -- move to the next entry.
1862 */
1863
1864 entry = next;
1865
1866 if (map->holelistenabled) {
1867 if (entry == CAST_TO_VM_MAP_ENTRY(map->holes_list)) {
1868 /*
1869 * Wrapped around
1870 */
1871 vm_map_entry_dispose(map, new_entry);
1872 vm_map_unlock(map);
1873 return KERN_NO_SPACE;
1874 }
1875 start = entry->vme_start;
1876 } else {
1877 start = entry->vme_end;
1878 }
1879 }
1880
1881 if (map->holelistenabled) {
1882 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
1883 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
1884 }
1885 }
1886
1887 /*
1888 * At this point,
1889 * "start" and "end" should define the endpoints of the
1890 * available new range, and
1891 * "entry" should refer to the region before the new
1892 * range, and
1893 *
1894 * the map should be locked.
1895 */
1896
1897 if (vmk_flags.vmkf_guard_before) {
1898 /* go back for the front guard page */
1899 start -= VM_MAP_PAGE_SIZE(map);
1900 }
1901 *address = start;
1902
1903 assert(start < end);
1904 new_entry->vme_start = start;
1905 new_entry->vme_end = end;
1906 assert(page_aligned(new_entry->vme_start));
1907 assert(page_aligned(new_entry->vme_end));
1908 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
1909 VM_MAP_PAGE_MASK(map)));
1910 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
1911 VM_MAP_PAGE_MASK(map)));
1912
1913 new_entry->is_shared = FALSE;
1914 new_entry->is_sub_map = FALSE;
1915 new_entry->use_pmap = TRUE;
1916 VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
1917 VME_OFFSET_SET(new_entry, (vm_object_offset_t) 0);
1918
1919 new_entry->needs_copy = FALSE;
1920
1921 new_entry->inheritance = VM_INHERIT_DEFAULT;
1922 new_entry->protection = VM_PROT_DEFAULT;
1923 new_entry->max_protection = VM_PROT_ALL;
1924 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
1925 new_entry->wired_count = 0;
1926 new_entry->user_wired_count = 0;
1927
1928 new_entry->in_transition = FALSE;
1929 new_entry->needs_wakeup = FALSE;
1930 new_entry->no_cache = FALSE;
1931 new_entry->permanent = FALSE;
1932 new_entry->superpage_size = FALSE;
1933 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
1934 new_entry->map_aligned = TRUE;
1935 } else {
1936 new_entry->map_aligned = FALSE;
1937 }
1938
1939 new_entry->used_for_jit = FALSE;
1940 new_entry->pmap_cs_associated = FALSE;
1941 new_entry->zero_wired_pages = FALSE;
1942 new_entry->iokit_acct = FALSE;
1943 new_entry->vme_resilient_codesign = FALSE;
1944 new_entry->vme_resilient_media = FALSE;
1945 if (vmk_flags.vmkf_atomic_entry) {
1946 new_entry->vme_atomic = TRUE;
1947 } else {
1948 new_entry->vme_atomic = FALSE;
1949 }
1950
1951 VME_ALIAS_SET(new_entry, tag);
1952
1953 /*
1954 * Insert the new entry into the list
1955 */
1956
1957 vm_map_store_entry_link(map, entry, new_entry, VM_MAP_KERNEL_FLAGS_NONE);
1958
1959 map->size += size;
1960
1961 /*
1962 * Update the lookup hint
1963 */
1964 SAVE_HINT_MAP_WRITE(map, new_entry);
1965
1966 *o_entry = new_entry;
1967 return KERN_SUCCESS;
1968 }
1969
1970 int vm_map_pmap_enter_print = FALSE;
1971 int vm_map_pmap_enter_enable = FALSE;
1972
1973 /*
1974 * Routine: vm_map_pmap_enter [internal only]
1975 *
1976 * Description:
1977 * Force pages from the specified object to be entered into
1978 * the pmap at the specified address if they are present.
1979 * As soon as a page not found in the object the scan ends.
1980 *
1981 * Returns:
1982 * Nothing.
1983 *
1984 * In/out conditions:
1985 * The source map should not be locked on entry.
1986 */
1987 __unused static void
1988 vm_map_pmap_enter(
1989 vm_map_t map,
1990 vm_map_offset_t addr,
1991 vm_map_offset_t end_addr,
1992 vm_object_t object,
1993 vm_object_offset_t offset,
1994 vm_prot_t protection)
1995 {
1996 int type_of_fault;
1997 kern_return_t kr;
1998 struct vm_object_fault_info fault_info = {};
1999
2000 if (map->pmap == 0) {
2001 return;
2002 }
2003
2004 while (addr < end_addr) {
2005 vm_page_t m;
2006
2007
2008 /*
2009 * TODO:
2010 * From vm_map_enter(), we come into this function without the map
2011 * lock held or the object lock held.
2012 * We haven't taken a reference on the object either.
2013 * We should do a proper lookup on the map to make sure
2014 * that things are sane before we go locking objects that
2015 * could have been deallocated from under us.
2016 */
2017
2018 vm_object_lock(object);
2019
2020 m = vm_page_lookup(object, offset);
2021
2022 if (m == VM_PAGE_NULL || m->vmp_busy || m->vmp_fictitious ||
2023 (m->vmp_unusual && (m->vmp_error || m->vmp_restart || m->vmp_absent))) {
2024 vm_object_unlock(object);
2025 return;
2026 }
2027
2028 if (vm_map_pmap_enter_print) {
2029 printf("vm_map_pmap_enter:");
2030 printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
2031 map, (unsigned long long)addr, object, (unsigned long long)offset);
2032 }
2033 type_of_fault = DBG_CACHE_HIT_FAULT;
2034 kr = vm_fault_enter(m, map->pmap,
2035 addr, protection, protection,
2036 VM_PAGE_WIRED(m),
2037 FALSE, /* change_wiring */
2038 VM_KERN_MEMORY_NONE, /* tag - not wiring */
2039 &fault_info,
2040 NULL, /* need_retry */
2041 &type_of_fault);
2042
2043 vm_object_unlock(object);
2044
2045 offset += PAGE_SIZE_64;
2046 addr += PAGE_SIZE;
2047 }
2048 }
2049
2050 boolean_t vm_map_pmap_is_empty(
2051 vm_map_t map,
2052 vm_map_offset_t start,
2053 vm_map_offset_t end);
2054 boolean_t
2055 vm_map_pmap_is_empty(
2056 vm_map_t map,
2057 vm_map_offset_t start,
2058 vm_map_offset_t end)
2059 {
2060 #ifdef MACHINE_PMAP_IS_EMPTY
2061 return pmap_is_empty(map->pmap, start, end);
2062 #else /* MACHINE_PMAP_IS_EMPTY */
2063 vm_map_offset_t offset;
2064 ppnum_t phys_page;
2065
2066 if (map->pmap == NULL) {
2067 return TRUE;
2068 }
2069
2070 for (offset = start;
2071 offset < end;
2072 offset += PAGE_SIZE) {
2073 phys_page = pmap_find_phys(map->pmap, offset);
2074 if (phys_page) {
2075 kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
2076 "page %d at 0x%llx\n",
2077 map, (long long)start, (long long)end,
2078 phys_page, (long long)offset);
2079 return FALSE;
2080 }
2081 }
2082 return TRUE;
2083 #endif /* MACHINE_PMAP_IS_EMPTY */
2084 }
2085
2086 #define MAX_TRIES_TO_GET_RANDOM_ADDRESS 1000
2087 kern_return_t
2088 vm_map_random_address_for_size(
2089 vm_map_t map,
2090 vm_map_offset_t *address,
2091 vm_map_size_t size)
2092 {
2093 kern_return_t kr = KERN_SUCCESS;
2094 int tries = 0;
2095 vm_map_offset_t random_addr = 0;
2096 vm_map_offset_t hole_end;
2097
2098 vm_map_entry_t next_entry = VM_MAP_ENTRY_NULL;
2099 vm_map_entry_t prev_entry = VM_MAP_ENTRY_NULL;
2100 vm_map_size_t vm_hole_size = 0;
2101 vm_map_size_t addr_space_size;
2102
2103 addr_space_size = vm_map_max(map) - vm_map_min(map);
2104
2105 assert(page_aligned(size));
2106
2107 while (tries < MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
2108 random_addr = ((vm_map_offset_t)random()) << PAGE_SHIFT;
2109 random_addr = vm_map_trunc_page(
2110 vm_map_min(map) + (random_addr % addr_space_size),
2111 VM_MAP_PAGE_MASK(map));
2112
2113 if (vm_map_lookup_entry(map, random_addr, &prev_entry) == FALSE) {
2114 if (prev_entry == vm_map_to_entry(map)) {
2115 next_entry = vm_map_first_entry(map);
2116 } else {
2117 next_entry = prev_entry->vme_next;
2118 }
2119 if (next_entry == vm_map_to_entry(map)) {
2120 hole_end = vm_map_max(map);
2121 } else {
2122 hole_end = next_entry->vme_start;
2123 }
2124 vm_hole_size = hole_end - random_addr;
2125 if (vm_hole_size >= size) {
2126 *address = random_addr;
2127 break;
2128 }
2129 }
2130 tries++;
2131 }
2132
2133 if (tries == MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
2134 kr = KERN_NO_SPACE;
2135 }
2136 return kr;
2137 }
2138
2139 static boolean_t
2140 vm_memory_malloc_no_cow(
2141 int alias)
2142 {
2143 uint64_t alias_mask;
2144
2145 if (alias > 63) {
2146 return FALSE;
2147 }
2148
2149 alias_mask = 1ULL << alias;
2150 if (alias_mask & vm_memory_malloc_no_cow_mask) {
2151 return TRUE;
2152 }
2153 return FALSE;
2154 }
2155
2156 /*
2157 * Routine: vm_map_enter
2158 *
2159 * Description:
2160 * Allocate a range in the specified virtual address map.
2161 * The resulting range will refer to memory defined by
2162 * the given memory object and offset into that object.
2163 *
2164 * Arguments are as defined in the vm_map call.
2165 */
2166 int _map_enter_debug = 0;
2167 static unsigned int vm_map_enter_restore_successes = 0;
2168 static unsigned int vm_map_enter_restore_failures = 0;
2169 kern_return_t
2170 vm_map_enter(
2171 vm_map_t map,
2172 vm_map_offset_t *address, /* IN/OUT */
2173 vm_map_size_t size,
2174 vm_map_offset_t mask,
2175 int flags,
2176 vm_map_kernel_flags_t vmk_flags,
2177 vm_tag_t alias,
2178 vm_object_t object,
2179 vm_object_offset_t offset,
2180 boolean_t needs_copy,
2181 vm_prot_t cur_protection,
2182 vm_prot_t max_protection,
2183 vm_inherit_t inheritance)
2184 {
2185 vm_map_entry_t entry, new_entry;
2186 vm_map_offset_t start, tmp_start, tmp_offset;
2187 vm_map_offset_t end, tmp_end;
2188 vm_map_offset_t tmp2_start, tmp2_end;
2189 vm_map_offset_t desired_empty_end;
2190 vm_map_offset_t step;
2191 kern_return_t result = KERN_SUCCESS;
2192 vm_map_t zap_old_map = VM_MAP_NULL;
2193 vm_map_t zap_new_map = VM_MAP_NULL;
2194 boolean_t map_locked = FALSE;
2195 boolean_t pmap_empty = TRUE;
2196 boolean_t new_mapping_established = FALSE;
2197 boolean_t keep_map_locked = vmk_flags.vmkf_keep_map_locked;
2198 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
2199 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
2200 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
2201 boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
2202 boolean_t is_submap = vmk_flags.vmkf_submap;
2203 boolean_t permanent = vmk_flags.vmkf_permanent;
2204 boolean_t no_copy_on_read = vmk_flags.vmkf_no_copy_on_read;
2205 boolean_t entry_for_jit = vmk_flags.vmkf_map_jit;
2206 boolean_t iokit_acct = vmk_flags.vmkf_iokit_acct;
2207 boolean_t resilient_codesign = ((flags & VM_FLAGS_RESILIENT_CODESIGN) != 0);
2208 boolean_t resilient_media = ((flags & VM_FLAGS_RESILIENT_MEDIA) != 0);
2209 boolean_t random_address = ((flags & VM_FLAGS_RANDOM_ADDR) != 0);
2210 unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
2211 vm_tag_t user_alias;
2212 vm_map_offset_t effective_min_offset, effective_max_offset;
2213 kern_return_t kr;
2214 boolean_t clear_map_aligned = FALSE;
2215 vm_map_entry_t hole_entry;
2216 vm_map_size_t chunk_size = 0;
2217
2218 assertf(vmk_flags.__vmkf_unused == 0, "vmk_flags unused=0x%x\n", vmk_flags.__vmkf_unused);
2219
2220 if (flags & VM_FLAGS_4GB_CHUNK) {
2221 #if defined(__LP64__)
2222 chunk_size = (4ULL * 1024 * 1024 * 1024); /* max. 4GB chunks for the new allocation */
2223 #else /* __LP64__ */
2224 chunk_size = ANON_CHUNK_SIZE;
2225 #endif /* __LP64__ */
2226 } else {
2227 chunk_size = ANON_CHUNK_SIZE;
2228 }
2229
2230 if (superpage_size) {
2231 switch (superpage_size) {
2232 /*
2233 * Note that the current implementation only supports
2234 * a single size for superpages, SUPERPAGE_SIZE, per
2235 * architecture. As soon as more sizes are supposed
2236 * to be supported, SUPERPAGE_SIZE has to be replaced
2237 * with a lookup of the size depending on superpage_size.
2238 */
2239 #ifdef __x86_64__
2240 case SUPERPAGE_SIZE_ANY:
2241 /* handle it like 2 MB and round up to page size */
2242 size = (size + 2 * 1024 * 1024 - 1) & ~(2 * 1024 * 1024 - 1);
2243 case SUPERPAGE_SIZE_2MB:
2244 break;
2245 #endif
2246 default:
2247 return KERN_INVALID_ARGUMENT;
2248 }
2249 mask = SUPERPAGE_SIZE - 1;
2250 if (size & (SUPERPAGE_SIZE - 1)) {
2251 return KERN_INVALID_ARGUMENT;
2252 }
2253 inheritance = VM_INHERIT_NONE; /* fork() children won't inherit superpages */
2254 }
2255
2256
2257 if ((cur_protection & VM_PROT_WRITE) &&
2258 (cur_protection & VM_PROT_EXECUTE) &&
2259 #if !CONFIG_EMBEDDED
2260 map != kernel_map &&
2261 (cs_process_global_enforcement() ||
2262 (vmk_flags.vmkf_cs_enforcement_override
2263 ? vmk_flags.vmkf_cs_enforcement
2264 : cs_process_enforcement(NULL))) &&
2265 #endif /* !CONFIG_EMBEDDED */
2266 !entry_for_jit) {
2267 DTRACE_VM3(cs_wx,
2268 uint64_t, 0,
2269 uint64_t, 0,
2270 vm_prot_t, cur_protection);
2271 printf("CODE SIGNING: %d[%s] %s: curprot cannot be write+execute. "
2272 #if VM_PROTECT_WX_FAIL
2273 "failing\n",
2274 #else /* VM_PROTECT_WX_FAIL */
2275 "turning off execute\n",
2276 #endif /* VM_PROTECT_WX_FAIL */
2277 proc_selfpid(),
2278 (current_task()->bsd_info
2279 ? proc_name_address(current_task()->bsd_info)
2280 : "?"),
2281 __FUNCTION__);
2282 cur_protection &= ~VM_PROT_EXECUTE;
2283 #if VM_PROTECT_WX_FAIL
2284 return KERN_PROTECTION_FAILURE;
2285 #endif /* VM_PROTECT_WX_FAIL */
2286 }
2287
2288 /*
2289 * If the task has requested executable lockdown,
2290 * deny any new executable mapping.
2291 */
2292 if (map->map_disallow_new_exec == TRUE) {
2293 if (cur_protection & VM_PROT_EXECUTE) {
2294 return KERN_PROTECTION_FAILURE;
2295 }
2296 }
2297
2298 if (resilient_codesign) {
2299 assert(!is_submap);
2300 if ((cur_protection & (VM_PROT_WRITE | VM_PROT_EXECUTE)) ||
2301 (max_protection & (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
2302 return KERN_PROTECTION_FAILURE;
2303 }
2304 }
2305
2306 if (resilient_media) {
2307 assert(!is_submap);
2308 // assert(!needs_copy);
2309 if (object != VM_OBJECT_NULL &&
2310 !object->internal) {
2311 /*
2312 * This mapping is directly backed by an external
2313 * memory manager (e.g. a vnode pager for a file):
2314 * we would not have any safe place to inject
2315 * a zero-filled page if an actual page is not
2316 * available, without possibly impacting the actual
2317 * contents of the mapped object (e.g. the file),
2318 * so we can't provide any media resiliency here.
2319 */
2320 return KERN_INVALID_ARGUMENT;
2321 }
2322 }
2323
2324 if (is_submap) {
2325 if (purgable) {
2326 /* submaps can not be purgeable */
2327 return KERN_INVALID_ARGUMENT;
2328 }
2329 if (object == VM_OBJECT_NULL) {
2330 /* submaps can not be created lazily */
2331 return KERN_INVALID_ARGUMENT;
2332 }
2333 }
2334 if (vmk_flags.vmkf_already) {
2335 /*
2336 * VM_FLAGS_ALREADY says that it's OK if the same mapping
2337 * is already present. For it to be meaningul, the requested
2338 * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
2339 * we shouldn't try and remove what was mapped there first
2340 * (!VM_FLAGS_OVERWRITE).
2341 */
2342 if ((flags & VM_FLAGS_ANYWHERE) ||
2343 (flags & VM_FLAGS_OVERWRITE)) {
2344 return KERN_INVALID_ARGUMENT;
2345 }
2346 }
2347
2348 effective_min_offset = map->min_offset;
2349
2350 if (vmk_flags.vmkf_beyond_max) {
2351 /*
2352 * Allow an insertion beyond the map's max offset.
2353 */
2354 #if !defined(__arm__) && !defined(__arm64__)
2355 if (vm_map_is_64bit(map)) {
2356 effective_max_offset = 0xFFFFFFFFFFFFF000ULL;
2357 } else
2358 #endif /* __arm__ */
2359 effective_max_offset = 0x00000000FFFFF000ULL;
2360 } else {
2361 #if !defined(CONFIG_EMBEDDED)
2362 if (__improbable(vmk_flags.vmkf_32bit_map_va)) {
2363 effective_max_offset = MIN(map->max_offset, 0x00000000FFFFF000ULL);
2364 } else {
2365 effective_max_offset = map->max_offset;
2366 }
2367 #else
2368 effective_max_offset = map->max_offset;
2369 #endif
2370 }
2371
2372 if (size == 0 ||
2373 (offset & PAGE_MASK_64) != 0) {
2374 *address = 0;
2375 return KERN_INVALID_ARGUMENT;
2376 }
2377
2378 if (map->pmap == kernel_pmap) {
2379 user_alias = VM_KERN_MEMORY_NONE;
2380 } else {
2381 user_alias = alias;
2382 }
2383
2384 if (user_alias == VM_MEMORY_MALLOC_MEDIUM) {
2385 chunk_size = MALLOC_MEDIUM_CHUNK_SIZE;
2386 }
2387
2388 #define RETURN(value) { result = value; goto BailOut; }
2389
2390 assert(page_aligned(*address));
2391 assert(page_aligned(size));
2392
2393 if (!VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map))) {
2394 /*
2395 * In most cases, the caller rounds the size up to the
2396 * map's page size.
2397 * If we get a size that is explicitly not map-aligned here,
2398 * we'll have to respect the caller's wish and mark the
2399 * mapping as "not map-aligned" to avoid tripping the
2400 * map alignment checks later.
2401 */
2402 clear_map_aligned = TRUE;
2403 }
2404 if (!anywhere &&
2405 !VM_MAP_PAGE_ALIGNED(*address, VM_MAP_PAGE_MASK(map))) {
2406 /*
2407 * We've been asked to map at a fixed address and that
2408 * address is not aligned to the map's specific alignment.
2409 * The caller should know what it's doing (i.e. most likely
2410 * mapping some fragmented copy map, transferring memory from
2411 * a VM map with a different alignment), so clear map_aligned
2412 * for this new VM map entry and proceed.
2413 */
2414 clear_map_aligned = TRUE;
2415 }
2416
2417 /*
2418 * Only zero-fill objects are allowed to be purgable.
2419 * LP64todo - limit purgable objects to 32-bits for now
2420 */
2421 if (purgable &&
2422 (offset != 0 ||
2423 (object != VM_OBJECT_NULL &&
2424 (object->vo_size != size ||
2425 object->purgable == VM_PURGABLE_DENY))
2426 || size > ANON_MAX_SIZE)) { /* LP64todo: remove when dp capable */
2427 return KERN_INVALID_ARGUMENT;
2428 }
2429
2430 if (!anywhere && overwrite) {
2431 /*
2432 * Create a temporary VM map to hold the old mappings in the
2433 * affected area while we create the new one.
2434 * This avoids releasing the VM map lock in
2435 * vm_map_entry_delete() and allows atomicity
2436 * when we want to replace some mappings with a new one.
2437 * It also allows us to restore the old VM mappings if the
2438 * new mapping fails.
2439 */
2440 zap_old_map = vm_map_create(PMAP_NULL,
2441 *address,
2442 *address + size,
2443 map->hdr.entries_pageable);
2444 vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
2445 vm_map_disable_hole_optimization(zap_old_map);
2446 }
2447
2448 StartAgain:;
2449
2450 start = *address;
2451
2452 if (anywhere) {
2453 vm_map_lock(map);
2454 map_locked = TRUE;
2455
2456 if (entry_for_jit) {
2457 #if CONFIG_EMBEDDED
2458 if (map->jit_entry_exists) {
2459 result = KERN_INVALID_ARGUMENT;
2460 goto BailOut;
2461 }
2462 random_address = TRUE;
2463 #endif /* CONFIG_EMBEDDED */
2464 }
2465
2466 if (random_address) {
2467 /*
2468 * Get a random start address.
2469 */
2470 result = vm_map_random_address_for_size(map, address, size);
2471 if (result != KERN_SUCCESS) {
2472 goto BailOut;
2473 }
2474 start = *address;
2475 }
2476 #if !CONFIG_EMBEDDED
2477 else if ((start == 0 || start == vm_map_min(map)) &&
2478 !map->disable_vmentry_reuse &&
2479 map->vmmap_high_start != 0) {
2480 start = map->vmmap_high_start;
2481 }
2482 #endif
2483
2484
2485 /*
2486 * Calculate the first possible address.
2487 */
2488
2489 if (start < effective_min_offset) {
2490 start = effective_min_offset;
2491 }
2492 if (start > effective_max_offset) {
2493 RETURN(KERN_NO_SPACE);
2494 }
2495
2496 /*
2497 * Look for the first possible address;
2498 * if there's already something at this
2499 * address, we have to start after it.
2500 */
2501
2502 if (map->disable_vmentry_reuse == TRUE) {
2503 VM_MAP_HIGHEST_ENTRY(map, entry, start);
2504 } else {
2505 if (map->holelistenabled) {
2506 hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
2507
2508 if (hole_entry == NULL) {
2509 /*
2510 * No more space in the map?
2511 */
2512 result = KERN_NO_SPACE;
2513 goto BailOut;
2514 } else {
2515 boolean_t found_hole = FALSE;
2516
2517 do {
2518 if (hole_entry->vme_start >= start) {
2519 start = hole_entry->vme_start;
2520 found_hole = TRUE;
2521 break;
2522 }
2523
2524 if (hole_entry->vme_end > start) {
2525 found_hole = TRUE;
2526 break;
2527 }
2528 hole_entry = hole_entry->vme_next;
2529 } while (hole_entry != CAST_TO_VM_MAP_ENTRY(map->holes_list));
2530
2531 if (found_hole == FALSE) {
2532 result = KERN_NO_SPACE;
2533 goto BailOut;
2534 }
2535
2536 entry = hole_entry;
2537
2538 if (start == 0) {
2539 start += PAGE_SIZE_64;
2540 }
2541 }
2542 } else {
2543 assert(first_free_is_valid(map));
2544
2545 entry = map->first_free;
2546
2547 if (entry == vm_map_to_entry(map)) {
2548 entry = NULL;
2549 } else {
2550 if (entry->vme_next == vm_map_to_entry(map)) {
2551 /*
2552 * Hole at the end of the map.
2553 */
2554 entry = NULL;
2555 } else {
2556 if (start < (entry->vme_next)->vme_start) {
2557 start = entry->vme_end;
2558 start = vm_map_round_page(start,
2559 VM_MAP_PAGE_MASK(map));
2560 } else {
2561 /*
2562 * Need to do a lookup.
2563 */
2564 entry = NULL;
2565 }
2566 }
2567 }
2568
2569 if (entry == NULL) {
2570 vm_map_entry_t tmp_entry;
2571 if (vm_map_lookup_entry(map, start, &tmp_entry)) {
2572 assert(!entry_for_jit);
2573 start = tmp_entry->vme_end;
2574 start = vm_map_round_page(start,
2575 VM_MAP_PAGE_MASK(map));
2576 }
2577 entry = tmp_entry;
2578 }
2579 }
2580 }
2581
2582 /*
2583 * In any case, the "entry" always precedes
2584 * the proposed new region throughout the
2585 * loop:
2586 */
2587
2588 while (TRUE) {
2589 vm_map_entry_t next;
2590
2591 /*
2592 * Find the end of the proposed new region.
2593 * Be sure we didn't go beyond the end, or
2594 * wrap around the address.
2595 */
2596
2597 end = ((start + mask) & ~mask);
2598 end = vm_map_round_page(end,
2599 VM_MAP_PAGE_MASK(map));
2600 if (end < start) {
2601 RETURN(KERN_NO_SPACE);
2602 }
2603 start = end;
2604 assert(VM_MAP_PAGE_ALIGNED(start,
2605 VM_MAP_PAGE_MASK(map)));
2606 end += size;
2607
2608 /* We want an entire page of empty space, but don't increase the allocation size. */
2609 desired_empty_end = vm_map_round_page(end, VM_MAP_PAGE_MASK(map));
2610
2611 if ((desired_empty_end > effective_max_offset) || (desired_empty_end < start)) {
2612 if (map->wait_for_space) {
2613 assert(!keep_map_locked);
2614 if (size <= (effective_max_offset -
2615 effective_min_offset)) {
2616 assert_wait((event_t)map,
2617 THREAD_ABORTSAFE);
2618 vm_map_unlock(map);
2619 map_locked = FALSE;
2620 thread_block(THREAD_CONTINUE_NULL);
2621 goto StartAgain;
2622 }
2623 }
2624 RETURN(KERN_NO_SPACE);
2625 }
2626
2627 next = entry->vme_next;
2628
2629 if (map->holelistenabled) {
2630 if (entry->vme_end >= desired_empty_end) {
2631 break;
2632 }
2633 } else {
2634 /*
2635 * If there are no more entries, we must win.
2636 *
2637 * OR
2638 *
2639 * If there is another entry, it must be
2640 * after the end of the potential new region.
2641 */
2642
2643 if (next == vm_map_to_entry(map)) {
2644 break;
2645 }
2646
2647 if (next->vme_start >= desired_empty_end) {
2648 break;
2649 }
2650 }
2651
2652 /*
2653 * Didn't fit -- move to the next entry.
2654 */
2655
2656 entry = next;
2657
2658 if (map->holelistenabled) {
2659 if (entry == CAST_TO_VM_MAP_ENTRY(map->holes_list)) {
2660 /*
2661 * Wrapped around
2662 */
2663 result = KERN_NO_SPACE;
2664 goto BailOut;
2665 }
2666 start = entry->vme_start;
2667 } else {
2668 start = entry->vme_end;
2669 }
2670
2671 start = vm_map_round_page(start,
2672 VM_MAP_PAGE_MASK(map));
2673 }
2674
2675 if (map->holelistenabled) {
2676 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
2677 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
2678 }
2679 }
2680
2681 *address = start;
2682 assert(VM_MAP_PAGE_ALIGNED(*address,
2683 VM_MAP_PAGE_MASK(map)));
2684 } else {
2685 /*
2686 * Verify that:
2687 * the address doesn't itself violate
2688 * the mask requirement.
2689 */
2690
2691 vm_map_lock(map);
2692 map_locked = TRUE;
2693 if ((start & mask) != 0) {
2694 RETURN(KERN_NO_SPACE);
2695 }
2696
2697 /*
2698 * ... the address is within bounds
2699 */
2700
2701 end = start + size;
2702
2703 if ((start < effective_min_offset) ||
2704 (end > effective_max_offset) ||
2705 (start >= end)) {
2706 RETURN(KERN_INVALID_ADDRESS);
2707 }
2708
2709 if (overwrite && zap_old_map != VM_MAP_NULL) {
2710 int remove_flags;
2711 /*
2712 * Fixed mapping and "overwrite" flag: attempt to
2713 * remove all existing mappings in the specified
2714 * address range, saving them in our "zap_old_map".
2715 */
2716 remove_flags = VM_MAP_REMOVE_SAVE_ENTRIES;
2717 remove_flags |= VM_MAP_REMOVE_NO_MAP_ALIGN;
2718 if (vmk_flags.vmkf_overwrite_immutable) {
2719 /* we can overwrite immutable mappings */
2720 remove_flags |= VM_MAP_REMOVE_IMMUTABLE;
2721 }
2722 (void) vm_map_delete(map, start, end,
2723 remove_flags,
2724 zap_old_map);
2725 }
2726
2727 /*
2728 * ... the starting address isn't allocated
2729 */
2730
2731 if (vm_map_lookup_entry(map, start, &entry)) {
2732 if (!(vmk_flags.vmkf_already)) {
2733 RETURN(KERN_NO_SPACE);
2734 }
2735 /*
2736 * Check if what's already there is what we want.
2737 */
2738 tmp_start = start;
2739 tmp_offset = offset;
2740 if (entry->vme_start < start) {
2741 tmp_start -= start - entry->vme_start;
2742 tmp_offset -= start - entry->vme_start;
2743 }
2744 for (; entry->vme_start < end;
2745 entry = entry->vme_next) {
2746 /*
2747 * Check if the mapping's attributes
2748 * match the existing map entry.
2749 */
2750 if (entry == vm_map_to_entry(map) ||
2751 entry->vme_start != tmp_start ||
2752 entry->is_sub_map != is_submap ||
2753 VME_OFFSET(entry) != tmp_offset ||
2754 entry->needs_copy != needs_copy ||
2755 entry->protection != cur_protection ||
2756 entry->max_protection != max_protection ||
2757 entry->inheritance != inheritance ||
2758 entry->iokit_acct != iokit_acct ||
2759 VME_ALIAS(entry) != alias) {
2760 /* not the same mapping ! */
2761 RETURN(KERN_NO_SPACE);
2762 }
2763 /*
2764 * Check if the same object is being mapped.
2765 */
2766 if (is_submap) {
2767 if (VME_SUBMAP(entry) !=
2768 (vm_map_t) object) {
2769 /* not the same submap */
2770 RETURN(KERN_NO_SPACE);
2771 }
2772 } else {
2773 if (VME_OBJECT(entry) != object) {
2774 /* not the same VM object... */
2775 vm_object_t obj2;
2776
2777 obj2 = VME_OBJECT(entry);
2778 if ((obj2 == VM_OBJECT_NULL ||
2779 obj2->internal) &&
2780 (object == VM_OBJECT_NULL ||
2781 object->internal)) {
2782 /*
2783 * ... but both are
2784 * anonymous memory,
2785 * so equivalent.
2786 */
2787 } else {
2788 RETURN(KERN_NO_SPACE);
2789 }
2790 }
2791 }
2792
2793 tmp_offset += entry->vme_end - entry->vme_start;
2794 tmp_start += entry->vme_end - entry->vme_start;
2795 if (entry->vme_end >= end) {
2796 /* reached the end of our mapping */
2797 break;
2798 }
2799 }
2800 /* it all matches: let's use what's already there ! */
2801 RETURN(KERN_MEMORY_PRESENT);
2802 }
2803
2804 /*
2805 * ... the next region doesn't overlap the
2806 * end point.
2807 */
2808
2809 if ((entry->vme_next != vm_map_to_entry(map)) &&
2810 (entry->vme_next->vme_start < end)) {
2811 RETURN(KERN_NO_SPACE);
2812 }
2813 }
2814
2815 /*
2816 * At this point,
2817 * "start" and "end" should define the endpoints of the
2818 * available new range, and
2819 * "entry" should refer to the region before the new
2820 * range, and
2821 *
2822 * the map should be locked.
2823 */
2824
2825 /*
2826 * See whether we can avoid creating a new entry (and object) by
2827 * extending one of our neighbors. [So far, we only attempt to
2828 * extend from below.] Note that we can never extend/join
2829 * purgable objects because they need to remain distinct
2830 * entities in order to implement their "volatile object"
2831 * semantics.
2832 */
2833
2834 if (purgable ||
2835 entry_for_jit ||
2836 vm_memory_malloc_no_cow(user_alias)) {
2837 if (object == VM_OBJECT_NULL) {
2838 object = vm_object_allocate(size);
2839 object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
2840 object->true_share = FALSE;
2841 if (purgable) {
2842 task_t owner;
2843 object->purgable = VM_PURGABLE_NONVOLATILE;
2844 if (map->pmap == kernel_pmap) {
2845 /*
2846 * Purgeable mappings made in a kernel
2847 * map are "owned" by the kernel itself
2848 * rather than the current user task
2849 * because they're likely to be used by
2850 * more than this user task (see
2851 * execargs_purgeable_allocate(), for
2852 * example).
2853 */
2854 owner = kernel_task;
2855 } else {
2856 owner = current_task();
2857 }
2858 assert(object->vo_owner == NULL);
2859 assert(object->resident_page_count == 0);
2860 assert(object->wired_page_count == 0);
2861 vm_object_lock(object);
2862 vm_purgeable_nonvolatile_enqueue(object, owner);
2863 vm_object_unlock(object);
2864 }
2865 offset = (vm_object_offset_t)0;
2866 }
2867 } else if ((is_submap == FALSE) &&
2868 (object == VM_OBJECT_NULL) &&
2869 (entry != vm_map_to_entry(map)) &&
2870 (entry->vme_end == start) &&
2871 (!entry->is_shared) &&
2872 (!entry->is_sub_map) &&
2873 (!entry->in_transition) &&
2874 (!entry->needs_wakeup) &&
2875 (entry->behavior == VM_BEHAVIOR_DEFAULT) &&
2876 (entry->protection == cur_protection) &&
2877 (entry->max_protection == max_protection) &&
2878 (entry->inheritance == inheritance) &&
2879 ((user_alias == VM_MEMORY_REALLOC) ||
2880 (VME_ALIAS(entry) == alias)) &&
2881 (entry->no_cache == no_cache) &&
2882 (entry->permanent == permanent) &&
2883 /* no coalescing for immutable executable mappings */
2884 !((entry->protection & VM_PROT_EXECUTE) &&
2885 entry->permanent) &&
2886 (!entry->superpage_size && !superpage_size) &&
2887 /*
2888 * No coalescing if not map-aligned, to avoid propagating
2889 * that condition any further than needed:
2890 */
2891 (!entry->map_aligned || !clear_map_aligned) &&
2892 (!entry->zero_wired_pages) &&
2893 (!entry->used_for_jit && !entry_for_jit) &&
2894 (!entry->pmap_cs_associated) &&
2895 (entry->iokit_acct == iokit_acct) &&
2896 (!entry->vme_resilient_codesign) &&
2897 (!entry->vme_resilient_media) &&
2898 (!entry->vme_atomic) &&
2899 (entry->vme_no_copy_on_read == no_copy_on_read) &&
2900
2901 ((entry->vme_end - entry->vme_start) + size <=
2902 (user_alias == VM_MEMORY_REALLOC ?
2903 ANON_CHUNK_SIZE :
2904 NO_COALESCE_LIMIT)) &&
2905
2906 (entry->wired_count == 0)) { /* implies user_wired_count == 0 */
2907 if (vm_object_coalesce(VME_OBJECT(entry),
2908 VM_OBJECT_NULL,
2909 VME_OFFSET(entry),
2910 (vm_object_offset_t) 0,
2911 (vm_map_size_t)(entry->vme_end - entry->vme_start),
2912 (vm_map_size_t)(end - entry->vme_end))) {
2913 /*
2914 * Coalesced the two objects - can extend
2915 * the previous map entry to include the
2916 * new range.
2917 */
2918 map->size += (end - entry->vme_end);
2919 assert(entry->vme_start < end);
2920 assert(VM_MAP_PAGE_ALIGNED(end,
2921 VM_MAP_PAGE_MASK(map)));
2922 if (__improbable(vm_debug_events)) {
2923 DTRACE_VM5(map_entry_extend, vm_map_t, map, vm_map_entry_t, entry, vm_address_t, entry->vme_start, vm_address_t, entry->vme_end, vm_address_t, end);
2924 }
2925 entry->vme_end = end;
2926 if (map->holelistenabled) {
2927 vm_map_store_update_first_free(map, entry, TRUE);
2928 } else {
2929 vm_map_store_update_first_free(map, map->first_free, TRUE);
2930 }
2931 new_mapping_established = TRUE;
2932 RETURN(KERN_SUCCESS);
2933 }
2934 }
2935
2936 step = superpage_size ? SUPERPAGE_SIZE : (end - start);
2937 new_entry = NULL;
2938
2939 for (tmp2_start = start; tmp2_start < end; tmp2_start += step) {
2940 tmp2_end = tmp2_start + step;
2941 /*
2942 * Create a new entry
2943 *
2944 * XXX FBDP
2945 * The reserved "page zero" in each process's address space can
2946 * be arbitrarily large. Splitting it into separate objects and
2947 * therefore different VM map entries serves no purpose and just
2948 * slows down operations on the VM map, so let's not split the
2949 * allocation into chunks if the max protection is NONE. That
2950 * memory should never be accessible, so it will never get to the
2951 * default pager.
2952 */
2953 tmp_start = tmp2_start;
2954 if (object == VM_OBJECT_NULL &&
2955 size > chunk_size &&
2956 max_protection != VM_PROT_NONE &&
2957 superpage_size == 0) {
2958 tmp_end = tmp_start + chunk_size;
2959 } else {
2960 tmp_end = tmp2_end;
2961 }
2962 do {
2963 new_entry = vm_map_entry_insert(
2964 map, entry, tmp_start, tmp_end,
2965 object, offset, needs_copy,
2966 FALSE, FALSE,
2967 cur_protection, max_protection,
2968 VM_BEHAVIOR_DEFAULT,
2969 (entry_for_jit)? VM_INHERIT_NONE: inheritance,
2970 0,
2971 no_cache,
2972 permanent,
2973 no_copy_on_read,
2974 superpage_size,
2975 clear_map_aligned,
2976 is_submap,
2977 entry_for_jit,
2978 alias);
2979
2980 assert((object != kernel_object) || (VM_KERN_MEMORY_NONE != alias));
2981
2982 if (resilient_codesign &&
2983 !((cur_protection | max_protection) &
2984 (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
2985 new_entry->vme_resilient_codesign = TRUE;
2986 }
2987
2988 if (resilient_media &&
2989 (object == VM_OBJECT_NULL ||
2990 object->internal)) {
2991 new_entry->vme_resilient_media = TRUE;
2992 }
2993
2994 assert(!new_entry->iokit_acct);
2995 if (!is_submap &&
2996 object != VM_OBJECT_NULL &&
2997 (object->purgable != VM_PURGABLE_DENY ||
2998 object->vo_ledger_tag)) {
2999 assert(new_entry->use_pmap);
3000 assert(!new_entry->iokit_acct);
3001 /*
3002 * Turn off pmap accounting since
3003 * purgeable (or tagged) objects have their
3004 * own ledgers.
3005 */
3006 new_entry->use_pmap = FALSE;
3007 } else if (!is_submap &&
3008 iokit_acct &&
3009 object != VM_OBJECT_NULL &&
3010 object->internal) {
3011 /* alternate accounting */
3012 assert(!new_entry->iokit_acct);
3013 assert(new_entry->use_pmap);
3014 new_entry->iokit_acct = TRUE;
3015 new_entry->use_pmap = FALSE;
3016 DTRACE_VM4(
3017 vm_map_iokit_mapped_region,
3018 vm_map_t, map,
3019 vm_map_offset_t, new_entry->vme_start,
3020 vm_map_offset_t, new_entry->vme_end,
3021 int, VME_ALIAS(new_entry));
3022 vm_map_iokit_mapped_region(
3023 map,
3024 (new_entry->vme_end -
3025 new_entry->vme_start));
3026 } else if (!is_submap) {
3027 assert(!new_entry->iokit_acct);
3028 assert(new_entry->use_pmap);
3029 }
3030
3031 if (is_submap) {
3032 vm_map_t submap;
3033 boolean_t submap_is_64bit;
3034 boolean_t use_pmap;
3035
3036 assert(new_entry->is_sub_map);
3037 assert(!new_entry->use_pmap);
3038 assert(!new_entry->iokit_acct);
3039 submap = (vm_map_t) object;
3040 submap_is_64bit = vm_map_is_64bit(submap);
3041 use_pmap = vmk_flags.vmkf_nested_pmap;
3042 #ifndef NO_NESTED_PMAP
3043 if (use_pmap && submap->pmap == NULL) {
3044 ledger_t ledger = map->pmap->ledger;
3045 /* we need a sub pmap to nest... */
3046 submap->pmap = pmap_create_options(ledger, 0,
3047 submap_is_64bit ? PMAP_CREATE_64BIT : 0);
3048 if (submap->pmap == NULL) {
3049 /* let's proceed without nesting... */
3050 }
3051 #if defined(__arm__) || defined(__arm64__)
3052 else {
3053 pmap_set_nested(submap->pmap);
3054 }
3055 #endif
3056 }
3057 if (use_pmap && submap->pmap != NULL) {
3058 kr = pmap_nest(map->pmap,
3059 submap->pmap,
3060 tmp_start,
3061 tmp_start,
3062 tmp_end - tmp_start);
3063 if (kr != KERN_SUCCESS) {
3064 printf("vm_map_enter: "
3065 "pmap_nest(0x%llx,0x%llx) "
3066 "error 0x%x\n",
3067 (long long)tmp_start,
3068 (long long)tmp_end,
3069 kr);
3070 } else {
3071 /* we're now nested ! */
3072 new_entry->use_pmap = TRUE;
3073 pmap_empty = FALSE;
3074 }
3075 }
3076 #endif /* NO_NESTED_PMAP */
3077 }
3078 entry = new_entry;
3079
3080 if (superpage_size) {
3081 vm_page_t pages, m;
3082 vm_object_t sp_object;
3083 vm_object_offset_t sp_offset;
3084
3085 VME_OFFSET_SET(entry, 0);
3086
3087 /* allocate one superpage */
3088 kr = cpm_allocate(SUPERPAGE_SIZE, &pages, 0, SUPERPAGE_NBASEPAGES - 1, TRUE, 0);
3089 if (kr != KERN_SUCCESS) {
3090 /* deallocate whole range... */
3091 new_mapping_established = TRUE;
3092 /* ... but only up to "tmp_end" */
3093 size -= end - tmp_end;
3094 RETURN(kr);
3095 }
3096
3097 /* create one vm_object per superpage */
3098 sp_object = vm_object_allocate((vm_map_size_t)(entry->vme_end - entry->vme_start));
3099 sp_object->phys_contiguous = TRUE;
3100 sp_object->vo_shadow_offset = (vm_object_offset_t)VM_PAGE_GET_PHYS_PAGE(pages) * PAGE_SIZE;
3101 VME_OBJECT_SET(entry, sp_object);
3102 assert(entry->use_pmap);
3103
3104 /* enter the base pages into the object */
3105 vm_object_lock(sp_object);
3106 for (sp_offset = 0;
3107 sp_offset < SUPERPAGE_SIZE;
3108 sp_offset += PAGE_SIZE) {
3109 m = pages;
3110 pmap_zero_page(VM_PAGE_GET_PHYS_PAGE(m));
3111 pages = NEXT_PAGE(m);
3112 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
3113 vm_page_insert_wired(m, sp_object, sp_offset, VM_KERN_MEMORY_OSFMK);
3114 }
3115 vm_object_unlock(sp_object);
3116 }
3117 } while (tmp_end != tmp2_end &&
3118 (tmp_start = tmp_end) &&
3119 (tmp_end = (tmp2_end - tmp_end > chunk_size) ?
3120 tmp_end + chunk_size : tmp2_end));
3121 }
3122
3123 new_mapping_established = TRUE;
3124
3125 BailOut:
3126 assert(map_locked == TRUE);
3127
3128 if (result == KERN_SUCCESS) {
3129 vm_prot_t pager_prot;
3130 memory_object_t pager;
3131
3132 #if DEBUG
3133 if (pmap_empty &&
3134 !(vmk_flags.vmkf_no_pmap_check)) {
3135 assert(vm_map_pmap_is_empty(map,
3136 *address,
3137 *address + size));
3138 }
3139 #endif /* DEBUG */
3140
3141 /*
3142 * For "named" VM objects, let the pager know that the
3143 * memory object is being mapped. Some pagers need to keep
3144 * track of this, to know when they can reclaim the memory
3145 * object, for example.
3146 * VM calls memory_object_map() for each mapping (specifying
3147 * the protection of each mapping) and calls
3148 * memory_object_last_unmap() when all the mappings are gone.
3149 */
3150 pager_prot = max_protection;
3151 if (needs_copy) {
3152 /*
3153 * Copy-On-Write mapping: won't modify
3154 * the memory object.
3155 */
3156 pager_prot &= ~VM_PROT_WRITE;
3157 }
3158 if (!is_submap &&
3159 object != VM_OBJECT_NULL &&
3160 object->named &&
3161 object->pager != MEMORY_OBJECT_NULL) {
3162 vm_object_lock(object);
3163 pager = object->pager;
3164 if (object->named &&
3165 pager != MEMORY_OBJECT_NULL) {
3166 assert(object->pager_ready);
3167 vm_object_mapping_wait(object, THREAD_UNINT);
3168 vm_object_mapping_begin(object);
3169 vm_object_unlock(object);
3170
3171 kr = memory_object_map(pager, pager_prot);
3172 assert(kr == KERN_SUCCESS);
3173
3174 vm_object_lock(object);
3175 vm_object_mapping_end(object);
3176 }
3177 vm_object_unlock(object);
3178 }
3179 }
3180
3181 assert(map_locked == TRUE);
3182
3183 if (!keep_map_locked) {
3184 vm_map_unlock(map);
3185 map_locked = FALSE;
3186 }
3187
3188 /*
3189 * We can't hold the map lock if we enter this block.
3190 */
3191
3192 if (result == KERN_SUCCESS) {
3193 /* Wire down the new entry if the user
3194 * requested all new map entries be wired.
3195 */
3196 if ((map->wiring_required) || (superpage_size)) {
3197 assert(!keep_map_locked);
3198 pmap_empty = FALSE; /* pmap won't be empty */
3199 kr = vm_map_wire_kernel(map, start, end,
3200 new_entry->protection, VM_KERN_MEMORY_MLOCK,
3201 TRUE);
3202 result = kr;
3203 }
3204
3205 }
3206
3207 if (result != KERN_SUCCESS) {
3208 if (new_mapping_established) {
3209 /*
3210 * We have to get rid of the new mappings since we
3211 * won't make them available to the user.
3212 * Try and do that atomically, to minimize the risk
3213 * that someone else creates new mappings in that range.
3214 */
3215 zap_new_map = vm_map_create(PMAP_NULL,
3216 *address,
3217 *address + size,
3218 map->hdr.entries_pageable);
3219 vm_map_set_page_shift(zap_new_map,
3220 VM_MAP_PAGE_SHIFT(map));
3221 vm_map_disable_hole_optimization(zap_new_map);
3222
3223 if (!map_locked) {
3224 vm_map_lock(map);
3225 map_locked = TRUE;
3226 }
3227 (void) vm_map_delete(map, *address, *address + size,
3228 (VM_MAP_REMOVE_SAVE_ENTRIES |
3229 VM_MAP_REMOVE_NO_MAP_ALIGN),
3230 zap_new_map);
3231 }
3232 if (zap_old_map != VM_MAP_NULL &&
3233 zap_old_map->hdr.nentries != 0) {
3234 vm_map_entry_t entry1, entry2;
3235
3236 /*
3237 * The new mapping failed. Attempt to restore
3238 * the old mappings, saved in the "zap_old_map".
3239 */
3240 if (!map_locked) {
3241 vm_map_lock(map);
3242 map_locked = TRUE;
3243 }
3244
3245 /* first check if the coast is still clear */
3246 start = vm_map_first_entry(zap_old_map)->vme_start;
3247 end = vm_map_last_entry(zap_old_map)->vme_end;
3248 if (vm_map_lookup_entry(map, start, &entry1) ||
3249 vm_map_lookup_entry(map, end, &entry2) ||
3250 entry1 != entry2) {
3251 /*
3252 * Part of that range has already been
3253 * re-mapped: we can't restore the old
3254 * mappings...
3255 */
3256 vm_map_enter_restore_failures++;
3257 } else {
3258 /*
3259 * Transfer the saved map entries from
3260 * "zap_old_map" to the original "map",
3261 * inserting them all after "entry1".
3262 */
3263 for (entry2 = vm_map_first_entry(zap_old_map);
3264 entry2 != vm_map_to_entry(zap_old_map);
3265 entry2 = vm_map_first_entry(zap_old_map)) {
3266 vm_map_size_t entry_size;
3267
3268 entry_size = (entry2->vme_end -
3269 entry2->vme_start);
3270 vm_map_store_entry_unlink(zap_old_map,
3271 entry2);
3272 zap_old_map->size -= entry_size;
3273 vm_map_store_entry_link(map, entry1, entry2,
3274 VM_MAP_KERNEL_FLAGS_NONE);
3275 map->size += entry_size;
3276 entry1 = entry2;
3277 }
3278 if (map->wiring_required) {
3279 /*
3280 * XXX TODO: we should rewire the
3281 * old pages here...
3282 */
3283 }
3284 vm_map_enter_restore_successes++;
3285 }
3286 }
3287 }
3288
3289 /*
3290 * The caller is responsible for releasing the lock if it requested to
3291 * keep the map locked.
3292 */
3293 if (map_locked && !keep_map_locked) {
3294 vm_map_unlock(map);
3295 }
3296
3297 /*
3298 * Get rid of the "zap_maps" and all the map entries that
3299 * they may still contain.
3300 */
3301 if (zap_old_map != VM_MAP_NULL) {
3302 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3303 zap_old_map = VM_MAP_NULL;
3304 }
3305 if (zap_new_map != VM_MAP_NULL) {
3306 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3307 zap_new_map = VM_MAP_NULL;
3308 }
3309
3310 return result;
3311
3312 #undef RETURN
3313 }
3314
3315 #if __arm64__
3316 extern const struct memory_object_pager_ops fourk_pager_ops;
3317 kern_return_t
3318 vm_map_enter_fourk(
3319 vm_map_t map,
3320 vm_map_offset_t *address, /* IN/OUT */
3321 vm_map_size_t size,
3322 vm_map_offset_t mask,
3323 int flags,
3324 vm_map_kernel_flags_t vmk_flags,
3325 vm_tag_t alias,
3326 vm_object_t object,
3327 vm_object_offset_t offset,
3328 boolean_t needs_copy,
3329 vm_prot_t cur_protection,
3330 vm_prot_t max_protection,
3331 vm_inherit_t inheritance)
3332 {
3333 vm_map_entry_t entry, new_entry;
3334 vm_map_offset_t start, fourk_start;
3335 vm_map_offset_t end, fourk_end;
3336 vm_map_size_t fourk_size;
3337 kern_return_t result = KERN_SUCCESS;
3338 vm_map_t zap_old_map = VM_MAP_NULL;
3339 vm_map_t zap_new_map = VM_MAP_NULL;
3340 boolean_t map_locked = FALSE;
3341 boolean_t pmap_empty = TRUE;
3342 boolean_t new_mapping_established = FALSE;
3343 boolean_t keep_map_locked = vmk_flags.vmkf_keep_map_locked;
3344 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
3345 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
3346 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
3347 boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
3348 boolean_t is_submap = vmk_flags.vmkf_submap;
3349 boolean_t permanent = vmk_flags.vmkf_permanent;
3350 boolean_t no_copy_on_read = vmk_flags.vmkf_permanent;
3351 boolean_t entry_for_jit = vmk_flags.vmkf_map_jit;
3352 // boolean_t iokit_acct = vmk_flags.vmkf_iokit_acct;
3353 unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
3354 vm_map_offset_t effective_min_offset, effective_max_offset;
3355 kern_return_t kr;
3356 boolean_t clear_map_aligned = FALSE;
3357 memory_object_t fourk_mem_obj;
3358 vm_object_t fourk_object;
3359 vm_map_offset_t fourk_pager_offset;
3360 int fourk_pager_index_start, fourk_pager_index_num;
3361 int cur_idx;
3362 boolean_t fourk_copy;
3363 vm_object_t copy_object;
3364 vm_object_offset_t copy_offset;
3365
3366 fourk_mem_obj = MEMORY_OBJECT_NULL;
3367 fourk_object = VM_OBJECT_NULL;
3368
3369 if (superpage_size) {
3370 return KERN_NOT_SUPPORTED;
3371 }
3372
3373 if ((cur_protection & VM_PROT_WRITE) &&
3374 (cur_protection & VM_PROT_EXECUTE) &&
3375 #if !CONFIG_EMBEDDED
3376 map != kernel_map &&
3377 cs_process_enforcement(NULL) &&
3378 #endif /* !CONFIG_EMBEDDED */
3379 !entry_for_jit) {
3380 DTRACE_VM3(cs_wx,
3381 uint64_t, 0,
3382 uint64_t, 0,
3383 vm_prot_t, cur_protection);
3384 printf("CODE SIGNING: %d[%s] %s: curprot cannot be write+execute. "
3385 "turning off execute\n",
3386 proc_selfpid(),
3387 (current_task()->bsd_info
3388 ? proc_name_address(current_task()->bsd_info)
3389 : "?"),
3390 __FUNCTION__);
3391 cur_protection &= ~VM_PROT_EXECUTE;
3392 }
3393
3394 /*
3395 * If the task has requested executable lockdown,
3396 * deny any new executable mapping.
3397 */
3398 if (map->map_disallow_new_exec == TRUE) {
3399 if (cur_protection & VM_PROT_EXECUTE) {
3400 return KERN_PROTECTION_FAILURE;
3401 }
3402 }
3403
3404 if (is_submap) {
3405 return KERN_NOT_SUPPORTED;
3406 }
3407 if (vmk_flags.vmkf_already) {
3408 return KERN_NOT_SUPPORTED;
3409 }
3410 if (purgable || entry_for_jit) {
3411 return KERN_NOT_SUPPORTED;
3412 }
3413
3414 effective_min_offset = map->min_offset;
3415
3416 if (vmk_flags.vmkf_beyond_max) {
3417 return KERN_NOT_SUPPORTED;
3418 } else {
3419 effective_max_offset = map->max_offset;
3420 }
3421
3422 if (size == 0 ||
3423 (offset & FOURK_PAGE_MASK) != 0) {
3424 *address = 0;
3425 return KERN_INVALID_ARGUMENT;
3426 }
3427
3428 #define RETURN(value) { result = value; goto BailOut; }
3429
3430 assert(VM_MAP_PAGE_ALIGNED(*address, FOURK_PAGE_MASK));
3431 assert(VM_MAP_PAGE_ALIGNED(size, FOURK_PAGE_MASK));
3432
3433 if (!anywhere && overwrite) {
3434 return KERN_NOT_SUPPORTED;
3435 }
3436 if (!anywhere && overwrite) {
3437 /*
3438 * Create a temporary VM map to hold the old mappings in the
3439 * affected area while we create the new one.
3440 * This avoids releasing the VM map lock in
3441 * vm_map_entry_delete() and allows atomicity
3442 * when we want to replace some mappings with a new one.
3443 * It also allows us to restore the old VM mappings if the
3444 * new mapping fails.
3445 */
3446 zap_old_map = vm_map_create(PMAP_NULL,
3447 *address,
3448 *address + size,
3449 map->hdr.entries_pageable);
3450 vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
3451 vm_map_disable_hole_optimization(zap_old_map);
3452 }
3453
3454 fourk_start = *address;
3455 fourk_size = size;
3456 fourk_end = fourk_start + fourk_size;
3457
3458 start = vm_map_trunc_page(*address, VM_MAP_PAGE_MASK(map));
3459 end = vm_map_round_page(fourk_end, VM_MAP_PAGE_MASK(map));
3460 size = end - start;
3461
3462 if (anywhere) {
3463 return KERN_NOT_SUPPORTED;
3464 } else {
3465 /*
3466 * Verify that:
3467 * the address doesn't itself violate
3468 * the mask requirement.
3469 */
3470
3471 vm_map_lock(map);
3472 map_locked = TRUE;
3473 if ((start & mask) != 0) {
3474 RETURN(KERN_NO_SPACE);
3475 }
3476
3477 /*
3478 * ... the address is within bounds
3479 */
3480
3481 end = start + size;
3482
3483 if ((start < effective_min_offset) ||
3484 (end > effective_max_offset) ||
3485 (start >= end)) {
3486 RETURN(KERN_INVALID_ADDRESS);
3487 }
3488
3489 if (overwrite && zap_old_map != VM_MAP_NULL) {
3490 /*
3491 * Fixed mapping and "overwrite" flag: attempt to
3492 * remove all existing mappings in the specified
3493 * address range, saving them in our "zap_old_map".
3494 */
3495 (void) vm_map_delete(map, start, end,
3496 (VM_MAP_REMOVE_SAVE_ENTRIES |
3497 VM_MAP_REMOVE_NO_MAP_ALIGN),
3498 zap_old_map);
3499 }
3500
3501 /*
3502 * ... the starting address isn't allocated
3503 */
3504 if (vm_map_lookup_entry(map, start, &entry)) {
3505 vm_object_t cur_object, shadow_object;
3506
3507 /*
3508 * We might already some 4K mappings
3509 * in a 16K page here.
3510 */
3511
3512 if (entry->vme_end - entry->vme_start
3513 != SIXTEENK_PAGE_SIZE) {
3514 RETURN(KERN_NO_SPACE);
3515 }
3516 if (entry->is_sub_map) {
3517 RETURN(KERN_NO_SPACE);
3518 }
3519 if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
3520 RETURN(KERN_NO_SPACE);
3521 }
3522
3523 /* go all the way down the shadow chain */
3524 cur_object = VME_OBJECT(entry);
3525 vm_object_lock(cur_object);
3526 while (cur_object->shadow != VM_OBJECT_NULL) {
3527 shadow_object = cur_object->shadow;
3528 vm_object_lock(shadow_object);
3529 vm_object_unlock(cur_object);
3530 cur_object = shadow_object;
3531 shadow_object = VM_OBJECT_NULL;
3532 }
3533 if (cur_object->internal ||
3534 cur_object->pager == NULL) {
3535 vm_object_unlock(cur_object);
3536 RETURN(KERN_NO_SPACE);
3537 }
3538 if (cur_object->pager->mo_pager_ops
3539 != &fourk_pager_ops) {
3540 vm_object_unlock(cur_object);
3541 RETURN(KERN_NO_SPACE);
3542 }
3543 fourk_object = cur_object;
3544 fourk_mem_obj = fourk_object->pager;
3545
3546 /* keep the "4K" object alive */
3547 vm_object_reference_locked(fourk_object);
3548 vm_object_unlock(fourk_object);
3549
3550 /* merge permissions */
3551 entry->protection |= cur_protection;
3552 entry->max_protection |= max_protection;
3553 if ((entry->protection & (VM_PROT_WRITE |
3554 VM_PROT_EXECUTE)) ==
3555 (VM_PROT_WRITE | VM_PROT_EXECUTE) &&
3556 fourk_binary_compatibility_unsafe &&
3557 fourk_binary_compatibility_allow_wx) {
3558 /* write+execute: need to be "jit" */
3559 entry->used_for_jit = TRUE;
3560 }
3561
3562 goto map_in_fourk_pager;
3563 }
3564
3565 /*
3566 * ... the next region doesn't overlap the
3567 * end point.
3568 */
3569
3570 if ((entry->vme_next != vm_map_to_entry(map)) &&
3571 (entry->vme_next->vme_start < end)) {
3572 RETURN(KERN_NO_SPACE);
3573 }
3574 }
3575
3576 /*
3577 * At this point,
3578 * "start" and "end" should define the endpoints of the
3579 * available new range, and
3580 * "entry" should refer to the region before the new
3581 * range, and
3582 *
3583 * the map should be locked.
3584 */
3585
3586 /* create a new "4K" pager */
3587 fourk_mem_obj = fourk_pager_create();
3588 fourk_object = fourk_pager_to_vm_object(fourk_mem_obj);
3589 assert(fourk_object);
3590
3591 /* keep the "4" object alive */
3592 vm_object_reference(fourk_object);
3593
3594 /* create a "copy" object, to map the "4K" object copy-on-write */
3595 fourk_copy = TRUE;
3596 result = vm_object_copy_strategically(fourk_object,
3597 0,
3598 end - start,
3599 &copy_object,
3600 &copy_offset,
3601 &fourk_copy);
3602 assert(result == KERN_SUCCESS);
3603 assert(copy_object != VM_OBJECT_NULL);
3604 assert(copy_offset == 0);
3605
3606 /* take a reference on the copy object, for this mapping */
3607 vm_object_reference(copy_object);
3608
3609 /* map the "4K" pager's copy object */
3610 new_entry =
3611 vm_map_entry_insert(map, entry,
3612 vm_map_trunc_page(start,
3613 VM_MAP_PAGE_MASK(map)),
3614 vm_map_round_page(end,
3615 VM_MAP_PAGE_MASK(map)),
3616 copy_object,
3617 0, /* offset */
3618 FALSE, /* needs_copy */
3619 FALSE,
3620 FALSE,
3621 cur_protection, max_protection,
3622 VM_BEHAVIOR_DEFAULT,
3623 ((entry_for_jit)
3624 ? VM_INHERIT_NONE
3625 : inheritance),
3626 0,
3627 no_cache,
3628 permanent,
3629 no_copy_on_read,
3630 superpage_size,
3631 clear_map_aligned,
3632 is_submap,
3633 FALSE, /* jit */
3634 alias);
3635 entry = new_entry;
3636
3637 #if VM_MAP_DEBUG_FOURK
3638 if (vm_map_debug_fourk) {
3639 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] new pager %p\n",
3640 map,
3641 (uint64_t) entry->vme_start,
3642 (uint64_t) entry->vme_end,
3643 fourk_mem_obj);
3644 }
3645 #endif /* VM_MAP_DEBUG_FOURK */
3646
3647 new_mapping_established = TRUE;
3648
3649 map_in_fourk_pager:
3650 /* "map" the original "object" where it belongs in the "4K" pager */
3651 fourk_pager_offset = (fourk_start & SIXTEENK_PAGE_MASK);
3652 fourk_pager_index_start = (int) (fourk_pager_offset / FOURK_PAGE_SIZE);
3653 if (fourk_size > SIXTEENK_PAGE_SIZE) {
3654 fourk_pager_index_num = 4;
3655 } else {
3656 fourk_pager_index_num = (int) (fourk_size / FOURK_PAGE_SIZE);
3657 }
3658 if (fourk_pager_index_start + fourk_pager_index_num > 4) {
3659 fourk_pager_index_num = 4 - fourk_pager_index_start;
3660 }
3661 for (cur_idx = 0;
3662 cur_idx < fourk_pager_index_num;
3663 cur_idx++) {
3664 vm_object_t old_object;
3665 vm_object_offset_t old_offset;
3666
3667 kr = fourk_pager_populate(fourk_mem_obj,
3668 TRUE, /* overwrite */
3669 fourk_pager_index_start + cur_idx,
3670 object,
3671 (object
3672 ? (offset +
3673 (cur_idx * FOURK_PAGE_SIZE))
3674 : 0),
3675 &old_object,
3676 &old_offset);
3677 #if VM_MAP_DEBUG_FOURK
3678 if (vm_map_debug_fourk) {
3679 if (old_object == (vm_object_t) -1 &&
3680 old_offset == (vm_object_offset_t) -1) {
3681 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] "
3682 "pager [%p:0x%llx] "
3683 "populate[%d] "
3684 "[object:%p,offset:0x%llx]\n",
3685 map,
3686 (uint64_t) entry->vme_start,
3687 (uint64_t) entry->vme_end,
3688 fourk_mem_obj,
3689 VME_OFFSET(entry),
3690 fourk_pager_index_start + cur_idx,
3691 object,
3692 (object
3693 ? (offset + (cur_idx * FOURK_PAGE_SIZE))
3694 : 0));
3695 } else {
3696 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] "
3697 "pager [%p:0x%llx] "
3698 "populate[%d] [object:%p,offset:0x%llx] "
3699 "old [%p:0x%llx]\n",
3700 map,
3701 (uint64_t) entry->vme_start,
3702 (uint64_t) entry->vme_end,
3703 fourk_mem_obj,
3704 VME_OFFSET(entry),
3705 fourk_pager_index_start + cur_idx,
3706 object,
3707 (object
3708 ? (offset + (cur_idx * FOURK_PAGE_SIZE))
3709 : 0),
3710 old_object,
3711 old_offset);
3712 }
3713 }
3714 #endif /* VM_MAP_DEBUG_FOURK */
3715
3716 assert(kr == KERN_SUCCESS);
3717 if (object != old_object &&
3718 object != VM_OBJECT_NULL &&
3719 object != (vm_object_t) -1) {
3720 vm_object_reference(object);
3721 }
3722 if (object != old_object &&
3723 old_object != VM_OBJECT_NULL &&
3724 old_object != (vm_object_t) -1) {
3725 vm_object_deallocate(old_object);
3726 }
3727 }
3728
3729 BailOut:
3730 assert(map_locked == TRUE);
3731
3732 if (fourk_object != VM_OBJECT_NULL) {
3733 vm_object_deallocate(fourk_object);
3734 fourk_object = VM_OBJECT_NULL;
3735 fourk_mem_obj = MEMORY_OBJECT_NULL;
3736 }
3737
3738 if (result == KERN_SUCCESS) {
3739 vm_prot_t pager_prot;
3740 memory_object_t pager;
3741
3742 #if DEBUG
3743 if (pmap_empty &&
3744 !(vmk_flags.vmkf_no_pmap_check)) {
3745 assert(vm_map_pmap_is_empty(map,
3746 *address,
3747 *address + size));
3748 }
3749 #endif /* DEBUG */
3750
3751 /*
3752 * For "named" VM objects, let the pager know that the
3753 * memory object is being mapped. Some pagers need to keep
3754 * track of this, to know when they can reclaim the memory
3755 * object, for example.
3756 * VM calls memory_object_map() for each mapping (specifying
3757 * the protection of each mapping) and calls
3758 * memory_object_last_unmap() when all the mappings are gone.
3759 */
3760 pager_prot = max_protection;
3761 if (needs_copy) {
3762 /*
3763 * Copy-On-Write mapping: won't modify
3764 * the memory object.
3765 */
3766 pager_prot &= ~VM_PROT_WRITE;
3767 }
3768 if (!is_submap &&
3769 object != VM_OBJECT_NULL &&
3770 object->named &&
3771 object->pager != MEMORY_OBJECT_NULL) {
3772 vm_object_lock(object);
3773 pager = object->pager;
3774 if (object->named &&
3775 pager != MEMORY_OBJECT_NULL) {
3776 assert(object->pager_ready);
3777 vm_object_mapping_wait(object, THREAD_UNINT);
3778 vm_object_mapping_begin(object);
3779 vm_object_unlock(object);
3780
3781 kr = memory_object_map(pager, pager_prot);
3782 assert(kr == KERN_SUCCESS);
3783
3784 vm_object_lock(object);
3785 vm_object_mapping_end(object);
3786 }
3787 vm_object_unlock(object);
3788 }
3789 if (!is_submap &&
3790 fourk_object != VM_OBJECT_NULL &&
3791 fourk_object->named &&
3792 fourk_object->pager != MEMORY_OBJECT_NULL) {
3793 vm_object_lock(fourk_object);
3794 pager = fourk_object->pager;
3795 if (fourk_object->named &&
3796 pager != MEMORY_OBJECT_NULL) {
3797 assert(fourk_object->pager_ready);
3798 vm_object_mapping_wait(fourk_object,
3799 THREAD_UNINT);
3800 vm_object_mapping_begin(fourk_object);
3801 vm_object_unlock(fourk_object);
3802
3803 kr = memory_object_map(pager, VM_PROT_READ);
3804 assert(kr == KERN_SUCCESS);
3805
3806 vm_object_lock(fourk_object);
3807 vm_object_mapping_end(fourk_object);
3808 }
3809 vm_object_unlock(fourk_object);
3810 }
3811 }
3812
3813 assert(map_locked == TRUE);
3814
3815 if (!keep_map_locked) {
3816 vm_map_unlock(map);
3817 map_locked = FALSE;
3818 }
3819
3820 /*
3821 * We can't hold the map lock if we enter this block.
3822 */
3823
3824 if (result == KERN_SUCCESS) {
3825 /* Wire down the new entry if the user
3826 * requested all new map entries be wired.
3827 */
3828 if ((map->wiring_required) || (superpage_size)) {
3829 assert(!keep_map_locked);
3830 pmap_empty = FALSE; /* pmap won't be empty */
3831 kr = vm_map_wire_kernel(map, start, end,
3832 new_entry->protection, VM_KERN_MEMORY_MLOCK,
3833 TRUE);
3834 result = kr;
3835 }
3836
3837 }
3838
3839 if (result != KERN_SUCCESS) {
3840 if (new_mapping_established) {
3841 /*
3842 * We have to get rid of the new mappings since we
3843 * won't make them available to the user.
3844 * Try and do that atomically, to minimize the risk
3845 * that someone else create new mappings that range.
3846 */
3847 zap_new_map = vm_map_create(PMAP_NULL,
3848 *address,
3849 *address + size,
3850 map->hdr.entries_pageable);
3851 vm_map_set_page_shift(zap_new_map,
3852 VM_MAP_PAGE_SHIFT(map));
3853 vm_map_disable_hole_optimization(zap_new_map);
3854
3855 if (!map_locked) {
3856 vm_map_lock(map);
3857 map_locked = TRUE;
3858 }
3859 (void) vm_map_delete(map, *address, *address + size,
3860 (VM_MAP_REMOVE_SAVE_ENTRIES |
3861 VM_MAP_REMOVE_NO_MAP_ALIGN),
3862 zap_new_map);
3863 }
3864 if (zap_old_map != VM_MAP_NULL &&
3865 zap_old_map->hdr.nentries != 0) {
3866 vm_map_entry_t entry1, entry2;
3867
3868 /*
3869 * The new mapping failed. Attempt to restore
3870 * the old mappings, saved in the "zap_old_map".
3871 */
3872 if (!map_locked) {
3873 vm_map_lock(map);
3874 map_locked = TRUE;
3875 }
3876
3877 /* first check if the coast is still clear */
3878 start = vm_map_first_entry(zap_old_map)->vme_start;
3879 end = vm_map_last_entry(zap_old_map)->vme_end;
3880 if (vm_map_lookup_entry(map, start, &entry1) ||
3881 vm_map_lookup_entry(map, end, &entry2) ||
3882 entry1 != entry2) {
3883 /*
3884 * Part of that range has already been
3885 * re-mapped: we can't restore the old
3886 * mappings...
3887 */
3888 vm_map_enter_restore_failures++;
3889 } else {
3890 /*
3891 * Transfer the saved map entries from
3892 * "zap_old_map" to the original "map",
3893 * inserting them all after "entry1".
3894 */
3895 for (entry2 = vm_map_first_entry(zap_old_map);
3896 entry2 != vm_map_to_entry(zap_old_map);
3897 entry2 = vm_map_first_entry(zap_old_map)) {
3898 vm_map_size_t entry_size;
3899
3900 entry_size = (entry2->vme_end -
3901 entry2->vme_start);
3902 vm_map_store_entry_unlink(zap_old_map,
3903 entry2);
3904 zap_old_map->size -= entry_size;
3905 vm_map_store_entry_link(map, entry1, entry2,
3906 VM_MAP_KERNEL_FLAGS_NONE);
3907 map->size += entry_size;
3908 entry1 = entry2;
3909 }
3910 if (map->wiring_required) {
3911 /*
3912 * XXX TODO: we should rewire the
3913 * old pages here...
3914 */
3915 }
3916 vm_map_enter_restore_successes++;
3917 }
3918 }
3919 }
3920
3921 /*
3922 * The caller is responsible for releasing the lock if it requested to
3923 * keep the map locked.
3924 */
3925 if (map_locked && !keep_map_locked) {
3926 vm_map_unlock(map);
3927 }
3928
3929 /*
3930 * Get rid of the "zap_maps" and all the map entries that
3931 * they may still contain.
3932 */
3933 if (zap_old_map != VM_MAP_NULL) {
3934 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3935 zap_old_map = VM_MAP_NULL;
3936 }
3937 if (zap_new_map != VM_MAP_NULL) {
3938 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3939 zap_new_map = VM_MAP_NULL;
3940 }
3941
3942 return result;
3943
3944 #undef RETURN
3945 }
3946 #endif /* __arm64__ */
3947
3948 /*
3949 * Counters for the prefault optimization.
3950 */
/* NOTE(review): presumably the number of pages handled by the prefault path; the code updating it is not in this part of the file -- confirm. */
3951 int64_t vm_prefault_nb_pages = 0;
/* NOTE(review): presumably the number of times prefaulting was abandoned early; confirm against the code that increments it. */
3952 int64_t vm_prefault_nb_bailout = 0;
3953
3954 static kern_return_t
3955 vm_map_enter_mem_object_helper(
3956 vm_map_t target_map,
3957 vm_map_offset_t *address,
3958 vm_map_size_t initial_size,
3959 vm_map_offset_t mask,
3960 int flags,
3961 vm_map_kernel_flags_t vmk_flags,
3962 vm_tag_t tag,
3963 ipc_port_t port,
3964 vm_object_offset_t offset,
3965 boolean_t copy,
3966 vm_prot_t cur_protection,
3967 vm_prot_t max_protection,
3968 vm_inherit_t inheritance,
3969 upl_page_list_ptr_t page_list,
3970 unsigned int page_list_count)
3971 {
3972 vm_map_address_t map_addr;
3973 vm_map_size_t map_size;
3974 vm_object_t object;
3975 vm_object_size_t size;
3976 kern_return_t result;
3977 boolean_t mask_cur_protection, mask_max_protection;
3978 boolean_t kernel_prefault, try_prefault = (page_list_count != 0);
3979 vm_map_offset_t offset_in_mapping = 0;
3980 #if __arm64__
3981 boolean_t fourk = vmk_flags.vmkf_fourk;
3982 #endif /* __arm64__ */
3983
3984 assertf(vmk_flags.__vmkf_unused == 0, "vmk_flags unused=0x%x\n", vmk_flags.__vmkf_unused);
3985
3986 mask_cur_protection = cur_protection & VM_PROT_IS_MASK;
3987 mask_max_protection = max_protection & VM_PROT_IS_MASK;
3988 cur_protection &= ~VM_PROT_IS_MASK;
3989 max_protection &= ~VM_PROT_IS_MASK;
3990
3991 /*
3992 * Check arguments for validity
3993 */
3994 if ((target_map == VM_MAP_NULL) ||
3995 (cur_protection & ~VM_PROT_ALL) ||
3996 (max_protection & ~VM_PROT_ALL) ||
3997 (inheritance > VM_INHERIT_LAST_VALID) ||
3998 (try_prefault && (copy || !page_list)) ||
3999 initial_size == 0) {
4000 return KERN_INVALID_ARGUMENT;
4001 }
4002
4003 #if __arm64__
4004 if (fourk) {
4005 map_addr = vm_map_trunc_page(*address, FOURK_PAGE_MASK);
4006 map_size = vm_map_round_page(initial_size, FOURK_PAGE_MASK);
4007 } else
4008 #endif /* __arm64__ */
4009 {
4010 map_addr = vm_map_trunc_page(*address,
4011 VM_MAP_PAGE_MASK(target_map));
4012 map_size = vm_map_round_page(initial_size,
4013 VM_MAP_PAGE_MASK(target_map));
4014 }
4015 size = vm_object_round_page(initial_size);
4016
4017 /*
4018 * Find the vm object (if any) corresponding to this port.
4019 */
4020 if (!IP_VALID(port)) {
4021 object = VM_OBJECT_NULL;
4022 offset = 0;
4023 copy = FALSE;
4024 } else if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
4025 vm_named_entry_t named_entry;
4026
4027 named_entry = (vm_named_entry_t) ip_get_kobject(port);
4028
4029 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4030 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4031 offset += named_entry->data_offset;
4032 }
4033
4034 /* a few checks to make sure user is obeying rules */
4035 if (size == 0) {
4036 if (offset >= named_entry->size) {
4037 return KERN_INVALID_RIGHT;
4038 }
4039 size = named_entry->size - offset;
4040 }
4041 if (mask_max_protection) {
4042 max_protection &= named_entry->protection;
4043 }
4044 if (mask_cur_protection) {
4045 cur_protection &= named_entry->protection;
4046 }
4047 if ((named_entry->protection & max_protection) !=
4048 max_protection) {
4049 return KERN_INVALID_RIGHT;
4050 }
4051 if ((named_entry->protection & cur_protection) !=
4052 cur_protection) {
4053 return KERN_INVALID_RIGHT;
4054 }
4055 if (offset + size < offset) {
4056 /* overflow */
4057 return KERN_INVALID_ARGUMENT;
4058 }
4059 if (named_entry->size < (offset + initial_size)) {
4060 return KERN_INVALID_ARGUMENT;
4061 }
4062
4063 if (named_entry->is_copy) {
4064 /* for a vm_map_copy, we can only map it whole */
4065 if ((size != named_entry->size) &&
4066 (vm_map_round_page(size,
4067 VM_MAP_PAGE_MASK(target_map)) ==
4068 named_entry->size)) {
4069 /* XXX FBDP use the rounded size... */
4070 size = vm_map_round_page(
4071 size,
4072 VM_MAP_PAGE_MASK(target_map));
4073 }
4074
4075 if (!(flags & VM_FLAGS_ANYWHERE) &&
4076 (offset != 0 ||
4077 size != named_entry->size)) {
4078 /*
4079 * XXX for a mapping at a "fixed" address,
4080 * we can't trim after mapping the whole
4081 * memory entry, so reject a request for a
4082 * partial mapping.
4083 */
4084 return KERN_INVALID_ARGUMENT;
4085 }
4086 }
4087
4088 /* the caller's "offset" parameter is defined to be relative to */
4089 /* the named entry's own offset in the object, so add that here */
4090 offset = offset + named_entry->offset;
4091
4092 if (!VM_MAP_PAGE_ALIGNED(size,
4093 VM_MAP_PAGE_MASK(target_map))) {
4094 /*
4095 * Let's not map more than requested;
4096 * vm_map_enter() will handle this "not map-aligned"
4097 * case.
4098 */
4099 map_size = size;
4100 }
4101
4102 named_entry_lock(named_entry);
4103 if (named_entry->is_sub_map) {
4104 vm_map_t submap;
4105
4106 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4107 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4108 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap.");
4109 }
4110
4111 submap = named_entry->backing.map;
4112 vm_map_lock(submap);
4113 vm_map_reference(submap);
4114 vm_map_unlock(submap);
4115 named_entry_unlock(named_entry);
4116
4117 vmk_flags.vmkf_submap = TRUE;
4118
4119 result = vm_map_enter(target_map,
4120 &map_addr,
4121 map_size,
4122 mask,
4123 flags,
4124 vmk_flags,
4125 tag,
4126 (vm_object_t)(uintptr_t) submap,
4127 offset,
4128 copy,
4129 cur_protection,
4130 max_protection,
4131 inheritance);
4132 if (result != KERN_SUCCESS) {
4133 vm_map_deallocate(submap);
4134 } else {
4135 /*
4136 * No need to lock "submap" just to check its
4137 * "mapped" flag: that flag is never reset
4138 * once it's been set and if we race, we'll
4139 * just end up setting it twice, which is OK.
4140 */
4141 if (submap->mapped_in_other_pmaps == FALSE &&
4142 vm_map_pmap(submap) != PMAP_NULL &&
4143 vm_map_pmap(submap) !=
4144 vm_map_pmap(target_map)) {
4145 /*
4146 * This submap is being mapped in a map
4147 * that uses a different pmap.
4148 * Set its "mapped_in_other_pmaps" flag
4149 * to indicate that we now need to
4150 * remove mappings from all pmaps rather
4151 * than just the submap's pmap.
4152 */
4153 vm_map_lock(submap);
4154 submap->mapped_in_other_pmaps = TRUE;
4155 vm_map_unlock(submap);
4156 }
4157 *address = map_addr;
4158 }
4159 return result;
4160 } else if (named_entry->is_copy) {
4161 kern_return_t kr;
4162 vm_map_copy_t copy_map;
4163 vm_map_entry_t copy_entry;
4164 vm_map_offset_t copy_addr;
4165
4166 if (flags & ~(VM_FLAGS_FIXED |
4167 VM_FLAGS_ANYWHERE |
4168 VM_FLAGS_OVERWRITE |
4169 VM_FLAGS_RETURN_4K_DATA_ADDR |
4170 VM_FLAGS_RETURN_DATA_ADDR |
4171 VM_FLAGS_ALIAS_MASK)) {
4172 named_entry_unlock(named_entry);
4173 return KERN_INVALID_ARGUMENT;
4174 }
4175
4176 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4177 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4178 offset_in_mapping = offset - vm_object_trunc_page(offset);
4179 if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR) {
4180 offset_in_mapping &= ~((signed)(0xFFF));
4181 }
4182 offset = vm_object_trunc_page(offset);
4183 map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset;
4184 }
4185
4186 copy_map = named_entry->backing.copy;
4187 assert(copy_map->type == VM_MAP_COPY_ENTRY_LIST);
4188 if (copy_map->type != VM_MAP_COPY_ENTRY_LIST) {
4189 /* unsupported type; should not happen */
4190 printf("vm_map_enter_mem_object: "
4191 "memory_entry->backing.copy "
4192 "unsupported type 0x%x\n",
4193 copy_map->type);
4194 named_entry_unlock(named_entry);
4195 return KERN_INVALID_ARGUMENT;
4196 }
4197
4198 /* reserve a contiguous range */
4199 kr = vm_map_enter(target_map,
4200 &map_addr,
4201 /* map whole mem entry, trim later: */
4202 named_entry->size,
4203 mask,
4204 flags & (VM_FLAGS_ANYWHERE |
4205 VM_FLAGS_OVERWRITE |
4206 VM_FLAGS_RETURN_4K_DATA_ADDR |
4207 VM_FLAGS_RETURN_DATA_ADDR),
4208 vmk_flags,
4209 tag,
4210 VM_OBJECT_NULL,
4211 0,
4212 FALSE, /* copy */
4213 cur_protection,
4214 max_protection,
4215 inheritance);
4216 if (kr != KERN_SUCCESS) {
4217 named_entry_unlock(named_entry);
4218 return kr;
4219 }
4220
4221 copy_addr = map_addr;
4222
4223 for (copy_entry = vm_map_copy_first_entry(copy_map);
4224 copy_entry != vm_map_copy_to_entry(copy_map);
4225 copy_entry = copy_entry->vme_next) {
4226 int remap_flags;
4227 vm_map_kernel_flags_t vmk_remap_flags;
4228 vm_map_t copy_submap;
4229 vm_object_t copy_object;
4230 vm_map_size_t copy_size;
4231 vm_object_offset_t copy_offset;
4232 int copy_vm_alias;
4233
4234 remap_flags = 0;
4235 vmk_remap_flags = VM_MAP_KERNEL_FLAGS_NONE;
4236
4237 copy_object = VME_OBJECT(copy_entry);
4238 copy_offset = VME_OFFSET(copy_entry);
4239 copy_size = (copy_entry->vme_end -
4240 copy_entry->vme_start);
4241 VM_GET_FLAGS_ALIAS(flags, copy_vm_alias);
4242 if (copy_vm_alias == 0) {
4243 /*
4244 * Caller does not want a specific
4245 * alias for this new mapping: use
4246 * the alias of the original mapping.
4247 */
4248 copy_vm_alias = VME_ALIAS(copy_entry);
4249 }
4250
4251 /* sanity check */
4252 if ((copy_addr + copy_size) >
4253 (map_addr +
4254 named_entry->size /* XXX full size */)) {
4255 /* over-mapping too much !? */
4256 kr = KERN_INVALID_ARGUMENT;
4257 /* abort */
4258 break;
4259 }
4260
4261 /* take a reference on the object */
4262 if (copy_entry->is_sub_map) {
4263 vmk_remap_flags.vmkf_submap = TRUE;
4264 copy_submap = VME_SUBMAP(copy_entry);
4265 vm_map_lock(copy_submap);
4266 vm_map_reference(copy_submap);
4267 vm_map_unlock(copy_submap);
4268 copy_object = (vm_object_t)(uintptr_t) copy_submap;
4269 } else if (!copy &&
4270 copy_object != VM_OBJECT_NULL &&
4271 (copy_entry->needs_copy ||
4272 copy_object->shadowed ||
4273 (!copy_object->true_share &&
4274 !copy_entry->is_shared &&
4275 copy_object->vo_size > copy_size))) {
4276 /*
4277 * We need to resolve our side of this
4278 * "symmetric" copy-on-write now; we
4279 * need a new object to map and share,
4280 * instead of the current one which
4281 * might still be shared with the
4282 * original mapping.
4283 *
4284 * Note: A "vm_map_copy_t" does not
4285 * have a lock but we're protected by
4286 * the named entry's lock here.
4287 */
4288 // assert(copy_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);
4289 VME_OBJECT_SHADOW(copy_entry, copy_size);
4290 if (!copy_entry->needs_copy &&
4291 copy_entry->protection & VM_PROT_WRITE) {
4292 vm_prot_t prot;
4293
4294 prot = copy_entry->protection & ~VM_PROT_WRITE;
4295 vm_object_pmap_protect(copy_object,
4296 copy_offset,
4297 copy_size,
4298 PMAP_NULL,
4299 0,
4300 prot);
4301 }
4302
4303 copy_entry->needs_copy = FALSE;
4304 copy_entry->is_shared = TRUE;
4305 copy_object = VME_OBJECT(copy_entry);
4306 copy_offset = VME_OFFSET(copy_entry);
4307 vm_object_lock(copy_object);
4308 vm_object_reference_locked(copy_object);
4309 if (copy_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
4310 /* we're about to make a shared mapping of this object */
4311 copy_object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
4312 copy_object->true_share = TRUE;
4313 }
4314 vm_object_unlock(copy_object);
4315 } else {
4316 /*
4317 * We already have the right object
4318 * to map.
4319 */
4320 copy_object = VME_OBJECT(copy_entry);
4321 vm_object_reference(copy_object);
4322 }
4323
4324 /* over-map the object into destination */
4325 remap_flags |= flags;
4326 remap_flags |= VM_FLAGS_FIXED;
4327 remap_flags |= VM_FLAGS_OVERWRITE;
4328 remap_flags &= ~VM_FLAGS_ANYWHERE;
4329 if (!copy && !copy_entry->is_sub_map) {
4330 /*
4331 * copy-on-write should have been
4332 * resolved at this point, or we would
4333 * end up sharing instead of copying.
4334 */
4335 assert(!copy_entry->needs_copy);
4336 }
4337 #if !CONFIG_EMBEDDED
4338 if (copy_entry->used_for_jit) {
4339 vmk_remap_flags.vmkf_map_jit = TRUE;
4340 }
4341 #endif /* !CONFIG_EMBEDDED */
4342 kr = vm_map_enter(target_map,
4343 &copy_addr,
4344 copy_size,
4345 (vm_map_offset_t) 0,
4346 remap_flags,
4347 vmk_remap_flags,
4348 copy_vm_alias,
4349 copy_object,
4350 copy_offset,
4351 ((copy_object == NULL) ? FALSE : copy),
4352 cur_protection,
4353 max_protection,
4354 inheritance);
4355 if (kr != KERN_SUCCESS) {
4356 if (copy_entry->is_sub_map) {
4357 vm_map_deallocate(copy_submap);
4358 } else {
4359 vm_object_deallocate(copy_object);
4360 }
4361 /* abort */
4362 break;
4363 }
4364
4365 /* next mapping */
4366 copy_addr += copy_size;
4367 }
4368
4369 if (kr == KERN_SUCCESS) {
4370 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4371 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4372 *address = map_addr + offset_in_mapping;
4373 } else {
4374 *address = map_addr;
4375 }
4376
4377 if (offset) {
4378 /*
4379 * Trim in front, from 0 to "offset".
4380 */
4381 vm_map_remove(target_map,
4382 map_addr,
4383 map_addr + offset,
4384 VM_MAP_REMOVE_NO_FLAGS);
4385 *address += offset;
4386 }
4387 if (offset + map_size < named_entry->size) {
4388 /*
4389 * Trim in back, from
4390 * "offset + map_size" to
4391 * "named_entry->size".
4392 */
4393 vm_map_remove(target_map,
4394 (map_addr +
4395 offset + map_size),
4396 (map_addr +
4397 named_entry->size),
4398 VM_MAP_REMOVE_NO_FLAGS);
4399 }
4400 }
4401 named_entry_unlock(named_entry);
4402
4403 if (kr != KERN_SUCCESS) {
4404 if (!(flags & VM_FLAGS_OVERWRITE)) {
4405 /* deallocate the contiguous range */
4406 (void) vm_deallocate(target_map,
4407 map_addr,
4408 map_size);
4409 }
4410 }
4411
4412 return kr;
4413 } else {
4414 unsigned int access;
4415 vm_prot_t protections;
4416 unsigned int wimg_mode;
4417
4418 /* we are mapping a VM object */
4419
4420 protections = named_entry->protection & VM_PROT_ALL;
4421 access = GET_MAP_MEM(named_entry->protection);
4422
4423 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4424 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4425 offset_in_mapping = offset - vm_object_trunc_page(offset);
4426 if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR) {
4427 offset_in_mapping &= ~((signed)(0xFFF));
4428 }
4429 offset = vm_object_trunc_page(offset);
4430 map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset;
4431 }
4432
4433 object = named_entry->backing.object;
4434 assert(object != VM_OBJECT_NULL);
4435 vm_object_lock(object);
4436 named_entry_unlock(named_entry);
4437
4438 vm_object_reference_locked(object);
4439
4440 wimg_mode = object->wimg_bits;
4441 vm_prot_to_wimg(access, &wimg_mode);
4442 if (object->wimg_bits != wimg_mode) {
4443 vm_object_change_wimg_mode(object, wimg_mode);
4444 }
4445
4446 vm_object_unlock(object);
4447 }
4448 } else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
4449 /*
4450 * JMM - This is temporary until we unify named entries
4451 * and raw memory objects.
4452 *
4453 * Detected fake ip_kotype for a memory object. In
4454 * this case, the port isn't really a port at all, but
4455 * instead is just a raw memory object.
4456 */
4457 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4458 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4459 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for raw memory object.");
4460 }
4461
4462 object = memory_object_to_vm_object((memory_object_t)port);
4463 if (object == VM_OBJECT_NULL) {
4464 return KERN_INVALID_OBJECT;
4465 }
4466 vm_object_reference(object);
4467
4468 /* wait for object (if any) to be ready */
4469 if (object != VM_OBJECT_NULL) {
4470 if (object == kernel_object) {
4471 printf("Warning: Attempt to map kernel object"
4472 " by a non-private kernel entity\n");
4473 return KERN_INVALID_OBJECT;
4474 }
4475 if (!object->pager_ready) {
4476 vm_object_lock(object);
4477
4478 while (!object->pager_ready) {
4479 vm_object_wait(object,
4480 VM_OBJECT_EVENT_PAGER_READY,
4481 THREAD_UNINT);
4482 vm_object_lock(object);
4483 }
4484 vm_object_unlock(object);
4485 }
4486 }
4487 } else {
4488 return KERN_INVALID_OBJECT;
4489 }
4490
4491 if (object != VM_OBJECT_NULL &&
4492 object->named &&
4493 object->pager != MEMORY_OBJECT_NULL &&
4494 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4495 memory_object_t pager;
4496 vm_prot_t pager_prot;
4497 kern_return_t kr;
4498
4499 /*
4500 * For "named" VM objects, let the pager know that the
4501 * memory object is being mapped. Some pagers need to keep
4502 * track of this, to know when they can reclaim the memory
4503 * object, for example.
4504 * VM calls memory_object_map() for each mapping (specifying
4505 * the protection of each mapping) and calls
4506 * memory_object_last_unmap() when all the mappings are gone.
4507 */
4508 pager_prot = max_protection;
4509 if (copy) {
4510 /*
4511 * Copy-On-Write mapping: won't modify the
4512 * memory object.
4513 */
4514 pager_prot &= ~VM_PROT_WRITE;
4515 }
4516 vm_object_lock(object);
4517 pager = object->pager;
4518 if (object->named &&
4519 pager != MEMORY_OBJECT_NULL &&
4520 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4521 assert(object->pager_ready);
4522 vm_object_mapping_wait(object, THREAD_UNINT);
4523 vm_object_mapping_begin(object);
4524 vm_object_unlock(object);
4525
4526 kr = memory_object_map(pager, pager_prot);
4527 assert(kr == KERN_SUCCESS);
4528
4529 vm_object_lock(object);
4530 vm_object_mapping_end(object);
4531 }
4532 vm_object_unlock(object);
4533 }
4534
4535 /*
4536 * Perform the copy if requested
4537 */
4538
4539 if (copy) {
4540 vm_object_t new_object;
4541 vm_object_offset_t new_offset;
4542
4543 result = vm_object_copy_strategically(object, offset,
4544 map_size,
4545 &new_object, &new_offset,
4546 &copy);
4547
4548
4549 if (result == KERN_MEMORY_RESTART_COPY) {
4550 boolean_t success;
4551 boolean_t src_needs_copy;
4552
4553 /*
4554 * XXX
4555 * We currently ignore src_needs_copy.
4556 * This really is the issue of how to make
4557 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
4558 * non-kernel users to use. Solution forthcoming.
4559 * In the meantime, since we don't allow non-kernel
4560 * memory managers to specify symmetric copy,
4561 * we won't run into problems here.
4562 */
4563 new_object = object;
4564 new_offset = offset;
4565 success = vm_object_copy_quickly(&new_object,
4566 new_offset,
4567 map_size,
4568 &src_needs_copy,
4569 &copy);
4570 assert(success);
4571 result = KERN_SUCCESS;
4572 }
4573 /*
4574 * Throw away the reference to the
4575 * original object, as it won't be mapped.
4576 */
4577
4578 vm_object_deallocate(object);
4579
4580 if (result != KERN_SUCCESS) {
4581 return result;
4582 }
4583
4584 object = new_object;
4585 offset = new_offset;
4586 }
4587
4588 /*
4589 * If non-kernel users want to try to prefault pages, the mapping and prefault
4590 * needs to be atomic.
4591 */
4592 kernel_prefault = (try_prefault && vm_kernel_map_is_kernel(target_map));
4593 vmk_flags.vmkf_keep_map_locked = (try_prefault && !kernel_prefault);
4594
4595 #if __arm64__
4596 if (fourk) {
4597 /* map this object in a "4K" pager */
4598 result = vm_map_enter_fourk(target_map,
4599 &map_addr,
4600 map_size,
4601 (vm_map_offset_t) mask,
4602 flags,
4603 vmk_flags,
4604 tag,
4605 object,
4606 offset,
4607 copy,
4608 cur_protection,
4609 max_protection,
4610 inheritance);
4611 } else
4612 #endif /* __arm64__ */
4613 {
4614 result = vm_map_enter(target_map,
4615 &map_addr, map_size,
4616 (vm_map_offset_t)mask,
4617 flags,
4618 vmk_flags,
4619 tag,
4620 object, offset,
4621 copy,
4622 cur_protection, max_protection,
4623 inheritance);
4624 }
4625 if (result != KERN_SUCCESS) {
4626 vm_object_deallocate(object);
4627 }
4628
4629 /*
4630 * Try to prefault, and do not forget to release the vm map lock.
4631 */
4632 if (result == KERN_SUCCESS && try_prefault) {
4633 mach_vm_address_t va = map_addr;
4634 kern_return_t kr = KERN_SUCCESS;
4635 unsigned int i = 0;
4636 int pmap_options;
4637
4638 pmap_options = kernel_prefault ? 0 : PMAP_OPTIONS_NOWAIT;
4639 if (object->internal) {
4640 pmap_options |= PMAP_OPTIONS_INTERNAL;
4641 }
4642
4643 for (i = 0; i < page_list_count; ++i) {
4644 if (!UPL_VALID_PAGE(page_list, i)) {
4645 if (kernel_prefault) {
4646 assertf(FALSE, "kernel_prefault && !UPL_VALID_PAGE");
4647 result = KERN_MEMORY_ERROR;
4648 break;
4649 }
4650 } else {
4651 /*
4652 * If this function call failed, we should stop
4653 * trying to optimize, other calls are likely
4654 * going to fail too.
4655 *
4656 * We are not gonna report an error for such
4657 * failure though. That's an optimization, not
4658 * something critical.
4659 */
4660 kr = pmap_enter_options(target_map->pmap,
4661 va, UPL_PHYS_PAGE(page_list, i),
4662 cur_protection, VM_PROT_NONE,
4663 0, TRUE, pmap_options, NULL);
4664 if (kr != KERN_SUCCESS) {
4665 OSIncrementAtomic64(&vm_prefault_nb_bailout);
4666 if (kernel_prefault) {
4667 result = kr;
4668 }
4669 break;
4670 }
4671 OSIncrementAtomic64(&vm_prefault_nb_pages);
4672 }
4673
4674 /* Next virtual address */
4675 va += PAGE_SIZE;
4676 }
4677 if (vmk_flags.vmkf_keep_map_locked) {
4678 vm_map_unlock(target_map);
4679 }
4680 }
4681
4682 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4683 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4684 *address = map_addr + offset_in_mapping;
4685 } else {
4686 *address = map_addr;
4687 }
4688 return result;
4689 }
4690
4691 kern_return_t
4692 vm_map_enter_mem_object(
4693 vm_map_t target_map,
4694 vm_map_offset_t *address,
4695 vm_map_size_t initial_size,
4696 vm_map_offset_t mask,
4697 int flags,
4698 vm_map_kernel_flags_t vmk_flags,
4699 vm_tag_t tag,
4700 ipc_port_t port,
4701 vm_object_offset_t offset,
4702 boolean_t copy,
4703 vm_prot_t cur_protection,
4704 vm_prot_t max_protection,
4705 vm_inherit_t inheritance)
4706 {
4707 kern_return_t ret;
4708
4709 ret = vm_map_enter_mem_object_helper(target_map,
4710 address,
4711 initial_size,
4712 mask,
4713 flags,
4714 vmk_flags,
4715 tag,
4716 port,
4717 offset,
4718 copy,
4719 cur_protection,
4720 max_protection,
4721 inheritance,
4722 NULL,
4723 0);
4724
4725 #if KASAN
4726 if (ret == KERN_SUCCESS && address && target_map->pmap == kernel_pmap) {
4727 kasan_notify_address(*address, initial_size);
4728 }
4729 #endif
4730
4731 return ret;
4732 }
4733
4734 kern_return_t
4735 vm_map_enter_mem_object_prefault(
4736 vm_map_t target_map,
4737 vm_map_offset_t *address,
4738 vm_map_size_t initial_size,
4739 vm_map_offset_t mask,
4740 int flags,
4741 vm_map_kernel_flags_t vmk_flags,
4742 vm_tag_t tag,
4743 ipc_port_t port,
4744 vm_object_offset_t offset,
4745 vm_prot_t cur_protection,
4746 vm_prot_t max_protection,
4747 upl_page_list_ptr_t page_list,
4748 unsigned int page_list_count)
4749 {
4750 kern_return_t ret;
4751
4752 ret = vm_map_enter_mem_object_helper(target_map,
4753 address,
4754 initial_size,
4755 mask,
4756 flags,
4757 vmk_flags,
4758 tag,
4759 port,
4760 offset,
4761 FALSE,
4762 cur_protection,
4763 max_protection,
4764 VM_INHERIT_DEFAULT,
4765 page_list,
4766 page_list_count);
4767
4768 #if KASAN
4769 if (ret == KERN_SUCCESS && address && target_map->pmap == kernel_pmap) {
4770 kasan_notify_address(*address, initial_size);
4771 }
4772 #endif
4773
4774 return ret;
4775 }
4776
4777
4778 kern_return_t
4779 vm_map_enter_mem_object_control(
4780 vm_map_t target_map,
4781 vm_map_offset_t *address,
4782 vm_map_size_t initial_size,
4783 vm_map_offset_t mask,
4784 int flags,
4785 vm_map_kernel_flags_t vmk_flags,
4786 vm_tag_t tag,
4787 memory_object_control_t control,
4788 vm_object_offset_t offset,
4789 boolean_t copy,
4790 vm_prot_t cur_protection,
4791 vm_prot_t max_protection,
4792 vm_inherit_t inheritance)
4793 {
4794 vm_map_address_t map_addr;
4795 vm_map_size_t map_size;
4796 vm_object_t object;
4797 vm_object_size_t size;
4798 kern_return_t result;
4799 memory_object_t pager;
4800 vm_prot_t pager_prot;
4801 kern_return_t kr;
4802 #if __arm64__
4803 boolean_t fourk = vmk_flags.vmkf_fourk;
4804 #endif /* __arm64__ */
4805
4806 /*
4807 * Check arguments for validity
4808 */
4809 if ((target_map == VM_MAP_NULL) ||
4810 (cur_protection & ~VM_PROT_ALL) ||
4811 (max_protection & ~VM_PROT_ALL) ||
4812 (inheritance > VM_INHERIT_LAST_VALID) ||
4813 initial_size == 0) {
4814 return KERN_INVALID_ARGUMENT;
4815 }
4816
4817 #if __arm64__
4818 if (fourk) {
4819 map_addr = vm_map_trunc_page(*address,
4820 FOURK_PAGE_MASK);
4821 map_size = vm_map_round_page(initial_size,
4822 FOURK_PAGE_MASK);
4823 } else
4824 #endif /* __arm64__ */
4825 {
4826 map_addr = vm_map_trunc_page(*address,
4827 VM_MAP_PAGE_MASK(target_map));
4828 map_size = vm_map_round_page(initial_size,
4829 VM_MAP_PAGE_MASK(target_map));
4830 }
4831 size = vm_object_round_page(initial_size);
4832
4833 object = memory_object_control_to_vm_object(control);
4834
4835 if (object == VM_OBJECT_NULL) {
4836 return KERN_INVALID_OBJECT;
4837 }
4838
4839 if (object == kernel_object) {
4840 printf("Warning: Attempt to map kernel object"
4841 " by a non-private kernel entity\n");
4842 return KERN_INVALID_OBJECT;
4843 }
4844
4845 vm_object_lock(object);
4846 object->ref_count++;
4847 vm_object_res_reference(object);
4848
4849 /*
4850 * For "named" VM objects, let the pager know that the
4851 * memory object is being mapped. Some pagers need to keep
4852 * track of this, to know when they can reclaim the memory
4853 * object, for example.
4854 * VM calls memory_object_map() for each mapping (specifying
4855 * the protection of each mapping) and calls
4856 * memory_object_last_unmap() when all the mappings are gone.
4857 */
4858 pager_prot = max_protection;
4859 if (copy) {
4860 pager_prot &= ~VM_PROT_WRITE;
4861 }
4862 pager = object->pager;
4863 if (object->named &&
4864 pager != MEMORY_OBJECT_NULL &&
4865 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4866 assert(object->pager_ready);
4867 vm_object_mapping_wait(object, THREAD_UNINT);
4868 vm_object_mapping_begin(object);
4869 vm_object_unlock(object);
4870
4871 kr = memory_object_map(pager, pager_prot);
4872 assert(kr == KERN_SUCCESS);
4873
4874 vm_object_lock(object);
4875 vm_object_mapping_end(object);
4876 }
4877 vm_object_unlock(object);
4878
4879 /*
4880 * Perform the copy if requested
4881 */
4882
4883 if (copy) {
4884 vm_object_t new_object;
4885 vm_object_offset_t new_offset;
4886
4887 result = vm_object_copy_strategically(object, offset, size,
4888 &new_object, &new_offset,
4889 &copy);
4890
4891
4892 if (result == KERN_MEMORY_RESTART_COPY) {
4893 boolean_t success;
4894 boolean_t src_needs_copy;
4895
4896 /*
4897 * XXX
4898 * We currently ignore src_needs_copy.
4899 * This really is the issue of how to make
4900 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
4901 * non-kernel users to use. Solution forthcoming.
4902 * In the meantime, since we don't allow non-kernel
4903 * memory managers to specify symmetric copy,
4904 * we won't run into problems here.
4905 */
4906 new_object = object;
4907 new_offset = offset;
4908 success = vm_object_copy_quickly(&new_object,
4909 new_offset, size,
4910 &src_needs_copy,
4911 &copy);
4912 assert(success);
4913 result = KERN_SUCCESS;
4914 }
4915 /*
4916 * Throw away the reference to the
4917 * original object, as it won't be mapped.
4918 */
4919
4920 vm_object_deallocate(object);
4921
4922 if (result != KERN_SUCCESS) {
4923 return result;
4924 }
4925
4926 object = new_object;
4927 offset = new_offset;
4928 }
4929
4930 #if __arm64__
4931 if (fourk) {
4932 result = vm_map_enter_fourk(target_map,
4933 &map_addr,
4934 map_size,
4935 (vm_map_offset_t)mask,
4936 flags,
4937 vmk_flags,
4938 tag,
4939 object, offset,
4940 copy,
4941 cur_protection, max_protection,
4942 inheritance);
4943 } else
4944 #endif /* __arm64__ */
4945 {
4946 result = vm_map_enter(target_map,
4947 &map_addr, map_size,
4948 (vm_map_offset_t)mask,
4949 flags,
4950 vmk_flags,
4951 tag,
4952 object, offset,
4953 copy,
4954 cur_protection, max_protection,
4955 inheritance);
4956 }
4957 if (result != KERN_SUCCESS) {
4958 vm_object_deallocate(object);
4959 }
4960 *address = map_addr;
4961
4962 return result;
4963 }
4964
4965
4966 #if VM_CPM
4967
4968 #ifdef MACH_ASSERT
4969 extern pmap_paddr_t avail_start, avail_end;
4970 #endif
4971
4972 /*
4973 * Allocate memory in the specified map, with the caveat that
4974 * the memory is physically contiguous. This call may fail
4975 * if the system can't find sufficient contiguous memory.
4976 * This call may cause or lead to heart-stopping amounts of
4977 * paging activity.
4978 *
4979 * Memory obtained from this call should be freed in the
4980 * normal way, viz., via vm_deallocate.
4981 */
4982 kern_return_t
4983 vm_map_enter_cpm(
4984 vm_map_t map,
4985 vm_map_offset_t *addr,
4986 vm_map_size_t size,
4987 int flags)
4988 {
4989 vm_object_t cpm_obj;
4990 pmap_t pmap;
4991 vm_page_t m, pages;
4992 kern_return_t kr;
4993 vm_map_offset_t va, start, end, offset;
4994 #if MACH_ASSERT
4995 vm_map_offset_t prev_addr = 0;
4996 #endif /* MACH_ASSERT */
4997
4998 boolean_t anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
4999 vm_tag_t tag;
5000
5001 VM_GET_FLAGS_ALIAS(flags, tag);
5002
5003 if (size == 0) {
5004 *addr = 0;
5005 return KERN_SUCCESS;
5006 }
5007 if (anywhere) {
5008 *addr = vm_map_min(map);
5009 } else {
5010 *addr = vm_map_trunc_page(*addr,
5011 VM_MAP_PAGE_MASK(map));
5012 }
5013 size = vm_map_round_page(size,
5014 VM_MAP_PAGE_MASK(map));
5015
5016 /*
5017 * LP64todo - cpm_allocate should probably allow
5018 * allocations of >4GB, but not with the current
5019 * algorithm, so just cast down the size for now.
5020 */
5021 if (size > VM_MAX_ADDRESS) {
5022 return KERN_RESOURCE_SHORTAGE;
5023 }
5024 if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
5025 &pages, 0, 0, TRUE, flags)) != KERN_SUCCESS) {
5026 return kr;
5027 }
5028
5029 cpm_obj = vm_object_allocate((vm_object_size_t)size);
5030 assert(cpm_obj != VM_OBJECT_NULL);
5031 assert(cpm_obj->internal);
5032 assert(cpm_obj->vo_size == (vm_object_size_t)size);
5033 assert(cpm_obj->can_persist == FALSE);
5034 assert(cpm_obj->pager_created == FALSE);
5035 assert(cpm_obj->pageout == FALSE);
5036 assert(cpm_obj->shadow == VM_OBJECT_NULL);
5037
5038 /*
5039 * Insert pages into object.
5040 */
5041
5042 vm_object_lock(cpm_obj);
5043 for (offset = 0; offset < size; offset += PAGE_SIZE) {
5044 m = pages;
5045 pages = NEXT_PAGE(m);
5046 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
5047
5048 assert(!m->vmp_gobbled);
5049 assert(!m->vmp_wanted);
5050 assert(!m->vmp_pageout);
5051 assert(!m->vmp_tabled);
5052 assert(VM_PAGE_WIRED(m));
5053 assert(m->vmp_busy);
5054 assert(VM_PAGE_GET_PHYS_PAGE(m) >= (avail_start >> PAGE_SHIFT) && VM_PAGE_GET_PHYS_PAGE(m) <= (avail_end >> PAGE_SHIFT));
5055
5056 m->vmp_busy = FALSE;
5057 vm_page_insert(m, cpm_obj, offset);
5058 }
5059 assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
5060 vm_object_unlock(cpm_obj);
5061
5062 /*
5063 * Hang onto a reference on the object in case a
5064 * multi-threaded application for some reason decides
5065 * to deallocate the portion of the address space into
5066 * which we will insert this object.
5067 *
5068 * Unfortunately, we must insert the object now before
5069 * we can talk to the pmap module about which addresses
5070 * must be wired down. Hence, the race with a multi-
5071 * threaded app.
5072 */
5073 vm_object_reference(cpm_obj);
5074
5075 /*
5076 * Insert object into map.
5077 */
5078
5079 kr = vm_map_enter(
5080 map,
5081 addr,
5082 size,
5083 (vm_map_offset_t)0,
5084 flags,
5085 VM_MAP_KERNEL_FLAGS_NONE,
5086 cpm_obj,
5087 (vm_object_offset_t)0,
5088 FALSE,
5089 VM_PROT_ALL,
5090 VM_PROT_ALL,
5091 VM_INHERIT_DEFAULT);
5092
5093 if (kr != KERN_SUCCESS) {
5094 /*
5095 * A CPM object doesn't have can_persist set,
5096 * so all we have to do is deallocate it to
5097 * free up these pages.
5098 */
5099 assert(cpm_obj->pager_created == FALSE);
5100 assert(cpm_obj->can_persist == FALSE);
5101 assert(cpm_obj->pageout == FALSE);
5102 assert(cpm_obj->shadow == VM_OBJECT_NULL);
5103 vm_object_deallocate(cpm_obj); /* kill acquired ref */
5104 vm_object_deallocate(cpm_obj); /* kill creation ref */
5105 }
5106
5107 /*
5108 * Inform the physical mapping system that the
5109 * range of addresses may not fault, so that
5110 * page tables and such can be locked down as well.
5111 */
5112 start = *addr;
5113 end = start + size;
5114 pmap = vm_map_pmap(map);
5115 pmap_pageable(pmap, start, end, FALSE);
5116
5117 /*
5118 * Enter each page into the pmap, to avoid faults.
5119 * Note that this loop could be coded more efficiently,
5120 * if the need arose, rather than looking up each page
5121 * again.
5122 */
5123 for (offset = 0, va = start; offset < size;
5124 va += PAGE_SIZE, offset += PAGE_SIZE) {
5125 int type_of_fault;
5126
5127 vm_object_lock(cpm_obj);
5128 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
5129 assert(m != VM_PAGE_NULL);
5130
5131 vm_page_zero_fill(m);
5132
5133 type_of_fault = DBG_ZERO_FILL_FAULT;
5134
5135 vm_fault_enter(m, pmap, va, VM_PROT_ALL, VM_PROT_WRITE,
5136 VM_PAGE_WIRED(m),
5137 FALSE, /* change_wiring */
5138 VM_KERN_MEMORY_NONE, /* tag - not wiring */
5139 FALSE, /* no_cache */
5140 FALSE, /* cs_bypass */
5141 0, /* user_tag */
5142 0, /* pmap_options */
5143 NULL, /* need_retry */
5144 &type_of_fault);
5145
5146 vm_object_unlock(cpm_obj);
5147 }
5148
5149 #if MACH_ASSERT
5150 /*
5151 * Verify ordering in address space.
5152 */
5153 for (offset = 0; offset < size; offset += PAGE_SIZE) {
5154 vm_object_lock(cpm_obj);
5155 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
5156 vm_object_unlock(cpm_obj);
5157 if (m == VM_PAGE_NULL) {
5158 panic("vm_allocate_cpm: obj %p off 0x%llx no page",
5159 cpm_obj, (uint64_t)offset);
5160 }
5161 assert(m->vmp_tabled);
5162 assert(!m->vmp_busy);
5163 assert(!m->vmp_wanted);
5164 assert(!m->vmp_fictitious);
5165 assert(!m->vmp_private);
5166 assert(!m->vmp_absent);
5167 assert(!m->vmp_error);
5168 assert(!m->vmp_cleaning);
5169 assert(!m->vmp_laundry);
5170 assert(!m->vmp_precious);
5171 assert(!m->vmp_clustered);
5172 if (offset != 0) {
5173 if (VM_PAGE_GET_PHYS_PAGE(m) != prev_addr + 1) {
5174 printf("start 0x%llx end 0x%llx va 0x%llx\n",
5175 (uint64_t)start, (uint64_t)end, (uint64_t)va);
5176 printf("obj %p off 0x%llx\n", cpm_obj, (uint64_t)offset);
5177 printf("m %p prev_address 0x%llx\n", m, (uint64_t)prev_addr);
5178 panic("vm_allocate_cpm: pages not contig!");
5179 }
5180 }
5181 prev_addr = VM_PAGE_GET_PHYS_PAGE(m);
5182 }
5183 #endif /* MACH_ASSERT */
5184
5185 vm_object_deallocate(cpm_obj); /* kill extra ref */
5186
5187 return kr;
5188 }
5189
5190
5191 #else /* VM_CPM */
5192
5193 /*
5194 * Interface is defined in all cases, but unless the kernel
5195 * is built explicitly for this option, the interface does
5196 * nothing.
5197 */
5198
5199 kern_return_t
5200 vm_map_enter_cpm(
5201 __unused vm_map_t map,
5202 __unused vm_map_offset_t *addr,
5203 __unused vm_map_size_t size,
5204 __unused int flags)
5205 {
5206 return KERN_FAILURE;
5207 }
5208 #endif /* VM_CPM */
5209
5210 /* Not used without nested pmaps */
5211 #ifndef NO_NESTED_PMAP
5212 /*
5213 * Clip and unnest a portion of a nested submap mapping.
5214 */
5215
5216
5217 static void
5218 vm_map_clip_unnest(
5219 vm_map_t map,
5220 vm_map_entry_t entry,
5221 vm_map_offset_t start_unnest,
5222 vm_map_offset_t end_unnest)
5223 {
5224 vm_map_offset_t old_start_unnest = start_unnest;
5225 vm_map_offset_t old_end_unnest = end_unnest;
5226
5227 assert(entry->is_sub_map);
5228 assert(VME_SUBMAP(entry) != NULL);
5229 assert(entry->use_pmap);
5230
5231 /*
5232 * Query the platform for the optimal unnest range.
5233 * DRK: There's some duplication of effort here, since
5234 * callers may have adjusted the range to some extent. This
5235 * routine was introduced to support 1GiB subtree nesting
5236 * for x86 platforms, which can also nest on 2MiB boundaries
5237 * depending on size/alignment.
5238 */
5239 if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) {
5240 assert(VME_SUBMAP(entry)->is_nested_map);
5241 assert(!VME_SUBMAP(entry)->disable_vmentry_reuse);
5242 log_unnest_badness(map,
5243 old_start_unnest,
5244 old_end_unnest,
5245 VME_SUBMAP(entry)->is_nested_map,
5246 (entry->vme_start +
5247 VME_SUBMAP(entry)->lowest_unnestable_start -
5248 VME_OFFSET(entry)));
5249 }
5250
5251 if (entry->vme_start > start_unnest ||
5252 entry->vme_end < end_unnest) {
5253 panic("vm_map_clip_unnest(0x%llx,0x%llx): "
5254 "bad nested entry: start=0x%llx end=0x%llx\n",
5255 (long long)start_unnest, (long long)end_unnest,
5256 (long long)entry->vme_start, (long long)entry->vme_end);
5257 }
5258
5259 if (start_unnest > entry->vme_start) {
5260 _vm_map_clip_start(&map->hdr,
5261 entry,
5262 start_unnest);
5263 if (map->holelistenabled) {
5264 vm_map_store_update_first_free(map, NULL, FALSE);
5265 } else {
5266 vm_map_store_update_first_free(map, map->first_free, FALSE);
5267 }
5268 }
5269 if (entry->vme_end > end_unnest) {
5270 _vm_map_clip_end(&map->hdr,
5271 entry,
5272 end_unnest);
5273 if (map->holelistenabled) {
5274 vm_map_store_update_first_free(map, NULL, FALSE);
5275 } else {
5276 vm_map_store_update_first_free(map, map->first_free, FALSE);
5277 }
5278 }
5279
5280 pmap_unnest(map->pmap,
5281 entry->vme_start,
5282 entry->vme_end - entry->vme_start);
5283 if ((map->mapped_in_other_pmaps) && os_ref_get_count(&map->map_refcnt) != 0) {
5284 /* clean up parent map/maps */
5285 vm_map_submap_pmap_clean(
5286 map, entry->vme_start,
5287 entry->vme_end,
5288 VME_SUBMAP(entry),
5289 VME_OFFSET(entry));
5290 }
5291 entry->use_pmap = FALSE;
5292 if ((map->pmap != kernel_pmap) &&
5293 (VME_ALIAS(entry) == VM_MEMORY_SHARED_PMAP)) {
5294 VME_ALIAS_SET(entry, VM_MEMORY_UNSHARED_PMAP);
5295 }
5296 }
5297 #endif /* NO_NESTED_PMAP */
5298
5299 /*
5300 * vm_map_clip_start: [ internal use only ]
5301 *
5302 * Asserts that the given entry begins at or after
5303 * the specified address; if necessary,
5304 * it splits the entry into two.
5305 */
5306 void
5307 vm_map_clip_start(
5308 vm_map_t map,
5309 vm_map_entry_t entry,
5310 vm_map_offset_t startaddr)
5311 {
5312 #ifndef NO_NESTED_PMAP
5313 if (entry->is_sub_map &&
5314 entry->use_pmap &&
5315 startaddr >= entry->vme_start) {
5316 vm_map_offset_t start_unnest, end_unnest;
5317
5318 /*
5319 * Make sure "startaddr" is no longer in a nested range
5320 * before we clip. Unnest only the minimum range the platform
5321 * can handle.
5322 * vm_map_clip_unnest may perform additional adjustments to
5323 * the unnest range.
5324 */
5325 start_unnest = startaddr & ~(pmap_nesting_size_min - 1);
5326 end_unnest = start_unnest + pmap_nesting_size_min;
5327 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
5328 }
5329 #endif /* NO_NESTED_PMAP */
5330 if (startaddr > entry->vme_start) {
5331 if (VME_OBJECT(entry) &&
5332 !entry->is_sub_map &&
5333 VME_OBJECT(entry)->phys_contiguous) {
5334 pmap_remove(map->pmap,
5335 (addr64_t)(entry->vme_start),
5336 (addr64_t)(entry->vme_end));
5337 }
5338 if (entry->vme_atomic) {
5339 panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)\n", map, entry);
5340 }
5341
5342 DTRACE_VM5(
5343 vm_map_clip_start,
5344 vm_map_t, map,
5345 vm_map_offset_t, entry->vme_start,
5346 vm_map_offset_t, entry->vme_end,
5347 vm_map_offset_t, startaddr,
5348 int, VME_ALIAS(entry));
5349
5350 _vm_map_clip_start(&map->hdr, entry, startaddr);
5351 if (map->holelistenabled) {
5352 vm_map_store_update_first_free(map, NULL, FALSE);
5353 } else {
5354 vm_map_store_update_first_free(map, map->first_free, FALSE);
5355 }
5356 }
5357 }
5358
5359
/* Same idea for an entry of a map copy: split it only if "startaddr" lies past its start. */
#define vm_map_copy_clip_start(copy, entry, startaddr)                      \
	MACRO_BEGIN                                                         \
	if ((entry)->vme_start < (startaddr)) {                             \
	        _vm_map_clip_start(&(copy)->cpy_hdr, (entry), (startaddr)); \
	}                                                                   \
	MACRO_END
5365
5366 /*
5367 * This routine is called only when it is known that
5368 * the entry must be split.
5369 */
5370 static void
5371 _vm_map_clip_start(
5372 struct vm_map_header *map_header,
5373 vm_map_entry_t entry,
5374 vm_map_offset_t start)
5375 {
5376 vm_map_entry_t new_entry;
5377
5378 /*
5379 * Split off the front portion --
5380 * note that we must insert the new
5381 * entry BEFORE this one, so that
5382 * this entry has the specified starting
5383 * address.
5384 */
5385
5386 if (entry->map_aligned) {
5387 assert(VM_MAP_PAGE_ALIGNED(start,
5388 VM_MAP_HDR_PAGE_MASK(map_header)));
5389 }
5390
5391 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
5392 vm_map_entry_copy_full(new_entry, entry);
5393
5394 new_entry->vme_end = start;
5395 assert(new_entry->vme_start < new_entry->vme_end);
5396 VME_OFFSET_SET(entry, VME_OFFSET(entry) + (start - entry->vme_start));
5397 assert(start < entry->vme_end);
5398 entry->vme_start = start;
5399
5400 _vm_map_store_entry_link(map_header, entry->vme_prev, new_entry);
5401
5402 if (entry->is_sub_map) {
5403 vm_map_reference(VME_SUBMAP(new_entry));
5404 } else {
5405 vm_object_reference(VME_OBJECT(new_entry));
5406 }
5407 }
5408
5409
5410 /*
5411 * vm_map_clip_end: [ internal use only ]
5412 *
5413 * Asserts that the given entry ends at or before
5414 * the specified address; if necessary,
5415 * it splits the entry into two.
5416 */
5417 void
5418 vm_map_clip_end(
5419 vm_map_t map,
5420 vm_map_entry_t entry,
5421 vm_map_offset_t endaddr)
5422 {
5423 if (endaddr > entry->vme_end) {
5424 /*
5425 * Within the scope of this clipping, limit "endaddr" to
5426 * the end of this map entry...
5427 */
5428 endaddr = entry->vme_end;
5429 }
5430 #ifndef NO_NESTED_PMAP
5431 if (entry->is_sub_map && entry->use_pmap) {
5432 vm_map_offset_t start_unnest, end_unnest;
5433
5434 /*
5435 * Make sure the range between the start of this entry and
5436 * the new "endaddr" is no longer nested before we clip.
5437 * Unnest only the minimum range the platform can handle.
5438 * vm_map_clip_unnest may perform additional adjustments to
5439 * the unnest range.
5440 */
5441 start_unnest = entry->vme_start;
5442 end_unnest =
5443 (endaddr + pmap_nesting_size_min - 1) &
5444 ~(pmap_nesting_size_min - 1);
5445 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
5446 }
5447 #endif /* NO_NESTED_PMAP */
5448 if (endaddr < entry->vme_end) {
5449 if (VME_OBJECT(entry) &&
5450 !entry->is_sub_map &&
5451 VME_OBJECT(entry)->phys_contiguous) {
5452 pmap_remove(map->pmap,
5453 (addr64_t)(entry->vme_start),
5454 (addr64_t)(entry->vme_end));
5455 }
5456 if (entry->vme_atomic) {
5457 panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)\n", map, entry);
5458 }
5459 DTRACE_VM5(
5460 vm_map_clip_end,
5461 vm_map_t, map,
5462 vm_map_offset_t, entry->vme_start,
5463 vm_map_offset_t, entry->vme_end,
5464 vm_map_offset_t, endaddr,
5465 int, VME_ALIAS(entry));
5466
5467 _vm_map_clip_end(&map->hdr, entry, endaddr);
5468 if (map->holelistenabled) {
5469 vm_map_store_update_first_free(map, NULL, FALSE);
5470 } else {
5471 vm_map_store_update_first_free(map, map->first_free, FALSE);
5472 }
5473 }
5474 }
5475
5476
/*
 *	vm_map_copy_clip_end:
 *
 *	Split "entry" at "endaddr" within the entry list of "copy"
 *	(its cpy_hdr) when "endaddr" lies before the end of the entry;
 *	otherwise do nothing.  Arguments may be evaluated more than once.
 */
#define vm_map_copy_clip_end(copy, entry, endaddr) \
	MACRO_BEGIN \
	if ((endaddr) < (entry)->vme_end) { \
	        _vm_map_clip_end(&(copy)->cpy_hdr, (entry), (endaddr)); \
	} \
	MACRO_END
5482
5483 /*
5484 * This routine is called only when it is known that
5485 * the entry must be split.
5486 */
5487 static void
5488 _vm_map_clip_end(
5489 struct vm_map_header *map_header,
5490 vm_map_entry_t entry,
5491 vm_map_offset_t end)
5492 {
5493 vm_map_entry_t new_entry;
5494
5495 /*
5496 * Create a new entry and insert it
5497 * AFTER the specified entry
5498 */
5499
5500 if (entry->map_aligned) {
5501 assert(VM_MAP_PAGE_ALIGNED(end,
5502 VM_MAP_HDR_PAGE_MASK(map_header)));
5503 }
5504
5505 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
5506 vm_map_entry_copy_full(new_entry, entry);
5507
5508 assert(entry->vme_start < end);
5509 new_entry->vme_start = entry->vme_end = end;
5510 VME_OFFSET_SET(new_entry,
5511 VME_OFFSET(new_entry) + (end - entry->vme_start));
5512 assert(new_entry->vme_start < new_entry->vme_end);
5513
5514 _vm_map_store_entry_link(map_header, entry, new_entry);
5515
5516 if (entry->is_sub_map) {
5517 vm_map_reference(VME_SUBMAP(new_entry));
5518 } else {
5519 vm_object_reference(VME_OBJECT(new_entry));
5520 }
5521 }
5522
5523
/*
 *	VM_MAP_RANGE_CHECK:	[ internal use only ]
 *
 *	Clamps the starting and ending region addresses to the valid
 *	range of the map: "start" is raised to vm_map_min(map), "end" is
 *	lowered to vm_map_max(map), and if "start" still exceeds "end"
 *	it is pulled back to "end" (yielding an empty range).
 *
 *	"start" and "end" are assigned to, so they must be lvalues.
 */
#define VM_MAP_RANGE_CHECK(map, start, end)     \
	MACRO_BEGIN                             \
	if (start < vm_map_min(map)) {          \
	        start = vm_map_min(map);        \
	}                                       \
	if (end > vm_map_max(map)) {            \
	        end = vm_map_max(map);          \
	}                                       \
	if (start > end) {                      \
	        start = end;                    \
	}                                       \
	MACRO_END
5539
5540 /*
5541 * vm_map_range_check: [ internal use only ]
5542 *
5543 * Check that the region defined by the specified start and
5544 * end addresses are wholly contained within a single map
5545 * entry or set of adjacent map entries of the spacified map,
5546 * i.e. the specified region contains no unmapped space.
5547 * If any or all of the region is unmapped, FALSE is returned.
5548 * Otherwise, TRUE is returned and if the output argument 'entry'
5549 * is not NULL it points to the map entry containing the start
5550 * of the region.
5551 *
5552 * The map is locked for reading on entry and is left locked.
5553 */
5554 static boolean_t
5555 vm_map_range_check(
5556 vm_map_t map,
5557 vm_map_offset_t start,
5558 vm_map_offset_t end,
5559 vm_map_entry_t *entry)
5560 {
5561 vm_map_entry_t cur;
5562 vm_map_offset_t prev;
5563
5564 /*
5565 * Basic sanity checks first
5566 */
5567 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end) {
5568 return FALSE;
5569 }
5570
5571 /*
5572 * Check first if the region starts within a valid
5573 * mapping for the map.
5574 */
5575 if (!vm_map_lookup_entry(map, start, &cur)) {
5576 return FALSE;
5577 }
5578
5579 /*
5580 * Optimize for the case that the region is contained
5581 * in a single map entry.
5582 */
5583 if (entry != (vm_map_entry_t *) NULL) {
5584 *entry = cur;
5585 }
5586 if (end <= cur->vme_end) {
5587 return TRUE;
5588 }
5589
5590 /*
5591 * If the region is not wholly contained within a
5592 * single entry, walk the entries looking for holes.
5593 */
5594 prev = cur->vme_end;
5595 cur = cur->vme_next;
5596 while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
5597 if (end <= cur->vme_end) {
5598 return TRUE;
5599 }
5600 prev = cur->vme_end;
5601 cur = cur->vme_next;
5602 }
5603 return FALSE;
5604 }
5605
5606 /*
5607 * vm_map_submap: [ kernel use only ]
5608 *
5609 * Mark the given range as handled by a subordinate map.
5610 *
5611 * This range must have been created with vm_map_find using
5612 * the vm_submap_object, and no other operations may have been
5613 * performed on this range prior to calling vm_map_submap.
5614 *
5615 * Only a limited number of operations can be performed
5616 * within this rage after calling vm_map_submap:
5617 * vm_fault
5618 * [Don't try vm_map_copyin!]
5619 *
5620 * To remove a submapping, one must first remove the
5621 * range from the superior map, and then destroy the
5622 * submap (if desired). [Better yet, don't try it.]
5623 */
5624 kern_return_t
5625 vm_map_submap(
5626 vm_map_t map,
5627 vm_map_offset_t start,
5628 vm_map_offset_t end,
5629 vm_map_t submap,
5630 vm_map_offset_t offset,
5631 #ifdef NO_NESTED_PMAP
5632 __unused
5633 #endif /* NO_NESTED_PMAP */
5634 boolean_t use_pmap)
5635 {
5636 vm_map_entry_t entry;
5637 kern_return_t result = KERN_INVALID_ARGUMENT;
5638 vm_object_t object;
5639
5640 vm_map_lock(map);
5641
5642 if (!vm_map_lookup_entry(map, start, &entry)) {
5643 entry = entry->vme_next;
5644 }
5645
5646 if (entry == vm_map_to_entry(map) ||
5647 entry->is_sub_map) {
5648 vm_map_unlock(map);
5649 return KERN_INVALID_ARGUMENT;
5650 }
5651
5652 vm_map_clip_start(map, entry, start);
5653 vm_map_clip_end(map, entry, end);
5654
5655 if ((entry->vme_start == start) && (entry->vme_end == end) &&
5656 (!entry->is_sub_map) &&
5657 ((object = VME_OBJECT(entry)) == vm_submap_object) &&
5658 (object->resident_page_count == 0) &&
5659 (object->copy == VM_OBJECT_NULL) &&
5660 (object->shadow == VM_OBJECT_NULL) &&
5661 (!object->pager_created)) {
5662 VME_OFFSET_SET(entry, (vm_object_offset_t)offset);
5663 VME_OBJECT_SET(entry, VM_OBJECT_NULL);
5664 vm_object_deallocate(object);
5665 entry->is_sub_map = TRUE;
5666 entry->use_pmap = FALSE;
5667 VME_SUBMAP_SET(entry, submap);
5668 vm_map_reference(submap);
5669 if (submap->mapped_in_other_pmaps == FALSE &&
5670 vm_map_pmap(submap) != PMAP_NULL &&
5671 vm_map_pmap(submap) != vm_map_pmap(map)) {
5672 /*
5673 * This submap is being mapped in a map
5674 * that uses a different pmap.
5675 * Set its "mapped_in_other_pmaps" flag
5676 * to indicate that we now need to
5677 * remove mappings from all pmaps rather
5678 * than just the submap's pmap.
5679 */
5680 submap->mapped_in_other_pmaps = TRUE;
5681 }
5682
5683 #ifndef NO_NESTED_PMAP
5684 if (use_pmap) {
5685 /* nest if platform code will allow */
5686 if (submap->pmap == NULL) {
5687 ledger_t ledger = map->pmap->ledger;
5688 submap->pmap = pmap_create_options(ledger,
5689 (vm_map_size_t) 0, 0);
5690 if (submap->pmap == PMAP_NULL) {
5691 vm_map_unlock(map);
5692 return KERN_NO_SPACE;
5693 }
5694 #if defined(__arm__) || defined(__arm64__)
5695 pmap_set_nested(submap->pmap);
5696 #endif
5697 }
5698 result = pmap_nest(map->pmap,
5699 (VME_SUBMAP(entry))->pmap,
5700 (addr64_t)start,
5701 (addr64_t)start,
5702 (uint64_t)(end - start));
5703 if (result) {
5704 panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result);
5705 }
5706 entry->use_pmap = TRUE;
5707 }
5708 #else /* NO_NESTED_PMAP */
5709 pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end);
5710 #endif /* NO_NESTED_PMAP */
5711 result = KERN_SUCCESS;
5712 }
5713 vm_map_unlock(map);
5714
5715 return result;
5716 }
5717
5718 /*
5719 * vm_map_protect:
5720 *
5721 * Sets the protection of the specified address
5722 * region in the target map. If "set_max" is
5723 * specified, the maximum protection is to be set;
5724 * otherwise, only the current protection is affected.
5725 */
5726 kern_return_t
5727 vm_map_protect(
5728 vm_map_t map,
5729 vm_map_offset_t start,
5730 vm_map_offset_t end,
5731 vm_prot_t new_prot,
5732 boolean_t set_max)
5733 {
5734 vm_map_entry_t current;
5735 vm_map_offset_t prev;
5736 vm_map_entry_t entry;
5737 vm_prot_t new_max;
5738 int pmap_options = 0;
5739 kern_return_t kr;
5740
5741 if (new_prot & VM_PROT_COPY) {
5742 vm_map_offset_t new_start;
5743 vm_prot_t cur_prot, max_prot;
5744 vm_map_kernel_flags_t kflags;
5745
5746 /* LP64todo - see below */
5747 if (start >= map->max_offset) {
5748 return KERN_INVALID_ADDRESS;
5749 }
5750
5751 #if VM_PROTECT_WX_FAIL
5752 if ((new_prot & VM_PROT_EXECUTE) &&
5753 map != kernel_map &&
5754 cs_process_enforcement(NULL)) {
5755 DTRACE_VM3(cs_wx,
5756 uint64_t, (uint64_t) start,
5757 uint64_t, (uint64_t) end,
5758 vm_prot_t, new_prot);
5759 printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
5760 proc_selfpid(),
5761 (current_task()->bsd_info
5762 ? proc_name_address(current_task()->bsd_info)
5763 : "?"),
5764 __FUNCTION__);
5765 return KERN_PROTECTION_FAILURE;
5766 }
5767 #endif /* VM_PROTECT_WX_FAIL */
5768
5769 /*
5770 * Let vm_map_remap_extract() know that it will need to:
5771 * + make a copy of the mapping
5772 * + add VM_PROT_WRITE to the max protections
5773 * + remove any protections that are no longer allowed from the
5774 * max protections (to avoid any WRITE/EXECUTE conflict, for
5775 * example).
5776 * Note that "max_prot" is an IN/OUT parameter only for this
5777 * specific (VM_PROT_COPY) case. It's usually an OUT parameter
5778 * only.
5779 */
5780 max_prot = new_prot & VM_PROT_ALL;
5781 kflags = VM_MAP_KERNEL_FLAGS_NONE;
5782 kflags.vmkf_remap_prot_copy = TRUE;
5783 kflags.vmkf_overwrite_immutable = TRUE;
5784 new_start = start;
5785 kr = vm_map_remap(map,
5786 &new_start,
5787 end - start,
5788 0, /* mask */
5789 VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
5790 kflags,
5791 0,
5792 map,
5793 start,
5794 TRUE, /* copy-on-write remapping! */
5795 &cur_prot,
5796 &max_prot,
5797 VM_INHERIT_DEFAULT);
5798 if (kr != KERN_SUCCESS) {
5799 return kr;
5800 }
5801 new_prot &= ~VM_PROT_COPY;
5802 }
5803
5804 vm_map_lock(map);
5805
5806 /* LP64todo - remove this check when vm_map_commpage64()
5807 * no longer has to stuff in a map_entry for the commpage
5808 * above the map's max_offset.
5809 */
5810 if (start >= map->max_offset) {
5811 vm_map_unlock(map);
5812 return KERN_INVALID_ADDRESS;
5813 }
5814
5815 while (1) {
5816 /*
5817 * Lookup the entry. If it doesn't start in a valid
5818 * entry, return an error.
5819 */
5820 if (!vm_map_lookup_entry(map, start, &entry)) {
5821 vm_map_unlock(map);
5822 return KERN_INVALID_ADDRESS;
5823 }
5824
5825 if (entry->superpage_size && (start & (SUPERPAGE_SIZE - 1))) { /* extend request to whole entry */
5826 start = SUPERPAGE_ROUND_DOWN(start);
5827 continue;
5828 }
5829 break;
5830 }
5831 if (entry->superpage_size) {
5832 end = SUPERPAGE_ROUND_UP(end);
5833 }
5834
5835 /*
5836 * Make a first pass to check for protection and address
5837 * violations.
5838 */
5839
5840 current = entry;
5841 prev = current->vme_start;
5842 while ((current != vm_map_to_entry(map)) &&
5843 (current->vme_start < end)) {
5844 /*
5845 * If there is a hole, return an error.
5846 */
5847 if (current->vme_start != prev) {
5848 vm_map_unlock(map);
5849 return KERN_INVALID_ADDRESS;
5850 }
5851
5852 new_max = current->max_protection;
5853 if ((new_prot & new_max) != new_prot) {
5854 vm_map_unlock(map);
5855 return KERN_PROTECTION_FAILURE;
5856 }
5857
5858 if ((new_prot & VM_PROT_WRITE) &&
5859 (new_prot & VM_PROT_EXECUTE) &&
5860 #if !CONFIG_EMBEDDED
5861 map != kernel_map &&
5862 cs_process_enforcement(NULL) &&
5863 #endif /* !CONFIG_EMBEDDED */
5864 !(current->used_for_jit)) {
5865 DTRACE_VM3(cs_wx,
5866 uint64_t, (uint64_t) current->vme_start,
5867 uint64_t, (uint64_t) current->vme_end,
5868 vm_prot_t, new_prot);
5869 printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
5870 proc_selfpid(),
5871 (current_task()->bsd_info
5872 ? proc_name_address(current_task()->bsd_info)
5873 : "?"),
5874 __FUNCTION__);
5875 new_prot &= ~VM_PROT_EXECUTE;
5876 #if VM_PROTECT_WX_FAIL
5877 vm_map_unlock(map);
5878 return KERN_PROTECTION_FAILURE;
5879 #endif /* VM_PROTECT_WX_FAIL */
5880 }
5881
5882 /*
5883 * If the task has requested executable lockdown,
5884 * deny both:
5885 * - adding executable protections OR
5886 * - adding write protections to an existing executable mapping.
5887 */
5888 if (map->map_disallow_new_exec == TRUE) {
5889 if ((new_prot & VM_PROT_EXECUTE) ||
5890 ((current->protection & VM_PROT_EXECUTE) && (new_prot & VM_PROT_WRITE))) {
5891 vm_map_unlock(map);
5892 return KERN_PROTECTION_FAILURE;
5893 }
5894 }
5895
5896 prev = current->vme_end;
5897 current = current->vme_next;
5898 }
5899
5900 #if __arm64__
5901 if (end > prev &&
5902 end == vm_map_round_page(prev, VM_MAP_PAGE_MASK(map))) {
5903 vm_map_entry_t prev_entry;
5904
5905 prev_entry = current->vme_prev;
5906 if (prev_entry != vm_map_to_entry(map) &&
5907 !prev_entry->map_aligned &&
5908 (vm_map_round_page(prev_entry->vme_end,
5909 VM_MAP_PAGE_MASK(map))
5910 == end)) {
5911 /*
5912 * The last entry in our range is not "map-aligned"
5913 * but it would have reached all the way to "end"
5914 * if it had been map-aligned, so this is not really
5915 * a hole in the range and we can proceed.
5916 */
5917 prev = end;
5918 }
5919 }
5920 #endif /* __arm64__ */
5921
5922 if (end > prev) {
5923 vm_map_unlock(map);
5924 return KERN_INVALID_ADDRESS;
5925 }
5926
5927 /*
5928 * Go back and fix up protections.
5929 * Clip to start here if the range starts within
5930 * the entry.
5931 */
5932
5933 current = entry;
5934 if (current != vm_map_to_entry(map)) {
5935 /* clip and unnest if necessary */
5936 vm_map_clip_start(map, current, start);
5937 }
5938
5939 while ((current != vm_map_to_entry(map)) &&
5940 (current->vme_start < end)) {
5941 vm_prot_t old_prot;
5942
5943 vm_map_clip_end(map, current, end);
5944
5945 if (current->is_sub_map) {
5946 /* clipping did unnest if needed */
5947 assert(!current->use_pmap);
5948 }
5949
5950 old_prot = current->protection;
5951
5952 if (set_max) {
5953 current->max_protection = new_prot;
5954 current->protection = new_prot & old_prot;
5955 } else {
5956 current->protection = new_prot;
5957 }
5958
5959 /*
5960 * Update physical map if necessary.
5961 * If the request is to turn off write protection,
5962 * we won't do it for real (in pmap). This is because
5963 * it would cause copy-on-write to fail. We've already
5964 * set, the new protection in the map, so if a
5965 * write-protect fault occurred, it will be fixed up
5966 * properly, COW or not.
5967 */
5968 if (current->protection != old_prot) {
5969 /* Look one level in we support nested pmaps */
5970 /* from mapped submaps which are direct entries */
5971 /* in our map */
5972
5973 vm_prot_t prot;
5974
5975 prot = current->protection;
5976 if (current->is_sub_map || (VME_OBJECT(current) == NULL) || (VME_OBJECT(current) != compressor_object)) {
5977 prot &= ~VM_PROT_WRITE;
5978 } else {
5979 assert(!VME_OBJECT(current)->code_signed);
5980 assert(VME_OBJECT(current)->copy_strategy == MEMORY_OBJECT_COPY_NONE);
5981 }
5982
5983 if (override_nx(map, VME_ALIAS(current)) && prot) {
5984 prot |= VM_PROT_EXECUTE;
5985 }
5986
5987 #if CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG)
5988 if (!(old_prot & VM_PROT_EXECUTE) &&
5989 (prot & VM_PROT_EXECUTE) &&
5990 panic_on_unsigned_execute &&
5991 (proc_selfcsflags() & CS_KILL)) {
5992 panic("vm_map_protect(%p,0x%llx,0x%llx) old=0x%x new=0x%x - <rdar://23770418> code-signing bypass?\n", map, (uint64_t)current->vme_start, (uint64_t)current->vme_end, old_prot, prot);
5993 }
5994 #endif /* CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG) */
5995
5996 if (pmap_has_prot_policy(prot)) {
5997 if (current->wired_count) {
5998 panic("vm_map_protect(%p,0x%llx,0x%llx) new=0x%x wired=%x\n",
5999 map, (uint64_t)current->vme_start, (uint64_t)current->vme_end, prot, current->wired_count);
6000 }
6001
6002 /* If the pmap layer cares about this
6003 * protection type, force a fault for
6004 * each page so that vm_fault will
6005 * repopulate the page with the full
6006 * set of protections.
6007 */
6008 /*
6009 * TODO: We don't seem to need this,
6010 * but this is due to an internal
6011 * implementation detail of
6012 * pmap_protect. Do we want to rely
6013 * on this?
6014 */
6015 prot = VM_PROT_NONE;
6016 }
6017
6018 if (current->is_sub_map && current->use_pmap) {
6019 pmap_protect(VME_SUBMAP(current)->pmap,
6020 current->vme_start,
6021 current->vme_end,
6022 prot);
6023 } else {
6024 if (prot & VM_PROT_WRITE) {
6025 if (VME_OBJECT(current) == compressor_object) {
6026 /*
6027 * For write requests on the
6028 * compressor, we wil ask the
6029 * pmap layer to prevent us from
6030 * taking a write fault when we
6031 * attempt to access the mapping
6032 * next.
6033 */
6034 pmap_options |= PMAP_OPTIONS_PROTECT_IMMEDIATE;
6035 }
6036 }
6037
6038 pmap_protect_options(map->pmap,
6039 current->vme_start,
6040 current->vme_end,
6041 prot,
6042 pmap_options,
6043 NULL);
6044 }
6045 }
6046 current = current->vme_next;
6047 }
6048
6049 current = entry;
6050 while ((current != vm_map_to_entry(map)) &&
6051 (current->vme_start <= end)) {
6052 vm_map_simplify_entry(map, current);
6053 current = current->vme_next;
6054 }
6055
6056 vm_map_unlock(map);
6057 return KERN_SUCCESS;
6058 }
6059
6060 /*
6061 * vm_map_inherit:
6062 *
6063 * Sets the inheritance of the specified address
6064 * range in the target map. Inheritance
6065 * affects how the map will be shared with
6066 * child maps at the time of vm_map_fork.
6067 */
6068 kern_return_t
6069 vm_map_inherit(
6070 vm_map_t map,
6071 vm_map_offset_t start,
6072 vm_map_offset_t end,
6073 vm_inherit_t new_inheritance)
6074 {
6075 vm_map_entry_t entry;
6076 vm_map_entry_t temp_entry;
6077
6078 vm_map_lock(map);
6079
6080 VM_MAP_RANGE_CHECK(map, start, end);
6081
6082 if (vm_map_lookup_entry(map, start, &temp_entry)) {
6083 entry = temp_entry;
6084 } else {
6085 temp_entry = temp_entry->vme_next;
6086 entry = temp_entry;
6087 }
6088
6089 /* first check entire range for submaps which can't support the */
6090 /* given inheritance. */
6091 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
6092 if (entry->is_sub_map) {
6093 if (new_inheritance == VM_INHERIT_COPY) {
6094 vm_map_unlock(map);
6095 return KERN_INVALID_ARGUMENT;
6096 }
6097 }
6098
6099 entry = entry->vme_next;
6100 }
6101
6102 entry = temp_entry;
6103 if (entry != vm_map_to_entry(map)) {
6104 /* clip and unnest if necessary */
6105 vm_map_clip_start(map, entry, start);
6106 }
6107
6108 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
6109 vm_map_clip_end(map, entry, end);
6110 if (entry->is_sub_map) {
6111 /* clip did unnest if needed */
6112 assert(!entry->use_pmap);
6113 }
6114
6115 entry->inheritance = new_inheritance;
6116
6117 entry = entry->vme_next;
6118 }
6119
6120 vm_map_unlock(map);
6121 return KERN_SUCCESS;
6122 }
6123
6124 /*
6125 * Update the accounting for the amount of wired memory in this map. If the user has
6126 * exceeded the defined limits, then we fail. Wiring on behalf of the kernel never fails.
6127 */
6128
6129 static kern_return_t
6130 add_wire_counts(
6131 vm_map_t map,
6132 vm_map_entry_t entry,
6133 boolean_t user_wire)
6134 {
6135 vm_map_size_t size;
6136
6137 if (user_wire) {
6138 unsigned int total_wire_count = vm_page_wire_count + vm_lopage_free_count;
6139
6140 /*
6141 * We're wiring memory at the request of the user. Check if this is the first time the user is wiring
6142 * this map entry.
6143 */
6144
6145 if (entry->user_wired_count == 0) {
6146 size = entry->vme_end - entry->vme_start;
6147
6148 /*
6149 * Since this is the first time the user is wiring this map entry, check to see if we're
6150 * exceeding the user wire limits. There is a per map limit which is the smaller of either
6151 * the process's rlimit or the global vm_per_task_user_wire_limit which caps this value. There is also
6152 * a system-wide limit on the amount of memory all users can wire. If the user is over either
6153 * limit, then we fail.
6154 */
6155
6156 if (size + map->user_wire_size > MIN(map->user_wire_limit, vm_per_task_user_wire_limit) ||
6157 size + ptoa_64(total_wire_count) > vm_global_user_wire_limit) {
6158 return KERN_RESOURCE_SHORTAGE;
6159 }
6160
6161 /*
6162 * The first time the user wires an entry, we also increment the wired_count and add this to
6163 * the total that has been wired in the map.
6164 */
6165
6166 if (entry->wired_count >= MAX_WIRE_COUNT) {
6167 return KERN_FAILURE;
6168 }
6169
6170 entry->wired_count++;
6171 map->user_wire_size += size;
6172 }
6173
6174 if (entry->user_wired_count >= MAX_WIRE_COUNT) {
6175 return KERN_FAILURE;
6176 }
6177
6178 entry->user_wired_count++;
6179 } else {
6180 /*
6181 * The kernel's wiring the memory. Just bump the count and continue.
6182 */
6183
6184 if (entry->wired_count >= MAX_WIRE_COUNT) {
6185 panic("vm_map_wire: too many wirings");
6186 }
6187
6188 entry->wired_count++;
6189 }
6190
6191 return KERN_SUCCESS;
6192 }
6193
6194 /*
6195 * Update the memory wiring accounting now that the given map entry is being unwired.
6196 */
6197
6198 static void
6199 subtract_wire_counts(
6200 vm_map_t map,
6201 vm_map_entry_t entry,
6202 boolean_t user_wire)
6203 {
6204 if (user_wire) {
6205 /*
6206 * We're unwiring memory at the request of the user. See if we're removing the last user wire reference.
6207 */
6208
6209 if (entry->user_wired_count == 1) {
6210 /*
6211 * We're removing the last user wire reference. Decrement the wired_count and the total
6212 * user wired memory for this map.
6213 */
6214
6215 assert(entry->wired_count >= 1);
6216 entry->wired_count--;
6217 map->user_wire_size -= entry->vme_end - entry->vme_start;
6218 }
6219
6220 assert(entry->user_wired_count >= 1);
6221 entry->user_wired_count--;
6222 } else {
6223 /*
6224 * The kernel is unwiring the memory. Just update the count.
6225 */
6226
6227 assert(entry->wired_count >= 1);
6228 entry->wired_count--;
6229 }
6230 }
6231
/*
 * Counter bumped each time vm_map_wire_nested() rejects a request to wire
 * an executable range (it is incremented right before that function fails
 * the request with KERN_PROTECTION_FAILURE).
 */
6232 int cs_executable_wire = 0;
6233
6234 /*
6235 * vm_map_wire:
6236 *
6237 * Sets the pageability of the specified address range in the
6238 * target map as wired. Regions specified as not pageable require
6239 * locked-down physical memory and physical page maps. The
6240 * access_type variable indicates types of accesses that must not
6241 * generate page faults. This is checked against protection of
6242 * memory being locked-down.
6243 *
6244 * The map must not be locked, but a reference must remain to the
6245 * map throughout the call.
6246 */
6247 static kern_return_t
6248 vm_map_wire_nested(
6249 vm_map_t map,
6250 vm_map_offset_t start,
6251 vm_map_offset_t end,
6252 vm_prot_t caller_prot,
6253 vm_tag_t tag,
6254 boolean_t user_wire,
6255 pmap_t map_pmap,
6256 vm_map_offset_t pmap_addr,
6257 ppnum_t *physpage_p)
6258 {
6259 vm_map_entry_t entry;
6260 vm_prot_t access_type;
6261 struct vm_map_entry *first_entry, tmp_entry;
6262 vm_map_t real_map;
6263 vm_map_offset_t s, e;
6264 kern_return_t rc;
6265 boolean_t need_wakeup;
6266 boolean_t main_map = FALSE;
6267 wait_interrupt_t interruptible_state;
6268 thread_t cur_thread;
6269 unsigned int last_timestamp;
6270 vm_map_size_t size;
6271 boolean_t wire_and_extract;
6272
6273 access_type = (caller_prot & VM_PROT_ALL);
6274
6275 wire_and_extract = FALSE;
6276 if (physpage_p != NULL) {
6277 /*
6278 * The caller wants the physical page number of the
6279 * wired page. We return only one physical page number
6280 * so this works for only one page at a time.
6281 */
6282 if ((end - start) != PAGE_SIZE) {
6283 return KERN_INVALID_ARGUMENT;
6284 }
6285 wire_and_extract = TRUE;
6286 *physpage_p = 0;
6287 }
6288
6289 vm_map_lock(map);
6290 if (map_pmap == NULL) {
6291 main_map = TRUE;
6292 }
6293 last_timestamp = map->timestamp;
6294
6295 VM_MAP_RANGE_CHECK(map, start, end);
6296 assert(page_aligned(start));
6297 assert(page_aligned(end));
6298 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
6299 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
6300 if (start == end) {
6301 /* We wired what the caller asked for, zero pages */
6302 vm_map_unlock(map);
6303 return KERN_SUCCESS;
6304 }
6305
6306 need_wakeup = FALSE;
6307 cur_thread = current_thread();
6308
6309 s = start;
6310 rc = KERN_SUCCESS;
6311
6312 if (vm_map_lookup_entry(map, s, &first_entry)) {
6313 entry = first_entry;
6314 /*
6315 * vm_map_clip_start will be done later.
6316 * We don't want to unnest any nested submaps here !
6317 */
6318 } else {
6319 /* Start address is not in map */
6320 rc = KERN_INVALID_ADDRESS;
6321 goto done;
6322 }
6323
6324 while ((entry != vm_map_to_entry(map)) && (s < end)) {
6325 /*
6326 * At this point, we have wired from "start" to "s".
6327 * We still need to wire from "s" to "end".
6328 *
6329 * "entry" hasn't been clipped, so it could start before "s"
6330 * and/or end after "end".
6331 */
6332
6333 /* "e" is how far we want to wire in this entry */
6334 e = entry->vme_end;
6335 if (e > end) {
6336 e = end;
6337 }
6338
6339 /*
6340 * If another thread is wiring/unwiring this entry then
6341 * block after informing other thread to wake us up.
6342 */
6343 if (entry->in_transition) {
6344 wait_result_t wait_result;
6345
6346 /*
6347 * We have not clipped the entry. Make sure that
6348 * the start address is in range so that the lookup
6349 * below will succeed.
6350 * "s" is the current starting point: we've already
6351 * wired from "start" to "s" and we still have
6352 * to wire from "s" to "end".
6353 */
6354
6355 entry->needs_wakeup = TRUE;
6356
6357 /*
6358 * wake up anybody waiting on entries that we have
6359 * already wired.
6360 */
6361 if (need_wakeup) {
6362 vm_map_entry_wakeup(map);
6363 need_wakeup = FALSE;
6364 }
6365 /*
6366 * User wiring is interruptible
6367 */
6368 wait_result = vm_map_entry_wait(map,
6369 (user_wire) ? THREAD_ABORTSAFE :
6370 THREAD_UNINT);
6371 if (user_wire && wait_result == THREAD_INTERRUPTED) {
6372 /*
6373 * undo the wirings we have done so far
6374 * We do not clear the needs_wakeup flag,
6375 * because we cannot tell if we were the
6376 * only one waiting.
6377 */
6378 rc = KERN_FAILURE;
6379 goto done;
6380 }
6381
6382 /*
6383 * Cannot avoid a lookup here. reset timestamp.
6384 */
6385 last_timestamp = map->timestamp;
6386
6387 /*
6388 * The entry could have been clipped, look it up again.
6389 * The worst that can happen is that it no longer exists.
6390 */
6391 if (!vm_map_lookup_entry(map, s, &first_entry)) {
6392 /*
6393 * User: undo everything up to the previous
6394 * entry. let vm_map_unwire worry about
6395 * checking the validity of the range.
6396 */
6397 rc = KERN_FAILURE;
6398 goto done;
6399 }
6400 entry = first_entry;
6401 continue;
6402 }
6403
6404 if (entry->is_sub_map) {
6405 vm_map_offset_t sub_start;
6406 vm_map_offset_t sub_end;
6407 vm_map_offset_t local_start;
6408 vm_map_offset_t local_end;
6409 pmap_t pmap;
6410
6411 if (wire_and_extract) {
6412 /*
6413 * Wiring would result in copy-on-write
6414 * which would not be compatible with
6415 * the sharing we have with the original
6416 * provider of this memory.
6417 */
6418 rc = KERN_INVALID_ARGUMENT;
6419 goto done;
6420 }
6421
6422 vm_map_clip_start(map, entry, s);
6423 vm_map_clip_end(map, entry, end);
6424
6425 sub_start = VME_OFFSET(entry);
6426 sub_end = entry->vme_end;
6427 sub_end += VME_OFFSET(entry) - entry->vme_start;
6428
6429 local_end = entry->vme_end;
6430 if (map_pmap == NULL) {
6431 vm_object_t object;
6432 vm_object_offset_t offset;
6433 vm_prot_t prot;
6434 boolean_t wired;
6435 vm_map_entry_t local_entry;
6436 vm_map_version_t version;
6437 vm_map_t lookup_map;
6438
6439 if (entry->use_pmap) {
6440 pmap = VME_SUBMAP(entry)->pmap;
6441 /* ppc implementation requires that */
6442 /* submaps pmap address ranges line */
6443 /* up with parent map */
6444 #ifdef notdef
6445 pmap_addr = sub_start;
6446 #endif
6447 pmap_addr = s;
6448 } else {
6449 pmap = map->pmap;
6450 pmap_addr = s;
6451 }
6452
6453 if (entry->wired_count) {
6454 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
6455 goto done;
6456 }
6457
6458 /*
6459 * The map was not unlocked:
6460 * no need to goto re-lookup.
6461 * Just go directly to next entry.
6462 */
6463 entry = entry->vme_next;
6464 s = entry->vme_start;
6465 continue;
6466 }
6467
6468 /* call vm_map_lookup_locked to */
6469 /* cause any needs copy to be */
6470 /* evaluated */
6471 local_start = entry->vme_start;
6472 lookup_map = map;
6473 vm_map_lock_write_to_read(map);
6474 if (vm_map_lookup_locked(
6475 &lookup_map, local_start,
6476 access_type | VM_PROT_COPY,
6477 OBJECT_LOCK_EXCLUSIVE,
6478 &version, &object,
6479 &offset, &prot, &wired,
6480 NULL,
6481 &real_map)) {
6482 vm_map_unlock_read(lookup_map);
6483 assert(map_pmap == NULL);
6484 vm_map_unwire(map, start,
6485 s, user_wire);
6486 return KERN_FAILURE;
6487 }
6488 vm_object_unlock(object);
6489 if (real_map != lookup_map) {
6490 vm_map_unlock(real_map);
6491 }
6492 vm_map_unlock_read(lookup_map);
6493 vm_map_lock(map);
6494
6495 /* we unlocked, so must re-lookup */
6496 if (!vm_map_lookup_entry(map,
6497 local_start,
6498 &local_entry)) {
6499 rc = KERN_FAILURE;
6500 goto done;
6501 }
6502
6503 /*
6504 * entry could have been "simplified",
6505 * so re-clip
6506 */
6507 entry = local_entry;
6508 assert(s == local_start);
6509 vm_map_clip_start(map, entry, s);
6510 vm_map_clip_end(map, entry, end);
6511 /* re-compute "e" */
6512 e = entry->vme_end;
6513 if (e > end) {
6514 e = end;
6515 }
6516
6517 /* did we have a change of type? */
6518 if (!entry->is_sub_map) {
6519 last_timestamp = map->timestamp;
6520 continue;
6521 }
6522 } else {
6523 local_start = entry->vme_start;
6524 pmap = map_pmap;
6525 }
6526
6527 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
6528 goto done;
6529 }
6530
6531 entry->in_transition = TRUE;
6532
6533 vm_map_unlock(map);
6534 rc = vm_map_wire_nested(VME_SUBMAP(entry),
6535 sub_start, sub_end,
6536 caller_prot, tag,
6537 user_wire, pmap, pmap_addr,
6538 NULL);
6539 vm_map_lock(map);
6540
6541 /*
6542 * Find the entry again. It could have been clipped
6543 * after we unlocked the map.
6544 */
6545 if (!vm_map_lookup_entry(map, local_start,
6546 &first_entry)) {
6547 panic("vm_map_wire: re-lookup failed");
6548 }
6549 entry = first_entry;
6550
6551 assert(local_start == s);
6552 /* re-compute "e" */
6553 e = entry->vme_end;
6554 if (e > end) {
6555 e = end;
6556 }
6557
6558 last_timestamp = map->timestamp;
6559 while ((entry != vm_map_to_entry(map)) &&
6560 (entry->vme_start < e)) {
6561 assert(entry->in_transition);
6562 entry->in_transition = FALSE;
6563 if (entry->needs_wakeup) {
6564 entry->needs_wakeup = FALSE;
6565 need_wakeup = TRUE;
6566 }
6567 if (rc != KERN_SUCCESS) {/* from vm_*_wire */
6568 subtract_wire_counts(map, entry, user_wire);
6569 }
6570 entry = entry->vme_next;
6571 }
6572 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
6573 goto done;
6574 }
6575
6576 /* no need to relookup again */
6577 s = entry->vme_start;
6578 continue;
6579 }
6580
6581 /*
6582 * If this entry is already wired then increment
6583 * the appropriate wire reference count.
6584 */
6585 if (entry->wired_count) {
6586 if ((entry->protection & access_type) != access_type) {
6587 /* found a protection problem */
6588
6589 /*
6590 * XXX FBDP
6591 * We should always return an error
6592 * in this case but since we didn't
6593 * enforce it before, let's do
6594 * it only for the new "wire_and_extract"
6595 * code path for now...
6596 */
6597 if (wire_and_extract) {
6598 rc = KERN_PROTECTION_FAILURE;
6599 goto done;
6600 }
6601 }
6602
6603 /*
6604 * entry is already wired down, get our reference
6605 * after clipping to our range.
6606 */
6607 vm_map_clip_start(map, entry, s);
6608 vm_map_clip_end(map, entry, end);
6609
6610 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
6611 goto done;
6612 }
6613
6614 if (wire_and_extract) {
6615 vm_object_t object;
6616 vm_object_offset_t offset;
6617 vm_page_t m;
6618
6619 /*
6620 * We don't have to "wire" the page again
6621 * but we still have to "extract" its
6622 * physical page number, after some sanity
6623 * checks.
6624 */
6625 assert((entry->vme_end - entry->vme_start)
6626 == PAGE_SIZE);
6627 assert(!entry->needs_copy);
6628 assert(!entry->is_sub_map);
6629 assert(VME_OBJECT(entry));
6630 if (((entry->vme_end - entry->vme_start)
6631 != PAGE_SIZE) ||
6632 entry->needs_copy ||
6633 entry->is_sub_map ||
6634 VME_OBJECT(entry) == VM_OBJECT_NULL) {
6635 rc = KERN_INVALID_ARGUMENT;
6636 goto done;
6637 }
6638
6639 object = VME_OBJECT(entry);
6640 offset = VME_OFFSET(entry);
6641 /* need exclusive lock to update m->dirty */
6642 if (entry->protection & VM_PROT_WRITE) {
6643 vm_object_lock(object);
6644 } else {
6645 vm_object_lock_shared(object);
6646 }
6647 m = vm_page_lookup(object, offset);
6648 assert(m != VM_PAGE_NULL);
6649 assert(VM_PAGE_WIRED(m));
6650 if (m != VM_PAGE_NULL && VM_PAGE_WIRED(m)) {
6651 *physpage_p = VM_PAGE_GET_PHYS_PAGE(m);
6652 if (entry->protection & VM_PROT_WRITE) {
6653 vm_object_lock_assert_exclusive(
6654 object);
6655 m->vmp_dirty = TRUE;
6656 }
6657 } else {
6658 /* not already wired !? */
6659 *physpage_p = 0;
6660 }
6661 vm_object_unlock(object);
6662 }
6663
6664 /* map was not unlocked: no need to relookup */
6665 entry = entry->vme_next;
6666 s = entry->vme_start;
6667 continue;
6668 }
6669
6670 /*
6671 * Unwired entry or wire request transmitted via submap
6672 */
6673
6674 /*
6675 * Wiring would copy the pages to the shadow object.
6676 * The shadow object would not be code-signed so
6677 * attempting to execute code from these copied pages
6678 * would trigger a code-signing violation.
6679 */
6680
6681 if ((entry->protection & VM_PROT_EXECUTE)
6682 #if !CONFIG_EMBEDDED
6683 &&
6684 map != kernel_map &&
6685 cs_process_enforcement(NULL)
6686 #endif /* !CONFIG_EMBEDDED */
6687 ) {
6688 #if MACH_ASSERT
6689 printf("pid %d[%s] wiring executable range from "
6690 "0x%llx to 0x%llx: rejected to preserve "
6691 "code-signing\n",
6692 proc_selfpid(),
6693 (current_task()->bsd_info
6694 ? proc_name_address(current_task()->bsd_info)
6695 : "?"),
6696 (uint64_t) entry->vme_start,
6697 (uint64_t) entry->vme_end);
6698 #endif /* MACH_ASSERT */
6699 DTRACE_VM2(cs_executable_wire,
6700 uint64_t, (uint64_t)entry->vme_start,
6701 uint64_t, (uint64_t)entry->vme_end);
6702 cs_executable_wire++;
6703 rc = KERN_PROTECTION_FAILURE;
6704 goto done;
6705 }
6706
6707 /*
6708 * Perform actions of vm_map_lookup that need the write
6709 * lock on the map: create a shadow object for a
6710 * copy-on-write region, or an object for a zero-fill
6711 * region.
6712 */
6713 size = entry->vme_end - entry->vme_start;
6714 /*
6715 * If wiring a copy-on-write page, we need to copy it now
6716 * even if we're only (currently) requesting read access.
6717 * This is aggressive, but once it's wired we can't move it.
6718 */
6719 if (entry->needs_copy) {
6720 if (wire_and_extract) {
6721 /*
6722 * We're supposed to share with the original
6723 * provider so should not be "needs_copy"
6724 */
6725 rc = KERN_INVALID_ARGUMENT;
6726 goto done;
6727 }
6728
6729 VME_OBJECT_SHADOW(entry, size);
6730 entry->needs_copy = FALSE;
6731 } else if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
6732 if (wire_and_extract) {
6733 /*
6734 * We're supposed to share with the original
6735 * provider so should already have an object.
6736 */
6737 rc = KERN_INVALID_ARGUMENT;
6738 goto done;
6739 }
6740 VME_OBJECT_SET(entry, vm_object_allocate(size));
6741 VME_OFFSET_SET(entry, (vm_object_offset_t)0);
6742 assert(entry->use_pmap);
6743 }
6744
6745 vm_map_clip_start(map, entry, s);
6746 vm_map_clip_end(map, entry, end);
6747
6748 /* re-compute "e" */
6749 e = entry->vme_end;
6750 if (e > end) {
6751 e = end;
6752 }
6753
6754 /*
6755 * Check for holes and protection mismatch.
6756 * Holes: Next entry should be contiguous unless this
6757 * is the end of the region.
6758 * Protection: Access requested must be allowed, unless
6759 * wiring is by protection class
6760 */
6761 if ((entry->vme_end < end) &&
6762 ((entry->vme_next == vm_map_to_entry(map)) ||
6763 (entry->vme_next->vme_start > entry->vme_end))) {
6764 /* found a hole */
6765 rc = KERN_INVALID_ADDRESS;
6766 goto done;
6767 }
6768 if ((entry->protection & access_type) != access_type) {
6769 /* found a protection problem */
6770 rc = KERN_PROTECTION_FAILURE;
6771 goto done;
6772 }
6773
6774 assert(entry->wired_count == 0 && entry->user_wired_count == 0);
6775
6776 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
6777 goto done;
6778 }
6779
6780 entry->in_transition = TRUE;
6781
6782 /*
6783 * This entry might get split once we unlock the map.
6784 * In vm_fault_wire(), we need the current range as
6785 * defined by this entry. In order for this to work
6786 * along with a simultaneous clip operation, we make a
6787 * temporary copy of this entry and use that for the
6788 * wiring. Note that the underlying objects do not
6789 * change during a clip.
6790 */
6791 tmp_entry = *entry;
6792
6793 /*
6794 * The in_transition state guarantees that the entry
6795 * (or entries for this range, if split occurred) will be
6796 * there when the map lock is acquired for the second time.
6797 */
6798 vm_map_unlock(map);
6799
6800 if (!user_wire && cur_thread != THREAD_NULL) {
6801 interruptible_state = thread_interrupt_level(THREAD_UNINT);
6802 } else {
6803 interruptible_state = THREAD_UNINT;
6804 }
6805
6806 if (map_pmap) {
6807 rc = vm_fault_wire(map,
6808 &tmp_entry, caller_prot, tag, map_pmap, pmap_addr,
6809 physpage_p);
6810 } else {
6811 rc = vm_fault_wire(map,
6812 &tmp_entry, caller_prot, tag, map->pmap,
6813 tmp_entry.vme_start,
6814 physpage_p);
6815 }
6816
6817 if (!user_wire && cur_thread != THREAD_NULL) {
6818 thread_interrupt_level(interruptible_state);
6819 }
6820
6821 vm_map_lock(map);
6822
6823 if (last_timestamp + 1 != map->timestamp) {
6824 /*
6825 * Find the entry again. It could have been clipped
6826 * after we unlocked the map.
6827 */
6828 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
6829 &first_entry)) {
6830 panic("vm_map_wire: re-lookup failed");
6831 }
6832
6833 entry = first_entry;
6834 }
6835
6836 last_timestamp = map->timestamp;
6837
6838 while ((entry != vm_map_to_entry(map)) &&
6839 (entry->vme_start < tmp_entry.vme_end)) {
6840 assert(entry->in_transition);
6841 entry->in_transition = FALSE;
6842 if (entry->needs_wakeup) {
6843 entry->needs_wakeup = FALSE;
6844 need_wakeup = TRUE;
6845 }
6846 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
6847 subtract_wire_counts(map, entry, user_wire);
6848 }
6849 entry = entry->vme_next;
6850 }
6851
6852 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
6853 goto done;
6854 }
6855
6856 if ((entry != vm_map_to_entry(map)) && /* we still have entries in the map */
6857 (tmp_entry.vme_end != end) && /* AND, we are not at the end of the requested range */
6858 (entry->vme_start != tmp_entry.vme_end)) { /* AND, the next entry is not contiguous. */
6859 /* found a "new" hole */
6860 s = tmp_entry.vme_end;
6861 rc = KERN_INVALID_ADDRESS;
6862 goto done;
6863 }
6864
6865 s = entry->vme_start;
6866 } /* end while loop through map entries */
6867
6868 done:
6869 if (rc == KERN_SUCCESS) {
6870 /* repair any damage we may have made to the VM map */
6871 vm_map_simplify_range(map, start, end);
6872 }
6873
6874 vm_map_unlock(map);
6875
6876 /*
6877 * wake up anybody waiting on entries we wired.
6878 */
6879 if (need_wakeup) {
6880 vm_map_entry_wakeup(map);
6881 }
6882
6883 if (rc != KERN_SUCCESS) {
6884 /* undo what has been wired so far */
6885 vm_map_unwire_nested(map, start, s, user_wire,
6886 map_pmap, pmap_addr);
6887 if (physpage_p) {
6888 *physpage_p = 0;
6889 }
6890 }
6891
6892 return rc;
6893 }
6894
6895 kern_return_t
6896 vm_map_wire_external(
6897 vm_map_t map,
6898 vm_map_offset_t start,
6899 vm_map_offset_t end,
6900 vm_prot_t caller_prot,
6901 boolean_t user_wire)
6902 {
6903 kern_return_t kret;
6904
6905 kret = vm_map_wire_nested(map, start, end, caller_prot, vm_tag_bt(),
6906 user_wire, (pmap_t)NULL, 0, NULL);
6907 return kret;
6908 }
6909
6910 kern_return_t
6911 vm_map_wire_kernel(
6912 vm_map_t map,
6913 vm_map_offset_t start,
6914 vm_map_offset_t end,
6915 vm_prot_t caller_prot,
6916 vm_tag_t tag,
6917 boolean_t user_wire)
6918 {
6919 kern_return_t kret;
6920
6921 kret = vm_map_wire_nested(map, start, end, caller_prot, tag,
6922 user_wire, (pmap_t)NULL, 0, NULL);
6923 return kret;
6924 }
6925
6926 kern_return_t
6927 vm_map_wire_and_extract_external(
6928 vm_map_t map,
6929 vm_map_offset_t start,
6930 vm_prot_t caller_prot,
6931 boolean_t user_wire,
6932 ppnum_t *physpage_p)
6933 {
6934 kern_return_t kret;
6935
6936 kret = vm_map_wire_nested(map,
6937 start,
6938 start + VM_MAP_PAGE_SIZE(map),
6939 caller_prot,
6940 vm_tag_bt(),
6941 user_wire,
6942 (pmap_t)NULL,
6943 0,
6944 physpage_p);
6945 if (kret != KERN_SUCCESS &&
6946 physpage_p != NULL) {
6947 *physpage_p = 0;
6948 }
6949 return kret;
6950 }
6951
6952 kern_return_t
6953 vm_map_wire_and_extract_kernel(
6954 vm_map_t map,
6955 vm_map_offset_t start,
6956 vm_prot_t caller_prot,
6957 vm_tag_t tag,
6958 boolean_t user_wire,
6959 ppnum_t *physpage_p)
6960 {
6961 kern_return_t kret;
6962
6963 kret = vm_map_wire_nested(map,
6964 start,
6965 start + VM_MAP_PAGE_SIZE(map),
6966 caller_prot,
6967 tag,
6968 user_wire,
6969 (pmap_t)NULL,
6970 0,
6971 physpage_p);
6972 if (kret != KERN_SUCCESS &&
6973 physpage_p != NULL) {
6974 *physpage_p = 0;
6975 }
6976 return kret;
6977 }
6978
6979 /*
6980 * vm_map_unwire:
6981 *
6982 * Sets the pageability of the specified address range in the target
6983 * as pageable. Regions specified must have been wired previously.
6984 *
6985 * The map must not be locked, but a reference must remain to the map
6986 * throughout the call.
6987 *
6988 * Kernel will panic on failures. User unwire ignores holes and
6989 * unwired and in-transition entries to avoid losing memory by leaving
6990 * it unwired.
6991 */
/*
 * vm_map_unwire_nested() arguments, as used by the code below:
 *
 *   map        map to operate on; it is locked here and unlocked again
 *              before every return.
 *   start/end  range to unwire; both are asserted to be page aligned.
 *              An empty range (start == end) returns KERN_SUCCESS at once.
 *   user_wire  passed to subtract_wire_counts().  When FALSE, every
 *              inconsistency (start not mapped, hole, unwired or
 *              in-transition entry, failed re-lookup) panics; when TRUE
 *              the offending entry is skipped instead.
 *   map_pmap   when non-NULL, it and "pmap_addr" are what gets passed to
 *   pmap_addr  vm_fault_unwire(); when NULL, map->pmap and the entry's own
 *              start address are used instead.
 *
 * Returns KERN_INVALID_ADDRESS if "start" is not mapped (user_wire only)
 * or if the first entry is a superpage; KERN_SUCCESS otherwise.  The
 * return value of the recursive calls made for submaps is not examined.
 */
6992 static kern_return_t
6993 vm_map_unwire_nested(
6994 vm_map_t map,
6995 vm_map_offset_t start,
6996 vm_map_offset_t end,
6997 boolean_t user_wire,
6998 pmap_t map_pmap,
6999 vm_map_offset_t pmap_addr)
7000 {
7001 vm_map_entry_t entry;
7002 struct vm_map_entry *first_entry, tmp_entry;
7003 boolean_t need_wakeup;
7004 boolean_t main_map = FALSE;
7005 unsigned int last_timestamp;
7006
7007 vm_map_lock(map);
/* NOTE(review): main_map is only written here; nothing else in this function reads it. */
7008 if (map_pmap == NULL) {
7009 main_map = TRUE;
7010 }
/*
 * Remember the map timestamp; it is compared against map->timestamp
 * after each unlock/relock below to decide whether a re-lookup is needed.
 */
7011 last_timestamp = map->timestamp;
7012
7013 VM_MAP_RANGE_CHECK(map, start, end);
7014 assert(page_aligned(start));
7015 assert(page_aligned(end));
7016 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
7017 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
7018
7019 if (start == end) {
7020 /* We unwired what the caller asked for: zero pages */
7021 vm_map_unlock(map);
7022 return KERN_SUCCESS;
7023 }
7024
7025 if (vm_map_lookup_entry(map, start, &first_entry)) {
7026 entry = first_entry;
7027 /*
7028 * vm_map_clip_start will be done later.
7029 * We don't want to unnest any nested sub maps here !
7030 */
7031 } else {
7032 if (!user_wire) {
7033 panic("vm_map_unwire: start not found");
7034 }
7035 /* Start address is not in map. */
7036 vm_map_unlock(map);
7037 return KERN_INVALID_ADDRESS;
7038 }
7039
/* Only the first entry of the range is checked for being a superpage. */
7040 if (entry->superpage_size) {
7041 /* superpages are always wired */
7042 vm_map_unlock(map);
7043 return KERN_INVALID_ADDRESS;
7044 }
7045
7046 need_wakeup = FALSE;
/* Walk every map entry that starts before "end". */
7047 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
7048 if (entry->in_transition) {
7049 /*
7050 * 1)
7051 * Another thread is wiring down this entry. Note
7052 * that if it is not for the other thread we would
7053 * be unwiring an unwired entry. This is not
7054 * permitted. If we wait, we will be unwiring memory
7055 * we did not wire.
7056 *
7057 * 2)
7058 * Another thread is unwiring this entry. We did not
7059 * have a reference to it, because if we did, this
7060 * entry will not be getting unwired now.
7061 */
7062 if (!user_wire) {
7063 /*
7064 * XXX FBDP
7065 * This could happen: there could be some
7066 * overlapping vslock/vsunlock operations
7067 * going on.
7068 * We should probably just wait and retry,
7069 * but then we have to be careful that this
7070 * entry could get "simplified" after
7071 * "in_transition" gets unset and before
7072 * we re-lookup the entry, so we would
7073 * have to re-clip the entry to avoid
7074 * re-unwiring what we have already unwired...
7075 * See vm_map_wire_nested().
7076 *
7077 * Or we could just ignore "in_transition"
7078 * here and proceed to decrement the wired
7079 * count(s) on this entry. That should be fine
7080 * as long as "wired_count" doesn't drop all
7081 * the way to 0 (and we should panic if THAT
7082 * happens).
7083 */
7084 panic("vm_map_unwire: in_transition entry");
7085 }
7086
/* user unwire: leave the in-transition entry untouched and move on */
7087 entry = entry->vme_next;
7088 continue;
7089 }
7090
7091 if (entry->is_sub_map) {
7092 vm_map_offset_t sub_start;
7093 vm_map_offset_t sub_end;
7094 vm_map_offset_t local_end;
7095 pmap_t pmap;
7096
7097 vm_map_clip_start(map, entry, start);
7098 vm_map_clip_end(map, entry, end);
7099
/*
 * Translate the clipped entry into the submap's address space:
 * [sub_start, sub_end) begins at the entry's offset and is as long
 * as the entry.  local_end is assigned but not read again in this
 * function.
 */
7100 sub_start = VME_OFFSET(entry);
7101 sub_end = entry->vme_end - entry->vme_start;
7102 sub_end += VME_OFFSET(entry);
7103 local_end = entry->vme_end;
7104 if (map_pmap == NULL) {
/*
 * No pmap was handed down by a caller: choose the pmap and pmap
 * address to pass to the recursive call based on entry->use_pmap.
 */
7105 if (entry->use_pmap) {
7106 pmap = VME_SUBMAP(entry)->pmap;
7107 pmap_addr = sub_start;
7108 } else {
7109 pmap = map->pmap;
7110 pmap_addr = start;
7111 }
7112 if (entry->wired_count == 0 ||
7113 (user_wire && entry->user_wired_count == 0)) {
7114 if (!user_wire) {
7115 panic("vm_map_unwire: entry is unwired");
7116 }
7117 entry = entry->vme_next;
7118 continue;
7119 }
7120
7121 /*
7122 * Check for holes
7123 * Holes: Next entry should be contiguous unless
7124 * this is the end of the region.
7125 */
7126 if (((entry->vme_end < end) &&
7127 ((entry->vme_next == vm_map_to_entry(map)) ||
7128 (entry->vme_next->vme_start
7129 > entry->vme_end)))) {
7130 if (!user_wire) {
7131 panic("vm_map_unwire: non-contiguous region");
7132 }
7133 /*
7134 * entry = entry->vme_next;
7135 * continue;
7136 */
/*
 * With the skip above commented out, a user unwire that finds a
 * hole after a submap entry still goes on to unwire that entry
 * (unlike the non-submap path further down, which skips it).
 */
7137 }
7138
7139 subtract_wire_counts(map, entry, user_wire);
7140
/* still wired by someone else: nothing more to do for this entry */
7141 if (entry->wired_count != 0) {
7142 entry = entry->vme_next;
7143 continue;
7144 }
7145
7146 entry->in_transition = TRUE;
7147 tmp_entry = *entry;/* see comment in vm_map_wire() */
7148
7149 /*
7150 * We can unlock the map now. The in_transition state
7151 * guarantees existence of the entry.
7152 */
7153 vm_map_unlock(map);
7154 vm_map_unwire_nested(VME_SUBMAP(entry),
7155 sub_start, sub_end, user_wire, pmap, pmap_addr);
7156 vm_map_lock(map);
7157
/*
 * A timestamp other than "saved value + 1" is taken to mean the map
 * changed while it was unlocked (presumably our own unlock/lock cycle
 * accounts for the +1 -- confirm against the lock macros).
 */
7158 if (last_timestamp + 1 != map->timestamp) {
7159 /*
7160 * Find the entry again. It could have been
7161 * clipped or deleted after we unlocked the map.
7162 */
7163 if (!vm_map_lookup_entry(map,
7164 tmp_entry.vme_start,
7165 &first_entry)) {
7166 if (!user_wire) {
7167 panic("vm_map_unwire: re-lookup failed");
7168 }
7169 entry = first_entry->vme_next;
7170 } else {
7171 entry = first_entry;
7172 }
7173 }
7174 last_timestamp = map->timestamp;
7175
7176 /*
7177 * clear transition bit for all constituent entries
7178 * that were in the original entry (saved in
7179 * tmp_entry). Also check for waiters.
7180 */
7181 while ((entry != vm_map_to_entry(map)) &&
7182 (entry->vme_start < tmp_entry.vme_end)) {
7183 assert(entry->in_transition);
7184 entry->in_transition = FALSE;
7185 if (entry->needs_wakeup) {
7186 entry->needs_wakeup = FALSE;
7187 need_wakeup = TRUE;
7188 }
7189 entry = entry->vme_next;
7190 }
/* "entry" now points past the range covered by tmp_entry */
7191 continue;
7192 } else {
/*
 * A pmap was handed down by the caller: recurse into the submap
 * with that same pmap and pmap address.
 *
 * NOTE(review): on this path the entry is not marked in_transition
 * and tmp_entry is not refreshed before the map is unlocked, so the
 * tmp_entry.vme_start used by the re-lookup below holds a value
 * from an earlier loop iteration (or is unset on the first one).
 * Confirm whether this is intended.
 */
7193 vm_map_unlock(map);
7194 vm_map_unwire_nested(VME_SUBMAP(entry),
7195 sub_start, sub_end, user_wire, map_pmap,
7196 pmap_addr);
7197 vm_map_lock(map);
7198
7199 if (last_timestamp + 1 != map->timestamp) {
7200 /*
7201 * Find the entry again. It could have been
7202 * clipped or deleted after we unlocked the map.
7203 */
7204 if (!vm_map_lookup_entry(map,
7205 tmp_entry.vme_start,
7206 &first_entry)) {
7207 if (!user_wire) {
7208 panic("vm_map_unwire: re-lookup failed");
7209 }
7210 entry = first_entry->vme_next;
7211 } else {
7212 entry = first_entry;
7213 }
7214 }
7215 last_timestamp = map->timestamp;
/*
 * No "continue" here: control falls through to the generic
 * wire-count handling below with whatever "entry" now is.
 */
7216 }
7217 }
7218
7219
7220 if ((entry->wired_count == 0) ||
7221 (user_wire && entry->user_wired_count == 0)) {
7222 if (!user_wire) {
7223 panic("vm_map_unwire: entry is unwired");
7224 }
7225
7226 entry = entry->vme_next;
7227 continue;
7228 }
7229
7230 assert(entry->wired_count > 0 &&
7231 (!user_wire || entry->user_wired_count > 0));
7232
7233 vm_map_clip_start(map, entry, start);
7234 vm_map_clip_end(map, entry, end);
7235
7236 /*
7237 * Check for holes
7238 * Holes: Next entry should be contiguous unless
7239 * this is the end of the region.
7240 */
7241 if (((entry->vme_end < end) &&
7242 ((entry->vme_next == vm_map_to_entry(map)) ||
7243 (entry->vme_next->vme_start > entry->vme_end)))) {
7244 if (!user_wire) {
7245 panic("vm_map_unwire: non-contiguous region");
7246 }
/* user unwire: skip this entry without touching its wire counts */
7247 entry = entry->vme_next;
7248 continue;
7249 }
7250
7251 subtract_wire_counts(map, entry, user_wire);
7252
/* other wirings remain: the pages stay wired, move to the next entry */
7253 if (entry->wired_count != 0) {
7254 entry = entry->vme_next;
7255 continue;
7256 }
7257
7258 if (entry->zero_wired_pages) {
7259 entry->zero_wired_pages = FALSE;
7260 }
7261
/*
 * Last wiring is gone.  Mark the entry in transition and work from a
 * private copy while the map is unlocked for vm_fault_unwire().
 */
7262 entry->in_transition = TRUE;
7263 tmp_entry = *entry; /* see comment in vm_map_wire() */
7264
7265 /*
7266 * We can unlock the map now. The in_transition state
7267 * guarantees existence of the entry.
7268 */
7269 vm_map_unlock(map);
7270 if (map_pmap) {
7271 vm_fault_unwire(map,
7272 &tmp_entry, FALSE, map_pmap, pmap_addr);
7273 } else {
7274 vm_fault_unwire(map,
7275 &tmp_entry, FALSE, map->pmap,
7276 tmp_entry.vme_start);
7277 }
7278 vm_map_lock(map);
7279
7280 if (last_timestamp + 1 != map->timestamp) {
7281 /*
7282 * Find the entry again. It could have been clipped
7283 * or deleted after we unlocked the map.
7284 */
7285 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
7286 &first_entry)) {
7287 if (!user_wire) {
7288 panic("vm_map_unwire: re-lookup failed");
7289 }
7290 entry = first_entry->vme_next;
7291 } else {
7292 entry = first_entry;
7293 }
7294 }
7295 last_timestamp = map->timestamp;
7296
7297 /*
7298 * clear transition bit for all constituent entries that
7299 * were in the original entry (saved in tmp_entry). Also
7300 * check for waiters.
7301 */
7302 while ((entry != vm_map_to_entry(map)) &&
7303 (entry->vme_start < tmp_entry.vme_end)) {
7304 assert(entry->in_transition);
7305 entry->in_transition = FALSE;
7306 if (entry->needs_wakeup) {
7307 entry->needs_wakeup = FALSE;
7308 need_wakeup = TRUE;
7309 }
7310 entry = entry->vme_next;
7311 }
7312 }
7313
7314 /*
7315 * We might have fragmented the address space when we wired this
7316 * range of addresses. Attempt to re-coalesce these VM map entries
7317 * with their neighbors now that they're no longer wired.
7318 * Under some circumstances, address space fragmentation can
7319 * prevent VM object shadow chain collapsing, which can cause
7320 * swap space leaks.
7321 */
7322 vm_map_simplify_range(map, start, end);
7323
7324 vm_map_unlock(map);
7325 /*
7326 * wake up anybody waiting on entries that we have unwired.
7327 */
7328 if (need_wakeup) {
7329 vm_map_entry_wakeup(map);
7330 }
7331 return KERN_SUCCESS;
7332 }
7333
7334 kern_return_t
7335 vm_map_unwire(
7336 vm_map_t map,
7337 vm_map_offset_t start,
7338 vm_map_offset_t end,
7339 boolean_t user_wire)
7340 {
7341 return vm_map_unwire_nested(map, start, end,
7342 user_wire, (pmap_t)NULL, 0);
7343 }
7344
7345
7346 /*
7347 * vm_map_entry_delete: [ internal use only ]
7348 *
7349 * Deallocate the given entry from the target map.
7350 */
7351 static void
7352 vm_map_entry_delete(
7353 vm_map_t map,
7354 vm_map_entry_t entry)
7355 {
7356 vm_map_offset_t s, e;
7357 vm_object_t object;
7358 vm_map_t submap;
7359
7360 s = entry->vme_start;
7361 e = entry->vme_end;
7362 assert(page_aligned(s));
7363 assert(page_aligned(e));
7364 if (entry->map_aligned == TRUE) {
7365 assert(VM_MAP_PAGE_ALIGNED(s, VM_MAP_PAGE_MASK(map)));
7366 assert(VM_MAP_PAGE_ALIGNED(e, VM_MAP_PAGE_MASK(map)));
7367 }
7368 assert(entry->wired_count == 0);
7369 assert(entry->user_wired_count == 0);
7370 assert(!entry->permanent);
7371
7372 if (entry->is_sub_map) {
7373 object = NULL;
7374 submap = VME_SUBMAP(entry);
7375 } else {
7376 submap = NULL;
7377 object = VME_OBJECT(entry);
7378 }
7379
7380 vm_map_store_entry_unlink(map, entry);
7381 map->size -= e - s;
7382
7383 vm_map_entry_dispose(map, entry);
7384
7385 vm_map_unlock(map);
7386 /*
7387 * Deallocate the object only after removing all
7388 * pmap entries pointing to its pages.
7389 */
7390 if (submap) {
7391 vm_map_deallocate(submap);
7392 } else {
7393 vm_object_deallocate(object);
7394 }
7395 }
7396
7397 void
7398 vm_map_submap_pmap_clean(
7399 vm_map_t map,
7400 vm_map_offset_t start,
7401 vm_map_offset_t end,
7402 vm_map_t sub_map,
7403 vm_map_offset_t offset)
7404 {
7405 vm_map_offset_t submap_start;
7406 vm_map_offset_t submap_end;
7407 vm_map_size_t remove_size;
7408 vm_map_entry_t entry;
7409
7410 submap_end = offset + (end - start);
7411 submap_start = offset;
7412
7413 vm_map_lock_read(sub_map);
7414 if (vm_map_lookup_entry(sub_map, offset, &entry)) {
7415 remove_size = (entry->vme_end - entry->vme_start);
7416 if (offset > entry->vme_start) {
7417 remove_size -= offset - entry->vme_start;
7418 }
7419
7420
7421 if (submap_end < entry->vme_end) {
7422 remove_size -=
7423 entry->vme_end - submap_end;
7424 }
7425 if (entry->is_sub_map) {
7426 vm_map_submap_pmap_clean(
7427 sub_map,
7428 start,
7429 start + remove_size,
7430 VME_SUBMAP(entry),
7431 VME_OFFSET(entry));
7432 } else {
7433 if (map->mapped_in_other_pmaps &&
7434 os_ref_get_count(&map->map_refcnt) != 0 &&
7435 VME_OBJECT(entry) != NULL) {
7436 vm_object_pmap_protect_options(
7437 VME_OBJECT(entry),
7438 (VME_OFFSET(entry) +
7439 offset -
7440 entry->vme_start),
7441 remove_size,
7442 PMAP_NULL,
7443 entry->vme_start,
7444 VM_PROT_NONE,
7445 PMAP_OPTIONS_REMOVE);
7446 } else {
7447 pmap_remove(map->pmap,
7448 (addr64_t)start,
7449 (addr64_t)(start + remove_size));
7450 }
7451 }
7452 }
7453
7454 entry = entry->vme_next;
7455
7456 while ((entry != vm_map_to_entry(sub_map))
7457 && (entry->vme_start < submap_end)) {
7458 remove_size = (entry->vme_end - entry->vme_start);
7459 if (submap_end < entry->vme_end) {
7460 remove_size -= entry->vme_end - submap_end;
7461 }
7462 if (entry->is_sub_map) {
7463 vm_map_submap_pmap_clean(
7464 sub_map,
7465 (start + entry->vme_start) - offset,
7466 ((start + entry->vme_start) - offset) + remove_size,
7467 VME_SUBMAP(entry),
7468 VME_OFFSET(entry));
7469 } else {
7470 if (map->mapped_in_other_pmaps &&
7471 os_ref_get_count(&map->map_refcnt) != 0 &&
7472 VME_OBJECT(entry) != NULL) {
7473 vm_object_pmap_protect_options(
7474 VME_OBJECT(entry),
7475 VME_OFFSET(entry),
7476 remove_size,
7477 PMAP_NULL,
7478 entry->vme_start,
7479 VM_PROT_NONE,
7480 PMAP_OPTIONS_REMOVE);
7481 } else {
7482 pmap_remove(map->pmap,
7483 (addr64_t)((start + entry->vme_start)
7484 - offset),
7485 (addr64_t)(((start + entry->vme_start)
7486 - offset) + remove_size));
7487 }
7488 }
7489 entry = entry->vme_next;
7490 }
7491 vm_map_unlock_read(sub_map);
7492 return;
7493 }
7494
7495 /*
7496 * virt_memory_guard_ast:
7497 *
7498 * Handle the AST callout for a virtual memory guard.
7499 * raise an EXC_GUARD exception and terminate the task
7500 * if configured to do so.
7501 */
7502 void
7503 virt_memory_guard_ast(
7504 thread_t thread,
7505 mach_exception_data_type_t code,
7506 mach_exception_data_type_t subcode)
7507 {
7508 task_t task = thread->task;
7509 assert(task != kernel_task);
7510 assert(task == current_task());
7511 uint32_t behavior;
7512
7513 behavior = task->task_exc_guard;
7514
7515 /* Is delivery enabled */
7516 if ((behavior & TASK_EXC_GUARD_VM_DELIVER) == 0) {
7517 return;
7518 }
7519
7520 /* If only once, make sure we're that once */
7521 while (behavior & TASK_EXC_GUARD_VM_ONCE) {
7522 uint32_t new_behavior = behavior & ~TASK_EXC_GUARD_VM_DELIVER;
7523
7524 if (OSCompareAndSwap(behavior, new_behavior, &task->task_exc_guard)) {
7525 break;
7526 }
7527 behavior = task->task_exc_guard;
7528 if ((behavior & TASK_EXC_GUARD_VM_DELIVER) == 0) {
7529 return;
7530 }
7531 }
7532
7533 /* Raise exception via corpse fork or synchronously */
7534 if ((task->task_exc_guard & TASK_EXC_GUARD_VM_CORPSE) &&
7535 (task->task_exc_guard & TASK_EXC_GUARD_VM_FATAL) == 0) {
7536 task_violated_guard(code, subcode, NULL);
7537 } else {
7538 task_exception_notify(EXC_GUARD, code, subcode);
7539 }
7540
7541 /* Terminate the task if desired */
7542 if (task->task_exc_guard & TASK_EXC_GUARD_VM_FATAL) {
7543 task_bsdtask_kill(current_task());
7544 }
7545 }
7546
7547 /*
7548 * vm_map_guard_exception:
7549 *
7550 * Generate a GUARD_TYPE_VIRTUAL_MEMORY EXC_GUARD exception.
7551 *
7552 * Right now, we do this when we find nothing mapped, or a
7553 * gap in the mapping when a user address space deallocate
7554 * was requested. We report the address of the first gap found.
7555 */
7556 static void
7557 vm_map_guard_exception(
7558 vm_map_offset_t gap_start,
7559 unsigned reason)
7560 {
7561 mach_exception_code_t code = 0;
7562 unsigned int guard_type = GUARD_TYPE_VIRT_MEMORY;
7563 unsigned int target = 0; /* should we pass in pid associated with map? */
7564 mach_exception_data_type_t subcode = (uint64_t)gap_start;
7565 boolean_t fatal = FALSE;
7566
7567 task_t task = current_task();
7568
7569 /* Can't deliver exceptions to kernel task */
7570 if (task == kernel_task) {
7571 return;
7572 }
7573
7574 EXC_GUARD_ENCODE_TYPE(code, guard_type);
7575 EXC_GUARD_ENCODE_FLAVOR(code, reason);
7576 EXC_GUARD_ENCODE_TARGET(code, target);
7577
7578 if (task->task_exc_guard & TASK_EXC_GUARD_VM_FATAL) {
7579 fatal = TRUE;
7580 }
7581 thread_guard_violation(current_thread(), code, subcode, fatal);
7582 }
7583
7584 /*
7585 * vm_map_delete: [ internal use only ]
7586 *
7587 * Deallocates the given address range from the target map.
7588 * Removes all user wirings. Unwires one kernel wiring if
7589 * VM_MAP_REMOVE_KUNWIRE is set. Waits for kernel wirings to go
7590 * away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set. Sleeps
7591 * interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
7592 *
7593 * This routine is called with map locked and leaves map locked.
7594 */
7595 static kern_return_t
7596 vm_map_delete(
7597 vm_map_t map,
7598 vm_map_offset_t start,
7599 vm_map_offset_t end,
7600 int flags,
7601 vm_map_t zap_map)
7602 {
7603 vm_map_entry_t entry, next;
7604 struct vm_map_entry *first_entry, tmp_entry;
7605 vm_map_offset_t s;
7606 vm_object_t object;
7607 boolean_t need_wakeup;
7608 unsigned int last_timestamp = ~0; /* unlikely value */
7609 int interruptible;
7610 vm_map_offset_t gap_start;
7611 __unused vm_map_offset_t save_start = start;
7612 __unused vm_map_offset_t save_end = end;
7613 const vm_map_offset_t FIND_GAP = 1; /* a not page aligned value */
7614 const vm_map_offset_t GAPS_OK = 2; /* a different not page aligned value */
7615
7616 if (map != kernel_map && !(flags & VM_MAP_REMOVE_GAPS_OK) && !map->terminated) {
7617 gap_start = FIND_GAP;
7618 } else {
7619 gap_start = GAPS_OK;
7620 }
7621
7622 interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
7623 THREAD_ABORTSAFE : THREAD_UNINT;
7624
7625 /*
7626 * All our DMA I/O operations in IOKit are currently done by
7627 * wiring through the map entries of the task requesting the I/O.
7628 * Because of this, we must always wait for kernel wirings
7629 * to go away on the entries before deleting them.
7630 *
7631 * Any caller who wants to actually remove a kernel wiring
7632 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
7633 * properly remove one wiring instead of blasting through
7634 * them all.
7635 */
7636 flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;
7637
7638 while (1) {
7639 /*
7640 * Find the start of the region, and clip it
7641 */
7642 if (vm_map_lookup_entry(map, start, &first_entry)) {
7643 entry = first_entry;
7644 if (map == kalloc_map &&
7645 (entry->vme_start != start ||
7646 entry->vme_end != end)) {
7647 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7648 "mismatched entry %p [0x%llx:0x%llx]\n",
7649 map,
7650 (uint64_t)start,
7651 (uint64_t)end,
7652 entry,
7653 (uint64_t)entry->vme_start,
7654 (uint64_t)entry->vme_end);
7655 }
7656
7657 /*
7658 * If in a superpage, extend the range to include the start of the mapping.
7659 */
7660 if (entry->superpage_size && (start & ~SUPERPAGE_MASK)) {
7661 start = SUPERPAGE_ROUND_DOWN(start);
7662 continue;
7663 }
7664
7665 if (start == entry->vme_start) {
7666 /*
7667 * No need to clip. We don't want to cause
7668 * any unnecessary unnesting in this case...
7669 */
7670 } else {
7671 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
7672 entry->map_aligned &&
7673 !VM_MAP_PAGE_ALIGNED(
7674 start,
7675 VM_MAP_PAGE_MASK(map))) {
7676 /*
7677 * The entry will no longer be
7678 * map-aligned after clipping
7679 * and the caller said it's OK.
7680 */
7681 entry->map_aligned = FALSE;
7682 }
7683 if (map == kalloc_map) {
7684 panic("vm_map_delete(%p,0x%llx,0x%llx):"
7685 " clipping %p at 0x%llx\n",
7686 map,
7687 (uint64_t)start,
7688 (uint64_t)end,
7689 entry,
7690 (uint64_t)start);
7691 }
7692 vm_map_clip_start(map, entry, start);
7693 }
7694
7695 /*
7696 * Fix the lookup hint now, rather than each
7697 * time through the loop.
7698 */
7699 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
7700 } else {
7701 if (map->pmap == kernel_pmap &&
7702 os_ref_get_count(&map->map_refcnt) != 0) {
7703 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7704 "no map entry at 0x%llx\n",
7705 map,
7706 (uint64_t)start,
7707 (uint64_t)end,
7708 (uint64_t)start);
7709 }
7710 entry = first_entry->vme_next;
7711 if (gap_start == FIND_GAP) {
7712 gap_start = start;
7713 }
7714 }
7715 break;
7716 }
7717 if (entry->superpage_size) {
7718 end = SUPERPAGE_ROUND_UP(end);
7719 }
7720
7721 need_wakeup = FALSE;
7722 /*
7723 * Step through all entries in this region
7724 */
7725 s = entry->vme_start;
7726 while ((entry != vm_map_to_entry(map)) && (s < end)) {
7727 /*
7728 * At this point, we have deleted all the memory entries
7729 * between "start" and "s". We still need to delete
7730 * all memory entries between "s" and "end".
7731 * While we were blocked and the map was unlocked, some
7732 * new memory entries could have been re-allocated between
7733 * "start" and "s" and we don't want to mess with those.
7734 * Some of those entries could even have been re-assembled
7735 * with an entry after "s" (in vm_map_simplify_entry()), so
7736 * we may have to vm_map_clip_start() again.
7737 */
7738
7739 if (entry->vme_start >= s) {
7740 /*
7741 * This entry starts on or after "s"
7742 * so no need to clip its start.
7743 */
7744 } else {
7745 /*
7746 * This entry has been re-assembled by a
7747 * vm_map_simplify_entry(). We need to
7748 * re-clip its start.
7749 */
7750 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
7751 entry->map_aligned &&
7752 !VM_MAP_PAGE_ALIGNED(s,
7753 VM_MAP_PAGE_MASK(map))) {
7754 /*
7755 * The entry will no longer be map-aligned
7756 * after clipping and the caller said it's OK.
7757 */
7758 entry->map_aligned = FALSE;
7759 }
7760 if (map == kalloc_map) {
7761 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7762 "clipping %p at 0x%llx\n",
7763 map,
7764 (uint64_t)start,
7765 (uint64_t)end,
7766 entry,
7767 (uint64_t)s);
7768 }
7769 vm_map_clip_start(map, entry, s);
7770 }
7771 if (entry->vme_end <= end) {
7772 /*
7773 * This entry is going away completely, so no need
7774 * to clip and possibly cause an unnecessary unnesting.
7775 */
7776 } else {
7777 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
7778 entry->map_aligned &&
7779 !VM_MAP_PAGE_ALIGNED(end,
7780 VM_MAP_PAGE_MASK(map))) {
7781 /*
7782 * The entry will no longer be map-aligned
7783 * after clipping and the caller said it's OK.
7784 */
7785 entry->map_aligned = FALSE;
7786 }
7787 if (map == kalloc_map) {
7788 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7789 "clipping %p at 0x%llx\n",
7790 map,
7791 (uint64_t)start,
7792 (uint64_t)end,
7793 entry,
7794 (uint64_t)end);
7795 }
7796 vm_map_clip_end(map, entry, end);
7797 }
7798
7799 if (entry->permanent) {
7800 if (map->pmap == kernel_pmap) {
7801 panic("%s(%p,0x%llx,0x%llx): "
7802 "attempt to remove permanent "
7803 "VM map entry "
7804 "%p [0x%llx:0x%llx]\n",
7805 __FUNCTION__,
7806 map,
7807 (uint64_t) start,
7808 (uint64_t) end,
7809 entry,
7810 (uint64_t) entry->vme_start,
7811 (uint64_t) entry->vme_end);
7812 } else if (flags & VM_MAP_REMOVE_IMMUTABLE) {
7813 // printf("FBDP %d[%s] removing permanent entry %p [0x%llx:0x%llx] prot 0x%x/0x%x\n", proc_selfpid(), (current_task()->bsd_info ? proc_name_address(current_task()->bsd_info) : "?"), entry, (uint64_t)entry->vme_start, (uint64_t)entry->vme_end, entry->protection, entry->max_protection);
7814 entry->permanent = FALSE;
7815 #if PMAP_CS
7816 } else if ((entry->protection & VM_PROT_EXECUTE) && !pmap_cs_enforced(map->pmap)) {
7817 entry->permanent = FALSE;
7818
7819 printf("%d[%s] %s(0x%llx,0x%llx): "
7820 "pmap_cs disabled, allowing for permanent executable entry [0x%llx:0x%llx] "
7821 "prot 0x%x/0x%x\n",
7822 proc_selfpid(),
7823 (current_task()->bsd_info
7824 ? proc_name_address(current_task()->bsd_info)
7825 : "?"),
7826 __FUNCTION__,
7827 (uint64_t) start,
7828 (uint64_t) end,
7829 (uint64_t)entry->vme_start,
7830 (uint64_t)entry->vme_end,
7831 entry->protection,
7832 entry->max_protection);
7833 #endif
7834 } else {
7835 if (vm_map_executable_immutable_verbose) {
7836 printf("%d[%s] %s(0x%llx,0x%llx): "
7837 "permanent entry [0x%llx:0x%llx] "
7838 "prot 0x%x/0x%x\n",
7839 proc_selfpid(),
7840 (current_task()->bsd_info
7841 ? proc_name_address(current_task()->bsd_info)
7842 : "?"),
7843 __FUNCTION__,
7844 (uint64_t) start,
7845 (uint64_t) end,
7846 (uint64_t)entry->vme_start,
7847 (uint64_t)entry->vme_end,
7848 entry->protection,
7849 entry->max_protection);
7850 }
7851 /*
7852 * dtrace -n 'vm_map_delete_permanent { print("start=0x%llx end=0x%llx prot=0x%x/0x%x\n", arg0, arg1, arg2, arg3); stack(); ustack(); }'
7853 */
7854 DTRACE_VM5(vm_map_delete_permanent,
7855 vm_map_offset_t, entry->vme_start,
7856 vm_map_offset_t, entry->vme_end,
7857 vm_prot_t, entry->protection,
7858 vm_prot_t, entry->max_protection,
7859 int, VME_ALIAS(entry));
7860 }
7861 }
7862
7863
7864 if (entry->in_transition) {
7865 wait_result_t wait_result;
7866
7867 /*
7868 * Another thread is wiring/unwiring this entry.
7869 * Let the other thread know we are waiting.
7870 */
7871 assert(s == entry->vme_start);
7872 entry->needs_wakeup = TRUE;
7873
7874 /*
7875 * wake up anybody waiting on entries that we have
7876 * already unwired/deleted.
7877 */
7878 if (need_wakeup) {
7879 vm_map_entry_wakeup(map);
7880 need_wakeup = FALSE;
7881 }
7882
7883 wait_result = vm_map_entry_wait(map, interruptible);
7884
7885 if (interruptible &&
7886 wait_result == THREAD_INTERRUPTED) {
7887 /*
7888 * We do not clear the needs_wakeup flag,
7889 * since we cannot tell if we were the only one.
7890 */
7891 return KERN_ABORTED;
7892 }
7893
7894 /*
7895 * The entry could have been clipped or it
7896 * may not exist anymore. Look it up again.
7897 */
7898 if (!vm_map_lookup_entry(map, s, &first_entry)) {
7899 /*
7900 * User: use the next entry
7901 */
7902 if (gap_start == FIND_GAP) {
7903 gap_start = s;
7904 }
7905 entry = first_entry->vme_next;
7906 s = entry->vme_start;
7907 } else {
7908 entry = first_entry;
7909 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
7910 }
7911 last_timestamp = map->timestamp;
7912 continue;
7913 } /* end in_transition */
7914
7915 if (entry->wired_count) {
7916 boolean_t user_wire;
7917
7918 user_wire = entry->user_wired_count > 0;
7919
7920 /*
7921 * Remove a kernel wiring if requested
7922 */
7923 if (flags & VM_MAP_REMOVE_KUNWIRE) {
7924 entry->wired_count--;
7925 }
7926
7927 /*
7928 * Remove all user wirings for proper accounting
7929 */
7930 if (entry->user_wired_count > 0) {
7931 while (entry->user_wired_count) {
7932 subtract_wire_counts(map, entry, user_wire);
7933 }
7934 }
7935
7936 if (entry->wired_count != 0) {
7937 assert(map != kernel_map);
7938 /*
7939 * Cannot continue. Typical case is when
7940 * a user thread has physical io pending on
7941 * this page. Either wait for the
7942 * kernel wiring to go away or return an
7943 * error.
7944 */
7945 if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) {
7946 wait_result_t wait_result;
7947
7948 assert(s == entry->vme_start);
7949 entry->needs_wakeup = TRUE;
7950 wait_result = vm_map_entry_wait(map,
7951 interruptible);
7952
7953 if (interruptible &&
7954 wait_result == THREAD_INTERRUPTED) {
7955 /*
7956 * We do not clear the
7957 * needs_wakeup flag, since we
7958 * cannot tell if we were the
7959 * only one.
7960 */
7961 return KERN_ABORTED;
7962 }
7963
7964 /*
7965 * The entry could have been clipped or
7966 * it may not exist anymore. Look it
7967 * up again.
7968 */
7969 if (!vm_map_lookup_entry(map, s,
7970 &first_entry)) {
7971 assert(map != kernel_map);
7972 /*
7973 * User: use the next entry
7974 */
7975 if (gap_start == FIND_GAP) {
7976 gap_start = s;
7977 }
7978 entry = first_entry->vme_next;
7979 s = entry->vme_start;
7980 } else {
7981 entry = first_entry;
7982 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
7983 }
7984 last_timestamp = map->timestamp;
7985 continue;
7986 } else {
7987 return KERN_FAILURE;
7988 }
7989 }
7990
7991 entry->in_transition = TRUE;
7992 /*
7993 * copy current entry. see comment in vm_map_wire()
7994 */
7995 tmp_entry = *entry;
7996 assert(s == entry->vme_start);
7997
7998 /*
7999 * We can unlock the map now. The in_transition
8000 * state guarantees existence of the entry.
8001 */
8002 vm_map_unlock(map);
8003
8004 if (tmp_entry.is_sub_map) {
8005 vm_map_t sub_map;
8006 vm_map_offset_t sub_start, sub_end;
8007 pmap_t pmap;
8008 vm_map_offset_t pmap_addr;
8009
8010
8011 sub_map = VME_SUBMAP(&tmp_entry);
8012 sub_start = VME_OFFSET(&tmp_entry);
8013 sub_end = sub_start + (tmp_entry.vme_end -
8014 tmp_entry.vme_start);
8015 if (tmp_entry.use_pmap) {
8016 pmap = sub_map->pmap;
8017 pmap_addr = tmp_entry.vme_start;
8018 } else {
8019 pmap = map->pmap;
8020 pmap_addr = tmp_entry.vme_start;
8021 }
8022 (void) vm_map_unwire_nested(sub_map,
8023 sub_start, sub_end,
8024 user_wire,
8025 pmap, pmap_addr);
8026 } else {
8027 if (VME_OBJECT(&tmp_entry) == kernel_object) {
8028 pmap_protect_options(
8029 map->pmap,
8030 tmp_entry.vme_start,
8031 tmp_entry.vme_end,
8032 VM_PROT_NONE,
8033 PMAP_OPTIONS_REMOVE,
8034 NULL);
8035 }
8036 vm_fault_unwire(map, &tmp_entry,
8037 VME_OBJECT(&tmp_entry) == kernel_object,
8038 map->pmap, tmp_entry.vme_start);
8039 }
8040
8041 vm_map_lock(map);
8042
8043 if (last_timestamp + 1 != map->timestamp) {
8044 /*
8045 * Find the entry again. It could have
8046 * been clipped after we unlocked the map.
8047 */
8048 if (!vm_map_lookup_entry(map, s, &first_entry)) {
8049 assert((map != kernel_map) &&
8050 (!entry->is_sub_map));
8051 if (gap_start == FIND_GAP) {
8052 gap_start = s;
8053 }
8054 first_entry = first_entry->vme_next;
8055 s = first_entry->vme_start;
8056 } else {
8057 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8058 }
8059 } else {
8060 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8061 first_entry = entry;
8062 }
8063
8064 last_timestamp = map->timestamp;
8065
8066 entry = first_entry;
8067 while ((entry != vm_map_to_entry(map)) &&
8068 (entry->vme_start < tmp_entry.vme_end)) {
8069 assert(entry->in_transition);
8070 entry->in_transition = FALSE;
8071 if (entry->needs_wakeup) {
8072 entry->needs_wakeup = FALSE;
8073 need_wakeup = TRUE;
8074 }
8075 entry = entry->vme_next;
8076 }
8077 /*
8078 * We have unwired the entry(s). Go back and
8079 * delete them.
8080 */
8081 entry = first_entry;
8082 continue;
8083 }
8084
8085 /* entry is unwired */
8086 assert(entry->wired_count == 0);
8087 assert(entry->user_wired_count == 0);
8088
8089 assert(s == entry->vme_start);
8090
8091 if (flags & VM_MAP_REMOVE_NO_PMAP_CLEANUP) {
8092 /*
8093 * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
8094 * vm_map_delete(), some map entries might have been
8095 * transferred to a "zap_map", which doesn't have a
8096 * pmap. The original pmap has already been flushed
8097 * in the vm_map_delete() call targeting the original
8098 * map, but when we get to destroying the "zap_map",
8099 * we don't have any pmap to flush, so let's just skip
8100 * all this.
8101 */
8102 } else if (entry->is_sub_map) {
8103 if (entry->use_pmap) {
8104 #ifndef NO_NESTED_PMAP
8105 int pmap_flags;
8106
8107 if (flags & VM_MAP_REMOVE_NO_UNNESTING) {
8108 /*
8109 * This is the final cleanup of the
8110 * address space being terminated.
8111 * No new mappings are expected and
8112 * we don't really need to unnest the
8113 * shared region (and lose the "global"
8114 * pmap mappings, if applicable).
8115 *
8116 * Tell the pmap layer that we're
8117 * "clean" wrt nesting.
8118 */
8119 pmap_flags = PMAP_UNNEST_CLEAN;
8120 } else {
8121 /*
8122 * We're unmapping part of the nested
8123 * shared region, so we can't keep the
8124 * nested pmap.
8125 */
8126 pmap_flags = 0;
8127 }
8128 pmap_unnest_options(
8129 map->pmap,
8130 (addr64_t)entry->vme_start,
8131 entry->vme_end - entry->vme_start,
8132 pmap_flags);
8133 #endif /* NO_NESTED_PMAP */
8134 if (map->mapped_in_other_pmaps &&
8135 os_ref_get_count(&map->map_refcnt) != 0) {
8136 /* clean up parent map/maps */
8137 vm_map_submap_pmap_clean(
8138 map, entry->vme_start,
8139 entry->vme_end,
8140 VME_SUBMAP(entry),
8141 VME_OFFSET(entry));
8142 }
8143 } else {
8144 vm_map_submap_pmap_clean(
8145 map, entry->vme_start, entry->vme_end,
8146 VME_SUBMAP(entry),
8147 VME_OFFSET(entry));
8148 }
8149 } else if (VME_OBJECT(entry) != kernel_object &&
8150 VME_OBJECT(entry) != compressor_object) {
8151 object = VME_OBJECT(entry);
8152 if (map->mapped_in_other_pmaps &&
8153 os_ref_get_count(&map->map_refcnt) != 0) {
8154 vm_object_pmap_protect_options(
8155 object, VME_OFFSET(entry),
8156 entry->vme_end - entry->vme_start,
8157 PMAP_NULL,
8158 entry->vme_start,
8159 VM_PROT_NONE,
8160 PMAP_OPTIONS_REMOVE);
8161 } else if ((VME_OBJECT(entry) != VM_OBJECT_NULL) ||
8162 (map->pmap == kernel_pmap)) {
8163 /* Remove translations associated
8164 * with this range unless the entry
8165 * does not have an object, or
8166 * it's the kernel map or a descendant
8167 * since the platform could potentially
8168 * create "backdoor" mappings invisible
8169 * to the VM. It is expected that
8170 * objectless, non-kernel ranges
8171 * do not have such VM invisible
8172 * translations.
8173 */
8174 pmap_remove_options(map->pmap,
8175 (addr64_t)entry->vme_start,
8176 (addr64_t)entry->vme_end,
8177 PMAP_OPTIONS_REMOVE);
8178 }
8179 }
8180
8181 if (entry->iokit_acct) {
8182 /* alternate accounting */
8183 DTRACE_VM4(vm_map_iokit_unmapped_region,
8184 vm_map_t, map,
8185 vm_map_offset_t, entry->vme_start,
8186 vm_map_offset_t, entry->vme_end,
8187 int, VME_ALIAS(entry));
8188 vm_map_iokit_unmapped_region(map,
8189 (entry->vme_end -
8190 entry->vme_start));
8191 entry->iokit_acct = FALSE;
8192 entry->use_pmap = FALSE;
8193 }
8194
8195 /*
8196 * All pmap mappings for this map entry must have been
8197 * cleared by now.
8198 */
8199 #if DEBUG
8200 assert(vm_map_pmap_is_empty(map,
8201 entry->vme_start,
8202 entry->vme_end));
8203 #endif /* DEBUG */
8204
8205 next = entry->vme_next;
8206
8207 if (map->pmap == kernel_pmap &&
8208 os_ref_get_count(&map->map_refcnt) != 0 &&
8209 entry->vme_end < end &&
8210 (next == vm_map_to_entry(map) ||
8211 next->vme_start != entry->vme_end)) {
8212 panic("vm_map_delete(%p,0x%llx,0x%llx): "
8213 "hole after %p at 0x%llx\n",
8214 map,
8215 (uint64_t)start,
8216 (uint64_t)end,
8217 entry,
8218 (uint64_t)entry->vme_end);
8219 }
8220
8221 /*
8222 * If the desired range didn't end with "entry", then there is a gap if
8223 * we wrapped around to the start of the map or if "entry" and "next"
8224 * aren't contiguous.
8225 *
8226 * The vm_map_round_page() is needed since an entry can be less than VM_MAP_PAGE_MASK() sized.
8227 * For example, devices which have h/w 4K pages, but entry sizes are all now 16K.
8228 */
8229 if (gap_start == FIND_GAP &&
8230 vm_map_round_page(entry->vme_end, VM_MAP_PAGE_MASK(map)) < end &&
8231 (next == vm_map_to_entry(map) || entry->vme_end != next->vme_start)) {
8232 gap_start = entry->vme_end;
8233 }
8234 s = next->vme_start;
8235 last_timestamp = map->timestamp;
8236
8237 if (entry->permanent) {
8238 /*
8239 * A permanent entry can not be removed, so leave it
8240 * in place but remove all access permissions.
8241 */
8242 entry->protection = VM_PROT_NONE;
8243 entry->max_protection = VM_PROT_NONE;
8244 } else if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) &&
8245 zap_map != VM_MAP_NULL) {
8246 vm_map_size_t entry_size;
8247 /*
8248 * The caller wants to save the affected VM map entries
8249 * into the "zap_map". The caller will take care of
8250 * these entries.
8251 */
8252 /* unlink the entry from "map" ... */
8253 vm_map_store_entry_unlink(map, entry);
8254 /* ... and add it to the end of the "zap_map" */
8255 vm_map_store_entry_link(zap_map,
8256 vm_map_last_entry(zap_map),
8257 entry,
8258 VM_MAP_KERNEL_FLAGS_NONE);
8259 entry_size = entry->vme_end - entry->vme_start;
8260 map->size -= entry_size;
8261 zap_map->size += entry_size;
8262 /* we didn't unlock the map, so no timestamp increase */
8263 last_timestamp--;
8264 } else {
8265 vm_map_entry_delete(map, entry);
8266 /* vm_map_entry_delete unlocks the map */
8267 vm_map_lock(map);
8268 }
8269
8270 entry = next;
8271
8272 if (entry == vm_map_to_entry(map)) {
8273 break;
8274 }
8275 if (last_timestamp + 1 != map->timestamp) {
8276 /*
8277 * We are responsible for deleting everything
8278 * from the given space. If someone has interfered,
8279 * we pick up where we left off. Back fills should
8280 * be all right for anyone, except map_delete, and
8281 * we have to assume that the task has been fully
8282 * disabled before we get here
8283 */
8284 if (!vm_map_lookup_entry(map, s, &entry)) {
8285 entry = entry->vme_next;
8286
8287 /*
8288 * Nothing found for s. If we weren't already done, then there is a gap.
8289 */
8290 if (gap_start == FIND_GAP && s < end) {
8291 gap_start = s;
8292 }
8293 s = entry->vme_start;
8294 } else {
8295 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8296 }
8297 /*
8298 * others can not only allocate behind us, we can
8299 * also see coalesce while we don't have the map lock
8300 */
8301 if (entry == vm_map_to_entry(map)) {
8302 break;
8303 }
8304 }
8305 last_timestamp = map->timestamp;
8306 }
8307
8308 if (map->wait_for_space) {
8309 thread_wakeup((event_t) map);
8310 }
8311 /*
8312 * wake up anybody waiting on entries that we have already deleted.
8313 */
8314 if (need_wakeup) {
8315 vm_map_entry_wakeup(map);
8316 }
8317
8318 if (gap_start != FIND_GAP && gap_start != GAPS_OK) {
8319 DTRACE_VM3(kern_vm_deallocate_gap,
8320 vm_map_offset_t, gap_start,
8321 vm_map_offset_t, save_start,
8322 vm_map_offset_t, save_end);
8323 if (!(flags & VM_MAP_REMOVE_GAPS_OK)) {
8324 vm_map_guard_exception(gap_start, kGUARD_EXC_DEALLOC_GAP);
8325 }
8326 }
8327
8328 return KERN_SUCCESS;
8329 }
8330
8331
8332 /*
8333 * vm_map_terminate:
8334 *
8335 * Clean out a task's map.
8336 */
8337 kern_return_t
8338 vm_map_terminate(
8339 vm_map_t map)
8340 {
8341 vm_map_lock(map);
8342 map->terminated = TRUE;
8343 vm_map_unlock(map);
8344
8345 return vm_map_remove(map,
8346 map->min_offset,
8347 map->max_offset,
8348 /*
8349 * Final cleanup:
8350 * + no unnesting
8351 * + remove immutable mappings
8352 * + allow gaps in range
8353 */
8354 (VM_MAP_REMOVE_NO_UNNESTING |
8355 VM_MAP_REMOVE_IMMUTABLE |
8356 VM_MAP_REMOVE_GAPS_OK));
8357 }
8358
8359 /*
8360 * vm_map_remove:
8361 *
8362 * Remove the given address range from the target map.
8363 * This is the exported form of vm_map_delete.
8364 */
8365 kern_return_t
8366 vm_map_remove(
8367 vm_map_t map,
8368 vm_map_offset_t start,
8369 vm_map_offset_t end,
8370 boolean_t flags)
8371 {
8372 kern_return_t result;
8373
8374 vm_map_lock(map);
8375 VM_MAP_RANGE_CHECK(map, start, end);
8376 /*
8377 * For the zone_map, the kernel controls the allocation/freeing of memory.
8378 * Any free to the zone_map should be within the bounds of the map and
8379 * should free up memory. If the VM_MAP_RANGE_CHECK() silently converts a
8380 * free to the zone_map into a no-op, there is a problem and we should
8381 * panic.
8382 */
8383 if ((map == zone_map) && (start == end)) {
8384 panic("Nothing being freed to the zone_map. start = end = %p\n", (void *)start);
8385 }
8386 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
8387 vm_map_unlock(map);
8388
8389 return result;
8390 }
8391
8392 /*
8393 * vm_map_remove_locked:
8394 *
8395 * Remove the given address range from the target locked map.
8396 * This is the exported form of vm_map_delete.
8397 */
8398 kern_return_t
8399 vm_map_remove_locked(
8400 vm_map_t map,
8401 vm_map_offset_t start,
8402 vm_map_offset_t end,
8403 boolean_t flags)
8404 {
8405 kern_return_t result;
8406
8407 VM_MAP_RANGE_CHECK(map, start, end);
8408 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
8409 return result;
8410 }
8411
8412
8413 /*
8414 * Routine: vm_map_copy_allocate
8415 *
8416 * Description:
8417 * Allocates and initializes a map copy object.
8418 */
8419 static vm_map_copy_t
8420 vm_map_copy_allocate(void)
8421 {
8422 vm_map_copy_t new_copy;
8423
8424 new_copy = zalloc(vm_map_copy_zone);
8425 bzero(new_copy, sizeof(*new_copy));
8426 new_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
8427 vm_map_copy_first_entry(new_copy) = vm_map_copy_to_entry(new_copy);
8428 vm_map_copy_last_entry(new_copy) = vm_map_copy_to_entry(new_copy);
8429 return new_copy;
8430 }
8431
8432 /*
8433 * Routine: vm_map_copy_discard
8434 *
8435 * Description:
8436 * Dispose of a map copy object (returned by
8437 * vm_map_copyin).
8438 */
8439 void
8440 vm_map_copy_discard(
8441 vm_map_copy_t copy)
8442 {
8443 if (copy == VM_MAP_COPY_NULL) {
8444 return;
8445 }
8446
8447 switch (copy->type) {
8448 case VM_MAP_COPY_ENTRY_LIST:
8449 while (vm_map_copy_first_entry(copy) !=
8450 vm_map_copy_to_entry(copy)) {
8451 vm_map_entry_t entry = vm_map_copy_first_entry(copy);
8452
8453 vm_map_copy_entry_unlink(copy, entry);
8454 if (entry->is_sub_map) {
8455 vm_map_deallocate(VME_SUBMAP(entry));
8456 } else {
8457 vm_object_deallocate(VME_OBJECT(entry));
8458 }
8459 vm_map_copy_entry_dispose(copy, entry);
8460 }
8461 break;
8462 case VM_MAP_COPY_OBJECT:
8463 vm_object_deallocate(copy->cpy_object);
8464 break;
8465 case VM_MAP_COPY_KERNEL_BUFFER:
8466
8467 /*
8468 * The vm_map_copy_t and possibly the data buffer were
8469 * allocated by a single call to kalloc(), i.e. the
8470 * vm_map_copy_t was not allocated out of the zone.
8471 */
8472 if (copy->size > msg_ool_size_small || copy->offset) {
8473 panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
8474 (long long)copy->size, (long long)copy->offset);
8475 }
8476 kfree(copy, copy->size + cpy_kdata_hdr_sz);
8477 return;
8478 }
8479 zfree(vm_map_copy_zone, copy);
8480 }
8481
8482 /*
8483 * Routine: vm_map_copy_copy
8484 *
8485 * Description:
8486 * Move the information in a map copy object to
8487 * a new map copy object, leaving the old one
8488 * empty.
8489 *
8490 * This is used by kernel routines that need
8491 * to look at out-of-line data (in copyin form)
8492 * before deciding whether to return SUCCESS.
8493 * If the routine returns FAILURE, the original
8494 * copy object will be deallocated; therefore,
8495 * these routines must make a copy of the copy
8496 * object and leave the original empty so that
8497 * deallocation will not fail.
8498 */
8499 vm_map_copy_t
8500 vm_map_copy_copy(
8501 vm_map_copy_t copy)
8502 {
8503 vm_map_copy_t new_copy;
8504
8505 if (copy == VM_MAP_COPY_NULL) {
8506 return VM_MAP_COPY_NULL;
8507 }
8508
8509 /*
8510 * Allocate a new copy object, and copy the information
8511 * from the old one into it.
8512 */
8513
8514 new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
8515 *new_copy = *copy;
8516
8517 if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
8518 /*
8519 * The links in the entry chain must be
8520 * changed to point to the new copy object.
8521 */
8522 vm_map_copy_first_entry(copy)->vme_prev
8523 = vm_map_copy_to_entry(new_copy);
8524 vm_map_copy_last_entry(copy)->vme_next
8525 = vm_map_copy_to_entry(new_copy);
8526 }
8527
8528 /*
8529 * Change the old copy object into one that contains
8530 * nothing to be deallocated.
8531 */
8532 copy->type = VM_MAP_COPY_OBJECT;
8533 copy->cpy_object = VM_OBJECT_NULL;
8534
8535 /*
8536 * Return the new object.
8537 */
8538 return new_copy;
8539 }
8540
8541 static kern_return_t
8542 vm_map_overwrite_submap_recurse(
8543 vm_map_t dst_map,
8544 vm_map_offset_t dst_addr,
8545 vm_map_size_t dst_size)
8546 {
8547 vm_map_offset_t dst_end;
8548 vm_map_entry_t tmp_entry;
8549 vm_map_entry_t entry;
8550 kern_return_t result;
8551 boolean_t encountered_sub_map = FALSE;
8552
8553
8554
8555 /*
8556 * Verify that the destination is all writeable
8557 * initially. We have to trunc the destination
8558 * address and round the copy size or we'll end up
8559 * splitting entries in strange ways.
8560 */
8561
8562 dst_end = vm_map_round_page(dst_addr + dst_size,
8563 VM_MAP_PAGE_MASK(dst_map));
8564 vm_map_lock(dst_map);
8565
8566 start_pass_1:
8567 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
8568 vm_map_unlock(dst_map);
8569 return KERN_INVALID_ADDRESS;
8570 }
8571
8572 vm_map_clip_start(dst_map,
8573 tmp_entry,
8574 vm_map_trunc_page(dst_addr,
8575 VM_MAP_PAGE_MASK(dst_map)));
8576 if (tmp_entry->is_sub_map) {
8577 /* clipping did unnest if needed */
8578 assert(!tmp_entry->use_pmap);
8579 }
8580
8581 for (entry = tmp_entry;;) {
8582 vm_map_entry_t next;
8583
8584 next = entry->vme_next;
8585 while (entry->is_sub_map) {
8586 vm_map_offset_t sub_start;
8587 vm_map_offset_t sub_end;
8588 vm_map_offset_t local_end;
8589
8590 if (entry->in_transition) {
8591 /*
8592 * Say that we are waiting, and wait for entry.
8593 */
8594 entry->needs_wakeup = TRUE;
8595 vm_map_entry_wait(dst_map, THREAD_UNINT);
8596
8597 goto start_pass_1;
8598 }
8599
8600 encountered_sub_map = TRUE;
8601 sub_start = VME_OFFSET(entry);
8602
8603 if (entry->vme_end < dst_end) {
8604 sub_end = entry->vme_end;
8605 } else {
8606 sub_end = dst_end;
8607 }
8608 sub_end -= entry->vme_start;
8609 sub_end += VME_OFFSET(entry);
8610 local_end = entry->vme_end;
8611 vm_map_unlock(dst_map);
8612
8613 result = vm_map_overwrite_submap_recurse(
8614 VME_SUBMAP(entry),
8615 sub_start,
8616 sub_end - sub_start);
8617
8618 if (result != KERN_SUCCESS) {
8619 return result;
8620 }
8621 if (dst_end <= entry->vme_end) {
8622 return KERN_SUCCESS;
8623 }
8624 vm_map_lock(dst_map);
8625 if (!vm_map_lookup_entry(dst_map, local_end,
8626 &tmp_entry)) {
8627 vm_map_unlock(dst_map);
8628 return KERN_INVALID_ADDRESS;
8629 }
8630 entry = tmp_entry;
8631 next = entry->vme_next;
8632 }
8633
8634 if (!(entry->protection & VM_PROT_WRITE)) {
8635 vm_map_unlock(dst_map);
8636 return KERN_PROTECTION_FAILURE;
8637 }
8638
8639 /*
8640 * If the entry is in transition, we must wait
8641 * for it to exit that state. Anything could happen
8642 * when we unlock the map, so start over.
8643 */
8644 if (entry->in_transition) {
8645 /*
8646 * Say that we are waiting, and wait for entry.
8647 */
8648 entry->needs_wakeup = TRUE;
8649 vm_map_entry_wait(dst_map, THREAD_UNINT);
8650
8651 goto start_pass_1;
8652 }
8653
8654 /*
8655 * our range is contained completely within this map entry
8656 */
8657 if (dst_end <= entry->vme_end) {
8658 vm_map_unlock(dst_map);
8659 return KERN_SUCCESS;
8660 }
8661 /*
8662 * check that range specified is contiguous region
8663 */
8664 if ((next == vm_map_to_entry(dst_map)) ||
8665 (next->vme_start != entry->vme_end)) {
8666 vm_map_unlock(dst_map);
8667 return KERN_INVALID_ADDRESS;
8668 }
8669
8670 /*
8671 * Check for permanent objects in the destination.
8672 */
8673 if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
8674 ((!VME_OBJECT(entry)->internal) ||
8675 (VME_OBJECT(entry)->true_share))) {
8676 if (encountered_sub_map) {
8677 vm_map_unlock(dst_map);
8678 return KERN_FAILURE;
8679 }
8680 }
8681
8682
8683 entry = next;
8684 }/* for */
8685 vm_map_unlock(dst_map);
8686 return KERN_SUCCESS;
8687 }
8688
8689 /*
8690 * Routine: vm_map_copy_overwrite
8691 *
8692 * Description:
8693 * Copy the memory described by the map copy
8694 * object (copy; returned by vm_map_copyin) onto
8695 * the specified destination region (dst_map, dst_addr).
8696 * The destination must be writeable.
8697 *
8698 * Unlike vm_map_copyout, this routine actually
8699 * writes over previously-mapped memory. If the
8700 * previous mapping was to a permanent (user-supplied)
8701 * memory object, it is preserved.
8702 *
8703 * The attributes (protection and inheritance) of the
8704 * destination region are preserved.
8705 *
8706 * If successful, consumes the copy object.
8707 * Otherwise, the caller is responsible for it.
8708 *
8709 * Implementation notes:
8710 * To overwrite aligned temporary virtual memory, it is
8711 * sufficient to remove the previous mapping and insert
8712 * the new copy. This replacement is done either on
8713 * the whole region (if no permanent virtual memory
8714 * objects are embedded in the destination region) or
8715 * in individual map entries.
8716 *
8717 * To overwrite permanent virtual memory, it is necessary
8718 * to copy each page, as the external memory management
8719 * interface currently does not provide any optimizations.
8720 *
8721 * Unaligned memory also has to be copied. It is possible
8722 * to use 'vm_trickery' to copy the aligned data. This is
8723 * not done but not hard to implement.
8724 *
8725 * Once a page of permanent memory has been overwritten,
8726 * it is impossible to interrupt this function; otherwise,
8727 * the call would be neither atomic nor location-independent.
8728 * The kernel-state portion of a user thread must be
8729 * interruptible.
8730 *
8731 * It may be expensive to forward all requests that might
8732 * overwrite permanent memory (vm_write, vm_copy) to
8733 * uninterruptible kernel threads. This routine may be
8734 * called by interruptible threads; however, success is
8735 * not guaranteed -- if the request cannot be performed
8736 * atomically and interruptibly, an error indication is
8737 * returned.
8738 */
8739
8740 static kern_return_t
8741 vm_map_copy_overwrite_nested(
8742 vm_map_t dst_map,
8743 vm_map_address_t dst_addr,
8744 vm_map_copy_t copy,
8745 boolean_t interruptible,
8746 pmap_t pmap,
8747 boolean_t discard_on_success)
8748 {
8749 vm_map_offset_t dst_end;
8750 vm_map_entry_t tmp_entry;
8751 vm_map_entry_t entry;
8752 kern_return_t kr;
8753 boolean_t aligned = TRUE;
8754 boolean_t contains_permanent_objects = FALSE;
8755 boolean_t encountered_sub_map = FALSE;
8756 vm_map_offset_t base_addr;
8757 vm_map_size_t copy_size;
8758 vm_map_size_t total_size;
8759
8760
8761 /*
8762 * Check for null copy object.
8763 */
8764
8765 if (copy == VM_MAP_COPY_NULL) {
8766 return KERN_SUCCESS;
8767 }
8768
8769 /*
8770 * Check for special kernel buffer allocated
8771 * by new_ipc_kmsg_copyin.
8772 */
8773
8774 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
8775 return vm_map_copyout_kernel_buffer(
8776 dst_map, &dst_addr,
8777 copy, copy->size, TRUE, discard_on_success);
8778 }
8779
8780 /*
8781 * Only works for entry lists at the moment. Will
8782 * support page lists later.
8783 */
8784
8785 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
8786
8787 if (copy->size == 0) {
8788 if (discard_on_success) {
8789 vm_map_copy_discard(copy);
8790 }
8791 return KERN_SUCCESS;
8792 }
8793
8794 /*
8795 * Verify that the destination is all writeable
8796 * initially. We have to trunc the destination
8797 * address and round the copy size or we'll end up
8798 * splitting entries in strange ways.
8799 */
8800
8801 if (!VM_MAP_PAGE_ALIGNED(copy->size,
8802 VM_MAP_PAGE_MASK(dst_map)) ||
8803 !VM_MAP_PAGE_ALIGNED(copy->offset,
8804 VM_MAP_PAGE_MASK(dst_map)) ||
8805 !VM_MAP_PAGE_ALIGNED(dst_addr,
8806 VM_MAP_PAGE_MASK(dst_map))) {
8807 aligned = FALSE;
8808 dst_end = vm_map_round_page(dst_addr + copy->size,
8809 VM_MAP_PAGE_MASK(dst_map));
8810 } else {
8811 dst_end = dst_addr + copy->size;
8812 }
8813
8814 vm_map_lock(dst_map);
8815
8816 /* LP64todo - remove this check when vm_map_commpage64()
8817 * no longer has to stuff in a map_entry for the commpage
8818 * above the map's max_offset.
8819 */
8820 if (dst_addr >= dst_map->max_offset) {
8821 vm_map_unlock(dst_map);
8822 return KERN_INVALID_ADDRESS;
8823 }
8824
8825 start_pass_1:
8826 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
8827 vm_map_unlock(dst_map);
8828 return KERN_INVALID_ADDRESS;
8829 }
8830 vm_map_clip_start(dst_map,
8831 tmp_entry,
8832 vm_map_trunc_page(dst_addr,
8833 VM_MAP_PAGE_MASK(dst_map)));
8834 for (entry = tmp_entry;;) {
8835 vm_map_entry_t next = entry->vme_next;
8836
8837 while (entry->is_sub_map) {
8838 vm_map_offset_t sub_start;
8839 vm_map_offset_t sub_end;
8840 vm_map_offset_t local_end;
8841
8842 if (entry->in_transition) {
8843 /*
8844 * Say that we are waiting, and wait for entry.
8845 */
8846 entry->needs_wakeup = TRUE;
8847 vm_map_entry_wait(dst_map, THREAD_UNINT);
8848
8849 goto start_pass_1;
8850 }
8851
8852 local_end = entry->vme_end;
8853 if (!(entry->needs_copy)) {
8854 /* if needs_copy we are a COW submap */
8855 /* in such a case we just replace so */
8856 /* there is no need for the follow- */
8857 /* ing check. */
8858 encountered_sub_map = TRUE;
8859 sub_start = VME_OFFSET(entry);
8860
8861 if (entry->vme_end < dst_end) {
8862 sub_end = entry->vme_end;
8863 } else {
8864 sub_end = dst_end;
8865 }
8866 sub_end -= entry->vme_start;
8867 sub_end += VME_OFFSET(entry);
8868 vm_map_unlock(dst_map);
8869
8870 kr = vm_map_overwrite_submap_recurse(
8871 VME_SUBMAP(entry),
8872 sub_start,
8873 sub_end - sub_start);
8874 if (kr != KERN_SUCCESS) {
8875 return kr;
8876 }
8877 vm_map_lock(dst_map);
8878 }
8879
8880 if (dst_end <= entry->vme_end) {
8881 goto start_overwrite;
8882 }
8883 if (!vm_map_lookup_entry(dst_map, local_end,
8884 &entry)) {
8885 vm_map_unlock(dst_map);
8886 return KERN_INVALID_ADDRESS;
8887 }
8888 next = entry->vme_next;
8889 }
8890
8891 if (!(entry->protection & VM_PROT_WRITE)) {
8892 vm_map_unlock(dst_map);
8893 return KERN_PROTECTION_FAILURE;
8894 }
8895
8896 /*
8897 * If the entry is in transition, we must wait
8898 * for it to exit that state. Anything could happen
8899 * when we unlock the map, so start over.
8900 */
8901 if (entry->in_transition) {
8902 /*
8903 * Say that we are waiting, and wait for entry.
8904 */
8905 entry->needs_wakeup = TRUE;
8906 vm_map_entry_wait(dst_map, THREAD_UNINT);
8907
8908 goto start_pass_1;
8909 }
8910
8911 /*
8912 * our range is contained completely within this map entry
8913 */
8914 if (dst_end <= entry->vme_end) {
8915 break;
8916 }
8917 /*
8918 * check that range specified is contiguous region
8919 */
8920 if ((next == vm_map_to_entry(dst_map)) ||
8921 (next->vme_start != entry->vme_end)) {
8922 vm_map_unlock(dst_map);
8923 return KERN_INVALID_ADDRESS;
8924 }
8925
8926
8927 /*
8928 * Check for permanent objects in the destination.
8929 */
8930 if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
8931 ((!VME_OBJECT(entry)->internal) ||
8932 (VME_OBJECT(entry)->true_share))) {
8933 contains_permanent_objects = TRUE;
8934 }
8935
8936 entry = next;
8937 }/* for */
8938
8939 start_overwrite:
8940 /*
8941 * If there are permanent objects in the destination, then
8942 * the copy cannot be interrupted.
8943 */
8944
8945 if (interruptible && contains_permanent_objects) {
8946 vm_map_unlock(dst_map);
8947 return KERN_FAILURE; /* XXX */
8948 }
8949
8950 /*
8951 *
8952 * Make a second pass, overwriting the data
8953 * At the beginning of each loop iteration,
8954 * the next entry to be overwritten is "tmp_entry"
8955 * (initially, the value returned from the lookup above),
8956 * and the starting address expected in that entry
8957 * is "start".
8958 */
8959
8960 total_size = copy->size;
8961 if (encountered_sub_map) {
8962 copy_size = 0;
8963 /* re-calculate tmp_entry since we've had the map */
8964 /* unlocked */
8965 if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
8966 vm_map_unlock(dst_map);
8967 return KERN_INVALID_ADDRESS;
8968 }
8969 } else {
8970 copy_size = copy->size;
8971 }
8972
8973 base_addr = dst_addr;
8974 while (TRUE) {
8975 /* deconstruct the copy object and do in parts */
8976 /* only in sub_map, interruptable case */
8977 vm_map_entry_t copy_entry;
8978 vm_map_entry_t previous_prev = VM_MAP_ENTRY_NULL;
8979 vm_map_entry_t next_copy = VM_MAP_ENTRY_NULL;
8980 int nentries;
8981 int remaining_entries = 0;
8982 vm_map_offset_t new_offset = 0;
8983
8984 for (entry = tmp_entry; copy_size == 0;) {
8985 vm_map_entry_t next;
8986
8987 next = entry->vme_next;
8988
8989 /* tmp_entry and base address are moved along */
8990 /* each time we encounter a sub-map. Otherwise */
8991 /* entry can outpase tmp_entry, and the copy_size */
8992 /* may reflect the distance between them */
8993 /* if the current entry is found to be in transition */
8994 /* we will start over at the beginning or the last */
8995 /* encounter of a submap as dictated by base_addr */
8996 /* we will zero copy_size accordingly. */
8997 if (entry->in_transition) {
8998 /*
8999 * Say that we are waiting, and wait for entry.
9000 */
9001 entry->needs_wakeup = TRUE;
9002 vm_map_entry_wait(dst_map, THREAD_UNINT);
9003
9004 if (!vm_map_lookup_entry(dst_map, base_addr,
9005 &tmp_entry)) {
9006 vm_map_unlock(dst_map);
9007 return KERN_INVALID_ADDRESS;
9008 }
9009 copy_size = 0;
9010 entry = tmp_entry;
9011 continue;
9012 }
9013 if (entry->is_sub_map) {
9014 vm_map_offset_t sub_start;
9015 vm_map_offset_t sub_end;
9016 vm_map_offset_t local_end;
9017
9018 if (entry->needs_copy) {
9019 /* if this is a COW submap */
9020 /* just back the range with a */
9021 /* anonymous entry */
9022 if (entry->vme_end < dst_end) {
9023 sub_end = entry->vme_end;
9024 } else {
9025 sub_end = dst_end;
9026 }
9027 if (entry->vme_start < base_addr) {
9028 sub_start = base_addr;
9029 } else {
9030 sub_start = entry->vme_start;
9031 }
9032 vm_map_clip_end(
9033 dst_map, entry, sub_end);
9034 vm_map_clip_start(
9035 dst_map, entry, sub_start);
9036 assert(!entry->use_pmap);
9037 assert(!entry->iokit_acct);
9038 entry->use_pmap = TRUE;
9039 entry->is_sub_map = FALSE;
9040 vm_map_deallocate(
9041 VME_SUBMAP(entry));
9042 VME_OBJECT_SET(entry, VM_OBJECT_NULL);
9043 VME_OFFSET_SET(entry, 0);
9044 entry->is_shared = FALSE;
9045 entry->needs_copy = FALSE;
9046 entry->protection = VM_PROT_DEFAULT;
9047 entry->max_protection = VM_PROT_ALL;
9048 entry->wired_count = 0;
9049 entry->user_wired_count = 0;
9050 if (entry->inheritance
9051 == VM_INHERIT_SHARE) {
9052 entry->inheritance = VM_INHERIT_COPY;
9053 }
9054 continue;
9055 }
9056 /* first take care of any non-sub_map */
9057 /* entries to send */
9058 if (base_addr < entry->vme_start) {
9059 /* stuff to send */
9060 copy_size =
9061 entry->vme_start - base_addr;
9062 break;
9063 }
9064 sub_start = VME_OFFSET(entry);
9065
9066 if (entry->vme_end < dst_end) {
9067 sub_end = entry->vme_end;
9068 } else {
9069 sub_end = dst_end;
9070 }
9071 sub_end -= entry->vme_start;
9072 sub_end += VME_OFFSET(entry);
9073 local_end = entry->vme_end;
9074 vm_map_unlock(dst_map);
9075 copy_size = sub_end - sub_start;
9076
9077 /* adjust the copy object */
9078 if (total_size > copy_size) {
9079 vm_map_size_t local_size = 0;
9080 vm_map_size_t entry_size;
9081
9082 nentries = 1;
9083 new_offset = copy->offset;
9084 copy_entry = vm_map_copy_first_entry(copy);
9085 while (copy_entry !=
9086 vm_map_copy_to_entry(copy)) {
9087 entry_size = copy_entry->vme_end -
9088 copy_entry->vme_start;
9089 if ((local_size < copy_size) &&
9090 ((local_size + entry_size)
9091 >= copy_size)) {
9092 vm_map_copy_clip_end(copy,
9093 copy_entry,
9094 copy_entry->vme_start +
9095 (copy_size - local_size));
9096 entry_size = copy_entry->vme_end -
9097 copy_entry->vme_start;
9098 local_size += entry_size;
9099 new_offset += entry_size;
9100 }
9101 if (local_size >= copy_size) {
9102 next_copy = copy_entry->vme_next;
9103 copy_entry->vme_next =
9104 vm_map_copy_to_entry(copy);
9105 previous_prev =
9106 copy->cpy_hdr.links.prev;
9107 copy->cpy_hdr.links.prev = copy_entry;
9108 copy->size = copy_size;
9109 remaining_entries =
9110 copy->cpy_hdr.nentries;
9111 remaining_entries -= nentries;
9112 copy->cpy_hdr.nentries = nentries;
9113 break;
9114 } else {
9115 local_size += entry_size;
9116 new_offset += entry_size;
9117 nentries++;
9118 }
9119 copy_entry = copy_entry->vme_next;
9120 }
9121 }
9122
9123 if ((entry->use_pmap) && (pmap == NULL)) {
9124 kr = vm_map_copy_overwrite_nested(
9125 VME_SUBMAP(entry),
9126 sub_start,
9127 copy,
9128 interruptible,
9129 VME_SUBMAP(entry)->pmap,
9130 TRUE);
9131 } else if (pmap != NULL) {
9132 kr = vm_map_copy_overwrite_nested(
9133 VME_SUBMAP(entry),
9134 sub_start,
9135 copy,
9136 interruptible, pmap,
9137 TRUE);
9138 } else {
9139 kr = vm_map_copy_overwrite_nested(
9140 VME_SUBMAP(entry),
9141 sub_start,
9142 copy,
9143 interruptible,
9144 dst_map->pmap,
9145 TRUE);
9146 }
9147 if (kr != KERN_SUCCESS) {
9148 if (next_copy != NULL) {
9149 copy->cpy_hdr.nentries +=
9150 remaining_entries;
9151 copy->cpy_hdr.links.prev->vme_next =
9152 next_copy;
9153 copy->cpy_hdr.links.prev
9154 = previous_prev;
9155 copy->size = total_size;
9156 }
9157 return kr;
9158 }
9159 if (dst_end <= local_end) {
9160 return KERN_SUCCESS;
9161 }
9162 /* otherwise copy no longer exists, it was */
9163 /* destroyed after successful copy_overwrite */
9164 copy = vm_map_copy_allocate();
9165 copy->type = VM_MAP_COPY_ENTRY_LIST;
9166 copy->offset = new_offset;
9167
9168 /*
9169 * XXX FBDP
9170 * this does not seem to deal with
9171 * the VM map store (R&B tree)
9172 */
9173
9174 total_size -= copy_size;
9175 copy_size = 0;
9176 /* put back remainder of copy in container */
9177 if (next_copy != NULL) {
9178 copy->cpy_hdr.nentries = remaining_entries;
9179 copy->cpy_hdr.links.next = next_copy;
9180 copy->cpy_hdr.links.prev = previous_prev;
9181 copy->size = total_size;
9182 next_copy->vme_prev =
9183 vm_map_copy_to_entry(copy);
9184 next_copy = NULL;
9185 }
9186 base_addr = local_end;
9187 vm_map_lock(dst_map);
9188 if (!vm_map_lookup_entry(dst_map,
9189 local_end, &tmp_entry)) {
9190 vm_map_unlock(dst_map);
9191 return KERN_INVALID_ADDRESS;
9192 }
9193 entry = tmp_entry;
9194 continue;
9195 }
9196 if (dst_end <= entry->vme_end) {
9197 copy_size = dst_end - base_addr;
9198 break;
9199 }
9200
9201 if ((next == vm_map_to_entry(dst_map)) ||
9202 (next->vme_start != entry->vme_end)) {
9203 vm_map_unlock(dst_map);
9204 return KERN_INVALID_ADDRESS;
9205 }
9206
9207 entry = next;
9208 }/* for */
9209
9210 next_copy = NULL;
9211 nentries = 1;
9212
9213 /* adjust the copy object */
9214 if (total_size > copy_size) {
9215 vm_map_size_t local_size = 0;
9216 vm_map_size_t entry_size;
9217
9218 new_offset = copy->offset;
9219 copy_entry = vm_map_copy_first_entry(copy);
9220 while (copy_entry != vm_map_copy_to_entry(copy)) {
9221 entry_size = copy_entry->vme_end -
9222 copy_entry->vme_start;
9223 if ((local_size < copy_size) &&
9224 ((local_size + entry_size)
9225 >= copy_size)) {
9226 vm_map_copy_clip_end(copy, copy_entry,
9227 copy_entry->vme_start +
9228 (copy_size - local_size));
9229 entry_size = copy_entry->vme_end -
9230 copy_entry->vme_start;
9231 local_size += entry_size;
9232 new_offset += entry_size;
9233 }
9234 if (local_size >= copy_size) {
9235 next_copy = copy_entry->vme_next;
9236 copy_entry->vme_next =
9237 vm_map_copy_to_entry(copy);
9238 previous_prev =
9239 copy->cpy_hdr.links.prev;
9240 copy->cpy_hdr.links.prev = copy_entry;
9241 copy->size = copy_size;
9242 remaining_entries =
9243 copy->cpy_hdr.nentries;
9244 remaining_entries -= nentries;
9245 copy->cpy_hdr.nentries = nentries;
9246 break;
9247 } else {
9248 local_size += entry_size;
9249 new_offset += entry_size;
9250 nentries++;
9251 }
9252 copy_entry = copy_entry->vme_next;
9253 }
9254 }
9255
9256 if (aligned) {
9257 pmap_t local_pmap;
9258
9259 if (pmap) {
9260 local_pmap = pmap;
9261 } else {
9262 local_pmap = dst_map->pmap;
9263 }
9264
9265 if ((kr = vm_map_copy_overwrite_aligned(
9266 dst_map, tmp_entry, copy,
9267 base_addr, local_pmap)) != KERN_SUCCESS) {
9268 if (next_copy != NULL) {
9269 copy->cpy_hdr.nentries +=
9270 remaining_entries;
9271 copy->cpy_hdr.links.prev->vme_next =
9272 next_copy;
9273 copy->cpy_hdr.links.prev =
9274 previous_prev;
9275 copy->size += copy_size;
9276 }
9277 return kr;
9278 }
9279 vm_map_unlock(dst_map);
9280 } else {
9281 /*
9282 * Performance gain:
9283 *
9284 * if the copy and dst address are misaligned but the same
9285 * offset within the page we can copy_not_aligned the
9286 * misaligned parts and copy aligned the rest. If they are
9287 * aligned but len is unaligned we simply need to copy
9288 * the end bit unaligned. We'll need to split the misaligned
9289 * bits of the region in this case !
9290 */
9291 /* ALWAYS UNLOCKS THE dst_map MAP */
9292 kr = vm_map_copy_overwrite_unaligned(
9293 dst_map,
9294 tmp_entry,
9295 copy,
9296 base_addr,
9297 discard_on_success);
9298 if (kr != KERN_SUCCESS) {
9299 if (next_copy != NULL) {
9300 copy->cpy_hdr.nentries +=
9301 remaining_entries;
9302 copy->cpy_hdr.links.prev->vme_next =
9303 next_copy;
9304 copy->cpy_hdr.links.prev =
9305 previous_prev;
9306 copy->size += copy_size;
9307 }
9308 return kr;
9309 }
9310 }
9311 total_size -= copy_size;
9312 if (total_size == 0) {
9313 break;
9314 }
9315 base_addr += copy_size;
9316 copy_size = 0;
9317 copy->offset = new_offset;
9318 if (next_copy != NULL) {
9319 copy->cpy_hdr.nentries = remaining_entries;
9320 copy->cpy_hdr.links.next = next_copy;
9321 copy->cpy_hdr.links.prev = previous_prev;
9322 next_copy->vme_prev = vm_map_copy_to_entry(copy);
9323 copy->size = total_size;
9324 }
9325 vm_map_lock(dst_map);
9326 while (TRUE) {
9327 if (!vm_map_lookup_entry(dst_map,
9328 base_addr, &tmp_entry)) {
9329 vm_map_unlock(dst_map);
9330 return KERN_INVALID_ADDRESS;
9331 }
9332 if (tmp_entry->in_transition) {
9333 entry->needs_wakeup = TRUE;
9334 vm_map_entry_wait(dst_map, THREAD_UNINT);
9335 } else {
9336 break;
9337 }
9338 }
9339 vm_map_clip_start(dst_map,
9340 tmp_entry,
9341 vm_map_trunc_page(base_addr,
9342 VM_MAP_PAGE_MASK(dst_map)));
9343
9344 entry = tmp_entry;
9345 } /* while */
9346
9347 /*
9348 * Throw away the vm_map_copy object
9349 */
9350 if (discard_on_success) {
9351 vm_map_copy_discard(copy);
9352 }
9353
9354 return KERN_SUCCESS;
9355 }/* vm_map_copy_overwrite */
9356
9357 kern_return_t
9358 vm_map_copy_overwrite(
9359 vm_map_t dst_map,
9360 vm_map_offset_t dst_addr,
9361 vm_map_copy_t copy,
9362 boolean_t interruptible)
9363 {
9364 vm_map_size_t head_size, tail_size;
9365 vm_map_copy_t head_copy, tail_copy;
9366 vm_map_offset_t head_addr, tail_addr;
9367 vm_map_entry_t entry;
9368 kern_return_t kr;
9369 vm_map_offset_t effective_page_mask, effective_page_size;
9370
9371 head_size = 0;
9372 tail_size = 0;
9373 head_copy = NULL;
9374 tail_copy = NULL;
9375 head_addr = 0;
9376 tail_addr = 0;
9377
9378 if (interruptible ||
9379 copy == VM_MAP_COPY_NULL ||
9380 copy->type != VM_MAP_COPY_ENTRY_LIST) {
9381 /*
9382 * We can't split the "copy" map if we're interruptible
9383 * or if we don't have a "copy" map...
9384 */
9385 blunt_copy:
9386 return vm_map_copy_overwrite_nested(dst_map,
9387 dst_addr,
9388 copy,
9389 interruptible,
9390 (pmap_t) NULL,
9391 TRUE);
9392 }
9393
9394 effective_page_mask = MAX(VM_MAP_PAGE_MASK(dst_map), PAGE_MASK);
9395 effective_page_mask = MAX(VM_MAP_COPY_PAGE_MASK(copy),
9396 effective_page_mask);
9397 effective_page_size = effective_page_mask + 1;
9398
9399 if (copy->size < 3 * effective_page_size) {
9400 /*
9401 * Too small to bother with optimizing...
9402 */
9403 goto blunt_copy;
9404 }
9405
9406 if ((dst_addr & effective_page_mask) !=
9407 (copy->offset & effective_page_mask)) {
9408 /*
9409 * Incompatible mis-alignment of source and destination...
9410 */
9411 goto blunt_copy;
9412 }
9413
9414 /*
9415 * Proper alignment or identical mis-alignment at the beginning.
9416 * Let's try and do a small unaligned copy first (if needed)
9417 * and then an aligned copy for the rest.
9418 */
9419 if (!vm_map_page_aligned(dst_addr, effective_page_mask)) {
9420 head_addr = dst_addr;
9421 head_size = (effective_page_size -
9422 (copy->offset & effective_page_mask));
9423 head_size = MIN(head_size, copy->size);
9424 }
9425 if (!vm_map_page_aligned(copy->offset + copy->size,
9426 effective_page_mask)) {
9427 /*
9428 * Mis-alignment at the end.
9429 * Do an aligned copy up to the last page and
9430 * then an unaligned copy for the remaining bytes.
9431 */
9432 tail_size = ((copy->offset + copy->size) &
9433 effective_page_mask);
9434 tail_size = MIN(tail_size, copy->size);
9435 tail_addr = dst_addr + copy->size - tail_size;
9436 assert(tail_addr >= head_addr + head_size);
9437 }
9438 assert(head_size + tail_size <= copy->size);
9439
9440 if (head_size + tail_size == copy->size) {
9441 /*
9442 * It's all unaligned, no optimization possible...
9443 */
9444 goto blunt_copy;
9445 }
9446
9447 /*
9448 * Can't optimize if there are any submaps in the
9449 * destination due to the way we free the "copy" map
9450 * progressively in vm_map_copy_overwrite_nested()
9451 * in that case.
9452 */
9453 vm_map_lock_read(dst_map);
9454 if (!vm_map_lookup_entry(dst_map, dst_addr, &entry)) {
9455 vm_map_unlock_read(dst_map);
9456 goto blunt_copy;
9457 }
9458 for (;
9459 (entry != vm_map_copy_to_entry(copy) &&
9460 entry->vme_start < dst_addr + copy->size);
9461 entry = entry->vme_next) {
9462 if (entry->is_sub_map) {
9463 vm_map_unlock_read(dst_map);
9464 goto blunt_copy;
9465 }
9466 }
9467 vm_map_unlock_read(dst_map);
9468
9469 if (head_size) {
9470 /*
9471 * Unaligned copy of the first "head_size" bytes, to reach
9472 * a page boundary.
9473 */
9474
9475 /*
9476 * Extract "head_copy" out of "copy".
9477 */
9478 head_copy = vm_map_copy_allocate();
9479 head_copy->type = VM_MAP_COPY_ENTRY_LIST;
9480 head_copy->cpy_hdr.entries_pageable =
9481 copy->cpy_hdr.entries_pageable;
9482 vm_map_store_init(&head_copy->cpy_hdr);
9483
9484 entry = vm_map_copy_first_entry(copy);
9485 if (entry->vme_end < copy->offset + head_size) {
9486 head_size = entry->vme_end - copy->offset;
9487 }
9488
9489 head_copy->offset = copy->offset;
9490 head_copy->size = head_size;
9491 copy->offset += head_size;
9492 copy->size -= head_size;
9493
9494 vm_map_copy_clip_end(copy, entry, copy->offset);
9495 vm_map_copy_entry_unlink(copy, entry);
9496 vm_map_copy_entry_link(head_copy,
9497 vm_map_copy_to_entry(head_copy),
9498 entry);
9499
9500 /*
9501 * Do the unaligned copy.
9502 */
9503 kr = vm_map_copy_overwrite_nested(dst_map,
9504 head_addr,
9505 head_copy,
9506 interruptible,
9507 (pmap_t) NULL,
9508 FALSE);
9509 if (kr != KERN_SUCCESS) {
9510 goto done;
9511 }
9512 }
9513
9514 if (tail_size) {
9515 /*
9516 * Extract "tail_copy" out of "copy".
9517 */
9518 tail_copy = vm_map_copy_allocate();
9519 tail_copy->type = VM_MAP_COPY_ENTRY_LIST;
9520 tail_copy->cpy_hdr.entries_pageable =
9521 copy->cpy_hdr.entries_pageable;
9522 vm_map_store_init(&tail_copy->cpy_hdr);
9523
9524 tail_copy->offset = copy->offset + copy->size - tail_size;
9525 tail_copy->size = tail_size;
9526
9527 copy->size -= tail_size;
9528
9529 entry = vm_map_copy_last_entry(copy);
9530 vm_map_copy_clip_start(copy, entry, tail_copy->offset);
9531 entry = vm_map_copy_last_entry(copy);
9532 vm_map_copy_entry_unlink(copy, entry);
9533 vm_map_copy_entry_link(tail_copy,
9534 vm_map_copy_last_entry(tail_copy),
9535 entry);
9536 }
9537
9538 /*
9539 * Copy most (or possibly all) of the data.
9540 */
9541 kr = vm_map_copy_overwrite_nested(dst_map,
9542 dst_addr + head_size,
9543 copy,
9544 interruptible,
9545 (pmap_t) NULL,
9546 FALSE);
9547 if (kr != KERN_SUCCESS) {
9548 goto done;
9549 }
9550
9551 if (tail_size) {
9552 kr = vm_map_copy_overwrite_nested(dst_map,
9553 tail_addr,
9554 tail_copy,
9555 interruptible,
9556 (pmap_t) NULL,
9557 FALSE);
9558 }
9559
9560 done:
9561 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
9562 if (kr == KERN_SUCCESS) {
9563 /*
9564 * Discard all the copy maps.
9565 */
9566 if (head_copy) {
9567 vm_map_copy_discard(head_copy);
9568 head_copy = NULL;
9569 }
9570 vm_map_copy_discard(copy);
9571 if (tail_copy) {
9572 vm_map_copy_discard(tail_copy);
9573 tail_copy = NULL;
9574 }
9575 } else {
9576 /*
9577 * Re-assemble the original copy map.
9578 */
9579 if (head_copy) {
9580 entry = vm_map_copy_first_entry(head_copy);
9581 vm_map_copy_entry_unlink(head_copy, entry);
9582 vm_map_copy_entry_link(copy,
9583 vm_map_copy_to_entry(copy),
9584 entry);
9585 copy->offset -= head_size;
9586 copy->size += head_size;
9587 vm_map_copy_discard(head_copy);
9588 head_copy = NULL;
9589 }
9590 if (tail_copy) {
9591 entry = vm_map_copy_last_entry(tail_copy);
9592 vm_map_copy_entry_unlink(tail_copy, entry);
9593 vm_map_copy_entry_link(copy,
9594 vm_map_copy_last_entry(copy),
9595 entry);
9596 copy->size += tail_size;
9597 vm_map_copy_discard(tail_copy);
9598 tail_copy = NULL;
9599 }
9600 }
9601 return kr;
9602 }
9603
9604
9605 /*
9606 * Routine: vm_map_copy_overwrite_unaligned [internal use only]
9607 *
9608 * Decription:
9609 * Physically copy unaligned data
9610 *
9611 * Implementation:
9612 * Unaligned parts of pages have to be physically copied. We use
9613 * a modified form of vm_fault_copy (which understands none-aligned
9614 * page offsets and sizes) to do the copy. We attempt to copy as
9615 * much memory in one go as possibly, however vm_fault_copy copies
9616 * within 1 memory object so we have to find the smaller of "amount left"
9617 * "source object data size" and "target object data size". With
9618 * unaligned data we don't need to split regions, therefore the source
9619 * (copy) object should be one map entry, the target range may be split
9620 * over multiple map entries however. In any event we are pessimistic
9621 * about these assumptions.
9622 *
9623 * Assumptions:
9624 * dst_map is locked on entry and is return locked on success,
9625 * unlocked on error.
9626 */
9627
9628 static kern_return_t
9629 vm_map_copy_overwrite_unaligned(
9630 vm_map_t dst_map,
9631 vm_map_entry_t entry,
9632 vm_map_copy_t copy,
9633 vm_map_offset_t start,
9634 boolean_t discard_on_success)
9635 {
9636 vm_map_entry_t copy_entry;
9637 vm_map_entry_t copy_entry_next;
9638 vm_map_version_t version;
9639 vm_object_t dst_object;
9640 vm_object_offset_t dst_offset;
9641 vm_object_offset_t src_offset;
9642 vm_object_offset_t entry_offset;
9643 vm_map_offset_t entry_end;
9644 vm_map_size_t src_size,
9645 dst_size,
9646 copy_size,
9647 amount_left;
9648 kern_return_t kr = KERN_SUCCESS;
9649
9650
9651 copy_entry = vm_map_copy_first_entry(copy);
9652
9653 vm_map_lock_write_to_read(dst_map);
9654
9655 src_offset = copy->offset - vm_object_trunc_page(copy->offset);
9656 amount_left = copy->size;
9657 /*
9658 * unaligned so we never clipped this entry, we need the offset into
9659 * the vm_object not just the data.
9660 */
9661 while (amount_left > 0) {
9662 if (entry == vm_map_to_entry(dst_map)) {
9663 vm_map_unlock_read(dst_map);
9664 return KERN_INVALID_ADDRESS;
9665 }
9666
9667 /* "start" must be within the current map entry */
9668 assert((start >= entry->vme_start) && (start < entry->vme_end));
9669
9670 dst_offset = start - entry->vme_start;
9671
9672 dst_size = entry->vme_end - start;
9673
9674 src_size = copy_entry->vme_end -
9675 (copy_entry->vme_start + src_offset);
9676
9677 if (dst_size < src_size) {
9678 /*
9679 * we can only copy dst_size bytes before
9680 * we have to get the next destination entry
9681 */
9682 copy_size = dst_size;
9683 } else {
9684 /*
9685 * we can only copy src_size bytes before
9686 * we have to get the next source copy entry
9687 */
9688 copy_size = src_size;
9689 }
9690
9691 if (copy_size > amount_left) {
9692 copy_size = amount_left;
9693 }
9694 /*
9695 * Entry needs copy, create a shadow shadow object for
9696 * Copy on write region.
9697 */
9698 if (entry->needs_copy &&
9699 ((entry->protection & VM_PROT_WRITE) != 0)) {
9700 if (vm_map_lock_read_to_write(dst_map)) {
9701 vm_map_lock_read(dst_map);
9702 goto RetryLookup;
9703 }
9704 VME_OBJECT_SHADOW(entry,
9705 (vm_map_size_t)(entry->vme_end
9706 - entry->vme_start));
9707 entry->needs_copy = FALSE;
9708 vm_map_lock_write_to_read(dst_map);
9709 }
9710 dst_object = VME_OBJECT(entry);
9711 /*
9712 * unlike with the virtual (aligned) copy we're going
9713 * to fault on it therefore we need a target object.
9714 */
9715 if (dst_object == VM_OBJECT_NULL) {
9716 if (vm_map_lock_read_to_write(dst_map)) {
9717 vm_map_lock_read(dst_map);
9718 goto RetryLookup;
9719 }
9720 dst_object = vm_object_allocate((vm_map_size_t)
9721 entry->vme_end - entry->vme_start);
9722 VME_OBJECT_SET(entry, dst_object);
9723 VME_OFFSET_SET(entry, 0);
9724 assert(entry->use_pmap);
9725 vm_map_lock_write_to_read(dst_map);
9726 }
9727 /*
9728 * Take an object reference and unlock map. The "entry" may
9729 * disappear or change when the map is unlocked.
9730 */
9731 vm_object_reference(dst_object);
9732 version.main_timestamp = dst_map->timestamp;
9733 entry_offset = VME_OFFSET(entry);
9734 entry_end = entry->vme_end;
9735 vm_map_unlock_read(dst_map);
9736 /*
9737 * Copy as much as possible in one pass
9738 */
9739 kr = vm_fault_copy(
9740 VME_OBJECT(copy_entry),
9741 VME_OFFSET(copy_entry) + src_offset,
9742 &copy_size,
9743 dst_object,
9744 entry_offset + dst_offset,
9745 dst_map,
9746 &version,
9747 THREAD_UNINT );
9748
9749 start += copy_size;
9750 src_offset += copy_size;
9751 amount_left -= copy_size;
9752 /*
9753 * Release the object reference
9754 */
9755 vm_object_deallocate(dst_object);
9756 /*
9757 * If a hard error occurred, return it now
9758 */
9759 if (kr != KERN_SUCCESS) {
9760 return kr;
9761 }
9762
9763 if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
9764 || amount_left == 0) {
9765 /*
9766 * all done with this copy entry, dispose.
9767 */
9768 copy_entry_next = copy_entry->vme_next;
9769
9770 if (discard_on_success) {
9771 vm_map_copy_entry_unlink(copy, copy_entry);
9772 assert(!copy_entry->is_sub_map);
9773 vm_object_deallocate(VME_OBJECT(copy_entry));
9774 vm_map_copy_entry_dispose(copy, copy_entry);
9775 }
9776
9777 if (copy_entry_next == vm_map_copy_to_entry(copy) &&
9778 amount_left) {
9779 /*
9780 * not finished copying but run out of source
9781 */
9782 return KERN_INVALID_ADDRESS;
9783 }
9784
9785 copy_entry = copy_entry_next;
9786
9787 src_offset = 0;
9788 }
9789
9790 if (amount_left == 0) {
9791 return KERN_SUCCESS;
9792 }
9793
9794 vm_map_lock_read(dst_map);
9795 if (version.main_timestamp == dst_map->timestamp) {
9796 if (start == entry_end) {
9797 /*
9798 * destination region is split. Use the version
9799 * information to avoid a lookup in the normal
9800 * case.
9801 */
9802 entry = entry->vme_next;
9803 /*
9804 * should be contiguous. Fail if we encounter
9805 * a hole in the destination.
9806 */
9807 if (start != entry->vme_start) {
9808 vm_map_unlock_read(dst_map);
9809 return KERN_INVALID_ADDRESS;
9810 }
9811 }
9812 } else {
9813 /*
9814 * Map version check failed.
9815 * we must lookup the entry because somebody
9816 * might have changed the map behind our backs.
9817 */
9818 RetryLookup:
9819 if (!vm_map_lookup_entry(dst_map, start, &entry)) {
9820 vm_map_unlock_read(dst_map);
9821 return KERN_INVALID_ADDRESS;
9822 }
9823 }
9824 }/* while */
9825
9826 return KERN_SUCCESS;
9827 }/* vm_map_copy_overwrite_unaligned */
9828
9829 /*
9830 * Routine: vm_map_copy_overwrite_aligned [internal use only]
9831 *
9832 * Description:
9833 * Does all the vm_trickery possible for whole pages.
9834 *
9835 * Implementation:
9836 *
9837 * If there are no permanent objects in the destination,
9838 * and the source and destination map entry zones match,
9839 * and the destination map entry is not shared,
9840 * then the map entries can be deleted and replaced
9841 * with those from the copy. The following code is the
9842 * basic idea of what to do, but there are lots of annoying
9843 * little details about getting protection and inheritance
9844 * right. Should add protection, inheritance, and sharing checks
9845 * to the above pass and make sure that no wiring is involved.
9846 */
9847
/*
 * Event counters bumped by vm_map_copy_overwrite_aligned() each time it
 * abandons the entry-replacement path and jumps to its "slow_copy" path.
 */
/*
 * Destination entry has a malloc alias and the walk down the source
 * object's shadow chain stopped on a non-internal object.
 */
9848 int vm_map_copy_overwrite_aligned_src_not_internal = 0;
/*
 * Same malloc-destination case, but the walk stopped on an object that is
 * "true_share" or whose copy strategy is not MEMORY_OBJECT_COPY_SYMMETRIC
 * (only counted when !CONFIG_EMBEDDED).
 */
9849 int vm_map_copy_overwrite_aligned_src_not_symmetric = 0;
/*
 * Source object is at least __TRADEOFF1_OBJ_SIZE while the piece being
 * copied is at most __TRADEOFF1_COPY_SIZE (only counted when
 * !CONFIG_EMBEDDED).
 */
9850 int vm_map_copy_overwrite_aligned_src_large = 0;
9851
9852 static kern_return_t
9853 vm_map_copy_overwrite_aligned(
9854 vm_map_t dst_map,
9855 vm_map_entry_t tmp_entry,
9856 vm_map_copy_t copy,
9857 vm_map_offset_t start,
9858 __unused pmap_t pmap)
9859 {
9860 vm_object_t object;
9861 vm_map_entry_t copy_entry;
9862 vm_map_size_t copy_size;
9863 vm_map_size_t size;
9864 vm_map_entry_t entry;
9865
9866 while ((copy_entry = vm_map_copy_first_entry(copy))
9867 != vm_map_copy_to_entry(copy)) {
9868 copy_size = (copy_entry->vme_end - copy_entry->vme_start);
9869
9870 entry = tmp_entry;
9871 if (entry->is_sub_map) {
9872 /* unnested when clipped earlier */
9873 assert(!entry->use_pmap);
9874 }
9875 if (entry == vm_map_to_entry(dst_map)) {
9876 vm_map_unlock(dst_map);
9877 return KERN_INVALID_ADDRESS;
9878 }
9879 size = (entry->vme_end - entry->vme_start);
9880 /*
9881 * Make sure that no holes popped up in the
9882 * address map, and that the protection is
9883 * still valid, in case the map was unlocked
9884 * earlier.
9885 */
9886
9887 if ((entry->vme_start != start) || ((entry->is_sub_map)
9888 && !entry->needs_copy)) {
9889 vm_map_unlock(dst_map);
9890 return KERN_INVALID_ADDRESS;
9891 }
9892 assert(entry != vm_map_to_entry(dst_map));
9893
9894 /*
9895 * Check protection again
9896 */
9897
9898 if (!(entry->protection & VM_PROT_WRITE)) {
9899 vm_map_unlock(dst_map);
9900 return KERN_PROTECTION_FAILURE;
9901 }
9902
9903 /*
9904 * Adjust to source size first
9905 */
9906
9907 if (copy_size < size) {
9908 if (entry->map_aligned &&
9909 !VM_MAP_PAGE_ALIGNED(entry->vme_start + copy_size,
9910 VM_MAP_PAGE_MASK(dst_map))) {
9911 /* no longer map-aligned */
9912 entry->map_aligned = FALSE;
9913 }
9914 vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
9915 size = copy_size;
9916 }
9917
9918 /*
9919 * Adjust to destination size
9920 */
9921
9922 if (size < copy_size) {
9923 vm_map_copy_clip_end(copy, copy_entry,
9924 copy_entry->vme_start + size);
9925 copy_size = size;
9926 }
9927
9928 assert((entry->vme_end - entry->vme_start) == size);
9929 assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
9930 assert((copy_entry->vme_end - copy_entry->vme_start) == size);
9931
9932 /*
9933 * If the destination contains temporary unshared memory,
9934 * we can perform the copy by throwing it away and
9935 * installing the source data.
9936 */
9937
9938 object = VME_OBJECT(entry);
9939 if ((!entry->is_shared &&
9940 ((object == VM_OBJECT_NULL) ||
9941 (object->internal && !object->true_share))) ||
9942 entry->needs_copy) {
9943 vm_object_t old_object = VME_OBJECT(entry);
9944 vm_object_offset_t old_offset = VME_OFFSET(entry);
9945 vm_object_offset_t offset;
9946
9947 /*
9948 * Ensure that the source and destination aren't
9949 * identical
9950 */
9951 if (old_object == VME_OBJECT(copy_entry) &&
9952 old_offset == VME_OFFSET(copy_entry)) {
9953 vm_map_copy_entry_unlink(copy, copy_entry);
9954 vm_map_copy_entry_dispose(copy, copy_entry);
9955
9956 if (old_object != VM_OBJECT_NULL) {
9957 vm_object_deallocate(old_object);
9958 }
9959
9960 start = tmp_entry->vme_end;
9961 tmp_entry = tmp_entry->vme_next;
9962 continue;
9963 }
9964
9965 #if !CONFIG_EMBEDDED
9966 #define __TRADEOFF1_OBJ_SIZE (64 * 1024 * 1024) /* 64 MB */
9967 #define __TRADEOFF1_COPY_SIZE (128 * 1024) /* 128 KB */
9968 if (VME_OBJECT(copy_entry) != VM_OBJECT_NULL &&
9969 VME_OBJECT(copy_entry)->vo_size >= __TRADEOFF1_OBJ_SIZE &&
9970 copy_size <= __TRADEOFF1_COPY_SIZE) {
9971 /*
9972 * Virtual vs. Physical copy tradeoff #1.
9973 *
9974 * Copying only a few pages out of a large
9975 * object: do a physical copy instead of
9976 * a virtual copy, to avoid possibly keeping
9977 * the entire large object alive because of
9978 * those few copy-on-write pages.
9979 */
9980 vm_map_copy_overwrite_aligned_src_large++;
9981 goto slow_copy;
9982 }
9983 #endif /* !CONFIG_EMBEDDED */
9984
9985 if ((dst_map->pmap != kernel_pmap) &&
9986 (VME_ALIAS(entry) >= VM_MEMORY_MALLOC) &&
9987 (VME_ALIAS(entry) <= VM_MEMORY_MALLOC_MEDIUM)) {
9988 vm_object_t new_object, new_shadow;
9989
9990 /*
9991 * We're about to map something over a mapping
9992 * established by malloc()...
9993 */
9994 new_object = VME_OBJECT(copy_entry);
9995 if (new_object != VM_OBJECT_NULL) {
9996 vm_object_lock_shared(new_object);
9997 }
9998 while (new_object != VM_OBJECT_NULL &&
9999 #if !CONFIG_EMBEDDED
10000 !new_object->true_share &&
10001 new_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
10002 #endif /* !CONFIG_EMBEDDED */
10003 new_object->internal) {
10004 new_shadow = new_object->shadow;
10005 if (new_shadow == VM_OBJECT_NULL) {
10006 break;
10007 }
10008 vm_object_lock_shared(new_shadow);
10009 vm_object_unlock(new_object);
10010 new_object = new_shadow;
10011 }
10012 if (new_object != VM_OBJECT_NULL) {
10013 if (!new_object->internal) {
10014 /*
10015 * The new mapping is backed
10016 * by an external object. We
10017 * don't want malloc'ed memory
10018 * to be replaced with such a
10019 * non-anonymous mapping, so
10020 * let's go off the optimized
10021 * path...
10022 */
10023 vm_map_copy_overwrite_aligned_src_not_internal++;
10024 vm_object_unlock(new_object);
10025 goto slow_copy;
10026 }
10027 #if !CONFIG_EMBEDDED
10028 if (new_object->true_share ||
10029 new_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
10030 /*
10031 * Same if there's a "true_share"
10032 * object in the shadow chain, or
10033 * an object with a non-default
10034 * (SYMMETRIC) copy strategy.
10035 */
10036 vm_map_copy_overwrite_aligned_src_not_symmetric++;
10037 vm_object_unlock(new_object);
10038 goto slow_copy;
10039 }
10040 #endif /* !CONFIG_EMBEDDED */
10041 vm_object_unlock(new_object);
10042 }
10043 /*
10044 * The new mapping is still backed by
10045 * anonymous (internal) memory, so it's
10046 * OK to substitute it for the original
10047 * malloc() mapping.
10048 */
10049 }
10050
10051 if (old_object != VM_OBJECT_NULL) {
10052 if (entry->is_sub_map) {
10053 if (entry->use_pmap) {
10054 #ifndef NO_NESTED_PMAP
10055 pmap_unnest(dst_map->pmap,
10056 (addr64_t)entry->vme_start,
10057 entry->vme_end - entry->vme_start);
10058 #endif /* NO_NESTED_PMAP */
10059 if (dst_map->mapped_in_other_pmaps) {
10060 /* clean up parent */
10061 /* map/maps */
10062 vm_map_submap_pmap_clean(
10063 dst_map, entry->vme_start,
10064 entry->vme_end,
10065 VME_SUBMAP(entry),
10066 VME_OFFSET(entry));
10067 }
10068 } else {
10069 vm_map_submap_pmap_clean(
10070 dst_map, entry->vme_start,
10071 entry->vme_end,
10072 VME_SUBMAP(entry),
10073 VME_OFFSET(entry));
10074 }
10075 vm_map_deallocate(VME_SUBMAP(entry));
10076 } else {
10077 if (dst_map->mapped_in_other_pmaps) {
10078 vm_object_pmap_protect_options(
10079 VME_OBJECT(entry),
10080 VME_OFFSET(entry),
10081 entry->vme_end
10082 - entry->vme_start,
10083 PMAP_NULL,
10084 entry->vme_start,
10085 VM_PROT_NONE,
10086 PMAP_OPTIONS_REMOVE);
10087 } else {
10088 pmap_remove_options(
10089 dst_map->pmap,
10090 (addr64_t)(entry->vme_start),
10091 (addr64_t)(entry->vme_end),
10092 PMAP_OPTIONS_REMOVE);
10093 }
10094 vm_object_deallocate(old_object);
10095 }
10096 }
10097
10098 if (entry->iokit_acct) {
10099 /* keep using iokit accounting */
10100 entry->use_pmap = FALSE;
10101 } else {
10102 /* use pmap accounting */
10103 entry->use_pmap = TRUE;
10104 }
10105 entry->is_sub_map = FALSE;
10106 VME_OBJECT_SET(entry, VME_OBJECT(copy_entry));
10107 object = VME_OBJECT(entry);
10108 entry->needs_copy = copy_entry->needs_copy;
10109 entry->wired_count = 0;
10110 entry->user_wired_count = 0;
10111 offset = VME_OFFSET(copy_entry);
10112 VME_OFFSET_SET(entry, offset);
10113
10114 vm_map_copy_entry_unlink(copy, copy_entry);
10115 vm_map_copy_entry_dispose(copy, copy_entry);
10116
10117 /*
10118 * we could try to push pages into the pmap at this point, BUT
10119 * this optimization only saved on average 2 us per page if ALL
10120 * the pages in the source were currently mapped
10121 * and ALL the pages in the dest were touched, if there were fewer
10122 * than 2/3 of the pages touched, this optimization actually cost more cycles
10123 * it also puts a lot of pressure on the pmap layer w/r to mapping structures
10124 */
10125
10126 /*
10127 * Set up for the next iteration. The map
10128 * has not been unlocked, so the next
10129 * address should be at the end of this
10130 * entry, and the next map entry should be
10131 * the one following it.
10132 */
10133
10134 start = tmp_entry->vme_end;
10135 tmp_entry = tmp_entry->vme_next;
10136 } else {
10137 vm_map_version_t version;
10138 vm_object_t dst_object;
10139 vm_object_offset_t dst_offset;
10140 kern_return_t r;
10141
10142 slow_copy:
10143 if (entry->needs_copy) {
10144 VME_OBJECT_SHADOW(entry,
10145 (entry->vme_end -
10146 entry->vme_start));
10147 entry->needs_copy = FALSE;
10148 }
10149
10150 dst_object = VME_OBJECT(entry);
10151 dst_offset = VME_OFFSET(entry);
10152
10153 /*
10154 * Take an object reference, and record
10155 * the map version information so that the
10156 * map can be safely unlocked.
10157 */
10158
10159 if (dst_object == VM_OBJECT_NULL) {
10160 /*
10161 * We would usually have just taken the
10162 * optimized path above if the destination
10163 * object has not been allocated yet. But we
10164 * now disable that optimization if the copy
10165 * entry's object is not backed by anonymous
10166 * memory to avoid replacing malloc'ed
10167 * (i.e. re-usable) anonymous memory with a
10168 * not-so-anonymous mapping.
10169 * So we have to handle this case here and
10170 * allocate a new VM object for this map entry.
10171 */
10172 dst_object = vm_object_allocate(
10173 entry->vme_end - entry->vme_start);
10174 dst_offset = 0;
10175 VME_OBJECT_SET(entry, dst_object);
10176 VME_OFFSET_SET(entry, dst_offset);
10177 assert(entry->use_pmap);
10178 }
10179
10180 vm_object_reference(dst_object);
10181
10182 /* account for unlock bumping up timestamp */
10183 version.main_timestamp = dst_map->timestamp + 1;
10184
10185 vm_map_unlock(dst_map);
10186
10187 /*
10188 * Copy as much as possible in one pass
10189 */
10190
10191 copy_size = size;
10192 r = vm_fault_copy(
10193 VME_OBJECT(copy_entry),
10194 VME_OFFSET(copy_entry),
10195 &copy_size,
10196 dst_object,
10197 dst_offset,
10198 dst_map,
10199 &version,
10200 THREAD_UNINT );
10201
10202 /*
10203 * Release the object reference
10204 */
10205
10206 vm_object_deallocate(dst_object);
10207
10208 /*
10209 * If a hard error occurred, return it now
10210 */
10211
10212 if (r != KERN_SUCCESS) {
10213 return r;
10214 }
10215
10216 if (copy_size != 0) {
10217 /*
10218 * Dispose of the copied region
10219 */
10220
10221 vm_map_copy_clip_end(copy, copy_entry,
10222 copy_entry->vme_start + copy_size);
10223 vm_map_copy_entry_unlink(copy, copy_entry);
10224 vm_object_deallocate(VME_OBJECT(copy_entry));
10225 vm_map_copy_entry_dispose(copy, copy_entry);
10226 }
10227
10228 /*
10229 * Pick up in the destination map where we left off.
10230 *
10231 * Use the version information to avoid a lookup
10232 * in the normal case.
10233 */
10234
10235 start += copy_size;
10236 vm_map_lock(dst_map);
10237 if (version.main_timestamp == dst_map->timestamp &&
10238 copy_size != 0) {
10239 /* We can safely use saved tmp_entry value */
10240
10241 if (tmp_entry->map_aligned &&
10242 !VM_MAP_PAGE_ALIGNED(
10243 start,
10244 VM_MAP_PAGE_MASK(dst_map))) {
10245 /* no longer map-aligned */
10246 tmp_entry->map_aligned = FALSE;
10247 }
10248 vm_map_clip_end(dst_map, tmp_entry, start);
10249 tmp_entry = tmp_entry->vme_next;
10250 } else {
10251 /* Must do lookup of tmp_entry */
10252
10253 if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
10254 vm_map_unlock(dst_map);
10255 return KERN_INVALID_ADDRESS;
10256 }
10257 if (tmp_entry->map_aligned &&
10258 !VM_MAP_PAGE_ALIGNED(
10259 start,
10260 VM_MAP_PAGE_MASK(dst_map))) {
10261 /* no longer map-aligned */
10262 tmp_entry->map_aligned = FALSE;
10263 }
10264 vm_map_clip_start(dst_map, tmp_entry, start);
10265 }
10266 }
10267 }/* while */
10268
10269 return KERN_SUCCESS;
10270 }/* vm_map_copy_overwrite_aligned */
10271
10272 /*
10273 * Routine: vm_map_copyin_kernel_buffer [internal use only]
10274 *
10275 * Description:
10276 * Copy in data to a kernel buffer from space in the
10277 * source map. The original space may be optionally
10278 * deallocated.
10279 *
10280 * If successful, returns a new copy object.
10281 */
10282 static kern_return_t
10283 vm_map_copyin_kernel_buffer(
10284 vm_map_t src_map,
10285 vm_map_offset_t src_addr,
10286 vm_map_size_t len,
10287 boolean_t src_destroy,
10288 vm_map_copy_t *copy_result)
10289 {
10290 kern_return_t kr;
10291 vm_map_copy_t copy;
10292 vm_size_t kalloc_size;
10293
10294 if (len > msg_ool_size_small) {
10295 return KERN_INVALID_ARGUMENT;
10296 }
10297
10298 kalloc_size = (vm_size_t)(cpy_kdata_hdr_sz + len);
10299
10300 copy = (vm_map_copy_t)kalloc(kalloc_size);
10301 if (copy == VM_MAP_COPY_NULL) {
10302 return KERN_RESOURCE_SHORTAGE;
10303 }
10304 copy->type = VM_MAP_COPY_KERNEL_BUFFER;
10305 copy->size = len;
10306 copy->offset = 0;
10307
10308 kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t)len);
10309 if (kr != KERN_SUCCESS) {
10310 kfree(copy, kalloc_size);
10311 return kr;
10312 }
10313 if (src_destroy) {
10314 (void) vm_map_remove(
10315 src_map,
10316 vm_map_trunc_page(src_addr,
10317 VM_MAP_PAGE_MASK(src_map)),
10318 vm_map_round_page(src_addr + len,
10319 VM_MAP_PAGE_MASK(src_map)),
10320 (VM_MAP_REMOVE_INTERRUPTIBLE |
10321 VM_MAP_REMOVE_WAIT_FOR_KWIRE |
10322 ((src_map == kernel_map) ? VM_MAP_REMOVE_KUNWIRE : VM_MAP_REMOVE_NO_FLAGS)));
10323 }
10324 *copy_result = copy;
10325 return KERN_SUCCESS;
10326 }
10327
10328 /*
10329 * Routine: vm_map_copyout_kernel_buffer [internal use only]
10330 *
10331 * Description:
10332 * Copy out data from a kernel buffer into space in the
10333 * destination map. The space may be otpionally dynamically
10334 * allocated.
10335 *
10336 * If successful, consumes the copy object.
10337 * Otherwise, the caller is responsible for it.
10338 */
10339 static int vm_map_copyout_kernel_buffer_failures = 0;
10340 static kern_return_t
10341 vm_map_copyout_kernel_buffer(
10342 vm_map_t map,
10343 vm_map_address_t *addr, /* IN/OUT */
10344 vm_map_copy_t copy,
10345 vm_map_size_t copy_size,
10346 boolean_t overwrite,
10347 boolean_t consume_on_success)
10348 {
10349 kern_return_t kr = KERN_SUCCESS;
10350 thread_t thread = current_thread();
10351
10352 assert(copy->size == copy_size);
10353
10354 /*
10355 * check for corrupted vm_map_copy structure
10356 */
10357 if (copy_size > msg_ool_size_small || copy->offset) {
10358 panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
10359 (long long)copy->size, (long long)copy->offset);
10360 }
10361
10362 if (!overwrite) {
10363 /*
10364 * Allocate space in the target map for the data
10365 */
10366 *addr = 0;
10367 kr = vm_map_enter(map,
10368 addr,
10369 vm_map_round_page(copy_size,
10370 VM_MAP_PAGE_MASK(map)),
10371 (vm_map_offset_t) 0,
10372 VM_FLAGS_ANYWHERE,
10373 VM_MAP_KERNEL_FLAGS_NONE,
10374 VM_KERN_MEMORY_NONE,
10375 VM_OBJECT_NULL,
10376 (vm_object_offset_t) 0,
10377 FALSE,
10378 VM_PROT_DEFAULT,
10379 VM_PROT_ALL,
10380 VM_INHERIT_DEFAULT);
10381 if (kr != KERN_SUCCESS) {
10382 return kr;
10383 }
10384 #if KASAN
10385 if (map->pmap == kernel_pmap) {
10386 kasan_notify_address(*addr, copy->size);
10387 }
10388 #endif
10389 }
10390
10391 /*
10392 * Copyout the data from the kernel buffer to the target map.
10393 */
10394 if (thread->map == map) {
10395 /*
10396 * If the target map is the current map, just do
10397 * the copy.
10398 */
10399 assert((vm_size_t)copy_size == copy_size);
10400 if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
10401 kr = KERN_INVALID_ADDRESS;
10402 }
10403 } else {
10404 vm_map_t oldmap;
10405
10406 /*
10407 * If the target map is another map, assume the
10408 * target's address space identity for the duration
10409 * of the copy.
10410 */
10411 vm_map_reference(map);
10412 oldmap = vm_map_switch(map);
10413
10414 assert((vm_size_t)copy_size == copy_size);
10415 if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
10416 vm_map_copyout_kernel_buffer_failures++;
10417 kr = KERN_INVALID_ADDRESS;
10418 }
10419
10420 (void) vm_map_switch(oldmap);
10421 vm_map_deallocate(map);
10422 }
10423
10424 if (kr != KERN_SUCCESS) {
10425 /* the copy failed, clean up */
10426 if (!overwrite) {
10427 /*
10428 * Deallocate the space we allocated in the target map.
10429 */
10430 (void) vm_map_remove(
10431 map,
10432 vm_map_trunc_page(*addr,
10433 VM_MAP_PAGE_MASK(map)),
10434 vm_map_round_page((*addr +
10435 vm_map_round_page(copy_size,
10436 VM_MAP_PAGE_MASK(map))),
10437 VM_MAP_PAGE_MASK(map)),
10438 VM_MAP_REMOVE_NO_FLAGS);
10439 *addr = 0;
10440 }
10441 } else {
10442 /* copy was successful, dicard the copy structure */
10443 if (consume_on_success) {
10444 kfree(copy, copy_size + cpy_kdata_hdr_sz);
10445 }
10446 }
10447
10448 return kr;
10449 }
10450
10451 /*
10452 * Routine: vm_map_copy_insert [internal use only]
10453 *
10454 * Description:
10455 * Link a copy chain ("copy") into a map at the
10456 * specified location (after "where").
10457 * Side effects:
10458 * The copy chain is destroyed.
10459 */
10460 static void
10461 vm_map_copy_insert(
10462 vm_map_t map,
10463 vm_map_entry_t after_where,
10464 vm_map_copy_t copy)
10465 {
10466 vm_map_entry_t entry;
10467
10468 while (vm_map_copy_first_entry(copy) != vm_map_copy_to_entry(copy)) {
10469 entry = vm_map_copy_first_entry(copy);
10470 vm_map_copy_entry_unlink(copy, entry);
10471 vm_map_store_entry_link(map, after_where, entry,
10472 VM_MAP_KERNEL_FLAGS_NONE);
10473 after_where = entry;
10474 }
10475 zfree(vm_map_copy_zone, copy);
10476 }
10477
10478 void
10479 vm_map_copy_remap(
10480 vm_map_t map,
10481 vm_map_entry_t where,
10482 vm_map_copy_t copy,
10483 vm_map_offset_t adjustment,
10484 vm_prot_t cur_prot,
10485 vm_prot_t max_prot,
10486 vm_inherit_t inheritance)
10487 {
10488 vm_map_entry_t copy_entry, new_entry;
10489
10490 for (copy_entry = vm_map_copy_first_entry(copy);
10491 copy_entry != vm_map_copy_to_entry(copy);
10492 copy_entry = copy_entry->vme_next) {
10493 /* get a new VM map entry for the map */
10494 new_entry = vm_map_entry_create(map,
10495 !map->hdr.entries_pageable);
10496 /* copy the "copy entry" to the new entry */
10497 vm_map_entry_copy(new_entry, copy_entry);
10498 /* adjust "start" and "end" */
10499 new_entry->vme_start += adjustment;
10500 new_entry->vme_end += adjustment;
10501 /* clear some attributes */
10502 new_entry->inheritance = inheritance;
10503 new_entry->protection = cur_prot;
10504 new_entry->max_protection = max_prot;
10505 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
10506 /* take an extra reference on the entry's "object" */
10507 if (new_entry->is_sub_map) {
10508 assert(!new_entry->use_pmap); /* not nested */
10509 vm_map_lock(VME_SUBMAP(new_entry));
10510 vm_map_reference(VME_SUBMAP(new_entry));
10511 vm_map_unlock(VME_SUBMAP(new_entry));
10512 } else {
10513 vm_object_reference(VME_OBJECT(new_entry));
10514 }
10515 /* insert the new entry in the map */
10516 vm_map_store_entry_link(map, where, new_entry,
10517 VM_MAP_KERNEL_FLAGS_NONE);
10518 /* continue inserting the "copy entries" after the new entry */
10519 where = new_entry;
10520 }
10521 }
10522
10523
10524 /*
10525 * Returns true if *size matches (or is in the range of) copy->size.
10526 * Upon returning true, the *size field is updated with the actual size of the
10527 * copy object (may be different for VM_MAP_COPY_ENTRY_LIST types)
10528 */
10529 boolean_t
10530 vm_map_copy_validate_size(
10531 vm_map_t dst_map,
10532 vm_map_copy_t copy,
10533 vm_map_size_t *size)
10534 {
10535 if (copy == VM_MAP_COPY_NULL) {
10536 return FALSE;
10537 }
10538 vm_map_size_t copy_sz = copy->size;
10539 vm_map_size_t sz = *size;
10540 switch (copy->type) {
10541 case VM_MAP_COPY_OBJECT:
10542 case VM_MAP_COPY_KERNEL_BUFFER:
10543 if (sz == copy_sz) {
10544 return TRUE;
10545 }
10546 break;
10547 case VM_MAP_COPY_ENTRY_LIST:
10548 /*
10549 * potential page-size rounding prevents us from exactly
10550 * validating this flavor of vm_map_copy, but we can at least
10551 * assert that it's within a range.
10552 */
10553 if (copy_sz >= sz &&
10554 copy_sz <= vm_map_round_page(sz, VM_MAP_PAGE_MASK(dst_map))) {
10555 *size = copy_sz;
10556 return TRUE;
10557 }
10558 break;
10559 default:
10560 break;
10561 }
10562 return FALSE;
10563 }
10564
10565 /*
10566 * Routine: vm_map_copyout_size
10567 *
10568 * Description:
10569 * Copy out a copy chain ("copy") into newly-allocated
10570 * space in the destination map. Uses a prevalidated
10571 * size for the copy object (vm_map_copy_validate_size).
10572 *
10573 * If successful, consumes the copy object.
10574 * Otherwise, the caller is responsible for it.
10575 */
10576 kern_return_t
10577 vm_map_copyout_size(
10578 vm_map_t dst_map,
10579 vm_map_address_t *dst_addr, /* OUT */
10580 vm_map_copy_t copy,
10581 vm_map_size_t copy_size)
10582 {
10583 return vm_map_copyout_internal(dst_map, dst_addr, copy, copy_size,
10584 TRUE, /* consume_on_success */
10585 VM_PROT_DEFAULT,
10586 VM_PROT_ALL,
10587 VM_INHERIT_DEFAULT);
10588 }
10589
10590 /*
10591 * Routine: vm_map_copyout
10592 *
10593 * Description:
10594 * Copy out a copy chain ("copy") into newly-allocated
10595 * space in the destination map.
10596 *
10597 * If successful, consumes the copy object.
10598 * Otherwise, the caller is responsible for it.
10599 */
10600 kern_return_t
10601 vm_map_copyout(
10602 vm_map_t dst_map,
10603 vm_map_address_t *dst_addr, /* OUT */
10604 vm_map_copy_t copy)
10605 {
10606 return vm_map_copyout_internal(dst_map, dst_addr, copy, copy ? copy->size : 0,
10607 TRUE, /* consume_on_success */
10608 VM_PROT_DEFAULT,
10609 VM_PROT_ALL,
10610 VM_INHERIT_DEFAULT);
10611 }
10612
/*
 * Routine: vm_map_copyout_internal
 *
 * Description:
 * Copy out "copy" into newly-allocated space in "dst_map" and
 * return the address of the data through "dst_addr".
 *
 * A null copy yields *dst_addr == 0 and KERN_SUCCESS. A "copy_size"
 * that differs from copy->size yields *dst_addr == 0 and KERN_FAILURE.
 *
 * VM_MAP_COPY_OBJECT copies are mapped with vm_map_enter();
 * VM_MAP_COPY_KERNEL_BUFFER copies are handed to
 * vm_map_copyout_kernel_buffer(). For entry-list copies, a free
 * range is located in "dst_map" (KERN_NO_SPACE if there is none)
 * and the copy's entries are either moved into the map
 * ("consume_on_success" set) or duplicated into it with
 * vm_map_copy_remap() ("consume_on_success" clear).
 *
 * "cur_protection", "max_protection" and "inheritance" are only
 * passed to vm_map_copy_remap(); every other path uses
 * VM_PROT_DEFAULT, VM_PROT_ALL and VM_INHERIT_DEFAULT.
 *
 * The copy object is only freed/consumed here when
 * "consume_on_success" is set and the operation succeeds.
 */
10613 kern_return_t
10614 vm_map_copyout_internal(
10615 vm_map_t dst_map,
10616 vm_map_address_t *dst_addr, /* OUT */
10617 vm_map_copy_t copy,
10618 vm_map_size_t copy_size,
10619 boolean_t consume_on_success,
10620 vm_prot_t cur_protection,
10621 vm_prot_t max_protection,
10622 vm_inherit_t inheritance)
10623 {
10624 vm_map_size_t size; /* page-rounded size of the range to allocate */
10625 vm_map_size_t adjustment; /* added to each copy entry's start and end */
10626 vm_map_offset_t start; /* candidate start address in dst_map */
10627 vm_object_offset_t vm_copy_start; /* copy->offset truncated with the copy's page mask */
10628 vm_map_entry_t last; /* the copy's entries get linked after this one */
10629 vm_map_entry_t entry;
10630 vm_map_entry_t hole_entry; /* head of dst_map's hole list, when enabled */
10631
10632 /*
10633 * Check for null copy object.
10634 */
10635
10636 if (copy == VM_MAP_COPY_NULL) {
10637 *dst_addr = 0;
10638 return KERN_SUCCESS;
10639 }
10640
/* the caller's idea of the size must agree with the copy object */
10641 if (copy->size != copy_size) {
10642 *dst_addr = 0;
10643 return KERN_FAILURE;
10644 }
10645
10646 /*
10647 * Check for special copy object, created
10648 * by vm_map_copyin_object.
10649 */
10650
10651 if (copy->type == VM_MAP_COPY_OBJECT) {
10652 vm_object_t object = copy->cpy_object;
10653 kern_return_t kr;
10654 vm_object_offset_t offset;
10655
/*
 * Map whole pages: start at the page containing copy->offset and
 * make the size cover the leading partial page as well.
 */
10656 offset = vm_object_trunc_page(copy->offset);
10657 size = vm_map_round_page((copy_size +
10658 (vm_map_size_t)(copy->offset -
10659 offset)),
10660 VM_MAP_PAGE_MASK(dst_map));
10661 *dst_addr = 0;
10662 kr = vm_map_enter(dst_map, dst_addr, size,
10663 (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
10664 VM_MAP_KERNEL_FLAGS_NONE,
10665 VM_KERN_MEMORY_NONE,
10666 object, offset, FALSE,
10667 VM_PROT_DEFAULT, VM_PROT_ALL,
10668 VM_INHERIT_DEFAULT);
10669 if (kr != KERN_SUCCESS) {
10670 return kr;
10671 }
10672 /* Account for non-pagealigned copy object */
10673 *dst_addr += (vm_map_offset_t)(copy->offset - offset);
10674 if (consume_on_success) {
10675 zfree(vm_map_copy_zone, copy);
10676 }
10677 return KERN_SUCCESS;
10678 }
10679
10680 /*
10681 * Check for special kernel buffer allocated
10682 * by new_ipc_kmsg_copyin.
10683 */
10684
10685 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
10686 return vm_map_copyout_kernel_buffer(dst_map, dst_addr,
10687 copy, copy_size, FALSE,
10688 consume_on_success);
10689 }
10690
10691
10692 /*
10693 * Find space for the data
10694 */
10695
/*
 * "size" spans from the page containing copy->offset to the end of
 * the page containing the last byte, using the copy's page mask.
 */
10696 vm_copy_start = vm_map_trunc_page((vm_map_size_t)copy->offset,
10697 VM_MAP_COPY_PAGE_MASK(copy));
10698 size = vm_map_round_page((vm_map_size_t)copy->offset + copy_size,
10699 VM_MAP_COPY_PAGE_MASK(copy))
10700 - vm_copy_start;
10701
10702
10703 StartAgain:;
10704
/*
 * Pick where the search starts: after the highest entry when entry
 * reuse is disabled, else at the first hole (hole list enabled) or
 * after the map's "first_free" entry.
 */
10705 vm_map_lock(dst_map);
10706 if (dst_map->disable_vmentry_reuse == TRUE) {
10707 VM_MAP_HIGHEST_ENTRY(dst_map, entry, start);
10708 last = entry;
10709 } else {
10710 if (dst_map->holelistenabled) {
10711 hole_entry = CAST_TO_VM_MAP_ENTRY(dst_map->holes_list);
10712
10713 if (hole_entry == NULL) {
10714 /*
10715 * No more space in the map?
10716 */
10717 vm_map_unlock(dst_map);
10718 return KERN_NO_SPACE;
10719 }
10720
10721 last = hole_entry;
10722 start = last->vme_start;
10723 } else {
10724 assert(first_free_is_valid(dst_map));
10725 start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
10726 vm_map_min(dst_map) : last->vme_end;
10727 }
10728 start = vm_map_round_page(start,
10729 VM_MAP_PAGE_MASK(dst_map));
10730 }
10731
/* advance through the candidates until [start, start + size) fits */
10732 while (TRUE) {
10733 vm_map_entry_t next = last->vme_next;
10734 vm_map_offset_t end = start + size;
10735
/* the range runs past the end of the map, or "start + size" wrapped around */
10736 if ((end > dst_map->max_offset) || (end < start)) {
10737 if (dst_map->wait_for_space) {
/*
 * If the request is no bigger than the whole map, sleep on the
 * map and retry from the top.
 * NOTE(review): the result of thread_block() is not examined, so
 * an interrupted wait simply retries - confirm this is intended.
 */
10738 if (size <= (dst_map->max_offset - dst_map->min_offset)) {
10739 assert_wait((event_t) dst_map,
10740 THREAD_INTERRUPTIBLE);
10741 vm_map_unlock(dst_map);
10742 thread_block(THREAD_CONTINUE_NULL);
10743 goto StartAgain;
10744 }
10745 }
10746 vm_map_unlock(dst_map);
10747 return KERN_NO_SPACE;
10748 }
10749
10750 if (dst_map->holelistenabled) {
/* "last" is a hole here: it fits if the hole extends to "end" */
10751 if (last->vme_end >= end) {
10752 break;
10753 }
10754 } else {
10755 /*
10756 * If there are no more entries, we must win.
10757 *
10758 * OR
10759 *
10760 * If there is another entry, it must be
10761 * after the end of the potential new region.
10762 */
10763
10764 if (next == vm_map_to_entry(dst_map)) {
10765 break;
10766 }
10767
10768 if (next->vme_start >= end) {
10769 break;
10770 }
10771 }
10772
10773 last = next;
10774
10775 if (dst_map->holelistenabled) {
10776 if (last == CAST_TO_VM_MAP_ENTRY(dst_map->holes_list)) {
10777 /*
10778 * Wrapped around
10779 */
10780 vm_map_unlock(dst_map);
10781 return KERN_NO_SPACE;
10782 }
10783 start = last->vme_start;
10784 } else {
10785 start = last->vme_end;
10786 }
10787 start = vm_map_round_page(start,
10788 VM_MAP_PAGE_MASK(dst_map));
10789 }
10790
/*
 * With the hole list, "last" is a hole rather than a map entry.
 * Look up the hole's start in the map: finding an entry there is
 * fatal. Presumably a failed lookup leaves the preceding map entry
 * in "last", which is what the linking below needs - confirm
 * against vm_map_lookup_entry().
 */
10791 if (dst_map->holelistenabled) {
10792 if (vm_map_lookup_entry(dst_map, last->vme_start, &last)) {
10793 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", last, (unsigned long long)last->vme_start);
10794 }
10795 }
10796
10797
/* distance to move the copy's entries so that they start at "start" */
10798 adjustment = start - vm_copy_start;
10799 if (!consume_on_success) {
10800 /*
10801 * We're not allowed to consume "copy", so we'll have to
10802 * copy its map entries into the destination map below.
10803 * No need to re-allocate map entries from the correct
10804 * (pageable or not) zone, since we'll get new map entries
10805 * during the transfer.
10806 * We'll also adjust the map entries's "start" and "end"
10807 * during the transfer, to keep "copy"'s entries consistent
10808 * with its "offset".
10809 */
10810 goto after_adjustments;
10811 }
10812
10813 /*
10814 * Since we're going to just drop the map
10815 * entries from the copy into the destination
10816 * map, they must come from the same pool.
10817 */
10818
10819 if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
10820 /*
10821 * Mismatches occur when dealing with the default
10822 * pager.
10823 */
10824 zone_t old_zone;
10825 vm_map_entry_t next, new;
10826
10827 /*
10828 * Find the zone that the copies were allocated from
10829 */
10830
10831 entry = vm_map_copy_first_entry(copy);
10832
10833 /*
10834 * Reinitialize the copy so that vm_map_copy_entry_link
10835 * will work.
10836 */
10837 vm_map_store_copy_reset(copy, entry);
10838 copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;
10839
10840 /*
10841 * Copy each entry.
10842 */
10843 while (entry != vm_map_copy_to_entry(copy)) {
10844 new = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
10845 vm_map_entry_copy_full(new, entry);
10846 new->vme_no_copy_on_read = FALSE;
10847 assert(!new->iokit_acct);
10848 if (new->is_sub_map) {
10849 /* clr address space specifics */
10850 new->use_pmap = FALSE;
10851 }
10852 vm_map_copy_entry_link(copy,
10853 vm_map_copy_last_entry(copy),
10854 new);
/* free the old entry to the zone recorded in its "from_reserved_zone" flag */
10855 next = entry->vme_next;
10856 old_zone = entry->from_reserved_zone ? vm_map_entry_reserved_zone : vm_map_entry_zone;
10857 zfree(old_zone, entry);
10858 entry = next;
10859 }
10860 }
10861
10862 /*
10863 * Adjust the addresses in the copy chain, and
10864 * reset the region attributes.
10865 */
10866
10867 for (entry = vm_map_copy_first_entry(copy);
10868 entry != vm_map_copy_to_entry(copy);
10869 entry = entry->vme_next) {
10870 if (VM_MAP_PAGE_SHIFT(dst_map) == PAGE_SHIFT) {
10871 /*
10872 * We're injecting this copy entry into a map that
10873 * has the standard page alignment, so clear
10874 * "map_aligned" (which might have been inherited
10875 * from the original map entry).
10876 */
10877 entry->map_aligned = FALSE;
10878 }
10879
10880 entry->vme_start += adjustment;
10881 entry->vme_end += adjustment;
10882
10883 if (entry->map_aligned) {
10884 assert(VM_MAP_PAGE_ALIGNED(entry->vme_start,
10885 VM_MAP_PAGE_MASK(dst_map)));
10886 assert(VM_MAP_PAGE_ALIGNED(entry->vme_end,
10887 VM_MAP_PAGE_MASK(dst_map)));
10888 }
10889
/* on this (consuming) path the caller's protections and inheritance are not used */
10890 entry->inheritance = VM_INHERIT_DEFAULT;
10891 entry->protection = VM_PROT_DEFAULT;
10892 entry->max_protection = VM_PROT_ALL;
10893 entry->behavior = VM_BEHAVIOR_DEFAULT;
10894
10895 /*
10896 * If the entry is now wired,
10897 * map the pages into the destination map.
10898 */
10899 if (entry->wired_count != 0) {
10900 vm_map_offset_t va;
10901 vm_object_offset_t offset;
10902 vm_object_t object;
10903 vm_prot_t prot;
10904 int type_of_fault;
10905
10906 object = VME_OBJECT(entry);
10907 offset = VME_OFFSET(entry);
10908 va = entry->vme_start;
10909
10910 pmap_pageable(dst_map->pmap,
10911 entry->vme_start,
10912 entry->vme_end,
10913 TRUE);
10914
/* enter the entry's pages into dst_map's pmap, one page per iteration */
10915 while (va < entry->vme_end) {
10916 vm_page_t m;
10917 struct vm_object_fault_info fault_info = {};
10918
10919 /*
10920 * Look up the page in the object.
10921 * Assert that the page will be found in the
10922 * top object:
10923 * either
10924 * the object was newly created by
10925 * vm_object_copy_slowly, and has
10926 * copies of all of the pages from
10927 * the source object
10928 * or
10929 * the object was moved from the old
10930 * map entry; because the old map
10931 * entry was wired, all of the pages
10932 * were in the top-level object.
10933 * (XXX not true if we wire pages for
10934 * reading)
10935 */
10936 vm_object_lock(object);
10937
10938 m = vm_page_lookup(object, offset);
10939 if (m == VM_PAGE_NULL || !VM_PAGE_WIRED(m) ||
10940 m->vmp_absent) {
10941 panic("vm_map_copyout: wiring %p", m);
10942 }
10943
10944 prot = entry->protection;
10945
/* add execute permission when override_nx() says so and the entry has any access at all */
10946 if (override_nx(dst_map, VME_ALIAS(entry)) &&
10947 prot) {
10948 prot |= VM_PROT_EXECUTE;
10949 }
10950
10951 type_of_fault = DBG_CACHE_HIT_FAULT;
10952
10953 fault_info.user_tag = VME_ALIAS(entry);
10954 fault_info.pmap_options = 0;
/* request alternate accounting for IOKit-accounted entries and for non-submap entries that don't use pmap accounting */
10955 if (entry->iokit_acct ||
10956 (!entry->is_sub_map && !entry->use_pmap)) {
10957 fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
10958 }
10959
10960 vm_fault_enter(m,
10961 dst_map->pmap,
10962 va,
10963 prot,
10964 prot,
10965 VM_PAGE_WIRED(m),
10966 FALSE, /* change_wiring */
10967 VM_KERN_MEMORY_NONE, /* tag - not wiring */
10968 &fault_info,
10969 NULL, /* need_retry */
10970 &type_of_fault);
10971
10972 vm_object_unlock(object);
10973
10974 offset += PAGE_SIZE_64;
10975 va += PAGE_SIZE;
10976 }
10977 }
10978 }
10979
10980 after_adjustments:
10981
10982 /*
10983 * Correct the page alignment for the result
10984 */
10985
/* the data starts at the same offset within its first page as it did in the copy */
10986 *dst_addr = start + (copy->offset - vm_copy_start);
10987
10988 #if KASAN
10989 kasan_notify_address(*dst_addr, size);
10990 #endif
10991
10992 /*
10993 * Update the hints and the map size
10994 */
10995
10996 if (consume_on_success) {
10997 SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy));
10998 } else {
10999 SAVE_HINT_MAP_WRITE(dst_map, last);
11000 }
11001
11002 dst_map->size += size;
11003
11004 /*
11005 * Link in the copy
11006 */
11007
/* move the copy's own entries into the map, or insert duplicates and leave "copy" to the caller */
11008 if (consume_on_success) {
11009 vm_map_copy_insert(dst_map, last, copy);
11010 } else {
11011 vm_map_copy_remap(dst_map, last, copy, adjustment,
11012 cur_protection, max_protection,
11013 inheritance);
11014 }
11015
11016 vm_map_unlock(dst_map);
11017
11018 /*
11019 * XXX If wiring_required, call vm_map_pageable
11020 */
11021
11022 return KERN_SUCCESS;
11023 }
11024
11025 /*
11026 * Routine: vm_map_copyin
11027 *
11028 * Description:
11029 * see vm_map_copyin_common. Exported via Unsupported.exports.
11030 *
11031 */
11032
11033 #undef vm_map_copyin
11034
11035 kern_return_t
11036 vm_map_copyin(
11037 vm_map_t src_map,
11038 vm_map_address_t src_addr,
11039 vm_map_size_t len,
11040 boolean_t src_destroy,
11041 vm_map_copy_t *copy_result) /* OUT */
11042 {
11043 return vm_map_copyin_common(src_map, src_addr, len, src_destroy,
11044 FALSE, copy_result, FALSE);
11045 }
11046
11047 /*
11048 * Routine: vm_map_copyin_common
11049 *
11050 * Description:
11051 * Copy the specified region (src_addr, len) from the
11052 * source address space (src_map), possibly removing
11053 * the region from the source address space (src_destroy).
11054 *
11055 * Returns:
11056 * A vm_map_copy_t object (copy_result), suitable for
11057 * insertion into another address space (using vm_map_copyout),
11058 * copying over another address space region (using
11059 * vm_map_copy_overwrite). If the copy is unused, it
11060 * should be destroyed (using vm_map_copy_discard).
11061 *
11062 * In/out conditions:
11063 * The source map should not be locked on entry.
11064 */
11065
/*
 * One saved level of the submap descent performed by
 * vm_map_copyin_internal(). A node is allocated with kalloc() and pushed
 * on the "parent_maps" list each time the copy-in steps into a submap
 * entry; the RETURN() macro frees whatever is still on that list.
 */
11066 typedef struct submap_map {
11067 vm_map_t parent_map; /* map being copied from when the submap entry was found */
11068 vm_map_offset_t base_start; /* "src_start" in parent_map at that point */
11069 vm_map_offset_t base_end; /* "src_end" in parent_map at that point */
11070 vm_map_size_t base_len; /* bytes from base_start to the end of the submap entry, capped at base_end - base_start */
11071 struct submap_map *next; /* previously pushed level, or NULL */
11072 } submap_map_t;
11073
11074 kern_return_t
11075 vm_map_copyin_common(
11076 vm_map_t src_map,
11077 vm_map_address_t src_addr,
11078 vm_map_size_t len,
11079 boolean_t src_destroy,
11080 __unused boolean_t src_volatile,
11081 vm_map_copy_t *copy_result, /* OUT */
11082 boolean_t use_maxprot)
11083 {
11084 int flags;
11085
11086 flags = 0;
11087 if (src_destroy) {
11088 flags |= VM_MAP_COPYIN_SRC_DESTROY;
11089 }
11090 if (use_maxprot) {
11091 flags |= VM_MAP_COPYIN_USE_MAXPROT;
11092 }
11093 return vm_map_copyin_internal(src_map,
11094 src_addr,
11095 len,
11096 flags,
11097 copy_result);
11098 }
11099 kern_return_t
11100 vm_map_copyin_internal(
11101 vm_map_t src_map,
11102 vm_map_address_t src_addr,
11103 vm_map_size_t len,
11104 int flags,
11105 vm_map_copy_t *copy_result) /* OUT */
11106 {
11107 vm_map_entry_t tmp_entry; /* Result of last map lookup --
11108 * in multi-level lookup, this
11109 * entry contains the actual
11110 * vm_object/offset.
11111 */
11112 vm_map_entry_t new_entry = VM_MAP_ENTRY_NULL; /* Map entry for copy */
11113
11114 vm_map_offset_t src_start; /* Start of current entry --
11115 * where copy is taking place now
11116 */
11117 vm_map_offset_t src_end; /* End of entire region to be
11118 * copied */
11119 vm_map_offset_t src_base;
11120 vm_map_t base_map = src_map;
11121 boolean_t map_share = FALSE;
11122 submap_map_t *parent_maps = NULL;
11123
11124 vm_map_copy_t copy; /* Resulting copy */
11125 vm_map_address_t copy_addr;
11126 vm_map_size_t copy_size;
11127 boolean_t src_destroy;
11128 boolean_t use_maxprot;
11129 boolean_t preserve_purgeable;
11130 boolean_t entry_was_shared;
11131 vm_map_entry_t saved_src_entry;
11132
11133 if (flags & ~VM_MAP_COPYIN_ALL_FLAGS) {
11134 return KERN_INVALID_ARGUMENT;
11135 }
11136
11137 src_destroy = (flags & VM_MAP_COPYIN_SRC_DESTROY) ? TRUE : FALSE;
11138 use_maxprot = (flags & VM_MAP_COPYIN_USE_MAXPROT) ? TRUE : FALSE;
11139 preserve_purgeable =
11140 (flags & VM_MAP_COPYIN_PRESERVE_PURGEABLE) ? TRUE : FALSE;
11141
11142 /*
11143 * Check for copies of zero bytes.
11144 */
11145
11146 if (len == 0) {
11147 *copy_result = VM_MAP_COPY_NULL;
11148 return KERN_SUCCESS;
11149 }
11150
11151 /*
11152 * Check that the end address doesn't overflow
11153 */
11154 src_end = src_addr + len;
11155 if (src_end < src_addr) {
11156 return KERN_INVALID_ADDRESS;
11157 }
11158
11159 /*
11160 * Compute (page aligned) start and end of region
11161 */
11162 src_start = vm_map_trunc_page(src_addr,
11163 VM_MAP_PAGE_MASK(src_map));
11164 src_end = vm_map_round_page(src_end,
11165 VM_MAP_PAGE_MASK(src_map));
11166
11167 /*
11168 * If the copy is sufficiently small, use a kernel buffer instead
11169 * of making a virtual copy. The theory being that the cost of
11170 * setting up VM (and taking C-O-W faults) dominates the copy costs
11171 * for small regions.
11172 */
11173 if ((len < msg_ool_size_small) &&
11174 !use_maxprot &&
11175 !preserve_purgeable &&
11176 !(flags & VM_MAP_COPYIN_ENTRY_LIST) &&
11177 /*
11178 * Since the "msg_ool_size_small" threshold was increased and
11179 * vm_map_copyin_kernel_buffer() doesn't handle accesses beyond the
11180 * address space limits, we revert to doing a virtual copy if the
11181 * copied range goes beyond those limits. Otherwise, mach_vm_read()
11182 * of the commpage would now fail when it used to work.
11183 */
11184 (src_start >= vm_map_min(src_map) &&
11185 src_start < vm_map_max(src_map) &&
11186 src_end >= vm_map_min(src_map) &&
11187 src_end < vm_map_max(src_map))) {
11188 return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
11189 src_destroy, copy_result);
11190 }
11191
11192 /*
11193 * Allocate a header element for the list.
11194 *
11195 * Use the start and end in the header to
11196 * remember the endpoints prior to rounding.
11197 */
11198
11199 copy = vm_map_copy_allocate();
11200 copy->type = VM_MAP_COPY_ENTRY_LIST;
11201 copy->cpy_hdr.entries_pageable = TRUE;
11202 #if 00
11203 copy->cpy_hdr.page_shift = src_map->hdr.page_shift;
11204 #else
11205 /*
11206 * The copy entries can be broken down for a variety of reasons,
11207 * so we can't guarantee that they will remain map-aligned...
11208 * Will need to adjust the first copy_entry's "vme_start" and
11209 * the last copy_entry's "vme_end" to be rounded to PAGE_MASK
11210 * rather than the original map's alignment.
11211 */
11212 copy->cpy_hdr.page_shift = PAGE_SHIFT;
11213 #endif
11214
11215 vm_map_store_init( &(copy->cpy_hdr));
11216
11217 copy->offset = src_addr;
11218 copy->size = len;
11219
11220 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
11221
11222 #define RETURN(x) \
11223 MACRO_BEGIN \
11224 vm_map_unlock(src_map); \
11225 if(src_map != base_map) \
11226 vm_map_deallocate(src_map); \
11227 if (new_entry != VM_MAP_ENTRY_NULL) \
11228 vm_map_copy_entry_dispose(copy,new_entry); \
11229 vm_map_copy_discard(copy); \
11230 { \
11231 submap_map_t *_ptr; \
11232 \
11233 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
11234 parent_maps=parent_maps->next; \
11235 if (_ptr->parent_map != base_map) \
11236 vm_map_deallocate(_ptr->parent_map); \
11237 kfree(_ptr, sizeof(submap_map_t)); \
11238 } \
11239 } \
11240 MACRO_RETURN(x); \
11241 MACRO_END
11242
11243 /*
11244 * Find the beginning of the region.
11245 */
11246
11247 vm_map_lock(src_map);
11248
11249 /*
11250 * Lookup the original "src_addr" rather than the truncated
11251 * "src_start", in case "src_start" falls in a non-map-aligned
11252 * map entry *before* the map entry that contains "src_addr"...
11253 */
11254 if (!vm_map_lookup_entry(src_map, src_addr, &tmp_entry)) {
11255 RETURN(KERN_INVALID_ADDRESS);
11256 }
11257 if (!tmp_entry->is_sub_map) {
11258 /*
11259 * ... but clip to the map-rounded "src_start" rather than
11260 * "src_addr" to preserve map-alignment. We'll adjust the
11261 * first copy entry at the end, if needed.
11262 */
11263 vm_map_clip_start(src_map, tmp_entry, src_start);
11264 }
11265 if (src_start < tmp_entry->vme_start) {
11266 /*
11267 * Move "src_start" up to the start of the
11268 * first map entry to copy.
11269 */
11270 src_start = tmp_entry->vme_start;
11271 }
11272 /* set for later submap fix-up */
11273 copy_addr = src_start;
11274
11275 /*
11276 * Go through entries until we get to the end.
11277 */
11278
11279 while (TRUE) {
11280 vm_map_entry_t src_entry = tmp_entry; /* Top-level entry */
11281 vm_map_size_t src_size; /* Size of source
11282 * map entry (in both
11283 * maps)
11284 */
11285
11286 vm_object_t src_object; /* Object to copy */
11287 vm_object_offset_t src_offset;
11288
11289 boolean_t src_needs_copy; /* Should source map
11290 * be made read-only
11291 * for copy-on-write?
11292 */
11293
11294 boolean_t new_entry_needs_copy; /* Will new entry be COW? */
11295
11296 boolean_t was_wired; /* Was source wired? */
11297 vm_map_version_t version; /* Version before locks
11298 * dropped to make copy
11299 */
11300 kern_return_t result; /* Return value from
11301 * copy_strategically.
11302 */
11303 while (tmp_entry->is_sub_map) {
11304 vm_map_size_t submap_len;
11305 submap_map_t *ptr;
11306
11307 ptr = (submap_map_t *)kalloc(sizeof(submap_map_t));
11308 ptr->next = parent_maps;
11309 parent_maps = ptr;
11310 ptr->parent_map = src_map;
11311 ptr->base_start = src_start;
11312 ptr->base_end = src_end;
11313 submap_len = tmp_entry->vme_end - src_start;
11314 if (submap_len > (src_end - src_start)) {
11315 submap_len = src_end - src_start;
11316 }
11317 ptr->base_len = submap_len;
11318
11319 src_start -= tmp_entry->vme_start;
11320 src_start += VME_OFFSET(tmp_entry);
11321 src_end = src_start + submap_len;
11322 src_map = VME_SUBMAP(tmp_entry);
11323 vm_map_lock(src_map);
11324 /* keep an outstanding reference for all maps in */
11325 /* the parents tree except the base map */
11326 vm_map_reference(src_map);
11327 vm_map_unlock(ptr->parent_map);
11328 if (!vm_map_lookup_entry(
11329 src_map, src_start, &tmp_entry)) {
11330 RETURN(KERN_INVALID_ADDRESS);
11331 }
11332 map_share = TRUE;
11333 if (!tmp_entry->is_sub_map) {
11334 vm_map_clip_start(src_map, tmp_entry, src_start);
11335 }
11336 src_entry = tmp_entry;
11337 }
11338 /* we are now in the lowest level submap... */
11339
11340 if ((VME_OBJECT(tmp_entry) != VM_OBJECT_NULL) &&
11341 (VME_OBJECT(tmp_entry)->phys_contiguous)) {
11342 /* This is not supported for now. In the future */
11343 /* we will need to detect the phys_contig */
11344 /* condition and then upgrade copy_slowly */
11345 /* to do physical copy from the device mem */
11346 /* based object. We can piggy-back off of */
11347 /* the was wired boolean to set-up the */
11348 /* proper handling */
11349 RETURN(KERN_PROTECTION_FAILURE);
11350 }
11351 /*
11352 * Create a new address map entry to hold the result.
11353 * Fill in the fields from the appropriate source entries.
11354 * We must unlock the source map to do this if we need
11355 * to allocate a map entry.
11356 */
11357 if (new_entry == VM_MAP_ENTRY_NULL) {
11358 version.main_timestamp = src_map->timestamp;
11359 vm_map_unlock(src_map);
11360
11361 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
11362
11363 vm_map_lock(src_map);
11364 if ((version.main_timestamp + 1) != src_map->timestamp) {
11365 if (!vm_map_lookup_entry(src_map, src_start,
11366 &tmp_entry)) {
11367 RETURN(KERN_INVALID_ADDRESS);
11368 }
11369 if (!tmp_entry->is_sub_map) {
11370 vm_map_clip_start(src_map, tmp_entry, src_start);
11371 }
11372 continue; /* restart w/ new tmp_entry */
11373 }
11374 }
11375
11376 /*
11377 * Verify that the region can be read.
11378 */
11379 if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE &&
11380 !use_maxprot) ||
11381 (src_entry->max_protection & VM_PROT_READ) == 0) {
11382 RETURN(KERN_PROTECTION_FAILURE);
11383 }
11384
11385 /*
11386 * Clip against the endpoints of the entire region.
11387 */
11388
11389 vm_map_clip_end(src_map, src_entry, src_end);
11390
11391 src_size = src_entry->vme_end - src_start;
11392 src_object = VME_OBJECT(src_entry);
11393 src_offset = VME_OFFSET(src_entry);
11394 was_wired = (src_entry->wired_count != 0);
11395
11396 vm_map_entry_copy(new_entry, src_entry);
11397 if (new_entry->is_sub_map) {
11398 /* clr address space specifics */
11399 new_entry->use_pmap = FALSE;
11400 } else {
11401 /*
11402 * We're dealing with a copy-on-write operation,
11403 * so the resulting mapping should not inherit the
11404 * original mapping's accounting settings.
11405 * "iokit_acct" should have been cleared in
11406 * vm_map_entry_copy().
11407 * "use_pmap" should be reset to its default (TRUE)
11408 * so that the new mapping gets accounted for in
11409 * the task's memory footprint.
11410 */
11411 assert(!new_entry->iokit_acct);
11412 new_entry->use_pmap = TRUE;
11413 }
11414
11415 /*
11416 * Attempt non-blocking copy-on-write optimizations.
11417 */
11418
11419 /*
11420 * If we are destroying the source, and the object
11421 * is internal, we could move the object reference
11422 * from the source to the copy. The copy is
11423 * copy-on-write only if the source is.
11424 * We make another reference to the object, because
11425 * destroying the source entry will deallocate it.
11426 *
11427 * This memory transfer has to be atomic, (to prevent
11428 * the VM object from being shared or copied while
11429 * it's being moved here), so we could only do this
11430 * if we won't have to unlock the VM map until the
11431 * original mapping has been fully removed.
11432 */
11433
11434 RestartCopy:
11435 if ((src_object == VM_OBJECT_NULL ||
11436 (!was_wired && !map_share && !tmp_entry->is_shared)) &&
11437 vm_object_copy_quickly(
11438 VME_OBJECT_PTR(new_entry),
11439 src_offset,
11440 src_size,
11441 &src_needs_copy,
11442 &new_entry_needs_copy)) {
11443 new_entry->needs_copy = new_entry_needs_copy;
11444
11445 /*
11446 * Handle copy-on-write obligations
11447 */
11448
11449 if (src_needs_copy && !tmp_entry->needs_copy) {
11450 vm_prot_t prot;
11451
11452 prot = src_entry->protection & ~VM_PROT_WRITE;
11453
11454 if (override_nx(src_map, VME_ALIAS(src_entry))
11455 && prot) {
11456 prot |= VM_PROT_EXECUTE;
11457 }
11458
11459 vm_object_pmap_protect(
11460 src_object,
11461 src_offset,
11462 src_size,
11463 (src_entry->is_shared ?
11464 PMAP_NULL
11465 : src_map->pmap),
11466 src_entry->vme_start,
11467 prot);
11468
11469 assert(tmp_entry->wired_count == 0);
11470 tmp_entry->needs_copy = TRUE;
11471 }
11472
11473 /*
11474 * The map has never been unlocked, so it's safe
11475 * to move to the next entry rather than doing
11476 * another lookup.
11477 */
11478
11479 goto CopySuccessful;
11480 }
11481
11482 entry_was_shared = tmp_entry->is_shared;
11483
11484 /*
11485 * Take an object reference, so that we may
11486 * release the map lock(s).
11487 */
11488
11489 assert(src_object != VM_OBJECT_NULL);
11490 vm_object_reference(src_object);
11491
11492 /*
11493 * Record the timestamp for later verification.
11494 * Unlock the map.
11495 */
11496
11497 version.main_timestamp = src_map->timestamp;
11498 vm_map_unlock(src_map); /* Increments timestamp once! */
11499 saved_src_entry = src_entry;
11500 tmp_entry = VM_MAP_ENTRY_NULL;
11501 src_entry = VM_MAP_ENTRY_NULL;
11502
11503 /*
11504 * Perform the copy
11505 */
11506
11507 if (was_wired) {
11508 CopySlowly:
11509 vm_object_lock(src_object);
11510 result = vm_object_copy_slowly(
11511 src_object,
11512 src_offset,
11513 src_size,
11514 THREAD_UNINT,
11515 VME_OBJECT_PTR(new_entry));
11516 VME_OFFSET_SET(new_entry, 0);
11517 new_entry->needs_copy = FALSE;
11518 } else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
11519 (entry_was_shared || map_share)) {
11520 vm_object_t new_object;
11521
11522 vm_object_lock_shared(src_object);
11523 new_object = vm_object_copy_delayed(
11524 src_object,
11525 src_offset,
11526 src_size,
11527 TRUE);
11528 if (new_object == VM_OBJECT_NULL) {
11529 goto CopySlowly;
11530 }
11531
11532 VME_OBJECT_SET(new_entry, new_object);
11533 assert(new_entry->wired_count == 0);
11534 new_entry->needs_copy = TRUE;
11535 assert(!new_entry->iokit_acct);
11536 assert(new_object->purgable == VM_PURGABLE_DENY);
11537 assertf(new_entry->use_pmap, "src_map %p new_entry %p\n", src_map, new_entry);
11538 result = KERN_SUCCESS;
11539 } else {
11540 vm_object_offset_t new_offset;
11541 new_offset = VME_OFFSET(new_entry);
11542 result = vm_object_copy_strategically(src_object,
11543 src_offset,
11544 src_size,
11545 VME_OBJECT_PTR(new_entry),
11546 &new_offset,
11547 &new_entry_needs_copy);
11548 if (new_offset != VME_OFFSET(new_entry)) {
11549 VME_OFFSET_SET(new_entry, new_offset);
11550 }
11551
11552 new_entry->needs_copy = new_entry_needs_copy;
11553 }
11554
11555 if (result == KERN_SUCCESS &&
11556 preserve_purgeable &&
11557 src_object->purgable != VM_PURGABLE_DENY) {
11558 vm_object_t new_object;
11559
11560 new_object = VME_OBJECT(new_entry);
11561 assert(new_object != src_object);
11562 vm_object_lock(new_object);
11563 assert(new_object->ref_count == 1);
11564 assert(new_object->shadow == VM_OBJECT_NULL);
11565 assert(new_object->copy == VM_OBJECT_NULL);
11566 assert(new_object->vo_owner == NULL);
11567
11568 new_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
11569 new_object->true_share = TRUE;
11570 /* start as non-volatile with no owner... */
11571 new_object->purgable = VM_PURGABLE_NONVOLATILE;
11572 vm_purgeable_nonvolatile_enqueue(new_object, NULL);
11573 /* ... and move to src_object's purgeable state */
11574 if (src_object->purgable != VM_PURGABLE_NONVOLATILE) {
11575 int state;
11576 state = src_object->purgable;
11577 vm_object_purgable_control(
11578 new_object,
11579 VM_PURGABLE_SET_STATE_FROM_KERNEL,
11580 &state);
11581 }
11582 vm_object_unlock(new_object);
11583 new_object = VM_OBJECT_NULL;
11584 /* no pmap accounting for purgeable objects */
11585 new_entry->use_pmap = FALSE;
11586 }
11587
11588 if (result != KERN_SUCCESS &&
11589 result != KERN_MEMORY_RESTART_COPY) {
11590 vm_map_lock(src_map);
11591 RETURN(result);
11592 }
11593
11594 /*
11595 * Throw away the extra reference
11596 */
11597
11598 vm_object_deallocate(src_object);
11599
11600 /*
11601 * Verify that the map has not substantially
11602 * changed while the copy was being made.
11603 */
11604
11605 vm_map_lock(src_map);
11606
11607 if ((version.main_timestamp + 1) == src_map->timestamp) {
11608 /* src_map hasn't changed: src_entry is still valid */
11609 src_entry = saved_src_entry;
11610 goto VerificationSuccessful;
11611 }
11612
11613 /*
11614 * Simple version comparison failed.
11615 *
11616 * Retry the lookup and verify that the
11617 * same object/offset are still present.
11618 *
11619 * [Note: a memory manager that colludes with
11620 * the calling task can detect that we have
11621 * cheated. While the map was unlocked, the
11622 * mapping could have been changed and restored.]
11623 */
11624
11625 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
11626 if (result != KERN_MEMORY_RESTART_COPY) {
11627 vm_object_deallocate(VME_OBJECT(new_entry));
11628 VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
11629 /* reset accounting state */
11630 new_entry->iokit_acct = FALSE;
11631 new_entry->use_pmap = TRUE;
11632 }
11633 RETURN(KERN_INVALID_ADDRESS);
11634 }
11635
11636 src_entry = tmp_entry;
11637 vm_map_clip_start(src_map, src_entry, src_start);
11638
11639 if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) &&
11640 !use_maxprot) ||
11641 ((src_entry->max_protection & VM_PROT_READ) == 0)) {
11642 goto VerificationFailed;
11643 }
11644
11645 if (src_entry->vme_end < new_entry->vme_end) {
11646 /*
11647 * This entry might have been shortened
11648 * (vm_map_clip_end) or been replaced with
11649 * an entry that ends closer to "src_start"
11650 * than before.
11651 * Adjust "new_entry" accordingly; copying
11652 * less memory would be correct but we also
11653 * redo the copy (see below) if the new entry
11654 * no longer points at the same object/offset.
11655 */
11656 assert(VM_MAP_PAGE_ALIGNED(src_entry->vme_end,
11657 VM_MAP_COPY_PAGE_MASK(copy)));
11658 new_entry->vme_end = src_entry->vme_end;
11659 src_size = new_entry->vme_end - src_start;
11660 } else if (src_entry->vme_end > new_entry->vme_end) {
11661 /*
11662 * This entry might have been extended
11663 * (vm_map_entry_simplify() or coalesce)
11664 * or been replaced with an entry that ends farther
11665 * from "src_start" than before.
11666 *
11667 * We've called vm_object_copy_*() only on
11668 * the previous <start:end> range, so we can't
11669 * just extend new_entry. We have to re-do
11670 * the copy based on the new entry as if it was
11671 * pointing at a different object/offset (see
11672 * "Verification failed" below).
11673 */
11674 }
11675
11676 if ((VME_OBJECT(src_entry) != src_object) ||
11677 (VME_OFFSET(src_entry) != src_offset) ||
11678 (src_entry->vme_end > new_entry->vme_end)) {
11679 /*
11680 * Verification failed.
11681 *
11682 * Start over with this top-level entry.
11683 */
11684
11685 VerificationFailed: ;
11686
11687 vm_object_deallocate(VME_OBJECT(new_entry));
11688 tmp_entry = src_entry;
11689 continue;
11690 }
11691
11692 /*
11693 * Verification succeeded.
11694 */
11695
11696 VerificationSuccessful:;
11697
11698 if (result == KERN_MEMORY_RESTART_COPY) {
11699 goto RestartCopy;
11700 }
11701
11702 /*
11703 * Copy succeeded.
11704 */
11705
11706 CopySuccessful: ;
11707
11708 /*
11709 * Link in the new copy entry.
11710 */
11711
11712 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
11713 new_entry);
11714
11715 /*
11716 * Determine whether the entire region
11717 * has been copied.
11718 */
11719 src_base = src_start;
11720 src_start = new_entry->vme_end;
11721 new_entry = VM_MAP_ENTRY_NULL;
11722 while ((src_start >= src_end) && (src_end != 0)) {
11723 submap_map_t *ptr;
11724
11725 if (src_map == base_map) {
11726 /* back to the top */
11727 break;
11728 }
11729
11730 ptr = parent_maps;
11731 assert(ptr != NULL);
11732 parent_maps = parent_maps->next;
11733
11734 /* fix up the damage we did in that submap */
11735 vm_map_simplify_range(src_map,
11736 src_base,
11737 src_end);
11738
11739 vm_map_unlock(src_map);
11740 vm_map_deallocate(src_map);
11741 vm_map_lock(ptr->parent_map);
11742 src_map = ptr->parent_map;
11743 src_base = ptr->base_start;
11744 src_start = ptr->base_start + ptr->base_len;
11745 src_end = ptr->base_end;
11746 if (!vm_map_lookup_entry(src_map,
11747 src_start,
11748 &tmp_entry) &&
11749 (src_end > src_start)) {
11750 RETURN(KERN_INVALID_ADDRESS);
11751 }
11752 kfree(ptr, sizeof(submap_map_t));
11753 if (parent_maps == NULL) {
11754 map_share = FALSE;
11755 }
11756 src_entry = tmp_entry->vme_prev;
11757 }
11758
11759 if ((VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) &&
11760 (src_start >= src_addr + len) &&
11761 (src_addr + len != 0)) {
11762 /*
11763 * Stop copying now, even though we haven't reached
11764 * "src_end". We'll adjust the end of the last copy
11765 * entry at the end, if needed.
11766 *
11767 * If src_map's alignment is different from the
11768 * system's page-alignment, there could be
11769 * extra non-map-aligned map entries between
11770 * the original (non-rounded) "src_addr + len"
11771 * and the rounded "src_end".
11772 * We do not want to copy those map entries since
11773 * they're not part of the copied range.
11774 */
11775 break;
11776 }
11777
11778 if ((src_start >= src_end) && (src_end != 0)) {
11779 break;
11780 }
11781
11782 /*
11783 * Verify that there are no gaps in the region
11784 */
11785
11786 tmp_entry = src_entry->vme_next;
11787 if ((tmp_entry->vme_start != src_start) ||
11788 (tmp_entry == vm_map_to_entry(src_map))) {
11789 RETURN(KERN_INVALID_ADDRESS);
11790 }
11791 }
11792
11793 /*
11794 * If the source should be destroyed, do it now, since the
11795 * copy was successful.
11796 */
11797 if (src_destroy) {
11798 (void) vm_map_delete(
11799 src_map,
11800 vm_map_trunc_page(src_addr,
11801 VM_MAP_PAGE_MASK(src_map)),
11802 src_end,
11803 ((src_map == kernel_map) ?
11804 VM_MAP_REMOVE_KUNWIRE :
11805 VM_MAP_REMOVE_NO_FLAGS),
11806 VM_MAP_NULL);
11807 } else {
11808 /* fix up the damage we did in the base map */
11809 vm_map_simplify_range(
11810 src_map,
11811 vm_map_trunc_page(src_addr,
11812 VM_MAP_PAGE_MASK(src_map)),
11813 vm_map_round_page(src_end,
11814 VM_MAP_PAGE_MASK(src_map)));
11815 }
11816
11817 vm_map_unlock(src_map);
11818 tmp_entry = VM_MAP_ENTRY_NULL;
11819
11820 if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) {
11821 vm_map_offset_t original_start, original_offset, original_end;
11822
11823 assert(VM_MAP_COPY_PAGE_MASK(copy) == PAGE_MASK);
11824
11825 /* adjust alignment of first copy_entry's "vme_start" */
11826 tmp_entry = vm_map_copy_first_entry(copy);
11827 if (tmp_entry != vm_map_copy_to_entry(copy)) {
11828 vm_map_offset_t adjustment;
11829
11830 original_start = tmp_entry->vme_start;
11831 original_offset = VME_OFFSET(tmp_entry);
11832
11833 /* map-align the start of the first copy entry... */
11834 adjustment = (tmp_entry->vme_start -
11835 vm_map_trunc_page(
11836 tmp_entry->vme_start,
11837 VM_MAP_PAGE_MASK(src_map)));
11838 tmp_entry->vme_start -= adjustment;
11839 VME_OFFSET_SET(tmp_entry,
11840 VME_OFFSET(tmp_entry) - adjustment);
11841 copy_addr -= adjustment;
11842 assert(tmp_entry->vme_start < tmp_entry->vme_end);
11843 /* ... adjust for mis-aligned start of copy range */
11844 adjustment =
11845 (vm_map_trunc_page(copy->offset,
11846 PAGE_MASK) -
11847 vm_map_trunc_page(copy->offset,
11848 VM_MAP_PAGE_MASK(src_map)));
11849 if (adjustment) {
11850 assert(page_aligned(adjustment));
11851 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
11852 tmp_entry->vme_start += adjustment;
11853 VME_OFFSET_SET(tmp_entry,
11854 (VME_OFFSET(tmp_entry) +
11855 adjustment));
11856 copy_addr += adjustment;
11857 assert(tmp_entry->vme_start < tmp_entry->vme_end);
11858 }
11859
11860 /*
11861 * Assert that the adjustments haven't exposed
11862 * more than was originally copied...
11863 */
11864 assert(tmp_entry->vme_start >= original_start);
11865 assert(VME_OFFSET(tmp_entry) >= original_offset);
11866 /*
11867 * ... and that it did not adjust outside of a
11868 * a single 16K page.
11869 */
11870 assert(vm_map_trunc_page(tmp_entry->vme_start,
11871 VM_MAP_PAGE_MASK(src_map)) ==
11872 vm_map_trunc_page(original_start,
11873 VM_MAP_PAGE_MASK(src_map)));
11874 }
11875
11876 /* adjust alignment of last copy_entry's "vme_end" */
11877 tmp_entry = vm_map_copy_last_entry(copy);
11878 if (tmp_entry != vm_map_copy_to_entry(copy)) {
11879 vm_map_offset_t adjustment;
11880
11881 original_end = tmp_entry->vme_end;
11882
11883 /* map-align the end of the last copy entry... */
11884 tmp_entry->vme_end =
11885 vm_map_round_page(tmp_entry->vme_end,
11886 VM_MAP_PAGE_MASK(src_map));
11887 /* ... adjust for mis-aligned end of copy range */
11888 adjustment =
11889 (vm_map_round_page((copy->offset +
11890 copy->size),
11891 VM_MAP_PAGE_MASK(src_map)) -
11892 vm_map_round_page((copy->offset +
11893 copy->size),
11894 PAGE_MASK));
11895 if (adjustment) {
11896 assert(page_aligned(adjustment));
11897 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
11898 tmp_entry->vme_end -= adjustment;
11899 assert(tmp_entry->vme_start < tmp_entry->vme_end);
11900 }
11901
11902 /*
11903 * Assert that the adjustments haven't exposed
11904 * more than was originally copied...
11905 */
11906 assert(tmp_entry->vme_end <= original_end);
11907 /*
11908 * ... and that it did not adjust outside of a
11909 * a single 16K page.
11910 */
11911 assert(vm_map_round_page(tmp_entry->vme_end,
11912 VM_MAP_PAGE_MASK(src_map)) ==
11913 vm_map_round_page(original_end,
11914 VM_MAP_PAGE_MASK(src_map)));
11915 }
11916 }
11917
11918 /* Fix-up start and end points in copy. This is necessary */
11919 /* when the various entries in the copy object were picked */
11920 /* up from different sub-maps */
11921
11922 tmp_entry = vm_map_copy_first_entry(copy);
11923 copy_size = 0; /* compute actual size */
11924 while (tmp_entry != vm_map_copy_to_entry(copy)) {
11925 assert(VM_MAP_PAGE_ALIGNED(
11926 copy_addr + (tmp_entry->vme_end -
11927 tmp_entry->vme_start),
11928 VM_MAP_COPY_PAGE_MASK(copy)));
11929 assert(VM_MAP_PAGE_ALIGNED(
11930 copy_addr,
11931 VM_MAP_COPY_PAGE_MASK(copy)));
11932
11933 /*
11934 * The copy_entries will be injected directly into the
11935 * destination map and might not be "map aligned" there...
11936 */
11937 tmp_entry->map_aligned = FALSE;
11938
11939 tmp_entry->vme_end = copy_addr +
11940 (tmp_entry->vme_end - tmp_entry->vme_start);
11941 tmp_entry->vme_start = copy_addr;
11942 assert(tmp_entry->vme_start < tmp_entry->vme_end);
11943 copy_addr += tmp_entry->vme_end - tmp_entry->vme_start;
11944 copy_size += tmp_entry->vme_end - tmp_entry->vme_start;
11945 tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next;
11946 }
11947
11948 if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT &&
11949 copy_size < copy->size) {
11950 /*
11951 * The actual size of the VM map copy is smaller than what
11952 * was requested by the caller. This must be because some
11953 * PAGE_SIZE-sized pages are missing at the end of the last
11954 * VM_MAP_PAGE_SIZE(src_map)-sized chunk of the range.
11955 * The caller might not have been aware of those missing
11956 * pages and might not want to be aware of it, which is
11957 * fine as long as they don't try to access (and crash on)
11958 * those missing pages.
11959 * Let's adjust the size of the "copy", to avoid failing
11960 * in vm_map_copyout() or vm_map_copy_overwrite().
11961 */
11962 assert(vm_map_round_page(copy_size,
11963 VM_MAP_PAGE_MASK(src_map)) ==
11964 vm_map_round_page(copy->size,
11965 VM_MAP_PAGE_MASK(src_map)));
11966 copy->size = copy_size;
11967 }
11968
11969 *copy_result = copy;
11970 return KERN_SUCCESS;
11971
11972 #undef RETURN
11973 }
11974
11975 kern_return_t
11976 vm_map_copy_extract(
11977 vm_map_t src_map,
11978 vm_map_address_t src_addr,
11979 vm_map_size_t len,
11980 vm_map_copy_t *copy_result, /* OUT */
11981 vm_prot_t *cur_prot, /* OUT */
11982 vm_prot_t *max_prot)
11983 {
11984 vm_map_offset_t src_start, src_end;
11985 vm_map_copy_t copy;
11986 kern_return_t kr;
11987
11988 /*
11989 * Check for copies of zero bytes.
11990 */
11991
11992 if (len == 0) {
11993 *copy_result = VM_MAP_COPY_NULL;
11994 return KERN_SUCCESS;
11995 }
11996
11997 /*
11998 * Check that the end address doesn't overflow
11999 */
12000 src_end = src_addr + len;
12001 if (src_end < src_addr) {
12002 return KERN_INVALID_ADDRESS;
12003 }
12004
12005 /*
12006 * Compute (page aligned) start and end of region
12007 */
12008 src_start = vm_map_trunc_page(src_addr, PAGE_MASK);
12009 src_end = vm_map_round_page(src_end, PAGE_MASK);
12010
12011 /*
12012 * Allocate a header element for the list.
12013 *
12014 * Use the start and end in the header to
12015 * remember the endpoints prior to rounding.
12016 */
12017
12018 copy = vm_map_copy_allocate();
12019 copy->type = VM_MAP_COPY_ENTRY_LIST;
12020 copy->cpy_hdr.entries_pageable = TRUE;
12021
12022 vm_map_store_init(&copy->cpy_hdr);
12023
12024 copy->offset = 0;
12025 copy->size = len;
12026
12027 kr = vm_map_remap_extract(src_map,
12028 src_addr,
12029 len,
12030 FALSE, /* copy */
12031 &copy->cpy_hdr,
12032 cur_prot,
12033 max_prot,
12034 VM_INHERIT_SHARE,
12035 TRUE, /* pageable */
12036 FALSE, /* same_map */
12037 VM_MAP_KERNEL_FLAGS_NONE);
12038 if (kr != KERN_SUCCESS) {
12039 vm_map_copy_discard(copy);
12040 return kr;
12041 }
12042
12043 *copy_result = copy;
12044 return KERN_SUCCESS;
12045 }
12046
12047 /*
12048 * vm_map_copyin_object:
12049 *
12050 * Create a copy object from an object.
12051 * Our caller donates an object reference.
12052 */
12053
12054 kern_return_t
12055 vm_map_copyin_object(
12056 vm_object_t object,
12057 vm_object_offset_t offset, /* offset of region in object */
12058 vm_object_size_t size, /* size of region in object */
12059 vm_map_copy_t *copy_result) /* OUT */
12060 {
12061 vm_map_copy_t copy; /* Resulting copy */
12062
12063 /*
12064 * We drop the object into a special copy object
12065 * that contains the object directly.
12066 */
12067
12068 copy = vm_map_copy_allocate();
12069 copy->type = VM_MAP_COPY_OBJECT;
12070 copy->cpy_object = object;
12071 copy->offset = offset;
12072 copy->size = size;
12073
12074 *copy_result = copy;
12075 return KERN_SUCCESS;
12076 }
12077
12078 static void
12079 vm_map_fork_share(
12080 vm_map_t old_map,
12081 vm_map_entry_t old_entry,
12082 vm_map_t new_map)
12083 {
12084 vm_object_t object;
12085 vm_map_entry_t new_entry;
12086
12087 /*
12088 * New sharing code. New map entry
12089 * references original object. Internal
12090 * objects use asynchronous copy algorithm for
12091 * future copies. First make sure we have
12092 * the right object. If we need a shadow,
12093 * or someone else already has one, then
12094 * make a new shadow and share it.
12095 */
12096
12097 object = VME_OBJECT(old_entry);
12098 if (old_entry->is_sub_map) {
12099 assert(old_entry->wired_count == 0);
12100 #ifndef NO_NESTED_PMAP
12101 if (old_entry->use_pmap) {
12102 kern_return_t result;
12103
12104 result = pmap_nest(new_map->pmap,
12105 (VME_SUBMAP(old_entry))->pmap,
12106 (addr64_t)old_entry->vme_start,
12107 (addr64_t)old_entry->vme_start,
12108 (uint64_t)(old_entry->vme_end - old_entry->vme_start));
12109 if (result) {
12110 panic("vm_map_fork_share: pmap_nest failed!");
12111 }
12112 }
12113 #endif /* NO_NESTED_PMAP */
12114 } else if (object == VM_OBJECT_NULL) {
12115 object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
12116 old_entry->vme_start));
12117 VME_OFFSET_SET(old_entry, 0);
12118 VME_OBJECT_SET(old_entry, object);
12119 old_entry->use_pmap = TRUE;
12120 // assert(!old_entry->needs_copy);
12121 } else if (object->copy_strategy !=
12122 MEMORY_OBJECT_COPY_SYMMETRIC) {
12123 /*
12124 * We are already using an asymmetric
12125 * copy, and therefore we already have
12126 * the right object.
12127 */
12128
12129 assert(!old_entry->needs_copy);
12130 } else if (old_entry->needs_copy || /* case 1 */
12131 object->shadowed || /* case 2 */
12132 (!object->true_share && /* case 3 */
12133 !old_entry->is_shared &&
12134 (object->vo_size >
12135 (vm_map_size_t)(old_entry->vme_end -
12136 old_entry->vme_start)))) {
12137 /*
12138 * We need to create a shadow.
12139 * There are three cases here.
12140 * In the first case, we need to
12141 * complete a deferred symmetrical
12142 * copy that we participated in.
12143 * In the second and third cases,
12144 * we need to create the shadow so
12145 * that changes that we make to the
12146 * object do not interfere with
12147 * any symmetrical copies which
12148 * have occured (case 2) or which
12149 * might occur (case 3).
12150 *
12151 * The first case is when we had
12152 * deferred shadow object creation
12153 * via the entry->needs_copy mechanism.
12154 * This mechanism only works when
12155 * only one entry points to the source
12156 * object, and we are about to create
12157 * a second entry pointing to the
12158 * same object. The problem is that
12159 * there is no way of mapping from
12160 * an object to the entries pointing
12161 * to it. (Deferred shadow creation
12162 * works with one entry because occurs
12163 * at fault time, and we walk from the
12164 * entry to the object when handling
12165 * the fault.)
12166 *
12167 * The second case is when the object
12168 * to be shared has already been copied
12169 * with a symmetric copy, but we point
12170 * directly to the object without
12171 * needs_copy set in our entry. (This
12172 * can happen because different ranges
12173 * of an object can be pointed to by
12174 * different entries. In particular,
12175 * a single entry pointing to an object
12176 * can be split by a call to vm_inherit,
12177 * which, combined with task_create, can
12178 * result in the different entries
12179 * having different needs_copy values.)
12180 * The shadowed flag in the object allows
12181 * us to detect this case. The problem
12182 * with this case is that if this object
12183 * has or will have shadows, then we
12184 * must not perform an asymmetric copy
12185 * of this object, since such a copy
12186 * allows the object to be changed, which
12187 * will break the previous symmetrical
12188 * copies (which rely upon the object
12189 * not changing). In a sense, the shadowed
12190 * flag says "don't change this object".
12191 * We fix this by creating a shadow
12192 * object for this object, and sharing
12193 * that. This works because we are free
12194 * to change the shadow object (and thus
12195 * to use an asymmetric copy strategy);
12196 * this is also semantically correct,
12197 * since this object is temporary, and
12198 * therefore a copy of the object is
12199 * as good as the object itself. (This
12200 * is not true for permanent objects,
12201 * since the pager needs to see changes,
12202 * which won't happen if the changes
12203 * are made to a copy.)
12204 *
12205 * The third case is when the object
12206 * to be shared has parts sticking
12207 * outside of the entry we're working
12208 * with, and thus may in the future
12209 * be subject to a symmetrical copy.
12210 * (This is a preemptive version of
12211 * case 2.)
12212 */
12213 VME_OBJECT_SHADOW(old_entry,
12214 (vm_map_size_t) (old_entry->vme_end -
12215 old_entry->vme_start));
12216
12217 /*
12218 * If we're making a shadow for other than
12219 * copy on write reasons, then we have
12220 * to remove write permission.
12221 */
12222
12223 if (!old_entry->needs_copy &&
12224 (old_entry->protection & VM_PROT_WRITE)) {
12225 vm_prot_t prot;
12226
12227 assert(!pmap_has_prot_policy(old_entry->protection));
12228
12229 prot = old_entry->protection & ~VM_PROT_WRITE;
12230
12231 assert(!pmap_has_prot_policy(prot));
12232
12233 if (override_nx(old_map, VME_ALIAS(old_entry)) && prot) {
12234 prot |= VM_PROT_EXECUTE;
12235 }
12236
12237
12238 if (old_map->mapped_in_other_pmaps) {
12239 vm_object_pmap_protect(
12240 VME_OBJECT(old_entry),
12241 VME_OFFSET(old_entry),
12242 (old_entry->vme_end -
12243 old_entry->vme_start),
12244 PMAP_NULL,
12245 old_entry->vme_start,
12246 prot);
12247 } else {
12248 pmap_protect(old_map->pmap,
12249 old_entry->vme_start,
12250 old_entry->vme_end,
12251 prot);
12252 }
12253 }
12254
12255 old_entry->needs_copy = FALSE;
12256 object = VME_OBJECT(old_entry);
12257 }
12258
12259
12260 /*
12261 * If object was using a symmetric copy strategy,
12262 * change its copy strategy to the default
12263 * asymmetric copy strategy, which is copy_delay
12264 * in the non-norma case and copy_call in the
12265 * norma case. Bump the reference count for the
12266 * new entry.
12267 */
12268
12269 if (old_entry->is_sub_map) {
12270 vm_map_lock(VME_SUBMAP(old_entry));
12271 vm_map_reference(VME_SUBMAP(old_entry));
12272 vm_map_unlock(VME_SUBMAP(old_entry));
12273 } else {
12274 vm_object_lock(object);
12275 vm_object_reference_locked(object);
12276 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
12277 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
12278 }
12279 vm_object_unlock(object);
12280 }
12281
12282 /*
12283 * Clone the entry, using object ref from above.
12284 * Mark both entries as shared.
12285 */
12286
12287 new_entry = vm_map_entry_create(new_map, FALSE); /* Never the kernel
12288 * map or descendants */
12289 vm_map_entry_copy(new_entry, old_entry);
12290 old_entry->is_shared = TRUE;
12291 new_entry->is_shared = TRUE;
12292
12293 /*
12294 * We're dealing with a shared mapping, so the resulting mapping
12295 * should inherit some of the original mapping's accounting settings.
12296 * "iokit_acct" should have been cleared in vm_map_entry_copy().
12297 * "use_pmap" should stay the same as before (if it hasn't been reset
12298 * to TRUE when we cleared "iokit_acct").
12299 */
12300 assert(!new_entry->iokit_acct);
12301
12302 /*
12303 * If old entry's inheritance is VM_INHERIT_NONE,
12304 * the new entry is for corpse fork, remove the
12305 * write permission from the new entry.
12306 */
12307 if (old_entry->inheritance == VM_INHERIT_NONE) {
12308 new_entry->protection &= ~VM_PROT_WRITE;
12309 new_entry->max_protection &= ~VM_PROT_WRITE;
12310 }
12311
12312 /*
12313 * Insert the entry into the new map -- we
12314 * know we're inserting at the end of the new
12315 * map.
12316 */
12317
12318 vm_map_store_entry_link(new_map, vm_map_last_entry(new_map), new_entry,
12319 VM_MAP_KERNEL_FLAGS_NONE);
12320
12321 /*
12322 * Update the physical map
12323 */
12324
12325 if (old_entry->is_sub_map) {
12326 /* Bill Angell pmap support goes here */
12327 } else {
12328 pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
12329 old_entry->vme_end - old_entry->vme_start,
12330 old_entry->vme_start);
12331 }
12332 }
12333
12334 static boolean_t
12335 vm_map_fork_copy(
12336 vm_map_t old_map,
12337 vm_map_entry_t *old_entry_p,
12338 vm_map_t new_map,
12339 int vm_map_copyin_flags)
12340 {
12341 vm_map_entry_t old_entry = *old_entry_p;
12342 vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start;
12343 vm_map_offset_t start = old_entry->vme_start;
12344 vm_map_copy_t copy;
12345 vm_map_entry_t last = vm_map_last_entry(new_map);
12346
12347 vm_map_unlock(old_map);
12348 /*
12349 * Use maxprot version of copyin because we
12350 * care about whether this memory can ever
12351 * be accessed, not just whether it's accessible
12352 * right now.
12353 */
12354 vm_map_copyin_flags |= VM_MAP_COPYIN_USE_MAXPROT;
12355 if (vm_map_copyin_internal(old_map, start, entry_size,
12356 vm_map_copyin_flags, &copy)
12357 != KERN_SUCCESS) {
12358 /*
12359 * The map might have changed while it
12360 * was unlocked, check it again. Skip
12361 * any blank space or permanently
12362 * unreadable region.
12363 */
12364 vm_map_lock(old_map);
12365 if (!vm_map_lookup_entry(old_map, start, &last) ||
12366 (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) {
12367 last = last->vme_next;
12368 }
12369 *old_entry_p = last;
12370
12371 /*
12372 * XXX For some error returns, want to
12373 * XXX skip to the next element. Note
12374 * that INVALID_ADDRESS and
12375 * PROTECTION_FAILURE are handled above.
12376 */
12377
12378 return FALSE;
12379 }
12380
12381 /*
12382 * Insert the copy into the new map
12383 */
12384
12385 vm_map_copy_insert(new_map, last, copy);
12386
12387 /*
12388 * Pick up the traversal at the end of
12389 * the copied region.
12390 */
12391
12392 vm_map_lock(old_map);
12393 start += entry_size;
12394 if (!vm_map_lookup_entry(old_map, start, &last)) {
12395 last = last->vme_next;
12396 } else {
12397 if (last->vme_start == start) {
12398 /*
12399 * No need to clip here and we don't
12400 * want to cause any unnecessary
12401 * unnesting...
12402 */
12403 } else {
12404 vm_map_clip_start(old_map, last, start);
12405 }
12406 }
12407 *old_entry_p = last;
12408
12409 return TRUE;
12410 }
12411
12412 /*
12413 * vm_map_fork:
12414 *
12415 * Create and return a new map based on the old
12416 * map, according to the inheritance values on the
12417 * regions in that map and the options.
12418 *
12419 * The source map must not be locked.
12420 */
12421 vm_map_t
12422 vm_map_fork(
12423 ledger_t ledger,
12424 vm_map_t old_map,
12425 int options)
12426 {
12427 pmap_t new_pmap;
12428 vm_map_t new_map;
12429 vm_map_entry_t old_entry;
12430 vm_map_size_t new_size = 0, entry_size;
12431 vm_map_entry_t new_entry;
12432 boolean_t src_needs_copy;
12433 boolean_t new_entry_needs_copy;
12434 boolean_t pmap_is64bit;
12435 int vm_map_copyin_flags;
12436 vm_inherit_t old_entry_inheritance;
12437 int map_create_options;
12438 kern_return_t footprint_collect_kr;
12439
12440 if (options & ~(VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
12441 VM_MAP_FORK_PRESERVE_PURGEABLE |
12442 VM_MAP_FORK_CORPSE_FOOTPRINT)) {
12443 /* unsupported option */
12444 return VM_MAP_NULL;
12445 }
12446
12447 pmap_is64bit =
12448 #if defined(__i386__) || defined(__x86_64__)
12449 old_map->pmap->pm_task_map != TASK_MAP_32BIT;
12450 #elif defined(__arm64__)
12451 old_map->pmap->max == MACH_VM_MAX_ADDRESS;
12452 #elif defined(__arm__)
12453 FALSE;
12454 #else
12455 #error Unknown architecture.
12456 #endif
12457
12458 unsigned int pmap_flags = 0;
12459 pmap_flags |= pmap_is64bit ? PMAP_CREATE_64BIT : 0;
12460 #if defined(HAS_APPLE_PAC)
12461 pmap_flags |= old_map->pmap->disable_jop ? PMAP_CREATE_DISABLE_JOP : 0;
12462 #endif
12463 new_pmap = pmap_create_options(ledger, (vm_map_size_t) 0, pmap_flags);
12464
12465 vm_map_reference_swap(old_map);
12466 vm_map_lock(old_map);
12467
12468 map_create_options = 0;
12469 if (old_map->hdr.entries_pageable) {
12470 map_create_options |= VM_MAP_CREATE_PAGEABLE;
12471 }
12472 if (options & VM_MAP_FORK_CORPSE_FOOTPRINT) {
12473 map_create_options |= VM_MAP_CREATE_CORPSE_FOOTPRINT;
12474 footprint_collect_kr = KERN_SUCCESS;
12475 }
12476 new_map = vm_map_create_options(new_pmap,
12477 old_map->min_offset,
12478 old_map->max_offset,
12479 map_create_options);
12480 vm_map_lock(new_map);
12481 vm_commit_pagezero_status(new_map);
12482 /* inherit the parent map's page size */
12483 vm_map_set_page_shift(new_map, VM_MAP_PAGE_SHIFT(old_map));
12484 for (
12485 old_entry = vm_map_first_entry(old_map);
12486 old_entry != vm_map_to_entry(old_map);
12487 ) {
12488 entry_size = old_entry->vme_end - old_entry->vme_start;
12489
12490 old_entry_inheritance = old_entry->inheritance;
12491 /*
12492 * If caller used the VM_MAP_FORK_SHARE_IF_INHERIT_NONE option
12493 * share VM_INHERIT_NONE entries that are not backed by a
12494 * device pager.
12495 */
12496 if (old_entry_inheritance == VM_INHERIT_NONE &&
12497 (options & VM_MAP_FORK_SHARE_IF_INHERIT_NONE) &&
12498 !(!old_entry->is_sub_map &&
12499 VME_OBJECT(old_entry) != NULL &&
12500 VME_OBJECT(old_entry)->pager != NULL &&
12501 is_device_pager_ops(
12502 VME_OBJECT(old_entry)->pager->mo_pager_ops))) {
12503 old_entry_inheritance = VM_INHERIT_SHARE;
12504 }
12505
12506 if (old_entry_inheritance != VM_INHERIT_NONE &&
12507 (options & VM_MAP_FORK_CORPSE_FOOTPRINT) &&
12508 footprint_collect_kr == KERN_SUCCESS) {
12509 /*
12510 * The corpse won't have old_map->pmap to query
12511 * footprint information, so collect that data now
12512 * and store it in new_map->vmmap_corpse_footprint
12513 * for later autopsy.
12514 */
12515 footprint_collect_kr =
12516 vm_map_corpse_footprint_collect(old_map,
12517 old_entry,
12518 new_map);
12519 }
12520
12521 switch (old_entry_inheritance) {
12522 case VM_INHERIT_NONE:
12523 break;
12524
12525 case VM_INHERIT_SHARE:
12526 vm_map_fork_share(old_map, old_entry, new_map);
12527 new_size += entry_size;
12528 break;
12529
12530 case VM_INHERIT_COPY:
12531
12532 /*
12533 * Inline the copy_quickly case;
12534 * upon failure, fall back on call
12535 * to vm_map_fork_copy.
12536 */
12537
12538 if (old_entry->is_sub_map) {
12539 break;
12540 }
12541 if ((old_entry->wired_count != 0) ||
12542 ((VME_OBJECT(old_entry) != NULL) &&
12543 (VME_OBJECT(old_entry)->true_share))) {
12544 goto slow_vm_map_fork_copy;
12545 }
12546
12547 new_entry = vm_map_entry_create(new_map, FALSE); /* never the kernel map or descendants */
12548 vm_map_entry_copy(new_entry, old_entry);
12549 if (new_entry->is_sub_map) {
12550 /* clear address space specifics */
12551 new_entry->use_pmap = FALSE;
12552 } else {
12553 /*
12554 * We're dealing with a copy-on-write operation,
12555 * so the resulting mapping should not inherit
12556 * the original mapping's accounting settings.
12557 * "iokit_acct" should have been cleared in
12558 * vm_map_entry_copy().
12559 * "use_pmap" should be reset to its default
12560 * (TRUE) so that the new mapping gets
12561 * accounted for in the task's memory footprint.
12562 */
12563 assert(!new_entry->iokit_acct);
12564 new_entry->use_pmap = TRUE;
12565 }
12566
12567 if (!vm_object_copy_quickly(
12568 VME_OBJECT_PTR(new_entry),
12569 VME_OFFSET(old_entry),
12570 (old_entry->vme_end -
12571 old_entry->vme_start),
12572 &src_needs_copy,
12573 &new_entry_needs_copy)) {
12574 vm_map_entry_dispose(new_map, new_entry);
12575 goto slow_vm_map_fork_copy;
12576 }
12577
12578 /*
12579 * Handle copy-on-write obligations
12580 */
12581
12582 if (src_needs_copy && !old_entry->needs_copy) {
12583 vm_prot_t prot;
12584
12585 assert(!pmap_has_prot_policy(old_entry->protection));
12586
12587 prot = old_entry->protection & ~VM_PROT_WRITE;
12588
12589 if (override_nx(old_map, VME_ALIAS(old_entry))
12590 && prot) {
12591 prot |= VM_PROT_EXECUTE;
12592 }
12593
12594 assert(!pmap_has_prot_policy(prot));
12595
12596 vm_object_pmap_protect(
12597 VME_OBJECT(old_entry),
12598 VME_OFFSET(old_entry),
12599 (old_entry->vme_end -
12600 old_entry->vme_start),
12601 ((old_entry->is_shared
12602 || old_map->mapped_in_other_pmaps)
12603 ? PMAP_NULL :
12604 old_map->pmap),
12605 old_entry->vme_start,
12606 prot);
12607
12608 assert(old_entry->wired_count == 0);
12609 old_entry->needs_copy = TRUE;
12610 }
12611 new_entry->needs_copy = new_entry_needs_copy;
12612
12613 /*
12614 * Insert the entry at the end
12615 * of the map.
12616 */
12617
12618 vm_map_store_entry_link(new_map,
12619 vm_map_last_entry(new_map),
12620 new_entry,
12621 VM_MAP_KERNEL_FLAGS_NONE);
12622 new_size += entry_size;
12623 break;
12624
12625 slow_vm_map_fork_copy:
12626 vm_map_copyin_flags = 0;
12627 if (options & VM_MAP_FORK_PRESERVE_PURGEABLE) {
12628 vm_map_copyin_flags |=
12629 VM_MAP_COPYIN_PRESERVE_PURGEABLE;
12630 }
12631 if (vm_map_fork_copy(old_map,
12632 &old_entry,
12633 new_map,
12634 vm_map_copyin_flags)) {
12635 new_size += entry_size;
12636 }
12637 continue;
12638 }
12639 old_entry = old_entry->vme_next;
12640 }
12641
12642 #if defined(__arm64__)
12643 pmap_insert_sharedpage(new_map->pmap);
12644 #endif
12645
12646 new_map->size = new_size;
12647
12648 if (options & VM_MAP_FORK_CORPSE_FOOTPRINT) {
12649 vm_map_corpse_footprint_collect_done(new_map);
12650 }
12651
12652 vm_map_unlock(new_map);
12653 vm_map_unlock(old_map);
12654 vm_map_deallocate(old_map);
12655
12656 return new_map;
12657 }
12658
12659 /*
12660 * vm_map_exec:
12661 *
12662 * Setup the "new_map" with the proper execution environment according
12663 * to the type of executable (platform, 64bit, chroot environment).
12664 * Map the comm page and shared region, etc...
12665 */
12666 kern_return_t
12667 vm_map_exec(
12668 vm_map_t new_map,
12669 task_t task,
12670 boolean_t is64bit,
12671 void *fsroot,
12672 cpu_type_t cpu,
12673 cpu_subtype_t cpu_subtype)
12674 {
12675 SHARED_REGION_TRACE_DEBUG(
12676 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x,0x%x): ->\n",
12677 (void *)VM_KERNEL_ADDRPERM(current_task()),
12678 (void *)VM_KERNEL_ADDRPERM(new_map),
12679 (void *)VM_KERNEL_ADDRPERM(task),
12680 (void *)VM_KERNEL_ADDRPERM(fsroot),
12681 cpu,
12682 cpu_subtype));
12683 (void) vm_commpage_enter(new_map, task, is64bit);
12684 (void) vm_shared_region_enter(new_map, task, is64bit, fsroot, cpu, cpu_subtype);
12685 SHARED_REGION_TRACE_DEBUG(
12686 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x,0x%x): <-\n",
12687 (void *)VM_KERNEL_ADDRPERM(current_task()),
12688 (void *)VM_KERNEL_ADDRPERM(new_map),
12689 (void *)VM_KERNEL_ADDRPERM(task),
12690 (void *)VM_KERNEL_ADDRPERM(fsroot),
12691 cpu,
12692 cpu_subtype));
12693 return KERN_SUCCESS;
12694 }
12695
12696 /*
12697 * vm_map_lookup_locked:
12698 *
12699 * Finds the VM object, offset, and
12700 * protection for a given virtual address in the
12701 * specified map, assuming a page fault of the
12702 * type specified.
12703 *
12704 * Returns the (object, offset, protection) for
12705 * this address, whether it is wired down, and whether
12706 * this map has the only reference to the data in question.
12707 * In order to later verify this lookup, a "version"
12708 * is returned.
12709 *
12710 * The map MUST be locked by the caller and WILL be
12711 * locked on exit. In order to guarantee the
12712 * existence of the returned object, it is returned
12713 * locked.
12714 *
12715 * If a lookup is requested with "write protection"
12716 * specified, the map may be changed to perform virtual
12717 * copying operations, although the data referenced will
12718 * remain the same.
 *
 * Parameters:
 *   var_map          IN/OUT  map to start the lookup in; rewritten to the
 *                            submap each time the lookup descends through
 *                            a submap entry (and back to the parent map
 *                            on the submap copy-on-write path).
 *   vaddr                    address being looked up.
 *   fault_type               access being attempted. VM_PROT_IS_MASK and
 *                            VM_PROT_COPY are read as flags first, then
 *                            the value is reduced to VM_PROT_ALL bits.
 *   object_lock_type         OBJECT_LOCK_EXCLUSIVE locks the returned
 *                            object with vm_object_lock(); any other
 *                            value uses vm_object_lock_shared().
 *   out_version      OUT     main_timestamp is set from the final map's
 *                            timestamp (see vm_map_verify()).
 *   object, offset   OUT     object backing "vaddr" and the offset of
 *                            "vaddr" within it.
 *   out_prot         OUT     protection the caller may use for the page.
 *   wired            OUT     TRUE if the entry's wired_count is non-zero.
 *   fault_info       OUT     optional (may be NULL); filled in from the
 *                            entry that was found.
 *   real_map         OUT     starts out as *var_map; replaced by the
 *                            submap when a "use_pmap" submap entry is
 *                            traversed for a fault that is neither a
 *                            write nor a forced copy.
 *
 * Returns KERN_SUCCESS, KERN_INVALID_ADDRESS when no entry covers the
 * address, or KERN_PROTECTION_FAILURE when the entry's protection does
 * not allow the requested access.
12719 */
12720 kern_return_t
12721 vm_map_lookup_locked(
12722 vm_map_t *var_map, /* IN/OUT */
12723 vm_map_offset_t vaddr,
12724 vm_prot_t fault_type,
12725 int object_lock_type,
12726 vm_map_version_t *out_version, /* OUT */
12727 vm_object_t *object, /* OUT */
12728 vm_object_offset_t *offset, /* OUT */
12729 vm_prot_t *out_prot, /* OUT */
12730 boolean_t *wired, /* OUT */
12731 vm_object_fault_info_t fault_info, /* OUT */
12732 vm_map_t *real_map)
12733 {
12734 vm_map_entry_t entry;
12735 vm_map_t map = *var_map;
12736 vm_map_t old_map = *var_map;
12737 vm_map_t cow_sub_map_parent = VM_MAP_NULL;
12738 vm_map_offset_t cow_parent_vaddr = 0;
12739 vm_map_offset_t old_start = 0;
12740 vm_map_offset_t old_end = 0;
12741 vm_prot_t prot;
12742 boolean_t mask_protections;
12743 boolean_t force_copy;
12744 vm_prot_t original_fault_type;
12745
12746 /*
12747 * VM_PROT_MASK means that the caller wants us to use "fault_type"
12748 * as a mask against the mapping's actual protections, not as an
12749 * absolute value.
12750 */
12751 mask_protections = (fault_type & VM_PROT_IS_MASK) ? TRUE : FALSE;
12752 force_copy = (fault_type & VM_PROT_COPY) ? TRUE : FALSE;
12753 fault_type &= VM_PROT_ALL;
12754 original_fault_type = fault_type;
12755
12756 *real_map = map;
12757
/*
 * Every failed lock upgrade below comes back here. "fault_type" may
 * have been narrowed or widened on the previous pass (mask_protections,
 * wired), so it is reset to the caller's value first.
 */
12758 RetryLookup:
12759 fault_type = original_fault_type;
12760
12761 /*
12762 * If the map has an interesting hint, try it before calling
12763 * full blown lookup routine.
12764 */
12765 entry = map->hint;
12766
12767 if ((entry == vm_map_to_entry(map)) ||
12768 (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
12769 vm_map_entry_t tmp_entry;
12770
12771 /*
12772 * Entry was either not a valid hint, or the vaddr
12773 * was not contained in the entry, so do a full lookup.
12774 */
12775 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
12776 if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
12777 vm_map_unlock(cow_sub_map_parent);
12778 }
12779 if ((*real_map != map)
12780 && (*real_map != cow_sub_map_parent)) {
12781 vm_map_unlock(*real_map);
12782 }
12783 return KERN_INVALID_ADDRESS;
12784 }
12785
12786 entry = tmp_entry;
12787 }
/* remember the extent of the entry found in the map the caller passed in */
12788 if (map == old_map) {
12789 old_start = entry->vme_start;
12790 old_end = entry->vme_end;
12791 }
12792
12793 /*
12794 * Handle submaps. Drop lock on upper map, submap is
12795 * returned locked.
12796 */
12797
12798 submap_recurse:
12799 if (entry->is_sub_map) {
12800 vm_map_offset_t local_vaddr;
12801 vm_map_offset_t end_delta;
12802 vm_map_offset_t start_delta;
12803 vm_map_entry_t submap_entry;
12804 vm_prot_t subentry_protection;
12805 vm_prot_t subentry_max_protection;
12806 boolean_t subentry_no_copy_on_read;
12807 boolean_t mapped_needs_copy = FALSE;
12808
12809 local_vaddr = vaddr;
12810
12811 if ((entry->use_pmap &&
12812 !((fault_type & VM_PROT_WRITE) ||
12813 force_copy))) {
12814 /* if real_map equals map we unlock below */
12815 if ((*real_map != map) &&
12816 (*real_map != cow_sub_map_parent)) {
12817 vm_map_unlock(*real_map);
12818 }
12819 *real_map = VME_SUBMAP(entry);
12820 }
12821
12822 if (entry->needs_copy &&
12823 ((fault_type & VM_PROT_WRITE) ||
12824 force_copy)) {
12825 if (!mapped_needs_copy) {
12826 if (vm_map_lock_read_to_write(map)) {
12827 vm_map_lock_read(map);
12828 *real_map = map;
12829 goto RetryLookup;
12830 }
12831 vm_map_lock_read(VME_SUBMAP(entry));
12832 *var_map = VME_SUBMAP(entry);
12833 cow_sub_map_parent = map;
12834 /* reset base to map before cow object */
12835 /* this is the map which will accept */
12836 /* the new cow object */
12837 old_start = entry->vme_start;
12838 old_end = entry->vme_end;
12839 cow_parent_vaddr = vaddr;
12840 mapped_needs_copy = TRUE;
12841 } else {
12842 vm_map_lock_read(VME_SUBMAP(entry));
12843 *var_map = VME_SUBMAP(entry);
12844 if ((cow_sub_map_parent != map) &&
12845 (*real_map != map)) {
12846 vm_map_unlock(map);
12847 }
12848 }
12849 } else {
12850 vm_map_lock_read(VME_SUBMAP(entry));
12851 *var_map = VME_SUBMAP(entry);
12852 /* leave map locked if it is a target */
12853 /* cow sub_map above otherwise, just */
12854 /* follow the maps down to the object */
12855 /* here we unlock knowing we are not */
12856 /* revisiting the map. */
12857 if ((*real_map != map) && (map != cow_sub_map_parent)) {
12858 vm_map_unlock_read(map);
12859 }
12860 }
12861
12862 map = *var_map;
12863
12864 /* calculate the offset in the submap for vaddr */
12865 local_vaddr = (local_vaddr - entry->vme_start) + VME_OFFSET(entry);
12866
12867 RetrySubMap:
12868 if (!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
12869 if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
12870 vm_map_unlock(cow_sub_map_parent);
12871 }
12872 if ((*real_map != map)
12873 && (*real_map != cow_sub_map_parent)) {
12874 vm_map_unlock(*real_map);
12875 }
12876 *real_map = map;
12877 return KERN_INVALID_ADDRESS;
12878 }
12879
12880 /* find the attenuated shadow of the underlying object */
12881 /* on our target map */
12882
12883 /* in english the submap object may extend beyond the */
12884 /* region mapped by the entry or, may only fill a portion */
12885 /* of it. For our purposes, we only care if the object */
12886 /* doesn't fill. In this case the area which will */
12887 /* ultimately be clipped in the top map will only need */
12888 /* to be as big as the portion of the underlying entry */
12889 /* which is mapped */
12890 start_delta = submap_entry->vme_start > VME_OFFSET(entry) ?
12891 submap_entry->vme_start - VME_OFFSET(entry) : 0;
12892
/*
 * NOTE(review): the comparison below adds "start_delta" but the value
 * computed when it fails does not -- confirm this asymmetry is intended.
 */
12893 end_delta =
12894 (VME_OFFSET(entry) + start_delta + (old_end - old_start)) <=
12895 submap_entry->vme_end ?
12896 0 : (VME_OFFSET(entry) +
12897 (old_end - old_start))
12898 - submap_entry->vme_end;
12899
12900 old_start += start_delta;
12901 old_end -= end_delta;
12902
12903 if (submap_entry->is_sub_map) {
12904 entry = submap_entry;
12905 vaddr = local_vaddr;
12906 goto submap_recurse;
12907 }
12908
12909 if (((fault_type & VM_PROT_WRITE) ||
12910 force_copy)
12911 && cow_sub_map_parent) {
12912 vm_object_t sub_object, copy_object;
12913 vm_object_offset_t copy_offset;
12914 vm_map_offset_t local_start;
12915 vm_map_offset_t local_end;
12916 boolean_t copied_slowly = FALSE;
12917
12918 if (vm_map_lock_read_to_write(map)) {
12919 vm_map_lock_read(map);
/* undo the adjustments made above; RetrySubMap applies them again */
12920 old_start -= start_delta;
12921 old_end += end_delta;
12922 goto RetrySubMap;
12923 }
12924
12925
12926 sub_object = VME_OBJECT(submap_entry);
12927 if (sub_object == VM_OBJECT_NULL) {
12928 sub_object =
12929 vm_object_allocate(
12930 (vm_map_size_t)
12931 (submap_entry->vme_end -
12932 submap_entry->vme_start));
12933 VME_OBJECT_SET(submap_entry, sub_object);
12934 VME_OFFSET_SET(submap_entry, 0);
12935 assert(!submap_entry->is_sub_map);
12936 assert(submap_entry->use_pmap);
12937 }
12938 local_start = local_vaddr -
12939 (cow_parent_vaddr - old_start);
12940 local_end = local_vaddr +
12941 (old_end - cow_parent_vaddr);
12942 vm_map_clip_start(map, submap_entry, local_start);
12943 vm_map_clip_end(map, submap_entry, local_end);
12944 if (submap_entry->is_sub_map) {
12945 /* unnesting was done when clipping */
12946 assert(!submap_entry->use_pmap);
12947 }
12948
12949 /* This is the COW case, lets connect */
12950 /* an entry in our space to the underlying */
12951 /* object in the submap, bypassing the */
12952 /* submap. */
12953
12954
12955 if (submap_entry->wired_count != 0 ||
12956 (sub_object->copy_strategy ==
12957 MEMORY_OBJECT_COPY_NONE)) {
12958 vm_object_lock(sub_object);
/*
 * NOTE(review): the result of vm_object_copy_slowly() is not checked
 * and "copy_object" is used unconditionally below -- confirm that this
 * call cannot fail here.
 */
12959 vm_object_copy_slowly(sub_object,
12960 VME_OFFSET(submap_entry),
12961 (submap_entry->vme_end -
12962 submap_entry->vme_start),
12963 FALSE,
12964 &copy_object);
12965 copied_slowly = TRUE;
12966 } else {
12967 /* set up shadow object */
12968 copy_object = sub_object;
12969 vm_object_lock(sub_object);
12970 vm_object_reference_locked(sub_object);
12971 sub_object->shadowed = TRUE;
12972 vm_object_unlock(sub_object);
12973
12974 assert(submap_entry->wired_count == 0);
12975 submap_entry->needs_copy = TRUE;
12976
12977 prot = submap_entry->protection;
12978 assert(!pmap_has_prot_policy(prot));
12979 prot = prot & ~VM_PROT_WRITE;
12980 assert(!pmap_has_prot_policy(prot));
12981
12982 if (override_nx(old_map,
12983 VME_ALIAS(submap_entry))
12984 && prot) {
12985 prot |= VM_PROT_EXECUTE;
12986 }
12987
12988 vm_object_pmap_protect(
12989 sub_object,
12990 VME_OFFSET(submap_entry),
12991 submap_entry->vme_end -
12992 submap_entry->vme_start,
12993 (submap_entry->is_shared
12994 || map->mapped_in_other_pmaps) ?
12995 PMAP_NULL : map->pmap,
12996 submap_entry->vme_start,
12997 prot);
12998 }
12999
13000 /*
13001 * Adjust the fault offset to the submap entry.
13002 */
13003 copy_offset = (local_vaddr -
13004 submap_entry->vme_start +
13005 VME_OFFSET(submap_entry));
13006
13007 /* This works differently than the */
13008 /* normal submap case. We go back */
13009 /* to the parent of the cow map and*/
13010 /* clip out the target portion of */
13011 /* the sub_map, substituting the */
13012 /* new copy object, */
13013
/* save what is needed from submap_entry before its map is unlocked */
13014 subentry_protection = submap_entry->protection;
13015 subentry_max_protection = submap_entry->max_protection;
13016 subentry_no_copy_on_read = submap_entry->vme_no_copy_on_read;
13017 vm_map_unlock(map);
13018 submap_entry = NULL; /* not valid after map unlock */
13019
/*
 * NOTE(review): these two values are overwritten further down (by the
 * pmap_nesting_size_min computation) before they are read.
 */
13020 local_start = old_start;
13021 local_end = old_end;
13022 map = cow_sub_map_parent;
13023 *var_map = cow_sub_map_parent;
13024 vaddr = cow_parent_vaddr;
13025 cow_sub_map_parent = NULL;
13026
13027 if (!vm_map_lookup_entry(map,
13028 vaddr, &entry)) {
13029 vm_object_deallocate(
13030 copy_object);
13031 vm_map_lock_write_to_read(map);
13032 return KERN_INVALID_ADDRESS;
13033 }
13034
13035 /* clip out the portion of space */
13036 /* mapped by the sub map which */
13037 /* corresponds to the underlying */
13038 /* object */
13039
13040 /*
13041 * Clip (and unnest) the smallest nested chunk
13042 * possible around the faulting address...
13043 */
13044 local_start = vaddr & ~(pmap_nesting_size_min - 1);
13045 local_end = local_start + pmap_nesting_size_min;
13046 /*
13047 * ... but don't go beyond the "old_start" to "old_end"
13048 * range, to avoid spanning over another VM region
13049 * with a possibly different VM object and/or offset.
13050 */
13051 if (local_start < old_start) {
13052 local_start = old_start;
13053 }
13054 if (local_end > old_end) {
13055 local_end = old_end;
13056 }
13057 /*
13058 * Adjust copy_offset to the start of the range.
13059 */
13060 copy_offset -= (vaddr - local_start);
13061
13062 vm_map_clip_start(map, entry, local_start);
13063 vm_map_clip_end(map, entry, local_end);
13064 if (entry->is_sub_map) {
13065 /* unnesting was done when clipping */
13066 assert(!entry->use_pmap);
13067 }
13068
13069 /* substitute copy object for */
13070 /* shared map entry */
13071 vm_map_deallocate(VME_SUBMAP(entry));
13072 assert(!entry->iokit_acct);
13073 entry->is_sub_map = FALSE;
13074 entry->use_pmap = TRUE;
13075 VME_OBJECT_SET(entry, copy_object);
13076
13077 /* propagate the submap entry's protections */
13078 if (entry->protection != VM_PROT_READ) {
13079 /*
13080 * Someone has already altered the top entry's
13081 * protections via vm_protect(VM_PROT_COPY).
13082 * Respect these new values and ignore the
13083 * submap entry's protections.
13084 */
13085 } else {
13086 /*
13087 * Regular copy-on-write: propagate the submap
13088 * entry's protections to the top map entry.
13089 */
13090 entry->protection |= subentry_protection;
13091 }
13092 entry->max_protection |= subentry_max_protection;
13093 /* propagate no_copy_on_read */
13094 entry->vme_no_copy_on_read = subentry_no_copy_on_read;
13095
/*
 * A non-JIT entry may not end up both writable and executable: log the
 * attempt and strip the execute permission.
 */
13096 if ((entry->protection & VM_PROT_WRITE) &&
13097 (entry->protection & VM_PROT_EXECUTE) &&
13098 #if !CONFIG_EMBEDDED
13099 map != kernel_map &&
13100 cs_process_enforcement(NULL) &&
13101 #endif /* !CONFIG_EMBEDDED */
13102 !(entry->used_for_jit)) {
13103 DTRACE_VM3(cs_wx,
13104 uint64_t, (uint64_t)entry->vme_start,
13105 uint64_t, (uint64_t)entry->vme_end,
13106 vm_prot_t, entry->protection);
13107 printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
13108 proc_selfpid(),
13109 (current_task()->bsd_info
13110 ? proc_name_address(current_task()->bsd_info)
13111 : "?"),
13112 __FUNCTION__);
13113 entry->protection &= ~VM_PROT_EXECUTE;
13114 }
13115
13116 if (copied_slowly) {
13117 VME_OFFSET_SET(entry, local_start - old_start);
13118 entry->needs_copy = FALSE;
13119 entry->is_shared = FALSE;
13120 } else {
13121 VME_OFFSET_SET(entry, copy_offset);
13122 assert(entry->wired_count == 0);
13123 entry->needs_copy = TRUE;
13124 if (entry->inheritance == VM_INHERIT_SHARE) {
13125 entry->inheritance = VM_INHERIT_COPY;
13126 }
13127 if (map != old_map) {
13128 entry->is_shared = TRUE;
13129 }
13130 }
/*
 * Same check as in the "else" branch just above; it only has an effect
 * here when "copied_slowly" is set.
 */
13131 if (entry->inheritance == VM_INHERIT_SHARE) {
13132 entry->inheritance = VM_INHERIT_COPY;
13133 }
13134
13135 vm_map_lock_write_to_read(map);
13136 } else {
13137 if ((cow_sub_map_parent)
13138 && (cow_sub_map_parent != *real_map)
13139 && (cow_sub_map_parent != map)) {
13140 vm_map_unlock(cow_sub_map_parent);
13141 }
13142 entry = submap_entry;
13143 vaddr = local_vaddr;
13144 }
13145 }
13146
13147 /*
13148 * Check whether this task is allowed to have
13149 * this page.
13150 */
13151
13152 prot = entry->protection;
13153
13154 if (override_nx(old_map, VME_ALIAS(entry)) && prot) {
13155 /*
13156 * HACK -- if not a stack, then allow execution
13157 */
13158 prot |= VM_PROT_EXECUTE;
13159 }
13160
13161 if (mask_protections) {
13162 fault_type &= prot;
13163 if (fault_type == VM_PROT_NONE) {
13164 goto protection_failure;
13165 }
13166 }
13167 if (((fault_type & prot) != fault_type)
13168 #if __arm64__
13169 /* prefetch abort in execute-only page */
13170 && !(prot == VM_PROT_EXECUTE && fault_type == (VM_PROT_READ | VM_PROT_EXECUTE))
13171 #endif
13172 ) {
13173 protection_failure:
13174 if (*real_map != map) {
13175 vm_map_unlock(*real_map);
13176 }
13177 *real_map = map;
13178
13179 if ((fault_type & VM_PROT_EXECUTE) && prot) {
13180 log_stack_execution_failure((addr64_t)vaddr, prot);
13181 }
13182
13183 DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL);
13184 return KERN_PROTECTION_FAILURE;
13185 }
13186
13187 /*
13188 * If this page is not pageable, we have to get
13189 * it for all possible accesses.
13190 */
13191
13192 *wired = (entry->wired_count != 0);
13193 if (*wired) {
13194 fault_type = prot;
13195 }
13196
13197 /*
13198 * If the entry was copy-on-write, we either ...
13199 */
13200
13201 if (entry->needs_copy) {
13202 /*
13203 * If we want to write the page, we may as well
13204 * handle that now since we've got the map locked.
13205 *
13206 * If we don't need to write the page, we just
13207 * demote the permissions allowed.
13208 */
13209
13210 if ((fault_type & VM_PROT_WRITE) || *wired || force_copy) {
13211 /*
13212 * Make a new object, and place it in the
13213 * object chain. Note that no new references
13214 * have appeared -- one just moved from the
13215 * map to the new object.
13216 */
13217
13218 if (vm_map_lock_read_to_write(map)) {
13219 vm_map_lock_read(map);
13220 goto RetryLookup;
13221 }
13222
13223 if (VME_OBJECT(entry)->shadowed == FALSE) {
13224 vm_object_lock(VME_OBJECT(entry));
13225 VME_OBJECT(entry)->shadowed = TRUE;
13226 vm_object_unlock(VME_OBJECT(entry));
13227 }
13228 VME_OBJECT_SHADOW(entry,
13229 (vm_map_size_t) (entry->vme_end -
13230 entry->vme_start));
13231 entry->needs_copy = FALSE;
13232
13233 vm_map_lock_write_to_read(map);
13234 }
13235 if ((fault_type & VM_PROT_WRITE) == 0 && *wired == 0) {
13236 /*
13237 * We're attempting to read a copy-on-write
13238 * page -- don't allow writes.
13239 */
13240
13241 prot &= (~VM_PROT_WRITE);
13242 }
13243 }
13244
13245 /*
13246 * Create an object if necessary.
13247 */
13248 if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
13249 if (vm_map_lock_read_to_write(map)) {
13250 vm_map_lock_read(map);
13251 goto RetryLookup;
13252 }
13253
13254 VME_OBJECT_SET(entry,
13255 vm_object_allocate(
13256 (vm_map_size_t)(entry->vme_end -
13257 entry->vme_start)));
13258 VME_OFFSET_SET(entry, 0);
13259 assert(entry->use_pmap);
13260 vm_map_lock_write_to_read(map);
13261 }
13262
13263 /*
13264 * Return the object/offset from this entry. If the entry
13265 * was copy-on-write or empty, it has been fixed up. Also
13266 * return the protection.
13267 */
13268
13269 *offset = (vaddr - entry->vme_start) + VME_OFFSET(entry);
13270 *object = VME_OBJECT(entry);
13271 *out_prot = prot;
13272 KDBG_FILTERED(MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_MAP_LOOKUP_OBJECT), VM_KERNEL_UNSLIDE_OR_PERM(*object), 0, 0, 0, 0);
13273
/* "fault_info" is optional: describe the entry to the caller if asked */
13274 if (fault_info) {
13275 fault_info->interruptible = THREAD_UNINT; /* for now... */
13276 /* ... the caller will change "interruptible" if needed */
13277 fault_info->cluster_size = 0;
13278 fault_info->user_tag = VME_ALIAS(entry);
13279 fault_info->pmap_options = 0;
13280 if (entry->iokit_acct ||
13281 (!entry->is_sub_map && !entry->use_pmap)) {
13282 fault_info->pmap_options |= PMAP_OPTIONS_ALT_ACCT;
13283 }
13284 fault_info->behavior = entry->behavior;
/* lo_offset/hi_offset: object offsets of the entry's start and end */
13285 fault_info->lo_offset = VME_OFFSET(entry);
13286 fault_info->hi_offset =
13287 (entry->vme_end - entry->vme_start) + VME_OFFSET(entry);
13288 fault_info->no_cache = entry->no_cache;
13289 fault_info->stealth = FALSE;
13290 fault_info->io_sync = FALSE;
13291 if (entry->used_for_jit ||
13292 entry->vme_resilient_codesign) {
13293 fault_info->cs_bypass = TRUE;
13294 } else {
13295 fault_info->cs_bypass = FALSE;
13296 }
13297 fault_info->pmap_cs_associated = FALSE;
13298 #if CONFIG_PMAP_CS
13299 if (entry->pmap_cs_associated) {
13300 /*
13301 * The pmap layer will validate this page
13302 * before allowing it to be executed from.
13303 */
13304 fault_info->pmap_cs_associated = TRUE;
13305 }
13306 #endif /* CONFIG_PMAP_CS */
13307 fault_info->mark_zf_absent = FALSE;
13308 fault_info->batch_pmap_op = FALSE;
13309 fault_info->resilient_media = entry->vme_resilient_media;
13310 fault_info->no_copy_on_read = entry->vme_no_copy_on_read;
13311 }
13312
13313 /*
13314 * Lock the object to prevent it from disappearing
13315 */
13316 if (object_lock_type == OBJECT_LOCK_EXCLUSIVE) {
13317 vm_object_lock(*object);
13318 } else {
13319 vm_object_lock_shared(*object);
13320 }
13321
13322 /*
13323 * Save the version number
13324 */
13325
13326 out_version->main_timestamp = map->timestamp;
13327
13328 return KERN_SUCCESS;
13329 }
13330
13331
13332 /*
13333 * vm_map_verify:
13334 *
13335 * Verifies that the map in question has not changed
13336 * since the given version. The map has to be locked
13337 * ("shared" mode is fine) before calling this function
13338 * and it will be returned locked too.
13339 */
13340 boolean_t
13341 vm_map_verify(
13342 vm_map_t map,
13343 vm_map_version_t *version) /* REF */
13344 {
13345 boolean_t result;
13346
13347 vm_map_lock_assert_held(map);
13348 result = (map->timestamp == version->main_timestamp);
13349
13350 return result;
13351 }
13352
13353 /*
13354 * TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
13355 * Goes away after regular vm_region_recurse function migrates to
13356 * 64 bits
13357 * vm_region_recurse: A form of vm_region which follows the
13358 * submaps in a target map
13359 *
13360 */
13361
13362 kern_return_t
13363 vm_map_region_recurse_64(
13364 vm_map_t map,
13365 vm_map_offset_t *address, /* IN/OUT */
13366 vm_map_size_t *size, /* OUT */
13367 natural_t *nesting_depth, /* IN/OUT */
13368 vm_region_submap_info_64_t submap_info, /* IN/OUT */
13369 mach_msg_type_number_t *count) /* IN/OUT */
13370 {
13371 mach_msg_type_number_t original_count;
13372 vm_region_extended_info_data_t extended;
13373 vm_map_entry_t tmp_entry;
13374 vm_map_offset_t user_address;
13375 unsigned int user_max_depth;
13376
13377 /*
13378 * "curr_entry" is the VM map entry preceding or including the
13379 * address we're looking for.
13380 * "curr_map" is the map or sub-map containing "curr_entry".
13381 * "curr_address" is the equivalent of the top map's "user_address"
13382 * in the current map.
13383 * "curr_offset" is the cumulated offset of "curr_map" in the
13384 * target task's address space.
13385 * "curr_depth" is the depth of "curr_map" in the chain of
13386 * sub-maps.
13387 *
13388 * "curr_max_below" and "curr_max_above" limit the range (around
13389 * "curr_address") we should take into account in the current (sub)map.
13390 * They limit the range to what's visible through the map entries
13391 * we've traversed from the top map to the current map.
13392 *
13393 */
13394 vm_map_entry_t curr_entry;
13395 vm_map_address_t curr_address;
13396 vm_map_offset_t curr_offset;
13397 vm_map_t curr_map;
13398 unsigned int curr_depth;
13399 vm_map_offset_t curr_max_below, curr_max_above;
13400 vm_map_offset_t curr_skip;
13401
13402 /*
13403 * "next_" is the same as "curr_" but for the VM region immediately
13404 * after the address we're looking for. We need to keep track of this
13405 * too because we want to return info about that region if the
13406 * address we're looking for is not mapped.
13407 */
13408 vm_map_entry_t next_entry;
13409 vm_map_offset_t next_offset;
13410 vm_map_offset_t next_address;
13411 vm_map_t next_map;
13412 unsigned int next_depth;
13413 vm_map_offset_t next_max_below, next_max_above;
13414 vm_map_offset_t next_skip;
13415
13416 boolean_t look_for_pages;
13417 vm_region_submap_short_info_64_t short_info;
13418 boolean_t do_region_footprint;
13419
13420 if (map == VM_MAP_NULL) {
13421 /* no address space to work on */
13422 return KERN_INVALID_ARGUMENT;
13423 }
13424
13425
13426 if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) {
13427 /*
13428 * "info" structure is not big enough and
13429 * would overflow
13430 */
13431 return KERN_INVALID_ARGUMENT;
13432 }
13433
13434 do_region_footprint = task_self_region_footprint();
13435 original_count = *count;
13436
13437 if (original_count < VM_REGION_SUBMAP_INFO_V0_COUNT_64) {
13438 *count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
13439 look_for_pages = FALSE;
13440 short_info = (vm_region_submap_short_info_64_t) submap_info;
13441 submap_info = NULL;
13442 } else {
13443 look_for_pages = TRUE;
13444 *count = VM_REGION_SUBMAP_INFO_V0_COUNT_64;
13445 short_info = NULL;
13446
13447 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
13448 *count = VM_REGION_SUBMAP_INFO_V1_COUNT_64;
13449 }
13450 if (original_count >= VM_REGION_SUBMAP_INFO_V2_COUNT_64) {
13451 *count = VM_REGION_SUBMAP_INFO_V2_COUNT_64;
13452 }
13453 }
13454
13455 user_address = *address;
13456 user_max_depth = *nesting_depth;
13457
13458 if (not_in_kdp) {
13459 vm_map_lock_read(map);
13460 }
13461
13462 recurse_again:
13463 curr_entry = NULL;
13464 curr_map = map;
13465 curr_address = user_address;
13466 curr_offset = 0;
13467 curr_skip = 0;
13468 curr_depth = 0;
13469 curr_max_above = ((vm_map_offset_t) -1) - curr_address;
13470 curr_max_below = curr_address;
13471
13472 next_entry = NULL;
13473 next_map = NULL;
13474 next_address = 0;
13475 next_offset = 0;
13476 next_skip = 0;
13477 next_depth = 0;
13478 next_max_above = (vm_map_offset_t) -1;
13479 next_max_below = (vm_map_offset_t) -1;
13480
13481 for (;;) {
13482 if (vm_map_lookup_entry(curr_map,
13483 curr_address,
13484 &tmp_entry)) {
13485 /* tmp_entry contains the address we're looking for */
13486 curr_entry = tmp_entry;
13487 } else {
13488 vm_map_offset_t skip;
13489 /*
13490 * The address is not mapped. "tmp_entry" is the
13491 * map entry preceding the address. We want the next
13492 * one, if it exists.
13493 */
13494 curr_entry = tmp_entry->vme_next;
13495
13496 if (curr_entry == vm_map_to_entry(curr_map) ||
13497 (curr_entry->vme_start >=
13498 curr_address + curr_max_above)) {
13499 /* no next entry at this level: stop looking */
13500 if (not_in_kdp) {
13501 vm_map_unlock_read(curr_map);
13502 }
13503 curr_entry = NULL;
13504 curr_map = NULL;
13505 curr_skip = 0;
13506 curr_offset = 0;
13507 curr_depth = 0;
13508 curr_max_above = 0;
13509 curr_max_below = 0;
13510 break;
13511 }
13512
13513 /* adjust current address and offset */
13514 skip = curr_entry->vme_start - curr_address;
13515 curr_address = curr_entry->vme_start;
13516 curr_skip += skip;
13517 curr_offset += skip;
13518 curr_max_above -= skip;
13519 curr_max_below = 0;
13520 }
13521
13522 /*
13523 * Is the next entry at this level closer to the address (or
13524 * deeper in the submap chain) than the one we had
13525 * so far ?
13526 */
13527 tmp_entry = curr_entry->vme_next;
13528 if (tmp_entry == vm_map_to_entry(curr_map)) {
13529 /* no next entry at this level */
13530 } else if (tmp_entry->vme_start >=
13531 curr_address + curr_max_above) {
13532 /*
13533 * tmp_entry is beyond the scope of what we mapped of
13534 * this submap in the upper level: ignore it.
13535 */
13536 } else if ((next_entry == NULL) ||
13537 (tmp_entry->vme_start + curr_offset <=
13538 next_entry->vme_start + next_offset)) {
13539 /*
13540 * We didn't have a "next_entry" or this one is
13541 * closer to the address we're looking for:
13542 * use this "tmp_entry" as the new "next_entry".
13543 */
13544 if (next_entry != NULL) {
13545 /* unlock the last "next_map" */
13546 if (next_map != curr_map && not_in_kdp) {
13547 vm_map_unlock_read(next_map);
13548 }
13549 }
13550 next_entry = tmp_entry;
13551 next_map = curr_map;
13552 next_depth = curr_depth;
13553 next_address = next_entry->vme_start;
13554 next_skip = curr_skip;
13555 next_skip += (next_address - curr_address);
13556 next_offset = curr_offset;
13557 next_offset += (next_address - curr_address);
13558 next_max_above = MIN(next_max_above, curr_max_above);
13559 next_max_above = MIN(next_max_above,
13560 next_entry->vme_end - next_address);
13561 next_max_below = MIN(next_max_below, curr_max_below);
13562 next_max_below = MIN(next_max_below,
13563 next_address - next_entry->vme_start);
13564 }
13565
13566 /*
13567 * "curr_max_{above,below}" allow us to keep track of the
13568 * portion of the submap that is actually mapped at this level:
13569 * the rest of that submap is irrelevant to us, since it's not
13570 * mapped here.
13571 * The relevant portion of the map starts at
13572 * "VME_OFFSET(curr_entry)" up to the size of "curr_entry".
13573 */
13574 curr_max_above = MIN(curr_max_above,
13575 curr_entry->vme_end - curr_address);
13576 curr_max_below = MIN(curr_max_below,
13577 curr_address - curr_entry->vme_start);
13578
13579 if (!curr_entry->is_sub_map ||
13580 curr_depth >= user_max_depth) {
13581 /*
13582 * We hit a leaf map or we reached the maximum depth
13583 * we could, so stop looking. Keep the current map
13584 * locked.
13585 */
13586 break;
13587 }
13588
13589 /*
13590 * Get down to the next submap level.
13591 */
13592
13593 /*
13594 * Lock the next level and unlock the current level,
13595 * unless we need to keep it locked to access the "next_entry"
13596 * later.
13597 */
13598 if (not_in_kdp) {
13599 vm_map_lock_read(VME_SUBMAP(curr_entry));
13600 }
13601 if (curr_map == next_map) {
13602 /* keep "next_map" locked in case we need it */
13603 } else {
13604 /* release this map */
13605 if (not_in_kdp) {
13606 vm_map_unlock_read(curr_map);
13607 }
13608 }
13609
13610 /*
13611 * Adjust the offset. "curr_entry" maps the submap
13612 * at relative address "curr_entry->vme_start" in the
13613 * curr_map but skips the first "VME_OFFSET(curr_entry)"
13614 * bytes of the submap.
13615 * "curr_offset" always represents the offset of a virtual
13616 * address in the curr_map relative to the absolute address
13617 * space (i.e. the top-level VM map).
13618 */
13619 curr_offset +=
13620 (VME_OFFSET(curr_entry) - curr_entry->vme_start);
13621 curr_address = user_address + curr_offset;
13622 /* switch to the submap */
13623 curr_map = VME_SUBMAP(curr_entry);
13624 curr_depth++;
13625 curr_entry = NULL;
13626 }
13627
13628 // LP64todo: all the current tools are 32bit, obviously never worked for 64b
13629 // so probably should be a real 32b ID vs. ptr.
13630 // Current users just check for equality
13631
13632 if (curr_entry == NULL) {
13633 /* no VM region contains the address... */
13634
13635 if (do_region_footprint && /* we want footprint numbers */
13636 next_entry == NULL && /* & there are no more regions */
13637 /* & we haven't already provided our fake region: */
13638 user_address <= vm_map_last_entry(map)->vme_end) {
13639 ledger_amount_t ledger_resident, ledger_compressed;
13640
13641 /*
13642 * Add a fake memory region to account for
13643 * purgeable and/or ledger-tagged memory that
13644 * counts towards this task's memory footprint,
13645 * i.e. the resident/compressed pages of non-volatile
13646 * objects owned by that task.
13647 */
13648 task_ledgers_footprint(map->pmap->ledger,
13649 &ledger_resident,
13650 &ledger_compressed);
13651 if (ledger_resident + ledger_compressed == 0) {
13652 /* no purgeable memory usage to report */
13653 return KERN_INVALID_ADDRESS;
13654 }
13655 /* fake region to show nonvolatile footprint */
13656 if (look_for_pages) {
13657 submap_info->protection = VM_PROT_DEFAULT;
13658 submap_info->max_protection = VM_PROT_DEFAULT;
13659 submap_info->inheritance = VM_INHERIT_DEFAULT;
13660 submap_info->offset = 0;
13661 submap_info->user_tag = -1;
13662 submap_info->pages_resident = (unsigned int) (ledger_resident / PAGE_SIZE);
13663 submap_info->pages_shared_now_private = 0;
13664 submap_info->pages_swapped_out = (unsigned int) (ledger_compressed / PAGE_SIZE);
13665 submap_info->pages_dirtied = submap_info->pages_resident;
13666 submap_info->ref_count = 1;
13667 submap_info->shadow_depth = 0;
13668 submap_info->external_pager = 0;
13669 submap_info->share_mode = SM_PRIVATE;
13670 submap_info->is_submap = 0;
13671 submap_info->behavior = VM_BEHAVIOR_DEFAULT;
13672 submap_info->object_id = INFO_MAKE_FAKE_OBJECT_ID(map, task_ledgers.purgeable_nonvolatile);
13673 submap_info->user_wired_count = 0;
13674 submap_info->pages_reusable = 0;
13675 } else {
13676 short_info->user_tag = -1;
13677 short_info->offset = 0;
13678 short_info->protection = VM_PROT_DEFAULT;
13679 short_info->inheritance = VM_INHERIT_DEFAULT;
13680 short_info->max_protection = VM_PROT_DEFAULT;
13681 short_info->behavior = VM_BEHAVIOR_DEFAULT;
13682 short_info->user_wired_count = 0;
13683 short_info->is_submap = 0;
13684 short_info->object_id = INFO_MAKE_FAKE_OBJECT_ID(map, task_ledgers.purgeable_nonvolatile);
13685 short_info->external_pager = 0;
13686 short_info->shadow_depth = 0;
13687 short_info->share_mode = SM_PRIVATE;
13688 short_info->ref_count = 1;
13689 }
13690 *nesting_depth = 0;
13691 *size = (vm_map_size_t) (ledger_resident + ledger_compressed);
13692 // *address = user_address;
13693 *address = vm_map_last_entry(map)->vme_end;
13694 return KERN_SUCCESS;
13695 }
13696
13697 if (next_entry == NULL) {
13698 /* ... and no VM region follows it either */
13699 return KERN_INVALID_ADDRESS;
13700 }
13701 /* ... gather info about the next VM region */
13702 curr_entry = next_entry;
13703 curr_map = next_map; /* still locked ... */
13704 curr_address = next_address;
13705 curr_skip = next_skip;
13706 curr_offset = next_offset;
13707 curr_depth = next_depth;
13708 curr_max_above = next_max_above;
13709 curr_max_below = next_max_below;
13710 } else {
13711 /* we won't need "next_entry" after all */
13712 if (next_entry != NULL) {
13713 /* release "next_map" */
13714 if (next_map != curr_map && not_in_kdp) {
13715 vm_map_unlock_read(next_map);
13716 }
13717 }
13718 }
13719 next_entry = NULL;
13720 next_map = NULL;
13721 next_offset = 0;
13722 next_skip = 0;
13723 next_depth = 0;
13724 next_max_below = -1;
13725 next_max_above = -1;
13726
13727 if (curr_entry->is_sub_map &&
13728 curr_depth < user_max_depth) {
13729 /*
13730 * We're not as deep as we could be: we must have
13731 * gone back up after not finding anything mapped
13732 * below the original top-level map entry's.
13733 * Let's move "curr_address" forward and recurse again.
13734 */
13735 user_address = curr_address;
13736 goto recurse_again;
13737 }
13738
13739 *nesting_depth = curr_depth;
13740 *size = curr_max_above + curr_max_below;
13741 *address = user_address + curr_skip - curr_max_below;
13742
13743 // LP64todo: all the current tools are 32bit, obviously never worked for 64b
13744 // so probably should be a real 32b ID vs. ptr.
13745 // Current users just check for equality
13746 #define INFO_MAKE_OBJECT_ID(p) ((uint32_t)(uintptr_t)VM_KERNEL_ADDRPERM(p))
13747
13748 if (look_for_pages) {
13749 submap_info->user_tag = VME_ALIAS(curr_entry);
13750 submap_info->offset = VME_OFFSET(curr_entry);
13751 submap_info->protection = curr_entry->protection;
13752 submap_info->inheritance = curr_entry->inheritance;
13753 submap_info->max_protection = curr_entry->max_protection;
13754 submap_info->behavior = curr_entry->behavior;
13755 submap_info->user_wired_count = curr_entry->user_wired_count;
13756 submap_info->is_submap = curr_entry->is_sub_map;
13757 submap_info->object_id = INFO_MAKE_OBJECT_ID(VME_OBJECT(curr_entry));
13758 } else {
13759 short_info->user_tag = VME_ALIAS(curr_entry);
13760 short_info->offset = VME_OFFSET(curr_entry);
13761 short_info->protection = curr_entry->protection;
13762 short_info->inheritance = curr_entry->inheritance;
13763 short_info->max_protection = curr_entry->max_protection;
13764 short_info->behavior = curr_entry->behavior;
13765 short_info->user_wired_count = curr_entry->user_wired_count;
13766 short_info->is_submap = curr_entry->is_sub_map;
13767 short_info->object_id = INFO_MAKE_OBJECT_ID(VME_OBJECT(curr_entry));
13768 }
13769
13770 extended.pages_resident = 0;
13771 extended.pages_swapped_out = 0;
13772 extended.pages_shared_now_private = 0;
13773 extended.pages_dirtied = 0;
13774 extended.pages_reusable = 0;
13775 extended.external_pager = 0;
13776 extended.shadow_depth = 0;
13777 extended.share_mode = SM_EMPTY;
13778 extended.ref_count = 0;
13779
13780 if (not_in_kdp) {
13781 if (!curr_entry->is_sub_map) {
13782 vm_map_offset_t range_start, range_end;
13783 range_start = MAX((curr_address - curr_max_below),
13784 curr_entry->vme_start);
13785 range_end = MIN((curr_address + curr_max_above),
13786 curr_entry->vme_end);
13787 vm_map_region_walk(curr_map,
13788 range_start,
13789 curr_entry,
13790 (VME_OFFSET(curr_entry) +
13791 (range_start -
13792 curr_entry->vme_start)),
13793 range_end - range_start,
13794 &extended,
13795 look_for_pages, VM_REGION_EXTENDED_INFO_COUNT);
13796 if (extended.external_pager &&
13797 extended.ref_count == 2 &&
13798 extended.share_mode == SM_SHARED) {
13799 extended.share_mode = SM_PRIVATE;
13800 }
13801 } else {
13802 if (curr_entry->use_pmap) {
13803 extended.share_mode = SM_TRUESHARED;
13804 } else {
13805 extended.share_mode = SM_PRIVATE;
13806 }
13807 extended.ref_count = os_ref_get_count(&VME_SUBMAP(curr_entry)->map_refcnt);
13808 }
13809 }
13810
13811 if (look_for_pages) {
13812 submap_info->pages_resident = extended.pages_resident;
13813 submap_info->pages_swapped_out = extended.pages_swapped_out;
13814 submap_info->pages_shared_now_private =
13815 extended.pages_shared_now_private;
13816 submap_info->pages_dirtied = extended.pages_dirtied;
13817 submap_info->external_pager = extended.external_pager;
13818 submap_info->shadow_depth = extended.shadow_depth;
13819 submap_info->share_mode = extended.share_mode;
13820 submap_info->ref_count = extended.ref_count;
13821
13822 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
13823 submap_info->pages_reusable = extended.pages_reusable;
13824 }
13825 if (original_count >= VM_REGION_SUBMAP_INFO_V2_COUNT_64) {
13826 submap_info->object_id_full = (vm_object_id_t) (VME_OBJECT(curr_entry) != NULL) ? VM_KERNEL_ADDRPERM(VME_OBJECT(curr_entry)) : 0ULL;
13827 }
13828 } else {
13829 short_info->external_pager = extended.external_pager;
13830 short_info->shadow_depth = extended.shadow_depth;
13831 short_info->share_mode = extended.share_mode;
13832 short_info->ref_count = extended.ref_count;
13833 }
13834
13835 if (not_in_kdp) {
13836 vm_map_unlock_read(curr_map);
13837 }
13838
13839 return KERN_SUCCESS;
13840 }
13841
13842 /*
13843 * vm_region:
13844 *
13845 * User call to obtain information about a region in
13846 * a task's address map. Currently, only one flavor is
13847 * supported.
13848 *
13849 * XXX The reserved and behavior fields cannot be filled
13850 * in until the vm merge from the IK is completed, and
13851 * vm_reserve is implemented.
13852 */
13853
13854 kern_return_t
13855 vm_map_region(
13856 vm_map_t map,
13857 vm_map_offset_t *address, /* IN/OUT */
13858 vm_map_size_t *size, /* OUT */
13859 vm_region_flavor_t flavor, /* IN */
13860 vm_region_info_t info, /* OUT */
13861 mach_msg_type_number_t *count, /* IN/OUT */
13862 mach_port_t *object_name) /* OUT */
13863 {
13864 vm_map_entry_t tmp_entry;
13865 vm_map_entry_t entry;
13866 vm_map_offset_t start;
13867
13868 if (map == VM_MAP_NULL) {
13869 return KERN_INVALID_ARGUMENT;
13870 }
13871
13872 switch (flavor) {
13873 case VM_REGION_BASIC_INFO:
13874 /* legacy for old 32-bit objects info */
13875 {
13876 vm_region_basic_info_t basic;
13877
13878 if (*count < VM_REGION_BASIC_INFO_COUNT) {
13879 return KERN_INVALID_ARGUMENT;
13880 }
13881
13882 basic = (vm_region_basic_info_t) info;
13883 *count = VM_REGION_BASIC_INFO_COUNT;
13884
13885 vm_map_lock_read(map);
13886
13887 start = *address;
13888 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
13889 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
13890 vm_map_unlock_read(map);
13891 return KERN_INVALID_ADDRESS;
13892 }
13893 } else {
13894 entry = tmp_entry;
13895 }
13896
13897 start = entry->vme_start;
13898
13899 basic->offset = (uint32_t)VME_OFFSET(entry);
13900 basic->protection = entry->protection;
13901 basic->inheritance = entry->inheritance;
13902 basic->max_protection = entry->max_protection;
13903 basic->behavior = entry->behavior;
13904 basic->user_wired_count = entry->user_wired_count;
13905 basic->reserved = entry->is_sub_map;
13906 *address = start;
13907 *size = (entry->vme_end - start);
13908
13909 if (object_name) {
13910 *object_name = IP_NULL;
13911 }
13912 if (entry->is_sub_map) {
13913 basic->shared = FALSE;
13914 } else {
13915 basic->shared = entry->is_shared;
13916 }
13917
13918 vm_map_unlock_read(map);
13919 return KERN_SUCCESS;
13920 }
13921
13922 case VM_REGION_BASIC_INFO_64:
13923 {
13924 vm_region_basic_info_64_t basic;
13925
13926 if (*count < VM_REGION_BASIC_INFO_COUNT_64) {
13927 return KERN_INVALID_ARGUMENT;
13928 }
13929
13930 basic = (vm_region_basic_info_64_t) info;
13931 *count = VM_REGION_BASIC_INFO_COUNT_64;
13932
13933 vm_map_lock_read(map);
13934
13935 start = *address;
13936 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
13937 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
13938 vm_map_unlock_read(map);
13939 return KERN_INVALID_ADDRESS;
13940 }
13941 } else {
13942 entry = tmp_entry;
13943 }
13944
13945 start = entry->vme_start;
13946
13947 basic->offset = VME_OFFSET(entry);
13948 basic->protection = entry->protection;
13949 basic->inheritance = entry->inheritance;
13950 basic->max_protection = entry->max_protection;
13951 basic->behavior = entry->behavior;
13952 basic->user_wired_count = entry->user_wired_count;
13953 basic->reserved = entry->is_sub_map;
13954 *address = start;
13955 *size = (entry->vme_end - start);
13956
13957 if (object_name) {
13958 *object_name = IP_NULL;
13959 }
13960 if (entry->is_sub_map) {
13961 basic->shared = FALSE;
13962 } else {
13963 basic->shared = entry->is_shared;
13964 }
13965
13966 vm_map_unlock_read(map);
13967 return KERN_SUCCESS;
13968 }
13969 case VM_REGION_EXTENDED_INFO:
13970 if (*count < VM_REGION_EXTENDED_INFO_COUNT) {
13971 return KERN_INVALID_ARGUMENT;
13972 }
13973 /*fallthru*/
13974 case VM_REGION_EXTENDED_INFO__legacy:
13975 if (*count < VM_REGION_EXTENDED_INFO_COUNT__legacy) {
13976 return KERN_INVALID_ARGUMENT;
13977 }
13978
13979 {
13980 vm_region_extended_info_t extended;
13981 mach_msg_type_number_t original_count;
13982
13983 extended = (vm_region_extended_info_t) info;
13984
13985 vm_map_lock_read(map);
13986
13987 start = *address;
13988 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
13989 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
13990 vm_map_unlock_read(map);
13991 return KERN_INVALID_ADDRESS;
13992 }
13993 } else {
13994 entry = tmp_entry;
13995 }
13996 start = entry->vme_start;
13997
13998 extended->protection = entry->protection;
13999 extended->user_tag = VME_ALIAS(entry);
14000 extended->pages_resident = 0;
14001 extended->pages_swapped_out = 0;
14002 extended->pages_shared_now_private = 0;
14003 extended->pages_dirtied = 0;
14004 extended->external_pager = 0;
14005 extended->shadow_depth = 0;
14006
14007 original_count = *count;
14008 if (flavor == VM_REGION_EXTENDED_INFO__legacy) {
14009 *count = VM_REGION_EXTENDED_INFO_COUNT__legacy;
14010 } else {
14011 extended->pages_reusable = 0;
14012 *count = VM_REGION_EXTENDED_INFO_COUNT;
14013 }
14014
14015 vm_map_region_walk(map, start, entry, VME_OFFSET(entry), entry->vme_end - start, extended, TRUE, *count);
14016
14017 if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED) {
14018 extended->share_mode = SM_PRIVATE;
14019 }
14020
14021 if (object_name) {
14022 *object_name = IP_NULL;
14023 }
14024 *address = start;
14025 *size = (entry->vme_end - start);
14026
14027 vm_map_unlock_read(map);
14028 return KERN_SUCCESS;
14029 }
14030 case VM_REGION_TOP_INFO:
14031 {
14032 vm_region_top_info_t top;
14033
14034 if (*count < VM_REGION_TOP_INFO_COUNT) {
14035 return KERN_INVALID_ARGUMENT;
14036 }
14037
14038 top = (vm_region_top_info_t) info;
14039 *count = VM_REGION_TOP_INFO_COUNT;
14040
14041 vm_map_lock_read(map);
14042
14043 start = *address;
14044 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
14045 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
14046 vm_map_unlock_read(map);
14047 return KERN_INVALID_ADDRESS;
14048 }
14049 } else {
14050 entry = tmp_entry;
14051 }
14052 start = entry->vme_start;
14053
14054 top->private_pages_resident = 0;
14055 top->shared_pages_resident = 0;
14056
14057 vm_map_region_top_walk(entry, top);
14058
14059 if (object_name) {
14060 *object_name = IP_NULL;
14061 }
14062 *address = start;
14063 *size = (entry->vme_end - start);
14064
14065 vm_map_unlock_read(map);
14066 return KERN_SUCCESS;
14067 }
14068 default:
14069 return KERN_INVALID_ARGUMENT;
14070 }
14071 }
14072
14073 #define OBJ_RESIDENT_COUNT(obj, entry_size) \
14074 MIN((entry_size), \
14075 ((obj)->all_reusable ? \
14076 (obj)->wired_page_count : \
14077 (obj)->resident_page_count - (obj)->reusable_page_count))
14078
14079 void
14080 vm_map_region_top_walk(
14081 vm_map_entry_t entry,
14082 vm_region_top_info_t top)
14083 {
14084 if (VME_OBJECT(entry) == 0 || entry->is_sub_map) {
14085 top->share_mode = SM_EMPTY;
14086 top->ref_count = 0;
14087 top->obj_id = 0;
14088 return;
14089 }
14090
14091 {
14092 struct vm_object *obj, *tmp_obj;
14093 int ref_count;
14094 uint32_t entry_size;
14095
14096 entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64);
14097
14098 obj = VME_OBJECT(entry);
14099
14100 vm_object_lock(obj);
14101
14102 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
14103 ref_count--;
14104 }
14105
14106 assert(obj->reusable_page_count <= obj->resident_page_count);
14107 if (obj->shadow) {
14108 if (ref_count == 1) {
14109 top->private_pages_resident =
14110 OBJ_RESIDENT_COUNT(obj, entry_size);
14111 } else {
14112 top->shared_pages_resident =
14113 OBJ_RESIDENT_COUNT(obj, entry_size);
14114 }
14115 top->ref_count = ref_count;
14116 top->share_mode = SM_COW;
14117
14118 while ((tmp_obj = obj->shadow)) {
14119 vm_object_lock(tmp_obj);
14120 vm_object_unlock(obj);
14121 obj = tmp_obj;
14122
14123 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
14124 ref_count--;
14125 }
14126
14127 assert(obj->reusable_page_count <= obj->resident_page_count);
14128 top->shared_pages_resident +=
14129 OBJ_RESIDENT_COUNT(obj, entry_size);
14130 top->ref_count += ref_count - 1;
14131 }
14132 } else {
14133 if (entry->superpage_size) {
14134 top->share_mode = SM_LARGE_PAGE;
14135 top->shared_pages_resident = 0;
14136 top->private_pages_resident = entry_size;
14137 } else if (entry->needs_copy) {
14138 top->share_mode = SM_COW;
14139 top->shared_pages_resident =
14140 OBJ_RESIDENT_COUNT(obj, entry_size);
14141 } else {
14142 if (ref_count == 1 ||
14143 (ref_count == 2 && obj->named)) {
14144 top->share_mode = SM_PRIVATE;
14145 top->private_pages_resident =
14146 OBJ_RESIDENT_COUNT(obj,
14147 entry_size);
14148 } else {
14149 top->share_mode = SM_SHARED;
14150 top->shared_pages_resident =
14151 OBJ_RESIDENT_COUNT(obj,
14152 entry_size);
14153 }
14154 }
14155 top->ref_count = ref_count;
14156 }
14157 /* XXX K64: obj_id will be truncated */
14158 top->obj_id = (unsigned int) (uintptr_t)VM_KERNEL_ADDRPERM(obj);
14159
14160 vm_object_unlock(obj);
14161 }
14162 }
14163
14164 void
14165 vm_map_region_walk(
14166 vm_map_t map,
14167 vm_map_offset_t va,
14168 vm_map_entry_t entry,
14169 vm_object_offset_t offset,
14170 vm_object_size_t range,
14171 vm_region_extended_info_t extended,
14172 boolean_t look_for_pages,
14173 mach_msg_type_number_t count)
14174 {
14175 struct vm_object *obj, *tmp_obj;
14176 vm_map_offset_t last_offset;
14177 int i;
14178 int ref_count;
14179 struct vm_object *shadow_object;
14180 int shadow_depth;
14181 boolean_t do_region_footprint;
14182
14183 do_region_footprint = task_self_region_footprint();
14184
14185 if ((VME_OBJECT(entry) == 0) ||
14186 (entry->is_sub_map) ||
14187 (VME_OBJECT(entry)->phys_contiguous &&
14188 !entry->superpage_size)) {
14189 extended->share_mode = SM_EMPTY;
14190 extended->ref_count = 0;
14191 return;
14192 }
14193
14194 if (entry->superpage_size) {
14195 extended->shadow_depth = 0;
14196 extended->share_mode = SM_LARGE_PAGE;
14197 extended->ref_count = 1;
14198 extended->external_pager = 0;
14199 extended->pages_resident = (unsigned int)(range >> PAGE_SHIFT);
14200 extended->shadow_depth = 0;
14201 return;
14202 }
14203
14204 obj = VME_OBJECT(entry);
14205
14206 vm_object_lock(obj);
14207
14208 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
14209 ref_count--;
14210 }
14211
14212 if (look_for_pages) {
14213 for (last_offset = offset + range;
14214 offset < last_offset;
14215 offset += PAGE_SIZE_64, va += PAGE_SIZE) {
14216 if (do_region_footprint) {
14217 int disp;
14218
14219 disp = 0;
14220 if (map->has_corpse_footprint) {
14221 /*
14222 * Query the page info data we saved
14223 * while forking the corpse.
14224 */
14225 vm_map_corpse_footprint_query_page_info(
14226 map,
14227 va,
14228 &disp);
14229 } else {
14230 /*
14231 * Query the pmap.
14232 */
14233 pmap_query_page_info(map->pmap,
14234 va,
14235 &disp);
14236 }
14237 if (disp & PMAP_QUERY_PAGE_PRESENT) {
14238 if (!(disp & PMAP_QUERY_PAGE_ALTACCT)) {
14239 extended->pages_resident++;
14240 }
14241 if (disp & PMAP_QUERY_PAGE_REUSABLE) {
14242 extended->pages_reusable++;
14243 } else if (!(disp & PMAP_QUERY_PAGE_INTERNAL) ||
14244 (disp & PMAP_QUERY_PAGE_ALTACCT)) {
14245 /* alternate accounting */
14246 } else {
14247 extended->pages_dirtied++;
14248 }
14249 } else if (disp & PMAP_QUERY_PAGE_COMPRESSED) {
14250 if (disp & PMAP_QUERY_PAGE_COMPRESSED_ALTACCT) {
14251 /* alternate accounting */
14252 } else {
14253 extended->pages_swapped_out++;
14254 }
14255 }
14256 /* deal with alternate accounting */
14257 if (obj->purgable == VM_PURGABLE_NONVOLATILE &&
14258 /* && not tagged as no-footprint? */
14259 VM_OBJECT_OWNER(obj) != NULL &&
14260 VM_OBJECT_OWNER(obj)->map == map) {
14261 if ((((va
14262 - entry->vme_start
14263 + VME_OFFSET(entry))
14264 / PAGE_SIZE) <
14265 (obj->resident_page_count +
14266 vm_compressor_pager_get_count(obj->pager)))) {
14267 /*
14268 * Non-volatile purgeable object owned
14269 * by this task: report the first
14270 * "#resident + #compressed" pages as
14271 * "resident" (to show that they
14272 * contribute to the footprint) but not
14273 * "dirty" (to avoid double-counting
14274 * with the fake "non-volatile" region
14275 * we'll report at the end of the
14276 * address space to account for all
14277 * (mapped or not) non-volatile memory
14278 * owned by this task.
14279 */
14280 extended->pages_resident++;
14281 }
14282 } else if ((obj->purgable == VM_PURGABLE_VOLATILE ||
14283 obj->purgable == VM_PURGABLE_EMPTY) &&
14284 /* && not tagged as no-footprint? */
14285 VM_OBJECT_OWNER(obj) != NULL &&
14286 VM_OBJECT_OWNER(obj)->map == map) {
14287 if ((((va
14288 - entry->vme_start
14289 + VME_OFFSET(entry))
14290 / PAGE_SIZE) <
14291 obj->wired_page_count)) {
14292 /*
14293 * Volatile|empty purgeable object owned
14294 * by this task: report the first
14295 * "#wired" pages as "resident" (to
14296 * show that they contribute to the
14297 * footprint) but not "dirty" (to avoid
14298 * double-counting with the fake
14299 * "non-volatile" region we'll report
14300 * at the end of the address space to
14301 * account for all (mapped or not)
14302 * non-volatile memory owned by this
14303 * task.
14304 */
14305 extended->pages_resident++;
14306 }
14307 } else if (obj->purgable != VM_PURGABLE_DENY) {
14308 /*
14309 * Pages from purgeable objects
14310 * will be reported as dirty
14311 * appropriately in an extra
14312 * fake memory region at the end of
14313 * the address space.
14314 */
14315 } else if (entry->iokit_acct) {
14316 /*
14317 * IOKit mappings are considered
14318 * as fully dirty for footprint's
14319 * sake.
14320 */
14321 extended->pages_dirtied++;
14322 }
14323 continue;
14324 }
14325
14326 vm_map_region_look_for_page(map, va, obj,
14327 offset, ref_count,
14328 0, extended, count);
14329 }
14330
14331 if (do_region_footprint) {
14332 goto collect_object_info;
14333 }
14334 } else {
14335 collect_object_info:
14336 shadow_object = obj->shadow;
14337 shadow_depth = 0;
14338
14339 if (!(obj->internal)) {
14340 extended->external_pager = 1;
14341 }
14342
14343 if (shadow_object != VM_OBJECT_NULL) {
14344 vm_object_lock(shadow_object);
14345 for (;
14346 shadow_object != VM_OBJECT_NULL;
14347 shadow_depth++) {
14348 vm_object_t next_shadow;
14349
14350 if (!(shadow_object->internal)) {
14351 extended->external_pager = 1;
14352 }
14353
14354 next_shadow = shadow_object->shadow;
14355 if (next_shadow) {
14356 vm_object_lock(next_shadow);
14357 }
14358 vm_object_unlock(shadow_object);
14359 shadow_object = next_shadow;
14360 }
14361 }
14362 extended->shadow_depth = shadow_depth;
14363 }
14364
14365 if (extended->shadow_depth || entry->needs_copy) {
14366 extended->share_mode = SM_COW;
14367 } else {
14368 if (ref_count == 1) {
14369 extended->share_mode = SM_PRIVATE;
14370 } else {
14371 if (obj->true_share) {
14372 extended->share_mode = SM_TRUESHARED;
14373 } else {
14374 extended->share_mode = SM_SHARED;
14375 }
14376 }
14377 }
14378 extended->ref_count = ref_count - extended->shadow_depth;
14379
14380 for (i = 0; i < extended->shadow_depth; i++) {
14381 if ((tmp_obj = obj->shadow) == 0) {
14382 break;
14383 }
14384 vm_object_lock(tmp_obj);
14385 vm_object_unlock(obj);
14386
14387 if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress) {
14388 ref_count--;
14389 }
14390
14391 extended->ref_count += ref_count;
14392 obj = tmp_obj;
14393 }
14394 vm_object_unlock(obj);
14395
14396 if (extended->share_mode == SM_SHARED) {
14397 vm_map_entry_t cur;
14398 vm_map_entry_t last;
14399 int my_refs;
14400
14401 obj = VME_OBJECT(entry);
14402 last = vm_map_to_entry(map);
14403 my_refs = 0;
14404
14405 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
14406 ref_count--;
14407 }
14408 for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next) {
14409 my_refs += vm_map_region_count_obj_refs(cur, obj);
14410 }
14411
14412 if (my_refs == ref_count) {
14413 extended->share_mode = SM_PRIVATE_ALIASED;
14414 } else if (my_refs > 1) {
14415 extended->share_mode = SM_SHARED_ALIASED;
14416 }
14417 }
14418 }
14419
14420
14421 /* object is locked on entry and locked on return */
14422
14423
14424 static void
14425 vm_map_region_look_for_page(
14426 __unused vm_map_t map,
14427 __unused vm_map_offset_t va,
14428 vm_object_t object,
14429 vm_object_offset_t offset,
14430 int max_refcnt,
14431 int depth,
14432 vm_region_extended_info_t extended,
14433 mach_msg_type_number_t count)
14434 {
14435 vm_page_t p;
14436 vm_object_t shadow;
14437 int ref_count;
14438 vm_object_t caller_object;
14439
14440 shadow = object->shadow;
14441 caller_object = object;
14442
14443
14444 while (TRUE) {
14445 if (!(object->internal)) {
14446 extended->external_pager = 1;
14447 }
14448
14449 if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
14450 if (shadow && (max_refcnt == 1)) {
14451 extended->pages_shared_now_private++;
14452 }
14453
14454 if (!p->vmp_fictitious &&
14455 (p->vmp_dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(p)))) {
14456 extended->pages_dirtied++;
14457 } else if (count >= VM_REGION_EXTENDED_INFO_COUNT) {
14458 if (p->vmp_reusable || object->all_reusable) {
14459 extended->pages_reusable++;
14460 }
14461 }
14462
14463 extended->pages_resident++;
14464
14465 if (object != caller_object) {
14466 vm_object_unlock(object);
14467 }
14468
14469 return;
14470 }
14471 if (object->internal &&
14472 object->alive &&
14473 !object->terminating &&
14474 object->pager_ready) {
14475 if (VM_COMPRESSOR_PAGER_STATE_GET(object, offset)
14476 == VM_EXTERNAL_STATE_EXISTS) {
14477 /* the pager has that page */
14478 extended->pages_swapped_out++;
14479 if (object != caller_object) {
14480 vm_object_unlock(object);
14481 }
14482 return;
14483 }
14484 }
14485
14486 if (shadow) {
14487 vm_object_lock(shadow);
14488
14489 if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress) {
14490 ref_count--;
14491 }
14492
14493 if (++depth > extended->shadow_depth) {
14494 extended->shadow_depth = depth;
14495 }
14496
14497 if (ref_count > max_refcnt) {
14498 max_refcnt = ref_count;
14499 }
14500
14501 if (object != caller_object) {
14502 vm_object_unlock(object);
14503 }
14504
14505 offset = offset + object->vo_shadow_offset;
14506 object = shadow;
14507 shadow = object->shadow;
14508 continue;
14509 }
14510 if (object != caller_object) {
14511 vm_object_unlock(object);
14512 }
14513 break;
14514 }
14515 }
14516
14517 static int
14518 vm_map_region_count_obj_refs(
14519 vm_map_entry_t entry,
14520 vm_object_t object)
14521 {
14522 int ref_count;
14523 vm_object_t chk_obj;
14524 vm_object_t tmp_obj;
14525
14526 if (VME_OBJECT(entry) == 0) {
14527 return 0;
14528 }
14529
14530 if (entry->is_sub_map) {
14531 return 0;
14532 } else {
14533 ref_count = 0;
14534
14535 chk_obj = VME_OBJECT(entry);
14536 vm_object_lock(chk_obj);
14537
14538 while (chk_obj) {
14539 if (chk_obj == object) {
14540 ref_count++;
14541 }
14542 tmp_obj = chk_obj->shadow;
14543 if (tmp_obj) {
14544 vm_object_lock(tmp_obj);
14545 }
14546 vm_object_unlock(chk_obj);
14547
14548 chk_obj = tmp_obj;
14549 }
14550 }
14551 return ref_count;
14552 }
14553
14554
14555 /*
14556 * Routine: vm_map_simplify
14557 *
14558 * Description:
14559 * Attempt to simplify the map representation in
14560 * the vicinity of the given starting address.
14561 * Note:
14562 * This routine is intended primarily to keep the
14563 * kernel maps more compact -- they generally don't
14564 * benefit from the "expand a map entry" technology
14565 * at allocation time because the adjacent entry
14566 * is often wired down.
14567 */
14568 void
14569 vm_map_simplify_entry(
14570 vm_map_t map,
14571 vm_map_entry_t this_entry)
14572 {
14573 vm_map_entry_t prev_entry;
14574
14575 counter(c_vm_map_simplify_entry_called++);
14576
14577 prev_entry = this_entry->vme_prev;
14578
14579 if ((this_entry != vm_map_to_entry(map)) &&
14580 (prev_entry != vm_map_to_entry(map)) &&
14581
14582 (prev_entry->vme_end == this_entry->vme_start) &&
14583
14584 (prev_entry->is_sub_map == this_entry->is_sub_map) &&
14585 (VME_OBJECT(prev_entry) == VME_OBJECT(this_entry)) &&
14586 ((VME_OFFSET(prev_entry) + (prev_entry->vme_end -
14587 prev_entry->vme_start))
14588 == VME_OFFSET(this_entry)) &&
14589
14590 (prev_entry->behavior == this_entry->behavior) &&
14591 (prev_entry->needs_copy == this_entry->needs_copy) &&
14592 (prev_entry->protection == this_entry->protection) &&
14593 (prev_entry->max_protection == this_entry->max_protection) &&
14594 (prev_entry->inheritance == this_entry->inheritance) &&
14595 (prev_entry->use_pmap == this_entry->use_pmap) &&
14596 (VME_ALIAS(prev_entry) == VME_ALIAS(this_entry)) &&
14597 (prev_entry->no_cache == this_entry->no_cache) &&
14598 (prev_entry->permanent == this_entry->permanent) &&
14599 (prev_entry->map_aligned == this_entry->map_aligned) &&
14600 (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) &&
14601 (prev_entry->used_for_jit == this_entry->used_for_jit) &&
14602 (prev_entry->pmap_cs_associated == this_entry->pmap_cs_associated) &&
14603 /* from_reserved_zone: OK if that field doesn't match */
14604 (prev_entry->iokit_acct == this_entry->iokit_acct) &&
14605 (prev_entry->vme_resilient_codesign ==
14606 this_entry->vme_resilient_codesign) &&
14607 (prev_entry->vme_resilient_media ==
14608 this_entry->vme_resilient_media) &&
14609 (prev_entry->vme_no_copy_on_read == this_entry->vme_no_copy_on_read) &&
14610
14611 (prev_entry->wired_count == this_entry->wired_count) &&
14612 (prev_entry->user_wired_count == this_entry->user_wired_count) &&
14613
14614 ((prev_entry->vme_atomic == FALSE) && (this_entry->vme_atomic == FALSE)) &&
14615 (prev_entry->in_transition == FALSE) &&
14616 (this_entry->in_transition == FALSE) &&
14617 (prev_entry->needs_wakeup == FALSE) &&
14618 (this_entry->needs_wakeup == FALSE) &&
14619 (prev_entry->is_shared == FALSE) &&
14620 (this_entry->is_shared == FALSE) &&
14621 (prev_entry->superpage_size == FALSE) &&
14622 (this_entry->superpage_size == FALSE)
14623 ) {
14624 vm_map_store_entry_unlink(map, prev_entry);
14625 assert(prev_entry->vme_start < this_entry->vme_end);
14626 if (prev_entry->map_aligned) {
14627 assert(VM_MAP_PAGE_ALIGNED(prev_entry->vme_start,
14628 VM_MAP_PAGE_MASK(map)));
14629 }
14630 this_entry->vme_start = prev_entry->vme_start;
14631 VME_OFFSET_SET(this_entry, VME_OFFSET(prev_entry));
14632
14633 if (map->holelistenabled) {
14634 vm_map_store_update_first_free(map, this_entry, TRUE);
14635 }
14636
14637 if (prev_entry->is_sub_map) {
14638 vm_map_deallocate(VME_SUBMAP(prev_entry));
14639 } else {
14640 vm_object_deallocate(VME_OBJECT(prev_entry));
14641 }
14642 vm_map_entry_dispose(map, prev_entry);
14643 SAVE_HINT_MAP_WRITE(map, this_entry);
14644 counter(c_vm_map_simplified++);
14645 }
14646 }
14647
14648 void
14649 vm_map_simplify(
14650 vm_map_t map,
14651 vm_map_offset_t start)
14652 {
14653 vm_map_entry_t this_entry;
14654
14655 vm_map_lock(map);
14656 if (vm_map_lookup_entry(map, start, &this_entry)) {
14657 vm_map_simplify_entry(map, this_entry);
14658 vm_map_simplify_entry(map, this_entry->vme_next);
14659 }
14660 counter(c_vm_map_simplify_called++);
14661 vm_map_unlock(map);
14662 }
14663
14664 static void
14665 vm_map_simplify_range(
14666 vm_map_t map,
14667 vm_map_offset_t start,
14668 vm_map_offset_t end)
14669 {
14670 vm_map_entry_t entry;
14671
14672 /*
14673 * The map should be locked (for "write") by the caller.
14674 */
14675
14676 if (start >= end) {
14677 /* invalid address range */
14678 return;
14679 }
14680
14681 start = vm_map_trunc_page(start,
14682 VM_MAP_PAGE_MASK(map));
14683 end = vm_map_round_page(end,
14684 VM_MAP_PAGE_MASK(map));
14685
14686 if (!vm_map_lookup_entry(map, start, &entry)) {
14687 /* "start" is not mapped and "entry" ends before "start" */
14688 if (entry == vm_map_to_entry(map)) {
14689 /* start with first entry in the map */
14690 entry = vm_map_first_entry(map);
14691 } else {
14692 /* start with next entry */
14693 entry = entry->vme_next;
14694 }
14695 }
14696
14697 while (entry != vm_map_to_entry(map) &&
14698 entry->vme_start <= end) {
14699 /* try and coalesce "entry" with its previous entry */
14700 vm_map_simplify_entry(map, entry);
14701 entry = entry->vme_next;
14702 }
14703 }
14704
14705
14706 /*
14707 * Routine: vm_map_machine_attribute
14708 * Purpose:
14709 * Provide machine-specific attributes to mappings,
14710 * such as cachability etc. for machines that provide
14711 * them. NUMA architectures and machines with big/strange
14712 * caches will use this.
14713 * Note:
14714 * Responsibilities for locking and checking are handled here,
14715 * everything else in the pmap module. If any non-volatile
14716 * information must be kept, the pmap module should handle
14717 * it itself. [This assumes that attributes do not
14718 * need to be inherited, which seems ok to me]
14719 */
14720 kern_return_t
14721 vm_map_machine_attribute(
14722 vm_map_t map,
14723 vm_map_offset_t start,
14724 vm_map_offset_t end,
14725 vm_machine_attribute_t attribute,
14726 vm_machine_attribute_val_t* value) /* IN/OUT */
14727 {
14728 kern_return_t ret;
14729 vm_map_size_t sync_size;
14730 vm_map_entry_t entry;
14731
14732 if (start < vm_map_min(map) || end > vm_map_max(map)) {
14733 return KERN_INVALID_ADDRESS;
14734 }
14735
14736 /* Figure how much memory we need to flush (in page increments) */
14737 sync_size = end - start;
14738
14739 vm_map_lock(map);
14740
14741 if (attribute != MATTR_CACHE) {
14742 /* If we don't have to find physical addresses, we */
14743 /* don't have to do an explicit traversal here. */
14744 ret = pmap_attribute(map->pmap, start, end - start,
14745 attribute, value);
14746 vm_map_unlock(map);
14747 return ret;
14748 }
14749
14750 ret = KERN_SUCCESS; /* Assume it all worked */
14751
14752 while (sync_size) {
14753 if (vm_map_lookup_entry(map, start, &entry)) {
14754 vm_map_size_t sub_size;
14755 if ((entry->vme_end - start) > sync_size) {
14756 sub_size = sync_size;
14757 sync_size = 0;
14758 } else {
14759 sub_size = entry->vme_end - start;
14760 sync_size -= sub_size;
14761 }
14762 if (entry->is_sub_map) {
14763 vm_map_offset_t sub_start;
14764 vm_map_offset_t sub_end;
14765
14766 sub_start = (start - entry->vme_start)
14767 + VME_OFFSET(entry);
14768 sub_end = sub_start + sub_size;
14769 vm_map_machine_attribute(
14770 VME_SUBMAP(entry),
14771 sub_start,
14772 sub_end,
14773 attribute, value);
14774 } else {
14775 if (VME_OBJECT(entry)) {
14776 vm_page_t m;
14777 vm_object_t object;
14778 vm_object_t base_object;
14779 vm_object_t last_object;
14780 vm_object_offset_t offset;
14781 vm_object_offset_t base_offset;
14782 vm_map_size_t range;
14783 range = sub_size;
14784 offset = (start - entry->vme_start)
14785 + VME_OFFSET(entry);
14786 base_offset = offset;
14787 object = VME_OBJECT(entry);
14788 base_object = object;
14789 last_object = NULL;
14790
14791 vm_object_lock(object);
14792
14793 while (range) {
14794 m = vm_page_lookup(
14795 object, offset);
14796
14797 if (m && !m->vmp_fictitious) {
14798 ret =
14799 pmap_attribute_cache_sync(
14800 VM_PAGE_GET_PHYS_PAGE(m),
14801 PAGE_SIZE,
14802 attribute, value);
14803 } else if (object->shadow) {
14804 offset = offset + object->vo_shadow_offset;
14805 last_object = object;
14806 object = object->shadow;
14807 vm_object_lock(last_object->shadow);
14808 vm_object_unlock(last_object);
14809 continue;
14810 }
14811 range -= PAGE_SIZE;
14812
14813 if (base_object != object) {
14814 vm_object_unlock(object);
14815 vm_object_lock(base_object);
14816 object = base_object;
14817 }
14818 /* Bump to the next page */
14819 base_offset += PAGE_SIZE;
14820 offset = base_offset;
14821 }
14822 vm_object_unlock(object);
14823 }
14824 }
14825 start += sub_size;
14826 } else {
14827 vm_map_unlock(map);
14828 return KERN_FAILURE;
14829 }
14830 }
14831
14832 vm_map_unlock(map);
14833
14834 return ret;
14835 }
14836
14837 /*
14838 * vm_map_behavior_set:
14839 *
14840 * Sets the paging reference behavior of the specified address
14841 * range in the target map. Paging reference behavior affects
14842 * how pagein operations resulting from faults on the map will be
14843 * clustered.
14844 */
14845 kern_return_t
14846 vm_map_behavior_set(
14847 vm_map_t map,
14848 vm_map_offset_t start,
14849 vm_map_offset_t end,
14850 vm_behavior_t new_behavior)
14851 {
14852 vm_map_entry_t entry;
14853 vm_map_entry_t temp_entry;
14854
14855 if (start > end ||
14856 start < vm_map_min(map) ||
14857 end > vm_map_max(map)) {
14858 return KERN_NO_SPACE;
14859 }
14860
14861 switch (new_behavior) {
14862 /*
14863 * This first block of behaviors all set a persistent state on the specified
14864 * memory range. All we have to do here is to record the desired behavior
14865 * in the vm_map_entry_t's.
14866 */
14867
14868 case VM_BEHAVIOR_DEFAULT:
14869 case VM_BEHAVIOR_RANDOM:
14870 case VM_BEHAVIOR_SEQUENTIAL:
14871 case VM_BEHAVIOR_RSEQNTL:
14872 case VM_BEHAVIOR_ZERO_WIRED_PAGES:
14873 vm_map_lock(map);
14874
14875 /*
14876 * The entire address range must be valid for the map.
14877 * Note that vm_map_range_check() does a
14878 * vm_map_lookup_entry() internally and returns the
14879 * entry containing the start of the address range if
14880 * the entire range is valid.
14881 */
14882 if (vm_map_range_check(map, start, end, &temp_entry)) {
14883 entry = temp_entry;
14884 vm_map_clip_start(map, entry, start);
14885 } else {
14886 vm_map_unlock(map);
14887 return KERN_INVALID_ADDRESS;
14888 }
14889
14890 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
14891 vm_map_clip_end(map, entry, end);
14892 if (entry->is_sub_map) {
14893 assert(!entry->use_pmap);
14894 }
14895
14896 if (new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES) {
14897 entry->zero_wired_pages = TRUE;
14898 } else {
14899 entry->behavior = new_behavior;
14900 }
14901 entry = entry->vme_next;
14902 }
14903
14904 vm_map_unlock(map);
14905 break;
14906
14907 /*
14908 * The rest of these are different from the above in that they cause
14909 * an immediate action to take place as opposed to setting a behavior that
14910 * affects future actions.
14911 */
14912
14913 case VM_BEHAVIOR_WILLNEED:
14914 return vm_map_willneed(map, start, end);
14915
14916 case VM_BEHAVIOR_DONTNEED:
14917 return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS);
14918
14919 case VM_BEHAVIOR_FREE:
14920 return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS);
14921
14922 case VM_BEHAVIOR_REUSABLE:
14923 return vm_map_reusable_pages(map, start, end);
14924
14925 case VM_BEHAVIOR_REUSE:
14926 return vm_map_reuse_pages(map, start, end);
14927
14928 case VM_BEHAVIOR_CAN_REUSE:
14929 return vm_map_can_reuse(map, start, end);
14930
14931 #if MACH_ASSERT
14932 case VM_BEHAVIOR_PAGEOUT:
14933 return vm_map_pageout(map, start, end);
14934 #endif /* MACH_ASSERT */
14935
14936 default:
14937 return KERN_INVALID_ARGUMENT;
14938 }
14939
14940 return KERN_SUCCESS;
14941 }
14942
14943
14944 /*
14945 * Internals for madvise(MADV_WILLNEED) system call.
14946 *
14947 * The implementation is to do:-
14948 * a) read-ahead if the mapping corresponds to a mapped regular file
14949 * b) or, fault in the pages (zero-fill, decompress etc) if it's an anonymous mapping
14950 */
14951
14952
14953 static kern_return_t
14954 vm_map_willneed(
14955 vm_map_t map,
14956 vm_map_offset_t start,
14957 vm_map_offset_t end
14958 )
14959 {
14960 vm_map_entry_t entry;
14961 vm_object_t object;
14962 memory_object_t pager;
14963 struct vm_object_fault_info fault_info = {};
14964 kern_return_t kr;
14965 vm_object_size_t len;
14966 vm_object_offset_t offset;
14967
14968 fault_info.interruptible = THREAD_UNINT; /* ignored value */
14969 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
14970 fault_info.stealth = TRUE;
14971
14972 /*
14973 * The MADV_WILLNEED operation doesn't require any changes to the
14974 * vm_map_entry_t's, so the read lock is sufficient.
14975 */
14976
14977 vm_map_lock_read(map);
14978
14979 /*
14980 * The madvise semantics require that the address range be fully
14981 * allocated with no holes. Otherwise, we're required to return
14982 * an error.
14983 */
14984
14985 if (!vm_map_range_check(map, start, end, &entry)) {
14986 vm_map_unlock_read(map);
14987 return KERN_INVALID_ADDRESS;
14988 }
14989
14990 /*
14991 * Examine each vm_map_entry_t in the range.
14992 */
14993 for (; entry != vm_map_to_entry(map) && start < end;) {
14994 /*
14995 * The first time through, the start address could be anywhere
14996 * within the vm_map_entry we found. So adjust the offset to
14997 * correspond. After that, the offset will always be zero to
14998 * correspond to the beginning of the current vm_map_entry.
14999 */
15000 offset = (start - entry->vme_start) + VME_OFFSET(entry);
15001
15002 /*
15003 * Set the length so we don't go beyond the end of the
15004 * map_entry or beyond the end of the range we were given.
15005 * This range could span also multiple map entries all of which
15006 * map different files, so make sure we only do the right amount
15007 * of I/O for each object. Note that it's possible for there
15008 * to be multiple map entries all referring to the same object
15009 * but with different page permissions, but it's not worth
15010 * trying to optimize that case.
15011 */
15012 len = MIN(entry->vme_end - start, end - start);
15013
15014 if ((vm_size_t) len != len) {
15015 /* 32-bit overflow */
15016 len = (vm_size_t) (0 - PAGE_SIZE);
15017 }
15018 fault_info.cluster_size = (vm_size_t) len;
15019 fault_info.lo_offset = offset;
15020 fault_info.hi_offset = offset + len;
15021 fault_info.user_tag = VME_ALIAS(entry);
15022 fault_info.pmap_options = 0;
15023 if (entry->iokit_acct ||
15024 (!entry->is_sub_map && !entry->use_pmap)) {
15025 fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
15026 }
15027
15028 /*
15029 * If the entry is a submap OR there's no read permission
15030 * to this mapping, then just skip it.
15031 */
15032 if ((entry->is_sub_map) || (entry->protection & VM_PROT_READ) == 0) {
15033 entry = entry->vme_next;
15034 start = entry->vme_start;
15035 continue;
15036 }
15037
15038 object = VME_OBJECT(entry);
15039
15040 if (object == NULL ||
15041 (object && object->internal)) {
15042 /*
15043 * Memory range backed by anonymous memory.
15044 */
15045 vm_size_t region_size = 0, effective_page_size = 0;
15046 vm_map_offset_t addr = 0, effective_page_mask = 0;
15047
15048 region_size = len;
15049 addr = start;
15050
15051 effective_page_mask = MAX(vm_map_page_mask(current_map()), PAGE_MASK);
15052 effective_page_size = effective_page_mask + 1;
15053
15054 vm_map_unlock_read(map);
15055
15056 while (region_size) {
15057 vm_pre_fault(
15058 vm_map_trunc_page(addr, effective_page_mask),
15059 VM_PROT_READ | VM_PROT_WRITE);
15060
15061 region_size -= effective_page_size;
15062 addr += effective_page_size;
15063 }
15064 } else {
15065 /*
15066 * Find the file object backing this map entry. If there is
15067 * none, then we simply ignore the "will need" advice for this
15068 * entry and go on to the next one.
15069 */
15070 if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) {
15071 entry = entry->vme_next;
15072 start = entry->vme_start;
15073 continue;
15074 }
15075
15076 vm_object_paging_begin(object);
15077 pager = object->pager;
15078 vm_object_unlock(object);
15079
15080 /*
15081 * The data_request() could take a long time, so let's
15082 * release the map lock to avoid blocking other threads.
15083 */
15084 vm_map_unlock_read(map);
15085
15086 /*
15087 * Get the data from the object asynchronously.
15088 *
15089 * Note that memory_object_data_request() places limits on the
15090 * amount of I/O it will do. Regardless of the len we
15091 * specified, it won't do more than MAX_UPL_TRANSFER_BYTES and it
15092 * silently truncates the len to that size. This isn't
15093 * necessarily bad since madvise shouldn't really be used to
15094 * page in unlimited amounts of data. Other Unix variants
15095 * limit the willneed case as well. If this turns out to be an
15096 * issue for developers, then we can always adjust the policy
15097 * here and still be backwards compatible since this is all
15098 * just "advice".
15099 */
15100 kr = memory_object_data_request(
15101 pager,
15102 offset + object->paging_offset,
15103 0, /* ignored */
15104 VM_PROT_READ,
15105 (memory_object_fault_info_t)&fault_info);
15106
15107 vm_object_lock(object);
15108 vm_object_paging_end(object);
15109 vm_object_unlock(object);
15110
15111 /*
15112 * If we couldn't do the I/O for some reason, just give up on
15113 * the madvise. We still return success to the user since
15114 * madvise isn't supposed to fail when the advice can't be
15115 * taken.
15116 */
15117
15118 if (kr != KERN_SUCCESS) {
15119 return KERN_SUCCESS;
15120 }
15121 }
15122
15123 start += len;
15124 if (start >= end) {
15125 /* done */
15126 return KERN_SUCCESS;
15127 }
15128
15129 /* look up next entry */
15130 vm_map_lock_read(map);
15131 if (!vm_map_lookup_entry(map, start, &entry)) {
15132 /*
15133 * There's a new hole in the address range.
15134 */
15135 vm_map_unlock_read(map);
15136 return KERN_INVALID_ADDRESS;
15137 }
15138 }
15139
15140 vm_map_unlock_read(map);
15141 return KERN_SUCCESS;
15142 }
15143
15144 static boolean_t
15145 vm_map_entry_is_reusable(
15146 vm_map_entry_t entry)
15147 {
15148 /* Only user map entries */
15149
15150 vm_object_t object;
15151
15152 if (entry->is_sub_map) {
15153 return FALSE;
15154 }
15155
15156 switch (VME_ALIAS(entry)) {
15157 case VM_MEMORY_MALLOC:
15158 case VM_MEMORY_MALLOC_SMALL:
15159 case VM_MEMORY_MALLOC_LARGE:
15160 case VM_MEMORY_REALLOC:
15161 case VM_MEMORY_MALLOC_TINY:
15162 case VM_MEMORY_MALLOC_LARGE_REUSABLE:
15163 case VM_MEMORY_MALLOC_LARGE_REUSED:
15164 /*
15165 * This is a malloc() memory region: check if it's still
15166 * in its original state and can be re-used for more
15167 * malloc() allocations.
15168 */
15169 break;
15170 default:
15171 /*
15172 * Not a malloc() memory region: let the caller decide if
15173 * it's re-usable.
15174 */
15175 return TRUE;
15176 }
15177
15178 if (/*entry->is_shared ||*/
15179 entry->is_sub_map ||
15180 entry->in_transition ||
15181 entry->protection != VM_PROT_DEFAULT ||
15182 entry->max_protection != VM_PROT_ALL ||
15183 entry->inheritance != VM_INHERIT_DEFAULT ||
15184 entry->no_cache ||
15185 entry->permanent ||
15186 entry->superpage_size != FALSE ||
15187 entry->zero_wired_pages ||
15188 entry->wired_count != 0 ||
15189 entry->user_wired_count != 0) {
15190 return FALSE;
15191 }
15192
15193 object = VME_OBJECT(entry);
15194 if (object == VM_OBJECT_NULL) {
15195 return TRUE;
15196 }
15197 if (
15198 #if 0
15199 /*
15200 * Let's proceed even if the VM object is potentially
15201 * shared.
15202 * We check for this later when processing the actual
15203 * VM pages, so the contents will be safe if shared.
15204 *
15205 * But we can still mark this memory region as "reusable" to
15206 * acknowledge that the caller did let us know that the memory
15207 * could be re-used and should not be penalized for holding
15208 * on to it. This allows its "resident size" to not include
15209 * the reusable range.
15210 */
15211 object->ref_count == 1 &&
15212 #endif
15213 object->wired_page_count == 0 &&
15214 object->copy == VM_OBJECT_NULL &&
15215 object->shadow == VM_OBJECT_NULL &&
15216 object->internal &&
15217 object->purgable == VM_PURGABLE_DENY &&
15218 object->copy_strategy != MEMORY_OBJECT_COPY_DELAY &&
15219 !object->true_share &&
15220 object->wimg_bits == VM_WIMG_USE_DEFAULT &&
15221 !object->code_signed) {
15222 return TRUE;
15223 }
15224 return FALSE;
15225 }
15226
15227 static kern_return_t
15228 vm_map_reuse_pages(
15229 vm_map_t map,
15230 vm_map_offset_t start,
15231 vm_map_offset_t end)
15232 {
15233 vm_map_entry_t entry;
15234 vm_object_t object;
15235 vm_object_offset_t start_offset, end_offset;
15236
15237 /*
15238 * The MADV_REUSE operation doesn't require any changes to the
15239 * vm_map_entry_t's, so the read lock is sufficient.
15240 */
15241
15242 vm_map_lock_read(map);
15243 assert(map->pmap != kernel_pmap); /* protect alias access */
15244
15245 /*
15246 * The madvise semantics require that the address range be fully
15247 * allocated with no holes. Otherwise, we're required to return
15248 * an error.
15249 */
15250
15251 if (!vm_map_range_check(map, start, end, &entry)) {
15252 vm_map_unlock_read(map);
15253 vm_page_stats_reusable.reuse_pages_failure++;
15254 return KERN_INVALID_ADDRESS;
15255 }
15256
15257 /*
15258 * Examine each vm_map_entry_t in the range.
15259 */
15260 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
15261 entry = entry->vme_next) {
15262 /*
15263 * Sanity check on the VM map entry.
15264 */
15265 if (!vm_map_entry_is_reusable(entry)) {
15266 vm_map_unlock_read(map);
15267 vm_page_stats_reusable.reuse_pages_failure++;
15268 return KERN_INVALID_ADDRESS;
15269 }
15270
15271 /*
15272 * The first time through, the start address could be anywhere
15273 * within the vm_map_entry we found. So adjust the offset to
15274 * correspond.
15275 */
15276 if (entry->vme_start < start) {
15277 start_offset = start - entry->vme_start;
15278 } else {
15279 start_offset = 0;
15280 }
15281 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
15282 start_offset += VME_OFFSET(entry);
15283 end_offset += VME_OFFSET(entry);
15284
15285 assert(!entry->is_sub_map);
15286 object = VME_OBJECT(entry);
15287 if (object != VM_OBJECT_NULL) {
15288 vm_object_lock(object);
15289 vm_object_reuse_pages(object, start_offset, end_offset,
15290 TRUE);
15291 vm_object_unlock(object);
15292 }
15293
15294 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSABLE) {
15295 /*
15296 * XXX
15297 * We do not hold the VM map exclusively here.
15298 * The "alias" field is not that critical, so it's
15299 * safe to update it here, as long as it is the only
15300 * one that can be modified while holding the VM map
15301 * "shared".
15302 */
15303 VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSED);
15304 }
15305 }
15306
15307 vm_map_unlock_read(map);
15308 vm_page_stats_reusable.reuse_pages_success++;
15309 return KERN_SUCCESS;
15310 }
15311
15312
15313 static kern_return_t
15314 vm_map_reusable_pages(
15315 vm_map_t map,
15316 vm_map_offset_t start,
15317 vm_map_offset_t end)
15318 {
15319 vm_map_entry_t entry;
15320 vm_object_t object;
15321 vm_object_offset_t start_offset, end_offset;
15322 vm_map_offset_t pmap_offset;
15323
15324 /*
15325 * The MADV_REUSABLE operation doesn't require any changes to the
15326 * vm_map_entry_t's, so the read lock is sufficient.
15327 */
15328
15329 vm_map_lock_read(map);
15330 assert(map->pmap != kernel_pmap); /* protect alias access */
15331
15332 /*
15333 * The madvise semantics require that the address range be fully
15334 * allocated with no holes. Otherwise, we're required to return
15335 * an error.
15336 */
15337
15338 if (!vm_map_range_check(map, start, end, &entry)) {
15339 vm_map_unlock_read(map);
15340 vm_page_stats_reusable.reusable_pages_failure++;
15341 return KERN_INVALID_ADDRESS;
15342 }
15343
15344 /*
15345 * Examine each vm_map_entry_t in the range.
15346 */
15347 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
15348 entry = entry->vme_next) {
15349 int kill_pages = 0;
15350
15351 /*
15352 * Sanity check on the VM map entry.
15353 */
15354 if (!vm_map_entry_is_reusable(entry)) {
15355 vm_map_unlock_read(map);
15356 vm_page_stats_reusable.reusable_pages_failure++;
15357 return KERN_INVALID_ADDRESS;
15358 }
15359
15360 if (!(entry->protection & VM_PROT_WRITE) && !entry->used_for_jit) {
15361 /* not writable: can't discard contents */
15362 vm_map_unlock_read(map);
15363 vm_page_stats_reusable.reusable_nonwritable++;
15364 vm_page_stats_reusable.reusable_pages_failure++;
15365 return KERN_PROTECTION_FAILURE;
15366 }
15367
15368 /*
15369 * The first time through, the start address could be anywhere
15370 * within the vm_map_entry we found. So adjust the offset to
15371 * correspond.
15372 */
15373 if (entry->vme_start < start) {
15374 start_offset = start - entry->vme_start;
15375 pmap_offset = start;
15376 } else {
15377 start_offset = 0;
15378 pmap_offset = entry->vme_start;
15379 }
15380 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
15381 start_offset += VME_OFFSET(entry);
15382 end_offset += VME_OFFSET(entry);
15383
15384 assert(!entry->is_sub_map);
15385 object = VME_OBJECT(entry);
15386 if (object == VM_OBJECT_NULL) {
15387 continue;
15388 }
15389
15390
15391 vm_object_lock(object);
15392 if (((object->ref_count == 1) ||
15393 (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC &&
15394 object->copy == VM_OBJECT_NULL)) &&
15395 object->shadow == VM_OBJECT_NULL &&
15396 /*
15397 * "iokit_acct" entries are billed for their virtual size
15398 * (rather than for their resident pages only), so they
15399 * wouldn't benefit from making pages reusable, and it
15400 * would be hard to keep track of pages that are both
15401 * "iokit_acct" and "reusable" in the pmap stats and
15402 * ledgers.
15403 */
15404 !(entry->iokit_acct ||
15405 (!entry->is_sub_map && !entry->use_pmap))) {
15406 if (object->ref_count != 1) {
15407 vm_page_stats_reusable.reusable_shared++;
15408 }
15409 kill_pages = 1;
15410 } else {
15411 kill_pages = -1;
15412 }
15413 if (kill_pages != -1) {
15414 vm_object_deactivate_pages(object,
15415 start_offset,
15416 end_offset - start_offset,
15417 kill_pages,
15418 TRUE /*reusable_pages*/,
15419 map->pmap,
15420 pmap_offset);
15421 } else {
15422 vm_page_stats_reusable.reusable_pages_shared++;
15423 }
15424 vm_object_unlock(object);
15425
15426 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE ||
15427 VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSED) {
15428 /*
15429 * XXX
15430 * We do not hold the VM map exclusively here.
15431 * The "alias" field is not that critical, so it's
15432 * safe to update it here, as long as it is the only
15433 * one that can be modified while holding the VM map
15434 * "shared".
15435 */
15436 VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSABLE);
15437 }
15438 }
15439
15440 vm_map_unlock_read(map);
15441 vm_page_stats_reusable.reusable_pages_success++;
15442 return KERN_SUCCESS;
15443 }
15444
15445
15446 static kern_return_t
15447 vm_map_can_reuse(
15448 vm_map_t map,
15449 vm_map_offset_t start,
15450 vm_map_offset_t end)
15451 {
15452 vm_map_entry_t entry;
15453
15454 /*
15455 * The MADV_REUSABLE operation doesn't require any changes to the
15456 * vm_map_entry_t's, so the read lock is sufficient.
15457 */
15458
15459 vm_map_lock_read(map);
15460 assert(map->pmap != kernel_pmap); /* protect alias access */
15461
15462 /*
15463 * The madvise semantics require that the address range be fully
15464 * allocated with no holes. Otherwise, we're required to return
15465 * an error.
15466 */
15467
15468 if (!vm_map_range_check(map, start, end, &entry)) {
15469 vm_map_unlock_read(map);
15470 vm_page_stats_reusable.can_reuse_failure++;
15471 return KERN_INVALID_ADDRESS;
15472 }
15473
15474 /*
15475 * Examine each vm_map_entry_t in the range.
15476 */
15477 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
15478 entry = entry->vme_next) {
15479 /*
15480 * Sanity check on the VM map entry.
15481 */
15482 if (!vm_map_entry_is_reusable(entry)) {
15483 vm_map_unlock_read(map);
15484 vm_page_stats_reusable.can_reuse_failure++;
15485 return KERN_INVALID_ADDRESS;
15486 }
15487 }
15488
15489 vm_map_unlock_read(map);
15490 vm_page_stats_reusable.can_reuse_success++;
15491 return KERN_SUCCESS;
15492 }
15493
15494
#if MACH_ASSERT
/*
 *	Routine:	vm_map_pageout	(only built when MACH_ASSERT is set)
 *
 *	Description:
 *		For each map entry overlapping [start, end), call
 *		vm_object_pageout() on the entry's backing object when that
 *		object exists and is "internal".
 *
 *		For a submap entry, the corresponding range of the submap is
 *		range-checked under the submap's read lock, and only the object
 *		of the first submap entry returned by vm_map_range_check() is
 *		considered; nested submaps are skipped.
 *
 *		No vm_map_entry_t is modified, so read locks are sufficient.
 *
 *	Returns:
 *		KERN_SUCCESS		the whole range was walked.
 *		KERN_INVALID_ADDRESS	vm_map_range_check() failed on the map
 *					or on a submap (madvise semantics
 *					require a range with no holes).
 */
static kern_return_t
vm_map_pageout(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end)
{
	vm_map_entry_t  entry;

	vm_map_lock_read(map);

	if (!vm_map_range_check(map, start, end, &entry)) {
		vm_map_unlock_read(map);
		return KERN_INVALID_ADDRESS;
	}

	for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
	    entry = entry->vme_next) {
		vm_object_t     target;

		if (!entry->is_sub_map) {
			/* plain object mapping: page out internal objects only */
			target = VME_OBJECT(entry);
			if (target != VM_OBJECT_NULL && target->internal) {
				vm_object_pageout(target);
			}
			continue;
		}

		/*
		 * Submap mapping: translate this entry's extent into the
		 * submap's address space and look there.
		 */
		vm_map_t        sub = VME_SUBMAP(entry);
		vm_map_offset_t sub_start = VME_OFFSET(entry);
		vm_map_offset_t sub_end = sub_start +
		    (entry->vme_end - entry->vme_start);
		vm_map_entry_t  sub_entry;

		vm_map_lock_read(sub);

		if (!vm_map_range_check(sub, sub_start, sub_end, &sub_entry)) {
			vm_map_unlock_read(sub);
			vm_map_unlock_read(map);
			return KERN_INVALID_ADDRESS;
		}

		target = VME_OBJECT(sub_entry);
		if (!sub_entry->is_sub_map &&
		    target != VM_OBJECT_NULL &&
		    target->internal) {
			vm_object_pageout(target);
		}

		vm_map_unlock_read(sub);
	}

	vm_map_unlock_read(map);
	return KERN_SUCCESS;
}
#endif /* MACH_ASSERT */
15584
15585
15586 /*
15587 * Routine: vm_map_entry_insert
15588 *
15589 * Description: This routine inserts a new vm_entry in a locked map.
15590 */
15591 vm_map_entry_t
15592 vm_map_entry_insert(
15593 vm_map_t map,
15594 vm_map_entry_t insp_entry,
15595 vm_map_offset_t start,
15596 vm_map_offset_t end,
15597 vm_object_t object,
15598 vm_object_offset_t offset,
15599 boolean_t needs_copy,
15600 boolean_t is_shared,
15601 boolean_t in_transition,
15602 vm_prot_t cur_protection,
15603 vm_prot_t max_protection,
15604 vm_behavior_t behavior,
15605 vm_inherit_t inheritance,
15606 unsigned wired_count,
15607 boolean_t no_cache,
15608 boolean_t permanent,
15609 boolean_t no_copy_on_read,
15610 unsigned int superpage_size,
15611 boolean_t clear_map_aligned,
15612 boolean_t is_submap,
15613 boolean_t used_for_jit,
15614 int alias)
15615 {
15616 vm_map_entry_t new_entry;
15617
15618 assert(insp_entry != (vm_map_entry_t)0);
15619 vm_map_lock_assert_exclusive(map);
15620
15621 #if DEVELOPMENT || DEBUG
15622 vm_object_offset_t end_offset = 0;
15623 assertf(!os_add_overflow(end - start, offset, &end_offset), "size 0x%llx, offset 0x%llx caused overflow", (uint64_t)(end - start), offset);
15624 #endif /* DEVELOPMENT || DEBUG */
15625
15626 new_entry = vm_map_entry_create(map, !map->hdr.entries_pageable);
15627
15628 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
15629 new_entry->map_aligned = TRUE;
15630 } else {
15631 new_entry->map_aligned = FALSE;
15632 }
15633 if (clear_map_aligned &&
15634 (!VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)) ||
15635 !VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)))) {
15636 new_entry->map_aligned = FALSE;
15637 }
15638
15639 new_entry->vme_start = start;
15640 new_entry->vme_end = end;
15641 assert(page_aligned(new_entry->vme_start));
15642 assert(page_aligned(new_entry->vme_end));
15643 if (new_entry->map_aligned) {
15644 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
15645 VM_MAP_PAGE_MASK(map)));
15646 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
15647 VM_MAP_PAGE_MASK(map)));
15648 }
15649 assert(new_entry->vme_start < new_entry->vme_end);
15650
15651 VME_OBJECT_SET(new_entry, object);
15652 VME_OFFSET_SET(new_entry, offset);
15653 new_entry->is_shared = is_shared;
15654 new_entry->is_sub_map = is_submap;
15655 new_entry->needs_copy = needs_copy;
15656 new_entry->in_transition = in_transition;
15657 new_entry->needs_wakeup = FALSE;
15658 new_entry->inheritance = inheritance;
15659 new_entry->protection = cur_protection;
15660 new_entry->max_protection = max_protection;
15661 new_entry->behavior = behavior;
15662 new_entry->wired_count = wired_count;
15663 new_entry->user_wired_count = 0;
15664 if (is_submap) {
15665 /*
15666 * submap: "use_pmap" means "nested".
15667 * default: false.
15668 */
15669 new_entry->use_pmap = FALSE;
15670 } else {
15671 /*
15672 * object: "use_pmap" means "use pmap accounting" for footprint.
15673 * default: true.
15674 */
15675 new_entry->use_pmap = TRUE;
15676 }
15677 VME_ALIAS_SET(new_entry, alias);
15678 new_entry->zero_wired_pages = FALSE;
15679 new_entry->no_cache = no_cache;
15680 new_entry->permanent = permanent;
15681 if (superpage_size) {
15682 new_entry->superpage_size = TRUE;
15683 } else {
15684 new_entry->superpage_size = FALSE;
15685 }
15686 if (used_for_jit) {
15687 #if CONFIG_EMBEDDED
15688 if (!(map->jit_entry_exists))
15689 #endif /* CONFIG_EMBEDDED */
15690 {
15691 new_entry->used_for_jit = TRUE;
15692 map->jit_entry_exists = TRUE;
15693 }
15694 } else {
15695 new_entry->used_for_jit = FALSE;
15696 }
15697 new_entry->pmap_cs_associated = FALSE;
15698 new_entry->iokit_acct = FALSE;
15699 new_entry->vme_resilient_codesign = FALSE;
15700 new_entry->vme_resilient_media = FALSE;
15701 new_entry->vme_atomic = FALSE;
15702 new_entry->vme_no_copy_on_read = no_copy_on_read;
15703
15704 /*
15705 * Insert the new entry into the list.
15706 */
15707
15708 vm_map_store_entry_link(map, insp_entry, new_entry,
15709 VM_MAP_KERNEL_FLAGS_NONE);
15710 map->size += end - start;
15711
15712 /*
15713 * Update the free space hint and the lookup hint.
15714 */
15715
15716 SAVE_HINT_MAP_WRITE(map, new_entry);
15717 return new_entry;
15718 }
15719
15720 /*
15721 * Routine: vm_map_remap_extract
15722 *
15723 * Descritpion: This routine returns a vm_entry list from a map.
15724 */
15725 static kern_return_t
15726 vm_map_remap_extract(
15727 vm_map_t map,
15728 vm_map_offset_t addr,
15729 vm_map_size_t size,
15730 boolean_t copy,
15731 struct vm_map_header *map_header,
15732 vm_prot_t *cur_protection,
15733 vm_prot_t *max_protection,
15734 /* What, no behavior? */
15735 vm_inherit_t inheritance,
15736 boolean_t pageable,
15737 boolean_t same_map,
15738 vm_map_kernel_flags_t vmk_flags)
15739 {
15740 kern_return_t result;
15741 vm_map_size_t mapped_size;
15742 vm_map_size_t tmp_size;
15743 vm_map_entry_t src_entry; /* result of last map lookup */
15744 vm_map_entry_t new_entry;
15745 vm_object_offset_t offset;
15746 vm_map_offset_t map_address;
15747 vm_map_offset_t src_start; /* start of entry to map */
15748 vm_map_offset_t src_end; /* end of region to be mapped */
15749 vm_object_t object;
15750 vm_map_version_t version;
15751 boolean_t src_needs_copy;
15752 boolean_t new_entry_needs_copy;
15753 vm_map_entry_t saved_src_entry;
15754 boolean_t src_entry_was_wired;
15755 vm_prot_t max_prot_for_prot_copy;
15756
15757 assert(map != VM_MAP_NULL);
15758 assert(size != 0);
15759 assert(size == vm_map_round_page(size, PAGE_MASK));
15760 assert(inheritance == VM_INHERIT_NONE ||
15761 inheritance == VM_INHERIT_COPY ||
15762 inheritance == VM_INHERIT_SHARE);
15763
15764 /*
15765 * Compute start and end of region.
15766 */
15767 src_start = vm_map_trunc_page(addr, PAGE_MASK);
15768 src_end = vm_map_round_page(src_start + size, PAGE_MASK);
15769
15770
15771 /*
15772 * Initialize map_header.
15773 */
15774 map_header->links.next = CAST_TO_VM_MAP_ENTRY(&map_header->links);
15775 map_header->links.prev = CAST_TO_VM_MAP_ENTRY(&map_header->links);
15776 map_header->nentries = 0;
15777 map_header->entries_pageable = pageable;
15778 map_header->page_shift = PAGE_SHIFT;
15779
15780 vm_map_store_init( map_header );
15781
15782 if (copy && vmk_flags.vmkf_remap_prot_copy) {
15783 max_prot_for_prot_copy = *max_protection & VM_PROT_ALL;
15784 } else {
15785 max_prot_for_prot_copy = VM_PROT_NONE;
15786 }
15787 *cur_protection = VM_PROT_ALL;
15788 *max_protection = VM_PROT_ALL;
15789
15790 map_address = 0;
15791 mapped_size = 0;
15792 result = KERN_SUCCESS;
15793
15794 /*
15795 * The specified source virtual space might correspond to
15796 * multiple map entries, need to loop on them.
15797 */
15798 vm_map_lock(map);
15799 while (mapped_size != size) {
15800 vm_map_size_t entry_size;
15801
15802 /*
15803 * Find the beginning of the region.
15804 */
15805 if (!vm_map_lookup_entry(map, src_start, &src_entry)) {
15806 result = KERN_INVALID_ADDRESS;
15807 break;
15808 }
15809
15810 if (src_start < src_entry->vme_start ||
15811 (mapped_size && src_start != src_entry->vme_start)) {
15812 result = KERN_INVALID_ADDRESS;
15813 break;
15814 }
15815
15816 tmp_size = size - mapped_size;
15817 if (src_end > src_entry->vme_end) {
15818 tmp_size -= (src_end - src_entry->vme_end);
15819 }
15820
15821 entry_size = (vm_map_size_t)(src_entry->vme_end -
15822 src_entry->vme_start);
15823
15824 if (src_entry->is_sub_map) {
15825 vm_map_reference(VME_SUBMAP(src_entry));
15826 object = VM_OBJECT_NULL;
15827 } else {
15828 object = VME_OBJECT(src_entry);
15829 if (src_entry->iokit_acct) {
15830 /*
15831 * This entry uses "IOKit accounting".
15832 */
15833 } else if (object != VM_OBJECT_NULL &&
15834 (object->purgable != VM_PURGABLE_DENY ||
15835 object->vo_ledger_tag != VM_LEDGER_TAG_NONE)) {
15836 /*
15837 * Purgeable objects have their own accounting:
15838 * no pmap accounting for them.
15839 */
15840 assertf(!src_entry->use_pmap,
15841 "map=%p src_entry=%p [0x%llx:0x%llx] 0x%x/0x%x %d",
15842 map,
15843 src_entry,
15844 (uint64_t)src_entry->vme_start,
15845 (uint64_t)src_entry->vme_end,
15846 src_entry->protection,
15847 src_entry->max_protection,
15848 VME_ALIAS(src_entry));
15849 } else {
15850 /*
15851 * Not IOKit or purgeable:
15852 * must be accounted by pmap stats.
15853 */
15854 assertf(src_entry->use_pmap,
15855 "map=%p src_entry=%p [0x%llx:0x%llx] 0x%x/0x%x %d",
15856 map,
15857 src_entry,
15858 (uint64_t)src_entry->vme_start,
15859 (uint64_t)src_entry->vme_end,
15860 src_entry->protection,
15861 src_entry->max_protection,
15862 VME_ALIAS(src_entry));
15863 }
15864
15865 if (object == VM_OBJECT_NULL) {
15866 object = vm_object_allocate(entry_size);
15867 VME_OFFSET_SET(src_entry, 0);
15868 VME_OBJECT_SET(src_entry, object);
15869 assert(src_entry->use_pmap);
15870 } else if (object->copy_strategy !=
15871 MEMORY_OBJECT_COPY_SYMMETRIC) {
15872 /*
15873 * We are already using an asymmetric
15874 * copy, and therefore we already have
15875 * the right object.
15876 */
15877 assert(!src_entry->needs_copy);
15878 } else if (src_entry->needs_copy || object->shadowed ||
15879 (object->internal && !object->true_share &&
15880 !src_entry->is_shared &&
15881 object->vo_size > entry_size)) {
15882 VME_OBJECT_SHADOW(src_entry, entry_size);
15883 assert(src_entry->use_pmap);
15884
15885 if (!src_entry->needs_copy &&
15886 (src_entry->protection & VM_PROT_WRITE)) {
15887 vm_prot_t prot;
15888
15889 assert(!pmap_has_prot_policy(src_entry->protection));
15890
15891 prot = src_entry->protection & ~VM_PROT_WRITE;
15892
15893 if (override_nx(map,
15894 VME_ALIAS(src_entry))
15895 && prot) {
15896 prot |= VM_PROT_EXECUTE;
15897 }
15898
15899 assert(!pmap_has_prot_policy(prot));
15900
15901 if (map->mapped_in_other_pmaps) {
15902 vm_object_pmap_protect(
15903 VME_OBJECT(src_entry),
15904 VME_OFFSET(src_entry),
15905 entry_size,
15906 PMAP_NULL,
15907 src_entry->vme_start,
15908 prot);
15909 } else {
15910 pmap_protect(vm_map_pmap(map),
15911 src_entry->vme_start,
15912 src_entry->vme_end,
15913 prot);
15914 }
15915 }
15916
15917 object = VME_OBJECT(src_entry);
15918 src_entry->needs_copy = FALSE;
15919 }
15920
15921
15922 vm_object_lock(object);
15923 vm_object_reference_locked(object); /* object ref. for new entry */
15924 if (object->copy_strategy ==
15925 MEMORY_OBJECT_COPY_SYMMETRIC) {
15926 object->copy_strategy =
15927 MEMORY_OBJECT_COPY_DELAY;
15928 }
15929 vm_object_unlock(object);
15930 }
15931
15932 offset = (VME_OFFSET(src_entry) +
15933 (src_start - src_entry->vme_start));
15934
15935 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
15936 vm_map_entry_copy(new_entry, src_entry);
15937 if (new_entry->is_sub_map) {
15938 /* clr address space specifics */
15939 new_entry->use_pmap = FALSE;
15940 } else if (copy) {
15941 /*
15942 * We're dealing with a copy-on-write operation,
15943 * so the resulting mapping should not inherit the
15944 * original mapping's accounting settings.
15945 * "use_pmap" should be reset to its default (TRUE)
15946 * so that the new mapping gets accounted for in
15947 * the task's memory footprint.
15948 */
15949 new_entry->use_pmap = TRUE;
15950 }
15951 /* "iokit_acct" was cleared in vm_map_entry_copy() */
15952 assert(!new_entry->iokit_acct);
15953
15954 new_entry->map_aligned = FALSE;
15955
15956 new_entry->vme_start = map_address;
15957 new_entry->vme_end = map_address + tmp_size;
15958 assert(new_entry->vme_start < new_entry->vme_end);
15959 if (copy && vmk_flags.vmkf_remap_prot_copy) {
15960 /*
15961 * Remapping for vm_map_protect(VM_PROT_COPY)
15962 * to convert a read-only mapping into a
15963 * copy-on-write version of itself but
15964 * with write access:
15965 * keep the original inheritance and add
15966 * VM_PROT_WRITE to the max protection.
15967 */
15968 new_entry->inheritance = src_entry->inheritance;
15969 new_entry->protection &= max_prot_for_prot_copy;
15970 new_entry->max_protection |= VM_PROT_WRITE;
15971 } else {
15972 new_entry->inheritance = inheritance;
15973 }
15974 VME_OFFSET_SET(new_entry, offset);
15975
15976 /*
15977 * The new region has to be copied now if required.
15978 */
15979 RestartCopy:
15980 if (!copy) {
15981 if (src_entry->used_for_jit == TRUE) {
15982 if (same_map) {
15983 #if __APRR_SUPPORTED__
15984 /*
15985 * Disallow re-mapping of any JIT regions on APRR devices.
15986 */
15987 result = KERN_PROTECTION_FAILURE;
15988 break;
15989 #endif /* __APRR_SUPPORTED__*/
15990 } else {
15991 #if CONFIG_EMBEDDED
15992 /*
15993 * Cannot allow an entry describing a JIT
15994 * region to be shared across address spaces.
15995 */
15996 result = KERN_INVALID_ARGUMENT;
15997 break;
15998 #endif /* CONFIG_EMBEDDED */
15999 }
16000 }
16001
16002 src_entry->is_shared = TRUE;
16003 new_entry->is_shared = TRUE;
16004 if (!(new_entry->is_sub_map)) {
16005 new_entry->needs_copy = FALSE;
16006 }
16007 } else if (src_entry->is_sub_map) {
16008 /* make this a COW sub_map if not already */
16009 assert(new_entry->wired_count == 0);
16010 new_entry->needs_copy = TRUE;
16011 object = VM_OBJECT_NULL;
16012 } else if (src_entry->wired_count == 0 &&
16013 vm_object_copy_quickly(VME_OBJECT_PTR(new_entry),
16014 VME_OFFSET(new_entry),
16015 (new_entry->vme_end -
16016 new_entry->vme_start),
16017 &src_needs_copy,
16018 &new_entry_needs_copy)) {
16019 new_entry->needs_copy = new_entry_needs_copy;
16020 new_entry->is_shared = FALSE;
16021 assertf(new_entry->use_pmap, "map %p new_entry %p\n", map, new_entry);
16022
16023 /*
16024 * Handle copy_on_write semantics.
16025 */
16026 if (src_needs_copy && !src_entry->needs_copy) {
16027 vm_prot_t prot;
16028
16029 assert(!pmap_has_prot_policy(src_entry->protection));
16030
16031 prot = src_entry->protection & ~VM_PROT_WRITE;
16032
16033 if (override_nx(map,
16034 VME_ALIAS(src_entry))
16035 && prot) {
16036 prot |= VM_PROT_EXECUTE;
16037 }
16038
16039 assert(!pmap_has_prot_policy(prot));
16040
16041 vm_object_pmap_protect(object,
16042 offset,
16043 entry_size,
16044 ((src_entry->is_shared
16045 || map->mapped_in_other_pmaps) ?
16046 PMAP_NULL : map->pmap),
16047 src_entry->vme_start,
16048 prot);
16049
16050 assert(src_entry->wired_count == 0);
16051 src_entry->needs_copy = TRUE;
16052 }
16053 /*
16054 * Throw away the old object reference of the new entry.
16055 */
16056 vm_object_deallocate(object);
16057 } else {
16058 new_entry->is_shared = FALSE;
16059 assertf(new_entry->use_pmap, "map %p new_entry %p\n", map, new_entry);
16060
16061 src_entry_was_wired = (src_entry->wired_count > 0);
16062 saved_src_entry = src_entry;
16063 src_entry = VM_MAP_ENTRY_NULL;
16064
16065 /*
16066 * The map can be safely unlocked since we
16067 * already hold a reference on the object.
16068 *
16069 * Record the timestamp of the map for later
16070 * verification, and unlock the map.
16071 */
16072 version.main_timestamp = map->timestamp;
16073 vm_map_unlock(map); /* Increments timestamp once! */
16074
16075 /*
16076 * Perform the copy.
16077 */
16078 if (src_entry_was_wired > 0) {
16079 vm_object_lock(object);
16080 result = vm_object_copy_slowly(
16081 object,
16082 offset,
16083 (new_entry->vme_end -
16084 new_entry->vme_start),
16085 THREAD_UNINT,
16086 VME_OBJECT_PTR(new_entry));
16087
16088 VME_OFFSET_SET(new_entry, 0);
16089 new_entry->needs_copy = FALSE;
16090 } else {
16091 vm_object_offset_t new_offset;
16092
16093 new_offset = VME_OFFSET(new_entry);
16094 result = vm_object_copy_strategically(
16095 object,
16096 offset,
16097 (new_entry->vme_end -
16098 new_entry->vme_start),
16099 VME_OBJECT_PTR(new_entry),
16100 &new_offset,
16101 &new_entry_needs_copy);
16102 if (new_offset != VME_OFFSET(new_entry)) {
16103 VME_OFFSET_SET(new_entry, new_offset);
16104 }
16105
16106 new_entry->needs_copy = new_entry_needs_copy;
16107 }
16108
16109 /*
16110 * Throw away the old object reference of the new entry.
16111 */
16112 vm_object_deallocate(object);
16113
16114 if (result != KERN_SUCCESS &&
16115 result != KERN_MEMORY_RESTART_COPY) {
16116 _vm_map_entry_dispose(map_header, new_entry);
16117 vm_map_lock(map);
16118 break;
16119 }
16120
16121 /*
16122 * Verify that the map has not substantially
16123 * changed while the copy was being made.
16124 */
16125
16126 vm_map_lock(map);
16127 if (version.main_timestamp + 1 != map->timestamp) {
16128 /*
16129 * Simple version comparison failed.
16130 *
16131 * Retry the lookup and verify that the
16132 * same object/offset are still present.
16133 */
16134 saved_src_entry = VM_MAP_ENTRY_NULL;
16135 vm_object_deallocate(VME_OBJECT(new_entry));
16136 _vm_map_entry_dispose(map_header, new_entry);
16137 if (result == KERN_MEMORY_RESTART_COPY) {
16138 result = KERN_SUCCESS;
16139 }
16140 continue;
16141 }
16142 /* map hasn't changed: src_entry is still valid */
16143 src_entry = saved_src_entry;
16144 saved_src_entry = VM_MAP_ENTRY_NULL;
16145
16146 if (result == KERN_MEMORY_RESTART_COPY) {
16147 vm_object_reference(object);
16148 goto RestartCopy;
16149 }
16150 }
16151
16152 _vm_map_store_entry_link(map_header,
16153 map_header->links.prev, new_entry);
16154
16155 /*Protections for submap mapping are irrelevant here*/
16156 if (!src_entry->is_sub_map) {
16157 *cur_protection &= src_entry->protection;
16158 *max_protection &= src_entry->max_protection;
16159 }
16160 map_address += tmp_size;
16161 mapped_size += tmp_size;
16162 src_start += tmp_size;
16163 } /* end while */
16164
16165 vm_map_unlock(map);
16166 if (result != KERN_SUCCESS) {
16167 /*
16168 * Free all allocated elements.
16169 */
16170 for (src_entry = map_header->links.next;
16171 src_entry != CAST_TO_VM_MAP_ENTRY(&map_header->links);
16172 src_entry = new_entry) {
16173 new_entry = src_entry->vme_next;
16174 _vm_map_store_entry_unlink(map_header, src_entry);
16175 if (src_entry->is_sub_map) {
16176 vm_map_deallocate(VME_SUBMAP(src_entry));
16177 } else {
16178 vm_object_deallocate(VME_OBJECT(src_entry));
16179 }
16180 _vm_map_entry_dispose(map_header, src_entry);
16181 }
16182 }
16183 return result;
16184 }
16185
16186 /*
16187 * Routine: vm_remap
16188 *
16189 * Map portion of a task's address space.
16190 * Mapped region must not overlap more than
16191 * one vm memory object. Protections and
16192 * inheritance attributes remain the same
16193 * as in the original task and are out parameters.
16194 * Source and Target task can be identical
16195 * Other attributes are identical as for vm_map()
16196 */
16197 kern_return_t
16198 vm_map_remap(
16199 vm_map_t target_map,
16200 vm_map_address_t *address,
16201 vm_map_size_t size,
16202 vm_map_offset_t mask,
16203 int flags,
16204 vm_map_kernel_flags_t vmk_flags,
16205 vm_tag_t tag,
16206 vm_map_t src_map,
16207 vm_map_offset_t memory_address,
16208 boolean_t copy,
16209 vm_prot_t *cur_protection,
16210 vm_prot_t *max_protection,
16211 vm_inherit_t inheritance)
16212 {
16213 kern_return_t result;
16214 vm_map_entry_t entry;
16215 vm_map_entry_t insp_entry = VM_MAP_ENTRY_NULL;
16216 vm_map_entry_t new_entry;
16217 struct vm_map_header map_header;
16218 vm_map_offset_t offset_in_mapping;
16219
16220 if (target_map == VM_MAP_NULL) {
16221 return KERN_INVALID_ARGUMENT;
16222 }
16223
16224 switch (inheritance) {
16225 case VM_INHERIT_NONE:
16226 case VM_INHERIT_COPY:
16227 case VM_INHERIT_SHARE:
16228 if (size != 0 && src_map != VM_MAP_NULL) {
16229 break;
16230 }
16231 /*FALL THRU*/
16232 default:
16233 return KERN_INVALID_ARGUMENT;
16234 }
16235
16236 /*
16237 * If the user is requesting that we return the address of the
16238 * first byte of the data (rather than the base of the page),
16239 * then we use different rounding semantics: specifically,
16240 * we assume that (memory_address, size) describes a region
16241 * all of whose pages we must cover, rather than a base to be truncated
16242 * down and a size to be added to that base. So we figure out
16243 * the highest page that the requested region includes and make
16244 * sure that the size will cover it.
16245 *
16246 * The key example we're worried about it is of the form:
16247 *
16248 * memory_address = 0x1ff0, size = 0x20
16249 *
16250 * With the old semantics, we round down the memory_address to 0x1000
16251 * and round up the size to 0x1000, resulting in our covering *only*
16252 * page 0x1000. With the new semantics, we'd realize that the region covers
16253 * 0x1ff0-0x2010, and compute a size of 0x2000. Thus, we cover both page
16254 * 0x1000 and page 0x2000 in the region we remap.
16255 */
16256 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
16257 offset_in_mapping = memory_address - vm_map_trunc_page(memory_address, PAGE_MASK);
16258 size = vm_map_round_page(memory_address + size - vm_map_trunc_page(memory_address, PAGE_MASK), PAGE_MASK);
16259 } else {
16260 size = vm_map_round_page(size, PAGE_MASK);
16261 }
16262 if (size == 0) {
16263 return KERN_INVALID_ARGUMENT;
16264 }
16265
16266 if (flags & VM_FLAGS_RESILIENT_MEDIA) {
16267 /* must be copy-on-write to be "media resilient" */
16268 if (!copy) {
16269 return KERN_INVALID_ARGUMENT;
16270 }
16271 }
16272
16273 result = vm_map_remap_extract(src_map, memory_address,
16274 size, copy, &map_header,
16275 cur_protection,
16276 max_protection,
16277 inheritance,
16278 target_map->hdr.entries_pageable,
16279 src_map == target_map,
16280 vmk_flags);
16281
16282 if (result != KERN_SUCCESS) {
16283 return result;
16284 }
16285
16286 /*
16287 * Allocate/check a range of free virtual address
16288 * space for the target
16289 */
16290 *address = vm_map_trunc_page(*address,
16291 VM_MAP_PAGE_MASK(target_map));
16292 vm_map_lock(target_map);
16293 result = vm_map_remap_range_allocate(target_map, address, size,
16294 mask, flags, vmk_flags, tag,
16295 &insp_entry);
16296
16297 for (entry = map_header.links.next;
16298 entry != CAST_TO_VM_MAP_ENTRY(&map_header.links);
16299 entry = new_entry) {
16300 new_entry = entry->vme_next;
16301 _vm_map_store_entry_unlink(&map_header, entry);
16302 if (result == KERN_SUCCESS) {
16303 if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
16304 /* no codesigning -> read-only access */
16305 entry->max_protection = VM_PROT_READ;
16306 entry->protection = VM_PROT_READ;
16307 entry->vme_resilient_codesign = TRUE;
16308 }
16309 entry->vme_start += *address;
16310 entry->vme_end += *address;
16311 assert(!entry->map_aligned);
16312 if ((flags & VM_FLAGS_RESILIENT_MEDIA) &&
16313 !entry->is_sub_map &&
16314 (VME_OBJECT(entry) == VM_OBJECT_NULL ||
16315 VME_OBJECT(entry)->internal)) {
16316 entry->vme_resilient_media = TRUE;
16317 }
16318 vm_map_store_entry_link(target_map, insp_entry, entry,
16319 vmk_flags);
16320 insp_entry = entry;
16321 } else {
16322 if (!entry->is_sub_map) {
16323 vm_object_deallocate(VME_OBJECT(entry));
16324 } else {
16325 vm_map_deallocate(VME_SUBMAP(entry));
16326 }
16327 _vm_map_entry_dispose(&map_header, entry);
16328 }
16329 }
16330
16331 if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
16332 *cur_protection = VM_PROT_READ;
16333 *max_protection = VM_PROT_READ;
16334 }
16335
16336 if (target_map->disable_vmentry_reuse == TRUE) {
16337 assert(!target_map->is_nested_map);
16338 if (target_map->highest_entry_end < insp_entry->vme_end) {
16339 target_map->highest_entry_end = insp_entry->vme_end;
16340 }
16341 }
16342
16343 if (result == KERN_SUCCESS) {
16344 target_map->size += size;
16345 SAVE_HINT_MAP_WRITE(target_map, insp_entry);
16346
16347 #if PMAP_CS
16348 if (*max_protection & VM_PROT_EXECUTE) {
16349 vm_map_address_t region_start = 0, region_size = 0;
16350 struct pmap_cs_code_directory *region_cd = NULL;
16351 vm_map_address_t base = 0;
16352 struct pmap_cs_lookup_results results = {};
16353 vm_map_size_t page_addr = vm_map_trunc_page(memory_address, PAGE_MASK);
16354 vm_map_size_t assoc_size = vm_map_round_page(memory_address + size - page_addr, PAGE_MASK);
16355
16356 pmap_cs_lookup(src_map->pmap, memory_address, &results);
16357 region_size = results.region_size;
16358 region_start = results.region_start;
16359 region_cd = results.region_cd_entry;
16360 base = results.base;
16361
16362 if (region_cd != NULL && (page_addr != region_start || assoc_size != region_size)) {
16363 *cur_protection = VM_PROT_READ;
16364 *max_protection = VM_PROT_READ;
16365 printf("mismatched remap of executable range 0x%llx-0x%llx to 0x%llx, "
16366 "region_start 0x%llx, region_size 0x%llx, cd_entry %sNULL, making non-executable.\n",
16367 page_addr, page_addr + assoc_size, *address,
16368 region_start, region_size,
16369 region_cd != NULL ? "not " : "" // Don't leak kernel slide
16370 );
16371 }
16372 }
16373 #endif
16374 }
16375 vm_map_unlock(target_map);
16376
16377 if (result == KERN_SUCCESS && target_map->wiring_required) {
16378 result = vm_map_wire_kernel(target_map, *address,
16379 *address + size, *cur_protection, VM_KERN_MEMORY_MLOCK,
16380 TRUE);
16381 }
16382
16383 /*
16384 * If requested, return the address of the data pointed to by the
16385 * request, rather than the base of the resulting page.
16386 */
16387 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
16388 *address += offset_in_mapping;
16389 }
16390
16391 return result;
16392 }
16393
16394 /*
16395 * Routine: vm_map_remap_range_allocate
16396 *
16397 * Description:
16398 * Allocate a range in the specified virtual address map.
16399 * returns the address and the map entry just before the allocated
16400 * range
16401 *
16402 * Map must be locked.
16403 */
16404
16405 static kern_return_t
16406 vm_map_remap_range_allocate(
16407 vm_map_t map,
16408 vm_map_address_t *address, /* IN/OUT */
16409 vm_map_size_t size,
16410 vm_map_offset_t mask,
16411 int flags,
16412 vm_map_kernel_flags_t vmk_flags,
16413 __unused vm_tag_t tag,
16414 vm_map_entry_t *map_entry) /* OUT */
16415 {
16416 vm_map_entry_t entry;
16417 vm_map_offset_t start;
16418 vm_map_offset_t end;
16419 vm_map_offset_t desired_empty_end;
16420 kern_return_t kr;
16421 vm_map_entry_t hole_entry;
16422
16423 StartAgain:;
16424
16425 start = *address;
16426
16427 if (flags & VM_FLAGS_ANYWHERE) {
16428 if (flags & VM_FLAGS_RANDOM_ADDR) {
16429 /*
16430 * Get a random start address.
16431 */
16432 kr = vm_map_random_address_for_size(map, address, size);
16433 if (kr != KERN_SUCCESS) {
16434 return kr;
16435 }
16436 start = *address;
16437 }
16438
16439 /*
16440 * Calculate the first possible address.
16441 */
16442
16443 if (start < map->min_offset) {
16444 start = map->min_offset;
16445 }
16446 if (start > map->max_offset) {
16447 return KERN_NO_SPACE;
16448 }
16449
16450 /*
16451 * Look for the first possible address;
16452 * if there's already something at this
16453 * address, we have to start after it.
16454 */
16455
16456 if (map->disable_vmentry_reuse == TRUE) {
16457 VM_MAP_HIGHEST_ENTRY(map, entry, start);
16458 } else {
16459 if (map->holelistenabled) {
16460 hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
16461
16462 if (hole_entry == NULL) {
16463 /*
16464 * No more space in the map?
16465 */
16466 return KERN_NO_SPACE;
16467 } else {
16468 boolean_t found_hole = FALSE;
16469
16470 do {
16471 if (hole_entry->vme_start >= start) {
16472 start = hole_entry->vme_start;
16473 found_hole = TRUE;
16474 break;
16475 }
16476
16477 if (hole_entry->vme_end > start) {
16478 found_hole = TRUE;
16479 break;
16480 }
16481 hole_entry = hole_entry->vme_next;
16482 } while (hole_entry != CAST_TO_VM_MAP_ENTRY(map->holes_list));
16483
16484 if (found_hole == FALSE) {
16485 return KERN_NO_SPACE;
16486 }
16487
16488 entry = hole_entry;
16489 }
16490 } else {
16491 assert(first_free_is_valid(map));
16492 if (start == map->min_offset) {
16493 if ((entry = map->first_free) != vm_map_to_entry(map)) {
16494 start = entry->vme_end;
16495 }
16496 } else {
16497 vm_map_entry_t tmp_entry;
16498 if (vm_map_lookup_entry(map, start, &tmp_entry)) {
16499 start = tmp_entry->vme_end;
16500 }
16501 entry = tmp_entry;
16502 }
16503 }
16504 start = vm_map_round_page(start,
16505 VM_MAP_PAGE_MASK(map));
16506 }
16507
16508 /*
16509 * In any case, the "entry" always precedes
16510 * the proposed new region throughout the
16511 * loop:
16512 */
16513
16514 while (TRUE) {
16515 vm_map_entry_t next;
16516
16517 /*
16518 * Find the end of the proposed new region.
16519 * Be sure we didn't go beyond the end, or
16520 * wrap around the address.
16521 */
16522
16523 end = ((start + mask) & ~mask);
16524 end = vm_map_round_page(end,
16525 VM_MAP_PAGE_MASK(map));
16526 if (end < start) {
16527 return KERN_NO_SPACE;
16528 }
16529 start = end;
16530 end += size;
16531
16532 /* We want an entire page of empty space, but don't increase the allocation size. */
16533 desired_empty_end = vm_map_round_page(end, VM_MAP_PAGE_MASK(map));
16534
16535 if ((desired_empty_end > map->max_offset) || (desired_empty_end < start)) {
16536 if (map->wait_for_space) {
16537 if (size <= (map->max_offset -
16538 map->min_offset)) {
16539 assert_wait((event_t) map, THREAD_INTERRUPTIBLE);
16540 vm_map_unlock(map);
16541 thread_block(THREAD_CONTINUE_NULL);
16542 vm_map_lock(map);
16543 goto StartAgain;
16544 }
16545 }
16546
16547 return KERN_NO_SPACE;
16548 }
16549
16550 next = entry->vme_next;
16551
16552 if (map->holelistenabled) {
16553 if (entry->vme_end >= desired_empty_end) {
16554 break;
16555 }
16556 } else {
16557 /*
16558 * If there are no more entries, we must win.
16559 *
16560 * OR
16561 *
16562 * If there is another entry, it must be
16563 * after the end of the potential new region.
16564 */
16565
16566 if (next == vm_map_to_entry(map)) {
16567 break;
16568 }
16569
16570 if (next->vme_start >= desired_empty_end) {
16571 break;
16572 }
16573 }
16574
16575 /*
16576 * Didn't fit -- move to the next entry.
16577 */
16578
16579 entry = next;
16580
16581 if (map->holelistenabled) {
16582 if (entry == CAST_TO_VM_MAP_ENTRY(map->holes_list)) {
16583 /*
16584 * Wrapped around
16585 */
16586 return KERN_NO_SPACE;
16587 }
16588 start = entry->vme_start;
16589 } else {
16590 start = entry->vme_end;
16591 }
16592 }
16593
16594 if (map->holelistenabled) {
16595 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
16596 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
16597 }
16598 }
16599
16600 *address = start;
16601 } else {
16602 vm_map_entry_t temp_entry;
16603
16604 /*
16605 * Verify that:
16606 * the address doesn't itself violate
16607 * the mask requirement.
16608 */
16609
16610 if ((start & mask) != 0) {
16611 return KERN_NO_SPACE;
16612 }
16613
16614
16615 /*
16616 * ... the address is within bounds
16617 */
16618
16619 end = start + size;
16620
16621 if ((start < map->min_offset) ||
16622 (end > map->max_offset) ||
16623 (start >= end)) {
16624 return KERN_INVALID_ADDRESS;
16625 }
16626
16627 /*
16628 * If we're asked to overwrite whatever was mapped in that
16629 * range, first deallocate that range.
16630 */
16631 if (flags & VM_FLAGS_OVERWRITE) {
16632 vm_map_t zap_map;
16633 int remove_flags = VM_MAP_REMOVE_SAVE_ENTRIES | VM_MAP_REMOVE_NO_MAP_ALIGN;
16634
16635 /*
16636 * We use a "zap_map" to avoid having to unlock
16637 * the "map" in vm_map_delete(), which would compromise
16638 * the atomicity of the "deallocate" and then "remap"
16639 * combination.
16640 */
16641 zap_map = vm_map_create(PMAP_NULL,
16642 start,
16643 end,
16644 map->hdr.entries_pageable);
16645 if (zap_map == VM_MAP_NULL) {
16646 return KERN_RESOURCE_SHORTAGE;
16647 }
16648 vm_map_set_page_shift(zap_map, VM_MAP_PAGE_SHIFT(map));
16649 vm_map_disable_hole_optimization(zap_map);
16650
16651 if (vmk_flags.vmkf_overwrite_immutable) {
16652 remove_flags |= VM_MAP_REMOVE_IMMUTABLE;
16653 }
16654 kr = vm_map_delete(map, start, end,
16655 remove_flags,
16656 zap_map);
16657 if (kr == KERN_SUCCESS) {
16658 vm_map_destroy(zap_map,
16659 VM_MAP_REMOVE_NO_PMAP_CLEANUP);
16660 zap_map = VM_MAP_NULL;
16661 }
16662 }
16663
16664 /*
16665 * ... the starting address isn't allocated
16666 */
16667
16668 if (vm_map_lookup_entry(map, start, &temp_entry)) {
16669 return KERN_NO_SPACE;
16670 }
16671
16672 entry = temp_entry;
16673
16674 /*
16675 * ... the next region doesn't overlap the
16676 * end point.
16677 */
16678
16679 if ((entry->vme_next != vm_map_to_entry(map)) &&
16680 (entry->vme_next->vme_start < end)) {
16681 return KERN_NO_SPACE;
16682 }
16683 }
16684 *map_entry = entry;
16685 return KERN_SUCCESS;
16686 }
16687
16688 /*
16689 * vm_map_switch:
16690 *
16691 * Set the address map for the current thread to the specified map
16692 */
16693
16694 vm_map_t
16695 vm_map_switch(
16696 vm_map_t map)
16697 {
16698 int mycpu;
16699 thread_t thread = current_thread();
16700 vm_map_t oldmap = thread->map;
16701
16702 mp_disable_preemption();
16703 mycpu = cpu_number();
16704
16705 /*
16706 * Deactivate the current map and activate the requested map
16707 */
16708 PMAP_SWITCH_USER(thread, map, mycpu);
16709
16710 mp_enable_preemption();
16711 return oldmap;
16712 }
16713
16714
16715 /*
16716 * Routine: vm_map_write_user
16717 *
16718 * Description:
16719 * Copy out data from a kernel space into space in the
16720 * destination map. The space must already exist in the
16721 * destination map.
16722 * NOTE: This routine should only be called by threads
16723 * which can block on a page fault. i.e. kernel mode user
16724 * threads.
16725 *
16726 */
16727 kern_return_t
16728 vm_map_write_user(
16729 vm_map_t map,
16730 void *src_p,
16731 vm_map_address_t dst_addr,
16732 vm_size_t size)
16733 {
16734 kern_return_t kr = KERN_SUCCESS;
16735
16736 if (current_map() == map) {
16737 if (copyout(src_p, dst_addr, size)) {
16738 kr = KERN_INVALID_ADDRESS;
16739 }
16740 } else {
16741 vm_map_t oldmap;
16742
16743 /* take on the identity of the target map while doing */
16744 /* the transfer */
16745
16746 vm_map_reference(map);
16747 oldmap = vm_map_switch(map);
16748 if (copyout(src_p, dst_addr, size)) {
16749 kr = KERN_INVALID_ADDRESS;
16750 }
16751 vm_map_switch(oldmap);
16752 vm_map_deallocate(map);
16753 }
16754 return kr;
16755 }
16756
16757 /*
16758 * Routine: vm_map_read_user
16759 *
16760 * Description:
16761 * Copy in data from a user space source map into the
16762 * kernel map. The space must already exist in the
16763 * kernel map.
16764 * NOTE: This routine should only be called by threads
16765 * which can block on a page fault. i.e. kernel mode user
16766 * threads.
16767 *
16768 */
16769 kern_return_t
16770 vm_map_read_user(
16771 vm_map_t map,
16772 vm_map_address_t src_addr,
16773 void *dst_p,
16774 vm_size_t size)
16775 {
16776 kern_return_t kr = KERN_SUCCESS;
16777
16778 if (current_map() == map) {
16779 if (copyin(src_addr, dst_p, size)) {
16780 kr = KERN_INVALID_ADDRESS;
16781 }
16782 } else {
16783 vm_map_t oldmap;
16784
16785 /* take on the identity of the target map while doing */
16786 /* the transfer */
16787
16788 vm_map_reference(map);
16789 oldmap = vm_map_switch(map);
16790 if (copyin(src_addr, dst_p, size)) {
16791 kr = KERN_INVALID_ADDRESS;
16792 }
16793 vm_map_switch(oldmap);
16794 vm_map_deallocate(map);
16795 }
16796 return kr;
16797 }
16798
16799
16800 /*
16801 * vm_map_check_protection:
16802 *
16803 * Assert that the target map allows the specified
16804 * privilege on the entire address region given.
16805 * The entire region must be allocated.
16806 */
16807 boolean_t
16808 vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
16809 vm_map_offset_t end, vm_prot_t protection)
16810 {
16811 vm_map_entry_t entry;
16812 vm_map_entry_t tmp_entry;
16813
16814 vm_map_lock(map);
16815
16816 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end) {
16817 vm_map_unlock(map);
16818 return FALSE;
16819 }
16820
16821 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
16822 vm_map_unlock(map);
16823 return FALSE;
16824 }
16825
16826 entry = tmp_entry;
16827
16828 while (start < end) {
16829 if (entry == vm_map_to_entry(map)) {
16830 vm_map_unlock(map);
16831 return FALSE;
16832 }
16833
16834 /*
16835 * No holes allowed!
16836 */
16837
16838 if (start < entry->vme_start) {
16839 vm_map_unlock(map);
16840 return FALSE;
16841 }
16842
16843 /*
16844 * Check protection associated with entry.
16845 */
16846
16847 if ((entry->protection & protection) != protection) {
16848 vm_map_unlock(map);
16849 return FALSE;
16850 }
16851
16852 /* go to next entry */
16853
16854 start = entry->vme_end;
16855 entry = entry->vme_next;
16856 }
16857 vm_map_unlock(map);
16858 return TRUE;
16859 }
16860
16861 kern_return_t
16862 vm_map_purgable_control(
16863 vm_map_t map,
16864 vm_map_offset_t address,
16865 vm_purgable_t control,
16866 int *state)
16867 {
16868 vm_map_entry_t entry;
16869 vm_object_t object;
16870 kern_return_t kr;
16871 boolean_t was_nonvolatile;
16872
16873 /*
16874 * Vet all the input parameters and current type and state of the
16875 * underlaying object. Return with an error if anything is amiss.
16876 */
16877 if (map == VM_MAP_NULL) {
16878 return KERN_INVALID_ARGUMENT;
16879 }
16880
16881 if (control != VM_PURGABLE_SET_STATE &&
16882 control != VM_PURGABLE_GET_STATE &&
16883 control != VM_PURGABLE_PURGE_ALL &&
16884 control != VM_PURGABLE_SET_STATE_FROM_KERNEL) {
16885 return KERN_INVALID_ARGUMENT;
16886 }
16887
16888 if (control == VM_PURGABLE_PURGE_ALL) {
16889 vm_purgeable_object_purge_all();
16890 return KERN_SUCCESS;
16891 }
16892
16893 if ((control == VM_PURGABLE_SET_STATE ||
16894 control == VM_PURGABLE_SET_STATE_FROM_KERNEL) &&
16895 (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) ||
16896 ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK))) {
16897 return KERN_INVALID_ARGUMENT;
16898 }
16899
16900 vm_map_lock_read(map);
16901
16902 if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {
16903 /*
16904 * Must pass a valid non-submap address.
16905 */
16906 vm_map_unlock_read(map);
16907 return KERN_INVALID_ADDRESS;
16908 }
16909
16910 if ((entry->protection & VM_PROT_WRITE) == 0) {
16911 /*
16912 * Can't apply purgable controls to something you can't write.
16913 */
16914 vm_map_unlock_read(map);
16915 return KERN_PROTECTION_FAILURE;
16916 }
16917
16918 object = VME_OBJECT(entry);
16919 if (object == VM_OBJECT_NULL ||
16920 object->purgable == VM_PURGABLE_DENY) {
16921 /*
16922 * Object must already be present and be purgeable.
16923 */
16924 vm_map_unlock_read(map);
16925 return KERN_INVALID_ARGUMENT;
16926 }
16927
16928 vm_object_lock(object);
16929
16930 #if 00
16931 if (VME_OFFSET(entry) != 0 ||
16932 entry->vme_end - entry->vme_start != object->vo_size) {
16933 /*
16934 * Can only apply purgable controls to the whole (existing)
16935 * object at once.
16936 */
16937 vm_map_unlock_read(map);
16938 vm_object_unlock(object);
16939 return KERN_INVALID_ARGUMENT;
16940 }
16941 #endif
16942
16943 assert(!entry->is_sub_map);
16944 assert(!entry->use_pmap); /* purgeable has its own accounting */
16945
16946 vm_map_unlock_read(map);
16947
16948 was_nonvolatile = (object->purgable == VM_PURGABLE_NONVOLATILE);
16949
16950 kr = vm_object_purgable_control(object, control, state);
16951
16952 if (was_nonvolatile &&
16953 object->purgable != VM_PURGABLE_NONVOLATILE &&
16954 map->pmap == kernel_pmap) {
16955 #if DEBUG
16956 object->vo_purgeable_volatilizer = kernel_task;
16957 #endif /* DEBUG */
16958 }
16959
16960 vm_object_unlock(object);
16961
16962 return kr;
16963 }
16964
16965 kern_return_t
16966 vm_map_page_query_internal(
16967 vm_map_t target_map,
16968 vm_map_offset_t offset,
16969 int *disposition,
16970 int *ref_count)
16971 {
16972 kern_return_t kr;
16973 vm_page_info_basic_data_t info;
16974 mach_msg_type_number_t count;
16975
16976 count = VM_PAGE_INFO_BASIC_COUNT;
16977 kr = vm_map_page_info(target_map,
16978 offset,
16979 VM_PAGE_INFO_BASIC,
16980 (vm_page_info_t) &info,
16981 &count);
16982 if (kr == KERN_SUCCESS) {
16983 *disposition = info.disposition;
16984 *ref_count = info.ref_count;
16985 } else {
16986 *disposition = 0;
16987 *ref_count = 0;
16988 }
16989
16990 return kr;
16991 }
16992
16993 kern_return_t
16994 vm_map_page_info(
16995 vm_map_t map,
16996 vm_map_offset_t offset,
16997 vm_page_info_flavor_t flavor,
16998 vm_page_info_t info,
16999 mach_msg_type_number_t *count)
17000 {
17001 return vm_map_page_range_info_internal(map,
17002 offset, /* start of range */
17003 (offset + 1), /* this will get rounded in the call to the page boundary */
17004 flavor,
17005 info,
17006 count);
17007 }
17008
17009 kern_return_t
17010 vm_map_page_range_info_internal(
17011 vm_map_t map,
17012 vm_map_offset_t start_offset,
17013 vm_map_offset_t end_offset,
17014 vm_page_info_flavor_t flavor,
17015 vm_page_info_t info,
17016 mach_msg_type_number_t *count)
17017 {
17018 vm_map_entry_t map_entry = VM_MAP_ENTRY_NULL;
17019 vm_object_t object = VM_OBJECT_NULL, curr_object = VM_OBJECT_NULL;
17020 vm_page_t m = VM_PAGE_NULL;
17021 kern_return_t retval = KERN_SUCCESS;
17022 int disposition = 0;
17023 int ref_count = 0;
17024 int depth = 0, info_idx = 0;
17025 vm_page_info_basic_t basic_info = 0;
17026 vm_map_offset_t offset_in_page = 0, offset_in_object = 0, curr_offset_in_object = 0;
17027 vm_map_offset_t start = 0, end = 0, curr_s_offset = 0, curr_e_offset = 0;
17028 boolean_t do_region_footprint;
17029 ledger_amount_t ledger_resident, ledger_compressed;
17030
17031 switch (flavor) {
17032 case VM_PAGE_INFO_BASIC:
17033 if (*count != VM_PAGE_INFO_BASIC_COUNT) {
17034 /*
17035 * The "vm_page_info_basic_data" structure was not
17036 * properly padded, so allow the size to be off by
17037 * one to maintain backwards binary compatibility...
17038 */
17039 if (*count != VM_PAGE_INFO_BASIC_COUNT - 1) {
17040 return KERN_INVALID_ARGUMENT;
17041 }
17042 }
17043 break;
17044 default:
17045 return KERN_INVALID_ARGUMENT;
17046 }
17047
17048 do_region_footprint = task_self_region_footprint();
17049 disposition = 0;
17050 ref_count = 0;
17051 depth = 0;
17052 info_idx = 0; /* Tracks the next index within the info structure to be filled.*/
17053 retval = KERN_SUCCESS;
17054
17055 offset_in_page = start_offset & PAGE_MASK;
17056 start = vm_map_trunc_page(start_offset, PAGE_MASK);
17057 end = vm_map_round_page(end_offset, PAGE_MASK);
17058
17059 if (end < start) {
17060 return KERN_INVALID_ARGUMENT;
17061 }
17062
17063 assert((end - start) <= MAX_PAGE_RANGE_QUERY);
17064
17065 vm_map_lock_read(map);
17066
17067 task_ledgers_footprint(map->pmap->ledger, &ledger_resident, &ledger_compressed);
17068
17069 for (curr_s_offset = start; curr_s_offset < end;) {
17070 /*
17071 * New lookup needs reset of these variables.
17072 */
17073 curr_object = object = VM_OBJECT_NULL;
17074 offset_in_object = 0;
17075 ref_count = 0;
17076 depth = 0;
17077
17078 if (do_region_footprint &&
17079 curr_s_offset >= vm_map_last_entry(map)->vme_end) {
17080 /*
17081 * Request for "footprint" info about a page beyond
17082 * the end of address space: this must be for
17083 * the fake region vm_map_region_recurse_64()
17084 * reported to account for non-volatile purgeable
17085 * memory owned by this task.
17086 */
17087 disposition = 0;
17088
17089 if (curr_s_offset - vm_map_last_entry(map)->vme_end <=
17090 (unsigned) ledger_compressed) {
17091 /*
17092 * We haven't reported all the "non-volatile
17093 * compressed" pages yet, so report this fake
17094 * page as "compressed".
17095 */
17096 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
17097 } else {
17098 /*
17099 * We've reported all the non-volatile
17100 * compressed page but not all the non-volatile
17101 * pages , so report this fake page as
17102 * "resident dirty".
17103 */
17104 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
17105 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
17106 disposition |= VM_PAGE_QUERY_PAGE_REF;
17107 }
17108 switch (flavor) {
17109 case VM_PAGE_INFO_BASIC:
17110 basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
17111 basic_info->disposition = disposition;
17112 basic_info->ref_count = 1;
17113 basic_info->object_id = INFO_MAKE_FAKE_OBJECT_ID(map, task_ledgers.purgeable_nonvolatile);
17114 basic_info->offset = 0;
17115 basic_info->depth = 0;
17116
17117 info_idx++;
17118 break;
17119 }
17120 curr_s_offset += PAGE_SIZE;
17121 continue;
17122 }
17123
17124 /*
17125 * First, find the map entry covering "curr_s_offset", going down
17126 * submaps if necessary.
17127 */
17128 if (!vm_map_lookup_entry(map, curr_s_offset, &map_entry)) {
17129 /* no entry -> no object -> no page */
17130
17131 if (curr_s_offset < vm_map_min(map)) {
17132 /*
17133 * Illegal address that falls below map min.
17134 */
17135 curr_e_offset = MIN(end, vm_map_min(map));
17136 } else if (curr_s_offset >= vm_map_max(map)) {
17137 /*
17138 * Illegal address that falls on/after map max.
17139 */
17140 curr_e_offset = end;
17141 } else if (map_entry == vm_map_to_entry(map)) {
17142 /*
17143 * Hit a hole.
17144 */
17145 if (map_entry->vme_next == vm_map_to_entry(map)) {
17146 /*
17147 * Empty map.
17148 */
17149 curr_e_offset = MIN(map->max_offset, end);
17150 } else {
17151 /*
17152 * Hole at start of the map.
17153 */
17154 curr_e_offset = MIN(map_entry->vme_next->vme_start, end);
17155 }
17156 } else {
17157 if (map_entry->vme_next == vm_map_to_entry(map)) {
17158 /*
17159 * Hole at the end of the map.
17160 */
17161 curr_e_offset = MIN(map->max_offset, end);
17162 } else {
17163 curr_e_offset = MIN(map_entry->vme_next->vme_start, end);
17164 }
17165 }
17166
17167 assert(curr_e_offset >= curr_s_offset);
17168
17169 uint64_t num_pages = (curr_e_offset - curr_s_offset) >> PAGE_SHIFT;
17170
17171 void *info_ptr = (void*) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
17172
17173 bzero(info_ptr, num_pages * sizeof(struct vm_page_info_basic));
17174
17175 curr_s_offset = curr_e_offset;
17176
17177 info_idx += num_pages;
17178
17179 continue;
17180 }
17181
17182 /* compute offset from this map entry's start */
17183 offset_in_object = curr_s_offset - map_entry->vme_start;
17184
17185 /* compute offset into this map entry's object (or submap) */
17186 offset_in_object += VME_OFFSET(map_entry);
17187
17188 if (map_entry->is_sub_map) {
17189 vm_map_t sub_map = VM_MAP_NULL;
17190 vm_page_info_t submap_info = 0;
17191 vm_map_offset_t submap_s_offset = 0, submap_e_offset = 0, range_len = 0;
17192
17193 range_len = MIN(map_entry->vme_end, end) - curr_s_offset;
17194
17195 submap_s_offset = offset_in_object;
17196 submap_e_offset = submap_s_offset + range_len;
17197
17198 sub_map = VME_SUBMAP(map_entry);
17199
17200 vm_map_reference(sub_map);
17201 vm_map_unlock_read(map);
17202
17203 submap_info = (vm_page_info_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
17204
17205 retval = vm_map_page_range_info_internal(sub_map,
17206 submap_s_offset,
17207 submap_e_offset,
17208 VM_PAGE_INFO_BASIC,
17209 (vm_page_info_t) submap_info,
17210 count);
17211
17212 assert(retval == KERN_SUCCESS);
17213
17214 vm_map_lock_read(map);
17215 vm_map_deallocate(sub_map);
17216
17217 /* Move the "info" index by the number of pages we inspected.*/
17218 info_idx += range_len >> PAGE_SHIFT;
17219
17220 /* Move our current offset by the size of the range we inspected.*/
17221 curr_s_offset += range_len;
17222
17223 continue;
17224 }
17225
17226 object = VME_OBJECT(map_entry);
17227 if (object == VM_OBJECT_NULL) {
17228 /*
17229 * We don't have an object here and, hence,
17230 * no pages to inspect. We'll fill up the
17231 * info structure appropriately.
17232 */
17233
17234 curr_e_offset = MIN(map_entry->vme_end, end);
17235
17236 uint64_t num_pages = (curr_e_offset - curr_s_offset) >> PAGE_SHIFT;
17237
17238 void *info_ptr = (void*) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
17239
17240 bzero(info_ptr, num_pages * sizeof(struct vm_page_info_basic));
17241
17242 curr_s_offset = curr_e_offset;
17243
17244 info_idx += num_pages;
17245
17246 continue;
17247 }
17248
17249 if (do_region_footprint) {
17250 int pmap_disp;
17251
17252 disposition = 0;
17253 pmap_disp = 0;
17254 if (map->has_corpse_footprint) {
17255 /*
17256 * Query the page info data we saved
17257 * while forking the corpse.
17258 */
17259 vm_map_corpse_footprint_query_page_info(
17260 map,
17261 curr_s_offset,
17262 &pmap_disp);
17263 } else {
17264 /*
17265 * Query the pmap.
17266 */
17267 pmap_query_page_info(map->pmap,
17268 curr_s_offset,
17269 &pmap_disp);
17270 }
17271 if (object->purgable == VM_PURGABLE_NONVOLATILE &&
17272 /* && not tagged as no-footprint? */
17273 VM_OBJECT_OWNER(object) != NULL &&
17274 VM_OBJECT_OWNER(object)->map == map) {
17275 if ((((curr_s_offset
17276 - map_entry->vme_start
17277 + VME_OFFSET(map_entry))
17278 / PAGE_SIZE) <
17279 (object->resident_page_count +
17280 vm_compressor_pager_get_count(object->pager)))) {
17281 /*
17282 * Non-volatile purgeable object owned
17283 * by this task: report the first
17284 * "#resident + #compressed" pages as
17285 * "resident" (to show that they
17286 * contribute to the footprint) but not
17287 * "dirty" (to avoid double-counting
17288 * with the fake "non-volatile" region
17289 * we'll report at the end of the
17290 * address space to account for all
17291 * (mapped or not) non-volatile memory
17292 * owned by this task.
17293 */
17294 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
17295 }
17296 } else if ((object->purgable == VM_PURGABLE_VOLATILE ||
17297 object->purgable == VM_PURGABLE_EMPTY) &&
17298 /* && not tagged as no-footprint? */
17299 VM_OBJECT_OWNER(object) != NULL &&
17300 VM_OBJECT_OWNER(object)->map == map) {
17301 if ((((curr_s_offset
17302 - map_entry->vme_start
17303 + VME_OFFSET(map_entry))
17304 / PAGE_SIZE) <
17305 object->wired_page_count)) {
17306 /*
17307 * Volatile|empty purgeable object owned
17308 * by this task: report the first
17309 * "#wired" pages as "resident" (to
17310 * show that they contribute to the
17311 * footprint) but not "dirty" (to avoid
17312 * double-counting with the fake
17313 * "non-volatile" region we'll report
17314 * at the end of the address space to
17315 * account for all (mapped or not)
17316 * non-volatile memory owned by this
17317 * task.
17318 */
17319 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
17320 }
17321 } else if (map_entry->iokit_acct &&
17322 object->internal &&
17323 object->purgable == VM_PURGABLE_DENY) {
17324 /*
17325 * Non-purgeable IOKit memory: phys_footprint
17326 * includes the entire virtual mapping.
17327 */
17328 assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
17329 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
17330 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
17331 } else if (pmap_disp & (PMAP_QUERY_PAGE_ALTACCT |
17332 PMAP_QUERY_PAGE_COMPRESSED_ALTACCT)) {
17333 /* alternate accounting */
17334 #if CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG)
17335 if (map->pmap->footprint_was_suspended ||
17336 /*
17337 * XXX corpse does not know if original
17338 * pmap had its footprint suspended...
17339 */
17340 map->has_corpse_footprint) {
17341 /*
17342 * The assertion below can fail if dyld
17343 * suspended footprint accounting
17344 * while doing some adjustments to
17345 * this page; the mapping would say
17346 * "use pmap accounting" but the page
17347 * would be marked "alternate
17348 * accounting".
17349 */
17350 } else
17351 #endif /* CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG) */
17352 assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
17353 pmap_disp = 0;
17354 } else {
17355 if (pmap_disp & PMAP_QUERY_PAGE_PRESENT) {
17356 assertf(map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
17357 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
17358 disposition |= VM_PAGE_QUERY_PAGE_REF;
17359 if (pmap_disp & PMAP_QUERY_PAGE_INTERNAL) {
17360 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
17361 } else {
17362 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
17363 }
17364 if (pmap_disp & PMAP_QUERY_PAGE_REUSABLE) {
17365 disposition |= VM_PAGE_QUERY_PAGE_REUSABLE;
17366 }
17367 } else if (pmap_disp & PMAP_QUERY_PAGE_COMPRESSED) {
17368 assertf(map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
17369 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
17370 }
17371 }
17372 switch (flavor) {
17373 case VM_PAGE_INFO_BASIC:
17374 basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
17375 basic_info->disposition = disposition;
17376 basic_info->ref_count = 1;
17377 basic_info->object_id = INFO_MAKE_FAKE_OBJECT_ID(map, task_ledgers.purgeable_nonvolatile);
17378 basic_info->offset = 0;
17379 basic_info->depth = 0;
17380
17381 info_idx++;
17382 break;
17383 }
17384 curr_s_offset += PAGE_SIZE;
17385 continue;
17386 }
17387
17388 vm_object_reference(object);
17389 /*
17390 * Shared mode -- so we can allow other readers
17391 * to grab the lock too.
17392 */
17393 vm_object_lock_shared(object);
17394
17395 curr_e_offset = MIN(map_entry->vme_end, end);
17396
17397 vm_map_unlock_read(map);
17398
17399 map_entry = NULL; /* map is unlocked, the entry is no longer valid. */
17400
17401 curr_object = object;
17402
17403 for (; curr_s_offset < curr_e_offset;) {
17404 if (object == curr_object) {
17405 ref_count = curr_object->ref_count - 1; /* account for our object reference above. */
17406 } else {
17407 ref_count = curr_object->ref_count;
17408 }
17409
17410 curr_offset_in_object = offset_in_object;
17411
17412 for (;;) {
17413 m = vm_page_lookup(curr_object, curr_offset_in_object);
17414
17415 if (m != VM_PAGE_NULL) {
17416 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
17417 break;
17418 } else {
17419 if (curr_object->internal &&
17420 curr_object->alive &&
17421 !curr_object->terminating &&
17422 curr_object->pager_ready) {
17423 if (VM_COMPRESSOR_PAGER_STATE_GET(curr_object, curr_offset_in_object)
17424 == VM_EXTERNAL_STATE_EXISTS) {
17425 /* the pager has that page */
17426 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
17427 break;
17428 }
17429 }
17430
17431 /*
17432 * Go down the VM object shadow chain until we find the page
17433 * we're looking for.
17434 */
17435
17436 if (curr_object->shadow != VM_OBJECT_NULL) {
17437 vm_object_t shadow = VM_OBJECT_NULL;
17438
17439 curr_offset_in_object += curr_object->vo_shadow_offset;
17440 shadow = curr_object->shadow;
17441
17442 vm_object_lock_shared(shadow);
17443 vm_object_unlock(curr_object);
17444
17445 curr_object = shadow;
17446 depth++;
17447 continue;
17448 } else {
17449 break;
17450 }
17451 }
17452 }
17453
17454 /* The ref_count is not strictly accurate, it measures the number */
17455 /* of entities holding a ref on the object, they may not be mapping */
17456 /* the object or may not be mapping the section holding the */
17457 /* target page but its still a ball park number and though an over- */
17458 /* count, it picks up the copy-on-write cases */
17459
17460 /* We could also get a picture of page sharing from pmap_attributes */
17461 /* but this would under count as only faulted-in mappings would */
17462 /* show up. */
17463
17464 if ((curr_object == object) && curr_object->shadow) {
17465 disposition |= VM_PAGE_QUERY_PAGE_COPIED;
17466 }
17467
17468 if (!curr_object->internal) {
17469 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
17470 }
17471
17472 if (m != VM_PAGE_NULL) {
17473 if (m->vmp_fictitious) {
17474 disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
17475 } else {
17476 if (m->vmp_dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(m))) {
17477 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
17478 }
17479
17480 if (m->vmp_reference || pmap_is_referenced(VM_PAGE_GET_PHYS_PAGE(m))) {
17481 disposition |= VM_PAGE_QUERY_PAGE_REF;
17482 }
17483
17484 if (m->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q) {
17485 disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE;
17486 }
17487
17488 if (m->vmp_cs_validated) {
17489 disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED;
17490 }
17491 if (m->vmp_cs_tainted) {
17492 disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED;
17493 }
17494 if (m->vmp_cs_nx) {
17495 disposition |= VM_PAGE_QUERY_PAGE_CS_NX;
17496 }
17497 if (m->vmp_reusable || curr_object->all_reusable) {
17498 disposition |= VM_PAGE_QUERY_PAGE_REUSABLE;
17499 }
17500 }
17501 }
17502
17503 switch (flavor) {
17504 case VM_PAGE_INFO_BASIC:
17505 basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
17506 basic_info->disposition = disposition;
17507 basic_info->ref_count = ref_count;
17508 basic_info->object_id = (vm_object_id_t) (uintptr_t)
17509 VM_KERNEL_ADDRPERM(curr_object);
17510 basic_info->offset =
17511 (memory_object_offset_t) curr_offset_in_object + offset_in_page;
17512 basic_info->depth = depth;
17513
17514 info_idx++;
17515 break;
17516 }
17517
17518 disposition = 0;
17519 offset_in_page = 0; // This doesn't really make sense for any offset other than the starting offset.
17520
17521 /*
17522 * Move to next offset in the range and in our object.
17523 */
17524 curr_s_offset += PAGE_SIZE;
17525 offset_in_object += PAGE_SIZE;
17526 curr_offset_in_object = offset_in_object;
17527
17528 if (curr_object != object) {
17529 vm_object_unlock(curr_object);
17530
17531 curr_object = object;
17532
17533 vm_object_lock_shared(curr_object);
17534 } else {
17535 vm_object_lock_yield_shared(curr_object);
17536 }
17537 }
17538
17539 vm_object_unlock(curr_object);
17540 vm_object_deallocate(curr_object);
17541
17542 vm_map_lock_read(map);
17543 }
17544
17545 vm_map_unlock_read(map);
17546 return retval;
17547 }
17548
17549 /*
17550 * vm_map_msync
17551 *
17552 * Synchronises the memory range specified with its backing store
17553 * image by either flushing or cleaning the contents to the appropriate
17554 * memory manager engaging in a memory object synchronize dialog with
17555 * the manager. The client doesn't return until the manager issues
17556 * m_o_s_completed message. MIG Magically converts user task parameter
17557 * to the task's address map.
17558 *
17559 * interpretation of sync_flags
17560 * VM_SYNC_INVALIDATE - discard pages, only return precious
17561 * pages to manager.
17562 *
17563 * VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
17564 * - discard pages, write dirty or precious
17565 * pages back to memory manager.
17566 *
17567 * VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
17568 * - write dirty or precious pages back to
17569 * the memory manager.
17570 *
17571 * VM_SYNC_CONTIGUOUS - does everything normally, but if there
17572 * is a hole in the region, and we would
17573 * have returned KERN_SUCCESS, return
17574 * KERN_INVALID_ADDRESS instead.
17575 *
17576 * NOTE
17577 * The memory object attributes have not yet been implemented, this
17578 * function will have to deal with the invalidate attribute
17579 *
17580 * RETURNS
17581 * KERN_INVALID_TASK Bad task parameter
17582 * KERN_INVALID_ARGUMENT both sync and async were specified.
17583 * KERN_SUCCESS The usual.
17584 * KERN_INVALID_ADDRESS There was a hole in the region.
17585 */
17586
17587 kern_return_t
17588 vm_map_msync(
17589 vm_map_t map,
17590 vm_map_address_t address,
17591 vm_map_size_t size,
17592 vm_sync_t sync_flags)
17593 {
17594 vm_map_entry_t entry;
17595 vm_map_size_t amount_left;
17596 vm_object_offset_t offset;
17597 boolean_t do_sync_req;
17598 boolean_t had_hole = FALSE;
17599 vm_map_offset_t pmap_offset;
17600
17601 if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
17602 (sync_flags & VM_SYNC_SYNCHRONOUS)) {
17603 return KERN_INVALID_ARGUMENT;
17604 }
17605
17606 /*
17607 * align address and size on page boundaries
17608 */
17609 size = (vm_map_round_page(address + size,
17610 VM_MAP_PAGE_MASK(map)) -
17611 vm_map_trunc_page(address,
17612 VM_MAP_PAGE_MASK(map)));
17613 address = vm_map_trunc_page(address,
17614 VM_MAP_PAGE_MASK(map));
17615
17616 if (map == VM_MAP_NULL) {
17617 return KERN_INVALID_TASK;
17618 }
17619
17620 if (size == 0) {
17621 return KERN_SUCCESS;
17622 }
17623
17624 amount_left = size;
17625
17626 while (amount_left > 0) {
17627 vm_object_size_t flush_size;
17628 vm_object_t object;
17629
17630 vm_map_lock(map);
17631 if (!vm_map_lookup_entry(map,
17632 address,
17633 &entry)) {
17634 vm_map_size_t skip;
17635
17636 /*
17637 * hole in the address map.
17638 */
17639 had_hole = TRUE;
17640
17641 if (sync_flags & VM_SYNC_KILLPAGES) {
17642 /*
17643 * For VM_SYNC_KILLPAGES, there should be
17644 * no holes in the range, since we couldn't
17645 * prevent someone else from allocating in
17646 * that hole and we wouldn't want to "kill"
17647 * their pages.
17648 */
17649 vm_map_unlock(map);
17650 break;
17651 }
17652
17653 /*
17654 * Check for empty map.
17655 */
17656 if (entry == vm_map_to_entry(map) &&
17657 entry->vme_next == entry) {
17658 vm_map_unlock(map);
17659 break;
17660 }
17661 /*
17662 * Check that we don't wrap and that
17663 * we have at least one real map entry.
17664 */
17665 if ((map->hdr.nentries == 0) ||
17666 (entry->vme_next->vme_start < address)) {
17667 vm_map_unlock(map);
17668 break;
17669 }
17670 /*
17671 * Move up to the next entry if needed
17672 */
17673 skip = (entry->vme_next->vme_start - address);
17674 if (skip >= amount_left) {
17675 amount_left = 0;
17676 } else {
17677 amount_left -= skip;
17678 }
17679 address = entry->vme_next->vme_start;
17680 vm_map_unlock(map);
17681 continue;
17682 }
17683
17684 offset = address - entry->vme_start;
17685 pmap_offset = address;
17686
17687 /*
17688 * do we have more to flush than is contained in this
17689 * entry ?
17690 */
17691 if (amount_left + entry->vme_start + offset > entry->vme_end) {
17692 flush_size = entry->vme_end -
17693 (entry->vme_start + offset);
17694 } else {
17695 flush_size = amount_left;
17696 }
17697 amount_left -= flush_size;
17698 address += flush_size;
17699
17700 if (entry->is_sub_map == TRUE) {
17701 vm_map_t local_map;
17702 vm_map_offset_t local_offset;
17703
17704 local_map = VME_SUBMAP(entry);
17705 local_offset = VME_OFFSET(entry);
17706 vm_map_reference(local_map);
17707 vm_map_unlock(map);
17708 if (vm_map_msync(
17709 local_map,
17710 local_offset,
17711 flush_size,
17712 sync_flags) == KERN_INVALID_ADDRESS) {
17713 had_hole = TRUE;
17714 }
17715 vm_map_deallocate(local_map);
17716 continue;
17717 }
17718 object = VME_OBJECT(entry);
17719
17720 /*
17721 * We can't sync this object if the object has not been
17722 * created yet
17723 */
17724 if (object == VM_OBJECT_NULL) {
17725 vm_map_unlock(map);
17726 continue;
17727 }
17728 offset += VME_OFFSET(entry);
17729
17730 vm_object_lock(object);
17731
17732 if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
17733 int kill_pages = 0;
17734 boolean_t reusable_pages = FALSE;
17735
17736 if (sync_flags & VM_SYNC_KILLPAGES) {
17737 if (((object->ref_count == 1) ||
17738 ((object->copy_strategy !=
17739 MEMORY_OBJECT_COPY_SYMMETRIC) &&
17740 (object->copy == VM_OBJECT_NULL))) &&
17741 (object->shadow == VM_OBJECT_NULL)) {
17742 if (object->ref_count != 1) {
17743 vm_page_stats_reusable.free_shared++;
17744 }
17745 kill_pages = 1;
17746 } else {
17747 kill_pages = -1;
17748 }
17749 }
17750 if (kill_pages != -1) {
17751 vm_object_deactivate_pages(
17752 object,
17753 offset,
17754 (vm_object_size_t) flush_size,
17755 kill_pages,
17756 reusable_pages,
17757 map->pmap,
17758 pmap_offset);
17759 }
17760 vm_object_unlock(object);
17761 vm_map_unlock(map);
17762 continue;
17763 }
17764 /*
17765 * We can't sync this object if there isn't a pager.
17766 * Don't bother to sync internal objects, since there can't
17767 * be any "permanent" storage for these objects anyway.
17768 */
17769 if ((object->pager == MEMORY_OBJECT_NULL) ||
17770 (object->internal) || (object->private)) {
17771 vm_object_unlock(object);
17772 vm_map_unlock(map);
17773 continue;
17774 }
17775 /*
17776 * keep reference on the object until syncing is done
17777 */
17778 vm_object_reference_locked(object);
17779 vm_object_unlock(object);
17780
17781 vm_map_unlock(map);
17782
17783 do_sync_req = vm_object_sync(object,
17784 offset,
17785 flush_size,
17786 sync_flags & VM_SYNC_INVALIDATE,
17787 ((sync_flags & VM_SYNC_SYNCHRONOUS) ||
17788 (sync_flags & VM_SYNC_ASYNCHRONOUS)),
17789 sync_flags & VM_SYNC_SYNCHRONOUS);
17790
17791 if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) {
17792 /*
17793 * clear out the clustering and read-ahead hints
17794 */
17795 vm_object_lock(object);
17796
17797 object->pages_created = 0;
17798 object->pages_used = 0;
17799 object->sequential = 0;
17800 object->last_alloc = 0;
17801
17802 vm_object_unlock(object);
17803 }
17804 vm_object_deallocate(object);
17805 } /* while */
17806
17807 /* for proper msync() behaviour */
17808 if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS)) {
17809 return KERN_INVALID_ADDRESS;
17810 }
17811
17812 return KERN_SUCCESS;
17813 }/* vm_msync */
17814
17815 /*
17816 * Routine: convert_port_entry_to_map
17817 * Purpose:
17818 * Convert from a port specifying an entry or a task
17819 * to a map. Doesn't consume the port ref; produces a map ref,
17820 * which may be null. Unlike convert_port_to_map, the
17821 * port may be task or a named entry backed.
17822 * Conditions:
17823 * Nothing locked.
17824 */
17825
17826
17827 vm_map_t
17828 convert_port_entry_to_map(
17829 ipc_port_t port)
17830 {
17831 vm_map_t map;
17832 vm_named_entry_t named_entry;
17833 uint32_t try_failed_count = 0;
17834
17835 if (IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
17836 while (TRUE) {
17837 ip_lock(port);
17838 if (ip_active(port) && (ip_kotype(port)
17839 == IKOT_NAMED_ENTRY)) {
17840 named_entry =
17841 (vm_named_entry_t) ip_get_kobject(port);
17842 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
17843 ip_unlock(port);
17844
17845 try_failed_count++;
17846 mutex_pause(try_failed_count);
17847 continue;
17848 }
17849 named_entry->ref_count++;
17850 lck_mtx_unlock(&(named_entry)->Lock);
17851 ip_unlock(port);
17852 if ((named_entry->is_sub_map) &&
17853 (named_entry->protection
17854 & VM_PROT_WRITE)) {
17855 map = named_entry->backing.map;
17856 } else {
17857 mach_destroy_memory_entry(port);
17858 return VM_MAP_NULL;
17859 }
17860 vm_map_reference_swap(map);
17861 mach_destroy_memory_entry(port);
17862 break;
17863 } else {
17864 return VM_MAP_NULL;
17865 }
17866 }
17867 } else {
17868 map = convert_port_to_map(port);
17869 }
17870
17871 return map;
17872 }
17873
17874 /*
17875 * Routine: convert_port_entry_to_object
17876 * Purpose:
17877 * Convert from a port specifying a named entry to an
17878 * object. Doesn't consume the port ref; produces an object ref,
17879 * which may be null.
17880 * Conditions:
17881 * Nothing locked.
17882 */
17883
17884
17885 vm_object_t
17886 convert_port_entry_to_object(
17887 ipc_port_t port)
17888 {
17889 vm_object_t object = VM_OBJECT_NULL;
17890 vm_named_entry_t named_entry;
17891 uint32_t try_failed_count = 0;
17892
17893 if (IP_VALID(port) &&
17894 (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
17895 try_again:
17896 ip_lock(port);
17897 if (ip_active(port) &&
17898 (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
17899 named_entry = (vm_named_entry_t) ip_get_kobject(port);
17900 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
17901 ip_unlock(port);
17902 try_failed_count++;
17903 mutex_pause(try_failed_count);
17904 goto try_again;
17905 }
17906 named_entry->ref_count++;
17907 lck_mtx_unlock(&(named_entry)->Lock);
17908 ip_unlock(port);
17909 if (!(named_entry->is_sub_map) &&
17910 !(named_entry->is_copy) &&
17911 (named_entry->protection & VM_PROT_WRITE)) {
17912 object = named_entry->backing.object;
17913 vm_object_reference(object);
17914 }
17915 mach_destroy_memory_entry(port);
17916 }
17917 }
17918
17919 return object;
17920 }
17921
17922 /*
17923 * Export routines to other components for the things we access locally through
17924 * macros.
17925 */
17926 #undef current_map
17927 vm_map_t
17928 current_map(void)
17929 {
17930 return current_map_fast();
17931 }
17932
17933 /*
17934 * vm_map_reference:
17935 *
17936 * Most code internal to the osfmk will go through a
17937 * macro defining this. This is always here for the
17938 * use of other kernel components.
17939 */
17940 #undef vm_map_reference
17941 void
17942 vm_map_reference(
17943 vm_map_t map)
17944 {
17945 if (map == VM_MAP_NULL) {
17946 return;
17947 }
17948
17949 lck_mtx_lock(&map->s_lock);
17950 #if TASK_SWAPPER
17951 assert(map->res_count > 0);
17952 assert(os_ref_get_count(&map->map_refcnt) >= map->res_count);
17953 map->res_count++;
17954 #endif
17955 os_ref_retain_locked(&map->map_refcnt);
17956 lck_mtx_unlock(&map->s_lock);
17957 }
17958
17959 /*
17960 * vm_map_deallocate:
17961 *
17962 * Removes a reference from the specified map,
17963 * destroying it if no references remain.
17964 * The map should not be locked.
17965 */
17966 void
17967 vm_map_deallocate(
17968 vm_map_t map)
17969 {
17970 unsigned int ref;
17971
17972 if (map == VM_MAP_NULL) {
17973 return;
17974 }
17975
17976 lck_mtx_lock(&map->s_lock);
17977 ref = os_ref_release_locked(&map->map_refcnt);
17978 if (ref > 0) {
17979 vm_map_res_deallocate(map);
17980 lck_mtx_unlock(&map->s_lock);
17981 return;
17982 }
17983 assert(os_ref_get_count(&map->map_refcnt) == 0);
17984 lck_mtx_unlock(&map->s_lock);
17985
17986 #if TASK_SWAPPER
17987 /*
17988 * The map residence count isn't decremented here because
17989 * the vm_map_delete below will traverse the entire map,
17990 * deleting entries, and the residence counts on objects
17991 * and sharing maps will go away then.
17992 */
17993 #endif
17994
17995 vm_map_destroy(map, VM_MAP_REMOVE_NO_FLAGS);
17996 }
17997
17998
17999 void
18000 vm_map_disable_NX(vm_map_t map)
18001 {
18002 if (map == NULL) {
18003 return;
18004 }
18005 if (map->pmap == NULL) {
18006 return;
18007 }
18008
18009 pmap_disable_NX(map->pmap);
18010 }
18011
18012 void
18013 vm_map_disallow_data_exec(vm_map_t map)
18014 {
18015 if (map == NULL) {
18016 return;
18017 }
18018
18019 map->map_disallow_data_exec = TRUE;
18020 }
18021
18022 /* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
18023 * more descriptive.
18024 */
18025 void
18026 vm_map_set_32bit(vm_map_t map)
18027 {
18028 #if defined(__arm__) || defined(__arm64__)
18029 map->max_offset = pmap_max_offset(FALSE, ARM_PMAP_MAX_OFFSET_DEVICE);
18030 #else
18031 map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
18032 #endif
18033 }
18034
18035
18036 void
18037 vm_map_set_64bit(vm_map_t map)
18038 {
18039 #if defined(__arm__) || defined(__arm64__)
18040 map->max_offset = pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_DEVICE);
18041 #else
18042 map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
18043 #endif
18044 }
18045
18046 /*
18047 * Expand the maximum size of an existing map to the maximum supported.
18048 */
18049 void
18050 vm_map_set_jumbo(vm_map_t map)
18051 {
18052 #if defined (__arm64__)
18053 vm_map_set_max_addr(map, ~0);
18054 #else /* arm64 */
18055 (void) map;
18056 #endif
18057 }
18058
18059 /*
18060 * This map has a JIT entitlement
18061 */
18062 void
18063 vm_map_set_jit_entitled(vm_map_t map)
18064 {
18065 #if defined (__arm64__)
18066 pmap_set_jit_entitled(map->pmap);
18067 #else /* arm64 */
18068 (void) map;
18069 #endif
18070 }
18071
18072 /*
18073 * Expand the maximum size of an existing map.
18074 */
18075 void
18076 vm_map_set_max_addr(vm_map_t map, vm_map_offset_t new_max_offset)
18077 {
18078 #if defined(__arm64__)
18079 vm_map_offset_t max_supported_offset = 0;
18080 vm_map_offset_t old_max_offset = map->max_offset;
18081 max_supported_offset = pmap_max_offset(vm_map_is_64bit(map), ARM_PMAP_MAX_OFFSET_JUMBO);
18082
18083 new_max_offset = trunc_page(new_max_offset);
18084
18085 /* The address space cannot be shrunk using this routine. */
18086 if (old_max_offset >= new_max_offset) {
18087 return;
18088 }
18089
18090 if (max_supported_offset < new_max_offset) {
18091 new_max_offset = max_supported_offset;
18092 }
18093
18094 map->max_offset = new_max_offset;
18095
18096 if (map->holes_list->prev->vme_end == old_max_offset) {
18097 /*
18098 * There is already a hole at the end of the map; simply make it bigger.
18099 */
18100 map->holes_list->prev->vme_end = map->max_offset;
18101 } else {
18102 /*
18103 * There is no hole at the end, so we need to create a new hole
18104 * for the new empty space we're creating.
18105 */
18106 struct vm_map_links *new_hole = zalloc(vm_map_holes_zone);
18107 new_hole->start = old_max_offset;
18108 new_hole->end = map->max_offset;
18109 new_hole->prev = map->holes_list->prev;
18110 new_hole->next = (struct vm_map_entry *)map->holes_list;
18111 map->holes_list->prev->links.next = (struct vm_map_entry *)new_hole;
18112 map->holes_list->prev = (struct vm_map_entry *)new_hole;
18113 }
18114 #else
18115 (void)map;
18116 (void)new_max_offset;
18117 #endif
18118 }
18119
18120 vm_map_offset_t
18121 vm_compute_max_offset(boolean_t is64)
18122 {
18123 #if defined(__arm__) || defined(__arm64__)
18124 return pmap_max_offset(is64, ARM_PMAP_MAX_OFFSET_DEVICE);
18125 #else
18126 return is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS;
18127 #endif
18128 }
18129
18130 void
18131 vm_map_get_max_aslr_slide_section(
18132 vm_map_t map __unused,
18133 int64_t *max_sections,
18134 int64_t *section_size)
18135 {
18136 #if defined(__arm64__)
18137 *max_sections = 3;
18138 *section_size = ARM_TT_TWIG_SIZE;
18139 #else
18140 *max_sections = 1;
18141 *section_size = 0;
18142 #endif
18143 }
18144
18145 uint64_t
18146 vm_map_get_max_aslr_slide_pages(vm_map_t map)
18147 {
18148 #if defined(__arm64__)
18149 /* Limit arm64 slide to 16MB to conserve contiguous VA space in the more
18150 * limited embedded address space; this is also meant to minimize pmap
18151 * memory usage on 16KB page systems.
18152 */
18153 return 1 << (24 - VM_MAP_PAGE_SHIFT(map));
18154 #else
18155 return 1 << (vm_map_is_64bit(map) ? 16 : 8);
18156 #endif
18157 }
18158
18159 uint64_t
18160 vm_map_get_max_loader_aslr_slide_pages(vm_map_t map)
18161 {
18162 #if defined(__arm64__)
18163 /* We limit the loader slide to 4MB, in order to ensure at least 8 bits
18164 * of independent entropy on 16KB page systems.
18165 */
18166 return 1 << (22 - VM_MAP_PAGE_SHIFT(map));
18167 #else
18168 return 1 << (vm_map_is_64bit(map) ? 16 : 8);
18169 #endif
18170 }
18171
18172 #ifndef __arm__
18173 boolean_t
18174 vm_map_is_64bit(
18175 vm_map_t map)
18176 {
18177 return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
18178 }
18179 #endif
18180
18181 boolean_t
18182 vm_map_has_hard_pagezero(
18183 vm_map_t map,
18184 vm_map_offset_t pagezero_size)
18185 {
18186 /*
18187 * XXX FBDP
18188 * We should lock the VM map (for read) here but we can get away
18189 * with it for now because there can't really be any race condition:
18190 * the VM map's min_offset is changed only when the VM map is created
18191 * and when the zero page is established (when the binary gets loaded),
18192 * and this routine gets called only when the task terminates and the
18193 * VM map is being torn down, and when a new map is created via
18194 * load_machfile()/execve().
18195 */
18196 return map->min_offset >= pagezero_size;
18197 }
18198
18199 /*
18200 * Raise a VM map's maximum offset.
18201 */
18202 kern_return_t
18203 vm_map_raise_max_offset(
18204 vm_map_t map,
18205 vm_map_offset_t new_max_offset)
18206 {
18207 kern_return_t ret;
18208
18209 vm_map_lock(map);
18210 ret = KERN_INVALID_ADDRESS;
18211
18212 if (new_max_offset >= map->max_offset) {
18213 if (!vm_map_is_64bit(map)) {
18214 if (new_max_offset <= (vm_map_offset_t)VM_MAX_ADDRESS) {
18215 map->max_offset = new_max_offset;
18216 ret = KERN_SUCCESS;
18217 }
18218 } else {
18219 if (new_max_offset <= (vm_map_offset_t)MACH_VM_MAX_ADDRESS) {
18220 map->max_offset = new_max_offset;
18221 ret = KERN_SUCCESS;
18222 }
18223 }
18224 }
18225
18226 vm_map_unlock(map);
18227 return ret;
18228 }
18229
18230
18231 /*
18232 * Raise a VM map's minimum offset.
18233 * To strictly enforce "page zero" reservation.
18234 */
18235 kern_return_t
18236 vm_map_raise_min_offset(
18237 vm_map_t map,
18238 vm_map_offset_t new_min_offset)
18239 {
18240 vm_map_entry_t first_entry;
18241
18242 new_min_offset = vm_map_round_page(new_min_offset,
18243 VM_MAP_PAGE_MASK(map));
18244
18245 vm_map_lock(map);
18246
18247 if (new_min_offset < map->min_offset) {
18248 /*
18249 * Can't move min_offset backwards, as that would expose
18250 * a part of the address space that was previously, and for
18251 * possibly good reasons, inaccessible.
18252 */
18253 vm_map_unlock(map);
18254 return KERN_INVALID_ADDRESS;
18255 }
18256 if (new_min_offset >= map->max_offset) {
18257 /* can't go beyond the end of the address space */
18258 vm_map_unlock(map);
18259 return KERN_INVALID_ADDRESS;
18260 }
18261
18262 first_entry = vm_map_first_entry(map);
18263 if (first_entry != vm_map_to_entry(map) &&
18264 first_entry->vme_start < new_min_offset) {
18265 /*
18266 * Some memory was already allocated below the new
18267 * minimun offset. It's too late to change it now...
18268 */
18269 vm_map_unlock(map);
18270 return KERN_NO_SPACE;
18271 }
18272
18273 map->min_offset = new_min_offset;
18274
18275 assert(map->holes_list);
18276 map->holes_list->start = new_min_offset;
18277 assert(new_min_offset < map->holes_list->end);
18278
18279 vm_map_unlock(map);
18280
18281 return KERN_SUCCESS;
18282 }
18283
18284 /*
18285 * Set the limit on the maximum amount of user wired memory allowed for this map.
18286 * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side of
18287 * the kernel. The limits are checked in the mach VM side, so we keep a copy so we
18288 * don't have to reach over to the BSD data structures.
18289 */
18290
18291 void
18292 vm_map_set_user_wire_limit(vm_map_t map,
18293 vm_size_t limit)
18294 {
18295 map->user_wire_limit = limit;
18296 }
18297
18298
18299 void
18300 vm_map_switch_protect(vm_map_t map,
18301 boolean_t val)
18302 {
18303 vm_map_lock(map);
18304 map->switch_protect = val;
18305 vm_map_unlock(map);
18306 }
18307
18308 /*
18309 * IOKit has mapped a region into this map; adjust the pmap's ledgers appropriately.
18310 * phys_footprint is a composite limit consisting of iokit + physmem, so we need to
18311 * bump both counters.
18312 */
18313 void
18314 vm_map_iokit_mapped_region(vm_map_t map, vm_size_t bytes)
18315 {
18316 pmap_t pmap = vm_map_pmap(map);
18317
18318 ledger_credit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
18319 ledger_credit(pmap->ledger, task_ledgers.phys_footprint, bytes);
18320 }
18321
18322 void
18323 vm_map_iokit_unmapped_region(vm_map_t map, vm_size_t bytes)
18324 {
18325 pmap_t pmap = vm_map_pmap(map);
18326
18327 ledger_debit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
18328 ledger_debit(pmap->ledger, task_ledgers.phys_footprint, bytes);
18329 }
18330
18331 /* Add (generate) code signature for memory range */
18332 #if CONFIG_DYNAMIC_CODE_SIGNING
18333 kern_return_t
18334 vm_map_sign(vm_map_t map,
18335 vm_map_offset_t start,
18336 vm_map_offset_t end)
18337 {
18338 vm_map_entry_t entry;
18339 vm_page_t m;
18340 vm_object_t object;
18341
18342 /*
18343 * Vet all the input parameters and current type and state of the
18344 * underlaying object. Return with an error if anything is amiss.
18345 */
18346 if (map == VM_MAP_NULL) {
18347 return KERN_INVALID_ARGUMENT;
18348 }
18349
18350 vm_map_lock_read(map);
18351
18352 if (!vm_map_lookup_entry(map, start, &entry) || entry->is_sub_map) {
18353 /*
18354 * Must pass a valid non-submap address.
18355 */
18356 vm_map_unlock_read(map);
18357 return KERN_INVALID_ADDRESS;
18358 }
18359
18360 if ((entry->vme_start > start) || (entry->vme_end < end)) {
18361 /*
18362 * Map entry doesn't cover the requested range. Not handling
18363 * this situation currently.
18364 */
18365 vm_map_unlock_read(map);
18366 return KERN_INVALID_ARGUMENT;
18367 }
18368
18369 object = VME_OBJECT(entry);
18370 if (object == VM_OBJECT_NULL) {
18371 /*
18372 * Object must already be present or we can't sign.
18373 */
18374 vm_map_unlock_read(map);
18375 return KERN_INVALID_ARGUMENT;
18376 }
18377
18378 vm_object_lock(object);
18379 vm_map_unlock_read(map);
18380
18381 while (start < end) {
18382 uint32_t refmod;
18383
18384 m = vm_page_lookup(object,
18385 start - entry->vme_start + VME_OFFSET(entry));
18386 if (m == VM_PAGE_NULL) {
18387 /* shoud we try to fault a page here? we can probably
18388 * demand it exists and is locked for this request */
18389 vm_object_unlock(object);
18390 return KERN_FAILURE;
18391 }
18392 /* deal with special page status */
18393 if (m->vmp_busy ||
18394 (m->vmp_unusual && (m->vmp_error || m->vmp_restart || m->vmp_private || m->vmp_absent))) {
18395 vm_object_unlock(object);
18396 return KERN_FAILURE;
18397 }
18398
18399 /* Page is OK... now "validate" it */
18400 /* This is the place where we'll call out to create a code
18401 * directory, later */
18402 m->vmp_cs_validated = TRUE;
18403
18404 /* The page is now "clean" for codesigning purposes. That means
18405 * we don't consider it as modified (wpmapped) anymore. But
18406 * we'll disconnect the page so we note any future modification
18407 * attempts. */
18408 m->vmp_wpmapped = FALSE;
18409 refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
18410
18411 /* Pull the dirty status from the pmap, since we cleared the
18412 * wpmapped bit */
18413 if ((refmod & VM_MEM_MODIFIED) && !m->vmp_dirty) {
18414 SET_PAGE_DIRTY(m, FALSE);
18415 }
18416
18417 /* On to the next page */
18418 start += PAGE_SIZE;
18419 }
18420 vm_object_unlock(object);
18421
18422 return KERN_SUCCESS;
18423 }
18424 #endif
18425
18426 kern_return_t
18427 vm_map_partial_reap(vm_map_t map, unsigned int *reclaimed_resident, unsigned int *reclaimed_compressed)
18428 {
18429 vm_map_entry_t entry = VM_MAP_ENTRY_NULL;
18430 vm_map_entry_t next_entry;
18431 kern_return_t kr = KERN_SUCCESS;
18432 vm_map_t zap_map;
18433
18434 vm_map_lock(map);
18435
18436 /*
18437 * We use a "zap_map" to avoid having to unlock
18438 * the "map" in vm_map_delete().
18439 */
18440 zap_map = vm_map_create(PMAP_NULL,
18441 map->min_offset,
18442 map->max_offset,
18443 map->hdr.entries_pageable);
18444
18445 if (zap_map == VM_MAP_NULL) {
18446 return KERN_RESOURCE_SHORTAGE;
18447 }
18448
18449 vm_map_set_page_shift(zap_map,
18450 VM_MAP_PAGE_SHIFT(map));
18451 vm_map_disable_hole_optimization(zap_map);
18452
18453 for (entry = vm_map_first_entry(map);
18454 entry != vm_map_to_entry(map);
18455 entry = next_entry) {
18456 next_entry = entry->vme_next;
18457
18458 if (VME_OBJECT(entry) &&
18459 !entry->is_sub_map &&
18460 (VME_OBJECT(entry)->internal == TRUE) &&
18461 (VME_OBJECT(entry)->ref_count == 1)) {
18462 *reclaimed_resident += VME_OBJECT(entry)->resident_page_count;
18463 *reclaimed_compressed += vm_compressor_pager_get_count(VME_OBJECT(entry)->pager);
18464
18465 (void)vm_map_delete(map,
18466 entry->vme_start,
18467 entry->vme_end,
18468 VM_MAP_REMOVE_SAVE_ENTRIES,
18469 zap_map);
18470 }
18471 }
18472
18473 vm_map_unlock(map);
18474
18475 /*
18476 * Get rid of the "zap_maps" and all the map entries that
18477 * they may still contain.
18478 */
18479 if (zap_map != VM_MAP_NULL) {
18480 vm_map_destroy(zap_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
18481 zap_map = VM_MAP_NULL;
18482 }
18483
18484 return kr;
18485 }
18486
18487
18488 #if DEVELOPMENT || DEBUG
18489
18490 int
18491 vm_map_disconnect_page_mappings(
18492 vm_map_t map,
18493 boolean_t do_unnest)
18494 {
18495 vm_map_entry_t entry;
18496 int page_count = 0;
18497
18498 if (do_unnest == TRUE) {
18499 #ifndef NO_NESTED_PMAP
18500 vm_map_lock(map);
18501
18502 for (entry = vm_map_first_entry(map);
18503 entry != vm_map_to_entry(map);
18504 entry = entry->vme_next) {
18505 if (entry->is_sub_map && entry->use_pmap) {
18506 /*
18507 * Make sure the range between the start of this entry and
18508 * the end of this entry is no longer nested, so that
18509 * we will only remove mappings from the pmap in use by this
18510 * this task
18511 */
18512 vm_map_clip_unnest(map, entry, entry->vme_start, entry->vme_end);
18513 }
18514 }
18515 vm_map_unlock(map);
18516 #endif
18517 }
18518 vm_map_lock_read(map);
18519
18520 page_count = map->pmap->stats.resident_count;
18521
18522 for (entry = vm_map_first_entry(map);
18523 entry != vm_map_to_entry(map);
18524 entry = entry->vme_next) {
18525 if (!entry->is_sub_map && ((VME_OBJECT(entry) == 0) ||
18526 (VME_OBJECT(entry)->phys_contiguous))) {
18527 continue;
18528 }
18529 if (entry->is_sub_map) {
18530 assert(!entry->use_pmap);
18531 }
18532
18533 pmap_remove_options(map->pmap, entry->vme_start, entry->vme_end, 0);
18534 }
18535 vm_map_unlock_read(map);
18536
18537 return page_count;
18538 }
18539
18540 #endif
18541
18542
18543 #if CONFIG_FREEZE
18544
18545
18546 int c_freezer_swapout_page_count;
18547 int c_freezer_compression_count = 0;
18548 AbsoluteTime c_freezer_last_yield_ts = 0;
18549
18550 extern unsigned int memorystatus_freeze_private_shared_pages_ratio;
18551 extern unsigned int memorystatus_freeze_shared_mb_per_process_max;
18552
18553 kern_return_t
18554 vm_map_freeze(
18555 task_t task,
18556 unsigned int *purgeable_count,
18557 unsigned int *wired_count,
18558 unsigned int *clean_count,
18559 unsigned int *dirty_count,
18560 unsigned int dirty_budget,
18561 unsigned int *shared_count,
18562 int *freezer_error_code,
18563 boolean_t eval_only)
18564 {
18565 vm_map_entry_t entry2 = VM_MAP_ENTRY_NULL;
18566 kern_return_t kr = KERN_SUCCESS;
18567 boolean_t evaluation_phase = TRUE;
18568 vm_object_t cur_shared_object = NULL;
18569 int cur_shared_obj_ref_cnt = 0;
18570 unsigned int dirty_private_count = 0, dirty_shared_count = 0, obj_pages_snapshot = 0;
18571
18572 *purgeable_count = *wired_count = *clean_count = *dirty_count = *shared_count = 0;
18573
18574 /*
18575 * We need the exclusive lock here so that we can
18576 * block any page faults or lookups while we are
18577 * in the middle of freezing this vm map.
18578 */
18579 vm_map_t map = task->map;
18580
18581 vm_map_lock(map);
18582
18583 assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
18584
18585 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
18586 if (vm_compressor_low_on_space()) {
18587 *freezer_error_code = FREEZER_ERROR_NO_COMPRESSOR_SPACE;
18588 }
18589
18590 if (vm_swap_low_on_space()) {
18591 *freezer_error_code = FREEZER_ERROR_NO_SWAP_SPACE;
18592 }
18593
18594 kr = KERN_NO_SPACE;
18595 goto done;
18596 }
18597
18598 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE == FALSE) {
18599 /*
18600 * In-memory compressor backing the freezer. No disk.
18601 * So no need to do the evaluation phase.
18602 */
18603 evaluation_phase = FALSE;
18604
18605 if (eval_only == TRUE) {
18606 /*
18607 * We don't support 'eval_only' mode
18608 * in this non-swap config.
18609 */
18610 *freezer_error_code = FREEZER_ERROR_GENERIC;
18611 kr = KERN_INVALID_ARGUMENT;
18612 goto done;
18613 }
18614
18615 c_freezer_compression_count = 0;
18616 clock_get_uptime(&c_freezer_last_yield_ts);
18617 }
18618 again:
18619
18620 for (entry2 = vm_map_first_entry(map);
18621 entry2 != vm_map_to_entry(map);
18622 entry2 = entry2->vme_next) {
18623 vm_object_t src_object = VME_OBJECT(entry2);
18624
18625 if (src_object &&
18626 !entry2->is_sub_map &&
18627 !src_object->phys_contiguous) {
18628 /* If eligible, scan the entry, moving eligible pages over to our parent object */
18629
18630 if (src_object->internal == TRUE) {
18631 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
18632 /*
18633 * We skip purgeable objects during evaluation phase only.
18634 * If we decide to freeze this process, we'll explicitly
18635 * purge these objects before we go around again with
18636 * 'evaluation_phase' set to FALSE.
18637 */
18638
18639 if ((src_object->purgable == VM_PURGABLE_EMPTY) || (src_object->purgable == VM_PURGABLE_VOLATILE)) {
18640 /*
18641 * We want to purge objects that may not belong to this task but are mapped
18642 * in this task alone. Since we already purged this task's purgeable memory
18643 * at the end of a successful evaluation phase, we want to avoid doing no-op calls
18644 * on this task's purgeable objects. Hence the check for only volatile objects.
18645 */
18646 if (evaluation_phase == FALSE &&
18647 (src_object->purgable == VM_PURGABLE_VOLATILE) &&
18648 (src_object->ref_count == 1)) {
18649 vm_object_lock(src_object);
18650 vm_object_purge(src_object, 0);
18651 vm_object_unlock(src_object);
18652 }
18653 continue;
18654 }
18655
18656 /*
18657 * Pages belonging to this object could be swapped to disk.
18658 * Make sure it's not a shared object because we could end
18659 * up just bringing it back in again.
18660 *
18661 * We try to optimize somewhat by checking for objects that are mapped
18662 * more than once within our own map. But we don't do full searches,
18663 * we just look at the entries following our current entry.
18664 */
18665
18666 if (src_object->ref_count > 1) {
18667 if (src_object != cur_shared_object) {
18668 obj_pages_snapshot = (src_object->resident_page_count - src_object->wired_page_count) + vm_compressor_pager_get_count(src_object->pager);
18669 dirty_shared_count += obj_pages_snapshot;
18670
18671 cur_shared_object = src_object;
18672 cur_shared_obj_ref_cnt = 1;
18673 continue;
18674 } else {
18675 cur_shared_obj_ref_cnt++;
18676 if (src_object->ref_count == cur_shared_obj_ref_cnt) {
18677 /*
18678 * Fall through to below and treat this object as private.
18679 * So deduct its pages from our shared total and add it to the
18680 * private total.
18681 */
18682
18683 dirty_shared_count -= obj_pages_snapshot;
18684 dirty_private_count += obj_pages_snapshot;
18685 } else {
18686 continue;
18687 }
18688 }
18689 }
18690
18691
18692 if (src_object->ref_count == 1) {
18693 dirty_private_count += (src_object->resident_page_count - src_object->wired_page_count) + vm_compressor_pager_get_count(src_object->pager);
18694 }
18695
18696 if (evaluation_phase == TRUE) {
18697 continue;
18698 }
18699 }
18700
18701 uint32_t paged_out_count = vm_object_compressed_freezer_pageout(src_object, dirty_budget);
18702 *wired_count += src_object->wired_page_count;
18703
18704 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
18705 if (vm_compressor_low_on_space()) {
18706 *freezer_error_code = FREEZER_ERROR_NO_COMPRESSOR_SPACE;
18707 }
18708
18709 if (vm_swap_low_on_space()) {
18710 *freezer_error_code = FREEZER_ERROR_NO_SWAP_SPACE;
18711 }
18712
18713 kr = KERN_NO_SPACE;
18714 break;
18715 }
18716 if (paged_out_count >= dirty_budget) {
18717 break;
18718 }
18719 dirty_budget -= paged_out_count;
18720 }
18721 }
18722 }
18723
18724 *shared_count = (unsigned int) ((dirty_shared_count * PAGE_SIZE_64) / (1024 * 1024ULL));
18725 if (evaluation_phase) {
18726 unsigned int shared_pages_threshold = (memorystatus_freeze_shared_mb_per_process_max * 1024 * 1024ULL) / PAGE_SIZE_64;
18727
18728 if (dirty_shared_count > shared_pages_threshold) {
18729 *freezer_error_code = FREEZER_ERROR_EXCESS_SHARED_MEMORY;
18730 kr = KERN_FAILURE;
18731 goto done;
18732 }
18733
18734 if (dirty_shared_count &&
18735 ((dirty_private_count / dirty_shared_count) < memorystatus_freeze_private_shared_pages_ratio)) {
18736 *freezer_error_code = FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO;
18737 kr = KERN_FAILURE;
18738 goto done;
18739 }
18740
18741 evaluation_phase = FALSE;
18742 dirty_shared_count = dirty_private_count = 0;
18743
18744 c_freezer_compression_count = 0;
18745 clock_get_uptime(&c_freezer_last_yield_ts);
18746
18747 if (eval_only) {
18748 kr = KERN_SUCCESS;
18749 goto done;
18750 }
18751
18752 vm_purgeable_purge_task_owned(task);
18753
18754 goto again;
18755 } else {
18756 kr = KERN_SUCCESS;
18757 }
18758
18759 done:
18760 vm_map_unlock(map);
18761
18762 if ((eval_only == FALSE) && (kr == KERN_SUCCESS)) {
18763 vm_object_compressed_freezer_done();
18764
18765 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
18766 /*
18767 * reset the counter tracking the # of swapped compressed pages
18768 * because we are now done with this freeze session and task.
18769 */
18770
18771 *dirty_count = c_freezer_swapout_page_count; //used to track pageouts
18772 c_freezer_swapout_page_count = 0;
18773 }
18774 }
18775 return kr;
18776 }
18777
18778 #endif
18779
18780 /*
18781 * vm_map_entry_should_cow_for_true_share:
18782 *
18783 * Determines if the map entry should be clipped and setup for copy-on-write
18784 * to avoid applying "true_share" to a large VM object when only a subset is
18785 * targeted.
18786 *
18787 * For now, we target only the map entries created for the Objective C
18788 * Garbage Collector, which initially have the following properties:
18789 * - alias == VM_MEMORY_MALLOC
18790 * - wired_count == 0
18791 * - !needs_copy
18792 * and a VM object with:
18793 * - internal
18794 * - copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC
18795 * - !true_share
18796 * - vo_size == ANON_CHUNK_SIZE
18797 *
18798 * Only non-kernel map entries.
18799 */
18800 boolean_t
18801 vm_map_entry_should_cow_for_true_share(
18802 vm_map_entry_t entry)
18803 {
18804 vm_object_t object;
18805
18806 if (entry->is_sub_map) {
18807 /* entry does not point at a VM object */
18808 return FALSE;
18809 }
18810
18811 if (entry->needs_copy) {
18812 /* already set for copy_on_write: done! */
18813 return FALSE;
18814 }
18815
18816 if (VME_ALIAS(entry) != VM_MEMORY_MALLOC &&
18817 VME_ALIAS(entry) != VM_MEMORY_MALLOC_SMALL) {
18818 /* not a malloc heap or Obj-C Garbage Collector heap */
18819 return FALSE;
18820 }
18821
18822 if (entry->wired_count) {
18823 /* wired: can't change the map entry... */
18824 vm_counters.should_cow_but_wired++;
18825 return FALSE;
18826 }
18827
18828 object = VME_OBJECT(entry);
18829
18830 if (object == VM_OBJECT_NULL) {
18831 /* no object yet... */
18832 return FALSE;
18833 }
18834
18835 if (!object->internal) {
18836 /* not an internal object */
18837 return FALSE;
18838 }
18839
18840 if (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
18841 /* not the default copy strategy */
18842 return FALSE;
18843 }
18844
18845 if (object->true_share) {
18846 /* already true_share: too late to avoid it */
18847 return FALSE;
18848 }
18849
18850 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC &&
18851 object->vo_size != ANON_CHUNK_SIZE) {
18852 /* ... not an object created for the ObjC Garbage Collector */
18853 return FALSE;
18854 }
18855
18856 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_SMALL &&
18857 object->vo_size != 2048 * 4096) {
18858 /* ... not a "MALLOC_SMALL" heap */
18859 return FALSE;
18860 }
18861
18862 /*
18863 * All the criteria match: we have a large object being targeted for "true_share".
18864 * To limit the adverse side-effects linked with "true_share", tell the caller to
18865 * try and avoid setting up the entire object for "true_share" by clipping the
18866 * targeted range and setting it up for copy-on-write.
18867 */
18868 return TRUE;
18869 }
18870
18871 vm_map_offset_t
18872 vm_map_round_page_mask(
18873 vm_map_offset_t offset,
18874 vm_map_offset_t mask)
18875 {
18876 return VM_MAP_ROUND_PAGE(offset, mask);
18877 }
18878
18879 vm_map_offset_t
18880 vm_map_trunc_page_mask(
18881 vm_map_offset_t offset,
18882 vm_map_offset_t mask)
18883 {
18884 return VM_MAP_TRUNC_PAGE(offset, mask);
18885 }
18886
18887 boolean_t
18888 vm_map_page_aligned(
18889 vm_map_offset_t offset,
18890 vm_map_offset_t mask)
18891 {
18892 return ((offset) & mask) == 0;
18893 }
18894
18895 int
18896 vm_map_page_shift(
18897 vm_map_t map)
18898 {
18899 return VM_MAP_PAGE_SHIFT(map);
18900 }
18901
18902 int
18903 vm_map_page_size(
18904 vm_map_t map)
18905 {
18906 return VM_MAP_PAGE_SIZE(map);
18907 }
18908
18909 vm_map_offset_t
18910 vm_map_page_mask(
18911 vm_map_t map)
18912 {
18913 return VM_MAP_PAGE_MASK(map);
18914 }
18915
18916 kern_return_t
18917 vm_map_set_page_shift(
18918 vm_map_t map,
18919 int pageshift)
18920 {
18921 if (map->hdr.nentries != 0) {
18922 /* too late to change page size */
18923 return KERN_FAILURE;
18924 }
18925
18926 map->hdr.page_shift = pageshift;
18927
18928 return KERN_SUCCESS;
18929 }
18930
18931 kern_return_t
18932 vm_map_query_volatile(
18933 vm_map_t map,
18934 mach_vm_size_t *volatile_virtual_size_p,
18935 mach_vm_size_t *volatile_resident_size_p,
18936 mach_vm_size_t *volatile_compressed_size_p,
18937 mach_vm_size_t *volatile_pmap_size_p,
18938 mach_vm_size_t *volatile_compressed_pmap_size_p)
18939 {
18940 mach_vm_size_t volatile_virtual_size;
18941 mach_vm_size_t volatile_resident_count;
18942 mach_vm_size_t volatile_compressed_count;
18943 mach_vm_size_t volatile_pmap_count;
18944 mach_vm_size_t volatile_compressed_pmap_count;
18945 mach_vm_size_t resident_count;
18946 vm_map_entry_t entry;
18947 vm_object_t object;
18948
18949 /* map should be locked by caller */
18950
18951 volatile_virtual_size = 0;
18952 volatile_resident_count = 0;
18953 volatile_compressed_count = 0;
18954 volatile_pmap_count = 0;
18955 volatile_compressed_pmap_count = 0;
18956
18957 for (entry = vm_map_first_entry(map);
18958 entry != vm_map_to_entry(map);
18959 entry = entry->vme_next) {
18960 mach_vm_size_t pmap_resident_bytes, pmap_compressed_bytes;
18961
18962 if (entry->is_sub_map) {
18963 continue;
18964 }
18965 if (!(entry->protection & VM_PROT_WRITE)) {
18966 continue;
18967 }
18968 object = VME_OBJECT(entry);
18969 if (object == VM_OBJECT_NULL) {
18970 continue;
18971 }
18972 if (object->purgable != VM_PURGABLE_VOLATILE &&
18973 object->purgable != VM_PURGABLE_EMPTY) {
18974 continue;
18975 }
18976 if (VME_OFFSET(entry)) {
18977 /*
18978 * If the map entry has been split and the object now
18979 * appears several times in the VM map, we don't want
18980 * to count the object's resident_page_count more than
18981 * once. We count it only for the first one, starting
18982 * at offset 0 and ignore the other VM map entries.
18983 */
18984 continue;
18985 }
18986 resident_count = object->resident_page_count;
18987 if ((VME_OFFSET(entry) / PAGE_SIZE) >= resident_count) {
18988 resident_count = 0;
18989 } else {
18990 resident_count -= (VME_OFFSET(entry) / PAGE_SIZE);
18991 }
18992
18993 volatile_virtual_size += entry->vme_end - entry->vme_start;
18994 volatile_resident_count += resident_count;
18995 if (object->pager) {
18996 volatile_compressed_count +=
18997 vm_compressor_pager_get_count(object->pager);
18998 }
18999 pmap_compressed_bytes = 0;
19000 pmap_resident_bytes =
19001 pmap_query_resident(map->pmap,
19002 entry->vme_start,
19003 entry->vme_end,
19004 &pmap_compressed_bytes);
19005 volatile_pmap_count += (pmap_resident_bytes / PAGE_SIZE);
19006 volatile_compressed_pmap_count += (pmap_compressed_bytes
19007 / PAGE_SIZE);
19008 }
19009
19010 /* map is still locked on return */
19011
19012 *volatile_virtual_size_p = volatile_virtual_size;
19013 *volatile_resident_size_p = volatile_resident_count * PAGE_SIZE;
19014 *volatile_compressed_size_p = volatile_compressed_count * PAGE_SIZE;
19015 *volatile_pmap_size_p = volatile_pmap_count * PAGE_SIZE;
19016 *volatile_compressed_pmap_size_p = volatile_compressed_pmap_count * PAGE_SIZE;
19017
19018 return KERN_SUCCESS;
19019 }
19020
19021 void
19022 vm_map_sizes(vm_map_t map,
19023 vm_map_size_t * psize,
19024 vm_map_size_t * pfree,
19025 vm_map_size_t * plargest_free)
19026 {
19027 vm_map_entry_t entry;
19028 vm_map_offset_t prev;
19029 vm_map_size_t free, total_free, largest_free;
19030 boolean_t end;
19031
19032 if (!map) {
19033 *psize = *pfree = *plargest_free = 0;
19034 return;
19035 }
19036 total_free = largest_free = 0;
19037
19038 vm_map_lock_read(map);
19039 if (psize) {
19040 *psize = map->max_offset - map->min_offset;
19041 }
19042
19043 prev = map->min_offset;
19044 for (entry = vm_map_first_entry(map);; entry = entry->vme_next) {
19045 end = (entry == vm_map_to_entry(map));
19046
19047 if (end) {
19048 free = entry->vme_end - prev;
19049 } else {
19050 free = entry->vme_start - prev;
19051 }
19052
19053 total_free += free;
19054 if (free > largest_free) {
19055 largest_free = free;
19056 }
19057
19058 if (end) {
19059 break;
19060 }
19061 prev = entry->vme_end;
19062 }
19063 vm_map_unlock_read(map);
19064 if (pfree) {
19065 *pfree = total_free;
19066 }
19067 if (plargest_free) {
19068 *plargest_free = largest_free;
19069 }
19070 }
19071
#if VM_SCAN_FOR_SHADOW_CHAIN
int vm_map_shadow_max(vm_map_t map);

/*
 *	vm_map_shadow_max:
 *
 *	Returns the length of the longest shadow chain found behind the
 *	VM objects mapped by "map" (0 if "map" is NULL or has no shadowed
 *	object).  Sub-map entries and entries without an object are ignored.
 *
 *	Takes the map lock for reading and walks each shadow chain by
 *	locking the next object (shared) before unlocking the current one.
 */
int
vm_map_shadow_max(
	vm_map_t map)
{
	int             longest_chain = 0;
	vm_map_entry_t  entry;

	if (map == NULL) {
		return 0;
	}

	vm_map_lock_read(map);

	for (entry = vm_map_first_entry(map);
	    entry != vm_map_to_entry(map);
	    entry = entry->vme_next) {
		vm_object_t     object, shadow;
		int             chain_length;

		if (entry->is_sub_map) {
			continue;
		}
		object = VME_OBJECT(entry);
		if (object == NULL) {
			continue;
		}

		/* follow the shadow chain down to its last object */
		chain_length = 0;
		vm_object_lock_shared(object);
		while ((shadow = object->shadow) != NULL) {
			vm_object_lock_shared(shadow);
			vm_object_unlock(object);
			object = shadow;
			chain_length++;
		}
		vm_object_unlock(object);

		if (chain_length > longest_chain) {
			longest_chain = chain_length;
		}
	}

	vm_map_unlock_read(map);

	return longest_chain;
}
#endif /* VM_SCAN_FOR_SHADOW_CHAIN */
19119
19120 void
19121 vm_commit_pagezero_status(vm_map_t lmap)
19122 {
19123 pmap_advise_pagezero_range(lmap->pmap, lmap->min_offset);
19124 }
19125
19126 #if !CONFIG_EMBEDDED
19127 void
19128 vm_map_set_high_start(
19129 vm_map_t map,
19130 vm_map_offset_t high_start)
19131 {
19132 map->vmmap_high_start = high_start;
19133 }
19134 #endif
19135
#if PMAP_CS
/*
 *	vm_map_entry_cs_associate:
 *
 *	Associates the executable mapping described by "entry" in "map"
 *	with code-signing information in the map's pmap:
 *	- JIT entries are associated with PMAP_CS_ASSOCIATE_JIT,
 *	- "vmkf_remap_prot_copy" mappings with PMAP_CS_ASSOCIATE_COW,
 *	- other mappings with the code-signing blobs of the first
 *	  "code_signed" object found in the entry's shadow chain.
 *
 *	Nothing is done (KERN_SUCCESS) if the map has no pmap, if the entry
 *	is a sub-map, has no VM object or is not executable, or if no
 *	code-signed object is found in the shadow chain.
 *
 *	Otherwise the result of the association is returned and:
 *	- KERN_SUCCESS: the entry is marked "pmap_cs_associated" (and
 *	  "permanent" if vm_map_executable_immutable is set),
 *	- KERN_NOT_SUPPORTED: the entry is left unchanged,
 *	- anything else: VM_PROT_EXECUTE is removed from the entry's
 *	  current and maximum protections.
 *
 *	The map must be locked exclusively once past the initial checks.
 */
kern_return_t
vm_map_entry_cs_associate(
	vm_map_t                map,
	vm_map_entry_t          entry,
	vm_map_kernel_flags_t   vmk_flags)
{
	kern_return_t   cs_ret;

	if (map->pmap == NULL ||
	    entry->is_sub_map || /* XXX FBDP: recurse on sub-range? */
	    VME_OBJECT(entry) == VM_OBJECT_NULL ||
	    !(entry->protection & VM_PROT_EXECUTE)) {
		/* nothing to associate */
		return KERN_SUCCESS;
	}

	vm_map_lock_assert_exclusive(map);

	if (entry->used_for_jit) {
		cs_ret = pmap_cs_associate(map->pmap,
		    PMAP_CS_ASSOCIATE_JIT,
		    entry->vme_start,
		    entry->vme_end - entry->vme_start);
	} else if (vmk_flags.vmkf_remap_prot_copy) {
		cs_ret = pmap_cs_associate(map->pmap,
		    PMAP_CS_ASSOCIATE_COW,
		    entry->vme_start,
		    entry->vme_end - entry->vme_start);
	} else {
		vm_object_t             signed_object, next_object;
		vm_object_offset_t      signed_offset;
		void                    *cs_blobs;
		struct vnode            *cs_vnode;

		/*
		 * Look for a code-signed object down the shadow chain,
		 * accumulating the shadow offsets along the way.  The next
		 * object is locked before the current one gets unlocked.
		 */
		signed_object = VME_OBJECT(entry);
		vm_object_lock_shared(signed_object);
		signed_offset = VME_OFFSET(entry);
		while (signed_object != VM_OBJECT_NULL &&
		    !signed_object->code_signed) {
			next_object = signed_object->shadow;
			if (next_object != VM_OBJECT_NULL) {
				signed_offset += signed_object->vo_shadow_offset;
				vm_object_lock_shared(next_object);
			}
			vm_object_unlock(signed_object);
			signed_object = next_object;
		}
		if (signed_object == VM_OBJECT_NULL) {
			/* no code-signed object (and no lock held): done */
			return KERN_SUCCESS;
		}

		/* "signed_object" is code-signed and still locked (shared) */
		signed_offset += signed_object->paging_offset;
		cs_vnode = vnode_pager_lookup_vnode(signed_object->pager);
		cs_ret = vnode_pager_get_cs_blobs(cs_vnode,
		    &cs_blobs);
		assert(cs_ret == KERN_SUCCESS);
		cs_ret = cs_associate_blob_with_mapping(map->pmap,
		    entry->vme_start,
		    (entry->vme_end -
		    entry->vme_start),
		    signed_offset,
		    cs_blobs);
		vm_object_unlock(signed_object);
		signed_object = VM_OBJECT_NULL;
	}

	/* update the map entry according to the outcome */
	switch (cs_ret) {
	case KERN_SUCCESS:
		DTRACE_VM2(vm_map_entry_cs_associate_success,
		    vm_map_offset_t, entry->vme_start,
		    vm_map_offset_t, entry->vme_end);
		if (vm_map_executable_immutable) {
			/*
			 * Prevent this executable mapping from being
			 * unmapped or modified.
			 */
			entry->permanent = TRUE;
		}
		/*
		 * pmap says it will validate the code-signing validity of
		 * pages faulted in via this mapping, so mark this map entry
		 * so that vm_fault() bypasses code-signing validation for
		 * faults coming through this mapping.
		 */
		entry->pmap_cs_associated = TRUE;
		break;

	case KERN_NOT_SUPPORTED:
		/*
		 * pmap won't check the code-signing validity of pages
		 * faulted in via this mapping, so VM should keep doing it.
		 */
		DTRACE_VM3(vm_map_entry_cs_associate_off,
		    vm_map_offset_t, entry->vme_start,
		    vm_map_offset_t, entry->vme_end,
		    int, cs_ret);
		break;

	default:
		/*
		 * A real error: do not allow execution in this mapping.
		 */
		DTRACE_VM3(vm_map_entry_cs_associate_failure,
		    vm_map_offset_t, entry->vme_start,
		    vm_map_offset_t, entry->vme_end,
		    int, cs_ret);
		entry->protection &= ~VM_PROT_EXECUTE;
		entry->max_protection &= ~VM_PROT_EXECUTE;
		break;
	}

	return cs_ret;
}
#endif /* PMAP_CS */
19255
19256 /*
19257 * FORKED CORPSE FOOTPRINT
19258 *
19259 * A forked corpse gets a copy of the original VM map but its pmap is mostly
19260 * empty since it never ran and never got to fault in any pages.
19261 * Collecting footprint info (via "sysctl vm.self_region_footprint") for
19262 * a forked corpse would therefore return very little information.
19263 *
19264 * When forking a corpse, we can pass the VM_MAP_FORK_CORPSE_FOOTPRINT option
19265 * to vm_map_fork() to collect footprint information from the original VM map
19266 * and its pmap, and store it in the forked corpse's VM map. That information
19267 * is stored in place of the VM map's "hole list" since we'll never need to
19268 * look up holes in the corpse's map.
19269 *
19270 * The corpse's footprint info looks like this:
19271 *
19272 * vm_map->vmmap_corpse_footprint points to pageable kernel memory laid out
19273 * as follows:
19274 * +---------------------------------------+
19275 * header-> | cf_size |
19276 * +-------------------+-------------------+
19277 * | cf_last_region | cf_last_zeroes |
19278 * +-------------------+-------------------+
19279 * region1-> | cfr_vaddr |
19280 * +-------------------+-------------------+
19281 * | cfr_num_pages | d0 | d1 | d2 | d3 |
19282 * +---------------------------------------+
19283 * | d4 | d5 | ... |
19284 * +---------------------------------------+
19285 * | ... |
19286 * +-------------------+-------------------+
19287 * | dy | dz | na | na | cfr_vaddr... | <-region2
19288 * +-------------------+-------------------+
19289 * | cfr_vaddr (ctd) | cfr_num_pages |
19290 * +---------------------------------------+
19291 * | d0 | d1 ... |
19292 * +---------------------------------------+
19293 * ...
19294 * +---------------------------------------+
19295 * last region-> | cfr_vaddr |
19296 * +---------------------------------------+
19297 * | cfr_num_pages | d0 | d1 | d2 | d3 |
19298 * +---------------------------------------+
19299 * ...
19300 * +---------------------------------------+
19301 * | dx | dy | dz | na | na | na | na | na |
19302 * +---------------------------------------+
19303 *
19304 * where:
19305 * cf_size: total size of the buffer (rounded to page size)
19306 * cf_last_region: offset in the buffer of the last "region" sub-header
19307 * cf_last_zeroes: number of trailing "zero" dispositions at the end
19308 * of last region
19309 * cfr_vaddr: virtual address of the start of the covered "region"
19310 * cfr_num_pages: number of pages in the covered "region"
19311 * d*: disposition of the page at that virtual address
19312 * Regions in the buffer are word-aligned.
19313 *
19314 * We estimate the size of the buffer based on the number of memory regions
19315 * and the virtual size of the address space. While copying each memory region
19316 * during vm_map_fork(), we also collect the footprint info for that region
19317 * and store it in the buffer, packing it as much as possible (coalescing
19318 * contiguous memory regions to avoid having too many region headers and
19319 * avoiding long streaks of "zero" page dispositions by splitting footprint
19320 * "regions"), so the number of regions in the footprint buffer might not match
19321 * the number of memory regions in the address space.
19322 *
19323 * We also have to copy the original task's "nonvolatile" ledgers since that's
19324 * part of the footprint and will need to be reported to any tool asking for
19325 * the footprint information of the forked corpse.
19326 */
19327
/*
 * Statistics about the footprint buffers of forked corpses.
 */
/* number of footprint collections completed by ..._collect_done() */
uint64_t vm_map_corpse_footprint_count = 0;
/* running average of the used size (in bytes) of the footprint buffers */
uint64_t vm_map_corpse_footprint_size_avg = 0;
/* largest used size (in bytes) seen for a footprint buffer */
uint64_t vm_map_corpse_footprint_size_max = 0;
/* number of times a collection ran out of space in the footprint buffer */
uint64_t vm_map_corpse_footprint_full = 0;
/* number of times the footprint buffer could not be allocated */
uint64_t vm_map_corpse_footprint_no_buf = 0;
19333
19334 /*
19335 * vm_map_corpse_footprint_new_region:
19336 * closes the current footprint "region" and creates a new one
19337 *
19338 * Returns NULL if there's not enough space in the buffer for a new region.
19339 */
19340 static struct vm_map_corpse_footprint_region *
19341 vm_map_corpse_footprint_new_region(
19342 struct vm_map_corpse_footprint_header *footprint_header)
19343 {
19344 uintptr_t footprint_edge;
19345 uint32_t new_region_offset;
19346 struct vm_map_corpse_footprint_region *footprint_region;
19347 struct vm_map_corpse_footprint_region *new_footprint_region;
19348
19349 footprint_edge = ((uintptr_t)footprint_header +
19350 footprint_header->cf_size);
19351 footprint_region = ((struct vm_map_corpse_footprint_region *)
19352 ((char *)footprint_header +
19353 footprint_header->cf_last_region));
19354 assert((uintptr_t)footprint_region + sizeof(*footprint_region) <=
19355 footprint_edge);
19356
19357 /* get rid of trailing zeroes in the last region */
19358 assert(footprint_region->cfr_num_pages >=
19359 footprint_header->cf_last_zeroes);
19360 footprint_region->cfr_num_pages -=
19361 footprint_header->cf_last_zeroes;
19362 footprint_header->cf_last_zeroes = 0;
19363
19364 /* reuse this region if it's now empty */
19365 if (footprint_region->cfr_num_pages == 0) {
19366 return footprint_region;
19367 }
19368
19369 /* compute offset of new region */
19370 new_region_offset = footprint_header->cf_last_region;
19371 new_region_offset += sizeof(*footprint_region);
19372 new_region_offset += footprint_region->cfr_num_pages;
19373 new_region_offset = roundup(new_region_offset, sizeof(int));
19374
19375 /* check if we're going over the edge */
19376 if (((uintptr_t)footprint_header +
19377 new_region_offset +
19378 sizeof(*footprint_region)) >=
19379 footprint_edge) {
19380 /* over the edge: no new region */
19381 return NULL;
19382 }
19383
19384 /* adjust offset of last region in header */
19385 footprint_header->cf_last_region = new_region_offset;
19386
19387 new_footprint_region = (struct vm_map_corpse_footprint_region *)
19388 ((char *)footprint_header +
19389 footprint_header->cf_last_region);
19390 new_footprint_region->cfr_vaddr = 0;
19391 new_footprint_region->cfr_num_pages = 0;
19392 /* caller needs to initialize new region */
19393
19394 return new_footprint_region;
19395 }
19396
19397 /*
19398 * vm_map_corpse_footprint_collect:
19399 * collect footprint information for "old_entry" in "old_map" and
19400 * stores it in "new_map"'s vmmap_footprint_info.
19401 */
19402 kern_return_t
19403 vm_map_corpse_footprint_collect(
19404 vm_map_t old_map,
19405 vm_map_entry_t old_entry,
19406 vm_map_t new_map)
19407 {
19408 vm_map_offset_t va;
19409 int disp;
19410 kern_return_t kr;
19411 struct vm_map_corpse_footprint_header *footprint_header;
19412 struct vm_map_corpse_footprint_region *footprint_region;
19413 struct vm_map_corpse_footprint_region *new_footprint_region;
19414 unsigned char *next_disp_p;
19415 uintptr_t footprint_edge;
19416 uint32_t num_pages_tmp;
19417
19418 va = old_entry->vme_start;
19419
19420 vm_map_lock_assert_exclusive(old_map);
19421 vm_map_lock_assert_exclusive(new_map);
19422
19423 assert(new_map->has_corpse_footprint);
19424 assert(!old_map->has_corpse_footprint);
19425 if (!new_map->has_corpse_footprint ||
19426 old_map->has_corpse_footprint) {
19427 /*
19428 * This can only transfer footprint info from a
19429 * map with a live pmap to a map with a corpse footprint.
19430 */
19431 return KERN_NOT_SUPPORTED;
19432 }
19433
19434 if (new_map->vmmap_corpse_footprint == NULL) {
19435 vm_offset_t buf;
19436 vm_size_t buf_size;
19437
19438 buf = 0;
19439 buf_size = (sizeof(*footprint_header) +
19440 (old_map->hdr.nentries
19441 *
19442 (sizeof(*footprint_region) +
19443 +3)) /* potential alignment for each region */
19444 +
19445 ((old_map->size / PAGE_SIZE)
19446 *
19447 sizeof(char))); /* disposition for each page */
19448 // printf("FBDP corpse map %p guestimate footprint size 0x%llx\n", new_map, (uint64_t) buf_size);
19449 buf_size = round_page(buf_size);
19450
19451 /* limit buffer to 1 page to validate overflow detection */
19452 // buf_size = PAGE_SIZE;
19453
19454 /* limit size to a somewhat sane amount */
19455 #if CONFIG_EMBEDDED
19456 #define VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE (256*1024) /* 256KB */
19457 #else /* CONFIG_EMBEDDED */
19458 #define VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE (8*1024*1024) /* 8MB */
19459 #endif /* CONFIG_EMBEDDED */
19460 if (buf_size > VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE) {
19461 buf_size = VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE;
19462 }
19463
19464 /*
19465 * Allocate the pageable buffer (with a trailing guard page).
19466 * It will be zero-filled on demand.
19467 */
19468 kr = kernel_memory_allocate(kernel_map,
19469 &buf,
19470 (buf_size
19471 + PAGE_SIZE), /* trailing guard page */
19472 0, /* mask */
19473 KMA_PAGEABLE | KMA_GUARD_LAST,
19474 VM_KERN_MEMORY_DIAG);
19475 if (kr != KERN_SUCCESS) {
19476 vm_map_corpse_footprint_no_buf++;
19477 return kr;
19478 }
19479
19480 /* initialize header and 1st region */
19481 footprint_header = (struct vm_map_corpse_footprint_header *)buf;
19482 new_map->vmmap_corpse_footprint = footprint_header;
19483
19484 footprint_header->cf_size = buf_size;
19485 footprint_header->cf_last_region =
19486 sizeof(*footprint_header);
19487 footprint_header->cf_last_zeroes = 0;
19488
19489 footprint_region = (struct vm_map_corpse_footprint_region *)
19490 ((char *)footprint_header +
19491 footprint_header->cf_last_region);
19492 footprint_region->cfr_vaddr = 0;
19493 footprint_region->cfr_num_pages = 0;
19494 } else {
19495 /* retrieve header and last region */
19496 footprint_header = (struct vm_map_corpse_footprint_header *)
19497 new_map->vmmap_corpse_footprint;
19498 footprint_region = (struct vm_map_corpse_footprint_region *)
19499 ((char *)footprint_header +
19500 footprint_header->cf_last_region);
19501 }
19502 footprint_edge = ((uintptr_t)footprint_header +
19503 footprint_header->cf_size);
19504
19505 if ((footprint_region->cfr_vaddr +
19506 (((vm_map_offset_t)footprint_region->cfr_num_pages) *
19507 PAGE_SIZE))
19508 != old_entry->vme_start) {
19509 uint64_t num_pages_delta;
19510 uint32_t region_offset_delta;
19511
19512 /*
19513 * Not the next contiguous virtual address:
19514 * start a new region or store "zero" dispositions for
19515 * the missing pages?
19516 */
19517 /* size of gap in actual page dispositions */
19518 num_pages_delta = (((old_entry->vme_start -
19519 footprint_region->cfr_vaddr) / PAGE_SIZE)
19520 - footprint_region->cfr_num_pages);
19521 /* size of gap as a new footprint region header */
19522 region_offset_delta =
19523 (sizeof(*footprint_region) +
19524 roundup((footprint_region->cfr_num_pages -
19525 footprint_header->cf_last_zeroes),
19526 sizeof(int)) -
19527 (footprint_region->cfr_num_pages -
19528 footprint_header->cf_last_zeroes));
19529 // printf("FBDP %s:%d region 0x%x 0x%llx 0x%x vme_start 0x%llx pages_delta 0x%llx region_delta 0x%x\n", __FUNCTION__, __LINE__, footprint_header->cf_last_region, footprint_region->cfr_vaddr, footprint_region->cfr_num_pages, old_entry->vme_start, num_pages_delta, region_offset_delta);
19530 if (region_offset_delta < num_pages_delta ||
19531 os_add3_overflow(footprint_region->cfr_num_pages,
19532 (uint32_t) num_pages_delta,
19533 1,
19534 &num_pages_tmp)) {
19535 /*
19536 * Storing data for this gap would take more space
19537 * than inserting a new footprint region header:
19538 * let's start a new region and save space. If it's a
19539 * tie, let's avoid using a new region, since that
19540 * would require more region hops to find the right
19541 * range during lookups.
19542 *
19543 * If the current region's cfr_num_pages would overflow
19544 * if we added "zero" page dispositions for the gap,
19545 * no choice but to start a new region.
19546 */
19547 // printf("FBDP %s:%d new region\n", __FUNCTION__, __LINE__);
19548 new_footprint_region =
19549 vm_map_corpse_footprint_new_region(footprint_header);
19550 /* check that we're not going over the edge */
19551 if (new_footprint_region == NULL) {
19552 goto over_the_edge;
19553 }
19554 footprint_region = new_footprint_region;
19555 /* initialize new region as empty */
19556 footprint_region->cfr_vaddr = old_entry->vme_start;
19557 footprint_region->cfr_num_pages = 0;
19558 } else {
19559 /*
19560 * Store "zero" page dispositions for the missing
19561 * pages.
19562 */
19563 // printf("FBDP %s:%d zero gap\n", __FUNCTION__, __LINE__);
19564 for (; num_pages_delta > 0; num_pages_delta--) {
19565 next_disp_p =
19566 ((unsigned char *) footprint_region +
19567 sizeof(*footprint_region) +
19568 footprint_region->cfr_num_pages);
19569 /* check that we're not going over the edge */
19570 if ((uintptr_t)next_disp_p >= footprint_edge) {
19571 goto over_the_edge;
19572 }
19573 /* store "zero" disposition for this gap page */
19574 footprint_region->cfr_num_pages++;
19575 *next_disp_p = (unsigned char) 0;
19576 footprint_header->cf_last_zeroes++;
19577 }
19578 }
19579 }
19580
19581 for (va = old_entry->vme_start;
19582 va < old_entry->vme_end;
19583 va += PAGE_SIZE) {
19584 vm_object_t object;
19585
19586 object = VME_OBJECT(old_entry);
19587 if (!old_entry->is_sub_map &&
19588 old_entry->iokit_acct &&
19589 object != VM_OBJECT_NULL &&
19590 object->internal &&
19591 object->purgable == VM_PURGABLE_DENY) {
19592 /*
19593 * Non-purgeable IOKit memory: phys_footprint
19594 * includes the entire virtual mapping.
19595 * Since the forked corpse's VM map entry will not
19596 * have "iokit_acct", pretend that this page's
19597 * disposition is "present & internal", so that it
19598 * shows up in the forked corpse's footprint.
19599 */
19600 disp = (PMAP_QUERY_PAGE_PRESENT |
19601 PMAP_QUERY_PAGE_INTERNAL);
19602 } else {
19603 disp = 0;
19604 pmap_query_page_info(old_map->pmap,
19605 va,
19606 &disp);
19607 }
19608
19609 // if (va < SHARED_REGION_BASE_ARM64) printf("FBDP collect map %p va 0x%llx disp 0x%x\n", new_map, va, disp);
19610
19611 if (disp == 0 && footprint_region->cfr_num_pages == 0) {
19612 /*
19613 * Ignore "zero" dispositions at start of
19614 * region: just move start of region.
19615 */
19616 footprint_region->cfr_vaddr += PAGE_SIZE;
19617 continue;
19618 }
19619
19620 /* would region's cfr_num_pages overflow? */
19621 if (os_add_overflow(footprint_region->cfr_num_pages, 1,
19622 &num_pages_tmp)) {
19623 /* overflow: create a new region */
19624 new_footprint_region =
19625 vm_map_corpse_footprint_new_region(
19626 footprint_header);
19627 if (new_footprint_region == NULL) {
19628 goto over_the_edge;
19629 }
19630 footprint_region = new_footprint_region;
19631 footprint_region->cfr_vaddr = va;
19632 footprint_region->cfr_num_pages = 0;
19633 }
19634
19635 next_disp_p = ((unsigned char *)footprint_region +
19636 sizeof(*footprint_region) +
19637 footprint_region->cfr_num_pages);
19638 /* check that we're not going over the edge */
19639 if ((uintptr_t)next_disp_p >= footprint_edge) {
19640 goto over_the_edge;
19641 }
19642 /* store this dispostion */
19643 *next_disp_p = (unsigned char) disp;
19644 footprint_region->cfr_num_pages++;
19645
19646 if (disp != 0) {
19647 /* non-zero disp: break the current zero streak */
19648 footprint_header->cf_last_zeroes = 0;
19649 /* done */
19650 continue;
19651 }
19652
19653 /* zero disp: add to the current streak of zeroes */
19654 footprint_header->cf_last_zeroes++;
19655 if ((footprint_header->cf_last_zeroes +
19656 roundup((footprint_region->cfr_num_pages -
19657 footprint_header->cf_last_zeroes) &
19658 (sizeof(int) - 1),
19659 sizeof(int))) <
19660 (sizeof(*footprint_header))) {
19661 /*
19662 * There are not enough trailing "zero" dispositions
19663 * (+ the extra padding we would need for the previous
19664 * region); creating a new region would not save space
19665 * at this point, so let's keep this "zero" disposition
19666 * in this region and reconsider later.
19667 */
19668 continue;
19669 }
19670 /*
19671 * Create a new region to avoid having too many consecutive
19672 * "zero" dispositions.
19673 */
19674 new_footprint_region =
19675 vm_map_corpse_footprint_new_region(footprint_header);
19676 if (new_footprint_region == NULL) {
19677 goto over_the_edge;
19678 }
19679 footprint_region = new_footprint_region;
19680 /* initialize the new region as empty ... */
19681 footprint_region->cfr_num_pages = 0;
19682 /* ... and skip this "zero" disp */
19683 footprint_region->cfr_vaddr = va + PAGE_SIZE;
19684 }
19685
19686 return KERN_SUCCESS;
19687
19688 over_the_edge:
19689 // printf("FBDP map %p footprint was full for va 0x%llx\n", new_map, va);
19690 vm_map_corpse_footprint_full++;
19691 return KERN_RESOURCE_SHORTAGE;
19692 }
19693
19694 /*
19695 * vm_map_corpse_footprint_collect_done:
19696 * completes the footprint collection by getting rid of any remaining
19697 * trailing "zero" dispositions and trimming the unused part of the
19698 * kernel buffer
19699 */
19700 void
19701 vm_map_corpse_footprint_collect_done(
19702 vm_map_t new_map)
19703 {
19704 struct vm_map_corpse_footprint_header *footprint_header;
19705 struct vm_map_corpse_footprint_region *footprint_region;
19706 vm_size_t buf_size, actual_size;
19707 kern_return_t kr;
19708
19709 assert(new_map->has_corpse_footprint);
19710 if (!new_map->has_corpse_footprint ||
19711 new_map->vmmap_corpse_footprint == NULL) {
19712 return;
19713 }
19714
19715 footprint_header = (struct vm_map_corpse_footprint_header *)
19716 new_map->vmmap_corpse_footprint;
19717 buf_size = footprint_header->cf_size;
19718
19719 footprint_region = (struct vm_map_corpse_footprint_region *)
19720 ((char *)footprint_header +
19721 footprint_header->cf_last_region);
19722
19723 /* get rid of trailing zeroes in last region */
19724 assert(footprint_region->cfr_num_pages >= footprint_header->cf_last_zeroes);
19725 footprint_region->cfr_num_pages -= footprint_header->cf_last_zeroes;
19726 footprint_header->cf_last_zeroes = 0;
19727
19728 actual_size = (vm_size_t)(footprint_header->cf_last_region +
19729 sizeof(*footprint_region) +
19730 footprint_region->cfr_num_pages);
19731
19732 // printf("FBDP map %p buf_size 0x%llx actual_size 0x%llx\n", new_map, (uint64_t) buf_size, (uint64_t) actual_size);
19733 vm_map_corpse_footprint_size_avg =
19734 (((vm_map_corpse_footprint_size_avg *
19735 vm_map_corpse_footprint_count) +
19736 actual_size) /
19737 (vm_map_corpse_footprint_count + 1));
19738 vm_map_corpse_footprint_count++;
19739 if (actual_size > vm_map_corpse_footprint_size_max) {
19740 vm_map_corpse_footprint_size_max = actual_size;
19741 }
19742
19743 actual_size = round_page(actual_size);
19744 if (buf_size > actual_size) {
19745 kr = vm_deallocate(kernel_map,
19746 ((vm_address_t)footprint_header +
19747 actual_size +
19748 PAGE_SIZE), /* trailing guard page */
19749 (buf_size - actual_size));
19750 assertf(kr == KERN_SUCCESS,
19751 "trim: footprint_header %p buf_size 0x%llx actual_size 0x%llx kr=0x%x\n",
19752 footprint_header,
19753 (uint64_t) buf_size,
19754 (uint64_t) actual_size,
19755 kr);
19756 kr = vm_protect(kernel_map,
19757 ((vm_address_t)footprint_header +
19758 actual_size),
19759 PAGE_SIZE,
19760 FALSE, /* set_maximum */
19761 VM_PROT_NONE);
19762 assertf(kr == KERN_SUCCESS,
19763 "guard: footprint_header %p buf_size 0x%llx actual_size 0x%llx kr=0x%x\n",
19764 footprint_header,
19765 (uint64_t) buf_size,
19766 (uint64_t) actual_size,
19767 kr);
19768 }
19769
19770 footprint_header->cf_size = actual_size;
19771 }
19772
19773 /*
19774 * vm_map_corpse_footprint_query_page_info:
19775 * retrieves the disposition of the page at virtual address "vaddr"
19776 * in the forked corpse's VM map
19777 *
19778 * This is the equivalent of pmap_query_page_info() for a forked corpse.
19779 */
19780 kern_return_t
19781 vm_map_corpse_footprint_query_page_info(
19782 vm_map_t map,
19783 vm_map_offset_t va,
19784 int *disp)
19785 {
19786 struct vm_map_corpse_footprint_header *footprint_header;
19787 struct vm_map_corpse_footprint_region *footprint_region;
19788 uint32_t footprint_region_offset;
19789 vm_map_offset_t region_start, region_end;
19790 int disp_idx;
19791 kern_return_t kr;
19792
19793 if (!map->has_corpse_footprint) {
19794 *disp = 0;
19795 kr = KERN_INVALID_ARGUMENT;
19796 goto done;
19797 }
19798
19799 footprint_header = map->vmmap_corpse_footprint;
19800 if (footprint_header == NULL) {
19801 *disp = 0;
19802 // if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disp);
19803 kr = KERN_INVALID_ARGUMENT;
19804 goto done;
19805 }
19806
19807 /* start looking at the hint ("cf_hint_region") */
19808 footprint_region_offset = footprint_header->cf_hint_region;
19809
19810 lookup_again:
19811 if (footprint_region_offset < sizeof(*footprint_header)) {
19812 /* hint too low: start from 1st region */
19813 footprint_region_offset = sizeof(*footprint_header);
19814 }
19815 if (footprint_region_offset >= footprint_header->cf_last_region) {
19816 /* hint too high: re-start from 1st region */
19817 footprint_region_offset = sizeof(*footprint_header);
19818 }
19819 footprint_region = (struct vm_map_corpse_footprint_region *)
19820 ((char *)footprint_header + footprint_region_offset);
19821 region_start = footprint_region->cfr_vaddr;
19822 region_end = (region_start +
19823 ((vm_map_offset_t)(footprint_region->cfr_num_pages) *
19824 PAGE_SIZE));
19825 if (va < region_start &&
19826 footprint_region_offset != sizeof(*footprint_header)) {
19827 /* our range starts before the hint region */
19828
19829 /* reset the hint (in a racy way...) */
19830 footprint_header->cf_hint_region = sizeof(*footprint_header);
19831 /* lookup "va" again from 1st region */
19832 footprint_region_offset = sizeof(*footprint_header);
19833 goto lookup_again;
19834 }
19835
19836 while (va >= region_end) {
19837 if (footprint_region_offset >= footprint_header->cf_last_region) {
19838 break;
19839 }
19840 /* skip the region's header */
19841 footprint_region_offset += sizeof(*footprint_region);
19842 /* skip the region's page dispositions */
19843 footprint_region_offset += footprint_region->cfr_num_pages;
19844 /* align to next word boundary */
19845 footprint_region_offset =
19846 roundup(footprint_region_offset,
19847 sizeof(int));
19848 footprint_region = (struct vm_map_corpse_footprint_region *)
19849 ((char *)footprint_header + footprint_region_offset);
19850 region_start = footprint_region->cfr_vaddr;
19851 region_end = (region_start +
19852 ((vm_map_offset_t)(footprint_region->cfr_num_pages) *
19853 PAGE_SIZE));
19854 }
19855 if (va < region_start || va >= region_end) {
19856 /* page not found */
19857 *disp = 0;
19858 // if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disp);
19859 kr = KERN_SUCCESS;
19860 goto done;
19861 }
19862
19863 /* "va" found: set the lookup hint for next lookup (in a racy way...) */
19864 footprint_header->cf_hint_region = footprint_region_offset;
19865
19866 /* get page disposition for "va" in this region */
19867 disp_idx = (int) ((va - footprint_region->cfr_vaddr) / PAGE_SIZE);
19868 *disp = (int) (footprint_region->cfr_disposition[disp_idx]);
19869
19870 kr = KERN_SUCCESS;
19871 done:
19872 // if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disp);
19873 /* dtrace -n 'vminfo:::footprint_query_page_info { printf("map 0x%p va 0x%llx disp 0x%x kr 0x%x", arg0, arg1, arg2, arg3); }' */
19874 DTRACE_VM4(footprint_query_page_info,
19875 vm_map_t, map,
19876 vm_map_offset_t, va,
19877 int, *disp,
19878 kern_return_t, kr);
19879
19880 return kr;
19881 }
19882
19883
19884 static void
19885 vm_map_corpse_footprint_destroy(
19886 vm_map_t map)
19887 {
19888 if (map->has_corpse_footprint &&
19889 map->vmmap_corpse_footprint != 0) {
19890 struct vm_map_corpse_footprint_header *footprint_header;
19891 vm_size_t buf_size;
19892 kern_return_t kr;
19893
19894 footprint_header = map->vmmap_corpse_footprint;
19895 buf_size = footprint_header->cf_size;
19896 kr = vm_deallocate(kernel_map,
19897 (vm_offset_t) map->vmmap_corpse_footprint,
19898 ((vm_size_t) buf_size
19899 + PAGE_SIZE)); /* trailing guard page */
19900 assertf(kr == KERN_SUCCESS, "kr=0x%x\n", kr);
19901 map->vmmap_corpse_footprint = 0;
19902 map->has_corpse_footprint = FALSE;
19903 }
19904 }
19905
19906 /*
19907 * vm_map_copy_footprint_ledgers:
19908 * copies any ledger that's relevant to the memory footprint of "old_task"
19909 * into the forked corpse's task ("new_task")
19910 */
19911 void
19912 vm_map_copy_footprint_ledgers(
19913 task_t old_task,
19914 task_t new_task)
19915 {
19916 vm_map_copy_ledger(old_task, new_task, task_ledgers.phys_footprint);
19917 vm_map_copy_ledger(old_task, new_task, task_ledgers.purgeable_nonvolatile);
19918 vm_map_copy_ledger(old_task, new_task, task_ledgers.purgeable_nonvolatile_compressed);
19919 vm_map_copy_ledger(old_task, new_task, task_ledgers.internal);
19920 vm_map_copy_ledger(old_task, new_task, task_ledgers.internal_compressed);
19921 vm_map_copy_ledger(old_task, new_task, task_ledgers.iokit_mapped);
19922 vm_map_copy_ledger(old_task, new_task, task_ledgers.alternate_accounting);
19923 vm_map_copy_ledger(old_task, new_task, task_ledgers.alternate_accounting_compressed);
19924 vm_map_copy_ledger(old_task, new_task, task_ledgers.page_table);
19925 vm_map_copy_ledger(old_task, new_task, task_ledgers.tagged_footprint);
19926 vm_map_copy_ledger(old_task, new_task, task_ledgers.tagged_footprint_compressed);
19927 vm_map_copy_ledger(old_task, new_task, task_ledgers.network_nonvolatile);
19928 vm_map_copy_ledger(old_task, new_task, task_ledgers.network_nonvolatile_compressed);
19929 vm_map_copy_ledger(old_task, new_task, task_ledgers.media_footprint);
19930 vm_map_copy_ledger(old_task, new_task, task_ledgers.media_footprint_compressed);
19931 vm_map_copy_ledger(old_task, new_task, task_ledgers.graphics_footprint);
19932 vm_map_copy_ledger(old_task, new_task, task_ledgers.graphics_footprint_compressed);
19933 vm_map_copy_ledger(old_task, new_task, task_ledgers.neural_footprint);
19934 vm_map_copy_ledger(old_task, new_task, task_ledgers.neural_footprint_compressed);
19935 vm_map_copy_ledger(old_task, new_task, task_ledgers.wired_mem);
19936 }
19937
19938 /*
19939 * vm_map_copy_ledger:
19940 * copy a single ledger from "old_task" to "new_task"
19941 */
19942 void
19943 vm_map_copy_ledger(
19944 task_t old_task,
19945 task_t new_task,
19946 int ledger_entry)
19947 {
19948 ledger_amount_t old_balance, new_balance, delta;
19949
19950 assert(new_task->map->has_corpse_footprint);
19951 if (!new_task->map->has_corpse_footprint) {
19952 return;
19953 }
19954
19955 /* turn off sanity checks for the ledger we're about to mess with */
19956 ledger_disable_panic_on_negative(new_task->ledger,
19957 ledger_entry);
19958
19959 /* adjust "new_task" to match "old_task" */
19960 ledger_get_balance(old_task->ledger,
19961 ledger_entry,
19962 &old_balance);
19963 ledger_get_balance(new_task->ledger,
19964 ledger_entry,
19965 &new_balance);
19966 if (new_balance == old_balance) {
19967 /* new == old: done */
19968 } else if (new_balance > old_balance) {
19969 /* new > old ==> new -= new - old */
19970 delta = new_balance - old_balance;
19971 ledger_debit(new_task->ledger,
19972 ledger_entry,
19973 delta);
19974 } else {
19975 /* new < old ==> new += old - new */
19976 delta = old_balance - new_balance;
19977 ledger_credit(new_task->ledger,
19978 ledger_entry,
19979 delta);
19980 }
19981 }
19982
19983 #if MACH_ASSERT
19984
19985 extern int pmap_ledgers_panic;
19986 extern int pmap_ledgers_panic_leeway;
19987
19988 #define LEDGER_DRIFT(__LEDGER) \
19989 int __LEDGER##_over; \
19990 ledger_amount_t __LEDGER##_over_total; \
19991 ledger_amount_t __LEDGER##_over_max; \
19992 int __LEDGER##_under; \
19993 ledger_amount_t __LEDGER##_under_total; \
19994 ledger_amount_t __LEDGER##_under_max
19995
19996 struct {
19997 uint64_t num_pmaps_checked;
19998
19999 LEDGER_DRIFT(phys_footprint);
20000 LEDGER_DRIFT(internal);
20001 LEDGER_DRIFT(internal_compressed);
20002 LEDGER_DRIFT(iokit_mapped);
20003 LEDGER_DRIFT(alternate_accounting);
20004 LEDGER_DRIFT(alternate_accounting_compressed);
20005 LEDGER_DRIFT(page_table);
20006 LEDGER_DRIFT(purgeable_volatile);
20007 LEDGER_DRIFT(purgeable_nonvolatile);
20008 LEDGER_DRIFT(purgeable_volatile_compressed);
20009 LEDGER_DRIFT(purgeable_nonvolatile_compressed);
20010 LEDGER_DRIFT(tagged_nofootprint);
20011 LEDGER_DRIFT(tagged_footprint);
20012 LEDGER_DRIFT(tagged_nofootprint_compressed);
20013 LEDGER_DRIFT(tagged_footprint_compressed);
20014 LEDGER_DRIFT(network_volatile);
20015 LEDGER_DRIFT(network_nonvolatile);
20016 LEDGER_DRIFT(network_volatile_compressed);
20017 LEDGER_DRIFT(network_nonvolatile_compressed);
20018 LEDGER_DRIFT(media_nofootprint);
20019 LEDGER_DRIFT(media_footprint);
20020 LEDGER_DRIFT(media_nofootprint_compressed);
20021 LEDGER_DRIFT(media_footprint_compressed);
20022 LEDGER_DRIFT(graphics_nofootprint);
20023 LEDGER_DRIFT(graphics_footprint);
20024 LEDGER_DRIFT(graphics_nofootprint_compressed);
20025 LEDGER_DRIFT(graphics_footprint_compressed);
20026 LEDGER_DRIFT(neural_nofootprint);
20027 LEDGER_DRIFT(neural_footprint);
20028 LEDGER_DRIFT(neural_nofootprint_compressed);
20029 LEDGER_DRIFT(neural_footprint_compressed);
20030 } pmap_ledgers_drift;
20031
20032 void
20033 vm_map_pmap_check_ledgers(
20034 pmap_t pmap,
20035 ledger_t ledger,
20036 int pid,
20037 char *procname)
20038 {
20039 ledger_amount_t bal;
20040 boolean_t do_panic;
20041
20042 do_panic = FALSE;
20043
20044 pmap_ledgers_drift.num_pmaps_checked++;
20045
20046 #define LEDGER_CHECK_BALANCE(__LEDGER) \
20047 MACRO_BEGIN \
20048 int panic_on_negative = TRUE; \
20049 ledger_get_balance(ledger, \
20050 task_ledgers.__LEDGER, \
20051 &bal); \
20052 ledger_get_panic_on_negative(ledger, \
20053 task_ledgers.__LEDGER, \
20054 &panic_on_negative); \
20055 if (bal != 0) { \
20056 if (panic_on_negative || \
20057 (pmap_ledgers_panic && \
20058 pmap_ledgers_panic_leeway > 0 && \
20059 (bal > (pmap_ledgers_panic_leeway * PAGE_SIZE) || \
20060 bal < (-pmap_ledgers_panic_leeway * PAGE_SIZE)))) { \
20061 do_panic = TRUE; \
20062 } \
20063 printf("LEDGER BALANCE proc %d (%s) " \
20064 "\"%s\" = %lld\n", \
20065 pid, procname, #__LEDGER, bal); \
20066 if (bal > 0) { \
20067 pmap_ledgers_drift.__LEDGER##_over++; \
20068 pmap_ledgers_drift.__LEDGER##_over_total += bal; \
20069 if (bal > pmap_ledgers_drift.__LEDGER##_over_max) { \
20070 pmap_ledgers_drift.__LEDGER##_over_max = bal; \
20071 } \
20072 } else if (bal < 0) { \
20073 pmap_ledgers_drift.__LEDGER##_under++; \
20074 pmap_ledgers_drift.__LEDGER##_under_total += bal; \
20075 if (bal < pmap_ledgers_drift.__LEDGER##_under_max) { \
20076 pmap_ledgers_drift.__LEDGER##_under_max = bal; \
20077 } \
20078 } \
20079 } \
20080 MACRO_END
20081
20082 LEDGER_CHECK_BALANCE(phys_footprint);
20083 LEDGER_CHECK_BALANCE(internal);
20084 LEDGER_CHECK_BALANCE(internal_compressed);
20085 LEDGER_CHECK_BALANCE(iokit_mapped);
20086 LEDGER_CHECK_BALANCE(alternate_accounting);
20087 LEDGER_CHECK_BALANCE(alternate_accounting_compressed);
20088 LEDGER_CHECK_BALANCE(page_table);
20089 LEDGER_CHECK_BALANCE(purgeable_volatile);
20090 LEDGER_CHECK_BALANCE(purgeable_nonvolatile);
20091 LEDGER_CHECK_BALANCE(purgeable_volatile_compressed);
20092 LEDGER_CHECK_BALANCE(purgeable_nonvolatile_compressed);
20093 LEDGER_CHECK_BALANCE(tagged_nofootprint);
20094 LEDGER_CHECK_BALANCE(tagged_footprint);
20095 LEDGER_CHECK_BALANCE(tagged_nofootprint_compressed);
20096 LEDGER_CHECK_BALANCE(tagged_footprint_compressed);
20097 LEDGER_CHECK_BALANCE(network_volatile);
20098 LEDGER_CHECK_BALANCE(network_nonvolatile);
20099 LEDGER_CHECK_BALANCE(network_volatile_compressed);
20100 LEDGER_CHECK_BALANCE(network_nonvolatile_compressed);
20101 LEDGER_CHECK_BALANCE(media_nofootprint);
20102 LEDGER_CHECK_BALANCE(media_footprint);
20103 LEDGER_CHECK_BALANCE(media_nofootprint_compressed);
20104 LEDGER_CHECK_BALANCE(media_footprint_compressed);
20105 LEDGER_CHECK_BALANCE(graphics_nofootprint);
20106 LEDGER_CHECK_BALANCE(graphics_footprint);
20107 LEDGER_CHECK_BALANCE(graphics_nofootprint_compressed);
20108 LEDGER_CHECK_BALANCE(graphics_footprint_compressed);
20109 LEDGER_CHECK_BALANCE(neural_nofootprint);
20110 LEDGER_CHECK_BALANCE(neural_footprint);
20111 LEDGER_CHECK_BALANCE(neural_nofootprint_compressed);
20112 LEDGER_CHECK_BALANCE(neural_footprint_compressed);
20113
20114 if (do_panic) {
20115 if (pmap_ledgers_panic) {
20116 panic("pmap_destroy(%p) %d[%s] has imbalanced ledgers\n",
20117 pmap, pid, procname);
20118 } else {
20119 printf("pmap_destroy(%p) %d[%s] has imbalanced ledgers\n",
20120 pmap, pid, procname);
20121 }
20122 }
20123 }
20124 #endif /* MACH_ASSERT */