1 /*
2 * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_map.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * Virtual memory mapping module.
64 */
65
66 #include <task_swapper.h>
67 #include <mach_assert.h>
68
69 #include <vm/vm_options.h>
70
71 #include <libkern/OSAtomic.h>
72
73 #include <mach/kern_return.h>
74 #include <mach/port.h>
75 #include <mach/vm_attributes.h>
76 #include <mach/vm_param.h>
77 #include <mach/vm_behavior.h>
78 #include <mach/vm_statistics.h>
79 #include <mach/memory_object.h>
80 #include <mach/mach_vm.h>
81 #include <machine/cpu_capabilities.h>
82 #include <mach/sdt.h>
83
84 #include <kern/assert.h>
85 #include <kern/backtrace.h>
86 #include <kern/counters.h>
87 #include <kern/exc_guard.h>
88 #include <kern/kalloc.h>
89 #include <kern/zalloc.h>
90
91 #include <vm/cpm.h>
92 #include <vm/vm_compressor.h>
93 #include <vm/vm_compressor_pager.h>
94 #include <vm/vm_init.h>
95 #include <vm/vm_fault.h>
96 #include <vm/vm_map.h>
97 #include <vm/vm_object.h>
98 #include <vm/vm_page.h>
99 #include <vm/vm_pageout.h>
100 #include <vm/pmap.h>
101 #include <vm/vm_kern.h>
102 #include <ipc/ipc_port.h>
103 #include <kern/sched_prim.h>
104 #include <kern/misc_protos.h>
105
106 #include <mach/vm_map_server.h>
107 #include <mach/mach_host_server.h>
108 #include <vm/vm_protos.h>
109 #include <vm/vm_purgeable_internal.h>
110
112 #include <vm/vm_shared_region.h>
113 #include <vm/vm_map_store.h>
114
115 #include <san/kasan.h>
116
117 #include <sys/codesign.h>
118 #include <libkern/section_keywords.h>
119 #if DEVELOPMENT || DEBUG
120 extern int proc_selfcsflags(void);
121 #if CONFIG_EMBEDDED
122 extern int panic_on_unsigned_execute;
123 #endif /* CONFIG_EMBEDDED */
124 #endif /* DEVELOPMENT || DEBUG */
125
126 #if __arm64__
127 extern const int fourk_binary_compatibility_unsafe;
128 extern const int fourk_binary_compatibility_allow_wx;
129 #endif /* __arm64__ */
130 extern int proc_selfpid(void);
131 extern char *proc_name_address(void *p);
132
133 #if VM_MAP_DEBUG_APPLE_PROTECT
134 int vm_map_debug_apple_protect = 0;
135 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
136 #if VM_MAP_DEBUG_FOURK
137 int vm_map_debug_fourk = 0;
138 #endif /* VM_MAP_DEBUG_FOURK */
139
140 SECURITY_READ_ONLY_LATE(int) vm_map_executable_immutable = 1;
141 int vm_map_executable_immutable_verbose = 0;
142
143 os_refgrp_decl(static, map_refgrp, "vm_map", NULL);
144
145 extern u_int32_t random(void); /* from <libkern/libkern.h> */
146 /* Internal prototypes
147 */
148
149 static void vm_map_simplify_range(
150 vm_map_t map,
151 vm_map_offset_t start,
152 vm_map_offset_t end); /* forward */
153
154 static boolean_t vm_map_range_check(
155 vm_map_t map,
156 vm_map_offset_t start,
157 vm_map_offset_t end,
158 vm_map_entry_t *entry);
159
160 static vm_map_entry_t _vm_map_entry_create(
161 struct vm_map_header *map_header, boolean_t map_locked);
162
163 static void _vm_map_entry_dispose(
164 struct vm_map_header *map_header,
165 vm_map_entry_t entry);
166
167 static void vm_map_pmap_enter(
168 vm_map_t map,
169 vm_map_offset_t addr,
170 vm_map_offset_t end_addr,
171 vm_object_t object,
172 vm_object_offset_t offset,
173 vm_prot_t protection);
174
175 static void _vm_map_clip_end(
176 struct vm_map_header *map_header,
177 vm_map_entry_t entry,
178 vm_map_offset_t end);
179
180 static void _vm_map_clip_start(
181 struct vm_map_header *map_header,
182 vm_map_entry_t entry,
183 vm_map_offset_t start);
184
185 static void vm_map_entry_delete(
186 vm_map_t map,
187 vm_map_entry_t entry);
188
189 static kern_return_t vm_map_delete(
190 vm_map_t map,
191 vm_map_offset_t start,
192 vm_map_offset_t end,
193 int flags,
194 vm_map_t zap_map);
195
196 static void vm_map_copy_insert(
197 vm_map_t map,
198 vm_map_entry_t after_where,
199 vm_map_copy_t copy);
200
201 static kern_return_t vm_map_copy_overwrite_unaligned(
202 vm_map_t dst_map,
203 vm_map_entry_t entry,
204 vm_map_copy_t copy,
205 vm_map_address_t start,
206 boolean_t discard_on_success);
207
208 static kern_return_t vm_map_copy_overwrite_aligned(
209 vm_map_t dst_map,
210 vm_map_entry_t tmp_entry,
211 vm_map_copy_t copy,
212 vm_map_offset_t start,
213 pmap_t pmap);
214
215 static kern_return_t vm_map_copyin_kernel_buffer(
216 vm_map_t src_map,
217 vm_map_address_t src_addr,
218 vm_map_size_t len,
219 boolean_t src_destroy,
220 vm_map_copy_t *copy_result); /* OUT */
221
222 static kern_return_t vm_map_copyout_kernel_buffer(
223 vm_map_t map,
224 vm_map_address_t *addr, /* IN/OUT */
225 vm_map_copy_t copy,
226 vm_map_size_t copy_size,
227 boolean_t overwrite,
228 boolean_t consume_on_success);
229
230 static void vm_map_fork_share(
231 vm_map_t old_map,
232 vm_map_entry_t old_entry,
233 vm_map_t new_map);
234
235 static boolean_t vm_map_fork_copy(
236 vm_map_t old_map,
237 vm_map_entry_t *old_entry_p,
238 vm_map_t new_map,
239 int vm_map_copyin_flags);
240
241 static kern_return_t vm_map_wire_nested(
242 vm_map_t map,
243 vm_map_offset_t start,
244 vm_map_offset_t end,
245 vm_prot_t caller_prot,
246 vm_tag_t tag,
247 boolean_t user_wire,
248 pmap_t map_pmap,
249 vm_map_offset_t pmap_addr,
250 ppnum_t *physpage_p);
251
252 static kern_return_t vm_map_unwire_nested(
253 vm_map_t map,
254 vm_map_offset_t start,
255 vm_map_offset_t end,
256 boolean_t user_wire,
257 pmap_t map_pmap,
258 vm_map_offset_t pmap_addr);
259
260 static kern_return_t vm_map_overwrite_submap_recurse(
261 vm_map_t dst_map,
262 vm_map_offset_t dst_addr,
263 vm_map_size_t dst_size);
264
265 static kern_return_t vm_map_copy_overwrite_nested(
266 vm_map_t dst_map,
267 vm_map_offset_t dst_addr,
268 vm_map_copy_t copy,
269 boolean_t interruptible,
270 pmap_t pmap,
271 boolean_t discard_on_success);
272
273 static kern_return_t vm_map_remap_extract(
274 vm_map_t map,
275 vm_map_offset_t addr,
276 vm_map_size_t size,
277 boolean_t copy,
278 struct vm_map_header *map_header,
279 vm_prot_t *cur_protection,
280 vm_prot_t *max_protection,
281 vm_inherit_t inheritance,
282 boolean_t pageable,
283 boolean_t same_map,
284 vm_map_kernel_flags_t vmk_flags);
285
286 static kern_return_t vm_map_remap_range_allocate(
287 vm_map_t map,
288 vm_map_address_t *address,
289 vm_map_size_t size,
290 vm_map_offset_t mask,
291 int flags,
292 vm_map_kernel_flags_t vmk_flags,
293 vm_tag_t tag,
294 vm_map_entry_t *map_entry);
295
296 static void vm_map_region_look_for_page(
297 vm_map_t map,
298 vm_map_offset_t va,
299 vm_object_t object,
300 vm_object_offset_t offset,
301 int max_refcnt,
302 int depth,
303 vm_region_extended_info_t extended,
304 mach_msg_type_number_t count);
305
306 static int vm_map_region_count_obj_refs(
307 vm_map_entry_t entry,
308 vm_object_t object);
309
310
311 static kern_return_t vm_map_willneed(
312 vm_map_t map,
313 vm_map_offset_t start,
314 vm_map_offset_t end);
315
316 static kern_return_t vm_map_reuse_pages(
317 vm_map_t map,
318 vm_map_offset_t start,
319 vm_map_offset_t end);
320
321 static kern_return_t vm_map_reusable_pages(
322 vm_map_t map,
323 vm_map_offset_t start,
324 vm_map_offset_t end);
325
326 static kern_return_t vm_map_can_reuse(
327 vm_map_t map,
328 vm_map_offset_t start,
329 vm_map_offset_t end);
330
331 #if MACH_ASSERT
332 static kern_return_t vm_map_pageout(
333 vm_map_t map,
334 vm_map_offset_t start,
335 vm_map_offset_t end);
336 #endif /* MACH_ASSERT */
337
338 static void vm_map_corpse_footprint_destroy(
339 vm_map_t map);
340
341 pid_t find_largest_process_vm_map_entries(void);
342
343 /*
344 * Macros to copy a vm_map_entry. We must be careful to correctly
345 * manage the wired page count. vm_map_entry_copy() creates a new
346  * map entry that maps the same memory - the wired count in the new entry
347 * must be set to zero. vm_map_entry_copy_full() creates a new
348 * entry that is identical to the old entry. This preserves the
349 * wire count; it's used for map splitting and zone changing in
350 * vm_map_copyout.
351 */
352
353 #if CONFIG_EMBEDDED
354
355 /*
356 * The "used_for_jit" flag was copied from OLD to NEW in vm_map_entry_copy().
357 * But for security reasons on embedded platforms, we don't want the
358 * new mapping to be "used for jit", so we always reset the flag here.
359 * Same for "pmap_cs_associated".
360 */
361 #define VM_MAP_ENTRY_COPY_CODE_SIGNING(NEW, OLD) \
362 MACRO_BEGIN \
363 (NEW)->used_for_jit = FALSE; \
364 (NEW)->pmap_cs_associated = FALSE; \
365 MACRO_END
366
367 #else /* CONFIG_EMBEDDED */
368
369 /*
370 * The "used_for_jit" flag was copied from OLD to NEW in vm_map_entry_copy().
371 * On macOS, the new mapping can be "used for jit".
372 */
373 #define VM_MAP_ENTRY_COPY_CODE_SIGNING(NEW, OLD) \
374 MACRO_BEGIN \
375 assert((NEW)->used_for_jit == (OLD)->used_for_jit); \
376 assert((NEW)->pmap_cs_associated == FALSE); \
377 MACRO_END
378
379 #endif /* CONFIG_EMBEDDED */
380
381 #define vm_map_entry_copy(NEW, OLD) \
382 MACRO_BEGIN \
383 boolean_t _vmec_reserved = (NEW)->from_reserved_zone; \
384 *(NEW) = *(OLD); \
385 (NEW)->is_shared = FALSE; \
386 (NEW)->needs_wakeup = FALSE; \
387 (NEW)->in_transition = FALSE; \
388 (NEW)->wired_count = 0; \
389 (NEW)->user_wired_count = 0; \
390 (NEW)->permanent = FALSE; \
391 VM_MAP_ENTRY_COPY_CODE_SIGNING((NEW),(OLD)); \
392 (NEW)->from_reserved_zone = _vmec_reserved; \
393 if ((NEW)->iokit_acct) { \
394 assertf(!(NEW)->use_pmap, "old %p new %p\n", (OLD), (NEW)); \
395 (NEW)->iokit_acct = FALSE; \
396 (NEW)->use_pmap = TRUE; \
397 } \
398 (NEW)->vme_resilient_codesign = FALSE; \
399 (NEW)->vme_resilient_media = FALSE; \
400 (NEW)->vme_atomic = FALSE; \
401 (NEW)->vme_no_copy_on_read = FALSE; \
402 MACRO_END
403
404 #define vm_map_entry_copy_full(NEW, OLD) \
405 MACRO_BEGIN \
406 boolean_t _vmecf_reserved = (NEW)->from_reserved_zone; \
407 (*(NEW) = *(OLD)); \
408 (NEW)->from_reserved_zone = _vmecf_reserved; \
409 MACRO_END
410
411 /*
412 * Normal lock_read_to_write() returns FALSE/0 on failure.
413  * These functions instead return zero on success and a non-zero value on failure.
414 */
415 __attribute__((always_inline))
416 int
417 vm_map_lock_read_to_write(vm_map_t map)
418 {
419 if (lck_rw_lock_shared_to_exclusive(&(map)->lock)) {
420 DTRACE_VM(vm_map_lock_upgrade);
421 return 0;
422 }
423 return 1;
424 }
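/*
 * A minimal sketch of the intended caller pattern (illustrative only, not a
 * verbatim excerpt from this file; the "RetryLookup" label is hypothetical).
 * On failure, lck_rw_lock_shared_to_exclusive() has already dropped the
 * shared lock, so the caller must re-acquire the read lock and re-validate
 * anything it looked up under it:
 *
 *	RetryLookup:
 *		vm_map_lock_read(map);
 *		... look up the entry of interest ...
 *		if (vm_map_lock_read_to_write(map)) {
 *			- upgrade failed and the read lock was dropped:
 *			-  start over and redo the lookup
 *			goto RetryLookup;
 *		}
 *		... map is now locked for writing; modify it ...
 *		vm_map_unlock(map);
 */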
425
426 __attribute__((always_inline))
427 boolean_t
428 vm_map_try_lock(vm_map_t map)
429 {
430 if (lck_rw_try_lock_exclusive(&(map)->lock)) {
431 DTRACE_VM(vm_map_lock_w);
432 return TRUE;
433 }
434 return FALSE;
435 }
436
437 __attribute__((always_inline))
438 boolean_t
439 vm_map_try_lock_read(vm_map_t map)
440 {
441 if (lck_rw_try_lock_shared(&(map)->lock)) {
442 DTRACE_VM(vm_map_lock_r);
443 return TRUE;
444 }
445 return FALSE;
446 }
447
448 /*
449 * Decide if we want to allow processes to execute from their data or stack areas.
450 * override_nx() returns true if we do. Data/stack execution can be enabled independently
451 * for 32 and 64 bit processes. Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
452 * or allow_stack_exec to enable data execution for that type of data area for that particular
453 * ABI (or both by or'ing the flags together). These are initialized in the architecture
454 * specific pmap files since the default behavior varies according to architecture. The
455 * main reason it varies is because of the need to provide binary compatibility with old
456 * applications that were written before these restrictions came into being. In the old
457 * days, an app could execute anything it could read, but this has slowly been tightened
458 * up over time. The default behavior is:
459 *
460 * 32-bit PPC apps may execute from both stack and data areas
461  * 32-bit Intel apps may execute from data areas but not stack
462 * 64-bit PPC/Intel apps may not execute from either data or stack
463 *
464 * An application on any architecture may override these defaults by explicitly
465 * adding PROT_EXEC permission to the page in question with the mprotect(2)
466 * system call. This code here just determines what happens when an app tries to
467 * execute from a page that lacks execute permission.
468 *
469 * Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
470 * default behavior for both 32 and 64 bit apps on a system-wide basis. Furthermore,
471 * a Mach-O header flag bit (MH_NO_HEAP_EXECUTION) can be used to forcibly disallow
472 * execution from data areas for a particular binary even if the arch normally permits it. As
473 * a final wrinkle, a posix_spawn attribute flag can be used to negate this opt-in header bit
474 * to support some complicated use cases, notably browsers with out-of-process plugins that
475 * are not all NX-safe.
476 */
477
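/*
 * A hypothetical example of how these flags compose (the values below are
 * illustrative, not the shipping defaults): with
 *
 *	allow_data_exec  = VM_ABI_32;			- 32-bit data execution only
 *	allow_stack_exec = VM_ABI_32 | VM_ABI_64;	- stack execution for both ABIs
 *
 * override_nx() would return non-zero for a 64-bit process attempting to
 * execute from a VM_MEMORY_STACK region, but zero for the same process
 * executing from a data region (assuming map_disallow_data_exec is FALSE).
 */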
478 extern int allow_data_exec, allow_stack_exec;
479
480 int
481 override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
482 {
483 int current_abi;
484
485 if (map->pmap == kernel_pmap) {
486 return FALSE;
487 }
488
489 /*
490 * Determine if the app is running in 32 or 64 bit mode.
491 */
492
493 if (vm_map_is_64bit(map)) {
494 current_abi = VM_ABI_64;
495 } else {
496 current_abi = VM_ABI_32;
497 }
498
499 /*
500 * Determine if we should allow the execution based on whether it's a
501 * stack or data area and the current architecture.
502 */
503
504 if (user_tag == VM_MEMORY_STACK) {
505 return allow_stack_exec & current_abi;
506 }
507
508 return (allow_data_exec & current_abi) && (map->map_disallow_data_exec == FALSE);
509 }
510
511
512 /*
513 * Virtual memory maps provide for the mapping, protection,
514 * and sharing of virtual memory objects. In addition,
515 * this module provides for an efficient virtual copy of
516 * memory from one map to another.
517 *
518 * Synchronization is required prior to most operations.
519 *
520 * Maps consist of an ordered doubly-linked list of simple
521 * entries; a single hint is used to speed up lookups.
522 *
523 * Sharing maps have been deleted from this version of Mach.
524 * All shared objects are now mapped directly into the respective
525 * maps. This requires a change in the copy on write strategy;
526 * the asymmetric (delayed) strategy is used for shared temporary
527 * objects instead of the symmetric (shadow) strategy. All maps
528 * are now "top level" maps (either task map, kernel map or submap
529 * of the kernel map).
530 *
531  * Since portions of maps are specified by start/end addresses,
532 * which may not align with existing map entries, all
533 * routines merely "clip" entries to these start/end values.
534 * [That is, an entry is split into two, bordering at a
535 * start or end value.] Note that these clippings may not
536 * always be necessary (as the two resulting entries are then
537 * not changed); however, the clipping is done for convenience.
538 * No attempt is currently made to "glue back together" two
539 * abutting entries.
540 *
541 * The symmetric (shadow) copy strategy implements virtual copy
542 * by copying VM object references from one map to
543 * another, and then marking both regions as copy-on-write.
544 * It is important to note that only one writeable reference
545 * to a VM object region exists in any map when this strategy
546 * is used -- this means that shadow object creation can be
547  * delayed until a write operation occurs. The asymmetric (delayed)
548 * strategy allows multiple maps to have writeable references to
549 * the same region of a vm object, and hence cannot delay creating
550 * its copy objects. See vm_object_copy_quickly() in vm_object.c.
551 * Copying of permanent objects is completely different; see
552 * vm_object_copy_strategically() in vm_object.c.
553 */
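/*
 * Concrete example of the clipping behavior described above (addresses are
 * made up for illustration): clipping an entry covering [0x1000, 0x5000) at
 * a start address of 0x3000 splits it into two abutting entries,
 * [0x1000, 0x3000) and [0x3000, 0x5000); the requested operation then
 * applies only to the entry (or entries) fully inside the given range.
 */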
554
555 static zone_t vm_map_zone; /* zone for vm_map structures */
556 zone_t vm_map_entry_zone; /* zone for vm_map_entry structures */
557 static zone_t vm_map_entry_reserved_zone; /* zone with reserve for non-blocking allocations */
558 static zone_t vm_map_copy_zone; /* zone for vm_map_copy structures */
559 zone_t vm_map_holes_zone; /* zone for vm map holes (vm_map_links) structures */
560
561
562 /*
563 * Placeholder object for submap operations. This object is dropped
564 * into the range by a call to vm_map_find, and removed when
565 * vm_map_submap creates the submap.
566 */
567
568 vm_object_t vm_submap_object;
569
570 static void *map_data;
571 static vm_size_t map_data_size;
572 static void *kentry_data;
573 static vm_size_t kentry_data_size;
574 static void *map_holes_data;
575 static vm_size_t map_holes_data_size;
576
577 #if CONFIG_EMBEDDED
578 #define NO_COALESCE_LIMIT 0
579 #else
580 #define NO_COALESCE_LIMIT ((1024 * 128) - 1)
581 #endif
582
583 /* Skip acquiring locks if we're in the midst of a kernel core dump */
584 unsigned int not_in_kdp = 1;
585
586 unsigned int vm_map_set_cache_attr_count = 0;
587
588 kern_return_t
589 vm_map_set_cache_attr(
590 vm_map_t map,
591 vm_map_offset_t va)
592 {
593 vm_map_entry_t map_entry;
594 vm_object_t object;
595 kern_return_t kr = KERN_SUCCESS;
596
597 vm_map_lock_read(map);
598
599 if (!vm_map_lookup_entry(map, va, &map_entry) ||
600 map_entry->is_sub_map) {
601 /*
602 * that memory is not properly mapped
603 */
604 kr = KERN_INVALID_ARGUMENT;
605 goto done;
606 }
607 object = VME_OBJECT(map_entry);
608
609 if (object == VM_OBJECT_NULL) {
610 /*
611 * there should be a VM object here at this point
612 */
613 kr = KERN_INVALID_ARGUMENT;
614 goto done;
615 }
616 vm_object_lock(object);
617 object->set_cache_attr = TRUE;
618 vm_object_unlock(object);
619
620 vm_map_set_cache_attr_count++;
621 done:
622 vm_map_unlock_read(map);
623
624 return kr;
625 }
626
627
628 #if CONFIG_CODE_DECRYPTION
629 /*
630 * vm_map_apple_protected:
631 * This remaps the requested part of the object with an object backed by
632 * the decrypting pager.
633 * crypt_info contains entry points and session data for the crypt module.
634 * The crypt_info block will be copied by vm_map_apple_protected. The data structures
635 * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
636 */
637 kern_return_t
638 vm_map_apple_protected(
639 vm_map_t map,
640 vm_map_offset_t start,
641 vm_map_offset_t end,
642 vm_object_offset_t crypto_backing_offset,
643 struct pager_crypt_info *crypt_info)
644 {
645 boolean_t map_locked;
646 kern_return_t kr;
647 vm_map_entry_t map_entry;
648 struct vm_map_entry tmp_entry;
649 memory_object_t unprotected_mem_obj;
650 vm_object_t protected_object;
651 vm_map_offset_t map_addr;
652 vm_map_offset_t start_aligned, end_aligned;
653 vm_object_offset_t crypto_start, crypto_end;
654 int vm_flags;
655 vm_map_kernel_flags_t vmk_flags;
656
657 vm_flags = 0;
658 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
659
660 map_locked = FALSE;
661 unprotected_mem_obj = MEMORY_OBJECT_NULL;
662
663 start_aligned = vm_map_trunc_page(start, PAGE_MASK_64);
664 end_aligned = vm_map_round_page(end, PAGE_MASK_64);
665 start_aligned = vm_map_trunc_page(start_aligned, VM_MAP_PAGE_MASK(map));
666 end_aligned = vm_map_round_page(end_aligned, VM_MAP_PAGE_MASK(map));
667
668 #if __arm64__
669 /*
670 * "start" and "end" might be 4K-aligned but not 16K-aligned,
671 * so we might have to loop and establish up to 3 mappings:
672 *
673 * + the first 16K-page, which might overlap with the previous
674 * 4K-aligned mapping,
675 * + the center,
676 * + the last 16K-page, which might overlap with the next
677 * 4K-aligned mapping.
678  * Each of these mappings might be backed by a vnode pager (if
679 * properly page-aligned) or a "fourk_pager", itself backed by a
680 * vnode pager (if 4K-aligned but not page-aligned).
681 */
682 #endif /* __arm64__ */
683
684 map_addr = start_aligned;
685 for (map_addr = start_aligned;
686 map_addr < end;
687 map_addr = tmp_entry.vme_end) {
688 vm_map_lock(map);
689 map_locked = TRUE;
690
691 /* lookup the protected VM object */
692 if (!vm_map_lookup_entry(map,
693 map_addr,
694 &map_entry) ||
695 map_entry->is_sub_map ||
696 VME_OBJECT(map_entry) == VM_OBJECT_NULL ||
697 !(map_entry->protection & VM_PROT_EXECUTE)) {
698 /* that memory is not properly mapped */
699 kr = KERN_INVALID_ARGUMENT;
700 goto done;
701 }
702
703 /* get the protected object to be decrypted */
704 protected_object = VME_OBJECT(map_entry);
705 if (protected_object == VM_OBJECT_NULL) {
706 /* there should be a VM object here at this point */
707 kr = KERN_INVALID_ARGUMENT;
708 goto done;
709 }
710 /* ensure protected object stays alive while map is unlocked */
711 vm_object_reference(protected_object);
712
713 /* limit the map entry to the area we want to cover */
714 vm_map_clip_start(map, map_entry, start_aligned);
715 vm_map_clip_end(map, map_entry, end_aligned);
716
717 tmp_entry = *map_entry;
718 map_entry = VM_MAP_ENTRY_NULL; /* not valid after unlocking map */
719 vm_map_unlock(map);
720 map_locked = FALSE;
721
722 /*
723 * This map entry might be only partially encrypted
724 * (if not fully "page-aligned").
725 */
726 crypto_start = 0;
727 crypto_end = tmp_entry.vme_end - tmp_entry.vme_start;
728 if (tmp_entry.vme_start < start) {
729 if (tmp_entry.vme_start != start_aligned) {
730 kr = KERN_INVALID_ADDRESS;
731 }
732 crypto_start += (start - tmp_entry.vme_start);
733 }
734 if (tmp_entry.vme_end > end) {
735 if (tmp_entry.vme_end != end_aligned) {
736 kr = KERN_INVALID_ADDRESS;
737 }
738 crypto_end -= (tmp_entry.vme_end - end);
739 }
740
741 /*
742 * This "extra backing offset" is needed to get the decryption
743 * routine to use the right key. It adjusts for the possibly
744 * relative offset of an interposed "4K" pager...
745 */
746 if (crypto_backing_offset == (vm_object_offset_t) -1) {
747 crypto_backing_offset = VME_OFFSET(&tmp_entry);
748 }
749
750 /*
751 * Lookup (and create if necessary) the protected memory object
752 * matching that VM object.
753 * If successful, this also grabs a reference on the memory object,
754 * to guarantee that it doesn't go away before we get a chance to map
755 * it.
756 */
757 unprotected_mem_obj = apple_protect_pager_setup(
758 protected_object,
759 VME_OFFSET(&tmp_entry),
760 crypto_backing_offset,
761 crypt_info,
762 crypto_start,
763 crypto_end);
764
765 /* release extra ref on protected object */
766 vm_object_deallocate(protected_object);
767
768 if (unprotected_mem_obj == NULL) {
769 kr = KERN_FAILURE;
770 goto done;
771 }
772
773 vm_flags = VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
774 /* can overwrite an immutable mapping */
775 vmk_flags.vmkf_overwrite_immutable = TRUE;
776 #if __arm64__
777 if (tmp_entry.used_for_jit &&
778 (VM_MAP_PAGE_SHIFT(map) != FOURK_PAGE_SHIFT ||
779 PAGE_SHIFT != FOURK_PAGE_SHIFT) &&
780 fourk_binary_compatibility_unsafe &&
781 fourk_binary_compatibility_allow_wx) {
782 printf("** FOURK_COMPAT [%d]: "
783 "allowing write+execute at 0x%llx\n",
784 proc_selfpid(), tmp_entry.vme_start);
785 vmk_flags.vmkf_map_jit = TRUE;
786 }
787 #endif /* __arm64__ */
788
789 /* map this memory object in place of the current one */
790 map_addr = tmp_entry.vme_start;
791 kr = vm_map_enter_mem_object(map,
792 &map_addr,
793 (tmp_entry.vme_end -
794 tmp_entry.vme_start),
795 (mach_vm_offset_t) 0,
796 vm_flags,
797 vmk_flags,
798 VM_KERN_MEMORY_NONE,
799 (ipc_port_t)(uintptr_t) unprotected_mem_obj,
800 0,
801 TRUE,
802 tmp_entry.protection,
803 tmp_entry.max_protection,
804 tmp_entry.inheritance);
805 assertf(kr == KERN_SUCCESS,
806 "kr = 0x%x\n", kr);
807 assertf(map_addr == tmp_entry.vme_start,
808 "map_addr=0x%llx vme_start=0x%llx tmp_entry=%p\n",
809 (uint64_t)map_addr,
810 (uint64_t) tmp_entry.vme_start,
811 &tmp_entry);
812
813 #if VM_MAP_DEBUG_APPLE_PROTECT
814 if (vm_map_debug_apple_protect) {
815 printf("APPLE_PROTECT: map %p [0x%llx:0x%llx] pager %p:"
816 " backing:[object:%p,offset:0x%llx,"
817 "crypto_backing_offset:0x%llx,"
818 "crypto_start:0x%llx,crypto_end:0x%llx]\n",
819 map,
820 (uint64_t) map_addr,
821 (uint64_t) (map_addr + (tmp_entry.vme_end -
822 tmp_entry.vme_start)),
823 unprotected_mem_obj,
824 protected_object,
825 VME_OFFSET(&tmp_entry),
826 crypto_backing_offset,
827 crypto_start,
828 crypto_end);
829 }
830 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
831
832 /*
833 * Release the reference obtained by
834 * apple_protect_pager_setup().
835 * The mapping (if it succeeded) is now holding a reference on
836 * the memory object.
837 */
838 memory_object_deallocate(unprotected_mem_obj);
839 unprotected_mem_obj = MEMORY_OBJECT_NULL;
840
841 /* continue with next map entry */
842 crypto_backing_offset += (tmp_entry.vme_end -
843 tmp_entry.vme_start);
844 crypto_backing_offset -= crypto_start;
845 }
846 kr = KERN_SUCCESS;
847
848 done:
849 if (map_locked) {
850 vm_map_unlock(map);
851 }
852 return kr;
853 }
854 #endif /* CONFIG_CODE_DECRYPTION */
855
856
857 lck_grp_t vm_map_lck_grp;
858 lck_grp_attr_t vm_map_lck_grp_attr;
859 lck_attr_t vm_map_lck_attr;
860 lck_attr_t vm_map_lck_rw_attr;
861
862 #if CONFIG_EMBEDDED
863 int malloc_no_cow = 1;
864 #define VM_PROTECT_WX_FAIL 0
865 #else /* CONFIG_EMBEDDED */
866 int malloc_no_cow = 0;
867 #define VM_PROTECT_WX_FAIL 1
868 #endif /* CONFIG_EMBEDDED */
869 uint64_t vm_memory_malloc_no_cow_mask = 0ULL;
870
871 /*
872 * vm_map_init:
873 *
874 * Initialize the vm_map module. Must be called before
875 * any other vm_map routines.
876 *
877 * Map and entry structures are allocated from zones -- we must
878 * initialize those zones.
879 *
880 * There are three zones of interest:
881 *
882 * vm_map_zone: used to allocate maps.
883 * vm_map_entry_zone: used to allocate map entries.
884 * vm_map_entry_reserved_zone: fallback zone for kernel map entries
885 *
886 * The kernel allocates map entries from a special zone that is initially
887 * "crammed" with memory. It would be difficult (perhaps impossible) for
888  * the kernel to allocate more memory to an entry zone when it became
889 * empty since the very act of allocating memory implies the creation
890 * of a new entry.
891 */
892 void
893 vm_map_init(
894 void)
895 {
896 vm_size_t entry_zone_alloc_size;
897 const char *mez_name = "VM map entries";
898
899 vm_map_zone = zinit((vm_map_size_t) sizeof(struct _vm_map), 40 * 1024,
900 PAGE_SIZE, "maps");
901 zone_change(vm_map_zone, Z_NOENCRYPT, TRUE);
902 #if defined(__LP64__)
903 entry_zone_alloc_size = PAGE_SIZE * 5;
904 #else
905 entry_zone_alloc_size = PAGE_SIZE * 6;
906 #endif
907 vm_map_entry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
908 1024 * 1024, entry_zone_alloc_size,
909 mez_name);
910 zone_change(vm_map_entry_zone, Z_NOENCRYPT, TRUE);
911 zone_change(vm_map_entry_zone, Z_NOCALLOUT, TRUE);
912 zone_change(vm_map_entry_zone, Z_GZALLOC_EXEMPT, TRUE);
913
914 vm_map_entry_reserved_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
915 kentry_data_size * 64, kentry_data_size,
916 "Reserved VM map entries");
917 zone_change(vm_map_entry_reserved_zone, Z_NOENCRYPT, TRUE);
918 /* Don't quarantine because we always need elements available */
919 zone_change(vm_map_entry_reserved_zone, Z_KASAN_QUARANTINE, FALSE);
920
921 vm_map_copy_zone = zinit((vm_map_size_t) sizeof(struct vm_map_copy),
922 16 * 1024, PAGE_SIZE, "VM map copies");
923 zone_change(vm_map_copy_zone, Z_NOENCRYPT, TRUE);
924
925 vm_map_holes_zone = zinit((vm_map_size_t) sizeof(struct vm_map_links),
926 16 * 1024, PAGE_SIZE, "VM map holes");
927 zone_change(vm_map_holes_zone, Z_NOENCRYPT, TRUE);
928
929 /*
930 * Cram the map and kentry zones with initial data.
931 * Set reserved_zone non-collectible to aid zone_gc().
932 */
933 zone_change(vm_map_zone, Z_COLLECT, FALSE);
934 zone_change(vm_map_zone, Z_FOREIGN, TRUE);
935 zone_change(vm_map_zone, Z_GZALLOC_EXEMPT, TRUE);
936
937 zone_change(vm_map_entry_reserved_zone, Z_COLLECT, FALSE);
938 zone_change(vm_map_entry_reserved_zone, Z_EXPAND, FALSE);
939 zone_change(vm_map_entry_reserved_zone, Z_FOREIGN, TRUE);
940 zone_change(vm_map_entry_reserved_zone, Z_NOCALLOUT, TRUE);
941 zone_change(vm_map_entry_reserved_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
942 zone_change(vm_map_copy_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
943 zone_change(vm_map_entry_reserved_zone, Z_GZALLOC_EXEMPT, TRUE);
944
945 zone_change(vm_map_holes_zone, Z_COLLECT, TRUE);
946 zone_change(vm_map_holes_zone, Z_EXPAND, TRUE);
947 zone_change(vm_map_holes_zone, Z_FOREIGN, TRUE);
948 zone_change(vm_map_holes_zone, Z_NOCALLOUT, TRUE);
949 zone_change(vm_map_holes_zone, Z_CALLERACCT, TRUE);
950 zone_change(vm_map_holes_zone, Z_GZALLOC_EXEMPT, TRUE);
951
952 /*
953 * Add the stolen memory to zones, adjust zone size and stolen counts.
954 * zcram only up to the maximum number of pages for each zone chunk.
955 */
956 zcram(vm_map_zone, (vm_offset_t)map_data, map_data_size);
957
958 const vm_size_t stride = ZONE_CHUNK_MAXPAGES * PAGE_SIZE;
959 for (vm_offset_t off = 0; off < kentry_data_size; off += stride) {
960 zcram(vm_map_entry_reserved_zone,
961 (vm_offset_t)kentry_data + off,
962 MIN(kentry_data_size - off, stride));
963 }
964 for (vm_offset_t off = 0; off < map_holes_data_size; off += stride) {
965 zcram(vm_map_holes_zone,
966 (vm_offset_t)map_holes_data + off,
967 MIN(map_holes_data_size - off, stride));
968 }
969
970 /*
971 * Since these are covered by zones, remove them from stolen page accounting.
972 */
973 VM_PAGE_MOVE_STOLEN(atop_64(map_data_size) + atop_64(kentry_data_size) + atop_64(map_holes_data_size));
974
975 lck_grp_attr_setdefault(&vm_map_lck_grp_attr);
976 lck_grp_init(&vm_map_lck_grp, "vm_map", &vm_map_lck_grp_attr);
977 lck_attr_setdefault(&vm_map_lck_attr);
978
979 lck_attr_setdefault(&vm_map_lck_rw_attr);
980 lck_attr_cleardebug(&vm_map_lck_rw_attr);
981
982 #if VM_MAP_DEBUG_APPLE_PROTECT
983 PE_parse_boot_argn("vm_map_debug_apple_protect",
984 &vm_map_debug_apple_protect,
985 sizeof(vm_map_debug_apple_protect));
986 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
987 #if VM_MAP_DEBUG_FOURK
988 PE_parse_boot_argn("vm_map_debug_fourk",
989 &vm_map_debug_fourk,
990 sizeof(vm_map_debug_fourk));
991 #endif /* VM_MAP_DEBUG_FOURK */
992 PE_parse_boot_argn("vm_map_executable_immutable",
993 &vm_map_executable_immutable,
994 sizeof(vm_map_executable_immutable));
995 PE_parse_boot_argn("vm_map_executable_immutable_verbose",
996 &vm_map_executable_immutable_verbose,
997 sizeof(vm_map_executable_immutable_verbose));
998
999 PE_parse_boot_argn("malloc_no_cow",
1000 &malloc_no_cow,
1001 sizeof(malloc_no_cow));
1002 if (malloc_no_cow) {
1003 vm_memory_malloc_no_cow_mask = 0ULL;
1004 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC;
1005 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_SMALL;
1006 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_MEDIUM;
1007 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE;
1008 // vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_HUGE;
1009 // vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_REALLOC;
1010 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_TINY;
1011 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE_REUSABLE;
1012 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE_REUSED;
1013 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_NANO;
1014 // vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_TCMALLOC;
1015 PE_parse_boot_argn("vm_memory_malloc_no_cow_mask",
1016 &vm_memory_malloc_no_cow_mask,
1017 sizeof(vm_memory_malloc_no_cow_mask));
1018 }
1019 }
1020
1021 void
1022 vm_map_steal_memory(
1023 void)
1024 {
1025 uint32_t kentry_initial_pages;
1026
1027 map_data_size = round_page(10 * sizeof(struct _vm_map));
1028 map_data = pmap_steal_memory(map_data_size);
1029
1030 /*
1031 * kentry_initial_pages corresponds to the number of kernel map entries
1032 * required during bootstrap until the asynchronous replenishment
1033 * scheme is activated and/or entries are available from the general
1034 * map entry pool.
1035 */
1036 #if defined(__LP64__)
1037 kentry_initial_pages = 10;
1038 #else
1039 kentry_initial_pages = 6;
1040 #endif
1041
1042 #if CONFIG_GZALLOC
1043 /* If using the guard allocator, reserve more memory for the kernel
1044 * reserved map entry pool.
1045 */
1046 if (gzalloc_enabled()) {
1047 kentry_initial_pages *= 1024;
1048 }
1049 #endif
1050
1051 kentry_data_size = kentry_initial_pages * PAGE_SIZE;
1052 kentry_data = pmap_steal_memory(kentry_data_size);
1053
1054 map_holes_data_size = kentry_data_size;
1055 map_holes_data = pmap_steal_memory(map_holes_data_size);
1056 }
1057
1058 boolean_t vm_map_supports_hole_optimization = FALSE;
1059
1060 void
1061 vm_kernel_reserved_entry_init(void)
1062 {
1063 zone_prio_refill_configure(vm_map_entry_reserved_zone, (6 * PAGE_SIZE) / sizeof(struct vm_map_entry));
1064
1065 /*
1066 * Once we have our replenish thread set up, we can start using the vm_map_holes zone.
1067 */
1068 zone_prio_refill_configure(vm_map_holes_zone, (6 * PAGE_SIZE) / sizeof(struct vm_map_links));
1069 vm_map_supports_hole_optimization = TRUE;
1070 }
1071
1072 void
1073 vm_map_disable_hole_optimization(vm_map_t map)
1074 {
1075 vm_map_entry_t head_entry, hole_entry, next_hole_entry;
1076
1077 if (map->holelistenabled) {
1078 head_entry = hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
1079
1080 while (hole_entry != NULL) {
1081 next_hole_entry = hole_entry->vme_next;
1082
1083 hole_entry->vme_next = NULL;
1084 hole_entry->vme_prev = NULL;
1085 zfree(vm_map_holes_zone, hole_entry);
1086
1087 if (next_hole_entry == head_entry) {
1088 hole_entry = NULL;
1089 } else {
1090 hole_entry = next_hole_entry;
1091 }
1092 }
1093
1094 map->holes_list = NULL;
1095 map->holelistenabled = FALSE;
1096
1097 map->first_free = vm_map_first_entry(map);
1098 SAVE_HINT_HOLE_WRITE(map, NULL);
1099 }
1100 }
1101
1102 boolean_t
1103 vm_kernel_map_is_kernel(vm_map_t map)
1104 {
1105 return map->pmap == kernel_pmap;
1106 }
1107
1108 /*
1109 * vm_map_create:
1110 *
1111 * Creates and returns a new empty VM map with
1112 * the given physical map structure, and having
1113 * the given lower and upper address bounds.
1114 */
1115
1116 vm_map_t
1117 vm_map_create(
1118 pmap_t pmap,
1119 vm_map_offset_t min,
1120 vm_map_offset_t max,
1121 boolean_t pageable)
1122 {
1123 int options;
1124
1125 options = 0;
1126 if (pageable) {
1127 options |= VM_MAP_CREATE_PAGEABLE;
1128 }
1129 return vm_map_create_options(pmap, min, max, options);
1130 }
1131
1132 vm_map_t
1133 vm_map_create_options(
1134 pmap_t pmap,
1135 vm_map_offset_t min,
1136 vm_map_offset_t max,
1137 int options)
1138 {
1139 vm_map_t result;
1140 struct vm_map_links *hole_entry = NULL;
1141
1142 if (options & ~(VM_MAP_CREATE_ALL_OPTIONS)) {
1143 /* unknown option */
1144 return VM_MAP_NULL;
1145 }
1146
1147 result = (vm_map_t) zalloc(vm_map_zone);
1148 if (result == VM_MAP_NULL) {
1149 panic("vm_map_create");
1150 }
1151
1152 vm_map_first_entry(result) = vm_map_to_entry(result);
1153 vm_map_last_entry(result) = vm_map_to_entry(result);
1154 result->hdr.nentries = 0;
1155 if (options & VM_MAP_CREATE_PAGEABLE) {
1156 result->hdr.entries_pageable = TRUE;
1157 } else {
1158 result->hdr.entries_pageable = FALSE;
1159 }
1160
1161 vm_map_store_init( &(result->hdr));
1162
1163 result->hdr.page_shift = PAGE_SHIFT;
1164
1165 result->size = 0;
1166 result->user_wire_limit = MACH_VM_MAX_ADDRESS; /* default limit is unlimited */
1167 result->user_wire_size = 0;
1168 #if !CONFIG_EMBEDDED
1169 result->vmmap_high_start = 0;
1170 #endif
1171 os_ref_init_count(&result->map_refcnt, &map_refgrp, 1);
1172 #if TASK_SWAPPER
1173 result->res_count = 1;
1174 result->sw_state = MAP_SW_IN;
1175 #endif /* TASK_SWAPPER */
1176 result->pmap = pmap;
1177 result->min_offset = min;
1178 result->max_offset = max;
1179 result->wiring_required = FALSE;
1180 result->no_zero_fill = FALSE;
1181 result->mapped_in_other_pmaps = FALSE;
1182 result->wait_for_space = FALSE;
1183 result->switch_protect = FALSE;
1184 result->disable_vmentry_reuse = FALSE;
1185 result->map_disallow_data_exec = FALSE;
1186 result->is_nested_map = FALSE;
1187 result->map_disallow_new_exec = FALSE;
1188 result->highest_entry_end = 0;
1189 result->first_free = vm_map_to_entry(result);
1190 result->hint = vm_map_to_entry(result);
1191 result->jit_entry_exists = FALSE;
1192
1193 /* "has_corpse_footprint" and "holelistenabled" are mutually exclusive */
1194 if (options & VM_MAP_CREATE_CORPSE_FOOTPRINT) {
1195 result->has_corpse_footprint = TRUE;
1196 result->holelistenabled = FALSE;
1197 result->vmmap_corpse_footprint = NULL;
1198 } else {
1199 result->has_corpse_footprint = FALSE;
1200 if (vm_map_supports_hole_optimization) {
1201 hole_entry = zalloc(vm_map_holes_zone);
1202
1203 hole_entry->start = min;
1204 #if defined(__arm__) || defined(__arm64__)
1205 hole_entry->end = result->max_offset;
1206 #else
1207 hole_entry->end = (max > (vm_map_offset_t)MACH_VM_MAX_ADDRESS) ? max : (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
1208 #endif
1209 result->holes_list = result->hole_hint = hole_entry;
1210 hole_entry->prev = hole_entry->next = CAST_TO_VM_MAP_ENTRY(hole_entry);
1211 result->holelistenabled = TRUE;
1212 } else {
1213 result->holelistenabled = FALSE;
1214 }
1215 }
1216
1217 vm_map_lock_init(result);
1218 lck_mtx_init_ext(&result->s_lock, &result->s_lock_ext, &vm_map_lck_grp, &vm_map_lck_attr);
1219
1220 return result;
1221 }
1222
1223 /*
1224 * vm_map_entry_create: [ internal use only ]
1225 *
1226 * Allocates a VM map entry for insertion in the
1227 * given map (or map copy). No fields are filled.
1228 */
1229 #define vm_map_entry_create(map, map_locked) _vm_map_entry_create(&(map)->hdr, map_locked)
1230
1231 #define vm_map_copy_entry_create(copy, map_locked) \
1232 _vm_map_entry_create(&(copy)->cpy_hdr, map_locked)
1233 unsigned reserved_zalloc_count, nonreserved_zalloc_count;
1234
1235 static vm_map_entry_t
1236 _vm_map_entry_create(
1237 struct vm_map_header *map_header, boolean_t __unused map_locked)
1238 {
1239 zone_t zone;
1240 vm_map_entry_t entry;
1241
1242 zone = vm_map_entry_zone;
1243
1244 assert(map_header->entries_pageable ? !map_locked : TRUE);
1245
1246 if (map_header->entries_pageable) {
1247 entry = (vm_map_entry_t) zalloc(zone);
1248 } else {
1249 entry = (vm_map_entry_t) zalloc_canblock(zone, FALSE);
1250
1251 if (entry == VM_MAP_ENTRY_NULL) {
1252 zone = vm_map_entry_reserved_zone;
1253 entry = (vm_map_entry_t) zalloc(zone);
1254 OSAddAtomic(1, &reserved_zalloc_count);
1255 } else {
1256 OSAddAtomic(1, &nonreserved_zalloc_count);
1257 }
1258 }
1259
1260 if (entry == VM_MAP_ENTRY_NULL) {
1261 panic("vm_map_entry_create");
1262 }
1263 entry->from_reserved_zone = (zone == vm_map_entry_reserved_zone);
1264
1265 vm_map_store_update((vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE);
1266 #if MAP_ENTRY_CREATION_DEBUG
1267 entry->vme_creation_maphdr = map_header;
1268 backtrace(&entry->vme_creation_bt[0],
1269 (sizeof(entry->vme_creation_bt) / sizeof(uintptr_t)), NULL);
1270 #endif
1271 return entry;
1272 }
1273
1274 /*
1275 * vm_map_entry_dispose: [ internal use only ]
1276 *
1277 * Inverse of vm_map_entry_create.
1278 *
1279 * write map lock held so no need to
1280  * do anything special to ensure correctness
1281 * of the stores
1282 */
1283 #define vm_map_entry_dispose(map, entry) \
1284 _vm_map_entry_dispose(&(map)->hdr, (entry))
1285
1286 #define vm_map_copy_entry_dispose(copy, entry) \
1287 _vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
1288
1289 static void
1290 _vm_map_entry_dispose(
1291 struct vm_map_header *map_header,
1292 vm_map_entry_t entry)
1293 {
1294 zone_t zone;
1295
1296 if (map_header->entries_pageable || !(entry->from_reserved_zone)) {
1297 zone = vm_map_entry_zone;
1298 } else {
1299 zone = vm_map_entry_reserved_zone;
1300 }
1301
1302 if (!map_header->entries_pageable) {
1303 if (zone == vm_map_entry_zone) {
1304 OSAddAtomic(-1, &nonreserved_zalloc_count);
1305 } else {
1306 OSAddAtomic(-1, &reserved_zalloc_count);
1307 }
1308 }
1309
1310 zfree(zone, entry);
1311 }
1312
1313 #if MACH_ASSERT
1314 static boolean_t first_free_check = FALSE;
1315 boolean_t
1316 first_free_is_valid(
1317 vm_map_t map)
1318 {
1319 if (!first_free_check) {
1320 return TRUE;
1321 }
1322
1323 return first_free_is_valid_store( map );
1324 }
1325 #endif /* MACH_ASSERT */
1326
1327
1328 #define vm_map_copy_entry_link(copy, after_where, entry) \
1329 _vm_map_store_entry_link(&(copy)->cpy_hdr, after_where, (entry))
1330
1331 #define vm_map_copy_entry_unlink(copy, entry) \
1332 _vm_map_store_entry_unlink(&(copy)->cpy_hdr, (entry))
1333
1334 #if MACH_ASSERT && TASK_SWAPPER
1335 /*
1336 * vm_map_res_reference:
1337 *
1338 * Adds another valid residence count to the given map.
1339 *
1340 * Map is locked so this function can be called from
1341 * vm_map_swapin.
1342 *
1343 */
1344 void
1345 vm_map_res_reference(vm_map_t map)
1346 {
1347 /* assert map is locked */
1348 assert(map->res_count >= 0);
1349 assert(os_ref_get_count(&map->map_refcnt) >= map->res_count);
1350 if (map->res_count == 0) {
1351 lck_mtx_unlock(&map->s_lock);
1352 vm_map_lock(map);
1353 vm_map_swapin(map);
1354 lck_mtx_lock(&map->s_lock);
1355 ++map->res_count;
1356 vm_map_unlock(map);
1357 } else {
1358 ++map->res_count;
1359 }
1360 }
1361
1362 /*
1363 * vm_map_reference_swap:
1364 *
1365 * Adds valid reference and residence counts to the given map.
1366 *
1367 * The map may not be in memory (i.e. zero residence count).
1368 *
1369 */
1370 void
1371 vm_map_reference_swap(vm_map_t map)
1372 {
1373 assert(map != VM_MAP_NULL);
1374 lck_mtx_lock(&map->s_lock);
1375 assert(map->res_count >= 0);
1376 assert(os_ref_get_count(&map->map_refcnt) >= map->res_count);
1377 os_ref_retain_locked(&map->map_refcnt);
1378 vm_map_res_reference(map);
1379 lck_mtx_unlock(&map->s_lock);
1380 }
1381
1382 /*
1383 * vm_map_res_deallocate:
1384 *
1385 * Decrement residence count on a map; possibly causing swapout.
1386 *
1387 * The map must be in memory (i.e. non-zero residence count).
1388 *
1389 * The map is locked, so this function is callable from vm_map_deallocate.
1390 *
1391 */
1392 void
1393 vm_map_res_deallocate(vm_map_t map)
1394 {
1395 assert(map->res_count > 0);
1396 if (--map->res_count == 0) {
1397 lck_mtx_unlock(&map->s_lock);
1398 vm_map_lock(map);
1399 vm_map_swapout(map);
1400 vm_map_unlock(map);
1401 lck_mtx_lock(&map->s_lock);
1402 }
1403 assert(os_ref_get_count(&map->map_refcnt) >= map->res_count);
1404 }
1405 #endif /* MACH_ASSERT && TASK_SWAPPER */
1406
1407 /*
1408 * vm_map_destroy:
1409 *
1410 * Actually destroy a map.
1411 */
1412 void
1413 vm_map_destroy(
1414 vm_map_t map,
1415 int flags)
1416 {
1417 vm_map_lock(map);
1418
1419 /* final cleanup: no need to unnest shared region */
1420 flags |= VM_MAP_REMOVE_NO_UNNESTING;
1421 /* final cleanup: ok to remove immutable mappings */
1422 flags |= VM_MAP_REMOVE_IMMUTABLE;
1423 /* final cleanup: allow gaps in range */
1424 flags |= VM_MAP_REMOVE_GAPS_OK;
1425
1426 /* clean up regular map entries */
1427 (void) vm_map_delete(map, map->min_offset, map->max_offset,
1428 flags, VM_MAP_NULL);
1429 /* clean up leftover special mappings (commpage, etc...) */
1430 #if !defined(__arm__) && !defined(__arm64__)
1431 (void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL,
1432 flags, VM_MAP_NULL);
1433 #endif /* !__arm__ && !__arm64__ */
1434
1435 vm_map_disable_hole_optimization(map);
1436 vm_map_corpse_footprint_destroy(map);
1437
1438 vm_map_unlock(map);
1439
1440 assert(map->hdr.nentries == 0);
1441
1442 if (map->pmap) {
1443 pmap_destroy(map->pmap);
1444 }
1445
1446 if (vm_map_lck_attr.lck_attr_val & LCK_ATTR_DEBUG) {
1447 /*
1448 * If lock debugging is enabled the mutexes get tagged as LCK_MTX_TAG_INDIRECT.
1449 * And this is regardless of whether the lck_mtx_ext_t is embedded in the
1450 * structure or kalloc'ed via lck_mtx_init.
1451 * An example is s_lock_ext within struct _vm_map.
1452 *
1453 * A lck_mtx_destroy on such a mutex will attempt a kfree and panic. We
1454 * can add another tag to detect embedded vs alloc'ed indirect external
1455 * mutexes but that'll be additional checks in the lock path and require
1456 * updating dependencies for the old vs new tag.
1457 *
1458 * Since the kfree() is for LCK_MTX_TAG_INDIRECT mutexes and that tag is applied
1459 * just when lock debugging is ON, we choose to forego explicitly destroying
1460 * the vm_map mutex and rw lock and, as a consequence, will overflow the reference
1461 * count on vm_map_lck_grp, which has no serious side-effect.
1462 */
1463 } else {
1464 lck_rw_destroy(&(map)->lock, &vm_map_lck_grp);
1465 lck_mtx_destroy(&(map)->s_lock, &vm_map_lck_grp);
1466 }
1467
1468 zfree(vm_map_zone, map);
1469 }
1470
1471 /*
1472 * Returns pid of the task with the largest number of VM map entries.
1473 * Used in the zone-map-exhaustion jetsam path.
1474 */
1475 pid_t
1476 find_largest_process_vm_map_entries(void)
1477 {
1478 pid_t victim_pid = -1;
1479 int max_vm_map_entries = 0;
1480 task_t task = TASK_NULL;
1481 queue_head_t *task_list = &tasks;
1482
1483 lck_mtx_lock(&tasks_threads_lock);
1484 queue_iterate(task_list, task, task_t, tasks) {
1485 if (task == kernel_task || !task->active) {
1486 continue;
1487 }
1488
1489 vm_map_t task_map = task->map;
1490 if (task_map != VM_MAP_NULL) {
1491 int task_vm_map_entries = task_map->hdr.nentries;
1492 if (task_vm_map_entries > max_vm_map_entries) {
1493 max_vm_map_entries = task_vm_map_entries;
1494 victim_pid = pid_from_task(task);
1495 }
1496 }
1497 }
1498 lck_mtx_unlock(&tasks_threads_lock);
1499
1500 printf("zone_map_exhaustion: victim pid %d, vm region count: %d\n", victim_pid, max_vm_map_entries);
1501 return victim_pid;
1502 }
1503
1504 #if TASK_SWAPPER
1505 /*
1506 * vm_map_swapin/vm_map_swapout
1507 *
1508 * Swap a map in and out, either referencing or releasing its resources.
1509 * These functions are internal use only; however, they must be exported
1510 * because they may be called from macros, which are exported.
1511 *
1512 * In the case of swapout, there could be races on the residence count,
1513 * so if the residence count is up, we return, assuming that a
1514 * vm_map_deallocate() call in the near future will bring us back.
1515 *
1516 * Locking:
1517 * -- We use the map write lock for synchronization among races.
1518 * -- The map write lock, and not the simple s_lock, protects the
1519 * swap state of the map.
1520 * -- If a map entry is a share map, then we hold both locks, in
1521 * hierarchical order.
1522 *
1523 * Synchronization Notes:
1524 * 1) If a vm_map_swapin() call happens while swapout in progress, it
1525 * will block on the map lock and proceed when swapout is through.
1526 * 2) A vm_map_reference() call at this time is illegal, and will
1527 * cause a panic. vm_map_reference() is only allowed on resident
1528 * maps, since it refuses to block.
1529 * 3) A vm_map_swapin() call during a swapin will block, and
1530  * proceed when the first swapin is done, turning into a nop.
1531 * This is the reason the res_count is not incremented until
1532 * after the swapin is complete.
1533 * 4) There is a timing hole after the checks of the res_count, before
1534 * the map lock is taken, during which a swapin may get the lock
1535 * before a swapout about to happen. If this happens, the swapin
1536 * will detect the state and increment the reference count, causing
1537 * the swapout to be a nop, thereby delaying it until a later
1538 * vm_map_deallocate. If the swapout gets the lock first, then
1539 * the swapin will simply block until the swapout is done, and
1540 * then proceed.
1541 *
1542 * Because vm_map_swapin() is potentially an expensive operation, it
1543 * should be used with caution.
1544 *
1545 * Invariants:
1546 * 1) A map with a residence count of zero is either swapped, or
1547 * being swapped.
1548 * 2) A map with a non-zero residence count is either resident,
1549 * or being swapped in.
1550 */
1551
1552 int vm_map_swap_enable = 1;
1553
1554 void
1555 vm_map_swapin(vm_map_t map)
1556 {
1557 vm_map_entry_t entry;
1558
1559 if (!vm_map_swap_enable) { /* debug */
1560 return;
1561 }
1562
1563 /*
1564 * Map is locked
1565 * First deal with various races.
1566 */
1567 if (map->sw_state == MAP_SW_IN) {
1568 /*
1569 * we raced with swapout and won. Returning will incr.
1570 * the res_count, turning the swapout into a nop.
1571 */
1572 return;
1573 }
1574
1575 /*
1576 * The residence count must be zero. If we raced with another
1577 * swapin, the state would have been IN; if we raced with a
1578 * swapout (after another competing swapin), we must have lost
1579 * the race to get here (see above comment), in which case
1580 * res_count is still 0.
1581 */
1582 assert(map->res_count == 0);
1583
1584 /*
1585 * There are no intermediate states of a map going out or
1586 * coming in, since the map is locked during the transition.
1587 */
1588 assert(map->sw_state == MAP_SW_OUT);
1589
1590 /*
1591 * We now operate upon each map entry. If the entry is a sub-
1592 * or share-map, we call vm_map_res_reference upon it.
1593 * If the entry is an object, we call vm_object_res_reference
1594 * (this may iterate through the shadow chain).
1595 * Note that we hold the map locked the entire time,
1596 * even if we get back here via a recursive call in
1597 * vm_map_res_reference.
1598 */
1599 entry = vm_map_first_entry(map);
1600
1601 while (entry != vm_map_to_entry(map)) {
1602 if (VME_OBJECT(entry) != VM_OBJECT_NULL) {
1603 if (entry->is_sub_map) {
1604 vm_map_t lmap = VME_SUBMAP(entry);
1605 lck_mtx_lock(&lmap->s_lock);
1606 vm_map_res_reference(lmap);
1607 lck_mtx_unlock(&lmap->s_lock);
1608 } else {
1609 vm_object_t object = VME_OBJECT(entry);
1610 vm_object_lock(object);
1611 /*
1612 * This call may iterate through the
1613 * shadow chain.
1614 */
1615 vm_object_res_reference(object);
1616 vm_object_unlock(object);
1617 }
1618 }
1619 entry = entry->vme_next;
1620 }
1621 assert(map->sw_state == MAP_SW_OUT);
1622 map->sw_state = MAP_SW_IN;
1623 }
1624
1625 void
1626 vm_map_swapout(vm_map_t map)
1627 {
1628 vm_map_entry_t entry;
1629
1630 /*
1631 * Map is locked
1632 * First deal with various races.
1633 * If we raced with a swapin and lost, the residence count
1634 * will have been incremented to 1, and we simply return.
1635 */
1636 lck_mtx_lock(&map->s_lock);
1637 if (map->res_count != 0) {
1638 lck_mtx_unlock(&map->s_lock);
1639 return;
1640 }
1641 lck_mtx_unlock(&map->s_lock);
1642
1643 /*
1644 * There are no intermediate states of a map going out or
1645 * coming in, since the map is locked during the transition.
1646 */
1647 assert(map->sw_state == MAP_SW_IN);
1648
1649 if (!vm_map_swap_enable) {
1650 return;
1651 }
1652
1653 /*
1654 * We now operate upon each map entry. If the entry is a sub-
1655 * or share-map, we call vm_map_res_deallocate upon it.
1656 * If the entry is an object, we call vm_object_res_deallocate
1657 * (this may iterate through the shadow chain).
1658 * Note that we hold the map locked the entire time,
1659 * even if we get back here via a recursive call in
1660 * vm_map_res_deallocate.
1661 */
1662 entry = vm_map_first_entry(map);
1663
1664 while (entry != vm_map_to_entry(map)) {
1665 if (VME_OBJECT(entry) != VM_OBJECT_NULL) {
1666 if (entry->is_sub_map) {
1667 vm_map_t lmap = VME_SUBMAP(entry);
1668 lck_mtx_lock(&lmap->s_lock);
1669 vm_map_res_deallocate(lmap);
1670 lck_mtx_unlock(&lmap->s_lock);
1671 } else {
1672 vm_object_t object = VME_OBJECT(entry);
1673 vm_object_lock(object);
1674 /*
1675 * This call may take a long time,
1676 * since it could actively push
1677 * out pages (if we implement it
1678 * that way).
1679 */
1680 vm_object_res_deallocate(object);
1681 vm_object_unlock(object);
1682 }
1683 }
1684 entry = entry->vme_next;
1685 }
1686 assert(map->sw_state == MAP_SW_IN);
1687 map->sw_state = MAP_SW_OUT;
1688 }
1689
1690 #endif /* TASK_SWAPPER */
1691
1692 /*
1693 * vm_map_lookup_entry: [ internal use only ]
1694 *
1695 * Calls into the vm map store layer to find the map
1696 * entry containing (or immediately preceding) the
1697 * specified address in the given map; the entry is returned
1698 * in the "entry" parameter. The boolean
1699 * result indicates whether the address is
1700 * actually contained in the map.
1701 */
1702 boolean_t
1703 vm_map_lookup_entry(
1704 vm_map_t map,
1705 vm_map_offset_t address,
1706 vm_map_entry_t *entry) /* OUT */
1707 {
1708 return vm_map_store_lookup_entry( map, address, entry );
1709 }
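/*
 * Illustration of the return convention (hypothetical addresses): if the map
 * contains an entry spanning [0x1000, 0x2000) and nothing above it until
 * 0x4000, then
 *
 *	vm_map_lookup_entry(map, 0x1800, &entry)  returns TRUE,  entry = [0x1000, 0x2000)
 *	vm_map_lookup_entry(map, 0x3000, &entry)  returns FALSE, entry = [0x1000, 0x2000)
 *						  (the entry immediately preceding 0x3000)
 */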
1710
1711 /*
1712 * Routine: vm_map_find_space
1713 * Purpose:
1714 * Allocate a range in the specified virtual address map,
1715 * returning the entry allocated for that range.
1716 * Used by kmem_alloc, etc.
1717 *
1718  * The map must NOT be locked. It will be returned locked
1719 * on KERN_SUCCESS, unlocked on failure.
1720 *
1721 * If an entry is allocated, the object/offset fields
1722 * are initialized to zero.
1723 */
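/*
 * A sketch of typical use, loosely modeled on kmem_alloc()-style callers
 * ("object" and "offset" below are placeholder names, not taken from this
 * file):
 *
 *	kr = vm_map_find_space(kernel_map, &addr, size, (vm_map_offset_t)0,
 *	    0, VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_NONE, &entry);
 *	if (kr != KERN_SUCCESS)
 *		return kr;			- map is left unlocked on failure
 *	VME_OBJECT_SET(entry, object);
 *	VME_OFFSET_SET(entry, offset);
 *	vm_map_unlock(kernel_map);		- returned locked on KERN_SUCCESS
 */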
1724 kern_return_t
1725 vm_map_find_space(
1726 vm_map_t map,
1727 vm_map_offset_t *address, /* OUT */
1728 vm_map_size_t size,
1729 vm_map_offset_t mask,
1730 int flags __unused,
1731 vm_map_kernel_flags_t vmk_flags,
1732 vm_tag_t tag,
1733 vm_map_entry_t *o_entry) /* OUT */
1734 {
1735 vm_map_entry_t entry, new_entry;
1736 vm_map_offset_t start;
1737 vm_map_offset_t end;
1738 vm_map_entry_t hole_entry;
1739
1740 if (size == 0) {
1741 *address = 0;
1742 return KERN_INVALID_ARGUMENT;
1743 }
1744
1745 if (vmk_flags.vmkf_guard_after) {
1746 /* account for the back guard page in the size */
1747 size += VM_MAP_PAGE_SIZE(map);
1748 }
1749
1750 new_entry = vm_map_entry_create(map, FALSE);
1751
1752 /*
1753 * Look for the first possible address; if there's already
1754 * something at this address, we have to start after it.
1755 */
1756
1757 vm_map_lock(map);
1758
1759 if (map->disable_vmentry_reuse == TRUE) {
1760 VM_MAP_HIGHEST_ENTRY(map, entry, start);
1761 } else {
1762 if (map->holelistenabled) {
1763 hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
1764
1765 if (hole_entry == NULL) {
1766 /*
1767 * No more space in the map?
1768 */
1769 vm_map_entry_dispose(map, new_entry);
1770 vm_map_unlock(map);
1771 return KERN_NO_SPACE;
1772 }
1773
1774 entry = hole_entry;
1775 start = entry->vme_start;
1776 } else {
1777 assert(first_free_is_valid(map));
1778 if ((entry = map->first_free) == vm_map_to_entry(map)) {
1779 start = map->min_offset;
1780 } else {
1781 start = entry->vme_end;
1782 }
1783 }
1784 }
1785
1786 /*
1787 * In any case, the "entry" always precedes
1788 * the proposed new region throughout the loop:
1789 */
1790
1791 while (TRUE) {
1792 vm_map_entry_t next;
1793
1794 /*
1795 * Find the end of the proposed new region.
1796 * Be sure we didn't go beyond the end, or
1797 * wrap around the address.
1798 */
1799
1800 if (vmk_flags.vmkf_guard_before) {
1801 /* reserve space for the front guard page */
1802 start += VM_MAP_PAGE_SIZE(map);
1803 }
1804 end = ((start + mask) & ~mask);
1805
1806 if (end < start) {
1807 vm_map_entry_dispose(map, new_entry);
1808 vm_map_unlock(map);
1809 return KERN_NO_SPACE;
1810 }
1811 start = end;
1812 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
1813 end += size;
1814 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
1815
1816 if ((end > map->max_offset) || (end < start)) {
1817 vm_map_entry_dispose(map, new_entry);
1818 vm_map_unlock(map);
1819 return KERN_NO_SPACE;
1820 }
1821
1822 next = entry->vme_next;
1823
1824 if (map->holelistenabled) {
1825 if (entry->vme_end >= end) {
1826 break;
1827 }
1828 } else {
1829 /*
1830 * If there are no more entries, we must win.
1831 *
1832 * OR
1833 *
1834 * If there is another entry, it must be
1835 * after the end of the potential new region.
1836 */
1837
1838 if (next == vm_map_to_entry(map)) {
1839 break;
1840 }
1841
1842 if (next->vme_start >= end) {
1843 break;
1844 }
1845 }
1846
1847 /*
1848 * Didn't fit -- move to the next entry.
1849 */
1850
1851 entry = next;
1852
1853 if (map->holelistenabled) {
1854 if (entry == CAST_TO_VM_MAP_ENTRY(map->holes_list)) {
1855 /*
1856 * Wrapped around
1857 */
1858 vm_map_entry_dispose(map, new_entry);
1859 vm_map_unlock(map);
1860 return KERN_NO_SPACE;
1861 }
1862 start = entry->vme_start;
1863 } else {
1864 start = entry->vme_end;
1865 }
1866 }
1867
1868 if (map->holelistenabled) {
1869 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
1870 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
1871 }
1872 }
1873
1874 /*
1875 * At this point,
1876 * "start" and "end" should define the endpoints of the
1877 * available new range, and
1878 * "entry" should refer to the region before the new
1879 * range, and
1880 *
1881 * the map should be locked.
1882 */
1883
1884 if (vmk_flags.vmkf_guard_before) {
1885 /* go back for the front guard page */
1886 start -= VM_MAP_PAGE_SIZE(map);
1887 }
1888 *address = start;
1889
1890 assert(start < end);
1891 new_entry->vme_start = start;
1892 new_entry->vme_end = end;
1893 assert(page_aligned(new_entry->vme_start));
1894 assert(page_aligned(new_entry->vme_end));
1895 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
1896 VM_MAP_PAGE_MASK(map)));
1897 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
1898 VM_MAP_PAGE_MASK(map)));
1899
1900 new_entry->is_shared = FALSE;
1901 new_entry->is_sub_map = FALSE;
1902 new_entry->use_pmap = TRUE;
1903 VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
1904 VME_OFFSET_SET(new_entry, (vm_object_offset_t) 0);
1905
1906 new_entry->needs_copy = FALSE;
1907
1908 new_entry->inheritance = VM_INHERIT_DEFAULT;
1909 new_entry->protection = VM_PROT_DEFAULT;
1910 new_entry->max_protection = VM_PROT_ALL;
1911 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
1912 new_entry->wired_count = 0;
1913 new_entry->user_wired_count = 0;
1914
1915 new_entry->in_transition = FALSE;
1916 new_entry->needs_wakeup = FALSE;
1917 new_entry->no_cache = FALSE;
1918 new_entry->permanent = FALSE;
1919 new_entry->superpage_size = FALSE;
1920 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
1921 new_entry->map_aligned = TRUE;
1922 } else {
1923 new_entry->map_aligned = FALSE;
1924 }
1925
1926 new_entry->used_for_jit = FALSE;
1927 new_entry->pmap_cs_associated = FALSE;
1928 new_entry->zero_wired_pages = FALSE;
1929 new_entry->iokit_acct = FALSE;
1930 new_entry->vme_resilient_codesign = FALSE;
1931 new_entry->vme_resilient_media = FALSE;
1932 if (vmk_flags.vmkf_atomic_entry) {
1933 new_entry->vme_atomic = TRUE;
1934 } else {
1935 new_entry->vme_atomic = FALSE;
1936 }
1937
1938 VME_ALIAS_SET(new_entry, tag);
1939
1940 /*
1941 * Insert the new entry into the list
1942 */
1943
1944 vm_map_store_entry_link(map, entry, new_entry, VM_MAP_KERNEL_FLAGS_NONE);
1945
1946 map->size += size;
1947
1948 /*
1949 * Update the lookup hint
1950 */
1951 SAVE_HINT_MAP_WRITE(map, new_entry);
1952
1953 *o_entry = new_entry;
1954 return KERN_SUCCESS;
1955 }
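/*
 * Editorial usage sketch (not part of the original source): a kmem-style
 * caller might use vm_map_find_space() roughly as below ("object", "offset"
 * and "size" are placeholders; the fourth and fifth arguments are the
 * alignment mask and the unused flags).  On KERN_SUCCESS the map comes back
 * locked, so the caller fills in the fresh entry and then unlocks it.
 *
 *	vm_map_offset_t addr;
 *	vm_map_entry_t entry;
 *	kern_return_t kr;
 *
 *	kr = vm_map_find_space(kernel_map, &addr, size, 0, 0,
 *	    VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_KALLOC, &entry);
 *	if (kr == KERN_SUCCESS) {
 *		VME_OBJECT_SET(entry, object);
 *		VME_OFFSET_SET(entry, offset);
 *		vm_map_unlock(kernel_map);
 *	}
 */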
1956
1957 int vm_map_pmap_enter_print = FALSE;
1958 int vm_map_pmap_enter_enable = FALSE;
1959
1960 /*
1961 * Routine: vm_map_pmap_enter [internal only]
1962 *
1963 * Description:
1964 * Force pages from the specified object to be entered into
1965 * the pmap at the specified address if they are present.
1966 * As soon as a page is not found in the object, the scan ends.
1967 *
1968 * Returns:
1969 * Nothing.
1970 *
1971 * In/out conditions:
1972 * The source map should not be locked on entry.
1973 */
1974 __unused static void
1975 vm_map_pmap_enter(
1976 vm_map_t map,
1977 vm_map_offset_t addr,
1978 vm_map_offset_t end_addr,
1979 vm_object_t object,
1980 vm_object_offset_t offset,
1981 vm_prot_t protection)
1982 {
1983 int type_of_fault;
1984 kern_return_t kr;
1985 struct vm_object_fault_info fault_info = {};
1986
1987 if (map->pmap == 0) {
1988 return;
1989 }
1990
1991 while (addr < end_addr) {
1992 vm_page_t m;
1993
1994
1995 /*
1996 * TODO:
1997 * From vm_map_enter(), we come into this function without the map
1998 * lock held or the object lock held.
1999 * We haven't taken a reference on the object either.
2000 * We should do a proper lookup on the map to make sure
2001 * that things are sane before we go locking objects that
2002 * could have been deallocated from under us.
2003 */
2004
2005 vm_object_lock(object);
2006
2007 m = vm_page_lookup(object, offset);
2008
2009 if (m == VM_PAGE_NULL || m->vmp_busy || m->vmp_fictitious ||
2010 (m->vmp_unusual && (m->vmp_error || m->vmp_restart || m->vmp_absent))) {
2011 vm_object_unlock(object);
2012 return;
2013 }
2014
2015 if (vm_map_pmap_enter_print) {
2016 printf("vm_map_pmap_enter:");
2017 printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
2018 map, (unsigned long long)addr, object, (unsigned long long)offset);
2019 }
2020 type_of_fault = DBG_CACHE_HIT_FAULT;
2021 kr = vm_fault_enter(m, map->pmap,
2022 addr, protection, protection,
2023 VM_PAGE_WIRED(m),
2024 FALSE, /* change_wiring */
2025 VM_KERN_MEMORY_NONE, /* tag - not wiring */
2026 &fault_info,
2027 NULL, /* need_retry */
2028 &type_of_fault);
2029
2030 vm_object_unlock(object);
2031
2032 offset += PAGE_SIZE_64;
2033 addr += PAGE_SIZE;
2034 }
2035 }
2036
2037 boolean_t vm_map_pmap_is_empty(
2038 vm_map_t map,
2039 vm_map_offset_t start,
2040 vm_map_offset_t end);
2041 boolean_t
2042 vm_map_pmap_is_empty(
2043 vm_map_t map,
2044 vm_map_offset_t start,
2045 vm_map_offset_t end)
2046 {
2047 #ifdef MACHINE_PMAP_IS_EMPTY
2048 return pmap_is_empty(map->pmap, start, end);
2049 #else /* MACHINE_PMAP_IS_EMPTY */
2050 vm_map_offset_t offset;
2051 ppnum_t phys_page;
2052
2053 if (map->pmap == NULL) {
2054 return TRUE;
2055 }
2056
2057 for (offset = start;
2058 offset < end;
2059 offset += PAGE_SIZE) {
2060 phys_page = pmap_find_phys(map->pmap, offset);
2061 if (phys_page) {
2062 kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
2063 "page %d at 0x%llx\n",
2064 map, (long long)start, (long long)end,
2065 phys_page, (long long)offset);
2066 return FALSE;
2067 }
2068 }
2069 return TRUE;
2070 #endif /* MACHINE_PMAP_IS_EMPTY */
2071 }
2072
2073 #define MAX_TRIES_TO_GET_RANDOM_ADDRESS 1000
2074 kern_return_t
2075 vm_map_random_address_for_size(
2076 vm_map_t map,
2077 vm_map_offset_t *address,
2078 vm_map_size_t size)
2079 {
2080 kern_return_t kr = KERN_SUCCESS;
2081 int tries = 0;
2082 vm_map_offset_t random_addr = 0;
2083 vm_map_offset_t hole_end;
2084
2085 vm_map_entry_t next_entry = VM_MAP_ENTRY_NULL;
2086 vm_map_entry_t prev_entry = VM_MAP_ENTRY_NULL;
2087 vm_map_size_t vm_hole_size = 0;
2088 vm_map_size_t addr_space_size;
2089
2090 addr_space_size = vm_map_max(map) - vm_map_min(map);
2091
2092 assert(page_aligned(size));
2093
2094 while (tries < MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
2095 random_addr = ((vm_map_offset_t)random()) << PAGE_SHIFT;
2096 random_addr = vm_map_trunc_page(
2097 vm_map_min(map) + (random_addr % addr_space_size),
2098 VM_MAP_PAGE_MASK(map));
2099
2100 if (vm_map_lookup_entry(map, random_addr, &prev_entry) == FALSE) {
2101 if (prev_entry == vm_map_to_entry(map)) {
2102 next_entry = vm_map_first_entry(map);
2103 } else {
2104 next_entry = prev_entry->vme_next;
2105 }
2106 if (next_entry == vm_map_to_entry(map)) {
2107 hole_end = vm_map_max(map);
2108 } else {
2109 hole_end = next_entry->vme_start;
2110 }
2111 vm_hole_size = hole_end - random_addr;
2112 if (vm_hole_size >= size) {
2113 *address = random_addr;
2114 break;
2115 }
2116 }
2117 tries++;
2118 }
2119
2120 if (tries == MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
2121 kr = KERN_NO_SPACE;
2122 }
2123 return kr;
2124 }
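/*
 * Editorial worked example (not part of the original source), assuming
 * 4 KB pages (PAGE_SHIFT == 12) and a map spanning
 * [0x100000000, 0x200000000), i.e. addr_space_size == 0x100000000:
 *
 *	random() returns 0x2345
 *	candidate   = 0x2345 << 12                   = 0x2345000
 *	random_addr = 0x100000000 + (0x2345000 % 0x100000000)
 *	            = 0x102345000   (already page aligned)
 *
 * The candidate is kept only if the hole from random_addr to the next map
 * entry (or vm_map_max(map)) is at least "size" bytes; otherwise the loop
 * above retries, up to MAX_TRIES_TO_GET_RANDOM_ADDRESS times.
 */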
2125
2126 static boolean_t
2127 vm_memory_malloc_no_cow(
2128 int alias)
2129 {
2130 uint64_t alias_mask;
2131
2132 if (alias > 63) {
2133 return FALSE;
2134 }
2135
2136 alias_mask = 1ULL << alias;
2137 if (alias_mask & vm_memory_malloc_no_cow_mask) {
2138 return TRUE;
2139 }
2140 return FALSE;
2141 }
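/*
 * Editorial sketch (not part of the original source):
 * vm_memory_malloc_no_cow_mask is a 64-bit bitmap indexed by VM alias and
 * populated elsewhere (e.g. from a boot-arg), so marking an alias such as
 * VM_MEMORY_MALLOC_SMALL as "never copy-on-write" amounts to:
 *
 *	vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_SMALL;
 *	assert(vm_memory_malloc_no_cow(VM_MEMORY_MALLOC_SMALL));
 *
 * Aliases above 63 fall outside the bitmap and always report FALSE.
 */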
2142
2143 /*
2144 * Routine: vm_map_enter
2145 *
2146 * Description:
2147 * Allocate a range in the specified virtual address map.
2148 * The resulting range will refer to memory defined by
2149 * the given memory object and offset into that object.
2150 *
2151 * Arguments are as defined in the vm_map call.
2152 */
2153 int _map_enter_debug = 0;
2154 static unsigned int vm_map_enter_restore_successes = 0;
2155 static unsigned int vm_map_enter_restore_failures = 0;
2156 kern_return_t
2157 vm_map_enter(
2158 vm_map_t map,
2159 vm_map_offset_t *address, /* IN/OUT */
2160 vm_map_size_t size,
2161 vm_map_offset_t mask,
2162 int flags,
2163 vm_map_kernel_flags_t vmk_flags,
2164 vm_tag_t alias,
2165 vm_object_t object,
2166 vm_object_offset_t offset,
2167 boolean_t needs_copy,
2168 vm_prot_t cur_protection,
2169 vm_prot_t max_protection,
2170 vm_inherit_t inheritance)
2171 {
2172 vm_map_entry_t entry, new_entry;
2173 vm_map_offset_t start, tmp_start, tmp_offset;
2174 vm_map_offset_t end, tmp_end;
2175 vm_map_offset_t tmp2_start, tmp2_end;
2176 vm_map_offset_t desired_empty_end;
2177 vm_map_offset_t step;
2178 kern_return_t result = KERN_SUCCESS;
2179 vm_map_t zap_old_map = VM_MAP_NULL;
2180 vm_map_t zap_new_map = VM_MAP_NULL;
2181 boolean_t map_locked = FALSE;
2182 boolean_t pmap_empty = TRUE;
2183 boolean_t new_mapping_established = FALSE;
2184 boolean_t keep_map_locked = vmk_flags.vmkf_keep_map_locked;
2185 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
2186 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
2187 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
2188 boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
2189 boolean_t is_submap = vmk_flags.vmkf_submap;
2190 boolean_t permanent = vmk_flags.vmkf_permanent;
2191 boolean_t no_copy_on_read = vmk_flags.vmkf_no_copy_on_read;
2192 boolean_t entry_for_jit = vmk_flags.vmkf_map_jit;
2193 boolean_t iokit_acct = vmk_flags.vmkf_iokit_acct;
2194 boolean_t resilient_codesign = ((flags & VM_FLAGS_RESILIENT_CODESIGN) != 0);
2195 boolean_t resilient_media = ((flags & VM_FLAGS_RESILIENT_MEDIA) != 0);
2196 boolean_t random_address = ((flags & VM_FLAGS_RANDOM_ADDR) != 0);
2197 unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
2198 vm_tag_t user_alias;
2199 vm_map_offset_t effective_min_offset, effective_max_offset;
2200 kern_return_t kr;
2201 boolean_t clear_map_aligned = FALSE;
2202 vm_map_entry_t hole_entry;
2203 vm_map_size_t chunk_size = 0;
2204
2205 assertf(vmk_flags.__vmkf_unused == 0, "vmk_flags unused=0x%x\n", vmk_flags.__vmkf_unused);
2206
2207 if (flags & VM_FLAGS_4GB_CHUNK) {
2208 #if defined(__LP64__)
2209 chunk_size = (4ULL * 1024 * 1024 * 1024); /* max. 4GB chunks for the new allocation */
2210 #else /* __LP64__ */
2211 chunk_size = ANON_CHUNK_SIZE;
2212 #endif /* __LP64__ */
2213 } else {
2214 chunk_size = ANON_CHUNK_SIZE;
2215 }
2216
2217 if (superpage_size) {
2218 switch (superpage_size) {
2219 /*
2220 * Note that the current implementation only supports
2221 * a single size for superpages, SUPERPAGE_SIZE, per
2222 * architecture. As soon as more sizes are to be
2223 * supported, SUPERPAGE_SIZE has to be replaced
2224 * with a lookup of the size depending on superpage_size.
2225 */
2226 #ifdef __x86_64__
2227 case SUPERPAGE_SIZE_ANY:
2228 /* handle it like 2 MB and round up to page size */
2229 size = (size + 2 * 1024 * 1024 - 1) & ~(2 * 1024 * 1024 - 1);
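/* FALLTHROUGH: the rounded request is then treated as a 2 MB superpage */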
2230 case SUPERPAGE_SIZE_2MB:
2231 break;
2232 #endif
2233 default:
2234 return KERN_INVALID_ARGUMENT;
2235 }
2236 mask = SUPERPAGE_SIZE - 1;
2237 if (size & (SUPERPAGE_SIZE - 1)) {
2238 return KERN_INVALID_ARGUMENT;
2239 }
2240 inheritance = VM_INHERIT_NONE; /* fork() children won't inherit superpages */
2241 }
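/*
 * Editorial worked example (not part of the original source): with
 * SUPERPAGE_SIZE_ANY, a 3 MB request is rounded up to 4 MB (two 2 MB
 * superpages) before falling through, so the multiple-of-SUPERPAGE_SIZE
 * check above cannot fire for it; an explicit SUPERPAGE_SIZE_2MB request
 * of 3 MB is rejected instead.  "mask" forces the chosen address to be
 * superpage aligned.
 */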
2242
2243
2244 if ((cur_protection & VM_PROT_WRITE) &&
2245 (cur_protection & VM_PROT_EXECUTE) &&
2246 #if !CONFIG_EMBEDDED
2247 map != kernel_map &&
2248 (cs_process_global_enforcement() ||
2249 (vmk_flags.vmkf_cs_enforcement_override
2250 ? vmk_flags.vmkf_cs_enforcement
2251 : cs_process_enforcement(NULL))) &&
2252 #endif /* !CONFIG_EMBEDDED */
2253 !entry_for_jit) {
2254 DTRACE_VM3(cs_wx,
2255 uint64_t, 0,
2256 uint64_t, 0,
2257 vm_prot_t, cur_protection);
2258 printf("CODE SIGNING: %d[%s] %s: curprot cannot be write+execute. "
2259 #if VM_PROTECT_WX_FAIL
2260 "failing\n",
2261 #else /* VM_PROTECT_WX_FAIL */
2262 "turning off execute\n",
2263 #endif /* VM_PROTECT_WX_FAIL */
2264 proc_selfpid(),
2265 (current_task()->bsd_info
2266 ? proc_name_address(current_task()->bsd_info)
2267 : "?"),
2268 __FUNCTION__);
2269 cur_protection &= ~VM_PROT_EXECUTE;
2270 #if VM_PROTECT_WX_FAIL
2271 return KERN_PROTECTION_FAILURE;
2272 #endif /* VM_PROTECT_WX_FAIL */
2273 }
2274
2275 /*
2276 * If the task has requested executable lockdown,
2277 * deny any new executable mapping.
2278 */
2279 if (map->map_disallow_new_exec == TRUE) {
2280 if (cur_protection & VM_PROT_EXECUTE) {
2281 return KERN_PROTECTION_FAILURE;
2282 }
2283 }
2284
2285 if (resilient_codesign) {
2286 assert(!is_submap);
2287 if ((cur_protection & (VM_PROT_WRITE | VM_PROT_EXECUTE)) ||
2288 (max_protection & (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
2289 return KERN_PROTECTION_FAILURE;
2290 }
2291 }
2292
2293 if (resilient_media) {
2294 assert(!is_submap);
2295 // assert(!needs_copy);
2296 if (object != VM_OBJECT_NULL &&
2297 !object->internal) {
2298 /*
2299 * This mapping is directly backed by an external
2300 * memory manager (e.g. a vnode pager for a file):
2301 * we would not have any safe place to inject
2302 * a zero-filled page if an actual page is not
2303 * available, without possibly impacting the actual
2304 * contents of the mapped object (e.g. the file),
2305 * so we can't provide any media resiliency here.
2306 */
2307 return KERN_INVALID_ARGUMENT;
2308 }
2309 }
2310
2311 if (is_submap) {
2312 if (purgable) {
2313 /* submaps can not be purgeable */
2314 return KERN_INVALID_ARGUMENT;
2315 }
2316 if (object == VM_OBJECT_NULL) {
2317 /* submaps can not be created lazily */
2318 return KERN_INVALID_ARGUMENT;
2319 }
2320 }
2321 if (vmk_flags.vmkf_already) {
2322 /*
2323 * VM_FLAGS_ALREADY says that it's OK if the same mapping
2324 * is already present. For it to be meaningful, the requested
2325 * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
2326 * we shouldn't try to remove what was mapped there first
2327 * (!VM_FLAGS_OVERWRITE).
2328 */
2329 if ((flags & VM_FLAGS_ANYWHERE) ||
2330 (flags & VM_FLAGS_OVERWRITE)) {
2331 return KERN_INVALID_ARGUMENT;
2332 }
2333 }
2334
2335 effective_min_offset = map->min_offset;
2336
2337 if (vmk_flags.vmkf_beyond_max) {
2338 /*
2339 * Allow an insertion beyond the map's max offset.
2340 */
2341 #if !defined(__arm__) && !defined(__arm64__)
2342 if (vm_map_is_64bit(map)) {
2343 effective_max_offset = 0xFFFFFFFFFFFFF000ULL;
2344 } else
2345 #endif /* __arm__ */
2346 effective_max_offset = 0x00000000FFFFF000ULL;
2347 } else {
2348 #if !defined(CONFIG_EMBEDDED)
2349 if (__improbable(vmk_flags.vmkf_32bit_map_va)) {
2350 effective_max_offset = MIN(map->max_offset, 0x00000000FFFFF000ULL);
2351 } else {
2352 effective_max_offset = map->max_offset;
2353 }
2354 #else
2355 effective_max_offset = map->max_offset;
2356 #endif
2357 }
2358
2359 if (size == 0 ||
2360 (offset & PAGE_MASK_64) != 0) {
2361 *address = 0;
2362 return KERN_INVALID_ARGUMENT;
2363 }
2364
2365 if (map->pmap == kernel_pmap) {
2366 user_alias = VM_KERN_MEMORY_NONE;
2367 } else {
2368 user_alias = alias;
2369 }
2370
2371 if (user_alias == VM_MEMORY_MALLOC_MEDIUM) {
2372 chunk_size = MALLOC_MEDIUM_CHUNK_SIZE;
2373 }
2374
2375 #define RETURN(value) { result = value; goto BailOut; }
2376
2377 assert(page_aligned(*address));
2378 assert(page_aligned(size));
2379
2380 if (!VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map))) {
2381 /*
2382 * In most cases, the caller rounds the size up to the
2383 * map's page size.
2384 * If we get a size that is explicitly not map-aligned here,
2385 * we'll have to respect the caller's wish and mark the
2386 * mapping as "not map-aligned" to avoid tripping the
2387 * map alignment checks later.
2388 */
2389 clear_map_aligned = TRUE;
2390 }
2391 if (!anywhere &&
2392 !VM_MAP_PAGE_ALIGNED(*address, VM_MAP_PAGE_MASK(map))) {
2393 /*
2394 * We've been asked to map at a fixed address and that
2395 * address is not aligned to the map's specific alignment.
2396 * The caller should know what it's doing (i.e. most likely
2397 * mapping some fragmented copy map, transferring memory from
2398 * a VM map with a different alignment), so clear map_aligned
2399 * for this new VM map entry and proceed.
2400 */
2401 clear_map_aligned = TRUE;
2402 }
2403
2404 /*
2405 * Only zero-fill objects are allowed to be purgable.
2406 * LP64todo - limit purgable objects to 32-bits for now
2407 */
2408 if (purgable &&
2409 (offset != 0 ||
2410 (object != VM_OBJECT_NULL &&
2411 (object->vo_size != size ||
2412 object->purgable == VM_PURGABLE_DENY))
2413 || size > ANON_MAX_SIZE)) { /* LP64todo: remove when dp capable */
2414 return KERN_INVALID_ARGUMENT;
2415 }
2416
2417 if (!anywhere && overwrite) {
2418 /*
2419 * Create a temporary VM map to hold the old mappings in the
2420 * affected area while we create the new one.
2421 * This avoids releasing the VM map lock in
2422 * vm_map_entry_delete() and allows atomicity
2423 * when we want to replace some mappings with a new one.
2424 * It also allows us to restore the old VM mappings if the
2425 * new mapping fails.
2426 */
2427 zap_old_map = vm_map_create(PMAP_NULL,
2428 *address,
2429 *address + size,
2430 map->hdr.entries_pageable);
2431 vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
2432 vm_map_disable_hole_optimization(zap_old_map);
2433 }
2434
2435 StartAgain:;
2436
2437 start = *address;
2438
2439 if (anywhere) {
2440 vm_map_lock(map);
2441 map_locked = TRUE;
2442
2443 if (entry_for_jit) {
2444 #if CONFIG_EMBEDDED
2445 if (map->jit_entry_exists) {
2446 result = KERN_INVALID_ARGUMENT;
2447 goto BailOut;
2448 }
2449 random_address = TRUE;
2450 #endif /* CONFIG_EMBEDDED */
2451 }
2452
2453 if (random_address) {
2454 /*
2455 * Get a random start address.
2456 */
2457 result = vm_map_random_address_for_size(map, address, size);
2458 if (result != KERN_SUCCESS) {
2459 goto BailOut;
2460 }
2461 start = *address;
2462 }
2463 #if !CONFIG_EMBEDDED
2464 else if ((start == 0 || start == vm_map_min(map)) &&
2465 !map->disable_vmentry_reuse &&
2466 map->vmmap_high_start != 0) {
2467 start = map->vmmap_high_start;
2468 }
2469 #endif
2470
2471
2472 /*
2473 * Calculate the first possible address.
2474 */
2475
2476 if (start < effective_min_offset) {
2477 start = effective_min_offset;
2478 }
2479 if (start > effective_max_offset) {
2480 RETURN(KERN_NO_SPACE);
2481 }
2482
2483 /*
2484 * Look for the first possible address;
2485 * if there's already something at this
2486 * address, we have to start after it.
2487 */
2488
2489 if (map->disable_vmentry_reuse == TRUE) {
2490 VM_MAP_HIGHEST_ENTRY(map, entry, start);
2491 } else {
2492 if (map->holelistenabled) {
2493 hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
2494
2495 if (hole_entry == NULL) {
2496 /*
2497 * No more space in the map?
2498 */
2499 result = KERN_NO_SPACE;
2500 goto BailOut;
2501 } else {
2502 boolean_t found_hole = FALSE;
2503
2504 do {
2505 if (hole_entry->vme_start >= start) {
2506 start = hole_entry->vme_start;
2507 found_hole = TRUE;
2508 break;
2509 }
2510
2511 if (hole_entry->vme_end > start) {
2512 found_hole = TRUE;
2513 break;
2514 }
2515 hole_entry = hole_entry->vme_next;
2516 } while (hole_entry != CAST_TO_VM_MAP_ENTRY(map->holes_list));
2517
2518 if (found_hole == FALSE) {
2519 result = KERN_NO_SPACE;
2520 goto BailOut;
2521 }
2522
2523 entry = hole_entry;
2524
2525 if (start == 0) {
2526 start += PAGE_SIZE_64;
2527 }
2528 }
2529 } else {
2530 assert(first_free_is_valid(map));
2531
2532 entry = map->first_free;
2533
2534 if (entry == vm_map_to_entry(map)) {
2535 entry = NULL;
2536 } else {
2537 if (entry->vme_next == vm_map_to_entry(map)) {
2538 /*
2539 * Hole at the end of the map.
2540 */
2541 entry = NULL;
2542 } else {
2543 if (start < (entry->vme_next)->vme_start) {
2544 start = entry->vme_end;
2545 start = vm_map_round_page(start,
2546 VM_MAP_PAGE_MASK(map));
2547 } else {
2548 /*
2549 * Need to do a lookup.
2550 */
2551 entry = NULL;
2552 }
2553 }
2554 }
2555
2556 if (entry == NULL) {
2557 vm_map_entry_t tmp_entry;
2558 if (vm_map_lookup_entry(map, start, &tmp_entry)) {
2559 assert(!entry_for_jit);
2560 start = tmp_entry->vme_end;
2561 start = vm_map_round_page(start,
2562 VM_MAP_PAGE_MASK(map));
2563 }
2564 entry = tmp_entry;
2565 }
2566 }
2567 }
2568
2569 /*
2570 * In any case, the "entry" always precedes
2571 * the proposed new region throughout the
2572 * loop:
2573 */
2574
2575 while (TRUE) {
2576 vm_map_entry_t next;
2577
2578 /*
2579 * Find the end of the proposed new region.
2580 * Be sure we didn't go beyond the end, or
2581 * wrap around the address.
2582 */
2583
2584 end = ((start + mask) & ~mask);
2585 end = vm_map_round_page(end,
2586 VM_MAP_PAGE_MASK(map));
2587 if (end < start) {
2588 RETURN(KERN_NO_SPACE);
2589 }
2590 start = end;
2591 assert(VM_MAP_PAGE_ALIGNED(start,
2592 VM_MAP_PAGE_MASK(map)));
2593 end += size;
2594
2595 /* We want an entire page of empty space, but don't increase the allocation size. */
2596 desired_empty_end = vm_map_round_page(end, VM_MAP_PAGE_MASK(map));
2597
2598 if ((desired_empty_end > effective_max_offset) || (desired_empty_end < start)) {
2599 if (map->wait_for_space) {
2600 assert(!keep_map_locked);
2601 if (size <= (effective_max_offset -
2602 effective_min_offset)) {
2603 assert_wait((event_t)map,
2604 THREAD_ABORTSAFE);
2605 vm_map_unlock(map);
2606 map_locked = FALSE;
2607 thread_block(THREAD_CONTINUE_NULL);
2608 goto StartAgain;
2609 }
2610 }
2611 RETURN(KERN_NO_SPACE);
2612 }
2613
2614 next = entry->vme_next;
2615
2616 if (map->holelistenabled) {
2617 if (entry->vme_end >= desired_empty_end) {
2618 break;
2619 }
2620 } else {
2621 /*
2622 * If there are no more entries, we must win.
2623 *
2624 * OR
2625 *
2626 * If there is another entry, it must be
2627 * after the end of the potential new region.
2628 */
2629
2630 if (next == vm_map_to_entry(map)) {
2631 break;
2632 }
2633
2634 if (next->vme_start >= desired_empty_end) {
2635 break;
2636 }
2637 }
2638
2639 /*
2640 * Didn't fit -- move to the next entry.
2641 */
2642
2643 entry = next;
2644
2645 if (map->holelistenabled) {
2646 if (entry == CAST_TO_VM_MAP_ENTRY(map->holes_list)) {
2647 /*
2648 * Wrapped around
2649 */
2650 result = KERN_NO_SPACE;
2651 goto BailOut;
2652 }
2653 start = entry->vme_start;
2654 } else {
2655 start = entry->vme_end;
2656 }
2657
2658 start = vm_map_round_page(start,
2659 VM_MAP_PAGE_MASK(map));
2660 }
2661
2662 if (map->holelistenabled) {
2663 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
2664 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
2665 }
2666 }
2667
2668 *address = start;
2669 assert(VM_MAP_PAGE_ALIGNED(*address,
2670 VM_MAP_PAGE_MASK(map)));
2671 } else {
2672 /*
2673 * Verify that:
2674 * the address doesn't itself violate
2675 * the mask requirement.
2676 */
2677
2678 vm_map_lock(map);
2679 map_locked = TRUE;
2680 if ((start & mask) != 0) {
2681 RETURN(KERN_NO_SPACE);
2682 }
2683
2684 /*
2685 * ... the address is within bounds
2686 */
2687
2688 end = start + size;
2689
2690 if ((start < effective_min_offset) ||
2691 (end > effective_max_offset) ||
2692 (start >= end)) {
2693 RETURN(KERN_INVALID_ADDRESS);
2694 }
2695
2696 if (overwrite && zap_old_map != VM_MAP_NULL) {
2697 int remove_flags;
2698 /*
2699 * Fixed mapping and "overwrite" flag: attempt to
2700 * remove all existing mappings in the specified
2701 * address range, saving them in our "zap_old_map".
2702 */
2703 remove_flags = VM_MAP_REMOVE_SAVE_ENTRIES;
2704 remove_flags |= VM_MAP_REMOVE_NO_MAP_ALIGN;
2705 if (vmk_flags.vmkf_overwrite_immutable) {
2706 /* we can overwrite immutable mappings */
2707 remove_flags |= VM_MAP_REMOVE_IMMUTABLE;
2708 }
2709 (void) vm_map_delete(map, start, end,
2710 remove_flags,
2711 zap_old_map);
2712 }
2713
2714 /*
2715 * ... the starting address isn't allocated
2716 */
2717
2718 if (vm_map_lookup_entry(map, start, &entry)) {
2719 if (!(vmk_flags.vmkf_already)) {
2720 RETURN(KERN_NO_SPACE);
2721 }
2722 /*
2723 * Check if what's already there is what we want.
2724 */
2725 tmp_start = start;
2726 tmp_offset = offset;
2727 if (entry->vme_start < start) {
2728 tmp_start -= start - entry->vme_start;
2729 tmp_offset -= start - entry->vme_start;
2730 }
2731 for (; entry->vme_start < end;
2732 entry = entry->vme_next) {
2733 /*
2734 * Check if the mapping's attributes
2735 * match the existing map entry.
2736 */
2737 if (entry == vm_map_to_entry(map) ||
2738 entry->vme_start != tmp_start ||
2739 entry->is_sub_map != is_submap ||
2740 VME_OFFSET(entry) != tmp_offset ||
2741 entry->needs_copy != needs_copy ||
2742 entry->protection != cur_protection ||
2743 entry->max_protection != max_protection ||
2744 entry->inheritance != inheritance ||
2745 entry->iokit_acct != iokit_acct ||
2746 VME_ALIAS(entry) != alias) {
2747 /* not the same mapping ! */
2748 RETURN(KERN_NO_SPACE);
2749 }
2750 /*
2751 * Check if the same object is being mapped.
2752 */
2753 if (is_submap) {
2754 if (VME_SUBMAP(entry) !=
2755 (vm_map_t) object) {
2756 /* not the same submap */
2757 RETURN(KERN_NO_SPACE);
2758 }
2759 } else {
2760 if (VME_OBJECT(entry) != object) {
2761 /* not the same VM object... */
2762 vm_object_t obj2;
2763
2764 obj2 = VME_OBJECT(entry);
2765 if ((obj2 == VM_OBJECT_NULL ||
2766 obj2->internal) &&
2767 (object == VM_OBJECT_NULL ||
2768 object->internal)) {
2769 /*
2770 * ... but both are
2771 * anonymous memory,
2772 * so equivalent.
2773 */
2774 } else {
2775 RETURN(KERN_NO_SPACE);
2776 }
2777 }
2778 }
2779
2780 tmp_offset += entry->vme_end - entry->vme_start;
2781 tmp_start += entry->vme_end - entry->vme_start;
2782 if (entry->vme_end >= end) {
2783 /* reached the end of our mapping */
2784 break;
2785 }
2786 }
2787 /* it all matches: let's use what's already there ! */
2788 RETURN(KERN_MEMORY_PRESENT);
2789 }
2790
2791 /*
2792 * ... the next region doesn't overlap the
2793 * end point.
2794 */
2795
2796 if ((entry->vme_next != vm_map_to_entry(map)) &&
2797 (entry->vme_next->vme_start < end)) {
2798 RETURN(KERN_NO_SPACE);
2799 }
2800 }
2801
2802 /*
2803 * At this point,
2804 * "start" and "end" should define the endpoints of the
2805 * available new range, and
2806 * "entry" should refer to the region before the new
2807 * range, and
2808 *
2809 * the map should be locked.
2810 */
2811
2812 /*
2813 * See whether we can avoid creating a new entry (and object) by
2814 * extending one of our neighbors. [So far, we only attempt to
2815 * extend from below.] Note that we can never extend/join
2816 * purgable objects because they need to remain distinct
2817 * entities in order to implement their "volatile object"
2818 * semantics.
2819 */
2820
2821 if (purgable ||
2822 entry_for_jit ||
2823 vm_memory_malloc_no_cow(user_alias)) {
2824 if (object == VM_OBJECT_NULL) {
2825 object = vm_object_allocate(size);
2826 object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
2827 object->true_share = FALSE;
2828 if (purgable) {
2829 task_t owner;
2830 object->purgable = VM_PURGABLE_NONVOLATILE;
2831 if (map->pmap == kernel_pmap) {
2832 /*
2833 * Purgeable mappings made in a kernel
2834 * map are "owned" by the kernel itself
2835 * rather than the current user task
2836 * because they're likely to be used by
2837 * more than this user task (see
2838 * execargs_purgeable_allocate(), for
2839 * example).
2840 */
2841 owner = kernel_task;
2842 } else {
2843 owner = current_task();
2844 }
2845 assert(object->vo_owner == NULL);
2846 assert(object->resident_page_count == 0);
2847 assert(object->wired_page_count == 0);
2848 vm_object_lock(object);
2849 vm_purgeable_nonvolatile_enqueue(object, owner);
2850 vm_object_unlock(object);
2851 }
2852 offset = (vm_object_offset_t)0;
2853 }
2854 } else if ((is_submap == FALSE) &&
2855 (object == VM_OBJECT_NULL) &&
2856 (entry != vm_map_to_entry(map)) &&
2857 (entry->vme_end == start) &&
2858 (!entry->is_shared) &&
2859 (!entry->is_sub_map) &&
2860 (!entry->in_transition) &&
2861 (!entry->needs_wakeup) &&
2862 (entry->behavior == VM_BEHAVIOR_DEFAULT) &&
2863 (entry->protection == cur_protection) &&
2864 (entry->max_protection == max_protection) &&
2865 (entry->inheritance == inheritance) &&
2866 ((user_alias == VM_MEMORY_REALLOC) ||
2867 (VME_ALIAS(entry) == alias)) &&
2868 (entry->no_cache == no_cache) &&
2869 (entry->permanent == permanent) &&
2870 /* no coalescing for immutable executable mappings */
2871 !((entry->protection & VM_PROT_EXECUTE) &&
2872 entry->permanent) &&
2873 (!entry->superpage_size && !superpage_size) &&
2874 /*
2875 * No coalescing if not map-aligned, to avoid propagating
2876 * that condition any further than needed:
2877 */
2878 (!entry->map_aligned || !clear_map_aligned) &&
2879 (!entry->zero_wired_pages) &&
2880 (!entry->used_for_jit && !entry_for_jit) &&
2881 (!entry->pmap_cs_associated) &&
2882 (entry->iokit_acct == iokit_acct) &&
2883 (!entry->vme_resilient_codesign) &&
2884 (!entry->vme_resilient_media) &&
2885 (!entry->vme_atomic) &&
2886 (entry->vme_no_copy_on_read == no_copy_on_read) &&
2887
2888 ((entry->vme_end - entry->vme_start) + size <=
2889 (user_alias == VM_MEMORY_REALLOC ?
2890 ANON_CHUNK_SIZE :
2891 NO_COALESCE_LIMIT)) &&
2892
2893 (entry->wired_count == 0)) { /* implies user_wired_count == 0 */
2894 if (vm_object_coalesce(VME_OBJECT(entry),
2895 VM_OBJECT_NULL,
2896 VME_OFFSET(entry),
2897 (vm_object_offset_t) 0,
2898 (vm_map_size_t)(entry->vme_end - entry->vme_start),
2899 (vm_map_size_t)(end - entry->vme_end))) {
2900 /*
2901 * Coalesced the two objects - can extend
2902 * the previous map entry to include the
2903 * new range.
2904 */
2905 map->size += (end - entry->vme_end);
2906 assert(entry->vme_start < end);
2907 assert(VM_MAP_PAGE_ALIGNED(end,
2908 VM_MAP_PAGE_MASK(map)));
2909 if (__improbable(vm_debug_events)) {
2910 DTRACE_VM5(map_entry_extend, vm_map_t, map, vm_map_entry_t, entry, vm_address_t, entry->vme_start, vm_address_t, entry->vme_end, vm_address_t, end);
2911 }
2912 entry->vme_end = end;
2913 if (map->holelistenabled) {
2914 vm_map_store_update_first_free(map, entry, TRUE);
2915 } else {
2916 vm_map_store_update_first_free(map, map->first_free, TRUE);
2917 }
2918 new_mapping_established = TRUE;
2919 RETURN(KERN_SUCCESS);
2920 }
2921 }
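/*
 * Editorial worked example (not part of the original source): if "entry"
 * already maps anonymous memory at [0x1000, 0x5000) with matching
 * protections and attributes, and this call asks for a compatible
 * anonymous region at [0x5000, 0x7000), the vm_object_coalesce() path
 * above simply grows "entry" (and its object) to cover [0x1000, 0x7000)
 * instead of creating a new map entry.
 */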
2922
2923 step = superpage_size ? SUPERPAGE_SIZE : (end - start);
2924 new_entry = NULL;
2925
2926 for (tmp2_start = start; tmp2_start < end; tmp2_start += step) {
2927 tmp2_end = tmp2_start + step;
2928 /*
2929 * Create a new entry
2930 *
2931 * XXX FBDP
2932 * The reserved "page zero" in each process's address space can
2933 * be arbitrarily large. Splitting it into separate objects and
2934 * therefore different VM map entries serves no purpose and just
2935 * slows down operations on the VM map, so let's not split the
2936 * allocation into chunks if the max protection is NONE. That
2937 * memory should never be accessible, so it will never get to the
2938 * default pager.
2939 */
2940 tmp_start = tmp2_start;
2941 if (object == VM_OBJECT_NULL &&
2942 size > chunk_size &&
2943 max_protection != VM_PROT_NONE &&
2944 superpage_size == 0) {
2945 tmp_end = tmp_start + chunk_size;
2946 } else {
2947 tmp_end = tmp2_end;
2948 }
2949 do {
2950 new_entry = vm_map_entry_insert(
2951 map, entry, tmp_start, tmp_end,
2952 object, offset, needs_copy,
2953 FALSE, FALSE,
2954 cur_protection, max_protection,
2955 VM_BEHAVIOR_DEFAULT,
2956 (entry_for_jit)? VM_INHERIT_NONE: inheritance,
2957 0,
2958 no_cache,
2959 permanent,
2960 no_copy_on_read,
2961 superpage_size,
2962 clear_map_aligned,
2963 is_submap,
2964 entry_for_jit,
2965 alias);
2966
2967 assert((object != kernel_object) || (VM_KERN_MEMORY_NONE != alias));
2968
2969 if (resilient_codesign &&
2970 !((cur_protection | max_protection) &
2971 (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
2972 new_entry->vme_resilient_codesign = TRUE;
2973 }
2974
2975 if (resilient_media &&
2976 (object == VM_OBJECT_NULL ||
2977 object->internal)) {
2978 new_entry->vme_resilient_media = TRUE;
2979 }
2980
2981 assert(!new_entry->iokit_acct);
2982 if (!is_submap &&
2983 object != VM_OBJECT_NULL &&
2984 (object->purgable != VM_PURGABLE_DENY ||
2985 object->vo_ledger_tag)) {
2986 assert(new_entry->use_pmap);
2987 assert(!new_entry->iokit_acct);
2988 /*
2989 * Turn off pmap accounting since
2990 * purgeable (or tagged) objects have their
2991 * own ledgers.
2992 */
2993 new_entry->use_pmap = FALSE;
2994 } else if (!is_submap &&
2995 iokit_acct &&
2996 object != VM_OBJECT_NULL &&
2997 object->internal) {
2998 /* alternate accounting */
2999 assert(!new_entry->iokit_acct);
3000 assert(new_entry->use_pmap);
3001 new_entry->iokit_acct = TRUE;
3002 new_entry->use_pmap = FALSE;
3003 DTRACE_VM4(
3004 vm_map_iokit_mapped_region,
3005 vm_map_t, map,
3006 vm_map_offset_t, new_entry->vme_start,
3007 vm_map_offset_t, new_entry->vme_end,
3008 int, VME_ALIAS(new_entry));
3009 vm_map_iokit_mapped_region(
3010 map,
3011 (new_entry->vme_end -
3012 new_entry->vme_start));
3013 } else if (!is_submap) {
3014 assert(!new_entry->iokit_acct);
3015 assert(new_entry->use_pmap);
3016 }
3017
3018 if (is_submap) {
3019 vm_map_t submap;
3020 boolean_t submap_is_64bit;
3021 boolean_t use_pmap;
3022
3023 assert(new_entry->is_sub_map);
3024 assert(!new_entry->use_pmap);
3025 assert(!new_entry->iokit_acct);
3026 submap = (vm_map_t) object;
3027 submap_is_64bit = vm_map_is_64bit(submap);
3028 use_pmap = vmk_flags.vmkf_nested_pmap;
3029 #ifndef NO_NESTED_PMAP
3030 if (use_pmap && submap->pmap == NULL) {
3031 ledger_t ledger = map->pmap->ledger;
3032 /* we need a sub pmap to nest... */
3033 submap->pmap = pmap_create_options(ledger, 0,
3034 submap_is_64bit ? PMAP_CREATE_64BIT : 0);
3035 if (submap->pmap == NULL) {
3036 /* let's proceed without nesting... */
3037 }
3038 #if defined(__arm__) || defined(__arm64__)
3039 else {
3040 pmap_set_nested(submap->pmap);
3041 }
3042 #endif
3043 }
3044 if (use_pmap && submap->pmap != NULL) {
3045 kr = pmap_nest(map->pmap,
3046 submap->pmap,
3047 tmp_start,
3048 tmp_start,
3049 tmp_end - tmp_start);
3050 if (kr != KERN_SUCCESS) {
3051 printf("vm_map_enter: "
3052 "pmap_nest(0x%llx,0x%llx) "
3053 "error 0x%x\n",
3054 (long long)tmp_start,
3055 (long long)tmp_end,
3056 kr);
3057 } else {
3058 /* we're now nested ! */
3059 new_entry->use_pmap = TRUE;
3060 pmap_empty = FALSE;
3061 }
3062 }
3063 #endif /* NO_NESTED_PMAP */
3064 }
3065 entry = new_entry;
3066
3067 if (superpage_size) {
3068 vm_page_t pages, m;
3069 vm_object_t sp_object;
3070 vm_object_offset_t sp_offset;
3071
3072 VME_OFFSET_SET(entry, 0);
3073
3074 /* allocate one superpage */
3075 kr = cpm_allocate(SUPERPAGE_SIZE, &pages, 0, SUPERPAGE_NBASEPAGES - 1, TRUE, 0);
3076 if (kr != KERN_SUCCESS) {
3077 /* deallocate whole range... */
3078 new_mapping_established = TRUE;
3079 /* ... but only up to "tmp_end" */
3080 size -= end - tmp_end;
3081 RETURN(kr);
3082 }
3083
3084 /* create one vm_object per superpage */
3085 sp_object = vm_object_allocate((vm_map_size_t)(entry->vme_end - entry->vme_start));
3086 sp_object->phys_contiguous = TRUE;
3087 sp_object->vo_shadow_offset = (vm_object_offset_t)VM_PAGE_GET_PHYS_PAGE(pages) * PAGE_SIZE;
3088 VME_OBJECT_SET(entry, sp_object);
3089 assert(entry->use_pmap);
3090
3091 /* enter the base pages into the object */
3092 vm_object_lock(sp_object);
3093 for (sp_offset = 0;
3094 sp_offset < SUPERPAGE_SIZE;
3095 sp_offset += PAGE_SIZE) {
3096 m = pages;
3097 pmap_zero_page(VM_PAGE_GET_PHYS_PAGE(m));
3098 pages = NEXT_PAGE(m);
3099 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
3100 vm_page_insert_wired(m, sp_object, sp_offset, VM_KERN_MEMORY_OSFMK);
3101 }
3102 vm_object_unlock(sp_object);
3103 }
3104 } while (tmp_end != tmp2_end &&
3105 (tmp_start = tmp_end) &&
3106 (tmp_end = (tmp2_end - tmp_end > chunk_size) ?
3107 tmp_end + chunk_size : tmp2_end));
3108 }
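/*
 * Editorial worked example (not part of the original source): for an
 * anonymous 10 GB allocation made with VM_FLAGS_4GB_CHUNK (so chunk_size is
 * 4 GB, object is VM_OBJECT_NULL and max_protection is not VM_PROT_NONE),
 * the do/while above produces three map entries covering 4 GB, 4 GB and
 * 2 GB, so no single lazily-allocated VM object ever has to span the whole
 * range.
 */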
3109
3110 new_mapping_established = TRUE;
3111
3112 BailOut:
3113 assert(map_locked == TRUE);
3114
3115 if (result == KERN_SUCCESS) {
3116 vm_prot_t pager_prot;
3117 memory_object_t pager;
3118
3119 #if DEBUG
3120 if (pmap_empty &&
3121 !(vmk_flags.vmkf_no_pmap_check)) {
3122 assert(vm_map_pmap_is_empty(map,
3123 *address,
3124 *address + size));
3125 }
3126 #endif /* DEBUG */
3127
3128 /*
3129 * For "named" VM objects, let the pager know that the
3130 * memory object is being mapped. Some pagers need to keep
3131 * track of this, to know when they can reclaim the memory
3132 * object, for example.
3133 * VM calls memory_object_map() for each mapping (specifying
3134 * the protection of each mapping) and calls
3135 * memory_object_last_unmap() when all the mappings are gone.
3136 */
3137 pager_prot = max_protection;
3138 if (needs_copy) {
3139 /*
3140 * Copy-On-Write mapping: won't modify
3141 * the memory object.
3142 */
3143 pager_prot &= ~VM_PROT_WRITE;
3144 }
3145 if (!is_submap &&
3146 object != VM_OBJECT_NULL &&
3147 object->named &&
3148 object->pager != MEMORY_OBJECT_NULL) {
3149 vm_object_lock(object);
3150 pager = object->pager;
3151 if (object->named &&
3152 pager != MEMORY_OBJECT_NULL) {
3153 assert(object->pager_ready);
3154 vm_object_mapping_wait(object, THREAD_UNINT);
3155 vm_object_mapping_begin(object);
3156 vm_object_unlock(object);
3157
3158 kr = memory_object_map(pager, pager_prot);
3159 assert(kr == KERN_SUCCESS);
3160
3161 vm_object_lock(object);
3162 vm_object_mapping_end(object);
3163 }
3164 vm_object_unlock(object);
3165 }
3166 }
3167
3168 assert(map_locked == TRUE);
3169
3170 if (!keep_map_locked) {
3171 vm_map_unlock(map);
3172 map_locked = FALSE;
3173 }
3174
3175 /*
3176 * We can't hold the map lock if we enter this block.
3177 */
3178
3179 if (result == KERN_SUCCESS) {
3180 /* Wire down the new entry if the user
3181 * requested all new map entries be wired.
3182 */
3183 if ((map->wiring_required) || (superpage_size)) {
3184 assert(!keep_map_locked);
3185 pmap_empty = FALSE; /* pmap won't be empty */
3186 kr = vm_map_wire_kernel(map, start, end,
3187 new_entry->protection, VM_KERN_MEMORY_MLOCK,
3188 TRUE);
3189 result = kr;
3190 }
3191
3192 }
3193
3194 if (result != KERN_SUCCESS) {
3195 if (new_mapping_established) {
3196 /*
3197 * We have to get rid of the new mappings since we
3198 * won't make them available to the user.
3199 * Try to do that atomically, to minimize the risk
3200 * that someone else creates new mappings in that range.
3201 */
3202 zap_new_map = vm_map_create(PMAP_NULL,
3203 *address,
3204 *address + size,
3205 map->hdr.entries_pageable);
3206 vm_map_set_page_shift(zap_new_map,
3207 VM_MAP_PAGE_SHIFT(map));
3208 vm_map_disable_hole_optimization(zap_new_map);
3209
3210 if (!map_locked) {
3211 vm_map_lock(map);
3212 map_locked = TRUE;
3213 }
3214 (void) vm_map_delete(map, *address, *address + size,
3215 (VM_MAP_REMOVE_SAVE_ENTRIES |
3216 VM_MAP_REMOVE_NO_MAP_ALIGN),
3217 zap_new_map);
3218 }
3219 if (zap_old_map != VM_MAP_NULL &&
3220 zap_old_map->hdr.nentries != 0) {
3221 vm_map_entry_t entry1, entry2;
3222
3223 /*
3224 * The new mapping failed. Attempt to restore
3225 * the old mappings, saved in the "zap_old_map".
3226 */
3227 if (!map_locked) {
3228 vm_map_lock(map);
3229 map_locked = TRUE;
3230 }
3231
3232 /* first check if the coast is still clear */
3233 start = vm_map_first_entry(zap_old_map)->vme_start;
3234 end = vm_map_last_entry(zap_old_map)->vme_end;
3235 if (vm_map_lookup_entry(map, start, &entry1) ||
3236 vm_map_lookup_entry(map, end, &entry2) ||
3237 entry1 != entry2) {
3238 /*
3239 * Part of that range has already been
3240 * re-mapped: we can't restore the old
3241 * mappings...
3242 */
3243 vm_map_enter_restore_failures++;
3244 } else {
3245 /*
3246 * Transfer the saved map entries from
3247 * "zap_old_map" to the original "map",
3248 * inserting them all after "entry1".
3249 */
3250 for (entry2 = vm_map_first_entry(zap_old_map);
3251 entry2 != vm_map_to_entry(zap_old_map);
3252 entry2 = vm_map_first_entry(zap_old_map)) {
3253 vm_map_size_t entry_size;
3254
3255 entry_size = (entry2->vme_end -
3256 entry2->vme_start);
3257 vm_map_store_entry_unlink(zap_old_map,
3258 entry2);
3259 zap_old_map->size -= entry_size;
3260 vm_map_store_entry_link(map, entry1, entry2,
3261 VM_MAP_KERNEL_FLAGS_NONE);
3262 map->size += entry_size;
3263 entry1 = entry2;
3264 }
3265 if (map->wiring_required) {
3266 /*
3267 * XXX TODO: we should rewire the
3268 * old pages here...
3269 */
3270 }
3271 vm_map_enter_restore_successes++;
3272 }
3273 }
3274 }
3275
3276 /*
3277 * The caller is responsible for releasing the lock if it requested to
3278 * keep the map locked.
3279 */
3280 if (map_locked && !keep_map_locked) {
3281 vm_map_unlock(map);
3282 }
3283
3284 /*
3285 * Get rid of the "zap_maps" and all the map entries that
3286 * they may still contain.
3287 */
3288 if (zap_old_map != VM_MAP_NULL) {
3289 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3290 zap_old_map = VM_MAP_NULL;
3291 }
3292 if (zap_new_map != VM_MAP_NULL) {
3293 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3294 zap_new_map = VM_MAP_NULL;
3295 }
3296
3297 return result;
3298
3299 #undef RETURN
3300 }
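/*
 * Editorial usage sketch (not part of the original source; "map" and "size"
 * are placeholders): a minimal anonymous, zero-fill mapping placed anywhere
 * in the target map could be requested along these lines.  The fourth
 * argument is the alignment mask and the FALSE is "needs_copy"; the backing
 * VM object is created lazily on first fault.
 *
 *	vm_map_offset_t addr = 0;
 *	kern_return_t kr;
 *
 *	kr = vm_map_enter(map, &addr, size, 0, VM_FLAGS_ANYWHERE,
 *	    VM_MAP_KERNEL_FLAGS_NONE, VM_MEMORY_MALLOC,
 *	    VM_OBJECT_NULL, 0, FALSE,
 *	    VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
 */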
3301
3302 #if __arm64__
3303 extern const struct memory_object_pager_ops fourk_pager_ops;
3304 kern_return_t
3305 vm_map_enter_fourk(
3306 vm_map_t map,
3307 vm_map_offset_t *address, /* IN/OUT */
3308 vm_map_size_t size,
3309 vm_map_offset_t mask,
3310 int flags,
3311 vm_map_kernel_flags_t vmk_flags,
3312 vm_tag_t alias,
3313 vm_object_t object,
3314 vm_object_offset_t offset,
3315 boolean_t needs_copy,
3316 vm_prot_t cur_protection,
3317 vm_prot_t max_protection,
3318 vm_inherit_t inheritance)
3319 {
3320 vm_map_entry_t entry, new_entry;
3321 vm_map_offset_t start, fourk_start;
3322 vm_map_offset_t end, fourk_end;
3323 vm_map_size_t fourk_size;
3324 kern_return_t result = KERN_SUCCESS;
3325 vm_map_t zap_old_map = VM_MAP_NULL;
3326 vm_map_t zap_new_map = VM_MAP_NULL;
3327 boolean_t map_locked = FALSE;
3328 boolean_t pmap_empty = TRUE;
3329 boolean_t new_mapping_established = FALSE;
3330 boolean_t keep_map_locked = vmk_flags.vmkf_keep_map_locked;
3331 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
3332 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
3333 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
3334 boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
3335 boolean_t is_submap = vmk_flags.vmkf_submap;
3336 boolean_t permanent = vmk_flags.vmkf_permanent;
3337 boolean_t no_copy_on_read = vmk_flags.vmkf_no_copy_on_read;
3338 boolean_t entry_for_jit = vmk_flags.vmkf_map_jit;
3339 // boolean_t iokit_acct = vmk_flags.vmkf_iokit_acct;
3340 unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
3341 vm_map_offset_t effective_min_offset, effective_max_offset;
3342 kern_return_t kr;
3343 boolean_t clear_map_aligned = FALSE;
3344 memory_object_t fourk_mem_obj;
3345 vm_object_t fourk_object;
3346 vm_map_offset_t fourk_pager_offset;
3347 int fourk_pager_index_start, fourk_pager_index_num;
3348 int cur_idx;
3349 boolean_t fourk_copy;
3350 vm_object_t copy_object;
3351 vm_object_offset_t copy_offset;
3352
3353 fourk_mem_obj = MEMORY_OBJECT_NULL;
3354 fourk_object = VM_OBJECT_NULL;
3355
3356 if (superpage_size) {
3357 return KERN_NOT_SUPPORTED;
3358 }
3359
3360 if ((cur_protection & VM_PROT_WRITE) &&
3361 (cur_protection & VM_PROT_EXECUTE) &&
3362 #if !CONFIG_EMBEDDED
3363 map != kernel_map &&
3364 cs_process_enforcement(NULL) &&
3365 #endif /* !CONFIG_EMBEDDED */
3366 !entry_for_jit) {
3367 DTRACE_VM3(cs_wx,
3368 uint64_t, 0,
3369 uint64_t, 0,
3370 vm_prot_t, cur_protection);
3371 printf("CODE SIGNING: %d[%s] %s: curprot cannot be write+execute. "
3372 "turning off execute\n",
3373 proc_selfpid(),
3374 (current_task()->bsd_info
3375 ? proc_name_address(current_task()->bsd_info)
3376 : "?"),
3377 __FUNCTION__);
3378 cur_protection &= ~VM_PROT_EXECUTE;
3379 }
3380
3381 /*
3382 * If the task has requested executable lockdown,
3383 * deny any new executable mapping.
3384 */
3385 if (map->map_disallow_new_exec == TRUE) {
3386 if (cur_protection & VM_PROT_EXECUTE) {
3387 return KERN_PROTECTION_FAILURE;
3388 }
3389 }
3390
3391 if (is_submap) {
3392 return KERN_NOT_SUPPORTED;
3393 }
3394 if (vmk_flags.vmkf_already) {
3395 return KERN_NOT_SUPPORTED;
3396 }
3397 if (purgable || entry_for_jit) {
3398 return KERN_NOT_SUPPORTED;
3399 }
3400
3401 effective_min_offset = map->min_offset;
3402
3403 if (vmk_flags.vmkf_beyond_max) {
3404 return KERN_NOT_SUPPORTED;
3405 } else {
3406 effective_max_offset = map->max_offset;
3407 }
3408
3409 if (size == 0 ||
3410 (offset & FOURK_PAGE_MASK) != 0) {
3411 *address = 0;
3412 return KERN_INVALID_ARGUMENT;
3413 }
3414
3415 #define RETURN(value) { result = value; goto BailOut; }
3416
3417 assert(VM_MAP_PAGE_ALIGNED(*address, FOURK_PAGE_MASK));
3418 assert(VM_MAP_PAGE_ALIGNED(size, FOURK_PAGE_MASK));
3419
3420 if (!anywhere && overwrite) {
3421 return KERN_NOT_SUPPORTED;
3422 }
3423 if (!anywhere && overwrite) {
3424 /*
3425 * Create a temporary VM map to hold the old mappings in the
3426 * affected area while we create the new one.
3427 * This avoids releasing the VM map lock in
3428 * vm_map_entry_delete() and allows atomicity
3429 * when we want to replace some mappings with a new one.
3430 * It also allows us to restore the old VM mappings if the
3431 * new mapping fails.
3432 */
3433 zap_old_map = vm_map_create(PMAP_NULL,
3434 *address,
3435 *address + size,
3436 map->hdr.entries_pageable);
3437 vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
3438 vm_map_disable_hole_optimization(zap_old_map);
3439 }
3440
3441 fourk_start = *address;
3442 fourk_size = size;
3443 fourk_end = fourk_start + fourk_size;
3444
3445 start = vm_map_trunc_page(*address, VM_MAP_PAGE_MASK(map));
3446 end = vm_map_round_page(fourk_end, VM_MAP_PAGE_MASK(map));
3447 size = end - start;
3448
3449 if (anywhere) {
3450 return KERN_NOT_SUPPORTED;
3451 } else {
3452 /*
3453 * Verify that:
3454 * the address doesn't itself violate
3455 * the mask requirement.
3456 */
3457
3458 vm_map_lock(map);
3459 map_locked = TRUE;
3460 if ((start & mask) != 0) {
3461 RETURN(KERN_NO_SPACE);
3462 }
3463
3464 /*
3465 * ... the address is within bounds
3466 */
3467
3468 end = start + size;
3469
3470 if ((start < effective_min_offset) ||
3471 (end > effective_max_offset) ||
3472 (start >= end)) {
3473 RETURN(KERN_INVALID_ADDRESS);
3474 }
3475
3476 if (overwrite && zap_old_map != VM_MAP_NULL) {
3477 /*
3478 * Fixed mapping and "overwrite" flag: attempt to
3479 * remove all existing mappings in the specified
3480 * address range, saving them in our "zap_old_map".
3481 */
3482 (void) vm_map_delete(map, start, end,
3483 (VM_MAP_REMOVE_SAVE_ENTRIES |
3484 VM_MAP_REMOVE_NO_MAP_ALIGN),
3485 zap_old_map);
3486 }
3487
3488 /*
3489 * ... the starting address isn't allocated
3490 */
3491 if (vm_map_lookup_entry(map, start, &entry)) {
3492 vm_object_t cur_object, shadow_object;
3493
3494 /*
3495 * We might already have some 4K mappings
3496 * in a 16K page here.
3497 */
3498
3499 if (entry->vme_end - entry->vme_start
3500 != SIXTEENK_PAGE_SIZE) {
3501 RETURN(KERN_NO_SPACE);
3502 }
3503 if (entry->is_sub_map) {
3504 RETURN(KERN_NO_SPACE);
3505 }
3506 if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
3507 RETURN(KERN_NO_SPACE);
3508 }
3509
3510 /* go all the way down the shadow chain */
3511 cur_object = VME_OBJECT(entry);
3512 vm_object_lock(cur_object);
3513 while (cur_object->shadow != VM_OBJECT_NULL) {
3514 shadow_object = cur_object->shadow;
3515 vm_object_lock(shadow_object);
3516 vm_object_unlock(cur_object);
3517 cur_object = shadow_object;
3518 shadow_object = VM_OBJECT_NULL;
3519 }
3520 if (cur_object->internal ||
3521 cur_object->pager == NULL) {
3522 vm_object_unlock(cur_object);
3523 RETURN(KERN_NO_SPACE);
3524 }
3525 if (cur_object->pager->mo_pager_ops
3526 != &fourk_pager_ops) {
3527 vm_object_unlock(cur_object);
3528 RETURN(KERN_NO_SPACE);
3529 }
3530 fourk_object = cur_object;
3531 fourk_mem_obj = fourk_object->pager;
3532
3533 /* keep the "4K" object alive */
3534 vm_object_reference_locked(fourk_object);
3535 vm_object_unlock(fourk_object);
3536
3537 /* merge permissions */
3538 entry->protection |= cur_protection;
3539 entry->max_protection |= max_protection;
3540 if ((entry->protection & (VM_PROT_WRITE |
3541 VM_PROT_EXECUTE)) ==
3542 (VM_PROT_WRITE | VM_PROT_EXECUTE) &&
3543 fourk_binary_compatibility_unsafe &&
3544 fourk_binary_compatibility_allow_wx) {
3545 /* write+execute: need to be "jit" */
3546 entry->used_for_jit = TRUE;
3547 }
3548
3549 goto map_in_fourk_pager;
3550 }
3551
3552 /*
3553 * ... the next region doesn't overlap the
3554 * end point.
3555 */
3556
3557 if ((entry->vme_next != vm_map_to_entry(map)) &&
3558 (entry->vme_next->vme_start < end)) {
3559 RETURN(KERN_NO_SPACE);
3560 }
3561 }
3562
3563 /*
3564 * At this point,
3565 * "start" and "end" should define the endpoints of the
3566 * available new range, and
3567 * "entry" should refer to the region before the new
3568 * range, and
3569 *
3570 * the map should be locked.
3571 */
3572
3573 /* create a new "4K" pager */
3574 fourk_mem_obj = fourk_pager_create();
3575 fourk_object = fourk_pager_to_vm_object(fourk_mem_obj);
3576 assert(fourk_object);
3577
3578 /* keep the "4" object alive */
3579 vm_object_reference(fourk_object);
3580
3581 /* create a "copy" object, to map the "4K" object copy-on-write */
3582 fourk_copy = TRUE;
3583 result = vm_object_copy_strategically(fourk_object,
3584 0,
3585 end - start,
3586 &copy_object,
3587 &copy_offset,
3588 &fourk_copy);
3589 assert(result == KERN_SUCCESS);
3590 assert(copy_object != VM_OBJECT_NULL);
3591 assert(copy_offset == 0);
3592
3593 /* take a reference on the copy object, for this mapping */
3594 vm_object_reference(copy_object);
3595
3596 /* map the "4K" pager's copy object */
3597 new_entry =
3598 vm_map_entry_insert(map, entry,
3599 vm_map_trunc_page(start,
3600 VM_MAP_PAGE_MASK(map)),
3601 vm_map_round_page(end,
3602 VM_MAP_PAGE_MASK(map)),
3603 copy_object,
3604 0, /* offset */
3605 FALSE, /* needs_copy */
3606 FALSE,
3607 FALSE,
3608 cur_protection, max_protection,
3609 VM_BEHAVIOR_DEFAULT,
3610 ((entry_for_jit)
3611 ? VM_INHERIT_NONE
3612 : inheritance),
3613 0,
3614 no_cache,
3615 permanent,
3616 no_copy_on_read,
3617 superpage_size,
3618 clear_map_aligned,
3619 is_submap,
3620 FALSE, /* jit */
3621 alias);
3622 entry = new_entry;
3623
3624 #if VM_MAP_DEBUG_FOURK
3625 if (vm_map_debug_fourk) {
3626 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] new pager %p\n",
3627 map,
3628 (uint64_t) entry->vme_start,
3629 (uint64_t) entry->vme_end,
3630 fourk_mem_obj);
3631 }
3632 #endif /* VM_MAP_DEBUG_FOURK */
3633
3634 new_mapping_established = TRUE;
3635
3636 map_in_fourk_pager:
3637 /* "map" the original "object" where it belongs in the "4K" pager */
3638 fourk_pager_offset = (fourk_start & SIXTEENK_PAGE_MASK);
3639 fourk_pager_index_start = (int) (fourk_pager_offset / FOURK_PAGE_SIZE);
3640 if (fourk_size > SIXTEENK_PAGE_SIZE) {
3641 fourk_pager_index_num = 4;
3642 } else {
3643 fourk_pager_index_num = (int) (fourk_size / FOURK_PAGE_SIZE);
3644 }
3645 if (fourk_pager_index_start + fourk_pager_index_num > 4) {
3646 fourk_pager_index_num = 4 - fourk_pager_index_start;
3647 }
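/*
 * Editorial worked example (not part of the original source): a 12 KB
 * request starting 4 KB into a 16 KB page gives
 *
 *	fourk_pager_offset      = 0x1000
 *	fourk_pager_index_start = 0x1000 / FOURK_PAGE_SIZE = 1
 *	fourk_pager_index_num   = 0x3000 / FOURK_PAGE_SIZE = 3
 *
 * so the loop below populates 4 KB slots 1 through 3 of the pager; the
 * clamp above only kicks in when start + num would exceed the 4 slots.
 */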
3648 for (cur_idx = 0;
3649 cur_idx < fourk_pager_index_num;
3650 cur_idx++) {
3651 vm_object_t old_object;
3652 vm_object_offset_t old_offset;
3653
3654 kr = fourk_pager_populate(fourk_mem_obj,
3655 TRUE, /* overwrite */
3656 fourk_pager_index_start + cur_idx,
3657 object,
3658 (object
3659 ? (offset +
3660 (cur_idx * FOURK_PAGE_SIZE))
3661 : 0),
3662 &old_object,
3663 &old_offset);
3664 #if VM_MAP_DEBUG_FOURK
3665 if (vm_map_debug_fourk) {
3666 if (old_object == (vm_object_t) -1 &&
3667 old_offset == (vm_object_offset_t) -1) {
3668 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] "
3669 "pager [%p:0x%llx] "
3670 "populate[%d] "
3671 "[object:%p,offset:0x%llx]\n",
3672 map,
3673 (uint64_t) entry->vme_start,
3674 (uint64_t) entry->vme_end,
3675 fourk_mem_obj,
3676 VME_OFFSET(entry),
3677 fourk_pager_index_start + cur_idx,
3678 object,
3679 (object
3680 ? (offset + (cur_idx * FOURK_PAGE_SIZE))
3681 : 0));
3682 } else {
3683 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] "
3684 "pager [%p:0x%llx] "
3685 "populate[%d] [object:%p,offset:0x%llx] "
3686 "old [%p:0x%llx]\n",
3687 map,
3688 (uint64_t) entry->vme_start,
3689 (uint64_t) entry->vme_end,
3690 fourk_mem_obj,
3691 VME_OFFSET(entry),
3692 fourk_pager_index_start + cur_idx,
3693 object,
3694 (object
3695 ? (offset + (cur_idx * FOURK_PAGE_SIZE))
3696 : 0),
3697 old_object,
3698 old_offset);
3699 }
3700 }
3701 #endif /* VM_MAP_DEBUG_FOURK */
3702
3703 assert(kr == KERN_SUCCESS);
3704 if (object != old_object &&
3705 object != VM_OBJECT_NULL &&
3706 object != (vm_object_t) -1) {
3707 vm_object_reference(object);
3708 }
3709 if (object != old_object &&
3710 old_object != VM_OBJECT_NULL &&
3711 old_object != (vm_object_t) -1) {
3712 vm_object_deallocate(old_object);
3713 }
3714 }
3715
3716 BailOut:
3717 assert(map_locked == TRUE);
3718
3719 if (fourk_object != VM_OBJECT_NULL) {
3720 vm_object_deallocate(fourk_object);
3721 fourk_object = VM_OBJECT_NULL;
3722 fourk_mem_obj = MEMORY_OBJECT_NULL;
3723 }
3724
3725 if (result == KERN_SUCCESS) {
3726 vm_prot_t pager_prot;
3727 memory_object_t pager;
3728
3729 #if DEBUG
3730 if (pmap_empty &&
3731 !(vmk_flags.vmkf_no_pmap_check)) {
3732 assert(vm_map_pmap_is_empty(map,
3733 *address,
3734 *address + size));
3735 }
3736 #endif /* DEBUG */
3737
3738 /*
3739 * For "named" VM objects, let the pager know that the
3740 * memory object is being mapped. Some pagers need to keep
3741 * track of this, to know when they can reclaim the memory
3742 * object, for example.
3743 * VM calls memory_object_map() for each mapping (specifying
3744 * the protection of each mapping) and calls
3745 * memory_object_last_unmap() when all the mappings are gone.
3746 */
3747 pager_prot = max_protection;
3748 if (needs_copy) {
3749 /*
3750 * Copy-On-Write mapping: won't modify
3751 * the memory object.
3752 */
3753 pager_prot &= ~VM_PROT_WRITE;
3754 }
3755 if (!is_submap &&
3756 object != VM_OBJECT_NULL &&
3757 object->named &&
3758 object->pager != MEMORY_OBJECT_NULL) {
3759 vm_object_lock(object);
3760 pager = object->pager;
3761 if (object->named &&
3762 pager != MEMORY_OBJECT_NULL) {
3763 assert(object->pager_ready);
3764 vm_object_mapping_wait(object, THREAD_UNINT);
3765 vm_object_mapping_begin(object);
3766 vm_object_unlock(object);
3767
3768 kr = memory_object_map(pager, pager_prot);
3769 assert(kr == KERN_SUCCESS);
3770
3771 vm_object_lock(object);
3772 vm_object_mapping_end(object);
3773 }
3774 vm_object_unlock(object);
3775 }
3776 if (!is_submap &&
3777 fourk_object != VM_OBJECT_NULL &&
3778 fourk_object->named &&
3779 fourk_object->pager != MEMORY_OBJECT_NULL) {
3780 vm_object_lock(fourk_object);
3781 pager = fourk_object->pager;
3782 if (fourk_object->named &&
3783 pager != MEMORY_OBJECT_NULL) {
3784 assert(fourk_object->pager_ready);
3785 vm_object_mapping_wait(fourk_object,
3786 THREAD_UNINT);
3787 vm_object_mapping_begin(fourk_object);
3788 vm_object_unlock(fourk_object);
3789
3790 kr = memory_object_map(pager, VM_PROT_READ);
3791 assert(kr == KERN_SUCCESS);
3792
3793 vm_object_lock(fourk_object);
3794 vm_object_mapping_end(fourk_object);
3795 }
3796 vm_object_unlock(fourk_object);
3797 }
3798 }
3799
3800 assert(map_locked == TRUE);
3801
3802 if (!keep_map_locked) {
3803 vm_map_unlock(map);
3804 map_locked = FALSE;
3805 }
3806
3807 /*
3808 * We can't hold the map lock if we enter this block.
3809 */
3810
3811 if (result == KERN_SUCCESS) {
3812 /* Wire down the new entry if the user
3813 * requested all new map entries be wired.
3814 */
3815 if ((map->wiring_required) || (superpage_size)) {
3816 assert(!keep_map_locked);
3817 pmap_empty = FALSE; /* pmap won't be empty */
3818 kr = vm_map_wire_kernel(map, start, end,
3819 new_entry->protection, VM_KERN_MEMORY_MLOCK,
3820 TRUE);
3821 result = kr;
3822 }
3823
3824 }
3825
3826 if (result != KERN_SUCCESS) {
3827 if (new_mapping_established) {
3828 /*
3829 * We have to get rid of the new mappings since we
3830 * won't make them available to the user.
3831 * Try to do that atomically, to minimize the risk
3832 * that someone else creates new mappings in that range.
3833 */
3834 zap_new_map = vm_map_create(PMAP_NULL,
3835 *address,
3836 *address + size,
3837 map->hdr.entries_pageable);
3838 vm_map_set_page_shift(zap_new_map,
3839 VM_MAP_PAGE_SHIFT(map));
3840 vm_map_disable_hole_optimization(zap_new_map);
3841
3842 if (!map_locked) {
3843 vm_map_lock(map);
3844 map_locked = TRUE;
3845 }
3846 (void) vm_map_delete(map, *address, *address + size,
3847 (VM_MAP_REMOVE_SAVE_ENTRIES |
3848 VM_MAP_REMOVE_NO_MAP_ALIGN),
3849 zap_new_map);
3850 }
3851 if (zap_old_map != VM_MAP_NULL &&
3852 zap_old_map->hdr.nentries != 0) {
3853 vm_map_entry_t entry1, entry2;
3854
3855 /*
3856 * The new mapping failed. Attempt to restore
3857 * the old mappings, saved in the "zap_old_map".
3858 */
3859 if (!map_locked) {
3860 vm_map_lock(map);
3861 map_locked = TRUE;
3862 }
3863
3864 /* first check if the coast is still clear */
3865 start = vm_map_first_entry(zap_old_map)->vme_start;
3866 end = vm_map_last_entry(zap_old_map)->vme_end;
3867 if (vm_map_lookup_entry(map, start, &entry1) ||
3868 vm_map_lookup_entry(map, end, &entry2) ||
3869 entry1 != entry2) {
3870 /*
3871 * Part of that range has already been
3872 * re-mapped: we can't restore the old
3873 * mappings...
3874 */
3875 vm_map_enter_restore_failures++;
3876 } else {
3877 /*
3878 * Transfer the saved map entries from
3879 * "zap_old_map" to the original "map",
3880 * inserting them all after "entry1".
3881 */
3882 for (entry2 = vm_map_first_entry(zap_old_map);
3883 entry2 != vm_map_to_entry(zap_old_map);
3884 entry2 = vm_map_first_entry(zap_old_map)) {
3885 vm_map_size_t entry_size;
3886
3887 entry_size = (entry2->vme_end -
3888 entry2->vme_start);
3889 vm_map_store_entry_unlink(zap_old_map,
3890 entry2);
3891 zap_old_map->size -= entry_size;
3892 vm_map_store_entry_link(map, entry1, entry2,
3893 VM_MAP_KERNEL_FLAGS_NONE);
3894 map->size += entry_size;
3895 entry1 = entry2;
3896 }
3897 if (map->wiring_required) {
3898 /*
3899 * XXX TODO: we should rewire the
3900 * old pages here...
3901 */
3902 }
3903 vm_map_enter_restore_successes++;
3904 }
3905 }
3906 }
3907
3908 /*
3909 * The caller is responsible for releasing the lock if it requested to
3910 * keep the map locked.
3911 */
3912 if (map_locked && !keep_map_locked) {
3913 vm_map_unlock(map);
3914 }
3915
3916 /*
3917 * Get rid of the "zap_maps" and all the map entries that
3918 * they may still contain.
3919 */
3920 if (zap_old_map != VM_MAP_NULL) {
3921 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3922 zap_old_map = VM_MAP_NULL;
3923 }
3924 if (zap_new_map != VM_MAP_NULL) {
3925 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3926 zap_new_map = VM_MAP_NULL;
3927 }
3928
3929 return result;
3930
3931 #undef RETURN
3932 }
3933 #endif /* __arm64__ */
3934
3935 /*
3936 * Counters for the prefault optimization.
3937 */
3938 int64_t vm_prefault_nb_pages = 0;
3939 int64_t vm_prefault_nb_bailout = 0;
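/*
 * vm_prefault_nb_pages counts pages successfully pre-entered by the prefault
 * loop in vm_map_enter_mem_object_helper() below; vm_prefault_nb_bailout
 * counts the times that loop gave up early because pmap_enter_options()
 * would not enter a page.
 */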
3940
3941 static kern_return_t
3942 vm_map_enter_mem_object_helper(
3943 vm_map_t target_map,
3944 vm_map_offset_t *address,
3945 vm_map_size_t initial_size,
3946 vm_map_offset_t mask,
3947 int flags,
3948 vm_map_kernel_flags_t vmk_flags,
3949 vm_tag_t tag,
3950 ipc_port_t port,
3951 vm_object_offset_t offset,
3952 boolean_t copy,
3953 vm_prot_t cur_protection,
3954 vm_prot_t max_protection,
3955 vm_inherit_t inheritance,
3956 upl_page_list_ptr_t page_list,
3957 unsigned int page_list_count)
3958 {
3959 vm_map_address_t map_addr;
3960 vm_map_size_t map_size;
3961 vm_object_t object;
3962 vm_object_size_t size;
3963 kern_return_t result;
3964 boolean_t mask_cur_protection, mask_max_protection;
3965 boolean_t kernel_prefault, try_prefault = (page_list_count != 0);
3966 vm_map_offset_t offset_in_mapping = 0;
3967 #if __arm64__
3968 boolean_t fourk = vmk_flags.vmkf_fourk;
3969 #endif /* __arm64__ */
3970
3971 assertf(vmk_flags.__vmkf_unused == 0, "vmk_flags unused=0x%x\n", vmk_flags.__vmkf_unused);
3972
3973 mask_cur_protection = cur_protection & VM_PROT_IS_MASK;
3974 mask_max_protection = max_protection & VM_PROT_IS_MASK;
3975 cur_protection &= ~VM_PROT_IS_MASK;
3976 max_protection &= ~VM_PROT_IS_MASK;
3977
3978 /*
3979 * Check arguments for validity
3980 */
3981 if ((target_map == VM_MAP_NULL) ||
3982 (cur_protection & ~VM_PROT_ALL) ||
3983 (max_protection & ~VM_PROT_ALL) ||
3984 (inheritance > VM_INHERIT_LAST_VALID) ||
3985 (try_prefault && (copy || !page_list)) ||
3986 initial_size == 0) {
3987 return KERN_INVALID_ARGUMENT;
3988 }
3989
3990 #if __arm64__
3991 if (fourk) {
3992 map_addr = vm_map_trunc_page(*address, FOURK_PAGE_MASK);
3993 map_size = vm_map_round_page(initial_size, FOURK_PAGE_MASK);
3994 } else
3995 #endif /* __arm64__ */
3996 {
3997 map_addr = vm_map_trunc_page(*address,
3998 VM_MAP_PAGE_MASK(target_map));
3999 map_size = vm_map_round_page(initial_size,
4000 VM_MAP_PAGE_MASK(target_map));
4001 }
4002 size = vm_object_round_page(initial_size);
4003
4004 /*
4005 * Find the vm object (if any) corresponding to this port.
4006 */
4007 if (!IP_VALID(port)) {
4008 object = VM_OBJECT_NULL;
4009 offset = 0;
4010 copy = FALSE;
4011 } else if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
4012 vm_named_entry_t named_entry;
4013
4014 named_entry = (vm_named_entry_t) port->ip_kobject;
4015
4016 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4017 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4018 offset += named_entry->data_offset;
4019 }
4020
4021 /* a few checks to make sure user is obeying rules */
4022 if (size == 0) {
4023 if (offset >= named_entry->size) {
4024 return KERN_INVALID_RIGHT;
4025 }
4026 size = named_entry->size - offset;
4027 }
4028 if (mask_max_protection) {
4029 max_protection &= named_entry->protection;
4030 }
4031 if (mask_cur_protection) {
4032 cur_protection &= named_entry->protection;
4033 }
4034 if ((named_entry->protection & max_protection) !=
4035 max_protection) {
4036 return KERN_INVALID_RIGHT;
4037 }
4038 if ((named_entry->protection & cur_protection) !=
4039 cur_protection) {
4040 return KERN_INVALID_RIGHT;
4041 }
4042 if (offset + size < offset) {
4043 /* overflow */
4044 return KERN_INVALID_ARGUMENT;
4045 }
4046 if (named_entry->size < (offset + initial_size)) {
4047 return KERN_INVALID_ARGUMENT;
4048 }
4049
4050 if (named_entry->is_copy) {
4051 /* for a vm_map_copy, we can only map it whole */
4052 if ((size != named_entry->size) &&
4053 (vm_map_round_page(size,
4054 VM_MAP_PAGE_MASK(target_map)) ==
4055 named_entry->size)) {
4056 /* XXX FBDP use the rounded size... */
4057 size = vm_map_round_page(
4058 size,
4059 VM_MAP_PAGE_MASK(target_map));
4060 }
4061
4062 if (!(flags & VM_FLAGS_ANYWHERE) &&
4063 (offset != 0 ||
4064 size != named_entry->size)) {
4065 /*
4066 * XXX for a mapping at a "fixed" address,
4067 * we can't trim after mapping the whole
4068 * memory entry, so reject a request for a
4069 * partial mapping.
4070 */
4071 return KERN_INVALID_ARGUMENT;
4072 }
4073 }
4074
4075 /* the caller's "offset" is relative to the start of the named entry; */
4076 /* add the named entry's own offset to get the offset in the object */
4077 offset = offset + named_entry->offset;
4078
4079 if (!VM_MAP_PAGE_ALIGNED(size,
4080 VM_MAP_PAGE_MASK(target_map))) {
4081 /*
4082 * Let's not map more than requested;
4083 * vm_map_enter() will handle this "not map-aligned"
4084 * case.
4085 */
4086 map_size = size;
4087 }
4088
4089 named_entry_lock(named_entry);
4090 if (named_entry->is_sub_map) {
4091 vm_map_t submap;
4092
4093 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4094 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4095 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap.");
4096 }
4097
4098 submap = named_entry->backing.map;
4099 vm_map_lock(submap);
4100 vm_map_reference(submap);
4101 vm_map_unlock(submap);
4102 named_entry_unlock(named_entry);
4103
4104 vmk_flags.vmkf_submap = TRUE;
4105
4106 result = vm_map_enter(target_map,
4107 &map_addr,
4108 map_size,
4109 mask,
4110 flags,
4111 vmk_flags,
4112 tag,
4113 (vm_object_t)(uintptr_t) submap,
4114 offset,
4115 copy,
4116 cur_protection,
4117 max_protection,
4118 inheritance);
4119 if (result != KERN_SUCCESS) {
4120 vm_map_deallocate(submap);
4121 } else {
4122 /*
4123 * No need to lock "submap" just to check its
4124 * "mapped" flag: that flag is never reset
4125 * once it's been set and if we race, we'll
4126 * just end up setting it twice, which is OK.
4127 */
4128 if (submap->mapped_in_other_pmaps == FALSE &&
4129 vm_map_pmap(submap) != PMAP_NULL &&
4130 vm_map_pmap(submap) !=
4131 vm_map_pmap(target_map)) {
4132 /*
4133 * This submap is being mapped in a map
4134 * that uses a different pmap.
4135 * Set its "mapped_in_other_pmaps" flag
4136 * to indicate that we now need to
4137 * remove mappings from all pmaps rather
4138 * than just the submap's pmap.
4139 */
4140 vm_map_lock(submap);
4141 submap->mapped_in_other_pmaps = TRUE;
4142 vm_map_unlock(submap);
4143 }
4144 *address = map_addr;
4145 }
4146 return result;
4147 } else if (named_entry->is_copy) {
4148 kern_return_t kr;
4149 vm_map_copy_t copy_map;
4150 vm_map_entry_t copy_entry;
4151 vm_map_offset_t copy_addr;
4152
4153 if (flags & ~(VM_FLAGS_FIXED |
4154 VM_FLAGS_ANYWHERE |
4155 VM_FLAGS_OVERWRITE |
4156 VM_FLAGS_RETURN_4K_DATA_ADDR |
4157 VM_FLAGS_RETURN_DATA_ADDR |
4158 VM_FLAGS_ALIAS_MASK)) {
4159 named_entry_unlock(named_entry);
4160 return KERN_INVALID_ARGUMENT;
4161 }
4162
4163 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4164 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4165 offset_in_mapping = offset - vm_object_trunc_page(offset);
4166 if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR) {
4167 offset_in_mapping &= ~((signed)(0xFFF));
4168 }
4169 offset = vm_object_trunc_page(offset);
4170 map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset;
4171 }
4172
4173 copy_map = named_entry->backing.copy;
4174 assert(copy_map->type == VM_MAP_COPY_ENTRY_LIST);
4175 if (copy_map->type != VM_MAP_COPY_ENTRY_LIST) {
4176 /* unsupported type; should not happen */
4177 printf("vm_map_enter_mem_object: "
4178 "memory_entry->backing.copy "
4179 "unsupported type 0x%x\n",
4180 copy_map->type);
4181 named_entry_unlock(named_entry);
4182 return KERN_INVALID_ARGUMENT;
4183 }
4184
4185 /* reserve a contiguous range */
4186 kr = vm_map_enter(target_map,
4187 &map_addr,
4188 /* map whole mem entry, trim later: */
4189 named_entry->size,
4190 mask,
4191 flags & (VM_FLAGS_ANYWHERE |
4192 VM_FLAGS_OVERWRITE |
4193 VM_FLAGS_RETURN_4K_DATA_ADDR |
4194 VM_FLAGS_RETURN_DATA_ADDR),
4195 vmk_flags,
4196 tag,
4197 VM_OBJECT_NULL,
4198 0,
4199 FALSE, /* copy */
4200 cur_protection,
4201 max_protection,
4202 inheritance);
4203 if (kr != KERN_SUCCESS) {
4204 named_entry_unlock(named_entry);
4205 return kr;
4206 }
4207
4208 copy_addr = map_addr;
4209
4210 for (copy_entry = vm_map_copy_first_entry(copy_map);
4211 copy_entry != vm_map_copy_to_entry(copy_map);
4212 copy_entry = copy_entry->vme_next) {
4213 int remap_flags;
4214 vm_map_kernel_flags_t vmk_remap_flags;
4215 vm_map_t copy_submap;
4216 vm_object_t copy_object;
4217 vm_map_size_t copy_size;
4218 vm_object_offset_t copy_offset;
4219 int copy_vm_alias;
4220
4221 remap_flags = 0;
4222 vmk_remap_flags = VM_MAP_KERNEL_FLAGS_NONE;
4223
4224 copy_object = VME_OBJECT(copy_entry);
4225 copy_offset = VME_OFFSET(copy_entry);
4226 copy_size = (copy_entry->vme_end -
4227 copy_entry->vme_start);
4228 VM_GET_FLAGS_ALIAS(flags, copy_vm_alias);
4229 if (copy_vm_alias == 0) {
4230 /*
4231 * Caller does not want a specific
4232 * alias for this new mapping: use
4233 * the alias of the original mapping.
4234 */
4235 copy_vm_alias = VME_ALIAS(copy_entry);
4236 }
4237
4238 /* sanity check */
4239 if ((copy_addr + copy_size) >
4240 (map_addr +
4241 named_entry->size /* XXX full size */)) {
4242 /* over-mapping too much !? */
4243 kr = KERN_INVALID_ARGUMENT;
4244 /* abort */
4245 break;
4246 }
4247
4248 /* take a reference on the object */
4249 if (copy_entry->is_sub_map) {
4250 vmk_remap_flags.vmkf_submap = TRUE;
4251 copy_submap = VME_SUBMAP(copy_entry);
4252 vm_map_lock(copy_submap);
4253 vm_map_reference(copy_submap);
4254 vm_map_unlock(copy_submap);
4255 copy_object = (vm_object_t)(uintptr_t) copy_submap;
4256 } else if (!copy &&
4257 copy_object != VM_OBJECT_NULL &&
4258 (copy_entry->needs_copy ||
4259 copy_object->shadowed ||
4260 (!copy_object->true_share &&
4261 !copy_entry->is_shared &&
4262 copy_object->vo_size > copy_size))) {
4263 /*
4264 * We need to resolve our side of this
4265 * "symmetric" copy-on-write now; we
4266 * need a new object to map and share,
4267 * instead of the current one which
4268 * might still be shared with the
4269 * original mapping.
4270 *
4271 * Note: A "vm_map_copy_t" does not
4272 * have a lock but we're protected by
4273 * the named entry's lock here.
4274 */
4275 // assert(copy_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);
4276 VME_OBJECT_SHADOW(copy_entry, copy_size);
4277 if (!copy_entry->needs_copy &&
4278 copy_entry->protection & VM_PROT_WRITE) {
4279 vm_prot_t prot;
4280
4281 prot = copy_entry->protection & ~VM_PROT_WRITE;
4282 vm_object_pmap_protect(copy_object,
4283 copy_offset,
4284 copy_size,
4285 PMAP_NULL,
4286 0,
4287 prot);
4288 }
4289
4290 copy_entry->needs_copy = FALSE;
4291 copy_entry->is_shared = TRUE;
4292 copy_object = VME_OBJECT(copy_entry);
4293 copy_offset = VME_OFFSET(copy_entry);
4294 vm_object_lock(copy_object);
4295 vm_object_reference_locked(copy_object);
4296 if (copy_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
4297 /* we're about to make a shared mapping of this object */
4298 copy_object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
4299 copy_object->true_share = TRUE;
4300 }
4301 vm_object_unlock(copy_object);
4302 } else {
4303 /*
4304 * We already have the right object
4305 * to map.
4306 */
4307 copy_object = VME_OBJECT(copy_entry);
4308 vm_object_reference(copy_object);
4309 }
4310
4311 /* over-map the object into destination */
4312 remap_flags |= flags;
4313 remap_flags |= VM_FLAGS_FIXED;
4314 remap_flags |= VM_FLAGS_OVERWRITE;
4315 remap_flags &= ~VM_FLAGS_ANYWHERE;
4316 if (!copy && !copy_entry->is_sub_map) {
4317 /*
4318 * copy-on-write should have been
4319 * resolved at this point, or we would
4320 * end up sharing instead of copying.
4321 */
4322 assert(!copy_entry->needs_copy);
4323 }
4324 #if !CONFIG_EMBEDDED
4325 if (copy_entry->used_for_jit) {
4326 vmk_remap_flags.vmkf_map_jit = TRUE;
4327 }
4328 #endif /* !CONFIG_EMBEDDED */
4329 kr = vm_map_enter(target_map,
4330 &copy_addr,
4331 copy_size,
4332 (vm_map_offset_t) 0,
4333 remap_flags,
4334 vmk_remap_flags,
4335 copy_vm_alias,
4336 copy_object,
4337 copy_offset,
4338 ((copy_object == NULL) ? FALSE : copy),
4339 cur_protection,
4340 max_protection,
4341 inheritance);
4342 if (kr != KERN_SUCCESS) {
4343 if (copy_entry->is_sub_map) {
4344 vm_map_deallocate(copy_submap);
4345 } else {
4346 vm_object_deallocate(copy_object);
4347 }
4348 /* abort */
4349 break;
4350 }
4351
4352 /* next mapping */
4353 copy_addr += copy_size;
4354 }
4355
4356 if (kr == KERN_SUCCESS) {
4357 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4358 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4359 *address = map_addr + offset_in_mapping;
4360 } else {
4361 *address = map_addr;
4362 }
4363
4364 if (offset) {
4365 /*
4366 * Trim in front, from 0 to "offset".
4367 */
4368 vm_map_remove(target_map,
4369 map_addr,
4370 map_addr + offset,
4371 VM_MAP_REMOVE_NO_FLAGS);
4372 *address += offset;
4373 }
4374 if (offset + map_size < named_entry->size) {
4375 /*
4376 * Trim in back, from
4377 * "offset + map_size" to
4378 * "named_entry->size".
4379 */
4380 vm_map_remove(target_map,
4381 (map_addr +
4382 offset + map_size),
4383 (map_addr +
4384 named_entry->size),
4385 VM_MAP_REMOVE_NO_FLAGS);
4386 }
4387 }
4388 named_entry_unlock(named_entry);
4389
4390 if (kr != KERN_SUCCESS) {
4391 if (!(flags & VM_FLAGS_OVERWRITE)) {
4392 /* deallocate the contiguous range */
4393 (void) vm_deallocate(target_map,
4394 map_addr,
4395 map_size);
4396 }
4397 }
4398
4399 return kr;
4400 } else {
4401 unsigned int access;
4402 vm_prot_t protections;
4403 unsigned int wimg_mode;
4404
4405 /* we are mapping a VM object */
4406
4407 protections = named_entry->protection & VM_PROT_ALL;
4408 access = GET_MAP_MEM(named_entry->protection);
4409
4410 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4411 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4412 offset_in_mapping = offset - vm_object_trunc_page(offset);
4413 if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR) {
4414 offset_in_mapping &= ~((signed)(0xFFF));
4415 }
4416 offset = vm_object_trunc_page(offset);
4417 map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset;
4418 }
4419
4420 object = named_entry->backing.object;
4421 assert(object != VM_OBJECT_NULL);
4422 vm_object_lock(object);
4423 named_entry_unlock(named_entry);
4424
4425 vm_object_reference_locked(object);
4426
4427 wimg_mode = object->wimg_bits;
4428 vm_prot_to_wimg(access, &wimg_mode);
4429 if (object->wimg_bits != wimg_mode) {
4430 vm_object_change_wimg_mode(object, wimg_mode);
4431 }
4432
4433 vm_object_unlock(object);
4434 }
4435 } else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
4436 /*
4437 * JMM - This is temporary until we unify named entries
4438 * and raw memory objects.
4439 *
4440 * Detected fake ip_kotype for a memory object. In
4441 * this case, the port isn't really a port at all, but
4442 * instead is just a raw memory object.
4443 */
4444 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4445 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4446 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for raw memory object.");
4447 }
4448
4449 object = memory_object_to_vm_object((memory_object_t)port);
4450 if (object == VM_OBJECT_NULL) {
4451 return KERN_INVALID_OBJECT;
4452 }
4453 vm_object_reference(object);
4454
4455 /* wait for object (if any) to be ready */
4456 if (object != VM_OBJECT_NULL) {
4457 if (object == kernel_object) {
4458 printf("Warning: Attempt to map kernel object"
4459 " by a non-private kernel entity\n");
4460 return KERN_INVALID_OBJECT;
4461 }
4462 if (!object->pager_ready) {
4463 vm_object_lock(object);
4464
4465 while (!object->pager_ready) {
4466 vm_object_wait(object,
4467 VM_OBJECT_EVENT_PAGER_READY,
4468 THREAD_UNINT);
4469 vm_object_lock(object);
4470 }
4471 vm_object_unlock(object);
4472 }
4473 }
4474 } else {
4475 return KERN_INVALID_OBJECT;
4476 }
4477
4478 if (object != VM_OBJECT_NULL &&
4479 object->named &&
4480 object->pager != MEMORY_OBJECT_NULL &&
4481 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4482 memory_object_t pager;
4483 vm_prot_t pager_prot;
4484 kern_return_t kr;
4485
4486 /*
4487 * For "named" VM objects, let the pager know that the
4488 * memory object is being mapped. Some pagers need to keep
4489 * track of this, to know when they can reclaim the memory
4490 * object, for example.
4491 * VM calls memory_object_map() for each mapping (specifying
4492 * the protection of each mapping) and calls
4493 * memory_object_last_unmap() when all the mappings are gone.
4494 */
4495 pager_prot = max_protection;
4496 if (copy) {
4497 /*
4498 * Copy-On-Write mapping: won't modify the
4499 * memory object.
4500 */
4501 pager_prot &= ~VM_PROT_WRITE;
4502 }
4503 vm_object_lock(object);
4504 pager = object->pager;
4505 if (object->named &&
4506 pager != MEMORY_OBJECT_NULL &&
4507 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4508 assert(object->pager_ready);
4509 vm_object_mapping_wait(object, THREAD_UNINT);
4510 vm_object_mapping_begin(object);
4511 vm_object_unlock(object);
4512
4513 kr = memory_object_map(pager, pager_prot);
4514 assert(kr == KERN_SUCCESS);
4515
4516 vm_object_lock(object);
4517 vm_object_mapping_end(object);
4518 }
4519 vm_object_unlock(object);
4520 }
4521
4522 /*
4523 * Perform the copy if requested
4524 */
4525
4526 if (copy) {
4527 vm_object_t new_object;
4528 vm_object_offset_t new_offset;
4529
4530 result = vm_object_copy_strategically(object, offset,
4531 map_size,
4532 &new_object, &new_offset,
4533 &copy);
4534
4535
4536 if (result == KERN_MEMORY_RESTART_COPY) {
4537 boolean_t success;
4538 boolean_t src_needs_copy;
4539
4540 /*
4541 * XXX
4542 * We currently ignore src_needs_copy.
4543 * This really is the issue of how to make
4544 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
4545 * non-kernel users to use. Solution forthcoming.
4546 * In the meantime, since we don't allow non-kernel
4547 * memory managers to specify symmetric copy,
4548 * we won't run into problems here.
4549 */
4550 new_object = object;
4551 new_offset = offset;
4552 success = vm_object_copy_quickly(&new_object,
4553 new_offset,
4554 map_size,
4555 &src_needs_copy,
4556 &copy);
4557 assert(success);
4558 result = KERN_SUCCESS;
4559 }
4560 /*
4561 * Throw away the reference to the
4562 * original object, as it won't be mapped.
4563 */
4564
4565 vm_object_deallocate(object);
4566
4567 if (result != KERN_SUCCESS) {
4568 return result;
4569 }
4570
4571 object = new_object;
4572 offset = new_offset;
4573 }
4574
4575 /*
4576 * If non-kernel users want to try to prefault pages, the mapping and the
4577 * prefault need to be done atomically.
4578 */
4579 kernel_prefault = (try_prefault && vm_kernel_map_is_kernel(target_map));
4580 vmk_flags.vmkf_keep_map_locked = (try_prefault && !kernel_prefault);
4581
4582 #if __arm64__
4583 if (fourk) {
4584 /* map this object in a "4K" pager */
4585 result = vm_map_enter_fourk(target_map,
4586 &map_addr,
4587 map_size,
4588 (vm_map_offset_t) mask,
4589 flags,
4590 vmk_flags,
4591 tag,
4592 object,
4593 offset,
4594 copy,
4595 cur_protection,
4596 max_protection,
4597 inheritance);
4598 } else
4599 #endif /* __arm64__ */
4600 {
4601 result = vm_map_enter(target_map,
4602 &map_addr, map_size,
4603 (vm_map_offset_t)mask,
4604 flags,
4605 vmk_flags,
4606 tag,
4607 object, offset,
4608 copy,
4609 cur_protection, max_protection,
4610 inheritance);
4611 }
4612 if (result != KERN_SUCCESS) {
4613 vm_object_deallocate(object);
4614 }
4615
4616 /*
4617 * Try to prefault, and do not forget to release the vm map lock.
4618 */
4619 if (result == KERN_SUCCESS && try_prefault) {
4620 mach_vm_address_t va = map_addr;
4621 kern_return_t kr = KERN_SUCCESS;
4622 unsigned int i = 0;
4623 int pmap_options;
4624
4625 pmap_options = kernel_prefault ? 0 : PMAP_OPTIONS_NOWAIT;
4626 if (object->internal) {
4627 pmap_options |= PMAP_OPTIONS_INTERNAL;
4628 }
4629
4630 for (i = 0; i < page_list_count; ++i) {
4631 if (!UPL_VALID_PAGE(page_list, i)) {
4632 if (kernel_prefault) {
4633 assertf(FALSE, "kernel_prefault && !UPL_VALID_PAGE");
4634 result = KERN_MEMORY_ERROR;
4635 break;
4636 }
4637 } else {
4638 /*
4639 * If this call fails, we should stop trying to
4640 * optimize, since further calls are likely to
4641 * fail too.
4642 *
4643 * We are not going to report an error for such a
4644 * failure, though: that's an optimization, not
4645 * something critical.
4646 */
4647 kr = pmap_enter_options(target_map->pmap,
4648 va, UPL_PHYS_PAGE(page_list, i),
4649 cur_protection, VM_PROT_NONE,
4650 0, TRUE, pmap_options, NULL);
4651 if (kr != KERN_SUCCESS) {
4652 OSIncrementAtomic64(&vm_prefault_nb_bailout);
4653 if (kernel_prefault) {
4654 result = kr;
4655 }
4656 break;
4657 }
4658 OSIncrementAtomic64(&vm_prefault_nb_pages);
4659 }
4660
4661 /* Next virtual address */
4662 va += PAGE_SIZE;
4663 }
4664 if (vmk_flags.vmkf_keep_map_locked) {
4665 vm_map_unlock(target_map);
4666 }
4667 }
4668
4669 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4670 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4671 *address = map_addr + offset_in_mapping;
4672 } else {
4673 *address = map_addr;
4674 }
4675 return result;
4676 }
4677
4678 kern_return_t
4679 vm_map_enter_mem_object(
4680 vm_map_t target_map,
4681 vm_map_offset_t *address,
4682 vm_map_size_t initial_size,
4683 vm_map_offset_t mask,
4684 int flags,
4685 vm_map_kernel_flags_t vmk_flags,
4686 vm_tag_t tag,
4687 ipc_port_t port,
4688 vm_object_offset_t offset,
4689 boolean_t copy,
4690 vm_prot_t cur_protection,
4691 vm_prot_t max_protection,
4692 vm_inherit_t inheritance)
4693 {
4694 kern_return_t ret;
4695
4696 ret = vm_map_enter_mem_object_helper(target_map,
4697 address,
4698 initial_size,
4699 mask,
4700 flags,
4701 vmk_flags,
4702 tag,
4703 port,
4704 offset,
4705 copy,
4706 cur_protection,
4707 max_protection,
4708 inheritance,
4709 NULL,
4710 0);
4711
4712 #if KASAN
4713 if (ret == KERN_SUCCESS && address && target_map->pmap == kernel_pmap) {
4714 kasan_notify_address(*address, initial_size);
4715 }
4716 #endif
4717
4718 return ret;
4719 }
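/*
 * Illustrative sketch: one common path into vm_map_enter_mem_object() is a
 * user process creating a named entry and mapping it.  The hypothetical
 * user-space caller below (function and variable names invented for the
 * example) shows the parameter flow only: mach_make_memory_entry_64()
 * produces the IKOT_NAMED_ENTRY port that the helper above inspects, and
 * mach_vm_map() funnels into this function in the kernel.
 */
#if 0   /* example only -- never compiled */
#include <mach/mach.h>
#include <mach/mach_vm.h>

static kern_return_t
example_map_named_entry(void)
{
	mach_vm_address_t    src = 0;
	mach_vm_address_t    dst = 0;
	memory_object_size_t size = 16 * 4096;
	mach_port_t          entry = MACH_PORT_NULL;
	kern_return_t        kr;

	/* back the named entry with freshly allocated VM */
	kr = mach_vm_allocate(mach_task_self(), &src, size, VM_FLAGS_ANYWHERE);
	if (kr != KERN_SUCCESS) {
		return kr;
	}

	/* create the named-entry port covering [src, src + size) */
	kr = mach_make_memory_entry_64(mach_task_self(), &size, src,
	    VM_PROT_READ | VM_PROT_WRITE, &entry, MACH_PORT_NULL);
	if (kr != KERN_SUCCESS) {
		return kr;
	}

	/* map it again; the kernel side lands in vm_map_enter_mem_object() */
	kr = mach_vm_map(mach_task_self(), &dst, size, 0, VM_FLAGS_ANYWHERE,
	    entry, 0, FALSE,
	    VM_PROT_READ | VM_PROT_WRITE, VM_PROT_READ | VM_PROT_WRITE,
	    VM_INHERIT_DEFAULT);

	mach_port_deallocate(mach_task_self(), entry);
	return kr;
}
#endif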
4720
4721 kern_return_t
4722 vm_map_enter_mem_object_prefault(
4723 vm_map_t target_map,
4724 vm_map_offset_t *address,
4725 vm_map_size_t initial_size,
4726 vm_map_offset_t mask,
4727 int flags,
4728 vm_map_kernel_flags_t vmk_flags,
4729 vm_tag_t tag,
4730 ipc_port_t port,
4731 vm_object_offset_t offset,
4732 vm_prot_t cur_protection,
4733 vm_prot_t max_protection,
4734 upl_page_list_ptr_t page_list,
4735 unsigned int page_list_count)
4736 {
4737 kern_return_t ret;
4738
4739 ret = vm_map_enter_mem_object_helper(target_map,
4740 address,
4741 initial_size,
4742 mask,
4743 flags,
4744 vmk_flags,
4745 tag,
4746 port,
4747 offset,
4748 FALSE,
4749 cur_protection,
4750 max_protection,
4751 VM_INHERIT_DEFAULT,
4752 page_list,
4753 page_list_count);
4754
4755 #if KASAN
4756 if (ret == KERN_SUCCESS && address && target_map->pmap == kernel_pmap) {
4757 kasan_notify_address(*address, initial_size);
4758 }
4759 #endif
4760
4761 return ret;
4762 }
4763
4764
4765 kern_return_t
4766 vm_map_enter_mem_object_control(
4767 vm_map_t target_map,
4768 vm_map_offset_t *address,
4769 vm_map_size_t initial_size,
4770 vm_map_offset_t mask,
4771 int flags,
4772 vm_map_kernel_flags_t vmk_flags,
4773 vm_tag_t tag,
4774 memory_object_control_t control,
4775 vm_object_offset_t offset,
4776 boolean_t copy,
4777 vm_prot_t cur_protection,
4778 vm_prot_t max_protection,
4779 vm_inherit_t inheritance)
4780 {
4781 vm_map_address_t map_addr;
4782 vm_map_size_t map_size;
4783 vm_object_t object;
4784 vm_object_size_t size;
4785 kern_return_t result;
4786 memory_object_t pager;
4787 vm_prot_t pager_prot;
4788 kern_return_t kr;
4789 #if __arm64__
4790 boolean_t fourk = vmk_flags.vmkf_fourk;
4791 #endif /* __arm64__ */
4792
4793 /*
4794 * Check arguments for validity
4795 */
4796 if ((target_map == VM_MAP_NULL) ||
4797 (cur_protection & ~VM_PROT_ALL) ||
4798 (max_protection & ~VM_PROT_ALL) ||
4799 (inheritance > VM_INHERIT_LAST_VALID) ||
4800 initial_size == 0) {
4801 return KERN_INVALID_ARGUMENT;
4802 }
4803
4804 #if __arm64__
4805 if (fourk) {
4806 map_addr = vm_map_trunc_page(*address,
4807 FOURK_PAGE_MASK);
4808 map_size = vm_map_round_page(initial_size,
4809 FOURK_PAGE_MASK);
4810 } else
4811 #endif /* __arm64__ */
4812 {
4813 map_addr = vm_map_trunc_page(*address,
4814 VM_MAP_PAGE_MASK(target_map));
4815 map_size = vm_map_round_page(initial_size,
4816 VM_MAP_PAGE_MASK(target_map));
4817 }
4818 size = vm_object_round_page(initial_size);
4819
4820 object = memory_object_control_to_vm_object(control);
4821
4822 if (object == VM_OBJECT_NULL) {
4823 return KERN_INVALID_OBJECT;
4824 }
4825
4826 if (object == kernel_object) {
4827 printf("Warning: Attempt to map kernel object"
4828 " by a non-private kernel entity\n");
4829 return KERN_INVALID_OBJECT;
4830 }
4831
4832 vm_object_lock(object);
4833 object->ref_count++;
4834 vm_object_res_reference(object);
4835
4836 /*
4837 * For "named" VM objects, let the pager know that the
4838 * memory object is being mapped. Some pagers need to keep
4839 * track of this, to know when they can reclaim the memory
4840 * object, for example.
4841 * VM calls memory_object_map() for each mapping (specifying
4842 * the protection of each mapping) and calls
4843 * memory_object_last_unmap() when all the mappings are gone.
4844 */
4845 pager_prot = max_protection;
4846 if (copy) {
4847 pager_prot &= ~VM_PROT_WRITE;
4848 }
4849 pager = object->pager;
4850 if (object->named &&
4851 pager != MEMORY_OBJECT_NULL &&
4852 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4853 assert(object->pager_ready);
4854 vm_object_mapping_wait(object, THREAD_UNINT);
4855 vm_object_mapping_begin(object);
4856 vm_object_unlock(object);
4857
4858 kr = memory_object_map(pager, pager_prot);
4859 assert(kr == KERN_SUCCESS);
4860
4861 vm_object_lock(object);
4862 vm_object_mapping_end(object);
4863 }
4864 vm_object_unlock(object);
4865
4866 /*
4867 * Perform the copy if requested
4868 */
4869
4870 if (copy) {
4871 vm_object_t new_object;
4872 vm_object_offset_t new_offset;
4873
4874 result = vm_object_copy_strategically(object, offset, size,
4875 &new_object, &new_offset,
4876 &copy);
4877
4878
4879 if (result == KERN_MEMORY_RESTART_COPY) {
4880 boolean_t success;
4881 boolean_t src_needs_copy;
4882
4883 /*
4884 * XXX
4885 * We currently ignore src_needs_copy.
4886 * This really is the issue of how to make
4887 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
4888 * non-kernel users to use. Solution forthcoming.
4889 * In the meantime, since we don't allow non-kernel
4890 * memory managers to specify symmetric copy,
4891 * we won't run into problems here.
4892 */
4893 new_object = object;
4894 new_offset = offset;
4895 success = vm_object_copy_quickly(&new_object,
4896 new_offset, size,
4897 &src_needs_copy,
4898 &copy);
4899 assert(success);
4900 result = KERN_SUCCESS;
4901 }
4902 /*
4903 * Throw away the reference to the
4904 * original object, as it won't be mapped.
4905 */
4906
4907 vm_object_deallocate(object);
4908
4909 if (result != KERN_SUCCESS) {
4910 return result;
4911 }
4912
4913 object = new_object;
4914 offset = new_offset;
4915 }
4916
4917 #if __arm64__
4918 if (fourk) {
4919 result = vm_map_enter_fourk(target_map,
4920 &map_addr,
4921 map_size,
4922 (vm_map_offset_t)mask,
4923 flags,
4924 vmk_flags,
4925 tag,
4926 object, offset,
4927 copy,
4928 cur_protection, max_protection,
4929 inheritance);
4930 } else
4931 #endif /* __arm64__ */
4932 {
4933 result = vm_map_enter(target_map,
4934 &map_addr, map_size,
4935 (vm_map_offset_t)mask,
4936 flags,
4937 vmk_flags,
4938 tag,
4939 object, offset,
4940 copy,
4941 cur_protection, max_protection,
4942 inheritance);
4943 }
4944 if (result != KERN_SUCCESS) {
4945 vm_object_deallocate(object);
4946 }
4947 *address = map_addr;
4948
4949 return result;
4950 }
4951
4952
4953 #if VM_CPM
4954
4955 #ifdef MACH_ASSERT
4956 extern pmap_paddr_t avail_start, avail_end;
4957 #endif
4958
4959 /*
4960 * Allocate memory in the specified map, with the caveat that
4961 * the memory is physically contiguous. This call may fail
4962 * if the system can't find sufficient contiguous memory.
4963 * This call may cause or lead to heart-stopping amounts of
4964 * paging activity.
4965 *
4966 * Memory obtained from this call should be freed in the
4967 * normal way, viz., via vm_deallocate.
4968 */
4969 kern_return_t
4970 vm_map_enter_cpm(
4971 vm_map_t map,
4972 vm_map_offset_t *addr,
4973 vm_map_size_t size,
4974 int flags)
4975 {
4976 vm_object_t cpm_obj;
4977 pmap_t pmap;
4978 vm_page_t m, pages;
4979 kern_return_t kr;
4980 vm_map_offset_t va, start, end, offset;
4981 #if MACH_ASSERT
4982 vm_map_offset_t prev_addr = 0;
4983 #endif /* MACH_ASSERT */
4984
4985 boolean_t anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
4986 vm_tag_t tag;
4987
4988 VM_GET_FLAGS_ALIAS(flags, tag);
4989
4990 if (size == 0) {
4991 *addr = 0;
4992 return KERN_SUCCESS;
4993 }
4994 if (anywhere) {
4995 *addr = vm_map_min(map);
4996 } else {
4997 *addr = vm_map_trunc_page(*addr,
4998 VM_MAP_PAGE_MASK(map));
4999 }
5000 size = vm_map_round_page(size,
5001 VM_MAP_PAGE_MASK(map));
5002
5003 /*
5004 * LP64todo - cpm_allocate should probably allow
5005 * allocations of >4GB, but not with the current
5006 * algorithm, so just cast down the size for now.
5007 */
5008 if (size > VM_MAX_ADDRESS) {
5009 return KERN_RESOURCE_SHORTAGE;
5010 }
5011 if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
5012 &pages, 0, 0, TRUE, flags)) != KERN_SUCCESS) {
5013 return kr;
5014 }
5015
5016 cpm_obj = vm_object_allocate((vm_object_size_t)size);
5017 assert(cpm_obj != VM_OBJECT_NULL);
5018 assert(cpm_obj->internal);
5019 assert(cpm_obj->vo_size == (vm_object_size_t)size);
5020 assert(cpm_obj->can_persist == FALSE);
5021 assert(cpm_obj->pager_created == FALSE);
5022 assert(cpm_obj->pageout == FALSE);
5023 assert(cpm_obj->shadow == VM_OBJECT_NULL);
5024
5025 /*
5026 * Insert pages into object.
5027 */
5028
5029 vm_object_lock(cpm_obj);
5030 for (offset = 0; offset < size; offset += PAGE_SIZE) {
5031 m = pages;
5032 pages = NEXT_PAGE(m);
5033 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
5034
5035 assert(!m->vmp_gobbled);
5036 assert(!m->vmp_wanted);
5037 assert(!m->vmp_pageout);
5038 assert(!m->vmp_tabled);
5039 assert(VM_PAGE_WIRED(m));
5040 assert(m->vmp_busy);
5041 assert(VM_PAGE_GET_PHYS_PAGE(m) >= (avail_start >> PAGE_SHIFT) && VM_PAGE_GET_PHYS_PAGE(m) <= (avail_end >> PAGE_SHIFT));
5042
5043 m->vmp_busy = FALSE;
5044 vm_page_insert(m, cpm_obj, offset);
5045 }
5046 assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
5047 vm_object_unlock(cpm_obj);
5048
5049 /*
5050 * Hang onto a reference on the object in case a
5051 * multi-threaded application for some reason decides
5052 * to deallocate the portion of the address space into
5053 * which we will insert this object.
5054 *
5055 * Unfortunately, we must insert the object now before
5056 * we can talk to the pmap module about which addresses
5057 * must be wired down. Hence, the race with a multi-
5058 * threaded app.
5059 */
5060 vm_object_reference(cpm_obj);
5061
5062 /*
5063 * Insert object into map.
5064 */
5065
5066 kr = vm_map_enter(
5067 map,
5068 addr,
5069 size,
5070 (vm_map_offset_t)0,
5071 flags,
5072 VM_MAP_KERNEL_FLAGS_NONE,
5073 cpm_obj,
5074 (vm_object_offset_t)0,
5075 FALSE,
5076 VM_PROT_ALL,
5077 VM_PROT_ALL,
5078 VM_INHERIT_DEFAULT);
5079
5080 if (kr != KERN_SUCCESS) {
5081 /*
5082 * A CPM object doesn't have can_persist set,
5083 * so all we have to do is deallocate it to
5084 * free up these pages.
5085 */
5086 assert(cpm_obj->pager_created == FALSE);
5087 assert(cpm_obj->can_persist == FALSE);
5088 assert(cpm_obj->pageout == FALSE);
5089 assert(cpm_obj->shadow == VM_OBJECT_NULL);
5090 vm_object_deallocate(cpm_obj); /* kill acquired ref */
5091 vm_object_deallocate(cpm_obj); /* kill creation ref */
5092 }
5093
5094 /*
5095 * Inform the physical mapping system that the
5096 * range of addresses may not fault, so that
5097 * page tables and such can be locked down as well.
5098 */
5099 start = *addr;
5100 end = start + size;
5101 pmap = vm_map_pmap(map);
5102 pmap_pageable(pmap, start, end, FALSE);
5103
5104 /*
5105 * Enter each page into the pmap, to avoid faults.
5106 * Note that this loop could be coded more efficiently,
5107 * if the need arose, rather than looking up each page
5108 * again.
5109 */
5110 for (offset = 0, va = start; offset < size;
5111 va += PAGE_SIZE, offset += PAGE_SIZE) {
5112 int type_of_fault;
5113
5114 vm_object_lock(cpm_obj);
5115 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
5116 assert(m != VM_PAGE_NULL);
5117
5118 vm_page_zero_fill(m);
5119
5120 type_of_fault = DBG_ZERO_FILL_FAULT;
5121
5122 vm_fault_enter(m, pmap, va, VM_PROT_ALL, VM_PROT_WRITE,
5123 VM_PAGE_WIRED(m),
5124 FALSE, /* change_wiring */
5125 VM_KERN_MEMORY_NONE, /* tag - not wiring */
5126 FALSE, /* no_cache */
5127 FALSE, /* cs_bypass */
5128 0, /* user_tag */
5129 0, /* pmap_options */
5130 NULL, /* need_retry */
5131 &type_of_fault);
5132
5133 vm_object_unlock(cpm_obj);
5134 }
5135
5136 #if MACH_ASSERT
5137 /*
5138 * Verify ordering in address space.
5139 */
5140 for (offset = 0; offset < size; offset += PAGE_SIZE) {
5141 vm_object_lock(cpm_obj);
5142 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
5143 vm_object_unlock(cpm_obj);
5144 if (m == VM_PAGE_NULL) {
5145 panic("vm_allocate_cpm: obj %p off 0x%llx no page",
5146 cpm_obj, (uint64_t)offset);
5147 }
5148 assert(m->vmp_tabled);
5149 assert(!m->vmp_busy);
5150 assert(!m->vmp_wanted);
5151 assert(!m->vmp_fictitious);
5152 assert(!m->vmp_private);
5153 assert(!m->vmp_absent);
5154 assert(!m->vmp_error);
5155 assert(!m->vmp_cleaning);
5156 assert(!m->vmp_laundry);
5157 assert(!m->vmp_precious);
5158 assert(!m->vmp_clustered);
5159 if (offset != 0) {
5160 if (VM_PAGE_GET_PHYS_PAGE(m) != prev_addr + 1) {
5161 printf("start 0x%llx end 0x%llx va 0x%llx\n",
5162 (uint64_t)start, (uint64_t)end, (uint64_t)va);
5163 printf("obj %p off 0x%llx\n", cpm_obj, (uint64_t)offset);
5164 printf("m %p prev_address 0x%llx\n", m, (uint64_t)prev_addr);
5165 panic("vm_allocate_cpm: pages not contig!");
5166 }
5167 }
5168 prev_addr = VM_PAGE_GET_PHYS_PAGE(m);
5169 }
5170 #endif /* MACH_ASSERT */
5171
5172 vm_object_deallocate(cpm_obj); /* kill extra ref */
5173
5174 return kr;
5175 }
5176
5177
5178 #else /* VM_CPM */
5179
5180 /*
5181 * Interface is defined in all cases, but unless the kernel
5182 * is built explicitly for this option, the interface does
5183 * nothing.
5184 */
5185
5186 kern_return_t
5187 vm_map_enter_cpm(
5188 __unused vm_map_t map,
5189 __unused vm_map_offset_t *addr,
5190 __unused vm_map_size_t size,
5191 __unused int flags)
5192 {
5193 return KERN_FAILURE;
5194 }
5195 #endif /* VM_CPM */
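/*
 * Illustrative sketch: a hypothetical in-kernel caller of vm_map_enter_cpm()
 * (function and variable names invented for the example).  Per the block
 * comment above, the memory comes back wired and physically contiguous and
 * is released with vm_deallocate(); on kernels built without VM_CPM the call
 * simply returns KERN_FAILURE.
 */
#if 0   /* example only -- never compiled */
static kern_return_t
example_alloc_contig(
	vm_map_t        map,
	vm_map_offset_t *out_addr,
	vm_map_size_t   size)
{
	kern_return_t kr;

	*out_addr = 0;
	kr = vm_map_enter_cpm(map, out_addr, size, VM_FLAGS_ANYWHERE);
	if (kr != KERN_SUCCESS) {
		return kr;      /* no sufficiently large contiguous run */
	}

	/* ... use the physically contiguous buffer at *out_addr ... */

	return vm_deallocate(map, *out_addr, size);
}
#endif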
5196
5197 /* Not used without nested pmaps */
5198 #ifndef NO_NESTED_PMAP
5199 /*
5200 * Clip and unnest a portion of a nested submap mapping.
5201 */
5202
5203
5204 static void
5205 vm_map_clip_unnest(
5206 vm_map_t map,
5207 vm_map_entry_t entry,
5208 vm_map_offset_t start_unnest,
5209 vm_map_offset_t end_unnest)
5210 {
5211 vm_map_offset_t old_start_unnest = start_unnest;
5212 vm_map_offset_t old_end_unnest = end_unnest;
5213
5214 assert(entry->is_sub_map);
5215 assert(VME_SUBMAP(entry) != NULL);
5216 assert(entry->use_pmap);
5217
5218 /*
5219 * Query the platform for the optimal unnest range.
5220 * DRK: There's some duplication of effort here, since
5221 * callers may have adjusted the range to some extent. This
5222 * routine was introduced to support 1GiB subtree nesting
5223 * for x86 platforms, which can also nest on 2MiB boundaries
5224 * depending on size/alignment.
5225 */
5226 if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) {
5227 assert(VME_SUBMAP(entry)->is_nested_map);
5228 assert(!VME_SUBMAP(entry)->disable_vmentry_reuse);
5229 log_unnest_badness(map,
5230 old_start_unnest,
5231 old_end_unnest,
5232 VME_SUBMAP(entry)->is_nested_map,
5233 (entry->vme_start +
5234 VME_SUBMAP(entry)->lowest_unnestable_start -
5235 VME_OFFSET(entry)));
5236 }
5237
5238 if (entry->vme_start > start_unnest ||
5239 entry->vme_end < end_unnest) {
5240 panic("vm_map_clip_unnest(0x%llx,0x%llx): "
5241 "bad nested entry: start=0x%llx end=0x%llx\n",
5242 (long long)start_unnest, (long long)end_unnest,
5243 (long long)entry->vme_start, (long long)entry->vme_end);
5244 }
5245
5246 if (start_unnest > entry->vme_start) {
5247 _vm_map_clip_start(&map->hdr,
5248 entry,
5249 start_unnest);
5250 if (map->holelistenabled) {
5251 vm_map_store_update_first_free(map, NULL, FALSE);
5252 } else {
5253 vm_map_store_update_first_free(map, map->first_free, FALSE);
5254 }
5255 }
5256 if (entry->vme_end > end_unnest) {
5257 _vm_map_clip_end(&map->hdr,
5258 entry,
5259 end_unnest);
5260 if (map->holelistenabled) {
5261 vm_map_store_update_first_free(map, NULL, FALSE);
5262 } else {
5263 vm_map_store_update_first_free(map, map->first_free, FALSE);
5264 }
5265 }
5266
5267 pmap_unnest(map->pmap,
5268 entry->vme_start,
5269 entry->vme_end - entry->vme_start);
5270 if ((map->mapped_in_other_pmaps) && os_ref_get_count(&map->map_refcnt) != 0) {
5271 /* clean up parent map/maps */
5272 vm_map_submap_pmap_clean(
5273 map, entry->vme_start,
5274 entry->vme_end,
5275 VME_SUBMAP(entry),
5276 VME_OFFSET(entry));
5277 }
5278 entry->use_pmap = FALSE;
5279 if ((map->pmap != kernel_pmap) &&
5280 (VME_ALIAS(entry) == VM_MEMORY_SHARED_PMAP)) {
5281 VME_ALIAS_SET(entry, VM_MEMORY_UNSHARED_PMAP);
5282 }
5283 }
5284 #endif /* NO_NESTED_PMAP */
5285
5286 /*
5287 * vm_map_clip_start: [ internal use only ]
5288 *
5289 * Asserts that the given entry begins at or after
5290 * the specified address; if necessary,
5291 * it splits the entry into two.
5292 */
5293 void
5294 vm_map_clip_start(
5295 vm_map_t map,
5296 vm_map_entry_t entry,
5297 vm_map_offset_t startaddr)
5298 {
5299 #ifndef NO_NESTED_PMAP
5300 if (entry->is_sub_map &&
5301 entry->use_pmap &&
5302 startaddr >= entry->vme_start) {
5303 vm_map_offset_t start_unnest, end_unnest;
5304
5305 /*
5306 * Make sure "startaddr" is no longer in a nested range
5307 * before we clip. Unnest only the minimum range the platform
5308 * can handle.
5309 * vm_map_clip_unnest may perform additional adjustments to
5310 * the unnest range.
5311 */
5312 start_unnest = startaddr & ~(pmap_nesting_size_min - 1);
5313 end_unnest = start_unnest + pmap_nesting_size_min;
5314 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
5315 }
5316 #endif /* NO_NESTED_PMAP */
5317 if (startaddr > entry->vme_start) {
5318 if (VME_OBJECT(entry) &&
5319 !entry->is_sub_map &&
5320 VME_OBJECT(entry)->phys_contiguous) {
5321 pmap_remove(map->pmap,
5322 (addr64_t)(entry->vme_start),
5323 (addr64_t)(entry->vme_end));
5324 }
5325 if (entry->vme_atomic) {
5326 panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)\n", map, entry);
5327 }
5328
5329 DTRACE_VM5(
5330 vm_map_clip_start,
5331 vm_map_t, map,
5332 vm_map_offset_t, entry->vme_start,
5333 vm_map_offset_t, entry->vme_end,
5334 vm_map_offset_t, startaddr,
5335 int, VME_ALIAS(entry));
5336
5337 _vm_map_clip_start(&map->hdr, entry, startaddr);
5338 if (map->holelistenabled) {
5339 vm_map_store_update_first_free(map, NULL, FALSE);
5340 } else {
5341 vm_map_store_update_first_free(map, map->first_free, FALSE);
5342 }
5343 }
5344 }
5345
5346
5347 #define vm_map_copy_clip_start(copy, entry, startaddr) \
5348 MACRO_BEGIN \
5349 if ((startaddr) > (entry)->vme_start) \
5350 _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \
5351 MACRO_END
5352
5353 /*
5354 * This routine is called only when it is known that
5355 * the entry must be split.
5356 */
5357 static void
5358 _vm_map_clip_start(
5359 struct vm_map_header *map_header,
5360 vm_map_entry_t entry,
5361 vm_map_offset_t start)
5362 {
5363 vm_map_entry_t new_entry;
5364
5365 /*
5366 * Split off the front portion --
5367 * note that we must insert the new
5368 * entry BEFORE this one, so that
5369 * this entry has the specified starting
5370 * address.
5371 */
5372
5373 if (entry->map_aligned) {
5374 assert(VM_MAP_PAGE_ALIGNED(start,
5375 VM_MAP_HDR_PAGE_MASK(map_header)));
5376 }
5377
5378 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
5379 vm_map_entry_copy_full(new_entry, entry);
5380
5381 new_entry->vme_end = start;
5382 assert(new_entry->vme_start < new_entry->vme_end);
5383 VME_OFFSET_SET(entry, VME_OFFSET(entry) + (start - entry->vme_start));
5384 assert(start < entry->vme_end);
5385 entry->vme_start = start;
5386
5387 _vm_map_store_entry_link(map_header, entry->vme_prev, new_entry);
5388
5389 if (entry->is_sub_map) {
5390 vm_map_reference(VME_SUBMAP(new_entry));
5391 } else {
5392 vm_object_reference(VME_OBJECT(new_entry));
5393 }
5394 }
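/*
 * Worked example with hypothetical addresses: clipping an entry covering
 * [0x1000, 0x5000) with object offset 0 at start = 0x3000 leaves:
 *
 *	new_entry: [0x1000, 0x3000), offset 0       (inserted BEFORE "entry")
 *	entry:     [0x3000, 0x5000), offset 0x2000  (offset advanced by
 *	                                             start - old vme_start)
 *
 * and the new entry takes its own reference on the submap or object it now
 * shares with "entry".
 */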
5395
5396
5397 /*
5398 * vm_map_clip_end: [ internal use only ]
5399 *
5400 * Asserts that the given entry ends at or before
5401 * the specified address; if necessary,
5402 * it splits the entry into two.
5403 */
5404 void
5405 vm_map_clip_end(
5406 vm_map_t map,
5407 vm_map_entry_t entry,
5408 vm_map_offset_t endaddr)
5409 {
5410 if (endaddr > entry->vme_end) {
5411 /*
5412 * Within the scope of this clipping, limit "endaddr" to
5413 * the end of this map entry...
5414 */
5415 endaddr = entry->vme_end;
5416 }
5417 #ifndef NO_NESTED_PMAP
5418 if (entry->is_sub_map && entry->use_pmap) {
5419 vm_map_offset_t start_unnest, end_unnest;
5420
5421 /*
5422 * Make sure the range between the start of this entry and
5423 * the new "endaddr" is no longer nested before we clip.
5424 * Unnest only the minimum range the platform can handle.
5425 * vm_map_clip_unnest may perform additional adjustments to
5426 * the unnest range.
5427 */
5428 start_unnest = entry->vme_start;
5429 end_unnest =
5430 (endaddr + pmap_nesting_size_min - 1) &
5431 ~(pmap_nesting_size_min - 1);
5432 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
5433 }
5434 #endif /* NO_NESTED_PMAP */
5435 if (endaddr < entry->vme_end) {
5436 if (VME_OBJECT(entry) &&
5437 !entry->is_sub_map &&
5438 VME_OBJECT(entry)->phys_contiguous) {
5439 pmap_remove(map->pmap,
5440 (addr64_t)(entry->vme_start),
5441 (addr64_t)(entry->vme_end));
5442 }
5443 if (entry->vme_atomic) {
5444 panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)\n", map, entry);
5445 }
5446 DTRACE_VM5(
5447 vm_map_clip_end,
5448 vm_map_t, map,
5449 vm_map_offset_t, entry->vme_start,
5450 vm_map_offset_t, entry->vme_end,
5451 vm_map_offset_t, endaddr,
5452 int, VME_ALIAS(entry));
5453
5454 _vm_map_clip_end(&map->hdr, entry, endaddr);
5455 if (map->holelistenabled) {
5456 vm_map_store_update_first_free(map, NULL, FALSE);
5457 } else {
5458 vm_map_store_update_first_free(map, map->first_free, FALSE);
5459 }
5460 }
5461 }
5462
5463
5464 #define vm_map_copy_clip_end(copy, entry, endaddr) \
5465 MACRO_BEGIN \
5466 if ((endaddr) < (entry)->vme_end) \
5467 _vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \
5468 MACRO_END
5469
5470 /*
5471 * This routine is called only when it is known that
5472 * the entry must be split.
5473 */
5474 static void
5475 _vm_map_clip_end(
5476 struct vm_map_header *map_header,
5477 vm_map_entry_t entry,
5478 vm_map_offset_t end)
5479 {
5480 vm_map_entry_t new_entry;
5481
5482 /*
5483 * Create a new entry and insert it
5484 * AFTER the specified entry
5485 */
5486
5487 if (entry->map_aligned) {
5488 assert(VM_MAP_PAGE_ALIGNED(end,
5489 VM_MAP_HDR_PAGE_MASK(map_header)));
5490 }
5491
5492 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
5493 vm_map_entry_copy_full(new_entry, entry);
5494
5495 assert(entry->vme_start < end);
5496 new_entry->vme_start = entry->vme_end = end;
5497 VME_OFFSET_SET(new_entry,
5498 VME_OFFSET(new_entry) + (end - entry->vme_start));
5499 assert(new_entry->vme_start < new_entry->vme_end);
5500
5501 _vm_map_store_entry_link(map_header, entry, new_entry);
5502
5503 if (entry->is_sub_map) {
5504 vm_map_reference(VME_SUBMAP(new_entry));
5505 } else {
5506 vm_object_reference(VME_OBJECT(new_entry));
5507 }
5508 }
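/*
 * Worked example with hypothetical addresses: clipping an entry covering
 * [0x1000, 0x5000) with object offset 0 at end = 0x3000 leaves:
 *
 *	entry:     [0x1000, 0x3000), offset 0
 *	new_entry: [0x3000, 0x5000), offset 0x2000  (inserted AFTER "entry",
 *	                                             offset advanced by
 *	                                             end - old vme_start)
 */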
5509
5510
5511 /*
5512 * VM_MAP_RANGE_CHECK: [ internal use only ]
5513 *
5514 * Asserts that the starting and ending region
5515 * addresses fall within the valid range of the map.
5516 */
5517 #define VM_MAP_RANGE_CHECK(map, start, end) \
5518 MACRO_BEGIN \
5519 if (start < vm_map_min(map)) \
5520 start = vm_map_min(map); \
5521 if (end > vm_map_max(map)) \
5522 end = vm_map_max(map); \
5523 if (start > end) \
5524 start = end; \
5525 MACRO_END
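/*
 * Worked example with hypothetical bounds: for a map whose valid range is
 * [0x100000, 0x900000), VM_MAP_RANGE_CHECK clamps as follows:
 *
 *	start = 0x080000, end = 0xA00000  ->  start = 0x100000, end = 0x900000
 *	start = 0x950000, end = 0x960000  ->  start = 0x900000, end = 0x900000
 *	                                      (an empty range)
 */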
5526
5527 /*
5528 * vm_map_range_check: [ internal use only ]
5529 *
5530 * Check that the region defined by the specified start and
5531 * end addresses is wholly contained within a single map
5532 * entry or set of adjacent map entries of the specified map,
5533 * i.e. the specified region contains no unmapped space.
5534 * If any or all of the region is unmapped, FALSE is returned.
5535 * Otherwise, TRUE is returned and if the output argument 'entry'
5536 * is not NULL it points to the map entry containing the start
5537 * of the region.
5538 *
5539 * The map is locked for reading on entry and is left locked.
5540 */
5541 static boolean_t
5542 vm_map_range_check(
5543 vm_map_t map,
5544 vm_map_offset_t start,
5545 vm_map_offset_t end,
5546 vm_map_entry_t *entry)
5547 {
5548 vm_map_entry_t cur;
5549 vm_map_offset_t prev;
5550
5551 /*
5552 * Basic sanity checks first
5553 */
5554 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end) {
5555 return FALSE;
5556 }
5557
5558 /*
5559 * Check first if the region starts within a valid
5560 * mapping for the map.
5561 */
5562 if (!vm_map_lookup_entry(map, start, &cur)) {
5563 return FALSE;
5564 }
5565
5566 /*
5567 * Optimize for the case that the region is contained
5568 * in a single map entry.
5569 */
5570 if (entry != (vm_map_entry_t *) NULL) {
5571 *entry = cur;
5572 }
5573 if (end <= cur->vme_end) {
5574 return TRUE;
5575 }
5576
5577 /*
5578 * If the region is not wholly contained within a
5579 * single entry, walk the entries looking for holes.
5580 */
5581 prev = cur->vme_end;
5582 cur = cur->vme_next;
5583 while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
5584 if (end <= cur->vme_end) {
5585 return TRUE;
5586 }
5587 prev = cur->vme_end;
5588 cur = cur->vme_next;
5589 }
5590 return FALSE;
5591 }
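/*
 * Illustrative sketch: how an internal caller is expected to use
 * vm_map_range_check().  The map must already be locked (a read lock
 * suffices, per the comment above) and remains locked across the call;
 * the helper name is invented for the example.
 */
#if 0   /* example only -- never compiled */
static kern_return_t
example_require_fully_mapped(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end)
{
	vm_map_entry_t first_entry;
	kern_return_t  kr;

	vm_map_lock_read(map);
	if (vm_map_range_check(map, start, end, &first_entry)) {
		/* [start, end) is fully mapped; first_entry covers "start" */
		kr = KERN_SUCCESS;
	} else {
		/* there is a hole somewhere in [start, end) */
		kr = KERN_INVALID_ADDRESS;
	}
	vm_map_unlock_read(map);
	return kr;
}
#endif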
5592
5593 /*
5594 * vm_map_submap: [ kernel use only ]
5595 *
5596 * Mark the given range as handled by a subordinate map.
5597 *
5598 * This range must have been created with vm_map_find using
5599 * the vm_submap_object, and no other operations may have been
5600 * performed on this range prior to calling vm_map_submap.
5601 *
5602 * Only a limited number of operations can be performed
5603 * within this range after calling vm_map_submap:
5604 * vm_fault
5605 * [Don't try vm_map_copyin!]
5606 *
5607 * To remove a submapping, one must first remove the
5608 * range from the superior map, and then destroy the
5609 * submap (if desired). [Better yet, don't try it.]
5610 */
5611 kern_return_t
5612 vm_map_submap(
5613 vm_map_t map,
5614 vm_map_offset_t start,
5615 vm_map_offset_t end,
5616 vm_map_t submap,
5617 vm_map_offset_t offset,
5618 #ifdef NO_NESTED_PMAP
5619 __unused
5620 #endif /* NO_NESTED_PMAP */
5621 boolean_t use_pmap)
5622 {
5623 vm_map_entry_t entry;
5624 kern_return_t result = KERN_INVALID_ARGUMENT;
5625 vm_object_t object;
5626
5627 vm_map_lock(map);
5628
5629 if (!vm_map_lookup_entry(map, start, &entry)) {
5630 entry = entry->vme_next;
5631 }
5632
5633 if (entry == vm_map_to_entry(map) ||
5634 entry->is_sub_map) {
5635 vm_map_unlock(map);
5636 return KERN_INVALID_ARGUMENT;
5637 }
5638
5639 vm_map_clip_start(map, entry, start);
5640 vm_map_clip_end(map, entry, end);
5641
5642 if ((entry->vme_start == start) && (entry->vme_end == end) &&
5643 (!entry->is_sub_map) &&
5644 ((object = VME_OBJECT(entry)) == vm_submap_object) &&
5645 (object->resident_page_count == 0) &&
5646 (object->copy == VM_OBJECT_NULL) &&
5647 (object->shadow == VM_OBJECT_NULL) &&
5648 (!object->pager_created)) {
5649 VME_OFFSET_SET(entry, (vm_object_offset_t)offset);
5650 VME_OBJECT_SET(entry, VM_OBJECT_NULL);
5651 vm_object_deallocate(object);
5652 entry->is_sub_map = TRUE;
5653 entry->use_pmap = FALSE;
5654 VME_SUBMAP_SET(entry, submap);
5655 vm_map_reference(submap);
5656 if (submap->mapped_in_other_pmaps == FALSE &&
5657 vm_map_pmap(submap) != PMAP_NULL &&
5658 vm_map_pmap(submap) != vm_map_pmap(map)) {
5659 /*
5660 * This submap is being mapped in a map
5661 * that uses a different pmap.
5662 * Set its "mapped_in_other_pmaps" flag
5663 * to indicate that we now need to
5664 * remove mappings from all pmaps rather
5665 * than just the submap's pmap.
5666 */
5667 submap->mapped_in_other_pmaps = TRUE;
5668 }
5669
5670 #ifndef NO_NESTED_PMAP
5671 if (use_pmap) {
5672 /* nest if platform code will allow */
5673 if (submap->pmap == NULL) {
5674 ledger_t ledger = map->pmap->ledger;
5675 submap->pmap = pmap_create_options(ledger,
5676 (vm_map_size_t) 0, 0);
5677 if (submap->pmap == PMAP_NULL) {
5678 vm_map_unlock(map);
5679 return KERN_NO_SPACE;
5680 }
5681 #if defined(__arm__) || defined(__arm64__)
5682 pmap_set_nested(submap->pmap);
5683 #endif
5684 }
5685 result = pmap_nest(map->pmap,
5686 (VME_SUBMAP(entry))->pmap,
5687 (addr64_t)start,
5688 (addr64_t)start,
5689 (uint64_t)(end - start));
5690 if (result) {
5691 panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result);
5692 }
5693 entry->use_pmap = TRUE;
5694 }
5695 #else /* NO_NESTED_PMAP */
5696 pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end);
5697 #endif /* NO_NESTED_PMAP */
5698 result = KERN_SUCCESS;
5699 }
5700 vm_map_unlock(map);
5701
5702 return result;
5703 }
5704
5705 /*
5706 * vm_map_protect:
5707 *
5708 * Sets the protection of the specified address
5709 * region in the target map. If "set_max" is
5710 * specified, the maximum protection is to be set;
5711 * otherwise, only the current protection is affected.
5712 */
5713 kern_return_t
5714 vm_map_protect(
5715 vm_map_t map,
5716 vm_map_offset_t start,
5717 vm_map_offset_t end,
5718 vm_prot_t new_prot,
5719 boolean_t set_max)
5720 {
5721 vm_map_entry_t current;
5722 vm_map_offset_t prev;
5723 vm_map_entry_t entry;
5724 vm_prot_t new_max;
5725 int pmap_options = 0;
5726 kern_return_t kr;
5727
5728 if (new_prot & VM_PROT_COPY) {
5729 vm_map_offset_t new_start;
5730 vm_prot_t cur_prot, max_prot;
5731 vm_map_kernel_flags_t kflags;
5732
5733 /* LP64todo - see below */
5734 if (start >= map->max_offset) {
5735 return KERN_INVALID_ADDRESS;
5736 }
5737
5738 #if VM_PROTECT_WX_FAIL
5739 if ((new_prot & VM_PROT_EXECUTE) &&
5740 map != kernel_map &&
5741 cs_process_enforcement(NULL)) {
5742 DTRACE_VM3(cs_wx,
5743 uint64_t, (uint64_t) start,
5744 uint64_t, (uint64_t) end,
5745 vm_prot_t, new_prot);
5746 printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
5747 proc_selfpid(),
5748 (current_task()->bsd_info
5749 ? proc_name_address(current_task()->bsd_info)
5750 : "?"),
5751 __FUNCTION__);
5752 return KERN_PROTECTION_FAILURE;
5753 }
5754 #endif /* VM_PROTECT_WX_FAIL */
5755
5756 /*
5757 * Let vm_map_remap_extract() know that it will need to:
5758 * + make a copy of the mapping
5759 * + add VM_PROT_WRITE to the max protections
5760 * + remove any protections that are no longer allowed from the
5761 * max protections (to avoid any WRITE/EXECUTE conflict, for
5762 * example).
5763 * Note that "max_prot" is an IN/OUT parameter only for this
5764 * specific (VM_PROT_COPY) case. It's usually an OUT parameter
5765 * only.
5766 */
5767 max_prot = new_prot & VM_PROT_ALL;
5768 kflags = VM_MAP_KERNEL_FLAGS_NONE;
5769 kflags.vmkf_remap_prot_copy = TRUE;
5770 kflags.vmkf_overwrite_immutable = TRUE;
5771 new_start = start;
5772 kr = vm_map_remap(map,
5773 &new_start,
5774 end - start,
5775 0, /* mask */
5776 VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
5777 kflags,
5778 0,
5779 map,
5780 start,
5781 TRUE, /* copy-on-write remapping! */
5782 &cur_prot,
5783 &max_prot,
5784 VM_INHERIT_DEFAULT);
5785 if (kr != KERN_SUCCESS) {
5786 return kr;
5787 }
5788 new_prot &= ~VM_PROT_COPY;
5789 }
5790
5791 vm_map_lock(map);
5792
5793 /* LP64todo - remove this check when vm_map_commpage64()
5794 * no longer has to stuff in a map_entry for the commpage
5795 * above the map's max_offset.
5796 */
5797 if (start >= map->max_offset) {
5798 vm_map_unlock(map);
5799 return KERN_INVALID_ADDRESS;
5800 }
5801
5802 while (1) {
5803 /*
5804 * Lookup the entry. If it doesn't start in a valid
5805 * entry, return an error.
5806 */
5807 if (!vm_map_lookup_entry(map, start, &entry)) {
5808 vm_map_unlock(map);
5809 return KERN_INVALID_ADDRESS;
5810 }
5811
5812 if (entry->superpage_size && (start & (SUPERPAGE_SIZE - 1))) { /* extend request to whole entry */
5813 start = SUPERPAGE_ROUND_DOWN(start);
5814 continue;
5815 }
5816 break;
5817 }
5818 if (entry->superpage_size) {
5819 end = SUPERPAGE_ROUND_UP(end);
5820 }
5821
5822 /*
5823 * Make a first pass to check for protection and address
5824 * violations.
5825 */
5826
5827 current = entry;
5828 prev = current->vme_start;
5829 while ((current != vm_map_to_entry(map)) &&
5830 (current->vme_start < end)) {
5831 /*
5832 * If there is a hole, return an error.
5833 */
5834 if (current->vme_start != prev) {
5835 vm_map_unlock(map);
5836 return KERN_INVALID_ADDRESS;
5837 }
5838
5839 new_max = current->max_protection;
5840 if ((new_prot & new_max) != new_prot) {
5841 vm_map_unlock(map);
5842 return KERN_PROTECTION_FAILURE;
5843 }
5844
5845 if ((new_prot & VM_PROT_WRITE) &&
5846 (new_prot & VM_PROT_EXECUTE) &&
5847 #if !CONFIG_EMBEDDED
5848 map != kernel_map &&
5849 cs_process_enforcement(NULL) &&
5850 #endif /* !CONFIG_EMBEDDED */
5851 !(current->used_for_jit)) {
5852 DTRACE_VM3(cs_wx,
5853 uint64_t, (uint64_t) current->vme_start,
5854 uint64_t, (uint64_t) current->vme_end,
5855 vm_prot_t, new_prot);
5856 printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
5857 proc_selfpid(),
5858 (current_task()->bsd_info
5859 ? proc_name_address(current_task()->bsd_info)
5860 : "?"),
5861 __FUNCTION__);
5862 new_prot &= ~VM_PROT_EXECUTE;
5863 #if VM_PROTECT_WX_FAIL
5864 vm_map_unlock(map);
5865 return KERN_PROTECTION_FAILURE;
5866 #endif /* VM_PROTECT_WX_FAIL */
5867 }
5868
5869 /*
5870 * If the task has requested executable lockdown,
5871 * deny both:
5872 * - adding executable protections OR
5873 * - adding write protections to an existing executable mapping.
5874 */
5875 if (map->map_disallow_new_exec == TRUE) {
5876 if ((new_prot & VM_PROT_EXECUTE) ||
5877 ((current->protection & VM_PROT_EXECUTE) && (new_prot & VM_PROT_WRITE))) {
5878 vm_map_unlock(map);
5879 return KERN_PROTECTION_FAILURE;
5880 }
5881 }
5882
5883 prev = current->vme_end;
5884 current = current->vme_next;
5885 }
5886
5887 #if __arm64__
5888 if (end > prev &&
5889 end == vm_map_round_page(prev, VM_MAP_PAGE_MASK(map))) {
5890 vm_map_entry_t prev_entry;
5891
5892 prev_entry = current->vme_prev;
5893 if (prev_entry != vm_map_to_entry(map) &&
5894 !prev_entry->map_aligned &&
5895 (vm_map_round_page(prev_entry->vme_end,
5896 VM_MAP_PAGE_MASK(map))
5897 == end)) {
5898 /*
5899 * The last entry in our range is not "map-aligned"
5900 * but it would have reached all the way to "end"
5901 * if it had been map-aligned, so this is not really
5902 * a hole in the range and we can proceed.
5903 */
5904 prev = end;
5905 }
5906 }
5907 #endif /* __arm64__ */
5908
5909 if (end > prev) {
5910 vm_map_unlock(map);
5911 return KERN_INVALID_ADDRESS;
5912 }
5913
5914 /*
5915 * Go back and fix up protections.
5916 * Clip to start here if the range starts within
5917 * the entry.
5918 */
5919
5920 current = entry;
5921 if (current != vm_map_to_entry(map)) {
5922 /* clip and unnest if necessary */
5923 vm_map_clip_start(map, current, start);
5924 }
5925
5926 while ((current != vm_map_to_entry(map)) &&
5927 (current->vme_start < end)) {
5928 vm_prot_t old_prot;
5929
5930 vm_map_clip_end(map, current, end);
5931
5932 if (current->is_sub_map) {
5933 /* clipping did unnest if needed */
5934 assert(!current->use_pmap);
5935 }
5936
5937 old_prot = current->protection;
5938
5939 if (set_max) {
5940 current->max_protection = new_prot;
5941 current->protection = new_prot & old_prot;
5942 } else {
5943 current->protection = new_prot;
5944 }
5945
5946 /*
5947 * Update physical map if necessary.
5948 * If the request is to turn off write protection,
5949 * we won't do it for real (in pmap). This is because
5950 * it would cause copy-on-write to fail. We've already
5951 * set the new protection in the map, so if a
5952 * write-protect fault occurred, it will be fixed up
5953 * properly, COW or not.
5954 */
5955 if (current->protection != old_prot) {
5956 /* Look one level in: we support nested pmaps */
5957 /* from mapped submaps which are direct entries */
5958 /* in our map */
5959
5960 vm_prot_t prot;
5961
5962 prot = current->protection;
5963 if (current->is_sub_map || (VME_OBJECT(current) == NULL) || (VME_OBJECT(current) != compressor_object)) {
5964 prot &= ~VM_PROT_WRITE;
5965 } else {
5966 assert(!VME_OBJECT(current)->code_signed);
5967 assert(VME_OBJECT(current)->copy_strategy == MEMORY_OBJECT_COPY_NONE);
5968 }
5969
5970 if (override_nx(map, VME_ALIAS(current)) && prot) {
5971 prot |= VM_PROT_EXECUTE;
5972 }
5973
5974 #if CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG)
5975 if (!(old_prot & VM_PROT_EXECUTE) &&
5976 (prot & VM_PROT_EXECUTE) &&
5977 panic_on_unsigned_execute &&
5978 (proc_selfcsflags() & CS_KILL)) {
5979 panic("vm_map_protect(%p,0x%llx,0x%llx) old=0x%x new=0x%x - <rdar://23770418> code-signing bypass?\n", map, (uint64_t)current->vme_start, (uint64_t)current->vme_end, old_prot, prot);
5980 }
5981 #endif /* CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG) */
5982
5983 if (pmap_has_prot_policy(prot)) {
5984 if (current->wired_count) {
5985 panic("vm_map_protect(%p,0x%llx,0x%llx) new=0x%x wired=%x\n",
5986 map, (uint64_t)current->vme_start, (uint64_t)current->vme_end, prot, current->wired_count);
5987 }
5988
5989 /* If the pmap layer cares about this
5990 * protection type, force a fault for
5991 * each page so that vm_fault will
5992 * repopulate the page with the full
5993 * set of protections.
5994 */
5995 /*
5996 * TODO: We don't seem to need this,
5997 * but this is due to an internal
5998 * implementation detail of
5999 * pmap_protect. Do we want to rely
6000 * on this?
6001 */
6002 prot = VM_PROT_NONE;
6003 }
6004
6005 if (current->is_sub_map && current->use_pmap) {
6006 pmap_protect(VME_SUBMAP(current)->pmap,
6007 current->vme_start,
6008 current->vme_end,
6009 prot);
6010 } else {
6011 if (prot & VM_PROT_WRITE) {
6012 if (VME_OBJECT(current) == compressor_object) {
6013 /*
6014 * For write requests on the
6015 * compressor, we will ask the
6016 * pmap layer to prevent us from
6017 * taking a write fault when we
6018 * attempt to access the mapping
6019 * next.
6020 */
6021 pmap_options |= PMAP_OPTIONS_PROTECT_IMMEDIATE;
6022 }
6023 }
6024
6025 pmap_protect_options(map->pmap,
6026 current->vme_start,
6027 current->vme_end,
6028 prot,
6029 pmap_options,
6030 NULL);
6031 }
6032 }
6033 current = current->vme_next;
6034 }
6035
6036 current = entry;
6037 while ((current != vm_map_to_entry(map)) &&
6038 (current->vme_start <= end)) {
6039 vm_map_simplify_entry(map, current);
6040 current = current->vme_next;
6041 }
6042
6043 vm_map_unlock(map);
6044 return KERN_SUCCESS;
6045 }
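/*
 * Illustrative sketch (not part of this file): make a hypothetical
 * page-aligned range of the current task's map read-only without
 * touching its max_protection.
 *
 *	kr = vm_map_protect(current_map(), addr, addr + PAGE_SIZE,
 *	    VM_PROT_READ, FALSE);
 */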
6046
6047 /*
6048 * vm_map_inherit:
6049 *
6050 * Sets the inheritance of the specified address
6051 * range in the target map. Inheritance
6052 * affects how the map will be shared with
6053 * child maps at the time of vm_map_fork.
6054 */
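/*
 * Illustrative sketch (not part of this file): keep a hypothetical
 * page-aligned range "addr"/"size" out of any child map created by
 * vm_map_fork().
 *
 *	kr = vm_map_inherit(current_map(), addr, addr + size, VM_INHERIT_NONE);
 */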
6055 kern_return_t
6056 vm_map_inherit(
6057 vm_map_t map,
6058 vm_map_offset_t start,
6059 vm_map_offset_t end,
6060 vm_inherit_t new_inheritance)
6061 {
6062 vm_map_entry_t entry;
6063 vm_map_entry_t temp_entry;
6064
6065 vm_map_lock(map);
6066
6067 VM_MAP_RANGE_CHECK(map, start, end);
6068
6069 if (vm_map_lookup_entry(map, start, &temp_entry)) {
6070 entry = temp_entry;
6071 } else {
6072 temp_entry = temp_entry->vme_next;
6073 entry = temp_entry;
6074 }
6075
6076 /* first check entire range for submaps which can't support the */
6077 /* given inheritance. */
6078 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
6079 if (entry->is_sub_map) {
6080 if (new_inheritance == VM_INHERIT_COPY) {
6081 vm_map_unlock(map);
6082 return KERN_INVALID_ARGUMENT;
6083 }
6084 }
6085
6086 entry = entry->vme_next;
6087 }
6088
6089 entry = temp_entry;
6090 if (entry != vm_map_to_entry(map)) {
6091 /* clip and unnest if necessary */
6092 vm_map_clip_start(map, entry, start);
6093 }
6094
6095 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
6096 vm_map_clip_end(map, entry, end);
6097 if (entry->is_sub_map) {
6098 /* clip did unnest if needed */
6099 assert(!entry->use_pmap);
6100 }
6101
6102 entry->inheritance = new_inheritance;
6103
6104 entry = entry->vme_next;
6105 }
6106
6107 vm_map_unlock(map);
6108 return KERN_SUCCESS;
6109 }
6110
6111 /*
6112 * Update the accounting for the amount of wired memory in this map. If the user has
6113 * exceeded the defined limits, then we fail. Wiring on behalf of the kernel never fails.
6114 */
6115
6116 static kern_return_t
6117 add_wire_counts(
6118 vm_map_t map,
6119 vm_map_entry_t entry,
6120 boolean_t user_wire)
6121 {
6122 vm_map_size_t size;
6123
6124 if (user_wire) {
6125 unsigned int total_wire_count = vm_page_wire_count + vm_lopage_free_count;
6126
6127 /*
6128 * We're wiring memory at the request of the user. Check if this is the first time the user is wiring
6129 * this map entry.
6130 */
6131
6132 if (entry->user_wired_count == 0) {
6133 size = entry->vme_end - entry->vme_start;
6134
6135 /*
6136 * Since this is the first time the user is wiring this map entry, check to see if we're
6137 * exceeding the user wire limits. There is a per-map limit, which is the smaller of the
6138 * process's rlimit and the global vm_user_wire_limit that caps it. There is also
6139 * a system-wide limit on the amount of memory all users can wire. If the user is over either
6140 * limit, then we fail.
6141 */
6142
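/*
 * Three checks, all in bytes: the per-map limit (the smaller of the map's
 * user_wire_limit and the global vm_user_wire_limit), the system-wide
 * vm_global_user_wire_limit, and the requirement that
 * vm_global_no_user_wire_amount bytes of max_mem remain un-wired.
 */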
6143 if (size + map->user_wire_size > MIN(map->user_wire_limit, vm_user_wire_limit) ||
6144 size + ptoa_64(total_wire_count) > vm_global_user_wire_limit ||
6145 size + ptoa_64(total_wire_count) > max_mem - vm_global_no_user_wire_amount) {
6146 return KERN_RESOURCE_SHORTAGE;
6147 }
6148
6149 /*
6150 * The first time the user wires an entry, we also increment the wired_count and add this to
6151 * the total that has been wired in the map.
6152 */
6153
6154 if (entry->wired_count >= MAX_WIRE_COUNT) {
6155 return KERN_FAILURE;
6156 }
6157
6158 entry->wired_count++;
6159 map->user_wire_size += size;
6160 }
6161
6162 if (entry->user_wired_count >= MAX_WIRE_COUNT) {
6163 return KERN_FAILURE;
6164 }
6165
6166 entry->user_wired_count++;
6167 } else {
6168 /*
6169 * The kernel's wiring the memory. Just bump the count and continue.
6170 */
6171
6172 if (entry->wired_count >= MAX_WIRE_COUNT) {
6173 panic("vm_map_wire: too many wirings");
6174 }
6175
6176 entry->wired_count++;
6177 }
6178
6179 return KERN_SUCCESS;
6180 }
6181
6182 /*
6183 * Update the memory wiring accounting now that the given map entry is being unwired.
6184 */
6185
6186 static void
6187 subtract_wire_counts(
6188 vm_map_t map,
6189 vm_map_entry_t entry,
6190 boolean_t user_wire)
6191 {
6192 if (user_wire) {
6193 /*
6194 * We're unwiring memory at the request of the user. See if we're removing the last user wire reference.
6195 */
6196
6197 if (entry->user_wired_count == 1) {
6198 /*
6199 * We're removing the last user wire reference. Decrement the wired_count and the total
6200 * user wired memory for this map.
6201 */
6202
6203 assert(entry->wired_count >= 1);
6204 entry->wired_count--;
6205 map->user_wire_size -= entry->vme_end - entry->vme_start;
6206 }
6207
6208 assert(entry->user_wired_count >= 1);
6209 entry->user_wired_count--;
6210 } else {
6211 /*
6212 * The kernel is unwiring the memory. Just update the count.
6213 */
6214
6215 assert(entry->wired_count >= 1);
6216 entry->wired_count--;
6217 }
6218 }
6219
6220 int cs_executable_wire = 0;
6221
6222 /*
6223 * vm_map_wire:
6224 *
6225 * Sets the pageability of the specified address range in the
6226 * target map as wired. Regions specified as not pageable require
6227 * locked-down physical memory and physical page maps. The
6228 * access_type variable indicates types of accesses that must not
6229 * generate page faults. This is checked against protection of
6230 * memory being locked-down.
6231 *
6232 * The map must not be locked, but a reference must remain to the
6233 * map throughout the call.
6234 */
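/*
 * vm_map_wire_nested() is the internal workhorse: "map_pmap"/"pmap_addr"
 * name the pmap (and the address within it) to wire into when recursing
 * on a submap; when "map_pmap" is NULL the map's own pmap is used.  If
 * "physpage_p" is non-NULL, the request must cover exactly one page and
 * the wired page's physical page number is returned through it.
 */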
6235 static kern_return_t
6236 vm_map_wire_nested(
6237 vm_map_t map,
6238 vm_map_offset_t start,
6239 vm_map_offset_t end,
6240 vm_prot_t caller_prot,
6241 vm_tag_t tag,
6242 boolean_t user_wire,
6243 pmap_t map_pmap,
6244 vm_map_offset_t pmap_addr,
6245 ppnum_t *physpage_p)
6246 {
6247 vm_map_entry_t entry;
6248 vm_prot_t access_type;
6249 struct vm_map_entry *first_entry, tmp_entry;
6250 vm_map_t real_map;
6251 vm_map_offset_t s, e;
6252 kern_return_t rc;
6253 boolean_t need_wakeup;
6254 boolean_t main_map = FALSE;
6255 wait_interrupt_t interruptible_state;
6256 thread_t cur_thread;
6257 unsigned int last_timestamp;
6258 vm_map_size_t size;
6259 boolean_t wire_and_extract;
6260
6261 access_type = (caller_prot & VM_PROT_ALL);
6262
6263 wire_and_extract = FALSE;
6264 if (physpage_p != NULL) {
6265 /*
6266 * The caller wants the physical page number of the
6267 * wired page. We return only one physical page number
6268 * so this works for only one page at a time.
6269 */
6270 if ((end - start) != PAGE_SIZE) {
6271 return KERN_INVALID_ARGUMENT;
6272 }
6273 wire_and_extract = TRUE;
6274 *physpage_p = 0;
6275 }
6276
6277 vm_map_lock(map);
6278 if (map_pmap == NULL) {
6279 main_map = TRUE;
6280 }
6281 last_timestamp = map->timestamp;
6282
6283 VM_MAP_RANGE_CHECK(map, start, end);
6284 assert(page_aligned(start));
6285 assert(page_aligned(end));
6286 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
6287 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
6288 if (start == end) {
6289 /* We wired what the caller asked for, zero pages */
6290 vm_map_unlock(map);
6291 return KERN_SUCCESS;
6292 }
6293
6294 need_wakeup = FALSE;
6295 cur_thread = current_thread();
6296
6297 s = start;
6298 rc = KERN_SUCCESS;
6299
6300 if (vm_map_lookup_entry(map, s, &first_entry)) {
6301 entry = first_entry;
6302 /*
6303 * vm_map_clip_start will be done later.
6304 * We don't want to unnest any nested submaps here!
6305 */
6306 } else {
6307 /* Start address is not in map */
6308 rc = KERN_INVALID_ADDRESS;
6309 goto done;
6310 }
6311
6312 while ((entry != vm_map_to_entry(map)) && (s < end)) {
6313 /*
6314 * At this point, we have wired from "start" to "s".
6315 * We still need to wire from "s" to "end".
6316 *
6317 * "entry" hasn't been clipped, so it could start before "s"
6318 * and/or end after "end".
6319 */
6320
6321 /* "e" is how far we want to wire in this entry */
6322 e = entry->vme_end;
6323 if (e > end) {
6324 e = end;
6325 }
6326
6327 /*
6328 * If another thread is wiring/unwiring this entry then
6329 * block after informing the other thread to wake us up.
6330 */
6331 if (entry->in_transition) {
6332 wait_result_t wait_result;
6333
6334 /*
6335 * We have not clipped the entry. Make sure that
6336 * the start address is in range so that the lookup
6337 * below will succeed.
6338 * "s" is the current starting point: we've already
6339 * wired from "start" to "s" and we still have
6340 * to wire from "s" to "end".
6341 */
6342
6343 entry->needs_wakeup = TRUE;
6344
6345 /*
6346 * wake up anybody waiting on entries that we have
6347 * already wired.
6348 */
6349 if (need_wakeup) {
6350 vm_map_entry_wakeup(map);
6351 need_wakeup = FALSE;
6352 }
6353 /*
6354 * User wiring is interruptible
6355 */
6356 wait_result = vm_map_entry_wait(map,
6357 (user_wire) ? THREAD_ABORTSAFE :
6358 THREAD_UNINT);
6359 if (user_wire && wait_result == THREAD_INTERRUPTED) {
6360 /*
6361 * undo the wirings we have done so far
6362 * We do not clear the needs_wakeup flag,
6363 * because we cannot tell if we were the
6364 * only one waiting.
6365 */
6366 rc = KERN_FAILURE;
6367 goto done;
6368 }
6369
6370 /*
6371 * Cannot avoid a lookup here. Reset the timestamp.
6372 */
6373 last_timestamp = map->timestamp;
6374
6375 /*
6376 * The entry could have been clipped, look it up again.
6377 * The worst that can happen is that it may not exist anymore.
6378 */
6379 if (!vm_map_lookup_entry(map, s, &first_entry)) {
6380 /*
6381 * User: undo everything up to the previous
6382 * entry. Let vm_map_unwire worry about
6383 * checking the validity of the range.
6384 */
6385 rc = KERN_FAILURE;
6386 goto done;
6387 }
6388 entry = first_entry;
6389 continue;
6390 }
6391
6392 if (entry->is_sub_map) {
6393 vm_map_offset_t sub_start;
6394 vm_map_offset_t sub_end;
6395 vm_map_offset_t local_start;
6396 vm_map_offset_t local_end;
6397 pmap_t pmap;
6398
6399 if (wire_and_extract) {
6400 /*
6401 * Wiring would result in copy-on-write
6402 * which would not be compatible with
6403 * the sharing we have with the original
6404 * provider of this memory.
6405 */
6406 rc = KERN_INVALID_ARGUMENT;
6407 goto done;
6408 }
6409
6410 vm_map_clip_start(map, entry, s);
6411 vm_map_clip_end(map, entry, end);
6412
6413 sub_start = VME_OFFSET(entry);
6414 sub_end = entry->vme_end;
6415 sub_end += VME_OFFSET(entry) - entry->vme_start;
6416
6417 local_end = entry->vme_end;
6418 if (map_pmap == NULL) {
6419 vm_object_t object;
6420 vm_object_offset_t offset;
6421 vm_prot_t prot;
6422 boolean_t wired;
6423 vm_map_entry_t local_entry;
6424 vm_map_version_t version;
6425 vm_map_t lookup_map;
6426
6427 if (entry->use_pmap) {
6428 pmap = VME_SUBMAP(entry)->pmap;
6429 /* ppc implementation requires that */
6430 /* submap's pmap address ranges line */
6431 /* up with parent map */
6432 #ifdef notdef
6433 pmap_addr = sub_start;
6434 #endif
6435 pmap_addr = s;
6436 } else {
6437 pmap = map->pmap;
6438 pmap_addr = s;
6439 }
6440
6441 if (entry->wired_count) {
6442 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
6443 goto done;
6444 }
6445
6446 /*
6447 * The map was not unlocked:
6448 * no need to goto re-lookup.
6449 * Just go directly to next entry.
6450 */
6451 entry = entry->vme_next;
6452 s = entry->vme_start;
6453 continue;
6454 }
6455
6456 /* call vm_map_lookup_locked to */
6457 /* cause any needs copy to be */
6458 /* evaluated */
6459 local_start = entry->vme_start;
6460 lookup_map = map;
6461 vm_map_lock_write_to_read(map);
6462 if (vm_map_lookup_locked(
6463 &lookup_map, local_start,
6464 access_type | VM_PROT_COPY,
6465 OBJECT_LOCK_EXCLUSIVE,
6466 &version, &object,
6467 &offset, &prot, &wired,
6468 NULL,
6469 &real_map)) {
6470 vm_map_unlock_read(lookup_map);
6471 assert(map_pmap == NULL);
6472 vm_map_unwire(map, start,
6473 s, user_wire);
6474 return KERN_FAILURE;
6475 }
6476 vm_object_unlock(object);
6477 if (real_map != lookup_map) {
6478 vm_map_unlock(real_map);
6479 }
6480 vm_map_unlock_read(lookup_map);
6481 vm_map_lock(map);
6482
6483 /* we unlocked, so must re-lookup */
6484 if (!vm_map_lookup_entry(map,
6485 local_start,
6486 &local_entry)) {
6487 rc = KERN_FAILURE;
6488 goto done;
6489 }
6490
6491 /*
6492 * entry could have been "simplified",
6493 * so re-clip
6494 */
6495 entry = local_entry;
6496 assert(s == local_start);
6497 vm_map_clip_start(map, entry, s);
6498 vm_map_clip_end(map, entry, end);
6499 /* re-compute "e" */
6500 e = entry->vme_end;
6501 if (e > end) {
6502 e = end;
6503 }
6504
6505 /* did we have a change of type? */
6506 if (!entry->is_sub_map) {
6507 last_timestamp = map->timestamp;
6508 continue;
6509 }
6510 } else {
6511 local_start = entry->vme_start;
6512 pmap = map_pmap;
6513 }
6514
6515 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
6516 goto done;
6517 }
6518
6519 entry->in_transition = TRUE;
6520
6521 vm_map_unlock(map);
6522 rc = vm_map_wire_nested(VME_SUBMAP(entry),
6523 sub_start, sub_end,
6524 caller_prot, tag,
6525 user_wire, pmap, pmap_addr,
6526 NULL);
6527 vm_map_lock(map);
6528
6529 /*
6530 * Find the entry again. It could have been clipped
6531 * after we unlocked the map.
6532 */
6533 if (!vm_map_lookup_entry(map, local_start,
6534 &first_entry)) {
6535 panic("vm_map_wire: re-lookup failed");
6536 }
6537 entry = first_entry;
6538
6539 assert(local_start == s);
6540 /* re-compute "e" */
6541 e = entry->vme_end;
6542 if (e > end) {
6543 e = end;
6544 }
6545
6546 last_timestamp = map->timestamp;
6547 while ((entry != vm_map_to_entry(map)) &&
6548 (entry->vme_start < e)) {
6549 assert(entry->in_transition);
6550 entry->in_transition = FALSE;
6551 if (entry->needs_wakeup) {
6552 entry->needs_wakeup = FALSE;
6553 need_wakeup = TRUE;
6554 }
6555 if (rc != KERN_SUCCESS) {/* from vm_*_wire */
6556 subtract_wire_counts(map, entry, user_wire);
6557 }
6558 entry = entry->vme_next;
6559 }
6560 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
6561 goto done;
6562 }
6563
6564 /* no need to relookup again */
6565 s = entry->vme_start;
6566 continue;
6567 }
6568
6569 /*
6570 * If this entry is already wired then increment
6571 * the appropriate wire reference count.
6572 */
6573 if (entry->wired_count) {
6574 if ((entry->protection & access_type) != access_type) {
6575 /* found a protection problem */
6576
6577 /*
6578 * XXX FBDP
6579 * We should always return an error
6580 * in this case but since we didn't
6581 * enforce it before, let's do
6582 * it only for the new "wire_and_extract"
6583 * code path for now...
6584 */
6585 if (wire_and_extract) {
6586 rc = KERN_PROTECTION_FAILURE;
6587 goto done;
6588 }
6589 }
6590
6591 /*
6592 * entry is already wired down, get our reference
6593 * after clipping to our range.
6594 */
6595 vm_map_clip_start(map, entry, s);
6596 vm_map_clip_end(map, entry, end);
6597
6598 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
6599 goto done;
6600 }
6601
6602 if (wire_and_extract) {
6603 vm_object_t object;
6604 vm_object_offset_t offset;
6605 vm_page_t m;
6606
6607 /*
6608 * We don't have to "wire" the page again
6609 * but we still have to "extract" its
6610 * physical page number, after some sanity
6611 * checks.
6612 */
6613 assert((entry->vme_end - entry->vme_start)
6614 == PAGE_SIZE);
6615 assert(!entry->needs_copy);
6616 assert(!entry->is_sub_map);
6617 assert(VME_OBJECT(entry));
6618 if (((entry->vme_end - entry->vme_start)
6619 != PAGE_SIZE) ||
6620 entry->needs_copy ||
6621 entry->is_sub_map ||
6622 VME_OBJECT(entry) == VM_OBJECT_NULL) {
6623 rc = KERN_INVALID_ARGUMENT;
6624 goto done;
6625 }
6626
6627 object = VME_OBJECT(entry);
6628 offset = VME_OFFSET(entry);
6629 /* need exclusive lock to update m->dirty */
6630 if (entry->protection & VM_PROT_WRITE) {
6631 vm_object_lock(object);
6632 } else {
6633 vm_object_lock_shared(object);
6634 }
6635 m = vm_page_lookup(object, offset);
6636 assert(m != VM_PAGE_NULL);
6637 assert(VM_PAGE_WIRED(m));
6638 if (m != VM_PAGE_NULL && VM_PAGE_WIRED(m)) {
6639 *physpage_p = VM_PAGE_GET_PHYS_PAGE(m);
6640 if (entry->protection & VM_PROT_WRITE) {
6641 vm_object_lock_assert_exclusive(
6642 object);
6643 m->vmp_dirty = TRUE;
6644 }
6645 } else {
6646 /* not already wired !? */
6647 *physpage_p = 0;
6648 }
6649 vm_object_unlock(object);
6650 }
6651
6652 /* map was not unlocked: no need to relookup */
6653 entry = entry->vme_next;
6654 s = entry->vme_start;
6655 continue;
6656 }
6657
6658 /*
6659 * Unwired entry or wire request transmitted via submap
6660 */
6661
6662 /*
6663 * Wiring would copy the pages to the shadow object.
6664 * The shadow object would not be code-signed so
6665 * attempting to execute code from these copied pages
6666 * would trigger a code-signing violation.
6667 */
6668
6669 if ((entry->protection & VM_PROT_EXECUTE)
6670 #if !CONFIG_EMBEDDED
6671 &&
6672 map != kernel_map &&
6673 cs_process_enforcement(NULL)
6674 #endif /* !CONFIG_EMBEDDED */
6675 ) {
6676 #if MACH_ASSERT
6677 printf("pid %d[%s] wiring executable range from "
6678 "0x%llx to 0x%llx: rejected to preserve "
6679 "code-signing\n",
6680 proc_selfpid(),
6681 (current_task()->bsd_info
6682 ? proc_name_address(current_task()->bsd_info)
6683 : "?"),
6684 (uint64_t) entry->vme_start,
6685 (uint64_t) entry->vme_end);
6686 #endif /* MACH_ASSERT */
6687 DTRACE_VM2(cs_executable_wire,
6688 uint64_t, (uint64_t)entry->vme_start,
6689 uint64_t, (uint64_t)entry->vme_end);
6690 cs_executable_wire++;
6691 rc = KERN_PROTECTION_FAILURE;
6692 goto done;
6693 }
6694
6695 /*
6696 * Perform actions of vm_map_lookup that need the write
6697 * lock on the map: create a shadow object for a
6698 * copy-on-write region, or an object for a zero-fill
6699 * region.
6700 */
6701 size = entry->vme_end - entry->vme_start;
6702 /*
6703 * If wiring a copy-on-write page, we need to copy it now
6704 * even if we're only (currently) requesting read access.
6705 * This is aggressive, but once it's wired we can't move it.
6706 */
6707 if (entry->needs_copy) {
6708 if (wire_and_extract) {
6709 /*
6710 * We're supposed to share with the original
6711 * provider so should not be "needs_copy"
6712 */
6713 rc = KERN_INVALID_ARGUMENT;
6714 goto done;
6715 }
6716
6717 VME_OBJECT_SHADOW(entry, size);
6718 entry->needs_copy = FALSE;
6719 } else if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
6720 if (wire_and_extract) {
6721 /*
6722 * We're supposed to share with the original
6723 * provider so should already have an object.
6724 */
6725 rc = KERN_INVALID_ARGUMENT;
6726 goto done;
6727 }
6728 VME_OBJECT_SET(entry, vm_object_allocate(size));
6729 VME_OFFSET_SET(entry, (vm_object_offset_t)0);
6730 assert(entry->use_pmap);
6731 }
6732
6733 vm_map_clip_start(map, entry, s);
6734 vm_map_clip_end(map, entry, end);
6735
6736 /* re-compute "e" */
6737 e = entry->vme_end;
6738 if (e > end) {
6739 e = end;
6740 }
6741
6742 /*
6743 * Check for holes and protection mismatch.
6744 * Holes: Next entry should be contiguous unless this
6745 * is the end of the region.
6746 * Protection: Access requested must be allowed, unless
6747 * wiring is by protection class
6748 */
6749 if ((entry->vme_end < end) &&
6750 ((entry->vme_next == vm_map_to_entry(map)) ||
6751 (entry->vme_next->vme_start > entry->vme_end))) {
6752 /* found a hole */
6753 rc = KERN_INVALID_ADDRESS;
6754 goto done;
6755 }
6756 if ((entry->protection & access_type) != access_type) {
6757 /* found a protection problem */
6758 rc = KERN_PROTECTION_FAILURE;
6759 goto done;
6760 }
6761
6762 assert(entry->wired_count == 0 && entry->user_wired_count == 0);
6763
6764 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
6765 goto done;
6766 }
6767
6768 entry->in_transition = TRUE;
6769
6770 /*
6771 * This entry might get split once we unlock the map.
6772 * In vm_fault_wire(), we need the current range as
6773 * defined by this entry. In order for this to work
6774 * along with a simultaneous clip operation, we make a
6775 * temporary copy of this entry and use that for the
6776 * wiring. Note that the underlying objects do not
6777 * change during a clip.
6778 */
6779 tmp_entry = *entry;
6780
6781 /*
6782 * The in_transition state guarantees that the entry
6783 * (or entries for this range, if a split occurred) will be
6784 * there when the map lock is acquired for the second time.
6785 */
6786 vm_map_unlock(map);
6787
6788 if (!user_wire && cur_thread != THREAD_NULL) {
6789 interruptible_state = thread_interrupt_level(THREAD_UNINT);
6790 } else {
6791 interruptible_state = THREAD_UNINT;
6792 }
6793
6794 if (map_pmap) {
6795 rc = vm_fault_wire(map,
6796 &tmp_entry, caller_prot, tag, map_pmap, pmap_addr,
6797 physpage_p);
6798 } else {
6799 rc = vm_fault_wire(map,
6800 &tmp_entry, caller_prot, tag, map->pmap,
6801 tmp_entry.vme_start,
6802 physpage_p);
6803 }
6804
6805 if (!user_wire && cur_thread != THREAD_NULL) {
6806 thread_interrupt_level(interruptible_state);
6807 }
6808
6809 vm_map_lock(map);
6810
6811 if (last_timestamp + 1 != map->timestamp) {
6812 /*
6813 * Find the entry again. It could have been clipped
6814 * after we unlocked the map.
6815 */
6816 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
6817 &first_entry)) {
6818 panic("vm_map_wire: re-lookup failed");
6819 }
6820
6821 entry = first_entry;
6822 }
6823
6824 last_timestamp = map->timestamp;
6825
6826 while ((entry != vm_map_to_entry(map)) &&
6827 (entry->vme_start < tmp_entry.vme_end)) {
6828 assert(entry->in_transition);
6829 entry->in_transition = FALSE;
6830 if (entry->needs_wakeup) {
6831 entry->needs_wakeup = FALSE;
6832 need_wakeup = TRUE;
6833 }
6834 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
6835 subtract_wire_counts(map, entry, user_wire);
6836 }
6837 entry = entry->vme_next;
6838 }
6839
6840 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
6841 goto done;
6842 }
6843
6844 if ((entry != vm_map_to_entry(map)) && /* we still have entries in the map */
6845 (tmp_entry.vme_end != end) && /* AND, we are not at the end of the requested range */
6846 (entry->vme_start != tmp_entry.vme_end)) { /* AND, the next entry is not contiguous. */
6847 /* found a "new" hole */
6848 s = tmp_entry.vme_end;
6849 rc = KERN_INVALID_ADDRESS;
6850 goto done;
6851 }
6852
6853 s = entry->vme_start;
6854 } /* end while loop through map entries */
6855
6856 done:
6857 if (rc == KERN_SUCCESS) {
6858 /* repair any damage we may have made to the VM map */
6859 vm_map_simplify_range(map, start, end);
6860 }
6861
6862 vm_map_unlock(map);
6863
6864 /*
6865 * wake up anybody waiting on entries we wired.
6866 */
6867 if (need_wakeup) {
6868 vm_map_entry_wakeup(map);
6869 }
6870
6871 if (rc != KERN_SUCCESS) {
6872 /* undo what has been wired so far */
6873 vm_map_unwire_nested(map, start, s, user_wire,
6874 map_pmap, pmap_addr);
6875 if (physpage_p) {
6876 *physpage_p = 0;
6877 }
6878 }
6879
6880 return rc;
6881 }
6882
6883 kern_return_t
6884 vm_map_wire_external(
6885 vm_map_t map,
6886 vm_map_offset_t start,
6887 vm_map_offset_t end,
6888 vm_prot_t caller_prot,
6889 boolean_t user_wire)
6890 {
6891 kern_return_t kret;
6892
6893 kret = vm_map_wire_nested(map, start, end, caller_prot, vm_tag_bt(),
6894 user_wire, (pmap_t)NULL, 0, NULL);
6895 return kret;
6896 }
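/*
 * Illustrative sketch (not part of this file): wire one map page of a
 * hypothetical map "map" at page-aligned "addr" for read/write, then
 * unwire it when done.  Note that vm_map_wire_external() takes its VM
 * tag from vm_tag_bt() rather than from the caller.
 *
 *	kr = vm_map_wire_external(map, addr, addr + VM_MAP_PAGE_SIZE(map),
 *	    VM_PROT_READ | VM_PROT_WRITE, FALSE);
 *	...
 *	kr = vm_map_unwire(map, addr, addr + VM_MAP_PAGE_SIZE(map), FALSE);
 */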
6897
6898 kern_return_t
6899 vm_map_wire_kernel(
6900 vm_map_t map,
6901 vm_map_offset_t start,
6902 vm_map_offset_t end,
6903 vm_prot_t caller_prot,
6904 vm_tag_t tag,
6905 boolean_t user_wire)
6906 {
6907 kern_return_t kret;
6908
6909 kret = vm_map_wire_nested(map, start, end, caller_prot, tag,
6910 user_wire, (pmap_t)NULL, 0, NULL);
6911 return kret;
6912 }
6913
6914 kern_return_t
6915 vm_map_wire_and_extract_external(
6916 vm_map_t map,
6917 vm_map_offset_t start,
6918 vm_prot_t caller_prot,
6919 boolean_t user_wire,
6920 ppnum_t *physpage_p)
6921 {
6922 kern_return_t kret;
6923
6924 kret = vm_map_wire_nested(map,
6925 start,
6926 start + VM_MAP_PAGE_SIZE(map),
6927 caller_prot,
6928 vm_tag_bt(),
6929 user_wire,
6930 (pmap_t)NULL,
6931 0,
6932 physpage_p);
6933 if (kret != KERN_SUCCESS &&
6934 physpage_p != NULL) {
6935 *physpage_p = 0;
6936 }
6937 return kret;
6938 }
6939
6940 kern_return_t
6941 vm_map_wire_and_extract_kernel(
6942 vm_map_t map,
6943 vm_map_offset_t start,
6944 vm_prot_t caller_prot,
6945 vm_tag_t tag,
6946 boolean_t user_wire,
6947 ppnum_t *physpage_p)
6948 {
6949 kern_return_t kret;
6950
6951 kret = vm_map_wire_nested(map,
6952 start,
6953 start + VM_MAP_PAGE_SIZE(map),
6954 caller_prot,
6955 tag,
6956 user_wire,
6957 (pmap_t)NULL,
6958 0,
6959 physpage_p);
6960 if (kret != KERN_SUCCESS &&
6961 physpage_p != NULL) {
6962 *physpage_p = 0;
6963 }
6964 return kret;
6965 }
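/*
 * Both wire_and_extract variants wire exactly one map page starting at
 * "start" and return its physical page number through "physpage_p";
 * *physpage_p is reset to 0 if the wiring fails.
 */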
6966
6967 /*
6968 * vm_map_unwire:
6969 *
6970 * Sets the pageability of the specified address range in the target
6971 * map as pageable. Regions specified must have been wired previously.
6972 *
6973 * The map must not be locked, but a reference must remain to the map
6974 * throughout the call.
6975 *
6976 * Kernel will panic on failures. User unwire ignores holes and
6977 * unwired and in-transition entries to avoid losing memory by leaving
6978 * it unwired.
6979 */
6980 static kern_return_t
6981 vm_map_unwire_nested(
6982 vm_map_t map,
6983 vm_map_offset_t start,
6984 vm_map_offset_t end,
6985 boolean_t user_wire,
6986 pmap_t map_pmap,
6987 vm_map_offset_t pmap_addr)
6988 {
6989 vm_map_entry_t entry;
6990 struct vm_map_entry *first_entry, tmp_entry;
6991 boolean_t need_wakeup;
6992 boolean_t main_map = FALSE;
6993 unsigned int last_timestamp;
6994
6995 vm_map_lock(map);
6996 if (map_pmap == NULL) {
6997 main_map = TRUE;
6998 }
6999 last_timestamp = map->timestamp;
7000
7001 VM_MAP_RANGE_CHECK(map, start, end);
7002 assert(page_aligned(start));
7003 assert(page_aligned(end));
7004 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
7005 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
7006
7007 if (start == end) {
7008 /* We unwired what the caller asked for: zero pages */
7009 vm_map_unlock(map);
7010 return KERN_SUCCESS;
7011 }
7012
7013 if (vm_map_lookup_entry(map, start, &first_entry)) {
7014 entry = first_entry;
7015 /*
7016 * vm_map_clip_start will be done later.
7017 * We don't want to unnest any nested submaps here!
7018 */
7019 } else {
7020 if (!user_wire) {
7021 panic("vm_map_unwire: start not found");
7022 }
7023 /* Start address is not in map. */
7024 vm_map_unlock(map);
7025 return KERN_INVALID_ADDRESS;
7026 }
7027
7028 if (entry->superpage_size) {
7029 /* superpages are always wired */
7030 vm_map_unlock(map);
7031 return KERN_INVALID_ADDRESS;
7032 }
7033
7034 need_wakeup = FALSE;
7035 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
7036 if (entry->in_transition) {
7037 /*
7038 * 1)
7039 * Another thread is wiring down this entry. Note
7040 * that if it is not for the other thread we would
7041 * be unwiring an unwired entry. This is not
7042 * permitted. If we wait, we will be unwiring memory
7043 * we did not wire.
7044 *
7045 * 2)
7046 * Another thread is unwiring this entry. We did not
7047 * have a reference to it, because if we did, this
7048 * entry will not be getting unwired now.
7049 */
7050 if (!user_wire) {
7051 /*
7052 * XXX FBDP
7053 * This could happen: there could be some
7054 * overlapping vslock/vsunlock operations
7055 * going on.
7056 * We should probably just wait and retry,
7057 * but then we have to be careful that this
7058 * entry could get "simplified" after
7059 * "in_transition" gets unset and before
7060 * we re-lookup the entry, so we would
7061 * have to re-clip the entry to avoid
7062 * re-unwiring what we have already unwired...
7063 * See vm_map_wire_nested().
7064 *
7065 * Or we could just ignore "in_transition"
7066 * here and proceed to decrement the wired
7067 * count(s) on this entry. That should be fine
7068 * as long as "wired_count" doesn't drop all
7069 * the way to 0 (and we should panic if THAT
7070 * happens).
7071 */
7072 panic("vm_map_unwire: in_transition entry");
7073 }
7074
7075 entry = entry->vme_next;
7076 continue;
7077 }
7078
7079 if (entry->is_sub_map) {
7080 vm_map_offset_t sub_start;
7081 vm_map_offset_t sub_end;
7082 vm_map_offset_t local_end;
7083 pmap_t pmap;
7084
7085 vm_map_clip_start(map, entry, start);
7086 vm_map_clip_end(map, entry, end);
7087
7088 sub_start = VME_OFFSET(entry);
7089 sub_end = entry->vme_end - entry->vme_start;
7090 sub_end += VME_OFFSET(entry);
7091 local_end = entry->vme_end;
7092 if (map_pmap == NULL) {
7093 if (entry->use_pmap) {
7094 pmap = VME_SUBMAP(entry)->pmap;
7095 pmap_addr = sub_start;
7096 } else {
7097 pmap = map->pmap;
7098 pmap_addr = start;
7099 }
7100 if (entry->wired_count == 0 ||
7101 (user_wire && entry->user_wired_count == 0)) {
7102 if (!user_wire) {
7103 panic("vm_map_unwire: entry is unwired");
7104 }
7105 entry = entry->vme_next;
7106 continue;
7107 }
7108
7109 /*
7110 * Check for holes
7111 * Holes: Next entry should be contiguous unless
7112 * this is the end of the region.
7113 */
7114 if (((entry->vme_end < end) &&
7115 ((entry->vme_next == vm_map_to_entry(map)) ||
7116 (entry->vme_next->vme_start
7117 > entry->vme_end)))) {
7118 if (!user_wire) {
7119 panic("vm_map_unwire: non-contiguous region");
7120 }
7121 /*
7122 * entry = entry->vme_next;
7123 * continue;
7124 */
7125 }
7126
7127 subtract_wire_counts(map, entry, user_wire);
7128
7129 if (entry->wired_count != 0) {
7130 entry = entry->vme_next;
7131 continue;
7132 }
7133
7134 entry->in_transition = TRUE;
7135 tmp_entry = *entry; /* see comment in vm_map_wire() */
7136
7137 /*
7138 * We can unlock the map now. The in_transition state
7139 * guarantees existence of the entry.
7140 */
7141 vm_map_unlock(map);
7142 vm_map_unwire_nested(VME_SUBMAP(entry),
7143 sub_start, sub_end, user_wire, pmap, pmap_addr);
7144 vm_map_lock(map);
7145
7146 if (last_timestamp + 1 != map->timestamp) {
7147 /*
7148 * Find the entry again. It could have been
7149 * clipped or deleted after we unlocked the map.
7150 */
7151 if (!vm_map_lookup_entry(map,
7152 tmp_entry.vme_start,
7153 &first_entry)) {
7154 if (!user_wire) {
7155 panic("vm_map_unwire: re-lookup failed");
7156 }
7157 entry = first_entry->vme_next;
7158 } else {
7159 entry = first_entry;
7160 }
7161 }
7162 last_timestamp = map->timestamp;
7163
7164 /*
7165 * clear transition bit for all constituent entries
7166 * that were in the original entry (saved in
7167 * tmp_entry). Also check for waiters.
7168 */
7169 while ((entry != vm_map_to_entry(map)) &&
7170 (entry->vme_start < tmp_entry.vme_end)) {
7171 assert(entry->in_transition);
7172 entry->in_transition = FALSE;
7173 if (entry->needs_wakeup) {
7174 entry->needs_wakeup = FALSE;
7175 need_wakeup = TRUE;
7176 }
7177 entry = entry->vme_next;
7178 }
7179 continue;
7180 } else {
7181 vm_map_unlock(map);
7182 vm_map_unwire_nested(VME_SUBMAP(entry),
7183 sub_start, sub_end, user_wire, map_pmap,
7184 pmap_addr);
7185 vm_map_lock(map);
7186
7187 if (last_timestamp + 1 != map->timestamp) {
7188 /*
7189 * Find the entry again. It could have been
7190 * clipped or deleted after we unlocked the map.
7191 */
7192 if (!vm_map_lookup_entry(map,
7193 tmp_entry.vme_start,
7194 &first_entry)) {
7195 if (!user_wire) {
7196 panic("vm_map_unwire: re-lookup failed");
7197 }
7198 entry = first_entry->vme_next;
7199 } else {
7200 entry = first_entry;
7201 }
7202 }
7203 last_timestamp = map->timestamp;
7204 }
7205 }
7206
7207
7208 if ((entry->wired_count == 0) ||
7209 (user_wire && entry->user_wired_count == 0)) {
7210 if (!user_wire) {
7211 panic("vm_map_unwire: entry is unwired");
7212 }
7213
7214 entry = entry->vme_next;
7215 continue;
7216 }
7217
7218 assert(entry->wired_count > 0 &&
7219 (!user_wire || entry->user_wired_count > 0));
7220
7221 vm_map_clip_start(map, entry, start);
7222 vm_map_clip_end(map, entry, end);
7223
7224 /*
7225 * Check for holes
7226 * Holes: Next entry should be contiguous unless
7227 * this is the end of the region.
7228 */
7229 if (((entry->vme_end < end) &&
7230 ((entry->vme_next == vm_map_to_entry(map)) ||
7231 (entry->vme_next->vme_start > entry->vme_end)))) {
7232 if (!user_wire) {
7233 panic("vm_map_unwire: non-contiguous region");
7234 }
7235 entry = entry->vme_next;
7236 continue;
7237 }
7238
7239 subtract_wire_counts(map, entry, user_wire);
7240
7241 if (entry->wired_count != 0) {
7242 entry = entry->vme_next;
7243 continue;
7244 }
7245
7246 if (entry->zero_wired_pages) {
7247 entry->zero_wired_pages = FALSE;
7248 }
7249
7250 entry->in_transition = TRUE;
7251 tmp_entry = *entry; /* see comment in vm_map_wire() */
7252
7253 /*
7254 * We can unlock the map now. The in_transition state
7255 * guarantees existence of the entry.
7256 */
7257 vm_map_unlock(map);
7258 if (map_pmap) {
7259 vm_fault_unwire(map,
7260 &tmp_entry, FALSE, map_pmap, pmap_addr);
7261 } else {
7262 vm_fault_unwire(map,
7263 &tmp_entry, FALSE, map->pmap,
7264 tmp_entry.vme_start);
7265 }
7266 vm_map_lock(map);
7267
7268 if (last_timestamp + 1 != map->timestamp) {
7269 /*
7270 * Find the entry again. It could have been clipped
7271 * or deleted after we unlocked the map.
7272 */
7273 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
7274 &first_entry)) {
7275 if (!user_wire) {
7276 panic("vm_map_unwire: re-lookup failed");
7277 }
7278 entry = first_entry->vme_next;
7279 } else {
7280 entry = first_entry;
7281 }
7282 }
7283 last_timestamp = map->timestamp;
7284
7285 /*
7286 * clear transition bit for all constituent entries that
7287 * were in the original entry (saved in tmp_entry). Also
7288 * check for waiters.
7289 */
7290 while ((entry != vm_map_to_entry(map)) &&
7291 (entry->vme_start < tmp_entry.vme_end)) {
7292 assert(entry->in_transition);
7293 entry->in_transition = FALSE;
7294 if (entry->needs_wakeup) {
7295 entry->needs_wakeup = FALSE;
7296 need_wakeup = TRUE;
7297 }
7298 entry = entry->vme_next;
7299 }
7300 }
7301
7302 /*
7303 * We might have fragmented the address space when we wired this
7304 * range of addresses. Attempt to re-coalesce these VM map entries
7305 * with their neighbors now that they're no longer wired.
7306 * Under some circumstances, address space fragmentation can
7307 * prevent VM object shadow chain collapsing, which can cause
7308 * swap space leaks.
7309 */
7310 vm_map_simplify_range(map, start, end);
7311
7312 vm_map_unlock(map);
7313 /*
7314 * wake up anybody waiting on entries that we have unwired.
7315 */
7316 if (need_wakeup) {
7317 vm_map_entry_wakeup(map);
7318 }
7319 return KERN_SUCCESS;
7320 }
7321
7322 kern_return_t
7323 vm_map_unwire(
7324 vm_map_t map,
7325 vm_map_offset_t start,
7326 vm_map_offset_t end,
7327 boolean_t user_wire)
7328 {
7329 return vm_map_unwire_nested(map, start, end,
7330 user_wire, (pmap_t)NULL, 0);
7331 }
7332
7333
7334 /*
7335 * vm_map_entry_delete: [ internal use only ]
7336 *
7337 * Deallocate the given entry from the target map.
7338 */
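/*
 * Called with the map locked; unlinks the entry and adjusts the map size,
 * then unlocks the map before dropping the reference on the entry's
 * object or submap, and returns with the map unlocked.
 */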
7339 static void
7340 vm_map_entry_delete(
7341 vm_map_t map,
7342 vm_map_entry_t entry)
7343 {
7344 vm_map_offset_t s, e;
7345 vm_object_t object;
7346 vm_map_t submap;
7347
7348 s = entry->vme_start;
7349 e = entry->vme_end;
7350 assert(page_aligned(s));
7351 assert(page_aligned(e));
7352 if (entry->map_aligned == TRUE) {
7353 assert(VM_MAP_PAGE_ALIGNED(s, VM_MAP_PAGE_MASK(map)));
7354 assert(VM_MAP_PAGE_ALIGNED(e, VM_MAP_PAGE_MASK(map)));
7355 }
7356 assert(entry->wired_count == 0);
7357 assert(entry->user_wired_count == 0);
7358 assert(!entry->permanent);
7359
7360 if (entry->is_sub_map) {
7361 object = NULL;
7362 submap = VME_SUBMAP(entry);
7363 } else {
7364 submap = NULL;
7365 object = VME_OBJECT(entry);
7366 }
7367
7368 vm_map_store_entry_unlink(map, entry);
7369 map->size -= e - s;
7370
7371 vm_map_entry_dispose(map, entry);
7372
7373 vm_map_unlock(map);
7374 /*
7375 * Deallocate the object only after removing all
7376 * pmap entries pointing to its pages.
7377 */
7378 if (submap) {
7379 vm_map_deallocate(submap);
7380 } else {
7381 vm_object_deallocate(object);
7382 }
7383 }
7384
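/*
 * vm_map_submap_pmap_clean:
 *
 * Remove the pmap translations backing the portion of "sub_map" that is
 * mapped at [start, end) in "map", starting at "offset" within the submap.
 * Recurses into nested submaps; uses vm_object_pmap_protect_options() when
 * the map is mapped in other pmaps and pmap_remove() otherwise.
 */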
7385 void
7386 vm_map_submap_pmap_clean(
7387 vm_map_t map,
7388 vm_map_offset_t start,
7389 vm_map_offset_t end,
7390 vm_map_t sub_map,
7391 vm_map_offset_t offset)
7392 {
7393 vm_map_offset_t submap_start;
7394 vm_map_offset_t submap_end;
7395 vm_map_size_t remove_size;
7396 vm_map_entry_t entry;
7397
7398 submap_end = offset + (end - start);
7399 submap_start = offset;
7400
7401 vm_map_lock_read(sub_map);
7402 if (vm_map_lookup_entry(sub_map, offset, &entry)) {
7403 remove_size = (entry->vme_end - entry->vme_start);
7404 if (offset > entry->vme_start) {
7405 remove_size -= offset - entry->vme_start;
7406 }
7407
7408
7409 if (submap_end < entry->vme_end) {
7410 remove_size -=
7411 entry->vme_end - submap_end;
7412 }
7413 if (entry->is_sub_map) {
7414 vm_map_submap_pmap_clean(
7415 sub_map,
7416 start,
7417 start + remove_size,
7418 VME_SUBMAP(entry),
7419 VME_OFFSET(entry));
7420 } else {
7421 if (map->mapped_in_other_pmaps &&
7422 os_ref_get_count(&map->map_refcnt) != 0 &&
7423 VME_OBJECT(entry) != NULL) {
7424 vm_object_pmap_protect_options(
7425 VME_OBJECT(entry),
7426 (VME_OFFSET(entry) +
7427 offset -
7428 entry->vme_start),
7429 remove_size,
7430 PMAP_NULL,
7431 entry->vme_start,
7432 VM_PROT_NONE,
7433 PMAP_OPTIONS_REMOVE);
7434 } else {
7435 pmap_remove(map->pmap,
7436 (addr64_t)start,
7437 (addr64_t)(start + remove_size));
7438 }
7439 }
7440 }
7441
7442 entry = entry->vme_next;
7443
7444 while ((entry != vm_map_to_entry(sub_map))
7445 && (entry->vme_start < submap_end)) {
7446 remove_size = (entry->vme_end - entry->vme_start);
7447 if (submap_end < entry->vme_end) {
7448 remove_size -= entry->vme_end - submap_end;
7449 }
7450 if (entry->is_sub_map) {
7451 vm_map_submap_pmap_clean(
7452 sub_map,
7453 (start + entry->vme_start) - offset,
7454 ((start + entry->vme_start) - offset) + remove_size,
7455 VME_SUBMAP(entry),
7456 VME_OFFSET(entry));
7457 } else {
7458 if (map->mapped_in_other_pmaps &&
7459 os_ref_get_count(&map->map_refcnt) != 0 &&
7460 VME_OBJECT(entry) != NULL) {
7461 vm_object_pmap_protect_options(
7462 VME_OBJECT(entry),
7463 VME_OFFSET(entry),
7464 remove_size,
7465 PMAP_NULL,
7466 entry->vme_start,
7467 VM_PROT_NONE,
7468 PMAP_OPTIONS_REMOVE);
7469 } else {
7470 pmap_remove(map->pmap,
7471 (addr64_t)((start + entry->vme_start)
7472 - offset),
7473 (addr64_t)(((start + entry->vme_start)
7474 - offset) + remove_size));
7475 }
7476 }
7477 entry = entry->vme_next;
7478 }
7479 vm_map_unlock_read(sub_map);
7480 return;
7481 }
7482
7483 /*
7484 * virt_memory_guard_ast:
7485 *
7486 * Handle the AST callout for a virtual memory guard.
7487 * Raise an EXC_GUARD exception and terminate the task
7488 * if configured to do so.
7489 */
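/*
 * task_exc_guard behavior bits consulted below:
 *   TASK_EXC_GUARD_VM_DELIVER - deliver the exception at all
 *   TASK_EXC_GUARD_VM_ONCE    - deliver only once (DELIVER is cleared
 *                               atomically after the first delivery)
 *   TASK_EXC_GUARD_VM_CORPSE  - deliver via a corpse fork when not fatal
 *   TASK_EXC_GUARD_VM_FATAL   - terminate the task afterwards
 */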
7490 void
7491 virt_memory_guard_ast(
7492 thread_t thread,
7493 mach_exception_data_type_t code,
7494 mach_exception_data_type_t subcode)
7495 {
7496 task_t task = thread->task;
7497 assert(task != kernel_task);
7498 assert(task == current_task());
7499 uint32_t behavior;
7500
7501 behavior = task->task_exc_guard;
7502
7503 /* Is delivery enabled */
7504 if ((behavior & TASK_EXC_GUARD_VM_DELIVER) == 0) {
7505 return;
7506 }
7507
7508 /* If only once, make sure we're that once */
7509 while (behavior & TASK_EXC_GUARD_VM_ONCE) {
7510 uint32_t new_behavior = behavior & ~TASK_EXC_GUARD_VM_DELIVER;
7511
7512 if (OSCompareAndSwap(behavior, new_behavior, &task->task_exc_guard)) {
7513 break;
7514 }
7515 behavior = task->task_exc_guard;
7516 if ((behavior & TASK_EXC_GUARD_VM_DELIVER) == 0) {
7517 return;
7518 }
7519 }
7520
7521 /* Raise exception via corpse fork or synchronously */
7522 if ((task->task_exc_guard & TASK_EXC_GUARD_VM_CORPSE) &&
7523 (task->task_exc_guard & TASK_EXC_GUARD_VM_FATAL) == 0) {
7524 task_violated_guard(code, subcode, NULL);
7525 } else {
7526 task_exception_notify(EXC_GUARD, code, subcode);
7527 }
7528
7529 /* Terminate the task if desired */
7530 if (task->task_exc_guard & TASK_EXC_GUARD_VM_FATAL) {
7531 task_bsdtask_kill(current_task());
7532 }
7533 }
7534
7535 /*
7536 * vm_map_guard_exception:
7537 *
7538 * Generate a GUARD_TYPE_VIRTUAL_MEMORY EXC_GUARD exception.
7539 *
7540 * Right now, we do this when we find nothing mapped, or a
7541 * gap in the mapping when a user address space deallocate
7542 * was requested. We report the address of the first gap found.
7543 */
7544 static void
7545 vm_map_guard_exception(
7546 vm_map_offset_t gap_start,
7547 unsigned reason)
7548 {
7549 mach_exception_code_t code = 0;
7550 unsigned int guard_type = GUARD_TYPE_VIRT_MEMORY;
7551 unsigned int target = 0; /* should we pass in pid associated with map? */
7552 mach_exception_data_type_t subcode = (uint64_t)gap_start;
7553 boolean_t fatal = FALSE;
7554
7555 task_t task = current_task();
7556
7557 /* Can't deliver exceptions to kernel task */
7558 if (task == kernel_task) {
7559 return;
7560 }
7561
7562 EXC_GUARD_ENCODE_TYPE(code, guard_type);
7563 EXC_GUARD_ENCODE_FLAVOR(code, reason);
7564 EXC_GUARD_ENCODE_TARGET(code, target);
7565
7566 if (task->task_exc_guard & TASK_EXC_GUARD_VM_FATAL) {
7567 fatal = TRUE;
7568 }
7569 thread_guard_violation(current_thread(), code, subcode, fatal);
7570 }
7571
7572 /*
7573 * vm_map_delete: [ internal use only ]
7574 *
7575 * Deallocates the given address range from the target map.
7576 * Removes all user wirings. Unwires one kernel wiring if
7577 * VM_MAP_REMOVE_KUNWIRE is set. Waits for kernel wirings to go
7578 * away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set. Sleeps
7579 * interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
7580 *
7581 * This routine is called with map locked and leaves map locked.
7582 */
7583 static kern_return_t
7584 vm_map_delete(
7585 vm_map_t map,
7586 vm_map_offset_t start,
7587 vm_map_offset_t end,
7588 int flags,
7589 vm_map_t zap_map)
7590 {
7591 vm_map_entry_t entry, next;
7592 struct vm_map_entry *first_entry, tmp_entry;
7593 vm_map_offset_t s;
7594 vm_object_t object;
7595 boolean_t need_wakeup;
7596 unsigned int last_timestamp = ~0; /* unlikely value */
7597 int interruptible;
7598 vm_map_offset_t gap_start;
7599 __unused vm_map_offset_t save_start = start;
7600 __unused vm_map_offset_t save_end = end;
7601 const vm_map_offset_t FIND_GAP = 1; /* a non-page-aligned value */
7602 const vm_map_offset_t GAPS_OK = 2; /* a different non-page-aligned value */
7603
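/*
 * For user maps (unless VM_MAP_REMOVE_GAPS_OK), start with FIND_GAP so
 * that the first hole encountered is recorded in "gap_start" and can be
 * reported via vm_map_guard_exception(); the kernel map and GAPS_OK
 * callers tolerate gaps.
 */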
7604 if (map != kernel_map && !(flags & VM_MAP_REMOVE_GAPS_OK)) {
7605 gap_start = FIND_GAP;
7606 } else {
7607 gap_start = GAPS_OK;
7608 }
7609
7610 interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
7611 THREAD_ABORTSAFE : THREAD_UNINT;
7612
7613 /*
7614 * All our DMA I/O operations in IOKit are currently done by
7615 * wiring through the map entries of the task requesting the I/O.
7616 * Because of this, we must always wait for kernel wirings
7617 * to go away on the entries before deleting them.
7618 *
7619 * Any caller who wants to actually remove a kernel wiring
7620 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
7621 * properly remove one wiring instead of blasting through
7622 * them all.
7623 */
7624 flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;
7625
7626 while (1) {
7627 /*
7628 * Find the start of the region, and clip it
7629 */
7630 if (vm_map_lookup_entry(map, start, &first_entry)) {
7631 entry = first_entry;
7632 if (map == kalloc_map &&
7633 (entry->vme_start != start ||
7634 entry->vme_end != end)) {
7635 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7636 "mismatched entry %p [0x%llx:0x%llx]\n",
7637 map,
7638 (uint64_t)start,
7639 (uint64_t)end,
7640 entry,
7641 (uint64_t)entry->vme_start,
7642 (uint64_t)entry->vme_end);
7643 }
7644
7645 /*
7646 * If in a superpage, extend the range to include the start of the mapping.
7647 */
7648 if (entry->superpage_size && (start & ~SUPERPAGE_MASK)) {
7649 start = SUPERPAGE_ROUND_DOWN(start);
7650 continue;
7651 }
7652
7653 if (start == entry->vme_start) {
7654 /*
7655 * No need to clip. We don't want to cause
7656 * any unnecessary unnesting in this case...
7657 */
7658 } else {
7659 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
7660 entry->map_aligned &&
7661 !VM_MAP_PAGE_ALIGNED(
7662 start,
7663 VM_MAP_PAGE_MASK(map))) {
7664 /*
7665 * The entry will no longer be
7666 * map-aligned after clipping
7667 * and the caller said it's OK.
7668 */
7669 entry->map_aligned = FALSE;
7670 }
7671 if (map == kalloc_map) {
7672 panic("vm_map_delete(%p,0x%llx,0x%llx):"
7673 " clipping %p at 0x%llx\n",
7674 map,
7675 (uint64_t)start,
7676 (uint64_t)end,
7677 entry,
7678 (uint64_t)start);
7679 }
7680 vm_map_clip_start(map, entry, start);
7681 }
7682
7683 /*
7684 * Fix the lookup hint now, rather than each
7685 * time through the loop.
7686 */
7687 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
7688 } else {
7689 if (map->pmap == kernel_pmap &&
7690 os_ref_get_count(&map->map_refcnt) != 0) {
7691 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7692 "no map entry at 0x%llx\n",
7693 map,
7694 (uint64_t)start,
7695 (uint64_t)end,
7696 (uint64_t)start);
7697 }
7698 entry = first_entry->vme_next;
7699 if (gap_start == FIND_GAP) {
7700 gap_start = start;
7701 }
7702 }
7703 break;
7704 }
7705 if (entry->superpage_size) {
7706 end = SUPERPAGE_ROUND_UP(end);
7707 }
7708
7709 need_wakeup = FALSE;
7710 /*
7711 * Step through all entries in this region
7712 */
7713 s = entry->vme_start;
7714 while ((entry != vm_map_to_entry(map)) && (s < end)) {
7715 /*
7716 * At this point, we have deleted all the memory entries
7717 * between "start" and "s". We still need to delete
7718 * all memory entries between "s" and "end".
7719 * While we were blocked and the map was unlocked, some
7720 * new memory entries could have been re-allocated between
7721 * "start" and "s" and we don't want to mess with those.
7722 * Some of those entries could even have been re-assembled
7723 * with an entry after "s" (in vm_map_simplify_entry()), so
7724 * we may have to vm_map_clip_start() again.
7725 */
7726
7727 if (entry->vme_start >= s) {
7728 /*
7729 * This entry starts on or after "s"
7730 * so no need to clip its start.
7731 */
7732 } else {
7733 /*
7734 * This entry has been re-assembled by a
7735 * vm_map_simplify_entry(). We need to
7736 * re-clip its start.
7737 */
7738 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
7739 entry->map_aligned &&
7740 !VM_MAP_PAGE_ALIGNED(s,
7741 VM_MAP_PAGE_MASK(map))) {
7742 /*
7743 * The entry will no longer be map-aligned
7744 * after clipping and the caller said it's OK.
7745 */
7746 entry->map_aligned = FALSE;
7747 }
7748 if (map == kalloc_map) {
7749 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7750 "clipping %p at 0x%llx\n",
7751 map,
7752 (uint64_t)start,
7753 (uint64_t)end,
7754 entry,
7755 (uint64_t)s);
7756 }
7757 vm_map_clip_start(map, entry, s);
7758 }
7759 if (entry->vme_end <= end) {
7760 /*
7761 * This entry is going away completely, so no need
7762 * to clip and possibly cause an unnecessary unnesting.
7763 */
7764 } else {
7765 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
7766 entry->map_aligned &&
7767 !VM_MAP_PAGE_ALIGNED(end,
7768 VM_MAP_PAGE_MASK(map))) {
7769 /*
7770 * The entry will no longer be map-aligned
7771 * after clipping and the caller said it's OK.
7772 */
7773 entry->map_aligned = FALSE;
7774 }
7775 if (map == kalloc_map) {
7776 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7777 "clipping %p at 0x%llx\n",
7778 map,
7779 (uint64_t)start,
7780 (uint64_t)end,
7781 entry,
7782 (uint64_t)end);
7783 }
7784 vm_map_clip_end(map, entry, end);
7785 }
7786
7787 if (entry->permanent) {
7788 if (map->pmap == kernel_pmap) {
7789 panic("%s(%p,0x%llx,0x%llx): "
7790 "attempt to remove permanent "
7791 "VM map entry "
7792 "%p [0x%llx:0x%llx]\n",
7793 __FUNCTION__,
7794 map,
7795 (uint64_t) start,
7796 (uint64_t) end,
7797 entry,
7798 (uint64_t) entry->vme_start,
7799 (uint64_t) entry->vme_end);
7800 } else if (flags & VM_MAP_REMOVE_IMMUTABLE) {
7801 // printf("FBDP %d[%s] removing permanent entry %p [0x%llx:0x%llx] prot 0x%x/0x%x\n", proc_selfpid(), (current_task()->bsd_info ? proc_name_address(current_task()->bsd_info) : "?"), entry, (uint64_t)entry->vme_start, (uint64_t)entry->vme_end, entry->protection, entry->max_protection);
7802 entry->permanent = FALSE;
7803 #if PMAP_CS
7804 } else if ((entry->protection & VM_PROT_EXECUTE) && !pmap_cs_enforced(map->pmap)) {
7805 entry->permanent = FALSE;
7806
7807 printf("%d[%s] %s(0x%llx,0x%llx): "
7808 "pmap_cs disabled, allowing for permanent executable entry [0x%llx:0x%llx] "
7809 "prot 0x%x/0x%x\n",
7810 proc_selfpid(),
7811 (current_task()->bsd_info
7812 ? proc_name_address(current_task()->bsd_info)
7813 : "?"),
7814 __FUNCTION__,
7815 (uint64_t) start,
7816 (uint64_t) end,
7817 (uint64_t)entry->vme_start,
7818 (uint64_t)entry->vme_end,
7819 entry->protection,
7820 entry->max_protection);
7821 #endif
7822 } else {
7823 if (vm_map_executable_immutable_verbose) {
7824 printf("%d[%s] %s(0x%llx,0x%llx): "
7825 "permanent entry [0x%llx:0x%llx] "
7826 "prot 0x%x/0x%x\n",
7827 proc_selfpid(),
7828 (current_task()->bsd_info
7829 ? proc_name_address(current_task()->bsd_info)
7830 : "?"),
7831 __FUNCTION__,
7832 (uint64_t) start,
7833 (uint64_t) end,
7834 (uint64_t)entry->vme_start,
7835 (uint64_t)entry->vme_end,
7836 entry->protection,
7837 entry->max_protection);
7838 }
7839 /*
7840 * dtrace -n 'vm_map_delete_permanent { print("start=0x%llx end=0x%llx prot=0x%x/0x%x\n", arg0, arg1, arg2, arg3); stack(); ustack(); }'
7841 */
7842 DTRACE_VM5(vm_map_delete_permanent,
7843 vm_map_offset_t, entry->vme_start,
7844 vm_map_offset_t, entry->vme_end,
7845 vm_prot_t, entry->protection,
7846 vm_prot_t, entry->max_protection,
7847 int, VME_ALIAS(entry));
7848 }
7849 }
7850
7851
7852 if (entry->in_transition) {
7853 wait_result_t wait_result;
7854
7855 /*
7856 * Another thread is wiring/unwiring this entry.
7857 * Let the other thread know we are waiting.
7858 */
7859 assert(s == entry->vme_start);
7860 entry->needs_wakeup = TRUE;
7861
7862 /*
7863 * wake up anybody waiting on entries that we have
7864 * already unwired/deleted.
7865 */
7866 if (need_wakeup) {
7867 vm_map_entry_wakeup(map);
7868 need_wakeup = FALSE;
7869 }
7870
7871 wait_result = vm_map_entry_wait(map, interruptible);
7872
7873 if (interruptible &&
7874 wait_result == THREAD_INTERRUPTED) {
7875 /*
7876 * We do not clear the needs_wakeup flag,
7877 * since we cannot tell if we were the only one.
7878 */
7879 return KERN_ABORTED;
7880 }
7881
7882 /*
7883 * The entry could have been clipped or it
7884 * may not exist anymore. Look it up again.
7885 */
7886 if (!vm_map_lookup_entry(map, s, &first_entry)) {
7887 /*
7888 * User: use the next entry
7889 */
7890 if (gap_start == FIND_GAP) {
7891 gap_start = s;
7892 }
7893 entry = first_entry->vme_next;
7894 s = entry->vme_start;
7895 } else {
7896 entry = first_entry;
7897 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
7898 }
7899 last_timestamp = map->timestamp;
7900 continue;
7901 } /* end in_transition */
7902
7903 if (entry->wired_count) {
7904 boolean_t user_wire;
7905
7906 user_wire = entry->user_wired_count > 0;
7907
7908 /*
7909 * Remove a kernel wiring if requested
7910 */
7911 if (flags & VM_MAP_REMOVE_KUNWIRE) {
7912 entry->wired_count--;
7913 }
7914
7915 /*
7916 * Remove all user wirings for proper accounting
7917 */
7918 if (entry->user_wired_count > 0) {
7919 while (entry->user_wired_count) {
7920 subtract_wire_counts(map, entry, user_wire);
7921 }
7922 }
7923
7924 if (entry->wired_count != 0) {
7925 assert(map != kernel_map);
7926 /*
7927 * Cannot continue. Typical case is when
7928 * a user thread has physical I/O pending on
7929 * this page. Either wait for the
7930 * kernel wiring to go away or return an
7931 * error.
7932 */
7933 if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) {
7934 wait_result_t wait_result;
7935
7936 assert(s == entry->vme_start);
7937 entry->needs_wakeup = TRUE;
7938 wait_result = vm_map_entry_wait(map,
7939 interruptible);
7940
7941 if (interruptible &&
7942 wait_result == THREAD_INTERRUPTED) {
7943 /*
7944 * We do not clear the
7945 * needs_wakeup flag, since we
7946 * cannot tell if we were the
7947 * only one.
7948 */
7949 return KERN_ABORTED;
7950 }
7951
7952 /*
7953 * The entry could have been clipped or
7954 * it may not exist anymore. Look it
7955 * up again.
7956 */
7957 if (!vm_map_lookup_entry(map, s,
7958 &first_entry)) {
7959 assert(map != kernel_map);
7960 /*
7961 * User: use the next entry
7962 */
7963 if (gap_start == FIND_GAP) {
7964 gap_start = s;
7965 }
7966 entry = first_entry->vme_next;
7967 s = entry->vme_start;
7968 } else {
7969 entry = first_entry;
7970 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
7971 }
7972 last_timestamp = map->timestamp;
7973 continue;
7974 } else {
7975 return KERN_FAILURE;
7976 }
7977 }
7978
7979 entry->in_transition = TRUE;
7980 /*
7981 * copy current entry. see comment in vm_map_wire()
7982 */
7983 tmp_entry = *entry;
7984 assert(s == entry->vme_start);
7985
7986 /*
7987 * We can unlock the map now. The in_transition
7988 * state guarantees existence of the entry.
7989 */
7990 vm_map_unlock(map);
7991
7992 if (tmp_entry.is_sub_map) {
7993 vm_map_t sub_map;
7994 vm_map_offset_t sub_start, sub_end;
7995 pmap_t pmap;
7996 vm_map_offset_t pmap_addr;
7997
7998
7999 sub_map = VME_SUBMAP(&tmp_entry);
8000 sub_start = VME_OFFSET(&tmp_entry);
8001 sub_end = sub_start + (tmp_entry.vme_end -
8002 tmp_entry.vme_start);
8003 if (tmp_entry.use_pmap) {
8004 pmap = sub_map->pmap;
8005 pmap_addr = tmp_entry.vme_start;
8006 } else {
8007 pmap = map->pmap;
8008 pmap_addr = tmp_entry.vme_start;
8009 }
8010 (void) vm_map_unwire_nested(sub_map,
8011 sub_start, sub_end,
8012 user_wire,
8013 pmap, pmap_addr);
8014 } else {
8015 if (VME_OBJECT(&tmp_entry) == kernel_object) {
8016 pmap_protect_options(
8017 map->pmap,
8018 tmp_entry.vme_start,
8019 tmp_entry.vme_end,
8020 VM_PROT_NONE,
8021 PMAP_OPTIONS_REMOVE,
8022 NULL);
8023 }
8024 vm_fault_unwire(map, &tmp_entry,
8025 VME_OBJECT(&tmp_entry) == kernel_object,
8026 map->pmap, tmp_entry.vme_start);
8027 }
8028
8029 vm_map_lock(map);
8030
8031 if (last_timestamp + 1 != map->timestamp) {
8032 /*
8033 * Find the entry again. It could have
8034 * been clipped after we unlocked the map.
8035 */
8036 if (!vm_map_lookup_entry(map, s, &first_entry)) {
8037 assert((map != kernel_map) &&
8038 (!entry->is_sub_map));
8039 if (gap_start == FIND_GAP) {
8040 gap_start = s;
8041 }
8042 first_entry = first_entry->vme_next;
8043 s = first_entry->vme_start;
8044 } else {
8045 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8046 }
8047 } else {
8048 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8049 first_entry = entry;
8050 }
8051
8052 last_timestamp = map->timestamp;
8053
8054 entry = first_entry;
8055 while ((entry != vm_map_to_entry(map)) &&
8056 (entry->vme_start < tmp_entry.vme_end)) {
8057 assert(entry->in_transition);
8058 entry->in_transition = FALSE;
8059 if (entry->needs_wakeup) {
8060 entry->needs_wakeup = FALSE;
8061 need_wakeup = TRUE;
8062 }
8063 entry = entry->vme_next;
8064 }
8065 /*
8066 * We have unwired the entry(s). Go back and
8067 * delete them.
8068 */
8069 entry = first_entry;
8070 continue;
8071 }
8072
8073 /* entry is unwired */
8074 assert(entry->wired_count == 0);
8075 assert(entry->user_wired_count == 0);
8076
8077 assert(s == entry->vme_start);
8078
8079 if (flags & VM_MAP_REMOVE_NO_PMAP_CLEANUP) {
8080 /*
8081 * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
8082 * vm_map_delete(), some map entries might have been
8083 * transferred to a "zap_map", which doesn't have a
8084 * pmap. The original pmap has already been flushed
8085 * in the vm_map_delete() call targeting the original
8086 * map, but when we get to destroying the "zap_map",
8087 * we don't have any pmap to flush, so let's just skip
8088 * all this.
8089 */
8090 } else if (entry->is_sub_map) {
8091 if (entry->use_pmap) {
8092 #ifndef NO_NESTED_PMAP
8093 int pmap_flags;
8094
8095 if (flags & VM_MAP_REMOVE_NO_UNNESTING) {
8096 /*
8097 * This is the final cleanup of the
8098 * address space being terminated.
8099 * No new mappings are expected and
8100 * we don't really need to unnest the
8101 * shared region (and lose the "global"
8102 * pmap mappings, if applicable).
8103 *
8104 * Tell the pmap layer that we're
8105 * "clean" wrt nesting.
8106 */
8107 pmap_flags = PMAP_UNNEST_CLEAN;
8108 } else {
8109 /*
8110 * We're unmapping part of the nested
8111 * shared region, so we can't keep the
8112 * nested pmap.
8113 */
8114 pmap_flags = 0;
8115 }
8116 pmap_unnest_options(
8117 map->pmap,
8118 (addr64_t)entry->vme_start,
8119 entry->vme_end - entry->vme_start,
8120 pmap_flags);
8121 #endif /* NO_NESTED_PMAP */
8122 if (map->mapped_in_other_pmaps &&
8123 os_ref_get_count(&map->map_refcnt) != 0) {
8124 /* clean up parent map/maps */
8125 vm_map_submap_pmap_clean(
8126 map, entry->vme_start,
8127 entry->vme_end,
8128 VME_SUBMAP(entry),
8129 VME_OFFSET(entry));
8130 }
8131 } else {
8132 vm_map_submap_pmap_clean(
8133 map, entry->vme_start, entry->vme_end,
8134 VME_SUBMAP(entry),
8135 VME_OFFSET(entry));
8136 }
8137 } else if (VME_OBJECT(entry) != kernel_object &&
8138 VME_OBJECT(entry) != compressor_object) {
8139 object = VME_OBJECT(entry);
8140 if (map->mapped_in_other_pmaps &&
8141 os_ref_get_count(&map->map_refcnt) != 0) {
8142 vm_object_pmap_protect_options(
8143 object, VME_OFFSET(entry),
8144 entry->vme_end - entry->vme_start,
8145 PMAP_NULL,
8146 entry->vme_start,
8147 VM_PROT_NONE,
8148 PMAP_OPTIONS_REMOVE);
8149 } else if ((VME_OBJECT(entry) != VM_OBJECT_NULL) ||
8150 (map->pmap == kernel_pmap)) {
8151 /* Remove translations associated
8152 * with this range unless the entry
8153 * does not have an object; even then,
8154 * still remove them if it's the kernel
8155 * map or a descendant, since the platform
8156 * could potentially create "backdoor"
8157 * mappings invisible to the VM. It is
8158 * expected that objectless, non-kernel
8159 * ranges do not have such VM-invisible
8160 * translations.
8161 */
8162 pmap_remove_options(map->pmap,
8163 (addr64_t)entry->vme_start,
8164 (addr64_t)entry->vme_end,
8165 PMAP_OPTIONS_REMOVE);
8166 }
8167 }
8168
8169 if (entry->iokit_acct) {
8170 /* alternate accounting */
8171 DTRACE_VM4(vm_map_iokit_unmapped_region,
8172 vm_map_t, map,
8173 vm_map_offset_t, entry->vme_start,
8174 vm_map_offset_t, entry->vme_end,
8175 int, VME_ALIAS(entry));
8176 vm_map_iokit_unmapped_region(map,
8177 (entry->vme_end -
8178 entry->vme_start));
8179 entry->iokit_acct = FALSE;
8180 entry->use_pmap = FALSE;
8181 }
8182
8183 /*
8184 * All pmap mappings for this map entry must have been
8185 * cleared by now.
8186 */
8187 #if DEBUG
8188 assert(vm_map_pmap_is_empty(map,
8189 entry->vme_start,
8190 entry->vme_end));
8191 #endif /* DEBUG */
8192
8193 next = entry->vme_next;
8194
8195 if (map->pmap == kernel_pmap &&
8196 os_ref_get_count(&map->map_refcnt) != 0 &&
8197 entry->vme_end < end &&
8198 (next == vm_map_to_entry(map) ||
8199 next->vme_start != entry->vme_end)) {
8200 panic("vm_map_delete(%p,0x%llx,0x%llx): "
8201 "hole after %p at 0x%llx\n",
8202 map,
8203 (uint64_t)start,
8204 (uint64_t)end,
8205 entry,
8206 (uint64_t)entry->vme_end);
8207 }
8208
8209 /*
8210 * If the desired range didn't end with "entry", then there is a gap if
8211 * we wrapped around to the start of the map or if "entry" and "next"
8212 * aren't contiguous.
8213 *
8214 * The vm_map_round_page() is needed since an entry can be less than VM_MAP_PAGE_MASK() sized.
8215 * For example, on devices which have h/w 4K pages but where entry sizes are all now 16K.
8216 */
8217 if (gap_start == FIND_GAP &&
8218 vm_map_round_page(entry->vme_end, VM_MAP_PAGE_MASK(map)) < end &&
8219 (next == vm_map_to_entry(map) || entry->vme_end != next->vme_start)) {
8220 gap_start = entry->vme_end;
8221 }
8222 s = next->vme_start;
8223 last_timestamp = map->timestamp;
8224
8225 if (entry->permanent) {
8226 /*
8227 * A permanent entry cannot be removed, so leave it
8228 * in place but remove all access permissions.
8229 */
8230 entry->protection = VM_PROT_NONE;
8231 entry->max_protection = VM_PROT_NONE;
8232 } else if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) &&
8233 zap_map != VM_MAP_NULL) {
8234 vm_map_size_t entry_size;
8235 /*
8236 * The caller wants to save the affected VM map entries
8237 * into the "zap_map". The caller will take care of
8238 * these entries.
8239 */
8240 /* unlink the entry from "map" ... */
8241 vm_map_store_entry_unlink(map, entry);
8242 /* ... and add it to the end of the "zap_map" */
8243 vm_map_store_entry_link(zap_map,
8244 vm_map_last_entry(zap_map),
8245 entry,
8246 VM_MAP_KERNEL_FLAGS_NONE);
8247 entry_size = entry->vme_end - entry->vme_start;
8248 map->size -= entry_size;
8249 zap_map->size += entry_size;
8250 /* we didn't unlock the map, so no timestamp increase */
8251 last_timestamp--;
8252 } else {
8253 vm_map_entry_delete(map, entry);
8254 /* vm_map_entry_delete unlocks the map */
8255 vm_map_lock(map);
8256 }
8257
8258 entry = next;
8259
8260 if (entry == vm_map_to_entry(map)) {
8261 break;
8262 }
8263 if (last_timestamp + 1 != map->timestamp) {
8264 /*
8265 * We are responsible for deleting everything
8266 * from the given space. If someone has interfered,
8267 * we pick up where we left off. Back fills should
8268 * be all right for anyone, except map_delete, and
8269 * we have to assume that the task has been fully
8270 * disabled before we get here.
8271 */
8272 if (!vm_map_lookup_entry(map, s, &entry)) {
8273 entry = entry->vme_next;
8274
8275 /*
8276 * Nothing found for s. If we weren't already done, then there is a gap.
8277 */
8278 if (gap_start == FIND_GAP && s < end) {
8279 gap_start = s;
8280 }
8281 s = entry->vme_start;
8282 } else {
8283 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8284 }
8285 /*
8286 * others can not only allocate behind us, we can
8287 * also see entries coalesce while we don't hold the map lock
8288 */
8289 if (entry == vm_map_to_entry(map)) {
8290 break;
8291 }
8292 }
8293 last_timestamp = map->timestamp;
8294 }
8295
8296 if (map->wait_for_space) {
8297 thread_wakeup((event_t) map);
8298 }
8299 /*
8300 * wake up anybody waiting on entries that we have already deleted.
8301 */
8302 if (need_wakeup) {
8303 vm_map_entry_wakeup(map);
8304 }
8305
8306 if (gap_start != FIND_GAP && gap_start != GAPS_OK) {
8307 DTRACE_VM3(kern_vm_deallocate_gap,
8308 vm_map_offset_t, gap_start,
8309 vm_map_offset_t, save_start,
8310 vm_map_offset_t, save_end);
8311 if (!(flags & VM_MAP_REMOVE_GAPS_OK)) {
8312 vm_map_guard_exception(gap_start, kGUARD_EXC_DEALLOC_GAP);
8313 }
8314 }
8315
8316 return KERN_SUCCESS;
8317 }
8318
8319 /*
8320 * vm_map_remove:
8321 *
8322 * Remove the given address range from the target map.
8323 * This is the exported form of vm_map_delete.
8324 */
8325 kern_return_t
8326 vm_map_remove(
8327 vm_map_t map,
8328 vm_map_offset_t start,
8329 vm_map_offset_t end,
8330 boolean_t flags)
8331 {
8332 kern_return_t result;
8333
8334 vm_map_lock(map);
8335 VM_MAP_RANGE_CHECK(map, start, end);
8336 /*
8337 * For the zone_map, the kernel controls the allocation/freeing of memory.
8338 * Any free to the zone_map should be within the bounds of the map and
8339 * should free up memory. If the VM_MAP_RANGE_CHECK() silently converts a
8340 * free to the zone_map into a no-op, there is a problem and we should
8341 * panic.
8342 */
8343 if ((map == zone_map) && (start == end)) {
8344 panic("Nothing being freed to the zone_map. start = end = %p\n", (void *)start);
8345 }
8346 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
8347 vm_map_unlock(map);
8348
8349 return result;
8350 }
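/*
 * Illustrative sketch (not compiled): a minimal example of how a kernel
 * client might tear down a mapping with vm_map_remove().  The helper name
 * is hypothetical; vm_map_trunc_page()/vm_map_round_page() and
 * VM_MAP_PAGE_MASK() are used as elsewhere in this file, and
 * VM_MAP_REMOVE_NO_FLAGS is assumed to be the "no special flags" value.
 */
#if 0
static kern_return_t
example_remove_mapping(
	vm_map_t        map,
	vm_map_offset_t addr,
	vm_map_size_t   size)
{
	/* round to the map's page boundaries before removing */
	vm_map_offset_t start = vm_map_trunc_page(addr,
	    VM_MAP_PAGE_MASK(map));
	vm_map_offset_t end = vm_map_round_page(addr + size,
	    VM_MAP_PAGE_MASK(map));

	/* vm_map_remove() takes the map lock and calls vm_map_delete() */
	return vm_map_remove(map, start, end, VM_MAP_REMOVE_NO_FLAGS);
}
#endif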
8351
8352 /*
8353 * vm_map_remove_locked:
8354 *
8355 * Remove the given address range from the target locked map.
8356 * This is the exported form of vm_map_delete.
8357 */
8358 kern_return_t
8359 vm_map_remove_locked(
8360 vm_map_t map,
8361 vm_map_offset_t start,
8362 vm_map_offset_t end,
8363 boolean_t flags)
8364 {
8365 kern_return_t result;
8366
8367 VM_MAP_RANGE_CHECK(map, start, end);
8368 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
8369 return result;
8370 }
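/*
 * Illustrative sketch (not compiled): vm_map_remove_locked() is meant for
 * callers that already hold the map lock and want to batch the removal
 * with other locked operations.  The helper name is hypothetical and the
 * flags value is an assumption; unlike vm_map_remove(), the caller keeps
 * responsibility for dropping the lock.
 */
#if 0
static kern_return_t
example_remove_while_locked(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end)
{
	kern_return_t kr;

	vm_map_lock(map);
	/* ... other updates that must happen under the same lock hold ... */
	kr = vm_map_remove_locked(map, start, end, VM_MAP_REMOVE_NO_FLAGS);
	vm_map_unlock(map);
	return kr;
}
#endif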
8371
8372
8373 /*
8374 * Routine: vm_map_copy_allocate
8375 *
8376 * Description:
8377 * Allocates and initializes a map copy object.
8378 */
8379 static vm_map_copy_t
8380 vm_map_copy_allocate(void)
8381 {
8382 vm_map_copy_t new_copy;
8383
8384 new_copy = zalloc(vm_map_copy_zone);
8385 bzero(new_copy, sizeof(*new_copy));
8386 new_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
8387 vm_map_copy_first_entry(new_copy) = vm_map_copy_to_entry(new_copy);
8388 vm_map_copy_last_entry(new_copy) = vm_map_copy_to_entry(new_copy);
8389 return new_copy;
8390 }
8391
8392 /*
8393 * Routine: vm_map_copy_discard
8394 *
8395 * Description:
8396 * Dispose of a map copy object (returned by
8397 * vm_map_copyin).
8398 */
8399 void
8400 vm_map_copy_discard(
8401 vm_map_copy_t copy)
8402 {
8403 if (copy == VM_MAP_COPY_NULL) {
8404 return;
8405 }
8406
8407 switch (copy->type) {
8408 case VM_MAP_COPY_ENTRY_LIST:
8409 while (vm_map_copy_first_entry(copy) !=
8410 vm_map_copy_to_entry(copy)) {
8411 vm_map_entry_t entry = vm_map_copy_first_entry(copy);
8412
8413 vm_map_copy_entry_unlink(copy, entry);
8414 if (entry->is_sub_map) {
8415 vm_map_deallocate(VME_SUBMAP(entry));
8416 } else {
8417 vm_object_deallocate(VME_OBJECT(entry));
8418 }
8419 vm_map_copy_entry_dispose(copy, entry);
8420 }
8421 break;
8422 case VM_MAP_COPY_OBJECT:
8423 vm_object_deallocate(copy->cpy_object);
8424 break;
8425 case VM_MAP_COPY_KERNEL_BUFFER:
8426
8427 /*
8428 * The vm_map_copy_t and possibly the data buffer were
8429 * allocated by a single call to kalloc(), i.e. the
8430 * vm_map_copy_t was not allocated out of the zone.
8431 */
8432 if (copy->size > msg_ool_size_small || copy->offset) {
8433 panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
8434 (long long)copy->size, (long long)copy->offset);
8435 }
8436 kfree(copy, copy->size + cpy_kdata_hdr_sz);
8437 return;
8438 }
8439 zfree(vm_map_copy_zone, copy);
8440 }
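/*
 * Illustrative sketch (not compiled): the usual ownership rules for a
 * vm_map_copy_t.  A copy object produced by vm_map_copyin() is consumed
 * by a successful vm_map_copyout(); on any failure path the caller still
 * owns it and must call vm_map_copy_discard().  The helper name is
 * hypothetical; vm_map_copyin()/vm_map_copyout() are assumed to have
 * their usual prototypes.
 */
#if 0
static kern_return_t
example_copy_between_maps(
	vm_map_t         src_map,
	vm_map_address_t src_addr,
	vm_map_size_t    len,
	vm_map_t         dst_map,
	vm_map_address_t *dst_addr)     /* OUT: where the copy landed */
{
	vm_map_copy_t copy;
	kern_return_t kr;

	/* capture the source range; FALSE: don't destroy the source */
	kr = vm_map_copyin(src_map, src_addr, len, FALSE, &copy);
	if (kr != KERN_SUCCESS) {
		return kr;
	}

	/* insert it into the destination map at a kernel-chosen address */
	kr = vm_map_copyout(dst_map, dst_addr, copy);
	if (kr != KERN_SUCCESS) {
		/* not consumed on failure: we must dispose of it ourselves */
		vm_map_copy_discard(copy);
	}
	return kr;
}
#endif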
8441
8442 /*
8443 * Routine: vm_map_copy_copy
8444 *
8445 * Description:
8446 * Move the information in a map copy object to
8447 * a new map copy object, leaving the old one
8448 * empty.
8449 *
8450 * This is used by kernel routines that need
8451 * to look at out-of-line data (in copyin form)
8452 * before deciding whether to return SUCCESS.
8453 * If the routine returns FAILURE, the original
8454 * copy object will be deallocated; therefore,
8455 * these routines must make a copy of the copy
8456 * object and leave the original empty so that
8457 * deallocation will not fail.
8458 */
8459 vm_map_copy_t
8460 vm_map_copy_copy(
8461 vm_map_copy_t copy)
8462 {
8463 vm_map_copy_t new_copy;
8464
8465 if (copy == VM_MAP_COPY_NULL) {
8466 return VM_MAP_COPY_NULL;
8467 }
8468
8469 /*
8470 * Allocate a new copy object, and copy the information
8471 * from the old one into it.
8472 */
8473
8474 new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
8475 *new_copy = *copy;
8476
8477 if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
8478 /*
8479 * The links in the entry chain must be
8480 * changed to point to the new copy object.
8481 */
8482 vm_map_copy_first_entry(copy)->vme_prev
8483 = vm_map_copy_to_entry(new_copy);
8484 vm_map_copy_last_entry(copy)->vme_next
8485 = vm_map_copy_to_entry(new_copy);
8486 }
8487
8488 /*
8489 * Change the old copy object into one that contains
8490 * nothing to be deallocated.
8491 */
8492 copy->type = VM_MAP_COPY_OBJECT;
8493 copy->cpy_object = VM_OBJECT_NULL;
8494
8495 /*
8496 * Return the new object.
8497 */
8498 return new_copy;
8499 }
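/*
 * Illustrative sketch (not compiled): the pattern described in the block
 * comment above.  A routine that needs to inspect out-of-line data before
 * deciding whether to succeed first moves the contents into its own copy
 * object; if it then fails, the caller's original copy object has been
 * left empty, so the caller's later vm_map_copy_discard() releases only
 * the emptied container.  The helper name and the size-based validation
 * are hypothetical.
 */
#if 0
static kern_return_t
example_take_copy_if_valid(
	vm_map_copy_t   copy,           /* caller-owned copyin data */
	vm_map_size_t   expected_size,
	vm_map_copy_t   *kept)          /* OUT: copy we now own */
{
	vm_map_copy_t private_copy;

	/* move the contents into a copy we own; "copy" is left empty */
	private_copy = vm_map_copy_copy(copy);
	if (private_copy == VM_MAP_COPY_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	if (private_copy->size != expected_size) {
		/* reject: discard what we took; the original copy */
		/* object no longer references any of it */
		vm_map_copy_discard(private_copy);
		return KERN_INVALID_ARGUMENT;
	}

	*kept = private_copy;
	return KERN_SUCCESS;
}
#endif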
8500
8501 static kern_return_t
8502 vm_map_overwrite_submap_recurse(
8503 vm_map_t dst_map,
8504 vm_map_offset_t dst_addr,
8505 vm_map_size_t dst_size)
8506 {
8507 vm_map_offset_t dst_end;
8508 vm_map_entry_t tmp_entry;
8509 vm_map_entry_t entry;
8510 kern_return_t result;
8511 boolean_t encountered_sub_map = FALSE;
8512
8513
8514
8515 /*
8516 * Verify that the destination is all writeable
8517 * initially. We have to trunc the destination
8518 * address and round the copy size or we'll end up
8519 * splitting entries in strange ways.
8520 */
8521
8522 dst_end = vm_map_round_page(dst_addr + dst_size,
8523 VM_MAP_PAGE_MASK(dst_map));
8524 vm_map_lock(dst_map);
8525
8526 start_pass_1:
8527 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
8528 vm_map_unlock(dst_map);
8529 return KERN_INVALID_ADDRESS;
8530 }
8531
8532 vm_map_clip_start(dst_map,
8533 tmp_entry,
8534 vm_map_trunc_page(dst_addr,
8535 VM_MAP_PAGE_MASK(dst_map)));
8536 if (tmp_entry->is_sub_map) {
8537 /* clipping did unnest if needed */
8538 assert(!tmp_entry->use_pmap);
8539 }
8540
8541 for (entry = tmp_entry;;) {
8542 vm_map_entry_t next;
8543
8544 next = entry->vme_next;
8545 while (entry->is_sub_map) {
8546 vm_map_offset_t sub_start;
8547 vm_map_offset_t sub_end;
8548 vm_map_offset_t local_end;
8549
8550 if (entry->in_transition) {
8551 /*
8552 * Say that we are waiting, and wait for entry.
8553 */
8554 entry->needs_wakeup = TRUE;
8555 vm_map_entry_wait(dst_map, THREAD_UNINT);
8556
8557 goto start_pass_1;
8558 }
8559
8560 encountered_sub_map = TRUE;
8561 sub_start = VME_OFFSET(entry);
8562
8563 if (entry->vme_end < dst_end) {
8564 sub_end = entry->vme_end;
8565 } else {
8566 sub_end = dst_end;
8567 }
8568 sub_end -= entry->vme_start;
8569 sub_end += VME_OFFSET(entry);
8570 local_end = entry->vme_end;
8571 vm_map_unlock(dst_map);
8572
8573 result = vm_map_overwrite_submap_recurse(
8574 VME_SUBMAP(entry),
8575 sub_start,
8576 sub_end - sub_start);
8577
8578 if (result != KERN_SUCCESS) {
8579 return result;
8580 }
8581 if (dst_end <= entry->vme_end) {
8582 return KERN_SUCCESS;
8583 }
8584 vm_map_lock(dst_map);
8585 if (!vm_map_lookup_entry(dst_map, local_end,
8586 &tmp_entry)) {
8587 vm_map_unlock(dst_map);
8588 return KERN_INVALID_ADDRESS;
8589 }
8590 entry = tmp_entry;
8591 next = entry->vme_next;
8592 }
8593
8594 if (!(entry->protection & VM_PROT_WRITE)) {
8595 vm_map_unlock(dst_map);
8596 return KERN_PROTECTION_FAILURE;
8597 }
8598
8599 /*
8600 * If the entry is in transition, we must wait
8601 * for it to exit that state. Anything could happen
8602 * when we unlock the map, so start over.
8603 */
8604 if (entry->in_transition) {
8605 /*
8606 * Say that we are waiting, and wait for entry.
8607 */
8608 entry->needs_wakeup = TRUE;
8609 vm_map_entry_wait(dst_map, THREAD_UNINT);
8610
8611 goto start_pass_1;
8612 }
8613
8614 /*
8615 * our range is contained completely within this map entry
8616 */
8617 if (dst_end <= entry->vme_end) {
8618 vm_map_unlock(dst_map);
8619 return KERN_SUCCESS;
8620 }
8621 /*
8622 * check that the specified range is a contiguous region
8623 */
8624 if ((next == vm_map_to_entry(dst_map)) ||
8625 (next->vme_start != entry->vme_end)) {
8626 vm_map_unlock(dst_map);
8627 return KERN_INVALID_ADDRESS;
8628 }
8629
8630 /*
8631 * Check for permanent objects in the destination.
8632 */
8633 if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
8634 ((!VME_OBJECT(entry)->internal) ||
8635 (VME_OBJECT(entry)->true_share))) {
8636 if (encountered_sub_map) {
8637 vm_map_unlock(dst_map);
8638 return KERN_FAILURE;
8639 }
8640 }
8641
8642
8643 entry = next;
8644 }/* for */
8645 vm_map_unlock(dst_map);
8646 return KERN_SUCCESS;
8647 }
8648
8649 /*
8650 * Routine: vm_map_copy_overwrite
8651 *
8652 * Description:
8653 * Copy the memory described by the map copy
8654 * object (copy; returned by vm_map_copyin) onto
8655 * the specified destination region (dst_map, dst_addr).
8656 * The destination must be writeable.
8657 *
8658 * Unlike vm_map_copyout, this routine actually
8659 * writes over previously-mapped memory. If the
8660 * previous mapping was to a permanent (user-supplied)
8661 * memory object, it is preserved.
8662 *
8663 * The attributes (protection and inheritance) of the
8664 * destination region are preserved.
8665 *
8666 * If successful, consumes the copy object.
8667 * Otherwise, the caller is responsible for it.
8668 *
8669 * Implementation notes:
8670 * To overwrite aligned temporary virtual memory, it is
8671 * sufficient to remove the previous mapping and insert
8672 * the new copy. This replacement is done either on
8673 * the whole region (if no permanent virtual memory
8674 * objects are embedded in the destination region) or
8675 * in individual map entries.
8676 *
8677 * To overwrite permanent virtual memory, it is necessary
8678 * to copy each page, as the external memory management
8679 * interface currently does not provide any optimizations.
8680 *
8681 * Unaligned memory also has to be copied. It is possible
8682 * to use 'vm_trickery' to copy the aligned data. This is
8683 * not done but not hard to implement.
8684 *
8685 * Once a page of permanent memory has been overwritten,
8686 * it is impossible to interrupt this function; otherwise,
8687 * the call would be neither atomic nor location-independent.
8688 * The kernel-state portion of a user thread must be
8689 * interruptible.
8690 *
8691 * It may be expensive to forward all requests that might
8692 * overwrite permanent memory (vm_write, vm_copy) to
8693 * uninterruptible kernel threads. This routine may be
8694 * called by interruptible threads; however, success is
8695 * not guaranteed -- if the request cannot be performed
8696 * atomically and interruptibly, an error indication is
8697 * returned.
8698 */
8699
8700 static kern_return_t
8701 vm_map_copy_overwrite_nested(
8702 vm_map_t dst_map,
8703 vm_map_address_t dst_addr,
8704 vm_map_copy_t copy,
8705 boolean_t interruptible,
8706 pmap_t pmap,
8707 boolean_t discard_on_success)
8708 {
8709 vm_map_offset_t dst_end;
8710 vm_map_entry_t tmp_entry;
8711 vm_map_entry_t entry;
8712 kern_return_t kr;
8713 boolean_t aligned = TRUE;
8714 boolean_t contains_permanent_objects = FALSE;
8715 boolean_t encountered_sub_map = FALSE;
8716 vm_map_offset_t base_addr;
8717 vm_map_size_t copy_size;
8718 vm_map_size_t total_size;
8719
8720
8721 /*
8722 * Check for null copy object.
8723 */
8724
8725 if (copy == VM_MAP_COPY_NULL) {
8726 return KERN_SUCCESS;
8727 }
8728
8729 /*
8730 * Check for special kernel buffer allocated
8731 * by new_ipc_kmsg_copyin.
8732 */
8733
8734 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
8735 return vm_map_copyout_kernel_buffer(
8736 dst_map, &dst_addr,
8737 copy, copy->size, TRUE, discard_on_success);
8738 }
8739
8740 /*
8741 * Only works for entry lists at the moment. Will
8742 * support page lists later.
8743 */
8744
8745 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
8746
8747 if (copy->size == 0) {
8748 if (discard_on_success) {
8749 vm_map_copy_discard(copy);
8750 }
8751 return KERN_SUCCESS;
8752 }
8753
8754 /*
8755 * Verify that the destination is all writeable
8756 * initially. We have to trunc the destination
8757 * address and round the copy size or we'll end up
8758 * splitting entries in strange ways.
8759 */
8760
8761 if (!VM_MAP_PAGE_ALIGNED(copy->size,
8762 VM_MAP_PAGE_MASK(dst_map)) ||
8763 !VM_MAP_PAGE_ALIGNED(copy->offset,
8764 VM_MAP_PAGE_MASK(dst_map)) ||
8765 !VM_MAP_PAGE_ALIGNED(dst_addr,
8766 VM_MAP_PAGE_MASK(dst_map))) {
8767 aligned = FALSE;
8768 dst_end = vm_map_round_page(dst_addr + copy->size,
8769 VM_MAP_PAGE_MASK(dst_map));
8770 } else {
8771 dst_end = dst_addr + copy->size;
8772 }
8773
8774 vm_map_lock(dst_map);
8775
8776 /* LP64todo - remove this check when vm_map_commpage64()
8777 * no longer has to stuff in a map_entry for the commpage
8778 * above the map's max_offset.
8779 */
8780 if (dst_addr >= dst_map->max_offset) {
8781 vm_map_unlock(dst_map);
8782 return KERN_INVALID_ADDRESS;
8783 }
8784
8785 start_pass_1:
8786 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
8787 vm_map_unlock(dst_map);
8788 return KERN_INVALID_ADDRESS;
8789 }
8790 vm_map_clip_start(dst_map,
8791 tmp_entry,
8792 vm_map_trunc_page(dst_addr,
8793 VM_MAP_PAGE_MASK(dst_map)));
8794 for (entry = tmp_entry;;) {
8795 vm_map_entry_t next = entry->vme_next;
8796
8797 while (entry->is_sub_map) {
8798 vm_map_offset_t sub_start;
8799 vm_map_offset_t sub_end;
8800 vm_map_offset_t local_end;
8801
8802 if (entry->in_transition) {
8803 /*
8804 * Say that we are waiting, and wait for entry.
8805 */
8806 entry->needs_wakeup = TRUE;
8807 vm_map_entry_wait(dst_map, THREAD_UNINT);
8808
8809 goto start_pass_1;
8810 }
8811
8812 local_end = entry->vme_end;
8813 if (!(entry->needs_copy)) {
8814 /* if needs_copy we are a COW submap */
8815 /* in such a case we just replace, so */
8816 /* there is no need for the */
8817 /* following check. */
8818 encountered_sub_map = TRUE;
8819 sub_start = VME_OFFSET(entry);
8820
8821 if (entry->vme_end < dst_end) {
8822 sub_end = entry->vme_end;
8823 } else {
8824 sub_end = dst_end;
8825 }
8826 sub_end -= entry->vme_start;
8827 sub_end += VME_OFFSET(entry);
8828 vm_map_unlock(dst_map);
8829
8830 kr = vm_map_overwrite_submap_recurse(
8831 VME_SUBMAP(entry),
8832 sub_start,
8833 sub_end - sub_start);
8834 if (kr != KERN_SUCCESS) {
8835 return kr;
8836 }
8837 vm_map_lock(dst_map);
8838 }
8839
8840 if (dst_end <= entry->vme_end) {
8841 goto start_overwrite;
8842 }
8843 if (!vm_map_lookup_entry(dst_map, local_end,
8844 &entry)) {
8845 vm_map_unlock(dst_map);
8846 return KERN_INVALID_ADDRESS;
8847 }
8848 next = entry->vme_next;
8849 }
8850
8851 if (!(entry->protection & VM_PROT_WRITE)) {
8852 vm_map_unlock(dst_map);
8853 return KERN_PROTECTION_FAILURE;
8854 }
8855
8856 /*
8857 * If the entry is in transition, we must wait
8858 * for it to exit that state. Anything could happen
8859 * when we unlock the map, so start over.
8860 */
8861 if (entry->in_transition) {
8862 /*
8863 * Say that we are waiting, and wait for entry.
8864 */
8865 entry->needs_wakeup = TRUE;
8866 vm_map_entry_wait(dst_map, THREAD_UNINT);
8867
8868 goto start_pass_1;
8869 }
8870
8871 /*
8872 * our range is contained completely within this map entry
8873 */
8874 if (dst_end <= entry->vme_end) {
8875 break;
8876 }
8877 /*
8878 * check that the specified range is a contiguous region
8879 */
8880 if ((next == vm_map_to_entry(dst_map)) ||
8881 (next->vme_start != entry->vme_end)) {
8882 vm_map_unlock(dst_map);
8883 return KERN_INVALID_ADDRESS;
8884 }
8885
8886
8887 /*
8888 * Check for permanent objects in the destination.
8889 */
8890 if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
8891 ((!VME_OBJECT(entry)->internal) ||
8892 (VME_OBJECT(entry)->true_share))) {
8893 contains_permanent_objects = TRUE;
8894 }
8895
8896 entry = next;
8897 }/* for */
8898
8899 start_overwrite:
8900 /*
8901 * If there are permanent objects in the destination, then
8902 * the copy cannot be interrupted.
8903 */
8904
8905 if (interruptible && contains_permanent_objects) {
8906 vm_map_unlock(dst_map);
8907 return KERN_FAILURE; /* XXX */
8908 }
8909
8910 /*
8911 *
8912 * Make a second pass, overwriting the data.
8913 * At the beginning of each loop iteration,
8914 * the next entry to be overwritten is "tmp_entry"
8915 * (initially, the value returned from the lookup above),
8916 * and the starting address expected in that entry
8917 * is "start".
8918 */
8919
8920 total_size = copy->size;
8921 if (encountered_sub_map) {
8922 copy_size = 0;
8923 /* re-calculate tmp_entry since we've had the map */
8924 /* unlocked */
8925 if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
8926 vm_map_unlock(dst_map);
8927 return KERN_INVALID_ADDRESS;
8928 }
8929 } else {
8930 copy_size = copy->size;
8931 }
8932
8933 base_addr = dst_addr;
8934 while (TRUE) {
8935 /* deconstruct the copy object and do in parts */
8936 /* only in sub_map, interruptible case */
8937 vm_map_entry_t copy_entry;
8938 vm_map_entry_t previous_prev = VM_MAP_ENTRY_NULL;
8939 vm_map_entry_t next_copy = VM_MAP_ENTRY_NULL;
8940 int nentries;
8941 int remaining_entries = 0;
8942 vm_map_offset_t new_offset = 0;
8943
8944 for (entry = tmp_entry; copy_size == 0;) {
8945 vm_map_entry_t next;
8946
8947 next = entry->vme_next;
8948
8949 /* tmp_entry and base address are moved along */
8950 /* each time we encounter a sub-map. Otherwise */
8951 /* entry can outpace tmp_entry, and the copy_size */
8952 /* may reflect the distance between them. */
8953 /* If the current entry is found to be in transition, */
8954 /* we will start over at the beginning or at the last */
8955 /* encountered submap, as dictated by base_addr, */
8956 /* and we will zero copy_size accordingly. */
8957 if (entry->in_transition) {
8958 /*
8959 * Say that we are waiting, and wait for entry.
8960 */
8961 entry->needs_wakeup = TRUE;
8962 vm_map_entry_wait(dst_map, THREAD_UNINT);
8963
8964 if (!vm_map_lookup_entry(dst_map, base_addr,
8965 &tmp_entry)) {
8966 vm_map_unlock(dst_map);
8967 return KERN_INVALID_ADDRESS;
8968 }
8969 copy_size = 0;
8970 entry = tmp_entry;
8971 continue;
8972 }
8973 if (entry->is_sub_map) {
8974 vm_map_offset_t sub_start;
8975 vm_map_offset_t sub_end;
8976 vm_map_offset_t local_end;
8977
8978 if (entry->needs_copy) {
8979 /* if this is a COW submap */
8980 /* just back the range with an */
8981 /* anonymous entry */
8982 if (entry->vme_end < dst_end) {
8983 sub_end = entry->vme_end;
8984 } else {
8985 sub_end = dst_end;
8986 }
8987 if (entry->vme_start < base_addr) {
8988 sub_start = base_addr;
8989 } else {
8990 sub_start = entry->vme_start;
8991 }
8992 vm_map_clip_end(
8993 dst_map, entry, sub_end);
8994 vm_map_clip_start(
8995 dst_map, entry, sub_start);
8996 assert(!entry->use_pmap);
8997 assert(!entry->iokit_acct);
8998 entry->use_pmap = TRUE;
8999 entry->is_sub_map = FALSE;
9000 vm_map_deallocate(
9001 VME_SUBMAP(entry));
9002 VME_OBJECT_SET(entry, VM_OBJECT_NULL);
9003 VME_OFFSET_SET(entry, 0);
9004 entry->is_shared = FALSE;
9005 entry->needs_copy = FALSE;
9006 entry->protection = VM_PROT_DEFAULT;
9007 entry->max_protection = VM_PROT_ALL;
9008 entry->wired_count = 0;
9009 entry->user_wired_count = 0;
9010 if (entry->inheritance
9011 == VM_INHERIT_SHARE) {
9012 entry->inheritance = VM_INHERIT_COPY;
9013 }
9014 continue;
9015 }
9016 /* first take care of any non-sub_map */
9017 /* entries to send */
9018 if (base_addr < entry->vme_start) {
9019 /* stuff to send */
9020 copy_size =
9021 entry->vme_start - base_addr;
9022 break;
9023 }
9024 sub_start = VME_OFFSET(entry);
9025
9026 if (entry->vme_end < dst_end) {
9027 sub_end = entry->vme_end;
9028 } else {
9029 sub_end = dst_end;
9030 }
9031 sub_end -= entry->vme_start;
9032 sub_end += VME_OFFSET(entry);
9033 local_end = entry->vme_end;
9034 vm_map_unlock(dst_map);
9035 copy_size = sub_end - sub_start;
9036
9037 /* adjust the copy object */
9038 if (total_size > copy_size) {
9039 vm_map_size_t local_size = 0;
9040 vm_map_size_t entry_size;
9041
9042 nentries = 1;
9043 new_offset = copy->offset;
9044 copy_entry = vm_map_copy_first_entry(copy);
9045 while (copy_entry !=
9046 vm_map_copy_to_entry(copy)) {
9047 entry_size = copy_entry->vme_end -
9048 copy_entry->vme_start;
9049 if ((local_size < copy_size) &&
9050 ((local_size + entry_size)
9051 >= copy_size)) {
9052 vm_map_copy_clip_end(copy,
9053 copy_entry,
9054 copy_entry->vme_start +
9055 (copy_size - local_size));
9056 entry_size = copy_entry->vme_end -
9057 copy_entry->vme_start;
9058 local_size += entry_size;
9059 new_offset += entry_size;
9060 }
9061 if (local_size >= copy_size) {
9062 next_copy = copy_entry->vme_next;
9063 copy_entry->vme_next =
9064 vm_map_copy_to_entry(copy);
9065 previous_prev =
9066 copy->cpy_hdr.links.prev;
9067 copy->cpy_hdr.links.prev = copy_entry;
9068 copy->size = copy_size;
9069 remaining_entries =
9070 copy->cpy_hdr.nentries;
9071 remaining_entries -= nentries;
9072 copy->cpy_hdr.nentries = nentries;
9073 break;
9074 } else {
9075 local_size += entry_size;
9076 new_offset += entry_size;
9077 nentries++;
9078 }
9079 copy_entry = copy_entry->vme_next;
9080 }
9081 }
9082
9083 if ((entry->use_pmap) && (pmap == NULL)) {
9084 kr = vm_map_copy_overwrite_nested(
9085 VME_SUBMAP(entry),
9086 sub_start,
9087 copy,
9088 interruptible,
9089 VME_SUBMAP(entry)->pmap,
9090 TRUE);
9091 } else if (pmap != NULL) {
9092 kr = vm_map_copy_overwrite_nested(
9093 VME_SUBMAP(entry),
9094 sub_start,
9095 copy,
9096 interruptible, pmap,
9097 TRUE);
9098 } else {
9099 kr = vm_map_copy_overwrite_nested(
9100 VME_SUBMAP(entry),
9101 sub_start,
9102 copy,
9103 interruptible,
9104 dst_map->pmap,
9105 TRUE);
9106 }
9107 if (kr != KERN_SUCCESS) {
9108 if (next_copy != NULL) {
9109 copy->cpy_hdr.nentries +=
9110 remaining_entries;
9111 copy->cpy_hdr.links.prev->vme_next =
9112 next_copy;
9113 copy->cpy_hdr.links.prev
9114 = previous_prev;
9115 copy->size = total_size;
9116 }
9117 return kr;
9118 }
9119 if (dst_end <= local_end) {
9120 return KERN_SUCCESS;
9121 }
9122 /* otherwise copy no longer exists, it was */
9123 /* destroyed after successful copy_overwrite */
9124 copy = vm_map_copy_allocate();
9125 copy->type = VM_MAP_COPY_ENTRY_LIST;
9126 copy->offset = new_offset;
9127
9128 /*
9129 * XXX FBDP
9130 * this does not seem to deal with
9131 * the VM map store (R&B tree)
9132 */
9133
9134 total_size -= copy_size;
9135 copy_size = 0;
9136 /* put back remainder of copy in container */
9137 if (next_copy != NULL) {
9138 copy->cpy_hdr.nentries = remaining_entries;
9139 copy->cpy_hdr.links.next = next_copy;
9140 copy->cpy_hdr.links.prev = previous_prev;
9141 copy->size = total_size;
9142 next_copy->vme_prev =
9143 vm_map_copy_to_entry(copy);
9144 next_copy = NULL;
9145 }
9146 base_addr = local_end;
9147 vm_map_lock(dst_map);
9148 if (!vm_map_lookup_entry(dst_map,
9149 local_end, &tmp_entry)) {
9150 vm_map_unlock(dst_map);
9151 return KERN_INVALID_ADDRESS;
9152 }
9153 entry = tmp_entry;
9154 continue;
9155 }
9156 if (dst_end <= entry->vme_end) {
9157 copy_size = dst_end - base_addr;
9158 break;
9159 }
9160
9161 if ((next == vm_map_to_entry(dst_map)) ||
9162 (next->vme_start != entry->vme_end)) {
9163 vm_map_unlock(dst_map);
9164 return KERN_INVALID_ADDRESS;
9165 }
9166
9167 entry = next;
9168 }/* for */
9169
9170 next_copy = NULL;
9171 nentries = 1;
9172
9173 /* adjust the copy object */
9174 if (total_size > copy_size) {
9175 vm_map_size_t local_size = 0;
9176 vm_map_size_t entry_size;
9177
9178 new_offset = copy->offset;
9179 copy_entry = vm_map_copy_first_entry(copy);
9180 while (copy_entry != vm_map_copy_to_entry(copy)) {
9181 entry_size = copy_entry->vme_end -
9182 copy_entry->vme_start;
9183 if ((local_size < copy_size) &&
9184 ((local_size + entry_size)
9185 >= copy_size)) {
9186 vm_map_copy_clip_end(copy, copy_entry,
9187 copy_entry->vme_start +
9188 (copy_size - local_size));
9189 entry_size = copy_entry->vme_end -
9190 copy_entry->vme_start;
9191 local_size += entry_size;
9192 new_offset += entry_size;
9193 }
9194 if (local_size >= copy_size) {
9195 next_copy = copy_entry->vme_next;
9196 copy_entry->vme_next =
9197 vm_map_copy_to_entry(copy);
9198 previous_prev =
9199 copy->cpy_hdr.links.prev;
9200 copy->cpy_hdr.links.prev = copy_entry;
9201 copy->size = copy_size;
9202 remaining_entries =
9203 copy->cpy_hdr.nentries;
9204 remaining_entries -= nentries;
9205 copy->cpy_hdr.nentries = nentries;
9206 break;
9207 } else {
9208 local_size += entry_size;
9209 new_offset += entry_size;
9210 nentries++;
9211 }
9212 copy_entry = copy_entry->vme_next;
9213 }
9214 }
9215
9216 if (aligned) {
9217 pmap_t local_pmap;
9218
9219 if (pmap) {
9220 local_pmap = pmap;
9221 } else {
9222 local_pmap = dst_map->pmap;
9223 }
9224
9225 if ((kr = vm_map_copy_overwrite_aligned(
9226 dst_map, tmp_entry, copy,
9227 base_addr, local_pmap)) != KERN_SUCCESS) {
9228 if (next_copy != NULL) {
9229 copy->cpy_hdr.nentries +=
9230 remaining_entries;
9231 copy->cpy_hdr.links.prev->vme_next =
9232 next_copy;
9233 copy->cpy_hdr.links.prev =
9234 previous_prev;
9235 copy->size += copy_size;
9236 }
9237 return kr;
9238 }
9239 vm_map_unlock(dst_map);
9240 } else {
9241 /*
9242 * Performance gain:
9243 *
9244 * if the copy and dst address are misaligned but the same
9245 * offset within the page we can copy_not_aligned the
9246 * misaligned parts and copy aligned the rest. If they are
9247 * aligned but len is unaligned we simply need to copy
9248 * the end bit unaligned. We'll need to split the misaligned
9249 * bits of the region in this case!
9250 */
9251 /* ALWAYS UNLOCKS THE dst_map MAP */
9252 kr = vm_map_copy_overwrite_unaligned(
9253 dst_map,
9254 tmp_entry,
9255 copy,
9256 base_addr,
9257 discard_on_success);
9258 if (kr != KERN_SUCCESS) {
9259 if (next_copy != NULL) {
9260 copy->cpy_hdr.nentries +=
9261 remaining_entries;
9262 copy->cpy_hdr.links.prev->vme_next =
9263 next_copy;
9264 copy->cpy_hdr.links.prev =
9265 previous_prev;
9266 copy->size += copy_size;
9267 }
9268 return kr;
9269 }
9270 }
9271 total_size -= copy_size;
9272 if (total_size == 0) {
9273 break;
9274 }
9275 base_addr += copy_size;
9276 copy_size = 0;
9277 copy->offset = new_offset;
9278 if (next_copy != NULL) {
9279 copy->cpy_hdr.nentries = remaining_entries;
9280 copy->cpy_hdr.links.next = next_copy;
9281 copy->cpy_hdr.links.prev = previous_prev;
9282 next_copy->vme_prev = vm_map_copy_to_entry(copy);
9283 copy->size = total_size;
9284 }
9285 vm_map_lock(dst_map);
9286 while (TRUE) {
9287 if (!vm_map_lookup_entry(dst_map,
9288 base_addr, &tmp_entry)) {
9289 vm_map_unlock(dst_map);
9290 return KERN_INVALID_ADDRESS;
9291 }
9292 if (tmp_entry->in_transition) {
9293 entry->needs_wakeup = TRUE;
9294 vm_map_entry_wait(dst_map, THREAD_UNINT);
9295 } else {
9296 break;
9297 }
9298 }
9299 vm_map_clip_start(dst_map,
9300 tmp_entry,
9301 vm_map_trunc_page(base_addr,
9302 VM_MAP_PAGE_MASK(dst_map)));
9303
9304 entry = tmp_entry;
9305 } /* while */
9306
9307 /*
9308 * Throw away the vm_map_copy object
9309 */
9310 if (discard_on_success) {
9311 vm_map_copy_discard(copy);
9312 }
9313
9314 return KERN_SUCCESS;
9315 }/* vm_map_copy_overwrite */
9316
9317 kern_return_t
9318 vm_map_copy_overwrite(
9319 vm_map_t dst_map,
9320 vm_map_offset_t dst_addr,
9321 vm_map_copy_t copy,
9322 boolean_t interruptible)
9323 {
9324 vm_map_size_t head_size, tail_size;
9325 vm_map_copy_t head_copy, tail_copy;
9326 vm_map_offset_t head_addr, tail_addr;
9327 vm_map_entry_t entry;
9328 kern_return_t kr;
9329 vm_map_offset_t effective_page_mask, effective_page_size;
9330
9331 head_size = 0;
9332 tail_size = 0;
9333 head_copy = NULL;
9334 tail_copy = NULL;
9335 head_addr = 0;
9336 tail_addr = 0;
9337
9338 if (interruptible ||
9339 copy == VM_MAP_COPY_NULL ||
9340 copy->type != VM_MAP_COPY_ENTRY_LIST) {
9341 /*
9342 * We can't split the "copy" map if we're interruptible
9343 * or if we don't have a "copy" map...
9344 */
9345 blunt_copy:
9346 return vm_map_copy_overwrite_nested(dst_map,
9347 dst_addr,
9348 copy,
9349 interruptible,
9350 (pmap_t) NULL,
9351 TRUE);
9352 }
9353
9354 effective_page_mask = MAX(VM_MAP_PAGE_MASK(dst_map), PAGE_MASK);
9355 effective_page_mask = MAX(VM_MAP_COPY_PAGE_MASK(copy),
9356 effective_page_mask);
9357 effective_page_size = effective_page_mask + 1;
9358
9359 if (copy->size < 3 * effective_page_size) {
9360 /*
9361 * Too small to bother with optimizing...
9362 */
9363 goto blunt_copy;
9364 }
9365
9366 if ((dst_addr & effective_page_mask) !=
9367 (copy->offset & effective_page_mask)) {
9368 /*
9369 * Incompatible mis-alignment of source and destination...
9370 */
9371 goto blunt_copy;
9372 }
9373
9374 /*
9375 * Proper alignment or identical mis-alignment at the beginning.
9376 * Let's try and do a small unaligned copy first (if needed)
9377 * and then an aligned copy for the rest.
9378 */
9379 if (!vm_map_page_aligned(dst_addr, effective_page_mask)) {
9380 head_addr = dst_addr;
9381 head_size = (effective_page_size -
9382 (copy->offset & effective_page_mask));
9383 head_size = MIN(head_size, copy->size);
9384 }
9385 if (!vm_map_page_aligned(copy->offset + copy->size,
9386 effective_page_mask)) {
9387 /*
9388 * Mis-alignment at the end.
9389 * Do an aligned copy up to the last page and
9390 * then an unaligned copy for the remaining bytes.
9391 */
9392 tail_size = ((copy->offset + copy->size) &
9393 effective_page_mask);
9394 tail_size = MIN(tail_size, copy->size);
9395 tail_addr = dst_addr + copy->size - tail_size;
9396 assert(tail_addr >= head_addr + head_size);
9397 }
9398 assert(head_size + tail_size <= copy->size);
9399
9400 if (head_size + tail_size == copy->size) {
9401 /*
9402 * It's all unaligned, no optimization possible...
9403 */
9404 goto blunt_copy;
9405 }
9406
9407 /*
9408 * Can't optimize if there are any submaps in the
9409 * destination due to the way we free the "copy" map
9410 * progressively in vm_map_copy_overwrite_nested()
9411 * in that case.
9412 */
9413 vm_map_lock_read(dst_map);
9414 if (!vm_map_lookup_entry(dst_map, dst_addr, &entry)) {
9415 vm_map_unlock_read(dst_map);
9416 goto blunt_copy;
9417 }
9418 for (;
9419 (entry != vm_map_copy_to_entry(copy) &&
9420 entry->vme_start < dst_addr + copy->size);
9421 entry = entry->vme_next) {
9422 if (entry->is_sub_map) {
9423 vm_map_unlock_read(dst_map);
9424 goto blunt_copy;
9425 }
9426 }
9427 vm_map_unlock_read(dst_map);
9428
9429 if (head_size) {
9430 /*
9431 * Unaligned copy of the first "head_size" bytes, to reach
9432 * a page boundary.
9433 */
9434
9435 /*
9436 * Extract "head_copy" out of "copy".
9437 */
9438 head_copy = vm_map_copy_allocate();
9439 head_copy->type = VM_MAP_COPY_ENTRY_LIST;
9440 head_copy->cpy_hdr.entries_pageable =
9441 copy->cpy_hdr.entries_pageable;
9442 vm_map_store_init(&head_copy->cpy_hdr);
9443
9444 entry = vm_map_copy_first_entry(copy);
9445 if (entry->vme_end < copy->offset + head_size) {
9446 head_size = entry->vme_end - copy->offset;
9447 }
9448
9449 head_copy->offset = copy->offset;
9450 head_copy->size = head_size;
9451 copy->offset += head_size;
9452 copy->size -= head_size;
9453
9454 vm_map_copy_clip_end(copy, entry, copy->offset);
9455 vm_map_copy_entry_unlink(copy, entry);
9456 vm_map_copy_entry_link(head_copy,
9457 vm_map_copy_to_entry(head_copy),
9458 entry);
9459
9460 /*
9461 * Do the unaligned copy.
9462 */
9463 kr = vm_map_copy_overwrite_nested(dst_map,
9464 head_addr,
9465 head_copy,
9466 interruptible,
9467 (pmap_t) NULL,
9468 FALSE);
9469 if (kr != KERN_SUCCESS) {
9470 goto done;
9471 }
9472 }
9473
9474 if (tail_size) {
9475 /*
9476 * Extract "tail_copy" out of "copy".
9477 */
9478 tail_copy = vm_map_copy_allocate();
9479 tail_copy->type = VM_MAP_COPY_ENTRY_LIST;
9480 tail_copy->cpy_hdr.entries_pageable =
9481 copy->cpy_hdr.entries_pageable;
9482 vm_map_store_init(&tail_copy->cpy_hdr);
9483
9484 tail_copy->offset = copy->offset + copy->size - tail_size;
9485 tail_copy->size = tail_size;
9486
9487 copy->size -= tail_size;
9488
9489 entry = vm_map_copy_last_entry(copy);
9490 vm_map_copy_clip_start(copy, entry, tail_copy->offset);
9491 entry = vm_map_copy_last_entry(copy);
9492 vm_map_copy_entry_unlink(copy, entry);
9493 vm_map_copy_entry_link(tail_copy,
9494 vm_map_copy_last_entry(tail_copy),
9495 entry);
9496 }
9497
9498 /*
9499 * Copy most (or possibly all) of the data.
9500 */
9501 kr = vm_map_copy_overwrite_nested(dst_map,
9502 dst_addr + head_size,
9503 copy,
9504 interruptible,
9505 (pmap_t) NULL,
9506 FALSE);
9507 if (kr != KERN_SUCCESS) {
9508 goto done;
9509 }
9510
9511 if (tail_size) {
9512 kr = vm_map_copy_overwrite_nested(dst_map,
9513 tail_addr,
9514 tail_copy,
9515 interruptible,
9516 (pmap_t) NULL,
9517 FALSE);
9518 }
9519
9520 done:
9521 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
9522 if (kr == KERN_SUCCESS) {
9523 /*
9524 * Discard all the copy maps.
9525 */
9526 if (head_copy) {
9527 vm_map_copy_discard(head_copy);
9528 head_copy = NULL;
9529 }
9530 vm_map_copy_discard(copy);
9531 if (tail_copy) {
9532 vm_map_copy_discard(tail_copy);
9533 tail_copy = NULL;
9534 }
9535 } else {
9536 /*
9537 * Re-assemble the original copy map.
9538 */
9539 if (head_copy) {
9540 entry = vm_map_copy_first_entry(head_copy);
9541 vm_map_copy_entry_unlink(head_copy, entry);
9542 vm_map_copy_entry_link(copy,
9543 vm_map_copy_to_entry(copy),
9544 entry);
9545 copy->offset -= head_size;
9546 copy->size += head_size;
9547 vm_map_copy_discard(head_copy);
9548 head_copy = NULL;
9549 }
9550 if (tail_copy) {
9551 entry = vm_map_copy_last_entry(tail_copy);
9552 vm_map_copy_entry_unlink(tail_copy, entry);
9553 vm_map_copy_entry_link(copy,
9554 vm_map_copy_last_entry(copy),
9555 entry);
9556 copy->size += tail_size;
9557 vm_map_copy_discard(tail_copy);
9558 tail_copy = NULL;
9559 }
9560 }
9561 return kr;
9562 }
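/*
 * Illustrative sketch (not compiled): overwriting an existing, writable
 * destination range in place with vm_map_copy_overwrite(), as opposed to
 * vm_map_copyout(), which establishes a new mapping.  Per the contract
 * described above, the copy object is consumed on success; on failure it
 * still belongs to the caller.  The helper name is hypothetical.
 */
#if 0
static kern_return_t
example_overwrite_in_place(
	vm_map_t        dst_map,
	vm_map_offset_t dst_addr,       /* existing writable destination */
	vm_map_copy_t   copy)
{
	kern_return_t kr;

	/* FALSE: not interruptible, so the head/tail split above can apply */
	kr = vm_map_copy_overwrite(dst_map, dst_addr, copy, FALSE);
	if (kr != KERN_SUCCESS) {
		/* not consumed on failure: dispose of it here */
		vm_map_copy_discard(copy);
	}
	return kr;
}
#endif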
9563
9564
9565 /*
9566 * Routine: vm_map_copy_overwrite_unaligned [internal use only]
9567 *
9568 * Description:
9569 * Physically copy unaligned data
9570 *
9571 * Implementation:
9572 * Unaligned parts of pages have to be physically copied. We use
9573 * a modified form of vm_fault_copy (which understands non-aligned
9574 * page offsets and sizes) to do the copy. We attempt to copy as
9575 * much memory in one go as possible; however, vm_fault_copy copies
9576 * within one memory object, so we have to find the smallest of "amount
9577 * left", "source object data size" and "target object data size". With
9578 * unaligned data we don't need to split regions, therefore the source
9579 * (copy) object should be one map entry; the target range, however, may
9580 * be split over multiple map entries. In any event we are pessimistic
9581 * about these assumptions.
9582 *
9583 * Assumptions:
9584 * dst_map is locked on entry and is returned locked on success,
9585 * unlocked on error.
9586 */
9587
9588 static kern_return_t
9589 vm_map_copy_overwrite_unaligned(
9590 vm_map_t dst_map,
9591 vm_map_entry_t entry,
9592 vm_map_copy_t copy,
9593 vm_map_offset_t start,
9594 boolean_t discard_on_success)
9595 {
9596 vm_map_entry_t copy_entry;
9597 vm_map_entry_t copy_entry_next;
9598 vm_map_version_t version;
9599 vm_object_t dst_object;
9600 vm_object_offset_t dst_offset;
9601 vm_object_offset_t src_offset;
9602 vm_object_offset_t entry_offset;
9603 vm_map_offset_t entry_end;
9604 vm_map_size_t src_size,
9605 dst_size,
9606 copy_size,
9607 amount_left;
9608 kern_return_t kr = KERN_SUCCESS;
9609
9610
9611 copy_entry = vm_map_copy_first_entry(copy);
9612
9613 vm_map_lock_write_to_read(dst_map);
9614
9615 src_offset = copy->offset - vm_object_trunc_page(copy->offset);
9616 amount_left = copy->size;
9617 /*
9618 * unaligned, so we never clipped this entry; we need the offset into
9619 * the vm_object, not just the data.
9620 */
9621 while (amount_left > 0) {
9622 if (entry == vm_map_to_entry(dst_map)) {
9623 vm_map_unlock_read(dst_map);
9624 return KERN_INVALID_ADDRESS;
9625 }
9626
9627 /* "start" must be within the current map entry */
9628 assert((start >= entry->vme_start) && (start < entry->vme_end));
9629
9630 dst_offset = start - entry->vme_start;
9631
9632 dst_size = entry->vme_end - start;
9633
9634 src_size = copy_entry->vme_end -
9635 (copy_entry->vme_start + src_offset);
9636
9637 if (dst_size < src_size) {
9638 /*
9639 * we can only copy dst_size bytes before
9640 * we have to get the next destination entry
9641 */
9642 copy_size = dst_size;
9643 } else {
9644 /*
9645 * we can only copy src_size bytes before
9646 * we have to get the next source copy entry
9647 */
9648 copy_size = src_size;
9649 }
9650
9651 if (copy_size > amount_left) {
9652 copy_size = amount_left;
9653 }
9654 /*
9655 * Entry needs copy: create a shadow object for the
9656 * copy-on-write region.
9657 */
9658 if (entry->needs_copy &&
9659 ((entry->protection & VM_PROT_WRITE) != 0)) {
9660 if (vm_map_lock_read_to_write(dst_map)) {
9661 vm_map_lock_read(dst_map);
9662 goto RetryLookup;
9663 }
9664 VME_OBJECT_SHADOW(entry,
9665 (vm_map_size_t)(entry->vme_end
9666 - entry->vme_start));
9667 entry->needs_copy = FALSE;
9668 vm_map_lock_write_to_read(dst_map);
9669 }
9670 dst_object = VME_OBJECT(entry);
9671 /*
9672 * unlike with the virtual (aligned) copy, we're going
9673 * to fault on this memory, so we need a target object.
9674 */
9675 if (dst_object == VM_OBJECT_NULL) {
9676 if (vm_map_lock_read_to_write(dst_map)) {
9677 vm_map_lock_read(dst_map);
9678 goto RetryLookup;
9679 }
9680 dst_object = vm_object_allocate((vm_map_size_t)
9681 entry->vme_end - entry->vme_start);
9682 VME_OBJECT_SET(entry, dst_object);
9683 VME_OFFSET_SET(entry, 0);
9684 assert(entry->use_pmap);
9685 vm_map_lock_write_to_read(dst_map);
9686 }
9687 /*
9688 * Take an object reference and unlock map. The "entry" may
9689 * disappear or change when the map is unlocked.
9690 */
9691 vm_object_reference(dst_object);
9692 version.main_timestamp = dst_map->timestamp;
9693 entry_offset = VME_OFFSET(entry);
9694 entry_end = entry->vme_end;
9695 vm_map_unlock_read(dst_map);
9696 /*
9697 * Copy as much as possible in one pass
9698 */
9699 kr = vm_fault_copy(
9700 VME_OBJECT(copy_entry),
9701 VME_OFFSET(copy_entry) + src_offset,
9702 &copy_size,
9703 dst_object,
9704 entry_offset + dst_offset,
9705 dst_map,
9706 &version,
9707 THREAD_UNINT );
9708
9709 start += copy_size;
9710 src_offset += copy_size;
9711 amount_left -= copy_size;
9712 /*
9713 * Release the object reference
9714 */
9715 vm_object_deallocate(dst_object);
9716 /*
9717 * If a hard error occurred, return it now
9718 */
9719 if (kr != KERN_SUCCESS) {
9720 return kr;
9721 }
9722
9723 if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
9724 || amount_left == 0) {
9725 /*
9726 * all done with this copy entry, dispose.
9727 */
9728 copy_entry_next = copy_entry->vme_next;
9729
9730 if (discard_on_success) {
9731 vm_map_copy_entry_unlink(copy, copy_entry);
9732 assert(!copy_entry->is_sub_map);
9733 vm_object_deallocate(VME_OBJECT(copy_entry));
9734 vm_map_copy_entry_dispose(copy, copy_entry);
9735 }
9736
9737 if (copy_entry_next == vm_map_copy_to_entry(copy) &&
9738 amount_left) {
9739 /*
9740 * not finished copying but ran out of source
9741 */
9742 return KERN_INVALID_ADDRESS;
9743 }
9744
9745 copy_entry = copy_entry_next;
9746
9747 src_offset = 0;
9748 }
9749
9750 if (amount_left == 0) {
9751 return KERN_SUCCESS;
9752 }
9753
9754 vm_map_lock_read(dst_map);
9755 if (version.main_timestamp == dst_map->timestamp) {
9756 if (start == entry_end) {
9757 /*
9758 * destination region is split. Use the version
9759 * information to avoid a lookup in the normal
9760 * case.
9761 */
9762 entry = entry->vme_next;
9763 /*
9764 * should be contiguous. Fail if we encounter
9765 * a hole in the destination.
9766 */
9767 if (start != entry->vme_start) {
9768 vm_map_unlock_read(dst_map);
9769 return KERN_INVALID_ADDRESS;
9770 }
9771 }
9772 } else {
9773 /*
9774 * Map version check failed.
9775 * we must look up the entry because somebody
9776 * might have changed the map behind our backs.
9777 */
9778 RetryLookup:
9779 if (!vm_map_lookup_entry(dst_map, start, &entry)) {
9780 vm_map_unlock_read(dst_map);
9781 return KERN_INVALID_ADDRESS;
9782 }
9783 }
9784 }/* while */
9785
9786 return KERN_SUCCESS;
9787 }/* vm_map_copy_overwrite_unaligned */
9788
9789 /*
9790 * Routine: vm_map_copy_overwrite_aligned [internal use only]
9791 *
9792 * Description:
9793 * Does all the vm_trickery possible for whole pages.
9794 *
9795 * Implementation:
9796 *
9797 * If there are no permanent objects in the destination,
9798 * and the source and destination map entry zones match,
9799 * and the destination map entry is not shared,
9800 * then the map entries can be deleted and replaced
9801 * with those from the copy. The following code is the
9802 * basic idea of what to do, but there are lots of annoying
9803 * little details about getting protection and inheritance
9804 * right. Should add protection, inheritance, and sharing checks
9805 * to the above pass and make sure that no wiring is involved.
9806 */
9807
9808 int vm_map_copy_overwrite_aligned_src_not_internal = 0;
9809 int vm_map_copy_overwrite_aligned_src_not_symmetric = 0;
9810 int vm_map_copy_overwrite_aligned_src_large = 0;
9811
9812 static kern_return_t
9813 vm_map_copy_overwrite_aligned(
9814 vm_map_t dst_map,
9815 vm_map_entry_t tmp_entry,
9816 vm_map_copy_t copy,
9817 vm_map_offset_t start,
9818 __unused pmap_t pmap)
9819 {
9820 vm_object_t object;
9821 vm_map_entry_t copy_entry;
9822 vm_map_size_t copy_size;
9823 vm_map_size_t size;
9824 vm_map_entry_t entry;
9825
9826 while ((copy_entry = vm_map_copy_first_entry(copy))
9827 != vm_map_copy_to_entry(copy)) {
9828 copy_size = (copy_entry->vme_end - copy_entry->vme_start);
9829
9830 entry = tmp_entry;
9831 if (entry->is_sub_map) {
9832 /* unnested when clipped earlier */
9833 assert(!entry->use_pmap);
9834 }
9835 if (entry == vm_map_to_entry(dst_map)) {
9836 vm_map_unlock(dst_map);
9837 return KERN_INVALID_ADDRESS;
9838 }
9839 size = (entry->vme_end - entry->vme_start);
9840 /*
9841 * Make sure that no holes popped up in the
9842 * address map, and that the protection is
9843 * still valid, in case the map was unlocked
9844 * earlier.
9845 */
9846
9847 if ((entry->vme_start != start) || ((entry->is_sub_map)
9848 && !entry->needs_copy)) {
9849 vm_map_unlock(dst_map);
9850 return KERN_INVALID_ADDRESS;
9851 }
9852 assert(entry != vm_map_to_entry(dst_map));
9853
9854 /*
9855 * Check protection again
9856 */
9857
9858 if (!(entry->protection & VM_PROT_WRITE)) {
9859 vm_map_unlock(dst_map);
9860 return KERN_PROTECTION_FAILURE;
9861 }
9862
9863 /*
9864 * Adjust to source size first
9865 */
9866
9867 if (copy_size < size) {
9868 if (entry->map_aligned &&
9869 !VM_MAP_PAGE_ALIGNED(entry->vme_start + copy_size,
9870 VM_MAP_PAGE_MASK(dst_map))) {
9871 /* no longer map-aligned */
9872 entry->map_aligned = FALSE;
9873 }
9874 vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
9875 size = copy_size;
9876 }
9877
9878 /*
9879 * Adjust to destination size
9880 */
9881
9882 if (size < copy_size) {
9883 vm_map_copy_clip_end(copy, copy_entry,
9884 copy_entry->vme_start + size);
9885 copy_size = size;
9886 }
9887
9888 assert((entry->vme_end - entry->vme_start) == size);
9889 assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
9890 assert((copy_entry->vme_end - copy_entry->vme_start) == size);
9891
9892 /*
9893 * If the destination contains temporary unshared memory,
9894 * we can perform the copy by throwing it away and
9895 * installing the source data.
9896 */
9897
9898 object = VME_OBJECT(entry);
9899 if ((!entry->is_shared &&
9900 ((object == VM_OBJECT_NULL) ||
9901 (object->internal && !object->true_share))) ||
9902 entry->needs_copy) {
9903 vm_object_t old_object = VME_OBJECT(entry);
9904 vm_object_offset_t old_offset = VME_OFFSET(entry);
9905 vm_object_offset_t offset;
9906
9907 /*
9908 * Ensure that the source and destination aren't
9909 * identical
9910 */
9911 if (old_object == VME_OBJECT(copy_entry) &&
9912 old_offset == VME_OFFSET(copy_entry)) {
9913 vm_map_copy_entry_unlink(copy, copy_entry);
9914 vm_map_copy_entry_dispose(copy, copy_entry);
9915
9916 if (old_object != VM_OBJECT_NULL) {
9917 vm_object_deallocate(old_object);
9918 }
9919
9920 start = tmp_entry->vme_end;
9921 tmp_entry = tmp_entry->vme_next;
9922 continue;
9923 }
9924
9925 #if !CONFIG_EMBEDDED
9926 #define __TRADEOFF1_OBJ_SIZE (64 * 1024 * 1024) /* 64 MB */
9927 #define __TRADEOFF1_COPY_SIZE (128 * 1024) /* 128 KB */
9928 if (VME_OBJECT(copy_entry) != VM_OBJECT_NULL &&
9929 VME_OBJECT(copy_entry)->vo_size >= __TRADEOFF1_OBJ_SIZE &&
9930 copy_size <= __TRADEOFF1_COPY_SIZE) {
9931 /*
9932 * Virtual vs. Physical copy tradeoff #1.
9933 *
9934 * Copying only a few pages out of a large
9935 * object: do a physical copy instead of
9936 * a virtual copy, to avoid possibly keeping
9937 * the entire large object alive because of
9938 * those few copy-on-write pages.
9939 */
9940 vm_map_copy_overwrite_aligned_src_large++;
9941 goto slow_copy;
9942 }
9943 #endif /* !CONFIG_EMBEDDED */
9944
9945 if ((dst_map->pmap != kernel_pmap) &&
9946 (VME_ALIAS(entry) >= VM_MEMORY_MALLOC) &&
9947 (VME_ALIAS(entry) <= VM_MEMORY_MALLOC_MEDIUM)) {
9948 vm_object_t new_object, new_shadow;
9949
9950 /*
9951 * We're about to map something over a mapping
9952 * established by malloc()...
9953 */
9954 new_object = VME_OBJECT(copy_entry);
9955 if (new_object != VM_OBJECT_NULL) {
9956 vm_object_lock_shared(new_object);
9957 }
9958 while (new_object != VM_OBJECT_NULL &&
9959 #if !CONFIG_EMBEDDED
9960 !new_object->true_share &&
9961 new_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
9962 #endif /* !CONFIG_EMBEDDED */
9963 new_object->internal) {
9964 new_shadow = new_object->shadow;
9965 if (new_shadow == VM_OBJECT_NULL) {
9966 break;
9967 }
9968 vm_object_lock_shared(new_shadow);
9969 vm_object_unlock(new_object);
9970 new_object = new_shadow;
9971 }
9972 if (new_object != VM_OBJECT_NULL) {
9973 if (!new_object->internal) {
9974 /*
9975 * The new mapping is backed
9976 * by an external object. We
9977 * don't want malloc'ed memory
9978 * to be replaced with such a
9979 * non-anonymous mapping, so
9980 * let's go off the optimized
9981 * path...
9982 */
9983 vm_map_copy_overwrite_aligned_src_not_internal++;
9984 vm_object_unlock(new_object);
9985 goto slow_copy;
9986 }
9987 #if !CONFIG_EMBEDDED
9988 if (new_object->true_share ||
9989 new_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
9990 /*
9991 * Same if there's a "true_share"
9992 * object in the shadow chain, or
9993 * an object with a non-default
9994 * (SYMMETRIC) copy strategy.
9995 */
9996 vm_map_copy_overwrite_aligned_src_not_symmetric++;
9997 vm_object_unlock(new_object);
9998 goto slow_copy;
9999 }
10000 #endif /* !CONFIG_EMBEDDED */
10001 vm_object_unlock(new_object);
10002 }
10003 /*
10004 * The new mapping is still backed by
10005 * anonymous (internal) memory, so it's
10006 * OK to substitute it for the original
10007 * malloc() mapping.
10008 */
10009 }
10010
10011 if (old_object != VM_OBJECT_NULL) {
10012 if (entry->is_sub_map) {
10013 if (entry->use_pmap) {
10014 #ifndef NO_NESTED_PMAP
10015 pmap_unnest(dst_map->pmap,
10016 (addr64_t)entry->vme_start,
10017 entry->vme_end - entry->vme_start);
10018 #endif /* NO_NESTED_PMAP */
10019 if (dst_map->mapped_in_other_pmaps) {
10020 /* clean up parent */
10021 /* map/maps */
10022 vm_map_submap_pmap_clean(
10023 dst_map, entry->vme_start,
10024 entry->vme_end,
10025 VME_SUBMAP(entry),
10026 VME_OFFSET(entry));
10027 }
10028 } else {
10029 vm_map_submap_pmap_clean(
10030 dst_map, entry->vme_start,
10031 entry->vme_end,
10032 VME_SUBMAP(entry),
10033 VME_OFFSET(entry));
10034 }
10035 vm_map_deallocate(VME_SUBMAP(entry));
10036 } else {
10037 if (dst_map->mapped_in_other_pmaps) {
10038 vm_object_pmap_protect_options(
10039 VME_OBJECT(entry),
10040 VME_OFFSET(entry),
10041 entry->vme_end
10042 - entry->vme_start,
10043 PMAP_NULL,
10044 entry->vme_start,
10045 VM_PROT_NONE,
10046 PMAP_OPTIONS_REMOVE);
10047 } else {
10048 pmap_remove_options(
10049 dst_map->pmap,
10050 (addr64_t)(entry->vme_start),
10051 (addr64_t)(entry->vme_end),
10052 PMAP_OPTIONS_REMOVE);
10053 }
10054 vm_object_deallocate(old_object);
10055 }
10056 }
10057
10058 if (entry->iokit_acct) {
10059 /* keep using iokit accounting */
10060 entry->use_pmap = FALSE;
10061 } else {
10062 /* use pmap accounting */
10063 entry->use_pmap = TRUE;
10064 }
10065 entry->is_sub_map = FALSE;
10066 VME_OBJECT_SET(entry, VME_OBJECT(copy_entry));
10067 object = VME_OBJECT(entry);
10068 entry->needs_copy = copy_entry->needs_copy;
10069 entry->wired_count = 0;
10070 entry->user_wired_count = 0;
10071 offset = VME_OFFSET(copy_entry);
10072 VME_OFFSET_SET(entry, offset);
10073
10074 vm_map_copy_entry_unlink(copy, copy_entry);
10075 vm_map_copy_entry_dispose(copy, copy_entry);
10076
10077 /*
10078 * We could try to push pages into the pmap at this point, BUT
10079 * this optimization only saved on average 2 us per page if ALL
10080 * the pages in the source were currently mapped
10081 * and ALL the pages in the dest were touched. If fewer than
10082 * 2/3 of the pages were touched, this optimization actually cost more cycles.
10083 * It also puts a lot of pressure on the pmap layer w.r.t. mapping structures.
10084 */
10085
10086 /*
10087 * Set up for the next iteration. The map
10088 * has not been unlocked, so the next
10089 * address should be at the end of this
10090 * entry, and the next map entry should be
10091 * the one following it.
10092 */
10093
10094 start = tmp_entry->vme_end;
10095 tmp_entry = tmp_entry->vme_next;
10096 } else {
10097 vm_map_version_t version;
10098 vm_object_t dst_object;
10099 vm_object_offset_t dst_offset;
10100 kern_return_t r;
10101
10102 slow_copy:
10103 if (entry->needs_copy) {
10104 VME_OBJECT_SHADOW(entry,
10105 (entry->vme_end -
10106 entry->vme_start));
10107 entry->needs_copy = FALSE;
10108 }
10109
10110 dst_object = VME_OBJECT(entry);
10111 dst_offset = VME_OFFSET(entry);
10112
10113 /*
10114 * Take an object reference, and record
10115 * the map version information so that the
10116 * map can be safely unlocked.
10117 */
10118
10119 if (dst_object == VM_OBJECT_NULL) {
10120 /*
10121 * We would usually have just taken the
10122 * optimized path above if the destination
10123 * object has not been allocated yet. But we
10124 * now disable that optimization if the copy
10125 * entry's object is not backed by anonymous
10126 * memory to avoid replacing malloc'ed
10127 * (i.e. re-usable) anonymous memory with a
10128 * not-so-anonymous mapping.
10129 * So we have to handle this case here and
10130 * allocate a new VM object for this map entry.
10131 */
10132 dst_object = vm_object_allocate(
10133 entry->vme_end - entry->vme_start);
10134 dst_offset = 0;
10135 VME_OBJECT_SET(entry, dst_object);
10136 VME_OFFSET_SET(entry, dst_offset);
10137 assert(entry->use_pmap);
10138 }
10139
10140 vm_object_reference(dst_object);
10141
10142 /* account for unlock bumping up timestamp */
10143 version.main_timestamp = dst_map->timestamp + 1;
10144
10145 vm_map_unlock(dst_map);
10146
10147 /*
10148 * Copy as much as possible in one pass
10149 */
10150
10151 copy_size = size;
10152 r = vm_fault_copy(
10153 VME_OBJECT(copy_entry),
10154 VME_OFFSET(copy_entry),
10155 &copy_size,
10156 dst_object,
10157 dst_offset,
10158 dst_map,
10159 &version,
10160 THREAD_UNINT );
10161
10162 /*
10163 * Release the object reference
10164 */
10165
10166 vm_object_deallocate(dst_object);
10167
10168 /*
10169 * If a hard error occurred, return it now
10170 */
10171
10172 if (r != KERN_SUCCESS) {
10173 return r;
10174 }
10175
10176 if (copy_size != 0) {
10177 /*
10178 * Dispose of the copied region
10179 */
10180
10181 vm_map_copy_clip_end(copy, copy_entry,
10182 copy_entry->vme_start + copy_size);
10183 vm_map_copy_entry_unlink(copy, copy_entry);
10184 vm_object_deallocate(VME_OBJECT(copy_entry));
10185 vm_map_copy_entry_dispose(copy, copy_entry);
10186 }
10187
10188 /*
10189 * Pick up in the destination map where we left off.
10190 *
10191 * Use the version information to avoid a lookup
10192 * in the normal case.
10193 */
10194
10195 start += copy_size;
10196 vm_map_lock(dst_map);
10197 if (version.main_timestamp == dst_map->timestamp &&
10198 copy_size != 0) {
10199 /* We can safely use saved tmp_entry value */
10200
10201 if (tmp_entry->map_aligned &&
10202 !VM_MAP_PAGE_ALIGNED(
10203 start,
10204 VM_MAP_PAGE_MASK(dst_map))) {
10205 /* no longer map-aligned */
10206 tmp_entry->map_aligned = FALSE;
10207 }
10208 vm_map_clip_end(dst_map, tmp_entry, start);
10209 tmp_entry = tmp_entry->vme_next;
10210 } else {
10211 /* Must do lookup of tmp_entry */
10212
10213 if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
10214 vm_map_unlock(dst_map);
10215 return KERN_INVALID_ADDRESS;
10216 }
10217 if (tmp_entry->map_aligned &&
10218 !VM_MAP_PAGE_ALIGNED(
10219 start,
10220 VM_MAP_PAGE_MASK(dst_map))) {
10221 /* no longer map-aligned */
10222 tmp_entry->map_aligned = FALSE;
10223 }
10224 vm_map_clip_start(dst_map, tmp_entry, start);
10225 }
10226 }
10227 }/* while */
10228
10229 return KERN_SUCCESS;
10230 }/* vm_map_copy_overwrite_aligned */
10231
10232 /*
10233 * Routine: vm_map_copyin_kernel_buffer [internal use only]
10234 *
10235 * Description:
10236 * Copy in data to a kernel buffer from space in the
10237 * source map. The original space may be optionally
10238 * deallocated.
10239 *
10240 * If successful, returns a new copy object.
10241 */
10242 static kern_return_t
10243 vm_map_copyin_kernel_buffer(
10244 vm_map_t src_map,
10245 vm_map_offset_t src_addr,
10246 vm_map_size_t len,
10247 boolean_t src_destroy,
10248 vm_map_copy_t *copy_result)
10249 {
10250 kern_return_t kr;
10251 vm_map_copy_t copy;
10252 vm_size_t kalloc_size;
10253
10254 if (len > msg_ool_size_small) {
10255 return KERN_INVALID_ARGUMENT;
10256 }
10257
10258 kalloc_size = (vm_size_t)(cpy_kdata_hdr_sz + len);
10259
10260 copy = (vm_map_copy_t)kalloc(kalloc_size);
10261 if (copy == VM_MAP_COPY_NULL) {
10262 return KERN_RESOURCE_SHORTAGE;
10263 }
10264 copy->type = VM_MAP_COPY_KERNEL_BUFFER;
10265 copy->size = len;
10266 copy->offset = 0;
10267
10268 kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t)len);
10269 if (kr != KERN_SUCCESS) {
10270 kfree(copy, kalloc_size);
10271 return kr;
10272 }
10273 if (src_destroy) {
10274 (void) vm_map_remove(
10275 src_map,
10276 vm_map_trunc_page(src_addr,
10277 VM_MAP_PAGE_MASK(src_map)),
10278 vm_map_round_page(src_addr + len,
10279 VM_MAP_PAGE_MASK(src_map)),
10280 (VM_MAP_REMOVE_INTERRUPTIBLE |
10281 VM_MAP_REMOVE_WAIT_FOR_KWIRE |
10282 ((src_map == kernel_map) ? VM_MAP_REMOVE_KUNWIRE : VM_MAP_REMOVE_NO_FLAGS)));
10283 }
10284 *copy_result = copy;
10285 return KERN_SUCCESS;
10286 }
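/*
 * Illustrative sketch: how a small copy ends up on this kernel-buffer
 * path. Copies smaller than "msg_ool_size_small" that go through
 * vm_map_copyin_common() are (subject to a few extra checks) handled by
 * vm_map_copyin_kernel_buffer() above instead of building an entry-list
 * copy. The helper name "example_small_copyin" and its parameters are
 * hypothetical, for illustration only.
 */
#if 0 /* example only -- not compiled */
static kern_return_t
example_small_copyin(vm_map_address_t uaddr, vm_map_size_t small_len)
{
        vm_map_copy_t copy;
        kern_return_t kr;

        /*
         * A sub-"msg_ool_size_small" copy from the current map is
         * captured into a VM_MAP_COPY_KERNEL_BUFFER copy object,
         * avoiding the cost of setting up copy-on-write mappings.
         */
        kr = vm_map_copyin(current_map(), uaddr, small_len,
            FALSE,              /* src_destroy */
            &copy);
        if (kr != KERN_SUCCESS) {
                return kr;
        }
        /* ... hand "copy" to vm_map_copyout() or discard it ... */
        vm_map_copy_discard(copy);
        return KERN_SUCCESS;
}
#endif /* example only */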
10287
10288 /*
10289 * Routine: vm_map_copyout_kernel_buffer [internal use only]
10290 *
10291 * Description:
10292 * Copy out data from a kernel buffer into space in the
10293 * destination map. The space may optionally be dynamically
10294 * allocated.
10295 *
10296 * If successful, consumes the copy object.
10297 * Otherwise, the caller is responsible for it.
10298 */
10299 static int vm_map_copyout_kernel_buffer_failures = 0;
10300 static kern_return_t
10301 vm_map_copyout_kernel_buffer(
10302 vm_map_t map,
10303 vm_map_address_t *addr, /* IN/OUT */
10304 vm_map_copy_t copy,
10305 vm_map_size_t copy_size,
10306 boolean_t overwrite,
10307 boolean_t consume_on_success)
10308 {
10309 kern_return_t kr = KERN_SUCCESS;
10310 thread_t thread = current_thread();
10311
10312 assert(copy->size == copy_size);
10313
10314 /*
10315 * check for corrupted vm_map_copy structure
10316 */
10317 if (copy_size > msg_ool_size_small || copy->offset) {
10318 panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
10319 (long long)copy->size, (long long)copy->offset);
10320 }
10321
10322 if (!overwrite) {
10323 /*
10324 * Allocate space in the target map for the data
10325 */
10326 *addr = 0;
10327 kr = vm_map_enter(map,
10328 addr,
10329 vm_map_round_page(copy_size,
10330 VM_MAP_PAGE_MASK(map)),
10331 (vm_map_offset_t) 0,
10332 VM_FLAGS_ANYWHERE,
10333 VM_MAP_KERNEL_FLAGS_NONE,
10334 VM_KERN_MEMORY_NONE,
10335 VM_OBJECT_NULL,
10336 (vm_object_offset_t) 0,
10337 FALSE,
10338 VM_PROT_DEFAULT,
10339 VM_PROT_ALL,
10340 VM_INHERIT_DEFAULT);
10341 if (kr != KERN_SUCCESS) {
10342 return kr;
10343 }
10344 #if KASAN
10345 if (map->pmap == kernel_pmap) {
10346 kasan_notify_address(*addr, copy->size);
10347 }
10348 #endif
10349 }
10350
10351 /*
10352 * Copyout the data from the kernel buffer to the target map.
10353 */
10354 if (thread->map == map) {
10355 /*
10356 * If the target map is the current map, just do
10357 * the copy.
10358 */
10359 assert((vm_size_t)copy_size == copy_size);
10360 if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
10361 kr = KERN_INVALID_ADDRESS;
10362 }
10363 } else {
10364 vm_map_t oldmap;
10365
10366 /*
10367 * If the target map is another map, assume the
10368 * target's address space identity for the duration
10369 * of the copy.
10370 */
10371 vm_map_reference(map);
10372 oldmap = vm_map_switch(map);
10373
10374 assert((vm_size_t)copy_size == copy_size);
10375 if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
10376 vm_map_copyout_kernel_buffer_failures++;
10377 kr = KERN_INVALID_ADDRESS;
10378 }
10379
10380 (void) vm_map_switch(oldmap);
10381 vm_map_deallocate(map);
10382 }
10383
10384 if (kr != KERN_SUCCESS) {
10385 /* the copy failed, clean up */
10386 if (!overwrite) {
10387 /*
10388 * Deallocate the space we allocated in the target map.
10389 */
10390 (void) vm_map_remove(
10391 map,
10392 vm_map_trunc_page(*addr,
10393 VM_MAP_PAGE_MASK(map)),
10394 vm_map_round_page((*addr +
10395 vm_map_round_page(copy_size,
10396 VM_MAP_PAGE_MASK(map))),
10397 VM_MAP_PAGE_MASK(map)),
10398 VM_MAP_REMOVE_NO_FLAGS);
10399 *addr = 0;
10400 }
10401 } else {
10402 /* copy was successful, discard the copy structure */
10403 if (consume_on_success) {
10404 kfree(copy, copy_size + cpy_kdata_hdr_sz);
10405 }
10406 }
10407
10408 return kr;
10409 }
10410
10411 /*
10412 * Routine: vm_map_copy_insert [internal use only]
10413 *
10414 * Description:
10415 * Link a copy chain ("copy") into a map at the
10416 * specified location (after "where").
10417 * Side effects:
10418 * The copy chain is destroyed.
10419 */
10420 static void
10421 vm_map_copy_insert(
10422 vm_map_t map,
10423 vm_map_entry_t after_where,
10424 vm_map_copy_t copy)
10425 {
10426 vm_map_entry_t entry;
10427
10428 while (vm_map_copy_first_entry(copy) != vm_map_copy_to_entry(copy)) {
10429 entry = vm_map_copy_first_entry(copy);
10430 vm_map_copy_entry_unlink(copy, entry);
10431 vm_map_store_entry_link(map, after_where, entry,
10432 VM_MAP_KERNEL_FLAGS_NONE);
10433 after_where = entry;
10434 }
10435 zfree(vm_map_copy_zone, copy);
10436 }
10437
10438 void
10439 vm_map_copy_remap(
10440 vm_map_t map,
10441 vm_map_entry_t where,
10442 vm_map_copy_t copy,
10443 vm_map_offset_t adjustment,
10444 vm_prot_t cur_prot,
10445 vm_prot_t max_prot,
10446 vm_inherit_t inheritance)
10447 {
10448 vm_map_entry_t copy_entry, new_entry;
10449
10450 for (copy_entry = vm_map_copy_first_entry(copy);
10451 copy_entry != vm_map_copy_to_entry(copy);
10452 copy_entry = copy_entry->vme_next) {
10453 /* get a new VM map entry for the map */
10454 new_entry = vm_map_entry_create(map,
10455 !map->hdr.entries_pageable);
10456 /* copy the "copy entry" to the new entry */
10457 vm_map_entry_copy(new_entry, copy_entry);
10458 /* adjust "start" and "end" */
10459 new_entry->vme_start += adjustment;
10460 new_entry->vme_end += adjustment;
10461 /* clear some attributes */
10462 new_entry->inheritance = inheritance;
10463 new_entry->protection = cur_prot;
10464 new_entry->max_protection = max_prot;
10465 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
10466 /* take an extra reference on the entry's "object" */
10467 if (new_entry->is_sub_map) {
10468 assert(!new_entry->use_pmap); /* not nested */
10469 vm_map_lock(VME_SUBMAP(new_entry));
10470 vm_map_reference(VME_SUBMAP(new_entry));
10471 vm_map_unlock(VME_SUBMAP(new_entry));
10472 } else {
10473 vm_object_reference(VME_OBJECT(new_entry));
10474 }
10475 /* insert the new entry in the map */
10476 vm_map_store_entry_link(map, where, new_entry,
10477 VM_MAP_KERNEL_FLAGS_NONE);
10478 /* continue inserting the "copy entries" after the new entry */
10479 where = new_entry;
10480 }
10481 }
10482
10483
10484 /*
10485 * Returns true if *size matches (or is in the range of) copy->size.
10486 * Upon returning true, the *size field is updated with the actual size of the
10487 * copy object (may be different for VM_MAP_COPY_ENTRY_LIST types)
10488 */
10489 boolean_t
10490 vm_map_copy_validate_size(
10491 vm_map_t dst_map,
10492 vm_map_copy_t copy,
10493 vm_map_size_t *size)
10494 {
10495 if (copy == VM_MAP_COPY_NULL) {
10496 return FALSE;
10497 }
10498 vm_map_size_t copy_sz = copy->size;
10499 vm_map_size_t sz = *size;
10500 switch (copy->type) {
10501 case VM_MAP_COPY_OBJECT:
10502 case VM_MAP_COPY_KERNEL_BUFFER:
10503 if (sz == copy_sz) {
10504 return TRUE;
10505 }
10506 break;
10507 case VM_MAP_COPY_ENTRY_LIST:
10508 /*
10509 * potential page-size rounding prevents us from exactly
10510 * validating this flavor of vm_map_copy, but we can at least
10511 * assert that it's within a range.
10512 */
10513 if (copy_sz >= sz &&
10514 copy_sz <= vm_map_round_page(sz, VM_MAP_PAGE_MASK(dst_map))) {
10515 *size = copy_sz;
10516 return TRUE;
10517 }
10518 break;
10519 default:
10520 break;
10521 }
10522 return FALSE;
10523 }
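/*
 * Illustrative sketch: vm_map_copy_validate_size() is meant to be paired
 * with vm_map_copyout_size() when a caller-supplied size accompanies a
 * vm_map_copy_t (e.g. in a MIG-style server routine). The helper below
 * and its names are hypothetical, for illustration only.
 */
#if 0 /* example only -- not compiled */
static kern_return_t
example_validated_copyout(
        vm_map_t          dst_map,
        vm_map_copy_t     copy,
        vm_map_size_t     caller_size,
        vm_map_address_t  *addr)        /* OUT */
{
        vm_map_size_t size = caller_size;

        if (!vm_map_copy_validate_size(dst_map, copy, &size)) {
                /* caller-supplied size does not match the copy object */
                return KERN_INVALID_ARGUMENT;
        }
        /* "size" now holds the copy's actual (possibly rounded) size */
        return vm_map_copyout_size(dst_map, addr, copy, size);
}
#endif /* example only */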
10524
10525 /*
10526 * Routine: vm_map_copyout_size
10527 *
10528 * Description:
10529 * Copy out a copy chain ("copy") into newly-allocated
10530 * space in the destination map. Uses a prevalidated
10531 * size for the copy object (vm_map_copy_validate_size).
10532 *
10533 * If successful, consumes the copy object.
10534 * Otherwise, the caller is responsible for it.
10535 */
10536 kern_return_t
10537 vm_map_copyout_size(
10538 vm_map_t dst_map,
10539 vm_map_address_t *dst_addr, /* OUT */
10540 vm_map_copy_t copy,
10541 vm_map_size_t copy_size)
10542 {
10543 return vm_map_copyout_internal(dst_map, dst_addr, copy, copy_size,
10544 TRUE, /* consume_on_success */
10545 VM_PROT_DEFAULT,
10546 VM_PROT_ALL,
10547 VM_INHERIT_DEFAULT);
10548 }
10549
10550 /*
10551 * Routine: vm_map_copyout
10552 *
10553 * Description:
10554 * Copy out a copy chain ("copy") into newly-allocated
10555 * space in the destination map.
10556 *
10557 * If successful, consumes the copy object.
10558 * Otherwise, the caller is responsible for it.
10559 */
10560 kern_return_t
10561 vm_map_copyout(
10562 vm_map_t dst_map,
10563 vm_map_address_t *dst_addr, /* OUT */
10564 vm_map_copy_t copy)
10565 {
10566 return vm_map_copyout_internal(dst_map, dst_addr, copy, copy ? copy->size : 0,
10567 TRUE, /* consume_on_success */
10568 VM_PROT_DEFAULT,
10569 VM_PROT_ALL,
10570 VM_INHERIT_DEFAULT);
10571 }
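/*
 * Illustrative sketch: the usual copyin/copyout round trip between two
 * maps. On success vm_map_copyout() consumes the copy object; on failure
 * the caller still owns it and must discard it. The helper name
 * "example_transfer_region" is hypothetical, for illustration only.
 */
#if 0 /* example only -- not compiled */
static kern_return_t
example_transfer_region(
        vm_map_t          src_map,
        vm_map_address_t  src_addr,
        vm_map_size_t     len,
        vm_map_t          dst_map,
        vm_map_address_t  *dst_addr)    /* OUT */
{
        vm_map_copy_t copy;
        kern_return_t kr;

        /* capture the source range as a copy object (COW where possible) */
        kr = vm_map_copyin(src_map, src_addr, len,
            FALSE,              /* src_destroy */
            &copy);
        if (kr != KERN_SUCCESS) {
                return kr;
        }
        kr = vm_map_copyout(dst_map, dst_addr, copy);
        if (kr != KERN_SUCCESS) {
                /* copyout failed: the copy was not consumed */
                vm_map_copy_discard(copy);
        }
        return kr;
}
#endif /* example only */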
10572
10573 kern_return_t
10574 vm_map_copyout_internal(
10575 vm_map_t dst_map,
10576 vm_map_address_t *dst_addr, /* OUT */
10577 vm_map_copy_t copy,
10578 vm_map_size_t copy_size,
10579 boolean_t consume_on_success,
10580 vm_prot_t cur_protection,
10581 vm_prot_t max_protection,
10582 vm_inherit_t inheritance)
10583 {
10584 vm_map_size_t size;
10585 vm_map_size_t adjustment;
10586 vm_map_offset_t start;
10587 vm_object_offset_t vm_copy_start;
10588 vm_map_entry_t last;
10589 vm_map_entry_t entry;
10590 vm_map_entry_t hole_entry;
10591
10592 /*
10593 * Check for null copy object.
10594 */
10595
10596 if (copy == VM_MAP_COPY_NULL) {
10597 *dst_addr = 0;
10598 return KERN_SUCCESS;
10599 }
10600
10601 if (copy->size != copy_size) {
10602 *dst_addr = 0;
10603 return KERN_FAILURE;
10604 }
10605
10606 /*
10607 * Check for special copy object, created
10608 * by vm_map_copyin_object.
10609 */
10610
10611 if (copy->type == VM_MAP_COPY_OBJECT) {
10612 vm_object_t object = copy->cpy_object;
10613 kern_return_t kr;
10614 vm_object_offset_t offset;
10615
10616 offset = vm_object_trunc_page(copy->offset);
10617 size = vm_map_round_page((copy_size +
10618 (vm_map_size_t)(copy->offset -
10619 offset)),
10620 VM_MAP_PAGE_MASK(dst_map));
10621 *dst_addr = 0;
10622 kr = vm_map_enter(dst_map, dst_addr, size,
10623 (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
10624 VM_MAP_KERNEL_FLAGS_NONE,
10625 VM_KERN_MEMORY_NONE,
10626 object, offset, FALSE,
10627 VM_PROT_DEFAULT, VM_PROT_ALL,
10628 VM_INHERIT_DEFAULT);
10629 if (kr != KERN_SUCCESS) {
10630 return kr;
10631 }
10632 /* Account for non-pagealigned copy object */
10633 *dst_addr += (vm_map_offset_t)(copy->offset - offset);
10634 if (consume_on_success) {
10635 zfree(vm_map_copy_zone, copy);
10636 }
10637 return KERN_SUCCESS;
10638 }
10639
10640 /*
10641 * Check for special kernel buffer allocated
10642 * by new_ipc_kmsg_copyin.
10643 */
10644
10645 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
10646 return vm_map_copyout_kernel_buffer(dst_map, dst_addr,
10647 copy, copy_size, FALSE,
10648 consume_on_success);
10649 }
10650
10651
10652 /*
10653 * Find space for the data
10654 */
10655
10656 vm_copy_start = vm_map_trunc_page((vm_map_size_t)copy->offset,
10657 VM_MAP_COPY_PAGE_MASK(copy));
10658 size = vm_map_round_page((vm_map_size_t)copy->offset + copy_size,
10659 VM_MAP_COPY_PAGE_MASK(copy))
10660 - vm_copy_start;
10661
10662
10663 StartAgain:;
10664
10665 vm_map_lock(dst_map);
10666 if (dst_map->disable_vmentry_reuse == TRUE) {
10667 VM_MAP_HIGHEST_ENTRY(dst_map, entry, start);
10668 last = entry;
10669 } else {
10670 if (dst_map->holelistenabled) {
10671 hole_entry = CAST_TO_VM_MAP_ENTRY(dst_map->holes_list);
10672
10673 if (hole_entry == NULL) {
10674 /*
10675 * No more space in the map?
10676 */
10677 vm_map_unlock(dst_map);
10678 return KERN_NO_SPACE;
10679 }
10680
10681 last = hole_entry;
10682 start = last->vme_start;
10683 } else {
10684 assert(first_free_is_valid(dst_map));
10685 start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
10686 vm_map_min(dst_map) : last->vme_end;
10687 }
10688 start = vm_map_round_page(start,
10689 VM_MAP_PAGE_MASK(dst_map));
10690 }
10691
10692 while (TRUE) {
10693 vm_map_entry_t next = last->vme_next;
10694 vm_map_offset_t end = start + size;
10695
10696 if ((end > dst_map->max_offset) || (end < start)) {
10697 if (dst_map->wait_for_space) {
10698 if (size <= (dst_map->max_offset - dst_map->min_offset)) {
10699 assert_wait((event_t) dst_map,
10700 THREAD_INTERRUPTIBLE);
10701 vm_map_unlock(dst_map);
10702 thread_block(THREAD_CONTINUE_NULL);
10703 goto StartAgain;
10704 }
10705 }
10706 vm_map_unlock(dst_map);
10707 return KERN_NO_SPACE;
10708 }
10709
10710 if (dst_map->holelistenabled) {
10711 if (last->vme_end >= end) {
10712 break;
10713 }
10714 } else {
10715 /*
10716 * If there are no more entries, we must win.
10717 *
10718 * OR
10719 *
10720 * If there is another entry, it must be
10721 * after the end of the potential new region.
10722 */
10723
10724 if (next == vm_map_to_entry(dst_map)) {
10725 break;
10726 }
10727
10728 if (next->vme_start >= end) {
10729 break;
10730 }
10731 }
10732
10733 last = next;
10734
10735 if (dst_map->holelistenabled) {
10736 if (last == CAST_TO_VM_MAP_ENTRY(dst_map->holes_list)) {
10737 /*
10738 * Wrapped around
10739 */
10740 vm_map_unlock(dst_map);
10741 return KERN_NO_SPACE;
10742 }
10743 start = last->vme_start;
10744 } else {
10745 start = last->vme_end;
10746 }
10747 start = vm_map_round_page(start,
10748 VM_MAP_PAGE_MASK(dst_map));
10749 }
10750
10751 if (dst_map->holelistenabled) {
10752 if (vm_map_lookup_entry(dst_map, last->vme_start, &last)) {
10753 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", last, (unsigned long long)last->vme_start);
10754 }
10755 }
10756
10757
10758 adjustment = start - vm_copy_start;
10759 if (!consume_on_success) {
10760 /*
10761 * We're not allowed to consume "copy", so we'll have to
10762 * copy its map entries into the destination map below.
10763 * No need to re-allocate map entries from the correct
10764 * (pageable or not) zone, since we'll get new map entries
10765 * during the transfer.
10766 * We'll also adjust the map entries' "start" and "end"
10767 * during the transfer, to keep "copy"'s entries consistent
10768 * with its "offset".
10769 */
10770 goto after_adjustments;
10771 }
10772
10773 /*
10774 * Since we're going to just drop the map
10775 * entries from the copy into the destination
10776 * map, they must come from the same pool.
10777 */
10778
10779 if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
10780 /*
10781 * Mismatches occur when dealing with the default
10782 * pager.
10783 */
10784 zone_t old_zone;
10785 vm_map_entry_t next, new;
10786
10787 /*
10788 * Find the zone that the copies were allocated from
10789 */
10790
10791 entry = vm_map_copy_first_entry(copy);
10792
10793 /*
10794 * Reinitialize the copy so that vm_map_copy_entry_link
10795 * will work.
10796 */
10797 vm_map_store_copy_reset(copy, entry);
10798 copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;
10799
10800 /*
10801 * Copy each entry.
10802 */
10803 while (entry != vm_map_copy_to_entry(copy)) {
10804 new = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
10805 vm_map_entry_copy_full(new, entry);
10806 new->vme_no_copy_on_read = FALSE;
10807 assert(!new->iokit_acct);
10808 if (new->is_sub_map) {
10809 /* clr address space specifics */
10810 new->use_pmap = FALSE;
10811 }
10812 vm_map_copy_entry_link(copy,
10813 vm_map_copy_last_entry(copy),
10814 new);
10815 next = entry->vme_next;
10816 old_zone = entry->from_reserved_zone ? vm_map_entry_reserved_zone : vm_map_entry_zone;
10817 zfree(old_zone, entry);
10818 entry = next;
10819 }
10820 }
10821
10822 /*
10823 * Adjust the addresses in the copy chain, and
10824 * reset the region attributes.
10825 */
10826
10827 for (entry = vm_map_copy_first_entry(copy);
10828 entry != vm_map_copy_to_entry(copy);
10829 entry = entry->vme_next) {
10830 if (VM_MAP_PAGE_SHIFT(dst_map) == PAGE_SHIFT) {
10831 /*
10832 * We're injecting this copy entry into a map that
10833 * has the standard page alignment, so clear
10834 * "map_aligned" (which might have been inherited
10835 * from the original map entry).
10836 */
10837 entry->map_aligned = FALSE;
10838 }
10839
10840 entry->vme_start += adjustment;
10841 entry->vme_end += adjustment;
10842
10843 if (entry->map_aligned) {
10844 assert(VM_MAP_PAGE_ALIGNED(entry->vme_start,
10845 VM_MAP_PAGE_MASK(dst_map)));
10846 assert(VM_MAP_PAGE_ALIGNED(entry->vme_end,
10847 VM_MAP_PAGE_MASK(dst_map)));
10848 }
10849
10850 entry->inheritance = VM_INHERIT_DEFAULT;
10851 entry->protection = VM_PROT_DEFAULT;
10852 entry->max_protection = VM_PROT_ALL;
10853 entry->behavior = VM_BEHAVIOR_DEFAULT;
10854
10855 /*
10856 * If the entry is now wired,
10857 * map the pages into the destination map.
10858 */
10859 if (entry->wired_count != 0) {
10860 vm_map_offset_t va;
10861 vm_object_offset_t offset;
10862 vm_object_t object;
10863 vm_prot_t prot;
10864 int type_of_fault;
10865
10866 object = VME_OBJECT(entry);
10867 offset = VME_OFFSET(entry);
10868 va = entry->vme_start;
10869
10870 pmap_pageable(dst_map->pmap,
10871 entry->vme_start,
10872 entry->vme_end,
10873 TRUE);
10874
10875 while (va < entry->vme_end) {
10876 vm_page_t m;
10877 struct vm_object_fault_info fault_info = {};
10878
10879 /*
10880 * Look up the page in the object.
10881 * Assert that the page will be found in the
10882 * top object:
10883 * either
10884 * the object was newly created by
10885 * vm_object_copy_slowly, and has
10886 * copies of all of the pages from
10887 * the source object
10888 * or
10889 * the object was moved from the old
10890 * map entry; because the old map
10891 * entry was wired, all of the pages
10892 * were in the top-level object.
10893 * (XXX not true if we wire pages for
10894 * reading)
10895 */
10896 vm_object_lock(object);
10897
10898 m = vm_page_lookup(object, offset);
10899 if (m == VM_PAGE_NULL || !VM_PAGE_WIRED(m) ||
10900 m->vmp_absent) {
10901 panic("vm_map_copyout: wiring %p", m);
10902 }
10903
10904 prot = entry->protection;
10905
10906 if (override_nx(dst_map, VME_ALIAS(entry)) &&
10907 prot) {
10908 prot |= VM_PROT_EXECUTE;
10909 }
10910
10911 type_of_fault = DBG_CACHE_HIT_FAULT;
10912
10913 fault_info.user_tag = VME_ALIAS(entry);
10914 fault_info.pmap_options = 0;
10915 if (entry->iokit_acct ||
10916 (!entry->is_sub_map && !entry->use_pmap)) {
10917 fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
10918 }
10919
10920 vm_fault_enter(m,
10921 dst_map->pmap,
10922 va,
10923 prot,
10924 prot,
10925 VM_PAGE_WIRED(m),
10926 FALSE, /* change_wiring */
10927 VM_KERN_MEMORY_NONE, /* tag - not wiring */
10928 &fault_info,
10929 NULL, /* need_retry */
10930 &type_of_fault);
10931
10932 vm_object_unlock(object);
10933
10934 offset += PAGE_SIZE_64;
10935 va += PAGE_SIZE;
10936 }
10937 }
10938 }
10939
10940 after_adjustments:
10941
10942 /*
10943 * Correct the page alignment for the result
10944 */
10945
10946 *dst_addr = start + (copy->offset - vm_copy_start);
10947
10948 #if KASAN
10949 kasan_notify_address(*dst_addr, size);
10950 #endif
10951
10952 /*
10953 * Update the hints and the map size
10954 */
10955
10956 if (consume_on_success) {
10957 SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy));
10958 } else {
10959 SAVE_HINT_MAP_WRITE(dst_map, last);
10960 }
10961
10962 dst_map->size += size;
10963
10964 /*
10965 * Link in the copy
10966 */
10967
10968 if (consume_on_success) {
10969 vm_map_copy_insert(dst_map, last, copy);
10970 } else {
10971 vm_map_copy_remap(dst_map, last, copy, adjustment,
10972 cur_protection, max_protection,
10973 inheritance);
10974 }
10975
10976 vm_map_unlock(dst_map);
10977
10978 /*
10979 * XXX If wiring_required, call vm_map_pageable
10980 */
10981
10982 return KERN_SUCCESS;
10983 }
10984
10985 /*
10986 * Routine: vm_map_copyin
10987 *
10988 * Description:
10989 * see vm_map_copyin_common. Exported via Unsupported.exports.
10990 *
10991 */
10992
10993 #undef vm_map_copyin
10994
10995 kern_return_t
10996 vm_map_copyin(
10997 vm_map_t src_map,
10998 vm_map_address_t src_addr,
10999 vm_map_size_t len,
11000 boolean_t src_destroy,
11001 vm_map_copy_t *copy_result) /* OUT */
11002 {
11003 return vm_map_copyin_common(src_map, src_addr, len, src_destroy,
11004 FALSE, copy_result, FALSE);
11005 }
11006
11007 /*
11008 * Routine: vm_map_copyin_common
11009 *
11010 * Description:
11011 * Copy the specified region (src_addr, len) from the
11012 * source address space (src_map), possibly removing
11013 * the region from the source address space (src_destroy).
11014 *
11015 * Returns:
11016 * A vm_map_copy_t object (copy_result), suitable for
11017 * insertion into another address space (using vm_map_copyout),
11018 * copying over another address space region (using
11019 * vm_map_copy_overwrite). If the copy is unused, it
11020 * should be destroyed (using vm_map_copy_discard).
11021 *
11022 * In/out conditions:
11023 * The source map should not be locked on entry.
11024 */
11025
11026 typedef struct submap_map {
11027 vm_map_t parent_map;
11028 vm_map_offset_t base_start;
11029 vm_map_offset_t base_end;
11030 vm_map_size_t base_len;
11031 struct submap_map *next;
11032 } submap_map_t;
11033
11034 kern_return_t
11035 vm_map_copyin_common(
11036 vm_map_t src_map,
11037 vm_map_address_t src_addr,
11038 vm_map_size_t len,
11039 boolean_t src_destroy,
11040 __unused boolean_t src_volatile,
11041 vm_map_copy_t *copy_result, /* OUT */
11042 boolean_t use_maxprot)
11043 {
11044 int flags;
11045
11046 flags = 0;
11047 if (src_destroy) {
11048 flags |= VM_MAP_COPYIN_SRC_DESTROY;
11049 }
11050 if (use_maxprot) {
11051 flags |= VM_MAP_COPYIN_USE_MAXPROT;
11052 }
11053 return vm_map_copyin_internal(src_map,
11054 src_addr,
11055 len,
11056 flags,
11057 copy_result);
11058 }
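/*
 * Illustrative sketch: how the copyin flags compose. Passing
 * VM_MAP_COPYIN_SRC_DESTROY to vm_map_copyin_internal() gives "move"
 * semantics: the source range is deallocated once the copy has been
 * captured. The helper name "example_move_copyin" is hypothetical,
 * for illustration only.
 */
#if 0 /* example only -- not compiled */
static kern_return_t
example_move_copyin(
        vm_map_t          src_map,
        vm_map_address_t  src_addr,
        vm_map_size_t     len,
        vm_map_copy_t     *copy_result) /* OUT */
{
        return vm_map_copyin_internal(src_map,
                   src_addr,
                   len,
                   VM_MAP_COPYIN_SRC_DESTROY,
                   copy_result);
}
#endif /* example only */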
11059 kern_return_t
11060 vm_map_copyin_internal(
11061 vm_map_t src_map,
11062 vm_map_address_t src_addr,
11063 vm_map_size_t len,
11064 int flags,
11065 vm_map_copy_t *copy_result) /* OUT */
11066 {
11067 vm_map_entry_t tmp_entry; /* Result of last map lookup --
11068 * in multi-level lookup, this
11069 * entry contains the actual
11070 * vm_object/offset.
11071 */
11072 vm_map_entry_t new_entry = VM_MAP_ENTRY_NULL; /* Map entry for copy */
11073
11074 vm_map_offset_t src_start; /* Start of current entry --
11075 * where copy is taking place now
11076 */
11077 vm_map_offset_t src_end; /* End of entire region to be
11078 * copied */
11079 vm_map_offset_t src_base;
11080 vm_map_t base_map = src_map;
11081 boolean_t map_share = FALSE;
11082 submap_map_t *parent_maps = NULL;
11083
11084 vm_map_copy_t copy; /* Resulting copy */
11085 vm_map_address_t copy_addr;
11086 vm_map_size_t copy_size;
11087 boolean_t src_destroy;
11088 boolean_t use_maxprot;
11089 boolean_t preserve_purgeable;
11090 boolean_t entry_was_shared;
11091 vm_map_entry_t saved_src_entry;
11092
11093 if (flags & ~VM_MAP_COPYIN_ALL_FLAGS) {
11094 return KERN_INVALID_ARGUMENT;
11095 }
11096
11097 src_destroy = (flags & VM_MAP_COPYIN_SRC_DESTROY) ? TRUE : FALSE;
11098 use_maxprot = (flags & VM_MAP_COPYIN_USE_MAXPROT) ? TRUE : FALSE;
11099 preserve_purgeable =
11100 (flags & VM_MAP_COPYIN_PRESERVE_PURGEABLE) ? TRUE : FALSE;
11101
11102 /*
11103 * Check for copies of zero bytes.
11104 */
11105
11106 if (len == 0) {
11107 *copy_result = VM_MAP_COPY_NULL;
11108 return KERN_SUCCESS;
11109 }
11110
11111 /*
11112 * Check that the end address doesn't overflow
11113 */
11114 src_end = src_addr + len;
11115 if (src_end < src_addr) {
11116 return KERN_INVALID_ADDRESS;
11117 }
11118
11119 /*
11120 * Compute (page aligned) start and end of region
11121 */
11122 src_start = vm_map_trunc_page(src_addr,
11123 VM_MAP_PAGE_MASK(src_map));
11124 src_end = vm_map_round_page(src_end,
11125 VM_MAP_PAGE_MASK(src_map));
11126
11127 /*
11128 * If the copy is sufficiently small, use a kernel buffer instead
11129 * of making a virtual copy. The theory being that the cost of
11130 * setting up VM (and taking C-O-W faults) dominates the copy costs
11131 * for small regions.
11132 */
11133 if ((len < msg_ool_size_small) &&
11134 !use_maxprot &&
11135 !preserve_purgeable &&
11136 !(flags & VM_MAP_COPYIN_ENTRY_LIST) &&
11137 /*
11138 * Since the "msg_ool_size_small" threshold was increased and
11139 * vm_map_copyin_kernel_buffer() doesn't handle accesses beyond the
11140 * address space limits, we revert to doing a virtual copy if the
11141 * copied range goes beyond those limits. Otherwise, mach_vm_read()
11142 * of the commpage would now fail when it used to work.
11143 */
11144 (src_start >= vm_map_min(src_map) &&
11145 src_start < vm_map_max(src_map) &&
11146 src_end >= vm_map_min(src_map) &&
11147 src_end < vm_map_max(src_map))) {
11148 return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
11149 src_destroy, copy_result);
11150 }
11151
11152 /*
11153 * Allocate a header element for the list.
11154 *
11155 * Use the start and end in the header to
11156 * remember the endpoints prior to rounding.
11157 */
11158
11159 copy = vm_map_copy_allocate();
11160 copy->type = VM_MAP_COPY_ENTRY_LIST;
11161 copy->cpy_hdr.entries_pageable = TRUE;
11162 #if 00
11163 copy->cpy_hdr.page_shift = src_map->hdr.page_shift;
11164 #else
11165 /*
11166 * The copy entries can be broken down for a variety of reasons,
11167 * so we can't guarantee that they will remain map-aligned...
11168 * Will need to adjust the first copy_entry's "vme_start" and
11169 * the last copy_entry's "vme_end" to be rounded to PAGE_MASK
11170 * rather than the original map's alignment.
11171 */
11172 copy->cpy_hdr.page_shift = PAGE_SHIFT;
11173 #endif
11174
11175 vm_map_store_init( &(copy->cpy_hdr));
11176
11177 copy->offset = src_addr;
11178 copy->size = len;
11179
11180 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
11181
11182 #define RETURN(x) \
11183 MACRO_BEGIN \
11184 vm_map_unlock(src_map); \
11185 if(src_map != base_map) \
11186 vm_map_deallocate(src_map); \
11187 if (new_entry != VM_MAP_ENTRY_NULL) \
11188 vm_map_copy_entry_dispose(copy,new_entry); \
11189 vm_map_copy_discard(copy); \
11190 { \
11191 submap_map_t *_ptr; \
11192 \
11193 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
11194 parent_maps=parent_maps->next; \
11195 if (_ptr->parent_map != base_map) \
11196 vm_map_deallocate(_ptr->parent_map); \
11197 kfree(_ptr, sizeof(submap_map_t)); \
11198 } \
11199 } \
11200 MACRO_RETURN(x); \
11201 MACRO_END
11202
11203 /*
11204 * Find the beginning of the region.
11205 */
11206
11207 vm_map_lock(src_map);
11208
11209 /*
11210 * Lookup the original "src_addr" rather than the truncated
11211 * "src_start", in case "src_start" falls in a non-map-aligned
11212 * map entry *before* the map entry that contains "src_addr"...
11213 */
11214 if (!vm_map_lookup_entry(src_map, src_addr, &tmp_entry)) {
11215 RETURN(KERN_INVALID_ADDRESS);
11216 }
11217 if (!tmp_entry->is_sub_map) {
11218 /*
11219 * ... but clip to the map-rounded "src_start" rather than
11220 * "src_addr" to preserve map-alignment. We'll adjust the
11221 * first copy entry at the end, if needed.
11222 */
11223 vm_map_clip_start(src_map, tmp_entry, src_start);
11224 }
11225 if (src_start < tmp_entry->vme_start) {
11226 /*
11227 * Move "src_start" up to the start of the
11228 * first map entry to copy.
11229 */
11230 src_start = tmp_entry->vme_start;
11231 }
11232 /* set for later submap fix-up */
11233 copy_addr = src_start;
11234
11235 /*
11236 * Go through entries until we get to the end.
11237 */
11238
11239 while (TRUE) {
11240 vm_map_entry_t src_entry = tmp_entry; /* Top-level entry */
11241 vm_map_size_t src_size; /* Size of source
11242 * map entry (in both
11243 * maps)
11244 */
11245
11246 vm_object_t src_object; /* Object to copy */
11247 vm_object_offset_t src_offset;
11248
11249 boolean_t src_needs_copy; /* Should source map
11250 * be made read-only
11251 * for copy-on-write?
11252 */
11253
11254 boolean_t new_entry_needs_copy; /* Will new entry be COW? */
11255
11256 boolean_t was_wired; /* Was source wired? */
11257 vm_map_version_t version; /* Version before locks
11258 * dropped to make copy
11259 */
11260 kern_return_t result; /* Return value from
11261 * copy_strategically.
11262 */
11263 while (tmp_entry->is_sub_map) {
11264 vm_map_size_t submap_len;
11265 submap_map_t *ptr;
11266
11267 ptr = (submap_map_t *)kalloc(sizeof(submap_map_t));
11268 ptr->next = parent_maps;
11269 parent_maps = ptr;
11270 ptr->parent_map = src_map;
11271 ptr->base_start = src_start;
11272 ptr->base_end = src_end;
11273 submap_len = tmp_entry->vme_end - src_start;
11274 if (submap_len > (src_end - src_start)) {
11275 submap_len = src_end - src_start;
11276 }
11277 ptr->base_len = submap_len;
11278
11279 src_start -= tmp_entry->vme_start;
11280 src_start += VME_OFFSET(tmp_entry);
11281 src_end = src_start + submap_len;
11282 src_map = VME_SUBMAP(tmp_entry);
11283 vm_map_lock(src_map);
11284 /* keep an outstanding reference for all maps in */
11285 /* the parent maps tree except the base map */
11286 vm_map_reference(src_map);
11287 vm_map_unlock(ptr->parent_map);
11288 if (!vm_map_lookup_entry(
11289 src_map, src_start, &tmp_entry)) {
11290 RETURN(KERN_INVALID_ADDRESS);
11291 }
11292 map_share = TRUE;
11293 if (!tmp_entry->is_sub_map) {
11294 vm_map_clip_start(src_map, tmp_entry, src_start);
11295 }
11296 src_entry = tmp_entry;
11297 }
11298 /* we are now in the lowest level submap... */
11299
11300 if ((VME_OBJECT(tmp_entry) != VM_OBJECT_NULL) &&
11301 (VME_OBJECT(tmp_entry)->phys_contiguous)) {
11302 /* This is not supported for now. In the future */
11303 /* we will need to detect the phys_contig */
11304 /* condition and then upgrade copy_slowly */
11305 /* to do a physical copy from the device- */
11306 /* memory-based object. We can piggy-back */
11307 /* off of the was_wired boolean to set up */
11308 /* the proper handling. */
11309 RETURN(KERN_PROTECTION_FAILURE);
11310 }
11311 /*
11312 * Create a new address map entry to hold the result.
11313 * Fill in the fields from the appropriate source entries.
11314 * We must unlock the source map to do this if we need
11315 * to allocate a map entry.
11316 */
11317 if (new_entry == VM_MAP_ENTRY_NULL) {
11318 version.main_timestamp = src_map->timestamp;
11319 vm_map_unlock(src_map);
11320
11321 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
11322
11323 vm_map_lock(src_map);
11324 if ((version.main_timestamp + 1) != src_map->timestamp) {
11325 if (!vm_map_lookup_entry(src_map, src_start,
11326 &tmp_entry)) {
11327 RETURN(KERN_INVALID_ADDRESS);
11328 }
11329 if (!tmp_entry->is_sub_map) {
11330 vm_map_clip_start(src_map, tmp_entry, src_start);
11331 }
11332 continue; /* restart w/ new tmp_entry */
11333 }
11334 }
11335
11336 /*
11337 * Verify that the region can be read.
11338 */
11339 if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE &&
11340 !use_maxprot) ||
11341 (src_entry->max_protection & VM_PROT_READ) == 0) {
11342 RETURN(KERN_PROTECTION_FAILURE);
11343 }
11344
11345 /*
11346 * Clip against the endpoints of the entire region.
11347 */
11348
11349 vm_map_clip_end(src_map, src_entry, src_end);
11350
11351 src_size = src_entry->vme_end - src_start;
11352 src_object = VME_OBJECT(src_entry);
11353 src_offset = VME_OFFSET(src_entry);
11354 was_wired = (src_entry->wired_count != 0);
11355
11356 vm_map_entry_copy(new_entry, src_entry);
11357 if (new_entry->is_sub_map) {
11358 /* clr address space specifics */
11359 new_entry->use_pmap = FALSE;
11360 } else {
11361 /*
11362 * We're dealing with a copy-on-write operation,
11363 * so the resulting mapping should not inherit the
11364 * original mapping's accounting settings.
11365 * "iokit_acct" should have been cleared in
11366 * vm_map_entry_copy().
11367 * "use_pmap" should be reset to its default (TRUE)
11368 * so that the new mapping gets accounted for in
11369 * the task's memory footprint.
11370 */
11371 assert(!new_entry->iokit_acct);
11372 new_entry->use_pmap = TRUE;
11373 }
11374
11375 /*
11376 * Attempt non-blocking copy-on-write optimizations.
11377 */
11378
11379 if (src_destroy &&
11380 (src_object == VM_OBJECT_NULL ||
11381 (src_object->internal &&
11382 src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
11383 src_entry->vme_start <= src_addr &&
11384 src_entry->vme_end >= src_end &&
11385 !map_share))) {
11386 /*
11387 * If we are destroying the source, and the object
11388 * is internal, we can move the object reference
11389 * from the source to the copy. The copy is
11390 * copy-on-write only if the source is.
11391 * We make another reference to the object, because
11392 * destroying the source entry will deallocate it.
11393 *
11394 * This memory transfer has to be atomic (to prevent
11395 * the VM object from being shared or copied while
11396 * it's being moved here), so we can only do this
11397 * if we won't have to unlock the VM map, i.e. the
11398 * entire range must be covered by this map entry.
11399 */
11400 vm_object_reference(src_object);
11401
11402 /*
11403 * Copy is always unwired. vm_map_copy_entry
11404 * sets its wired count to zero.
11405 */
11406
11407 goto CopySuccessful;
11408 }
11409
11410
11411 RestartCopy:
11412 if ((src_object == VM_OBJECT_NULL ||
11413 (!was_wired && !map_share && !tmp_entry->is_shared)) &&
11414 vm_object_copy_quickly(
11415 VME_OBJECT_PTR(new_entry),
11416 src_offset,
11417 src_size,
11418 &src_needs_copy,
11419 &new_entry_needs_copy)) {
11420 new_entry->needs_copy = new_entry_needs_copy;
11421
11422 /*
11423 * Handle copy-on-write obligations
11424 */
11425
11426 if (src_needs_copy && !tmp_entry->needs_copy) {
11427 vm_prot_t prot;
11428
11429 prot = src_entry->protection & ~VM_PROT_WRITE;
11430
11431 if (override_nx(src_map, VME_ALIAS(src_entry))
11432 && prot) {
11433 prot |= VM_PROT_EXECUTE;
11434 }
11435
11436 vm_object_pmap_protect(
11437 src_object,
11438 src_offset,
11439 src_size,
11440 (src_entry->is_shared ?
11441 PMAP_NULL
11442 : src_map->pmap),
11443 src_entry->vme_start,
11444 prot);
11445
11446 assert(tmp_entry->wired_count == 0);
11447 tmp_entry->needs_copy = TRUE;
11448 }
11449
11450 /*
11451 * The map has never been unlocked, so it's safe
11452 * to move to the next entry rather than doing
11453 * another lookup.
11454 */
11455
11456 goto CopySuccessful;
11457 }
11458
11459 entry_was_shared = tmp_entry->is_shared;
11460
11461 /*
11462 * Take an object reference, so that we may
11463 * release the map lock(s).
11464 */
11465
11466 assert(src_object != VM_OBJECT_NULL);
11467 vm_object_reference(src_object);
11468
11469 /*
11470 * Record the timestamp for later verification.
11471 * Unlock the map.
11472 */
11473
11474 version.main_timestamp = src_map->timestamp;
11475 vm_map_unlock(src_map); /* Increments timestamp once! */
11476 saved_src_entry = src_entry;
11477 tmp_entry = VM_MAP_ENTRY_NULL;
11478 src_entry = VM_MAP_ENTRY_NULL;
11479
11480 /*
11481 * Perform the copy
11482 */
11483
11484 if (was_wired) {
11485 CopySlowly:
11486 vm_object_lock(src_object);
11487 result = vm_object_copy_slowly(
11488 src_object,
11489 src_offset,
11490 src_size,
11491 THREAD_UNINT,
11492 VME_OBJECT_PTR(new_entry));
11493 VME_OFFSET_SET(new_entry, 0);
11494 new_entry->needs_copy = FALSE;
11495 } else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
11496 (entry_was_shared || map_share)) {
11497 vm_object_t new_object;
11498
11499 vm_object_lock_shared(src_object);
11500 new_object = vm_object_copy_delayed(
11501 src_object,
11502 src_offset,
11503 src_size,
11504 TRUE);
11505 if (new_object == VM_OBJECT_NULL) {
11506 goto CopySlowly;
11507 }
11508
11509 VME_OBJECT_SET(new_entry, new_object);
11510 assert(new_entry->wired_count == 0);
11511 new_entry->needs_copy = TRUE;
11512 assert(!new_entry->iokit_acct);
11513 assert(new_object->purgable == VM_PURGABLE_DENY);
11514 assertf(new_entry->use_pmap, "src_map %p new_entry %p\n", src_map, new_entry);
11515 result = KERN_SUCCESS;
11516 } else {
11517 vm_object_offset_t new_offset;
11518 new_offset = VME_OFFSET(new_entry);
11519 result = vm_object_copy_strategically(src_object,
11520 src_offset,
11521 src_size,
11522 VME_OBJECT_PTR(new_entry),
11523 &new_offset,
11524 &new_entry_needs_copy);
11525 if (new_offset != VME_OFFSET(new_entry)) {
11526 VME_OFFSET_SET(new_entry, new_offset);
11527 }
11528
11529 new_entry->needs_copy = new_entry_needs_copy;
11530 }
11531
11532 if (result == KERN_SUCCESS &&
11533 preserve_purgeable &&
11534 src_object->purgable != VM_PURGABLE_DENY) {
11535 vm_object_t new_object;
11536
11537 new_object = VME_OBJECT(new_entry);
11538 assert(new_object != src_object);
11539 vm_object_lock(new_object);
11540 assert(new_object->ref_count == 1);
11541 assert(new_object->shadow == VM_OBJECT_NULL);
11542 assert(new_object->copy == VM_OBJECT_NULL);
11543 assert(new_object->vo_owner == NULL);
11544
11545 new_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
11546 new_object->true_share = TRUE;
11547 /* start as non-volatile with no owner... */
11548 new_object->purgable = VM_PURGABLE_NONVOLATILE;
11549 vm_purgeable_nonvolatile_enqueue(new_object, NULL);
11550 /* ... and move to src_object's purgeable state */
11551 if (src_object->purgable != VM_PURGABLE_NONVOLATILE) {
11552 int state;
11553 state = src_object->purgable;
11554 vm_object_purgable_control(
11555 new_object,
11556 VM_PURGABLE_SET_STATE_FROM_KERNEL,
11557 &state);
11558 }
11559 vm_object_unlock(new_object);
11560 new_object = VM_OBJECT_NULL;
11561 /* no pmap accounting for purgeable objects */
11562 new_entry->use_pmap = FALSE;
11563 }
11564
11565 if (result != KERN_SUCCESS &&
11566 result != KERN_MEMORY_RESTART_COPY) {
11567 vm_map_lock(src_map);
11568 RETURN(result);
11569 }
11570
11571 /*
11572 * Throw away the extra reference
11573 */
11574
11575 vm_object_deallocate(src_object);
11576
11577 /*
11578 * Verify that the map has not substantially
11579 * changed while the copy was being made.
11580 */
11581
11582 vm_map_lock(src_map);
11583
11584 if ((version.main_timestamp + 1) == src_map->timestamp) {
11585 /* src_map hasn't changed: src_entry is still valid */
11586 src_entry = saved_src_entry;
11587 goto VerificationSuccessful;
11588 }
11589
11590 /*
11591 * Simple version comparison failed.
11592 *
11593 * Retry the lookup and verify that the
11594 * same object/offset are still present.
11595 *
11596 * [Note: a memory manager that colludes with
11597 * the calling task can detect that we have
11598 * cheated. While the map was unlocked, the
11599 * mapping could have been changed and restored.]
11600 */
11601
11602 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
11603 if (result != KERN_MEMORY_RESTART_COPY) {
11604 vm_object_deallocate(VME_OBJECT(new_entry));
11605 VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
11606 /* reset accounting state */
11607 new_entry->iokit_acct = FALSE;
11608 new_entry->use_pmap = TRUE;
11609 }
11610 RETURN(KERN_INVALID_ADDRESS);
11611 }
11612
11613 src_entry = tmp_entry;
11614 vm_map_clip_start(src_map, src_entry, src_start);
11615
11616 if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) &&
11617 !use_maxprot) ||
11618 ((src_entry->max_protection & VM_PROT_READ) == 0)) {
11619 goto VerificationFailed;
11620 }
11621
11622 if (src_entry->vme_end < new_entry->vme_end) {
11623 /*
11624 * This entry might have been shortened
11625 * (vm_map_clip_end) or been replaced with
11626 * an entry that ends closer to "src_start"
11627 * than before.
11628 * Adjust "new_entry" accordingly; copying
11629 * less memory would be correct but we also
11630 * redo the copy (see below) if the new entry
11631 * no longer points at the same object/offset.
11632 */
11633 assert(VM_MAP_PAGE_ALIGNED(src_entry->vme_end,
11634 VM_MAP_COPY_PAGE_MASK(copy)));
11635 new_entry->vme_end = src_entry->vme_end;
11636 src_size = new_entry->vme_end - src_start;
11637 } else if (src_entry->vme_end > new_entry->vme_end) {
11638 /*
11639 * This entry might have been extended
11640 * (vm_map_entry_simplify() or coalesce)
11641 * or been replaced with an entry that ends farther
11642 * from "src_start" than before.
11643 *
11644 * We've called vm_object_copy_*() only on
11645 * the previous <start:end> range, so we can't
11646 * just extend new_entry. We have to re-do
11647 * the copy based on the new entry as if it was
11648 * pointing at a different object/offset (see
11649 * "Verification failed" below).
11650 */
11651 }
11652
11653 if ((VME_OBJECT(src_entry) != src_object) ||
11654 (VME_OFFSET(src_entry) != src_offset) ||
11655 (src_entry->vme_end > new_entry->vme_end)) {
11656 /*
11657 * Verification failed.
11658 *
11659 * Start over with this top-level entry.
11660 */
11661
11662 VerificationFailed: ;
11663
11664 vm_object_deallocate(VME_OBJECT(new_entry));
11665 tmp_entry = src_entry;
11666 continue;
11667 }
11668
11669 /*
11670 * Verification succeeded.
11671 */
11672
11673 VerificationSuccessful:;
11674
11675 if (result == KERN_MEMORY_RESTART_COPY) {
11676 goto RestartCopy;
11677 }
11678
11679 /*
11680 * Copy succeeded.
11681 */
11682
11683 CopySuccessful: ;
11684
11685 /*
11686 * Link in the new copy entry.
11687 */
11688
11689 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
11690 new_entry);
11691
11692 /*
11693 * Determine whether the entire region
11694 * has been copied.
11695 */
11696 src_base = src_start;
11697 src_start = new_entry->vme_end;
11698 new_entry = VM_MAP_ENTRY_NULL;
11699 while ((src_start >= src_end) && (src_end != 0)) {
11700 submap_map_t *ptr;
11701
11702 if (src_map == base_map) {
11703 /* back to the top */
11704 break;
11705 }
11706
11707 ptr = parent_maps;
11708 assert(ptr != NULL);
11709 parent_maps = parent_maps->next;
11710
11711 /* fix up the damage we did in that submap */
11712 vm_map_simplify_range(src_map,
11713 src_base,
11714 src_end);
11715
11716 vm_map_unlock(src_map);
11717 vm_map_deallocate(src_map);
11718 vm_map_lock(ptr->parent_map);
11719 src_map = ptr->parent_map;
11720 src_base = ptr->base_start;
11721 src_start = ptr->base_start + ptr->base_len;
11722 src_end = ptr->base_end;
11723 if (!vm_map_lookup_entry(src_map,
11724 src_start,
11725 &tmp_entry) &&
11726 (src_end > src_start)) {
11727 RETURN(KERN_INVALID_ADDRESS);
11728 }
11729 kfree(ptr, sizeof(submap_map_t));
11730 if (parent_maps == NULL) {
11731 map_share = FALSE;
11732 }
11733 src_entry = tmp_entry->vme_prev;
11734 }
11735
11736 if ((VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) &&
11737 (src_start >= src_addr + len) &&
11738 (src_addr + len != 0)) {
11739 /*
11740 * Stop copying now, even though we haven't reached
11741 * "src_end". We'll adjust the end of the last copy
11742 * entry at the end, if needed.
11743 *
11744 * If src_map's alignment is different from the
11745 * system's page-alignment, there could be
11746 * extra non-map-aligned map entries between
11747 * the original (non-rounded) "src_addr + len"
11748 * and the rounded "src_end".
11749 * We do not want to copy those map entries since
11750 * they're not part of the copied range.
11751 */
11752 break;
11753 }
11754
11755 if ((src_start >= src_end) && (src_end != 0)) {
11756 break;
11757 }
11758
11759 /*
11760 * Verify that there are no gaps in the region
11761 */
11762
11763 tmp_entry = src_entry->vme_next;
11764 if ((tmp_entry->vme_start != src_start) ||
11765 (tmp_entry == vm_map_to_entry(src_map))) {
11766 RETURN(KERN_INVALID_ADDRESS);
11767 }
11768 }
11769
11770 /*
11771 * If the source should be destroyed, do it now, since the
11772 * copy was successful.
11773 */
11774 if (src_destroy) {
11775 (void) vm_map_delete(
11776 src_map,
11777 vm_map_trunc_page(src_addr,
11778 VM_MAP_PAGE_MASK(src_map)),
11779 src_end,
11780 ((src_map == kernel_map) ?
11781 VM_MAP_REMOVE_KUNWIRE :
11782 VM_MAP_REMOVE_NO_FLAGS),
11783 VM_MAP_NULL);
11784 } else {
11785 /* fix up the damage we did in the base map */
11786 vm_map_simplify_range(
11787 src_map,
11788 vm_map_trunc_page(src_addr,
11789 VM_MAP_PAGE_MASK(src_map)),
11790 vm_map_round_page(src_end,
11791 VM_MAP_PAGE_MASK(src_map)));
11792 }
11793
11794 vm_map_unlock(src_map);
11795 tmp_entry = VM_MAP_ENTRY_NULL;
11796
11797 if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) {
11798 vm_map_offset_t original_start, original_offset, original_end;
11799
11800 assert(VM_MAP_COPY_PAGE_MASK(copy) == PAGE_MASK);
11801
11802 /* adjust alignment of first copy_entry's "vme_start" */
11803 tmp_entry = vm_map_copy_first_entry(copy);
11804 if (tmp_entry != vm_map_copy_to_entry(copy)) {
11805 vm_map_offset_t adjustment;
11806
11807 original_start = tmp_entry->vme_start;
11808 original_offset = VME_OFFSET(tmp_entry);
11809
11810 /* map-align the start of the first copy entry... */
11811 adjustment = (tmp_entry->vme_start -
11812 vm_map_trunc_page(
11813 tmp_entry->vme_start,
11814 VM_MAP_PAGE_MASK(src_map)));
11815 tmp_entry->vme_start -= adjustment;
11816 VME_OFFSET_SET(tmp_entry,
11817 VME_OFFSET(tmp_entry) - adjustment);
11818 copy_addr -= adjustment;
11819 assert(tmp_entry->vme_start < tmp_entry->vme_end);
11820 /* ... adjust for mis-aligned start of copy range */
11821 adjustment =
11822 (vm_map_trunc_page(copy->offset,
11823 PAGE_MASK) -
11824 vm_map_trunc_page(copy->offset,
11825 VM_MAP_PAGE_MASK(src_map)));
11826 if (adjustment) {
11827 assert(page_aligned(adjustment));
11828 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
11829 tmp_entry->vme_start += adjustment;
11830 VME_OFFSET_SET(tmp_entry,
11831 (VME_OFFSET(tmp_entry) +
11832 adjustment));
11833 copy_addr += adjustment;
11834 assert(tmp_entry->vme_start < tmp_entry->vme_end);
11835 }
11836
11837 /*
11838 * Assert that the adjustments haven't exposed
11839 * more than was originally copied...
11840 */
11841 assert(tmp_entry->vme_start >= original_start);
11842 assert(VME_OFFSET(tmp_entry) >= original_offset);
11843 /*
11844 * ... and that it did not adjust outside of
11845 * a single 16K page.
11846 */
11847 assert(vm_map_trunc_page(tmp_entry->vme_start,
11848 VM_MAP_PAGE_MASK(src_map)) ==
11849 vm_map_trunc_page(original_start,
11850 VM_MAP_PAGE_MASK(src_map)));
11851 }
11852
11853 /* adjust alignment of last copy_entry's "vme_end" */
11854 tmp_entry = vm_map_copy_last_entry(copy);
11855 if (tmp_entry != vm_map_copy_to_entry(copy)) {
11856 vm_map_offset_t adjustment;
11857
11858 original_end = tmp_entry->vme_end;
11859
11860 /* map-align the end of the last copy entry... */
11861 tmp_entry->vme_end =
11862 vm_map_round_page(tmp_entry->vme_end,
11863 VM_MAP_PAGE_MASK(src_map));
11864 /* ... adjust for mis-aligned end of copy range */
11865 adjustment =
11866 (vm_map_round_page((copy->offset +
11867 copy->size),
11868 VM_MAP_PAGE_MASK(src_map)) -
11869 vm_map_round_page((copy->offset +
11870 copy->size),
11871 PAGE_MASK));
11872 if (adjustment) {
11873 assert(page_aligned(adjustment));
11874 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
11875 tmp_entry->vme_end -= adjustment;
11876 assert(tmp_entry->vme_start < tmp_entry->vme_end);
11877 }
11878
11879 /*
11880 * Assert that the adjustments haven't exposed
11881 * more than was originally copied...
11882 */
11883 assert(tmp_entry->vme_end <= original_end);
11884 /*
11885 * ... and that it did not adjust outside of a
11886 * a single 16K page.
11887 */
11888 assert(vm_map_round_page(tmp_entry->vme_end,
11889 VM_MAP_PAGE_MASK(src_map)) ==
11890 vm_map_round_page(original_end,
11891 VM_MAP_PAGE_MASK(src_map)));
11892 }
11893 }
11894
11895 /* Fix-up start and end points in copy. This is necessary */
11896 /* when the various entries in the copy object were picked */
11897 /* up from different sub-maps */
11898
11899 tmp_entry = vm_map_copy_first_entry(copy);
11900 copy_size = 0; /* compute actual size */
11901 while (tmp_entry != vm_map_copy_to_entry(copy)) {
11902 assert(VM_MAP_PAGE_ALIGNED(
11903 copy_addr + (tmp_entry->vme_end -
11904 tmp_entry->vme_start),
11905 VM_MAP_COPY_PAGE_MASK(copy)));
11906 assert(VM_MAP_PAGE_ALIGNED(
11907 copy_addr,
11908 VM_MAP_COPY_PAGE_MASK(copy)));
11909
11910 /*
11911 * The copy_entries will be injected directly into the
11912 * destination map and might not be "map aligned" there...
11913 */
11914 tmp_entry->map_aligned = FALSE;
11915
11916 tmp_entry->vme_end = copy_addr +
11917 (tmp_entry->vme_end - tmp_entry->vme_start);
11918 tmp_entry->vme_start = copy_addr;
11919 assert(tmp_entry->vme_start < tmp_entry->vme_end);
11920 copy_addr += tmp_entry->vme_end - tmp_entry->vme_start;
11921 copy_size += tmp_entry->vme_end - tmp_entry->vme_start;
11922 tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next;
11923 }
11924
11925 if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT &&
11926 copy_size < copy->size) {
11927 /*
11928 * The actual size of the VM map copy is smaller than what
11929 * was requested by the caller. This must be because some
11930 * PAGE_SIZE-sized pages are missing at the end of the last
11931 * VM_MAP_PAGE_SIZE(src_map)-sized chunk of the range.
11932 * The caller might not have been aware of those missing
11933 * pages and might not want to be aware of it, which is
11934 * fine as long as they don't try to access (and crash on)
11935 * those missing pages.
11936 * Let's adjust the size of the "copy", to avoid failing
11937 * in vm_map_copyout() or vm_map_copy_overwrite().
11938 */
11939 assert(vm_map_round_page(copy_size,
11940 VM_MAP_PAGE_MASK(src_map)) ==
11941 vm_map_round_page(copy->size,
11942 VM_MAP_PAGE_MASK(src_map)));
11943 copy->size = copy_size;
11944 }
11945
11946 *copy_result = copy;
11947 return KERN_SUCCESS;
11948
11949 #undef RETURN
11950 }
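/*
 * A minimal sketch of how a kernel-internal caller might drive the
 * copy-in path that ends above, modeled on the call made from
 * vm_map_fork_copy() below.  The wrapper name and the error handling
 * are assumptions for illustration only, not part of this file.
 */
#if 0 /* illustrative sketch; not compiled */
static kern_return_t
example_copyin_max_prot(
	vm_map_t        src_map,
	vm_map_offset_t start,
	vm_map_size_t   size,
	vm_map_copy_t   *copy_out)
{
	int             copyin_flags;
	kern_return_t   kr;

	/* check against max_protection, not the current protection */
	copyin_flags = VM_MAP_COPYIN_USE_MAXPROT;
	kr = vm_map_copyin_internal(src_map, start, size,
	    copyin_flags, copy_out);
	if (kr != KERN_SUCCESS) {
		*copy_out = VM_MAP_COPY_NULL;
	}
	return kr;
}
#endif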
11951
11952 kern_return_t
11953 vm_map_copy_extract(
11954 vm_map_t src_map,
11955 vm_map_address_t src_addr,
11956 vm_map_size_t len,
11957 vm_map_copy_t *copy_result, /* OUT */
11958 vm_prot_t *cur_prot, /* OUT */
11959 vm_prot_t *max_prot)
11960 {
11961 vm_map_offset_t src_start, src_end;
11962 vm_map_copy_t copy;
11963 kern_return_t kr;
11964
11965 /*
11966 * Check for copies of zero bytes.
11967 */
11968
11969 if (len == 0) {
11970 *copy_result = VM_MAP_COPY_NULL;
11971 return KERN_SUCCESS;
11972 }
11973
11974 /*
11975 * Check that the end address doesn't overflow
11976 */
11977 src_end = src_addr + len;
11978 if (src_end < src_addr) {
11979 return KERN_INVALID_ADDRESS;
11980 }
11981
11982 /*
11983 * Compute (page aligned) start and end of region
11984 */
11985 src_start = vm_map_trunc_page(src_addr, PAGE_MASK);
11986 src_end = vm_map_round_page(src_end, PAGE_MASK);
11987
11988 /*
11989 * Allocate a header element for the list.
11990 *
11991 * Use the start and end in the header to
11992 * remember the endpoints prior to rounding.
11993 */
11994
11995 copy = vm_map_copy_allocate();
11996 copy->type = VM_MAP_COPY_ENTRY_LIST;
11997 copy->cpy_hdr.entries_pageable = TRUE;
11998
11999 vm_map_store_init(&copy->cpy_hdr);
12000
12001 copy->offset = 0;
12002 copy->size = len;
12003
12004 kr = vm_map_remap_extract(src_map,
12005 src_addr,
12006 len,
12007 FALSE, /* copy */
12008 &copy->cpy_hdr,
12009 cur_prot,
12010 max_prot,
12011 VM_INHERIT_SHARE,
12012 TRUE, /* pageable */
12013 FALSE, /* same_map */
12014 VM_MAP_KERNEL_FLAGS_NONE);
12015 if (kr != KERN_SUCCESS) {
12016 vm_map_copy_discard(copy);
12017 return kr;
12018 }
12019
12020 *copy_result = copy;
12021 return KERN_SUCCESS;
12022 }
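/*
 * A sketch of a hypothetical caller of vm_map_copy_extract(): extract a
 * range as a shared entry-list copy and inspect the protections reported
 * for it.  The helper name and the decision to simply discard the copy
 * are assumptions for illustration only.
 */
#if 0 /* illustrative sketch; not compiled */
static kern_return_t
example_extract_range(
	vm_map_t         src_map,
	vm_map_address_t src_addr,
	vm_map_size_t    len)
{
	vm_map_copy_t copy;
	vm_prot_t     cur_prot, max_prot;
	kern_return_t kr;

	kr = vm_map_copy_extract(src_map, src_addr, len,
	    &copy, &cur_prot, &max_prot);
	if (kr != KERN_SUCCESS) {
		return kr;
	}
	/* a zero-length request succeeds with VM_MAP_COPY_NULL */
	if (copy == VM_MAP_COPY_NULL) {
		return KERN_SUCCESS;
	}
	if (!(cur_prot & VM_PROT_READ)) {
		/* the extracted range is not even readable right now */
		vm_map_copy_discard(copy);
		return KERN_PROTECTION_FAILURE;
	}
	/* ... hand "copy" off, or discard it when done ... */
	vm_map_copy_discard(copy);
	return KERN_SUCCESS;
}
#endif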
12023
12024 /*
12025 * vm_map_copyin_object:
12026 *
12027 * Create a copy object from an object.
12028 * Our caller donates an object reference.
12029 */
12030
12031 kern_return_t
12032 vm_map_copyin_object(
12033 vm_object_t object,
12034 vm_object_offset_t offset, /* offset of region in object */
12035 vm_object_size_t size, /* size of region in object */
12036 vm_map_copy_t *copy_result) /* OUT */
12037 {
12038 vm_map_copy_t copy; /* Resulting copy */
12039
12040 /*
12041 * We drop the object into a special copy object
12042 * that contains the object directly.
12043 */
12044
12045 copy = vm_map_copy_allocate();
12046 copy->type = VM_MAP_COPY_OBJECT;
12047 copy->cpy_object = object;
12048 copy->offset = offset;
12049 copy->size = size;
12050
12051 *copy_result = copy;
12052 return KERN_SUCCESS;
12053 }
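/*
 * A sketch of the donation pattern described above: the caller takes an
 * extra reference on the VM object and donates it to the new copy
 * object.  The wrapper name is hypothetical; vm_object_reference() is
 * assumed to be how the caller obtains the donated reference.
 */
#if 0 /* illustrative sketch; not compiled */
static kern_return_t
example_wrap_object_in_copy(
	vm_object_t        object,
	vm_object_offset_t offset,
	vm_object_size_t   size,
	vm_map_copy_t      *copy_out)
{
	/* vm_map_copyin_object() consumes one object reference */
	vm_object_reference(object);
	return vm_map_copyin_object(object, offset, size, copy_out);
}
#endif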
12054
12055 static void
12056 vm_map_fork_share(
12057 vm_map_t old_map,
12058 vm_map_entry_t old_entry,
12059 vm_map_t new_map)
12060 {
12061 vm_object_t object;
12062 vm_map_entry_t new_entry;
12063
12064 /*
12065 * New sharing code. New map entry
12066 * references original object. Internal
12067 * objects use asynchronous copy algorithm for
12068 * future copies. First make sure we have
12069 * the right object. If we need a shadow,
12070 * or someone else already has one, then
12071 * make a new shadow and share it.
12072 */
12073
12074 object = VME_OBJECT(old_entry);
12075 if (old_entry->is_sub_map) {
12076 assert(old_entry->wired_count == 0);
12077 #ifndef NO_NESTED_PMAP
12078 if (old_entry->use_pmap) {
12079 kern_return_t result;
12080
12081 result = pmap_nest(new_map->pmap,
12082 (VME_SUBMAP(old_entry))->pmap,
12083 (addr64_t)old_entry->vme_start,
12084 (addr64_t)old_entry->vme_start,
12085 (uint64_t)(old_entry->vme_end - old_entry->vme_start));
12086 if (result) {
12087 panic("vm_map_fork_share: pmap_nest failed!");
12088 }
12089 }
12090 #endif /* NO_NESTED_PMAP */
12091 } else if (object == VM_OBJECT_NULL) {
12092 object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
12093 old_entry->vme_start));
12094 VME_OFFSET_SET(old_entry, 0);
12095 VME_OBJECT_SET(old_entry, object);
12096 old_entry->use_pmap = TRUE;
12097 // assert(!old_entry->needs_copy);
12098 } else if (object->copy_strategy !=
12099 MEMORY_OBJECT_COPY_SYMMETRIC) {
12100 /*
12101 * We are already using an asymmetric
12102 * copy, and therefore we already have
12103 * the right object.
12104 */
12105
12106 assert(!old_entry->needs_copy);
12107 } else if (old_entry->needs_copy || /* case 1 */
12108 object->shadowed || /* case 2 */
12109 (!object->true_share && /* case 3 */
12110 !old_entry->is_shared &&
12111 (object->vo_size >
12112 (vm_map_size_t)(old_entry->vme_end -
12113 old_entry->vme_start)))) {
12114 /*
12115 * We need to create a shadow.
12116 * There are three cases here.
12117 * In the first case, we need to
12118 * complete a deferred symmetrical
12119 * copy that we participated in.
12120 * In the second and third cases,
12121 * we need to create the shadow so
12122 * that changes that we make to the
12123 * object do not interfere with
12124 * any symmetrical copies which
12125 * have occurred (case 2) or which
12126 * might occur (case 3).
12127 *
12128 * The first case is when we had
12129 * deferred shadow object creation
12130 * via the entry->needs_copy mechanism.
12131 * This mechanism only works when
12132 * only one entry points to the source
12133 * object, and we are about to create
12134 * a second entry pointing to the
12135 * same object. The problem is that
12136 * there is no way of mapping from
12137 * an object to the entries pointing
12138 * to it. (Deferred shadow creation
12139 * works with one entry because it occurs
12140 * at fault time, and we walk from the
12141 * entry to the object when handling
12142 * the fault.)
12143 *
12144 * The second case is when the object
12145 * to be shared has already been copied
12146 * with a symmetric copy, but we point
12147 * directly to the object without
12148 * needs_copy set in our entry. (This
12149 * can happen because different ranges
12150 * of an object can be pointed to by
12151 * different entries. In particular,
12152 * a single entry pointing to an object
12153 * can be split by a call to vm_inherit,
12154 * which, combined with task_create, can
12155 * result in the different entries
12156 * having different needs_copy values.)
12157 * The shadowed flag in the object allows
12158 * us to detect this case. The problem
12159 * with this case is that if this object
12160 * has or will have shadows, then we
12161 * must not perform an asymmetric copy
12162 * of this object, since such a copy
12163 * allows the object to be changed, which
12164 * will break the previous symmetrical
12165 * copies (which rely upon the object
12166 * not changing). In a sense, the shadowed
12167 * flag says "don't change this object".
12168 * We fix this by creating a shadow
12169 * object for this object, and sharing
12170 * that. This works because we are free
12171 * to change the shadow object (and thus
12172 * to use an asymmetric copy strategy);
12173 * this is also semantically correct,
12174 * since this object is temporary, and
12175 * therefore a copy of the object is
12176 * as good as the object itself. (This
12177 * is not true for permanent objects,
12178 * since the pager needs to see changes,
12179 * which won't happen if the changes
12180 * are made to a copy.)
12181 *
12182 * The third case is when the object
12183 * to be shared has parts sticking
12184 * outside of the entry we're working
12185 * with, and thus may in the future
12186 * be subject to a symmetrical copy.
12187 * (This is a preemptive version of
12188 * case 2.)
12189 */
12190 VME_OBJECT_SHADOW(old_entry,
12191 (vm_map_size_t) (old_entry->vme_end -
12192 old_entry->vme_start));
12193
12194 /*
12195 * If we're making a shadow for other than
12196 * copy on write reasons, then we have
12197 * to remove write permission.
12198 */
12199
12200 if (!old_entry->needs_copy &&
12201 (old_entry->protection & VM_PROT_WRITE)) {
12202 vm_prot_t prot;
12203
12204 assert(!pmap_has_prot_policy(old_entry->protection));
12205
12206 prot = old_entry->protection & ~VM_PROT_WRITE;
12207
12208 assert(!pmap_has_prot_policy(prot));
12209
12210 if (override_nx(old_map, VME_ALIAS(old_entry)) && prot) {
12211 prot |= VM_PROT_EXECUTE;
12212 }
12213
12214
12215 if (old_map->mapped_in_other_pmaps) {
12216 vm_object_pmap_protect(
12217 VME_OBJECT(old_entry),
12218 VME_OFFSET(old_entry),
12219 (old_entry->vme_end -
12220 old_entry->vme_start),
12221 PMAP_NULL,
12222 old_entry->vme_start,
12223 prot);
12224 } else {
12225 pmap_protect(old_map->pmap,
12226 old_entry->vme_start,
12227 old_entry->vme_end,
12228 prot);
12229 }
12230 }
12231
12232 old_entry->needs_copy = FALSE;
12233 object = VME_OBJECT(old_entry);
12234 }
12235
12236
12237 /*
12238 * If object was using a symmetric copy strategy,
12239 * change its copy strategy to the default
12240 * asymmetric copy strategy, which is copy_delay
12241 * in the non-norma case and copy_call in the
12242 * norma case. Bump the reference count for the
12243 * new entry.
12244 */
12245
12246 if (old_entry->is_sub_map) {
12247 vm_map_lock(VME_SUBMAP(old_entry));
12248 vm_map_reference(VME_SUBMAP(old_entry));
12249 vm_map_unlock(VME_SUBMAP(old_entry));
12250 } else {
12251 vm_object_lock(object);
12252 vm_object_reference_locked(object);
12253 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
12254 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
12255 }
12256 vm_object_unlock(object);
12257 }
12258
12259 /*
12260 * Clone the entry, using object ref from above.
12261 * Mark both entries as shared.
12262 */
12263
12264 new_entry = vm_map_entry_create(new_map, FALSE); /* Never the kernel
12265 * map or descendants */
12266 vm_map_entry_copy(new_entry, old_entry);
12267 old_entry->is_shared = TRUE;
12268 new_entry->is_shared = TRUE;
12269
12270 /*
12271 * We're dealing with a shared mapping, so the resulting mapping
12272 * should inherit some of the original mapping's accounting settings.
12273 * "iokit_acct" should have been cleared in vm_map_entry_copy().
12274 * "use_pmap" should stay the same as before (if it hasn't been reset
12275 * to TRUE when we cleared "iokit_acct").
12276 */
12277 assert(!new_entry->iokit_acct);
12278
12279 /*
12280 * If the old entry's inheritance is VM_INHERIT_NONE,
12281 * the new entry is for a corpse fork, so remove the
12282 * write permission from the new entry.
12283 */
12284 if (old_entry->inheritance == VM_INHERIT_NONE) {
12285 new_entry->protection &= ~VM_PROT_WRITE;
12286 new_entry->max_protection &= ~VM_PROT_WRITE;
12287 }
12288
12289 /*
12290 * Insert the entry into the new map -- we
12291 * know we're inserting at the end of the new
12292 * map.
12293 */
12294
12295 vm_map_store_entry_link(new_map, vm_map_last_entry(new_map), new_entry,
12296 VM_MAP_KERNEL_FLAGS_NONE);
12297
12298 /*
12299 * Update the physical map
12300 */
12301
12302 if (old_entry->is_sub_map) {
12303 /* Bill Angell pmap support goes here */
12304 } else {
12305 pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
12306 old_entry->vme_end - old_entry->vme_start,
12307 old_entry->vme_start);
12308 }
12309 }
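/*
 * The three-case discussion above reduces to the single test made near
 * the top of vm_map_fork_share().  A hypothetical predicate restating
 * that test, as a reading aid only; no such helper exists in this file.
 */
#if 0 /* illustrative restatement; not compiled */
static boolean_t
example_share_needs_shadow(
	vm_map_entry_t old_entry,
	vm_object_t    object)
{
	vm_map_size_t entry_size;

	entry_size = (vm_map_size_t)(old_entry->vme_end -
	    old_entry->vme_start);
	return old_entry->needs_copy ||        /* case 1: deferred COW */
	       object->shadowed ||             /* case 2: symmetric copy exists */
	       (!object->true_share &&         /* case 3: object extends past entry */
	       !old_entry->is_shared &&
	       object->vo_size > entry_size);
}
#endif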
12310
12311 static boolean_t
12312 vm_map_fork_copy(
12313 vm_map_t old_map,
12314 vm_map_entry_t *old_entry_p,
12315 vm_map_t new_map,
12316 int vm_map_copyin_flags)
12317 {
12318 vm_map_entry_t old_entry = *old_entry_p;
12319 vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start;
12320 vm_map_offset_t start = old_entry->vme_start;
12321 vm_map_copy_t copy;
12322 vm_map_entry_t last = vm_map_last_entry(new_map);
12323
12324 vm_map_unlock(old_map);
12325 /*
12326 * Use maxprot version of copyin because we
12327 * care about whether this memory can ever
12328 * be accessed, not just whether it's accessible
12329 * right now.
12330 */
12331 vm_map_copyin_flags |= VM_MAP_COPYIN_USE_MAXPROT;
12332 if (vm_map_copyin_internal(old_map, start, entry_size,
12333 vm_map_copyin_flags, &copy)
12334 != KERN_SUCCESS) {
12335 /*
12336 * The map might have changed while it
12337 * was unlocked, check it again. Skip
12338 * any blank space or permanently
12339 * unreadable region.
12340 */
12341 vm_map_lock(old_map);
12342 if (!vm_map_lookup_entry(old_map, start, &last) ||
12343 (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) {
12344 last = last->vme_next;
12345 }
12346 *old_entry_p = last;
12347
12348 /*
12349 * XXX For some error returns, want to
12350 * XXX skip to the next element. Note
12351 * that INVALID_ADDRESS and
12352 * PROTECTION_FAILURE are handled above.
12353 */
12354
12355 return FALSE;
12356 }
12357
12358 /*
12359 * Insert the copy into the new map
12360 */
12361
12362 vm_map_copy_insert(new_map, last, copy);
12363
12364 /*
12365 * Pick up the traversal at the end of
12366 * the copied region.
12367 */
12368
12369 vm_map_lock(old_map);
12370 start += entry_size;
12371 if (!vm_map_lookup_entry(old_map, start, &last)) {
12372 last = last->vme_next;
12373 } else {
12374 if (last->vme_start == start) {
12375 /*
12376 * No need to clip here and we don't
12377 * want to cause any unnecessary
12378 * unnesting...
12379 */
12380 } else {
12381 vm_map_clip_start(old_map, last, start);
12382 }
12383 }
12384 *old_entry_p = last;
12385
12386 return TRUE;
12387 }
12388
12389 /*
12390 * vm_map_fork:
12391 *
12392 * Create and return a new map based on the old
12393 * map, according to the inheritance values on the
12394 * regions in that map and the options.
12395 *
12396 * The source map must not be locked.
12397 */
12398 vm_map_t
12399 vm_map_fork(
12400 ledger_t ledger,
12401 vm_map_t old_map,
12402 int options)
12403 {
12404 pmap_t new_pmap;
12405 vm_map_t new_map;
12406 vm_map_entry_t old_entry;
12407 vm_map_size_t new_size = 0, entry_size;
12408 vm_map_entry_t new_entry;
12409 boolean_t src_needs_copy;
12410 boolean_t new_entry_needs_copy;
12411 boolean_t pmap_is64bit;
12412 int vm_map_copyin_flags;
12413 vm_inherit_t old_entry_inheritance;
12414 int map_create_options;
12415 kern_return_t footprint_collect_kr;
12416
12417 if (options & ~(VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
12418 VM_MAP_FORK_PRESERVE_PURGEABLE |
12419 VM_MAP_FORK_CORPSE_FOOTPRINT)) {
12420 /* unsupported option */
12421 return VM_MAP_NULL;
12422 }
12423
12424 pmap_is64bit =
12425 #if defined(__i386__) || defined(__x86_64__)
12426 old_map->pmap->pm_task_map != TASK_MAP_32BIT;
12427 #elif defined(__arm64__)
12428 old_map->pmap->max == MACH_VM_MAX_ADDRESS;
12429 #elif defined(__arm__)
12430 FALSE;
12431 #else
12432 #error Unknown architecture.
12433 #endif
12434
12435 unsigned int pmap_flags = 0;
12436 pmap_flags |= pmap_is64bit ? PMAP_CREATE_64BIT : 0;
12437 #if defined(HAS_APPLE_PAC)
12438 pmap_flags |= old_map->pmap->disable_jop ? PMAP_CREATE_DISABLE_JOP : 0;
12439 #endif
12440 new_pmap = pmap_create_options(ledger, (vm_map_size_t) 0, pmap_flags);
12441
12442 vm_map_reference_swap(old_map);
12443 vm_map_lock(old_map);
12444
12445 map_create_options = 0;
12446 if (old_map->hdr.entries_pageable) {
12447 map_create_options |= VM_MAP_CREATE_PAGEABLE;
12448 }
12449 if (options & VM_MAP_FORK_CORPSE_FOOTPRINT) {
12450 map_create_options |= VM_MAP_CREATE_CORPSE_FOOTPRINT;
12451 footprint_collect_kr = KERN_SUCCESS;
12452 }
12453 new_map = vm_map_create_options(new_pmap,
12454 old_map->min_offset,
12455 old_map->max_offset,
12456 map_create_options);
12457 vm_map_lock(new_map);
12458 vm_commit_pagezero_status(new_map);
12459 /* inherit the parent map's page size */
12460 vm_map_set_page_shift(new_map, VM_MAP_PAGE_SHIFT(old_map));
12461 for (
12462 old_entry = vm_map_first_entry(old_map);
12463 old_entry != vm_map_to_entry(old_map);
12464 ) {
12465 entry_size = old_entry->vme_end - old_entry->vme_start;
12466
12467 old_entry_inheritance = old_entry->inheritance;
12468 /*
12469 * If the caller used the VM_MAP_FORK_SHARE_IF_INHERIT_NONE option,
12470 * share VM_INHERIT_NONE entries that are not backed by a
12471 * device pager.
12472 */
12473 if (old_entry_inheritance == VM_INHERIT_NONE &&
12474 (options & VM_MAP_FORK_SHARE_IF_INHERIT_NONE) &&
12475 !(!old_entry->is_sub_map &&
12476 VME_OBJECT(old_entry) != NULL &&
12477 VME_OBJECT(old_entry)->pager != NULL &&
12478 is_device_pager_ops(
12479 VME_OBJECT(old_entry)->pager->mo_pager_ops))) {
12480 old_entry_inheritance = VM_INHERIT_SHARE;
12481 }
12482
12483 if (old_entry_inheritance != VM_INHERIT_NONE &&
12484 (options & VM_MAP_FORK_CORPSE_FOOTPRINT) &&
12485 footprint_collect_kr == KERN_SUCCESS) {
12486 /*
12487 * The corpse won't have old_map->pmap to query
12488 * footprint information, so collect that data now
12489 * and store it in new_map->vmmap_corpse_footprint
12490 * for later autopsy.
12491 */
12492 footprint_collect_kr =
12493 vm_map_corpse_footprint_collect(old_map,
12494 old_entry,
12495 new_map);
12496 }
12497
12498 switch (old_entry_inheritance) {
12499 case VM_INHERIT_NONE:
12500 break;
12501
12502 case VM_INHERIT_SHARE:
12503 vm_map_fork_share(old_map, old_entry, new_map);
12504 new_size += entry_size;
12505 break;
12506
12507 case VM_INHERIT_COPY:
12508
12509 /*
12510 * Inline the copy_quickly case;
12511 * upon failure, fall back on call
12512 * to vm_map_fork_copy.
12513 */
12514
12515 if (old_entry->is_sub_map) {
12516 break;
12517 }
12518 if ((old_entry->wired_count != 0) ||
12519 ((VME_OBJECT(old_entry) != NULL) &&
12520 (VME_OBJECT(old_entry)->true_share))) {
12521 goto slow_vm_map_fork_copy;
12522 }
12523
12524 new_entry = vm_map_entry_create(new_map, FALSE); /* never the kernel map or descendants */
12525 vm_map_entry_copy(new_entry, old_entry);
12526 if (new_entry->is_sub_map) {
12527 /* clear address space specifics */
12528 new_entry->use_pmap = FALSE;
12529 } else {
12530 /*
12531 * We're dealing with a copy-on-write operation,
12532 * so the resulting mapping should not inherit
12533 * the original mapping's accounting settings.
12534 * "iokit_acct" should have been cleared in
12535 * vm_map_entry_copy().
12536 * "use_pmap" should be reset to its default
12537 * (TRUE) so that the new mapping gets
12538 * accounted for in the task's memory footprint.
12539 */
12540 assert(!new_entry->iokit_acct);
12541 new_entry->use_pmap = TRUE;
12542 }
12543
12544 if (!vm_object_copy_quickly(
12545 VME_OBJECT_PTR(new_entry),
12546 VME_OFFSET(old_entry),
12547 (old_entry->vme_end -
12548 old_entry->vme_start),
12549 &src_needs_copy,
12550 &new_entry_needs_copy)) {
12551 vm_map_entry_dispose(new_map, new_entry);
12552 goto slow_vm_map_fork_copy;
12553 }
12554
12555 /*
12556 * Handle copy-on-write obligations
12557 */
12558
12559 if (src_needs_copy && !old_entry->needs_copy) {
12560 vm_prot_t prot;
12561
12562 assert(!pmap_has_prot_policy(old_entry->protection));
12563
12564 prot = old_entry->protection & ~VM_PROT_WRITE;
12565
12566 if (override_nx(old_map, VME_ALIAS(old_entry))
12567 && prot) {
12568 prot |= VM_PROT_EXECUTE;
12569 }
12570
12571 assert(!pmap_has_prot_policy(prot));
12572
12573 vm_object_pmap_protect(
12574 VME_OBJECT(old_entry),
12575 VME_OFFSET(old_entry),
12576 (old_entry->vme_end -
12577 old_entry->vme_start),
12578 ((old_entry->is_shared
12579 || old_map->mapped_in_other_pmaps)
12580 ? PMAP_NULL :
12581 old_map->pmap),
12582 old_entry->vme_start,
12583 prot);
12584
12585 assert(old_entry->wired_count == 0);
12586 old_entry->needs_copy = TRUE;
12587 }
12588 new_entry->needs_copy = new_entry_needs_copy;
12589
12590 /*
12591 * Insert the entry at the end
12592 * of the map.
12593 */
12594
12595 vm_map_store_entry_link(new_map,
12596 vm_map_last_entry(new_map),
12597 new_entry,
12598 VM_MAP_KERNEL_FLAGS_NONE);
12599 new_size += entry_size;
12600 break;
12601
12602 slow_vm_map_fork_copy:
12603 vm_map_copyin_flags = 0;
12604 if (options & VM_MAP_FORK_PRESERVE_PURGEABLE) {
12605 vm_map_copyin_flags |=
12606 VM_MAP_COPYIN_PRESERVE_PURGEABLE;
12607 }
12608 if (vm_map_fork_copy(old_map,
12609 &old_entry,
12610 new_map,
12611 vm_map_copyin_flags)) {
12612 new_size += entry_size;
12613 }
12614 continue;
12615 }
12616 old_entry = old_entry->vme_next;
12617 }
12618
12619 #if defined(__arm64__)
12620 pmap_insert_sharedpage(new_map->pmap);
12621 #endif
12622
12623 new_map->size = new_size;
12624
12625 if (options & VM_MAP_FORK_CORPSE_FOOTPRINT) {
12626 vm_map_corpse_footprint_collect_done(new_map);
12627 }
12628
12629 vm_map_unlock(new_map);
12630 vm_map_unlock(old_map);
12631 vm_map_deallocate(old_map);
12632
12633 return new_map;
12634 }
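/*
 * A sketch of a hypothetical caller forking an address space for a
 * corpse, combining the option bits handled above.  The helper name
 * and the surrounding task/ledger plumbing are assumptions; only the
 * vm_map_fork() call itself is taken from this file.
 */
#if 0 /* illustrative sketch; not compiled */
static vm_map_t
example_fork_for_corpse(
	ledger_t ledger,
	vm_map_t old_map)
{
	int      options;
	vm_map_t new_map;

	options = VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
	    VM_MAP_FORK_CORPSE_FOOTPRINT;
	new_map = vm_map_fork(ledger, old_map, options);
	if (new_map == VM_MAP_NULL) {
		/* vm_map_fork() rejects unsupported option bits */
		return VM_MAP_NULL;
	}
	return new_map;
}
#endif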
12635
12636 /*
12637 * vm_map_exec:
12638 *
12639 * Set up the "new_map" with the proper execution environment according
12640 * to the type of executable (platform, 64bit, chroot environment).
12641 * Map the comm page and shared region, etc...
12642 */
12643 kern_return_t
12644 vm_map_exec(
12645 vm_map_t new_map,
12646 task_t task,
12647 boolean_t is64bit,
12648 void *fsroot,
12649 cpu_type_t cpu,
12650 cpu_subtype_t cpu_subtype)
12651 {
12652 SHARED_REGION_TRACE_DEBUG(
12653 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x,0x%x): ->\n",
12654 (void *)VM_KERNEL_ADDRPERM(current_task()),
12655 (void *)VM_KERNEL_ADDRPERM(new_map),
12656 (void *)VM_KERNEL_ADDRPERM(task),
12657 (void *)VM_KERNEL_ADDRPERM(fsroot),
12658 cpu,
12659 cpu_subtype));
12660 (void) vm_commpage_enter(new_map, task, is64bit);
12661 (void) vm_shared_region_enter(new_map, task, is64bit, fsroot, cpu, cpu_subtype);
12662 SHARED_REGION_TRACE_DEBUG(
12663 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x,0x%x): <-\n",
12664 (void *)VM_KERNEL_ADDRPERM(current_task()),
12665 (void *)VM_KERNEL_ADDRPERM(new_map),
12666 (void *)VM_KERNEL_ADDRPERM(task),
12667 (void *)VM_KERNEL_ADDRPERM(fsroot),
12668 cpu,
12669 cpu_subtype));
12670 return KERN_SUCCESS;
12671 }
12672
12673 /*
12674 * vm_map_lookup_locked:
12675 *
12676 * Finds the VM object, offset, and
12677 * protection for a given virtual address in the
12678 * specified map, assuming a page fault of the
12679 * type specified.
12680 *
12681 * Returns the (object, offset, protection) for
12682 * this address, whether it is wired down, and whether
12683 * this map has the only reference to the data in question.
12684 * In order to later verify this lookup, a "version"
12685 * is returned.
12686 *
12687 * The map MUST be locked by the caller and WILL be
12688 * locked on exit. In order to guarantee the
12689 * existence of the returned object, it is returned
12690 * locked.
12691 *
12692 * If a lookup is requested with "write protection"
12693 * specified, the map may be changed to perform virtual
12694 * copying operations, although the data referenced will
12695 * remain the same.
12696 */
12697 kern_return_t
12698 vm_map_lookup_locked(
12699 vm_map_t *var_map, /* IN/OUT */
12700 vm_map_offset_t vaddr,
12701 vm_prot_t fault_type,
12702 int object_lock_type,
12703 vm_map_version_t *out_version, /* OUT */
12704 vm_object_t *object, /* OUT */
12705 vm_object_offset_t *offset, /* OUT */
12706 vm_prot_t *out_prot, /* OUT */
12707 boolean_t *wired, /* OUT */
12708 vm_object_fault_info_t fault_info, /* OUT */
12709 vm_map_t *real_map)
12710 {
12711 vm_map_entry_t entry;
12712 vm_map_t map = *var_map;
12713 vm_map_t old_map = *var_map;
12714 vm_map_t cow_sub_map_parent = VM_MAP_NULL;
12715 vm_map_offset_t cow_parent_vaddr = 0;
12716 vm_map_offset_t old_start = 0;
12717 vm_map_offset_t old_end = 0;
12718 vm_prot_t prot;
12719 boolean_t mask_protections;
12720 boolean_t force_copy;
12721 vm_prot_t original_fault_type;
12722
12723 /*
12724 * VM_PROT_MASK means that the caller wants us to use "fault_type"
12725 * as a mask against the mapping's actual protections, not as an
12726 * absolute value.
12727 */
12728 mask_protections = (fault_type & VM_PROT_IS_MASK) ? TRUE : FALSE;
12729 force_copy = (fault_type & VM_PROT_COPY) ? TRUE : FALSE;
12730 fault_type &= VM_PROT_ALL;
12731 original_fault_type = fault_type;
12732
12733 *real_map = map;
12734
12735 RetryLookup:
12736 fault_type = original_fault_type;
12737
12738 /*
12739 * If the map has an interesting hint, try it before calling
12740 * full blown lookup routine.
12741 */
12742 entry = map->hint;
12743
12744 if ((entry == vm_map_to_entry(map)) ||
12745 (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
12746 vm_map_entry_t tmp_entry;
12747
12748 /*
12749 * Entry was either not a valid hint, or the vaddr
12750 * was not contained in the entry, so do a full lookup.
12751 */
12752 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
12753 if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
12754 vm_map_unlock(cow_sub_map_parent);
12755 }
12756 if ((*real_map != map)
12757 && (*real_map != cow_sub_map_parent)) {
12758 vm_map_unlock(*real_map);
12759 }
12760 return KERN_INVALID_ADDRESS;
12761 }
12762
12763 entry = tmp_entry;
12764 }
12765 if (map == old_map) {
12766 old_start = entry->vme_start;
12767 old_end = entry->vme_end;
12768 }
12769
12770 /*
12771 * Handle submaps. Drop lock on upper map, submap is
12772 * returned locked.
12773 */
12774
12775 submap_recurse:
12776 if (entry->is_sub_map) {
12777 vm_map_offset_t local_vaddr;
12778 vm_map_offset_t end_delta;
12779 vm_map_offset_t start_delta;
12780 vm_map_entry_t submap_entry;
12781 vm_prot_t subentry_protection;
12782 vm_prot_t subentry_max_protection;
12783 boolean_t subentry_no_copy_on_read;
12784 boolean_t mapped_needs_copy = FALSE;
12785
12786 local_vaddr = vaddr;
12787
12788 if ((entry->use_pmap &&
12789 !((fault_type & VM_PROT_WRITE) ||
12790 force_copy))) {
12791 /* if real_map equals map we unlock below */
12792 if ((*real_map != map) &&
12793 (*real_map != cow_sub_map_parent)) {
12794 vm_map_unlock(*real_map);
12795 }
12796 *real_map = VME_SUBMAP(entry);
12797 }
12798
12799 if (entry->needs_copy &&
12800 ((fault_type & VM_PROT_WRITE) ||
12801 force_copy)) {
12802 if (!mapped_needs_copy) {
12803 if (vm_map_lock_read_to_write(map)) {
12804 vm_map_lock_read(map);
12805 *real_map = map;
12806 goto RetryLookup;
12807 }
12808 vm_map_lock_read(VME_SUBMAP(entry));
12809 *var_map = VME_SUBMAP(entry);
12810 cow_sub_map_parent = map;
12811 /* reset base to map before cow object */
12812 /* this is the map which will accept */
12813 /* the new cow object */
12814 old_start = entry->vme_start;
12815 old_end = entry->vme_end;
12816 cow_parent_vaddr = vaddr;
12817 mapped_needs_copy = TRUE;
12818 } else {
12819 vm_map_lock_read(VME_SUBMAP(entry));
12820 *var_map = VME_SUBMAP(entry);
12821 if ((cow_sub_map_parent != map) &&
12822 (*real_map != map)) {
12823 vm_map_unlock(map);
12824 }
12825 }
12826 } else {
12827 vm_map_lock_read(VME_SUBMAP(entry));
12828 *var_map = VME_SUBMAP(entry);
12829 /* leave the map locked if it is a target */
12830 /* cow sub_map above; otherwise, just */
12831 /* follow the maps down to the object. */
12832 /* Here we unlock knowing we are not */
12833 /* revisiting the map. */
12834 if ((*real_map != map) && (map != cow_sub_map_parent)) {
12835 vm_map_unlock_read(map);
12836 }
12837 }
12838
12839 map = *var_map;
12840
12841 /* calculate the offset in the submap for vaddr */
12842 local_vaddr = (local_vaddr - entry->vme_start) + VME_OFFSET(entry);
12843
12844 RetrySubMap:
12845 if (!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
12846 if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
12847 vm_map_unlock(cow_sub_map_parent);
12848 }
12849 if ((*real_map != map)
12850 && (*real_map != cow_sub_map_parent)) {
12851 vm_map_unlock(*real_map);
12852 }
12853 *real_map = map;
12854 return KERN_INVALID_ADDRESS;
12855 }
12856
12857 /* find the attenuated shadow of the underlying object */
12858 /* on our target map */
12859
12860 /* In English: the submap object may extend beyond the */
12861 /* region mapped by the entry, or may only fill a portion */
12862 /* of it. For our purposes, we only care if the object */
12863 /* doesn't fill it. In this case the area which will */
12864 /* ultimately be clipped in the top map will only need */
12865 /* to be as big as the portion of the underlying entry */
12866 /* which is mapped */
12867 start_delta = submap_entry->vme_start > VME_OFFSET(entry) ?
12868 submap_entry->vme_start - VME_OFFSET(entry) : 0;
12869
12870 end_delta =
12871 (VME_OFFSET(entry) + start_delta + (old_end - old_start)) <=
12872 submap_entry->vme_end ?
12873 0 : (VME_OFFSET(entry) +
12874 (old_end - old_start))
12875 - submap_entry->vme_end;
12876
12877 old_start += start_delta;
12878 old_end -= end_delta;
12879
12880 if (submap_entry->is_sub_map) {
12881 entry = submap_entry;
12882 vaddr = local_vaddr;
12883 goto submap_recurse;
12884 }
12885
12886 if (((fault_type & VM_PROT_WRITE) ||
12887 force_copy)
12888 && cow_sub_map_parent) {
12889 vm_object_t sub_object, copy_object;
12890 vm_object_offset_t copy_offset;
12891 vm_map_offset_t local_start;
12892 vm_map_offset_t local_end;
12893 boolean_t copied_slowly = FALSE;
12894
12895 if (vm_map_lock_read_to_write(map)) {
12896 vm_map_lock_read(map);
12897 old_start -= start_delta;
12898 old_end += end_delta;
12899 goto RetrySubMap;
12900 }
12901
12902
12903 sub_object = VME_OBJECT(submap_entry);
12904 if (sub_object == VM_OBJECT_NULL) {
12905 sub_object =
12906 vm_object_allocate(
12907 (vm_map_size_t)
12908 (submap_entry->vme_end -
12909 submap_entry->vme_start));
12910 VME_OBJECT_SET(submap_entry, sub_object);
12911 VME_OFFSET_SET(submap_entry, 0);
12912 assert(!submap_entry->is_sub_map);
12913 assert(submap_entry->use_pmap);
12914 }
12915 local_start = local_vaddr -
12916 (cow_parent_vaddr - old_start);
12917 local_end = local_vaddr +
12918 (old_end - cow_parent_vaddr);
12919 vm_map_clip_start(map, submap_entry, local_start);
12920 vm_map_clip_end(map, submap_entry, local_end);
12921 if (submap_entry->is_sub_map) {
12922 /* unnesting was done when clipping */
12923 assert(!submap_entry->use_pmap);
12924 }
12925
12926 /* This is the COW case; let's connect */
12927 /* an entry in our space to the underlying */
12928 /* object in the submap, bypassing the */
12929 /* submap. */
12930
12931
12932 if (submap_entry->wired_count != 0 ||
12933 (sub_object->copy_strategy ==
12934 MEMORY_OBJECT_COPY_NONE)) {
12935 vm_object_lock(sub_object);
12936 vm_object_copy_slowly(sub_object,
12937 VME_OFFSET(submap_entry),
12938 (submap_entry->vme_end -
12939 submap_entry->vme_start),
12940 FALSE,
12941 &copy_object);
12942 copied_slowly = TRUE;
12943 } else {
12944 /* set up shadow object */
12945 copy_object = sub_object;
12946 vm_object_lock(sub_object);
12947 vm_object_reference_locked(sub_object);
12948 sub_object->shadowed = TRUE;
12949 vm_object_unlock(sub_object);
12950
12951 assert(submap_entry->wired_count == 0);
12952 submap_entry->needs_copy = TRUE;
12953
12954 prot = submap_entry->protection;
12955 assert(!pmap_has_prot_policy(prot));
12956 prot = prot & ~VM_PROT_WRITE;
12957 assert(!pmap_has_prot_policy(prot));
12958
12959 if (override_nx(old_map,
12960 VME_ALIAS(submap_entry))
12961 && prot) {
12962 prot |= VM_PROT_EXECUTE;
12963 }
12964
12965 vm_object_pmap_protect(
12966 sub_object,
12967 VME_OFFSET(submap_entry),
12968 submap_entry->vme_end -
12969 submap_entry->vme_start,
12970 (submap_entry->is_shared
12971 || map->mapped_in_other_pmaps) ?
12972 PMAP_NULL : map->pmap,
12973 submap_entry->vme_start,
12974 prot);
12975 }
12976
12977 /*
12978 * Adjust the fault offset to the submap entry.
12979 */
12980 copy_offset = (local_vaddr -
12981 submap_entry->vme_start +
12982 VME_OFFSET(submap_entry));
12983
12984 /* This works differently from the */
12985 /* normal submap case. We go back */
12986 /* to the parent of the cow map and */
12987 /* clip out the target portion of */
12988 /* the sub_map, substituting the */
12989 /* new copy object. */
12990
12991 subentry_protection = submap_entry->protection;
12992 subentry_max_protection = submap_entry->max_protection;
12993 subentry_no_copy_on_read = submap_entry->vme_no_copy_on_read;
12994 vm_map_unlock(map);
12995 submap_entry = NULL; /* not valid after map unlock */
12996
12997 local_start = old_start;
12998 local_end = old_end;
12999 map = cow_sub_map_parent;
13000 *var_map = cow_sub_map_parent;
13001 vaddr = cow_parent_vaddr;
13002 cow_sub_map_parent = NULL;
13003
13004 if (!vm_map_lookup_entry(map,
13005 vaddr, &entry)) {
13006 vm_object_deallocate(
13007 copy_object);
13008 vm_map_lock_write_to_read(map);
13009 return KERN_INVALID_ADDRESS;
13010 }
13011
13012 /* clip out the portion of space */
13013 /* mapped by the sub map which */
13014 /* corresponds to the underlying */
13015 /* object */
13016
13017 /*
13018 * Clip (and unnest) the smallest nested chunk
13019 * possible around the faulting address...
13020 */
13021 local_start = vaddr & ~(pmap_nesting_size_min - 1);
13022 local_end = local_start + pmap_nesting_size_min;
13023 /*
13024 * ... but don't go beyond the "old_start" to "old_end"
13025 * range, to avoid spanning over another VM region
13026 * with a possibly different VM object and/or offset.
13027 */
13028 if (local_start < old_start) {
13029 local_start = old_start;
13030 }
13031 if (local_end > old_end) {
13032 local_end = old_end;
13033 }
13034 /*
13035 * Adjust copy_offset to the start of the range.
13036 */
13037 copy_offset -= (vaddr - local_start);
13038
13039 vm_map_clip_start(map, entry, local_start);
13040 vm_map_clip_end(map, entry, local_end);
13041 if (entry->is_sub_map) {
13042 /* unnesting was done when clipping */
13043 assert(!entry->use_pmap);
13044 }
13045
13046 /* substitute copy object for */
13047 /* shared map entry */
13048 vm_map_deallocate(VME_SUBMAP(entry));
13049 assert(!entry->iokit_acct);
13050 entry->is_sub_map = FALSE;
13051 entry->use_pmap = TRUE;
13052 VME_OBJECT_SET(entry, copy_object);
13053
13054 /* propagate the submap entry's protections */
13055 if (entry->protection != VM_PROT_READ) {
13056 /*
13057 * Someone has already altered the top entry's
13058 * protections via vm_protect(VM_PROT_COPY).
13059 * Respect these new values and ignore the
13060 * submap entry's protections.
13061 */
13062 } else {
13063 /*
13064 * Regular copy-on-write: propagate the submap
13065 * entry's protections to the top map entry.
13066 */
13067 entry->protection |= subentry_protection;
13068 }
13069 entry->max_protection |= subentry_max_protection;
13070 /* propagate no_copy_on_read */
13071 entry->vme_no_copy_on_read = subentry_no_copy_on_read;
13072
13073 if ((entry->protection & VM_PROT_WRITE) &&
13074 (entry->protection & VM_PROT_EXECUTE) &&
13075 #if !CONFIG_EMBEDDED
13076 map != kernel_map &&
13077 cs_process_enforcement(NULL) &&
13078 #endif /* !CONFIG_EMBEDDED */
13079 !(entry->used_for_jit)) {
13080 DTRACE_VM3(cs_wx,
13081 uint64_t, (uint64_t)entry->vme_start,
13082 uint64_t, (uint64_t)entry->vme_end,
13083 vm_prot_t, entry->protection);
13084 printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
13085 proc_selfpid(),
13086 (current_task()->bsd_info
13087 ? proc_name_address(current_task()->bsd_info)
13088 : "?"),
13089 __FUNCTION__);
13090 entry->protection &= ~VM_PROT_EXECUTE;
13091 }
13092
13093 if (copied_slowly) {
13094 VME_OFFSET_SET(entry, local_start - old_start);
13095 entry->needs_copy = FALSE;
13096 entry->is_shared = FALSE;
13097 } else {
13098 VME_OFFSET_SET(entry, copy_offset);
13099 assert(entry->wired_count == 0);
13100 entry->needs_copy = TRUE;
13101 if (entry->inheritance == VM_INHERIT_SHARE) {
13102 entry->inheritance = VM_INHERIT_COPY;
13103 }
13104 if (map != old_map) {
13105 entry->is_shared = TRUE;
13106 }
13107 }
13108 if (entry->inheritance == VM_INHERIT_SHARE) {
13109 entry->inheritance = VM_INHERIT_COPY;
13110 }
13111
13112 vm_map_lock_write_to_read(map);
13113 } else {
13114 if ((cow_sub_map_parent)
13115 && (cow_sub_map_parent != *real_map)
13116 && (cow_sub_map_parent != map)) {
13117 vm_map_unlock(cow_sub_map_parent);
13118 }
13119 entry = submap_entry;
13120 vaddr = local_vaddr;
13121 }
13122 }
13123
13124 /*
13125 * Check whether this task is allowed to have
13126 * this page.
13127 */
13128
13129 prot = entry->protection;
13130
13131 if (override_nx(old_map, VME_ALIAS(entry)) && prot) {
13132 /*
13133 * HACK -- if not a stack, then allow execution
13134 */
13135 prot |= VM_PROT_EXECUTE;
13136 }
13137
13138 if (mask_protections) {
13139 fault_type &= prot;
13140 if (fault_type == VM_PROT_NONE) {
13141 goto protection_failure;
13142 }
13143 }
13144 if (((fault_type & prot) != fault_type)
13145 #if __arm64__
13146 /* prefetch abort in execute-only page */
13147 && !(prot == VM_PROT_EXECUTE && fault_type == (VM_PROT_READ | VM_PROT_EXECUTE))
13148 #endif
13149 ) {
13150 protection_failure:
13151 if (*real_map != map) {
13152 vm_map_unlock(*real_map);
13153 }
13154 *real_map = map;
13155
13156 if ((fault_type & VM_PROT_EXECUTE) && prot) {
13157 log_stack_execution_failure((addr64_t)vaddr, prot);
13158 }
13159
13160 DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL);
13161 return KERN_PROTECTION_FAILURE;
13162 }
13163
13164 /*
13165 * If this page is not pageable, we have to get
13166 * it for all possible accesses.
13167 */
13168
13169 *wired = (entry->wired_count != 0);
13170 if (*wired) {
13171 fault_type = prot;
13172 }
13173
13174 /*
13175 * If the entry was copy-on-write, we either ...
13176 */
13177
13178 if (entry->needs_copy) {
13179 /*
13180 * If we want to write the page, we may as well
13181 * handle that now since we've got the map locked.
13182 *
13183 * If we don't need to write the page, we just
13184 * demote the permissions allowed.
13185 */
13186
13187 if ((fault_type & VM_PROT_WRITE) || *wired || force_copy) {
13188 /*
13189 * Make a new object, and place it in the
13190 * object chain. Note that no new references
13191 * have appeared -- one just moved from the
13192 * map to the new object.
13193 */
13194
13195 if (vm_map_lock_read_to_write(map)) {
13196 vm_map_lock_read(map);
13197 goto RetryLookup;
13198 }
13199
13200 if (VME_OBJECT(entry)->shadowed == FALSE) {
13201 vm_object_lock(VME_OBJECT(entry));
13202 VME_OBJECT(entry)->shadowed = TRUE;
13203 vm_object_unlock(VME_OBJECT(entry));
13204 }
13205 VME_OBJECT_SHADOW(entry,
13206 (vm_map_size_t) (entry->vme_end -
13207 entry->vme_start));
13208 entry->needs_copy = FALSE;
13209
13210 vm_map_lock_write_to_read(map);
13211 }
13212 if ((fault_type & VM_PROT_WRITE) == 0 && *wired == 0) {
13213 /*
13214 * We're attempting to read a copy-on-write
13215 * page -- don't allow writes.
13216 */
13217
13218 prot &= (~VM_PROT_WRITE);
13219 }
13220 }
13221
13222 /*
13223 * Create an object if necessary.
13224 */
13225 if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
13226 if (vm_map_lock_read_to_write(map)) {
13227 vm_map_lock_read(map);
13228 goto RetryLookup;
13229 }
13230
13231 VME_OBJECT_SET(entry,
13232 vm_object_allocate(
13233 (vm_map_size_t)(entry->vme_end -
13234 entry->vme_start)));
13235 VME_OFFSET_SET(entry, 0);
13236 assert(entry->use_pmap);
13237 vm_map_lock_write_to_read(map);
13238 }
13239
13240 /*
13241 * Return the object/offset from this entry. If the entry
13242 * was copy-on-write or empty, it has been fixed up. Also
13243 * return the protection.
13244 */
13245
13246 *offset = (vaddr - entry->vme_start) + VME_OFFSET(entry);
13247 *object = VME_OBJECT(entry);
13248 *out_prot = prot;
13249
13250 if (fault_info) {
13251 fault_info->interruptible = THREAD_UNINT; /* for now... */
13252 /* ... the caller will change "interruptible" if needed */
13253 fault_info->cluster_size = 0;
13254 fault_info->user_tag = VME_ALIAS(entry);
13255 fault_info->pmap_options = 0;
13256 if (entry->iokit_acct ||
13257 (!entry->is_sub_map && !entry->use_pmap)) {
13258 fault_info->pmap_options |= PMAP_OPTIONS_ALT_ACCT;
13259 }
13260 fault_info->behavior = entry->behavior;
13261 fault_info->lo_offset = VME_OFFSET(entry);
13262 fault_info->hi_offset =
13263 (entry->vme_end - entry->vme_start) + VME_OFFSET(entry);
13264 fault_info->no_cache = entry->no_cache;
13265 fault_info->stealth = FALSE;
13266 fault_info->io_sync = FALSE;
13267 if (entry->used_for_jit ||
13268 entry->vme_resilient_codesign) {
13269 fault_info->cs_bypass = TRUE;
13270 } else {
13271 fault_info->cs_bypass = FALSE;
13272 }
13273 fault_info->pmap_cs_associated = FALSE;
13274 #if CONFIG_PMAP_CS
13275 if (entry->pmap_cs_associated) {
13276 /*
13277 * The pmap layer will validate this page
13278 * before allowing it to be executed from.
13279 */
13280 fault_info->pmap_cs_associated = TRUE;
13281 }
13282 #endif /* CONFIG_PMAP_CS */
13283 fault_info->mark_zf_absent = FALSE;
13284 fault_info->batch_pmap_op = FALSE;
13285 fault_info->resilient_media = entry->vme_resilient_media;
13286 fault_info->no_copy_on_read = entry->vme_no_copy_on_read;
13287 }
13288
13289 /*
13290 * Lock the object to prevent it from disappearing
13291 */
13292 if (object_lock_type == OBJECT_LOCK_EXCLUSIVE) {
13293 vm_object_lock(*object);
13294 } else {
13295 vm_object_lock_shared(*object);
13296 }
13297
13298 /*
13299 * Save the version number
13300 */
13301
13302 out_version->main_timestamp = map->timestamp;
13303
13304 return KERN_SUCCESS;
13305 }
13306
13307
13308 /*
13309 * vm_map_verify:
13310 *
13311 * Verifies that the map in question has not changed
13312 * since the given version. The map has to be locked
13313 * ("shared" mode is fine) before calling this function
13314 * and it will be returned locked too.
13315 */
13316 boolean_t
13317 vm_map_verify(
13318 vm_map_t map,
13319 vm_map_version_t *version) /* REF */
13320 {
13321 boolean_t result;
13322
13323 vm_map_lock_assert_held(map);
13324 result = (map->timestamp == version->main_timestamp);
13325
13326 return result;
13327 }
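/*
 * A sketch of the lookup / verify pattern these two routines support:
 * look the address up with the map read-locked, remember the version,
 * drop the locks to do work that may block, then re-lock and verify
 * before trusting anything cached from the lookup.  This assumes the
 * address does not resolve through a nested submap (so the map passed
 * in is the one the version describes); the helper name, fault type
 * and "slow work" are placeholders.
 */
#if 0 /* illustrative sketch; not compiled */
static kern_return_t
example_lookup_and_verify(
	vm_map_t        map,
	vm_map_offset_t vaddr)
{
	vm_map_t           lookup_map, real_map;
	vm_map_version_t   version;
	vm_object_t        object;
	vm_object_offset_t offset;
	vm_prot_t          prot;
	boolean_t          wired;
	kern_return_t      kr;

	vm_map_lock_read(map);
	lookup_map = map;
	kr = vm_map_lookup_locked(&lookup_map, vaddr, VM_PROT_READ,
	    OBJECT_LOCK_EXCLUSIVE, &version, &object, &offset,
	    &prot, &wired, NULL, &real_map);
	if (kr != KERN_SUCCESS) {
		vm_map_unlock_read(lookup_map);
		return kr;
	}
	/* the object comes back locked; release what we don't need */
	vm_object_unlock(object);
	if (real_map != lookup_map) {
		vm_map_unlock(real_map);
	}
	vm_map_unlock_read(lookup_map);

	/* ... work that may block goes here ... */

	vm_map_lock_read(map);
	if (!vm_map_verify(map, &version)) {
		/* the map changed underneath us: redo the lookup */
		vm_map_unlock_read(map);
		return KERN_ABORTED;
	}
	vm_map_unlock_read(map);
	return KERN_SUCCESS;
}
#endif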
13328
13329 /*
13330 * TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
13331 * Goes away after regular vm_region_recurse function migrates to
13332 * 64 bits
13333 * vm_region_recurse: A form of vm_region which follows the
13334 * submaps in a target map
13335 *
13336 */
13337
13338 kern_return_t
13339 vm_map_region_recurse_64(
13340 vm_map_t map,
13341 vm_map_offset_t *address, /* IN/OUT */
13342 vm_map_size_t *size, /* OUT */
13343 natural_t *nesting_depth, /* IN/OUT */
13344 vm_region_submap_info_64_t submap_info, /* IN/OUT */
13345 mach_msg_type_number_t *count) /* IN/OUT */
13346 {
13347 mach_msg_type_number_t original_count;
13348 vm_region_extended_info_data_t extended;
13349 vm_map_entry_t tmp_entry;
13350 vm_map_offset_t user_address;
13351 unsigned int user_max_depth;
13352
13353 /*
13354 * "curr_entry" is the VM map entry preceding or including the
13355 * address we're looking for.
13356 * "curr_map" is the map or sub-map containing "curr_entry".
13357 * "curr_address" is the equivalent of the top map's "user_address"
13358 * in the current map.
13359 * "curr_offset" is the cumulated offset of "curr_map" in the
13360 * target task's address space.
13361 * "curr_depth" is the depth of "curr_map" in the chain of
13362 * sub-maps.
13363 *
13364 * "curr_max_below" and "curr_max_above" limit the range (around
13365 * "curr_address") we should take into account in the current (sub)map.
13366 * They limit the range to what's visible through the map entries
13367 * we've traversed from the top map to the current map.
13368 *
13369 */
13370 vm_map_entry_t curr_entry;
13371 vm_map_address_t curr_address;
13372 vm_map_offset_t curr_offset;
13373 vm_map_t curr_map;
13374 unsigned int curr_depth;
13375 vm_map_offset_t curr_max_below, curr_max_above;
13376 vm_map_offset_t curr_skip;
13377
13378 /*
13379 * "next_" is the same as "curr_" but for the VM region immediately
13380 * after the address we're looking for. We need to keep track of this
13381 * too because we want to return info about that region if the
13382 * address we're looking for is not mapped.
13383 */
13384 vm_map_entry_t next_entry;
13385 vm_map_offset_t next_offset;
13386 vm_map_offset_t next_address;
13387 vm_map_t next_map;
13388 unsigned int next_depth;
13389 vm_map_offset_t next_max_below, next_max_above;
13390 vm_map_offset_t next_skip;
13391
13392 boolean_t look_for_pages;
13393 vm_region_submap_short_info_64_t short_info;
13394 boolean_t do_region_footprint;
13395
13396 if (map == VM_MAP_NULL) {
13397 /* no address space to work on */
13398 return KERN_INVALID_ARGUMENT;
13399 }
13400
13401
13402 if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) {
13403 /*
13404 * "info" structure is not big enough and
13405 * would overflow
13406 */
13407 return KERN_INVALID_ARGUMENT;
13408 }
13409
13410 do_region_footprint = task_self_region_footprint();
13411 original_count = *count;
13412
13413 if (original_count < VM_REGION_SUBMAP_INFO_V0_COUNT_64) {
13414 *count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
13415 look_for_pages = FALSE;
13416 short_info = (vm_region_submap_short_info_64_t) submap_info;
13417 submap_info = NULL;
13418 } else {
13419 look_for_pages = TRUE;
13420 *count = VM_REGION_SUBMAP_INFO_V0_COUNT_64;
13421 short_info = NULL;
13422
13423 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
13424 *count = VM_REGION_SUBMAP_INFO_V1_COUNT_64;
13425 }
13426 if (original_count >= VM_REGION_SUBMAP_INFO_V2_COUNT_64) {
13427 *count = VM_REGION_SUBMAP_INFO_V2_COUNT_64;
13428 }
13429 }
13430
13431 user_address = *address;
13432 user_max_depth = *nesting_depth;
13433
13434 if (not_in_kdp) {
13435 vm_map_lock_read(map);
13436 }
13437
13438 recurse_again:
13439 curr_entry = NULL;
13440 curr_map = map;
13441 curr_address = user_address;
13442 curr_offset = 0;
13443 curr_skip = 0;
13444 curr_depth = 0;
13445 curr_max_above = ((vm_map_offset_t) -1) - curr_address;
13446 curr_max_below = curr_address;
13447
13448 next_entry = NULL;
13449 next_map = NULL;
13450 next_address = 0;
13451 next_offset = 0;
13452 next_skip = 0;
13453 next_depth = 0;
13454 next_max_above = (vm_map_offset_t) -1;
13455 next_max_below = (vm_map_offset_t) -1;
13456
13457 for (;;) {
13458 if (vm_map_lookup_entry(curr_map,
13459 curr_address,
13460 &tmp_entry)) {
13461 /* tmp_entry contains the address we're looking for */
13462 curr_entry = tmp_entry;
13463 } else {
13464 vm_map_offset_t skip;
13465 /*
13466 * The address is not mapped. "tmp_entry" is the
13467 * map entry preceding the address. We want the next
13468 * one, if it exists.
13469 */
13470 curr_entry = tmp_entry->vme_next;
13471
13472 if (curr_entry == vm_map_to_entry(curr_map) ||
13473 (curr_entry->vme_start >=
13474 curr_address + curr_max_above)) {
13475 /* no next entry at this level: stop looking */
13476 if (not_in_kdp) {
13477 vm_map_unlock_read(curr_map);
13478 }
13479 curr_entry = NULL;
13480 curr_map = NULL;
13481 curr_skip = 0;
13482 curr_offset = 0;
13483 curr_depth = 0;
13484 curr_max_above = 0;
13485 curr_max_below = 0;
13486 break;
13487 }
13488
13489 /* adjust current address and offset */
13490 skip = curr_entry->vme_start - curr_address;
13491 curr_address = curr_entry->vme_start;
13492 curr_skip += skip;
13493 curr_offset += skip;
13494 curr_max_above -= skip;
13495 curr_max_below = 0;
13496 }
13497
13498 /*
13499 * Is the next entry at this level closer to the address (or
13500 * deeper in the submap chain) than the one we had
13501 * so far ?
13502 */
13503 tmp_entry = curr_entry->vme_next;
13504 if (tmp_entry == vm_map_to_entry(curr_map)) {
13505 /* no next entry at this level */
13506 } else if (tmp_entry->vme_start >=
13507 curr_address + curr_max_above) {
13508 /*
13509 * tmp_entry is beyond the scope of what we mapped of
13510 * this submap in the upper level: ignore it.
13511 */
13512 } else if ((next_entry == NULL) ||
13513 (tmp_entry->vme_start + curr_offset <=
13514 next_entry->vme_start + next_offset)) {
13515 /*
13516 * We didn't have a "next_entry" or this one is
13517 * closer to the address we're looking for:
13518 * use this "tmp_entry" as the new "next_entry".
13519 */
13520 if (next_entry != NULL) {
13521 /* unlock the last "next_map" */
13522 if (next_map != curr_map && not_in_kdp) {
13523 vm_map_unlock_read(next_map);
13524 }
13525 }
13526 next_entry = tmp_entry;
13527 next_map = curr_map;
13528 next_depth = curr_depth;
13529 next_address = next_entry->vme_start;
13530 next_skip = curr_skip;
13531 next_skip += (next_address - curr_address);
13532 next_offset = curr_offset;
13533 next_offset += (next_address - curr_address);
13534 next_max_above = MIN(next_max_above, curr_max_above);
13535 next_max_above = MIN(next_max_above,
13536 next_entry->vme_end - next_address);
13537 next_max_below = MIN(next_max_below, curr_max_below);
13538 next_max_below = MIN(next_max_below,
13539 next_address - next_entry->vme_start);
13540 }
13541
13542 /*
13543 * "curr_max_{above,below}" allow us to keep track of the
13544 * portion of the submap that is actually mapped at this level:
13545 * the rest of that submap is irrelevant to us, since it's not
13546 * mapped here.
13547 * The relevant portion of the map starts at
13548 * "VME_OFFSET(curr_entry)" up to the size of "curr_entry".
13549 */
13550 curr_max_above = MIN(curr_max_above,
13551 curr_entry->vme_end - curr_address);
13552 curr_max_below = MIN(curr_max_below,
13553 curr_address - curr_entry->vme_start);
13554
13555 if (!curr_entry->is_sub_map ||
13556 curr_depth >= user_max_depth) {
13557 /*
13558 * We hit a leaf map or we reached the maximum depth
13559 * we could, so stop looking. Keep the current map
13560 * locked.
13561 */
13562 break;
13563 }
13564
13565 /*
13566 * Get down to the next submap level.
13567 */
13568
13569 /*
13570 * Lock the next level and unlock the current level,
13571 * unless we need to keep it locked to access the "next_entry"
13572 * later.
13573 */
13574 if (not_in_kdp) {
13575 vm_map_lock_read(VME_SUBMAP(curr_entry));
13576 }
13577 if (curr_map == next_map) {
13578 /* keep "next_map" locked in case we need it */
13579 } else {
13580 /* release this map */
13581 if (not_in_kdp) {
13582 vm_map_unlock_read(curr_map);
13583 }
13584 }
13585
13586 /*
13587 * Adjust the offset. "curr_entry" maps the submap
13588 * at relative address "curr_entry->vme_start" in the
13589 * curr_map but skips the first "VME_OFFSET(curr_entry)"
13590 * bytes of the submap.
13591 * "curr_offset" always represents the offset of a virtual
13592 * address in the curr_map relative to the absolute address
13593 * space (i.e. the top-level VM map).
13594 */
13595 curr_offset +=
13596 (VME_OFFSET(curr_entry) - curr_entry->vme_start);
13597 curr_address = user_address + curr_offset;
13598 /* switch to the submap */
13599 curr_map = VME_SUBMAP(curr_entry);
13600 curr_depth++;
13601 curr_entry = NULL;
13602 }
13603
13604 // LP64todo: all the current tools are 32bit, obviously never worked for 64b
13605 // so probably should be a real 32b ID vs. ptr.
13606 // Current users just check for equality
13607
13608 if (curr_entry == NULL) {
13609 /* no VM region contains the address... */
13610
13611 if (do_region_footprint && /* we want footprint numbers */
13612 next_entry == NULL && /* & there are no more regions */
13613 /* & we haven't already provided our fake region: */
13614 user_address <= vm_map_last_entry(map)->vme_end) {
13615 ledger_amount_t ledger_resident, ledger_compressed;
13616
13617 /*
13618 * Add a fake memory region to account for
13619 * purgeable and/or ledger-tagged memory that
13620 * counts towards this task's memory footprint,
13621 * i.e. the resident/compressed pages of non-volatile
13622 * objects owned by that task.
13623 */
13624 task_ledgers_footprint(map->pmap->ledger,
13625 &ledger_resident,
13626 &ledger_compressed);
13627 if (ledger_resident + ledger_compressed == 0) {
13628 /* no purgeable memory usage to report */
13629 return KERN_INVALID_ADDRESS;
13630 }
13631 /* fake region to show nonvolatile footprint */
13632 if (look_for_pages) {
13633 submap_info->protection = VM_PROT_DEFAULT;
13634 submap_info->max_protection = VM_PROT_DEFAULT;
13635 submap_info->inheritance = VM_INHERIT_DEFAULT;
13636 submap_info->offset = 0;
13637 submap_info->user_tag = -1;
13638 submap_info->pages_resident = (unsigned int) (ledger_resident / PAGE_SIZE);
13639 submap_info->pages_shared_now_private = 0;
13640 submap_info->pages_swapped_out = (unsigned int) (ledger_compressed / PAGE_SIZE);
13641 submap_info->pages_dirtied = submap_info->pages_resident;
13642 submap_info->ref_count = 1;
13643 submap_info->shadow_depth = 0;
13644 submap_info->external_pager = 0;
13645 submap_info->share_mode = SM_PRIVATE;
13646 submap_info->is_submap = 0;
13647 submap_info->behavior = VM_BEHAVIOR_DEFAULT;
13648 submap_info->object_id = INFO_MAKE_FAKE_OBJECT_ID(map, task_ledgers.purgeable_nonvolatile);
13649 submap_info->user_wired_count = 0;
13650 submap_info->pages_reusable = 0;
13651 } else {
13652 short_info->user_tag = -1;
13653 short_info->offset = 0;
13654 short_info->protection = VM_PROT_DEFAULT;
13655 short_info->inheritance = VM_INHERIT_DEFAULT;
13656 short_info->max_protection = VM_PROT_DEFAULT;
13657 short_info->behavior = VM_BEHAVIOR_DEFAULT;
13658 short_info->user_wired_count = 0;
13659 short_info->is_submap = 0;
13660 short_info->object_id = INFO_MAKE_FAKE_OBJECT_ID(map, task_ledgers.purgeable_nonvolatile);
13661 short_info->external_pager = 0;
13662 short_info->shadow_depth = 0;
13663 short_info->share_mode = SM_PRIVATE;
13664 short_info->ref_count = 1;
13665 }
13666 *nesting_depth = 0;
13667 *size = (vm_map_size_t) (ledger_resident + ledger_compressed);
13668 // *address = user_address;
13669 *address = vm_map_last_entry(map)->vme_end;
13670 return KERN_SUCCESS;
13671 }
13672
13673 if (next_entry == NULL) {
13674 /* ... and no VM region follows it either */
13675 return KERN_INVALID_ADDRESS;
13676 }
13677 /* ... gather info about the next VM region */
13678 curr_entry = next_entry;
13679 curr_map = next_map; /* still locked ... */
13680 curr_address = next_address;
13681 curr_skip = next_skip;
13682 curr_offset = next_offset;
13683 curr_depth = next_depth;
13684 curr_max_above = next_max_above;
13685 curr_max_below = next_max_below;
13686 } else {
13687 /* we won't need "next_entry" after all */
13688 if (next_entry != NULL) {
13689 /* release "next_map" */
13690 if (next_map != curr_map && not_in_kdp) {
13691 vm_map_unlock_read(next_map);
13692 }
13693 }
13694 }
13695 next_entry = NULL;
13696 next_map = NULL;
13697 next_offset = 0;
13698 next_skip = 0;
13699 next_depth = 0;
13700 next_max_below = -1;
13701 next_max_above = -1;
13702
13703 if (curr_entry->is_sub_map &&
13704 curr_depth < user_max_depth) {
13705 /*
13706 * We're not as deep as we could be: we must have
13707 * gone back up after not finding anything mapped
13708 * below the original top-level map entry's range.
13709 * Let's move "curr_address" forward and recurse again.
13710 */
13711 user_address = curr_address;
13712 goto recurse_again;
13713 }
13714
13715 *nesting_depth = curr_depth;
13716 *size = curr_max_above + curr_max_below;
13717 *address = user_address + curr_skip - curr_max_below;
13718
13719 // LP64todo: all the current tools are 32bit, obviously never worked for 64b
13720 // so probably should be a real 32b ID vs. ptr.
13721 // Current users just check for equality
13722 #define INFO_MAKE_OBJECT_ID(p) ((uint32_t)(uintptr_t)VM_KERNEL_ADDRPERM(p))
13723
13724 if (look_for_pages) {
13725 submap_info->user_tag = VME_ALIAS(curr_entry);
13726 submap_info->offset = VME_OFFSET(curr_entry);
13727 submap_info->protection = curr_entry->protection;
13728 submap_info->inheritance = curr_entry->inheritance;
13729 submap_info->max_protection = curr_entry->max_protection;
13730 submap_info->behavior = curr_entry->behavior;
13731 submap_info->user_wired_count = curr_entry->user_wired_count;
13732 submap_info->is_submap = curr_entry->is_sub_map;
13733 submap_info->object_id = INFO_MAKE_OBJECT_ID(VME_OBJECT(curr_entry));
13734 } else {
13735 short_info->user_tag = VME_ALIAS(curr_entry);
13736 short_info->offset = VME_OFFSET(curr_entry);
13737 short_info->protection = curr_entry->protection;
13738 short_info->inheritance = curr_entry->inheritance;
13739 short_info->max_protection = curr_entry->max_protection;
13740 short_info->behavior = curr_entry->behavior;
13741 short_info->user_wired_count = curr_entry->user_wired_count;
13742 short_info->is_submap = curr_entry->is_sub_map;
13743 short_info->object_id = INFO_MAKE_OBJECT_ID(VME_OBJECT(curr_entry));
13744 }
13745
13746 extended.pages_resident = 0;
13747 extended.pages_swapped_out = 0;
13748 extended.pages_shared_now_private = 0;
13749 extended.pages_dirtied = 0;
13750 extended.pages_reusable = 0;
13751 extended.external_pager = 0;
13752 extended.shadow_depth = 0;
13753 extended.share_mode = SM_EMPTY;
13754 extended.ref_count = 0;
13755
13756 if (not_in_kdp) {
13757 if (!curr_entry->is_sub_map) {
13758 vm_map_offset_t range_start, range_end;
13759 range_start = MAX((curr_address - curr_max_below),
13760 curr_entry->vme_start);
13761 range_end = MIN((curr_address + curr_max_above),
13762 curr_entry->vme_end);
13763 vm_map_region_walk(curr_map,
13764 range_start,
13765 curr_entry,
13766 (VME_OFFSET(curr_entry) +
13767 (range_start -
13768 curr_entry->vme_start)),
13769 range_end - range_start,
13770 &extended,
13771 look_for_pages, VM_REGION_EXTENDED_INFO_COUNT);
13772 if (extended.external_pager &&
13773 extended.ref_count == 2 &&
13774 extended.share_mode == SM_SHARED) {
13775 extended.share_mode = SM_PRIVATE;
13776 }
13777 } else {
13778 if (curr_entry->use_pmap) {
13779 extended.share_mode = SM_TRUESHARED;
13780 } else {
13781 extended.share_mode = SM_PRIVATE;
13782 }
13783 extended.ref_count = os_ref_get_count(&VME_SUBMAP(curr_entry)->map_refcnt);
13784 }
13785 }
13786
13787 if (look_for_pages) {
13788 submap_info->pages_resident = extended.pages_resident;
13789 submap_info->pages_swapped_out = extended.pages_swapped_out;
13790 submap_info->pages_shared_now_private =
13791 extended.pages_shared_now_private;
13792 submap_info->pages_dirtied = extended.pages_dirtied;
13793 submap_info->external_pager = extended.external_pager;
13794 submap_info->shadow_depth = extended.shadow_depth;
13795 submap_info->share_mode = extended.share_mode;
13796 submap_info->ref_count = extended.ref_count;
13797
13798 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
13799 submap_info->pages_reusable = extended.pages_reusable;
13800 }
13801 if (original_count >= VM_REGION_SUBMAP_INFO_V2_COUNT_64) {
13802 submap_info->object_id_full = (VME_OBJECT(curr_entry) != NULL) ? (vm_object_id_t) VM_KERNEL_ADDRPERM(VME_OBJECT(curr_entry)) : 0ULL;
13803 }
13804 } else {
13805 short_info->external_pager = extended.external_pager;
13806 short_info->shadow_depth = extended.shadow_depth;
13807 short_info->share_mode = extended.share_mode;
13808 short_info->ref_count = extended.ref_count;
13809 }
13810
13811 if (not_in_kdp) {
13812 vm_map_unlock_read(curr_map);
13813 }
13814
13815 return KERN_SUCCESS;
13816 }
13817
13818 /*
13819 * vm_region:
13820 *
13821 * User call to obtain information about a region in
13822 * a task's address map. Currently, only one flavor is
13823 * supported.
13824 *
13825 * XXX The reserved and behavior fields cannot be filled
13826 * in until the vm merge from the IK is completed, and
13827 * vm_reserve is implemented.
13828 */
13829
13830 kern_return_t
13831 vm_map_region(
13832 vm_map_t map,
13833 vm_map_offset_t *address, /* IN/OUT */
13834 vm_map_size_t *size, /* OUT */
13835 vm_region_flavor_t flavor, /* IN */
13836 vm_region_info_t info, /* OUT */
13837 mach_msg_type_number_t *count, /* IN/OUT */
13838 mach_port_t *object_name) /* OUT */
13839 {
13840 vm_map_entry_t tmp_entry;
13841 vm_map_entry_t entry;
13842 vm_map_offset_t start;
13843
13844 if (map == VM_MAP_NULL) {
13845 return KERN_INVALID_ARGUMENT;
13846 }
13847
13848 switch (flavor) {
13849 case VM_REGION_BASIC_INFO:
13850 /* legacy for old 32-bit objects info */
13851 {
13852 vm_region_basic_info_t basic;
13853
13854 if (*count < VM_REGION_BASIC_INFO_COUNT) {
13855 return KERN_INVALID_ARGUMENT;
13856 }
13857
13858 basic = (vm_region_basic_info_t) info;
13859 *count = VM_REGION_BASIC_INFO_COUNT;
13860
13861 vm_map_lock_read(map);
13862
13863 start = *address;
13864 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
13865 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
13866 vm_map_unlock_read(map);
13867 return KERN_INVALID_ADDRESS;
13868 }
13869 } else {
13870 entry = tmp_entry;
13871 }
13872
13873 start = entry->vme_start;
13874
13875 basic->offset = (uint32_t)VME_OFFSET(entry);
13876 basic->protection = entry->protection;
13877 basic->inheritance = entry->inheritance;
13878 basic->max_protection = entry->max_protection;
13879 basic->behavior = entry->behavior;
13880 basic->user_wired_count = entry->user_wired_count;
13881 basic->reserved = entry->is_sub_map;
13882 *address = start;
13883 *size = (entry->vme_end - start);
13884
13885 if (object_name) {
13886 *object_name = IP_NULL;
13887 }
13888 if (entry->is_sub_map) {
13889 basic->shared = FALSE;
13890 } else {
13891 basic->shared = entry->is_shared;
13892 }
13893
13894 vm_map_unlock_read(map);
13895 return KERN_SUCCESS;
13896 }
13897
13898 case VM_REGION_BASIC_INFO_64:
13899 {
13900 vm_region_basic_info_64_t basic;
13901
13902 if (*count < VM_REGION_BASIC_INFO_COUNT_64) {
13903 return KERN_INVALID_ARGUMENT;
13904 }
13905
13906 basic = (vm_region_basic_info_64_t) info;
13907 *count = VM_REGION_BASIC_INFO_COUNT_64;
13908
13909 vm_map_lock_read(map);
13910
13911 start = *address;
13912 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
13913 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
13914 vm_map_unlock_read(map);
13915 return KERN_INVALID_ADDRESS;
13916 }
13917 } else {
13918 entry = tmp_entry;
13919 }
13920
13921 start = entry->vme_start;
13922
13923 basic->offset = VME_OFFSET(entry);
13924 basic->protection = entry->protection;
13925 basic->inheritance = entry->inheritance;
13926 basic->max_protection = entry->max_protection;
13927 basic->behavior = entry->behavior;
13928 basic->user_wired_count = entry->user_wired_count;
13929 basic->reserved = entry->is_sub_map;
13930 *address = start;
13931 *size = (entry->vme_end - start);
13932
13933 if (object_name) {
13934 *object_name = IP_NULL;
13935 }
13936 if (entry->is_sub_map) {
13937 basic->shared = FALSE;
13938 } else {
13939 basic->shared = entry->is_shared;
13940 }
13941
13942 vm_map_unlock_read(map);
13943 return KERN_SUCCESS;
13944 }
13945 case VM_REGION_EXTENDED_INFO:
13946 if (*count < VM_REGION_EXTENDED_INFO_COUNT) {
13947 return KERN_INVALID_ARGUMENT;
13948 }
13949 /*fallthru*/
13950 case VM_REGION_EXTENDED_INFO__legacy:
13951 if (*count < VM_REGION_EXTENDED_INFO_COUNT__legacy) {
13952 return KERN_INVALID_ARGUMENT;
13953 }
13954
13955 {
13956 vm_region_extended_info_t extended;
13957 mach_msg_type_number_t original_count;
13958
13959 extended = (vm_region_extended_info_t) info;
13960
13961 vm_map_lock_read(map);
13962
13963 start = *address;
13964 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
13965 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
13966 vm_map_unlock_read(map);
13967 return KERN_INVALID_ADDRESS;
13968 }
13969 } else {
13970 entry = tmp_entry;
13971 }
13972 start = entry->vme_start;
13973
13974 extended->protection = entry->protection;
13975 extended->user_tag = VME_ALIAS(entry);
13976 extended->pages_resident = 0;
13977 extended->pages_swapped_out = 0;
13978 extended->pages_shared_now_private = 0;
13979 extended->pages_dirtied = 0;
13980 extended->external_pager = 0;
13981 extended->shadow_depth = 0;
13982
13983 original_count = *count;
13984 if (flavor == VM_REGION_EXTENDED_INFO__legacy) {
13985 *count = VM_REGION_EXTENDED_INFO_COUNT__legacy;
13986 } else {
13987 extended->pages_reusable = 0;
13988 *count = VM_REGION_EXTENDED_INFO_COUNT;
13989 }
13990
13991 vm_map_region_walk(map, start, entry, VME_OFFSET(entry), entry->vme_end - start, extended, TRUE, *count);
13992
13993 if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED) {
13994 extended->share_mode = SM_PRIVATE;
13995 }
13996
13997 if (object_name) {
13998 *object_name = IP_NULL;
13999 }
14000 *address = start;
14001 *size = (entry->vme_end - start);
14002
14003 vm_map_unlock_read(map);
14004 return KERN_SUCCESS;
14005 }
14006 case VM_REGION_TOP_INFO:
14007 {
14008 vm_region_top_info_t top;
14009
14010 if (*count < VM_REGION_TOP_INFO_COUNT) {
14011 return KERN_INVALID_ARGUMENT;
14012 }
14013
14014 top = (vm_region_top_info_t) info;
14015 *count = VM_REGION_TOP_INFO_COUNT;
14016
14017 vm_map_lock_read(map);
14018
14019 start = *address;
14020 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
14021 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
14022 vm_map_unlock_read(map);
14023 return KERN_INVALID_ADDRESS;
14024 }
14025 } else {
14026 entry = tmp_entry;
14027 }
14028 start = entry->vme_start;
14029
14030 top->private_pages_resident = 0;
14031 top->shared_pages_resident = 0;
14032
14033 vm_map_region_top_walk(entry, top);
14034
14035 if (object_name) {
14036 *object_name = IP_NULL;
14037 }
14038 *address = start;
14039 *size = (entry->vme_end - start);
14040
14041 vm_map_unlock_read(map);
14042 return KERN_SUCCESS;
14043 }
14044 default:
14045 return KERN_INVALID_ARGUMENT;
14046 }
14047 }
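
/*
 * Illustrative sketch (not compiled): how an in-kernel caller might use
 * vm_map_region() above to look up the basic attributes of the region
 * containing "addr" in "map".  The vm_region_basic_info_data_64_t type and
 * VM_REGION_BASIC_INFO_COUNT_64 are the standard Mach vm_region definitions;
 * the function and variable names below are hypothetical.
 */
#if 0
static kern_return_t
example_query_region(
	vm_map_t        map,
	vm_map_offset_t addr)
{
	vm_map_offset_t                 address = addr; /* IN/OUT: rounded to region start */
	vm_map_size_t                   size;
	vm_region_basic_info_data_64_t  info;
	mach_msg_type_number_t          count = VM_REGION_BASIC_INFO_COUNT_64;
	mach_port_t                     object_name;
	kern_return_t                   kr;

	kr = vm_map_region(map, &address, &size,
	    VM_REGION_BASIC_INFO_64,
	    (vm_region_info_t)&info, &count, &object_name);
	if (kr == KERN_SUCCESS) {
		/*
		 * "address" .. "address + size" now describes the region;
		 * info.protection / info.max_protection hold its protections.
		 */
	}
	return kr;
}
#endif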
14048
14049 #define OBJ_RESIDENT_COUNT(obj, entry_size) \
14050 MIN((entry_size), \
14051 ((obj)->all_reusable ? \
14052 (obj)->wired_page_count : \
14053 (obj)->resident_page_count - (obj)->reusable_page_count))
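
/*
 * Example: for an 8-page entry backed by an object with 6 resident pages of
 * which 2 are reusable, OBJ_RESIDENT_COUNT() yields MIN(8, 6 - 2) = 4; if the
 * object is marked all_reusable, only its wired pages are counted instead.
 */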
14054
14055 void
14056 vm_map_region_top_walk(
14057 vm_map_entry_t entry,
14058 vm_region_top_info_t top)
14059 {
14060 if (VME_OBJECT(entry) == 0 || entry->is_sub_map) {
14061 top->share_mode = SM_EMPTY;
14062 top->ref_count = 0;
14063 top->obj_id = 0;
14064 return;
14065 }
14066
14067 {
14068 struct vm_object *obj, *tmp_obj;
14069 int ref_count;
14070 uint32_t entry_size;
14071
14072 entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64);
14073
14074 obj = VME_OBJECT(entry);
14075
14076 vm_object_lock(obj);
14077
14078 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
14079 ref_count--;
14080 }
14081
14082 assert(obj->reusable_page_count <= obj->resident_page_count);
14083 if (obj->shadow) {
14084 if (ref_count == 1) {
14085 top->private_pages_resident =
14086 OBJ_RESIDENT_COUNT(obj, entry_size);
14087 } else {
14088 top->shared_pages_resident =
14089 OBJ_RESIDENT_COUNT(obj, entry_size);
14090 }
14091 top->ref_count = ref_count;
14092 top->share_mode = SM_COW;
14093
14094 while ((tmp_obj = obj->shadow)) {
14095 vm_object_lock(tmp_obj);
14096 vm_object_unlock(obj);
14097 obj = tmp_obj;
14098
14099 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
14100 ref_count--;
14101 }
14102
14103 assert(obj->reusable_page_count <= obj->resident_page_count);
14104 top->shared_pages_resident +=
14105 OBJ_RESIDENT_COUNT(obj, entry_size);
14106 top->ref_count += ref_count - 1;
14107 }
14108 } else {
14109 if (entry->superpage_size) {
14110 top->share_mode = SM_LARGE_PAGE;
14111 top->shared_pages_resident = 0;
14112 top->private_pages_resident = entry_size;
14113 } else if (entry->needs_copy) {
14114 top->share_mode = SM_COW;
14115 top->shared_pages_resident =
14116 OBJ_RESIDENT_COUNT(obj, entry_size);
14117 } else {
14118 if (ref_count == 1 ||
14119 (ref_count == 2 && obj->named)) {
14120 top->share_mode = SM_PRIVATE;
14121 top->private_pages_resident =
14122 OBJ_RESIDENT_COUNT(obj,
14123 entry_size);
14124 } else {
14125 top->share_mode = SM_SHARED;
14126 top->shared_pages_resident =
14127 OBJ_RESIDENT_COUNT(obj,
14128 entry_size);
14129 }
14130 }
14131 top->ref_count = ref_count;
14132 }
14133 /* XXX K64: obj_id will be truncated */
14134 top->obj_id = (unsigned int) (uintptr_t)VM_KERNEL_ADDRPERM(obj);
14135
14136 vm_object_unlock(obj);
14137 }
14138 }
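
/*
 * Illustrative sketch (not compiled): vm_map_region_top_walk() expects the
 * caller to hold the map read lock and to pre-zero the resident counters,
 * exactly as the VM_REGION_TOP_INFO case above does.  The function name is
 * hypothetical; vm_region_top_info_data_t is the standard Mach type.
 */
#if 0
static void
example_top_info(vm_map_entry_t entry)
{
	vm_region_top_info_data_t top;

	top.private_pages_resident = 0;
	top.shared_pages_resident = 0;
	vm_map_region_top_walk(entry, &top);
	/* top.share_mode / top.ref_count / top.obj_id now describe the mapping */
}
#endif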
14139
14140 void
14141 vm_map_region_walk(
14142 vm_map_t map,
14143 vm_map_offset_t va,
14144 vm_map_entry_t entry,
14145 vm_object_offset_t offset,
14146 vm_object_size_t range,
14147 vm_region_extended_info_t extended,
14148 boolean_t look_for_pages,
14149 mach_msg_type_number_t count)
14150 {
14151 struct vm_object *obj, *tmp_obj;
14152 vm_map_offset_t last_offset;
14153 int i;
14154 int ref_count;
14155 struct vm_object *shadow_object;
14156 int shadow_depth;
14157 boolean_t do_region_footprint;
14158
14159 do_region_footprint = task_self_region_footprint();
14160
14161 if ((VME_OBJECT(entry) == 0) ||
14162 (entry->is_sub_map) ||
14163 (VME_OBJECT(entry)->phys_contiguous &&
14164 !entry->superpage_size)) {
14165 extended->share_mode = SM_EMPTY;
14166 extended->ref_count = 0;
14167 return;
14168 }
14169
14170 if (entry->superpage_size) {
14171 extended->shadow_depth = 0;
14172 extended->share_mode = SM_LARGE_PAGE;
14173 extended->ref_count = 1;
14174 extended->external_pager = 0;
14175 extended->pages_resident = (unsigned int)(range >> PAGE_SHIFT);
14177 return;
14178 }
14179
14180 obj = VME_OBJECT(entry);
14181
14182 vm_object_lock(obj);
14183
14184 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
14185 ref_count--;
14186 }
14187
14188 if (look_for_pages) {
14189 for (last_offset = offset + range;
14190 offset < last_offset;
14191 offset += PAGE_SIZE_64, va += PAGE_SIZE) {
14192 if (do_region_footprint) {
14193 int disp;
14194
14195 disp = 0;
14196 if (map->has_corpse_footprint) {
14197 /*
14198 * Query the page info data we saved
14199 * while forking the corpse.
14200 */
14201 vm_map_corpse_footprint_query_page_info(
14202 map,
14203 va,
14204 &disp);
14205 } else {
14206 /*
14207 * Query the pmap.
14208 */
14209 pmap_query_page_info(map->pmap,
14210 va,
14211 &disp);
14212 }
14213 if (disp & PMAP_QUERY_PAGE_PRESENT) {
14214 if (!(disp & PMAP_QUERY_PAGE_ALTACCT)) {
14215 extended->pages_resident++;
14216 }
14217 if (disp & PMAP_QUERY_PAGE_REUSABLE) {
14218 extended->pages_reusable++;
14219 } else if (!(disp & PMAP_QUERY_PAGE_INTERNAL) ||
14220 (disp & PMAP_QUERY_PAGE_ALTACCT)) {
14221 /* alternate accounting */
14222 } else {
14223 extended->pages_dirtied++;
14224 }
14225 } else if (disp & PMAP_QUERY_PAGE_COMPRESSED) {
14226 if (disp & PMAP_QUERY_PAGE_COMPRESSED_ALTACCT) {
14227 /* alternate accounting */
14228 } else {
14229 extended->pages_swapped_out++;
14230 }
14231 }
14232 /* deal with alternate accounting */
14233 if (obj->purgable == VM_PURGABLE_NONVOLATILE &&
14234 /* && not tagged as no-footprint? */
14235 VM_OBJECT_OWNER(obj) != NULL &&
14236 VM_OBJECT_OWNER(obj)->map == map) {
14237 if ((((va
14238 - entry->vme_start
14239 + VME_OFFSET(entry))
14240 / PAGE_SIZE) <
14241 (obj->resident_page_count +
14242 vm_compressor_pager_get_count(obj->pager)))) {
14243 /*
14244 * Non-volatile purgeable object owned
14245 * by this task: report the first
14246 * "#resident + #compressed" pages as
14247 * "resident" (to show that they
14248 * contribute to the footprint) but not
14249 * "dirty" (to avoid double-counting
14250 * with the fake "non-volatile" region
14251 * we'll report at the end of the
14252 * address space to account for all
14253 * (mapped or not) non-volatile memory
14254 * owned by this task).
14255 */
14256 extended->pages_resident++;
14257 }
14258 } else if ((obj->purgable == VM_PURGABLE_VOLATILE ||
14259 obj->purgable == VM_PURGABLE_EMPTY) &&
14260 /* && not tagged as no-footprint? */
14261 VM_OBJECT_OWNER(obj) != NULL &&
14262 VM_OBJECT_OWNER(obj)->map == map) {
14263 if ((((va
14264 - entry->vme_start
14265 + VME_OFFSET(entry))
14266 / PAGE_SIZE) <
14267 obj->wired_page_count)) {
14268 /*
14269 * Volatile|empty purgeable object owned
14270 * by this task: report the first
14271 * "#wired" pages as "resident" (to
14272 * show that they contribute to the
14273 * footprint) but not "dirty" (to avoid
14274 * double-counting with the fake
14275 * "non-volatile" region we'll report
14276 * at the end of the address space to
14277 * account for all (mapped or not)
14278 * non-volatile memory owned by this
14279 * task).
14280 */
14281 extended->pages_resident++;
14282 }
14283 } else if (obj->purgable != VM_PURGABLE_DENY) {
14284 /*
14285 * Pages from purgeable objects
14286 * will be reported as dirty
14287 * appropriately in an extra
14288 * fake memory region at the end of
14289 * the address space.
14290 */
14291 } else if (entry->iokit_acct) {
14292 /*
14293 * IOKit mappings are considered
14294 * as fully dirty for footprint's
14295 * sake.
14296 */
14297 extended->pages_dirtied++;
14298 }
14299 continue;
14300 }
14301
14302 vm_map_region_look_for_page(map, va, obj,
14303 offset, ref_count,
14304 0, extended, count);
14305 }
14306
14307 if (do_region_footprint) {
14308 goto collect_object_info;
14309 }
14310 } else {
14311 collect_object_info:
14312 shadow_object = obj->shadow;
14313 shadow_depth = 0;
14314
14315 if (!(obj->internal)) {
14316 extended->external_pager = 1;
14317 }
14318
14319 if (shadow_object != VM_OBJECT_NULL) {
14320 vm_object_lock(shadow_object);
14321 for (;
14322 shadow_object != VM_OBJECT_NULL;
14323 shadow_depth++) {
14324 vm_object_t next_shadow;
14325
14326 if (!(shadow_object->internal)) {
14327 extended->external_pager = 1;
14328 }
14329
14330 next_shadow = shadow_object->shadow;
14331 if (next_shadow) {
14332 vm_object_lock(next_shadow);
14333 }
14334 vm_object_unlock(shadow_object);
14335 shadow_object = next_shadow;
14336 }
14337 }
14338 extended->shadow_depth = shadow_depth;
14339 }
14340
14341 if (extended->shadow_depth || entry->needs_copy) {
14342 extended->share_mode = SM_COW;
14343 } else {
14344 if (ref_count == 1) {
14345 extended->share_mode = SM_PRIVATE;
14346 } else {
14347 if (obj->true_share) {
14348 extended->share_mode = SM_TRUESHARED;
14349 } else {
14350 extended->share_mode = SM_SHARED;
14351 }
14352 }
14353 }
14354 extended->ref_count = ref_count - extended->shadow_depth;
14355
14356 for (i = 0; i < extended->shadow_depth; i++) {
14357 if ((tmp_obj = obj->shadow) == 0) {
14358 break;
14359 }
14360 vm_object_lock(tmp_obj);
14361 vm_object_unlock(obj);
14362
14363 if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress) {
14364 ref_count--;
14365 }
14366
14367 extended->ref_count += ref_count;
14368 obj = tmp_obj;
14369 }
14370 vm_object_unlock(obj);
14371
14372 if (extended->share_mode == SM_SHARED) {
14373 vm_map_entry_t cur;
14374 vm_map_entry_t last;
14375 int my_refs;
14376
14377 obj = VME_OBJECT(entry);
14378 last = vm_map_to_entry(map);
14379 my_refs = 0;
14380
14381 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
14382 ref_count--;
14383 }
14384 for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next) {
14385 my_refs += vm_map_region_count_obj_refs(cur, obj);
14386 }
14387
14388 if (my_refs == ref_count) {
14389 extended->share_mode = SM_PRIVATE_ALIASED;
14390 } else if (my_refs > 1) {
14391 extended->share_mode = SM_SHARED_ALIASED;
14392 }
14393 }
14394 }
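
/*
 * Illustrative sketch (not compiled): the VM_REGION_EXTENDED_INFO path above
 * drives vm_map_region_walk() roughly like this, with the map read-locked and
 * the extended counters zeroed beforehand.  The function name is hypothetical;
 * vm_region_extended_info_data_t is the standard Mach type.
 */
#if 0
static void
example_extended_info(vm_map_t map, vm_map_entry_t entry)
{
	vm_region_extended_info_data_t extended = {};

	extended.protection = entry->protection;
	vm_map_region_walk(map,
	    entry->vme_start,                     /* va of the first page */
	    entry,
	    VME_OFFSET(entry),                    /* offset into the backing object */
	    entry->vme_end - entry->vme_start,    /* size of the range */
	    &extended,
	    TRUE,                                 /* look_for_pages */
	    VM_REGION_EXTENDED_INFO_COUNT);
}
#endif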
14395
14396
14397 /* object is locked on entry and locked on return */
14398
14399
14400 static void
14401 vm_map_region_look_for_page(
14402 __unused vm_map_t map,
14403 __unused vm_map_offset_t va,
14404 vm_object_t object,
14405 vm_object_offset_t offset,
14406 int max_refcnt,
14407 int depth,
14408 vm_region_extended_info_t extended,
14409 mach_msg_type_number_t count)
14410 {
14411 vm_page_t p;
14412 vm_object_t shadow;
14413 int ref_count;
14414 vm_object_t caller_object;
14415
14416 shadow = object->shadow;
14417 caller_object = object;
14418
14419
14420 while (TRUE) {
14421 if (!(object->internal)) {
14422 extended->external_pager = 1;
14423 }
14424
14425 if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
14426 if (shadow && (max_refcnt == 1)) {
14427 extended->pages_shared_now_private++;
14428 }
14429
14430 if (!p->vmp_fictitious &&
14431 (p->vmp_dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(p)))) {
14432 extended->pages_dirtied++;
14433 } else if (count >= VM_REGION_EXTENDED_INFO_COUNT) {
14434 if (p->vmp_reusable || object->all_reusable) {
14435 extended->pages_reusable++;
14436 }
14437 }
14438
14439 extended->pages_resident++;
14440
14441 if (object != caller_object) {
14442 vm_object_unlock(object);
14443 }
14444
14445 return;
14446 }
14447 if (object->internal &&
14448 object->alive &&
14449 !object->terminating &&
14450 object->pager_ready) {
14451 if (VM_COMPRESSOR_PAGER_STATE_GET(object, offset)
14452 == VM_EXTERNAL_STATE_EXISTS) {
14453 /* the pager has that page */
14454 extended->pages_swapped_out++;
14455 if (object != caller_object) {
14456 vm_object_unlock(object);
14457 }
14458 return;
14459 }
14460 }
14461
14462 if (shadow) {
14463 vm_object_lock(shadow);
14464
14465 if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress) {
14466 ref_count--;
14467 }
14468
14469 if (++depth > extended->shadow_depth) {
14470 extended->shadow_depth = depth;
14471 }
14472
14473 if (ref_count > max_refcnt) {
14474 max_refcnt = ref_count;
14475 }
14476
14477 if (object != caller_object) {
14478 vm_object_unlock(object);
14479 }
14480
14481 offset = offset + object->vo_shadow_offset;
14482 object = shadow;
14483 shadow = object->shadow;
14484 continue;
14485 }
14486 if (object != caller_object) {
14487 vm_object_unlock(object);
14488 }
14489 break;
14490 }
14491 }
14492
14493 static int
14494 vm_map_region_count_obj_refs(
14495 vm_map_entry_t entry,
14496 vm_object_t object)
14497 {
14498 int ref_count;
14499 vm_object_t chk_obj;
14500 vm_object_t tmp_obj;
14501
14502 if (VME_OBJECT(entry) == 0) {
14503 return 0;
14504 }
14505
14506 if (entry->is_sub_map) {
14507 return 0;
14508 } else {
14509 ref_count = 0;
14510
14511 chk_obj = VME_OBJECT(entry);
14512 vm_object_lock(chk_obj);
14513
14514 while (chk_obj) {
14515 if (chk_obj == object) {
14516 ref_count++;
14517 }
14518 tmp_obj = chk_obj->shadow;
14519 if (tmp_obj) {
14520 vm_object_lock(tmp_obj);
14521 }
14522 vm_object_unlock(chk_obj);
14523
14524 chk_obj = tmp_obj;
14525 }
14526 }
14527 return ref_count;
14528 }
14529
14530
14531 /*
14532 * Routine: vm_map_simplify
14533 *
14534 * Description:
14535 * Attempt to simplify the map representation in
14536 * the vicinity of the given starting address.
14537 * Note:
14538 * This routine is intended primarily to keep the
14539 * kernel maps more compact -- they generally don't
14540 * benefit from the "expand a map entry" technology
14541 * at allocation time because the adjacent entry
14542 * is often wired down.
14543 */
14544 void
14545 vm_map_simplify_entry(
14546 vm_map_t map,
14547 vm_map_entry_t this_entry)
14548 {
14549 vm_map_entry_t prev_entry;
14550
14551 counter(c_vm_map_simplify_entry_called++);
14552
14553 prev_entry = this_entry->vme_prev;
14554
14555 if ((this_entry != vm_map_to_entry(map)) &&
14556 (prev_entry != vm_map_to_entry(map)) &&
14557
14558 (prev_entry->vme_end == this_entry->vme_start) &&
14559
14560 (prev_entry->is_sub_map == this_entry->is_sub_map) &&
14561 (VME_OBJECT(prev_entry) == VME_OBJECT(this_entry)) &&
14562 ((VME_OFFSET(prev_entry) + (prev_entry->vme_end -
14563 prev_entry->vme_start))
14564 == VME_OFFSET(this_entry)) &&
14565
14566 (prev_entry->behavior == this_entry->behavior) &&
14567 (prev_entry->needs_copy == this_entry->needs_copy) &&
14568 (prev_entry->protection == this_entry->protection) &&
14569 (prev_entry->max_protection == this_entry->max_protection) &&
14570 (prev_entry->inheritance == this_entry->inheritance) &&
14571 (prev_entry->use_pmap == this_entry->use_pmap) &&
14572 (VME_ALIAS(prev_entry) == VME_ALIAS(this_entry)) &&
14573 (prev_entry->no_cache == this_entry->no_cache) &&
14574 (prev_entry->permanent == this_entry->permanent) &&
14575 (prev_entry->map_aligned == this_entry->map_aligned) &&
14576 (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) &&
14577 (prev_entry->used_for_jit == this_entry->used_for_jit) &&
14578 (prev_entry->pmap_cs_associated == this_entry->pmap_cs_associated) &&
14579 /* from_reserved_zone: OK if that field doesn't match */
14580 (prev_entry->iokit_acct == this_entry->iokit_acct) &&
14581 (prev_entry->vme_resilient_codesign ==
14582 this_entry->vme_resilient_codesign) &&
14583 (prev_entry->vme_resilient_media ==
14584 this_entry->vme_resilient_media) &&
14585 (prev_entry->vme_no_copy_on_read == this_entry->vme_no_copy_on_read) &&
14586
14587 (prev_entry->wired_count == this_entry->wired_count) &&
14588 (prev_entry->user_wired_count == this_entry->user_wired_count) &&
14589
14590 ((prev_entry->vme_atomic == FALSE) && (this_entry->vme_atomic == FALSE)) &&
14591 (prev_entry->in_transition == FALSE) &&
14592 (this_entry->in_transition == FALSE) &&
14593 (prev_entry->needs_wakeup == FALSE) &&
14594 (this_entry->needs_wakeup == FALSE) &&
14595 (prev_entry->is_shared == FALSE) &&
14596 (this_entry->is_shared == FALSE) &&
14597 (prev_entry->superpage_size == FALSE) &&
14598 (this_entry->superpage_size == FALSE)
14599 ) {
14600 vm_map_store_entry_unlink(map, prev_entry);
14601 assert(prev_entry->vme_start < this_entry->vme_end);
14602 if (prev_entry->map_aligned) {
14603 assert(VM_MAP_PAGE_ALIGNED(prev_entry->vme_start,
14604 VM_MAP_PAGE_MASK(map)));
14605 }
14606 this_entry->vme_start = prev_entry->vme_start;
14607 VME_OFFSET_SET(this_entry, VME_OFFSET(prev_entry));
14608
14609 if (map->holelistenabled) {
14610 vm_map_store_update_first_free(map, this_entry, TRUE);
14611 }
14612
14613 if (prev_entry->is_sub_map) {
14614 vm_map_deallocate(VME_SUBMAP(prev_entry));
14615 } else {
14616 vm_object_deallocate(VME_OBJECT(prev_entry));
14617 }
14618 vm_map_entry_dispose(map, prev_entry);
14619 SAVE_HINT_MAP_WRITE(map, this_entry);
14620 counter(c_vm_map_simplified++);
14621 }
14622 }
14623
14624 void
14625 vm_map_simplify(
14626 vm_map_t map,
14627 vm_map_offset_t start)
14628 {
14629 vm_map_entry_t this_entry;
14630
14631 vm_map_lock(map);
14632 if (vm_map_lookup_entry(map, start, &this_entry)) {
14633 vm_map_simplify_entry(map, this_entry);
14634 vm_map_simplify_entry(map, this_entry->vme_next);
14635 }
14636 counter(c_vm_map_simplify_called++);
14637 vm_map_unlock(map);
14638 }
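
/*
 * Illustrative sketch (not compiled): after an operation that may have split
 * entries (clipping, remapping), a caller can ask the map to re-coalesce
 * around a boundary address.  Names are hypothetical; note that
 * vm_map_simplify() takes the map lock itself.
 */
#if 0
static void
example_simplify(vm_map_t map, vm_map_offset_t boundary)
{
	/* vm_map_simplify() locks the map itself, so don't hold the lock here */
	vm_map_simplify(map, boundary);
}
#endif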
14639
14640 static void
14641 vm_map_simplify_range(
14642 vm_map_t map,
14643 vm_map_offset_t start,
14644 vm_map_offset_t end)
14645 {
14646 vm_map_entry_t entry;
14647
14648 /*
14649 * The map should be locked (for "write") by the caller.
14650 */
14651
14652 if (start >= end) {
14653 /* invalid address range */
14654 return;
14655 }
14656
14657 start = vm_map_trunc_page(start,
14658 VM_MAP_PAGE_MASK(map));
14659 end = vm_map_round_page(end,
14660 VM_MAP_PAGE_MASK(map));
14661
14662 if (!vm_map_lookup_entry(map, start, &entry)) {
14663 /* "start" is not mapped and "entry" ends before "start" */
14664 if (entry == vm_map_to_entry(map)) {
14665 /* start with first entry in the map */
14666 entry = vm_map_first_entry(map);
14667 } else {
14668 /* start with next entry */
14669 entry = entry->vme_next;
14670 }
14671 }
14672
14673 while (entry != vm_map_to_entry(map) &&
14674 entry->vme_start <= end) {
14675 /* try and coalesce "entry" with its previous entry */
14676 vm_map_simplify_entry(map, entry);
14677 entry = entry->vme_next;
14678 }
14679 }
14680
14681
14682 /*
14683 * Routine: vm_map_machine_attribute
14684 * Purpose:
14685 * Provide machine-specific attributes to mappings,
14686 * such as cacheability, for machines that provide
14687 * them. NUMA architectures and machines with big/strange
14688 * caches will use this.
14689 * Note:
14690 * Responsibilities for locking and checking are handled here;
14691 * everything else is handled in the pmap module. If any non-volatile
14692 * information must be kept, the pmap module should handle
14693 * it itself. [This assumes that attributes do not
14694 * need to be inherited, which seems ok to me]
14695 */
14696 kern_return_t
14697 vm_map_machine_attribute(
14698 vm_map_t map,
14699 vm_map_offset_t start,
14700 vm_map_offset_t end,
14701 vm_machine_attribute_t attribute,
14702 vm_machine_attribute_val_t* value) /* IN/OUT */
14703 {
14704 kern_return_t ret;
14705 vm_map_size_t sync_size;
14706 vm_map_entry_t entry;
14707
14708 if (start < vm_map_min(map) || end > vm_map_max(map)) {
14709 return KERN_INVALID_ADDRESS;
14710 }
14711
14712 /* Figure how much memory we need to flush (in page increments) */
14713 sync_size = end - start;
14714
14715 vm_map_lock(map);
14716
14717 if (attribute != MATTR_CACHE) {
14718 /* If we don't have to find physical addresses, we */
14719 /* don't have to do an explicit traversal here. */
14720 ret = pmap_attribute(map->pmap, start, end - start,
14721 attribute, value);
14722 vm_map_unlock(map);
14723 return ret;
14724 }
14725
14726 ret = KERN_SUCCESS; /* Assume it all worked */
14727
14728 while (sync_size) {
14729 if (vm_map_lookup_entry(map, start, &entry)) {
14730 vm_map_size_t sub_size;
14731 if ((entry->vme_end - start) > sync_size) {
14732 sub_size = sync_size;
14733 sync_size = 0;
14734 } else {
14735 sub_size = entry->vme_end - start;
14736 sync_size -= sub_size;
14737 }
14738 if (entry->is_sub_map) {
14739 vm_map_offset_t sub_start;
14740 vm_map_offset_t sub_end;
14741
14742 sub_start = (start - entry->vme_start)
14743 + VME_OFFSET(entry);
14744 sub_end = sub_start + sub_size;
14745 vm_map_machine_attribute(
14746 VME_SUBMAP(entry),
14747 sub_start,
14748 sub_end,
14749 attribute, value);
14750 } else {
14751 if (VME_OBJECT(entry)) {
14752 vm_page_t m;
14753 vm_object_t object;
14754 vm_object_t base_object;
14755 vm_object_t last_object;
14756 vm_object_offset_t offset;
14757 vm_object_offset_t base_offset;
14758 vm_map_size_t range;
14759 range = sub_size;
14760 offset = (start - entry->vme_start)
14761 + VME_OFFSET(entry);
14762 base_offset = offset;
14763 object = VME_OBJECT(entry);
14764 base_object = object;
14765 last_object = NULL;
14766
14767 vm_object_lock(object);
14768
14769 while (range) {
14770 m = vm_page_lookup(
14771 object, offset);
14772
14773 if (m && !m->vmp_fictitious) {
14774 ret =
14775 pmap_attribute_cache_sync(
14776 VM_PAGE_GET_PHYS_PAGE(m),
14777 PAGE_SIZE,
14778 attribute, value);
14779 } else if (object->shadow) {
14780 offset = offset + object->vo_shadow_offset;
14781 last_object = object;
14782 object = object->shadow;
14783 vm_object_lock(last_object->shadow);
14784 vm_object_unlock(last_object);
14785 continue;
14786 }
14787 range -= PAGE_SIZE;
14788
14789 if (base_object != object) {
14790 vm_object_unlock(object);
14791 vm_object_lock(base_object);
14792 object = base_object;
14793 }
14794 /* Bump to the next page */
14795 base_offset += PAGE_SIZE;
14796 offset = base_offset;
14797 }
14798 vm_object_unlock(object);
14799 }
14800 }
14801 start += sub_size;
14802 } else {
14803 vm_map_unlock(map);
14804 return KERN_FAILURE;
14805 }
14806 }
14807
14808 vm_map_unlock(map);
14809
14810 return ret;
14811 }
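
/*
 * Illustrative sketch (not compiled): flushing the caches for a mapped range
 * via the MATTR_CACHE attribute handled above.  MATTR_VAL_CACHE_FLUSH is
 * assumed to be the standard value from <mach/vm_attributes.h>; the function
 * and variable names are hypothetical.
 */
#if 0
static kern_return_t
example_cache_flush(vm_map_t map, vm_map_offset_t start, vm_map_size_t size)
{
	vm_machine_attribute_val_t value = MATTR_VAL_CACHE_FLUSH;

	return vm_map_machine_attribute(map, start, start + size,
	           MATTR_CACHE, &value);
}
#endif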
14812
14813 /*
14814 * vm_map_behavior_set:
14815 *
14816 * Sets the paging reference behavior of the specified address
14817 * range in the target map. Paging reference behavior affects
14818 * how pagein operations resulting from faults on the map will be
14819 * clustered.
14820 */
14821 kern_return_t
14822 vm_map_behavior_set(
14823 vm_map_t map,
14824 vm_map_offset_t start,
14825 vm_map_offset_t end,
14826 vm_behavior_t new_behavior)
14827 {
14828 vm_map_entry_t entry;
14829 vm_map_entry_t temp_entry;
14830
14831 if (start > end ||
14832 start < vm_map_min(map) ||
14833 end > vm_map_max(map)) {
14834 return KERN_NO_SPACE;
14835 }
14836
14837 switch (new_behavior) {
14838 /*
14839 * This first block of behaviors all set a persistent state on the specified
14840 * memory range. All we have to do here is to record the desired behavior
14841 * in the vm_map_entry_t's.
14842 */
14843
14844 case VM_BEHAVIOR_DEFAULT:
14845 case VM_BEHAVIOR_RANDOM:
14846 case VM_BEHAVIOR_SEQUENTIAL:
14847 case VM_BEHAVIOR_RSEQNTL:
14848 case VM_BEHAVIOR_ZERO_WIRED_PAGES:
14849 vm_map_lock(map);
14850
14851 /*
14852 * The entire address range must be valid for the map.
14853 * Note that vm_map_range_check() does a
14854 * vm_map_lookup_entry() internally and returns the
14855 * entry containing the start of the address range if
14856 * the entire range is valid.
14857 */
14858 if (vm_map_range_check(map, start, end, &temp_entry)) {
14859 entry = temp_entry;
14860 vm_map_clip_start(map, entry, start);
14861 } else {
14862 vm_map_unlock(map);
14863 return KERN_INVALID_ADDRESS;
14864 }
14865
14866 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
14867 vm_map_clip_end(map, entry, end);
14868 if (entry->is_sub_map) {
14869 assert(!entry->use_pmap);
14870 }
14871
14872 if (new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES) {
14873 entry->zero_wired_pages = TRUE;
14874 } else {
14875 entry->behavior = new_behavior;
14876 }
14877 entry = entry->vme_next;
14878 }
14879
14880 vm_map_unlock(map);
14881 break;
14882
14883 /*
14884 * The rest of these are different from the above in that they cause
14885 * an immediate action to take place as opposed to setting a behavior that
14886 * affects future actions.
14887 */
14888
14889 case VM_BEHAVIOR_WILLNEED:
14890 return vm_map_willneed(map, start, end);
14891
14892 case VM_BEHAVIOR_DONTNEED:
14893 return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS);
14894
14895 case VM_BEHAVIOR_FREE:
14896 return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS);
14897
14898 case VM_BEHAVIOR_REUSABLE:
14899 return vm_map_reusable_pages(map, start, end);
14900
14901 case VM_BEHAVIOR_REUSE:
14902 return vm_map_reuse_pages(map, start, end);
14903
14904 case VM_BEHAVIOR_CAN_REUSE:
14905 return vm_map_can_reuse(map, start, end);
14906
14907 #if MACH_ASSERT
14908 case VM_BEHAVIOR_PAGEOUT:
14909 return vm_map_pageout(map, start, end);
14910 #endif /* MACH_ASSERT */
14911
14912 default:
14913 return KERN_INVALID_ARGUMENT;
14914 }
14915
14916 return KERN_SUCCESS;
14917 }
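
/*
 * Illustrative sketch (not compiled): madvise()-style advice ends up here.
 * A "persistent" behavior such as VM_BEHAVIOR_SEQUENTIAL is simply recorded
 * in the entries, while VM_BEHAVIOR_WILLNEED triggers an immediate action.
 * The function name and range variables are hypothetical.
 */
#if 0
static void
example_advise(vm_map_t map, vm_map_offset_t start, vm_map_offset_t end)
{
	/* persistent hint: cluster future pageins for sequential access */
	(void) vm_map_behavior_set(map, start, end, VM_BEHAVIOR_SEQUENTIAL);

	/* immediate action: read ahead / fault in the range now */
	(void) vm_map_behavior_set(map, start, end, VM_BEHAVIOR_WILLNEED);
}
#endif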
14918
14919
14920 /*
14921 * Internals for madvise(MADV_WILLNEED) system call.
14922 *
14923 * The implementation is to:
14924 * a) issue read-ahead if the mapping corresponds to a mapped regular file, or
14925 * b) fault in the pages (zero-fill, decompress, etc.) if it's an anonymous mapping.
14926 */
14927
14928
14929 static kern_return_t
14930 vm_map_willneed(
14931 vm_map_t map,
14932 vm_map_offset_t start,
14933 vm_map_offset_t end
14934 )
14935 {
14936 vm_map_entry_t entry;
14937 vm_object_t object;
14938 memory_object_t pager;
14939 struct vm_object_fault_info fault_info = {};
14940 kern_return_t kr;
14941 vm_object_size_t len;
14942 vm_object_offset_t offset;
14943
14944 fault_info.interruptible = THREAD_UNINT; /* ignored value */
14945 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
14946 fault_info.stealth = TRUE;
14947
14948 /*
14949 * The MADV_WILLNEED operation doesn't require any changes to the
14950 * vm_map_entry_t's, so the read lock is sufficient.
14951 */
14952
14953 vm_map_lock_read(map);
14954
14955 /*
14956 * The madvise semantics require that the address range be fully
14957 * allocated with no holes. Otherwise, we're required to return
14958 * an error.
14959 */
14960
14961 if (!vm_map_range_check(map, start, end, &entry)) {
14962 vm_map_unlock_read(map);
14963 return KERN_INVALID_ADDRESS;
14964 }
14965
14966 /*
14967 * Examine each vm_map_entry_t in the range.
14968 */
14969 for (; entry != vm_map_to_entry(map) && start < end;) {
14970 /*
14971 * The first time through, the start address could be anywhere
14972 * within the vm_map_entry we found. So adjust the offset to
14973 * correspond. After that, the offset will always be zero to
14974 * correspond to the beginning of the current vm_map_entry.
14975 */
14976 offset = (start - entry->vme_start) + VME_OFFSET(entry);
14977
14978 /*
14979 * Set the length so we don't go beyond the end of the
14980 * map_entry or beyond the end of the range we were given.
14981 * This range could also span multiple map entries, all of which
14982 * map different files, so make sure we only do the right amount
14983 * of I/O for each object. Note that it's possible for there
14984 * to be multiple map entries all referring to the same object
14985 * but with different page permissions, but it's not worth
14986 * trying to optimize that case.
14987 */
14988 len = MIN(entry->vme_end - start, end - start);
14989
14990 if ((vm_size_t) len != len) {
14991 /* 32-bit overflow */
14992 len = (vm_size_t) (0 - PAGE_SIZE);
14993 }
14994 fault_info.cluster_size = (vm_size_t) len;
14995 fault_info.lo_offset = offset;
14996 fault_info.hi_offset = offset + len;
14997 fault_info.user_tag = VME_ALIAS(entry);
14998 fault_info.pmap_options = 0;
14999 if (entry->iokit_acct ||
15000 (!entry->is_sub_map && !entry->use_pmap)) {
15001 fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
15002 }
15003
15004 /*
15005 * If the entry is a submap OR there's no read permission
15006 * to this mapping, then just skip it.
15007 */
15008 if ((entry->is_sub_map) || (entry->protection & VM_PROT_READ) == 0) {
15009 entry = entry->vme_next;
15010 start = entry->vme_start;
15011 continue;
15012 }
15013
15014 object = VME_OBJECT(entry);
15015
15016 if (object == NULL ||
15017 object->internal) {
15018 /*
15019 * Memory range backed by anonymous memory.
15020 */
15021 vm_size_t region_size = 0, effective_page_size = 0;
15022 vm_map_offset_t addr = 0, effective_page_mask = 0;
15023
15024 region_size = len;
15025 addr = start;
15026
15027 effective_page_mask = MAX(vm_map_page_mask(current_map()), PAGE_MASK);
15028 effective_page_size = effective_page_mask + 1;
15029
15030 vm_map_unlock_read(map);
15031
15032 while (region_size) {
15033 vm_pre_fault(
15034 vm_map_trunc_page(addr, effective_page_mask),
15035 VM_PROT_READ | VM_PROT_WRITE);
15036
15037 region_size -= effective_page_size;
15038 addr += effective_page_size;
15039 }
15040 } else {
15041 /*
15042 * Find the file object backing this map entry. If there is
15043 * none, then we simply ignore the "will need" advice for this
15044 * entry and go on to the next one.
15045 */
15046 if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) {
15047 entry = entry->vme_next;
15048 start = entry->vme_start;
15049 continue;
15050 }
15051
15052 vm_object_paging_begin(object);
15053 pager = object->pager;
15054 vm_object_unlock(object);
15055
15056 /*
15057 * The data_request() could take a long time, so let's
15058 * release the map lock to avoid blocking other threads.
15059 */
15060 vm_map_unlock_read(map);
15061
15062 /*
15063 * Get the data from the object asynchronously.
15064 *
15065 * Note that memory_object_data_request() places limits on the
15066 * amount of I/O it will do. Regardless of the len we
15067 * specified, it won't do more than MAX_UPL_TRANSFER_BYTES and it
15068 * silently truncates the len to that size. This isn't
15069 * necessarily bad since madvise shouldn't really be used to
15070 * page in unlimited amounts of data. Other Unix variants
15071 * limit the willneed case as well. If this turns out to be an
15072 * issue for developers, then we can always adjust the policy
15073 * here and still be backwards compatible since this is all
15074 * just "advice".
15075 */
15076 kr = memory_object_data_request(
15077 pager,
15078 offset + object->paging_offset,
15079 0, /* ignored */
15080 VM_PROT_READ,
15081 (memory_object_fault_info_t)&fault_info);
15082
15083 vm_object_lock(object);
15084 vm_object_paging_end(object);
15085 vm_object_unlock(object);
15086
15087 /*
15088 * If we couldn't do the I/O for some reason, just give up on
15089 * the madvise. We still return success to the user since
15090 * madvise isn't supposed to fail when the advice can't be
15091 * taken.
15092 */
15093
15094 if (kr != KERN_SUCCESS) {
15095 return KERN_SUCCESS;
15096 }
15097 }
15098
15099 start += len;
15100 if (start >= end) {
15101 /* done */
15102 return KERN_SUCCESS;
15103 }
15104
15105 /* look up next entry */
15106 vm_map_lock_read(map);
15107 if (!vm_map_lookup_entry(map, start, &entry)) {
15108 /*
15109 * There's a new hole in the address range.
15110 */
15111 vm_map_unlock_read(map);
15112 return KERN_INVALID_ADDRESS;
15113 }
15114 }
15115
15116 vm_map_unlock_read(map);
15117 return KERN_SUCCESS;
15118 }
15119
15120 static boolean_t
15121 vm_map_entry_is_reusable(
15122 vm_map_entry_t entry)
15123 {
15124 /* Only user map entries */
15125
15126 vm_object_t object;
15127
15128 if (entry->is_sub_map) {
15129 return FALSE;
15130 }
15131
15132 switch (VME_ALIAS(entry)) {
15133 case VM_MEMORY_MALLOC:
15134 case VM_MEMORY_MALLOC_SMALL:
15135 case VM_MEMORY_MALLOC_LARGE:
15136 case VM_MEMORY_REALLOC:
15137 case VM_MEMORY_MALLOC_TINY:
15138 case VM_MEMORY_MALLOC_LARGE_REUSABLE:
15139 case VM_MEMORY_MALLOC_LARGE_REUSED:
15140 /*
15141 * This is a malloc() memory region: check if it's still
15142 * in its original state and can be re-used for more
15143 * malloc() allocations.
15144 */
15145 break;
15146 default:
15147 /*
15148 * Not a malloc() memory region: let the caller decide if
15149 * it's re-usable.
15150 */
15151 return TRUE;
15152 }
15153
15154 if (/*entry->is_shared ||*/
15155 entry->is_sub_map ||
15156 entry->in_transition ||
15157 entry->protection != VM_PROT_DEFAULT ||
15158 entry->max_protection != VM_PROT_ALL ||
15159 entry->inheritance != VM_INHERIT_DEFAULT ||
15160 entry->no_cache ||
15161 entry->permanent ||
15162 entry->superpage_size != FALSE ||
15163 entry->zero_wired_pages ||
15164 entry->wired_count != 0 ||
15165 entry->user_wired_count != 0) {
15166 return FALSE;
15167 }
15168
15169 object = VME_OBJECT(entry);
15170 if (object == VM_OBJECT_NULL) {
15171 return TRUE;
15172 }
15173 if (
15174 #if 0
15175 /*
15176 * Let's proceed even if the VM object is potentially
15177 * shared.
15178 * We check for this later when processing the actual
15179 * VM pages, so the contents will be safe if shared.
15180 *
15181 * But we can still mark this memory region as "reusable" to
15182 * acknowledge that the caller did let us know that the memory
15183 * could be re-used and should not be penalized for holding
15184 * on to it. This allows its "resident size" to not include
15185 * the reusable range.
15186 */
15187 object->ref_count == 1 &&
15188 #endif
15189 object->wired_page_count == 0 &&
15190 object->copy == VM_OBJECT_NULL &&
15191 object->shadow == VM_OBJECT_NULL &&
15192 object->internal &&
15193 object->purgable == VM_PURGABLE_DENY &&
15194 object->copy_strategy != MEMORY_OBJECT_COPY_DELAY &&
15195 !object->true_share &&
15196 object->wimg_bits == VM_WIMG_USE_DEFAULT &&
15197 !object->code_signed) {
15198 return TRUE;
15199 }
15200 return FALSE;
15201 }
15202
15203 static kern_return_t
15204 vm_map_reuse_pages(
15205 vm_map_t map,
15206 vm_map_offset_t start,
15207 vm_map_offset_t end)
15208 {
15209 vm_map_entry_t entry;
15210 vm_object_t object;
15211 vm_object_offset_t start_offset, end_offset;
15212
15213 /*
15214 * The MADV_REUSE operation doesn't require any changes to the
15215 * vm_map_entry_t's, so the read lock is sufficient.
15216 */
15217
15218 vm_map_lock_read(map);
15219 assert(map->pmap != kernel_pmap); /* protect alias access */
15220
15221 /*
15222 * The madvise semantics require that the address range be fully
15223 * allocated with no holes. Otherwise, we're required to return
15224 * an error.
15225 */
15226
15227 if (!vm_map_range_check(map, start, end, &entry)) {
15228 vm_map_unlock_read(map);
15229 vm_page_stats_reusable.reuse_pages_failure++;
15230 return KERN_INVALID_ADDRESS;
15231 }
15232
15233 /*
15234 * Examine each vm_map_entry_t in the range.
15235 */
15236 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
15237 entry = entry->vme_next) {
15238 /*
15239 * Sanity check on the VM map entry.
15240 */
15241 if (!vm_map_entry_is_reusable(entry)) {
15242 vm_map_unlock_read(map);
15243 vm_page_stats_reusable.reuse_pages_failure++;
15244 return KERN_INVALID_ADDRESS;
15245 }
15246
15247 /*
15248 * The first time through, the start address could be anywhere
15249 * within the vm_map_entry we found. So adjust the offset to
15250 * correspond.
15251 */
15252 if (entry->vme_start < start) {
15253 start_offset = start - entry->vme_start;
15254 } else {
15255 start_offset = 0;
15256 }
15257 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
15258 start_offset += VME_OFFSET(entry);
15259 end_offset += VME_OFFSET(entry);
15260
15261 assert(!entry->is_sub_map);
15262 object = VME_OBJECT(entry);
15263 if (object != VM_OBJECT_NULL) {
15264 vm_object_lock(object);
15265 vm_object_reuse_pages(object, start_offset, end_offset,
15266 TRUE);
15267 vm_object_unlock(object);
15268 }
15269
15270 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSABLE) {
15271 /*
15272 * XXX
15273 * We do not hold the VM map exclusively here.
15274 * The "alias" field is not that critical, so it's
15275 * safe to update it here, as long as it is the only
15276 * one that can be modified while holding the VM map
15277 * "shared".
15278 */
15279 VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSED);
15280 }
15281 }
15282
15283 vm_map_unlock_read(map);
15284 vm_page_stats_reusable.reuse_pages_success++;
15285 return KERN_SUCCESS;
15286 }
15287
15288
15289 static kern_return_t
15290 vm_map_reusable_pages(
15291 vm_map_t map,
15292 vm_map_offset_t start,
15293 vm_map_offset_t end)
15294 {
15295 vm_map_entry_t entry;
15296 vm_object_t object;
15297 vm_object_offset_t start_offset, end_offset;
15298 vm_map_offset_t pmap_offset;
15299
15300 /*
15301 * The MADV_REUSABLE operation doesn't require any changes to the
15302 * vm_map_entry_t's, so the read lock is sufficient.
15303 */
15304
15305 vm_map_lock_read(map);
15306 assert(map->pmap != kernel_pmap); /* protect alias access */
15307
15308 /*
15309 * The madvise semantics require that the address range be fully
15310 * allocated with no holes. Otherwise, we're required to return
15311 * an error.
15312 */
15313
15314 if (!vm_map_range_check(map, start, end, &entry)) {
15315 vm_map_unlock_read(map);
15316 vm_page_stats_reusable.reusable_pages_failure++;
15317 return KERN_INVALID_ADDRESS;
15318 }
15319
15320 /*
15321 * Examine each vm_map_entry_t in the range.
15322 */
15323 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
15324 entry = entry->vme_next) {
15325 int kill_pages = 0;
15326
15327 /*
15328 * Sanity check on the VM map entry.
15329 */
15330 if (!vm_map_entry_is_reusable(entry)) {
15331 vm_map_unlock_read(map);
15332 vm_page_stats_reusable.reusable_pages_failure++;
15333 return KERN_INVALID_ADDRESS;
15334 }
15335
15336 if (!(entry->protection & VM_PROT_WRITE) && !entry->used_for_jit) {
15337 /* not writable: can't discard contents */
15338 vm_map_unlock_read(map);
15339 vm_page_stats_reusable.reusable_nonwritable++;
15340 vm_page_stats_reusable.reusable_pages_failure++;
15341 return KERN_PROTECTION_FAILURE;
15342 }
15343
15344 /*
15345 * The first time through, the start address could be anywhere
15346 * within the vm_map_entry we found. So adjust the offset to
15347 * correspond.
15348 */
15349 if (entry->vme_start < start) {
15350 start_offset = start - entry->vme_start;
15351 pmap_offset = start;
15352 } else {
15353 start_offset = 0;
15354 pmap_offset = entry->vme_start;
15355 }
15356 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
15357 start_offset += VME_OFFSET(entry);
15358 end_offset += VME_OFFSET(entry);
15359
15360 assert(!entry->is_sub_map);
15361 object = VME_OBJECT(entry);
15362 if (object == VM_OBJECT_NULL) {
15363 continue;
15364 }
15365
15366
15367 vm_object_lock(object);
15368 if (((object->ref_count == 1) ||
15369 (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC &&
15370 object->copy == VM_OBJECT_NULL)) &&
15371 object->shadow == VM_OBJECT_NULL &&
15372 /*
15373 * "iokit_acct" entries are billed for their virtual size
15374 * (rather than for their resident pages only), so they
15375 * wouldn't benefit from making pages reusable, and it
15376 * would be hard to keep track of pages that are both
15377 * "iokit_acct" and "reusable" in the pmap stats and
15378 * ledgers.
15379 */
15380 !(entry->iokit_acct ||
15381 (!entry->is_sub_map && !entry->use_pmap))) {
15382 if (object->ref_count != 1) {
15383 vm_page_stats_reusable.reusable_shared++;
15384 }
15385 kill_pages = 1;
15386 } else {
15387 kill_pages = -1;
15388 }
15389 if (kill_pages != -1) {
15390 vm_object_deactivate_pages(object,
15391 start_offset,
15392 end_offset - start_offset,
15393 kill_pages,
15394 TRUE /*reusable_pages*/,
15395 map->pmap,
15396 pmap_offset);
15397 } else {
15398 vm_page_stats_reusable.reusable_pages_shared++;
15399 }
15400 vm_object_unlock(object);
15401
15402 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE ||
15403 VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSED) {
15404 /*
15405 * XXX
15406 * We do not hold the VM map exclusively here.
15407 * The "alias" field is not that critical, so it's
15408 * safe to update it here, as long as it is the only
15409 * one that can be modified while holding the VM map
15410 * "shared".
15411 */
15412 VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSABLE);
15413 }
15414 }
15415
15416 vm_map_unlock_read(map);
15417 vm_page_stats_reusable.reusable_pages_success++;
15418 return KERN_SUCCESS;
15419 }
15420
15421
15422 static kern_return_t
15423 vm_map_can_reuse(
15424 vm_map_t map,
15425 vm_map_offset_t start,
15426 vm_map_offset_t end)
15427 {
15428 vm_map_entry_t entry;
15429
15430 /*
15431 * The MADV_CAN_REUSE operation doesn't require any changes to the
15432 * vm_map_entry_t's, so the read lock is sufficient.
15433 */
15434
15435 vm_map_lock_read(map);
15436 assert(map->pmap != kernel_pmap); /* protect alias access */
15437
15438 /*
15439 * The madvise semantics require that the address range be fully
15440 * allocated with no holes. Otherwise, we're required to return
15441 * an error.
15442 */
15443
15444 if (!vm_map_range_check(map, start, end, &entry)) {
15445 vm_map_unlock_read(map);
15446 vm_page_stats_reusable.can_reuse_failure++;
15447 return KERN_INVALID_ADDRESS;
15448 }
15449
15450 /*
15451 * Examine each vm_map_entry_t in the range.
15452 */
15453 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
15454 entry = entry->vme_next) {
15455 /*
15456 * Sanity check on the VM map entry.
15457 */
15458 if (!vm_map_entry_is_reusable(entry)) {
15459 vm_map_unlock_read(map);
15460 vm_page_stats_reusable.can_reuse_failure++;
15461 return KERN_INVALID_ADDRESS;
15462 }
15463 }
15464
15465 vm_map_unlock_read(map);
15466 vm_page_stats_reusable.can_reuse_success++;
15467 return KERN_SUCCESS;
15468 }
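
/*
 * Illustrative sketch (not compiled): how the "reuse" behaviors above fit
 * together for a malloc-style allocator.  A freed block is offered back with
 * VM_BEHAVIOR_REUSABLE and reclaimed with VM_BEHAVIOR_REUSE before it is
 * handed out again; VM_BEHAVIOR_CAN_REUSE only validates the range.  The
 * function and variable names are hypothetical.
 */
#if 0
static void
example_free_and_realloc(
	vm_map_t        map,
	vm_map_offset_t blk_start,
	vm_map_offset_t blk_end)
{
	/* block was freed: let the VM discard/repurpose its pages */
	(void) vm_map_behavior_set(map, blk_start, blk_end, VM_BEHAVIOR_REUSABLE);

	/* block is handed out again: stop treating its pages as reusable */
	(void) vm_map_behavior_set(map, blk_start, blk_end, VM_BEHAVIOR_REUSE);
}
#endif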
15469
15470
15471 #if MACH_ASSERT
15472 static kern_return_t
15473 vm_map_pageout(
15474 vm_map_t map,
15475 vm_map_offset_t start,
15476 vm_map_offset_t end)
15477 {
15478 vm_map_entry_t entry;
15479
15480 /*
15481 * The MADV_PAGEOUT operation doesn't require any changes to the
15482 * vm_map_entry_t's, so the read lock is sufficient.
15483 */
15484
15485 vm_map_lock_read(map);
15486
15487 /*
15488 * The madvise semantics require that the address range be fully
15489 * allocated with no holes. Otherwise, we're required to return
15490 * an error.
15491 */
15492
15493 if (!vm_map_range_check(map, start, end, &entry)) {
15494 vm_map_unlock_read(map);
15495 return KERN_INVALID_ADDRESS;
15496 }
15497
15498 /*
15499 * Examine each vm_map_entry_t in the range.
15500 */
15501 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
15502 entry = entry->vme_next) {
15503 vm_object_t object;
15504
15505 /*
15506 * Sanity check on the VM map entry.
15507 */
15508 if (entry->is_sub_map) {
15509 vm_map_t submap;
15510 vm_map_offset_t submap_start;
15511 vm_map_offset_t submap_end;
15512 vm_map_entry_t submap_entry;
15513
15514 submap = VME_SUBMAP(entry);
15515 submap_start = VME_OFFSET(entry);
15516 submap_end = submap_start + (entry->vme_end -
15517 entry->vme_start);
15518
15519 vm_map_lock_read(submap);
15520
15521 if (!vm_map_range_check(submap,
15522 submap_start,
15523 submap_end,
15524 &submap_entry)) {
15525 vm_map_unlock_read(submap);
15526 vm_map_unlock_read(map);
15527 return KERN_INVALID_ADDRESS;
15528 }
15529
15530 object = VME_OBJECT(submap_entry);
15531 if (submap_entry->is_sub_map ||
15532 object == VM_OBJECT_NULL ||
15533 !object->internal) {
15534 vm_map_unlock_read(submap);
15535 continue;
15536 }
15537
15538 vm_object_pageout(object);
15539
15540 vm_map_unlock_read(submap);
15541 submap = VM_MAP_NULL;
15542 submap_entry = VM_MAP_ENTRY_NULL;
15543 continue;
15544 }
15545
15546 object = VME_OBJECT(entry);
15547 if (entry->is_sub_map ||
15548 object == VM_OBJECT_NULL ||
15549 !object->internal) {
15550 continue;
15551 }
15552
15553 vm_object_pageout(object);
15554 }
15555
15556 vm_map_unlock_read(map);
15557 return KERN_SUCCESS;
15558 }
15559 #endif /* MACH_ASSERT */
15560
15561
15562 /*
15563 * Routine: vm_map_entry_insert
15564 *
15565 * Description: This routine inserts a new vm_map_entry into a locked map.
15566 */
15567 vm_map_entry_t
15568 vm_map_entry_insert(
15569 vm_map_t map,
15570 vm_map_entry_t insp_entry,
15571 vm_map_offset_t start,
15572 vm_map_offset_t end,
15573 vm_object_t object,
15574 vm_object_offset_t offset,
15575 boolean_t needs_copy,
15576 boolean_t is_shared,
15577 boolean_t in_transition,
15578 vm_prot_t cur_protection,
15579 vm_prot_t max_protection,
15580 vm_behavior_t behavior,
15581 vm_inherit_t inheritance,
15582 unsigned wired_count,
15583 boolean_t no_cache,
15584 boolean_t permanent,
15585 boolean_t no_copy_on_read,
15586 unsigned int superpage_size,
15587 boolean_t clear_map_aligned,
15588 boolean_t is_submap,
15589 boolean_t used_for_jit,
15590 int alias)
15591 {
15592 vm_map_entry_t new_entry;
15593
15594 assert(insp_entry != (vm_map_entry_t)0);
15595 vm_map_lock_assert_exclusive(map);
15596
15597 #if DEVELOPMENT || DEBUG
15598 vm_object_offset_t end_offset = 0;
15599 assertf(!os_add_overflow(end - start, offset, &end_offset), "size 0x%llx, offset 0x%llx caused overflow", (uint64_t)(end - start), offset);
15600 #endif /* DEVELOPMENT || DEBUG */
15601
15602 new_entry = vm_map_entry_create(map, !map->hdr.entries_pageable);
15603
15604 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
15605 new_entry->map_aligned = TRUE;
15606 } else {
15607 new_entry->map_aligned = FALSE;
15608 }
15609 if (clear_map_aligned &&
15610 (!VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)) ||
15611 !VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)))) {
15612 new_entry->map_aligned = FALSE;
15613 }
15614
15615 new_entry->vme_start = start;
15616 new_entry->vme_end = end;
15617 assert(page_aligned(new_entry->vme_start));
15618 assert(page_aligned(new_entry->vme_end));
15619 if (new_entry->map_aligned) {
15620 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
15621 VM_MAP_PAGE_MASK(map)));
15622 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
15623 VM_MAP_PAGE_MASK(map)));
15624 }
15625 assert(new_entry->vme_start < new_entry->vme_end);
15626
15627 VME_OBJECT_SET(new_entry, object);
15628 VME_OFFSET_SET(new_entry, offset);
15629 new_entry->is_shared = is_shared;
15630 new_entry->is_sub_map = is_submap;
15631 new_entry->needs_copy = needs_copy;
15632 new_entry->in_transition = in_transition;
15633 new_entry->needs_wakeup = FALSE;
15634 new_entry->inheritance = inheritance;
15635 new_entry->protection = cur_protection;
15636 new_entry->max_protection = max_protection;
15637 new_entry->behavior = behavior;
15638 new_entry->wired_count = wired_count;
15639 new_entry->user_wired_count = 0;
15640 if (is_submap) {
15641 /*
15642 * submap: "use_pmap" means "nested".
15643 * default: false.
15644 */
15645 new_entry->use_pmap = FALSE;
15646 } else {
15647 /*
15648 * object: "use_pmap" means "use pmap accounting" for footprint.
15649 * default: true.
15650 */
15651 new_entry->use_pmap = TRUE;
15652 }
15653 VME_ALIAS_SET(new_entry, alias);
15654 new_entry->zero_wired_pages = FALSE;
15655 new_entry->no_cache = no_cache;
15656 new_entry->permanent = permanent;
15657 if (superpage_size) {
15658 new_entry->superpage_size = TRUE;
15659 } else {
15660 new_entry->superpage_size = FALSE;
15661 }
15662 if (used_for_jit) {
15663 #if CONFIG_EMBEDDED
15664 if (!(map->jit_entry_exists))
15665 #endif /* CONFIG_EMBEDDED */
15666 {
15667 new_entry->used_for_jit = TRUE;
15668 map->jit_entry_exists = TRUE;
15669 }
15670 } else {
15671 new_entry->used_for_jit = FALSE;
15672 }
15673 new_entry->pmap_cs_associated = FALSE;
15674 new_entry->iokit_acct = FALSE;
15675 new_entry->vme_resilient_codesign = FALSE;
15676 new_entry->vme_resilient_media = FALSE;
15677 new_entry->vme_atomic = FALSE;
15678 new_entry->vme_no_copy_on_read = no_copy_on_read;
15679
15680 /*
15681 * Insert the new entry into the list.
15682 */
15683
15684 vm_map_store_entry_link(map, insp_entry, new_entry,
15685 VM_MAP_KERNEL_FLAGS_NONE);
15686 map->size += end - start;
15687
15688 /*
15689 * Update the free space hint and the lookup hint.
15690 */
15691
15692 SAVE_HINT_MAP_WRITE(map, new_entry);
15693 return new_entry;
15694 }
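
/*
 * Illustrative sketch (not compiled): inserting a plain anonymous mapping
 * with default attributes via vm_map_entry_insert() above.  The map must be
 * locked for writing and "insp_entry" is the entry to insert after (e.g. the
 * result of a failed vm_map_lookup_entry()).  The wrapper name is
 * hypothetical; the argument order follows the definition above.
 */
#if 0
static vm_map_entry_t
example_insert_anonymous(
	vm_map_t           map,          /* locked for writing by the caller */
	vm_map_entry_t     insp_entry,   /* entry to insert after */
	vm_map_offset_t    start,
	vm_map_offset_t    end,
	vm_object_t        object,
	vm_object_offset_t offset)
{
	return vm_map_entry_insert(map, insp_entry, start, end,
	           object, offset,
	           FALSE,                 /* needs_copy */
	           FALSE,                 /* is_shared */
	           FALSE,                 /* in_transition */
	           VM_PROT_DEFAULT,       /* cur_protection */
	           VM_PROT_ALL,           /* max_protection */
	           VM_BEHAVIOR_DEFAULT,   /* behavior */
	           VM_INHERIT_DEFAULT,    /* inheritance */
	           0,                     /* wired_count */
	           FALSE,                 /* no_cache */
	           FALSE,                 /* permanent */
	           FALSE,                 /* no_copy_on_read */
	           0,                     /* superpage_size */
	           FALSE,                 /* clear_map_aligned */
	           FALSE,                 /* is_submap */
	           FALSE,                 /* used_for_jit */
	           0);                    /* alias (user tag) */
}
#endif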
15695
15696 /*
15697 * Routine: vm_map_remap_extract
15698 *
15699 * Description: This routine extracts a vm_map_entry list from a map into "map_header".
15700 */
15701 static kern_return_t
15702 vm_map_remap_extract(
15703 vm_map_t map,
15704 vm_map_offset_t addr,
15705 vm_map_size_t size,
15706 boolean_t copy,
15707 struct vm_map_header *map_header,
15708 vm_prot_t *cur_protection,
15709 vm_prot_t *max_protection,
15710 /* What, no behavior? */
15711 vm_inherit_t inheritance,
15712 boolean_t pageable,
15713 boolean_t same_map,
15714 vm_map_kernel_flags_t vmk_flags)
15715 {
15716 kern_return_t result;
15717 vm_map_size_t mapped_size;
15718 vm_map_size_t tmp_size;
15719 vm_map_entry_t src_entry; /* result of last map lookup */
15720 vm_map_entry_t new_entry;
15721 vm_object_offset_t offset;
15722 vm_map_offset_t map_address;
15723 vm_map_offset_t src_start; /* start of entry to map */
15724 vm_map_offset_t src_end; /* end of region to be mapped */
15725 vm_object_t object;
15726 vm_map_version_t version;
15727 boolean_t src_needs_copy;
15728 boolean_t new_entry_needs_copy;
15729 vm_map_entry_t saved_src_entry;
15730 boolean_t src_entry_was_wired;
15731 vm_prot_t max_prot_for_prot_copy;
15732
15733 assert(map != VM_MAP_NULL);
15734 assert(size != 0);
15735 assert(size == vm_map_round_page(size, PAGE_MASK));
15736 assert(inheritance == VM_INHERIT_NONE ||
15737 inheritance == VM_INHERIT_COPY ||
15738 inheritance == VM_INHERIT_SHARE);
15739
15740 /*
15741 * Compute start and end of region.
15742 */
15743 src_start = vm_map_trunc_page(addr, PAGE_MASK);
15744 src_end = vm_map_round_page(src_start + size, PAGE_MASK);
15745
15746
15747 /*
15748 * Initialize map_header.
15749 */
15750 map_header->links.next = CAST_TO_VM_MAP_ENTRY(&map_header->links);
15751 map_header->links.prev = CAST_TO_VM_MAP_ENTRY(&map_header->links);
15752 map_header->nentries = 0;
15753 map_header->entries_pageable = pageable;
15754 map_header->page_shift = PAGE_SHIFT;
15755
15756 vm_map_store_init( map_header );
15757
15758 if (copy && vmk_flags.vmkf_remap_prot_copy) {
15759 max_prot_for_prot_copy = *max_protection & VM_PROT_ALL;
15760 } else {
15761 max_prot_for_prot_copy = VM_PROT_NONE;
15762 }
15763 *cur_protection = VM_PROT_ALL;
15764 *max_protection = VM_PROT_ALL;
15765
15766 map_address = 0;
15767 mapped_size = 0;
15768 result = KERN_SUCCESS;
15769
15770 /*
15771 * The specified source virtual space might correspond to
15772 * multiple map entries, need to loop on them.
15773 */
15774 vm_map_lock(map);
15775 while (mapped_size != size) {
15776 vm_map_size_t entry_size;
15777
15778 /*
15779 * Find the beginning of the region.
15780 */
15781 if (!vm_map_lookup_entry(map, src_start, &src_entry)) {
15782 result = KERN_INVALID_ADDRESS;
15783 break;
15784 }
15785
15786 if (src_start < src_entry->vme_start ||
15787 (mapped_size && src_start != src_entry->vme_start)) {
15788 result = KERN_INVALID_ADDRESS;
15789 break;
15790 }
15791
15792 tmp_size = size - mapped_size;
15793 if (src_end > src_entry->vme_end) {
15794 tmp_size -= (src_end - src_entry->vme_end);
15795 }
15796
15797 entry_size = (vm_map_size_t)(src_entry->vme_end -
15798 src_entry->vme_start);
15799
15800 if (src_entry->is_sub_map) {
15801 vm_map_reference(VME_SUBMAP(src_entry));
15802 object = VM_OBJECT_NULL;
15803 } else {
15804 object = VME_OBJECT(src_entry);
15805 if (src_entry->iokit_acct) {
15806 /*
15807 * This entry uses "IOKit accounting".
15808 */
15809 } else if (object != VM_OBJECT_NULL &&
15810 (object->purgable != VM_PURGABLE_DENY ||
15811 object->vo_ledger_tag != VM_LEDGER_TAG_NONE)) {
15812 /*
15813 * Purgeable objects have their own accounting:
15814 * no pmap accounting for them.
15815 */
15816 assertf(!src_entry->use_pmap,
15817 "map=%p src_entry=%p [0x%llx:0x%llx] 0x%x/0x%x %d",
15818 map,
15819 src_entry,
15820 (uint64_t)src_entry->vme_start,
15821 (uint64_t)src_entry->vme_end,
15822 src_entry->protection,
15823 src_entry->max_protection,
15824 VME_ALIAS(src_entry));
15825 } else {
15826 /*
15827 * Not IOKit or purgeable:
15828 * must be accounted by pmap stats.
15829 */
15830 assertf(src_entry->use_pmap,
15831 "map=%p src_entry=%p [0x%llx:0x%llx] 0x%x/0x%x %d",
15832 map,
15833 src_entry,
15834 (uint64_t)src_entry->vme_start,
15835 (uint64_t)src_entry->vme_end,
15836 src_entry->protection,
15837 src_entry->max_protection,
15838 VME_ALIAS(src_entry));
15839 }
15840
15841 if (object == VM_OBJECT_NULL) {
15842 object = vm_object_allocate(entry_size);
15843 VME_OFFSET_SET(src_entry, 0);
15844 VME_OBJECT_SET(src_entry, object);
15845 assert(src_entry->use_pmap);
15846 } else if (object->copy_strategy !=
15847 MEMORY_OBJECT_COPY_SYMMETRIC) {
15848 /*
15849 * We are already using an asymmetric
15850 * copy, and therefore we already have
15851 * the right object.
15852 */
15853 assert(!src_entry->needs_copy);
15854 } else if (src_entry->needs_copy || object->shadowed ||
15855 (object->internal && !object->true_share &&
15856 !src_entry->is_shared &&
15857 object->vo_size > entry_size)) {
15858 VME_OBJECT_SHADOW(src_entry, entry_size);
15859 assert(src_entry->use_pmap);
15860
15861 if (!src_entry->needs_copy &&
15862 (src_entry->protection & VM_PROT_WRITE)) {
15863 vm_prot_t prot;
15864
15865 assert(!pmap_has_prot_policy(src_entry->protection));
15866
15867 prot = src_entry->protection & ~VM_PROT_WRITE;
15868
15869 if (override_nx(map,
15870 VME_ALIAS(src_entry))
15871 && prot) {
15872 prot |= VM_PROT_EXECUTE;
15873 }
15874
15875 assert(!pmap_has_prot_policy(prot));
15876
15877 if (map->mapped_in_other_pmaps) {
15878 vm_object_pmap_protect(
15879 VME_OBJECT(src_entry),
15880 VME_OFFSET(src_entry),
15881 entry_size,
15882 PMAP_NULL,
15883 src_entry->vme_start,
15884 prot);
15885 } else {
15886 pmap_protect(vm_map_pmap(map),
15887 src_entry->vme_start,
15888 src_entry->vme_end,
15889 prot);
15890 }
15891 }
15892
15893 object = VME_OBJECT(src_entry);
15894 src_entry->needs_copy = FALSE;
15895 }
15896
15897
15898 vm_object_lock(object);
15899 vm_object_reference_locked(object); /* object ref. for new entry */
15900 if (object->copy_strategy ==
15901 MEMORY_OBJECT_COPY_SYMMETRIC) {
15902 object->copy_strategy =
15903 MEMORY_OBJECT_COPY_DELAY;
15904 }
15905 vm_object_unlock(object);
15906 }
15907
15908 offset = (VME_OFFSET(src_entry) +
15909 (src_start - src_entry->vme_start));
15910
15911 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
15912 vm_map_entry_copy(new_entry, src_entry);
15913 if (new_entry->is_sub_map) {
15914 /* clr address space specifics */
15915 new_entry->use_pmap = FALSE;
15916 } else if (copy) {
15917 /*
15918 * We're dealing with a copy-on-write operation,
15919 * so the resulting mapping should not inherit the
15920 * original mapping's accounting settings.
15921 * "use_pmap" should be reset to its default (TRUE)
15922 * so that the new mapping gets accounted for in
15923 * the task's memory footprint.
15924 */
15925 new_entry->use_pmap = TRUE;
15926 }
15927 /* "iokit_acct" was cleared in vm_map_entry_copy() */
15928 assert(!new_entry->iokit_acct);
15929
15930 new_entry->map_aligned = FALSE;
15931
15932 new_entry->vme_start = map_address;
15933 new_entry->vme_end = map_address + tmp_size;
15934 assert(new_entry->vme_start < new_entry->vme_end);
15935 if (copy && vmk_flags.vmkf_remap_prot_copy) {
15936 /*
15937 * Remapping for vm_map_protect(VM_PROT_COPY)
15938 * to convert a read-only mapping into a
15939 * copy-on-write version of itself but
15940 * with write access:
15941 * keep the original inheritance and add
15942 * VM_PROT_WRITE to the max protection.
15943 */
15944 new_entry->inheritance = src_entry->inheritance;
15945 new_entry->protection &= max_prot_for_prot_copy;
15946 new_entry->max_protection |= VM_PROT_WRITE;
15947 } else {
15948 new_entry->inheritance = inheritance;
15949 }
15950 VME_OFFSET_SET(new_entry, offset);
15951
15952 /*
15953 * The new region has to be copied now if required.
15954 */
15955 RestartCopy:
15956 if (!copy) {
15957 if (src_entry->used_for_jit == TRUE) {
15958 if (same_map) {
15959 } else {
15960 #if CONFIG_EMBEDDED
15961 /*
15962 * Cannot allow an entry describing a JIT
15963 * region to be shared across address spaces.
15964 */
15965 result = KERN_INVALID_ARGUMENT;
15966 break;
15967 #endif /* CONFIG_EMBEDDED */
15968 }
15969 }
15970
15971 src_entry->is_shared = TRUE;
15972 new_entry->is_shared = TRUE;
15973 if (!(new_entry->is_sub_map)) {
15974 new_entry->needs_copy = FALSE;
15975 }
15976 } else if (src_entry->is_sub_map) {
15977 /* make this a COW sub_map if not already */
15978 assert(new_entry->wired_count == 0);
15979 new_entry->needs_copy = TRUE;
15980 object = VM_OBJECT_NULL;
15981 } else if (src_entry->wired_count == 0 &&
15982 vm_object_copy_quickly(VME_OBJECT_PTR(new_entry),
15983 VME_OFFSET(new_entry),
15984 (new_entry->vme_end -
15985 new_entry->vme_start),
15986 &src_needs_copy,
15987 &new_entry_needs_copy)) {
15988 new_entry->needs_copy = new_entry_needs_copy;
15989 new_entry->is_shared = FALSE;
15990 assertf(new_entry->use_pmap, "map %p new_entry %p\n", map, new_entry);
15991
15992 /*
15993 * Handle copy_on_write semantics.
15994 */
15995 if (src_needs_copy && !src_entry->needs_copy) {
15996 vm_prot_t prot;
15997
15998 assert(!pmap_has_prot_policy(src_entry->protection));
15999
16000 prot = src_entry->protection & ~VM_PROT_WRITE;
16001
16002 if (override_nx(map,
16003 VME_ALIAS(src_entry))
16004 && prot) {
16005 prot |= VM_PROT_EXECUTE;
16006 }
16007
16008 assert(!pmap_has_prot_policy(prot));
16009
16010 vm_object_pmap_protect(object,
16011 offset,
16012 entry_size,
16013 ((src_entry->is_shared
16014 || map->mapped_in_other_pmaps) ?
16015 PMAP_NULL : map->pmap),
16016 src_entry->vme_start,
16017 prot);
16018
16019 assert(src_entry->wired_count == 0);
16020 src_entry->needs_copy = TRUE;
16021 }
16022 /*
16023 * Throw away the old object reference of the new entry.
16024 */
16025 vm_object_deallocate(object);
16026 } else {
16027 new_entry->is_shared = FALSE;
16028 assertf(new_entry->use_pmap, "map %p new_entry %p\n", map, new_entry);
16029
16030 src_entry_was_wired = (src_entry->wired_count > 0);
16031 saved_src_entry = src_entry;
16032 src_entry = VM_MAP_ENTRY_NULL;
16033
16034 /*
16035 * The map can be safely unlocked since we
16036 * already hold a reference on the object.
16037 *
16038 * Record the timestamp of the map for later
16039 * verification, and unlock the map.
16040 */
16041 version.main_timestamp = map->timestamp;
16042 vm_map_unlock(map); /* Increments timestamp once! */
16043
16044 /*
16045 * Perform the copy.
16046 */
16047 if (src_entry_was_wired > 0) {
16048 vm_object_lock(object);
16049 result = vm_object_copy_slowly(
16050 object,
16051 offset,
16052 (new_entry->vme_end -
16053 new_entry->vme_start),
16054 THREAD_UNINT,
16055 VME_OBJECT_PTR(new_entry));
16056
16057 VME_OFFSET_SET(new_entry, 0);
16058 new_entry->needs_copy = FALSE;
16059 } else {
16060 vm_object_offset_t new_offset;
16061
16062 new_offset = VME_OFFSET(new_entry);
16063 result = vm_object_copy_strategically(
16064 object,
16065 offset,
16066 (new_entry->vme_end -
16067 new_entry->vme_start),
16068 VME_OBJECT_PTR(new_entry),
16069 &new_offset,
16070 &new_entry_needs_copy);
16071 if (new_offset != VME_OFFSET(new_entry)) {
16072 VME_OFFSET_SET(new_entry, new_offset);
16073 }
16074
16075 new_entry->needs_copy = new_entry_needs_copy;
16076 }
16077
16078 /*
16079 * Throw away the old object reference of the new entry.
16080 */
16081 vm_object_deallocate(object);
16082
16083 if (result != KERN_SUCCESS &&
16084 result != KERN_MEMORY_RESTART_COPY) {
16085 _vm_map_entry_dispose(map_header, new_entry);
16086 vm_map_lock(map);
16087 break;
16088 }
16089
16090 /*
16091 * Verify that the map has not substantially
16092 * changed while the copy was being made.
16093 */
16094
16095 vm_map_lock(map);
16096 if (version.main_timestamp + 1 != map->timestamp) {
16097 /*
16098 * Simple version comparison failed.
16099 *
16100 * Retry the lookup and verify that the
16101 * same object/offset are still present.
16102 */
16103 saved_src_entry = VM_MAP_ENTRY_NULL;
16104 vm_object_deallocate(VME_OBJECT(new_entry));
16105 _vm_map_entry_dispose(map_header, new_entry);
16106 if (result == KERN_MEMORY_RESTART_COPY) {
16107 result = KERN_SUCCESS;
16108 }
16109 continue;
16110 }
16111 /* map hasn't changed: src_entry is still valid */
16112 src_entry = saved_src_entry;
16113 saved_src_entry = VM_MAP_ENTRY_NULL;
16114
16115 if (result == KERN_MEMORY_RESTART_COPY) {
16116 vm_object_reference(object);
16117 goto RestartCopy;
16118 }
16119 }
16120
16121 _vm_map_store_entry_link(map_header,
16122 map_header->links.prev, new_entry);
16123
16124 /* Protections for submap mapping are irrelevant here */
16125 if (!src_entry->is_sub_map) {
16126 *cur_protection &= src_entry->protection;
16127 *max_protection &= src_entry->max_protection;
16128 }
16129 map_address += tmp_size;
16130 mapped_size += tmp_size;
16131 src_start += tmp_size;
16132 } /* end while */
16133
16134 vm_map_unlock(map);
16135 if (result != KERN_SUCCESS) {
16136 /*
16137 * Free all allocated elements.
16138 */
16139 for (src_entry = map_header->links.next;
16140 src_entry != CAST_TO_VM_MAP_ENTRY(&map_header->links);
16141 src_entry = new_entry) {
16142 new_entry = src_entry->vme_next;
16143 _vm_map_store_entry_unlink(map_header, src_entry);
16144 if (src_entry->is_sub_map) {
16145 vm_map_deallocate(VME_SUBMAP(src_entry));
16146 } else {
16147 vm_object_deallocate(VME_OBJECT(src_entry));
16148 }
16149 _vm_map_entry_dispose(map_header, src_entry);
16150 }
16151 }
16152 return result;
16153 }
16154
16155 /*
16156 * Routine: vm_remap
16157 *
16158 * Maps a portion of a task's address space.
16159 * The mapped region must not overlap more than
16160 * one vm memory object. Protections and
16161 * inheritance attributes remain the same
16162 * as in the original task and are returned as out parameters.
16163 * Source and target tasks can be identical.
16164 * Other attributes are the same as for vm_map().
16165 */
16166 kern_return_t
16167 vm_map_remap(
16168 vm_map_t target_map,
16169 vm_map_address_t *address,
16170 vm_map_size_t size,
16171 vm_map_offset_t mask,
16172 int flags,
16173 vm_map_kernel_flags_t vmk_flags,
16174 vm_tag_t tag,
16175 vm_map_t src_map,
16176 vm_map_offset_t memory_address,
16177 boolean_t copy,
16178 vm_prot_t *cur_protection,
16179 vm_prot_t *max_protection,
16180 vm_inherit_t inheritance)
16181 {
16182 kern_return_t result;
16183 vm_map_entry_t entry;
16184 vm_map_entry_t insp_entry = VM_MAP_ENTRY_NULL;
16185 vm_map_entry_t new_entry;
16186 struct vm_map_header map_header;
16187 vm_map_offset_t offset_in_mapping;
16188
16189 if (target_map == VM_MAP_NULL) {
16190 return KERN_INVALID_ARGUMENT;
16191 }
16192
16193 switch (inheritance) {
16194 case VM_INHERIT_NONE:
16195 case VM_INHERIT_COPY:
16196 case VM_INHERIT_SHARE:
16197 if (size != 0 && src_map != VM_MAP_NULL) {
16198 break;
16199 }
16200 /*FALL THRU*/
16201 default:
16202 return KERN_INVALID_ARGUMENT;
16203 }
16204
16205 /*
16206 * If the user is requesting that we return the address of the
16207 * first byte of the data (rather than the base of the page),
16208 * then we use different rounding semantics: specifically,
16209 * we assume that (memory_address, size) describes a region
16210 * all of whose pages we must cover, rather than a base to be truncated
16211 * down and a size to be added to that base. So we figure out
16212 * the highest page that the requested region includes and make
16213 * sure that the size will cover it.
16214 *
16215 * The key example we're worried about is of the form:
16216 *
16217 * memory_address = 0x1ff0, size = 0x20
16218 *
16219 * With the old semantics, we round down the memory_address to 0x1000
16220 * and round up the size to 0x1000, resulting in our covering *only*
16221 * page 0x1000. With the new semantics, we'd realize that the region covers
16222 * 0x1ff0-0x2010, and compute a size of 0x2000. Thus, we cover both page
16223 * 0x1000 and page 0x2000 in the region we remap.
16224 */
16225 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
16226 offset_in_mapping = memory_address - vm_map_trunc_page(memory_address, PAGE_MASK);
16227 size = vm_map_round_page(memory_address + size - vm_map_trunc_page(memory_address, PAGE_MASK), PAGE_MASK);
16228 } else {
16229 size = vm_map_round_page(size, PAGE_MASK);
16230 }
16231 if (size == 0) {
16232 return KERN_INVALID_ARGUMENT;
16233 }
16234
16235 if (flags & VM_FLAGS_RESILIENT_MEDIA) {
16236 /* must be copy-on-write to be "media resilient" */
16237 if (!copy) {
16238 return KERN_INVALID_ARGUMENT;
16239 }
16240 }
16241
16242 result = vm_map_remap_extract(src_map, memory_address,
16243 size, copy, &map_header,
16244 cur_protection,
16245 max_protection,
16246 inheritance,
16247 target_map->hdr.entries_pageable,
16248 src_map == target_map,
16249 vmk_flags);
16250
16251 if (result != KERN_SUCCESS) {
16252 return result;
16253 }
16254
16255 /*
16256 * Allocate/check a range of free virtual address
16257 * space for the target
16258 */
16259 *address = vm_map_trunc_page(*address,
16260 VM_MAP_PAGE_MASK(target_map));
16261 vm_map_lock(target_map);
16262 result = vm_map_remap_range_allocate(target_map, address, size,
16263 mask, flags, vmk_flags, tag,
16264 &insp_entry);
16265
16266 for (entry = map_header.links.next;
16267 entry != CAST_TO_VM_MAP_ENTRY(&map_header.links);
16268 entry = new_entry) {
16269 new_entry = entry->vme_next;
16270 _vm_map_store_entry_unlink(&map_header, entry);
16271 if (result == KERN_SUCCESS) {
16272 if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
16273 /* no codesigning -> read-only access */
16274 entry->max_protection = VM_PROT_READ;
16275 entry->protection = VM_PROT_READ;
16276 entry->vme_resilient_codesign = TRUE;
16277 }
16278 entry->vme_start += *address;
16279 entry->vme_end += *address;
16280 assert(!entry->map_aligned);
16281 if ((flags & VM_FLAGS_RESILIENT_MEDIA) &&
16282 !entry->is_sub_map &&
16283 (VME_OBJECT(entry) == VM_OBJECT_NULL ||
16284 VME_OBJECT(entry)->internal)) {
16285 entry->vme_resilient_media = TRUE;
16286 }
16287 vm_map_store_entry_link(target_map, insp_entry, entry,
16288 vmk_flags);
16289 insp_entry = entry;
16290 } else {
16291 if (!entry->is_sub_map) {
16292 vm_object_deallocate(VME_OBJECT(entry));
16293 } else {
16294 vm_map_deallocate(VME_SUBMAP(entry));
16295 }
16296 _vm_map_entry_dispose(&map_header, entry);
16297 }
16298 }
16299
16300 if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
16301 *cur_protection = VM_PROT_READ;
16302 *max_protection = VM_PROT_READ;
16303 }
16304
16305 if (target_map->disable_vmentry_reuse == TRUE) {
16306 assert(!target_map->is_nested_map);
16307 if (target_map->highest_entry_end < insp_entry->vme_end) {
16308 target_map->highest_entry_end = insp_entry->vme_end;
16309 }
16310 }
16311
16312 if (result == KERN_SUCCESS) {
16313 target_map->size += size;
16314 SAVE_HINT_MAP_WRITE(target_map, insp_entry);
16315
16316 #if PMAP_CS
16317 if (*max_protection & VM_PROT_EXECUTE) {
16318 vm_map_address_t region_start = 0, region_size = 0;
16319 struct pmap_cs_code_directory *region_cd = NULL;
16320 vm_map_address_t base = 0;
16321 struct pmap_cs_lookup_results results = {};
16322 vm_map_size_t page_addr = vm_map_trunc_page(memory_address, PAGE_MASK);
16323 vm_map_size_t assoc_size = vm_map_round_page(memory_address + size - page_addr, PAGE_MASK);
16324
16325 pmap_cs_lookup(src_map->pmap, memory_address, &results);
16326 region_size = results.region_size;
16327 region_start = results.region_start;
16328 region_cd = results.region_cd_entry;
16329 base = results.base;
16330
16331 if (region_cd != NULL && (page_addr != region_start || assoc_size != region_size)) {
16332 *cur_protection = VM_PROT_READ;
16333 *max_protection = VM_PROT_READ;
16334 printf("mismatched remap of executable range 0x%llx-0x%llx to 0x%llx, "
16335 "region_start 0x%llx, region_size 0x%llx, cd_entry %sNULL, making non-executable.\n",
16336 page_addr, page_addr + assoc_size, *address,
16337 region_start, region_size,
16338 region_cd != NULL ? "not " : "" // Don't leak kernel slide
16339 );
16340 }
16341 }
16342 #endif
16343 }
16344 vm_map_unlock(target_map);
16345
16346 if (result == KERN_SUCCESS && target_map->wiring_required) {
16347 result = vm_map_wire_kernel(target_map, *address,
16348 *address + size, *cur_protection, VM_KERN_MEMORY_MLOCK,
16349 TRUE);
16350 }
16351
16352 /*
16353 * If requested, return the address of the data pointed to by the
16354 * request, rather than the base of the resulting page.
16355 */
16356 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
16357 *address += offset_in_mapping;
16358 }
16359
16360 return result;
16361 }
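
/*
 * A minimal usage sketch, assuming a caller that already holds valid
 * references on both maps: share "size" bytes starting at "src_addr" of
 * "src_map" into "dst_map" at a kernel-chosen address.  The helper name
 * "example_share_region" and the specific flag/tag choices are
 * illustrative assumptions, not part of this file's API.
 */
#if 0
static kern_return_t
example_share_region(
	vm_map_t          dst_map,
	vm_map_t          src_map,
	vm_map_offset_t   src_addr,
	vm_map_size_t     size,
	vm_map_address_t *dst_addr)      /* OUT: where the region landed */
{
	vm_prot_t cur_prot, max_prot;

	*dst_addr = 0;
	return vm_map_remap(dst_map,
	           dst_addr,
	           size,
	           0,                        /* mask: no alignment constraint */
	           VM_FLAGS_ANYWHERE,        /* let the kernel pick the address */
	           VM_MAP_KERNEL_FLAGS_NONE,
	           VM_KERN_MEMORY_NONE,      /* tag */
	           src_map,
	           src_addr,
	           FALSE,                    /* share rather than copy */
	           &cur_prot,                /* OUT: resulting protections */
	           &max_prot,
	           VM_INHERIT_SHARE);
}
#endif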
16362
16363 /*
16364 * Routine: vm_map_remap_range_allocate
16365 *
16366 * Description:
16367 * Allocate a range in the specified virtual address map.
16368 * Returns the address and the map entry just before the allocated
16369 * range.
16370 *
16371 * Map must be locked.
16372 */
16373
16374 static kern_return_t
16375 vm_map_remap_range_allocate(
16376 vm_map_t map,
16377 vm_map_address_t *address, /* IN/OUT */
16378 vm_map_size_t size,
16379 vm_map_offset_t mask,
16380 int flags,
16381 vm_map_kernel_flags_t vmk_flags,
16382 __unused vm_tag_t tag,
16383 vm_map_entry_t *map_entry) /* OUT */
16384 {
16385 vm_map_entry_t entry;
16386 vm_map_offset_t start;
16387 vm_map_offset_t end;
16388 vm_map_offset_t desired_empty_end;
16389 kern_return_t kr;
16390 vm_map_entry_t hole_entry;
16391
16392 StartAgain:;
16393
16394 start = *address;
16395
16396 if (flags & VM_FLAGS_ANYWHERE) {
16397 if (flags & VM_FLAGS_RANDOM_ADDR) {
16398 /*
16399 * Get a random start address.
16400 */
16401 kr = vm_map_random_address_for_size(map, address, size);
16402 if (kr != KERN_SUCCESS) {
16403 return kr;
16404 }
16405 start = *address;
16406 }
16407
16408 /*
16409 * Calculate the first possible address.
16410 */
16411
16412 if (start < map->min_offset) {
16413 start = map->min_offset;
16414 }
16415 if (start > map->max_offset) {
16416 return KERN_NO_SPACE;
16417 }
16418
16419 /*
16420 * Look for the first possible address;
16421 * if there's already something at this
16422 * address, we have to start after it.
16423 */
16424
16425 if (map->disable_vmentry_reuse == TRUE) {
16426 VM_MAP_HIGHEST_ENTRY(map, entry, start);
16427 } else {
16428 if (map->holelistenabled) {
16429 hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
16430
16431 if (hole_entry == NULL) {
16432 /*
16433 * No more space in the map?
16434 */
16435 return KERN_NO_SPACE;
16436 } else {
16437 boolean_t found_hole = FALSE;
16438
16439 do {
16440 if (hole_entry->vme_start >= start) {
16441 start = hole_entry->vme_start;
16442 found_hole = TRUE;
16443 break;
16444 }
16445
16446 if (hole_entry->vme_end > start) {
16447 found_hole = TRUE;
16448 break;
16449 }
16450 hole_entry = hole_entry->vme_next;
16451 } while (hole_entry != CAST_TO_VM_MAP_ENTRY(map->holes_list));
16452
16453 if (found_hole == FALSE) {
16454 return KERN_NO_SPACE;
16455 }
16456
16457 entry = hole_entry;
16458 }
16459 } else {
16460 assert(first_free_is_valid(map));
16461 if (start == map->min_offset) {
16462 if ((entry = map->first_free) != vm_map_to_entry(map)) {
16463 start = entry->vme_end;
16464 }
16465 } else {
16466 vm_map_entry_t tmp_entry;
16467 if (vm_map_lookup_entry(map, start, &tmp_entry)) {
16468 start = tmp_entry->vme_end;
16469 }
16470 entry = tmp_entry;
16471 }
16472 }
16473 start = vm_map_round_page(start,
16474 VM_MAP_PAGE_MASK(map));
16475 }
16476
16477 /*
16478 * In any case, the "entry" always precedes
16479 * the proposed new region throughout the
16480 * loop:
16481 */
16482
16483 while (TRUE) {
16484 vm_map_entry_t next;
16485
16486 /*
16487 * Find the end of the proposed new region.
16488 * Be sure we didn't go beyond the end, or
16489 * wrap around the address.
16490 */
16491
16492 end = ((start + mask) & ~mask);
16493 end = vm_map_round_page(end,
16494 VM_MAP_PAGE_MASK(map));
16495 if (end < start) {
16496 return KERN_NO_SPACE;
16497 }
16498 start = end;
16499 end += size;
16500
16501 /* We want an entire page of empty space, but don't increase the allocation size. */
16502 desired_empty_end = vm_map_round_page(end, VM_MAP_PAGE_MASK(map));
16503
16504 if ((desired_empty_end > map->max_offset) || (desired_empty_end < start)) {
16505 if (map->wait_for_space) {
16506 if (size <= (map->max_offset -
16507 map->min_offset)) {
16508 assert_wait((event_t) map, THREAD_INTERRUPTIBLE);
16509 vm_map_unlock(map);
16510 thread_block(THREAD_CONTINUE_NULL);
16511 vm_map_lock(map);
16512 goto StartAgain;
16513 }
16514 }
16515
16516 return KERN_NO_SPACE;
16517 }
16518
16519 next = entry->vme_next;
16520
16521 if (map->holelistenabled) {
16522 if (entry->vme_end >= desired_empty_end) {
16523 break;
16524 }
16525 } else {
16526 /*
16527 * If there are no more entries, we must win.
16528 *
16529 * OR
16530 *
16531 * If there is another entry, it must be
16532 * after the end of the potential new region.
16533 */
16534
16535 if (next == vm_map_to_entry(map)) {
16536 break;
16537 }
16538
16539 if (next->vme_start >= desired_empty_end) {
16540 break;
16541 }
16542 }
16543
16544 /*
16545 * Didn't fit -- move to the next entry.
16546 */
16547
16548 entry = next;
16549
16550 if (map->holelistenabled) {
16551 if (entry == CAST_TO_VM_MAP_ENTRY(map->holes_list)) {
16552 /*
16553 * Wrapped around
16554 */
16555 return KERN_NO_SPACE;
16556 }
16557 start = entry->vme_start;
16558 } else {
16559 start = entry->vme_end;
16560 }
16561 }
16562
16563 if (map->holelistenabled) {
16564 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
16565 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
16566 }
16567 }
16568
16569 *address = start;
16570 } else {
16571 vm_map_entry_t temp_entry;
16572
16573 /*
16574 * Verify that:
16575 * the address doesn't itself violate
16576 * the mask requirement.
16577 */
16578
16579 if ((start & mask) != 0) {
16580 return KERN_NO_SPACE;
16581 }
16582
16583
16584 /*
16585 * ... the address is within bounds
16586 */
16587
16588 end = start + size;
16589
16590 if ((start < map->min_offset) ||
16591 (end > map->max_offset) ||
16592 (start >= end)) {
16593 return KERN_INVALID_ADDRESS;
16594 }
16595
16596 /*
16597 * If we're asked to overwrite whatever was mapped in that
16598 * range, first deallocate that range.
16599 */
16600 if (flags & VM_FLAGS_OVERWRITE) {
16601 vm_map_t zap_map;
16602 int remove_flags = VM_MAP_REMOVE_SAVE_ENTRIES | VM_MAP_REMOVE_NO_MAP_ALIGN;
16603
16604 /*
16605 * We use a "zap_map" to avoid having to unlock
16606 * the "map" in vm_map_delete(), which would compromise
16607 * the atomicity of the "deallocate" and then "remap"
16608 * combination.
16609 */
16610 zap_map = vm_map_create(PMAP_NULL,
16611 start,
16612 end,
16613 map->hdr.entries_pageable);
16614 if (zap_map == VM_MAP_NULL) {
16615 return KERN_RESOURCE_SHORTAGE;
16616 }
16617 vm_map_set_page_shift(zap_map, VM_MAP_PAGE_SHIFT(map));
16618 vm_map_disable_hole_optimization(zap_map);
16619
16620 if (vmk_flags.vmkf_overwrite_immutable) {
16621 remove_flags |= VM_MAP_REMOVE_IMMUTABLE;
16622 }
16623 kr = vm_map_delete(map, start, end,
16624 remove_flags,
16625 zap_map);
16626 if (kr == KERN_SUCCESS) {
16627 vm_map_destroy(zap_map,
16628 VM_MAP_REMOVE_NO_PMAP_CLEANUP);
16629 zap_map = VM_MAP_NULL;
16630 }
16631 }
16632
16633 /*
16634 * ... the starting address isn't allocated
16635 */
16636
16637 if (vm_map_lookup_entry(map, start, &temp_entry)) {
16638 return KERN_NO_SPACE;
16639 }
16640
16641 entry = temp_entry;
16642
16643 /*
16644 * ... the next region doesn't overlap the
16645 * end point.
16646 */
16647
16648 if ((entry->vme_next != vm_map_to_entry(map)) &&
16649 (entry->vme_next->vme_start < end)) {
16650 return KERN_NO_SPACE;
16651 }
16652 }
16653 *map_entry = entry;
16654 return KERN_SUCCESS;
16655 }
16656
16657 /*
16658 * vm_map_switch:
16659 *
16660 * Set the address map for the current thread to the specified map
16661 */
16662
16663 vm_map_t
16664 vm_map_switch(
16665 vm_map_t map)
16666 {
16667 int mycpu;
16668 thread_t thread = current_thread();
16669 vm_map_t oldmap = thread->map;
16670
16671 mp_disable_preemption();
16672 mycpu = cpu_number();
16673
16674 /*
16675 * Deactivate the current map and activate the requested map
16676 */
16677 PMAP_SWITCH_USER(thread, map, mycpu);
16678
16679 mp_enable_preemption();
16680 return oldmap;
16681 }
16682
16683
16684 /*
16685 * Routine: vm_map_write_user
16686 *
16687 * Description:
16688 * Copy out data from kernel space into the
16689 * destination map. The space must already exist in the
16690 * destination map.
16691 * NOTE: This routine should only be called by threads
16692 * which can block on a page fault, i.e. kernel-mode user
16693 * threads.
16694 *
16695 */
16696 kern_return_t
16697 vm_map_write_user(
16698 vm_map_t map,
16699 void *src_p,
16700 vm_map_address_t dst_addr,
16701 vm_size_t size)
16702 {
16703 kern_return_t kr = KERN_SUCCESS;
16704
16705 if (current_map() == map) {
16706 if (copyout(src_p, dst_addr, size)) {
16707 kr = KERN_INVALID_ADDRESS;
16708 }
16709 } else {
16710 vm_map_t oldmap;
16711
16712 /* take on the identity of the target map while doing */
16713 /* the transfer */
16714
16715 vm_map_reference(map);
16716 oldmap = vm_map_switch(map);
16717 if (copyout(src_p, dst_addr, size)) {
16718 kr = KERN_INVALID_ADDRESS;
16719 }
16720 vm_map_switch(oldmap);
16721 vm_map_deallocate(map);
16722 }
16723 return kr;
16724 }
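
/*
 * A minimal sketch, assuming "user_map" and "uaddr" already describe an
 * existing, writable mapping in the target task: copy a small kernel
 * buffer out into that map.  The helper name is illustrative.
 */
#if 0
static kern_return_t
example_copyout_buffer(vm_map_t user_map, vm_map_address_t uaddr)
{
	char buf[16] = "hello, user";

	/* may block on a page fault, so only call from a blockable thread */
	return vm_map_write_user(user_map, buf, uaddr, sizeof(buf));
}
#endif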
16725
16726 /*
16727 * Routine: vm_map_read_user
16728 *
16729 * Description:
16730 * Copy in data from a user space source map into the
16731 * kernel map. The space must already exist in the
16732 * kernel map.
16733 * NOTE: This routine should only be called by threads
16734 * which can block on a page fault, i.e. kernel-mode user
16735 * threads.
16736 *
16737 */
16738 kern_return_t
16739 vm_map_read_user(
16740 vm_map_t map,
16741 vm_map_address_t src_addr,
16742 void *dst_p,
16743 vm_size_t size)
16744 {
16745 kern_return_t kr = KERN_SUCCESS;
16746
16747 if (current_map() == map) {
16748 if (copyin(src_addr, dst_p, size)) {
16749 kr = KERN_INVALID_ADDRESS;
16750 }
16751 } else {
16752 vm_map_t oldmap;
16753
16754 /* take on the identity of the target map while doing */
16755 /* the transfer */
16756
16757 vm_map_reference(map);
16758 oldmap = vm_map_switch(map);
16759 if (copyin(src_addr, dst_p, size)) {
16760 kr = KERN_INVALID_ADDRESS;
16761 }
16762 vm_map_switch(oldmap);
16763 vm_map_deallocate(map);
16764 }
16765 return kr;
16766 }
16767
16768
16769 /*
16770 * vm_map_check_protection:
16771 *
16772 * Assert that the target map allows the specified
16773 * privilege on the entire address region given.
16774 * The entire region must be allocated.
16775 */
16776 boolean_t
16777 vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
16778 vm_map_offset_t end, vm_prot_t protection)
16779 {
16780 vm_map_entry_t entry;
16781 vm_map_entry_t tmp_entry;
16782
16783 vm_map_lock(map);
16784
16785 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end) {
16786 vm_map_unlock(map);
16787 return FALSE;
16788 }
16789
16790 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
16791 vm_map_unlock(map);
16792 return FALSE;
16793 }
16794
16795 entry = tmp_entry;
16796
16797 while (start < end) {
16798 if (entry == vm_map_to_entry(map)) {
16799 vm_map_unlock(map);
16800 return FALSE;
16801 }
16802
16803 /*
16804 * No holes allowed!
16805 */
16806
16807 if (start < entry->vme_start) {
16808 vm_map_unlock(map);
16809 return FALSE;
16810 }
16811
16812 /*
16813 * Check protection associated with entry.
16814 */
16815
16816 if ((entry->protection & protection) != protection) {
16817 vm_map_unlock(map);
16818 return FALSE;
16819 }
16820
16821 /* go to next entry */
16822
16823 start = entry->vme_end;
16824 entry = entry->vme_next;
16825 }
16826 vm_map_unlock(map);
16827 return TRUE;
16828 }
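
/*
 * A minimal sketch, assuming the helper name: check that a caller-supplied
 * range is entirely mapped with read permission before operating on it.
 * The range is rounded to the map's page boundaries first, since the
 * whole region must be allocated.
 */
#if 0
static boolean_t
example_range_is_readable(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_size_t   len)
{
	return vm_map_check_protection(map,
	           vm_map_trunc_page(start, VM_MAP_PAGE_MASK(map)),
	           vm_map_round_page(start + len, VM_MAP_PAGE_MASK(map)),
	           VM_PROT_READ);
}
#endif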
16829
16830 kern_return_t
16831 vm_map_purgable_control(
16832 vm_map_t map,
16833 vm_map_offset_t address,
16834 vm_purgable_t control,
16835 int *state)
16836 {
16837 vm_map_entry_t entry;
16838 vm_object_t object;
16839 kern_return_t kr;
16840 boolean_t was_nonvolatile;
16841
16842 /*
16843 * Vet all the input parameters and current type and state of the
16844 * underlying object. Return with an error if anything is amiss.
16845 */
16846 if (map == VM_MAP_NULL) {
16847 return KERN_INVALID_ARGUMENT;
16848 }
16849
16850 if (control != VM_PURGABLE_SET_STATE &&
16851 control != VM_PURGABLE_GET_STATE &&
16852 control != VM_PURGABLE_PURGE_ALL &&
16853 control != VM_PURGABLE_SET_STATE_FROM_KERNEL) {
16854 return KERN_INVALID_ARGUMENT;
16855 }
16856
16857 if (control == VM_PURGABLE_PURGE_ALL) {
16858 vm_purgeable_object_purge_all();
16859 return KERN_SUCCESS;
16860 }
16861
16862 if ((control == VM_PURGABLE_SET_STATE ||
16863 control == VM_PURGABLE_SET_STATE_FROM_KERNEL) &&
16864 (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) ||
16865 ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK))) {
16866 return KERN_INVALID_ARGUMENT;
16867 }
16868
16869 vm_map_lock_read(map);
16870
16871 if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {
16872 /*
16873 * Must pass a valid non-submap address.
16874 */
16875 vm_map_unlock_read(map);
16876 return KERN_INVALID_ADDRESS;
16877 }
16878
16879 if ((entry->protection & VM_PROT_WRITE) == 0) {
16880 /*
16881 * Can't apply purgable controls to something you can't write.
16882 */
16883 vm_map_unlock_read(map);
16884 return KERN_PROTECTION_FAILURE;
16885 }
16886
16887 object = VME_OBJECT(entry);
16888 if (object == VM_OBJECT_NULL ||
16889 object->purgable == VM_PURGABLE_DENY) {
16890 /*
16891 * Object must already be present and be purgeable.
16892 */
16893 vm_map_unlock_read(map);
16894 return KERN_INVALID_ARGUMENT;
16895 }
16896
16897 vm_object_lock(object);
16898
16899 #if 00
16900 if (VME_OFFSET(entry) != 0 ||
16901 entry->vme_end - entry->vme_start != object->vo_size) {
16902 /*
16903 * Can only apply purgable controls to the whole (existing)
16904 * object at once.
16905 */
16906 vm_map_unlock_read(map);
16907 vm_object_unlock(object);
16908 return KERN_INVALID_ARGUMENT;
16909 }
16910 #endif
16911
16912 assert(!entry->is_sub_map);
16913 assert(!entry->use_pmap); /* purgeable has its own accounting */
16914
16915 vm_map_unlock_read(map);
16916
16917 was_nonvolatile = (object->purgable == VM_PURGABLE_NONVOLATILE);
16918
16919 kr = vm_object_purgable_control(object, control, state);
16920
16921 if (was_nonvolatile &&
16922 object->purgable != VM_PURGABLE_NONVOLATILE &&
16923 map->pmap == kernel_pmap) {
16924 #if DEBUG
16925 object->vo_purgeable_volatilizer = kernel_task;
16926 #endif /* DEBUG */
16927 }
16928
16929 vm_object_unlock(object);
16930
16931 return kr;
16932 }
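
/*
 * A minimal sketch, assuming the helper name and that "addr" falls within
 * a writable mapping of a purgeable object: mark that object volatile so
 * its pages become reclaimable under memory pressure.
 */
#if 0
static kern_return_t
example_make_volatile(vm_map_t map, vm_map_offset_t addr)
{
	int state = VM_PURGABLE_VOLATILE;

	return vm_map_purgable_control(map, addr,
	           VM_PURGABLE_SET_STATE, &state);
}
#endif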
16933
16934 kern_return_t
16935 vm_map_page_query_internal(
16936 vm_map_t target_map,
16937 vm_map_offset_t offset,
16938 int *disposition,
16939 int *ref_count)
16940 {
16941 kern_return_t kr;
16942 vm_page_info_basic_data_t info;
16943 mach_msg_type_number_t count;
16944
16945 count = VM_PAGE_INFO_BASIC_COUNT;
16946 kr = vm_map_page_info(target_map,
16947 offset,
16948 VM_PAGE_INFO_BASIC,
16949 (vm_page_info_t) &info,
16950 &count);
16951 if (kr == KERN_SUCCESS) {
16952 *disposition = info.disposition;
16953 *ref_count = info.ref_count;
16954 } else {
16955 *disposition = 0;
16956 *ref_count = 0;
16957 }
16958
16959 return kr;
16960 }
16961
16962 kern_return_t
16963 vm_map_page_info(
16964 vm_map_t map,
16965 vm_map_offset_t offset,
16966 vm_page_info_flavor_t flavor,
16967 vm_page_info_t info,
16968 mach_msg_type_number_t *count)
16969 {
16970 return vm_map_page_range_info_internal(map,
16971 offset, /* start of range */
16972 (offset + 1), /* this will get rounded in the call to the page boundary */
16973 flavor,
16974 info,
16975 count);
16976 }
16977
16978 kern_return_t
16979 vm_map_page_range_info_internal(
16980 vm_map_t map,
16981 vm_map_offset_t start_offset,
16982 vm_map_offset_t end_offset,
16983 vm_page_info_flavor_t flavor,
16984 vm_page_info_t info,
16985 mach_msg_type_number_t *count)
16986 {
16987 vm_map_entry_t map_entry = VM_MAP_ENTRY_NULL;
16988 vm_object_t object = VM_OBJECT_NULL, curr_object = VM_OBJECT_NULL;
16989 vm_page_t m = VM_PAGE_NULL;
16990 kern_return_t retval = KERN_SUCCESS;
16991 int disposition = 0;
16992 int ref_count = 0;
16993 int depth = 0, info_idx = 0;
16994 vm_page_info_basic_t basic_info = 0;
16995 vm_map_offset_t offset_in_page = 0, offset_in_object = 0, curr_offset_in_object = 0;
16996 vm_map_offset_t start = 0, end = 0, curr_s_offset = 0, curr_e_offset = 0;
16997 boolean_t do_region_footprint;
16998 ledger_amount_t ledger_resident, ledger_compressed;
16999
17000 switch (flavor) {
17001 case VM_PAGE_INFO_BASIC:
17002 if (*count != VM_PAGE_INFO_BASIC_COUNT) {
17003 /*
17004 * The "vm_page_info_basic_data" structure was not
17005 * properly padded, so allow the size to be off by
17006 * one to maintain backwards binary compatibility...
17007 */
17008 if (*count != VM_PAGE_INFO_BASIC_COUNT - 1) {
17009 return KERN_INVALID_ARGUMENT;
17010 }
17011 }
17012 break;
17013 default:
17014 return KERN_INVALID_ARGUMENT;
17015 }
17016
17017 do_region_footprint = task_self_region_footprint();
17018 disposition = 0;
17019 ref_count = 0;
17020 depth = 0;
17021 info_idx = 0; /* Tracks the next index within the info structure to be filled.*/
17022 retval = KERN_SUCCESS;
17023
17024 offset_in_page = start_offset & PAGE_MASK;
17025 start = vm_map_trunc_page(start_offset, PAGE_MASK);
17026 end = vm_map_round_page(end_offset, PAGE_MASK);
17027
17028 if (end < start) {
17029 return KERN_INVALID_ARGUMENT;
17030 }
17031
17032 assert((end - start) <= MAX_PAGE_RANGE_QUERY);
17033
17034 vm_map_lock_read(map);
17035
17036 task_ledgers_footprint(map->pmap->ledger, &ledger_resident, &ledger_compressed);
17037
17038 for (curr_s_offset = start; curr_s_offset < end;) {
17039 /*
17040 * New lookup needs reset of these variables.
17041 */
17042 curr_object = object = VM_OBJECT_NULL;
17043 offset_in_object = 0;
17044 ref_count = 0;
17045 depth = 0;
17046
17047 if (do_region_footprint &&
17048 curr_s_offset >= vm_map_last_entry(map)->vme_end) {
17049 /*
17050 * Request for "footprint" info about a page beyond
17051 * the end of address space: this must be for
17052 * the fake region vm_map_region_recurse_64()
17053 * reported to account for non-volatile purgeable
17054 * memory owned by this task.
17055 */
17056 disposition = 0;
17057
17058 if (curr_s_offset - vm_map_last_entry(map)->vme_end <=
17059 (unsigned) ledger_compressed) {
17060 /*
17061 * We haven't reported all the "non-volatile
17062 * compressed" pages yet, so report this fake
17063 * page as "compressed".
17064 */
17065 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
17066 } else {
17067 /*
17068 * We've reported all the non-volatile
17069 * compressed pages but not all the non-volatile
17070 * pages, so report this fake page as
17071 * "resident dirty".
17072 */
17073 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
17074 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
17075 disposition |= VM_PAGE_QUERY_PAGE_REF;
17076 }
17077 switch (flavor) {
17078 case VM_PAGE_INFO_BASIC:
17079 basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
17080 basic_info->disposition = disposition;
17081 basic_info->ref_count = 1;
17082 basic_info->object_id = INFO_MAKE_FAKE_OBJECT_ID(map, task_ledgers.purgeable_nonvolatile);
17083 basic_info->offset = 0;
17084 basic_info->depth = 0;
17085
17086 info_idx++;
17087 break;
17088 }
17089 curr_s_offset += PAGE_SIZE;
17090 continue;
17091 }
17092
17093 /*
17094 * First, find the map entry covering "curr_s_offset", going down
17095 * submaps if necessary.
17096 */
17097 if (!vm_map_lookup_entry(map, curr_s_offset, &map_entry)) {
17098 /* no entry -> no object -> no page */
17099
17100 if (curr_s_offset < vm_map_min(map)) {
17101 /*
17102 * Illegal address that falls below map min.
17103 */
17104 curr_e_offset = MIN(end, vm_map_min(map));
17105 } else if (curr_s_offset >= vm_map_max(map)) {
17106 /*
17107 * Illegal address that falls on/after map max.
17108 */
17109 curr_e_offset = end;
17110 } else if (map_entry == vm_map_to_entry(map)) {
17111 /*
17112 * Hit a hole.
17113 */
17114 if (map_entry->vme_next == vm_map_to_entry(map)) {
17115 /*
17116 * Empty map.
17117 */
17118 curr_e_offset = MIN(map->max_offset, end);
17119 } else {
17120 /*
17121 * Hole at start of the map.
17122 */
17123 curr_e_offset = MIN(map_entry->vme_next->vme_start, end);
17124 }
17125 } else {
17126 if (map_entry->vme_next == vm_map_to_entry(map)) {
17127 /*
17128 * Hole at the end of the map.
17129 */
17130 curr_e_offset = MIN(map->max_offset, end);
17131 } else {
17132 curr_e_offset = MIN(map_entry->vme_next->vme_start, end);
17133 }
17134 }
17135
17136 assert(curr_e_offset >= curr_s_offset);
17137
17138 uint64_t num_pages = (curr_e_offset - curr_s_offset) >> PAGE_SHIFT;
17139
17140 void *info_ptr = (void*) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
17141
17142 bzero(info_ptr, num_pages * sizeof(struct vm_page_info_basic));
17143
17144 curr_s_offset = curr_e_offset;
17145
17146 info_idx += num_pages;
17147
17148 continue;
17149 }
17150
17151 /* compute offset from this map entry's start */
17152 offset_in_object = curr_s_offset - map_entry->vme_start;
17153
17154 /* compute offset into this map entry's object (or submap) */
17155 offset_in_object += VME_OFFSET(map_entry);
17156
17157 if (map_entry->is_sub_map) {
17158 vm_map_t sub_map = VM_MAP_NULL;
17159 vm_page_info_t submap_info = 0;
17160 vm_map_offset_t submap_s_offset = 0, submap_e_offset = 0, range_len = 0;
17161
17162 range_len = MIN(map_entry->vme_end, end) - curr_s_offset;
17163
17164 submap_s_offset = offset_in_object;
17165 submap_e_offset = submap_s_offset + range_len;
17166
17167 sub_map = VME_SUBMAP(map_entry);
17168
17169 vm_map_reference(sub_map);
17170 vm_map_unlock_read(map);
17171
17172 submap_info = (vm_page_info_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
17173
17174 retval = vm_map_page_range_info_internal(sub_map,
17175 submap_s_offset,
17176 submap_e_offset,
17177 VM_PAGE_INFO_BASIC,
17178 (vm_page_info_t) submap_info,
17179 count);
17180
17181 assert(retval == KERN_SUCCESS);
17182
17183 vm_map_lock_read(map);
17184 vm_map_deallocate(sub_map);
17185
17186 /* Move the "info" index by the number of pages we inspected.*/
17187 info_idx += range_len >> PAGE_SHIFT;
17188
17189 /* Move our current offset by the size of the range we inspected.*/
17190 curr_s_offset += range_len;
17191
17192 continue;
17193 }
17194
17195 object = VME_OBJECT(map_entry);
17196 if (object == VM_OBJECT_NULL) {
17197 /*
17198 * We don't have an object here and, hence,
17199 * no pages to inspect. We'll fill up the
17200 * info structure appropriately.
17201 */
17202
17203 curr_e_offset = MIN(map_entry->vme_end, end);
17204
17205 uint64_t num_pages = (curr_e_offset - curr_s_offset) >> PAGE_SHIFT;
17206
17207 void *info_ptr = (void*) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
17208
17209 bzero(info_ptr, num_pages * sizeof(struct vm_page_info_basic));
17210
17211 curr_s_offset = curr_e_offset;
17212
17213 info_idx += num_pages;
17214
17215 continue;
17216 }
17217
17218 if (do_region_footprint) {
17219 int pmap_disp;
17220
17221 disposition = 0;
17222 pmap_disp = 0;
17223 if (map->has_corpse_footprint) {
17224 /*
17225 * Query the page info data we saved
17226 * while forking the corpse.
17227 */
17228 vm_map_corpse_footprint_query_page_info(
17229 map,
17230 curr_s_offset,
17231 &pmap_disp);
17232 } else {
17233 /*
17234 * Query the pmap.
17235 */
17236 pmap_query_page_info(map->pmap,
17237 curr_s_offset,
17238 &pmap_disp);
17239 }
17240 if (object->purgable == VM_PURGABLE_NONVOLATILE &&
17241 /* && not tagged as no-footprint? */
17242 VM_OBJECT_OWNER(object) != NULL &&
17243 VM_OBJECT_OWNER(object)->map == map) {
17244 if ((((curr_s_offset
17245 - map_entry->vme_start
17246 + VME_OFFSET(map_entry))
17247 / PAGE_SIZE) <
17248 (object->resident_page_count +
17249 vm_compressor_pager_get_count(object->pager)))) {
17250 /*
17251 * Non-volatile purgeable object owned
17252 * by this task: report the first
17253 * "#resident + #compressed" pages as
17254 * "resident" (to show that they
17255 * contribute to the footprint) but not
17256 * "dirty" (to avoid double-counting
17257 * with the fake "non-volatile" region
17258 * we'll report at the end of the
17259 * address space to account for all
17260 * (mapped or not) non-volatile memory
17261 * owned by this task).
17262 */
17263 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
17264 }
17265 } else if ((object->purgable == VM_PURGABLE_VOLATILE ||
17266 object->purgable == VM_PURGABLE_EMPTY) &&
17267 /* && not tagged as no-footprint? */
17268 VM_OBJECT_OWNER(object) != NULL &&
17269 VM_OBJECT_OWNER(object)->map == map) {
17270 if ((((curr_s_offset
17271 - map_entry->vme_start
17272 + VME_OFFSET(map_entry))
17273 / PAGE_SIZE) <
17274 object->wired_page_count)) {
17275 /*
17276 * Volatile|empty purgeable object owned
17277 * by this task: report the first
17278 * "#wired" pages as "resident" (to
17279 * show that they contribute to the
17280 * footprint) but not "dirty" (to avoid
17281 * double-counting with the fake
17282 * "non-volatile" region we'll report
17283 * at the end of the address space to
17284 * account for all (mapped or not)
17285 * non-volatile memory owned by this
17286 * task).
17287 */
17288 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
17289 }
17290 } else if (map_entry->iokit_acct &&
17291 object->internal &&
17292 object->purgable == VM_PURGABLE_DENY) {
17293 /*
17294 * Non-purgeable IOKit memory: phys_footprint
17295 * includes the entire virtual mapping.
17296 */
17297 assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
17298 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
17299 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
17300 } else if (pmap_disp & (PMAP_QUERY_PAGE_ALTACCT |
17301 PMAP_QUERY_PAGE_COMPRESSED_ALTACCT)) {
17302 /* alternate accounting */
17303 #if CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG)
17304 if (map->pmap->footprint_was_suspended ||
17305 /*
17306 * XXX corpse does not know if original
17307 * pmap had its footprint suspended...
17308 */
17309 map->has_corpse_footprint) {
17310 /*
17311 * The assertion below can fail if dyld
17312 * suspended footprint accounting
17313 * while doing some adjustments to
17314 * this page; the mapping would say
17315 * "use pmap accounting" but the page
17316 * would be marked "alternate
17317 * accounting".
17318 */
17319 } else
17320 #endif /* CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG) */
17321 assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
17322 pmap_disp = 0;
17323 } else {
17324 if (pmap_disp & PMAP_QUERY_PAGE_PRESENT) {
17325 assertf(map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
17326 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
17327 disposition |= VM_PAGE_QUERY_PAGE_REF;
17328 if (pmap_disp & PMAP_QUERY_PAGE_INTERNAL) {
17329 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
17330 } else {
17331 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
17332 }
17333 if (pmap_disp & PMAP_QUERY_PAGE_REUSABLE) {
17334 disposition |= VM_PAGE_QUERY_PAGE_REUSABLE;
17335 }
17336 } else if (pmap_disp & PMAP_QUERY_PAGE_COMPRESSED) {
17337 assertf(map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
17338 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
17339 }
17340 }
17341 switch (flavor) {
17342 case VM_PAGE_INFO_BASIC:
17343 basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
17344 basic_info->disposition = disposition;
17345 basic_info->ref_count = 1;
17346 basic_info->object_id = INFO_MAKE_FAKE_OBJECT_ID(map, task_ledgers.purgeable_nonvolatile);
17347 basic_info->offset = 0;
17348 basic_info->depth = 0;
17349
17350 info_idx++;
17351 break;
17352 }
17353 curr_s_offset += PAGE_SIZE;
17354 continue;
17355 }
17356
17357 vm_object_reference(object);
17358 /*
17359 * Shared mode -- so we can allow other readers
17360 * to grab the lock too.
17361 */
17362 vm_object_lock_shared(object);
17363
17364 curr_e_offset = MIN(map_entry->vme_end, end);
17365
17366 vm_map_unlock_read(map);
17367
17368 map_entry = NULL; /* map is unlocked, the entry is no longer valid. */
17369
17370 curr_object = object;
17371
17372 for (; curr_s_offset < curr_e_offset;) {
17373 if (object == curr_object) {
17374 ref_count = curr_object->ref_count - 1; /* account for our object reference above. */
17375 } else {
17376 ref_count = curr_object->ref_count;
17377 }
17378
17379 curr_offset_in_object = offset_in_object;
17380
17381 for (;;) {
17382 m = vm_page_lookup(curr_object, curr_offset_in_object);
17383
17384 if (m != VM_PAGE_NULL) {
17385 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
17386 break;
17387 } else {
17388 if (curr_object->internal &&
17389 curr_object->alive &&
17390 !curr_object->terminating &&
17391 curr_object->pager_ready) {
17392 if (VM_COMPRESSOR_PAGER_STATE_GET(curr_object, curr_offset_in_object)
17393 == VM_EXTERNAL_STATE_EXISTS) {
17394 /* the pager has that page */
17395 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
17396 break;
17397 }
17398 }
17399
17400 /*
17401 * Go down the VM object shadow chain until we find the page
17402 * we're looking for.
17403 */
17404
17405 if (curr_object->shadow != VM_OBJECT_NULL) {
17406 vm_object_t shadow = VM_OBJECT_NULL;
17407
17408 curr_offset_in_object += curr_object->vo_shadow_offset;
17409 shadow = curr_object->shadow;
17410
17411 vm_object_lock_shared(shadow);
17412 vm_object_unlock(curr_object);
17413
17414 curr_object = shadow;
17415 depth++;
17416 continue;
17417 } else {
17418 break;
17419 }
17420 }
17421 }
17422
17423 /* The ref_count is not strictly accurate; it measures the number */
17424 /* of entities holding a ref on the object. They may not be mapping */
17425 /* the object or may not be mapping the section holding the */
17426 /* target page, but it's still a ballpark number and, though an over- */
17427 /* count, it picks up the copy-on-write cases. */
17428
17429 /* We could also get a picture of page sharing from pmap_attributes */
17430 /* but this would undercount as only faulted-in mappings would */
17431 /* show up. */
17432
17433 if ((curr_object == object) && curr_object->shadow) {
17434 disposition |= VM_PAGE_QUERY_PAGE_COPIED;
17435 }
17436
17437 if (!curr_object->internal) {
17438 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
17439 }
17440
17441 if (m != VM_PAGE_NULL) {
17442 if (m->vmp_fictitious) {
17443 disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
17444 } else {
17445 if (m->vmp_dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(m))) {
17446 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
17447 }
17448
17449 if (m->vmp_reference || pmap_is_referenced(VM_PAGE_GET_PHYS_PAGE(m))) {
17450 disposition |= VM_PAGE_QUERY_PAGE_REF;
17451 }
17452
17453 if (m->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q) {
17454 disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE;
17455 }
17456
17457 if (m->vmp_cs_validated) {
17458 disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED;
17459 }
17460 if (m->vmp_cs_tainted) {
17461 disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED;
17462 }
17463 if (m->vmp_cs_nx) {
17464 disposition |= VM_PAGE_QUERY_PAGE_CS_NX;
17465 }
17466 if (m->vmp_reusable || curr_object->all_reusable) {
17467 disposition |= VM_PAGE_QUERY_PAGE_REUSABLE;
17468 }
17469 }
17470 }
17471
17472 switch (flavor) {
17473 case VM_PAGE_INFO_BASIC:
17474 basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
17475 basic_info->disposition = disposition;
17476 basic_info->ref_count = ref_count;
17477 basic_info->object_id = (vm_object_id_t) (uintptr_t)
17478 VM_KERNEL_ADDRPERM(curr_object);
17479 basic_info->offset =
17480 (memory_object_offset_t) curr_offset_in_object + offset_in_page;
17481 basic_info->depth = depth;
17482
17483 info_idx++;
17484 break;
17485 }
17486
17487 disposition = 0;
17488 offset_in_page = 0; // This doesn't really make sense for any offset other than the starting offset.
17489
17490 /*
17491 * Move to next offset in the range and in our object.
17492 */
17493 curr_s_offset += PAGE_SIZE;
17494 offset_in_object += PAGE_SIZE;
17495 curr_offset_in_object = offset_in_object;
17496
17497 if (curr_object != object) {
17498 vm_object_unlock(curr_object);
17499
17500 curr_object = object;
17501
17502 vm_object_lock_shared(curr_object);
17503 } else {
17504 vm_object_lock_yield_shared(curr_object);
17505 }
17506 }
17507
17508 vm_object_unlock(curr_object);
17509 vm_object_deallocate(curr_object);
17510
17511 vm_map_lock_read(map);
17512 }
17513
17514 vm_map_unlock_read(map);
17515 return retval;
17516 }
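
/*
 * A minimal sketch, assuming the helper name and a caller-provided "infos"
 * buffer with one vm_page_info_basic entry per page in the (page-aligned)
 * range, which stays within MAX_PAGE_RANGE_QUERY: gather basic per-page
 * dispositions for that range.
 */
#if 0
static kern_return_t
example_range_dispositions(
	vm_map_t                   map,
	vm_map_offset_t            start,
	vm_map_offset_t            end,
	vm_page_info_basic_data_t *infos)
{
	mach_msg_type_number_t count = VM_PAGE_INFO_BASIC_COUNT;

	return vm_map_page_range_info_internal(map, start, end,
	           VM_PAGE_INFO_BASIC, (vm_page_info_t)infos, &count);
}
#endif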
17517
17518 /*
17519 * vm_map_msync
17520 *
17521 * Synchronises the memory range specified with its backing store
17522 * image by either flushing or cleaning the contents to the appropriate
17523 * memory manager, engaging in a memory object synchronize dialog with
17524 * the manager. The client doesn't return until the manager issues
17525 * an m_o_s_completed message. MIG magically converts the user task
17526 * parameter to the task's address map.
17527 *
17528 * interpretation of sync_flags
17529 * VM_SYNC_INVALIDATE - discard pages, only return precious
17530 * pages to manager.
17531 *
17532 * VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
17533 * - discard pages, write dirty or precious
17534 * pages back to memory manager.
17535 *
17536 * VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
17537 * - write dirty or precious pages back to
17538 * the memory manager.
17539 *
17540 * VM_SYNC_CONTIGUOUS - does everything normally, but if there
17541 * is a hole in the region, and we would
17542 * have returned KERN_SUCCESS, return
17543 * KERN_INVALID_ADDRESS instead.
17544 *
17545 * NOTE
17546 * The memory object attributes have not yet been implemented; this
17547 * function will have to deal with the invalidate attribute.
17548 *
17549 * RETURNS
17550 * KERN_INVALID_TASK Bad task parameter
17551 * KERN_INVALID_ARGUMENT both sync and async were specified.
17552 * KERN_SUCCESS The usual.
17553 * KERN_INVALID_ADDRESS There was a hole in the region.
17554 */
17555
17556 kern_return_t
17557 vm_map_msync(
17558 vm_map_t map,
17559 vm_map_address_t address,
17560 vm_map_size_t size,
17561 vm_sync_t sync_flags)
17562 {
17563 vm_map_entry_t entry;
17564 vm_map_size_t amount_left;
17565 vm_object_offset_t offset;
17566 boolean_t do_sync_req;
17567 boolean_t had_hole = FALSE;
17568 vm_map_offset_t pmap_offset;
17569
17570 if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
17571 (sync_flags & VM_SYNC_SYNCHRONOUS)) {
17572 return KERN_INVALID_ARGUMENT;
17573 }
17574
17575 /*
17576 * align address and size on page boundaries
17577 */
17578 size = (vm_map_round_page(address + size,
17579 VM_MAP_PAGE_MASK(map)) -
17580 vm_map_trunc_page(address,
17581 VM_MAP_PAGE_MASK(map)));
17582 address = vm_map_trunc_page(address,
17583 VM_MAP_PAGE_MASK(map));
17584
17585 if (map == VM_MAP_NULL) {
17586 return KERN_INVALID_TASK;
17587 }
17588
17589 if (size == 0) {
17590 return KERN_SUCCESS;
17591 }
17592
17593 amount_left = size;
17594
17595 while (amount_left > 0) {
17596 vm_object_size_t flush_size;
17597 vm_object_t object;
17598
17599 vm_map_lock(map);
17600 if (!vm_map_lookup_entry(map,
17601 address,
17602 &entry)) {
17603 vm_map_size_t skip;
17604
17605 /*
17606 * hole in the address map.
17607 */
17608 had_hole = TRUE;
17609
17610 if (sync_flags & VM_SYNC_KILLPAGES) {
17611 /*
17612 * For VM_SYNC_KILLPAGES, there should be
17613 * no holes in the range, since we couldn't
17614 * prevent someone else from allocating in
17615 * that hole and we wouldn't want to "kill"
17616 * their pages.
17617 */
17618 vm_map_unlock(map);
17619 break;
17620 }
17621
17622 /*
17623 * Check for empty map.
17624 */
17625 if (entry == vm_map_to_entry(map) &&
17626 entry->vme_next == entry) {
17627 vm_map_unlock(map);
17628 break;
17629 }
17630 /*
17631 * Check that we don't wrap and that
17632 * we have at least one real map entry.
17633 */
17634 if ((map->hdr.nentries == 0) ||
17635 (entry->vme_next->vme_start < address)) {
17636 vm_map_unlock(map);
17637 break;
17638 }
17639 /*
17640 * Move up to the next entry if needed
17641 */
17642 skip = (entry->vme_next->vme_start - address);
17643 if (skip >= amount_left) {
17644 amount_left = 0;
17645 } else {
17646 amount_left -= skip;
17647 }
17648 address = entry->vme_next->vme_start;
17649 vm_map_unlock(map);
17650 continue;
17651 }
17652
17653 offset = address - entry->vme_start;
17654 pmap_offset = address;
17655
17656 /*
17657 * do we have more to flush than is contained in this
17658 * entry ?
17659 */
17660 if (amount_left + entry->vme_start + offset > entry->vme_end) {
17661 flush_size = entry->vme_end -
17662 (entry->vme_start + offset);
17663 } else {
17664 flush_size = amount_left;
17665 }
17666 amount_left -= flush_size;
17667 address += flush_size;
17668
17669 if (entry->is_sub_map == TRUE) {
17670 vm_map_t local_map;
17671 vm_map_offset_t local_offset;
17672
17673 local_map = VME_SUBMAP(entry);
17674 local_offset = VME_OFFSET(entry);
17675 vm_map_unlock(map);
17676 if (vm_map_msync(
17677 local_map,
17678 local_offset,
17679 flush_size,
17680 sync_flags) == KERN_INVALID_ADDRESS) {
17681 had_hole = TRUE;
17682 }
17683 continue;
17684 }
17685 object = VME_OBJECT(entry);
17686
17687 /*
17688 * We can't sync this object if the object has not been
17689 * created yet
17690 */
17691 if (object == VM_OBJECT_NULL) {
17692 vm_map_unlock(map);
17693 continue;
17694 }
17695 offset += VME_OFFSET(entry);
17696
17697 vm_object_lock(object);
17698
17699 if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
17700 int kill_pages = 0;
17701 boolean_t reusable_pages = FALSE;
17702
17703 if (sync_flags & VM_SYNC_KILLPAGES) {
17704 if (((object->ref_count == 1) ||
17705 ((object->copy_strategy !=
17706 MEMORY_OBJECT_COPY_SYMMETRIC) &&
17707 (object->copy == VM_OBJECT_NULL))) &&
17708 (object->shadow == VM_OBJECT_NULL)) {
17709 if (object->ref_count != 1) {
17710 vm_page_stats_reusable.free_shared++;
17711 }
17712 kill_pages = 1;
17713 } else {
17714 kill_pages = -1;
17715 }
17716 }
17717 if (kill_pages != -1) {
17718 vm_object_deactivate_pages(
17719 object,
17720 offset,
17721 (vm_object_size_t) flush_size,
17722 kill_pages,
17723 reusable_pages,
17724 map->pmap,
17725 pmap_offset);
17726 }
17727 vm_object_unlock(object);
17728 vm_map_unlock(map);
17729 continue;
17730 }
17731 /*
17732 * We can't sync this object if there isn't a pager.
17733 * Don't bother to sync internal objects, since there can't
17734 * be any "permanent" storage for these objects anyway.
17735 */
17736 if ((object->pager == MEMORY_OBJECT_NULL) ||
17737 (object->internal) || (object->private)) {
17738 vm_object_unlock(object);
17739 vm_map_unlock(map);
17740 continue;
17741 }
17742 /*
17743 * keep reference on the object until syncing is done
17744 */
17745 vm_object_reference_locked(object);
17746 vm_object_unlock(object);
17747
17748 vm_map_unlock(map);
17749
17750 do_sync_req = vm_object_sync(object,
17751 offset,
17752 flush_size,
17753 sync_flags & VM_SYNC_INVALIDATE,
17754 ((sync_flags & VM_SYNC_SYNCHRONOUS) ||
17755 (sync_flags & VM_SYNC_ASYNCHRONOUS)),
17756 sync_flags & VM_SYNC_SYNCHRONOUS);
17757
17758 if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) {
17759 /*
17760 * clear out the clustering and read-ahead hints
17761 */
17762 vm_object_lock(object);
17763
17764 object->pages_created = 0;
17765 object->pages_used = 0;
17766 object->sequential = 0;
17767 object->last_alloc = 0;
17768
17769 vm_object_unlock(object);
17770 }
17771 vm_object_deallocate(object);
17772 } /* while */
17773
17774 /* for proper msync() behaviour */
17775 if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS)) {
17776 return KERN_INVALID_ADDRESS;
17777 }
17778
17779 return KERN_SUCCESS;
17780 }/* vm_map_msync */
17781
17782 /*
17783 * Routine: convert_port_entry_to_map
17784 * Purpose:
17785 * Convert from a port specifying an entry or a task
17786 * to a map. Doesn't consume the port ref; produces a map ref,
17787 * which may be null. Unlike convert_port_to_map, the
17788 * port may be backed by a task or by a named entry.
17789 * Conditions:
17790 * Nothing locked.
17791 */
17792
17793
17794 vm_map_t
17795 convert_port_entry_to_map(
17796 ipc_port_t port)
17797 {
17798 vm_map_t map;
17799 vm_named_entry_t named_entry;
17800 uint32_t try_failed_count = 0;
17801
17802 if (IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
17803 while (TRUE) {
17804 ip_lock(port);
17805 if (ip_active(port) && (ip_kotype(port)
17806 == IKOT_NAMED_ENTRY)) {
17807 named_entry =
17808 (vm_named_entry_t)port->ip_kobject;
17809 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
17810 ip_unlock(port);
17811
17812 try_failed_count++;
17813 mutex_pause(try_failed_count);
17814 continue;
17815 }
17816 named_entry->ref_count++;
17817 lck_mtx_unlock(&(named_entry)->Lock);
17818 ip_unlock(port);
17819 if ((named_entry->is_sub_map) &&
17820 (named_entry->protection
17821 & VM_PROT_WRITE)) {
17822 map = named_entry->backing.map;
17823 } else {
17824 mach_destroy_memory_entry(port);
17825 return VM_MAP_NULL;
17826 }
17827 vm_map_reference_swap(map);
17828 mach_destroy_memory_entry(port);
17829 break;
17830 } else {
17831 return VM_MAP_NULL;
17832 }
17833 }
17834 } else {
17835 map = convert_port_to_map(port);
17836 }
17837
17838 return map;
17839 }
17840
17841 /*
17842 * Routine: convert_port_entry_to_object
17843 * Purpose:
17844 * Convert from a port specifying a named entry to an
17845 * object. Doesn't consume the port ref; produces an object ref,
17846 * which may be null.
17847 * Conditions:
17848 * Nothing locked.
17849 */
17850
17851
17852 vm_object_t
17853 convert_port_entry_to_object(
17854 ipc_port_t port)
17855 {
17856 vm_object_t object = VM_OBJECT_NULL;
17857 vm_named_entry_t named_entry;
17858 uint32_t try_failed_count = 0;
17859
17860 if (IP_VALID(port) &&
17861 (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
17862 try_again:
17863 ip_lock(port);
17864 if (ip_active(port) &&
17865 (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
17866 named_entry = (vm_named_entry_t)port->ip_kobject;
17867 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
17868 ip_unlock(port);
17869 try_failed_count++;
17870 mutex_pause(try_failed_count);
17871 goto try_again;
17872 }
17873 named_entry->ref_count++;
17874 lck_mtx_unlock(&(named_entry)->Lock);
17875 ip_unlock(port);
17876 if (!(named_entry->is_sub_map) &&
17877 !(named_entry->is_copy) &&
17878 (named_entry->protection & VM_PROT_WRITE)) {
17879 object = named_entry->backing.object;
17880 vm_object_reference(object);
17881 }
17882 mach_destroy_memory_entry(port);
17883 }
17884 }
17885
17886 return object;
17887 }
17888
17889 /*
17890 * Export routines to other components for the things we access locally through
17891 * macros.
17892 */
17893 #undef current_map
17894 vm_map_t
17895 current_map(void)
17896 {
17897 return current_map_fast();
17898 }
17899
17900 /*
17901 * vm_map_reference:
17902 *
17903 * Most code internal to the osfmk will go through a
17904 * macro defining this. This is always here for the
17905 * use of other kernel components.
17906 */
17907 #undef vm_map_reference
17908 void
17909 vm_map_reference(
17910 vm_map_t map)
17911 {
17912 if (map == VM_MAP_NULL) {
17913 return;
17914 }
17915
17916 lck_mtx_lock(&map->s_lock);
17917 #if TASK_SWAPPER
17918 assert(map->res_count > 0);
17919 assert(os_ref_get_count(&map->map_refcnt) >= map->res_count);
17920 map->res_count++;
17921 #endif
17922 os_ref_retain_locked(&map->map_refcnt);
17923 lck_mtx_unlock(&map->s_lock);
17924 }
17925
17926 /*
17927 * vm_map_deallocate:
17928 *
17929 * Removes a reference from the specified map,
17930 * destroying it if no references remain.
17931 * The map should not be locked.
17932 */
17933 void
17934 vm_map_deallocate(
17935 vm_map_t map)
17936 {
17937 unsigned int ref;
17938
17939 if (map == VM_MAP_NULL) {
17940 return;
17941 }
17942
17943 lck_mtx_lock(&map->s_lock);
17944 ref = os_ref_release_locked(&map->map_refcnt);
17945 if (ref > 0) {
17946 vm_map_res_deallocate(map);
17947 lck_mtx_unlock(&map->s_lock);
17948 return;
17949 }
17950 assert(os_ref_get_count(&map->map_refcnt) == 0);
17951 lck_mtx_unlock(&map->s_lock);
17952
17953 #if TASK_SWAPPER
17954 /*
17955 * The map residence count isn't decremented here because
17956 * the vm_map_delete below will traverse the entire map,
17957 * deleting entries, and the residence counts on objects
17958 * and sharing maps will go away then.
17959 */
17960 #endif
17961
17962 vm_map_destroy(map, VM_MAP_REMOVE_NO_FLAGS);
17963 }
17964
17965
17966 void
17967 vm_map_disable_NX(vm_map_t map)
17968 {
17969 if (map == NULL) {
17970 return;
17971 }
17972 if (map->pmap == NULL) {
17973 return;
17974 }
17975
17976 pmap_disable_NX(map->pmap);
17977 }
17978
17979 void
17980 vm_map_disallow_data_exec(vm_map_t map)
17981 {
17982 if (map == NULL) {
17983 return;
17984 }
17985
17986 map->map_disallow_data_exec = TRUE;
17987 }
17988
17989 /* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
17990 * more descriptive.
17991 */
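/*
 * Set the map's maximum offset to the 32-bit user VA limit
 * (VM_MAX_ADDRESS, or the device-specific pmap maximum on ARM).
 */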
17992 void
17993 vm_map_set_32bit(vm_map_t map)
17994 {
17995 #if defined(__arm__) || defined(__arm64__)
17996 map->max_offset = pmap_max_offset(FALSE, ARM_PMAP_MAX_OFFSET_DEVICE);
17997 #else
17998 map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
17999 #endif
18000 }
18001
18002
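/*
 * Set the map's maximum offset to the 64-bit user VA limit
 * (MACH_VM_MAX_ADDRESS, or the device-specific pmap maximum on ARM).
 */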
18003 void
18004 vm_map_set_64bit(vm_map_t map)
18005 {
18006 #if defined(__arm__) || defined(__arm64__)
18007 map->max_offset = pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_DEVICE);
18008 #else
18009 map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
18010 #endif
18011 }
18012
18013 /*
18014 * Expand the maximum size of an existing map to the maximum supported.
18015 */
18016 void
18017 vm_map_set_jumbo(vm_map_t map)
18018 {
18019 #if defined (__arm64__)
18020 vm_map_set_max_addr(map, ~0);
18021 #else /* arm64 */
18022 (void) map;
18023 #endif
18024 }
18025
18026 /*
18027 * This map has a JIT entitlement
18028 */
18029 void
18030 vm_map_set_jit_entitled(vm_map_t map)
18031 {
18032 #if defined (__arm64__)
18033 pmap_set_jit_entitled(map->pmap);
18034 #else /* arm64 */
18035 (void) map;
18036 #endif
18037 }
18038
18039 /*
18040 * Expand the maximum size of an existing map.
18041 */
18042 void
18043 vm_map_set_max_addr(vm_map_t map, vm_map_offset_t new_max_offset)
18044 {
18045 #if defined(__arm64__)
18046 vm_map_offset_t max_supported_offset = 0;
18047 vm_map_offset_t old_max_offset = map->max_offset;
18048 max_supported_offset = pmap_max_offset(vm_map_is_64bit(map), ARM_PMAP_MAX_OFFSET_JUMBO);
18049
18050 new_max_offset = trunc_page(new_max_offset);
18051
18052 /* The address space cannot be shrunk using this routine. */
18053 if (old_max_offset >= new_max_offset) {
18054 return;
18055 }
18056
18057 if (max_supported_offset < new_max_offset) {
18058 new_max_offset = max_supported_offset;
18059 }
18060
18061 map->max_offset = new_max_offset;
18062
18063 if (map->holes_list->prev->vme_end == old_max_offset) {
18064 /*
18065 * There is already a hole at the end of the map; simply make it bigger.
18066 */
18067 map->holes_list->prev->vme_end = map->max_offset;
18068 } else {
18069 /*
18070 * There is no hole at the end, so we need to create a new hole
18071 * for the new empty space we're creating.
18072 */
18073 struct vm_map_links *new_hole = zalloc(vm_map_holes_zone);
18074 new_hole->start = old_max_offset;
18075 new_hole->end = map->max_offset;
18076 new_hole->prev = map->holes_list->prev;
18077 new_hole->next = (struct vm_map_entry *)map->holes_list;
18078 map->holes_list->prev->links.next = (struct vm_map_entry *)new_hole;
18079 map->holes_list->prev = (struct vm_map_entry *)new_hole;
18080 }
18081 #else
18082 (void)map;
18083 (void)new_max_offset;
18084 #endif
18085 }
18086
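/*
 * Return the platform's maximum user VA for a 32-bit or 64-bit
 * address space, without touching any map.
 */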
18087 vm_map_offset_t
18088 vm_compute_max_offset(boolean_t is64)
18089 {
18090 #if defined(__arm__) || defined(__arm64__)
18091 return pmap_max_offset(is64, ARM_PMAP_MAX_OFFSET_DEVICE);
18092 #else
18093 return is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS;
18094 #endif
18095 }
18096
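/*
 * Report how ASLR slide entropy is partitioned for this map: the number
 * of slide sections and the size of each section (arm64 uses three
 * sections of ARM_TT_TWIG_SIZE; other platforms report one section of
 * size 0).
 */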
18097 void
18098 vm_map_get_max_aslr_slide_section(
18099 vm_map_t map __unused,
18100 int64_t *max_sections,
18101 int64_t *section_size)
18102 {
18103 #if defined(__arm64__)
18104 *max_sections = 3;
18105 *section_size = ARM_TT_TWIG_SIZE;
18106 #else
18107 *max_sections = 1;
18108 *section_size = 0;
18109 #endif
18110 }
18111
18112 uint64_t
18113 vm_map_get_max_aslr_slide_pages(vm_map_t map)
18114 {
18115 #if defined(__arm64__)
18116 /* Limit arm64 slide to 16MB to conserve contiguous VA space in the more
18117 * limited embedded address space; this is also meant to minimize pmap
18118 * memory usage on 16KB page systems.
18119 */
18120 return 1 << (24 - VM_MAP_PAGE_SHIFT(map));
18121 #else
18122 return 1 << (vm_map_is_64bit(map) ? 16 : 8);
18123 #endif
18124 }
18125
18126 uint64_t
18127 vm_map_get_max_loader_aslr_slide_pages(vm_map_t map)
18128 {
18129 #if defined(__arm64__)
18130 /* We limit the loader slide to 4MB, in order to ensure at least 8 bits
18131 * of independent entropy on 16KB page systems.
18132 */
18133 return 1 << (22 - VM_MAP_PAGE_SHIFT(map));
18134 #else
18135 return 1 << (vm_map_is_64bit(map) ? 16 : 8);
18136 #endif
18137 }
18138
18139 #ifndef __arm__
18140 boolean_t
18141 vm_map_is_64bit(
18142 vm_map_t map)
18143 {
18144 return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
18145 }
18146 #endif
18147
18148 boolean_t
18149 vm_map_has_hard_pagezero(
18150 vm_map_t map,
18151 vm_map_offset_t pagezero_size)
18152 {
18153 /*
18154 * XXX FBDP
18155 * We should lock the VM map (for read) here but we can get away
18156 * with it for now because there can't really be any race condition:
18157 * the VM map's min_offset is changed only when the VM map is created
18158 * and when the zero page is established (when the binary gets loaded),
18159 * and this routine gets called only when the task terminates and the
18160 * VM map is being torn down, and when a new map is created via
18161 * load_machfile()/execve().
18162 */
18163 return map->min_offset >= pagezero_size;
18164 }
18165
18166 /*
18167 * Raise a VM map's maximum offset.
18168 */
18169 kern_return_t
18170 vm_map_raise_max_offset(
18171 vm_map_t map,
18172 vm_map_offset_t new_max_offset)
18173 {
18174 kern_return_t ret;
18175
18176 vm_map_lock(map);
18177 ret = KERN_INVALID_ADDRESS;
18178
18179 if (new_max_offset >= map->max_offset) {
18180 if (!vm_map_is_64bit(map)) {
18181 if (new_max_offset <= (vm_map_offset_t)VM_MAX_ADDRESS) {
18182 map->max_offset = new_max_offset;
18183 ret = KERN_SUCCESS;
18184 }
18185 } else {
18186 if (new_max_offset <= (vm_map_offset_t)MACH_VM_MAX_ADDRESS) {
18187 map->max_offset = new_max_offset;
18188 ret = KERN_SUCCESS;
18189 }
18190 }
18191 }
18192
18193 vm_map_unlock(map);
18194 return ret;
18195 }
18196
18197
18198 /*
18199 * Raise a VM map's minimum offset.
18200 * To strictly enforce "page zero" reservation.
18201 */
18202 kern_return_t
18203 vm_map_raise_min_offset(
18204 vm_map_t map,
18205 vm_map_offset_t new_min_offset)
18206 {
18207 vm_map_entry_t first_entry;
18208
18209 new_min_offset = vm_map_round_page(new_min_offset,
18210 VM_MAP_PAGE_MASK(map));
18211
18212 vm_map_lock(map);
18213
18214 if (new_min_offset < map->min_offset) {
18215 /*
18216 * Can't move min_offset backwards, as that would expose
18217 * a part of the address space that was previously, and for
18218 * possibly good reasons, inaccessible.
18219 */
18220 vm_map_unlock(map);
18221 return KERN_INVALID_ADDRESS;
18222 }
18223 if (new_min_offset >= map->max_offset) {
18224 /* can't go beyond the end of the address space */
18225 vm_map_unlock(map);
18226 return KERN_INVALID_ADDRESS;
18227 }
18228
18229 first_entry = vm_map_first_entry(map);
18230 if (first_entry != vm_map_to_entry(map) &&
18231 first_entry->vme_start < new_min_offset) {
18232 /*
18233 * Some memory was already allocated below the new
18234 * minimum offset. It's too late to change it now...
18235 */
18236 vm_map_unlock(map);
18237 return KERN_NO_SPACE;
18238 }
18239
18240 map->min_offset = new_min_offset;
18241
18242 assert(map->holes_list);
18243 map->holes_list->start = new_min_offset;
18244 assert(new_min_offset < map->holes_list->end);
18245
18246 vm_map_unlock(map);
18247
18248 return KERN_SUCCESS;
18249 }
18250
18251 /*
18252 * Set the limit on the maximum amount of user wired memory allowed for this map.
18253 * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side of
18254 * the kernel. The limit is checked on the Mach VM side, so we keep a copy
18255 * here rather than reaching over to the BSD data structures.
18256 */
18257
18258 void
18259 vm_map_set_user_wire_limit(vm_map_t map,
18260 vm_size_t limit)
18261 {
18262 map->user_wire_limit = limit;
18263 }
18264
18265
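/*
 * Set or clear the map's "switch_protect" flag under the map lock.
 */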
18266 void
18267 vm_map_switch_protect(vm_map_t map,
18268 boolean_t val)
18269 {
18270 vm_map_lock(map);
18271 map->switch_protect = val;
18272 vm_map_unlock(map);
18273 }
18274
18275 /*
18276 * IOKit has mapped a region into this map; adjust the pmap's ledgers appropriately.
18277 * phys_footprint is a composite limit consisting of iokit + physmem, so we need to
18278 * bump both counters.
18279 */
18280 void
18281 vm_map_iokit_mapped_region(vm_map_t map, vm_size_t bytes)
18282 {
18283 pmap_t pmap = vm_map_pmap(map);
18284
18285 ledger_credit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
18286 ledger_credit(pmap->ledger, task_ledgers.phys_footprint, bytes);
18287 }
18288
18289 void
18290 vm_map_iokit_unmapped_region(vm_map_t map, vm_size_t bytes)
18291 {
18292 pmap_t pmap = vm_map_pmap(map);
18293
18294 ledger_debit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
18295 ledger_debit(pmap->ledger, task_ledgers.phys_footprint, bytes);
18296 }
18297
18298 /* Add (generate) code signature for memory range */
18299 #if CONFIG_DYNAMIC_CODE_SIGNING
18300 kern_return_t
18301 vm_map_sign(vm_map_t map,
18302 vm_map_offset_t start,
18303 vm_map_offset_t end)
18304 {
18305 vm_map_entry_t entry;
18306 vm_page_t m;
18307 vm_object_t object;
18308
18309 /*
18310 * Vet all the input parameters and current type and state of the
18311 * underlying object. Return with an error if anything is amiss.
18312 */
18313 if (map == VM_MAP_NULL) {
18314 return KERN_INVALID_ARGUMENT;
18315 }
18316
18317 vm_map_lock_read(map);
18318
18319 if (!vm_map_lookup_entry(map, start, &entry) || entry->is_sub_map) {
18320 /*
18321 * Must pass a valid non-submap address.
18322 */
18323 vm_map_unlock_read(map);
18324 return KERN_INVALID_ADDRESS;
18325 }
18326
18327 if ((entry->vme_start > start) || (entry->vme_end < end)) {
18328 /*
18329 * Map entry doesn't cover the requested range. Not handling
18330 * this situation currently.
18331 */
18332 vm_map_unlock_read(map);
18333 return KERN_INVALID_ARGUMENT;
18334 }
18335
18336 object = VME_OBJECT(entry);
18337 if (object == VM_OBJECT_NULL) {
18338 /*
18339 * Object must already be present or we can't sign.
18340 */
18341 vm_map_unlock_read(map);
18342 return KERN_INVALID_ARGUMENT;
18343 }
18344
18345 vm_object_lock(object);
18346 vm_map_unlock_read(map);
18347
18348 while (start < end) {
18349 uint32_t refmod;
18350
18351 m = vm_page_lookup(object,
18352 start - entry->vme_start + VME_OFFSET(entry));
18353 if (m == VM_PAGE_NULL) {
18354 /* should we try to fault the page in here? We can probably
18355 * demand that it exists and is locked for this request */
18356 vm_object_unlock(object);
18357 return KERN_FAILURE;
18358 }
18359 /* deal with special page status */
18360 if (m->vmp_busy ||
18361 (m->vmp_unusual && (m->vmp_error || m->vmp_restart || m->vmp_private || m->vmp_absent))) {
18362 vm_object_unlock(object);
18363 return KERN_FAILURE;
18364 }
18365
18366 /* Page is OK... now "validate" it */
18367 /* This is the place where we'll call out to create a code
18368 * directory, later */
18369 m->vmp_cs_validated = TRUE;
18370
18371 /* The page is now "clean" for codesigning purposes. That means
18372 * we don't consider it as modified (wpmapped) anymore. But
18373 * we'll disconnect the page so we note any future modification
18374 * attempts. */
18375 m->vmp_wpmapped = FALSE;
18376 refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
18377
18378 /* Pull the dirty status from the pmap, since we cleared the
18379 * wpmapped bit */
18380 if ((refmod & VM_MEM_MODIFIED) && !m->vmp_dirty) {
18381 SET_PAGE_DIRTY(m, FALSE);
18382 }
18383
18384 /* On to the next page */
18385 start += PAGE_SIZE;
18386 }
18387 vm_object_unlock(object);
18388
18389 return KERN_SUCCESS;
18390 }
18391 #endif
18392
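/*
 * vm_map_partial_reap:
 * Reclaim the map's exclusively held anonymous memory: entries whose
 * internal VM object has a single reference are moved into a temporary
 * "zap" map which is then destroyed. The resident and compressed page
 * counts of the reclaimed objects are added to the caller's counters.
 */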
18393 kern_return_t
18394 vm_map_partial_reap(vm_map_t map, unsigned int *reclaimed_resident, unsigned int *reclaimed_compressed)
18395 {
18396 vm_map_entry_t entry = VM_MAP_ENTRY_NULL;
18397 vm_map_entry_t next_entry;
18398 kern_return_t kr = KERN_SUCCESS;
18399 vm_map_t zap_map;
18400
18401 vm_map_lock(map);
18402
18403 /*
18404 * We use a "zap_map" to avoid having to unlock
18405 * the "map" in vm_map_delete().
18406 */
18407 zap_map = vm_map_create(PMAP_NULL,
18408 map->min_offset,
18409 map->max_offset,
18410 map->hdr.entries_pageable);
18411
18412 if (zap_map == VM_MAP_NULL) {
18413 return KERN_RESOURCE_SHORTAGE;
18414 }
18415
18416 vm_map_set_page_shift(zap_map,
18417 VM_MAP_PAGE_SHIFT(map));
18418 vm_map_disable_hole_optimization(zap_map);
18419
18420 for (entry = vm_map_first_entry(map);
18421 entry != vm_map_to_entry(map);
18422 entry = next_entry) {
18423 next_entry = entry->vme_next;
18424
18425 if (VME_OBJECT(entry) &&
18426 !entry->is_sub_map &&
18427 (VME_OBJECT(entry)->internal == TRUE) &&
18428 (VME_OBJECT(entry)->ref_count == 1)) {
18429 *reclaimed_resident += VME_OBJECT(entry)->resident_page_count;
18430 *reclaimed_compressed += vm_compressor_pager_get_count(VME_OBJECT(entry)->pager);
18431
18432 (void)vm_map_delete(map,
18433 entry->vme_start,
18434 entry->vme_end,
18435 VM_MAP_REMOVE_SAVE_ENTRIES,
18436 zap_map);
18437 }
18438 }
18439
18440 vm_map_unlock(map);
18441
18442 /*
18443 * Get rid of the "zap_map" and all the map entries that
18444 * it may still contain.
18445 */
18446 if (zap_map != VM_MAP_NULL) {
18447 vm_map_destroy(zap_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
18448 zap_map = VM_MAP_NULL;
18449 }
18450
18451 return kr;
18452 }
18453
18454
18455 #if DEVELOPMENT || DEBUG
18456
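/*
 * vm_map_disconnect_page_mappings:
 * Remove all pmap mappings for this map (optionally un-nesting any
 * nested submaps first so that only this task's pmap is affected) and
 * return the pmap's resident page count as sampled before the removal.
 */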
18457 int
18458 vm_map_disconnect_page_mappings(
18459 vm_map_t map,
18460 boolean_t do_unnest)
18461 {
18462 vm_map_entry_t entry;
18463 int page_count = 0;
18464
18465 if (do_unnest == TRUE) {
18466 #ifndef NO_NESTED_PMAP
18467 vm_map_lock(map);
18468
18469 for (entry = vm_map_first_entry(map);
18470 entry != vm_map_to_entry(map);
18471 entry = entry->vme_next) {
18472 if (entry->is_sub_map && entry->use_pmap) {
18473 /*
18474 * Make sure the range between the start of this entry and
18475 * the end of this entry is no longer nested, so that
18476 * we will only remove mappings from the pmap in use by
18477 * this task.
18478 */
18479 vm_map_clip_unnest(map, entry, entry->vme_start, entry->vme_end);
18480 }
18481 }
18482 vm_map_unlock(map);
18483 #endif
18484 }
18485 vm_map_lock_read(map);
18486
18487 page_count = map->pmap->stats.resident_count;
18488
18489 for (entry = vm_map_first_entry(map);
18490 entry != vm_map_to_entry(map);
18491 entry = entry->vme_next) {
18492 if (!entry->is_sub_map && ((VME_OBJECT(entry) == 0) ||
18493 (VME_OBJECT(entry)->phys_contiguous))) {
18494 continue;
18495 }
18496 if (entry->is_sub_map) {
18497 assert(!entry->use_pmap);
18498 }
18499
18500 pmap_remove_options(map->pmap, entry->vme_start, entry->vme_end, 0);
18501 }
18502 vm_map_unlock_read(map);
18503
18504 return page_count;
18505 }
18506
18507 #endif
18508
18509
18510 #if CONFIG_FREEZE
18511
18512
18513 int c_freezer_swapout_page_count;
18514 int c_freezer_compression_count = 0;
18515 AbsoluteTime c_freezer_last_yield_ts = 0;
18516
18517 extern unsigned int memorystatus_freeze_private_shared_pages_ratio;
18518 extern unsigned int memorystatus_freeze_shared_mb_per_process_max;
18519
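/*
 * vm_map_freeze:
 * Push the task's dirty anonymous pages to the compressor for the
 * freezer, stopping once roughly "dirty_budget" pages have been paged
 * out. When the freezer is backed by swap, a first "evaluation" pass
 * sizes the private vs. shared dirty footprint and can fail early
 * (e.g. FREEZER_ERROR_EXCESS_SHARED_MEMORY); with "eval_only", only
 * that pass is performed.
 */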
18520 kern_return_t
18521 vm_map_freeze(
18522 task_t task,
18523 unsigned int *purgeable_count,
18524 unsigned int *wired_count,
18525 unsigned int *clean_count,
18526 unsigned int *dirty_count,
18527 unsigned int dirty_budget,
18528 unsigned int *shared_count,
18529 int *freezer_error_code,
18530 boolean_t eval_only)
18531 {
18532 vm_map_entry_t entry2 = VM_MAP_ENTRY_NULL;
18533 kern_return_t kr = KERN_SUCCESS;
18534 boolean_t evaluation_phase = TRUE;
18535 vm_object_t cur_shared_object = NULL;
18536 int cur_shared_obj_ref_cnt = 0;
18537 unsigned int dirty_private_count = 0, dirty_shared_count = 0, obj_pages_snapshot = 0;
18538
18539 *purgeable_count = *wired_count = *clean_count = *dirty_count = *shared_count = 0;
18540
18541 /*
18542 * We need the exclusive lock here so that we can
18543 * block any page faults or lookups while we are
18544 * in the middle of freezing this vm map.
18545 */
18546 vm_map_t map = task->map;
18547
18548 vm_map_lock(map);
18549
18550 assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
18551
18552 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
18553 if (vm_compressor_low_on_space()) {
18554 *freezer_error_code = FREEZER_ERROR_NO_COMPRESSOR_SPACE;
18555 }
18556
18557 if (vm_swap_low_on_space()) {
18558 *freezer_error_code = FREEZER_ERROR_NO_SWAP_SPACE;
18559 }
18560
18561 kr = KERN_NO_SPACE;
18562 goto done;
18563 }
18564
18565 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE == FALSE) {
18566 /*
18567 * In-memory compressor backing the freezer. No disk.
18568 * So no need to do the evaluation phase.
18569 */
18570 evaluation_phase = FALSE;
18571
18572 if (eval_only == TRUE) {
18573 /*
18574 * We don't support 'eval_only' mode
18575 * in this non-swap config.
18576 */
18577 *freezer_error_code = FREEZER_ERROR_GENERIC;
18578 kr = KERN_INVALID_ARGUMENT;
18579 goto done;
18580 }
18581
18582 c_freezer_compression_count = 0;
18583 clock_get_uptime(&c_freezer_last_yield_ts);
18584 }
18585 again:
18586
18587 for (entry2 = vm_map_first_entry(map);
18588 entry2 != vm_map_to_entry(map);
18589 entry2 = entry2->vme_next) {
18590 vm_object_t src_object = VME_OBJECT(entry2);
18591
18592 if (src_object &&
18593 !entry2->is_sub_map &&
18594 !src_object->phys_contiguous) {
18595 /* If eligible, scan the entry, moving eligible pages over to our parent object */
18596
18597 if (src_object->internal == TRUE) {
18598 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
18599 /*
18600 * We skip purgeable objects during evaluation phase only.
18601 * If we decide to freeze this process, we'll explicitly
18602 * purge these objects before we go around again with
18603 * 'evaluation_phase' set to FALSE.
18604 */
18605
18606 if ((src_object->purgable == VM_PURGABLE_EMPTY) || (src_object->purgable == VM_PURGABLE_VOLATILE)) {
18607 /*
18608 * We want to purge objects that may not belong to this task but are mapped
18609 * in this task alone. Since we already purged this task's purgeable memory
18610 * at the end of a successful evaluation phase, we want to avoid doing no-op calls
18611 * on this task's purgeable objects. Hence the check for only volatile objects.
18612 */
18613 if (evaluation_phase == FALSE &&
18614 (src_object->purgable == VM_PURGABLE_VOLATILE) &&
18615 (src_object->ref_count == 1)) {
18616 vm_object_lock(src_object);
18617 vm_object_purge(src_object, 0);
18618 vm_object_unlock(src_object);
18619 }
18620 continue;
18621 }
18622
18623 /*
18624 * Pages belonging to this object could be swapped to disk.
18625 * Make sure it's not a shared object because we could end
18626 * up just bringing it back in again.
18627 *
18628 * We try to optimize somewhat by checking for objects that are mapped
18629 * more than once within our own map. But we don't do full searches,
18630 * we just look at the entries following our current entry.
18631 */
18632
18633 if (src_object->ref_count > 1) {
18634 if (src_object != cur_shared_object) {
18635 obj_pages_snapshot = (src_object->resident_page_count - src_object->wired_page_count) + vm_compressor_pager_get_count(src_object->pager);
18636 dirty_shared_count += obj_pages_snapshot;
18637
18638 cur_shared_object = src_object;
18639 cur_shared_obj_ref_cnt = 1;
18640 continue;
18641 } else {
18642 cur_shared_obj_ref_cnt++;
18643 if (src_object->ref_count == cur_shared_obj_ref_cnt) {
18644 /*
18645 * Fall through to below and treat this object as private.
18646 * So deduct its pages from our shared total and add it to the
18647 * private total.
18648 */
18649
18650 dirty_shared_count -= obj_pages_snapshot;
18651 dirty_private_count += obj_pages_snapshot;
18652 } else {
18653 continue;
18654 }
18655 }
18656 }
18657
18658
18659 if (src_object->ref_count == 1) {
18660 dirty_private_count += (src_object->resident_page_count - src_object->wired_page_count) + vm_compressor_pager_get_count(src_object->pager);
18661 }
18662
18663 if (evaluation_phase == TRUE) {
18664 continue;
18665 }
18666 }
18667
18668 uint32_t paged_out_count = vm_object_compressed_freezer_pageout(src_object, dirty_budget);
18669 *wired_count += src_object->wired_page_count;
18670
18671 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
18672 if (vm_compressor_low_on_space()) {
18673 *freezer_error_code = FREEZER_ERROR_NO_COMPRESSOR_SPACE;
18674 }
18675
18676 if (vm_swap_low_on_space()) {
18677 *freezer_error_code = FREEZER_ERROR_NO_SWAP_SPACE;
18678 }
18679
18680 kr = KERN_NO_SPACE;
18681 break;
18682 }
18683 if (paged_out_count >= dirty_budget) {
18684 break;
18685 }
18686 dirty_budget -= paged_out_count;
18687 }
18688 }
18689 }
18690
18691 if (evaluation_phase) {
18692 unsigned int shared_pages_threshold = (memorystatus_freeze_shared_mb_per_process_max * 1024 * 1024ULL) / PAGE_SIZE_64;
18693
18694 if (dirty_shared_count > shared_pages_threshold) {
18695 *freezer_error_code = FREEZER_ERROR_EXCESS_SHARED_MEMORY;
18696 kr = KERN_FAILURE;
18697 goto done;
18698 }
18699
18700 if (dirty_shared_count &&
18701 ((dirty_private_count / dirty_shared_count) < memorystatus_freeze_private_shared_pages_ratio)) {
18702 *freezer_error_code = FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO;
18703 kr = KERN_FAILURE;
18704 goto done;
18705 }
18706
18707 evaluation_phase = FALSE;
18708 dirty_shared_count = dirty_private_count = 0;
18709
18710 c_freezer_compression_count = 0;
18711 clock_get_uptime(&c_freezer_last_yield_ts);
18712
18713 if (eval_only) {
18714 kr = KERN_SUCCESS;
18715 goto done;
18716 }
18717
18718 vm_purgeable_purge_task_owned(task);
18719
18720 goto again;
18721 } else {
18722 kr = KERN_SUCCESS;
18723 *shared_count = (unsigned int) ((dirty_shared_count * PAGE_SIZE_64) / (1024 * 1024ULL));
18724 }
18725
18726 done:
18727 vm_map_unlock(map);
18728
18729 if ((eval_only == FALSE) && (kr == KERN_SUCCESS)) {
18730 vm_object_compressed_freezer_done();
18731
18732 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
18733 /*
18734 * reset the counter tracking the # of swapped compressed pages
18735 * because we are now done with this freeze session and task.
18736 */
18737
18738 *dirty_count = c_freezer_swapout_page_count; //used to track pageouts
18739 c_freezer_swapout_page_count = 0;
18740 }
18741 }
18742 return kr;
18743 }
18744
18745 #endif
18746
18747 /*
18748 * vm_map_entry_should_cow_for_true_share:
18749 *
18750 * Determines if the map entry should be clipped and set up for copy-on-write
18751 * to avoid applying "true_share" to a large VM object when only a subset is
18752 * targeted.
18753 *
18754 * For now, we target only the map entries created for the Objective C
18755 * Garbage Collector, which initially have the following properties:
18756 * - alias == VM_MEMORY_MALLOC
18757 * - wired_count == 0
18758 * - !needs_copy
18759 * and a VM object with:
18760 * - internal
18761 * - copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC
18762 * - !true_share
18763 * - vo_size == ANON_CHUNK_SIZE
18764 *
18765 * Only non-kernel map entries.
18766 */
18767 boolean_t
18768 vm_map_entry_should_cow_for_true_share(
18769 vm_map_entry_t entry)
18770 {
18771 vm_object_t object;
18772
18773 if (entry->is_sub_map) {
18774 /* entry does not point at a VM object */
18775 return FALSE;
18776 }
18777
18778 if (entry->needs_copy) {
18779 /* already set for copy_on_write: done! */
18780 return FALSE;
18781 }
18782
18783 if (VME_ALIAS(entry) != VM_MEMORY_MALLOC &&
18784 VME_ALIAS(entry) != VM_MEMORY_MALLOC_SMALL) {
18785 /* not a malloc heap or Obj-C Garbage Collector heap */
18786 return FALSE;
18787 }
18788
18789 if (entry->wired_count) {
18790 /* wired: can't change the map entry... */
18791 vm_counters.should_cow_but_wired++;
18792 return FALSE;
18793 }
18794
18795 object = VME_OBJECT(entry);
18796
18797 if (object == VM_OBJECT_NULL) {
18798 /* no object yet... */
18799 return FALSE;
18800 }
18801
18802 if (!object->internal) {
18803 /* not an internal object */
18804 return FALSE;
18805 }
18806
18807 if (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
18808 /* not the default copy strategy */
18809 return FALSE;
18810 }
18811
18812 if (object->true_share) {
18813 /* already true_share: too late to avoid it */
18814 return FALSE;
18815 }
18816
18817 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC &&
18818 object->vo_size != ANON_CHUNK_SIZE) {
18819 /* ... not an object created for the ObjC Garbage Collector */
18820 return FALSE;
18821 }
18822
18823 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_SMALL &&
18824 object->vo_size != 2048 * 4096) {
18825 /* ... not a "MALLOC_SMALL" heap */
18826 return FALSE;
18827 }
18828
18829 /*
18830 * All the criteria match: we have a large object being targeted for "true_share".
18831 * To limit the adverse side-effects linked with "true_share", tell the caller to
18832 * try and avoid setting up the entire object for "true_share" by clipping the
18833 * targeted range and setting it up for copy-on-write.
18834 */
18835 return TRUE;
18836 }
18837
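/*
 * Page rounding, truncation and alignment helpers, exported as real
 * functions for callers that can't use the vm_map macros directly.
 */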
18838 vm_map_offset_t
18839 vm_map_round_page_mask(
18840 vm_map_offset_t offset,
18841 vm_map_offset_t mask)
18842 {
18843 return VM_MAP_ROUND_PAGE(offset, mask);
18844 }
18845
18846 vm_map_offset_t
18847 vm_map_trunc_page_mask(
18848 vm_map_offset_t offset,
18849 vm_map_offset_t mask)
18850 {
18851 return VM_MAP_TRUNC_PAGE(offset, mask);
18852 }
18853
18854 boolean_t
18855 vm_map_page_aligned(
18856 vm_map_offset_t offset,
18857 vm_map_offset_t mask)
18858 {
18859 return ((offset) & mask) == 0;
18860 }
18861
18862 int
18863 vm_map_page_shift(
18864 vm_map_t map)
18865 {
18866 return VM_MAP_PAGE_SHIFT(map);
18867 }
18868
18869 int
18870 vm_map_page_size(
18871 vm_map_t map)
18872 {
18873 return VM_MAP_PAGE_SIZE(map);
18874 }
18875
18876 vm_map_offset_t
18877 vm_map_page_mask(
18878 vm_map_t map)
18879 {
18880 return VM_MAP_PAGE_MASK(map);
18881 }
18882
18883 kern_return_t
18884 vm_map_set_page_shift(
18885 vm_map_t map,
18886 int pageshift)
18887 {
18888 if (map->hdr.nentries != 0) {
18889 /* too late to change page size */
18890 return KERN_FAILURE;
18891 }
18892
18893 map->hdr.page_shift = pageshift;
18894
18895 return KERN_SUCCESS;
18896 }
18897
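/*
 * vm_map_query_volatile:
 * Walk the (caller-locked) map and report the virtual size, resident
 * size, compressed size, and the pmap's view of the resident and
 * compressed sizes for its writable volatile or empty purgeable
 * mappings.
 */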
18898 kern_return_t
18899 vm_map_query_volatile(
18900 vm_map_t map,
18901 mach_vm_size_t *volatile_virtual_size_p,
18902 mach_vm_size_t *volatile_resident_size_p,
18903 mach_vm_size_t *volatile_compressed_size_p,
18904 mach_vm_size_t *volatile_pmap_size_p,
18905 mach_vm_size_t *volatile_compressed_pmap_size_p)
18906 {
18907 mach_vm_size_t volatile_virtual_size;
18908 mach_vm_size_t volatile_resident_count;
18909 mach_vm_size_t volatile_compressed_count;
18910 mach_vm_size_t volatile_pmap_count;
18911 mach_vm_size_t volatile_compressed_pmap_count;
18912 mach_vm_size_t resident_count;
18913 vm_map_entry_t entry;
18914 vm_object_t object;
18915
18916 /* map should be locked by caller */
18917
18918 volatile_virtual_size = 0;
18919 volatile_resident_count = 0;
18920 volatile_compressed_count = 0;
18921 volatile_pmap_count = 0;
18922 volatile_compressed_pmap_count = 0;
18923
18924 for (entry = vm_map_first_entry(map);
18925 entry != vm_map_to_entry(map);
18926 entry = entry->vme_next) {
18927 mach_vm_size_t pmap_resident_bytes, pmap_compressed_bytes;
18928
18929 if (entry->is_sub_map) {
18930 continue;
18931 }
18932 if (!(entry->protection & VM_PROT_WRITE)) {
18933 continue;
18934 }
18935 object = VME_OBJECT(entry);
18936 if (object == VM_OBJECT_NULL) {
18937 continue;
18938 }
18939 if (object->purgable != VM_PURGABLE_VOLATILE &&
18940 object->purgable != VM_PURGABLE_EMPTY) {
18941 continue;
18942 }
18943 if (VME_OFFSET(entry)) {
18944 /*
18945 * If the map entry has been split and the object now
18946 * appears several times in the VM map, we don't want
18947 * to count the object's resident_page_count more than
18948 * once. We count it only for the first one, starting
18949 * at offset 0 and ignore the other VM map entries.
18950 */
18951 continue;
18952 }
18953 resident_count = object->resident_page_count;
18954 if ((VME_OFFSET(entry) / PAGE_SIZE) >= resident_count) {
18955 resident_count = 0;
18956 } else {
18957 resident_count -= (VME_OFFSET(entry) / PAGE_SIZE);
18958 }
18959
18960 volatile_virtual_size += entry->vme_end - entry->vme_start;
18961 volatile_resident_count += resident_count;
18962 if (object->pager) {
18963 volatile_compressed_count +=
18964 vm_compressor_pager_get_count(object->pager);
18965 }
18966 pmap_compressed_bytes = 0;
18967 pmap_resident_bytes =
18968 pmap_query_resident(map->pmap,
18969 entry->vme_start,
18970 entry->vme_end,
18971 &pmap_compressed_bytes);
18972 volatile_pmap_count += (pmap_resident_bytes / PAGE_SIZE);
18973 volatile_compressed_pmap_count += (pmap_compressed_bytes
18974 / PAGE_SIZE);
18975 }
18976
18977 /* map is still locked on return */
18978
18979 *volatile_virtual_size_p = volatile_virtual_size;
18980 *volatile_resident_size_p = volatile_resident_count * PAGE_SIZE;
18981 *volatile_compressed_size_p = volatile_compressed_count * PAGE_SIZE;
18982 *volatile_pmap_size_p = volatile_pmap_count * PAGE_SIZE;
18983 *volatile_compressed_pmap_size_p = volatile_compressed_pmap_count * PAGE_SIZE;
18984
18985 return KERN_SUCCESS;
18986 }
18987
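/*
 * vm_map_sizes:
 * Report the map's total virtual size, the total amount of unmapped
 * space within it, and the largest contiguous unmapped range.
 */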
18988 void
18989 vm_map_sizes(vm_map_t map,
18990 vm_map_size_t * psize,
18991 vm_map_size_t * pfree,
18992 vm_map_size_t * plargest_free)
18993 {
18994 vm_map_entry_t entry;
18995 vm_map_offset_t prev;
18996 vm_map_size_t free, total_free, largest_free;
18997 boolean_t end;
18998
18999 if (!map) {
19000 *psize = *pfree = *plargest_free = 0;
19001 return;
19002 }
19003 total_free = largest_free = 0;
19004
19005 vm_map_lock_read(map);
19006 if (psize) {
19007 *psize = map->max_offset - map->min_offset;
19008 }
19009
19010 prev = map->min_offset;
19011 for (entry = vm_map_first_entry(map);; entry = entry->vme_next) {
19012 end = (entry == vm_map_to_entry(map));
19013
19014 if (end) {
19015 free = entry->vme_end - prev;
19016 } else {
19017 free = entry->vme_start - prev;
19018 }
19019
19020 total_free += free;
19021 if (free > largest_free) {
19022 largest_free = free;
19023 }
19024
19025 if (end) {
19026 break;
19027 }
19028 prev = entry->vme_end;
19029 }
19030 vm_map_unlock_read(map);
19031 if (pfree) {
19032 *pfree = total_free;
19033 }
19034 if (plargest_free) {
19035 *plargest_free = largest_free;
19036 }
19037 }
19038
19039 #if VM_SCAN_FOR_SHADOW_CHAIN
19040 int vm_map_shadow_max(vm_map_t map);
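/*
 * Diagnostic: return the length of the longest shadow chain among the
 * VM objects mapped in this map.
 */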
19041 int
19042 vm_map_shadow_max(
19043 vm_map_t map)
19044 {
19045 int shadows, shadows_max;
19046 vm_map_entry_t entry;
19047 vm_object_t object, next_object;
19048
19049 if (map == NULL) {
19050 return 0;
19051 }
19052
19053 shadows_max = 0;
19054
19055 vm_map_lock_read(map);
19056
19057 for (entry = vm_map_first_entry(map);
19058 entry != vm_map_to_entry(map);
19059 entry = entry->vme_next) {
19060 if (entry->is_sub_map) {
19061 continue;
19062 }
19063 object = VME_OBJECT(entry);
19064 if (object == NULL) {
19065 continue;
19066 }
19067 vm_object_lock_shared(object);
19068 for (shadows = 0;
19069 object->shadow != NULL;
19070 shadows++, object = next_object) {
19071 next_object = object->shadow;
19072 vm_object_lock_shared(next_object);
19073 vm_object_unlock(object);
19074 }
19075 vm_object_unlock(object);
19076 if (shadows > shadows_max) {
19077 shadows_max = shadows;
19078 }
19079 }
19080
19081 vm_map_unlock_read(map);
19082
19083 return shadows_max;
19084 }
19085 #endif /* VM_SCAN_FOR_SHADOW_CHAIN */
19086
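/*
 * Advise the pmap layer of this map's minimum offset so it can account
 * for the page-zero reservation.
 */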
19087 void
19088 vm_commit_pagezero_status(vm_map_t lmap)
19089 {
19090 pmap_advise_pagezero_range(lmap->pmap, lmap->min_offset);
19091 }
19092
19093 #if !CONFIG_EMBEDDED
19094 void
19095 vm_map_set_high_start(
19096 vm_map_t map,
19097 vm_map_offset_t high_start)
19098 {
19099 map->vmmap_high_start = high_start;
19100 }
19101 #endif
19102
19103 #if PMAP_CS
19104 kern_return_t
19105 vm_map_entry_cs_associate(
19106 vm_map_t map,
19107 vm_map_entry_t entry,
19108 vm_map_kernel_flags_t vmk_flags)
19109 {
19110 vm_object_t cs_object, cs_shadow;
19111 vm_object_offset_t cs_offset;
19112 void *cs_blobs;
19113 struct vnode *cs_vnode;
19114 kern_return_t cs_ret;
19115
19116 if (map->pmap == NULL ||
19117 entry->is_sub_map || /* XXX FBDP: recurse on sub-range? */
19118 VME_OBJECT(entry) == VM_OBJECT_NULL ||
19119 !(entry->protection & VM_PROT_EXECUTE)) {
19120 return KERN_SUCCESS;
19121 }
19122
19123 vm_map_lock_assert_exclusive(map);
19124
19125 if (entry->used_for_jit) {
19126 cs_ret = pmap_cs_associate(map->pmap,
19127 PMAP_CS_ASSOCIATE_JIT,
19128 entry->vme_start,
19129 entry->vme_end - entry->vme_start);
19130 goto done;
19131 }
19132
19133 if (vmk_flags.vmkf_remap_prot_copy) {
19134 cs_ret = pmap_cs_associate(map->pmap,
19135 PMAP_CS_ASSOCIATE_COW,
19136 entry->vme_start,
19137 entry->vme_end - entry->vme_start);
19138 goto done;
19139 }
19140
19141 vm_object_lock_shared(VME_OBJECT(entry));
19142 cs_offset = VME_OFFSET(entry);
19143 for (cs_object = VME_OBJECT(entry);
19144 (cs_object != VM_OBJECT_NULL &&
19145 !cs_object->code_signed);
19146 cs_object = cs_shadow) {
19147 cs_shadow = cs_object->shadow;
19148 if (cs_shadow != VM_OBJECT_NULL) {
19149 cs_offset += cs_object->vo_shadow_offset;
19150 vm_object_lock_shared(cs_shadow);
19151 }
19152 vm_object_unlock(cs_object);
19153 }
19154 if (cs_object == VM_OBJECT_NULL) {
19155 return KERN_SUCCESS;
19156 }
19157
19158 cs_offset += cs_object->paging_offset;
19159 cs_vnode = vnode_pager_lookup_vnode(cs_object->pager);
19160 cs_ret = vnode_pager_get_cs_blobs(cs_vnode,
19161 &cs_blobs);
19162 assert(cs_ret == KERN_SUCCESS);
19163 cs_ret = cs_associate_blob_with_mapping(map->pmap,
19164 entry->vme_start,
19165 (entry->vme_end -
19166 entry->vme_start),
19167 cs_offset,
19168 cs_blobs);
19169 vm_object_unlock(cs_object);
19170 cs_object = VM_OBJECT_NULL;
19171
19172 done:
19173 if (cs_ret == KERN_SUCCESS) {
19174 DTRACE_VM2(vm_map_entry_cs_associate_success,
19175 vm_map_offset_t, entry->vme_start,
19176 vm_map_offset_t, entry->vme_end);
19177 if (vm_map_executable_immutable) {
19178 /*
19179 * Prevent this executable
19180 * mapping from being unmapped
19181 * or modified.
19182 */
19183 entry->permanent = TRUE;
19184 }
19185 /*
19186 * pmap says it will validate the
19187 * code-signing validity of pages
19188 * faulted in via this mapping, so
19189 * this map entry should be marked so
19190 * that vm_fault() bypasses code-signing
19191 * validation for faults coming through
19192 * this mapping.
19193 */
19194 entry->pmap_cs_associated = TRUE;
19195 } else if (cs_ret == KERN_NOT_SUPPORTED) {
19196 /*
19197 * pmap won't check the code-signing
19198 * validity of pages faulted in via
19199 * this mapping, so VM should keep
19200 * doing it.
19201 */
19202 DTRACE_VM3(vm_map_entry_cs_associate_off,
19203 vm_map_offset_t, entry->vme_start,
19204 vm_map_offset_t, entry->vme_end,
19205 int, cs_ret);
19206 } else {
19207 /*
19208 * A real error: do not allow
19209 * execution in this mapping.
19210 */
19211 DTRACE_VM3(vm_map_entry_cs_associate_failure,
19212 vm_map_offset_t, entry->vme_start,
19213 vm_map_offset_t, entry->vme_end,
19214 int, cs_ret);
19215 entry->protection &= ~VM_PROT_EXECUTE;
19216 entry->max_protection &= ~VM_PROT_EXECUTE;
19217 }
19218
19219 return cs_ret;
19220 }
19221 #endif /* PMAP_CS */
19222
19223 /*
19224 * FORKED CORPSE FOOTPRINT
19225 *
19226 * A forked corpse gets a copy of the original VM map but its pmap is mostly
19227 * empty since it never ran and never got to fault in any pages.
19228 * Collecting footprint info (via "sysctl vm.self_region_footprint") for
19229 * a forked corpse would therefore return very little information.
19230 *
19231 * When forking a corpse, we can pass the VM_MAP_FORK_CORPSE_FOOTPRINT option
19232 * to vm_map_fork() to collect footprint information from the original VM map
19233 * and its pmap, and store it in the forked corpse's VM map. That information
19234 * is stored in place of the VM map's "hole list" since we'll never need to
19235 * lookup for holes in the corpse's map.
19236 *
19237 * The corpse's footprint info looks like this:
19238 *
19239 * vm_map->vmmap_corpse_footprint points to pageable kernel memory laid out
19240 * as follows:
19241 * +---------------------------------------+
19242 * header-> | cf_size |
19243 * +-------------------+-------------------+
19244 * | cf_last_region | cf_last_zeroes |
19245 * +-------------------+-------------------+
19246 * region1-> | cfr_vaddr |
19247 * +-------------------+-------------------+
19248 * | cfr_num_pages | d0 | d1 | d2 | d3 |
19249 * +---------------------------------------+
19250 * | d4 | d5 | ... |
19251 * +---------------------------------------+
19252 * | ... |
19253 * +-------------------+-------------------+
19254 * | dy | dz | na | na | cfr_vaddr... | <-region2
19255 * +-------------------+-------------------+
19256 * | cfr_vaddr (ctd) | cfr_num_pages |
19257 * +---------------------------------------+
19258 * | d0 | d1 ... |
19259 * +---------------------------------------+
19260 * ...
19261 * +---------------------------------------+
19262 * last region-> | cfr_vaddr |
19263 * +---------------------------------------+
19264 * + cfr_num_pages | d0 | d1 | d2 | d3 |
19265 * +---------------------------------------+
19266 * ...
19267 * +---------------------------------------+
19268 * | dx | dy | dz | na | na | na | na | na |
19269 * +---------------------------------------+
19270 *
19271 * where:
19272 * cf_size: total size of the buffer (rounded to page size)
19273 * cf_last_region: offset in the buffer of the last "region" sub-header
19274 * cf_last_zeroes: number of trailing "zero" dispositions at the end
19275 * of last region
19276 * cfr_vaddr: virtual address of the start of the covered "region"
19277 * cfr_num_pages: number of pages in the covered "region"
19278 * d*: disposition of the page at that virtual address
19279 * Regions in the buffer are word-aligned.
19280 *
19281 * We estimate the size of the buffer based on the number of memory regions
19282 * and the virtual size of the address space. While copying each memory region
19283 * during vm_map_fork(), we also collect the footprint info for that region
19284 * and store it in the buffer, packing it as much as possible (coalescing
19285 * contiguous memory regions to avoid having too many region headers and
19286 * avoiding long streaks of "zero" page dispositions by splitting footprint
19287 * "regions", so the number of regions in the footprint buffer might not match
19288 * the number of memory regions in the address space.
19289 *
19290 * We also have to copy the original task's "nonvolatile" ledgers since that's
19291 * part of the footprint and will need to be reported to any tool asking for
19292 * the footprint information of the forked corpse.
19293 */
19294
19295 uint64_t vm_map_corpse_footprint_count = 0;
19296 uint64_t vm_map_corpse_footprint_size_avg = 0;
19297 uint64_t vm_map_corpse_footprint_size_max = 0;
19298 uint64_t vm_map_corpse_footprint_full = 0;
19299 uint64_t vm_map_corpse_footprint_no_buf = 0;
19300
19301 /*
19302 * vm_map_corpse_footprint_new_region:
19303 * closes the current footprint "region" and creates a new one
19304 *
19305 * Returns NULL if there's not enough space in the buffer for a new region.
19306 */
19307 static struct vm_map_corpse_footprint_region *
19308 vm_map_corpse_footprint_new_region(
19309 struct vm_map_corpse_footprint_header *footprint_header)
19310 {
19311 uintptr_t footprint_edge;
19312 uint32_t new_region_offset;
19313 struct vm_map_corpse_footprint_region *footprint_region;
19314 struct vm_map_corpse_footprint_region *new_footprint_region;
19315
19316 footprint_edge = ((uintptr_t)footprint_header +
19317 footprint_header->cf_size);
19318 footprint_region = ((struct vm_map_corpse_footprint_region *)
19319 ((char *)footprint_header +
19320 footprint_header->cf_last_region));
19321 assert((uintptr_t)footprint_region + sizeof(*footprint_region) <=
19322 footprint_edge);
19323
19324 /* get rid of trailing zeroes in the last region */
19325 assert(footprint_region->cfr_num_pages >=
19326 footprint_header->cf_last_zeroes);
19327 footprint_region->cfr_num_pages -=
19328 footprint_header->cf_last_zeroes;
19329 footprint_header->cf_last_zeroes = 0;
19330
19331 /* reuse this region if it's now empty */
19332 if (footprint_region->cfr_num_pages == 0) {
19333 return footprint_region;
19334 }
19335
19336 /* compute offset of new region */
19337 new_region_offset = footprint_header->cf_last_region;
19338 new_region_offset += sizeof(*footprint_region);
19339 new_region_offset += footprint_region->cfr_num_pages;
19340 new_region_offset = roundup(new_region_offset, sizeof(int));
19341
19342 /* check if we're going over the edge */
19343 if (((uintptr_t)footprint_header +
19344 new_region_offset +
19345 sizeof(*footprint_region)) >=
19346 footprint_edge) {
19347 /* over the edge: no new region */
19348 return NULL;
19349 }
19350
19351 /* adjust offset of last region in header */
19352 footprint_header->cf_last_region = new_region_offset;
19353
19354 new_footprint_region = (struct vm_map_corpse_footprint_region *)
19355 ((char *)footprint_header +
19356 footprint_header->cf_last_region);
19357 new_footprint_region->cfr_vaddr = 0;
19358 new_footprint_region->cfr_num_pages = 0;
19359 /* caller needs to initialize new region */
19360
19361 return new_footprint_region;
19362 }
19363
19364 /*
19365 * vm_map_corpse_footprint_collect:
19366 * collect footprint information for "old_entry" in "old_map" and
19367 * stores it in "new_map"'s vmmap_footprint_info.
19368 */
19369 kern_return_t
19370 vm_map_corpse_footprint_collect(
19371 vm_map_t old_map,
19372 vm_map_entry_t old_entry,
19373 vm_map_t new_map)
19374 {
19375 vm_map_offset_t va;
19376 int disp;
19377 kern_return_t kr;
19378 struct vm_map_corpse_footprint_header *footprint_header;
19379 struct vm_map_corpse_footprint_region *footprint_region;
19380 struct vm_map_corpse_footprint_region *new_footprint_region;
19381 unsigned char *next_disp_p;
19382 uintptr_t footprint_edge;
19383 uint32_t num_pages_tmp;
19384
19385 va = old_entry->vme_start;
19386
19387 vm_map_lock_assert_exclusive(old_map);
19388 vm_map_lock_assert_exclusive(new_map);
19389
19390 assert(new_map->has_corpse_footprint);
19391 assert(!old_map->has_corpse_footprint);
19392 if (!new_map->has_corpse_footprint ||
19393 old_map->has_corpse_footprint) {
19394 /*
19395 * This can only transfer footprint info from a
19396 * map with a live pmap to a map with a corpse footprint.
19397 */
19398 return KERN_NOT_SUPPORTED;
19399 }
19400
19401 if (new_map->vmmap_corpse_footprint == NULL) {
19402 vm_offset_t buf;
19403 vm_size_t buf_size;
19404
19405 buf = 0;
19406 buf_size = (sizeof(*footprint_header) +
19407 (old_map->hdr.nentries
19408 *
19409 (sizeof(*footprint_region) +
19410 +3)) /* potential alignment for each region */
19411 +
19412 ((old_map->size / PAGE_SIZE)
19413 *
19414 sizeof(char))); /* disposition for each page */
19415 // printf("FBDP corpse map %p guestimate footprint size 0x%llx\n", new_map, (uint64_t) buf_size);
19416 buf_size = round_page(buf_size);
19417
19418 /* limit buffer to 1 page to validate overflow detection */
19419 // buf_size = PAGE_SIZE;
19420
19421 /* limit size to a somewhat sane amount */
19422 #if CONFIG_EMBEDDED
19423 #define VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE (256*1024) /* 256KB */
19424 #else /* CONFIG_EMBEDDED */
19425 #define VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE (8*1024*1024) /* 8MB */
19426 #endif /* CONFIG_EMBEDDED */
19427 if (buf_size > VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE) {
19428 buf_size = VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE;
19429 }
19430
19431 /*
19432 * Allocate the pageable buffer (with a trailing guard page).
19433 * It will be zero-filled on demand.
19434 */
19435 kr = kernel_memory_allocate(kernel_map,
19436 &buf,
19437 (buf_size
19438 + PAGE_SIZE), /* trailing guard page */
19439 0, /* mask */
19440 KMA_PAGEABLE | KMA_GUARD_LAST,
19441 VM_KERN_MEMORY_DIAG);
19442 if (kr != KERN_SUCCESS) {
19443 vm_map_corpse_footprint_no_buf++;
19444 return kr;
19445 }
19446
19447 /* initialize header and 1st region */
19448 footprint_header = (struct vm_map_corpse_footprint_header *)buf;
19449 new_map->vmmap_corpse_footprint = footprint_header;
19450
19451 footprint_header->cf_size = buf_size;
19452 footprint_header->cf_last_region =
19453 sizeof(*footprint_header);
19454 footprint_header->cf_last_zeroes = 0;
19455
19456 footprint_region = (struct vm_map_corpse_footprint_region *)
19457 ((char *)footprint_header +
19458 footprint_header->cf_last_region);
19459 footprint_region->cfr_vaddr = 0;
19460 footprint_region->cfr_num_pages = 0;
19461 } else {
19462 /* retrieve header and last region */
19463 footprint_header = (struct vm_map_corpse_footprint_header *)
19464 new_map->vmmap_corpse_footprint;
19465 footprint_region = (struct vm_map_corpse_footprint_region *)
19466 ((char *)footprint_header +
19467 footprint_header->cf_last_region);
19468 }
19469 footprint_edge = ((uintptr_t)footprint_header +
19470 footprint_header->cf_size);
19471
19472 if ((footprint_region->cfr_vaddr +
19473 (((vm_map_offset_t)footprint_region->cfr_num_pages) *
19474 PAGE_SIZE))
19475 != old_entry->vme_start) {
19476 uint64_t num_pages_delta;
19477 uint32_t region_offset_delta;
19478
19479 /*
19480 * Not the next contiguous virtual address:
19481 * start a new region or store "zero" dispositions for
19482 * the missing pages?
19483 */
19484 /* size of gap in actual page dispositions */
19485 num_pages_delta = (((old_entry->vme_start -
19486 footprint_region->cfr_vaddr) / PAGE_SIZE)
19487 - footprint_region->cfr_num_pages);
19488 /* size of gap as a new footprint region header */
19489 region_offset_delta =
19490 (sizeof(*footprint_region) +
19491 roundup((footprint_region->cfr_num_pages -
19492 footprint_header->cf_last_zeroes),
19493 sizeof(int)) -
19494 (footprint_region->cfr_num_pages -
19495 footprint_header->cf_last_zeroes));
19496 // printf("FBDP %s:%d region 0x%x 0x%llx 0x%x vme_start 0x%llx pages_delta 0x%llx region_delta 0x%x\n", __FUNCTION__, __LINE__, footprint_header->cf_last_region, footprint_region->cfr_vaddr, footprint_region->cfr_num_pages, old_entry->vme_start, num_pages_delta, region_offset_delta);
19497 if (region_offset_delta < num_pages_delta ||
19498 os_add3_overflow(footprint_region->cfr_num_pages,
19499 (uint32_t) num_pages_delta,
19500 1,
19501 &num_pages_tmp)) {
19502 /*
19503 * Storing data for this gap would take more space
19504 * than inserting a new footprint region header:
19505 * let's start a new region and save space. If it's a
19506 * tie, let's avoid using a new region, since that
19507 * would require more region hops to find the right
19508 * range during lookups.
19509 *
19510 * If the current region's cfr_num_pages would overflow
19511 * if we added "zero" page dispositions for the gap,
19512 * no choice but to start a new region.
19513 */
19514 // printf("FBDP %s:%d new region\n", __FUNCTION__, __LINE__);
19515 new_footprint_region =
19516 vm_map_corpse_footprint_new_region(footprint_header);
19517 /* check that we're not going over the edge */
19518 if (new_footprint_region == NULL) {
19519 goto over_the_edge;
19520 }
19521 footprint_region = new_footprint_region;
19522 /* initialize new region as empty */
19523 footprint_region->cfr_vaddr = old_entry->vme_start;
19524 footprint_region->cfr_num_pages = 0;
19525 } else {
19526 /*
19527 * Store "zero" page dispositions for the missing
19528 * pages.
19529 */
19530 // printf("FBDP %s:%d zero gap\n", __FUNCTION__, __LINE__);
19531 for (; num_pages_delta > 0; num_pages_delta--) {
19532 next_disp_p =
19533 ((unsigned char *) footprint_region +
19534 sizeof(*footprint_region) +
19535 footprint_region->cfr_num_pages);
19536 /* check that we're not going over the edge */
19537 if ((uintptr_t)next_disp_p >= footprint_edge) {
19538 goto over_the_edge;
19539 }
19540 /* store "zero" disposition for this gap page */
19541 footprint_region->cfr_num_pages++;
19542 *next_disp_p = (unsigned char) 0;
19543 footprint_header->cf_last_zeroes++;
19544 }
19545 }
19546 }
19547
19548 for (va = old_entry->vme_start;
19549 va < old_entry->vme_end;
19550 va += PAGE_SIZE) {
19551 vm_object_t object;
19552
19553 object = VME_OBJECT(old_entry);
19554 if (!old_entry->is_sub_map &&
19555 old_entry->iokit_acct &&
19556 object != VM_OBJECT_NULL &&
19557 object->internal &&
19558 object->purgable == VM_PURGABLE_DENY) {
19559 /*
19560 * Non-purgeable IOKit memory: phys_footprint
19561 * includes the entire virtual mapping.
19562 * Since the forked corpse's VM map entry will not
19563 * have "iokit_acct", pretend that this page's
19564 * disposition is "present & internal", so that it
19565 * shows up in the forked corpse's footprint.
19566 */
19567 disp = (PMAP_QUERY_PAGE_PRESENT |
19568 PMAP_QUERY_PAGE_INTERNAL);
19569 } else {
19570 disp = 0;
19571 pmap_query_page_info(old_map->pmap,
19572 va,
19573 &disp);
19574 }
19575
19576 // if (va < SHARED_REGION_BASE_ARM64) printf("FBDP collect map %p va 0x%llx disp 0x%x\n", new_map, va, disp);
19577
19578 if (disp == 0 && footprint_region->cfr_num_pages == 0) {
19579 /*
19580 * Ignore "zero" dispositions at start of
19581 * region: just move start of region.
19582 */
19583 footprint_region->cfr_vaddr += PAGE_SIZE;
19584 continue;
19585 }
19586
19587 /* would region's cfr_num_pages overflow? */
19588 if (os_add_overflow(footprint_region->cfr_num_pages, 1,
19589 &num_pages_tmp)) {
19590 /* overflow: create a new region */
19591 new_footprint_region =
19592 vm_map_corpse_footprint_new_region(
19593 footprint_header);
19594 if (new_footprint_region == NULL) {
19595 goto over_the_edge;
19596 }
19597 footprint_region = new_footprint_region;
19598 footprint_region->cfr_vaddr = va;
19599 footprint_region->cfr_num_pages = 0;
19600 }
19601
19602 next_disp_p = ((unsigned char *)footprint_region +
19603 sizeof(*footprint_region) +
19604 footprint_region->cfr_num_pages);
19605 /* check that we're not going over the edge */
19606 if ((uintptr_t)next_disp_p >= footprint_edge) {
19607 goto over_the_edge;
19608 }
19609 /* store this disposition */
19610 *next_disp_p = (unsigned char) disp;
19611 footprint_region->cfr_num_pages++;
19612
19613 if (disp != 0) {
19614 /* non-zero disp: break the current zero streak */
19615 footprint_header->cf_last_zeroes = 0;
19616 /* done */
19617 continue;
19618 }
19619
19620 /* zero disp: add to the current streak of zeroes */
19621 footprint_header->cf_last_zeroes++;
19622 if ((footprint_header->cf_last_zeroes +
19623 roundup((footprint_region->cfr_num_pages -
19624 footprint_header->cf_last_zeroes) &
19625 (sizeof(int) - 1),
19626 sizeof(int))) <
19627 (sizeof(*footprint_header))) {
19628 /*
19629 * There are not enough trailing "zero" dispositions
19630 * (+ the extra padding we would need for the previous
19631 * region); creating a new region would not save space
19632 * at this point, so let's keep this "zero" disposition
19633 * in this region and reconsider later.
19634 */
19635 continue;
19636 }
19637 /*
19638 * Create a new region to avoid having too many consecutive
19639 * "zero" dispositions.
19640 */
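		/*
		 * At this point enough trailing "zero" dispositions have
		 * accumulated (counting the padding the previous region
		 * would need) that dropping them and opening a new region
		 * is a net space win, i.e. the inverse of the check above.
		 */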
19641 new_footprint_region =
19642 vm_map_corpse_footprint_new_region(footprint_header);
19643 if (new_footprint_region == NULL) {
19644 goto over_the_edge;
19645 }
19646 footprint_region = new_footprint_region;
19647 /* initialize the new region as empty ... */
19648 footprint_region->cfr_num_pages = 0;
19649 /* ... and skip this "zero" disp */
19650 footprint_region->cfr_vaddr = va + PAGE_SIZE;
19651 }
19652
19653 return KERN_SUCCESS;
19654
19655 over_the_edge:
19656 // printf("FBDP map %p footprint was full for va 0x%llx\n", new_map, va);
19657 vm_map_corpse_footprint_full++;
19658 return KERN_RESOURCE_SHORTAGE;
19659 }
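/*
 * Illustrative layout of the buffer built by the collection code above
 * (a sketch, not part of the original source; inferred from the fields
 * referenced in the collection and lookup paths):
 *
 *   offset 0:
 *       struct vm_map_corpse_footprint_header
 *       (cf_size, cf_last_region, cf_last_zeroes, cf_hint_region, ...)
 *   offset sizeof(header):
 *       region 0: cfr_vaddr, cfr_num_pages,
 *                 then cfr_num_pages one-byte page dispositions
 *   next offset, rounded up to sizeof(int):
 *       region 1: ...
 *   ...
 *   cf_last_region:
 *       offset of the last (currently open) region
 *
 * The allocation is followed by one trailing guard page after cf_size
 * bytes (see vm_map_corpse_footprint_collect_done() and
 * vm_map_corpse_footprint_destroy()).
 */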
19660
19661 /*
19662 * vm_map_corpse_footprint_collect_done:
19663 * completes the footprint collection by getting rid of any remaining
19664 * trailing "zero" dispositions and trimming the unused part of the
19665 * kernel buffer
19666 */
19667 void
19668 vm_map_corpse_footprint_collect_done(
19669 vm_map_t new_map)
19670 {
19671 struct vm_map_corpse_footprint_header *footprint_header;
19672 struct vm_map_corpse_footprint_region *footprint_region;
19673 vm_size_t buf_size, actual_size;
19674 kern_return_t kr;
19675
19676 assert(new_map->has_corpse_footprint);
19677 if (!new_map->has_corpse_footprint ||
19678 new_map->vmmap_corpse_footprint == NULL) {
19679 return;
19680 }
19681
19682 footprint_header = (struct vm_map_corpse_footprint_header *)
19683 new_map->vmmap_corpse_footprint;
19684 buf_size = footprint_header->cf_size;
19685
19686 footprint_region = (struct vm_map_corpse_footprint_region *)
19687 ((char *)footprint_header +
19688 footprint_header->cf_last_region);
19689
19690 /* get rid of trailing zeroes in last region */
19691 assert(footprint_region->cfr_num_pages >= footprint_header->cf_last_zeroes);
19692 footprint_region->cfr_num_pages -= footprint_header->cf_last_zeroes;
19693 footprint_header->cf_last_zeroes = 0;
19694
19695 actual_size = (vm_size_t)(footprint_header->cf_last_region +
19696 sizeof(*footprint_region) +
19697 footprint_region->cfr_num_pages);
19698
19699 // printf("FBDP map %p buf_size 0x%llx actual_size 0x%llx\n", new_map, (uint64_t) buf_size, (uint64_t) actual_size);
19700 vm_map_corpse_footprint_size_avg =
19701 (((vm_map_corpse_footprint_size_avg *
19702 vm_map_corpse_footprint_count) +
19703 actual_size) /
19704 (vm_map_corpse_footprint_count + 1));
19705 vm_map_corpse_footprint_count++;
19706 if (actual_size > vm_map_corpse_footprint_size_max) {
19707 vm_map_corpse_footprint_size_max = actual_size;
19708 }
19709
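	/*
	 * Trim the buffer down to what was actually used: keep the page
	 * immediately after "actual_size" as the new trailing guard page
	 * (protected VM_PROT_NONE below) and release the rest of the
	 * original allocation, including the old guard page at its end.
	 * cf_size is updated so that vm_map_corpse_footprint_destroy()
	 * later frees exactly cf_size + PAGE_SIZE.
	 */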
19710 actual_size = round_page(actual_size);
19711 if (buf_size > actual_size) {
19712 kr = vm_deallocate(kernel_map,
19713 ((vm_address_t)footprint_header +
19714 actual_size +
19715 PAGE_SIZE), /* trailing guard page */
19716 (buf_size - actual_size));
19717 assertf(kr == KERN_SUCCESS,
19718 "trim: footprint_header %p buf_size 0x%llx actual_size 0x%llx kr=0x%x\n",
19719 footprint_header,
19720 (uint64_t) buf_size,
19721 (uint64_t) actual_size,
19722 kr);
19723 kr = vm_protect(kernel_map,
19724 ((vm_address_t)footprint_header +
19725 actual_size),
19726 PAGE_SIZE,
19727 FALSE, /* set_maximum */
19728 VM_PROT_NONE);
19729 assertf(kr == KERN_SUCCESS,
19730 "guard: footprint_header %p buf_size 0x%llx actual_size 0x%llx kr=0x%x\n",
19731 footprint_header,
19732 (uint64_t) buf_size,
19733 (uint64_t) actual_size,
19734 kr);
19735 }
19736
19737 footprint_header->cf_size = actual_size;
19738 }
19739
19740 /*
19741 * vm_map_corpse_footprint_query_page_info:
19742 * retrieves the disposition of the page at virtual address "va"
19743 * in the forked corpse's VM map
19744 *
19745 * This is the equivalent of pmap_query_page_info() for a forked corpse.
19746 */
19747 kern_return_t
19748 vm_map_corpse_footprint_query_page_info(
19749 vm_map_t map,
19750 vm_map_offset_t va,
19751 int *disp)
19752 {
19753 struct vm_map_corpse_footprint_header *footprint_header;
19754 struct vm_map_corpse_footprint_region *footprint_region;
19755 uint32_t footprint_region_offset;
19756 vm_map_offset_t region_start, region_end;
19757 int disp_idx;
19758 kern_return_t kr;
19759
19760 if (!map->has_corpse_footprint) {
19761 *disp = 0;
19762 kr = KERN_INVALID_ARGUMENT;
19763 goto done;
19764 }
19765
19766 footprint_header = map->vmmap_corpse_footprint;
19767 if (footprint_header == NULL) {
19768 *disp = 0;
19769 // if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disp);
19770 kr = KERN_INVALID_ARGUMENT;
19771 goto done;
19772 }
19773
19774 /* start looking at the hint ("cf_hint_region") */
19775 footprint_region_offset = footprint_header->cf_hint_region;
19776
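	/*
	 * The hint is purely an optimization: lookups tend to proceed in
	 * increasing address order, so remembering the last region found
	 * avoids rescanning from the first region every time.  A stale or
	 * out-of-range hint only costs a restart from the first region
	 * (checked below); correctness never depends on it, hence the
	 * unsynchronized "racy" updates.
	 */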
19777 lookup_again:
19778 if (footprint_region_offset < sizeof(*footprint_header)) {
19779 /* hint too low: start from 1st region */
19780 footprint_region_offset = sizeof(*footprint_header);
19781 }
19782 if (footprint_region_offset >= footprint_header->cf_last_region) {
19783 /* hint too high: re-start from 1st region */
19784 footprint_region_offset = sizeof(*footprint_header);
19785 }
19786 footprint_region = (struct vm_map_corpse_footprint_region *)
19787 ((char *)footprint_header + footprint_region_offset);
19788 region_start = footprint_region->cfr_vaddr;
19789 region_end = (region_start +
19790 ((vm_map_offset_t)(footprint_region->cfr_num_pages) *
19791 PAGE_SIZE));
19792 if (va < region_start &&
19793 footprint_region_offset != sizeof(*footprint_header)) {
19794 /* our range starts before the hint region */
19795
19796 /* reset the hint (in a racy way...) */
19797 footprint_header->cf_hint_region = sizeof(*footprint_header);
19798 /* lookup "va" again from 1st region */
19799 footprint_region_offset = sizeof(*footprint_header);
19800 goto lookup_again;
19801 }
19802
19803 while (va >= region_end) {
19804 if (footprint_region_offset >= footprint_header->cf_last_region) {
19805 break;
19806 }
19807 /* skip the region's header */
19808 footprint_region_offset += sizeof(*footprint_region);
19809 /* skip the region's page dispositions */
19810 footprint_region_offset += footprint_region->cfr_num_pages;
19811 /* align to next word boundary */
19812 footprint_region_offset =
19813 roundup(footprint_region_offset,
19814 sizeof(int));
19815 footprint_region = (struct vm_map_corpse_footprint_region *)
19816 ((char *)footprint_header + footprint_region_offset);
19817 region_start = footprint_region->cfr_vaddr;
19818 region_end = (region_start +
19819 ((vm_map_offset_t)(footprint_region->cfr_num_pages) *
19820 PAGE_SIZE));
19821 }
19822 if (va < region_start || va >= region_end) {
19823 /* page not found */
19824 *disp = 0;
19825 // if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disp);
19826 kr = KERN_SUCCESS;
19827 goto done;
19828 }
19829
19830 /* "va" found: set the lookup hint for next lookup (in a racy way...) */
19831 footprint_header->cf_hint_region = footprint_region_offset;
19832
19833 /* get page disposition for "va" in this region */
19834 disp_idx = (int) ((va - footprint_region->cfr_vaddr) / PAGE_SIZE);
19835 *disp = (int) (footprint_region->cfr_disposition[disp_idx]);
19836
19837 kr = KERN_SUCCESS;
19838 done:
19839 // if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disp);
19840 /* dtrace -n 'vminfo:::footprint_query_page_info { printf("map 0x%p va 0x%llx disp 0x%x kr 0x%x", arg0, arg1, arg2, arg3); }' */
19841 DTRACE_VM4(footprint_query_page_info,
19842 vm_map_t, map,
19843 vm_map_offset_t, va,
19844 int, *disp,
19845 kern_return_t, kr);
19846
19847 return kr;
19848 }
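/*
 * A minimal usage sketch (not part of the original source): tallying the
 * pages reported "present" in a corpse's footprint over a VA range.  The
 * helper name is hypothetical; it only relies on
 * vm_map_corpse_footprint_query_page_info() and the PMAP_QUERY_PAGE_PRESENT
 * bit used during collection above.
 */
#if 0 /* illustrative only */
static uint64_t
corpse_footprint_count_present_pages(
	vm_map_t        corpse_map,
	vm_map_offset_t start,
	vm_map_offset_t end)
{
	uint64_t        present = 0;
	vm_map_offset_t va;
	int             disp;

	for (va = start; va < end; va += PAGE_SIZE) {
		disp = 0;
		if (vm_map_corpse_footprint_query_page_info(corpse_map,
		    va, &disp) != KERN_SUCCESS) {
			continue;
		}
		if (disp & PMAP_QUERY_PAGE_PRESENT) {
			present++;
		}
	}
	return present;
}
#endif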
19849
19850
19851 static void
19852 vm_map_corpse_footprint_destroy(
19853 vm_map_t map)
19854 {
19855 if (map->has_corpse_footprint &&
19856 map->vmmap_corpse_footprint != 0) {
19857 struct vm_map_corpse_footprint_header *footprint_header;
19858 vm_size_t buf_size;
19859 kern_return_t kr;
19860
19861 footprint_header = map->vmmap_corpse_footprint;
19862 buf_size = footprint_header->cf_size;
19863 kr = vm_deallocate(kernel_map,
19864 (vm_offset_t) map->vmmap_corpse_footprint,
19865 ((vm_size_t) buf_size
19866 + PAGE_SIZE)); /* trailing guard page */
19867 assertf(kr == KERN_SUCCESS, "kr=0x%x\n", kr);
19868 map->vmmap_corpse_footprint = 0;
19869 map->has_corpse_footprint = FALSE;
19870 }
19871 }
19872
19873 /*
19874 * vm_map_copy_footprint_ledgers:
19875 * copies any ledger that's relevant to the memory footprint of "old_task"
19876 * into the forked corpse's task ("new_task")
19877 */
19878 void
19879 vm_map_copy_footprint_ledgers(
19880 task_t old_task,
19881 task_t new_task)
19882 {
19883 vm_map_copy_ledger(old_task, new_task, task_ledgers.phys_footprint);
19884 vm_map_copy_ledger(old_task, new_task, task_ledgers.purgeable_nonvolatile);
19885 vm_map_copy_ledger(old_task, new_task, task_ledgers.purgeable_nonvolatile_compressed);
19886 vm_map_copy_ledger(old_task, new_task, task_ledgers.internal);
19887 vm_map_copy_ledger(old_task, new_task, task_ledgers.internal_compressed);
19888 vm_map_copy_ledger(old_task, new_task, task_ledgers.iokit_mapped);
19889 vm_map_copy_ledger(old_task, new_task, task_ledgers.alternate_accounting);
19890 vm_map_copy_ledger(old_task, new_task, task_ledgers.alternate_accounting_compressed);
19891 vm_map_copy_ledger(old_task, new_task, task_ledgers.page_table);
19892 vm_map_copy_ledger(old_task, new_task, task_ledgers.tagged_footprint);
19893 vm_map_copy_ledger(old_task, new_task, task_ledgers.tagged_footprint_compressed);
19894 vm_map_copy_ledger(old_task, new_task, task_ledgers.network_nonvolatile);
19895 vm_map_copy_ledger(old_task, new_task, task_ledgers.network_nonvolatile_compressed);
19896 vm_map_copy_ledger(old_task, new_task, task_ledgers.media_footprint);
19897 vm_map_copy_ledger(old_task, new_task, task_ledgers.media_footprint_compressed);
19898 vm_map_copy_ledger(old_task, new_task, task_ledgers.graphics_footprint);
19899 vm_map_copy_ledger(old_task, new_task, task_ledgers.graphics_footprint_compressed);
19900 vm_map_copy_ledger(old_task, new_task, task_ledgers.neural_footprint);
19901 vm_map_copy_ledger(old_task, new_task, task_ledgers.neural_footprint_compressed);
19902 vm_map_copy_ledger(old_task, new_task, task_ledgers.wired_mem);
19903 }
19904
19905 /*
19906 * vm_map_copy_ledger:
19907 * copy a single ledger from "old_task" to "new_task"
19908 */
19909 void
19910 vm_map_copy_ledger(
19911 task_t old_task,
19912 task_t new_task,
19913 int ledger_entry)
19914 {
19915 ledger_amount_t old_balance, new_balance, delta;
19916
19917 assert(new_task->map->has_corpse_footprint);
19918 if (!new_task->map->has_corpse_footprint) {
19919 return;
19920 }
19921
19922 /* turn off sanity checks for the ledger we're about to mess with */
19923 ledger_disable_panic_on_negative(new_task->ledger,
19924 ledger_entry);
19925
19926 /* adjust "new_task" to match "old_task" */
19927 ledger_get_balance(old_task->ledger,
19928 ledger_entry,
19929 &old_balance);
19930 ledger_get_balance(new_task->ledger,
19931 ledger_entry,
19932 &new_balance);
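	/*
	 * Illustrative example: if old_task's ledger reads 300 pages' worth
	 * and new_task's reads 100, the 200-page difference is credited to
	 * new_task below; if new_task's balance were the higher one, the
	 * excess would be debited instead.
	 */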
19933 if (new_balance == old_balance) {
19934 /* new == old: done */
19935 } else if (new_balance > old_balance) {
19936 /* new > old ==> new -= new - old */
19937 delta = new_balance - old_balance;
19938 ledger_debit(new_task->ledger,
19939 ledger_entry,
19940 delta);
19941 } else {
19942 /* new < old ==> new += old - new */
19943 delta = old_balance - new_balance;
19944 ledger_credit(new_task->ledger,
19945 ledger_entry,
19946 delta);
19947 }
19948 }
19949
19950 #if MACH_ASSERT
19951
19952 extern int pmap_ledgers_panic;
19953 extern int pmap_ledgers_panic_leeway;
19954
19955 #define LEDGER_DRIFT(__LEDGER) \
19956 int __LEDGER##_over; \
19957 ledger_amount_t __LEDGER##_over_total; \
19958 ledger_amount_t __LEDGER##_over_max; \
19959 int __LEDGER##_under; \
19960 ledger_amount_t __LEDGER##_under_total; \
19961 ledger_amount_t __LEDGER##_under_max
19962
19963 struct {
19964 uint64_t num_pmaps_checked;
19965
19966 LEDGER_DRIFT(phys_footprint);
19967 LEDGER_DRIFT(internal);
19968 LEDGER_DRIFT(internal_compressed);
19969 LEDGER_DRIFT(iokit_mapped);
19970 LEDGER_DRIFT(alternate_accounting);
19971 LEDGER_DRIFT(alternate_accounting_compressed);
19972 LEDGER_DRIFT(page_table);
19973 LEDGER_DRIFT(purgeable_volatile);
19974 LEDGER_DRIFT(purgeable_nonvolatile);
19975 LEDGER_DRIFT(purgeable_volatile_compressed);
19976 LEDGER_DRIFT(purgeable_nonvolatile_compressed);
19977 LEDGER_DRIFT(tagged_nofootprint);
19978 LEDGER_DRIFT(tagged_footprint);
19979 LEDGER_DRIFT(tagged_nofootprint_compressed);
19980 LEDGER_DRIFT(tagged_footprint_compressed);
19981 LEDGER_DRIFT(network_volatile);
19982 LEDGER_DRIFT(network_nonvolatile);
19983 LEDGER_DRIFT(network_volatile_compressed);
19984 LEDGER_DRIFT(network_nonvolatile_compressed);
19985 LEDGER_DRIFT(media_nofootprint);
19986 LEDGER_DRIFT(media_footprint);
19987 LEDGER_DRIFT(media_nofootprint_compressed);
19988 LEDGER_DRIFT(media_footprint_compressed);
19989 LEDGER_DRIFT(graphics_nofootprint);
19990 LEDGER_DRIFT(graphics_footprint);
19991 LEDGER_DRIFT(graphics_nofootprint_compressed);
19992 LEDGER_DRIFT(graphics_footprint_compressed);
19993 LEDGER_DRIFT(neural_nofootprint);
19994 LEDGER_DRIFT(neural_footprint);
19995 LEDGER_DRIFT(neural_nofootprint_compressed);
19996 LEDGER_DRIFT(neural_footprint_compressed);
19997 } pmap_ledgers_drift;
19998
19999 void
20000 vm_map_pmap_check_ledgers(
20001 pmap_t pmap,
20002 ledger_t ledger,
20003 int pid,
20004 char *procname)
20005 {
20006 ledger_amount_t bal;
20007 boolean_t do_panic;
20008
20009 do_panic = FALSE;
20010
20011 pmap_ledgers_drift.num_pmaps_checked++;
20012
20013 #define LEDGER_CHECK_BALANCE(__LEDGER) \
20014 MACRO_BEGIN \
20015 int panic_on_negative = TRUE; \
20016 ledger_get_balance(ledger, \
20017 task_ledgers.__LEDGER, \
20018 &bal); \
20019 ledger_get_panic_on_negative(ledger, \
20020 task_ledgers.__LEDGER, \
20021 &panic_on_negative); \
20022 if (bal != 0) { \
20023 if (panic_on_negative || \
20024 (pmap_ledgers_panic && \
20025 pmap_ledgers_panic_leeway > 0 && \
20026 (bal > (pmap_ledgers_panic_leeway * PAGE_SIZE) || \
20027 bal < (-pmap_ledgers_panic_leeway * PAGE_SIZE)))) { \
20028 do_panic = TRUE; \
20029 } \
20030 printf("LEDGER BALANCE proc %d (%s) " \
20031 "\"%s\" = %lld\n", \
20032 pid, procname, #__LEDGER, bal); \
20033 if (bal > 0) { \
20034 pmap_ledgers_drift.__LEDGER##_over++; \
20035 pmap_ledgers_drift.__LEDGER##_over_total += bal; \
20036 if (bal > pmap_ledgers_drift.__LEDGER##_over_max) { \
20037 pmap_ledgers_drift.__LEDGER##_over_max = bal; \
20038 } \
20039 } else if (bal < 0) { \
20040 pmap_ledgers_drift.__LEDGER##_under++; \
20041 pmap_ledgers_drift.__LEDGER##_under_total += bal; \
20042 if (bal < pmap_ledgers_drift.__LEDGER##_under_max) { \
20043 pmap_ledgers_drift.__LEDGER##_under_max = bal; \
20044 } \
20045 } \
20046 } \
20047 MACRO_END
20048
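	/*
	 * Each check below records any nonzero balance for the corresponding
	 * ledger into pmap_ledgers_drift (split into over- and
	 * under-accounting counts, totals and maxima) and, depending on the
	 * ledger's panic-on-negative setting and the pmap_ledgers_panic /
	 * pmap_ledgers_panic_leeway knobs, arms "do_panic" so that the
	 * imbalance is escalated once all ledgers have been reported.
	 */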
20049 LEDGER_CHECK_BALANCE(phys_footprint);
20050 LEDGER_CHECK_BALANCE(internal);
20051 LEDGER_CHECK_BALANCE(internal_compressed);
20052 LEDGER_CHECK_BALANCE(iokit_mapped);
20053 LEDGER_CHECK_BALANCE(alternate_accounting);
20054 LEDGER_CHECK_BALANCE(alternate_accounting_compressed);
20055 LEDGER_CHECK_BALANCE(page_table);
20056 LEDGER_CHECK_BALANCE(purgeable_volatile);
20057 LEDGER_CHECK_BALANCE(purgeable_nonvolatile);
20058 LEDGER_CHECK_BALANCE(purgeable_volatile_compressed);
20059 LEDGER_CHECK_BALANCE(purgeable_nonvolatile_compressed);
20060 LEDGER_CHECK_BALANCE(tagged_nofootprint);
20061 LEDGER_CHECK_BALANCE(tagged_footprint);
20062 LEDGER_CHECK_BALANCE(tagged_nofootprint_compressed);
20063 LEDGER_CHECK_BALANCE(tagged_footprint_compressed);
20064 LEDGER_CHECK_BALANCE(network_volatile);
20065 LEDGER_CHECK_BALANCE(network_nonvolatile);
20066 LEDGER_CHECK_BALANCE(network_volatile_compressed);
20067 LEDGER_CHECK_BALANCE(network_nonvolatile_compressed);
20068 LEDGER_CHECK_BALANCE(media_nofootprint);
20069 LEDGER_CHECK_BALANCE(media_footprint);
20070 LEDGER_CHECK_BALANCE(media_nofootprint_compressed);
20071 LEDGER_CHECK_BALANCE(media_footprint_compressed);
20072 LEDGER_CHECK_BALANCE(graphics_nofootprint);
20073 LEDGER_CHECK_BALANCE(graphics_footprint);
20074 LEDGER_CHECK_BALANCE(graphics_nofootprint_compressed);
20075 LEDGER_CHECK_BALANCE(graphics_footprint_compressed);
20076 LEDGER_CHECK_BALANCE(neural_nofootprint);
20077 LEDGER_CHECK_BALANCE(neural_footprint);
20078 LEDGER_CHECK_BALANCE(neural_nofootprint_compressed);
20079 LEDGER_CHECK_BALANCE(neural_footprint_compressed);
20080
20081 if (do_panic) {
20082 if (pmap_ledgers_panic) {
20083 panic("pmap_destroy(%p) %d[%s] has imbalanced ledgers\n",
20084 pmap, pid, procname);
20085 } else {
20086 printf("pmap_destroy(%p) %d[%s] has imbalanced ledgers\n",
20087 pmap, pid, procname);
20088 }
20089 }
20090 }
20091 #endif /* MACH_ASSERT */