1 /*
2 * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_map.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * Virtual memory mapping module.
64 */
65
66 #include <task_swapper.h>
67 #include <mach_assert.h>
68
69 #include <vm/vm_options.h>
70
71 #include <libkern/OSAtomic.h>
72
73 #include <mach/kern_return.h>
74 #include <mach/port.h>
75 #include <mach/vm_attributes.h>
76 #include <mach/vm_param.h>
77 #include <mach/vm_behavior.h>
78 #include <mach/vm_statistics.h>
79 #include <mach/memory_object.h>
80 #include <mach/mach_vm.h>
81 #include <machine/cpu_capabilities.h>
82 #include <mach/sdt.h>
83
84 #include <kern/assert.h>
85 #include <kern/backtrace.h>
86 #include <kern/counters.h>
87 #include <kern/exc_guard.h>
88 #include <kern/kalloc.h>
89 #include <kern/zalloc.h>
90
91 #include <vm/cpm.h>
92 #include <vm/vm_compressor.h>
93 #include <vm/vm_compressor_pager.h>
94 #include <vm/vm_init.h>
95 #include <vm/vm_fault.h>
96 #include <vm/vm_map.h>
97 #include <vm/vm_object.h>
98 #include <vm/vm_page.h>
99 #include <vm/vm_pageout.h>
100 #include <vm/pmap.h>
101 #include <vm/vm_kern.h>
102 #include <ipc/ipc_port.h>
103 #include <kern/sched_prim.h>
104 #include <kern/misc_protos.h>
105
106 #include <mach/vm_map_server.h>
107 #include <mach/mach_host_server.h>
108 #include <vm/vm_protos.h>
109 #include <vm/vm_purgeable_internal.h>
110
111 #include <vm/vm_protos.h>
112 #include <vm/vm_shared_region.h>
113 #include <vm/vm_map_store.h>
114
115 #include <san/kasan.h>
116
117 #include <sys/codesign.h>
118 #include <libkern/section_keywords.h>
119 #if DEVELOPMENT || DEBUG
120 extern int proc_selfcsflags(void);
121 #if CONFIG_EMBEDDED
122 extern int panic_on_unsigned_execute;
123 #endif /* CONFIG_EMBEDDED */
124 #endif /* DEVELOPMENT || DEBUG */
125
126 #if __arm64__
127 extern const int fourk_binary_compatibility_unsafe;
128 extern const int fourk_binary_compatibility_allow_wx;
129 #endif /* __arm64__ */
130 extern int proc_selfpid(void);
131 extern char *proc_name_address(void *p);
132
133 #if VM_MAP_DEBUG_APPLE_PROTECT
134 int vm_map_debug_apple_protect = 0;
135 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
136 #if VM_MAP_DEBUG_FOURK
137 int vm_map_debug_fourk = 0;
138 #endif /* VM_MAP_DEBUG_FOURK */
139
140 SECURITY_READ_ONLY_LATE(int) vm_map_executable_immutable = 1;
141 int vm_map_executable_immutable_verbose = 0;
142
143 os_refgrp_decl(static, map_refgrp, "vm_map", NULL);
144
145 extern u_int32_t random(void); /* from <libkern/libkern.h> */
146 /* Internal prototypes
147 */
148
149 static void vm_map_simplify_range(
150 vm_map_t map,
151 vm_map_offset_t start,
152 vm_map_offset_t end); /* forward */
153
154 static boolean_t vm_map_range_check(
155 vm_map_t map,
156 vm_map_offset_t start,
157 vm_map_offset_t end,
158 vm_map_entry_t *entry);
159
160 static vm_map_entry_t _vm_map_entry_create(
161 struct vm_map_header *map_header, boolean_t map_locked);
162
163 static void _vm_map_entry_dispose(
164 struct vm_map_header *map_header,
165 vm_map_entry_t entry);
166
167 static void vm_map_pmap_enter(
168 vm_map_t map,
169 vm_map_offset_t addr,
170 vm_map_offset_t end_addr,
171 vm_object_t object,
172 vm_object_offset_t offset,
173 vm_prot_t protection);
174
175 static void _vm_map_clip_end(
176 struct vm_map_header *map_header,
177 vm_map_entry_t entry,
178 vm_map_offset_t end);
179
180 static void _vm_map_clip_start(
181 struct vm_map_header *map_header,
182 vm_map_entry_t entry,
183 vm_map_offset_t start);
184
185 static void vm_map_entry_delete(
186 vm_map_t map,
187 vm_map_entry_t entry);
188
189 static kern_return_t vm_map_delete(
190 vm_map_t map,
191 vm_map_offset_t start,
192 vm_map_offset_t end,
193 int flags,
194 vm_map_t zap_map);
195
196 static void vm_map_copy_insert(
197 vm_map_t map,
198 vm_map_entry_t after_where,
199 vm_map_copy_t copy);
200
201 static kern_return_t vm_map_copy_overwrite_unaligned(
202 vm_map_t dst_map,
203 vm_map_entry_t entry,
204 vm_map_copy_t copy,
205 vm_map_address_t start,
206 boolean_t discard_on_success);
207
208 static kern_return_t vm_map_copy_overwrite_aligned(
209 vm_map_t dst_map,
210 vm_map_entry_t tmp_entry,
211 vm_map_copy_t copy,
212 vm_map_offset_t start,
213 pmap_t pmap);
214
215 static kern_return_t vm_map_copyin_kernel_buffer(
216 vm_map_t src_map,
217 vm_map_address_t src_addr,
218 vm_map_size_t len,
219 boolean_t src_destroy,
220 vm_map_copy_t *copy_result); /* OUT */
221
222 static kern_return_t vm_map_copyout_kernel_buffer(
223 vm_map_t map,
224 vm_map_address_t *addr, /* IN/OUT */
225 vm_map_copy_t copy,
226 vm_map_size_t copy_size,
227 boolean_t overwrite,
228 boolean_t consume_on_success);
229
230 static void vm_map_fork_share(
231 vm_map_t old_map,
232 vm_map_entry_t old_entry,
233 vm_map_t new_map);
234
235 static boolean_t vm_map_fork_copy(
236 vm_map_t old_map,
237 vm_map_entry_t *old_entry_p,
238 vm_map_t new_map,
239 int vm_map_copyin_flags);
240
241 static kern_return_t vm_map_wire_nested(
242 vm_map_t map,
243 vm_map_offset_t start,
244 vm_map_offset_t end,
245 vm_prot_t caller_prot,
246 vm_tag_t tag,
247 boolean_t user_wire,
248 pmap_t map_pmap,
249 vm_map_offset_t pmap_addr,
250 ppnum_t *physpage_p);
251
252 static kern_return_t vm_map_unwire_nested(
253 vm_map_t map,
254 vm_map_offset_t start,
255 vm_map_offset_t end,
256 boolean_t user_wire,
257 pmap_t map_pmap,
258 vm_map_offset_t pmap_addr);
259
260 static kern_return_t vm_map_overwrite_submap_recurse(
261 vm_map_t dst_map,
262 vm_map_offset_t dst_addr,
263 vm_map_size_t dst_size);
264
265 static kern_return_t vm_map_copy_overwrite_nested(
266 vm_map_t dst_map,
267 vm_map_offset_t dst_addr,
268 vm_map_copy_t copy,
269 boolean_t interruptible,
270 pmap_t pmap,
271 boolean_t discard_on_success);
272
273 static kern_return_t vm_map_remap_extract(
274 vm_map_t map,
275 vm_map_offset_t addr,
276 vm_map_size_t size,
277 boolean_t copy,
278 struct vm_map_header *map_header,
279 vm_prot_t *cur_protection,
280 vm_prot_t *max_protection,
281 vm_inherit_t inheritance,
282 boolean_t pageable,
283 boolean_t same_map,
284 vm_map_kernel_flags_t vmk_flags);
285
286 static kern_return_t vm_map_remap_range_allocate(
287 vm_map_t map,
288 vm_map_address_t *address,
289 vm_map_size_t size,
290 vm_map_offset_t mask,
291 int flags,
292 vm_map_kernel_flags_t vmk_flags,
293 vm_tag_t tag,
294 vm_map_entry_t *map_entry);
295
296 static void vm_map_region_look_for_page(
297 vm_map_t map,
298 vm_map_offset_t va,
299 vm_object_t object,
300 vm_object_offset_t offset,
301 int max_refcnt,
302 int depth,
303 vm_region_extended_info_t extended,
304 mach_msg_type_number_t count);
305
306 static int vm_map_region_count_obj_refs(
307 vm_map_entry_t entry,
308 vm_object_t object);
309
310
311 static kern_return_t vm_map_willneed(
312 vm_map_t map,
313 vm_map_offset_t start,
314 vm_map_offset_t end);
315
316 static kern_return_t vm_map_reuse_pages(
317 vm_map_t map,
318 vm_map_offset_t start,
319 vm_map_offset_t end);
320
321 static kern_return_t vm_map_reusable_pages(
322 vm_map_t map,
323 vm_map_offset_t start,
324 vm_map_offset_t end);
325
326 static kern_return_t vm_map_can_reuse(
327 vm_map_t map,
328 vm_map_offset_t start,
329 vm_map_offset_t end);
330
331 #if MACH_ASSERT
332 static kern_return_t vm_map_pageout(
333 vm_map_t map,
334 vm_map_offset_t start,
335 vm_map_offset_t end);
336 #endif /* MACH_ASSERT */
337
338 static void vm_map_corpse_footprint_destroy(
339 vm_map_t map);
340
341 pid_t find_largest_process_vm_map_entries(void);
342
343 /*
344 * Macros to copy a vm_map_entry. We must be careful to correctly
345 * manage the wired page count. vm_map_entry_copy() creates a new
346 * map entry to the same memory - the wired count in the new entry
347 * must be set to zero. vm_map_entry_copy_full() creates a new
348 * entry that is identical to the old entry. This preserves the
349 * wire count; it's used for map splitting and zone changing in
350 * vm_map_copyout.
351 */
352
353 #if CONFIG_EMBEDDED
354
355 /*
356 * The "used_for_jit" flag was copied from OLD to NEW in vm_map_entry_copy().
357 * But for security reasons on embedded platforms, we don't want the
358 * new mapping to be "used for jit", so we always reset the flag here.
359 * Same for "pmap_cs_associated".
360 */
361 #define VM_MAP_ENTRY_COPY_CODE_SIGNING(NEW, OLD) \
362 MACRO_BEGIN \
363 (NEW)->used_for_jit = FALSE; \
364 (NEW)->pmap_cs_associated = FALSE; \
365 MACRO_END
366
367 #else /* CONFIG_EMBEDDED */
368
369 /*
370 * The "used_for_jit" flag was copied from OLD to NEW in vm_map_entry_copy().
371 * On macOS, the new mapping can be "used for jit".
372 */
373 #define VM_MAP_ENTRY_COPY_CODE_SIGNING(NEW, OLD) \
374 MACRO_BEGIN \
375 assert((NEW)->used_for_jit == (OLD)->used_for_jit); \
376 assert((NEW)->pmap_cs_associated == FALSE); \
377 MACRO_END
378
379 #endif /* CONFIG_EMBEDDED */
380
381 #define vm_map_entry_copy(NEW, OLD) \
382 MACRO_BEGIN \
383 boolean_t _vmec_reserved = (NEW)->from_reserved_zone; \
384 *(NEW) = *(OLD); \
385 (NEW)->is_shared = FALSE; \
386 (NEW)->needs_wakeup = FALSE; \
387 (NEW)->in_transition = FALSE; \
388 (NEW)->wired_count = 0; \
389 (NEW)->user_wired_count = 0; \
390 (NEW)->permanent = FALSE; \
391 VM_MAP_ENTRY_COPY_CODE_SIGNING((NEW),(OLD)); \
392 (NEW)->from_reserved_zone = _vmec_reserved; \
393 if ((NEW)->iokit_acct) { \
394 assertf(!(NEW)->use_pmap, "old %p new %p\n", (OLD), (NEW)); \
395 (NEW)->iokit_acct = FALSE; \
396 (NEW)->use_pmap = TRUE; \
397 } \
398 (NEW)->vme_resilient_codesign = FALSE; \
399 (NEW)->vme_resilient_media = FALSE; \
400 (NEW)->vme_atomic = FALSE; \
401 (NEW)->vme_no_copy_on_read = FALSE; \
402 MACRO_END
403
404 #define vm_map_entry_copy_full(NEW, OLD) \
405 MACRO_BEGIN \
406 boolean_t _vmecf_reserved = (NEW)->from_reserved_zone; \
407 (*(NEW) = *(OLD)); \
408 (NEW)->from_reserved_zone = _vmecf_reserved; \
409 MACRO_END
410
411 /*
412 * Normal lock_read_to_write() returns FALSE/0 on failure.
413 * These functions evaluate to zero on success and to a non-zero value on failure.
414 */
415 __attribute__((always_inline))
416 int
417 vm_map_lock_read_to_write(vm_map_t map)
418 {
419 if (lck_rw_lock_shared_to_exclusive(&(map)->lock)) {
420 DTRACE_VM(vm_map_lock_upgrade);
421 return 0;
422 }
423 return 1;
424 }
425
426 __attribute__((always_inline))
427 boolean_t
428 vm_map_try_lock(vm_map_t map)
429 {
430 if (lck_rw_try_lock_exclusive(&(map)->lock)) {
431 DTRACE_VM(vm_map_lock_w);
432 return TRUE;
433 }
434 return FALSE;
435 }
436
437 __attribute__((always_inline))
438 boolean_t
439 vm_map_try_lock_read(vm_map_t map)
440 {
441 if (lck_rw_try_lock_shared(&(map)->lock)) {
442 DTRACE_VM(vm_map_lock_r);
443 return TRUE;
444 }
445 return FALSE;
446 }
447
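/*
 * Illustrative usage sketch (not part of the original source): because
 * vm_map_lock_read_to_write() follows the inverted convention documented
 * above (zero on success, non-zero on failure), and a failed upgrade drops
 * the shared lock (as lck_rw_lock_shared_to_exclusive() does on failure),
 * a typical caller falls back to taking the lock exclusively.
 * "example_modify_map" is a hypothetical caller.
 */
#if 0 /* example only */
static void
example_modify_map(vm_map_t map)
{
	vm_map_lock_read(map);
	/* ... inspect the map under the shared lock ... */
	if (vm_map_lock_read_to_write(map)) {
		/* upgrade failed: the shared lock is gone, take it exclusive */
		vm_map_lock(map);
	}
	/* ... modify the map under the exclusive lock ... */
	vm_map_unlock(map);
}
#endif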
448 /*
449 * Decide if we want to allow processes to execute from their data or stack areas.
450 * override_nx() returns true if we do. Data/stack execution can be enabled independently
451 * for 32 and 64 bit processes. Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
452 * or allow_stack_exec to enable data execution for that type of data area for that particular
453 * ABI (or both by or'ing the flags together). These are initialized in the architecture
454 * specific pmap files since the default behavior varies according to architecture. The
455 * main reason it varies is because of the need to provide binary compatibility with old
456 * applications that were written before these restrictions came into being. In the old
457 * days, an app could execute anything it could read, but this has slowly been tightened
458 * up over time. The default behavior is:
459 *
460 * 32-bit PPC apps may execute from both stack and data areas
461 * 32-bit Intel apps may execute from data areas but not stack
462 * 64-bit PPC/Intel apps may not execute from either data or stack
463 *
464 * An application on any architecture may override these defaults by explicitly
465 * adding PROT_EXEC permission to the page in question with the mprotect(2)
466 * system call. This code here just determines what happens when an app tries to
467 * execute from a page that lacks execute permission.
468 *
469 * Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
470 * default behavior for both 32 and 64 bit apps on a system-wide basis. Furthermore,
471 * a Mach-O header flag bit (MH_NO_HEAP_EXECUTION) can be used to forcibly disallow
472 * execution from data areas for a particular binary even if the arch normally permits it. As
473 * a final wrinkle, a posix_spawn attribute flag can be used to negate this opt-in header bit
474 * to support some complicated use cases, notably browsers with out-of-process plugins that
475 * are not all NX-safe.
476 */
477
478 extern int allow_data_exec, allow_stack_exec;
479
480 int
481 override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
482 {
483 int current_abi;
484
485 if (map->pmap == kernel_pmap) {
486 return FALSE;
487 }
488
489 /*
490 * Determine if the app is running in 32 or 64 bit mode.
491 */
492
493 if (vm_map_is_64bit(map)) {
494 current_abi = VM_ABI_64;
495 } else {
496 current_abi = VM_ABI_32;
497 }
498
499 /*
500 * Determine if we should allow the execution based on whether it's a
501 * stack or data area and the current architecture.
502 */
503
504 if (user_tag == VM_MEMORY_STACK) {
505 return allow_stack_exec & current_abi;
506 }
507
508 return (allow_data_exec & current_abi) && (map->map_disallow_data_exec == FALSE);
509 }
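/*
 * Illustrative example (not part of the original source): an architecture
 * that wanted to permit data-area execution for both ABIs would initialize
 * the policy variable in its pmap code as
 *
 *	allow_data_exec = VM_ABI_32 | VM_ABI_64;
 *
 * override_nx() then masks the variable with the calling task's ABI, so a
 * 64-bit task faulting on a VM_MEMORY_STACK region is only allowed to
 * execute there if allow_stack_exec has the VM_ABI_64 bit set.
 */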
510
511
512 /*
513 * Virtual memory maps provide for the mapping, protection,
514 * and sharing of virtual memory objects. In addition,
515 * this module provides for an efficient virtual copy of
516 * memory from one map to another.
517 *
518 * Synchronization is required prior to most operations.
519 *
520 * Maps consist of an ordered doubly-linked list of simple
521 * entries; a single hint is used to speed up lookups.
522 *
523 * Sharing maps have been deleted from this version of Mach.
524 * All shared objects are now mapped directly into the respective
525 * maps. This requires a change in the copy on write strategy;
526 * the asymmetric (delayed) strategy is used for shared temporary
527 * objects instead of the symmetric (shadow) strategy. All maps
528 * are now "top level" maps (either task map, kernel map or submap
529 * of the kernel map).
530 *
531 * Since portions of maps are specified by start/end addresses,
532 * which may not align with existing map entries, all
533 * routines merely "clip" entries to these start/end values.
534 * [That is, an entry is split into two, bordering at a
535 * start or end value.] Note that these clippings may not
536 * always be necessary (as the two resulting entries are then
537 * not changed); however, the clipping is done for convenience.
538 * No attempt is currently made to "glue back together" two
539 * abutting entries.
540 *
541 * The symmetric (shadow) copy strategy implements virtual copy
542 * by copying VM object references from one map to
543 * another, and then marking both regions as copy-on-write.
544 * It is important to note that only one writeable reference
545 * to a VM object region exists in any map when this strategy
546 * is used -- this means that shadow object creation can be
547 * delayed until a write operation occurs. The asymmetric (delayed)
548 * strategy allows multiple maps to have writeable references to
549 * the same region of a vm object, and hence cannot delay creating
550 * its copy objects. See vm_object_copy_quickly() in vm_object.c.
551 * Copying of permanent objects is completely different; see
552 * vm_object_copy_strategically() in vm_object.c.
553 */
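/*
 * Illustrative example (not part of the original comment): a protection
 * change that covers only the middle of an existing entry first clips that
 * entry at the new start and end addresses, leaving three abutting entries
 * that differ only in their protection bits; as noted above, the two
 * unchanged pieces are not glued back together afterwards.
 */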
554
555 static zone_t vm_map_zone; /* zone for vm_map structures */
556 zone_t vm_map_entry_zone; /* zone for vm_map_entry structures */
557 static zone_t vm_map_entry_reserved_zone; /* zone with reserve for non-blocking allocations */
558 static zone_t vm_map_copy_zone; /* zone for vm_map_copy structures */
559 zone_t vm_map_holes_zone; /* zone for vm map holes (vm_map_links) structures */
560
561
562 /*
563 * Placeholder object for submap operations. This object is dropped
564 * into the range by a call to vm_map_find, and removed when
565 * vm_map_submap creates the submap.
566 */
567
568 vm_object_t vm_submap_object;
569
570 static void *map_data;
571 static vm_size_t map_data_size;
572 static void *kentry_data;
573 static vm_size_t kentry_data_size;
574 static void *map_holes_data;
575 static vm_size_t map_holes_data_size;
576
577 #if CONFIG_EMBEDDED
578 #define NO_COALESCE_LIMIT 0
579 #else
580 #define NO_COALESCE_LIMIT ((1024 * 128) - 1)
581 #endif
582
583 /* Skip acquiring locks if we're in the midst of a kernel core dump */
584 unsigned int not_in_kdp = 1;
585
586 unsigned int vm_map_set_cache_attr_count = 0;
587
588 kern_return_t
589 vm_map_set_cache_attr(
590 vm_map_t map,
591 vm_map_offset_t va)
592 {
593 vm_map_entry_t map_entry;
594 vm_object_t object;
595 kern_return_t kr = KERN_SUCCESS;
596
597 vm_map_lock_read(map);
598
599 if (!vm_map_lookup_entry(map, va, &map_entry) ||
600 map_entry->is_sub_map) {
601 /*
602 * that memory is not properly mapped
603 */
604 kr = KERN_INVALID_ARGUMENT;
605 goto done;
606 }
607 object = VME_OBJECT(map_entry);
608
609 if (object == VM_OBJECT_NULL) {
610 /*
611 * there should be a VM object here at this point
612 */
613 kr = KERN_INVALID_ARGUMENT;
614 goto done;
615 }
616 vm_object_lock(object);
617 object->set_cache_attr = TRUE;
618 vm_object_unlock(object);
619
620 vm_map_set_cache_attr_count++;
621 done:
622 vm_map_unlock_read(map);
623
624 return kr;
625 }
626
627
628 #if CONFIG_CODE_DECRYPTION
629 /*
630 * vm_map_apple_protected:
631 * This remaps the requested part of the object with an object backed by
632 * the decrypting pager.
633 * crypt_info contains entry points and session data for the crypt module.
634 * The crypt_info block will be copied by vm_map_apple_protected. The data structures
635 * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
636 */
637 kern_return_t
638 vm_map_apple_protected(
639 vm_map_t map,
640 vm_map_offset_t start,
641 vm_map_offset_t end,
642 vm_object_offset_t crypto_backing_offset,
643 struct pager_crypt_info *crypt_info)
644 {
645 boolean_t map_locked;
646 kern_return_t kr;
647 vm_map_entry_t map_entry;
648 struct vm_map_entry tmp_entry;
649 memory_object_t unprotected_mem_obj;
650 vm_object_t protected_object;
651 vm_map_offset_t map_addr;
652 vm_map_offset_t start_aligned, end_aligned;
653 vm_object_offset_t crypto_start, crypto_end;
654 int vm_flags;
655 vm_map_kernel_flags_t vmk_flags;
656
657 vm_flags = 0;
658 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
659
660 map_locked = FALSE;
661 unprotected_mem_obj = MEMORY_OBJECT_NULL;
662
663 start_aligned = vm_map_trunc_page(start, PAGE_MASK_64);
664 end_aligned = vm_map_round_page(end, PAGE_MASK_64);
665 start_aligned = vm_map_trunc_page(start_aligned, VM_MAP_PAGE_MASK(map));
666 end_aligned = vm_map_round_page(end_aligned, VM_MAP_PAGE_MASK(map));
667
668 #if __arm64__
669 /*
670 * "start" and "end" might be 4K-aligned but not 16K-aligned,
671 * so we might have to loop and establish up to 3 mappings:
672 *
673 * + the first 16K-page, which might overlap with the previous
674 * 4K-aligned mapping,
675 * + the center,
676 * + the last 16K-page, which might overlap with the next
677 * 4K-aligned mapping.
678 * Each of these mapping might be backed by a vnode pager (if
679 * properly page-aligned) or a "fourk_pager", itself backed by a
680 * vnode pager (if 4K-aligned but not page-aligned).
681 */
682 #endif /* __arm64__ */
683
684 map_addr = start_aligned;
685 for (map_addr = start_aligned;
686 map_addr < end;
687 map_addr = tmp_entry.vme_end) {
688 vm_map_lock(map);
689 map_locked = TRUE;
690
691 /* lookup the protected VM object */
692 if (!vm_map_lookup_entry(map,
693 map_addr,
694 &map_entry) ||
695 map_entry->is_sub_map ||
696 VME_OBJECT(map_entry) == VM_OBJECT_NULL ||
697 !(map_entry->protection & VM_PROT_EXECUTE)) {
698 /* that memory is not properly mapped */
699 kr = KERN_INVALID_ARGUMENT;
700 goto done;
701 }
702
703 /* get the protected object to be decrypted */
704 protected_object = VME_OBJECT(map_entry);
705 if (protected_object == VM_OBJECT_NULL) {
706 /* there should be a VM object here at this point */
707 kr = KERN_INVALID_ARGUMENT;
708 goto done;
709 }
710 /* ensure protected object stays alive while map is unlocked */
711 vm_object_reference(protected_object);
712
713 /* limit the map entry to the area we want to cover */
714 vm_map_clip_start(map, map_entry, start_aligned);
715 vm_map_clip_end(map, map_entry, end_aligned);
716
717 tmp_entry = *map_entry;
718 map_entry = VM_MAP_ENTRY_NULL; /* not valid after unlocking map */
719 vm_map_unlock(map);
720 map_locked = FALSE;
721
722 /*
723 * This map entry might be only partially encrypted
724 * (if not fully "page-aligned").
725 */
726 crypto_start = 0;
727 crypto_end = tmp_entry.vme_end - tmp_entry.vme_start;
728 if (tmp_entry.vme_start < start) {
729 if (tmp_entry.vme_start != start_aligned) {
730 kr = KERN_INVALID_ADDRESS;
731 }
732 crypto_start += (start - tmp_entry.vme_start);
733 }
734 if (tmp_entry.vme_end > end) {
735 if (tmp_entry.vme_end != end_aligned) {
736 kr = KERN_INVALID_ADDRESS;
737 }
738 crypto_end -= (tmp_entry.vme_end - end);
739 }
740
741 /*
742 * This "extra backing offset" is needed to get the decryption
743 * routine to use the right key. It adjusts for the possibly
744 * relative offset of an interposed "4K" pager...
745 */
746 if (crypto_backing_offset == (vm_object_offset_t) -1) {
747 crypto_backing_offset = VME_OFFSET(&tmp_entry);
748 }
749
750 /*
751 * Lookup (and create if necessary) the protected memory object
752 * matching that VM object.
753 * If successful, this also grabs a reference on the memory object,
754 * to guarantee that it doesn't go away before we get a chance to map
755 * it.
756 */
757 unprotected_mem_obj = apple_protect_pager_setup(
758 protected_object,
759 VME_OFFSET(&tmp_entry),
760 crypto_backing_offset,
761 crypt_info,
762 crypto_start,
763 crypto_end);
764
765 /* release extra ref on protected object */
766 vm_object_deallocate(protected_object);
767
768 if (unprotected_mem_obj == NULL) {
769 kr = KERN_FAILURE;
770 goto done;
771 }
772
773 vm_flags = VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
774 /* can overwrite an immutable mapping */
775 vmk_flags.vmkf_overwrite_immutable = TRUE;
776 #if __arm64__
777 if (tmp_entry.used_for_jit &&
778 (VM_MAP_PAGE_SHIFT(map) != FOURK_PAGE_SHIFT ||
779 PAGE_SHIFT != FOURK_PAGE_SHIFT) &&
780 fourk_binary_compatibility_unsafe &&
781 fourk_binary_compatibility_allow_wx) {
782 printf("** FOURK_COMPAT [%d]: "
783 "allowing write+execute at 0x%llx\n",
784 proc_selfpid(), tmp_entry.vme_start);
785 vmk_flags.vmkf_map_jit = TRUE;
786 }
787 #endif /* __arm64__ */
788
789 /* map this memory object in place of the current one */
790 map_addr = tmp_entry.vme_start;
791 kr = vm_map_enter_mem_object(map,
792 &map_addr,
793 (tmp_entry.vme_end -
794 tmp_entry.vme_start),
795 (mach_vm_offset_t) 0,
796 vm_flags,
797 vmk_flags,
798 VM_KERN_MEMORY_NONE,
799 (ipc_port_t)(uintptr_t) unprotected_mem_obj,
800 0,
801 TRUE,
802 tmp_entry.protection,
803 tmp_entry.max_protection,
804 tmp_entry.inheritance);
805 assertf(kr == KERN_SUCCESS,
806 "kr = 0x%x\n", kr);
807 assertf(map_addr == tmp_entry.vme_start,
808 "map_addr=0x%llx vme_start=0x%llx tmp_entry=%p\n",
809 (uint64_t)map_addr,
810 (uint64_t) tmp_entry.vme_start,
811 &tmp_entry);
812
813 #if VM_MAP_DEBUG_APPLE_PROTECT
814 if (vm_map_debug_apple_protect) {
815 printf("APPLE_PROTECT: map %p [0x%llx:0x%llx] pager %p:"
816 " backing:[object:%p,offset:0x%llx,"
817 "crypto_backing_offset:0x%llx,"
818 "crypto_start:0x%llx,crypto_end:0x%llx]\n",
819 map,
820 (uint64_t) map_addr,
821 (uint64_t) (map_addr + (tmp_entry.vme_end -
822 tmp_entry.vme_start)),
823 unprotected_mem_obj,
824 protected_object,
825 VME_OFFSET(&tmp_entry),
826 crypto_backing_offset,
827 crypto_start,
828 crypto_end);
829 }
830 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
831
832 /*
833 * Release the reference obtained by
834 * apple_protect_pager_setup().
835 * The mapping (if it succeeded) is now holding a reference on
836 * the memory object.
837 */
838 memory_object_deallocate(unprotected_mem_obj);
839 unprotected_mem_obj = MEMORY_OBJECT_NULL;
840
841 /* continue with next map entry */
842 crypto_backing_offset += (tmp_entry.vme_end -
843 tmp_entry.vme_start);
844 crypto_backing_offset -= crypto_start;
845 }
846 kr = KERN_SUCCESS;
847
848 done:
849 if (map_locked) {
850 vm_map_unlock(map);
851 }
852 return kr;
853 }
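/*
 * Illustrative usage sketch (not part of the original source): a
 * hypothetical caller hands the decryption policy to the pager via
 * crypt_info and lets the routine derive the backing offset itself by
 * passing -1, as handled above.  The structures referenced by crypt_info
 * must stay valid until crypt_info->crypt_end() is called.
 */
#if 0 /* example only */
static kern_return_t
example_protect_text_range(
	vm_map_t                 map,
	vm_map_offset_t          start,
	vm_map_offset_t          end,
	struct pager_crypt_info *crypt_info)
{
	return vm_map_apple_protected(map, start, end,
	           (vm_object_offset_t) -1,   /* fall back to each entry's VME_OFFSET() */
	           crypt_info);
}
#endif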
854 #endif /* CONFIG_CODE_DECRYPTION */
855
856
857 lck_grp_t vm_map_lck_grp;
858 lck_grp_attr_t vm_map_lck_grp_attr;
859 lck_attr_t vm_map_lck_attr;
860 lck_attr_t vm_map_lck_rw_attr;
861
862 #if CONFIG_EMBEDDED
863 int malloc_no_cow = 1;
864 #define VM_PROTECT_WX_FAIL 0
865 #else /* CONFIG_EMBEDDED */
866 int malloc_no_cow = 0;
867 #define VM_PROTECT_WX_FAIL 1
868 #endif /* CONFIG_EMBEDDED */
869 uint64_t vm_memory_malloc_no_cow_mask = 0ULL;
870 #if DEBUG
871 int vm_check_map_sanity = 0;
872 #endif
873
874 /*
875 * vm_map_init:
876 *
877 * Initialize the vm_map module. Must be called before
878 * any other vm_map routines.
879 *
880 * Map and entry structures are allocated from zones -- we must
881 * initialize those zones.
882 *
883 * There are three zones of interest:
884 *
885 * vm_map_zone: used to allocate maps.
886 * vm_map_entry_zone: used to allocate map entries.
887 * vm_map_entry_reserved_zone: fallback zone for kernel map entries
888 *
889 * The kernel allocates map entries from a special zone that is initially
890 * "crammed" with memory. It would be difficult (perhaps impossible) for
891 * the kernel to allocate more memory to an entry zone when it became
892 * empty since the very act of allocating memory implies the creation
893 * of a new entry.
894 */
895 void
896 vm_map_init(
897 void)
898 {
899 vm_size_t entry_zone_alloc_size;
900 const char *mez_name = "VM map entries";
901
902 vm_map_zone = zinit((vm_map_size_t) sizeof(struct _vm_map), 40 * 1024,
903 PAGE_SIZE, "maps");
904 zone_change(vm_map_zone, Z_NOENCRYPT, TRUE);
905 #if defined(__LP64__)
906 entry_zone_alloc_size = PAGE_SIZE * 5;
907 #else
908 entry_zone_alloc_size = PAGE_SIZE * 6;
909 #endif
910 vm_map_entry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
911 1024 * 1024, entry_zone_alloc_size,
912 mez_name);
913 zone_change(vm_map_entry_zone, Z_NOENCRYPT, TRUE);
914 zone_change(vm_map_entry_zone, Z_NOCALLOUT, TRUE);
915 zone_change(vm_map_entry_zone, Z_GZALLOC_EXEMPT, TRUE);
916
917 vm_map_entry_reserved_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
918 kentry_data_size * 64, kentry_data_size,
919 "Reserved VM map entries");
920 zone_change(vm_map_entry_reserved_zone, Z_NOENCRYPT, TRUE);
921 /* Don't quarantine because we always need elements available */
922 zone_change(vm_map_entry_reserved_zone, Z_KASAN_QUARANTINE, FALSE);
923
924 vm_map_copy_zone = zinit((vm_map_size_t) sizeof(struct vm_map_copy),
925 16 * 1024, PAGE_SIZE, "VM map copies");
926 zone_change(vm_map_copy_zone, Z_NOENCRYPT, TRUE);
927
928 vm_map_holes_zone = zinit((vm_map_size_t) sizeof(struct vm_map_links),
929 16 * 1024, PAGE_SIZE, "VM map holes");
930 zone_change(vm_map_holes_zone, Z_NOENCRYPT, TRUE);
931
932 /*
933 * Cram the map and kentry zones with initial data.
934 * Set reserved_zone non-collectible to aid zone_gc().
935 */
936 zone_change(vm_map_zone, Z_COLLECT, FALSE);
937 zone_change(vm_map_zone, Z_FOREIGN, TRUE);
938 zone_change(vm_map_zone, Z_GZALLOC_EXEMPT, TRUE);
939
940 zone_change(vm_map_entry_reserved_zone, Z_COLLECT, FALSE);
941 zone_change(vm_map_entry_reserved_zone, Z_EXPAND, FALSE);
942 zone_change(vm_map_entry_reserved_zone, Z_FOREIGN, TRUE);
943 zone_change(vm_map_entry_reserved_zone, Z_NOCALLOUT, TRUE);
944 zone_change(vm_map_entry_reserved_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
945 zone_change(vm_map_copy_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
946 zone_change(vm_map_entry_reserved_zone, Z_GZALLOC_EXEMPT, TRUE);
947
948 zone_change(vm_map_holes_zone, Z_COLLECT, TRUE);
949 zone_change(vm_map_holes_zone, Z_EXPAND, TRUE);
950 zone_change(vm_map_holes_zone, Z_FOREIGN, TRUE);
951 zone_change(vm_map_holes_zone, Z_NOCALLOUT, TRUE);
952 zone_change(vm_map_holes_zone, Z_CALLERACCT, TRUE);
953 zone_change(vm_map_holes_zone, Z_GZALLOC_EXEMPT, TRUE);
954
955 /*
956 * Add the stolen memory to zones, adjust zone size and stolen counts.
957 * zcram only up to the maximum number of pages for each zone chunk.
958 */
959 zcram(vm_map_zone, (vm_offset_t)map_data, map_data_size);
960
961 const vm_size_t stride = ZONE_CHUNK_MAXPAGES * PAGE_SIZE;
962 for (vm_offset_t off = 0; off < kentry_data_size; off += stride) {
963 zcram(vm_map_entry_reserved_zone,
964 (vm_offset_t)kentry_data + off,
965 MIN(kentry_data_size - off, stride));
966 }
967 for (vm_offset_t off = 0; off < map_holes_data_size; off += stride) {
968 zcram(vm_map_holes_zone,
969 (vm_offset_t)map_holes_data + off,
970 MIN(map_holes_data_size - off, stride));
971 }
972
973 /*
974 * Since these are covered by zones, remove them from stolen page accounting.
975 */
976 VM_PAGE_MOVE_STOLEN(atop_64(map_data_size) + atop_64(kentry_data_size) + atop_64(map_holes_data_size));
977
978 lck_grp_attr_setdefault(&vm_map_lck_grp_attr);
979 lck_grp_init(&vm_map_lck_grp, "vm_map", &vm_map_lck_grp_attr);
980 lck_attr_setdefault(&vm_map_lck_attr);
981
982 lck_attr_setdefault(&vm_map_lck_rw_attr);
983 lck_attr_cleardebug(&vm_map_lck_rw_attr);
984
985 #if VM_MAP_DEBUG_APPLE_PROTECT
986 PE_parse_boot_argn("vm_map_debug_apple_protect",
987 &vm_map_debug_apple_protect,
988 sizeof(vm_map_debug_apple_protect));
989 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
990 #if VM_MAP_DEBUG_FOURK
991 PE_parse_boot_argn("vm_map_debug_fourk",
992 &vm_map_debug_fourk,
993 sizeof(vm_map_debug_fourk));
994 #endif /* VM_MAP_DEBUG_FOURK */
995 PE_parse_boot_argn("vm_map_executable_immutable",
996 &vm_map_executable_immutable,
997 sizeof(vm_map_executable_immutable));
998 PE_parse_boot_argn("vm_map_executable_immutable_verbose",
999 &vm_map_executable_immutable_verbose,
1000 sizeof(vm_map_executable_immutable_verbose));
1001
1002 PE_parse_boot_argn("malloc_no_cow",
1003 &malloc_no_cow,
1004 sizeof(malloc_no_cow));
1005 if (malloc_no_cow) {
1006 vm_memory_malloc_no_cow_mask = 0ULL;
1007 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC;
1008 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_SMALL;
1009 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_MEDIUM;
1010 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE;
1011 // vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_HUGE;
1012 // vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_REALLOC;
1013 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_TINY;
1014 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE_REUSABLE;
1015 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE_REUSED;
1016 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_NANO;
1017 // vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_TCMALLOC;
1018 PE_parse_boot_argn("vm_memory_malloc_no_cow_mask",
1019 &vm_memory_malloc_no_cow_mask,
1020 sizeof(vm_memory_malloc_no_cow_mask));
1021 }
1022
1023 #if DEBUG
1024 PE_parse_boot_argn("vm_check_map_sanity", &vm_check_map_sanity, sizeof(vm_check_map_sanity));
1025 if (vm_check_map_sanity) {
1026 kprintf("VM sanity checking enabled\n");
1027 } else {
1028 kprintf("VM sanity checking disabled. Set bootarg vm_check_map_sanity=1 to enable\n");
1029 }
1030 #endif /* DEBUG */
1031 }
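/*
 * Illustrative note (not part of the original source): the boot-args
 * parsed above are supplied on the kernel command line, for example
 *
 *	nvram boot-args="vm_map_executable_immutable_verbose=1 malloc_no_cow=0"
 *
 * vm_check_map_sanity is only consulted on DEBUG kernels, and the
 * vm_map_debug_* knobs only exist when their respective VM_MAP_DEBUG_*
 * options are compiled in.
 */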
1032
1033 void
1034 vm_map_steal_memory(
1035 void)
1036 {
1037 uint32_t kentry_initial_pages;
1038
1039 map_data_size = round_page(10 * sizeof(struct _vm_map));
1040 map_data = pmap_steal_memory(map_data_size);
1041
1042 /*
1043 * kentry_initial_pages corresponds to the number of kernel map entries
1044 * required during bootstrap until the asynchronous replenishment
1045 * scheme is activated and/or entries are available from the general
1046 * map entry pool.
1047 */
1048 #if defined(__LP64__)
1049 kentry_initial_pages = 10;
1050 #else
1051 kentry_initial_pages = 6;
1052 #endif
1053
1054 #if CONFIG_GZALLOC
1055 /* If using the guard allocator, reserve more memory for the kernel
1056 * reserved map entry pool.
1057 */
1058 if (gzalloc_enabled()) {
1059 kentry_initial_pages *= 1024;
1060 }
1061 #endif
1062
1063 kentry_data_size = kentry_initial_pages * PAGE_SIZE;
1064 kentry_data = pmap_steal_memory(kentry_data_size);
1065
1066 map_holes_data_size = kentry_data_size;
1067 map_holes_data = pmap_steal_memory(map_holes_data_size);
1068 }
1069
1070 boolean_t vm_map_supports_hole_optimization = FALSE;
1071
1072 void
1073 vm_kernel_reserved_entry_init(void)
1074 {
1075 zone_prio_refill_configure(vm_map_entry_reserved_zone, (6 * PAGE_SIZE) / sizeof(struct vm_map_entry));
1076
1077 /*
1078 * Once we have our replenish thread set up, we can start using the vm_map_holes zone.
1079 */
1080 zone_prio_refill_configure(vm_map_holes_zone, (6 * PAGE_SIZE) / sizeof(struct vm_map_links));
1081 vm_map_supports_hole_optimization = TRUE;
1082 }
1083
1084 void
1085 vm_map_disable_hole_optimization(vm_map_t map)
1086 {
1087 vm_map_entry_t head_entry, hole_entry, next_hole_entry;
1088
1089 if (map->holelistenabled) {
1090 head_entry = hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
1091
1092 while (hole_entry != NULL) {
1093 next_hole_entry = hole_entry->vme_next;
1094
1095 hole_entry->vme_next = NULL;
1096 hole_entry->vme_prev = NULL;
1097 zfree(vm_map_holes_zone, hole_entry);
1098
1099 if (next_hole_entry == head_entry) {
1100 hole_entry = NULL;
1101 } else {
1102 hole_entry = next_hole_entry;
1103 }
1104 }
1105
1106 map->holes_list = NULL;
1107 map->holelistenabled = FALSE;
1108
1109 map->first_free = vm_map_first_entry(map);
1110 SAVE_HINT_HOLE_WRITE(map, NULL);
1111 }
1112 }
1113
1114 boolean_t
1115 vm_kernel_map_is_kernel(vm_map_t map)
1116 {
1117 return map->pmap == kernel_pmap;
1118 }
1119
1120 /*
1121 * vm_map_create:
1122 *
1123 * Creates and returns a new empty VM map with
1124 * the given physical map structure, and having
1125 * the given lower and upper address bounds.
1126 */
1127
1128 vm_map_t
1129 vm_map_create(
1130 pmap_t pmap,
1131 vm_map_offset_t min,
1132 vm_map_offset_t max,
1133 boolean_t pageable)
1134 {
1135 int options;
1136
1137 options = 0;
1138 if (pageable) {
1139 options |= VM_MAP_CREATE_PAGEABLE;
1140 }
1141 return vm_map_create_options(pmap, min, max, options);
1142 }
1143
1144 vm_map_t
1145 vm_map_create_options(
1146 pmap_t pmap,
1147 vm_map_offset_t min,
1148 vm_map_offset_t max,
1149 int options)
1150 {
1151 vm_map_t result;
1152 struct vm_map_links *hole_entry = NULL;
1153
1154 if (options & ~(VM_MAP_CREATE_ALL_OPTIONS)) {
1155 /* unknown option */
1156 return VM_MAP_NULL;
1157 }
1158
1159 result = (vm_map_t) zalloc(vm_map_zone);
1160 if (result == VM_MAP_NULL) {
1161 panic("vm_map_create");
1162 }
1163
1164 vm_map_first_entry(result) = vm_map_to_entry(result);
1165 vm_map_last_entry(result) = vm_map_to_entry(result);
1166 result->hdr.nentries = 0;
1167 if (options & VM_MAP_CREATE_PAGEABLE) {
1168 result->hdr.entries_pageable = TRUE;
1169 } else {
1170 result->hdr.entries_pageable = FALSE;
1171 }
1172
1173 vm_map_store_init( &(result->hdr));
1174
1175 result->hdr.page_shift = PAGE_SHIFT;
1176
1177 result->size = 0;
1178 result->user_wire_limit = MACH_VM_MAX_ADDRESS; /* default limit is unlimited */
1179 result->user_wire_size = 0;
1180 #if !CONFIG_EMBEDDED
1181 result->vmmap_high_start = 0;
1182 #endif
1183 os_ref_init_count(&result->map_refcnt, &map_refgrp, 1);
1184 #if TASK_SWAPPER
1185 result->res_count = 1;
1186 result->sw_state = MAP_SW_IN;
1187 #endif /* TASK_SWAPPER */
1188 result->pmap = pmap;
1189 result->min_offset = min;
1190 result->max_offset = max;
1191 result->wiring_required = FALSE;
1192 result->no_zero_fill = FALSE;
1193 result->mapped_in_other_pmaps = FALSE;
1194 result->wait_for_space = FALSE;
1195 result->switch_protect = FALSE;
1196 result->disable_vmentry_reuse = FALSE;
1197 result->map_disallow_data_exec = FALSE;
1198 result->is_nested_map = FALSE;
1199 result->map_disallow_new_exec = FALSE;
1200 result->highest_entry_end = 0;
1201 result->first_free = vm_map_to_entry(result);
1202 result->hint = vm_map_to_entry(result);
1203 result->jit_entry_exists = FALSE;
1204
1205 /* "has_corpse_footprint" and "holelistenabled" are mutually exclusive */
1206 if (options & VM_MAP_CREATE_CORPSE_FOOTPRINT) {
1207 result->has_corpse_footprint = TRUE;
1208 result->holelistenabled = FALSE;
1209 result->vmmap_corpse_footprint = NULL;
1210 } else {
1211 result->has_corpse_footprint = FALSE;
1212 if (vm_map_supports_hole_optimization) {
1213 hole_entry = zalloc(vm_map_holes_zone);
1214
1215 hole_entry->start = min;
1216 #if defined(__arm__) || defined(__arm64__)
1217 hole_entry->end = result->max_offset;
1218 #else
1219 hole_entry->end = (max > (vm_map_offset_t)MACH_VM_MAX_ADDRESS) ? max : (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
1220 #endif
1221 result->holes_list = result->hole_hint = hole_entry;
1222 hole_entry->prev = hole_entry->next = CAST_TO_VM_MAP_ENTRY(hole_entry);
1223 result->holelistenabled = TRUE;
1224 } else {
1225 result->holelistenabled = FALSE;
1226 }
1227 }
1228
1229 vm_map_lock_init(result);
1230 lck_mtx_init_ext(&result->s_lock, &result->s_lock_ext, &vm_map_lck_grp, &vm_map_lck_attr);
1231
1232 return result;
1233 }
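/*
 * Illustrative usage sketch (not part of the original source): "my_pmap"
 * and the address bounds are hypothetical; the two calls below are
 * equivalent, the second using the options interface directly.
 */
#if 0 /* example only */
	vm_map_t new_map;

	new_map = vm_map_create(my_pmap,
	    MACH_VM_MIN_ADDRESS, MACH_VM_MAX_ADDRESS, TRUE);

	new_map = vm_map_create_options(my_pmap,
	    MACH_VM_MIN_ADDRESS, MACH_VM_MAX_ADDRESS, VM_MAP_CREATE_PAGEABLE);
#endif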
1234
1235 /*
1236 * vm_map_entry_create: [ internal use only ]
1237 *
1238 * Allocates a VM map entry for insertion in the
1239 * given map (or map copy). No fields are filled.
1240 */
1241 #define vm_map_entry_create(map, map_locked) _vm_map_entry_create(&(map)->hdr, map_locked)
1242
1243 #define vm_map_copy_entry_create(copy, map_locked) \
1244 _vm_map_entry_create(&(copy)->cpy_hdr, map_locked)
1245 unsigned reserved_zalloc_count, nonreserved_zalloc_count;
1246
1247 static vm_map_entry_t
1248 _vm_map_entry_create(
1249 struct vm_map_header *map_header, boolean_t __unused map_locked)
1250 {
1251 zone_t zone;
1252 vm_map_entry_t entry;
1253
1254 zone = vm_map_entry_zone;
1255
1256 assert(map_header->entries_pageable ? !map_locked : TRUE);
1257
1258 if (map_header->entries_pageable) {
1259 entry = (vm_map_entry_t) zalloc(zone);
1260 } else {
1261 entry = (vm_map_entry_t) zalloc_canblock(zone, FALSE);
1262
1263 if (entry == VM_MAP_ENTRY_NULL) {
1264 zone = vm_map_entry_reserved_zone;
1265 entry = (vm_map_entry_t) zalloc(zone);
1266 OSAddAtomic(1, &reserved_zalloc_count);
1267 } else {
1268 OSAddAtomic(1, &nonreserved_zalloc_count);
1269 }
1270 }
1271
1272 if (entry == VM_MAP_ENTRY_NULL) {
1273 panic("vm_map_entry_create");
1274 }
1275 entry->from_reserved_zone = (zone == vm_map_entry_reserved_zone);
1276
1277 vm_map_store_update((vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE);
1278 #if MAP_ENTRY_CREATION_DEBUG
1279 entry->vme_creation_maphdr = map_header;
1280 backtrace(&entry->vme_creation_bt[0],
1281 (sizeof(entry->vme_creation_bt) / sizeof(uintptr_t)), NULL);
1282 #endif
1283 return entry;
1284 }
1285
1286 /*
1287 * vm_map_entry_dispose: [ internal use only ]
1288 *
1289 * Inverse of vm_map_entry_create.
1290 *
1291 * write map lock held so no need to
1292 * do anything special to insure correctness
1293 * of the stores
1294 */
1295 #define vm_map_entry_dispose(map, entry) \
1296 _vm_map_entry_dispose(&(map)->hdr, (entry))
1297
1298 #define vm_map_copy_entry_dispose(copy, entry) \
1299 _vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
1300
1301 static void
1302 _vm_map_entry_dispose(
1303 struct vm_map_header *map_header,
1304 vm_map_entry_t entry)
1305 {
1306 zone_t zone;
1307
1308 if (map_header->entries_pageable || !(entry->from_reserved_zone)) {
1309 zone = vm_map_entry_zone;
1310 } else {
1311 zone = vm_map_entry_reserved_zone;
1312 }
1313
1314 if (!map_header->entries_pageable) {
1315 if (zone == vm_map_entry_zone) {
1316 OSAddAtomic(-1, &nonreserved_zalloc_count);
1317 } else {
1318 OSAddAtomic(-1, &reserved_zalloc_count);
1319 }
1320 }
1321
1322 zfree(zone, entry);
1323 }
1324
1325 #if MACH_ASSERT
1326 static boolean_t first_free_check = FALSE;
1327 boolean_t
1328 first_free_is_valid(
1329 vm_map_t map)
1330 {
1331 if (!first_free_check) {
1332 return TRUE;
1333 }
1334
1335 return first_free_is_valid_store( map );
1336 }
1337 #endif /* MACH_ASSERT */
1338
1339
1340 #define vm_map_copy_entry_link(copy, after_where, entry) \
1341 _vm_map_store_entry_link(&(copy)->cpy_hdr, after_where, (entry))
1342
1343 #define vm_map_copy_entry_unlink(copy, entry) \
1344 _vm_map_store_entry_unlink(&(copy)->cpy_hdr, (entry))
1345
1346 #if MACH_ASSERT && TASK_SWAPPER
1347 /*
1348 * vm_map_res_reference:
1349 *
1350 * Adds another valid residence count to the given map.
1351 *
1352 * Map is locked so this function can be called from
1353 * vm_map_swapin.
1354 *
1355 */
1356 void
1357 vm_map_res_reference(vm_map_t map)
1358 {
1359 /* assert map is locked */
1360 assert(map->res_count >= 0);
1361 assert(os_ref_get_count(&map->map_refcnt) >= map->res_count);
1362 if (map->res_count == 0) {
1363 lck_mtx_unlock(&map->s_lock);
1364 vm_map_lock(map);
1365 vm_map_swapin(map);
1366 lck_mtx_lock(&map->s_lock);
1367 ++map->res_count;
1368 vm_map_unlock(map);
1369 } else {
1370 ++map->res_count;
1371 }
1372 }
1373
1374 /*
1375 * vm_map_reference_swap:
1376 *
1377 * Adds valid reference and residence counts to the given map.
1378 *
1379 * The map may not be in memory (i.e. zero residence count).
1380 *
1381 */
1382 void
1383 vm_map_reference_swap(vm_map_t map)
1384 {
1385 assert(map != VM_MAP_NULL);
1386 lck_mtx_lock(&map->s_lock);
1387 assert(map->res_count >= 0);
1388 assert(os_ref_get_count(&map->map_refcnt) >= map->res_count);
1389 os_ref_retain_locked(&map->map_refcnt);
1390 vm_map_res_reference(map);
1391 lck_mtx_unlock(&map->s_lock);
1392 }
1393
1394 /*
1395 * vm_map_res_deallocate:
1396 *
1397 * Decrement residence count on a map; possibly causing swapout.
1398 *
1399 * The map must be in memory (i.e. non-zero residence count).
1400 *
1401 * The map is locked, so this function is callable from vm_map_deallocate.
1402 *
1403 */
1404 void
1405 vm_map_res_deallocate(vm_map_t map)
1406 {
1407 assert(map->res_count > 0);
1408 if (--map->res_count == 0) {
1409 lck_mtx_unlock(&map->s_lock);
1410 vm_map_lock(map);
1411 vm_map_swapout(map);
1412 vm_map_unlock(map);
1413 lck_mtx_lock(&map->s_lock);
1414 }
1415 assert(os_ref_get_count(&map->map_refcnt) >= map->res_count);
1416 }
1417 #endif /* MACH_ASSERT && TASK_SWAPPER */
1418
1419 /*
1420 * vm_map_destroy:
1421 *
1422 * Actually destroy a map.
1423 */
1424 void
1425 vm_map_destroy(
1426 vm_map_t map,
1427 int flags)
1428 {
1429 vm_map_lock(map);
1430
1431 /* final cleanup: no need to unnest shared region */
1432 flags |= VM_MAP_REMOVE_NO_UNNESTING;
1433 /* final cleanup: ok to remove immutable mappings */
1434 flags |= VM_MAP_REMOVE_IMMUTABLE;
1435 /* final cleanup: allow gaps in range */
1436 flags |= VM_MAP_REMOVE_GAPS_OK;
1437
1438 /* clean up regular map entries */
1439 (void) vm_map_delete(map, map->min_offset, map->max_offset,
1440 flags, VM_MAP_NULL);
1441 /* clean up leftover special mappings (commpage, etc...) */
1442 #if !defined(__arm__) && !defined(__arm64__)
1443 (void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL,
1444 flags, VM_MAP_NULL);
1445 #endif /* !__arm__ && !__arm64__ */
1446
1447 vm_map_disable_hole_optimization(map);
1448 vm_map_corpse_footprint_destroy(map);
1449
1450 vm_map_unlock(map);
1451
1452 assert(map->hdr.nentries == 0);
1453
1454 if (map->pmap) {
1455 pmap_destroy(map->pmap);
1456 }
1457
1458 if (vm_map_lck_attr.lck_attr_val & LCK_ATTR_DEBUG) {
1459 /*
1460 * If lock debugging is enabled the mutexes get tagged as LCK_MTX_TAG_INDIRECT.
1461 * And this is regardless of whether the lck_mtx_ext_t is embedded in the
1462 * structure or kalloc'ed via lck_mtx_init.
1463 * An example is s_lock_ext within struct _vm_map.
1464 *
1465 * A lck_mtx_destroy on such a mutex will attempt a kfree and panic. We
1466 * can add another tag to detect embedded vs alloc'ed indirect external
1467 * mutexes but that'll be additional checks in the lock path and require
1468 * updating dependencies for the old vs new tag.
1469 *
1470 * Since the kfree() is for LCK_MTX_TAG_INDIRECT mutexes and that tag is applied
1471 * just when lock debugging is ON, we choose to forego explicitly destroying
1472 * the vm_map mutex and rw lock and, as a consequence, will overflow the reference
1473 * count on vm_map_lck_grp, which has no serious side-effect.
1474 */
1475 } else {
1476 lck_rw_destroy(&(map)->lock, &vm_map_lck_grp);
1477 lck_mtx_destroy(&(map)->s_lock, &vm_map_lck_grp);
1478 }
1479
1480 zfree(vm_map_zone, map);
1481 }
1482
1483 /*
1484 * Returns pid of the task with the largest number of VM map entries.
1485 * Used in the zone-map-exhaustion jetsam path.
1486 */
1487 pid_t
1488 find_largest_process_vm_map_entries(void)
1489 {
1490 pid_t victim_pid = -1;
1491 int max_vm_map_entries = 0;
1492 task_t task = TASK_NULL;
1493 queue_head_t *task_list = &tasks;
1494
1495 lck_mtx_lock(&tasks_threads_lock);
1496 queue_iterate(task_list, task, task_t, tasks) {
1497 if (task == kernel_task || !task->active) {
1498 continue;
1499 }
1500
1501 vm_map_t task_map = task->map;
1502 if (task_map != VM_MAP_NULL) {
1503 int task_vm_map_entries = task_map->hdr.nentries;
1504 if (task_vm_map_entries > max_vm_map_entries) {
1505 max_vm_map_entries = task_vm_map_entries;
1506 victim_pid = pid_from_task(task);
1507 }
1508 }
1509 }
1510 lck_mtx_unlock(&tasks_threads_lock);
1511
1512 printf("zone_map_exhaustion: victim pid %d, vm region count: %d\n", victim_pid, max_vm_map_entries);
1513 return victim_pid;
1514 }
1515
1516 #if TASK_SWAPPER
1517 /*
1518 * vm_map_swapin/vm_map_swapout
1519 *
1520 * Swap a map in and out, either referencing or releasing its resources.
1521 * These functions are internal use only; however, they must be exported
1522 * because they may be called from macros, which are exported.
1523 *
1524 * In the case of swapout, there could be races on the residence count,
1525 * so if the residence count is up, we return, assuming that a
1526 * vm_map_deallocate() call in the near future will bring us back.
1527 *
1528 * Locking:
1529 * -- We use the map write lock for synchronization among races.
1530 * -- The map write lock, and not the simple s_lock, protects the
1531 * swap state of the map.
1532 * -- If a map entry is a share map, then we hold both locks, in
1533 * hierarchical order.
1534 *
1535 * Synchronization Notes:
1536 * 1) If a vm_map_swapin() call happens while swapout in progress, it
1537 * will block on the map lock and proceed when swapout is through.
1538 * 2) A vm_map_reference() call at this time is illegal, and will
1539 * cause a panic. vm_map_reference() is only allowed on resident
1540 * maps, since it refuses to block.
1541 * 3) A vm_map_swapin() call during a swapin will block, and
1542 * proceed when the first swapin is done, turning into a nop.
1543 * This is the reason the res_count is not incremented until
1544 * after the swapin is complete.
1545 * 4) There is a timing hole after the checks of the res_count, before
1546 * the map lock is taken, during which a swapin may get the lock
1547 * before a swapout about to happen. If this happens, the swapin
1548 * will detect the state and increment the reference count, causing
1549 * the swapout to be a nop, thereby delaying it until a later
1550 * vm_map_deallocate. If the swapout gets the lock first, then
1551 * the swapin will simply block until the swapout is done, and
1552 * then proceed.
1553 *
1554 * Because vm_map_swapin() is potentially an expensive operation, it
1555 * should be used with caution.
1556 *
1557 * Invariants:
1558 * 1) A map with a residence count of zero is either swapped, or
1559 * being swapped.
1560 * 2) A map with a non-zero residence count is either resident,
1561 * or being swapped in.
1562 */
1563
1564 int vm_map_swap_enable = 1;
1565
1566 void
1567 vm_map_swapin(vm_map_t map)
1568 {
1569 vm_map_entry_t entry;
1570
1571 if (!vm_map_swap_enable) { /* debug */
1572 return;
1573 }
1574
1575 /*
1576 * Map is locked
1577 * First deal with various races.
1578 */
1579 if (map->sw_state == MAP_SW_IN) {
1580 /*
1581 * we raced with swapout and won. Returning will incr.
1582 * the res_count, turning the swapout into a nop.
1583 */
1584 return;
1585 }
1586
1587 /*
1588 * The residence count must be zero. If we raced with another
1589 * swapin, the state would have been IN; if we raced with a
1590 * swapout (after another competing swapin), we must have lost
1591 * the race to get here (see above comment), in which case
1592 * res_count is still 0.
1593 */
1594 assert(map->res_count == 0);
1595
1596 /*
1597 * There are no intermediate states of a map going out or
1598 * coming in, since the map is locked during the transition.
1599 */
1600 assert(map->sw_state == MAP_SW_OUT);
1601
1602 /*
1603 * We now operate upon each map entry. If the entry is a sub-
1604 * or share-map, we call vm_map_res_reference upon it.
1605 * If the entry is an object, we call vm_object_res_reference
1606 * (this may iterate through the shadow chain).
1607 * Note that we hold the map locked the entire time,
1608 * even if we get back here via a recursive call in
1609 * vm_map_res_reference.
1610 */
1611 entry = vm_map_first_entry(map);
1612
1613 while (entry != vm_map_to_entry(map)) {
1614 if (VME_OBJECT(entry) != VM_OBJECT_NULL) {
1615 if (entry->is_sub_map) {
1616 vm_map_t lmap = VME_SUBMAP(entry);
1617 lck_mtx_lock(&lmap->s_lock);
1618 vm_map_res_reference(lmap);
1619 lck_mtx_unlock(&lmap->s_lock);
1620 } else {
1621 vm_object_t object = VME_OBJECT(entry);
1622 vm_object_lock(object);
1623 /*
1624 * This call may iterate through the
1625 * shadow chain.
1626 */
1627 vm_object_res_reference(object);
1628 vm_object_unlock(object);
1629 }
1630 }
1631 entry = entry->vme_next;
1632 }
1633 assert(map->sw_state == MAP_SW_OUT);
1634 map->sw_state = MAP_SW_IN;
1635 }
1636
1637 void
1638 vm_map_swapout(vm_map_t map)
1639 {
1640 vm_map_entry_t entry;
1641
1642 /*
1643 * Map is locked
1644 * First deal with various races.
1645 * If we raced with a swapin and lost, the residence count
1646 * will have been incremented to 1, and we simply return.
1647 */
1648 lck_mtx_lock(&map->s_lock);
1649 if (map->res_count != 0) {
1650 lck_mtx_unlock(&map->s_lock);
1651 return;
1652 }
1653 lck_mtx_unlock(&map->s_lock);
1654
1655 /*
1656 * There are no intermediate states of a map going out or
1657 * coming in, since the map is locked during the transition.
1658 */
1659 assert(map->sw_state == MAP_SW_IN);
1660
1661 if (!vm_map_swap_enable) {
1662 return;
1663 }
1664
1665 /*
1666 * We now operate upon each map entry. If the entry is a sub-
1667 * or share-map, we call vm_map_res_deallocate upon it.
1668 * If the entry is an object, we call vm_object_res_deallocate
1669 * (this may iterate through the shadow chain).
1670 * Note that we hold the map locked the entire time,
1671 * even if we get back here via a recursive call in
1672 * vm_map_res_deallocate.
1673 */
1674 entry = vm_map_first_entry(map);
1675
1676 while (entry != vm_map_to_entry(map)) {
1677 if (VME_OBJECT(entry) != VM_OBJECT_NULL) {
1678 if (entry->is_sub_map) {
1679 vm_map_t lmap = VME_SUBMAP(entry);
1680 lck_mtx_lock(&lmap->s_lock);
1681 vm_map_res_deallocate(lmap);
1682 lck_mtx_unlock(&lmap->s_lock);
1683 } else {
1684 vm_object_t object = VME_OBJECT(entry);
1685 vm_object_lock(object);
1686 /*
1687 * This call may take a long time,
1688 * since it could actively push
1689 * out pages (if we implement it
1690 * that way).
1691 */
1692 vm_object_res_deallocate(object);
1693 vm_object_unlock(object);
1694 }
1695 }
1696 entry = entry->vme_next;
1697 }
1698 assert(map->sw_state == MAP_SW_IN);
1699 map->sw_state = MAP_SW_OUT;
1700 }
1701
1702 #endif /* TASK_SWAPPER */
1703
1704 /*
1705 * vm_map_lookup_entry: [ internal use only ]
1706 *
1707 * Calls into the vm map store layer to find the map
1708 * entry containing (or immediately preceding) the
1709 * specified address in the given map; the entry is returned
1710 * in the "entry" parameter. The boolean
1711 * result indicates whether the address is
1712 * actually contained in the map.
1713 */
1714 boolean_t
1715 vm_map_lookup_entry(
1716 vm_map_t map,
1717 vm_map_offset_t address,
1718 vm_map_entry_t *entry) /* OUT */
1719 {
1720 return vm_map_store_lookup_entry( map, address, entry );
1721 }
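/*
 * Illustrative usage sketch (not part of the original source): callers
 * hold at least the read lock across the lookup and the use of the
 * returned entry.  "addr" is a hypothetical address of interest.
 */
#if 0 /* example only */
	vm_map_entry_t entry;

	vm_map_lock_read(map);
	if (vm_map_lookup_entry(map, addr, &entry)) {
		/* addr lies within [entry->vme_start, entry->vme_end) */
	} else {
		/* addr is unmapped; "entry" immediately precedes addr,
		 * or is vm_map_to_entry(map) if nothing precedes it */
	}
	vm_map_unlock_read(map);
#endif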
1722
1723 /*
1724 * Routine: vm_map_find_space
1725 * Purpose:
1726 * Allocate a range in the specified virtual address map,
1727 * returning the entry allocated for that range.
1728 * Used by kmem_alloc, etc.
1729 *
1730 * The map must NOT be locked. It will be returned locked
1731 * on KERN_SUCCESS, unlocked on failure.
1732 *
1733 * If an entry is allocated, the object/offset fields
1734 * are initialized to zero.
1735 */
1736 kern_return_t
1737 vm_map_find_space(
1738 vm_map_t map,
1739 vm_map_offset_t *address, /* OUT */
1740 vm_map_size_t size,
1741 vm_map_offset_t mask,
1742 int flags __unused,
1743 vm_map_kernel_flags_t vmk_flags,
1744 vm_tag_t tag,
1745 vm_map_entry_t *o_entry) /* OUT */
1746 {
1747 vm_map_entry_t entry, new_entry;
1748 vm_map_offset_t start;
1749 vm_map_offset_t end;
1750 vm_map_entry_t hole_entry;
1751
1752 if (size == 0) {
1753 *address = 0;
1754 return KERN_INVALID_ARGUMENT;
1755 }
1756
1757 if (vmk_flags.vmkf_guard_after) {
1758 /* account for the back guard page in the size */
1759 size += VM_MAP_PAGE_SIZE(map);
1760 }
1761
1762 new_entry = vm_map_entry_create(map, FALSE);
1763
1764 /*
1765 * Look for the first possible address; if there's already
1766 * something at this address, we have to start after it.
1767 */
1768
1769 vm_map_lock(map);
1770
1771 if (map->disable_vmentry_reuse == TRUE) {
1772 VM_MAP_HIGHEST_ENTRY(map, entry, start);
1773 } else {
1774 if (map->holelistenabled) {
1775 hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
1776
1777 if (hole_entry == NULL) {
1778 /*
1779 * No more space in the map?
1780 */
1781 vm_map_entry_dispose(map, new_entry);
1782 vm_map_unlock(map);
1783 return KERN_NO_SPACE;
1784 }
1785
1786 entry = hole_entry;
1787 start = entry->vme_start;
1788 } else {
1789 assert(first_free_is_valid(map));
1790 if ((entry = map->first_free) == vm_map_to_entry(map)) {
1791 start = map->min_offset;
1792 } else {
1793 start = entry->vme_end;
1794 }
1795 }
1796 }
1797
1798 /*
1799 * In any case, the "entry" always precedes
1800 * the proposed new region throughout the loop:
1801 */
1802
1803 while (TRUE) {
1804 vm_map_entry_t next;
1805
1806 /*
1807 * Find the end of the proposed new region.
1808 * Be sure we didn't go beyond the end, or
1809 * wrap around the address.
1810 */
1811
1812 if (vmk_flags.vmkf_guard_before) {
1813 /* reserve space for the front guard page */
1814 start += VM_MAP_PAGE_SIZE(map);
1815 }
1816 end = ((start + mask) & ~mask);
1817
1818 if (end < start) {
1819 vm_map_entry_dispose(map, new_entry);
1820 vm_map_unlock(map);
1821 return KERN_NO_SPACE;
1822 }
1823 start = end;
1824 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
1825 end += size;
1826 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
1827
1828 if ((end > map->max_offset) || (end < start)) {
1829 vm_map_entry_dispose(map, new_entry);
1830 vm_map_unlock(map);
1831 return KERN_NO_SPACE;
1832 }
1833
1834 next = entry->vme_next;
1835
1836 if (map->holelistenabled) {
1837 if (entry->vme_end >= end) {
1838 break;
1839 }
1840 } else {
1841 /*
1842 * If there are no more entries, we must win.
1843 *
1844 * OR
1845 *
1846 * If there is another entry, it must be
1847 * after the end of the potential new region.
1848 */
1849
1850 if (next == vm_map_to_entry(map)) {
1851 break;
1852 }
1853
1854 if (next->vme_start >= end) {
1855 break;
1856 }
1857 }
1858
1859 /*
1860 * Didn't fit -- move to the next entry.
1861 */
1862
1863 entry = next;
1864
1865 if (map->holelistenabled) {
1866 if (entry == CAST_TO_VM_MAP_ENTRY(map->holes_list)) {
1867 /*
1868 * Wrapped around
1869 */
1870 vm_map_entry_dispose(map, new_entry);
1871 vm_map_unlock(map);
1872 return KERN_NO_SPACE;
1873 }
1874 start = entry->vme_start;
1875 } else {
1876 start = entry->vme_end;
1877 }
1878 }
1879
1880 if (map->holelistenabled) {
1881 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
1882 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
1883 }
1884 }
1885
1886 /*
1887 * At this point,
1888 * "start" and "end" should define the endpoints of the
1889 * available new range, and
1890 * "entry" should refer to the region before the new
1891 * range, and
1892 *
1893 * the map should be locked.
1894 */
1895
1896 if (vmk_flags.vmkf_guard_before) {
1897 /* go back for the front guard page */
1898 start -= VM_MAP_PAGE_SIZE(map);
1899 }
1900 *address = start;
1901
1902 assert(start < end);
1903 new_entry->vme_start = start;
1904 new_entry->vme_end = end;
1905 assert(page_aligned(new_entry->vme_start));
1906 assert(page_aligned(new_entry->vme_end));
1907 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
1908 VM_MAP_PAGE_MASK(map)));
1909 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
1910 VM_MAP_PAGE_MASK(map)));
1911
1912 new_entry->is_shared = FALSE;
1913 new_entry->is_sub_map = FALSE;
1914 new_entry->use_pmap = TRUE;
1915 VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
1916 VME_OFFSET_SET(new_entry, (vm_object_offset_t) 0);
1917
1918 new_entry->needs_copy = FALSE;
1919
1920 new_entry->inheritance = VM_INHERIT_DEFAULT;
1921 new_entry->protection = VM_PROT_DEFAULT;
1922 new_entry->max_protection = VM_PROT_ALL;
1923 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
1924 new_entry->wired_count = 0;
1925 new_entry->user_wired_count = 0;
1926
1927 new_entry->in_transition = FALSE;
1928 new_entry->needs_wakeup = FALSE;
1929 new_entry->no_cache = FALSE;
1930 new_entry->permanent = FALSE;
1931 new_entry->superpage_size = FALSE;
1932 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
1933 new_entry->map_aligned = TRUE;
1934 } else {
1935 new_entry->map_aligned = FALSE;
1936 }
1937
1938 new_entry->used_for_jit = FALSE;
1939 new_entry->pmap_cs_associated = FALSE;
1940 new_entry->zero_wired_pages = FALSE;
1941 new_entry->iokit_acct = FALSE;
1942 new_entry->vme_resilient_codesign = FALSE;
1943 new_entry->vme_resilient_media = FALSE;
1944 if (vmk_flags.vmkf_atomic_entry) {
1945 new_entry->vme_atomic = TRUE;
1946 } else {
1947 new_entry->vme_atomic = FALSE;
1948 }
1949
1950 VME_ALIAS_SET(new_entry, tag);
1951
1952 /*
1953 * Insert the new entry into the list
1954 */
1955
1956 vm_map_store_entry_link(map, entry, new_entry, VM_MAP_KERNEL_FLAGS_NONE);
1957
1958 map->size += size;
1959
1960 /*
1961 * Update the lookup hint
1962 */
1963 SAVE_HINT_MAP_WRITE(map, new_entry);
1964
1965 *o_entry = new_entry;
1966 return KERN_SUCCESS;
1967 }
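/*
 * Illustrative sketch (not compiled): a kmem_alloc-style caller of
 * vm_map_find_space().  Per the comment above, the map must not be
 * locked on entry; on KERN_SUCCESS it comes back locked and the new
 * entry's object/offset are zero, so the caller installs a backing
 * object (elided here) and then unlocks.  Names are hypothetical.
 */
#if 0
static kern_return_t
example_reserve_range(
	vm_map_t        map,
	vm_map_size_t   size,
	vm_map_offset_t *addrp)
{
	vm_map_entry_t  entry;
	kern_return_t   kr;

	kr = vm_map_find_space(map, addrp, size,
	    (vm_map_offset_t) 0,        /* mask: no alignment constraint */
	    0,                          /* flags (unused) */
	    VM_MAP_KERNEL_FLAGS_NONE,
	    VM_KERN_MEMORY_NONE,        /* tag */
	    &entry);
	if (kr != KERN_SUCCESS) {
		return kr;              /* map is unlocked on failure */
	}
	/* ... set up the object/offset for "entry" here ... */
	vm_map_unlock(map);             /* map was returned locked */
	return KERN_SUCCESS;
}
#endif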
1968
1969 int vm_map_pmap_enter_print = FALSE;
1970 int vm_map_pmap_enter_enable = FALSE;
1971
1972 /*
1973 * Routine: vm_map_pmap_enter [internal only]
1974 *
1975 * Description:
1976 * Force pages from the specified object to be entered into
1977 * the pmap at the specified address if they are present.
1978 * As soon as a page not found in the object the scan ends.
1979 *
1980 * Returns:
1981 * Nothing.
1982 *
1983 * In/out conditions:
1984 * The source map should not be locked on entry.
1985 */
1986 __unused static void
1987 vm_map_pmap_enter(
1988 vm_map_t map,
1989 vm_map_offset_t addr,
1990 vm_map_offset_t end_addr,
1991 vm_object_t object,
1992 vm_object_offset_t offset,
1993 vm_prot_t protection)
1994 {
1995 int type_of_fault;
1996 kern_return_t kr;
1997 struct vm_object_fault_info fault_info = {};
1998
1999 if (map->pmap == 0) {
2000 return;
2001 }
2002
2003 while (addr < end_addr) {
2004 vm_page_t m;
2005
2006
2007 /*
2008 * TODO:
2009 * From vm_map_enter(), we come into this function without the map
2010 * lock held or the object lock held.
2011 * We haven't taken a reference on the object either.
2012 * We should do a proper lookup on the map to make sure
2013 * that things are sane before we go locking objects that
2014 * could have been deallocated from under us.
2015 */
2016
2017 vm_object_lock(object);
2018
2019 m = vm_page_lookup(object, offset);
2020
2021 if (m == VM_PAGE_NULL || m->vmp_busy || m->vmp_fictitious ||
2022 (m->vmp_unusual && (m->vmp_error || m->vmp_restart || m->vmp_absent))) {
2023 vm_object_unlock(object);
2024 return;
2025 }
2026
2027 if (vm_map_pmap_enter_print) {
2028 printf("vm_map_pmap_enter:");
2029 printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
2030 map, (unsigned long long)addr, object, (unsigned long long)offset);
2031 }
2032 type_of_fault = DBG_CACHE_HIT_FAULT;
2033 kr = vm_fault_enter(m, map->pmap,
2034 addr, protection, protection,
2035 VM_PAGE_WIRED(m),
2036 FALSE, /* change_wiring */
2037 VM_KERN_MEMORY_NONE, /* tag - not wiring */
2038 &fault_info,
2039 NULL, /* need_retry */
2040 &type_of_fault);
2041
2042 vm_object_unlock(object);
2043
2044 offset += PAGE_SIZE_64;
2045 addr += PAGE_SIZE;
2046 }
2047 }
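/*
 * Illustrative sketch (not compiled): how a caller could use
 * vm_map_pmap_enter() to pre-populate the pmap for a freshly
 * established mapping.  "start", "end", "object", "offset" and
 * "prot" are hypothetical; the map must not be locked, per the
 * in/out conditions above.
 */
#if 0
	vm_map_pmap_enter(map,
	    start,              /* first VA to populate */
	    end,                /* end of the VA range */
	    object,             /* backing VM object */
	    offset,             /* offset of "start" within the object */
	    prot);              /* protection for the pmap entries */
#endif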
2048
2049 boolean_t vm_map_pmap_is_empty(
2050 vm_map_t map,
2051 vm_map_offset_t start,
2052 vm_map_offset_t end);
2053 boolean_t
2054 vm_map_pmap_is_empty(
2055 vm_map_t map,
2056 vm_map_offset_t start,
2057 vm_map_offset_t end)
2058 {
2059 #ifdef MACHINE_PMAP_IS_EMPTY
2060 return pmap_is_empty(map->pmap, start, end);
2061 #else /* MACHINE_PMAP_IS_EMPTY */
2062 vm_map_offset_t offset;
2063 ppnum_t phys_page;
2064
2065 if (map->pmap == NULL) {
2066 return TRUE;
2067 }
2068
2069 for (offset = start;
2070 offset < end;
2071 offset += PAGE_SIZE) {
2072 phys_page = pmap_find_phys(map->pmap, offset);
2073 if (phys_page) {
2074 kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
2075 "page %d at 0x%llx\n",
2076 map, (long long)start, (long long)end,
2077 phys_page, (long long)offset);
2078 return FALSE;
2079 }
2080 }
2081 return TRUE;
2082 #endif /* MACHINE_PMAP_IS_EMPTY */
2083 }
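/*
 * Illustrative sketch (not compiled): vm_map_pmap_is_empty() is a
 * debug aid; callers assert that a range has no leftover physical
 * translations before handing it out, as the DEBUG check in
 * vm_map_enter() below does.
 */
#if 0
#if DEBUG
	assert(vm_map_pmap_is_empty(map, *address, *address + size));
#endif /* DEBUG */
#endif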
2084
2085 #define MAX_TRIES_TO_GET_RANDOM_ADDRESS 1000
2086 kern_return_t
2087 vm_map_random_address_for_size(
2088 vm_map_t map,
2089 vm_map_offset_t *address,
2090 vm_map_size_t size)
2091 {
2092 kern_return_t kr = KERN_SUCCESS;
2093 int tries = 0;
2094 vm_map_offset_t random_addr = 0;
2095 vm_map_offset_t hole_end;
2096
2097 vm_map_entry_t next_entry = VM_MAP_ENTRY_NULL;
2098 vm_map_entry_t prev_entry = VM_MAP_ENTRY_NULL;
2099 vm_map_size_t vm_hole_size = 0;
2100 vm_map_size_t addr_space_size;
2101
2102 addr_space_size = vm_map_max(map) - vm_map_min(map);
2103
2104 assert(page_aligned(size));
2105
2106 while (tries < MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
2107 random_addr = ((vm_map_offset_t)random()) << PAGE_SHIFT;
2108 random_addr = vm_map_trunc_page(
2109 vm_map_min(map) + (random_addr % addr_space_size),
2110 VM_MAP_PAGE_MASK(map));
2111
2112 if (vm_map_lookup_entry(map, random_addr, &prev_entry) == FALSE) {
2113 if (prev_entry == vm_map_to_entry(map)) {
2114 next_entry = vm_map_first_entry(map);
2115 } else {
2116 next_entry = prev_entry->vme_next;
2117 }
2118 if (next_entry == vm_map_to_entry(map)) {
2119 hole_end = vm_map_max(map);
2120 } else {
2121 hole_end = next_entry->vme_start;
2122 }
2123 vm_hole_size = hole_end - random_addr;
2124 if (vm_hole_size >= size) {
2125 *address = random_addr;
2126 break;
2127 }
2128 }
2129 tries++;
2130 }
2131
2132 if (tries == MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
2133 kr = KERN_NO_SPACE;
2134 }
2135 return kr;
2136 }
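/*
 * Illustrative sketch (not compiled): this is how vm_map_enter()
 * below uses vm_map_random_address_for_size() when the caller asked
 * for VM_FLAGS_RANDOM_ADDR; on success the randomized address becomes
 * the starting point for the normal fit check.
 */
#if 0
	result = vm_map_random_address_for_size(map, address, size);
	if (result != KERN_SUCCESS) {
		goto BailOut;   /* KERN_NO_SPACE after too many tries */
	}
	start = *address;
#endif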
2137
2138 static boolean_t
2139 vm_memory_malloc_no_cow(
2140 int alias)
2141 {
2142 uint64_t alias_mask;
2143
2144 if (alias > 63) {
2145 return FALSE;
2146 }
2147
2148 alias_mask = 1ULL << alias;
2149 if (alias_mask & vm_memory_malloc_no_cow_mask) {
2150 return TRUE;
2151 }
2152 return FALSE;
2153 }
2154
2155 /*
2156 * Routine: vm_map_enter
2157 *
2158 * Description:
2159 * Allocate a range in the specified virtual address map.
2160 * The resulting range will refer to memory defined by
2161 * the given memory object and offset into that object.
2162 *
2163 * Arguments are as defined in the vm_map call.
2164 */
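/*
 * Illustrative sketch (not compiled): a minimal anonymous, zero-fill,
 * "anywhere" call to vm_map_enter().  "map" and "size" are assumed to
 * exist; real callers normally reach this routine through the
 * higher-level Mach VM entry points rather than calling it directly.
 */
#if 0
	vm_map_offset_t map_addr = 0;
	kern_return_t   kr;

	kr = vm_map_enter(map,
	    &map_addr,                  /* IN/OUT: chosen address */
	    size,
	    (vm_map_offset_t) 0,        /* mask: no alignment constraint */
	    VM_FLAGS_ANYWHERE,          /* let the kernel pick the address */
	    VM_MAP_KERNEL_FLAGS_NONE,
	    VM_KERN_MEMORY_NONE,        /* tag/alias */
	    VM_OBJECT_NULL,             /* anonymous: object set up lazily */
	    (vm_object_offset_t) 0,
	    FALSE,                      /* needs_copy */
	    VM_PROT_DEFAULT,
	    VM_PROT_ALL,
	    VM_INHERIT_DEFAULT);
#endif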
2165 int _map_enter_debug = 0;
2166 static unsigned int vm_map_enter_restore_successes = 0;
2167 static unsigned int vm_map_enter_restore_failures = 0;
2168 kern_return_t
2169 vm_map_enter(
2170 vm_map_t map,
2171 vm_map_offset_t *address, /* IN/OUT */
2172 vm_map_size_t size,
2173 vm_map_offset_t mask,
2174 int flags,
2175 vm_map_kernel_flags_t vmk_flags,
2176 vm_tag_t alias,
2177 vm_object_t object,
2178 vm_object_offset_t offset,
2179 boolean_t needs_copy,
2180 vm_prot_t cur_protection,
2181 vm_prot_t max_protection,
2182 vm_inherit_t inheritance)
2183 {
2184 vm_map_entry_t entry, new_entry;
2185 vm_map_offset_t start, tmp_start, tmp_offset;
2186 vm_map_offset_t end, tmp_end;
2187 vm_map_offset_t tmp2_start, tmp2_end;
2188 vm_map_offset_t desired_empty_end;
2189 vm_map_offset_t step;
2190 kern_return_t result = KERN_SUCCESS;
2191 vm_map_t zap_old_map = VM_MAP_NULL;
2192 vm_map_t zap_new_map = VM_MAP_NULL;
2193 boolean_t map_locked = FALSE;
2194 boolean_t pmap_empty = TRUE;
2195 boolean_t new_mapping_established = FALSE;
2196 boolean_t keep_map_locked = vmk_flags.vmkf_keep_map_locked;
2197 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
2198 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
2199 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
2200 boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
2201 boolean_t is_submap = vmk_flags.vmkf_submap;
2202 boolean_t permanent = vmk_flags.vmkf_permanent;
2203 boolean_t no_copy_on_read = vmk_flags.vmkf_no_copy_on_read;
2204 boolean_t entry_for_jit = vmk_flags.vmkf_map_jit;
2205 boolean_t iokit_acct = vmk_flags.vmkf_iokit_acct;
2206 boolean_t resilient_codesign = ((flags & VM_FLAGS_RESILIENT_CODESIGN) != 0);
2207 boolean_t resilient_media = ((flags & VM_FLAGS_RESILIENT_MEDIA) != 0);
2208 boolean_t random_address = ((flags & VM_FLAGS_RANDOM_ADDR) != 0);
2209 unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
2210 vm_tag_t user_alias;
2211 vm_map_offset_t effective_min_offset, effective_max_offset;
2212 kern_return_t kr;
2213 boolean_t clear_map_aligned = FALSE;
2214 vm_map_entry_t hole_entry;
2215 vm_map_size_t chunk_size = 0;
2216
2217 assertf(vmk_flags.__vmkf_unused == 0, "vmk_flags unused=0x%x\n", vmk_flags.__vmkf_unused);
2218
2219 if (flags & VM_FLAGS_4GB_CHUNK) {
2220 #if defined(__LP64__)
2221 chunk_size = (4ULL * 1024 * 1024 * 1024); /* max. 4GB chunks for the new allocation */
2222 #else /* __LP64__ */
2223 chunk_size = ANON_CHUNK_SIZE;
2224 #endif /* __LP64__ */
2225 } else {
2226 chunk_size = ANON_CHUNK_SIZE;
2227 }
2228
2229 if (superpage_size) {
2230 switch (superpage_size) {
2231 /*
2232 * Note that the current implementation only supports
2233 * a single size for superpages, SUPERPAGE_SIZE, per
2234 * architecture. Once more sizes are to be supported,
2235 * SUPERPAGE_SIZE has to be replaced
2236 * with a lookup of the size depending on superpage_size.
2237 */
2238 #ifdef __x86_64__
2239 case SUPERPAGE_SIZE_ANY:
2240 /* handle it like 2 MB and round up to page size */
2241 size = (size + 2 * 1024 * 1024 - 1) & ~(2 * 1024 * 1024 - 1);
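/* FALLTHROUGH: then handled like SUPERPAGE_SIZE_2MB */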
2242 case SUPERPAGE_SIZE_2MB:
2243 break;
2244 #endif
2245 default:
2246 return KERN_INVALID_ARGUMENT;
2247 }
2248 mask = SUPERPAGE_SIZE - 1;
2249 if (size & (SUPERPAGE_SIZE - 1)) {
2250 return KERN_INVALID_ARGUMENT;
2251 }
2252 inheritance = VM_INHERIT_NONE; /* fork() children won't inherit superpages */
2253 }
2254
2255
2256 if ((cur_protection & VM_PROT_WRITE) &&
2257 (cur_protection & VM_PROT_EXECUTE) &&
2258 #if !CONFIG_EMBEDDED
2259 map != kernel_map &&
2260 (cs_process_global_enforcement() ||
2261 (vmk_flags.vmkf_cs_enforcement_override
2262 ? vmk_flags.vmkf_cs_enforcement
2263 : cs_process_enforcement(NULL))) &&
2264 #endif /* !CONFIG_EMBEDDED */
2265 !entry_for_jit) {
2266 DTRACE_VM3(cs_wx,
2267 uint64_t, 0,
2268 uint64_t, 0,
2269 vm_prot_t, cur_protection);
2270 printf("CODE SIGNING: %d[%s] %s: curprot cannot be write+execute. "
2271 #if VM_PROTECT_WX_FAIL
2272 "failing\n",
2273 #else /* VM_PROTECT_WX_FAIL */
2274 "turning off execute\n",
2275 #endif /* VM_PROTECT_WX_FAIL */
2276 proc_selfpid(),
2277 (current_task()->bsd_info
2278 ? proc_name_address(current_task()->bsd_info)
2279 : "?"),
2280 __FUNCTION__);
2281 cur_protection &= ~VM_PROT_EXECUTE;
2282 #if VM_PROTECT_WX_FAIL
2283 return KERN_PROTECTION_FAILURE;
2284 #endif /* VM_PROTECT_WX_FAIL */
2285 }
2286
2287 /*
2288 * If the task has requested executable lockdown,
2289 * deny any new executable mapping.
2290 */
2291 if (map->map_disallow_new_exec == TRUE) {
2292 if (cur_protection & VM_PROT_EXECUTE) {
2293 return KERN_PROTECTION_FAILURE;
2294 }
2295 }
2296
2297 if (resilient_codesign) {
2298 assert(!is_submap);
2299 if ((cur_protection & (VM_PROT_WRITE | VM_PROT_EXECUTE)) ||
2300 (max_protection & (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
2301 return KERN_PROTECTION_FAILURE;
2302 }
2303 }
2304
2305 if (resilient_media) {
2306 assert(!is_submap);
2307 // assert(!needs_copy);
2308 if (object != VM_OBJECT_NULL &&
2309 !object->internal) {
2310 /*
2311 * This mapping is directly backed by an external
2312 * memory manager (e.g. a vnode pager for a file):
2313 * we would not have any safe place to inject
2314 * a zero-filled page if an actual page is not
2315 * available, without possibly impacting the actual
2316 * contents of the mapped object (e.g. the file),
2317 * so we can't provide any media resiliency here.
2318 */
2319 return KERN_INVALID_ARGUMENT;
2320 }
2321 }
2322
2323 if (is_submap) {
2324 if (purgable) {
2325 /* submaps cannot be purgeable */
2326 return KERN_INVALID_ARGUMENT;
2327 }
2328 if (object == VM_OBJECT_NULL) {
2329 /* submaps cannot be created lazily */
2330 return KERN_INVALID_ARGUMENT;
2331 }
2332 }
2333 if (vmk_flags.vmkf_already) {
2334 /*
2335 * VM_FLAGS_ALREADY says that it's OK if the same mapping
2336 * is already present. For it to be meaningful, the requested
2337 * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
2338 * we shouldn't try to remove what was mapped there first
2339 * (!VM_FLAGS_OVERWRITE).
2340 */
2341 if ((flags & VM_FLAGS_ANYWHERE) ||
2342 (flags & VM_FLAGS_OVERWRITE)) {
2343 return KERN_INVALID_ARGUMENT;
2344 }
2345 }
2346
2347 effective_min_offset = map->min_offset;
2348
2349 if (vmk_flags.vmkf_beyond_max) {
2350 /*
2351 * Allow an insertion beyond the map's max offset.
2352 */
2353 #if !defined(__arm__) && !defined(__arm64__)
2354 if (vm_map_is_64bit(map)) {
2355 effective_max_offset = 0xFFFFFFFFFFFFF000ULL;
2356 } else
2357 #endif /* __arm__ */
2358 effective_max_offset = 0x00000000FFFFF000ULL;
2359 } else {
2360 #if !defined(CONFIG_EMBEDDED)
2361 if (__improbable(vmk_flags.vmkf_32bit_map_va)) {
2362 effective_max_offset = MIN(map->max_offset, 0x00000000FFFFF000ULL);
2363 } else {
2364 effective_max_offset = map->max_offset;
2365 }
2366 #else
2367 effective_max_offset = map->max_offset;
2368 #endif
2369 }
2370
2371 if (size == 0 ||
2372 (offset & PAGE_MASK_64) != 0) {
2373 *address = 0;
2374 return KERN_INVALID_ARGUMENT;
2375 }
2376
2377 if (map->pmap == kernel_pmap) {
2378 user_alias = VM_KERN_MEMORY_NONE;
2379 } else {
2380 user_alias = alias;
2381 }
2382
2383 if (user_alias == VM_MEMORY_MALLOC_MEDIUM) {
2384 chunk_size = MALLOC_MEDIUM_CHUNK_SIZE;
2385 }
2386
2387 #define RETURN(value) { result = value; goto BailOut; }
2388
2389 assert(page_aligned(*address));
2390 assert(page_aligned(size));
2391
2392 if (!VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map))) {
2393 /*
2394 * In most cases, the caller rounds the size up to the
2395 * map's page size.
2396 * If we get a size that is explicitly not map-aligned here,
2397 * we'll have to respect the caller's wish and mark the
2398 * mapping as "not map-aligned" to avoid tripping the
2399 * map alignment checks later.
2400 */
2401 clear_map_aligned = TRUE;
2402 }
2403 if (!anywhere &&
2404 !VM_MAP_PAGE_ALIGNED(*address, VM_MAP_PAGE_MASK(map))) {
2405 /*
2406 * We've been asked to map at a fixed address and that
2407 * address is not aligned to the map's specific alignment.
2408 * The caller should know what it's doing (i.e. most likely
2409 * mapping some fragmented copy map, transferring memory from
2410 * a VM map with a different alignment), so clear map_aligned
2411 * for this new VM map entry and proceed.
2412 */
2413 clear_map_aligned = TRUE;
2414 }
2415
2416 /*
2417 * Only zero-fill objects are allowed to be purgable.
2418 * LP64todo - limit purgable objects to 32-bits for now
2419 */
2420 if (purgable &&
2421 (offset != 0 ||
2422 (object != VM_OBJECT_NULL &&
2423 (object->vo_size != size ||
2424 object->purgable == VM_PURGABLE_DENY))
2425 || size > ANON_MAX_SIZE)) { /* LP64todo: remove when dp capable */
2426 return KERN_INVALID_ARGUMENT;
2427 }
2428
2429 if (!anywhere && overwrite) {
2430 /*
2431 * Create a temporary VM map to hold the old mappings in the
2432 * affected area while we create the new one.
2433 * This avoids releasing the VM map lock in
2434 * vm_map_entry_delete() and allows atomicity
2435 * when we want to replace some mappings with a new one.
2436 * It also allows us to restore the old VM mappings if the
2437 * new mapping fails.
2438 */
2439 zap_old_map = vm_map_create(PMAP_NULL,
2440 *address,
2441 *address + size,
2442 map->hdr.entries_pageable);
2443 vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
2444 vm_map_disable_hole_optimization(zap_old_map);
2445 }
2446
2447 StartAgain:;
2448
2449 start = *address;
2450
2451 if (anywhere) {
2452 vm_map_lock(map);
2453 map_locked = TRUE;
2454
2455 if (entry_for_jit) {
2456 #if CONFIG_EMBEDDED
2457 if (map->jit_entry_exists) {
2458 result = KERN_INVALID_ARGUMENT;
2459 goto BailOut;
2460 }
2461 random_address = TRUE;
2462 #endif /* CONFIG_EMBEDDED */
2463 }
2464
2465 if (random_address) {
2466 /*
2467 * Get a random start address.
2468 */
2469 result = vm_map_random_address_for_size(map, address, size);
2470 if (result != KERN_SUCCESS) {
2471 goto BailOut;
2472 }
2473 start = *address;
2474 }
2475 #if !CONFIG_EMBEDDED
2476 else if ((start == 0 || start == vm_map_min(map)) &&
2477 !map->disable_vmentry_reuse &&
2478 map->vmmap_high_start != 0) {
2479 start = map->vmmap_high_start;
2480 }
2481 #endif
2482
2483
2484 /*
2485 * Calculate the first possible address.
2486 */
2487
2488 if (start < effective_min_offset) {
2489 start = effective_min_offset;
2490 }
2491 if (start > effective_max_offset) {
2492 RETURN(KERN_NO_SPACE);
2493 }
2494
2495 /*
2496 * Look for the first possible address;
2497 * if there's already something at this
2498 * address, we have to start after it.
2499 */
2500
2501 if (map->disable_vmentry_reuse == TRUE) {
2502 VM_MAP_HIGHEST_ENTRY(map, entry, start);
2503 } else {
2504 if (map->holelistenabled) {
2505 hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
2506
2507 if (hole_entry == NULL) {
2508 /*
2509 * No more space in the map?
2510 */
2511 result = KERN_NO_SPACE;
2512 goto BailOut;
2513 } else {
2514 boolean_t found_hole = FALSE;
2515
2516 do {
2517 if (hole_entry->vme_start >= start) {
2518 start = hole_entry->vme_start;
2519 found_hole = TRUE;
2520 break;
2521 }
2522
2523 if (hole_entry->vme_end > start) {
2524 found_hole = TRUE;
2525 break;
2526 }
2527 hole_entry = hole_entry->vme_next;
2528 } while (hole_entry != CAST_TO_VM_MAP_ENTRY(map->holes_list));
2529
2530 if (found_hole == FALSE) {
2531 result = KERN_NO_SPACE;
2532 goto BailOut;
2533 }
2534
2535 entry = hole_entry;
2536
2537 if (start == 0) {
2538 start += PAGE_SIZE_64;
2539 }
2540 }
2541 } else {
2542 assert(first_free_is_valid(map));
2543
2544 entry = map->first_free;
2545
2546 if (entry == vm_map_to_entry(map)) {
2547 entry = NULL;
2548 } else {
2549 if (entry->vme_next == vm_map_to_entry(map)) {
2550 /*
2551 * Hole at the end of the map.
2552 */
2553 entry = NULL;
2554 } else {
2555 if (start < (entry->vme_next)->vme_start) {
2556 start = entry->vme_end;
2557 start = vm_map_round_page(start,
2558 VM_MAP_PAGE_MASK(map));
2559 } else {
2560 /*
2561 * Need to do a lookup.
2562 */
2563 entry = NULL;
2564 }
2565 }
2566 }
2567
2568 if (entry == NULL) {
2569 vm_map_entry_t tmp_entry;
2570 if (vm_map_lookup_entry(map, start, &tmp_entry)) {
2571 assert(!entry_for_jit);
2572 start = tmp_entry->vme_end;
2573 start = vm_map_round_page(start,
2574 VM_MAP_PAGE_MASK(map));
2575 }
2576 entry = tmp_entry;
2577 }
2578 }
2579 }
2580
2581 /*
2582 * In any case, the "entry" always precedes
2583 * the proposed new region throughout the
2584 * loop:
2585 */
2586
2587 while (TRUE) {
2588 vm_map_entry_t next;
2589
2590 /*
2591 * Find the end of the proposed new region.
2592 * Be sure we didn't go beyond the end, or
2593 * wrap around the address.
2594 */
2595
2596 end = ((start + mask) & ~mask);
2597 end = vm_map_round_page(end,
2598 VM_MAP_PAGE_MASK(map));
2599 if (end < start) {
2600 RETURN(KERN_NO_SPACE);
2601 }
2602 start = end;
2603 assert(VM_MAP_PAGE_ALIGNED(start,
2604 VM_MAP_PAGE_MASK(map)));
2605 end += size;
2606
2607 /* We want an entire page of empty space, but don't increase the allocation size. */
2608 desired_empty_end = vm_map_round_page(end, VM_MAP_PAGE_MASK(map));
2609
2610 if ((desired_empty_end > effective_max_offset) || (desired_empty_end < start)) {
2611 if (map->wait_for_space) {
2612 assert(!keep_map_locked);
2613 if (size <= (effective_max_offset -
2614 effective_min_offset)) {
2615 assert_wait((event_t)map,
2616 THREAD_ABORTSAFE);
2617 vm_map_unlock(map);
2618 map_locked = FALSE;
2619 thread_block(THREAD_CONTINUE_NULL);
2620 goto StartAgain;
2621 }
2622 }
2623 RETURN(KERN_NO_SPACE);
2624 }
2625
2626 next = entry->vme_next;
2627
2628 if (map->holelistenabled) {
2629 if (entry->vme_end >= desired_empty_end) {
2630 break;
2631 }
2632 } else {
2633 /*
2634 * If there are no more entries, we must win.
2635 *
2636 * OR
2637 *
2638 * If there is another entry, it must be
2639 * after the end of the potential new region.
2640 */
2641
2642 if (next == vm_map_to_entry(map)) {
2643 break;
2644 }
2645
2646 if (next->vme_start >= desired_empty_end) {
2647 break;
2648 }
2649 }
2650
2651 /*
2652 * Didn't fit -- move to the next entry.
2653 */
2654
2655 entry = next;
2656
2657 if (map->holelistenabled) {
2658 if (entry == CAST_TO_VM_MAP_ENTRY(map->holes_list)) {
2659 /*
2660 * Wrapped around
2661 */
2662 result = KERN_NO_SPACE;
2663 goto BailOut;
2664 }
2665 start = entry->vme_start;
2666 } else {
2667 start = entry->vme_end;
2668 }
2669
2670 start = vm_map_round_page(start,
2671 VM_MAP_PAGE_MASK(map));
2672 }
2673
2674 if (map->holelistenabled) {
2675 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
2676 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
2677 }
2678 }
2679
2680 *address = start;
2681 assert(VM_MAP_PAGE_ALIGNED(*address,
2682 VM_MAP_PAGE_MASK(map)));
2683 } else {
2684 /*
2685 * Verify that:
2686 * the address doesn't itself violate
2687 * the mask requirement.
2688 */
2689
2690 vm_map_lock(map);
2691 map_locked = TRUE;
2692 if ((start & mask) != 0) {
2693 RETURN(KERN_NO_SPACE);
2694 }
2695
2696 /*
2697 * ... the address is within bounds
2698 */
2699
2700 end = start + size;
2701
2702 if ((start < effective_min_offset) ||
2703 (end > effective_max_offset) ||
2704 (start >= end)) {
2705 RETURN(KERN_INVALID_ADDRESS);
2706 }
2707
2708 if (overwrite && zap_old_map != VM_MAP_NULL) {
2709 int remove_flags;
2710 /*
2711 * Fixed mapping and "overwrite" flag: attempt to
2712 * remove all existing mappings in the specified
2713 * address range, saving them in our "zap_old_map".
2714 */
2715 remove_flags = VM_MAP_REMOVE_SAVE_ENTRIES;
2716 remove_flags |= VM_MAP_REMOVE_NO_MAP_ALIGN;
2717 if (vmk_flags.vmkf_overwrite_immutable) {
2718 /* we can overwrite immutable mappings */
2719 remove_flags |= VM_MAP_REMOVE_IMMUTABLE;
2720 }
2721 (void) vm_map_delete(map, start, end,
2722 remove_flags,
2723 zap_old_map);
2724 }
2725
2726 /*
2727 * ... the starting address isn't allocated
2728 */
2729
2730 if (vm_map_lookup_entry(map, start, &entry)) {
2731 if (!(vmk_flags.vmkf_already)) {
2732 RETURN(KERN_NO_SPACE);
2733 }
2734 /*
2735 * Check if what's already there is what we want.
2736 */
2737 tmp_start = start;
2738 tmp_offset = offset;
2739 if (entry->vme_start < start) {
2740 tmp_start -= start - entry->vme_start;
2741 tmp_offset -= start - entry->vme_start;
2742 }
2743 for (; entry->vme_start < end;
2744 entry = entry->vme_next) {
2745 /*
2746 * Check if the mapping's attributes
2747 * match the existing map entry.
2748 */
2749 if (entry == vm_map_to_entry(map) ||
2750 entry->vme_start != tmp_start ||
2751 entry->is_sub_map != is_submap ||
2752 VME_OFFSET(entry) != tmp_offset ||
2753 entry->needs_copy != needs_copy ||
2754 entry->protection != cur_protection ||
2755 entry->max_protection != max_protection ||
2756 entry->inheritance != inheritance ||
2757 entry->iokit_acct != iokit_acct ||
2758 VME_ALIAS(entry) != alias) {
2759 /* not the same mapping ! */
2760 RETURN(KERN_NO_SPACE);
2761 }
2762 /*
2763 * Check if the same object is being mapped.
2764 */
2765 if (is_submap) {
2766 if (VME_SUBMAP(entry) !=
2767 (vm_map_t) object) {
2768 /* not the same submap */
2769 RETURN(KERN_NO_SPACE);
2770 }
2771 } else {
2772 if (VME_OBJECT(entry) != object) {
2773 /* not the same VM object... */
2774 vm_object_t obj2;
2775
2776 obj2 = VME_OBJECT(entry);
2777 if ((obj2 == VM_OBJECT_NULL ||
2778 obj2->internal) &&
2779 (object == VM_OBJECT_NULL ||
2780 object->internal)) {
2781 /*
2782 * ... but both are
2783 * anonymous memory,
2784 * so equivalent.
2785 */
2786 } else {
2787 RETURN(KERN_NO_SPACE);
2788 }
2789 }
2790 }
2791
2792 tmp_offset += entry->vme_end - entry->vme_start;
2793 tmp_start += entry->vme_end - entry->vme_start;
2794 if (entry->vme_end >= end) {
2795 /* reached the end of our mapping */
2796 break;
2797 }
2798 }
2799 /* it all matches: let's use what's already there ! */
2800 RETURN(KERN_MEMORY_PRESENT);
2801 }
2802
2803 /*
2804 * ... the next region doesn't overlap the
2805 * end point.
2806 */
2807
2808 if ((entry->vme_next != vm_map_to_entry(map)) &&
2809 (entry->vme_next->vme_start < end)) {
2810 RETURN(KERN_NO_SPACE);
2811 }
2812 }
2813
2814 /*
2815 * At this point,
2816 * "start" and "end" should define the endpoints of the
2817 * available new range, and
2818 * "entry" should refer to the region before the new
2819 * range, and
2820 *
2821 * the map should be locked.
2822 */
2823
2824 /*
2825 * See whether we can avoid creating a new entry (and object) by
2826 * extending one of our neighbors. [So far, we only attempt to
2827 * extend from below.] Note that we can never extend/join
2828 * purgable objects because they need to remain distinct
2829 * entities in order to implement their "volatile object"
2830 * semantics.
2831 */
2832
2833 if (purgable ||
2834 entry_for_jit ||
2835 vm_memory_malloc_no_cow(user_alias)) {
2836 if (object == VM_OBJECT_NULL) {
2837 object = vm_object_allocate(size);
2838 object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
2839 object->true_share = FALSE;
2840 if (purgable) {
2841 task_t owner;
2842 object->purgable = VM_PURGABLE_NONVOLATILE;
2843 if (map->pmap == kernel_pmap) {
2844 /*
2845 * Purgeable mappings made in a kernel
2846 * map are "owned" by the kernel itself
2847 * rather than the current user task
2848 * because they're likely to be used by
2849 * more than this user task (see
2850 * execargs_purgeable_allocate(), for
2851 * example).
2852 */
2853 owner = kernel_task;
2854 } else {
2855 owner = current_task();
2856 }
2857 assert(object->vo_owner == NULL);
2858 assert(object->resident_page_count == 0);
2859 assert(object->wired_page_count == 0);
2860 vm_object_lock(object);
2861 vm_purgeable_nonvolatile_enqueue(object, owner);
2862 vm_object_unlock(object);
2863 }
2864 offset = (vm_object_offset_t)0;
2865 }
2866 } else if ((is_submap == FALSE) &&
2867 (object == VM_OBJECT_NULL) &&
2868 (entry != vm_map_to_entry(map)) &&
2869 (entry->vme_end == start) &&
2870 (!entry->is_shared) &&
2871 (!entry->is_sub_map) &&
2872 (!entry->in_transition) &&
2873 (!entry->needs_wakeup) &&
2874 (entry->behavior == VM_BEHAVIOR_DEFAULT) &&
2875 (entry->protection == cur_protection) &&
2876 (entry->max_protection == max_protection) &&
2877 (entry->inheritance == inheritance) &&
2878 ((user_alias == VM_MEMORY_REALLOC) ||
2879 (VME_ALIAS(entry) == alias)) &&
2880 (entry->no_cache == no_cache) &&
2881 (entry->permanent == permanent) &&
2882 /* no coalescing for immutable executable mappings */
2883 !((entry->protection & VM_PROT_EXECUTE) &&
2884 entry->permanent) &&
2885 (!entry->superpage_size && !superpage_size) &&
2886 /*
2887 * No coalescing if not map-aligned, to avoid propagating
2888 * that condition any further than needed:
2889 */
2890 (!entry->map_aligned || !clear_map_aligned) &&
2891 (!entry->zero_wired_pages) &&
2892 (!entry->used_for_jit && !entry_for_jit) &&
2893 (!entry->pmap_cs_associated) &&
2894 (entry->iokit_acct == iokit_acct) &&
2895 (!entry->vme_resilient_codesign) &&
2896 (!entry->vme_resilient_media) &&
2897 (!entry->vme_atomic) &&
2898 (entry->vme_no_copy_on_read == no_copy_on_read) &&
2899
2900 ((entry->vme_end - entry->vme_start) + size <=
2901 (user_alias == VM_MEMORY_REALLOC ?
2902 ANON_CHUNK_SIZE :
2903 NO_COALESCE_LIMIT)) &&
2904
2905 (entry->wired_count == 0)) { /* implies user_wired_count == 0 */
2906 if (vm_object_coalesce(VME_OBJECT(entry),
2907 VM_OBJECT_NULL,
2908 VME_OFFSET(entry),
2909 (vm_object_offset_t) 0,
2910 (vm_map_size_t)(entry->vme_end - entry->vme_start),
2911 (vm_map_size_t)(end - entry->vme_end))) {
2912 /*
2913 * Coalesced the two objects - can extend
2914 * the previous map entry to include the
2915 * new range.
2916 */
2917 map->size += (end - entry->vme_end);
2918 assert(entry->vme_start < end);
2919 assert(VM_MAP_PAGE_ALIGNED(end,
2920 VM_MAP_PAGE_MASK(map)));
2921 if (__improbable(vm_debug_events)) {
2922 DTRACE_VM5(map_entry_extend, vm_map_t, map, vm_map_entry_t, entry, vm_address_t, entry->vme_start, vm_address_t, entry->vme_end, vm_address_t, end);
2923 }
2924 entry->vme_end = end;
2925 if (map->holelistenabled) {
2926 vm_map_store_update_first_free(map, entry, TRUE);
2927 } else {
2928 vm_map_store_update_first_free(map, map->first_free, TRUE);
2929 }
2930 new_mapping_established = TRUE;
2931 RETURN(KERN_SUCCESS);
2932 }
2933 }
2934
2935 step = superpage_size ? SUPERPAGE_SIZE : (end - start);
2936 new_entry = NULL;
2937
2938 for (tmp2_start = start; tmp2_start < end; tmp2_start += step) {
2939 tmp2_end = tmp2_start + step;
2940 /*
2941 * Create a new entry
2942 *
2943 * XXX FBDP
2944 * The reserved "page zero" in each process's address space can
2945 * be arbitrarily large. Splitting it into separate objects and
2946 * therefore different VM map entries serves no purpose and just
2947 * slows down operations on the VM map, so let's not split the
2948 * allocation into chunks if the max protection is NONE. That
2949 * memory should never be accessible, so it will never get to the
2950 * default pager.
2951 */
2952 tmp_start = tmp2_start;
2953 if (object == VM_OBJECT_NULL &&
2954 size > chunk_size &&
2955 max_protection != VM_PROT_NONE &&
2956 superpage_size == 0) {
2957 tmp_end = tmp_start + chunk_size;
2958 } else {
2959 tmp_end = tmp2_end;
2960 }
2961 do {
2962 new_entry = vm_map_entry_insert(
2963 map, entry, tmp_start, tmp_end,
2964 object, offset, needs_copy,
2965 FALSE, FALSE,
2966 cur_protection, max_protection,
2967 VM_BEHAVIOR_DEFAULT,
2968 (entry_for_jit)? VM_INHERIT_NONE: inheritance,
2969 0,
2970 no_cache,
2971 permanent,
2972 no_copy_on_read,
2973 superpage_size,
2974 clear_map_aligned,
2975 is_submap,
2976 entry_for_jit,
2977 alias);
2978
2979 assert((object != kernel_object) || (VM_KERN_MEMORY_NONE != alias));
2980
2981 if (resilient_codesign &&
2982 !((cur_protection | max_protection) &
2983 (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
2984 new_entry->vme_resilient_codesign = TRUE;
2985 }
2986
2987 if (resilient_media &&
2988 (object == VM_OBJECT_NULL ||
2989 object->internal)) {
2990 new_entry->vme_resilient_media = TRUE;
2991 }
2992
2993 assert(!new_entry->iokit_acct);
2994 if (!is_submap &&
2995 object != VM_OBJECT_NULL &&
2996 (object->purgable != VM_PURGABLE_DENY ||
2997 object->vo_ledger_tag)) {
2998 assert(new_entry->use_pmap);
2999 assert(!new_entry->iokit_acct);
3000 /*
3001 * Turn off pmap accounting since
3002 * purgeable (or tagged) objects have their
3003 * own ledgers.
3004 */
3005 new_entry->use_pmap = FALSE;
3006 } else if (!is_submap &&
3007 iokit_acct &&
3008 object != VM_OBJECT_NULL &&
3009 object->internal) {
3010 /* alternate accounting */
3011 assert(!new_entry->iokit_acct);
3012 assert(new_entry->use_pmap);
3013 new_entry->iokit_acct = TRUE;
3014 new_entry->use_pmap = FALSE;
3015 DTRACE_VM4(
3016 vm_map_iokit_mapped_region,
3017 vm_map_t, map,
3018 vm_map_offset_t, new_entry->vme_start,
3019 vm_map_offset_t, new_entry->vme_end,
3020 int, VME_ALIAS(new_entry));
3021 vm_map_iokit_mapped_region(
3022 map,
3023 (new_entry->vme_end -
3024 new_entry->vme_start));
3025 } else if (!is_submap) {
3026 assert(!new_entry->iokit_acct);
3027 assert(new_entry->use_pmap);
3028 }
3029
3030 if (is_submap) {
3031 vm_map_t submap;
3032 boolean_t submap_is_64bit;
3033 boolean_t use_pmap;
3034
3035 assert(new_entry->is_sub_map);
3036 assert(!new_entry->use_pmap);
3037 assert(!new_entry->iokit_acct);
3038 submap = (vm_map_t) object;
3039 submap_is_64bit = vm_map_is_64bit(submap);
3040 use_pmap = vmk_flags.vmkf_nested_pmap;
3041 #ifndef NO_NESTED_PMAP
3042 if (use_pmap && submap->pmap == NULL) {
3043 ledger_t ledger = map->pmap->ledger;
3044 /* we need a sub pmap to nest... */
3045 submap->pmap = pmap_create_options(ledger, 0,
3046 submap_is_64bit ? PMAP_CREATE_64BIT : 0);
3047 if (submap->pmap == NULL) {
3048 /* let's proceed without nesting... */
3049 }
3050 #if defined(__arm__) || defined(__arm64__)
3051 else {
3052 pmap_set_nested(submap->pmap);
3053 }
3054 #endif
3055 }
3056 if (use_pmap && submap->pmap != NULL) {
3057 kr = pmap_nest(map->pmap,
3058 submap->pmap,
3059 tmp_start,
3060 tmp_start,
3061 tmp_end - tmp_start);
3062 if (kr != KERN_SUCCESS) {
3063 printf("vm_map_enter: "
3064 "pmap_nest(0x%llx,0x%llx) "
3065 "error 0x%x\n",
3066 (long long)tmp_start,
3067 (long long)tmp_end,
3068 kr);
3069 } else {
3070 /* we're now nested ! */
3071 new_entry->use_pmap = TRUE;
3072 pmap_empty = FALSE;
3073 }
3074 }
3075 #endif /* NO_NESTED_PMAP */
3076 }
3077 entry = new_entry;
3078
3079 if (superpage_size) {
3080 vm_page_t pages, m;
3081 vm_object_t sp_object;
3082 vm_object_offset_t sp_offset;
3083
3084 VME_OFFSET_SET(entry, 0);
3085
3086 /* allocate one superpage */
3087 kr = cpm_allocate(SUPERPAGE_SIZE, &pages, 0, SUPERPAGE_NBASEPAGES - 1, TRUE, 0);
3088 if (kr != KERN_SUCCESS) {
3089 /* deallocate whole range... */
3090 new_mapping_established = TRUE;
3091 /* ... but only up to "tmp_end" */
3092 size -= end - tmp_end;
3093 RETURN(kr);
3094 }
3095
3096 /* create one vm_object per superpage */
3097 sp_object = vm_object_allocate((vm_map_size_t)(entry->vme_end - entry->vme_start));
3098 sp_object->phys_contiguous = TRUE;
3099 sp_object->vo_shadow_offset = (vm_object_offset_t)VM_PAGE_GET_PHYS_PAGE(pages) * PAGE_SIZE;
3100 VME_OBJECT_SET(entry, sp_object);
3101 assert(entry->use_pmap);
3102
3103 /* enter the base pages into the object */
3104 vm_object_lock(sp_object);
3105 for (sp_offset = 0;
3106 sp_offset < SUPERPAGE_SIZE;
3107 sp_offset += PAGE_SIZE) {
3108 m = pages;
3109 pmap_zero_page(VM_PAGE_GET_PHYS_PAGE(m));
3110 pages = NEXT_PAGE(m);
3111 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
3112 vm_page_insert_wired(m, sp_object, sp_offset, VM_KERN_MEMORY_OSFMK);
3113 }
3114 vm_object_unlock(sp_object);
3115 }
3116 } while (tmp_end != tmp2_end &&
3117 (tmp_start = tmp_end) &&
3118 (tmp_end = (tmp2_end - tmp_end > chunk_size) ?
3119 tmp_end + chunk_size : tmp2_end));
3120 }
3121
3122 new_mapping_established = TRUE;
3123
3124 BailOut:
3125 assert(map_locked == TRUE);
3126
3127 if (result == KERN_SUCCESS) {
3128 vm_prot_t pager_prot;
3129 memory_object_t pager;
3130
3131 #if DEBUG
3132 if (pmap_empty &&
3133 !(vmk_flags.vmkf_no_pmap_check)) {
3134 assert(vm_map_pmap_is_empty(map,
3135 *address,
3136 *address + size));
3137 }
3138 #endif /* DEBUG */
3139
3140 /*
3141 * For "named" VM objects, let the pager know that the
3142 * memory object is being mapped. Some pagers need to keep
3143 * track of this, to know when they can reclaim the memory
3144 * object, for example.
3145 * VM calls memory_object_map() for each mapping (specifying
3146 * the protection of each mapping) and calls
3147 * memory_object_last_unmap() when all the mappings are gone.
3148 */
3149 pager_prot = max_protection;
3150 if (needs_copy) {
3151 /*
3152 * Copy-On-Write mapping: won't modify
3153 * the memory object.
3154 */
3155 pager_prot &= ~VM_PROT_WRITE;
3156 }
3157 if (!is_submap &&
3158 object != VM_OBJECT_NULL &&
3159 object->named &&
3160 object->pager != MEMORY_OBJECT_NULL) {
3161 vm_object_lock(object);
3162 pager = object->pager;
3163 if (object->named &&
3164 pager != MEMORY_OBJECT_NULL) {
3165 assert(object->pager_ready);
3166 vm_object_mapping_wait(object, THREAD_UNINT);
3167 vm_object_mapping_begin(object);
3168 vm_object_unlock(object);
3169
3170 kr = memory_object_map(pager, pager_prot);
3171 assert(kr == KERN_SUCCESS);
3172
3173 vm_object_lock(object);
3174 vm_object_mapping_end(object);
3175 }
3176 vm_object_unlock(object);
3177 }
3178 }
3179
3180 assert(map_locked == TRUE);
3181
3182 if (!keep_map_locked) {
3183 vm_map_unlock(map);
3184 map_locked = FALSE;
3185 }
3186
3187 /*
3188 * We can't hold the map lock if we enter this block.
3189 */
3190
3191 if (result == KERN_SUCCESS) {
3192 /* Wire down the new entry if the user
3193 * requested all new map entries be wired.
3194 */
3195 if ((map->wiring_required) || (superpage_size)) {
3196 assert(!keep_map_locked);
3197 pmap_empty = FALSE; /* pmap won't be empty */
3198 kr = vm_map_wire_kernel(map, start, end,
3199 new_entry->protection, VM_KERN_MEMORY_MLOCK,
3200 TRUE);
3201 result = kr;
3202 }
3203
3204 }
3205
3206 if (result != KERN_SUCCESS) {
3207 if (new_mapping_established) {
3208 /*
3209 * We have to get rid of the new mappings since we
3210 * won't make them available to the user.
3211 * Try to do that atomically, to minimize the risk
3212 * that someone else creates new mappings in that range.
3213 */
3214 zap_new_map = vm_map_create(PMAP_NULL,
3215 *address,
3216 *address + size,
3217 map->hdr.entries_pageable);
3218 vm_map_set_page_shift(zap_new_map,
3219 VM_MAP_PAGE_SHIFT(map));
3220 vm_map_disable_hole_optimization(zap_new_map);
3221
3222 if (!map_locked) {
3223 vm_map_lock(map);
3224 map_locked = TRUE;
3225 }
3226 (void) vm_map_delete(map, *address, *address + size,
3227 (VM_MAP_REMOVE_SAVE_ENTRIES |
3228 VM_MAP_REMOVE_NO_MAP_ALIGN),
3229 zap_new_map);
3230 }
3231 if (zap_old_map != VM_MAP_NULL &&
3232 zap_old_map->hdr.nentries != 0) {
3233 vm_map_entry_t entry1, entry2;
3234
3235 /*
3236 * The new mapping failed. Attempt to restore
3237 * the old mappings, saved in the "zap_old_map".
3238 */
3239 if (!map_locked) {
3240 vm_map_lock(map);
3241 map_locked = TRUE;
3242 }
3243
3244 /* first check if the coast is still clear */
3245 start = vm_map_first_entry(zap_old_map)->vme_start;
3246 end = vm_map_last_entry(zap_old_map)->vme_end;
3247 if (vm_map_lookup_entry(map, start, &entry1) ||
3248 vm_map_lookup_entry(map, end, &entry2) ||
3249 entry1 != entry2) {
3250 /*
3251 * Part of that range has already been
3252 * re-mapped: we can't restore the old
3253 * mappings...
3254 */
3255 vm_map_enter_restore_failures++;
3256 } else {
3257 /*
3258 * Transfer the saved map entries from
3259 * "zap_old_map" to the original "map",
3260 * inserting them all after "entry1".
3261 */
3262 for (entry2 = vm_map_first_entry(zap_old_map);
3263 entry2 != vm_map_to_entry(zap_old_map);
3264 entry2 = vm_map_first_entry(zap_old_map)) {
3265 vm_map_size_t entry_size;
3266
3267 entry_size = (entry2->vme_end -
3268 entry2->vme_start);
3269 vm_map_store_entry_unlink(zap_old_map,
3270 entry2);
3271 zap_old_map->size -= entry_size;
3272 vm_map_store_entry_link(map, entry1, entry2,
3273 VM_MAP_KERNEL_FLAGS_NONE);
3274 map->size += entry_size;
3275 entry1 = entry2;
3276 }
3277 if (map->wiring_required) {
3278 /*
3279 * XXX TODO: we should rewire the
3280 * old pages here...
3281 */
3282 }
3283 vm_map_enter_restore_successes++;
3284 }
3285 }
3286 }
3287
3288 /*
3289 * The caller is responsible for releasing the lock if it requested to
3290 * keep the map locked.
3291 */
3292 if (map_locked && !keep_map_locked) {
3293 vm_map_unlock(map);
3294 }
3295
3296 /*
3297 * Get rid of the "zap_maps" and all the map entries that
3298 * they may still contain.
3299 */
3300 if (zap_old_map != VM_MAP_NULL) {
3301 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3302 zap_old_map = VM_MAP_NULL;
3303 }
3304 if (zap_new_map != VM_MAP_NULL) {
3305 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3306 zap_new_map = VM_MAP_NULL;
3307 }
3308
3309 return result;
3310
3311 #undef RETURN
3312 }
3313
3314 #if __arm64__
3315 extern const struct memory_object_pager_ops fourk_pager_ops;
3316 kern_return_t
3317 vm_map_enter_fourk(
3318 vm_map_t map,
3319 vm_map_offset_t *address, /* IN/OUT */
3320 vm_map_size_t size,
3321 vm_map_offset_t mask,
3322 int flags,
3323 vm_map_kernel_flags_t vmk_flags,
3324 vm_tag_t alias,
3325 vm_object_t object,
3326 vm_object_offset_t offset,
3327 boolean_t needs_copy,
3328 vm_prot_t cur_protection,
3329 vm_prot_t max_protection,
3330 vm_inherit_t inheritance)
3331 {
3332 vm_map_entry_t entry, new_entry;
3333 vm_map_offset_t start, fourk_start;
3334 vm_map_offset_t end, fourk_end;
3335 vm_map_size_t fourk_size;
3336 kern_return_t result = KERN_SUCCESS;
3337 vm_map_t zap_old_map = VM_MAP_NULL;
3338 vm_map_t zap_new_map = VM_MAP_NULL;
3339 boolean_t map_locked = FALSE;
3340 boolean_t pmap_empty = TRUE;
3341 boolean_t new_mapping_established = FALSE;
3342 boolean_t keep_map_locked = vmk_flags.vmkf_keep_map_locked;
3343 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
3344 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
3345 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
3346 boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
3347 boolean_t is_submap = vmk_flags.vmkf_submap;
3348 boolean_t permanent = vmk_flags.vmkf_permanent;
3349 boolean_t no_copy_on_read = vmk_flags.vmkf_no_copy_on_read;
3350 boolean_t entry_for_jit = vmk_flags.vmkf_map_jit;
3351 // boolean_t iokit_acct = vmk_flags.vmkf_iokit_acct;
3352 unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
3353 vm_map_offset_t effective_min_offset, effective_max_offset;
3354 kern_return_t kr;
3355 boolean_t clear_map_aligned = FALSE;
3356 memory_object_t fourk_mem_obj;
3357 vm_object_t fourk_object;
3358 vm_map_offset_t fourk_pager_offset;
3359 int fourk_pager_index_start, fourk_pager_index_num;
3360 int cur_idx;
3361 boolean_t fourk_copy;
3362 vm_object_t copy_object;
3363 vm_object_offset_t copy_offset;
3364
3365 fourk_mem_obj = MEMORY_OBJECT_NULL;
3366 fourk_object = VM_OBJECT_NULL;
3367
3368 if (superpage_size) {
3369 return KERN_NOT_SUPPORTED;
3370 }
3371
3372 if ((cur_protection & VM_PROT_WRITE) &&
3373 (cur_protection & VM_PROT_EXECUTE) &&
3374 #if !CONFIG_EMBEDDED
3375 map != kernel_map &&
3376 cs_process_enforcement(NULL) &&
3377 #endif /* !CONFIG_EMBEDDED */
3378 !entry_for_jit) {
3379 DTRACE_VM3(cs_wx,
3380 uint64_t, 0,
3381 uint64_t, 0,
3382 vm_prot_t, cur_protection);
3383 printf("CODE SIGNING: %d[%s] %s: curprot cannot be write+execute. "
3384 "turning off execute\n",
3385 proc_selfpid(),
3386 (current_task()->bsd_info
3387 ? proc_name_address(current_task()->bsd_info)
3388 : "?"),
3389 __FUNCTION__);
3390 cur_protection &= ~VM_PROT_EXECUTE;
3391 }
3392
3393 /*
3394 * If the task has requested executable lockdown,
3395 * deny any new executable mapping.
3396 */
3397 if (map->map_disallow_new_exec == TRUE) {
3398 if (cur_protection & VM_PROT_EXECUTE) {
3399 return KERN_PROTECTION_FAILURE;
3400 }
3401 }
3402
3403 if (is_submap) {
3404 return KERN_NOT_SUPPORTED;
3405 }
3406 if (vmk_flags.vmkf_already) {
3407 return KERN_NOT_SUPPORTED;
3408 }
3409 if (purgable || entry_for_jit) {
3410 return KERN_NOT_SUPPORTED;
3411 }
3412
3413 effective_min_offset = map->min_offset;
3414
3415 if (vmk_flags.vmkf_beyond_max) {
3416 return KERN_NOT_SUPPORTED;
3417 } else {
3418 effective_max_offset = map->max_offset;
3419 }
3420
3421 if (size == 0 ||
3422 (offset & FOURK_PAGE_MASK) != 0) {
3423 *address = 0;
3424 return KERN_INVALID_ARGUMENT;
3425 }
3426
3427 #define RETURN(value) { result = value; goto BailOut; }
3428
3429 assert(VM_MAP_PAGE_ALIGNED(*address, FOURK_PAGE_MASK));
3430 assert(VM_MAP_PAGE_ALIGNED(size, FOURK_PAGE_MASK));
3431
3432 if (!anywhere && overwrite) {
3433 return KERN_NOT_SUPPORTED;
3434 }
3435 if (!anywhere && overwrite) {
3436 /*
3437 * Create a temporary VM map to hold the old mappings in the
3438 * affected area while we create the new one.
3439 * This avoids releasing the VM map lock in
3440 * vm_map_entry_delete() and allows atomicity
3441 * when we want to replace some mappings with a new one.
3442 * It also allows us to restore the old VM mappings if the
3443 * new mapping fails.
3444 */
3445 zap_old_map = vm_map_create(PMAP_NULL,
3446 *address,
3447 *address + size,
3448 map->hdr.entries_pageable);
3449 vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
3450 vm_map_disable_hole_optimization(zap_old_map);
3451 }
3452
3453 fourk_start = *address;
3454 fourk_size = size;
3455 fourk_end = fourk_start + fourk_size;
3456
3457 start = vm_map_trunc_page(*address, VM_MAP_PAGE_MASK(map));
3458 end = vm_map_round_page(fourk_end, VM_MAP_PAGE_MASK(map));
3459 size = end - start;
3460
3461 if (anywhere) {
3462 return KERN_NOT_SUPPORTED;
3463 } else {
3464 /*
3465 * Verify that:
3466 * the address doesn't itself violate
3467 * the mask requirement.
3468 */
3469
3470 vm_map_lock(map);
3471 map_locked = TRUE;
3472 if ((start & mask) != 0) {
3473 RETURN(KERN_NO_SPACE);
3474 }
3475
3476 /*
3477 * ... the address is within bounds
3478 */
3479
3480 end = start + size;
3481
3482 if ((start < effective_min_offset) ||
3483 (end > effective_max_offset) ||
3484 (start >= end)) {
3485 RETURN(KERN_INVALID_ADDRESS);
3486 }
3487
3488 if (overwrite && zap_old_map != VM_MAP_NULL) {
3489 /*
3490 * Fixed mapping and "overwrite" flag: attempt to
3491 * remove all existing mappings in the specified
3492 * address range, saving them in our "zap_old_map".
3493 */
3494 (void) vm_map_delete(map, start, end,
3495 (VM_MAP_REMOVE_SAVE_ENTRIES |
3496 VM_MAP_REMOVE_NO_MAP_ALIGN),
3497 zap_old_map);
3498 }
3499
3500 /*
3501 * ... the starting address isn't allocated
3502 */
3503 if (vm_map_lookup_entry(map, start, &entry)) {
3504 vm_object_t cur_object, shadow_object;
3505
3506 /*
3507 * We might already have some 4K mappings
3508 * in a 16K page here.
3509 */
3510
3511 if (entry->vme_end - entry->vme_start
3512 != SIXTEENK_PAGE_SIZE) {
3513 RETURN(KERN_NO_SPACE);
3514 }
3515 if (entry->is_sub_map) {
3516 RETURN(KERN_NO_SPACE);
3517 }
3518 if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
3519 RETURN(KERN_NO_SPACE);
3520 }
3521
3522 /* go all the way down the shadow chain */
3523 cur_object = VME_OBJECT(entry);
3524 vm_object_lock(cur_object);
3525 while (cur_object->shadow != VM_OBJECT_NULL) {
3526 shadow_object = cur_object->shadow;
3527 vm_object_lock(shadow_object);
3528 vm_object_unlock(cur_object);
3529 cur_object = shadow_object;
3530 shadow_object = VM_OBJECT_NULL;
3531 }
3532 if (cur_object->internal ||
3533 cur_object->pager == NULL) {
3534 vm_object_unlock(cur_object);
3535 RETURN(KERN_NO_SPACE);
3536 }
3537 if (cur_object->pager->mo_pager_ops
3538 != &fourk_pager_ops) {
3539 vm_object_unlock(cur_object);
3540 RETURN(KERN_NO_SPACE);
3541 }
3542 fourk_object = cur_object;
3543 fourk_mem_obj = fourk_object->pager;
3544
3545 /* keep the "4K" object alive */
3546 vm_object_reference_locked(fourk_object);
3547 vm_object_unlock(fourk_object);
3548
3549 /* merge permissions */
3550 entry->protection |= cur_protection;
3551 entry->max_protection |= max_protection;
3552 if ((entry->protection & (VM_PROT_WRITE |
3553 VM_PROT_EXECUTE)) ==
3554 (VM_PROT_WRITE | VM_PROT_EXECUTE) &&
3555 fourk_binary_compatibility_unsafe &&
3556 fourk_binary_compatibility_allow_wx) {
3557 /* write+execute: need to be "jit" */
3558 entry->used_for_jit = TRUE;
3559 }
3560
3561 goto map_in_fourk_pager;
3562 }
3563
3564 /*
3565 * ... the next region doesn't overlap the
3566 * end point.
3567 */
3568
3569 if ((entry->vme_next != vm_map_to_entry(map)) &&
3570 (entry->vme_next->vme_start < end)) {
3571 RETURN(KERN_NO_SPACE);
3572 }
3573 }
3574
3575 /*
3576 * At this point,
3577 * "start" and "end" should define the endpoints of the
3578 * available new range, and
3579 * "entry" should refer to the region before the new
3580 * range, and
3581 *
3582 * the map should be locked.
3583 */
3584
3585 /* create a new "4K" pager */
3586 fourk_mem_obj = fourk_pager_create();
3587 fourk_object = fourk_pager_to_vm_object(fourk_mem_obj);
3588 assert(fourk_object);
3589
3590 /* keep the "4K" object alive */
3591 vm_object_reference(fourk_object);
3592
3593 /* create a "copy" object, to map the "4K" object copy-on-write */
3594 fourk_copy = TRUE;
3595 result = vm_object_copy_strategically(fourk_object,
3596 0,
3597 end - start,
3598 &copy_object,
3599 &copy_offset,
3600 &fourk_copy);
3601 assert(result == KERN_SUCCESS);
3602 assert(copy_object != VM_OBJECT_NULL);
3603 assert(copy_offset == 0);
3604
3605 /* take a reference on the copy object, for this mapping */
3606 vm_object_reference(copy_object);
3607
3608 /* map the "4K" pager's copy object */
3609 new_entry =
3610 vm_map_entry_insert(map, entry,
3611 vm_map_trunc_page(start,
3612 VM_MAP_PAGE_MASK(map)),
3613 vm_map_round_page(end,
3614 VM_MAP_PAGE_MASK(map)),
3615 copy_object,
3616 0, /* offset */
3617 FALSE, /* needs_copy */
3618 FALSE,
3619 FALSE,
3620 cur_protection, max_protection,
3621 VM_BEHAVIOR_DEFAULT,
3622 ((entry_for_jit)
3623 ? VM_INHERIT_NONE
3624 : inheritance),
3625 0,
3626 no_cache,
3627 permanent,
3628 no_copy_on_read,
3629 superpage_size,
3630 clear_map_aligned,
3631 is_submap,
3632 FALSE, /* jit */
3633 alias);
3634 entry = new_entry;
3635
3636 #if VM_MAP_DEBUG_FOURK
3637 if (vm_map_debug_fourk) {
3638 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] new pager %p\n",
3639 map,
3640 (uint64_t) entry->vme_start,
3641 (uint64_t) entry->vme_end,
3642 fourk_mem_obj);
3643 }
3644 #endif /* VM_MAP_DEBUG_FOURK */
3645
3646 new_mapping_established = TRUE;
3647
3648 map_in_fourk_pager:
3649 /* "map" the original "object" where it belongs in the "4K" pager */
3650 fourk_pager_offset = (fourk_start & SIXTEENK_PAGE_MASK);
3651 fourk_pager_index_start = (int) (fourk_pager_offset / FOURK_PAGE_SIZE);
3652 if (fourk_size > SIXTEENK_PAGE_SIZE) {
3653 fourk_pager_index_num = 4;
3654 } else {
3655 fourk_pager_index_num = (int) (fourk_size / FOURK_PAGE_SIZE);
3656 }
3657 if (fourk_pager_index_start + fourk_pager_index_num > 4) {
3658 fourk_pager_index_num = 4 - fourk_pager_index_start;
3659 }
3660 for (cur_idx = 0;
3661 cur_idx < fourk_pager_index_num;
3662 cur_idx++) {
3663 vm_object_t old_object;
3664 vm_object_offset_t old_offset;
3665
3666 kr = fourk_pager_populate(fourk_mem_obj,
3667 TRUE, /* overwrite */
3668 fourk_pager_index_start + cur_idx,
3669 object,
3670 (object
3671 ? (offset +
3672 (cur_idx * FOURK_PAGE_SIZE))
3673 : 0),
3674 &old_object,
3675 &old_offset);
3676 #if VM_MAP_DEBUG_FOURK
3677 if (vm_map_debug_fourk) {
3678 if (old_object == (vm_object_t) -1 &&
3679 old_offset == (vm_object_offset_t) -1) {
3680 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] "
3681 "pager [%p:0x%llx] "
3682 "populate[%d] "
3683 "[object:%p,offset:0x%llx]\n",
3684 map,
3685 (uint64_t) entry->vme_start,
3686 (uint64_t) entry->vme_end,
3687 fourk_mem_obj,
3688 VME_OFFSET(entry),
3689 fourk_pager_index_start + cur_idx,
3690 object,
3691 (object
3692 ? (offset + (cur_idx * FOURK_PAGE_SIZE))
3693 : 0));
3694 } else {
3695 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] "
3696 "pager [%p:0x%llx] "
3697 "populate[%d] [object:%p,offset:0x%llx] "
3698 "old [%p:0x%llx]\n",
3699 map,
3700 (uint64_t) entry->vme_start,
3701 (uint64_t) entry->vme_end,
3702 fourk_mem_obj,
3703 VME_OFFSET(entry),
3704 fourk_pager_index_start + cur_idx,
3705 object,
3706 (object
3707 ? (offset + (cur_idx * FOURK_PAGE_SIZE))
3708 : 0),
3709 old_object,
3710 old_offset);
3711 }
3712 }
3713 #endif /* VM_MAP_DEBUG_FOURK */
3714
3715 assert(kr == KERN_SUCCESS);
3716 if (object != old_object &&
3717 object != VM_OBJECT_NULL &&
3718 object != (vm_object_t) -1) {
3719 vm_object_reference(object);
3720 }
3721 if (object != old_object &&
3722 old_object != VM_OBJECT_NULL &&
3723 old_object != (vm_object_t) -1) {
3724 vm_object_deallocate(old_object);
3725 }
3726 }
3727
3728 BailOut:
3729 assert(map_locked == TRUE);
3730
3731 if (fourk_object != VM_OBJECT_NULL) {
3732 vm_object_deallocate(fourk_object);
3733 fourk_object = VM_OBJECT_NULL;
3734 fourk_mem_obj = MEMORY_OBJECT_NULL;
3735 }
3736
3737 if (result == KERN_SUCCESS) {
3738 vm_prot_t pager_prot;
3739 memory_object_t pager;
3740
3741 #if DEBUG
3742 if (pmap_empty &&
3743 !(vmk_flags.vmkf_no_pmap_check)) {
3744 assert(vm_map_pmap_is_empty(map,
3745 *address,
3746 *address + size));
3747 }
3748 #endif /* DEBUG */
3749
3750 /*
3751 * For "named" VM objects, let the pager know that the
3752 * memory object is being mapped. Some pagers need to keep
3753 * track of this, to know when they can reclaim the memory
3754 * object, for example.
3755 * VM calls memory_object_map() for each mapping (specifying
3756 * the protection of each mapping) and calls
3757 * memory_object_last_unmap() when all the mappings are gone.
3758 */
3759 pager_prot = max_protection;
3760 if (needs_copy) {
3761 /*
3762 * Copy-On-Write mapping: won't modify
3763 * the memory object.
3764 */
3765 pager_prot &= ~VM_PROT_WRITE;
3766 }
3767 if (!is_submap &&
3768 object != VM_OBJECT_NULL &&
3769 object->named &&
3770 object->pager != MEMORY_OBJECT_NULL) {
3771 vm_object_lock(object);
3772 pager = object->pager;
3773 if (object->named &&
3774 pager != MEMORY_OBJECT_NULL) {
3775 assert(object->pager_ready);
3776 vm_object_mapping_wait(object, THREAD_UNINT);
3777 vm_object_mapping_begin(object);
3778 vm_object_unlock(object);
3779
3780 kr = memory_object_map(pager, pager_prot);
3781 assert(kr == KERN_SUCCESS);
3782
3783 vm_object_lock(object);
3784 vm_object_mapping_end(object);
3785 }
3786 vm_object_unlock(object);
3787 }
3788 if (!is_submap &&
3789 fourk_object != VM_OBJECT_NULL &&
3790 fourk_object->named &&
3791 fourk_object->pager != MEMORY_OBJECT_NULL) {
3792 vm_object_lock(fourk_object);
3793 pager = fourk_object->pager;
3794 if (fourk_object->named &&
3795 pager != MEMORY_OBJECT_NULL) {
3796 assert(fourk_object->pager_ready);
3797 vm_object_mapping_wait(fourk_object,
3798 THREAD_UNINT);
3799 vm_object_mapping_begin(fourk_object);
3800 vm_object_unlock(fourk_object);
3801
3802 kr = memory_object_map(pager, VM_PROT_READ);
3803 assert(kr == KERN_SUCCESS);
3804
3805 vm_object_lock(fourk_object);
3806 vm_object_mapping_end(fourk_object);
3807 }
3808 vm_object_unlock(fourk_object);
3809 }
3810 }
3811
3812 assert(map_locked == TRUE);
3813
3814 if (!keep_map_locked) {
3815 vm_map_unlock(map);
3816 map_locked = FALSE;
3817 }
3818
3819 /*
3820 * We can't hold the map lock if we enter this block.
3821 */
3822
3823 if (result == KERN_SUCCESS) {
3824 /* Wire down the new entry if the user
3825 * requested all new map entries be wired.
3826 */
3827 if ((map->wiring_required) || (superpage_size)) {
3828 assert(!keep_map_locked);
3829 pmap_empty = FALSE; /* pmap won't be empty */
3830 kr = vm_map_wire_kernel(map, start, end,
3831 new_entry->protection, VM_KERN_MEMORY_MLOCK,
3832 TRUE);
3833 result = kr;
3834 }
3835
3836 }
3837
3838 if (result != KERN_SUCCESS) {
3839 if (new_mapping_established) {
3840 /*
3841 * We have to get rid of the new mappings since we
3842 * won't make them available to the user.
3843 * Try to do that atomically, to minimize the risk
3844 * that someone else creates new mappings in that range.
3845 */
3846 zap_new_map = vm_map_create(PMAP_NULL,
3847 *address,
3848 *address + size,
3849 map->hdr.entries_pageable);
3850 vm_map_set_page_shift(zap_new_map,
3851 VM_MAP_PAGE_SHIFT(map));
3852 vm_map_disable_hole_optimization(zap_new_map);
3853
3854 if (!map_locked) {
3855 vm_map_lock(map);
3856 map_locked = TRUE;
3857 }
3858 (void) vm_map_delete(map, *address, *address + size,
3859 (VM_MAP_REMOVE_SAVE_ENTRIES |
3860 VM_MAP_REMOVE_NO_MAP_ALIGN),
3861 zap_new_map);
3862 }
3863 if (zap_old_map != VM_MAP_NULL &&
3864 zap_old_map->hdr.nentries != 0) {
3865 vm_map_entry_t entry1, entry2;
3866
3867 /*
3868 * The new mapping failed. Attempt to restore
3869 * the old mappings, saved in the "zap_old_map".
3870 */
3871 if (!map_locked) {
3872 vm_map_lock(map);
3873 map_locked = TRUE;
3874 }
3875
3876 /* first check if the coast is still clear */
3877 start = vm_map_first_entry(zap_old_map)->vme_start;
3878 end = vm_map_last_entry(zap_old_map)->vme_end;
3879 if (vm_map_lookup_entry(map, start, &entry1) ||
3880 vm_map_lookup_entry(map, end, &entry2) ||
3881 entry1 != entry2) {
3882 /*
3883 * Part of that range has already been
3884 * re-mapped: we can't restore the old
3885 * mappings...
3886 */
3887 vm_map_enter_restore_failures++;
3888 } else {
3889 /*
3890 * Transfer the saved map entries from
3891 * "zap_old_map" to the original "map",
3892 * inserting them all after "entry1".
3893 */
3894 for (entry2 = vm_map_first_entry(zap_old_map);
3895 entry2 != vm_map_to_entry(zap_old_map);
3896 entry2 = vm_map_first_entry(zap_old_map)) {
3897 vm_map_size_t entry_size;
3898
3899 entry_size = (entry2->vme_end -
3900 entry2->vme_start);
3901 vm_map_store_entry_unlink(zap_old_map,
3902 entry2);
3903 zap_old_map->size -= entry_size;
3904 vm_map_store_entry_link(map, entry1, entry2,
3905 VM_MAP_KERNEL_FLAGS_NONE);
3906 map->size += entry_size;
3907 entry1 = entry2;
3908 }
3909 if (map->wiring_required) {
3910 /*
3911 * XXX TODO: we should rewire the
3912 * old pages here...
3913 */
3914 }
3915 vm_map_enter_restore_successes++;
3916 }
3917 }
3918 }
3919
3920 /*
3921 * The caller is responsible for releasing the lock if it requested to
3922 * keep the map locked.
3923 */
3924 if (map_locked && !keep_map_locked) {
3925 vm_map_unlock(map);
3926 }
3927
3928 /*
3929 * Get rid of the "zap_maps" and all the map entries that
3930 * they may still contain.
3931 */
3932 if (zap_old_map != VM_MAP_NULL) {
3933 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3934 zap_old_map = VM_MAP_NULL;
3935 }
3936 if (zap_new_map != VM_MAP_NULL) {
3937 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3938 zap_new_map = VM_MAP_NULL;
3939 }
3940
3941 return result;
3942
3943 #undef RETURN
3944 }
3945 #endif /* __arm64__ */
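
/*
 * Illustrative sketch (not part of the original source) of the "zap map"
 * rollback pattern used on failure in the function above: mappings that
 * will not be published to the caller are moved into a throw-away map and
 * destroyed with it.  "start" and "end" below are placeholders for the
 * range being rolled back.
 *
 *	zap_map = vm_map_create(PMAP_NULL, start, end,
 *	    map->hdr.entries_pageable);
 *	vm_map_set_page_shift(zap_map, VM_MAP_PAGE_SHIFT(map));
 *	vm_map_disable_hole_optimization(zap_map);
 *	(void) vm_map_delete(map, start, end,
 *	    (VM_MAP_REMOVE_SAVE_ENTRIES | VM_MAP_REMOVE_NO_MAP_ALIGN),
 *	    zap_map);
 *	...
 *	vm_map_destroy(zap_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
 */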
3946
3947 /*
3948 * Counters for the prefault optimization.
3949 */
3950 int64_t vm_prefault_nb_pages = 0;
3951 int64_t vm_prefault_nb_bailout = 0;
3952
3953 static kern_return_t
3954 vm_map_enter_mem_object_helper(
3955 vm_map_t target_map,
3956 vm_map_offset_t *address,
3957 vm_map_size_t initial_size,
3958 vm_map_offset_t mask,
3959 int flags,
3960 vm_map_kernel_flags_t vmk_flags,
3961 vm_tag_t tag,
3962 ipc_port_t port,
3963 vm_object_offset_t offset,
3964 boolean_t copy,
3965 vm_prot_t cur_protection,
3966 vm_prot_t max_protection,
3967 vm_inherit_t inheritance,
3968 upl_page_list_ptr_t page_list,
3969 unsigned int page_list_count)
3970 {
3971 vm_map_address_t map_addr;
3972 vm_map_size_t map_size;
3973 vm_object_t object;
3974 vm_object_size_t size;
3975 kern_return_t result;
3976 boolean_t mask_cur_protection, mask_max_protection;
3977 boolean_t kernel_prefault, try_prefault = (page_list_count != 0);
3978 vm_map_offset_t offset_in_mapping = 0;
3979 #if __arm64__
3980 boolean_t fourk = vmk_flags.vmkf_fourk;
3981 #endif /* __arm64__ */
3982
3983 assertf(vmk_flags.__vmkf_unused == 0, "vmk_flags unused=0x%x\n", vmk_flags.__vmkf_unused);
3984
3985 mask_cur_protection = cur_protection & VM_PROT_IS_MASK;
3986 mask_max_protection = max_protection & VM_PROT_IS_MASK;
3987 cur_protection &= ~VM_PROT_IS_MASK;
3988 max_protection &= ~VM_PROT_IS_MASK;
3989
3990 /*
3991 * Check arguments for validity
3992 */
3993 if ((target_map == VM_MAP_NULL) ||
3994 (cur_protection & ~VM_PROT_ALL) ||
3995 (max_protection & ~VM_PROT_ALL) ||
3996 (inheritance > VM_INHERIT_LAST_VALID) ||
3997 (try_prefault && (copy || !page_list)) ||
3998 initial_size == 0) {
3999 return KERN_INVALID_ARGUMENT;
4000 }
4001
4002 #if __arm64__
4003 if (fourk) {
4004 map_addr = vm_map_trunc_page(*address, FOURK_PAGE_MASK);
4005 map_size = vm_map_round_page(initial_size, FOURK_PAGE_MASK);
4006 } else
4007 #endif /* __arm64__ */
4008 {
4009 map_addr = vm_map_trunc_page(*address,
4010 VM_MAP_PAGE_MASK(target_map));
4011 map_size = vm_map_round_page(initial_size,
4012 VM_MAP_PAGE_MASK(target_map));
4013 }
4014 size = vm_object_round_page(initial_size);
4015
4016 /*
4017 * Find the vm object (if any) corresponding to this port.
4018 */
4019 if (!IP_VALID(port)) {
4020 object = VM_OBJECT_NULL;
4021 offset = 0;
4022 copy = FALSE;
4023 } else if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
4024 vm_named_entry_t named_entry;
4025
4026 named_entry = (vm_named_entry_t) port->ip_kobject;
4027
4028 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4029 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4030 offset += named_entry->data_offset;
4031 }
4032
4033 /* a few checks to make sure the user is obeying the rules */
4034 if (size == 0) {
4035 if (offset >= named_entry->size) {
4036 return KERN_INVALID_RIGHT;
4037 }
4038 size = named_entry->size - offset;
4039 }
4040 if (mask_max_protection) {
4041 max_protection &= named_entry->protection;
4042 }
4043 if (mask_cur_protection) {
4044 cur_protection &= named_entry->protection;
4045 }
4046 if ((named_entry->protection & max_protection) !=
4047 max_protection) {
4048 return KERN_INVALID_RIGHT;
4049 }
4050 if ((named_entry->protection & cur_protection) !=
4051 cur_protection) {
4052 return KERN_INVALID_RIGHT;
4053 }
4054 if (offset + size < offset) {
4055 /* overflow */
4056 return KERN_INVALID_ARGUMENT;
4057 }
4058 if (named_entry->size < (offset + initial_size)) {
4059 return KERN_INVALID_ARGUMENT;
4060 }
4061
4062 if (named_entry->is_copy) {
4063 /* for a vm_map_copy, we can only map it whole */
4064 if ((size != named_entry->size) &&
4065 (vm_map_round_page(size,
4066 VM_MAP_PAGE_MASK(target_map)) ==
4067 named_entry->size)) {
4068 /* XXX FBDP use the rounded size... */
4069 size = vm_map_round_page(
4070 size,
4071 VM_MAP_PAGE_MASK(target_map));
4072 }
4073
4074 if (!(flags & VM_FLAGS_ANYWHERE) &&
4075 (offset != 0 ||
4076 size != named_entry->size)) {
4077 /*
4078 * XXX for a mapping at a "fixed" address,
4079 * we can't trim after mapping the whole
4080 * memory entry, so reject a request for a
4081 * partial mapping.
4082 */
4083 return KERN_INVALID_ARGUMENT;
4084 }
4085 }
4086
4087 /* the caller's "offset" parameter is relative to the start of */
4088 /* the named entry; convert it to an offset within the object */
4089 offset = offset + named_entry->offset;
4090
4091 if (!VM_MAP_PAGE_ALIGNED(size,
4092 VM_MAP_PAGE_MASK(target_map))) {
4093 /*
4094 * Let's not map more than requested;
4095 * vm_map_enter() will handle this "not map-aligned"
4096 * case.
4097 */
4098 map_size = size;
4099 }
4100
4101 named_entry_lock(named_entry);
4102 if (named_entry->is_sub_map) {
4103 vm_map_t submap;
4104
4105 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4106 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4107 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap.");
4108 }
4109
4110 submap = named_entry->backing.map;
4111 vm_map_lock(submap);
4112 vm_map_reference(submap);
4113 vm_map_unlock(submap);
4114 named_entry_unlock(named_entry);
4115
4116 vmk_flags.vmkf_submap = TRUE;
4117
4118 result = vm_map_enter(target_map,
4119 &map_addr,
4120 map_size,
4121 mask,
4122 flags,
4123 vmk_flags,
4124 tag,
4125 (vm_object_t)(uintptr_t) submap,
4126 offset,
4127 copy,
4128 cur_protection,
4129 max_protection,
4130 inheritance);
4131 if (result != KERN_SUCCESS) {
4132 vm_map_deallocate(submap);
4133 } else {
4134 /*
4135 * No need to lock "submap" just to check its
4136 * "mapped" flag: that flag is never reset
4137 * once it's been set and if we race, we'll
4138 * just end up setting it twice, which is OK.
4139 */
4140 if (submap->mapped_in_other_pmaps == FALSE &&
4141 vm_map_pmap(submap) != PMAP_NULL &&
4142 vm_map_pmap(submap) !=
4143 vm_map_pmap(target_map)) {
4144 /*
4145 * This submap is being mapped in a map
4146 * that uses a different pmap.
4147 * Set its "mapped_in_other_pmaps" flag
4148 * to indicate that we now need to
4149 * remove mappings from all pmaps rather
4150 * than just the submap's pmap.
4151 */
4152 vm_map_lock(submap);
4153 submap->mapped_in_other_pmaps = TRUE;
4154 vm_map_unlock(submap);
4155 }
4156 *address = map_addr;
4157 }
4158 return result;
4159 } else if (named_entry->is_copy) {
4160 kern_return_t kr;
4161 vm_map_copy_t copy_map;
4162 vm_map_entry_t copy_entry;
4163 vm_map_offset_t copy_addr;
4164
4165 if (flags & ~(VM_FLAGS_FIXED |
4166 VM_FLAGS_ANYWHERE |
4167 VM_FLAGS_OVERWRITE |
4168 VM_FLAGS_RETURN_4K_DATA_ADDR |
4169 VM_FLAGS_RETURN_DATA_ADDR |
4170 VM_FLAGS_ALIAS_MASK)) {
4171 named_entry_unlock(named_entry);
4172 return KERN_INVALID_ARGUMENT;
4173 }
4174
4175 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4176 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4177 offset_in_mapping = offset - vm_object_trunc_page(offset);
4178 if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR) {
4179 offset_in_mapping &= ~((signed)(0xFFF));
4180 }
4181 offset = vm_object_trunc_page(offset);
4182 map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset;
4183 }
4184
4185 copy_map = named_entry->backing.copy;
4186 assert(copy_map->type == VM_MAP_COPY_ENTRY_LIST);
4187 if (copy_map->type != VM_MAP_COPY_ENTRY_LIST) {
4188 /* unsupported type; should not happen */
4189 printf("vm_map_enter_mem_object: "
4190 "memory_entry->backing.copy "
4191 "unsupported type 0x%x\n",
4192 copy_map->type);
4193 named_entry_unlock(named_entry);
4194 return KERN_INVALID_ARGUMENT;
4195 }
4196
4197 /* reserve a contiguous range */
4198 kr = vm_map_enter(target_map,
4199 &map_addr,
4200 /* map whole mem entry, trim later: */
4201 named_entry->size,
4202 mask,
4203 flags & (VM_FLAGS_ANYWHERE |
4204 VM_FLAGS_OVERWRITE |
4205 VM_FLAGS_RETURN_4K_DATA_ADDR |
4206 VM_FLAGS_RETURN_DATA_ADDR),
4207 vmk_flags,
4208 tag,
4209 VM_OBJECT_NULL,
4210 0,
4211 FALSE, /* copy */
4212 cur_protection,
4213 max_protection,
4214 inheritance);
4215 if (kr != KERN_SUCCESS) {
4216 named_entry_unlock(named_entry);
4217 return kr;
4218 }
4219
4220 copy_addr = map_addr;
4221
4222 for (copy_entry = vm_map_copy_first_entry(copy_map);
4223 copy_entry != vm_map_copy_to_entry(copy_map);
4224 copy_entry = copy_entry->vme_next) {
4225 int remap_flags;
4226 vm_map_kernel_flags_t vmk_remap_flags;
4227 vm_map_t copy_submap;
4228 vm_object_t copy_object;
4229 vm_map_size_t copy_size;
4230 vm_object_offset_t copy_offset;
4231 int copy_vm_alias;
4232
4233 remap_flags = 0;
4234 vmk_remap_flags = VM_MAP_KERNEL_FLAGS_NONE;
4235
4236 copy_object = VME_OBJECT(copy_entry);
4237 copy_offset = VME_OFFSET(copy_entry);
4238 copy_size = (copy_entry->vme_end -
4239 copy_entry->vme_start);
4240 VM_GET_FLAGS_ALIAS(flags, copy_vm_alias);
4241 if (copy_vm_alias == 0) {
4242 /*
4243 * Caller does not want a specific
4244 * alias for this new mapping: use
4245 * the alias of the original mapping.
4246 */
4247 copy_vm_alias = VME_ALIAS(copy_entry);
4248 }
4249
4250 /* sanity check */
4251 if ((copy_addr + copy_size) >
4252 (map_addr +
4253 named_entry->size /* XXX full size */)) {
4254 /* over-mapping too much !? */
4255 kr = KERN_INVALID_ARGUMENT;
4256 /* abort */
4257 break;
4258 }
4259
4260 /* take a reference on the object */
4261 if (copy_entry->is_sub_map) {
4262 vmk_remap_flags.vmkf_submap = TRUE;
4263 copy_submap = VME_SUBMAP(copy_entry);
4264 vm_map_lock(copy_submap);
4265 vm_map_reference(copy_submap);
4266 vm_map_unlock(copy_submap);
4267 copy_object = (vm_object_t)(uintptr_t) copy_submap;
4268 } else if (!copy &&
4269 copy_object != VM_OBJECT_NULL &&
4270 (copy_entry->needs_copy ||
4271 copy_object->shadowed ||
4272 (!copy_object->true_share &&
4273 !copy_entry->is_shared &&
4274 copy_object->vo_size > copy_size))) {
4275 /*
4276 * We need to resolve our side of this
4277 * "symmetric" copy-on-write now; we
4278 * need a new object to map and share,
4279 * instead of the current one which
4280 * might still be shared with the
4281 * original mapping.
4282 *
4283 * Note: A "vm_map_copy_t" does not
4284 * have a lock but we're protected by
4285 * the named entry's lock here.
4286 */
4287 // assert(copy_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);
4288 VME_OBJECT_SHADOW(copy_entry, copy_size);
4289 if (!copy_entry->needs_copy &&
4290 copy_entry->protection & VM_PROT_WRITE) {
4291 vm_prot_t prot;
4292
4293 prot = copy_entry->protection & ~VM_PROT_WRITE;
4294 vm_object_pmap_protect(copy_object,
4295 copy_offset,
4296 copy_size,
4297 PMAP_NULL,
4298 0,
4299 prot);
4300 }
4301
4302 copy_entry->needs_copy = FALSE;
4303 copy_entry->is_shared = TRUE;
4304 copy_object = VME_OBJECT(copy_entry);
4305 copy_offset = VME_OFFSET(copy_entry);
4306 vm_object_lock(copy_object);
4307 vm_object_reference_locked(copy_object);
4308 if (copy_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
4309 /* we're about to make a shared mapping of this object */
4310 copy_object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
4311 copy_object->true_share = TRUE;
4312 }
4313 vm_object_unlock(copy_object);
4314 } else {
4315 /*
4316 * We already have the right object
4317 * to map.
4318 */
4319 copy_object = VME_OBJECT(copy_entry);
4320 vm_object_reference(copy_object);
4321 }
4322
4323 /* over-map the object into destination */
4324 remap_flags |= flags;
4325 remap_flags |= VM_FLAGS_FIXED;
4326 remap_flags |= VM_FLAGS_OVERWRITE;
4327 remap_flags &= ~VM_FLAGS_ANYWHERE;
4328 if (!copy && !copy_entry->is_sub_map) {
4329 /*
4330 * copy-on-write should have been
4331 * resolved at this point, or we would
4332 * end up sharing instead of copying.
4333 */
4334 assert(!copy_entry->needs_copy);
4335 }
4336 #if !CONFIG_EMBEDDED
4337 if (copy_entry->used_for_jit) {
4338 vmk_remap_flags.vmkf_map_jit = TRUE;
4339 }
4340 #endif /* !CONFIG_EMBEDDED */
4341 kr = vm_map_enter(target_map,
4342 &copy_addr,
4343 copy_size,
4344 (vm_map_offset_t) 0,
4345 remap_flags,
4346 vmk_remap_flags,
4347 copy_vm_alias,
4348 copy_object,
4349 copy_offset,
4350 ((copy_object == NULL) ? FALSE : copy),
4351 cur_protection,
4352 max_protection,
4353 inheritance);
4354 if (kr != KERN_SUCCESS) {
4355 if (copy_entry->is_sub_map) {
4356 vm_map_deallocate(copy_submap);
4357 } else {
4358 vm_object_deallocate(copy_object);
4359 }
4360 /* abort */
4361 break;
4362 }
4363
4364 /* next mapping */
4365 copy_addr += copy_size;
4366 }
4367
4368 if (kr == KERN_SUCCESS) {
4369 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4370 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4371 *address = map_addr + offset_in_mapping;
4372 } else {
4373 *address = map_addr;
4374 }
4375
4376 if (offset) {
4377 /*
4378 * Trim in front, from 0 to "offset".
4379 */
4380 vm_map_remove(target_map,
4381 map_addr,
4382 map_addr + offset,
4383 VM_MAP_REMOVE_NO_FLAGS);
4384 *address += offset;
4385 }
4386 if (offset + map_size < named_entry->size) {
4387 /*
4388 * Trim in back, from
4389 * "offset + map_size" to
4390 * "named_entry->size".
4391 */
4392 vm_map_remove(target_map,
4393 (map_addr +
4394 offset + map_size),
4395 (map_addr +
4396 named_entry->size),
4397 VM_MAP_REMOVE_NO_FLAGS);
4398 }
4399 }
4400 named_entry_unlock(named_entry);
4401
4402 if (kr != KERN_SUCCESS) {
4403 if (!(flags & VM_FLAGS_OVERWRITE)) {
4404 /* deallocate the contiguous range */
4405 (void) vm_deallocate(target_map,
4406 map_addr,
4407 map_size);
4408 }
4409 }
4410
4411 return kr;
4412 } else {
4413 unsigned int access;
4414 vm_prot_t protections;
4415 unsigned int wimg_mode;
4416
4417 /* we are mapping a VM object */
4418
4419 protections = named_entry->protection & VM_PROT_ALL;
4420 access = GET_MAP_MEM(named_entry->protection);
4421
4422 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4423 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4424 offset_in_mapping = offset - vm_object_trunc_page(offset);
4425 if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR) {
4426 offset_in_mapping &= ~((signed)(0xFFF));
4427 }
4428 offset = vm_object_trunc_page(offset);
4429 map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset;
4430 }
4431
4432 object = named_entry->backing.object;
4433 assert(object != VM_OBJECT_NULL);
4434 vm_object_lock(object);
4435 named_entry_unlock(named_entry);
4436
4437 vm_object_reference_locked(object);
4438
4439 wimg_mode = object->wimg_bits;
4440 vm_prot_to_wimg(access, &wimg_mode);
4441 if (object->wimg_bits != wimg_mode) {
4442 vm_object_change_wimg_mode(object, wimg_mode);
4443 }
4444
4445 vm_object_unlock(object);
4446 }
4447 } else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
4448 /*
4449 * JMM - This is temporary until we unify named entries
4450 * and raw memory objects.
4451 *
4452 * Detected fake ip_kotype for a memory object. In
4453 * this case, the port isn't really a port at all, but
4454 * instead is just a raw memory object.
4455 */
4456 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4457 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4458 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for raw memory object.");
4459 }
4460
4461 object = memory_object_to_vm_object((memory_object_t)port);
4462 if (object == VM_OBJECT_NULL) {
4463 return KERN_INVALID_OBJECT;
4464 }
4465 vm_object_reference(object);
4466
4467 /* wait for object (if any) to be ready */
4468 if (object != VM_OBJECT_NULL) {
4469 if (object == kernel_object) {
4470 printf("Warning: Attempt to map kernel object"
4471 " by a non-private kernel entity\n");
4472 return KERN_INVALID_OBJECT;
4473 }
4474 if (!object->pager_ready) {
4475 vm_object_lock(object);
4476
4477 while (!object->pager_ready) {
4478 vm_object_wait(object,
4479 VM_OBJECT_EVENT_PAGER_READY,
4480 THREAD_UNINT);
4481 vm_object_lock(object);
4482 }
4483 vm_object_unlock(object);
4484 }
4485 }
4486 } else {
4487 return KERN_INVALID_OBJECT;
4488 }
4489
4490 if (object != VM_OBJECT_NULL &&
4491 object->named &&
4492 object->pager != MEMORY_OBJECT_NULL &&
4493 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4494 memory_object_t pager;
4495 vm_prot_t pager_prot;
4496 kern_return_t kr;
4497
4498 /*
4499 * For "named" VM objects, let the pager know that the
4500 * memory object is being mapped. Some pagers need to keep
4501 * track of this, to know when they can reclaim the memory
4502 * object, for example.
4503 * VM calls memory_object_map() for each mapping (specifying
4504 * the protection of each mapping) and calls
4505 * memory_object_last_unmap() when all the mappings are gone.
4506 */
4507 pager_prot = max_protection;
4508 if (copy) {
4509 /*
4510 * Copy-On-Write mapping: won't modify the
4511 * memory object.
4512 */
4513 pager_prot &= ~VM_PROT_WRITE;
4514 }
4515 vm_object_lock(object);
4516 pager = object->pager;
4517 if (object->named &&
4518 pager != MEMORY_OBJECT_NULL &&
4519 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4520 assert(object->pager_ready);
4521 vm_object_mapping_wait(object, THREAD_UNINT);
4522 vm_object_mapping_begin(object);
4523 vm_object_unlock(object);
4524
4525 kr = memory_object_map(pager, pager_prot);
4526 assert(kr == KERN_SUCCESS);
4527
4528 vm_object_lock(object);
4529 vm_object_mapping_end(object);
4530 }
4531 vm_object_unlock(object);
4532 }
4533
4534 /*
4535 * Perform the copy if requested
4536 */
4537
4538 if (copy) {
4539 vm_object_t new_object;
4540 vm_object_offset_t new_offset;
4541
4542 result = vm_object_copy_strategically(object, offset,
4543 map_size,
4544 &new_object, &new_offset,
4545 &copy);
4546
4547
4548 if (result == KERN_MEMORY_RESTART_COPY) {
4549 boolean_t success;
4550 boolean_t src_needs_copy;
4551
4552 /*
4553 * XXX
4554 * We currently ignore src_needs_copy.
4555 * This really is the issue of how to make
4556 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
4557 * non-kernel users to use. Solution forthcoming.
4558 * In the meantime, since we don't allow non-kernel
4559 * memory managers to specify symmetric copy,
4560 * we won't run into problems here.
4561 */
4562 new_object = object;
4563 new_offset = offset;
4564 success = vm_object_copy_quickly(&new_object,
4565 new_offset,
4566 map_size,
4567 &src_needs_copy,
4568 &copy);
4569 assert(success);
4570 result = KERN_SUCCESS;
4571 }
4572 /*
4573 * Throw away the reference to the
4574 * original object, as it won't be mapped.
4575 */
4576
4577 vm_object_deallocate(object);
4578
4579 if (result != KERN_SUCCESS) {
4580 return result;
4581 }
4582
4583 object = new_object;
4584 offset = new_offset;
4585 }
4586
4587 /*
4588 * If non-kernel users want to try to prefault pages, the mapping and prefault
4589 * need to be atomic.
4590 */
4591 kernel_prefault = (try_prefault && vm_kernel_map_is_kernel(target_map));
4592 vmk_flags.vmkf_keep_map_locked = (try_prefault && !kernel_prefault);
4593
4594 #if __arm64__
4595 if (fourk) {
4596 /* map this object in a "4K" pager */
4597 result = vm_map_enter_fourk(target_map,
4598 &map_addr,
4599 map_size,
4600 (vm_map_offset_t) mask,
4601 flags,
4602 vmk_flags,
4603 tag,
4604 object,
4605 offset,
4606 copy,
4607 cur_protection,
4608 max_protection,
4609 inheritance);
4610 } else
4611 #endif /* __arm64__ */
4612 {
4613 result = vm_map_enter(target_map,
4614 &map_addr, map_size,
4615 (vm_map_offset_t)mask,
4616 flags,
4617 vmk_flags,
4618 tag,
4619 object, offset,
4620 copy,
4621 cur_protection, max_protection,
4622 inheritance);
4623 }
4624 if (result != KERN_SUCCESS) {
4625 vm_object_deallocate(object);
4626 }
4627
4628 /*
4629 * Try to prefault, and do not forget to release the vm map lock.
4630 */
4631 if (result == KERN_SUCCESS && try_prefault) {
4632 mach_vm_address_t va = map_addr;
4633 kern_return_t kr = KERN_SUCCESS;
4634 unsigned int i = 0;
4635 int pmap_options;
4636
4637 pmap_options = kernel_prefault ? 0 : PMAP_OPTIONS_NOWAIT;
4638 if (object->internal) {
4639 pmap_options |= PMAP_OPTIONS_INTERNAL;
4640 }
4641
4642 for (i = 0; i < page_list_count; ++i) {
4643 if (!UPL_VALID_PAGE(page_list, i)) {
4644 if (kernel_prefault) {
4645 assertf(FALSE, "kernel_prefault && !UPL_VALID_PAGE");
4646 result = KERN_MEMORY_ERROR;
4647 break;
4648 }
4649 } else {
4650 /*
4651 * If this call fails, we should stop trying
4652 * to optimize: other calls are likely to
4653 * fail too.
4654 *
4655 * We won't report an error for such a failure,
4656 * though; prefaulting is an optimization, not
4657 * something critical.
4658 */
4659 kr = pmap_enter_options(target_map->pmap,
4660 va, UPL_PHYS_PAGE(page_list, i),
4661 cur_protection, VM_PROT_NONE,
4662 0, TRUE, pmap_options, NULL);
4663 if (kr != KERN_SUCCESS) {
4664 OSIncrementAtomic64(&vm_prefault_nb_bailout);
4665 if (kernel_prefault) {
4666 result = kr;
4667 }
4668 break;
4669 }
4670 OSIncrementAtomic64(&vm_prefault_nb_pages);
4671 }
4672
4673 /* Next virtual address */
4674 va += PAGE_SIZE;
4675 }
4676 if (vmk_flags.vmkf_keep_map_locked) {
4677 vm_map_unlock(target_map);
4678 }
4679 }
4680
4681 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4682 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4683 *address = map_addr + offset_in_mapping;
4684 } else {
4685 *address = map_addr;
4686 }
4687 return result;
4688 }
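
/*
 * Worked example (added for clarity, not in the original source) of the
 * VM_FLAGS_RETURN_DATA_ADDR adjustment performed by the helper above,
 * assuming a 4 KB VM object page size and a caller-supplied offset of
 * 0x1234:
 *
 *	offset_in_mapping = 0x1234 - vm_object_trunc_page(0x1234) = 0x234
 *	offset            = vm_object_trunc_page(0x1234)          = 0x1000
 *
 * The mapping itself is established on a page boundary, and the helper
 * returns "map_addr + 0x234": the address of the caller's data rather
 * than the address of the containing page.
 */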
4689
4690 kern_return_t
4691 vm_map_enter_mem_object(
4692 vm_map_t target_map,
4693 vm_map_offset_t *address,
4694 vm_map_size_t initial_size,
4695 vm_map_offset_t mask,
4696 int flags,
4697 vm_map_kernel_flags_t vmk_flags,
4698 vm_tag_t tag,
4699 ipc_port_t port,
4700 vm_object_offset_t offset,
4701 boolean_t copy,
4702 vm_prot_t cur_protection,
4703 vm_prot_t max_protection,
4704 vm_inherit_t inheritance)
4705 {
4706 kern_return_t ret;
4707
4708 ret = vm_map_enter_mem_object_helper(target_map,
4709 address,
4710 initial_size,
4711 mask,
4712 flags,
4713 vmk_flags,
4714 tag,
4715 port,
4716 offset,
4717 copy,
4718 cur_protection,
4719 max_protection,
4720 inheritance,
4721 NULL,
4722 0);
4723
4724 #if KASAN
4725 if (ret == KERN_SUCCESS && address && target_map->pmap == kernel_pmap) {
4726 kasan_notify_address(*address, initial_size);
4727 }
4728 #endif
4729
4730 return ret;
4731 }
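
/*
 * Minimal usage sketch (hypothetical, not from the original source):
 * mapping a named memory entry port into a task's map.  "task_map",
 * "mem_port" and "len" are placeholders supplied by the caller; the
 * zero arguments are the mask and the offset within the entry, and
 * "copy" is FALSE so the memory is mapped shared rather than
 * copy-on-write.
 *
 *	vm_map_offset_t addr = 0;
 *	kern_return_t   kr;
 *
 *	kr = vm_map_enter_mem_object(task_map, &addr, len,
 *	    (vm_map_offset_t) 0,
 *	    VM_FLAGS_ANYWHERE,
 *	    VM_MAP_KERNEL_FLAGS_NONE,
 *	    VM_KERN_MEMORY_NONE,
 *	    mem_port,
 *	    0,
 *	    FALSE,
 *	    VM_PROT_READ | VM_PROT_WRITE,
 *	    VM_PROT_READ | VM_PROT_WRITE,
 *	    VM_INHERIT_DEFAULT);
 */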
4732
4733 kern_return_t
4734 vm_map_enter_mem_object_prefault(
4735 vm_map_t target_map,
4736 vm_map_offset_t *address,
4737 vm_map_size_t initial_size,
4738 vm_map_offset_t mask,
4739 int flags,
4740 vm_map_kernel_flags_t vmk_flags,
4741 vm_tag_t tag,
4742 ipc_port_t port,
4743 vm_object_offset_t offset,
4744 vm_prot_t cur_protection,
4745 vm_prot_t max_protection,
4746 upl_page_list_ptr_t page_list,
4747 unsigned int page_list_count)
4748 {
4749 kern_return_t ret;
4750
4751 ret = vm_map_enter_mem_object_helper(target_map,
4752 address,
4753 initial_size,
4754 mask,
4755 flags,
4756 vmk_flags,
4757 tag,
4758 port,
4759 offset,
4760 FALSE,
4761 cur_protection,
4762 max_protection,
4763 VM_INHERIT_DEFAULT,
4764 page_list,
4765 page_list_count);
4766
4767 #if KASAN
4768 if (ret == KERN_SUCCESS && address && target_map->pmap == kernel_pmap) {
4769 kasan_notify_address(*address, initial_size);
4770 }
4771 #endif
4772
4773 return ret;
4774 }
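
/*
 * Minimal usage sketch (hypothetical, not from the original source):
 * the prefault variant takes a UPL page list so the pmap can be
 * pre-populated and the first accesses don't fault.  "page_list" and
 * "page_count" would come from the UPL describing the resident pages;
 * this wrapper always maps shared (copy == FALSE) with
 * VM_INHERIT_DEFAULT, as hard-coded above.
 *
 *	kr = vm_map_enter_mem_object_prefault(task_map, &addr, len,
 *	    (vm_map_offset_t) 0,
 *	    VM_FLAGS_ANYWHERE,
 *	    VM_MAP_KERNEL_FLAGS_NONE,
 *	    VM_KERN_MEMORY_NONE,
 *	    mem_port,
 *	    0,
 *	    VM_PROT_READ | VM_PROT_WRITE,
 *	    VM_PROT_READ | VM_PROT_WRITE,
 *	    page_list,
 *	    page_count);
 */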
4775
4776
4777 kern_return_t
4778 vm_map_enter_mem_object_control(
4779 vm_map_t target_map,
4780 vm_map_offset_t *address,
4781 vm_map_size_t initial_size,
4782 vm_map_offset_t mask,
4783 int flags,
4784 vm_map_kernel_flags_t vmk_flags,
4785 vm_tag_t tag,
4786 memory_object_control_t control,
4787 vm_object_offset_t offset,
4788 boolean_t copy,
4789 vm_prot_t cur_protection,
4790 vm_prot_t max_protection,
4791 vm_inherit_t inheritance)
4792 {
4793 vm_map_address_t map_addr;
4794 vm_map_size_t map_size;
4795 vm_object_t object;
4796 vm_object_size_t size;
4797 kern_return_t result;
4798 memory_object_t pager;
4799 vm_prot_t pager_prot;
4800 kern_return_t kr;
4801 #if __arm64__
4802 boolean_t fourk = vmk_flags.vmkf_fourk;
4803 #endif /* __arm64__ */
4804
4805 /*
4806 * Check arguments for validity
4807 */
4808 if ((target_map == VM_MAP_NULL) ||
4809 (cur_protection & ~VM_PROT_ALL) ||
4810 (max_protection & ~VM_PROT_ALL) ||
4811 (inheritance > VM_INHERIT_LAST_VALID) ||
4812 initial_size == 0) {
4813 return KERN_INVALID_ARGUMENT;
4814 }
4815
4816 #if __arm64__
4817 if (fourk) {
4818 map_addr = vm_map_trunc_page(*address,
4819 FOURK_PAGE_MASK);
4820 map_size = vm_map_round_page(initial_size,
4821 FOURK_PAGE_MASK);
4822 } else
4823 #endif /* __arm64__ */
4824 {
4825 map_addr = vm_map_trunc_page(*address,
4826 VM_MAP_PAGE_MASK(target_map));
4827 map_size = vm_map_round_page(initial_size,
4828 VM_MAP_PAGE_MASK(target_map));
4829 }
4830 size = vm_object_round_page(initial_size);
4831
4832 object = memory_object_control_to_vm_object(control);
4833
4834 if (object == VM_OBJECT_NULL) {
4835 return KERN_INVALID_OBJECT;
4836 }
4837
4838 if (object == kernel_object) {
4839 printf("Warning: Attempt to map kernel object"
4840 " by a non-private kernel entity\n");
4841 return KERN_INVALID_OBJECT;
4842 }
4843
4844 vm_object_lock(object);
4845 object->ref_count++;
4846 vm_object_res_reference(object);
4847
4848 /*
4849 * For "named" VM objects, let the pager know that the
4850 * memory object is being mapped. Some pagers need to keep
4851 * track of this, to know when they can reclaim the memory
4852 * object, for example.
4853 * VM calls memory_object_map() for each mapping (specifying
4854 * the protection of each mapping) and calls
4855 * memory_object_last_unmap() when all the mappings are gone.
4856 */
4857 pager_prot = max_protection;
4858 if (copy) {
4859 pager_prot &= ~VM_PROT_WRITE;
4860 }
4861 pager = object->pager;
4862 if (object->named &&
4863 pager != MEMORY_OBJECT_NULL &&
4864 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4865 assert(object->pager_ready);
4866 vm_object_mapping_wait(object, THREAD_UNINT);
4867 vm_object_mapping_begin(object);
4868 vm_object_unlock(object);
4869
4870 kr = memory_object_map(pager, pager_prot);
4871 assert(kr == KERN_SUCCESS);
4872
4873 vm_object_lock(object);
4874 vm_object_mapping_end(object);
4875 }
4876 vm_object_unlock(object);
4877
4878 /*
4879 * Perform the copy if requested
4880 */
4881
4882 if (copy) {
4883 vm_object_t new_object;
4884 vm_object_offset_t new_offset;
4885
4886 result = vm_object_copy_strategically(object, offset, size,
4887 &new_object, &new_offset,
4888 &copy);
4889
4890
4891 if (result == KERN_MEMORY_RESTART_COPY) {
4892 boolean_t success;
4893 boolean_t src_needs_copy;
4894
4895 /*
4896 * XXX
4897 * We currently ignore src_needs_copy.
4898 * This really is the issue of how to make
4899 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
4900 * non-kernel users to use. Solution forthcoming.
4901 * In the meantime, since we don't allow non-kernel
4902 * memory managers to specify symmetric copy,
4903 * we won't run into problems here.
4904 */
4905 new_object = object;
4906 new_offset = offset;
4907 success = vm_object_copy_quickly(&new_object,
4908 new_offset, size,
4909 &src_needs_copy,
4910 &copy);
4911 assert(success);
4912 result = KERN_SUCCESS;
4913 }
4914 /*
4915 * Throw away the reference to the
4916 * original object, as it won't be mapped.
4917 */
4918
4919 vm_object_deallocate(object);
4920
4921 if (result != KERN_SUCCESS) {
4922 return result;
4923 }
4924
4925 object = new_object;
4926 offset = new_offset;
4927 }
4928
4929 #if __arm64__
4930 if (fourk) {
4931 result = vm_map_enter_fourk(target_map,
4932 &map_addr,
4933 map_size,
4934 (vm_map_offset_t)mask,
4935 flags,
4936 vmk_flags,
4937 tag,
4938 object, offset,
4939 copy,
4940 cur_protection, max_protection,
4941 inheritance);
4942 } else
4943 #endif /* __arm64__ */
4944 {
4945 result = vm_map_enter(target_map,
4946 &map_addr, map_size,
4947 (vm_map_offset_t)mask,
4948 flags,
4949 vmk_flags,
4950 tag,
4951 object, offset,
4952 copy,
4953 cur_protection, max_protection,
4954 inheritance);
4955 }
4956 if (result != KERN_SUCCESS) {
4957 vm_object_deallocate(object);
4958 }
4959 *address = map_addr;
4960
4961 return result;
4962 }
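
/*
 * Minimal usage sketch (hypothetical, not from the original source):
 * mapping by memory object control rather than by port.  "control" is
 * a memory_object_control_t obtained elsewhere (e.g. by a pager).
 *
 *	kr = vm_map_enter_mem_object_control(task_map, &addr, len,
 *	    (vm_map_offset_t) 0,
 *	    VM_FLAGS_ANYWHERE,
 *	    VM_MAP_KERNEL_FLAGS_NONE,
 *	    VM_KERN_MEMORY_NONE,
 *	    control,
 *	    0,
 *	    FALSE,
 *	    VM_PROT_READ,
 *	    VM_PROT_READ,
 *	    VM_INHERIT_DEFAULT);
 */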
4963
4964
4965 #if VM_CPM
4966
4967 #ifdef MACH_ASSERT
4968 extern pmap_paddr_t avail_start, avail_end;
4969 #endif
4970
4971 /*
4972 * Allocate memory in the specified map, with the caveat that
4973 * the memory is physically contiguous. This call may fail
4974 * if the system can't find sufficient contiguous memory.
4975 * This call may cause or lead to heart-stopping amounts of
4976 * paging activity.
4977 *
4978 * Memory obtained from this call should be freed in the
4979 * normal way, viz., via vm_deallocate.
4980 */
4981 kern_return_t
4982 vm_map_enter_cpm(
4983 vm_map_t map,
4984 vm_map_offset_t *addr,
4985 vm_map_size_t size,
4986 int flags)
4987 {
4988 vm_object_t cpm_obj;
4989 pmap_t pmap;
4990 vm_page_t m, pages;
4991 kern_return_t kr;
4992 vm_map_offset_t va, start, end, offset;
4993 #if MACH_ASSERT
4994 vm_map_offset_t prev_addr = 0;
4995 #endif /* MACH_ASSERT */
4996
4997 boolean_t anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
4998 vm_tag_t tag;
4999
5000 VM_GET_FLAGS_ALIAS(flags, tag);
5001
5002 if (size == 0) {
5003 *addr = 0;
5004 return KERN_SUCCESS;
5005 }
5006 if (anywhere) {
5007 *addr = vm_map_min(map);
5008 } else {
5009 *addr = vm_map_trunc_page(*addr,
5010 VM_MAP_PAGE_MASK(map));
5011 }
5012 size = vm_map_round_page(size,
5013 VM_MAP_PAGE_MASK(map));
5014
5015 /*
5016 * LP64todo - cpm_allocate should probably allow
5017 * allocations of >4GB, but not with the current
5018 * algorithm, so just cast down the size for now.
5019 */
5020 if (size > VM_MAX_ADDRESS) {
5021 return KERN_RESOURCE_SHORTAGE;
5022 }
5023 if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
5024 &pages, 0, 0, TRUE, flags)) != KERN_SUCCESS) {
5025 return kr;
5026 }
5027
5028 cpm_obj = vm_object_allocate((vm_object_size_t)size);
5029 assert(cpm_obj != VM_OBJECT_NULL);
5030 assert(cpm_obj->internal);
5031 assert(cpm_obj->vo_size == (vm_object_size_t)size);
5032 assert(cpm_obj->can_persist == FALSE);
5033 assert(cpm_obj->pager_created == FALSE);
5034 assert(cpm_obj->pageout == FALSE);
5035 assert(cpm_obj->shadow == VM_OBJECT_NULL);
5036
5037 /*
5038 * Insert pages into object.
5039 */
5040
5041 vm_object_lock(cpm_obj);
5042 for (offset = 0; offset < size; offset += PAGE_SIZE) {
5043 m = pages;
5044 pages = NEXT_PAGE(m);
5045 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
5046
5047 assert(!m->vmp_gobbled);
5048 assert(!m->vmp_wanted);
5049 assert(!m->vmp_pageout);
5050 assert(!m->vmp_tabled);
5051 assert(VM_PAGE_WIRED(m));
5052 assert(m->vmp_busy);
5053 assert(VM_PAGE_GET_PHYS_PAGE(m) >= (avail_start >> PAGE_SHIFT) && VM_PAGE_GET_PHYS_PAGE(m) <= (avail_end >> PAGE_SHIFT));
5054
5055 m->vmp_busy = FALSE;
5056 vm_page_insert(m, cpm_obj, offset);
5057 }
5058 assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
5059 vm_object_unlock(cpm_obj);
5060
5061 /*
5062 * Hang onto a reference on the object in case a
5063 * multi-threaded application for some reason decides
5064 * to deallocate the portion of the address space into
5065 * which we will insert this object.
5066 *
5067 * Unfortunately, we must insert the object now before
5068 * we can talk to the pmap module about which addresses
5069 * must be wired down. Hence, the race with a multi-
5070 * threaded app.
5071 */
5072 vm_object_reference(cpm_obj);
5073
5074 /*
5075 * Insert object into map.
5076 */
5077
5078 kr = vm_map_enter(
5079 map,
5080 addr,
5081 size,
5082 (vm_map_offset_t)0,
5083 flags,
5084 VM_MAP_KERNEL_FLAGS_NONE,
5085 cpm_obj,
5086 (vm_object_offset_t)0,
5087 FALSE,
5088 VM_PROT_ALL,
5089 VM_PROT_ALL,
5090 VM_INHERIT_DEFAULT);
5091
5092 if (kr != KERN_SUCCESS) {
5093 /*
5094 * A CPM object doesn't have can_persist set,
5095 * so all we have to do is deallocate it to
5096 * free up these pages.
5097 */
5098 assert(cpm_obj->pager_created == FALSE);
5099 assert(cpm_obj->can_persist == FALSE);
5100 assert(cpm_obj->pageout == FALSE);
5101 assert(cpm_obj->shadow == VM_OBJECT_NULL);
5102 vm_object_deallocate(cpm_obj); /* kill acquired ref */
5103 vm_object_deallocate(cpm_obj); /* kill creation ref */
5104 }
5105
5106 /*
5107 * Inform the physical mapping system that the
5108 * range of addresses may not fault, so that
5109 * page tables and such can be locked down as well.
5110 */
5111 start = *addr;
5112 end = start + size;
5113 pmap = vm_map_pmap(map);
5114 pmap_pageable(pmap, start, end, FALSE);
5115
5116 /*
5117 * Enter each page into the pmap, to avoid faults.
5118 * Note that this loop could be coded more efficiently,
5119 * if the need arose, rather than looking up each page
5120 * again.
5121 */
5122 for (offset = 0, va = start; offset < size;
5123 va += PAGE_SIZE, offset += PAGE_SIZE) {
5124 int type_of_fault;
5125
5126 vm_object_lock(cpm_obj);
5127 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
5128 assert(m != VM_PAGE_NULL);
5129
5130 vm_page_zero_fill(m);
5131
5132 type_of_fault = DBG_ZERO_FILL_FAULT;
5133
5134 vm_fault_enter(m, pmap, va, VM_PROT_ALL, VM_PROT_WRITE,
5135 VM_PAGE_WIRED(m),
5136 FALSE, /* change_wiring */
5137 VM_KERN_MEMORY_NONE, /* tag - not wiring */
5138 FALSE, /* no_cache */
5139 FALSE, /* cs_bypass */
5140 0, /* user_tag */
5141 0, /* pmap_options */
5142 NULL, /* need_retry */
5143 &type_of_fault);
5144
5145 vm_object_unlock(cpm_obj);
5146 }
5147
5148 #if MACH_ASSERT
5149 /*
5150 * Verify ordering in address space.
5151 */
5152 for (offset = 0; offset < size; offset += PAGE_SIZE) {
5153 vm_object_lock(cpm_obj);
5154 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
5155 vm_object_unlock(cpm_obj);
5156 if (m == VM_PAGE_NULL) {
5157 panic("vm_allocate_cpm: obj %p off 0x%llx no page",
5158 cpm_obj, (uint64_t)offset);
5159 }
5160 assert(m->vmp_tabled);
5161 assert(!m->vmp_busy);
5162 assert(!m->vmp_wanted);
5163 assert(!m->vmp_fictitious);
5164 assert(!m->vmp_private);
5165 assert(!m->vmp_absent);
5166 assert(!m->vmp_error);
5167 assert(!m->vmp_cleaning);
5168 assert(!m->vmp_laundry);
5169 assert(!m->vmp_precious);
5170 assert(!m->vmp_clustered);
5171 if (offset != 0) {
5172 if (VM_PAGE_GET_PHYS_PAGE(m) != prev_addr + 1) {
5173 printf("start 0x%llx end 0x%llx va 0x%llx\n",
5174 (uint64_t)start, (uint64_t)end, (uint64_t)va);
5175 printf("obj %p off 0x%llx\n", cpm_obj, (uint64_t)offset);
5176 printf("m %p prev_address 0x%llx\n", m, (uint64_t)prev_addr);
5177 panic("vm_allocate_cpm: pages not contig!");
5178 }
5179 }
5180 prev_addr = VM_PAGE_GET_PHYS_PAGE(m);
5181 }
5182 #endif /* MACH_ASSERT */
5183
5184 vm_object_deallocate(cpm_obj); /* kill extra ref */
5185
5186 return kr;
5187 }
5188
5189
5190 #else /* VM_CPM */
5191
5192 /*
5193 * Interface is defined in all cases, but unless the kernel
5194 * is built explicitly for this option, the interface does
5195 * nothing.
5196 */
5197
5198 kern_return_t
5199 vm_map_enter_cpm(
5200 __unused vm_map_t map,
5201 __unused vm_map_offset_t *addr,
5202 __unused vm_map_size_t size,
5203 __unused int flags)
5204 {
5205 return KERN_FAILURE;
5206 }
5207 #endif /* VM_CPM */
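
/*
 * Usage sketch (hypothetical, not from the original source): allocating
 * physically contiguous memory via vm_map_enter_cpm() and freeing it in
 * the normal way, per the header comment above.  On kernels built
 * without VM_CPM the stub simply returns KERN_FAILURE.
 *
 *	vm_map_offset_t cpm_addr = 0;
 *	kern_return_t   kr;
 *
 *	kr = vm_map_enter_cpm(kernel_map, &cpm_addr, len, VM_FLAGS_ANYWHERE);
 *	if (kr == KERN_SUCCESS) {
 *		...
 *		(void) vm_deallocate(kernel_map, cpm_addr, len);
 *	}
 */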
5208
5209 /* Not used without nested pmaps */
5210 #ifndef NO_NESTED_PMAP
5211 /*
5212 * Clip and unnest a portion of a nested submap mapping.
5213 */
5214
5215
5216 static void
5217 vm_map_clip_unnest(
5218 vm_map_t map,
5219 vm_map_entry_t entry,
5220 vm_map_offset_t start_unnest,
5221 vm_map_offset_t end_unnest)
5222 {
5223 vm_map_offset_t old_start_unnest = start_unnest;
5224 vm_map_offset_t old_end_unnest = end_unnest;
5225
5226 assert(entry->is_sub_map);
5227 assert(VME_SUBMAP(entry) != NULL);
5228 assert(entry->use_pmap);
5229
5230 /*
5231 * Query the platform for the optimal unnest range.
5232 * DRK: There's some duplication of effort here, since
5233 * callers may have adjusted the range to some extent. This
5234 * routine was introduced to support 1GiB subtree nesting
5235 * for x86 platforms, which can also nest on 2MiB boundaries
5236 * depending on size/alignment.
5237 */
5238 if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) {
5239 assert(VME_SUBMAP(entry)->is_nested_map);
5240 assert(!VME_SUBMAP(entry)->disable_vmentry_reuse);
5241 log_unnest_badness(map,
5242 old_start_unnest,
5243 old_end_unnest,
5244 VME_SUBMAP(entry)->is_nested_map,
5245 (entry->vme_start +
5246 VME_SUBMAP(entry)->lowest_unnestable_start -
5247 VME_OFFSET(entry)));
5248 }
5249
5250 if (entry->vme_start > start_unnest ||
5251 entry->vme_end < end_unnest) {
5252 panic("vm_map_clip_unnest(0x%llx,0x%llx): "
5253 "bad nested entry: start=0x%llx end=0x%llx\n",
5254 (long long)start_unnest, (long long)end_unnest,
5255 (long long)entry->vme_start, (long long)entry->vme_end);
5256 }
5257
5258 if (start_unnest > entry->vme_start) {
5259 _vm_map_clip_start(&map->hdr,
5260 entry,
5261 start_unnest);
5262 if (map->holelistenabled) {
5263 vm_map_store_update_first_free(map, NULL, FALSE);
5264 } else {
5265 vm_map_store_update_first_free(map, map->first_free, FALSE);
5266 }
5267 }
5268 if (entry->vme_end > end_unnest) {
5269 _vm_map_clip_end(&map->hdr,
5270 entry,
5271 end_unnest);
5272 if (map->holelistenabled) {
5273 vm_map_store_update_first_free(map, NULL, FALSE);
5274 } else {
5275 vm_map_store_update_first_free(map, map->first_free, FALSE);
5276 }
5277 }
5278
5279 pmap_unnest(map->pmap,
5280 entry->vme_start,
5281 entry->vme_end - entry->vme_start);
5282 if ((map->mapped_in_other_pmaps) && os_ref_get_count(&map->map_refcnt) != 0) {
5283 /* clean up parent map/maps */
5284 vm_map_submap_pmap_clean(
5285 map, entry->vme_start,
5286 entry->vme_end,
5287 VME_SUBMAP(entry),
5288 VME_OFFSET(entry));
5289 }
5290 entry->use_pmap = FALSE;
5291 if ((map->pmap != kernel_pmap) &&
5292 (VME_ALIAS(entry) == VM_MEMORY_SHARED_PMAP)) {
5293 VME_ALIAS_SET(entry, VM_MEMORY_UNSHARED_PMAP);
5294 }
5295 }
5296 #endif /* NO_NESTED_PMAP */
5297
5298 /*
5299 * vm_map_clip_start: [ internal use only ]
5300 *
5301 * Asserts that the given entry begins at or after
5302 * the specified address; if necessary,
5303 * it splits the entry into two.
5304 */
5305 void
5306 vm_map_clip_start(
5307 vm_map_t map,
5308 vm_map_entry_t entry,
5309 vm_map_offset_t startaddr)
5310 {
5311 #ifndef NO_NESTED_PMAP
5312 if (entry->is_sub_map &&
5313 entry->use_pmap &&
5314 startaddr >= entry->vme_start) {
5315 vm_map_offset_t start_unnest, end_unnest;
5316
5317 /*
5318 * Make sure "startaddr" is no longer in a nested range
5319 * before we clip. Unnest only the minimum range the platform
5320 * can handle.
5321 * vm_map_clip_unnest may perform additional adjustments to
5322 * the unnest range.
5323 */
5324 start_unnest = startaddr & ~(pmap_nesting_size_min - 1);
5325 end_unnest = start_unnest + pmap_nesting_size_min;
5326 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
5327 }
5328 #endif /* NO_NESTED_PMAP */
5329 if (startaddr > entry->vme_start) {
5330 if (VME_OBJECT(entry) &&
5331 !entry->is_sub_map &&
5332 VME_OBJECT(entry)->phys_contiguous) {
5333 pmap_remove(map->pmap,
5334 (addr64_t)(entry->vme_start),
5335 (addr64_t)(entry->vme_end));
5336 }
5337 if (entry->vme_atomic) {
5338 panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)\n", map, entry);
5339 }
5340
5341 DTRACE_VM5(
5342 vm_map_clip_start,
5343 vm_map_t, map,
5344 vm_map_offset_t, entry->vme_start,
5345 vm_map_offset_t, entry->vme_end,
5346 vm_map_offset_t, startaddr,
5347 int, VME_ALIAS(entry));
5348
5349 _vm_map_clip_start(&map->hdr, entry, startaddr);
5350 if (map->holelistenabled) {
5351 vm_map_store_update_first_free(map, NULL, FALSE);
5352 } else {
5353 vm_map_store_update_first_free(map, map->first_free, FALSE);
5354 }
5355 }
5356 }
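
/*
 * Worked example (added for clarity, not in the original source) of the
 * unnest alignment above, assuming pmap_nesting_size_min == 0x200000
 * (2 MiB) and startaddr == 0x12345000:
 *
 *	start_unnest = 0x12345000 & ~(0x200000 - 1) = 0x12200000
 *	end_unnest   = start_unnest + 0x200000      = 0x12400000
 *
 * i.e. the smallest platform-supported nesting granule containing
 * "startaddr" is unnested before the entry is clipped.
 */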
5357
5358
5359 #define vm_map_copy_clip_start(copy, entry, startaddr) \
5360 MACRO_BEGIN \
5361 if ((startaddr) > (entry)->vme_start) \
5362 _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \
5363 MACRO_END
5364
5365 /*
5366 * This routine is called only when it is known that
5367 * the entry must be split.
5368 */
5369 static void
5370 _vm_map_clip_start(
5371 struct vm_map_header *map_header,
5372 vm_map_entry_t entry,
5373 vm_map_offset_t start)
5374 {
5375 vm_map_entry_t new_entry;
5376
5377 /*
5378 * Split off the front portion --
5379 * note that we must insert the new
5380 * entry BEFORE this one, so that
5381 * this entry has the specified starting
5382 * address.
5383 */
5384
5385 if (entry->map_aligned) {
5386 assert(VM_MAP_PAGE_ALIGNED(start,
5387 VM_MAP_HDR_PAGE_MASK(map_header)));
5388 }
5389
5390 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
5391 vm_map_entry_copy_full(new_entry, entry);
5392
5393 new_entry->vme_end = start;
5394 assert(new_entry->vme_start < new_entry->vme_end);
5395 VME_OFFSET_SET(entry, VME_OFFSET(entry) + (start - entry->vme_start));
5396 assert(start < entry->vme_end);
5397 entry->vme_start = start;
5398
5399 _vm_map_store_entry_link(map_header, entry->vme_prev, new_entry);
5400
5401 if (entry->is_sub_map) {
5402 vm_map_reference(VME_SUBMAP(new_entry));
5403 } else {
5404 vm_object_reference(VME_OBJECT(new_entry));
5405 }
5406 }
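
/*
 * Illustrative sketch (not part of the original source) of the split
 * performed above, for an entry covering [0x1000, 0x5000) clipped at
 * start == 0x3000:
 *
 *	before:  [0x1000 ........................... 0x5000)   entry
 *	after:   [0x1000 ..... 0x3000)                          new_entry
 *	                       [0x3000 ............. 0x5000)    entry
 *
 * "new_entry" keeps the original offset and is linked before "entry";
 * "entry" has its offset advanced by (start - old vme_start), and an
 * extra reference is taken on the backing object or submap for the new
 * entry.
 */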
5407
5408
5409 /*
5410 * vm_map_clip_end: [ internal use only ]
5411 *
5412 * Asserts that the given entry ends at or before
5413 * the specified address; if necessary,
5414 * it splits the entry into two.
5415 */
5416 void
5417 vm_map_clip_end(
5418 vm_map_t map,
5419 vm_map_entry_t entry,
5420 vm_map_offset_t endaddr)
5421 {
5422 if (endaddr > entry->vme_end) {
5423 /*
5424 * Within the scope of this clipping, limit "endaddr" to
5425 * the end of this map entry...
5426 */
5427 endaddr = entry->vme_end;
5428 }
5429 #ifndef NO_NESTED_PMAP
5430 if (entry->is_sub_map && entry->use_pmap) {
5431 vm_map_offset_t start_unnest, end_unnest;
5432
5433 /*
5434 * Make sure the range between the start of this entry and
5435 * the new "endaddr" is no longer nested before we clip.
5436 * Unnest only the minimum range the platform can handle.
5437 * vm_map_clip_unnest may perform additional adjustments to
5438 * the unnest range.
5439 */
5440 start_unnest = entry->vme_start;
5441 end_unnest =
5442 (endaddr + pmap_nesting_size_min - 1) &
5443 ~(pmap_nesting_size_min - 1);
5444 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
5445 }
5446 #endif /* NO_NESTED_PMAP */
5447 if (endaddr < entry->vme_end) {
5448 if (VME_OBJECT(entry) &&
5449 !entry->is_sub_map &&
5450 VME_OBJECT(entry)->phys_contiguous) {
5451 pmap_remove(map->pmap,
5452 (addr64_t)(entry->vme_start),
5453 (addr64_t)(entry->vme_end));
5454 }
5455 if (entry->vme_atomic) {
5456 panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)\n", map, entry);
5457 }
5458 DTRACE_VM5(
5459 vm_map_clip_end,
5460 vm_map_t, map,
5461 vm_map_offset_t, entry->vme_start,
5462 vm_map_offset_t, entry->vme_end,
5463 vm_map_offset_t, endaddr,
5464 int, VME_ALIAS(entry));
5465
5466 _vm_map_clip_end(&map->hdr, entry, endaddr);
5467 if (map->holelistenabled) {
5468 vm_map_store_update_first_free(map, NULL, FALSE);
5469 } else {
5470 vm_map_store_update_first_free(map, map->first_free, FALSE);
5471 }
5472 }
5473 }
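
/*
 * Typical usage sketch (not part of the original source): callers that
 * operate on an arbitrary [start, end) range clip the bounding entries
 * first, so the range is covered by whole entries:
 *
 *	if (vm_map_lookup_entry(map, start, &entry)) {
 *		vm_map_clip_start(map, entry, start);
 *		vm_map_clip_end(map, entry, end);
 *		...
 *	}
 *
 * This is the pattern used by vm_map_submap() and vm_map_protect()
 * later in this file.
 */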
5474
5475
5476 #define vm_map_copy_clip_end(copy, entry, endaddr) \
5477 MACRO_BEGIN \
5478 if ((endaddr) < (entry)->vme_end) \
5479 _vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \
5480 MACRO_END
5481
5482 /*
5483 * This routine is called only when it is known that
5484 * the entry must be split.
5485 */
5486 static void
5487 _vm_map_clip_end(
5488 struct vm_map_header *map_header,
5489 vm_map_entry_t entry,
5490 vm_map_offset_t end)
5491 {
5492 vm_map_entry_t new_entry;
5493
5494 /*
5495 * Create a new entry and insert it
5496 * AFTER the specified entry
5497 */
5498
5499 if (entry->map_aligned) {
5500 assert(VM_MAP_PAGE_ALIGNED(end,
5501 VM_MAP_HDR_PAGE_MASK(map_header)));
5502 }
5503
5504 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
5505 vm_map_entry_copy_full(new_entry, entry);
5506
5507 assert(entry->vme_start < end);
5508 new_entry->vme_start = entry->vme_end = end;
5509 VME_OFFSET_SET(new_entry,
5510 VME_OFFSET(new_entry) + (end - entry->vme_start));
5511 assert(new_entry->vme_start < new_entry->vme_end);
5512
5513 _vm_map_store_entry_link(map_header, entry, new_entry);
5514
5515 if (entry->is_sub_map) {
5516 vm_map_reference(VME_SUBMAP(new_entry));
5517 } else {
5518 vm_object_reference(VME_OBJECT(new_entry));
5519 }
5520 }
5521
5522
5523 /*
5524 * VM_MAP_RANGE_CHECK: [ internal use only ]
5525 *
5526 * Clamps the starting and ending region
5527 * addresses to the valid range of the map.
5528 */
5529 #define VM_MAP_RANGE_CHECK(map, start, end) \
5530 MACRO_BEGIN \
5531 if (start < vm_map_min(map)) \
5532 start = vm_map_min(map); \
5533 if (end > vm_map_max(map)) \
5534 end = vm_map_max(map); \
5535 if (start > end) \
5536 start = end; \
5537 MACRO_END
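
/*
 * Worked example (added for clarity, not in the original source): for a
 * map whose valid range is [vm_map_min(map), vm_map_max(map)) =
 * [0x1000, 0xF000), VM_MAP_RANGE_CHECK adjusts its arguments in place:
 *
 *	start = 0x0800, end = 0x4000   ->   start = 0x1000, end = 0x4000
 *	start = 0x2000, end = 0xFF00   ->   start = 0x2000, end = 0xF000
 *	start = 0x6000, end = 0x2000   ->   start = 0x2000, end = 0x2000
 *
 * Both bounds are clamped to the map, and an inverted range collapses
 * to an empty one.
 */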
5538
5539 /*
5540 * vm_map_range_check: [ internal use only ]
5541 *
5542 * Check that the region defined by the specified start and
5543 * end addresses is wholly contained within a single map
5544 * entry or set of adjacent map entries of the specified map,
5545 * i.e. the specified region contains no unmapped space.
5546 * If any or all of the region is unmapped, FALSE is returned.
5547 * Otherwise, TRUE is returned and if the output argument 'entry'
5548 * is not NULL it points to the map entry containing the start
5549 * of the region.
5550 *
5551 * The map is locked for reading on entry and is left locked.
5552 */
5553 static boolean_t
5554 vm_map_range_check(
5555 vm_map_t map,
5556 vm_map_offset_t start,
5557 vm_map_offset_t end,
5558 vm_map_entry_t *entry)
5559 {
5560 vm_map_entry_t cur;
5561 vm_map_offset_t prev;
5562
5563 /*
5564 * Basic sanity checks first
5565 */
5566 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end) {
5567 return FALSE;
5568 }
5569
5570 /*
5571 * Check first if the region starts within a valid
5572 * mapping for the map.
5573 */
5574 if (!vm_map_lookup_entry(map, start, &cur)) {
5575 return FALSE;
5576 }
5577
5578 /*
5579 * Optimize for the case that the region is contained
5580 * in a single map entry.
5581 */
5582 if (entry != (vm_map_entry_t *) NULL) {
5583 *entry = cur;
5584 }
5585 if (end <= cur->vme_end) {
5586 return TRUE;
5587 }
5588
5589 /*
5590 * If the region is not wholly contained within a
5591 * single entry, walk the entries looking for holes.
5592 */
5593 prev = cur->vme_end;
5594 cur = cur->vme_next;
5595 while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
5596 if (end <= cur->vme_end) {
5597 return TRUE;
5598 }
5599 prev = cur->vme_end;
5600 cur = cur->vme_next;
5601 }
5602 return FALSE;
5603 }
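
/*
 * Minimal usage sketch (hypothetical, not from the original source),
 * with the map already locked for reading as required above:
 *
 *	vm_map_entry_t entry;
 *
 *	if (!vm_map_range_check(map, start, end, &entry)) {
 *		return KERN_INVALID_ADDRESS;
 *	}
 *	...	"entry" now points at the map entry containing "start"
 */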
5604
5605 /*
5606 * vm_map_submap: [ kernel use only ]
5607 *
5608 * Mark the given range as handled by a subordinate map.
5609 *
5610 * This range must have been created with vm_map_find using
5611 * the vm_submap_object, and no other operations may have been
5612 * performed on this range prior to calling vm_map_submap.
5613 *
5614 * Only a limited number of operations can be performed
5615 * within this range after calling vm_map_submap:
5616 * vm_fault
5617 * [Don't try vm_map_copyin!]
5618 *
5619 * To remove a submapping, one must first remove the
5620 * range from the superior map, and then destroy the
5621 * submap (if desired). [Better yet, don't try it.]
5622 */
5623 kern_return_t
5624 vm_map_submap(
5625 vm_map_t map,
5626 vm_map_offset_t start,
5627 vm_map_offset_t end,
5628 vm_map_t submap,
5629 vm_map_offset_t offset,
5630 #ifdef NO_NESTED_PMAP
5631 __unused
5632 #endif /* NO_NESTED_PMAP */
5633 boolean_t use_pmap)
5634 {
5635 vm_map_entry_t entry;
5636 kern_return_t result = KERN_INVALID_ARGUMENT;
5637 vm_object_t object;
5638
5639 vm_map_lock(map);
5640
5641 if (!vm_map_lookup_entry(map, start, &entry)) {
5642 entry = entry->vme_next;
5643 }
5644
5645 if (entry == vm_map_to_entry(map) ||
5646 entry->is_sub_map) {
5647 vm_map_unlock(map);
5648 return KERN_INVALID_ARGUMENT;
5649 }
5650
5651 vm_map_clip_start(map, entry, start);
5652 vm_map_clip_end(map, entry, end);
5653
5654 if ((entry->vme_start == start) && (entry->vme_end == end) &&
5655 (!entry->is_sub_map) &&
5656 ((object = VME_OBJECT(entry)) == vm_submap_object) &&
5657 (object->resident_page_count == 0) &&
5658 (object->copy == VM_OBJECT_NULL) &&
5659 (object->shadow == VM_OBJECT_NULL) &&
5660 (!object->pager_created)) {
5661 VME_OFFSET_SET(entry, (vm_object_offset_t)offset);
5662 VME_OBJECT_SET(entry, VM_OBJECT_NULL);
5663 vm_object_deallocate(object);
5664 entry->is_sub_map = TRUE;
5665 entry->use_pmap = FALSE;
5666 VME_SUBMAP_SET(entry, submap);
5667 vm_map_reference(submap);
5668 if (submap->mapped_in_other_pmaps == FALSE &&
5669 vm_map_pmap(submap) != PMAP_NULL &&
5670 vm_map_pmap(submap) != vm_map_pmap(map)) {
5671 /*
5672 * This submap is being mapped in a map
5673 * that uses a different pmap.
5674 * Set its "mapped_in_other_pmaps" flag
5675 * to indicate that we now need to
5676 * remove mappings from all pmaps rather
5677 * than just the submap's pmap.
5678 */
5679 submap->mapped_in_other_pmaps = TRUE;
5680 }
5681
5682 #ifndef NO_NESTED_PMAP
5683 if (use_pmap) {
5684 /* nest if platform code will allow */
5685 if (submap->pmap == NULL) {
5686 ledger_t ledger = map->pmap->ledger;
5687 submap->pmap = pmap_create_options(ledger,
5688 (vm_map_size_t) 0, 0);
5689 if (submap->pmap == PMAP_NULL) {
5690 vm_map_unlock(map);
5691 return KERN_NO_SPACE;
5692 }
5693 #if defined(__arm__) || defined(__arm64__)
5694 pmap_set_nested(submap->pmap);
5695 #endif
5696 }
5697 result = pmap_nest(map->pmap,
5698 (VME_SUBMAP(entry))->pmap,
5699 (addr64_t)start,
5700 (addr64_t)start,
5701 (uint64_t)(end - start));
5702 if (result) {
5703 panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result);
5704 }
5705 entry->use_pmap = TRUE;
5706 }
5707 #else /* NO_NESTED_PMAP */
5708 pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end);
5709 #endif /* NO_NESTED_PMAP */
5710 result = KERN_SUCCESS;
5711 }
5712 vm_map_unlock(map);
5713
5714 return result;
5715 }
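
/*
 * Usage sketch (hypothetical, not from the original source): per the
 * header comment above, the range must first have been reserved against
 * vm_submap_object; "use_pmap" requests pmap nesting where the platform
 * supports it.
 *
 *	kr = vm_map_submap(parent_map, start, end,
 *	    submap,
 *	    0,
 *	    TRUE);
 */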
5716
5717 /*
5718 * vm_map_protect:
5719 *
5720 * Sets the protection of the specified address
5721 * region in the target map. If "set_max" is
5722 * specified, the maximum protection is to be set;
5723 * otherwise, only the current protection is affected.
5724 */
5725 kern_return_t
5726 vm_map_protect(
5727 vm_map_t map,
5728 vm_map_offset_t start,
5729 vm_map_offset_t end,
5730 vm_prot_t new_prot,
5731 boolean_t set_max)
5732 {
5733 vm_map_entry_t current;
5734 vm_map_offset_t prev;
5735 vm_map_entry_t entry;
5736 vm_prot_t new_max;
5737 int pmap_options = 0;
5738 kern_return_t kr;
5739
5740 if (new_prot & VM_PROT_COPY) {
5741 vm_map_offset_t new_start;
5742 vm_prot_t cur_prot, max_prot;
5743 vm_map_kernel_flags_t kflags;
5744
5745 /* LP64todo - see below */
5746 if (start >= map->max_offset) {
5747 return KERN_INVALID_ADDRESS;
5748 }
5749
5750 #if VM_PROTECT_WX_FAIL
5751 if ((new_prot & VM_PROT_EXECUTE) &&
5752 map != kernel_map &&
5753 cs_process_enforcement(NULL)) {
5754 DTRACE_VM3(cs_wx,
5755 uint64_t, (uint64_t) start,
5756 uint64_t, (uint64_t) end,
5757 vm_prot_t, new_prot);
5758 printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
5759 proc_selfpid(),
5760 (current_task()->bsd_info
5761 ? proc_name_address(current_task()->bsd_info)
5762 : "?"),
5763 __FUNCTION__);
5764 return KERN_PROTECTION_FAILURE;
5765 }
5766 #endif /* VM_PROTECT_WX_FAIL */
5767
5768 /*
5769 * Let vm_map_remap_extract() know that it will need to:
5770 * + make a copy of the mapping
5771 * + add VM_PROT_WRITE to the max protections
5772 * + remove any protections that are no longer allowed from the
5773 * max protections (to avoid any WRITE/EXECUTE conflict, for
5774 * example).
5775 * Note that "max_prot" is an IN/OUT parameter only for this
5776 * specific (VM_PROT_COPY) case. It's usually an OUT parameter
5777 * only.
5778 */
5779 max_prot = new_prot & VM_PROT_ALL;
5780 kflags = VM_MAP_KERNEL_FLAGS_NONE;
5781 kflags.vmkf_remap_prot_copy = TRUE;
5782 kflags.vmkf_overwrite_immutable = TRUE;
5783 new_start = start;
5784 kr = vm_map_remap(map,
5785 &new_start,
5786 end - start,
5787 0, /* mask */
5788 VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
5789 kflags,
5790 0,
5791 map,
5792 start,
5793 TRUE, /* copy-on-write remapping! */
5794 &cur_prot,
5795 &max_prot,
5796 VM_INHERIT_DEFAULT);
5797 if (kr != KERN_SUCCESS) {
5798 return kr;
5799 }
5800 new_prot &= ~VM_PROT_COPY;
5801 }
5802
5803 vm_map_lock(map);
5804
5805 /* LP64todo - remove this check when vm_map_commpage64()
5806 * no longer has to stuff in a map_entry for the commpage
5807 * above the map's max_offset.
5808 */
5809 if (start >= map->max_offset) {
5810 vm_map_unlock(map);
5811 return KERN_INVALID_ADDRESS;
5812 }
5813
5814 while (1) {
5815 /*
5816 * Lookup the entry. If it doesn't start in a valid
5817 * entry, return an error.
5818 */
5819 if (!vm_map_lookup_entry(map, start, &entry)) {
5820 vm_map_unlock(map);
5821 return KERN_INVALID_ADDRESS;
5822 }
5823
5824 if (entry->superpage_size && (start & (SUPERPAGE_SIZE - 1))) { /* extend request to whole entry */
5825 start = SUPERPAGE_ROUND_DOWN(start);
5826 continue;
5827 }
5828 break;
5829 }
5830 if (entry->superpage_size) {
5831 end = SUPERPAGE_ROUND_UP(end);
5832 }
5833
5834 /*
5835 * Make a first pass to check for protection and address
5836 * violations.
5837 */
5838
5839 current = entry;
5840 prev = current->vme_start;
5841 while ((current != vm_map_to_entry(map)) &&
5842 (current->vme_start < end)) {
5843 /*
5844 * If there is a hole, return an error.
5845 */
5846 if (current->vme_start != prev) {
5847 vm_map_unlock(map);
5848 return KERN_INVALID_ADDRESS;
5849 }
5850
5851 new_max = current->max_protection;
5852 if ((new_prot & new_max) != new_prot) {
5853 vm_map_unlock(map);
5854 return KERN_PROTECTION_FAILURE;
5855 }
5856
5857 if ((new_prot & VM_PROT_WRITE) &&
5858 (new_prot & VM_PROT_EXECUTE) &&
5859 #if !CONFIG_EMBEDDED
5860 map != kernel_map &&
5861 cs_process_enforcement(NULL) &&
5862 #endif /* !CONFIG_EMBEDDED */
5863 !(current->used_for_jit)) {
5864 DTRACE_VM3(cs_wx,
5865 uint64_t, (uint64_t) current->vme_start,
5866 uint64_t, (uint64_t) current->vme_end,
5867 vm_prot_t, new_prot);
5868 printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
5869 proc_selfpid(),
5870 (current_task()->bsd_info
5871 ? proc_name_address(current_task()->bsd_info)
5872 : "?"),
5873 __FUNCTION__);
5874 new_prot &= ~VM_PROT_EXECUTE;
5875 #if VM_PROTECT_WX_FAIL
5876 vm_map_unlock(map);
5877 return KERN_PROTECTION_FAILURE;
5878 #endif /* VM_PROTECT_WX_FAIL */
5879 }
5880
5881 /*
5882 * If the task has requested executable lockdown,
5883 * deny either:
5884 * - adding executable protections, OR
5885 * - adding write protections to an existing executable mapping.
5886 */
5887 if (map->map_disallow_new_exec == TRUE) {
5888 if ((new_prot & VM_PROT_EXECUTE) ||
5889 ((current->protection & VM_PROT_EXECUTE) && (new_prot & VM_PROT_WRITE))) {
5890 vm_map_unlock(map);
5891 return KERN_PROTECTION_FAILURE;
5892 }
5893 }
5894
5895 prev = current->vme_end;
5896 current = current->vme_next;
5897 }
5898
5899 #if __arm64__
5900 if (end > prev &&
5901 end == vm_map_round_page(prev, VM_MAP_PAGE_MASK(map))) {
5902 vm_map_entry_t prev_entry;
5903
5904 prev_entry = current->vme_prev;
5905 if (prev_entry != vm_map_to_entry(map) &&
5906 !prev_entry->map_aligned &&
5907 (vm_map_round_page(prev_entry->vme_end,
5908 VM_MAP_PAGE_MASK(map))
5909 == end)) {
5910 /*
5911 * The last entry in our range is not "map-aligned"
5912 * but it would have reached all the way to "end"
5913 * if it had been map-aligned, so this is not really
5914 * a hole in the range and we can proceed.
5915 */
5916 prev = end;
5917 }
5918 }
5919 #endif /* __arm64__ */
5920
5921 if (end > prev) {
5922 vm_map_unlock(map);
5923 return KERN_INVALID_ADDRESS;
5924 }
5925
5926 /*
5927 * Go back and fix up protections.
5928 * Clip to start here if the range starts within
5929 * the entry.
5930 */
5931
5932 current = entry;
5933 if (current != vm_map_to_entry(map)) {
5934 /* clip and unnest if necessary */
5935 vm_map_clip_start(map, current, start);
5936 }
5937
5938 while ((current != vm_map_to_entry(map)) &&
5939 (current->vme_start < end)) {
5940 vm_prot_t old_prot;
5941
5942 vm_map_clip_end(map, current, end);
5943
5944 if (current->is_sub_map) {
5945 /* clipping did unnest if needed */
5946 assert(!current->use_pmap);
5947 }
5948
5949 old_prot = current->protection;
5950
5951 if (set_max) {
5952 current->max_protection = new_prot;
5953 current->protection = new_prot & old_prot;
5954 } else {
5955 current->protection = new_prot;
5956 }
5957
5958 /*
5959 * Update physical map if necessary.
5960 * If the request is to turn off write protection,
5961 * we won't do it for real (in pmap). This is because
5962 * it would cause copy-on-write to fail. We've already
5963 * set the new protection in the map, so if a
5964 * write-protect fault occurred, it will be fixed up
5965 * properly, COW or not.
5966 */
5967 if (current->protection != old_prot) {
5968 /* Look one level in; we support nested pmaps */
5969 /* from mapped submaps which are direct entries */
5970 /* in our map */
5971
5972 vm_prot_t prot;
5973
5974 prot = current->protection;
5975 if (current->is_sub_map || (VME_OBJECT(current) == NULL) || (VME_OBJECT(current) != compressor_object)) {
5976 prot &= ~VM_PROT_WRITE;
5977 } else {
5978 assert(!VME_OBJECT(current)->code_signed);
5979 assert(VME_OBJECT(current)->copy_strategy == MEMORY_OBJECT_COPY_NONE);
5980 }
5981
5982 if (override_nx(map, VME_ALIAS(current)) && prot) {
5983 prot |= VM_PROT_EXECUTE;
5984 }
5985
5986 #if CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG)
5987 if (!(old_prot & VM_PROT_EXECUTE) &&
5988 (prot & VM_PROT_EXECUTE) &&
5989 panic_on_unsigned_execute &&
5990 (proc_selfcsflags() & CS_KILL)) {
5991 panic("vm_map_protect(%p,0x%llx,0x%llx) old=0x%x new=0x%x - <rdar://23770418> code-signing bypass?\n", map, (uint64_t)current->vme_start, (uint64_t)current->vme_end, old_prot, prot);
5992 }
5993 #endif /* CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG) */
5994
5995 if (pmap_has_prot_policy(prot)) {
5996 if (current->wired_count) {
5997 panic("vm_map_protect(%p,0x%llx,0x%llx) new=0x%x wired=%x\n",
5998 map, (uint64_t)current->vme_start, (uint64_t)current->vme_end, prot, current->wired_count);
5999 }
6000
6001 /* If the pmap layer cares about this
6002 * protection type, force a fault for
6003 * each page so that vm_fault will
6004 * repopulate the page with the full
6005 * set of protections.
6006 */
6007 /*
6008 * TODO: We don't seem to need this,
6009 * but this is due to an internal
6010 * implementation detail of
6011 * pmap_protect. Do we want to rely
6012 * on this?
6013 */
6014 prot = VM_PROT_NONE;
6015 }
6016
6017 if (current->is_sub_map && current->use_pmap) {
6018 pmap_protect(VME_SUBMAP(current)->pmap,
6019 current->vme_start,
6020 current->vme_end,
6021 prot);
6022 } else {
6023 if (prot & VM_PROT_WRITE) {
6024 if (VME_OBJECT(current) == compressor_object) {
6025 /*
6026 * For write requests on the
6027 * compressor, we will ask the
6028 * pmap layer to prevent us from
6029 * taking a write fault when we
6030 * attempt to access the mapping
6031 * next.
6032 */
6033 pmap_options |= PMAP_OPTIONS_PROTECT_IMMEDIATE;
6034 }
6035 }
6036
6037 pmap_protect_options(map->pmap,
6038 current->vme_start,
6039 current->vme_end,
6040 prot,
6041 pmap_options,
6042 NULL);
6043 }
6044 }
6045 current = current->vme_next;
6046 }
6047
6048 current = entry;
6049 while ((current != vm_map_to_entry(map)) &&
6050 (current->vme_start <= end)) {
6051 vm_map_simplify_entry(map, current);
6052 current = current->vme_next;
6053 }
6054
6055 vm_map_unlock(map);
6056 return KERN_SUCCESS;
6057 }
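#if 0 /* Illustrative sketch, not part of the original source. */
/*
 * A minimal sketch of how a kernel caller might use vm_map_protect()
 * above: first lower the current protection, then clamp the maximum
 * protection so the range cannot be made writable again.  The function
 * name and the map/range arguments are hypothetical; only
 * vm_map_protect() and the VM_PROT_* constants are real.
 */
static kern_return_t
example_make_range_readonly(
	vm_map_t        map,            /* hypothetical target map */
	vm_map_offset_t start,
	vm_map_offset_t end)
{
	kern_return_t kr;

	/* Change only the current protection to read-only. */
	kr = vm_map_protect(map, start, end, VM_PROT_READ, FALSE);
	if (kr != KERN_SUCCESS) {
		return kr;
	}

	/*
	 * Also clamp the maximum protection.  With set_max == TRUE the
	 * entry's max_protection becomes VM_PROT_READ and its current
	 * protection is intersected with it (see the set_max branch in
	 * vm_map_protect() above).
	 */
	return vm_map_protect(map, start, end, VM_PROT_READ, TRUE);
}
#endif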
6058
6059 /*
6060 * vm_map_inherit:
6061 *
6062 * Sets the inheritance of the specified address
6063 * range in the target map. Inheritance
6064 * affects how the map will be shared with
6065 * child maps at the time of vm_map_fork.
6066 */
6067 kern_return_t
6068 vm_map_inherit(
6069 vm_map_t map,
6070 vm_map_offset_t start,
6071 vm_map_offset_t end,
6072 vm_inherit_t new_inheritance)
6073 {
6074 vm_map_entry_t entry;
6075 vm_map_entry_t temp_entry;
6076
6077 vm_map_lock(map);
6078
6079 VM_MAP_RANGE_CHECK(map, start, end);
6080
6081 if (vm_map_lookup_entry(map, start, &temp_entry)) {
6082 entry = temp_entry;
6083 } else {
6084 temp_entry = temp_entry->vme_next;
6085 entry = temp_entry;
6086 }
6087
6088 /* first check entire range for submaps which can't support the */
6089 /* given inheritance. */
6090 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
6091 if (entry->is_sub_map) {
6092 if (new_inheritance == VM_INHERIT_COPY) {
6093 vm_map_unlock(map);
6094 return KERN_INVALID_ARGUMENT;
6095 }
6096 }
6097
6098 entry = entry->vme_next;
6099 }
6100
6101 entry = temp_entry;
6102 if (entry != vm_map_to_entry(map)) {
6103 /* clip and unnest if necessary */
6104 vm_map_clip_start(map, entry, start);
6105 }
6106
6107 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
6108 vm_map_clip_end(map, entry, end);
6109 if (entry->is_sub_map) {
6110 /* clip did unnest if needed */
6111 assert(!entry->use_pmap);
6112 }
6113
6114 entry->inheritance = new_inheritance;
6115
6116 entry = entry->vme_next;
6117 }
6118
6119 vm_map_unlock(map);
6120 return KERN_SUCCESS;
6121 }
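#if 0 /* Illustrative sketch, not part of the original source. */
/*
 * A minimal sketch of how a caller might use vm_map_inherit() above to
 * make a range shared with children created by vm_map_fork().  The
 * wrapper name and arguments are hypothetical; note that the loop above
 * rejects VM_INHERIT_COPY with KERN_INVALID_ARGUMENT if the range
 * contains a submap entry.
 */
static kern_return_t
example_share_range_with_children(
	vm_map_t        map,            /* hypothetical target map */
	vm_map_offset_t start,
	vm_map_offset_t end)
{
	return vm_map_inherit(map, start, end, VM_INHERIT_SHARE);
}
#endif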
6122
6123 /*
6124 * Update the accounting for the amount of wired memory in this map. If the user has
6125 * exceeded the defined limits, then we fail. Wiring on behalf of the kernel never fails.
6126 */
6127
6128 static kern_return_t
6129 add_wire_counts(
6130 vm_map_t map,
6131 vm_map_entry_t entry,
6132 boolean_t user_wire)
6133 {
6134 vm_map_size_t size;
6135
6136 if (user_wire) {
6137 unsigned int total_wire_count = vm_page_wire_count + vm_lopage_free_count;
6138
6139 /*
6140 * We're wiring memory at the request of the user. Check if this is the first time the user is wiring
6141 * this map entry.
6142 */
6143
6144 if (entry->user_wired_count == 0) {
6145 size = entry->vme_end - entry->vme_start;
6146
6147 /*
6148 * Since this is the first time the user is wiring this map entry, check to see if we're
6149 * exceeding the user wire limits. There is a per-map limit, which is the smaller of
6150 * the process's rlimit and the global vm_per_task_user_wire_limit. There is also
6151 * a system-wide limit on the amount of memory all users can wire. If the user is over either
6152 * limit, then we fail.
6153 */
6154
6155 if (size + map->user_wire_size > MIN(map->user_wire_limit, vm_per_task_user_wire_limit) ||
6156 size + ptoa_64(total_wire_count) > vm_global_user_wire_limit) {
6157 return KERN_RESOURCE_SHORTAGE;
6158 }
6159
6160 /*
6161 * The first time the user wires an entry, we also increment the wired_count and add this to
6162 * the total that has been wired in the map.
6163 */
6164
6165 if (entry->wired_count >= MAX_WIRE_COUNT) {
6166 return KERN_FAILURE;
6167 }
6168
6169 entry->wired_count++;
6170 map->user_wire_size += size;
6171 }
6172
6173 if (entry->user_wired_count >= MAX_WIRE_COUNT) {
6174 return KERN_FAILURE;
6175 }
6176
6177 entry->user_wired_count++;
6178 } else {
6179 /*
6180 * The kernel's wiring the memory. Just bump the count and continue.
6181 */
6182
6183 if (entry->wired_count >= MAX_WIRE_COUNT) {
6184 panic("vm_map_wire: too many wirings");
6185 }
6186
6187 entry->wired_count++;
6188 }
6189
6190 return KERN_SUCCESS;
6191 }
6192
6193 /*
6194 * Update the memory wiring accounting now that the given map entry is being unwired.
6195 */
6196
6197 static void
6198 subtract_wire_counts(
6199 vm_map_t map,
6200 vm_map_entry_t entry,
6201 boolean_t user_wire)
6202 {
6203 if (user_wire) {
6204 /*
6205 * We're unwiring memory at the request of the user. See if we're removing the last user wire reference.
6206 */
6207
6208 if (entry->user_wired_count == 1) {
6209 /*
6210 * We're removing the last user wire reference. Decrement the wired_count and the total
6211 * user wired memory for this map.
6212 */
6213
6214 assert(entry->wired_count >= 1);
6215 entry->wired_count--;
6216 map->user_wire_size -= entry->vme_end - entry->vme_start;
6217 }
6218
6219 assert(entry->user_wired_count >= 1);
6220 entry->user_wired_count--;
6221 } else {
6222 /*
6223 * The kernel is unwiring the memory. Just update the count.
6224 */
6225
6226 assert(entry->wired_count >= 1);
6227 entry->wired_count--;
6228 }
6229 }
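#if 0 /* Illustrative sketch, not part of the original source. */
/*
 * A minimal sketch of how add_wire_counts() and subtract_wire_counts()
 * pair up for user wirings.  Only the first user wire bumps
 * entry->wired_count and map->user_wire_size; later user wires only
 * bump entry->user_wired_count.  The helper name is hypothetical and
 * the map is assumed to be locked, as for the real callers below.
 */
static kern_return_t
example_user_wire_twice(vm_map_t map, vm_map_entry_t entry)
{
	kern_return_t kr;

	kr = add_wire_counts(map, entry, TRUE);  /* wired_count 0 -> 1, user_wired_count 0 -> 1 */
	if (kr != KERN_SUCCESS) {
		return kr;
	}
	kr = add_wire_counts(map, entry, TRUE);  /* user_wired_count 1 -> 2 only */
	if (kr != KERN_SUCCESS) {
		subtract_wire_counts(map, entry, TRUE);
		return kr;
	}
	subtract_wire_counts(map, entry, TRUE);  /* user_wired_count 2 -> 1 only */
	subtract_wire_counts(map, entry, TRUE);  /* wired_count 1 -> 0, user_wired_count 1 -> 0 */
	return KERN_SUCCESS;
}
#endif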
6230
6231 int cs_executable_wire = 0;
6232
6233 /*
6234 * vm_map_wire:
6235 *
6236 * Sets the pageability of the specified address range in the
6237 * target map as wired. Regions specified as not pageable require
6238 * locked-down physical memory and physical page maps. The
6239 * access_type variable indicates types of accesses that must not
6240 * generate page faults. This is checked against protection of
6241 * memory being locked-down.
6242 *
6243 * The map must not be locked, but a reference must remain to the
6244 * map throughout the call.
6245 */
6246 static kern_return_t
6247 vm_map_wire_nested(
6248 vm_map_t map,
6249 vm_map_offset_t start,
6250 vm_map_offset_t end,
6251 vm_prot_t caller_prot,
6252 vm_tag_t tag,
6253 boolean_t user_wire,
6254 pmap_t map_pmap,
6255 vm_map_offset_t pmap_addr,
6256 ppnum_t *physpage_p)
6257 {
6258 vm_map_entry_t entry;
6259 vm_prot_t access_type;
6260 struct vm_map_entry *first_entry, tmp_entry;
6261 vm_map_t real_map;
6262 vm_map_offset_t s, e;
6263 kern_return_t rc;
6264 boolean_t need_wakeup;
6265 boolean_t main_map = FALSE;
6266 wait_interrupt_t interruptible_state;
6267 thread_t cur_thread;
6268 unsigned int last_timestamp;
6269 vm_map_size_t size;
6270 boolean_t wire_and_extract;
6271
6272 access_type = (caller_prot & VM_PROT_ALL);
6273
6274 wire_and_extract = FALSE;
6275 if (physpage_p != NULL) {
6276 /*
6277 * The caller wants the physical page number of the
6278 * wired page. We return only one physical page number
6279 * so this works for only one page at a time.
6280 */
6281 if ((end - start) != PAGE_SIZE) {
6282 return KERN_INVALID_ARGUMENT;
6283 }
6284 wire_and_extract = TRUE;
6285 *physpage_p = 0;
6286 }
6287
6288 vm_map_lock(map);
6289 if (map_pmap == NULL) {
6290 main_map = TRUE;
6291 }
6292 last_timestamp = map->timestamp;
6293
6294 VM_MAP_RANGE_CHECK(map, start, end);
6295 assert(page_aligned(start));
6296 assert(page_aligned(end));
6297 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
6298 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
6299 if (start == end) {
6300 /* We wired what the caller asked for, zero pages */
6301 vm_map_unlock(map);
6302 return KERN_SUCCESS;
6303 }
6304
6305 need_wakeup = FALSE;
6306 cur_thread = current_thread();
6307
6308 s = start;
6309 rc = KERN_SUCCESS;
6310
6311 if (vm_map_lookup_entry(map, s, &first_entry)) {
6312 entry = first_entry;
6313 /*
6314 * vm_map_clip_start will be done later.
6315 * We don't want to unnest any nested submaps here!
6316 */
6317 } else {
6318 /* Start address is not in map */
6319 rc = KERN_INVALID_ADDRESS;
6320 goto done;
6321 }
6322
6323 while ((entry != vm_map_to_entry(map)) && (s < end)) {
6324 /*
6325 * At this point, we have wired from "start" to "s".
6326 * We still need to wire from "s" to "end".
6327 *
6328 * "entry" hasn't been clipped, so it could start before "s"
6329 * and/or end after "end".
6330 */
6331
6332 /* "e" is how far we want to wire in this entry */
6333 e = entry->vme_end;
6334 if (e > end) {
6335 e = end;
6336 }
6337
6338 /*
6339 * If another thread is wiring/unwiring this entry then
6340 * block after informing other thread to wake us up.
6341 */
6342 if (entry->in_transition) {
6343 wait_result_t wait_result;
6344
6345 /*
6346 * We have not clipped the entry. Make sure that
6347 * the start address is in range so that the lookup
6348 * below will succeed.
6349 * "s" is the current starting point: we've already
6350 * wired from "start" to "s" and we still have
6351 * to wire from "s" to "end".
6352 */
6353
6354 entry->needs_wakeup = TRUE;
6355
6356 /*
6357 * wake up anybody waiting on entries that we have
6358 * already wired.
6359 */
6360 if (need_wakeup) {
6361 vm_map_entry_wakeup(map);
6362 need_wakeup = FALSE;
6363 }
6364 /*
6365 * User wiring is interruptible
6366 */
6367 wait_result = vm_map_entry_wait(map,
6368 (user_wire) ? THREAD_ABORTSAFE :
6369 THREAD_UNINT);
6370 if (user_wire && wait_result == THREAD_INTERRUPTED) {
6371 /*
6372 * undo the wirings we have done so far
6373 * We do not clear the needs_wakeup flag,
6374 * because we cannot tell if we were the
6375 * only one waiting.
6376 */
6377 rc = KERN_FAILURE;
6378 goto done;
6379 }
6380
6381 /*
6382 * Cannot avoid a lookup here. Reset the timestamp.
6383 */
6384 last_timestamp = map->timestamp;
6385
6386 /*
6387 * The entry could have been clipped, look it up again.
6388 * The worst that can happen is that it may not exist anymore.
6389 */
6390 if (!vm_map_lookup_entry(map, s, &first_entry)) {
6391 /*
6392 * User: undo everything up to the previous
6393 * entry. Let vm_map_unwire worry about
6394 * checking the validity of the range.
6395 */
6396 rc = KERN_FAILURE;
6397 goto done;
6398 }
6399 entry = first_entry;
6400 continue;
6401 }
6402
6403 if (entry->is_sub_map) {
6404 vm_map_offset_t sub_start;
6405 vm_map_offset_t sub_end;
6406 vm_map_offset_t local_start;
6407 vm_map_offset_t local_end;
6408 pmap_t pmap;
6409
6410 if (wire_and_extract) {
6411 /*
6412 * Wiring would result in copy-on-write
6413 * which would not be compatible with
6414 * the sharing we have with the original
6415 * provider of this memory.
6416 */
6417 rc = KERN_INVALID_ARGUMENT;
6418 goto done;
6419 }
6420
6421 vm_map_clip_start(map, entry, s);
6422 vm_map_clip_end(map, entry, end);
6423
6424 sub_start = VME_OFFSET(entry);
6425 sub_end = entry->vme_end;
6426 sub_end += VME_OFFSET(entry) - entry->vme_start;
6427
6428 local_end = entry->vme_end;
6429 if (map_pmap == NULL) {
6430 vm_object_t object;
6431 vm_object_offset_t offset;
6432 vm_prot_t prot;
6433 boolean_t wired;
6434 vm_map_entry_t local_entry;
6435 vm_map_version_t version;
6436 vm_map_t lookup_map;
6437
6438 if (entry->use_pmap) {
6439 pmap = VME_SUBMAP(entry)->pmap;
6440 /* ppc implementation requires that */
6441 /* the submap's pmap address ranges line */
6442 /* up with the parent map */
6443 #ifdef notdef
6444 pmap_addr = sub_start;
6445 #endif
6446 pmap_addr = s;
6447 } else {
6448 pmap = map->pmap;
6449 pmap_addr = s;
6450 }
6451
6452 if (entry->wired_count) {
6453 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
6454 goto done;
6455 }
6456
6457 /*
6458 * The map was not unlocked:
6459 * no need to goto re-lookup.
6460 * Just go directly to next entry.
6461 */
6462 entry = entry->vme_next;
6463 s = entry->vme_start;
6464 continue;
6465 }
6466
6467 /* call vm_map_lookup_locked to */
6468 /* cause any needs copy to be */
6469 /* evaluated */
6470 local_start = entry->vme_start;
6471 lookup_map = map;
6472 vm_map_lock_write_to_read(map);
6473 if (vm_map_lookup_locked(
6474 &lookup_map, local_start,
6475 access_type | VM_PROT_COPY,
6476 OBJECT_LOCK_EXCLUSIVE,
6477 &version, &object,
6478 &offset, &prot, &wired,
6479 NULL,
6480 &real_map)) {
6481 vm_map_unlock_read(lookup_map);
6482 assert(map_pmap == NULL);
6483 vm_map_unwire(map, start,
6484 s, user_wire);
6485 return KERN_FAILURE;
6486 }
6487 vm_object_unlock(object);
6488 if (real_map != lookup_map) {
6489 vm_map_unlock(real_map);
6490 }
6491 vm_map_unlock_read(lookup_map);
6492 vm_map_lock(map);
6493
6494 /* we unlocked, so must re-lookup */
6495 if (!vm_map_lookup_entry(map,
6496 local_start,
6497 &local_entry)) {
6498 rc = KERN_FAILURE;
6499 goto done;
6500 }
6501
6502 /*
6503 * entry could have been "simplified",
6504 * so re-clip
6505 */
6506 entry = local_entry;
6507 assert(s == local_start);
6508 vm_map_clip_start(map, entry, s);
6509 vm_map_clip_end(map, entry, end);
6510 /* re-compute "e" */
6511 e = entry->vme_end;
6512 if (e > end) {
6513 e = end;
6514 }
6515
6516 /* did we have a change of type? */
6517 if (!entry->is_sub_map) {
6518 last_timestamp = map->timestamp;
6519 continue;
6520 }
6521 } else {
6522 local_start = entry->vme_start;
6523 pmap = map_pmap;
6524 }
6525
6526 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
6527 goto done;
6528 }
6529
6530 entry->in_transition = TRUE;
6531
6532 vm_map_unlock(map);
6533 rc = vm_map_wire_nested(VME_SUBMAP(entry),
6534 sub_start, sub_end,
6535 caller_prot, tag,
6536 user_wire, pmap, pmap_addr,
6537 NULL);
6538 vm_map_lock(map);
6539
6540 /*
6541 * Find the entry again. It could have been clipped
6542 * after we unlocked the map.
6543 */
6544 if (!vm_map_lookup_entry(map, local_start,
6545 &first_entry)) {
6546 panic("vm_map_wire: re-lookup failed");
6547 }
6548 entry = first_entry;
6549
6550 assert(local_start == s);
6551 /* re-compute "e" */
6552 e = entry->vme_end;
6553 if (e > end) {
6554 e = end;
6555 }
6556
6557 last_timestamp = map->timestamp;
6558 while ((entry != vm_map_to_entry(map)) &&
6559 (entry->vme_start < e)) {
6560 assert(entry->in_transition);
6561 entry->in_transition = FALSE;
6562 if (entry->needs_wakeup) {
6563 entry->needs_wakeup = FALSE;
6564 need_wakeup = TRUE;
6565 }
6566 if (rc != KERN_SUCCESS) {/* from vm_*_wire */
6567 subtract_wire_counts(map, entry, user_wire);
6568 }
6569 entry = entry->vme_next;
6570 }
6571 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
6572 goto done;
6573 }
6574
6575 /* no need to relookup again */
6576 s = entry->vme_start;
6577 continue;
6578 }
6579
6580 /*
6581 * If this entry is already wired then increment
6582 * the appropriate wire reference count.
6583 */
6584 if (entry->wired_count) {
6585 if ((entry->protection & access_type) != access_type) {
6586 /* found a protection problem */
6587
6588 /*
6589 * XXX FBDP
6590 * We should always return an error
6591 * in this case but since we didn't
6592 * enforce it before, let's do
6593 * it only for the new "wire_and_extract"
6594 * code path for now...
6595 */
6596 if (wire_and_extract) {
6597 rc = KERN_PROTECTION_FAILURE;
6598 goto done;
6599 }
6600 }
6601
6602 /*
6603 * entry is already wired down, get our reference
6604 * after clipping to our range.
6605 */
6606 vm_map_clip_start(map, entry, s);
6607 vm_map_clip_end(map, entry, end);
6608
6609 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
6610 goto done;
6611 }
6612
6613 if (wire_and_extract) {
6614 vm_object_t object;
6615 vm_object_offset_t offset;
6616 vm_page_t m;
6617
6618 /*
6619 * We don't have to "wire" the page again
6620 * but we still have to "extract" its
6621 * physical page number, after some sanity
6622 * checks.
6623 */
6624 assert((entry->vme_end - entry->vme_start)
6625 == PAGE_SIZE);
6626 assert(!entry->needs_copy);
6627 assert(!entry->is_sub_map);
6628 assert(VME_OBJECT(entry));
6629 if (((entry->vme_end - entry->vme_start)
6630 != PAGE_SIZE) ||
6631 entry->needs_copy ||
6632 entry->is_sub_map ||
6633 VME_OBJECT(entry) == VM_OBJECT_NULL) {
6634 rc = KERN_INVALID_ARGUMENT;
6635 goto done;
6636 }
6637
6638 object = VME_OBJECT(entry);
6639 offset = VME_OFFSET(entry);
6640 /* need exclusive lock to update m->dirty */
6641 if (entry->protection & VM_PROT_WRITE) {
6642 vm_object_lock(object);
6643 } else {
6644 vm_object_lock_shared(object);
6645 }
6646 m = vm_page_lookup(object, offset);
6647 assert(m != VM_PAGE_NULL);
6648 assert(VM_PAGE_WIRED(m));
6649 if (m != VM_PAGE_NULL && VM_PAGE_WIRED(m)) {
6650 *physpage_p = VM_PAGE_GET_PHYS_PAGE(m);
6651 if (entry->protection & VM_PROT_WRITE) {
6652 vm_object_lock_assert_exclusive(
6653 object);
6654 m->vmp_dirty = TRUE;
6655 }
6656 } else {
6657 /* not already wired !? */
6658 *physpage_p = 0;
6659 }
6660 vm_object_unlock(object);
6661 }
6662
6663 /* map was not unlocked: no need to relookup */
6664 entry = entry->vme_next;
6665 s = entry->vme_start;
6666 continue;
6667 }
6668
6669 /*
6670 * Unwired entry or wire request transmitted via submap
6671 */
6672
6673 /*
6674 * Wiring would copy the pages to the shadow object.
6675 * The shadow object would not be code-signed so
6676 * attempting to execute code from these copied pages
6677 * would trigger a code-signing violation.
6678 */
6679
6680 if ((entry->protection & VM_PROT_EXECUTE)
6681 #if !CONFIG_EMBEDDED
6682 &&
6683 map != kernel_map &&
6684 cs_process_enforcement(NULL)
6685 #endif /* !CONFIG_EMBEDDED */
6686 ) {
6687 #if MACH_ASSERT
6688 printf("pid %d[%s] wiring executable range from "
6689 "0x%llx to 0x%llx: rejected to preserve "
6690 "code-signing\n",
6691 proc_selfpid(),
6692 (current_task()->bsd_info
6693 ? proc_name_address(current_task()->bsd_info)
6694 : "?"),
6695 (uint64_t) entry->vme_start,
6696 (uint64_t) entry->vme_end);
6697 #endif /* MACH_ASSERT */
6698 DTRACE_VM2(cs_executable_wire,
6699 uint64_t, (uint64_t)entry->vme_start,
6700 uint64_t, (uint64_t)entry->vme_end);
6701 cs_executable_wire++;
6702 rc = KERN_PROTECTION_FAILURE;
6703 goto done;
6704 }
6705
6706 /*
6707 * Perform actions of vm_map_lookup that need the write
6708 * lock on the map: create a shadow object for a
6709 * copy-on-write region, or an object for a zero-fill
6710 * region.
6711 */
6712 size = entry->vme_end - entry->vme_start;
6713 /*
6714 * If wiring a copy-on-write page, we need to copy it now
6715 * even if we're only (currently) requesting read access.
6716 * This is aggressive, but once it's wired we can't move it.
6717 */
6718 if (entry->needs_copy) {
6719 if (wire_and_extract) {
6720 /*
6721 * We're supposed to share with the original
6722 * provider so should not be "needs_copy"
6723 */
6724 rc = KERN_INVALID_ARGUMENT;
6725 goto done;
6726 }
6727
6728 VME_OBJECT_SHADOW(entry, size);
6729 entry->needs_copy = FALSE;
6730 } else if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
6731 if (wire_and_extract) {
6732 /*
6733 * We're supposed to share with the original
6734 * provider so should already have an object.
6735 */
6736 rc = KERN_INVALID_ARGUMENT;
6737 goto done;
6738 }
6739 VME_OBJECT_SET(entry, vm_object_allocate(size));
6740 VME_OFFSET_SET(entry, (vm_object_offset_t)0);
6741 assert(entry->use_pmap);
6742 }
6743
6744 vm_map_clip_start(map, entry, s);
6745 vm_map_clip_end(map, entry, end);
6746
6747 /* re-compute "e" */
6748 e = entry->vme_end;
6749 if (e > end) {
6750 e = end;
6751 }
6752
6753 /*
6754 * Check for holes and protection mismatch.
6755 * Holes: Next entry should be contiguous unless this
6756 * is the end of the region.
6757 * Protection: Access requested must be allowed, unless
6758 * wiring is by protection class
6759 */
6760 if ((entry->vme_end < end) &&
6761 ((entry->vme_next == vm_map_to_entry(map)) ||
6762 (entry->vme_next->vme_start > entry->vme_end))) {
6763 /* found a hole */
6764 rc = KERN_INVALID_ADDRESS;
6765 goto done;
6766 }
6767 if ((entry->protection & access_type) != access_type) {
6768 /* found a protection problem */
6769 rc = KERN_PROTECTION_FAILURE;
6770 goto done;
6771 }
6772
6773 assert(entry->wired_count == 0 && entry->user_wired_count == 0);
6774
6775 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
6776 goto done;
6777 }
6778
6779 entry->in_transition = TRUE;
6780
6781 /*
6782 * This entry might get split once we unlock the map.
6783 * In vm_fault_wire(), we need the current range as
6784 * defined by this entry. In order for this to work
6785 * along with a simultaneous clip operation, we make a
6786 * temporary copy of this entry and use that for the
6787 * wiring. Note that the underlying objects do not
6788 * change during a clip.
6789 */
6790 tmp_entry = *entry;
6791
6792 /*
6793 * The in_transition state guarantees that the entry
6794 * (or entries for this range, if a split occurred) will be
6795 * there when the map lock is acquired for the second time.
6796 */
6797 vm_map_unlock(map);
6798
6799 if (!user_wire && cur_thread != THREAD_NULL) {
6800 interruptible_state = thread_interrupt_level(THREAD_UNINT);
6801 } else {
6802 interruptible_state = THREAD_UNINT;
6803 }
6804
6805 if (map_pmap) {
6806 rc = vm_fault_wire(map,
6807 &tmp_entry, caller_prot, tag, map_pmap, pmap_addr,
6808 physpage_p);
6809 } else {
6810 rc = vm_fault_wire(map,
6811 &tmp_entry, caller_prot, tag, map->pmap,
6812 tmp_entry.vme_start,
6813 physpage_p);
6814 }
6815
6816 if (!user_wire && cur_thread != THREAD_NULL) {
6817 thread_interrupt_level(interruptible_state);
6818 }
6819
6820 vm_map_lock(map);
6821
6822 if (last_timestamp + 1 != map->timestamp) {
6823 /*
6824 * Find the entry again. It could have been clipped
6825 * after we unlocked the map.
6826 */
6827 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
6828 &first_entry)) {
6829 panic("vm_map_wire: re-lookup failed");
6830 }
6831
6832 entry = first_entry;
6833 }
6834
6835 last_timestamp = map->timestamp;
6836
6837 while ((entry != vm_map_to_entry(map)) &&
6838 (entry->vme_start < tmp_entry.vme_end)) {
6839 assert(entry->in_transition);
6840 entry->in_transition = FALSE;
6841 if (entry->needs_wakeup) {
6842 entry->needs_wakeup = FALSE;
6843 need_wakeup = TRUE;
6844 }
6845 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
6846 subtract_wire_counts(map, entry, user_wire);
6847 }
6848 entry = entry->vme_next;
6849 }
6850
6851 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
6852 goto done;
6853 }
6854
6855 if ((entry != vm_map_to_entry(map)) && /* we still have entries in the map */
6856 (tmp_entry.vme_end != end) && /* AND, we are not at the end of the requested range */
6857 (entry->vme_start != tmp_entry.vme_end)) { /* AND, the next entry is not contiguous. */
6858 /* found a "new" hole */
6859 s = tmp_entry.vme_end;
6860 rc = KERN_INVALID_ADDRESS;
6861 goto done;
6862 }
6863
6864 s = entry->vme_start;
6865 } /* end while loop through map entries */
6866
6867 done:
6868 if (rc == KERN_SUCCESS) {
6869 /* repair any damage we may have made to the VM map */
6870 vm_map_simplify_range(map, start, end);
6871 }
6872
6873 vm_map_unlock(map);
6874
6875 /*
6876 * wake up anybody waiting on entries we wired.
6877 */
6878 if (need_wakeup) {
6879 vm_map_entry_wakeup(map);
6880 }
6881
6882 if (rc != KERN_SUCCESS) {
6883 /* undo what has been wired so far */
6884 vm_map_unwire_nested(map, start, s, user_wire,
6885 map_pmap, pmap_addr);
6886 if (physpage_p) {
6887 *physpage_p = 0;
6888 }
6889 }
6890
6891 return rc;
6892 }
6893
6894 kern_return_t
6895 vm_map_wire_external(
6896 vm_map_t map,
6897 vm_map_offset_t start,
6898 vm_map_offset_t end,
6899 vm_prot_t caller_prot,
6900 boolean_t user_wire)
6901 {
6902 kern_return_t kret;
6903
6904 kret = vm_map_wire_nested(map, start, end, caller_prot, vm_tag_bt(),
6905 user_wire, (pmap_t)NULL, 0, NULL);
6906 return kret;
6907 }
6908
6909 kern_return_t
6910 vm_map_wire_kernel(
6911 vm_map_t map,
6912 vm_map_offset_t start,
6913 vm_map_offset_t end,
6914 vm_prot_t caller_prot,
6915 vm_tag_t tag,
6916 boolean_t user_wire)
6917 {
6918 kern_return_t kret;
6919
6920 kret = vm_map_wire_nested(map, start, end, caller_prot, tag,
6921 user_wire, (pmap_t)NULL, 0, NULL);
6922 return kret;
6923 }
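#if 0 /* Illustrative sketch, not part of the original source. */
/*
 * A minimal sketch of wiring and unwiring a page-aligned range on
 * behalf of the kernel.  The helper name, range and tag are
 * hypothetical; vm_map_wire_external() above derives its tag from the
 * caller's backtrace instead of taking one explicitly.
 */
static kern_return_t
example_wire_then_unwire(
	vm_map_t        map,
	vm_map_offset_t start,          /* assumed page aligned */
	vm_map_offset_t end,            /* assumed page aligned */
	vm_tag_t        tag)
{
	kern_return_t kr;

	kr = vm_map_wire_kernel(map, start, end,
	    VM_PROT_READ | VM_PROT_WRITE, tag, FALSE /* !user_wire */);
	if (kr != KERN_SUCCESS) {
		return kr;
	}

	/* ... the range can now be accessed without taking page faults ... */

	return vm_map_unwire(map, start, end, FALSE /* !user_wire */);
}
#endif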
6924
6925 kern_return_t
6926 vm_map_wire_and_extract_external(
6927 vm_map_t map,
6928 vm_map_offset_t start,
6929 vm_prot_t caller_prot,
6930 boolean_t user_wire,
6931 ppnum_t *physpage_p)
6932 {
6933 kern_return_t kret;
6934
6935 kret = vm_map_wire_nested(map,
6936 start,
6937 start + VM_MAP_PAGE_SIZE(map),
6938 caller_prot,
6939 vm_tag_bt(),
6940 user_wire,
6941 (pmap_t)NULL,
6942 0,
6943 physpage_p);
6944 if (kret != KERN_SUCCESS &&
6945 physpage_p != NULL) {
6946 *physpage_p = 0;
6947 }
6948 return kret;
6949 }
6950
6951 kern_return_t
6952 vm_map_wire_and_extract_kernel(
6953 vm_map_t map,
6954 vm_map_offset_t start,
6955 vm_prot_t caller_prot,
6956 vm_tag_t tag,
6957 boolean_t user_wire,
6958 ppnum_t *physpage_p)
6959 {
6960 kern_return_t kret;
6961
6962 kret = vm_map_wire_nested(map,
6963 start,
6964 start + VM_MAP_PAGE_SIZE(map),
6965 caller_prot,
6966 tag,
6967 user_wire,
6968 (pmap_t)NULL,
6969 0,
6970 physpage_p);
6971 if (kret != KERN_SUCCESS &&
6972 physpage_p != NULL) {
6973 *physpage_p = 0;
6974 }
6975 return kret;
6976 }
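#if 0 /* Illustrative sketch, not part of the original source. */
/*
 * A minimal sketch of wiring a single page and extracting its physical
 * page number.  The helper name and arguments are hypothetical;
 * vm_map_wire_nested() only honors a physpage pointer when the wired
 * range is exactly one page, which is what the wrapper above passes.
 */
static kern_return_t
example_wire_one_page(
	vm_map_t        map,
	vm_map_offset_t addr,           /* assumed page aligned */
	vm_tag_t        tag,
	ppnum_t        *ppnum_out)
{
	kern_return_t kr;

	kr = vm_map_wire_and_extract_kernel(map, addr,
	    VM_PROT_READ, tag, FALSE /* !user_wire */, ppnum_out);
	if (kr != KERN_SUCCESS) {
		/* the wrapper above has already reset *ppnum_out to 0 */
		return kr;
	}
	/* *ppnum_out now holds the physical page number of the wired page */
	return KERN_SUCCESS;
}
#endif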
6977
6978 /*
6979 * vm_map_unwire:
6980 *
6981 * Sets the pageability of the specified address range in the target
6982 * as pageable. Regions specified must have been wired previously.
6983 *
6984 * The map must not be locked, but a reference must remain to the map
6985 * throughout the call.
6986 *
6987 * The kernel will panic on failures. User unwire ignores holes and
6988 * unwired or in-transition entries to avoid losing memory by leaving
6989 * it unwired.
6990 */
6991 static kern_return_t
6992 vm_map_unwire_nested(
6993 vm_map_t map,
6994 vm_map_offset_t start,
6995 vm_map_offset_t end,
6996 boolean_t user_wire,
6997 pmap_t map_pmap,
6998 vm_map_offset_t pmap_addr)
6999 {
7000 vm_map_entry_t entry;
7001 struct vm_map_entry *first_entry, tmp_entry;
7002 boolean_t need_wakeup;
7003 boolean_t main_map = FALSE;
7004 unsigned int last_timestamp;
7005
7006 vm_map_lock(map);
7007 if (map_pmap == NULL) {
7008 main_map = TRUE;
7009 }
7010 last_timestamp = map->timestamp;
7011
7012 VM_MAP_RANGE_CHECK(map, start, end);
7013 assert(page_aligned(start));
7014 assert(page_aligned(end));
7015 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
7016 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
7017
7018 if (start == end) {
7019 /* We unwired what the caller asked for: zero pages */
7020 vm_map_unlock(map);
7021 return KERN_SUCCESS;
7022 }
7023
7024 if (vm_map_lookup_entry(map, start, &first_entry)) {
7025 entry = first_entry;
7026 /*
7027 * vm_map_clip_start will be done later.
7028 * We don't want to unnest any nested submaps here!
7029 */
7030 } else {
7031 if (!user_wire) {
7032 panic("vm_map_unwire: start not found");
7033 }
7034 /* Start address is not in map. */
7035 vm_map_unlock(map);
7036 return KERN_INVALID_ADDRESS;
7037 }
7038
7039 if (entry->superpage_size) {
7040 /* superpages are always wired */
7041 vm_map_unlock(map);
7042 return KERN_INVALID_ADDRESS;
7043 }
7044
7045 need_wakeup = FALSE;
7046 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
7047 if (entry->in_transition) {
7048 /*
7049 * 1)
7050 * Another thread is wiring down this entry. Note
7051 * that if it were not for the other thread, we would
7052 * be unwiring an unwired entry. This is not
7053 * permitted. If we wait, we will be unwiring memory
7054 * we did not wire.
7055 *
7056 * 2)
7057 * Another thread is unwiring this entry. We did not
7058 * have a reference to it, because if we did, this
7059 * entry will not be getting unwired now.
7060 */
7061 if (!user_wire) {
7062 /*
7063 * XXX FBDP
7064 * This could happen: there could be some
7065 * overlapping vslock/vsunlock operations
7066 * going on.
7067 * We should probably just wait and retry,
7068 * but then we have to be careful that this
7069 * entry could get "simplified" after
7070 * "in_transition" gets unset and before
7071 * we re-lookup the entry, so we would
7072 * have to re-clip the entry to avoid
7073 * re-unwiring what we have already unwired...
7074 * See vm_map_wire_nested().
7075 *
7076 * Or we could just ignore "in_transition"
7077 * here and proceed to decrement the wired
7078 * count(s) on this entry. That should be fine
7079 * as long as "wired_count" doesn't drop all
7080 * the way to 0 (and we should panic if THAT
7081 * happens).
7082 */
7083 panic("vm_map_unwire: in_transition entry");
7084 }
7085
7086 entry = entry->vme_next;
7087 continue;
7088 }
7089
7090 if (entry->is_sub_map) {
7091 vm_map_offset_t sub_start;
7092 vm_map_offset_t sub_end;
7093 vm_map_offset_t local_end;
7094 pmap_t pmap;
7095
7096 vm_map_clip_start(map, entry, start);
7097 vm_map_clip_end(map, entry, end);
7098
7099 sub_start = VME_OFFSET(entry);
7100 sub_end = entry->vme_end - entry->vme_start;
7101 sub_end += VME_OFFSET(entry);
7102 local_end = entry->vme_end;
7103 if (map_pmap == NULL) {
7104 if (entry->use_pmap) {
7105 pmap = VME_SUBMAP(entry)->pmap;
7106 pmap_addr = sub_start;
7107 } else {
7108 pmap = map->pmap;
7109 pmap_addr = start;
7110 }
7111 if (entry->wired_count == 0 ||
7112 (user_wire && entry->user_wired_count == 0)) {
7113 if (!user_wire) {
7114 panic("vm_map_unwire: entry is unwired");
7115 }
7116 entry = entry->vme_next;
7117 continue;
7118 }
7119
7120 /*
7121 * Check for holes
7122 * Holes: Next entry should be contiguous unless
7123 * this is the end of the region.
7124 */
7125 if (((entry->vme_end < end) &&
7126 ((entry->vme_next == vm_map_to_entry(map)) ||
7127 (entry->vme_next->vme_start
7128 > entry->vme_end)))) {
7129 if (!user_wire) {
7130 panic("vm_map_unwire: non-contiguous region");
7131 }
7132 /*
7133 * entry = entry->vme_next;
7134 * continue;
7135 */
7136 }
7137
7138 subtract_wire_counts(map, entry, user_wire);
7139
7140 if (entry->wired_count != 0) {
7141 entry = entry->vme_next;
7142 continue;
7143 }
7144
7145 entry->in_transition = TRUE;
7146 tmp_entry = *entry;/* see comment in vm_map_wire() */
7147
7148 /*
7149 * We can unlock the map now. The in_transition state
7150 * guarantees existence of the entry.
7151 */
7152 vm_map_unlock(map);
7153 vm_map_unwire_nested(VME_SUBMAP(entry),
7154 sub_start, sub_end, user_wire, pmap, pmap_addr);
7155 vm_map_lock(map);
7156
7157 if (last_timestamp + 1 != map->timestamp) {
7158 /*
7159 * Find the entry again. It could have been
7160 * clipped or deleted after we unlocked the map.
7161 */
7162 if (!vm_map_lookup_entry(map,
7163 tmp_entry.vme_start,
7164 &first_entry)) {
7165 if (!user_wire) {
7166 panic("vm_map_unwire: re-lookup failed");
7167 }
7168 entry = first_entry->vme_next;
7169 } else {
7170 entry = first_entry;
7171 }
7172 }
7173 last_timestamp = map->timestamp;
7174
7175 /*
7176 * clear transition bit for all constituent entries
7177 * that were in the original entry (saved in
7178 * tmp_entry). Also check for waiters.
7179 */
7180 while ((entry != vm_map_to_entry(map)) &&
7181 (entry->vme_start < tmp_entry.vme_end)) {
7182 assert(entry->in_transition);
7183 entry->in_transition = FALSE;
7184 if (entry->needs_wakeup) {
7185 entry->needs_wakeup = FALSE;
7186 need_wakeup = TRUE;
7187 }
7188 entry = entry->vme_next;
7189 }
7190 continue;
7191 } else {
7192 vm_map_unlock(map);
7193 vm_map_unwire_nested(VME_SUBMAP(entry),
7194 sub_start, sub_end, user_wire, map_pmap,
7195 pmap_addr);
7196 vm_map_lock(map);
7197
7198 if (last_timestamp + 1 != map->timestamp) {
7199 /*
7200 * Find the entry again. It could have been
7201 * clipped or deleted after we unlocked the map.
7202 */
7203 if (!vm_map_lookup_entry(map,
7204 tmp_entry.vme_start,
7205 &first_entry)) {
7206 if (!user_wire) {
7207 panic("vm_map_unwire: re-lookup failed");
7208 }
7209 entry = first_entry->vme_next;
7210 } else {
7211 entry = first_entry;
7212 }
7213 }
7214 last_timestamp = map->timestamp;
7215 }
7216 }
7217
7218
7219 if ((entry->wired_count == 0) ||
7220 (user_wire && entry->user_wired_count == 0)) {
7221 if (!user_wire) {
7222 panic("vm_map_unwire: entry is unwired");
7223 }
7224
7225 entry = entry->vme_next;
7226 continue;
7227 }
7228
7229 assert(entry->wired_count > 0 &&
7230 (!user_wire || entry->user_wired_count > 0));
7231
7232 vm_map_clip_start(map, entry, start);
7233 vm_map_clip_end(map, entry, end);
7234
7235 /*
7236 * Check for holes
7237 * Holes: Next entry should be contiguous unless
7238 * this is the end of the region.
7239 */
7240 if (((entry->vme_end < end) &&
7241 ((entry->vme_next == vm_map_to_entry(map)) ||
7242 (entry->vme_next->vme_start > entry->vme_end)))) {
7243 if (!user_wire) {
7244 panic("vm_map_unwire: non-contiguous region");
7245 }
7246 entry = entry->vme_next;
7247 continue;
7248 }
7249
7250 subtract_wire_counts(map, entry, user_wire);
7251
7252 if (entry->wired_count != 0) {
7253 entry = entry->vme_next;
7254 continue;
7255 }
7256
7257 if (entry->zero_wired_pages) {
7258 entry->zero_wired_pages = FALSE;
7259 }
7260
7261 entry->in_transition = TRUE;
7262 tmp_entry = *entry; /* see comment in vm_map_wire() */
7263
7264 /*
7265 * We can unlock the map now. The in_transition state
7266 * guarantees existence of the entry.
7267 */
7268 vm_map_unlock(map);
7269 if (map_pmap) {
7270 vm_fault_unwire(map,
7271 &tmp_entry, FALSE, map_pmap, pmap_addr);
7272 } else {
7273 vm_fault_unwire(map,
7274 &tmp_entry, FALSE, map->pmap,
7275 tmp_entry.vme_start);
7276 }
7277 vm_map_lock(map);
7278
7279 if (last_timestamp + 1 != map->timestamp) {
7280 /*
7281 * Find the entry again. It could have been clipped
7282 * or deleted after we unlocked the map.
7283 */
7284 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
7285 &first_entry)) {
7286 if (!user_wire) {
7287 panic("vm_map_unwire: re-lookup failed");
7288 }
7289 entry = first_entry->vme_next;
7290 } else {
7291 entry = first_entry;
7292 }
7293 }
7294 last_timestamp = map->timestamp;
7295
7296 /*
7297 * clear transition bit for all constituent entries that
7298 * were in the original entry (saved in tmp_entry). Also
7299 * check for waiters.
7300 */
7301 while ((entry != vm_map_to_entry(map)) &&
7302 (entry->vme_start < tmp_entry.vme_end)) {
7303 assert(entry->in_transition);
7304 entry->in_transition = FALSE;
7305 if (entry->needs_wakeup) {
7306 entry->needs_wakeup = FALSE;
7307 need_wakeup = TRUE;
7308 }
7309 entry = entry->vme_next;
7310 }
7311 }
7312
7313 /*
7314 * We might have fragmented the address space when we wired this
7315 * range of addresses. Attempt to re-coalesce these VM map entries
7316 * with their neighbors now that they're no longer wired.
7317 * Under some circumstances, address space fragmentation can
7318 * prevent VM object shadow chain collapsing, which can cause
7319 * swap space leaks.
7320 */
7321 vm_map_simplify_range(map, start, end);
7322
7323 vm_map_unlock(map);
7324 /*
7325 * wake up anybody waiting on entries that we have unwired.
7326 */
7327 if (need_wakeup) {
7328 vm_map_entry_wakeup(map);
7329 }
7330 return KERN_SUCCESS;
7331 }
7332
7333 kern_return_t
7334 vm_map_unwire(
7335 vm_map_t map,
7336 vm_map_offset_t start,
7337 vm_map_offset_t end,
7338 boolean_t user_wire)
7339 {
7340 return vm_map_unwire_nested(map, start, end,
7341 user_wire, (pmap_t)NULL, 0);
7342 }
7343
7344
7345 /*
7346 * vm_map_entry_delete: [ internal use only ]
7347 *
7348 * Deallocate the given entry from the target map.
7349 */
7350 static void
7351 vm_map_entry_delete(
7352 vm_map_t map,
7353 vm_map_entry_t entry)
7354 {
7355 vm_map_offset_t s, e;
7356 vm_object_t object;
7357 vm_map_t submap;
7358
7359 s = entry->vme_start;
7360 e = entry->vme_end;
7361 assert(page_aligned(s));
7362 assert(page_aligned(e));
7363 if (entry->map_aligned == TRUE) {
7364 assert(VM_MAP_PAGE_ALIGNED(s, VM_MAP_PAGE_MASK(map)));
7365 assert(VM_MAP_PAGE_ALIGNED(e, VM_MAP_PAGE_MASK(map)));
7366 }
7367 assert(entry->wired_count == 0);
7368 assert(entry->user_wired_count == 0);
7369 assert(!entry->permanent);
7370
7371 if (entry->is_sub_map) {
7372 object = NULL;
7373 submap = VME_SUBMAP(entry);
7374 } else {
7375 submap = NULL;
7376 object = VME_OBJECT(entry);
7377 }
7378
7379 vm_map_store_entry_unlink(map, entry);
7380 map->size -= e - s;
7381
7382 vm_map_entry_dispose(map, entry);
7383
7384 vm_map_unlock(map);
7385 /*
7386 * Deallocate the object only after removing all
7387 * pmap entries pointing to its pages.
7388 */
7389 if (submap) {
7390 vm_map_deallocate(submap);
7391 } else {
7392 vm_object_deallocate(object);
7393 }
7394 }
7395
7396 void
7397 vm_map_submap_pmap_clean(
7398 vm_map_t map,
7399 vm_map_offset_t start,
7400 vm_map_offset_t end,
7401 vm_map_t sub_map,
7402 vm_map_offset_t offset)
7403 {
7404 vm_map_offset_t submap_start;
7405 vm_map_offset_t submap_end;
7406 vm_map_size_t remove_size;
7407 vm_map_entry_t entry;
7408
7409 submap_end = offset + (end - start);
7410 submap_start = offset;
7411
7412 vm_map_lock_read(sub_map);
7413 if (vm_map_lookup_entry(sub_map, offset, &entry)) {
7414 remove_size = (entry->vme_end - entry->vme_start);
7415 if (offset > entry->vme_start) {
7416 remove_size -= offset - entry->vme_start;
7417 }
7418
7419
7420 if (submap_end < entry->vme_end) {
7421 remove_size -=
7422 entry->vme_end - submap_end;
7423 }
7424 if (entry->is_sub_map) {
7425 vm_map_submap_pmap_clean(
7426 sub_map,
7427 start,
7428 start + remove_size,
7429 VME_SUBMAP(entry),
7430 VME_OFFSET(entry));
7431 } else {
7432 if (map->mapped_in_other_pmaps &&
7433 os_ref_get_count(&map->map_refcnt) != 0 &&
7434 VME_OBJECT(entry) != NULL) {
7435 vm_object_pmap_protect_options(
7436 VME_OBJECT(entry),
7437 (VME_OFFSET(entry) +
7438 offset -
7439 entry->vme_start),
7440 remove_size,
7441 PMAP_NULL,
7442 entry->vme_start,
7443 VM_PROT_NONE,
7444 PMAP_OPTIONS_REMOVE);
7445 } else {
7446 pmap_remove(map->pmap,
7447 (addr64_t)start,
7448 (addr64_t)(start + remove_size));
7449 }
7450 }
7451 }
7452
7453 entry = entry->vme_next;
7454
7455 while ((entry != vm_map_to_entry(sub_map))
7456 && (entry->vme_start < submap_end)) {
7457 remove_size = (entry->vme_end - entry->vme_start);
7458 if (submap_end < entry->vme_end) {
7459 remove_size -= entry->vme_end - submap_end;
7460 }
7461 if (entry->is_sub_map) {
7462 vm_map_submap_pmap_clean(
7463 sub_map,
7464 (start + entry->vme_start) - offset,
7465 ((start + entry->vme_start) - offset) + remove_size,
7466 VME_SUBMAP(entry),
7467 VME_OFFSET(entry));
7468 } else {
7469 if (map->mapped_in_other_pmaps &&
7470 os_ref_get_count(&map->map_refcnt) != 0 &&
7471 VME_OBJECT(entry) != NULL) {
7472 vm_object_pmap_protect_options(
7473 VME_OBJECT(entry),
7474 VME_OFFSET(entry),
7475 remove_size,
7476 PMAP_NULL,
7477 entry->vme_start,
7478 VM_PROT_NONE,
7479 PMAP_OPTIONS_REMOVE);
7480 } else {
7481 pmap_remove(map->pmap,
7482 (addr64_t)((start + entry->vme_start)
7483 - offset),
7484 (addr64_t)(((start + entry->vme_start)
7485 - offset) + remove_size));
7486 }
7487 }
7488 entry = entry->vme_next;
7489 }
7490 vm_map_unlock_read(sub_map);
7491 return;
7492 }
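#if 0 /* Illustrative sketch, not part of the original source. */
/*
 * A minimal sketch of how a caller tearing down a submap entry that
 * does not share its pmap might invoke vm_map_submap_pmap_clean() to
 * remove the physical mappings the parent pmap picked up for that
 * range.  The helper name is hypothetical; the argument pattern is an
 * assumption based on the entry fields used above.
 */
static void
example_clean_submap_entry(vm_map_t map, vm_map_entry_t entry)
{
	assert(entry->is_sub_map && !entry->use_pmap);
	vm_map_submap_pmap_clean(map,
	    entry->vme_start,
	    entry->vme_end,
	    VME_SUBMAP(entry),
	    VME_OFFSET(entry));
}
#endif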
7493
7494 /*
7495 * virt_memory_guard_ast:
7496 *
7497 * Handle the AST callout for a virtual memory guard.
7498 * raise an EXC_GUARD exception and terminate the task
7499 * if configured to do so.
7500 */
7501 void
7502 virt_memory_guard_ast(
7503 thread_t thread,
7504 mach_exception_data_type_t code,
7505 mach_exception_data_type_t subcode)
7506 {
7507 task_t task = thread->task;
7508 assert(task != kernel_task);
7509 assert(task == current_task());
7510 uint32_t behavior;
7511
7512 behavior = task->task_exc_guard;
7513
7514 /* Is delivery enabled */
7515 if ((behavior & TASK_EXC_GUARD_VM_DELIVER) == 0) {
7516 return;
7517 }
7518
7519 /* If only once, make sure we're that once */
7520 while (behavior & TASK_EXC_GUARD_VM_ONCE) {
7521 uint32_t new_behavior = behavior & ~TASK_EXC_GUARD_VM_DELIVER;
7522
7523 if (OSCompareAndSwap(behavior, new_behavior, &task->task_exc_guard)) {
7524 break;
7525 }
7526 behavior = task->task_exc_guard;
7527 if ((behavior & TASK_EXC_GUARD_VM_DELIVER) == 0) {
7528 return;
7529 }
7530 }
7531
7532 /* Raise exception via corpse fork or synchronously */
7533 if ((task->task_exc_guard & TASK_EXC_GUARD_VM_CORPSE) &&
7534 (task->task_exc_guard & TASK_EXC_GUARD_VM_FATAL) == 0) {
7535 task_violated_guard(code, subcode, NULL);
7536 } else {
7537 task_exception_notify(EXC_GUARD, code, subcode);
7538 }
7539
7540 /* Terminate the task if desired */
7541 if (task->task_exc_guard & TASK_EXC_GUARD_VM_FATAL) {
7542 task_bsdtask_kill(current_task());
7543 }
7544 }
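#if 0 /* Illustrative sketch, not part of the original source. */
/*
 * A hypothetical helper summarizing how the TASK_EXC_GUARD_VM_* bits
 * checked in virt_memory_guard_ast() above combine: delivery can be
 * disabled, routed through a corpse fork, and/or made fatal (the
 * "once" bit makes delivery one-shot by clearing the deliver bit).
 */
static const char *
example_describe_vm_guard_behavior(uint32_t behavior)
{
	if ((behavior & TASK_EXC_GUARD_VM_DELIVER) == 0) {
		return "guard exception not delivered";
	}
	if (behavior & TASK_EXC_GUARD_VM_FATAL) {
		return "delivered synchronously, task terminated";
	}
	if (behavior & TASK_EXC_GUARD_VM_CORPSE) {
		return "delivered via corpse fork, task continues";
	}
	return "delivered synchronously, task continues";
}
#endif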
7545
7546 /*
7547 * vm_map_guard_exception:
7548 *
7549 * Generate a GUARD_TYPE_VIRTUAL_MEMORY EXC_GUARD exception.
7550 *
7551 * Right now, we do this when we find nothing mapped, or a
7552 * gap in the mapping when a user address space deallocate
7553 * was requested. We report the address of the first gap found.
7554 */
7555 static void
7556 vm_map_guard_exception(
7557 vm_map_offset_t gap_start,
7558 unsigned reason)
7559 {
7560 mach_exception_code_t code = 0;
7561 unsigned int guard_type = GUARD_TYPE_VIRT_MEMORY;
7562 unsigned int target = 0; /* should we pass in pid associated with map? */
7563 mach_exception_data_type_t subcode = (uint64_t)gap_start;
7564 boolean_t fatal = FALSE;
7565
7566 task_t task = current_task();
7567
7568 /* Can't deliver exceptions to kernel task */
7569 if (task == kernel_task) {
7570 return;
7571 }
7572
7573 EXC_GUARD_ENCODE_TYPE(code, guard_type);
7574 EXC_GUARD_ENCODE_FLAVOR(code, reason);
7575 EXC_GUARD_ENCODE_TARGET(code, target);
7576
7577 if (task->task_exc_guard & TASK_EXC_GUARD_VM_FATAL) {
7578 fatal = TRUE;
7579 }
7580 thread_guard_violation(current_thread(), code, subcode, fatal);
7581 }
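#if 0 /* Illustrative sketch, not part of the original source. */
/*
 * A minimal sketch of how vm_map_delete() below might be called with a
 * locked map: remove one kernel wiring on the way out and tolerate
 * gaps in the range.  The helper name and flag combination are
 * hypothetical; the flag values are the ones tested in vm_map_delete()
 * below, and zap_map is simply left NULL here.
 */
static kern_return_t
example_delete_range_locked(
	vm_map_t        map,            /* assumed locked by the caller */
	vm_map_offset_t start,
	vm_map_offset_t end)
{
	return vm_map_delete(map, start, end,
	    VM_MAP_REMOVE_KUNWIRE | VM_MAP_REMOVE_GAPS_OK,
	    VM_MAP_NULL /* zap_map */);
}
#endif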
7582
7583 /*
7584 * vm_map_delete: [ internal use only ]
7585 *
7586 * Deallocates the given address range from the target map.
7587 * Removes all user wirings. Unwires one kernel wiring if
7588 * VM_MAP_REMOVE_KUNWIRE is set. Waits for kernel wirings to go
7589 * away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set. Sleeps
7590 * interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
7591 *
7592 * This routine is called with map locked and leaves map locked.
7593 */
7594 static kern_return_t
7595 vm_map_delete(
7596 vm_map_t map,
7597 vm_map_offset_t start,
7598 vm_map_offset_t end,
7599 int flags,
7600 vm_map_t zap_map)
7601 {
7602 vm_map_entry_t entry, next;
7603 struct vm_map_entry *first_entry, tmp_entry;
7604 vm_map_offset_t s;
7605 vm_object_t object;
7606 boolean_t need_wakeup;
7607 unsigned int last_timestamp = ~0; /* unlikely value */
7608 int interruptible;
7609 vm_map_offset_t gap_start;
7610 __unused vm_map_offset_t save_start = start;
7611 __unused vm_map_offset_t save_end = end;
7612 const vm_map_offset_t FIND_GAP = 1; /* a not page aligned value */
7613 const vm_map_offset_t GAPS_OK = 2; /* a different not page aligned value */
7614
7615 if (map != kernel_map && !(flags & VM_MAP_REMOVE_GAPS_OK)) {
7616 gap_start = FIND_GAP;
7617 } else {
7618 gap_start = GAPS_OK;
7619 }
7620
7621 interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
7622 THREAD_ABORTSAFE : THREAD_UNINT;
7623
7624 /*
7625 * All our DMA I/O operations in IOKit are currently done by
7626 * wiring through the map entries of the task requesting the I/O.
7627 * Because of this, we must always wait for kernel wirings
7628 * to go away on the entries before deleting them.
7629 *
7630 * Any caller who wants to actually remove a kernel wiring
7631 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
7632 * properly remove one wiring instead of blasting through
7633 * them all.
7634 */
7635 flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;
7636
7637 while (1) {
7638 /*
7639 * Find the start of the region, and clip it
7640 */
7641 if (vm_map_lookup_entry(map, start, &first_entry)) {
7642 entry = first_entry;
7643 if (map == kalloc_map &&
7644 (entry->vme_start != start ||
7645 entry->vme_end != end)) {
7646 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7647 "mismatched entry %p [0x%llx:0x%llx]\n",
7648 map,
7649 (uint64_t)start,
7650 (uint64_t)end,
7651 entry,
7652 (uint64_t)entry->vme_start,
7653 (uint64_t)entry->vme_end);
7654 }
7655
7656 /*
7657 * If in a superpage, extend the range to include the start of the mapping.
7658 */
7659 if (entry->superpage_size && (start & ~SUPERPAGE_MASK)) {
7660 start = SUPERPAGE_ROUND_DOWN(start);
7661 continue;
7662 }
7663
7664 if (start == entry->vme_start) {
7665 /*
7666 * No need to clip. We don't want to cause
7667 * any unnecessary unnesting in this case...
7668 */
7669 } else {
7670 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
7671 entry->map_aligned &&
7672 !VM_MAP_PAGE_ALIGNED(
7673 start,
7674 VM_MAP_PAGE_MASK(map))) {
7675 /*
7676 * The entry will no longer be
7677 * map-aligned after clipping
7678 * and the caller said it's OK.
7679 */
7680 entry->map_aligned = FALSE;
7681 }
7682 if (map == kalloc_map) {
7683 panic("vm_map_delete(%p,0x%llx,0x%llx):"
7684 " clipping %p at 0x%llx\n",
7685 map,
7686 (uint64_t)start,
7687 (uint64_t)end,
7688 entry,
7689 (uint64_t)start);
7690 }
7691 vm_map_clip_start(map, entry, start);
7692 }
7693
7694 /*
7695 * Fix the lookup hint now, rather than each
7696 * time through the loop.
7697 */
7698 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
7699 } else {
7700 if (map->pmap == kernel_pmap &&
7701 os_ref_get_count(&map->map_refcnt) != 0) {
7702 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7703 "no map entry at 0x%llx\n",
7704 map,
7705 (uint64_t)start,
7706 (uint64_t)end,
7707 (uint64_t)start);
7708 }
7709 entry = first_entry->vme_next;
7710 if (gap_start == FIND_GAP) {
7711 gap_start = start;
7712 }
7713 }
7714 break;
7715 }
7716 if (entry->superpage_size) {
7717 end = SUPERPAGE_ROUND_UP(end);
7718 }
7719
7720 need_wakeup = FALSE;
7721 /*
7722 * Step through all entries in this region
7723 */
7724 s = entry->vme_start;
7725 while ((entry != vm_map_to_entry(map)) && (s < end)) {
7726 /*
7727 * At this point, we have deleted all the memory entries
7728 * between "start" and "s". We still need to delete
7729 * all memory entries between "s" and "end".
7730 * While we were blocked and the map was unlocked, some
7731 * new memory entries could have been re-allocated between
7732 * "start" and "s" and we don't want to mess with those.
7733 * Some of those entries could even have been re-assembled
7734 * with an entry after "s" (in vm_map_simplify_entry()), so
7735 * we may have to vm_map_clip_start() again.
7736 */
7737
7738 if (entry->vme_start >= s) {
7739 /*
7740 * This entry starts on or after "s"
7741 * so no need to clip its start.
7742 */
7743 } else {
7744 /*
7745 * This entry has been re-assembled by a
7746 * vm_map_simplify_entry(). We need to
7747 * re-clip its start.
7748 */
7749 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
7750 entry->map_aligned &&
7751 !VM_MAP_PAGE_ALIGNED(s,
7752 VM_MAP_PAGE_MASK(map))) {
7753 /*
7754 * The entry will no longer be map-aligned
7755 * after clipping and the caller said it's OK.
7756 */
7757 entry->map_aligned = FALSE;
7758 }
7759 if (map == kalloc_map) {
7760 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7761 "clipping %p at 0x%llx\n",
7762 map,
7763 (uint64_t)start,
7764 (uint64_t)end,
7765 entry,
7766 (uint64_t)s);
7767 }
7768 vm_map_clip_start(map, entry, s);
7769 }
7770 if (entry->vme_end <= end) {
7771 /*
7772 * This entry is going away completely, so no need
7773 * to clip and possibly cause an unnecessary unnesting.
7774 */
7775 } else {
7776 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
7777 entry->map_aligned &&
7778 !VM_MAP_PAGE_ALIGNED(end,
7779 VM_MAP_PAGE_MASK(map))) {
7780 /*
7781 * The entry will no longer be map-aligned
7782 * after clipping and the caller said it's OK.
7783 */
7784 entry->map_aligned = FALSE;
7785 }
7786 if (map == kalloc_map) {
7787 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7788 "clipping %p at 0x%llx\n",
7789 map,
7790 (uint64_t)start,
7791 (uint64_t)end,
7792 entry,
7793 (uint64_t)end);
7794 }
7795 vm_map_clip_end(map, entry, end);
7796 }
7797
7798 if (entry->permanent) {
7799 if (map->pmap == kernel_pmap) {
7800 panic("%s(%p,0x%llx,0x%llx): "
7801 "attempt to remove permanent "
7802 "VM map entry "
7803 "%p [0x%llx:0x%llx]\n",
7804 __FUNCTION__,
7805 map,
7806 (uint64_t) start,
7807 (uint64_t) end,
7808 entry,
7809 (uint64_t) entry->vme_start,
7810 (uint64_t) entry->vme_end);
7811 } else if (flags & VM_MAP_REMOVE_IMMUTABLE) {
7812 // printf("FBDP %d[%s] removing permanent entry %p [0x%llx:0x%llx] prot 0x%x/0x%x\n", proc_selfpid(), (current_task()->bsd_info ? proc_name_address(current_task()->bsd_info) : "?"), entry, (uint64_t)entry->vme_start, (uint64_t)entry->vme_end, entry->protection, entry->max_protection);
7813 entry->permanent = FALSE;
7814 #if PMAP_CS
7815 } else if ((entry->protection & VM_PROT_EXECUTE) && !pmap_cs_enforced(map->pmap)) {
7816 entry->permanent = FALSE;
7817
7818 printf("%d[%s] %s(0x%llx,0x%llx): "
7819 "pmap_cs disabled, allowing for permanent executable entry [0x%llx:0x%llx] "
7820 "prot 0x%x/0x%x\n",
7821 proc_selfpid(),
7822 (current_task()->bsd_info
7823 ? proc_name_address(current_task()->bsd_info)
7824 : "?"),
7825 __FUNCTION__,
7826 (uint64_t) start,
7827 (uint64_t) end,
7828 (uint64_t)entry->vme_start,
7829 (uint64_t)entry->vme_end,
7830 entry->protection,
7831 entry->max_protection);
7832 #endif
7833 } else {
7834 if (vm_map_executable_immutable_verbose) {
7835 printf("%d[%s] %s(0x%llx,0x%llx): "
7836 "permanent entry [0x%llx:0x%llx] "
7837 "prot 0x%x/0x%x\n",
7838 proc_selfpid(),
7839 (current_task()->bsd_info
7840 ? proc_name_address(current_task()->bsd_info)
7841 : "?"),
7842 __FUNCTION__,
7843 (uint64_t) start,
7844 (uint64_t) end,
7845 (uint64_t)entry->vme_start,
7846 (uint64_t)entry->vme_end,
7847 entry->protection,
7848 entry->max_protection);
7849 }
7850 /*
7851 * dtrace -n 'vm_map_delete_permanent { print("start=0x%llx end=0x%llx prot=0x%x/0x%x\n", arg0, arg1, arg2, arg3); stack(); ustack(); }'
7852 */
7853 DTRACE_VM5(vm_map_delete_permanent,
7854 vm_map_offset_t, entry->vme_start,
7855 vm_map_offset_t, entry->vme_end,
7856 vm_prot_t, entry->protection,
7857 vm_prot_t, entry->max_protection,
7858 int, VME_ALIAS(entry));
7859 }
7860 }
7861
7862
7863 if (entry->in_transition) {
7864 wait_result_t wait_result;
7865
7866 /*
7867 * Another thread is wiring/unwiring this entry.
7868 * Let the other thread know we are waiting.
7869 */
7870 assert(s == entry->vme_start);
7871 entry->needs_wakeup = TRUE;
7872
7873 /*
7874 * wake up anybody waiting on entries that we have
7875 * already unwired/deleted.
7876 */
7877 if (need_wakeup) {
7878 vm_map_entry_wakeup(map);
7879 need_wakeup = FALSE;
7880 }
7881
7882 wait_result = vm_map_entry_wait(map, interruptible);
7883
7884 if (interruptible &&
7885 wait_result == THREAD_INTERRUPTED) {
7886 /*
7887 * We do not clear the needs_wakeup flag,
7888 * since we cannot tell if we were the only one.
7889 */
7890 return KERN_ABORTED;
7891 }
7892
7893 /*
7894 * The entry could have been clipped or it
7895 * may not exist anymore. Look it up again.
7896 */
7897 if (!vm_map_lookup_entry(map, s, &first_entry)) {
7898 /*
7899 * User: use the next entry
7900 */
7901 if (gap_start == FIND_GAP) {
7902 gap_start = s;
7903 }
7904 entry = first_entry->vme_next;
7905 s = entry->vme_start;
7906 } else {
7907 entry = first_entry;
7908 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
7909 }
7910 last_timestamp = map->timestamp;
7911 continue;
7912 } /* end in_transition */
7913
7914 if (entry->wired_count) {
7915 boolean_t user_wire;
7916
7917 user_wire = entry->user_wired_count > 0;
7918
7919 /*
7920 * Remove a kernel wiring if requested
7921 */
7922 if (flags & VM_MAP_REMOVE_KUNWIRE) {
7923 entry->wired_count--;
7924 }
7925
7926 /*
7927 * Remove all user wirings for proper accounting
7928 */
7929 if (entry->user_wired_count > 0) {
7930 while (entry->user_wired_count) {
7931 subtract_wire_counts(map, entry, user_wire);
7932 }
7933 }
7934
7935 if (entry->wired_count != 0) {
7936 assert(map != kernel_map);
7937 /*
7938 * Cannot continue. Typical case is when
7939 * a user thread has physical io pending
7940 * on this page. Either wait for the
7941 * kernel wiring to go away or return an
7942 * error.
7943 */
7944 if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) {
7945 wait_result_t wait_result;
7946
7947 assert(s == entry->vme_start);
7948 entry->needs_wakeup = TRUE;
7949 wait_result = vm_map_entry_wait(map,
7950 interruptible);
7951
7952 if (interruptible &&
7953 wait_result == THREAD_INTERRUPTED) {
7954 /*
7955 * We do not clear the
7956 * needs_wakeup flag, since we
7957 * cannot tell if we were the
7958 * only one.
7959 */
7960 return KERN_ABORTED;
7961 }
7962
7963 /*
7964 * The entry could have been clipped or
7965 * it may not exist anymore. Look it
7966 * up again.
7967 */
7968 if (!vm_map_lookup_entry(map, s,
7969 &first_entry)) {
7970 assert(map != kernel_map);
7971 /*
7972 * User: use the next entry
7973 */
7974 if (gap_start == FIND_GAP) {
7975 gap_start = s;
7976 }
7977 entry = first_entry->vme_next;
7978 s = entry->vme_start;
7979 } else {
7980 entry = first_entry;
7981 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
7982 }
7983 last_timestamp = map->timestamp;
7984 continue;
7985 } else {
7986 return KERN_FAILURE;
7987 }
7988 }
7989
7990 entry->in_transition = TRUE;
7991 /*
7992 * copy current entry. see comment in vm_map_wire()
7993 */
7994 tmp_entry = *entry;
7995 assert(s == entry->vme_start);
7996
7997 /*
7998 * We can unlock the map now. The in_transition
7999 * state guarantees existence of the entry.
8000 */
8001 vm_map_unlock(map);
8002
8003 if (tmp_entry.is_sub_map) {
8004 vm_map_t sub_map;
8005 vm_map_offset_t sub_start, sub_end;
8006 pmap_t pmap;
8007 vm_map_offset_t pmap_addr;
8008
8009
8010 sub_map = VME_SUBMAP(&tmp_entry);
8011 sub_start = VME_OFFSET(&tmp_entry);
8012 sub_end = sub_start + (tmp_entry.vme_end -
8013 tmp_entry.vme_start);
8014 if (tmp_entry.use_pmap) {
8015 pmap = sub_map->pmap;
8016 pmap_addr = tmp_entry.vme_start;
8017 } else {
8018 pmap = map->pmap;
8019 pmap_addr = tmp_entry.vme_start;
8020 }
8021 (void) vm_map_unwire_nested(sub_map,
8022 sub_start, sub_end,
8023 user_wire,
8024 pmap, pmap_addr);
8025 } else {
8026 if (VME_OBJECT(&tmp_entry) == kernel_object) {
8027 pmap_protect_options(
8028 map->pmap,
8029 tmp_entry.vme_start,
8030 tmp_entry.vme_end,
8031 VM_PROT_NONE,
8032 PMAP_OPTIONS_REMOVE,
8033 NULL);
8034 }
8035 vm_fault_unwire(map, &tmp_entry,
8036 VME_OBJECT(&tmp_entry) == kernel_object,
8037 map->pmap, tmp_entry.vme_start);
8038 }
8039
8040 vm_map_lock(map);
8041
8042 if (last_timestamp + 1 != map->timestamp) {
8043 /*
8044 * Find the entry again. It could have
8045 * been clipped after we unlocked the map.
8046 */
8047 if (!vm_map_lookup_entry(map, s, &first_entry)) {
8048 assert((map != kernel_map) &&
8049 (!entry->is_sub_map));
8050 if (gap_start == FIND_GAP) {
8051 gap_start = s;
8052 }
8053 first_entry = first_entry->vme_next;
8054 s = first_entry->vme_start;
8055 } else {
8056 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8057 }
8058 } else {
8059 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8060 first_entry = entry;
8061 }
8062
8063 last_timestamp = map->timestamp;
8064
8065 entry = first_entry;
8066 while ((entry != vm_map_to_entry(map)) &&
8067 (entry->vme_start < tmp_entry.vme_end)) {
8068 assert(entry->in_transition);
8069 entry->in_transition = FALSE;
8070 if (entry->needs_wakeup) {
8071 entry->needs_wakeup = FALSE;
8072 need_wakeup = TRUE;
8073 }
8074 entry = entry->vme_next;
8075 }
8076 /*
8077 * We have unwired the entry(s). Go back and
8078 * delete them.
8079 */
8080 entry = first_entry;
8081 continue;
8082 }
8083
8084 /* entry is unwired */
8085 assert(entry->wired_count == 0);
8086 assert(entry->user_wired_count == 0);
8087
8088 assert(s == entry->vme_start);
8089
8090 if (flags & VM_MAP_REMOVE_NO_PMAP_CLEANUP) {
8091 /*
8092 * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
8093 * vm_map_delete(), some map entries might have been
8094 * transferred to a "zap_map", which doesn't have a
8095 * pmap. The original pmap has already been flushed
8096 * in the vm_map_delete() call targeting the original
8097 * map, but when we get to destroying the "zap_map",
8098 * we don't have any pmap to flush, so let's just skip
8099 * all this.
8100 */
8101 } else if (entry->is_sub_map) {
8102 if (entry->use_pmap) {
8103 #ifndef NO_NESTED_PMAP
8104 int pmap_flags;
8105
8106 if (flags & VM_MAP_REMOVE_NO_UNNESTING) {
8107 /*
8108 * This is the final cleanup of the
8109 * address space being terminated.
8110 * No new mappings are expected and
8111 * we don't really need to unnest the
8112 * shared region (and lose the "global"
8113 * pmap mappings, if applicable).
8114 *
8115 * Tell the pmap layer that we're
8116 * "clean" wrt nesting.
8117 */
8118 pmap_flags = PMAP_UNNEST_CLEAN;
8119 } else {
8120 /*
8121 * We're unmapping part of the nested
8122 * shared region, so we can't keep the
8123 * nested pmap.
8124 */
8125 pmap_flags = 0;
8126 }
8127 pmap_unnest_options(
8128 map->pmap,
8129 (addr64_t)entry->vme_start,
8130 entry->vme_end - entry->vme_start,
8131 pmap_flags);
8132 #endif /* NO_NESTED_PMAP */
8133 if (map->mapped_in_other_pmaps &&
8134 os_ref_get_count(&map->map_refcnt) != 0) {
8135 /* clean up parent map/maps */
8136 vm_map_submap_pmap_clean(
8137 map, entry->vme_start,
8138 entry->vme_end,
8139 VME_SUBMAP(entry),
8140 VME_OFFSET(entry));
8141 }
8142 } else {
8143 vm_map_submap_pmap_clean(
8144 map, entry->vme_start, entry->vme_end,
8145 VME_SUBMAP(entry),
8146 VME_OFFSET(entry));
8147 }
8148 } else if (VME_OBJECT(entry) != kernel_object &&
8149 VME_OBJECT(entry) != compressor_object) {
8150 object = VME_OBJECT(entry);
8151 if (map->mapped_in_other_pmaps &&
8152 os_ref_get_count(&map->map_refcnt) != 0) {
8153 vm_object_pmap_protect_options(
8154 object, VME_OFFSET(entry),
8155 entry->vme_end - entry->vme_start,
8156 PMAP_NULL,
8157 entry->vme_start,
8158 VM_PROT_NONE,
8159 PMAP_OPTIONS_REMOVE);
8160 } else if ((VME_OBJECT(entry) != VM_OBJECT_NULL) ||
8161 (map->pmap == kernel_pmap)) {
8162 /* Remove translations associated
8163 * with this range unless the entry
8164 * does not have an object, or
8165 * it's the kernel map or a descendant
8166 * since the platform could potentially
8167 * create "backdoor" mappings invisible
8168 * to the VM. It is expected that
8169 * objectless, non-kernel ranges
8170 * do not have such VM invisible
8171 * translations.
8172 */
8173 pmap_remove_options(map->pmap,
8174 (addr64_t)entry->vme_start,
8175 (addr64_t)entry->vme_end,
8176 PMAP_OPTIONS_REMOVE);
8177 }
8178 }
8179
8180 if (entry->iokit_acct) {
8181 /* alternate accounting */
8182 DTRACE_VM4(vm_map_iokit_unmapped_region,
8183 vm_map_t, map,
8184 vm_map_offset_t, entry->vme_start,
8185 vm_map_offset_t, entry->vme_end,
8186 int, VME_ALIAS(entry));
8187 vm_map_iokit_unmapped_region(map,
8188 (entry->vme_end -
8189 entry->vme_start));
8190 entry->iokit_acct = FALSE;
8191 entry->use_pmap = FALSE;
8192 }
8193
8194 /*
8195 * All pmap mappings for this map entry must have been
8196 * cleared by now.
8197 */
8198 #if DEBUG
8199 assert(vm_map_pmap_is_empty(map,
8200 entry->vme_start,
8201 entry->vme_end));
8202 #endif /* DEBUG */
8203
8204 next = entry->vme_next;
8205
8206 if (map->pmap == kernel_pmap &&
8207 os_ref_get_count(&map->map_refcnt) != 0 &&
8208 entry->vme_end < end &&
8209 (next == vm_map_to_entry(map) ||
8210 next->vme_start != entry->vme_end)) {
8211 panic("vm_map_delete(%p,0x%llx,0x%llx): "
8212 "hole after %p at 0x%llx\n",
8213 map,
8214 (uint64_t)start,
8215 (uint64_t)end,
8216 entry,
8217 (uint64_t)entry->vme_end);
8218 }
8219
8220 /*
8221 * If the desired range didn't end with "entry", then there is a gap if
8222 * we wrapped around to the start of the map or if "entry" and "next"
8223 * aren't contiguous.
8224 *
8225 * The vm_map_round_page() is needed since an entry can be less than VM_MAP_PAGE_MASK() sized.
8226 * For example, on devices with 4K hardware pages, an entry can end on a 4K boundary even though the map's page size is now 16K.
8227 */
8228 if (gap_start == FIND_GAP &&
8229 vm_map_round_page(entry->vme_end, VM_MAP_PAGE_MASK(map)) < end &&
8230 (next == vm_map_to_entry(map) || entry->vme_end != next->vme_start)) {
8231 gap_start = entry->vme_end;
8232 }
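/*
 * Illustrative example (added for clarity, not part of the original
 * source; addresses are hypothetical): with a 16K map page size
 * (VM_MAP_PAGE_MASK(map) == 0x3fff), an entry ending at 0x100001000
 * with end == 0x100008000 and next->vme_start == 0x100004000 rounds
 * up to 0x100004000 < end, so gap_start is set to 0x100001000.  If the
 * same entry instead ended at 0x100005000, rounding gives 0x100008000,
 * which is not < end, so no gap is reported even though vme_end and
 * next->vme_start differ by less than a map page.
 */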
8233 s = next->vme_start;
8234 last_timestamp = map->timestamp;
8235
8236 if (entry->permanent) {
8237 /*
8238 * A permanent entry can not be removed, so leave it
8239 * in place but remove all access permissions.
8240 */
8241 entry->protection = VM_PROT_NONE;
8242 entry->max_protection = VM_PROT_NONE;
8243 } else if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) &&
8244 zap_map != VM_MAP_NULL) {
8245 vm_map_size_t entry_size;
8246 /*
8247 * The caller wants to save the affected VM map entries
8248 * into the "zap_map". The caller will take care of
8249 * these entries.
8250 */
8251 /* unlink the entry from "map" ... */
8252 vm_map_store_entry_unlink(map, entry);
8253 /* ... and add it to the end of the "zap_map" */
8254 vm_map_store_entry_link(zap_map,
8255 vm_map_last_entry(zap_map),
8256 entry,
8257 VM_MAP_KERNEL_FLAGS_NONE);
8258 entry_size = entry->vme_end - entry->vme_start;
8259 map->size -= entry_size;
8260 zap_map->size += entry_size;
8261 /* we didn't unlock the map, so no timestamp increase */
8262 last_timestamp--;
8263 } else {
8264 vm_map_entry_delete(map, entry);
8265 /* vm_map_entry_delete unlocks the map */
8266 vm_map_lock(map);
8267 }
8268
8269 entry = next;
8270
8271 if (entry == vm_map_to_entry(map)) {
8272 break;
8273 }
8274 if (last_timestamp + 1 != map->timestamp) {
8275 /*
8276 * We are responsible for deleting everything
8277 * from the given space. If someone has interfered,
8278 * we pick up where we left off. Back fills should
8279 * be all right for anyone, except map_delete, and
8280 * we have to assume that the task has been fully
8281 * disabled before we get here.
8282 */
8283 if (!vm_map_lookup_entry(map, s, &entry)) {
8284 entry = entry->vme_next;
8285
8286 /*
8287 * Nothing found for s. If we weren't already done, then there is a gap.
8288 */
8289 if (gap_start == FIND_GAP && s < end) {
8290 gap_start = s;
8291 }
8292 s = entry->vme_start;
8293 } else {
8294 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8295 }
8296 /*
8297 * Others can not only allocate behind us, we can
8298 * also see entries coalesce while we don't have the map lock.
8299 */
8300 if (entry == vm_map_to_entry(map)) {
8301 break;
8302 }
8303 }
8304 last_timestamp = map->timestamp;
8305 }
8306
8307 if (map->wait_for_space) {
8308 thread_wakeup((event_t) map);
8309 }
8310 /*
8311 * wake up anybody waiting on entries that we have already deleted.
8312 */
8313 if (need_wakeup) {
8314 vm_map_entry_wakeup(map);
8315 }
8316
8317 if (gap_start != FIND_GAP && gap_start != GAPS_OK) {
8318 DTRACE_VM3(kern_vm_deallocate_gap,
8319 vm_map_offset_t, gap_start,
8320 vm_map_offset_t, save_start,
8321 vm_map_offset_t, save_end);
8322 if (!(flags & VM_MAP_REMOVE_GAPS_OK)) {
8323 vm_map_guard_exception(gap_start, kGUARD_EXC_DEALLOC_GAP);
8324 }
8325 }
8326
8327 return KERN_SUCCESS;
8328 }
8329
8330 /*
8331 * vm_map_remove:
8332 *
8333 * Remove the given address range from the target map.
8334 * This is the exported form of vm_map_delete.
8335 */
8336 kern_return_t
8337 vm_map_remove(
8338 vm_map_t map,
8339 vm_map_offset_t start,
8340 vm_map_offset_t end,
8341 boolean_t flags)
8342 {
8343 kern_return_t result;
8344
8345 vm_map_lock(map);
8346 VM_MAP_RANGE_CHECK(map, start, end);
8347 /*
8348 * For the zone_map, the kernel controls the allocation/freeing of memory.
8349 * Any free to the zone_map should be within the bounds of the map and
8350 * should free up memory. If the VM_MAP_RANGE_CHECK() silently converts a
8351 * free to the zone_map into a no-op, there is a problem and we should
8352 * panic.
8353 */
8354 if ((map == zone_map) && (start == end)) {
8355 panic("Nothing being freed to the zone_map. start = end = %p\n", (void *)start);
8356 }
8357 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
8358 vm_map_unlock(map);
8359
8360 return result;
8361 }
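/*
 * Illustrative sketch (added for clarity, not part of the original
 * source): how a kernel caller might tear down a mapping it owns.
 * Assumes the VM_MAP_REMOVE_NO_FLAGS definition from vm_map.h; the
 * helper name is hypothetical.
 */
static void
example_remove_mapping(
	vm_map_t        map,
	vm_map_offset_t addr,
	vm_map_size_t   size)
{
	kern_return_t kr;

	/* round the range the same way it was rounded when it was entered */
	kr = vm_map_remove(map,
	    vm_map_trunc_page(addr, VM_MAP_PAGE_MASK(map)),
	    vm_map_round_page(addr + size, VM_MAP_PAGE_MASK(map)),
	    VM_MAP_REMOVE_NO_FLAGS);
	assert(kr == KERN_SUCCESS);
}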
8362
8363 /*
8364 * vm_map_remove_locked:
8365 *
8366 * Remove the given address range from the target locked map.
8367 * This is the exported form of vm_map_delete.
8368 */
8369 kern_return_t
8370 vm_map_remove_locked(
8371 vm_map_t map,
8372 vm_map_offset_t start,
8373 vm_map_offset_t end,
8374 boolean_t flags)
8375 {
8376 kern_return_t result;
8377
8378 VM_MAP_RANGE_CHECK(map, start, end);
8379 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
8380 return result;
8381 }
8382
8383
8384 /*
8385 * Routine: vm_map_copy_allocate
8386 *
8387 * Description:
8388 * Allocates and initializes a map copy object.
8389 */
8390 static vm_map_copy_t
8391 vm_map_copy_allocate(void)
8392 {
8393 vm_map_copy_t new_copy;
8394
8395 new_copy = zalloc(vm_map_copy_zone);
8396 bzero(new_copy, sizeof(*new_copy));
8397 new_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
8398 vm_map_copy_first_entry(new_copy) = vm_map_copy_to_entry(new_copy);
8399 vm_map_copy_last_entry(new_copy) = vm_map_copy_to_entry(new_copy);
8400 return new_copy;
8401 }
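/*
 * Illustrative sketch (added for clarity, not part of the original
 * source): a freshly allocated copy object is an empty circular entry
 * list whose head and tail links both point at the header sentinel;
 * callers then pick the copy type.  The helper name is hypothetical.
 */
static void
example_copy_allocate_invariants(void)
{
	vm_map_copy_t copy;

	copy = vm_map_copy_allocate();
	copy->type = VM_MAP_COPY_ENTRY_LIST;
	assert(vm_map_copy_first_entry(copy) == vm_map_copy_to_entry(copy));
	assert(vm_map_copy_last_entry(copy) == vm_map_copy_to_entry(copy));
	vm_map_copy_discard(copy);      /* empty entry list: just zfree'd */
}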
8402
8403 /*
8404 * Routine: vm_map_copy_discard
8405 *
8406 * Description:
8407 * Dispose of a map copy object (returned by
8408 * vm_map_copyin).
8409 */
8410 void
8411 vm_map_copy_discard(
8412 vm_map_copy_t copy)
8413 {
8414 if (copy == VM_MAP_COPY_NULL) {
8415 return;
8416 }
8417
8418 switch (copy->type) {
8419 case VM_MAP_COPY_ENTRY_LIST:
8420 while (vm_map_copy_first_entry(copy) !=
8421 vm_map_copy_to_entry(copy)) {
8422 vm_map_entry_t entry = vm_map_copy_first_entry(copy);
8423
8424 vm_map_copy_entry_unlink(copy, entry);
8425 if (entry->is_sub_map) {
8426 vm_map_deallocate(VME_SUBMAP(entry));
8427 } else {
8428 vm_object_deallocate(VME_OBJECT(entry));
8429 }
8430 vm_map_copy_entry_dispose(copy, entry);
8431 }
8432 break;
8433 case VM_MAP_COPY_OBJECT:
8434 vm_object_deallocate(copy->cpy_object);
8435 break;
8436 case VM_MAP_COPY_KERNEL_BUFFER:
8437
8438 /*
8439 * The vm_map_copy_t and possibly the data buffer were
8440 * allocated by a single call to kalloc(), i.e. the
8441 * vm_map_copy_t was not allocated out of the zone.
8442 */
8443 if (copy->size > msg_ool_size_small || copy->offset) {
8444 panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
8445 (long long)copy->size, (long long)copy->offset);
8446 }
8447 kfree(copy, copy->size + cpy_kdata_hdr_sz);
8448 return;
8449 }
8450 zfree(vm_map_copy_zone, copy);
8451 }
8452
8453 /*
8454 * Routine: vm_map_copy_copy
8455 *
8456 * Description:
8457 * Move the information in a map copy object to
8458 * a new map copy object, leaving the old one
8459 * empty.
8460 *
8461 * This is used by kernel routines that need
8462 * to look at out-of-line data (in copyin form)
8463 * before deciding whether to return SUCCESS.
8464 * If the routine returns FAILURE, the original
8465 * copy object will be deallocated; therefore,
8466 * these routines must make a copy of the copy
8467 * object and leave the original empty so that
8468 * deallocation will not fail.
8469 */
8470 vm_map_copy_t
8471 vm_map_copy_copy(
8472 vm_map_copy_t copy)
8473 {
8474 vm_map_copy_t new_copy;
8475
8476 if (copy == VM_MAP_COPY_NULL) {
8477 return VM_MAP_COPY_NULL;
8478 }
8479
8480 /*
8481 * Allocate a new copy object, and copy the information
8482 * from the old one into it.
8483 */
8484
8485 new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
8486 *new_copy = *copy;
8487
8488 if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
8489 /*
8490 * The links in the entry chain must be
8491 * changed to point to the new copy object.
8492 */
8493 vm_map_copy_first_entry(copy)->vme_prev
8494 = vm_map_copy_to_entry(new_copy);
8495 vm_map_copy_last_entry(copy)->vme_next
8496 = vm_map_copy_to_entry(new_copy);
8497 }
8498
8499 /*
8500 * Change the old copy object into one that contains
8501 * nothing to be deallocated.
8502 */
8503 copy->type = VM_MAP_COPY_OBJECT;
8504 copy->cpy_object = VM_OBJECT_NULL;
8505
8506 /*
8507 * Return the new object.
8508 */
8509 return new_copy;
8510 }
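/*
 * Illustrative sketch (added for clarity, not part of the original
 * source) of the pattern described above: a routine that must inspect
 * out-of-line data before committing moves the contents into a private
 * copy object, so a later vm_map_copy_discard() of the (now empty)
 * original cannot fail.  "example_consume" is a hypothetical consumer.
 */
static boolean_t example_consume(vm_map_copy_t copy);   /* hypothetical */

static kern_return_t
example_inspect_then_consume(vm_map_copy_t copy)
{
	vm_map_copy_t private_copy;

	private_copy = vm_map_copy_copy(copy);  /* "copy" is left empty */
	if (!example_consume(private_copy)) {
		vm_map_copy_discard(private_copy);
		return KERN_FAILURE;    /* caller may still discard "copy" */
	}
	return KERN_SUCCESS;
}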
8511
8512 static kern_return_t
8513 vm_map_overwrite_submap_recurse(
8514 vm_map_t dst_map,
8515 vm_map_offset_t dst_addr,
8516 vm_map_size_t dst_size)
8517 {
8518 vm_map_offset_t dst_end;
8519 vm_map_entry_t tmp_entry;
8520 vm_map_entry_t entry;
8521 kern_return_t result;
8522 boolean_t encountered_sub_map = FALSE;
8523
8524
8525
8526 /*
8527 * Verify that the destination is all writeable
8528 * initially. We have to trunc the destination
8529 * address and round the copy size or we'll end up
8530 * splitting entries in strange ways.
8531 */
8532
8533 dst_end = vm_map_round_page(dst_addr + dst_size,
8534 VM_MAP_PAGE_MASK(dst_map));
8535 vm_map_lock(dst_map);
8536
8537 start_pass_1:
8538 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
8539 vm_map_unlock(dst_map);
8540 return KERN_INVALID_ADDRESS;
8541 }
8542
8543 vm_map_clip_start(dst_map,
8544 tmp_entry,
8545 vm_map_trunc_page(dst_addr,
8546 VM_MAP_PAGE_MASK(dst_map)));
8547 if (tmp_entry->is_sub_map) {
8548 /* clipping did unnest if needed */
8549 assert(!tmp_entry->use_pmap);
8550 }
8551
8552 for (entry = tmp_entry;;) {
8553 vm_map_entry_t next;
8554
8555 next = entry->vme_next;
8556 while (entry->is_sub_map) {
8557 vm_map_offset_t sub_start;
8558 vm_map_offset_t sub_end;
8559 vm_map_offset_t local_end;
8560
8561 if (entry->in_transition) {
8562 /*
8563 * Say that we are waiting, and wait for entry.
8564 */
8565 entry->needs_wakeup = TRUE;
8566 vm_map_entry_wait(dst_map, THREAD_UNINT);
8567
8568 goto start_pass_1;
8569 }
8570
8571 encountered_sub_map = TRUE;
8572 sub_start = VME_OFFSET(entry);
8573
8574 if (entry->vme_end < dst_end) {
8575 sub_end = entry->vme_end;
8576 } else {
8577 sub_end = dst_end;
8578 }
8579 sub_end -= entry->vme_start;
8580 sub_end += VME_OFFSET(entry);
8581 local_end = entry->vme_end;
8582 vm_map_unlock(dst_map);
8583
8584 result = vm_map_overwrite_submap_recurse(
8585 VME_SUBMAP(entry),
8586 sub_start,
8587 sub_end - sub_start);
8588
8589 if (result != KERN_SUCCESS) {
8590 return result;
8591 }
8592 if (dst_end <= entry->vme_end) {
8593 return KERN_SUCCESS;
8594 }
8595 vm_map_lock(dst_map);
8596 if (!vm_map_lookup_entry(dst_map, local_end,
8597 &tmp_entry)) {
8598 vm_map_unlock(dst_map);
8599 return KERN_INVALID_ADDRESS;
8600 }
8601 entry = tmp_entry;
8602 next = entry->vme_next;
8603 }
8604
8605 if (!(entry->protection & VM_PROT_WRITE)) {
8606 vm_map_unlock(dst_map);
8607 return KERN_PROTECTION_FAILURE;
8608 }
8609
8610 /*
8611 * If the entry is in transition, we must wait
8612 * for it to exit that state. Anything could happen
8613 * when we unlock the map, so start over.
8614 */
8615 if (entry->in_transition) {
8616 /*
8617 * Say that we are waiting, and wait for entry.
8618 */
8619 entry->needs_wakeup = TRUE;
8620 vm_map_entry_wait(dst_map, THREAD_UNINT);
8621
8622 goto start_pass_1;
8623 }
8624
8625 /*
8626 * our range is contained completely within this map entry
8627 */
8628 if (dst_end <= entry->vme_end) {
8629 vm_map_unlock(dst_map);
8630 return KERN_SUCCESS;
8631 }
8632 /*
8633 * check that range specified is contiguous region
8634 */
8635 if ((next == vm_map_to_entry(dst_map)) ||
8636 (next->vme_start != entry->vme_end)) {
8637 vm_map_unlock(dst_map);
8638 return KERN_INVALID_ADDRESS;
8639 }
8640
8641 /*
8642 * Check for permanent objects in the destination.
8643 */
8644 if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
8645 ((!VME_OBJECT(entry)->internal) ||
8646 (VME_OBJECT(entry)->true_share))) {
8647 if (encountered_sub_map) {
8648 vm_map_unlock(dst_map);
8649 return KERN_FAILURE;
8650 }
8651 }
8652
8653
8654 entry = next;
8655 }/* for */
8656 vm_map_unlock(dst_map);
8657 return KERN_SUCCESS;
8658 }
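/*
 * Illustrative example (added for clarity, not part of the original
 * source; addresses are hypothetical) of the sub_start/sub_end
 * computation above: if an entry covering [0x1000000, 0x1800000) maps
 * its submap at VME_OFFSET == 0x200000 and dst_end == 0x1400000, then
 * sub_start == 0x200000 and sub_end == 0x1400000 - 0x1000000 +
 * 0x200000 == 0x600000, so the recursion checks [0x200000, 0x600000)
 * of the submap for writability.
 */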
8659
8660 /*
8661 * Routine: vm_map_copy_overwrite
8662 *
8663 * Description:
8664 * Copy the memory described by the map copy
8665 * object (copy; returned by vm_map_copyin) onto
8666 * the specified destination region (dst_map, dst_addr).
8667 * The destination must be writeable.
8668 *
8669 * Unlike vm_map_copyout, this routine actually
8670 * writes over previously-mapped memory. If the
8671 * previous mapping was to a permanent (user-supplied)
8672 * memory object, it is preserved.
8673 *
8674 * The attributes (protection and inheritance) of the
8675 * destination region are preserved.
8676 *
8677 * If successful, consumes the copy object.
8678 * Otherwise, the caller is responsible for it.
8679 *
8680 * Implementation notes:
8681 * To overwrite aligned temporary virtual memory, it is
8682 * sufficient to remove the previous mapping and insert
8683 * the new copy. This replacement is done either on
8684 * the whole region (if no permanent virtual memory
8685 * objects are embedded in the destination region) or
8686 * in individual map entries.
8687 *
8688 * To overwrite permanent virtual memory, it is necessary
8689 * to copy each page, as the external memory management
8690 * interface currently does not provide any optimizations.
8691 *
8692 * Unaligned memory also has to be copied. It is possible
8693 * to use 'vm_trickery' to copy the aligned data. This is
8694 * not done but not hard to implement.
8695 *
8696 * Once a page of permanent memory has been overwritten,
8697 * it is impossible to interrupt this function; otherwise,
8698 * the call would be neither atomic nor location-independent.
8699 * The kernel-state portion of a user thread must be
8700 * interruptible.
8701 *
8702 * It may be expensive to forward all requests that might
8703 * overwrite permanent memory (vm_write, vm_copy) to
8704 * uninterruptible kernel threads. This routine may be
8705 * called by interruptible threads; however, success is
8706 * not guaranteed -- if the request cannot be performed
8707 * atomically and interruptibly, an error indication is
8708 * returned.
8709 */
8710
8711 static kern_return_t
8712 vm_map_copy_overwrite_nested(
8713 vm_map_t dst_map,
8714 vm_map_address_t dst_addr,
8715 vm_map_copy_t copy,
8716 boolean_t interruptible,
8717 pmap_t pmap,
8718 boolean_t discard_on_success)
8719 {
8720 vm_map_offset_t dst_end;
8721 vm_map_entry_t tmp_entry;
8722 vm_map_entry_t entry;
8723 kern_return_t kr;
8724 boolean_t aligned = TRUE;
8725 boolean_t contains_permanent_objects = FALSE;
8726 boolean_t encountered_sub_map = FALSE;
8727 vm_map_offset_t base_addr;
8728 vm_map_size_t copy_size;
8729 vm_map_size_t total_size;
8730
8731
8732 /*
8733 * Check for null copy object.
8734 */
8735
8736 if (copy == VM_MAP_COPY_NULL) {
8737 return KERN_SUCCESS;
8738 }
8739
8740 /*
8741 * Check for special kernel buffer allocated
8742 * by new_ipc_kmsg_copyin.
8743 */
8744
8745 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
8746 return vm_map_copyout_kernel_buffer(
8747 dst_map, &dst_addr,
8748 copy, copy->size, TRUE, discard_on_success);
8749 }
8750
8751 /*
8752 * Only works for entry lists at the moment. Will
8753 * support page lists later.
8754 */
8755
8756 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
8757
8758 if (copy->size == 0) {
8759 if (discard_on_success) {
8760 vm_map_copy_discard(copy);
8761 }
8762 return KERN_SUCCESS;
8763 }
8764
8765 /*
8766 * Verify that the destination is all writeable
8767 * initially. We have to trunc the destination
8768 * address and round the copy size or we'll end up
8769 * splitting entries in strange ways.
8770 */
8771
8772 if (!VM_MAP_PAGE_ALIGNED(copy->size,
8773 VM_MAP_PAGE_MASK(dst_map)) ||
8774 !VM_MAP_PAGE_ALIGNED(copy->offset,
8775 VM_MAP_PAGE_MASK(dst_map)) ||
8776 !VM_MAP_PAGE_ALIGNED(dst_addr,
8777 VM_MAP_PAGE_MASK(dst_map))) {
8778 aligned = FALSE;
8779 dst_end = vm_map_round_page(dst_addr + copy->size,
8780 VM_MAP_PAGE_MASK(dst_map));
8781 } else {
8782 dst_end = dst_addr + copy->size;
8783 }
8784
8785 vm_map_lock(dst_map);
8786
8787 /* LP64todo - remove this check when vm_map_commpage64()
8788 * no longer has to stuff in a map_entry for the commpage
8789 * above the map's max_offset.
8790 */
8791 if (dst_addr >= dst_map->max_offset) {
8792 vm_map_unlock(dst_map);
8793 return KERN_INVALID_ADDRESS;
8794 }
8795
8796 start_pass_1:
8797 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
8798 vm_map_unlock(dst_map);
8799 return KERN_INVALID_ADDRESS;
8800 }
8801 vm_map_clip_start(dst_map,
8802 tmp_entry,
8803 vm_map_trunc_page(dst_addr,
8804 VM_MAP_PAGE_MASK(dst_map)));
8805 for (entry = tmp_entry;;) {
8806 vm_map_entry_t next = entry->vme_next;
8807
8808 while (entry->is_sub_map) {
8809 vm_map_offset_t sub_start;
8810 vm_map_offset_t sub_end;
8811 vm_map_offset_t local_end;
8812
8813 if (entry->in_transition) {
8814 /*
8815 * Say that we are waiting, and wait for entry.
8816 */
8817 entry->needs_wakeup = TRUE;
8818 vm_map_entry_wait(dst_map, THREAD_UNINT);
8819
8820 goto start_pass_1;
8821 }
8822
8823 local_end = entry->vme_end;
8824 if (!(entry->needs_copy)) {
8825 /* if needs_copy is set, we are a COW submap; */
8826 /* in such a case we just replace it, so */
8827 /* there is no need for the following */
8828 /* check. */
8829 encountered_sub_map = TRUE;
8830 sub_start = VME_OFFSET(entry);
8831
8832 if (entry->vme_end < dst_end) {
8833 sub_end = entry->vme_end;
8834 } else {
8835 sub_end = dst_end;
8836 }
8837 sub_end -= entry->vme_start;
8838 sub_end += VME_OFFSET(entry);
8839 vm_map_unlock(dst_map);
8840
8841 kr = vm_map_overwrite_submap_recurse(
8842 VME_SUBMAP(entry),
8843 sub_start,
8844 sub_end - sub_start);
8845 if (kr != KERN_SUCCESS) {
8846 return kr;
8847 }
8848 vm_map_lock(dst_map);
8849 }
8850
8851 if (dst_end <= entry->vme_end) {
8852 goto start_overwrite;
8853 }
8854 if (!vm_map_lookup_entry(dst_map, local_end,
8855 &entry)) {
8856 vm_map_unlock(dst_map);
8857 return KERN_INVALID_ADDRESS;
8858 }
8859 next = entry->vme_next;
8860 }
8861
8862 if (!(entry->protection & VM_PROT_WRITE)) {
8863 vm_map_unlock(dst_map);
8864 return KERN_PROTECTION_FAILURE;
8865 }
8866
8867 /*
8868 * If the entry is in transition, we must wait
8869 * for it to exit that state. Anything could happen
8870 * when we unlock the map, so start over.
8871 */
8872 if (entry->in_transition) {
8873 /*
8874 * Say that we are waiting, and wait for entry.
8875 */
8876 entry->needs_wakeup = TRUE;
8877 vm_map_entry_wait(dst_map, THREAD_UNINT);
8878
8879 goto start_pass_1;
8880 }
8881
8882 /*
8883 * our range is contained completely within this map entry
8884 */
8885 if (dst_end <= entry->vme_end) {
8886 break;
8887 }
8888 /*
8889 * check that range specified is contiguous region
8890 */
8891 if ((next == vm_map_to_entry(dst_map)) ||
8892 (next->vme_start != entry->vme_end)) {
8893 vm_map_unlock(dst_map);
8894 return KERN_INVALID_ADDRESS;
8895 }
8896
8897
8898 /*
8899 * Check for permanent objects in the destination.
8900 */
8901 if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
8902 ((!VME_OBJECT(entry)->internal) ||
8903 (VME_OBJECT(entry)->true_share))) {
8904 contains_permanent_objects = TRUE;
8905 }
8906
8907 entry = next;
8908 }/* for */
8909
8910 start_overwrite:
8911 /*
8912 * If there are permanent objects in the destination, then
8913 * the copy cannot be interrupted.
8914 */
8915
8916 if (interruptible && contains_permanent_objects) {
8917 vm_map_unlock(dst_map);
8918 return KERN_FAILURE; /* XXX */
8919 }
8920
8921 /*
8922 *
8923 * Make a second pass, overwriting the data.
8924 * At the beginning of each loop iteration,
8925 * the next entry to be overwritten is "tmp_entry"
8926 * (initially, the value returned from the lookup above),
8927 * and the starting address expected in that entry
8928 * is "start".
8929 */
8930
8931 total_size = copy->size;
8932 if (encountered_sub_map) {
8933 copy_size = 0;
8934 /* re-calculate tmp_entry since we've had the map */
8935 /* unlocked */
8936 if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
8937 vm_map_unlock(dst_map);
8938 return KERN_INVALID_ADDRESS;
8939 }
8940 } else {
8941 copy_size = copy->size;
8942 }
8943
8944 base_addr = dst_addr;
8945 while (TRUE) {
8946 /* deconstruct the copy object and do it in parts */
8947 /* only in the sub_map, interruptible case */
8948 vm_map_entry_t copy_entry;
8949 vm_map_entry_t previous_prev = VM_MAP_ENTRY_NULL;
8950 vm_map_entry_t next_copy = VM_MAP_ENTRY_NULL;
8951 int nentries;
8952 int remaining_entries = 0;
8953 vm_map_offset_t new_offset = 0;
8954
8955 for (entry = tmp_entry; copy_size == 0;) {
8956 vm_map_entry_t next;
8957
8958 next = entry->vme_next;
8959
8960 /* tmp_entry and the base address are moved along */
8961 /* each time we encounter a sub-map. Otherwise, */
8962 /* entry can outpace tmp_entry, and the copy_size */
8963 /* may reflect the distance between them. */
8964 /* If the current entry is found to be in transition, */
8965 /* we will start over at the beginning or at the last */
8966 /* encountered submap, as dictated by base_addr, */
8967 /* and will zero copy_size accordingly. */
8968 if (entry->in_transition) {
8969 /*
8970 * Say that we are waiting, and wait for entry.
8971 */
8972 entry->needs_wakeup = TRUE;
8973 vm_map_entry_wait(dst_map, THREAD_UNINT);
8974
8975 if (!vm_map_lookup_entry(dst_map, base_addr,
8976 &tmp_entry)) {
8977 vm_map_unlock(dst_map);
8978 return KERN_INVALID_ADDRESS;
8979 }
8980 copy_size = 0;
8981 entry = tmp_entry;
8982 continue;
8983 }
8984 if (entry->is_sub_map) {
8985 vm_map_offset_t sub_start;
8986 vm_map_offset_t sub_end;
8987 vm_map_offset_t local_end;
8988
8989 if (entry->needs_copy) {
8990 /* if this is a COW submap */
8991 /* just back the range with an */
8992 /* anonymous entry */
8993 if (entry->vme_end < dst_end) {
8994 sub_end = entry->vme_end;
8995 } else {
8996 sub_end = dst_end;
8997 }
8998 if (entry->vme_start < base_addr) {
8999 sub_start = base_addr;
9000 } else {
9001 sub_start = entry->vme_start;
9002 }
9003 vm_map_clip_end(
9004 dst_map, entry, sub_end);
9005 vm_map_clip_start(
9006 dst_map, entry, sub_start);
9007 assert(!entry->use_pmap);
9008 assert(!entry->iokit_acct);
9009 entry->use_pmap = TRUE;
9010 entry->is_sub_map = FALSE;
9011 vm_map_deallocate(
9012 VME_SUBMAP(entry));
9013 VME_OBJECT_SET(entry, VM_OBJECT_NULL);
9014 VME_OFFSET_SET(entry, 0);
9015 entry->is_shared = FALSE;
9016 entry->needs_copy = FALSE;
9017 entry->protection = VM_PROT_DEFAULT;
9018 entry->max_protection = VM_PROT_ALL;
9019 entry->wired_count = 0;
9020 entry->user_wired_count = 0;
9021 if (entry->inheritance
9022 == VM_INHERIT_SHARE) {
9023 entry->inheritance = VM_INHERIT_COPY;
9024 }
9025 continue;
9026 }
9027 /* first take care of any non-sub_map */
9028 /* entries to send */
9029 if (base_addr < entry->vme_start) {
9030 /* stuff to send */
9031 copy_size =
9032 entry->vme_start - base_addr;
9033 break;
9034 }
9035 sub_start = VME_OFFSET(entry);
9036
9037 if (entry->vme_end < dst_end) {
9038 sub_end = entry->vme_end;
9039 } else {
9040 sub_end = dst_end;
9041 }
9042 sub_end -= entry->vme_start;
9043 sub_end += VME_OFFSET(entry);
9044 local_end = entry->vme_end;
9045 vm_map_unlock(dst_map);
9046 copy_size = sub_end - sub_start;
9047
9048 /* adjust the copy object */
9049 if (total_size > copy_size) {
9050 vm_map_size_t local_size = 0;
9051 vm_map_size_t entry_size;
9052
9053 nentries = 1;
9054 new_offset = copy->offset;
9055 copy_entry = vm_map_copy_first_entry(copy);
9056 while (copy_entry !=
9057 vm_map_copy_to_entry(copy)) {
9058 entry_size = copy_entry->vme_end -
9059 copy_entry->vme_start;
9060 if ((local_size < copy_size) &&
9061 ((local_size + entry_size)
9062 >= copy_size)) {
9063 vm_map_copy_clip_end(copy,
9064 copy_entry,
9065 copy_entry->vme_start +
9066 (copy_size - local_size));
9067 entry_size = copy_entry->vme_end -
9068 copy_entry->vme_start;
9069 local_size += entry_size;
9070 new_offset += entry_size;
9071 }
9072 if (local_size >= copy_size) {
9073 next_copy = copy_entry->vme_next;
9074 copy_entry->vme_next =
9075 vm_map_copy_to_entry(copy);
9076 previous_prev =
9077 copy->cpy_hdr.links.prev;
9078 copy->cpy_hdr.links.prev = copy_entry;
9079 copy->size = copy_size;
9080 remaining_entries =
9081 copy->cpy_hdr.nentries;
9082 remaining_entries -= nentries;
9083 copy->cpy_hdr.nentries = nentries;
9084 break;
9085 } else {
9086 local_size += entry_size;
9087 new_offset += entry_size;
9088 nentries++;
9089 }
9090 copy_entry = copy_entry->vme_next;
9091 }
9092 }
9093
9094 if ((entry->use_pmap) && (pmap == NULL)) {
9095 kr = vm_map_copy_overwrite_nested(
9096 VME_SUBMAP(entry),
9097 sub_start,
9098 copy,
9099 interruptible,
9100 VME_SUBMAP(entry)->pmap,
9101 TRUE);
9102 } else if (pmap != NULL) {
9103 kr = vm_map_copy_overwrite_nested(
9104 VME_SUBMAP(entry),
9105 sub_start,
9106 copy,
9107 interruptible, pmap,
9108 TRUE);
9109 } else {
9110 kr = vm_map_copy_overwrite_nested(
9111 VME_SUBMAP(entry),
9112 sub_start,
9113 copy,
9114 interruptible,
9115 dst_map->pmap,
9116 TRUE);
9117 }
9118 if (kr != KERN_SUCCESS) {
9119 if (next_copy != NULL) {
9120 copy->cpy_hdr.nentries +=
9121 remaining_entries;
9122 copy->cpy_hdr.links.prev->vme_next =
9123 next_copy;
9124 copy->cpy_hdr.links.prev
9125 = previous_prev;
9126 copy->size = total_size;
9127 }
9128 return kr;
9129 }
9130 if (dst_end <= local_end) {
9131 return KERN_SUCCESS;
9132 }
9133 /* otherwise the copy no longer exists; it was */
9134 /* destroyed after a successful copy_overwrite */
9135 copy = vm_map_copy_allocate();
9136 copy->type = VM_MAP_COPY_ENTRY_LIST;
9137 copy->offset = new_offset;
9138
9139 /*
9140 * XXX FBDP
9141 * this does not seem to deal with
9142 * the VM map store (R&B tree)
9143 */
9144
9145 total_size -= copy_size;
9146 copy_size = 0;
9147 /* put back remainder of copy in container */
9148 if (next_copy != NULL) {
9149 copy->cpy_hdr.nentries = remaining_entries;
9150 copy->cpy_hdr.links.next = next_copy;
9151 copy->cpy_hdr.links.prev = previous_prev;
9152 copy->size = total_size;
9153 next_copy->vme_prev =
9154 vm_map_copy_to_entry(copy);
9155 next_copy = NULL;
9156 }
9157 base_addr = local_end;
9158 vm_map_lock(dst_map);
9159 if (!vm_map_lookup_entry(dst_map,
9160 local_end, &tmp_entry)) {
9161 vm_map_unlock(dst_map);
9162 return KERN_INVALID_ADDRESS;
9163 }
9164 entry = tmp_entry;
9165 continue;
9166 }
9167 if (dst_end <= entry->vme_end) {
9168 copy_size = dst_end - base_addr;
9169 break;
9170 }
9171
9172 if ((next == vm_map_to_entry(dst_map)) ||
9173 (next->vme_start != entry->vme_end)) {
9174 vm_map_unlock(dst_map);
9175 return KERN_INVALID_ADDRESS;
9176 }
9177
9178 entry = next;
9179 }/* for */
9180
9181 next_copy = NULL;
9182 nentries = 1;
9183
9184 /* adjust the copy object */
9185 if (total_size > copy_size) {
9186 vm_map_size_t local_size = 0;
9187 vm_map_size_t entry_size;
9188
9189 new_offset = copy->offset;
9190 copy_entry = vm_map_copy_first_entry(copy);
9191 while (copy_entry != vm_map_copy_to_entry(copy)) {
9192 entry_size = copy_entry->vme_end -
9193 copy_entry->vme_start;
9194 if ((local_size < copy_size) &&
9195 ((local_size + entry_size)
9196 >= copy_size)) {
9197 vm_map_copy_clip_end(copy, copy_entry,
9198 copy_entry->vme_start +
9199 (copy_size - local_size));
9200 entry_size = copy_entry->vme_end -
9201 copy_entry->vme_start;
9202 local_size += entry_size;
9203 new_offset += entry_size;
9204 }
9205 if (local_size >= copy_size) {
9206 next_copy = copy_entry->vme_next;
9207 copy_entry->vme_next =
9208 vm_map_copy_to_entry(copy);
9209 previous_prev =
9210 copy->cpy_hdr.links.prev;
9211 copy->cpy_hdr.links.prev = copy_entry;
9212 copy->size = copy_size;
9213 remaining_entries =
9214 copy->cpy_hdr.nentries;
9215 remaining_entries -= nentries;
9216 copy->cpy_hdr.nentries = nentries;
9217 break;
9218 } else {
9219 local_size += entry_size;
9220 new_offset += entry_size;
9221 nentries++;
9222 }
9223 copy_entry = copy_entry->vme_next;
9224 }
9225 }
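/*
 * Illustrative trace (added for clarity, not part of the original
 * source; sizes are hypothetical) of the split above: with copy
 * entries of 16K, 32K and 16K and copy_size == 40K, the second entry
 * is clipped at 24K, the copy object temporarily keeps the first 40K
 * (2 entries), and the clipped 8K remainder plus the third entry are
 * parked on next_copy/remaining_entries to be restitched onto the
 * copy object once this chunk has been written.
 */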
9226
9227 if (aligned) {
9228 pmap_t local_pmap;
9229
9230 if (pmap) {
9231 local_pmap = pmap;
9232 } else {
9233 local_pmap = dst_map->pmap;
9234 }
9235
9236 if ((kr = vm_map_copy_overwrite_aligned(
9237 dst_map, tmp_entry, copy,
9238 base_addr, local_pmap)) != KERN_SUCCESS) {
9239 if (next_copy != NULL) {
9240 copy->cpy_hdr.nentries +=
9241 remaining_entries;
9242 copy->cpy_hdr.links.prev->vme_next =
9243 next_copy;
9244 copy->cpy_hdr.links.prev =
9245 previous_prev;
9246 copy->size += copy_size;
9247 }
9248 return kr;
9249 }
9250 vm_map_unlock(dst_map);
9251 } else {
9252 /*
9253 * Performance gain:
9254 *
9255 * if the copy and dst address are misaligned but share the same
9256 * offset within the page, we can copy_not_aligned the
9257 * misaligned parts and copy the rest aligned. If they are
9258 * aligned but len is unaligned, we simply need to copy
9259 * the end bit unaligned. We'll need to split the misaligned
9260 * bits of the region in this case!
9261 */
9262 /* ALWAYS UNLOCKS THE dst_map MAP */
9263 kr = vm_map_copy_overwrite_unaligned(
9264 dst_map,
9265 tmp_entry,
9266 copy,
9267 base_addr,
9268 discard_on_success);
9269 if (kr != KERN_SUCCESS) {
9270 if (next_copy != NULL) {
9271 copy->cpy_hdr.nentries +=
9272 remaining_entries;
9273 copy->cpy_hdr.links.prev->vme_next =
9274 next_copy;
9275 copy->cpy_hdr.links.prev =
9276 previous_prev;
9277 copy->size += copy_size;
9278 }
9279 return kr;
9280 }
9281 }
9282 total_size -= copy_size;
9283 if (total_size == 0) {
9284 break;
9285 }
9286 base_addr += copy_size;
9287 copy_size = 0;
9288 copy->offset = new_offset;
9289 if (next_copy != NULL) {
9290 copy->cpy_hdr.nentries = remaining_entries;
9291 copy->cpy_hdr.links.next = next_copy;
9292 copy->cpy_hdr.links.prev = previous_prev;
9293 next_copy->vme_prev = vm_map_copy_to_entry(copy);
9294 copy->size = total_size;
9295 }
9296 vm_map_lock(dst_map);
9297 while (TRUE) {
9298 if (!vm_map_lookup_entry(dst_map,
9299 base_addr, &tmp_entry)) {
9300 vm_map_unlock(dst_map);
9301 return KERN_INVALID_ADDRESS;
9302 }
9303 if (tmp_entry->in_transition) {
9304 entry->needs_wakeup = TRUE;
9305 vm_map_entry_wait(dst_map, THREAD_UNINT);
9306 } else {
9307 break;
9308 }
9309 }
9310 vm_map_clip_start(dst_map,
9311 tmp_entry,
9312 vm_map_trunc_page(base_addr,
9313 VM_MAP_PAGE_MASK(dst_map)));
9314
9315 entry = tmp_entry;
9316 } /* while */
9317
9318 /*
9319 * Throw away the vm_map_copy object
9320 */
9321 if (discard_on_success) {
9322 vm_map_copy_discard(copy);
9323 }
9324
9325 return KERN_SUCCESS;
9326 }/* vm_map_copy_overwrite */
9327
9328 kern_return_t
9329 vm_map_copy_overwrite(
9330 vm_map_t dst_map,
9331 vm_map_offset_t dst_addr,
9332 vm_map_copy_t copy,
9333 boolean_t interruptible)
9334 {
9335 vm_map_size_t head_size, tail_size;
9336 vm_map_copy_t head_copy, tail_copy;
9337 vm_map_offset_t head_addr, tail_addr;
9338 vm_map_entry_t entry;
9339 kern_return_t kr;
9340 vm_map_offset_t effective_page_mask, effective_page_size;
9341
9342 head_size = 0;
9343 tail_size = 0;
9344 head_copy = NULL;
9345 tail_copy = NULL;
9346 head_addr = 0;
9347 tail_addr = 0;
9348
9349 if (interruptible ||
9350 copy == VM_MAP_COPY_NULL ||
9351 copy->type != VM_MAP_COPY_ENTRY_LIST) {
9352 /*
9353 * We can't split the "copy" map if we're interruptible
9354 * or if we don't have a "copy" map...
9355 */
9356 blunt_copy:
9357 return vm_map_copy_overwrite_nested(dst_map,
9358 dst_addr,
9359 copy,
9360 interruptible,
9361 (pmap_t) NULL,
9362 TRUE);
9363 }
9364
9365 effective_page_mask = MAX(VM_MAP_PAGE_MASK(dst_map), PAGE_MASK);
9366 effective_page_mask = MAX(VM_MAP_COPY_PAGE_MASK(copy),
9367 effective_page_mask);
9368 effective_page_size = effective_page_mask + 1;
9369
9370 if (copy->size < 3 * effective_page_size) {
9371 /*
9372 * Too small to bother with optimizing...
9373 */
9374 goto blunt_copy;
9375 }
9376
9377 if ((dst_addr & effective_page_mask) !=
9378 (copy->offset & effective_page_mask)) {
9379 /*
9380 * Incompatible mis-alignment of source and destination...
9381 */
9382 goto blunt_copy;
9383 }
9384
9385 /*
9386 * Proper alignment or identical mis-alignment at the beginning.
9387 * Let's try and do a small unaligned copy first (if needed)
9388 * and then an aligned copy for the rest.
9389 */
9390 if (!vm_map_page_aligned(dst_addr, effective_page_mask)) {
9391 head_addr = dst_addr;
9392 head_size = (effective_page_size -
9393 (copy->offset & effective_page_mask));
9394 head_size = MIN(head_size, copy->size);
9395 }
9396 if (!vm_map_page_aligned(copy->offset + copy->size,
9397 effective_page_mask)) {
9398 /*
9399 * Mis-alignment at the end.
9400 * Do an aligned copy up to the last page and
9401 * then an unaligned copy for the remaining bytes.
9402 */
9403 tail_size = ((copy->offset + copy->size) &
9404 effective_page_mask);
9405 tail_size = MIN(tail_size, copy->size);
9406 tail_addr = dst_addr + copy->size - tail_size;
9407 assert(tail_addr >= head_addr + head_size);
9408 }
9409 assert(head_size + tail_size <= copy->size);
9410
9411 if (head_size + tail_size == copy->size) {
9412 /*
9413 * It's all unaligned, no optimization possible...
9414 */
9415 goto blunt_copy;
9416 }
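	/*
	 * Worked example (added for clarity, not part of the original
	 * source; values are hypothetical): with a 16K effective page
	 * size, copy->offset == 0x7000, copy->size == 0xe800 and
	 * dst_addr == 0x10003000 (same mis-alignment, 0x3000), we get
	 * head_addr == 0x10003000 with head_size == 0x1000, an aligned
	 * middle of 0xc000 bytes copied to 0x10004000, and
	 * tail_addr == 0x10010000 with tail_size == 0x1800.
	 */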
9417
9418 /*
9419 * Can't optimize if there are any submaps in the
9420 * destination due to the way we free the "copy" map
9421 * progressively in vm_map_copy_overwrite_nested()
9422 * in that case.
9423 */
9424 vm_map_lock_read(dst_map);
9425 if (!vm_map_lookup_entry(dst_map, dst_addr, &entry)) {
9426 vm_map_unlock_read(dst_map);
9427 goto blunt_copy;
9428 }
9429 for (;
9430 (entry != vm_map_copy_to_entry(copy) &&
9431 entry->vme_start < dst_addr + copy->size);
9432 entry = entry->vme_next) {
9433 if (entry->is_sub_map) {
9434 vm_map_unlock_read(dst_map);
9435 goto blunt_copy;
9436 }
9437 }
9438 vm_map_unlock_read(dst_map);
9439
9440 if (head_size) {
9441 /*
9442 * Unaligned copy of the first "head_size" bytes, to reach
9443 * a page boundary.
9444 */
9445
9446 /*
9447 * Extract "head_copy" out of "copy".
9448 */
9449 head_copy = vm_map_copy_allocate();
9450 head_copy->type = VM_MAP_COPY_ENTRY_LIST;
9451 head_copy->cpy_hdr.entries_pageable =
9452 copy->cpy_hdr.entries_pageable;
9453 vm_map_store_init(&head_copy->cpy_hdr);
9454
9455 entry = vm_map_copy_first_entry(copy);
9456 if (entry->vme_end < copy->offset + head_size) {
9457 head_size = entry->vme_end - copy->offset;
9458 }
9459
9460 head_copy->offset = copy->offset;
9461 head_copy->size = head_size;
9462 copy->offset += head_size;
9463 copy->size -= head_size;
9464
9465 vm_map_copy_clip_end(copy, entry, copy->offset);
9466 vm_map_copy_entry_unlink(copy, entry);
9467 vm_map_copy_entry_link(head_copy,
9468 vm_map_copy_to_entry(head_copy),
9469 entry);
9470
9471 /*
9472 * Do the unaligned copy.
9473 */
9474 kr = vm_map_copy_overwrite_nested(dst_map,
9475 head_addr,
9476 head_copy,
9477 interruptible,
9478 (pmap_t) NULL,
9479 FALSE);
9480 if (kr != KERN_SUCCESS) {
9481 goto done;
9482 }
9483 }
9484
9485 if (tail_size) {
9486 /*
9487 * Extract "tail_copy" out of "copy".
9488 */
9489 tail_copy = vm_map_copy_allocate();
9490 tail_copy->type = VM_MAP_COPY_ENTRY_LIST;
9491 tail_copy->cpy_hdr.entries_pageable =
9492 copy->cpy_hdr.entries_pageable;
9493 vm_map_store_init(&tail_copy->cpy_hdr);
9494
9495 tail_copy->offset = copy->offset + copy->size - tail_size;
9496 tail_copy->size = tail_size;
9497
9498 copy->size -= tail_size;
9499
9500 entry = vm_map_copy_last_entry(copy);
9501 vm_map_copy_clip_start(copy, entry, tail_copy->offset);
9502 entry = vm_map_copy_last_entry(copy);
9503 vm_map_copy_entry_unlink(copy, entry);
9504 vm_map_copy_entry_link(tail_copy,
9505 vm_map_copy_last_entry(tail_copy),
9506 entry);
9507 }
9508
9509 /*
9510 * Copy most (or possibly all) of the data.
9511 */
9512 kr = vm_map_copy_overwrite_nested(dst_map,
9513 dst_addr + head_size,
9514 copy,
9515 interruptible,
9516 (pmap_t) NULL,
9517 FALSE);
9518 if (kr != KERN_SUCCESS) {
9519 goto done;
9520 }
9521
9522 if (tail_size) {
9523 kr = vm_map_copy_overwrite_nested(dst_map,
9524 tail_addr,
9525 tail_copy,
9526 interruptible,
9527 (pmap_t) NULL,
9528 FALSE);
9529 }
9530
9531 done:
9532 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
9533 if (kr == KERN_SUCCESS) {
9534 /*
9535 * Discard all the copy maps.
9536 */
9537 if (head_copy) {
9538 vm_map_copy_discard(head_copy);
9539 head_copy = NULL;
9540 }
9541 vm_map_copy_discard(copy);
9542 if (tail_copy) {
9543 vm_map_copy_discard(tail_copy);
9544 tail_copy = NULL;
9545 }
9546 } else {
9547 /*
9548 * Re-assemble the original copy map.
9549 */
9550 if (head_copy) {
9551 entry = vm_map_copy_first_entry(head_copy);
9552 vm_map_copy_entry_unlink(head_copy, entry);
9553 vm_map_copy_entry_link(copy,
9554 vm_map_copy_to_entry(copy),
9555 entry);
9556 copy->offset -= head_size;
9557 copy->size += head_size;
9558 vm_map_copy_discard(head_copy);
9559 head_copy = NULL;
9560 }
9561 if (tail_copy) {
9562 entry = vm_map_copy_last_entry(tail_copy);
9563 vm_map_copy_entry_unlink(tail_copy, entry);
9564 vm_map_copy_entry_link(copy,
9565 vm_map_copy_last_entry(copy),
9566 entry);
9567 copy->size += tail_size;
9568 vm_map_copy_discard(tail_copy);
9569 tail_copy = NULL;
9570 }
9571 }
9572 return kr;
9573 }
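/*
 * Illustrative sketch (added for clarity, not part of the original
 * source): the usual pairing of vm_map_copyin() with
 * vm_map_copy_overwrite(), as done for vm_copy()-style requests.  On
 * failure the copy object has not been consumed and must be discarded
 * by the caller.  The helper name is hypothetical.
 */
static kern_return_t
example_copy_within_map(
	vm_map_t         map,
	vm_map_address_t source_address,
	vm_map_address_t dest_address,
	vm_map_size_t    size)
{
	vm_map_copy_t copy;
	kern_return_t kr;

	kr = vm_map_copyin(map, source_address, size,
	    FALSE /* src_destroy */, &copy);
	if (kr != KERN_SUCCESS) {
		return kr;
	}
	kr = vm_map_copy_overwrite(map, dest_address, copy,
	    FALSE /* interruptible */);
	if (kr != KERN_SUCCESS) {
		vm_map_copy_discard(copy);
	}
	return kr;
}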
9574
9575
9576 /*
9577 * Routine: vm_map_copy_overwrite_unaligned [internal use only]
9578 *
9579 * Description:
9580 * Physically copy unaligned data
9581 *
9582 * Implementation:
9583 * Unaligned parts of pages have to be physically copied. We use
9584 * a modified form of vm_fault_copy (which understands non-aligned
9585 * page offsets and sizes) to do the copy. We attempt to copy as
9586 * much memory in one go as possible; however, vm_fault_copy copies
9587 * within 1 memory object so we have to find the smaller of "amount left"
9588 * "source object data size" and "target object data size". With
9589 * unaligned data we don't need to split regions, therefore the source
9590 * (copy) object should be one map entry, the target range may be split
9591 * over multiple map entries, however. In any event, we are pessimistic
9592 * about these assumptions.
9593 *
9594 * Assumptions:
9595 * dst_map is locked on entry and is returned locked on success,
9596 * unlocked on error.
9597 */
9598
9599 static kern_return_t
9600 vm_map_copy_overwrite_unaligned(
9601 vm_map_t dst_map,
9602 vm_map_entry_t entry,
9603 vm_map_copy_t copy,
9604 vm_map_offset_t start,
9605 boolean_t discard_on_success)
9606 {
9607 vm_map_entry_t copy_entry;
9608 vm_map_entry_t copy_entry_next;
9609 vm_map_version_t version;
9610 vm_object_t dst_object;
9611 vm_object_offset_t dst_offset;
9612 vm_object_offset_t src_offset;
9613 vm_object_offset_t entry_offset;
9614 vm_map_offset_t entry_end;
9615 vm_map_size_t src_size,
9616 dst_size,
9617 copy_size,
9618 amount_left;
9619 kern_return_t kr = KERN_SUCCESS;
9620
9621
9622 copy_entry = vm_map_copy_first_entry(copy);
9623
9624 vm_map_lock_write_to_read(dst_map);
9625
9626 src_offset = copy->offset - vm_object_trunc_page(copy->offset);
9627 amount_left = copy->size;
9628 /*
9629 * unaligned, so we never clipped this entry; we need the offset into
9630 * the vm_object, not just the data.
9631 */
9632 while (amount_left > 0) {
9633 if (entry == vm_map_to_entry(dst_map)) {
9634 vm_map_unlock_read(dst_map);
9635 return KERN_INVALID_ADDRESS;
9636 }
9637
9638 /* "start" must be within the current map entry */
9639 assert((start >= entry->vme_start) && (start < entry->vme_end));
9640
9641 dst_offset = start - entry->vme_start;
9642
9643 dst_size = entry->vme_end - start;
9644
9645 src_size = copy_entry->vme_end -
9646 (copy_entry->vme_start + src_offset);
9647
9648 if (dst_size < src_size) {
9649 /*
9650 * we can only copy dst_size bytes before
9651 * we have to get the next destination entry
9652 */
9653 copy_size = dst_size;
9654 } else {
9655 /*
9656 * we can only copy src_size bytes before
9657 * we have to get the next source copy entry
9658 */
9659 copy_size = src_size;
9660 }
9661
9662 if (copy_size > amount_left) {
9663 copy_size = amount_left;
9664 }
9665 /*
9666 * Entry needs copy: create a shadow object for the
9667 * copy-on-write region.
9668 */
9669 if (entry->needs_copy &&
9670 ((entry->protection & VM_PROT_WRITE) != 0)) {
9671 if (vm_map_lock_read_to_write(dst_map)) {
9672 vm_map_lock_read(dst_map);
9673 goto RetryLookup;
9674 }
9675 VME_OBJECT_SHADOW(entry,
9676 (vm_map_size_t)(entry->vme_end
9677 - entry->vme_start));
9678 entry->needs_copy = FALSE;
9679 vm_map_lock_write_to_read(dst_map);
9680 }
9681 dst_object = VME_OBJECT(entry);
9682 /*
9683 * Unlike with the virtual (aligned) copy, we're going
9684 * to fault on it, therefore we need a target object.
9685 */
9686 if (dst_object == VM_OBJECT_NULL) {
9687 if (vm_map_lock_read_to_write(dst_map)) {
9688 vm_map_lock_read(dst_map);
9689 goto RetryLookup;
9690 }
9691 dst_object = vm_object_allocate((vm_map_size_t)
9692 entry->vme_end - entry->vme_start);
9693 VME_OBJECT_SET(entry, dst_object);
9694 VME_OFFSET_SET(entry, 0);
9695 assert(entry->use_pmap);
9696 vm_map_lock_write_to_read(dst_map);
9697 }
9698 /*
9699 * Take an object reference and unlock map. The "entry" may
9700 * disappear or change when the map is unlocked.
9701 */
9702 vm_object_reference(dst_object);
9703 version.main_timestamp = dst_map->timestamp;
9704 entry_offset = VME_OFFSET(entry);
9705 entry_end = entry->vme_end;
9706 vm_map_unlock_read(dst_map);
9707 /*
9708 * Copy as much as possible in one pass
9709 */
9710 kr = vm_fault_copy(
9711 VME_OBJECT(copy_entry),
9712 VME_OFFSET(copy_entry) + src_offset,
9713 &copy_size,
9714 dst_object,
9715 entry_offset + dst_offset,
9716 dst_map,
9717 &version,
9718 THREAD_UNINT );
9719
9720 start += copy_size;
9721 src_offset += copy_size;
9722 amount_left -= copy_size;
9723 /*
9724 * Release the object reference
9725 */
9726 vm_object_deallocate(dst_object);
9727 /*
9728 * If a hard error occurred, return it now
9729 */
9730 if (kr != KERN_SUCCESS) {
9731 return kr;
9732 }
9733
9734 if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
9735 || amount_left == 0) {
9736 /*
9737 * all done with this copy entry, dispose.
9738 */
9739 copy_entry_next = copy_entry->vme_next;
9740
9741 if (discard_on_success) {
9742 vm_map_copy_entry_unlink(copy, copy_entry);
9743 assert(!copy_entry->is_sub_map);
9744 vm_object_deallocate(VME_OBJECT(copy_entry));
9745 vm_map_copy_entry_dispose(copy, copy_entry);
9746 }
9747
9748 if (copy_entry_next == vm_map_copy_to_entry(copy) &&
9749 amount_left) {
9750 /*
9751 * not finished copying but ran out of source
9752 */
9753 return KERN_INVALID_ADDRESS;
9754 }
9755
9756 copy_entry = copy_entry_next;
9757
9758 src_offset = 0;
9759 }
9760
9761 if (amount_left == 0) {
9762 return KERN_SUCCESS;
9763 }
9764
9765 vm_map_lock_read(dst_map);
9766 if (version.main_timestamp == dst_map->timestamp) {
9767 if (start == entry_end) {
9768 /*
9769 * destination region is split. Use the version
9770 * information to avoid a lookup in the normal
9771 * case.
9772 */
9773 entry = entry->vme_next;
9774 /*
9775 * should be contiguous. Fail if we encounter
9776 * a hole in the destination.
9777 */
9778 if (start != entry->vme_start) {
9779 vm_map_unlock_read(dst_map);
9780 return KERN_INVALID_ADDRESS;
9781 }
9782 }
9783 } else {
9784 /*
9785 * Map version check failed.
9786 * we must lookup the entry because somebody
9787 * might have changed the map behind our backs.
9788 */
9789 RetryLookup:
9790 if (!vm_map_lookup_entry(dst_map, start, &entry)) {
9791 vm_map_unlock_read(dst_map);
9792 return KERN_INVALID_ADDRESS;
9793 }
9794 }
9795 }/* while */
9796
9797 return KERN_SUCCESS;
9798 }/* vm_map_copy_overwrite_unaligned */
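/*
 * Illustrative sketch (added for clarity, not part of the original
 * source): each iteration above copies the smallest of "bytes left in
 * the destination entry", "bytes left in the source copy entry" and
 * "total bytes remaining".  The helper name is hypothetical.
 */
static inline vm_map_size_t
example_unaligned_chunk_size(
	vm_map_size_t dst_size,
	vm_map_size_t src_size,
	vm_map_size_t amount_left)
{
	vm_map_size_t copy_size;

	copy_size = (dst_size < src_size) ? dst_size : src_size;
	if (copy_size > amount_left) {
		copy_size = amount_left;
	}
	return copy_size;
}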
9799
9800 /*
9801 * Routine: vm_map_copy_overwrite_aligned [internal use only]
9802 *
9803 * Description:
9804 * Does all the vm_trickery possible for whole pages.
9805 *
9806 * Implementation:
9807 *
9808 * If there are no permanent objects in the destination,
9809 * and the source and destination map entry zones match,
9810 * and the destination map entry is not shared,
9811 * then the map entries can be deleted and replaced
9812 * with those from the copy. The following code is the
9813 * basic idea of what to do, but there are lots of annoying
9814 * little details about getting protection and inheritance
9815 * right. Should add protection, inheritance, and sharing checks
9816 * to the above pass and make sure that no wiring is involved.
9817 */
9818
9819 int vm_map_copy_overwrite_aligned_src_not_internal = 0;
9820 int vm_map_copy_overwrite_aligned_src_not_symmetric = 0;
9821 int vm_map_copy_overwrite_aligned_src_large = 0;
9822
9823 static kern_return_t
9824 vm_map_copy_overwrite_aligned(
9825 vm_map_t dst_map,
9826 vm_map_entry_t tmp_entry,
9827 vm_map_copy_t copy,
9828 vm_map_offset_t start,
9829 __unused pmap_t pmap)
9830 {
9831 vm_object_t object;
9832 vm_map_entry_t copy_entry;
9833 vm_map_size_t copy_size;
9834 vm_map_size_t size;
9835 vm_map_entry_t entry;
9836
9837 while ((copy_entry = vm_map_copy_first_entry(copy))
9838 != vm_map_copy_to_entry(copy)) {
9839 copy_size = (copy_entry->vme_end - copy_entry->vme_start);
9840
9841 entry = tmp_entry;
9842 if (entry->is_sub_map) {
9843 /* unnested when clipped earlier */
9844 assert(!entry->use_pmap);
9845 }
9846 if (entry == vm_map_to_entry(dst_map)) {
9847 vm_map_unlock(dst_map);
9848 return KERN_INVALID_ADDRESS;
9849 }
9850 size = (entry->vme_end - entry->vme_start);
9851 /*
9852 * Make sure that no holes popped up in the
9853 * address map, and that the protection is
9854 * still valid, in case the map was unlocked
9855 * earlier.
9856 */
9857
9858 if ((entry->vme_start != start) || ((entry->is_sub_map)
9859 && !entry->needs_copy)) {
9860 vm_map_unlock(dst_map);
9861 return KERN_INVALID_ADDRESS;
9862 }
9863 assert(entry != vm_map_to_entry(dst_map));
9864
9865 /*
9866 * Check protection again
9867 */
9868
9869 if (!(entry->protection & VM_PROT_WRITE)) {
9870 vm_map_unlock(dst_map);
9871 return KERN_PROTECTION_FAILURE;
9872 }
9873
9874 /*
9875 * Adjust to source size first
9876 */
9877
9878 if (copy_size < size) {
9879 if (entry->map_aligned &&
9880 !VM_MAP_PAGE_ALIGNED(entry->vme_start + copy_size,
9881 VM_MAP_PAGE_MASK(dst_map))) {
9882 /* no longer map-aligned */
9883 entry->map_aligned = FALSE;
9884 }
9885 vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
9886 size = copy_size;
9887 }
9888
9889 /*
9890 * Adjust to destination size
9891 */
9892
9893 if (size < copy_size) {
9894 vm_map_copy_clip_end(copy, copy_entry,
9895 copy_entry->vme_start + size);
9896 copy_size = size;
9897 }
9898
9899 assert((entry->vme_end - entry->vme_start) == size);
9900 assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
9901 assert((copy_entry->vme_end - copy_entry->vme_start) == size);
9902
9903 /*
9904 * If the destination contains temporary unshared memory,
9905 * we can perform the copy by throwing it away and
9906 * installing the source data.
9907 */
9908
9909 object = VME_OBJECT(entry);
9910 if ((!entry->is_shared &&
9911 ((object == VM_OBJECT_NULL) ||
9912 (object->internal && !object->true_share))) ||
9913 entry->needs_copy) {
9914 vm_object_t old_object = VME_OBJECT(entry);
9915 vm_object_offset_t old_offset = VME_OFFSET(entry);
9916 vm_object_offset_t offset;
9917
9918 /*
9919 * Ensure that the source and destination aren't
9920 * identical
9921 */
9922 if (old_object == VME_OBJECT(copy_entry) &&
9923 old_offset == VME_OFFSET(copy_entry)) {
9924 vm_map_copy_entry_unlink(copy, copy_entry);
9925 vm_map_copy_entry_dispose(copy, copy_entry);
9926
9927 if (old_object != VM_OBJECT_NULL) {
9928 vm_object_deallocate(old_object);
9929 }
9930
9931 start = tmp_entry->vme_end;
9932 tmp_entry = tmp_entry->vme_next;
9933 continue;
9934 }
9935
9936 #if !CONFIG_EMBEDDED
9937 #define __TRADEOFF1_OBJ_SIZE (64 * 1024 * 1024) /* 64 MB */
9938 #define __TRADEOFF1_COPY_SIZE (128 * 1024) /* 128 KB */
9939 if (VME_OBJECT(copy_entry) != VM_OBJECT_NULL &&
9940 VME_OBJECT(copy_entry)->vo_size >= __TRADEOFF1_OBJ_SIZE &&
9941 copy_size <= __TRADEOFF1_COPY_SIZE) {
9942 /*
9943 * Virtual vs. Physical copy tradeoff #1.
9944 *
9945 * Copying only a few pages out of a large
9946 * object: do a physical copy instead of
9947 * a virtual copy, to avoid possibly keeping
9948 * the entire large object alive because of
9949 * those few copy-on-write pages.
9950 */
9951 vm_map_copy_overwrite_aligned_src_large++;
9952 goto slow_copy;
9953 }
9954 #endif /* !CONFIG_EMBEDDED */
9955
9956 if ((dst_map->pmap != kernel_pmap) &&
9957 (VME_ALIAS(entry) >= VM_MEMORY_MALLOC) &&
9958 (VME_ALIAS(entry) <= VM_MEMORY_MALLOC_MEDIUM)) {
9959 vm_object_t new_object, new_shadow;
9960
9961 /*
9962 * We're about to map something over a mapping
9963 * established by malloc()...
9964 */
9965 new_object = VME_OBJECT(copy_entry);
9966 if (new_object != VM_OBJECT_NULL) {
9967 vm_object_lock_shared(new_object);
9968 }
9969 while (new_object != VM_OBJECT_NULL &&
9970 #if !CONFIG_EMBEDDED
9971 !new_object->true_share &&
9972 new_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
9973 #endif /* !CONFIG_EMBEDDED */
9974 new_object->internal) {
9975 new_shadow = new_object->shadow;
9976 if (new_shadow == VM_OBJECT_NULL) {
9977 break;
9978 }
9979 vm_object_lock_shared(new_shadow);
9980 vm_object_unlock(new_object);
9981 new_object = new_shadow;
9982 }
9983 if (new_object != VM_OBJECT_NULL) {
9984 if (!new_object->internal) {
9985 /*
9986 * The new mapping is backed
9987 * by an external object. We
9988 * don't want malloc'ed memory
9989 * to be replaced with such a
9990 * non-anonymous mapping, so
9991 * let's go off the optimized
9992 * path...
9993 */
9994 vm_map_copy_overwrite_aligned_src_not_internal++;
9995 vm_object_unlock(new_object);
9996 goto slow_copy;
9997 }
9998 #if !CONFIG_EMBEDDED
9999 if (new_object->true_share ||
10000 new_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
10001 /*
10002 * Same if there's a "true_share"
10003 * object in the shadow chain, or
10004 * an object with a copy strategy
10005 * other than the default (SYMMETRIC) one.
10006 */
10007 vm_map_copy_overwrite_aligned_src_not_symmetric++;
10008 vm_object_unlock(new_object);
10009 goto slow_copy;
10010 }
10011 #endif /* !CONFIG_EMBEDDED */
10012 vm_object_unlock(new_object);
10013 }
10014 /*
10015 * The new mapping is still backed by
10016 * anonymous (internal) memory, so it's
10017 * OK to substitute it for the original
10018 * malloc() mapping.
10019 */
10020 }
10021
10022 if (old_object != VM_OBJECT_NULL) {
10023 if (entry->is_sub_map) {
10024 if (entry->use_pmap) {
10025 #ifndef NO_NESTED_PMAP
10026 pmap_unnest(dst_map->pmap,
10027 (addr64_t)entry->vme_start,
10028 entry->vme_end - entry->vme_start);
10029 #endif /* NO_NESTED_PMAP */
10030 if (dst_map->mapped_in_other_pmaps) {
10031 /* clean up parent */
10032 /* map/maps */
10033 vm_map_submap_pmap_clean(
10034 dst_map, entry->vme_start,
10035 entry->vme_end,
10036 VME_SUBMAP(entry),
10037 VME_OFFSET(entry));
10038 }
10039 } else {
10040 vm_map_submap_pmap_clean(
10041 dst_map, entry->vme_start,
10042 entry->vme_end,
10043 VME_SUBMAP(entry),
10044 VME_OFFSET(entry));
10045 }
10046 vm_map_deallocate(VME_SUBMAP(entry));
10047 } else {
10048 if (dst_map->mapped_in_other_pmaps) {
10049 vm_object_pmap_protect_options(
10050 VME_OBJECT(entry),
10051 VME_OFFSET(entry),
10052 entry->vme_end
10053 - entry->vme_start,
10054 PMAP_NULL,
10055 entry->vme_start,
10056 VM_PROT_NONE,
10057 PMAP_OPTIONS_REMOVE);
10058 } else {
10059 pmap_remove_options(
10060 dst_map->pmap,
10061 (addr64_t)(entry->vme_start),
10062 (addr64_t)(entry->vme_end),
10063 PMAP_OPTIONS_REMOVE);
10064 }
10065 vm_object_deallocate(old_object);
10066 }
10067 }
10068
10069 if (entry->iokit_acct) {
10070 /* keep using iokit accounting */
10071 entry->use_pmap = FALSE;
10072 } else {
10073 /* use pmap accounting */
10074 entry->use_pmap = TRUE;
10075 }
10076 entry->is_sub_map = FALSE;
10077 VME_OBJECT_SET(entry, VME_OBJECT(copy_entry));
10078 object = VME_OBJECT(entry);
10079 entry->needs_copy = copy_entry->needs_copy;
10080 entry->wired_count = 0;
10081 entry->user_wired_count = 0;
10082 offset = VME_OFFSET(copy_entry);
10083 VME_OFFSET_SET(entry, offset);
10084
10085 vm_map_copy_entry_unlink(copy, copy_entry);
10086 vm_map_copy_entry_dispose(copy, copy_entry);
10087
10088 /*
10089 * We could try to push pages into the pmap at this point, BUT
10090 * this optimization only saved on average 2 us per page if ALL
10091 * the pages in the source were currently mapped
10092 * and ALL the pages in the dest were touched. If fewer
10093 * than 2/3 of the pages were touched, this optimization actually cost more cycles.
10094 * It also puts a lot of pressure on the pmap layer w.r.t. mapping structures.
10095 */
10096
10097 /*
10098 * Set up for the next iteration. The map
10099 * has not been unlocked, so the next
10100 * address should be at the end of this
10101 * entry, and the next map entry should be
10102 * the one following it.
10103 */
10104
10105 start = tmp_entry->vme_end;
10106 tmp_entry = tmp_entry->vme_next;
10107 } else {
10108 vm_map_version_t version;
10109 vm_object_t dst_object;
10110 vm_object_offset_t dst_offset;
10111 kern_return_t r;
10112
10113 slow_copy:
10114 if (entry->needs_copy) {
10115 VME_OBJECT_SHADOW(entry,
10116 (entry->vme_end -
10117 entry->vme_start));
10118 entry->needs_copy = FALSE;
10119 }
10120
10121 dst_object = VME_OBJECT(entry);
10122 dst_offset = VME_OFFSET(entry);
10123
10124 /*
10125 * Take an object reference, and record
10126 * the map version information so that the
10127 * map can be safely unlocked.
10128 */
10129
10130 if (dst_object == VM_OBJECT_NULL) {
10131 /*
10132 * We would usually have just taken the
10133 * optimized path above if the destination
10134 * object has not been allocated yet. But we
10135 * now disable that optimization if the copy
10136 * entry's object is not backed by anonymous
10137 * memory to avoid replacing malloc'ed
10138 * (i.e. re-usable) anonymous memory with a
10139 * not-so-anonymous mapping.
10140 * So we have to handle this case here and
10141 * allocate a new VM object for this map entry.
10142 */
10143 dst_object = vm_object_allocate(
10144 entry->vme_end - entry->vme_start);
10145 dst_offset = 0;
10146 VME_OBJECT_SET(entry, dst_object);
10147 VME_OFFSET_SET(entry, dst_offset);
10148 assert(entry->use_pmap);
10149 }
10150
10151 vm_object_reference(dst_object);
10152
10153 /* account for unlock bumping up timestamp */
10154 version.main_timestamp = dst_map->timestamp + 1;
10155
10156 vm_map_unlock(dst_map);
10157
10158 /*
10159 * Copy as much as possible in one pass
10160 */
10161
10162 copy_size = size;
10163 r = vm_fault_copy(
10164 VME_OBJECT(copy_entry),
10165 VME_OFFSET(copy_entry),
10166 &copy_size,
10167 dst_object,
10168 dst_offset,
10169 dst_map,
10170 &version,
10171 THREAD_UNINT );
10172
10173 /*
10174 * Release the object reference
10175 */
10176
10177 vm_object_deallocate(dst_object);
10178
10179 /*
10180 * If a hard error occurred, return it now
10181 */
10182
10183 if (r != KERN_SUCCESS) {
10184 return r;
10185 }
10186
10187 if (copy_size != 0) {
10188 /*
10189 * Dispose of the copied region
10190 */
10191
10192 vm_map_copy_clip_end(copy, copy_entry,
10193 copy_entry->vme_start + copy_size);
10194 vm_map_copy_entry_unlink(copy, copy_entry);
10195 vm_object_deallocate(VME_OBJECT(copy_entry));
10196 vm_map_copy_entry_dispose(copy, copy_entry);
10197 }
10198
10199 /*
10200 * Pick up in the destination map where we left off.
10201 *
10202 * Use the version information to avoid a lookup
10203 * in the normal case.
10204 */
10205
10206 start += copy_size;
10207 vm_map_lock(dst_map);
10208 if (version.main_timestamp == dst_map->timestamp &&
10209 copy_size != 0) {
10210 /* We can safely use saved tmp_entry value */
10211
10212 if (tmp_entry->map_aligned &&
10213 !VM_MAP_PAGE_ALIGNED(
10214 start,
10215 VM_MAP_PAGE_MASK(dst_map))) {
10216 /* no longer map-aligned */
10217 tmp_entry->map_aligned = FALSE;
10218 }
10219 vm_map_clip_end(dst_map, tmp_entry, start);
10220 tmp_entry = tmp_entry->vme_next;
10221 } else {
10222 /* Must do lookup of tmp_entry */
10223
10224 if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
10225 vm_map_unlock(dst_map);
10226 return KERN_INVALID_ADDRESS;
10227 }
10228 if (tmp_entry->map_aligned &&
10229 !VM_MAP_PAGE_ALIGNED(
10230 start,
10231 VM_MAP_PAGE_MASK(dst_map))) {
10232 /* no longer map-aligned */
10233 tmp_entry->map_aligned = FALSE;
10234 }
10235 vm_map_clip_start(dst_map, tmp_entry, start);
10236 }
10237 }
10238 }/* while */
10239
10240 return KERN_SUCCESS;
10241 }/* vm_map_copy_overwrite_aligned */
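
/*
 * A minimal sketch (kept out of the build with #if 0) restating the
 * "virtual vs. physical copy" tradeoff #1 used above, with the
 * thresholds written out (64 MB source object, 128 KB copy).  The
 * helper name is hypothetical and nothing in this file calls it.
 */
#if 0
static inline boolean_t
prefer_physical_copy_tradeoff1(
	vm_object_size_t        src_object_size,
	vm_map_size_t           copy_size)
{
	/*
	 * Copying only a few pages out of a large object: a physical
	 * copy avoids keeping the entire large object alive through
	 * copy-on-write references to just those few pages.
	 */
	return src_object_size >= (64ULL * 1024 * 1024) &&   /* __TRADEOFF1_OBJ_SIZE */
	       copy_size <= (128 * 1024);                    /* __TRADEOFF1_COPY_SIZE */
}
#endif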
10242
10243 /*
10244 * Routine: vm_map_copyin_kernel_buffer [internal use only]
10245 *
10246 * Description:
10247 * Copy in data to a kernel buffer from space in the
10248 * source map. The original space may be optionally
10249 * deallocated.
10250 *
10251 * If successful, returns a new copy object.
10252 */
10253 static kern_return_t
10254 vm_map_copyin_kernel_buffer(
10255 vm_map_t src_map,
10256 vm_map_offset_t src_addr,
10257 vm_map_size_t len,
10258 boolean_t src_destroy,
10259 vm_map_copy_t *copy_result)
10260 {
10261 kern_return_t kr;
10262 vm_map_copy_t copy;
10263 vm_size_t kalloc_size;
10264
10265 if (len > msg_ool_size_small) {
10266 return KERN_INVALID_ARGUMENT;
10267 }
10268
10269 kalloc_size = (vm_size_t)(cpy_kdata_hdr_sz + len);
10270
10271 copy = (vm_map_copy_t)kalloc(kalloc_size);
10272 if (copy == VM_MAP_COPY_NULL) {
10273 return KERN_RESOURCE_SHORTAGE;
10274 }
10275 copy->type = VM_MAP_COPY_KERNEL_BUFFER;
10276 copy->size = len;
10277 copy->offset = 0;
10278
10279 kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t)len);
10280 if (kr != KERN_SUCCESS) {
10281 kfree(copy, kalloc_size);
10282 return kr;
10283 }
10284 if (src_destroy) {
10285 (void) vm_map_remove(
10286 src_map,
10287 vm_map_trunc_page(src_addr,
10288 VM_MAP_PAGE_MASK(src_map)),
10289 vm_map_round_page(src_addr + len,
10290 VM_MAP_PAGE_MASK(src_map)),
10291 (VM_MAP_REMOVE_INTERRUPTIBLE |
10292 VM_MAP_REMOVE_WAIT_FOR_KWIRE |
10293 ((src_map == kernel_map) ? VM_MAP_REMOVE_KUNWIRE : VM_MAP_REMOVE_NO_FLAGS)));
10294 }
10295 *copy_result = copy;
10296 return KERN_SUCCESS;
10297 }
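
/*
 * A minimal sketch (kept out of the build with #if 0) of the sizing
 * rule used by vm_map_copyin_kernel_buffer() above: the payload is
 * stored inline, right after the vm_map_copy header, so the
 * allocation is the header size plus the payload length, and only
 * payloads up to "msg_ool_size_small" qualify for this path.  The
 * helper name is hypothetical.
 */
#if 0
static kern_return_t
kernel_buffer_copy_alloc_size(
	vm_map_size_t           len,
	vm_size_t               *alloc_size)    /* OUT */
{
	if (len > msg_ool_size_small) {
		/* too large for the kernel-buffer strategy */
		return KERN_INVALID_ARGUMENT;
	}
	/* header immediately followed by "len" bytes of inline data */
	*alloc_size = (vm_size_t)(cpy_kdata_hdr_sz + len);
	return KERN_SUCCESS;
}
#endif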
10298
10299 /*
10300 * Routine: vm_map_copyout_kernel_buffer [internal use only]
10301 *
10302 * Description:
10303 * Copy out data from a kernel buffer into space in the
10304 * destination map. The space may be optionally dynamically
10305 * allocated.
10306 *
10307 * If successful, consumes the copy object.
10308 * Otherwise, the caller is responsible for it.
10309 */
10310 static int vm_map_copyout_kernel_buffer_failures = 0;
10311 static kern_return_t
10312 vm_map_copyout_kernel_buffer(
10313 vm_map_t map,
10314 vm_map_address_t *addr, /* IN/OUT */
10315 vm_map_copy_t copy,
10316 vm_map_size_t copy_size,
10317 boolean_t overwrite,
10318 boolean_t consume_on_success)
10319 {
10320 kern_return_t kr = KERN_SUCCESS;
10321 thread_t thread = current_thread();
10322
10323 assert(copy->size == copy_size);
10324
10325 /*
10326 * check for corrupted vm_map_copy structure
10327 */
10328 if (copy_size > msg_ool_size_small || copy->offset) {
10329 panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
10330 (long long)copy->size, (long long)copy->offset);
10331 }
10332
10333 if (!overwrite) {
10334 /*
10335 * Allocate space in the target map for the data
10336 */
10337 *addr = 0;
10338 kr = vm_map_enter(map,
10339 addr,
10340 vm_map_round_page(copy_size,
10341 VM_MAP_PAGE_MASK(map)),
10342 (vm_map_offset_t) 0,
10343 VM_FLAGS_ANYWHERE,
10344 VM_MAP_KERNEL_FLAGS_NONE,
10345 VM_KERN_MEMORY_NONE,
10346 VM_OBJECT_NULL,
10347 (vm_object_offset_t) 0,
10348 FALSE,
10349 VM_PROT_DEFAULT,
10350 VM_PROT_ALL,
10351 VM_INHERIT_DEFAULT);
10352 if (kr != KERN_SUCCESS) {
10353 return kr;
10354 }
10355 #if KASAN
10356 if (map->pmap == kernel_pmap) {
10357 kasan_notify_address(*addr, copy->size);
10358 }
10359 #endif
10360 }
10361
10362 /*
10363 * Copyout the data from the kernel buffer to the target map.
10364 */
10365 if (thread->map == map) {
10366 /*
10367 * If the target map is the current map, just do
10368 * the copy.
10369 */
10370 assert((vm_size_t)copy_size == copy_size);
10371 if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
10372 kr = KERN_INVALID_ADDRESS;
10373 }
10374 } else {
10375 vm_map_t oldmap;
10376
10377 /*
10378 * If the target map is another map, assume the
10379 * target's address space identity for the duration
10380 * of the copy.
10381 */
10382 vm_map_reference(map);
10383 oldmap = vm_map_switch(map);
10384
10385 assert((vm_size_t)copy_size == copy_size);
10386 if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
10387 vm_map_copyout_kernel_buffer_failures++;
10388 kr = KERN_INVALID_ADDRESS;
10389 }
10390
10391 (void) vm_map_switch(oldmap);
10392 vm_map_deallocate(map);
10393 }
10394
10395 if (kr != KERN_SUCCESS) {
10396 /* the copy failed, clean up */
10397 if (!overwrite) {
10398 /*
10399 * Deallocate the space we allocated in the target map.
10400 */
10401 (void) vm_map_remove(
10402 map,
10403 vm_map_trunc_page(*addr,
10404 VM_MAP_PAGE_MASK(map)),
10405 vm_map_round_page((*addr +
10406 vm_map_round_page(copy_size,
10407 VM_MAP_PAGE_MASK(map))),
10408 VM_MAP_PAGE_MASK(map)),
10409 VM_MAP_REMOVE_NO_FLAGS);
10410 *addr = 0;
10411 }
10412 } else {
10413 /* copy was successful, discard the copy structure */
10414 if (consume_on_success) {
10415 kfree(copy, copy_size + cpy_kdata_hdr_sz);
10416 }
10417 }
10418
10419 return kr;
10420 }
10421
10422 /*
10423 * Routine: vm_map_copy_insert [internal use only]
10424 *
10425 * Description:
10426 * Link a copy chain ("copy") into a map at the
10427 * specified location (after "where").
10428 * Side effects:
10429 * The copy chain is destroyed.
10430 */
10431 static void
10432 vm_map_copy_insert(
10433 vm_map_t map,
10434 vm_map_entry_t after_where,
10435 vm_map_copy_t copy)
10436 {
10437 vm_map_entry_t entry;
10438
10439 while (vm_map_copy_first_entry(copy) != vm_map_copy_to_entry(copy)) {
10440 entry = vm_map_copy_first_entry(copy);
10441 vm_map_copy_entry_unlink(copy, entry);
10442 vm_map_store_entry_link(map, after_where, entry,
10443 VM_MAP_KERNEL_FLAGS_NONE);
10444 after_where = entry;
10445 }
10446 zfree(vm_map_copy_zone, copy);
10447 }
10448
10449 void
10450 vm_map_copy_remap(
10451 vm_map_t map,
10452 vm_map_entry_t where,
10453 vm_map_copy_t copy,
10454 vm_map_offset_t adjustment,
10455 vm_prot_t cur_prot,
10456 vm_prot_t max_prot,
10457 vm_inherit_t inheritance)
10458 {
10459 vm_map_entry_t copy_entry, new_entry;
10460
10461 for (copy_entry = vm_map_copy_first_entry(copy);
10462 copy_entry != vm_map_copy_to_entry(copy);
10463 copy_entry = copy_entry->vme_next) {
10464 /* get a new VM map entry for the map */
10465 new_entry = vm_map_entry_create(map,
10466 !map->hdr.entries_pageable);
10467 /* copy the "copy entry" to the new entry */
10468 vm_map_entry_copy(new_entry, copy_entry);
10469 /* adjust "start" and "end" */
10470 new_entry->vme_start += adjustment;
10471 new_entry->vme_end += adjustment;
10472 /* clear some attributes */
10473 new_entry->inheritance = inheritance;
10474 new_entry->protection = cur_prot;
10475 new_entry->max_protection = max_prot;
10476 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
10477 /* take an extra reference on the entry's "object" */
10478 if (new_entry->is_sub_map) {
10479 assert(!new_entry->use_pmap); /* not nested */
10480 vm_map_lock(VME_SUBMAP(new_entry));
10481 vm_map_reference(VME_SUBMAP(new_entry));
10482 vm_map_unlock(VME_SUBMAP(new_entry));
10483 } else {
10484 vm_object_reference(VME_OBJECT(new_entry));
10485 }
10486 /* insert the new entry in the map */
10487 vm_map_store_entry_link(map, where, new_entry,
10488 VM_MAP_KERNEL_FLAGS_NONE);
10489 /* continue inserting the "copy entries" after the new entry */
10490 where = new_entry;
10491 }
10492 }
10493
10494
10495 /*
10496 * Returns true if *size matches (or is in the range of) copy->size.
10497 * Upon returning true, the *size field is updated with the actual size of the
10498 * copy object (may be different for VM_MAP_COPY_ENTRY_LIST types)
10499 */
10500 boolean_t
10501 vm_map_copy_validate_size(
10502 vm_map_t dst_map,
10503 vm_map_copy_t copy,
10504 vm_map_size_t *size)
10505 {
10506 if (copy == VM_MAP_COPY_NULL) {
10507 return FALSE;
10508 }
10509 vm_map_size_t copy_sz = copy->size;
10510 vm_map_size_t sz = *size;
10511 switch (copy->type) {
10512 case VM_MAP_COPY_OBJECT:
10513 case VM_MAP_COPY_KERNEL_BUFFER:
10514 if (sz == copy_sz) {
10515 return TRUE;
10516 }
10517 break;
10518 case VM_MAP_COPY_ENTRY_LIST:
10519 /*
10520 * potential page-size rounding prevents us from exactly
10521 * validating this flavor of vm_map_copy, but we can at least
10522 * assert that it's within a range.
10523 */
10524 if (copy_sz >= sz &&
10525 copy_sz <= vm_map_round_page(sz, VM_MAP_PAGE_MASK(dst_map))) {
10526 *size = copy_sz;
10527 return TRUE;
10528 }
10529 break;
10530 default:
10531 break;
10532 }
10533 return FALSE;
10534 }
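
/*
 * A sketch (kept out of the build with #if 0) of how a caller might
 * pair vm_map_copy_validate_size() with vm_map_copyout_size(): the
 * claimed size is checked against the copy object first, and the
 * possibly page-rounded actual size is then used for the copyout.
 * The helper name is hypothetical.
 */
#if 0
static kern_return_t
copyout_with_claimed_size(
	vm_map_t                dst_map,
	vm_map_copy_t           copy,
	vm_map_size_t           claimed_size,
	vm_map_address_t        *dst_addr)      /* OUT */
{
	vm_map_size_t size = claimed_size;

	if (!vm_map_copy_validate_size(dst_map, copy, &size)) {
		/* claimed size doesn't match the copy object */
		return KERN_INVALID_ARGUMENT;
	}
	/* "size" now holds the actual size of the copy object */
	return vm_map_copyout_size(dst_map, dst_addr, copy, size);
}
#endif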
10535
10536 /*
10537 * Routine: vm_map_copyout_size
10538 *
10539 * Description:
10540 * Copy out a copy chain ("copy") into newly-allocated
10541 * space in the destination map. Uses a prevalidated
10542 * size for the copy object (vm_map_copy_validate_size).
10543 *
10544 * If successful, consumes the copy object.
10545 * Otherwise, the caller is responsible for it.
10546 */
10547 kern_return_t
10548 vm_map_copyout_size(
10549 vm_map_t dst_map,
10550 vm_map_address_t *dst_addr, /* OUT */
10551 vm_map_copy_t copy,
10552 vm_map_size_t copy_size)
10553 {
10554 return vm_map_copyout_internal(dst_map, dst_addr, copy, copy_size,
10555 TRUE, /* consume_on_success */
10556 VM_PROT_DEFAULT,
10557 VM_PROT_ALL,
10558 VM_INHERIT_DEFAULT);
10559 }
10560
10561 /*
10562 * Routine: vm_map_copyout
10563 *
10564 * Description:
10565 * Copy out a copy chain ("copy") into newly-allocated
10566 * space in the destination map.
10567 *
10568 * If successful, consumes the copy object.
10569 * Otherwise, the caller is responsible for it.
10570 */
10571 kern_return_t
10572 vm_map_copyout(
10573 vm_map_t dst_map,
10574 vm_map_address_t *dst_addr, /* OUT */
10575 vm_map_copy_t copy)
10576 {
10577 return vm_map_copyout_internal(dst_map, dst_addr, copy, copy ? copy->size : 0,
10578 TRUE, /* consume_on_success */
10579 VM_PROT_DEFAULT,
10580 VM_PROT_ALL,
10581 VM_INHERIT_DEFAULT);
10582 }
10583
10584 kern_return_t
10585 vm_map_copyout_internal(
10586 vm_map_t dst_map,
10587 vm_map_address_t *dst_addr, /* OUT */
10588 vm_map_copy_t copy,
10589 vm_map_size_t copy_size,
10590 boolean_t consume_on_success,
10591 vm_prot_t cur_protection,
10592 vm_prot_t max_protection,
10593 vm_inherit_t inheritance)
10594 {
10595 vm_map_size_t size;
10596 vm_map_size_t adjustment;
10597 vm_map_offset_t start;
10598 vm_object_offset_t vm_copy_start;
10599 vm_map_entry_t last;
10600 vm_map_entry_t entry;
10601 vm_map_entry_t hole_entry;
10602
10603 /*
10604 * Check for null copy object.
10605 */
10606
10607 if (copy == VM_MAP_COPY_NULL) {
10608 *dst_addr = 0;
10609 return KERN_SUCCESS;
10610 }
10611
10612 if (copy->size != copy_size) {
10613 *dst_addr = 0;
10614 return KERN_FAILURE;
10615 }
10616
10617 /*
10618 * Check for special copy object, created
10619 * by vm_map_copyin_object.
10620 */
10621
10622 if (copy->type == VM_MAP_COPY_OBJECT) {
10623 vm_object_t object = copy->cpy_object;
10624 kern_return_t kr;
10625 vm_object_offset_t offset;
10626
10627 offset = vm_object_trunc_page(copy->offset);
10628 size = vm_map_round_page((copy_size +
10629 (vm_map_size_t)(copy->offset -
10630 offset)),
10631 VM_MAP_PAGE_MASK(dst_map));
10632 *dst_addr = 0;
10633 kr = vm_map_enter(dst_map, dst_addr, size,
10634 (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
10635 VM_MAP_KERNEL_FLAGS_NONE,
10636 VM_KERN_MEMORY_NONE,
10637 object, offset, FALSE,
10638 VM_PROT_DEFAULT, VM_PROT_ALL,
10639 VM_INHERIT_DEFAULT);
10640 if (kr != KERN_SUCCESS) {
10641 return kr;
10642 }
10643 /* Account for non-pagealigned copy object */
10644 *dst_addr += (vm_map_offset_t)(copy->offset - offset);
10645 if (consume_on_success) {
10646 zfree(vm_map_copy_zone, copy);
10647 }
10648 return KERN_SUCCESS;
10649 }
10650
10651 /*
10652 * Check for special kernel buffer allocated
10653 * by new_ipc_kmsg_copyin.
10654 */
10655
10656 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
10657 return vm_map_copyout_kernel_buffer(dst_map, dst_addr,
10658 copy, copy_size, FALSE,
10659 consume_on_success);
10660 }
10661
10662
10663 /*
10664 * Find space for the data
10665 */
10666
10667 vm_copy_start = vm_map_trunc_page((vm_map_size_t)copy->offset,
10668 VM_MAP_COPY_PAGE_MASK(copy));
10669 size = vm_map_round_page((vm_map_size_t)copy->offset + copy_size,
10670 VM_MAP_COPY_PAGE_MASK(copy))
10671 - vm_copy_start;
10672
10673
10674 StartAgain:;
10675
10676 vm_map_lock(dst_map);
10677 if (dst_map->disable_vmentry_reuse == TRUE) {
10678 VM_MAP_HIGHEST_ENTRY(dst_map, entry, start);
10679 last = entry;
10680 } else {
10681 if (dst_map->holelistenabled) {
10682 hole_entry = CAST_TO_VM_MAP_ENTRY(dst_map->holes_list);
10683
10684 if (hole_entry == NULL) {
10685 /*
10686 * No more space in the map?
10687 */
10688 vm_map_unlock(dst_map);
10689 return KERN_NO_SPACE;
10690 }
10691
10692 last = hole_entry;
10693 start = last->vme_start;
10694 } else {
10695 assert(first_free_is_valid(dst_map));
10696 start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
10697 vm_map_min(dst_map) : last->vme_end;
10698 }
10699 start = vm_map_round_page(start,
10700 VM_MAP_PAGE_MASK(dst_map));
10701 }
10702
10703 while (TRUE) {
10704 vm_map_entry_t next = last->vme_next;
10705 vm_map_offset_t end = start + size;
10706
10707 if ((end > dst_map->max_offset) || (end < start)) {
10708 if (dst_map->wait_for_space) {
10709 if (size <= (dst_map->max_offset - dst_map->min_offset)) {
10710 assert_wait((event_t) dst_map,
10711 THREAD_INTERRUPTIBLE);
10712 vm_map_unlock(dst_map);
10713 thread_block(THREAD_CONTINUE_NULL);
10714 goto StartAgain;
10715 }
10716 }
10717 vm_map_unlock(dst_map);
10718 return KERN_NO_SPACE;
10719 }
10720
10721 if (dst_map->holelistenabled) {
10722 if (last->vme_end >= end) {
10723 break;
10724 }
10725 } else {
10726 /*
10727 * If there are no more entries, we must win.
10728 *
10729 * OR
10730 *
10731 * If there is another entry, it must be
10732 * after the end of the potential new region.
10733 */
10734
10735 if (next == vm_map_to_entry(dst_map)) {
10736 break;
10737 }
10738
10739 if (next->vme_start >= end) {
10740 break;
10741 }
10742 }
10743
10744 last = next;
10745
10746 if (dst_map->holelistenabled) {
10747 if (last == CAST_TO_VM_MAP_ENTRY(dst_map->holes_list)) {
10748 /*
10749 * Wrapped around
10750 */
10751 vm_map_unlock(dst_map);
10752 return KERN_NO_SPACE;
10753 }
10754 start = last->vme_start;
10755 } else {
10756 start = last->vme_end;
10757 }
10758 start = vm_map_round_page(start,
10759 VM_MAP_PAGE_MASK(dst_map));
10760 }
10761
10762 if (dst_map->holelistenabled) {
10763 if (vm_map_lookup_entry(dst_map, last->vme_start, &last)) {
10764 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", last, (unsigned long long)last->vme_start);
10765 }
10766 }
10767
10768
10769 adjustment = start - vm_copy_start;
10770 if (!consume_on_success) {
10771 /*
10772 * We're not allowed to consume "copy", so we'll have to
10773 * copy its map entries into the destination map below.
10774 * No need to re-allocate map entries from the correct
10775 * (pageable or not) zone, since we'll get new map entries
10776 * during the transfer.
10777 * We'll also adjust the map entries' "start" and "end"
10778 * during the transfer, to keep "copy"'s entries consistent
10779 * with its "offset".
10780 */
10781 goto after_adjustments;
10782 }
10783
10784 /*
10785 * Since we're going to just drop the map
10786 * entries from the copy into the destination
10787 * map, they must come from the same pool.
10788 */
10789
10790 if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
10791 /*
10792 * Mismatches occur when dealing with the default
10793 * pager.
10794 */
10795 zone_t old_zone;
10796 vm_map_entry_t next, new;
10797
10798 /*
10799 * Find the zone that the copies were allocated from
10800 */
10801
10802 entry = vm_map_copy_first_entry(copy);
10803
10804 /*
10805 * Reinitialize the copy so that vm_map_copy_entry_link
10806 * will work.
10807 */
10808 vm_map_store_copy_reset(copy, entry);
10809 copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;
10810
10811 /*
10812 * Copy each entry.
10813 */
10814 while (entry != vm_map_copy_to_entry(copy)) {
10815 new = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
10816 vm_map_entry_copy_full(new, entry);
10817 new->vme_no_copy_on_read = FALSE;
10818 assert(!new->iokit_acct);
10819 if (new->is_sub_map) {
10820 /* clr address space specifics */
10821 new->use_pmap = FALSE;
10822 }
10823 vm_map_copy_entry_link(copy,
10824 vm_map_copy_last_entry(copy),
10825 new);
10826 next = entry->vme_next;
10827 old_zone = entry->from_reserved_zone ? vm_map_entry_reserved_zone : vm_map_entry_zone;
10828 zfree(old_zone, entry);
10829 entry = next;
10830 }
10831 }
10832
10833 /*
10834 * Adjust the addresses in the copy chain, and
10835 * reset the region attributes.
10836 */
10837
10838 for (entry = vm_map_copy_first_entry(copy);
10839 entry != vm_map_copy_to_entry(copy);
10840 entry = entry->vme_next) {
10841 if (VM_MAP_PAGE_SHIFT(dst_map) == PAGE_SHIFT) {
10842 /*
10843 * We're injecting this copy entry into a map that
10844 * has the standard page alignment, so clear
10845 * "map_aligned" (which might have been inherited
10846 * from the original map entry).
10847 */
10848 entry->map_aligned = FALSE;
10849 }
10850
10851 entry->vme_start += adjustment;
10852 entry->vme_end += adjustment;
10853
10854 if (entry->map_aligned) {
10855 assert(VM_MAP_PAGE_ALIGNED(entry->vme_start,
10856 VM_MAP_PAGE_MASK(dst_map)));
10857 assert(VM_MAP_PAGE_ALIGNED(entry->vme_end,
10858 VM_MAP_PAGE_MASK(dst_map)));
10859 }
10860
10861 entry->inheritance = VM_INHERIT_DEFAULT;
10862 entry->protection = VM_PROT_DEFAULT;
10863 entry->max_protection = VM_PROT_ALL;
10864 entry->behavior = VM_BEHAVIOR_DEFAULT;
10865
10866 /*
10867 * If the entry is now wired,
10868 * map the pages into the destination map.
10869 */
10870 if (entry->wired_count != 0) {
10871 vm_map_offset_t va;
10872 vm_object_offset_t offset;
10873 vm_object_t object;
10874 vm_prot_t prot;
10875 int type_of_fault;
10876
10877 object = VME_OBJECT(entry);
10878 offset = VME_OFFSET(entry);
10879 va = entry->vme_start;
10880
10881 pmap_pageable(dst_map->pmap,
10882 entry->vme_start,
10883 entry->vme_end,
10884 TRUE);
10885
10886 while (va < entry->vme_end) {
10887 vm_page_t m;
10888 struct vm_object_fault_info fault_info = {};
10889
10890 /*
10891 * Look up the page in the object.
10892 * Assert that the page will be found in the
10893 * top object:
10894 * either
10895 * the object was newly created by
10896 * vm_object_copy_slowly, and has
10897 * copies of all of the pages from
10898 * the source object
10899 * or
10900 * the object was moved from the old
10901 * map entry; because the old map
10902 * entry was wired, all of the pages
10903 * were in the top-level object.
10904 * (XXX not true if we wire pages for
10905 * reading)
10906 */
10907 vm_object_lock(object);
10908
10909 m = vm_page_lookup(object, offset);
10910 if (m == VM_PAGE_NULL || !VM_PAGE_WIRED(m) ||
10911 m->vmp_absent) {
10912 panic("vm_map_copyout: wiring %p", m);
10913 }
10914
10915 prot = entry->protection;
10916
10917 if (override_nx(dst_map, VME_ALIAS(entry)) &&
10918 prot) {
10919 prot |= VM_PROT_EXECUTE;
10920 }
10921
10922 type_of_fault = DBG_CACHE_HIT_FAULT;
10923
10924 fault_info.user_tag = VME_ALIAS(entry);
10925 fault_info.pmap_options = 0;
10926 if (entry->iokit_acct ||
10927 (!entry->is_sub_map && !entry->use_pmap)) {
10928 fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
10929 }
10930
10931 vm_fault_enter(m,
10932 dst_map->pmap,
10933 va,
10934 prot,
10935 prot,
10936 VM_PAGE_WIRED(m),
10937 FALSE, /* change_wiring */
10938 VM_KERN_MEMORY_NONE, /* tag - not wiring */
10939 &fault_info,
10940 NULL, /* need_retry */
10941 &type_of_fault);
10942
10943 vm_object_unlock(object);
10944
10945 offset += PAGE_SIZE_64;
10946 va += PAGE_SIZE;
10947 }
10948 }
10949 }
10950
10951 after_adjustments:
10952
10953 /*
10954 * Correct the page alignment for the result
10955 */
10956
10957 *dst_addr = start + (copy->offset - vm_copy_start);
10958
10959 #if KASAN
10960 kasan_notify_address(*dst_addr, size);
10961 #endif
10962
10963 /*
10964 * Update the hints and the map size
10965 */
10966
10967 if (consume_on_success) {
10968 SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy));
10969 } else {
10970 SAVE_HINT_MAP_WRITE(dst_map, last);
10971 }
10972
10973 dst_map->size += size;
10974
10975 /*
10976 * Link in the copy
10977 */
10978
10979 if (consume_on_success) {
10980 vm_map_copy_insert(dst_map, last, copy);
10981 } else {
10982 vm_map_copy_remap(dst_map, last, copy, adjustment,
10983 cur_protection, max_protection,
10984 inheritance);
10985 }
10986
10987 vm_map_unlock(dst_map);
10988
10989 /*
10990 * XXX If wiring_required, call vm_map_pageable
10991 */
10992
10993 return KERN_SUCCESS;
10994 }
10995
10996 /*
10997 * Routine: vm_map_copyin
10998 *
10999 * Description:
11000 * see vm_map_copyin_common. Exported via Unsupported.exports.
11001 *
11002 */
11003
11004 #undef vm_map_copyin
11005
11006 kern_return_t
11007 vm_map_copyin(
11008 vm_map_t src_map,
11009 vm_map_address_t src_addr,
11010 vm_map_size_t len,
11011 boolean_t src_destroy,
11012 vm_map_copy_t *copy_result) /* OUT */
11013 {
11014 return vm_map_copyin_common(src_map, src_addr, len, src_destroy,
11015 FALSE, copy_result, FALSE);
11016 }
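
/*
 * A sketch (kept out of the build with #if 0) of the usual
 * copyin/copyout pairing: capture "len" bytes at "src_addr" in
 * "src_map" as a vm_map_copy_t, then place them at a newly allocated
 * address in "dst_map".  A successful vm_map_copyout() consumes the
 * copy; on failure the caller must discard it.  The helper name is
 * hypothetical.
 */
#if 0
static kern_return_t
copy_region_between_maps(
	vm_map_t                src_map,
	vm_map_address_t        src_addr,
	vm_map_size_t           len,
	vm_map_t                dst_map,
	vm_map_address_t        *dst_addr)      /* OUT */
{
	vm_map_copy_t copy;
	kern_return_t kr;

	kr = vm_map_copyin(src_map, src_addr, len,
	    FALSE,              /* src_destroy: leave the source mapped */
	    &copy);
	if (kr != KERN_SUCCESS) {
		return kr;
	}

	kr = vm_map_copyout(dst_map, dst_addr, copy);
	if (kr != KERN_SUCCESS) {
		/* copyout did not consume the copy object: discard it */
		vm_map_copy_discard(copy);
	}
	return kr;
}
#endif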
11017
11018 /*
11019 * Routine: vm_map_copyin_common
11020 *
11021 * Description:
11022 * Copy the specified region (src_addr, len) from the
11023 * source address space (src_map), possibly removing
11024 * the region from the source address space (src_destroy).
11025 *
11026 * Returns:
11027 * A vm_map_copy_t object (copy_result), suitable for
11028 * insertion into another address space (using vm_map_copyout),
11029 * copying over another address space region (using
11030 * vm_map_copy_overwrite). If the copy is unused, it
11031 * should be destroyed (using vm_map_copy_discard).
11032 *
11033 * In/out conditions:
11034 * The source map should not be locked on entry.
11035 */
11036
11037 typedef struct submap_map {
11038 vm_map_t parent_map;
11039 vm_map_offset_t base_start;
11040 vm_map_offset_t base_end;
11041 vm_map_size_t base_len;
11042 struct submap_map *next;
11043 } submap_map_t;
11044
11045 kern_return_t
11046 vm_map_copyin_common(
11047 vm_map_t src_map,
11048 vm_map_address_t src_addr,
11049 vm_map_size_t len,
11050 boolean_t src_destroy,
11051 __unused boolean_t src_volatile,
11052 vm_map_copy_t *copy_result, /* OUT */
11053 boolean_t use_maxprot)
11054 {
11055 int flags;
11056
11057 flags = 0;
11058 if (src_destroy) {
11059 flags |= VM_MAP_COPYIN_SRC_DESTROY;
11060 }
11061 if (use_maxprot) {
11062 flags |= VM_MAP_COPYIN_USE_MAXPROT;
11063 }
11064 return vm_map_copyin_internal(src_map,
11065 src_addr,
11066 len,
11067 flags,
11068 copy_result);
11069 }
11070 kern_return_t
11071 vm_map_copyin_internal(
11072 vm_map_t src_map,
11073 vm_map_address_t src_addr,
11074 vm_map_size_t len,
11075 int flags,
11076 vm_map_copy_t *copy_result) /* OUT */
11077 {
11078 vm_map_entry_t tmp_entry; /* Result of last map lookup --
11079 * in multi-level lookup, this
11080 * entry contains the actual
11081 * vm_object/offset.
11082 */
11083 vm_map_entry_t new_entry = VM_MAP_ENTRY_NULL; /* Map entry for copy */
11084
11085 vm_map_offset_t src_start; /* Start of current entry --
11086 * where copy is taking place now
11087 */
11088 vm_map_offset_t src_end; /* End of entire region to be
11089 * copied */
11090 vm_map_offset_t src_base;
11091 vm_map_t base_map = src_map;
11092 boolean_t map_share = FALSE;
11093 submap_map_t *parent_maps = NULL;
11094
11095 vm_map_copy_t copy; /* Resulting copy */
11096 vm_map_address_t copy_addr;
11097 vm_map_size_t copy_size;
11098 boolean_t src_destroy;
11099 boolean_t use_maxprot;
11100 boolean_t preserve_purgeable;
11101 boolean_t entry_was_shared;
11102 vm_map_entry_t saved_src_entry;
11103
11104 if (flags & ~VM_MAP_COPYIN_ALL_FLAGS) {
11105 return KERN_INVALID_ARGUMENT;
11106 }
11107
11108 src_destroy = (flags & VM_MAP_COPYIN_SRC_DESTROY) ? TRUE : FALSE;
11109 use_maxprot = (flags & VM_MAP_COPYIN_USE_MAXPROT) ? TRUE : FALSE;
11110 preserve_purgeable =
11111 (flags & VM_MAP_COPYIN_PRESERVE_PURGEABLE) ? TRUE : FALSE;
11112
11113 /*
11114 * Check for copies of zero bytes.
11115 */
11116
11117 if (len == 0) {
11118 *copy_result = VM_MAP_COPY_NULL;
11119 return KERN_SUCCESS;
11120 }
11121
11122 /*
11123 * Check that the end address doesn't overflow
11124 */
11125 src_end = src_addr + len;
11126 if (src_end < src_addr) {
11127 return KERN_INVALID_ADDRESS;
11128 }
11129
11130 /*
11131 * Compute (page aligned) start and end of region
11132 */
11133 src_start = vm_map_trunc_page(src_addr,
11134 VM_MAP_PAGE_MASK(src_map));
11135 src_end = vm_map_round_page(src_end,
11136 VM_MAP_PAGE_MASK(src_map));
11137
11138 /*
11139 * If the copy is sufficiently small, use a kernel buffer instead
11140 * of making a virtual copy. The theory being that the cost of
11141 * setting up VM (and taking C-O-W faults) dominates the copy costs
11142 * for small regions.
11143 */
11144 if ((len < msg_ool_size_small) &&
11145 !use_maxprot &&
11146 !preserve_purgeable &&
11147 !(flags & VM_MAP_COPYIN_ENTRY_LIST) &&
11148 /*
11149 * Since the "msg_ool_size_small" threshold was increased and
11150 * vm_map_copyin_kernel_buffer() doesn't handle accesses beyond the
11151 * address space limits, we revert to doing a virtual copy if the
11152 * copied range goes beyond those limits. Otherwise, mach_vm_read()
11153 * of the commpage would now fail when it used to work.
11154 */
11155 (src_start >= vm_map_min(src_map) &&
11156 src_start < vm_map_max(src_map) &&
11157 src_end >= vm_map_min(src_map) &&
11158 src_end < vm_map_max(src_map))) {
11159 return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
11160 src_destroy, copy_result);
11161 }
11162
11163 /*
11164 * Allocate a header element for the list.
11165 *
11166 * Use the start and end in the header to
11167 * remember the endpoints prior to rounding.
11168 */
11169
11170 copy = vm_map_copy_allocate();
11171 copy->type = VM_MAP_COPY_ENTRY_LIST;
11172 copy->cpy_hdr.entries_pageable = TRUE;
11173 #if 00
11174 copy->cpy_hdr.page_shift = src_map->hdr.page_shift;
11175 #else
11176 /*
11177 * The copy entries can be broken down for a variety of reasons,
11178 * so we can't guarantee that they will remain map-aligned...
11179 * Will need to adjust the first copy_entry's "vme_start" and
11180 * the last copy_entry's "vme_end" to be rounded to PAGE_MASK
11181 * rather than the original map's alignment.
11182 */
11183 copy->cpy_hdr.page_shift = PAGE_SHIFT;
11184 #endif
11185
11186 vm_map_store_init( &(copy->cpy_hdr));
11187
11188 copy->offset = src_addr;
11189 copy->size = len;
11190
11191 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
11192
11193 #define RETURN(x) \
11194 MACRO_BEGIN \
11195 vm_map_unlock(src_map); \
11196 if(src_map != base_map) \
11197 vm_map_deallocate(src_map); \
11198 if (new_entry != VM_MAP_ENTRY_NULL) \
11199 vm_map_copy_entry_dispose(copy,new_entry); \
11200 vm_map_copy_discard(copy); \
11201 { \
11202 submap_map_t *_ptr; \
11203 \
11204 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
11205 parent_maps=parent_maps->next; \
11206 if (_ptr->parent_map != base_map) \
11207 vm_map_deallocate(_ptr->parent_map); \
11208 kfree(_ptr, sizeof(submap_map_t)); \
11209 } \
11210 } \
11211 MACRO_RETURN(x); \
11212 MACRO_END
11213
11214 /*
11215 * Find the beginning of the region.
11216 */
11217
11218 vm_map_lock(src_map);
11219
11220 /*
11221 * Lookup the original "src_addr" rather than the truncated
11222 * "src_start", in case "src_start" falls in a non-map-aligned
11223 * map entry *before* the map entry that contains "src_addr"...
11224 */
11225 if (!vm_map_lookup_entry(src_map, src_addr, &tmp_entry)) {
11226 RETURN(KERN_INVALID_ADDRESS);
11227 }
11228 if (!tmp_entry->is_sub_map) {
11229 /*
11230 * ... but clip to the map-rounded "src_start" rather than
11231 * "src_addr" to preserve map-alignment. We'll adjust the
11232 * first copy entry at the end, if needed.
11233 */
11234 vm_map_clip_start(src_map, tmp_entry, src_start);
11235 }
11236 if (src_start < tmp_entry->vme_start) {
11237 /*
11238 * Move "src_start" up to the start of the
11239 * first map entry to copy.
11240 */
11241 src_start = tmp_entry->vme_start;
11242 }
11243 /* set for later submap fix-up */
11244 copy_addr = src_start;
11245
11246 /*
11247 * Go through entries until we get to the end.
11248 */
11249
11250 while (TRUE) {
11251 vm_map_entry_t src_entry = tmp_entry; /* Top-level entry */
11252 vm_map_size_t src_size; /* Size of source
11253 * map entry (in both
11254 * maps)
11255 */
11256
11257 vm_object_t src_object; /* Object to copy */
11258 vm_object_offset_t src_offset;
11259
11260 boolean_t src_needs_copy; /* Should source map
11261 * be made read-only
11262 * for copy-on-write?
11263 */
11264
11265 boolean_t new_entry_needs_copy; /* Will new entry be COW? */
11266
11267 boolean_t was_wired; /* Was source wired? */
11268 vm_map_version_t version; /* Version before locks
11269 * dropped to make copy
11270 */
11271 kern_return_t result; /* Return value from
11272 * copy_strategically.
11273 */
11274 while (tmp_entry->is_sub_map) {
11275 vm_map_size_t submap_len;
11276 submap_map_t *ptr;
11277
11278 ptr = (submap_map_t *)kalloc(sizeof(submap_map_t));
11279 ptr->next = parent_maps;
11280 parent_maps = ptr;
11281 ptr->parent_map = src_map;
11282 ptr->base_start = src_start;
11283 ptr->base_end = src_end;
11284 submap_len = tmp_entry->vme_end - src_start;
11285 if (submap_len > (src_end - src_start)) {
11286 submap_len = src_end - src_start;
11287 }
11288 ptr->base_len = submap_len;
11289
11290 src_start -= tmp_entry->vme_start;
11291 src_start += VME_OFFSET(tmp_entry);
11292 src_end = src_start + submap_len;
11293 src_map = VME_SUBMAP(tmp_entry);
11294 vm_map_lock(src_map);
11295 /* keep an outstanding reference for all maps in */
11296 /* the tree of parent maps, except the base map */
11297 vm_map_reference(src_map);
11298 vm_map_unlock(ptr->parent_map);
11299 if (!vm_map_lookup_entry(
11300 src_map, src_start, &tmp_entry)) {
11301 RETURN(KERN_INVALID_ADDRESS);
11302 }
11303 map_share = TRUE;
11304 if (!tmp_entry->is_sub_map) {
11305 vm_map_clip_start(src_map, tmp_entry, src_start);
11306 }
11307 src_entry = tmp_entry;
11308 }
11309 /* we are now in the lowest level submap... */
11310
11311 if ((VME_OBJECT(tmp_entry) != VM_OBJECT_NULL) &&
11312 (VME_OBJECT(tmp_entry)->phys_contiguous)) {
11313 /* This is not supported for now. In future */
11314 /* we will need to detect the phys_contig */
11315 /* condition and then upgrade copy_slowly */
11316 /* to do a physical copy from the device mem */
11317 /* based object. We can piggy-back off of */
11318 /* the "was_wired" boolean to set up the */
11319 /* proper handling */
11320 RETURN(KERN_PROTECTION_FAILURE);
11321 }
11322 /*
11323 * Create a new address map entry to hold the result.
11324 * Fill in the fields from the appropriate source entries.
11325 * We must unlock the source map to do this if we need
11326 * to allocate a map entry.
11327 */
11328 if (new_entry == VM_MAP_ENTRY_NULL) {
11329 version.main_timestamp = src_map->timestamp;
11330 vm_map_unlock(src_map);
11331
11332 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
11333
11334 vm_map_lock(src_map);
11335 if ((version.main_timestamp + 1) != src_map->timestamp) {
11336 if (!vm_map_lookup_entry(src_map, src_start,
11337 &tmp_entry)) {
11338 RETURN(KERN_INVALID_ADDRESS);
11339 }
11340 if (!tmp_entry->is_sub_map) {
11341 vm_map_clip_start(src_map, tmp_entry, src_start);
11342 }
11343 continue; /* restart w/ new tmp_entry */
11344 }
11345 }
11346
11347 /*
11348 * Verify that the region can be read.
11349 */
11350 if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE &&
11351 !use_maxprot) ||
11352 (src_entry->max_protection & VM_PROT_READ) == 0) {
11353 RETURN(KERN_PROTECTION_FAILURE);
11354 }
11355
11356 /*
11357 * Clip against the endpoints of the entire region.
11358 */
11359
11360 vm_map_clip_end(src_map, src_entry, src_end);
11361
11362 src_size = src_entry->vme_end - src_start;
11363 src_object = VME_OBJECT(src_entry);
11364 src_offset = VME_OFFSET(src_entry);
11365 was_wired = (src_entry->wired_count != 0);
11366
11367 vm_map_entry_copy(new_entry, src_entry);
11368 if (new_entry->is_sub_map) {
11369 /* clr address space specifics */
11370 new_entry->use_pmap = FALSE;
11371 } else {
11372 /*
11373 * We're dealing with a copy-on-write operation,
11374 * so the resulting mapping should not inherit the
11375 * original mapping's accounting settings.
11376 * "iokit_acct" should have been cleared in
11377 * vm_map_entry_copy().
11378 * "use_pmap" should be reset to its default (TRUE)
11379 * so that the new mapping gets accounted for in
11380 * the task's memory footprint.
11381 */
11382 assert(!new_entry->iokit_acct);
11383 new_entry->use_pmap = TRUE;
11384 }
11385
11386 /*
11387 * Attempt non-blocking copy-on-write optimizations.
11388 */
11389
11390 /*
11391 * If we are destroying the source, and the object
11392 * is internal, we could move the object reference
11393 * from the source to the copy. The copy is
11394 * copy-on-write only if the source is.
11395 * We make another reference to the object, because
11396 * destroying the source entry will deallocate it.
11397 *
11398 * This memory transfer has to be atomic (to prevent
11399 * the VM object from being shared or copied while
11400 * it's being moved here), so we can only do this
11401 * if we won't have to unlock the VM map until the
11402 * original mapping has been fully removed.
11403 */
11404
11405 RestartCopy:
11406 if ((src_object == VM_OBJECT_NULL ||
11407 (!was_wired && !map_share && !tmp_entry->is_shared)) &&
11408 vm_object_copy_quickly(
11409 VME_OBJECT_PTR(new_entry),
11410 src_offset,
11411 src_size,
11412 &src_needs_copy,
11413 &new_entry_needs_copy)) {
11414 new_entry->needs_copy = new_entry_needs_copy;
11415
11416 /*
11417 * Handle copy-on-write obligations
11418 */
11419
11420 if (src_needs_copy && !tmp_entry->needs_copy) {
11421 vm_prot_t prot;
11422
11423 prot = src_entry->protection & ~VM_PROT_WRITE;
11424
11425 if (override_nx(src_map, VME_ALIAS(src_entry))
11426 && prot) {
11427 prot |= VM_PROT_EXECUTE;
11428 }
11429
11430 vm_object_pmap_protect(
11431 src_object,
11432 src_offset,
11433 src_size,
11434 (src_entry->is_shared ?
11435 PMAP_NULL
11436 : src_map->pmap),
11437 src_entry->vme_start,
11438 prot);
11439
11440 assert(tmp_entry->wired_count == 0);
11441 tmp_entry->needs_copy = TRUE;
11442 }
11443
11444 /*
11445 * The map has never been unlocked, so it's safe
11446 * to move to the next entry rather than doing
11447 * another lookup.
11448 */
11449
11450 goto CopySuccessful;
11451 }
11452
11453 entry_was_shared = tmp_entry->is_shared;
11454
11455 /*
11456 * Take an object reference, so that we may
11457 * release the map lock(s).
11458 */
11459
11460 assert(src_object != VM_OBJECT_NULL);
11461 vm_object_reference(src_object);
11462
11463 /*
11464 * Record the timestamp for later verification.
11465 * Unlock the map.
11466 */
11467
11468 version.main_timestamp = src_map->timestamp;
11469 vm_map_unlock(src_map); /* Increments timestamp once! */
11470 saved_src_entry = src_entry;
11471 tmp_entry = VM_MAP_ENTRY_NULL;
11472 src_entry = VM_MAP_ENTRY_NULL;
11473
11474 /*
11475 * Perform the copy
11476 */
11477
11478 if (was_wired) {
11479 CopySlowly:
11480 vm_object_lock(src_object);
11481 result = vm_object_copy_slowly(
11482 src_object,
11483 src_offset,
11484 src_size,
11485 THREAD_UNINT,
11486 VME_OBJECT_PTR(new_entry));
11487 VME_OFFSET_SET(new_entry, 0);
11488 new_entry->needs_copy = FALSE;
11489 } else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
11490 (entry_was_shared || map_share)) {
11491 vm_object_t new_object;
11492
11493 vm_object_lock_shared(src_object);
11494 new_object = vm_object_copy_delayed(
11495 src_object,
11496 src_offset,
11497 src_size,
11498 TRUE);
11499 if (new_object == VM_OBJECT_NULL) {
11500 goto CopySlowly;
11501 }
11502
11503 VME_OBJECT_SET(new_entry, new_object);
11504 assert(new_entry->wired_count == 0);
11505 new_entry->needs_copy = TRUE;
11506 assert(!new_entry->iokit_acct);
11507 assert(new_object->purgable == VM_PURGABLE_DENY);
11508 assertf(new_entry->use_pmap, "src_map %p new_entry %p\n", src_map, new_entry);
11509 result = KERN_SUCCESS;
11510 } else {
11511 vm_object_offset_t new_offset;
11512 new_offset = VME_OFFSET(new_entry);
11513 result = vm_object_copy_strategically(src_object,
11514 src_offset,
11515 src_size,
11516 VME_OBJECT_PTR(new_entry),
11517 &new_offset,
11518 &new_entry_needs_copy);
11519 if (new_offset != VME_OFFSET(new_entry)) {
11520 VME_OFFSET_SET(new_entry, new_offset);
11521 }
11522
11523 new_entry->needs_copy = new_entry_needs_copy;
11524 }
11525
11526 if (result == KERN_SUCCESS &&
11527 preserve_purgeable &&
11528 src_object->purgable != VM_PURGABLE_DENY) {
11529 vm_object_t new_object;
11530
11531 new_object = VME_OBJECT(new_entry);
11532 assert(new_object != src_object);
11533 vm_object_lock(new_object);
11534 assert(new_object->ref_count == 1);
11535 assert(new_object->shadow == VM_OBJECT_NULL);
11536 assert(new_object->copy == VM_OBJECT_NULL);
11537 assert(new_object->vo_owner == NULL);
11538
11539 new_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
11540 new_object->true_share = TRUE;
11541 /* start as non-volatile with no owner... */
11542 new_object->purgable = VM_PURGABLE_NONVOLATILE;
11543 vm_purgeable_nonvolatile_enqueue(new_object, NULL);
11544 /* ... and move to src_object's purgeable state */
11545 if (src_object->purgable != VM_PURGABLE_NONVOLATILE) {
11546 int state;
11547 state = src_object->purgable;
11548 vm_object_purgable_control(
11549 new_object,
11550 VM_PURGABLE_SET_STATE_FROM_KERNEL,
11551 &state);
11552 }
11553 vm_object_unlock(new_object);
11554 new_object = VM_OBJECT_NULL;
11555 /* no pmap accounting for purgeable objects */
11556 new_entry->use_pmap = FALSE;
11557 }
11558
11559 if (result != KERN_SUCCESS &&
11560 result != KERN_MEMORY_RESTART_COPY) {
11561 vm_map_lock(src_map);
11562 RETURN(result);
11563 }
11564
11565 /*
11566 * Throw away the extra reference
11567 */
11568
11569 vm_object_deallocate(src_object);
11570
11571 /*
11572 * Verify that the map has not substantially
11573 * changed while the copy was being made.
11574 */
11575
11576 vm_map_lock(src_map);
11577
11578 if ((version.main_timestamp + 1) == src_map->timestamp) {
11579 /* src_map hasn't changed: src_entry is still valid */
11580 src_entry = saved_src_entry;
11581 goto VerificationSuccessful;
11582 }
11583
11584 /*
11585 * Simple version comparison failed.
11586 *
11587 * Retry the lookup and verify that the
11588 * same object/offset are still present.
11589 *
11590 * [Note: a memory manager that colludes with
11591 * the calling task can detect that we have
11592 * cheated. While the map was unlocked, the
11593 * mapping could have been changed and restored.]
11594 */
11595
11596 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
11597 if (result != KERN_MEMORY_RESTART_COPY) {
11598 vm_object_deallocate(VME_OBJECT(new_entry));
11599 VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
11600 /* reset accounting state */
11601 new_entry->iokit_acct = FALSE;
11602 new_entry->use_pmap = TRUE;
11603 }
11604 RETURN(KERN_INVALID_ADDRESS);
11605 }
11606
11607 src_entry = tmp_entry;
11608 vm_map_clip_start(src_map, src_entry, src_start);
11609
11610 if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) &&
11611 !use_maxprot) ||
11612 ((src_entry->max_protection & VM_PROT_READ) == 0)) {
11613 goto VerificationFailed;
11614 }
11615
11616 if (src_entry->vme_end < new_entry->vme_end) {
11617 /*
11618 * This entry might have been shortened
11619 * (vm_map_clip_end) or been replaced with
11620 * an entry that ends closer to "src_start"
11621 * than before.
11622 * Adjust "new_entry" accordingly; copying
11623 * less memory would be correct but we also
11624 * redo the copy (see below) if the new entry
11625 * no longer points at the same object/offset.
11626 */
11627 assert(VM_MAP_PAGE_ALIGNED(src_entry->vme_end,
11628 VM_MAP_COPY_PAGE_MASK(copy)));
11629 new_entry->vme_end = src_entry->vme_end;
11630 src_size = new_entry->vme_end - src_start;
11631 } else if (src_entry->vme_end > new_entry->vme_end) {
11632 /*
11633 * This entry might have been extended
11634 * (vm_map_entry_simplify() or coalesce)
11635 * or been replaced with an entry that ends farther
11636 * from "src_start" than before.
11637 *
11638 * We've called vm_object_copy_*() only on
11639 * the previous <start:end> range, so we can't
11640 * just extend new_entry. We have to re-do
11641 * the copy based on the new entry as if it was
11642 * pointing at a different object/offset (see
11643 * "Verification failed" below).
11644 */
11645 }
11646
11647 if ((VME_OBJECT(src_entry) != src_object) ||
11648 (VME_OFFSET(src_entry) != src_offset) ||
11649 (src_entry->vme_end > new_entry->vme_end)) {
11650 /*
11651 * Verification failed.
11652 *
11653 * Start over with this top-level entry.
11654 */
11655
11656 VerificationFailed: ;
11657
11658 vm_object_deallocate(VME_OBJECT(new_entry));
11659 tmp_entry = src_entry;
11660 continue;
11661 }
11662
11663 /*
11664 * Verification succeeded.
11665 */
11666
11667 VerificationSuccessful:;
11668
11669 if (result == KERN_MEMORY_RESTART_COPY) {
11670 goto RestartCopy;
11671 }
11672
11673 /*
11674 * Copy succeeded.
11675 */
11676
11677 CopySuccessful: ;
11678
11679 /*
11680 * Link in the new copy entry.
11681 */
11682
11683 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
11684 new_entry);
11685
11686 /*
11687 * Determine whether the entire region
11688 * has been copied.
11689 */
11690 src_base = src_start;
11691 src_start = new_entry->vme_end;
11692 new_entry = VM_MAP_ENTRY_NULL;
11693 while ((src_start >= src_end) && (src_end != 0)) {
11694 submap_map_t *ptr;
11695
11696 if (src_map == base_map) {
11697 /* back to the top */
11698 break;
11699 }
11700
11701 ptr = parent_maps;
11702 assert(ptr != NULL);
11703 parent_maps = parent_maps->next;
11704
11705 /* fix up the damage we did in that submap */
11706 vm_map_simplify_range(src_map,
11707 src_base,
11708 src_end);
11709
11710 vm_map_unlock(src_map);
11711 vm_map_deallocate(src_map);
11712 vm_map_lock(ptr->parent_map);
11713 src_map = ptr->parent_map;
11714 src_base = ptr->base_start;
11715 src_start = ptr->base_start + ptr->base_len;
11716 src_end = ptr->base_end;
11717 if (!vm_map_lookup_entry(src_map,
11718 src_start,
11719 &tmp_entry) &&
11720 (src_end > src_start)) {
11721 RETURN(KERN_INVALID_ADDRESS);
11722 }
11723 kfree(ptr, sizeof(submap_map_t));
11724 if (parent_maps == NULL) {
11725 map_share = FALSE;
11726 }
11727 src_entry = tmp_entry->vme_prev;
11728 }
11729
11730 if ((VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) &&
11731 (src_start >= src_addr + len) &&
11732 (src_addr + len != 0)) {
11733 /*
11734 * Stop copying now, even though we haven't reached
11735 * "src_end". We'll adjust the end of the last copy
11736 * entry at the end, if needed.
11737 *
11738 * If src_map's alignment is different from the
11739 * system's page-alignment, there could be
11740 * extra non-map-aligned map entries between
11741 * the original (non-rounded) "src_addr + len"
11742 * and the rounded "src_end".
11743 * We do not want to copy those map entries since
11744 * they're not part of the copied range.
11745 */
11746 break;
11747 }
11748
11749 if ((src_start >= src_end) && (src_end != 0)) {
11750 break;
11751 }
11752
11753 /*
11754 * Verify that there are no gaps in the region
11755 */
11756
11757 tmp_entry = src_entry->vme_next;
11758 if ((tmp_entry->vme_start != src_start) ||
11759 (tmp_entry == vm_map_to_entry(src_map))) {
11760 RETURN(KERN_INVALID_ADDRESS);
11761 }
11762 }
11763
11764 /*
11765 * If the source should be destroyed, do it now, since the
11766 * copy was successful.
11767 */
11768 if (src_destroy) {
11769 (void) vm_map_delete(
11770 src_map,
11771 vm_map_trunc_page(src_addr,
11772 VM_MAP_PAGE_MASK(src_map)),
11773 src_end,
11774 ((src_map == kernel_map) ?
11775 VM_MAP_REMOVE_KUNWIRE :
11776 VM_MAP_REMOVE_NO_FLAGS),
11777 VM_MAP_NULL);
11778 } else {
11779 /* fix up the damage we did in the base map */
11780 vm_map_simplify_range(
11781 src_map,
11782 vm_map_trunc_page(src_addr,
11783 VM_MAP_PAGE_MASK(src_map)),
11784 vm_map_round_page(src_end,
11785 VM_MAP_PAGE_MASK(src_map)));
11786 }
11787
11788 vm_map_unlock(src_map);
11789 tmp_entry = VM_MAP_ENTRY_NULL;
11790
11791 if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) {
11792 vm_map_offset_t original_start, original_offset, original_end;
11793
11794 assert(VM_MAP_COPY_PAGE_MASK(copy) == PAGE_MASK);
11795
11796 /* adjust alignment of first copy_entry's "vme_start" */
11797 tmp_entry = vm_map_copy_first_entry(copy);
11798 if (tmp_entry != vm_map_copy_to_entry(copy)) {
11799 vm_map_offset_t adjustment;
11800
11801 original_start = tmp_entry->vme_start;
11802 original_offset = VME_OFFSET(tmp_entry);
11803
11804 /* map-align the start of the first copy entry... */
11805 adjustment = (tmp_entry->vme_start -
11806 vm_map_trunc_page(
11807 tmp_entry->vme_start,
11808 VM_MAP_PAGE_MASK(src_map)));
11809 tmp_entry->vme_start -= adjustment;
11810 VME_OFFSET_SET(tmp_entry,
11811 VME_OFFSET(tmp_entry) - adjustment);
11812 copy_addr -= adjustment;
11813 assert(tmp_entry->vme_start < tmp_entry->vme_end);
11814 /* ... adjust for mis-aligned start of copy range */
11815 adjustment =
11816 (vm_map_trunc_page(copy->offset,
11817 PAGE_MASK) -
11818 vm_map_trunc_page(copy->offset,
11819 VM_MAP_PAGE_MASK(src_map)));
11820 if (adjustment) {
11821 assert(page_aligned(adjustment));
11822 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
11823 tmp_entry->vme_start += adjustment;
11824 VME_OFFSET_SET(tmp_entry,
11825 (VME_OFFSET(tmp_entry) +
11826 adjustment));
11827 copy_addr += adjustment;
11828 assert(tmp_entry->vme_start < tmp_entry->vme_end);
11829 }
11830
11831 /*
11832 * Assert that the adjustments haven't exposed
11833 * more than was originally copied...
11834 */
11835 assert(tmp_entry->vme_start >= original_start);
11836 assert(VME_OFFSET(tmp_entry) >= original_offset);
11837 /*
11838 * ... and that it did not adjust outside of
11839 * a single 16K page.
11840 */
11841 assert(vm_map_trunc_page(tmp_entry->vme_start,
11842 VM_MAP_PAGE_MASK(src_map)) ==
11843 vm_map_trunc_page(original_start,
11844 VM_MAP_PAGE_MASK(src_map)));
11845 }
11846
11847 /* adjust alignment of last copy_entry's "vme_end" */
11848 tmp_entry = vm_map_copy_last_entry(copy);
11849 if (tmp_entry != vm_map_copy_to_entry(copy)) {
11850 vm_map_offset_t adjustment;
11851
11852 original_end = tmp_entry->vme_end;
11853
11854 /* map-align the end of the last copy entry... */
11855 tmp_entry->vme_end =
11856 vm_map_round_page(tmp_entry->vme_end,
11857 VM_MAP_PAGE_MASK(src_map));
11858 /* ... adjust for mis-aligned end of copy range */
11859 adjustment =
11860 (vm_map_round_page((copy->offset +
11861 copy->size),
11862 VM_MAP_PAGE_MASK(src_map)) -
11863 vm_map_round_page((copy->offset +
11864 copy->size),
11865 PAGE_MASK));
11866 if (adjustment) {
11867 assert(page_aligned(adjustment));
11868 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
11869 tmp_entry->vme_end -= adjustment;
11870 assert(tmp_entry->vme_start < tmp_entry->vme_end);
11871 }
11872
11873 /*
11874 * Assert that the adjustments haven't exposed
11875 * more than was originally copied...
11876 */
11877 assert(tmp_entry->vme_end <= original_end);
11878 /*
11879 * ... and that it did not adjust outside of
11880 * a single 16K page.
11881 */
11882 assert(vm_map_round_page(tmp_entry->vme_end,
11883 VM_MAP_PAGE_MASK(src_map)) ==
11884 vm_map_round_page(original_end,
11885 VM_MAP_PAGE_MASK(src_map)));
11886 }
11887 }
11888
11889 /* Fix-up start and end points in copy. This is necessary */
11890 /* when the various entries in the copy object were picked */
11891 /* up from different sub-maps */
11892
11893 tmp_entry = vm_map_copy_first_entry(copy);
11894 copy_size = 0; /* compute actual size */
11895 while (tmp_entry != vm_map_copy_to_entry(copy)) {
11896 assert(VM_MAP_PAGE_ALIGNED(
11897 copy_addr + (tmp_entry->vme_end -
11898 tmp_entry->vme_start),
11899 VM_MAP_COPY_PAGE_MASK(copy)));
11900 assert(VM_MAP_PAGE_ALIGNED(
11901 copy_addr,
11902 VM_MAP_COPY_PAGE_MASK(copy)));
11903
11904 /*
11905 * The copy_entries will be injected directly into the
11906 * destination map and might not be "map aligned" there...
11907 */
11908 tmp_entry->map_aligned = FALSE;
11909
11910 tmp_entry->vme_end = copy_addr +
11911 (tmp_entry->vme_end - tmp_entry->vme_start);
11912 tmp_entry->vme_start = copy_addr;
11913 assert(tmp_entry->vme_start < tmp_entry->vme_end);
11914 copy_addr += tmp_entry->vme_end - tmp_entry->vme_start;
11915 copy_size += tmp_entry->vme_end - tmp_entry->vme_start;
11916 tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next;
11917 }
11918
11919 if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT &&
11920 copy_size < copy->size) {
11921 /*
11922 * The actual size of the VM map copy is smaller than what
11923 * was requested by the caller. This must be because some
11924 * PAGE_SIZE-sized pages are missing at the end of the last
11925 * VM_MAP_PAGE_SIZE(src_map)-sized chunk of the range.
11926 * The caller might not have been aware of those missing
11927 * pages and might not want to be aware of it, which is
11928 * fine as long as they don't try to access (and crash on)
11929 * those missing pages.
11930 * Let's adjust the size of the "copy", to avoid failing
11931 * in vm_map_copyout() or vm_map_copy_overwrite().
11932 */
11933 assert(vm_map_round_page(copy_size,
11934 VM_MAP_PAGE_MASK(src_map)) ==
11935 vm_map_round_page(copy->size,
11936 VM_MAP_PAGE_MASK(src_map)));
11937 copy->size = copy_size;
11938 }
11939
11940 *copy_result = copy;
11941 return KERN_SUCCESS;
11942
11943 #undef RETURN
11944 }
11945
11946 kern_return_t
11947 vm_map_copy_extract(
11948 vm_map_t src_map,
11949 vm_map_address_t src_addr,
11950 vm_map_size_t len,
11951 vm_map_copy_t *copy_result, /* OUT */
11952 vm_prot_t *cur_prot, /* OUT */
11953 vm_prot_t *max_prot)
11954 {
11955 vm_map_offset_t src_start, src_end;
11956 vm_map_copy_t copy;
11957 kern_return_t kr;
11958
11959 /*
11960 * Check for copies of zero bytes.
11961 */
11962
11963 if (len == 0) {
11964 *copy_result = VM_MAP_COPY_NULL;
11965 return KERN_SUCCESS;
11966 }
11967
11968 /*
11969 * Check that the end address doesn't overflow
11970 */
11971 src_end = src_addr + len;
11972 if (src_end < src_addr) {
11973 return KERN_INVALID_ADDRESS;
11974 }
11975
11976 /*
11977 * Compute (page aligned) start and end of region
11978 */
11979 src_start = vm_map_trunc_page(src_addr, PAGE_MASK);
11980 src_end = vm_map_round_page(src_end, PAGE_MASK);
11981
11982 /*
11983 * Allocate a header element for the list.
11984 *
11985 * Use the start and end in the header to
11986 * remember the endpoints prior to rounding.
11987 */
11988
11989 copy = vm_map_copy_allocate();
11990 copy->type = VM_MAP_COPY_ENTRY_LIST;
11991 copy->cpy_hdr.entries_pageable = TRUE;
11992
11993 vm_map_store_init(&copy->cpy_hdr);
11994
11995 copy->offset = 0;
11996 copy->size = len;
11997
11998 kr = vm_map_remap_extract(src_map,
11999 src_addr,
12000 len,
12001 FALSE, /* copy */
12002 &copy->cpy_hdr,
12003 cur_prot,
12004 max_prot,
12005 VM_INHERIT_SHARE,
12006 TRUE, /* pageable */
12007 FALSE, /* same_map */
12008 VM_MAP_KERNEL_FLAGS_NONE);
12009 if (kr != KERN_SUCCESS) {
12010 vm_map_copy_discard(copy);
12011 return kr;
12012 }
12013
12014 *copy_result = copy;
12015 return KERN_SUCCESS;
12016 }
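/*
 * Illustrative sketch, not part of the original source: a hypothetical
 * in-kernel caller of vm_map_copy_extract() receives an entry-list copy
 * plus the narrowest current/maximum protections of the extracted range,
 * and would typically reject ranges that are not readable. "src_map",
 * "addr" and "size" are assumptions for the example only.
 *
 *	vm_map_copy_t copy;
 *	vm_prot_t cur_prot, max_prot;
 *	kern_return_t kr;
 *
 *	kr = vm_map_copy_extract(src_map, addr, size,
 *	    &copy, &cur_prot, &max_prot);
 *	if (kr != KERN_SUCCESS) {
 *		return kr;	// copy already discarded on failure
 *	}
 *	if (!(cur_prot & VM_PROT_READ)) {
 *		vm_map_copy_discard(copy);
 *		return KERN_PROTECTION_FAILURE;
 *	}
 *	... map or copy out "copy", then discard it ...
 */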
12017
12018 /*
12019 * vm_map_copyin_object:
12020 *
12021 * Create a copy object from an object.
12022 * Our caller donates an object reference.
12023 */
12024
12025 kern_return_t
12026 vm_map_copyin_object(
12027 vm_object_t object,
12028 vm_object_offset_t offset, /* offset of region in object */
12029 vm_object_size_t size, /* size of region in object */
12030 vm_map_copy_t *copy_result) /* OUT */
12031 {
12032 vm_map_copy_t copy; /* Resulting copy */
12033
12034 /*
12035 * We drop the object into a special copy object
12036 * that contains the object directly.
12037 */
12038
12039 copy = vm_map_copy_allocate();
12040 copy->type = VM_MAP_COPY_OBJECT;
12041 copy->cpy_object = object;
12042 copy->offset = offset;
12043 copy->size = size;
12044
12045 *copy_result = copy;
12046 return KERN_SUCCESS;
12047 }
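/*
 * Illustrative sketch, not part of the original source: because the
 * caller donates its object reference, a hypothetical user of
 * vm_map_copyin_object() must not release the object afterwards; the
 * reference now belongs to the copy and is consumed when the copy is
 * copied out or discarded. "obj_size" is an assumption for the example.
 *
 *	vm_object_t object;
 *	vm_map_copy_t copy;
 *
 *	object = vm_object_allocate(obj_size);	// one reference, donated below
 *	(void) vm_map_copyin_object(object, 0, obj_size, &copy);
 *	// do NOT vm_object_deallocate(object) here: "copy" owns the reference
 *	vm_map_copy_discard(copy);		// drops the donated reference
 */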
12048
12049 static void
12050 vm_map_fork_share(
12051 vm_map_t old_map,
12052 vm_map_entry_t old_entry,
12053 vm_map_t new_map)
12054 {
12055 vm_object_t object;
12056 vm_map_entry_t new_entry;
12057
12058 /*
12059 * New sharing code. New map entry
12060 * references original object. Internal
12061 * objects use asynchronous copy algorithm for
12062 * future copies. First make sure we have
12063 * the right object. If we need a shadow,
12064 * or someone else already has one, then
12065 * make a new shadow and share it.
12066 */
12067
12068 object = VME_OBJECT(old_entry);
12069 if (old_entry->is_sub_map) {
12070 assert(old_entry->wired_count == 0);
12071 #ifndef NO_NESTED_PMAP
12072 if (old_entry->use_pmap) {
12073 kern_return_t result;
12074
12075 result = pmap_nest(new_map->pmap,
12076 (VME_SUBMAP(old_entry))->pmap,
12077 (addr64_t)old_entry->vme_start,
12078 (addr64_t)old_entry->vme_start,
12079 (uint64_t)(old_entry->vme_end - old_entry->vme_start));
12080 if (result) {
12081 panic("vm_map_fork_share: pmap_nest failed!");
12082 }
12083 }
12084 #endif /* NO_NESTED_PMAP */
12085 } else if (object == VM_OBJECT_NULL) {
12086 object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
12087 old_entry->vme_start));
12088 VME_OFFSET_SET(old_entry, 0);
12089 VME_OBJECT_SET(old_entry, object);
12090 old_entry->use_pmap = TRUE;
12091 // assert(!old_entry->needs_copy);
12092 } else if (object->copy_strategy !=
12093 MEMORY_OBJECT_COPY_SYMMETRIC) {
12094 /*
12095 * We are already using an asymmetric
12096 * copy, and therefore we already have
12097 * the right object.
12098 */
12099
12100 assert(!old_entry->needs_copy);
12101 } else if (old_entry->needs_copy || /* case 1 */
12102 object->shadowed || /* case 2 */
12103 (!object->true_share && /* case 3 */
12104 !old_entry->is_shared &&
12105 (object->vo_size >
12106 (vm_map_size_t)(old_entry->vme_end -
12107 old_entry->vme_start)))) {
12108 /*
12109 * We need to create a shadow.
12110 * There are three cases here.
12111 * In the first case, we need to
12112 * complete a deferred symmetrical
12113 * copy that we participated in.
12114 * In the second and third cases,
12115 * we need to create the shadow so
12116 * that changes that we make to the
12117 * object do not interfere with
12118 * any symmetrical copies which
12119 * have occurred (case 2) or which
12120 * might occur (case 3).
12121 *
12122 * The first case is when we had
12123 * deferred shadow object creation
12124 * via the entry->needs_copy mechanism.
12125 * This mechanism only works when
12126 * only one entry points to the source
12127 * object, and we are about to create
12128 * a second entry pointing to the
12129 * same object. The problem is that
12130 * there is no way of mapping from
12131 * an object to the entries pointing
12132 * to it. (Deferred shadow creation
12133 * works with one entry because it occurs
12134 * at fault time, and we walk from the
12135 * entry to the object when handling
12136 * the fault.)
12137 *
12138 * The second case is when the object
12139 * to be shared has already been copied
12140 * with a symmetric copy, but we point
12141 * directly to the object without
12142 * needs_copy set in our entry. (This
12143 * can happen because different ranges
12144 * of an object can be pointed to by
12145 * different entries. In particular,
12146 * a single entry pointing to an object
12147 * can be split by a call to vm_inherit,
12148 * which, combined with task_create, can
12149 * result in the different entries
12150 * having different needs_copy values.)
12151 * The shadowed flag in the object allows
12152 * us to detect this case. The problem
12153 * with this case is that if this object
12154 * has or will have shadows, then we
12155 * must not perform an asymmetric copy
12156 * of this object, since such a copy
12157 * allows the object to be changed, which
12158 * will break the previous symmetrical
12159 * copies (which rely upon the object
12160 * not changing). In a sense, the shadowed
12161 * flag says "don't change this object".
12162 * We fix this by creating a shadow
12163 * object for this object, and sharing
12164 * that. This works because we are free
12165 * to change the shadow object (and thus
12166 * to use an asymmetric copy strategy);
12167 * this is also semantically correct,
12168 * since this object is temporary, and
12169 * therefore a copy of the object is
12170 * as good as the object itself. (This
12171 * is not true for permanent objects,
12172 * since the pager needs to see changes,
12173 * which won't happen if the changes
12174 * are made to a copy.)
12175 *
12176 * The third case is when the object
12177 * to be shared has parts sticking
12178 * outside of the entry we're working
12179 * with, and thus may in the future
12180 * be subject to a symmetrical copy.
12181 * (This is a preemptive version of
12182 * case 2.)
12183 */
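/*
 * Illustrative restatement, not part of the original source: the three
 * cases above correspond to the branch condition taken to get here,
 * which (with "entry_size" standing for vme_end - vme_start) reads as:
 *
 *	need_shadow = old_entry->needs_copy		// case 1
 *	    || object->shadowed				// case 2
 *	    || (!object->true_share			// case 3
 *	        && !old_entry->is_shared
 *	        && object->vo_size > entry_size);
 */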
12184 VME_OBJECT_SHADOW(old_entry,
12185 (vm_map_size_t) (old_entry->vme_end -
12186 old_entry->vme_start));
12187
12188 /*
12189 * If we're making a shadow for other than
12190 * copy on write reasons, then we have
12191 * to remove write permission.
12192 */
12193
12194 if (!old_entry->needs_copy &&
12195 (old_entry->protection & VM_PROT_WRITE)) {
12196 vm_prot_t prot;
12197
12198 assert(!pmap_has_prot_policy(old_entry->protection));
12199
12200 prot = old_entry->protection & ~VM_PROT_WRITE;
12201
12202 assert(!pmap_has_prot_policy(prot));
12203
12204 if (override_nx(old_map, VME_ALIAS(old_entry)) && prot) {
12205 prot |= VM_PROT_EXECUTE;
12206 }
12207
12208
12209 if (old_map->mapped_in_other_pmaps) {
12210 vm_object_pmap_protect(
12211 VME_OBJECT(old_entry),
12212 VME_OFFSET(old_entry),
12213 (old_entry->vme_end -
12214 old_entry->vme_start),
12215 PMAP_NULL,
12216 old_entry->vme_start,
12217 prot);
12218 } else {
12219 pmap_protect(old_map->pmap,
12220 old_entry->vme_start,
12221 old_entry->vme_end,
12222 prot);
12223 }
12224 }
12225
12226 old_entry->needs_copy = FALSE;
12227 object = VME_OBJECT(old_entry);
12228 }
12229
12230
12231 /*
12232 * If object was using a symmetric copy strategy,
12233 * change its copy strategy to the default
12234 * asymmetric copy strategy, which is copy_delay
12235 * in the non-norma case and copy_call in the
12236 * norma case. Bump the reference count for the
12237 * new entry.
12238 */
12239
12240 if (old_entry->is_sub_map) {
12241 vm_map_lock(VME_SUBMAP(old_entry));
12242 vm_map_reference(VME_SUBMAP(old_entry));
12243 vm_map_unlock(VME_SUBMAP(old_entry));
12244 } else {
12245 vm_object_lock(object);
12246 vm_object_reference_locked(object);
12247 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
12248 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
12249 }
12250 vm_object_unlock(object);
12251 }
12252
12253 /*
12254 * Clone the entry, using object ref from above.
12255 * Mark both entries as shared.
12256 */
12257
12258 new_entry = vm_map_entry_create(new_map, FALSE); /* Never the kernel
12259 * map or descendants */
12260 vm_map_entry_copy(new_entry, old_entry);
12261 old_entry->is_shared = TRUE;
12262 new_entry->is_shared = TRUE;
12263
12264 /*
12265 * We're dealing with a shared mapping, so the resulting mapping
12266 * should inherit some of the original mapping's accounting settings.
12267 * "iokit_acct" should have been cleared in vm_map_entry_copy().
12268 * "use_pmap" should stay the same as before (if it hasn't been reset
12269 * to TRUE when we cleared "iokit_acct").
12270 */
12271 assert(!new_entry->iokit_acct);
12272
12273 /*
12274 * If the old entry's inheritance is VM_INHERIT_NONE,
12275 * the new entry is for a corpse fork, so remove the
12276 * write permission from the new entry.
12277 */
12278 if (old_entry->inheritance == VM_INHERIT_NONE) {
12279 new_entry->protection &= ~VM_PROT_WRITE;
12280 new_entry->max_protection &= ~VM_PROT_WRITE;
12281 }
12282
12283 /*
12284 * Insert the entry into the new map -- we
12285 * know we're inserting at the end of the new
12286 * map.
12287 */
12288
12289 vm_map_store_entry_link(new_map, vm_map_last_entry(new_map), new_entry,
12290 VM_MAP_KERNEL_FLAGS_NONE);
12291
12292 /*
12293 * Update the physical map
12294 */
12295
12296 if (old_entry->is_sub_map) {
12297 /* Bill Angell pmap support goes here */
12298 } else {
12299 pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
12300 old_entry->vme_end - old_entry->vme_start,
12301 old_entry->vme_start);
12302 }
12303 }
12304
12305 static boolean_t
12306 vm_map_fork_copy(
12307 vm_map_t old_map,
12308 vm_map_entry_t *old_entry_p,
12309 vm_map_t new_map,
12310 int vm_map_copyin_flags)
12311 {
12312 vm_map_entry_t old_entry = *old_entry_p;
12313 vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start;
12314 vm_map_offset_t start = old_entry->vme_start;
12315 vm_map_copy_t copy;
12316 vm_map_entry_t last = vm_map_last_entry(new_map);
12317
12318 vm_map_unlock(old_map);
12319 /*
12320 * Use maxprot version of copyin because we
12321 * care about whether this memory can ever
12322 * be accessed, not just whether it's accessible
12323 * right now.
12324 */
12325 vm_map_copyin_flags |= VM_MAP_COPYIN_USE_MAXPROT;
12326 if (vm_map_copyin_internal(old_map, start, entry_size,
12327 vm_map_copyin_flags, &copy)
12328 != KERN_SUCCESS) {
12329 /*
12330 * The map might have changed while it
12331 * was unlocked; check it again. Skip
12332 * any blank space or permanently
12333 * unreadable region.
12334 */
12335 vm_map_lock(old_map);
12336 if (!vm_map_lookup_entry(old_map, start, &last) ||
12337 (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) {
12338 last = last->vme_next;
12339 }
12340 *old_entry_p = last;
12341
12342 /*
12343 * XXX For some error returns, want to
12344 * XXX skip to the next element. Note
12345 * that INVALID_ADDRESS and
12346 * PROTECTION_FAILURE are handled above.
12347 */
12348
12349 return FALSE;
12350 }
12351
12352 /*
12353 * Insert the copy into the new map
12354 */
12355
12356 vm_map_copy_insert(new_map, last, copy);
12357
12358 /*
12359 * Pick up the traversal at the end of
12360 * the copied region.
12361 */
12362
12363 vm_map_lock(old_map);
12364 start += entry_size;
12365 if (!vm_map_lookup_entry(old_map, start, &last)) {
12366 last = last->vme_next;
12367 } else {
12368 if (last->vme_start == start) {
12369 /*
12370 * No need to clip here and we don't
12371 * want to cause any unnecessary
12372 * unnesting...
12373 */
12374 } else {
12375 vm_map_clip_start(old_map, last, start);
12376 }
12377 }
12378 *old_entry_p = last;
12379
12380 return TRUE;
12381 }
12382
12383 /*
12384 * vm_map_fork:
12385 *
12386 * Create and return a new map based on the old
12387 * map, according to the inheritance values on the
12388 * regions in that map and the options.
12389 *
12390 * The source map must not be locked.
12391 */
12392 vm_map_t
12393 vm_map_fork(
12394 ledger_t ledger,
12395 vm_map_t old_map,
12396 int options)
12397 {
12398 pmap_t new_pmap;
12399 vm_map_t new_map;
12400 vm_map_entry_t old_entry;
12401 vm_map_size_t new_size = 0, entry_size;
12402 vm_map_entry_t new_entry;
12403 boolean_t src_needs_copy;
12404 boolean_t new_entry_needs_copy;
12405 boolean_t pmap_is64bit;
12406 int vm_map_copyin_flags;
12407 vm_inherit_t old_entry_inheritance;
12408 int map_create_options;
12409 kern_return_t footprint_collect_kr;
12410
12411 if (options & ~(VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
12412 VM_MAP_FORK_PRESERVE_PURGEABLE |
12413 VM_MAP_FORK_CORPSE_FOOTPRINT)) {
12414 /* unsupported option */
12415 return VM_MAP_NULL;
12416 }
12417
12418 pmap_is64bit =
12419 #if defined(__i386__) || defined(__x86_64__)
12420 old_map->pmap->pm_task_map != TASK_MAP_32BIT;
12421 #elif defined(__arm64__)
12422 old_map->pmap->max == MACH_VM_MAX_ADDRESS;
12423 #elif defined(__arm__)
12424 FALSE;
12425 #else
12426 #error Unknown architecture.
12427 #endif
12428
12429 unsigned int pmap_flags = 0;
12430 pmap_flags |= pmap_is64bit ? PMAP_CREATE_64BIT : 0;
12431 #if defined(HAS_APPLE_PAC)
12432 pmap_flags |= old_map->pmap->disable_jop ? PMAP_CREATE_DISABLE_JOP : 0;
12433 #endif
12434 new_pmap = pmap_create_options(ledger, (vm_map_size_t) 0, pmap_flags);
12435
12436 vm_map_reference_swap(old_map);
12437 vm_map_lock(old_map);
12438
12439 map_create_options = 0;
12440 if (old_map->hdr.entries_pageable) {
12441 map_create_options |= VM_MAP_CREATE_PAGEABLE;
12442 }
12443 if (options & VM_MAP_FORK_CORPSE_FOOTPRINT) {
12444 map_create_options |= VM_MAP_CREATE_CORPSE_FOOTPRINT;
12445 footprint_collect_kr = KERN_SUCCESS;
12446 }
12447 new_map = vm_map_create_options(new_pmap,
12448 old_map->min_offset,
12449 old_map->max_offset,
12450 map_create_options);
12451 vm_map_lock(new_map);
12452 vm_commit_pagezero_status(new_map);
12453 /* inherit the parent map's page size */
12454 vm_map_set_page_shift(new_map, VM_MAP_PAGE_SHIFT(old_map));
12455 for (
12456 old_entry = vm_map_first_entry(old_map);
12457 old_entry != vm_map_to_entry(old_map);
12458 ) {
12459 entry_size = old_entry->vme_end - old_entry->vme_start;
12460
12461 old_entry_inheritance = old_entry->inheritance;
12462 /*
12463 * If the caller used the VM_MAP_FORK_SHARE_IF_INHERIT_NONE option,
12464 * share VM_INHERIT_NONE entries that are not backed by a
12465 * device pager.
12466 */
12467 if (old_entry_inheritance == VM_INHERIT_NONE &&
12468 (options & VM_MAP_FORK_SHARE_IF_INHERIT_NONE) &&
12469 !(!old_entry->is_sub_map &&
12470 VME_OBJECT(old_entry) != NULL &&
12471 VME_OBJECT(old_entry)->pager != NULL &&
12472 is_device_pager_ops(
12473 VME_OBJECT(old_entry)->pager->mo_pager_ops))) {
12474 old_entry_inheritance = VM_INHERIT_SHARE;
12475 }
12476
12477 if (old_entry_inheritance != VM_INHERIT_NONE &&
12478 (options & VM_MAP_FORK_CORPSE_FOOTPRINT) &&
12479 footprint_collect_kr == KERN_SUCCESS) {
12480 /*
12481 * The corpse won't have old_map->pmap to query
12482 * footprint information, so collect that data now
12483 * and store it in new_map->vmmap_corpse_footprint
12484 * for later autopsy.
12485 */
12486 footprint_collect_kr =
12487 vm_map_corpse_footprint_collect(old_map,
12488 old_entry,
12489 new_map);
12490 }
12491
12492 switch (old_entry_inheritance) {
12493 case VM_INHERIT_NONE:
12494 break;
12495
12496 case VM_INHERIT_SHARE:
12497 vm_map_fork_share(old_map, old_entry, new_map);
12498 new_size += entry_size;
12499 break;
12500
12501 case VM_INHERIT_COPY:
12502
12503 /*
12504 * Inline the copy_quickly case;
12505 * upon failure, fall back on call
12506 * to vm_map_fork_copy.
12507 */
12508
12509 if (old_entry->is_sub_map) {
12510 break;
12511 }
12512 if ((old_entry->wired_count != 0) ||
12513 ((VME_OBJECT(old_entry) != NULL) &&
12514 (VME_OBJECT(old_entry)->true_share))) {
12515 goto slow_vm_map_fork_copy;
12516 }
12517
12518 new_entry = vm_map_entry_create(new_map, FALSE); /* never the kernel map or descendants */
12519 vm_map_entry_copy(new_entry, old_entry);
12520 if (new_entry->is_sub_map) {
12521 /* clear address space specifics */
12522 new_entry->use_pmap = FALSE;
12523 } else {
12524 /*
12525 * We're dealing with a copy-on-write operation,
12526 * so the resulting mapping should not inherit
12527 * the original mapping's accounting settings.
12528 * "iokit_acct" should have been cleared in
12529 * vm_map_entry_copy().
12530 * "use_pmap" should be reset to its default
12531 * (TRUE) so that the new mapping gets
12532 * accounted for in the task's memory footprint.
12533 */
12534 assert(!new_entry->iokit_acct);
12535 new_entry->use_pmap = TRUE;
12536 }
12537
12538 if (!vm_object_copy_quickly(
12539 VME_OBJECT_PTR(new_entry),
12540 VME_OFFSET(old_entry),
12541 (old_entry->vme_end -
12542 old_entry->vme_start),
12543 &src_needs_copy,
12544 &new_entry_needs_copy)) {
12545 vm_map_entry_dispose(new_map, new_entry);
12546 goto slow_vm_map_fork_copy;
12547 }
12548
12549 /*
12550 * Handle copy-on-write obligations
12551 */
12552
12553 if (src_needs_copy && !old_entry->needs_copy) {
12554 vm_prot_t prot;
12555
12556 assert(!pmap_has_prot_policy(old_entry->protection));
12557
12558 prot = old_entry->protection & ~VM_PROT_WRITE;
12559
12560 if (override_nx(old_map, VME_ALIAS(old_entry))
12561 && prot) {
12562 prot |= VM_PROT_EXECUTE;
12563 }
12564
12565 assert(!pmap_has_prot_policy(prot));
12566
12567 vm_object_pmap_protect(
12568 VME_OBJECT(old_entry),
12569 VME_OFFSET(old_entry),
12570 (old_entry->vme_end -
12571 old_entry->vme_start),
12572 ((old_entry->is_shared
12573 || old_map->mapped_in_other_pmaps)
12574 ? PMAP_NULL :
12575 old_map->pmap),
12576 old_entry->vme_start,
12577 prot);
12578
12579 assert(old_entry->wired_count == 0);
12580 old_entry->needs_copy = TRUE;
12581 }
12582 new_entry->needs_copy = new_entry_needs_copy;
12583
12584 /*
12585 * Insert the entry at the end
12586 * of the map.
12587 */
12588
12589 vm_map_store_entry_link(new_map,
12590 vm_map_last_entry(new_map),
12591 new_entry,
12592 VM_MAP_KERNEL_FLAGS_NONE);
12593 new_size += entry_size;
12594 break;
12595
12596 slow_vm_map_fork_copy:
12597 vm_map_copyin_flags = 0;
12598 if (options & VM_MAP_FORK_PRESERVE_PURGEABLE) {
12599 vm_map_copyin_flags |=
12600 VM_MAP_COPYIN_PRESERVE_PURGEABLE;
12601 }
12602 if (vm_map_fork_copy(old_map,
12603 &old_entry,
12604 new_map,
12605 vm_map_copyin_flags)) {
12606 new_size += entry_size;
12607 }
12608 continue;
12609 }
12610 old_entry = old_entry->vme_next;
12611 }
12612
12613 #if defined(__arm64__)
12614 pmap_insert_sharedpage(new_map->pmap);
12615 #endif
12616
12617 new_map->size = new_size;
12618
12619 if (options & VM_MAP_FORK_CORPSE_FOOTPRINT) {
12620 vm_map_corpse_footprint_collect_done(new_map);
12621 }
12622
12623 vm_map_unlock(new_map);
12624 vm_map_unlock(old_map);
12625 vm_map_deallocate(old_map);
12626
12627 return new_map;
12628 }
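/*
 * Illustrative sketch, not part of the original source: a hypothetical
 * caller forking an address space for a corpse would combine the option
 * bits handled above; any unsupported option makes vm_map_fork() return
 * VM_MAP_NULL. "task_ledger" and "parent_map" are assumptions for the
 * example only.
 *
 *	vm_map_t new_map;
 *
 *	new_map = vm_map_fork(task_ledger, parent_map,
 *	    VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
 *	    VM_MAP_FORK_CORPSE_FOOTPRINT);
 *	if (new_map == VM_MAP_NULL) {
 *		return KERN_FAILURE;	// unsupported option combination
 *	}
 */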
12629
12630 /*
12631 * vm_map_exec:
12632 *
12633 * Setup the "new_map" with the proper execution environment according
12634 * to the type of executable (platform, 64bit, chroot environment).
12635 * Map the comm page and shared region, etc...
12636 */
12637 kern_return_t
12638 vm_map_exec(
12639 vm_map_t new_map,
12640 task_t task,
12641 boolean_t is64bit,
12642 void *fsroot,
12643 cpu_type_t cpu,
12644 cpu_subtype_t cpu_subtype)
12645 {
12646 SHARED_REGION_TRACE_DEBUG(
12647 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x,0x%x): ->\n",
12648 (void *)VM_KERNEL_ADDRPERM(current_task()),
12649 (void *)VM_KERNEL_ADDRPERM(new_map),
12650 (void *)VM_KERNEL_ADDRPERM(task),
12651 (void *)VM_KERNEL_ADDRPERM(fsroot),
12652 cpu,
12653 cpu_subtype));
12654 (void) vm_commpage_enter(new_map, task, is64bit);
12655 (void) vm_shared_region_enter(new_map, task, is64bit, fsroot, cpu, cpu_subtype);
12656 SHARED_REGION_TRACE_DEBUG(
12657 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x,0x%x): <-\n",
12658 (void *)VM_KERNEL_ADDRPERM(current_task()),
12659 (void *)VM_KERNEL_ADDRPERM(new_map),
12660 (void *)VM_KERNEL_ADDRPERM(task),
12661 (void *)VM_KERNEL_ADDRPERM(fsroot),
12662 cpu,
12663 cpu_subtype));
12664 return KERN_SUCCESS;
12665 }
12666
12667 /*
12668 * vm_map_lookup_locked:
12669 *
12670 * Finds the VM object, offset, and
12671 * protection for a given virtual address in the
12672 * specified map, assuming a page fault of the
12673 * type specified.
12674 *
12675 * Returns the (object, offset, protection) for
12676 * this address, whether it is wired down, and whether
12677 * this map has the only reference to the data in question.
12678 * In order to later verify this lookup, a "version"
12679 * is returned.
12680 *
12681 * The map MUST be locked by the caller and WILL be
12682 * locked on exit. In order to guarantee the
12683 * existence of the returned object, it is returned
12684 * locked.
12685 *
12686 * If a lookup is requested with "write protection"
12687 * specified, the map may be changed to perform virtual
12688 * copying operations, although the data referenced will
12689 * remain the same.
12690 */
12691 kern_return_t
12692 vm_map_lookup_locked(
12693 vm_map_t *var_map, /* IN/OUT */
12694 vm_map_offset_t vaddr,
12695 vm_prot_t fault_type,
12696 int object_lock_type,
12697 vm_map_version_t *out_version, /* OUT */
12698 vm_object_t *object, /* OUT */
12699 vm_object_offset_t *offset, /* OUT */
12700 vm_prot_t *out_prot, /* OUT */
12701 boolean_t *wired, /* OUT */
12702 vm_object_fault_info_t fault_info, /* OUT */
12703 vm_map_t *real_map)
12704 {
12705 vm_map_entry_t entry;
12706 vm_map_t map = *var_map;
12707 vm_map_t old_map = *var_map;
12708 vm_map_t cow_sub_map_parent = VM_MAP_NULL;
12709 vm_map_offset_t cow_parent_vaddr = 0;
12710 vm_map_offset_t old_start = 0;
12711 vm_map_offset_t old_end = 0;
12712 vm_prot_t prot;
12713 boolean_t mask_protections;
12714 boolean_t force_copy;
12715 vm_prot_t original_fault_type;
12716
12717 /*
12718 * VM_PROT_MASK means that the caller wants us to use "fault_type"
12719 * as a mask against the mapping's actual protections, not as an
12720 * absolute value.
12721 */
12722 mask_protections = (fault_type & VM_PROT_IS_MASK) ? TRUE : FALSE;
12723 force_copy = (fault_type & VM_PROT_COPY) ? TRUE : FALSE;
12724 fault_type &= VM_PROT_ALL;
12725 original_fault_type = fault_type;
12726
12727 *real_map = map;
12728
12729 RetryLookup:
12730 fault_type = original_fault_type;
12731
12732 /*
12733 * If the map has an interesting hint, try it before calling
12734 * full blown lookup routine.
12735 */
12736 entry = map->hint;
12737
12738 if ((entry == vm_map_to_entry(map)) ||
12739 (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
12740 vm_map_entry_t tmp_entry;
12741
12742 /*
12743 * Entry was either not a valid hint, or the vaddr
12744 * was not contained in the entry, so do a full lookup.
12745 */
12746 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
12747 if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
12748 vm_map_unlock(cow_sub_map_parent);
12749 }
12750 if ((*real_map != map)
12751 && (*real_map != cow_sub_map_parent)) {
12752 vm_map_unlock(*real_map);
12753 }
12754 return KERN_INVALID_ADDRESS;
12755 }
12756
12757 entry = tmp_entry;
12758 }
12759 if (map == old_map) {
12760 old_start = entry->vme_start;
12761 old_end = entry->vme_end;
12762 }
12763
12764 /*
12765 * Handle submaps. Drop lock on upper map, submap is
12766 * returned locked.
12767 */
12768
12769 submap_recurse:
12770 if (entry->is_sub_map) {
12771 vm_map_offset_t local_vaddr;
12772 vm_map_offset_t end_delta;
12773 vm_map_offset_t start_delta;
12774 vm_map_entry_t submap_entry;
12775 vm_prot_t subentry_protection;
12776 vm_prot_t subentry_max_protection;
12777 boolean_t subentry_no_copy_on_read;
12778 boolean_t mapped_needs_copy = FALSE;
12779
12780 local_vaddr = vaddr;
12781
12782 if ((entry->use_pmap &&
12783 !((fault_type & VM_PROT_WRITE) ||
12784 force_copy))) {
12785 /* if real_map equals map we unlock below */
12786 if ((*real_map != map) &&
12787 (*real_map != cow_sub_map_parent)) {
12788 vm_map_unlock(*real_map);
12789 }
12790 *real_map = VME_SUBMAP(entry);
12791 }
12792
12793 if (entry->needs_copy &&
12794 ((fault_type & VM_PROT_WRITE) ||
12795 force_copy)) {
12796 if (!mapped_needs_copy) {
12797 if (vm_map_lock_read_to_write(map)) {
12798 vm_map_lock_read(map);
12799 *real_map = map;
12800 goto RetryLookup;
12801 }
12802 vm_map_lock_read(VME_SUBMAP(entry));
12803 *var_map = VME_SUBMAP(entry);
12804 cow_sub_map_parent = map;
12805 /* reset base to map before cow object */
12806 /* this is the map which will accept */
12807 /* the new cow object */
12808 old_start = entry->vme_start;
12809 old_end = entry->vme_end;
12810 cow_parent_vaddr = vaddr;
12811 mapped_needs_copy = TRUE;
12812 } else {
12813 vm_map_lock_read(VME_SUBMAP(entry));
12814 *var_map = VME_SUBMAP(entry);
12815 if ((cow_sub_map_parent != map) &&
12816 (*real_map != map)) {
12817 vm_map_unlock(map);
12818 }
12819 }
12820 } else {
12821 vm_map_lock_read(VME_SUBMAP(entry));
12822 *var_map = VME_SUBMAP(entry);
12823 /* leave map locked if it is a target */
12824 /* cow sub_map above otherwise, just */
12825 /* follow the maps down to the object */
12826 /* here we unlock knowing we are not */
12827 /* revisiting the map. */
12828 if ((*real_map != map) && (map != cow_sub_map_parent)) {
12829 vm_map_unlock_read(map);
12830 }
12831 }
12832
12833 map = *var_map;
12834
12835 /* calculate the offset in the submap for vaddr */
12836 local_vaddr = (local_vaddr - entry->vme_start) + VME_OFFSET(entry);
12837
12838 RetrySubMap:
12839 if (!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
12840 if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
12841 vm_map_unlock(cow_sub_map_parent);
12842 }
12843 if ((*real_map != map)
12844 && (*real_map != cow_sub_map_parent)) {
12845 vm_map_unlock(*real_map);
12846 }
12847 *real_map = map;
12848 return KERN_INVALID_ADDRESS;
12849 }
12850
12851 /* find the attenuated shadow of the underlying object */
12852 /* on our target map */
12853
12854 /* In English: the submap object may extend beyond the */
12855 /* region mapped by the entry, or may only fill a portion */
12856 /* of it. For our purposes, we only care if the object */
12857 /* doesn't fill. In this case the area which will */
12858 /* ultimately be clipped in the top map will only need */
12859 /* to be as big as the portion of the underlying entry */
12860 /* which is mapped */
12861 start_delta = submap_entry->vme_start > VME_OFFSET(entry) ?
12862 submap_entry->vme_start - VME_OFFSET(entry) : 0;
12863
12864 end_delta =
12865 (VME_OFFSET(entry) + start_delta + (old_end - old_start)) <=
12866 submap_entry->vme_end ?
12867 0 : (VME_OFFSET(entry) +
12868 (old_end - old_start))
12869 - submap_entry->vme_end;
12870
12871 old_start += start_delta;
12872 old_end -= end_delta;
12873
12874 if (submap_entry->is_sub_map) {
12875 entry = submap_entry;
12876 vaddr = local_vaddr;
12877 goto submap_recurse;
12878 }
12879
12880 if (((fault_type & VM_PROT_WRITE) ||
12881 force_copy)
12882 && cow_sub_map_parent) {
12883 vm_object_t sub_object, copy_object;
12884 vm_object_offset_t copy_offset;
12885 vm_map_offset_t local_start;
12886 vm_map_offset_t local_end;
12887 boolean_t copied_slowly = FALSE;
12888
12889 if (vm_map_lock_read_to_write(map)) {
12890 vm_map_lock_read(map);
12891 old_start -= start_delta;
12892 old_end += end_delta;
12893 goto RetrySubMap;
12894 }
12895
12896
12897 sub_object = VME_OBJECT(submap_entry);
12898 if (sub_object == VM_OBJECT_NULL) {
12899 sub_object =
12900 vm_object_allocate(
12901 (vm_map_size_t)
12902 (submap_entry->vme_end -
12903 submap_entry->vme_start));
12904 VME_OBJECT_SET(submap_entry, sub_object);
12905 VME_OFFSET_SET(submap_entry, 0);
12906 assert(!submap_entry->is_sub_map);
12907 assert(submap_entry->use_pmap);
12908 }
12909 local_start = local_vaddr -
12910 (cow_parent_vaddr - old_start);
12911 local_end = local_vaddr +
12912 (old_end - cow_parent_vaddr);
12913 vm_map_clip_start(map, submap_entry, local_start);
12914 vm_map_clip_end(map, submap_entry, local_end);
12915 if (submap_entry->is_sub_map) {
12916 /* unnesting was done when clipping */
12917 assert(!submap_entry->use_pmap);
12918 }
12919
12920 /* This is the COW case; let's connect */
12921 /* an entry in our space to the underlying */
12922 /* object in the submap, bypassing the */
12923 /* submap. */
12924
12925
12926 if (submap_entry->wired_count != 0 ||
12927 (sub_object->copy_strategy ==
12928 MEMORY_OBJECT_COPY_NONE)) {
12929 vm_object_lock(sub_object);
12930 vm_object_copy_slowly(sub_object,
12931 VME_OFFSET(submap_entry),
12932 (submap_entry->vme_end -
12933 submap_entry->vme_start),
12934 FALSE,
12935 &copy_object);
12936 copied_slowly = TRUE;
12937 } else {
12938 /* set up shadow object */
12939 copy_object = sub_object;
12940 vm_object_lock(sub_object);
12941 vm_object_reference_locked(sub_object);
12942 sub_object->shadowed = TRUE;
12943 vm_object_unlock(sub_object);
12944
12945 assert(submap_entry->wired_count == 0);
12946 submap_entry->needs_copy = TRUE;
12947
12948 prot = submap_entry->protection;
12949 assert(!pmap_has_prot_policy(prot));
12950 prot = prot & ~VM_PROT_WRITE;
12951 assert(!pmap_has_prot_policy(prot));
12952
12953 if (override_nx(old_map,
12954 VME_ALIAS(submap_entry))
12955 && prot) {
12956 prot |= VM_PROT_EXECUTE;
12957 }
12958
12959 vm_object_pmap_protect(
12960 sub_object,
12961 VME_OFFSET(submap_entry),
12962 submap_entry->vme_end -
12963 submap_entry->vme_start,
12964 (submap_entry->is_shared
12965 || map->mapped_in_other_pmaps) ?
12966 PMAP_NULL : map->pmap,
12967 submap_entry->vme_start,
12968 prot);
12969 }
12970
12971 /*
12972 * Adjust the fault offset to the submap entry.
12973 */
12974 copy_offset = (local_vaddr -
12975 submap_entry->vme_start +
12976 VME_OFFSET(submap_entry));
12977
12978 /* This works differently from the */
12979 /* normal submap case. We go back */
12980 /* to the parent of the cow map and */
12981 /* clip out the target portion of */
12982 /* the sub_map, substituting the */
12983 /* new copy object. */
12984
12985 subentry_protection = submap_entry->protection;
12986 subentry_max_protection = submap_entry->max_protection;
12987 subentry_no_copy_on_read = submap_entry->vme_no_copy_on_read;
12988 vm_map_unlock(map);
12989 submap_entry = NULL; /* not valid after map unlock */
12990
12991 local_start = old_start;
12992 local_end = old_end;
12993 map = cow_sub_map_parent;
12994 *var_map = cow_sub_map_parent;
12995 vaddr = cow_parent_vaddr;
12996 cow_sub_map_parent = NULL;
12997
12998 if (!vm_map_lookup_entry(map,
12999 vaddr, &entry)) {
13000 vm_object_deallocate(
13001 copy_object);
13002 vm_map_lock_write_to_read(map);
13003 return KERN_INVALID_ADDRESS;
13004 }
13005
13006 /* clip out the portion of space */
13007 /* mapped by the sub map which */
13008 /* corresponds to the underlying */
13009 /* object */
13010
13011 /*
13012 * Clip (and unnest) the smallest nested chunk
13013 * possible around the faulting address...
13014 */
13015 local_start = vaddr & ~(pmap_nesting_size_min - 1);
13016 local_end = local_start + pmap_nesting_size_min;
13017 /*
13018 * ... but don't go beyond the "old_start" to "old_end"
13019 * range, to avoid spanning over another VM region
13020 * with a possibly different VM object and/or offset.
13021 */
13022 if (local_start < old_start) {
13023 local_start = old_start;
13024 }
13025 if (local_end > old_end) {
13026 local_end = old_end;
13027 }
13028 /*
13029 * Adjust copy_offset to the start of the range.
13030 */
13031 copy_offset -= (vaddr - local_start);
13032
13033 vm_map_clip_start(map, entry, local_start);
13034 vm_map_clip_end(map, entry, local_end);
13035 if (entry->is_sub_map) {
13036 /* unnesting was done when clipping */
13037 assert(!entry->use_pmap);
13038 }
13039
13040 /* substitute copy object for */
13041 /* shared map entry */
13042 vm_map_deallocate(VME_SUBMAP(entry));
13043 assert(!entry->iokit_acct);
13044 entry->is_sub_map = FALSE;
13045 entry->use_pmap = TRUE;
13046 VME_OBJECT_SET(entry, copy_object);
13047
13048 /* propagate the submap entry's protections */
13049 if (entry->protection != VM_PROT_READ) {
13050 /*
13051 * Someone has already altered the top entry's
13052 * protections via vm_protect(VM_PROT_COPY).
13053 * Respect these new values and ignore the
13054 * submap entry's protections.
13055 */
13056 } else {
13057 /*
13058 * Regular copy-on-write: propagate the submap
13059 * entry's protections to the top map entry.
13060 */
13061 entry->protection |= subentry_protection;
13062 }
13063 entry->max_protection |= subentry_max_protection;
13064 /* propagate no_copy_on_read */
13065 entry->vme_no_copy_on_read = subentry_no_copy_on_read;
13066
13067 if ((entry->protection & VM_PROT_WRITE) &&
13068 (entry->protection & VM_PROT_EXECUTE) &&
13069 #if !CONFIG_EMBEDDED
13070 map != kernel_map &&
13071 cs_process_enforcement(NULL) &&
13072 #endif /* !CONFIG_EMBEDDED */
13073 !(entry->used_for_jit)) {
13074 DTRACE_VM3(cs_wx,
13075 uint64_t, (uint64_t)entry->vme_start,
13076 uint64_t, (uint64_t)entry->vme_end,
13077 vm_prot_t, entry->protection);
13078 printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
13079 proc_selfpid(),
13080 (current_task()->bsd_info
13081 ? proc_name_address(current_task()->bsd_info)
13082 : "?"),
13083 __FUNCTION__);
13084 entry->protection &= ~VM_PROT_EXECUTE;
13085 }
13086
13087 if (copied_slowly) {
13088 VME_OFFSET_SET(entry, local_start - old_start);
13089 entry->needs_copy = FALSE;
13090 entry->is_shared = FALSE;
13091 } else {
13092 VME_OFFSET_SET(entry, copy_offset);
13093 assert(entry->wired_count == 0);
13094 entry->needs_copy = TRUE;
13095 if (entry->inheritance == VM_INHERIT_SHARE) {
13096 entry->inheritance = VM_INHERIT_COPY;
13097 }
13098 if (map != old_map) {
13099 entry->is_shared = TRUE;
13100 }
13101 }
13102 if (entry->inheritance == VM_INHERIT_SHARE) {
13103 entry->inheritance = VM_INHERIT_COPY;
13104 }
13105
13106 vm_map_lock_write_to_read(map);
13107 } else {
13108 if ((cow_sub_map_parent)
13109 && (cow_sub_map_parent != *real_map)
13110 && (cow_sub_map_parent != map)) {
13111 vm_map_unlock(cow_sub_map_parent);
13112 }
13113 entry = submap_entry;
13114 vaddr = local_vaddr;
13115 }
13116 }
13117
13118 /*
13119 * Check whether this task is allowed to have
13120 * this page.
13121 */
13122
13123 prot = entry->protection;
13124
13125 if (override_nx(old_map, VME_ALIAS(entry)) && prot) {
13126 /*
13127 * HACK -- if not a stack, then allow execution
13128 */
13129 prot |= VM_PROT_EXECUTE;
13130 }
13131
13132 if (mask_protections) {
13133 fault_type &= prot;
13134 if (fault_type == VM_PROT_NONE) {
13135 goto protection_failure;
13136 }
13137 }
13138 if (((fault_type & prot) != fault_type)
13139 #if __arm64__
13140 /* prefetch abort in execute-only page */
13141 && !(prot == VM_PROT_EXECUTE && fault_type == (VM_PROT_READ | VM_PROT_EXECUTE))
13142 #endif
13143 ) {
13144 protection_failure:
13145 if (*real_map != map) {
13146 vm_map_unlock(*real_map);
13147 }
13148 *real_map = map;
13149
13150 if ((fault_type & VM_PROT_EXECUTE) && prot) {
13151 log_stack_execution_failure((addr64_t)vaddr, prot);
13152 }
13153
13154 DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL);
13155 return KERN_PROTECTION_FAILURE;
13156 }
13157
13158 /*
13159 * If this page is not pageable, we have to get
13160 * it for all possible accesses.
13161 */
13162
13163 *wired = (entry->wired_count != 0);
13164 if (*wired) {
13165 fault_type = prot;
13166 }
13167
13168 /*
13169 * If the entry was copy-on-write, we either ...
13170 */
13171
13172 if (entry->needs_copy) {
13173 /*
13174 * If we want to write the page, we may as well
13175 * handle that now since we've got the map locked.
13176 *
13177 * If we don't need to write the page, we just
13178 * demote the permissions allowed.
13179 */
13180
13181 if ((fault_type & VM_PROT_WRITE) || *wired || force_copy) {
13182 /*
13183 * Make a new object, and place it in the
13184 * object chain. Note that no new references
13185 * have appeared -- one just moved from the
13186 * map to the new object.
13187 */
13188
13189 if (vm_map_lock_read_to_write(map)) {
13190 vm_map_lock_read(map);
13191 goto RetryLookup;
13192 }
13193
13194 if (VME_OBJECT(entry)->shadowed == FALSE) {
13195 vm_object_lock(VME_OBJECT(entry));
13196 VME_OBJECT(entry)->shadowed = TRUE;
13197 vm_object_unlock(VME_OBJECT(entry));
13198 }
13199 VME_OBJECT_SHADOW(entry,
13200 (vm_map_size_t) (entry->vme_end -
13201 entry->vme_start));
13202 entry->needs_copy = FALSE;
13203
13204 vm_map_lock_write_to_read(map);
13205 }
13206 if ((fault_type & VM_PROT_WRITE) == 0 && *wired == 0) {
13207 /*
13208 * We're attempting to read a copy-on-write
13209 * page -- don't allow writes.
13210 */
13211
13212 prot &= (~VM_PROT_WRITE);
13213 }
13214 }
13215
13216 /*
13217 * Create an object if necessary.
13218 */
13219 if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
13220 if (vm_map_lock_read_to_write(map)) {
13221 vm_map_lock_read(map);
13222 goto RetryLookup;
13223 }
13224
13225 VME_OBJECT_SET(entry,
13226 vm_object_allocate(
13227 (vm_map_size_t)(entry->vme_end -
13228 entry->vme_start)));
13229 VME_OFFSET_SET(entry, 0);
13230 assert(entry->use_pmap);
13231 vm_map_lock_write_to_read(map);
13232 }
13233
13234 /*
13235 * Return the object/offset from this entry. If the entry
13236 * was copy-on-write or empty, it has been fixed up. Also
13237 * return the protection.
13238 */
13239
13240 *offset = (vaddr - entry->vme_start) + VME_OFFSET(entry);
13241 *object = VME_OBJECT(entry);
13242 *out_prot = prot;
13243 KDBG_FILTERED(MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_MAP_LOOKUP_OBJECT), VM_KERNEL_UNSLIDE_OR_PERM(*object), 0, 0, 0, 0);
13244
13245 if (fault_info) {
13246 fault_info->interruptible = THREAD_UNINT; /* for now... */
13247 /* ... the caller will change "interruptible" if needed */
13248 fault_info->cluster_size = 0;
13249 fault_info->user_tag = VME_ALIAS(entry);
13250 fault_info->pmap_options = 0;
13251 if (entry->iokit_acct ||
13252 (!entry->is_sub_map && !entry->use_pmap)) {
13253 fault_info->pmap_options |= PMAP_OPTIONS_ALT_ACCT;
13254 }
13255 fault_info->behavior = entry->behavior;
13256 fault_info->lo_offset = VME_OFFSET(entry);
13257 fault_info->hi_offset =
13258 (entry->vme_end - entry->vme_start) + VME_OFFSET(entry);
13259 fault_info->no_cache = entry->no_cache;
13260 fault_info->stealth = FALSE;
13261 fault_info->io_sync = FALSE;
13262 if (entry->used_for_jit ||
13263 entry->vme_resilient_codesign) {
13264 fault_info->cs_bypass = TRUE;
13265 } else {
13266 fault_info->cs_bypass = FALSE;
13267 }
13268 fault_info->pmap_cs_associated = FALSE;
13269 #if CONFIG_PMAP_CS
13270 if (entry->pmap_cs_associated) {
13271 /*
13272 * The pmap layer will validate this page
13273 * before allowing it to be executed from.
13274 */
13275 fault_info->pmap_cs_associated = TRUE;
13276 }
13277 #endif /* CONFIG_PMAP_CS */
13278 fault_info->mark_zf_absent = FALSE;
13279 fault_info->batch_pmap_op = FALSE;
13280 fault_info->resilient_media = entry->vme_resilient_media;
13281 fault_info->no_copy_on_read = entry->vme_no_copy_on_read;
13282 }
13283
13284 /*
13285 * Lock the object to prevent it from disappearing
13286 */
13287 if (object_lock_type == OBJECT_LOCK_EXCLUSIVE) {
13288 vm_object_lock(*object);
13289 } else {
13290 vm_object_lock_shared(*object);
13291 }
13292
13293 /*
13294 * Save the version number
13295 */
13296
13297 out_version->main_timestamp = map->timestamp;
13298
13299 return KERN_SUCCESS;
13300 }
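/*
 * Illustrative sketch, not part of the original source: the contract
 * documented above means a hypothetical caller enters with the map
 * locked, gets the object back locked, and must also release "real_map"
 * if it differs from the map it ends up with. "map" and "vaddr" are
 * assumptions for the example only.
 *
 *	vm_map_version_t version;
 *	vm_object_t object;
 *	vm_object_offset_t offset;
 *	vm_prot_t prot;
 *	boolean_t wired;
 *	vm_map_t real_map;
 *	kern_return_t kr;
 *
 *	vm_map_lock_read(map);
 *	kr = vm_map_lookup_locked(&map, vaddr, VM_PROT_READ,
 *	    OBJECT_LOCK_EXCLUSIVE, &version, &object, &offset,
 *	    &prot, &wired, NULL, &real_map);
 *	if (kr == KERN_SUCCESS) {
 *		... consult object/offset while the object is locked ...
 *		vm_object_unlock(object);
 *		if (real_map != map) {
 *			vm_map_unlock(real_map);
 *		}
 *	}
 *	vm_map_unlock_read(map);	// "map" may now be a submap
 */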
13301
13302
13303 /*
13304 * vm_map_verify:
13305 *
13306 * Verifies that the map in question has not changed
13307 * since the given version. The map has to be locked
13308 * ("shared" mode is fine) before calling this function
13309 * and it will be returned locked too.
13310 */
13311 boolean_t
13312 vm_map_verify(
13313 vm_map_t map,
13314 vm_map_version_t *version) /* REF */
13315 {
13316 boolean_t result;
13317
13318 vm_map_lock_assert_held(map);
13319 result = (map->timestamp == version->main_timestamp);
13320
13321 return result;
13322 }
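/*
 * Illustrative sketch, not part of the original source: the version
 * obtained from vm_map_lookup_locked() is typically rechecked after the
 * map lock has been dropped and retaken; if the timestamp no longer
 * matches, the lookup must be redone. "RetryFault" is a hypothetical
 * label for the example.
 *
 *	vm_map_lock_read(map);
 *	if (!vm_map_verify(map, &version)) {
 *		vm_map_unlock_read(map);
 *		goto RetryFault;	// map changed: redo the lookup
 *	}
 *	... the mapping seen at lookup time is still valid ...
 */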
13323
13324 /*
13325 * TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
13326 * Goes away after regular vm_region_recurse function migrates to
13327 * 64 bits
13328 * vm_region_recurse: A form of vm_region which follows the
13329 * submaps in a target map
13330 *
13331 */
13332
13333 kern_return_t
13334 vm_map_region_recurse_64(
13335 vm_map_t map,
13336 vm_map_offset_t *address, /* IN/OUT */
13337 vm_map_size_t *size, /* OUT */
13338 natural_t *nesting_depth, /* IN/OUT */
13339 vm_region_submap_info_64_t submap_info, /* IN/OUT */
13340 mach_msg_type_number_t *count) /* IN/OUT */
13341 {
13342 mach_msg_type_number_t original_count;
13343 vm_region_extended_info_data_t extended;
13344 vm_map_entry_t tmp_entry;
13345 vm_map_offset_t user_address;
13346 unsigned int user_max_depth;
13347
13348 /*
13349 * "curr_entry" is the VM map entry preceding or including the
13350 * address we're looking for.
13351 * "curr_map" is the map or sub-map containing "curr_entry".
13352 * "curr_address" is the equivalent of the top map's "user_address"
13353 * in the current map.
13354 * "curr_offset" is the cumulated offset of "curr_map" in the
13355 * target task's address space.
13356 * "curr_depth" is the depth of "curr_map" in the chain of
13357 * sub-maps.
13358 *
13359 * "curr_max_below" and "curr_max_above" limit the range (around
13360 * "curr_address") we should take into account in the current (sub)map.
13361 * They limit the range to what's visible through the map entries
13362 * we've traversed from the top map to the current map.
13363 *
13364 */
13365 vm_map_entry_t curr_entry;
13366 vm_map_address_t curr_address;
13367 vm_map_offset_t curr_offset;
13368 vm_map_t curr_map;
13369 unsigned int curr_depth;
13370 vm_map_offset_t curr_max_below, curr_max_above;
13371 vm_map_offset_t curr_skip;
13372
13373 /*
13374 * "next_" is the same as "curr_" but for the VM region immediately
13375 * after the address we're looking for. We need to keep track of this
13376 * too because we want to return info about that region if the
13377 * address we're looking for is not mapped.
13378 */
13379 vm_map_entry_t next_entry;
13380 vm_map_offset_t next_offset;
13381 vm_map_offset_t next_address;
13382 vm_map_t next_map;
13383 unsigned int next_depth;
13384 vm_map_offset_t next_max_below, next_max_above;
13385 vm_map_offset_t next_skip;
13386
13387 boolean_t look_for_pages;
13388 vm_region_submap_short_info_64_t short_info;
13389 boolean_t do_region_footprint;
13390
13391 if (map == VM_MAP_NULL) {
13392 /* no address space to work on */
13393 return KERN_INVALID_ARGUMENT;
13394 }
13395
13396
13397 if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) {
13398 /*
13399 * "info" structure is not big enough and
13400 * would overflow
13401 */
13402 return KERN_INVALID_ARGUMENT;
13403 }
13404
13405 do_region_footprint = task_self_region_footprint();
13406 original_count = *count;
13407
13408 if (original_count < VM_REGION_SUBMAP_INFO_V0_COUNT_64) {
13409 *count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
13410 look_for_pages = FALSE;
13411 short_info = (vm_region_submap_short_info_64_t) submap_info;
13412 submap_info = NULL;
13413 } else {
13414 look_for_pages = TRUE;
13415 *count = VM_REGION_SUBMAP_INFO_V0_COUNT_64;
13416 short_info = NULL;
13417
13418 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
13419 *count = VM_REGION_SUBMAP_INFO_V1_COUNT_64;
13420 }
13421 if (original_count >= VM_REGION_SUBMAP_INFO_V2_COUNT_64) {
13422 *count = VM_REGION_SUBMAP_INFO_V2_COUNT_64;
13423 }
13424 }
13425
13426 user_address = *address;
13427 user_max_depth = *nesting_depth;
13428
13429 if (not_in_kdp) {
13430 vm_map_lock_read(map);
13431 }
13432
13433 recurse_again:
13434 curr_entry = NULL;
13435 curr_map = map;
13436 curr_address = user_address;
13437 curr_offset = 0;
13438 curr_skip = 0;
13439 curr_depth = 0;
13440 curr_max_above = ((vm_map_offset_t) -1) - curr_address;
13441 curr_max_below = curr_address;
13442
13443 next_entry = NULL;
13444 next_map = NULL;
13445 next_address = 0;
13446 next_offset = 0;
13447 next_skip = 0;
13448 next_depth = 0;
13449 next_max_above = (vm_map_offset_t) -1;
13450 next_max_below = (vm_map_offset_t) -1;
13451
13452 for (;;) {
13453 if (vm_map_lookup_entry(curr_map,
13454 curr_address,
13455 &tmp_entry)) {
13456 /* tmp_entry contains the address we're looking for */
13457 curr_entry = tmp_entry;
13458 } else {
13459 vm_map_offset_t skip;
13460 /*
13461 * The address is not mapped. "tmp_entry" is the
13462 * map entry preceding the address. We want the next
13463 * one, if it exists.
13464 */
13465 curr_entry = tmp_entry->vme_next;
13466
13467 if (curr_entry == vm_map_to_entry(curr_map) ||
13468 (curr_entry->vme_start >=
13469 curr_address + curr_max_above)) {
13470 /* no next entry at this level: stop looking */
13471 if (not_in_kdp) {
13472 vm_map_unlock_read(curr_map);
13473 }
13474 curr_entry = NULL;
13475 curr_map = NULL;
13476 curr_skip = 0;
13477 curr_offset = 0;
13478 curr_depth = 0;
13479 curr_max_above = 0;
13480 curr_max_below = 0;
13481 break;
13482 }
13483
13484 /* adjust current address and offset */
13485 skip = curr_entry->vme_start - curr_address;
13486 curr_address = curr_entry->vme_start;
13487 curr_skip += skip;
13488 curr_offset += skip;
13489 curr_max_above -= skip;
13490 curr_max_below = 0;
13491 }
13492
13493 /*
13494 * Is the next entry at this level closer to the address (or
13495 * deeper in the submap chain) than the one we had
13496 * so far ?
13497 */
13498 tmp_entry = curr_entry->vme_next;
13499 if (tmp_entry == vm_map_to_entry(curr_map)) {
13500 /* no next entry at this level */
13501 } else if (tmp_entry->vme_start >=
13502 curr_address + curr_max_above) {
13503 /*
13504 * tmp_entry is beyond the scope of what we mapped of
13505 * this submap in the upper level: ignore it.
13506 */
13507 } else if ((next_entry == NULL) ||
13508 (tmp_entry->vme_start + curr_offset <=
13509 next_entry->vme_start + next_offset)) {
13510 /*
13511 * We didn't have a "next_entry" or this one is
13512 * closer to the address we're looking for:
13513 * use this "tmp_entry" as the new "next_entry".
13514 */
13515 if (next_entry != NULL) {
13516 /* unlock the last "next_map" */
13517 if (next_map != curr_map && not_in_kdp) {
13518 vm_map_unlock_read(next_map);
13519 }
13520 }
13521 next_entry = tmp_entry;
13522 next_map = curr_map;
13523 next_depth = curr_depth;
13524 next_address = next_entry->vme_start;
13525 next_skip = curr_skip;
13526 next_skip += (next_address - curr_address);
13527 next_offset = curr_offset;
13528 next_offset += (next_address - curr_address);
13529 next_max_above = MIN(next_max_above, curr_max_above);
13530 next_max_above = MIN(next_max_above,
13531 next_entry->vme_end - next_address);
13532 next_max_below = MIN(next_max_below, curr_max_below);
13533 next_max_below = MIN(next_max_below,
13534 next_address - next_entry->vme_start);
13535 }
13536
13537 /*
13538 * "curr_max_{above,below}" allow us to keep track of the
13539 * portion of the submap that is actually mapped at this level:
13540 * the rest of that submap is irrelevant to us, since it's not
13541 * mapped here.
13542 * The relevant portion of the map starts at
13543 * "VME_OFFSET(curr_entry)" up to the size of "curr_entry".
13544 */
13545 curr_max_above = MIN(curr_max_above,
13546 curr_entry->vme_end - curr_address);
13547 curr_max_below = MIN(curr_max_below,
13548 curr_address - curr_entry->vme_start);
13549
13550 if (!curr_entry->is_sub_map ||
13551 curr_depth >= user_max_depth) {
13552 /*
13553 * We hit a leaf map or we reached the maximum depth
13554 * we could, so stop looking. Keep the current map
13555 * locked.
13556 */
13557 break;
13558 }
13559
13560 /*
13561 * Get down to the next submap level.
13562 */
13563
13564 /*
13565 * Lock the next level and unlock the current level,
13566 * unless we need to keep it locked to access the "next_entry"
13567 * later.
13568 */
13569 if (not_in_kdp) {
13570 vm_map_lock_read(VME_SUBMAP(curr_entry));
13571 }
13572 if (curr_map == next_map) {
13573 /* keep "next_map" locked in case we need it */
13574 } else {
13575 /* release this map */
13576 if (not_in_kdp) {
13577 vm_map_unlock_read(curr_map);
13578 }
13579 }
13580
13581 /*
13582 * Adjust the offset. "curr_entry" maps the submap
13583 * at relative address "curr_entry->vme_start" in the
13584 * curr_map but skips the first "VME_OFFSET(curr_entry)"
13585 * bytes of the submap.
13586 * "curr_offset" always represents the offset of a virtual
13587 * address in the curr_map relative to the absolute address
13588 * space (i.e. the top-level VM map).
13589 */
13590 curr_offset +=
13591 (VME_OFFSET(curr_entry) - curr_entry->vme_start);
13592 curr_address = user_address + curr_offset;
13593 /* switch to the submap */
13594 curr_map = VME_SUBMAP(curr_entry);
13595 curr_depth++;
13596 curr_entry = NULL;
13597 }
13598
13599 // LP64todo: all the current tools are 32bit, obviously never worked for 64b
13600 // so probably should be a real 32b ID vs. ptr.
13601 // Current users just check for equality
13602
13603 if (curr_entry == NULL) {
13604 /* no VM region contains the address... */
13605
13606 if (do_region_footprint && /* we want footprint numbers */
13607 next_entry == NULL && /* & there are no more regions */
13608 /* & we haven't already provided our fake region: */
13609 user_address <= vm_map_last_entry(map)->vme_end) {
13610 ledger_amount_t ledger_resident, ledger_compressed;
13611
13612 /*
13613 * Add a fake memory region to account for
13614 * purgeable and/or ledger-tagged memory that
13615 * counts towards this task's memory footprint,
13616 * i.e. the resident/compressed pages of non-volatile
13617 * objects owned by that task.
13618 */
13619 task_ledgers_footprint(map->pmap->ledger,
13620 &ledger_resident,
13621 &ledger_compressed);
13622 if (ledger_resident + ledger_compressed == 0) {
13623 /* no purgeable memory usage to report */
13624 return KERN_INVALID_ADDRESS;
13625 }
13626 /* fake region to show nonvolatile footprint */
13627 if (look_for_pages) {
13628 submap_info->protection = VM_PROT_DEFAULT;
13629 submap_info->max_protection = VM_PROT_DEFAULT;
13630 submap_info->inheritance = VM_INHERIT_DEFAULT;
13631 submap_info->offset = 0;
13632 submap_info->user_tag = -1;
13633 submap_info->pages_resident = (unsigned int) (ledger_resident / PAGE_SIZE);
13634 submap_info->pages_shared_now_private = 0;
13635 submap_info->pages_swapped_out = (unsigned int) (ledger_compressed / PAGE_SIZE);
13636 submap_info->pages_dirtied = submap_info->pages_resident;
13637 submap_info->ref_count = 1;
13638 submap_info->shadow_depth = 0;
13639 submap_info->external_pager = 0;
13640 submap_info->share_mode = SM_PRIVATE;
13641 submap_info->is_submap = 0;
13642 submap_info->behavior = VM_BEHAVIOR_DEFAULT;
13643 submap_info->object_id = INFO_MAKE_FAKE_OBJECT_ID(map, task_ledgers.purgeable_nonvolatile);
13644 submap_info->user_wired_count = 0;
13645 submap_info->pages_reusable = 0;
13646 } else {
13647 short_info->user_tag = -1;
13648 short_info->offset = 0;
13649 short_info->protection = VM_PROT_DEFAULT;
13650 short_info->inheritance = VM_INHERIT_DEFAULT;
13651 short_info->max_protection = VM_PROT_DEFAULT;
13652 short_info->behavior = VM_BEHAVIOR_DEFAULT;
13653 short_info->user_wired_count = 0;
13654 short_info->is_submap = 0;
13655 short_info->object_id = INFO_MAKE_FAKE_OBJECT_ID(map, task_ledgers.purgeable_nonvolatile);
13656 short_info->external_pager = 0;
13657 short_info->shadow_depth = 0;
13658 short_info->share_mode = SM_PRIVATE;
13659 short_info->ref_count = 1;
13660 }
13661 *nesting_depth = 0;
13662 *size = (vm_map_size_t) (ledger_resident + ledger_compressed);
13663 // *address = user_address;
13664 *address = vm_map_last_entry(map)->vme_end;
13665 return KERN_SUCCESS;
13666 }
13667
13668 if (next_entry == NULL) {
13669 /* ... and no VM region follows it either */
13670 return KERN_INVALID_ADDRESS;
13671 }
13672 /* ... gather info about the next VM region */
13673 curr_entry = next_entry;
13674 curr_map = next_map; /* still locked ... */
13675 curr_address = next_address;
13676 curr_skip = next_skip;
13677 curr_offset = next_offset;
13678 curr_depth = next_depth;
13679 curr_max_above = next_max_above;
13680 curr_max_below = next_max_below;
13681 } else {
13682 /* we won't need "next_entry" after all */
13683 if (next_entry != NULL) {
13684 /* release "next_map" */
13685 if (next_map != curr_map && not_in_kdp) {
13686 vm_map_unlock_read(next_map);
13687 }
13688 }
13689 }
13690 next_entry = NULL;
13691 next_map = NULL;
13692 next_offset = 0;
13693 next_skip = 0;
13694 next_depth = 0;
13695 next_max_below = -1;
13696 next_max_above = -1;
13697
13698 if (curr_entry->is_sub_map &&
13699 curr_depth < user_max_depth) {
13700 /*
13701 * We're not as deep as we could be: we must have
13702 * gone back up after not finding anything mapped
13703 * below the original top-level map entry's address range.
13704 * Let's move "curr_address" forward and recurse again.
13705 */
13706 user_address = curr_address;
13707 goto recurse_again;
13708 }
13709
13710 *nesting_depth = curr_depth;
13711 *size = curr_max_above + curr_max_below;
13712 *address = user_address + curr_skip - curr_max_below;
13713
13714 // LP64todo: all the current tools are 32bit, obviously never worked for 64b
13715 // so probably should be a real 32b ID vs. ptr.
13716 // Current users just check for equality
13717 #define INFO_MAKE_OBJECT_ID(p) ((uint32_t)(uintptr_t)VM_KERNEL_ADDRPERM(p))
13718
13719 if (look_for_pages) {
13720 submap_info->user_tag = VME_ALIAS(curr_entry);
13721 submap_info->offset = VME_OFFSET(curr_entry);
13722 submap_info->protection = curr_entry->protection;
13723 submap_info->inheritance = curr_entry->inheritance;
13724 submap_info->max_protection = curr_entry->max_protection;
13725 submap_info->behavior = curr_entry->behavior;
13726 submap_info->user_wired_count = curr_entry->user_wired_count;
13727 submap_info->is_submap = curr_entry->is_sub_map;
13728 submap_info->object_id = INFO_MAKE_OBJECT_ID(VME_OBJECT(curr_entry));
13729 } else {
13730 short_info->user_tag = VME_ALIAS(curr_entry);
13731 short_info->offset = VME_OFFSET(curr_entry);
13732 short_info->protection = curr_entry->protection;
13733 short_info->inheritance = curr_entry->inheritance;
13734 short_info->max_protection = curr_entry->max_protection;
13735 short_info->behavior = curr_entry->behavior;
13736 short_info->user_wired_count = curr_entry->user_wired_count;
13737 short_info->is_submap = curr_entry->is_sub_map;
13738 short_info->object_id = INFO_MAKE_OBJECT_ID(VME_OBJECT(curr_entry));
13739 }
13740
13741 extended.pages_resident = 0;
13742 extended.pages_swapped_out = 0;
13743 extended.pages_shared_now_private = 0;
13744 extended.pages_dirtied = 0;
13745 extended.pages_reusable = 0;
13746 extended.external_pager = 0;
13747 extended.shadow_depth = 0;
13748 extended.share_mode = SM_EMPTY;
13749 extended.ref_count = 0;
13750
13751 if (not_in_kdp) {
13752 if (!curr_entry->is_sub_map) {
13753 vm_map_offset_t range_start, range_end;
13754 range_start = MAX((curr_address - curr_max_below),
13755 curr_entry->vme_start);
13756 range_end = MIN((curr_address + curr_max_above),
13757 curr_entry->vme_end);
13758 vm_map_region_walk(curr_map,
13759 range_start,
13760 curr_entry,
13761 (VME_OFFSET(curr_entry) +
13762 (range_start -
13763 curr_entry->vme_start)),
13764 range_end - range_start,
13765 &extended,
13766 look_for_pages, VM_REGION_EXTENDED_INFO_COUNT);
13767 if (extended.external_pager &&
13768 extended.ref_count == 2 &&
13769 extended.share_mode == SM_SHARED) {
13770 extended.share_mode = SM_PRIVATE;
13771 }
13772 } else {
13773 if (curr_entry->use_pmap) {
13774 extended.share_mode = SM_TRUESHARED;
13775 } else {
13776 extended.share_mode = SM_PRIVATE;
13777 }
13778 extended.ref_count = os_ref_get_count(&VME_SUBMAP(curr_entry)->map_refcnt);
13779 }
13780 }
13781
13782 if (look_for_pages) {
13783 submap_info->pages_resident = extended.pages_resident;
13784 submap_info->pages_swapped_out = extended.pages_swapped_out;
13785 submap_info->pages_shared_now_private =
13786 extended.pages_shared_now_private;
13787 submap_info->pages_dirtied = extended.pages_dirtied;
13788 submap_info->external_pager = extended.external_pager;
13789 submap_info->shadow_depth = extended.shadow_depth;
13790 submap_info->share_mode = extended.share_mode;
13791 submap_info->ref_count = extended.ref_count;
13792
13793 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
13794 submap_info->pages_reusable = extended.pages_reusable;
13795 }
13796 if (original_count >= VM_REGION_SUBMAP_INFO_V2_COUNT_64) {
13797 submap_info->object_id_full = (VME_OBJECT(curr_entry) != NULL) ? (vm_object_id_t) VM_KERNEL_ADDRPERM(VME_OBJECT(curr_entry)) : 0ULL;
13798 }
13799 } else {
13800 short_info->external_pager = extended.external_pager;
13801 short_info->shadow_depth = extended.shadow_depth;
13802 short_info->share_mode = extended.share_mode;
13803 short_info->ref_count = extended.ref_count;
13804 }
13805
13806 if (not_in_kdp) {
13807 vm_map_unlock_read(curr_map);
13808 }
13809
13810 return KERN_SUCCESS;
13811 }
13812
13813 /*
13814 * vm_region:
13815 *
13816 * User call to obtain information about a region in
13817 * a task's address map. Several flavors are supported:
13818 * basic (32-bit and 64-bit), extended and top info.
13819 *
13820 * XXX The reserved and behavior fields cannot be filled
13821 * in until the vm merge from the IK is completed, and
13822 * vm_reserve is implemented.
13823 */
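/*
 * Hedged usage sketch (not part of this file): one way a user-space caller
 * might reach vm_map_region() below, via the mach_vm_region() call with the
 * VM_REGION_BASIC_INFO_64 flavor handled here.  The header set and the
 * printf() reporting are illustrative assumptions, not taken from xnu.
 */
#if 0   /* illustration only -- never compiled into the kernel */
#include <mach/mach.h>
#include <mach/mach_vm.h>
#include <stdio.h>

static void
dump_first_region(void)
{
	mach_vm_address_t addr = 0;
	mach_vm_size_t size = 0;
	vm_region_basic_info_data_64_t info;
	mach_msg_type_number_t count = VM_REGION_BASIC_INFO_COUNT_64;
	mach_port_t object_name = MACH_PORT_NULL;
	kern_return_t kr;

	kr = mach_vm_region(mach_task_self(), &addr, &size,
	    VM_REGION_BASIC_INFO_64,
	    (vm_region_info_t)&info, &count, &object_name);
	if (kr == KERN_SUCCESS) {
		printf("region [0x%llx, 0x%llx) prot 0x%x max 0x%x shared %d\n",
		    (unsigned long long)addr,
		    (unsigned long long)(addr + size),
		    (unsigned)info.protection,
		    (unsigned)info.max_protection,
		    (int)info.shared);
	}
}
#endif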
13824
13825 kern_return_t
13826 vm_map_region(
13827 vm_map_t map,
13828 vm_map_offset_t *address, /* IN/OUT */
13829 vm_map_size_t *size, /* OUT */
13830 vm_region_flavor_t flavor, /* IN */
13831 vm_region_info_t info, /* OUT */
13832 mach_msg_type_number_t *count, /* IN/OUT */
13833 mach_port_t *object_name) /* OUT */
13834 {
13835 vm_map_entry_t tmp_entry;
13836 vm_map_entry_t entry;
13837 vm_map_offset_t start;
13838
13839 if (map == VM_MAP_NULL) {
13840 return KERN_INVALID_ARGUMENT;
13841 }
13842
13843 switch (flavor) {
13844 case VM_REGION_BASIC_INFO:
13845 /* legacy for old 32-bit objects info */
13846 {
13847 vm_region_basic_info_t basic;
13848
13849 if (*count < VM_REGION_BASIC_INFO_COUNT) {
13850 return KERN_INVALID_ARGUMENT;
13851 }
13852
13853 basic = (vm_region_basic_info_t) info;
13854 *count = VM_REGION_BASIC_INFO_COUNT;
13855
13856 vm_map_lock_read(map);
13857
13858 start = *address;
13859 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
13860 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
13861 vm_map_unlock_read(map);
13862 return KERN_INVALID_ADDRESS;
13863 }
13864 } else {
13865 entry = tmp_entry;
13866 }
13867
13868 start = entry->vme_start;
13869
13870 basic->offset = (uint32_t)VME_OFFSET(entry);
13871 basic->protection = entry->protection;
13872 basic->inheritance = entry->inheritance;
13873 basic->max_protection = entry->max_protection;
13874 basic->behavior = entry->behavior;
13875 basic->user_wired_count = entry->user_wired_count;
13876 basic->reserved = entry->is_sub_map;
13877 *address = start;
13878 *size = (entry->vme_end - start);
13879
13880 if (object_name) {
13881 *object_name = IP_NULL;
13882 }
13883 if (entry->is_sub_map) {
13884 basic->shared = FALSE;
13885 } else {
13886 basic->shared = entry->is_shared;
13887 }
13888
13889 vm_map_unlock_read(map);
13890 return KERN_SUCCESS;
13891 }
13892
13893 case VM_REGION_BASIC_INFO_64:
13894 {
13895 vm_region_basic_info_64_t basic;
13896
13897 if (*count < VM_REGION_BASIC_INFO_COUNT_64) {
13898 return KERN_INVALID_ARGUMENT;
13899 }
13900
13901 basic = (vm_region_basic_info_64_t) info;
13902 *count = VM_REGION_BASIC_INFO_COUNT_64;
13903
13904 vm_map_lock_read(map);
13905
13906 start = *address;
13907 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
13908 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
13909 vm_map_unlock_read(map);
13910 return KERN_INVALID_ADDRESS;
13911 }
13912 } else {
13913 entry = tmp_entry;
13914 }
13915
13916 start = entry->vme_start;
13917
13918 basic->offset = VME_OFFSET(entry);
13919 basic->protection = entry->protection;
13920 basic->inheritance = entry->inheritance;
13921 basic->max_protection = entry->max_protection;
13922 basic->behavior = entry->behavior;
13923 basic->user_wired_count = entry->user_wired_count;
13924 basic->reserved = entry->is_sub_map;
13925 *address = start;
13926 *size = (entry->vme_end - start);
13927
13928 if (object_name) {
13929 *object_name = IP_NULL;
13930 }
13931 if (entry->is_sub_map) {
13932 basic->shared = FALSE;
13933 } else {
13934 basic->shared = entry->is_shared;
13935 }
13936
13937 vm_map_unlock_read(map);
13938 return KERN_SUCCESS;
13939 }
13940 case VM_REGION_EXTENDED_INFO:
13941 if (*count < VM_REGION_EXTENDED_INFO_COUNT) {
13942 return KERN_INVALID_ARGUMENT;
13943 }
13944 /*fallthru*/
13945 case VM_REGION_EXTENDED_INFO__legacy:
13946 if (*count < VM_REGION_EXTENDED_INFO_COUNT__legacy) {
13947 return KERN_INVALID_ARGUMENT;
13948 }
13949
13950 {
13951 vm_region_extended_info_t extended;
13952 mach_msg_type_number_t original_count;
13953
13954 extended = (vm_region_extended_info_t) info;
13955
13956 vm_map_lock_read(map);
13957
13958 start = *address;
13959 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
13960 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
13961 vm_map_unlock_read(map);
13962 return KERN_INVALID_ADDRESS;
13963 }
13964 } else {
13965 entry = tmp_entry;
13966 }
13967 start = entry->vme_start;
13968
13969 extended->protection = entry->protection;
13970 extended->user_tag = VME_ALIAS(entry);
13971 extended->pages_resident = 0;
13972 extended->pages_swapped_out = 0;
13973 extended->pages_shared_now_private = 0;
13974 extended->pages_dirtied = 0;
13975 extended->external_pager = 0;
13976 extended->shadow_depth = 0;
13977
13978 original_count = *count;
13979 if (flavor == VM_REGION_EXTENDED_INFO__legacy) {
13980 *count = VM_REGION_EXTENDED_INFO_COUNT__legacy;
13981 } else {
13982 extended->pages_reusable = 0;
13983 *count = VM_REGION_EXTENDED_INFO_COUNT;
13984 }
13985
13986 vm_map_region_walk(map, start, entry, VME_OFFSET(entry), entry->vme_end - start, extended, TRUE, *count);
13987
13988 if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED) {
13989 extended->share_mode = SM_PRIVATE;
13990 }
13991
13992 if (object_name) {
13993 *object_name = IP_NULL;
13994 }
13995 *address = start;
13996 *size = (entry->vme_end - start);
13997
13998 vm_map_unlock_read(map);
13999 return KERN_SUCCESS;
14000 }
14001 case VM_REGION_TOP_INFO:
14002 {
14003 vm_region_top_info_t top;
14004
14005 if (*count < VM_REGION_TOP_INFO_COUNT) {
14006 return KERN_INVALID_ARGUMENT;
14007 }
14008
14009 top = (vm_region_top_info_t) info;
14010 *count = VM_REGION_TOP_INFO_COUNT;
14011
14012 vm_map_lock_read(map);
14013
14014 start = *address;
14015 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
14016 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
14017 vm_map_unlock_read(map);
14018 return KERN_INVALID_ADDRESS;
14019 }
14020 } else {
14021 entry = tmp_entry;
14022 }
14023 start = entry->vme_start;
14024
14025 top->private_pages_resident = 0;
14026 top->shared_pages_resident = 0;
14027
14028 vm_map_region_top_walk(entry, top);
14029
14030 if (object_name) {
14031 *object_name = IP_NULL;
14032 }
14033 *address = start;
14034 *size = (entry->vme_end - start);
14035
14036 vm_map_unlock_read(map);
14037 return KERN_SUCCESS;
14038 }
14039 default:
14040 return KERN_INVALID_ARGUMENT;
14041 }
14042 }
14043
14044 #define OBJ_RESIDENT_COUNT(obj, entry_size) \
14045 MIN((entry_size), \
14046 ((obj)->all_reusable ? \
14047 (obj)->wired_page_count : \
14048 (obj)->resident_page_count - (obj)->reusable_page_count))
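/*
 * Worked illustration of OBJ_RESIDENT_COUNT() with hypothetical numbers:
 * for an object with 100 resident pages, 30 of them reusable, and
 * all_reusable not set, a 50-page map entry contributes
 * MIN(50, 100 - 30) = 50 pages, while a 200-page entry over the same
 * object would contribute MIN(200, 70) = 70.  If all_reusable were set,
 * only the wired_page_count would be counted against the entry size.
 */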
14049
14050 void
14051 vm_map_region_top_walk(
14052 vm_map_entry_t entry,
14053 vm_region_top_info_t top)
14054 {
14055 if (VME_OBJECT(entry) == 0 || entry->is_sub_map) {
14056 top->share_mode = SM_EMPTY;
14057 top->ref_count = 0;
14058 top->obj_id = 0;
14059 return;
14060 }
14061
14062 {
14063 struct vm_object *obj, *tmp_obj;
14064 int ref_count;
14065 uint32_t entry_size;
14066
14067 entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64);
14068
14069 obj = VME_OBJECT(entry);
14070
14071 vm_object_lock(obj);
14072
14073 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
14074 ref_count--;
14075 }
14076
14077 assert(obj->reusable_page_count <= obj->resident_page_count);
14078 if (obj->shadow) {
14079 if (ref_count == 1) {
14080 top->private_pages_resident =
14081 OBJ_RESIDENT_COUNT(obj, entry_size);
14082 } else {
14083 top->shared_pages_resident =
14084 OBJ_RESIDENT_COUNT(obj, entry_size);
14085 }
14086 top->ref_count = ref_count;
14087 top->share_mode = SM_COW;
14088
14089 while ((tmp_obj = obj->shadow)) {
14090 vm_object_lock(tmp_obj);
14091 vm_object_unlock(obj);
14092 obj = tmp_obj;
14093
14094 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
14095 ref_count--;
14096 }
14097
14098 assert(obj->reusable_page_count <= obj->resident_page_count);
14099 top->shared_pages_resident +=
14100 OBJ_RESIDENT_COUNT(obj, entry_size);
14101 top->ref_count += ref_count - 1;
14102 }
14103 } else {
14104 if (entry->superpage_size) {
14105 top->share_mode = SM_LARGE_PAGE;
14106 top->shared_pages_resident = 0;
14107 top->private_pages_resident = entry_size;
14108 } else if (entry->needs_copy) {
14109 top->share_mode = SM_COW;
14110 top->shared_pages_resident =
14111 OBJ_RESIDENT_COUNT(obj, entry_size);
14112 } else {
14113 if (ref_count == 1 ||
14114 (ref_count == 2 && obj->named)) {
14115 top->share_mode = SM_PRIVATE;
14116 top->private_pages_resident =
14117 OBJ_RESIDENT_COUNT(obj,
14118 entry_size);
14119 } else {
14120 top->share_mode = SM_SHARED;
14121 top->shared_pages_resident =
14122 OBJ_RESIDENT_COUNT(obj,
14123 entry_size);
14124 }
14125 }
14126 top->ref_count = ref_count;
14127 }
14128 /* XXX K64: obj_id will be truncated */
14129 top->obj_id = (unsigned int) (uintptr_t)VM_KERNEL_ADDRPERM(obj);
14130
14131 vm_object_unlock(obj);
14132 }
14133 }
14134
14135 void
14136 vm_map_region_walk(
14137 vm_map_t map,
14138 vm_map_offset_t va,
14139 vm_map_entry_t entry,
14140 vm_object_offset_t offset,
14141 vm_object_size_t range,
14142 vm_region_extended_info_t extended,
14143 boolean_t look_for_pages,
14144 mach_msg_type_number_t count)
14145 {
14146 struct vm_object *obj, *tmp_obj;
14147 vm_map_offset_t last_offset;
14148 int i;
14149 int ref_count;
14150 struct vm_object *shadow_object;
14151 int shadow_depth;
14152 boolean_t do_region_footprint;
14153
14154 do_region_footprint = task_self_region_footprint();
14155
14156 if ((VME_OBJECT(entry) == 0) ||
14157 (entry->is_sub_map) ||
14158 (VME_OBJECT(entry)->phys_contiguous &&
14159 !entry->superpage_size)) {
14160 extended->share_mode = SM_EMPTY;
14161 extended->ref_count = 0;
14162 return;
14163 }
14164
14165 if (entry->superpage_size) {
14166 extended->shadow_depth = 0;
14167 extended->share_mode = SM_LARGE_PAGE;
14168 extended->ref_count = 1;
14169 extended->external_pager = 0;
14170 extended->pages_resident = (unsigned int)(range >> PAGE_SHIFT);
14172 return;
14173 }
14174
14175 obj = VME_OBJECT(entry);
14176
14177 vm_object_lock(obj);
14178
14179 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
14180 ref_count--;
14181 }
14182
14183 if (look_for_pages) {
14184 for (last_offset = offset + range;
14185 offset < last_offset;
14186 offset += PAGE_SIZE_64, va += PAGE_SIZE) {
14187 if (do_region_footprint) {
14188 int disp;
14189
14190 disp = 0;
14191 if (map->has_corpse_footprint) {
14192 /*
14193 * Query the page info data we saved
14194 * while forking the corpse.
14195 */
14196 vm_map_corpse_footprint_query_page_info(
14197 map,
14198 va,
14199 &disp);
14200 } else {
14201 /*
14202 * Query the pmap.
14203 */
14204 pmap_query_page_info(map->pmap,
14205 va,
14206 &disp);
14207 }
14208 if (disp & PMAP_QUERY_PAGE_PRESENT) {
14209 if (!(disp & PMAP_QUERY_PAGE_ALTACCT)) {
14210 extended->pages_resident++;
14211 }
14212 if (disp & PMAP_QUERY_PAGE_REUSABLE) {
14213 extended->pages_reusable++;
14214 } else if (!(disp & PMAP_QUERY_PAGE_INTERNAL) ||
14215 (disp & PMAP_QUERY_PAGE_ALTACCT)) {
14216 /* alternate accounting */
14217 } else {
14218 extended->pages_dirtied++;
14219 }
14220 } else if (disp & PMAP_QUERY_PAGE_COMPRESSED) {
14221 if (disp & PMAP_QUERY_PAGE_COMPRESSED_ALTACCT) {
14222 /* alternate accounting */
14223 } else {
14224 extended->pages_swapped_out++;
14225 }
14226 }
14227 /* deal with alternate accounting */
14228 if (obj->purgable == VM_PURGABLE_NONVOLATILE &&
14229 /* && not tagged as no-footprint? */
14230 VM_OBJECT_OWNER(obj) != NULL &&
14231 VM_OBJECT_OWNER(obj)->map == map) {
14232 if ((((va
14233 - entry->vme_start
14234 + VME_OFFSET(entry))
14235 / PAGE_SIZE) <
14236 (obj->resident_page_count +
14237 vm_compressor_pager_get_count(obj->pager)))) {
14238 /*
14239 * Non-volatile purgeable object owned
14240 * by this task: report the first
14241 * "#resident + #compressed" pages as
14242 * "resident" (to show that they
14243 * contribute to the footprint) but not
14244 * "dirty" (to avoid double-counting
14245 * with the fake "non-volatile" region
14246 * we'll report at the end of the
14247 * address space to account for all
14248 * (mapped or not) non-volatile memory
14249 * owned by this task).
14250 */
14251 extended->pages_resident++;
14252 }
14253 } else if ((obj->purgable == VM_PURGABLE_VOLATILE ||
14254 obj->purgable == VM_PURGABLE_EMPTY) &&
14255 /* && not tagged as no-footprint? */
14256 VM_OBJECT_OWNER(obj) != NULL &&
14257 VM_OBJECT_OWNER(obj)->map == map) {
14258 if ((((va
14259 - entry->vme_start
14260 + VME_OFFSET(entry))
14261 / PAGE_SIZE) <
14262 obj->wired_page_count)) {
14263 /*
14264 * Volatile|empty purgeable object owned
14265 * by this task: report the first
14266 * "#wired" pages as "resident" (to
14267 * show that they contribute to the
14268 * footprint) but not "dirty" (to avoid
14269 * double-counting with the fake
14270 * "non-volatile" region we'll report
14271 * at the end of the address space to
14272 * account for all (mapped or not)
14273 * non-volatile memory owned by this
14274 * task).
14275 */
14276 extended->pages_resident++;
14277 }
14278 } else if (obj->purgable != VM_PURGABLE_DENY) {
14279 /*
14280 * Pages from purgeable objects
14281 * will be reported as dirty
14282 * appropriately in an extra
14283 * fake memory region at the end of
14284 * the address space.
14285 */
14286 } else if (entry->iokit_acct) {
14287 /*
14288 * IOKit mappings are considered
14289 * as fully dirty for footprint's
14290 * sake.
14291 */
14292 extended->pages_dirtied++;
14293 }
14294 continue;
14295 }
14296
14297 vm_map_region_look_for_page(map, va, obj,
14298 offset, ref_count,
14299 0, extended, count);
14300 }
14301
14302 if (do_region_footprint) {
14303 goto collect_object_info;
14304 }
14305 } else {
14306 collect_object_info:
14307 shadow_object = obj->shadow;
14308 shadow_depth = 0;
14309
14310 if (!(obj->internal)) {
14311 extended->external_pager = 1;
14312 }
14313
14314 if (shadow_object != VM_OBJECT_NULL) {
14315 vm_object_lock(shadow_object);
14316 for (;
14317 shadow_object != VM_OBJECT_NULL;
14318 shadow_depth++) {
14319 vm_object_t next_shadow;
14320
14321 if (!(shadow_object->internal)) {
14322 extended->external_pager = 1;
14323 }
14324
14325 next_shadow = shadow_object->shadow;
14326 if (next_shadow) {
14327 vm_object_lock(next_shadow);
14328 }
14329 vm_object_unlock(shadow_object);
14330 shadow_object = next_shadow;
14331 }
14332 }
14333 extended->shadow_depth = shadow_depth;
14334 }
14335
14336 if (extended->shadow_depth || entry->needs_copy) {
14337 extended->share_mode = SM_COW;
14338 } else {
14339 if (ref_count == 1) {
14340 extended->share_mode = SM_PRIVATE;
14341 } else {
14342 if (obj->true_share) {
14343 extended->share_mode = SM_TRUESHARED;
14344 } else {
14345 extended->share_mode = SM_SHARED;
14346 }
14347 }
14348 }
14349 extended->ref_count = ref_count - extended->shadow_depth;
14350
14351 for (i = 0; i < extended->shadow_depth; i++) {
14352 if ((tmp_obj = obj->shadow) == 0) {
14353 break;
14354 }
14355 vm_object_lock(tmp_obj);
14356 vm_object_unlock(obj);
14357
14358 if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress) {
14359 ref_count--;
14360 }
14361
14362 extended->ref_count += ref_count;
14363 obj = tmp_obj;
14364 }
14365 vm_object_unlock(obj);
14366
14367 if (extended->share_mode == SM_SHARED) {
14368 vm_map_entry_t cur;
14369 vm_map_entry_t last;
14370 int my_refs;
14371
14372 obj = VME_OBJECT(entry);
14373 last = vm_map_to_entry(map);
14374 my_refs = 0;
14375
14376 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
14377 ref_count--;
14378 }
14379 for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next) {
14380 my_refs += vm_map_region_count_obj_refs(cur, obj);
14381 }
14382
14383 if (my_refs == ref_count) {
14384 extended->share_mode = SM_PRIVATE_ALIASED;
14385 } else if (my_refs > 1) {
14386 extended->share_mode = SM_SHARED_ALIASED;
14387 }
14388 }
14389 }
14390
14391
14392 /* object is locked on entry and locked on return */
14393
14394
14395 static void
14396 vm_map_region_look_for_page(
14397 __unused vm_map_t map,
14398 __unused vm_map_offset_t va,
14399 vm_object_t object,
14400 vm_object_offset_t offset,
14401 int max_refcnt,
14402 int depth,
14403 vm_region_extended_info_t extended,
14404 mach_msg_type_number_t count)
14405 {
14406 vm_page_t p;
14407 vm_object_t shadow;
14408 int ref_count;
14409 vm_object_t caller_object;
14410
14411 shadow = object->shadow;
14412 caller_object = object;
14413
14414
14415 while (TRUE) {
14416 if (!(object->internal)) {
14417 extended->external_pager = 1;
14418 }
14419
14420 if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
14421 if (shadow && (max_refcnt == 1)) {
14422 extended->pages_shared_now_private++;
14423 }
14424
14425 if (!p->vmp_fictitious &&
14426 (p->vmp_dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(p)))) {
14427 extended->pages_dirtied++;
14428 } else if (count >= VM_REGION_EXTENDED_INFO_COUNT) {
14429 if (p->vmp_reusable || object->all_reusable) {
14430 extended->pages_reusable++;
14431 }
14432 }
14433
14434 extended->pages_resident++;
14435
14436 if (object != caller_object) {
14437 vm_object_unlock(object);
14438 }
14439
14440 return;
14441 }
14442 if (object->internal &&
14443 object->alive &&
14444 !object->terminating &&
14445 object->pager_ready) {
14446 if (VM_COMPRESSOR_PAGER_STATE_GET(object, offset)
14447 == VM_EXTERNAL_STATE_EXISTS) {
14448 /* the pager has that page */
14449 extended->pages_swapped_out++;
14450 if (object != caller_object) {
14451 vm_object_unlock(object);
14452 }
14453 return;
14454 }
14455 }
14456
14457 if (shadow) {
14458 vm_object_lock(shadow);
14459
14460 if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress) {
14461 ref_count--;
14462 }
14463
14464 if (++depth > extended->shadow_depth) {
14465 extended->shadow_depth = depth;
14466 }
14467
14468 if (ref_count > max_refcnt) {
14469 max_refcnt = ref_count;
14470 }
14471
14472 if (object != caller_object) {
14473 vm_object_unlock(object);
14474 }
14475
14476 offset = offset + object->vo_shadow_offset;
14477 object = shadow;
14478 shadow = object->shadow;
14479 continue;
14480 }
14481 if (object != caller_object) {
14482 vm_object_unlock(object);
14483 }
14484 break;
14485 }
14486 }
14487
14488 static int
14489 vm_map_region_count_obj_refs(
14490 vm_map_entry_t entry,
14491 vm_object_t object)
14492 {
14493 int ref_count;
14494 vm_object_t chk_obj;
14495 vm_object_t tmp_obj;
14496
14497 if (VME_OBJECT(entry) == 0) {
14498 return 0;
14499 }
14500
14501 if (entry->is_sub_map) {
14502 return 0;
14503 } else {
14504 ref_count = 0;
14505
14506 chk_obj = VME_OBJECT(entry);
14507 vm_object_lock(chk_obj);
14508
14509 while (chk_obj) {
14510 if (chk_obj == object) {
14511 ref_count++;
14512 }
14513 tmp_obj = chk_obj->shadow;
14514 if (tmp_obj) {
14515 vm_object_lock(tmp_obj);
14516 }
14517 vm_object_unlock(chk_obj);
14518
14519 chk_obj = tmp_obj;
14520 }
14521 }
14522 return ref_count;
14523 }
14524
14525
14526 /*
14527 * Routine: vm_map_simplify
14528 *
14529 * Description:
14530 * Attempt to simplify the map representation in
14531 * the vicinity of the given starting address.
14532 * Note:
14533 * This routine is intended primarily to keep the
14534 * kernel maps more compact -- they generally don't
14535 * benefit from the "expand a map entry" technology
14536 * at allocation time because the adjacent entry
14537 * is often wired down.
14538 */
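/*
 * Hedged illustration (hypothetical addresses): two adjacent entries
 * [0x1000, 0x2000) and [0x2000, 0x3000) that reference the same VM object
 * at consecutive offsets, with identical protections, inheritance and
 * flags, satisfy the checks in vm_map_simplify_entry() below and collapse
 * into a single entry [0x1000, 0x3000).
 */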
14539 void
14540 vm_map_simplify_entry(
14541 vm_map_t map,
14542 vm_map_entry_t this_entry)
14543 {
14544 vm_map_entry_t prev_entry;
14545
14546 counter(c_vm_map_simplify_entry_called++);
14547
14548 prev_entry = this_entry->vme_prev;
14549
14550 if ((this_entry != vm_map_to_entry(map)) &&
14551 (prev_entry != vm_map_to_entry(map)) &&
14552
14553 (prev_entry->vme_end == this_entry->vme_start) &&
14554
14555 (prev_entry->is_sub_map == this_entry->is_sub_map) &&
14556 (VME_OBJECT(prev_entry) == VME_OBJECT(this_entry)) &&
14557 ((VME_OFFSET(prev_entry) + (prev_entry->vme_end -
14558 prev_entry->vme_start))
14559 == VME_OFFSET(this_entry)) &&
14560
14561 (prev_entry->behavior == this_entry->behavior) &&
14562 (prev_entry->needs_copy == this_entry->needs_copy) &&
14563 (prev_entry->protection == this_entry->protection) &&
14564 (prev_entry->max_protection == this_entry->max_protection) &&
14565 (prev_entry->inheritance == this_entry->inheritance) &&
14566 (prev_entry->use_pmap == this_entry->use_pmap) &&
14567 (VME_ALIAS(prev_entry) == VME_ALIAS(this_entry)) &&
14568 (prev_entry->no_cache == this_entry->no_cache) &&
14569 (prev_entry->permanent == this_entry->permanent) &&
14570 (prev_entry->map_aligned == this_entry->map_aligned) &&
14571 (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) &&
14572 (prev_entry->used_for_jit == this_entry->used_for_jit) &&
14573 (prev_entry->pmap_cs_associated == this_entry->pmap_cs_associated) &&
14574 /* from_reserved_zone: OK if that field doesn't match */
14575 (prev_entry->iokit_acct == this_entry->iokit_acct) &&
14576 (prev_entry->vme_resilient_codesign ==
14577 this_entry->vme_resilient_codesign) &&
14578 (prev_entry->vme_resilient_media ==
14579 this_entry->vme_resilient_media) &&
14580 (prev_entry->vme_no_copy_on_read == this_entry->vme_no_copy_on_read) &&
14581
14582 (prev_entry->wired_count == this_entry->wired_count) &&
14583 (prev_entry->user_wired_count == this_entry->user_wired_count) &&
14584
14585 ((prev_entry->vme_atomic == FALSE) && (this_entry->vme_atomic == FALSE)) &&
14586 (prev_entry->in_transition == FALSE) &&
14587 (this_entry->in_transition == FALSE) &&
14588 (prev_entry->needs_wakeup == FALSE) &&
14589 (this_entry->needs_wakeup == FALSE) &&
14590 (prev_entry->is_shared == FALSE) &&
14591 (this_entry->is_shared == FALSE) &&
14592 (prev_entry->superpage_size == FALSE) &&
14593 (this_entry->superpage_size == FALSE)
14594 ) {
14595 vm_map_store_entry_unlink(map, prev_entry);
14596 assert(prev_entry->vme_start < this_entry->vme_end);
14597 if (prev_entry->map_aligned) {
14598 assert(VM_MAP_PAGE_ALIGNED(prev_entry->vme_start,
14599 VM_MAP_PAGE_MASK(map)));
14600 }
14601 this_entry->vme_start = prev_entry->vme_start;
14602 VME_OFFSET_SET(this_entry, VME_OFFSET(prev_entry));
14603
14604 if (map->holelistenabled) {
14605 vm_map_store_update_first_free(map, this_entry, TRUE);
14606 }
14607
14608 if (prev_entry->is_sub_map) {
14609 vm_map_deallocate(VME_SUBMAP(prev_entry));
14610 } else {
14611 vm_object_deallocate(VME_OBJECT(prev_entry));
14612 }
14613 vm_map_entry_dispose(map, prev_entry);
14614 SAVE_HINT_MAP_WRITE(map, this_entry);
14615 counter(c_vm_map_simplified++);
14616 }
14617 }
14618
14619 void
14620 vm_map_simplify(
14621 vm_map_t map,
14622 vm_map_offset_t start)
14623 {
14624 vm_map_entry_t this_entry;
14625
14626 vm_map_lock(map);
14627 if (vm_map_lookup_entry(map, start, &this_entry)) {
14628 vm_map_simplify_entry(map, this_entry);
14629 vm_map_simplify_entry(map, this_entry->vme_next);
14630 }
14631 counter(c_vm_map_simplify_called++);
14632 vm_map_unlock(map);
14633 }
14634
14635 static void
14636 vm_map_simplify_range(
14637 vm_map_t map,
14638 vm_map_offset_t start,
14639 vm_map_offset_t end)
14640 {
14641 vm_map_entry_t entry;
14642
14643 /*
14644 * The map should be locked (for "write") by the caller.
14645 */
14646
14647 if (start >= end) {
14648 /* invalid address range */
14649 return;
14650 }
14651
14652 start = vm_map_trunc_page(start,
14653 VM_MAP_PAGE_MASK(map));
14654 end = vm_map_round_page(end,
14655 VM_MAP_PAGE_MASK(map));
14656
14657 if (!vm_map_lookup_entry(map, start, &entry)) {
14658 /* "start" is not mapped and "entry" ends before "start" */
14659 if (entry == vm_map_to_entry(map)) {
14660 /* start with first entry in the map */
14661 entry = vm_map_first_entry(map);
14662 } else {
14663 /* start with next entry */
14664 entry = entry->vme_next;
14665 }
14666 }
14667
14668 while (entry != vm_map_to_entry(map) &&
14669 entry->vme_start <= end) {
14670 /* try and coalesce "entry" with its previous entry */
14671 vm_map_simplify_entry(map, entry);
14672 entry = entry->vme_next;
14673 }
14674 }
14675
14676
14677 /*
14678 * Routine: vm_map_machine_attribute
14679 * Purpose:
14680 * Provide machine-specific attributes to mappings,
14681 * such as cacheability etc. for machines that provide
14682 * them. NUMA architectures and machines with big/strange
14683 * caches will use this.
14684 * Note:
14685 * Responsibilities for locking and checking are handled here,
14686 * everything else in the pmap module. If any non-volatile
14687 * information must be kept, the pmap module should handle
14688 * it itself. [This assumes that attributes do not
14689 * need to be inherited, which seems ok to me]
14690 */
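/*
 * Hedged usage sketch (not part of this file): how a user-space caller
 * might reach this routine through the vm_machine_attribute() MIG call,
 * requesting a cache flush for a range it owns.  The header set and the
 * wrapper are illustrative assumptions.
 */
#if 0   /* illustration only -- never compiled into the kernel */
#include <mach/mach.h>
#include <mach/vm_attributes.h>

static kern_return_t
flush_range_cache(vm_address_t addr, vm_size_t size)
{
	vm_machine_attribute_val_t value = MATTR_VAL_CACHE_FLUSH;

	/* MATTR_CACHE takes the locked traversal path implemented below */
	return vm_machine_attribute(mach_task_self(), addr, size,
	           MATTR_CACHE, &value);
}
#endif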
14691 kern_return_t
14692 vm_map_machine_attribute(
14693 vm_map_t map,
14694 vm_map_offset_t start,
14695 vm_map_offset_t end,
14696 vm_machine_attribute_t attribute,
14697 vm_machine_attribute_val_t* value) /* IN/OUT */
14698 {
14699 kern_return_t ret;
14700 vm_map_size_t sync_size;
14701 vm_map_entry_t entry;
14702
14703 if (start < vm_map_min(map) || end > vm_map_max(map)) {
14704 return KERN_INVALID_ADDRESS;
14705 }
14706
14707 /* Figure how much memory we need to flush (in page increments) */
14708 sync_size = end - start;
14709
14710 vm_map_lock(map);
14711
14712 if (attribute != MATTR_CACHE) {
14713 /* If we don't have to find physical addresses, we */
14714 /* don't have to do an explicit traversal here. */
14715 ret = pmap_attribute(map->pmap, start, end - start,
14716 attribute, value);
14717 vm_map_unlock(map);
14718 return ret;
14719 }
14720
14721 ret = KERN_SUCCESS; /* Assume it all worked */
14722
14723 while (sync_size) {
14724 if (vm_map_lookup_entry(map, start, &entry)) {
14725 vm_map_size_t sub_size;
14726 if ((entry->vme_end - start) > sync_size) {
14727 sub_size = sync_size;
14728 sync_size = 0;
14729 } else {
14730 sub_size = entry->vme_end - start;
14731 sync_size -= sub_size;
14732 }
14733 if (entry->is_sub_map) {
14734 vm_map_offset_t sub_start;
14735 vm_map_offset_t sub_end;
14736
14737 sub_start = (start - entry->vme_start)
14738 + VME_OFFSET(entry);
14739 sub_end = sub_start + sub_size;
14740 vm_map_machine_attribute(
14741 VME_SUBMAP(entry),
14742 sub_start,
14743 sub_end,
14744 attribute, value);
14745 } else {
14746 if (VME_OBJECT(entry)) {
14747 vm_page_t m;
14748 vm_object_t object;
14749 vm_object_t base_object;
14750 vm_object_t last_object;
14751 vm_object_offset_t offset;
14752 vm_object_offset_t base_offset;
14753 vm_map_size_t range;
14754 range = sub_size;
14755 offset = (start - entry->vme_start)
14756 + VME_OFFSET(entry);
14757 base_offset = offset;
14758 object = VME_OBJECT(entry);
14759 base_object = object;
14760 last_object = NULL;
14761
14762 vm_object_lock(object);
14763
14764 while (range) {
14765 m = vm_page_lookup(
14766 object, offset);
14767
14768 if (m && !m->vmp_fictitious) {
14769 ret =
14770 pmap_attribute_cache_sync(
14771 VM_PAGE_GET_PHYS_PAGE(m),
14772 PAGE_SIZE,
14773 attribute, value);
14774 } else if (object->shadow) {
14775 offset = offset + object->vo_shadow_offset;
14776 last_object = object;
14777 object = object->shadow;
14778 vm_object_lock(last_object->shadow);
14779 vm_object_unlock(last_object);
14780 continue;
14781 }
14782 range -= PAGE_SIZE;
14783
14784 if (base_object != object) {
14785 vm_object_unlock(object);
14786 vm_object_lock(base_object);
14787 object = base_object;
14788 }
14789 /* Bump to the next page */
14790 base_offset += PAGE_SIZE;
14791 offset = base_offset;
14792 }
14793 vm_object_unlock(object);
14794 }
14795 }
14796 start += sub_size;
14797 } else {
14798 vm_map_unlock(map);
14799 return KERN_FAILURE;
14800 }
14801 }
14802
14803 vm_map_unlock(map);
14804
14805 return ret;
14806 }
14807
14808 /*
14809 * vm_map_behavior_set:
14810 *
14811 * Sets the paging reference behavior of the specified address
14812 * range in the target map. Paging reference behavior affects
14813 * how pagein operations resulting from faults on the map will be
14814 * clustered.
14815 */
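/*
 * Hedged usage sketch (not part of this file): the immediate-action
 * behaviors below are typically reached from user space through
 * madvise(2).  On Darwin, MADV_FREE_REUSABLE and MADV_FREE_REUSE are
 * assumed to translate to VM_BEHAVIOR_REUSABLE and VM_BEHAVIOR_REUSE;
 * that translation lives in the BSD layer, not in this file.
 */
#if 0   /* illustration only -- never compiled into the kernel */
#include <sys/mman.h>

static void
recycle_buffer(void *buf, size_t len)
{
	/* tell the VM system the contents can be discarded ... */
	(void) madvise(buf, len, MADV_FREE_REUSABLE);
	/* ... and later, that the same range is about to be reused */
	(void) madvise(buf, len, MADV_FREE_REUSE);
}
#endif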
14816 kern_return_t
14817 vm_map_behavior_set(
14818 vm_map_t map,
14819 vm_map_offset_t start,
14820 vm_map_offset_t end,
14821 vm_behavior_t new_behavior)
14822 {
14823 vm_map_entry_t entry;
14824 vm_map_entry_t temp_entry;
14825
14826 if (start > end ||
14827 start < vm_map_min(map) ||
14828 end > vm_map_max(map)) {
14829 return KERN_NO_SPACE;
14830 }
14831
14832 switch (new_behavior) {
14833 /*
14834 * This first block of behaviors all set a persistent state on the specified
14835 * memory range. All we have to do here is to record the desired behavior
14836 * in the vm_map_entry_t's.
14837 */
14838
14839 case VM_BEHAVIOR_DEFAULT:
14840 case VM_BEHAVIOR_RANDOM:
14841 case VM_BEHAVIOR_SEQUENTIAL:
14842 case VM_BEHAVIOR_RSEQNTL:
14843 case VM_BEHAVIOR_ZERO_WIRED_PAGES:
14844 vm_map_lock(map);
14845
14846 /*
14847 * The entire address range must be valid for the map.
14848 * Note that vm_map_range_check() does a
14849 * vm_map_lookup_entry() internally and returns the
14850 * entry containing the start of the address range if
14851 * the entire range is valid.
14852 */
14853 if (vm_map_range_check(map, start, end, &temp_entry)) {
14854 entry = temp_entry;
14855 vm_map_clip_start(map, entry, start);
14856 } else {
14857 vm_map_unlock(map);
14858 return KERN_INVALID_ADDRESS;
14859 }
14860
14861 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
14862 vm_map_clip_end(map, entry, end);
14863 if (entry->is_sub_map) {
14864 assert(!entry->use_pmap);
14865 }
14866
14867 if (new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES) {
14868 entry->zero_wired_pages = TRUE;
14869 } else {
14870 entry->behavior = new_behavior;
14871 }
14872 entry = entry->vme_next;
14873 }
14874
14875 vm_map_unlock(map);
14876 break;
14877
14878 /*
14879 * The rest of these are different from the above in that they cause
14880 * an immediate action to take place as opposed to setting a behavior that
14881 * affects future actions.
14882 */
14883
14884 case VM_BEHAVIOR_WILLNEED:
14885 return vm_map_willneed(map, start, end);
14886
14887 case VM_BEHAVIOR_DONTNEED:
14888 return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS);
14889
14890 case VM_BEHAVIOR_FREE:
14891 return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS);
14892
14893 case VM_BEHAVIOR_REUSABLE:
14894 return vm_map_reusable_pages(map, start, end);
14895
14896 case VM_BEHAVIOR_REUSE:
14897 return vm_map_reuse_pages(map, start, end);
14898
14899 case VM_BEHAVIOR_CAN_REUSE:
14900 return vm_map_can_reuse(map, start, end);
14901
14902 #if MACH_ASSERT
14903 case VM_BEHAVIOR_PAGEOUT:
14904 return vm_map_pageout(map, start, end);
14905 #endif /* MACH_ASSERT */
14906
14907 default:
14908 return KERN_INVALID_ARGUMENT;
14909 }
14910
14911 return KERN_SUCCESS;
14912 }
14913
14914
14915 /*
14916 * Internals for madvise(MADV_WILLNEED) system call.
14917 *
14918 * The implementation does one of the following:
14919 * a) read-ahead, if the mapping corresponds to a mapped regular file, or
14920 * b) fault in the pages (zero-fill, decompress, etc.) if it's an anonymous mapping.
14921 */
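/*
 * Hedged usage sketch (not part of this file): a user-space caller
 * typically triggers this path by mmap()ing a file and then issuing
 * madvise(MADV_WILLNEED) over a range it is about to touch.  Error
 * handling and the wrapper name are illustrative assumptions.
 */
#if 0   /* illustration only -- never compiled into the kernel */
#include <sys/mman.h>
#include <fcntl.h>
#include <unistd.h>

static void *
map_and_prefetch(const char *path, size_t len)
{
	int fd = open(path, O_RDONLY);
	if (fd < 0) {
		return NULL;
	}
	void *p = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
	close(fd);
	if (p == MAP_FAILED) {
		return NULL;
	}
	/* request asynchronous read-ahead for the whole mapping */
	(void) madvise(p, len, MADV_WILLNEED);
	return p;
}
#endif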
14922
14923
14924 static kern_return_t
14925 vm_map_willneed(
14926 vm_map_t map,
14927 vm_map_offset_t start,
14928 vm_map_offset_t end
14929 )
14930 {
14931 vm_map_entry_t entry;
14932 vm_object_t object;
14933 memory_object_t pager;
14934 struct vm_object_fault_info fault_info = {};
14935 kern_return_t kr;
14936 vm_object_size_t len;
14937 vm_object_offset_t offset;
14938
14939 fault_info.interruptible = THREAD_UNINT; /* ignored value */
14940 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
14941 fault_info.stealth = TRUE;
14942
14943 /*
14944 * The MADV_WILLNEED operation doesn't require any changes to the
14945 * vm_map_entry_t's, so the read lock is sufficient.
14946 */
14947
14948 vm_map_lock_read(map);
14949
14950 /*
14951 * The madvise semantics require that the address range be fully
14952 * allocated with no holes. Otherwise, we're required to return
14953 * an error.
14954 */
14955
14956 if (!vm_map_range_check(map, start, end, &entry)) {
14957 vm_map_unlock_read(map);
14958 return KERN_INVALID_ADDRESS;
14959 }
14960
14961 /*
14962 * Examine each vm_map_entry_t in the range.
14963 */
14964 for (; entry != vm_map_to_entry(map) && start < end;) {
14965 /*
14966 * The first time through, the start address could be anywhere
14967 * within the vm_map_entry we found. So adjust the offset to
14968 * correspond. After that, the offset will always be zero to
14969 * correspond to the beginning of the current vm_map_entry.
14970 */
14971 offset = (start - entry->vme_start) + VME_OFFSET(entry);
14972
14973 /*
14974 * Set the length so we don't go beyond the end of the
14975 * map_entry or beyond the end of the range we were given.
14976 * This range could span also multiple map entries all of which
14977 * map different files, so make sure we only do the right amount
14978 * of I/O for each object. Note that it's possible for there
14979 * to be multiple map entries all referring to the same object
14980 * but with different page permissions, but it's not worth
14981 * trying to optimize that case.
14982 */
14983 len = MIN(entry->vme_end - start, end - start);
14984
14985 if ((vm_size_t) len != len) {
14986 /* 32-bit overflow */
14987 len = (vm_size_t) (0 - PAGE_SIZE);
14988 }
14989 fault_info.cluster_size = (vm_size_t) len;
14990 fault_info.lo_offset = offset;
14991 fault_info.hi_offset = offset + len;
14992 fault_info.user_tag = VME_ALIAS(entry);
14993 fault_info.pmap_options = 0;
14994 if (entry->iokit_acct ||
14995 (!entry->is_sub_map && !entry->use_pmap)) {
14996 fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
14997 }
14998
14999 /*
15000 * If the entry is a submap OR there's no read permission
15001 * to this mapping, then just skip it.
15002 */
15003 if ((entry->is_sub_map) || (entry->protection & VM_PROT_READ) == 0) {
15004 entry = entry->vme_next;
15005 start = entry->vme_start;
15006 continue;
15007 }
15008
15009 object = VME_OBJECT(entry);
15010
15011 if (object == NULL ||
15012 object->internal) {
15013 /*
15014 * Memory range backed by anonymous memory.
15015 */
15016 vm_size_t region_size = 0, effective_page_size = 0;
15017 vm_map_offset_t addr = 0, effective_page_mask = 0;
15018
15019 region_size = len;
15020 addr = start;
15021
15022 effective_page_mask = MAX(vm_map_page_mask(current_map()), PAGE_MASK);
15023 effective_page_size = effective_page_mask + 1;
15024
15025 vm_map_unlock_read(map);
15026
15027 while (region_size) {
15028 vm_pre_fault(
15029 vm_map_trunc_page(addr, effective_page_mask),
15030 VM_PROT_READ | VM_PROT_WRITE);
15031
15032 region_size -= effective_page_size;
15033 addr += effective_page_size;
15034 }
15035 } else {
15036 /*
15037 * Find the file object backing this map entry. If there is
15038 * none, then we simply ignore the "will need" advice for this
15039 * entry and go on to the next one.
15040 */
15041 if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) {
15042 entry = entry->vme_next;
15043 start = entry->vme_start;
15044 continue;
15045 }
15046
15047 vm_object_paging_begin(object);
15048 pager = object->pager;
15049 vm_object_unlock(object);
15050
15051 /*
15052 * The data_request() could take a long time, so let's
15053 * release the map lock to avoid blocking other threads.
15054 */
15055 vm_map_unlock_read(map);
15056
15057 /*
15058 * Get the data from the object asynchronously.
15059 *
15060 * Note that memory_object_data_request() places limits on the
15061 * amount of I/O it will do. Regardless of the len we
15062 * specified, it won't do more than MAX_UPL_TRANSFER_BYTES and it
15063 * silently truncates the len to that size. This isn't
15064 * necessarily bad since madvise shouldn't really be used to
15065 * page in unlimited amounts of data. Other Unix variants
15066 * limit the willneed case as well. If this turns out to be an
15067 * issue for developers, then we can always adjust the policy
15068 * here and still be backwards compatible since this is all
15069 * just "advice".
15070 */
15071 kr = memory_object_data_request(
15072 pager,
15073 offset + object->paging_offset,
15074 0, /* ignored */
15075 VM_PROT_READ,
15076 (memory_object_fault_info_t)&fault_info);
15077
15078 vm_object_lock(object);
15079 vm_object_paging_end(object);
15080 vm_object_unlock(object);
15081
15082 /*
15083 * If we couldn't do the I/O for some reason, just give up on
15084 * the madvise. We still return success to the user since
15085 * madvise isn't supposed to fail when the advice can't be
15086 * taken.
15087 */
15088
15089 if (kr != KERN_SUCCESS) {
15090 return KERN_SUCCESS;
15091 }
15092 }
15093
15094 start += len;
15095 if (start >= end) {
15096 /* done */
15097 return KERN_SUCCESS;
15098 }
15099
15100 /* look up next entry */
15101 vm_map_lock_read(map);
15102 if (!vm_map_lookup_entry(map, start, &entry)) {
15103 /*
15104 * There's a new hole in the address range.
15105 */
15106 vm_map_unlock_read(map);
15107 return KERN_INVALID_ADDRESS;
15108 }
15109 }
15110
15111 vm_map_unlock_read(map);
15112 return KERN_SUCCESS;
15113 }
15114
15115 static boolean_t
15116 vm_map_entry_is_reusable(
15117 vm_map_entry_t entry)
15118 {
15119 /* Only user map entries */
15120
15121 vm_object_t object;
15122
15123 if (entry->is_sub_map) {
15124 return FALSE;
15125 }
15126
15127 switch (VME_ALIAS(entry)) {
15128 case VM_MEMORY_MALLOC:
15129 case VM_MEMORY_MALLOC_SMALL:
15130 case VM_MEMORY_MALLOC_LARGE:
15131 case VM_MEMORY_REALLOC:
15132 case VM_MEMORY_MALLOC_TINY:
15133 case VM_MEMORY_MALLOC_LARGE_REUSABLE:
15134 case VM_MEMORY_MALLOC_LARGE_REUSED:
15135 /*
15136 * This is a malloc() memory region: check if it's still
15137 * in its original state and can be re-used for more
15138 * malloc() allocations.
15139 */
15140 break;
15141 default:
15142 /*
15143 * Not a malloc() memory region: let the caller decide if
15144 * it's re-usable.
15145 */
15146 return TRUE;
15147 }
15148
15149 if (/*entry->is_shared ||*/
15150 entry->is_sub_map ||
15151 entry->in_transition ||
15152 entry->protection != VM_PROT_DEFAULT ||
15153 entry->max_protection != VM_PROT_ALL ||
15154 entry->inheritance != VM_INHERIT_DEFAULT ||
15155 entry->no_cache ||
15156 entry->permanent ||
15157 entry->superpage_size != FALSE ||
15158 entry->zero_wired_pages ||
15159 entry->wired_count != 0 ||
15160 entry->user_wired_count != 0) {
15161 return FALSE;
15162 }
15163
15164 object = VME_OBJECT(entry);
15165 if (object == VM_OBJECT_NULL) {
15166 return TRUE;
15167 }
15168 if (
15169 #if 0
15170 /*
15171 * Let's proceed even if the VM object is potentially
15172 * shared.
15173 * We check for this later when processing the actual
15174 * VM pages, so the contents will be safe if shared.
15175 *
15176 * But we can still mark this memory region as "reusable" to
15177 * acknowledge that the caller did let us know that the memory
15178 * could be re-used and should not be penalized for holding
15179 * on to it. This allows its "resident size" to not include
15180 * the reusable range.
15181 */
15182 object->ref_count == 1 &&
15183 #endif
15184 object->wired_page_count == 0 &&
15185 object->copy == VM_OBJECT_NULL &&
15186 object->shadow == VM_OBJECT_NULL &&
15187 object->internal &&
15188 object->purgable == VM_PURGABLE_DENY &&
15189 object->copy_strategy != MEMORY_OBJECT_COPY_DELAY &&
15190 !object->true_share &&
15191 object->wimg_bits == VM_WIMG_USE_DEFAULT &&
15192 !object->code_signed) {
15193 return TRUE;
15194 }
15195 return FALSE;
15196 }
15197
15198 static kern_return_t
15199 vm_map_reuse_pages(
15200 vm_map_t map,
15201 vm_map_offset_t start,
15202 vm_map_offset_t end)
15203 {
15204 vm_map_entry_t entry;
15205 vm_object_t object;
15206 vm_object_offset_t start_offset, end_offset;
15207
15208 /*
15209 * The MADV_REUSE operation doesn't require any changes to the
15210 * vm_map_entry_t's, so the read lock is sufficient.
15211 */
15212
15213 vm_map_lock_read(map);
15214 assert(map->pmap != kernel_pmap); /* protect alias access */
15215
15216 /*
15217 * The madvise semantics require that the address range be fully
15218 * allocated with no holes. Otherwise, we're required to return
15219 * an error.
15220 */
15221
15222 if (!vm_map_range_check(map, start, end, &entry)) {
15223 vm_map_unlock_read(map);
15224 vm_page_stats_reusable.reuse_pages_failure++;
15225 return KERN_INVALID_ADDRESS;
15226 }
15227
15228 /*
15229 * Examine each vm_map_entry_t in the range.
15230 */
15231 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
15232 entry = entry->vme_next) {
15233 /*
15234 * Sanity check on the VM map entry.
15235 */
15236 if (!vm_map_entry_is_reusable(entry)) {
15237 vm_map_unlock_read(map);
15238 vm_page_stats_reusable.reuse_pages_failure++;
15239 return KERN_INVALID_ADDRESS;
15240 }
15241
15242 /*
15243 * The first time through, the start address could be anywhere
15244 * within the vm_map_entry we found. So adjust the offset to
15245 * correspond.
15246 */
15247 if (entry->vme_start < start) {
15248 start_offset = start - entry->vme_start;
15249 } else {
15250 start_offset = 0;
15251 }
15252 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
15253 start_offset += VME_OFFSET(entry);
15254 end_offset += VME_OFFSET(entry);
15255
15256 assert(!entry->is_sub_map);
15257 object = VME_OBJECT(entry);
15258 if (object != VM_OBJECT_NULL) {
15259 vm_object_lock(object);
15260 vm_object_reuse_pages(object, start_offset, end_offset,
15261 TRUE);
15262 vm_object_unlock(object);
15263 }
15264
15265 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSABLE) {
15266 /*
15267 * XXX
15268 * We do not hold the VM map exclusively here.
15269 * The "alias" field is not that critical, so it's
15270 * safe to update it here, as long as it is the only
15271 * one that can be modified while holding the VM map
15272 * "shared".
15273 */
15274 VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSED);
15275 }
15276 }
15277
15278 vm_map_unlock_read(map);
15279 vm_page_stats_reusable.reuse_pages_success++;
15280 return KERN_SUCCESS;
15281 }
15282
15283
15284 static kern_return_t
15285 vm_map_reusable_pages(
15286 vm_map_t map,
15287 vm_map_offset_t start,
15288 vm_map_offset_t end)
15289 {
15290 vm_map_entry_t entry;
15291 vm_object_t object;
15292 vm_object_offset_t start_offset, end_offset;
15293 vm_map_offset_t pmap_offset;
15294
15295 /*
15296 * The MADV_REUSABLE operation doesn't require any changes to the
15297 * vm_map_entry_t's, so the read lock is sufficient.
15298 */
15299
15300 vm_map_lock_read(map);
15301 assert(map->pmap != kernel_pmap); /* protect alias access */
15302
15303 /*
15304 * The madvise semantics require that the address range be fully
15305 * allocated with no holes. Otherwise, we're required to return
15306 * an error.
15307 */
15308
15309 if (!vm_map_range_check(map, start, end, &entry)) {
15310 vm_map_unlock_read(map);
15311 vm_page_stats_reusable.reusable_pages_failure++;
15312 return KERN_INVALID_ADDRESS;
15313 }
15314
15315 /*
15316 * Examine each vm_map_entry_t in the range.
15317 */
15318 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
15319 entry = entry->vme_next) {
15320 int kill_pages = 0;
15321
15322 /*
15323 * Sanity check on the VM map entry.
15324 */
15325 if (!vm_map_entry_is_reusable(entry)) {
15326 vm_map_unlock_read(map);
15327 vm_page_stats_reusable.reusable_pages_failure++;
15328 return KERN_INVALID_ADDRESS;
15329 }
15330
15331 if (!(entry->protection & VM_PROT_WRITE) && !entry->used_for_jit) {
15332 /* not writable: can't discard contents */
15333 vm_map_unlock_read(map);
15334 vm_page_stats_reusable.reusable_nonwritable++;
15335 vm_page_stats_reusable.reusable_pages_failure++;
15336 return KERN_PROTECTION_FAILURE;
15337 }
15338
15339 /*
15340 * The first time through, the start address could be anywhere
15341 * within the vm_map_entry we found. So adjust the offset to
15342 * correspond.
15343 */
15344 if (entry->vme_start < start) {
15345 start_offset = start - entry->vme_start;
15346 pmap_offset = start;
15347 } else {
15348 start_offset = 0;
15349 pmap_offset = entry->vme_start;
15350 }
15351 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
15352 start_offset += VME_OFFSET(entry);
15353 end_offset += VME_OFFSET(entry);
15354
15355 assert(!entry->is_sub_map);
15356 object = VME_OBJECT(entry);
15357 if (object == VM_OBJECT_NULL) {
15358 continue;
15359 }
15360
15361
15362 vm_object_lock(object);
15363 if (((object->ref_count == 1) ||
15364 (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC &&
15365 object->copy == VM_OBJECT_NULL)) &&
15366 object->shadow == VM_OBJECT_NULL &&
15367 /*
15368 * "iokit_acct" entries are billed for their virtual size
15369 * (rather than for their resident pages only), so they
15370 * wouldn't benefit from making pages reusable, and it
15371 * would be hard to keep track of pages that are both
15372 * "iokit_acct" and "reusable" in the pmap stats and
15373 * ledgers.
15374 */
15375 !(entry->iokit_acct ||
15376 (!entry->is_sub_map && !entry->use_pmap))) {
15377 if (object->ref_count != 1) {
15378 vm_page_stats_reusable.reusable_shared++;
15379 }
15380 kill_pages = 1;
15381 } else {
15382 kill_pages = -1;
15383 }
15384 if (kill_pages != -1) {
15385 vm_object_deactivate_pages(object,
15386 start_offset,
15387 end_offset - start_offset,
15388 kill_pages,
15389 TRUE /*reusable_pages*/,
15390 map->pmap,
15391 pmap_offset);
15392 } else {
15393 vm_page_stats_reusable.reusable_pages_shared++;
15394 }
15395 vm_object_unlock(object);
15396
15397 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE ||
15398 VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSED) {
15399 /*
15400 * XXX
15401 * We do not hold the VM map exclusively here.
15402 * The "alias" field is not that critical, so it's
15403 * safe to update it here, as long as it is the only
15404 * one that can be modified while holding the VM map
15405 * "shared".
15406 */
15407 VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSABLE);
15408 }
15409 }
15410
15411 vm_map_unlock_read(map);
15412 vm_page_stats_reusable.reusable_pages_success++;
15413 return KERN_SUCCESS;
15414 }
15415
15416
15417 static kern_return_t
15418 vm_map_can_reuse(
15419 vm_map_t map,
15420 vm_map_offset_t start,
15421 vm_map_offset_t end)
15422 {
15423 vm_map_entry_t entry;
15424
15425 /*
15426 * The MADV_CAN_REUSE operation doesn't require any changes to the
15427 * vm_map_entry_t's, so the read lock is sufficient.
15428 */
15429
15430 vm_map_lock_read(map);
15431 assert(map->pmap != kernel_pmap); /* protect alias access */
15432
15433 /*
15434 * The madvise semantics require that the address range be fully
15435 * allocated with no holes. Otherwise, we're required to return
15436 * an error.
15437 */
15438
15439 if (!vm_map_range_check(map, start, end, &entry)) {
15440 vm_map_unlock_read(map);
15441 vm_page_stats_reusable.can_reuse_failure++;
15442 return KERN_INVALID_ADDRESS;
15443 }
15444
15445 /*
15446 * Examine each vm_map_entry_t in the range.
15447 */
15448 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
15449 entry = entry->vme_next) {
15450 /*
15451 * Sanity check on the VM map entry.
15452 */
15453 if (!vm_map_entry_is_reusable(entry)) {
15454 vm_map_unlock_read(map);
15455 vm_page_stats_reusable.can_reuse_failure++;
15456 return KERN_INVALID_ADDRESS;
15457 }
15458 }
15459
15460 vm_map_unlock_read(map);
15461 vm_page_stats_reusable.can_reuse_success++;
15462 return KERN_SUCCESS;
15463 }
15464
15465
15466 #if MACH_ASSERT
15467 static kern_return_t
15468 vm_map_pageout(
15469 vm_map_t map,
15470 vm_map_offset_t start,
15471 vm_map_offset_t end)
15472 {
15473 vm_map_entry_t entry;
15474
15475 /*
15476 * The MADV_PAGEOUT operation doesn't require any changes to the
15477 * vm_map_entry_t's, so the read lock is sufficient.
15478 */
15479
15480 vm_map_lock_read(map);
15481
15482 /*
15483 * The madvise semantics require that the address range be fully
15484 * allocated with no holes. Otherwise, we're required to return
15485 * an error.
15486 */
15487
15488 if (!vm_map_range_check(map, start, end, &entry)) {
15489 vm_map_unlock_read(map);
15490 return KERN_INVALID_ADDRESS;
15491 }
15492
15493 /*
15494 * Examine each vm_map_entry_t in the range.
15495 */
15496 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
15497 entry = entry->vme_next) {
15498 vm_object_t object;
15499
15500 /*
15501 * Sanity check on the VM map entry.
15502 */
15503 if (entry->is_sub_map) {
15504 vm_map_t submap;
15505 vm_map_offset_t submap_start;
15506 vm_map_offset_t submap_end;
15507 vm_map_entry_t submap_entry;
15508
15509 submap = VME_SUBMAP(entry);
15510 submap_start = VME_OFFSET(entry);
15511 submap_end = submap_start + (entry->vme_end -
15512 entry->vme_start);
15513
15514 vm_map_lock_read(submap);
15515
15516 if (!vm_map_range_check(submap,
15517 submap_start,
15518 submap_end,
15519 &submap_entry)) {
15520 vm_map_unlock_read(submap);
15521 vm_map_unlock_read(map);
15522 return KERN_INVALID_ADDRESS;
15523 }
15524
15525 object = VME_OBJECT(submap_entry);
15526 if (submap_entry->is_sub_map ||
15527 object == VM_OBJECT_NULL ||
15528 !object->internal) {
15529 vm_map_unlock_read(submap);
15530 continue;
15531 }
15532
15533 vm_object_pageout(object);
15534
15535 vm_map_unlock_read(submap);
15536 submap = VM_MAP_NULL;
15537 submap_entry = VM_MAP_ENTRY_NULL;
15538 continue;
15539 }
15540
15541 object = VME_OBJECT(entry);
15542 if (entry->is_sub_map ||
15543 object == VM_OBJECT_NULL ||
15544 !object->internal) {
15545 continue;
15546 }
15547
15548 vm_object_pageout(object);
15549 }
15550
15551 vm_map_unlock_read(map);
15552 return KERN_SUCCESS;
15553 }
15554 #endif /* MACH_ASSERT */
15555
15556
15557 /*
15558 * Routine: vm_map_entry_insert
15559 *
15560 * Description: This routine inserts a new vm_map_entry in a locked map.
15561 */
15562 vm_map_entry_t
15563 vm_map_entry_insert(
15564 vm_map_t map,
15565 vm_map_entry_t insp_entry,
15566 vm_map_offset_t start,
15567 vm_map_offset_t end,
15568 vm_object_t object,
15569 vm_object_offset_t offset,
15570 boolean_t needs_copy,
15571 boolean_t is_shared,
15572 boolean_t in_transition,
15573 vm_prot_t cur_protection,
15574 vm_prot_t max_protection,
15575 vm_behavior_t behavior,
15576 vm_inherit_t inheritance,
15577 unsigned wired_count,
15578 boolean_t no_cache,
15579 boolean_t permanent,
15580 boolean_t no_copy_on_read,
15581 unsigned int superpage_size,
15582 boolean_t clear_map_aligned,
15583 boolean_t is_submap,
15584 boolean_t used_for_jit,
15585 int alias)
15586 {
15587 vm_map_entry_t new_entry;
15588
15589 assert(insp_entry != (vm_map_entry_t)0);
15590 vm_map_lock_assert_exclusive(map);
15591
15592 #if DEVELOPMENT || DEBUG
15593 vm_object_offset_t end_offset = 0;
15594 assertf(!os_add_overflow(end - start, offset, &end_offset), "size 0x%llx, offset 0x%llx caused overflow", (uint64_t)(end - start), offset);
15595 #endif /* DEVELOPMENT || DEBUG */
15596
15597 new_entry = vm_map_entry_create(map, !map->hdr.entries_pageable);
15598
15599 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
15600 new_entry->map_aligned = TRUE;
15601 } else {
15602 new_entry->map_aligned = FALSE;
15603 }
15604 if (clear_map_aligned &&
15605 (!VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)) ||
15606 !VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)))) {
15607 new_entry->map_aligned = FALSE;
15608 }
15609
15610 new_entry->vme_start = start;
15611 new_entry->vme_end = end;
15612 assert(page_aligned(new_entry->vme_start));
15613 assert(page_aligned(new_entry->vme_end));
15614 if (new_entry->map_aligned) {
15615 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
15616 VM_MAP_PAGE_MASK(map)));
15617 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
15618 VM_MAP_PAGE_MASK(map)));
15619 }
15620 assert(new_entry->vme_start < new_entry->vme_end);
15621
15622 VME_OBJECT_SET(new_entry, object);
15623 VME_OFFSET_SET(new_entry, offset);
15624 new_entry->is_shared = is_shared;
15625 new_entry->is_sub_map = is_submap;
15626 new_entry->needs_copy = needs_copy;
15627 new_entry->in_transition = in_transition;
15628 new_entry->needs_wakeup = FALSE;
15629 new_entry->inheritance = inheritance;
15630 new_entry->protection = cur_protection;
15631 new_entry->max_protection = max_protection;
15632 new_entry->behavior = behavior;
15633 new_entry->wired_count = wired_count;
15634 new_entry->user_wired_count = 0;
15635 if (is_submap) {
15636 /*
15637 * submap: "use_pmap" means "nested".
15638 * default: false.
15639 */
15640 new_entry->use_pmap = FALSE;
15641 } else {
15642 /*
15643 * object: "use_pmap" means "use pmap accounting" for footprint.
15644 * default: true.
15645 */
15646 new_entry->use_pmap = TRUE;
15647 }
15648 VME_ALIAS_SET(new_entry, alias);
15649 new_entry->zero_wired_pages = FALSE;
15650 new_entry->no_cache = no_cache;
15651 new_entry->permanent = permanent;
15652 if (superpage_size) {
15653 new_entry->superpage_size = TRUE;
15654 } else {
15655 new_entry->superpage_size = FALSE;
15656 }
15657 if (used_for_jit) {
15658 #if CONFIG_EMBEDDED
15659 if (!(map->jit_entry_exists))
15660 #endif /* CONFIG_EMBEDDED */
15661 {
15662 new_entry->used_for_jit = TRUE;
15663 map->jit_entry_exists = TRUE;
15664 }
15665 } else {
15666 new_entry->used_for_jit = FALSE;
15667 }
15668 new_entry->pmap_cs_associated = FALSE;
15669 new_entry->iokit_acct = FALSE;
15670 new_entry->vme_resilient_codesign = FALSE;
15671 new_entry->vme_resilient_media = FALSE;
15672 new_entry->vme_atomic = FALSE;
15673 new_entry->vme_no_copy_on_read = no_copy_on_read;
15674
15675 /*
15676 * Insert the new entry into the list.
15677 */
15678
15679 vm_map_store_entry_link(map, insp_entry, new_entry,
15680 VM_MAP_KERNEL_FLAGS_NONE);
15681 map->size += end - start;
15682
15683 /*
15684 * Update the free space hint and the lookup hint.
15685 */
15686
15687 SAVE_HINT_MAP_WRITE(map, new_entry);
15688 return new_entry;
15689 }
15690
15691 /*
15692 * Routine: vm_map_remap_extract
15693 *
15694 * Description: This routine returns a vm_map_entry list from a map.
15695 */
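/*
 * Hedged usage sketch (not part of this file): this extraction step is
 * used by vm_map_remap(), which backs the mach_vm_remap() interface.
 * The call below creates a second, shared mapping of an existing range
 * in the caller's own address space; the wrapper name is illustrative.
 */
#if 0   /* illustration only -- never compiled into the kernel */
#include <mach/mach.h>
#include <mach/mach_vm.h>

static kern_return_t
alias_range(mach_vm_address_t src, mach_vm_size_t size,
    mach_vm_address_t *dst)
{
	vm_prot_t cur_prot, max_prot;

	*dst = 0;
	return mach_vm_remap(mach_task_self(), dst, size, 0 /* mask */,
	           VM_FLAGS_ANYWHERE,
	           mach_task_self(), src, FALSE /* copy */,
	           &cur_prot, &max_prot, VM_INHERIT_DEFAULT);
}
#endif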
15696 static kern_return_t
15697 vm_map_remap_extract(
15698 vm_map_t map,
15699 vm_map_offset_t addr,
15700 vm_map_size_t size,
15701 boolean_t copy,
15702 struct vm_map_header *map_header,
15703 vm_prot_t *cur_protection,
15704 vm_prot_t *max_protection,
15705 /* What, no behavior? */
15706 vm_inherit_t inheritance,
15707 boolean_t pageable,
15708 boolean_t same_map,
15709 vm_map_kernel_flags_t vmk_flags)
15710 {
15711 kern_return_t result;
15712 vm_map_size_t mapped_size;
15713 vm_map_size_t tmp_size;
15714 vm_map_entry_t src_entry; /* result of last map lookup */
15715 vm_map_entry_t new_entry;
15716 vm_object_offset_t offset;
15717 vm_map_offset_t map_address;
15718 vm_map_offset_t src_start; /* start of entry to map */
15719 vm_map_offset_t src_end; /* end of region to be mapped */
15720 vm_object_t object;
15721 vm_map_version_t version;
15722 boolean_t src_needs_copy;
15723 boolean_t new_entry_needs_copy;
15724 vm_map_entry_t saved_src_entry;
15725 boolean_t src_entry_was_wired;
15726 vm_prot_t max_prot_for_prot_copy;
15727
15728 assert(map != VM_MAP_NULL);
15729 assert(size != 0);
15730 assert(size == vm_map_round_page(size, PAGE_MASK));
15731 assert(inheritance == VM_INHERIT_NONE ||
15732 inheritance == VM_INHERIT_COPY ||
15733 inheritance == VM_INHERIT_SHARE);
15734
15735 /*
15736 * Compute start and end of region.
15737 */
15738 src_start = vm_map_trunc_page(addr, PAGE_MASK);
15739 src_end = vm_map_round_page(src_start + size, PAGE_MASK);
15740
15741
15742 /*
15743 * Initialize map_header.
15744 */
15745 map_header->links.next = CAST_TO_VM_MAP_ENTRY(&map_header->links);
15746 map_header->links.prev = CAST_TO_VM_MAP_ENTRY(&map_header->links);
15747 map_header->nentries = 0;
15748 map_header->entries_pageable = pageable;
15749 map_header->page_shift = PAGE_SHIFT;
15750
15751 vm_map_store_init( map_header );
15752
15753 if (copy && vmk_flags.vmkf_remap_prot_copy) {
15754 max_prot_for_prot_copy = *max_protection & VM_PROT_ALL;
15755 } else {
15756 max_prot_for_prot_copy = VM_PROT_NONE;
15757 }
15758 *cur_protection = VM_PROT_ALL;
15759 *max_protection = VM_PROT_ALL;
15760
15761 map_address = 0;
15762 mapped_size = 0;
15763 result = KERN_SUCCESS;
15764
15765 /*
15766 * The specified source virtual space might correspond to
15767 * multiple map entries, need to loop on them.
15768 */
15769 vm_map_lock(map);
15770 while (mapped_size != size) {
15771 vm_map_size_t entry_size;
15772
15773 /*
15774 * Find the beginning of the region.
15775 */
15776 if (!vm_map_lookup_entry(map, src_start, &src_entry)) {
15777 result = KERN_INVALID_ADDRESS;
15778 break;
15779 }
15780
15781 if (src_start < src_entry->vme_start ||
15782 (mapped_size && src_start != src_entry->vme_start)) {
15783 result = KERN_INVALID_ADDRESS;
15784 break;
15785 }
15786
15787 tmp_size = size - mapped_size;
15788 if (src_end > src_entry->vme_end) {
15789 tmp_size -= (src_end - src_entry->vme_end);
15790 }
15791
15792 entry_size = (vm_map_size_t)(src_entry->vme_end -
15793 src_entry->vme_start);
15794
15795 if (src_entry->is_sub_map) {
15796 vm_map_reference(VME_SUBMAP(src_entry));
15797 object = VM_OBJECT_NULL;
15798 } else {
15799 object = VME_OBJECT(src_entry);
15800 if (src_entry->iokit_acct) {
15801 /*
15802 * This entry uses "IOKit accounting".
15803 */
15804 } else if (object != VM_OBJECT_NULL &&
15805 (object->purgable != VM_PURGABLE_DENY ||
15806 object->vo_ledger_tag != VM_LEDGER_TAG_NONE)) {
15807 /*
15808 * Purgeable objects have their own accounting:
15809 * no pmap accounting for them.
15810 */
15811 assertf(!src_entry->use_pmap,
15812 "map=%p src_entry=%p [0x%llx:0x%llx] 0x%x/0x%x %d",
15813 map,
15814 src_entry,
15815 (uint64_t)src_entry->vme_start,
15816 (uint64_t)src_entry->vme_end,
15817 src_entry->protection,
15818 src_entry->max_protection,
15819 VME_ALIAS(src_entry));
15820 } else {
15821 /*
15822 * Not IOKit or purgeable:
15823 * must be accounted by pmap stats.
15824 */
15825 assertf(src_entry->use_pmap,
15826 "map=%p src_entry=%p [0x%llx:0x%llx] 0x%x/0x%x %d",
15827 map,
15828 src_entry,
15829 (uint64_t)src_entry->vme_start,
15830 (uint64_t)src_entry->vme_end,
15831 src_entry->protection,
15832 src_entry->max_protection,
15833 VME_ALIAS(src_entry));
15834 }
15835
15836 if (object == VM_OBJECT_NULL) {
15837 object = vm_object_allocate(entry_size);
15838 VME_OFFSET_SET(src_entry, 0);
15839 VME_OBJECT_SET(src_entry, object);
15840 assert(src_entry->use_pmap);
15841 } else if (object->copy_strategy !=
15842 MEMORY_OBJECT_COPY_SYMMETRIC) {
15843 /*
15844 * We are already using an asymmetric
15845 * copy, and therefore we already have
15846 * the right object.
15847 */
15848 assert(!src_entry->needs_copy);
15849 } else if (src_entry->needs_copy || object->shadowed ||
15850 (object->internal && !object->true_share &&
15851 !src_entry->is_shared &&
15852 object->vo_size > entry_size)) {
15853 VME_OBJECT_SHADOW(src_entry, entry_size);
15854 assert(src_entry->use_pmap);
15855
15856 if (!src_entry->needs_copy &&
15857 (src_entry->protection & VM_PROT_WRITE)) {
15858 vm_prot_t prot;
15859
15860 assert(!pmap_has_prot_policy(src_entry->protection));
15861
15862 prot = src_entry->protection & ~VM_PROT_WRITE;
15863
15864 if (override_nx(map,
15865 VME_ALIAS(src_entry))
15866 && prot) {
15867 prot |= VM_PROT_EXECUTE;
15868 }
15869
15870 assert(!pmap_has_prot_policy(prot));
15871
15872 if (map->mapped_in_other_pmaps) {
15873 vm_object_pmap_protect(
15874 VME_OBJECT(src_entry),
15875 VME_OFFSET(src_entry),
15876 entry_size,
15877 PMAP_NULL,
15878 src_entry->vme_start,
15879 prot);
15880 } else {
15881 pmap_protect(vm_map_pmap(map),
15882 src_entry->vme_start,
15883 src_entry->vme_end,
15884 prot);
15885 }
15886 }
15887
15888 object = VME_OBJECT(src_entry);
15889 src_entry->needs_copy = FALSE;
15890 }
15891
15892
15893 vm_object_lock(object);
15894 vm_object_reference_locked(object); /* object ref. for new entry */
15895 if (object->copy_strategy ==
15896 MEMORY_OBJECT_COPY_SYMMETRIC) {
15897 object->copy_strategy =
15898 MEMORY_OBJECT_COPY_DELAY;
15899 }
15900 vm_object_unlock(object);
15901 }
15902
15903 offset = (VME_OFFSET(src_entry) +
15904 (src_start - src_entry->vme_start));
15905
15906 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
15907 vm_map_entry_copy(new_entry, src_entry);
15908 if (new_entry->is_sub_map) {
15909 /* clr address space specifics */
15910 new_entry->use_pmap = FALSE;
15911 } else if (copy) {
15912 /*
15913 * We're dealing with a copy-on-write operation,
15914 * so the resulting mapping should not inherit the
15915 * original mapping's accounting settings.
15916 * "use_pmap" should be reset to its default (TRUE)
15917 * so that the new mapping gets accounted for in
15918 * the task's memory footprint.
15919 */
15920 new_entry->use_pmap = TRUE;
15921 }
15922 /* "iokit_acct" was cleared in vm_map_entry_copy() */
15923 assert(!new_entry->iokit_acct);
15924
15925 new_entry->map_aligned = FALSE;
15926
15927 new_entry->vme_start = map_address;
15928 new_entry->vme_end = map_address + tmp_size;
15929 assert(new_entry->vme_start < new_entry->vme_end);
15930 if (copy && vmk_flags.vmkf_remap_prot_copy) {
15931 /*
15932 * Remapping for vm_map_protect(VM_PROT_COPY)
15933 * to convert a read-only mapping into a
15934 * copy-on-write version of itself but
15935 * with write access:
15936 * keep the original inheritance and add
15937 * VM_PROT_WRITE to the max protection.
15938 */
15939 new_entry->inheritance = src_entry->inheritance;
15940 new_entry->protection &= max_prot_for_prot_copy;
15941 new_entry->max_protection |= VM_PROT_WRITE;
15942 } else {
15943 new_entry->inheritance = inheritance;
15944 }
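/*
 * Illustrative example, not part of the original source: if the source
 * entry is currently VM_PROT_READ / VM_PROT_READ and the caller's
 * original max protection (captured in max_prot_for_prot_copy above)
 * is VM_PROT_READ | VM_PROT_WRITE, the new entry keeps protection
 * VM_PROT_READ but its max protection becomes READ | WRITE, which is
 * what lets vm_map_protect(VM_PROT_COPY) later grant write access to
 * the copy-on-write copy.
 */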
15945 VME_OFFSET_SET(new_entry, offset);
15946
15947 /*
15948 * The new region has to be copied now if required.
15949 */
15950 RestartCopy:
15951 if (!copy) {
15952 if (src_entry->used_for_jit == TRUE) {
15953 if (same_map) {
15954 } else {
15955 #if CONFIG_EMBEDDED
15956 /*
15957 * Cannot allow an entry describing a JIT
15958 * region to be shared across address spaces.
15959 */
15960 result = KERN_INVALID_ARGUMENT;
15961 break;
15962 #endif /* CONFIG_EMBEDDED */
15963 }
15964 }
15965
15966 src_entry->is_shared = TRUE;
15967 new_entry->is_shared = TRUE;
15968 if (!(new_entry->is_sub_map)) {
15969 new_entry->needs_copy = FALSE;
15970 }
15971 } else if (src_entry->is_sub_map) {
15972 /* make this a COW sub_map if not already */
15973 assert(new_entry->wired_count == 0);
15974 new_entry->needs_copy = TRUE;
15975 object = VM_OBJECT_NULL;
15976 } else if (src_entry->wired_count == 0 &&
15977 vm_object_copy_quickly(VME_OBJECT_PTR(new_entry),
15978 VME_OFFSET(new_entry),
15979 (new_entry->vme_end -
15980 new_entry->vme_start),
15981 &src_needs_copy,
15982 &new_entry_needs_copy)) {
15983 new_entry->needs_copy = new_entry_needs_copy;
15984 new_entry->is_shared = FALSE;
15985 assertf(new_entry->use_pmap, "map %p new_entry %p\n", map, new_entry);
15986
15987 /*
15988 * Handle copy_on_write semantics.
15989 */
15990 if (src_needs_copy && !src_entry->needs_copy) {
15991 vm_prot_t prot;
15992
15993 assert(!pmap_has_prot_policy(src_entry->protection));
15994
15995 prot = src_entry->protection & ~VM_PROT_WRITE;
15996
15997 if (override_nx(map,
15998 VME_ALIAS(src_entry))
15999 && prot) {
16000 prot |= VM_PROT_EXECUTE;
16001 }
16002
16003 assert(!pmap_has_prot_policy(prot));
16004
16005 vm_object_pmap_protect(object,
16006 offset,
16007 entry_size,
16008 ((src_entry->is_shared
16009 || map->mapped_in_other_pmaps) ?
16010 PMAP_NULL : map->pmap),
16011 src_entry->vme_start,
16012 prot);
16013
16014 assert(src_entry->wired_count == 0);
16015 src_entry->needs_copy = TRUE;
16016 }
16017 /*
16018 * Throw away the old object reference of the new entry.
16019 */
16020 vm_object_deallocate(object);
16021 } else {
16022 new_entry->is_shared = FALSE;
16023 assertf(new_entry->use_pmap, "map %p new_entry %p\n", map, new_entry);
16024
16025 src_entry_was_wired = (src_entry->wired_count > 0);
16026 saved_src_entry = src_entry;
16027 src_entry = VM_MAP_ENTRY_NULL;
16028
16029 /*
16030 * The map can be safely unlocked since we
16031 * already hold a reference on the object.
16032 *
16033 * Record the timestamp of the map for later
16034 * verification, and unlock the map.
16035 */
16036 version.main_timestamp = map->timestamp;
16037 vm_map_unlock(map); /* Increments timestamp once! */
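/*
 * Illustrative note, not part of the original source: if the timestamp
 * saved in "version" above was 41, this unlock bumps it to 42.  The
 * "version.main_timestamp + 1 != map->timestamp" check after re-locking
 * below therefore only triggers a retry if some other writer locked and
 * unlocked the map while the copy was in progress.
 */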
16038
16039 /*
16040 * Perform the copy.
16041 */
16042 if (src_entry_was_wired > 0) {
16043 vm_object_lock(object);
16044 result = vm_object_copy_slowly(
16045 object,
16046 offset,
16047 (new_entry->vme_end -
16048 new_entry->vme_start),
16049 THREAD_UNINT,
16050 VME_OBJECT_PTR(new_entry));
16051
16052 VME_OFFSET_SET(new_entry, 0);
16053 new_entry->needs_copy = FALSE;
16054 } else {
16055 vm_object_offset_t new_offset;
16056
16057 new_offset = VME_OFFSET(new_entry);
16058 result = vm_object_copy_strategically(
16059 object,
16060 offset,
16061 (new_entry->vme_end -
16062 new_entry->vme_start),
16063 VME_OBJECT_PTR(new_entry),
16064 &new_offset,
16065 &new_entry_needs_copy);
16066 if (new_offset != VME_OFFSET(new_entry)) {
16067 VME_OFFSET_SET(new_entry, new_offset);
16068 }
16069
16070 new_entry->needs_copy = new_entry_needs_copy;
16071 }
16072
16073 /*
16074 * Throw away the old object reference of the new entry.
16075 */
16076 vm_object_deallocate(object);
16077
16078 if (result != KERN_SUCCESS &&
16079 result != KERN_MEMORY_RESTART_COPY) {
16080 _vm_map_entry_dispose(map_header, new_entry);
16081 vm_map_lock(map);
16082 break;
16083 }
16084
16085 /*
16086 * Verify that the map has not substantially
16087 * changed while the copy was being made.
16088 */
16089
16090 vm_map_lock(map);
16091 if (version.main_timestamp + 1 != map->timestamp) {
16092 /*
16093 * Simple version comparison failed.
16094 *
16095 * Retry the lookup and verify that the
16096 * same object/offset are still present.
16097 */
16098 saved_src_entry = VM_MAP_ENTRY_NULL;
16099 vm_object_deallocate(VME_OBJECT(new_entry));
16100 _vm_map_entry_dispose(map_header, new_entry);
16101 if (result == KERN_MEMORY_RESTART_COPY) {
16102 result = KERN_SUCCESS;
16103 }
16104 continue;
16105 }
16106 /* map hasn't changed: src_entry is still valid */
16107 src_entry = saved_src_entry;
16108 saved_src_entry = VM_MAP_ENTRY_NULL;
16109
16110 if (result == KERN_MEMORY_RESTART_COPY) {
16111 vm_object_reference(object);
16112 goto RestartCopy;
16113 }
16114 }
16115
16116 _vm_map_store_entry_link(map_header,
16117 map_header->links.prev, new_entry);
16118
16119 /* Protections for submap mapping are irrelevant here */
16120 if (!src_entry->is_sub_map) {
16121 *cur_protection &= src_entry->protection;
16122 *max_protection &= src_entry->max_protection;
16123 }
16124 map_address += tmp_size;
16125 mapped_size += tmp_size;
16126 src_start += tmp_size;
16127 } /* end while */
16128
16129 vm_map_unlock(map);
16130 if (result != KERN_SUCCESS) {
16131 /*
16132 * Free all allocated elements.
16133 */
16134 for (src_entry = map_header->links.next;
16135 src_entry != CAST_TO_VM_MAP_ENTRY(&map_header->links);
16136 src_entry = new_entry) {
16137 new_entry = src_entry->vme_next;
16138 _vm_map_store_entry_unlink(map_header, src_entry);
16139 if (src_entry->is_sub_map) {
16140 vm_map_deallocate(VME_SUBMAP(src_entry));
16141 } else {
16142 vm_object_deallocate(VME_OBJECT(src_entry));
16143 }
16144 _vm_map_entry_dispose(map_header, src_entry);
16145 }
16146 }
16147 return result;
16148 }
16149
16150 /*
16151 * Routine: vm_remap
16152 *
16153 * Map a portion of a task's address space.
16154 * The mapped region must not overlap more than
16155 * one VM memory object. Protections and
16156 * inheritance attributes remain the same
16157 * as in the original task and are returned as out parameters.
16158 * Source and target tasks can be identical.
16159 * Other attributes are identical to those for vm_map().
16160 */
16161 kern_return_t
16162 vm_map_remap(
16163 vm_map_t target_map,
16164 vm_map_address_t *address,
16165 vm_map_size_t size,
16166 vm_map_offset_t mask,
16167 int flags,
16168 vm_map_kernel_flags_t vmk_flags,
16169 vm_tag_t tag,
16170 vm_map_t src_map,
16171 vm_map_offset_t memory_address,
16172 boolean_t copy,
16173 vm_prot_t *cur_protection,
16174 vm_prot_t *max_protection,
16175 vm_inherit_t inheritance)
16176 {
16177 kern_return_t result;
16178 vm_map_entry_t entry;
16179 vm_map_entry_t insp_entry = VM_MAP_ENTRY_NULL;
16180 vm_map_entry_t new_entry;
16181 struct vm_map_header map_header;
16182 vm_map_offset_t offset_in_mapping;
16183
16184 if (target_map == VM_MAP_NULL) {
16185 return KERN_INVALID_ARGUMENT;
16186 }
16187
16188 switch (inheritance) {
16189 case VM_INHERIT_NONE:
16190 case VM_INHERIT_COPY:
16191 case VM_INHERIT_SHARE:
16192 if (size != 0 && src_map != VM_MAP_NULL) {
16193 break;
16194 }
16195 /*FALL THRU*/
16196 default:
16197 return KERN_INVALID_ARGUMENT;
16198 }
16199
16200 /*
16201 * If the user is requesting that we return the address of the
16202 * first byte of the data (rather than the base of the page),
16203 * then we use different rounding semantics: specifically,
16204 * we assume that (memory_address, size) describes a region
16205 * all of whose pages we must cover, rather than a base to be truncated
16206 * down and a size to be added to that base. So we figure out
16207 * the highest page that the requested region includes and make
16208 * sure that the size will cover it.
16209 *
16210 * The key example we're worried about is of the form:
16211 *
16212 * memory_address = 0x1ff0, size = 0x20
16213 *
16214 * With the old semantics, we round down the memory_address to 0x1000
16215 * and round up the size to 0x1000, resulting in our covering *only*
16216 * page 0x1000. With the new semantics, we'd realize that the region covers
16217 * 0x1ff0-0x2010, and compute a size of 0x2000. Thus, we cover both page
16218 * 0x1000 and page 0x2000 in the region we remap.
16219 */
16220 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
16221 offset_in_mapping = memory_address - vm_map_trunc_page(memory_address, PAGE_MASK);
16222 size = vm_map_round_page(memory_address + size - vm_map_trunc_page(memory_address, PAGE_MASK), PAGE_MASK);
16223 } else {
16224 size = vm_map_round_page(size, PAGE_MASK);
16225 }
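/*
 * Worked illustration of the arithmetic above, not part of the original
 * source, using the 0x1ff0/0x20 example with 4K pages (PAGE_MASK 0xfff):
 *
 *	offset_in_mapping = 0x1ff0 - 0x1000 = 0xff0
 *	size              = round(0x1ff0 + 0x20 - 0x1000, 0xfff)
 *	                  = round(0x1010, 0xfff) = 0x2000
 *
 * so the remapped region covers pages 0x1000 and 0x2000, and the caller
 * later gets *address + 0xff0 back as the data address.
 */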
16226 if (size == 0) {
16227 return KERN_INVALID_ARGUMENT;
16228 }
16229
16230 if (flags & VM_FLAGS_RESILIENT_MEDIA) {
16231 /* must be copy-on-write to be "media resilient" */
16232 if (!copy) {
16233 return KERN_INVALID_ARGUMENT;
16234 }
16235 }
16236
16237 result = vm_map_remap_extract(src_map, memory_address,
16238 size, copy, &map_header,
16239 cur_protection,
16240 max_protection,
16241 inheritance,
16242 target_map->hdr.entries_pageable,
16243 src_map == target_map,
16244 vmk_flags);
16245
16246 if (result != KERN_SUCCESS) {
16247 return result;
16248 }
16249
16250 /*
16251 * Allocate/check a range of free virtual address
16252 * space for the target
16253 */
16254 *address = vm_map_trunc_page(*address,
16255 VM_MAP_PAGE_MASK(target_map));
16256 vm_map_lock(target_map);
16257 result = vm_map_remap_range_allocate(target_map, address, size,
16258 mask, flags, vmk_flags, tag,
16259 &insp_entry);
16260
16261 for (entry = map_header.links.next;
16262 entry != CAST_TO_VM_MAP_ENTRY(&map_header.links);
16263 entry = new_entry) {
16264 new_entry = entry->vme_next;
16265 _vm_map_store_entry_unlink(&map_header, entry);
16266 if (result == KERN_SUCCESS) {
16267 if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
16268 /* no codesigning -> read-only access */
16269 entry->max_protection = VM_PROT_READ;
16270 entry->protection = VM_PROT_READ;
16271 entry->vme_resilient_codesign = TRUE;
16272 }
16273 entry->vme_start += *address;
16274 entry->vme_end += *address;
16275 assert(!entry->map_aligned);
16276 if ((flags & VM_FLAGS_RESILIENT_MEDIA) &&
16277 !entry->is_sub_map &&
16278 (VME_OBJECT(entry) == VM_OBJECT_NULL ||
16279 VME_OBJECT(entry)->internal)) {
16280 entry->vme_resilient_media = TRUE;
16281 }
16282 vm_map_store_entry_link(target_map, insp_entry, entry,
16283 vmk_flags);
16284 insp_entry = entry;
16285 } else {
16286 if (!entry->is_sub_map) {
16287 vm_object_deallocate(VME_OBJECT(entry));
16288 } else {
16289 vm_map_deallocate(VME_SUBMAP(entry));
16290 }
16291 _vm_map_entry_dispose(&map_header, entry);
16292 }
16293 }
16294
16295 if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
16296 *cur_protection = VM_PROT_READ;
16297 *max_protection = VM_PROT_READ;
16298 }
16299
16300 if (target_map->disable_vmentry_reuse == TRUE) {
16301 assert(!target_map->is_nested_map);
16302 if (target_map->highest_entry_end < insp_entry->vme_end) {
16303 target_map->highest_entry_end = insp_entry->vme_end;
16304 }
16305 }
16306
16307 if (result == KERN_SUCCESS) {
16308 target_map->size += size;
16309 SAVE_HINT_MAP_WRITE(target_map, insp_entry);
16310
16311 #if PMAP_CS
16312 if (*max_protection & VM_PROT_EXECUTE) {
16313 vm_map_address_t region_start = 0, region_size = 0;
16314 struct pmap_cs_code_directory *region_cd = NULL;
16315 vm_map_address_t base = 0;
16316 struct pmap_cs_lookup_results results = {};
16317 vm_map_size_t page_addr = vm_map_trunc_page(memory_address, PAGE_MASK);
16318 vm_map_size_t assoc_size = vm_map_round_page(memory_address + size - page_addr, PAGE_MASK);
16319
16320 pmap_cs_lookup(src_map->pmap, memory_address, &results);
16321 region_size = results.region_size;
16322 region_start = results.region_start;
16323 region_cd = results.region_cd_entry;
16324 base = results.base;
16325
16326 if (region_cd != NULL && (page_addr != region_start || assoc_size != region_size)) {
16327 *cur_protection = VM_PROT_READ;
16328 *max_protection = VM_PROT_READ;
16329 printf("mismatched remap of executable range 0x%llx-0x%llx to 0x%llx, "
16330 "region_start 0x%llx, region_size 0x%llx, cd_entry %sNULL, making non-executable.\n",
16331 page_addr, page_addr + assoc_size, *address,
16332 region_start, region_size,
16333 region_cd != NULL ? "not " : "" // Don't leak kernel slide
16334 );
16335 }
16336 }
16337 #endif
16338 }
16339 vm_map_unlock(target_map);
16340
16341 if (result == KERN_SUCCESS && target_map->wiring_required) {
16342 result = vm_map_wire_kernel(target_map, *address,
16343 *address + size, *cur_protection, VM_KERN_MEMORY_MLOCK,
16344 TRUE);
16345 }
16346
16347 /*
16348 * If requested, return the address of the data pointed to by the
16349 * request, rather than the base of the resulting page.
16350 */
16351 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
16352 *address += offset_in_mapping;
16353 }
16354
16355 return result;
16356 }
16357
16358 /*
16359 * Routine: vm_map_remap_range_allocate
16360 *
16361 * Description:
16362 * Allocate a range in the specified virtual address map.
16363 * Returns the address and the map entry just before the allocated
16364 * range.
16365 *
16366 * Map must be locked.
16367 */
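/*
 * Illustrative sketch, not part of the original source: the caller
 * (vm_map_remap() above) takes the map lock, calls this routine, and
 * then links the extracted entries after the returned insertion point:
 *
 *	vm_map_lock(target_map);
 *	kr = vm_map_remap_range_allocate(target_map, &address, size,
 *	    mask, flags, vmk_flags, tag, &insp_entry);
 */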
16368
16369 static kern_return_t
16370 vm_map_remap_range_allocate(
16371 vm_map_t map,
16372 vm_map_address_t *address, /* IN/OUT */
16373 vm_map_size_t size,
16374 vm_map_offset_t mask,
16375 int flags,
16376 vm_map_kernel_flags_t vmk_flags,
16377 __unused vm_tag_t tag,
16378 vm_map_entry_t *map_entry) /* OUT */
16379 {
16380 vm_map_entry_t entry;
16381 vm_map_offset_t start;
16382 vm_map_offset_t end;
16383 vm_map_offset_t desired_empty_end;
16384 kern_return_t kr;
16385 vm_map_entry_t hole_entry;
16386
16387 StartAgain:;
16388
16389 start = *address;
16390
16391 if (flags & VM_FLAGS_ANYWHERE) {
16392 if (flags & VM_FLAGS_RANDOM_ADDR) {
16393 /*
16394 * Get a random start address.
16395 */
16396 kr = vm_map_random_address_for_size(map, address, size);
16397 if (kr != KERN_SUCCESS) {
16398 return kr;
16399 }
16400 start = *address;
16401 }
16402
16403 /*
16404 * Calculate the first possible address.
16405 */
16406
16407 if (start < map->min_offset) {
16408 start = map->min_offset;
16409 }
16410 if (start > map->max_offset) {
16411 return KERN_NO_SPACE;
16412 }
16413
16414 /*
16415 * Look for the first possible address;
16416 * if there's already something at this
16417 * address, we have to start after it.
16418 */
16419
16420 if (map->disable_vmentry_reuse == TRUE) {
16421 VM_MAP_HIGHEST_ENTRY(map, entry, start);
16422 } else {
16423 if (map->holelistenabled) {
16424 hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
16425
16426 if (hole_entry == NULL) {
16427 /*
16428 * No more space in the map?
16429 */
16430 return KERN_NO_SPACE;
16431 } else {
16432 boolean_t found_hole = FALSE;
16433
16434 do {
16435 if (hole_entry->vme_start >= start) {
16436 start = hole_entry->vme_start;
16437 found_hole = TRUE;
16438 break;
16439 }
16440
16441 if (hole_entry->vme_end > start) {
16442 found_hole = TRUE;
16443 break;
16444 }
16445 hole_entry = hole_entry->vme_next;
16446 } while (hole_entry != CAST_TO_VM_MAP_ENTRY(map->holes_list));
16447
16448 if (found_hole == FALSE) {
16449 return KERN_NO_SPACE;
16450 }
16451
16452 entry = hole_entry;
16453 }
16454 } else {
16455 assert(first_free_is_valid(map));
16456 if (start == map->min_offset) {
16457 if ((entry = map->first_free) != vm_map_to_entry(map)) {
16458 start = entry->vme_end;
16459 }
16460 } else {
16461 vm_map_entry_t tmp_entry;
16462 if (vm_map_lookup_entry(map, start, &tmp_entry)) {
16463 start = tmp_entry->vme_end;
16464 }
16465 entry = tmp_entry;
16466 }
16467 }
16468 start = vm_map_round_page(start,
16469 VM_MAP_PAGE_MASK(map));
16470 }
16471
16472 /*
16473 * In any case, the "entry" always precedes
16474 * the proposed new region throughout the
16475 * loop:
16476 */
16477
16478 while (TRUE) {
16479 vm_map_entry_t next;
16480
16481 /*
16482 * Find the end of the proposed new region.
16483 * Be sure we didn't go beyond the end, or
16484 * wrap around the address.
16485 */
16486
16487 end = ((start + mask) & ~mask);
16488 end = vm_map_round_page(end,
16489 VM_MAP_PAGE_MASK(map));
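/*
 * Illustrative arithmetic, not part of the original source: with an
 * alignment mask of 0x3fff (16K alignment) and start == 0x5000,
 * (0x5000 + 0x3fff) & ~0x3fff == 0x8000, so the candidate region is
 * pushed up to the next 16K boundary (and then page-rounded) before
 * "size" is added below.
 */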
16490 if (end < start) {
16491 return KERN_NO_SPACE;
16492 }
16493 start = end;
16494 end += size;
16495
16496 /* We want an entire page of empty space, but don't increase the allocation size. */
16497 desired_empty_end = vm_map_round_page(end, VM_MAP_PAGE_MASK(map));
16498
16499 if ((desired_empty_end > map->max_offset) || (desired_empty_end < start)) {
16500 if (map->wait_for_space) {
16501 if (size <= (map->max_offset -
16502 map->min_offset)) {
16503 assert_wait((event_t) map, THREAD_INTERRUPTIBLE);
16504 vm_map_unlock(map);
16505 thread_block(THREAD_CONTINUE_NULL);
16506 vm_map_lock(map);
16507 goto StartAgain;
16508 }
16509 }
16510
16511 return KERN_NO_SPACE;
16512 }
16513
16514 next = entry->vme_next;
16515
16516 if (map->holelistenabled) {
16517 if (entry->vme_end >= desired_empty_end) {
16518 break;
16519 }
16520 } else {
16521 /*
16522 * If there are no more entries, we must win.
16523 *
16524 * OR
16525 *
16526 * If there is another entry, it must be
16527 * after the end of the potential new region.
16528 */
16529
16530 if (next == vm_map_to_entry(map)) {
16531 break;
16532 }
16533
16534 if (next->vme_start >= desired_empty_end) {
16535 break;
16536 }
16537 }
16538
16539 /*
16540 * Didn't fit -- move to the next entry.
16541 */
16542
16543 entry = next;
16544
16545 if (map->holelistenabled) {
16546 if (entry == CAST_TO_VM_MAP_ENTRY(map->holes_list)) {
16547 /*
16548 * Wrapped around
16549 */
16550 return KERN_NO_SPACE;
16551 }
16552 start = entry->vme_start;
16553 } else {
16554 start = entry->vme_end;
16555 }
16556 }
16557
16558 if (map->holelistenabled) {
16559 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
16560 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
16561 }
16562 }
16563
16564 *address = start;
16565 } else {
16566 vm_map_entry_t temp_entry;
16567
16568 /*
16569 * Verify that:
16570 * the address doesn't itself violate
16571 * the mask requirement.
16572 */
16573
16574 if ((start & mask) != 0) {
16575 return KERN_NO_SPACE;
16576 }
16577
16578
16579 /*
16580 * ... the address is within bounds
16581 */
16582
16583 end = start + size;
16584
16585 if ((start < map->min_offset) ||
16586 (end > map->max_offset) ||
16587 (start >= end)) {
16588 return KERN_INVALID_ADDRESS;
16589 }
16590
16591 /*
16592 * If we're asked to overwrite whatever was mapped in that
16593 * range, first deallocate that range.
16594 */
16595 if (flags & VM_FLAGS_OVERWRITE) {
16596 vm_map_t zap_map;
16597 int remove_flags = VM_MAP_REMOVE_SAVE_ENTRIES | VM_MAP_REMOVE_NO_MAP_ALIGN;
16598
16599 /*
16600 * We use a "zap_map" to avoid having to unlock
16601 * the "map" in vm_map_delete(), which would compromise
16602 * the atomicity of the "deallocate" and then "remap"
16603 * combination.
16604 */
16605 zap_map = vm_map_create(PMAP_NULL,
16606 start,
16607 end,
16608 map->hdr.entries_pageable);
16609 if (zap_map == VM_MAP_NULL) {
16610 return KERN_RESOURCE_SHORTAGE;
16611 }
16612 vm_map_set_page_shift(zap_map, VM_MAP_PAGE_SHIFT(map));
16613 vm_map_disable_hole_optimization(zap_map);
16614
16615 if (vmk_flags.vmkf_overwrite_immutable) {
16616 remove_flags |= VM_MAP_REMOVE_IMMUTABLE;
16617 }
16618 kr = vm_map_delete(map, start, end,
16619 remove_flags,
16620 zap_map);
16621 if (kr == KERN_SUCCESS) {
16622 vm_map_destroy(zap_map,
16623 VM_MAP_REMOVE_NO_PMAP_CLEANUP);
16624 zap_map = VM_MAP_NULL;
16625 }
16626 }
16627
16628 /*
16629 * ... the starting address isn't allocated
16630 */
16631
16632 if (vm_map_lookup_entry(map, start, &temp_entry)) {
16633 return KERN_NO_SPACE;
16634 }
16635
16636 entry = temp_entry;
16637
16638 /*
16639 * ... the next region doesn't overlap the
16640 * end point.
16641 */
16642
16643 if ((entry->vme_next != vm_map_to_entry(map)) &&
16644 (entry->vme_next->vme_start < end)) {
16645 return KERN_NO_SPACE;
16646 }
16647 }
16648 *map_entry = entry;
16649 return KERN_SUCCESS;
16650 }
16651
16652 /*
16653 * vm_map_switch:
16654 *
16655 * Set the address map for the current thread to the specified map
16656 */
16657
16658 vm_map_t
16659 vm_map_switch(
16660 vm_map_t map)
16661 {
16662 int mycpu;
16663 thread_t thread = current_thread();
16664 vm_map_t oldmap = thread->map;
16665
16666 mp_disable_preemption();
16667 mycpu = cpu_number();
16668
16669 /*
16670 * Deactivate the current map and activate the requested map
16671 */
16672 PMAP_SWITCH_USER(thread, map, mycpu);
16673
16674 mp_enable_preemption();
16675 return oldmap;
16676 }
16677
16678
16679 /*
16680 * Routine: vm_map_write_user
16681 *
16682 * Description:
16683 * Copy out data from a kernel space into space in the
16684 * destination map. The space must already exist in the
16685 * destination map.
16686 * NOTE: This routine should only be called by threads
16687 * which can block on a page fault, i.e. kernel-mode user
16688 * threads.
16689 *
16690 */
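/*
 * Illustrative sketch, not part of the original source: a kernel-mode
 * caller holding a reference on "task_map" could push a small structure
 * to a user address it obtained earlier.  The names "task_map",
 * "user_buf" and "stats" are hypothetical:
 *
 *	kern_return_t kr;
 *
 *	kr = vm_map_write_user(task_map, &stats,
 *	    (vm_map_address_t)user_buf, sizeof(stats));
 *	if (kr != KERN_SUCCESS) {
 *		// user_buf was not a valid, writable user address
 *	}
 */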
16691 kern_return_t
16692 vm_map_write_user(
16693 vm_map_t map,
16694 void *src_p,
16695 vm_map_address_t dst_addr,
16696 vm_size_t size)
16697 {
16698 kern_return_t kr = KERN_SUCCESS;
16699
16700 if (current_map() == map) {
16701 if (copyout(src_p, dst_addr, size)) {
16702 kr = KERN_INVALID_ADDRESS;
16703 }
16704 } else {
16705 vm_map_t oldmap;
16706
16707 /* take on the identity of the target map while doing */
16708 /* the transfer */
16709
16710 vm_map_reference(map);
16711 oldmap = vm_map_switch(map);
16712 if (copyout(src_p, dst_addr, size)) {
16713 kr = KERN_INVALID_ADDRESS;
16714 }
16715 vm_map_switch(oldmap);
16716 vm_map_deallocate(map);
16717 }
16718 return kr;
16719 }
16720
16721 /*
16722 * Routine: vm_map_read_user
16723 *
16724 * Description:
16725 * Copy in data from a user space source map into the
16726 * kernel map. The space must already exist in the
16727 * kernel map.
16728 * NOTE: This routine should only be called by threads
16729 * which can block on a page fault, i.e. kernel-mode user
16730 * threads.
16731 *
16732 */
16733 kern_return_t
16734 vm_map_read_user(
16735 vm_map_t map,
16736 vm_map_address_t src_addr,
16737 void *dst_p,
16738 vm_size_t size)
16739 {
16740 kern_return_t kr = KERN_SUCCESS;
16741
16742 if (current_map() == map) {
16743 if (copyin(src_addr, dst_p, size)) {
16744 kr = KERN_INVALID_ADDRESS;
16745 }
16746 } else {
16747 vm_map_t oldmap;
16748
16749 /* take on the identity of the target map while doing */
16750 /* the transfer */
16751
16752 vm_map_reference(map);
16753 oldmap = vm_map_switch(map);
16754 if (copyin(src_addr, dst_p, size)) {
16755 kr = KERN_INVALID_ADDRESS;
16756 }
16757 vm_map_switch(oldmap);
16758 vm_map_deallocate(map);
16759 }
16760 return kr;
16761 }
16762
16763
16764 /*
16765 * vm_map_check_protection:
16766 *
16767 * Assert that the target map allows the specified
16768 * privilege on the entire address region given.
16769 * The entire region must be allocated.
16770 */
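/*
 * Illustrative sketch, not part of the original source: a caller that
 * wants to verify a fully mapped, writable range before operating on it
 * could do ("addr" and "len" are hypothetical):
 *
 *	if (!vm_map_check_protection(map, addr, addr + len,
 *	        VM_PROT_READ | VM_PROT_WRITE)) {
 *		return KERN_PROTECTION_FAILURE;
 *	}
 */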
16771 boolean_t
16772 vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
16773 vm_map_offset_t end, vm_prot_t protection)
16774 {
16775 vm_map_entry_t entry;
16776 vm_map_entry_t tmp_entry;
16777
16778 vm_map_lock(map);
16779
16780 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end) {
16781 vm_map_unlock(map);
16782 return FALSE;
16783 }
16784
16785 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
16786 vm_map_unlock(map);
16787 return FALSE;
16788 }
16789
16790 entry = tmp_entry;
16791
16792 while (start < end) {
16793 if (entry == vm_map_to_entry(map)) {
16794 vm_map_unlock(map);
16795 return FALSE;
16796 }
16797
16798 /*
16799 * No holes allowed!
16800 */
16801
16802 if (start < entry->vme_start) {
16803 vm_map_unlock(map);
16804 return FALSE;
16805 }
16806
16807 /*
16808 * Check protection associated with entry.
16809 */
16810
16811 if ((entry->protection & protection) != protection) {
16812 vm_map_unlock(map);
16813 return FALSE;
16814 }
16815
16816 /* go to next entry */
16817
16818 start = entry->vme_end;
16819 entry = entry->vme_next;
16820 }
16821 vm_map_unlock(map);
16822 return TRUE;
16823 }
16824
16825 kern_return_t
16826 vm_map_purgable_control(
16827 vm_map_t map,
16828 vm_map_offset_t address,
16829 vm_purgable_t control,
16830 int *state)
16831 {
16832 vm_map_entry_t entry;
16833 vm_object_t object;
16834 kern_return_t kr;
16835 boolean_t was_nonvolatile;
16836
16837 /*
16838 * Vet all the input parameters and current type and state of the
16839 * underlying object. Return with an error if anything is amiss.
16840 */
16841 if (map == VM_MAP_NULL) {
16842 return KERN_INVALID_ARGUMENT;
16843 }
16844
16845 if (control != VM_PURGABLE_SET_STATE &&
16846 control != VM_PURGABLE_GET_STATE &&
16847 control != VM_PURGABLE_PURGE_ALL &&
16848 control != VM_PURGABLE_SET_STATE_FROM_KERNEL) {
16849 return KERN_INVALID_ARGUMENT;
16850 }
16851
16852 if (control == VM_PURGABLE_PURGE_ALL) {
16853 vm_purgeable_object_purge_all();
16854 return KERN_SUCCESS;
16855 }
16856
16857 if ((control == VM_PURGABLE_SET_STATE ||
16858 control == VM_PURGABLE_SET_STATE_FROM_KERNEL) &&
16859 (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) ||
16860 ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK))) {
16861 return KERN_INVALID_ARGUMENT;
16862 }
16863
16864 vm_map_lock_read(map);
16865
16866 if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {
16867 /*
16868 * Must pass a valid non-submap address.
16869 */
16870 vm_map_unlock_read(map);
16871 return KERN_INVALID_ADDRESS;
16872 }
16873
16874 if ((entry->protection & VM_PROT_WRITE) == 0) {
16875 /*
16876 * Can't apply purgable controls to something you can't write.
16877 */
16878 vm_map_unlock_read(map);
16879 return KERN_PROTECTION_FAILURE;
16880 }
16881
16882 object = VME_OBJECT(entry);
16883 if (object == VM_OBJECT_NULL ||
16884 object->purgable == VM_PURGABLE_DENY) {
16885 /*
16886 * Object must already be present and be purgeable.
16887 */
16888 vm_map_unlock_read(map);
16889 return KERN_INVALID_ARGUMENT;
16890 }
16891
16892 vm_object_lock(object);
16893
16894 #if 00
16895 if (VME_OFFSET(entry) != 0 ||
16896 entry->vme_end - entry->vme_start != object->vo_size) {
16897 /*
16898 * Can only apply purgable controls to the whole (existing)
16899 * object at once.
16900 */
16901 vm_map_unlock_read(map);
16902 vm_object_unlock(object);
16903 return KERN_INVALID_ARGUMENT;
16904 }
16905 #endif
16906
16907 assert(!entry->is_sub_map);
16908 assert(!entry->use_pmap); /* purgeable has its own accounting */
16909
16910 vm_map_unlock_read(map);
16911
16912 was_nonvolatile = (object->purgable == VM_PURGABLE_NONVOLATILE);
16913
16914 kr = vm_object_purgable_control(object, control, state);
16915
16916 if (was_nonvolatile &&
16917 object->purgable != VM_PURGABLE_NONVOLATILE &&
16918 map->pmap == kernel_pmap) {
16919 #if DEBUG
16920 object->vo_purgeable_volatilizer = kernel_task;
16921 #endif /* DEBUG */
16922 }
16923
16924 vm_object_unlock(object);
16925
16926 return kr;
16927 }
16928
16929 kern_return_t
16930 vm_map_page_query_internal(
16931 vm_map_t target_map,
16932 vm_map_offset_t offset,
16933 int *disposition,
16934 int *ref_count)
16935 {
16936 kern_return_t kr;
16937 vm_page_info_basic_data_t info;
16938 mach_msg_type_number_t count;
16939
16940 count = VM_PAGE_INFO_BASIC_COUNT;
16941 kr = vm_map_page_info(target_map,
16942 offset,
16943 VM_PAGE_INFO_BASIC,
16944 (vm_page_info_t) &info,
16945 &count);
16946 if (kr == KERN_SUCCESS) {
16947 *disposition = info.disposition;
16948 *ref_count = info.ref_count;
16949 } else {
16950 *disposition = 0;
16951 *ref_count = 0;
16952 }
16953
16954 return kr;
16955 }
16956
16957 kern_return_t
16958 vm_map_page_info(
16959 vm_map_t map,
16960 vm_map_offset_t offset,
16961 vm_page_info_flavor_t flavor,
16962 vm_page_info_t info,
16963 mach_msg_type_number_t *count)
16964 {
16965 return vm_map_page_range_info_internal(map,
16966 offset, /* start of range */
16967 (offset + 1), /* this will get rounded in the call to the page boundary */
16968 flavor,
16969 info,
16970 count);
16971 }
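/*
 * Illustrative note, not part of the original source: passing
 * (offset, offset + 1) makes the range routine below inspect exactly
 * the page containing "offset", since it truncates the start and rounds
 * the end to page boundaries; e.g. offset 0x1234 becomes the single
 * page [0x1000, 0x2000).
 */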
16972
16973 kern_return_t
16974 vm_map_page_range_info_internal(
16975 vm_map_t map,
16976 vm_map_offset_t start_offset,
16977 vm_map_offset_t end_offset,
16978 vm_page_info_flavor_t flavor,
16979 vm_page_info_t info,
16980 mach_msg_type_number_t *count)
16981 {
16982 vm_map_entry_t map_entry = VM_MAP_ENTRY_NULL;
16983 vm_object_t object = VM_OBJECT_NULL, curr_object = VM_OBJECT_NULL;
16984 vm_page_t m = VM_PAGE_NULL;
16985 kern_return_t retval = KERN_SUCCESS;
16986 int disposition = 0;
16987 int ref_count = 0;
16988 int depth = 0, info_idx = 0;
16989 vm_page_info_basic_t basic_info = 0;
16990 vm_map_offset_t offset_in_page = 0, offset_in_object = 0, curr_offset_in_object = 0;
16991 vm_map_offset_t start = 0, end = 0, curr_s_offset = 0, curr_e_offset = 0;
16992 boolean_t do_region_footprint;
16993 ledger_amount_t ledger_resident, ledger_compressed;
16994
16995 switch (flavor) {
16996 case VM_PAGE_INFO_BASIC:
16997 if (*count != VM_PAGE_INFO_BASIC_COUNT) {
16998 /*
16999 * The "vm_page_info_basic_data" structure was not
17000 * properly padded, so allow the size to be off by
17001 * one to maintain backwards binary compatibility...
17002 */
17003 if (*count != VM_PAGE_INFO_BASIC_COUNT - 1) {
17004 return KERN_INVALID_ARGUMENT;
17005 }
17006 }
17007 break;
17008 default:
17009 return KERN_INVALID_ARGUMENT;
17010 }
17011
17012 do_region_footprint = task_self_region_footprint();
17013 disposition = 0;
17014 ref_count = 0;
17015 depth = 0;
17016 info_idx = 0; /* Tracks the next index within the info structure to be filled.*/
17017 retval = KERN_SUCCESS;
17018
17019 offset_in_page = start_offset & PAGE_MASK;
17020 start = vm_map_trunc_page(start_offset, PAGE_MASK);
17021 end = vm_map_round_page(end_offset, PAGE_MASK);
17022
17023 if (end < start) {
17024 return KERN_INVALID_ARGUMENT;
17025 }
17026
17027 assert((end - start) <= MAX_PAGE_RANGE_QUERY);
17028
17029 vm_map_lock_read(map);
17030
17031 task_ledgers_footprint(map->pmap->ledger, &ledger_resident, &ledger_compressed);
17032
17033 for (curr_s_offset = start; curr_s_offset < end;) {
17034 /*
17035 * New lookup needs reset of these variables.
17036 */
17037 curr_object = object = VM_OBJECT_NULL;
17038 offset_in_object = 0;
17039 ref_count = 0;
17040 depth = 0;
17041
17042 if (do_region_footprint &&
17043 curr_s_offset >= vm_map_last_entry(map)->vme_end) {
17044 /*
17045 * Request for "footprint" info about a page beyond
17046 * the end of address space: this must be for
17047 * the fake region vm_map_region_recurse_64()
17048 * reported to account for non-volatile purgeable
17049 * memory owned by this task.
17050 */
17051 disposition = 0;
17052
17053 if (curr_s_offset - vm_map_last_entry(map)->vme_end <=
17054 (unsigned) ledger_compressed) {
17055 /*
17056 * We haven't reported all the "non-volatile
17057 * compressed" pages yet, so report this fake
17058 * page as "compressed".
17059 */
17060 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
17061 } else {
17062 /*
17063 * We've reported all the non-volatile
17064 * compressed pages but not all the non-volatile
17065 * pages, so report this fake page as
17066 * "resident dirty".
17067 */
17068 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
17069 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
17070 disposition |= VM_PAGE_QUERY_PAGE_REF;
17071 }
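/*
 * Illustrative note, not part of the original source: if the task's
 * compressed footprint is, say, three pages, the fake pages within that
 * distance past the last entry's end are reported as "paged out" and
 * every fake page after that as resident and dirty, so a caller walking
 * the fake region sees the compressed portion first and the resident
 * portion after it.
 */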
17072 switch (flavor) {
17073 case VM_PAGE_INFO_BASIC:
17074 basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
17075 basic_info->disposition = disposition;
17076 basic_info->ref_count = 1;
17077 basic_info->object_id = INFO_MAKE_FAKE_OBJECT_ID(map, task_ledgers.purgeable_nonvolatile);
17078 basic_info->offset = 0;
17079 basic_info->depth = 0;
17080
17081 info_idx++;
17082 break;
17083 }
17084 curr_s_offset += PAGE_SIZE;
17085 continue;
17086 }
17087
17088 /*
17089 * First, find the map entry covering "curr_s_offset", going down
17090 * submaps if necessary.
17091 */
17092 if (!vm_map_lookup_entry(map, curr_s_offset, &map_entry)) {
17093 /* no entry -> no object -> no page */
17094
17095 if (curr_s_offset < vm_map_min(map)) {
17096 /*
17097 * Illegal address that falls below map min.
17098 */
17099 curr_e_offset = MIN(end, vm_map_min(map));
17100 } else if (curr_s_offset >= vm_map_max(map)) {
17101 /*
17102 * Illegal address that falls on/after map max.
17103 */
17104 curr_e_offset = end;
17105 } else if (map_entry == vm_map_to_entry(map)) {
17106 /*
17107 * Hit a hole.
17108 */
17109 if (map_entry->vme_next == vm_map_to_entry(map)) {
17110 /*
17111 * Empty map.
17112 */
17113 curr_e_offset = MIN(map->max_offset, end);
17114 } else {
17115 /*
17116 * Hole at start of the map.
17117 */
17118 curr_e_offset = MIN(map_entry->vme_next->vme_start, end);
17119 }
17120 } else {
17121 if (map_entry->vme_next == vm_map_to_entry(map)) {
17122 /*
17123 * Hole at the end of the map.
17124 */
17125 curr_e_offset = MIN(map->max_offset, end);
17126 } else {
17127 curr_e_offset = MIN(map_entry->vme_next->vme_start, end);
17128 }
17129 }
17130
17131 assert(curr_e_offset >= curr_s_offset);
17132
17133 uint64_t num_pages = (curr_e_offset - curr_s_offset) >> PAGE_SHIFT;
17134
17135 void *info_ptr = (void*) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
17136
17137 bzero(info_ptr, num_pages * sizeof(struct vm_page_info_basic));
17138
17139 curr_s_offset = curr_e_offset;
17140
17141 info_idx += num_pages;
17142
17143 continue;
17144 }
17145
17146 /* compute offset from this map entry's start */
17147 offset_in_object = curr_s_offset - map_entry->vme_start;
17148
17149 /* compute offset into this map entry's object (or submap) */
17150 offset_in_object += VME_OFFSET(map_entry);
17151
17152 if (map_entry->is_sub_map) {
17153 vm_map_t sub_map = VM_MAP_NULL;
17154 vm_page_info_t submap_info = 0;
17155 vm_map_offset_t submap_s_offset = 0, submap_e_offset = 0, range_len = 0;
17156
17157 range_len = MIN(map_entry->vme_end, end) - curr_s_offset;
17158
17159 submap_s_offset = offset_in_object;
17160 submap_e_offset = submap_s_offset + range_len;
17161
17162 sub_map = VME_SUBMAP(map_entry);
17163
17164 vm_map_reference(sub_map);
17165 vm_map_unlock_read(map);
17166
17167 submap_info = (vm_page_info_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
17168
17169 retval = vm_map_page_range_info_internal(sub_map,
17170 submap_s_offset,
17171 submap_e_offset,
17172 VM_PAGE_INFO_BASIC,
17173 (vm_page_info_t) submap_info,
17174 count);
17175
17176 assert(retval == KERN_SUCCESS);
17177
17178 vm_map_lock_read(map);
17179 vm_map_deallocate(sub_map);
17180
17181 /* Move the "info" index by the number of pages we inspected.*/
17182 info_idx += range_len >> PAGE_SHIFT;
17183
17184 /* Move our current offset by the size of the range we inspected.*/
17185 curr_s_offset += range_len;
17186
17187 continue;
17188 }
17189
17190 object = VME_OBJECT(map_entry);
17191 if (object == VM_OBJECT_NULL) {
17192 /*
17193 * We don't have an object here and, hence,
17194 * no pages to inspect. We'll fill up the
17195 * info structure appropriately.
17196 */
17197
17198 curr_e_offset = MIN(map_entry->vme_end, end);
17199
17200 uint64_t num_pages = (curr_e_offset - curr_s_offset) >> PAGE_SHIFT;
17201
17202 void *info_ptr = (void*) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
17203
17204 bzero(info_ptr, num_pages * sizeof(struct vm_page_info_basic));
17205
17206 curr_s_offset = curr_e_offset;
17207
17208 info_idx += num_pages;
17209
17210 continue;
17211 }
17212
17213 if (do_region_footprint) {
17214 int pmap_disp;
17215
17216 disposition = 0;
17217 pmap_disp = 0;
17218 if (map->has_corpse_footprint) {
17219 /*
17220 * Query the page info data we saved
17221 * while forking the corpse.
17222 */
17223 vm_map_corpse_footprint_query_page_info(
17224 map,
17225 curr_s_offset,
17226 &pmap_disp);
17227 } else {
17228 /*
17229 * Query the pmap.
17230 */
17231 pmap_query_page_info(map->pmap,
17232 curr_s_offset,
17233 &pmap_disp);
17234 }
17235 if (object->purgable == VM_PURGABLE_NONVOLATILE &&
17236 /* && not tagged as no-footprint? */
17237 VM_OBJECT_OWNER(object) != NULL &&
17238 VM_OBJECT_OWNER(object)->map == map) {
17239 if ((((curr_s_offset
17240 - map_entry->vme_start
17241 + VME_OFFSET(map_entry))
17242 / PAGE_SIZE) <
17243 (object->resident_page_count +
17244 vm_compressor_pager_get_count(object->pager)))) {
17245 /*
17246 * Non-volatile purgeable object owned
17247 * by this task: report the first
17248 * "#resident + #compressed" pages as
17249 * "resident" (to show that they
17250 * contribute to the footprint) but not
17251 * "dirty" (to avoid double-counting
17252 * with the fake "non-volatile" region
17253 * we'll report at the end of the
17254 * address space to account for all
17255 * (mapped or not) non-volatile memory
17256 * owned by this task).
17257 */
17258 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
17259 }
17260 } else if ((object->purgable == VM_PURGABLE_VOLATILE ||
17261 object->purgable == VM_PURGABLE_EMPTY) &&
17262 /* && not tagged as no-footprint? */
17263 VM_OBJECT_OWNER(object) != NULL &&
17264 VM_OBJECT_OWNER(object)->map == map) {
17265 if ((((curr_s_offset
17266 - map_entry->vme_start
17267 + VME_OFFSET(map_entry))
17268 / PAGE_SIZE) <
17269 object->wired_page_count)) {
17270 /*
17271 * Volatile|empty purgeable object owned
17272 * by this task: report the first
17273 * "#wired" pages as "resident" (to
17274 * show that they contribute to the
17275 * footprint) but not "dirty" (to avoid
17276 * double-counting with the fake
17277 * "non-volatile" region we'll report
17278 * at the end of the address space to
17279 * account for all (mapped or not)
17280 * non-volatile memory owned by this
17281 * task).
17282 */
17283 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
17284 }
17285 } else if (map_entry->iokit_acct &&
17286 object->internal &&
17287 object->purgable == VM_PURGABLE_DENY) {
17288 /*
17289 * Non-purgeable IOKit memory: phys_footprint
17290 * includes the entire virtual mapping.
17291 */
17292 assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
17293 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
17294 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
17295 } else if (pmap_disp & (PMAP_QUERY_PAGE_ALTACCT |
17296 PMAP_QUERY_PAGE_COMPRESSED_ALTACCT)) {
17297 /* alternate accounting */
17298 #if CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG)
17299 if (map->pmap->footprint_was_suspended ||
17300 /*
17301 * XXX corpse does not know if original
17302 * pmap had its footprint suspended...
17303 */
17304 map->has_corpse_footprint) {
17305 /*
17306 * The assertion below can fail if dyld
17307 * suspended footprint accounting
17308 * while doing some adjustments to
17309 * this page; the mapping would say
17310 * "use pmap accounting" but the page
17311 * would be marked "alternate
17312 * accounting".
17313 */
17314 } else
17315 #endif /* CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG) */
17316 assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
17317 pmap_disp = 0;
17318 } else {
17319 if (pmap_disp & PMAP_QUERY_PAGE_PRESENT) {
17320 assertf(map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
17321 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
17322 disposition |= VM_PAGE_QUERY_PAGE_REF;
17323 if (pmap_disp & PMAP_QUERY_PAGE_INTERNAL) {
17324 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
17325 } else {
17326 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
17327 }
17328 if (pmap_disp & PMAP_QUERY_PAGE_REUSABLE) {
17329 disposition |= VM_PAGE_QUERY_PAGE_REUSABLE;
17330 }
17331 } else if (pmap_disp & PMAP_QUERY_PAGE_COMPRESSED) {
17332 assertf(map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
17333 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
17334 }
17335 }
17336 switch (flavor) {
17337 case VM_PAGE_INFO_BASIC:
17338 basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
17339 basic_info->disposition = disposition;
17340 basic_info->ref_count = 1;
17341 basic_info->object_id = INFO_MAKE_FAKE_OBJECT_ID(map, task_ledgers.purgeable_nonvolatile);
17342 basic_info->offset = 0;
17343 basic_info->depth = 0;
17344
17345 info_idx++;
17346 break;
17347 }
17348 curr_s_offset += PAGE_SIZE;
17349 continue;
17350 }
17351
17352 vm_object_reference(object);
17353 /*
17354 * Shared mode -- so we can allow other readers
17355 * to grab the lock too.
17356 */
17357 vm_object_lock_shared(object);
17358
17359 curr_e_offset = MIN(map_entry->vme_end, end);
17360
17361 vm_map_unlock_read(map);
17362
17363 map_entry = NULL; /* map is unlocked, the entry is no longer valid. */
17364
17365 curr_object = object;
17366
17367 for (; curr_s_offset < curr_e_offset;) {
17368 if (object == curr_object) {
17369 ref_count = curr_object->ref_count - 1; /* account for our object reference above. */
17370 } else {
17371 ref_count = curr_object->ref_count;
17372 }
17373
17374 curr_offset_in_object = offset_in_object;
17375
17376 for (;;) {
17377 m = vm_page_lookup(curr_object, curr_offset_in_object);
17378
17379 if (m != VM_PAGE_NULL) {
17380 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
17381 break;
17382 } else {
17383 if (curr_object->internal &&
17384 curr_object->alive &&
17385 !curr_object->terminating &&
17386 curr_object->pager_ready) {
17387 if (VM_COMPRESSOR_PAGER_STATE_GET(curr_object, curr_offset_in_object)
17388 == VM_EXTERNAL_STATE_EXISTS) {
17389 /* the pager has that page */
17390 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
17391 break;
17392 }
17393 }
17394
17395 /*
17396 * Go down the VM object shadow chain until we find the page
17397 * we're looking for.
17398 */
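/*
 * Illustrative note, not part of the original source: for a
 * copy-on-write mapping whose page has not been written yet, the page
 * still lives in a backing (shadowed) object, so this walk adds
 * vo_shadow_offset, trades this object's lock for the shadow's, and
 * bumps "depth" for each level it descends.
 */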
17399
17400 if (curr_object->shadow != VM_OBJECT_NULL) {
17401 vm_object_t shadow = VM_OBJECT_NULL;
17402
17403 curr_offset_in_object += curr_object->vo_shadow_offset;
17404 shadow = curr_object->shadow;
17405
17406 vm_object_lock_shared(shadow);
17407 vm_object_unlock(curr_object);
17408
17409 curr_object = shadow;
17410 depth++;
17411 continue;
17412 } else {
17413 break;
17414 }
17415 }
17416 }
17417
17418 /* The ref_count is not strictly accurate; it measures the number */
17419 /* of entities holding a ref on the object. They may not be mapping */
17420 /* the object, or may not be mapping the section holding the */
17421 /* target page, but it's still a ballpark number and, though an over- */
17422 /* count, it picks up the copy-on-write cases. */
17423
17424 /* We could also get a picture of page sharing from pmap_attributes */
17425 /* but this would undercount, as only faulted-in mappings would */
17426 /* show up. */
17427
17428 if ((curr_object == object) && curr_object->shadow) {
17429 disposition |= VM_PAGE_QUERY_PAGE_COPIED;
17430 }
17431
17432 if (!curr_object->internal) {
17433 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
17434 }
17435
17436 if (m != VM_PAGE_NULL) {
17437 if (m->vmp_fictitious) {
17438 disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
17439 } else {
17440 if (m->vmp_dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(m))) {
17441 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
17442 }
17443
17444 if (m->vmp_reference || pmap_is_referenced(VM_PAGE_GET_PHYS_PAGE(m))) {
17445 disposition |= VM_PAGE_QUERY_PAGE_REF;
17446 }
17447
17448 if (m->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q) {
17449 disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE;
17450 }
17451
17452 if (m->vmp_cs_validated) {
17453 disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED;
17454 }
17455 if (m->vmp_cs_tainted) {
17456 disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED;
17457 }
17458 if (m->vmp_cs_nx) {
17459 disposition |= VM_PAGE_QUERY_PAGE_CS_NX;
17460 }
17461 if (m->vmp_reusable || curr_object->all_reusable) {
17462 disposition |= VM_PAGE_QUERY_PAGE_REUSABLE;
17463 }
17464 }
17465 }
17466
17467 switch (flavor) {
17468 case VM_PAGE_INFO_BASIC:
17469 basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
17470 basic_info->disposition = disposition;
17471 basic_info->ref_count = ref_count;
17472 basic_info->object_id = (vm_object_id_t) (uintptr_t)
17473 VM_KERNEL_ADDRPERM(curr_object);
17474 basic_info->offset =
17475 (memory_object_offset_t) curr_offset_in_object + offset_in_page;
17476 basic_info->depth = depth;
17477
17478 info_idx++;
17479 break;
17480 }
17481
17482 disposition = 0;
17483 offset_in_page = 0; // This doesn't really make sense for any offset other than the starting offset.
17484
17485 /*
17486 * Move to next offset in the range and in our object.
17487 */
17488 curr_s_offset += PAGE_SIZE;
17489 offset_in_object += PAGE_SIZE;
17490 curr_offset_in_object = offset_in_object;
17491
17492 if (curr_object != object) {
17493 vm_object_unlock(curr_object);
17494
17495 curr_object = object;
17496
17497 vm_object_lock_shared(curr_object);
17498 } else {
17499 vm_object_lock_yield_shared(curr_object);
17500 }
17501 }
17502
17503 vm_object_unlock(curr_object);
17504 vm_object_deallocate(curr_object);
17505
17506 vm_map_lock_read(map);
17507 }
17508
17509 vm_map_unlock_read(map);
17510 return retval;
17511 }
17512
17513 /*
17514 * vm_map_msync
17515 *
17516 * Synchronises the memory range specified with its backing store
17517 * image by either flushing or cleaning the contents to the appropriate
17518 * memory manager, engaging in a memory object synchronize dialog with
17519 * the manager. The client doesn't return until the manager issues
17520 * m_o_s_completed message. MIG Magically converts user task parameter
17521 * to the task's address map.
17522 *
17523 * interpretation of sync_flags
17524 * VM_SYNC_INVALIDATE - discard pages, only return precious
17525 * pages to manager.
17526 *
17527 * VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
17528 * - discard pages, write dirty or precious
17529 * pages back to memory manager.
17530 *
17531 * VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
17532 * - write dirty or precious pages back to
17533 * the memory manager.
17534 *
17535 * VM_SYNC_CONTIGUOUS - does everything normally, but if there
17536 * is a hole in the region, and we would
17537 * have returned KERN_SUCCESS, return
17538 * KERN_INVALID_ADDRESS instead.
17539 *
17540 * NOTE
17541 * The memory object attributes have not yet been implemented; this
17542 * function will have to deal with the invalidate attribute.
17543 *
17544 * RETURNS
17545 * KERN_INVALID_TASK Bad task parameter
17546 * KERN_INVALID_ARGUMENT both sync and async were specified.
17547 * KERN_SUCCESS The usual.
17548 * KERN_INVALID_ADDRESS There was a hole in the region.
17549 */
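/*
 * Illustrative sketch, not part of the original source: flushing a
 * pager-backed range synchronously and discarding its pages, the way an
 * msync(MS_SYNC | MS_INVALIDATE)-style caller might ("addr" and "len"
 * are hypothetical):
 *
 *	kr = vm_map_msync(map, addr, len,
 *	    VM_SYNC_SYNCHRONOUS | VM_SYNC_INVALIDATE | VM_SYNC_CONTIGUOUS);
 *	if (kr == KERN_INVALID_ADDRESS) {
 *		// part of [addr, addr + len) was unmapped
 *	}
 */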
17550
17551 kern_return_t
17552 vm_map_msync(
17553 vm_map_t map,
17554 vm_map_address_t address,
17555 vm_map_size_t size,
17556 vm_sync_t sync_flags)
17557 {
17558 vm_map_entry_t entry;
17559 vm_map_size_t amount_left;
17560 vm_object_offset_t offset;
17561 boolean_t do_sync_req;
17562 boolean_t had_hole = FALSE;
17563 vm_map_offset_t pmap_offset;
17564
17565 if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
17566 (sync_flags & VM_SYNC_SYNCHRONOUS)) {
17567 return KERN_INVALID_ARGUMENT;
17568 }
17569
17570 /*
17571 * align address and size on page boundaries
17572 */
17573 size = (vm_map_round_page(address + size,
17574 VM_MAP_PAGE_MASK(map)) -
17575 vm_map_trunc_page(address,
17576 VM_MAP_PAGE_MASK(map)));
17577 address = vm_map_trunc_page(address,
17578 VM_MAP_PAGE_MASK(map));
17579
17580 if (map == VM_MAP_NULL) {
17581 return KERN_INVALID_TASK;
17582 }
17583
17584 if (size == 0) {
17585 return KERN_SUCCESS;
17586 }
17587
17588 amount_left = size;
17589
17590 while (amount_left > 0) {
17591 vm_object_size_t flush_size;
17592 vm_object_t object;
17593
17594 vm_map_lock(map);
17595 if (!vm_map_lookup_entry(map,
17596 address,
17597 &entry)) {
17598 vm_map_size_t skip;
17599
17600 /*
17601 * hole in the address map.
17602 */
17603 had_hole = TRUE;
17604
17605 if (sync_flags & VM_SYNC_KILLPAGES) {
17606 /*
17607 * For VM_SYNC_KILLPAGES, there should be
17608 * no holes in the range, since we couldn't
17609 * prevent someone else from allocating in
17610 * that hole and we wouldn't want to "kill"
17611 * their pages.
17612 */
17613 vm_map_unlock(map);
17614 break;
17615 }
17616
17617 /*
17618 * Check for empty map.
17619 */
17620 if (entry == vm_map_to_entry(map) &&
17621 entry->vme_next == entry) {
17622 vm_map_unlock(map);
17623 break;
17624 }
17625 /*
17626 * Check that we don't wrap and that
17627 * we have at least one real map entry.
17628 */
17629 if ((map->hdr.nentries == 0) ||
17630 (entry->vme_next->vme_start < address)) {
17631 vm_map_unlock(map);
17632 break;
17633 }
17634 /*
17635 * Move up to the next entry if needed
17636 */
17637 skip = (entry->vme_next->vme_start - address);
17638 if (skip >= amount_left) {
17639 amount_left = 0;
17640 } else {
17641 amount_left -= skip;
17642 }
17643 address = entry->vme_next->vme_start;
17644 vm_map_unlock(map);
17645 continue;
17646 }
17647
17648 offset = address - entry->vme_start;
17649 pmap_offset = address;
17650
17651 /*
17652 * do we have more to flush than is contained in this
17653 * entry ?
17654 */
17655 if (amount_left + entry->vme_start + offset > entry->vme_end) {
17656 flush_size = entry->vme_end -
17657 (entry->vme_start + offset);
17658 } else {
17659 flush_size = amount_left;
17660 }
17661 amount_left -= flush_size;
17662 address += flush_size;
17663
17664 if (entry->is_sub_map == TRUE) {
17665 vm_map_t local_map;
17666 vm_map_offset_t local_offset;
17667
17668 local_map = VME_SUBMAP(entry);
17669 local_offset = VME_OFFSET(entry);
17670 vm_map_reference(local_map);
17671 vm_map_unlock(map);
17672 if (vm_map_msync(
17673 local_map,
17674 local_offset,
17675 flush_size,
17676 sync_flags) == KERN_INVALID_ADDRESS) {
17677 had_hole = TRUE;
17678 }
17679 vm_map_deallocate(local_map);
17680 continue;
17681 }
17682 object = VME_OBJECT(entry);
17683
17684 /*
17685 * We can't sync this object if the object has not been
17686 * created yet
17687 */
17688 if (object == VM_OBJECT_NULL) {
17689 vm_map_unlock(map);
17690 continue;
17691 }
17692 offset += VME_OFFSET(entry);
17693
17694 vm_object_lock(object);
17695
17696 if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
17697 int kill_pages = 0;
17698 boolean_t reusable_pages = FALSE;
17699
17700 if (sync_flags & VM_SYNC_KILLPAGES) {
17701 if (((object->ref_count == 1) ||
17702 ((object->copy_strategy !=
17703 MEMORY_OBJECT_COPY_SYMMETRIC) &&
17704 (object->copy == VM_OBJECT_NULL))) &&
17705 (object->shadow == VM_OBJECT_NULL)) {
17706 if (object->ref_count != 1) {
17707 vm_page_stats_reusable.free_shared++;
17708 }
17709 kill_pages = 1;
17710 } else {
17711 kill_pages = -1;
17712 }
17713 }
17714 if (kill_pages != -1) {
17715 vm_object_deactivate_pages(
17716 object,
17717 offset,
17718 (vm_object_size_t) flush_size,
17719 kill_pages,
17720 reusable_pages,
17721 map->pmap,
17722 pmap_offset);
17723 }
17724 vm_object_unlock(object);
17725 vm_map_unlock(map);
17726 continue;
17727 }
17728 /*
17729 * We can't sync this object if there isn't a pager.
17730 * Don't bother to sync internal objects, since there can't
17731 * be any "permanent" storage for these objects anyway.
17732 */
17733 if ((object->pager == MEMORY_OBJECT_NULL) ||
17734 (object->internal) || (object->private)) {
17735 vm_object_unlock(object);
17736 vm_map_unlock(map);
17737 continue;
17738 }
17739 /*
17740 * keep a reference on the object until syncing is done
17741 */
17742 vm_object_reference_locked(object);
17743 vm_object_unlock(object);
17744
17745 vm_map_unlock(map);
17746
17747 do_sync_req = vm_object_sync(object,
17748 offset,
17749 flush_size,
17750 sync_flags & VM_SYNC_INVALIDATE,
17751 ((sync_flags & VM_SYNC_SYNCHRONOUS) ||
17752 (sync_flags & VM_SYNC_ASYNCHRONOUS)),
17753 sync_flags & VM_SYNC_SYNCHRONOUS);
17754
17755 if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) {
17756 /*
17757 * clear out the clustering and read-ahead hints
17758 */
17759 vm_object_lock(object);
17760
17761 object->pages_created = 0;
17762 object->pages_used = 0;
17763 object->sequential = 0;
17764 object->last_alloc = 0;
17765
17766 vm_object_unlock(object);
17767 }
17768 vm_object_deallocate(object);
17769 } /* while */
17770
17771 /* for proper msync() behaviour */
17772 if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS)) {
17773 return KERN_INVALID_ADDRESS;
17774 }
17775
17776 return KERN_SUCCESS;
17777 }/* vm_msync */
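/*
 * Illustrative usage sketch (not part of the original source): a hypothetical
 * in-kernel caller flushing and invalidating a range of the current map,
 * treating any hole in the range as an error via VM_SYNC_CONTIGUOUS.  The
 * address and size below are made-up values for illustration only.
 *
 *	kern_return_t	kr;
 *
 *	kr = vm_map_msync(current_map(),
 *	    (vm_map_address_t)0x100000000ULL,	// hypothetical start address
 *	    (vm_map_size_t)(16 * PAGE_SIZE),	// hypothetical length
 *	    VM_SYNC_SYNCHRONOUS | VM_SYNC_INVALIDATE | VM_SYNC_CONTIGUOUS);
 *	if (kr == KERN_INVALID_ADDRESS) {
 *		// the range contained a hole
 *	}
 *
 * VM_SYNC_SYNCHRONOUS and VM_SYNC_ASYNCHRONOUS are mutually exclusive;
 * passing both yields KERN_INVALID_ARGUMENT.
 */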
17778
17779 /*
17780 * Routine: convert_port_entry_to_map
17781 * Purpose:
17782 * Convert from a port specifying an entry or a task
17783 * to a map. Doesn't consume the port ref; produces a map ref,
17784 * which may be null. Unlike convert_port_to_map, the
17785 * port may be backed by a task or by a named entry.
17786 * Conditions:
17787 * Nothing locked.
17788 */
17789
17790
17791 vm_map_t
17792 convert_port_entry_to_map(
17793 ipc_port_t port)
17794 {
17795 vm_map_t map;
17796 vm_named_entry_t named_entry;
17797 uint32_t try_failed_count = 0;
17798
17799 if (IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
17800 while (TRUE) {
17801 ip_lock(port);
17802 if (ip_active(port) && (ip_kotype(port)
17803 == IKOT_NAMED_ENTRY)) {
17804 named_entry =
17805 (vm_named_entry_t)port->ip_kobject;
17806 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
17807 ip_unlock(port);
17808
17809 try_failed_count++;
17810 mutex_pause(try_failed_count);
17811 continue;
17812 }
17813 named_entry->ref_count++;
17814 lck_mtx_unlock(&(named_entry)->Lock);
17815 ip_unlock(port);
17816 if ((named_entry->is_sub_map) &&
17817 (named_entry->protection
17818 & VM_PROT_WRITE)) {
17819 map = named_entry->backing.map;
17820 } else {
17821 mach_destroy_memory_entry(port);
17822 return VM_MAP_NULL;
17823 }
17824 vm_map_reference_swap(map);
17825 mach_destroy_memory_entry(port);
17826 break;
17827 } else {
17828 return VM_MAP_NULL;
17829 }
17830 }
17831 } else {
17832 map = convert_port_to_map(port);
17833 }
17834
17835 return map;
17836 }
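/*
 * Illustrative sketch (not part of the original source) of the reference
 * counting contract documented above: the caller keeps its reference on
 * "port" and must release the map reference produced here.
 *
 *	vm_map_t map;
 *
 *	map = convert_port_entry_to_map(port);
 *	if (map != VM_MAP_NULL) {
 *		// ... use the map ...
 *		vm_map_deallocate(map);	// drop the reference we were given
 *	}
 */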
17837
17838 /*
17839 * Routine: convert_port_entry_to_object
17840 * Purpose:
17841 * Convert from a port specifying a named entry to an
17842 * object. Doesn't consume the port ref; produces an object ref,
17843 * which may be null.
17844 * Conditions:
17845 * Nothing locked.
17846 */
17847
17848
17849 vm_object_t
17850 convert_port_entry_to_object(
17851 ipc_port_t port)
17852 {
17853 vm_object_t object = VM_OBJECT_NULL;
17854 vm_named_entry_t named_entry;
17855 uint32_t try_failed_count = 0;
17856
17857 if (IP_VALID(port) &&
17858 (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
17859 try_again:
17860 ip_lock(port);
17861 if (ip_active(port) &&
17862 (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
17863 named_entry = (vm_named_entry_t)port->ip_kobject;
17864 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
17865 ip_unlock(port);
17866 try_failed_count++;
17867 mutex_pause(try_failed_count);
17868 goto try_again;
17869 }
17870 named_entry->ref_count++;
17871 lck_mtx_unlock(&(named_entry)->Lock);
17872 ip_unlock(port);
17873 if (!(named_entry->is_sub_map) &&
17874 !(named_entry->is_copy) &&
17875 (named_entry->protection & VM_PROT_WRITE)) {
17876 object = named_entry->backing.object;
17877 vm_object_reference(object);
17878 }
17879 mach_destroy_memory_entry(port);
17880 }
17881 }
17882
17883 return object;
17884 }
17885
17886 /*
17887 * Export routines to other components for the things we access locally through
17888 * macros.
17889 */
17890 #undef current_map
17891 vm_map_t
17892 current_map(void)
17893 {
17894 return current_map_fast();
17895 }
17896
17897 /*
17898 * vm_map_reference:
17899 *
17900 * Most code internal to the osfmk will go through a
17901 * macro defining this. This is always here for the
17902 * use of other kernel components.
17903 */
17904 #undef vm_map_reference
17905 void
17906 vm_map_reference(
17907 vm_map_t map)
17908 {
17909 if (map == VM_MAP_NULL) {
17910 return;
17911 }
17912
17913 lck_mtx_lock(&map->s_lock);
17914 #if TASK_SWAPPER
17915 assert(map->res_count > 0);
17916 assert(os_ref_get_count(&map->map_refcnt) >= map->res_count);
17917 map->res_count++;
17918 #endif
17919 os_ref_retain_locked(&map->map_refcnt);
17920 lck_mtx_unlock(&map->s_lock);
17921 }
17922
17923 /*
17924 * vm_map_deallocate:
17925 *
17926 * Removes a reference from the specified map,
17927 * destroying it if no references remain.
17928 * The map should not be locked.
17929 */
17930 void
17931 vm_map_deallocate(
17932 vm_map_t map)
17933 {
17934 unsigned int ref;
17935
17936 if (map == VM_MAP_NULL) {
17937 return;
17938 }
17939
17940 lck_mtx_lock(&map->s_lock);
17941 ref = os_ref_release_locked(&map->map_refcnt);
17942 if (ref > 0) {
17943 vm_map_res_deallocate(map);
17944 lck_mtx_unlock(&map->s_lock);
17945 return;
17946 }
17947 assert(os_ref_get_count(&map->map_refcnt) == 0);
17948 lck_mtx_unlock(&map->s_lock);
17949
17950 #if TASK_SWAPPER
17951 /*
17952 * The map residence count isn't decremented here because
17953 * the vm_map_delete below will traverse the entire map,
17954 * deleting entries, and the residence counts on objects
17955 * and sharing maps will go away then.
17956 */
17957 #endif
17958
17959 vm_map_destroy(map, VM_MAP_REMOVE_NO_FLAGS);
17960 }
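/*
 * Illustrative sketch (not part of the original source): components outside
 * osfmk that stash a map pointer are expected to keep vm_map_reference() and
 * vm_map_deallocate() balanced.  "my_ctx" is a hypothetical consumer:
 *
 *	vm_map_reference(map);		// take a ref before publishing it
 *	my_ctx->map = map;
 *	...
 *	vm_map_deallocate(my_ctx->map);	// dropping the last ref destroys the map
 *	my_ctx->map = VM_MAP_NULL;
 */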
17961
17962
17963 void
17964 vm_map_disable_NX(vm_map_t map)
17965 {
17966 if (map == NULL) {
17967 return;
17968 }
17969 if (map->pmap == NULL) {
17970 return;
17971 }
17972
17973 pmap_disable_NX(map->pmap);
17974 }
17975
17976 void
17977 vm_map_disallow_data_exec(vm_map_t map)
17978 {
17979 if (map == NULL) {
17980 return;
17981 }
17982
17983 map->map_disallow_data_exec = TRUE;
17984 }
17985
17986 /* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
17987 * more descriptive.
17988 */
17989 void
17990 vm_map_set_32bit(vm_map_t map)
17991 {
17992 #if defined(__arm__) || defined(__arm64__)
17993 map->max_offset = pmap_max_offset(FALSE, ARM_PMAP_MAX_OFFSET_DEVICE);
17994 #else
17995 map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
17996 #endif
17997 }
17998
17999
18000 void
18001 vm_map_set_64bit(vm_map_t map)
18002 {
18003 #if defined(__arm__) || defined(__arm64__)
18004 map->max_offset = pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_DEVICE);
18005 #else
18006 map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
18007 #endif
18008 }
18009
18010 /*
18011 * Expand the maximum size of an existing map to the maximum supported.
18012 */
18013 void
18014 vm_map_set_jumbo(vm_map_t map)
18015 {
18016 #if defined (__arm64__)
18017 vm_map_set_max_addr(map, ~0);
18018 #else /* arm64 */
18019 (void) map;
18020 #endif
18021 }
18022
18023 /*
18024 * This map has a JIT entitlement
18025 */
18026 void
18027 vm_map_set_jit_entitled(vm_map_t map)
18028 {
18029 #if defined (__arm64__)
18030 pmap_set_jit_entitled(map->pmap);
18031 #else /* arm64 */
18032 (void) map;
18033 #endif
18034 }
18035
18036 /*
18037 * Expand the maximum size of an existing map.
18038 */
18039 void
18040 vm_map_set_max_addr(vm_map_t map, vm_map_offset_t new_max_offset)
18041 {
18042 #if defined(__arm64__)
18043 vm_map_offset_t max_supported_offset = 0;
18044 vm_map_offset_t old_max_offset = map->max_offset;
18045 max_supported_offset = pmap_max_offset(vm_map_is_64bit(map), ARM_PMAP_MAX_OFFSET_JUMBO);
18046
18047 new_max_offset = trunc_page(new_max_offset);
18048
18049 /* The address space cannot be shrunk using this routine. */
18050 if (old_max_offset >= new_max_offset) {
18051 return;
18052 }
18053
18054 if (max_supported_offset < new_max_offset) {
18055 new_max_offset = max_supported_offset;
18056 }
18057
18058 map->max_offset = new_max_offset;
18059
18060 if (map->holes_list->prev->vme_end == old_max_offset) {
18061 /*
18062 * There is already a hole at the end of the map; simply make it bigger.
18063 */
18064 map->holes_list->prev->vme_end = map->max_offset;
18065 } else {
18066 /*
18067 * There is no hole at the end, so we need to create a new hole
18068 * for the new empty space we're creating.
18069 */
18070 struct vm_map_links *new_hole = zalloc(vm_map_holes_zone);
18071 new_hole->start = old_max_offset;
18072 new_hole->end = map->max_offset;
18073 new_hole->prev = map->holes_list->prev;
18074 new_hole->next = (struct vm_map_entry *)map->holes_list;
18075 map->holes_list->prev->links.next = (struct vm_map_entry *)new_hole;
18076 map->holes_list->prev = (struct vm_map_entry *)new_hole;
18077 }
18078 #else
18079 (void)map;
18080 (void)new_max_offset;
18081 #endif
18082 }
18083
18084 vm_map_offset_t
18085 vm_compute_max_offset(boolean_t is64)
18086 {
18087 #if defined(__arm__) || defined(__arm64__)
18088 return pmap_max_offset(is64, ARM_PMAP_MAX_OFFSET_DEVICE);
18089 #else
18090 return is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS;
18091 #endif
18092 }
18093
18094 void
18095 vm_map_get_max_aslr_slide_section(
18096 vm_map_t map __unused,
18097 int64_t *max_sections,
18098 int64_t *section_size)
18099 {
18100 #if defined(__arm64__)
18101 *max_sections = 3;
18102 *section_size = ARM_TT_TWIG_SIZE;
18103 #else
18104 *max_sections = 1;
18105 *section_size = 0;
18106 #endif
18107 }
18108
18109 uint64_t
18110 vm_map_get_max_aslr_slide_pages(vm_map_t map)
18111 {
18112 #if defined(__arm64__)
18113 /* Limit arm64 slide to 16MB to conserve contiguous VA space in the more
18114 * limited embedded address space; this is also meant to minimize pmap
18115 * memory usage on 16KB page systems.
18116 */
18117 return 1 << (24 - VM_MAP_PAGE_SHIFT(map));
18118 #else
18119 return 1 << (vm_map_is_64bit(map) ? 16 : 8);
18120 #endif
18121 }
18122
18123 uint64_t
18124 vm_map_get_max_loader_aslr_slide_pages(vm_map_t map)
18125 {
18126 #if defined(__arm64__)
18127 /* We limit the loader slide to 4MB, in order to ensure at least 8 bits
18128 * of independent entropy on 16KB page systems.
18129 */
18130 return 1 << (22 - VM_MAP_PAGE_SHIFT(map));
18131 #else
18132 return 1 << (vm_map_is_64bit(map) ? 16 : 8);
18133 #endif
18134 }
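/*
 * Worked example (illustrative, not part of the original source) of the two
 * arm64 slide limits above:
 *
 *	16KB pages (VM_MAP_PAGE_SHIFT(map) == 14):
 *		max slide    = 1 << (24 - 14) = 1024 pages = 16MB
 *		loader slide = 1 << (22 - 14) =  256 pages =  4MB (8 bits of entropy)
 *	4KB pages (VM_MAP_PAGE_SHIFT(map) == 12):
 *		max slide    = 1 << (24 - 12) = 4096 pages = 16MB
 *		loader slide = 1 << (22 - 12) = 1024 pages =  4MB
 */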
18135
18136 #ifndef __arm__
18137 boolean_t
18138 vm_map_is_64bit(
18139 vm_map_t map)
18140 {
18141 return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
18142 }
18143 #endif
18144
18145 boolean_t
18146 vm_map_has_hard_pagezero(
18147 vm_map_t map,
18148 vm_map_offset_t pagezero_size)
18149 {
18150 /*
18151 * XXX FBDP
18152 * We should lock the VM map (for read) here but we can get away
18153 * with it for now because there can't really be any race condition:
18154 * the VM map's min_offset is changed only when the VM map is created
18155 * and when the zero page is established (when the binary gets loaded),
18156 * and this routine gets called only when the task terminates and the
18157 * VM map is being torn down, and when a new map is created via
18158 * load_machfile()/execve().
18159 */
18160 return map->min_offset >= pagezero_size;
18161 }
18162
18163 /*
18164 * Raise a VM map's maximum offset.
18165 */
18166 kern_return_t
18167 vm_map_raise_max_offset(
18168 vm_map_t map,
18169 vm_map_offset_t new_max_offset)
18170 {
18171 kern_return_t ret;
18172
18173 vm_map_lock(map);
18174 ret = KERN_INVALID_ADDRESS;
18175
18176 if (new_max_offset >= map->max_offset) {
18177 if (!vm_map_is_64bit(map)) {
18178 if (new_max_offset <= (vm_map_offset_t)VM_MAX_ADDRESS) {
18179 map->max_offset = new_max_offset;
18180 ret = KERN_SUCCESS;
18181 }
18182 } else {
18183 if (new_max_offset <= (vm_map_offset_t)MACH_VM_MAX_ADDRESS) {
18184 map->max_offset = new_max_offset;
18185 ret = KERN_SUCCESS;
18186 }
18187 }
18188 }
18189
18190 vm_map_unlock(map);
18191 return ret;
18192 }
18193
18194
18195 /*
18196 * Raise a VM map's minimum offset.
18197 * To strictly enforce "page zero" reservation.
18198 */
18199 kern_return_t
18200 vm_map_raise_min_offset(
18201 vm_map_t map,
18202 vm_map_offset_t new_min_offset)
18203 {
18204 vm_map_entry_t first_entry;
18205
18206 new_min_offset = vm_map_round_page(new_min_offset,
18207 VM_MAP_PAGE_MASK(map));
18208
18209 vm_map_lock(map);
18210
18211 if (new_min_offset < map->min_offset) {
18212 /*
18213 * Can't move min_offset backwards, as that would expose
18214 * a part of the address space that was previously, and for
18215 * possibly good reasons, inaccessible.
18216 */
18217 vm_map_unlock(map);
18218 return KERN_INVALID_ADDRESS;
18219 }
18220 if (new_min_offset >= map->max_offset) {
18221 /* can't go beyond the end of the address space */
18222 vm_map_unlock(map);
18223 return KERN_INVALID_ADDRESS;
18224 }
18225
18226 first_entry = vm_map_first_entry(map);
18227 if (first_entry != vm_map_to_entry(map) &&
18228 first_entry->vme_start < new_min_offset) {
18229 /*
18230 * Some memory was already allocated below the new
18231 * minimum offset. It's too late to change it now...
18232 */
18233 vm_map_unlock(map);
18234 return KERN_NO_SPACE;
18235 }
18236
18237 map->min_offset = new_min_offset;
18238
18239 assert(map->holes_list);
18240 map->holes_list->start = new_min_offset;
18241 assert(new_min_offset < map->holes_list->end);
18242
18243 vm_map_unlock(map);
18244
18245 return KERN_SUCCESS;
18246 }
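/*
 * Illustrative sketch (not part of the original source): how a hypothetical
 * binary-load path might use vm_map_raise_min_offset() to enforce a 4GB
 * "page zero" on a 64-bit map.  Anything already mapped below the new
 * minimum makes the call fail.
 *
 *	kern_return_t	kr;
 *
 *	kr = vm_map_raise_min_offset(map, (vm_map_offset_t)0x100000000ULL);
 *	if (kr != KERN_SUCCESS) {
 *		// KERN_NO_SPACE: something is already mapped below 4GB;
 *		// reject or fall back as appropriate
 *	}
 */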
18247
18248 /*
18249 * Set the limit on the maximum amount of user wired memory allowed for this map.
18250 * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side of
18251 * the kernel. The limits are checked on the Mach VM side, so we keep a copy here
18252 * to avoid reaching over to the BSD data structures.
18253 */
18254
18255 void
18256 vm_map_set_user_wire_limit(vm_map_t map,
18257 vm_size_t limit)
18258 {
18259 map->user_wire_limit = limit;
18260 }
18261
18262
18263 void
18264 vm_map_switch_protect(vm_map_t map,
18265 boolean_t val)
18266 {
18267 vm_map_lock(map);
18268 map->switch_protect = val;
18269 vm_map_unlock(map);
18270 }
18271
18272 /*
18273 * IOKit has mapped a region into this map; adjust the pmap's ledgers appropriately.
18274 * phys_footprint is a composite limit consisting of iokit + physmem, so we need to
18275 * bump both counters.
18276 */
18277 void
18278 vm_map_iokit_mapped_region(vm_map_t map, vm_size_t bytes)
18279 {
18280 pmap_t pmap = vm_map_pmap(map);
18281
18282 ledger_credit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
18283 ledger_credit(pmap->ledger, task_ledgers.phys_footprint, bytes);
18284 }
18285
18286 void
18287 vm_map_iokit_unmapped_region(vm_map_t map, vm_size_t bytes)
18288 {
18289 pmap_t pmap = vm_map_pmap(map);
18290
18291 ledger_debit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
18292 ledger_debit(pmap->ledger, task_ledgers.phys_footprint, bytes);
18293 }
18294
18295 /* Add (generate) code signature for memory range */
18296 #if CONFIG_DYNAMIC_CODE_SIGNING
18297 kern_return_t
18298 vm_map_sign(vm_map_t map,
18299 vm_map_offset_t start,
18300 vm_map_offset_t end)
18301 {
18302 vm_map_entry_t entry;
18303 vm_page_t m;
18304 vm_object_t object;
18305
18306 /*
18307 * Vet all the input parameters and current type and state of the
18308 * underlying object. Return with an error if anything is amiss.
18309 */
18310 if (map == VM_MAP_NULL) {
18311 return KERN_INVALID_ARGUMENT;
18312 }
18313
18314 vm_map_lock_read(map);
18315
18316 if (!vm_map_lookup_entry(map, start, &entry) || entry->is_sub_map) {
18317 /*
18318 * Must pass a valid non-submap address.
18319 */
18320 vm_map_unlock_read(map);
18321 return KERN_INVALID_ADDRESS;
18322 }
18323
18324 if ((entry->vme_start > start) || (entry->vme_end < end)) {
18325 /*
18326 * Map entry doesn't cover the requested range. Not handling
18327 * this situation currently.
18328 */
18329 vm_map_unlock_read(map);
18330 return KERN_INVALID_ARGUMENT;
18331 }
18332
18333 object = VME_OBJECT(entry);
18334 if (object == VM_OBJECT_NULL) {
18335 /*
18336 * Object must already be present or we can't sign.
18337 */
18338 vm_map_unlock_read(map);
18339 return KERN_INVALID_ARGUMENT;
18340 }
18341
18342 vm_object_lock(object);
18343 vm_map_unlock_read(map);
18344
18345 while (start < end) {
18346 uint32_t refmod;
18347
18348 m = vm_page_lookup(object,
18349 start - entry->vme_start + VME_OFFSET(entry));
18350 if (m == VM_PAGE_NULL) {
18351 /* should we try to fault a page here? we can probably
18352 * demand it exists and is locked for this request */
18353 vm_object_unlock(object);
18354 return KERN_FAILURE;
18355 }
18356 /* deal with special page status */
18357 if (m->vmp_busy ||
18358 (m->vmp_unusual && (m->vmp_error || m->vmp_restart || m->vmp_private || m->vmp_absent))) {
18359 vm_object_unlock(object);
18360 return KERN_FAILURE;
18361 }
18362
18363 /* Page is OK... now "validate" it */
18364 /* This is the place where we'll call out to create a code
18365 * directory, later */
18366 m->vmp_cs_validated = TRUE;
18367
18368 /* The page is now "clean" for codesigning purposes. That means
18369 * we don't consider it as modified (wpmapped) anymore. But
18370 * we'll disconnect the page so we note any future modification
18371 * attempts. */
18372 m->vmp_wpmapped = FALSE;
18373 refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
18374
18375 /* Pull the dirty status from the pmap, since we cleared the
18376 * wpmapped bit */
18377 if ((refmod & VM_MEM_MODIFIED) && !m->vmp_dirty) {
18378 SET_PAGE_DIRTY(m, FALSE);
18379 }
18380
18381 /* On to the next page */
18382 start += PAGE_SIZE;
18383 }
18384 vm_object_unlock(object);
18385
18386 return KERN_SUCCESS;
18387 }
18388 #endif
18389
18390 kern_return_t
18391 vm_map_partial_reap(vm_map_t map, unsigned int *reclaimed_resident, unsigned int *reclaimed_compressed)
18392 {
18393 vm_map_entry_t entry = VM_MAP_ENTRY_NULL;
18394 vm_map_entry_t next_entry;
18395 kern_return_t kr = KERN_SUCCESS;
18396 vm_map_t zap_map;
18397
18398 vm_map_lock(map);
18399
18400 /*
18401 * We use a "zap_map" to avoid having to unlock
18402 * the "map" in vm_map_delete().
18403 */
18404 zap_map = vm_map_create(PMAP_NULL,
18405 map->min_offset,
18406 map->max_offset,
18407 map->hdr.entries_pageable);
18408
18409 if (zap_map == VM_MAP_NULL) {
18410 return KERN_RESOURCE_SHORTAGE;
18411 }
18412
18413 vm_map_set_page_shift(zap_map,
18414 VM_MAP_PAGE_SHIFT(map));
18415 vm_map_disable_hole_optimization(zap_map);
18416
18417 for (entry = vm_map_first_entry(map);
18418 entry != vm_map_to_entry(map);
18419 entry = next_entry) {
18420 next_entry = entry->vme_next;
18421
18422 if (VME_OBJECT(entry) &&
18423 !entry->is_sub_map &&
18424 (VME_OBJECT(entry)->internal == TRUE) &&
18425 (VME_OBJECT(entry)->ref_count == 1)) {
18426 *reclaimed_resident += VME_OBJECT(entry)->resident_page_count;
18427 *reclaimed_compressed += vm_compressor_pager_get_count(VME_OBJECT(entry)->pager);
18428
18429 (void)vm_map_delete(map,
18430 entry->vme_start,
18431 entry->vme_end,
18432 VM_MAP_REMOVE_SAVE_ENTRIES,
18433 zap_map);
18434 }
18435 }
18436
18437 vm_map_unlock(map);
18438
18439 /*
18440 * Get rid of the "zap_maps" and all the map entries that
18441 * they may still contain.
18442 */
18443 if (zap_map != VM_MAP_NULL) {
18444 vm_map_destroy(zap_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
18445 zap_map = VM_MAP_NULL;
18446 }
18447
18448 return kr;
18449 }
18450
18451
18452 #if DEVELOPMENT || DEBUG
18453
18454 int
18455 vm_map_disconnect_page_mappings(
18456 vm_map_t map,
18457 boolean_t do_unnest)
18458 {
18459 vm_map_entry_t entry;
18460 int page_count = 0;
18461
18462 if (do_unnest == TRUE) {
18463 #ifndef NO_NESTED_PMAP
18464 vm_map_lock(map);
18465
18466 for (entry = vm_map_first_entry(map);
18467 entry != vm_map_to_entry(map);
18468 entry = entry->vme_next) {
18469 if (entry->is_sub_map && entry->use_pmap) {
18470 /*
18471 * Make sure the range between the start of this entry and
18472 * the end of this entry is no longer nested, so that
18473 * we will only remove mappings from the pmap in use by
18474 * this task.
18475 */
18476 vm_map_clip_unnest(map, entry, entry->vme_start, entry->vme_end);
18477 }
18478 }
18479 vm_map_unlock(map);
18480 #endif
18481 }
18482 vm_map_lock_read(map);
18483
18484 page_count = map->pmap->stats.resident_count;
18485
18486 for (entry = vm_map_first_entry(map);
18487 entry != vm_map_to_entry(map);
18488 entry = entry->vme_next) {
18489 if (!entry->is_sub_map && ((VME_OBJECT(entry) == 0) ||
18490 (VME_OBJECT(entry)->phys_contiguous))) {
18491 continue;
18492 }
18493 if (entry->is_sub_map) {
18494 assert(!entry->use_pmap);
18495 }
18496
18497 pmap_remove_options(map->pmap, entry->vme_start, entry->vme_end, 0);
18498 }
18499 vm_map_unlock_read(map);
18500
18501 return page_count;
18502 }
18503
18504 #endif
18505
18506
18507 #if CONFIG_FREEZE
18508
18509
18510 int c_freezer_swapout_page_count;
18511 int c_freezer_compression_count = 0;
18512 AbsoluteTime c_freezer_last_yield_ts = 0;
18513
18514 extern unsigned int memorystatus_freeze_private_shared_pages_ratio;
18515 extern unsigned int memorystatus_freeze_shared_mb_per_process_max;
18516
18517 kern_return_t
18518 vm_map_freeze(
18519 task_t task,
18520 unsigned int *purgeable_count,
18521 unsigned int *wired_count,
18522 unsigned int *clean_count,
18523 unsigned int *dirty_count,
18524 unsigned int dirty_budget,
18525 unsigned int *shared_count,
18526 int *freezer_error_code,
18527 boolean_t eval_only)
18528 {
18529 vm_map_entry_t entry2 = VM_MAP_ENTRY_NULL;
18530 kern_return_t kr = KERN_SUCCESS;
18531 boolean_t evaluation_phase = TRUE;
18532 vm_object_t cur_shared_object = NULL;
18533 int cur_shared_obj_ref_cnt = 0;
18534 unsigned int dirty_private_count = 0, dirty_shared_count = 0, obj_pages_snapshot = 0;
18535
18536 *purgeable_count = *wired_count = *clean_count = *dirty_count = *shared_count = 0;
18537
18538 /*
18539 * We need the exclusive lock here so that we can
18540 * block any page faults or lookups while we are
18541 * in the middle of freezing this vm map.
18542 */
18543 vm_map_t map = task->map;
18544
18545 vm_map_lock(map);
18546
18547 assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
18548
18549 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
18550 if (vm_compressor_low_on_space()) {
18551 *freezer_error_code = FREEZER_ERROR_NO_COMPRESSOR_SPACE;
18552 }
18553
18554 if (vm_swap_low_on_space()) {
18555 *freezer_error_code = FREEZER_ERROR_NO_SWAP_SPACE;
18556 }
18557
18558 kr = KERN_NO_SPACE;
18559 goto done;
18560 }
18561
18562 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE == FALSE) {
18563 /*
18564 * In-memory compressor backing the freezer. No disk.
18565 * So no need to do the evaluation phase.
18566 */
18567 evaluation_phase = FALSE;
18568
18569 if (eval_only == TRUE) {
18570 /*
18571 * We don't support 'eval_only' mode
18572 * in this non-swap config.
18573 */
18574 *freezer_error_code = FREEZER_ERROR_GENERIC;
18575 kr = KERN_INVALID_ARGUMENT;
18576 goto done;
18577 }
18578
18579 c_freezer_compression_count = 0;
18580 clock_get_uptime(&c_freezer_last_yield_ts);
18581 }
18582 again:
18583
18584 for (entry2 = vm_map_first_entry(map);
18585 entry2 != vm_map_to_entry(map);
18586 entry2 = entry2->vme_next) {
18587 vm_object_t src_object = VME_OBJECT(entry2);
18588
18589 if (src_object &&
18590 !entry2->is_sub_map &&
18591 !src_object->phys_contiguous) {
18592 /* If eligible, scan the entry, moving eligible pages over to our parent object */
18593
18594 if (src_object->internal == TRUE) {
18595 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
18596 /*
18597 * We skip purgeable objects during evaluation phase only.
18598 * If we decide to freeze this process, we'll explicitly
18599 * purge these objects before we go around again with
18600 * 'evaluation_phase' set to FALSE.
18601 */
18602
18603 if ((src_object->purgable == VM_PURGABLE_EMPTY) || (src_object->purgable == VM_PURGABLE_VOLATILE)) {
18604 /*
18605 * We want to purge objects that may not belong to this task but are mapped
18606 * in this task alone. Since we already purged this task's purgeable memory
18607 * at the end of a successful evaluation phase, we want to avoid doing no-op calls
18608 * on this task's purgeable objects. Hence the check for only volatile objects.
18609 */
18610 if (evaluation_phase == FALSE &&
18611 (src_object->purgable == VM_PURGABLE_VOLATILE) &&
18612 (src_object->ref_count == 1)) {
18613 vm_object_lock(src_object);
18614 vm_object_purge(src_object, 0);
18615 vm_object_unlock(src_object);
18616 }
18617 continue;
18618 }
18619
18620 /*
18621 * Pages belonging to this object could be swapped to disk.
18622 * Make sure it's not a shared object because we could end
18623 * up just bringing it back in again.
18624 *
18625 * We try to optimize somewhat by checking for objects that are mapped
18626 * more than once within our own map. But we don't do full searches,
18627 * we just look at the entries following our current entry.
18628 */
18629
18630 if (src_object->ref_count > 1) {
18631 if (src_object != cur_shared_object) {
18632 obj_pages_snapshot = (src_object->resident_page_count - src_object->wired_page_count) + vm_compressor_pager_get_count(src_object->pager);
18633 dirty_shared_count += obj_pages_snapshot;
18634
18635 cur_shared_object = src_object;
18636 cur_shared_obj_ref_cnt = 1;
18637 continue;
18638 } else {
18639 cur_shared_obj_ref_cnt++;
18640 if (src_object->ref_count == cur_shared_obj_ref_cnt) {
18641 /*
18642 * Fall through to below and treat this object as private.
18643 * So deduct its pages from our shared total and add it to the
18644 * private total.
18645 */
18646
18647 dirty_shared_count -= obj_pages_snapshot;
18648 dirty_private_count += obj_pages_snapshot;
18649 } else {
18650 continue;
18651 }
18652 }
18653 }
18654
18655
18656 if (src_object->ref_count == 1) {
18657 dirty_private_count += (src_object->resident_page_count - src_object->wired_page_count) + vm_compressor_pager_get_count(src_object->pager);
18658 }
18659
18660 if (evaluation_phase == TRUE) {
18661 continue;
18662 }
18663 }
18664
18665 uint32_t paged_out_count = vm_object_compressed_freezer_pageout(src_object, dirty_budget);
18666 *wired_count += src_object->wired_page_count;
18667
18668 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
18669 if (vm_compressor_low_on_space()) {
18670 *freezer_error_code = FREEZER_ERROR_NO_COMPRESSOR_SPACE;
18671 }
18672
18673 if (vm_swap_low_on_space()) {
18674 *freezer_error_code = FREEZER_ERROR_NO_SWAP_SPACE;
18675 }
18676
18677 kr = KERN_NO_SPACE;
18678 break;
18679 }
18680 if (paged_out_count >= dirty_budget) {
18681 break;
18682 }
18683 dirty_budget -= paged_out_count;
18684 }
18685 }
18686 }
18687
18688 if (evaluation_phase) {
18689 unsigned int shared_pages_threshold = (memorystatus_freeze_shared_mb_per_process_max * 1024 * 1024ULL) / PAGE_SIZE_64;
18690
18691 if (dirty_shared_count > shared_pages_threshold) {
18692 *freezer_error_code = FREEZER_ERROR_EXCESS_SHARED_MEMORY;
18693 kr = KERN_FAILURE;
18694 goto done;
18695 }
18696
18697 if (dirty_shared_count &&
18698 ((dirty_private_count / dirty_shared_count) < memorystatus_freeze_private_shared_pages_ratio)) {
18699 *freezer_error_code = FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO;
18700 kr = KERN_FAILURE;
18701 goto done;
18702 }
18703
18704 evaluation_phase = FALSE;
18705 dirty_shared_count = dirty_private_count = 0;
18706
18707 c_freezer_compression_count = 0;
18708 clock_get_uptime(&c_freezer_last_yield_ts);
18709
18710 if (eval_only) {
18711 kr = KERN_SUCCESS;
18712 goto done;
18713 }
18714
18715 vm_purgeable_purge_task_owned(task);
18716
18717 goto again;
18718 } else {
18719 kr = KERN_SUCCESS;
18720 *shared_count = (unsigned int) ((dirty_shared_count * PAGE_SIZE_64) / (1024 * 1024ULL));
18721 }
18722
18723 done:
18724 vm_map_unlock(map);
18725
18726 if ((eval_only == FALSE) && (kr == KERN_SUCCESS)) {
18727 vm_object_compressed_freezer_done();
18728
18729 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
18730 /*
18731 * reset the counter tracking the # of swapped compressed pages
18732 * because we are now done with this freeze session and task.
18733 */
18734
18735 *dirty_count = c_freezer_swapout_page_count; //used to track pageouts
18736 c_freezer_swapout_page_count = 0;
18737 }
18738 }
18739 return kr;
18740 }
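/*
 * Worked example (illustrative, not part of the original source) of the
 * evaluation-phase checks in vm_map_freeze() above, assuming 16KB pages and
 * hypothetical tunable values of 128 for
 * memorystatus_freeze_shared_mb_per_process_max and 2 for
 * memorystatus_freeze_private_shared_pages_ratio:
 *
 *	shared_pages_threshold = (128 * 1024 * 1024) / 16384 = 8192 pages
 *
 *	dirty_shared_count  = 1000 pages  -> under the 8192-page threshold, OK
 *	dirty_private_count = 1500 pages  -> 1500 / 1000 = 1, which is < 2,
 *	                                     so the freeze is rejected with
 *	                                     FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO
 */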
18741
18742 #endif
18743
18744 /*
18745 * vm_map_entry_should_cow_for_true_share:
18746 *
18747 * Determines if the map entry should be clipped and set up for copy-on-write
18748 * to avoid applying "true_share" to a large VM object when only a subset is
18749 * targeted.
18750 *
18751 * For now, we target only the map entries created for the Objective C
18752 * Garbage Collector, which initially have the following properties:
18753 * - alias == VM_MEMORY_MALLOC
18754 * - wired_count == 0
18755 * - !needs_copy
18756 * and a VM object with:
18757 * - internal
18758 * - copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC
18759 * - !true_share
18760 * - vo_size == ANON_CHUNK_SIZE
18761 *
18762 * Only non-kernel map entries.
18763 */
18764 boolean_t
18765 vm_map_entry_should_cow_for_true_share(
18766 vm_map_entry_t entry)
18767 {
18768 vm_object_t object;
18769
18770 if (entry->is_sub_map) {
18771 /* entry does not point at a VM object */
18772 return FALSE;
18773 }
18774
18775 if (entry->needs_copy) {
18776 /* already set for copy_on_write: done! */
18777 return FALSE;
18778 }
18779
18780 if (VME_ALIAS(entry) != VM_MEMORY_MALLOC &&
18781 VME_ALIAS(entry) != VM_MEMORY_MALLOC_SMALL) {
18782 /* not a malloc heap or Obj-C Garbage Collector heap */
18783 return FALSE;
18784 }
18785
18786 if (entry->wired_count) {
18787 /* wired: can't change the map entry... */
18788 vm_counters.should_cow_but_wired++;
18789 return FALSE;
18790 }
18791
18792 object = VME_OBJECT(entry);
18793
18794 if (object == VM_OBJECT_NULL) {
18795 /* no object yet... */
18796 return FALSE;
18797 }
18798
18799 if (!object->internal) {
18800 /* not an internal object */
18801 return FALSE;
18802 }
18803
18804 if (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
18805 /* not the default copy strategy */
18806 return FALSE;
18807 }
18808
18809 if (object->true_share) {
18810 /* already true_share: too late to avoid it */
18811 return FALSE;
18812 }
18813
18814 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC &&
18815 object->vo_size != ANON_CHUNK_SIZE) {
18816 /* ... not an object created for the ObjC Garbage Collector */
18817 return FALSE;
18818 }
18819
18820 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_SMALL &&
18821 object->vo_size != 2048 * 4096) {
18822 /* ... not a "MALLOC_SMALL" heap */
18823 return FALSE;
18824 }
18825
18826 /*
18827 * All the criteria match: we have a large object being targeted for "true_share".
18828 * To limit the adverse side-effects linked with "true_share", tell the caller to
18829 * try and avoid setting up the entire object for "true_share" by clipping the
18830 * targeted range and setting it up for copy-on-write.
18831 */
18832 return TRUE;
18833 }
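/*
 * Illustrative sketch (not part of the original source): a caller holding the
 * map lock for write would typically react to a TRUE result by clipping the
 * entry to the targeted range and forcing copy-on-write there, roughly as
 * below ("start" and "end" are hypothetical bounds supplied by that caller):
 *
 *	if (vm_map_entry_should_cow_for_true_share(entry) &&
 *	    start >= entry->vme_start &&
 *	    end <= entry->vme_end) {
 *		vm_map_clip_start(map, entry,
 *		    vm_map_trunc_page(start, VM_MAP_PAGE_MASK(map)));
 *		vm_map_clip_end(map, entry,
 *		    vm_map_round_page(end, VM_MAP_PAGE_MASK(map)));
 *		entry->needs_copy = TRUE;	// COW the clipped range instead
 *	}					// of marking the whole object
 *						// "true_share"
 */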
18834
18835 vm_map_offset_t
18836 vm_map_round_page_mask(
18837 vm_map_offset_t offset,
18838 vm_map_offset_t mask)
18839 {
18840 return VM_MAP_ROUND_PAGE(offset, mask);
18841 }
18842
18843 vm_map_offset_t
18844 vm_map_trunc_page_mask(
18845 vm_map_offset_t offset,
18846 vm_map_offset_t mask)
18847 {
18848 return VM_MAP_TRUNC_PAGE(offset, mask);
18849 }
18850
18851 boolean_t
18852 vm_map_page_aligned(
18853 vm_map_offset_t offset,
18854 vm_map_offset_t mask)
18855 {
18856 return ((offset) & mask) == 0;
18857 }
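/*
 * Worked example (illustrative, not part of the original source), using a
 * 4KB page mask of 0xFFF:
 *
 *	vm_map_round_page_mask(0x1234, 0xFFF) == 0x2000
 *	vm_map_trunc_page_mask(0x1234, 0xFFF) == 0x1000
 *	vm_map_page_aligned(0x2000, 0xFFF)    == TRUE
 *	vm_map_page_aligned(0x1234, 0xFFF)    == FALSE
 */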
18858
18859 int
18860 vm_map_page_shift(
18861 vm_map_t map)
18862 {
18863 return VM_MAP_PAGE_SHIFT(map);
18864 }
18865
18866 int
18867 vm_map_page_size(
18868 vm_map_t map)
18869 {
18870 return VM_MAP_PAGE_SIZE(map);
18871 }
18872
18873 vm_map_offset_t
18874 vm_map_page_mask(
18875 vm_map_t map)
18876 {
18877 return VM_MAP_PAGE_MASK(map);
18878 }
18879
18880 kern_return_t
18881 vm_map_set_page_shift(
18882 vm_map_t map,
18883 int pageshift)
18884 {
18885 if (map->hdr.nentries != 0) {
18886 /* too late to change page size */
18887 return KERN_FAILURE;
18888 }
18889
18890 map->hdr.page_shift = pageshift;
18891
18892 return KERN_SUCCESS;
18893 }
18894
18895 kern_return_t
18896 vm_map_query_volatile(
18897 vm_map_t map,
18898 mach_vm_size_t *volatile_virtual_size_p,
18899 mach_vm_size_t *volatile_resident_size_p,
18900 mach_vm_size_t *volatile_compressed_size_p,
18901 mach_vm_size_t *volatile_pmap_size_p,
18902 mach_vm_size_t *volatile_compressed_pmap_size_p)
18903 {
18904 mach_vm_size_t volatile_virtual_size;
18905 mach_vm_size_t volatile_resident_count;
18906 mach_vm_size_t volatile_compressed_count;
18907 mach_vm_size_t volatile_pmap_count;
18908 mach_vm_size_t volatile_compressed_pmap_count;
18909 mach_vm_size_t resident_count;
18910 vm_map_entry_t entry;
18911 vm_object_t object;
18912
18913 /* map should be locked by caller */
18914
18915 volatile_virtual_size = 0;
18916 volatile_resident_count = 0;
18917 volatile_compressed_count = 0;
18918 volatile_pmap_count = 0;
18919 volatile_compressed_pmap_count = 0;
18920
18921 for (entry = vm_map_first_entry(map);
18922 entry != vm_map_to_entry(map);
18923 entry = entry->vme_next) {
18924 mach_vm_size_t pmap_resident_bytes, pmap_compressed_bytes;
18925
18926 if (entry->is_sub_map) {
18927 continue;
18928 }
18929 if (!(entry->protection & VM_PROT_WRITE)) {
18930 continue;
18931 }
18932 object = VME_OBJECT(entry);
18933 if (object == VM_OBJECT_NULL) {
18934 continue;
18935 }
18936 if (object->purgable != VM_PURGABLE_VOLATILE &&
18937 object->purgable != VM_PURGABLE_EMPTY) {
18938 continue;
18939 }
18940 if (VME_OFFSET(entry)) {
18941 /*
18942 * If the map entry has been split and the object now
18943 * appears several times in the VM map, we don't want
18944 * to count the object's resident_page_count more than
18945 * once. We count it only for the first one, starting
18946 * at offset 0 and ignore the other VM map entries.
18947 */
18948 continue;
18949 }
18950 resident_count = object->resident_page_count;
18951 if ((VME_OFFSET(entry) / PAGE_SIZE) >= resident_count) {
18952 resident_count = 0;
18953 } else {
18954 resident_count -= (VME_OFFSET(entry) / PAGE_SIZE);
18955 }
18956
18957 volatile_virtual_size += entry->vme_end - entry->vme_start;
18958 volatile_resident_count += resident_count;
18959 if (object->pager) {
18960 volatile_compressed_count +=
18961 vm_compressor_pager_get_count(object->pager);
18962 }
18963 pmap_compressed_bytes = 0;
18964 pmap_resident_bytes =
18965 pmap_query_resident(map->pmap,
18966 entry->vme_start,
18967 entry->vme_end,
18968 &pmap_compressed_bytes);
18969 volatile_pmap_count += (pmap_resident_bytes / PAGE_SIZE);
18970 volatile_compressed_pmap_count += (pmap_compressed_bytes
18971 / PAGE_SIZE);
18972 }
18973
18974 /* map is still locked on return */
18975
18976 *volatile_virtual_size_p = volatile_virtual_size;
18977 *volatile_resident_size_p = volatile_resident_count * PAGE_SIZE;
18978 *volatile_compressed_size_p = volatile_compressed_count * PAGE_SIZE;
18979 *volatile_pmap_size_p = volatile_pmap_count * PAGE_SIZE;
18980 *volatile_compressed_pmap_size_p = volatile_compressed_pmap_count * PAGE_SIZE;
18981
18982 return KERN_SUCCESS;
18983 }
18984
18985 void
18986 vm_map_sizes(vm_map_t map,
18987 vm_map_size_t * psize,
18988 vm_map_size_t * pfree,
18989 vm_map_size_t * plargest_free)
18990 {
18991 vm_map_entry_t entry;
18992 vm_map_offset_t prev;
18993 vm_map_size_t free, total_free, largest_free;
18994 boolean_t end;
18995
18996 if (!map) {
18997 *psize = *pfree = *plargest_free = 0;
18998 return;
18999 }
19000 total_free = largest_free = 0;
19001
19002 vm_map_lock_read(map);
19003 if (psize) {
19004 *psize = map->max_offset - map->min_offset;
19005 }
19006
19007 prev = map->min_offset;
19008 for (entry = vm_map_first_entry(map);; entry = entry->vme_next) {
19009 end = (entry == vm_map_to_entry(map));
19010
19011 if (end) {
19012 free = entry->vme_end - prev;
19013 } else {
19014 free = entry->vme_start - prev;
19015 }
19016
19017 total_free += free;
19018 if (free > largest_free) {
19019 largest_free = free;
19020 }
19021
19022 if (end) {
19023 break;
19024 }
19025 prev = entry->vme_end;
19026 }
19027 vm_map_unlock_read(map);
19028 if (pfree) {
19029 *pfree = total_free;
19030 }
19031 if (plargest_free) {
19032 *plargest_free = largest_free;
19033 }
19034 }
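/*
 * Illustrative usage sketch (not part of the original source): sampling a
 * map's virtual size and free-space statistics.
 *
 *	vm_map_size_t	size, free_total, free_largest;
 *
 *	vm_map_sizes(map, &size, &free_total, &free_largest);
 *	// "size" is max_offset - min_offset; "free_total" sums all gaps
 *	// between entries; "free_largest" is the biggest single gap.
 */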
19035
19036 #if VM_SCAN_FOR_SHADOW_CHAIN
19037 int vm_map_shadow_max(vm_map_t map);
19038 int
19039 vm_map_shadow_max(
19040 vm_map_t map)
19041 {
19042 int shadows, shadows_max;
19043 vm_map_entry_t entry;
19044 vm_object_t object, next_object;
19045
19046 if (map == NULL) {
19047 return 0;
19048 }
19049
19050 shadows_max = 0;
19051
19052 vm_map_lock_read(map);
19053
19054 for (entry = vm_map_first_entry(map);
19055 entry != vm_map_to_entry(map);
19056 entry = entry->vme_next) {
19057 if (entry->is_sub_map) {
19058 continue;
19059 }
19060 object = VME_OBJECT(entry);
19061 if (object == NULL) {
19062 continue;
19063 }
19064 vm_object_lock_shared(object);
19065 for (shadows = 0;
19066 object->shadow != NULL;
19067 shadows++, object = next_object) {
19068 next_object = object->shadow;
19069 vm_object_lock_shared(next_object);
19070 vm_object_unlock(object);
19071 }
19072 vm_object_unlock(object);
19073 if (shadows > shadows_max) {
19074 shadows_max = shadows;
19075 }
19076 }
19077
19078 vm_map_unlock_read(map);
19079
19080 return shadows_max;
19081 }
19082 #endif /* VM_SCAN_FOR_SHADOW_CHAIN */
19083
19084 void
19085 vm_commit_pagezero_status(vm_map_t lmap)
19086 {
19087 pmap_advise_pagezero_range(lmap->pmap, lmap->min_offset);
19088 }
19089
19090 #if !CONFIG_EMBEDDED
19091 void
19092 vm_map_set_high_start(
19093 vm_map_t map,
19094 vm_map_offset_t high_start)
19095 {
19096 map->vmmap_high_start = high_start;
19097 }
19098 #endif
19099
19100 #if PMAP_CS
19101 kern_return_t
19102 vm_map_entry_cs_associate(
19103 vm_map_t map,
19104 vm_map_entry_t entry,
19105 vm_map_kernel_flags_t vmk_flags)
19106 {
19107 vm_object_t cs_object, cs_shadow;
19108 vm_object_offset_t cs_offset;
19109 void *cs_blobs;
19110 struct vnode *cs_vnode;
19111 kern_return_t cs_ret;
19112
19113 if (map->pmap == NULL ||
19114 entry->is_sub_map || /* XXX FBDP: recurse on sub-range? */
19115 VME_OBJECT(entry) == VM_OBJECT_NULL ||
19116 !(entry->protection & VM_PROT_EXECUTE)) {
19117 return KERN_SUCCESS;
19118 }
19119
19120 vm_map_lock_assert_exclusive(map);
19121
19122 if (entry->used_for_jit) {
19123 cs_ret = pmap_cs_associate(map->pmap,
19124 PMAP_CS_ASSOCIATE_JIT,
19125 entry->vme_start,
19126 entry->vme_end - entry->vme_start);
19127 goto done;
19128 }
19129
19130 if (vmk_flags.vmkf_remap_prot_copy) {
19131 cs_ret = pmap_cs_associate(map->pmap,
19132 PMAP_CS_ASSOCIATE_COW,
19133 entry->vme_start,
19134 entry->vme_end - entry->vme_start);
19135 goto done;
19136 }
19137
19138 vm_object_lock_shared(VME_OBJECT(entry));
19139 cs_offset = VME_OFFSET(entry);
19140 for (cs_object = VME_OBJECT(entry);
19141 (cs_object != VM_OBJECT_NULL &&
19142 !cs_object->code_signed);
19143 cs_object = cs_shadow) {
19144 cs_shadow = cs_object->shadow;
19145 if (cs_shadow != VM_OBJECT_NULL) {
19146 cs_offset += cs_object->vo_shadow_offset;
19147 vm_object_lock_shared(cs_shadow);
19148 }
19149 vm_object_unlock(cs_object);
19150 }
19151 if (cs_object == VM_OBJECT_NULL) {
19152 return KERN_SUCCESS;
19153 }
19154
19155 cs_offset += cs_object->paging_offset;
19156 cs_vnode = vnode_pager_lookup_vnode(cs_object->pager);
19157 cs_ret = vnode_pager_get_cs_blobs(cs_vnode,
19158 &cs_blobs);
19159 assert(cs_ret == KERN_SUCCESS);
19160 cs_ret = cs_associate_blob_with_mapping(map->pmap,
19161 entry->vme_start,
19162 (entry->vme_end -
19163 entry->vme_start),
19164 cs_offset,
19165 cs_blobs);
19166 vm_object_unlock(cs_object);
19167 cs_object = VM_OBJECT_NULL;
19168
19169 done:
19170 if (cs_ret == KERN_SUCCESS) {
19171 DTRACE_VM2(vm_map_entry_cs_associate_success,
19172 vm_map_offset_t, entry->vme_start,
19173 vm_map_offset_t, entry->vme_end);
19174 if (vm_map_executable_immutable) {
19175 /*
19176 * Prevent this executable
19177 * mapping from being unmapped
19178 * or modified.
19179 */
19180 entry->permanent = TRUE;
19181 }
19182 /*
19183 * pmap says it will validate the
19184 * code-signing validity of pages
19185 * faulted in via this mapping, so
19186 * this map entry should be marked so
19187 * that vm_fault() bypasses code-signing
19188 * validation for faults coming through
19189 * this mapping.
19190 */
19191 entry->pmap_cs_associated = TRUE;
19192 } else if (cs_ret == KERN_NOT_SUPPORTED) {
19193 /*
19194 * pmap won't check the code-signing
19195 * validity of pages faulted in via
19196 * this mapping, so VM should keep
19197 * doing it.
19198 */
19199 DTRACE_VM3(vm_map_entry_cs_associate_off,
19200 vm_map_offset_t, entry->vme_start,
19201 vm_map_offset_t, entry->vme_end,
19202 int, cs_ret);
19203 } else {
19204 /*
19205 * A real error: do not allow
19206 * execution in this mapping.
19207 */
19208 DTRACE_VM3(vm_map_entry_cs_associate_failure,
19209 vm_map_offset_t, entry->vme_start,
19210 vm_map_offset_t, entry->vme_end,
19211 int, cs_ret);
19212 entry->protection &= ~VM_PROT_EXECUTE;
19213 entry->max_protection &= ~VM_PROT_EXECUTE;
19214 }
19215
19216 return cs_ret;
19217 }
19218 #endif /* PMAP_CS */
19219
19220 /*
19221 * FORKED CORPSE FOOTPRINT
19222 *
19223 * A forked corpse gets a copy of the original VM map but its pmap is mostly
19224 * empty since it never ran and never got to fault in any pages.
19225 * Collecting footprint info (via "sysctl vm.self_region_footprint") for
19226 * a forked corpse would therefore return very little information.
19227 *
19228 * When forking a corpse, we can pass the VM_MAP_FORK_CORPSE_FOOTPRINT option
19229 * to vm_map_fork() to collect footprint information from the original VM map
19230 * and its pmap, and store it in the forked corpse's VM map. That information
19231 * is stored in place of the VM map's "hole list" since we'll never need to
19232 * look for holes in the corpse's map.
19233 *
19234 * The corpse's footprint info looks like this:
19235 *
19236 * vm_map->vmmap_corpse_footprint points to pageable kernel memory laid out
19237 * as follows:
19238 * +---------------------------------------+
19239 * header-> | cf_size |
19240 * +-------------------+-------------------+
19241 * | cf_last_region | cf_last_zeroes |
19242 * +-------------------+-------------------+
19243 * region1-> | cfr_vaddr |
19244 * +-------------------+-------------------+
19245 * | cfr_num_pages | d0 | d1 | d2 | d3 |
19246 * +---------------------------------------+
19247 * | d4 | d5 | ... |
19248 * +---------------------------------------+
19249 * | ... |
19250 * +-------------------+-------------------+
19251 * | dy | dz | na | na | cfr_vaddr... | <-region2
19252 * +-------------------+-------------------+
19253 * | cfr_vaddr (ctd) | cfr_num_pages |
19254 * +---------------------------------------+
19255 * | d0 | d1 ... |
19256 * +---------------------------------------+
19257 * ...
19258 * +---------------------------------------+
19259 * last region-> | cfr_vaddr |
19260 * +---------------------------------------+
19261 * + cfr_num_pages | d0 | d1 | d2 | d3 |
19262 * +---------------------------------------+
19263 * ...
19264 * +---------------------------------------+
19265 * | dx | dy | dz | na | na | na | na | na |
19266 * +---------------------------------------+
19267 *
19268 * where:
19269 * cf_size: total size of the buffer (rounded to page size)
19270 * cf_last_region: offset in the buffer of the last "region" sub-header
19271 * cf_last_zeroes: number of trailing "zero" dispositions at the end
19272 * of last region
19273 * cfr_vaddr: virtual address of the start of the covered "region"
19274 * cfr_num_pages: number of pages in the covered "region"
19275 * d*: disposition of the page at that virtual address
19276 * Regions in the buffer are word-aligned.
19277 *
19278 * We estimate the size of the buffer based on the number of memory regions
19279 * and the virtual size of the address space. While copying each memory region
19280 * during vm_map_fork(), we also collect the footprint info for that region
19281 * and store it in the buffer, packing it as much as possible (coalescing
19282 * contiguous memory regions to avoid having too many region headers and
19283 * avoiding long streaks of "zero" page dispositions by splitting footprint
19284 * "regions", so the number of regions in the footprint buffer might not match
19285 * the number of memory regions in the address space.
19286 *
19287 * We also have to copy the original task's "nonvolatile" ledgers since that's
19288 * part of the footprint and will need to be reported to any tool asking for
19289 * the footprint information of the forked corpse.
19290 */
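/*
 * Illustrative sketch (not part of the original source, and not the actual
 * declarations, which live in the VM headers): a minimal C rendering of the
 * buffer layout pictured above, using the field names referenced by the code
 * below.  The dispositions are reached by pointer arithmetic past each region
 * sub-header; the flexible array member shown here is only for clarity.
 *
 *	struct vm_map_corpse_footprint_header {
 *		vm_size_t	cf_size;	// total buffer size (page rounded)
 *		uint32_t	cf_last_region;	// offset of last region sub-header
 *		uint32_t	cf_last_zeroes;	// trailing "zero" dispositions in
 *						// that last region
 *	};
 *	struct vm_map_corpse_footprint_region {
 *		vm_map_offset_t	cfr_vaddr;	// start of the covered range
 *		uint32_t	cfr_num_pages;	// pages covered by this region
 *		unsigned char	cfr_dispositions[]; // one byte per page, padded
 *						    // to an int boundary
 *	};
 */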
19291
19292 uint64_t vm_map_corpse_footprint_count = 0;
19293 uint64_t vm_map_corpse_footprint_size_avg = 0;
19294 uint64_t vm_map_corpse_footprint_size_max = 0;
19295 uint64_t vm_map_corpse_footprint_full = 0;
19296 uint64_t vm_map_corpse_footprint_no_buf = 0;
19297
19298 /*
19299 * vm_map_corpse_footprint_new_region:
19300 * closes the current footprint "region" and creates a new one
19301 *
19302 * Returns NULL if there's not enough space in the buffer for a new region.
19303 */
19304 static struct vm_map_corpse_footprint_region *
19305 vm_map_corpse_footprint_new_region(
19306 struct vm_map_corpse_footprint_header *footprint_header)
19307 {
19308 uintptr_t footprint_edge;
19309 uint32_t new_region_offset;
19310 struct vm_map_corpse_footprint_region *footprint_region;
19311 struct vm_map_corpse_footprint_region *new_footprint_region;
19312
19313 footprint_edge = ((uintptr_t)footprint_header +
19314 footprint_header->cf_size);
19315 footprint_region = ((struct vm_map_corpse_footprint_region *)
19316 ((char *)footprint_header +
19317 footprint_header->cf_last_region));
19318 assert((uintptr_t)footprint_region + sizeof(*footprint_region) <=
19319 footprint_edge);
19320
19321 /* get rid of trailing zeroes in the last region */
19322 assert(footprint_region->cfr_num_pages >=
19323 footprint_header->cf_last_zeroes);
19324 footprint_region->cfr_num_pages -=
19325 footprint_header->cf_last_zeroes;
19326 footprint_header->cf_last_zeroes = 0;
19327
19328 /* reuse this region if it's now empty */
19329 if (footprint_region->cfr_num_pages == 0) {
19330 return footprint_region;
19331 }
19332
19333 /* compute offset of new region */
19334 new_region_offset = footprint_header->cf_last_region;
19335 new_region_offset += sizeof(*footprint_region);
19336 new_region_offset += footprint_region->cfr_num_pages;
19337 new_region_offset = roundup(new_region_offset, sizeof(int));
19338
19339 /* check if we're going over the edge */
19340 if (((uintptr_t)footprint_header +
19341 new_region_offset +
19342 sizeof(*footprint_region)) >=
19343 footprint_edge) {
19344 /* over the edge: no new region */
19345 return NULL;
19346 }
19347
19348 /* adjust offset of last region in header */
19349 footprint_header->cf_last_region = new_region_offset;
19350
19351 new_footprint_region = (struct vm_map_corpse_footprint_region *)
19352 ((char *)footprint_header +
19353 footprint_header->cf_last_region);
19354 new_footprint_region->cfr_vaddr = 0;
19355 new_footprint_region->cfr_num_pages = 0;
19356 /* caller needs to initialize new region */
19357
19358 return new_footprint_region;
19359 }
19360
19361 /*
19362 * vm_map_corpse_footprint_collect:
19363 * collects footprint information for "old_entry" in "old_map" and
19364 * stores it in "new_map"'s vmmap_footprint_info.
19365 */
19366 kern_return_t
19367 vm_map_corpse_footprint_collect(
19368 vm_map_t old_map,
19369 vm_map_entry_t old_entry,
19370 vm_map_t new_map)
19371 {
19372 vm_map_offset_t va;
19373 int disp;
19374 kern_return_t kr;
19375 struct vm_map_corpse_footprint_header *footprint_header;
19376 struct vm_map_corpse_footprint_region *footprint_region;
19377 struct vm_map_corpse_footprint_region *new_footprint_region;
19378 unsigned char *next_disp_p;
19379 uintptr_t footprint_edge;
19380 uint32_t num_pages_tmp;
19381
19382 va = old_entry->vme_start;
19383
19384 vm_map_lock_assert_exclusive(old_map);
19385 vm_map_lock_assert_exclusive(new_map);
19386
19387 assert(new_map->has_corpse_footprint);
19388 assert(!old_map->has_corpse_footprint);
19389 if (!new_map->has_corpse_footprint ||
19390 old_map->has_corpse_footprint) {
19391 /*
19392 * This can only transfer footprint info from a
19393 * map with a live pmap to a map with a corpse footprint.
19394 */
19395 return KERN_NOT_SUPPORTED;
19396 }
19397
19398 if (new_map->vmmap_corpse_footprint == NULL) {
19399 vm_offset_t buf;
19400 vm_size_t buf_size;
19401
19402 buf = 0;
19403 buf_size = (sizeof(*footprint_header) +
19404 (old_map->hdr.nentries
19405 *
19406 (sizeof(*footprint_region) +
19407 +3)) /* potential alignment for each region */
19408 +
19409 ((old_map->size / PAGE_SIZE)
19410 *
19411 sizeof(char))); /* disposition for each page */
19412 // printf("FBDP corpse map %p guestimate footprint size 0x%llx\n", new_map, (uint64_t) buf_size);
19413 buf_size = round_page(buf_size);
19414
19415 /* limit buffer to 1 page to validate overflow detection */
19416 // buf_size = PAGE_SIZE;
19417
19418 /* limit size to a somewhat sane amount */
19419 #if CONFIG_EMBEDDED
19420 #define VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE (256*1024) /* 256KB */
19421 #else /* CONFIG_EMBEDDED */
19422 #define VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE (8*1024*1024) /* 8MB */
19423 #endif /* CONFIG_EMBEDDED */
19424 if (buf_size > VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE) {
19425 buf_size = VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE;
19426 }
19427
19428 /*
19429 * Allocate the pageable buffer (with a trailing guard page).
19430 * It will be zero-filled on demand.
19431 */
19432 kr = kernel_memory_allocate(kernel_map,
19433 &buf,
19434 (buf_size
19435 + PAGE_SIZE), /* trailing guard page */
19436 0, /* mask */
19437 KMA_PAGEABLE | KMA_GUARD_LAST,
19438 VM_KERN_MEMORY_DIAG);
19439 if (kr != KERN_SUCCESS) {
19440 vm_map_corpse_footprint_no_buf++;
19441 return kr;
19442 }
19443
19444 /* initialize header and 1st region */
19445 footprint_header = (struct vm_map_corpse_footprint_header *)buf;
19446 new_map->vmmap_corpse_footprint = footprint_header;
19447
19448 footprint_header->cf_size = buf_size;
19449 footprint_header->cf_last_region =
19450 sizeof(*footprint_header);
19451 footprint_header->cf_last_zeroes = 0;
19452
19453 footprint_region = (struct vm_map_corpse_footprint_region *)
19454 ((char *)footprint_header +
19455 footprint_header->cf_last_region);
19456 footprint_region->cfr_vaddr = 0;
19457 footprint_region->cfr_num_pages = 0;
19458 } else {
19459 /* retrieve header and last region */
19460 footprint_header = (struct vm_map_corpse_footprint_header *)
19461 new_map->vmmap_corpse_footprint;
19462 footprint_region = (struct vm_map_corpse_footprint_region *)
19463 ((char *)footprint_header +
19464 footprint_header->cf_last_region);
19465 }
19466 footprint_edge = ((uintptr_t)footprint_header +
19467 footprint_header->cf_size);
19468
19469 if ((footprint_region->cfr_vaddr +
19470 (((vm_map_offset_t)footprint_region->cfr_num_pages) *
19471 PAGE_SIZE))
19472 != old_entry->vme_start) {
19473 uint64_t num_pages_delta;
19474 uint32_t region_offset_delta;
19475
19476 /*
19477 * Not the next contiguous virtual address:
19478 * start a new region or store "zero" dispositions for
19479 * the missing pages?
19480 */
19481 /* size of gap in actual page dispositions */
19482 num_pages_delta = (((old_entry->vme_start -
19483 footprint_region->cfr_vaddr) / PAGE_SIZE)
19484 - footprint_region->cfr_num_pages);
19485 /* size of gap as a new footprint region header */
19486 region_offset_delta =
19487 (sizeof(*footprint_region) +
19488 roundup((footprint_region->cfr_num_pages -
19489 footprint_header->cf_last_zeroes),
19490 sizeof(int)) -
19491 (footprint_region->cfr_num_pages -
19492 footprint_header->cf_last_zeroes));
19493 // printf("FBDP %s:%d region 0x%x 0x%llx 0x%x vme_start 0x%llx pages_delta 0x%llx region_delta 0x%x\n", __FUNCTION__, __LINE__, footprint_header->cf_last_region, footprint_region->cfr_vaddr, footprint_region->cfr_num_pages, old_entry->vme_start, num_pages_delta, region_offset_delta);
19494 if (region_offset_delta < num_pages_delta ||
19495 os_add3_overflow(footprint_region->cfr_num_pages,
19496 (uint32_t) num_pages_delta,
19497 1,
19498 &num_pages_tmp)) {
19499 /*
19500 * Storing data for this gap would take more space
19501 * than inserting a new footprint region header:
19502 * let's start a new region and save space. If it's a
19503 * tie, let's avoid using a new region, since that
19504 * would require more region hops to find the right
19505 * range during lookups.
19506 *
19507 * If the current region's cfr_num_pages would overflow
19508 * if we added "zero" page dispositions for the gap,
19509 * no choice but to start a new region.
19510 */
19511 // printf("FBDP %s:%d new region\n", __FUNCTION__, __LINE__);
19512 new_footprint_region =
19513 vm_map_corpse_footprint_new_region(footprint_header);
19514 /* check that we're not going over the edge */
19515 if (new_footprint_region == NULL) {
19516 goto over_the_edge;
19517 }
19518 footprint_region = new_footprint_region;
19519 /* initialize new region as empty */
19520 footprint_region->cfr_vaddr = old_entry->vme_start;
19521 footprint_region->cfr_num_pages = 0;
19522 } else {
19523 /*
19524 * Store "zero" page dispositions for the missing
19525 * pages.
19526 */
19527 // printf("FBDP %s:%d zero gap\n", __FUNCTION__, __LINE__);
19528 for (; num_pages_delta > 0; num_pages_delta--) {
19529 next_disp_p =
19530 ((unsigned char *) footprint_region +
19531 sizeof(*footprint_region) +
19532 footprint_region->cfr_num_pages);
19533 /* check that we're not going over the edge */
19534 if ((uintptr_t)next_disp_p >= footprint_edge) {
19535 goto over_the_edge;
19536 }
19537 /* store "zero" disposition for this gap page */
19538 footprint_region->cfr_num_pages++;
19539 *next_disp_p = (unsigned char) 0;
19540 footprint_header->cf_last_zeroes++;
19541 }
19542 }
19543 }
19544
19545 for (va = old_entry->vme_start;
19546 va < old_entry->vme_end;
19547 va += PAGE_SIZE) {
19548 vm_object_t object;
19549
19550 object = VME_OBJECT(old_entry);
19551 if (!old_entry->is_sub_map &&
19552 old_entry->iokit_acct &&
19553 object != VM_OBJECT_NULL &&
19554 object->internal &&
19555 object->purgable == VM_PURGABLE_DENY) {
19556 /*
19557 * Non-purgeable IOKit memory: phys_footprint
19558 * includes the entire virtual mapping.
19559 * Since the forked corpse's VM map entry will not
19560 * have "iokit_acct", pretend that this page's
19561 * disposition is "present & internal", so that it
19562 * shows up in the forked corpse's footprint.
19563 */
19564 disp = (PMAP_QUERY_PAGE_PRESENT |
19565 PMAP_QUERY_PAGE_INTERNAL);
19566 } else {
19567 disp = 0;
19568 pmap_query_page_info(old_map->pmap,
19569 va,
19570 &disp);
19571 }
19572
19573 // if (va < SHARED_REGION_BASE_ARM64) printf("FBDP collect map %p va 0x%llx disp 0x%x\n", new_map, va, disp);
19574
19575 if (disp == 0 && footprint_region->cfr_num_pages == 0) {
19576 /*
19577 * Ignore "zero" dispositions at start of
19578 * region: just move start of region.
19579 */
19580 footprint_region->cfr_vaddr += PAGE_SIZE;
19581 continue;
19582 }
19583
19584 /* would region's cfr_num_pages overflow? */
19585 if (os_add_overflow(footprint_region->cfr_num_pages, 1,
19586 &num_pages_tmp)) {
19587 /* overflow: create a new region */
19588 new_footprint_region =
19589 vm_map_corpse_footprint_new_region(
19590 footprint_header);
19591 if (new_footprint_region == NULL) {
19592 goto over_the_edge;
19593 }
19594 footprint_region = new_footprint_region;
19595 footprint_region->cfr_vaddr = va;
19596 footprint_region->cfr_num_pages = 0;
19597 }
19598
19599 next_disp_p = ((unsigned char *)footprint_region +
19600 sizeof(*footprint_region) +
19601 footprint_region->cfr_num_pages);
19602 /* check that we're not going over the edge */
19603 if ((uintptr_t)next_disp_p >= footprint_edge) {
19604 goto over_the_edge;
19605 }
19606 /* store this disposition */
19607 *next_disp_p = (unsigned char) disp;
19608 footprint_region->cfr_num_pages++;
19609
19610 if (disp != 0) {
19611 /* non-zero disp: break the current zero streak */
19612 footprint_header->cf_last_zeroes = 0;
19613 /* done */
19614 continue;
19615 }
19616
19617 /* zero disp: add to the current streak of zeroes */
19618 footprint_header->cf_last_zeroes++;
19619 if ((footprint_header->cf_last_zeroes +
19620 roundup((footprint_region->cfr_num_pages -
19621 footprint_header->cf_last_zeroes) &
19622 (sizeof(int) - 1),
19623 sizeof(int))) <
19624 (sizeof(*footprint_header))) {
19625 /*
19626 * There are not enough trailing "zero" dispositions
19627 * (+ the extra padding we would need for the previous
19628 * region); creating a new region would not save space
19629 * at this point, so let's keep this "zero" disposition
19630 * in this region and reconsider later.
19631 */
19632 continue;
19633 }
19634 /*
19635 * Create a new region to avoid having too many consecutive
19636 * "zero" dispositions.
19637 */
19638 new_footprint_region =
19639 vm_map_corpse_footprint_new_region(footprint_header);
19640 if (new_footprint_region == NULL) {
19641 goto over_the_edge;
19642 }
19643 footprint_region = new_footprint_region;
19644 /* initialize the new region as empty ... */
19645 footprint_region->cfr_num_pages = 0;
19646 /* ... and skip this "zero" disp */
19647 footprint_region->cfr_vaddr = va + PAGE_SIZE;
19648 }
19649
19650 return KERN_SUCCESS;
19651
19652 over_the_edge:
19653 // printf("FBDP map %p footprint was full for va 0x%llx\n", new_map, va);
19654 vm_map_corpse_footprint_full++;
19655 return KERN_RESOURCE_SHORTAGE;
19656 }
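/*
 * Illustrative worked example (not part of the original source): the
 * collector above only starts a new footprint region when that is
 * strictly cheaper than padding the current region with "zero"
 * disposition bytes.  For instance, assuming a new region header plus
 * its alignment padding costs 16 bytes (the real cost follows from
 * sizeof(struct vm_map_corpse_footprint_region) and the word
 * alignment applied above), a 10-page gap is cheaper to keep as 10
 * zero bytes in the current region, while a 40-page gap is cheaper to
 * encode by starting a new region at the next mapped address.  The
 * same space comparison limits runs of trailing zeroes within a
 * region, tracked via cf_last_zeroes.
 */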
19657
19658 /*
19659 * vm_map_corpse_footprint_collect_done:
19660 * completes the footprint collection by getting rid of any remaining
19661 * trailing "zero" dispositions and trimming the unused part of the
19662 * kernel buffer
19663 */
19664 void
19665 vm_map_corpse_footprint_collect_done(
19666 vm_map_t new_map)
19667 {
19668 struct vm_map_corpse_footprint_header *footprint_header;
19669 struct vm_map_corpse_footprint_region *footprint_region;
19670 vm_size_t buf_size, actual_size;
19671 kern_return_t kr;
19672
19673 assert(new_map->has_corpse_footprint);
19674 if (!new_map->has_corpse_footprint ||
19675 new_map->vmmap_corpse_footprint == NULL) {
19676 return;
19677 }
19678
19679 footprint_header = (struct vm_map_corpse_footprint_header *)
19680 new_map->vmmap_corpse_footprint;
19681 buf_size = footprint_header->cf_size;
19682
19683 footprint_region = (struct vm_map_corpse_footprint_region *)
19684 ((char *)footprint_header +
19685 footprint_header->cf_last_region);
19686
19687 /* get rid of trailing zeroes in last region */
19688 assert(footprint_region->cfr_num_pages >= footprint_header->cf_last_zeroes);
19689 footprint_region->cfr_num_pages -= footprint_header->cf_last_zeroes;
19690 footprint_header->cf_last_zeroes = 0;
19691
19692 actual_size = (vm_size_t)(footprint_header->cf_last_region +
19693 sizeof(*footprint_region) +
19694 footprint_region->cfr_num_pages);
19695
19696 // printf("FBDP map %p buf_size 0x%llx actual_size 0x%llx\n", new_map, (uint64_t) buf_size, (uint64_t) actual_size);
19697 vm_map_corpse_footprint_size_avg =
19698 (((vm_map_corpse_footprint_size_avg *
19699 vm_map_corpse_footprint_count) +
19700 actual_size) /
19701 (vm_map_corpse_footprint_count + 1));
19702 vm_map_corpse_footprint_count++;
19703 if (actual_size > vm_map_corpse_footprint_size_max) {
19704 vm_map_corpse_footprint_size_max = actual_size;
19705 }
19706
19707 actual_size = round_page(actual_size);
19708 if (buf_size > actual_size) {
19709 kr = vm_deallocate(kernel_map,
19710 ((vm_address_t)footprint_header +
19711 actual_size +
19712 PAGE_SIZE), /* trailing guard page */
19713 (buf_size - actual_size));
19714 assertf(kr == KERN_SUCCESS,
19715 "trim: footprint_header %p buf_size 0x%llx actual_size 0x%llx kr=0x%x\n",
19716 footprint_header,
19717 (uint64_t) buf_size,
19718 (uint64_t) actual_size,
19719 kr);
19720 kr = vm_protect(kernel_map,
19721 ((vm_address_t)footprint_header +
19722 actual_size),
19723 PAGE_SIZE,
19724 FALSE, /* set_maximum */
19725 VM_PROT_NONE);
19726 assertf(kr == KERN_SUCCESS,
19727 "guard: footprint_header %p buf_size 0x%llx actual_size 0x%llx kr=0x%x\n",
19728 footprint_header,
19729 (uint64_t) buf_size,
19730 (uint64_t) actual_size,
19731 kr);
19732 }
19733
19734 footprint_header->cf_size = actual_size;
19735 }
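/*
 * Illustrative sketch (not part of the original source): the size
 * bookkeeping above reduced to a stand-alone helper.
 * "demo_footprint_used_bytes" is a hypothetical name; like the code
 * above, it assumes cf_last_region holds the byte offset of the last
 * region from the start of the footprint buffer.
 */
#if 0 /* example only */
static vm_size_t
demo_footprint_used_bytes(
	struct vm_map_corpse_footprint_header *footprint_header,
	struct vm_map_corpse_footprint_region *last_region)
{
	vm_size_t used;

	/* header and all regions up to the start of the last one ... */
	used = footprint_header->cf_last_region;
	/* ... plus the last region's own header ... */
	used += sizeof(*last_region);
	/* ... plus one disposition byte per page it covers */
	used += last_region->cfr_num_pages;
	/* the buffer is then trimmed back to whole pages */
	return round_page(used);
}
#endif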
19736
19737 /*
19738 * vm_map_corpse_footprint_query_page_info:
19739 * retrieves the disposition of the page at virtual address "va"
19740 * in the forked corpse's VM map
19741 *
19742 * This is the equivalent of pmap_query_page_info() for a forked corpse.
19743 */
19744 kern_return_t
19745 vm_map_corpse_footprint_query_page_info(
19746 vm_map_t map,
19747 vm_map_offset_t va,
19748 int *disp)
19749 {
19750 struct vm_map_corpse_footprint_header *footprint_header;
19751 struct vm_map_corpse_footprint_region *footprint_region;
19752 uint32_t footprint_region_offset;
19753 vm_map_offset_t region_start, region_end;
19754 int disp_idx;
19755 kern_return_t kr;
19756
19757 if (!map->has_corpse_footprint) {
19758 *disp = 0;
19759 kr = KERN_INVALID_ARGUMENT;
19760 goto done;
19761 }
19762
19763 footprint_header = map->vmmap_corpse_footprint;
19764 if (footprint_header == NULL) {
19765 *disp = 0;
19766 // if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disp);
19767 kr = KERN_INVALID_ARGUMENT;
19768 goto done;
19769 }
19770
19771 /* start looking at the hint ("cf_hint_region") */
19772 footprint_region_offset = footprint_header->cf_hint_region;
19773
19774 lookup_again:
19775 if (footprint_region_offset < sizeof(*footprint_header)) {
19776 /* hint too low: start from 1st region */
19777 footprint_region_offset = sizeof(*footprint_header);
19778 }
19779 if (footprint_region_offset >= footprint_header->cf_last_region) {
19780 /* hint too high: re-start from 1st region */
19781 footprint_region_offset = sizeof(*footprint_header);
19782 }
19783 footprint_region = (struct vm_map_corpse_footprint_region *)
19784 ((char *)footprint_header + footprint_region_offset);
19785 region_start = footprint_region->cfr_vaddr;
19786 region_end = (region_start +
19787 ((vm_map_offset_t)(footprint_region->cfr_num_pages) *
19788 PAGE_SIZE));
19789 if (va < region_start &&
19790 footprint_region_offset != sizeof(*footprint_header)) {
19791 /* "va" is before the hint region */
19792
19793 /* reset the hint (in a racy way...) */
19794 footprint_header->cf_hint_region = sizeof(*footprint_header);
19795 /* lookup "va" again from 1st region */
19796 footprint_region_offset = sizeof(*footprint_header);
19797 goto lookup_again;
19798 }
19799
19800 while (va >= region_end) {
19801 if (footprint_region_offset >= footprint_header->cf_last_region) {
19802 break;
19803 }
19804 /* skip the region's header */
19805 footprint_region_offset += sizeof(*footprint_region);
19806 /* skip the region's page dispositions */
19807 footprint_region_offset += footprint_region->cfr_num_pages;
19808 /* align to next word boundary */
19809 footprint_region_offset =
19810 roundup(footprint_region_offset,
19811 sizeof(int));
19812 footprint_region = (struct vm_map_corpse_footprint_region *)
19813 ((char *)footprint_header + footprint_region_offset);
19814 region_start = footprint_region->cfr_vaddr;
19815 region_end = (region_start +
19816 ((vm_map_offset_t)(footprint_region->cfr_num_pages) *
19817 PAGE_SIZE));
19818 }
19819 if (va < region_start || va >= region_end) {
19820 /* page not found */
19821 *disp = 0;
19822 // if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disp);
19823 kr = KERN_SUCCESS;
19824 goto done;
19825 }
19826
19827 /* "va" found: set the lookup hint for next lookup (in a racy way...) */
19828 footprint_header->cf_hint_region = footprint_region_offset;
19829
19830 /* get page disposition for "va" in this region */
19831 disp_idx = (int) ((va - footprint_region->cfr_vaddr) / PAGE_SIZE);
19832 *disp = (int) (footprint_region->cfr_disposition[disp_idx]);
19833
19834 kr = KERN_SUCCESS;
19835 done:
19836 // if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disp);
19837 /* dtrace -n 'vminfo:::footprint_query_page_info { printf("map 0x%p va 0x%llx disp 0x%x kr 0x%x", arg0, arg1, arg2, arg3); }' */
19838 DTRACE_VM4(footprint_query_page_info,
19839 vm_map_t, map,
19840 vm_map_offset_t, va,
19841 int, *disp,
19842 kern_return_t, kr);
19843
19844 return kr;
19845 }
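/*
 * Illustrative sketch (not part of the original source): how a caller
 * might choose between the live pmap and the corpse footprint when
 * asking for a page's disposition.  "demo_page_disposition" is a
 * hypothetical helper, not an existing interface.
 */
#if 0 /* example only */
static int
demo_page_disposition(
	vm_map_t map,
	vm_map_offset_t va)
{
	int disp;

	disp = 0;
	if (map->has_corpse_footprint) {
		/* forked corpse: use the footprint recorded at fork time */
		vm_map_corpse_footprint_query_page_info(map, va, &disp);
	} else {
		/* live map: ask the pmap directly */
		pmap_query_page_info(map->pmap, va, &disp);
	}
	return disp;
}
#endif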
19846
19847
19848 static void
19849 vm_map_corpse_footprint_destroy(
19850 vm_map_t map)
19851 {
19852 if (map->has_corpse_footprint &&
19853 map->vmmap_corpse_footprint != 0) {
19854 struct vm_map_corpse_footprint_header *footprint_header;
19855 vm_size_t buf_size;
19856 kern_return_t kr;
19857
19858 footprint_header = map->vmmap_corpse_footprint;
19859 buf_size = footprint_header->cf_size;
19860 kr = vm_deallocate(kernel_map,
19861 (vm_offset_t) map->vmmap_corpse_footprint,
19862 ((vm_size_t) buf_size
19863 + PAGE_SIZE)); /* trailing guard page */
19864 assertf(kr == KERN_SUCCESS, "kr=0x%x\n", kr);
19865 map->vmmap_corpse_footprint = 0;
19866 map->has_corpse_footprint = FALSE;
19867 }
19868 }
19869
19870 /*
19871 * vm_map_copy_footprint_ledgers:
19872 * copies any ledger that's relevant to the memory footprint of "old_task"
19873 * into the forked corpse's task ("new_task")
19874 */
19875 void
19876 vm_map_copy_footprint_ledgers(
19877 task_t old_task,
19878 task_t new_task)
19879 {
19880 vm_map_copy_ledger(old_task, new_task, task_ledgers.phys_footprint);
19881 vm_map_copy_ledger(old_task, new_task, task_ledgers.purgeable_nonvolatile);
19882 vm_map_copy_ledger(old_task, new_task, task_ledgers.purgeable_nonvolatile_compressed);
19883 vm_map_copy_ledger(old_task, new_task, task_ledgers.internal);
19884 vm_map_copy_ledger(old_task, new_task, task_ledgers.internal_compressed);
19885 vm_map_copy_ledger(old_task, new_task, task_ledgers.iokit_mapped);
19886 vm_map_copy_ledger(old_task, new_task, task_ledgers.alternate_accounting);
19887 vm_map_copy_ledger(old_task, new_task, task_ledgers.alternate_accounting_compressed);
19888 vm_map_copy_ledger(old_task, new_task, task_ledgers.page_table);
19889 vm_map_copy_ledger(old_task, new_task, task_ledgers.tagged_footprint);
19890 vm_map_copy_ledger(old_task, new_task, task_ledgers.tagged_footprint_compressed);
19891 vm_map_copy_ledger(old_task, new_task, task_ledgers.network_nonvolatile);
19892 vm_map_copy_ledger(old_task, new_task, task_ledgers.network_nonvolatile_compressed);
19893 vm_map_copy_ledger(old_task, new_task, task_ledgers.media_footprint);
19894 vm_map_copy_ledger(old_task, new_task, task_ledgers.media_footprint_compressed);
19895 vm_map_copy_ledger(old_task, new_task, task_ledgers.graphics_footprint);
19896 vm_map_copy_ledger(old_task, new_task, task_ledgers.graphics_footprint_compressed);
19897 vm_map_copy_ledger(old_task, new_task, task_ledgers.neural_footprint);
19898 vm_map_copy_ledger(old_task, new_task, task_ledgers.neural_footprint_compressed);
19899 vm_map_copy_ledger(old_task, new_task, task_ledgers.wired_mem);
19900 }
19901
19902 /*
19903 * vm_map_copy_ledger:
19904 * copy a single ledger from "old_task" to "new_task"
19905 */
19906 void
19907 vm_map_copy_ledger(
19908 task_t old_task,
19909 task_t new_task,
19910 int ledger_entry)
19911 {
19912 ledger_amount_t old_balance, new_balance, delta;
19913
19914 assert(new_task->map->has_corpse_footprint);
19915 if (!new_task->map->has_corpse_footprint) {
19916 return;
19917 }
19918
19919 /* turn off sanity checks for the ledger we're about to mess with */
19920 ledger_disable_panic_on_negative(new_task->ledger,
19921 ledger_entry);
19922
19923 /* adjust "new_task" to match "old_task" */
19924 ledger_get_balance(old_task->ledger,
19925 ledger_entry,
19926 &old_balance);
19927 ledger_get_balance(new_task->ledger,
19928 ledger_entry,
19929 &new_balance);
19930 if (new_balance == old_balance) {
19931 /* new == old: done */
19932 } else if (new_balance > old_balance) {
19933 /* new > old ==> new -= new - old */
19934 delta = new_balance - old_balance;
19935 ledger_debit(new_task->ledger,
19936 ledger_entry,
19937 delta);
19938 } else {
19939 /* new < old ==> new += old - new */
19940 delta = old_balance - new_balance;
19941 ledger_credit(new_task->ledger,
19942 ledger_entry,
19943 delta);
19944 }
19945 }
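/*
 * Illustrative worked example (not part of the original source): if
 * the original task's ledger entry reads 5 * PAGE_SIZE and the
 * corpse's reads 2 * PAGE_SIZE, delta is 3 * PAGE_SIZE and the
 * corpse's ledger is credited by that amount; with the balances
 * reversed, the corpse's ledger would be debited by the same delta.
 * Either way the corpse ends up reporting the same balance as the
 * task it was forked from.
 */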
19946
19947 #if MACH_ASSERT
19948
19949 extern int pmap_ledgers_panic;
19950 extern int pmap_ledgers_panic_leeway;
19951
19952 #define LEDGER_DRIFT(__LEDGER) \
19953 int __LEDGER##_over; \
19954 ledger_amount_t __LEDGER##_over_total; \
19955 ledger_amount_t __LEDGER##_over_max; \
19956 int __LEDGER##_under; \
19957 ledger_amount_t __LEDGER##_under_total; \
19958 ledger_amount_t __LEDGER##_under_max
19959
19960 struct {
19961 uint64_t num_pmaps_checked;
19962
19963 LEDGER_DRIFT(phys_footprint);
19964 LEDGER_DRIFT(internal);
19965 LEDGER_DRIFT(internal_compressed);
19966 LEDGER_DRIFT(iokit_mapped);
19967 LEDGER_DRIFT(alternate_accounting);
19968 LEDGER_DRIFT(alternate_accounting_compressed);
19969 LEDGER_DRIFT(page_table);
19970 LEDGER_DRIFT(purgeable_volatile);
19971 LEDGER_DRIFT(purgeable_nonvolatile);
19972 LEDGER_DRIFT(purgeable_volatile_compressed);
19973 LEDGER_DRIFT(purgeable_nonvolatile_compressed);
19974 LEDGER_DRIFT(tagged_nofootprint);
19975 LEDGER_DRIFT(tagged_footprint);
19976 LEDGER_DRIFT(tagged_nofootprint_compressed);
19977 LEDGER_DRIFT(tagged_footprint_compressed);
19978 LEDGER_DRIFT(network_volatile);
19979 LEDGER_DRIFT(network_nonvolatile);
19980 LEDGER_DRIFT(network_volatile_compressed);
19981 LEDGER_DRIFT(network_nonvolatile_compressed);
19982 LEDGER_DRIFT(media_nofootprint);
19983 LEDGER_DRIFT(media_footprint);
19984 LEDGER_DRIFT(media_nofootprint_compressed);
19985 LEDGER_DRIFT(media_footprint_compressed);
19986 LEDGER_DRIFT(graphics_nofootprint);
19987 LEDGER_DRIFT(graphics_footprint);
19988 LEDGER_DRIFT(graphics_nofootprint_compressed);
19989 LEDGER_DRIFT(graphics_footprint_compressed);
19990 LEDGER_DRIFT(neural_nofootprint);
19991 LEDGER_DRIFT(neural_footprint);
19992 LEDGER_DRIFT(neural_nofootprint_compressed);
19993 LEDGER_DRIFT(neural_footprint_compressed);
19994 } pmap_ledgers_drift;
19995
19996 void
19997 vm_map_pmap_check_ledgers(
19998 pmap_t pmap,
19999 ledger_t ledger,
20000 int pid,
20001 char *procname)
20002 {
20003 ledger_amount_t bal;
20004 boolean_t do_panic;
20005
20006 do_panic = FALSE;
20007
20008 pmap_ledgers_drift.num_pmaps_checked++;
20009
20010 #define LEDGER_CHECK_BALANCE(__LEDGER) \
20011 MACRO_BEGIN \
20012 int panic_on_negative = TRUE; \
20013 ledger_get_balance(ledger, \
20014 task_ledgers.__LEDGER, \
20015 &bal); \
20016 ledger_get_panic_on_negative(ledger, \
20017 task_ledgers.__LEDGER, \
20018 &panic_on_negative); \
20019 if (bal != 0) { \
20020 if (panic_on_negative || \
20021 (pmap_ledgers_panic && \
20022 pmap_ledgers_panic_leeway > 0 && \
20023 (bal > (pmap_ledgers_panic_leeway * PAGE_SIZE) || \
20024 bal < (-pmap_ledgers_panic_leeway * PAGE_SIZE)))) { \
20025 do_panic = TRUE; \
20026 } \
20027 printf("LEDGER BALANCE proc %d (%s) " \
20028 "\"%s\" = %lld\n", \
20029 pid, procname, #__LEDGER, bal); \
20030 if (bal > 0) { \
20031 pmap_ledgers_drift.__LEDGER##_over++; \
20032 pmap_ledgers_drift.__LEDGER##_over_total += bal; \
20033 if (bal > pmap_ledgers_drift.__LEDGER##_over_max) { \
20034 pmap_ledgers_drift.__LEDGER##_over_max = bal; \
20035 } \
20036 } else if (bal < 0) { \
20037 pmap_ledgers_drift.__LEDGER##_under++; \
20038 pmap_ledgers_drift.__LEDGER##_under_total += bal; \
20039 if (bal < pmap_ledgers_drift.__LEDGER##_under_max) { \
20040 pmap_ledgers_drift.__LEDGER##_under_max = bal; \
20041 } \
20042 } \
20043 } \
20044 MACRO_END
20045
20046 LEDGER_CHECK_BALANCE(phys_footprint);
20047 LEDGER_CHECK_BALANCE(internal);
20048 LEDGER_CHECK_BALANCE(internal_compressed);
20049 LEDGER_CHECK_BALANCE(iokit_mapped);
20050 LEDGER_CHECK_BALANCE(alternate_accounting);
20051 LEDGER_CHECK_BALANCE(alternate_accounting_compressed);
20052 LEDGER_CHECK_BALANCE(page_table);
20053 LEDGER_CHECK_BALANCE(purgeable_volatile);
20054 LEDGER_CHECK_BALANCE(purgeable_nonvolatile);
20055 LEDGER_CHECK_BALANCE(purgeable_volatile_compressed);
20056 LEDGER_CHECK_BALANCE(purgeable_nonvolatile_compressed);
20057 LEDGER_CHECK_BALANCE(tagged_nofootprint);
20058 LEDGER_CHECK_BALANCE(tagged_footprint);
20059 LEDGER_CHECK_BALANCE(tagged_nofootprint_compressed);
20060 LEDGER_CHECK_BALANCE(tagged_footprint_compressed);
20061 LEDGER_CHECK_BALANCE(network_volatile);
20062 LEDGER_CHECK_BALANCE(network_nonvolatile);
20063 LEDGER_CHECK_BALANCE(network_volatile_compressed);
20064 LEDGER_CHECK_BALANCE(network_nonvolatile_compressed);
20065 LEDGER_CHECK_BALANCE(media_nofootprint);
20066 LEDGER_CHECK_BALANCE(media_footprint);
20067 LEDGER_CHECK_BALANCE(media_nofootprint_compressed);
20068 LEDGER_CHECK_BALANCE(media_footprint_compressed);
20069 LEDGER_CHECK_BALANCE(graphics_nofootprint);
20070 LEDGER_CHECK_BALANCE(graphics_footprint);
20071 LEDGER_CHECK_BALANCE(graphics_nofootprint_compressed);
20072 LEDGER_CHECK_BALANCE(graphics_footprint_compressed);
20073 LEDGER_CHECK_BALANCE(neural_nofootprint);
20074 LEDGER_CHECK_BALANCE(neural_footprint);
20075 LEDGER_CHECK_BALANCE(neural_nofootprint_compressed);
20076 LEDGER_CHECK_BALANCE(neural_footprint_compressed);
20077
20078 if (do_panic) {
20079 if (pmap_ledgers_panic) {
20080 panic("pmap_destroy(%p) %d[%s] has imbalanced ledgers\n",
20081 pmap, pid, procname);
20082 } else {
20083 printf("pmap_destroy(%p) %d[%s] has imbalanced ledgers\n",
20084 pmap, pid, procname);
20085 }
20086 }
20087 }
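/*
 * Illustrative worked example (not part of the original source): with
 * pmap_ledgers_panic set and pmap_ledgers_panic_leeway at 2, a
 * residual balance of 3 * PAGE_SIZE in any checked ledger sets
 * do_panic and triggers the panic above, while a residual of
 * 1 * PAGE_SIZE is only logged, unless that particular ledger has
 * panic-on-negative enabled, in which case any non-zero balance
 * triggers the panic.
 */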
20088 #endif /* MACH_ASSERT */