1 /*
2 * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_map.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * Virtual memory mapping module.
64 */
65
66 #include <task_swapper.h>
67 #include <mach_assert.h>
68
69 #include <vm/vm_options.h>
70
71 #include <libkern/OSAtomic.h>
72
73 #include <mach/kern_return.h>
74 #include <mach/port.h>
75 #include <mach/vm_attributes.h>
76 #include <mach/vm_param.h>
77 #include <mach/vm_behavior.h>
78 #include <mach/vm_statistics.h>
79 #include <mach/memory_object.h>
80 #include <mach/mach_vm.h>
81 #include <machine/cpu_capabilities.h>
82 #include <mach/sdt.h>
83
84 #include <kern/assert.h>
85 #include <kern/backtrace.h>
86 #include <kern/counters.h>
87 #include <kern/exc_guard.h>
88 #include <kern/kalloc.h>
89 #include <kern/zalloc.h>
90
91 #include <vm/cpm.h>
92 #include <vm/vm_compressor.h>
93 #include <vm/vm_compressor_pager.h>
94 #include <vm/vm_init.h>
95 #include <vm/vm_fault.h>
96 #include <vm/vm_map.h>
97 #include <vm/vm_object.h>
98 #include <vm/vm_page.h>
99 #include <vm/vm_pageout.h>
100 #include <vm/pmap.h>
101 #include <vm/vm_kern.h>
102 #include <ipc/ipc_port.h>
103 #include <kern/sched_prim.h>
104 #include <kern/misc_protos.h>
105
106 #include <mach/vm_map_server.h>
107 #include <mach/mach_host_server.h>
108 #include <vm/vm_protos.h>
109 #include <vm/vm_purgeable_internal.h>
110
111 #include <vm/vm_protos.h>
112 #include <vm/vm_shared_region.h>
113 #include <vm/vm_map_store.h>
114
115 #include <san/kasan.h>
116
117 #include <sys/codesign.h>
118 #include <libkern/section_keywords.h>
119 #if DEVELOPMENT || DEBUG
120 extern int proc_selfcsflags(void);
121 #if CONFIG_EMBEDDED
122 extern int panic_on_unsigned_execute;
123 #endif /* CONFIG_EMBEDDED */
124 #endif /* DEVELOPMENT || DEBUG */
125
126 #if __arm64__
127 extern const int fourk_binary_compatibility_unsafe;
128 extern const int fourk_binary_compatibility_allow_wx;
129 #endif /* __arm64__ */
130 extern int proc_selfpid(void);
131 extern char *proc_name_address(void *p);
132
133 #if VM_MAP_DEBUG_APPLE_PROTECT
134 int vm_map_debug_apple_protect = 0;
135 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
136 #if VM_MAP_DEBUG_FOURK
137 int vm_map_debug_fourk = 0;
138 #endif /* VM_MAP_DEBUG_FOURK */
139
140 SECURITY_READ_ONLY_LATE(int) vm_map_executable_immutable = 1;
141 int vm_map_executable_immutable_verbose = 0;
142
143 os_refgrp_decl(static, map_refgrp, "vm_map", NULL);
144
145 extern u_int32_t random(void); /* from <libkern/libkern.h> */
146 /* Internal prototypes
147 */
148
149 static void vm_map_simplify_range(
150 vm_map_t map,
151 vm_map_offset_t start,
152 vm_map_offset_t end); /* forward */
153
154 static boolean_t vm_map_range_check(
155 vm_map_t map,
156 vm_map_offset_t start,
157 vm_map_offset_t end,
158 vm_map_entry_t *entry);
159
160 static vm_map_entry_t _vm_map_entry_create(
161 struct vm_map_header *map_header, boolean_t map_locked);
162
163 static void _vm_map_entry_dispose(
164 struct vm_map_header *map_header,
165 vm_map_entry_t entry);
166
167 static void vm_map_pmap_enter(
168 vm_map_t map,
169 vm_map_offset_t addr,
170 vm_map_offset_t end_addr,
171 vm_object_t object,
172 vm_object_offset_t offset,
173 vm_prot_t protection);
174
175 static void _vm_map_clip_end(
176 struct vm_map_header *map_header,
177 vm_map_entry_t entry,
178 vm_map_offset_t end);
179
180 static void _vm_map_clip_start(
181 struct vm_map_header *map_header,
182 vm_map_entry_t entry,
183 vm_map_offset_t start);
184
185 static void vm_map_entry_delete(
186 vm_map_t map,
187 vm_map_entry_t entry);
188
189 static kern_return_t vm_map_delete(
190 vm_map_t map,
191 vm_map_offset_t start,
192 vm_map_offset_t end,
193 int flags,
194 vm_map_t zap_map);
195
196 static void vm_map_copy_insert(
197 vm_map_t map,
198 vm_map_entry_t after_where,
199 vm_map_copy_t copy);
200
201 static kern_return_t vm_map_copy_overwrite_unaligned(
202 vm_map_t dst_map,
203 vm_map_entry_t entry,
204 vm_map_copy_t copy,
205 vm_map_address_t start,
206 boolean_t discard_on_success);
207
208 static kern_return_t vm_map_copy_overwrite_aligned(
209 vm_map_t dst_map,
210 vm_map_entry_t tmp_entry,
211 vm_map_copy_t copy,
212 vm_map_offset_t start,
213 pmap_t pmap);
214
215 static kern_return_t vm_map_copyin_kernel_buffer(
216 vm_map_t src_map,
217 vm_map_address_t src_addr,
218 vm_map_size_t len,
219 boolean_t src_destroy,
220 vm_map_copy_t *copy_result); /* OUT */
221
222 static kern_return_t vm_map_copyout_kernel_buffer(
223 vm_map_t map,
224 vm_map_address_t *addr, /* IN/OUT */
225 vm_map_copy_t copy,
226 vm_map_size_t copy_size,
227 boolean_t overwrite,
228 boolean_t consume_on_success);
229
230 static void vm_map_fork_share(
231 vm_map_t old_map,
232 vm_map_entry_t old_entry,
233 vm_map_t new_map);
234
235 static boolean_t vm_map_fork_copy(
236 vm_map_t old_map,
237 vm_map_entry_t *old_entry_p,
238 vm_map_t new_map,
239 int vm_map_copyin_flags);
240
241 static kern_return_t vm_map_wire_nested(
242 vm_map_t map,
243 vm_map_offset_t start,
244 vm_map_offset_t end,
245 vm_prot_t caller_prot,
246 vm_tag_t tag,
247 boolean_t user_wire,
248 pmap_t map_pmap,
249 vm_map_offset_t pmap_addr,
250 ppnum_t *physpage_p);
251
252 static kern_return_t vm_map_unwire_nested(
253 vm_map_t map,
254 vm_map_offset_t start,
255 vm_map_offset_t end,
256 boolean_t user_wire,
257 pmap_t map_pmap,
258 vm_map_offset_t pmap_addr);
259
260 static kern_return_t vm_map_overwrite_submap_recurse(
261 vm_map_t dst_map,
262 vm_map_offset_t dst_addr,
263 vm_map_size_t dst_size);
264
265 static kern_return_t vm_map_copy_overwrite_nested(
266 vm_map_t dst_map,
267 vm_map_offset_t dst_addr,
268 vm_map_copy_t copy,
269 boolean_t interruptible,
270 pmap_t pmap,
271 boolean_t discard_on_success);
272
273 static kern_return_t vm_map_remap_extract(
274 vm_map_t map,
275 vm_map_offset_t addr,
276 vm_map_size_t size,
277 boolean_t copy,
278 struct vm_map_header *map_header,
279 vm_prot_t *cur_protection,
280 vm_prot_t *max_protection,
281 vm_inherit_t inheritance,
282 boolean_t pageable,
283 boolean_t same_map,
284 vm_map_kernel_flags_t vmk_flags);
285
286 static kern_return_t vm_map_remap_range_allocate(
287 vm_map_t map,
288 vm_map_address_t *address,
289 vm_map_size_t size,
290 vm_map_offset_t mask,
291 int flags,
292 vm_map_kernel_flags_t vmk_flags,
293 vm_tag_t tag,
294 vm_map_entry_t *map_entry);
295
296 static void vm_map_region_look_for_page(
297 vm_map_t map,
298 vm_map_offset_t va,
299 vm_object_t object,
300 vm_object_offset_t offset,
301 int max_refcnt,
302 int depth,
303 vm_region_extended_info_t extended,
304 mach_msg_type_number_t count);
305
306 static int vm_map_region_count_obj_refs(
307 vm_map_entry_t entry,
308 vm_object_t object);
309
310
311 static kern_return_t vm_map_willneed(
312 vm_map_t map,
313 vm_map_offset_t start,
314 vm_map_offset_t end);
315
316 static kern_return_t vm_map_reuse_pages(
317 vm_map_t map,
318 vm_map_offset_t start,
319 vm_map_offset_t end);
320
321 static kern_return_t vm_map_reusable_pages(
322 vm_map_t map,
323 vm_map_offset_t start,
324 vm_map_offset_t end);
325
326 static kern_return_t vm_map_can_reuse(
327 vm_map_t map,
328 vm_map_offset_t start,
329 vm_map_offset_t end);
330
331 #if MACH_ASSERT
332 static kern_return_t vm_map_pageout(
333 vm_map_t map,
334 vm_map_offset_t start,
335 vm_map_offset_t end);
336 #endif /* MACH_ASSERT */
337
338 static void vm_map_corpse_footprint_destroy(
339 vm_map_t map);
340
341 pid_t find_largest_process_vm_map_entries(void);
342
343 /*
344 * Macros to copy a vm_map_entry. We must be careful to correctly
345 * manage the wired page count. vm_map_entry_copy() creates a new
346 * map entry to the same memory - the wired count in the new entry
347 * must be set to zero. vm_map_entry_copy_full() creates a new
348 * entry that is identical to the old entry. This preserves the
349 * wire count; it's used for map splitting and zone changing in
350 * vm_map_copyout.
351 */
352
353 #if CONFIG_EMBEDDED
354
355 /*
356 * The "used_for_jit" flag was copied from OLD to NEW in vm_map_entry_copy().
357 * But for security reasons on embedded platforms, we don't want the
358 * new mapping to be "used for jit", so we always reset the flag here.
359 * Same for "pmap_cs_associated".
360 */
361 #define VM_MAP_ENTRY_COPY_CODE_SIGNING(NEW, OLD) \
362 MACRO_BEGIN \
363 (NEW)->used_for_jit = FALSE; \
364 (NEW)->pmap_cs_associated = FALSE; \
365 MACRO_END
366
367 #else /* CONFIG_EMBEDDED */
368
369 /*
370 * The "used_for_jit" flag was copied from OLD to NEW in vm_map_entry_copy().
371 * On macOS, the new mapping can be "used for jit".
372 */
373 #define VM_MAP_ENTRY_COPY_CODE_SIGNING(NEW, OLD) \
374 MACRO_BEGIN \
375 assert((NEW)->used_for_jit == (OLD)->used_for_jit); \
376 assert((NEW)->pmap_cs_associated == FALSE); \
377 MACRO_END
378
379 #endif /* CONFIG_EMBEDDED */
380
381 #define vm_map_entry_copy(NEW, OLD) \
382 MACRO_BEGIN \
383 boolean_t _vmec_reserved = (NEW)->from_reserved_zone; \
384 *(NEW) = *(OLD); \
385 (NEW)->is_shared = FALSE; \
386 (NEW)->needs_wakeup = FALSE; \
387 (NEW)->in_transition = FALSE; \
388 (NEW)->wired_count = 0; \
389 (NEW)->user_wired_count = 0; \
390 (NEW)->permanent = FALSE; \
391 VM_MAP_ENTRY_COPY_CODE_SIGNING((NEW),(OLD)); \
392 (NEW)->from_reserved_zone = _vmec_reserved; \
393 if ((NEW)->iokit_acct) { \
394 assertf(!(NEW)->use_pmap, "old %p new %p\n", (OLD), (NEW)); \
395 (NEW)->iokit_acct = FALSE; \
396 (NEW)->use_pmap = TRUE; \
397 } \
398 (NEW)->vme_resilient_codesign = FALSE; \
399 (NEW)->vme_resilient_media = FALSE; \
400 (NEW)->vme_atomic = FALSE; \
401 (NEW)->vme_no_copy_on_read = FALSE; \
402 MACRO_END
403
404 #define vm_map_entry_copy_full(NEW, OLD) \
405 MACRO_BEGIN \
406 boolean_t _vmecf_reserved = (NEW)->from_reserved_zone; \
407 (*(NEW) = *(OLD)); \
408 (NEW)->from_reserved_zone = _vmecf_reserved; \
409 MACRO_END
410
411 /*
412 * Normal lock_read_to_write() returns FALSE/0 on failure.
413  * These functions evaluate to zero on success and a non-zero value on failure.
414 */
415 __attribute__((always_inline))
416 int
417 vm_map_lock_read_to_write(vm_map_t map)
418 {
419 if (lck_rw_lock_shared_to_exclusive(&(map)->lock)) {
420 DTRACE_VM(vm_map_lock_upgrade);
421 return 0;
422 }
423 return 1;
424 }
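/*
 * Illustrative caller pattern (a sketch, not taken from this file): a
 * failed lck_rw_lock_shared_to_exclusive() drops the shared lock, so a
 * caller that still needs the write lock must re-acquire the lock and
 * usually re-validate any lookups it made under the read lock.
 *
 *	if (vm_map_lock_read_to_write(map)) {
 *		// upgrade failed: the read lock is gone; take the
 *		// write lock and redo any lookups made under it
 *		vm_map_lock(map);
 *	}
 *	// ... map is now write-locked ...
 *	vm_map_unlock(map);
 */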
425
426 __attribute__((always_inline))
427 boolean_t
428 vm_map_try_lock(vm_map_t map)
429 {
430 if (lck_rw_try_lock_exclusive(&(map)->lock)) {
431 DTRACE_VM(vm_map_lock_w);
432 return TRUE;
433 }
434 return FALSE;
435 }
436
437 __attribute__((always_inline))
438 boolean_t
439 vm_map_try_lock_read(vm_map_t map)
440 {
441 if (lck_rw_try_lock_shared(&(map)->lock)) {
442 DTRACE_VM(vm_map_lock_r);
443 return TRUE;
444 }
445 return FALSE;
446 }
447
448 /*
449 * Decide if we want to allow processes to execute from their data or stack areas.
450 * override_nx() returns true if we do. Data/stack execution can be enabled independently
451 * for 32 and 64 bit processes. Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
452 * or allow_stack_exec to enable data execution for that type of data area for that particular
453 * ABI (or both by or'ing the flags together). These are initialized in the architecture
454 * specific pmap files since the default behavior varies according to architecture. The
455 * main reason it varies is because of the need to provide binary compatibility with old
456 * applications that were written before these restrictions came into being. In the old
457 * days, an app could execute anything it could read, but this has slowly been tightened
458 * up over time. The default behavior is:
459 *
460 * 32-bit PPC apps may execute from both stack and data areas
461  * 32-bit Intel apps may execute from data areas but not stack
462 * 64-bit PPC/Intel apps may not execute from either data or stack
463 *
464 * An application on any architecture may override these defaults by explicitly
465 * adding PROT_EXEC permission to the page in question with the mprotect(2)
466 * system call. This code here just determines what happens when an app tries to
467 * execute from a page that lacks execute permission.
468 *
469 * Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
470 * default behavior for both 32 and 64 bit apps on a system-wide basis. Furthermore,
471 * a Mach-O header flag bit (MH_NO_HEAP_EXECUTION) can be used to forcibly disallow
472 * execution from data areas for a particular binary even if the arch normally permits it. As
473 * a final wrinkle, a posix_spawn attribute flag can be used to negate this opt-in header bit
474 * to support some complicated use cases, notably browsers with out-of-process plugins that
475 * are not all NX-safe.
476 */
477
478 extern int allow_data_exec, allow_stack_exec;
479
480 int
481 override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
482 {
483 int current_abi;
484
485 if (map->pmap == kernel_pmap) {
486 return FALSE;
487 }
488
489 /*
490 * Determine if the app is running in 32 or 64 bit mode.
491 */
492
493 if (vm_map_is_64bit(map)) {
494 current_abi = VM_ABI_64;
495 } else {
496 current_abi = VM_ABI_32;
497 }
498
499 /*
500 * Determine if we should allow the execution based on whether it's a
501 * stack or data area and the current architecture.
502 */
503
504 if (user_tag == VM_MEMORY_STACK) {
505 return allow_stack_exec & current_abi;
506 }
507
508 return (allow_data_exec & current_abi) && (map->map_disallow_data_exec == FALSE);
509 }
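/*
 * Worked example (hypothetical values and names, not from this file),
 * assuming map_disallow_data_exec is FALSE for both maps:
 *
 *	allow_data_exec  = VM_ABI_32;	// 32-bit data execution only
 *	allow_stack_exec = 0;		// no stack execution for anyone
 *
 *	override_nx(map32, VM_MEMORY_STACK)	-> 0        (denied)
 *	override_nx(map32, some_data_tag)	-> non-zero (allowed)
 *	override_nx(map64, some_data_tag)	-> 0        (denied)
 *
 * where "map32"/"map64" stand for 32- and 64-bit user maps and
 * "some_data_tag" is any user_tag other than VM_MEMORY_STACK.
 */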
510
511
512 /*
513 * Virtual memory maps provide for the mapping, protection,
514 * and sharing of virtual memory objects. In addition,
515 * this module provides for an efficient virtual copy of
516 * memory from one map to another.
517 *
518 * Synchronization is required prior to most operations.
519 *
520 * Maps consist of an ordered doubly-linked list of simple
521 * entries; a single hint is used to speed up lookups.
522 *
523 * Sharing maps have been deleted from this version of Mach.
524 * All shared objects are now mapped directly into the respective
525 * maps. This requires a change in the copy on write strategy;
526 * the asymmetric (delayed) strategy is used for shared temporary
527 * objects instead of the symmetric (shadow) strategy. All maps
528 * are now "top level" maps (either task map, kernel map or submap
529 * of the kernel map).
530 *
531  *	Since portions of maps are specified by start/end addresses,
532 * which may not align with existing map entries, all
533 * routines merely "clip" entries to these start/end values.
534 * [That is, an entry is split into two, bordering at a
535 * start or end value.] Note that these clippings may not
536 * always be necessary (as the two resulting entries are then
537 * not changed); however, the clipping is done for convenience.
538 * No attempt is currently made to "glue back together" two
539 * abutting entries.
540 *
541 * The symmetric (shadow) copy strategy implements virtual copy
542 * by copying VM object references from one map to
543 * another, and then marking both regions as copy-on-write.
544 * It is important to note that only one writeable reference
545 * to a VM object region exists in any map when this strategy
546 * is used -- this means that shadow object creation can be
547  *	delayed until a write operation occurs.  The asymmetric (delayed)
548 * strategy allows multiple maps to have writeable references to
549 * the same region of a vm object, and hence cannot delay creating
550 * its copy objects. See vm_object_copy_quickly() in vm_object.c.
551 * Copying of permanent objects is completely different; see
552 * vm_object_copy_strategically() in vm_object.c.
553 */
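/*
 * Clipping example (illustrative, not part of the original comment):
 * given a single entry covering [A, D), an operation on [B, C) with
 * A < B < C < D first clips at B (yielding [A, B) and [B, D)) and then
 * at C (yielding [B, C) and [C, D)), so the operation can be applied to
 * exactly the [B, C) entry without disturbing its neighbors.
 */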
554
555 static zone_t vm_map_zone; /* zone for vm_map structures */
556 zone_t vm_map_entry_zone; /* zone for vm_map_entry structures */
557 static zone_t vm_map_entry_reserved_zone; /* zone with reserve for non-blocking allocations */
558 static zone_t vm_map_copy_zone; /* zone for vm_map_copy structures */
559 zone_t vm_map_holes_zone; /* zone for vm map holes (vm_map_links) structures */
560
561
562 /*
563 * Placeholder object for submap operations. This object is dropped
564 * into the range by a call to vm_map_find, and removed when
565 * vm_map_submap creates the submap.
566 */
567
568 vm_object_t vm_submap_object;
569
570 static void *map_data;
571 static vm_size_t map_data_size;
572 static void *kentry_data;
573 static vm_size_t kentry_data_size;
574 static void *map_holes_data;
575 static vm_size_t map_holes_data_size;
576
577 #if CONFIG_EMBEDDED
578 #define NO_COALESCE_LIMIT 0
579 #else
580 #define NO_COALESCE_LIMIT ((1024 * 128) - 1)
581 #endif
582
583 /* Skip acquiring locks if we're in the midst of a kernel core dump */
584 unsigned int not_in_kdp = 1;
585
586 unsigned int vm_map_set_cache_attr_count = 0;
587
588 kern_return_t
589 vm_map_set_cache_attr(
590 vm_map_t map,
591 vm_map_offset_t va)
592 {
593 vm_map_entry_t map_entry;
594 vm_object_t object;
595 kern_return_t kr = KERN_SUCCESS;
596
597 vm_map_lock_read(map);
598
599 if (!vm_map_lookup_entry(map, va, &map_entry) ||
600 map_entry->is_sub_map) {
601 /*
602 * that memory is not properly mapped
603 */
604 kr = KERN_INVALID_ARGUMENT;
605 goto done;
606 }
607 object = VME_OBJECT(map_entry);
608
609 if (object == VM_OBJECT_NULL) {
610 /*
611 * there should be a VM object here at this point
612 */
613 kr = KERN_INVALID_ARGUMENT;
614 goto done;
615 }
616 vm_object_lock(object);
617 object->set_cache_attr = TRUE;
618 vm_object_unlock(object);
619
620 vm_map_set_cache_attr_count++;
621 done:
622 vm_map_unlock_read(map);
623
624 return kr;
625 }
626
627
628 #if CONFIG_CODE_DECRYPTION
629 /*
630 * vm_map_apple_protected:
631 * This remaps the requested part of the object with an object backed by
632 * the decrypting pager.
633 * crypt_info contains entry points and session data for the crypt module.
634 * The crypt_info block will be copied by vm_map_apple_protected. The data structures
635 * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
636 */
637 kern_return_t
638 vm_map_apple_protected(
639 vm_map_t map,
640 vm_map_offset_t start,
641 vm_map_offset_t end,
642 vm_object_offset_t crypto_backing_offset,
643 struct pager_crypt_info *crypt_info)
644 {
645 boolean_t map_locked;
646 kern_return_t kr;
647 vm_map_entry_t map_entry;
648 struct vm_map_entry tmp_entry;
649 memory_object_t unprotected_mem_obj;
650 vm_object_t protected_object;
651 vm_map_offset_t map_addr;
652 vm_map_offset_t start_aligned, end_aligned;
653 vm_object_offset_t crypto_start, crypto_end;
654 int vm_flags;
655 vm_map_kernel_flags_t vmk_flags;
656
657 vm_flags = 0;
658 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
659
660 map_locked = FALSE;
661 unprotected_mem_obj = MEMORY_OBJECT_NULL;
662
663 start_aligned = vm_map_trunc_page(start, PAGE_MASK_64);
664 end_aligned = vm_map_round_page(end, PAGE_MASK_64);
665 start_aligned = vm_map_trunc_page(start_aligned, VM_MAP_PAGE_MASK(map));
666 end_aligned = vm_map_round_page(end_aligned, VM_MAP_PAGE_MASK(map));
667
668 #if __arm64__
669 /*
670 * "start" and "end" might be 4K-aligned but not 16K-aligned,
671 * so we might have to loop and establish up to 3 mappings:
672 *
673 * + the first 16K-page, which might overlap with the previous
674 * 4K-aligned mapping,
675 * + the center,
676 * + the last 16K-page, which might overlap with the next
677 * 4K-aligned mapping.
678  * Each of these mappings might be backed by a vnode pager (if
679 * properly page-aligned) or a "fourk_pager", itself backed by a
680 * vnode pager (if 4K-aligned but not page-aligned).
681 */
682 #endif /* __arm64__ */
683
684 map_addr = start_aligned;
685 for (map_addr = start_aligned;
686 map_addr < end;
687 map_addr = tmp_entry.vme_end) {
688 vm_map_lock(map);
689 map_locked = TRUE;
690
691 /* lookup the protected VM object */
692 if (!vm_map_lookup_entry(map,
693 map_addr,
694 &map_entry) ||
695 map_entry->is_sub_map ||
696 VME_OBJECT(map_entry) == VM_OBJECT_NULL ||
697 !(map_entry->protection & VM_PROT_EXECUTE)) {
698 /* that memory is not properly mapped */
699 kr = KERN_INVALID_ARGUMENT;
700 goto done;
701 }
702
703 /* get the protected object to be decrypted */
704 protected_object = VME_OBJECT(map_entry);
705 if (protected_object == VM_OBJECT_NULL) {
706 /* there should be a VM object here at this point */
707 kr = KERN_INVALID_ARGUMENT;
708 goto done;
709 }
710 /* ensure protected object stays alive while map is unlocked */
711 vm_object_reference(protected_object);
712
713 /* limit the map entry to the area we want to cover */
714 vm_map_clip_start(map, map_entry, start_aligned);
715 vm_map_clip_end(map, map_entry, end_aligned);
716
717 tmp_entry = *map_entry;
718 map_entry = VM_MAP_ENTRY_NULL; /* not valid after unlocking map */
719 vm_map_unlock(map);
720 map_locked = FALSE;
721
722 /*
723 * This map entry might be only partially encrypted
724 * (if not fully "page-aligned").
725 */
726 crypto_start = 0;
727 crypto_end = tmp_entry.vme_end - tmp_entry.vme_start;
728 if (tmp_entry.vme_start < start) {
729 if (tmp_entry.vme_start != start_aligned) {
730 kr = KERN_INVALID_ADDRESS;
731 }
732 crypto_start += (start - tmp_entry.vme_start);
733 }
734 if (tmp_entry.vme_end > end) {
735 if (tmp_entry.vme_end != end_aligned) {
736 kr = KERN_INVALID_ADDRESS;
737 }
738 crypto_end -= (tmp_entry.vme_end - end);
739 }
740
741 /*
742 * This "extra backing offset" is needed to get the decryption
743 * routine to use the right key. It adjusts for the possibly
744 * relative offset of an interposed "4K" pager...
745 */
746 if (crypto_backing_offset == (vm_object_offset_t) -1) {
747 crypto_backing_offset = VME_OFFSET(&tmp_entry);
748 }
749
750 /*
751 * Lookup (and create if necessary) the protected memory object
752 * matching that VM object.
753 * If successful, this also grabs a reference on the memory object,
754 * to guarantee that it doesn't go away before we get a chance to map
755 * it.
756 */
757 unprotected_mem_obj = apple_protect_pager_setup(
758 protected_object,
759 VME_OFFSET(&tmp_entry),
760 crypto_backing_offset,
761 crypt_info,
762 crypto_start,
763 crypto_end);
764
765 /* release extra ref on protected object */
766 vm_object_deallocate(protected_object);
767
768 if (unprotected_mem_obj == NULL) {
769 kr = KERN_FAILURE;
770 goto done;
771 }
772
773 vm_flags = VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
774 /* can overwrite an immutable mapping */
775 vmk_flags.vmkf_overwrite_immutable = TRUE;
776 #if __arm64__
777 if (tmp_entry.used_for_jit &&
778 (VM_MAP_PAGE_SHIFT(map) != FOURK_PAGE_SHIFT ||
779 PAGE_SHIFT != FOURK_PAGE_SHIFT) &&
780 fourk_binary_compatibility_unsafe &&
781 fourk_binary_compatibility_allow_wx) {
782 printf("** FOURK_COMPAT [%d]: "
783 "allowing write+execute at 0x%llx\n",
784 proc_selfpid(), tmp_entry.vme_start);
785 vmk_flags.vmkf_map_jit = TRUE;
786 }
787 #endif /* __arm64__ */
788
789 /* map this memory object in place of the current one */
790 map_addr = tmp_entry.vme_start;
791 kr = vm_map_enter_mem_object(map,
792 &map_addr,
793 (tmp_entry.vme_end -
794 tmp_entry.vme_start),
795 (mach_vm_offset_t) 0,
796 vm_flags,
797 vmk_flags,
798 VM_KERN_MEMORY_NONE,
799 (ipc_port_t)(uintptr_t) unprotected_mem_obj,
800 0,
801 TRUE,
802 tmp_entry.protection,
803 tmp_entry.max_protection,
804 tmp_entry.inheritance);
805 assertf(kr == KERN_SUCCESS,
806 "kr = 0x%x\n", kr);
807 assertf(map_addr == tmp_entry.vme_start,
808 "map_addr=0x%llx vme_start=0x%llx tmp_entry=%p\n",
809 (uint64_t)map_addr,
810 (uint64_t) tmp_entry.vme_start,
811 &tmp_entry);
812
813 #if VM_MAP_DEBUG_APPLE_PROTECT
814 if (vm_map_debug_apple_protect) {
815 printf("APPLE_PROTECT: map %p [0x%llx:0x%llx] pager %p:"
816 " backing:[object:%p,offset:0x%llx,"
817 "crypto_backing_offset:0x%llx,"
818 "crypto_start:0x%llx,crypto_end:0x%llx]\n",
819 map,
820 (uint64_t) map_addr,
821 (uint64_t) (map_addr + (tmp_entry.vme_end -
822 tmp_entry.vme_start)),
823 unprotected_mem_obj,
824 protected_object,
825 VME_OFFSET(&tmp_entry),
826 crypto_backing_offset,
827 crypto_start,
828 crypto_end);
829 }
830 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
831
832 /*
833 * Release the reference obtained by
834 * apple_protect_pager_setup().
835 * The mapping (if it succeeded) is now holding a reference on
836 * the memory object.
837 */
838 memory_object_deallocate(unprotected_mem_obj);
839 unprotected_mem_obj = MEMORY_OBJECT_NULL;
840
841 /* continue with next map entry */
842 crypto_backing_offset += (tmp_entry.vme_end -
843 tmp_entry.vme_start);
844 crypto_backing_offset -= crypto_start;
845 }
846 kr = KERN_SUCCESS;
847
848 done:
849 if (map_locked) {
850 vm_map_unlock(map);
851 }
852 return kr;
853 }
854 #endif /* CONFIG_CODE_DECRYPTION */
855
856
857 lck_grp_t vm_map_lck_grp;
858 lck_grp_attr_t vm_map_lck_grp_attr;
859 lck_attr_t vm_map_lck_attr;
860 lck_attr_t vm_map_lck_rw_attr;
861
862 #if CONFIG_EMBEDDED
863 int malloc_no_cow = 1;
864 #define VM_PROTECT_WX_FAIL 0
865 #else /* CONFIG_EMBEDDED */
866 int malloc_no_cow = 0;
867 #define VM_PROTECT_WX_FAIL 1
868 #endif /* CONFIG_EMBEDDED */
869 uint64_t vm_memory_malloc_no_cow_mask = 0ULL;
870
871 /*
872 * vm_map_init:
873 *
874 * Initialize the vm_map module. Must be called before
875 * any other vm_map routines.
876 *
877 * Map and entry structures are allocated from zones -- we must
878 * initialize those zones.
879 *
880 * There are three zones of interest:
881 *
882 * vm_map_zone: used to allocate maps.
883 * vm_map_entry_zone: used to allocate map entries.
884 * vm_map_entry_reserved_zone: fallback zone for kernel map entries
885 *
886 * The kernel allocates map entries from a special zone that is initially
887 * "crammed" with memory. It would be difficult (perhaps impossible) for
888  * the kernel to allocate more memory to an entry zone when it became
889 * empty since the very act of allocating memory implies the creation
890 * of a new entry.
891 */
892 void
893 vm_map_init(
894 void)
895 {
896 vm_size_t entry_zone_alloc_size;
897 const char *mez_name = "VM map entries";
898
899 vm_map_zone = zinit((vm_map_size_t) sizeof(struct _vm_map), 40 * 1024,
900 PAGE_SIZE, "maps");
901 zone_change(vm_map_zone, Z_NOENCRYPT, TRUE);
902 #if defined(__LP64__)
903 entry_zone_alloc_size = PAGE_SIZE * 5;
904 #else
905 entry_zone_alloc_size = PAGE_SIZE * 6;
906 #endif
907 vm_map_entry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
908 1024 * 1024, entry_zone_alloc_size,
909 mez_name);
910 zone_change(vm_map_entry_zone, Z_NOENCRYPT, TRUE);
911 zone_change(vm_map_entry_zone, Z_NOCALLOUT, TRUE);
912 zone_change(vm_map_entry_zone, Z_GZALLOC_EXEMPT, TRUE);
913
914 vm_map_entry_reserved_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
915 kentry_data_size * 64, kentry_data_size,
916 "Reserved VM map entries");
917 zone_change(vm_map_entry_reserved_zone, Z_NOENCRYPT, TRUE);
918 /* Don't quarantine because we always need elements available */
919 zone_change(vm_map_entry_reserved_zone, Z_KASAN_QUARANTINE, FALSE);
920
921 vm_map_copy_zone = zinit((vm_map_size_t) sizeof(struct vm_map_copy),
922 16 * 1024, PAGE_SIZE, "VM map copies");
923 zone_change(vm_map_copy_zone, Z_NOENCRYPT, TRUE);
924
925 vm_map_holes_zone = zinit((vm_map_size_t) sizeof(struct vm_map_links),
926 16 * 1024, PAGE_SIZE, "VM map holes");
927 zone_change(vm_map_holes_zone, Z_NOENCRYPT, TRUE);
928
929 /*
930 * Cram the map and kentry zones with initial data.
931 * Set reserved_zone non-collectible to aid zone_gc().
932 */
933 zone_change(vm_map_zone, Z_COLLECT, FALSE);
934 zone_change(vm_map_zone, Z_FOREIGN, TRUE);
935 zone_change(vm_map_zone, Z_GZALLOC_EXEMPT, TRUE);
936
937 zone_change(vm_map_entry_reserved_zone, Z_COLLECT, FALSE);
938 zone_change(vm_map_entry_reserved_zone, Z_EXPAND, FALSE);
939 zone_change(vm_map_entry_reserved_zone, Z_FOREIGN, TRUE);
940 zone_change(vm_map_entry_reserved_zone, Z_NOCALLOUT, TRUE);
941 zone_change(vm_map_entry_reserved_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
942 zone_change(vm_map_copy_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
943 zone_change(vm_map_entry_reserved_zone, Z_GZALLOC_EXEMPT, TRUE);
944
945 zone_change(vm_map_holes_zone, Z_COLLECT, TRUE);
946 zone_change(vm_map_holes_zone, Z_EXPAND, TRUE);
947 zone_change(vm_map_holes_zone, Z_FOREIGN, TRUE);
948 zone_change(vm_map_holes_zone, Z_NOCALLOUT, TRUE);
949 zone_change(vm_map_holes_zone, Z_CALLERACCT, TRUE);
950 zone_change(vm_map_holes_zone, Z_GZALLOC_EXEMPT, TRUE);
951
952 /*
953 * Add the stolen memory to zones, adjust zone size and stolen counts.
954 * zcram only up to the maximum number of pages for each zone chunk.
955 */
956 zcram(vm_map_zone, (vm_offset_t)map_data, map_data_size);
957
958 const vm_size_t stride = ZONE_CHUNK_MAXPAGES * PAGE_SIZE;
959 for (vm_offset_t off = 0; off < kentry_data_size; off += stride) {
960 zcram(vm_map_entry_reserved_zone,
961 (vm_offset_t)kentry_data + off,
962 MIN(kentry_data_size - off, stride));
963 }
964 for (vm_offset_t off = 0; off < map_holes_data_size; off += stride) {
965 zcram(vm_map_holes_zone,
966 (vm_offset_t)map_holes_data + off,
967 MIN(map_holes_data_size - off, stride));
968 }
969
970 /*
971 * Since these are covered by zones, remove them from stolen page accounting.
972 */
973 VM_PAGE_MOVE_STOLEN(atop_64(map_data_size) + atop_64(kentry_data_size) + atop_64(map_holes_data_size));
974
975 lck_grp_attr_setdefault(&vm_map_lck_grp_attr);
976 lck_grp_init(&vm_map_lck_grp, "vm_map", &vm_map_lck_grp_attr);
977 lck_attr_setdefault(&vm_map_lck_attr);
978
979 lck_attr_setdefault(&vm_map_lck_rw_attr);
980 lck_attr_cleardebug(&vm_map_lck_rw_attr);
981
982 #if VM_MAP_DEBUG_APPLE_PROTECT
983 PE_parse_boot_argn("vm_map_debug_apple_protect",
984 &vm_map_debug_apple_protect,
985 sizeof(vm_map_debug_apple_protect));
986 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
987 #if VM_MAP_DEBUG_FOURK
988 PE_parse_boot_argn("vm_map_debug_fourk",
989 &vm_map_debug_fourk,
990 sizeof(vm_map_debug_fourk));
991 #endif /* VM_MAP_DEBUG_FOURK */
992 PE_parse_boot_argn("vm_map_executable_immutable",
993 &vm_map_executable_immutable,
994 sizeof(vm_map_executable_immutable));
995 PE_parse_boot_argn("vm_map_executable_immutable_verbose",
996 &vm_map_executable_immutable_verbose,
997 sizeof(vm_map_executable_immutable_verbose));
998
999 PE_parse_boot_argn("malloc_no_cow",
1000 &malloc_no_cow,
1001 sizeof(malloc_no_cow));
1002 if (malloc_no_cow) {
1003 vm_memory_malloc_no_cow_mask = 0ULL;
1004 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC;
1005 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_SMALL;
1006 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_MEDIUM;
1007 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE;
1008 // vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_HUGE;
1009 // vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_REALLOC;
1010 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_TINY;
1011 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE_REUSABLE;
1012 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE_REUSED;
1013 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_NANO;
1014 // vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_TCMALLOC;
1015 PE_parse_boot_argn("vm_memory_malloc_no_cow_mask",
1016 &vm_memory_malloc_no_cow_mask,
1017 sizeof(vm_memory_malloc_no_cow_mask));
1018 }
1019 }
1020
1021 void
1022 vm_map_steal_memory(
1023 void)
1024 {
1025 uint32_t kentry_initial_pages;
1026
1027 map_data_size = round_page(10 * sizeof(struct _vm_map));
1028 map_data = pmap_steal_memory(map_data_size);
1029
1030 /*
1031 * kentry_initial_pages corresponds to the number of kernel map entries
1032 * required during bootstrap until the asynchronous replenishment
1033 * scheme is activated and/or entries are available from the general
1034 * map entry pool.
1035 */
1036 #if defined(__LP64__)
1037 kentry_initial_pages = 10;
1038 #else
1039 kentry_initial_pages = 6;
1040 #endif
1041
1042 #if CONFIG_GZALLOC
1043 /* If using the guard allocator, reserve more memory for the kernel
1044 * reserved map entry pool.
1045 */
1046 if (gzalloc_enabled()) {
1047 kentry_initial_pages *= 1024;
1048 }
1049 #endif
1050
1051 kentry_data_size = kentry_initial_pages * PAGE_SIZE;
1052 kentry_data = pmap_steal_memory(kentry_data_size);
1053
1054 map_holes_data_size = kentry_data_size;
1055 map_holes_data = pmap_steal_memory(map_holes_data_size);
1056 }
1057
1058 boolean_t vm_map_supports_hole_optimization = FALSE;
1059
1060 void
1061 vm_kernel_reserved_entry_init(void)
1062 {
1063 zone_prio_refill_configure(vm_map_entry_reserved_zone, (6 * PAGE_SIZE) / sizeof(struct vm_map_entry));
1064
1065 /*
1066 * Once we have our replenish thread set up, we can start using the vm_map_holes zone.
1067 */
1068 zone_prio_refill_configure(vm_map_holes_zone, (6 * PAGE_SIZE) / sizeof(struct vm_map_links));
1069 vm_map_supports_hole_optimization = TRUE;
1070 }
1071
1072 void
1073 vm_map_disable_hole_optimization(vm_map_t map)
1074 {
1075 vm_map_entry_t head_entry, hole_entry, next_hole_entry;
1076
1077 if (map->holelistenabled) {
1078 head_entry = hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
1079
1080 while (hole_entry != NULL) {
1081 next_hole_entry = hole_entry->vme_next;
1082
1083 hole_entry->vme_next = NULL;
1084 hole_entry->vme_prev = NULL;
1085 zfree(vm_map_holes_zone, hole_entry);
1086
1087 if (next_hole_entry == head_entry) {
1088 hole_entry = NULL;
1089 } else {
1090 hole_entry = next_hole_entry;
1091 }
1092 }
1093
1094 map->holes_list = NULL;
1095 map->holelistenabled = FALSE;
1096
1097 map->first_free = vm_map_first_entry(map);
1098 SAVE_HINT_HOLE_WRITE(map, NULL);
1099 }
1100 }
1101
1102 boolean_t
1103 vm_kernel_map_is_kernel(vm_map_t map)
1104 {
1105 return map->pmap == kernel_pmap;
1106 }
1107
1108 /*
1109 * vm_map_create:
1110 *
1111 * Creates and returns a new empty VM map with
1112 * the given physical map structure, and having
1113 * the given lower and upper address bounds.
1114 */
1115
1116 vm_map_t
1117 vm_map_create(
1118 pmap_t pmap,
1119 vm_map_offset_t min,
1120 vm_map_offset_t max,
1121 boolean_t pageable)
1122 {
1123 int options;
1124
1125 options = 0;
1126 if (pageable) {
1127 options |= VM_MAP_CREATE_PAGEABLE;
1128 }
1129 return vm_map_create_options(pmap, min, max, options);
1130 }
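/*
 * Illustrative use (a sketch with hypothetical names and bounds, not
 * from this file): create a pageable map for a freshly created pmap
 * covering the platform's user address range.
 *
 *	vm_map_t map;
 *
 *	map = vm_map_create(new_pmap,
 *	    (vm_map_offset_t) VM_MIN_ADDRESS,
 *	    (vm_map_offset_t) VM_MAX_ADDRESS,
 *	    TRUE);		// pageable entries
 */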
1131
1132 vm_map_t
1133 vm_map_create_options(
1134 pmap_t pmap,
1135 vm_map_offset_t min,
1136 vm_map_offset_t max,
1137 int options)
1138 {
1139 vm_map_t result;
1140 struct vm_map_links *hole_entry = NULL;
1141
1142 if (options & ~(VM_MAP_CREATE_ALL_OPTIONS)) {
1143 /* unknown option */
1144 return VM_MAP_NULL;
1145 }
1146
1147 result = (vm_map_t) zalloc(vm_map_zone);
1148 if (result == VM_MAP_NULL) {
1149 panic("vm_map_create");
1150 }
1151
1152 vm_map_first_entry(result) = vm_map_to_entry(result);
1153 vm_map_last_entry(result) = vm_map_to_entry(result);
1154 result->hdr.nentries = 0;
1155 if (options & VM_MAP_CREATE_PAGEABLE) {
1156 result->hdr.entries_pageable = TRUE;
1157 } else {
1158 result->hdr.entries_pageable = FALSE;
1159 }
1160
1161 vm_map_store_init( &(result->hdr));
1162
1163 result->hdr.page_shift = PAGE_SHIFT;
1164
1165 result->size = 0;
1166 result->user_wire_limit = MACH_VM_MAX_ADDRESS; /* default limit is unlimited */
1167 result->user_wire_size = 0;
1168 #if !CONFIG_EMBEDDED
1169 result->vmmap_high_start = 0;
1170 #endif
1171 os_ref_init_count(&result->map_refcnt, &map_refgrp, 1);
1172 #if TASK_SWAPPER
1173 result->res_count = 1;
1174 result->sw_state = MAP_SW_IN;
1175 #endif /* TASK_SWAPPER */
1176 result->pmap = pmap;
1177 result->min_offset = min;
1178 result->max_offset = max;
1179 result->wiring_required = FALSE;
1180 result->no_zero_fill = FALSE;
1181 result->mapped_in_other_pmaps = FALSE;
1182 result->wait_for_space = FALSE;
1183 result->switch_protect = FALSE;
1184 result->disable_vmentry_reuse = FALSE;
1185 result->map_disallow_data_exec = FALSE;
1186 result->is_nested_map = FALSE;
1187 result->map_disallow_new_exec = FALSE;
1188 result->highest_entry_end = 0;
1189 result->first_free = vm_map_to_entry(result);
1190 result->hint = vm_map_to_entry(result);
1191 result->jit_entry_exists = FALSE;
1192
1193 /* "has_corpse_footprint" and "holelistenabled" are mutually exclusive */
1194 if (options & VM_MAP_CREATE_CORPSE_FOOTPRINT) {
1195 result->has_corpse_footprint = TRUE;
1196 result->holelistenabled = FALSE;
1197 result->vmmap_corpse_footprint = NULL;
1198 } else {
1199 result->has_corpse_footprint = FALSE;
1200 if (vm_map_supports_hole_optimization) {
1201 hole_entry = zalloc(vm_map_holes_zone);
1202
1203 hole_entry->start = min;
1204 #if defined(__arm__) || defined(__arm64__)
1205 hole_entry->end = result->max_offset;
1206 #else
1207 hole_entry->end = (max > (vm_map_offset_t)MACH_VM_MAX_ADDRESS) ? max : (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
1208 #endif
1209 result->holes_list = result->hole_hint = hole_entry;
1210 hole_entry->prev = hole_entry->next = CAST_TO_VM_MAP_ENTRY(hole_entry);
1211 result->holelistenabled = TRUE;
1212 } else {
1213 result->holelistenabled = FALSE;
1214 }
1215 }
1216
1217 vm_map_lock_init(result);
1218 lck_mtx_init_ext(&result->s_lock, &result->s_lock_ext, &vm_map_lck_grp, &vm_map_lck_attr);
1219
1220 return result;
1221 }
1222
1223 /*
1224 * vm_map_entry_create: [ internal use only ]
1225 *
1226 * Allocates a VM map entry for insertion in the
1227 * given map (or map copy). No fields are filled.
1228 */
1229 #define vm_map_entry_create(map, map_locked) _vm_map_entry_create(&(map)->hdr, map_locked)
1230
1231 #define vm_map_copy_entry_create(copy, map_locked) \
1232 _vm_map_entry_create(&(copy)->cpy_hdr, map_locked)
1233 unsigned reserved_zalloc_count, nonreserved_zalloc_count;
1234
1235 static vm_map_entry_t
1236 _vm_map_entry_create(
1237 struct vm_map_header *map_header, boolean_t __unused map_locked)
1238 {
1239 zone_t zone;
1240 vm_map_entry_t entry;
1241
1242 zone = vm_map_entry_zone;
1243
1244 assert(map_header->entries_pageable ? !map_locked : TRUE);
1245
1246 if (map_header->entries_pageable) {
1247 entry = (vm_map_entry_t) zalloc(zone);
1248 } else {
1249 entry = (vm_map_entry_t) zalloc_canblock(zone, FALSE);
1250
1251 if (entry == VM_MAP_ENTRY_NULL) {
1252 zone = vm_map_entry_reserved_zone;
1253 entry = (vm_map_entry_t) zalloc(zone);
1254 OSAddAtomic(1, &reserved_zalloc_count);
1255 } else {
1256 OSAddAtomic(1, &nonreserved_zalloc_count);
1257 }
1258 }
1259
1260 if (entry == VM_MAP_ENTRY_NULL) {
1261 panic("vm_map_entry_create");
1262 }
1263 entry->from_reserved_zone = (zone == vm_map_entry_reserved_zone);
1264
1265 vm_map_store_update((vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE);
1266 #if MAP_ENTRY_CREATION_DEBUG
1267 entry->vme_creation_maphdr = map_header;
1268 backtrace(&entry->vme_creation_bt[0],
1269 (sizeof(entry->vme_creation_bt) / sizeof(uintptr_t)), NULL);
1270 #endif
1271 return entry;
1272 }
1273
1274 /*
1275 * vm_map_entry_dispose: [ internal use only ]
1276 *
1277 * Inverse of vm_map_entry_create.
1278 *
1279 * write map lock held so no need to
1280 * do anything special to insure correctness
1281 * of the stores
1282 */
1283 #define vm_map_entry_dispose(map, entry) \
1284 _vm_map_entry_dispose(&(map)->hdr, (entry))
1285
1286 #define vm_map_copy_entry_dispose(copy, entry) \
1287 _vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
1288
1289 static void
1290 _vm_map_entry_dispose(
1291 struct vm_map_header *map_header,
1292 vm_map_entry_t entry)
1293 {
1294 zone_t zone;
1295
1296 if (map_header->entries_pageable || !(entry->from_reserved_zone)) {
1297 zone = vm_map_entry_zone;
1298 } else {
1299 zone = vm_map_entry_reserved_zone;
1300 }
1301
1302 if (!map_header->entries_pageable) {
1303 if (zone == vm_map_entry_zone) {
1304 OSAddAtomic(-1, &nonreserved_zalloc_count);
1305 } else {
1306 OSAddAtomic(-1, &reserved_zalloc_count);
1307 }
1308 }
1309
1310 zfree(zone, entry);
1311 }
1312
1313 #if MACH_ASSERT
1314 static boolean_t first_free_check = FALSE;
1315 boolean_t
1316 first_free_is_valid(
1317 vm_map_t map)
1318 {
1319 if (!first_free_check) {
1320 return TRUE;
1321 }
1322
1323 return first_free_is_valid_store( map );
1324 }
1325 #endif /* MACH_ASSERT */
1326
1327
1328 #define vm_map_copy_entry_link(copy, after_where, entry) \
1329 _vm_map_store_entry_link(&(copy)->cpy_hdr, after_where, (entry))
1330
1331 #define vm_map_copy_entry_unlink(copy, entry) \
1332 _vm_map_store_entry_unlink(&(copy)->cpy_hdr, (entry))
1333
1334 #if MACH_ASSERT && TASK_SWAPPER
1335 /*
1336 * vm_map_res_reference:
1337 *
1338 * Adds another valid residence count to the given map.
1339 *
1340 * Map is locked so this function can be called from
1341 * vm_map_swapin.
1342 *
1343 */
1344 void
1345 vm_map_res_reference(vm_map_t map)
1346 {
1347 /* assert map is locked */
1348 assert(map->res_count >= 0);
1349 assert(os_ref_get_count(&map->map_refcnt) >= map->res_count);
1350 if (map->res_count == 0) {
1351 lck_mtx_unlock(&map->s_lock);
1352 vm_map_lock(map);
1353 vm_map_swapin(map);
1354 lck_mtx_lock(&map->s_lock);
1355 ++map->res_count;
1356 vm_map_unlock(map);
1357 } else {
1358 ++map->res_count;
1359 }
1360 }
1361
1362 /*
1363 * vm_map_reference_swap:
1364 *
1365 * Adds valid reference and residence counts to the given map.
1366 *
1367 * The map may not be in memory (i.e. zero residence count).
1368 *
1369 */
1370 void
1371 vm_map_reference_swap(vm_map_t map)
1372 {
1373 assert(map != VM_MAP_NULL);
1374 lck_mtx_lock(&map->s_lock);
1375 assert(map->res_count >= 0);
1376 assert(os_ref_get_count(&map->map_refcnt) >= map->res_count);
1377 os_ref_retain_locked(&map->map_refcnt);
1378 vm_map_res_reference(map);
1379 lck_mtx_unlock(&map->s_lock);
1380 }
1381
1382 /*
1383 * vm_map_res_deallocate:
1384 *
1385 * Decrement residence count on a map; possibly causing swapout.
1386 *
1387 * The map must be in memory (i.e. non-zero residence count).
1388 *
1389 * The map is locked, so this function is callable from vm_map_deallocate.
1390 *
1391 */
1392 void
1393 vm_map_res_deallocate(vm_map_t map)
1394 {
1395 assert(map->res_count > 0);
1396 if (--map->res_count == 0) {
1397 lck_mtx_unlock(&map->s_lock);
1398 vm_map_lock(map);
1399 vm_map_swapout(map);
1400 vm_map_unlock(map);
1401 lck_mtx_lock(&map->s_lock);
1402 }
1403 assert(os_ref_get_count(&map->map_refcnt) >= map->res_count);
1404 }
1405 #endif /* MACH_ASSERT && TASK_SWAPPER */
1406
1407 /*
1408 * vm_map_destroy:
1409 *
1410 * Actually destroy a map.
1411 */
1412 void
1413 vm_map_destroy(
1414 vm_map_t map,
1415 int flags)
1416 {
1417 vm_map_lock(map);
1418
1419 /* final cleanup: no need to unnest shared region */
1420 flags |= VM_MAP_REMOVE_NO_UNNESTING;
1421 /* final cleanup: ok to remove immutable mappings */
1422 flags |= VM_MAP_REMOVE_IMMUTABLE;
1423 /* final cleanup: allow gaps in range */
1424 flags |= VM_MAP_REMOVE_GAPS_OK;
1425
1426 /* clean up regular map entries */
1427 (void) vm_map_delete(map, map->min_offset, map->max_offset,
1428 flags, VM_MAP_NULL);
1429 /* clean up leftover special mappings (commpage, etc...) */
1430 #if !defined(__arm__) && !defined(__arm64__)
1431 (void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL,
1432 flags, VM_MAP_NULL);
1433 #endif /* !__arm__ && !__arm64__ */
1434
1435 vm_map_disable_hole_optimization(map);
1436 vm_map_corpse_footprint_destroy(map);
1437
1438 vm_map_unlock(map);
1439
1440 assert(map->hdr.nentries == 0);
1441
1442 if (map->pmap) {
1443 pmap_destroy(map->pmap);
1444 }
1445
1446 if (vm_map_lck_attr.lck_attr_val & LCK_ATTR_DEBUG) {
1447 /*
1448 * If lock debugging is enabled the mutexes get tagged as LCK_MTX_TAG_INDIRECT.
1449 * And this is regardless of whether the lck_mtx_ext_t is embedded in the
1450 * structure or kalloc'ed via lck_mtx_init.
1451 * An example is s_lock_ext within struct _vm_map.
1452 *
1453 * A lck_mtx_destroy on such a mutex will attempt a kfree and panic. We
1454 * can add another tag to detect embedded vs alloc'ed indirect external
1455 * mutexes but that'll be additional checks in the lock path and require
1456 * updating dependencies for the old vs new tag.
1457 *
1458 * Since the kfree() is for LCK_MTX_TAG_INDIRECT mutexes and that tag is applied
1459 * just when lock debugging is ON, we choose to forego explicitly destroying
1460 * the vm_map mutex and rw lock and, as a consequence, will overflow the reference
1461 * count on vm_map_lck_grp, which has no serious side-effect.
1462 */
1463 } else {
1464 lck_rw_destroy(&(map)->lock, &vm_map_lck_grp);
1465 lck_mtx_destroy(&(map)->s_lock, &vm_map_lck_grp);
1466 }
1467
1468 zfree(vm_map_zone, map);
1469 }
1470
1471 /*
1472 * Returns pid of the task with the largest number of VM map entries.
1473 * Used in the zone-map-exhaustion jetsam path.
1474 */
1475 pid_t
1476 find_largest_process_vm_map_entries(void)
1477 {
1478 pid_t victim_pid = -1;
1479 int max_vm_map_entries = 0;
1480 task_t task = TASK_NULL;
1481 queue_head_t *task_list = &tasks;
1482
1483 lck_mtx_lock(&tasks_threads_lock);
1484 queue_iterate(task_list, task, task_t, tasks) {
1485 if (task == kernel_task || !task->active) {
1486 continue;
1487 }
1488
1489 vm_map_t task_map = task->map;
1490 if (task_map != VM_MAP_NULL) {
1491 int task_vm_map_entries = task_map->hdr.nentries;
1492 if (task_vm_map_entries > max_vm_map_entries) {
1493 max_vm_map_entries = task_vm_map_entries;
1494 victim_pid = pid_from_task(task);
1495 }
1496 }
1497 }
1498 lck_mtx_unlock(&tasks_threads_lock);
1499
1500 printf("zone_map_exhaustion: victim pid %d, vm region count: %d\n", victim_pid, max_vm_map_entries);
1501 return victim_pid;
1502 }
1503
1504 #if TASK_SWAPPER
1505 /*
1506 * vm_map_swapin/vm_map_swapout
1507 *
1508 * Swap a map in and out, either referencing or releasing its resources.
1509 * These functions are internal use only; however, they must be exported
1510 * because they may be called from macros, which are exported.
1511 *
1512 * In the case of swapout, there could be races on the residence count,
1513 * so if the residence count is up, we return, assuming that a
1514 * vm_map_deallocate() call in the near future will bring us back.
1515 *
1516 * Locking:
1517 * -- We use the map write lock for synchronization among races.
1518 * -- The map write lock, and not the simple s_lock, protects the
1519 * swap state of the map.
1520 * -- If a map entry is a share map, then we hold both locks, in
1521 * hierarchical order.
1522 *
1523 * Synchronization Notes:
1524 * 1) If a vm_map_swapin() call happens while swapout in progress, it
1525 * will block on the map lock and proceed when swapout is through.
1526 * 2) A vm_map_reference() call at this time is illegal, and will
1527 * cause a panic. vm_map_reference() is only allowed on resident
1528 * maps, since it refuses to block.
1529 * 3) A vm_map_swapin() call during a swapin will block, and
1530  *	proceed when the first swapin is done, turning into a nop.
1531 * This is the reason the res_count is not incremented until
1532 * after the swapin is complete.
1533 * 4) There is a timing hole after the checks of the res_count, before
1534 * the map lock is taken, during which a swapin may get the lock
1535 * before a swapout about to happen. If this happens, the swapin
1536 * will detect the state and increment the reference count, causing
1537 * the swapout to be a nop, thereby delaying it until a later
1538 * vm_map_deallocate. If the swapout gets the lock first, then
1539 * the swapin will simply block until the swapout is done, and
1540 * then proceed.
1541 *
1542 * Because vm_map_swapin() is potentially an expensive operation, it
1543 * should be used with caution.
1544 *
1545 * Invariants:
1546 * 1) A map with a residence count of zero is either swapped, or
1547 * being swapped.
1548 * 2) A map with a non-zero residence count is either resident,
1549 * or being swapped in.
1550 */
1551
1552 int vm_map_swap_enable = 1;
1553
1554 void
1555 vm_map_swapin(vm_map_t map)
1556 {
1557 vm_map_entry_t entry;
1558
1559 if (!vm_map_swap_enable) { /* debug */
1560 return;
1561 }
1562
1563 /*
1564 * Map is locked
1565 * First deal with various races.
1566 */
1567 if (map->sw_state == MAP_SW_IN) {
1568 /*
1569 * we raced with swapout and won. Returning will incr.
1570 * the res_count, turning the swapout into a nop.
1571 */
1572 return;
1573 }
1574
1575 /*
1576 * The residence count must be zero. If we raced with another
1577 * swapin, the state would have been IN; if we raced with a
1578 * swapout (after another competing swapin), we must have lost
1579 * the race to get here (see above comment), in which case
1580 * res_count is still 0.
1581 */
1582 assert(map->res_count == 0);
1583
1584 /*
1585 * There are no intermediate states of a map going out or
1586 * coming in, since the map is locked during the transition.
1587 */
1588 assert(map->sw_state == MAP_SW_OUT);
1589
1590 /*
1591 * We now operate upon each map entry. If the entry is a sub-
1592 * or share-map, we call vm_map_res_reference upon it.
1593 * If the entry is an object, we call vm_object_res_reference
1594 * (this may iterate through the shadow chain).
1595 * Note that we hold the map locked the entire time,
1596 * even if we get back here via a recursive call in
1597 * vm_map_res_reference.
1598 */
1599 entry = vm_map_first_entry(map);
1600
1601 while (entry != vm_map_to_entry(map)) {
1602 if (VME_OBJECT(entry) != VM_OBJECT_NULL) {
1603 if (entry->is_sub_map) {
1604 vm_map_t lmap = VME_SUBMAP(entry);
1605 lck_mtx_lock(&lmap->s_lock);
1606 vm_map_res_reference(lmap);
1607 lck_mtx_unlock(&lmap->s_lock);
1608 } else {
1609 			vm_object_t object = VME_OBJECT(entry);
1610 vm_object_lock(object);
1611 /*
1612 * This call may iterate through the
1613 * shadow chain.
1614 */
1615 vm_object_res_reference(object);
1616 vm_object_unlock(object);
1617 }
1618 }
1619 entry = entry->vme_next;
1620 }
1621 assert(map->sw_state == MAP_SW_OUT);
1622 map->sw_state = MAP_SW_IN;
1623 }
1624
1625 void
1626 vm_map_swapout(vm_map_t map)
1627 {
1628 vm_map_entry_t entry;
1629
1630 /*
1631 * Map is locked
1632 * First deal with various races.
1633 * If we raced with a swapin and lost, the residence count
1634 * will have been incremented to 1, and we simply return.
1635 */
1636 lck_mtx_lock(&map->s_lock);
1637 if (map->res_count != 0) {
1638 lck_mtx_unlock(&map->s_lock);
1639 return;
1640 }
1641 lck_mtx_unlock(&map->s_lock);
1642
1643 /*
1644 * There are no intermediate states of a map going out or
1645 * coming in, since the map is locked during the transition.
1646 */
1647 assert(map->sw_state == MAP_SW_IN);
1648
1649 if (!vm_map_swap_enable) {
1650 return;
1651 }
1652
1653 /*
1654 * We now operate upon each map entry. If the entry is a sub-
1655 * or share-map, we call vm_map_res_deallocate upon it.
1656 * If the entry is an object, we call vm_object_res_deallocate
1657 * (this may iterate through the shadow chain).
1658 * Note that we hold the map locked the entire time,
1659 * even if we get back here via a recursive call in
1660 * vm_map_res_deallocate.
1661 */
1662 entry = vm_map_first_entry(map);
1663
1664 while (entry != vm_map_to_entry(map)) {
1665 if (VME_OBJECT(entry) != VM_OBJECT_NULL) {
1666 if (entry->is_sub_map) {
1667 vm_map_t lmap = VME_SUBMAP(entry);
1668 lck_mtx_lock(&lmap->s_lock);
1669 vm_map_res_deallocate(lmap);
1670 lck_mtx_unlock(&lmap->s_lock);
1671 } else {
1672 vm_object_t object = VME_OBJECT(entry);
1673 vm_object_lock(object);
1674 /*
1675 * This call may take a long time,
1676 * since it could actively push
1677 * out pages (if we implement it
1678 * that way).
1679 */
1680 vm_object_res_deallocate(object);
1681 vm_object_unlock(object);
1682 }
1683 }
1684 entry = entry->vme_next;
1685 }
1686 assert(map->sw_state == MAP_SW_IN);
1687 map->sw_state = MAP_SW_OUT;
1688 }
1689
1690 #endif /* TASK_SWAPPER */
1691
1692 /*
1693 * vm_map_lookup_entry: [ internal use only ]
1694 *
1695 * Calls into the vm map store layer to find the map
1696 * entry containing (or immediately preceding) the
1697 * specified address in the given map; the entry is returned
1698 * in the "entry" parameter. The boolean
1699 * result indicates whether the address is
1700 * actually contained in the map.
1701 */
1702 boolean_t
1703 vm_map_lookup_entry(
1704 vm_map_t map,
1705 vm_map_offset_t address,
1706 vm_map_entry_t *entry) /* OUT */
1707 {
1708 return vm_map_store_lookup_entry( map, address, entry );
1709 }
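/*
 * Illustrative use (hypothetical caller, not from this file):
 *
 *	vm_map_lock_read(map);
 *	if (vm_map_lookup_entry(map, addr, &entry)) {
 *		// "addr" falls within [entry->vme_start, entry->vme_end)
 *	} else {
 *		// "addr" is unmapped; "entry" is the preceding entry,
 *		// or vm_map_to_entry(map) if there is none
 *	}
 *	vm_map_unlock_read(map);
 */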
1710
1711 /*
1712 * Routine: vm_map_find_space
1713 * Purpose:
1714 * Allocate a range in the specified virtual address map,
1715 * returning the entry allocated for that range.
1716 * Used by kmem_alloc, etc.
1717 *
1718  *	The map must NOT be locked. It will be returned locked
1719 * on KERN_SUCCESS, unlocked on failure.
1720 *
1721 * If an entry is allocated, the object/offset fields
1722 * are initialized to zero.
1723 */
1724 kern_return_t
1725 vm_map_find_space(
1726 vm_map_t map,
1727 vm_map_offset_t *address, /* OUT */
1728 vm_map_size_t size,
1729 vm_map_offset_t mask,
1730 int flags __unused,
1731 vm_map_kernel_flags_t vmk_flags,
1732 vm_tag_t tag,
1733 vm_map_entry_t *o_entry) /* OUT */
1734 {
1735 vm_map_entry_t entry, new_entry;
1736 vm_map_offset_t start;
1737 vm_map_offset_t end;
1738 vm_map_entry_t hole_entry;
1739
1740 if (size == 0) {
1741 *address = 0;
1742 return KERN_INVALID_ARGUMENT;
1743 }
1744
1745 if (vmk_flags.vmkf_guard_after) {
1746 /* account for the back guard page in the size */
1747 size += VM_MAP_PAGE_SIZE(map);
1748 }
1749
1750 new_entry = vm_map_entry_create(map, FALSE);
1751
1752 /*
1753 * Look for the first possible address; if there's already
1754 * something at this address, we have to start after it.
1755 */
1756
1757 vm_map_lock(map);
1758
1759 if (map->disable_vmentry_reuse == TRUE) {
1760 VM_MAP_HIGHEST_ENTRY(map, entry, start);
1761 } else {
1762 if (map->holelistenabled) {
1763 hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
1764
1765 if (hole_entry == NULL) {
1766 /*
1767 * No more space in the map?
1768 */
1769 vm_map_entry_dispose(map, new_entry);
1770 vm_map_unlock(map);
1771 return KERN_NO_SPACE;
1772 }
1773
1774 entry = hole_entry;
1775 start = entry->vme_start;
1776 } else {
1777 assert(first_free_is_valid(map));
1778 if ((entry = map->first_free) == vm_map_to_entry(map)) {
1779 start = map->min_offset;
1780 } else {
1781 start = entry->vme_end;
1782 }
1783 }
1784 }
1785
1786 /*
1787 * In any case, the "entry" always precedes
1788 * the proposed new region throughout the loop:
1789 */
1790
1791 while (TRUE) {
1792 vm_map_entry_t next;
1793
1794 /*
1795 * Find the end of the proposed new region.
1796 * Be sure we didn't go beyond the end, or
1797 * wrap around the address.
1798 */
1799
1800 if (vmk_flags.vmkf_guard_before) {
1801 /* reserve space for the front guard page */
1802 start += VM_MAP_PAGE_SIZE(map);
1803 }
1804 end = ((start + mask) & ~mask);
1805
1806 if (end < start) {
1807 vm_map_entry_dispose(map, new_entry);
1808 vm_map_unlock(map);
1809 return KERN_NO_SPACE;
1810 }
1811 start = end;
1812 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
1813 end += size;
1814 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
1815
1816 if ((end > map->max_offset) || (end < start)) {
1817 vm_map_entry_dispose(map, new_entry);
1818 vm_map_unlock(map);
1819 return KERN_NO_SPACE;
1820 }
1821
1822 next = entry->vme_next;
1823
1824 if (map->holelistenabled) {
1825 if (entry->vme_end >= end) {
1826 break;
1827 }
1828 } else {
1829 /*
1830 * If there are no more entries, we must win.
1831 *
1832 * OR
1833 *
1834 * If there is another entry, it must be
1835 * after the end of the potential new region.
1836 */
1837
1838 if (next == vm_map_to_entry(map)) {
1839 break;
1840 }
1841
1842 if (next->vme_start >= end) {
1843 break;
1844 }
1845 }
1846
1847 /*
1848 * Didn't fit -- move to the next entry.
1849 */
1850
1851 entry = next;
1852
1853 if (map->holelistenabled) {
1854 if (entry == CAST_TO_VM_MAP_ENTRY(map->holes_list)) {
1855 /*
1856 * Wrapped around
1857 */
1858 vm_map_entry_dispose(map, new_entry);
1859 vm_map_unlock(map);
1860 return KERN_NO_SPACE;
1861 }
1862 start = entry->vme_start;
1863 } else {
1864 start = entry->vme_end;
1865 }
1866 }
1867
1868 if (map->holelistenabled) {
1869 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
1870 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
1871 }
1872 }
1873
1874 /*
1875 * At this point,
1876 * "start" and "end" should define the endpoints of the
1877 * available new range, and
1878 * "entry" should refer to the region before the new
1879 * range, and
1880 *
1881 * the map should be locked.
1882 */
1883
1884 if (vmk_flags.vmkf_guard_before) {
1885 /* go back for the front guard page */
1886 start -= VM_MAP_PAGE_SIZE(map);
1887 }
1888 *address = start;
1889
1890 assert(start < end);
1891 new_entry->vme_start = start;
1892 new_entry->vme_end = end;
1893 assert(page_aligned(new_entry->vme_start));
1894 assert(page_aligned(new_entry->vme_end));
1895 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
1896 VM_MAP_PAGE_MASK(map)));
1897 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
1898 VM_MAP_PAGE_MASK(map)));
1899
1900 new_entry->is_shared = FALSE;
1901 new_entry->is_sub_map = FALSE;
1902 new_entry->use_pmap = TRUE;
1903 VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
1904 VME_OFFSET_SET(new_entry, (vm_object_offset_t) 0);
1905
1906 new_entry->needs_copy = FALSE;
1907
1908 new_entry->inheritance = VM_INHERIT_DEFAULT;
1909 new_entry->protection = VM_PROT_DEFAULT;
1910 new_entry->max_protection = VM_PROT_ALL;
1911 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
1912 new_entry->wired_count = 0;
1913 new_entry->user_wired_count = 0;
1914
1915 new_entry->in_transition = FALSE;
1916 new_entry->needs_wakeup = FALSE;
1917 new_entry->no_cache = FALSE;
1918 new_entry->permanent = FALSE;
1919 new_entry->superpage_size = FALSE;
1920 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
1921 new_entry->map_aligned = TRUE;
1922 } else {
1923 new_entry->map_aligned = FALSE;
1924 }
1925
1926 new_entry->used_for_jit = FALSE;
1927 new_entry->pmap_cs_associated = FALSE;
1928 new_entry->zero_wired_pages = FALSE;
1929 new_entry->iokit_acct = FALSE;
1930 new_entry->vme_resilient_codesign = FALSE;
1931 new_entry->vme_resilient_media = FALSE;
1932 if (vmk_flags.vmkf_atomic_entry) {
1933 new_entry->vme_atomic = TRUE;
1934 } else {
1935 new_entry->vme_atomic = FALSE;
1936 }
1937
1938 VME_ALIAS_SET(new_entry, tag);
1939
1940 /*
1941 * Insert the new entry into the list
1942 */
1943
1944 vm_map_store_entry_link(map, entry, new_entry, VM_MAP_KERNEL_FLAGS_NONE);
1945
1946 map->size += size;
1947
1948 /*
1949 * Update the lookup hint
1950 */
1951 SAVE_HINT_MAP_WRITE(map, new_entry);
1952
1953 *o_entry = new_entry;
1954 return KERN_SUCCESS;
1955 }
1956
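/*
 * Minimal usage sketch of vm_map_find_space() (hypothetical caller,
 * not taken from this file); "alloc_size" is a placeholder and the
 * tag is arbitrary.  Note the locking contract described above: the
 * map comes back locked on success and must be unlocked by the
 * caller once the returned entry has been set up.
 *
 *	vm_map_offset_t addr;
 *	vm_map_entry_t entry;
 *	kern_return_t kr;
 *
 *	kr = vm_map_find_space(kernel_map, &addr, alloc_size,
 *	    (vm_map_offset_t)0, 0, VM_MAP_KERNEL_FLAGS_NONE,
 *	    VM_KERN_MEMORY_NONE, &entry);
 *	if (kr == KERN_SUCCESS) {
 *		... fill in VME_OBJECT(entry) / VME_OFFSET(entry) ...
 *		vm_map_unlock(kernel_map);
 *	}
 */
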
1957 int vm_map_pmap_enter_print = FALSE;
1958 int vm_map_pmap_enter_enable = FALSE;
1959
1960 /*
1961 * Routine: vm_map_pmap_enter [internal only]
1962 *
1963 * Description:
1964 * Force pages from the specified object to be entered into
1965 * the pmap at the specified address if they are present.
1966 * The scan ends as soon as a page is not found in the object.
1967 *
1968 * Returns:
1969 * Nothing.
1970 *
1971 * In/out conditions:
1972 * The source map should not be locked on entry.
1973 */
1974 __unused static void
1975 vm_map_pmap_enter(
1976 vm_map_t map,
1977 vm_map_offset_t addr,
1978 vm_map_offset_t end_addr,
1979 vm_object_t object,
1980 vm_object_offset_t offset,
1981 vm_prot_t protection)
1982 {
1983 int type_of_fault;
1984 kern_return_t kr;
1985 struct vm_object_fault_info fault_info = {};
1986
1987 if (map->pmap == 0) {
1988 return;
1989 }
1990
1991 while (addr < end_addr) {
1992 vm_page_t m;
1993
1994
1995 /*
1996 * TODO:
1997 * From vm_map_enter(), we come into this function without the map
1998 * lock held or the object lock held.
1999 * We haven't taken a reference on the object either.
2000 * We should do a proper lookup on the map to make sure
2001 * that things are sane before we go locking objects that
2002 * could have been deallocated from under us.
2003 */
2004
2005 vm_object_lock(object);
2006
2007 m = vm_page_lookup(object, offset);
2008
2009 if (m == VM_PAGE_NULL || m->vmp_busy || m->vmp_fictitious ||
2010 (m->vmp_unusual && (m->vmp_error || m->vmp_restart || m->vmp_absent))) {
2011 vm_object_unlock(object);
2012 return;
2013 }
2014
2015 if (vm_map_pmap_enter_print) {
2016 printf("vm_map_pmap_enter:");
2017 printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
2018 map, (unsigned long long)addr, object, (unsigned long long)offset);
2019 }
2020 type_of_fault = DBG_CACHE_HIT_FAULT;
2021 kr = vm_fault_enter(m, map->pmap,
2022 addr, protection, protection,
2023 VM_PAGE_WIRED(m),
2024 FALSE, /* change_wiring */
2025 VM_KERN_MEMORY_NONE, /* tag - not wiring */
2026 &fault_info,
2027 NULL, /* need_retry */
2028 &type_of_fault);
2029
2030 vm_object_unlock(object);
2031
2032 offset += PAGE_SIZE_64;
2033 addr += PAGE_SIZE;
2034 }
2035 }
2036
2037 boolean_t vm_map_pmap_is_empty(
2038 vm_map_t map,
2039 vm_map_offset_t start,
2040 vm_map_offset_t end);
2041 boolean_t
2042 vm_map_pmap_is_empty(
2043 vm_map_t map,
2044 vm_map_offset_t start,
2045 vm_map_offset_t end)
2046 {
2047 #ifdef MACHINE_PMAP_IS_EMPTY
2048 return pmap_is_empty(map->pmap, start, end);
2049 #else /* MACHINE_PMAP_IS_EMPTY */
2050 vm_map_offset_t offset;
2051 ppnum_t phys_page;
2052
2053 if (map->pmap == NULL) {
2054 return TRUE;
2055 }
2056
2057 for (offset = start;
2058 offset < end;
2059 offset += PAGE_SIZE) {
2060 phys_page = pmap_find_phys(map->pmap, offset);
2061 if (phys_page) {
2062 kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
2063 "page %d at 0x%llx\n",
2064 map, (long long)start, (long long)end,
2065 phys_page, (long long)offset);
2066 return FALSE;
2067 }
2068 }
2069 return TRUE;
2070 #endif /* MACHINE_PMAP_IS_EMPTY */
2071 }
2072
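/*
 * Sketch of the intended use of vm_map_pmap_is_empty(): a debug
 * sanity check that nothing is resident in the pmap for a freshly
 * reserved range, as in the DEBUG block of vm_map_enter() below.
 *
 *	assert(vm_map_pmap_is_empty(map, *address, *address + size));
 */
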
2073 #define MAX_TRIES_TO_GET_RANDOM_ADDRESS 1000
2074 kern_return_t
2075 vm_map_random_address_for_size(
2076 vm_map_t map,
2077 vm_map_offset_t *address,
2078 vm_map_size_t size)
2079 {
2080 kern_return_t kr = KERN_SUCCESS;
2081 int tries = 0;
2082 vm_map_offset_t random_addr = 0;
2083 vm_map_offset_t hole_end;
2084
2085 vm_map_entry_t next_entry = VM_MAP_ENTRY_NULL;
2086 vm_map_entry_t prev_entry = VM_MAP_ENTRY_NULL;
2087 vm_map_size_t vm_hole_size = 0;
2088 vm_map_size_t addr_space_size;
2089
2090 addr_space_size = vm_map_max(map) - vm_map_min(map);
2091
2092 assert(page_aligned(size));
2093
2094 while (tries < MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
2095 random_addr = ((vm_map_offset_t)random()) << PAGE_SHIFT;
2096 random_addr = vm_map_trunc_page(
2097 vm_map_min(map) + (random_addr % addr_space_size),
2098 VM_MAP_PAGE_MASK(map));
2099
2100 if (vm_map_lookup_entry(map, random_addr, &prev_entry) == FALSE) {
2101 if (prev_entry == vm_map_to_entry(map)) {
2102 next_entry = vm_map_first_entry(map);
2103 } else {
2104 next_entry = prev_entry->vme_next;
2105 }
2106 if (next_entry == vm_map_to_entry(map)) {
2107 hole_end = vm_map_max(map);
2108 } else {
2109 hole_end = next_entry->vme_start;
2110 }
2111 vm_hole_size = hole_end - random_addr;
2112 if (vm_hole_size >= size) {
2113 *address = random_addr;
2114 break;
2115 }
2116 }
2117 tries++;
2118 }
2119
2120 if (tries == MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
2121 kr = KERN_NO_SPACE;
2122 }
2123 return kr;
2124 }
2125
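/*
 * Back-of-the-envelope note on the retry loop above (approximation,
 * not a guarantee): if each independent try lands at the start of a
 * large-enough hole with probability p, then all
 * MAX_TRIES_TO_GET_RANDOM_ADDRESS == 1000 tries fail with probability
 * (1 - p)^1000 ~= e^(-1000 p); even for p = 1% that is about 4e-5.
 * The caller (vm_map_enter() below) invokes this with the map locked,
 * which vm_map_lookup_entry() relies on.
 */
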
2126 static boolean_t
2127 vm_memory_malloc_no_cow(
2128 int alias)
2129 {
2130 uint64_t alias_mask;
2131
2132 if (alias > 63) {
2133 return FALSE;
2134 }
2135
2136 alias_mask = 1ULL << alias;
2137 if (alias_mask & vm_memory_malloc_no_cow_mask) {
2138 return TRUE;
2139 }
2140 return FALSE;
2141 }
2142
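/*
 * Small worked example for vm_memory_malloc_no_cow() above (values
 * chosen for illustration): with vm_memory_malloc_no_cow_mask set to
 * 0x2ULL, only bit (1ULL << 1) is set, so only the VM_MEMORY_MALLOC
 * alias (1) reports TRUE:
 *
 *	vm_memory_malloc_no_cow(1);	returns TRUE
 *	vm_memory_malloc_no_cow(2);	returns FALSE
 *	vm_memory_malloc_no_cow(64);	returns FALSE (alias > 63)
 */
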
2143 /*
2144 * Routine: vm_map_enter
2145 *
2146 * Description:
2147 * Allocate a range in the specified virtual address map.
2148 * The resulting range will refer to memory defined by
2149 * the given memory object and offset into that object.
2150 *
2151 * Arguments are as defined in the vm_map call.
2152 */
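/*
 * Illustrative sketch of a minimal anonymous allocation through
 * vm_map_enter() (hypothetical direct caller; user-level requests
 * normally reach this routine via the mach_vm_allocate()/vm_map()
 * paths):
 *
 *	vm_map_offset_t addr = 0;
 *	kern_return_t kr;
 *
 *	kr = vm_map_enter(map, &addr, size, (vm_map_offset_t)0,
 *	    VM_FLAGS_ANYWHERE, VM_MAP_KERNEL_FLAGS_NONE,
 *	    VM_KERN_MEMORY_NONE,
 *	    VM_OBJECT_NULL, (vm_object_offset_t)0,
 *	    FALSE, VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
 *
 * A null object with zero offset gives lazily allocated, zero-filled
 * anonymous memory; on success, *address holds the chosen start of
 * the new range.
 */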
2153 int _map_enter_debug = 0;
2154 static unsigned int vm_map_enter_restore_successes = 0;
2155 static unsigned int vm_map_enter_restore_failures = 0;
2156 kern_return_t
2157 vm_map_enter(
2158 vm_map_t map,
2159 vm_map_offset_t *address, /* IN/OUT */
2160 vm_map_size_t size,
2161 vm_map_offset_t mask,
2162 int flags,
2163 vm_map_kernel_flags_t vmk_flags,
2164 vm_tag_t alias,
2165 vm_object_t object,
2166 vm_object_offset_t offset,
2167 boolean_t needs_copy,
2168 vm_prot_t cur_protection,
2169 vm_prot_t max_protection,
2170 vm_inherit_t inheritance)
2171 {
2172 vm_map_entry_t entry, new_entry;
2173 vm_map_offset_t start, tmp_start, tmp_offset;
2174 vm_map_offset_t end, tmp_end;
2175 vm_map_offset_t tmp2_start, tmp2_end;
2176 vm_map_offset_t desired_empty_end;
2177 vm_map_offset_t step;
2178 kern_return_t result = KERN_SUCCESS;
2179 vm_map_t zap_old_map = VM_MAP_NULL;
2180 vm_map_t zap_new_map = VM_MAP_NULL;
2181 boolean_t map_locked = FALSE;
2182 boolean_t pmap_empty = TRUE;
2183 boolean_t new_mapping_established = FALSE;
2184 boolean_t keep_map_locked = vmk_flags.vmkf_keep_map_locked;
2185 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
2186 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
2187 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
2188 boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
2189 boolean_t is_submap = vmk_flags.vmkf_submap;
2190 boolean_t permanent = vmk_flags.vmkf_permanent;
2191 boolean_t no_copy_on_read = vmk_flags.vmkf_no_copy_on_read;
2192 boolean_t entry_for_jit = vmk_flags.vmkf_map_jit;
2193 boolean_t iokit_acct = vmk_flags.vmkf_iokit_acct;
2194 boolean_t resilient_codesign = ((flags & VM_FLAGS_RESILIENT_CODESIGN) != 0);
2195 boolean_t resilient_media = ((flags & VM_FLAGS_RESILIENT_MEDIA) != 0);
2196 boolean_t random_address = ((flags & VM_FLAGS_RANDOM_ADDR) != 0);
2197 unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
2198 vm_tag_t user_alias;
2199 vm_map_offset_t effective_min_offset, effective_max_offset;
2200 kern_return_t kr;
2201 boolean_t clear_map_aligned = FALSE;
2202 vm_map_entry_t hole_entry;
2203 vm_map_size_t chunk_size = 0;
2204
2205 assertf(vmk_flags.__vmkf_unused == 0, "vmk_flags unused=0x%x\n", vmk_flags.__vmkf_unused);
2206
2207 if (flags & VM_FLAGS_4GB_CHUNK) {
2208 #if defined(__LP64__)
2209 chunk_size = (4ULL * 1024 * 1024 * 1024); /* max. 4GB chunks for the new allocation */
2210 #else /* __LP64__ */
2211 chunk_size = ANON_CHUNK_SIZE;
2212 #endif /* __LP64__ */
2213 } else {
2214 chunk_size = ANON_CHUNK_SIZE;
2215 }
2216
2217 if (superpage_size) {
2218 switch (superpage_size) {
2219 /*
2220 * Note that the current implementation only supports
2221 * a single size for superpages, SUPERPAGE_SIZE, per
2222 * architecture. As soon as more sizes are supported,
2223 * SUPERPAGE_SIZE has to be replaced with a lookup of the
2224 * size depending on superpage_size.
2225 */
2226 #ifdef __x86_64__
2227 case SUPERPAGE_SIZE_ANY:
2228 /* handle it like 2 MB and round up to page size */
2229 size = (size + 2 * 1024 * 1024 - 1) & ~(2 * 1024 * 1024 - 1);
2230 case SUPERPAGE_SIZE_2MB:
2231 break;
2232 #endif
2233 default:
2234 return KERN_INVALID_ARGUMENT;
2235 }
2236 mask = SUPERPAGE_SIZE - 1;
2237 if (size & (SUPERPAGE_SIZE - 1)) {
2238 return KERN_INVALID_ARGUMENT;
2239 }
2240 inheritance = VM_INHERIT_NONE; /* fork() children won't inherit superpages */
2241 }
2242
2243
2244 if ((cur_protection & VM_PROT_WRITE) &&
2245 (cur_protection & VM_PROT_EXECUTE) &&
2246 #if !CONFIG_EMBEDDED
2247 map != kernel_map &&
2248 (cs_process_global_enforcement() ||
2249 (vmk_flags.vmkf_cs_enforcement_override
2250 ? vmk_flags.vmkf_cs_enforcement
2251 : cs_process_enforcement(NULL))) &&
2252 #endif /* !CONFIG_EMBEDDED */
2253 !entry_for_jit) {
2254 DTRACE_VM3(cs_wx,
2255 uint64_t, 0,
2256 uint64_t, 0,
2257 vm_prot_t, cur_protection);
2258 printf("CODE SIGNING: %d[%s] %s: curprot cannot be write+execute. "
2259 #if VM_PROTECT_WX_FAIL
2260 "failing\n",
2261 #else /* VM_PROTECT_WX_FAIL */
2262 "turning off execute\n",
2263 #endif /* VM_PROTECT_WX_FAIL */
2264 proc_selfpid(),
2265 (current_task()->bsd_info
2266 ? proc_name_address(current_task()->bsd_info)
2267 : "?"),
2268 __FUNCTION__);
2269 cur_protection &= ~VM_PROT_EXECUTE;
2270 #if VM_PROTECT_WX_FAIL
2271 return KERN_PROTECTION_FAILURE;
2272 #endif /* VM_PROTECT_WX_FAIL */
2273 }
2274
2275 /*
2276 * If the task has requested executable lockdown,
2277 * deny any new executable mapping.
2278 */
2279 if (map->map_disallow_new_exec == TRUE) {
2280 if (cur_protection & VM_PROT_EXECUTE) {
2281 return KERN_PROTECTION_FAILURE;
2282 }
2283 }
2284
2285 if (resilient_codesign) {
2286 assert(!is_submap);
2287 if ((cur_protection & (VM_PROT_WRITE | VM_PROT_EXECUTE)) ||
2288 (max_protection & (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
2289 return KERN_PROTECTION_FAILURE;
2290 }
2291 }
2292
2293 if (resilient_media) {
2294 assert(!is_submap);
2295 // assert(!needs_copy);
2296 if (object != VM_OBJECT_NULL &&
2297 !object->internal) {
2298 /*
2299 * This mapping is directly backed by an external
2300 * memory manager (e.g. a vnode pager for a file):
2301 * we would not have any safe place to inject
2302 * a zero-filled page if an actual page is not
2303 * available, without possibly impacting the actual
2304 * contents of the mapped object (e.g. the file),
2305 * so we can't provide any media resiliency here.
2306 */
2307 return KERN_INVALID_ARGUMENT;
2308 }
2309 }
2310
2311 if (is_submap) {
2312 if (purgable) {
2313 /* submaps can not be purgeable */
2314 return KERN_INVALID_ARGUMENT;
2315 }
2316 if (object == VM_OBJECT_NULL) {
2317 /* submaps can not be created lazily */
2318 return KERN_INVALID_ARGUMENT;
2319 }
2320 }
2321 if (vmk_flags.vmkf_already) {
2322 /*
2323 * VM_FLAGS_ALREADY says that it's OK if the same mapping
2324 * is already present. For it to be meaningful, the requested
2325 * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
2326 * we shouldn't try and remove what was mapped there first
2327 * (!VM_FLAGS_OVERWRITE).
2328 */
2329 if ((flags & VM_FLAGS_ANYWHERE) ||
2330 (flags & VM_FLAGS_OVERWRITE)) {
2331 return KERN_INVALID_ARGUMENT;
2332 }
2333 }
2334
2335 effective_min_offset = map->min_offset;
2336
2337 if (vmk_flags.vmkf_beyond_max) {
2338 /*
2339 * Allow an insertion beyond the map's max offset.
2340 */
2341 #if !defined(__arm__) && !defined(__arm64__)
2342 if (vm_map_is_64bit(map)) {
2343 effective_max_offset = 0xFFFFFFFFFFFFF000ULL;
2344 } else
2345 #endif /* __arm__ */
2346 effective_max_offset = 0x00000000FFFFF000ULL;
2347 } else {
2348 #if !defined(CONFIG_EMBEDDED)
2349 if (__improbable(vmk_flags.vmkf_32bit_map_va)) {
2350 effective_max_offset = MIN(map->max_offset, 0x00000000FFFFF000ULL);
2351 } else {
2352 effective_max_offset = map->max_offset;
2353 }
2354 #else
2355 effective_max_offset = map->max_offset;
2356 #endif
2357 }
2358
2359 if (size == 0 ||
2360 (offset & PAGE_MASK_64) != 0) {
2361 *address = 0;
2362 return KERN_INVALID_ARGUMENT;
2363 }
2364
2365 if (map->pmap == kernel_pmap) {
2366 user_alias = VM_KERN_MEMORY_NONE;
2367 } else {
2368 user_alias = alias;
2369 }
2370
2371 if (user_alias == VM_MEMORY_MALLOC_MEDIUM) {
2372 chunk_size = MALLOC_MEDIUM_CHUNK_SIZE;
2373 }
2374
2375 #define RETURN(value) { result = value; goto BailOut; }
2376
2377 assert(page_aligned(*address));
2378 assert(page_aligned(size));
2379
2380 if (!VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map))) {
2381 /*
2382 * In most cases, the caller rounds the size up to the
2383 * map's page size.
2384 * If we get a size that is explicitly not map-aligned here,
2385 * we'll have to respect the caller's wish and mark the
2386 * mapping as "not map-aligned" to avoid tripping the
2387 * map alignment checks later.
2388 */
2389 clear_map_aligned = TRUE;
2390 }
2391 if (!anywhere &&
2392 !VM_MAP_PAGE_ALIGNED(*address, VM_MAP_PAGE_MASK(map))) {
2393 /*
2394 * We've been asked to map at a fixed address and that
2395 * address is not aligned to the map's specific alignment.
2396 * The caller should know what it's doing (i.e. most likely
2397 * mapping some fragmented copy map, transferring memory from
2398 * a VM map with a different alignment), so clear map_aligned
2399 * for this new VM map entry and proceed.
2400 */
2401 clear_map_aligned = TRUE;
2402 }
2403
2404 /*
2405 * Only zero-fill objects are allowed to be purgable.
2406 * LP64todo - limit purgable objects to 32-bits for now
2407 */
2408 if (purgable &&
2409 (offset != 0 ||
2410 (object != VM_OBJECT_NULL &&
2411 (object->vo_size != size ||
2412 object->purgable == VM_PURGABLE_DENY))
2413 || size > ANON_MAX_SIZE)) { /* LP64todo: remove when dp capable */
2414 return KERN_INVALID_ARGUMENT;
2415 }
2416
2417 if (!anywhere && overwrite) {
2418 /*
2419 * Create a temporary VM map to hold the old mappings in the
2420 * affected area while we create the new one.
2421 * This avoids releasing the VM map lock in
2422 * vm_map_entry_delete() and allows atomicity
2423 * when we want to replace some mappings with a new one.
2424 * It also allows us to restore the old VM mappings if the
2425 * new mapping fails.
2426 */
2427 zap_old_map = vm_map_create(PMAP_NULL,
2428 *address,
2429 *address + size,
2430 map->hdr.entries_pageable);
2431 vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
2432 vm_map_disable_hole_optimization(zap_old_map);
2433 }
2434
2435 StartAgain:;
2436
2437 start = *address;
2438
2439 if (anywhere) {
2440 vm_map_lock(map);
2441 map_locked = TRUE;
2442
2443 if (entry_for_jit) {
2444 #if CONFIG_EMBEDDED
2445 if (map->jit_entry_exists) {
2446 result = KERN_INVALID_ARGUMENT;
2447 goto BailOut;
2448 }
2449 random_address = TRUE;
2450 #endif /* CONFIG_EMBEDDED */
2451 }
2452
2453 if (random_address) {
2454 /*
2455 * Get a random start address.
2456 */
2457 result = vm_map_random_address_for_size(map, address, size);
2458 if (result != KERN_SUCCESS) {
2459 goto BailOut;
2460 }
2461 start = *address;
2462 }
2463 #if !CONFIG_EMBEDDED
2464 else if ((start == 0 || start == vm_map_min(map)) &&
2465 !map->disable_vmentry_reuse &&
2466 map->vmmap_high_start != 0) {
2467 start = map->vmmap_high_start;
2468 }
2469 #endif
2470
2471
2472 /*
2473 * Calculate the first possible address.
2474 */
2475
2476 if (start < effective_min_offset) {
2477 start = effective_min_offset;
2478 }
2479 if (start > effective_max_offset) {
2480 RETURN(KERN_NO_SPACE);
2481 }
2482
2483 /*
2484 * Look for the first possible address;
2485 * if there's already something at this
2486 * address, we have to start after it.
2487 */
2488
2489 if (map->disable_vmentry_reuse == TRUE) {
2490 VM_MAP_HIGHEST_ENTRY(map, entry, start);
2491 } else {
2492 if (map->holelistenabled) {
2493 hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
2494
2495 if (hole_entry == NULL) {
2496 /*
2497 * No more space in the map?
2498 */
2499 result = KERN_NO_SPACE;
2500 goto BailOut;
2501 } else {
2502 boolean_t found_hole = FALSE;
2503
2504 do {
2505 if (hole_entry->vme_start >= start) {
2506 start = hole_entry->vme_start;
2507 found_hole = TRUE;
2508 break;
2509 }
2510
2511 if (hole_entry->vme_end > start) {
2512 found_hole = TRUE;
2513 break;
2514 }
2515 hole_entry = hole_entry->vme_next;
2516 } while (hole_entry != CAST_TO_VM_MAP_ENTRY(map->holes_list));
2517
2518 if (found_hole == FALSE) {
2519 result = KERN_NO_SPACE;
2520 goto BailOut;
2521 }
2522
2523 entry = hole_entry;
2524
2525 if (start == 0) {
2526 start += PAGE_SIZE_64;
2527 }
2528 }
2529 } else {
2530 assert(first_free_is_valid(map));
2531
2532 entry = map->first_free;
2533
2534 if (entry == vm_map_to_entry(map)) {
2535 entry = NULL;
2536 } else {
2537 if (entry->vme_next == vm_map_to_entry(map)) {
2538 /*
2539 * Hole at the end of the map.
2540 */
2541 entry = NULL;
2542 } else {
2543 if (start < (entry->vme_next)->vme_start) {
2544 start = entry->vme_end;
2545 start = vm_map_round_page(start,
2546 VM_MAP_PAGE_MASK(map));
2547 } else {
2548 /*
2549 * Need to do a lookup.
2550 */
2551 entry = NULL;
2552 }
2553 }
2554 }
2555
2556 if (entry == NULL) {
2557 vm_map_entry_t tmp_entry;
2558 if (vm_map_lookup_entry(map, start, &tmp_entry)) {
2559 assert(!entry_for_jit);
2560 start = tmp_entry->vme_end;
2561 start = vm_map_round_page(start,
2562 VM_MAP_PAGE_MASK(map));
2563 }
2564 entry = tmp_entry;
2565 }
2566 }
2567 }
2568
2569 /*
2570 * In any case, the "entry" always precedes
2571 * the proposed new region throughout the
2572 * loop:
2573 */
2574
2575 while (TRUE) {
2576 vm_map_entry_t next;
2577
2578 /*
2579 * Find the end of the proposed new region.
2580 * Be sure we didn't go beyond the end, or
2581 * wrap around the address.
2582 */
2583
2584 end = ((start + mask) & ~mask);
2585 end = vm_map_round_page(end,
2586 VM_MAP_PAGE_MASK(map));
2587 if (end < start) {
2588 RETURN(KERN_NO_SPACE);
2589 }
2590 start = end;
2591 assert(VM_MAP_PAGE_ALIGNED(start,
2592 VM_MAP_PAGE_MASK(map)));
2593 end += size;
2594
2595 /* We want an entire page of empty space, but don't increase the allocation size. */
2596 desired_empty_end = vm_map_round_page(end, VM_MAP_PAGE_MASK(map));
2597
2598 if ((desired_empty_end > effective_max_offset) || (desired_empty_end < start)) {
2599 if (map->wait_for_space) {
2600 assert(!keep_map_locked);
2601 if (size <= (effective_max_offset -
2602 effective_min_offset)) {
2603 assert_wait((event_t)map,
2604 THREAD_ABORTSAFE);
2605 vm_map_unlock(map);
2606 map_locked = FALSE;
2607 thread_block(THREAD_CONTINUE_NULL);
2608 goto StartAgain;
2609 }
2610 }
2611 RETURN(KERN_NO_SPACE);
2612 }
2613
2614 next = entry->vme_next;
2615
2616 if (map->holelistenabled) {
2617 if (entry->vme_end >= desired_empty_end) {
2618 break;
2619 }
2620 } else {
2621 /*
2622 * If there are no more entries, we must win.
2623 *
2624 * OR
2625 *
2626 * If there is another entry, it must be
2627 * after the end of the potential new region.
2628 */
2629
2630 if (next == vm_map_to_entry(map)) {
2631 break;
2632 }
2633
2634 if (next->vme_start >= desired_empty_end) {
2635 break;
2636 }
2637 }
2638
2639 /*
2640 * Didn't fit -- move to the next entry.
2641 */
2642
2643 entry = next;
2644
2645 if (map->holelistenabled) {
2646 if (entry == CAST_TO_VM_MAP_ENTRY(map->holes_list)) {
2647 /*
2648 * Wrapped around
2649 */
2650 result = KERN_NO_SPACE;
2651 goto BailOut;
2652 }
2653 start = entry->vme_start;
2654 } else {
2655 start = entry->vme_end;
2656 }
2657
2658 start = vm_map_round_page(start,
2659 VM_MAP_PAGE_MASK(map));
2660 }
2661
2662 if (map->holelistenabled) {
2663 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
2664 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
2665 }
2666 }
2667
2668 *address = start;
2669 assert(VM_MAP_PAGE_ALIGNED(*address,
2670 VM_MAP_PAGE_MASK(map)));
2671 } else {
2672 /*
2673 * Verify that:
2674 * the address doesn't itself violate
2675 * the mask requirement.
2676 */
2677
2678 vm_map_lock(map);
2679 map_locked = TRUE;
2680 if ((start & mask) != 0) {
2681 RETURN(KERN_NO_SPACE);
2682 }
2683
2684 /*
2685 * ... the address is within bounds
2686 */
2687
2688 end = start + size;
2689
2690 if ((start < effective_min_offset) ||
2691 (end > effective_max_offset) ||
2692 (start >= end)) {
2693 RETURN(KERN_INVALID_ADDRESS);
2694 }
2695
2696 if (overwrite && zap_old_map != VM_MAP_NULL) {
2697 int remove_flags;
2698 /*
2699 * Fixed mapping and "overwrite" flag: attempt to
2700 * remove all existing mappings in the specified
2701 * address range, saving them in our "zap_old_map".
2702 */
2703 remove_flags = VM_MAP_REMOVE_SAVE_ENTRIES;
2704 remove_flags |= VM_MAP_REMOVE_NO_MAP_ALIGN;
2705 if (vmk_flags.vmkf_overwrite_immutable) {
2706 /* we can overwrite immutable mappings */
2707 remove_flags |= VM_MAP_REMOVE_IMMUTABLE;
2708 }
2709 (void) vm_map_delete(map, start, end,
2710 remove_flags,
2711 zap_old_map);
2712 }
2713
2714 /*
2715 * ... the starting address isn't allocated
2716 */
2717
2718 if (vm_map_lookup_entry(map, start, &entry)) {
2719 if (!(vmk_flags.vmkf_already)) {
2720 RETURN(KERN_NO_SPACE);
2721 }
2722 /*
2723 * Check if what's already there is what we want.
2724 */
2725 tmp_start = start;
2726 tmp_offset = offset;
2727 if (entry->vme_start < start) {
2728 tmp_start -= start - entry->vme_start;
2729 tmp_offset -= start - entry->vme_start;
2730 }
2731 for (; entry->vme_start < end;
2732 entry = entry->vme_next) {
2733 /*
2734 * Check if the mapping's attributes
2735 * match the existing map entry.
2736 */
2737 if (entry == vm_map_to_entry(map) ||
2738 entry->vme_start != tmp_start ||
2739 entry->is_sub_map != is_submap ||
2740 VME_OFFSET(entry) != tmp_offset ||
2741 entry->needs_copy != needs_copy ||
2742 entry->protection != cur_protection ||
2743 entry->max_protection != max_protection ||
2744 entry->inheritance != inheritance ||
2745 entry->iokit_acct != iokit_acct ||
2746 VME_ALIAS(entry) != alias) {
2747 /* not the same mapping ! */
2748 RETURN(KERN_NO_SPACE);
2749 }
2750 /*
2751 * Check if the same object is being mapped.
2752 */
2753 if (is_submap) {
2754 if (VME_SUBMAP(entry) !=
2755 (vm_map_t) object) {
2756 /* not the same submap */
2757 RETURN(KERN_NO_SPACE);
2758 }
2759 } else {
2760 if (VME_OBJECT(entry) != object) {
2761 /* not the same VM object... */
2762 vm_object_t obj2;
2763
2764 obj2 = VME_OBJECT(entry);
2765 if ((obj2 == VM_OBJECT_NULL ||
2766 obj2->internal) &&
2767 (object == VM_OBJECT_NULL ||
2768 object->internal)) {
2769 /*
2770 * ... but both are
2771 * anonymous memory,
2772 * so equivalent.
2773 */
2774 } else {
2775 RETURN(KERN_NO_SPACE);
2776 }
2777 }
2778 }
2779
2780 tmp_offset += entry->vme_end - entry->vme_start;
2781 tmp_start += entry->vme_end - entry->vme_start;
2782 if (entry->vme_end >= end) {
2783 /* reached the end of our mapping */
2784 break;
2785 }
2786 }
2787 /* it all matches: let's use what's already there ! */
2788 RETURN(KERN_MEMORY_PRESENT);
2789 }
2790
2791 /*
2792 * ... the next region doesn't overlap the
2793 * end point.
2794 */
2795
2796 if ((entry->vme_next != vm_map_to_entry(map)) &&
2797 (entry->vme_next->vme_start < end)) {
2798 RETURN(KERN_NO_SPACE);
2799 }
2800 }
2801
2802 /*
2803 * At this point,
2804 * "start" and "end" should define the endpoints of the
2805 * available new range, and
2806 * "entry" should refer to the region before the new
2807 * range, and
2808 *
2809 * the map should be locked.
2810 */
2811
2812 /*
2813 * See whether we can avoid creating a new entry (and object) by
2814 * extending one of our neighbors. [So far, we only attempt to
2815 * extend from below.] Note that we can never extend/join
2816 * purgable objects because they need to remain distinct
2817 * entities in order to implement their "volatile object"
2818 * semantics.
2819 */
2820
2821 if (purgable ||
2822 entry_for_jit ||
2823 vm_memory_malloc_no_cow(user_alias)) {
2824 if (object == VM_OBJECT_NULL) {
2825 object = vm_object_allocate(size);
2826 object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
2827 object->true_share = FALSE;
2828 if (purgable) {
2829 task_t owner;
2830 object->purgable = VM_PURGABLE_NONVOLATILE;
2831 if (map->pmap == kernel_pmap) {
2832 /*
2833 * Purgeable mappings made in a kernel
2834 * map are "owned" by the kernel itself
2835 * rather than the current user task
2836 * because they're likely to be used by
2837 * more than this user task (see
2838 * execargs_purgeable_allocate(), for
2839 * example).
2840 */
2841 owner = kernel_task;
2842 } else {
2843 owner = current_task();
2844 }
2845 assert(object->vo_owner == NULL);
2846 assert(object->resident_page_count == 0);
2847 assert(object->wired_page_count == 0);
2848 vm_object_lock(object);
2849 vm_purgeable_nonvolatile_enqueue(object, owner);
2850 vm_object_unlock(object);
2851 }
2852 offset = (vm_object_offset_t)0;
2853 }
2854 } else if ((is_submap == FALSE) &&
2855 (object == VM_OBJECT_NULL) &&
2856 (entry != vm_map_to_entry(map)) &&
2857 (entry->vme_end == start) &&
2858 (!entry->is_shared) &&
2859 (!entry->is_sub_map) &&
2860 (!entry->in_transition) &&
2861 (!entry->needs_wakeup) &&
2862 (entry->behavior == VM_BEHAVIOR_DEFAULT) &&
2863 (entry->protection == cur_protection) &&
2864 (entry->max_protection == max_protection) &&
2865 (entry->inheritance == inheritance) &&
2866 ((user_alias == VM_MEMORY_REALLOC) ||
2867 (VME_ALIAS(entry) == alias)) &&
2868 (entry->no_cache == no_cache) &&
2869 (entry->permanent == permanent) &&
2870 /* no coalescing for immutable executable mappings */
2871 !((entry->protection & VM_PROT_EXECUTE) &&
2872 entry->permanent) &&
2873 (!entry->superpage_size && !superpage_size) &&
2874 /*
2875 * No coalescing if not map-aligned, to avoid propagating
2876 * that condition any further than needed:
2877 */
2878 (!entry->map_aligned || !clear_map_aligned) &&
2879 (!entry->zero_wired_pages) &&
2880 (!entry->used_for_jit && !entry_for_jit) &&
2881 (!entry->pmap_cs_associated) &&
2882 (entry->iokit_acct == iokit_acct) &&
2883 (!entry->vme_resilient_codesign) &&
2884 (!entry->vme_resilient_media) &&
2885 (!entry->vme_atomic) &&
2886 (entry->vme_no_copy_on_read == no_copy_on_read) &&
2887
2888 ((entry->vme_end - entry->vme_start) + size <=
2889 (user_alias == VM_MEMORY_REALLOC ?
2890 ANON_CHUNK_SIZE :
2891 NO_COALESCE_LIMIT)) &&
2892
2893 (entry->wired_count == 0)) { /* implies user_wired_count == 0 */
2894 if (vm_object_coalesce(VME_OBJECT(entry),
2895 VM_OBJECT_NULL,
2896 VME_OFFSET(entry),
2897 (vm_object_offset_t) 0,
2898 (vm_map_size_t)(entry->vme_end - entry->vme_start),
2899 (vm_map_size_t)(end - entry->vme_end))) {
2900 /*
2901 * Coalesced the two objects - can extend
2902 * the previous map entry to include the
2903 * new range.
2904 */
2905 map->size += (end - entry->vme_end);
2906 assert(entry->vme_start < end);
2907 assert(VM_MAP_PAGE_ALIGNED(end,
2908 VM_MAP_PAGE_MASK(map)));
2909 if (__improbable(vm_debug_events)) {
2910 DTRACE_VM5(map_entry_extend, vm_map_t, map, vm_map_entry_t, entry, vm_address_t, entry->vme_start, vm_address_t, entry->vme_end, vm_address_t, end);
2911 }
2912 entry->vme_end = end;
2913 if (map->holelistenabled) {
2914 vm_map_store_update_first_free(map, entry, TRUE);
2915 } else {
2916 vm_map_store_update_first_free(map, map->first_free, TRUE);
2917 }
2918 new_mapping_established = TRUE;
2919 RETURN(KERN_SUCCESS);
2920 }
2921 }
2922
2923 step = superpage_size ? SUPERPAGE_SIZE : (end - start);
2924 new_entry = NULL;
2925
2926 for (tmp2_start = start; tmp2_start < end; tmp2_start += step) {
2927 tmp2_end = tmp2_start + step;
2928 /*
2929 * Create a new entry
2930 *
2931 * XXX FBDP
2932 * The reserved "page zero" in each process's address space can
2933 * be arbitrarily large. Splitting it into separate objects and
2934 * therefore different VM map entries serves no purpose and just
2935 * slows down operations on the VM map, so let's not split the
2936 * allocation into chunks if the max protection is NONE. That
2937 * memory should never be accessible, so it will never get to the
2938 * default pager.
2939 */
2940 tmp_start = tmp2_start;
2941 if (object == VM_OBJECT_NULL &&
2942 size > chunk_size &&
2943 max_protection != VM_PROT_NONE &&
2944 superpage_size == 0) {
2945 tmp_end = tmp_start + chunk_size;
2946 } else {
2947 tmp_end = tmp2_end;
2948 }
2949 do {
2950 new_entry = vm_map_entry_insert(
2951 map, entry, tmp_start, tmp_end,
2952 object, offset, needs_copy,
2953 FALSE, FALSE,
2954 cur_protection, max_protection,
2955 VM_BEHAVIOR_DEFAULT,
2956 (entry_for_jit)? VM_INHERIT_NONE: inheritance,
2957 0,
2958 no_cache,
2959 permanent,
2960 no_copy_on_read,
2961 superpage_size,
2962 clear_map_aligned,
2963 is_submap,
2964 entry_for_jit,
2965 alias);
2966
2967 assert((object != kernel_object) || (VM_KERN_MEMORY_NONE != alias));
2968
2969 if (resilient_codesign &&
2970 !((cur_protection | max_protection) &
2971 (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
2972 new_entry->vme_resilient_codesign = TRUE;
2973 }
2974
2975 if (resilient_media &&
2976 (object == VM_OBJECT_NULL ||
2977 object->internal)) {
2978 new_entry->vme_resilient_media = TRUE;
2979 }
2980
2981 assert(!new_entry->iokit_acct);
2982 if (!is_submap &&
2983 object != VM_OBJECT_NULL &&
2984 (object->purgable != VM_PURGABLE_DENY ||
2985 object->vo_ledger_tag)) {
2986 assert(new_entry->use_pmap);
2987 assert(!new_entry->iokit_acct);
2988 /*
2989 * Turn off pmap accounting since
2990 * purgeable (or tagged) objects have their
2991 * own ledgers.
2992 */
2993 new_entry->use_pmap = FALSE;
2994 } else if (!is_submap &&
2995 iokit_acct &&
2996 object != VM_OBJECT_NULL &&
2997 object->internal) {
2998 /* alternate accounting */
2999 assert(!new_entry->iokit_acct);
3000 assert(new_entry->use_pmap);
3001 new_entry->iokit_acct = TRUE;
3002 new_entry->use_pmap = FALSE;
3003 DTRACE_VM4(
3004 vm_map_iokit_mapped_region,
3005 vm_map_t, map,
3006 vm_map_offset_t, new_entry->vme_start,
3007 vm_map_offset_t, new_entry->vme_end,
3008 int, VME_ALIAS(new_entry));
3009 vm_map_iokit_mapped_region(
3010 map,
3011 (new_entry->vme_end -
3012 new_entry->vme_start));
3013 } else if (!is_submap) {
3014 assert(!new_entry->iokit_acct);
3015 assert(new_entry->use_pmap);
3016 }
3017
3018 if (is_submap) {
3019 vm_map_t submap;
3020 boolean_t submap_is_64bit;
3021 boolean_t use_pmap;
3022
3023 assert(new_entry->is_sub_map);
3024 assert(!new_entry->use_pmap);
3025 assert(!new_entry->iokit_acct);
3026 submap = (vm_map_t) object;
3027 submap_is_64bit = vm_map_is_64bit(submap);
3028 use_pmap = vmk_flags.vmkf_nested_pmap;
3029 #ifndef NO_NESTED_PMAP
3030 if (use_pmap && submap->pmap == NULL) {
3031 ledger_t ledger = map->pmap->ledger;
3032 /* we need a sub pmap to nest... */
3033 submap->pmap = pmap_create_options(ledger, 0,
3034 submap_is_64bit ? PMAP_CREATE_64BIT : 0);
3035 if (submap->pmap == NULL) {
3036 /* let's proceed without nesting... */
3037 }
3038 #if defined(__arm__) || defined(__arm64__)
3039 else {
3040 pmap_set_nested(submap->pmap);
3041 }
3042 #endif
3043 }
3044 if (use_pmap && submap->pmap != NULL) {
3045 kr = pmap_nest(map->pmap,
3046 submap->pmap,
3047 tmp_start,
3048 tmp_start,
3049 tmp_end - tmp_start);
3050 if (kr != KERN_SUCCESS) {
3051 printf("vm_map_enter: "
3052 "pmap_nest(0x%llx,0x%llx) "
3053 "error 0x%x\n",
3054 (long long)tmp_start,
3055 (long long)tmp_end,
3056 kr);
3057 } else {
3058 /* we're now nested ! */
3059 new_entry->use_pmap = TRUE;
3060 pmap_empty = FALSE;
3061 }
3062 }
3063 #endif /* NO_NESTED_PMAP */
3064 }
3065 entry = new_entry;
3066
3067 if (superpage_size) {
3068 vm_page_t pages, m;
3069 vm_object_t sp_object;
3070 vm_object_offset_t sp_offset;
3071
3072 VME_OFFSET_SET(entry, 0);
3073
3074 /* allocate one superpage */
3075 kr = cpm_allocate(SUPERPAGE_SIZE, &pages, 0, SUPERPAGE_NBASEPAGES - 1, TRUE, 0);
3076 if (kr != KERN_SUCCESS) {
3077 /* deallocate whole range... */
3078 new_mapping_established = TRUE;
3079 /* ... but only up to "tmp_end" */
3080 size -= end - tmp_end;
3081 RETURN(kr);
3082 }
3083
3084 /* create one vm_object per superpage */
3085 sp_object = vm_object_allocate((vm_map_size_t)(entry->vme_end - entry->vme_start));
3086 sp_object->phys_contiguous = TRUE;
3087 sp_object->vo_shadow_offset = (vm_object_offset_t)VM_PAGE_GET_PHYS_PAGE(pages) * PAGE_SIZE;
3088 VME_OBJECT_SET(entry, sp_object);
3089 assert(entry->use_pmap);
3090
3091 /* enter the base pages into the object */
3092 vm_object_lock(sp_object);
3093 for (sp_offset = 0;
3094 sp_offset < SUPERPAGE_SIZE;
3095 sp_offset += PAGE_SIZE) {
3096 m = pages;
3097 pmap_zero_page(VM_PAGE_GET_PHYS_PAGE(m));
3098 pages = NEXT_PAGE(m);
3099 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
3100 vm_page_insert_wired(m, sp_object, sp_offset, VM_KERN_MEMORY_OSFMK);
3101 }
3102 vm_object_unlock(sp_object);
3103 }
3104 } while (tmp_end != tmp2_end &&
3105 (tmp_start = tmp_end) &&
3106 (tmp_end = (tmp2_end - tmp_end > chunk_size) ?
3107 tmp_end + chunk_size : tmp2_end));
3108 }
3109
3110 new_mapping_established = TRUE;
3111
3112 BailOut:
3113 assert(map_locked == TRUE);
3114
3115 if (result == KERN_SUCCESS) {
3116 vm_prot_t pager_prot;
3117 memory_object_t pager;
3118
3119 #if DEBUG
3120 if (pmap_empty &&
3121 !(vmk_flags.vmkf_no_pmap_check)) {
3122 assert(vm_map_pmap_is_empty(map,
3123 *address,
3124 *address + size));
3125 }
3126 #endif /* DEBUG */
3127
3128 /*
3129 * For "named" VM objects, let the pager know that the
3130 * memory object is being mapped. Some pagers need to keep
3131 * track of this, to know when they can reclaim the memory
3132 * object, for example.
3133 * VM calls memory_object_map() for each mapping (specifying
3134 * the protection of each mapping) and calls
3135 * memory_object_last_unmap() when all the mappings are gone.
3136 */
3137 pager_prot = max_protection;
3138 if (needs_copy) {
3139 /*
3140 * Copy-On-Write mapping: won't modify
3141 * the memory object.
3142 */
3143 pager_prot &= ~VM_PROT_WRITE;
3144 }
3145 if (!is_submap &&
3146 object != VM_OBJECT_NULL &&
3147 object->named &&
3148 object->pager != MEMORY_OBJECT_NULL) {
3149 vm_object_lock(object);
3150 pager = object->pager;
3151 if (object->named &&
3152 pager != MEMORY_OBJECT_NULL) {
3153 assert(object->pager_ready);
3154 vm_object_mapping_wait(object, THREAD_UNINT);
3155 vm_object_mapping_begin(object);
3156 vm_object_unlock(object);
3157
3158 kr = memory_object_map(pager, pager_prot);
3159 assert(kr == KERN_SUCCESS);
3160
3161 vm_object_lock(object);
3162 vm_object_mapping_end(object);
3163 }
3164 vm_object_unlock(object);
3165 }
3166 }
3167
3168 assert(map_locked == TRUE);
3169
3170 if (!keep_map_locked) {
3171 vm_map_unlock(map);
3172 map_locked = FALSE;
3173 }
3174
3175 /*
3176 * We can't hold the map lock if we enter this block.
3177 */
3178
3179 if (result == KERN_SUCCESS) {
3180 /* Wire down the new entry if the user
3181 * requested all new map entries be wired.
3182 */
3183 if ((map->wiring_required) || (superpage_size)) {
3184 assert(!keep_map_locked);
3185 pmap_empty = FALSE; /* pmap won't be empty */
3186 kr = vm_map_wire_kernel(map, start, end,
3187 new_entry->protection, VM_KERN_MEMORY_MLOCK,
3188 TRUE);
3189 result = kr;
3190 }
3191
3192 }
3193
3194 if (result != KERN_SUCCESS) {
3195 if (new_mapping_established) {
3196 /*
3197 * We have to get rid of the new mappings since we
3198 * won't make them available to the user.
3199 * Try to do that atomically, to minimize the risk
3200 * that someone else creates new mappings in that range.
3201 */
3202 zap_new_map = vm_map_create(PMAP_NULL,
3203 *address,
3204 *address + size,
3205 map->hdr.entries_pageable);
3206 vm_map_set_page_shift(zap_new_map,
3207 VM_MAP_PAGE_SHIFT(map));
3208 vm_map_disable_hole_optimization(zap_new_map);
3209
3210 if (!map_locked) {
3211 vm_map_lock(map);
3212 map_locked = TRUE;
3213 }
3214 (void) vm_map_delete(map, *address, *address + size,
3215 (VM_MAP_REMOVE_SAVE_ENTRIES |
3216 VM_MAP_REMOVE_NO_MAP_ALIGN),
3217 zap_new_map);
3218 }
3219 if (zap_old_map != VM_MAP_NULL &&
3220 zap_old_map->hdr.nentries != 0) {
3221 vm_map_entry_t entry1, entry2;
3222
3223 /*
3224 * The new mapping failed. Attempt to restore
3225 * the old mappings, saved in the "zap_old_map".
3226 */
3227 if (!map_locked) {
3228 vm_map_lock(map);
3229 map_locked = TRUE;
3230 }
3231
3232 /* first check if the coast is still clear */
3233 start = vm_map_first_entry(zap_old_map)->vme_start;
3234 end = vm_map_last_entry(zap_old_map)->vme_end;
3235 if (vm_map_lookup_entry(map, start, &entry1) ||
3236 vm_map_lookup_entry(map, end, &entry2) ||
3237 entry1 != entry2) {
3238 /*
3239 * Part of that range has already been
3240 * re-mapped: we can't restore the old
3241 * mappings...
3242 */
3243 vm_map_enter_restore_failures++;
3244 } else {
3245 /*
3246 * Transfer the saved map entries from
3247 * "zap_old_map" to the original "map",
3248 * inserting them all after "entry1".
3249 */
3250 for (entry2 = vm_map_first_entry(zap_old_map);
3251 entry2 != vm_map_to_entry(zap_old_map);
3252 entry2 = vm_map_first_entry(zap_old_map)) {
3253 vm_map_size_t entry_size;
3254
3255 entry_size = (entry2->vme_end -
3256 entry2->vme_start);
3257 vm_map_store_entry_unlink(zap_old_map,
3258 entry2);
3259 zap_old_map->size -= entry_size;
3260 vm_map_store_entry_link(map, entry1, entry2,
3261 VM_MAP_KERNEL_FLAGS_NONE);
3262 map->size += entry_size;
3263 entry1 = entry2;
3264 }
3265 if (map->wiring_required) {
3266 /*
3267 * XXX TODO: we should rewire the
3268 * old pages here...
3269 */
3270 }
3271 vm_map_enter_restore_successes++;
3272 }
3273 }
3274 }
3275
3276 /*
3277 * The caller is responsible for releasing the lock if it requested to
3278 * keep the map locked.
3279 */
3280 if (map_locked && !keep_map_locked) {
3281 vm_map_unlock(map);
3282 }
3283
3284 /*
3285 * Get rid of the "zap_maps" and all the map entries that
3286 * they may still contain.
3287 */
3288 if (zap_old_map != VM_MAP_NULL) {
3289 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3290 zap_old_map = VM_MAP_NULL;
3291 }
3292 if (zap_new_map != VM_MAP_NULL) {
3293 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3294 zap_new_map = VM_MAP_NULL;
3295 }
3296
3297 return result;
3298
3299 #undef RETURN
3300 }
3301
3302 #if __arm64__
3303 extern const struct memory_object_pager_ops fourk_pager_ops;
3304 kern_return_t
3305 vm_map_enter_fourk(
3306 vm_map_t map,
3307 vm_map_offset_t *address, /* IN/OUT */
3308 vm_map_size_t size,
3309 vm_map_offset_t mask,
3310 int flags,
3311 vm_map_kernel_flags_t vmk_flags,
3312 vm_tag_t alias,
3313 vm_object_t object,
3314 vm_object_offset_t offset,
3315 boolean_t needs_copy,
3316 vm_prot_t cur_protection,
3317 vm_prot_t max_protection,
3318 vm_inherit_t inheritance)
3319 {
3320 vm_map_entry_t entry, new_entry;
3321 vm_map_offset_t start, fourk_start;
3322 vm_map_offset_t end, fourk_end;
3323 vm_map_size_t fourk_size;
3324 kern_return_t result = KERN_SUCCESS;
3325 vm_map_t zap_old_map = VM_MAP_NULL;
3326 vm_map_t zap_new_map = VM_MAP_NULL;
3327 boolean_t map_locked = FALSE;
3328 boolean_t pmap_empty = TRUE;
3329 boolean_t new_mapping_established = FALSE;
3330 boolean_t keep_map_locked = vmk_flags.vmkf_keep_map_locked;
3331 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
3332 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
3333 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
3334 boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
3335 boolean_t is_submap = vmk_flags.vmkf_submap;
3336 boolean_t permanent = vmk_flags.vmkf_permanent;
3337 boolean_t no_copy_on_read = vmk_flags.vmkf_no_copy_on_read;
3338 boolean_t entry_for_jit = vmk_flags.vmkf_map_jit;
3339 // boolean_t iokit_acct = vmk_flags.vmkf_iokit_acct;
3340 unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
3341 vm_map_offset_t effective_min_offset, effective_max_offset;
3342 kern_return_t kr;
3343 boolean_t clear_map_aligned = FALSE;
3344 memory_object_t fourk_mem_obj;
3345 vm_object_t fourk_object;
3346 vm_map_offset_t fourk_pager_offset;
3347 int fourk_pager_index_start, fourk_pager_index_num;
3348 int cur_idx;
3349 boolean_t fourk_copy;
3350 vm_object_t copy_object;
3351 vm_object_offset_t copy_offset;
3352
3353 fourk_mem_obj = MEMORY_OBJECT_NULL;
3354 fourk_object = VM_OBJECT_NULL;
3355
3356 if (superpage_size) {
3357 return KERN_NOT_SUPPORTED;
3358 }
3359
3360 if ((cur_protection & VM_PROT_WRITE) &&
3361 (cur_protection & VM_PROT_EXECUTE) &&
3362 #if !CONFIG_EMBEDDED
3363 map != kernel_map &&
3364 cs_process_enforcement(NULL) &&
3365 #endif /* !CONFIG_EMBEDDED */
3366 !entry_for_jit) {
3367 DTRACE_VM3(cs_wx,
3368 uint64_t, 0,
3369 uint64_t, 0,
3370 vm_prot_t, cur_protection);
3371 printf("CODE SIGNING: %d[%s] %s: curprot cannot be write+execute. "
3372 "turning off execute\n",
3373 proc_selfpid(),
3374 (current_task()->bsd_info
3375 ? proc_name_address(current_task()->bsd_info)
3376 : "?"),
3377 __FUNCTION__);
3378 cur_protection &= ~VM_PROT_EXECUTE;
3379 }
3380
3381 /*
3382 * If the task has requested executable lockdown,
3383 * deny any new executable mapping.
3384 */
3385 if (map->map_disallow_new_exec == TRUE) {
3386 if (cur_protection & VM_PROT_EXECUTE) {
3387 return KERN_PROTECTION_FAILURE;
3388 }
3389 }
3390
3391 if (is_submap) {
3392 return KERN_NOT_SUPPORTED;
3393 }
3394 if (vmk_flags.vmkf_already) {
3395 return KERN_NOT_SUPPORTED;
3396 }
3397 if (purgable || entry_for_jit) {
3398 return KERN_NOT_SUPPORTED;
3399 }
3400
3401 effective_min_offset = map->min_offset;
3402
3403 if (vmk_flags.vmkf_beyond_max) {
3404 return KERN_NOT_SUPPORTED;
3405 } else {
3406 effective_max_offset = map->max_offset;
3407 }
3408
3409 if (size == 0 ||
3410 (offset & FOURK_PAGE_MASK) != 0) {
3411 *address = 0;
3412 return KERN_INVALID_ARGUMENT;
3413 }
3414
3415 #define RETURN(value) { result = value; goto BailOut; }
3416
3417 assert(VM_MAP_PAGE_ALIGNED(*address, FOURK_PAGE_MASK));
3418 assert(VM_MAP_PAGE_ALIGNED(size, FOURK_PAGE_MASK));
3419
3420 if (!anywhere && overwrite) {
3421 return KERN_NOT_SUPPORTED;
3422 }
3423 if (!anywhere && overwrite) {
3424 /*
3425 * Create a temporary VM map to hold the old mappings in the
3426 * affected area while we create the new one.
3427 * This avoids releasing the VM map lock in
3428 * vm_map_entry_delete() and allows atomicity
3429 * when we want to replace some mappings with a new one.
3430 * It also allows us to restore the old VM mappings if the
3431 * new mapping fails.
3432 */
3433 zap_old_map = vm_map_create(PMAP_NULL,
3434 *address,
3435 *address + size,
3436 map->hdr.entries_pageable);
3437 vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
3438 vm_map_disable_hole_optimization(zap_old_map);
3439 }
3440
3441 fourk_start = *address;
3442 fourk_size = size;
3443 fourk_end = fourk_start + fourk_size;
3444
3445 start = vm_map_trunc_page(*address, VM_MAP_PAGE_MASK(map));
3446 end = vm_map_round_page(fourk_end, VM_MAP_PAGE_MASK(map));
3447 size = end - start;
3448
3449 if (anywhere) {
3450 return KERN_NOT_SUPPORTED;
3451 } else {
3452 /*
3453 * Verify that:
3454 * the address doesn't itself violate
3455 * the mask requirement.
3456 */
3457
3458 vm_map_lock(map);
3459 map_locked = TRUE;
3460 if ((start & mask) != 0) {
3461 RETURN(KERN_NO_SPACE);
3462 }
3463
3464 /*
3465 * ... the address is within bounds
3466 */
3467
3468 end = start + size;
3469
3470 if ((start < effective_min_offset) ||
3471 (end > effective_max_offset) ||
3472 (start >= end)) {
3473 RETURN(KERN_INVALID_ADDRESS);
3474 }
3475
3476 if (overwrite && zap_old_map != VM_MAP_NULL) {
3477 /*
3478 * Fixed mapping and "overwrite" flag: attempt to
3479 * remove all existing mappings in the specified
3480 * address range, saving them in our "zap_old_map".
3481 */
3482 (void) vm_map_delete(map, start, end,
3483 (VM_MAP_REMOVE_SAVE_ENTRIES |
3484 VM_MAP_REMOVE_NO_MAP_ALIGN),
3485 zap_old_map);
3486 }
3487
3488 /*
3489 * ... the starting address isn't allocated
3490 */
3491 if (vm_map_lookup_entry(map, start, &entry)) {
3492 vm_object_t cur_object, shadow_object;
3493
3494 /*
3495 * We might already have some 4K mappings
3496 * in a 16K page here.
3497 */
3498
3499 if (entry->vme_end - entry->vme_start
3500 != SIXTEENK_PAGE_SIZE) {
3501 RETURN(KERN_NO_SPACE);
3502 }
3503 if (entry->is_sub_map) {
3504 RETURN(KERN_NO_SPACE);
3505 }
3506 if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
3507 RETURN(KERN_NO_SPACE);
3508 }
3509
3510 /* go all the way down the shadow chain */
3511 cur_object = VME_OBJECT(entry);
3512 vm_object_lock(cur_object);
3513 while (cur_object->shadow != VM_OBJECT_NULL) {
3514 shadow_object = cur_object->shadow;
3515 vm_object_lock(shadow_object);
3516 vm_object_unlock(cur_object);
3517 cur_object = shadow_object;
3518 shadow_object = VM_OBJECT_NULL;
3519 }
3520 if (cur_object->internal ||
3521 cur_object->pager == NULL) {
3522 vm_object_unlock(cur_object);
3523 RETURN(KERN_NO_SPACE);
3524 }
3525 if (cur_object->pager->mo_pager_ops
3526 != &fourk_pager_ops) {
3527 vm_object_unlock(cur_object);
3528 RETURN(KERN_NO_SPACE);
3529 }
3530 fourk_object = cur_object;
3531 fourk_mem_obj = fourk_object->pager;
3532
3533 /* keep the "4K" object alive */
3534 vm_object_reference_locked(fourk_object);
3535 vm_object_unlock(fourk_object);
3536
3537 /* merge permissions */
3538 entry->protection |= cur_protection;
3539 entry->max_protection |= max_protection;
3540 if ((entry->protection & (VM_PROT_WRITE |
3541 VM_PROT_EXECUTE)) ==
3542 (VM_PROT_WRITE | VM_PROT_EXECUTE) &&
3543 fourk_binary_compatibility_unsafe &&
3544 fourk_binary_compatibility_allow_wx) {
3545 /* write+execute: need to be "jit" */
3546 entry->used_for_jit = TRUE;
3547 }
3548
3549 goto map_in_fourk_pager;
3550 }
3551
3552 /*
3553 * ... the next region doesn't overlap the
3554 * end point.
3555 */
3556
3557 if ((entry->vme_next != vm_map_to_entry(map)) &&
3558 (entry->vme_next->vme_start < end)) {
3559 RETURN(KERN_NO_SPACE);
3560 }
3561 }
3562
3563 /*
3564 * At this point,
3565 * "start" and "end" should define the endpoints of the
3566 * available new range, and
3567 * "entry" should refer to the region before the new
3568 * range, and
3569 *
3570 * the map should be locked.
3571 */
3572
3573 /* create a new "4K" pager */
3574 fourk_mem_obj = fourk_pager_create();
3575 fourk_object = fourk_pager_to_vm_object(fourk_mem_obj);
3576 assert(fourk_object);
3577
3578 /* keep the "4" object alive */
3579 vm_object_reference(fourk_object);
3580
3581 /* create a "copy" object, to map the "4K" object copy-on-write */
3582 fourk_copy = TRUE;
3583 result = vm_object_copy_strategically(fourk_object,
3584 0,
3585 end - start,
3586 &copy_object,
3587 &copy_offset,
3588 &fourk_copy);
3589 assert(result == KERN_SUCCESS);
3590 assert(copy_object != VM_OBJECT_NULL);
3591 assert(copy_offset == 0);
3592
3593 /* take a reference on the copy object, for this mapping */
3594 vm_object_reference(copy_object);
3595
3596 /* map the "4K" pager's copy object */
3597 new_entry =
3598 vm_map_entry_insert(map, entry,
3599 vm_map_trunc_page(start,
3600 VM_MAP_PAGE_MASK(map)),
3601 vm_map_round_page(end,
3602 VM_MAP_PAGE_MASK(map)),
3603 copy_object,
3604 0, /* offset */
3605 FALSE, /* needs_copy */
3606 FALSE,
3607 FALSE,
3608 cur_protection, max_protection,
3609 VM_BEHAVIOR_DEFAULT,
3610 ((entry_for_jit)
3611 ? VM_INHERIT_NONE
3612 : inheritance),
3613 0,
3614 no_cache,
3615 permanent,
3616 no_copy_on_read,
3617 superpage_size,
3618 clear_map_aligned,
3619 is_submap,
3620 FALSE, /* jit */
3621 alias);
3622 entry = new_entry;
3623
3624 #if VM_MAP_DEBUG_FOURK
3625 if (vm_map_debug_fourk) {
3626 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] new pager %p\n",
3627 map,
3628 (uint64_t) entry->vme_start,
3629 (uint64_t) entry->vme_end,
3630 fourk_mem_obj);
3631 }
3632 #endif /* VM_MAP_DEBUG_FOURK */
3633
3634 new_mapping_established = TRUE;
3635
3636 map_in_fourk_pager:
3637 /* "map" the original "object" where it belongs in the "4K" pager */
3638 fourk_pager_offset = (fourk_start & SIXTEENK_PAGE_MASK);
3639 fourk_pager_index_start = (int) (fourk_pager_offset / FOURK_PAGE_SIZE);
3640 if (fourk_size > SIXTEENK_PAGE_SIZE) {
3641 fourk_pager_index_num = 4;
3642 } else {
3643 fourk_pager_index_num = (int) (fourk_size / FOURK_PAGE_SIZE);
3644 }
3645 if (fourk_pager_index_start + fourk_pager_index_num > 4) {
3646 fourk_pager_index_num = 4 - fourk_pager_index_start;
3647 }
3648 for (cur_idx = 0;
3649 cur_idx < fourk_pager_index_num;
3650 cur_idx++) {
3651 vm_object_t old_object;
3652 vm_object_offset_t old_offset;
3653
3654 kr = fourk_pager_populate(fourk_mem_obj,
3655 TRUE, /* overwrite */
3656 fourk_pager_index_start + cur_idx,
3657 object,
3658 (object
3659 ? (offset +
3660 (cur_idx * FOURK_PAGE_SIZE))
3661 : 0),
3662 &old_object,
3663 &old_offset);
3664 #if VM_MAP_DEBUG_FOURK
3665 if (vm_map_debug_fourk) {
3666 if (old_object == (vm_object_t) -1 &&
3667 old_offset == (vm_object_offset_t) -1) {
3668 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] "
3669 "pager [%p:0x%llx] "
3670 "populate[%d] "
3671 "[object:%p,offset:0x%llx]\n",
3672 map,
3673 (uint64_t) entry->vme_start,
3674 (uint64_t) entry->vme_end,
3675 fourk_mem_obj,
3676 VME_OFFSET(entry),
3677 fourk_pager_index_start + cur_idx,
3678 object,
3679 (object
3680 ? (offset + (cur_idx * FOURK_PAGE_SIZE))
3681 : 0));
3682 } else {
3683 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] "
3684 "pager [%p:0x%llx] "
3685 "populate[%d] [object:%p,offset:0x%llx] "
3686 "old [%p:0x%llx]\n",
3687 map,
3688 (uint64_t) entry->vme_start,
3689 (uint64_t) entry->vme_end,
3690 fourk_mem_obj,
3691 VME_OFFSET(entry),
3692 fourk_pager_index_start + cur_idx,
3693 object,
3694 (object
3695 ? (offset + (cur_idx * FOURK_PAGE_SIZE))
3696 : 0),
3697 old_object,
3698 old_offset);
3699 }
3700 }
3701 #endif /* VM_MAP_DEBUG_FOURK */
3702
3703 assert(kr == KERN_SUCCESS);
3704 if (object != old_object &&
3705 object != VM_OBJECT_NULL &&
3706 object != (vm_object_t) -1) {
3707 vm_object_reference(object);
3708 }
3709 if (object != old_object &&
3710 old_object != VM_OBJECT_NULL &&
3711 old_object != (vm_object_t) -1) {
3712 vm_object_deallocate(old_object);
3713 }
3714 }
3715
3716 BailOut:
3717 assert(map_locked == TRUE);
3718
3719 if (fourk_object != VM_OBJECT_NULL) {
3720 vm_object_deallocate(fourk_object);
3721 fourk_object = VM_OBJECT_NULL;
3722 fourk_mem_obj = MEMORY_OBJECT_NULL;
3723 }
3724
3725 if (result == KERN_SUCCESS) {
3726 vm_prot_t pager_prot;
3727 memory_object_t pager;
3728
3729 #if DEBUG
3730 if (pmap_empty &&
3731 !(vmk_flags.vmkf_no_pmap_check)) {
3732 assert(vm_map_pmap_is_empty(map,
3733 *address,
3734 *address + size));
3735 }
3736 #endif /* DEBUG */
3737
3738 /*
3739 * For "named" VM objects, let the pager know that the
3740 * memory object is being mapped. Some pagers need to keep
3741 * track of this, to know when they can reclaim the memory
3742 * object, for example.
3743 * VM calls memory_object_map() for each mapping (specifying
3744 * the protection of each mapping) and calls
3745 * memory_object_last_unmap() when all the mappings are gone.
3746 */
3747 pager_prot = max_protection;
3748 if (needs_copy) {
3749 /*
3750 * Copy-On-Write mapping: won't modify
3751 * the memory object.
3752 */
3753 pager_prot &= ~VM_PROT_WRITE;
3754 }
3755 if (!is_submap &&
3756 object != VM_OBJECT_NULL &&
3757 object->named &&
3758 object->pager != MEMORY_OBJECT_NULL) {
3759 vm_object_lock(object);
3760 pager = object->pager;
3761 if (object->named &&
3762 pager != MEMORY_OBJECT_NULL) {
3763 assert(object->pager_ready);
3764 vm_object_mapping_wait(object, THREAD_UNINT);
3765 vm_object_mapping_begin(object);
3766 vm_object_unlock(object);
3767
3768 kr = memory_object_map(pager, pager_prot);
3769 assert(kr == KERN_SUCCESS);
3770
3771 vm_object_lock(object);
3772 vm_object_mapping_end(object);
3773 }
3774 vm_object_unlock(object);
3775 }
3776 if (!is_submap &&
3777 fourk_object != VM_OBJECT_NULL &&
3778 fourk_object->named &&
3779 fourk_object->pager != MEMORY_OBJECT_NULL) {
3780 vm_object_lock(fourk_object);
3781 pager = fourk_object->pager;
3782 if (fourk_object->named &&
3783 pager != MEMORY_OBJECT_NULL) {
3784 assert(fourk_object->pager_ready);
3785 vm_object_mapping_wait(fourk_object,
3786 THREAD_UNINT);
3787 vm_object_mapping_begin(fourk_object);
3788 vm_object_unlock(fourk_object);
3789
3790 kr = memory_object_map(pager, VM_PROT_READ);
3791 assert(kr == KERN_SUCCESS);
3792
3793 vm_object_lock(fourk_object);
3794 vm_object_mapping_end(fourk_object);
3795 }
3796 vm_object_unlock(fourk_object);
3797 }
3798 }
3799
3800 assert(map_locked == TRUE);
3801
3802 if (!keep_map_locked) {
3803 vm_map_unlock(map);
3804 map_locked = FALSE;
3805 }
3806
3807 /*
3808 * We can't hold the map lock if we enter this block.
3809 */
3810
3811 if (result == KERN_SUCCESS) {
3812 /* Wire down the new entry if the user
3813 * requested all new map entries be wired.
3814 */
3815 if ((map->wiring_required) || (superpage_size)) {
3816 assert(!keep_map_locked);
3817 pmap_empty = FALSE; /* pmap won't be empty */
3818 kr = vm_map_wire_kernel(map, start, end,
3819 new_entry->protection, VM_KERN_MEMORY_MLOCK,
3820 TRUE);
3821 result = kr;
3822 }
3823
3824 }
3825
3826 if (result != KERN_SUCCESS) {
3827 if (new_mapping_established) {
3828 /*
3829 * We have to get rid of the new mappings since we
3830 * won't make them available to the user.
3831 * Try to do that atomically, to minimize the risk
3832 * that someone else creates new mappings in that range.
3833 */
3834 zap_new_map = vm_map_create(PMAP_NULL,
3835 *address,
3836 *address + size,
3837 map->hdr.entries_pageable);
3838 vm_map_set_page_shift(zap_new_map,
3839 VM_MAP_PAGE_SHIFT(map));
3840 vm_map_disable_hole_optimization(zap_new_map);
3841
3842 if (!map_locked) {
3843 vm_map_lock(map);
3844 map_locked = TRUE;
3845 }
3846 (void) vm_map_delete(map, *address, *address + size,
3847 (VM_MAP_REMOVE_SAVE_ENTRIES |
3848 VM_MAP_REMOVE_NO_MAP_ALIGN),
3849 zap_new_map);
3850 }
3851 if (zap_old_map != VM_MAP_NULL &&
3852 zap_old_map->hdr.nentries != 0) {
3853 vm_map_entry_t entry1, entry2;
3854
3855 /*
3856 * The new mapping failed. Attempt to restore
3857 * the old mappings, saved in the "zap_old_map".
3858 */
3859 if (!map_locked) {
3860 vm_map_lock(map);
3861 map_locked = TRUE;
3862 }
3863
3864 /* first check if the coast is still clear */
3865 start = vm_map_first_entry(zap_old_map)->vme_start;
3866 end = vm_map_last_entry(zap_old_map)->vme_end;
3867 if (vm_map_lookup_entry(map, start, &entry1) ||
3868 vm_map_lookup_entry(map, end, &entry2) ||
3869 entry1 != entry2) {
3870 /*
3871 * Part of that range has already been
3872 * re-mapped: we can't restore the old
3873 * mappings...
3874 */
3875 vm_map_enter_restore_failures++;
3876 } else {
3877 /*
3878 * Transfer the saved map entries from
3879 * "zap_old_map" to the original "map",
3880 * inserting them all after "entry1".
3881 */
3882 for (entry2 = vm_map_first_entry(zap_old_map);
3883 entry2 != vm_map_to_entry(zap_old_map);
3884 entry2 = vm_map_first_entry(zap_old_map)) {
3885 vm_map_size_t entry_size;
3886
3887 entry_size = (entry2->vme_end -
3888 entry2->vme_start);
3889 vm_map_store_entry_unlink(zap_old_map,
3890 entry2);
3891 zap_old_map->size -= entry_size;
3892 vm_map_store_entry_link(map, entry1, entry2,
3893 VM_MAP_KERNEL_FLAGS_NONE);
3894 map->size += entry_size;
3895 entry1 = entry2;
3896 }
3897 if (map->wiring_required) {
3898 /*
3899 * XXX TODO: we should rewire the
3900 * old pages here...
3901 */
3902 }
3903 vm_map_enter_restore_successes++;
3904 }
3905 }
3906 }
3907
3908 /*
3909 * The caller is responsible for releasing the lock if it requested to
3910 * keep the map locked.
3911 */
3912 if (map_locked && !keep_map_locked) {
3913 vm_map_unlock(map);
3914 }
3915
3916 /*
3917 * Get rid of the "zap_maps" and all the map entries that
3918 * they may still contain.
3919 */
3920 if (zap_old_map != VM_MAP_NULL) {
3921 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3922 zap_old_map = VM_MAP_NULL;
3923 }
3924 if (zap_new_map != VM_MAP_NULL) {
3925 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3926 zap_new_map = VM_MAP_NULL;
3927 }
3928
3929 return result;
3930
3931 #undef RETURN
3932 }
3933 #endif /* __arm64__ */
3934
3935 /*
3936 * Counters for the prefault optimization.
3937 */
3938 int64_t vm_prefault_nb_pages = 0;
3939 int64_t vm_prefault_nb_bailout = 0;
3940
3941 static kern_return_t
3942 vm_map_enter_mem_object_helper(
3943 vm_map_t target_map,
3944 vm_map_offset_t *address,
3945 vm_map_size_t initial_size,
3946 vm_map_offset_t mask,
3947 int flags,
3948 vm_map_kernel_flags_t vmk_flags,
3949 vm_tag_t tag,
3950 ipc_port_t port,
3951 vm_object_offset_t offset,
3952 boolean_t copy,
3953 vm_prot_t cur_protection,
3954 vm_prot_t max_protection,
3955 vm_inherit_t inheritance,
3956 upl_page_list_ptr_t page_list,
3957 unsigned int page_list_count)
3958 {
3959 vm_map_address_t map_addr;
3960 vm_map_size_t map_size;
3961 vm_object_t object;
3962 vm_object_size_t size;
3963 kern_return_t result;
3964 boolean_t mask_cur_protection, mask_max_protection;
3965 boolean_t kernel_prefault, try_prefault = (page_list_count != 0);
3966 vm_map_offset_t offset_in_mapping = 0;
3967 #if __arm64__
3968 boolean_t fourk = vmk_flags.vmkf_fourk;
3969 #endif /* __arm64__ */
3970
3971 assertf(vmk_flags.__vmkf_unused == 0, "vmk_flags unused=0x%x\n", vmk_flags.__vmkf_unused);
3972
3973 mask_cur_protection = cur_protection & VM_PROT_IS_MASK;
3974 mask_max_protection = max_protection & VM_PROT_IS_MASK;
3975 cur_protection &= ~VM_PROT_IS_MASK;
3976 max_protection &= ~VM_PROT_IS_MASK;
3977
3978 /*
3979 * Check arguments for validity
3980 */
3981 if ((target_map == VM_MAP_NULL) ||
3982 (cur_protection & ~VM_PROT_ALL) ||
3983 (max_protection & ~VM_PROT_ALL) ||
3984 (inheritance > VM_INHERIT_LAST_VALID) ||
3985 (try_prefault && (copy || !page_list)) ||
3986 initial_size == 0) {
3987 return KERN_INVALID_ARGUMENT;
3988 }
3989
3990 #if __arm64__
3991 if (fourk) {
3992 map_addr = vm_map_trunc_page(*address, FOURK_PAGE_MASK);
3993 map_size = vm_map_round_page(initial_size, FOURK_PAGE_MASK);
3994 } else
3995 #endif /* __arm64__ */
3996 {
3997 map_addr = vm_map_trunc_page(*address,
3998 VM_MAP_PAGE_MASK(target_map));
3999 map_size = vm_map_round_page(initial_size,
4000 VM_MAP_PAGE_MASK(target_map));
4001 }
4002 size = vm_object_round_page(initial_size);
4003
4004 /*
4005 * Find the vm object (if any) corresponding to this port.
4006 */
4007 if (!IP_VALID(port)) {
4008 object = VM_OBJECT_NULL;
4009 offset = 0;
4010 copy = FALSE;
4011 } else if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
4012 vm_named_entry_t named_entry;
4013
4014 named_entry = (vm_named_entry_t) port->ip_kobject;
4015
4016 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4017 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4018 offset += named_entry->data_offset;
4019 }
4020
4021 /* a few checks to make sure the user is obeying the rules */
4022 if (size == 0) {
4023 if (offset >= named_entry->size) {
4024 return KERN_INVALID_RIGHT;
4025 }
4026 size = named_entry->size - offset;
4027 }
4028 if (mask_max_protection) {
4029 max_protection &= named_entry->protection;
4030 }
4031 if (mask_cur_protection) {
4032 cur_protection &= named_entry->protection;
4033 }
4034 if ((named_entry->protection & max_protection) !=
4035 max_protection) {
4036 return KERN_INVALID_RIGHT;
4037 }
4038 if ((named_entry->protection & cur_protection) !=
4039 cur_protection) {
4040 return KERN_INVALID_RIGHT;
4041 }
4042 if (offset + size < offset) {
4043 /* overflow */
4044 return KERN_INVALID_ARGUMENT;
4045 }
4046 if (named_entry->size < (offset + initial_size)) {
4047 return KERN_INVALID_ARGUMENT;
4048 }
4049
4050 if (named_entry->is_copy) {
4051 /* for a vm_map_copy, we can only map it whole */
4052 if ((size != named_entry->size) &&
4053 (vm_map_round_page(size,
4054 VM_MAP_PAGE_MASK(target_map)) ==
4055 named_entry->size)) {
4056 /* XXX FBDP use the rounded size... */
4057 size = vm_map_round_page(
4058 size,
4059 VM_MAP_PAGE_MASK(target_map));
4060 }
4061
4062 if (!(flags & VM_FLAGS_ANYWHERE) &&
4063 (offset != 0 ||
4064 size != named_entry->size)) {
4065 /*
4066 * XXX for a mapping at a "fixed" address,
4067 * we can't trim after mapping the whole
4068 * memory entry, so reject a request for a
4069 * partial mapping.
4070 */
4071 return KERN_INVALID_ARGUMENT;
4072 }
4073 }
4074
4075 /* the caller's "offset" parameter is relative to the start of the */
4076 /* named entry; convert it to an offset within the backing object */
4077 offset = offset + named_entry->offset;
4078
4079 if (!VM_MAP_PAGE_ALIGNED(size,
4080 VM_MAP_PAGE_MASK(target_map))) {
4081 /*
4082 * Let's not map more than requested;
4083 * vm_map_enter() will handle this "not map-aligned"
4084 * case.
4085 */
4086 map_size = size;
4087 }
4088
4089 named_entry_lock(named_entry);
4090 if (named_entry->is_sub_map) {
4091 vm_map_t submap;
4092
4093 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4094 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4095 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap.");
4096 }
4097
4098 submap = named_entry->backing.map;
4099 vm_map_lock(submap);
4100 vm_map_reference(submap);
4101 vm_map_unlock(submap);
4102 named_entry_unlock(named_entry);
4103
4104 vmk_flags.vmkf_submap = TRUE;
4105
4106 result = vm_map_enter(target_map,
4107 &map_addr,
4108 map_size,
4109 mask,
4110 flags,
4111 vmk_flags,
4112 tag,
4113 (vm_object_t)(uintptr_t) submap,
4114 offset,
4115 copy,
4116 cur_protection,
4117 max_protection,
4118 inheritance);
4119 if (result != KERN_SUCCESS) {
4120 vm_map_deallocate(submap);
4121 } else {
4122 /*
4123 * No need to lock "submap" just to check its
4124 * "mapped" flag: that flag is never reset
4125 * once it's been set and if we race, we'll
4126 * just end up setting it twice, which is OK.
4127 */
4128 if (submap->mapped_in_other_pmaps == FALSE &&
4129 vm_map_pmap(submap) != PMAP_NULL &&
4130 vm_map_pmap(submap) !=
4131 vm_map_pmap(target_map)) {
4132 /*
4133 * This submap is being mapped in a map
4134 * that uses a different pmap.
4135 * Set its "mapped_in_other_pmaps" flag
4136 * to indicate that we now need to
4137 * remove mappings from all pmaps rather
4138 * than just the submap's pmap.
4139 */
4140 vm_map_lock(submap);
4141 submap->mapped_in_other_pmaps = TRUE;
4142 vm_map_unlock(submap);
4143 }
4144 *address = map_addr;
4145 }
4146 return result;
4147 } else if (named_entry->is_copy) {
4148 kern_return_t kr;
4149 vm_map_copy_t copy_map;
4150 vm_map_entry_t copy_entry;
4151 vm_map_offset_t copy_addr;
4152
4153 if (flags & ~(VM_FLAGS_FIXED |
4154 VM_FLAGS_ANYWHERE |
4155 VM_FLAGS_OVERWRITE |
4156 VM_FLAGS_RETURN_4K_DATA_ADDR |
4157 VM_FLAGS_RETURN_DATA_ADDR |
4158 VM_FLAGS_ALIAS_MASK)) {
4159 named_entry_unlock(named_entry);
4160 return KERN_INVALID_ARGUMENT;
4161 }
4162
4163 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4164 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4165 offset_in_mapping = offset - vm_object_trunc_page(offset);
4166 if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR) {
4167 offset_in_mapping &= ~((signed)(0xFFF));
4168 }
4169 offset = vm_object_trunc_page(offset);
4170 map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset;
4171 }
4172
4173 copy_map = named_entry->backing.copy;
4174 assert(copy_map->type == VM_MAP_COPY_ENTRY_LIST);
4175 if (copy_map->type != VM_MAP_COPY_ENTRY_LIST) {
4176 /* unsupported type; should not happen */
4177 printf("vm_map_enter_mem_object: "
4178 "memory_entry->backing.copy "
4179 "unsupported type 0x%x\n",
4180 copy_map->type);
4181 named_entry_unlock(named_entry);
4182 return KERN_INVALID_ARGUMENT;
4183 }
4184
4185 /* reserve a contiguous range */
4186 kr = vm_map_enter(target_map,
4187 &map_addr,
4188 /* map whole mem entry, trim later: */
4189 named_entry->size,
4190 mask,
4191 flags & (VM_FLAGS_ANYWHERE |
4192 VM_FLAGS_OVERWRITE |
4193 VM_FLAGS_RETURN_4K_DATA_ADDR |
4194 VM_FLAGS_RETURN_DATA_ADDR),
4195 vmk_flags,
4196 tag,
4197 VM_OBJECT_NULL,
4198 0,
4199 FALSE, /* copy */
4200 cur_protection,
4201 max_protection,
4202 inheritance);
4203 if (kr != KERN_SUCCESS) {
4204 named_entry_unlock(named_entry);
4205 return kr;
4206 }
4207
4208 copy_addr = map_addr;
4209
4210 for (copy_entry = vm_map_copy_first_entry(copy_map);
4211 copy_entry != vm_map_copy_to_entry(copy_map);
4212 copy_entry = copy_entry->vme_next) {
4213 int remap_flags;
4214 vm_map_kernel_flags_t vmk_remap_flags;
4215 vm_map_t copy_submap;
4216 vm_object_t copy_object;
4217 vm_map_size_t copy_size;
4218 vm_object_offset_t copy_offset;
4219 int copy_vm_alias;
4220
4221 remap_flags = 0;
4222 vmk_remap_flags = VM_MAP_KERNEL_FLAGS_NONE;
4223
4224 copy_object = VME_OBJECT(copy_entry);
4225 copy_offset = VME_OFFSET(copy_entry);
4226 copy_size = (copy_entry->vme_end -
4227 copy_entry->vme_start);
4228 VM_GET_FLAGS_ALIAS(flags, copy_vm_alias);
4229 if (copy_vm_alias == 0) {
4230 /*
4231 * Caller does not want a specific
4232 * alias for this new mapping: use
4233 * the alias of the original mapping.
4234 */
4235 copy_vm_alias = VME_ALIAS(copy_entry);
4236 }
4237
4238 /* sanity check */
4239 if ((copy_addr + copy_size) >
4240 (map_addr +
4241 named_entry->size /* XXX full size */)) {
4242 /* over-mapping too much !? */
4243 kr = KERN_INVALID_ARGUMENT;
4244 /* abort */
4245 break;
4246 }
4247
4248 /* take a reference on the object */
4249 if (copy_entry->is_sub_map) {
4250 vmk_remap_flags.vmkf_submap = TRUE;
4251 copy_submap = VME_SUBMAP(copy_entry);
4252 vm_map_lock(copy_submap);
4253 vm_map_reference(copy_submap);
4254 vm_map_unlock(copy_submap);
4255 copy_object = (vm_object_t)(uintptr_t) copy_submap;
4256 } else if (!copy &&
4257 copy_object != VM_OBJECT_NULL &&
4258 (copy_entry->needs_copy ||
4259 copy_object->shadowed ||
4260 (!copy_object->true_share &&
4261 !copy_entry->is_shared &&
4262 copy_object->vo_size > copy_size))) {
4263 /*
4264 * We need to resolve our side of this
4265 * "symmetric" copy-on-write now; we
4266 * need a new object to map and share,
4267 * instead of the current one which
4268 * might still be shared with the
4269 * original mapping.
4270 *
4271 * Note: A "vm_map_copy_t" does not
4272 * have a lock but we're protected by
4273 * the named entry's lock here.
4274 */
4275 // assert(copy_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);
4276 VME_OBJECT_SHADOW(copy_entry, copy_size);
4277 if (!copy_entry->needs_copy &&
4278 copy_entry->protection & VM_PROT_WRITE) {
4279 vm_prot_t prot;
4280
4281 prot = copy_entry->protection & ~VM_PROT_WRITE;
4282 vm_object_pmap_protect(copy_object,
4283 copy_offset,
4284 copy_size,
4285 PMAP_NULL,
4286 0,
4287 prot);
4288 }
4289
4290 copy_entry->needs_copy = FALSE;
4291 copy_entry->is_shared = TRUE;
4292 copy_object = VME_OBJECT(copy_entry);
4293 copy_offset = VME_OFFSET(copy_entry);
4294 vm_object_lock(copy_object);
4295 vm_object_reference_locked(copy_object);
4296 if (copy_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
4297 /* we're about to make a shared mapping of this object */
4298 copy_object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
4299 copy_object->true_share = TRUE;
4300 }
4301 vm_object_unlock(copy_object);
4302 } else {
4303 /*
4304 * We already have the right object
4305 * to map.
4306 */
4307 copy_object = VME_OBJECT(copy_entry);
4308 vm_object_reference(copy_object);
4309 }
4310
4311 /* over-map the object into destination */
4312 remap_flags |= flags;
4313 remap_flags |= VM_FLAGS_FIXED;
4314 remap_flags |= VM_FLAGS_OVERWRITE;
4315 remap_flags &= ~VM_FLAGS_ANYWHERE;
4316 if (!copy && !copy_entry->is_sub_map) {
4317 /*
4318 * copy-on-write should have been
4319 * resolved at this point, or we would
4320 * end up sharing instead of copying.
4321 */
4322 assert(!copy_entry->needs_copy);
4323 }
4324 #if !CONFIG_EMBEDDED
4325 if (copy_entry->used_for_jit) {
4326 vmk_remap_flags.vmkf_map_jit = TRUE;
4327 }
4328 #endif /* !CONFIG_EMBEDDED */
4329 kr = vm_map_enter(target_map,
4330 &copy_addr,
4331 copy_size,
4332 (vm_map_offset_t) 0,
4333 remap_flags,
4334 vmk_remap_flags,
4335 copy_vm_alias,
4336 copy_object,
4337 copy_offset,
4338 ((copy_object == NULL) ? FALSE : copy),
4339 cur_protection,
4340 max_protection,
4341 inheritance);
4342 if (kr != KERN_SUCCESS) {
4343 if (copy_entry->is_sub_map) {
4344 vm_map_deallocate(copy_submap);
4345 } else {
4346 vm_object_deallocate(copy_object);
4347 }
4348 /* abort */
4349 break;
4350 }
4351
4352 /* next mapping */
4353 copy_addr += copy_size;
4354 }
4355
4356 if (kr == KERN_SUCCESS) {
4357 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4358 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4359 *address = map_addr + offset_in_mapping;
4360 } else {
4361 *address = map_addr;
4362 }
4363
4364 if (offset) {
4365 /*
4366 * Trim in front, from 0 to "offset".
4367 */
4368 vm_map_remove(target_map,
4369 map_addr,
4370 map_addr + offset,
4371 VM_MAP_REMOVE_NO_FLAGS);
4372 *address += offset;
4373 }
4374 if (offset + map_size < named_entry->size) {
4375 /*
4376 * Trim in back, from
4377 * "offset + map_size" to
4378 * "named_entry->size".
4379 */
4380 vm_map_remove(target_map,
4381 (map_addr +
4382 offset + map_size),
4383 (map_addr +
4384 named_entry->size),
4385 VM_MAP_REMOVE_NO_FLAGS);
4386 }
4387 }
4388 named_entry_unlock(named_entry);
4389
4390 if (kr != KERN_SUCCESS) {
4391 if (!(flags & VM_FLAGS_OVERWRITE)) {
4392 /* deallocate the contiguous range */
4393 (void) vm_deallocate(target_map,
4394 map_addr,
4395 map_size);
4396 }
4397 }
4398
4399 return kr;
4400 } else {
4401 unsigned int access;
4402 vm_prot_t protections;
4403 unsigned int wimg_mode;
4404
4405 /* we are mapping a VM object */
4406
4407 protections = named_entry->protection & VM_PROT_ALL;
4408 access = GET_MAP_MEM(named_entry->protection);
4409
4410 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4411 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4412 offset_in_mapping = offset - vm_object_trunc_page(offset);
4413 if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR) {
4414 offset_in_mapping &= ~((signed)(0xFFF));
4415 }
4416 offset = vm_object_trunc_page(offset);
4417 map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset;
4418 }
4419
4420 object = named_entry->backing.object;
4421 assert(object != VM_OBJECT_NULL);
4422 vm_object_lock(object);
4423 named_entry_unlock(named_entry);
4424
4425 vm_object_reference_locked(object);
4426
4427 wimg_mode = object->wimg_bits;
4428 vm_prot_to_wimg(access, &wimg_mode);
4429 if (object->wimg_bits != wimg_mode) {
4430 vm_object_change_wimg_mode(object, wimg_mode);
4431 }
4432
4433 vm_object_unlock(object);
4434 }
4435 } else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
4436 /*
4437 * JMM - This is temporary until we unify named entries
4438 * and raw memory objects.
4439 *
4440 * Detected fake ip_kotype for a memory object. In
4441 * this case, the port isn't really a port at all, but
4442 * instead is just a raw memory object.
4443 */
4444 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4445 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4446 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for raw memory object.");
4447 }
4448
4449 object = memory_object_to_vm_object((memory_object_t)port);
4450 if (object == VM_OBJECT_NULL) {
4451 return KERN_INVALID_OBJECT;
4452 }
4453 vm_object_reference(object);
4454
4455 /* wait for object (if any) to be ready */
4456 if (object != VM_OBJECT_NULL) {
4457 if (object == kernel_object) {
4458 printf("Warning: Attempt to map kernel object"
4459 " by a non-private kernel entity\n");
4460 return KERN_INVALID_OBJECT;
4461 }
4462 if (!object->pager_ready) {
4463 vm_object_lock(object);
4464
4465 while (!object->pager_ready) {
4466 vm_object_wait(object,
4467 VM_OBJECT_EVENT_PAGER_READY,
4468 THREAD_UNINT);
4469 vm_object_lock(object);
4470 }
4471 vm_object_unlock(object);
4472 }
4473 }
4474 } else {
4475 return KERN_INVALID_OBJECT;
4476 }
4477
4478 if (object != VM_OBJECT_NULL &&
4479 object->named &&
4480 object->pager != MEMORY_OBJECT_NULL &&
4481 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4482 memory_object_t pager;
4483 vm_prot_t pager_prot;
4484 kern_return_t kr;
4485
4486 /*
4487 * For "named" VM objects, let the pager know that the
4488 * memory object is being mapped. Some pagers need to keep
4489 * track of this, to know when they can reclaim the memory
4490 * object, for example.
4491 * VM calls memory_object_map() for each mapping (specifying
4492 * the protection of each mapping) and calls
4493 * memory_object_last_unmap() when all the mappings are gone.
4494 */
4495 pager_prot = max_protection;
4496 if (copy) {
4497 /*
4498 * Copy-On-Write mapping: won't modify the
4499 * memory object.
4500 */
4501 pager_prot &= ~VM_PROT_WRITE;
4502 }
4503 vm_object_lock(object);
4504 pager = object->pager;
4505 if (object->named &&
4506 pager != MEMORY_OBJECT_NULL &&
4507 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4508 assert(object->pager_ready);
4509 vm_object_mapping_wait(object, THREAD_UNINT);
4510 vm_object_mapping_begin(object);
4511 vm_object_unlock(object);
4512
4513 kr = memory_object_map(pager, pager_prot);
4514 assert(kr == KERN_SUCCESS);
4515
4516 vm_object_lock(object);
4517 vm_object_mapping_end(object);
4518 }
4519 vm_object_unlock(object);
4520 }
4521
4522 /*
4523 * Perform the copy if requested
4524 */
4525
4526 if (copy) {
4527 vm_object_t new_object;
4528 vm_object_offset_t new_offset;
4529
4530 result = vm_object_copy_strategically(object, offset,
4531 map_size,
4532 &new_object, &new_offset,
4533 &copy);
4534
4535
4536 if (result == KERN_MEMORY_RESTART_COPY) {
4537 boolean_t success;
4538 boolean_t src_needs_copy;
4539
4540 /*
4541 * XXX
4542 * We currently ignore src_needs_copy.
4543 * This really is the issue of how to make
4544 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
4545 * non-kernel users to use. Solution forthcoming.
4546 * In the meantime, since we don't allow non-kernel
4547 * memory managers to specify symmetric copy,
4548 * we won't run into problems here.
4549 */
4550 new_object = object;
4551 new_offset = offset;
4552 success = vm_object_copy_quickly(&new_object,
4553 new_offset,
4554 map_size,
4555 &src_needs_copy,
4556 &copy);
4557 assert(success);
4558 result = KERN_SUCCESS;
4559 }
4560 /*
4561 * Throw away the reference to the
4562 * original object, as it won't be mapped.
4563 */
4564
4565 vm_object_deallocate(object);
4566
4567 if (result != KERN_SUCCESS) {
4568 return result;
4569 }
4570
4571 object = new_object;
4572 offset = new_offset;
4573 }
4574
4575 /*
4576 * If non-kernel users want to try to prefault pages, the mapping and prefault
4577 * need to be atomic.
4578 */
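/*
 * (Descriptive note) For non-kernel maps, "vmkf_keep_map_locked" below asks
 * vm_map_enter() to return with the target map still locked, so that the
 * pmap_enter_options() prefault loop further down runs before any other
 * thread can touch the new mapping; the map lock is dropped once that loop
 * finishes.  Kernel-map prefaults skip PMAP_OPTIONS_NOWAIT instead.
 */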
4579 kernel_prefault = (try_prefault && vm_kernel_map_is_kernel(target_map));
4580 vmk_flags.vmkf_keep_map_locked = (try_prefault && !kernel_prefault);
4581
4582 #if __arm64__
4583 if (fourk) {
4584 /* map this object in a "4K" pager */
4585 result = vm_map_enter_fourk(target_map,
4586 &map_addr,
4587 map_size,
4588 (vm_map_offset_t) mask,
4589 flags,
4590 vmk_flags,
4591 tag,
4592 object,
4593 offset,
4594 copy,
4595 cur_protection,
4596 max_protection,
4597 inheritance);
4598 } else
4599 #endif /* __arm64__ */
4600 {
4601 result = vm_map_enter(target_map,
4602 &map_addr, map_size,
4603 (vm_map_offset_t)mask,
4604 flags,
4605 vmk_flags,
4606 tag,
4607 object, offset,
4608 copy,
4609 cur_protection, max_protection,
4610 inheritance);
4611 }
4612 if (result != KERN_SUCCESS) {
4613 vm_object_deallocate(object);
4614 }
4615
4616 /*
4617 * Try to prefault, and do not forget to release the vm map lock.
4618 */
4619 if (result == KERN_SUCCESS && try_prefault) {
4620 mach_vm_address_t va = map_addr;
4621 kern_return_t kr = KERN_SUCCESS;
4622 unsigned int i = 0;
4623 int pmap_options;
4624
4625 pmap_options = kernel_prefault ? 0 : PMAP_OPTIONS_NOWAIT;
4626 if (object->internal) {
4627 pmap_options |= PMAP_OPTIONS_INTERNAL;
4628 }
4629
4630 for (i = 0; i < page_list_count; ++i) {
4631 if (!UPL_VALID_PAGE(page_list, i)) {
4632 if (kernel_prefault) {
4633 assertf(FALSE, "kernel_prefault && !UPL_VALID_PAGE");
4634 result = KERN_MEMORY_ERROR;
4635 break;
4636 }
4637 } else {
4638 /*
4639 * If this call fails, we should stop trying
4640 * to optimize, since the remaining calls are
4641 * likely to fail too.
4642 *
4643 * We do not report an error for such a
4644 * failure, though: prefaulting is an
4645 * optimization, not something critical.
4646 */
4647 kr = pmap_enter_options(target_map->pmap,
4648 va, UPL_PHYS_PAGE(page_list, i),
4649 cur_protection, VM_PROT_NONE,
4650 0, TRUE, pmap_options, NULL);
4651 if (kr != KERN_SUCCESS) {
4652 OSIncrementAtomic64(&vm_prefault_nb_bailout);
4653 if (kernel_prefault) {
4654 result = kr;
4655 }
4656 break;
4657 }
4658 OSIncrementAtomic64(&vm_prefault_nb_pages);
4659 }
4660
4661 /* Next virtual address */
4662 va += PAGE_SIZE;
4663 }
4664 if (vmk_flags.vmkf_keep_map_locked) {
4665 vm_map_unlock(target_map);
4666 }
4667 }
4668
4669 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4670 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4671 *address = map_addr + offset_in_mapping;
4672 } else {
4673 *address = map_addr;
4674 }
4675 return result;
4676 }
4677
4678 kern_return_t
4679 vm_map_enter_mem_object(
4680 vm_map_t target_map,
4681 vm_map_offset_t *address,
4682 vm_map_size_t initial_size,
4683 vm_map_offset_t mask,
4684 int flags,
4685 vm_map_kernel_flags_t vmk_flags,
4686 vm_tag_t tag,
4687 ipc_port_t port,
4688 vm_object_offset_t offset,
4689 boolean_t copy,
4690 vm_prot_t cur_protection,
4691 vm_prot_t max_protection,
4692 vm_inherit_t inheritance)
4693 {
4694 kern_return_t ret;
4695
4696 ret = vm_map_enter_mem_object_helper(target_map,
4697 address,
4698 initial_size,
4699 mask,
4700 flags,
4701 vmk_flags,
4702 tag,
4703 port,
4704 offset,
4705 copy,
4706 cur_protection,
4707 max_protection,
4708 inheritance,
4709 NULL,
4710 0);
4711
4712 #if KASAN
4713 if (ret == KERN_SUCCESS && address && target_map->pmap == kernel_pmap) {
4714 kasan_notify_address(*address, initial_size);
4715 }
4716 #endif
4717
4718 return ret;
4719 }
4720
4721 kern_return_t
4722 vm_map_enter_mem_object_prefault(
4723 vm_map_t target_map,
4724 vm_map_offset_t *address,
4725 vm_map_size_t initial_size,
4726 vm_map_offset_t mask,
4727 int flags,
4728 vm_map_kernel_flags_t vmk_flags,
4729 vm_tag_t tag,
4730 ipc_port_t port,
4731 vm_object_offset_t offset,
4732 vm_prot_t cur_protection,
4733 vm_prot_t max_protection,
4734 upl_page_list_ptr_t page_list,
4735 unsigned int page_list_count)
4736 {
4737 kern_return_t ret;
4738
4739 ret = vm_map_enter_mem_object_helper(target_map,
4740 address,
4741 initial_size,
4742 mask,
4743 flags,
4744 vmk_flags,
4745 tag,
4746 port,
4747 offset,
4748 FALSE,
4749 cur_protection,
4750 max_protection,
4751 VM_INHERIT_DEFAULT,
4752 page_list,
4753 page_list_count);
4754
4755 #if KASAN
4756 if (ret == KERN_SUCCESS && address && target_map->pmap == kernel_pmap) {
4757 kasan_notify_address(*address, initial_size);
4758 }
4759 #endif
4760
4761 return ret;
4762 }
4763
4764
4765 kern_return_t
4766 vm_map_enter_mem_object_control(
4767 vm_map_t target_map,
4768 vm_map_offset_t *address,
4769 vm_map_size_t initial_size,
4770 vm_map_offset_t mask,
4771 int flags,
4772 vm_map_kernel_flags_t vmk_flags,
4773 vm_tag_t tag,
4774 memory_object_control_t control,
4775 vm_object_offset_t offset,
4776 boolean_t copy,
4777 vm_prot_t cur_protection,
4778 vm_prot_t max_protection,
4779 vm_inherit_t inheritance)
4780 {
4781 vm_map_address_t map_addr;
4782 vm_map_size_t map_size;
4783 vm_object_t object;
4784 vm_object_size_t size;
4785 kern_return_t result;
4786 memory_object_t pager;
4787 vm_prot_t pager_prot;
4788 kern_return_t kr;
4789 #if __arm64__
4790 boolean_t fourk = vmk_flags.vmkf_fourk;
4791 #endif /* __arm64__ */
4792
4793 /*
4794 * Check arguments for validity
4795 */
4796 if ((target_map == VM_MAP_NULL) ||
4797 (cur_protection & ~VM_PROT_ALL) ||
4798 (max_protection & ~VM_PROT_ALL) ||
4799 (inheritance > VM_INHERIT_LAST_VALID) ||
4800 initial_size == 0) {
4801 return KERN_INVALID_ARGUMENT;
4802 }
4803
4804 #if __arm64__
4805 if (fourk) {
4806 map_addr = vm_map_trunc_page(*address,
4807 FOURK_PAGE_MASK);
4808 map_size = vm_map_round_page(initial_size,
4809 FOURK_PAGE_MASK);
4810 } else
4811 #endif /* __arm64__ */
4812 {
4813 map_addr = vm_map_trunc_page(*address,
4814 VM_MAP_PAGE_MASK(target_map));
4815 map_size = vm_map_round_page(initial_size,
4816 VM_MAP_PAGE_MASK(target_map));
4817 }
4818 size = vm_object_round_page(initial_size);
4819
4820 object = memory_object_control_to_vm_object(control);
4821
4822 if (object == VM_OBJECT_NULL) {
4823 return KERN_INVALID_OBJECT;
4824 }
4825
4826 if (object == kernel_object) {
4827 printf("Warning: Attempt to map kernel object"
4828 " by a non-private kernel entity\n");
4829 return KERN_INVALID_OBJECT;
4830 }
4831
4832 vm_object_lock(object);
4833 object->ref_count++;
4834 vm_object_res_reference(object);
4835
4836 /*
4837 * For "named" VM objects, let the pager know that the
4838 * memory object is being mapped. Some pagers need to keep
4839 * track of this, to know when they can reclaim the memory
4840 * object, for example.
4841 * VM calls memory_object_map() for each mapping (specifying
4842 * the protection of each mapping) and calls
4843 * memory_object_last_unmap() when all the mappings are gone.
4844 */
4845 pager_prot = max_protection;
4846 if (copy) {
4847 pager_prot &= ~VM_PROT_WRITE;
4848 }
4849 pager = object->pager;
4850 if (object->named &&
4851 pager != MEMORY_OBJECT_NULL &&
4852 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4853 assert(object->pager_ready);
4854 vm_object_mapping_wait(object, THREAD_UNINT);
4855 vm_object_mapping_begin(object);
4856 vm_object_unlock(object);
4857
4858 kr = memory_object_map(pager, pager_prot);
4859 assert(kr == KERN_SUCCESS);
4860
4861 vm_object_lock(object);
4862 vm_object_mapping_end(object);
4863 }
4864 vm_object_unlock(object);
4865
4866 /*
4867 * Perform the copy if requested
4868 */
4869
4870 if (copy) {
4871 vm_object_t new_object;
4872 vm_object_offset_t new_offset;
4873
4874 result = vm_object_copy_strategically(object, offset, size,
4875 &new_object, &new_offset,
4876 &copy);
4877
4878
4879 if (result == KERN_MEMORY_RESTART_COPY) {
4880 boolean_t success;
4881 boolean_t src_needs_copy;
4882
4883 /*
4884 * XXX
4885 * We currently ignore src_needs_copy.
4886 * This really is the issue of how to make
4887 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
4888 * non-kernel users to use. Solution forthcoming.
4889 * In the meantime, since we don't allow non-kernel
4890 * memory managers to specify symmetric copy,
4891 * we won't run into problems here.
4892 */
4893 new_object = object;
4894 new_offset = offset;
4895 success = vm_object_copy_quickly(&new_object,
4896 new_offset, size,
4897 &src_needs_copy,
4898 &copy);
4899 assert(success);
4900 result = KERN_SUCCESS;
4901 }
4902 /*
4903 * Throw away the reference to the
4904 * original object, as it won't be mapped.
4905 */
4906
4907 vm_object_deallocate(object);
4908
4909 if (result != KERN_SUCCESS) {
4910 return result;
4911 }
4912
4913 object = new_object;
4914 offset = new_offset;
4915 }
4916
4917 #if __arm64__
4918 if (fourk) {
4919 result = vm_map_enter_fourk(target_map,
4920 &map_addr,
4921 map_size,
4922 (vm_map_offset_t)mask,
4923 flags,
4924 vmk_flags,
4925 tag,
4926 object, offset,
4927 copy,
4928 cur_protection, max_protection,
4929 inheritance);
4930 } else
4931 #endif /* __arm64__ */
4932 {
4933 result = vm_map_enter(target_map,
4934 &map_addr, map_size,
4935 (vm_map_offset_t)mask,
4936 flags,
4937 vmk_flags,
4938 tag,
4939 object, offset,
4940 copy,
4941 cur_protection, max_protection,
4942 inheritance);
4943 }
4944 if (result != KERN_SUCCESS) {
4945 vm_object_deallocate(object);
4946 }
4947 *address = map_addr;
4948
4949 return result;
4950 }
4951
4952
4953 #if VM_CPM
4954
4955 #ifdef MACH_ASSERT
4956 extern pmap_paddr_t avail_start, avail_end;
4957 #endif
4958
4959 /*
4960 * Allocate memory in the specified map, with the caveat that
4961 * the memory is physically contiguous. This call may fail
4962 * if the system can't find sufficient contiguous memory.
4963 * This call may cause or lead to heart-stopping amounts of
4964 * paging activity.
4965 *
4966 * Memory obtained from this call should be freed in the
4967 * normal way, viz., via vm_deallocate.
4968 */
4969 kern_return_t
4970 vm_map_enter_cpm(
4971 vm_map_t map,
4972 vm_map_offset_t *addr,
4973 vm_map_size_t size,
4974 int flags)
4975 {
4976 vm_object_t cpm_obj;
4977 pmap_t pmap;
4978 vm_page_t m, pages;
4979 kern_return_t kr;
4980 vm_map_offset_t va, start, end, offset;
4981 #if MACH_ASSERT
4982 vm_map_offset_t prev_addr = 0;
4983 #endif /* MACH_ASSERT */
4984
4985 boolean_t anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
4986 vm_tag_t tag;
4987
4988 VM_GET_FLAGS_ALIAS(flags, tag);
4989
4990 if (size == 0) {
4991 *addr = 0;
4992 return KERN_SUCCESS;
4993 }
4994 if (anywhere) {
4995 *addr = vm_map_min(map);
4996 } else {
4997 *addr = vm_map_trunc_page(*addr,
4998 VM_MAP_PAGE_MASK(map));
4999 }
5000 size = vm_map_round_page(size,
5001 VM_MAP_PAGE_MASK(map));
5002
5003 /*
5004 * LP64todo - cpm_allocate should probably allow
5005 * allocations of >4GB, but not with the current
5006 * algorithm, so just cast down the size for now.
5007 */
5008 if (size > VM_MAX_ADDRESS) {
5009 return KERN_RESOURCE_SHORTAGE;
5010 }
5011 if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
5012 &pages, 0, 0, TRUE, flags)) != KERN_SUCCESS) {
5013 return kr;
5014 }
5015
5016 cpm_obj = vm_object_allocate((vm_object_size_t)size);
5017 assert(cpm_obj != VM_OBJECT_NULL);
5018 assert(cpm_obj->internal);
5019 assert(cpm_obj->vo_size == (vm_object_size_t)size);
5020 assert(cpm_obj->can_persist == FALSE);
5021 assert(cpm_obj->pager_created == FALSE);
5022 assert(cpm_obj->pageout == FALSE);
5023 assert(cpm_obj->shadow == VM_OBJECT_NULL);
5024
5025 /*
5026 * Insert pages into object.
5027 */
5028
5029 vm_object_lock(cpm_obj);
5030 for (offset = 0; offset < size; offset += PAGE_SIZE) {
5031 m = pages;
5032 pages = NEXT_PAGE(m);
5033 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
5034
5035 assert(!m->vmp_gobbled);
5036 assert(!m->vmp_wanted);
5037 assert(!m->vmp_pageout);
5038 assert(!m->vmp_tabled);
5039 assert(VM_PAGE_WIRED(m));
5040 assert(m->vmp_busy);
5041 assert(VM_PAGE_GET_PHYS_PAGE(m) >= (avail_start >> PAGE_SHIFT) && VM_PAGE_GET_PHYS_PAGE(m) <= (avail_end >> PAGE_SHIFT));
5042
5043 m->vmp_busy = FALSE;
5044 vm_page_insert(m, cpm_obj, offset);
5045 }
5046 assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
5047 vm_object_unlock(cpm_obj);
5048
5049 /*
5050 * Hang onto a reference on the object in case a
5051 * multi-threaded application for some reason decides
5052 * to deallocate the portion of the address space into
5053 * which we will insert this object.
5054 *
5055 * Unfortunately, we must insert the object now before
5056 * we can talk to the pmap module about which addresses
5057 * must be wired down. Hence, the race with a multi-
5058 * threaded app.
5059 */
5060 vm_object_reference(cpm_obj);
5061
5062 /*
5063 * Insert object into map.
5064 */
5065
5066 kr = vm_map_enter(
5067 map,
5068 addr,
5069 size,
5070 (vm_map_offset_t)0,
5071 flags,
5072 VM_MAP_KERNEL_FLAGS_NONE, tag,
5073 cpm_obj,
5074 (vm_object_offset_t)0,
5075 FALSE,
5076 VM_PROT_ALL,
5077 VM_PROT_ALL,
5078 VM_INHERIT_DEFAULT);
5079
5080 if (kr != KERN_SUCCESS) {
5081 /*
5082 * A CPM object doesn't have can_persist set,
5083 * so all we have to do is deallocate it to
5084 * free up these pages.
5085 */
5086 assert(cpm_obj->pager_created == FALSE);
5087 assert(cpm_obj->can_persist == FALSE);
5088 assert(cpm_obj->pageout == FALSE);
5089 assert(cpm_obj->shadow == VM_OBJECT_NULL);
5090 vm_object_deallocate(cpm_obj); /* kill acquired ref */
5091 vm_object_deallocate(cpm_obj); /* kill creation ref */
5092 }
5093
5094 /*
5095 * Inform the physical mapping system that the
5096 * range of addresses may not fault, so that
5097 * page tables and such can be locked down as well.
5098 */
5099 start = *addr;
5100 end = start + size;
5101 pmap = vm_map_pmap(map);
5102 pmap_pageable(pmap, start, end, FALSE);
5103
5104 /*
5105 * Enter each page into the pmap, to avoid faults.
5106 * Note that this loop could be coded more efficiently,
5107 * if the need arose, rather than looking up each page
5108 * again.
5109 */
5110 for (offset = 0, va = start; offset < size;
5111 va += PAGE_SIZE, offset += PAGE_SIZE) {
5112 int type_of_fault;
5113
5114 vm_object_lock(cpm_obj);
5115 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
5116 assert(m != VM_PAGE_NULL);
5117
5118 vm_page_zero_fill(m);
5119
5120 type_of_fault = DBG_ZERO_FILL_FAULT;
5121
5122 vm_fault_enter(m, pmap, va, VM_PROT_ALL, VM_PROT_WRITE,
5123 VM_PAGE_WIRED(m),
5124 FALSE, /* change_wiring */
5125 VM_KERN_MEMORY_NONE, /* tag - not wiring */
5126 FALSE, /* no_cache */
5127 FALSE, /* cs_bypass */
5128 0, /* user_tag */
5129 0, /* pmap_options */
5130 NULL, /* need_retry */
5131 &type_of_fault);
5132
5133 vm_object_unlock(cpm_obj);
5134 }
5135
5136 #if MACH_ASSERT
5137 /*
5138 * Verify ordering in address space.
5139 */
5140 for (offset = 0; offset < size; offset += PAGE_SIZE) {
5141 vm_object_lock(cpm_obj);
5142 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
5143 vm_object_unlock(cpm_obj);
5144 if (m == VM_PAGE_NULL) {
5145 panic("vm_allocate_cpm: obj %p off 0x%llx no page",
5146 cpm_obj, (uint64_t)offset);
5147 }
5148 assert(m->vmp_tabled);
5149 assert(!m->vmp_busy);
5150 assert(!m->vmp_wanted);
5151 assert(!m->vmp_fictitious);
5152 assert(!m->vmp_private);
5153 assert(!m->vmp_absent);
5154 assert(!m->vmp_error);
5155 assert(!m->vmp_cleaning);
5156 assert(!m->vmp_laundry);
5157 assert(!m->vmp_precious);
5158 assert(!m->vmp_clustered);
5159 if (offset != 0) {
5160 if (VM_PAGE_GET_PHYS_PAGE(m) != prev_addr + 1) {
5161 printf("start 0x%llx end 0x%llx va 0x%llx\n",
5162 (uint64_t)start, (uint64_t)end, (uint64_t)va);
5163 printf("obj %p off 0x%llx\n", cpm_obj, (uint64_t)offset);
5164 printf("m %p prev_address 0x%llx\n", m, (uint64_t)prev_addr);
5165 panic("vm_allocate_cpm: pages not contig!");
5166 }
5167 }
5168 prev_addr = VM_PAGE_GET_PHYS_PAGE(m);
5169 }
5170 #endif /* MACH_ASSERT */
5171
5172 vm_object_deallocate(cpm_obj); /* kill extra ref */
5173
5174 return kr;
5175 }
5176
5177
5178 #else /* VM_CPM */
5179
5180 /*
5181 * Interface is defined in all cases, but unless the kernel
5182 * is built explicitly for this option, the interface does
5183 * nothing.
5184 */
5185
5186 kern_return_t
5187 vm_map_enter_cpm(
5188 __unused vm_map_t map,
5189 __unused vm_map_offset_t *addr,
5190 __unused vm_map_size_t size,
5191 __unused int flags)
5192 {
5193 return KERN_FAILURE;
5194 }
5195 #endif /* VM_CPM */
5196
5197 /* Not used without nested pmaps */
5198 #ifndef NO_NESTED_PMAP
5199 /*
5200 * Clip and unnest a portion of a nested submap mapping.
5201 */
5202
5203
5204 static void
5205 vm_map_clip_unnest(
5206 vm_map_t map,
5207 vm_map_entry_t entry,
5208 vm_map_offset_t start_unnest,
5209 vm_map_offset_t end_unnest)
5210 {
5211 vm_map_offset_t old_start_unnest = start_unnest;
5212 vm_map_offset_t old_end_unnest = end_unnest;
5213
5214 assert(entry->is_sub_map);
5215 assert(VME_SUBMAP(entry) != NULL);
5216 assert(entry->use_pmap);
5217
5218 /*
5219 * Query the platform for the optimal unnest range.
5220 * DRK: There's some duplication of effort here, since
5221 * callers may have adjusted the range to some extent. This
5222 * routine was introduced to support 1GiB subtree nesting
5223 * for x86 platforms, which can also nest on 2MiB boundaries
5224 * depending on size/alignment.
5225 */
5226 if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) {
5227 assert(VME_SUBMAP(entry)->is_nested_map);
5228 assert(!VME_SUBMAP(entry)->disable_vmentry_reuse);
5229 log_unnest_badness(map,
5230 old_start_unnest,
5231 old_end_unnest,
5232 VME_SUBMAP(entry)->is_nested_map,
5233 (entry->vme_start +
5234 VME_SUBMAP(entry)->lowest_unnestable_start -
5235 VME_OFFSET(entry)));
5236 }
5237
5238 if (entry->vme_start > start_unnest ||
5239 entry->vme_end < end_unnest) {
5240 panic("vm_map_clip_unnest(0x%llx,0x%llx): "
5241 "bad nested entry: start=0x%llx end=0x%llx\n",
5242 (long long)start_unnest, (long long)end_unnest,
5243 (long long)entry->vme_start, (long long)entry->vme_end);
5244 }
5245
5246 if (start_unnest > entry->vme_start) {
5247 _vm_map_clip_start(&map->hdr,
5248 entry,
5249 start_unnest);
5250 if (map->holelistenabled) {
5251 vm_map_store_update_first_free(map, NULL, FALSE);
5252 } else {
5253 vm_map_store_update_first_free(map, map->first_free, FALSE);
5254 }
5255 }
5256 if (entry->vme_end > end_unnest) {
5257 _vm_map_clip_end(&map->hdr,
5258 entry,
5259 end_unnest);
5260 if (map->holelistenabled) {
5261 vm_map_store_update_first_free(map, NULL, FALSE);
5262 } else {
5263 vm_map_store_update_first_free(map, map->first_free, FALSE);
5264 }
5265 }
5266
5267 pmap_unnest(map->pmap,
5268 entry->vme_start,
5269 entry->vme_end - entry->vme_start);
5270 if ((map->mapped_in_other_pmaps) && os_ref_get_count(&map->map_refcnt) != 0) {
5271 /* clean up parent map/maps */
5272 vm_map_submap_pmap_clean(
5273 map, entry->vme_start,
5274 entry->vme_end,
5275 VME_SUBMAP(entry),
5276 VME_OFFSET(entry));
5277 }
5278 entry->use_pmap = FALSE;
5279 if ((map->pmap != kernel_pmap) &&
5280 (VME_ALIAS(entry) == VM_MEMORY_SHARED_PMAP)) {
5281 VME_ALIAS_SET(entry, VM_MEMORY_UNSHARED_PMAP);
5282 }
5283 }
5284 #endif /* NO_NESTED_PMAP */
5285
5286 /*
5287 * vm_map_clip_start: [ internal use only ]
5288 *
5289 * Asserts that the given entry begins at or after
5290 * the specified address; if necessary,
5291 * it splits the entry into two.
5292 */
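/*
 * Worked example (addresses invented): clipping an entry that covers
 * [0x1000, 0x5000) at startaddr 0x3000 leaves this entry as
 * [0x3000, 0x5000) and links a new entry [0x1000, 0x3000) in front of
 * it, with VME_OFFSET adjusted so both halves keep mapping the same
 * portion of the backing object or submap.
 */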
5293 void
5294 vm_map_clip_start(
5295 vm_map_t map,
5296 vm_map_entry_t entry,
5297 vm_map_offset_t startaddr)
5298 {
5299 #ifndef NO_NESTED_PMAP
5300 if (entry->is_sub_map &&
5301 entry->use_pmap &&
5302 startaddr >= entry->vme_start) {
5303 vm_map_offset_t start_unnest, end_unnest;
5304
5305 /*
5306 * Make sure "startaddr" is no longer in a nested range
5307 * before we clip. Unnest only the minimum range the platform
5308 * can handle.
5309 * vm_map_clip_unnest may perform additional adjustments to
5310 * the unnest range.
5311 */
5312 start_unnest = startaddr & ~(pmap_nesting_size_min - 1);
5313 end_unnest = start_unnest + pmap_nesting_size_min;
5314 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
5315 }
5316 #endif /* NO_NESTED_PMAP */
5317 if (startaddr > entry->vme_start) {
5318 if (VME_OBJECT(entry) &&
5319 !entry->is_sub_map &&
5320 VME_OBJECT(entry)->phys_contiguous) {
5321 pmap_remove(map->pmap,
5322 (addr64_t)(entry->vme_start),
5323 (addr64_t)(entry->vme_end));
5324 }
5325 if (entry->vme_atomic) {
5326 panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)\n", map, entry);
5327 }
5328
5329 DTRACE_VM5(
5330 vm_map_clip_start,
5331 vm_map_t, map,
5332 vm_map_offset_t, entry->vme_start,
5333 vm_map_offset_t, entry->vme_end,
5334 vm_map_offset_t, startaddr,
5335 int, VME_ALIAS(entry));
5336
5337 _vm_map_clip_start(&map->hdr, entry, startaddr);
5338 if (map->holelistenabled) {
5339 vm_map_store_update_first_free(map, NULL, FALSE);
5340 } else {
5341 vm_map_store_update_first_free(map, map->first_free, FALSE);
5342 }
5343 }
5344 }
5345
5346
5347 #define vm_map_copy_clip_start(copy, entry, startaddr) \
5348 MACRO_BEGIN \
5349 if ((startaddr) > (entry)->vme_start) \
5350 _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \
5351 MACRO_END
5352
5353 /*
5354 * This routine is called only when it is known that
5355 * the entry must be split.
5356 */
5357 static void
5358 _vm_map_clip_start(
5359 struct vm_map_header *map_header,
5360 vm_map_entry_t entry,
5361 vm_map_offset_t start)
5362 {
5363 vm_map_entry_t new_entry;
5364
5365 /*
5366 * Split off the front portion --
5367 * note that we must insert the new
5368 * entry BEFORE this one, so that
5369 * this entry has the specified starting
5370 * address.
5371 */
5372
5373 if (entry->map_aligned) {
5374 assert(VM_MAP_PAGE_ALIGNED(start,
5375 VM_MAP_HDR_PAGE_MASK(map_header)));
5376 }
5377
5378 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
5379 vm_map_entry_copy_full(new_entry, entry);
5380
5381 new_entry->vme_end = start;
5382 assert(new_entry->vme_start < new_entry->vme_end);
5383 VME_OFFSET_SET(entry, VME_OFFSET(entry) + (start - entry->vme_start));
5384 assert(start < entry->vme_end);
5385 entry->vme_start = start;
5386
5387 _vm_map_store_entry_link(map_header, entry->vme_prev, new_entry);
5388
5389 if (entry->is_sub_map) {
5390 vm_map_reference(VME_SUBMAP(new_entry));
5391 } else {
5392 vm_object_reference(VME_OBJECT(new_entry));
5393 }
5394 }
5395
5396
5397 /*
5398 * vm_map_clip_end: [ internal use only ]
5399 *
5400 * Asserts that the given entry ends at or before
5401 * the specified address; if necessary,
5402 * it splits the entry into two.
5403 */
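/*
 * Worked example (addresses invented): clipping an entry that covers
 * [0x1000, 0x5000) at endaddr 0x3000 leaves this entry as
 * [0x1000, 0x3000) and links a new entry [0x3000, 0x5000) right after
 * it; the mirror image of vm_map_clip_start above.
 */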
5404 void
5405 vm_map_clip_end(
5406 vm_map_t map,
5407 vm_map_entry_t entry,
5408 vm_map_offset_t endaddr)
5409 {
5410 if (endaddr > entry->vme_end) {
5411 /*
5412 * Within the scope of this clipping, limit "endaddr" to
5413 * the end of this map entry...
5414 */
5415 endaddr = entry->vme_end;
5416 }
5417 #ifndef NO_NESTED_PMAP
5418 if (entry->is_sub_map && entry->use_pmap) {
5419 vm_map_offset_t start_unnest, end_unnest;
5420
5421 /*
5422 * Make sure the range between the start of this entry and
5423 * the new "endaddr" is no longer nested before we clip.
5424 * Unnest only the minimum range the platform can handle.
5425 * vm_map_clip_unnest may perform additional adjustments to
5426 * the unnest range.
5427 */
5428 start_unnest = entry->vme_start;
5429 end_unnest =
5430 (endaddr + pmap_nesting_size_min - 1) &
5431 ~(pmap_nesting_size_min - 1);
5432 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
5433 }
5434 #endif /* NO_NESTED_PMAP */
5435 if (endaddr < entry->vme_end) {
5436 if (VME_OBJECT(entry) &&
5437 !entry->is_sub_map &&
5438 VME_OBJECT(entry)->phys_contiguous) {
5439 pmap_remove(map->pmap,
5440 (addr64_t)(entry->vme_start),
5441 (addr64_t)(entry->vme_end));
5442 }
5443 if (entry->vme_atomic) {
5444 panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)\n", map, entry);
5445 }
5446 DTRACE_VM5(
5447 vm_map_clip_end,
5448 vm_map_t, map,
5449 vm_map_offset_t, entry->vme_start,
5450 vm_map_offset_t, entry->vme_end,
5451 vm_map_offset_t, endaddr,
5452 int, VME_ALIAS(entry));
5453
5454 _vm_map_clip_end(&map->hdr, entry, endaddr);
5455 if (map->holelistenabled) {
5456 vm_map_store_update_first_free(map, NULL, FALSE);
5457 } else {
5458 vm_map_store_update_first_free(map, map->first_free, FALSE);
5459 }
5460 }
5461 }
5462
5463
5464 #define vm_map_copy_clip_end(copy, entry, endaddr) \
5465 MACRO_BEGIN \
5466 if ((endaddr) < (entry)->vme_end) \
5467 _vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \
5468 MACRO_END
5469
5470 /*
5471 * This routine is called only when it is known that
5472 * the entry must be split.
5473 */
5474 static void
5475 _vm_map_clip_end(
5476 struct vm_map_header *map_header,
5477 vm_map_entry_t entry,
5478 vm_map_offset_t end)
5479 {
5480 vm_map_entry_t new_entry;
5481
5482 /*
5483 * Create a new entry and insert it
5484 * AFTER the specified entry
5485 */
5486
5487 if (entry->map_aligned) {
5488 assert(VM_MAP_PAGE_ALIGNED(end,
5489 VM_MAP_HDR_PAGE_MASK(map_header)));
5490 }
5491
5492 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
5493 vm_map_entry_copy_full(new_entry, entry);
5494
5495 assert(entry->vme_start < end);
5496 new_entry->vme_start = entry->vme_end = end;
5497 VME_OFFSET_SET(new_entry,
5498 VME_OFFSET(new_entry) + (end - entry->vme_start));
5499 assert(new_entry->vme_start < new_entry->vme_end);
5500
5501 _vm_map_store_entry_link(map_header, entry, new_entry);
5502
5503 if (entry->is_sub_map) {
5504 vm_map_reference(VME_SUBMAP(new_entry));
5505 } else {
5506 vm_object_reference(VME_OBJECT(new_entry));
5507 }
5508 }
5509
5510
5511 /*
5512 * VM_MAP_RANGE_CHECK: [ internal use only ]
5513 *
5514 * Asserts that the starting and ending region
5515 * addresses fall within the valid range of the map.
5516 */
5517 #define VM_MAP_RANGE_CHECK(map, start, end) \
5518 MACRO_BEGIN \
5519 if (start < vm_map_min(map)) \
5520 start = vm_map_min(map); \
5521 if (end > vm_map_max(map)) \
5522 end = vm_map_max(map); \
5523 if (start > end) \
5524 start = end; \
5525 MACRO_END
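/*
 * Example of the clamping above (values invented): for a map whose valid
 * range is [0x1000, 0xF000), VM_MAP_RANGE_CHECK turns
 * (start, end) = (0x0, 0x10000) into (0x1000, 0xF000), and an inverted
 * pair such as (0x8000, 0x2000) into the empty range (0x2000, 0x2000).
 */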
5526
5527 /*
5528 * vm_map_range_check: [ internal use only ]
5529 *
5530 * Check that the region defined by the specified start and
5531 * end addresses is wholly contained within a single map
5532 * entry or a set of adjacent map entries of the specified map,
5533 * i.e. the specified region contains no unmapped space.
5534 * If any or all of the region is unmapped, FALSE is returned.
5535 * Otherwise, TRUE is returned and if the output argument 'entry'
5536 * is not NULL it points to the map entry containing the start
5537 * of the region.
5538 *
5539 * The map is locked for reading on entry and is left locked.
5540 */
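/*
 * Example (addresses invented): with adjacent entries [0x1000, 0x3000)
 * and [0x3000, 0x6000), a check of (0x2000, 0x5000) returns TRUE and
 * sets *entry to the first entry, while (0x2000, 0x7000) returns FALSE
 * if nothing is mapped at 0x6000 and beyond.
 */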
5541 static boolean_t
5542 vm_map_range_check(
5543 vm_map_t map,
5544 vm_map_offset_t start,
5545 vm_map_offset_t end,
5546 vm_map_entry_t *entry)
5547 {
5548 vm_map_entry_t cur;
5549 vm_map_offset_t prev;
5550
5551 /*
5552 * Basic sanity checks first
5553 */
5554 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end) {
5555 return FALSE;
5556 }
5557
5558 /*
5559 * Check first if the region starts within a valid
5560 * mapping for the map.
5561 */
5562 if (!vm_map_lookup_entry(map, start, &cur)) {
5563 return FALSE;
5564 }
5565
5566 /*
5567 * Optimize for the case that the region is contained
5568 * in a single map entry.
5569 */
5570 if (entry != (vm_map_entry_t *) NULL) {
5571 *entry = cur;
5572 }
5573 if (end <= cur->vme_end) {
5574 return TRUE;
5575 }
5576
5577 /*
5578 * If the region is not wholly contained within a
5579 * single entry, walk the entries looking for holes.
5580 */
5581 prev = cur->vme_end;
5582 cur = cur->vme_next;
5583 while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
5584 if (end <= cur->vme_end) {
5585 return TRUE;
5586 }
5587 prev = cur->vme_end;
5588 cur = cur->vme_next;
5589 }
5590 return FALSE;
5591 }
5592
5593 /*
5594 * vm_map_submap: [ kernel use only ]
5595 *
5596 * Mark the given range as handled by a subordinate map.
5597 *
5598 * This range must have been created with vm_map_find using
5599 * the vm_submap_object, and no other operations may have been
5600 * performed on this range prior to calling vm_map_submap.
5601 *
5602 * Only a limited number of operations can be performed
5603 * within this range after calling vm_map_submap:
5604 * vm_fault
5605 * [Don't try vm_map_copyin!]
5606 *
5607 * To remove a submapping, one must first remove the
5608 * range from the superior map, and then destroy the
5609 * submap (if desired). [Better yet, don't try it.]
5610 */
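/*
 * Hypothetical usage sketch (identifiers invented for the example):
 * after reserving [start, end) in "parent_map" on top of
 * vm_submap_object, a kernel caller nests "child_map" there with
 *
 *	kr = vm_map_submap(parent_map, start, end, child_map,
 *	    0, TRUE);
 *
 * where 0 is the offset into the submap and TRUE ("use_pmap") asks for
 * a nested pmap on configurations that support one.
 */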
5611 kern_return_t
5612 vm_map_submap(
5613 vm_map_t map,
5614 vm_map_offset_t start,
5615 vm_map_offset_t end,
5616 vm_map_t submap,
5617 vm_map_offset_t offset,
5618 #ifdef NO_NESTED_PMAP
5619 __unused
5620 #endif /* NO_NESTED_PMAP */
5621 boolean_t use_pmap)
5622 {
5623 vm_map_entry_t entry;
5624 kern_return_t result = KERN_INVALID_ARGUMENT;
5625 vm_object_t object;
5626
5627 vm_map_lock(map);
5628
5629 if (!vm_map_lookup_entry(map, start, &entry)) {
5630 entry = entry->vme_next;
5631 }
5632
5633 if (entry == vm_map_to_entry(map) ||
5634 entry->is_sub_map) {
5635 vm_map_unlock(map);
5636 return KERN_INVALID_ARGUMENT;
5637 }
5638
5639 vm_map_clip_start(map, entry, start);
5640 vm_map_clip_end(map, entry, end);
5641
5642 if ((entry->vme_start == start) && (entry->vme_end == end) &&
5643 (!entry->is_sub_map) &&
5644 ((object = VME_OBJECT(entry)) == vm_submap_object) &&
5645 (object->resident_page_count == 0) &&
5646 (object->copy == VM_OBJECT_NULL) &&
5647 (object->shadow == VM_OBJECT_NULL) &&
5648 (!object->pager_created)) {
5649 VME_OFFSET_SET(entry, (vm_object_offset_t)offset);
5650 VME_OBJECT_SET(entry, VM_OBJECT_NULL);
5651 vm_object_deallocate(object);
5652 entry->is_sub_map = TRUE;
5653 entry->use_pmap = FALSE;
5654 VME_SUBMAP_SET(entry, submap);
5655 vm_map_reference(submap);
5656 if (submap->mapped_in_other_pmaps == FALSE &&
5657 vm_map_pmap(submap) != PMAP_NULL &&
5658 vm_map_pmap(submap) != vm_map_pmap(map)) {
5659 /*
5660 * This submap is being mapped in a map
5661 * that uses a different pmap.
5662 * Set its "mapped_in_other_pmaps" flag
5663 * to indicate that we now need to
5664 * remove mappings from all pmaps rather
5665 * than just the submap's pmap.
5666 */
5667 submap->mapped_in_other_pmaps = TRUE;
5668 }
5669
5670 #ifndef NO_NESTED_PMAP
5671 if (use_pmap) {
5672 /* nest if platform code will allow */
5673 if (submap->pmap == NULL) {
5674 ledger_t ledger = map->pmap->ledger;
5675 submap->pmap = pmap_create_options(ledger,
5676 (vm_map_size_t) 0, 0);
5677 if (submap->pmap == PMAP_NULL) {
5678 vm_map_unlock(map);
5679 return KERN_NO_SPACE;
5680 }
5681 #if defined(__arm__) || defined(__arm64__)
5682 pmap_set_nested(submap->pmap);
5683 #endif
5684 }
5685 result = pmap_nest(map->pmap,
5686 (VME_SUBMAP(entry))->pmap,
5687 (addr64_t)start,
5688 (addr64_t)start,
5689 (uint64_t)(end - start));
5690 if (result) {
5691 panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result);
5692 }
5693 entry->use_pmap = TRUE;
5694 }
5695 #else /* NO_NESTED_PMAP */
5696 pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end);
5697 #endif /* NO_NESTED_PMAP */
5698 result = KERN_SUCCESS;
5699 }
5700 vm_map_unlock(map);
5701
5702 return result;
5703 }
5704
5705 /*
5706 * vm_map_protect:
5707 *
5708 * Sets the protection of the specified address
5709 * region in the target map. If "set_max" is
5710 * specified, the maximum protection is to be set;
5711 * otherwise, only the current protection is affected.
5712 */
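/*
 * Illustrative example (protections invented): with set_max == TRUE and
 * new_prot == VM_PROT_READ, an entry holding (cur = rw-, max = rwx)
 * becomes (cur = r--, max = r--), since the loop below stores new_prot
 * as the maximum and intersects the old current protection with it;
 * with set_max == FALSE only the current protection is replaced.
 */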
5713 kern_return_t
5714 vm_map_protect(
5715 vm_map_t map,
5716 vm_map_offset_t start,
5717 vm_map_offset_t end,
5718 vm_prot_t new_prot,
5719 boolean_t set_max)
5720 {
5721 vm_map_entry_t current;
5722 vm_map_offset_t prev;
5723 vm_map_entry_t entry;
5724 vm_prot_t new_max;
5725 int pmap_options = 0;
5726 kern_return_t kr;
5727
5728 if (new_prot & VM_PROT_COPY) {
5729 vm_map_offset_t new_start;
5730 vm_prot_t cur_prot, max_prot;
5731 vm_map_kernel_flags_t kflags;
5732
5733 /* LP64todo - see below */
5734 if (start >= map->max_offset) {
5735 return KERN_INVALID_ADDRESS;
5736 }
5737
5738 #if VM_PROTECT_WX_FAIL
5739 if ((new_prot & VM_PROT_EXECUTE) &&
5740 map != kernel_map &&
5741 cs_process_enforcement(NULL)) {
5742 DTRACE_VM3(cs_wx,
5743 uint64_t, (uint64_t) start,
5744 uint64_t, (uint64_t) end,
5745 vm_prot_t, new_prot);
5746 printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
5747 proc_selfpid(),
5748 (current_task()->bsd_info
5749 ? proc_name_address(current_task()->bsd_info)
5750 : "?"),
5751 __FUNCTION__);
5752 return KERN_PROTECTION_FAILURE;
5753 }
5754 #endif /* VM_PROTECT_WX_FAIL */
5755
5756 /*
5757 * Let vm_map_remap_extract() know that it will need to:
5758 * + make a copy of the mapping
5759 * + add VM_PROT_WRITE to the max protections
5760 * + remove any protections that are no longer allowed from the
5761 * max protections (to avoid any WRITE/EXECUTE conflict, for
5762 * example).
5763 * Note that "max_prot" is an IN/OUT parameter only for this
5764 * specific (VM_PROT_COPY) case. It's usually an OUT parameter
5765 * only.
5766 */
5767 max_prot = new_prot & VM_PROT_ALL;
5768 kflags = VM_MAP_KERNEL_FLAGS_NONE;
5769 kflags.vmkf_remap_prot_copy = TRUE;
5770 kflags.vmkf_overwrite_immutable = TRUE;
5771 new_start = start;
5772 kr = vm_map_remap(map,
5773 &new_start,
5774 end - start,
5775 0, /* mask */
5776 VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
5777 kflags,
5778 0,
5779 map,
5780 start,
5781 TRUE, /* copy-on-write remapping! */
5782 &cur_prot,
5783 &max_prot,
5784 VM_INHERIT_DEFAULT);
5785 if (kr != KERN_SUCCESS) {
5786 return kr;
5787 }
5788 new_prot &= ~VM_PROT_COPY;
5789 }
5790
5791 vm_map_lock(map);
5792
5793 /* LP64todo - remove this check when vm_map_commpage64()
5794 * no longer has to stuff in a map_entry for the commpage
5795 * above the map's max_offset.
5796 */
5797 if (start >= map->max_offset) {
5798 vm_map_unlock(map);
5799 return KERN_INVALID_ADDRESS;
5800 }
5801
5802 while (1) {
5803 /*
5804 * Lookup the entry. If it doesn't start in a valid
5805 * entry, return an error.
5806 */
5807 if (!vm_map_lookup_entry(map, start, &entry)) {
5808 vm_map_unlock(map);
5809 return KERN_INVALID_ADDRESS;
5810 }
5811
5812 if (entry->superpage_size && (start & (SUPERPAGE_SIZE - 1))) { /* extend request to whole entry */
5813 start = SUPERPAGE_ROUND_DOWN(start);
5814 continue;
5815 }
5816 break;
5817 }
5818 if (entry->superpage_size) {
5819 end = SUPERPAGE_ROUND_UP(end);
5820 }
5821
5822 /*
5823 * Make a first pass to check for protection and address
5824 * violations.
5825 */
5826
5827 current = entry;
5828 prev = current->vme_start;
5829 while ((current != vm_map_to_entry(map)) &&
5830 (current->vme_start < end)) {
5831 /*
5832 * If there is a hole, return an error.
5833 */
5834 if (current->vme_start != prev) {
5835 vm_map_unlock(map);
5836 return KERN_INVALID_ADDRESS;
5837 }
5838
5839 new_max = current->max_protection;
5840 if ((new_prot & new_max) != new_prot) {
5841 vm_map_unlock(map);
5842 return KERN_PROTECTION_FAILURE;
5843 }
5844
5845 if ((new_prot & VM_PROT_WRITE) &&
5846 (new_prot & VM_PROT_EXECUTE) &&
5847 #if !CONFIG_EMBEDDED
5848 map != kernel_map &&
5849 cs_process_enforcement(NULL) &&
5850 #endif /* !CONFIG_EMBEDDED */
5851 !(current->used_for_jit)) {
5852 DTRACE_VM3(cs_wx,
5853 uint64_t, (uint64_t) current->vme_start,
5854 uint64_t, (uint64_t) current->vme_end,
5855 vm_prot_t, new_prot);
5856 printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
5857 proc_selfpid(),
5858 (current_task()->bsd_info
5859 ? proc_name_address(current_task()->bsd_info)
5860 : "?"),
5861 __FUNCTION__);
5862 new_prot &= ~VM_PROT_EXECUTE;
5863 #if VM_PROTECT_WX_FAIL
5864 vm_map_unlock(map);
5865 return KERN_PROTECTION_FAILURE;
5866 #endif /* VM_PROTECT_WX_FAIL */
5867 }
5868
5869 /*
5870 * If the task has requested executable lockdown,
5871 * deny both:
5872 * - adding executable protections OR
5873 * - adding write protections to an existing executable mapping.
5874 */
5875 if (map->map_disallow_new_exec == TRUE) {
5876 if ((new_prot & VM_PROT_EXECUTE) ||
5877 ((current->protection & VM_PROT_EXECUTE) && (new_prot & VM_PROT_WRITE))) {
5878 vm_map_unlock(map);
5879 return KERN_PROTECTION_FAILURE;
5880 }
5881 }
5882
5883 prev = current->vme_end;
5884 current = current->vme_next;
5885 }
5886
5887 #if __arm64__
5888 if (end > prev &&
5889 end == vm_map_round_page(prev, VM_MAP_PAGE_MASK(map))) {
5890 vm_map_entry_t prev_entry;
5891
5892 prev_entry = current->vme_prev;
5893 if (prev_entry != vm_map_to_entry(map) &&
5894 !prev_entry->map_aligned &&
5895 (vm_map_round_page(prev_entry->vme_end,
5896 VM_MAP_PAGE_MASK(map))
5897 == end)) {
5898 /*
5899 * The last entry in our range is not "map-aligned"
5900 * but it would have reached all the way to "end"
5901 * if it had been map-aligned, so this is not really
5902 * a hole in the range and we can proceed.
5903 */
5904 prev = end;
5905 }
5906 }
5907 #endif /* __arm64__ */
5908
5909 if (end > prev) {
5910 vm_map_unlock(map);
5911 return KERN_INVALID_ADDRESS;
5912 }
5913
5914 /*
5915 * Go back and fix up protections.
5916 * Clip to start here if the range starts within
5917 * the entry.
5918 */
5919
5920 current = entry;
5921 if (current != vm_map_to_entry(map)) {
5922 /* clip and unnest if necessary */
5923 vm_map_clip_start(map, current, start);
5924 }
5925
5926 while ((current != vm_map_to_entry(map)) &&
5927 (current->vme_start < end)) {
5928 vm_prot_t old_prot;
5929
5930 vm_map_clip_end(map, current, end);
5931
5932 if (current->is_sub_map) {
5933 /* clipping did unnest if needed */
5934 assert(!current->use_pmap);
5935 }
5936
5937 old_prot = current->protection;
5938
5939 if (set_max) {
5940 current->max_protection = new_prot;
5941 current->protection = new_prot & old_prot;
5942 } else {
5943 current->protection = new_prot;
5944 }
5945
5946 /*
5947 * Update physical map if necessary.
5948 * If the request is to turn off write protection,
5949 * we won't do it for real (in pmap). This is because
5950 * it would cause copy-on-write to fail. We've already
5951 * set the new protection in the map, so if a
5952 * write-protect fault occurs, it will be fixed up
5953 * properly, COW or not.
5954 */
5955 if (current->protection != old_prot) {
5956 /* Look one level in; we support nested pmaps */
5957 /* from mapped submaps which are direct entries */
5958 /* in our map */
5959
5960 vm_prot_t prot;
5961
5962 prot = current->protection;
5963 if (current->is_sub_map || (VME_OBJECT(current) == NULL) || (VME_OBJECT(current) != compressor_object)) {
5964 prot &= ~VM_PROT_WRITE;
5965 } else {
5966 assert(!VME_OBJECT(current)->code_signed);
5967 assert(VME_OBJECT(current)->copy_strategy == MEMORY_OBJECT_COPY_NONE);
5968 }
5969
5970 if (override_nx(map, VME_ALIAS(current)) && prot) {
5971 prot |= VM_PROT_EXECUTE;
5972 }
5973
5974 #if CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG)
5975 if (!(old_prot & VM_PROT_EXECUTE) &&
5976 (prot & VM_PROT_EXECUTE) &&
5977 panic_on_unsigned_execute &&
5978 (proc_selfcsflags() & CS_KILL)) {
5979 panic("vm_map_protect(%p,0x%llx,0x%llx) old=0x%x new=0x%x - <rdar://23770418> code-signing bypass?\n", map, (uint64_t)current->vme_start, (uint64_t)current->vme_end, old_prot, prot);
5980 }
5981 #endif /* CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG) */
5982
5983 if (pmap_has_prot_policy(prot)) {
5984 if (current->wired_count) {
5985 panic("vm_map_protect(%p,0x%llx,0x%llx) new=0x%x wired=%x\n",
5986 map, (uint64_t)current->vme_start, (uint64_t)current->vme_end, prot, current->wired_count);
5987 }
5988
5989 /* If the pmap layer cares about this
5990 * protection type, force a fault for
5991 * each page so that vm_fault will
5992 * repopulate the page with the full
5993 * set of protections.
5994 */
5995 /*
5996 * TODO: We don't seem to need this,
5997 * but this is due to an internal
5998 * implementation detail of
5999 * pmap_protect. Do we want to rely
6000 * on this?
6001 */
6002 prot = VM_PROT_NONE;
6003 }
6004
6005 if (current->is_sub_map && current->use_pmap) {
6006 pmap_protect(VME_SUBMAP(current)->pmap,
6007 current->vme_start,
6008 current->vme_end,
6009 prot);
6010 } else {
6011 if (prot & VM_PROT_WRITE) {
6012 if (VME_OBJECT(current) == compressor_object) {
6013 /*
6014 * For write requests on the
6015 * compressor, we will ask the
6016 * pmap layer to prevent us from
6017 * taking a write fault when we
6018 * attempt to access the mapping
6019 * next.
6020 */
6021 pmap_options |= PMAP_OPTIONS_PROTECT_IMMEDIATE;
6022 }
6023 }
6024
6025 pmap_protect_options(map->pmap,
6026 current->vme_start,
6027 current->vme_end,
6028 prot,
6029 pmap_options,
6030 NULL);
6031 }
6032 }
6033 current = current->vme_next;
6034 }
6035
6036 current = entry;
6037 while ((current != vm_map_to_entry(map)) &&
6038 (current->vme_start <= end)) {
6039 vm_map_simplify_entry(map, current);
6040 current = current->vme_next;
6041 }
6042
6043 vm_map_unlock(map);
6044 return KERN_SUCCESS;
6045 }
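
/*
 * Illustrative usage sketch (not part of the original source): a caller
 * making an already-mapped, page-aligned range read-only could do
 * something like
 *
 *	kern_return_t kr;
 *
 *	kr = vm_map_protect(map, start, end, VM_PROT_READ, FALSE);
 *
 * A KERN_PROTECTION_FAILURE return means VM_PROT_READ was not within
 * some entry's max_protection. Passing set_max = TRUE instead lowers
 * max_protection and clips the current protection to the new maximum,
 * as implemented above.
 */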
6046
6047 /*
6048 * vm_map_inherit:
6049 *
6050 * Sets the inheritance of the specified address
6051 * range in the target map. Inheritance
6052 * affects how the map will be shared with
6053 * child maps at the time of vm_map_fork.
6054 */
6055 kern_return_t
6056 vm_map_inherit(
6057 vm_map_t map,
6058 vm_map_offset_t start,
6059 vm_map_offset_t end,
6060 vm_inherit_t new_inheritance)
6061 {
6062 vm_map_entry_t entry;
6063 vm_map_entry_t temp_entry;
6064
6065 vm_map_lock(map);
6066
6067 VM_MAP_RANGE_CHECK(map, start, end);
6068
6069 if (vm_map_lookup_entry(map, start, &temp_entry)) {
6070 entry = temp_entry;
6071 } else {
6072 temp_entry = temp_entry->vme_next;
6073 entry = temp_entry;
6074 }
6075
6076 /* first check entire range for submaps which can't support the */
6077 /* given inheritance. */
6078 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
6079 if (entry->is_sub_map) {
6080 if (new_inheritance == VM_INHERIT_COPY) {
6081 vm_map_unlock(map);
6082 return KERN_INVALID_ARGUMENT;
6083 }
6084 }
6085
6086 entry = entry->vme_next;
6087 }
6088
6089 entry = temp_entry;
6090 if (entry != vm_map_to_entry(map)) {
6091 /* clip and unnest if necessary */
6092 vm_map_clip_start(map, entry, start);
6093 }
6094
6095 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
6096 vm_map_clip_end(map, entry, end);
6097 if (entry->is_sub_map) {
6098 /* clip did unnest if needed */
6099 assert(!entry->use_pmap);
6100 }
6101
6102 entry->inheritance = new_inheritance;
6103
6104 entry = entry->vme_next;
6105 }
6106
6107 vm_map_unlock(map);
6108 return KERN_SUCCESS;
6109 }
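
/*
 * Illustrative usage sketch (not part of the original source): marking
 * a range so that children created at vm_map_fork() time do not
 * inherit it could look like
 *
 *	kr = vm_map_inherit(map, start, end, VM_INHERIT_NONE);
 *
 * VM_INHERIT_SHARE and VM_INHERIT_COPY are set the same way, except
 * that VM_INHERIT_COPY is rejected with KERN_INVALID_ARGUMENT if the
 * range contains a submap entry (see the first pass above).
 */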
6110
6111 /*
6112 * Update the accounting for the amount of wired memory in this map. If the user has
6113 * exceeded the defined limits, then we fail. Wiring on behalf of the kernel never fails.
6114 */
6115
6116 static kern_return_t
6117 add_wire_counts(
6118 vm_map_t map,
6119 vm_map_entry_t entry,
6120 boolean_t user_wire)
6121 {
6122 vm_map_size_t size;
6123
6124 if (user_wire) {
6125 unsigned int total_wire_count = vm_page_wire_count + vm_lopage_free_count;
6126
6127 /*
6128 * We're wiring memory at the request of the user. Check if this is the first time the user is wiring
6129 * this map entry.
6130 */
6131
6132 if (entry->user_wired_count == 0) {
6133 size = entry->vme_end - entry->vme_start;
6134
6135 /*
6136 * Since this is the first time the user is wiring this map entry, check to see if we're
6137 * exceeding the user wire limits. There is a per map limit which is the smaller of either
6138 * the process's rlimit or the global vm_user_wire_limit which caps this value. There is also
6139 * a system-wide limit on the amount of memory all users can wire. If the user is over either
6140 * limit, then we fail.
6141 */
6142
6143 if (size + map->user_wire_size > MIN(map->user_wire_limit, vm_user_wire_limit) ||
6144 size + ptoa_64(total_wire_count) > vm_global_user_wire_limit ||
6145 size + ptoa_64(total_wire_count) > max_mem - vm_global_no_user_wire_amount) {
6146 return KERN_RESOURCE_SHORTAGE;
6147 }
6148
6149 /*
6150 * The first time the user wires an entry, we also increment the wired_count and add this to
6151 * the total that has been wired in the map.
6152 */
6153
6154 if (entry->wired_count >= MAX_WIRE_COUNT) {
6155 return KERN_FAILURE;
6156 }
6157
6158 entry->wired_count++;
6159 map->user_wire_size += size;
6160 }
6161
6162 if (entry->user_wired_count >= MAX_WIRE_COUNT) {
6163 return KERN_FAILURE;
6164 }
6165
6166 entry->user_wired_count++;
6167 } else {
6168 /*
6169 * The kernel is wiring the memory. Just bump the count and continue.
6170 */
6171
6172 if (entry->wired_count >= MAX_WIRE_COUNT) {
6173 panic("vm_map_wire: too many wirings");
6174 }
6175
6176 entry->wired_count++;
6177 }
6178
6179 return KERN_SUCCESS;
6180 }
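
/*
 * Illustrative accounting sketch (not part of the original source) for
 * a single entry that starts out unwired:
 *
 *	user wire #1:  wired_count 0 -> 1, user_wired_count 0 -> 1,
 *	               map->user_wire_size += (vme_end - vme_start)
 *	user wire #2:  wired_count stays 1, user_wired_count 1 -> 2
 *	kernel wire:   wired_count 1 -> 2, user_wired_count stays 2
 *
 * subtract_wire_counts() below reverses these steps; only the last
 * user unwire drops wired_count and map->user_wire_size again.
 */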
6181
6182 /*
6183 * Update the memory wiring accounting now that the given map entry is being unwired.
6184 */
6185
6186 static void
6187 subtract_wire_counts(
6188 vm_map_t map,
6189 vm_map_entry_t entry,
6190 boolean_t user_wire)
6191 {
6192 if (user_wire) {
6193 /*
6194 * We're unwiring memory at the request of the user. See if we're removing the last user wire reference.
6195 */
6196
6197 if (entry->user_wired_count == 1) {
6198 /*
6199 * We're removing the last user wire reference. Decrement the wired_count and the total
6200 * user wired memory for this map.
6201 */
6202
6203 assert(entry->wired_count >= 1);
6204 entry->wired_count--;
6205 map->user_wire_size -= entry->vme_end - entry->vme_start;
6206 }
6207
6208 assert(entry->user_wired_count >= 1);
6209 entry->user_wired_count--;
6210 } else {
6211 /*
6212 * The kernel is unwiring the memory. Just update the count.
6213 */
6214
6215 assert(entry->wired_count >= 1);
6216 entry->wired_count--;
6217 }
6218 }
6219
6220 int cs_executable_wire = 0;
6221
6222 /*
6223 * vm_map_wire:
6224 *
6225 * Sets the pageability of the specified address range in the
6226 * target map as wired. Regions specified as not pageable require
6227 * locked-down physical memory and physical page maps. The
6228 * access_type variable indicates types of accesses that must not
6229 * generate page faults. This is checked against protection of
6230 * memory being locked-down.
6231 *
6232 * The map must not be locked, but a reference must remain to the
6233 * map throughout the call.
6234 */
6235 static kern_return_t
6236 vm_map_wire_nested(
6237 vm_map_t map,
6238 vm_map_offset_t start,
6239 vm_map_offset_t end,
6240 vm_prot_t caller_prot,
6241 vm_tag_t tag,
6242 boolean_t user_wire,
6243 pmap_t map_pmap,
6244 vm_map_offset_t pmap_addr,
6245 ppnum_t *physpage_p)
6246 {
6247 vm_map_entry_t entry;
6248 vm_prot_t access_type;
6249 struct vm_map_entry *first_entry, tmp_entry;
6250 vm_map_t real_map;
6251 vm_map_offset_t s, e;
6252 kern_return_t rc;
6253 boolean_t need_wakeup;
6254 boolean_t main_map = FALSE;
6255 wait_interrupt_t interruptible_state;
6256 thread_t cur_thread;
6257 unsigned int last_timestamp;
6258 vm_map_size_t size;
6259 boolean_t wire_and_extract;
6260
6261 access_type = (caller_prot & VM_PROT_ALL);
6262
6263 wire_and_extract = FALSE;
6264 if (physpage_p != NULL) {
6265 /*
6266 * The caller wants the physical page number of the
6267 * wired page. We return only one physical page number
6268 * so this works for only one page at a time.
6269 */
6270 if ((end - start) != PAGE_SIZE) {
6271 return KERN_INVALID_ARGUMENT;
6272 }
6273 wire_and_extract = TRUE;
6274 *physpage_p = 0;
6275 }
6276
6277 vm_map_lock(map);
6278 if (map_pmap == NULL) {
6279 main_map = TRUE;
6280 }
6281 last_timestamp = map->timestamp;
6282
6283 VM_MAP_RANGE_CHECK(map, start, end);
6284 assert(page_aligned(start));
6285 assert(page_aligned(end));
6286 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
6287 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
6288 if (start == end) {
6289 /* We wired what the caller asked for, zero pages */
6290 vm_map_unlock(map);
6291 return KERN_SUCCESS;
6292 }
6293
6294 need_wakeup = FALSE;
6295 cur_thread = current_thread();
6296
6297 s = start;
6298 rc = KERN_SUCCESS;
6299
6300 if (vm_map_lookup_entry(map, s, &first_entry)) {
6301 entry = first_entry;
6302 /*
6303 * vm_map_clip_start will be done later.
6304 * We don't want to unnest any nested submaps here!
6305 */
6306 } else {
6307 /* Start address is not in map */
6308 rc = KERN_INVALID_ADDRESS;
6309 goto done;
6310 }
6311
6312 while ((entry != vm_map_to_entry(map)) && (s < end)) {
6313 /*
6314 * At this point, we have wired from "start" to "s".
6315 * We still need to wire from "s" to "end".
6316 *
6317 * "entry" hasn't been clipped, so it could start before "s"
6318 * and/or end after "end".
6319 */
6320
6321 /* "e" is how far we want to wire in this entry */
6322 e = entry->vme_end;
6323 if (e > end) {
6324 e = end;
6325 }
6326
6327 /*
6328 * If another thread is wiring/unwiring this entry then
6329 * block after informing other thread to wake us up.
6330 */
6331 if (entry->in_transition) {
6332 wait_result_t wait_result;
6333
6334 /*
6335 * We have not clipped the entry. Make sure that
6336 * the start address is in range so that the lookup
6337 * below will succeed.
6338 * "s" is the current starting point: we've already
6339 * wired from "start" to "s" and we still have
6340 * to wire from "s" to "end".
6341 */
6342
6343 entry->needs_wakeup = TRUE;
6344
6345 /*
6346 * wake up anybody waiting on entries that we have
6347 * already wired.
6348 */
6349 if (need_wakeup) {
6350 vm_map_entry_wakeup(map);
6351 need_wakeup = FALSE;
6352 }
6353 /*
6354 * User wiring is interruptible
6355 */
6356 wait_result = vm_map_entry_wait(map,
6357 (user_wire) ? THREAD_ABORTSAFE :
6358 THREAD_UNINT);
6359 if (user_wire && wait_result == THREAD_INTERRUPTED) {
6360 /*
6361 * undo the wirings we have done so far
6362 * We do not clear the needs_wakeup flag,
6363 * because we cannot tell if we were the
6364 * only one waiting.
6365 */
6366 rc = KERN_FAILURE;
6367 goto done;
6368 }
6369
6370 /*
6371 * Cannot avoid a lookup here. Reset the timestamp.
6372 */
6373 last_timestamp = map->timestamp;
6374
6375 /*
6376 * The entry could have been clipped, look it up again.
6377 * Worst that can happen is that it may not exist anymore.
6378 */
6379 if (!vm_map_lookup_entry(map, s, &first_entry)) {
6380 /*
6381 * User: undo everything up to the previous
6382 * entry. Let vm_map_unwire worry about
6383 * checking the validity of the range.
6384 */
6385 rc = KERN_FAILURE;
6386 goto done;
6387 }
6388 entry = first_entry;
6389 continue;
6390 }
6391
6392 if (entry->is_sub_map) {
6393 vm_map_offset_t sub_start;
6394 vm_map_offset_t sub_end;
6395 vm_map_offset_t local_start;
6396 vm_map_offset_t local_end;
6397 pmap_t pmap;
6398
6399 if (wire_and_extract) {
6400 /*
6401 * Wiring would result in copy-on-write
6402 * which would not be compatible with
6403 * the sharing we have with the original
6404 * provider of this memory.
6405 */
6406 rc = KERN_INVALID_ARGUMENT;
6407 goto done;
6408 }
6409
6410 vm_map_clip_start(map, entry, s);
6411 vm_map_clip_end(map, entry, end);
6412
6413 sub_start = VME_OFFSET(entry);
6414 sub_end = entry->vme_end;
6415 sub_end += VME_OFFSET(entry) - entry->vme_start;
6416
6417 local_end = entry->vme_end;
6418 if (map_pmap == NULL) {
6419 vm_object_t object;
6420 vm_object_offset_t offset;
6421 vm_prot_t prot;
6422 boolean_t wired;
6423 vm_map_entry_t local_entry;
6424 vm_map_version_t version;
6425 vm_map_t lookup_map;
6426
6427 if (entry->use_pmap) {
6428 pmap = VME_SUBMAP(entry)->pmap;
6429 /* ppc implementation requires that */
6430 /* submaps' pmap address ranges line */
6431 /* up with the parent map */
6432 #ifdef notdef
6433 pmap_addr = sub_start;
6434 #endif
6435 pmap_addr = s;
6436 } else {
6437 pmap = map->pmap;
6438 pmap_addr = s;
6439 }
6440
6441 if (entry->wired_count) {
6442 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
6443 goto done;
6444 }
6445
6446 /*
6447 * The map was not unlocked:
6448 * no need to goto re-lookup.
6449 * Just go directly to next entry.
6450 */
6451 entry = entry->vme_next;
6452 s = entry->vme_start;
6453 continue;
6454 }
6455
6456 /* call vm_map_lookup_locked to */
6457 /* cause any needs_copy to be */
6458 /* evaluated */
6459 local_start = entry->vme_start;
6460 lookup_map = map;
6461 vm_map_lock_write_to_read(map);
6462 if (vm_map_lookup_locked(
6463 &lookup_map, local_start,
6464 access_type | VM_PROT_COPY,
6465 OBJECT_LOCK_EXCLUSIVE,
6466 &version, &object,
6467 &offset, &prot, &wired,
6468 NULL,
6469 &real_map)) {
6470 vm_map_unlock_read(lookup_map);
6471 assert(map_pmap == NULL);
6472 vm_map_unwire(map, start,
6473 s, user_wire);
6474 return KERN_FAILURE;
6475 }
6476 vm_object_unlock(object);
6477 if (real_map != lookup_map) {
6478 vm_map_unlock(real_map);
6479 }
6480 vm_map_unlock_read(lookup_map);
6481 vm_map_lock(map);
6482
6483 /* we unlocked, so must re-lookup */
6484 if (!vm_map_lookup_entry(map,
6485 local_start,
6486 &local_entry)) {
6487 rc = KERN_FAILURE;
6488 goto done;
6489 }
6490
6491 /*
6492 * entry could have been "simplified",
6493 * so re-clip
6494 */
6495 entry = local_entry;
6496 assert(s == local_start);
6497 vm_map_clip_start(map, entry, s);
6498 vm_map_clip_end(map, entry, end);
6499 /* re-compute "e" */
6500 e = entry->vme_end;
6501 if (e > end) {
6502 e = end;
6503 }
6504
6505 /* did we have a change of type? */
6506 if (!entry->is_sub_map) {
6507 last_timestamp = map->timestamp;
6508 continue;
6509 }
6510 } else {
6511 local_start = entry->vme_start;
6512 pmap = map_pmap;
6513 }
6514
6515 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
6516 goto done;
6517 }
6518
6519 entry->in_transition = TRUE;
6520
6521 vm_map_unlock(map);
6522 rc = vm_map_wire_nested(VME_SUBMAP(entry),
6523 sub_start, sub_end,
6524 caller_prot, tag,
6525 user_wire, pmap, pmap_addr,
6526 NULL);
6527 vm_map_lock(map);
6528
6529 /*
6530 * Find the entry again. It could have been clipped
6531 * after we unlocked the map.
6532 */
6533 if (!vm_map_lookup_entry(map, local_start,
6534 &first_entry)) {
6535 panic("vm_map_wire: re-lookup failed");
6536 }
6537 entry = first_entry;
6538
6539 assert(local_start == s);
6540 /* re-compute "e" */
6541 e = entry->vme_end;
6542 if (e > end) {
6543 e = end;
6544 }
6545
6546 last_timestamp = map->timestamp;
6547 while ((entry != vm_map_to_entry(map)) &&
6548 (entry->vme_start < e)) {
6549 assert(entry->in_transition);
6550 entry->in_transition = FALSE;
6551 if (entry->needs_wakeup) {
6552 entry->needs_wakeup = FALSE;
6553 need_wakeup = TRUE;
6554 }
6555 if (rc != KERN_SUCCESS) {/* from vm_*_wire */
6556 subtract_wire_counts(map, entry, user_wire);
6557 }
6558 entry = entry->vme_next;
6559 }
6560 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
6561 goto done;
6562 }
6563
6564 /* no need to relookup again */
6565 s = entry->vme_start;
6566 continue;
6567 }
6568
6569 /*
6570 * If this entry is already wired then increment
6571 * the appropriate wire reference count.
6572 */
6573 if (entry->wired_count) {
6574 if ((entry->protection & access_type) != access_type) {
6575 /* found a protection problem */
6576
6577 /*
6578 * XXX FBDP
6579 * We should always return an error
6580 * in this case but since we didn't
6581 * enforce it before, let's do
6582 * it only for the new "wire_and_extract"
6583 * code path for now...
6584 */
6585 if (wire_and_extract) {
6586 rc = KERN_PROTECTION_FAILURE;
6587 goto done;
6588 }
6589 }
6590
6591 /*
6592 * entry is already wired down, get our reference
6593 * after clipping to our range.
6594 */
6595 vm_map_clip_start(map, entry, s);
6596 vm_map_clip_end(map, entry, end);
6597
6598 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
6599 goto done;
6600 }
6601
6602 if (wire_and_extract) {
6603 vm_object_t object;
6604 vm_object_offset_t offset;
6605 vm_page_t m;
6606
6607 /*
6608 * We don't have to "wire" the page again
6609 * but we still have to "extract" its
6610 * physical page number, after some sanity
6611 * checks.
6612 */
6613 assert((entry->vme_end - entry->vme_start)
6614 == PAGE_SIZE);
6615 assert(!entry->needs_copy);
6616 assert(!entry->is_sub_map);
6617 assert(VME_OBJECT(entry));
6618 if (((entry->vme_end - entry->vme_start)
6619 != PAGE_SIZE) ||
6620 entry->needs_copy ||
6621 entry->is_sub_map ||
6622 VME_OBJECT(entry) == VM_OBJECT_NULL) {
6623 rc = KERN_INVALID_ARGUMENT;
6624 goto done;
6625 }
6626
6627 object = VME_OBJECT(entry);
6628 offset = VME_OFFSET(entry);
6629 /* need exclusive lock to update m->dirty */
6630 if (entry->protection & VM_PROT_WRITE) {
6631 vm_object_lock(object);
6632 } else {
6633 vm_object_lock_shared(object);
6634 }
6635 m = vm_page_lookup(object, offset);
6636 assert(m != VM_PAGE_NULL);
6637 assert(VM_PAGE_WIRED(m));
6638 if (m != VM_PAGE_NULL && VM_PAGE_WIRED(m)) {
6639 *physpage_p = VM_PAGE_GET_PHYS_PAGE(m);
6640 if (entry->protection & VM_PROT_WRITE) {
6641 vm_object_lock_assert_exclusive(
6642 object);
6643 m->vmp_dirty = TRUE;
6644 }
6645 } else {
6646 /* not already wired !? */
6647 *physpage_p = 0;
6648 }
6649 vm_object_unlock(object);
6650 }
6651
6652 /* map was not unlocked: no need to relookup */
6653 entry = entry->vme_next;
6654 s = entry->vme_start;
6655 continue;
6656 }
6657
6658 /*
6659 * Unwired entry or wire request transmitted via submap
6660 */
6661
6662 /*
6663 * Wiring would copy the pages to the shadow object.
6664 * The shadow object would not be code-signed so
6665 * attempting to execute code from these copied pages
6666 * would trigger a code-signing violation.
6667 */
6668
6669 if ((entry->protection & VM_PROT_EXECUTE)
6670 #if !CONFIG_EMBEDDED
6671 &&
6672 map != kernel_map &&
6673 cs_process_enforcement(NULL)
6674 #endif /* !CONFIG_EMBEDDED */
6675 ) {
6676 #if MACH_ASSERT
6677 printf("pid %d[%s] wiring executable range from "
6678 "0x%llx to 0x%llx: rejected to preserve "
6679 "code-signing\n",
6680 proc_selfpid(),
6681 (current_task()->bsd_info
6682 ? proc_name_address(current_task()->bsd_info)
6683 : "?"),
6684 (uint64_t) entry->vme_start,
6685 (uint64_t) entry->vme_end);
6686 #endif /* MACH_ASSERT */
6687 DTRACE_VM2(cs_executable_wire,
6688 uint64_t, (uint64_t)entry->vme_start,
6689 uint64_t, (uint64_t)entry->vme_end);
6690 cs_executable_wire++;
6691 rc = KERN_PROTECTION_FAILURE;
6692 goto done;
6693 }
6694
6695 /*
6696 * Perform actions of vm_map_lookup that need the write
6697 * lock on the map: create a shadow object for a
6698 * copy-on-write region, or an object for a zero-fill
6699 * region.
6700 */
6701 size = entry->vme_end - entry->vme_start;
6702 /*
6703 * If wiring a copy-on-write page, we need to copy it now
6704 * even if we're only (currently) requesting read access.
6705 * This is aggressive, but once it's wired we can't move it.
6706 */
6707 if (entry->needs_copy) {
6708 if (wire_and_extract) {
6709 /*
6710 * We're supposed to share with the original
6711 * provider so should not be "needs_copy"
6712 */
6713 rc = KERN_INVALID_ARGUMENT;
6714 goto done;
6715 }
6716
6717 VME_OBJECT_SHADOW(entry, size);
6718 entry->needs_copy = FALSE;
6719 } else if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
6720 if (wire_and_extract) {
6721 /*
6722 * We're supposed to share with the original
6723 * provider so should already have an object.
6724 */
6725 rc = KERN_INVALID_ARGUMENT;
6726 goto done;
6727 }
6728 VME_OBJECT_SET(entry, vm_object_allocate(size));
6729 VME_OFFSET_SET(entry, (vm_object_offset_t)0);
6730 assert(entry->use_pmap);
6731 }
6732
6733 vm_map_clip_start(map, entry, s);
6734 vm_map_clip_end(map, entry, end);
6735
6736 /* re-compute "e" */
6737 e = entry->vme_end;
6738 if (e > end) {
6739 e = end;
6740 }
6741
6742 /*
6743 * Check for holes and protection mismatch.
6744 * Holes: Next entry should be contiguous unless this
6745 * is the end of the region.
6746 * Protection: Access requested must be allowed, unless
6747 * wiring is by protection class
6748 */
6749 if ((entry->vme_end < end) &&
6750 ((entry->vme_next == vm_map_to_entry(map)) ||
6751 (entry->vme_next->vme_start > entry->vme_end))) {
6752 /* found a hole */
6753 rc = KERN_INVALID_ADDRESS;
6754 goto done;
6755 }
6756 if ((entry->protection & access_type) != access_type) {
6757 /* found a protection problem */
6758 rc = KERN_PROTECTION_FAILURE;
6759 goto done;
6760 }
6761
6762 assert(entry->wired_count == 0 && entry->user_wired_count == 0);
6763
6764 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
6765 goto done;
6766 }
6767
6768 entry->in_transition = TRUE;
6769
6770 /*
6771 * This entry might get split once we unlock the map.
6772 * In vm_fault_wire(), we need the current range as
6773 * defined by this entry. In order for this to work
6774 * along with a simultaneous clip operation, we make a
6775 * temporary copy of this entry and use that for the
6776 * wiring. Note that the underlying objects do not
6777 * change during a clip.
6778 */
6779 tmp_entry = *entry;
6780
6781 /*
6782 * The in_transition state guarantees that the entry
6783 * (or entries for this range, if a split occurred) will be
6784 * there when the map lock is acquired for the second time.
6785 */
6786 vm_map_unlock(map);
6787
6788 if (!user_wire && cur_thread != THREAD_NULL) {
6789 interruptible_state = thread_interrupt_level(THREAD_UNINT);
6790 } else {
6791 interruptible_state = THREAD_UNINT;
6792 }
6793
6794 if (map_pmap) {
6795 rc = vm_fault_wire(map,
6796 &tmp_entry, caller_prot, tag, map_pmap, pmap_addr,
6797 physpage_p);
6798 } else {
6799 rc = vm_fault_wire(map,
6800 &tmp_entry, caller_prot, tag, map->pmap,
6801 tmp_entry.vme_start,
6802 physpage_p);
6803 }
6804
6805 if (!user_wire && cur_thread != THREAD_NULL) {
6806 thread_interrupt_level(interruptible_state);
6807 }
6808
6809 vm_map_lock(map);
6810
6811 if (last_timestamp + 1 != map->timestamp) {
6812 /*
6813 * Find the entry again. It could have been clipped
6814 * after we unlocked the map.
6815 */
6816 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
6817 &first_entry)) {
6818 panic("vm_map_wire: re-lookup failed");
6819 }
6820
6821 entry = first_entry;
6822 }
6823
6824 last_timestamp = map->timestamp;
6825
6826 while ((entry != vm_map_to_entry(map)) &&
6827 (entry->vme_start < tmp_entry.vme_end)) {
6828 assert(entry->in_transition);
6829 entry->in_transition = FALSE;
6830 if (entry->needs_wakeup) {
6831 entry->needs_wakeup = FALSE;
6832 need_wakeup = TRUE;
6833 }
6834 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
6835 subtract_wire_counts(map, entry, user_wire);
6836 }
6837 entry = entry->vme_next;
6838 }
6839
6840 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
6841 goto done;
6842 }
6843
6844 if ((entry != vm_map_to_entry(map)) && /* we still have entries in the map */
6845 (tmp_entry.vme_end != end) && /* AND, we are not at the end of the requested range */
6846 (entry->vme_start != tmp_entry.vme_end)) { /* AND, the next entry is not contiguous. */
6847 /* found a "new" hole */
6848 s = tmp_entry.vme_end;
6849 rc = KERN_INVALID_ADDRESS;
6850 goto done;
6851 }
6852
6853 s = entry->vme_start;
6854 } /* end while loop through map entries */
6855
6856 done:
6857 if (rc == KERN_SUCCESS) {
6858 /* repair any damage we may have made to the VM map */
6859 vm_map_simplify_range(map, start, end);
6860 }
6861
6862 vm_map_unlock(map);
6863
6864 /*
6865 * wake up anybody waiting on entries we wired.
6866 */
6867 if (need_wakeup) {
6868 vm_map_entry_wakeup(map);
6869 }
6870
6871 if (rc != KERN_SUCCESS) {
6872 /* undo what has been wired so far */
6873 vm_map_unwire_nested(map, start, s, user_wire,
6874 map_pmap, pmap_addr);
6875 if (physpage_p) {
6876 *physpage_p = 0;
6877 }
6878 }
6879
6880 return rc;
6881 }
6882
6883 kern_return_t
6884 vm_map_wire_external(
6885 vm_map_t map,
6886 vm_map_offset_t start,
6887 vm_map_offset_t end,
6888 vm_prot_t caller_prot,
6889 boolean_t user_wire)
6890 {
6891 kern_return_t kret;
6892
6893 kret = vm_map_wire_nested(map, start, end, caller_prot, vm_tag_bt(),
6894 user_wire, (pmap_t)NULL, 0, NULL);
6895 return kret;
6896 }
6897
6898 kern_return_t
6899 vm_map_wire_kernel(
6900 vm_map_t map,
6901 vm_map_offset_t start,
6902 vm_map_offset_t end,
6903 vm_prot_t caller_prot,
6904 vm_tag_t tag,
6905 boolean_t user_wire)
6906 {
6907 kern_return_t kret;
6908
6909 kret = vm_map_wire_nested(map, start, end, caller_prot, tag,
6910 user_wire, (pmap_t)NULL, 0, NULL);
6911 return kret;
6912 }
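
/*
 * Illustrative usage sketch (not part of the original source): a kernel
 * subsystem wiring one page of a task's map for I/O might do something
 * like
 *
 *	kern_return_t kr;
 *
 *	kr = vm_map_wire_kernel(map, addr, addr + PAGE_SIZE,
 *	    VM_PROT_READ | VM_PROT_WRITE, VM_KERN_MEMORY_NONE, FALSE);
 *
 * (substituting the caller's own VM_KERN_MEMORY_* tag). user_wire is
 * FALSE for kernel-initiated wirings, so only wired_count is bumped;
 * the wiring must later be undone with vm_map_unwire() using the same
 * user_wire value.
 */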
6913
6914 kern_return_t
6915 vm_map_wire_and_extract_external(
6916 vm_map_t map,
6917 vm_map_offset_t start,
6918 vm_prot_t caller_prot,
6919 boolean_t user_wire,
6920 ppnum_t *physpage_p)
6921 {
6922 kern_return_t kret;
6923
6924 kret = vm_map_wire_nested(map,
6925 start,
6926 start + VM_MAP_PAGE_SIZE(map),
6927 caller_prot,
6928 vm_tag_bt(),
6929 user_wire,
6930 (pmap_t)NULL,
6931 0,
6932 physpage_p);
6933 if (kret != KERN_SUCCESS &&
6934 physpage_p != NULL) {
6935 *physpage_p = 0;
6936 }
6937 return kret;
6938 }
6939
6940 kern_return_t
6941 vm_map_wire_and_extract_kernel(
6942 vm_map_t map,
6943 vm_map_offset_t start,
6944 vm_prot_t caller_prot,
6945 vm_tag_t tag,
6946 boolean_t user_wire,
6947 ppnum_t *physpage_p)
6948 {
6949 kern_return_t kret;
6950
6951 kret = vm_map_wire_nested(map,
6952 start,
6953 start + VM_MAP_PAGE_SIZE(map),
6954 caller_prot,
6955 tag,
6956 user_wire,
6957 (pmap_t)NULL,
6958 0,
6959 physpage_p);
6960 if (kret != KERN_SUCCESS &&
6961 physpage_p != NULL) {
6962 *physpage_p = 0;
6963 }
6964 return kret;
6965 }
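
/*
 * Illustrative usage sketch (not part of the original source): to wire
 * a single page and learn which physical page backs it, a caller could
 * do something like
 *
 *	ppnum_t pn;
 *
 *	kr = vm_map_wire_and_extract_kernel(map, addr,
 *	    VM_PROT_READ | VM_PROT_WRITE, tag, FALSE, &pn);
 *
 * where "tag" is whatever vm_tag_t the caller accounts against. On
 * success pn is the physical page number; on failure it is set to 0.
 * Only a single page can be handled this way (vm_map_wire_nested()
 * rejects anything but a PAGE_SIZE range when extracting).
 */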
6966
6967 /*
6968 * vm_map_unwire:
6969 *
6970 * Sets the pageability of the specified address range in the target
6971 * as pageable. Regions specified must have been wired previously.
6972 *
6973 * The map must not be locked, but a reference must remain to the map
6974 * throughout the call.
6975 *
6976 * Kernel will panic on failures. User unwire ignores holes and
6977 * unwired and in-transition entries to avoid losing memory by leaving
6978 * it unwired.
6979 */
6980 static kern_return_t
6981 vm_map_unwire_nested(
6982 vm_map_t map,
6983 vm_map_offset_t start,
6984 vm_map_offset_t end,
6985 boolean_t user_wire,
6986 pmap_t map_pmap,
6987 vm_map_offset_t pmap_addr)
6988 {
6989 vm_map_entry_t entry;
6990 struct vm_map_entry *first_entry, tmp_entry;
6991 boolean_t need_wakeup;
6992 boolean_t main_map = FALSE;
6993 unsigned int last_timestamp;
6994
6995 vm_map_lock(map);
6996 if (map_pmap == NULL) {
6997 main_map = TRUE;
6998 }
6999 last_timestamp = map->timestamp;
7000
7001 VM_MAP_RANGE_CHECK(map, start, end);
7002 assert(page_aligned(start));
7003 assert(page_aligned(end));
7004 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
7005 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
7006
7007 if (start == end) {
7008 /* We unwired what the caller asked for: zero pages */
7009 vm_map_unlock(map);
7010 return KERN_SUCCESS;
7011 }
7012
7013 if (vm_map_lookup_entry(map, start, &first_entry)) {
7014 entry = first_entry;
7015 /*
7016 * vm_map_clip_start will be done later.
7017 * We don't want to unnest any nested submaps here!
7018 */
7019 } else {
7020 if (!user_wire) {
7021 panic("vm_map_unwire: start not found");
7022 }
7023 /* Start address is not in map. */
7024 vm_map_unlock(map);
7025 return KERN_INVALID_ADDRESS;
7026 }
7027
7028 if (entry->superpage_size) {
7029 /* superpages are always wired */
7030 vm_map_unlock(map);
7031 return KERN_INVALID_ADDRESS;
7032 }
7033
7034 need_wakeup = FALSE;
7035 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
7036 if (entry->in_transition) {
7037 /*
7038 * 1)
7039 * Another thread is wiring down this entry. Note
7040 * that, were it not for the other thread, we would
7041 * be unwiring an unwired entry. This is not
7042 * permitted. If we wait, we will be unwiring memory
7043 * we did not wire.
7044 *
7045 * 2)
7046 * Another thread is unwiring this entry. We did not
7047 * have a reference to it, because if we did, this
7048 * entry will not be getting unwired now.
7049 */
7050 if (!user_wire) {
7051 /*
7052 * XXX FBDP
7053 * This could happen: there could be some
7054 * overlapping vslock/vsunlock operations
7055 * going on.
7056 * We should probably just wait and retry,
7057 * but then we have to be careful that this
7058 * entry could get "simplified" after
7059 * "in_transition" gets unset and before
7060 * we re-lookup the entry, so we would
7061 * have to re-clip the entry to avoid
7062 * re-unwiring what we have already unwired...
7063 * See vm_map_wire_nested().
7064 *
7065 * Or we could just ignore "in_transition"
7066 * here and proceed to decrement the wired
7067 * count(s) on this entry. That should be fine
7068 * as long as "wired_count" doesn't drop all
7069 * the way to 0 (and we should panic if THAT
7070 * happens).
7071 */
7072 panic("vm_map_unwire: in_transition entry");
7073 }
7074
7075 entry = entry->vme_next;
7076 continue;
7077 }
7078
7079 if (entry->is_sub_map) {
7080 vm_map_offset_t sub_start;
7081 vm_map_offset_t sub_end;
7082 vm_map_offset_t local_end;
7083 pmap_t pmap;
7084
7085 vm_map_clip_start(map, entry, start);
7086 vm_map_clip_end(map, entry, end);
7087
7088 sub_start = VME_OFFSET(entry);
7089 sub_end = entry->vme_end - entry->vme_start;
7090 sub_end += VME_OFFSET(entry);
7091 local_end = entry->vme_end;
7092 if (map_pmap == NULL) {
7093 if (entry->use_pmap) {
7094 pmap = VME_SUBMAP(entry)->pmap;
7095 pmap_addr = sub_start;
7096 } else {
7097 pmap = map->pmap;
7098 pmap_addr = start;
7099 }
7100 if (entry->wired_count == 0 ||
7101 (user_wire && entry->user_wired_count == 0)) {
7102 if (!user_wire) {
7103 panic("vm_map_unwire: entry is unwired");
7104 }
7105 entry = entry->vme_next;
7106 continue;
7107 }
7108
7109 /*
7110 * Check for holes
7111 * Holes: Next entry should be contiguous unless
7112 * this is the end of the region.
7113 */
7114 if (((entry->vme_end < end) &&
7115 ((entry->vme_next == vm_map_to_entry(map)) ||
7116 (entry->vme_next->vme_start
7117 > entry->vme_end)))) {
7118 if (!user_wire) {
7119 panic("vm_map_unwire: non-contiguous region");
7120 }
7121 /*
7122 * entry = entry->vme_next;
7123 * continue;
7124 */
7125 }
7126
7127 subtract_wire_counts(map, entry, user_wire);
7128
7129 if (entry->wired_count != 0) {
7130 entry = entry->vme_next;
7131 continue;
7132 }
7133
7134 entry->in_transition = TRUE;
7135 tmp_entry = *entry; /* see comment in vm_map_wire() */
7136
7137 /*
7138 * We can unlock the map now. The in_transition state
7139 * guarantees existence of the entry.
7140 */
7141 vm_map_unlock(map);
7142 vm_map_unwire_nested(VME_SUBMAP(entry),
7143 sub_start, sub_end, user_wire, pmap, pmap_addr);
7144 vm_map_lock(map);
7145
7146 if (last_timestamp + 1 != map->timestamp) {
7147 /*
7148 * Find the entry again. It could have been
7149 * clipped or deleted after we unlocked the map.
7150 */
7151 if (!vm_map_lookup_entry(map,
7152 tmp_entry.vme_start,
7153 &first_entry)) {
7154 if (!user_wire) {
7155 panic("vm_map_unwire: re-lookup failed");
7156 }
7157 entry = first_entry->vme_next;
7158 } else {
7159 entry = first_entry;
7160 }
7161 }
7162 last_timestamp = map->timestamp;
7163
7164 /*
7165 * clear transition bit for all constituent entries
7166 * that were in the original entry (saved in
7167 * tmp_entry). Also check for waiters.
7168 */
7169 while ((entry != vm_map_to_entry(map)) &&
7170 (entry->vme_start < tmp_entry.vme_end)) {
7171 assert(entry->in_transition);
7172 entry->in_transition = FALSE;
7173 if (entry->needs_wakeup) {
7174 entry->needs_wakeup = FALSE;
7175 need_wakeup = TRUE;
7176 }
7177 entry = entry->vme_next;
7178 }
7179 continue;
7180 } else {
7181 vm_map_unlock(map);
7182 vm_map_unwire_nested(VME_SUBMAP(entry),
7183 sub_start, sub_end, user_wire, map_pmap,
7184 pmap_addr);
7185 vm_map_lock(map);
7186
7187 if (last_timestamp + 1 != map->timestamp) {
7188 /*
7189 * Find the entry again. It could have been
7190 * clipped or deleted after we unlocked the map.
7191 */
7192 if (!vm_map_lookup_entry(map,
7193 tmp_entry.vme_start,
7194 &first_entry)) {
7195 if (!user_wire) {
7196 panic("vm_map_unwire: re-lookup failed");
7197 }
7198 entry = first_entry->vme_next;
7199 } else {
7200 entry = first_entry;
7201 }
7202 }
7203 last_timestamp = map->timestamp;
7204 }
7205 }
7206
7207
7208 if ((entry->wired_count == 0) ||
7209 (user_wire && entry->user_wired_count == 0)) {
7210 if (!user_wire) {
7211 panic("vm_map_unwire: entry is unwired");
7212 }
7213
7214 entry = entry->vme_next;
7215 continue;
7216 }
7217
7218 assert(entry->wired_count > 0 &&
7219 (!user_wire || entry->user_wired_count > 0));
7220
7221 vm_map_clip_start(map, entry, start);
7222 vm_map_clip_end(map, entry, end);
7223
7224 /*
7225 * Check for holes
7226 * Holes: Next entry should be contiguous unless
7227 * this is the end of the region.
7228 */
7229 if (((entry->vme_end < end) &&
7230 ((entry->vme_next == vm_map_to_entry(map)) ||
7231 (entry->vme_next->vme_start > entry->vme_end)))) {
7232 if (!user_wire) {
7233 panic("vm_map_unwire: non-contiguous region");
7234 }
7235 entry = entry->vme_next;
7236 continue;
7237 }
7238
7239 subtract_wire_counts(map, entry, user_wire);
7240
7241 if (entry->wired_count != 0) {
7242 entry = entry->vme_next;
7243 continue;
7244 }
7245
7246 if (entry->zero_wired_pages) {
7247 entry->zero_wired_pages = FALSE;
7248 }
7249
7250 entry->in_transition = TRUE;
7251 tmp_entry = *entry; /* see comment in vm_map_wire() */
7252
7253 /*
7254 * We can unlock the map now. The in_transition state
7255 * guarantees existence of the entry.
7256 */
7257 vm_map_unlock(map);
7258 if (map_pmap) {
7259 vm_fault_unwire(map,
7260 &tmp_entry, FALSE, map_pmap, pmap_addr);
7261 } else {
7262 vm_fault_unwire(map,
7263 &tmp_entry, FALSE, map->pmap,
7264 tmp_entry.vme_start);
7265 }
7266 vm_map_lock(map);
7267
7268 if (last_timestamp + 1 != map->timestamp) {
7269 /*
7270 * Find the entry again. It could have been clipped
7271 * or deleted after we unlocked the map.
7272 */
7273 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
7274 &first_entry)) {
7275 if (!user_wire) {
7276 panic("vm_map_unwire: re-lookup failed");
7277 }
7278 entry = first_entry->vme_next;
7279 } else {
7280 entry = first_entry;
7281 }
7282 }
7283 last_timestamp = map->timestamp;
7284
7285 /*
7286 * clear transition bit for all constituent entries that
7287 * were in the original entry (saved in tmp_entry). Also
7288 * check for waiters.
7289 */
7290 while ((entry != vm_map_to_entry(map)) &&
7291 (entry->vme_start < tmp_entry.vme_end)) {
7292 assert(entry->in_transition);
7293 entry->in_transition = FALSE;
7294 if (entry->needs_wakeup) {
7295 entry->needs_wakeup = FALSE;
7296 need_wakeup = TRUE;
7297 }
7298 entry = entry->vme_next;
7299 }
7300 }
7301
7302 /*
7303 * We might have fragmented the address space when we wired this
7304 * range of addresses. Attempt to re-coalesce these VM map entries
7305 * with their neighbors now that they're no longer wired.
7306 * Under some circumstances, address space fragmentation can
7307 * prevent VM object shadow chain collapsing, which can cause
7308 * swap space leaks.
7309 */
7310 vm_map_simplify_range(map, start, end);
7311
7312 vm_map_unlock(map);
7313 /*
7314 * wake up anybody waiting on entries that we have unwired.
7315 */
7316 if (need_wakeup) {
7317 vm_map_entry_wakeup(map);
7318 }
7319 return KERN_SUCCESS;
7320 }
7321
7322 kern_return_t
7323 vm_map_unwire(
7324 vm_map_t map,
7325 vm_map_offset_t start,
7326 vm_map_offset_t end,
7327 boolean_t user_wire)
7328 {
7329 return vm_map_unwire_nested(map, start, end,
7330 user_wire, (pmap_t)NULL, 0);
7331 }
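
/*
 * Illustrative sketch (not part of the original source): unwiring is
 * expected to mirror the original wire request, e.g.
 *
 *	kr = vm_map_wire_kernel(map, addr, addr + PAGE_SIZE,
 *	    VM_PROT_READ, VM_KERN_MEMORY_NONE, FALSE);
 *	...
 *	kr = vm_map_unwire(map, addr, addr + PAGE_SIZE, FALSE);
 *
 * The user_wire argument should match the one used when wiring:
 * passing TRUE for a kernel wiring makes the loop above treat the
 * entry as not user-wired and skip it, leaving the kernel wiring in
 * place.
 */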
7332
7333
7334 /*
7335 * vm_map_entry_delete: [ internal use only ]
7336 *
7337 * Deallocate the given entry from the target map.
7338 */
7339 static void
7340 vm_map_entry_delete(
7341 vm_map_t map,
7342 vm_map_entry_t entry)
7343 {
7344 vm_map_offset_t s, e;
7345 vm_object_t object;
7346 vm_map_t submap;
7347
7348 s = entry->vme_start;
7349 e = entry->vme_end;
7350 assert(page_aligned(s));
7351 assert(page_aligned(e));
7352 if (entry->map_aligned == TRUE) {
7353 assert(VM_MAP_PAGE_ALIGNED(s, VM_MAP_PAGE_MASK(map)));
7354 assert(VM_MAP_PAGE_ALIGNED(e, VM_MAP_PAGE_MASK(map)));
7355 }
7356 assert(entry->wired_count == 0);
7357 assert(entry->user_wired_count == 0);
7358 assert(!entry->permanent);
7359
7360 if (entry->is_sub_map) {
7361 object = NULL;
7362 submap = VME_SUBMAP(entry);
7363 } else {
7364 submap = NULL;
7365 object = VME_OBJECT(entry);
7366 }
7367
7368 vm_map_store_entry_unlink(map, entry);
7369 map->size -= e - s;
7370
7371 vm_map_entry_dispose(map, entry);
7372
7373 vm_map_unlock(map);
7374 /*
7375 * Deallocate the object only after removing all
7376 * pmap entries pointing to its pages.
7377 */
7378 if (submap) {
7379 vm_map_deallocate(submap);
7380 } else {
7381 vm_object_deallocate(object);
7382 }
7383 }
7384
7385 void
7386 vm_map_submap_pmap_clean(
7387 vm_map_t map,
7388 vm_map_offset_t start,
7389 vm_map_offset_t end,
7390 vm_map_t sub_map,
7391 vm_map_offset_t offset)
7392 {
7393 vm_map_offset_t submap_start;
7394 vm_map_offset_t submap_end;
7395 vm_map_size_t remove_size;
7396 vm_map_entry_t entry;
7397
7398 submap_end = offset + (end - start);
7399 submap_start = offset;
7400
7401 vm_map_lock_read(sub_map);
7402 if (vm_map_lookup_entry(sub_map, offset, &entry)) {
7403 remove_size = (entry->vme_end - entry->vme_start);
7404 if (offset > entry->vme_start) {
7405 remove_size -= offset - entry->vme_start;
7406 }
7407
7408
7409 if (submap_end < entry->vme_end) {
7410 remove_size -=
7411 entry->vme_end - submap_end;
7412 }
7413 if (entry->is_sub_map) {
7414 vm_map_submap_pmap_clean(
7415 sub_map,
7416 start,
7417 start + remove_size,
7418 VME_SUBMAP(entry),
7419 VME_OFFSET(entry));
7420 } else {
7421 if (map->mapped_in_other_pmaps &&
7422 os_ref_get_count(&map->map_refcnt) != 0 &&
7423 VME_OBJECT(entry) != NULL) {
7424 vm_object_pmap_protect_options(
7425 VME_OBJECT(entry),
7426 (VME_OFFSET(entry) +
7427 offset -
7428 entry->vme_start),
7429 remove_size,
7430 PMAP_NULL,
7431 entry->vme_start,
7432 VM_PROT_NONE,
7433 PMAP_OPTIONS_REMOVE);
7434 } else {
7435 pmap_remove(map->pmap,
7436 (addr64_t)start,
7437 (addr64_t)(start + remove_size));
7438 }
7439 }
7440 }
7441
7442 entry = entry->vme_next;
7443
7444 while ((entry != vm_map_to_entry(sub_map))
7445 && (entry->vme_start < submap_end)) {
7446 remove_size = (entry->vme_end - entry->vme_start);
7447 if (submap_end < entry->vme_end) {
7448 remove_size -= entry->vme_end - submap_end;
7449 }
7450 if (entry->is_sub_map) {
7451 vm_map_submap_pmap_clean(
7452 sub_map,
7453 (start + entry->vme_start) - offset,
7454 ((start + entry->vme_start) - offset) + remove_size,
7455 VME_SUBMAP(entry),
7456 VME_OFFSET(entry));
7457 } else {
7458 if (map->mapped_in_other_pmaps &&
7459 os_ref_get_count(&map->map_refcnt) != 0 &&
7460 VME_OBJECT(entry) != NULL) {
7461 vm_object_pmap_protect_options(
7462 VME_OBJECT(entry),
7463 VME_OFFSET(entry),
7464 remove_size,
7465 PMAP_NULL,
7466 entry->vme_start,
7467 VM_PROT_NONE,
7468 PMAP_OPTIONS_REMOVE);
7469 } else {
7470 pmap_remove(map->pmap,
7471 (addr64_t)((start + entry->vme_start)
7472 - offset),
7473 (addr64_t)(((start + entry->vme_start)
7474 - offset) + remove_size));
7475 }
7476 }
7477 entry = entry->vme_next;
7478 }
7479 vm_map_unlock_read(sub_map);
7480 return;
7481 }
7482
7483 /*
7484 * virt_memory_guard_ast:
7485 *
7486 * Handle the AST callout for a virtual memory guard.
7487 * Raise an EXC_GUARD exception and terminate the task
7488 * if configured to do so.
7489 */
7490 void
7491 virt_memory_guard_ast(
7492 thread_t thread,
7493 mach_exception_data_type_t code,
7494 mach_exception_data_type_t subcode)
7495 {
7496 task_t task = thread->task;
7497 assert(task != kernel_task);
7498 assert(task == current_task());
7499 uint32_t behavior;
7500
7501 behavior = task->task_exc_guard;
7502
7503 /* Is delivery enabled */
7504 if ((behavior & TASK_EXC_GUARD_VM_DELIVER) == 0) {
7505 return;
7506 }
7507
7508 /* If only once, make sure we're that once */
7509 while (behavior & TASK_EXC_GUARD_VM_ONCE) {
7510 uint32_t new_behavior = behavior & ~TASK_EXC_GUARD_VM_DELIVER;
7511
7512 if (OSCompareAndSwap(behavior, new_behavior, &task->task_exc_guard)) {
7513 break;
7514 }
7515 behavior = task->task_exc_guard;
7516 if ((behavior & TASK_EXC_GUARD_VM_DELIVER) == 0) {
7517 return;
7518 }
7519 }
7520
7521 /* Raise exception via corpse fork or synchronously */
7522 if ((task->task_exc_guard & TASK_EXC_GUARD_VM_CORPSE) &&
7523 (task->task_exc_guard & TASK_EXC_GUARD_VM_FATAL) == 0) {
7524 task_violated_guard(code, subcode, NULL);
7525 } else {
7526 task_exception_notify(EXC_GUARD, code, subcode);
7527 }
7528
7529 /* Terminate the task if desired */
7530 if (task->task_exc_guard & TASK_EXC_GUARD_VM_FATAL) {
7531 task_bsdtask_kill(current_task());
7532 }
7533 }
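
/*
 * Illustrative summary (not part of the original source) of how the
 * TASK_EXC_GUARD_VM_* bits combine in the routine above:
 *
 *	DELIVER          -> synchronous EXC_GUARD, task survives
 *	DELIVER | ONCE   -> same, but DELIVER is cleared first via the
 *	                    OSCompareAndSwap() loop, so at most one
 *	                    exception is ever raised
 *	DELIVER | CORPSE -> exception raised via a corpse fork instead
 *	DELIVER | FATAL  -> synchronous exception, then the task is killed
 */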
7534
7535 /*
7536 * vm_map_guard_exception:
7537 *
7538 * Generate a GUARD_TYPE_VIRTUAL_MEMORY EXC_GUARD exception.
7539 *
7540 * Right now, we do this when we find nothing mapped, or a
7541 * gap in the mapping when a user address space deallocate
7542 * was requested. We report the address of the first gap found.
7543 */
7544 static void
7545 vm_map_guard_exception(
7546 vm_map_offset_t gap_start,
7547 unsigned reason)
7548 {
7549 mach_exception_code_t code = 0;
7550 unsigned int guard_type = GUARD_TYPE_VIRT_MEMORY;
7551 unsigned int target = 0; /* should we pass in pid associated with map? */
7552 mach_exception_data_type_t subcode = (uint64_t)gap_start;
7553 boolean_t fatal = FALSE;
7554
7555 task_t task = current_task();
7556
7557 /* Can't deliver exceptions to kernel task */
7558 if (task == kernel_task) {
7559 return;
7560 }
7561
7562 EXC_GUARD_ENCODE_TYPE(code, guard_type);
7563 EXC_GUARD_ENCODE_FLAVOR(code, reason);
7564 EXC_GUARD_ENCODE_TARGET(code, target);
7565
7566 if (task->task_exc_guard & TASK_EXC_GUARD_VM_FATAL) {
7567 fatal = TRUE;
7568 }
7569 thread_guard_violation(current_thread(), code, subcode, fatal);
7570 }
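
/*
 * Illustrative caller sketch (not part of the original source): the
 * deallocation path below reports the first gap it finds in a user
 * range roughly like
 *
 *	if (gap_start != FIND_GAP && gap_start != GAPS_OK) {
 *		vm_map_guard_exception(gap_start, kGUARD_EXC_DEALLOC_GAP);
 *	}
 *
 * with kGUARD_EXC_DEALLOC_GAP as the EXC_GUARD flavor ("reason") for a
 * gap encountered during deallocation.
 */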
7571
7572 /*
7573 * vm_map_delete: [ internal use only ]
7574 *
7575 * Deallocates the given address range from the target map.
7576 * Removes all user wirings. Unwires one kernel wiring if
7577 * VM_MAP_REMOVE_KUNWIRE is set. Waits for kernel wirings to go
7578 * away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set. Sleeps
7579 * interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
7580 *
7581 * This routine is called with map locked and leaves map locked.
7582 */
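/*
 * Illustrative sketch (not part of the original source): most callers
 * reach this routine through vm_map_remove(), e.g.
 *
 *	vm_map_remove(map,
 *	    vm_map_trunc_page(start, VM_MAP_PAGE_MASK(map)),
 *	    vm_map_round_page(end, VM_MAP_PAGE_MASK(map)),
 *	    VM_MAP_REMOVE_NO_FLAGS);
 *
 * while a kernel caller that holds its own wiring on the range would
 * pass VM_MAP_REMOVE_KUNWIRE so that exactly one kernel wiring is
 * dropped here.
 */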
7583 static kern_return_t
7584 vm_map_delete(
7585 vm_map_t map,
7586 vm_map_offset_t start,
7587 vm_map_offset_t end,
7588 int flags,
7589 vm_map_t zap_map)
7590 {
7591 vm_map_entry_t entry, next;
7592 struct vm_map_entry *first_entry, tmp_entry;
7593 vm_map_offset_t s;
7594 vm_object_t object;
7595 boolean_t need_wakeup;
7596 unsigned int last_timestamp = ~0; /* unlikely value */
7597 int interruptible;
7598 vm_map_offset_t gap_start;
7599 __unused vm_map_offset_t save_start = start;
7600 __unused vm_map_offset_t save_end = end;
7601 const vm_map_offset_t FIND_GAP = 1; /* a non-page-aligned value */
7602 const vm_map_offset_t GAPS_OK = 2; /* a different non-page-aligned value */
7603
7604 if (map != kernel_map && !(flags & VM_MAP_REMOVE_GAPS_OK)) {
7605 gap_start = FIND_GAP;
7606 } else {
7607 gap_start = GAPS_OK;
7608 }
7609
7610 interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
7611 THREAD_ABORTSAFE : THREAD_UNINT;
7612
7613 /*
7614 * All our DMA I/O operations in IOKit are currently done by
7615 * wiring through the map entries of the task requesting the I/O.
7616 * Because of this, we must always wait for kernel wirings
7617 * to go away on the entries before deleting them.
7618 *
7619 * Any caller who wants to actually remove a kernel wiring
7620 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
7621 * properly remove one wiring instead of blasting through
7622 * them all.
7623 */
7624 flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;
7625
7626 while (1) {
7627 /*
7628 * Find the start of the region, and clip it
7629 */
7630 if (vm_map_lookup_entry(map, start, &first_entry)) {
7631 entry = first_entry;
7632 if (map == kalloc_map &&
7633 (entry->vme_start != start ||
7634 entry->vme_end != end)) {
7635 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7636 "mismatched entry %p [0x%llx:0x%llx]\n",
7637 map,
7638 (uint64_t)start,
7639 (uint64_t)end,
7640 entry,
7641 (uint64_t)entry->vme_start,
7642 (uint64_t)entry->vme_end);
7643 }
7644
7645 /*
7646 * If in a superpage, extend the range to include the start of the mapping.
7647 */
7648 if (entry->superpage_size && (start & ~SUPERPAGE_MASK)) {
7649 start = SUPERPAGE_ROUND_DOWN(start);
7650 continue;
7651 }
7652
7653 if (start == entry->vme_start) {
7654 /*
7655 * No need to clip. We don't want to cause
7656 * any unnecessary unnesting in this case...
7657 */
7658 } else {
7659 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
7660 entry->map_aligned &&
7661 !VM_MAP_PAGE_ALIGNED(
7662 start,
7663 VM_MAP_PAGE_MASK(map))) {
7664 /*
7665 * The entry will no longer be
7666 * map-aligned after clipping
7667 * and the caller said it's OK.
7668 */
7669 entry->map_aligned = FALSE;
7670 }
7671 if (map == kalloc_map) {
7672 panic("vm_map_delete(%p,0x%llx,0x%llx):"
7673 " clipping %p at 0x%llx\n",
7674 map,
7675 (uint64_t)start,
7676 (uint64_t)end,
7677 entry,
7678 (uint64_t)start);
7679 }
7680 vm_map_clip_start(map, entry, start);
7681 }
7682
7683 /*
7684 * Fix the lookup hint now, rather than each
7685 * time through the loop.
7686 */
7687 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
7688 } else {
7689 if (map->pmap == kernel_pmap &&
7690 os_ref_get_count(&map->map_refcnt) != 0) {
7691 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7692 "no map entry at 0x%llx\n",
7693 map,
7694 (uint64_t)start,
7695 (uint64_t)end,
7696 (uint64_t)start);
7697 }
7698 entry = first_entry->vme_next;
7699 if (gap_start == FIND_GAP) {
7700 gap_start = start;
7701 }
7702 }
7703 break;
7704 }
7705 if (entry->superpage_size) {
7706 end = SUPERPAGE_ROUND_UP(end);
7707 }
7708
7709 need_wakeup = FALSE;
7710 /*
7711 * Step through all entries in this region
7712 */
7713 s = entry->vme_start;
7714 while ((entry != vm_map_to_entry(map)) && (s < end)) {
7715 /*
7716 * At this point, we have deleted all the memory entries
7717 * between "start" and "s". We still need to delete
7718 * all memory entries between "s" and "end".
7719 * While we were blocked and the map was unlocked, some
7720 * new memory entries could have been re-allocated between
7721 * "start" and "s" and we don't want to mess with those.
7722 * Some of those entries could even have been re-assembled
7723 * with an entry after "s" (in vm_map_simplify_entry()), so
7724 * we may have to vm_map_clip_start() again.
7725 */
7726
7727 if (entry->vme_start >= s) {
7728 /*
7729 * This entry starts on or after "s"
7730 * so no need to clip its start.
7731 */
7732 } else {
7733 /*
7734 * This entry has been re-assembled by a
7735 * vm_map_simplify_entry(). We need to
7736 * re-clip its start.
7737 */
7738 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
7739 entry->map_aligned &&
7740 !VM_MAP_PAGE_ALIGNED(s,
7741 VM_MAP_PAGE_MASK(map))) {
7742 /*
7743 * The entry will no longer be map-aligned
7744 * after clipping and the caller said it's OK.
7745 */
7746 entry->map_aligned = FALSE;
7747 }
7748 if (map == kalloc_map) {
7749 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7750 "clipping %p at 0x%llx\n",
7751 map,
7752 (uint64_t)start,
7753 (uint64_t)end,
7754 entry,
7755 (uint64_t)s);
7756 }
7757 vm_map_clip_start(map, entry, s);
7758 }
7759 if (entry->vme_end <= end) {
7760 /*
7761 * This entry is going away completely, so no need
7762 * to clip and possibly cause an unnecessary unnesting.
7763 */
7764 } else {
7765 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
7766 entry->map_aligned &&
7767 !VM_MAP_PAGE_ALIGNED(end,
7768 VM_MAP_PAGE_MASK(map))) {
7769 /*
7770 * The entry will no longer be map-aligned
7771 * after clipping and the caller said it's OK.
7772 */
7773 entry->map_aligned = FALSE;
7774 }
7775 if (map == kalloc_map) {
7776 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7777 "clipping %p at 0x%llx\n",
7778 map,
7779 (uint64_t)start,
7780 (uint64_t)end,
7781 entry,
7782 (uint64_t)end);
7783 }
7784 vm_map_clip_end(map, entry, end);
7785 }
7786
7787 if (entry->permanent) {
7788 if (map->pmap == kernel_pmap) {
7789 panic("%s(%p,0x%llx,0x%llx): "
7790 "attempt to remove permanent "
7791 "VM map entry "
7792 "%p [0x%llx:0x%llx]\n",
7793 __FUNCTION__,
7794 map,
7795 (uint64_t) start,
7796 (uint64_t) end,
7797 entry,
7798 (uint64_t) entry->vme_start,
7799 (uint64_t) entry->vme_end);
7800 } else if (flags & VM_MAP_REMOVE_IMMUTABLE) {
7801 // printf("FBDP %d[%s] removing permanent entry %p [0x%llx:0x%llx] prot 0x%x/0x%x\n", proc_selfpid(), (current_task()->bsd_info ? proc_name_address(current_task()->bsd_info) : "?"), entry, (uint64_t)entry->vme_start, (uint64_t)entry->vme_end, entry->protection, entry->max_protection);
7802 entry->permanent = FALSE;
7803 #if PMAP_CS
7804 } else if ((entry->protection & VM_PROT_EXECUTE) && !pmap_cs_enforced(map->pmap)) {
7805 entry->permanent = FALSE;
7806
7807 printf("%d[%s] %s(0x%llx,0x%llx): "
7808 "pmap_cs disabled, allowing for permanent executable entry [0x%llx:0x%llx] "
7809 "prot 0x%x/0x%x\n",
7810 proc_selfpid(),
7811 (current_task()->bsd_info
7812 ? proc_name_address(current_task()->bsd_info)
7813 : "?"),
7814 __FUNCTION__,
7815 (uint64_t) start,
7816 (uint64_t) end,
7817 (uint64_t)entry->vme_start,
7818 (uint64_t)entry->vme_end,
7819 entry->protection,
7820 entry->max_protection);
7821 #endif
7822 } else {
7823 if (vm_map_executable_immutable_verbose) {
7824 printf("%d[%s] %s(0x%llx,0x%llx): "
7825 "permanent entry [0x%llx:0x%llx] "
7826 "prot 0x%x/0x%x\n",
7827 proc_selfpid(),
7828 (current_task()->bsd_info
7829 ? proc_name_address(current_task()->bsd_info)
7830 : "?"),
7831 __FUNCTION__,
7832 (uint64_t) start,
7833 (uint64_t) end,
7834 (uint64_t)entry->vme_start,
7835 (uint64_t)entry->vme_end,
7836 entry->protection,
7837 entry->max_protection);
7838 }
7839 /*
7840 * dtrace -n 'vm_map_delete_permanent { print("start=0x%llx end=0x%llx prot=0x%x/0x%x\n", arg0, arg1, arg2, arg3); stack(); ustack(); }'
7841 */
7842 DTRACE_VM5(vm_map_delete_permanent,
7843 vm_map_offset_t, entry->vme_start,
7844 vm_map_offset_t, entry->vme_end,
7845 vm_prot_t, entry->protection,
7846 vm_prot_t, entry->max_protection,
7847 int, VME_ALIAS(entry));
7848 }
7849 }
7850
7851
7852 if (entry->in_transition) {
7853 wait_result_t wait_result;
7854
7855 /*
7856 * Another thread is wiring/unwiring this entry.
7857 * Let the other thread know we are waiting.
7858 */
7859 assert(s == entry->vme_start);
7860 entry->needs_wakeup = TRUE;
7861
7862 /*
7863 * wake up anybody waiting on entries that we have
7864 * already unwired/deleted.
7865 */
7866 if (need_wakeup) {
7867 vm_map_entry_wakeup(map);
7868 need_wakeup = FALSE;
7869 }
7870
7871 wait_result = vm_map_entry_wait(map, interruptible);
7872
7873 if (interruptible &&
7874 wait_result == THREAD_INTERRUPTED) {
7875 /*
7876 * We do not clear the needs_wakeup flag,
7877 * since we cannot tell if we were the only one.
7878 */
7879 return KERN_ABORTED;
7880 }
7881
7882 /*
7883 * The entry could have been clipped or it
7884 * may not exist anymore. Look it up again.
7885 */
7886 if (!vm_map_lookup_entry(map, s, &first_entry)) {
7887 /*
7888 * User: use the next entry
7889 */
7890 if (gap_start == FIND_GAP) {
7891 gap_start = s;
7892 }
7893 entry = first_entry->vme_next;
7894 s = entry->vme_start;
7895 } else {
7896 entry = first_entry;
7897 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
7898 }
7899 last_timestamp = map->timestamp;
7900 continue;
7901 } /* end in_transition */
7902
7903 if (entry->wired_count) {
7904 boolean_t user_wire;
7905
7906 user_wire = entry->user_wired_count > 0;
7907
7908 /*
7909 * Remove a kernel wiring if requested
7910 */
7911 if (flags & VM_MAP_REMOVE_KUNWIRE) {
7912 entry->wired_count--;
7913 }
7914
7915 /*
7916 * Remove all user wirings for proper accounting
7917 */
7918 if (entry->user_wired_count > 0) {
7919 while (entry->user_wired_count) {
7920 subtract_wire_counts(map, entry, user_wire);
7921 }
7922 }
7923
7924 if (entry->wired_count != 0) {
7925 assert(map != kernel_map);
7926 /*
7927 * Cannot continue. Typical case is when
7928 * a user thread has physical I/O pending
7929 * on this page. Either wait for the
7930 * kernel wiring to go away or return an
7931 * error.
7932 */
7933 if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) {
7934 wait_result_t wait_result;
7935
7936 assert(s == entry->vme_start);
7937 entry->needs_wakeup = TRUE;
7938 wait_result = vm_map_entry_wait(map,
7939 interruptible);
7940
7941 if (interruptible &&
7942 wait_result == THREAD_INTERRUPTED) {
7943 /*
7944 * We do not clear the
7945 * needs_wakeup flag, since we
7946 * cannot tell if we were the
7947 * only one.
7948 */
7949 return KERN_ABORTED;
7950 }
7951
7952 /*
7953 * The entry could have been clipped or
7954 * it may not exist anymore. Look it
7955 * up again.
7956 */
7957 if (!vm_map_lookup_entry(map, s,
7958 &first_entry)) {
7959 assert(map != kernel_map);
7960 /*
7961 * User: use the next entry
7962 */
7963 if (gap_start == FIND_GAP) {
7964 gap_start = s;
7965 }
7966 entry = first_entry->vme_next;
7967 s = entry->vme_start;
7968 } else {
7969 entry = first_entry;
7970 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
7971 }
7972 last_timestamp = map->timestamp;
7973 continue;
7974 } else {
7975 return KERN_FAILURE;
7976 }
7977 }
7978
7979 entry->in_transition = TRUE;
7980 /*
7981 * copy current entry. see comment in vm_map_wire()
7982 */
7983 tmp_entry = *entry;
7984 assert(s == entry->vme_start);
7985
7986 /*
7987 * We can unlock the map now. The in_transition
7988 * state guarantees existence of the entry.
7989 */
7990 vm_map_unlock(map);
7991
7992 if (tmp_entry.is_sub_map) {
7993 vm_map_t sub_map;
7994 vm_map_offset_t sub_start, sub_end;
7995 pmap_t pmap;
7996 vm_map_offset_t pmap_addr;
7997
7998
7999 sub_map = VME_SUBMAP(&tmp_entry);
8000 sub_start = VME_OFFSET(&tmp_entry);
8001 sub_end = sub_start + (tmp_entry.vme_end -
8002 tmp_entry.vme_start);
8003 if (tmp_entry.use_pmap) {
8004 pmap = sub_map->pmap;
8005 pmap_addr = tmp_entry.vme_start;
8006 } else {
8007 pmap = map->pmap;
8008 pmap_addr = tmp_entry.vme_start;
8009 }
8010 (void) vm_map_unwire_nested(sub_map,
8011 sub_start, sub_end,
8012 user_wire,
8013 pmap, pmap_addr);
8014 } else {
8015 if (VME_OBJECT(&tmp_entry) == kernel_object) {
8016 pmap_protect_options(
8017 map->pmap,
8018 tmp_entry.vme_start,
8019 tmp_entry.vme_end,
8020 VM_PROT_NONE,
8021 PMAP_OPTIONS_REMOVE,
8022 NULL);
8023 }
8024 vm_fault_unwire(map, &tmp_entry,
8025 VME_OBJECT(&tmp_entry) == kernel_object,
8026 map->pmap, tmp_entry.vme_start);
8027 }
8028
8029 vm_map_lock(map);
8030
8031 if (last_timestamp + 1 != map->timestamp) {
8032 /*
8033 * Find the entry again. It could have
8034 * been clipped after we unlocked the map.
8035 */
8036 if (!vm_map_lookup_entry(map, s, &first_entry)) {
8037 assert((map != kernel_map) &&
8038 (!entry->is_sub_map));
8039 if (gap_start == FIND_GAP) {
8040 gap_start = s;
8041 }
8042 first_entry = first_entry->vme_next;
8043 s = first_entry->vme_start;
8044 } else {
8045 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8046 }
8047 } else {
8048 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8049 first_entry = entry;
8050 }
8051
8052 last_timestamp = map->timestamp;
8053
8054 entry = first_entry;
8055 while ((entry != vm_map_to_entry(map)) &&
8056 (entry->vme_start < tmp_entry.vme_end)) {
8057 assert(entry->in_transition);
8058 entry->in_transition = FALSE;
8059 if (entry->needs_wakeup) {
8060 entry->needs_wakeup = FALSE;
8061 need_wakeup = TRUE;
8062 }
8063 entry = entry->vme_next;
8064 }
8065 /*
8066 * We have unwired the entry(s). Go back and
8067 * delete them.
8068 */
8069 entry = first_entry;
8070 continue;
8071 }
8072
8073 /* entry is unwired */
8074 assert(entry->wired_count == 0);
8075 assert(entry->user_wired_count == 0);
8076
8077 assert(s == entry->vme_start);
8078
8079 if (flags & VM_MAP_REMOVE_NO_PMAP_CLEANUP) {
8080 /*
8081 * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
8082 * vm_map_delete(), some map entries might have been
8083 * transferred to a "zap_map", which doesn't have a
8084 * pmap. The original pmap has already been flushed
8085 * in the vm_map_delete() call targeting the original
8086 * map, but when we get to destroying the "zap_map",
8087 * we don't have any pmap to flush, so let's just skip
8088 * all this.
8089 */
8090 } else if (entry->is_sub_map) {
8091 if (entry->use_pmap) {
8092 #ifndef NO_NESTED_PMAP
8093 int pmap_flags;
8094
8095 if (flags & VM_MAP_REMOVE_NO_UNNESTING) {
8096 /*
8097 * This is the final cleanup of the
8098 * address space being terminated.
8099 * No new mappings are expected and
8100 * we don't really need to unnest the
8101 * shared region (and lose the "global"
8102 * pmap mappings, if applicable).
8103 *
8104 * Tell the pmap layer that we're
8105 * "clean" wrt nesting.
8106 */
8107 pmap_flags = PMAP_UNNEST_CLEAN;
8108 } else {
8109 /*
8110 * We're unmapping part of the nested
8111 * shared region, so we can't keep the
8112 * nested pmap.
8113 */
8114 pmap_flags = 0;
8115 }
8116 pmap_unnest_options(
8117 map->pmap,
8118 (addr64_t)entry->vme_start,
8119 entry->vme_end - entry->vme_start,
8120 pmap_flags);
8121 #endif /* NO_NESTED_PMAP */
8122 if (map->mapped_in_other_pmaps &&
8123 os_ref_get_count(&map->map_refcnt) != 0) {
8124 /* clean up parent map/maps */
8125 vm_map_submap_pmap_clean(
8126 map, entry->vme_start,
8127 entry->vme_end,
8128 VME_SUBMAP(entry),
8129 VME_OFFSET(entry));
8130 }
8131 } else {
8132 vm_map_submap_pmap_clean(
8133 map, entry->vme_start, entry->vme_end,
8134 VME_SUBMAP(entry),
8135 VME_OFFSET(entry));
8136 }
8137 } else if (VME_OBJECT(entry) != kernel_object &&
8138 VME_OBJECT(entry) != compressor_object) {
8139 object = VME_OBJECT(entry);
8140 if (map->mapped_in_other_pmaps &&
8141 os_ref_get_count(&map->map_refcnt) != 0) {
8142 vm_object_pmap_protect_options(
8143 object, VME_OFFSET(entry),
8144 entry->vme_end - entry->vme_start,
8145 PMAP_NULL,
8146 entry->vme_start,
8147 VM_PROT_NONE,
8148 PMAP_OPTIONS_REMOVE);
8149 } else if ((VME_OBJECT(entry) != VM_OBJECT_NULL) ||
8150 (map->pmap == kernel_pmap)) {
8151 /* Remove translations associated
8152 * with this range unless the entry
8153 * does not have an object, or
8154 * it's the kernel map or a descendant
8155 * since the platform could potentially
8156 * create "backdoor" mappings invisible
8157 * to the VM. It is expected that
8158 * objectless, non-kernel ranges
8159 * do not have such VM invisible
8160 * translations.
8161 */
8162 pmap_remove_options(map->pmap,
8163 (addr64_t)entry->vme_start,
8164 (addr64_t)entry->vme_end,
8165 PMAP_OPTIONS_REMOVE);
8166 }
8167 }
8168
8169 if (entry->iokit_acct) {
8170 /* alternate accounting */
8171 DTRACE_VM4(vm_map_iokit_unmapped_region,
8172 vm_map_t, map,
8173 vm_map_offset_t, entry->vme_start,
8174 vm_map_offset_t, entry->vme_end,
8175 int, VME_ALIAS(entry));
8176 vm_map_iokit_unmapped_region(map,
8177 (entry->vme_end -
8178 entry->vme_start));
8179 entry->iokit_acct = FALSE;
8180 entry->use_pmap = FALSE;
8181 }
8182
8183 /*
8184 * All pmap mappings for this map entry must have been
8185 * cleared by now.
8186 */
8187 #if DEBUG
8188 assert(vm_map_pmap_is_empty(map,
8189 entry->vme_start,
8190 entry->vme_end));
8191 #endif /* DEBUG */
8192
8193 next = entry->vme_next;
8194
8195 if (map->pmap == kernel_pmap &&
8196 os_ref_get_count(&map->map_refcnt) != 0 &&
8197 entry->vme_end < end &&
8198 (next == vm_map_to_entry(map) ||
8199 next->vme_start != entry->vme_end)) {
8200 panic("vm_map_delete(%p,0x%llx,0x%llx): "
8201 "hole after %p at 0x%llx\n",
8202 map,
8203 (uint64_t)start,
8204 (uint64_t)end,
8205 entry,
8206 (uint64_t)entry->vme_end);
8207 }
8208
8209 /*
8210 * If the desired range didn't end with "entry", then there is a gap if
8211 * we wrapped around to the start of the map or if "entry" and "next"
8212 * aren't contiguous.
8213 *
8214 * The vm_map_round_page() is needed since an entry can be smaller than the map's page size (VM_MAP_PAGE_MASK()).
8215 * For example, on devices with 4K hardware pages, map entries are now all 16K-sized.
8216 */
8217 if (gap_start == FIND_GAP &&
8218 vm_map_round_page(entry->vme_end, VM_MAP_PAGE_MASK(map)) < end &&
8219 (next == vm_map_to_entry(map) || entry->vme_end != next->vme_start)) {
8220 gap_start = entry->vme_end;
8221 }
8222 s = next->vme_start;
8223 last_timestamp = map->timestamp;
8224
8225 if (entry->permanent) {
8226 /*
8227 * A permanent entry cannot be removed, so leave it
8228 * in place but remove all access permissions.
8229 */
8230 entry->protection = VM_PROT_NONE;
8231 entry->max_protection = VM_PROT_NONE;
8232 } else if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) &&
8233 zap_map != VM_MAP_NULL) {
8234 vm_map_size_t entry_size;
8235 /*
8236 * The caller wants to save the affected VM map entries
8237 * into the "zap_map". The caller will take care of
8238 * these entries.
8239 */
8240 /* unlink the entry from "map" ... */
8241 vm_map_store_entry_unlink(map, entry);
8242 /* ... and add it to the end of the "zap_map" */
8243 vm_map_store_entry_link(zap_map,
8244 vm_map_last_entry(zap_map),
8245 entry,
8246 VM_MAP_KERNEL_FLAGS_NONE);
8247 entry_size = entry->vme_end - entry->vme_start;
8248 map->size -= entry_size;
8249 zap_map->size += entry_size;
8250 /* we didn't unlock the map, so no timestamp increase */
8251 last_timestamp--;
8252 } else {
8253 vm_map_entry_delete(map, entry);
8254 /* vm_map_entry_delete unlocks the map */
8255 vm_map_lock(map);
8256 }
8257
8258 entry = next;
8259
8260 if (entry == vm_map_to_entry(map)) {
8261 break;
8262 }
8263 if (last_timestamp + 1 != map->timestamp) {
8264 /*
8265 * We are responsible for deleting everything
8266 * from the given space. If someone has interfered,
8267 * we pick up where we left off. Backfills should
8268 * be all right for anyone except map_delete, and
8269 * we have to assume that the task has been fully
8270 * disabled before we get here.
8271 */
8272 if (!vm_map_lookup_entry(map, s, &entry)) {
8273 entry = entry->vme_next;
8274
8275 /*
8276 * Nothing found for s. If we weren't already done, then there is a gap.
8277 */
8278 if (gap_start == FIND_GAP && s < end) {
8279 gap_start = s;
8280 }
8281 s = entry->vme_start;
8282 } else {
8283 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8284 }
8285 /*
8286 * Others can not only allocate behind us, we can
8287 * also see entries coalesce while we don't hold the map lock.
8288 */
8289 if (entry == vm_map_to_entry(map)) {
8290 break;
8291 }
8292 }
8293 last_timestamp = map->timestamp;
8294 }
8295
8296 if (map->wait_for_space) {
8297 thread_wakeup((event_t) map);
8298 }
8299 /*
8300 * wake up anybody waiting on entries that we have already deleted.
8301 */
8302 if (need_wakeup) {
8303 vm_map_entry_wakeup(map);
8304 }
8305
8306 if (gap_start != FIND_GAP && gap_start != GAPS_OK) {
8307 DTRACE_VM3(kern_vm_deallocate_gap,
8308 vm_map_offset_t, gap_start,
8309 vm_map_offset_t, save_start,
8310 vm_map_offset_t, save_end);
8311 if (!(flags & VM_MAP_REMOVE_GAPS_OK)) {
8312 vm_map_guard_exception(gap_start, kGUARD_EXC_DEALLOC_GAP);
8313 }
8314 }
8315
8316 return KERN_SUCCESS;
8317 }
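/*
 * Illustrative note (not part of the original source): a caller that may
 * legitimately deallocate a range containing holes can pass
 * VM_MAP_REMOVE_GAPS_OK to suppress the kGUARD_EXC_DEALLOC_GAP guard
 * exception raised above, e.g. (hypothetical caller):
 *
 *	kr = vm_map_remove(map, start, end, VM_MAP_REMOVE_GAPS_OK);
 *
 * The kern_vm_deallocate_gap DTrace probe fires either way; only the
 * guard exception is conditional on that flag.
 */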
8318
8319 /*
8320 * vm_map_remove:
8321 *
8322 * Remove the given address range from the target map.
8323 * This is the exported form of vm_map_delete.
8324 */
8325 kern_return_t
8326 vm_map_remove(
8327 vm_map_t map,
8328 vm_map_offset_t start,
8329 vm_map_offset_t end,
8330 boolean_t flags)
8331 {
8332 kern_return_t result;
8333
8334 vm_map_lock(map);
8335 VM_MAP_RANGE_CHECK(map, start, end);
8336 /*
8337 * For the zone_map, the kernel controls the allocation/freeing of memory.
8338 * Any free to the zone_map should be within the bounds of the map and
8339 * should free up memory. If the VM_MAP_RANGE_CHECK() silently converts a
8340 * free to the zone_map into a no-op, there is a problem and we should
8341 * panic.
8342 */
8343 if ((map == zone_map) && (start == end)) {
8344 panic("Nothing being freed to the zone_map. start = end = %p\n", (void *)start);
8345 }
8346 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
8347 vm_map_unlock(map);
8348
8349 return result;
8350 }
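/*
 * Minimal usage sketch (illustrative, not from the original file): a
 * kernel subsystem tearing down a wired allocation in the kernel map
 * would typically drop the kernel wiring as part of the removal; "addr"
 * and "size" are hypothetical, the flag is the one handled in
 * vm_map_delete() above:
 *
 *	kern_return_t kr;
 *
 *	kr = vm_map_remove(kernel_map,
 *	    vm_map_trunc_page(addr, VM_MAP_PAGE_MASK(kernel_map)),
 *	    vm_map_round_page(addr + size, VM_MAP_PAGE_MASK(kernel_map)),
 *	    VM_MAP_REMOVE_KUNWIRE);
 *	assert(kr == KERN_SUCCESS);
 */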
8351
8352 /*
8353 * vm_map_remove_locked:
8354 *
8355 * Remove the given address range from the target locked map.
8356 * This is the exported form of vm_map_delete.
8357 */
8358 kern_return_t
8359 vm_map_remove_locked(
8360 vm_map_t map,
8361 vm_map_offset_t start,
8362 vm_map_offset_t end,
8363 boolean_t flags)
8364 {
8365 kern_return_t result;
8366
8367 VM_MAP_RANGE_CHECK(map, start, end);
8368 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
8369 return result;
8370 }
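/*
 * Usage sketch (illustrative): unlike vm_map_remove() above, the caller
 * of the _locked variant is expected to hold the map lock across the
 * call and to drop it itself afterwards:
 *
 *	vm_map_lock(map);
 *	... other updates under the same lock hold ...
 *	kr = vm_map_remove_locked(map, start, end, flags);
 *	vm_map_unlock(map);
 */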
8371
8372
8373 /*
8374 * Routine: vm_map_copy_allocate
8375 *
8376 * Description:
8377 * Allocates and initializes a map copy object.
8378 */
8379 static vm_map_copy_t
8380 vm_map_copy_allocate(void)
8381 {
8382 vm_map_copy_t new_copy;
8383
8384 new_copy = zalloc(vm_map_copy_zone);
8385 bzero(new_copy, sizeof(*new_copy));
8386 new_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
8387 vm_map_copy_first_entry(new_copy) = vm_map_copy_to_entry(new_copy);
8388 vm_map_copy_last_entry(new_copy) = vm_map_copy_to_entry(new_copy);
8389 return new_copy;
8390 }
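/*
 * Callers finish initializing the returned object themselves; the
 * head_copy/tail_copy setup in vm_map_copy_overwrite() below follows
 * this pattern (illustrative sketch, field values assumed):
 *
 *	copy = vm_map_copy_allocate();
 *	copy->type = VM_MAP_COPY_ENTRY_LIST;
 *	copy->cpy_hdr.entries_pageable = TRUE;	// or inherited from another copy
 *	vm_map_store_init(&copy->cpy_hdr);
 *	copy->offset = offset;
 *	copy->size = size;
 */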
8391
8392 /*
8393 * Routine: vm_map_copy_discard
8394 *
8395 * Description:
8396 * Dispose of a map copy object (returned by
8397 * vm_map_copyin).
8398 */
8399 void
8400 vm_map_copy_discard(
8401 vm_map_copy_t copy)
8402 {
8403 if (copy == VM_MAP_COPY_NULL) {
8404 return;
8405 }
8406
8407 switch (copy->type) {
8408 case VM_MAP_COPY_ENTRY_LIST:
8409 while (vm_map_copy_first_entry(copy) !=
8410 vm_map_copy_to_entry(copy)) {
8411 vm_map_entry_t entry = vm_map_copy_first_entry(copy);
8412
8413 vm_map_copy_entry_unlink(copy, entry);
8414 if (entry->is_sub_map) {
8415 vm_map_deallocate(VME_SUBMAP(entry));
8416 } else {
8417 vm_object_deallocate(VME_OBJECT(entry));
8418 }
8419 vm_map_copy_entry_dispose(copy, entry);
8420 }
8421 break;
8422 case VM_MAP_COPY_OBJECT:
8423 vm_object_deallocate(copy->cpy_object);
8424 break;
8425 case VM_MAP_COPY_KERNEL_BUFFER:
8426
8427 /*
8428 * The vm_map_copy_t and possibly the data buffer were
8429 * allocated by a single call to kalloc(), i.e. the
8430 * vm_map_copy_t was not allocated out of the zone.
8431 */
8432 if (copy->size > msg_ool_size_small || copy->offset) {
8433 panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
8434 (long long)copy->size, (long long)copy->offset);
8435 }
8436 kfree(copy, copy->size + cpy_kdata_hdr_sz);
8437 return;
8438 }
8439 zfree(vm_map_copy_zone, copy);
8440 }
8441
8442 /*
8443 * Routine: vm_map_copy_copy
8444 *
8445 * Description:
8446 * Move the information in a map copy object to
8447 * a new map copy object, leaving the old one
8448 * empty.
8449 *
8450 * This is used by kernel routines that need
8451 * to look at out-of-line data (in copyin form)
8452 * before deciding whether to return SUCCESS.
8453 * If the routine returns FAILURE, the original
8454 * copy object will be deallocated; therefore,
8455 * these routines must make a copy of the copy
8456 * object and leave the original empty so that
8457 * deallocation will not fail.
8458 */
8459 vm_map_copy_t
8460 vm_map_copy_copy(
8461 vm_map_copy_t copy)
8462 {
8463 vm_map_copy_t new_copy;
8464
8465 if (copy == VM_MAP_COPY_NULL) {
8466 return VM_MAP_COPY_NULL;
8467 }
8468
8469 /*
8470 * Allocate a new copy object, and copy the information
8471 * from the old one into it.
8472 */
8473
8474 new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
8475 *new_copy = *copy;
8476
8477 if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
8478 /*
8479 * The links in the entry chain must be
8480 * changed to point to the new copy object.
8481 */
8482 vm_map_copy_first_entry(copy)->vme_prev
8483 = vm_map_copy_to_entry(new_copy);
8484 vm_map_copy_last_entry(copy)->vme_next
8485 = vm_map_copy_to_entry(new_copy);
8486 }
8487
8488 /*
8489 * Change the old copy object into one that contains
8490 * nothing to be deallocated.
8491 */
8492 copy->type = VM_MAP_COPY_OBJECT;
8493 copy->cpy_object = VM_OBJECT_NULL;
8494
8495 /*
8496 * Return the new object.
8497 */
8498 return new_copy;
8499 }
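/*
 * Illustrative sketch of the usage pattern described above (the caller
 * shape and examine_ool_data() are assumptions, not from this file):
 *
 *	vm_map_copy_t mine;
 *
 *	mine = vm_map_copy_copy(copy);	// "copy" is now empty
 *	kr = examine_ool_data(mine);	// hypothetical inspection step
 *	if (kr != KERN_SUCCESS) {
 *		vm_map_copy_discard(mine);
 *		return kr;		// the generic error path can still
 *					// deallocate the emptied "copy" safely
 *	}
 *	// proceed using "mine" in place of "copy"
 */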
8500
8501 static kern_return_t
8502 vm_map_overwrite_submap_recurse(
8503 vm_map_t dst_map,
8504 vm_map_offset_t dst_addr,
8505 vm_map_size_t dst_size)
8506 {
8507 vm_map_offset_t dst_end;
8508 vm_map_entry_t tmp_entry;
8509 vm_map_entry_t entry;
8510 kern_return_t result;
8511 boolean_t encountered_sub_map = FALSE;
8512
8513
8514
8515 /*
8516 * Verify that the destination is all writeable
8517 * initially. We have to trunc the destination
8518 * address and round the copy size or we'll end up
8519 * splitting entries in strange ways.
8520 */
8521
8522 dst_end = vm_map_round_page(dst_addr + dst_size,
8523 VM_MAP_PAGE_MASK(dst_map));
8524 vm_map_lock(dst_map);
8525
8526 start_pass_1:
8527 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
8528 vm_map_unlock(dst_map);
8529 return KERN_INVALID_ADDRESS;
8530 }
8531
8532 vm_map_clip_start(dst_map,
8533 tmp_entry,
8534 vm_map_trunc_page(dst_addr,
8535 VM_MAP_PAGE_MASK(dst_map)));
8536 if (tmp_entry->is_sub_map) {
8537 /* clipping did unnest if needed */
8538 assert(!tmp_entry->use_pmap);
8539 }
8540
8541 for (entry = tmp_entry;;) {
8542 vm_map_entry_t next;
8543
8544 next = entry->vme_next;
8545 while (entry->is_sub_map) {
8546 vm_map_offset_t sub_start;
8547 vm_map_offset_t sub_end;
8548 vm_map_offset_t local_end;
8549
8550 if (entry->in_transition) {
8551 /*
8552 * Say that we are waiting, and wait for entry.
8553 */
8554 entry->needs_wakeup = TRUE;
8555 vm_map_entry_wait(dst_map, THREAD_UNINT);
8556
8557 goto start_pass_1;
8558 }
8559
8560 encountered_sub_map = TRUE;
8561 sub_start = VME_OFFSET(entry);
8562
8563 if (entry->vme_end < dst_end) {
8564 sub_end = entry->vme_end;
8565 } else {
8566 sub_end = dst_end;
8567 }
8568 sub_end -= entry->vme_start;
8569 sub_end += VME_OFFSET(entry);
8570 local_end = entry->vme_end;
8571 vm_map_unlock(dst_map);
8572
8573 result = vm_map_overwrite_submap_recurse(
8574 VME_SUBMAP(entry),
8575 sub_start,
8576 sub_end - sub_start);
8577
8578 if (result != KERN_SUCCESS) {
8579 return result;
8580 }
8581 if (dst_end <= entry->vme_end) {
8582 return KERN_SUCCESS;
8583 }
8584 vm_map_lock(dst_map);
8585 if (!vm_map_lookup_entry(dst_map, local_end,
8586 &tmp_entry)) {
8587 vm_map_unlock(dst_map);
8588 return KERN_INVALID_ADDRESS;
8589 }
8590 entry = tmp_entry;
8591 next = entry->vme_next;
8592 }
8593
8594 if (!(entry->protection & VM_PROT_WRITE)) {
8595 vm_map_unlock(dst_map);
8596 return KERN_PROTECTION_FAILURE;
8597 }
8598
8599 /*
8600 * If the entry is in transition, we must wait
8601 * for it to exit that state. Anything could happen
8602 * when we unlock the map, so start over.
8603 */
8604 if (entry->in_transition) {
8605 /*
8606 * Say that we are waiting, and wait for entry.
8607 */
8608 entry->needs_wakeup = TRUE;
8609 vm_map_entry_wait(dst_map, THREAD_UNINT);
8610
8611 goto start_pass_1;
8612 }
8613
8614 /*
8615 * our range is contained completely within this map entry
8616 */
8617 if (dst_end <= entry->vme_end) {
8618 vm_map_unlock(dst_map);
8619 return KERN_SUCCESS;
8620 }
8621 /*
8622 * check that range specified is contiguous region
8623 */
8624 if ((next == vm_map_to_entry(dst_map)) ||
8625 (next->vme_start != entry->vme_end)) {
8626 vm_map_unlock(dst_map);
8627 return KERN_INVALID_ADDRESS;
8628 }
8629
8630 /*
8631 * Check for permanent objects in the destination.
8632 */
8633 if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
8634 ((!VME_OBJECT(entry)->internal) ||
8635 (VME_OBJECT(entry)->true_share))) {
8636 if (encountered_sub_map) {
8637 vm_map_unlock(dst_map);
8638 return KERN_FAILURE;
8639 }
8640 }
8641
8642
8643 entry = next;
8644 }/* for */
8645 vm_map_unlock(dst_map);
8646 return KERN_SUCCESS;
8647 }
8648
8649 /*
8650 * Routine: vm_map_copy_overwrite
8651 *
8652 * Description:
8653 * Copy the memory described by the map copy
8654 * object (copy; returned by vm_map_copyin) onto
8655 * the specified destination region (dst_map, dst_addr).
8656 * The destination must be writeable.
8657 *
8658 * Unlike vm_map_copyout, this routine actually
8659 * writes over previously-mapped memory. If the
8660 * previous mapping was to a permanent (user-supplied)
8661 * memory object, it is preserved.
8662 *
8663 * The attributes (protection and inheritance) of the
8664 * destination region are preserved.
8665 *
8666 * If successful, consumes the copy object.
8667 * Otherwise, the caller is responsible for it.
8668 *
8669 * Implementation notes:
8670 * To overwrite aligned temporary virtual memory, it is
8671 * sufficient to remove the previous mapping and insert
8672 * the new copy. This replacement is done either on
8673 * the whole region (if no permanent virtual memory
8674 * objects are embedded in the destination region) or
8675 * in individual map entries.
8676 *
8677 * To overwrite permanent virtual memory, it is necessary
8678 * to copy each page, as the external memory management
8679 * interface currently does not provide any optimizations.
8680 *
8681 * Unaligned memory also has to be copied. It is possible
8682 * to use 'vm_trickery' to copy the aligned data. This is
8683 * not done but not hard to implement.
8684 *
8685 * Once a page of permanent memory has been overwritten,
8686 * it is impossible to interrupt this function; otherwise,
8687 * the call would be neither atomic nor location-independent.
8688 * The kernel-state portion of a user thread must be
8689 * interruptible.
8690 *
8691 * It may be expensive to forward all requests that might
8692 * overwrite permanent memory (vm_write, vm_copy) to
8693 * uninterruptible kernel threads. This routine may be
8694 * called by interruptible threads; however, success is
8695 * not guaranteed -- if the request cannot be performed
8696 * atomically and interruptibly, an error indication is
8697 * returned.
8698 */
8699
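/*
 * Typical caller flow for the overwrite path (illustrative sketch; the
 * exact vm_map_copyin() arguments are assumed, not taken from this file):
 *
 *	vm_map_copy_t copy;
 *
 *	kr = vm_map_copyin(src_map, src_addr, len,
 *	    FALSE, &copy);			// FALSE: don't destroy the source
 *	if (kr == KERN_SUCCESS) {
 *		kr = vm_map_copy_overwrite(dst_map, dst_addr,
 *		    copy, interruptible);
 *		if (kr != KERN_SUCCESS) {
 *			vm_map_copy_discard(copy);	// not consumed on failure
 *		}
 *	}
 */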
8700 static kern_return_t
8701 vm_map_copy_overwrite_nested(
8702 vm_map_t dst_map,
8703 vm_map_address_t dst_addr,
8704 vm_map_copy_t copy,
8705 boolean_t interruptible,
8706 pmap_t pmap,
8707 boolean_t discard_on_success)
8708 {
8709 vm_map_offset_t dst_end;
8710 vm_map_entry_t tmp_entry;
8711 vm_map_entry_t entry;
8712 kern_return_t kr;
8713 boolean_t aligned = TRUE;
8714 boolean_t contains_permanent_objects = FALSE;
8715 boolean_t encountered_sub_map = FALSE;
8716 vm_map_offset_t base_addr;
8717 vm_map_size_t copy_size;
8718 vm_map_size_t total_size;
8719
8720
8721 /*
8722 * Check for null copy object.
8723 */
8724
8725 if (copy == VM_MAP_COPY_NULL) {
8726 return KERN_SUCCESS;
8727 }
8728
8729 /*
8730 * Check for special kernel buffer allocated
8731 * by new_ipc_kmsg_copyin.
8732 */
8733
8734 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
8735 return vm_map_copyout_kernel_buffer(
8736 dst_map, &dst_addr,
8737 copy, copy->size, TRUE, discard_on_success);
8738 }
8739
8740 /*
8741 * Only works for entry lists at the moment. Will
8742 * support page lists later.
8743 */
8744
8745 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
8746
8747 if (copy->size == 0) {
8748 if (discard_on_success) {
8749 vm_map_copy_discard(copy);
8750 }
8751 return KERN_SUCCESS;
8752 }
8753
8754 /*
8755 * Verify that the destination is all writeable
8756 * initially. We have to trunc the destination
8757 * address and round the copy size or we'll end up
8758 * splitting entries in strange ways.
8759 */
8760
8761 if (!VM_MAP_PAGE_ALIGNED(copy->size,
8762 VM_MAP_PAGE_MASK(dst_map)) ||
8763 !VM_MAP_PAGE_ALIGNED(copy->offset,
8764 VM_MAP_PAGE_MASK(dst_map)) ||
8765 !VM_MAP_PAGE_ALIGNED(dst_addr,
8766 VM_MAP_PAGE_MASK(dst_map))) {
8767 aligned = FALSE;
8768 dst_end = vm_map_round_page(dst_addr + copy->size,
8769 VM_MAP_PAGE_MASK(dst_map));
8770 } else {
8771 dst_end = dst_addr + copy->size;
8772 }
8773
8774 vm_map_lock(dst_map);
8775
8776 /* LP64todo - remove this check when vm_map_commpage64()
8777 * no longer has to stuff in a map_entry for the commpage
8778 * above the map's max_offset.
8779 */
8780 if (dst_addr >= dst_map->max_offset) {
8781 vm_map_unlock(dst_map);
8782 return KERN_INVALID_ADDRESS;
8783 }
8784
8785 start_pass_1:
8786 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
8787 vm_map_unlock(dst_map);
8788 return KERN_INVALID_ADDRESS;
8789 }
8790 vm_map_clip_start(dst_map,
8791 tmp_entry,
8792 vm_map_trunc_page(dst_addr,
8793 VM_MAP_PAGE_MASK(dst_map)));
8794 for (entry = tmp_entry;;) {
8795 vm_map_entry_t next = entry->vme_next;
8796
8797 while (entry->is_sub_map) {
8798 vm_map_offset_t sub_start;
8799 vm_map_offset_t sub_end;
8800 vm_map_offset_t local_end;
8801
8802 if (entry->in_transition) {
8803 /*
8804 * Say that we are waiting, and wait for entry.
8805 */
8806 entry->needs_wakeup = TRUE;
8807 vm_map_entry_wait(dst_map, THREAD_UNINT);
8808
8809 goto start_pass_1;
8810 }
8811
8812 local_end = entry->vme_end;
8813 if (!(entry->needs_copy)) {
8814 /* if needs_copy we are a COW submap */
8815 /* in such a case we just replace, so */
8816 /* there is no need for the */
8817 /* following check. */
8818 encountered_sub_map = TRUE;
8819 sub_start = VME_OFFSET(entry);
8820
8821 if (entry->vme_end < dst_end) {
8822 sub_end = entry->vme_end;
8823 } else {
8824 sub_end = dst_end;
8825 }
8826 sub_end -= entry->vme_start;
8827 sub_end += VME_OFFSET(entry);
8828 vm_map_unlock(dst_map);
8829
8830 kr = vm_map_overwrite_submap_recurse(
8831 VME_SUBMAP(entry),
8832 sub_start,
8833 sub_end - sub_start);
8834 if (kr != KERN_SUCCESS) {
8835 return kr;
8836 }
8837 vm_map_lock(dst_map);
8838 }
8839
8840 if (dst_end <= entry->vme_end) {
8841 goto start_overwrite;
8842 }
8843 if (!vm_map_lookup_entry(dst_map, local_end,
8844 &entry)) {
8845 vm_map_unlock(dst_map);
8846 return KERN_INVALID_ADDRESS;
8847 }
8848 next = entry->vme_next;
8849 }
8850
8851 if (!(entry->protection & VM_PROT_WRITE)) {
8852 vm_map_unlock(dst_map);
8853 return KERN_PROTECTION_FAILURE;
8854 }
8855
8856 /*
8857 * If the entry is in transition, we must wait
8858 * for it to exit that state. Anything could happen
8859 * when we unlock the map, so start over.
8860 */
8861 if (entry->in_transition) {
8862 /*
8863 * Say that we are waiting, and wait for entry.
8864 */
8865 entry->needs_wakeup = TRUE;
8866 vm_map_entry_wait(dst_map, THREAD_UNINT);
8867
8868 goto start_pass_1;
8869 }
8870
8871 /*
8872 * our range is contained completely within this map entry
8873 */
8874 if (dst_end <= entry->vme_end) {
8875 break;
8876 }
8877 /*
8878 * check that range specified is contiguous region
8879 */
8880 if ((next == vm_map_to_entry(dst_map)) ||
8881 (next->vme_start != entry->vme_end)) {
8882 vm_map_unlock(dst_map);
8883 return KERN_INVALID_ADDRESS;
8884 }
8885
8886
8887 /*
8888 * Check for permanent objects in the destination.
8889 */
8890 if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
8891 ((!VME_OBJECT(entry)->internal) ||
8892 (VME_OBJECT(entry)->true_share))) {
8893 contains_permanent_objects = TRUE;
8894 }
8895
8896 entry = next;
8897 }/* for */
8898
8899 start_overwrite:
8900 /*
8901 * If there are permanent objects in the destination, then
8902 * the copy cannot be interrupted.
8903 */
8904
8905 if (interruptible && contains_permanent_objects) {
8906 vm_map_unlock(dst_map);
8907 return KERN_FAILURE; /* XXX */
8908 }
8909
8910 /*
8911 *
8912 * Make a second pass, overwriting the data
8913 * At the beginning of each loop iteration,
8914 * the next entry to be overwritten is "tmp_entry"
8915 * (initially, the value returned from the lookup above),
8916 * and the starting address expected in that entry
8917 * is "start".
8918 */
8919
8920 total_size = copy->size;
8921 if (encountered_sub_map) {
8922 copy_size = 0;
8923 /* re-calculate tmp_entry since we've had the map */
8924 /* unlocked */
8925 if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
8926 vm_map_unlock(dst_map);
8927 return KERN_INVALID_ADDRESS;
8928 }
8929 } else {
8930 copy_size = copy->size;
8931 }
8932
8933 base_addr = dst_addr;
8934 while (TRUE) {
8935 /* deconstruct the copy object and do it in parts */
8936 /* only in the sub_map, interruptible case */
8937 vm_map_entry_t copy_entry;
8938 vm_map_entry_t previous_prev = VM_MAP_ENTRY_NULL;
8939 vm_map_entry_t next_copy = VM_MAP_ENTRY_NULL;
8940 int nentries;
8941 int remaining_entries = 0;
8942 vm_map_offset_t new_offset = 0;
8943
8944 for (entry = tmp_entry; copy_size == 0;) {
8945 vm_map_entry_t next;
8946
8947 next = entry->vme_next;
8948
8949 /* tmp_entry and base address are moved along */
8950 /* each time we encounter a sub-map. Otherwise */
8951 /* entry can outpace tmp_entry, and the copy_size */
8952 /* may reflect the distance between them. */
8953 /* If the current entry is found to be in transition, */
8954 /* we will start over at the beginning or at the last */
8955 /* encounter of a submap, as dictated by base_addr, */
8956 /* and we will zero copy_size accordingly. */
8957 if (entry->in_transition) {
8958 /*
8959 * Say that we are waiting, and wait for entry.
8960 */
8961 entry->needs_wakeup = TRUE;
8962 vm_map_entry_wait(dst_map, THREAD_UNINT);
8963
8964 if (!vm_map_lookup_entry(dst_map, base_addr,
8965 &tmp_entry)) {
8966 vm_map_unlock(dst_map);
8967 return KERN_INVALID_ADDRESS;
8968 }
8969 copy_size = 0;
8970 entry = tmp_entry;
8971 continue;
8972 }
8973 if (entry->is_sub_map) {
8974 vm_map_offset_t sub_start;
8975 vm_map_offset_t sub_end;
8976 vm_map_offset_t local_end;
8977
8978 if (entry->needs_copy) {
8979 /* if this is a COW submap */
8980 /* just back the range with an */
8981 /* anonymous entry */
8982 if (entry->vme_end < dst_end) {
8983 sub_end = entry->vme_end;
8984 } else {
8985 sub_end = dst_end;
8986 }
8987 if (entry->vme_start < base_addr) {
8988 sub_start = base_addr;
8989 } else {
8990 sub_start = entry->vme_start;
8991 }
8992 vm_map_clip_end(
8993 dst_map, entry, sub_end);
8994 vm_map_clip_start(
8995 dst_map, entry, sub_start);
8996 assert(!entry->use_pmap);
8997 assert(!entry->iokit_acct);
8998 entry->use_pmap = TRUE;
8999 entry->is_sub_map = FALSE;
9000 vm_map_deallocate(
9001 VME_SUBMAP(entry));
9002 VME_OBJECT_SET(entry, VM_OBJECT_NULL);
9003 VME_OFFSET_SET(entry, 0);
9004 entry->is_shared = FALSE;
9005 entry->needs_copy = FALSE;
9006 entry->protection = VM_PROT_DEFAULT;
9007 entry->max_protection = VM_PROT_ALL;
9008 entry->wired_count = 0;
9009 entry->user_wired_count = 0;
9010 if (entry->inheritance
9011 == VM_INHERIT_SHARE) {
9012 entry->inheritance = VM_INHERIT_COPY;
9013 }
9014 continue;
9015 }
9016 /* first take care of any non-sub_map */
9017 /* entries to send */
9018 if (base_addr < entry->vme_start) {
9019 /* stuff to send */
9020 copy_size =
9021 entry->vme_start - base_addr;
9022 break;
9023 }
9024 sub_start = VME_OFFSET(entry);
9025
9026 if (entry->vme_end < dst_end) {
9027 sub_end = entry->vme_end;
9028 } else {
9029 sub_end = dst_end;
9030 }
9031 sub_end -= entry->vme_start;
9032 sub_end += VME_OFFSET(entry);
9033 local_end = entry->vme_end;
9034 vm_map_unlock(dst_map);
9035 copy_size = sub_end - sub_start;
9036
9037 /* adjust the copy object */
9038 if (total_size > copy_size) {
9039 vm_map_size_t local_size = 0;
9040 vm_map_size_t entry_size;
9041
9042 nentries = 1;
9043 new_offset = copy->offset;
9044 copy_entry = vm_map_copy_first_entry(copy);
9045 while (copy_entry !=
9046 vm_map_copy_to_entry(copy)) {
9047 entry_size = copy_entry->vme_end -
9048 copy_entry->vme_start;
9049 if ((local_size < copy_size) &&
9050 ((local_size + entry_size)
9051 >= copy_size)) {
9052 vm_map_copy_clip_end(copy,
9053 copy_entry,
9054 copy_entry->vme_start +
9055 (copy_size - local_size));
9056 entry_size = copy_entry->vme_end -
9057 copy_entry->vme_start;
9058 local_size += entry_size;
9059 new_offset += entry_size;
9060 }
9061 if (local_size >= copy_size) {
9062 next_copy = copy_entry->vme_next;
9063 copy_entry->vme_next =
9064 vm_map_copy_to_entry(copy);
9065 previous_prev =
9066 copy->cpy_hdr.links.prev;
9067 copy->cpy_hdr.links.prev = copy_entry;
9068 copy->size = copy_size;
9069 remaining_entries =
9070 copy->cpy_hdr.nentries;
9071 remaining_entries -= nentries;
9072 copy->cpy_hdr.nentries = nentries;
9073 break;
9074 } else {
9075 local_size += entry_size;
9076 new_offset += entry_size;
9077 nentries++;
9078 }
9079 copy_entry = copy_entry->vme_next;
9080 }
9081 }
9082
9083 if ((entry->use_pmap) && (pmap == NULL)) {
9084 kr = vm_map_copy_overwrite_nested(
9085 VME_SUBMAP(entry),
9086 sub_start,
9087 copy,
9088 interruptible,
9089 VME_SUBMAP(entry)->pmap,
9090 TRUE);
9091 } else if (pmap != NULL) {
9092 kr = vm_map_copy_overwrite_nested(
9093 VME_SUBMAP(entry),
9094 sub_start,
9095 copy,
9096 interruptible, pmap,
9097 TRUE);
9098 } else {
9099 kr = vm_map_copy_overwrite_nested(
9100 VME_SUBMAP(entry),
9101 sub_start,
9102 copy,
9103 interruptible,
9104 dst_map->pmap,
9105 TRUE);
9106 }
9107 if (kr != KERN_SUCCESS) {
9108 if (next_copy != NULL) {
9109 copy->cpy_hdr.nentries +=
9110 remaining_entries;
9111 copy->cpy_hdr.links.prev->vme_next =
9112 next_copy;
9113 copy->cpy_hdr.links.prev
9114 = previous_prev;
9115 copy->size = total_size;
9116 }
9117 return kr;
9118 }
9119 if (dst_end <= local_end) {
9120 return KERN_SUCCESS;
9121 }
9122 /* otherwise copy no longer exists, it was */
9123 /* destroyed after successful copy_overwrite */
9124 copy = vm_map_copy_allocate();
9125 copy->type = VM_MAP_COPY_ENTRY_LIST;
9126 copy->offset = new_offset;
9127
9128 /*
9129 * XXX FBDP
9130 * this does not seem to deal with
9131 * the VM map store (R&B tree)
9132 */
9133
9134 total_size -= copy_size;
9135 copy_size = 0;
9136 /* put back remainder of copy in container */
9137 if (next_copy != NULL) {
9138 copy->cpy_hdr.nentries = remaining_entries;
9139 copy->cpy_hdr.links.next = next_copy;
9140 copy->cpy_hdr.links.prev = previous_prev;
9141 copy->size = total_size;
9142 next_copy->vme_prev =
9143 vm_map_copy_to_entry(copy);
9144 next_copy = NULL;
9145 }
9146 base_addr = local_end;
9147 vm_map_lock(dst_map);
9148 if (!vm_map_lookup_entry(dst_map,
9149 local_end, &tmp_entry)) {
9150 vm_map_unlock(dst_map);
9151 return KERN_INVALID_ADDRESS;
9152 }
9153 entry = tmp_entry;
9154 continue;
9155 }
9156 if (dst_end <= entry->vme_end) {
9157 copy_size = dst_end - base_addr;
9158 break;
9159 }
9160
9161 if ((next == vm_map_to_entry(dst_map)) ||
9162 (next->vme_start != entry->vme_end)) {
9163 vm_map_unlock(dst_map);
9164 return KERN_INVALID_ADDRESS;
9165 }
9166
9167 entry = next;
9168 }/* for */
9169
9170 next_copy = NULL;
9171 nentries = 1;
9172
9173 /* adjust the copy object */
9174 if (total_size > copy_size) {
9175 vm_map_size_t local_size = 0;
9176 vm_map_size_t entry_size;
9177
9178 new_offset = copy->offset;
9179 copy_entry = vm_map_copy_first_entry(copy);
9180 while (copy_entry != vm_map_copy_to_entry(copy)) {
9181 entry_size = copy_entry->vme_end -
9182 copy_entry->vme_start;
9183 if ((local_size < copy_size) &&
9184 ((local_size + entry_size)
9185 >= copy_size)) {
9186 vm_map_copy_clip_end(copy, copy_entry,
9187 copy_entry->vme_start +
9188 (copy_size - local_size));
9189 entry_size = copy_entry->vme_end -
9190 copy_entry->vme_start;
9191 local_size += entry_size;
9192 new_offset += entry_size;
9193 }
9194 if (local_size >= copy_size) {
9195 next_copy = copy_entry->vme_next;
9196 copy_entry->vme_next =
9197 vm_map_copy_to_entry(copy);
9198 previous_prev =
9199 copy->cpy_hdr.links.prev;
9200 copy->cpy_hdr.links.prev = copy_entry;
9201 copy->size = copy_size;
9202 remaining_entries =
9203 copy->cpy_hdr.nentries;
9204 remaining_entries -= nentries;
9205 copy->cpy_hdr.nentries = nentries;
9206 break;
9207 } else {
9208 local_size += entry_size;
9209 new_offset += entry_size;
9210 nentries++;
9211 }
9212 copy_entry = copy_entry->vme_next;
9213 }
9214 }
9215
9216 if (aligned) {
9217 pmap_t local_pmap;
9218
9219 if (pmap) {
9220 local_pmap = pmap;
9221 } else {
9222 local_pmap = dst_map->pmap;
9223 }
9224
9225 if ((kr = vm_map_copy_overwrite_aligned(
9226 dst_map, tmp_entry, copy,
9227 base_addr, local_pmap)) != KERN_SUCCESS) {
9228 if (next_copy != NULL) {
9229 copy->cpy_hdr.nentries +=
9230 remaining_entries;
9231 copy->cpy_hdr.links.prev->vme_next =
9232 next_copy;
9233 copy->cpy_hdr.links.prev =
9234 previous_prev;
9235 copy->size += copy_size;
9236 }
9237 return kr;
9238 }
9239 vm_map_unlock(dst_map);
9240 } else {
9241 /*
9242 * Performance gain:
9243 *
9244 * if the copy and dst address are misaligned but share the same
9245 * offset within the page, we can copy_not_aligned the
9246 * misaligned parts and copy aligned the rest. If they are
9247 * aligned but len is unaligned, we simply need to copy
9248 * the end bit unaligned. We'll need to split the misaligned
9249 * bits of the region in this case!
9250 */
9251 /* ALWAYS UNLOCKS THE dst_map MAP */
9252 kr = vm_map_copy_overwrite_unaligned(
9253 dst_map,
9254 tmp_entry,
9255 copy,
9256 base_addr,
9257 discard_on_success);
9258 if (kr != KERN_SUCCESS) {
9259 if (next_copy != NULL) {
9260 copy->cpy_hdr.nentries +=
9261 remaining_entries;
9262 copy->cpy_hdr.links.prev->vme_next =
9263 next_copy;
9264 copy->cpy_hdr.links.prev =
9265 previous_prev;
9266 copy->size += copy_size;
9267 }
9268 return kr;
9269 }
9270 }
9271 total_size -= copy_size;
9272 if (total_size == 0) {
9273 break;
9274 }
9275 base_addr += copy_size;
9276 copy_size = 0;
9277 copy->offset = new_offset;
9278 if (next_copy != NULL) {
9279 copy->cpy_hdr.nentries = remaining_entries;
9280 copy->cpy_hdr.links.next = next_copy;
9281 copy->cpy_hdr.links.prev = previous_prev;
9282 next_copy->vme_prev = vm_map_copy_to_entry(copy);
9283 copy->size = total_size;
9284 }
9285 vm_map_lock(dst_map);
9286 while (TRUE) {
9287 if (!vm_map_lookup_entry(dst_map,
9288 base_addr, &tmp_entry)) {
9289 vm_map_unlock(dst_map);
9290 return KERN_INVALID_ADDRESS;
9291 }
9292 if (tmp_entry->in_transition) {
9293 entry->needs_wakeup = TRUE;
9294 vm_map_entry_wait(dst_map, THREAD_UNINT);
9295 } else {
9296 break;
9297 }
9298 }
9299 vm_map_clip_start(dst_map,
9300 tmp_entry,
9301 vm_map_trunc_page(base_addr,
9302 VM_MAP_PAGE_MASK(dst_map)));
9303
9304 entry = tmp_entry;
9305 } /* while */
9306
9307 /*
9308 * Throw away the vm_map_copy object
9309 */
9310 if (discard_on_success) {
9311 vm_map_copy_discard(copy);
9312 }
9313
9314 return KERN_SUCCESS;
9315 }/* vm_map_copy_overwrite */
9316
9317 kern_return_t
9318 vm_map_copy_overwrite(
9319 vm_map_t dst_map,
9320 vm_map_offset_t dst_addr,
9321 vm_map_copy_t copy,
9322 boolean_t interruptible)
9323 {
9324 vm_map_size_t head_size, tail_size;
9325 vm_map_copy_t head_copy, tail_copy;
9326 vm_map_offset_t head_addr, tail_addr;
9327 vm_map_entry_t entry;
9328 kern_return_t kr;
9329 vm_map_offset_t effective_page_mask, effective_page_size;
9330
9331 head_size = 0;
9332 tail_size = 0;
9333 head_copy = NULL;
9334 tail_copy = NULL;
9335 head_addr = 0;
9336 tail_addr = 0;
9337
9338 if (interruptible ||
9339 copy == VM_MAP_COPY_NULL ||
9340 copy->type != VM_MAP_COPY_ENTRY_LIST) {
9341 /*
9342 * We can't split the "copy" map if we're interruptible
9343 * or if we don't have a "copy" map...
9344 */
9345 blunt_copy:
9346 return vm_map_copy_overwrite_nested(dst_map,
9347 dst_addr,
9348 copy,
9349 interruptible,
9350 (pmap_t) NULL,
9351 TRUE);
9352 }
9353
9354 effective_page_mask = MAX(VM_MAP_PAGE_MASK(dst_map), PAGE_MASK);
9355 effective_page_mask = MAX(VM_MAP_COPY_PAGE_MASK(copy),
9356 effective_page_mask);
9357 effective_page_size = effective_page_mask + 1;
9358
9359 if (copy->size < 3 * effective_page_size) {
9360 /*
9361 * Too small to bother with optimizing...
9362 */
9363 goto blunt_copy;
9364 }
9365
9366 if ((dst_addr & effective_page_mask) !=
9367 (copy->offset & effective_page_mask)) {
9368 /*
9369 * Incompatible mis-alignment of source and destination...
9370 */
9371 goto blunt_copy;
9372 }
9373
9374 /*
9375 * Proper alignment or identical mis-alignment at the beginning.
9376 * Let's try and do a small unaligned copy first (if needed)
9377 * and then an aligned copy for the rest.
9378 */
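/*
 * Worked example (illustrative, assuming a 16K effective page size):
 * with (copy->offset & effective_page_mask) == 0x600 and
 * copy->size == 0x10000, the code below computes
 *	head_size = 0x4000 - 0x600 = 0x3a00
 *	tail_size = (0x600 + 0x10000) & 0x3fff = 0x600
 * leaving 0x10000 - 0x3a00 - 0x600 = 0xc000, i.e. three whole 16K
 * pages, for the aligned middle copy.
 */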
9379 if (!vm_map_page_aligned(dst_addr, effective_page_mask)) {
9380 head_addr = dst_addr;
9381 head_size = (effective_page_size -
9382 (copy->offset & effective_page_mask));
9383 head_size = MIN(head_size, copy->size);
9384 }
9385 if (!vm_map_page_aligned(copy->offset + copy->size,
9386 effective_page_mask)) {
9387 /*
9388 * Mis-alignment at the end.
9389 * Do an aligned copy up to the last page and
9390 * then an unaligned copy for the remaining bytes.
9391 */
9392 tail_size = ((copy->offset + copy->size) &
9393 effective_page_mask);
9394 tail_size = MIN(tail_size, copy->size);
9395 tail_addr = dst_addr + copy->size - tail_size;
9396 assert(tail_addr >= head_addr + head_size);
9397 }
9398 assert(head_size + tail_size <= copy->size);
9399
9400 if (head_size + tail_size == copy->size) {
9401 /*
9402 * It's all unaligned, no optimization possible...
9403 */
9404 goto blunt_copy;
9405 }
9406
9407 /*
9408 * Can't optimize if there are any submaps in the
9409 * destination due to the way we free the "copy" map
9410 * progressively in vm_map_copy_overwrite_nested()
9411 * in that case.
9412 */
9413 vm_map_lock_read(dst_map);
9414 if (!vm_map_lookup_entry(dst_map, dst_addr, &entry)) {
9415 vm_map_unlock_read(dst_map);
9416 goto blunt_copy;
9417 }
9418 for (;
9419 (entry != vm_map_copy_to_entry(copy) &&
9420 entry->vme_start < dst_addr + copy->size);
9421 entry = entry->vme_next) {
9422 if (entry->is_sub_map) {
9423 vm_map_unlock_read(dst_map);
9424 goto blunt_copy;
9425 }
9426 }
9427 vm_map_unlock_read(dst_map);
9428
9429 if (head_size) {
9430 /*
9431 * Unaligned copy of the first "head_size" bytes, to reach
9432 * a page boundary.
9433 */
9434
9435 /*
9436 * Extract "head_copy" out of "copy".
9437 */
9438 head_copy = vm_map_copy_allocate();
9439 head_copy->type = VM_MAP_COPY_ENTRY_LIST;
9440 head_copy->cpy_hdr.entries_pageable =
9441 copy->cpy_hdr.entries_pageable;
9442 vm_map_store_init(&head_copy->cpy_hdr);
9443
9444 entry = vm_map_copy_first_entry(copy);
9445 if (entry->vme_end < copy->offset + head_size) {
9446 head_size = entry->vme_end - copy->offset;
9447 }
9448
9449 head_copy->offset = copy->offset;
9450 head_copy->size = head_size;
9451 copy->offset += head_size;
9452 copy->size -= head_size;
9453
9454 vm_map_copy_clip_end(copy, entry, copy->offset);
9455 vm_map_copy_entry_unlink(copy, entry);
9456 vm_map_copy_entry_link(head_copy,
9457 vm_map_copy_to_entry(head_copy),
9458 entry);
9459
9460 /*
9461 * Do the unaligned copy.
9462 */
9463 kr = vm_map_copy_overwrite_nested(dst_map,
9464 head_addr,
9465 head_copy,
9466 interruptible,
9467 (pmap_t) NULL,
9468 FALSE);
9469 if (kr != KERN_SUCCESS) {
9470 goto done;
9471 }
9472 }
9473
9474 if (tail_size) {
9475 /*
9476 * Extract "tail_copy" out of "copy".
9477 */
9478 tail_copy = vm_map_copy_allocate();
9479 tail_copy->type = VM_MAP_COPY_ENTRY_LIST;
9480 tail_copy->cpy_hdr.entries_pageable =
9481 copy->cpy_hdr.entries_pageable;
9482 vm_map_store_init(&tail_copy->cpy_hdr);
9483
9484 tail_copy->offset = copy->offset + copy->size - tail_size;
9485 tail_copy->size = tail_size;
9486
9487 copy->size -= tail_size;
9488
9489 entry = vm_map_copy_last_entry(copy);
9490 vm_map_copy_clip_start(copy, entry, tail_copy->offset);
9491 entry = vm_map_copy_last_entry(copy);
9492 vm_map_copy_entry_unlink(copy, entry);
9493 vm_map_copy_entry_link(tail_copy,
9494 vm_map_copy_last_entry(tail_copy),
9495 entry);
9496 }
9497
9498 /*
9499 * Copy most (or possibly all) of the data.
9500 */
9501 kr = vm_map_copy_overwrite_nested(dst_map,
9502 dst_addr + head_size,
9503 copy,
9504 interruptible,
9505 (pmap_t) NULL,
9506 FALSE);
9507 if (kr != KERN_SUCCESS) {
9508 goto done;
9509 }
9510
9511 if (tail_size) {
9512 kr = vm_map_copy_overwrite_nested(dst_map,
9513 tail_addr,
9514 tail_copy,
9515 interruptible,
9516 (pmap_t) NULL,
9517 FALSE);
9518 }
9519
9520 done:
9521 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
9522 if (kr == KERN_SUCCESS) {
9523 /*
9524 * Discard all the copy maps.
9525 */
9526 if (head_copy) {
9527 vm_map_copy_discard(head_copy);
9528 head_copy = NULL;
9529 }
9530 vm_map_copy_discard(copy);
9531 if (tail_copy) {
9532 vm_map_copy_discard(tail_copy);
9533 tail_copy = NULL;
9534 }
9535 } else {
9536 /*
9537 * Re-assemble the original copy map.
9538 */
9539 if (head_copy) {
9540 entry = vm_map_copy_first_entry(head_copy);
9541 vm_map_copy_entry_unlink(head_copy, entry);
9542 vm_map_copy_entry_link(copy,
9543 vm_map_copy_to_entry(copy),
9544 entry);
9545 copy->offset -= head_size;
9546 copy->size += head_size;
9547 vm_map_copy_discard(head_copy);
9548 head_copy = NULL;
9549 }
9550 if (tail_copy) {
9551 entry = vm_map_copy_last_entry(tail_copy);
9552 vm_map_copy_entry_unlink(tail_copy, entry);
9553 vm_map_copy_entry_link(copy,
9554 vm_map_copy_last_entry(copy),
9555 entry);
9556 copy->size += tail_size;
9557 vm_map_copy_discard(tail_copy);
9558 tail_copy = NULL;
9559 }
9560 }
9561 return kr;
9562 }
9563
9564
9565 /*
9566 * Routine: vm_map_copy_overwrite_unaligned [internal use only]
9567 *
9568 * Description:
9569 * Physically copy unaligned data
9570 *
9571 * Implementation:
9572 * Unaligned parts of pages have to be physically copied. We use
9573 * a modified form of vm_fault_copy (which understands non-aligned
9574 * page offsets and sizes) to do the copy. We attempt to copy as
9575 * much memory in one go as possible; however, vm_fault_copy copies
9576 * within one memory object, so we have to find the smallest of "amount left",
9577 * "source object data size", and "target object data size". With
9578 * unaligned data we don't need to split regions; therefore the source
9579 * (copy) object should be one map entry, though the target range may be split
9580 * over multiple map entries. In any event we are pessimistic
9581 * about these assumptions.
9582 *
9583 * Assumptions:
9584 * dst_map is locked on entry and is returned locked on success,
9585 * unlocked on error.
9586 */
9587
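/*
 * The loop below advances in chunks of the smallest of the three
 * remaining sizes; an illustrative restatement of the selection that
 * follows (no new behavior implied):
 *
 *	copy_size = (dst_size < src_size) ? dst_size : src_size;
 *	if (copy_size > amount_left)
 *		copy_size = amount_left;
 *
 * where dst_size is what remains of the current destination map entry
 * and src_size is what remains of the current source copy entry.
 */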
9588 static kern_return_t
9589 vm_map_copy_overwrite_unaligned(
9590 vm_map_t dst_map,
9591 vm_map_entry_t entry,
9592 vm_map_copy_t copy,
9593 vm_map_offset_t start,
9594 boolean_t discard_on_success)
9595 {
9596 vm_map_entry_t copy_entry;
9597 vm_map_entry_t copy_entry_next;
9598 vm_map_version_t version;
9599 vm_object_t dst_object;
9600 vm_object_offset_t dst_offset;
9601 vm_object_offset_t src_offset;
9602 vm_object_offset_t entry_offset;
9603 vm_map_offset_t entry_end;
9604 vm_map_size_t src_size,
9605 dst_size,
9606 copy_size,
9607 amount_left;
9608 kern_return_t kr = KERN_SUCCESS;
9609
9610
9611 copy_entry = vm_map_copy_first_entry(copy);
9612
9613 vm_map_lock_write_to_read(dst_map);
9614
9615 src_offset = copy->offset - vm_object_trunc_page(copy->offset);
9616 amount_left = copy->size;
9617 /*
9618 * The copy is unaligned, so we never clipped this entry; we need the offset into
9619 * the vm_object, not just into the data.
9620 */
9621 while (amount_left > 0) {
9622 if (entry == vm_map_to_entry(dst_map)) {
9623 vm_map_unlock_read(dst_map);
9624 return KERN_INVALID_ADDRESS;
9625 }
9626
9627 /* "start" must be within the current map entry */
9628 assert((start >= entry->vme_start) && (start < entry->vme_end));
9629
9630 dst_offset = start - entry->vme_start;
9631
9632 dst_size = entry->vme_end - start;
9633
9634 src_size = copy_entry->vme_end -
9635 (copy_entry->vme_start + src_offset);
9636
9637 if (dst_size < src_size) {
9638 /*
9639 * we can only copy dst_size bytes before
9640 * we have to get the next destination entry
9641 */
9642 copy_size = dst_size;
9643 } else {
9644 /*
9645 * we can only copy src_size bytes before
9646 * we have to get the next source copy entry
9647 */
9648 copy_size = src_size;
9649 }
9650
9651 if (copy_size > amount_left) {
9652 copy_size = amount_left;
9653 }
9654 /*
9655 * Entry needs copy: create a shadow object for the
9656 * copy-on-write region.
9657 */
9658 if (entry->needs_copy &&
9659 ((entry->protection & VM_PROT_WRITE) != 0)) {
9660 if (vm_map_lock_read_to_write(dst_map)) {
9661 vm_map_lock_read(dst_map);
9662 goto RetryLookup;
9663 }
9664 VME_OBJECT_SHADOW(entry,
9665 (vm_map_size_t)(entry->vme_end
9666 - entry->vme_start));
9667 entry->needs_copy = FALSE;
9668 vm_map_lock_write_to_read(dst_map);
9669 }
9670 dst_object = VME_OBJECT(entry);
9671 /*
9672 * Unlike with the virtual (aligned) copy, we're going
9673 * to fault on this memory; therefore we need a target object.
9674 */
9675 if (dst_object == VM_OBJECT_NULL) {
9676 if (vm_map_lock_read_to_write(dst_map)) {
9677 vm_map_lock_read(dst_map);
9678 goto RetryLookup;
9679 }
9680 dst_object = vm_object_allocate((vm_map_size_t)
9681 entry->vme_end - entry->vme_start);
9682 VME_OBJECT_SET(entry, dst_object);
9683 VME_OFFSET_SET(entry, 0);
9684 assert(entry->use_pmap);
9685 vm_map_lock_write_to_read(dst_map);
9686 }
9687 /*
9688 * Take an object reference and unlock map. The "entry" may
9689 * disappear or change when the map is unlocked.
9690 */
9691 vm_object_reference(dst_object);
9692 version.main_timestamp = dst_map->timestamp;
9693 entry_offset = VME_OFFSET(entry);
9694 entry_end = entry->vme_end;
9695 vm_map_unlock_read(dst_map);
9696 /*
9697 * Copy as much as possible in one pass
9698 */
9699 kr = vm_fault_copy(
9700 VME_OBJECT(copy_entry),
9701 VME_OFFSET(copy_entry) + src_offset,
9702 &copy_size,
9703 dst_object,
9704 entry_offset + dst_offset,
9705 dst_map,
9706 &version,
9707 THREAD_UNINT );
9708
9709 start += copy_size;
9710 src_offset += copy_size;
9711 amount_left -= copy_size;
9712 /*
9713 * Release the object reference
9714 */
9715 vm_object_deallocate(dst_object);
9716 /*
9717 * If a hard error occurred, return it now
9718 */
9719 if (kr != KERN_SUCCESS) {
9720 return kr;
9721 }
9722
9723 if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
9724 || amount_left == 0) {
9725 /*
9726 * all done with this copy entry, dispose.
9727 */
9728 copy_entry_next = copy_entry->vme_next;
9729
9730 if (discard_on_success) {
9731 vm_map_copy_entry_unlink(copy, copy_entry);
9732 assert(!copy_entry->is_sub_map);
9733 vm_object_deallocate(VME_OBJECT(copy_entry));
9734 vm_map_copy_entry_dispose(copy, copy_entry);
9735 }
9736
9737 if (copy_entry_next == vm_map_copy_to_entry(copy) &&
9738 amount_left) {
9739 /*
9740 * not finished copying but ran out of source
9741 */
9742 return KERN_INVALID_ADDRESS;
9743 }
9744
9745 copy_entry = copy_entry_next;
9746
9747 src_offset = 0;
9748 }
9749
9750 if (amount_left == 0) {
9751 return KERN_SUCCESS;
9752 }
9753
9754 vm_map_lock_read(dst_map);
9755 if (version.main_timestamp == dst_map->timestamp) {
9756 if (start == entry_end) {
9757 /*
9758 * destination region is split. Use the version
9759 * information to avoid a lookup in the normal
9760 * case.
9761 */
9762 entry = entry->vme_next;
9763 /*
9764 * should be contiguous. Fail if we encounter
9765 * a hole in the destination.
9766 */
9767 if (start != entry->vme_start) {
9768 vm_map_unlock_read(dst_map);
9769 return KERN_INVALID_ADDRESS;
9770 }
9771 }
9772 } else {
9773 /*
9774 * Map version check failed.
9775 * we must lookup the entry because somebody
9776 * might have changed the map behind our backs.
9777 */
9778 RetryLookup:
9779 if (!vm_map_lookup_entry(dst_map, start, &entry)) {
9780 vm_map_unlock_read(dst_map);
9781 return KERN_INVALID_ADDRESS;
9782 }
9783 }
9784 }/* while */
9785
9786 return KERN_SUCCESS;
9787 }/* vm_map_copy_overwrite_unaligned */
9788
9789 /*
9790 * Routine: vm_map_copy_overwrite_aligned [internal use only]
9791 *
9792 * Description:
9793 * Does all the vm_trickery possible for whole pages.
9794 *
9795 * Implementation:
9796 *
9797 * If there are no permanent objects in the destination,
9798 * and the source and destination map entry zones match,
9799 * and the destination map entry is not shared,
9800 * then the map entries can be deleted and replaced
9801 * with those from the copy. The following code is the
9802 * basic idea of what to do, but there are lots of annoying
9803 * little details about getting protection and inheritance
9804 * right. Should add protection, inheritance, and sharing checks
9805 * to the above pass and make sure that no wiring is involved.
9806 */
9807
9808 int vm_map_copy_overwrite_aligned_src_not_internal = 0;
9809 int vm_map_copy_overwrite_aligned_src_not_symmetric = 0;
9810 int vm_map_copy_overwrite_aligned_src_large = 0;
9811
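/*
 * Conceptual sketch of the fast path implemented below (illustrative
 * condensation of the real code, which also handles submaps, pmap
 * nesting cleanup and accounting):
 *
 *	old_object = VME_OBJECT(entry);
 *	pmap_remove_options(dst_map->pmap,		// drop old translations
 *	    (addr64_t)entry->vme_start,
 *	    (addr64_t)entry->vme_end,
 *	    PMAP_OPTIONS_REMOVE);
 *	VME_OBJECT_SET(entry, VME_OBJECT(copy_entry));	// install source object
 *	VME_OFFSET_SET(entry, VME_OFFSET(copy_entry));
 *	vm_object_deallocate(old_object);		// release old backing
 *
 * i.e. the "copy" degenerates into swapping the backing object rather
 * than moving any data.
 */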
9812 static kern_return_t
9813 vm_map_copy_overwrite_aligned(
9814 vm_map_t dst_map,
9815 vm_map_entry_t tmp_entry,
9816 vm_map_copy_t copy,
9817 vm_map_offset_t start,
9818 __unused pmap_t pmap)
9819 {
9820 vm_object_t object;
9821 vm_map_entry_t copy_entry;
9822 vm_map_size_t copy_size;
9823 vm_map_size_t size;
9824 vm_map_entry_t entry;
9825
9826 while ((copy_entry = vm_map_copy_first_entry(copy))
9827 != vm_map_copy_to_entry(copy)) {
9828 copy_size = (copy_entry->vme_end - copy_entry->vme_start);
9829
9830 entry = tmp_entry;
9831 if (entry->is_sub_map) {
9832 /* unnested when clipped earlier */
9833 assert(!entry->use_pmap);
9834 }
9835 if (entry == vm_map_to_entry(dst_map)) {
9836 vm_map_unlock(dst_map);
9837 return KERN_INVALID_ADDRESS;
9838 }
9839 size = (entry->vme_end - entry->vme_start);
9840 /*
9841 * Make sure that no holes popped up in the
9842 * address map, and that the protection is
9843 * still valid, in case the map was unlocked
9844 * earlier.
9845 */
9846
9847 if ((entry->vme_start != start) || ((entry->is_sub_map)
9848 && !entry->needs_copy)) {
9849 vm_map_unlock(dst_map);
9850 return KERN_INVALID_ADDRESS;
9851 }
9852 assert(entry != vm_map_to_entry(dst_map));
9853
9854 /*
9855 * Check protection again
9856 */
9857
9858 if (!(entry->protection & VM_PROT_WRITE)) {
9859 vm_map_unlock(dst_map);
9860 return KERN_PROTECTION_FAILURE;
9861 }
9862
9863 /*
9864 * Adjust to source size first
9865 */
9866
9867 if (copy_size < size) {
9868 if (entry->map_aligned &&
9869 !VM_MAP_PAGE_ALIGNED(entry->vme_start + copy_size,
9870 VM_MAP_PAGE_MASK(dst_map))) {
9871 /* no longer map-aligned */
9872 entry->map_aligned = FALSE;
9873 }
9874 vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
9875 size = copy_size;
9876 }
9877
9878 /*
9879 * Adjust to destination size
9880 */
9881
9882 if (size < copy_size) {
9883 vm_map_copy_clip_end(copy, copy_entry,
9884 copy_entry->vme_start + size);
9885 copy_size = size;
9886 }
9887
9888 assert((entry->vme_end - entry->vme_start) == size);
9889 assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
9890 assert((copy_entry->vme_end - copy_entry->vme_start) == size);
9891
9892 /*
9893 * If the destination contains temporary unshared memory,
9894 * we can perform the copy by throwing it away and
9895 * installing the source data.
9896 */
9897
9898 object = VME_OBJECT(entry);
9899 if ((!entry->is_shared &&
9900 ((object == VM_OBJECT_NULL) ||
9901 (object->internal && !object->true_share))) ||
9902 entry->needs_copy) {
9903 vm_object_t old_object = VME_OBJECT(entry);
9904 vm_object_offset_t old_offset = VME_OFFSET(entry);
9905 vm_object_offset_t offset;
9906
9907 /*
9908 * Ensure that the source and destination aren't
9909 * identical
9910 */
9911 if (old_object == VME_OBJECT(copy_entry) &&
9912 old_offset == VME_OFFSET(copy_entry)) {
9913 vm_map_copy_entry_unlink(copy, copy_entry);
9914 vm_map_copy_entry_dispose(copy, copy_entry);
9915
9916 if (old_object != VM_OBJECT_NULL) {
9917 vm_object_deallocate(old_object);
9918 }
9919
9920 start = tmp_entry->vme_end;
9921 tmp_entry = tmp_entry->vme_next;
9922 continue;
9923 }
9924
9925 #if !CONFIG_EMBEDDED
9926 #define __TRADEOFF1_OBJ_SIZE (64 * 1024 * 1024) /* 64 MB */
9927 #define __TRADEOFF1_COPY_SIZE (128 * 1024) /* 128 KB */
9928 if (VME_OBJECT(copy_entry) != VM_OBJECT_NULL &&
9929 VME_OBJECT(copy_entry)->vo_size >= __TRADEOFF1_OBJ_SIZE &&
9930 copy_size <= __TRADEOFF1_COPY_SIZE) {
9931 /*
9932 * Virtual vs. Physical copy tradeoff #1.
9933 *
9934 * Copying only a few pages out of a large
9935 * object: do a physical copy instead of
9936 * a virtual copy, to avoid possibly keeping
9937 * the entire large object alive because of
9938 * those few copy-on-write pages.
9939 */
9940 vm_map_copy_overwrite_aligned_src_large++;
9941 goto slow_copy;
9942 }
9943 #endif /* !CONFIG_EMBEDDED */
9944
9945 if ((dst_map->pmap != kernel_pmap) &&
9946 (VME_ALIAS(entry) >= VM_MEMORY_MALLOC) &&
9947 (VME_ALIAS(entry) <= VM_MEMORY_MALLOC_MEDIUM)) {
9948 vm_object_t new_object, new_shadow;
9949
9950 /*
9951 * We're about to map something over a mapping
9952 * established by malloc()...
9953 */
9954 new_object = VME_OBJECT(copy_entry);
9955 if (new_object != VM_OBJECT_NULL) {
9956 vm_object_lock_shared(new_object);
9957 }
9958 while (new_object != VM_OBJECT_NULL &&
9959 #if !CONFIG_EMBEDDED
9960 !new_object->true_share &&
9961 new_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
9962 #endif /* !CONFIG_EMBEDDED */
9963 new_object->internal) {
9964 new_shadow = new_object->shadow;
9965 if (new_shadow == VM_OBJECT_NULL) {
9966 break;
9967 }
9968 vm_object_lock_shared(new_shadow);
9969 vm_object_unlock(new_object);
9970 new_object = new_shadow;
9971 }
9972 if (new_object != VM_OBJECT_NULL) {
9973 if (!new_object->internal) {
9974 /*
9975 * The new mapping is backed
9976 * by an external object. We
9977 * don't want malloc'ed memory
9978 * to be replaced with such a
9979 * non-anonymous mapping, so
9980 * let's go off the optimized
9981 * path...
9982 */
9983 vm_map_copy_overwrite_aligned_src_not_internal++;
9984 vm_object_unlock(new_object);
9985 goto slow_copy;
9986 }
9987 #if !CONFIG_EMBEDDED
9988 if (new_object->true_share ||
9989 new_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
9990 /*
9991 * Same if there's a "true_share"
9992 * object in the shadow chain, or
9993 * an object with a non-default
9994 * (SYMMETRIC) copy strategy.
9995 */
9996 vm_map_copy_overwrite_aligned_src_not_symmetric++;
9997 vm_object_unlock(new_object);
9998 goto slow_copy;
9999 }
10000 #endif /* !CONFIG_EMBEDDED */
10001 vm_object_unlock(new_object);
10002 }
10003 /*
10004 * The new mapping is still backed by
10005 * anonymous (internal) memory, so it's
10006 * OK to substitute it for the original
10007 * malloc() mapping.
10008 */
10009 }
10010
10011 if (old_object != VM_OBJECT_NULL) {
10012 if (entry->is_sub_map) {
10013 if (entry->use_pmap) {
10014 #ifndef NO_NESTED_PMAP
10015 pmap_unnest(dst_map->pmap,
10016 (addr64_t)entry->vme_start,
10017 entry->vme_end - entry->vme_start);
10018 #endif /* NO_NESTED_PMAP */
10019 if (dst_map->mapped_in_other_pmaps) {
10020 /* clean up parent */
10021 /* map/maps */
10022 vm_map_submap_pmap_clean(
10023 dst_map, entry->vme_start,
10024 entry->vme_end,
10025 VME_SUBMAP(entry),
10026 VME_OFFSET(entry));
10027 }
10028 } else {
10029 vm_map_submap_pmap_clean(
10030 dst_map, entry->vme_start,
10031 entry->vme_end,
10032 VME_SUBMAP(entry),
10033 VME_OFFSET(entry));
10034 }
10035 vm_map_deallocate(VME_SUBMAP(entry));
10036 } else {
10037 if (dst_map->mapped_in_other_pmaps) {
10038 vm_object_pmap_protect_options(
10039 VME_OBJECT(entry),
10040 VME_OFFSET(entry),
10041 entry->vme_end
10042 - entry->vme_start,
10043 PMAP_NULL,
10044 entry->vme_start,
10045 VM_PROT_NONE,
10046 PMAP_OPTIONS_REMOVE);
10047 } else {
10048 pmap_remove_options(
10049 dst_map->pmap,
10050 (addr64_t)(entry->vme_start),
10051 (addr64_t)(entry->vme_end),
10052 PMAP_OPTIONS_REMOVE);
10053 }
10054 vm_object_deallocate(old_object);
10055 }
10056 }
10057
10058 if (entry->iokit_acct) {
10059 /* keep using iokit accounting */
10060 entry->use_pmap = FALSE;
10061 } else {
10062 /* use pmap accounting */
10063 entry->use_pmap = TRUE;
10064 }
10065 entry->is_sub_map = FALSE;
10066 VME_OBJECT_SET(entry, VME_OBJECT(copy_entry));
10067 object = VME_OBJECT(entry);
10068 entry->needs_copy = copy_entry->needs_copy;
10069 entry->wired_count = 0;
10070 entry->user_wired_count = 0;
10071 offset = VME_OFFSET(copy_entry);
10072 VME_OFFSET_SET(entry, offset);
10073
10074 vm_map_copy_entry_unlink(copy, copy_entry);
10075 vm_map_copy_entry_dispose(copy, copy_entry);
10076
10077 /*
10078 * We could try to push pages into the pmap at this point, BUT
10079 * this optimization only saved, on average, 2 us per page if ALL
10080 * the pages in the source were currently mapped
10081 * and ALL the pages in the dest were touched; if fewer than 2/3
10082 * of the pages were touched, this optimization actually cost more cycles.
10083 * It also puts a lot of pressure on the pmap layer w/r/t mapping structures.
10084 */
10085
10086 /*
10087 * Set up for the next iteration. The map
10088 * has not been unlocked, so the next
10089 * address should be at the end of this
10090 * entry, and the next map entry should be
10091 * the one following it.
10092 */
10093
10094 start = tmp_entry->vme_end;
10095 tmp_entry = tmp_entry->vme_next;
10096 } else {
10097 vm_map_version_t version;
10098 vm_object_t dst_object;
10099 vm_object_offset_t dst_offset;
10100 kern_return_t r;
10101
10102 slow_copy:
10103 if (entry->needs_copy) {
10104 VME_OBJECT_SHADOW(entry,
10105 (entry->vme_end -
10106 entry->vme_start));
10107 entry->needs_copy = FALSE;
10108 }
10109
10110 dst_object = VME_OBJECT(entry);
10111 dst_offset = VME_OFFSET(entry);
10112
10113 /*
10114 * Take an object reference, and record
10115 * the map version information so that the
10116 * map can be safely unlocked.
10117 */
10118
10119 if (dst_object == VM_OBJECT_NULL) {
10120 /*
10121 * We would usually have just taken the
10122 * optimized path above if the destination
10123 * object has not been allocated yet. But we
10124 * now disable that optimization if the copy
10125 * entry's object is not backed by anonymous
10126 * memory to avoid replacing malloc'ed
10127 * (i.e. re-usable) anonymous memory with a
10128 * not-so-anonymous mapping.
10129 * So we have to handle this case here and
10130 * allocate a new VM object for this map entry.
10131 */
10132 dst_object = vm_object_allocate(
10133 entry->vme_end - entry->vme_start);
10134 dst_offset = 0;
10135 VME_OBJECT_SET(entry, dst_object);
10136 VME_OFFSET_SET(entry, dst_offset);
10137 assert(entry->use_pmap);
10138 }
10139
10140 vm_object_reference(dst_object);
10141
10142 /* account for unlock bumping up timestamp */
10143 version.main_timestamp = dst_map->timestamp + 1;
10144
10145 vm_map_unlock(dst_map);
10146
10147 /*
10148 * Copy as much as possible in one pass
10149 */
10150
10151 copy_size = size;
10152 r = vm_fault_copy(
10153 VME_OBJECT(copy_entry),
10154 VME_OFFSET(copy_entry),
10155 &copy_size,
10156 dst_object,
10157 dst_offset,
10158 dst_map,
10159 &version,
10160 THREAD_UNINT );
10161
10162 /*
10163 * Release the object reference
10164 */
10165
10166 vm_object_deallocate(dst_object);
10167
10168 /*
10169 * If a hard error occurred, return it now
10170 */
10171
10172 if (r != KERN_SUCCESS) {
10173 return r;
10174 }
10175
10176 if (copy_size != 0) {
10177 /*
10178 * Dispose of the copied region
10179 */
10180
10181 vm_map_copy_clip_end(copy, copy_entry,
10182 copy_entry->vme_start + copy_size);
10183 vm_map_copy_entry_unlink(copy, copy_entry);
10184 vm_object_deallocate(VME_OBJECT(copy_entry));
10185 vm_map_copy_entry_dispose(copy, copy_entry);
10186 }
10187
10188 /*
10189 * Pick up in the destination map where we left off.
10190 *
10191 * Use the version information to avoid a lookup
10192 * in the normal case.
10193 */
10194
10195 start += copy_size;
10196 vm_map_lock(dst_map);
10197 if (version.main_timestamp == dst_map->timestamp &&
10198 copy_size != 0) {
10199 /* We can safely use saved tmp_entry value */
10200
10201 if (tmp_entry->map_aligned &&
10202 !VM_MAP_PAGE_ALIGNED(
10203 start,
10204 VM_MAP_PAGE_MASK(dst_map))) {
10205 /* no longer map-aligned */
10206 tmp_entry->map_aligned = FALSE;
10207 }
10208 vm_map_clip_end(dst_map, tmp_entry, start);
10209 tmp_entry = tmp_entry->vme_next;
10210 } else {
10211 /* Must do lookup of tmp_entry */
10212
10213 if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
10214 vm_map_unlock(dst_map);
10215 return KERN_INVALID_ADDRESS;
10216 }
10217 if (tmp_entry->map_aligned &&
10218 !VM_MAP_PAGE_ALIGNED(
10219 start,
10220 VM_MAP_PAGE_MASK(dst_map))) {
10221 /* no longer map-aligned */
10222 tmp_entry->map_aligned = FALSE;
10223 }
10224 vm_map_clip_start(dst_map, tmp_entry, start);
10225 }
10226 }
10227 }/* while */
10228
10229 return KERN_SUCCESS;
10230 }/* vm_map_copy_overwrite_aligned */
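
/*
 * A minimal sketch of the "virtual vs. physical copy" tradeoff #1 used
 * above, pulled out as a standalone predicate for illustration only.
 * The helper name use_physical_copy_tradeoff1() and its local constants
 * are hypothetical; the real code tests the same condition inline (and
 * only on non-embedded configurations).
 */
#if 0 /* illustrative sketch only */
static boolean_t
use_physical_copy_tradeoff1(
        vm_object_t     src_object,
        vm_map_size_t   copy_size)
{
        if (src_object != VM_OBJECT_NULL &&
            src_object->vo_size >= (64 * 1024 * 1024) &&  /* large source object */
            copy_size <= (128 * 1024)) {                  /* small copied range */
                /*
                 * Prefer a physical (slow) copy: a virtual copy of a few
                 * pages could keep the whole 64MB+ object alive via
                 * copy-on-write references.
                 */
                return TRUE;
        }
        return FALSE;
}
#endif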
10231
10232 /*
10233 * Routine: vm_map_copyin_kernel_buffer [internal use only]
10234 *
10235 * Description:
10236 * Copy in data to a kernel buffer from space in the
10237 * source map. The original space may be optionally
10238 * deallocated.
10239 *
10240 * If successful, returns a new copy object.
10241 */
10242 static kern_return_t
10243 vm_map_copyin_kernel_buffer(
10244 vm_map_t src_map,
10245 vm_map_offset_t src_addr,
10246 vm_map_size_t len,
10247 boolean_t src_destroy,
10248 vm_map_copy_t *copy_result)
10249 {
10250 kern_return_t kr;
10251 vm_map_copy_t copy;
10252 vm_size_t kalloc_size;
10253
10254 if (len > msg_ool_size_small) {
10255 return KERN_INVALID_ARGUMENT;
10256 }
10257
10258 kalloc_size = (vm_size_t)(cpy_kdata_hdr_sz + len);
10259
10260 copy = (vm_map_copy_t)kalloc(kalloc_size);
10261 if (copy == VM_MAP_COPY_NULL) {
10262 return KERN_RESOURCE_SHORTAGE;
10263 }
10264 copy->type = VM_MAP_COPY_KERNEL_BUFFER;
10265 copy->size = len;
10266 copy->offset = 0;
10267
10268 kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t)len);
10269 if (kr != KERN_SUCCESS) {
10270 kfree(copy, kalloc_size);
10271 return kr;
10272 }
10273 if (src_destroy) {
10274 (void) vm_map_remove(
10275 src_map,
10276 vm_map_trunc_page(src_addr,
10277 VM_MAP_PAGE_MASK(src_map)),
10278 vm_map_round_page(src_addr + len,
10279 VM_MAP_PAGE_MASK(src_map)),
10280 (VM_MAP_REMOVE_INTERRUPTIBLE |
10281 VM_MAP_REMOVE_WAIT_FOR_KWIRE |
10282 ((src_map == kernel_map) ? VM_MAP_REMOVE_KUNWIRE : VM_MAP_REMOVE_NO_FLAGS)));
10283 }
10284 *copy_result = copy;
10285 return KERN_SUCCESS;
10286 }
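
/*
 * A minimal sketch of the small-copy path, assuming a hypothetical in-file
 * helper; real callers reach vm_map_copyin_kernel_buffer() through
 * vm_map_copyin_internal(), which selects it only for lengths below
 * msg_ool_size_small.
 */
#if 0 /* illustrative sketch only */
static kern_return_t
copyin_small_example(
        vm_map_t        src_map,
        vm_map_offset_t src_addr)
{
        vm_map_copy_t   copy;
        kern_return_t   kr;

        /* 256 bytes is well below msg_ool_size_small */
        kr = vm_map_copyin_kernel_buffer(src_map, src_addr, 256,
            FALSE,              /* src_destroy: keep the source mapping */
            &copy);
        if (kr != KERN_SUCCESS) {
                return kr;
        }
        /* ... hand "copy" to a vm_map_copyout*() variant, or drop it ... */
        vm_map_copy_discard(copy);
        return KERN_SUCCESS;
}
#endif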
10287
10288 /*
10289 * Routine: vm_map_copyout_kernel_buffer [internal use only]
10290 *
10291 * Description:
10292 * Copy out data from a kernel buffer into space in the
10293 * destination map. The space may be optionally dynamically
10294 * allocated.
10295 *
10296 * If successful, consumes the copy object.
10297 * Otherwise, the caller is responsible for it.
10298 */
10299 static int vm_map_copyout_kernel_buffer_failures = 0;
10300 static kern_return_t
10301 vm_map_copyout_kernel_buffer(
10302 vm_map_t map,
10303 vm_map_address_t *addr, /* IN/OUT */
10304 vm_map_copy_t copy,
10305 vm_map_size_t copy_size,
10306 boolean_t overwrite,
10307 boolean_t consume_on_success)
10308 {
10309 kern_return_t kr = KERN_SUCCESS;
10310 thread_t thread = current_thread();
10311
10312 assert(copy->size == copy_size);
10313
10314 /*
10315 * check for corrupted vm_map_copy structure
10316 */
10317 if (copy_size > msg_ool_size_small || copy->offset) {
10318 panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
10319 (long long)copy->size, (long long)copy->offset);
10320 }
10321
10322 if (!overwrite) {
10323 /*
10324 * Allocate space in the target map for the data
10325 */
10326 *addr = 0;
10327 kr = vm_map_enter(map,
10328 addr,
10329 vm_map_round_page(copy_size,
10330 VM_MAP_PAGE_MASK(map)),
10331 (vm_map_offset_t) 0,
10332 VM_FLAGS_ANYWHERE,
10333 VM_MAP_KERNEL_FLAGS_NONE,
10334 VM_KERN_MEMORY_NONE,
10335 VM_OBJECT_NULL,
10336 (vm_object_offset_t) 0,
10337 FALSE,
10338 VM_PROT_DEFAULT,
10339 VM_PROT_ALL,
10340 VM_INHERIT_DEFAULT);
10341 if (kr != KERN_SUCCESS) {
10342 return kr;
10343 }
10344 #if KASAN
10345 if (map->pmap == kernel_pmap) {
10346 kasan_notify_address(*addr, copy->size);
10347 }
10348 #endif
10349 }
10350
10351 /*
10352 * Copyout the data from the kernel buffer to the target map.
10353 */
10354 if (thread->map == map) {
10355 /*
10356 * If the target map is the current map, just do
10357 * the copy.
10358 */
10359 assert((vm_size_t)copy_size == copy_size);
10360 if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
10361 kr = KERN_INVALID_ADDRESS;
10362 }
10363 } else {
10364 vm_map_t oldmap;
10365
10366 /*
10367 * If the target map is another map, assume the
10368 * target's address space identity for the duration
10369 * of the copy.
10370 */
10371 vm_map_reference(map);
10372 oldmap = vm_map_switch(map);
10373
10374 assert((vm_size_t)copy_size == copy_size);
10375 if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
10376 vm_map_copyout_kernel_buffer_failures++;
10377 kr = KERN_INVALID_ADDRESS;
10378 }
10379
10380 (void) vm_map_switch(oldmap);
10381 vm_map_deallocate(map);
10382 }
10383
10384 if (kr != KERN_SUCCESS) {
10385 /* the copy failed, clean up */
10386 if (!overwrite) {
10387 /*
10388 * Deallocate the space we allocated in the target map.
10389 */
10390 (void) vm_map_remove(
10391 map,
10392 vm_map_trunc_page(*addr,
10393 VM_MAP_PAGE_MASK(map)),
10394 vm_map_round_page((*addr +
10395 vm_map_round_page(copy_size,
10396 VM_MAP_PAGE_MASK(map))),
10397 VM_MAP_PAGE_MASK(map)),
10398 VM_MAP_REMOVE_NO_FLAGS);
10399 *addr = 0;
10400 }
10401 } else {
10402 /* copy was successful, discard the copy structure */
10403 if (consume_on_success) {
10404 kfree(copy, copy_size + cpy_kdata_hdr_sz);
10405 }
10406 }
10407
10408 return kr;
10409 }
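
/*
 * A minimal sketch of the two modes of the routine above, for illustration
 * only: with "overwrite" FALSE it allocates fresh space in the destination
 * map and returns the address; with "overwrite" TRUE the caller supplies
 * the destination address in *addr. The wrapper below is hypothetical.
 */
#if 0 /* illustrative sketch only */
static kern_return_t
copyout_kernel_buffer_example(
        vm_map_t        dst_map,
        vm_map_copy_t   copy,
        vm_map_address_t *addr)
{
        *addr = 0;      /* filled in by the !overwrite path */
        return vm_map_copyout_kernel_buffer(dst_map, addr, copy,
            copy->size,         /* copy_size must match copy->size */
            FALSE,              /* overwrite: allocate space instead */
            TRUE);              /* consume_on_success: free "copy" if OK */
}
#endif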
10410
10411 /*
10412 * Routine: vm_map_copy_insert [internal use only]
10413 *
10414 * Description:
10415 * Link a copy chain ("copy") into a map at the
10416 * specified location (after "where").
10417 * Side effects:
10418 * The copy chain is destroyed.
10419 */
10420 static void
10421 vm_map_copy_insert(
10422 vm_map_t map,
10423 vm_map_entry_t after_where,
10424 vm_map_copy_t copy)
10425 {
10426 vm_map_entry_t entry;
10427
10428 while (vm_map_copy_first_entry(copy) != vm_map_copy_to_entry(copy)) {
10429 entry = vm_map_copy_first_entry(copy);
10430 vm_map_copy_entry_unlink(copy, entry);
10431 vm_map_store_entry_link(map, after_where, entry,
10432 VM_MAP_KERNEL_FLAGS_NONE);
10433 after_where = entry;
10434 }
10435 zfree(vm_map_copy_zone, copy);
10436 }
10437
10438 void
10439 vm_map_copy_remap(
10440 vm_map_t map,
10441 vm_map_entry_t where,
10442 vm_map_copy_t copy,
10443 vm_map_offset_t adjustment,
10444 vm_prot_t cur_prot,
10445 vm_prot_t max_prot,
10446 vm_inherit_t inheritance)
10447 {
10448 vm_map_entry_t copy_entry, new_entry;
10449
10450 for (copy_entry = vm_map_copy_first_entry(copy);
10451 copy_entry != vm_map_copy_to_entry(copy);
10452 copy_entry = copy_entry->vme_next) {
10453 /* get a new VM map entry for the map */
10454 new_entry = vm_map_entry_create(map,
10455 !map->hdr.entries_pageable);
10456 /* copy the "copy entry" to the new entry */
10457 vm_map_entry_copy(new_entry, copy_entry);
10458 /* adjust "start" and "end" */
10459 new_entry->vme_start += adjustment;
10460 new_entry->vme_end += adjustment;
10461 /* clear some attributes */
10462 new_entry->inheritance = inheritance;
10463 new_entry->protection = cur_prot;
10464 new_entry->max_protection = max_prot;
10465 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
10466 /* take an extra reference on the entry's "object" */
10467 if (new_entry->is_sub_map) {
10468 assert(!new_entry->use_pmap); /* not nested */
10469 vm_map_lock(VME_SUBMAP(new_entry));
10470 vm_map_reference(VME_SUBMAP(new_entry));
10471 vm_map_unlock(VME_SUBMAP(new_entry));
10472 } else {
10473 vm_object_reference(VME_OBJECT(new_entry));
10474 }
10475 /* insert the new entry in the map */
10476 vm_map_store_entry_link(map, where, new_entry,
10477 VM_MAP_KERNEL_FLAGS_NONE);
10478 /* continue inserting the "copy entries" after the new entry */
10479 where = new_entry;
10480 }
10481 }
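
/*
 * vm_map_copy_insert() above moves the copy's entries into the map and
 * destroys the copy; vm_map_copy_remap() clones them instead and leaves
 * "copy" intact. A minimal sketch of the non-consuming flavor, with the
 * insertion point and address adjustment assumed to be precomputed by the
 * caller; the wrapper name is hypothetical.
 */
#if 0 /* illustrative sketch only */
static void
copy_remap_example(
        vm_map_t        map,
        vm_map_entry_t  where,
        vm_map_copy_t   copy,
        vm_map_offset_t adjustment)
{
        /* clone the copy's entries after "where"; "copy" stays valid */
        vm_map_copy_remap(map, where, copy, adjustment,
            VM_PROT_DEFAULT,    /* cur_prot for the new entries */
            VM_PROT_ALL,        /* max_prot */
            VM_INHERIT_DEFAULT);
        /* the caller still owns "copy" and must discard it eventually */
}
#endif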
10482
10483
10484 /*
10485 * Returns true if *size matches (or is in the range of) copy->size.
10486 * Upon returning true, the *size field is updated with the actual size of the
10487 * copy object (which may differ for VM_MAP_COPY_ENTRY_LIST types).
10488 */
10489 boolean_t
10490 vm_map_copy_validate_size(
10491 vm_map_t dst_map,
10492 vm_map_copy_t copy,
10493 vm_map_size_t *size)
10494 {
10495 if (copy == VM_MAP_COPY_NULL) {
10496 return FALSE;
10497 }
10498 vm_map_size_t copy_sz = copy->size;
10499 vm_map_size_t sz = *size;
10500 switch (copy->type) {
10501 case VM_MAP_COPY_OBJECT:
10502 case VM_MAP_COPY_KERNEL_BUFFER:
10503 if (sz == copy_sz) {
10504 return TRUE;
10505 }
10506 break;
10507 case VM_MAP_COPY_ENTRY_LIST:
10508 /*
10509 * potential page-size rounding prevents us from exactly
10510 * validating this flavor of vm_map_copy, but we can at least
10511 * assert that it's within a range.
10512 */
10513 if (copy_sz >= sz &&
10514 copy_sz <= vm_map_round_page(sz, VM_MAP_PAGE_MASK(dst_map))) {
10515 *size = copy_sz;
10516 return TRUE;
10517 }
10518 break;
10519 default:
10520 break;
10521 }
10522 return FALSE;
10523 }
10524
10525 /*
10526 * Routine: vm_map_copyout_size
10527 *
10528 * Description:
10529 * Copy out a copy chain ("copy") into newly-allocated
10530 * space in the destination map. Uses a prevalidated
10531 * size for the copy object (vm_map_copy_validate_size).
10532 *
10533 * If successful, consumes the copy object.
10534 * Otherwise, the caller is responsible for it.
10535 */
10536 kern_return_t
10537 vm_map_copyout_size(
10538 vm_map_t dst_map,
10539 vm_map_address_t *dst_addr, /* OUT */
10540 vm_map_copy_t copy,
10541 vm_map_size_t copy_size)
10542 {
10543 return vm_map_copyout_internal(dst_map, dst_addr, copy, copy_size,
10544 TRUE, /* consume_on_success */
10545 VM_PROT_DEFAULT,
10546 VM_PROT_ALL,
10547 VM_INHERIT_DEFAULT);
10548 }
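
/*
 * A minimal sketch of pairing vm_map_copy_validate_size() with
 * vm_map_copyout_size(): the caller's expected size is validated (and, for
 * entry-list copies, replaced with the copy's actual size) before the
 * copyout. The wrapper name is hypothetical.
 */
#if 0 /* illustrative sketch only */
static kern_return_t
copyout_checked_example(
        vm_map_t        dst_map,
        vm_map_copy_t   copy,
        vm_map_size_t   expected_size,
        vm_map_address_t *dst_addr)
{
        vm_map_size_t   size = expected_size;

        if (!vm_map_copy_validate_size(dst_map, copy, &size)) {
                /* the caller-supplied size doesn't match the copy object */
                return KERN_FAILURE;
        }
        /* "size" now holds the actual size of the copy object */
        return vm_map_copyout_size(dst_map, dst_addr, copy, size);
}
#endif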
10549
10550 /*
10551 * Routine: vm_map_copyout
10552 *
10553 * Description:
10554 * Copy out a copy chain ("copy") into newly-allocated
10555 * space in the destination map.
10556 *
10557 * If successful, consumes the copy object.
10558 * Otherwise, the caller is responsible for it.
10559 */
10560 kern_return_t
10561 vm_map_copyout(
10562 vm_map_t dst_map,
10563 vm_map_address_t *dst_addr, /* OUT */
10564 vm_map_copy_t copy)
10565 {
10566 return vm_map_copyout_internal(dst_map, dst_addr, copy, copy ? copy->size : 0,
10567 TRUE, /* consume_on_success */
10568 VM_PROT_DEFAULT,
10569 VM_PROT_ALL,
10570 VM_INHERIT_DEFAULT);
10571 }
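
/*
 * A minimal end-to-end sketch: vm_map_copyin() builds a copy object from a
 * range in one map and vm_map_copyout() places it at a newly allocated
 * address in another. The wrapper is hypothetical and error handling is
 * reduced to the essentials.
 */
#if 0 /* illustrative sketch only */
static kern_return_t
copy_between_maps_example(
        vm_map_t        src_map,
        vm_map_offset_t src_addr,
        vm_map_size_t   len,
        vm_map_t        dst_map,
        vm_map_address_t *dst_addr)
{
        vm_map_copy_t   copy;
        kern_return_t   kr;

        kr = vm_map_copyin(src_map, src_addr, len,
            FALSE,              /* src_destroy: keep the source mapping */
            &copy);
        if (kr != KERN_SUCCESS) {
                return kr;
        }
        kr = vm_map_copyout(dst_map, dst_addr, copy);
        if (kr != KERN_SUCCESS) {
                /* copyout does not consume the copy object on failure */
                vm_map_copy_discard(copy);
        }
        return kr;
}
#endif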
10572
10573 kern_return_t
10574 vm_map_copyout_internal(
10575 vm_map_t dst_map,
10576 vm_map_address_t *dst_addr, /* OUT */
10577 vm_map_copy_t copy,
10578 vm_map_size_t copy_size,
10579 boolean_t consume_on_success,
10580 vm_prot_t cur_protection,
10581 vm_prot_t max_protection,
10582 vm_inherit_t inheritance)
10583 {
10584 vm_map_size_t size;
10585 vm_map_size_t adjustment;
10586 vm_map_offset_t start;
10587 vm_object_offset_t vm_copy_start;
10588 vm_map_entry_t last;
10589 vm_map_entry_t entry;
10590 vm_map_entry_t hole_entry;
10591
10592 /*
10593 * Check for null copy object.
10594 */
10595
10596 if (copy == VM_MAP_COPY_NULL) {
10597 *dst_addr = 0;
10598 return KERN_SUCCESS;
10599 }
10600
10601 if (copy->size != copy_size) {
10602 *dst_addr = 0;
10603 return KERN_FAILURE;
10604 }
10605
10606 /*
10607 * Check for special copy object, created
10608 * by vm_map_copyin_object.
10609 */
10610
10611 if (copy->type == VM_MAP_COPY_OBJECT) {
10612 vm_object_t object = copy->cpy_object;
10613 kern_return_t kr;
10614 vm_object_offset_t offset;
10615
10616 offset = vm_object_trunc_page(copy->offset);
10617 size = vm_map_round_page((copy_size +
10618 (vm_map_size_t)(copy->offset -
10619 offset)),
10620 VM_MAP_PAGE_MASK(dst_map));
10621 *dst_addr = 0;
10622 kr = vm_map_enter(dst_map, dst_addr, size,
10623 (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
10624 VM_MAP_KERNEL_FLAGS_NONE,
10625 VM_KERN_MEMORY_NONE,
10626 object, offset, FALSE,
10627 VM_PROT_DEFAULT, VM_PROT_ALL,
10628 VM_INHERIT_DEFAULT);
10629 if (kr != KERN_SUCCESS) {
10630 return kr;
10631 }
10632 /* Account for non-pagealigned copy object */
10633 *dst_addr += (vm_map_offset_t)(copy->offset - offset);
10634 if (consume_on_success) {
10635 zfree(vm_map_copy_zone, copy);
10636 }
10637 return KERN_SUCCESS;
10638 }
10639
10640 /*
10641 * Check for special kernel buffer allocated
10642 * by new_ipc_kmsg_copyin.
10643 */
10644
10645 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
10646 return vm_map_copyout_kernel_buffer(dst_map, dst_addr,
10647 copy, copy_size, FALSE,
10648 consume_on_success);
10649 }
10650
10651
10652 /*
10653 * Find space for the data
10654 */
10655
10656 vm_copy_start = vm_map_trunc_page((vm_map_size_t)copy->offset,
10657 VM_MAP_COPY_PAGE_MASK(copy));
10658 size = vm_map_round_page((vm_map_size_t)copy->offset + copy_size,
10659 VM_MAP_COPY_PAGE_MASK(copy))
10660 - vm_copy_start;
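        /*
         * For example, assuming 4K pages: with copy->offset == 0x1234 and
         * copy_size == 0x1000, vm_copy_start is 0x1000 and size is
         * round(0x2234) - 0x1000 == 0x2000, i.e. the two pages touched by
         * the unaligned range. The sub-page offset (0x234) is added back
         * into *dst_addr at "after_adjustments" below.
         */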
10661
10662
10663 StartAgain:;
10664
10665 vm_map_lock(dst_map);
10666 if (dst_map->disable_vmentry_reuse == TRUE) {
10667 VM_MAP_HIGHEST_ENTRY(dst_map, entry, start);
10668 last = entry;
10669 } else {
10670 if (dst_map->holelistenabled) {
10671 hole_entry = CAST_TO_VM_MAP_ENTRY(dst_map->holes_list);
10672
10673 if (hole_entry == NULL) {
10674 /*
10675 * No more space in the map?
10676 */
10677 vm_map_unlock(dst_map);
10678 return KERN_NO_SPACE;
10679 }
10680
10681 last = hole_entry;
10682 start = last->vme_start;
10683 } else {
10684 assert(first_free_is_valid(dst_map));
10685 start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
10686 vm_map_min(dst_map) : last->vme_end;
10687 }
10688 start = vm_map_round_page(start,
10689 VM_MAP_PAGE_MASK(dst_map));
10690 }
10691
10692 while (TRUE) {
10693 vm_map_entry_t next = last->vme_next;
10694 vm_map_offset_t end = start + size;
10695
10696 if ((end > dst_map->max_offset) || (end < start)) {
10697 if (dst_map->wait_for_space) {
10698 if (size <= (dst_map->max_offset - dst_map->min_offset)) {
10699 assert_wait((event_t) dst_map,
10700 THREAD_INTERRUPTIBLE);
10701 vm_map_unlock(dst_map);
10702 thread_block(THREAD_CONTINUE_NULL);
10703 goto StartAgain;
10704 }
10705 }
10706 vm_map_unlock(dst_map);
10707 return KERN_NO_SPACE;
10708 }
10709
10710 if (dst_map->holelistenabled) {
10711 if (last->vme_end >= end) {
10712 break;
10713 }
10714 } else {
10715 /*
10716 * If there are no more entries, we must win.
10717 *
10718 * OR
10719 *
10720 * If there is another entry, it must be
10721 * after the end of the potential new region.
10722 */
10723
10724 if (next == vm_map_to_entry(dst_map)) {
10725 break;
10726 }
10727
10728 if (next->vme_start >= end) {
10729 break;
10730 }
10731 }
10732
10733 last = next;
10734
10735 if (dst_map->holelistenabled) {
10736 if (last == CAST_TO_VM_MAP_ENTRY(dst_map->holes_list)) {
10737 /*
10738 * Wrapped around
10739 */
10740 vm_map_unlock(dst_map);
10741 return KERN_NO_SPACE;
10742 }
10743 start = last->vme_start;
10744 } else {
10745 start = last->vme_end;
10746 }
10747 start = vm_map_round_page(start,
10748 VM_MAP_PAGE_MASK(dst_map));
10749 }
10750
10751 if (dst_map->holelistenabled) {
10752 if (vm_map_lookup_entry(dst_map, last->vme_start, &last)) {
10753 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", last, (unsigned long long)last->vme_start);
10754 }
10755 }
10756
10757
10758 adjustment = start - vm_copy_start;
10759 if (!consume_on_success) {
10760 /*
10761 * We're not allowed to consume "copy", so we'll have to
10762 * copy its map entries into the destination map below.
10763 * No need to re-allocate map entries from the correct
10764 * (pageable or not) zone, since we'll get new map entries
10765 * during the transfer.
10766 * We'll also adjust the map entries' "start" and "end"
10767 * during the transfer, to keep "copy"'s entries consistent
10768 * with its "offset".
10769 */
10770 goto after_adjustments;
10771 }
10772
10773 /*
10774 * Since we're going to just drop the map
10775 * entries from the copy into the destination
10776 * map, they must come from the same pool.
10777 */
10778
10779 if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
10780 /*
10781 * Mismatches occur when dealing with the default
10782 * pager.
10783 */
10784 zone_t old_zone;
10785 vm_map_entry_t next, new;
10786
10787 /*
10788 * Find the zone that the copies were allocated from
10789 */
10790
10791 entry = vm_map_copy_first_entry(copy);
10792
10793 /*
10794 * Reinitialize the copy so that vm_map_copy_entry_link
10795 * will work.
10796 */
10797 vm_map_store_copy_reset(copy, entry);
10798 copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;
10799
10800 /*
10801 * Copy each entry.
10802 */
10803 while (entry != vm_map_copy_to_entry(copy)) {
10804 new = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
10805 vm_map_entry_copy_full(new, entry);
10806 new->vme_no_copy_on_read = FALSE;
10807 assert(!new->iokit_acct);
10808 if (new->is_sub_map) {
10809 /* clr address space specifics */
10810 new->use_pmap = FALSE;
10811 }
10812 vm_map_copy_entry_link(copy,
10813 vm_map_copy_last_entry(copy),
10814 new);
10815 next = entry->vme_next;
10816 old_zone = entry->from_reserved_zone ? vm_map_entry_reserved_zone : vm_map_entry_zone;
10817 zfree(old_zone, entry);
10818 entry = next;
10819 }
10820 }
10821
10822 /*
10823 * Adjust the addresses in the copy chain, and
10824 * reset the region attributes.
10825 */
10826
10827 for (entry = vm_map_copy_first_entry(copy);
10828 entry != vm_map_copy_to_entry(copy);
10829 entry = entry->vme_next) {
10830 if (VM_MAP_PAGE_SHIFT(dst_map) == PAGE_SHIFT) {
10831 /*
10832 * We're injecting this copy entry into a map that
10833 * has the standard page alignment, so clear
10834 * "map_aligned" (which might have been inherited
10835 * from the original map entry).
10836 */
10837 entry->map_aligned = FALSE;
10838 }
10839
10840 entry->vme_start += adjustment;
10841 entry->vme_end += adjustment;
10842
10843 if (entry->map_aligned) {
10844 assert(VM_MAP_PAGE_ALIGNED(entry->vme_start,
10845 VM_MAP_PAGE_MASK(dst_map)));
10846 assert(VM_MAP_PAGE_ALIGNED(entry->vme_end,
10847 VM_MAP_PAGE_MASK(dst_map)));
10848 }
10849
10850 entry->inheritance = VM_INHERIT_DEFAULT;
10851 entry->protection = VM_PROT_DEFAULT;
10852 entry->max_protection = VM_PROT_ALL;
10853 entry->behavior = VM_BEHAVIOR_DEFAULT;
10854
10855 /*
10856 * If the entry is now wired,
10857 * map the pages into the destination map.
10858 */
10859 if (entry->wired_count != 0) {
10860 vm_map_offset_t va;
10861 vm_object_offset_t offset;
10862 vm_object_t object;
10863 vm_prot_t prot;
10864 int type_of_fault;
10865
10866 object = VME_OBJECT(entry);
10867 offset = VME_OFFSET(entry);
10868 va = entry->vme_start;
10869
10870 pmap_pageable(dst_map->pmap,
10871 entry->vme_start,
10872 entry->vme_end,
10873 TRUE);
10874
10875 while (va < entry->vme_end) {
10876 vm_page_t m;
10877 struct vm_object_fault_info fault_info = {};
10878
10879 /*
10880 * Look up the page in the object.
10881 * Assert that the page will be found in the
10882 * top object:
10883 * either
10884 * the object was newly created by
10885 * vm_object_copy_slowly, and has
10886 * copies of all of the pages from
10887 * the source object
10888 * or
10889 * the object was moved from the old
10890 * map entry; because the old map
10891 * entry was wired, all of the pages
10892 * were in the top-level object.
10893 * (XXX not true if we wire pages for
10894 * reading)
10895 */
10896 vm_object_lock(object);
10897
10898 m = vm_page_lookup(object, offset);
10899 if (m == VM_PAGE_NULL || !VM_PAGE_WIRED(m) ||
10900 m->vmp_absent) {
10901 panic("vm_map_copyout: wiring %p", m);
10902 }
10903
10904 prot = entry->protection;
10905
10906 if (override_nx(dst_map, VME_ALIAS(entry)) &&
10907 prot) {
10908 prot |= VM_PROT_EXECUTE;
10909 }
10910
10911 type_of_fault = DBG_CACHE_HIT_FAULT;
10912
10913 fault_info.user_tag = VME_ALIAS(entry);
10914 fault_info.pmap_options = 0;
10915 if (entry->iokit_acct ||
10916 (!entry->is_sub_map && !entry->use_pmap)) {
10917 fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
10918 }
10919
10920 vm_fault_enter(m,
10921 dst_map->pmap,
10922 va,
10923 prot,
10924 prot,
10925 VM_PAGE_WIRED(m),
10926 FALSE, /* change_wiring */
10927 VM_KERN_MEMORY_NONE, /* tag - not wiring */
10928 &fault_info,
10929 NULL, /* need_retry */
10930 &type_of_fault);
10931
10932 vm_object_unlock(object);
10933
10934 offset += PAGE_SIZE_64;
10935 va += PAGE_SIZE;
10936 }
10937 }
10938 }
10939
10940 after_adjustments:
10941
10942 /*
10943 * Correct the page alignment for the result
10944 */
10945
10946 *dst_addr = start + (copy->offset - vm_copy_start);
10947
10948 #if KASAN
10949 kasan_notify_address(*dst_addr, size);
10950 #endif
10951
10952 /*
10953 * Update the hints and the map size
10954 */
10955
10956 if (consume_on_success) {
10957 SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy));
10958 } else {
10959 SAVE_HINT_MAP_WRITE(dst_map, last);
10960 }
10961
10962 dst_map->size += size;
10963
10964 /*
10965 * Link in the copy
10966 */
10967
10968 if (consume_on_success) {
10969 vm_map_copy_insert(dst_map, last, copy);
10970 } else {
10971 vm_map_copy_remap(dst_map, last, copy, adjustment,
10972 cur_protection, max_protection,
10973 inheritance);
10974 }
10975
10976 vm_map_unlock(dst_map);
10977
10978 /*
10979 * XXX If wiring_required, call vm_map_pageable
10980 */
10981
10982 return KERN_SUCCESS;
10983 }
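
/*
 * A minimal sketch of the non-consuming flavor of the routine above,
 * assuming a caller that wants to map the same copy object into several
 * maps: with consume_on_success FALSE the entries are cloned (the
 * vm_map_copy_remap() path) and "copy" remains the caller's to discard.
 * The wrapper name is hypothetical.
 */
#if 0 /* illustrative sketch only */
static kern_return_t
copyout_shared_example(
        vm_map_t        dst_map,
        vm_map_copy_t   copy,
        vm_map_address_t *dst_addr)
{
        return vm_map_copyout_internal(dst_map, dst_addr, copy,
            copy ? copy->size : 0,
            FALSE,              /* consume_on_success */
            VM_PROT_DEFAULT,
            VM_PROT_ALL,
            VM_INHERIT_DEFAULT);
}
#endif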
10984
10985 /*
10986 * Routine: vm_map_copyin
10987 *
10988 * Description:
10989 * see vm_map_copyin_common. Exported via Unsupported.exports.
10990 *
10991 */
10992
10993 #undef vm_map_copyin
10994
10995 kern_return_t
10996 vm_map_copyin(
10997 vm_map_t src_map,
10998 vm_map_address_t src_addr,
10999 vm_map_size_t len,
11000 boolean_t src_destroy,
11001 vm_map_copy_t *copy_result) /* OUT */
11002 {
11003 return vm_map_copyin_common(src_map, src_addr, len, src_destroy,
11004 FALSE, copy_result, FALSE);
11005 }
11006
11007 /*
11008 * Routine: vm_map_copyin_common
11009 *
11010 * Description:
11011 * Copy the specified region (src_addr, len) from the
11012 * source address space (src_map), possibly removing
11013 * the region from the source address space (src_destroy).
11014 *
11015 * Returns:
11016 * A vm_map_copy_t object (copy_result), suitable for
11017 * insertion into another address space (using vm_map_copyout),
11018 * copying over another address space region (using
11019 * vm_map_copy_overwrite). If the copy is unused, it
11020 * should be destroyed (using vm_map_copy_discard).
11021 *
11022 * In/out conditions:
11023 * The source map should not be locked on entry.
11024 */
11025
11026 typedef struct submap_map {
11027 vm_map_t parent_map;
11028 vm_map_offset_t base_start;
11029 vm_map_offset_t base_end;
11030 vm_map_size_t base_len;
11031 struct submap_map *next;
11032 } submap_map_t;
11033
11034 kern_return_t
11035 vm_map_copyin_common(
11036 vm_map_t src_map,
11037 vm_map_address_t src_addr,
11038 vm_map_size_t len,
11039 boolean_t src_destroy,
11040 __unused boolean_t src_volatile,
11041 vm_map_copy_t *copy_result, /* OUT */
11042 boolean_t use_maxprot)
11043 {
11044 int flags;
11045
11046 flags = 0;
11047 if (src_destroy) {
11048 flags |= VM_MAP_COPYIN_SRC_DESTROY;
11049 }
11050 if (use_maxprot) {
11051 flags |= VM_MAP_COPYIN_USE_MAXPROT;
11052 }
11053 return vm_map_copyin_internal(src_map,
11054 src_addr,
11055 len,
11056 flags,
11057 copy_result);
11058 }
11059 kern_return_t
11060 vm_map_copyin_internal(
11061 vm_map_t src_map,
11062 vm_map_address_t src_addr,
11063 vm_map_size_t len,
11064 int flags,
11065 vm_map_copy_t *copy_result) /* OUT */
11066 {
11067 vm_map_entry_t tmp_entry; /* Result of last map lookup --
11068 * in multi-level lookup, this
11069 * entry contains the actual
11070 * vm_object/offset.
11071 */
11072 vm_map_entry_t new_entry = VM_MAP_ENTRY_NULL; /* Map entry for copy */
11073
11074 vm_map_offset_t src_start; /* Start of current entry --
11075 * where copy is taking place now
11076 */
11077 vm_map_offset_t src_end; /* End of entire region to be
11078 * copied */
11079 vm_map_offset_t src_base;
11080 vm_map_t base_map = src_map;
11081 boolean_t map_share = FALSE;
11082 submap_map_t *parent_maps = NULL;
11083
11084 vm_map_copy_t copy; /* Resulting copy */
11085 vm_map_address_t copy_addr;
11086 vm_map_size_t copy_size;
11087 boolean_t src_destroy;
11088 boolean_t use_maxprot;
11089 boolean_t preserve_purgeable;
11090 boolean_t entry_was_shared;
11091 vm_map_entry_t saved_src_entry;
11092
11093 if (flags & ~VM_MAP_COPYIN_ALL_FLAGS) {
11094 return KERN_INVALID_ARGUMENT;
11095 }
11096
11097 src_destroy = (flags & VM_MAP_COPYIN_SRC_DESTROY) ? TRUE : FALSE;
11098 use_maxprot = (flags & VM_MAP_COPYIN_USE_MAXPROT) ? TRUE : FALSE;
11099 preserve_purgeable =
11100 (flags & VM_MAP_COPYIN_PRESERVE_PURGEABLE) ? TRUE : FALSE;
11101
11102 /*
11103 * Check for copies of zero bytes.
11104 */
11105
11106 if (len == 0) {
11107 *copy_result = VM_MAP_COPY_NULL;
11108 return KERN_SUCCESS;
11109 }
11110
11111 /*
11112 * Check that the end address doesn't overflow
11113 */
11114 src_end = src_addr + len;
11115 if (src_end < src_addr) {
11116 return KERN_INVALID_ADDRESS;
11117 }
11118
11119 /*
11120 * Compute (page aligned) start and end of region
11121 */
11122 src_start = vm_map_trunc_page(src_addr,
11123 VM_MAP_PAGE_MASK(src_map));
11124 src_end = vm_map_round_page(src_end,
11125 VM_MAP_PAGE_MASK(src_map));
11126
11127 /*
11128 * If the copy is sufficiently small, use a kernel buffer instead
11129 * of making a virtual copy. The theory being that the cost of
11130 * setting up VM (and taking C-O-W faults) dominates the copy costs
11131 * for small regions.
11132 */
11133 if ((len < msg_ool_size_small) &&
11134 !use_maxprot &&
11135 !preserve_purgeable &&
11136 !(flags & VM_MAP_COPYIN_ENTRY_LIST) &&
11137 /*
11138 * Since the "msg_ool_size_small" threshold was increased and
11139 * vm_map_copyin_kernel_buffer() doesn't handle accesses beyond the
11140 * address space limits, we revert to doing a virtual copy if the
11141 * copied range goes beyond those limits. Otherwise, mach_vm_read()
11142 * of the commpage would now fail when it used to work.
11143 */
11144 (src_start >= vm_map_min(src_map) &&
11145 src_start < vm_map_max(src_map) &&
11146 src_end >= vm_map_min(src_map) &&
11147 src_end < vm_map_max(src_map))) {
11148 return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
11149 src_destroy, copy_result);
11150 }
11151
11152 /*
11153 * Allocate a header element for the list.
11154 *
11155 * Use the start and end in the header to
11156 * remember the endpoints prior to rounding.
11157 */
11158
11159 copy = vm_map_copy_allocate();
11160 copy->type = VM_MAP_COPY_ENTRY_LIST;
11161 copy->cpy_hdr.entries_pageable = TRUE;
11162 #if 00
11163 copy->cpy_hdr.page_shift = src_map->hdr.page_shift;
11164 #else
11165 /*
11166 * The copy entries can be broken down for a variety of reasons,
11167 * so we can't guarantee that they will remain map-aligned...
11168 * Will need to adjust the first copy_entry's "vme_start" and
11169 * the last copy_entry's "vme_end" to be rounded to PAGE_MASK
11170 * rather than the original map's alignment.
11171 */
11172 copy->cpy_hdr.page_shift = PAGE_SHIFT;
11173 #endif
11174
11175 vm_map_store_init( &(copy->cpy_hdr));
11176
11177 copy->offset = src_addr;
11178 copy->size = len;
11179
11180 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
11181
11182 #define RETURN(x) \
11183 MACRO_BEGIN \
11184 vm_map_unlock(src_map); \
11185 if(src_map != base_map) \
11186 vm_map_deallocate(src_map); \
11187 if (new_entry != VM_MAP_ENTRY_NULL) \
11188 vm_map_copy_entry_dispose(copy,new_entry); \
11189 vm_map_copy_discard(copy); \
11190 { \
11191 submap_map_t *_ptr; \
11192 \
11193 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
11194 parent_maps=parent_maps->next; \
11195 if (_ptr->parent_map != base_map) \
11196 vm_map_deallocate(_ptr->parent_map); \
11197 kfree(_ptr, sizeof(submap_map_t)); \
11198 } \
11199 } \
11200 MACRO_RETURN(x); \
11201 MACRO_END
11202
11203 /*
11204 * Find the beginning of the region.
11205 */
11206
11207 vm_map_lock(src_map);
11208
11209 /*
11210 * Lookup the original "src_addr" rather than the truncated
11211 * "src_start", in case "src_start" falls in a non-map-aligned
11212 * map entry *before* the map entry that contains "src_addr"...
11213 */
11214 if (!vm_map_lookup_entry(src_map, src_addr, &tmp_entry)) {
11215 RETURN(KERN_INVALID_ADDRESS);
11216 }
11217 if (!tmp_entry->is_sub_map) {
11218 /*
11219 * ... but clip to the map-rounded "src_start" rather than
11220 * "src_addr" to preserve map-alignment. We'll adjust the
11221 * first copy entry at the end, if needed.
11222 */
11223 vm_map_clip_start(src_map, tmp_entry, src_start);
11224 }
11225 if (src_start < tmp_entry->vme_start) {
11226 /*
11227 * Move "src_start" up to the start of the
11228 * first map entry to copy.
11229 */
11230 src_start = tmp_entry->vme_start;
11231 }
11232 /* set for later submap fix-up */
11233 copy_addr = src_start;
11234
11235 /*
11236 * Go through entries until we get to the end.
11237 */
11238
11239 while (TRUE) {
11240 vm_map_entry_t src_entry = tmp_entry; /* Top-level entry */
11241 vm_map_size_t src_size; /* Size of source
11242 * map entry (in both
11243 * maps)
11244 */
11245
11246 vm_object_t src_object; /* Object to copy */
11247 vm_object_offset_t src_offset;
11248
11249 boolean_t src_needs_copy; /* Should source map
11250 * be made read-only
11251 * for copy-on-write?
11252 */
11253
11254 boolean_t new_entry_needs_copy; /* Will new entry be COW? */
11255
11256 boolean_t was_wired; /* Was source wired? */
11257 vm_map_version_t version; /* Version before locks
11258 * dropped to make copy
11259 */
11260 kern_return_t result; /* Return value from
11261 * copy_strategically.
11262 */
11263 while (tmp_entry->is_sub_map) {
11264 vm_map_size_t submap_len;
11265 submap_map_t *ptr;
11266
11267 ptr = (submap_map_t *)kalloc(sizeof(submap_map_t));
11268 ptr->next = parent_maps;
11269 parent_maps = ptr;
11270 ptr->parent_map = src_map;
11271 ptr->base_start = src_start;
11272 ptr->base_end = src_end;
11273 submap_len = tmp_entry->vme_end - src_start;
11274 if (submap_len > (src_end - src_start)) {
11275 submap_len = src_end - src_start;
11276 }
11277 ptr->base_len = submap_len;
11278
11279 src_start -= tmp_entry->vme_start;
11280 src_start += VME_OFFSET(tmp_entry);
11281 src_end = src_start + submap_len;
11282 src_map = VME_SUBMAP(tmp_entry);
11283 vm_map_lock(src_map);
11284 /* keep an outstanding reference for all maps in */
11285 /* the chain of parent maps except the base map */
11286 vm_map_reference(src_map);
11287 vm_map_unlock(ptr->parent_map);
11288 if (!vm_map_lookup_entry(
11289 src_map, src_start, &tmp_entry)) {
11290 RETURN(KERN_INVALID_ADDRESS);
11291 }
11292 map_share = TRUE;
11293 if (!tmp_entry->is_sub_map) {
11294 vm_map_clip_start(src_map, tmp_entry, src_start);
11295 }
11296 src_entry = tmp_entry;
11297 }
11298 /* we are now in the lowest level submap... */
11299
11300 if ((VME_OBJECT(tmp_entry) != VM_OBJECT_NULL) &&
11301 (VME_OBJECT(tmp_entry)->phys_contiguous)) {
11302 /* This is not supported for now. In the future */
11303 /* we will need to detect the phys_contig */
11304 /* condition and then upgrade copy_slowly */
11305 /* to do a physical copy from the device-memory- */
11306 /* based object. We can piggy-back off of */
11307 /* the was_wired boolean to set up the */
11308 /* proper handling. */
11309 RETURN(KERN_PROTECTION_FAILURE);
11310 }
11311 /*
11312 * Create a new address map entry to hold the result.
11313 * Fill in the fields from the appropriate source entries.
11314 * We must unlock the source map to do this if we need
11315 * to allocate a map entry.
11316 */
11317 if (new_entry == VM_MAP_ENTRY_NULL) {
11318 version.main_timestamp = src_map->timestamp;
11319 vm_map_unlock(src_map);
11320
11321 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
11322
11323 vm_map_lock(src_map);
11324 if ((version.main_timestamp + 1) != src_map->timestamp) {
11325 if (!vm_map_lookup_entry(src_map, src_start,
11326 &tmp_entry)) {
11327 RETURN(KERN_INVALID_ADDRESS);
11328 }
11329 if (!tmp_entry->is_sub_map) {
11330 vm_map_clip_start(src_map, tmp_entry, src_start);
11331 }
11332 continue; /* restart w/ new tmp_entry */
11333 }
11334 }
11335
11336 /*
11337 * Verify that the region can be read.
11338 */
11339 if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE &&
11340 !use_maxprot) ||
11341 (src_entry->max_protection & VM_PROT_READ) == 0) {
11342 RETURN(KERN_PROTECTION_FAILURE);
11343 }
11344
11345 /*
11346 * Clip against the endpoints of the entire region.
11347 */
11348
11349 vm_map_clip_end(src_map, src_entry, src_end);
11350
11351 src_size = src_entry->vme_end - src_start;
11352 src_object = VME_OBJECT(src_entry);
11353 src_offset = VME_OFFSET(src_entry);
11354 was_wired = (src_entry->wired_count != 0);
11355
11356 vm_map_entry_copy(new_entry, src_entry);
11357 if (new_entry->is_sub_map) {
11358 /* clr address space specifics */
11359 new_entry->use_pmap = FALSE;
11360 } else {
11361 /*
11362 * We're dealing with a copy-on-write operation,
11363 * so the resulting mapping should not inherit the
11364 * original mapping's accounting settings.
11365 * "iokit_acct" should have been cleared in
11366 * vm_map_entry_copy().
11367 * "use_pmap" should be reset to its default (TRUE)
11368 * so that the new mapping gets accounted for in
11369 * the task's memory footprint.
11370 */
11371 assert(!new_entry->iokit_acct);
11372 new_entry->use_pmap = TRUE;
11373 }
11374
11375 /*
11376 * Attempt non-blocking copy-on-write optimizations.
11377 */
11378
11379 if (src_destroy &&
11380 (src_object == VM_OBJECT_NULL ||
11381 (src_object->internal &&
11382 src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
11383 src_entry->vme_start <= src_addr &&
11384 src_entry->vme_end >= src_end &&
11385 !map_share))) {
11386 /*
11387 * If we are destroying the source, and the object
11388 * is internal, we can move the object reference
11389 * from the source to the copy. The copy is
11390 * copy-on-write only if the source is.
11391 * We make another reference to the object, because
11392 * destroying the source entry will deallocate it.
11393 *
11394 * This memory transfer has to be atomic (to prevent
11395 * the VM object from being shared or copied while
11396 * it's being moved here), so we can only do this
11397 * if we won't have to unlock the VM map, i.e. the
11398 * entire range must be covered by this map entry.
11399 */
11400 vm_object_reference(src_object);
11401
11402 /*
11403 * The copy is always unwired: vm_map_entry_copy()
11404 * has set its wired count to zero.
11405 */
11406
11407 goto CopySuccessful;
11408 }
11409
11410
11411 RestartCopy:
11412 if ((src_object == VM_OBJECT_NULL ||
11413 (!was_wired && !map_share && !tmp_entry->is_shared)) &&
11414 vm_object_copy_quickly(
11415 VME_OBJECT_PTR(new_entry),
11416 src_offset,
11417 src_size,
11418 &src_needs_copy,
11419 &new_entry_needs_copy)) {
11420 new_entry->needs_copy = new_entry_needs_copy;
11421
11422 /*
11423 * Handle copy-on-write obligations
11424 */
11425
11426 if (src_needs_copy && !tmp_entry->needs_copy) {
11427 vm_prot_t prot;
11428
11429 prot = src_entry->protection & ~VM_PROT_WRITE;
11430
11431 if (override_nx(src_map, VME_ALIAS(src_entry))
11432 && prot) {
11433 prot |= VM_PROT_EXECUTE;
11434 }
11435
11436 vm_object_pmap_protect(
11437 src_object,
11438 src_offset,
11439 src_size,
11440 (src_entry->is_shared ?
11441 PMAP_NULL
11442 : src_map->pmap),
11443 src_entry->vme_start,
11444 prot);
11445
11446 assert(tmp_entry->wired_count == 0);
11447 tmp_entry->needs_copy = TRUE;
11448 }
11449
11450 /*
11451 * The map has never been unlocked, so it's safe
11452 * to move to the next entry rather than doing
11453 * another lookup.
11454 */
11455
11456 goto CopySuccessful;
11457 }
11458
11459 entry_was_shared = tmp_entry->is_shared;
11460
11461 /*
11462 * Take an object reference, so that we may
11463 * release the map lock(s).
11464 */
11465
11466 assert(src_object != VM_OBJECT_NULL);
11467 vm_object_reference(src_object);
11468
11469 /*
11470 * Record the timestamp for later verification.
11471 * Unlock the map.
11472 */
11473
11474 version.main_timestamp = src_map->timestamp;
11475 vm_map_unlock(src_map); /* Increments timestamp once! */
11476 saved_src_entry = src_entry;
11477 tmp_entry = VM_MAP_ENTRY_NULL;
11478 src_entry = VM_MAP_ENTRY_NULL;
11479
11480 /*
11481 * Perform the copy
11482 */
11483
11484 if (was_wired) {
11485 CopySlowly:
11486 vm_object_lock(src_object);
11487 result = vm_object_copy_slowly(
11488 src_object,
11489 src_offset,
11490 src_size,
11491 THREAD_UNINT,
11492 VME_OBJECT_PTR(new_entry));
11493 VME_OFFSET_SET(new_entry, 0);
11494 new_entry->needs_copy = FALSE;
11495 } else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
11496 (entry_was_shared || map_share)) {
11497 vm_object_t new_object;
11498
11499 vm_object_lock_shared(src_object);
11500 new_object = vm_object_copy_delayed(
11501 src_object,
11502 src_offset,
11503 src_size,
11504 TRUE);
11505 if (new_object == VM_OBJECT_NULL) {
11506 goto CopySlowly;
11507 }
11508
11509 VME_OBJECT_SET(new_entry, new_object);
11510 assert(new_entry->wired_count == 0);
11511 new_entry->needs_copy = TRUE;
11512 assert(!new_entry->iokit_acct);
11513 assert(new_object->purgable == VM_PURGABLE_DENY);
11514 assertf(new_entry->use_pmap, "src_map %p new_entry %p\n", src_map, new_entry);
11515 result = KERN_SUCCESS;
11516 } else {
11517 vm_object_offset_t new_offset;
11518 new_offset = VME_OFFSET(new_entry);
11519 result = vm_object_copy_strategically(src_object,
11520 src_offset,
11521 src_size,
11522 VME_OBJECT_PTR(new_entry),
11523 &new_offset,
11524 &new_entry_needs_copy);
11525 if (new_offset != VME_OFFSET(new_entry)) {
11526 VME_OFFSET_SET(new_entry, new_offset);
11527 }
11528
11529 new_entry->needs_copy = new_entry_needs_copy;
11530 }
11531
11532 if (result == KERN_SUCCESS &&
11533 preserve_purgeable &&
11534 src_object->purgable != VM_PURGABLE_DENY) {
11535 vm_object_t new_object;
11536
11537 new_object = VME_OBJECT(new_entry);
11538 assert(new_object != src_object);
11539 vm_object_lock(new_object);
11540 assert(new_object->ref_count == 1);
11541 assert(new_object->shadow == VM_OBJECT_NULL);
11542 assert(new_object->copy == VM_OBJECT_NULL);
11543 assert(new_object->vo_owner == NULL);
11544
11545 new_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
11546 new_object->true_share = TRUE;
11547 /* start as non-volatile with no owner... */
11548 new_object->purgable = VM_PURGABLE_NONVOLATILE;
11549 vm_purgeable_nonvolatile_enqueue(new_object, NULL);
11550 /* ... and move to src_object's purgeable state */
11551 if (src_object->purgable != VM_PURGABLE_NONVOLATILE) {
11552 int state;
11553 state = src_object->purgable;
11554 vm_object_purgable_control(
11555 new_object,
11556 VM_PURGABLE_SET_STATE_FROM_KERNEL,
11557 &state);
11558 }
11559 vm_object_unlock(new_object);
11560 new_object = VM_OBJECT_NULL;
11561 /* no pmap accounting for purgeable objects */
11562 new_entry->use_pmap = FALSE;
11563 }
11564
11565 if (result != KERN_SUCCESS &&
11566 result != KERN_MEMORY_RESTART_COPY) {
11567 vm_map_lock(src_map);
11568 RETURN(result);
11569 }
11570
11571 /*
11572 * Throw away the extra reference
11573 */
11574
11575 vm_object_deallocate(src_object);
11576
11577 /*
11578 * Verify that the map has not substantially
11579 * changed while the copy was being made.
11580 */
11581
11582 vm_map_lock(src_map);
11583
11584 if ((version.main_timestamp + 1) == src_map->timestamp) {
11585 /* src_map hasn't changed: src_entry is still valid */
11586 src_entry = saved_src_entry;
11587 goto VerificationSuccessful;
11588 }
11589
11590 /*
11591 * Simple version comparison failed.
11592 *
11593 * Retry the lookup and verify that the
11594 * same object/offset are still present.
11595 *
11596 * [Note: a memory manager that colludes with
11597 * the calling task can detect that we have
11598 * cheated. While the map was unlocked, the
11599 * mapping could have been changed and restored.]
11600 */
11601
11602 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
11603 if (result != KERN_MEMORY_RESTART_COPY) {
11604 vm_object_deallocate(VME_OBJECT(new_entry));
11605 VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
11606 /* reset accounting state */
11607 new_entry->iokit_acct = FALSE;
11608 new_entry->use_pmap = TRUE;
11609 }
11610 RETURN(KERN_INVALID_ADDRESS);
11611 }
11612
11613 src_entry = tmp_entry;
11614 vm_map_clip_start(src_map, src_entry, src_start);
11615
11616 if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) &&
11617 !use_maxprot) ||
11618 ((src_entry->max_protection & VM_PROT_READ) == 0)) {
11619 goto VerificationFailed;
11620 }
11621
11622 if (src_entry->vme_end < new_entry->vme_end) {
11623 /*
11624 * This entry might have been shortened
11625 * (vm_map_clip_end) or been replaced with
11626 * an entry that ends closer to "src_start"
11627 * than before.
11628 * Adjust "new_entry" accordingly; copying
11629 * less memory would be correct but we also
11630 * redo the copy (see below) if the new entry
11631 * no longer points at the same object/offset.
11632 */
11633 assert(VM_MAP_PAGE_ALIGNED(src_entry->vme_end,
11634 VM_MAP_COPY_PAGE_MASK(copy)));
11635 new_entry->vme_end = src_entry->vme_end;
11636 src_size = new_entry->vme_end - src_start;
11637 } else if (src_entry->vme_end > new_entry->vme_end) {
11638 /*
11639 * This entry might have been extended
11640 * (vm_map_entry_simplify() or coalesce)
11641 * or been replaced with an entry that ends farther
11642 * from "src_start" than before.
11643 *
11644 * We've called vm_object_copy_*() only on
11645 * the previous <start:end> range, so we can't
11646 * just extend new_entry. We have to re-do
11647 * the copy based on the new entry as if it was
11648 * pointing at a different object/offset (see
11649 * "Verification failed" below).
11650 */
11651 }
11652
11653 if ((VME_OBJECT(src_entry) != src_object) ||
11654 (VME_OFFSET(src_entry) != src_offset) ||
11655 (src_entry->vme_end > new_entry->vme_end)) {
11656 /*
11657 * Verification failed.
11658 *
11659 * Start over with this top-level entry.
11660 */
11661
11662 VerificationFailed: ;
11663
11664 vm_object_deallocate(VME_OBJECT(new_entry));
11665 tmp_entry = src_entry;
11666 continue;
11667 }
11668
11669 /*
11670 * Verification succeeded.
11671 */
11672
11673 VerificationSuccessful:;
11674
11675 if (result == KERN_MEMORY_RESTART_COPY) {
11676 goto RestartCopy;
11677 }
11678
11679 /*
11680 * Copy succeeded.
11681 */
11682
11683 CopySuccessful: ;
11684
11685 /*
11686 * Link in the new copy entry.
11687 */
11688
11689 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
11690 new_entry);
11691
11692 /*
11693 * Determine whether the entire region
11694 * has been copied.
11695 */
11696 src_base = src_start;
11697 src_start = new_entry->vme_end;
11698 new_entry = VM_MAP_ENTRY_NULL;
11699 while ((src_start >= src_end) && (src_end != 0)) {
11700 submap_map_t *ptr;
11701
11702 if (src_map == base_map) {
11703 /* back to the top */
11704 break;
11705 }
11706
11707 ptr = parent_maps;
11708 assert(ptr != NULL);
11709 parent_maps = parent_maps->next;
11710
11711 /* fix up the damage we did in that submap */
11712 vm_map_simplify_range(src_map,
11713 src_base,
11714 src_end);
11715
11716 vm_map_unlock(src_map);
11717 vm_map_deallocate(src_map);
11718 vm_map_lock(ptr->parent_map);
11719 src_map = ptr->parent_map;
11720 src_base = ptr->base_start;
11721 src_start = ptr->base_start + ptr->base_len;
11722 src_end = ptr->base_end;
11723 if (!vm_map_lookup_entry(src_map,
11724 src_start,
11725 &tmp_entry) &&
11726 (src_end > src_start)) {
11727 RETURN(KERN_INVALID_ADDRESS);
11728 }
11729 kfree(ptr, sizeof(submap_map_t));
11730 if (parent_maps == NULL) {
11731 map_share = FALSE;
11732 }
11733 src_entry = tmp_entry->vme_prev;
11734 }
11735
11736 if ((VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) &&
11737 (src_start >= src_addr + len) &&
11738 (src_addr + len != 0)) {
11739 /*
11740 * Stop copying now, even though we haven't reached
11741 * "src_end". We'll adjust the end of the last copy
11742 * entry at the end, if needed.
11743 *
11744 * If src_map's alignment is different from the
11745 * system's page-alignment, there could be
11746 * extra non-map-aligned map entries between
11747 * the original (non-rounded) "src_addr + len"
11748 * and the rounded "src_end".
11749 * We do not want to copy those map entries since
11750 * they're not part of the copied range.
11751 */
11752 break;
11753 }
11754
11755 if ((src_start >= src_end) && (src_end != 0)) {
11756 break;
11757 }
11758
11759 /*
11760 * Verify that there are no gaps in the region
11761 */
11762
11763 tmp_entry = src_entry->vme_next;
11764 if ((tmp_entry->vme_start != src_start) ||
11765 (tmp_entry == vm_map_to_entry(src_map))) {
11766 RETURN(KERN_INVALID_ADDRESS);
11767 }
11768 }
11769
11770 /*
11771 * If the source should be destroyed, do it now, since the
11772 * copy was successful.
11773 */
11774 if (src_destroy) {
11775 (void) vm_map_delete(
11776 src_map,
11777 vm_map_trunc_page(src_addr,
11778 VM_MAP_PAGE_MASK(src_map)),
11779 src_end,
11780 ((src_map == kernel_map) ?
11781 VM_MAP_REMOVE_KUNWIRE :
11782 VM_MAP_REMOVE_NO_FLAGS),
11783 VM_MAP_NULL);
11784 } else {
11785 /* fix up the damage we did in the base map */
11786 vm_map_simplify_range(
11787 src_map,
11788 vm_map_trunc_page(src_addr,
11789 VM_MAP_PAGE_MASK(src_map)),
11790 vm_map_round_page(src_end,
11791 VM_MAP_PAGE_MASK(src_map)));
11792 }
11793
11794 vm_map_unlock(src_map);
11795 tmp_entry = VM_MAP_ENTRY_NULL;
11796
11797 if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) {
11798 vm_map_offset_t original_start, original_offset, original_end;
11799
11800 assert(VM_MAP_COPY_PAGE_MASK(copy) == PAGE_MASK);
11801
11802 /* adjust alignment of first copy_entry's "vme_start" */
11803 tmp_entry = vm_map_copy_first_entry(copy);
11804 if (tmp_entry != vm_map_copy_to_entry(copy)) {
11805 vm_map_offset_t adjustment;
11806
11807 original_start = tmp_entry->vme_start;
11808 original_offset = VME_OFFSET(tmp_entry);
11809
11810 /* map-align the start of the first copy entry... */
11811 adjustment = (tmp_entry->vme_start -
11812 vm_map_trunc_page(
11813 tmp_entry->vme_start,
11814 VM_MAP_PAGE_MASK(src_map)));
11815 tmp_entry->vme_start -= adjustment;
11816 VME_OFFSET_SET(tmp_entry,
11817 VME_OFFSET(tmp_entry) - adjustment);
11818 copy_addr -= adjustment;
11819 assert(tmp_entry->vme_start < tmp_entry->vme_end);
11820 /* ... adjust for mis-aligned start of copy range */
11821 adjustment =
11822 (vm_map_trunc_page(copy->offset,
11823 PAGE_MASK) -
11824 vm_map_trunc_page(copy->offset,
11825 VM_MAP_PAGE_MASK(src_map)));
11826 if (adjustment) {
11827 assert(page_aligned(adjustment));
11828 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
11829 tmp_entry->vme_start += adjustment;
11830 VME_OFFSET_SET(tmp_entry,
11831 (VME_OFFSET(tmp_entry) +
11832 adjustment));
11833 copy_addr += adjustment;
11834 assert(tmp_entry->vme_start < tmp_entry->vme_end);
11835 }
11836
11837 /*
11838 * Assert that the adjustments haven't exposed
11839 * more than was originally copied...
11840 */
11841 assert(tmp_entry->vme_start >= original_start);
11842 assert(VME_OFFSET(tmp_entry) >= original_offset);
11843 /*
11844 * ... and that it did not adjust outside of
11845 * a single 16K page.
11846 */
11847 assert(vm_map_trunc_page(tmp_entry->vme_start,
11848 VM_MAP_PAGE_MASK(src_map)) ==
11849 vm_map_trunc_page(original_start,
11850 VM_MAP_PAGE_MASK(src_map)));
11851 }
11852
11853 /* adjust alignment of last copy_entry's "vme_end" */
11854 tmp_entry = vm_map_copy_last_entry(copy);
11855 if (tmp_entry != vm_map_copy_to_entry(copy)) {
11856 vm_map_offset_t adjustment;
11857
11858 original_end = tmp_entry->vme_end;
11859
11860 /* map-align the end of the last copy entry... */
11861 tmp_entry->vme_end =
11862 vm_map_round_page(tmp_entry->vme_end,
11863 VM_MAP_PAGE_MASK(src_map));
11864 /* ... adjust for mis-aligned end of copy range */
11865 adjustment =
11866 (vm_map_round_page((copy->offset +
11867 copy->size),
11868 VM_MAP_PAGE_MASK(src_map)) -
11869 vm_map_round_page((copy->offset +
11870 copy->size),
11871 PAGE_MASK));
11872 if (adjustment) {
11873 assert(page_aligned(adjustment));
11874 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
11875 tmp_entry->vme_end -= adjustment;
11876 assert(tmp_entry->vme_start < tmp_entry->vme_end);
11877 }
11878
11879 /*
11880 * Assert that the adjustments haven't exposed
11881 * more than was originally copied...
11882 */
11883 assert(tmp_entry->vme_end <= original_end);
11884 /*
11885 * ... and that it did not adjust outside of
11886 * a single 16K page.
11887 */
11888 assert(vm_map_round_page(tmp_entry->vme_end,
11889 VM_MAP_PAGE_MASK(src_map)) ==
11890 vm_map_round_page(original_end,
11891 VM_MAP_PAGE_MASK(src_map)));
11892 }
11893 }
11894
11895 /* Fix up start and end points in copy. This is necessary */
11896 /* when the various entries in the copy object were picked */
11897 /* up from different sub-maps */
11898
11899 tmp_entry = vm_map_copy_first_entry(copy);
11900 copy_size = 0; /* compute actual size */
11901 while (tmp_entry != vm_map_copy_to_entry(copy)) {
11902 assert(VM_MAP_PAGE_ALIGNED(
11903 copy_addr + (tmp_entry->vme_end -
11904 tmp_entry->vme_start),
11905 VM_MAP_COPY_PAGE_MASK(copy)));
11906 assert(VM_MAP_PAGE_ALIGNED(
11907 copy_addr,
11908 VM_MAP_COPY_PAGE_MASK(copy)));
11909
11910 /*
11911 * The copy_entries will be injected directly into the
11912 * destination map and might not be "map aligned" there...
11913 */
11914 tmp_entry->map_aligned = FALSE;
11915
11916 tmp_entry->vme_end = copy_addr +
11917 (tmp_entry->vme_end - tmp_entry->vme_start);
11918 tmp_entry->vme_start = copy_addr;
11919 assert(tmp_entry->vme_start < tmp_entry->vme_end);
11920 copy_addr += tmp_entry->vme_end - tmp_entry->vme_start;
11921 copy_size += tmp_entry->vme_end - tmp_entry->vme_start;
11922 tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next;
11923 }
11924
11925 if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT &&
11926 copy_size < copy->size) {
11927 /*
11928 * The actual size of the VM map copy is smaller than what
11929 * was requested by the caller. This must be because some
11930 * PAGE_SIZE-sized pages are missing at the end of the last
11931 * VM_MAP_PAGE_SIZE(src_map)-sized chunk of the range.
11932 * The caller might not have been aware of those missing
11933 * pages and might not want to be aware of it, which is
11934 * fine as long as they don't try to access (and crash on)
11935 * those missing pages.
11936 * Let's adjust the size of the "copy", to avoid failing
11937 * in vm_map_copyout() or vm_map_copy_overwrite().
11938 */
11939 assert(vm_map_round_page(copy_size,
11940 VM_MAP_PAGE_MASK(src_map)) ==
11941 vm_map_round_page(copy->size,
11942 VM_MAP_PAGE_MASK(src_map)));
11943 copy->size = copy_size;
11944 }
11945
11946 *copy_result = copy;
11947 return KERN_SUCCESS;
11948
11949 #undef RETURN
11950 }
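/*
 * Illustrative sketch (not part of the original source): the usual way a
 * kernel-internal caller pairs the copyin above with a copyout, moving a
 * range from "src_map" into "dst_map".  The surrounding variables and the
 * error-handling policy are hypothetical; only vm_map_copyin(),
 * vm_map_copyout() and vm_map_copy_discard() are real interfaces.
 *
 *	vm_map_copy_t copy;
 *	vm_map_address_t dst_addr;
 *	kern_return_t kr;
 *
 *	kr = vm_map_copyin(src_map, src_addr, len,
 *	    FALSE,		-- don't destroy the source range --
 *	    &copy);
 *	if (kr != KERN_SUCCESS) {
 *		return kr;
 *	}
 *	kr = vm_map_copyout(dst_map, &dst_addr, copy);
 *	if (kr != KERN_SUCCESS) {
 *		vm_map_copy_discard(copy);	-- copy not consumed on failure --
 *	}
 */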
11951
11952 kern_return_t
11953 vm_map_copy_extract(
11954 vm_map_t src_map,
11955 vm_map_address_t src_addr,
11956 vm_map_size_t len,
11957 vm_map_copy_t *copy_result, /* OUT */
11958 vm_prot_t *cur_prot, /* OUT */
11959 vm_prot_t *max_prot)
11960 {
11961 vm_map_offset_t src_start, src_end;
11962 vm_map_copy_t copy;
11963 kern_return_t kr;
11964
11965 /*
11966 * Check for copies of zero bytes.
11967 */
11968
11969 if (len == 0) {
11970 *copy_result = VM_MAP_COPY_NULL;
11971 return KERN_SUCCESS;
11972 }
11973
11974 /*
11975 * Check that the end address doesn't overflow
11976 */
11977 src_end = src_addr + len;
11978 if (src_end < src_addr) {
11979 return KERN_INVALID_ADDRESS;
11980 }
11981
11982 /*
11983 * Compute (page aligned) start and end of region
11984 */
11985 src_start = vm_map_trunc_page(src_addr, PAGE_MASK);
11986 src_end = vm_map_round_page(src_end, PAGE_MASK);
11987
11988 /*
11989 * Allocate a header element for the list.
11990 *
11991 * Use the start and end in the header to
11992 * remember the endpoints prior to rounding.
11993 */
11994
11995 copy = vm_map_copy_allocate();
11996 copy->type = VM_MAP_COPY_ENTRY_LIST;
11997 copy->cpy_hdr.entries_pageable = TRUE;
11998
11999 vm_map_store_init(&copy->cpy_hdr);
12000
12001 copy->offset = 0;
12002 copy->size = len;
12003
12004 kr = vm_map_remap_extract(src_map,
12005 src_addr,
12006 len,
12007 FALSE, /* copy */
12008 &copy->cpy_hdr,
12009 cur_prot,
12010 max_prot,
12011 VM_INHERIT_SHARE,
12012 TRUE, /* pageable */
12013 FALSE, /* same_map */
12014 VM_MAP_KERNEL_FLAGS_NONE);
12015 if (kr != KERN_SUCCESS) {
12016 vm_map_copy_discard(copy);
12017 return kr;
12018 }
12019
12020 *copy_result = copy;
12021 return KERN_SUCCESS;
12022 }
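/*
 * Illustrative sketch (not part of the original source): a hypothetical
 * caller extracting a shared copy of a range and checking the protections
 * reported back before handing the copy to someone else.
 *
 *	vm_map_copy_t copy;
 *	vm_prot_t cur_prot, max_prot;
 *	kern_return_t kr;
 *
 *	kr = vm_map_copy_extract(src_map, src_addr, len,
 *	    &copy, &cur_prot, &max_prot);
 *	if (kr == KERN_SUCCESS && !(cur_prot & VM_PROT_READ)) {
 *		-- range exists but is not readable: give it back --
 *		vm_map_copy_discard(copy);
 *		kr = KERN_PROTECTION_FAILURE;
 *	}
 */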
12023
12024 /*
12025 * vm_map_copyin_object:
12026 *
12027 * Create a copy object from an object.
12028 * Our caller donates an object reference.
12029 */
12030
12031 kern_return_t
12032 vm_map_copyin_object(
12033 vm_object_t object,
12034 vm_object_offset_t offset, /* offset of region in object */
12035 vm_object_size_t size, /* size of region in object */
12036 vm_map_copy_t *copy_result) /* OUT */
12037 {
12038 vm_map_copy_t copy; /* Resulting copy */
12039
12040 /*
12041 * We drop the object into a special copy object
12042 * that contains the object directly.
12043 */
12044
12045 copy = vm_map_copy_allocate();
12046 copy->type = VM_MAP_COPY_OBJECT;
12047 copy->cpy_object = object;
12048 copy->offset = offset;
12049 copy->size = size;
12050
12051 *copy_result = copy;
12052 return KERN_SUCCESS;
12053 }
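/*
 * Illustrative sketch (not part of the original source): wrapping a freshly
 * allocated VM object in a VM_MAP_COPY_OBJECT copy.  The reference obtained
 * from vm_object_allocate() is donated to the copy, so the caller must not
 * deallocate "object" afterwards.
 *
 *	vm_object_t object;
 *	vm_map_copy_t copy;
 *
 *	object = vm_object_allocate(size);
 *	(void) vm_map_copyin_object(object, 0, size, &copy);
 *	-- "copy" can now be handed to vm_map_copyout() or discarded --
 */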
12054
12055 static void
12056 vm_map_fork_share(
12057 vm_map_t old_map,
12058 vm_map_entry_t old_entry,
12059 vm_map_t new_map)
12060 {
12061 vm_object_t object;
12062 vm_map_entry_t new_entry;
12063
12064 /*
12065 * New sharing code. New map entry
12066 * references original object. Internal
12067 * objects use an asynchronous copy algorithm for
12068 * future copies. First make sure we have
12069 * the right object. If we need a shadow,
12070 * or someone else already has one, then
12071 * make a new shadow and share it.
12072 */
12073
12074 object = VME_OBJECT(old_entry);
12075 if (old_entry->is_sub_map) {
12076 assert(old_entry->wired_count == 0);
12077 #ifndef NO_NESTED_PMAP
12078 if (old_entry->use_pmap) {
12079 kern_return_t result;
12080
12081 result = pmap_nest(new_map->pmap,
12082 (VME_SUBMAP(old_entry))->pmap,
12083 (addr64_t)old_entry->vme_start,
12084 (addr64_t)old_entry->vme_start,
12085 (uint64_t)(old_entry->vme_end - old_entry->vme_start));
12086 if (result) {
12087 panic("vm_map_fork_share: pmap_nest failed!");
12088 }
12089 }
12090 #endif /* NO_NESTED_PMAP */
12091 } else if (object == VM_OBJECT_NULL) {
12092 object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
12093 old_entry->vme_start));
12094 VME_OFFSET_SET(old_entry, 0);
12095 VME_OBJECT_SET(old_entry, object);
12096 old_entry->use_pmap = TRUE;
12097 // assert(!old_entry->needs_copy);
12098 } else if (object->copy_strategy !=
12099 MEMORY_OBJECT_COPY_SYMMETRIC) {
12100 /*
12101 * We are already using an asymmetric
12102 * copy, and therefore we already have
12103 * the right object.
12104 */
12105
12106 assert(!old_entry->needs_copy);
12107 } else if (old_entry->needs_copy || /* case 1 */
12108 object->shadowed || /* case 2 */
12109 (!object->true_share && /* case 3 */
12110 !old_entry->is_shared &&
12111 (object->vo_size >
12112 (vm_map_size_t)(old_entry->vme_end -
12113 old_entry->vme_start)))) {
12114 /*
12115 * We need to create a shadow.
12116 * There are three cases here.
12117 * In the first case, we need to
12118 * complete a deferred symmetrical
12119 * copy that we participated in.
12120 * In the second and third cases,
12121 * we need to create the shadow so
12122 * that changes that we make to the
12123 * object do not interfere with
12124 * any symmetrical copies which
12125 * have occurred (case 2) or which
12126 * might occur (case 3).
12127 *
12128 * The first case is when we had
12129 * deferred shadow object creation
12130 * via the entry->needs_copy mechanism.
12131 * This mechanism only works when
12132 * only one entry points to the source
12133 * object, and we are about to create
12134 * a second entry pointing to the
12135 * same object. The problem is that
12136 * there is no way of mapping from
12137 * an object to the entries pointing
12138 * to it. (Deferred shadow creation
12139 * works with one entry because it occurs
12140 * at fault time, and we walk from the
12141 * entry to the object when handling
12142 * the fault.)
12143 *
12144 * The second case is when the object
12145 * to be shared has already been copied
12146 * with a symmetric copy, but we point
12147 * directly to the object without
12148 * needs_copy set in our entry. (This
12149 * can happen because different ranges
12150 * of an object can be pointed to by
12151 * different entries. In particular,
12152 * a single entry pointing to an object
12153 * can be split by a call to vm_inherit,
12154 * which, combined with task_create, can
12155 * result in the different entries
12156 * having different needs_copy values.)
12157 * The shadowed flag in the object allows
12158 * us to detect this case. The problem
12159 * with this case is that if this object
12160 * has or will have shadows, then we
12161 * must not perform an asymmetric copy
12162 * of this object, since such a copy
12163 * allows the object to be changed, which
12164 * will break the previous symmetrical
12165 * copies (which rely upon the object
12166 * not changing). In a sense, the shadowed
12167 * flag says "don't change this object".
12168 * We fix this by creating a shadow
12169 * object for this object, and sharing
12170 * that. This works because we are free
12171 * to change the shadow object (and thus
12172 * to use an asymmetric copy strategy);
12173 * this is also semantically correct,
12174 * since this object is temporary, and
12175 * therefore a copy of the object is
12176 * as good as the object itself. (This
12177 * is not true for permanent objects,
12178 * since the pager needs to see changes,
12179 * which won't happen if the changes
12180 * are made to a copy.)
12181 *
12182 * The third case is when the object
12183 * to be shared has parts sticking
12184 * outside of the entry we're working
12185 * with, and thus may in the future
12186 * be subject to a symmetrical copy.
12187 * (This is a preemptive version of
12188 * case 2.)
12189 */
12190 VME_OBJECT_SHADOW(old_entry,
12191 (vm_map_size_t) (old_entry->vme_end -
12192 old_entry->vme_start));
12193
12194 /*
12195 * If we're making a shadow for other than
12196 * copy on write reasons, then we have
12197 * to remove write permission.
12198 */
12199
12200 if (!old_entry->needs_copy &&
12201 (old_entry->protection & VM_PROT_WRITE)) {
12202 vm_prot_t prot;
12203
12204 assert(!pmap_has_prot_policy(old_entry->protection));
12205
12206 prot = old_entry->protection & ~VM_PROT_WRITE;
12207
12208 assert(!pmap_has_prot_policy(prot));
12209
12210 if (override_nx(old_map, VME_ALIAS(old_entry)) && prot) {
12211 prot |= VM_PROT_EXECUTE;
12212 }
12213
12214
12215 if (old_map->mapped_in_other_pmaps) {
12216 vm_object_pmap_protect(
12217 VME_OBJECT(old_entry),
12218 VME_OFFSET(old_entry),
12219 (old_entry->vme_end -
12220 old_entry->vme_start),
12221 PMAP_NULL,
12222 old_entry->vme_start,
12223 prot);
12224 } else {
12225 pmap_protect(old_map->pmap,
12226 old_entry->vme_start,
12227 old_entry->vme_end,
12228 prot);
12229 }
12230 }
12231
12232 old_entry->needs_copy = FALSE;
12233 object = VME_OBJECT(old_entry);
12234 }
12235
12236
12237 /*
12238 * If object was using a symmetric copy strategy,
12239 * change its copy strategy to the default
12240 * asymmetric copy strategy, which is copy_delay
12241 * in the non-norma case and copy_call in the
12242 * norma case. Bump the reference count for the
12243 * new entry.
12244 */
12245
12246 if (old_entry->is_sub_map) {
12247 vm_map_lock(VME_SUBMAP(old_entry));
12248 vm_map_reference(VME_SUBMAP(old_entry));
12249 vm_map_unlock(VME_SUBMAP(old_entry));
12250 } else {
12251 vm_object_lock(object);
12252 vm_object_reference_locked(object);
12253 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
12254 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
12255 }
12256 vm_object_unlock(object);
12257 }
12258
12259 /*
12260 * Clone the entry, using object ref from above.
12261 * Mark both entries as shared.
12262 */
12263
12264 new_entry = vm_map_entry_create(new_map, FALSE); /* Never the kernel
12265 * map or descendants */
12266 vm_map_entry_copy(new_entry, old_entry);
12267 old_entry->is_shared = TRUE;
12268 new_entry->is_shared = TRUE;
12269
12270 /*
12271 * We're dealing with a shared mapping, so the resulting mapping
12272 * should inherit some of the original mapping's accounting settings.
12273 * "iokit_acct" should have been cleared in vm_map_entry_copy().
12274 * "use_pmap" should stay the same as before (if it hasn't been reset
12275 * to TRUE when we cleared "iokit_acct").
12276 */
12277 assert(!new_entry->iokit_acct);
12278
12279 /*
12280 * If the old entry's inheritance is VM_INHERIT_NONE,
12281 * the new entry is for a corpse fork; remove the
12282 * write permission from the new entry.
12283 */
12284 if (old_entry->inheritance == VM_INHERIT_NONE) {
12285 new_entry->protection &= ~VM_PROT_WRITE;
12286 new_entry->max_protection &= ~VM_PROT_WRITE;
12287 }
12288
12289 /*
12290 * Insert the entry into the new map -- we
12291 * know we're inserting at the end of the new
12292 * map.
12293 */
12294
12295 vm_map_store_entry_link(new_map, vm_map_last_entry(new_map), new_entry,
12296 VM_MAP_KERNEL_FLAGS_NONE);
12297
12298 /*
12299 * Update the physical map
12300 */
12301
12302 if (old_entry->is_sub_map) {
12303 /* Bill Angell pmap support goes here */
12304 } else {
12305 pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
12306 old_entry->vme_end - old_entry->vme_start,
12307 old_entry->vme_start);
12308 }
12309 }
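/*
 * Illustrative userspace sketch (not part of the original source): the
 * sharing behavior implemented above is what a process observes when it
 * marks a region VM_INHERIT_SHARE and then forks -- parent and child see
 * each other's stores instead of getting copy-on-write copies.
 *
 *	mach_vm_address_t addr = 0;
 *	mach_vm_allocate(mach_task_self(), &addr, PAGE_SIZE, VM_FLAGS_ANYWHERE);
 *	mach_vm_inherit(mach_task_self(), addr, PAGE_SIZE, VM_INHERIT_SHARE);
 *	if (fork() == 0) {
 *		*(int *)addr = 42;	-- child's store ... --
 *		_exit(0);
 *	}
 *	wait(NULL);
 *	assert(*(int *)addr == 42);	-- ... is visible to the parent --
 */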
12310
12311 static boolean_t
12312 vm_map_fork_copy(
12313 vm_map_t old_map,
12314 vm_map_entry_t *old_entry_p,
12315 vm_map_t new_map,
12316 int vm_map_copyin_flags)
12317 {
12318 vm_map_entry_t old_entry = *old_entry_p;
12319 vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start;
12320 vm_map_offset_t start = old_entry->vme_start;
12321 vm_map_copy_t copy;
12322 vm_map_entry_t last = vm_map_last_entry(new_map);
12323
12324 vm_map_unlock(old_map);
12325 /*
12326 * Use maxprot version of copyin because we
12327 * care about whether this memory can ever
12328 * be accessed, not just whether it's accessible
12329 * right now.
12330 */
12331 vm_map_copyin_flags |= VM_MAP_COPYIN_USE_MAXPROT;
12332 if (vm_map_copyin_internal(old_map, start, entry_size,
12333 vm_map_copyin_flags, &copy)
12334 != KERN_SUCCESS) {
12335 /*
12336 * The map might have changed while it
12337 * was unlocked, check it again. Skip
12338 * any blank space or permanently
12339 * unreadable region.
12340 */
12341 vm_map_lock(old_map);
12342 if (!vm_map_lookup_entry(old_map, start, &last) ||
12343 (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) {
12344 last = last->vme_next;
12345 }
12346 *old_entry_p = last;
12347
12348 /*
12349 * XXX For some error returns, want to
12350 * XXX skip to the next element. Note
12351 * that INVALID_ADDRESS and
12352 * PROTECTION_FAILURE are handled above.
12353 */
12354
12355 return FALSE;
12356 }
12357
12358 /*
12359 * Insert the copy into the new map
12360 */
12361
12362 vm_map_copy_insert(new_map, last, copy);
12363
12364 /*
12365 * Pick up the traversal at the end of
12366 * the copied region.
12367 */
12368
12369 vm_map_lock(old_map);
12370 start += entry_size;
12371 if (!vm_map_lookup_entry(old_map, start, &last)) {
12372 last = last->vme_next;
12373 } else {
12374 if (last->vme_start == start) {
12375 /*
12376 * No need to clip here and we don't
12377 * want to cause any unnecessary
12378 * unnesting...
12379 */
12380 } else {
12381 vm_map_clip_start(old_map, last, start);
12382 }
12383 }
12384 *old_entry_p = last;
12385
12386 return TRUE;
12387 }
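/*
 * Illustrative caller pattern (a sketch, not part of the original source):
 * vm_map_fork() below drives this routine roughly as
 *
 *	if (vm_map_fork_copy(old_map, &old_entry, new_map, flags)) {
 *		new_size += entry_size;		-- region was copied --
 *	}
 *	continue;	-- either way, old_entry was already advanced --
 *
 * i.e. the callee advances *old_entry_p past the region it handled (or
 * past an unreadable hole) in both the success and failure cases, so the
 * caller never advances the cursor itself on this path.
 */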
12388
12389 /*
12390 * vm_map_fork:
12391 *
12392 * Create and return a new map based on the old
12393 * map, according to the inheritance values on the
12394 * regions in that map and the options.
12395 *
12396 * The source map must not be locked.
12397 */
12398 vm_map_t
12399 vm_map_fork(
12400 ledger_t ledger,
12401 vm_map_t old_map,
12402 int options)
12403 {
12404 pmap_t new_pmap;
12405 vm_map_t new_map;
12406 vm_map_entry_t old_entry;
12407 vm_map_size_t new_size = 0, entry_size;
12408 vm_map_entry_t new_entry;
12409 boolean_t src_needs_copy;
12410 boolean_t new_entry_needs_copy;
12411 boolean_t pmap_is64bit;
12412 int vm_map_copyin_flags;
12413 vm_inherit_t old_entry_inheritance;
12414 int map_create_options;
12415 kern_return_t footprint_collect_kr;
12416
12417 if (options & ~(VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
12418 VM_MAP_FORK_PRESERVE_PURGEABLE |
12419 VM_MAP_FORK_CORPSE_FOOTPRINT)) {
12420 /* unsupported option */
12421 return VM_MAP_NULL;
12422 }
12423
12424 pmap_is64bit =
12425 #if defined(__i386__) || defined(__x86_64__)
12426 old_map->pmap->pm_task_map != TASK_MAP_32BIT;
12427 #elif defined(__arm64__)
12428 old_map->pmap->max == MACH_VM_MAX_ADDRESS;
12429 #elif defined(__arm__)
12430 FALSE;
12431 #else
12432 #error Unknown architecture.
12433 #endif
12434
12435 unsigned int pmap_flags = 0;
12436 pmap_flags |= pmap_is64bit ? PMAP_CREATE_64BIT : 0;
12437 #if defined(HAS_APPLE_PAC)
12438 pmap_flags |= old_map->pmap->disable_jop ? PMAP_CREATE_DISABLE_JOP : 0;
12439 #endif
12440 new_pmap = pmap_create_options(ledger, (vm_map_size_t) 0, pmap_flags);
12441
12442 vm_map_reference_swap(old_map);
12443 vm_map_lock(old_map);
12444
12445 map_create_options = 0;
12446 if (old_map->hdr.entries_pageable) {
12447 map_create_options |= VM_MAP_CREATE_PAGEABLE;
12448 }
12449 if (options & VM_MAP_FORK_CORPSE_FOOTPRINT) {
12450 map_create_options |= VM_MAP_CREATE_CORPSE_FOOTPRINT;
12451 footprint_collect_kr = KERN_SUCCESS;
12452 }
12453 new_map = vm_map_create_options(new_pmap,
12454 old_map->min_offset,
12455 old_map->max_offset,
12456 map_create_options);
12457 vm_map_lock(new_map);
12458 vm_commit_pagezero_status(new_map);
12459 /* inherit the parent map's page size */
12460 vm_map_set_page_shift(new_map, VM_MAP_PAGE_SHIFT(old_map));
12461 for (
12462 old_entry = vm_map_first_entry(old_map);
12463 old_entry != vm_map_to_entry(old_map);
12464 ) {
12465 entry_size = old_entry->vme_end - old_entry->vme_start;
12466
12467 old_entry_inheritance = old_entry->inheritance;
12468 /*
12469 * If the caller used the VM_MAP_FORK_SHARE_IF_INHERIT_NONE option,
12470 * share VM_INHERIT_NONE entries that are not backed by a
12471 * device pager.
12472 */
12473 if (old_entry_inheritance == VM_INHERIT_NONE &&
12474 (options & VM_MAP_FORK_SHARE_IF_INHERIT_NONE) &&
12475 !(!old_entry->is_sub_map &&
12476 VME_OBJECT(old_entry) != NULL &&
12477 VME_OBJECT(old_entry)->pager != NULL &&
12478 is_device_pager_ops(
12479 VME_OBJECT(old_entry)->pager->mo_pager_ops))) {
12480 old_entry_inheritance = VM_INHERIT_SHARE;
12481 }
12482
12483 if (old_entry_inheritance != VM_INHERIT_NONE &&
12484 (options & VM_MAP_FORK_CORPSE_FOOTPRINT) &&
12485 footprint_collect_kr == KERN_SUCCESS) {
12486 /*
12487 * The corpse won't have old_map->pmap to query
12488 * footprint information, so collect that data now
12489 * and store it in new_map->vmmap_corpse_footprint
12490 * for later autopsy.
12491 */
12492 footprint_collect_kr =
12493 vm_map_corpse_footprint_collect(old_map,
12494 old_entry,
12495 new_map);
12496 }
12497
12498 switch (old_entry_inheritance) {
12499 case VM_INHERIT_NONE:
12500 break;
12501
12502 case VM_INHERIT_SHARE:
12503 vm_map_fork_share(old_map, old_entry, new_map);
12504 new_size += entry_size;
12505 break;
12506
12507 case VM_INHERIT_COPY:
12508
12509 /*
12510 * Inline the copy_quickly case;
12511 * upon failure, fall back on call
12512 * to vm_map_fork_copy.
12513 */
12514
12515 if (old_entry->is_sub_map) {
12516 break;
12517 }
12518 if ((old_entry->wired_count != 0) ||
12519 ((VME_OBJECT(old_entry) != NULL) &&
12520 (VME_OBJECT(old_entry)->true_share))) {
12521 goto slow_vm_map_fork_copy;
12522 }
12523
12524 new_entry = vm_map_entry_create(new_map, FALSE); /* never the kernel map or descendants */
12525 vm_map_entry_copy(new_entry, old_entry);
12526 if (new_entry->is_sub_map) {
12527 /* clear address space specifics */
12528 new_entry->use_pmap = FALSE;
12529 } else {
12530 /*
12531 * We're dealing with a copy-on-write operation,
12532 * so the resulting mapping should not inherit
12533 * the original mapping's accounting settings.
12534 * "iokit_acct" should have been cleared in
12535 * vm_map_entry_copy().
12536 * "use_pmap" should be reset to its default
12537 * (TRUE) so that the new mapping gets
12538 * accounted for in the task's memory footprint.
12539 */
12540 assert(!new_entry->iokit_acct);
12541 new_entry->use_pmap = TRUE;
12542 }
12543
12544 if (!vm_object_copy_quickly(
12545 VME_OBJECT_PTR(new_entry),
12546 VME_OFFSET(old_entry),
12547 (old_entry->vme_end -
12548 old_entry->vme_start),
12549 &src_needs_copy,
12550 &new_entry_needs_copy)) {
12551 vm_map_entry_dispose(new_map, new_entry);
12552 goto slow_vm_map_fork_copy;
12553 }
12554
12555 /*
12556 * Handle copy-on-write obligations
12557 */
12558
12559 if (src_needs_copy && !old_entry->needs_copy) {
12560 vm_prot_t prot;
12561
12562 assert(!pmap_has_prot_policy(old_entry->protection));
12563
12564 prot = old_entry->protection & ~VM_PROT_WRITE;
12565
12566 if (override_nx(old_map, VME_ALIAS(old_entry))
12567 && prot) {
12568 prot |= VM_PROT_EXECUTE;
12569 }
12570
12571 assert(!pmap_has_prot_policy(prot));
12572
12573 vm_object_pmap_protect(
12574 VME_OBJECT(old_entry),
12575 VME_OFFSET(old_entry),
12576 (old_entry->vme_end -
12577 old_entry->vme_start),
12578 ((old_entry->is_shared
12579 || old_map->mapped_in_other_pmaps)
12580 ? PMAP_NULL :
12581 old_map->pmap),
12582 old_entry->vme_start,
12583 prot);
12584
12585 assert(old_entry->wired_count == 0);
12586 old_entry->needs_copy = TRUE;
12587 }
12588 new_entry->needs_copy = new_entry_needs_copy;
12589
12590 /*
12591 * Insert the entry at the end
12592 * of the map.
12593 */
12594
12595 vm_map_store_entry_link(new_map,
12596 vm_map_last_entry(new_map),
12597 new_entry,
12598 VM_MAP_KERNEL_FLAGS_NONE);
12599 new_size += entry_size;
12600 break;
12601
12602 slow_vm_map_fork_copy:
12603 vm_map_copyin_flags = 0;
12604 if (options & VM_MAP_FORK_PRESERVE_PURGEABLE) {
12605 vm_map_copyin_flags |=
12606 VM_MAP_COPYIN_PRESERVE_PURGEABLE;
12607 }
12608 if (vm_map_fork_copy(old_map,
12609 &old_entry,
12610 new_map,
12611 vm_map_copyin_flags)) {
12612 new_size += entry_size;
12613 }
12614 continue;
12615 }
12616 old_entry = old_entry->vme_next;
12617 }
12618
12619 #if defined(__arm64__)
12620 pmap_insert_sharedpage(new_map->pmap);
12621 #endif
12622
12623 new_map->size = new_size;
12624
12625 if (options & VM_MAP_FORK_CORPSE_FOOTPRINT) {
12626 vm_map_corpse_footprint_collect_done(new_map);
12627 }
12628
12629 vm_map_unlock(new_map);
12630 vm_map_unlock(old_map);
12631 vm_map_deallocate(old_map);
12632
12633 return new_map;
12634 }
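/*
 * Summary of the per-entry actions taken above (a recap, not part of the
 * original source):
 *
 *	VM_INHERIT_NONE	  entry is omitted from the child, unless the
 *			  VM_MAP_FORK_SHARE_IF_INHERIT_NONE option applies
 *			  and the entry is not device-pager backed, in which
 *			  case it is shared (with write permission removed).
 *	VM_INHERIT_SHARE  vm_map_fork_share(): the child maps the same
 *			  object, shadowing it first if needed.
 *	VM_INHERIT_COPY	  symmetric copy-on-write via vm_object_copy_quickly(),
 *			  falling back to vm_map_fork_copy() for wired or
 *			  true_share mappings.
 */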
12635
12636 /*
12637 * vm_map_exec:
12638 *
12639 * Setup the "new_map" with the proper execution environment according
12640 * to the type of executable (platform, 64bit, chroot environment).
12641 * Map the comm page and shared region, etc...
12642 */
12643 kern_return_t
12644 vm_map_exec(
12645 vm_map_t new_map,
12646 task_t task,
12647 boolean_t is64bit,
12648 void *fsroot,
12649 cpu_type_t cpu,
12650 cpu_subtype_t cpu_subtype)
12651 {
12652 SHARED_REGION_TRACE_DEBUG(
12653 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x,0x%x): ->\n",
12654 (void *)VM_KERNEL_ADDRPERM(current_task()),
12655 (void *)VM_KERNEL_ADDRPERM(new_map),
12656 (void *)VM_KERNEL_ADDRPERM(task),
12657 (void *)VM_KERNEL_ADDRPERM(fsroot),
12658 cpu,
12659 cpu_subtype));
12660 (void) vm_commpage_enter(new_map, task, is64bit);
12661 (void) vm_shared_region_enter(new_map, task, is64bit, fsroot, cpu, cpu_subtype);
12662 SHARED_REGION_TRACE_DEBUG(
12663 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x,0x%x): <-\n",
12664 (void *)VM_KERNEL_ADDRPERM(current_task()),
12665 (void *)VM_KERNEL_ADDRPERM(new_map),
12666 (void *)VM_KERNEL_ADDRPERM(task),
12667 (void *)VM_KERNEL_ADDRPERM(fsroot),
12668 cpu,
12669 cpu_subtype));
12670 return KERN_SUCCESS;
12671 }
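/*
 * Illustrative call site (a sketch, not part of the original source): during
 * exec, once the new task's address map exists, the image loader calls
 * something like
 *
 *	kr = vm_map_exec(new_map, task, is_64bit, fsroot, cputype, cpusubtype);
 *
 * so the comm page and shared region are in place before the executable and
 * dyld are mapped.  The argument names above are placeholders.
 */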
12672
12673 /*
12674 * vm_map_lookup_locked:
12675 *
12676 * Finds the VM object, offset, and
12677 * protection for a given virtual address in the
12678 * specified map, assuming a page fault of the
12679 * type specified.
12680 *
12681 * Returns the (object, offset, protection) for
12682 * this address, whether it is wired down, and whether
12683 * this map has the only reference to the data in question.
12684 * In order to later verify this lookup, a "version"
12685 * is returned.
12686 *
12687 * The map MUST be locked by the caller and WILL be
12688 * locked on exit. In order to guarantee the
12689 * existence of the returned object, it is returned
12690 * locked.
12691 *
12692 * If a lookup is requested with "write protection"
12693 * specified, the map may be changed to perform virtual
12694 * copying operations, although the data referenced will
12695 * remain the same.
12696 */
12697 kern_return_t
12698 vm_map_lookup_locked(
12699 vm_map_t *var_map, /* IN/OUT */
12700 vm_map_offset_t vaddr,
12701 vm_prot_t fault_type,
12702 int object_lock_type,
12703 vm_map_version_t *out_version, /* OUT */
12704 vm_object_t *object, /* OUT */
12705 vm_object_offset_t *offset, /* OUT */
12706 vm_prot_t *out_prot, /* OUT */
12707 boolean_t *wired, /* OUT */
12708 vm_object_fault_info_t fault_info, /* OUT */
12709 vm_map_t *real_map)
12710 {
12711 vm_map_entry_t entry;
12712 vm_map_t map = *var_map;
12713 vm_map_t old_map = *var_map;
12714 vm_map_t cow_sub_map_parent = VM_MAP_NULL;
12715 vm_map_offset_t cow_parent_vaddr = 0;
12716 vm_map_offset_t old_start = 0;
12717 vm_map_offset_t old_end = 0;
12718 vm_prot_t prot;
12719 boolean_t mask_protections;
12720 boolean_t force_copy;
12721 vm_prot_t original_fault_type;
12722
12723 /*
12724 * VM_PROT_MASK means that the caller wants us to use "fault_type"
12725 * as a mask against the mapping's actual protections, not as an
12726 * absolute value.
12727 */
12728 mask_protections = (fault_type & VM_PROT_IS_MASK) ? TRUE : FALSE;
12729 force_copy = (fault_type & VM_PROT_COPY) ? TRUE : FALSE;
12730 fault_type &= VM_PROT_ALL;
12731 original_fault_type = fault_type;
12732
12733 *real_map = map;
12734
12735 RetryLookup:
12736 fault_type = original_fault_type;
12737
12738 /*
12739 * If the map has an interesting hint, try it before calling
12740 * full blown lookup routine.
12741 */
12742 entry = map->hint;
12743
12744 if ((entry == vm_map_to_entry(map)) ||
12745 (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
12746 vm_map_entry_t tmp_entry;
12747
12748 /*
12749 * Entry was either not a valid hint, or the vaddr
12750 * was not contained in the entry, so do a full lookup.
12751 */
12752 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
12753 if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
12754 vm_map_unlock(cow_sub_map_parent);
12755 }
12756 if ((*real_map != map)
12757 && (*real_map != cow_sub_map_parent)) {
12758 vm_map_unlock(*real_map);
12759 }
12760 return KERN_INVALID_ADDRESS;
12761 }
12762
12763 entry = tmp_entry;
12764 }
12765 if (map == old_map) {
12766 old_start = entry->vme_start;
12767 old_end = entry->vme_end;
12768 }
12769
12770 /*
12771 * Handle submaps. Drop lock on upper map, submap is
12772 * returned locked.
12773 */
12774
12775 submap_recurse:
12776 if (entry->is_sub_map) {
12777 vm_map_offset_t local_vaddr;
12778 vm_map_offset_t end_delta;
12779 vm_map_offset_t start_delta;
12780 vm_map_entry_t submap_entry;
12781 vm_prot_t subentry_protection;
12782 vm_prot_t subentry_max_protection;
12783 boolean_t subentry_no_copy_on_read;
12784 boolean_t mapped_needs_copy = FALSE;
12785
12786 local_vaddr = vaddr;
12787
12788 if ((entry->use_pmap &&
12789 !((fault_type & VM_PROT_WRITE) ||
12790 force_copy))) {
12791 /* if real_map equals map we unlock below */
12792 if ((*real_map != map) &&
12793 (*real_map != cow_sub_map_parent)) {
12794 vm_map_unlock(*real_map);
12795 }
12796 *real_map = VME_SUBMAP(entry);
12797 }
12798
12799 if (entry->needs_copy &&
12800 ((fault_type & VM_PROT_WRITE) ||
12801 force_copy)) {
12802 if (!mapped_needs_copy) {
12803 if (vm_map_lock_read_to_write(map)) {
12804 vm_map_lock_read(map);
12805 *real_map = map;
12806 goto RetryLookup;
12807 }
12808 vm_map_lock_read(VME_SUBMAP(entry));
12809 *var_map = VME_SUBMAP(entry);
12810 cow_sub_map_parent = map;
12811 /* reset base to map before cow object */
12812 /* this is the map which will accept */
12813 /* the new cow object */
12814 old_start = entry->vme_start;
12815 old_end = entry->vme_end;
12816 cow_parent_vaddr = vaddr;
12817 mapped_needs_copy = TRUE;
12818 } else {
12819 vm_map_lock_read(VME_SUBMAP(entry));
12820 *var_map = VME_SUBMAP(entry);
12821 if ((cow_sub_map_parent != map) &&
12822 (*real_map != map)) {
12823 vm_map_unlock(map);
12824 }
12825 }
12826 } else {
12827 vm_map_lock_read(VME_SUBMAP(entry));
12828 *var_map = VME_SUBMAP(entry);
12829 /* leave map locked if it is a target */
12830 /* cow sub_map above; otherwise, just */
12831 /* follow the maps down to the object */
12832 /* here we unlock knowing we are not */
12833 /* revisiting the map. */
12834 if ((*real_map != map) && (map != cow_sub_map_parent)) {
12835 vm_map_unlock_read(map);
12836 }
12837 }
12838
12839 map = *var_map;
12840
12841 /* calculate the offset in the submap for vaddr */
12842 local_vaddr = (local_vaddr - entry->vme_start) + VME_OFFSET(entry);
12843
12844 RetrySubMap:
12845 if (!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
12846 if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
12847 vm_map_unlock(cow_sub_map_parent);
12848 }
12849 if ((*real_map != map)
12850 && (*real_map != cow_sub_map_parent)) {
12851 vm_map_unlock(*real_map);
12852 }
12853 *real_map = map;
12854 return KERN_INVALID_ADDRESS;
12855 }
12856
12857 /* find the attenuated shadow of the underlying object */
12858 /* on our target map */
12859
12860 /* In plain English: the submap object may extend beyond the */
12861 /* region mapped by the entry, or may only fill a portion */
12862 /* of it. For our purposes, we only care if the object */
12863 /* doesn't fill. In this case the area which will */
12864 /* ultimately be clipped in the top map will only need */
12865 /* to be as big as the portion of the underlying entry */
12866 /* which is mapped */
12867 start_delta = submap_entry->vme_start > VME_OFFSET(entry) ?
12868 submap_entry->vme_start - VME_OFFSET(entry) : 0;
12869
12870 end_delta =
12871 (VME_OFFSET(entry) + start_delta + (old_end - old_start)) <=
12872 submap_entry->vme_end ?
12873 0 : (VME_OFFSET(entry) +
12874 (old_end - old_start))
12875 - submap_entry->vme_end;
12876
12877 old_start += start_delta;
12878 old_end -= end_delta;
12879
12880 if (submap_entry->is_sub_map) {
12881 entry = submap_entry;
12882 vaddr = local_vaddr;
12883 goto submap_recurse;
12884 }
12885
12886 if (((fault_type & VM_PROT_WRITE) ||
12887 force_copy)
12888 && cow_sub_map_parent) {
12889 vm_object_t sub_object, copy_object;
12890 vm_object_offset_t copy_offset;
12891 vm_map_offset_t local_start;
12892 vm_map_offset_t local_end;
12893 boolean_t copied_slowly = FALSE;
12894
12895 if (vm_map_lock_read_to_write(map)) {
12896 vm_map_lock_read(map);
12897 old_start -= start_delta;
12898 old_end += end_delta;
12899 goto RetrySubMap;
12900 }
12901
12902
12903 sub_object = VME_OBJECT(submap_entry);
12904 if (sub_object == VM_OBJECT_NULL) {
12905 sub_object =
12906 vm_object_allocate(
12907 (vm_map_size_t)
12908 (submap_entry->vme_end -
12909 submap_entry->vme_start));
12910 VME_OBJECT_SET(submap_entry, sub_object);
12911 VME_OFFSET_SET(submap_entry, 0);
12912 assert(!submap_entry->is_sub_map);
12913 assert(submap_entry->use_pmap);
12914 }
12915 local_start = local_vaddr -
12916 (cow_parent_vaddr - old_start);
12917 local_end = local_vaddr +
12918 (old_end - cow_parent_vaddr);
12919 vm_map_clip_start(map, submap_entry, local_start);
12920 vm_map_clip_end(map, submap_entry, local_end);
12921 if (submap_entry->is_sub_map) {
12922 /* unnesting was done when clipping */
12923 assert(!submap_entry->use_pmap);
12924 }
12925
12926 /* This is the COW case, let's connect */
12927 /* an entry in our space to the underlying */
12928 /* object in the submap, bypassing the */
12929 /* submap. */
12930
12931
12932 if (submap_entry->wired_count != 0 ||
12933 (sub_object->copy_strategy ==
12934 MEMORY_OBJECT_COPY_NONE)) {
12935 vm_object_lock(sub_object);
12936 vm_object_copy_slowly(sub_object,
12937 VME_OFFSET(submap_entry),
12938 (submap_entry->vme_end -
12939 submap_entry->vme_start),
12940 FALSE,
12941 &copy_object);
12942 copied_slowly = TRUE;
12943 } else {
12944 /* set up shadow object */
12945 copy_object = sub_object;
12946 vm_object_lock(sub_object);
12947 vm_object_reference_locked(sub_object);
12948 sub_object->shadowed = TRUE;
12949 vm_object_unlock(sub_object);
12950
12951 assert(submap_entry->wired_count == 0);
12952 submap_entry->needs_copy = TRUE;
12953
12954 prot = submap_entry->protection;
12955 assert(!pmap_has_prot_policy(prot));
12956 prot = prot & ~VM_PROT_WRITE;
12957 assert(!pmap_has_prot_policy(prot));
12958
12959 if (override_nx(old_map,
12960 VME_ALIAS(submap_entry))
12961 && prot) {
12962 prot |= VM_PROT_EXECUTE;
12963 }
12964
12965 vm_object_pmap_protect(
12966 sub_object,
12967 VME_OFFSET(submap_entry),
12968 submap_entry->vme_end -
12969 submap_entry->vme_start,
12970 (submap_entry->is_shared
12971 || map->mapped_in_other_pmaps) ?
12972 PMAP_NULL : map->pmap,
12973 submap_entry->vme_start,
12974 prot);
12975 }
12976
12977 /*
12978 * Adjust the fault offset to the submap entry.
12979 */
12980 copy_offset = (local_vaddr -
12981 submap_entry->vme_start +
12982 VME_OFFSET(submap_entry));
12983
12984 /* This works differently than the */
12985 /* normal submap case. We go back */
12986 /* to the parent of the cow map and */
12987 /* clip out the target portion of */
12988 /* the sub_map, substituting the */
12989 /* new copy object. */
12990
12991 subentry_protection = submap_entry->protection;
12992 subentry_max_protection = submap_entry->max_protection;
12993 subentry_no_copy_on_read = submap_entry->vme_no_copy_on_read;
12994 vm_map_unlock(map);
12995 submap_entry = NULL; /* not valid after map unlock */
12996
12997 local_start = old_start;
12998 local_end = old_end;
12999 map = cow_sub_map_parent;
13000 *var_map = cow_sub_map_parent;
13001 vaddr = cow_parent_vaddr;
13002 cow_sub_map_parent = NULL;
13003
13004 if (!vm_map_lookup_entry(map,
13005 vaddr, &entry)) {
13006 vm_object_deallocate(
13007 copy_object);
13008 vm_map_lock_write_to_read(map);
13009 return KERN_INVALID_ADDRESS;
13010 }
13011
13012 /* clip out the portion of space */
13013 /* mapped by the sub map which */
13014 /* corresponds to the underlying */
13015 /* object */
13016
13017 /*
13018 * Clip (and unnest) the smallest nested chunk
13019 * possible around the faulting address...
13020 */
13021 local_start = vaddr & ~(pmap_nesting_size_min - 1);
13022 local_end = local_start + pmap_nesting_size_min;
13023 /*
13024 * ... but don't go beyond the "old_start" to "old_end"
13025 * range, to avoid spanning over another VM region
13026 * with a possibly different VM object and/or offset.
13027 */
13028 if (local_start < old_start) {
13029 local_start = old_start;
13030 }
13031 if (local_end > old_end) {
13032 local_end = old_end;
13033 }
13034 /*
13035 * Adjust copy_offset to the start of the range.
13036 */
13037 copy_offset -= (vaddr - local_start);
13038
13039 vm_map_clip_start(map, entry, local_start);
13040 vm_map_clip_end(map, entry, local_end);
13041 if (entry->is_sub_map) {
13042 /* unnesting was done when clipping */
13043 assert(!entry->use_pmap);
13044 }
13045
13046 /* substitute copy object for */
13047 /* shared map entry */
13048 vm_map_deallocate(VME_SUBMAP(entry));
13049 assert(!entry->iokit_acct);
13050 entry->is_sub_map = FALSE;
13051 entry->use_pmap = TRUE;
13052 VME_OBJECT_SET(entry, copy_object);
13053
13054 /* propagate the submap entry's protections */
13055 if (entry->protection != VM_PROT_READ) {
13056 /*
13057 * Someone has already altered the top entry's
13058 * protections via vm_protect(VM_PROT_COPY).
13059 * Respect these new values and ignore the
13060 * submap entry's protections.
13061 */
13062 } else {
13063 /*
13064 * Regular copy-on-write: propagate the submap
13065 * entry's protections to the top map entry.
13066 */
13067 entry->protection |= subentry_protection;
13068 }
13069 entry->max_protection |= subentry_max_protection;
13070 /* propagate no_copy_on_read */
13071 entry->vme_no_copy_on_read = subentry_no_copy_on_read;
13072
13073 if ((entry->protection & VM_PROT_WRITE) &&
13074 (entry->protection & VM_PROT_EXECUTE) &&
13075 #if !CONFIG_EMBEDDED
13076 map != kernel_map &&
13077 cs_process_enforcement(NULL) &&
13078 #endif /* !CONFIG_EMBEDDED */
13079 !(entry->used_for_jit)) {
13080 DTRACE_VM3(cs_wx,
13081 uint64_t, (uint64_t)entry->vme_start,
13082 uint64_t, (uint64_t)entry->vme_end,
13083 vm_prot_t, entry->protection);
13084 printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
13085 proc_selfpid(),
13086 (current_task()->bsd_info
13087 ? proc_name_address(current_task()->bsd_info)
13088 : "?"),
13089 __FUNCTION__);
13090 entry->protection &= ~VM_PROT_EXECUTE;
13091 }
13092
13093 if (copied_slowly) {
13094 VME_OFFSET_SET(entry, local_start - old_start);
13095 entry->needs_copy = FALSE;
13096 entry->is_shared = FALSE;
13097 } else {
13098 VME_OFFSET_SET(entry, copy_offset);
13099 assert(entry->wired_count == 0);
13100 entry->needs_copy = TRUE;
13101 if (entry->inheritance == VM_INHERIT_SHARE) {
13102 entry->inheritance = VM_INHERIT_COPY;
13103 }
13104 if (map != old_map) {
13105 entry->is_shared = TRUE;
13106 }
13107 }
13108 if (entry->inheritance == VM_INHERIT_SHARE) {
13109 entry->inheritance = VM_INHERIT_COPY;
13110 }
13111
13112 vm_map_lock_write_to_read(map);
13113 } else {
13114 if ((cow_sub_map_parent)
13115 && (cow_sub_map_parent != *real_map)
13116 && (cow_sub_map_parent != map)) {
13117 vm_map_unlock(cow_sub_map_parent);
13118 }
13119 entry = submap_entry;
13120 vaddr = local_vaddr;
13121 }
13122 }
13123
13124 /*
13125 * Check whether this task is allowed to have
13126 * this page.
13127 */
13128
13129 prot = entry->protection;
13130
13131 if (override_nx(old_map, VME_ALIAS(entry)) && prot) {
13132 /*
13133 * HACK -- if not a stack, then allow execution
13134 */
13135 prot |= VM_PROT_EXECUTE;
13136 }
13137
13138 if (mask_protections) {
13139 fault_type &= prot;
13140 if (fault_type == VM_PROT_NONE) {
13141 goto protection_failure;
13142 }
13143 }
13144 if (((fault_type & prot) != fault_type)
13145 #if __arm64__
13146 /* prefetch abort in execute-only page */
13147 && !(prot == VM_PROT_EXECUTE && fault_type == (VM_PROT_READ | VM_PROT_EXECUTE))
13148 #endif
13149 ) {
13150 protection_failure:
13151 if (*real_map != map) {
13152 vm_map_unlock(*real_map);
13153 }
13154 *real_map = map;
13155
13156 if ((fault_type & VM_PROT_EXECUTE) && prot) {
13157 log_stack_execution_failure((addr64_t)vaddr, prot);
13158 }
13159
13160 DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL);
13161 return KERN_PROTECTION_FAILURE;
13162 }
13163
13164 /*
13165 * If this page is not pageable, we have to get
13166 * it for all possible accesses.
13167 */
13168
13169 *wired = (entry->wired_count != 0);
13170 if (*wired) {
13171 fault_type = prot;
13172 }
13173
13174 /*
13175 * If the entry was copy-on-write, we either ...
13176 */
13177
13178 if (entry->needs_copy) {
13179 /*
13180 * If we want to write the page, we may as well
13181 * handle that now since we've got the map locked.
13182 *
13183 * If we don't need to write the page, we just
13184 * demote the permissions allowed.
13185 */
13186
13187 if ((fault_type & VM_PROT_WRITE) || *wired || force_copy) {
13188 /*
13189 * Make a new object, and place it in the
13190 * object chain. Note that no new references
13191 * have appeared -- one just moved from the
13192 * map to the new object.
13193 */
13194
13195 if (vm_map_lock_read_to_write(map)) {
13196 vm_map_lock_read(map);
13197 goto RetryLookup;
13198 }
13199
13200 if (VME_OBJECT(entry)->shadowed == FALSE) {
13201 vm_object_lock(VME_OBJECT(entry));
13202 VME_OBJECT(entry)->shadowed = TRUE;
13203 vm_object_unlock(VME_OBJECT(entry));
13204 }
13205 VME_OBJECT_SHADOW(entry,
13206 (vm_map_size_t) (entry->vme_end -
13207 entry->vme_start));
13208 entry->needs_copy = FALSE;
13209
13210 vm_map_lock_write_to_read(map);
13211 }
13212 if ((fault_type & VM_PROT_WRITE) == 0 && *wired == 0) {
13213 /*
13214 * We're attempting to read a copy-on-write
13215 * page -- don't allow writes.
13216 */
13217
13218 prot &= (~VM_PROT_WRITE);
13219 }
13220 }
13221
13222 /*
13223 * Create an object if necessary.
13224 */
13225 if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
13226 if (vm_map_lock_read_to_write(map)) {
13227 vm_map_lock_read(map);
13228 goto RetryLookup;
13229 }
13230
13231 VME_OBJECT_SET(entry,
13232 vm_object_allocate(
13233 (vm_map_size_t)(entry->vme_end -
13234 entry->vme_start)));
13235 VME_OFFSET_SET(entry, 0);
13236 assert(entry->use_pmap);
13237 vm_map_lock_write_to_read(map);
13238 }
13239
13240 /*
13241 * Return the object/offset from this entry. If the entry
13242 * was copy-on-write or empty, it has been fixed up. Also
13243 * return the protection.
13244 */
13245
13246 *offset = (vaddr - entry->vme_start) + VME_OFFSET(entry);
13247 *object = VME_OBJECT(entry);
13248 *out_prot = prot;
13249 KDBG_FILTERED(MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_MAP_LOOKUP_OBJECT), VM_KERNEL_UNSLIDE_OR_PERM(*object), 0, 0, 0, 0);
13250
13251 if (fault_info) {
13252 fault_info->interruptible = THREAD_UNINT; /* for now... */
13253 /* ... the caller will change "interruptible" if needed */
13254 fault_info->cluster_size = 0;
13255 fault_info->user_tag = VME_ALIAS(entry);
13256 fault_info->pmap_options = 0;
13257 if (entry->iokit_acct ||
13258 (!entry->is_sub_map && !entry->use_pmap)) {
13259 fault_info->pmap_options |= PMAP_OPTIONS_ALT_ACCT;
13260 }
13261 fault_info->behavior = entry->behavior;
13262 fault_info->lo_offset = VME_OFFSET(entry);
13263 fault_info->hi_offset =
13264 (entry->vme_end - entry->vme_start) + VME_OFFSET(entry);
13265 fault_info->no_cache = entry->no_cache;
13266 fault_info->stealth = FALSE;
13267 fault_info->io_sync = FALSE;
13268 if (entry->used_for_jit ||
13269 entry->vme_resilient_codesign) {
13270 fault_info->cs_bypass = TRUE;
13271 } else {
13272 fault_info->cs_bypass = FALSE;
13273 }
13274 fault_info->pmap_cs_associated = FALSE;
13275 #if CONFIG_PMAP_CS
13276 if (entry->pmap_cs_associated) {
13277 /*
13278 * The pmap layer will validate this page
13279 * before allowing it to be executed from.
13280 */
13281 fault_info->pmap_cs_associated = TRUE;
13282 }
13283 #endif /* CONFIG_PMAP_CS */
13284 fault_info->mark_zf_absent = FALSE;
13285 fault_info->batch_pmap_op = FALSE;
13286 fault_info->resilient_media = entry->vme_resilient_media;
13287 fault_info->no_copy_on_read = entry->vme_no_copy_on_read;
13288 }
13289
13290 /*
13291 * Lock the object to prevent it from disappearing
13292 */
13293 if (object_lock_type == OBJECT_LOCK_EXCLUSIVE) {
13294 vm_object_lock(*object);
13295 } else {
13296 vm_object_lock_shared(*object);
13297 }
13298
13299 /*
13300 * Save the version number
13301 */
13302
13303 out_version->main_timestamp = map->timestamp;
13304
13305 return KERN_SUCCESS;
13306 }
13307
13308
13309 /*
13310 * vm_map_verify:
13311 *
13312 * Verifies that the map in question has not changed
13313 * since the given version. The map has to be locked
13314 * ("shared" mode is fine) before calling this function
13315 * and it will be returned locked too.
13316 */
13317 boolean_t
13318 vm_map_verify(
13319 vm_map_t map,
13320 vm_map_version_t *version) /* REF */
13321 {
13322 boolean_t result;
13323
13324 vm_map_lock_assert_held(map);
13325 result = (map->timestamp == version->main_timestamp);
13326
13327 return result;
13328 }
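/*
 * Illustrative sketch (not part of the original source): the lookup/verify
 * protocol the two routines above exist to support.  A fault handler looks
 * the address up, drops the map lock while it does the (possibly blocking)
 * paging work, then re-checks the version before relying on the result.
 * Error handling and the actual paging work are elided.
 *
 *	vm_map_version_t	version;
 *	vm_object_t		object;
 *	vm_object_offset_t	offset;
 *	vm_prot_t		prot;
 *	boolean_t		wired;
 *	vm_map_t		real_map;
 *	kern_return_t		kr;
 *
 * retry:
 *	vm_map_lock_read(map);
 *	kr = vm_map_lookup_locked(&map, vaddr, fault_type,
 *	    OBJECT_LOCK_EXCLUSIVE, &version, &object, &offset,
 *	    &prot, &wired, NULL, &real_map);
 *	...
 *	vm_map_unlock_read(map);	-- also unlock real_map if distinct --
 *	-- fault the page into "object" here; this may block --
 *	vm_map_lock_read(map);
 *	if (!vm_map_verify(map, &version)) {
 *		vm_map_unlock_read(map);	-- map changed: start over --
 *		goto retry;
 *	}
 *	-- still valid: enter the mapping in the pmap, then unlock --
 */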
13329
13330 /*
13331 * TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
13332 * Goes away after regular vm_region_recurse function migrates to
13333 * 64 bits
13334 * vm_region_recurse: A form of vm_region which follows the
13335 * submaps in a target map
13336 *
13337 */
13338
13339 kern_return_t
13340 vm_map_region_recurse_64(
13341 vm_map_t map,
13342 vm_map_offset_t *address, /* IN/OUT */
13343 vm_map_size_t *size, /* OUT */
13344 natural_t *nesting_depth, /* IN/OUT */
13345 vm_region_submap_info_64_t submap_info, /* IN/OUT */
13346 mach_msg_type_number_t *count) /* IN/OUT */
13347 {
13348 mach_msg_type_number_t original_count;
13349 vm_region_extended_info_data_t extended;
13350 vm_map_entry_t tmp_entry;
13351 vm_map_offset_t user_address;
13352 unsigned int user_max_depth;
13353
13354 /*
13355 * "curr_entry" is the VM map entry preceding or including the
13356 * address we're looking for.
13357 * "curr_map" is the map or sub-map containing "curr_entry".
13358 * "curr_address" is the equivalent of the top map's "user_address"
13359 * in the current map.
13360 * "curr_offset" is the cumulated offset of "curr_map" in the
13361 * target task's address space.
13362 * "curr_depth" is the depth of "curr_map" in the chain of
13363 * sub-maps.
13364 *
13365 * "curr_max_below" and "curr_max_above" limit the range (around
13366 * "curr_address") we should take into account in the current (sub)map.
13367 * They limit the range to what's visible through the map entries
13368 * we've traversed from the top map to the current map.
13369 *
13370 */
13371 vm_map_entry_t curr_entry;
13372 vm_map_address_t curr_address;
13373 vm_map_offset_t curr_offset;
13374 vm_map_t curr_map;
13375 unsigned int curr_depth;
13376 vm_map_offset_t curr_max_below, curr_max_above;
13377 vm_map_offset_t curr_skip;
13378
13379 /*
13380 * "next_" is the same as "curr_" but for the VM region immediately
13381 * after the address we're looking for. We need to keep track of this
13382 * too because we want to return info about that region if the
13383 * address we're looking for is not mapped.
13384 */
13385 vm_map_entry_t next_entry;
13386 vm_map_offset_t next_offset;
13387 vm_map_offset_t next_address;
13388 vm_map_t next_map;
13389 unsigned int next_depth;
13390 vm_map_offset_t next_max_below, next_max_above;
13391 vm_map_offset_t next_skip;
13392
13393 boolean_t look_for_pages;
13394 vm_region_submap_short_info_64_t short_info;
13395 boolean_t do_region_footprint;
13396
13397 if (map == VM_MAP_NULL) {
13398 /* no address space to work on */
13399 return KERN_INVALID_ARGUMENT;
13400 }
13401
13402
13403 if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) {
13404 /*
13405 * "info" structure is not big enough and
13406 * would overflow
13407 */
13408 return KERN_INVALID_ARGUMENT;
13409 }
13410
13411 do_region_footprint = task_self_region_footprint();
13412 original_count = *count;
13413
13414 if (original_count < VM_REGION_SUBMAP_INFO_V0_COUNT_64) {
13415 *count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
13416 look_for_pages = FALSE;
13417 short_info = (vm_region_submap_short_info_64_t) submap_info;
13418 submap_info = NULL;
13419 } else {
13420 look_for_pages = TRUE;
13421 *count = VM_REGION_SUBMAP_INFO_V0_COUNT_64;
13422 short_info = NULL;
13423
13424 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
13425 *count = VM_REGION_SUBMAP_INFO_V1_COUNT_64;
13426 }
13427 if (original_count >= VM_REGION_SUBMAP_INFO_V2_COUNT_64) {
13428 *count = VM_REGION_SUBMAP_INFO_V2_COUNT_64;
13429 }
13430 }
13431
13432 user_address = *address;
13433 user_max_depth = *nesting_depth;
13434
13435 if (not_in_kdp) {
13436 vm_map_lock_read(map);
13437 }
13438
13439 recurse_again:
13440 curr_entry = NULL;
13441 curr_map = map;
13442 curr_address = user_address;
13443 curr_offset = 0;
13444 curr_skip = 0;
13445 curr_depth = 0;
13446 curr_max_above = ((vm_map_offset_t) -1) - curr_address;
13447 curr_max_below = curr_address;
13448
13449 next_entry = NULL;
13450 next_map = NULL;
13451 next_address = 0;
13452 next_offset = 0;
13453 next_skip = 0;
13454 next_depth = 0;
13455 next_max_above = (vm_map_offset_t) -1;
13456 next_max_below = (vm_map_offset_t) -1;
13457
13458 for (;;) {
13459 if (vm_map_lookup_entry(curr_map,
13460 curr_address,
13461 &tmp_entry)) {
13462 /* tmp_entry contains the address we're looking for */
13463 curr_entry = tmp_entry;
13464 } else {
13465 vm_map_offset_t skip;
13466 /*
13467 * The address is not mapped. "tmp_entry" is the
13468 * map entry preceding the address. We want the next
13469 * one, if it exists.
13470 */
13471 curr_entry = tmp_entry->vme_next;
13472
13473 if (curr_entry == vm_map_to_entry(curr_map) ||
13474 (curr_entry->vme_start >=
13475 curr_address + curr_max_above)) {
13476 /* no next entry at this level: stop looking */
13477 if (not_in_kdp) {
13478 vm_map_unlock_read(curr_map);
13479 }
13480 curr_entry = NULL;
13481 curr_map = NULL;
13482 curr_skip = 0;
13483 curr_offset = 0;
13484 curr_depth = 0;
13485 curr_max_above = 0;
13486 curr_max_below = 0;
13487 break;
13488 }
13489
13490 /* adjust current address and offset */
13491 skip = curr_entry->vme_start - curr_address;
13492 curr_address = curr_entry->vme_start;
13493 curr_skip += skip;
13494 curr_offset += skip;
13495 curr_max_above -= skip;
13496 curr_max_below = 0;
13497 }
13498
13499 /*
13500 * Is the next entry at this level closer to the address (or
13501 * deeper in the submap chain) than the one we had
13502 * so far ?
13503 */
13504 tmp_entry = curr_entry->vme_next;
13505 if (tmp_entry == vm_map_to_entry(curr_map)) {
13506 /* no next entry at this level */
13507 } else if (tmp_entry->vme_start >=
13508 curr_address + curr_max_above) {
13509 /*
13510 * tmp_entry is beyond the scope of what we mapped of
13511 * this submap in the upper level: ignore it.
13512 */
13513 } else if ((next_entry == NULL) ||
13514 (tmp_entry->vme_start + curr_offset <=
13515 next_entry->vme_start + next_offset)) {
13516 /*
13517 * We didn't have a "next_entry" or this one is
13518 * closer to the address we're looking for:
13519 * use this "tmp_entry" as the new "next_entry".
13520 */
13521 if (next_entry != NULL) {
13522 /* unlock the last "next_map" */
13523 if (next_map != curr_map && not_in_kdp) {
13524 vm_map_unlock_read(next_map);
13525 }
13526 }
13527 next_entry = tmp_entry;
13528 next_map = curr_map;
13529 next_depth = curr_depth;
13530 next_address = next_entry->vme_start;
13531 next_skip = curr_skip;
13532 next_skip += (next_address - curr_address);
13533 next_offset = curr_offset;
13534 next_offset += (next_address - curr_address);
13535 next_max_above = MIN(next_max_above, curr_max_above);
13536 next_max_above = MIN(next_max_above,
13537 next_entry->vme_end - next_address);
13538 next_max_below = MIN(next_max_below, curr_max_below);
13539 next_max_below = MIN(next_max_below,
13540 next_address - next_entry->vme_start);
13541 }
13542
13543 /*
13544 * "curr_max_{above,below}" allow us to keep track of the
13545 * portion of the submap that is actually mapped at this level:
13546 * the rest of that submap is irrelevant to us, since it's not
13547 * mapped here.
13548 * The relevant portion of the map starts at
13549 * "VME_OFFSET(curr_entry)" up to the size of "curr_entry".
13550 */
13551 curr_max_above = MIN(curr_max_above,
13552 curr_entry->vme_end - curr_address);
13553 curr_max_below = MIN(curr_max_below,
13554 curr_address - curr_entry->vme_start);
13555
13556 if (!curr_entry->is_sub_map ||
13557 curr_depth >= user_max_depth) {
13558 /*
13559 * We hit a leaf map or we reached the maximum depth
13560 * we could, so stop looking. Keep the current map
13561 * locked.
13562 */
13563 break;
13564 }
13565
13566 /*
13567 * Get down to the next submap level.
13568 */
13569
13570 /*
13571 * Lock the next level and unlock the current level,
13572 * unless we need to keep it locked to access the "next_entry"
13573 * later.
13574 */
13575 if (not_in_kdp) {
13576 vm_map_lock_read(VME_SUBMAP(curr_entry));
13577 }
13578 if (curr_map == next_map) {
13579 /* keep "next_map" locked in case we need it */
13580 } else {
13581 /* release this map */
13582 if (not_in_kdp) {
13583 vm_map_unlock_read(curr_map);
13584 }
13585 }
13586
13587 /*
13588 * Adjust the offset. "curr_entry" maps the submap
13589 * at relative address "curr_entry->vme_start" in the
13590 * curr_map but skips the first "VME_OFFSET(curr_entry)"
13591 * bytes of the submap.
13592 * "curr_offset" always represents the offset of a virtual
13593 * address in the curr_map relative to the absolute address
13594 * space (i.e. the top-level VM map).
13595 */
13596 curr_offset +=
13597 (VME_OFFSET(curr_entry) - curr_entry->vme_start);
13598 curr_address = user_address + curr_offset;
13599 /* switch to the submap */
13600 curr_map = VME_SUBMAP(curr_entry);
13601 curr_depth++;
13602 curr_entry = NULL;
13603 }
13604
13605 // LP64todo: all the current tools are 32bit, obviously never worked for 64b
13606 // so probably should be a real 32b ID vs. ptr.
13607 // Current users just check for equality
13608
13609 if (curr_entry == NULL) {
13610 /* no VM region contains the address... */
13611
13612 if (do_region_footprint && /* we want footprint numbers */
13613 next_entry == NULL && /* & there are no more regions */
13614 /* & we haven't already provided our fake region: */
13615 user_address <= vm_map_last_entry(map)->vme_end) {
13616 ledger_amount_t ledger_resident, ledger_compressed;
13617
13618 /*
13619 * Add a fake memory region to account for
13620 * purgeable and/or ledger-tagged memory that
13621 * counts towards this task's memory footprint,
13622 * i.e. the resident/compressed pages of non-volatile
13623 * objects owned by that task.
13624 */
13625 task_ledgers_footprint(map->pmap->ledger,
13626 &ledger_resident,
13627 &ledger_compressed);
13628 if (ledger_resident + ledger_compressed == 0) {
13629 /* no purgeable memory usage to report */
13630 return KERN_INVALID_ADDRESS;
13631 }
13632 /* fake region to show nonvolatile footprint */
13633 if (look_for_pages) {
13634 submap_info->protection = VM_PROT_DEFAULT;
13635 submap_info->max_protection = VM_PROT_DEFAULT;
13636 submap_info->inheritance = VM_INHERIT_DEFAULT;
13637 submap_info->offset = 0;
13638 submap_info->user_tag = -1;
13639 submap_info->pages_resident = (unsigned int) (ledger_resident / PAGE_SIZE);
13640 submap_info->pages_shared_now_private = 0;
13641 submap_info->pages_swapped_out = (unsigned int) (ledger_compressed / PAGE_SIZE);
13642 submap_info->pages_dirtied = submap_info->pages_resident;
13643 submap_info->ref_count = 1;
13644 submap_info->shadow_depth = 0;
13645 submap_info->external_pager = 0;
13646 submap_info->share_mode = SM_PRIVATE;
13647 submap_info->is_submap = 0;
13648 submap_info->behavior = VM_BEHAVIOR_DEFAULT;
13649 submap_info->object_id = INFO_MAKE_FAKE_OBJECT_ID(map, task_ledgers.purgeable_nonvolatile);
13650 submap_info->user_wired_count = 0;
13651 submap_info->pages_reusable = 0;
13652 } else {
13653 short_info->user_tag = -1;
13654 short_info->offset = 0;
13655 short_info->protection = VM_PROT_DEFAULT;
13656 short_info->inheritance = VM_INHERIT_DEFAULT;
13657 short_info->max_protection = VM_PROT_DEFAULT;
13658 short_info->behavior = VM_BEHAVIOR_DEFAULT;
13659 short_info->user_wired_count = 0;
13660 short_info->is_submap = 0;
13661 short_info->object_id = INFO_MAKE_FAKE_OBJECT_ID(map, task_ledgers.purgeable_nonvolatile);
13662 short_info->external_pager = 0;
13663 short_info->shadow_depth = 0;
13664 short_info->share_mode = SM_PRIVATE;
13665 short_info->ref_count = 1;
13666 }
13667 *nesting_depth = 0;
13668 *size = (vm_map_size_t) (ledger_resident + ledger_compressed);
13669 // *address = user_address;
13670 *address = vm_map_last_entry(map)->vme_end;
13671 return KERN_SUCCESS;
13672 }
13673
13674 if (next_entry == NULL) {
13675 /* ... and no VM region follows it either */
13676 return KERN_INVALID_ADDRESS;
13677 }
13678 /* ... gather info about the next VM region */
13679 curr_entry = next_entry;
13680 curr_map = next_map; /* still locked ... */
13681 curr_address = next_address;
13682 curr_skip = next_skip;
13683 curr_offset = next_offset;
13684 curr_depth = next_depth;
13685 curr_max_above = next_max_above;
13686 curr_max_below = next_max_below;
13687 } else {
13688 /* we won't need "next_entry" after all */
13689 if (next_entry != NULL) {
13690 /* release "next_map" */
13691 if (next_map != curr_map && not_in_kdp) {
13692 vm_map_unlock_read(next_map);
13693 }
13694 }
13695 }
13696 next_entry = NULL;
13697 next_map = NULL;
13698 next_offset = 0;
13699 next_skip = 0;
13700 next_depth = 0;
13701 next_max_below = -1;
13702 next_max_above = -1;
13703
13704 if (curr_entry->is_sub_map &&
13705 curr_depth < user_max_depth) {
13706 /*
13707 * We're not as deep as we could be: we must have
13708 * gone back up after not finding anything mapped
13709 * below the original top-level map entry's range.
13710 * Let's move "curr_address" forward and recurse again.
13711 */
13712 user_address = curr_address;
13713 goto recurse_again;
13714 }
13715
13716 *nesting_depth = curr_depth;
13717 *size = curr_max_above + curr_max_below;
13718 *address = user_address + curr_skip - curr_max_below;
13719
13720 // LP64todo: all the current tools are 32bit, obviously never worked for 64b
13721 // so probably should be a real 32b ID vs. ptr.
13722 // Current users just check for equality
13723 #define INFO_MAKE_OBJECT_ID(p) ((uint32_t)(uintptr_t)VM_KERNEL_ADDRPERM(p))
13724
13725 if (look_for_pages) {
13726 submap_info->user_tag = VME_ALIAS(curr_entry);
13727 submap_info->offset = VME_OFFSET(curr_entry);
13728 submap_info->protection = curr_entry->protection;
13729 submap_info->inheritance = curr_entry->inheritance;
13730 submap_info->max_protection = curr_entry->max_protection;
13731 submap_info->behavior = curr_entry->behavior;
13732 submap_info->user_wired_count = curr_entry->user_wired_count;
13733 submap_info->is_submap = curr_entry->is_sub_map;
13734 submap_info->object_id = INFO_MAKE_OBJECT_ID(VME_OBJECT(curr_entry));
13735 } else {
13736 short_info->user_tag = VME_ALIAS(curr_entry);
13737 short_info->offset = VME_OFFSET(curr_entry);
13738 short_info->protection = curr_entry->protection;
13739 short_info->inheritance = curr_entry->inheritance;
13740 short_info->max_protection = curr_entry->max_protection;
13741 short_info->behavior = curr_entry->behavior;
13742 short_info->user_wired_count = curr_entry->user_wired_count;
13743 short_info->is_submap = curr_entry->is_sub_map;
13744 short_info->object_id = INFO_MAKE_OBJECT_ID(VME_OBJECT(curr_entry));
13745 }
13746
13747 extended.pages_resident = 0;
13748 extended.pages_swapped_out = 0;
13749 extended.pages_shared_now_private = 0;
13750 extended.pages_dirtied = 0;
13751 extended.pages_reusable = 0;
13752 extended.external_pager = 0;
13753 extended.shadow_depth = 0;
13754 extended.share_mode = SM_EMPTY;
13755 extended.ref_count = 0;
13756
13757 if (not_in_kdp) {
13758 if (!curr_entry->is_sub_map) {
13759 vm_map_offset_t range_start, range_end;
13760 range_start = MAX((curr_address - curr_max_below),
13761 curr_entry->vme_start);
13762 range_end = MIN((curr_address + curr_max_above),
13763 curr_entry->vme_end);
13764 vm_map_region_walk(curr_map,
13765 range_start,
13766 curr_entry,
13767 (VME_OFFSET(curr_entry) +
13768 (range_start -
13769 curr_entry->vme_start)),
13770 range_end - range_start,
13771 &extended,
13772 look_for_pages, VM_REGION_EXTENDED_INFO_COUNT);
13773 if (extended.external_pager &&
13774 extended.ref_count == 2 &&
13775 extended.share_mode == SM_SHARED) {
13776 extended.share_mode = SM_PRIVATE;
13777 }
13778 } else {
13779 if (curr_entry->use_pmap) {
13780 extended.share_mode = SM_TRUESHARED;
13781 } else {
13782 extended.share_mode = SM_PRIVATE;
13783 }
13784 extended.ref_count = os_ref_get_count(&VME_SUBMAP(curr_entry)->map_refcnt);
13785 }
13786 }
13787
13788 if (look_for_pages) {
13789 submap_info->pages_resident = extended.pages_resident;
13790 submap_info->pages_swapped_out = extended.pages_swapped_out;
13791 submap_info->pages_shared_now_private =
13792 extended.pages_shared_now_private;
13793 submap_info->pages_dirtied = extended.pages_dirtied;
13794 submap_info->external_pager = extended.external_pager;
13795 submap_info->shadow_depth = extended.shadow_depth;
13796 submap_info->share_mode = extended.share_mode;
13797 submap_info->ref_count = extended.ref_count;
13798
13799 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
13800 submap_info->pages_reusable = extended.pages_reusable;
13801 }
13802 if (original_count >= VM_REGION_SUBMAP_INFO_V2_COUNT_64) {
13803 submap_info->object_id_full = (VME_OBJECT(curr_entry) != NULL) ? (vm_object_id_t) VM_KERNEL_ADDRPERM(VME_OBJECT(curr_entry)) : 0ULL;
13804 }
13805 } else {
13806 short_info->external_pager = extended.external_pager;
13807 short_info->shadow_depth = extended.shadow_depth;
13808 short_info->share_mode = extended.share_mode;
13809 short_info->ref_count = extended.ref_count;
13810 }
13811
13812 if (not_in_kdp) {
13813 vm_map_unlock_read(curr_map);
13814 }
13815
13816 return KERN_SUCCESS;
13817 }
13818
13819 /*
13820 * vm_region:
13821 *
13822 * User call to obtain information about a region in
13823 * a task's address map. Currently, only one flavor is
13824 * supported.
13825 *
13826 * XXX The reserved and behavior fields cannot be filled
13827 * in until the vm merge from the IK is completed, and
13828 * vm_reserve is implemented.
13829 */
13830
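/*
 * Illustrative sketch, for orientation only: a hedged example of how a
 * user-space client might end up in vm_map_region() below, assuming the
 * usual mach_vm_region() MIG path (the user-space side is not part of
 * this file).
 *
 *	vm_region_basic_info_data_64_t info;
 *	mach_msg_type_number_t count = VM_REGION_BASIC_INFO_COUNT_64;
 *	mach_vm_address_t address = 0;
 *	mach_vm_size_t size = 0;
 *	mach_port_t object_name = MACH_PORT_NULL;
 *
 *	kern_return_t kr = mach_vm_region(mach_task_self(), &address, &size,
 *	    VM_REGION_BASIC_INFO_64, (vm_region_info_t)&info, &count,
 *	    &object_name);
 *	if (kr == KERN_SUCCESS) {
 *		// [address, address + size) is the first region at or
 *		// above the requested address, with "info" filled in
 *	}
 */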
13831 kern_return_t
13832 vm_map_region(
13833 vm_map_t map,
13834 vm_map_offset_t *address, /* IN/OUT */
13835 vm_map_size_t *size, /* OUT */
13836 vm_region_flavor_t flavor, /* IN */
13837 vm_region_info_t info, /* OUT */
13838 mach_msg_type_number_t *count, /* IN/OUT */
13839 mach_port_t *object_name) /* OUT */
13840 {
13841 vm_map_entry_t tmp_entry;
13842 vm_map_entry_t entry;
13843 vm_map_offset_t start;
13844
13845 if (map == VM_MAP_NULL) {
13846 return KERN_INVALID_ARGUMENT;
13847 }
13848
13849 switch (flavor) {
13850 case VM_REGION_BASIC_INFO:
13851 /* legacy for old 32-bit objects info */
13852 {
13853 vm_region_basic_info_t basic;
13854
13855 if (*count < VM_REGION_BASIC_INFO_COUNT) {
13856 return KERN_INVALID_ARGUMENT;
13857 }
13858
13859 basic = (vm_region_basic_info_t) info;
13860 *count = VM_REGION_BASIC_INFO_COUNT;
13861
13862 vm_map_lock_read(map);
13863
13864 start = *address;
13865 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
13866 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
13867 vm_map_unlock_read(map);
13868 return KERN_INVALID_ADDRESS;
13869 }
13870 } else {
13871 entry = tmp_entry;
13872 }
13873
13874 start = entry->vme_start;
13875
13876 basic->offset = (uint32_t)VME_OFFSET(entry);
13877 basic->protection = entry->protection;
13878 basic->inheritance = entry->inheritance;
13879 basic->max_protection = entry->max_protection;
13880 basic->behavior = entry->behavior;
13881 basic->user_wired_count = entry->user_wired_count;
13882 basic->reserved = entry->is_sub_map;
13883 *address = start;
13884 *size = (entry->vme_end - start);
13885
13886 if (object_name) {
13887 *object_name = IP_NULL;
13888 }
13889 if (entry->is_sub_map) {
13890 basic->shared = FALSE;
13891 } else {
13892 basic->shared = entry->is_shared;
13893 }
13894
13895 vm_map_unlock_read(map);
13896 return KERN_SUCCESS;
13897 }
13898
13899 case VM_REGION_BASIC_INFO_64:
13900 {
13901 vm_region_basic_info_64_t basic;
13902
13903 if (*count < VM_REGION_BASIC_INFO_COUNT_64) {
13904 return KERN_INVALID_ARGUMENT;
13905 }
13906
13907 basic = (vm_region_basic_info_64_t) info;
13908 *count = VM_REGION_BASIC_INFO_COUNT_64;
13909
13910 vm_map_lock_read(map);
13911
13912 start = *address;
13913 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
13914 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
13915 vm_map_unlock_read(map);
13916 return KERN_INVALID_ADDRESS;
13917 }
13918 } else {
13919 entry = tmp_entry;
13920 }
13921
13922 start = entry->vme_start;
13923
13924 basic->offset = VME_OFFSET(entry);
13925 basic->protection = entry->protection;
13926 basic->inheritance = entry->inheritance;
13927 basic->max_protection = entry->max_protection;
13928 basic->behavior = entry->behavior;
13929 basic->user_wired_count = entry->user_wired_count;
13930 basic->reserved = entry->is_sub_map;
13931 *address = start;
13932 *size = (entry->vme_end - start);
13933
13934 if (object_name) {
13935 *object_name = IP_NULL;
13936 }
13937 if (entry->is_sub_map) {
13938 basic->shared = FALSE;
13939 } else {
13940 basic->shared = entry->is_shared;
13941 }
13942
13943 vm_map_unlock_read(map);
13944 return KERN_SUCCESS;
13945 }
13946 case VM_REGION_EXTENDED_INFO:
13947 if (*count < VM_REGION_EXTENDED_INFO_COUNT) {
13948 return KERN_INVALID_ARGUMENT;
13949 }
13950 /*fallthru*/
13951 case VM_REGION_EXTENDED_INFO__legacy:
13952 if (*count < VM_REGION_EXTENDED_INFO_COUNT__legacy) {
13953 return KERN_INVALID_ARGUMENT;
13954 }
13955
13956 {
13957 vm_region_extended_info_t extended;
13958 mach_msg_type_number_t original_count;
13959
13960 extended = (vm_region_extended_info_t) info;
13961
13962 vm_map_lock_read(map);
13963
13964 start = *address;
13965 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
13966 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
13967 vm_map_unlock_read(map);
13968 return KERN_INVALID_ADDRESS;
13969 }
13970 } else {
13971 entry = tmp_entry;
13972 }
13973 start = entry->vme_start;
13974
13975 extended->protection = entry->protection;
13976 extended->user_tag = VME_ALIAS(entry);
13977 extended->pages_resident = 0;
13978 extended->pages_swapped_out = 0;
13979 extended->pages_shared_now_private = 0;
13980 extended->pages_dirtied = 0;
13981 extended->external_pager = 0;
13982 extended->shadow_depth = 0;
13983
13984 original_count = *count;
13985 if (flavor == VM_REGION_EXTENDED_INFO__legacy) {
13986 *count = VM_REGION_EXTENDED_INFO_COUNT__legacy;
13987 } else {
13988 extended->pages_reusable = 0;
13989 *count = VM_REGION_EXTENDED_INFO_COUNT;
13990 }
13991
13992 vm_map_region_walk(map, start, entry, VME_OFFSET(entry), entry->vme_end - start, extended, TRUE, *count);
13993
13994 if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED) {
13995 extended->share_mode = SM_PRIVATE;
13996 }
13997
13998 if (object_name) {
13999 *object_name = IP_NULL;
14000 }
14001 *address = start;
14002 *size = (entry->vme_end - start);
14003
14004 vm_map_unlock_read(map);
14005 return KERN_SUCCESS;
14006 }
14007 case VM_REGION_TOP_INFO:
14008 {
14009 vm_region_top_info_t top;
14010
14011 if (*count < VM_REGION_TOP_INFO_COUNT) {
14012 return KERN_INVALID_ARGUMENT;
14013 }
14014
14015 top = (vm_region_top_info_t) info;
14016 *count = VM_REGION_TOP_INFO_COUNT;
14017
14018 vm_map_lock_read(map);
14019
14020 start = *address;
14021 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
14022 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
14023 vm_map_unlock_read(map);
14024 return KERN_INVALID_ADDRESS;
14025 }
14026 } else {
14027 entry = tmp_entry;
14028 }
14029 start = entry->vme_start;
14030
14031 top->private_pages_resident = 0;
14032 top->shared_pages_resident = 0;
14033
14034 vm_map_region_top_walk(entry, top);
14035
14036 if (object_name) {
14037 *object_name = IP_NULL;
14038 }
14039 *address = start;
14040 *size = (entry->vme_end - start);
14041
14042 vm_map_unlock_read(map);
14043 return KERN_SUCCESS;
14044 }
14045 default:
14046 return KERN_INVALID_ARGUMENT;
14047 }
14048 }
14049
14050 #define OBJ_RESIDENT_COUNT(obj, entry_size) \
14051 MIN((entry_size), \
14052 ((obj)->all_reusable ? \
14053 (obj)->wired_page_count : \
14054 (obj)->resident_page_count - (obj)->reusable_page_count))
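/*
 * Worked example (hypothetical numbers, for illustration): for a 100-page
 * entry over an object with 40 resident pages, of which 10 are reusable
 * and "all_reusable" is clear, OBJ_RESIDENT_COUNT yields
 * MIN(100, 40 - 10) = 30 pages; with "all_reusable" set, only the
 * object's wired pages would be counted instead.
 */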
14055
14056 void
14057 vm_map_region_top_walk(
14058 vm_map_entry_t entry,
14059 vm_region_top_info_t top)
14060 {
14061 if (VME_OBJECT(entry) == 0 || entry->is_sub_map) {
14062 top->share_mode = SM_EMPTY;
14063 top->ref_count = 0;
14064 top->obj_id = 0;
14065 return;
14066 }
14067
14068 {
14069 struct vm_object *obj, *tmp_obj;
14070 int ref_count;
14071 uint32_t entry_size;
14072
14073 entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64);
14074
14075 obj = VME_OBJECT(entry);
14076
14077 vm_object_lock(obj);
14078
14079 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
14080 ref_count--;
14081 }
14082
14083 assert(obj->reusable_page_count <= obj->resident_page_count);
14084 if (obj->shadow) {
14085 if (ref_count == 1) {
14086 top->private_pages_resident =
14087 OBJ_RESIDENT_COUNT(obj, entry_size);
14088 } else {
14089 top->shared_pages_resident =
14090 OBJ_RESIDENT_COUNT(obj, entry_size);
14091 }
14092 top->ref_count = ref_count;
14093 top->share_mode = SM_COW;
14094
14095 while ((tmp_obj = obj->shadow)) {
14096 vm_object_lock(tmp_obj);
14097 vm_object_unlock(obj);
14098 obj = tmp_obj;
14099
14100 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
14101 ref_count--;
14102 }
14103
14104 assert(obj->reusable_page_count <= obj->resident_page_count);
14105 top->shared_pages_resident +=
14106 OBJ_RESIDENT_COUNT(obj, entry_size);
14107 top->ref_count += ref_count - 1;
14108 }
14109 } else {
14110 if (entry->superpage_size) {
14111 top->share_mode = SM_LARGE_PAGE;
14112 top->shared_pages_resident = 0;
14113 top->private_pages_resident = entry_size;
14114 } else if (entry->needs_copy) {
14115 top->share_mode = SM_COW;
14116 top->shared_pages_resident =
14117 OBJ_RESIDENT_COUNT(obj, entry_size);
14118 } else {
14119 if (ref_count == 1 ||
14120 (ref_count == 2 && obj->named)) {
14121 top->share_mode = SM_PRIVATE;
14122 top->private_pages_resident =
14123 OBJ_RESIDENT_COUNT(obj,
14124 entry_size);
14125 } else {
14126 top->share_mode = SM_SHARED;
14127 top->shared_pages_resident =
14128 OBJ_RESIDENT_COUNT(obj,
14129 entry_size);
14130 }
14131 }
14132 top->ref_count = ref_count;
14133 }
14134 /* XXX K64: obj_id will be truncated */
14135 top->obj_id = (unsigned int) (uintptr_t)VM_KERNEL_ADDRPERM(obj);
14136
14137 vm_object_unlock(obj);
14138 }
14139 }
14140
14141 void
14142 vm_map_region_walk(
14143 vm_map_t map,
14144 vm_map_offset_t va,
14145 vm_map_entry_t entry,
14146 vm_object_offset_t offset,
14147 vm_object_size_t range,
14148 vm_region_extended_info_t extended,
14149 boolean_t look_for_pages,
14150 mach_msg_type_number_t count)
14151 {
14152 struct vm_object *obj, *tmp_obj;
14153 vm_map_offset_t last_offset;
14154 int i;
14155 int ref_count;
14156 struct vm_object *shadow_object;
14157 int shadow_depth;
14158 boolean_t do_region_footprint;
14159
14160 do_region_footprint = task_self_region_footprint();
14161
14162 if ((VME_OBJECT(entry) == 0) ||
14163 (entry->is_sub_map) ||
14164 (VME_OBJECT(entry)->phys_contiguous &&
14165 !entry->superpage_size)) {
14166 extended->share_mode = SM_EMPTY;
14167 extended->ref_count = 0;
14168 return;
14169 }
14170
14171 if (entry->superpage_size) {
14172 extended->shadow_depth = 0;
14173 extended->share_mode = SM_LARGE_PAGE;
14174 extended->ref_count = 1;
14175 extended->external_pager = 0;
14176 extended->pages_resident = (unsigned int)(range >> PAGE_SHIFT);
14177 extended->shadow_depth = 0;
14178 return;
14179 }
14180
14181 obj = VME_OBJECT(entry);
14182
14183 vm_object_lock(obj);
14184
14185 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
14186 ref_count--;
14187 }
14188
14189 if (look_for_pages) {
14190 for (last_offset = offset + range;
14191 offset < last_offset;
14192 offset += PAGE_SIZE_64, va += PAGE_SIZE) {
14193 if (do_region_footprint) {
14194 int disp;
14195
14196 disp = 0;
14197 if (map->has_corpse_footprint) {
14198 /*
14199 * Query the page info data we saved
14200 * while forking the corpse.
14201 */
14202 vm_map_corpse_footprint_query_page_info(
14203 map,
14204 va,
14205 &disp);
14206 } else {
14207 /*
14208 * Query the pmap.
14209 */
14210 pmap_query_page_info(map->pmap,
14211 va,
14212 &disp);
14213 }
14214 if (disp & PMAP_QUERY_PAGE_PRESENT) {
14215 if (!(disp & PMAP_QUERY_PAGE_ALTACCT)) {
14216 extended->pages_resident++;
14217 }
14218 if (disp & PMAP_QUERY_PAGE_REUSABLE) {
14219 extended->pages_reusable++;
14220 } else if (!(disp & PMAP_QUERY_PAGE_INTERNAL) ||
14221 (disp & PMAP_QUERY_PAGE_ALTACCT)) {
14222 /* alternate accounting */
14223 } else {
14224 extended->pages_dirtied++;
14225 }
14226 } else if (disp & PMAP_QUERY_PAGE_COMPRESSED) {
14227 if (disp & PMAP_QUERY_PAGE_COMPRESSED_ALTACCT) {
14228 /* alternate accounting */
14229 } else {
14230 extended->pages_swapped_out++;
14231 }
14232 }
14233 /* deal with alternate accounting */
14234 if (obj->purgable == VM_PURGABLE_NONVOLATILE &&
14235 /* && not tagged as no-footprint? */
14236 VM_OBJECT_OWNER(obj) != NULL &&
14237 VM_OBJECT_OWNER(obj)->map == map) {
14238 if ((((va
14239 - entry->vme_start
14240 + VME_OFFSET(entry))
14241 / PAGE_SIZE) <
14242 (obj->resident_page_count +
14243 vm_compressor_pager_get_count(obj->pager)))) {
14244 /*
14245 * Non-volatile purgeable object owned
14246 * by this task: report the first
14247 * "#resident + #compressed" pages as
14248 * "resident" (to show that they
14249 * contribute to the footprint) but not
14250 * "dirty" (to avoid double-counting
14251 * with the fake "non-volatile" region
14252 * we'll report at the end of the
14253 * address space to account for all
14254 * (mapped or not) non-volatile memory
14255 * owned by this task.
14256 */
14257 extended->pages_resident++;
14258 }
14259 } else if ((obj->purgable == VM_PURGABLE_VOLATILE ||
14260 obj->purgable == VM_PURGABLE_EMPTY) &&
14261 /* && not tagged as no-footprint? */
14262 VM_OBJECT_OWNER(obj) != NULL &&
14263 VM_OBJECT_OWNER(obj)->map == map) {
14264 if ((((va
14265 - entry->vme_start
14266 + VME_OFFSET(entry))
14267 / PAGE_SIZE) <
14268 obj->wired_page_count)) {
14269 /*
14270 * Volatile|empty purgeable object owned
14271 * by this task: report the first
14272 * "#wired" pages as "resident" (to
14273 * show that they contribute to the
14274 * footprint) but not "dirty" (to avoid
14275 * double-counting with the fake
14276 * "non-volatile" region we'll report
14277 * at the end of the address space to
14278 * account for all (mapped or not)
14279 * non-volatile memory owned by this
14280 * task.
14281 */
14282 extended->pages_resident++;
14283 }
14284 } else if (obj->purgable != VM_PURGABLE_DENY) {
14285 /*
14286 * Pages from purgeable objects
14287 * will be reported as dirty
14288 * appropriately in an extra
14289 * fake memory region at the end of
14290 * the address space.
14291 */
14292 } else if (entry->iokit_acct) {
14293 /*
14294 * IOKit mappings are considered
14295 * as fully dirty for footprint's
14296 * sake.
14297 */
14298 extended->pages_dirtied++;
14299 }
14300 continue;
14301 }
14302
14303 vm_map_region_look_for_page(map, va, obj,
14304 offset, ref_count,
14305 0, extended, count);
14306 }
14307
14308 if (do_region_footprint) {
14309 goto collect_object_info;
14310 }
14311 } else {
14312 collect_object_info:
14313 shadow_object = obj->shadow;
14314 shadow_depth = 0;
14315
14316 if (!(obj->internal)) {
14317 extended->external_pager = 1;
14318 }
14319
14320 if (shadow_object != VM_OBJECT_NULL) {
14321 vm_object_lock(shadow_object);
14322 for (;
14323 shadow_object != VM_OBJECT_NULL;
14324 shadow_depth++) {
14325 vm_object_t next_shadow;
14326
14327 if (!(shadow_object->internal)) {
14328 extended->external_pager = 1;
14329 }
14330
14331 next_shadow = shadow_object->shadow;
14332 if (next_shadow) {
14333 vm_object_lock(next_shadow);
14334 }
14335 vm_object_unlock(shadow_object);
14336 shadow_object = next_shadow;
14337 }
14338 }
14339 extended->shadow_depth = shadow_depth;
14340 }
14341
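/*
 * Summary of the share-mode derivation below, as implemented by this
 * code: a shadow chain or a pending copy-on-write means SM_COW; with
 * neither, a single reference means SM_PRIVATE, a "true_share" object
 * means SM_TRUESHARED, and everything else starts as SM_SHARED and may
 * be refined to SM_PRIVATE_ALIASED or SM_SHARED_ALIASED further down by
 * counting how many of this map's entries reference the same object.
 */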
14342 if (extended->shadow_depth || entry->needs_copy) {
14343 extended->share_mode = SM_COW;
14344 } else {
14345 if (ref_count == 1) {
14346 extended->share_mode = SM_PRIVATE;
14347 } else {
14348 if (obj->true_share) {
14349 extended->share_mode = SM_TRUESHARED;
14350 } else {
14351 extended->share_mode = SM_SHARED;
14352 }
14353 }
14354 }
14355 extended->ref_count = ref_count - extended->shadow_depth;
14356
14357 for (i = 0; i < extended->shadow_depth; i++) {
14358 if ((tmp_obj = obj->shadow) == 0) {
14359 break;
14360 }
14361 vm_object_lock(tmp_obj);
14362 vm_object_unlock(obj);
14363
14364 if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress) {
14365 ref_count--;
14366 }
14367
14368 extended->ref_count += ref_count;
14369 obj = tmp_obj;
14370 }
14371 vm_object_unlock(obj);
14372
14373 if (extended->share_mode == SM_SHARED) {
14374 vm_map_entry_t cur;
14375 vm_map_entry_t last;
14376 int my_refs;
14377
14378 obj = VME_OBJECT(entry);
14379 last = vm_map_to_entry(map);
14380 my_refs = 0;
14381
14382 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
14383 ref_count--;
14384 }
14385 for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next) {
14386 my_refs += vm_map_region_count_obj_refs(cur, obj);
14387 }
14388
14389 if (my_refs == ref_count) {
14390 extended->share_mode = SM_PRIVATE_ALIASED;
14391 } else if (my_refs > 1) {
14392 extended->share_mode = SM_SHARED_ALIASED;
14393 }
14394 }
14395 }
14396
14397
14398 /* object is locked on entry and locked on return */
14399
14400
14401 static void
14402 vm_map_region_look_for_page(
14403 __unused vm_map_t map,
14404 __unused vm_map_offset_t va,
14405 vm_object_t object,
14406 vm_object_offset_t offset,
14407 int max_refcnt,
14408 int depth,
14409 vm_region_extended_info_t extended,
14410 mach_msg_type_number_t count)
14411 {
14412 vm_page_t p;
14413 vm_object_t shadow;
14414 int ref_count;
14415 vm_object_t caller_object;
14416
14417 shadow = object->shadow;
14418 caller_object = object;
14419
14420
14421 while (TRUE) {
14422 if (!(object->internal)) {
14423 extended->external_pager = 1;
14424 }
14425
14426 if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
14427 if (shadow && (max_refcnt == 1)) {
14428 extended->pages_shared_now_private++;
14429 }
14430
14431 if (!p->vmp_fictitious &&
14432 (p->vmp_dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(p)))) {
14433 extended->pages_dirtied++;
14434 } else if (count >= VM_REGION_EXTENDED_INFO_COUNT) {
14435 if (p->vmp_reusable || object->all_reusable) {
14436 extended->pages_reusable++;
14437 }
14438 }
14439
14440 extended->pages_resident++;
14441
14442 if (object != caller_object) {
14443 vm_object_unlock(object);
14444 }
14445
14446 return;
14447 }
14448 if (object->internal &&
14449 object->alive &&
14450 !object->terminating &&
14451 object->pager_ready) {
14452 if (VM_COMPRESSOR_PAGER_STATE_GET(object, offset)
14453 == VM_EXTERNAL_STATE_EXISTS) {
14454 /* the pager has that page */
14455 extended->pages_swapped_out++;
14456 if (object != caller_object) {
14457 vm_object_unlock(object);
14458 }
14459 return;
14460 }
14461 }
14462
14463 if (shadow) {
14464 vm_object_lock(shadow);
14465
14466 if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress) {
14467 ref_count--;
14468 }
14469
14470 if (++depth > extended->shadow_depth) {
14471 extended->shadow_depth = depth;
14472 }
14473
14474 if (ref_count > max_refcnt) {
14475 max_refcnt = ref_count;
14476 }
14477
14478 if (object != caller_object) {
14479 vm_object_unlock(object);
14480 }
14481
14482 offset = offset + object->vo_shadow_offset;
14483 object = shadow;
14484 shadow = object->shadow;
14485 continue;
14486 }
14487 if (object != caller_object) {
14488 vm_object_unlock(object);
14489 }
14490 break;
14491 }
14492 }
14493
14494 static int
14495 vm_map_region_count_obj_refs(
14496 vm_map_entry_t entry,
14497 vm_object_t object)
14498 {
14499 int ref_count;
14500 vm_object_t chk_obj;
14501 vm_object_t tmp_obj;
14502
14503 if (VME_OBJECT(entry) == 0) {
14504 return 0;
14505 }
14506
14507 if (entry->is_sub_map) {
14508 return 0;
14509 } else {
14510 ref_count = 0;
14511
14512 chk_obj = VME_OBJECT(entry);
14513 vm_object_lock(chk_obj);
14514
14515 while (chk_obj) {
14516 if (chk_obj == object) {
14517 ref_count++;
14518 }
14519 tmp_obj = chk_obj->shadow;
14520 if (tmp_obj) {
14521 vm_object_lock(tmp_obj);
14522 }
14523 vm_object_unlock(chk_obj);
14524
14525 chk_obj = tmp_obj;
14526 }
14527 }
14528 return ref_count;
14529 }
14530
14531
14532 /*
14533 * Routine: vm_map_simplify
14534 *
14535 * Description:
14536 * Attempt to simplify the map representation in
14537 * the vicinity of the given starting address.
14538 * Note:
14539 * This routine is intended primarily to keep the
14540 * kernel maps more compact -- they generally don't
14541 * benefit from the "expand a map entry" technology
14542 * at allocation time because the adjacent entry
14543 * is often wired down.
14544 */
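/*
 * Illustrative example (hypothetical addresses): two adjacent entries
 * [0x1000, 0x2000) and [0x2000, 0x3000), backed by the same VM object at
 * contiguous offsets and identical in protection, inheritance, alias,
 * wiring and the other attributes checked in vm_map_simplify_entry()
 * below, coalesce into a single entry [0x1000, 0x3000): the later entry
 * is extended backwards, and the earlier entry's object reference is
 * released before that entry is disposed of.
 */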
14545 void
14546 vm_map_simplify_entry(
14547 vm_map_t map,
14548 vm_map_entry_t this_entry)
14549 {
14550 vm_map_entry_t prev_entry;
14551
14552 counter(c_vm_map_simplify_entry_called++);
14553
14554 prev_entry = this_entry->vme_prev;
14555
14556 if ((this_entry != vm_map_to_entry(map)) &&
14557 (prev_entry != vm_map_to_entry(map)) &&
14558
14559 (prev_entry->vme_end == this_entry->vme_start) &&
14560
14561 (prev_entry->is_sub_map == this_entry->is_sub_map) &&
14562 (VME_OBJECT(prev_entry) == VME_OBJECT(this_entry)) &&
14563 ((VME_OFFSET(prev_entry) + (prev_entry->vme_end -
14564 prev_entry->vme_start))
14565 == VME_OFFSET(this_entry)) &&
14566
14567 (prev_entry->behavior == this_entry->behavior) &&
14568 (prev_entry->needs_copy == this_entry->needs_copy) &&
14569 (prev_entry->protection == this_entry->protection) &&
14570 (prev_entry->max_protection == this_entry->max_protection) &&
14571 (prev_entry->inheritance == this_entry->inheritance) &&
14572 (prev_entry->use_pmap == this_entry->use_pmap) &&
14573 (VME_ALIAS(prev_entry) == VME_ALIAS(this_entry)) &&
14574 (prev_entry->no_cache == this_entry->no_cache) &&
14575 (prev_entry->permanent == this_entry->permanent) &&
14576 (prev_entry->map_aligned == this_entry->map_aligned) &&
14577 (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) &&
14578 (prev_entry->used_for_jit == this_entry->used_for_jit) &&
14579 (prev_entry->pmap_cs_associated == this_entry->pmap_cs_associated) &&
14580 /* from_reserved_zone: OK if that field doesn't match */
14581 (prev_entry->iokit_acct == this_entry->iokit_acct) &&
14582 (prev_entry->vme_resilient_codesign ==
14583 this_entry->vme_resilient_codesign) &&
14584 (prev_entry->vme_resilient_media ==
14585 this_entry->vme_resilient_media) &&
14586 (prev_entry->vme_no_copy_on_read == this_entry->vme_no_copy_on_read) &&
14587
14588 (prev_entry->wired_count == this_entry->wired_count) &&
14589 (prev_entry->user_wired_count == this_entry->user_wired_count) &&
14590
14591 ((prev_entry->vme_atomic == FALSE) && (this_entry->vme_atomic == FALSE)) &&
14592 (prev_entry->in_transition == FALSE) &&
14593 (this_entry->in_transition == FALSE) &&
14594 (prev_entry->needs_wakeup == FALSE) &&
14595 (this_entry->needs_wakeup == FALSE) &&
14596 (prev_entry->is_shared == FALSE) &&
14597 (this_entry->is_shared == FALSE) &&
14598 (prev_entry->superpage_size == FALSE) &&
14599 (this_entry->superpage_size == FALSE)
14600 ) {
14601 vm_map_store_entry_unlink(map, prev_entry);
14602 assert(prev_entry->vme_start < this_entry->vme_end);
14603 if (prev_entry->map_aligned) {
14604 assert(VM_MAP_PAGE_ALIGNED(prev_entry->vme_start,
14605 VM_MAP_PAGE_MASK(map)));
14606 }
14607 this_entry->vme_start = prev_entry->vme_start;
14608 VME_OFFSET_SET(this_entry, VME_OFFSET(prev_entry));
14609
14610 if (map->holelistenabled) {
14611 vm_map_store_update_first_free(map, this_entry, TRUE);
14612 }
14613
14614 if (prev_entry->is_sub_map) {
14615 vm_map_deallocate(VME_SUBMAP(prev_entry));
14616 } else {
14617 vm_object_deallocate(VME_OBJECT(prev_entry));
14618 }
14619 vm_map_entry_dispose(map, prev_entry);
14620 SAVE_HINT_MAP_WRITE(map, this_entry);
14621 counter(c_vm_map_simplified++);
14622 }
14623 }
14624
14625 void
14626 vm_map_simplify(
14627 vm_map_t map,
14628 vm_map_offset_t start)
14629 {
14630 vm_map_entry_t this_entry;
14631
14632 vm_map_lock(map);
14633 if (vm_map_lookup_entry(map, start, &this_entry)) {
14634 vm_map_simplify_entry(map, this_entry);
14635 vm_map_simplify_entry(map, this_entry->vme_next);
14636 }
14637 counter(c_vm_map_simplify_called++);
14638 vm_map_unlock(map);
14639 }
14640
14641 static void
14642 vm_map_simplify_range(
14643 vm_map_t map,
14644 vm_map_offset_t start,
14645 vm_map_offset_t end)
14646 {
14647 vm_map_entry_t entry;
14648
14649 /*
14650 * The map should be locked (for "write") by the caller.
14651 */
14652
14653 if (start >= end) {
14654 /* invalid address range */
14655 return;
14656 }
14657
14658 start = vm_map_trunc_page(start,
14659 VM_MAP_PAGE_MASK(map));
14660 end = vm_map_round_page(end,
14661 VM_MAP_PAGE_MASK(map));
14662
14663 if (!vm_map_lookup_entry(map, start, &entry)) {
14664 /* "start" is not mapped and "entry" ends before "start" */
14665 if (entry == vm_map_to_entry(map)) {
14666 /* start with first entry in the map */
14667 entry = vm_map_first_entry(map);
14668 } else {
14669 /* start with next entry */
14670 entry = entry->vme_next;
14671 }
14672 }
14673
14674 while (entry != vm_map_to_entry(map) &&
14675 entry->vme_start <= end) {
14676 /* try and coalesce "entry" with its previous entry */
14677 vm_map_simplify_entry(map, entry);
14678 entry = entry->vme_next;
14679 }
14680 }
14681
14682
14683 /*
14684 * Routine: vm_map_machine_attribute
14685 * Purpose:
14686 * Provide machine-specific attributes to mappings,
14687 * such as cacheability etc. for machines that provide
14688 * them. NUMA architectures and machines with big/strange
14689 * caches will use this.
14690 * Note:
14691 * Responsibilities for locking and checking are handled here;
14692 * everything else is handled in the pmap module. If any non-volatile
14693 * information must be kept, the pmap module should handle
14694 * it itself. [This assumes that attributes do not
14695 * need to be inherited, which seems ok to me]
14696 */
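/*
 * Hedged usage sketch (MATTR_CACHE and MATTR_VAL_CACHE_FLUSH are assumed
 * to come from <mach/vm_attributes.h>; they are not defined in this
 * file): a caller requesting a cache flush over [start, end) might do:
 *
 *	vm_machine_attribute_val_t value = MATTR_VAL_CACHE_FLUSH;
 *	kern_return_t kr = vm_map_machine_attribute(map, start, end,
 *	    MATTR_CACHE, &value);
 */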
14697 kern_return_t
14698 vm_map_machine_attribute(
14699 vm_map_t map,
14700 vm_map_offset_t start,
14701 vm_map_offset_t end,
14702 vm_machine_attribute_t attribute,
14703 vm_machine_attribute_val_t* value) /* IN/OUT */
14704 {
14705 kern_return_t ret;
14706 vm_map_size_t sync_size;
14707 vm_map_entry_t entry;
14708
14709 if (start < vm_map_min(map) || end > vm_map_max(map)) {
14710 return KERN_INVALID_ADDRESS;
14711 }
14712
14713 /* Figure how much memory we need to flush (in page increments) */
14714 sync_size = end - start;
14715
14716 vm_map_lock(map);
14717
14718 if (attribute != MATTR_CACHE) {
14719 /* If we don't have to find physical addresses, we */
14720 /* don't have to do an explicit traversal here. */
14721 ret = pmap_attribute(map->pmap, start, end - start,
14722 attribute, value);
14723 vm_map_unlock(map);
14724 return ret;
14725 }
14726
14727 ret = KERN_SUCCESS; /* Assume it all worked */
14728
14729 while (sync_size) {
14730 if (vm_map_lookup_entry(map, start, &entry)) {
14731 vm_map_size_t sub_size;
14732 if ((entry->vme_end - start) > sync_size) {
14733 sub_size = sync_size;
14734 sync_size = 0;
14735 } else {
14736 sub_size = entry->vme_end - start;
14737 sync_size -= sub_size;
14738 }
14739 if (entry->is_sub_map) {
14740 vm_map_offset_t sub_start;
14741 vm_map_offset_t sub_end;
14742
14743 sub_start = (start - entry->vme_start)
14744 + VME_OFFSET(entry);
14745 sub_end = sub_start + sub_size;
14746 vm_map_machine_attribute(
14747 VME_SUBMAP(entry),
14748 sub_start,
14749 sub_end,
14750 attribute, value);
14751 } else {
14752 if (VME_OBJECT(entry)) {
14753 vm_page_t m;
14754 vm_object_t object;
14755 vm_object_t base_object;
14756 vm_object_t last_object;
14757 vm_object_offset_t offset;
14758 vm_object_offset_t base_offset;
14759 vm_map_size_t range;
14760 range = sub_size;
14761 offset = (start - entry->vme_start)
14762 + VME_OFFSET(entry);
14763 base_offset = offset;
14764 object = VME_OBJECT(entry);
14765 base_object = object;
14766 last_object = NULL;
14767
14768 vm_object_lock(object);
14769
14770 while (range) {
14771 m = vm_page_lookup(
14772 object, offset);
14773
14774 if (m && !m->vmp_fictitious) {
14775 ret =
14776 pmap_attribute_cache_sync(
14777 VM_PAGE_GET_PHYS_PAGE(m),
14778 PAGE_SIZE,
14779 attribute, value);
14780 } else if (object->shadow) {
14781 offset = offset + object->vo_shadow_offset;
14782 last_object = object;
14783 object = object->shadow;
14784 vm_object_lock(last_object->shadow);
14785 vm_object_unlock(last_object);
14786 continue;
14787 }
14788 range -= PAGE_SIZE;
14789
14790 if (base_object != object) {
14791 vm_object_unlock(object);
14792 vm_object_lock(base_object);
14793 object = base_object;
14794 }
14795 /* Bump to the next page */
14796 base_offset += PAGE_SIZE;
14797 offset = base_offset;
14798 }
14799 vm_object_unlock(object);
14800 }
14801 }
14802 start += sub_size;
14803 } else {
14804 vm_map_unlock(map);
14805 return KERN_FAILURE;
14806 }
14807 }
14808
14809 vm_map_unlock(map);
14810
14811 return ret;
14812 }
14813
14814 /*
14815 * vm_map_behavior_set:
14816 *
14817 * Sets the paging reference behavior of the specified address
14818 * range in the target map. Paging reference behavior affects
14819 * how pagein operations resulting from faults on the map will be
14820 * clustered.
14821 */
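/*
 * Hedged sketch: a kernel caller that wants sequential read-ahead hints
 * over a range could simply do (assuming "map", "start" and "end" have
 * already been validated):
 *
 *	kern_return_t kr = vm_map_behavior_set(map, start, end,
 *	    VM_BEHAVIOR_SEQUENTIAL);
 *
 * User-space madvise(2) requests are expected to reach this routine as
 * well, with the MADV_* commands translated to the VM_BEHAVIOR_* values
 * handled in the switch below.
 */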
14822 kern_return_t
14823 vm_map_behavior_set(
14824 vm_map_t map,
14825 vm_map_offset_t start,
14826 vm_map_offset_t end,
14827 vm_behavior_t new_behavior)
14828 {
14829 vm_map_entry_t entry;
14830 vm_map_entry_t temp_entry;
14831
14832 if (start > end ||
14833 start < vm_map_min(map) ||
14834 end > vm_map_max(map)) {
14835 return KERN_NO_SPACE;
14836 }
14837
14838 switch (new_behavior) {
14839 /*
14840 * This first block of behaviors all set a persistent state on the specified
14841 * memory range. All we have to do here is to record the desired behavior
14842 * in the vm_map_entry_t's.
14843 */
14844
14845 case VM_BEHAVIOR_DEFAULT:
14846 case VM_BEHAVIOR_RANDOM:
14847 case VM_BEHAVIOR_SEQUENTIAL:
14848 case VM_BEHAVIOR_RSEQNTL:
14849 case VM_BEHAVIOR_ZERO_WIRED_PAGES:
14850 vm_map_lock(map);
14851
14852 /*
14853 * The entire address range must be valid for the map.
14854 * Note that vm_map_range_check() does a
14855 * vm_map_lookup_entry() internally and returns the
14856 * entry containing the start of the address range if
14857 * the entire range is valid.
14858 */
14859 if (vm_map_range_check(map, start, end, &temp_entry)) {
14860 entry = temp_entry;
14861 vm_map_clip_start(map, entry, start);
14862 } else {
14863 vm_map_unlock(map);
14864 return KERN_INVALID_ADDRESS;
14865 }
14866
14867 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
14868 vm_map_clip_end(map, entry, end);
14869 if (entry->is_sub_map) {
14870 assert(!entry->use_pmap);
14871 }
14872
14873 if (new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES) {
14874 entry->zero_wired_pages = TRUE;
14875 } else {
14876 entry->behavior = new_behavior;
14877 }
14878 entry = entry->vme_next;
14879 }
14880
14881 vm_map_unlock(map);
14882 break;
14883
14884 /*
14885 * The rest of these are different from the above in that they cause
14886 * an immediate action to take place as opposed to setting a behavior that
14887 * affects future actions.
14888 */
14889
14890 case VM_BEHAVIOR_WILLNEED:
14891 return vm_map_willneed(map, start, end);
14892
14893 case VM_BEHAVIOR_DONTNEED:
14894 return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS);
14895
14896 case VM_BEHAVIOR_FREE:
14897 return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS);
14898
14899 case VM_BEHAVIOR_REUSABLE:
14900 return vm_map_reusable_pages(map, start, end);
14901
14902 case VM_BEHAVIOR_REUSE:
14903 return vm_map_reuse_pages(map, start, end);
14904
14905 case VM_BEHAVIOR_CAN_REUSE:
14906 return vm_map_can_reuse(map, start, end);
14907
14908 #if MACH_ASSERT
14909 case VM_BEHAVIOR_PAGEOUT:
14910 return vm_map_pageout(map, start, end);
14911 #endif /* MACH_ASSERT */
14912
14913 default:
14914 return KERN_INVALID_ARGUMENT;
14915 }
14916
14917 return KERN_SUCCESS;
14918 }
14919
14920
14921 /*
14922 * Internals for madvise(MADV_WILLNEED) system call.
14923 *
14924 * The implementation does one of the following:
14925 * a) read-ahead if the mapping corresponds to a mapped regular file, or
14926 * b) fault in the pages (zero-fill, decompress, etc.) if it's an anonymous mapping.
14927 */
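/*
 * Hedged user-space sketch of the request that ends up here (the BSD
 * madvise(2) front end lives outside this file and is assumed):
 *
 *	if (madvise(addr, len, MADV_WILLNEED) != 0) {
 *		// the advice was rejected, e.g. a hole in the address range
 *	}
 */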
14928
14929
14930 static kern_return_t
14931 vm_map_willneed(
14932 vm_map_t map,
14933 vm_map_offset_t start,
14934 vm_map_offset_t end
14935 )
14936 {
14937 vm_map_entry_t entry;
14938 vm_object_t object;
14939 memory_object_t pager;
14940 struct vm_object_fault_info fault_info = {};
14941 kern_return_t kr;
14942 vm_object_size_t len;
14943 vm_object_offset_t offset;
14944
14945 fault_info.interruptible = THREAD_UNINT; /* ignored value */
14946 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
14947 fault_info.stealth = TRUE;
14948
14949 /*
14950 * The MADV_WILLNEED operation doesn't require any changes to the
14951 * vm_map_entry_t's, so the read lock is sufficient.
14952 */
14953
14954 vm_map_lock_read(map);
14955
14956 /*
14957 * The madvise semantics require that the address range be fully
14958 * allocated with no holes. Otherwise, we're required to return
14959 * an error.
14960 */
14961
14962 if (!vm_map_range_check(map, start, end, &entry)) {
14963 vm_map_unlock_read(map);
14964 return KERN_INVALID_ADDRESS;
14965 }
14966
14967 /*
14968 * Examine each vm_map_entry_t in the range.
14969 */
14970 for (; entry != vm_map_to_entry(map) && start < end;) {
14971 /*
14972 * The first time through, the start address could be anywhere
14973 * within the vm_map_entry we found. So adjust the offset to
14974 * correspond. After that, the offset will always be zero to
14975 * correspond to the beginning of the current vm_map_entry.
14976 */
14977 offset = (start - entry->vme_start) + VME_OFFSET(entry);
14978
14979 /*
14980 * Set the length so we don't go beyond the end of the
14981 * map_entry or beyond the end of the range we were given.
14982 * This range could also span multiple map entries, all of which
14983 * map different files, so make sure we only do the right amount
14984 * of I/O for each object. Note that it's possible for there
14985 * to be multiple map entries all referring to the same object
14986 * but with different page permissions, but it's not worth
14987 * trying to optimize that case.
14988 */
14989 len = MIN(entry->vme_end - start, end - start);
14990
14991 if ((vm_size_t) len != len) {
14992 /* 32-bit overflow */
14993 len = (vm_size_t) (0 - PAGE_SIZE);
14994 }
14995 fault_info.cluster_size = (vm_size_t) len;
14996 fault_info.lo_offset = offset;
14997 fault_info.hi_offset = offset + len;
14998 fault_info.user_tag = VME_ALIAS(entry);
14999 fault_info.pmap_options = 0;
15000 if (entry->iokit_acct ||
15001 (!entry->is_sub_map && !entry->use_pmap)) {
15002 fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
15003 }
15004
15005 /*
15006 * If the entry is a submap OR there's no read permission
15007 * to this mapping, then just skip it.
15008 */
15009 if ((entry->is_sub_map) || (entry->protection & VM_PROT_READ) == 0) {
15010 entry = entry->vme_next;
15011 start = entry->vme_start;
15012 continue;
15013 }
15014
15015 object = VME_OBJECT(entry);
15016
15017 if (object == NULL ||
15018 (object && object->internal)) {
15019 /*
15020 * Memory range backed by anonymous memory.
15021 */
15022 vm_size_t region_size = 0, effective_page_size = 0;
15023 vm_map_offset_t addr = 0, effective_page_mask = 0;
15024
15025 region_size = len;
15026 addr = start;
15027
15028 effective_page_mask = MAX(vm_map_page_mask(current_map()), PAGE_MASK);
15029 effective_page_size = effective_page_mask + 1;
15030
15031 vm_map_unlock_read(map);
15032
15033 while (region_size) {
15034 vm_pre_fault(
15035 vm_map_trunc_page(addr, effective_page_mask),
15036 VM_PROT_READ | VM_PROT_WRITE);
15037
15038 region_size -= effective_page_size;
15039 addr += effective_page_size;
15040 }
15041 } else {
15042 /*
15043 * Find the file object backing this map entry. If there is
15044 * none, then we simply ignore the "will need" advice for this
15045 * entry and go on to the next one.
15046 */
15047 if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) {
15048 entry = entry->vme_next;
15049 start = entry->vme_start;
15050 continue;
15051 }
15052
15053 vm_object_paging_begin(object);
15054 pager = object->pager;
15055 vm_object_unlock(object);
15056
15057 /*
15058 * The data_request() could take a long time, so let's
15059 * release the map lock to avoid blocking other threads.
15060 */
15061 vm_map_unlock_read(map);
15062
15063 /*
15064 * Get the data from the object asynchronously.
15065 *
15066 * Note that memory_object_data_request() places limits on the
15067 * amount of I/O it will do. Regardless of the len we
15068 * specified, it won't do more than MAX_UPL_TRANSFER_BYTES and it
15069 * silently truncates the len to that size. This isn't
15070 * necessarily bad since madvise shouldn't really be used to
15071 * page in unlimited amounts of data. Other Unix variants
15072 * limit the willneed case as well. If this turns out to be an
15073 * issue for developers, then we can always adjust the policy
15074 * here and still be backwards compatible since this is all
15075 * just "advice".
15076 */
15077 kr = memory_object_data_request(
15078 pager,
15079 offset + object->paging_offset,
15080 0, /* ignored */
15081 VM_PROT_READ,
15082 (memory_object_fault_info_t)&fault_info);
15083
15084 vm_object_lock(object);
15085 vm_object_paging_end(object);
15086 vm_object_unlock(object);
15087
15088 /*
15089 * If we couldn't do the I/O for some reason, just give up on
15090 * the madvise. We still return success to the user since
15091 * madvise isn't supposed to fail when the advice can't be
15092 * taken.
15093 */
15094
15095 if (kr != KERN_SUCCESS) {
15096 return KERN_SUCCESS;
15097 }
15098 }
15099
15100 start += len;
15101 if (start >= end) {
15102 /* done */
15103 return KERN_SUCCESS;
15104 }
15105
15106 /* look up next entry */
15107 vm_map_lock_read(map);
15108 if (!vm_map_lookup_entry(map, start, &entry)) {
15109 /*
15110 * There's a new hole in the address range.
15111 */
15112 vm_map_unlock_read(map);
15113 return KERN_INVALID_ADDRESS;
15114 }
15115 }
15116
15117 vm_map_unlock_read(map);
15118 return KERN_SUCCESS;
15119 }
15120
15121 static boolean_t
15122 vm_map_entry_is_reusable(
15123 vm_map_entry_t entry)
15124 {
15125 /* Only user map entries */
15126
15127 vm_object_t object;
15128
15129 if (entry->is_sub_map) {
15130 return FALSE;
15131 }
15132
15133 switch (VME_ALIAS(entry)) {
15134 case VM_MEMORY_MALLOC:
15135 case VM_MEMORY_MALLOC_SMALL:
15136 case VM_MEMORY_MALLOC_LARGE:
15137 case VM_MEMORY_REALLOC:
15138 case VM_MEMORY_MALLOC_TINY:
15139 case VM_MEMORY_MALLOC_LARGE_REUSABLE:
15140 case VM_MEMORY_MALLOC_LARGE_REUSED:
15141 /*
15142 * This is a malloc() memory region: check if it's still
15143 * in its original state and can be re-used for more
15144 * malloc() allocations.
15145 */
15146 break;
15147 default:
15148 /*
15149 * Not a malloc() memory region: let the caller decide if
15150 * it's re-usable.
15151 */
15152 return TRUE;
15153 }
15154
15155 if (/*entry->is_shared ||*/
15156 entry->is_sub_map ||
15157 entry->in_transition ||
15158 entry->protection != VM_PROT_DEFAULT ||
15159 entry->max_protection != VM_PROT_ALL ||
15160 entry->inheritance != VM_INHERIT_DEFAULT ||
15161 entry->no_cache ||
15162 entry->permanent ||
15163 entry->superpage_size != FALSE ||
15164 entry->zero_wired_pages ||
15165 entry->wired_count != 0 ||
15166 entry->user_wired_count != 0) {
15167 return FALSE;
15168 }
15169
15170 object = VME_OBJECT(entry);
15171 if (object == VM_OBJECT_NULL) {
15172 return TRUE;
15173 }
15174 if (
15175 #if 0
15176 /*
15177 * Let's proceed even if the VM object is potentially
15178 * shared.
15179 * We check for this later when processing the actual
15180 * VM pages, so the contents will be safe if shared.
15181 *
15182 * But we can still mark this memory region as "reusable" to
15183 * acknowledge that the caller did let us know that the memory
15184 * could be re-used and should not be penalized for holding
15185 * on to it. This allows its "resident size" to not include
15186 * the reusable range.
15187 */
15188 object->ref_count == 1 &&
15189 #endif
15190 object->wired_page_count == 0 &&
15191 object->copy == VM_OBJECT_NULL &&
15192 object->shadow == VM_OBJECT_NULL &&
15193 object->internal &&
15194 object->purgable == VM_PURGABLE_DENY &&
15195 object->copy_strategy != MEMORY_OBJECT_COPY_DELAY &&
15196 !object->true_share &&
15197 object->wimg_bits == VM_WIMG_USE_DEFAULT &&
15198 !object->code_signed) {
15199 return TRUE;
15200 }
15201 return FALSE;
15202 }
15203
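/*
 * Hedged sketch of the expected user-space pairing for the two routines
 * below. Per the vm_map_behavior_set() switch above, VM_BEHAVIOR_REUSABLE
 * is handled by vm_map_reusable_pages() and VM_BEHAVIOR_REUSE by
 * vm_map_reuse_pages(); the MADV_* to VM_BEHAVIOR_* translation is
 * assumed to happen in the BSD madvise(2) front end, outside this file.
 *
 *	madvise(buf, len, MADV_FREE_REUSABLE);	// contents may be discarded
 *	...
 *	madvise(buf, len, MADV_FREE_REUSE);	// about to reuse the range
 */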
15204 static kern_return_t
15205 vm_map_reuse_pages(
15206 vm_map_t map,
15207 vm_map_offset_t start,
15208 vm_map_offset_t end)
15209 {
15210 vm_map_entry_t entry;
15211 vm_object_t object;
15212 vm_object_offset_t start_offset, end_offset;
15213
15214 /*
15215 * The MADV_REUSE operation doesn't require any changes to the
15216 * vm_map_entry_t's, so the read lock is sufficient.
15217 */
15218
15219 vm_map_lock_read(map);
15220 assert(map->pmap != kernel_pmap); /* protect alias access */
15221
15222 /*
15223 * The madvise semantics require that the address range be fully
15224 * allocated with no holes. Otherwise, we're required to return
15225 * an error.
15226 */
15227
15228 if (!vm_map_range_check(map, start, end, &entry)) {
15229 vm_map_unlock_read(map);
15230 vm_page_stats_reusable.reuse_pages_failure++;
15231 return KERN_INVALID_ADDRESS;
15232 }
15233
15234 /*
15235 * Examine each vm_map_entry_t in the range.
15236 */
15237 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
15238 entry = entry->vme_next) {
15239 /*
15240 * Sanity check on the VM map entry.
15241 */
15242 if (!vm_map_entry_is_reusable(entry)) {
15243 vm_map_unlock_read(map);
15244 vm_page_stats_reusable.reuse_pages_failure++;
15245 return KERN_INVALID_ADDRESS;
15246 }
15247
15248 /*
15249 * The first time through, the start address could be anywhere
15250 * within the vm_map_entry we found. So adjust the offset to
15251 * correspond.
15252 */
15253 if (entry->vme_start < start) {
15254 start_offset = start - entry->vme_start;
15255 } else {
15256 start_offset = 0;
15257 }
15258 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
15259 start_offset += VME_OFFSET(entry);
15260 end_offset += VME_OFFSET(entry);
15261
15262 assert(!entry->is_sub_map);
15263 object = VME_OBJECT(entry);
15264 if (object != VM_OBJECT_NULL) {
15265 vm_object_lock(object);
15266 vm_object_reuse_pages(object, start_offset, end_offset,
15267 TRUE);
15268 vm_object_unlock(object);
15269 }
15270
15271 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSABLE) {
15272 /*
15273 * XXX
15274 * We do not hold the VM map exclusively here.
15275 * The "alias" field is not that critical, so it's
15276 * safe to update it here, as long as it is the only
15277 * one that can be modified while holding the VM map
15278 * "shared".
15279 */
15280 VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSED);
15281 }
15282 }
15283
15284 vm_map_unlock_read(map);
15285 vm_page_stats_reusable.reuse_pages_success++;
15286 return KERN_SUCCESS;
15287 }
15288
15289
15290 static kern_return_t
15291 vm_map_reusable_pages(
15292 vm_map_t map,
15293 vm_map_offset_t start,
15294 vm_map_offset_t end)
15295 {
15296 vm_map_entry_t entry;
15297 vm_object_t object;
15298 vm_object_offset_t start_offset, end_offset;
15299 vm_map_offset_t pmap_offset;
15300
15301 /*
15302 * The MADV_REUSABLE operation doesn't require any changes to the
15303 * vm_map_entry_t's, so the read lock is sufficient.
15304 */
15305
15306 vm_map_lock_read(map);
15307 assert(map->pmap != kernel_pmap); /* protect alias access */
15308
15309 /*
15310 * The madvise semantics require that the address range be fully
15311 * allocated with no holes. Otherwise, we're required to return
15312 * an error.
15313 */
15314
15315 if (!vm_map_range_check(map, start, end, &entry)) {
15316 vm_map_unlock_read(map);
15317 vm_page_stats_reusable.reusable_pages_failure++;
15318 return KERN_INVALID_ADDRESS;
15319 }
15320
15321 /*
15322 * Examine each vm_map_entry_t in the range.
15323 */
15324 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
15325 entry = entry->vme_next) {
15326 int kill_pages = 0;
15327
15328 /*
15329 * Sanity check on the VM map entry.
15330 */
15331 if (!vm_map_entry_is_reusable(entry)) {
15332 vm_map_unlock_read(map);
15333 vm_page_stats_reusable.reusable_pages_failure++;
15334 return KERN_INVALID_ADDRESS;
15335 }
15336
15337 if (!(entry->protection & VM_PROT_WRITE) && !entry->used_for_jit) {
15338 /* not writable: can't discard contents */
15339 vm_map_unlock_read(map);
15340 vm_page_stats_reusable.reusable_nonwritable++;
15341 vm_page_stats_reusable.reusable_pages_failure++;
15342 return KERN_PROTECTION_FAILURE;
15343 }
15344
15345 /*
15346 * The first time through, the start address could be anywhere
15347 * within the vm_map_entry we found. So adjust the offset to
15348 * correspond.
15349 */
15350 if (entry->vme_start < start) {
15351 start_offset = start - entry->vme_start;
15352 pmap_offset = start;
15353 } else {
15354 start_offset = 0;
15355 pmap_offset = entry->vme_start;
15356 }
15357 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
15358 start_offset += VME_OFFSET(entry);
15359 end_offset += VME_OFFSET(entry);
15360
15361 assert(!entry->is_sub_map);
15362 object = VME_OBJECT(entry);
15363 if (object == VM_OBJECT_NULL) {
15364 continue;
15365 }
15366
15367
15368 vm_object_lock(object);
15369 if (((object->ref_count == 1) ||
15370 (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC &&
15371 object->copy == VM_OBJECT_NULL)) &&
15372 object->shadow == VM_OBJECT_NULL &&
15373 /*
15374 * "iokit_acct" entries are billed for their virtual size
15375 * (rather than for their resident pages only), so they
15376 * wouldn't benefit from making pages reusable, and it
15377 * would be hard to keep track of pages that are both
15378 * "iokit_acct" and "reusable" in the pmap stats and
15379 * ledgers.
15380 */
15381 !(entry->iokit_acct ||
15382 (!entry->is_sub_map && !entry->use_pmap))) {
15383 if (object->ref_count != 1) {
15384 vm_page_stats_reusable.reusable_shared++;
15385 }
15386 kill_pages = 1;
15387 } else {
15388 kill_pages = -1;
15389 }
15390 if (kill_pages != -1) {
15391 vm_object_deactivate_pages(object,
15392 start_offset,
15393 end_offset - start_offset,
15394 kill_pages,
15395 TRUE /*reusable_pages*/,
15396 map->pmap,
15397 pmap_offset);
15398 } else {
15399 vm_page_stats_reusable.reusable_pages_shared++;
15400 }
15401 vm_object_unlock(object);
15402
15403 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE ||
15404 VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSED) {
15405 /*
15406 * XXX
15407 * We do not hold the VM map exclusively here.
15408 * The "alias" field is not that critical, so it's
15409 * safe to update it here, as long as it is the only
15410 * one that can be modified while holding the VM map
15411 * "shared".
15412 */
15413 VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSABLE);
15414 }
15415 }
15416
15417 vm_map_unlock_read(map);
15418 vm_page_stats_reusable.reusable_pages_success++;
15419 return KERN_SUCCESS;
15420 }
15421
15422
15423 static kern_return_t
15424 vm_map_can_reuse(
15425 vm_map_t map,
15426 vm_map_offset_t start,
15427 vm_map_offset_t end)
15428 {
15429 vm_map_entry_t entry;
15430
15431 /*
15432 * The MADV_REUSABLE operation doesn't require any changes to the
15433 * vm_map_entry_t's, so the read lock is sufficient.
15434 */
15435
15436 vm_map_lock_read(map);
15437 assert(map->pmap != kernel_pmap); /* protect alias access */
15438
15439 /*
15440 * The madvise semantics require that the address range be fully
15441 * allocated with no holes. Otherwise, we're required to return
15442 * an error.
15443 */
15444
15445 if (!vm_map_range_check(map, start, end, &entry)) {
15446 vm_map_unlock_read(map);
15447 vm_page_stats_reusable.can_reuse_failure++;
15448 return KERN_INVALID_ADDRESS;
15449 }
15450
15451 /*
15452 * Examine each vm_map_entry_t in the range.
15453 */
15454 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
15455 entry = entry->vme_next) {
15456 /*
15457 * Sanity check on the VM map entry.
15458 */
15459 if (!vm_map_entry_is_reusable(entry)) {
15460 vm_map_unlock_read(map);
15461 vm_page_stats_reusable.can_reuse_failure++;
15462 return KERN_INVALID_ADDRESS;
15463 }
15464 }
15465
15466 vm_map_unlock_read(map);
15467 vm_page_stats_reusable.can_reuse_success++;
15468 return KERN_SUCCESS;
15469 }
15470
15471
15472 #if MACH_ASSERT
15473 static kern_return_t
15474 vm_map_pageout(
15475 vm_map_t map,
15476 vm_map_offset_t start,
15477 vm_map_offset_t end)
15478 {
15479 vm_map_entry_t entry;
15480
15481 /*
15482 * The MADV_PAGEOUT operation doesn't require any changes to the
15483 * vm_map_entry_t's, so the read lock is sufficient.
15484 */
15485
15486 vm_map_lock_read(map);
15487
15488 /*
15489 * The madvise semantics require that the address range be fully
15490 * allocated with no holes. Otherwise, we're required to return
15491 * an error.
15492 */
15493
15494 if (!vm_map_range_check(map, start, end, &entry)) {
15495 vm_map_unlock_read(map);
15496 return KERN_INVALID_ADDRESS;
15497 }
15498
15499 /*
15500 * Examine each vm_map_entry_t in the range.
15501 */
15502 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
15503 entry = entry->vme_next) {
15504 vm_object_t object;
15505
15506 /*
15507 * Sanity check on the VM map entry.
15508 */
15509 if (entry->is_sub_map) {
15510 vm_map_t submap;
15511 vm_map_offset_t submap_start;
15512 vm_map_offset_t submap_end;
15513 vm_map_entry_t submap_entry;
15514
15515 submap = VME_SUBMAP(entry);
15516 submap_start = VME_OFFSET(entry);
15517 submap_end = submap_start + (entry->vme_end -
15518 entry->vme_start);
15519
15520 vm_map_lock_read(submap);
15521
15522 if (!vm_map_range_check(submap,
15523 submap_start,
15524 submap_end,
15525 &submap_entry)) {
15526 vm_map_unlock_read(submap);
15527 vm_map_unlock_read(map);
15528 return KERN_INVALID_ADDRESS;
15529 }
15530
15531 object = VME_OBJECT(submap_entry);
15532 if (submap_entry->is_sub_map ||
15533 object == VM_OBJECT_NULL ||
15534 !object->internal) {
15535 vm_map_unlock_read(submap);
15536 continue;
15537 }
15538
15539 vm_object_pageout(object);
15540
15541 vm_map_unlock_read(submap);
15542 submap = VM_MAP_NULL;
15543 submap_entry = VM_MAP_ENTRY_NULL;
15544 continue;
15545 }
15546
15547 object = VME_OBJECT(entry);
15548 if (entry->is_sub_map ||
15549 object == VM_OBJECT_NULL ||
15550 !object->internal) {
15551 continue;
15552 }
15553
15554 vm_object_pageout(object);
15555 }
15556
15557 vm_map_unlock_read(map);
15558 return KERN_SUCCESS;
15559 }
15560 #endif /* MACH_ASSERT */
15561
15562
15563 /*
15564 * Routine: vm_map_entry_insert
15565 *
15566 * Description: This routine inserts a new vm_entry in a locked map.
15567 */
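/*
 * Notes for callers, derived from the assertions and linking code below:
 * the map must be locked exclusively, "insp_entry" is the entry after
 * which the new entry is linked, and [start, end) must be a non-empty,
 * page-aligned range.
 */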
15568 vm_map_entry_t
15569 vm_map_entry_insert(
15570 vm_map_t map,
15571 vm_map_entry_t insp_entry,
15572 vm_map_offset_t start,
15573 vm_map_offset_t end,
15574 vm_object_t object,
15575 vm_object_offset_t offset,
15576 boolean_t needs_copy,
15577 boolean_t is_shared,
15578 boolean_t in_transition,
15579 vm_prot_t cur_protection,
15580 vm_prot_t max_protection,
15581 vm_behavior_t behavior,
15582 vm_inherit_t inheritance,
15583 unsigned wired_count,
15584 boolean_t no_cache,
15585 boolean_t permanent,
15586 boolean_t no_copy_on_read,
15587 unsigned int superpage_size,
15588 boolean_t clear_map_aligned,
15589 boolean_t is_submap,
15590 boolean_t used_for_jit,
15591 int alias)
15592 {
15593 vm_map_entry_t new_entry;
15594
15595 assert(insp_entry != (vm_map_entry_t)0);
15596 vm_map_lock_assert_exclusive(map);
15597
15598 #if DEVELOPMENT || DEBUG
15599 vm_object_offset_t end_offset = 0;
15600 assertf(!os_add_overflow(end - start, offset, &end_offset), "size 0x%llx, offset 0x%llx caused overflow", (uint64_t)(end - start), offset);
15601 #endif /* DEVELOPMENT || DEBUG */
15602
15603 new_entry = vm_map_entry_create(map, !map->hdr.entries_pageable);
15604
15605 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
15606 new_entry->map_aligned = TRUE;
15607 } else {
15608 new_entry->map_aligned = FALSE;
15609 }
15610 if (clear_map_aligned &&
15611 (!VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)) ||
15612 !VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)))) {
15613 new_entry->map_aligned = FALSE;
15614 }
15615
15616 new_entry->vme_start = start;
15617 new_entry->vme_end = end;
15618 assert(page_aligned(new_entry->vme_start));
15619 assert(page_aligned(new_entry->vme_end));
15620 if (new_entry->map_aligned) {
15621 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
15622 VM_MAP_PAGE_MASK(map)));
15623 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
15624 VM_MAP_PAGE_MASK(map)));
15625 }
15626 assert(new_entry->vme_start < new_entry->vme_end);
15627
15628 VME_OBJECT_SET(new_entry, object);
15629 VME_OFFSET_SET(new_entry, offset);
15630 new_entry->is_shared = is_shared;
15631 new_entry->is_sub_map = is_submap;
15632 new_entry->needs_copy = needs_copy;
15633 new_entry->in_transition = in_transition;
15634 new_entry->needs_wakeup = FALSE;
15635 new_entry->inheritance = inheritance;
15636 new_entry->protection = cur_protection;
15637 new_entry->max_protection = max_protection;
15638 new_entry->behavior = behavior;
15639 new_entry->wired_count = wired_count;
15640 new_entry->user_wired_count = 0;
15641 if (is_submap) {
15642 /*
15643 * submap: "use_pmap" means "nested".
15644 * default: false.
15645 */
15646 new_entry->use_pmap = FALSE;
15647 } else {
15648 /*
15649 * object: "use_pmap" means "use pmap accounting" for footprint.
15650 * default: true.
15651 */
15652 new_entry->use_pmap = TRUE;
15653 }
15654 VME_ALIAS_SET(new_entry, alias);
15655 new_entry->zero_wired_pages = FALSE;
15656 new_entry->no_cache = no_cache;
15657 new_entry->permanent = permanent;
15658 if (superpage_size) {
15659 new_entry->superpage_size = TRUE;
15660 } else {
15661 new_entry->superpage_size = FALSE;
15662 }
15663 if (used_for_jit) {
15664 #if CONFIG_EMBEDDED
15665 if (!(map->jit_entry_exists))
15666 #endif /* CONFIG_EMBEDDED */
15667 {
15668 new_entry->used_for_jit = TRUE;
15669 map->jit_entry_exists = TRUE;
15670 }
15671 } else {
15672 new_entry->used_for_jit = FALSE;
15673 }
15674 new_entry->pmap_cs_associated = FALSE;
15675 new_entry->iokit_acct = FALSE;
15676 new_entry->vme_resilient_codesign = FALSE;
15677 new_entry->vme_resilient_media = FALSE;
15678 new_entry->vme_atomic = FALSE;
15679 new_entry->vme_no_copy_on_read = no_copy_on_read;
15680
15681 /*
15682 * Insert the new entry into the list.
15683 */
15684
15685 vm_map_store_entry_link(map, insp_entry, new_entry,
15686 VM_MAP_KERNEL_FLAGS_NONE);
15687 map->size += end - start;
15688
15689 /*
15690 * Update the free space hint and the lookup hint.
15691 */
15692
15693 SAVE_HINT_MAP_WRITE(map, new_entry);
15694 return new_entry;
15695 }
15696
15697 /*
15698 * Routine: vm_map_remap_extract
15699 *
15700 * Description: This routine returns a vm_entry list from a map.
15701 */
15702 static kern_return_t
15703 vm_map_remap_extract(
15704 vm_map_t map,
15705 vm_map_offset_t addr,
15706 vm_map_size_t size,
15707 boolean_t copy,
15708 struct vm_map_header *map_header,
15709 vm_prot_t *cur_protection,
15710 vm_prot_t *max_protection,
15711 /* What, no behavior? */
15712 vm_inherit_t inheritance,
15713 boolean_t pageable,
15714 boolean_t same_map,
15715 vm_map_kernel_flags_t vmk_flags)
15716 {
15717 kern_return_t result;
15718 vm_map_size_t mapped_size;
15719 vm_map_size_t tmp_size;
15720 vm_map_entry_t src_entry; /* result of last map lookup */
15721 vm_map_entry_t new_entry;
15722 vm_object_offset_t offset;
15723 vm_map_offset_t map_address;
15724 vm_map_offset_t src_start; /* start of entry to map */
15725 vm_map_offset_t src_end; /* end of region to be mapped */
15726 vm_object_t object;
15727 vm_map_version_t version;
15728 boolean_t src_needs_copy;
15729 boolean_t new_entry_needs_copy;
15730 vm_map_entry_t saved_src_entry;
15731 boolean_t src_entry_was_wired;
15732 vm_prot_t max_prot_for_prot_copy;
15733
15734 assert(map != VM_MAP_NULL);
15735 assert(size != 0);
15736 assert(size == vm_map_round_page(size, PAGE_MASK));
15737 assert(inheritance == VM_INHERIT_NONE ||
15738 inheritance == VM_INHERIT_COPY ||
15739 inheritance == VM_INHERIT_SHARE);
15740
15741 /*
15742 * Compute start and end of region.
15743 */
15744 src_start = vm_map_trunc_page(addr, PAGE_MASK);
15745 src_end = vm_map_round_page(src_start + size, PAGE_MASK);
15746
15747
15748 /*
15749 * Initialize map_header.
15750 */
15751 map_header->links.next = CAST_TO_VM_MAP_ENTRY(&map_header->links);
15752 map_header->links.prev = CAST_TO_VM_MAP_ENTRY(&map_header->links);
15753 map_header->nentries = 0;
15754 map_header->entries_pageable = pageable;
15755 map_header->page_shift = PAGE_SHIFT;
15756
15757 vm_map_store_init( map_header );
15758
15759 if (copy && vmk_flags.vmkf_remap_prot_copy) {
15760 max_prot_for_prot_copy = *max_protection & VM_PROT_ALL;
15761 } else {
15762 max_prot_for_prot_copy = VM_PROT_NONE;
15763 }
15764 *cur_protection = VM_PROT_ALL;
15765 *max_protection = VM_PROT_ALL;
15766
15767 map_address = 0;
15768 mapped_size = 0;
15769 result = KERN_SUCCESS;
15770
15771 /*
15772 * The specified source virtual space might correspond to
15773 * multiple map entries, need to loop on them.
15774 */
15775 vm_map_lock(map);
15776 while (mapped_size != size) {
15777 vm_map_size_t entry_size;
15778
15779 /*
15780 * Find the beginning of the region.
15781 */
15782 if (!vm_map_lookup_entry(map, src_start, &src_entry)) {
15783 result = KERN_INVALID_ADDRESS;
15784 break;
15785 }
15786
15787 if (src_start < src_entry->vme_start ||
15788 (mapped_size && src_start != src_entry->vme_start)) {
15789 result = KERN_INVALID_ADDRESS;
15790 break;
15791 }
15792
15793 tmp_size = size - mapped_size;
15794 if (src_end > src_entry->vme_end) {
15795 tmp_size -= (src_end - src_entry->vme_end);
15796 }
15797
15798 entry_size = (vm_map_size_t)(src_entry->vme_end -
15799 src_entry->vme_start);
15800
15801 if (src_entry->is_sub_map) {
15802 vm_map_reference(VME_SUBMAP(src_entry));
15803 object = VM_OBJECT_NULL;
15804 } else {
15805 object = VME_OBJECT(src_entry);
15806 if (src_entry->iokit_acct) {
15807 /*
15808 * This entry uses "IOKit accounting".
15809 */
15810 } else if (object != VM_OBJECT_NULL &&
15811 (object->purgable != VM_PURGABLE_DENY ||
15812 object->vo_ledger_tag != VM_LEDGER_TAG_NONE)) {
15813 /*
15814 * Purgeable objects have their own accounting:
15815 * no pmap accounting for them.
15816 */
15817 assertf(!src_entry->use_pmap,
15818 "map=%p src_entry=%p [0x%llx:0x%llx] 0x%x/0x%x %d",
15819 map,
15820 src_entry,
15821 (uint64_t)src_entry->vme_start,
15822 (uint64_t)src_entry->vme_end,
15823 src_entry->protection,
15824 src_entry->max_protection,
15825 VME_ALIAS(src_entry));
15826 } else {
15827 /*
15828 * Not IOKit or purgeable:
15829 * must be accounted by pmap stats.
15830 */
15831 assertf(src_entry->use_pmap,
15832 "map=%p src_entry=%p [0x%llx:0x%llx] 0x%x/0x%x %d",
15833 map,
15834 src_entry,
15835 (uint64_t)src_entry->vme_start,
15836 (uint64_t)src_entry->vme_end,
15837 src_entry->protection,
15838 src_entry->max_protection,
15839 VME_ALIAS(src_entry));
15840 }
15841
15842 if (object == VM_OBJECT_NULL) {
15843 object = vm_object_allocate(entry_size);
15844 VME_OFFSET_SET(src_entry, 0);
15845 VME_OBJECT_SET(src_entry, object);
15846 assert(src_entry->use_pmap);
15847 } else if (object->copy_strategy !=
15848 MEMORY_OBJECT_COPY_SYMMETRIC) {
15849 /*
15850 * We are already using an asymmetric
15851 * copy, and therefore we already have
15852 * the right object.
15853 */
15854 assert(!src_entry->needs_copy);
15855 } else if (src_entry->needs_copy || object->shadowed ||
15856 (object->internal && !object->true_share &&
15857 !src_entry->is_shared &&
15858 object->vo_size > entry_size)) {
15859 VME_OBJECT_SHADOW(src_entry, entry_size);
15860 assert(src_entry->use_pmap);
15861
15862 if (!src_entry->needs_copy &&
15863 (src_entry->protection & VM_PROT_WRITE)) {
15864 vm_prot_t prot;
15865
15866 assert(!pmap_has_prot_policy(src_entry->protection));
15867
15868 prot = src_entry->protection & ~VM_PROT_WRITE;
15869
15870 if (override_nx(map,
15871 VME_ALIAS(src_entry))
15872 && prot) {
15873 prot |= VM_PROT_EXECUTE;
15874 }
15875
15876 assert(!pmap_has_prot_policy(prot));
15877
15878 if (map->mapped_in_other_pmaps) {
15879 vm_object_pmap_protect(
15880 VME_OBJECT(src_entry),
15881 VME_OFFSET(src_entry),
15882 entry_size,
15883 PMAP_NULL,
15884 src_entry->vme_start,
15885 prot);
15886 } else {
15887 pmap_protect(vm_map_pmap(map),
15888 src_entry->vme_start,
15889 src_entry->vme_end,
15890 prot);
15891 }
15892 }
15893
15894 object = VME_OBJECT(src_entry);
15895 src_entry->needs_copy = FALSE;
15896 }
15897
15898
15899 vm_object_lock(object);
15900 vm_object_reference_locked(object); /* object ref. for new entry */
15901 if (object->copy_strategy ==
15902 MEMORY_OBJECT_COPY_SYMMETRIC) {
15903 object->copy_strategy =
15904 MEMORY_OBJECT_COPY_DELAY;
15905 }
15906 vm_object_unlock(object);
15907 }
15908
15909 offset = (VME_OFFSET(src_entry) +
15910 (src_start - src_entry->vme_start));
15911
15912 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
15913 vm_map_entry_copy(new_entry, src_entry);
15914 if (new_entry->is_sub_map) {
15915 /* clr address space specifics */
15916 new_entry->use_pmap = FALSE;
15917 } else if (copy) {
15918 /*
15919 * We're dealing with a copy-on-write operation,
15920 * so the resulting mapping should not inherit the
15921 * original mapping's accounting settings.
15922 * "use_pmap" should be reset to its default (TRUE)
15923 * so that the new mapping gets accounted for in
15924 * the task's memory footprint.
15925 */
15926 new_entry->use_pmap = TRUE;
15927 }
15928 /* "iokit_acct" was cleared in vm_map_entry_copy() */
15929 assert(!new_entry->iokit_acct);
15930
15931 new_entry->map_aligned = FALSE;
15932
15933 new_entry->vme_start = map_address;
15934 new_entry->vme_end = map_address + tmp_size;
15935 assert(new_entry->vme_start < new_entry->vme_end);
15936 if (copy && vmk_flags.vmkf_remap_prot_copy) {
15937 /*
15938 * Remapping for vm_map_protect(VM_PROT_COPY)
15939 * to convert a read-only mapping into a
15940 * copy-on-write version of itself but
15941 * with write access:
15942 * keep the original inheritance and add
15943 * VM_PROT_WRITE to the max protection.
15944 */
15945 new_entry->inheritance = src_entry->inheritance;
15946 new_entry->protection &= max_prot_for_prot_copy;
15947 new_entry->max_protection |= VM_PROT_WRITE;
15948 } else {
15949 new_entry->inheritance = inheritance;
15950 }
15951 VME_OFFSET_SET(new_entry, offset);
15952
15953 /*
15954 * The new region has to be copied now if required.
15955 */
15956 RestartCopy:
15957 if (!copy) {
15958 if (src_entry->used_for_jit == TRUE) {
15959 if (same_map) {
15960 } else {
15961 #if CONFIG_EMBEDDED
15962 /*
15963 * Cannot allow an entry describing a JIT
15964 * region to be shared across address spaces.
15965 */
15966 result = KERN_INVALID_ARGUMENT;
15967 break;
15968 #endif /* CONFIG_EMBEDDED */
15969 }
15970 }
15971
15972 src_entry->is_shared = TRUE;
15973 new_entry->is_shared = TRUE;
15974 if (!(new_entry->is_sub_map)) {
15975 new_entry->needs_copy = FALSE;
15976 }
15977 } else if (src_entry->is_sub_map) {
15978 /* make this a COW sub_map if not already */
15979 assert(new_entry->wired_count == 0);
15980 new_entry->needs_copy = TRUE;
15981 object = VM_OBJECT_NULL;
15982 } else if (src_entry->wired_count == 0 &&
15983 vm_object_copy_quickly(VME_OBJECT_PTR(new_entry),
15984 VME_OFFSET(new_entry),
15985 (new_entry->vme_end -
15986 new_entry->vme_start),
15987 &src_needs_copy,
15988 &new_entry_needs_copy)) {
15989 new_entry->needs_copy = new_entry_needs_copy;
15990 new_entry->is_shared = FALSE;
15991 assertf(new_entry->use_pmap, "map %p new_entry %p\n", map, new_entry);
15992
15993 /*
15994 * Handle copy_on_write semantics.
15995 */
15996 if (src_needs_copy && !src_entry->needs_copy) {
15997 vm_prot_t prot;
15998
15999 assert(!pmap_has_prot_policy(src_entry->protection));
16000
16001 prot = src_entry->protection & ~VM_PROT_WRITE;
16002
16003 if (override_nx(map,
16004 VME_ALIAS(src_entry))
16005 && prot) {
16006 prot |= VM_PROT_EXECUTE;
16007 }
16008
16009 assert(!pmap_has_prot_policy(prot));
16010
16011 vm_object_pmap_protect(object,
16012 offset,
16013 entry_size,
16014 ((src_entry->is_shared
16015 || map->mapped_in_other_pmaps) ?
16016 PMAP_NULL : map->pmap),
16017 src_entry->vme_start,
16018 prot);
16019
16020 assert(src_entry->wired_count == 0);
16021 src_entry->needs_copy = TRUE;
16022 }
16023 /*
16024 * Throw away the old object reference of the new entry.
16025 */
16026 vm_object_deallocate(object);
16027 } else {
16028 new_entry->is_shared = FALSE;
16029 assertf(new_entry->use_pmap, "map %p new_entry %p\n", map, new_entry);
16030
16031 src_entry_was_wired = (src_entry->wired_count > 0);
16032 saved_src_entry = src_entry;
16033 src_entry = VM_MAP_ENTRY_NULL;
16034
16035 /*
16036 * The map can be safely unlocked since we
16037 * already hold a reference on the object.
16038 *
16039 * Record the timestamp of the map for later
16040 * verification, and unlock the map.
16041 */
16042 version.main_timestamp = map->timestamp;
16043 vm_map_unlock(map); /* Increments timestamp once! */
16044
16045 /*
16046 * Perform the copy.
16047 */
16048 if (src_entry_was_wired > 0) {
16049 vm_object_lock(object);
16050 result = vm_object_copy_slowly(
16051 object,
16052 offset,
16053 (new_entry->vme_end -
16054 new_entry->vme_start),
16055 THREAD_UNINT,
16056 VME_OBJECT_PTR(new_entry));
16057
16058 VME_OFFSET_SET(new_entry, 0);
16059 new_entry->needs_copy = FALSE;
16060 } else {
16061 vm_object_offset_t new_offset;
16062
16063 new_offset = VME_OFFSET(new_entry);
16064 result = vm_object_copy_strategically(
16065 object,
16066 offset,
16067 (new_entry->vme_end -
16068 new_entry->vme_start),
16069 VME_OBJECT_PTR(new_entry),
16070 &new_offset,
16071 &new_entry_needs_copy);
16072 if (new_offset != VME_OFFSET(new_entry)) {
16073 VME_OFFSET_SET(new_entry, new_offset);
16074 }
16075
16076 new_entry->needs_copy = new_entry_needs_copy;
16077 }
16078
16079 /*
16080 * Throw away the old object reference of the new entry.
16081 */
16082 vm_object_deallocate(object);
16083
16084 if (result != KERN_SUCCESS &&
16085 result != KERN_MEMORY_RESTART_COPY) {
16086 _vm_map_entry_dispose(map_header, new_entry);
16087 vm_map_lock(map);
16088 break;
16089 }
16090
16091 /*
16092 * Verify that the map has not substantially
16093 * changed while the copy was being made.
16094 */
16095
16096 vm_map_lock(map);
16097 if (version.main_timestamp + 1 != map->timestamp) {
16098 /*
16099 * Simple version comparison failed.
16100 *
16101 * Retry the lookup and verify that the
16102 * same object/offset are still present.
16103 */
16104 saved_src_entry = VM_MAP_ENTRY_NULL;
16105 vm_object_deallocate(VME_OBJECT(new_entry));
16106 _vm_map_entry_dispose(map_header, new_entry);
16107 if (result == KERN_MEMORY_RESTART_COPY) {
16108 result = KERN_SUCCESS;
16109 }
16110 continue;
16111 }
16112 /* map hasn't changed: src_entry is still valid */
16113 src_entry = saved_src_entry;
16114 saved_src_entry = VM_MAP_ENTRY_NULL;
16115
16116 if (result == KERN_MEMORY_RESTART_COPY) {
16117 vm_object_reference(object);
16118 goto RestartCopy;
16119 }
16120 }
16121
16122 _vm_map_store_entry_link(map_header,
16123 map_header->links.prev, new_entry);
16124
16125 /* Protections for submap mapping are irrelevant here */
16126 if (!src_entry->is_sub_map) {
16127 *cur_protection &= src_entry->protection;
16128 *max_protection &= src_entry->max_protection;
16129 }
16130 map_address += tmp_size;
16131 mapped_size += tmp_size;
16132 src_start += tmp_size;
16133 } /* end while */
16134
16135 vm_map_unlock(map);
16136 if (result != KERN_SUCCESS) {
16137 /*
16138 * Free all allocated elements.
16139 */
16140 for (src_entry = map_header->links.next;
16141 src_entry != CAST_TO_VM_MAP_ENTRY(&map_header->links);
16142 src_entry = new_entry) {
16143 new_entry = src_entry->vme_next;
16144 _vm_map_store_entry_unlink(map_header, src_entry);
16145 if (src_entry->is_sub_map) {
16146 vm_map_deallocate(VME_SUBMAP(src_entry));
16147 } else {
16148 vm_object_deallocate(VME_OBJECT(src_entry));
16149 }
16150 _vm_map_entry_dispose(map_header, src_entry);
16151 }
16152 }
16153 return result;
16154 }
16155
16156 /*
16157 * Routine: vm_remap
16158 *
16159 * Map a portion of a task's address space.
16160 * The mapped region must not overlap more than
16161 * one vm memory object. Protections and
16162 * inheritance attributes remain the same
16163 * as in the original task and are out parameters.
16164 * Source and target task can be identical.
16165 * Other attributes are the same as for vm_map().
16166 */
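/*
 * Editor's note: an illustrative sketch of a caller, not taken from any
 * particular call site; "src_map", "target_map", "memory_address" and
 * "size" are assumptions.  This shares a page-aligned region of src_map
 * into target_map at an address chosen by the kernel.
 *
 *	vm_map_address_t	target_addr = 0;
 *	vm_prot_t		cur_prot, max_prot;
 *	kern_return_t		kr;
 *
 *	kr = vm_map_remap(target_map, &target_addr, size,
 *	    (vm_map_offset_t)0, VM_FLAGS_ANYWHERE,
 *	    VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_NONE,
 *	    src_map, memory_address,
 *	    FALSE,
 *	    &cur_prot, &max_prot, VM_INHERIT_SHARE);
 *
 * The zero mask imposes no extra alignment, VM_FLAGS_ANYWHERE lets the
 * kernel pick target_addr, and copy == FALSE shares the source pages
 * rather than copying them.  On success, cur_prot/max_prot report the
 * protections common to the source mappings.
 */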
16167 kern_return_t
16168 vm_map_remap(
16169 vm_map_t target_map,
16170 vm_map_address_t *address,
16171 vm_map_size_t size,
16172 vm_map_offset_t mask,
16173 int flags,
16174 vm_map_kernel_flags_t vmk_flags,
16175 vm_tag_t tag,
16176 vm_map_t src_map,
16177 vm_map_offset_t memory_address,
16178 boolean_t copy,
16179 vm_prot_t *cur_protection,
16180 vm_prot_t *max_protection,
16181 vm_inherit_t inheritance)
16182 {
16183 kern_return_t result;
16184 vm_map_entry_t entry;
16185 vm_map_entry_t insp_entry = VM_MAP_ENTRY_NULL;
16186 vm_map_entry_t new_entry;
16187 struct vm_map_header map_header;
16188 vm_map_offset_t offset_in_mapping;
16189
16190 if (target_map == VM_MAP_NULL) {
16191 return KERN_INVALID_ARGUMENT;
16192 }
16193
16194 switch (inheritance) {
16195 case VM_INHERIT_NONE:
16196 case VM_INHERIT_COPY:
16197 case VM_INHERIT_SHARE:
16198 if (size != 0 && src_map != VM_MAP_NULL) {
16199 break;
16200 }
16201 /*FALL THRU*/
16202 default:
16203 return KERN_INVALID_ARGUMENT;
16204 }
16205
16206 /*
16207 * If the user is requesting that we return the address of the
16208 * first byte of the data (rather than the base of the page),
16209 * then we use different rounding semantics: specifically,
16210 * we assume that (memory_address, size) describes a region
16211 * all of whose pages we must cover, rather than a base to be truncated
16212 * down and a size to be added to that base. So we figure out
16213 * the highest page that the requested region includes and make
16214 * sure that the size will cover it.
16215 *
16216 * The key example we're worried about is of the form:
16217 *
16218 * memory_address = 0x1ff0, size = 0x20
16219 *
16220 * With the old semantics, we round down the memory_address to 0x1000
16221 * and round up the size to 0x1000, resulting in our covering *only*
16222 * page 0x1000. With the new semantics, we'd realize that the region covers
16223 * 0x1ff0-0x2010, and compute a size of 0x2000. Thus, we cover both page
16224 * 0x1000 and page 0x2000 in the region we remap.
16225 */
16226 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
16227 offset_in_mapping = memory_address - vm_map_trunc_page(memory_address, PAGE_MASK);
16228 size = vm_map_round_page(memory_address + size - vm_map_trunc_page(memory_address, PAGE_MASK), PAGE_MASK);
16229 } else {
16230 size = vm_map_round_page(size, PAGE_MASK);
16231 }
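/*
 * Editor's note: the worked arithmetic for the 0x1ff0/0x20 example in
 * the comment above, assuming 4K pages (PAGE_MASK == 0xfff):
 *
 *	offset_in_mapping = 0x1ff0 - 0x1000              = 0x0ff0
 *	size              = round(0x1ff0 + 0x20 - 0x1000, 0xfff)
 *	                  = round(0x1010, 0xfff)         = 0x2000
 *
 * so both page 0x1000 and page 0x2000 are covered, and the caller gets
 * back *address + 0xff0 once VM_FLAGS_RETURN_DATA_ADDR is honored at
 * the end of this routine.
 */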
16232 if (size == 0) {
16233 return KERN_INVALID_ARGUMENT;
16234 }
16235
16236 if (flags & VM_FLAGS_RESILIENT_MEDIA) {
16237 /* must be copy-on-write to be "media resilient" */
16238 if (!copy) {
16239 return KERN_INVALID_ARGUMENT;
16240 }
16241 }
16242
16243 result = vm_map_remap_extract(src_map, memory_address,
16244 size, copy, &map_header,
16245 cur_protection,
16246 max_protection,
16247 inheritance,
16248 target_map->hdr.entries_pageable,
16249 src_map == target_map,
16250 vmk_flags);
16251
16252 if (result != KERN_SUCCESS) {
16253 return result;
16254 }
16255
16256 /*
16257 * Allocate/check a range of free virtual address
16258 * space for the target
16259 */
16260 *address = vm_map_trunc_page(*address,
16261 VM_MAP_PAGE_MASK(target_map));
16262 vm_map_lock(target_map);
16263 result = vm_map_remap_range_allocate(target_map, address, size,
16264 mask, flags, vmk_flags, tag,
16265 &insp_entry);
16266
16267 for (entry = map_header.links.next;
16268 entry != CAST_TO_VM_MAP_ENTRY(&map_header.links);
16269 entry = new_entry) {
16270 new_entry = entry->vme_next;
16271 _vm_map_store_entry_unlink(&map_header, entry);
16272 if (result == KERN_SUCCESS) {
16273 if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
16274 /* no codesigning -> read-only access */
16275 entry->max_protection = VM_PROT_READ;
16276 entry->protection = VM_PROT_READ;
16277 entry->vme_resilient_codesign = TRUE;
16278 }
16279 entry->vme_start += *address;
16280 entry->vme_end += *address;
16281 assert(!entry->map_aligned);
16282 if ((flags & VM_FLAGS_RESILIENT_MEDIA) &&
16283 !entry->is_sub_map &&
16284 (VME_OBJECT(entry) == VM_OBJECT_NULL ||
16285 VME_OBJECT(entry)->internal)) {
16286 entry->vme_resilient_media = TRUE;
16287 }
16288 vm_map_store_entry_link(target_map, insp_entry, entry,
16289 vmk_flags);
16290 insp_entry = entry;
16291 } else {
16292 if (!entry->is_sub_map) {
16293 vm_object_deallocate(VME_OBJECT(entry));
16294 } else {
16295 vm_map_deallocate(VME_SUBMAP(entry));
16296 }
16297 _vm_map_entry_dispose(&map_header, entry);
16298 }
16299 }
16300
16301 if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
16302 *cur_protection = VM_PROT_READ;
16303 *max_protection = VM_PROT_READ;
16304 }
16305
16306 if (target_map->disable_vmentry_reuse == TRUE) {
16307 assert(!target_map->is_nested_map);
16308 if (target_map->highest_entry_end < insp_entry->vme_end) {
16309 target_map->highest_entry_end = insp_entry->vme_end;
16310 }
16311 }
16312
16313 if (result == KERN_SUCCESS) {
16314 target_map->size += size;
16315 SAVE_HINT_MAP_WRITE(target_map, insp_entry);
16316
16317 #if PMAP_CS
16318 if (*max_protection & VM_PROT_EXECUTE) {
16319 vm_map_address_t region_start = 0, region_size = 0;
16320 struct pmap_cs_code_directory *region_cd = NULL;
16321 vm_map_address_t base = 0;
16322 struct pmap_cs_lookup_results results = {};
16323 vm_map_size_t page_addr = vm_map_trunc_page(memory_address, PAGE_MASK);
16324 vm_map_size_t assoc_size = vm_map_round_page(memory_address + size - page_addr, PAGE_MASK);
16325
16326 pmap_cs_lookup(src_map->pmap, memory_address, &results);
16327 region_size = results.region_size;
16328 region_start = results.region_start;
16329 region_cd = results.region_cd_entry;
16330 base = results.base;
16331
16332 if (region_cd != NULL && (page_addr != region_start || assoc_size != region_size)) {
16333 *cur_protection = VM_PROT_READ;
16334 *max_protection = VM_PROT_READ;
16335 printf("mismatched remap of executable range 0x%llx-0x%llx to 0x%llx, "
16336 "region_start 0x%llx, region_size 0x%llx, cd_entry %sNULL, making non-executable.\n",
16337 page_addr, page_addr + assoc_size, *address,
16338 region_start, region_size,
16339 region_cd != NULL ? "not " : "" // Don't leak kernel slide
16340 );
16341 }
16342 }
16343 #endif
16344 }
16345 vm_map_unlock(target_map);
16346
16347 if (result == KERN_SUCCESS && target_map->wiring_required) {
16348 result = vm_map_wire_kernel(target_map, *address,
16349 *address + size, *cur_protection, VM_KERN_MEMORY_MLOCK,
16350 TRUE);
16351 }
16352
16353 /*
16354 * If requested, return the address of the data pointed to by the
16355 * request, rather than the base of the resulting page.
16356 */
16357 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
16358 *address += offset_in_mapping;
16359 }
16360
16361 return result;
16362 }
16363
16364 /*
16365 * Routine: vm_map_remap_range_allocate
16366 *
16367 * Description:
16368 * Allocate a range in the specified virtual address map.
16369 * Returns the address and the map entry just before the allocated
16370 * range.
16371 *
16372 * Map must be locked.
16373 */
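/*
 * Editor's note: an illustrative sketch of the two allocation modes
 * handled below ("map", "size" and the local names are assumptions;
 * the map is assumed to be locked by the caller, as required above).
 *
 *	vm_map_address_t	addr = 0;
 *	vm_map_entry_t		insert_after;
 *	kern_return_t		kr;
 *
 *	kr = vm_map_remap_range_allocate(map, &addr, size,
 *	    (vm_map_offset_t)0, VM_FLAGS_ANYWHERE,
 *	    VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_NONE,
 *	    &insert_after);
 *
 * With VM_FLAGS_ANYWHERE the routine scans the map (hole list or free
 * space hints) and writes the chosen start into addr.  Without it, the
 * caller-supplied address is only validated, and VM_FLAGS_OVERWRITE may
 * additionally be passed to deallocate whatever was mapped there first.
 */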
16374
16375 static kern_return_t
16376 vm_map_remap_range_allocate(
16377 vm_map_t map,
16378 vm_map_address_t *address, /* IN/OUT */
16379 vm_map_size_t size,
16380 vm_map_offset_t mask,
16381 int flags,
16382 vm_map_kernel_flags_t vmk_flags,
16383 __unused vm_tag_t tag,
16384 vm_map_entry_t *map_entry) /* OUT */
16385 {
16386 vm_map_entry_t entry;
16387 vm_map_offset_t start;
16388 vm_map_offset_t end;
16389 vm_map_offset_t desired_empty_end;
16390 kern_return_t kr;
16391 vm_map_entry_t hole_entry;
16392
16393 StartAgain:;
16394
16395 start = *address;
16396
16397 if (flags & VM_FLAGS_ANYWHERE) {
16398 if (flags & VM_FLAGS_RANDOM_ADDR) {
16399 /*
16400 * Get a random start address.
16401 */
16402 kr = vm_map_random_address_for_size(map, address, size);
16403 if (kr != KERN_SUCCESS) {
16404 return kr;
16405 }
16406 start = *address;
16407 }
16408
16409 /*
16410 * Calculate the first possible address.
16411 */
16412
16413 if (start < map->min_offset) {
16414 start = map->min_offset;
16415 }
16416 if (start > map->max_offset) {
16417 return KERN_NO_SPACE;
16418 }
16419
16420 /*
16421 * Look for the first possible address;
16422 * if there's already something at this
16423 * address, we have to start after it.
16424 */
16425
16426 if (map->disable_vmentry_reuse == TRUE) {
16427 VM_MAP_HIGHEST_ENTRY(map, entry, start);
16428 } else {
16429 if (map->holelistenabled) {
16430 hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
16431
16432 if (hole_entry == NULL) {
16433 /*
16434 * No more space in the map?
16435 */
16436 return KERN_NO_SPACE;
16437 } else {
16438 boolean_t found_hole = FALSE;
16439
16440 do {
16441 if (hole_entry->vme_start >= start) {
16442 start = hole_entry->vme_start;
16443 found_hole = TRUE;
16444 break;
16445 }
16446
16447 if (hole_entry->vme_end > start) {
16448 found_hole = TRUE;
16449 break;
16450 }
16451 hole_entry = hole_entry->vme_next;
16452 } while (hole_entry != CAST_TO_VM_MAP_ENTRY(map->holes_list));
16453
16454 if (found_hole == FALSE) {
16455 return KERN_NO_SPACE;
16456 }
16457
16458 entry = hole_entry;
16459 }
16460 } else {
16461 assert(first_free_is_valid(map));
16462 if (start == map->min_offset) {
16463 if ((entry = map->first_free) != vm_map_to_entry(map)) {
16464 start = entry->vme_end;
16465 }
16466 } else {
16467 vm_map_entry_t tmp_entry;
16468 if (vm_map_lookup_entry(map, start, &tmp_entry)) {
16469 start = tmp_entry->vme_end;
16470 }
16471 entry = tmp_entry;
16472 }
16473 }
16474 start = vm_map_round_page(start,
16475 VM_MAP_PAGE_MASK(map));
16476 }
16477
16478 /*
16479 * In any case, the "entry" always precedes
16480 * the proposed new region throughout the
16481 * loop:
16482 */
16483
16484 while (TRUE) {
16485 vm_map_entry_t next;
16486
16487 /*
16488 * Find the end of the proposed new region.
16489 * Be sure we didn't go beyond the end, or
16490 * wrap around the address.
16491 */
16492
16493 end = ((start + mask) & ~mask);
16494 end = vm_map_round_page(end,
16495 VM_MAP_PAGE_MASK(map));
16496 if (end < start) {
16497 return KERN_NO_SPACE;
16498 }
16499 start = end;
16500 end += size;
16501
16502 /* We want an entire page of empty space, but don't increase the allocation size. */
16503 desired_empty_end = vm_map_round_page(end, VM_MAP_PAGE_MASK(map));
16504
16505 if ((desired_empty_end > map->max_offset) || (desired_empty_end < start)) {
16506 if (map->wait_for_space) {
16507 if (size <= (map->max_offset -
16508 map->min_offset)) {
16509 assert_wait((event_t) map, THREAD_INTERRUPTIBLE);
16510 vm_map_unlock(map);
16511 thread_block(THREAD_CONTINUE_NULL);
16512 vm_map_lock(map);
16513 goto StartAgain;
16514 }
16515 }
16516
16517 return KERN_NO_SPACE;
16518 }
16519
16520 next = entry->vme_next;
16521
16522 if (map->holelistenabled) {
16523 if (entry->vme_end >= desired_empty_end) {
16524 break;
16525 }
16526 } else {
16527 /*
16528 * If there are no more entries, we must win.
16529 *
16530 * OR
16531 *
16532 * If there is another entry, it must be
16533 * after the end of the potential new region.
16534 */
16535
16536 if (next == vm_map_to_entry(map)) {
16537 break;
16538 }
16539
16540 if (next->vme_start >= desired_empty_end) {
16541 break;
16542 }
16543 }
16544
16545 /*
16546 * Didn't fit -- move to the next entry.
16547 */
16548
16549 entry = next;
16550
16551 if (map->holelistenabled) {
16552 if (entry == CAST_TO_VM_MAP_ENTRY(map->holes_list)) {
16553 /*
16554 * Wrapped around
16555 */
16556 return KERN_NO_SPACE;
16557 }
16558 start = entry->vme_start;
16559 } else {
16560 start = entry->vme_end;
16561 }
16562 }
16563
16564 if (map->holelistenabled) {
16565 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
16566 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
16567 }
16568 }
16569
16570 *address = start;
16571 } else {
16572 vm_map_entry_t temp_entry;
16573
16574 /*
16575 * Verify that:
16576 * the address doesn't itself violate
16577 * the mask requirement.
16578 */
16579
16580 if ((start & mask) != 0) {
16581 return KERN_NO_SPACE;
16582 }
16583
16584
16585 /*
16586 * ... the address is within bounds
16587 */
16588
16589 end = start + size;
16590
16591 if ((start < map->min_offset) ||
16592 (end > map->max_offset) ||
16593 (start >= end)) {
16594 return KERN_INVALID_ADDRESS;
16595 }
16596
16597 /*
16598 * If we're asked to overwrite whatever was mapped in that
16599 * range, first deallocate that range.
16600 */
16601 if (flags & VM_FLAGS_OVERWRITE) {
16602 vm_map_t zap_map;
16603 int remove_flags = VM_MAP_REMOVE_SAVE_ENTRIES | VM_MAP_REMOVE_NO_MAP_ALIGN;
16604
16605 /*
16606 * We use a "zap_map" to avoid having to unlock
16607 * the "map" in vm_map_delete(), which would compromise
16608 * the atomicity of the "deallocate" and then "remap"
16609 * combination.
16610 */
16611 zap_map = vm_map_create(PMAP_NULL,
16612 start,
16613 end,
16614 map->hdr.entries_pageable);
16615 if (zap_map == VM_MAP_NULL) {
16616 return KERN_RESOURCE_SHORTAGE;
16617 }
16618 vm_map_set_page_shift(zap_map, VM_MAP_PAGE_SHIFT(map));
16619 vm_map_disable_hole_optimization(zap_map);
16620
16621 if (vmk_flags.vmkf_overwrite_immutable) {
16622 remove_flags |= VM_MAP_REMOVE_IMMUTABLE;
16623 }
16624 kr = vm_map_delete(map, start, end,
16625 remove_flags,
16626 zap_map);
16627 if (kr == KERN_SUCCESS) {
16628 vm_map_destroy(zap_map,
16629 VM_MAP_REMOVE_NO_PMAP_CLEANUP);
16630 zap_map = VM_MAP_NULL;
16631 }
16632 }
16633
16634 /*
16635 * ... the starting address isn't allocated
16636 */
16637
16638 if (vm_map_lookup_entry(map, start, &temp_entry)) {
16639 return KERN_NO_SPACE;
16640 }
16641
16642 entry = temp_entry;
16643
16644 /*
16645 * ... the next region doesn't overlap the
16646 * end point.
16647 */
16648
16649 if ((entry->vme_next != vm_map_to_entry(map)) &&
16650 (entry->vme_next->vme_start < end)) {
16651 return KERN_NO_SPACE;
16652 }
16653 }
16654 *map_entry = entry;
16655 return KERN_SUCCESS;
16656 }
16657
16658 /*
16659 * vm_map_switch:
16660 *
16661 * Set the address map for the current thread to the specified map
16662 */
16663
16664 vm_map_t
16665 vm_map_switch(
16666 vm_map_t map)
16667 {
16668 int mycpu;
16669 thread_t thread = current_thread();
16670 vm_map_t oldmap = thread->map;
16671
16672 mp_disable_preemption();
16673 mycpu = cpu_number();
16674
16675 /*
16676 * Deactivate the current map and activate the requested map
16677 */
16678 PMAP_SWITCH_USER(thread, map, mycpu);
16679
16680 mp_enable_preemption();
16681 return oldmap;
16682 }
16683
16684
16685 /*
16686 * Routine: vm_map_write_user
16687 *
16688 * Description:
16689 * Copy out data from kernel space into space in the
16690 * destination map. The space must already exist in the
16691 * destination map.
16692 * NOTE: This routine should only be called by threads
16693 * which can block on a page fault, i.e. kernel-mode user
16694 * threads.
16695 *
16696 */
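/*
 * Editor's note: an illustrative sketch of a caller ("user_map" and
 * "uaddr" are assumptions): copy a small kernel buffer out to a user
 * address in a map that may not be the current one.
 *
 *	char		buf[64];
 *	kern_return_t	kr;
 *
 *	... fill buf ...
 *	kr = vm_map_write_user(user_map, buf, uaddr, sizeof(buf));
 *	if (kr != KERN_SUCCESS) {
 *		... the destination was unmapped or not writable ...
 *	}
 *
 * The copy may fault, so this is only safe from a thread that can block
 * on a page fault, as noted above.
 */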
16697 kern_return_t
16698 vm_map_write_user(
16699 vm_map_t map,
16700 void *src_p,
16701 vm_map_address_t dst_addr,
16702 vm_size_t size)
16703 {
16704 kern_return_t kr = KERN_SUCCESS;
16705
16706 if (current_map() == map) {
16707 if (copyout(src_p, dst_addr, size)) {
16708 kr = KERN_INVALID_ADDRESS;
16709 }
16710 } else {
16711 vm_map_t oldmap;
16712
16713 /* take on the identity of the target map while doing */
16714 /* the transfer */
16715
16716 vm_map_reference(map);
16717 oldmap = vm_map_switch(map);
16718 if (copyout(src_p, dst_addr, size)) {
16719 kr = KERN_INVALID_ADDRESS;
16720 }
16721 vm_map_switch(oldmap);
16722 vm_map_deallocate(map);
16723 }
16724 return kr;
16725 }
16726
16727 /*
16728 * Routine: vm_map_read_user
16729 *
16730 * Description:
16731 * Copy in data from a user space source map into the
16732 * kernel map. The space must already exist in the
16733 * kernel map.
16734 * NOTE: This routine should only be called by threads
16735 * which can block on a page fault, i.e. kernel-mode user
16736 * threads.
16737 *
16738 */
16739 kern_return_t
16740 vm_map_read_user(
16741 vm_map_t map,
16742 vm_map_address_t src_addr,
16743 void *dst_p,
16744 vm_size_t size)
16745 {
16746 kern_return_t kr = KERN_SUCCESS;
16747
16748 if (current_map() == map) {
16749 if (copyin(src_addr, dst_p, size)) {
16750 kr = KERN_INVALID_ADDRESS;
16751 }
16752 } else {
16753 vm_map_t oldmap;
16754
16755 /* take on the identity of the target map while doing */
16756 /* the transfer */
16757
16758 vm_map_reference(map);
16759 oldmap = vm_map_switch(map);
16760 if (copyin(src_addr, dst_p, size)) {
16761 kr = KERN_INVALID_ADDRESS;
16762 }
16763 vm_map_switch(oldmap);
16764 vm_map_deallocate(map);
16765 }
16766 return kr;
16767 }
16768
16769
16770 /*
16771 * vm_map_check_protection:
16772 *
16773 * Assert that the target map allows the specified
16774 * privilege on the entire address region given.
16775 * The entire region must be allocated.
16776 */
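/*
 * Editor's note: an illustrative sketch of a caller ("map", "start" and
 * "end" are assumptions): verify that an entire range is mapped and
 * allows read/write access before operating on it.
 *
 *	if (!vm_map_check_protection(map, start, end,
 *	        VM_PROT_READ | VM_PROT_WRITE)) {
 *		return KERN_PROTECTION_FAILURE;
 *	}
 *
 * Any hole in [start, end) or any entry lacking the requested
 * protection makes the check return FALSE.
 */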
16777 boolean_t
16778 vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
16779 vm_map_offset_t end, vm_prot_t protection)
16780 {
16781 vm_map_entry_t entry;
16782 vm_map_entry_t tmp_entry;
16783
16784 vm_map_lock(map);
16785
16786 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end) {
16787 vm_map_unlock(map);
16788 return FALSE;
16789 }
16790
16791 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
16792 vm_map_unlock(map);
16793 return FALSE;
16794 }
16795
16796 entry = tmp_entry;
16797
16798 while (start < end) {
16799 if (entry == vm_map_to_entry(map)) {
16800 vm_map_unlock(map);
16801 return FALSE;
16802 }
16803
16804 /*
16805 * No holes allowed!
16806 */
16807
16808 if (start < entry->vme_start) {
16809 vm_map_unlock(map);
16810 return FALSE;
16811 }
16812
16813 /*
16814 * Check protection associated with entry.
16815 */
16816
16817 if ((entry->protection & protection) != protection) {
16818 vm_map_unlock(map);
16819 return FALSE;
16820 }
16821
16822 /* go to next entry */
16823
16824 start = entry->vme_end;
16825 entry = entry->vme_next;
16826 }
16827 vm_map_unlock(map);
16828 return TRUE;
16829 }
16830
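/*
 * Editor's note on vm_map_purgable_control() below: an illustrative
 * sketch ("map" and "addr" are assumptions) of marking a purgeable
 * mapping volatile so its pages may be reclaimed under memory pressure.
 *
 *	int		state = VM_PURGABLE_VOLATILE;
 *	kern_return_t	kr;
 *
 *	kr = vm_map_purgable_control(map, addr,
 *	    VM_PURGABLE_SET_STATE, &state);
 *
 * With VM_PURGABLE_GET_STATE the current state is returned in *state
 * instead; VM_PURGABLE_PURGE_ALL purges every volatile object and
 * ignores the address.
 */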
16831 kern_return_t
16832 vm_map_purgable_control(
16833 vm_map_t map,
16834 vm_map_offset_t address,
16835 vm_purgable_t control,
16836 int *state)
16837 {
16838 vm_map_entry_t entry;
16839 vm_object_t object;
16840 kern_return_t kr;
16841 boolean_t was_nonvolatile;
16842
16843 /*
16844 * Vet all the input parameters and current type and state of the
16845 * underlying object. Return with an error if anything is amiss.
16846 */
16847 if (map == VM_MAP_NULL) {
16848 return KERN_INVALID_ARGUMENT;
16849 }
16850
16851 if (control != VM_PURGABLE_SET_STATE &&
16852 control != VM_PURGABLE_GET_STATE &&
16853 control != VM_PURGABLE_PURGE_ALL &&
16854 control != VM_PURGABLE_SET_STATE_FROM_KERNEL) {
16855 return KERN_INVALID_ARGUMENT;
16856 }
16857
16858 if (control == VM_PURGABLE_PURGE_ALL) {
16859 vm_purgeable_object_purge_all();
16860 return KERN_SUCCESS;
16861 }
16862
16863 if ((control == VM_PURGABLE_SET_STATE ||
16864 control == VM_PURGABLE_SET_STATE_FROM_KERNEL) &&
16865 (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) ||
16866 ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK))) {
16867 return KERN_INVALID_ARGUMENT;
16868 }
16869
16870 vm_map_lock_read(map);
16871
16872 if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {
16873 /*
16874 * Must pass a valid non-submap address.
16875 */
16876 vm_map_unlock_read(map);
16877 return KERN_INVALID_ADDRESS;
16878 }
16879
16880 if ((entry->protection & VM_PROT_WRITE) == 0) {
16881 /*
16882 * Can't apply purgable controls to something you can't write.
16883 */
16884 vm_map_unlock_read(map);
16885 return KERN_PROTECTION_FAILURE;
16886 }
16887
16888 object = VME_OBJECT(entry);
16889 if (object == VM_OBJECT_NULL ||
16890 object->purgable == VM_PURGABLE_DENY) {
16891 /*
16892 * Object must already be present and be purgeable.
16893 */
16894 vm_map_unlock_read(map);
16895 return KERN_INVALID_ARGUMENT;
16896 }
16897
16898 vm_object_lock(object);
16899
16900 #if 00
16901 if (VME_OFFSET(entry) != 0 ||
16902 entry->vme_end - entry->vme_start != object->vo_size) {
16903 /*
16904 * Can only apply purgable controls to the whole (existing)
16905 * object at once.
16906 */
16907 vm_map_unlock_read(map);
16908 vm_object_unlock(object);
16909 return KERN_INVALID_ARGUMENT;
16910 }
16911 #endif
16912
16913 assert(!entry->is_sub_map);
16914 assert(!entry->use_pmap); /* purgeable has its own accounting */
16915
16916 vm_map_unlock_read(map);
16917
16918 was_nonvolatile = (object->purgable == VM_PURGABLE_NONVOLATILE);
16919
16920 kr = vm_object_purgable_control(object, control, state);
16921
16922 if (was_nonvolatile &&
16923 object->purgable != VM_PURGABLE_NONVOLATILE &&
16924 map->pmap == kernel_pmap) {
16925 #if DEBUG
16926 object->vo_purgeable_volatilizer = kernel_task;
16927 #endif /* DEBUG */
16928 }
16929
16930 vm_object_unlock(object);
16931
16932 return kr;
16933 }
16934
16935 kern_return_t
16936 vm_map_page_query_internal(
16937 vm_map_t target_map,
16938 vm_map_offset_t offset,
16939 int *disposition,
16940 int *ref_count)
16941 {
16942 kern_return_t kr;
16943 vm_page_info_basic_data_t info;
16944 mach_msg_type_number_t count;
16945
16946 count = VM_PAGE_INFO_BASIC_COUNT;
16947 kr = vm_map_page_info(target_map,
16948 offset,
16949 VM_PAGE_INFO_BASIC,
16950 (vm_page_info_t) &info,
16951 &count);
16952 if (kr == KERN_SUCCESS) {
16953 *disposition = info.disposition;
16954 *ref_count = info.ref_count;
16955 } else {
16956 *disposition = 0;
16957 *ref_count = 0;
16958 }
16959
16960 return kr;
16961 }
16962
16963 kern_return_t
16964 vm_map_page_info(
16965 vm_map_t map,
16966 vm_map_offset_t offset,
16967 vm_page_info_flavor_t flavor,
16968 vm_page_info_t info,
16969 mach_msg_type_number_t *count)
16970 {
16971 return vm_map_page_range_info_internal(map,
16972 offset, /* start of range */
16973 (offset + 1), /* this will get rounded in the call to the page boundary */
16974 flavor,
16975 info,
16976 count);
16977 }
16978
16979 kern_return_t
16980 vm_map_page_range_info_internal(
16981 vm_map_t map,
16982 vm_map_offset_t start_offset,
16983 vm_map_offset_t end_offset,
16984 vm_page_info_flavor_t flavor,
16985 vm_page_info_t info,
16986 mach_msg_type_number_t *count)
16987 {
16988 vm_map_entry_t map_entry = VM_MAP_ENTRY_NULL;
16989 vm_object_t object = VM_OBJECT_NULL, curr_object = VM_OBJECT_NULL;
16990 vm_page_t m = VM_PAGE_NULL;
16991 kern_return_t retval = KERN_SUCCESS;
16992 int disposition = 0;
16993 int ref_count = 0;
16994 int depth = 0, info_idx = 0;
16995 vm_page_info_basic_t basic_info = 0;
16996 vm_map_offset_t offset_in_page = 0, offset_in_object = 0, curr_offset_in_object = 0;
16997 vm_map_offset_t start = 0, end = 0, curr_s_offset = 0, curr_e_offset = 0;
16998 boolean_t do_region_footprint;
16999 ledger_amount_t ledger_resident, ledger_compressed;
17000
17001 switch (flavor) {
17002 case VM_PAGE_INFO_BASIC:
17003 if (*count != VM_PAGE_INFO_BASIC_COUNT) {
17004 /*
17005 * The "vm_page_info_basic_data" structure was not
17006 * properly padded, so allow the size to be off by
17007 * one to maintain backwards binary compatibility...
17008 */
17009 if (*count != VM_PAGE_INFO_BASIC_COUNT - 1) {
17010 return KERN_INVALID_ARGUMENT;
17011 }
17012 }
17013 break;
17014 default:
17015 return KERN_INVALID_ARGUMENT;
17016 }
17017
17018 do_region_footprint = task_self_region_footprint();
17019 disposition = 0;
17020 ref_count = 0;
17021 depth = 0;
17022 info_idx = 0; /* Tracks the next index within the info structure to be filled.*/
17023 retval = KERN_SUCCESS;
17024
17025 offset_in_page = start_offset & PAGE_MASK;
17026 start = vm_map_trunc_page(start_offset, PAGE_MASK);
17027 end = vm_map_round_page(end_offset, PAGE_MASK);
17028
17029 if (end < start) {
17030 return KERN_INVALID_ARGUMENT;
17031 }
17032
17033 assert((end - start) <= MAX_PAGE_RANGE_QUERY);
17034
17035 vm_map_lock_read(map);
17036
17037 task_ledgers_footprint(map->pmap->ledger, &ledger_resident, &ledger_compressed);
17038
17039 for (curr_s_offset = start; curr_s_offset < end;) {
17040 /*
17041 * New lookup needs reset of these variables.
17042 */
17043 curr_object = object = VM_OBJECT_NULL;
17044 offset_in_object = 0;
17045 ref_count = 0;
17046 depth = 0;
17047
17048 if (do_region_footprint &&
17049 curr_s_offset >= vm_map_last_entry(map)->vme_end) {
17050 /*
17051 * Request for "footprint" info about a page beyond
17052 * the end of address space: this must be for
17053 * the fake region vm_map_region_recurse_64()
17054 * reported to account for non-volatile purgeable
17055 * memory owned by this task.
17056 */
17057 disposition = 0;
17058
17059 if (curr_s_offset - vm_map_last_entry(map)->vme_end <=
17060 (unsigned) ledger_compressed) {
17061 /*
17062 * We haven't reported all the "non-volatile
17063 * compressed" pages yet, so report this fake
17064 * page as "compressed".
17065 */
17066 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
17067 } else {
17068 /*
17069 * We've reported all the non-volatile
17070 * compressed pages but not all the non-volatile
17071 * pages, so report this fake page as
17072 * "resident dirty".
17073 */
17074 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
17075 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
17076 disposition |= VM_PAGE_QUERY_PAGE_REF;
17077 }
17078 switch (flavor) {
17079 case VM_PAGE_INFO_BASIC:
17080 basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
17081 basic_info->disposition = disposition;
17082 basic_info->ref_count = 1;
17083 basic_info->object_id = INFO_MAKE_FAKE_OBJECT_ID(map, task_ledgers.purgeable_nonvolatile);
17084 basic_info->offset = 0;
17085 basic_info->depth = 0;
17086
17087 info_idx++;
17088 break;
17089 }
17090 curr_s_offset += PAGE_SIZE;
17091 continue;
17092 }
17093
17094 /*
17095 * First, find the map entry covering "curr_s_offset", going down
17096 * submaps if necessary.
17097 */
17098 if (!vm_map_lookup_entry(map, curr_s_offset, &map_entry)) {
17099 /* no entry -> no object -> no page */
17100
17101 if (curr_s_offset < vm_map_min(map)) {
17102 /*
17103 * Illegal address that falls below map min.
17104 */
17105 curr_e_offset = MIN(end, vm_map_min(map));
17106 } else if (curr_s_offset >= vm_map_max(map)) {
17107 /*
17108 * Illegal address that falls on/after map max.
17109 */
17110 curr_e_offset = end;
17111 } else if (map_entry == vm_map_to_entry(map)) {
17112 /*
17113 * Hit a hole.
17114 */
17115 if (map_entry->vme_next == vm_map_to_entry(map)) {
17116 /*
17117 * Empty map.
17118 */
17119 curr_e_offset = MIN(map->max_offset, end);
17120 } else {
17121 /*
17122 * Hole at start of the map.
17123 */
17124 curr_e_offset = MIN(map_entry->vme_next->vme_start, end);
17125 }
17126 } else {
17127 if (map_entry->vme_next == vm_map_to_entry(map)) {
17128 /*
17129 * Hole at the end of the map.
17130 */
17131 curr_e_offset = MIN(map->max_offset, end);
17132 } else {
17133 curr_e_offset = MIN(map_entry->vme_next->vme_start, end);
17134 }
17135 }
17136
17137 assert(curr_e_offset >= curr_s_offset);
17138
17139 uint64_t num_pages = (curr_e_offset - curr_s_offset) >> PAGE_SHIFT;
17140
17141 void *info_ptr = (void*) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
17142
17143 bzero(info_ptr, num_pages * sizeof(struct vm_page_info_basic));
17144
17145 curr_s_offset = curr_e_offset;
17146
17147 info_idx += num_pages;
17148
17149 continue;
17150 }
17151
17152 /* compute offset from this map entry's start */
17153 offset_in_object = curr_s_offset - map_entry->vme_start;
17154
17155 /* compute offset into this map entry's object (or submap) */
17156 offset_in_object += VME_OFFSET(map_entry);
17157
17158 if (map_entry->is_sub_map) {
17159 vm_map_t sub_map = VM_MAP_NULL;
17160 vm_page_info_t submap_info = 0;
17161 vm_map_offset_t submap_s_offset = 0, submap_e_offset = 0, range_len = 0;
17162
17163 range_len = MIN(map_entry->vme_end, end) - curr_s_offset;
17164
17165 submap_s_offset = offset_in_object;
17166 submap_e_offset = submap_s_offset + range_len;
17167
17168 sub_map = VME_SUBMAP(map_entry);
17169
17170 vm_map_reference(sub_map);
17171 vm_map_unlock_read(map);
17172
17173 submap_info = (vm_page_info_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
17174
17175 retval = vm_map_page_range_info_internal(sub_map,
17176 submap_s_offset,
17177 submap_e_offset,
17178 VM_PAGE_INFO_BASIC,
17179 (vm_page_info_t) submap_info,
17180 count);
17181
17182 assert(retval == KERN_SUCCESS);
17183
17184 vm_map_lock_read(map);
17185 vm_map_deallocate(sub_map);
17186
17187 /* Move the "info" index by the number of pages we inspected.*/
17188 info_idx += range_len >> PAGE_SHIFT;
17189
17190 /* Move our current offset by the size of the range we inspected.*/
17191 curr_s_offset += range_len;
17192
17193 continue;
17194 }
17195
17196 object = VME_OBJECT(map_entry);
17197 if (object == VM_OBJECT_NULL) {
17198 /*
17199 * We don't have an object here and, hence,
17200 * no pages to inspect. We'll fill up the
17201 * info structure appropriately.
17202 */
17203
17204 curr_e_offset = MIN(map_entry->vme_end, end);
17205
17206 uint64_t num_pages = (curr_e_offset - curr_s_offset) >> PAGE_SHIFT;
17207
17208 void *info_ptr = (void*) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
17209
17210 bzero(info_ptr, num_pages * sizeof(struct vm_page_info_basic));
17211
17212 curr_s_offset = curr_e_offset;
17213
17214 info_idx += num_pages;
17215
17216 continue;
17217 }
17218
17219 if (do_region_footprint) {
17220 int pmap_disp;
17221
17222 disposition = 0;
17223 pmap_disp = 0;
17224 if (map->has_corpse_footprint) {
17225 /*
17226 * Query the page info data we saved
17227 * while forking the corpse.
17228 */
17229 vm_map_corpse_footprint_query_page_info(
17230 map,
17231 curr_s_offset,
17232 &pmap_disp);
17233 } else {
17234 /*
17235 * Query the pmap.
17236 */
17237 pmap_query_page_info(map->pmap,
17238 curr_s_offset,
17239 &pmap_disp);
17240 }
17241 if (object->purgable == VM_PURGABLE_NONVOLATILE &&
17242 /* && not tagged as no-footprint? */
17243 VM_OBJECT_OWNER(object) != NULL &&
17244 VM_OBJECT_OWNER(object)->map == map) {
17245 if ((((curr_s_offset
17246 - map_entry->vme_start
17247 + VME_OFFSET(map_entry))
17248 / PAGE_SIZE) <
17249 (object->resident_page_count +
17250 vm_compressor_pager_get_count(object->pager)))) {
17251 /*
17252 * Non-volatile purgeable object owned
17253 * by this task: report the first
17254 * "#resident + #compressed" pages as
17255 * "resident" (to show that they
17256 * contribute to the footprint) but not
17257 * "dirty" (to avoid double-counting
17258 * with the fake "non-volatile" region
17259 * we'll report at the end of the
17260 * address space to account for all
17261 * (mapped or not) non-volatile memory
17262 * owned by this task).
17263 */
17264 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
17265 }
17266 } else if ((object->purgable == VM_PURGABLE_VOLATILE ||
17267 object->purgable == VM_PURGABLE_EMPTY) &&
17268 /* && not tagged as no-footprint? */
17269 VM_OBJECT_OWNER(object) != NULL &&
17270 VM_OBJECT_OWNER(object)->map == map) {
17271 if ((((curr_s_offset
17272 - map_entry->vme_start
17273 + VME_OFFSET(map_entry))
17274 / PAGE_SIZE) <
17275 object->wired_page_count)) {
17276 /*
17277 * Volatile|empty purgeable object owned
17278 * by this task: report the first
17279 * "#wired" pages as "resident" (to
17280 * show that they contribute to the
17281 * footprint) but not "dirty" (to avoid
17282 * double-counting with the fake
17283 * "non-volatile" region we'll report
17284 * at the end of the address space to
17285 * account for all (mapped or not)
17286 * non-volatile memory owned by this
17287 * task).
17288 */
17289 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
17290 }
17291 } else if (map_entry->iokit_acct &&
17292 object->internal &&
17293 object->purgable == VM_PURGABLE_DENY) {
17294 /*
17295 * Non-purgeable IOKit memory: phys_footprint
17296 * includes the entire virtual mapping.
17297 */
17298 assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
17299 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
17300 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
17301 } else if (pmap_disp & (PMAP_QUERY_PAGE_ALTACCT |
17302 PMAP_QUERY_PAGE_COMPRESSED_ALTACCT)) {
17303 /* alternate accounting */
17304 #if CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG)
17305 if (map->pmap->footprint_was_suspended ||
17306 /*
17307 * XXX corpse does not know if original
17308 * pmap had its footprint suspended...
17309 */
17310 map->has_corpse_footprint) {
17311 /*
17312 * The assertion below can fail if dyld
17313 * suspended footprint accounting
17314 * while doing some adjustments to
17315 * this page; the mapping would say
17316 * "use pmap accounting" but the page
17317 * would be marked "alternate
17318 * accounting".
17319 */
17320 } else
17321 #endif /* CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG) */
17322 assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
17323 pmap_disp = 0;
17324 } else {
17325 if (pmap_disp & PMAP_QUERY_PAGE_PRESENT) {
17326 assertf(map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
17327 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
17328 disposition |= VM_PAGE_QUERY_PAGE_REF;
17329 if (pmap_disp & PMAP_QUERY_PAGE_INTERNAL) {
17330 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
17331 } else {
17332 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
17333 }
17334 if (pmap_disp & PMAP_QUERY_PAGE_REUSABLE) {
17335 disposition |= VM_PAGE_QUERY_PAGE_REUSABLE;
17336 }
17337 } else if (pmap_disp & PMAP_QUERY_PAGE_COMPRESSED) {
17338 assertf(map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
17339 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
17340 }
17341 }
17342 switch (flavor) {
17343 case VM_PAGE_INFO_BASIC:
17344 basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
17345 basic_info->disposition = disposition;
17346 basic_info->ref_count = 1;
17347 basic_info->object_id = INFO_MAKE_FAKE_OBJECT_ID(map, task_ledgers.purgeable_nonvolatile);
17348 basic_info->offset = 0;
17349 basic_info->depth = 0;
17350
17351 info_idx++;
17352 break;
17353 }
17354 curr_s_offset += PAGE_SIZE;
17355 continue;
17356 }
17357
17358 vm_object_reference(object);
17359 /*
17360 * Shared mode -- so we can allow other readers
17361 * to grab the lock too.
17362 */
17363 vm_object_lock_shared(object);
17364
17365 curr_e_offset = MIN(map_entry->vme_end, end);
17366
17367 vm_map_unlock_read(map);
17368
17369 map_entry = NULL; /* map is unlocked, the entry is no longer valid. */
17370
17371 curr_object = object;
17372
17373 for (; curr_s_offset < curr_e_offset;) {
17374 if (object == curr_object) {
17375 ref_count = curr_object->ref_count - 1; /* account for our object reference above. */
17376 } else {
17377 ref_count = curr_object->ref_count;
17378 }
17379
17380 curr_offset_in_object = offset_in_object;
17381
17382 for (;;) {
17383 m = vm_page_lookup(curr_object, curr_offset_in_object);
17384
17385 if (m != VM_PAGE_NULL) {
17386 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
17387 break;
17388 } else {
17389 if (curr_object->internal &&
17390 curr_object->alive &&
17391 !curr_object->terminating &&
17392 curr_object->pager_ready) {
17393 if (VM_COMPRESSOR_PAGER_STATE_GET(curr_object, curr_offset_in_object)
17394 == VM_EXTERNAL_STATE_EXISTS) {
17395 /* the pager has that page */
17396 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
17397 break;
17398 }
17399 }
17400
17401 /*
17402 * Go down the VM object shadow chain until we find the page
17403 * we're looking for.
17404 */
17405
17406 if (curr_object->shadow != VM_OBJECT_NULL) {
17407 vm_object_t shadow = VM_OBJECT_NULL;
17408
17409 curr_offset_in_object += curr_object->vo_shadow_offset;
17410 shadow = curr_object->shadow;
17411
17412 vm_object_lock_shared(shadow);
17413 vm_object_unlock(curr_object);
17414
17415 curr_object = shadow;
17416 depth++;
17417 continue;
17418 } else {
17419 break;
17420 }
17421 }
17422 }
17423
17424 /* The ref_count is not strictly accurate: it measures the number */
17425 /* of entities holding a ref on the object; they may not be mapping */
17426 /* the object, or may not be mapping the section holding the */
17427 /* target page, but it's still a ballpark number and, though an */
17428 /* overcount, it picks up the copy-on-write cases. */
17429
17430 /* We could also get a picture of page sharing from pmap_attributes */
17431 /* but this would undercount, as only faulted-in mappings would */
17432 /* show up. */
17433
17434 if ((curr_object == object) && curr_object->shadow) {
17435 disposition |= VM_PAGE_QUERY_PAGE_COPIED;
17436 }
17437
17438 if (!curr_object->internal) {
17439 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
17440 }
17441
17442 if (m != VM_PAGE_NULL) {
17443 if (m->vmp_fictitious) {
17444 disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
17445 } else {
17446 if (m->vmp_dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(m))) {
17447 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
17448 }
17449
17450 if (m->vmp_reference || pmap_is_referenced(VM_PAGE_GET_PHYS_PAGE(m))) {
17451 disposition |= VM_PAGE_QUERY_PAGE_REF;
17452 }
17453
17454 if (m->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q) {
17455 disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE;
17456 }
17457
17458 if (m->vmp_cs_validated) {
17459 disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED;
17460 }
17461 if (m->vmp_cs_tainted) {
17462 disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED;
17463 }
17464 if (m->vmp_cs_nx) {
17465 disposition |= VM_PAGE_QUERY_PAGE_CS_NX;
17466 }
17467 if (m->vmp_reusable || curr_object->all_reusable) {
17468 disposition |= VM_PAGE_QUERY_PAGE_REUSABLE;
17469 }
17470 }
17471 }
17472
17473 switch (flavor) {
17474 case VM_PAGE_INFO_BASIC:
17475 basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
17476 basic_info->disposition = disposition;
17477 basic_info->ref_count = ref_count;
17478 basic_info->object_id = (vm_object_id_t) (uintptr_t)
17479 VM_KERNEL_ADDRPERM(curr_object);
17480 basic_info->offset =
17481 (memory_object_offset_t) curr_offset_in_object + offset_in_page;
17482 basic_info->depth = depth;
17483
17484 info_idx++;
17485 break;
17486 }
17487
17488 disposition = 0;
17489 offset_in_page = 0; // This doesn't really make sense for any offset other than the starting offset.
17490
17491 /*
17492 * Move to next offset in the range and in our object.
17493 */
17494 curr_s_offset += PAGE_SIZE;
17495 offset_in_object += PAGE_SIZE;
17496 curr_offset_in_object = offset_in_object;
17497
17498 if (curr_object != object) {
17499 vm_object_unlock(curr_object);
17500
17501 curr_object = object;
17502
17503 vm_object_lock_shared(curr_object);
17504 } else {
17505 vm_object_lock_yield_shared(curr_object);
17506 }
17507 }
17508
17509 vm_object_unlock(curr_object);
17510 vm_object_deallocate(curr_object);
17511
17512 vm_map_lock_read(map);
17513 }
17514
17515 vm_map_unlock_read(map);
17516 return retval;
17517 }
17518
17519 /*
17520 * vm_map_msync
17521 *
17522 * Synchronises the memory range specified with its backing store
17523 * image by either flushing or cleaning the contents to the appropriate
17524 * memory manager, engaging in a memory object synchronize dialog with
17525 * the manager. The client doesn't return until the manager issues
17526 * the m_o_s_completed message. MIG magically converts the user task
17527 * parameter to the task's address map.
17528 *
17529 * interpretation of sync_flags
17530 * VM_SYNC_INVALIDATE - discard pages, only return precious
17531 * pages to manager.
17532 *
17533 * VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
17534 * - discard pages, write dirty or precious
17535 * pages back to memory manager.
17536 *
17537 * VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
17538 * - write dirty or precious pages back to
17539 * the memory manager.
17540 *
17541 * VM_SYNC_CONTIGUOUS - does everything normally, but if there
17542 * is a hole in the region, and we would
17543 * have returned KERN_SUCCESS, return
17544 * KERN_INVALID_ADDRESS instead.
17545 *
17546 * NOTE
17547 * The memory object attributes have not yet been implemented; this
17548 * function will have to deal with the invalidate attribute.
17549 *
17550 * RETURNS
17551 * KERN_INVALID_TASK Bad task parameter
17552 * KERN_INVALID_ARGUMENT both sync and async were specified.
17553 * KERN_SUCCESS The usual.
17554 * KERN_INVALID_ADDRESS There was a hole in the region.
17555 */
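/*
 * Editor's note: an illustrative sketch of a caller ("map", "addr" and
 * "len" are assumptions): synchronously flush dirty pages in a range
 * back to their pager and fail if the range contains a hole.
 *
 *	kern_return_t	kr;
 *
 *	kr = vm_map_msync(map, addr, len,
 *	    VM_SYNC_SYNCHRONOUS | VM_SYNC_CONTIGUOUS);
 *	if (kr == KERN_INVALID_ADDRESS) {
 *		... there was a hole in [addr, addr + len) ...
 *	}
 *
 * Adding VM_SYNC_INVALIDATE would also discard the pages after writing
 * dirty or precious pages back, per the flag descriptions above.
 */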
17556
17557 kern_return_t
17558 vm_map_msync(
17559 vm_map_t map,
17560 vm_map_address_t address,
17561 vm_map_size_t size,
17562 vm_sync_t sync_flags)
17563 {
17564 vm_map_entry_t entry;
17565 vm_map_size_t amount_left;
17566 vm_object_offset_t offset;
17567 boolean_t do_sync_req;
17568 boolean_t had_hole = FALSE;
17569 vm_map_offset_t pmap_offset;
17570
17571 if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
17572 (sync_flags & VM_SYNC_SYNCHRONOUS)) {
17573 return KERN_INVALID_ARGUMENT;
17574 }
17575
17576 /*
17577 * align address and size on page boundaries
17578 */
17579 size = (vm_map_round_page(address + size,
17580 VM_MAP_PAGE_MASK(map)) -
17581 vm_map_trunc_page(address,
17582 VM_MAP_PAGE_MASK(map)));
17583 address = vm_map_trunc_page(address,
17584 VM_MAP_PAGE_MASK(map));
17585
17586 if (map == VM_MAP_NULL) {
17587 return KERN_INVALID_TASK;
17588 }
17589
17590 if (size == 0) {
17591 return KERN_SUCCESS;
17592 }
17593
17594 amount_left = size;
17595
17596 while (amount_left > 0) {
17597 vm_object_size_t flush_size;
17598 vm_object_t object;
17599
17600 vm_map_lock(map);
17601 if (!vm_map_lookup_entry(map,
17602 address,
17603 &entry)) {
17604 vm_map_size_t skip;
17605
17606 /*
17607 * hole in the address map.
17608 */
17609 had_hole = TRUE;
17610
17611 if (sync_flags & VM_SYNC_KILLPAGES) {
17612 /*
17613 * For VM_SYNC_KILLPAGES, there should be
17614 * no holes in the range, since we couldn't
17615 * prevent someone else from allocating in
17616 * that hole and we wouldn't want to "kill"
17617 * their pages.
17618 */
17619 vm_map_unlock(map);
17620 break;
17621 }
17622
17623 /*
17624 * Check for empty map.
17625 */
17626 if (entry == vm_map_to_entry(map) &&
17627 entry->vme_next == entry) {
17628 vm_map_unlock(map);
17629 break;
17630 }
17631 /*
17632 * Check that we don't wrap and that
17633 * we have at least one real map entry.
17634 */
17635 if ((map->hdr.nentries == 0) ||
17636 (entry->vme_next->vme_start < address)) {
17637 vm_map_unlock(map);
17638 break;
17639 }
17640 /*
17641 * Move up to the next entry if needed
17642 */
17643 skip = (entry->vme_next->vme_start - address);
17644 if (skip >= amount_left) {
17645 amount_left = 0;
17646 } else {
17647 amount_left -= skip;
17648 }
17649 address = entry->vme_next->vme_start;
17650 vm_map_unlock(map);
17651 continue;
17652 }
17653
17654 offset = address - entry->vme_start;
17655 pmap_offset = address;
17656
17657 /*
17658 * do we have more to flush than is contained in this
17659 * entry ?
17660 */
17661 if (amount_left + entry->vme_start + offset > entry->vme_end) {
17662 flush_size = entry->vme_end -
17663 (entry->vme_start + offset);
17664 } else {
17665 flush_size = amount_left;
17666 }
17667 amount_left -= flush_size;
17668 address += flush_size;
17669
17670 if (entry->is_sub_map == TRUE) {
17671 vm_map_t local_map;
17672 vm_map_offset_t local_offset;
17673
17674 local_map = VME_SUBMAP(entry);
17675 local_offset = VME_OFFSET(entry);
17676 vm_map_reference(local_map);
17677 vm_map_unlock(map);
17678 if (vm_map_msync(
17679 local_map,
17680 local_offset,
17681 flush_size,
17682 sync_flags) == KERN_INVALID_ADDRESS) {
17683 had_hole = TRUE;
17684 }
17685 vm_map_deallocate(local_map);
17686 continue;
17687 }
17688 object = VME_OBJECT(entry);
17689
17690 /*
17691 * We can't sync this object if the object has not been
17692 * created yet
17693 */
17694 if (object == VM_OBJECT_NULL) {
17695 vm_map_unlock(map);
17696 continue;
17697 }
17698 offset += VME_OFFSET(entry);
17699
17700 vm_object_lock(object);
17701
17702 if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
17703 int kill_pages = 0;
17704 boolean_t reusable_pages = FALSE;
17705
17706 if (sync_flags & VM_SYNC_KILLPAGES) {
17707 if (((object->ref_count == 1) ||
17708 ((object->copy_strategy !=
17709 MEMORY_OBJECT_COPY_SYMMETRIC) &&
17710 (object->copy == VM_OBJECT_NULL))) &&
17711 (object->shadow == VM_OBJECT_NULL)) {
17712 if (object->ref_count != 1) {
17713 vm_page_stats_reusable.free_shared++;
17714 }
17715 kill_pages = 1;
17716 } else {
17717 kill_pages = -1;
17718 }
17719 }
17720 if (kill_pages != -1) {
17721 vm_object_deactivate_pages(
17722 object,
17723 offset,
17724 (vm_object_size_t) flush_size,
17725 kill_pages,
17726 reusable_pages,
17727 map->pmap,
17728 pmap_offset);
17729 }
17730 vm_object_unlock(object);
17731 vm_map_unlock(map);
17732 continue;
17733 }
17734 /*
17735 * We can't sync this object if there isn't a pager.
17736 * Don't bother to sync internal objects, since there can't
17737 * be any "permanent" storage for these objects anyway.
17738 */
17739 if ((object->pager == MEMORY_OBJECT_NULL) ||
17740 (object->internal) || (object->private)) {
17741 vm_object_unlock(object);
17742 vm_map_unlock(map);
17743 continue;
17744 }
17745 /*
17746 * keep reference on the object until syncing is done
17747 */
17748 vm_object_reference_locked(object);
17749 vm_object_unlock(object);
17750
17751 vm_map_unlock(map);
17752
17753 do_sync_req = vm_object_sync(object,
17754 offset,
17755 flush_size,
17756 sync_flags & VM_SYNC_INVALIDATE,
17757 ((sync_flags & VM_SYNC_SYNCHRONOUS) ||
17758 (sync_flags & VM_SYNC_ASYNCHRONOUS)),
17759 sync_flags & VM_SYNC_SYNCHRONOUS);
17760
17761 if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) {
17762 /*
17763 * clear out the clustering and read-ahead hints
17764 */
17765 vm_object_lock(object);
17766
17767 object->pages_created = 0;
17768 object->pages_used = 0;
17769 object->sequential = 0;
17770 object->last_alloc = 0;
17771
17772 vm_object_unlock(object);
17773 }
17774 vm_object_deallocate(object);
17775 } /* while */
17776
17777 /* for proper msync() behaviour */
17778 if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS)) {
17779 return KERN_INVALID_ADDRESS;
17780 }
17781
17782 return KERN_SUCCESS;
17783 }/* vm_msync */
17784 }/* vm_map_msync */
17785 /*
17786 * Routine: convert_port_entry_to_map
17787 * Purpose:
17788 * Convert from a port specifying an entry or a task
17789 * to a map. Doesn't consume the port ref; produces a map ref,
17790 * which may be null. Unlike convert_port_to_map, the
17791 * port may be either task-backed or named-entry-backed.
17792 * Conditions:
17793 * Nothing locked.
17794 */
17795
17796
17797 vm_map_t
17798 convert_port_entry_to_map(
17799 ipc_port_t port)
17800 {
17801 vm_map_t map;
17802 vm_named_entry_t named_entry;
17803 uint32_t try_failed_count = 0;
17804
17805 if (IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
17806 while (TRUE) {
17807 ip_lock(port);
17808 if (ip_active(port) && (ip_kotype(port)
17809 == IKOT_NAMED_ENTRY)) {
17810 named_entry =
17811 (vm_named_entry_t)port->ip_kobject;
17812 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
17813 ip_unlock(port);
17814
17815 try_failed_count++;
17816 mutex_pause(try_failed_count);
17817 continue;
17818 }
17819 named_entry->ref_count++;
17820 lck_mtx_unlock(&(named_entry)->Lock);
17821 ip_unlock(port);
17822 if ((named_entry->is_sub_map) &&
17823 (named_entry->protection
17824 & VM_PROT_WRITE)) {
17825 map = named_entry->backing.map;
17826 } else {
17827 mach_destroy_memory_entry(port);
17828 return VM_MAP_NULL;
17829 }
17830 vm_map_reference_swap(map);
17831 mach_destroy_memory_entry(port);
17832 break;
17833 } else {
17834 return VM_MAP_NULL;
17835 }
17836 }
17837 } else {
17838 map = convert_port_to_map(port);
17839 }
17840
17841 return map;
17842 }
17843
17844 /*
17845 * Routine: convert_port_entry_to_object
17846 * Purpose:
17847 * Convert from a port specifying a named entry to an
17848 * object. Doesn't consume the port ref; produces an object ref,
17849 * which may be null.
17850 * Conditions:
17851 * Nothing locked.
17852 */
17853
17854
17855 vm_object_t
17856 convert_port_entry_to_object(
17857 ipc_port_t port)
17858 {
17859 vm_object_t object = VM_OBJECT_NULL;
17860 vm_named_entry_t named_entry;
17861 uint32_t try_failed_count = 0;
17862
17863 if (IP_VALID(port) &&
17864 (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
17865 try_again:
17866 ip_lock(port);
17867 if (ip_active(port) &&
17868 (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
17869 named_entry = (vm_named_entry_t)port->ip_kobject;
17870 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
17871 ip_unlock(port);
17872 try_failed_count++;
17873 mutex_pause(try_failed_count);
17874 goto try_again;
17875 }
17876 named_entry->ref_count++;
17877 lck_mtx_unlock(&(named_entry)->Lock);
17878 ip_unlock(port);
17879 if (!(named_entry->is_sub_map) &&
17880 !(named_entry->is_copy) &&
17881 (named_entry->protection & VM_PROT_WRITE)) {
17882 object = named_entry->backing.object;
17883 vm_object_reference(object);
17884 }
17885 mach_destroy_memory_entry(port);
17886 }
17887 }
17888
17889 return object;
17890 }
17891
17892 /*
17893 * Export routines to other components for the things we access locally through
17894 * macros.
17895 */
17896 #undef current_map
17897 vm_map_t
17898 current_map(void)
17899 {
17900 return current_map_fast();
17901 }
17902
17903 /*
17904 * vm_map_reference:
17905 *
17906 * Most code internal to the osfmk will go through a
17907 * macro defining this. This is always here for the
17908 * use of other kernel components.
17909 */
17910 #undef vm_map_reference
17911 void
17912 vm_map_reference(
17913 vm_map_t map)
17914 {
17915 if (map == VM_MAP_NULL) {
17916 return;
17917 }
17918
17919 lck_mtx_lock(&map->s_lock);
17920 #if TASK_SWAPPER
17921 assert(map->res_count > 0);
17922 assert(os_ref_get_count(&map->map_refcnt) >= map->res_count);
17923 map->res_count++;
17924 #endif
17925 os_ref_retain_locked(&map->map_refcnt);
17926 lck_mtx_unlock(&map->s_lock);
17927 }
17928
17929 /*
17930 * vm_map_deallocate:
17931 *
17932 * Removes a reference from the specified map,
17933 * destroying it if no references remain.
17934 * The map should not be locked.
17935 */
17936 void
17937 vm_map_deallocate(
17938 vm_map_t map)
17939 {
17940 unsigned int ref;
17941
17942 if (map == VM_MAP_NULL) {
17943 return;
17944 }
17945
17946 lck_mtx_lock(&map->s_lock);
17947 ref = os_ref_release_locked(&map->map_refcnt);
17948 if (ref > 0) {
17949 vm_map_res_deallocate(map);
17950 lck_mtx_unlock(&map->s_lock);
17951 return;
17952 }
17953 assert(os_ref_get_count(&map->map_refcnt) == 0);
17954 lck_mtx_unlock(&map->s_lock);
17955
17956 #if TASK_SWAPPER
17957 /*
17958 * The map residence count isn't decremented here because
17959 * the vm_map_delete below will traverse the entire map,
17960 * deleting entries, and the residence counts on objects
17961 * and sharing maps will go away then.
17962 */
17963 #endif
17964
17965 vm_map_destroy(map, VM_MAP_REMOVE_NO_FLAGS);
17966 }
17967
17968
17969 void
17970 vm_map_disable_NX(vm_map_t map)
17971 {
17972 if (map == NULL) {
17973 return;
17974 }
17975 if (map->pmap == NULL) {
17976 return;
17977 }
17978
17979 pmap_disable_NX(map->pmap);
17980 }
17981
17982 void
17983 vm_map_disallow_data_exec(vm_map_t map)
17984 {
17985 if (map == NULL) {
17986 return;
17987 }
17988
17989 map->map_disallow_data_exec = TRUE;
17990 }
17991
17992 /* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
17993 * more descriptive.
17994 */
17995 void
17996 vm_map_set_32bit(vm_map_t map)
17997 {
17998 #if defined(__arm__) || defined(__arm64__)
17999 map->max_offset = pmap_max_offset(FALSE, ARM_PMAP_MAX_OFFSET_DEVICE);
18000 #else
18001 map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
18002 #endif
18003 }
18004
18005
18006 void
18007 vm_map_set_64bit(vm_map_t map)
18008 {
18009 #if defined(__arm__) || defined(__arm64__)
18010 map->max_offset = pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_DEVICE);
18011 #else
18012 map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
18013 #endif
18014 }
18015
18016 /*
18017 * Expand the maximum size of an existing map to the maximum supported.
18018 */
18019 void
18020 vm_map_set_jumbo(vm_map_t map)
18021 {
18022 #if defined (__arm64__)
18023 vm_map_set_max_addr(map, ~0);
18024 #else /* arm64 */
18025 (void) map;
18026 #endif
18027 }
18028
18029 /*
18030 * This map has a JIT entitlement
18031 */
18032 void
18033 vm_map_set_jit_entitled(vm_map_t map)
18034 {
18035 #if defined (__arm64__)
18036 pmap_set_jit_entitled(map->pmap);
18037 #else /* arm64 */
18038 (void) map;
18039 #endif
18040 }
18041
18042 /*
18043 * Expand the maximum size of an existing map.
18044 */
18045 void
18046 vm_map_set_max_addr(vm_map_t map, vm_map_offset_t new_max_offset)
18047 {
18048 #if defined(__arm64__)
18049 vm_map_offset_t max_supported_offset = 0;
18050 vm_map_offset_t old_max_offset = map->max_offset;
18051 max_supported_offset = pmap_max_offset(vm_map_is_64bit(map), ARM_PMAP_MAX_OFFSET_JUMBO);
18052
18053 new_max_offset = trunc_page(new_max_offset);
18054
18055 /* The address space cannot be shrunk using this routine. */
18056 if (old_max_offset >= new_max_offset) {
18057 return;
18058 }
18059
18060 if (max_supported_offset < new_max_offset) {
18061 new_max_offset = max_supported_offset;
18062 }
18063
18064 map->max_offset = new_max_offset;
18065
18066 if (map->holes_list->prev->vme_end == old_max_offset) {
18067 /*
18068 * There is already a hole at the end of the map; simply make it bigger.
18069 */
18070 map->holes_list->prev->vme_end = map->max_offset;
18071 } else {
18072 /*
18073 * There is no hole at the end, so we need to create a new hole
18074 * for the new empty space we're creating.
18075 */
18076 struct vm_map_links *new_hole = zalloc(vm_map_holes_zone);
18077 new_hole->start = old_max_offset;
18078 new_hole->end = map->max_offset;
18079 new_hole->prev = map->holes_list->prev;
18080 new_hole->next = (struct vm_map_entry *)map->holes_list;
18081 map->holes_list->prev->links.next = (struct vm_map_entry *)new_hole;
18082 map->holes_list->prev = (struct vm_map_entry *)new_hole;
18083 }
18084 #else
18085 (void)map;
18086 (void)new_max_offset;
18087 #endif
18088 }
18089
18090 vm_map_offset_t
18091 vm_compute_max_offset(boolean_t is64)
18092 {
18093 #if defined(__arm__) || defined(__arm64__)
18094 return pmap_max_offset(is64, ARM_PMAP_MAX_OFFSET_DEVICE);
18095 #else
18096 return is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS;
18097 #endif
18098 }
18099
18100 void
18101 vm_map_get_max_aslr_slide_section(
18102 vm_map_t map __unused,
18103 int64_t *max_sections,
18104 int64_t *section_size)
18105 {
18106 #if defined(__arm64__)
18107 *max_sections = 3;
18108 *section_size = ARM_TT_TWIG_SIZE;
18109 #else
18110 *max_sections = 1;
18111 *section_size = 0;
18112 #endif
18113 }
18114
18115 uint64_t
18116 vm_map_get_max_aslr_slide_pages(vm_map_t map)
18117 {
18118 #if defined(__arm64__)
18119 /* Limit arm64 slide to 16MB to conserve contiguous VA space in the more
18120 * limited embedded address space; this is also meant to minimize pmap
18121 * memory usage on 16KB page systems.
18122 */
18123 return 1 << (24 - VM_MAP_PAGE_SHIFT(map));
18124 #else
18125 return 1 << (vm_map_is_64bit(map) ? 16 : 8);
18126 #endif
18127 }
18128
18129 uint64_t
18130 vm_map_get_max_loader_aslr_slide_pages(vm_map_t map)
18131 {
18132 #if defined(__arm64__)
18133 /* We limit the loader slide to 4MB, in order to ensure at least 8 bits
18134 * of independent entropy on 16KB page systems.
18135 */
18136 return 1 << (22 - VM_MAP_PAGE_SHIFT(map));
18137 #else
18138 return 1 << (vm_map_is_64bit(map) ? 16 : 8);
18139 #endif
18140 }
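/*
 * Worked example for the two helpers above (illustrative only, derived from
 * the shifts used in the code): on arm64 with 16KB pages,
 * VM_MAP_PAGE_SHIFT(map) is 14, so the maximum slide is
 * 1 << (24 - 14) = 1024 pages, i.e. 1024 * 16KB = 16MB, and the maximum
 * loader slide is 1 << (22 - 14) = 256 pages, i.e. 256 * 16KB = 4MB.
 * With 4KB pages (shift 12) the byte totals are the same:
 * 4096 * 4KB = 16MB and 1024 * 4KB = 4MB.
 */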
18141
18142 #ifndef __arm__
18143 boolean_t
18144 vm_map_is_64bit(
18145 vm_map_t map)
18146 {
18147 return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
18148 }
18149 #endif
18150
18151 boolean_t
18152 vm_map_has_hard_pagezero(
18153 vm_map_t map,
18154 vm_map_offset_t pagezero_size)
18155 {
18156 /*
18157 * XXX FBDP
18158 * We should lock the VM map (for read) here but we can get away
18159 * with it for now because there can't really be any race condition:
18160 * the VM map's min_offset is changed only when the VM map is created
18161 * and when the zero page is established (when the binary gets loaded),
18162 * and this routine gets called only when the task terminates and the
18163 * VM map is being torn down, and when a new map is created via
18164 * load_machfile()/execve().
18165 */
18166 return map->min_offset >= pagezero_size;
18167 }
18168
18169 /*
18170 * Raise a VM map's maximum offset.
18171 */
18172 kern_return_t
18173 vm_map_raise_max_offset(
18174 vm_map_t map,
18175 vm_map_offset_t new_max_offset)
18176 {
18177 kern_return_t ret;
18178
18179 vm_map_lock(map);
18180 ret = KERN_INVALID_ADDRESS;
18181
18182 if (new_max_offset >= map->max_offset) {
18183 if (!vm_map_is_64bit(map)) {
18184 if (new_max_offset <= (vm_map_offset_t)VM_MAX_ADDRESS) {
18185 map->max_offset = new_max_offset;
18186 ret = KERN_SUCCESS;
18187 }
18188 } else {
18189 if (new_max_offset <= (vm_map_offset_t)MACH_VM_MAX_ADDRESS) {
18190 map->max_offset = new_max_offset;
18191 ret = KERN_SUCCESS;
18192 }
18193 }
18194 }
18195
18196 vm_map_unlock(map);
18197 return ret;
18198 }
18199
18200
18201 /*
18202 * Raise a VM map's minimum offset.
18203 * To strictly enforce "page zero" reservation.
18204 */
18205 kern_return_t
18206 vm_map_raise_min_offset(
18207 vm_map_t map,
18208 vm_map_offset_t new_min_offset)
18209 {
18210 vm_map_entry_t first_entry;
18211
18212 new_min_offset = vm_map_round_page(new_min_offset,
18213 VM_MAP_PAGE_MASK(map));
18214
18215 vm_map_lock(map);
18216
18217 if (new_min_offset < map->min_offset) {
18218 /*
18219 * Can't move min_offset backwards, as that would expose
18220 * a part of the address space that was previously, and for
18221 * possibly good reasons, inaccessible.
18222 */
18223 vm_map_unlock(map);
18224 return KERN_INVALID_ADDRESS;
18225 }
18226 if (new_min_offset >= map->max_offset) {
18227 /* can't go beyond the end of the address space */
18228 vm_map_unlock(map);
18229 return KERN_INVALID_ADDRESS;
18230 }
18231
18232 first_entry = vm_map_first_entry(map);
18233 if (first_entry != vm_map_to_entry(map) &&
18234 first_entry->vme_start < new_min_offset) {
18235 /*
18236 * Some memory was already allocated below the new
18237 * minimum offset. It's too late to change it now...
18238 */
18239 vm_map_unlock(map);
18240 return KERN_NO_SPACE;
18241 }
18242
18243 map->min_offset = new_min_offset;
18244
18245 assert(map->holes_list);
18246 map->holes_list->start = new_min_offset;
18247 assert(new_min_offset < map->holes_list->end);
18248
18249 vm_map_unlock(map);
18250
18251 return KERN_SUCCESS;
18252 }
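/*
 * Illustrative usage sketch (not part of the original source): enforcing a
 * hard page-zero reservation covering the low 4GB of a 64-bit address
 * space.  The 4GB figure is only an example value chosen for illustration:
 *
 *	kr = vm_map_raise_min_offset(map, (vm_map_offset_t)0x100000000ULL);
 *	... KERN_NO_SPACE means something was already mapped below 4GB ...
 */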
18253
18254 /*
18255 * Set the limit on the maximum amount of user wired memory allowed for this map.
18256 * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side of
18257 * the kernel. The limit is checked on the Mach VM side, so we keep a copy here and
18258 * don't have to reach over to the BSD data structures.
18259 */
18260
18261 void
18262 vm_map_set_user_wire_limit(vm_map_t map,
18263 vm_size_t limit)
18264 {
18265 map->user_wire_limit = limit;
18266 }
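/*
 * Illustrative usage sketch (an assumption about the calling convention,
 * not taken from this file; "new_limit" is a hypothetical variable): the
 * BSD side would push a changed RLIMIT_MEMLOCK value down with something
 * like:
 *
 *	vm_map_set_user_wire_limit(current_map(), (vm_size_t)new_limit);
 */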
18267
18268
18269 void
18270 vm_map_switch_protect(vm_map_t map,
18271 boolean_t val)
18272 {
18273 vm_map_lock(map);
18274 map->switch_protect = val;
18275 vm_map_unlock(map);
18276 }
18277
18278 /*
18279 * IOKit has mapped a region into this map; adjust the pmap's ledgers appropriately.
18280 * phys_footprint is a composite limit consisting of iokit + physmem, so we need to
18281 * bump both counters.
18282 */
18283 void
18284 vm_map_iokit_mapped_region(vm_map_t map, vm_size_t bytes)
18285 {
18286 pmap_t pmap = vm_map_pmap(map);
18287
18288 ledger_credit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
18289 ledger_credit(pmap->ledger, task_ledgers.phys_footprint, bytes);
18290 }
18291
18292 void
18293 vm_map_iokit_unmapped_region(vm_map_t map, vm_size_t bytes)
18294 {
18295 pmap_t pmap = vm_map_pmap(map);
18296
18297 ledger_debit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
18298 ledger_debit(pmap->ledger, task_ledgers.phys_footprint, bytes);
18299 }
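/*
 * Illustrative pairing (hypothetical mapping of "bytes" bytes): charging the
 * ledgers when an IOKit mapping is established and balancing them when it
 * is torn down:
 *
 *	vm_map_iokit_mapped_region(map, bytes);
 *	... mapping is live ...
 *	vm_map_iokit_unmapped_region(map, bytes);
 */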
18300
18301 /* Add (generate) code signature for memory range */
18302 #if CONFIG_DYNAMIC_CODE_SIGNING
18303 kern_return_t
18304 vm_map_sign(vm_map_t map,
18305 vm_map_offset_t start,
18306 vm_map_offset_t end)
18307 {
18308 vm_map_entry_t entry;
18309 vm_page_t m;
18310 vm_object_t object;
18311
18312 /*
18313 * Vet all the input parameters and current type and state of the
18314 * underlying object. Return with an error if anything is amiss.
18315 */
18316 if (map == VM_MAP_NULL) {
18317 return KERN_INVALID_ARGUMENT;
18318 }
18319
18320 vm_map_lock_read(map);
18321
18322 if (!vm_map_lookup_entry(map, start, &entry) || entry->is_sub_map) {
18323 /*
18324 * Must pass a valid non-submap address.
18325 */
18326 vm_map_unlock_read(map);
18327 return KERN_INVALID_ADDRESS;
18328 }
18329
18330 if ((entry->vme_start > start) || (entry->vme_end < end)) {
18331 /*
18332 * Map entry doesn't cover the requested range. Not handling
18333 * this situation currently.
18334 */
18335 vm_map_unlock_read(map);
18336 return KERN_INVALID_ARGUMENT;
18337 }
18338
18339 object = VME_OBJECT(entry);
18340 if (object == VM_OBJECT_NULL) {
18341 /*
18342 * Object must already be present or we can't sign.
18343 */
18344 vm_map_unlock_read(map);
18345 return KERN_INVALID_ARGUMENT;
18346 }
18347
18348 vm_object_lock(object);
18349 vm_map_unlock_read(map);
18350
18351 while (start < end) {
18352 uint32_t refmod;
18353
18354 m = vm_page_lookup(object,
18355 start - entry->vme_start + VME_OFFSET(entry));
18356 if (m == VM_PAGE_NULL) {
18357 /* should we try to fault a page here? We can probably
18358 * demand that it exists and is locked for this request */
18359 vm_object_unlock(object);
18360 return KERN_FAILURE;
18361 }
18362 /* deal with special page status */
18363 if (m->vmp_busy ||
18364 (m->vmp_unusual && (m->vmp_error || m->vmp_restart || m->vmp_private || m->vmp_absent))) {
18365 vm_object_unlock(object);
18366 return KERN_FAILURE;
18367 }
18368
18369 /* Page is OK... now "validate" it */
18370 /* This is the place where we'll call out to create a code
18371 * directory, later */
18372 m->vmp_cs_validated = TRUE;
18373
18374 /* The page is now "clean" for codesigning purposes. That means
18375 * we don't consider it as modified (wpmapped) anymore. But
18376 * we'll disconnect the page so we note any future modification
18377 * attempts. */
18378 m->vmp_wpmapped = FALSE;
18379 refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
18380
18381 /* Pull the dirty status from the pmap, since we cleared the
18382 * wpmapped bit */
18383 if ((refmod & VM_MEM_MODIFIED) && !m->vmp_dirty) {
18384 SET_PAGE_DIRTY(m, FALSE);
18385 }
18386
18387 /* On to the next page */
18388 start += PAGE_SIZE;
18389 }
18390 vm_object_unlock(object);
18391
18392 return KERN_SUCCESS;
18393 }
18394 #endif
18395
18396 kern_return_t
18397 vm_map_partial_reap(vm_map_t map, unsigned int *reclaimed_resident, unsigned int *reclaimed_compressed)
18398 {
18399 vm_map_entry_t entry = VM_MAP_ENTRY_NULL;
18400 vm_map_entry_t next_entry;
18401 kern_return_t kr = KERN_SUCCESS;
18402 vm_map_t zap_map;
18403
18404 vm_map_lock(map);
18405
18406 /*
18407 * We use a "zap_map" to avoid having to unlock
18408 * the "map" in vm_map_delete().
18409 */
18410 zap_map = vm_map_create(PMAP_NULL,
18411 map->min_offset,
18412 map->max_offset,
18413 map->hdr.entries_pageable);
18414
18415 if (zap_map == VM_MAP_NULL) {
18416 return KERN_RESOURCE_SHORTAGE;
18417 }
18418
18419 vm_map_set_page_shift(zap_map,
18420 VM_MAP_PAGE_SHIFT(map));
18421 vm_map_disable_hole_optimization(zap_map);
18422
18423 for (entry = vm_map_first_entry(map);
18424 entry != vm_map_to_entry(map);
18425 entry = next_entry) {
18426 next_entry = entry->vme_next;
18427
18428 if (VME_OBJECT(entry) &&
18429 !entry->is_sub_map &&
18430 (VME_OBJECT(entry)->internal == TRUE) &&
18431 (VME_OBJECT(entry)->ref_count == 1)) {
18432 *reclaimed_resident += VME_OBJECT(entry)->resident_page_count;
18433 *reclaimed_compressed += vm_compressor_pager_get_count(VME_OBJECT(entry)->pager);
18434
18435 (void)vm_map_delete(map,
18436 entry->vme_start,
18437 entry->vme_end,
18438 VM_MAP_REMOVE_SAVE_ENTRIES,
18439 zap_map);
18440 }
18441 }
18442
18443 vm_map_unlock(map);
18444
18445 /*
18446 * Get rid of the "zap_map" and all the map entries that
18447 * it may still contain.
18448 */
18449 if (zap_map != VM_MAP_NULL) {
18450 vm_map_destroy(zap_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
18451 zap_map = VM_MAP_NULL;
18452 }
18453
18454 return kr;
18455 }
18456
18457
18458 #if DEVELOPMENT || DEBUG
18459
18460 int
18461 vm_map_disconnect_page_mappings(
18462 vm_map_t map,
18463 boolean_t do_unnest)
18464 {
18465 vm_map_entry_t entry;
18466 int page_count = 0;
18467
18468 if (do_unnest == TRUE) {
18469 #ifndef NO_NESTED_PMAP
18470 vm_map_lock(map);
18471
18472 for (entry = vm_map_first_entry(map);
18473 entry != vm_map_to_entry(map);
18474 entry = entry->vme_next) {
18475 if (entry->is_sub_map && entry->use_pmap) {
18476 /*
18477 * Make sure the range between the start of this entry and
18478 * the end of this entry is no longer nested, so that
18479 * we will only remove mappings from the pmap in use by
18480 * this task.
18481 */
18482 vm_map_clip_unnest(map, entry, entry->vme_start, entry->vme_end);
18483 }
18484 }
18485 vm_map_unlock(map);
18486 #endif
18487 }
18488 vm_map_lock_read(map);
18489
18490 page_count = map->pmap->stats.resident_count;
18491
18492 for (entry = vm_map_first_entry(map);
18493 entry != vm_map_to_entry(map);
18494 entry = entry->vme_next) {
18495 if (!entry->is_sub_map && ((VME_OBJECT(entry) == 0) ||
18496 (VME_OBJECT(entry)->phys_contiguous))) {
18497 continue;
18498 }
18499 if (entry->is_sub_map) {
18500 assert(!entry->use_pmap);
18501 }
18502
18503 pmap_remove_options(map->pmap, entry->vme_start, entry->vme_end, 0);
18504 }
18505 vm_map_unlock_read(map);
18506
18507 return page_count;
18508 }
18509
18510 #endif
18511
18512
18513 #if CONFIG_FREEZE
18514
18515
18516 int c_freezer_swapout_page_count;
18517 int c_freezer_compression_count = 0;
18518 AbsoluteTime c_freezer_last_yield_ts = 0;
18519
18520 extern unsigned int memorystatus_freeze_private_shared_pages_ratio;
18521 extern unsigned int memorystatus_freeze_shared_mb_per_process_max;
18522
18523 kern_return_t
18524 vm_map_freeze(
18525 task_t task,
18526 unsigned int *purgeable_count,
18527 unsigned int *wired_count,
18528 unsigned int *clean_count,
18529 unsigned int *dirty_count,
18530 unsigned int dirty_budget,
18531 unsigned int *shared_count,
18532 int *freezer_error_code,
18533 boolean_t eval_only)
18534 {
18535 vm_map_entry_t entry2 = VM_MAP_ENTRY_NULL;
18536 kern_return_t kr = KERN_SUCCESS;
18537 boolean_t evaluation_phase = TRUE;
18538 vm_object_t cur_shared_object = NULL;
18539 int cur_shared_obj_ref_cnt = 0;
18540 unsigned int dirty_private_count = 0, dirty_shared_count = 0, obj_pages_snapshot = 0;
18541
18542 *purgeable_count = *wired_count = *clean_count = *dirty_count = *shared_count = 0;
18543
18544 /*
18545 * We need the exclusive lock here so that we can
18546 * block any page faults or lookups while we are
18547 * in the middle of freezing this vm map.
18548 */
18549 vm_map_t map = task->map;
18550
18551 vm_map_lock(map);
18552
18553 assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
18554
18555 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
18556 if (vm_compressor_low_on_space()) {
18557 *freezer_error_code = FREEZER_ERROR_NO_COMPRESSOR_SPACE;
18558 }
18559
18560 if (vm_swap_low_on_space()) {
18561 *freezer_error_code = FREEZER_ERROR_NO_SWAP_SPACE;
18562 }
18563
18564 kr = KERN_NO_SPACE;
18565 goto done;
18566 }
18567
18568 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE == FALSE) {
18569 /*
18570 * In-memory compressor backing the freezer. No disk.
18571 * So no need to do the evaluation phase.
18572 */
18573 evaluation_phase = FALSE;
18574
18575 if (eval_only == TRUE) {
18576 /*
18577 * We don't support 'eval_only' mode
18578 * in this non-swap config.
18579 */
18580 *freezer_error_code = FREEZER_ERROR_GENERIC;
18581 kr = KERN_INVALID_ARGUMENT;
18582 goto done;
18583 }
18584
18585 c_freezer_compression_count = 0;
18586 clock_get_uptime(&c_freezer_last_yield_ts);
18587 }
18588 again:
18589
18590 for (entry2 = vm_map_first_entry(map);
18591 entry2 != vm_map_to_entry(map);
18592 entry2 = entry2->vme_next) {
18593 vm_object_t src_object = VME_OBJECT(entry2);
18594
18595 if (src_object &&
18596 !entry2->is_sub_map &&
18597 !src_object->phys_contiguous) {
18598 /* If eligible, scan the entry, moving eligible pages over to our parent object */
18599
18600 if (src_object->internal == TRUE) {
18601 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
18602 /*
18603 * We skip purgeable objects during evaluation phase only.
18604 * If we decide to freeze this process, we'll explicitly
18605 * purge these objects before we go around again with
18606 * 'evaluation_phase' set to FALSE.
18607 */
18608
18609 if ((src_object->purgable == VM_PURGABLE_EMPTY) || (src_object->purgable == VM_PURGABLE_VOLATILE)) {
18610 /*
18611 * We want to purge objects that may not belong to this task but are mapped
18612 * in this task alone. Since we already purged this task's purgeable memory
18613 * at the end of a successful evaluation phase, we want to avoid doing no-op calls
18614 * on this task's purgeable objects. Hence the check for only volatile objects.
18615 */
18616 if (evaluation_phase == FALSE &&
18617 (src_object->purgable == VM_PURGABLE_VOLATILE) &&
18618 (src_object->ref_count == 1)) {
18619 vm_object_lock(src_object);
18620 vm_object_purge(src_object, 0);
18621 vm_object_unlock(src_object);
18622 }
18623 continue;
18624 }
18625
18626 /*
18627 * Pages belonging to this object could be swapped to disk.
18628 * Make sure it's not a shared object because we could end
18629 * up just bringing it back in again.
18630 *
18631 * We try to optimize somewhat by checking for objects that are mapped
18632 * more than once within our own map. But we don't do full searches,
18633 * we just look at the entries following our current entry.
18634 */
18635
18636 if (src_object->ref_count > 1) {
18637 if (src_object != cur_shared_object) {
18638 obj_pages_snapshot = (src_object->resident_page_count - src_object->wired_page_count) + vm_compressor_pager_get_count(src_object->pager);
18639 dirty_shared_count += obj_pages_snapshot;
18640
18641 cur_shared_object = src_object;
18642 cur_shared_obj_ref_cnt = 1;
18643 continue;
18644 } else {
18645 cur_shared_obj_ref_cnt++;
18646 if (src_object->ref_count == cur_shared_obj_ref_cnt) {
18647 /*
18648 * Fall through to below and treat this object as private.
18649 * So deduct its pages from our shared total and add it to the
18650 * private total.
18651 */
18652
18653 dirty_shared_count -= obj_pages_snapshot;
18654 dirty_private_count += obj_pages_snapshot;
18655 } else {
18656 continue;
18657 }
18658 }
18659 }
18660
18661
18662 if (src_object->ref_count == 1) {
18663 dirty_private_count += (src_object->resident_page_count - src_object->wired_page_count) + vm_compressor_pager_get_count(src_object->pager);
18664 }
18665
18666 if (evaluation_phase == TRUE) {
18667 continue;
18668 }
18669 }
18670
18671 uint32_t paged_out_count = vm_object_compressed_freezer_pageout(src_object, dirty_budget);
18672 *wired_count += src_object->wired_page_count;
18673
18674 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
18675 if (vm_compressor_low_on_space()) {
18676 *freezer_error_code = FREEZER_ERROR_NO_COMPRESSOR_SPACE;
18677 }
18678
18679 if (vm_swap_low_on_space()) {
18680 *freezer_error_code = FREEZER_ERROR_NO_SWAP_SPACE;
18681 }
18682
18683 kr = KERN_NO_SPACE;
18684 break;
18685 }
18686 if (paged_out_count >= dirty_budget) {
18687 break;
18688 }
18689 dirty_budget -= paged_out_count;
18690 }
18691 }
18692 }
18693
18694 if (evaluation_phase) {
18695 unsigned int shared_pages_threshold = (memorystatus_freeze_shared_mb_per_process_max * 1024 * 1024ULL) / PAGE_SIZE_64;
18696
18697 if (dirty_shared_count > shared_pages_threshold) {
18698 *freezer_error_code = FREEZER_ERROR_EXCESS_SHARED_MEMORY;
18699 kr = KERN_FAILURE;
18700 goto done;
18701 }
18702
18703 if (dirty_shared_count &&
18704 ((dirty_private_count / dirty_shared_count) < memorystatus_freeze_private_shared_pages_ratio)) {
18705 *freezer_error_code = FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO;
18706 kr = KERN_FAILURE;
18707 goto done;
18708 }
18709
18710 evaluation_phase = FALSE;
18711 dirty_shared_count = dirty_private_count = 0;
18712
18713 c_freezer_compression_count = 0;
18714 clock_get_uptime(&c_freezer_last_yield_ts);
18715
18716 if (eval_only) {
18717 kr = KERN_SUCCESS;
18718 goto done;
18719 }
18720
18721 vm_purgeable_purge_task_owned(task);
18722
18723 goto again;
18724 } else {
18725 kr = KERN_SUCCESS;
18726 *shared_count = (unsigned int) ((dirty_shared_count * PAGE_SIZE_64) / (1024 * 1024ULL));
18727 }
18728
18729 done:
18730 vm_map_unlock(map);
18731
18732 if ((eval_only == FALSE) && (kr == KERN_SUCCESS)) {
18733 vm_object_compressed_freezer_done();
18734
18735 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
18736 /*
18737 * reset the counter tracking the # of swapped compressed pages
18738 * because we are now done with this freeze session and task.
18739 */
18740
18741 *dirty_count = c_freezer_swapout_page_count; //used to track pageouts
18742 c_freezer_swapout_page_count = 0;
18743 }
18744 }
18745 return kr;
18746 }
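/*
 * Worked example for the evaluation-phase checks above (the tunable values
 * are hypothetical, chosen only to show the arithmetic): with 16KB pages and
 * memorystatus_freeze_shared_mb_per_process_max set to 8,
 * shared_pages_threshold = (8 * 1024 * 1024) / 16384 = 512 pages.  A task
 * with dirty_shared_count = 600 would fail with
 * FREEZER_ERROR_EXCESS_SHARED_MEMORY.  One with dirty_shared_count = 512 and
 * dirty_private_count = 1024 has a private/shared ratio of 2, which passes
 * only if memorystatus_freeze_private_shared_pages_ratio is 2 or less.  On
 * success, *shared_count would report (512 * 16384) / (1024 * 1024) = 8 MB.
 */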
18747
18748 #endif
18749
18750 /*
18751 * vm_map_entry_should_cow_for_true_share:
18752 *
18753 * Determines if the map entry should be clipped and setup for copy-on-write
18754 * to avoid applying "true_share" to a large VM object when only a subset is
18755 * targeted.
18756 *
18757 * For now, we target only the map entries created for the Objective C
18758 * Garbage Collector, which initially have the following properties:
18759 * - alias == VM_MEMORY_MALLOC
18760 * - wired_count == 0
18761 * - !needs_copy
18762 * and a VM object with:
18763 * - internal
18764 * - copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC
18765 * - !true_share
18766 * - vo_size == ANON_CHUNK_SIZE
18767 *
18768 * Only non-kernel map entries.
18769 */
18770 boolean_t
18771 vm_map_entry_should_cow_for_true_share(
18772 vm_map_entry_t entry)
18773 {
18774 vm_object_t object;
18775
18776 if (entry->is_sub_map) {
18777 /* entry does not point at a VM object */
18778 return FALSE;
18779 }
18780
18781 if (entry->needs_copy) {
18782 /* already set for copy_on_write: done! */
18783 return FALSE;
18784 }
18785
18786 if (VME_ALIAS(entry) != VM_MEMORY_MALLOC &&
18787 VME_ALIAS(entry) != VM_MEMORY_MALLOC_SMALL) {
18788 /* not a malloc heap or Obj-C Garbage Collector heap */
18789 return FALSE;
18790 }
18791
18792 if (entry->wired_count) {
18793 /* wired: can't change the map entry... */
18794 vm_counters.should_cow_but_wired++;
18795 return FALSE;
18796 }
18797
18798 object = VME_OBJECT(entry);
18799
18800 if (object == VM_OBJECT_NULL) {
18801 /* no object yet... */
18802 return FALSE;
18803 }
18804
18805 if (!object->internal) {
18806 /* not an internal object */
18807 return FALSE;
18808 }
18809
18810 if (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
18811 /* not the default copy strategy */
18812 return FALSE;
18813 }
18814
18815 if (object->true_share) {
18816 /* already true_share: too late to avoid it */
18817 return FALSE;
18818 }
18819
18820 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC &&
18821 object->vo_size != ANON_CHUNK_SIZE) {
18822 /* ... not an object created for the ObjC Garbage Collector */
18823 return FALSE;
18824 }
18825
18826 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_SMALL &&
18827 object->vo_size != 2048 * 4096) {
18828 /* ... not a "MALLOC_SMALL" heap */
18829 return FALSE;
18830 }
18831
18832 /*
18833 * All the criteria match: we have a large object being targeted for "true_share".
18834 * To limit the adverse side-effects linked with "true_share", tell the caller to
18835 * try and avoid setting up the entire object for "true_share" by clipping the
18836 * targeted range and setting it up for copy-on-write.
18837 */
18838 return TRUE;
18839 }
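/*
 * Illustrative caller sketch (an assumption about a typical call site, not
 * taken from this file; "start" and "end" are hypothetical bounds): when the
 * predicate above says yes, the caller clips the entry down to the targeted
 * range so only that range gets set up for copy-on-write:
 *
 *	if (vm_map_entry_should_cow_for_true_share(entry)) {
 *		vm_map_clip_start(map, entry,
 *		    vm_map_trunc_page(start, VM_MAP_PAGE_MASK(map)));
 *		vm_map_clip_end(map, entry,
 *		    vm_map_round_page(end, VM_MAP_PAGE_MASK(map)));
 *		... set up the clipped entry for copy-on-write instead of
 *		    marking the whole object "true_share" ...
 *	}
 */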
18840
18841 vm_map_offset_t
18842 vm_map_round_page_mask(
18843 vm_map_offset_t offset,
18844 vm_map_offset_t mask)
18845 {
18846 return VM_MAP_ROUND_PAGE(offset, mask);
18847 }
18848
18849 vm_map_offset_t
18850 vm_map_trunc_page_mask(
18851 vm_map_offset_t offset,
18852 vm_map_offset_t mask)
18853 {
18854 return VM_MAP_TRUNC_PAGE(offset, mask);
18855 }
18856
18857 boolean_t
18858 vm_map_page_aligned(
18859 vm_map_offset_t offset,
18860 vm_map_offset_t mask)
18861 {
18862 return ((offset) & mask) == 0;
18863 }
18864
18865 int
18866 vm_map_page_shift(
18867 vm_map_t map)
18868 {
18869 return VM_MAP_PAGE_SHIFT(map);
18870 }
18871
18872 int
18873 vm_map_page_size(
18874 vm_map_t map)
18875 {
18876 return VM_MAP_PAGE_SIZE(map);
18877 }
18878
18879 vm_map_offset_t
18880 vm_map_page_mask(
18881 vm_map_t map)
18882 {
18883 return VM_MAP_PAGE_MASK(map);
18884 }
18885
18886 kern_return_t
18887 vm_map_set_page_shift(
18888 vm_map_t map,
18889 int pageshift)
18890 {
18891 if (map->hdr.nentries != 0) {
18892 /* too late to change page size */
18893 return KERN_FAILURE;
18894 }
18895
18896 map->hdr.page_shift = pageshift;
18897
18898 return KERN_SUCCESS;
18899 }
18900
18901 kern_return_t
18902 vm_map_query_volatile(
18903 vm_map_t map,
18904 mach_vm_size_t *volatile_virtual_size_p,
18905 mach_vm_size_t *volatile_resident_size_p,
18906 mach_vm_size_t *volatile_compressed_size_p,
18907 mach_vm_size_t *volatile_pmap_size_p,
18908 mach_vm_size_t *volatile_compressed_pmap_size_p)
18909 {
18910 mach_vm_size_t volatile_virtual_size;
18911 mach_vm_size_t volatile_resident_count;
18912 mach_vm_size_t volatile_compressed_count;
18913 mach_vm_size_t volatile_pmap_count;
18914 mach_vm_size_t volatile_compressed_pmap_count;
18915 mach_vm_size_t resident_count;
18916 vm_map_entry_t entry;
18917 vm_object_t object;
18918
18919 /* map should be locked by caller */
18920
18921 volatile_virtual_size = 0;
18922 volatile_resident_count = 0;
18923 volatile_compressed_count = 0;
18924 volatile_pmap_count = 0;
18925 volatile_compressed_pmap_count = 0;
18926
18927 for (entry = vm_map_first_entry(map);
18928 entry != vm_map_to_entry(map);
18929 entry = entry->vme_next) {
18930 mach_vm_size_t pmap_resident_bytes, pmap_compressed_bytes;
18931
18932 if (entry->is_sub_map) {
18933 continue;
18934 }
18935 if (!(entry->protection & VM_PROT_WRITE)) {
18936 continue;
18937 }
18938 object = VME_OBJECT(entry);
18939 if (object == VM_OBJECT_NULL) {
18940 continue;
18941 }
18942 if (object->purgable != VM_PURGABLE_VOLATILE &&
18943 object->purgable != VM_PURGABLE_EMPTY) {
18944 continue;
18945 }
18946 if (VME_OFFSET(entry)) {
18947 /*
18948 * If the map entry has been split and the object now
18949 * appears several times in the VM map, we don't want
18950 * to count the object's resident_page_count more than
18951 * once. We count it only for the first one, starting
18952 * at offset 0 and ignore the other VM map entries.
18953 */
18954 continue;
18955 }
18956 resident_count = object->resident_page_count;
18957 if ((VME_OFFSET(entry) / PAGE_SIZE) >= resident_count) {
18958 resident_count = 0;
18959 } else {
18960 resident_count -= (VME_OFFSET(entry) / PAGE_SIZE);
18961 }
18962
18963 volatile_virtual_size += entry->vme_end - entry->vme_start;
18964 volatile_resident_count += resident_count;
18965 if (object->pager) {
18966 volatile_compressed_count +=
18967 vm_compressor_pager_get_count(object->pager);
18968 }
18969 pmap_compressed_bytes = 0;
18970 pmap_resident_bytes =
18971 pmap_query_resident(map->pmap,
18972 entry->vme_start,
18973 entry->vme_end,
18974 &pmap_compressed_bytes);
18975 volatile_pmap_count += (pmap_resident_bytes / PAGE_SIZE);
18976 volatile_compressed_pmap_count += (pmap_compressed_bytes
18977 / PAGE_SIZE);
18978 }
18979
18980 /* map is still locked on return */
18981
18982 *volatile_virtual_size_p = volatile_virtual_size;
18983 *volatile_resident_size_p = volatile_resident_count * PAGE_SIZE;
18984 *volatile_compressed_size_p = volatile_compressed_count * PAGE_SIZE;
18985 *volatile_pmap_size_p = volatile_pmap_count * PAGE_SIZE;
18986 *volatile_compressed_pmap_size_p = volatile_compressed_pmap_count * PAGE_SIZE;
18987
18988 return KERN_SUCCESS;
18989 }
18990
18991 void
18992 vm_map_sizes(vm_map_t map,
18993 vm_map_size_t * psize,
18994 vm_map_size_t * pfree,
18995 vm_map_size_t * plargest_free)
18996 {
18997 vm_map_entry_t entry;
18998 vm_map_offset_t prev;
18999 vm_map_size_t free, total_free, largest_free;
19000 boolean_t end;
19001
19002 if (!map) {
19003 *psize = *pfree = *plargest_free = 0;
19004 return;
19005 }
19006 total_free = largest_free = 0;
19007
19008 vm_map_lock_read(map);
19009 if (psize) {
19010 *psize = map->max_offset - map->min_offset;
19011 }
19012
19013 prev = map->min_offset;
19014 for (entry = vm_map_first_entry(map);; entry = entry->vme_next) {
19015 end = (entry == vm_map_to_entry(map));
19016
19017 if (end) {
19018 free = entry->vme_end - prev;
19019 } else {
19020 free = entry->vme_start - prev;
19021 }
19022
19023 total_free += free;
19024 if (free > largest_free) {
19025 largest_free = free;
19026 }
19027
19028 if (end) {
19029 break;
19030 }
19031 prev = entry->vme_end;
19032 }
19033 vm_map_unlock_read(map);
19034 if (pfree) {
19035 *pfree = total_free;
19036 }
19037 if (plargest_free) {
19038 *plargest_free = largest_free;
19039 }
19040 }
19041
19042 #if VM_SCAN_FOR_SHADOW_CHAIN
19043 int vm_map_shadow_max(vm_map_t map);
19044 int
19045 vm_map_shadow_max(
19046 vm_map_t map)
19047 {
19048 int shadows, shadows_max;
19049 vm_map_entry_t entry;
19050 vm_object_t object, next_object;
19051
19052 if (map == NULL) {
19053 return 0;
19054 }
19055
19056 shadows_max = 0;
19057
19058 vm_map_lock_read(map);
19059
19060 for (entry = vm_map_first_entry(map);
19061 entry != vm_map_to_entry(map);
19062 entry = entry->vme_next) {
19063 if (entry->is_sub_map) {
19064 continue;
19065 }
19066 object = VME_OBJECT(entry);
19067 if (object == NULL) {
19068 continue;
19069 }
19070 vm_object_lock_shared(object);
19071 for (shadows = 0;
19072 object->shadow != NULL;
19073 shadows++, object = next_object) {
19074 next_object = object->shadow;
19075 vm_object_lock_shared(next_object);
19076 vm_object_unlock(object);
19077 }
19078 vm_object_unlock(object);
19079 if (shadows > shadows_max) {
19080 shadows_max = shadows;
19081 }
19082 }
19083
19084 vm_map_unlock_read(map);
19085
19086 return shadows_max;
19087 }
19088 #endif /* VM_SCAN_FOR_SHADOW_CHAIN */
19089
19090 void
19091 vm_commit_pagezero_status(vm_map_t lmap)
19092 {
19093 pmap_advise_pagezero_range(lmap->pmap, lmap->min_offset);
19094 }
19095
19096 #if !CONFIG_EMBEDDED
19097 void
19098 vm_map_set_high_start(
19099 vm_map_t map,
19100 vm_map_offset_t high_start)
19101 {
19102 map->vmmap_high_start = high_start;
19103 }
19104 #endif
19105
19106 #if PMAP_CS
19107 kern_return_t
19108 vm_map_entry_cs_associate(
19109 vm_map_t map,
19110 vm_map_entry_t entry,
19111 vm_map_kernel_flags_t vmk_flags)
19112 {
19113 vm_object_t cs_object, cs_shadow;
19114 vm_object_offset_t cs_offset;
19115 void *cs_blobs;
19116 struct vnode *cs_vnode;
19117 kern_return_t cs_ret;
19118
19119 if (map->pmap == NULL ||
19120 entry->is_sub_map || /* XXX FBDP: recurse on sub-range? */
19121 VME_OBJECT(entry) == VM_OBJECT_NULL ||
19122 !(entry->protection & VM_PROT_EXECUTE)) {
19123 return KERN_SUCCESS;
19124 }
19125
19126 vm_map_lock_assert_exclusive(map);
19127
19128 if (entry->used_for_jit) {
19129 cs_ret = pmap_cs_associate(map->pmap,
19130 PMAP_CS_ASSOCIATE_JIT,
19131 entry->vme_start,
19132 entry->vme_end - entry->vme_start);
19133 goto done;
19134 }
19135
19136 if (vmk_flags.vmkf_remap_prot_copy) {
19137 cs_ret = pmap_cs_associate(map->pmap,
19138 PMAP_CS_ASSOCIATE_COW,
19139 entry->vme_start,
19140 entry->vme_end - entry->vme_start);
19141 goto done;
19142 }
19143
19144 vm_object_lock_shared(VME_OBJECT(entry));
19145 cs_offset = VME_OFFSET(entry);
19146 for (cs_object = VME_OBJECT(entry);
19147 (cs_object != VM_OBJECT_NULL &&
19148 !cs_object->code_signed);
19149 cs_object = cs_shadow) {
19150 cs_shadow = cs_object->shadow;
19151 if (cs_shadow != VM_OBJECT_NULL) {
19152 cs_offset += cs_object->vo_shadow_offset;
19153 vm_object_lock_shared(cs_shadow);
19154 }
19155 vm_object_unlock(cs_object);
19156 }
19157 if (cs_object == VM_OBJECT_NULL) {
19158 return KERN_SUCCESS;
19159 }
19160
19161 cs_offset += cs_object->paging_offset;
19162 cs_vnode = vnode_pager_lookup_vnode(cs_object->pager);
19163 cs_ret = vnode_pager_get_cs_blobs(cs_vnode,
19164 &cs_blobs);
19165 assert(cs_ret == KERN_SUCCESS);
19166 cs_ret = cs_associate_blob_with_mapping(map->pmap,
19167 entry->vme_start,
19168 (entry->vme_end -
19169 entry->vme_start),
19170 cs_offset,
19171 cs_blobs);
19172 vm_object_unlock(cs_object);
19173 cs_object = VM_OBJECT_NULL;
19174
19175 done:
19176 if (cs_ret == KERN_SUCCESS) {
19177 DTRACE_VM2(vm_map_entry_cs_associate_success,
19178 vm_map_offset_t, entry->vme_start,
19179 vm_map_offset_t, entry->vme_end);
19180 if (vm_map_executable_immutable) {
19181 /*
19182 * Prevent this executable
19183 * mapping from being unmapped
19184 * or modified.
19185 */
19186 entry->permanent = TRUE;
19187 }
19188 /*
19189 * pmap says it will validate the
19190 * code-signing validity of pages
19191 * faulted in via this mapping, so
19192 * this map entry should be marked so
19193 * that vm_fault() bypasses code-signing
19194 * validation for faults coming through
19195 * this mapping.
19196 */
19197 entry->pmap_cs_associated = TRUE;
19198 } else if (cs_ret == KERN_NOT_SUPPORTED) {
19199 /*
19200 * pmap won't check the code-signing
19201 * validity of pages faulted in via
19202 * this mapping, so VM should keep
19203 * doing it.
19204 */
19205 DTRACE_VM3(vm_map_entry_cs_associate_off,
19206 vm_map_offset_t, entry->vme_start,
19207 vm_map_offset_t, entry->vme_end,
19208 int, cs_ret);
19209 } else {
19210 /*
19211 * A real error: do not allow
19212 * execution in this mapping.
19213 */
19214 DTRACE_VM3(vm_map_entry_cs_associate_failure,
19215 vm_map_offset_t, entry->vme_start,
19216 vm_map_offset_t, entry->vme_end,
19217 int, cs_ret);
19218 entry->protection &= ~VM_PROT_EXECUTE;
19219 entry->max_protection &= ~VM_PROT_EXECUTE;
19220 }
19221
19222 return cs_ret;
19223 }
19224 #endif /* PMAP_CS */
19225
19226 /*
19227 * FORKED CORPSE FOOTPRINT
19228 *
19229 * A forked corpse gets a copy of the original VM map but its pmap is mostly
19230 * empty since it never ran and never got to fault in any pages.
19231 * Collecting footprint info (via "sysctl vm.self_region_footprint") for
19232 * a forked corpse would therefore return very little information.
19233 *
19234 * When forking a corpse, we can pass the VM_MAP_FORK_CORPSE_FOOTPRINT option
19235 * to vm_map_fork() to collect footprint information from the original VM map
19236 * and its pmap, and store it in the forked corpse's VM map. That information
19237 * is stored in place of the VM map's "hole list" since we'll never need to
19238 * lookup for holes in the corpse's map.
19239 *
19240 * The corpse's footprint info looks like this:
19241 *
19242 * vm_map->vmmap_corpse_footprint points to pageable kernel memory laid out
19243 * as follows:
19244 * +---------------------------------------+
19245 * header-> | cf_size |
19246 * +-------------------+-------------------+
19247 * | cf_last_region | cf_last_zeroes |
19248 * +-------------------+-------------------+
19249 * region1-> | cfr_vaddr |
19250 * +-------------------+-------------------+
19251 * | cfr_num_pages | d0 | d1 | d2 | d3 |
19252 * +---------------------------------------+
19253 * | d4 | d5 | ... |
19254 * +---------------------------------------+
19255 * | ... |
19256 * +-------------------+-------------------+
19257 * | dy | dz | na | na | cfr_vaddr... | <-region2
19258 * +-------------------+-------------------+
19259 * | cfr_vaddr (ctd) | cfr_num_pages |
19260 * +---------------------------------------+
19261 * | d0 | d1 ... |
19262 * +---------------------------------------+
19263 * ...
19264 * +---------------------------------------+
19265 * last region-> | cfr_vaddr |
19266 * +---------------------------------------+
19267 * + cfr_num_pages | d0 | d1 | d2 | d3 |
19268 * +---------------------------------------+
19269 * ...
19270 * +---------------------------------------+
19271 * | dx | dy | dz | na | na | na | na | na |
19272 * +---------------------------------------+
19273 *
19274 * where:
19275 * cf_size: total size of the buffer (rounded to page size)
19276 * cf_last_region: offset in the buffer of the last "region" sub-header
19277 * cf_last_zeroes: number of trailing "zero" dispositions at the end
19278 * of last region
19279 * cfr_vaddr: virtual address of the start of the covered "region"
19280 * cfr_num_pages: number of pages in the covered "region"
19281 * d*: disposition of the page at that virtual address
19282 * Regions in the buffer are word-aligned.
19283 *
19284 * We estimate the size of the buffer based on the number of memory regions
19285 * and the virtual size of the address space. While copying each memory region
19286 * during vm_map_fork(), we also collect the footprint info for that region
19287 * and store it in the buffer, packing it as much as possible (coalescing
19288 * contiguous memory regions to avoid having too many region headers and
19289 * avoiding long streaks of "zero" page dispositions by splitting footprint
19290 * "regions"), so the number of regions in the footprint buffer might not match
19291 * the number of memory regions in the address space.
19292 *
19293 * We also have to copy the original task's "nonvolatile" ledgers since that's
19294 * part of the footprint and will need to be reported to any tool asking for
19295 * the footprint information of the forked corpse.
19296 */
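/*
 * Illustrative sketch (not part of the original source, kept under "#if 0"):
 * how a consumer could walk the footprint buffer laid out above to find the
 * disposition recorded for a virtual address.  The region-advance arithmetic
 * mirrors vm_map_corpse_footprint_new_region() below; the helper name and
 * its "return 0 when not covered" behavior are assumptions for illustration.
 */
#if 0 /* example only */
static int
vm_map_corpse_footprint_example_disp(
	struct vm_map_corpse_footprint_header *footprint_header,
	vm_map_offset_t va)
{
	struct vm_map_corpse_footprint_region *footprint_region;
	uint32_t region_offset;

	/* the first region starts right after the header */
	region_offset = (uint32_t)sizeof(*footprint_header);
	for (;;) {
		footprint_region = (struct vm_map_corpse_footprint_region *)
		    ((char *)footprint_header + region_offset);
		if (va >= footprint_region->cfr_vaddr &&
		    va < (footprint_region->cfr_vaddr +
		    ((vm_map_offset_t)footprint_region->cfr_num_pages *
		    PAGE_SIZE))) {
			/* dispositions are packed right after the region header */
			unsigned char *disp_p;

			disp_p = ((unsigned char *)footprint_region +
			    sizeof(*footprint_region) +
			    ((va - footprint_region->cfr_vaddr) / PAGE_SIZE));
			return (int)*disp_p;
		}
		if (region_offset >= footprint_header->cf_last_region) {
			break;	/* that was the last region */
		}
		/* advance to the next word-aligned region header */
		region_offset = (uint32_t)roundup(region_offset +
		    sizeof(*footprint_region) +
		    footprint_region->cfr_num_pages,
		    sizeof(int));
	}
	return 0;	/* "va" not covered: treated as a "zero" disposition */
}
#endif /* example only */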
19297
19298 uint64_t vm_map_corpse_footprint_count = 0;
19299 uint64_t vm_map_corpse_footprint_size_avg = 0;
19300 uint64_t vm_map_corpse_footprint_size_max = 0;
19301 uint64_t vm_map_corpse_footprint_full = 0;
19302 uint64_t vm_map_corpse_footprint_no_buf = 0;
19303
19304 /*
19305 * vm_map_corpse_footprint_new_region:
19306 * closes the current footprint "region" and creates a new one
19307 *
19308 * Returns NULL if there's not enough space in the buffer for a new region.
19309 */
19310 static struct vm_map_corpse_footprint_region *
19311 vm_map_corpse_footprint_new_region(
19312 struct vm_map_corpse_footprint_header *footprint_header)
19313 {
19314 uintptr_t footprint_edge;
19315 uint32_t new_region_offset;
19316 struct vm_map_corpse_footprint_region *footprint_region;
19317 struct vm_map_corpse_footprint_region *new_footprint_region;
19318
19319 footprint_edge = ((uintptr_t)footprint_header +
19320 footprint_header->cf_size);
19321 footprint_region = ((struct vm_map_corpse_footprint_region *)
19322 ((char *)footprint_header +
19323 footprint_header->cf_last_region));
19324 assert((uintptr_t)footprint_region + sizeof(*footprint_region) <=
19325 footprint_edge);
19326
19327 /* get rid of trailing zeroes in the last region */
19328 assert(footprint_region->cfr_num_pages >=
19329 footprint_header->cf_last_zeroes);
19330 footprint_region->cfr_num_pages -=
19331 footprint_header->cf_last_zeroes;
19332 footprint_header->cf_last_zeroes = 0;
19333
19334 /* reuse this region if it's now empty */
19335 if (footprint_region->cfr_num_pages == 0) {
19336 return footprint_region;
19337 }
19338
19339 /* compute offset of new region */
19340 new_region_offset = footprint_header->cf_last_region;
19341 new_region_offset += sizeof(*footprint_region);
19342 new_region_offset += footprint_region->cfr_num_pages;
19343 new_region_offset = roundup(new_region_offset, sizeof(int));
19344
19345 /* check if we're going over the edge */
19346 if (((uintptr_t)footprint_header +
19347 new_region_offset +
19348 sizeof(*footprint_region)) >=
19349 footprint_edge) {
19350 /* over the edge: no new region */
19351 return NULL;
19352 }
19353
19354 /* adjust offset of last region in header */
19355 footprint_header->cf_last_region = new_region_offset;
19356
19357 new_footprint_region = (struct vm_map_corpse_footprint_region *)
19358 ((char *)footprint_header +
19359 footprint_header->cf_last_region);
19360 new_footprint_region->cfr_vaddr = 0;
19361 new_footprint_region->cfr_num_pages = 0;
19362 /* caller needs to initialize new region */
19363
19364 return new_footprint_region;
19365 }
19366
19367 /*
19368 * vm_map_corpse_footprint_collect:
19369 * collect footprint information for "old_entry" in "old_map" and
19370 * stores it in "new_map"'s vmmap_footprint_info.
19371 */
19372 kern_return_t
19373 vm_map_corpse_footprint_collect(
19374 vm_map_t old_map,
19375 vm_map_entry_t old_entry,
19376 vm_map_t new_map)
19377 {
19378 vm_map_offset_t va;
19379 int disp;
19380 kern_return_t kr;
19381 struct vm_map_corpse_footprint_header *footprint_header;
19382 struct vm_map_corpse_footprint_region *footprint_region;
19383 struct vm_map_corpse_footprint_region *new_footprint_region;
19384 unsigned char *next_disp_p;
19385 uintptr_t footprint_edge;
19386 uint32_t num_pages_tmp;
19387
19388 va = old_entry->vme_start;
19389
19390 vm_map_lock_assert_exclusive(old_map);
19391 vm_map_lock_assert_exclusive(new_map);
19392
19393 assert(new_map->has_corpse_footprint);
19394 assert(!old_map->has_corpse_footprint);
19395 if (!new_map->has_corpse_footprint ||
19396 old_map->has_corpse_footprint) {
19397 /*
19398 * This can only transfer footprint info from a
19399 * map with a live pmap to a map with a corpse footprint.
19400 */
19401 return KERN_NOT_SUPPORTED;
19402 }
19403
19404 if (new_map->vmmap_corpse_footprint == NULL) {
19405 vm_offset_t buf;
19406 vm_size_t buf_size;
19407
19408 buf = 0;
19409 buf_size = (sizeof(*footprint_header) +
19410 (old_map->hdr.nentries
19411 *
19412 (sizeof(*footprint_region) +
19413 +3)) /* potential alignment for each region */
19414 +
19415 ((old_map->size / PAGE_SIZE)
19416 *
19417 sizeof(char))); /* disposition for each page */
19418 // printf("FBDP corpse map %p guestimate footprint size 0x%llx\n", new_map, (uint64_t) buf_size);
19419 buf_size = round_page(buf_size);
19420
19421 /* limit buffer to 1 page to validate overflow detection */
19422 // buf_size = PAGE_SIZE;
19423
19424 /* limit size to a somewhat sane amount */
19425 #if CONFIG_EMBEDDED
19426 #define VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE (256*1024) /* 256KB */
19427 #else /* CONFIG_EMBEDDED */
19428 #define VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE (8*1024*1024) /* 8MB */
19429 #endif /* CONFIG_EMBEDDED */
19430 if (buf_size > VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE) {
19431 buf_size = VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE;
19432 }
19433
19434 /*
19435 * Allocate the pageable buffer (with a trailing guard page).
19436 * It will be zero-filled on demand.
19437 */
19438 kr = kernel_memory_allocate(kernel_map,
19439 &buf,
19440 (buf_size
19441 + PAGE_SIZE), /* trailing guard page */
19442 0, /* mask */
19443 KMA_PAGEABLE | KMA_GUARD_LAST,
19444 VM_KERN_MEMORY_DIAG);
19445 if (kr != KERN_SUCCESS) {
19446 vm_map_corpse_footprint_no_buf++;
19447 return kr;
19448 }
19449
19450 /* initialize header and 1st region */
19451 footprint_header = (struct vm_map_corpse_footprint_header *)buf;
19452 new_map->vmmap_corpse_footprint = footprint_header;
19453
19454 footprint_header->cf_size = buf_size;
19455 footprint_header->cf_last_region =
19456 sizeof(*footprint_header);
19457 footprint_header->cf_last_zeroes = 0;
19458
19459 footprint_region = (struct vm_map_corpse_footprint_region *)
19460 ((char *)footprint_header +
19461 footprint_header->cf_last_region);
19462 footprint_region->cfr_vaddr = 0;
19463 footprint_region->cfr_num_pages = 0;
19464 } else {
19465 /* retrieve header and last region */
19466 footprint_header = (struct vm_map_corpse_footprint_header *)
19467 new_map->vmmap_corpse_footprint;
19468 footprint_region = (struct vm_map_corpse_footprint_region *)
19469 ((char *)footprint_header +
19470 footprint_header->cf_last_region);
19471 }
19472 footprint_edge = ((uintptr_t)footprint_header +
19473 footprint_header->cf_size);
19474
19475 if ((footprint_region->cfr_vaddr +
19476 (((vm_map_offset_t)footprint_region->cfr_num_pages) *
19477 PAGE_SIZE))
19478 != old_entry->vme_start) {
19479 uint64_t num_pages_delta;
19480 uint32_t region_offset_delta;
19481
19482 /*
19483 * Not the next contiguous virtual address:
19484 * start a new region or store "zero" dispositions for
19485 * the missing pages?
19486 */
19487 /* size of gap in actual page dispositions */
19488 num_pages_delta = (((old_entry->vme_start -
19489 footprint_region->cfr_vaddr) / PAGE_SIZE)
19490 - footprint_region->cfr_num_pages);
19491 /* size of gap as a new footprint region header */
19492 region_offset_delta =
19493 (sizeof(*footprint_region) +
19494 roundup((footprint_region->cfr_num_pages -
19495 footprint_header->cf_last_zeroes),
19496 sizeof(int)) -
19497 (footprint_region->cfr_num_pages -
19498 footprint_header->cf_last_zeroes));
19499 // printf("FBDP %s:%d region 0x%x 0x%llx 0x%x vme_start 0x%llx pages_delta 0x%llx region_delta 0x%x\n", __FUNCTION__, __LINE__, footprint_header->cf_last_region, footprint_region->cfr_vaddr, footprint_region->cfr_num_pages, old_entry->vme_start, num_pages_delta, region_offset_delta);
19500 if (region_offset_delta < num_pages_delta ||
19501 os_add3_overflow(footprint_region->cfr_num_pages,
19502 (uint32_t) num_pages_delta,
19503 1,
19504 &num_pages_tmp)) {
19505 /*
19506 * Storing data for this gap would take more space
19507 * than inserting a new footprint region header:
19508 * let's start a new region and save space. If it's a
19509 * tie, let's avoid using a new region, since that
19510 * would require more region hops to find the right
19511 * range during lookups.
19512 *
19513 * If the current region's cfr_num_pages would overflow
19514 * if we added "zero" page dispositions for the gap,
19515 * no choice but to start a new region.
19516 */
19517 // printf("FBDP %s:%d new region\n", __FUNCTION__, __LINE__);
19518 new_footprint_region =
19519 vm_map_corpse_footprint_new_region(footprint_header);
19520 /* check that we're not going over the edge */
19521 if (new_footprint_region == NULL) {
19522 goto over_the_edge;
19523 }
19524 footprint_region = new_footprint_region;
19525 /* initialize new region as empty */
19526 footprint_region->cfr_vaddr = old_entry->vme_start;
19527 footprint_region->cfr_num_pages = 0;
19528 } else {
19529 /*
19530 * Store "zero" page dispositions for the missing
19531 * pages.
19532 */
19533 // printf("FBDP %s:%d zero gap\n", __FUNCTION__, __LINE__);
19534 for (; num_pages_delta > 0; num_pages_delta--) {
19535 next_disp_p =
19536 ((unsigned char *) footprint_region +
19537 sizeof(*footprint_region) +
19538 footprint_region->cfr_num_pages);
19539 /* check that we're not going over the edge */
19540 if ((uintptr_t)next_disp_p >= footprint_edge) {
19541 goto over_the_edge;
19542 }
19543 /* store "zero" disposition for this gap page */
19544 footprint_region->cfr_num_pages++;
19545 *next_disp_p = (unsigned char) 0;
19546 footprint_header->cf_last_zeroes++;
19547 }
19548 }
19549 }
19550
19551 for (va = old_entry->vme_start;
19552 va < old_entry->vme_end;
19553 va += PAGE_SIZE) {
19554 vm_object_t object;
19555
19556 object = VME_OBJECT(old_entry);
19557 if (!old_entry->is_sub_map &&
19558 old_entry->iokit_acct &&
19559 object != VM_OBJECT_NULL &&
19560 object->internal &&
19561 object->purgable == VM_PURGABLE_DENY) {
19562 /*
19563 * Non-purgeable IOKit memory: phys_footprint
19564 * includes the entire virtual mapping.
19565 * Since the forked corpse's VM map entry will not
19566 * have "iokit_acct", pretend that this page's
19567 * disposition is "present & internal", so that it
19568 * shows up in the forked corpse's footprint.
19569 */
19570 disp = (PMAP_QUERY_PAGE_PRESENT |
19571 PMAP_QUERY_PAGE_INTERNAL);
19572 } else {
19573 disp = 0;
19574 pmap_query_page_info(old_map->pmap,
19575 va,
19576 &disp);
19577 }
19578
19579 // if (va < SHARED_REGION_BASE_ARM64) printf("FBDP collect map %p va 0x%llx disp 0x%x\n", new_map, va, disp);
19580
19581 if (disp == 0 && footprint_region->cfr_num_pages == 0) {
19582 /*
19583 * Ignore "zero" dispositions at start of
19584 * region: just move start of region.
19585 */
19586 footprint_region->cfr_vaddr += PAGE_SIZE;
19587 continue;
19588 }
19589
19590 /* would region's cfr_num_pages overflow? */
19591 if (os_add_overflow(footprint_region->cfr_num_pages, 1,
19592 &num_pages_tmp)) {
19593 /* overflow: create a new region */
19594 new_footprint_region =
19595 vm_map_corpse_footprint_new_region(
19596 footprint_header);
19597 if (new_footprint_region == NULL) {
19598 goto over_the_edge;
19599 }
19600 footprint_region = new_footprint_region;
19601 footprint_region->cfr_vaddr = va;
19602 footprint_region->cfr_num_pages = 0;
19603 }
19604
19605 next_disp_p = ((unsigned char *)footprint_region +
19606 sizeof(*footprint_region) +
19607 footprint_region->cfr_num_pages);
19608 /* check that we're not going over the edge */
19609 if ((uintptr_t)next_disp_p >= footprint_edge) {
19610 goto over_the_edge;
19611 }
19612 /* store this disposition */
19613 *next_disp_p = (unsigned char) disp;
19614 footprint_region->cfr_num_pages++;
19615
19616 if (disp != 0) {
19617 /* non-zero disp: break the current zero streak */
19618 footprint_header->cf_last_zeroes = 0;
19619 /* done */
19620 continue;
19621 }
19622
19623 /* zero disp: add to the current streak of zeroes */
19624 footprint_header->cf_last_zeroes++;
19625 if ((footprint_header->cf_last_zeroes +
19626 roundup((footprint_region->cfr_num_pages -
19627 footprint_header->cf_last_zeroes) &
19628 (sizeof(int) - 1),
19629 sizeof(int))) <
19630 (sizeof(*footprint_header))) {
19631 /*
19632 * There are not enough trailing "zero" dispositions
19633 * (+ the extra padding we would need for the previous
19634 * region); creating a new region would not save space
19635 * at this point, so let's keep this "zero" disposition
19636 * in this region and reconsider later.
19637 */
19638 continue;
19639 }
19640 /*
19641 * Create a new region to avoid having too many consecutive
19642 * "zero" dispositions.
19643 */
19644 new_footprint_region =
19645 vm_map_corpse_footprint_new_region(footprint_header);
19646 if (new_footprint_region == NULL) {
19647 goto over_the_edge;
19648 }
19649 footprint_region = new_footprint_region;
19650 /* initialize the new region as empty ... */
19651 footprint_region->cfr_num_pages = 0;
19652 /* ... and skip this "zero" disp */
19653 footprint_region->cfr_vaddr = va + PAGE_SIZE;
19654 }
19655
19656 return KERN_SUCCESS;
19657
19658 over_the_edge:
19659 // printf("FBDP map %p footprint was full for va 0x%llx\n", new_map, va);
19660 vm_map_corpse_footprint_full++;
19661 return KERN_RESOURCE_SHORTAGE;
19662 }
19663
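/*
 * Illustrative sketch, not part of the original file.  The corpse
 * footprint buffer built by the collector above is one header
 * followed by a packed sequence of variable-length regions; each
 * region is a fixed-size region header, one disposition byte per
 * page, and padding up to an int boundary:
 *
 *	[ vm_map_corpse_footprint_header ]
 *	[ region hdr | disp[0] ... disp[n-1] | pad ]
 *	[ region hdr | disp[0] ... disp[m-1] | pad ]
 *	...
 *
 * A minimal walk over that layout, mirroring the offset arithmetic
 * used by vm_map_corpse_footprint_query_page_info() below, could look
 * like this ("dump_footprint" is a hypothetical helper name):
 */
#if 0 /* illustrative only */
static void
dump_footprint(struct vm_map_corpse_footprint_header *hdr)
{
	uint32_t offset;

	for (offset = sizeof(*hdr);
	    offset <= hdr->cf_last_region;
	    /* advanced in the loop body */) {
		struct vm_map_corpse_footprint_region *rgn;

		rgn = (struct vm_map_corpse_footprint_region *)
		    ((char *)hdr + offset);
		printf("region @0x%x: vaddr 0x%llx pages %u\n",
		    offset, (uint64_t)rgn->cfr_vaddr, rgn->cfr_num_pages);
		/* skip this region's header and disposition bytes ... */
		offset += sizeof(*rgn) + rgn->cfr_num_pages;
		/* ... and round up to the next word boundary */
		offset = roundup(offset, sizeof(int));
	}
}
#endif
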
19664 /*
19665 * vm_map_corpse_footprint_collect_done:
19666 * completes the footprint collection by getting rid of any remaining
19667 * trailing "zero" dispositions and trimming the unused part of the
19668 * kernel buffer
19669 */
19670 void
19671 vm_map_corpse_footprint_collect_done(
19672 vm_map_t new_map)
19673 {
19674 struct vm_map_corpse_footprint_header *footprint_header;
19675 struct vm_map_corpse_footprint_region *footprint_region;
19676 vm_size_t buf_size, actual_size;
19677 kern_return_t kr;
19678
19679 assert(new_map->has_corpse_footprint);
19680 if (!new_map->has_corpse_footprint ||
19681 new_map->vmmap_corpse_footprint == NULL) {
19682 return;
19683 }
19684
19685 footprint_header = (struct vm_map_corpse_footprint_header *)
19686 new_map->vmmap_corpse_footprint;
19687 buf_size = footprint_header->cf_size;
19688
19689 footprint_region = (struct vm_map_corpse_footprint_region *)
19690 ((char *)footprint_header +
19691 footprint_header->cf_last_region);
19692
19693 /* get rid of trailing zeroes in last region */
19694 assert(footprint_region->cfr_num_pages >= footprint_header->cf_last_zeroes);
19695 footprint_region->cfr_num_pages -= footprint_header->cf_last_zeroes;
19696 footprint_header->cf_last_zeroes = 0;
19697
19698 actual_size = (vm_size_t)(footprint_header->cf_last_region +
19699 sizeof(*footprint_region) +
19700 footprint_region->cfr_num_pages);
19701
19702 // printf("FBDP map %p buf_size 0x%llx actual_size 0x%llx\n", new_map, (uint64_t) buf_size, (uint64_t) actual_size);
19703 vm_map_corpse_footprint_size_avg =
19704 (((vm_map_corpse_footprint_size_avg *
19705 vm_map_corpse_footprint_count) +
19706 actual_size) /
19707 (vm_map_corpse_footprint_count + 1));
19708 vm_map_corpse_footprint_count++;
19709 if (actual_size > vm_map_corpse_footprint_size_max) {
19710 vm_map_corpse_footprint_size_max = actual_size;
19711 }
19712
19713 actual_size = round_page(actual_size);
19714 if (buf_size > actual_size) {
19715 kr = vm_deallocate(kernel_map,
19716 ((vm_address_t)footprint_header +
19717 actual_size +
19718 PAGE_SIZE), /* trailing guard page */
19719 (buf_size - actual_size));
19720 assertf(kr == KERN_SUCCESS,
19721 "trim: footprint_header %p buf_size 0x%llx actual_size 0x%llx kr=0x%x\n",
19722 footprint_header,
19723 (uint64_t) buf_size,
19724 (uint64_t) actual_size,
19725 kr);
19726 kr = vm_protect(kernel_map,
19727 ((vm_address_t)footprint_header +
19728 actual_size),
19729 PAGE_SIZE,
19730 FALSE, /* set_maximum */
19731 VM_PROT_NONE);
19732 assertf(kr == KERN_SUCCESS,
19733 "guard: footprint_header %p buf_size 0x%llx actual_size 0x%llx kr=0x%x\n",
19734 footprint_header,
19735 (uint64_t) buf_size,
19736 (uint64_t) actual_size,
19737 kr);
19738 }
19739
19740 footprint_header->cf_size = actual_size;
19741 }
19742
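/*
 * Illustrative numbers (not from the original source): if the
 * collection buffer was allocated as, say, buf_size = 0x10000 bytes
 * plus one trailing guard page, and the packed regions only occupy
 * 0x1234 bytes, actual_size rounds up to 0x4000 with 16 KB kernel
 * pages (0x2000 with 4 KB pages).  The trim above then releases the
 * unused 0xC000-byte tail (together with the original guard page),
 * re-purposes the page at footprint_header + actual_size as the new
 * trailing guard via vm_protect(VM_PROT_NONE), and records
 * actual_size in cf_size so that vm_map_corpse_footprint_destroy()
 * later frees exactly actual_size + PAGE_SIZE.
 */
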
19743 /*
19744 * vm_map_corpse_footprint_query_page_info:
19745 * retrieves the disposition of the page at virtual address "vaddr"
19746 * in the forked corpse's VM map
19747 *
19748 * This is the equivalent of pmap_query_page_info() for a forked corpse.
19749 */
19750 kern_return_t
19751 vm_map_corpse_footprint_query_page_info(
19752 vm_map_t map,
19753 vm_map_offset_t va,
19754 int *disp)
19755 {
19756 struct vm_map_corpse_footprint_header *footprint_header;
19757 struct vm_map_corpse_footprint_region *footprint_region;
19758 uint32_t footprint_region_offset;
19759 vm_map_offset_t region_start, region_end;
19760 int disp_idx;
19761 kern_return_t kr;
19762
19763 if (!map->has_corpse_footprint) {
19764 *disp = 0;
19765 kr = KERN_INVALID_ARGUMENT;
19766 goto done;
19767 }
19768
19769 footprint_header = map->vmmap_corpse_footprint;
19770 if (footprint_header == NULL) {
19771 *disp = 0;
19772 // if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disp);
19773 kr = KERN_INVALID_ARGUMENT;
19774 goto done;
19775 }
19776
19777 /* start looking at the hint ("cf_hint_region") */
19778 footprint_region_offset = footprint_header->cf_hint_region;
19779
19780 lookup_again:
19781 if (footprint_region_offset < sizeof(*footprint_header)) {
19782 /* hint too low: start from 1st region */
19783 footprint_region_offset = sizeof(*footprint_header);
19784 }
19785 if (footprint_region_offset >= footprint_header->cf_last_region) {
19786 /* hint too high: re-start from 1st region */
19787 footprint_region_offset = sizeof(*footprint_header);
19788 }
19789 footprint_region = (struct vm_map_corpse_footprint_region *)
19790 ((char *)footprint_header + footprint_region_offset);
19791 region_start = footprint_region->cfr_vaddr;
19792 region_end = (region_start +
19793 ((vm_map_offset_t)(footprint_region->cfr_num_pages) *
19794 PAGE_SIZE));
19795 if (va < region_start &&
19796 footprint_region_offset != sizeof(*footprint_header)) {
19797 /* our range starts before the hint region */
19798
19799 /* reset the hint (in a racy way...) */
19800 footprint_header->cf_hint_region = sizeof(*footprint_header);
19801 /* lookup "va" again from 1st region */
19802 footprint_region_offset = sizeof(*footprint_header);
19803 goto lookup_again;
19804 }
19805
19806 while (va >= region_end) {
19807 if (footprint_region_offset >= footprint_header->cf_last_region) {
19808 break;
19809 }
19810 /* skip the region's header */
19811 footprint_region_offset += sizeof(*footprint_region);
19812 /* skip the region's page dispositions */
19813 footprint_region_offset += footprint_region->cfr_num_pages;
19814 /* align to next word boundary */
19815 footprint_region_offset =
19816 roundup(footprint_region_offset,
19817 sizeof(int));
19818 footprint_region = (struct vm_map_corpse_footprint_region *)
19819 ((char *)footprint_header + footprint_region_offset);
19820 region_start = footprint_region->cfr_vaddr;
19821 region_end = (region_start +
19822 ((vm_map_offset_t)(footprint_region->cfr_num_pages) *
19823 PAGE_SIZE));
19824 }
19825 if (va < region_start || va >= region_end) {
19826 /* page not found */
19827 *disp = 0;
19828 // if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disp);
19829 kr = KERN_SUCCESS;
19830 goto done;
19831 }
19832
19833 /* "va" found: set the lookup hint for next lookup (in a racy way...) */
19834 footprint_header->cf_hint_region = footprint_region_offset;
19835
19836 /* get page disposition for "va" in this region */
19837 disp_idx = (int) ((va - footprint_region->cfr_vaddr) / PAGE_SIZE);
19838 *disp = (int) (footprint_region->cfr_disposition[disp_idx]);
19839
19840 kr = KERN_SUCCESS;
19841 done:
19842 // if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disp);
19843 /* dtrace -n 'vminfo:::footprint_query_page_info { printf("map 0x%p va 0x%llx disp 0x%x kr 0x%x", arg0, arg1, arg2, arg3); }' */
19844 DTRACE_VM4(footprint_query_page_info,
19845 vm_map_t, map,
19846 vm_map_offset_t, va,
19847 int, *disp,
19848 kern_return_t, kr);
19849
19850 return kr;
19851 }
19852
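/*
 * Illustrative usage sketch, not part of the original source.  A
 * caller wanting per-page dispositions can branch on
 * map->has_corpse_footprint and read the recorded footprint instead
 * of querying the pmap; the helper name "corpse_page_disposition"
 * below is hypothetical.
 */
#if 0 /* illustrative only */
static int
corpse_page_disposition(vm_map_t map, vm_map_offset_t va)
{
	int disp = 0;

	if (map->has_corpse_footprint) {
		/* corpse: look up the disposition recorded at fork time */
		(void) vm_map_corpse_footprint_query_page_info(map,
		    vm_map_trunc_page(va, PAGE_MASK), &disp);
	} else {
		/* live task: ask the pmap directly */
		(void) pmap_query_page_info(map->pmap,
		    vm_map_trunc_page(va, PAGE_MASK), &disp);
	}
	return disp;
}
#endif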
19853
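/*
 * vm_map_corpse_footprint_destroy:
 *	releases the corpse footprint buffer of "map" (along with its
 *	trailing guard page), if any, and clears the map's corpse
 *	footprint state
 */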
19854 static void
19855 vm_map_corpse_footprint_destroy(
19856 vm_map_t map)
19857 {
19858 if (map->has_corpse_footprint &&
19859 map->vmmap_corpse_footprint != 0) {
19860 struct vm_map_corpse_footprint_header *footprint_header;
19861 vm_size_t buf_size;
19862 kern_return_t kr;
19863
19864 footprint_header = map->vmmap_corpse_footprint;
19865 buf_size = footprint_header->cf_size;
19866 kr = vm_deallocate(kernel_map,
19867 (vm_offset_t) map->vmmap_corpse_footprint,
19868 ((vm_size_t) buf_size
19869 + PAGE_SIZE)); /* trailing guard page */
19870 assertf(kr == KERN_SUCCESS, "kr=0x%x\n", kr);
19871 map->vmmap_corpse_footprint = 0;
19872 map->has_corpse_footprint = FALSE;
19873 }
19874 }
19875
19876 /*
19877 * vm_map_copy_footprint_ledgers:
19878 * copies any ledger that's relevant to the memory footprint of "old_task"
19879 * into the forked corpse's task ("new_task")
19880 */
19881 void
19882 vm_map_copy_footprint_ledgers(
19883 task_t old_task,
19884 task_t new_task)
19885 {
19886 vm_map_copy_ledger(old_task, new_task, task_ledgers.phys_footprint);
19887 vm_map_copy_ledger(old_task, new_task, task_ledgers.purgeable_nonvolatile);
19888 vm_map_copy_ledger(old_task, new_task, task_ledgers.purgeable_nonvolatile_compressed);
19889 vm_map_copy_ledger(old_task, new_task, task_ledgers.internal);
19890 vm_map_copy_ledger(old_task, new_task, task_ledgers.internal_compressed);
19891 vm_map_copy_ledger(old_task, new_task, task_ledgers.iokit_mapped);
19892 vm_map_copy_ledger(old_task, new_task, task_ledgers.alternate_accounting);
19893 vm_map_copy_ledger(old_task, new_task, task_ledgers.alternate_accounting_compressed);
19894 vm_map_copy_ledger(old_task, new_task, task_ledgers.page_table);
19895 vm_map_copy_ledger(old_task, new_task, task_ledgers.tagged_footprint);
19896 vm_map_copy_ledger(old_task, new_task, task_ledgers.tagged_footprint_compressed);
19897 vm_map_copy_ledger(old_task, new_task, task_ledgers.network_nonvolatile);
19898 vm_map_copy_ledger(old_task, new_task, task_ledgers.network_nonvolatile_compressed);
19899 vm_map_copy_ledger(old_task, new_task, task_ledgers.media_footprint);
19900 vm_map_copy_ledger(old_task, new_task, task_ledgers.media_footprint_compressed);
19901 vm_map_copy_ledger(old_task, new_task, task_ledgers.graphics_footprint);
19902 vm_map_copy_ledger(old_task, new_task, task_ledgers.graphics_footprint_compressed);
19903 vm_map_copy_ledger(old_task, new_task, task_ledgers.neural_footprint);
19904 vm_map_copy_ledger(old_task, new_task, task_ledgers.neural_footprint_compressed);
19905 vm_map_copy_ledger(old_task, new_task, task_ledgers.wired_mem);
19906 }
19907
19908 /*
19909 * vm_map_copy_ledger:
19910 * copy a single ledger from "old_task" to "new_task"
19911 */
19912 void
19913 vm_map_copy_ledger(
19914 task_t old_task,
19915 task_t new_task,
19916 int ledger_entry)
19917 {
19918 ledger_amount_t old_balance, new_balance, delta;
19919
19920 assert(new_task->map->has_corpse_footprint);
19921 if (!new_task->map->has_corpse_footprint) {
19922 return;
19923 }
19924
19925 /* turn off sanity checks for the ledger we're about to mess with */
19926 ledger_disable_panic_on_negative(new_task->ledger,
19927 ledger_entry);
19928
19929 /* adjust "new_task" to match "old_task" */
19930 ledger_get_balance(old_task->ledger,
19931 ledger_entry,
19932 &old_balance);
19933 ledger_get_balance(new_task->ledger,
19934 ledger_entry,
19935 &new_balance);
19936 if (new_balance == old_balance) {
19937 /* new == old: done */
19938 } else if (new_balance > old_balance) {
19939 /* new > old ==> new -= new - old */
19940 delta = new_balance - old_balance;
19941 ledger_debit(new_task->ledger,
19942 ledger_entry,
19943 delta);
19944 } else {
19945 /* new < old ==> new += old - new */
19946 delta = old_balance - new_balance;
19947 ledger_credit(new_task->ledger,
19948 ledger_entry,
19949 delta);
19950 }
19951 }
19952
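/*
 * Worked example (illustrative numbers): vm_map_copy_ledger() above
 * reconciles by delta rather than by overwriting.  If old_task's
 * phys_footprint balance corresponds to 300 pages and the freshly
 * forked corpse has already accumulated 20 pages' worth, the corpse
 * is credited with the 280-page difference; if the corpse somehow
 * exceeded the original, the excess is debited instead.  Either way
 * the corpse ends up reporting the same balance as the task it was
 * forked from.
 */
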
19953 #if MACH_ASSERT
19954
19955 extern int pmap_ledgers_panic;
19956 extern int pmap_ledgers_panic_leeway;
19957
19958 #define LEDGER_DRIFT(__LEDGER) \
19959 int __LEDGER##_over; \
19960 ledger_amount_t __LEDGER##_over_total; \
19961 ledger_amount_t __LEDGER##_over_max; \
19962 int __LEDGER##_under; \
19963 ledger_amount_t __LEDGER##_under_total; \
19964 ledger_amount_t __LEDGER##_under_max
19965
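/*
 * For reference, LEDGER_DRIFT(phys_footprint) expands to:
 *
 *	int		phys_footprint_over;
 *	ledger_amount_t	phys_footprint_over_total;
 *	ledger_amount_t	phys_footprint_over_max;
 *	int		phys_footprint_under;
 *	ledger_amount_t	phys_footprint_under_total;
 *	ledger_amount_t	phys_footprint_under_max;
 *
 * so the struct below tracks over/under counts, totals and maxima for
 * each ledger inspected by LEDGER_CHECK_BALANCE().
 */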
19966 struct {
19967 uint64_t num_pmaps_checked;
19968
19969 LEDGER_DRIFT(phys_footprint);
19970 LEDGER_DRIFT(internal);
19971 LEDGER_DRIFT(internal_compressed);
19972 LEDGER_DRIFT(iokit_mapped);
19973 LEDGER_DRIFT(alternate_accounting);
19974 LEDGER_DRIFT(alternate_accounting_compressed);
19975 LEDGER_DRIFT(page_table);
19976 LEDGER_DRIFT(purgeable_volatile);
19977 LEDGER_DRIFT(purgeable_nonvolatile);
19978 LEDGER_DRIFT(purgeable_volatile_compressed);
19979 LEDGER_DRIFT(purgeable_nonvolatile_compressed);
19980 LEDGER_DRIFT(tagged_nofootprint);
19981 LEDGER_DRIFT(tagged_footprint);
19982 LEDGER_DRIFT(tagged_nofootprint_compressed);
19983 LEDGER_DRIFT(tagged_footprint_compressed);
19984 LEDGER_DRIFT(network_volatile);
19985 LEDGER_DRIFT(network_nonvolatile);
19986 LEDGER_DRIFT(network_volatile_compressed);
19987 LEDGER_DRIFT(network_nonvolatile_compressed);
19988 LEDGER_DRIFT(media_nofootprint);
19989 LEDGER_DRIFT(media_footprint);
19990 LEDGER_DRIFT(media_nofootprint_compressed);
19991 LEDGER_DRIFT(media_footprint_compressed);
19992 LEDGER_DRIFT(graphics_nofootprint);
19993 LEDGER_DRIFT(graphics_footprint);
19994 LEDGER_DRIFT(graphics_nofootprint_compressed);
19995 LEDGER_DRIFT(graphics_footprint_compressed);
19996 LEDGER_DRIFT(neural_nofootprint);
19997 LEDGER_DRIFT(neural_footprint);
19998 LEDGER_DRIFT(neural_nofootprint_compressed);
19999 LEDGER_DRIFT(neural_footprint_compressed);
20000 } pmap_ledgers_drift;
20001
20002 void
20003 vm_map_pmap_check_ledgers(
20004 pmap_t pmap,
20005 ledger_t ledger,
20006 int pid,
20007 char *procname)
20008 {
20009 ledger_amount_t bal;
20010 boolean_t do_panic;
20011
20012 do_panic = FALSE;
20013
20014 pmap_ledgers_drift.num_pmaps_checked++;
20015
20016 #define LEDGER_CHECK_BALANCE(__LEDGER) \
20017 MACRO_BEGIN \
20018 int panic_on_negative = TRUE; \
20019 ledger_get_balance(ledger, \
20020 task_ledgers.__LEDGER, \
20021 &bal); \
20022 ledger_get_panic_on_negative(ledger, \
20023 task_ledgers.__LEDGER, \
20024 &panic_on_negative); \
20025 if (bal != 0) { \
20026 if (panic_on_negative || \
20027 (pmap_ledgers_panic && \
20028 pmap_ledgers_panic_leeway > 0 && \
20029 (bal > (pmap_ledgers_panic_leeway * PAGE_SIZE) || \
20030 bal < (-pmap_ledgers_panic_leeway * PAGE_SIZE)))) { \
20031 do_panic = TRUE; \
20032 } \
20033 printf("LEDGER BALANCE proc %d (%s) " \
20034 "\"%s\" = %lld\n", \
20035 pid, procname, #__LEDGER, bal); \
20036 if (bal > 0) { \
20037 pmap_ledgers_drift.__LEDGER##_over++; \
20038 pmap_ledgers_drift.__LEDGER##_over_total += bal; \
20039 if (bal > pmap_ledgers_drift.__LEDGER##_over_max) { \
20040 pmap_ledgers_drift.__LEDGER##_over_max = bal; \
20041 } \
20042 } else if (bal < 0) { \
20043 pmap_ledgers_drift.__LEDGER##_under++; \
20044 pmap_ledgers_drift.__LEDGER##_under_total += bal; \
20045 if (bal < pmap_ledgers_drift.__LEDGER##_under_max) { \
20046 pmap_ledgers_drift.__LEDGER##_under_max = bal; \
20047 } \
20048 } \
20049 } \
20050 MACRO_END
20051
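/*
 * Each LEDGER_CHECK_BALANCE() below reads the balance of one task
 * ledger and, if it is non-zero, logs it for the dying pmap's
 * process, folds it into the matching pmap_ledgers_drift counters,
 * and latches "do_panic" when the ledger is flagged panic-on-negative
 * or (with pmap_ledgers_panic set and a non-zero leeway) the
 * imbalance falls outside the pmap_ledgers_panic_leeway window.
 */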
20052 LEDGER_CHECK_BALANCE(phys_footprint);
20053 LEDGER_CHECK_BALANCE(internal);
20054 LEDGER_CHECK_BALANCE(internal_compressed);
20055 LEDGER_CHECK_BALANCE(iokit_mapped);
20056 LEDGER_CHECK_BALANCE(alternate_accounting);
20057 LEDGER_CHECK_BALANCE(alternate_accounting_compressed);
20058 LEDGER_CHECK_BALANCE(page_table);
20059 LEDGER_CHECK_BALANCE(purgeable_volatile);
20060 LEDGER_CHECK_BALANCE(purgeable_nonvolatile);
20061 LEDGER_CHECK_BALANCE(purgeable_volatile_compressed);
20062 LEDGER_CHECK_BALANCE(purgeable_nonvolatile_compressed);
20063 LEDGER_CHECK_BALANCE(tagged_nofootprint);
20064 LEDGER_CHECK_BALANCE(tagged_footprint);
20065 LEDGER_CHECK_BALANCE(tagged_nofootprint_compressed);
20066 LEDGER_CHECK_BALANCE(tagged_footprint_compressed);
20067 LEDGER_CHECK_BALANCE(network_volatile);
20068 LEDGER_CHECK_BALANCE(network_nonvolatile);
20069 LEDGER_CHECK_BALANCE(network_volatile_compressed);
20070 LEDGER_CHECK_BALANCE(network_nonvolatile_compressed);
20071 LEDGER_CHECK_BALANCE(media_nofootprint);
20072 LEDGER_CHECK_BALANCE(media_footprint);
20073 LEDGER_CHECK_BALANCE(media_nofootprint_compressed);
20074 LEDGER_CHECK_BALANCE(media_footprint_compressed);
20075 LEDGER_CHECK_BALANCE(graphics_nofootprint);
20076 LEDGER_CHECK_BALANCE(graphics_footprint);
20077 LEDGER_CHECK_BALANCE(graphics_nofootprint_compressed);
20078 LEDGER_CHECK_BALANCE(graphics_footprint_compressed);
20079 LEDGER_CHECK_BALANCE(neural_nofootprint);
20080 LEDGER_CHECK_BALANCE(neural_footprint);
20081 LEDGER_CHECK_BALANCE(neural_nofootprint_compressed);
20082 LEDGER_CHECK_BALANCE(neural_footprint_compressed);
20083
20084 if (do_panic) {
20085 if (pmap_ledgers_panic) {
20086 panic("pmap_destroy(%p) %d[%s] has imbalanced ledgers\n",
20087 pmap, pid, procname);
20088 } else {
20089 printf("pmap_destroy(%p) %d[%s] has imbalanced ledgers\n",
20090 pmap, pid, procname);
20091 }
20092 }
20093 }
20094 #endif /* MACH_ASSERT */