[apple/xnu.git] / osfmk / vm / vm_map.c (blame, xnu-3789.1.32)
1c79356b 1/*
39236c6e 2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
1c79356b 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
1c79356b 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58/*
59 * File: vm/vm_map.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * Virtual memory mapping module.
64 */
65
1c79356b
A
66#include <task_swapper.h>
67#include <mach_assert.h>
fe8ab488
A
68
69#include <vm/vm_options.h>
70
91447636 71#include <libkern/OSAtomic.h>
1c79356b
A
72
73#include <mach/kern_return.h>
74#include <mach/port.h>
75#include <mach/vm_attributes.h>
76#include <mach/vm_param.h>
77#include <mach/vm_behavior.h>
55e303ae 78#include <mach/vm_statistics.h>
91447636 79#include <mach/memory_object.h>
0c530ab8 80#include <mach/mach_vm.h>
91447636 81#include <machine/cpu_capabilities.h>
2d21ac55 82#include <mach/sdt.h>
91447636 83
1c79356b 84#include <kern/assert.h>
39037602 85#include <kern/backtrace.h>
1c79356b 86#include <kern/counters.h>
91447636 87#include <kern/kalloc.h>
1c79356b 88#include <kern/zalloc.h>
91447636
A
89
90#include <vm/cpm.h>
39236c6e 91#include <vm/vm_compressor_pager.h>
1c79356b
A
92#include <vm/vm_init.h>
93#include <vm/vm_fault.h>
94#include <vm/vm_map.h>
95#include <vm/vm_object.h>
96#include <vm/vm_page.h>
b0d623f7 97#include <vm/vm_pageout.h>
1c79356b
A
98#include <vm/vm_kern.h>
99#include <ipc/ipc_port.h>
100#include <kern/sched_prim.h>
101#include <kern/misc_protos.h>
1c79356b
A
102#include <kern/xpr.h>
103
91447636
A
104#include <mach/vm_map_server.h>
105#include <mach/mach_host_server.h>
2d21ac55 106#include <vm/vm_protos.h>
b0d623f7 107#include <vm/vm_purgeable_internal.h>
91447636 108
91447636 109#include <vm/vm_protos.h>
2d21ac55 110#include <vm/vm_shared_region.h>
6d2010ae 111#include <vm/vm_map_store.h>
91447636 112
39037602
A
113extern int proc_selfpid(void);
114extern char *proc_name_address(void *p);
115
116#if VM_MAP_DEBUG_APPLE_PROTECT
117int vm_map_debug_apple_protect = 0;
118#endif /* VM_MAP_DEBUG_APPLE_PROTECT */
119#if VM_MAP_DEBUG_FOURK
120int vm_map_debug_fourk = 0;
121#endif /* VM_MAP_DEBUG_FOURK */
3e170ce0 122
316670eb 123extern u_int32_t random(void); /* from <libkern/libkern.h> */
1c79356b
A
124/* Internal prototypes
125 */
2d21ac55 126
91447636
A
127static void vm_map_simplify_range(
128 vm_map_t map,
129 vm_map_offset_t start,
130 vm_map_offset_t end); /* forward */
131
132static boolean_t vm_map_range_check(
2d21ac55
A
133 vm_map_t map,
134 vm_map_offset_t start,
135 vm_map_offset_t end,
136 vm_map_entry_t *entry);
1c79356b 137
91447636 138static vm_map_entry_t _vm_map_entry_create(
7ddcb079 139 struct vm_map_header *map_header, boolean_t map_locked);
1c79356b 140
91447636 141static void _vm_map_entry_dispose(
2d21ac55
A
142 struct vm_map_header *map_header,
143 vm_map_entry_t entry);
1c79356b 144
91447636 145static void vm_map_pmap_enter(
2d21ac55
A
146 vm_map_t map,
147 vm_map_offset_t addr,
148 vm_map_offset_t end_addr,
149 vm_object_t object,
150 vm_object_offset_t offset,
151 vm_prot_t protection);
1c79356b 152
91447636 153static void _vm_map_clip_end(
2d21ac55
A
154 struct vm_map_header *map_header,
155 vm_map_entry_t entry,
156 vm_map_offset_t end);
91447636
A
157
158static void _vm_map_clip_start(
2d21ac55
A
159 struct vm_map_header *map_header,
160 vm_map_entry_t entry,
161 vm_map_offset_t start);
1c79356b 162
91447636 163static void vm_map_entry_delete(
2d21ac55
A
164 vm_map_t map,
165 vm_map_entry_t entry);
1c79356b 166
91447636 167static kern_return_t vm_map_delete(
2d21ac55
A
168 vm_map_t map,
169 vm_map_offset_t start,
170 vm_map_offset_t end,
171 int flags,
172 vm_map_t zap_map);
1c79356b 173
91447636 174static kern_return_t vm_map_copy_overwrite_unaligned(
2d21ac55
A
175 vm_map_t dst_map,
176 vm_map_entry_t entry,
177 vm_map_copy_t copy,
39236c6e
A
178 vm_map_address_t start,
179 boolean_t discard_on_success);
1c79356b 180
91447636 181static kern_return_t vm_map_copy_overwrite_aligned(
2d21ac55
A
182 vm_map_t dst_map,
183 vm_map_entry_t tmp_entry,
184 vm_map_copy_t copy,
185 vm_map_offset_t start,
186 pmap_t pmap);
1c79356b 187
91447636 188static kern_return_t vm_map_copyin_kernel_buffer(
2d21ac55
A
189 vm_map_t src_map,
190 vm_map_address_t src_addr,
191 vm_map_size_t len,
192 boolean_t src_destroy,
193 vm_map_copy_t *copy_result); /* OUT */
1c79356b 194
91447636 195static kern_return_t vm_map_copyout_kernel_buffer(
2d21ac55
A
196 vm_map_t map,
197 vm_map_address_t *addr, /* IN/OUT */
198 vm_map_copy_t copy,
39037602 199 vm_map_size_t copy_size,
39236c6e
A
200 boolean_t overwrite,
201 boolean_t consume_on_success);
1c79356b 202
91447636 203static void vm_map_fork_share(
2d21ac55
A
204 vm_map_t old_map,
205 vm_map_entry_t old_entry,
206 vm_map_t new_map);
1c79356b 207
91447636 208static boolean_t vm_map_fork_copy(
2d21ac55
A
209 vm_map_t old_map,
210 vm_map_entry_t *old_entry_p,
39037602
A
211 vm_map_t new_map,
212 int vm_map_copyin_flags);
1c79356b 213
0c530ab8 214void vm_map_region_top_walk(
2d21ac55
A
215 vm_map_entry_t entry,
216 vm_region_top_info_t top);
1c79356b 217
0c530ab8 218void vm_map_region_walk(
2d21ac55
A
219 vm_map_t map,
220 vm_map_offset_t va,
221 vm_map_entry_t entry,
222 vm_object_offset_t offset,
223 vm_object_size_t range,
224 vm_region_extended_info_t extended,
39236c6e
A
225 boolean_t look_for_pages,
226 mach_msg_type_number_t count);
91447636
A
227
228static kern_return_t vm_map_wire_nested(
2d21ac55
A
229 vm_map_t map,
230 vm_map_offset_t start,
231 vm_map_offset_t end,
3e170ce0 232 vm_prot_t caller_prot,
2d21ac55
A
233 boolean_t user_wire,
234 pmap_t map_pmap,
fe8ab488
A
235 vm_map_offset_t pmap_addr,
236 ppnum_t *physpage_p);
91447636
A
237
238static kern_return_t vm_map_unwire_nested(
2d21ac55
A
239 vm_map_t map,
240 vm_map_offset_t start,
241 vm_map_offset_t end,
242 boolean_t user_wire,
243 pmap_t map_pmap,
244 vm_map_offset_t pmap_addr);
91447636
A
245
246static kern_return_t vm_map_overwrite_submap_recurse(
2d21ac55
A
247 vm_map_t dst_map,
248 vm_map_offset_t dst_addr,
249 vm_map_size_t dst_size);
91447636
A
250
251static kern_return_t vm_map_copy_overwrite_nested(
2d21ac55
A
252 vm_map_t dst_map,
253 vm_map_offset_t dst_addr,
254 vm_map_copy_t copy,
255 boolean_t interruptible,
6d2010ae
A
256 pmap_t pmap,
257 boolean_t discard_on_success);
91447636
A
258
259static kern_return_t vm_map_remap_extract(
2d21ac55
A
260 vm_map_t map,
261 vm_map_offset_t addr,
262 vm_map_size_t size,
263 boolean_t copy,
264 struct vm_map_header *map_header,
265 vm_prot_t *cur_protection,
266 vm_prot_t *max_protection,
267 vm_inherit_t inheritance,
39037602
A
268 boolean_t pageable,
269 boolean_t same_map);
91447636
A
270
271static kern_return_t vm_map_remap_range_allocate(
2d21ac55
A
272 vm_map_t map,
273 vm_map_address_t *address,
274 vm_map_size_t size,
275 vm_map_offset_t mask,
060df5ea 276 int flags,
2d21ac55 277 vm_map_entry_t *map_entry);
91447636
A
278
279static void vm_map_region_look_for_page(
2d21ac55
A
280 vm_map_t map,
281 vm_map_offset_t va,
282 vm_object_t object,
283 vm_object_offset_t offset,
284 int max_refcnt,
285 int depth,
39236c6e
A
286 vm_region_extended_info_t extended,
287 mach_msg_type_number_t count);
91447636
A
288
289static int vm_map_region_count_obj_refs(
2d21ac55
A
290 vm_map_entry_t entry,
291 vm_object_t object);
1c79356b 292
b0d623f7
A
293
294static kern_return_t vm_map_willneed(
295 vm_map_t map,
296 vm_map_offset_t start,
297 vm_map_offset_t end);
298
299static kern_return_t vm_map_reuse_pages(
300 vm_map_t map,
301 vm_map_offset_t start,
302 vm_map_offset_t end);
303
304static kern_return_t vm_map_reusable_pages(
305 vm_map_t map,
306 vm_map_offset_t start,
307 vm_map_offset_t end);
308
309static kern_return_t vm_map_can_reuse(
310 vm_map_t map,
311 vm_map_offset_t start,
312 vm_map_offset_t end);
313
3e170ce0
A
314#if MACH_ASSERT
315static kern_return_t vm_map_pageout(
316 vm_map_t map,
317 vm_map_offset_t start,
318 vm_map_offset_t end);
319#endif /* MACH_ASSERT */
6d2010ae 320
1c79356b
A
321/*
322 * Macros to copy a vm_map_entry. We must be careful to correctly
323 * manage the wired page count. vm_map_entry_copy() creates a new
324 * map entry to the same memory - the wired count in the new entry
325 * must be set to zero. vm_map_entry_copy_full() creates a new
326 * entry that is identical to the old entry. This preserves the
327 * wire count; it's used for map splitting and zone changing in
328 * vm_map_copyout.
329 */
316670eb 330
7ddcb079
A
331#define vm_map_entry_copy(NEW,OLD) \
332MACRO_BEGIN \
333boolean_t _vmec_reserved = (NEW)->from_reserved_zone; \
2d21ac55
A
334 *(NEW) = *(OLD); \
335 (NEW)->is_shared = FALSE; \
336 (NEW)->needs_wakeup = FALSE; \
337 (NEW)->in_transition = FALSE; \
338 (NEW)->wired_count = 0; \
339 (NEW)->user_wired_count = 0; \
b0d623f7 340 (NEW)->permanent = FALSE; \
316670eb 341 (NEW)->used_for_jit = FALSE; \
fe8ab488
A
342 (NEW)->from_reserved_zone = _vmec_reserved; \
343 (NEW)->iokit_acct = FALSE; \
3e170ce0
A
344 (NEW)->vme_resilient_codesign = FALSE; \
345 (NEW)->vme_resilient_media = FALSE; \
39037602 346 (NEW)->vme_atomic = FALSE; \
1c79356b
A
347MACRO_END
348
7ddcb079
A
349#define vm_map_entry_copy_full(NEW,OLD) \
350MACRO_BEGIN \
351boolean_t _vmecf_reserved = (NEW)->from_reserved_zone; \
352(*(NEW) = *(OLD)); \
353(NEW)->from_reserved_zone = _vmecf_reserved; \
354MACRO_END
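/*
 * A minimal sketch of the difference between the two macros above, using
 * hypothetical entries (only the relevant fields are shown):
 *
 *	struct vm_map_entry old, new1, new2;
 *	old.wired_count = 2;			// "old" is wired
 *
 *	vm_map_entry_copy(&new1, &old);
 *	// new1 maps the same memory, but new1.wired_count == 0 and
 *	// new1.is_shared == FALSE: the copy holds no wirings of its own.
 *
 *	vm_map_entry_copy_full(&new2, &old);
 *	// new2 is identical to old (only from_reserved_zone is kept from
 *	// new2 itself), so new2.wired_count == 2.
 */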
1c79356b 355
2d21ac55
A
356/*
357 * Decide if we want to allow processes to execute from their data or stack areas.
358 * override_nx() returns true if we do. Data/stack execution can be enabled independently
359 * for 32 and 64 bit processes. Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
360 * or allow_stack_exec to enable data execution for that type of data area for that particular
361 * ABI (or both by or'ing the flags together). These are initialized in the architecture
362 * specific pmap files since the default behavior varies according to architecture. The
363 * main reason it varies is because of the need to provide binary compatibility with old
364 * applications that were written before these restrictions came into being. In the old
365 * days, an app could execute anything it could read, but this has slowly been tightened
366 * up over time. The default behavior is:
367 *
368 * 32-bit PPC apps may execute from both stack and data areas
 369 * 32-bit Intel apps may execute from data areas but not stack
370 * 64-bit PPC/Intel apps may not execute from either data or stack
371 *
372 * An application on any architecture may override these defaults by explicitly
373 * adding PROT_EXEC permission to the page in question with the mprotect(2)
374 * system call. This code here just determines what happens when an app tries to
375 * execute from a page that lacks execute permission.
376 *
377 * Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
6d2010ae
A
378 * default behavior for both 32 and 64 bit apps on a system-wide basis. Furthermore,
379 * a Mach-O header flag bit (MH_NO_HEAP_EXECUTION) can be used to forcibly disallow
380 * execution from data areas for a particular binary even if the arch normally permits it. As
381 * a final wrinkle, a posix_spawn attribute flag can be used to negate this opt-in header bit
382 * to support some complicated use cases, notably browsers with out-of-process plugins that
383 * are not all NX-safe.
2d21ac55
A
384 */
385
386extern int allow_data_exec, allow_stack_exec;
387
388int
389override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
390{
391 int current_abi;
392
3e170ce0
A
393 if (map->pmap == kernel_pmap) return FALSE;
394
2d21ac55
A
395 /*
396 * Determine if the app is running in 32 or 64 bit mode.
397 */
398
399 if (vm_map_is_64bit(map))
400 current_abi = VM_ABI_64;
401 else
402 current_abi = VM_ABI_32;
403
404 /*
405 * Determine if we should allow the execution based on whether it's a
406 * stack or data area and the current architecture.
407 */
408
409 if (user_tag == VM_MEMORY_STACK)
410 return allow_stack_exec & current_abi;
411
6d2010ae 412 return (allow_data_exec & current_abi) && (map->map_disallow_data_exec == FALSE);
2d21ac55
A
413}
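/*
 * A sketch of how the flag words above combine, with hypothetical values
 * (not any platform's actual defaults) and map_disallow_data_exec FALSE:
 *
 *	allow_data_exec  = VM_ABI_32;	// only 32-bit data areas may execute
 *	allow_stack_exec = 0;		// no stack execution for any ABI
 *
 *	// 32-bit task faulting on a non-stack page without PROT_EXEC:
 *	//   override_nx() returns (VM_ABI_32 & VM_ABI_32) != 0  -> allowed
 *	// 64-bit task, same fault:
 *	//   override_nx() returns (VM_ABI_32 & VM_ABI_64) == 0  -> denied
 *	// any task faulting on its stack:
 *	//   override_nx() returns (0 & current_abi) == 0        -> denied
 */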
414
415
1c79356b
A
416/*
417 * Virtual memory maps provide for the mapping, protection,
418 * and sharing of virtual memory objects. In addition,
419 * this module provides for an efficient virtual copy of
420 * memory from one map to another.
421 *
422 * Synchronization is required prior to most operations.
423 *
424 * Maps consist of an ordered doubly-linked list of simple
425 * entries; a single hint is used to speed up lookups.
426 *
427 * Sharing maps have been deleted from this version of Mach.
428 * All shared objects are now mapped directly into the respective
429 * maps. This requires a change in the copy on write strategy;
430 * the asymmetric (delayed) strategy is used for shared temporary
431 * objects instead of the symmetric (shadow) strategy. All maps
432 * are now "top level" maps (either task map, kernel map or submap
433 * of the kernel map).
434 *
 435 * Since portions of maps are specified by start/end addresses,
436 * which may not align with existing map entries, all
437 * routines merely "clip" entries to these start/end values.
438 * [That is, an entry is split into two, bordering at a
439 * start or end value.] Note that these clippings may not
440 * always be necessary (as the two resulting entries are then
441 * not changed); however, the clipping is done for convenience.
442 * No attempt is currently made to "glue back together" two
443 * abutting entries.
444 *
445 * The symmetric (shadow) copy strategy implements virtual copy
446 * by copying VM object references from one map to
447 * another, and then marking both regions as copy-on-write.
448 * It is important to note that only one writeable reference
449 * to a VM object region exists in any map when this strategy
450 * is used -- this means that shadow object creation can be
 451 * delayed until a write operation occurs. The asymmetric (delayed)
452 * strategy allows multiple maps to have writeable references to
453 * the same region of a vm object, and hence cannot delay creating
454 * its copy objects. See vm_object_copy_quickly() in vm_object.c.
455 * Copying of permanent objects is completely different; see
456 * vm_object_copy_strategically() in vm_object.c.
457 */
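/*
 * Clipping, concretely (hypothetical addresses): if an entry covers
 * [0x1000, 0x7000) and an operation targets [0x3000, 0x5000), the entry
 * is clipped at both boundaries, leaving three entries
 *
 *	[0x1000, 0x3000)  [0x3000, 0x5000)  [0x5000, 0x7000)
 *
 * of which only the middle one is affected by the operation.  As noted
 * above, the resulting abutting entries are not glued back together.
 */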
458
91447636
A
459static zone_t vm_map_zone; /* zone for vm_map structures */
460static zone_t vm_map_entry_zone; /* zone for vm_map_entry structures */
39037602 461zone_t vm_map_entry_reserved_zone; /* zone with reserve for non-blocking
7ddcb079 462 * allocations */
91447636 463static zone_t vm_map_copy_zone; /* zone for vm_map_copy structures */
3e170ce0 464zone_t vm_map_holes_zone; /* zone for vm map holes (vm_map_links) structures */
1c79356b
A
465
466
467/*
468 * Placeholder object for submap operations. This object is dropped
469 * into the range by a call to vm_map_find, and removed when
470 * vm_map_submap creates the submap.
471 */
472
473vm_object_t vm_submap_object;
474
91447636 475static void *map_data;
b0d623f7 476static vm_size_t map_data_size;
91447636 477static void *kentry_data;
b0d623f7 478static vm_size_t kentry_data_size;
3e170ce0
A
479static void *map_holes_data;
480static vm_size_t map_holes_data_size;
1c79356b 481
b0d623f7 482#define NO_COALESCE_LIMIT ((1024 * 128) - 1)
1c79356b 483
55e303ae 484/* Skip acquiring locks if we're in the midst of a kernel core dump */
b0d623f7 485unsigned int not_in_kdp = 1;
55e303ae 486
6d2010ae
A
487unsigned int vm_map_set_cache_attr_count = 0;
488
489kern_return_t
490vm_map_set_cache_attr(
491 vm_map_t map,
492 vm_map_offset_t va)
493{
494 vm_map_entry_t map_entry;
495 vm_object_t object;
496 kern_return_t kr = KERN_SUCCESS;
497
498 vm_map_lock_read(map);
499
500 if (!vm_map_lookup_entry(map, va, &map_entry) ||
501 map_entry->is_sub_map) {
502 /*
503 * that memory is not properly mapped
504 */
505 kr = KERN_INVALID_ARGUMENT;
506 goto done;
507 }
3e170ce0 508 object = VME_OBJECT(map_entry);
6d2010ae
A
509
510 if (object == VM_OBJECT_NULL) {
511 /*
512 * there should be a VM object here at this point
513 */
514 kr = KERN_INVALID_ARGUMENT;
515 goto done;
516 }
517 vm_object_lock(object);
518 object->set_cache_attr = TRUE;
519 vm_object_unlock(object);
520
521 vm_map_set_cache_attr_count++;
522done:
523 vm_map_unlock_read(map);
524
525 return kr;
526}
527
528
593a1d5f
A
529#if CONFIG_CODE_DECRYPTION
530/*
531 * vm_map_apple_protected:
532 * This remaps the requested part of the object with an object backed by
533 * the decrypting pager.
534 * crypt_info contains entry points and session data for the crypt module.
535 * The crypt_info block will be copied by vm_map_apple_protected. The data structures
536 * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
537 */
0c530ab8
A
538kern_return_t
539vm_map_apple_protected(
3e170ce0
A
540 vm_map_t map,
541 vm_map_offset_t start,
542 vm_map_offset_t end,
543 vm_object_offset_t crypto_backing_offset,
593a1d5f 544 struct pager_crypt_info *crypt_info)
0c530ab8
A
545{
546 boolean_t map_locked;
547 kern_return_t kr;
548 vm_map_entry_t map_entry;
3e170ce0
A
549 struct vm_map_entry tmp_entry;
550 memory_object_t unprotected_mem_obj;
0c530ab8
A
551 vm_object_t protected_object;
552 vm_map_offset_t map_addr;
3e170ce0
A
553 vm_map_offset_t start_aligned, end_aligned;
554 vm_object_offset_t crypto_start, crypto_end;
555 int vm_flags;
0c530ab8 556
3e170ce0
A
557 map_locked = FALSE;
558 unprotected_mem_obj = MEMORY_OBJECT_NULL;
0c530ab8 559
3e170ce0
A
560 start_aligned = vm_map_trunc_page(start, PAGE_MASK_64);
561 end_aligned = vm_map_round_page(end, PAGE_MASK_64);
562 start_aligned = vm_map_trunc_page(start_aligned, VM_MAP_PAGE_MASK(map));
563 end_aligned = vm_map_round_page(end_aligned, VM_MAP_PAGE_MASK(map));
b0d623f7 564
3e170ce0
A
565 assert(start_aligned == start);
566 assert(end_aligned == end);
b0d623f7 567
3e170ce0
A
568 map_addr = start_aligned;
569 for (map_addr = start_aligned;
570 map_addr < end;
571 map_addr = tmp_entry.vme_end) {
572 vm_map_lock(map);
573 map_locked = TRUE;
b0d623f7 574
3e170ce0
A
575 /* lookup the protected VM object */
576 if (!vm_map_lookup_entry(map,
577 map_addr,
578 &map_entry) ||
579 map_entry->is_sub_map ||
580 VME_OBJECT(map_entry) == VM_OBJECT_NULL ||
581 !(map_entry->protection & VM_PROT_EXECUTE)) {
582 /* that memory is not properly mapped */
583 kr = KERN_INVALID_ARGUMENT;
584 goto done;
585 }
b0d623f7 586
3e170ce0
A
587 /* get the protected object to be decrypted */
588 protected_object = VME_OBJECT(map_entry);
589 if (protected_object == VM_OBJECT_NULL) {
590 /* there should be a VM object here at this point */
591 kr = KERN_INVALID_ARGUMENT;
592 goto done;
593 }
594 /* ensure protected object stays alive while map is unlocked */
595 vm_object_reference(protected_object);
596
597 /* limit the map entry to the area we want to cover */
598 vm_map_clip_start(map, map_entry, start_aligned);
599 vm_map_clip_end(map, map_entry, end_aligned);
600
601 tmp_entry = *map_entry;
602 map_entry = VM_MAP_ENTRY_NULL; /* not valid after unlocking map */
603 vm_map_unlock(map);
604 map_locked = FALSE;
605
606 /*
607 * This map entry might be only partially encrypted
608 * (if not fully "page-aligned").
609 */
610 crypto_start = 0;
611 crypto_end = tmp_entry.vme_end - tmp_entry.vme_start;
612 if (tmp_entry.vme_start < start) {
613 if (tmp_entry.vme_start != start_aligned) {
614 kr = KERN_INVALID_ADDRESS;
615 }
616 crypto_start += (start - tmp_entry.vme_start);
617 }
618 if (tmp_entry.vme_end > end) {
619 if (tmp_entry.vme_end != end_aligned) {
620 kr = KERN_INVALID_ADDRESS;
621 }
622 crypto_end -= (tmp_entry.vme_end - end);
623 }
624
625 /*
626 * This "extra backing offset" is needed to get the decryption
627 * routine to use the right key. It adjusts for the possibly
628 * relative offset of an interposed "4K" pager...
629 */
630 if (crypto_backing_offset == (vm_object_offset_t) -1) {
631 crypto_backing_offset = VME_OFFSET(&tmp_entry);
632 }
0c530ab8 633
3e170ce0
A
634 /*
635 * Lookup (and create if necessary) the protected memory object
636 * matching that VM object.
637 * If successful, this also grabs a reference on the memory object,
638 * to guarantee that it doesn't go away before we get a chance to map
639 * it.
640 */
641 unprotected_mem_obj = apple_protect_pager_setup(
642 protected_object,
643 VME_OFFSET(&tmp_entry),
644 crypto_backing_offset,
645 crypt_info,
646 crypto_start,
647 crypto_end);
648
649 /* release extra ref on protected object */
650 vm_object_deallocate(protected_object);
651
652 if (unprotected_mem_obj == NULL) {
653 kr = KERN_FAILURE;
654 goto done;
655 }
656
657 vm_flags = VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
658
659 /* map this memory object in place of the current one */
660 map_addr = tmp_entry.vme_start;
661 kr = vm_map_enter_mem_object(map,
662 &map_addr,
663 (tmp_entry.vme_end -
664 tmp_entry.vme_start),
665 (mach_vm_offset_t) 0,
666 vm_flags,
667 (ipc_port_t) unprotected_mem_obj,
668 0,
669 TRUE,
670 tmp_entry.protection,
671 tmp_entry.max_protection,
672 tmp_entry.inheritance);
673 assert(kr == KERN_SUCCESS);
674 assert(map_addr == tmp_entry.vme_start);
675
676#if VM_MAP_DEBUG_APPLE_PROTECT
39037602
A
677 if (vm_map_debug_apple_protect) {
678 printf("APPLE_PROTECT: map %p [0x%llx:0x%llx] pager %p:"
679 " backing:[object:%p,offset:0x%llx,"
680 "crypto_backing_offset:0x%llx,"
681 "crypto_start:0x%llx,crypto_end:0x%llx]\n",
682 map,
683 (uint64_t) map_addr,
684 (uint64_t) (map_addr + (tmp_entry.vme_end -
685 tmp_entry.vme_start)),
686 unprotected_mem_obj,
687 protected_object,
688 VME_OFFSET(&tmp_entry),
689 crypto_backing_offset,
690 crypto_start,
691 crypto_end);
692 }
3e170ce0
A
693#endif /* VM_MAP_DEBUG_APPLE_PROTECT */
694
695 /*
696 * Release the reference obtained by
697 * apple_protect_pager_setup().
698 * The mapping (if it succeeded) is now holding a reference on
699 * the memory object.
700 */
701 memory_object_deallocate(unprotected_mem_obj);
702 unprotected_mem_obj = MEMORY_OBJECT_NULL;
703
704 /* continue with next map entry */
705 crypto_backing_offset += (tmp_entry.vme_end -
706 tmp_entry.vme_start);
707 crypto_backing_offset -= crypto_start;
708 }
709 kr = KERN_SUCCESS;
0c530ab8
A
710
711done:
712 if (map_locked) {
3e170ce0 713 vm_map_unlock(map);
0c530ab8
A
714 }
715 return kr;
716}
593a1d5f 717#endif /* CONFIG_CODE_DECRYPTION */
0c530ab8
A
718
719
b0d623f7
A
720lck_grp_t vm_map_lck_grp;
721lck_grp_attr_t vm_map_lck_grp_attr;
722lck_attr_t vm_map_lck_attr;
fe8ab488 723lck_attr_t vm_map_lck_rw_attr;
b0d623f7
A
724
725
593a1d5f
A
726/*
727 * vm_map_init:
728 *
729 * Initialize the vm_map module. Must be called before
730 * any other vm_map routines.
731 *
732 * Map and entry structures are allocated from zones -- we must
733 * initialize those zones.
734 *
735 * There are three zones of interest:
736 *
737 * vm_map_zone: used to allocate maps.
738 * vm_map_entry_zone: used to allocate map entries.
7ddcb079 739 * vm_map_entry_reserved_zone: fallback zone for kernel map entries
593a1d5f
A
740 *
741 * The kernel allocates map entries from a special zone that is initially
742 * "crammed" with memory. It would be difficult (perhaps impossible) for
 743 * the kernel to allocate more memory to an entry zone when it became
744 * empty since the very act of allocating memory implies the creation
745 * of a new entry.
746 */
1c79356b
A
747void
748vm_map_init(
749 void)
750{
7ddcb079 751 vm_size_t entry_zone_alloc_size;
316670eb
A
752 const char *mez_name = "VM map entries";
753
2d21ac55
A
754 vm_map_zone = zinit((vm_map_size_t) sizeof(struct _vm_map), 40*1024,
755 PAGE_SIZE, "maps");
0b4c1975 756 zone_change(vm_map_zone, Z_NOENCRYPT, TRUE);
7ddcb079
A
757#if defined(__LP64__)
758 entry_zone_alloc_size = PAGE_SIZE * 5;
759#else
760 entry_zone_alloc_size = PAGE_SIZE * 6;
761#endif
91447636 762 vm_map_entry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
7ddcb079 763 1024*1024, entry_zone_alloc_size,
316670eb 764 mez_name);
0b4c1975 765 zone_change(vm_map_entry_zone, Z_NOENCRYPT, TRUE);
7ddcb079 766 zone_change(vm_map_entry_zone, Z_NOCALLOUT, TRUE);
316670eb 767 zone_change(vm_map_entry_zone, Z_GZALLOC_EXEMPT, TRUE);
1c79356b 768
7ddcb079
A
769 vm_map_entry_reserved_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
770 kentry_data_size * 64, kentry_data_size,
771 "Reserved VM map entries");
772 zone_change(vm_map_entry_reserved_zone, Z_NOENCRYPT, TRUE);
1c79356b 773
91447636 774 vm_map_copy_zone = zinit((vm_map_size_t) sizeof(struct vm_map_copy),
7ddcb079 775 16*1024, PAGE_SIZE, "VM map copies");
0b4c1975 776 zone_change(vm_map_copy_zone, Z_NOENCRYPT, TRUE);
1c79356b 777
3e170ce0
A
778 vm_map_holes_zone = zinit((vm_map_size_t) sizeof(struct vm_map_links),
779 16*1024, PAGE_SIZE, "VM map holes");
780 zone_change(vm_map_holes_zone, Z_NOENCRYPT, TRUE);
781
1c79356b
A
782 /*
783 * Cram the map and kentry zones with initial data.
7ddcb079 784 * Set reserved_zone non-collectible to aid zone_gc().
1c79356b
A
785 */
786 zone_change(vm_map_zone, Z_COLLECT, FALSE);
39037602 787 zone_change(vm_map_zone, Z_FOREIGN, TRUE);
7ddcb079
A
788
789 zone_change(vm_map_entry_reserved_zone, Z_COLLECT, FALSE);
790 zone_change(vm_map_entry_reserved_zone, Z_EXPAND, FALSE);
791 zone_change(vm_map_entry_reserved_zone, Z_FOREIGN, TRUE);
792 zone_change(vm_map_entry_reserved_zone, Z_NOCALLOUT, TRUE);
793 zone_change(vm_map_entry_reserved_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
6d2010ae 794 zone_change(vm_map_copy_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
316670eb 795 zone_change(vm_map_entry_reserved_zone, Z_GZALLOC_EXEMPT, TRUE);
6d2010ae 796
3e170ce0
A
797 zone_change(vm_map_holes_zone, Z_COLLECT, TRUE);
798 zone_change(vm_map_holes_zone, Z_EXPAND, TRUE);
799 zone_change(vm_map_holes_zone, Z_FOREIGN, TRUE);
800 zone_change(vm_map_holes_zone, Z_NOCALLOUT, TRUE);
801 zone_change(vm_map_holes_zone, Z_CALLERACCT, TRUE);
802 zone_change(vm_map_holes_zone, Z_GZALLOC_EXEMPT, TRUE);
803
804 /*
805 * Add the stolen memory to zones, adjust zone size and stolen counts.
806 */
7ddcb079
A
807 zcram(vm_map_zone, (vm_offset_t)map_data, map_data_size);
808 zcram(vm_map_entry_reserved_zone, (vm_offset_t)kentry_data, kentry_data_size);
3e170ce0
A
809 zcram(vm_map_holes_zone, (vm_offset_t)map_holes_data, map_holes_data_size);
810 VM_PAGE_MOVE_STOLEN(atop_64(map_data_size) + atop_64(kentry_data_size) + atop_64(map_holes_data_size));
811
b0d623f7
A
812 lck_grp_attr_setdefault(&vm_map_lck_grp_attr);
813 lck_grp_init(&vm_map_lck_grp, "vm_map", &vm_map_lck_grp_attr);
814 lck_attr_setdefault(&vm_map_lck_attr);
316670eb 815
fe8ab488
A
816 lck_attr_setdefault(&vm_map_lck_rw_attr);
817 lck_attr_cleardebug(&vm_map_lck_rw_attr);
818
39037602
A
819#if VM_MAP_DEBUG_APPLE_PROTECT
820 PE_parse_boot_argn("vm_map_debug_apple_protect",
821 &vm_map_debug_apple_protect,
822 sizeof(vm_map_debug_apple_protect));
823#endif /* VM_MAP_DEBUG_APPLE_PROTECT */
 824#if VM_MAP_DEBUG_FOURK
825 PE_parse_boot_argn("vm_map_debug_fourk",
826 &vm_map_debug_fourk,
827 sizeof(vm_map_debug_fourk));
828#endif /* VM_MAP_DEBUG_FOURK */
1c79356b
A
829}
830
831void
832vm_map_steal_memory(
833 void)
834{
7ddcb079
A
835 uint32_t kentry_initial_pages;
836
b0d623f7 837 map_data_size = round_page(10 * sizeof(struct _vm_map));
1c79356b
A
838 map_data = pmap_steal_memory(map_data_size);
839
1c79356b 840 /*
7ddcb079
A
841 * kentry_initial_pages corresponds to the number of kernel map entries
842 * required during bootstrap until the asynchronous replenishment
843 * scheme is activated and/or entries are available from the general
844 * map entry pool.
1c79356b 845 */
7ddcb079
A
846#if defined(__LP64__)
847 kentry_initial_pages = 10;
848#else
849 kentry_initial_pages = 6;
1c79356b 850#endif
316670eb
A
851
852#if CONFIG_GZALLOC
853 /* If using the guard allocator, reserve more memory for the kernel
854 * reserved map entry pool.
855 */
856 if (gzalloc_enabled())
857 kentry_initial_pages *= 1024;
858#endif
859
7ddcb079 860 kentry_data_size = kentry_initial_pages * PAGE_SIZE;
1c79356b 861 kentry_data = pmap_steal_memory(kentry_data_size);
3e170ce0
A
862
863 map_holes_data_size = kentry_data_size;
864 map_holes_data = pmap_steal_memory(map_holes_data_size);
1c79356b
A
865}
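/*
 * Rough bootstrap ordering implied by the two routines above (a sketch of
 * the intent, not a literal call sequence from platform startup code):
 *
 *	vm_map_steal_memory();	// pmap_steal_memory() grabs map_data,
 *				// kentry_data and map_holes_data while the
 *				// zone allocator is not yet usable
 *	vm_map_init();		// creates the zones and zcram()s the stolen
 *				// ranges into them
 *
 * The reserved map-entry and hole zones are later refilled asynchronously
 * once vm_kernel_reserved_entry_init() configures priority refill.
 */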
866
3e170ce0
A
867void
868vm_kernel_reserved_entry_init(void) {
7ddcb079 869 zone_prio_refill_configure(vm_map_entry_reserved_zone, (6*PAGE_SIZE)/sizeof(struct vm_map_entry));
3e170ce0
A
870 zone_prio_refill_configure(vm_map_holes_zone, (6*PAGE_SIZE)/sizeof(struct vm_map_links));
871}
872
873void
874vm_map_disable_hole_optimization(vm_map_t map)
875{
876 vm_map_entry_t head_entry, hole_entry, next_hole_entry;
877
878 if (map->holelistenabled) {
879
880 head_entry = hole_entry = (vm_map_entry_t) map->holes_list;
881
882 while (hole_entry != NULL) {
883
884 next_hole_entry = hole_entry->vme_next;
885
886 hole_entry->vme_next = NULL;
887 hole_entry->vme_prev = NULL;
888 zfree(vm_map_holes_zone, hole_entry);
889
890 if (next_hole_entry == head_entry) {
891 hole_entry = NULL;
892 } else {
893 hole_entry = next_hole_entry;
894 }
895 }
896
897 map->holes_list = NULL;
898 map->holelistenabled = FALSE;
899
900 map->first_free = vm_map_first_entry(map);
901 SAVE_HINT_HOLE_WRITE(map, NULL);
902 }
903}
904
905boolean_t
906vm_kernel_map_is_kernel(vm_map_t map) {
907 return (map->pmap == kernel_pmap);
7ddcb079
A
908}
909
1c79356b
A
910/*
911 * vm_map_create:
912 *
913 * Creates and returns a new empty VM map with
914 * the given physical map structure, and having
915 * the given lower and upper address bounds.
916 */
3e170ce0
A
917
918boolean_t vm_map_supports_hole_optimization = TRUE;
919
1c79356b
A
920vm_map_t
921vm_map_create(
91447636
A
922 pmap_t pmap,
923 vm_map_offset_t min,
924 vm_map_offset_t max,
925 boolean_t pageable)
1c79356b 926{
2d21ac55 927 static int color_seed = 0;
39037602 928 vm_map_t result;
3e170ce0 929 struct vm_map_links *hole_entry = NULL;
1c79356b
A
930
931 result = (vm_map_t) zalloc(vm_map_zone);
932 if (result == VM_MAP_NULL)
933 panic("vm_map_create");
934
935 vm_map_first_entry(result) = vm_map_to_entry(result);
936 vm_map_last_entry(result) = vm_map_to_entry(result);
937 result->hdr.nentries = 0;
938 result->hdr.entries_pageable = pageable;
939
6d2010ae
A
940 vm_map_store_init( &(result->hdr) );
941
39236c6e
A
942 result->hdr.page_shift = PAGE_SHIFT;
943
1c79356b 944 result->size = 0;
2d21ac55
A
945 result->user_wire_limit = MACH_VM_MAX_ADDRESS; /* default limit is unlimited */
946 result->user_wire_size = 0;
1c79356b
A
947 result->ref_count = 1;
948#if TASK_SWAPPER
949 result->res_count = 1;
950 result->sw_state = MAP_SW_IN;
951#endif /* TASK_SWAPPER */
952 result->pmap = pmap;
953 result->min_offset = min;
954 result->max_offset = max;
955 result->wiring_required = FALSE;
956 result->no_zero_fill = FALSE;
316670eb 957 result->mapped_in_other_pmaps = FALSE;
1c79356b 958 result->wait_for_space = FALSE;
b0d623f7 959 result->switch_protect = FALSE;
6d2010ae
A
960 result->disable_vmentry_reuse = FALSE;
961 result->map_disallow_data_exec = FALSE;
39037602 962 result->is_nested_map = FALSE;
6d2010ae 963 result->highest_entry_end = 0;
1c79356b
A
964 result->first_free = vm_map_to_entry(result);
965 result->hint = vm_map_to_entry(result);
2d21ac55 966 result->color_rr = (color_seed++) & vm_color_mask;
6d2010ae 967 result->jit_entry_exists = FALSE;
3e170ce0
A
968
969 if (vm_map_supports_hole_optimization && pmap != kernel_pmap) {
970 hole_entry = zalloc(vm_map_holes_zone);
971
972 hole_entry->start = min;
973 hole_entry->end = (max > (vm_map_offset_t)MACH_VM_MAX_ADDRESS) ? max : (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
974 result->holes_list = result->hole_hint = hole_entry;
975 hole_entry->prev = hole_entry->next = (vm_map_entry_t) hole_entry;
976 result->holelistenabled = TRUE;
977
978 } else {
979
980 result->holelistenabled = FALSE;
981 }
982
1c79356b 983 vm_map_lock_init(result);
b0d623f7
A
984 lck_mtx_init_ext(&result->s_lock, &result->s_lock_ext, &vm_map_lck_grp, &vm_map_lck_attr);
985
1c79356b
A
986 return(result);
987}
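/*
 * Typical use, sketched (the pmap is assumed to have been set up by the
 * caller; the address bounds shown are just one plausible choice):
 *
 *	vm_map_t map;
 *
 *	map = vm_map_create(pmap,
 *			    MACH_VM_MIN_ADDRESS,
 *			    MACH_VM_MAX_ADDRESS,
 *			    TRUE);		// entries pageable
 *
 *	// "map" starts empty: no entries, size == 0, ref_count == 1, and
 *	// (for non-kernel pmaps) a single hole spanning the whole map.
 */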
988
989/*
990 * vm_map_entry_create: [ internal use only ]
991 *
992 * Allocates a VM map entry for insertion in the
993 * given map (or map copy). No fields are filled.
994 */
7ddcb079 995#define vm_map_entry_create(map, map_locked) _vm_map_entry_create(&(map)->hdr, map_locked)
1c79356b 996
7ddcb079
A
997#define vm_map_copy_entry_create(copy, map_locked) \
998 _vm_map_entry_create(&(copy)->cpy_hdr, map_locked)
999unsigned reserved_zalloc_count, nonreserved_zalloc_count;
1c79356b 1000
91447636 1001static vm_map_entry_t
1c79356b 1002_vm_map_entry_create(
7ddcb079 1003 struct vm_map_header *map_header, boolean_t __unused map_locked)
1c79356b 1004{
7ddcb079
A
1005 zone_t zone;
1006 vm_map_entry_t entry;
1c79356b 1007
7ddcb079
A
1008 zone = vm_map_entry_zone;
1009
1010 assert(map_header->entries_pageable ? !map_locked : TRUE);
1011
1012 if (map_header->entries_pageable) {
1013 entry = (vm_map_entry_t) zalloc(zone);
1014 }
1015 else {
1016 entry = (vm_map_entry_t) zalloc_canblock(zone, FALSE);
1017
1018 if (entry == VM_MAP_ENTRY_NULL) {
1019 zone = vm_map_entry_reserved_zone;
1020 entry = (vm_map_entry_t) zalloc(zone);
1021 OSAddAtomic(1, &reserved_zalloc_count);
1022 } else
1023 OSAddAtomic(1, &nonreserved_zalloc_count);
1024 }
1c79356b 1025
1c79356b
A
1026 if (entry == VM_MAP_ENTRY_NULL)
1027 panic("vm_map_entry_create");
7ddcb079
A
1028 entry->from_reserved_zone = (zone == vm_map_entry_reserved_zone);
1029
6d2010ae 1030 vm_map_store_update( (vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE);
316670eb 1031#if MAP_ENTRY_CREATION_DEBUG
39236c6e 1032 entry->vme_creation_maphdr = map_header;
39037602
A
1033 backtrace(&entry->vme_creation_bt[0],
1034 (sizeof(entry->vme_creation_bt)/sizeof(uintptr_t)));
316670eb 1035#endif
1c79356b
A
1036 return(entry);
1037}
1038
1039/*
1040 * vm_map_entry_dispose: [ internal use only ]
1041 *
1042 * Inverse of vm_map_entry_create.
2d21ac55
A
1043 *
1044 * write map lock held so no need to
1045 * do anything special to insure correctness
1046 * of the stores
1c79356b
A
1047 */
1048#define vm_map_entry_dispose(map, entry) \
6d2010ae 1049 _vm_map_entry_dispose(&(map)->hdr, (entry))
1c79356b
A
1050
1051#define vm_map_copy_entry_dispose(map, entry) \
1052 _vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
1053
91447636 1054static void
1c79356b 1055_vm_map_entry_dispose(
39037602
A
1056 struct vm_map_header *map_header,
1057 vm_map_entry_t entry)
1c79356b 1058{
39037602 1059 zone_t zone;
1c79356b 1060
7ddcb079 1061 if (map_header->entries_pageable || !(entry->from_reserved_zone))
2d21ac55 1062 zone = vm_map_entry_zone;
1c79356b 1063 else
7ddcb079
A
1064 zone = vm_map_entry_reserved_zone;
1065
1066 if (!map_header->entries_pageable) {
1067 if (zone == vm_map_entry_zone)
1068 OSAddAtomic(-1, &nonreserved_zalloc_count);
1069 else
1070 OSAddAtomic(-1, &reserved_zalloc_count);
1071 }
1c79356b 1072
91447636 1073 zfree(zone, entry);
1c79356b
A
1074}
1075
91447636 1076#if MACH_ASSERT
91447636 1077static boolean_t first_free_check = FALSE;
6d2010ae 1078boolean_t
1c79356b
A
1079first_free_is_valid(
1080 vm_map_t map)
1081{
1c79356b
A
1082 if (!first_free_check)
1083 return TRUE;
2d21ac55 1084
6d2010ae 1085 return( first_free_is_valid_store( map ));
1c79356b 1086}
91447636 1087#endif /* MACH_ASSERT */
1c79356b 1088
1c79356b
A
1089
1090#define vm_map_copy_entry_link(copy, after_where, entry) \
6d2010ae 1091 _vm_map_store_entry_link(&(copy)->cpy_hdr, after_where, (entry))
1c79356b
A
1092
1093#define vm_map_copy_entry_unlink(copy, entry) \
6d2010ae 1094 _vm_map_store_entry_unlink(&(copy)->cpy_hdr, (entry))
1c79356b 1095
1c79356b 1096#if MACH_ASSERT && TASK_SWAPPER
1c79356b
A
1097/*
1098 * vm_map_res_reference:
1099 *
1100 * Adds another valid residence count to the given map.
1101 *
1102 * Map is locked so this function can be called from
1103 * vm_map_swapin.
1104 *
1105 */
39037602 1106void vm_map_res_reference(vm_map_t map)
1c79356b
A
1107{
1108 /* assert map is locked */
1109 assert(map->res_count >= 0);
1110 assert(map->ref_count >= map->res_count);
1111 if (map->res_count == 0) {
b0d623f7 1112 lck_mtx_unlock(&map->s_lock);
1c79356b
A
1113 vm_map_lock(map);
1114 vm_map_swapin(map);
b0d623f7 1115 lck_mtx_lock(&map->s_lock);
1c79356b
A
1116 ++map->res_count;
1117 vm_map_unlock(map);
1118 } else
1119 ++map->res_count;
1120}
1121
1122/*
1123 * vm_map_reference_swap:
1124 *
1125 * Adds valid reference and residence counts to the given map.
1126 *
1127 * The map may not be in memory (i.e. zero residence count).
1128 *
1129 */
39037602 1130void vm_map_reference_swap(vm_map_t map)
1c79356b
A
1131{
1132 assert(map != VM_MAP_NULL);
b0d623f7 1133 lck_mtx_lock(&map->s_lock);
1c79356b
A
1134 assert(map->res_count >= 0);
1135 assert(map->ref_count >= map->res_count);
1136 map->ref_count++;
1137 vm_map_res_reference(map);
b0d623f7 1138 lck_mtx_unlock(&map->s_lock);
1c79356b
A
1139}
1140
1141/*
1142 * vm_map_res_deallocate:
1143 *
1144 * Decrement residence count on a map; possibly causing swapout.
1145 *
1146 * The map must be in memory (i.e. non-zero residence count).
1147 *
1148 * The map is locked, so this function is callable from vm_map_deallocate.
1149 *
1150 */
39037602 1151void vm_map_res_deallocate(vm_map_t map)
1c79356b
A
1152{
1153 assert(map->res_count > 0);
1154 if (--map->res_count == 0) {
b0d623f7 1155 lck_mtx_unlock(&map->s_lock);
1c79356b
A
1156 vm_map_lock(map);
1157 vm_map_swapout(map);
1158 vm_map_unlock(map);
b0d623f7 1159 lck_mtx_lock(&map->s_lock);
1c79356b
A
1160 }
1161 assert(map->ref_count >= map->res_count);
1162}
1163#endif /* MACH_ASSERT && TASK_SWAPPER */
1164
1c79356b
A
1165/*
1166 * vm_map_destroy:
1167 *
1168 * Actually destroy a map.
1169 */
1170void
1171vm_map_destroy(
2d21ac55
A
1172 vm_map_t map,
1173 int flags)
91447636 1174{
1c79356b 1175 vm_map_lock(map);
2d21ac55 1176
3e170ce0
A
1177 /* final cleanup: no need to unnest shared region */
1178 flags |= VM_MAP_REMOVE_NO_UNNESTING;
1179
2d21ac55
A
1180 /* clean up regular map entries */
1181 (void) vm_map_delete(map, map->min_offset, map->max_offset,
1182 flags, VM_MAP_NULL);
1183 /* clean up leftover special mappings (commpage, etc...) */
2d21ac55
A
1184 (void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL,
1185 flags, VM_MAP_NULL);
6d2010ae 1186
3e170ce0 1187 vm_map_disable_hole_optimization(map);
1c79356b
A
1188 vm_map_unlock(map);
1189
2d21ac55
A
1190 assert(map->hdr.nentries == 0);
1191
55e303ae
A
1192 if(map->pmap)
1193 pmap_destroy(map->pmap);
1c79356b 1194
39037602
A
1195 if (vm_map_lck_attr.lck_attr_val & LCK_ATTR_DEBUG) {
1196 /*
1197 * If lock debugging is enabled the mutexes get tagged as LCK_MTX_TAG_INDIRECT.
1198 * And this is regardless of whether the lck_mtx_ext_t is embedded in the
1199 * structure or kalloc'ed via lck_mtx_init.
1200 * An example is s_lock_ext within struct _vm_map.
1201 *
1202 * A lck_mtx_destroy on such a mutex will attempt a kfree and panic. We
1203 * can add another tag to detect embedded vs alloc'ed indirect external
1204 * mutexes but that'll be additional checks in the lock path and require
1205 * updating dependencies for the old vs new tag.
1206 *
1207 * Since the kfree() is for LCK_MTX_TAG_INDIRECT mutexes and that tag is applied
1208 * just when lock debugging is ON, we choose to forego explicitly destroying
1209 * the vm_map mutex and rw lock and, as a consequence, will overflow the reference
1210 * count on vm_map_lck_grp, which has no serious side-effect.
1211 */
1212 } else {
1213 lck_rw_destroy(&(map)->lock, &vm_map_lck_grp);
1214 lck_mtx_destroy(&(map)->s_lock, &vm_map_lck_grp);
1215 }
1216
91447636 1217 zfree(vm_map_zone, map);
1c79356b
A
1218}
1219
1220#if TASK_SWAPPER
1221/*
1222 * vm_map_swapin/vm_map_swapout
1223 *
1224 * Swap a map in and out, either referencing or releasing its resources.
1225 * These functions are internal use only; however, they must be exported
1226 * because they may be called from macros, which are exported.
1227 *
1228 * In the case of swapout, there could be races on the residence count,
1229 * so if the residence count is up, we return, assuming that a
1230 * vm_map_deallocate() call in the near future will bring us back.
1231 *
1232 * Locking:
1233 * -- We use the map write lock for synchronization among races.
1234 * -- The map write lock, and not the simple s_lock, protects the
1235 * swap state of the map.
1236 * -- If a map entry is a share map, then we hold both locks, in
1237 * hierarchical order.
1238 *
1239 * Synchronization Notes:
1240 * 1) If a vm_map_swapin() call happens while swapout in progress, it
1241 * will block on the map lock and proceed when swapout is through.
1242 * 2) A vm_map_reference() call at this time is illegal, and will
1243 * cause a panic. vm_map_reference() is only allowed on resident
1244 * maps, since it refuses to block.
1245 * 3) A vm_map_swapin() call during a swapin will block, and
 1246 * proceed when the first swapin is done, turning into a nop.
1247 * This is the reason the res_count is not incremented until
1248 * after the swapin is complete.
1249 * 4) There is a timing hole after the checks of the res_count, before
1250 * the map lock is taken, during which a swapin may get the lock
1251 * before a swapout about to happen. If this happens, the swapin
1252 * will detect the state and increment the reference count, causing
1253 * the swapout to be a nop, thereby delaying it until a later
1254 * vm_map_deallocate. If the swapout gets the lock first, then
1255 * the swapin will simply block until the swapout is done, and
1256 * then proceed.
1257 *
1258 * Because vm_map_swapin() is potentially an expensive operation, it
1259 * should be used with caution.
1260 *
1261 * Invariants:
1262 * 1) A map with a residence count of zero is either swapped, or
1263 * being swapped.
1264 * 2) A map with a non-zero residence count is either resident,
1265 * or being swapped in.
1266 */
1267
1268int vm_map_swap_enable = 1;
1269
1270void vm_map_swapin (vm_map_t map)
1271{
39037602 1272 vm_map_entry_t entry;
2d21ac55 1273
1c79356b
A
1274 if (!vm_map_swap_enable) /* debug */
1275 return;
1276
1277 /*
1278 * Map is locked
1279 * First deal with various races.
1280 */
1281 if (map->sw_state == MAP_SW_IN)
1282 /*
1283 * we raced with swapout and won. Returning will incr.
1284 * the res_count, turning the swapout into a nop.
1285 */
1286 return;
1287
1288 /*
1289 * The residence count must be zero. If we raced with another
1290 * swapin, the state would have been IN; if we raced with a
1291 * swapout (after another competing swapin), we must have lost
1292 * the race to get here (see above comment), in which case
1293 * res_count is still 0.
1294 */
1295 assert(map->res_count == 0);
1296
1297 /*
1298 * There are no intermediate states of a map going out or
1299 * coming in, since the map is locked during the transition.
1300 */
1301 assert(map->sw_state == MAP_SW_OUT);
1302
1303 /*
1304 * We now operate upon each map entry. If the entry is a sub-
1305 * or share-map, we call vm_map_res_reference upon it.
1306 * If the entry is an object, we call vm_object_res_reference
1307 * (this may iterate through the shadow chain).
1308 * Note that we hold the map locked the entire time,
1309 * even if we get back here via a recursive call in
1310 * vm_map_res_reference.
1311 */
1312 entry = vm_map_first_entry(map);
1313
1314 while (entry != vm_map_to_entry(map)) {
3e170ce0 1315 if (VME_OBJECT(entry) != VM_OBJECT_NULL) {
1c79356b 1316 if (entry->is_sub_map) {
3e170ce0 1317 vm_map_t lmap = VME_SUBMAP(entry);
b0d623f7 1318 lck_mtx_lock(&lmap->s_lock);
1c79356b 1319 vm_map_res_reference(lmap);
b0d623f7 1320 lck_mtx_unlock(&lmap->s_lock);
1c79356b 1321 } else {
3e170ce0 1322 vm_object_t object = VME_OBJECT(entry);
1c79356b
A
1323 vm_object_lock(object);
1324 /*
1325 * This call may iterate through the
1326 * shadow chain.
1327 */
1328 vm_object_res_reference(object);
1329 vm_object_unlock(object);
1330 }
1331 }
1332 entry = entry->vme_next;
1333 }
1334 assert(map->sw_state == MAP_SW_OUT);
1335 map->sw_state = MAP_SW_IN;
1336}
1337
1338void vm_map_swapout(vm_map_t map)
1339{
39037602 1340 vm_map_entry_t entry;
1c79356b
A
1341
1342 /*
1343 * Map is locked
1344 * First deal with various races.
1345 * If we raced with a swapin and lost, the residence count
1346 * will have been incremented to 1, and we simply return.
1347 */
b0d623f7 1348 lck_mtx_lock(&map->s_lock);
1c79356b 1349 if (map->res_count != 0) {
b0d623f7 1350 lck_mtx_unlock(&map->s_lock);
1c79356b
A
1351 return;
1352 }
b0d623f7 1353 lck_mtx_unlock(&map->s_lock);
1c79356b
A
1354
1355 /*
1356 * There are no intermediate states of a map going out or
1357 * coming in, since the map is locked during the transition.
1358 */
1359 assert(map->sw_state == MAP_SW_IN);
1360
1361 if (!vm_map_swap_enable)
1362 return;
1363
1364 /*
1365 * We now operate upon each map entry. If the entry is a sub-
1366 * or share-map, we call vm_map_res_deallocate upon it.
1367 * If the entry is an object, we call vm_object_res_deallocate
1368 * (this may iterate through the shadow chain).
1369 * Note that we hold the map locked the entire time,
1370 * even if we get back here via a recursive call in
1371 * vm_map_res_deallocate.
1372 */
1373 entry = vm_map_first_entry(map);
1374
1375 while (entry != vm_map_to_entry(map)) {
3e170ce0 1376 if (VME_OBJECT(entry) != VM_OBJECT_NULL) {
1c79356b 1377 if (entry->is_sub_map) {
3e170ce0 1378 vm_map_t lmap = VME_SUBMAP(entry);
b0d623f7 1379 lck_mtx_lock(&lmap->s_lock);
1c79356b 1380 vm_map_res_deallocate(lmap);
b0d623f7 1381 lck_mtx_unlock(&lmap->s_lock);
1c79356b 1382 } else {
3e170ce0 1383 vm_object_t object = VME_OBJECT(entry);
1c79356b
A
1384 vm_object_lock(object);
1385 /*
1386 * This call may take a long time,
1387 * since it could actively push
1388 * out pages (if we implement it
1389 * that way).
1390 */
1391 vm_object_res_deallocate(object);
1392 vm_object_unlock(object);
1393 }
1394 }
1395 entry = entry->vme_next;
1396 }
1397 assert(map->sw_state == MAP_SW_IN);
1398 map->sw_state = MAP_SW_OUT;
1399}
1400
1401#endif /* TASK_SWAPPER */
1402
1c79356b
A
1403/*
1404 * vm_map_lookup_entry: [ internal use only ]
1405 *
6d2010ae
A
1406 * Calls into the vm map store layer to find the map
1407 * entry containing (or immediately preceding) the
1408 * specified address in the given map; the entry is returned
1c79356b
A
1409 * in the "entry" parameter. The boolean
1410 * result indicates whether the address is
1411 * actually contained in the map.
1412 */
1413boolean_t
1414vm_map_lookup_entry(
39037602
A
1415 vm_map_t map,
1416 vm_map_offset_t address,
1c79356b
A
1417 vm_map_entry_t *entry) /* OUT */
1418{
6d2010ae 1419 return ( vm_map_store_lookup_entry( map, address, entry ));
1c79356b
A
1420}
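/*
 * Usage sketch, mirroring vm_map_set_cache_attr() above ("va" is just an
 * illustrative local, not a real caller's variable):
 *
 *	vm_map_entry_t entry;
 *
 *	vm_map_lock_read(map);
 *	if (vm_map_lookup_entry(map, va, &entry)) {
 *		// entry contains va: entry->vme_start <= va < entry->vme_end
 *	} else {
 *		// va is unmapped; entry is the entry immediately preceding
 *		// the gap, or vm_map_to_entry(map) if there is none
 *	}
 *	vm_map_unlock_read(map);
 */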
1421
1422/*
1423 * Routine: vm_map_find_space
1424 * Purpose:
1425 * Allocate a range in the specified virtual address map,
1426 * returning the entry allocated for that range.
1427 * Used by kmem_alloc, etc.
1428 *
 1429 * The map must NOT be locked. It will be returned locked
1430 * on KERN_SUCCESS, unlocked on failure.
1431 *
1432 * If an entry is allocated, the object/offset fields
1433 * are initialized to zero.
1434 */
1435kern_return_t
1436vm_map_find_space(
39037602 1437 vm_map_t map,
91447636
A
1438 vm_map_offset_t *address, /* OUT */
1439 vm_map_size_t size,
1440 vm_map_offset_t mask,
0c530ab8 1441 int flags,
1c79356b
A
1442 vm_map_entry_t *o_entry) /* OUT */
1443{
3e170ce0 1444 vm_map_entry_t entry, new_entry;
39037602
A
1445 vm_map_offset_t start;
1446 vm_map_offset_t end;
3e170ce0 1447 vm_map_entry_t hole_entry;
91447636
A
1448
1449 if (size == 0) {
1450 *address = 0;
1451 return KERN_INVALID_ARGUMENT;
1452 }
1c79356b 1453
2d21ac55
A
1454 if (flags & VM_FLAGS_GUARD_AFTER) {
1455 /* account for the back guard page in the size */
39236c6e 1456 size += VM_MAP_PAGE_SIZE(map);
2d21ac55
A
1457 }
1458
7ddcb079 1459 new_entry = vm_map_entry_create(map, FALSE);
1c79356b
A
1460
1461 /*
1462 * Look for the first possible address; if there's already
1463 * something at this address, we have to start after it.
1464 */
1465
1466 vm_map_lock(map);
1467
6d2010ae
A
1468 if( map->disable_vmentry_reuse == TRUE) {
1469 VM_MAP_HIGHEST_ENTRY(map, entry, start);
1470 } else {
3e170ce0
A
1471 if (map->holelistenabled) {
1472 hole_entry = (vm_map_entry_t)map->holes_list;
1473
1474 if (hole_entry == NULL) {
1475 /*
1476 * No more space in the map?
1477 */
1478 vm_map_entry_dispose(map, new_entry);
1479 vm_map_unlock(map);
1480 return(KERN_NO_SPACE);
1481 }
1482
1483 entry = hole_entry;
1484 start = entry->vme_start;
1485 } else {
1486 assert(first_free_is_valid(map));
1487 if ((entry = map->first_free) == vm_map_to_entry(map))
1488 start = map->min_offset;
1489 else
1490 start = entry->vme_end;
1491 }
6d2010ae 1492 }
1c79356b
A
1493
1494 /*
1495 * In any case, the "entry" always precedes
1496 * the proposed new region throughout the loop:
1497 */
1498
1499 while (TRUE) {
39037602 1500 vm_map_entry_t next;
1c79356b
A
1501
1502 /*
1503 * Find the end of the proposed new region.
1504 * Be sure we didn't go beyond the end, or
1505 * wrap around the address.
1506 */
1507
2d21ac55
A
1508 if (flags & VM_FLAGS_GUARD_BEFORE) {
1509 /* reserve space for the front guard page */
39236c6e 1510 start += VM_MAP_PAGE_SIZE(map);
2d21ac55 1511 }
1c79356b 1512 end = ((start + mask) & ~mask);
2d21ac55 1513
1c79356b
A
1514 if (end < start) {
1515 vm_map_entry_dispose(map, new_entry);
1516 vm_map_unlock(map);
1517 return(KERN_NO_SPACE);
1518 }
1519 start = end;
1520 end += size;
1521
1522 if ((end > map->max_offset) || (end < start)) {
1523 vm_map_entry_dispose(map, new_entry);
1524 vm_map_unlock(map);
1525 return(KERN_NO_SPACE);
1526 }
1527
1c79356b 1528 next = entry->vme_next;
1c79356b 1529
3e170ce0
A
1530 if (map->holelistenabled) {
1531 if (entry->vme_end >= end)
1532 break;
1533 } else {
1534 /*
1535 * If there are no more entries, we must win.
1536 *
1537 * OR
1538 *
1539 * If there is another entry, it must be
1540 * after the end of the potential new region.
1541 */
1c79356b 1542
3e170ce0
A
1543 if (next == vm_map_to_entry(map))
1544 break;
1545
1546 if (next->vme_start >= end)
1547 break;
1548 }
1c79356b
A
1549
1550 /*
1551 * Didn't fit -- move to the next entry.
1552 */
1553
1554 entry = next;
3e170ce0
A
1555
1556 if (map->holelistenabled) {
1557 if (entry == (vm_map_entry_t) map->holes_list) {
1558 /*
1559 * Wrapped around
1560 */
1561 vm_map_entry_dispose(map, new_entry);
1562 vm_map_unlock(map);
1563 return(KERN_NO_SPACE);
1564 }
1565 start = entry->vme_start;
1566 } else {
1567 start = entry->vme_end;
1568 }
1569 }
1570
1571 if (map->holelistenabled) {
1572 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
1573 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
1574 }
1c79356b
A
1575 }
1576
1577 /*
1578 * At this point,
1579 * "start" and "end" should define the endpoints of the
1580 * available new range, and
1581 * "entry" should refer to the region before the new
1582 * range, and
1583 *
1584 * the map should be locked.
1585 */
1586
2d21ac55
A
1587 if (flags & VM_FLAGS_GUARD_BEFORE) {
1588 /* go back for the front guard page */
39236c6e 1589 start -= VM_MAP_PAGE_SIZE(map);
2d21ac55 1590 }
1c79356b
A
1591 *address = start;
1592
e2d2fc5c 1593 assert(start < end);
1c79356b
A
1594 new_entry->vme_start = start;
1595 new_entry->vme_end = end;
1596 assert(page_aligned(new_entry->vme_start));
1597 assert(page_aligned(new_entry->vme_end));
39236c6e
A
1598 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
1599 VM_MAP_PAGE_MASK(map)));
1600 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
1601 VM_MAP_PAGE_MASK(map)));
1c79356b
A
1602
1603 new_entry->is_shared = FALSE;
1604 new_entry->is_sub_map = FALSE;
fe8ab488 1605 new_entry->use_pmap = TRUE;
3e170ce0
A
1606 VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
1607 VME_OFFSET_SET(new_entry, (vm_object_offset_t) 0);
1c79356b
A
1608
1609 new_entry->needs_copy = FALSE;
1610
1611 new_entry->inheritance = VM_INHERIT_DEFAULT;
1612 new_entry->protection = VM_PROT_DEFAULT;
1613 new_entry->max_protection = VM_PROT_ALL;
1614 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
1615 new_entry->wired_count = 0;
1616 new_entry->user_wired_count = 0;
1617
1618 new_entry->in_transition = FALSE;
1619 new_entry->needs_wakeup = FALSE;
2d21ac55 1620 new_entry->no_cache = FALSE;
b0d623f7 1621 new_entry->permanent = FALSE;
39236c6e
A
1622 new_entry->superpage_size = FALSE;
1623 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
1624 new_entry->map_aligned = TRUE;
1625 } else {
1626 new_entry->map_aligned = FALSE;
1627 }
2d21ac55 1628
3e170ce0 1629 new_entry->used_for_jit = FALSE;
b0d623f7 1630 new_entry->zero_wired_pages = FALSE;
fe8ab488 1631 new_entry->iokit_acct = FALSE;
3e170ce0
A
1632 new_entry->vme_resilient_codesign = FALSE;
1633 new_entry->vme_resilient_media = FALSE;
39037602
A
1634 if (flags & VM_FLAGS_ATOMIC_ENTRY)
1635 new_entry->vme_atomic = TRUE;
1636 else
1637 new_entry->vme_atomic = FALSE;
1c79356b 1638
3e170ce0
A
1639 int alias;
1640 VM_GET_FLAGS_ALIAS(flags, alias);
1641 VME_ALIAS_SET(new_entry, alias);
0c530ab8 1642
1c79356b
A
1643 /*
1644 * Insert the new entry into the list
1645 */
1646
6d2010ae 1647 vm_map_store_entry_link(map, entry, new_entry);
1c79356b
A
1648
1649 map->size += size;
1650
1651 /*
1652 * Update the lookup hint
1653 */
0c530ab8 1654 SAVE_HINT_MAP_WRITE(map, new_entry);
1c79356b
A
1655
1656 *o_entry = new_entry;
1657 return(KERN_SUCCESS);
1658}
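/*
 * Usage sketch (loosely what kmem_alloc-style callers do; object creation
 * and error handling are abbreviated):
 *
 *	vm_map_offset_t	addr;
 *	vm_map_entry_t	entry;
 *	kern_return_t	kr;
 *
 *	kr = vm_map_find_space(map, &addr, size, 0, 0, &entry);
 *	if (kr == KERN_SUCCESS) {
 *		// the map comes back locked; entry covers [addr, addr+size)
 *		// and its object/offset are still zero
 *		VME_OBJECT_SET(entry, object);	// object supplied by caller
 *		vm_map_unlock(map);
 *	}
 */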
1659
1660int vm_map_pmap_enter_print = FALSE;
1661int vm_map_pmap_enter_enable = FALSE;
1662
1663/*
91447636 1664 * Routine: vm_map_pmap_enter [internal only]
1c79356b
A
1665 *
1666 * Description:
1667 * Force pages from the specified object to be entered into
1668 * the pmap at the specified address if they are present.
 1669 * As soon as a page is not found in the object, the scan ends.
1670 *
1671 * Returns:
1672 * Nothing.
1673 *
1674 * In/out conditions:
1675 * The source map should not be locked on entry.
1676 */
fe8ab488 1677__unused static void
1c79356b
A
1678vm_map_pmap_enter(
1679 vm_map_t map,
39037602
A
1680 vm_map_offset_t addr,
1681 vm_map_offset_t end_addr,
1682 vm_object_t object,
1c79356b
A
1683 vm_object_offset_t offset,
1684 vm_prot_t protection)
1685{
2d21ac55
A
1686 int type_of_fault;
1687 kern_return_t kr;
0b4e3aa0 1688
55e303ae
A
1689 if(map->pmap == 0)
1690 return;
1691
1c79356b 1692 while (addr < end_addr) {
39037602 1693 vm_page_t m;
1c79356b 1694
fe8ab488
A
1695
1696 /*
1697 * TODO:
1698 * From vm_map_enter(), we come into this function without the map
1699 * lock held or the object lock held.
1700 * We haven't taken a reference on the object either.
1701 * We should do a proper lookup on the map to make sure
1702 * that things are sane before we go locking objects that
1703 * could have been deallocated from under us.
1704 */
1705
1c79356b 1706 vm_object_lock(object);
1c79356b
A
1707
1708 m = vm_page_lookup(object, offset);
91447636
A
1709 /*
1710 * ENCRYPTED SWAP:
1711 * The user should never see encrypted data, so do not
1712 * enter an encrypted page in the page table.
1713 */
1714 if (m == VM_PAGE_NULL || m->busy || m->encrypted ||
2d21ac55
A
1715 m->fictitious ||
1716 (m->unusual && ( m->error || m->restart || m->absent))) {
1c79356b
A
1717 vm_object_unlock(object);
1718 return;
1719 }
1720
1c79356b
A
1721 if (vm_map_pmap_enter_print) {
1722 printf("vm_map_pmap_enter:");
2d21ac55
A
1723 printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
1724 map, (unsigned long long)addr, object, (unsigned long long)offset);
1c79356b 1725 }
2d21ac55 1726 type_of_fault = DBG_CACHE_HIT_FAULT;
6d2010ae 1727 kr = vm_fault_enter(m, map->pmap, addr, protection, protection,
fe8ab488
A
1728 VM_PAGE_WIRED(m), FALSE, FALSE, FALSE,
1729 0, /* XXX need user tag / alias? */
1730 0, /* alternate accounting? */
1731 NULL,
2d21ac55 1732 &type_of_fault);
1c79356b 1733
1c79356b
A
1734 vm_object_unlock(object);
1735
1736 offset += PAGE_SIZE_64;
1737 addr += PAGE_SIZE;
1738 }
1739}
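/*
 * Illustrative sketch only: the routine above is currently marked
 * __unused, but a hypothetical caller wanting to pre-populate the pmap
 * for a freshly created, read-only mapping could invoke it as follows
 * ("entry", "obj" and "off" are assumed locals):
 *
 *	vm_map_pmap_enter(map,
 *			  entry->vme_start, entry->vme_end,
 *			  obj, off, VM_PROT_READ);
 *
 * The scan stops at the first offset for which the object has no
 * resident page.
 */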
1740
91447636
A
1741boolean_t vm_map_pmap_is_empty(
1742 vm_map_t map,
1743 vm_map_offset_t start,
1744 vm_map_offset_t end);
1745boolean_t vm_map_pmap_is_empty(
1746 vm_map_t map,
1747 vm_map_offset_t start,
1748 vm_map_offset_t end)
1749{
2d21ac55
A
1750#ifdef MACHINE_PMAP_IS_EMPTY
1751 return pmap_is_empty(map->pmap, start, end);
1752#else /* MACHINE_PMAP_IS_EMPTY */
91447636
A
1753 vm_map_offset_t offset;
1754 ppnum_t phys_page;
1755
1756 if (map->pmap == NULL) {
1757 return TRUE;
1758 }
2d21ac55 1759
91447636
A
1760 for (offset = start;
1761 offset < end;
1762 offset += PAGE_SIZE) {
1763 phys_page = pmap_find_phys(map->pmap, offset);
1764 if (phys_page) {
1765 kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
1766 "page %d at 0x%llx\n",
2d21ac55
A
1767 map, (long long)start, (long long)end,
1768 phys_page, (long long)offset);
91447636
A
1769 return FALSE;
1770 }
1771 }
1772 return TRUE;
2d21ac55 1773#endif /* MACHINE_PMAP_IS_EMPTY */
91447636
A
1774}
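/*
 * Example of intended use (a sketch based on the DEBUG-only check in
 * vm_map_enter() below): verify that no physical pages are already
 * entered in the pmap for a range that is about to be mapped.
 *
 *	assert(vm_map_pmap_is_empty(map, *address, *address + size));
 */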
1775
316670eb
A
1776#define MAX_TRIES_TO_GET_RANDOM_ADDRESS 1000
1777kern_return_t
1778vm_map_random_address_for_size(
1779 vm_map_t map,
1780 vm_map_offset_t *address,
1781 vm_map_size_t size)
1782{
1783 kern_return_t kr = KERN_SUCCESS;
1784 int tries = 0;
1785 vm_map_offset_t random_addr = 0;
1786 vm_map_offset_t hole_end;
1787
1788 vm_map_entry_t next_entry = VM_MAP_ENTRY_NULL;
1789 vm_map_entry_t prev_entry = VM_MAP_ENTRY_NULL;
1790 vm_map_size_t vm_hole_size = 0;
1791 vm_map_size_t addr_space_size;
1792
1793 addr_space_size = vm_map_max(map) - vm_map_min(map);
1794
1795 assert(page_aligned(size));
1796
1797 while (tries < MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
1798 random_addr = ((vm_map_offset_t)random()) << PAGE_SHIFT;
39236c6e
A
1799 random_addr = vm_map_trunc_page(
1800 vm_map_min(map) +(random_addr % addr_space_size),
1801 VM_MAP_PAGE_MASK(map));
316670eb
A
1802
1803 if (vm_map_lookup_entry(map, random_addr, &prev_entry) == FALSE) {
1804 if (prev_entry == vm_map_to_entry(map)) {
1805 next_entry = vm_map_first_entry(map);
1806 } else {
1807 next_entry = prev_entry->vme_next;
1808 }
1809 if (next_entry == vm_map_to_entry(map)) {
1810 hole_end = vm_map_max(map);
1811 } else {
1812 hole_end = next_entry->vme_start;
1813 }
1814 vm_hole_size = hole_end - random_addr;
1815 if (vm_hole_size >= size) {
1816 *address = random_addr;
1817 break;
1818 }
1819 }
1820 tries++;
1821 }
1822
1823 if (tries == MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
1824 kr = KERN_NO_SPACE;
1825 }
1826 return kr;
1827}
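/*
 * Usage sketch (mirrors the VM_FLAGS_RANDOM_ADDR path in vm_map_enter()
 * below); "map" and "size" are assumed to be a locked map and a
 * page-aligned size:
 *
 *	vm_map_offset_t addr;
 *
 *	if (vm_map_random_address_for_size(map, &addr, size) ==
 *	    KERN_SUCCESS) {
 *		use "addr" as the randomized start of the search
 *	}
 */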
1828
1c79356b
A
1829/*
1830 * Routine: vm_map_enter
1831 *
1832 * Description:
1833 * Allocate a range in the specified virtual address map.
1834 * The resulting range will refer to memory defined by
1835 * the given memory object and offset into that object.
1836 *
1837 * Arguments are as defined in the vm_map call.
1838 */
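/*
 * Minimal usage sketch (illustrative, with a hypothetical page-aligned
 * "size"): an anonymous, pageable allocation placed anywhere in "map".
 *
 *	vm_map_offset_t addr = 0;
 *	kern_return_t kr;
 *
 *	kr = vm_map_enter(map, &addr, size, (vm_map_offset_t)0,
 *			  VM_FLAGS_ANYWHERE,
 *			  VM_OBJECT_NULL, (vm_object_offset_t)0, FALSE,
 *			  VM_PROT_DEFAULT, VM_PROT_ALL,
 *			  VM_INHERIT_DEFAULT);
 *
 * On success, "addr" holds the start of the new range.
 */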
91447636
A
1839int _map_enter_debug = 0;
1840static unsigned int vm_map_enter_restore_successes = 0;
1841static unsigned int vm_map_enter_restore_failures = 0;
1c79356b
A
1842kern_return_t
1843vm_map_enter(
91447636 1844 vm_map_t map,
593a1d5f 1845 vm_map_offset_t *address, /* IN/OUT */
91447636 1846 vm_map_size_t size,
593a1d5f 1847 vm_map_offset_t mask,
1c79356b
A
1848 int flags,
1849 vm_object_t object,
1850 vm_object_offset_t offset,
1851 boolean_t needs_copy,
1852 vm_prot_t cur_protection,
1853 vm_prot_t max_protection,
1854 vm_inherit_t inheritance)
1855{
91447636 1856 vm_map_entry_t entry, new_entry;
2d21ac55 1857 vm_map_offset_t start, tmp_start, tmp_offset;
91447636 1858 vm_map_offset_t end, tmp_end;
b0d623f7
A
1859 vm_map_offset_t tmp2_start, tmp2_end;
1860 vm_map_offset_t step;
1c79356b 1861 kern_return_t result = KERN_SUCCESS;
91447636
A
1862 vm_map_t zap_old_map = VM_MAP_NULL;
1863 vm_map_t zap_new_map = VM_MAP_NULL;
1864 boolean_t map_locked = FALSE;
1865 boolean_t pmap_empty = TRUE;
1866 boolean_t new_mapping_established = FALSE;
fe8ab488 1867 boolean_t keep_map_locked = ((flags & VM_FLAGS_KEEP_MAP_LOCKED) != 0);
91447636
A
1868 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
1869 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
1870 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
2d21ac55
A
1871 boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
1872 boolean_t is_submap = ((flags & VM_FLAGS_SUBMAP) != 0);
b0d623f7 1873 boolean_t permanent = ((flags & VM_FLAGS_PERMANENT) != 0);
316670eb 1874 boolean_t entry_for_jit = ((flags & VM_FLAGS_MAP_JIT) != 0);
fe8ab488 1875 boolean_t iokit_acct = ((flags & VM_FLAGS_IOKIT_ACCT) != 0);
3e170ce0
A
1876 boolean_t resilient_codesign = ((flags & VM_FLAGS_RESILIENT_CODESIGN) != 0);
1877 boolean_t resilient_media = ((flags & VM_FLAGS_RESILIENT_MEDIA) != 0);
39037602 1878 boolean_t random_address = ((flags & VM_FLAGS_RANDOM_ADDR) != 0);
b0d623f7 1879 unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
3e170ce0 1880 vm_tag_t alias, user_alias;
2d21ac55 1881 vm_map_offset_t effective_min_offset, effective_max_offset;
593a1d5f 1882 kern_return_t kr;
39236c6e 1883 boolean_t clear_map_aligned = FALSE;
3e170ce0 1884 vm_map_entry_t hole_entry;
593a1d5f 1885
b0d623f7
A
1886 if (superpage_size) {
1887 switch (superpage_size) {
1888 /*
1889 * Note that the current implementation only supports
1890 * a single size for superpages, SUPERPAGE_SIZE, per
1891 * architecture. Once more sizes are supported,
1892 * SUPERPAGE_SIZE will have to be replaced with a lookup
1893 * of the size based on superpage_size.
1894 */
1895#ifdef __x86_64__
6d2010ae
A
1896 case SUPERPAGE_SIZE_ANY:
1897 /* handle it like 2 MB and round up to page size */
1898 size = (size + 2*1024*1024 - 1) & ~(2*1024*1024 - 1);
b0d623f7
A
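 /* no break: fall through and share the 2 MB case */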
1899 case SUPERPAGE_SIZE_2MB:
1900 break;
1901#endif
1902 default:
1903 return KERN_INVALID_ARGUMENT;
1904 }
1905 mask = SUPERPAGE_SIZE-1;
1906 if (size & (SUPERPAGE_SIZE-1))
1907 return KERN_INVALID_ARGUMENT;
1908 inheritance = VM_INHERIT_NONE; /* fork() children won't inherit superpages */
1909 }
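/*
 * Rounding example for SUPERPAGE_SIZE_ANY on x86_64 (illustrative
 * numbers): a 3 MB request becomes
 * (3 MB + 2 MB - 1) & ~(2 MB - 1) == 4 MB, i.e. two 2 MB superpages;
 * the mask set above then forces 2 MB alignment of the mapping.
 */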
1910
6d2010ae 1911
1c79356b 1912
3e170ce0
A
1913 if (resilient_codesign || resilient_media) {
1914 if ((cur_protection & (VM_PROT_WRITE | VM_PROT_EXECUTE)) ||
1915 (max_protection & (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
1916 return KERN_PROTECTION_FAILURE;
1917 }
1918 }
1919
2d21ac55
A
1920 if (is_submap) {
1921 if (purgable) {
1922 /* submaps cannot be purgeable */
1923 return KERN_INVALID_ARGUMENT;
1924 }
1925 if (object == VM_OBJECT_NULL) {
1926 /* submaps cannot be created lazily */
1927 return KERN_INVALID_ARGUMENT;
1928 }
1929 }
1930 if (flags & VM_FLAGS_ALREADY) {
1931 /*
1932 * VM_FLAGS_ALREADY says that it's OK if the same mapping
1933 * is already present. For it to be meaningful, the requested
1934 * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
1935 * we shouldn't try to remove what was mapped there first
1936 * (!VM_FLAGS_OVERWRITE).
1937 */
1938 if ((flags & VM_FLAGS_ANYWHERE) ||
1939 (flags & VM_FLAGS_OVERWRITE)) {
1940 return KERN_INVALID_ARGUMENT;
1941 }
1942 }
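/*
 * Illustrative sketch of a VM_FLAGS_ALREADY caller (hypothetical
 * variables): the flag only makes sense for a fixed-address request
 * without overwrite, and an equivalent pre-existing mapping is
 * reported as KERN_MEMORY_PRESENT rather than as an error.
 *
 *	kr = vm_map_enter(map, &addr, size, (vm_map_offset_t)0,
 *			  VM_FLAGS_FIXED | VM_FLAGS_ALREADY,
 *			  object, offset, FALSE,
 *			  cur_prot, max_prot, VM_INHERIT_DEFAULT);
 *	if (kr == KERN_MEMORY_PRESENT) {
 *		the same mapping was already in place
 *	}
 */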
1943
6d2010ae 1944 effective_min_offset = map->min_offset;
b0d623f7 1945
2d21ac55
A
1946 if (flags & VM_FLAGS_BEYOND_MAX) {
1947 /*
b0d623f7 1948 * Allow an insertion beyond the map's max offset.
2d21ac55
A
1949 */
1950 if (vm_map_is_64bit(map))
1951 effective_max_offset = 0xFFFFFFFFFFFFF000ULL;
1952 else
1953 effective_max_offset = 0x00000000FFFFF000ULL;
1954 } else {
1955 effective_max_offset = map->max_offset;
1956 }
1957
1958 if (size == 0 ||
1959 (offset & PAGE_MASK_64) != 0) {
91447636
A
1960 *address = 0;
1961 return KERN_INVALID_ARGUMENT;
1962 }
1963
1c79356b 1964 VM_GET_FLAGS_ALIAS(flags, alias);
3e170ce0
A
1965 if (map->pmap == kernel_pmap) {
1966 user_alias = VM_KERN_MEMORY_NONE;
1967 } else {
1968 user_alias = alias;
1969 }
2d21ac55 1970
1c79356b
A
1971#define RETURN(value) { result = value; goto BailOut; }
1972
1973 assert(page_aligned(*address));
1974 assert(page_aligned(size));
91447636 1975
39236c6e
A
1976 if (!VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map))) {
1977 /*
1978 * In most cases, the caller rounds the size up to the
1979 * map's page size.
1980 * If we get a size that is explicitly not map-aligned here,
1981 * we'll have to respect the caller's wish and mark the
1982 * mapping as "not map-aligned" to avoid tripping the
1983 * map alignment checks later.
1984 */
1985 clear_map_aligned = TRUE;
1986 }
fe8ab488
A
1987 if (!anywhere &&
1988 !VM_MAP_PAGE_ALIGNED(*address, VM_MAP_PAGE_MASK(map))) {
1989 /*
1990 * We've been asked to map at a fixed address and that
1991 * address is not aligned to the map's specific alignment.
1992 * The caller should know what it's doing (i.e. most likely
1993 * mapping some fragmented copy map, transferring memory from
1994 * a VM map with a different alignment), so clear map_aligned
1995 * for this new VM map entry and proceed.
1996 */
1997 clear_map_aligned = TRUE;
1998 }
39236c6e 1999
91447636
A
2000 /*
2001 * Only zero-fill objects are allowed to be purgable.
2002 * LP64todo - limit purgable objects to 32-bits for now
2003 */
2004 if (purgable &&
2005 (offset != 0 ||
2006 (object != VM_OBJECT_NULL &&
6d2010ae 2007 (object->vo_size != size ||
2d21ac55 2008 object->purgable == VM_PURGABLE_DENY))
b0d623f7 2009 || size > ANON_MAX_SIZE)) /* LP64todo: remove when dp capable */
91447636
A
2010 return KERN_INVALID_ARGUMENT;
2011
2012 if (!anywhere && overwrite) {
2013 /*
2014 * Create a temporary VM map to hold the old mappings in the
2015 * affected area while we create the new one.
2016 * This avoids releasing the VM map lock in
2017 * vm_map_entry_delete() and allows atomicity
2018 * when we want to replace some mappings with a new one.
2019 * It also allows us to restore the old VM mappings if the
2020 * new mapping fails.
2021 */
2022 zap_old_map = vm_map_create(PMAP_NULL,
2023 *address,
2024 *address + size,
b0d623f7 2025 map->hdr.entries_pageable);
39236c6e 2026 vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
3e170ce0 2027 vm_map_disable_hole_optimization(zap_old_map);
91447636
A
2028 }
2029
2d21ac55 2030StartAgain: ;
1c79356b
A
2031
2032 start = *address;
2033
2034 if (anywhere) {
2035 vm_map_lock(map);
91447636 2036 map_locked = TRUE;
6d2010ae 2037
316670eb
A
2038 if (entry_for_jit) {
2039 if (map->jit_entry_exists) {
2040 result = KERN_INVALID_ARGUMENT;
2041 goto BailOut;
2042 }
39037602
A
2043 random_address = TRUE;
2044 }
2045
2046 if (random_address) {
316670eb
A
2047 /*
2048 * Get a random start address.
2049 */
2050 result = vm_map_random_address_for_size(map, address, size);
2051 if (result != KERN_SUCCESS) {
2052 goto BailOut;
2053 }
2054 start = *address;
6d2010ae 2055 }
1c79356b 2056
316670eb 2057
1c79356b
A
2058 /*
2059 * Calculate the first possible address.
2060 */
2061
2d21ac55
A
2062 if (start < effective_min_offset)
2063 start = effective_min_offset;
2064 if (start > effective_max_offset)
1c79356b
A
2065 RETURN(KERN_NO_SPACE);
2066
2067 /*
2068 * Look for the first possible address;
2069 * if there's already something at this
2070 * address, we have to start after it.
2071 */
2072
6d2010ae
A
2073 if( map->disable_vmentry_reuse == TRUE) {
2074 VM_MAP_HIGHEST_ENTRY(map, entry, start);
1c79356b 2075 } else {
6d2010ae 2076
3e170ce0
A
2077 if (map->holelistenabled) {
2078 hole_entry = (vm_map_entry_t)map->holes_list;
2079
2080 if (hole_entry == NULL) {
2081 /*
2082 * No more space in the map?
2083 */
2084 result = KERN_NO_SPACE;
2085 goto BailOut;
2086 } else {
2087
2088 boolean_t found_hole = FALSE;
2089
2090 do {
2091 if (hole_entry->vme_start >= start) {
2092 start = hole_entry->vme_start;
2093 found_hole = TRUE;
2094 break;
2095 }
2096
2097 if (hole_entry->vme_end > start) {
2098 found_hole = TRUE;
2099 break;
2100 }
2101 hole_entry = hole_entry->vme_next;
2102
2103 } while (hole_entry != (vm_map_entry_t) map->holes_list);
2104
2105 if (found_hole == FALSE) {
2106 result = KERN_NO_SPACE;
2107 goto BailOut;
2108 }
2109
2110 entry = hole_entry;
6d2010ae 2111
3e170ce0
A
2112 if (start == 0)
2113 start += PAGE_SIZE_64;
2114 }
6d2010ae 2115 } else {
3e170ce0
A
2116 assert(first_free_is_valid(map));
2117
2118 entry = map->first_free;
2119
2120 if (entry == vm_map_to_entry(map)) {
6d2010ae 2121 entry = NULL;
3e170ce0
A
2122 } else {
2123 if (entry->vme_next == vm_map_to_entry(map)){
2124 /*
2125 * Hole at the end of the map.
2126 */
2127 entry = NULL;
2128 } else {
2129 if (start < (entry->vme_next)->vme_start ) {
2130 start = entry->vme_end;
2131 start = vm_map_round_page(start,
2132 VM_MAP_PAGE_MASK(map));
2133 } else {
2134 /*
2135 * Need to do a lookup.
2136 */
2137 entry = NULL;
2138 }
2139 }
2140 }
2141
2142 if (entry == NULL) {
2143 vm_map_entry_t tmp_entry;
2144 if (vm_map_lookup_entry(map, start, &tmp_entry)) {
2145 assert(!entry_for_jit);
2146 start = tmp_entry->vme_end;
39236c6e
A
2147 start = vm_map_round_page(start,
2148 VM_MAP_PAGE_MASK(map));
6d2010ae 2149 }
3e170ce0 2150 entry = tmp_entry;
316670eb 2151 }
6d2010ae 2152 }
1c79356b
A
2153 }
2154
2155 /*
2156 * In any case, the "entry" always precedes
2157 * the proposed new region throughout the
2158 * loop:
2159 */
2160
2161 while (TRUE) {
39037602 2162 vm_map_entry_t next;
1c79356b 2163
2d21ac55 2164 /*
1c79356b
A
2165 * Find the end of the proposed new region.
2166 * Be sure we didn't go beyond the end, or
2167 * wrap around the address.
2168 */
2169
2170 end = ((start + mask) & ~mask);
39236c6e
A
2171 end = vm_map_round_page(end,
2172 VM_MAP_PAGE_MASK(map));
1c79356b
A
2173 if (end < start)
2174 RETURN(KERN_NO_SPACE);
2175 start = end;
39236c6e
A
2176 assert(VM_MAP_PAGE_ALIGNED(start,
2177 VM_MAP_PAGE_MASK(map)));
1c79356b
A
2178 end += size;
2179
2d21ac55 2180 if ((end > effective_max_offset) || (end < start)) {
1c79356b 2181 if (map->wait_for_space) {
fe8ab488 2182 assert(!keep_map_locked);
2d21ac55
A
2183 if (size <= (effective_max_offset -
2184 effective_min_offset)) {
1c79356b
A
2185 assert_wait((event_t)map,
2186 THREAD_ABORTSAFE);
2187 vm_map_unlock(map);
91447636
A
2188 map_locked = FALSE;
2189 thread_block(THREAD_CONTINUE_NULL);
1c79356b
A
2190 goto StartAgain;
2191 }
2192 }
2193 RETURN(KERN_NO_SPACE);
2194 }
2195
1c79356b 2196 next = entry->vme_next;
1c79356b 2197
3e170ce0
A
2198 if (map->holelistenabled) {
2199 if (entry->vme_end >= end)
2200 break;
2201 } else {
2202 /*
2203 * If there are no more entries, we must win.
2204 *
2205 * OR
2206 *
2207 * If there is another entry, it must be
2208 * after the end of the potential new region.
2209 */
1c79356b 2210
3e170ce0
A
2211 if (next == vm_map_to_entry(map))
2212 break;
2213
2214 if (next->vme_start >= end)
2215 break;
2216 }
1c79356b
A
2217
2218 /*
2219 * Didn't fit -- move to the next entry.
2220 */
2221
2222 entry = next;
3e170ce0
A
2223
2224 if (map->holelistenabled) {
2225 if (entry == (vm_map_entry_t) map->holes_list) {
2226 /*
2227 * Wrapped around
2228 */
2229 result = KERN_NO_SPACE;
2230 goto BailOut;
2231 }
2232 start = entry->vme_start;
2233 } else {
2234 start = entry->vme_end;
2235 }
2236
39236c6e
A
2237 start = vm_map_round_page(start,
2238 VM_MAP_PAGE_MASK(map));
1c79356b 2239 }
3e170ce0
A
2240
2241 if (map->holelistenabled) {
2242 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
2243 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
2244 }
2245 }
2246
1c79356b 2247 *address = start;
39236c6e
A
2248 assert(VM_MAP_PAGE_ALIGNED(*address,
2249 VM_MAP_PAGE_MASK(map)));
1c79356b 2250 } else {
1c79356b
A
2251 /*
2252 * Verify that:
2253 * the address doesn't itself violate
2254 * the mask requirement.
2255 */
2256
2257 vm_map_lock(map);
91447636 2258 map_locked = TRUE;
1c79356b
A
2259 if ((start & mask) != 0)
2260 RETURN(KERN_NO_SPACE);
2261
2262 /*
2263 * ... the address is within bounds
2264 */
2265
2266 end = start + size;
2267
2d21ac55
A
2268 if ((start < effective_min_offset) ||
2269 (end > effective_max_offset) ||
1c79356b
A
2270 (start >= end)) {
2271 RETURN(KERN_INVALID_ADDRESS);
2272 }
2273
91447636
A
2274 if (overwrite && zap_old_map != VM_MAP_NULL) {
2275 /*
2276 * Fixed mapping and "overwrite" flag: attempt to
2277 * remove all existing mappings in the specified
2278 * address range, saving them in our "zap_old_map".
2279 */
2280 (void) vm_map_delete(map, start, end,
fe8ab488
A
2281 (VM_MAP_REMOVE_SAVE_ENTRIES |
2282 VM_MAP_REMOVE_NO_MAP_ALIGN),
91447636
A
2283 zap_old_map);
2284 }
2285
1c79356b
A
2286 /*
2287 * ... the starting address isn't allocated
2288 */
2289
2d21ac55
A
2290 if (vm_map_lookup_entry(map, start, &entry)) {
2291 if (! (flags & VM_FLAGS_ALREADY)) {
2292 RETURN(KERN_NO_SPACE);
2293 }
2294 /*
2295 * Check if what's already there is what we want.
2296 */
2297 tmp_start = start;
2298 tmp_offset = offset;
2299 if (entry->vme_start < start) {
2300 tmp_start -= start - entry->vme_start;
2301 tmp_offset -= start - entry->vme_start;
2302
2303 }
2304 for (; entry->vme_start < end;
2305 entry = entry->vme_next) {
4a3eedf9
A
2306 /*
2307 * Check if the mapping's attributes
2308 * match the existing map entry.
2309 */
2d21ac55
A
2310 if (entry == vm_map_to_entry(map) ||
2311 entry->vme_start != tmp_start ||
2312 entry->is_sub_map != is_submap ||
3e170ce0 2313 VME_OFFSET(entry) != tmp_offset ||
2d21ac55
A
2314 entry->needs_copy != needs_copy ||
2315 entry->protection != cur_protection ||
2316 entry->max_protection != max_protection ||
2317 entry->inheritance != inheritance ||
fe8ab488 2318 entry->iokit_acct != iokit_acct ||
3e170ce0 2319 VME_ALIAS(entry) != alias) {
2d21ac55
A
2320 /* not the same mapping ! */
2321 RETURN(KERN_NO_SPACE);
2322 }
4a3eedf9
A
2323 /*
2324 * Check if the same object is being mapped.
2325 */
2326 if (is_submap) {
3e170ce0 2327 if (VME_SUBMAP(entry) !=
4a3eedf9
A
2328 (vm_map_t) object) {
2329 /* not the same submap */
2330 RETURN(KERN_NO_SPACE);
2331 }
2332 } else {
3e170ce0 2333 if (VME_OBJECT(entry) != object) {
4a3eedf9
A
2334 /* not the same VM object... */
2335 vm_object_t obj2;
2336
3e170ce0 2337 obj2 = VME_OBJECT(entry);
4a3eedf9
A
2338 if ((obj2 == VM_OBJECT_NULL ||
2339 obj2->internal) &&
2340 (object == VM_OBJECT_NULL ||
2341 object->internal)) {
2342 /*
2343 * ... but both are
2344 * anonymous memory,
2345 * so equivalent.
2346 */
2347 } else {
2348 RETURN(KERN_NO_SPACE);
2349 }
2350 }
2351 }
2352
2d21ac55
A
2353 tmp_offset += entry->vme_end - entry->vme_start;
2354 tmp_start += entry->vme_end - entry->vme_start;
2355 if (entry->vme_end >= end) {
2356 /* reached the end of our mapping */
2357 break;
2358 }
2359 }
2360 /* it all matches: let's use what's already there ! */
2361 RETURN(KERN_MEMORY_PRESENT);
2362 }
1c79356b
A
2363
2364 /*
2365 * ... the next region doesn't overlap the
2366 * end point.
2367 */
2368
2369 if ((entry->vme_next != vm_map_to_entry(map)) &&
2370 (entry->vme_next->vme_start < end))
2371 RETURN(KERN_NO_SPACE);
2372 }
2373
2374 /*
2375 * At this point,
2376 * "start" and "end" should define the endpoints of the
2377 * available new range, and
2378 * "entry" should refer to the region before the new
2379 * range, and
2380 *
2381 * the map should be locked.
2382 */
2383
2384 /*
2385 * See whether we can avoid creating a new entry (and object) by
2386 * extending one of our neighbors. [So far, we only attempt to
91447636
A
2387 * extend from below.] Note that we can never extend/join
2388 * purgable objects because they need to remain distinct
2389 * entities in order to implement their "volatile object"
2390 * semantics.
1c79356b
A
2391 */
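/*
 * Worked example (illustrative addresses): if the entry below the new
 * range covers [0x1000, 0x3000) with an anonymous object and the new
 * request is [0x3000, 0x5000) with identical attributes, the checks
 * below let vm_object_coalesce() grow the existing object and the
 * entry is simply extended to [0x1000, 0x5000); no new entry or
 * object is created.
 */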
2392
316670eb 2393 if (purgable || entry_for_jit) {
91447636 2394 if (object == VM_OBJECT_NULL) {
3e170ce0 2395
91447636
A
2396 object = vm_object_allocate(size);
2397 object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
fe8ab488 2398 object->true_share = TRUE;
316670eb 2399 if (purgable) {
fe8ab488 2400 task_t owner;
316670eb 2401 object->purgable = VM_PURGABLE_NONVOLATILE;
fe8ab488
A
2402 if (map->pmap == kernel_pmap) {
2403 /*
2404 * Purgeable mappings made in a kernel
2405 * map are "owned" by the kernel itself
2406 * rather than the current user task
2407 * because they're likely to be used by
2408 * more than this user task (see
2409 * execargs_purgeable_allocate(), for
2410 * example).
2411 */
2412 owner = kernel_task;
2413 } else {
2414 owner = current_task();
2415 }
2416 assert(object->vo_purgeable_owner == NULL);
2417 assert(object->resident_page_count == 0);
2418 assert(object->wired_page_count == 0);
2419 vm_object_lock(object);
2420 vm_purgeable_nonvolatile_enqueue(object, owner);
2421 vm_object_unlock(object);
316670eb 2422 }
91447636
A
2423 offset = (vm_object_offset_t)0;
2424 }
2d21ac55
A
2425 } else if ((is_submap == FALSE) &&
2426 (object == VM_OBJECT_NULL) &&
2427 (entry != vm_map_to_entry(map)) &&
2428 (entry->vme_end == start) &&
2429 (!entry->is_shared) &&
2430 (!entry->is_sub_map) &&
fe8ab488
A
2431 (!entry->in_transition) &&
2432 (!entry->needs_wakeup) &&
2433 (entry->behavior == VM_BEHAVIOR_DEFAULT) &&
2d21ac55
A
2434 (entry->protection == cur_protection) &&
2435 (entry->max_protection == max_protection) &&
fe8ab488 2436 (entry->inheritance == inheritance) &&
3e170ce0
A
2437 ((user_alias == VM_MEMORY_REALLOC) ||
2438 (VME_ALIAS(entry) == alias)) &&
2d21ac55 2439 (entry->no_cache == no_cache) &&
fe8ab488
A
2440 (entry->permanent == permanent) &&
2441 (!entry->superpage_size && !superpage_size) &&
39236c6e
A
2442 /*
2443 * No coalescing if not map-aligned, to avoid propagating
2444 * that condition any further than needed:
2445 */
2446 (!entry->map_aligned || !clear_map_aligned) &&
fe8ab488
A
2447 (!entry->zero_wired_pages) &&
2448 (!entry->used_for_jit && !entry_for_jit) &&
2449 (entry->iokit_acct == iokit_acct) &&
3e170ce0
A
2450 (!entry->vme_resilient_codesign) &&
2451 (!entry->vme_resilient_media) &&
39037602 2452 (!entry->vme_atomic) &&
fe8ab488 2453
b0d623f7 2454 ((entry->vme_end - entry->vme_start) + size <=
3e170ce0 2455 (user_alias == VM_MEMORY_REALLOC ?
b0d623f7
A
2456 ANON_CHUNK_SIZE :
2457 NO_COALESCE_LIMIT)) &&
fe8ab488 2458
2d21ac55 2459 (entry->wired_count == 0)) { /* implies user_wired_count == 0 */
3e170ce0 2460 if (vm_object_coalesce(VME_OBJECT(entry),
2d21ac55 2461 VM_OBJECT_NULL,
3e170ce0 2462 VME_OFFSET(entry),
2d21ac55
A
2463 (vm_object_offset_t) 0,
2464 (vm_map_size_t)(entry->vme_end - entry->vme_start),
2465 (vm_map_size_t)(end - entry->vme_end))) {
1c79356b
A
2466
2467 /*
2468 * Coalesced the two objects - can extend
2469 * the previous map entry to include the
2470 * new range.
2471 */
2472 map->size += (end - entry->vme_end);
e2d2fc5c 2473 assert(entry->vme_start < end);
39236c6e
A
2474 assert(VM_MAP_PAGE_ALIGNED(end,
2475 VM_MAP_PAGE_MASK(map)));
3e170ce0
A
2476 if (__improbable(vm_debug_events))
2477 DTRACE_VM5(map_entry_extend, vm_map_t, map, vm_map_entry_t, entry, vm_address_t, entry->vme_start, vm_address_t, entry->vme_end, vm_address_t, end);
1c79356b 2478 entry->vme_end = end;
3e170ce0
A
2479 if (map->holelistenabled) {
2480 vm_map_store_update_first_free(map, entry, TRUE);
2481 } else {
2482 vm_map_store_update_first_free(map, map->first_free, TRUE);
2483 }
fe8ab488 2484 new_mapping_established = TRUE;
1c79356b
A
2485 RETURN(KERN_SUCCESS);
2486 }
2487 }
2488
b0d623f7
A
2489 step = superpage_size ? SUPERPAGE_SIZE : (end - start);
2490 new_entry = NULL;
2491
2492 for (tmp2_start = start; tmp2_start<end; tmp2_start += step) {
2493 tmp2_end = tmp2_start + step;
2494 /*
2495 * Create a new entry
2496 * LP64todo - for now, we can only allocate 4GB internal objects
2497 * because the default pager can't page bigger ones. Remove this
2498 * when it can.
2499 *
2500 * XXX FBDP
2501 * The reserved "page zero" in each process's address space can
2502 * be arbitrarily large. Splitting it into separate 4GB objects and
2503 * therefore different VM map entries serves no purpose and just
2504 * slows down operations on the VM map, so let's not split the
2505 * allocation into 4GB chunks if the max protection is NONE. That
2506 * memory should never be accessible, so it will never get to the
2507 * default pager.
2508 */
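/*
 * Chunking example (assuming ANON_CHUNK_SIZE is the 4GB limit the
 * note above refers to): a 10GB anonymous request with a non-NONE
 * max protection is carved into three entries backed by objects of
 * 4GB, 4GB and 2GB; with VM_PROT_NONE it stays a single entry.
 */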
2509 tmp_start = tmp2_start;
2510 if (object == VM_OBJECT_NULL &&
2511 size > (vm_map_size_t)ANON_CHUNK_SIZE &&
2512 max_protection != VM_PROT_NONE &&
2513 superpage_size == 0)
2514 tmp_end = tmp_start + (vm_map_size_t)ANON_CHUNK_SIZE;
2515 else
2516 tmp_end = tmp2_end;
2517 do {
2518 new_entry = vm_map_entry_insert(map, entry, tmp_start, tmp_end,
2519 object, offset, needs_copy,
2520 FALSE, FALSE,
2521 cur_protection, max_protection,
2522 VM_BEHAVIOR_DEFAULT,
316670eb 2523 (entry_for_jit)? VM_INHERIT_NONE: inheritance,
6d2010ae 2524 0, no_cache,
39236c6e
A
2525 permanent,
2526 superpage_size,
fe8ab488
A
2527 clear_map_aligned,
2528 is_submap);
3e170ce0
A
2529
2530 assert((object != kernel_object) || (VM_KERN_MEMORY_NONE != alias));
2531 VME_ALIAS_SET(new_entry, alias);
2532
316670eb 2533 if (entry_for_jit){
6d2010ae
A
2534 if (!(map->jit_entry_exists)){
2535 new_entry->used_for_jit = TRUE;
2536 map->jit_entry_exists = TRUE;
2537 }
2538 }
2539
3e170ce0
A
2540 if (resilient_codesign &&
2541 ! ((cur_protection | max_protection) &
2542 (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
2543 new_entry->vme_resilient_codesign = TRUE;
2544 }
2545
2546 if (resilient_media &&
2547 ! ((cur_protection | max_protection) &
2548 (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
2549 new_entry->vme_resilient_media = TRUE;
2550 }
2551
fe8ab488
A
2552 assert(!new_entry->iokit_acct);
2553 if (!is_submap &&
2554 object != VM_OBJECT_NULL &&
2555 object->purgable != VM_PURGABLE_DENY) {
2556 assert(new_entry->use_pmap);
2557 assert(!new_entry->iokit_acct);
2558 /*
2559 * Turn off pmap accounting since
2560 * purgeable objects have their
2561 * own ledgers.
2562 */
2563 new_entry->use_pmap = FALSE;
2564 } else if (!is_submap &&
ecc0ceb4
A
2565 iokit_acct &&
2566 object != VM_OBJECT_NULL &&
2567 object->internal) {
fe8ab488
A
2568 /* alternate accounting */
2569 assert(!new_entry->iokit_acct);
2570 assert(new_entry->use_pmap);
2571 new_entry->iokit_acct = TRUE;
2572 new_entry->use_pmap = FALSE;
ecc0ceb4
A
2573 DTRACE_VM4(
2574 vm_map_iokit_mapped_region,
2575 vm_map_t, map,
2576 vm_map_offset_t, new_entry->vme_start,
2577 vm_map_offset_t, new_entry->vme_end,
2578 int, VME_ALIAS(new_entry));
fe8ab488
A
2579 vm_map_iokit_mapped_region(
2580 map,
2581 (new_entry->vme_end -
2582 new_entry->vme_start));
2583 } else if (!is_submap) {
2584 assert(!new_entry->iokit_acct);
2585 assert(new_entry->use_pmap);
2586 }
2587
b0d623f7
A
2588 if (is_submap) {
2589 vm_map_t submap;
2590 boolean_t submap_is_64bit;
2591 boolean_t use_pmap;
2592
fe8ab488
A
2593 assert(new_entry->is_sub_map);
2594 assert(!new_entry->use_pmap);
2595 assert(!new_entry->iokit_acct);
b0d623f7
A
2596 submap = (vm_map_t) object;
2597 submap_is_64bit = vm_map_is_64bit(submap);
3e170ce0 2598 use_pmap = (user_alias == VM_MEMORY_SHARED_PMAP);
fe8ab488 2599#ifndef NO_NESTED_PMAP
b0d623f7 2600 if (use_pmap && submap->pmap == NULL) {
316670eb 2601 ledger_t ledger = map->pmap->ledger;
b0d623f7 2602 /* we need a sub pmap to nest... */
316670eb
A
2603 submap->pmap = pmap_create(ledger, 0,
2604 submap_is_64bit);
b0d623f7
A
2605 if (submap->pmap == NULL) {
2606 /* let's proceed without nesting... */
2607 }
2d21ac55 2608 }
b0d623f7
A
2609 if (use_pmap && submap->pmap != NULL) {
2610 kr = pmap_nest(map->pmap,
2611 submap->pmap,
2612 tmp_start,
2613 tmp_start,
2614 tmp_end - tmp_start);
2615 if (kr != KERN_SUCCESS) {
2616 printf("vm_map_enter: "
2617 "pmap_nest(0x%llx,0x%llx) "
2618 "error 0x%x\n",
2619 (long long)tmp_start,
2620 (long long)tmp_end,
2621 kr);
2622 } else {
2623 /* we're now nested ! */
2624 new_entry->use_pmap = TRUE;
2625 pmap_empty = FALSE;
2626 }
2627 }
fe8ab488 2628#endif /* NO_NESTED_PMAP */
2d21ac55 2629 }
b0d623f7
A
2630 entry = new_entry;
2631
2632 if (superpage_size) {
2633 vm_page_t pages, m;
2634 vm_object_t sp_object;
2635
3e170ce0 2636 VME_OFFSET_SET(entry, 0);
b0d623f7
A
2637
2638 /* allocate one superpage */
2639 kr = cpm_allocate(SUPERPAGE_SIZE, &pages, 0, SUPERPAGE_NBASEPAGES-1, TRUE, 0);
2d21ac55 2640 if (kr != KERN_SUCCESS) {
3e170ce0
A
2641 /* deallocate whole range... */
2642 new_mapping_established = TRUE;
2643 /* ... but only up to "tmp_end" */
2644 size -= end - tmp_end;
b0d623f7
A
2645 RETURN(kr);
2646 }
2647
2648 /* create one vm_object per superpage */
2649 sp_object = vm_object_allocate((vm_map_size_t)(entry->vme_end - entry->vme_start));
2650 sp_object->phys_contiguous = TRUE;
39037602 2651 sp_object->vo_shadow_offset = (vm_object_offset_t)VM_PAGE_GET_PHYS_PAGE(pages)*PAGE_SIZE;
3e170ce0 2652 VME_OBJECT_SET(entry, sp_object);
fe8ab488 2653 assert(entry->use_pmap);
b0d623f7
A
2654
2655 /* enter the base pages into the object */
2656 vm_object_lock(sp_object);
2657 for (offset = 0; offset < SUPERPAGE_SIZE; offset += PAGE_SIZE) {
2658 m = pages;
39037602 2659 pmap_zero_page(VM_PAGE_GET_PHYS_PAGE(m));
b0d623f7
A
2660 pages = NEXT_PAGE(m);
2661 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
3e170ce0 2662 vm_page_insert_wired(m, sp_object, offset, VM_KERN_MEMORY_OSFMK);
2d21ac55 2663 }
b0d623f7 2664 vm_object_unlock(sp_object);
2d21ac55 2665 }
b0d623f7
A
2666 } while (tmp_end != tmp2_end &&
2667 (tmp_start = tmp_end) &&
2668 (tmp_end = (tmp2_end - tmp_end > (vm_map_size_t)ANON_CHUNK_SIZE) ?
2669 tmp_end + (vm_map_size_t)ANON_CHUNK_SIZE : tmp2_end));
2670 }
91447636 2671
91447636 2672 new_mapping_established = TRUE;
1c79356b 2673
fe8ab488
A
2674BailOut:
2675 assert(map_locked == TRUE);
2d21ac55 2676
593a1d5f
A
2677 if (result == KERN_SUCCESS) {
2678 vm_prot_t pager_prot;
2679 memory_object_t pager;
91447636 2680
fe8ab488 2681#if DEBUG
593a1d5f
A
2682 if (pmap_empty &&
2683 !(flags & VM_FLAGS_NO_PMAP_CHECK)) {
2684 assert(vm_map_pmap_is_empty(map,
2685 *address,
2686 *address+size));
2687 }
fe8ab488 2688#endif /* DEBUG */
593a1d5f
A
2689
2690 /*
2691 * For "named" VM objects, let the pager know that the
2692 * memory object is being mapped. Some pagers need to keep
2693 * track of this, to know when they can reclaim the memory
2694 * object, for example.
2695 * VM calls memory_object_map() for each mapping (specifying
2696 * the protection of each mapping) and calls
2697 * memory_object_last_unmap() when all the mappings are gone.
2698 */
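/*
 * Protocol sketch: a named object mapped twice read/write and once
 * copy-on-write would see three memory_object_map() calls (the COW
 * one with VM_PROT_WRITE stripped from pager_prot below) and a
 * single memory_object_last_unmap() once the last mapping goes away.
 */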
2699 pager_prot = max_protection;
2700 if (needs_copy) {
2701 /*
2702 * Copy-On-Write mapping: won't modify
2703 * the memory object.
2704 */
2705 pager_prot &= ~VM_PROT_WRITE;
2706 }
2707 if (!is_submap &&
2708 object != VM_OBJECT_NULL &&
2709 object->named &&
2710 object->pager != MEMORY_OBJECT_NULL) {
2711 vm_object_lock(object);
2712 pager = object->pager;
2713 if (object->named &&
2714 pager != MEMORY_OBJECT_NULL) {
2715 assert(object->pager_ready);
2716 vm_object_mapping_wait(object, THREAD_UNINT);
2717 vm_object_mapping_begin(object);
2718 vm_object_unlock(object);
2719
2720 kr = memory_object_map(pager, pager_prot);
2721 assert(kr == KERN_SUCCESS);
2722
2723 vm_object_lock(object);
2724 vm_object_mapping_end(object);
2725 }
2726 vm_object_unlock(object);
2727 }
fe8ab488
A
2728 }
2729
2730 assert(map_locked == TRUE);
2731
2732 if (!keep_map_locked) {
2733 vm_map_unlock(map);
2734 map_locked = FALSE;
2735 }
2736
2737 /*
2738 * We can't hold the map lock if we enter this block.
2739 */
2740
2741 if (result == KERN_SUCCESS) {
2742
2743 /* Wire down the new entry if the user
2744 * requested all new map entries be wired.
2745 */
2746 if ((map->wiring_required)||(superpage_size)) {
2747 assert(!keep_map_locked);
2748 pmap_empty = FALSE; /* pmap won't be empty */
2749 kr = vm_map_wire(map, start, end,
3e170ce0
A
2750 new_entry->protection | VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_MLOCK),
2751 TRUE);
fe8ab488
A
2752 result = kr;
2753 }
2754
2755 }
2756
2757 if (result != KERN_SUCCESS) {
91447636
A
2758 if (new_mapping_established) {
2759 /*
2760 * We have to get rid of the new mappings since we
2761 * won't make them available to the user.
2762 * Try to do that atomically, to minimize the risk
2763 * that someone else creates new mappings in that range.
2764 */
2765 zap_new_map = vm_map_create(PMAP_NULL,
2766 *address,
2767 *address + size,
b0d623f7 2768 map->hdr.entries_pageable);
39236c6e
A
2769 vm_map_set_page_shift(zap_new_map,
2770 VM_MAP_PAGE_SHIFT(map));
3e170ce0
A
2771 vm_map_disable_hole_optimization(zap_new_map);
2772
91447636
A
2773 if (!map_locked) {
2774 vm_map_lock(map);
2775 map_locked = TRUE;
2776 }
2777 (void) vm_map_delete(map, *address, *address+size,
fe8ab488
A
2778 (VM_MAP_REMOVE_SAVE_ENTRIES |
2779 VM_MAP_REMOVE_NO_MAP_ALIGN),
91447636
A
2780 zap_new_map);
2781 }
2782 if (zap_old_map != VM_MAP_NULL &&
2783 zap_old_map->hdr.nentries != 0) {
2784 vm_map_entry_t entry1, entry2;
2785
2786 /*
2787 * The new mapping failed. Attempt to restore
2788 * the old mappings, saved in the "zap_old_map".
2789 */
2790 if (!map_locked) {
2791 vm_map_lock(map);
2792 map_locked = TRUE;
2793 }
2794
2795 /* first check if the coast is still clear */
2796 start = vm_map_first_entry(zap_old_map)->vme_start;
2797 end = vm_map_last_entry(zap_old_map)->vme_end;
2798 if (vm_map_lookup_entry(map, start, &entry1) ||
2799 vm_map_lookup_entry(map, end, &entry2) ||
2800 entry1 != entry2) {
2801 /*
2802 * Part of that range has already been
2803 * re-mapped: we can't restore the old
2804 * mappings...
2805 */
2806 vm_map_enter_restore_failures++;
2807 } else {
2808 /*
2809 * Transfer the saved map entries from
2810 * "zap_old_map" to the original "map",
2811 * inserting them all after "entry1".
2812 */
2813 for (entry2 = vm_map_first_entry(zap_old_map);
2814 entry2 != vm_map_to_entry(zap_old_map);
2815 entry2 = vm_map_first_entry(zap_old_map)) {
2d21ac55
A
2816 vm_map_size_t entry_size;
2817
2818 entry_size = (entry2->vme_end -
2819 entry2->vme_start);
6d2010ae 2820 vm_map_store_entry_unlink(zap_old_map,
91447636 2821 entry2);
2d21ac55 2822 zap_old_map->size -= entry_size;
6d2010ae 2823 vm_map_store_entry_link(map, entry1, entry2);
2d21ac55 2824 map->size += entry_size;
91447636
A
2825 entry1 = entry2;
2826 }
2827 if (map->wiring_required) {
2828 /*
2829 * XXX TODO: we should rewire the
2830 * old pages here...
2831 */
2832 }
2833 vm_map_enter_restore_successes++;
2834 }
2835 }
2836 }
2837
fe8ab488
A
2838 /*
2839 * The caller is responsible for releasing the lock if it requested to
2840 * keep the map locked.
2841 */
2842 if (map_locked && !keep_map_locked) {
91447636
A
2843 vm_map_unlock(map);
2844 }
2845
2846 /*
2847 * Get rid of the "zap_maps" and all the map entries that
2848 * they may still contain.
2849 */
2850 if (zap_old_map != VM_MAP_NULL) {
2d21ac55 2851 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
91447636
A
2852 zap_old_map = VM_MAP_NULL;
2853 }
2854 if (zap_new_map != VM_MAP_NULL) {
2d21ac55 2855 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
91447636
A
2856 zap_new_map = VM_MAP_NULL;
2857 }
2858
2859 return result;
1c79356b
A
2860
2861#undef RETURN
2862}
2863
3e170ce0 2864
fe8ab488
A
2865/*
2866 * Counters for the prefault optimization.
2867 */
2868int64_t vm_prefault_nb_pages = 0;
2869int64_t vm_prefault_nb_bailout = 0;
2870
2871static kern_return_t
2872vm_map_enter_mem_object_helper(
2d21ac55
A
2873 vm_map_t target_map,
2874 vm_map_offset_t *address,
2875 vm_map_size_t initial_size,
2876 vm_map_offset_t mask,
2877 int flags,
2878 ipc_port_t port,
2879 vm_object_offset_t offset,
2880 boolean_t copy,
2881 vm_prot_t cur_protection,
2882 vm_prot_t max_protection,
fe8ab488
A
2883 vm_inherit_t inheritance,
2884 upl_page_list_ptr_t page_list,
2885 unsigned int page_list_count)
91447636 2886{
2d21ac55
A
2887 vm_map_address_t map_addr;
2888 vm_map_size_t map_size;
2889 vm_object_t object;
2890 vm_object_size_t size;
2891 kern_return_t result;
6d2010ae 2892 boolean_t mask_cur_protection, mask_max_protection;
fe8ab488 2893 boolean_t try_prefault = (page_list_count != 0);
3e170ce0 2894 vm_map_offset_t offset_in_mapping = 0;
6d2010ae
A
2895
2896 mask_cur_protection = cur_protection & VM_PROT_IS_MASK;
2897 mask_max_protection = max_protection & VM_PROT_IS_MASK;
2898 cur_protection &= ~VM_PROT_IS_MASK;
2899 max_protection &= ~VM_PROT_IS_MASK;
91447636
A
2900
2901 /*
2d21ac55 2902 * Check arguments for validity
91447636 2903 */
2d21ac55
A
2904 if ((target_map == VM_MAP_NULL) ||
2905 (cur_protection & ~VM_PROT_ALL) ||
2906 (max_protection & ~VM_PROT_ALL) ||
2907 (inheritance > VM_INHERIT_LAST_VALID) ||
fe8ab488 2908 (try_prefault && (copy || !page_list)) ||
3e170ce0 2909 initial_size == 0) {
2d21ac55 2910 return KERN_INVALID_ARGUMENT;
3e170ce0 2911 }
6d2010ae 2912
3e170ce0
A
2913 {
2914 map_addr = vm_map_trunc_page(*address,
2915 VM_MAP_PAGE_MASK(target_map));
2916 map_size = vm_map_round_page(initial_size,
2917 VM_MAP_PAGE_MASK(target_map));
2918 }
39236c6e 2919 size = vm_object_round_page(initial_size);
593a1d5f 2920
2d21ac55
A
2921 /*
2922 * Find the vm object (if any) corresponding to this port.
2923 */
2924 if (!IP_VALID(port)) {
2925 object = VM_OBJECT_NULL;
2926 offset = 0;
2927 copy = FALSE;
2928 } else if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
2929 vm_named_entry_t named_entry;
2930
2931 named_entry = (vm_named_entry_t) port->ip_kobject;
39236c6e 2932
3e170ce0
A
2933 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
2934 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
39236c6e
A
2935 offset += named_entry->data_offset;
2936 }
2937
2d21ac55
A
2938 /* a few checks to make sure user is obeying rules */
2939 if (size == 0) {
2940 if (offset >= named_entry->size)
2941 return KERN_INVALID_RIGHT;
2942 size = named_entry->size - offset;
2943 }
6d2010ae
A
2944 if (mask_max_protection) {
2945 max_protection &= named_entry->protection;
2946 }
2947 if (mask_cur_protection) {
2948 cur_protection &= named_entry->protection;
2949 }
2d21ac55
A
2950 if ((named_entry->protection & max_protection) !=
2951 max_protection)
2952 return KERN_INVALID_RIGHT;
2953 if ((named_entry->protection & cur_protection) !=
2954 cur_protection)
2955 return KERN_INVALID_RIGHT;
22ba694c
A
2956 if (offset + size < offset) {
2957 /* overflow */
2958 return KERN_INVALID_ARGUMENT;
2959 }
3e170ce0 2960 if (named_entry->size < (offset + initial_size)) {
2d21ac55 2961 return KERN_INVALID_ARGUMENT;
3e170ce0 2962 }
2d21ac55 2963
39236c6e
A
2964 if (named_entry->is_copy) {
2965 /* for a vm_map_copy, we can only map it whole */
2966 if ((size != named_entry->size) &&
2967 (vm_map_round_page(size,
2968 VM_MAP_PAGE_MASK(target_map)) ==
2969 named_entry->size)) {
2970 /* XXX FBDP use the rounded size... */
2971 size = vm_map_round_page(
2972 size,
2973 VM_MAP_PAGE_MASK(target_map));
2974 }
2975
fe8ab488
A
2976 if (!(flags & VM_FLAGS_ANYWHERE) &&
2977 (offset != 0 ||
2978 size != named_entry->size)) {
2979 /*
2980 * XXX for a mapping at a "fixed" address,
2981 * we can't trim after mapping the whole
2982 * memory entry, so reject a request for a
2983 * partial mapping.
2984 */
39236c6e
A
2985 return KERN_INVALID_ARGUMENT;
2986 }
2987 }
2988
2d21ac55
A
2989 /* the caller's "offset" parameter is relative to the named */
2990 /* entry, so convert it to an offset within the backing object */
2991 offset = offset + named_entry->offset;
2992
39236c6e
A
2993 if (! VM_MAP_PAGE_ALIGNED(size,
2994 VM_MAP_PAGE_MASK(target_map))) {
2995 /*
2996 * Let's not map more than requested;
2997 * vm_map_enter() will handle this "not map-aligned"
2998 * case.
2999 */
3000 map_size = size;
3001 }
3002
2d21ac55
A
3003 named_entry_lock(named_entry);
3004 if (named_entry->is_sub_map) {
3005 vm_map_t submap;
3006
3e170ce0
A
3007 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
3008 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
39236c6e
A
3009 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap.");
3010 }
3011
2d21ac55
A
3012 submap = named_entry->backing.map;
3013 vm_map_lock(submap);
3014 vm_map_reference(submap);
3015 vm_map_unlock(submap);
3016 named_entry_unlock(named_entry);
3017
3018 result = vm_map_enter(target_map,
3019 &map_addr,
3020 map_size,
3021 mask,
3022 flags | VM_FLAGS_SUBMAP,
3023 (vm_object_t) submap,
3024 offset,
3025 copy,
3026 cur_protection,
3027 max_protection,
3028 inheritance);
3029 if (result != KERN_SUCCESS) {
3030 vm_map_deallocate(submap);
3031 } else {
3032 /*
3033 * No need to lock "submap" just to check its
3034 * "mapped" flag: that flag is never reset
3035 * once it's been set and if we race, we'll
3036 * just end up setting it twice, which is OK.
3037 */
316670eb
A
3038 if (submap->mapped_in_other_pmaps == FALSE &&
3039 vm_map_pmap(submap) != PMAP_NULL &&
3040 vm_map_pmap(submap) !=
3041 vm_map_pmap(target_map)) {
2d21ac55 3042 /*
316670eb
A
3043 * This submap is being mapped in a map
3044 * that uses a different pmap.
3045 * Set its "mapped_in_other_pmaps" flag
3046 * to indicate that we now need to
3047 * remove mappings from all pmaps rather
3048 * than just the submap's pmap.
2d21ac55
A
3049 */
3050 vm_map_lock(submap);
316670eb 3051 submap->mapped_in_other_pmaps = TRUE;
2d21ac55
A
3052 vm_map_unlock(submap);
3053 }
3054 *address = map_addr;
3055 }
3056 return result;
3057
3058 } else if (named_entry->is_pager) {
3059 unsigned int access;
3060 vm_prot_t protections;
3061 unsigned int wimg_mode;
2d21ac55
A
3062
3063 protections = named_entry->protection & VM_PROT_ALL;
3064 access = GET_MAP_MEM(named_entry->protection);
3065
3e170ce0
A
3066 if (flags & (VM_FLAGS_RETURN_DATA_ADDR|
3067 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
39236c6e
A
3068 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for pager.");
3069 }
3070
2d21ac55
A
3071 object = vm_object_enter(named_entry->backing.pager,
3072 named_entry->size,
3073 named_entry->internal,
3074 FALSE,
3075 FALSE);
3076 if (object == VM_OBJECT_NULL) {
3077 named_entry_unlock(named_entry);
3078 return KERN_INVALID_OBJECT;
3079 }
3080
3081 /* JMM - drop reference on pager here */
3082
3083 /* create an extra ref for the named entry */
3084 vm_object_lock(object);
3085 vm_object_reference_locked(object);
3086 named_entry->backing.object = object;
3087 named_entry->is_pager = FALSE;
3088 named_entry_unlock(named_entry);
3089
3090 wimg_mode = object->wimg_bits;
6d2010ae 3091
2d21ac55
A
3092 if (access == MAP_MEM_IO) {
3093 wimg_mode = VM_WIMG_IO;
3094 } else if (access == MAP_MEM_COPYBACK) {
3095 wimg_mode = VM_WIMG_USE_DEFAULT;
316670eb
A
3096 } else if (access == MAP_MEM_INNERWBACK) {
3097 wimg_mode = VM_WIMG_INNERWBACK;
2d21ac55
A
3098 } else if (access == MAP_MEM_WTHRU) {
3099 wimg_mode = VM_WIMG_WTHRU;
3100 } else if (access == MAP_MEM_WCOMB) {
3101 wimg_mode = VM_WIMG_WCOMB;
3102 }
2d21ac55
A
3103
3104 /* wait for object (if any) to be ready */
3105 if (!named_entry->internal) {
3106 while (!object->pager_ready) {
3107 vm_object_wait(
3108 object,
3109 VM_OBJECT_EVENT_PAGER_READY,
3110 THREAD_UNINT);
3111 vm_object_lock(object);
3112 }
3113 }
3114
6d2010ae
A
3115 if (object->wimg_bits != wimg_mode)
3116 vm_object_change_wimg_mode(object, wimg_mode);
2d21ac55 3117
fe8ab488
A
3118#if VM_OBJECT_TRACKING_OP_TRUESHARE
3119 if (!object->true_share &&
3120 vm_object_tracking_inited) {
3121 void *bt[VM_OBJECT_TRACKING_BTDEPTH];
3122 int num = 0;
3123
3124 num = OSBacktrace(bt,
3125 VM_OBJECT_TRACKING_BTDEPTH);
3126 btlog_add_entry(vm_object_tracking_btlog,
3127 object,
3128 VM_OBJECT_TRACKING_OP_TRUESHARE,
3129 bt,
3130 num);
3131 }
3132#endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */
3133
2d21ac55 3134 object->true_share = TRUE;
6d2010ae 3135
2d21ac55
A
3136 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
3137 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
3138 vm_object_unlock(object);
39236c6e
A
3139
3140 } else if (named_entry->is_copy) {
3141 kern_return_t kr;
3142 vm_map_copy_t copy_map;
3143 vm_map_entry_t copy_entry;
3144 vm_map_offset_t copy_addr;
3145
3146 if (flags & ~(VM_FLAGS_FIXED |
3147 VM_FLAGS_ANYWHERE |
3148 VM_FLAGS_OVERWRITE |
3e170ce0 3149 VM_FLAGS_RETURN_4K_DATA_ADDR |
39037602
A
3150 VM_FLAGS_RETURN_DATA_ADDR |
3151 VM_FLAGS_ALIAS_MASK)) {
39236c6e
A
3152 named_entry_unlock(named_entry);
3153 return KERN_INVALID_ARGUMENT;
3154 }
3155
3e170ce0
A
3156 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
3157 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
39236c6e 3158 offset_in_mapping = offset - vm_object_trunc_page(offset);
3e170ce0
A
3159 if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR)
3160 offset_in_mapping &= ~((signed)(0xFFF));
39236c6e
A
3161 offset = vm_object_trunc_page(offset);
3162 map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset;
3163 }
3164
3165 copy_map = named_entry->backing.copy;
3166 assert(copy_map->type == VM_MAP_COPY_ENTRY_LIST);
3167 if (copy_map->type != VM_MAP_COPY_ENTRY_LIST) {
3168 /* unsupported type; should not happen */
3169 printf("vm_map_enter_mem_object: "
3170 "memory_entry->backing.copy "
3171 "unsupported type 0x%x\n",
3172 copy_map->type);
3173 named_entry_unlock(named_entry);
3174 return KERN_INVALID_ARGUMENT;
3175 }
3176
3177 /* reserve a contiguous range */
3178 kr = vm_map_enter(target_map,
3179 &map_addr,
fe8ab488
A
3180 /* map whole mem entry, trim later: */
3181 named_entry->size,
39236c6e
A
3182 mask,
3183 flags & (VM_FLAGS_ANYWHERE |
3184 VM_FLAGS_OVERWRITE |
3e170ce0 3185 VM_FLAGS_RETURN_4K_DATA_ADDR |
39037602
A
3186 VM_FLAGS_RETURN_DATA_ADDR |
3187 VM_FLAGS_ALIAS_MASK),
39236c6e
A
3188 VM_OBJECT_NULL,
3189 0,
3190 FALSE, /* copy */
3191 cur_protection,
3192 max_protection,
3193 inheritance);
3194 if (kr != KERN_SUCCESS) {
3195 named_entry_unlock(named_entry);
3196 return kr;
3197 }
3198
3199 copy_addr = map_addr;
3200
3201 for (copy_entry = vm_map_copy_first_entry(copy_map);
3202 copy_entry != vm_map_copy_to_entry(copy_map);
3203 copy_entry = copy_entry->vme_next) {
3204 int remap_flags = 0;
3205 vm_map_t copy_submap;
3206 vm_object_t copy_object;
3207 vm_map_size_t copy_size;
3208 vm_object_offset_t copy_offset;
39037602 3209 int copy_vm_alias;
39236c6e 3210
3e170ce0 3211 copy_offset = VME_OFFSET(copy_entry);
39236c6e
A
3212 copy_size = (copy_entry->vme_end -
3213 copy_entry->vme_start);
39037602
A
3214 VM_GET_FLAGS_ALIAS(flags, copy_vm_alias);
3215 if (copy_vm_alias == 0) {
3216 /*
3217 * Caller does not want a specific
3218 * alias for this new mapping: use
3219 * the alias of the original mapping.
3220 */
3221 copy_vm_alias = VME_ALIAS(copy_entry);
3222 }
39236c6e
A
3223
3224 /* sanity check */
fe8ab488
A
3225 if ((copy_addr + copy_size) >
3226 (map_addr +
3227 named_entry->size /* XXX full size */ )) {
39236c6e
A
3228 /* over-mapping too much !? */
3229 kr = KERN_INVALID_ARGUMENT;
3230 /* abort */
3231 break;
3232 }
3233
3234 /* take a reference on the object */
3235 if (copy_entry->is_sub_map) {
3236 remap_flags |= VM_FLAGS_SUBMAP;
3e170ce0 3237 copy_submap = VME_SUBMAP(copy_entry);
39236c6e
A
3238 vm_map_lock(copy_submap);
3239 vm_map_reference(copy_submap);
3240 vm_map_unlock(copy_submap);
3241 copy_object = (vm_object_t) copy_submap;
3242 } else {
3e170ce0 3243 copy_object = VME_OBJECT(copy_entry);
39236c6e
A
3244 vm_object_reference(copy_object);
3245 }
3246
3247 /* over-map the object into destination */
3248 remap_flags |= flags;
3249 remap_flags |= VM_FLAGS_FIXED;
3250 remap_flags |= VM_FLAGS_OVERWRITE;
3251 remap_flags &= ~VM_FLAGS_ANYWHERE;
39037602 3252 remap_flags |= VM_MAKE_TAG(copy_vm_alias);
39236c6e
A
3253 kr = vm_map_enter(target_map,
3254 &copy_addr,
3255 copy_size,
3256 (vm_map_offset_t) 0,
3257 remap_flags,
3258 copy_object,
3259 copy_offset,
3260 copy,
3261 cur_protection,
3262 max_protection,
3263 inheritance);
3264 if (kr != KERN_SUCCESS) {
3265 if (copy_entry->is_sub_map) {
3266 vm_map_deallocate(copy_submap);
3267 } else {
3268 vm_object_deallocate(copy_object);
3269 }
3270 /* abort */
3271 break;
3272 }
3273
3274 /* next mapping */
3275 copy_addr += copy_size;
3276 }
3277
3278 if (kr == KERN_SUCCESS) {
3e170ce0
A
3279 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
3280 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
39236c6e
A
3281 *address = map_addr + offset_in_mapping;
3282 } else {
3283 *address = map_addr;
3284 }
fe8ab488
A
3285
3286 if (offset) {
3287 /*
3288 * Trim in front, from 0 to "offset".
3289 */
3290 vm_map_remove(target_map,
3291 map_addr,
3292 map_addr + offset,
3293 0);
3294 *address += offset;
3295 }
3296 if (offset + map_size < named_entry->size) {
3297 /*
3298 * Trim in back, from
3299 * "offset + map_size" to
3300 * "named_entry->size".
3301 */
3302 vm_map_remove(target_map,
3303 (map_addr +
3304 offset + map_size),
3305 (map_addr +
3306 named_entry->size),
3307 0);
3308 }
39236c6e
A
3309 }
3310 named_entry_unlock(named_entry);
3311
3312 if (kr != KERN_SUCCESS) {
3313 if (! (flags & VM_FLAGS_OVERWRITE)) {
3314 /* deallocate the contiguous range */
3315 (void) vm_deallocate(target_map,
3316 map_addr,
3317 map_size);
3318 }
3319 }
3320
3321 return kr;
3322
2d21ac55
A
3323 } else {
3324 /* This is the case where we are going to map */
3325 /* an already mapped object. If the object is */
3326 /* not ready, it must be internal. An external */
3327 /* object cannot be mapped until it is ready, */
3328 /* so we can safely skip the ready check */
3329 /* in this case. */
3e170ce0
A
3330 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
3331 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
39236c6e 3332 offset_in_mapping = offset - vm_object_trunc_page(offset);
3e170ce0
A
3333 if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR)
3334 offset_in_mapping &= ~((signed)(0xFFF));
39236c6e
A
3335 offset = vm_object_trunc_page(offset);
3336 map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset;
3337 }
3338
2d21ac55
A
3339 object = named_entry->backing.object;
3340 assert(object != VM_OBJECT_NULL);
3341 named_entry_unlock(named_entry);
3342 vm_object_reference(object);
3343 }
3344 } else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
3345 /*
3346 * JMM - This is temporary until we unify named entries
3347 * and raw memory objects.
3348 *
3349 * Detected fake ip_kotype for a memory object. In
3350 * this case, the port isn't really a port at all, but
3351 * instead is just a raw memory object.
3352 */
3e170ce0
A
3353 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
3354 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
39236c6e
A
3355 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for raw memory object.");
3356 }
3357
2d21ac55
A
3358 object = vm_object_enter((memory_object_t)port,
3359 size, FALSE, FALSE, FALSE);
3360 if (object == VM_OBJECT_NULL)
3361 return KERN_INVALID_OBJECT;
3362
3363 /* wait for object (if any) to be ready */
3364 if (object != VM_OBJECT_NULL) {
3365 if (object == kernel_object) {
3366 printf("Warning: Attempt to map kernel object"
3367 " by a non-private kernel entity\n");
3368 return KERN_INVALID_OBJECT;
3369 }
b0d623f7 3370 if (!object->pager_ready) {
2d21ac55 3371 vm_object_lock(object);
b0d623f7
A
3372
3373 while (!object->pager_ready) {
3374 vm_object_wait(object,
3375 VM_OBJECT_EVENT_PAGER_READY,
3376 THREAD_UNINT);
3377 vm_object_lock(object);
3378 }
3379 vm_object_unlock(object);
2d21ac55 3380 }
2d21ac55
A
3381 }
3382 } else {
3383 return KERN_INVALID_OBJECT;
3384 }
3385
593a1d5f
A
3386 if (object != VM_OBJECT_NULL &&
3387 object->named &&
3388 object->pager != MEMORY_OBJECT_NULL &&
3389 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
3390 memory_object_t pager;
3391 vm_prot_t pager_prot;
3392 kern_return_t kr;
3393
3394 /*
3395 * For "named" VM objects, let the pager know that the
3396 * memory object is being mapped. Some pagers need to keep
3397 * track of this, to know when they can reclaim the memory
3398 * object, for example.
3399 * VM calls memory_object_map() for each mapping (specifying
3400 * the protection of each mapping) and calls
3401 * memory_object_last_unmap() when all the mappings are gone.
3402 */
3403 pager_prot = max_protection;
3404 if (copy) {
3405 /*
3406 * Copy-On-Write mapping: won't modify the
3407 * memory object.
3408 */
3409 pager_prot &= ~VM_PROT_WRITE;
3410 }
3411 vm_object_lock(object);
3412 pager = object->pager;
3413 if (object->named &&
3414 pager != MEMORY_OBJECT_NULL &&
3415 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
3416 assert(object->pager_ready);
3417 vm_object_mapping_wait(object, THREAD_UNINT);
3418 vm_object_mapping_begin(object);
3419 vm_object_unlock(object);
3420
3421 kr = memory_object_map(pager, pager_prot);
3422 assert(kr == KERN_SUCCESS);
3423
3424 vm_object_lock(object);
3425 vm_object_mapping_end(object);
3426 }
3427 vm_object_unlock(object);
3428 }
3429
2d21ac55
A
3430 /*
3431 * Perform the copy if requested
3432 */
3433
3434 if (copy) {
3435 vm_object_t new_object;
3436 vm_object_offset_t new_offset;
3437
3e170ce0
A
3438 result = vm_object_copy_strategically(object, offset,
3439 map_size,
2d21ac55
A
3440 &new_object, &new_offset,
3441 &copy);
3442
3443
3444 if (result == KERN_MEMORY_RESTART_COPY) {
3445 boolean_t success;
3446 boolean_t src_needs_copy;
3447
3448 /*
3449 * XXX
3450 * We currently ignore src_needs_copy.
3451 * This really is the issue of how to make
3452 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
3453 * non-kernel users to use. Solution forthcoming.
3454 * In the meantime, since we don't allow non-kernel
3455 * memory managers to specify symmetric copy,
3456 * we won't run into problems here.
3457 */
3458 new_object = object;
3459 new_offset = offset;
3460 success = vm_object_copy_quickly(&new_object,
3e170ce0
A
3461 new_offset,
3462 map_size,
2d21ac55
A
3463 &src_needs_copy,
3464 &copy);
3465 assert(success);
3466 result = KERN_SUCCESS;
3467 }
3468 /*
3469 * Throw away the reference to the
3470 * original object, as it won't be mapped.
3471 */
3472
3473 vm_object_deallocate(object);
3474
3e170ce0 3475 if (result != KERN_SUCCESS) {
2d21ac55 3476 return result;
3e170ce0 3477 }
2d21ac55
A
3478
3479 object = new_object;
3480 offset = new_offset;
3481 }
3482
fe8ab488
A
3483 /*
3484 * If users want to try to prefault pages, the mapping and the
3485 * prefault need to be atomic.
3486 */
3487 if (try_prefault)
3488 flags |= VM_FLAGS_KEEP_MAP_LOCKED;
3e170ce0
A
3489
3490 {
3491 result = vm_map_enter(target_map,
3492 &map_addr, map_size,
3493 (vm_map_offset_t)mask,
3494 flags,
3495 object, offset,
3496 copy,
3497 cur_protection, max_protection,
3498 inheritance);
3499 }
2d21ac55
A
3500 if (result != KERN_SUCCESS)
3501 vm_object_deallocate(object);
39236c6e 3502
fe8ab488
A
3503 /*
3504 * Try to prefault, and do not forget to release the vm map lock.
3505 */
3506 if (result == KERN_SUCCESS && try_prefault) {
3507 mach_vm_address_t va = map_addr;
3508 kern_return_t kr = KERN_SUCCESS;
3509 unsigned int i = 0;
39037602
A
3510 int pmap_options;
3511
3512 pmap_options = PMAP_OPTIONS_NOWAIT;
3513 if (object->internal) {
3514 pmap_options |= PMAP_OPTIONS_INTERNAL;
3515 }
fe8ab488
A
3516
3517 for (i = 0; i < page_list_count; ++i) {
3518 if (UPL_VALID_PAGE(page_list, i)) {
3519 /*
3520 * If this function call failed, we should stop
3521 * trying to optimize; the other calls are likely
3522 * to fail too.
3523 *
3524 * We are not going to report an error for such a
3525 * failure, though. That's an optimization, not
3526 * something critical.
3527 */
3528 kr = pmap_enter_options(target_map->pmap,
3529 va, UPL_PHYS_PAGE(page_list, i),
3530 cur_protection, VM_PROT_NONE,
39037602 3531 0, TRUE, pmap_options, NULL);
fe8ab488
A
3532 if (kr != KERN_SUCCESS) {
3533 OSIncrementAtomic64(&vm_prefault_nb_bailout);
3e170ce0 3534 break;
fe8ab488
A
3535 }
3536 OSIncrementAtomic64(&vm_prefault_nb_pages);
3537 }
3538
3539 /* Next virtual address */
3540 va += PAGE_SIZE;
3541 }
fe8ab488
A
3542 vm_map_unlock(target_map);
3543 }
3544
3e170ce0
A
3545 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
3546 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
39236c6e
A
3547 *address = map_addr + offset_in_mapping;
3548 } else {
3549 *address = map_addr;
3550 }
2d21ac55
A
3551 return result;
3552}
3553
fe8ab488
A
3554kern_return_t
3555vm_map_enter_mem_object(
3556 vm_map_t target_map,
3557 vm_map_offset_t *address,
3558 vm_map_size_t initial_size,
3559 vm_map_offset_t mask,
3560 int flags,
3561 ipc_port_t port,
3562 vm_object_offset_t offset,
3563 boolean_t copy,
3564 vm_prot_t cur_protection,
3565 vm_prot_t max_protection,
3566 vm_inherit_t inheritance)
3567{
3568 return vm_map_enter_mem_object_helper(target_map, address, initial_size, mask, flags,
3569 port, offset, copy, cur_protection, max_protection,
3570 inheritance, NULL, 0);
3571}
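
/*
 * Illustrative sketch, not part of the original file: a hypothetical
 * in-kernel caller mapping a memory entry port into a task's map at
 * any free address.  "mem_entry_port" and "map_size" are assumed to
 * have been set up elsewhere (e.g. via mach_make_memory_entry_64).
 */
static kern_return_t
example_map_mem_entry(
	vm_map_t	target_map,
	ipc_port_t	mem_entry_port,	/* assumed: a named entry port */
	vm_map_size_t	map_size,
	vm_map_offset_t	*out_addr)
{
	vm_map_offset_t	map_addr = 0;
	kern_return_t	kr;

	kr = vm_map_enter_mem_object(target_map,
				     &map_addr,		/* in/out: chosen address */
				     map_size,
				     (vm_map_offset_t)0,	/* no alignment mask */
				     VM_FLAGS_ANYWHERE,
				     mem_entry_port,
				     0,			/* offset into the object */
				     FALSE,		/* map shared, don't copy */
				     VM_PROT_READ | VM_PROT_WRITE,
				     VM_PROT_READ | VM_PROT_WRITE,
				     VM_INHERIT_DEFAULT);
	if (kr == KERN_SUCCESS)
		*out_addr = map_addr;
	return kr;
}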
b0d623f7 3572
fe8ab488
A
3573kern_return_t
3574vm_map_enter_mem_object_prefault(
3575 vm_map_t target_map,
3576 vm_map_offset_t *address,
3577 vm_map_size_t initial_size,
3578 vm_map_offset_t mask,
3579 int flags,
3580 ipc_port_t port,
3581 vm_object_offset_t offset,
3582 vm_prot_t cur_protection,
3583 vm_prot_t max_protection,
3584 upl_page_list_ptr_t page_list,
3585 unsigned int page_list_count)
3586{
3587 return vm_map_enter_mem_object_helper(target_map, address, initial_size, mask, flags,
3588 port, offset, FALSE, cur_protection, max_protection,
3589 VM_INHERIT_DEFAULT, page_list, page_list_count);
3590}
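
/*
 * Illustrative sketch, not part of the original file: a hypothetical
 * caller that already holds a populated UPL page list ("page_list" and
 * "page_count" are assumed inputs) and wants the mapping and the
 * prefault of those physical pages done under a single map lock hold.
 */
static kern_return_t
example_map_and_prefault(
	vm_map_t		target_map,
	ipc_port_t		mem_entry_port,	/* assumed named entry */
	vm_map_size_t		map_size,
	upl_page_list_ptr_t	page_list,	/* assumed valid pages */
	unsigned int		page_count,
	vm_map_offset_t		*out_addr)
{
	vm_map_offset_t	map_addr = 0;
	kern_return_t	kr;

	kr = vm_map_enter_mem_object_prefault(target_map,
					      &map_addr,
					      map_size,
					      (vm_map_offset_t)0,
					      VM_FLAGS_ANYWHERE,
					      mem_entry_port,
					      0,	/* object offset */
					      VM_PROT_READ,
					      VM_PROT_READ | VM_PROT_WRITE,
					      page_list,
					      page_count);
	if (kr == KERN_SUCCESS)
		*out_addr = map_addr;
	return kr;
}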
b0d623f7
A
3591
3592
3593kern_return_t
3594vm_map_enter_mem_object_control(
3595 vm_map_t target_map,
3596 vm_map_offset_t *address,
3597 vm_map_size_t initial_size,
3598 vm_map_offset_t mask,
3599 int flags,
3600 memory_object_control_t control,
3601 vm_object_offset_t offset,
3602 boolean_t copy,
3603 vm_prot_t cur_protection,
3604 vm_prot_t max_protection,
3605 vm_inherit_t inheritance)
3606{
3607 vm_map_address_t map_addr;
3608 vm_map_size_t map_size;
3609 vm_object_t object;
3610 vm_object_size_t size;
3611 kern_return_t result;
3612 memory_object_t pager;
3613 vm_prot_t pager_prot;
3614 kern_return_t kr;
3615
3616 /*
3617 * Check arguments for validity
3618 */
3619 if ((target_map == VM_MAP_NULL) ||
3620 (cur_protection & ~VM_PROT_ALL) ||
3621 (max_protection & ~VM_PROT_ALL) ||
3622 (inheritance > VM_INHERIT_LAST_VALID) ||
3e170ce0 3623 initial_size == 0) {
b0d623f7 3624 return KERN_INVALID_ARGUMENT;
3e170ce0 3625 }
b0d623f7 3626
3e170ce0
A
3627 {
3628 map_addr = vm_map_trunc_page(*address,
3629 VM_MAP_PAGE_MASK(target_map));
3630 map_size = vm_map_round_page(initial_size,
3631 VM_MAP_PAGE_MASK(target_map));
3632 }
3633 size = vm_object_round_page(initial_size);
b0d623f7
A
3634
3635 object = memory_object_control_to_vm_object(control);
3636
3637 if (object == VM_OBJECT_NULL)
3638 return KERN_INVALID_OBJECT;
3639
3640 if (object == kernel_object) {
3641 printf("Warning: Attempt to map kernel object"
3642 " by a non-private kernel entity\n");
3643 return KERN_INVALID_OBJECT;
3644 }
3645
3646 vm_object_lock(object);
3647 object->ref_count++;
3648 vm_object_res_reference(object);
3649
3650 /*
3651 * For "named" VM objects, let the pager know that the
3652 * memory object is being mapped. Some pagers need to keep
3653 * track of this, to know when they can reclaim the memory
3654 * object, for example.
3655 * VM calls memory_object_map() for each mapping (specifying
3656 * the protection of each mapping) and calls
3657 * memory_object_last_unmap() when all the mappings are gone.
3658 */
3659 pager_prot = max_protection;
3660 if (copy) {
3661 pager_prot &= ~VM_PROT_WRITE;
3662 }
3663 pager = object->pager;
3664 if (object->named &&
3665 pager != MEMORY_OBJECT_NULL &&
3666 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
3667 assert(object->pager_ready);
3668 vm_object_mapping_wait(object, THREAD_UNINT);
3669 vm_object_mapping_begin(object);
3670 vm_object_unlock(object);
3671
3672 kr = memory_object_map(pager, pager_prot);
3673 assert(kr == KERN_SUCCESS);
3674
3675 vm_object_lock(object);
3676 vm_object_mapping_end(object);
3677 }
3678 vm_object_unlock(object);
3679
3680 /*
3681 * Perform the copy if requested
3682 */
3683
3684 if (copy) {
3685 vm_object_t new_object;
3686 vm_object_offset_t new_offset;
3687
3688 result = vm_object_copy_strategically(object, offset, size,
3689 &new_object, &new_offset,
3690 &copy);
3691
3692
3693 if (result == KERN_MEMORY_RESTART_COPY) {
3694 boolean_t success;
3695 boolean_t src_needs_copy;
3696
3697 /*
3698 * XXX
3699 * We currently ignore src_needs_copy.
3700 * This really is the issue of how to make
3701 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
3702 * non-kernel users to use. Solution forthcoming.
3703 * In the meantime, since we don't allow non-kernel
3704 * memory managers to specify symmetric copy,
3705 * we won't run into problems here.
3706 */
3707 new_object = object;
3708 new_offset = offset;
3709 success = vm_object_copy_quickly(&new_object,
3710 new_offset, size,
3711 &src_needs_copy,
3712 &copy);
3713 assert(success);
3714 result = KERN_SUCCESS;
3715 }
3716 /*
3717 * Throw away the reference to the
3718 * original object, as it won't be mapped.
3719 */
3720
3721 vm_object_deallocate(object);
3722
3e170ce0 3723 if (result != KERN_SUCCESS) {
b0d623f7 3724 return result;
3e170ce0 3725 }
b0d623f7
A
3726
3727 object = new_object;
3728 offset = new_offset;
3729 }
3730
3e170ce0
A
3731 {
3732 result = vm_map_enter(target_map,
3733 &map_addr, map_size,
3734 (vm_map_offset_t)mask,
3735 flags,
3736 object, offset,
3737 copy,
3738 cur_protection, max_protection,
3739 inheritance);
3740 }
b0d623f7
A
3741 if (result != KERN_SUCCESS)
3742 vm_object_deallocate(object);
3743 *address = map_addr;
3744
3745 return result;
3746}
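
/*
 * Illustrative sketch, not part of the original file: mapping memory
 * through a memory_object_control_t handle rather than a port.  The
 * "control" argument is assumed to have been obtained from the pager
 * backing the object (for instance, by the file system that owns it).
 */
static kern_return_t
example_map_from_control(
	vm_map_t		target_map,
	memory_object_control_t	control,	/* assumed valid control */
	vm_object_offset_t	file_offset,
	vm_map_size_t		map_size,
	vm_map_offset_t		*out_addr)
{
	vm_map_offset_t	map_addr = 0;
	kern_return_t	kr;

	kr = vm_map_enter_mem_object_control(target_map,
					     &map_addr,
					     map_size,
					     (vm_map_offset_t)0,
					     VM_FLAGS_ANYWHERE,
					     control,
					     file_offset,
					     FALSE,	/* share, don't copy */
					     VM_PROT_READ,
					     VM_PROT_READ,
					     VM_INHERIT_NONE);
	if (kr == KERN_SUCCESS)
		*out_addr = map_addr;
	return kr;
}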
3747
3748
2d21ac55
A
3749#if VM_CPM
3750
3751#ifdef MACH_ASSERT
3752extern pmap_paddr_t avail_start, avail_end;
3753#endif
3754
3755/*
3756 * Allocate memory in the specified map, with the caveat that
3757 * the memory is physically contiguous. This call may fail
3758 * if the system can't find sufficient contiguous memory.
3759 * This call may cause or lead to heart-stopping amounts of
3760 * paging activity.
3761 *
3762 * Memory obtained from this call should be freed in the
3763 * normal way, viz., via vm_deallocate.
3764 */
3765kern_return_t
3766vm_map_enter_cpm(
3767 vm_map_t map,
3768 vm_map_offset_t *addr,
3769 vm_map_size_t size,
3770 int flags)
3771{
3772 vm_object_t cpm_obj;
3773 pmap_t pmap;
3774 vm_page_t m, pages;
3775 kern_return_t kr;
3776 vm_map_offset_t va, start, end, offset;
3777#if MACH_ASSERT
316670eb 3778 vm_map_offset_t prev_addr = 0;
2d21ac55
A
3779#endif /* MACH_ASSERT */
3780
3781 boolean_t anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
3e170ce0
A
3782 vm_tag_t tag;
3783
3784 VM_GET_FLAGS_ALIAS(flags, tag);
2d21ac55 3785
2d21ac55
A
3786 if (size == 0) {
3787 *addr = 0;
3788 return KERN_SUCCESS;
3789 }
3790 if (anywhere)
3791 *addr = vm_map_min(map);
3792 else
39236c6e
A
3793 *addr = vm_map_trunc_page(*addr,
3794 VM_MAP_PAGE_MASK(map));
3795 size = vm_map_round_page(size,
3796 VM_MAP_PAGE_MASK(map));
2d21ac55
A
3797
3798 /*
3799 * LP64todo - cpm_allocate should probably allow
3800 * allocations of >4GB, but not with the current
3801 * algorithm, so just cast down the size for now.
3802 */
3803 if (size > VM_MAX_ADDRESS)
3804 return KERN_RESOURCE_SHORTAGE;
3805 if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
b0d623f7 3806 &pages, 0, 0, TRUE, flags)) != KERN_SUCCESS)
2d21ac55
A
3807 return kr;
3808
3809 cpm_obj = vm_object_allocate((vm_object_size_t)size);
3810 assert(cpm_obj != VM_OBJECT_NULL);
3811 assert(cpm_obj->internal);
316670eb 3812 assert(cpm_obj->vo_size == (vm_object_size_t)size);
2d21ac55
A
3813 assert(cpm_obj->can_persist == FALSE);
3814 assert(cpm_obj->pager_created == FALSE);
3815 assert(cpm_obj->pageout == FALSE);
3816 assert(cpm_obj->shadow == VM_OBJECT_NULL);
91447636
A
3817
3818 /*
3819 * Insert pages into object.
3820 */
3821
3822 vm_object_lock(cpm_obj);
3823 for (offset = 0; offset < size; offset += PAGE_SIZE) {
3824 m = pages;
3825 pages = NEXT_PAGE(m);
0c530ab8 3826 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
91447636
A
3827
3828 assert(!m->gobbled);
3829 assert(!m->wanted);
3830 assert(!m->pageout);
3831 assert(!m->tabled);
b0d623f7 3832 assert(VM_PAGE_WIRED(m));
91447636
A
3833 /*
3834 * ENCRYPTED SWAP:
3835 * "m" is not supposed to be pageable, so it
3836 * should not be encrypted. It wouldn't be safe
3837 * to enter it in a new VM object while encrypted.
3838 */
3839 ASSERT_PAGE_DECRYPTED(m);
3840 assert(m->busy);
39037602 3841 assert(VM_PAGE_GET_PHYS_PAGE(m)>=(avail_start>>PAGE_SHIFT) && VM_PAGE_GET_PHYS_PAGE(m)<=(avail_end>>PAGE_SHIFT));
91447636
A
3842
3843 m->busy = FALSE;
3844 vm_page_insert(m, cpm_obj, offset);
3845 }
3846 assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
3847 vm_object_unlock(cpm_obj);
3848
3849 /*
3850 * Hang onto a reference on the object in case a
3851 * multi-threaded application for some reason decides
3852 * to deallocate the portion of the address space into
3853 * which we will insert this object.
3854 *
3855 * Unfortunately, we must insert the object now before
3856 * we can talk to the pmap module about which addresses
3857 * must be wired down. Hence, the race with a multi-
3858 * threaded app.
3859 */
3860 vm_object_reference(cpm_obj);
3861
3862 /*
3863 * Insert object into map.
3864 */
3865
3866 kr = vm_map_enter(
2d21ac55
A
3867 map,
3868 addr,
3869 size,
3870 (vm_map_offset_t)0,
3871 flags,
3872 cpm_obj,
3873 (vm_object_offset_t)0,
3874 FALSE,
3875 VM_PROT_ALL,
3876 VM_PROT_ALL,
3877 VM_INHERIT_DEFAULT);
91447636
A
3878
3879 if (kr != KERN_SUCCESS) {
3880 /*
3881 * A CPM object doesn't have can_persist set,
3882 * so all we have to do is deallocate it to
3883 * free up these pages.
3884 */
3885 assert(cpm_obj->pager_created == FALSE);
3886 assert(cpm_obj->can_persist == FALSE);
3887 assert(cpm_obj->pageout == FALSE);
3888 assert(cpm_obj->shadow == VM_OBJECT_NULL);
3889 vm_object_deallocate(cpm_obj); /* kill acquired ref */
3890 vm_object_deallocate(cpm_obj); /* kill creation ref */
3891 }
3892
3893 /*
3894 * Inform the physical mapping system that the
3895 * range of addresses may not fault, so that
3896 * page tables and such can be locked down as well.
3897 */
3898 start = *addr;
3899 end = start + size;
3900 pmap = vm_map_pmap(map);
3901 pmap_pageable(pmap, start, end, FALSE);
3902
3903 /*
3904 * Enter each page into the pmap, to avoid faults.
3905 * Note that this loop could be coded more efficiently,
3906 * if the need arose, rather than looking up each page
3907 * again.
3908 */
3909 for (offset = 0, va = start; offset < size;
3910 va += PAGE_SIZE, offset += PAGE_SIZE) {
2d21ac55
A
3911 int type_of_fault;
3912
91447636
A
3913 vm_object_lock(cpm_obj);
3914 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
91447636 3915 assert(m != VM_PAGE_NULL);
2d21ac55
A
3916
3917 vm_page_zero_fill(m);
3918
3919 type_of_fault = DBG_ZERO_FILL_FAULT;
3920
6d2010ae 3921 vm_fault_enter(m, pmap, va, VM_PROT_ALL, VM_PROT_WRITE,
fe8ab488 3922 VM_PAGE_WIRED(m), FALSE, FALSE, FALSE, 0, NULL,
2d21ac55
A
3923 &type_of_fault);
3924
3925 vm_object_unlock(cpm_obj);
91447636
A
3926 }
3927
3928#if MACH_ASSERT
3929 /*
3930 * Verify ordering in address space.
3931 */
3932 for (offset = 0; offset < size; offset += PAGE_SIZE) {
3933 vm_object_lock(cpm_obj);
3934 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
3935 vm_object_unlock(cpm_obj);
3936 if (m == VM_PAGE_NULL)
316670eb
A
3937 panic("vm_allocate_cpm: obj %p off 0x%llx no page",
3938 cpm_obj, (uint64_t)offset);
91447636
A
3939 assert(m->tabled);
3940 assert(!m->busy);
3941 assert(!m->wanted);
3942 assert(!m->fictitious);
3943 assert(!m->private);
3944 assert(!m->absent);
3945 assert(!m->error);
3946 assert(!m->cleaning);
316670eb 3947 assert(!m->laundry);
91447636
A
3948 assert(!m->precious);
3949 assert(!m->clustered);
3950 if (offset != 0) {
39037602 3951 if (VM_PAGE_GET_PHYS_PAGE(m) != prev_addr + 1) {
316670eb
A
3952 printf("start 0x%llx end 0x%llx va 0x%llx\n",
3953 (uint64_t)start, (uint64_t)end, (uint64_t)va);
3954 printf("obj %p off 0x%llx\n", cpm_obj, (uint64_t)offset);
3955 printf("m %p prev_address 0x%llx\n", m, (uint64_t)prev_addr);
91447636
A
3956 panic("vm_allocate_cpm: pages not contig!");
3957 }
3958 }
39037602 3959 prev_addr = VM_PAGE_GET_PHYS_PAGE(m);
91447636
A
3960 }
3961#endif /* MACH_ASSERT */
3962
3963 vm_object_deallocate(cpm_obj); /* kill extra ref */
3964
3965 return kr;
3966}
3967
3968
3969#else /* VM_CPM */
3970
3971/*
3972 * Interface is defined in all cases, but unless the kernel
3973 * is built explicitly for this option, the interface does
3974 * nothing.
3975 */
3976
3977kern_return_t
3978vm_map_enter_cpm(
3979 __unused vm_map_t map,
3980 __unused vm_map_offset_t *addr,
3981 __unused vm_map_size_t size,
3982 __unused int flags)
3983{
3984 return KERN_FAILURE;
3985}
3986#endif /* VM_CPM */
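
/*
 * Illustrative sketch, not part of the original file: allocating a
 * physically contiguous buffer with vm_map_enter_cpm.  This only does
 * real work when the kernel is built with VM_CPM; otherwise the stub
 * above returns KERN_FAILURE.  Per the comment above, the buffer is
 * later released in the normal way (vm_deallocate on the same range).
 */
static kern_return_t
example_alloc_contiguous(
	vm_map_t	map,
	vm_map_size_t	size,
	vm_map_offset_t	*out_addr)
{
	vm_map_offset_t	addr = 0;
	kern_return_t	kr;

	kr = vm_map_enter_cpm(map, &addr, size, VM_FLAGS_ANYWHERE);
	if (kr == KERN_SUCCESS)
		*out_addr = addr;	/* page-aligned, physically contiguous */
	return kr;
}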
3987
b0d623f7
A
3988/* Not used without nested pmaps */
3989#ifndef NO_NESTED_PMAP
2d21ac55
A
3990/*
3991 * Clip and unnest a portion of a nested submap mapping.
3992 */
b0d623f7
A
3993
3994
2d21ac55
A
3995static void
3996vm_map_clip_unnest(
3997 vm_map_t map,
3998 vm_map_entry_t entry,
3999 vm_map_offset_t start_unnest,
4000 vm_map_offset_t end_unnest)
4001{
b0d623f7
A
4002 vm_map_offset_t old_start_unnest = start_unnest;
4003 vm_map_offset_t old_end_unnest = end_unnest;
4004
2d21ac55 4005 assert(entry->is_sub_map);
3e170ce0 4006 assert(VME_SUBMAP(entry) != NULL);
fe8ab488 4007 assert(entry->use_pmap);
2d21ac55 4008
b0d623f7
A
4009 /*
4010 * Query the platform for the optimal unnest range.
4011 * DRK: There's some duplication of effort here, since
4012 * callers may have adjusted the range to some extent. This
4013 * routine was introduced to support 1GiB subtree nesting
4014 * for x86 platforms, which can also nest on 2MiB boundaries
4015 * depending on size/alignment.
4016 */
4017 if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) {
39037602
A
4018 assert(VME_SUBMAP(entry)->is_nested_map);
4019 assert(!VME_SUBMAP(entry)->disable_vmentry_reuse);
4020 log_unnest_badness(map,
4021 old_start_unnest,
4022 old_end_unnest,
4023 VME_SUBMAP(entry)->is_nested_map,
4024 (entry->vme_start +
4025 VME_SUBMAP(entry)->lowest_unnestable_start -
4026 VME_OFFSET(entry)));
b0d623f7
A
4027 }
4028
2d21ac55
A
4029 if (entry->vme_start > start_unnest ||
4030 entry->vme_end < end_unnest) {
4031 panic("vm_map_clip_unnest(0x%llx,0x%llx): "
4032 "bad nested entry: start=0x%llx end=0x%llx\n",
4033 (long long)start_unnest, (long long)end_unnest,
4034 (long long)entry->vme_start, (long long)entry->vme_end);
4035 }
b0d623f7 4036
2d21ac55
A
4037 if (start_unnest > entry->vme_start) {
4038 _vm_map_clip_start(&map->hdr,
4039 entry,
4040 start_unnest);
3e170ce0
A
4041 if (map->holelistenabled) {
4042 vm_map_store_update_first_free(map, NULL, FALSE);
4043 } else {
4044 vm_map_store_update_first_free(map, map->first_free, FALSE);
4045 }
2d21ac55
A
4046 }
4047 if (entry->vme_end > end_unnest) {
4048 _vm_map_clip_end(&map->hdr,
4049 entry,
4050 end_unnest);
3e170ce0
A
4051 if (map->holelistenabled) {
4052 vm_map_store_update_first_free(map, NULL, FALSE);
4053 } else {
4054 vm_map_store_update_first_free(map, map->first_free, FALSE);
4055 }
2d21ac55
A
4056 }
4057
4058 pmap_unnest(map->pmap,
4059 entry->vme_start,
4060 entry->vme_end - entry->vme_start);
316670eb 4061 if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
2d21ac55
A
4062 /* clean up parent map/maps */
4063 vm_map_submap_pmap_clean(
4064 map, entry->vme_start,
4065 entry->vme_end,
3e170ce0
A
4066 VME_SUBMAP(entry),
4067 VME_OFFSET(entry));
2d21ac55
A
4068 }
4069 entry->use_pmap = FALSE;
3e170ce0
A
4070 if ((map->pmap != kernel_pmap) &&
4071 (VME_ALIAS(entry) == VM_MEMORY_SHARED_PMAP)) {
4072 VME_ALIAS_SET(entry, VM_MEMORY_UNSHARED_PMAP);
316670eb 4073 }
2d21ac55 4074}
b0d623f7 4075#endif /* NO_NESTED_PMAP */
2d21ac55 4076
1c79356b
A
4077/*
4078 * vm_map_clip_start: [ internal use only ]
4079 *
4080 * Asserts that the given entry begins at or after
4081 * the specified address; if necessary,
4082 * it splits the entry into two.
4083 */
e2d2fc5c 4084void
2d21ac55
A
4085vm_map_clip_start(
4086 vm_map_t map,
4087 vm_map_entry_t entry,
4088 vm_map_offset_t startaddr)
4089{
0c530ab8 4090#ifndef NO_NESTED_PMAP
fe8ab488
A
4091 if (entry->is_sub_map &&
4092 entry->use_pmap &&
2d21ac55
A
4093 startaddr >= entry->vme_start) {
4094 vm_map_offset_t start_unnest, end_unnest;
4095
4096 /*
4097 * Make sure "startaddr" is no longer in a nested range
4098 * before we clip. Unnest only the minimum range the platform
4099 * can handle.
b0d623f7
A
4100 * vm_map_clip_unnest may perform additional adjustments to
4101 * the unnest range.
2d21ac55
A
4102 */
4103 start_unnest = startaddr & ~(pmap_nesting_size_min - 1);
4104 end_unnest = start_unnest + pmap_nesting_size_min;
4105 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
4106 }
4107#endif /* NO_NESTED_PMAP */
4108 if (startaddr > entry->vme_start) {
3e170ce0 4109 if (VME_OBJECT(entry) &&
2d21ac55 4110 !entry->is_sub_map &&
3e170ce0 4111 VME_OBJECT(entry)->phys_contiguous) {
2d21ac55
A
4112 pmap_remove(map->pmap,
4113 (addr64_t)(entry->vme_start),
4114 (addr64_t)(entry->vme_end));
4115 }
39037602
A
4116 if (entry->vme_atomic) {
4117 panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)\n", map, entry);
4118 }
2d21ac55 4119 _vm_map_clip_start(&map->hdr, entry, startaddr);
3e170ce0
A
4120 if (map->holelistenabled) {
4121 vm_map_store_update_first_free(map, NULL, FALSE);
4122 } else {
4123 vm_map_store_update_first_free(map, map->first_free, FALSE);
4124 }
2d21ac55
A
4125 }
4126}
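
/*
 * Illustrative worked example, not part of the original file: how the
 * unnest range above is derived from "startaddr".  With a hypothetical
 * pmap_nesting_size_min of 2MiB (0x200000) and startaddr 0x101234000:
 *
 *	start_unnest = 0x101234000 & ~(0x200000 - 1) = 0x101200000
 *	end_unnest   = 0x101200000 + 0x200000        = 0x101400000
 *
 * i.e. the smallest naturally aligned nesting granule containing
 * "startaddr" is unnested before the entry is clipped.
 */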
4127
1c79356b
A
4128
4129#define vm_map_copy_clip_start(copy, entry, startaddr) \
4130 MACRO_BEGIN \
4131 if ((startaddr) > (entry)->vme_start) \
4132 _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \
4133 MACRO_END
4134
4135/*
4136 * This routine is called only when it is known that
4137 * the entry must be split.
4138 */
91447636 4139static void
1c79356b 4140_vm_map_clip_start(
39037602
A
4141 struct vm_map_header *map_header,
4142 vm_map_entry_t entry,
4143 vm_map_offset_t start)
1c79356b 4144{
39037602 4145 vm_map_entry_t new_entry;
1c79356b
A
4146
4147 /*
4148 * Split off the front portion --
4149 * note that we must insert the new
4150 * entry BEFORE this one, so that
4151 * this entry has the specified starting
4152 * address.
4153 */
4154
fe8ab488
A
4155 if (entry->map_aligned) {
4156 assert(VM_MAP_PAGE_ALIGNED(start,
4157 VM_MAP_HDR_PAGE_MASK(map_header)));
4158 }
4159
7ddcb079 4160 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
1c79356b
A
4161 vm_map_entry_copy_full(new_entry, entry);
4162
4163 new_entry->vme_end = start;
e2d2fc5c 4164 assert(new_entry->vme_start < new_entry->vme_end);
3e170ce0 4165 VME_OFFSET_SET(entry, VME_OFFSET(entry) + (start - entry->vme_start));
e2d2fc5c 4166 assert(start < entry->vme_end);
1c79356b
A
4167 entry->vme_start = start;
4168
6d2010ae 4169 _vm_map_store_entry_link(map_header, entry->vme_prev, new_entry);
1c79356b
A
4170
4171 if (entry->is_sub_map)
3e170ce0 4172 vm_map_reference(VME_SUBMAP(new_entry));
1c79356b 4173 else
3e170ce0 4174 vm_object_reference(VME_OBJECT(new_entry));
1c79356b
A
4175}
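
/*
 * Illustrative worked example, not part of the original file: the
 * effect of _vm_map_clip_start on an entry covering [0x1000, 0x5000)
 * with object offset 0x0, clipped at start = 0x3000 (hypothetical
 * numbers):
 *
 *	new_entry: vme_start = 0x1000, vme_end = 0x3000, offset = 0x0
 *	entry:     vme_start = 0x3000, vme_end = 0x5000, offset = 0x2000
 *
 * new_entry is linked immediately before entry, and an extra reference
 * is taken on the backing object (or submap) since two entries now
 * refer to it.
 */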
4176
4177
4178/*
4179 * vm_map_clip_end: [ internal use only ]
4180 *
4181 * Asserts that the given entry ends at or before
4182 * the specified address; if necessary,
4183 * it splits the entry into two.
4184 */
e2d2fc5c 4185void
2d21ac55
A
4186vm_map_clip_end(
4187 vm_map_t map,
4188 vm_map_entry_t entry,
4189 vm_map_offset_t endaddr)
4190{
4191 if (endaddr > entry->vme_end) {
4192 /*
4193 * Within the scope of this clipping, limit "endaddr" to
4194 * the end of this map entry...
4195 */
4196 endaddr = entry->vme_end;
4197 }
4198#ifndef NO_NESTED_PMAP
fe8ab488 4199 if (entry->is_sub_map && entry->use_pmap) {
2d21ac55
A
4200 vm_map_offset_t start_unnest, end_unnest;
4201
4202 /*
4203 * Make sure the range between the start of this entry and
4204 * the new "endaddr" is no longer nested before we clip.
4205 * Unnest only the minimum range the platform can handle.
b0d623f7
A
4206 * vm_map_clip_unnest may perform additional adjustments to
4207 * the unnest range.
2d21ac55
A
4208 */
4209 start_unnest = entry->vme_start;
4210 end_unnest =
4211 (endaddr + pmap_nesting_size_min - 1) &
4212 ~(pmap_nesting_size_min - 1);
4213 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
4214 }
4215#endif /* NO_NESTED_PMAP */
4216 if (endaddr < entry->vme_end) {
3e170ce0 4217 if (VME_OBJECT(entry) &&
2d21ac55 4218 !entry->is_sub_map &&
3e170ce0 4219 VME_OBJECT(entry)->phys_contiguous) {
2d21ac55
A
4220 pmap_remove(map->pmap,
4221 (addr64_t)(entry->vme_start),
4222 (addr64_t)(entry->vme_end));
4223 }
39037602
A
4224 if (entry->vme_atomic) {
4225 panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)\n", map, entry);
4226 }
2d21ac55 4227 _vm_map_clip_end(&map->hdr, entry, endaddr);
3e170ce0
A
4228 if (map->holelistenabled) {
4229 vm_map_store_update_first_free(map, NULL, FALSE);
4230 } else {
4231 vm_map_store_update_first_free(map, map->first_free, FALSE);
4232 }
2d21ac55
A
4233 }
4234}
0c530ab8 4235
1c79356b
A
4236
4237#define vm_map_copy_clip_end(copy, entry, endaddr) \
4238 MACRO_BEGIN \
4239 if ((endaddr) < (entry)->vme_end) \
4240 _vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \
4241 MACRO_END
4242
4243/*
4244 * This routine is called only when it is known that
4245 * the entry must be split.
4246 */
91447636 4247static void
1c79356b 4248_vm_map_clip_end(
39037602
A
4249 struct vm_map_header *map_header,
4250 vm_map_entry_t entry,
4251 vm_map_offset_t end)
1c79356b 4252{
39037602 4253 vm_map_entry_t new_entry;
1c79356b
A
4254
4255 /*
4256 * Create a new entry and insert it
4257 * AFTER the specified entry
4258 */
4259
fe8ab488
A
4260 if (entry->map_aligned) {
4261 assert(VM_MAP_PAGE_ALIGNED(end,
4262 VM_MAP_HDR_PAGE_MASK(map_header)));
4263 }
4264
7ddcb079 4265 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
1c79356b
A
4266 vm_map_entry_copy_full(new_entry, entry);
4267
e2d2fc5c 4268 assert(entry->vme_start < end);
1c79356b 4269 new_entry->vme_start = entry->vme_end = end;
3e170ce0
A
4270 VME_OFFSET_SET(new_entry,
4271 VME_OFFSET(new_entry) + (end - entry->vme_start));
e2d2fc5c 4272 assert(new_entry->vme_start < new_entry->vme_end);
1c79356b 4273
6d2010ae 4274 _vm_map_store_entry_link(map_header, entry, new_entry);
1c79356b
A
4275
4276 if (entry->is_sub_map)
3e170ce0 4277 vm_map_reference(VME_SUBMAP(new_entry));
1c79356b 4278 else
3e170ce0 4279 vm_object_reference(VME_OBJECT(new_entry));
1c79356b
A
4280}
4281
4282
4283/*
4284 * VM_MAP_RANGE_CHECK: [ internal use only ]
4285 *
4286 * Asserts that the starting and ending region
4287 * addresses fall within the valid range of the map.
4288 */
2d21ac55
A
4289#define VM_MAP_RANGE_CHECK(map, start, end) \
4290 MACRO_BEGIN \
4291 if (start < vm_map_min(map)) \
4292 start = vm_map_min(map); \
4293 if (end > vm_map_max(map)) \
4294 end = vm_map_max(map); \
4295 if (start > end) \
4296 start = end; \
4297 MACRO_END
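
/*
 * Illustrative worked example, not part of the original file: for a
 * hypothetical map with vm_map_min = 0x1000 and vm_map_max = 0x8000,
 * VM_MAP_RANGE_CHECK clamps requests as follows:
 *
 *	start = 0x0,    end = 0x2000  ->  start = 0x1000, end = 0x2000
 *	start = 0x3000, end = 0x9000  ->  start = 0x3000, end = 0x8000
 *	start = 0x5000, end = 0x3000  ->  start = end = 0x3000 (empty)
 */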
1c79356b
A
4298
4299/*
4300 * vm_map_range_check: [ internal use only ]
4301 *
4302 * Check that the region defined by the specified start and
4303 * end addresses is wholly contained within a single map
4304 * entry or set of adjacent map entries of the specified map,
4305 * i.e. the specified region contains no unmapped space.
4306 * If any or all of the region is unmapped, FALSE is returned.
4307 * Otherwise, TRUE is returned and if the output argument 'entry'
4308 * is not NULL it points to the map entry containing the start
4309 * of the region.
4310 *
4311 * The map is locked for reading on entry and is left locked.
4312 */
91447636 4313static boolean_t
1c79356b 4314vm_map_range_check(
39037602
A
4315 vm_map_t map,
4316 vm_map_offset_t start,
4317 vm_map_offset_t end,
1c79356b
A
4318 vm_map_entry_t *entry)
4319{
4320 vm_map_entry_t cur;
39037602 4321 vm_map_offset_t prev;
1c79356b
A
4322
4323 /*
4324 * Basic sanity checks first
4325 */
4326 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
4327 return (FALSE);
4328
4329 /*
4330 * Check first if the region starts within a valid
4331 * mapping for the map.
4332 */
4333 if (!vm_map_lookup_entry(map, start, &cur))
4334 return (FALSE);
4335
4336 /*
4337 * Optimize for the case that the region is contained
4338 * in a single map entry.
4339 */
4340 if (entry != (vm_map_entry_t *) NULL)
4341 *entry = cur;
4342 if (end <= cur->vme_end)
4343 return (TRUE);
4344
4345 /*
4346 * If the region is not wholly contained within a
4347 * single entry, walk the entries looking for holes.
4348 */
4349 prev = cur->vme_end;
4350 cur = cur->vme_next;
4351 while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
4352 if (end <= cur->vme_end)
4353 return (TRUE);
4354 prev = cur->vme_end;
4355 cur = cur->vme_next;
4356 }
4357 return (FALSE);
4358}
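
/*
 * Illustrative sketch, not part of the original file: a hypothetical
 * in-file caller using vm_map_range_check to validate a range before
 * operating on it.  The map must be locked by the caller, as noted in
 * the comment above.
 */
static kern_return_t
example_validate_range(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	vm_map_entry_t	first;

	vm_map_lock_read(map);
	if (!vm_map_range_check(map, start, end, &first)) {
		/* a hole or unmapped space lies somewhere in [start, end) */
		vm_map_unlock_read(map);
		return KERN_INVALID_ADDRESS;
	}
	/* "first" is the entry containing "start"; operate on it here */
	vm_map_unlock_read(map);
	return KERN_SUCCESS;
}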
4359
4360/*
4361 * vm_map_submap: [ kernel use only ]
4362 *
4363 * Mark the given range as handled by a subordinate map.
4364 *
4365 * This range must have been created with vm_map_find using
4366 * the vm_submap_object, and no other operations may have been
4367 * performed on this range prior to calling vm_map_submap.
4368 *
4369 * Only a limited number of operations can be performed
4370 * within this range after calling vm_map_submap:
4371 * vm_fault
4372 * [Don't try vm_map_copyin!]
4373 *
4374 * To remove a submapping, one must first remove the
4375 * range from the superior map, and then destroy the
4376 * submap (if desired). [Better yet, don't try it.]
4377 */
4378kern_return_t
4379vm_map_submap(
fe8ab488 4380 vm_map_t map,
91447636
A
4381 vm_map_offset_t start,
4382 vm_map_offset_t end,
fe8ab488 4383 vm_map_t submap,
91447636 4384 vm_map_offset_t offset,
0c530ab8 4385#ifdef NO_NESTED_PMAP
91447636 4386 __unused
0c530ab8 4387#endif /* NO_NESTED_PMAP */
fe8ab488 4388 boolean_t use_pmap)
1c79356b
A
4389{
4390 vm_map_entry_t entry;
39037602
A
4391 kern_return_t result = KERN_INVALID_ARGUMENT;
4392 vm_object_t object;
1c79356b
A
4393
4394 vm_map_lock(map);
4395
2d21ac55 4396 if (! vm_map_lookup_entry(map, start, &entry)) {
1c79356b 4397 entry = entry->vme_next;
2d21ac55 4398 }
1c79356b 4399
2d21ac55
A
4400 if (entry == vm_map_to_entry(map) ||
4401 entry->is_sub_map) {
1c79356b
A
4402 vm_map_unlock(map);
4403 return KERN_INVALID_ARGUMENT;
4404 }
4405
2d21ac55 4406 vm_map_clip_start(map, entry, start);
1c79356b
A
4407 vm_map_clip_end(map, entry, end);
4408
4409 if ((entry->vme_start == start) && (entry->vme_end == end) &&
4410 (!entry->is_sub_map) &&
3e170ce0 4411 ((object = VME_OBJECT(entry)) == vm_submap_object) &&
1c79356b
A
4412 (object->resident_page_count == 0) &&
4413 (object->copy == VM_OBJECT_NULL) &&
4414 (object->shadow == VM_OBJECT_NULL) &&
4415 (!object->pager_created)) {
3e170ce0
A
4416 VME_OFFSET_SET(entry, (vm_object_offset_t)offset);
4417 VME_OBJECT_SET(entry, VM_OBJECT_NULL);
2d21ac55
A
4418 vm_object_deallocate(object);
4419 entry->is_sub_map = TRUE;
fe8ab488 4420 entry->use_pmap = FALSE;
3e170ce0 4421 VME_SUBMAP_SET(entry, submap);
2d21ac55 4422 vm_map_reference(submap);
316670eb
A
4423 if (submap->mapped_in_other_pmaps == FALSE &&
4424 vm_map_pmap(submap) != PMAP_NULL &&
4425 vm_map_pmap(submap) != vm_map_pmap(map)) {
4426 /*
4427 * This submap is being mapped in a map
4428 * that uses a different pmap.
4429 * Set its "mapped_in_other_pmaps" flag
4430 * to indicate that we now need to
4431 * remove mappings from all pmaps rather
4432 * than just the submap's pmap.
4433 */
4434 submap->mapped_in_other_pmaps = TRUE;
4435 }
2d21ac55 4436
0c530ab8 4437#ifndef NO_NESTED_PMAP
2d21ac55
A
4438 if (use_pmap) {
4439 /* nest if platform code will allow */
4440 if(submap->pmap == NULL) {
316670eb
A
4441 ledger_t ledger = map->pmap->ledger;
4442 submap->pmap = pmap_create(ledger,
4443 (vm_map_size_t) 0, FALSE);
2d21ac55
A
4444 if(submap->pmap == PMAP_NULL) {
4445 vm_map_unlock(map);
4446 return(KERN_NO_SPACE);
55e303ae 4447 }
55e303ae 4448 }
2d21ac55 4449 result = pmap_nest(map->pmap,
3e170ce0 4450 (VME_SUBMAP(entry))->pmap,
2d21ac55
A
4451 (addr64_t)start,
4452 (addr64_t)start,
4453 (uint64_t)(end - start));
4454 if(result)
4455 panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result);
4456 entry->use_pmap = TRUE;
4457 }
0c530ab8 4458#else /* NO_NESTED_PMAP */
2d21ac55 4459 pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end);
0c530ab8 4460#endif /* NO_NESTED_PMAP */
2d21ac55 4461 result = KERN_SUCCESS;
1c79356b
A
4462 }
4463 vm_map_unlock(map);
4464
4465 return(result);
4466}
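
/*
 * Illustrative sketch, not part of the original file: nesting a submap
 * into a parent map.  Per the comment above, the range must previously
 * have been created against vm_submap_object; how "submap" itself was
 * created and populated (e.g. via vm_map_create) is assumed to have
 * happened elsewhere.
 */
static kern_return_t
example_nest_submap(
	vm_map_t	parent,
	vm_map_t	submap,		/* assumed: already populated */
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	/*
	 * Mark [start, end) of "parent" as handled by "submap", with the
	 * submap's contents beginning at offset "start" and, where the
	 * platform allows it, sharing page tables (use_pmap == TRUE).
	 */
	return vm_map_submap(parent, start, end, submap,
			     (vm_map_offset_t)start, TRUE);
}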
4467
490019cf 4468
1c79356b
A
4469/*
4470 * vm_map_protect:
4471 *
4472 * Sets the protection of the specified address
4473 * region in the target map. If "set_max" is
4474 * specified, the maximum protection is to be set;
4475 * otherwise, only the current protection is affected.
4476 */
4477kern_return_t
4478vm_map_protect(
39037602
A
4479 vm_map_t map,
4480 vm_map_offset_t start,
4481 vm_map_offset_t end,
4482 vm_prot_t new_prot,
4483 boolean_t set_max)
4484{
4485 vm_map_entry_t current;
4486 vm_map_offset_t prev;
1c79356b
A
4487 vm_map_entry_t entry;
4488 vm_prot_t new_max;
1c79356b
A
4489
4490 XPR(XPR_VM_MAP,
2d21ac55 4491 "vm_map_protect, 0x%X start 0x%X end 0x%X, new 0x%X %d",
b0d623f7 4492 map, start, end, new_prot, set_max);
1c79356b
A
4493
4494 vm_map_lock(map);
4495
91447636
A
4496 /* LP64todo - remove this check when vm_map_commpage64()
4497 * no longer has to stuff in a map_entry for the commpage
4498 * above the map's max_offset.
4499 */
4500 if (start >= map->max_offset) {
4501 vm_map_unlock(map);
4502 return(KERN_INVALID_ADDRESS);
4503 }
4504
b0d623f7
A
4505 while(1) {
4506 /*
4507 * Lookup the entry. If it doesn't start in a valid
4508 * entry, return an error.
4509 */
4510 if (! vm_map_lookup_entry(map, start, &entry)) {
4511 vm_map_unlock(map);
4512 return(KERN_INVALID_ADDRESS);
4513 }
4514
4515 if (entry->superpage_size && (start & (SUPERPAGE_SIZE-1))) { /* extend request to whole entry */
4516 start = SUPERPAGE_ROUND_DOWN(start);
4517 continue;
4518 }
4519 break;
4520 }
4521 if (entry->superpage_size)
4522 end = SUPERPAGE_ROUND_UP(end);
1c79356b
A
4523
4524 /*
4525 * Make a first pass to check for protection and address
4526 * violations.
4527 */
4528
4529 current = entry;
4530 prev = current->vme_start;
4531 while ((current != vm_map_to_entry(map)) &&
4532 (current->vme_start < end)) {
4533
4534 /*
4535 * If there is a hole, return an error.
4536 */
4537 if (current->vme_start != prev) {
4538 vm_map_unlock(map);
4539 return(KERN_INVALID_ADDRESS);
4540 }
4541
4542 new_max = current->max_protection;
4543 if(new_prot & VM_PROT_COPY) {
4544 new_max |= VM_PROT_WRITE;
4545 if ((new_prot & (new_max | VM_PROT_COPY)) != new_prot) {
4546 vm_map_unlock(map);
4547 return(KERN_PROTECTION_FAILURE);
4548 }
4549 } else {
4550 if ((new_prot & new_max) != new_prot) {
4551 vm_map_unlock(map);
4552 return(KERN_PROTECTION_FAILURE);
4553 }
4554 }
4555
593a1d5f 4556
1c79356b
A
4557 prev = current->vme_end;
4558 current = current->vme_next;
4559 }
39037602
A
4560
4561
1c79356b
A
4562 if (end > prev) {
4563 vm_map_unlock(map);
4564 return(KERN_INVALID_ADDRESS);
4565 }
4566
4567 /*
4568 * Go back and fix up protections.
4569 * Clip to start here if the range starts within
4570 * the entry.
4571 */
4572
4573 current = entry;
2d21ac55
A
4574 if (current != vm_map_to_entry(map)) {
4575 /* clip and unnest if necessary */
4576 vm_map_clip_start(map, current, start);
1c79356b 4577 }
2d21ac55 4578
1c79356b
A
4579 while ((current != vm_map_to_entry(map)) &&
4580 (current->vme_start < end)) {
4581
4582 vm_prot_t old_prot;
4583
4584 vm_map_clip_end(map, current, end);
4585
fe8ab488
A
4586 if (current->is_sub_map) {
4587 /* clipping did unnest if needed */
4588 assert(!current->use_pmap);
4589 }
2d21ac55 4590
1c79356b
A
4591 old_prot = current->protection;
4592
4593 if(new_prot & VM_PROT_COPY) {
4594 /* caller is asking specifically to copy the */
4595 /* mapped data, this implies that max protection */
4596 /* will include write. Caller must be prepared */
4597 /* for loss of shared memory communication in the */
4598 /* target area after taking this step */
6d2010ae 4599
3e170ce0
A
4600 if (current->is_sub_map == FALSE &&
4601 VME_OBJECT(current) == VM_OBJECT_NULL) {
4602 VME_OBJECT_SET(current,
4603 vm_object_allocate(
4604 (vm_map_size_t)
4605 (current->vme_end -
4606 current->vme_start)));
4607 VME_OFFSET_SET(current, 0);
fe8ab488 4608 assert(current->use_pmap);
6d2010ae 4609 }
3e170ce0 4610 assert(current->wired_count == 0);
1c79356b
A
4611 current->needs_copy = TRUE;
4612 current->max_protection |= VM_PROT_WRITE;
4613 }
4614
4615 if (set_max)
4616 current->protection =
4617 (current->max_protection =
2d21ac55
A
4618 new_prot & ~VM_PROT_COPY) &
4619 old_prot;
1c79356b
A
4620 else
4621 current->protection = new_prot & ~VM_PROT_COPY;
4622
4623 /*
4624 * Update physical map if necessary.
4625 * If the request is to turn off write protection,
4626 * we won't do it for real (in pmap). This is because
4627 * it would cause copy-on-write to fail. We've already
4628 * set the new protection in the map, so if a
4629 * write-protect fault occurs, it will be fixed up
4630 * properly, COW or not.
4631 */
1c79356b 4632 if (current->protection != old_prot) {
1c79356b
A
4633 /* Look one level in we support nested pmaps */
4634 /* from mapped submaps which are direct entries */
4635 /* in our map */
0c530ab8 4636
2d21ac55 4637 vm_prot_t prot;
0c530ab8 4638
39037602
A
4639 prot = current->protection;
4640 if (current->is_sub_map || (VME_OBJECT(current) == NULL) || (VME_OBJECT(current) != compressor_object)) {
4641 prot &= ~VM_PROT_WRITE;
4642 } else {
4643 assert(!VME_OBJECT(current)->code_signed);
4644 assert(VME_OBJECT(current)->copy_strategy == MEMORY_OBJECT_COPY_NONE);
4645 }
2d21ac55 4646
3e170ce0 4647 if (override_nx(map, VME_ALIAS(current)) && prot)
0c530ab8 4648 prot |= VM_PROT_EXECUTE;
2d21ac55 4649
490019cf 4650
0c530ab8 4651 if (current->is_sub_map && current->use_pmap) {
3e170ce0 4652 pmap_protect(VME_SUBMAP(current)->pmap,
2d21ac55
A
4653 current->vme_start,
4654 current->vme_end,
4655 prot);
1c79356b 4656 } else {
2d21ac55
A
4657 pmap_protect(map->pmap,
4658 current->vme_start,
4659 current->vme_end,
4660 prot);
1c79356b 4661 }
1c79356b
A
4662 }
4663 current = current->vme_next;
4664 }
4665
5353443c 4666 current = entry;
91447636
A
4667 while ((current != vm_map_to_entry(map)) &&
4668 (current->vme_start <= end)) {
5353443c
A
4669 vm_map_simplify_entry(map, current);
4670 current = current->vme_next;
4671 }
4672
1c79356b
A
4673 vm_map_unlock(map);
4674 return(KERN_SUCCESS);
4675}
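
/*
 * Illustrative sketch, not part of the original file: two typical uses
 * of vm_map_protect on a page-aligned range [start, end) of "map".
 */
static void
example_protect(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	/* Drop the current protection to read-only; max stays unchanged. */
	(void) vm_map_protect(map, start, end,
			      VM_PROT_READ, FALSE /* set_max */);

	/*
	 * Request a private, writable copy of the range: VM_PROT_COPY
	 * sets needs_copy and raises max_protection to include write,
	 * at the cost of losing sharing with the original object (see
	 * the comment in the loop above).
	 */
	(void) vm_map_protect(map, start, end,
			      VM_PROT_READ | VM_PROT_WRITE | VM_PROT_COPY,
			      FALSE);
}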
4676
4677/*
4678 * vm_map_inherit:
4679 *
4680 * Sets the inheritance of the specified address
4681 * range in the target map. Inheritance
4682 * affects how the map will be shared with
4683 * child maps at the time of vm_map_fork.
4684 */
4685kern_return_t
4686vm_map_inherit(
39037602
A
4687 vm_map_t map,
4688 vm_map_offset_t start,
4689 vm_map_offset_t end,
4690 vm_inherit_t new_inheritance)
1c79356b 4691{
39037602 4692 vm_map_entry_t entry;
1c79356b
A
4693 vm_map_entry_t temp_entry;
4694
4695 vm_map_lock(map);
4696
4697 VM_MAP_RANGE_CHECK(map, start, end);
4698
4699 if (vm_map_lookup_entry(map, start, &temp_entry)) {
4700 entry = temp_entry;
1c79356b
A
4701 }
4702 else {
4703 temp_entry = temp_entry->vme_next;
4704 entry = temp_entry;
4705 }
4706
4707 /* first check entire range for submaps which can't support the */
4708 /* given inheritance. */
4709 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
4710 if(entry->is_sub_map) {
91447636
A
4711 if(new_inheritance == VM_INHERIT_COPY) {
4712 vm_map_unlock(map);
1c79356b 4713 return(KERN_INVALID_ARGUMENT);
91447636 4714 }
1c79356b
A
4715 }
4716
4717 entry = entry->vme_next;
4718 }
4719
4720 entry = temp_entry;
2d21ac55
A
4721 if (entry != vm_map_to_entry(map)) {
4722 /* clip and unnest if necessary */
4723 vm_map_clip_start(map, entry, start);
4724 }
1c79356b
A
4725
4726 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
4727 vm_map_clip_end(map, entry, end);
fe8ab488
A
4728 if (entry->is_sub_map) {
4729 /* clip did unnest if needed */
4730 assert(!entry->use_pmap);
4731 }
1c79356b
A
4732
4733 entry->inheritance = new_inheritance;
4734
4735 entry = entry->vme_next;
4736 }
4737
4738 vm_map_unlock(map);
4739 return(KERN_SUCCESS);
4740}
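
/*
 * Illustrative sketch, not part of the original file: marking a range
 * so that it is not propagated to child maps created by vm_map_fork.
 * Note from the check above that VM_INHERIT_COPY is rejected for
 * ranges backed by submaps.
 */
static kern_return_t
example_dont_inherit(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	return vm_map_inherit(map, start, end, VM_INHERIT_NONE);
}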
4741
2d21ac55
A
4742/*
4743 * Update the accounting for the amount of wired memory in this map. If the user has
4744 * exceeded the defined limits, then we fail. Wiring on behalf of the kernel never fails.
4745 */
4746
4747static kern_return_t
4748add_wire_counts(
4749 vm_map_t map,
4750 vm_map_entry_t entry,
4751 boolean_t user_wire)
4752{
4753 vm_map_size_t size;
4754
4755 if (user_wire) {
6d2010ae 4756 unsigned int total_wire_count = vm_page_wire_count + vm_lopage_free_count;
2d21ac55
A
4757
4758 /*
4759 * We're wiring memory at the request of the user. Check if this is the first time the user is wiring
4760 * this map entry.
4761 */
4762
4763 if (entry->user_wired_count == 0) {
4764 size = entry->vme_end - entry->vme_start;
4765
4766 /*
4767 * Since this is the first time the user is wiring this map entry, check to see if we're
4768 * exceeding the user wire limits. There is a per map limit which is the smaller of either
4769 * the process's rlimit or the global vm_user_wire_limit which caps this value. There is also
4770 * a system-wide limit on the amount of memory all users can wire. If the user is over either
4771 * limit, then we fail.
4772 */
4773
4774 if(size + map->user_wire_size > MIN(map->user_wire_limit, vm_user_wire_limit) ||
6d2010ae
A
4775 size + ptoa_64(total_wire_count) > vm_global_user_wire_limit ||
4776 size + ptoa_64(total_wire_count) > max_mem - vm_global_no_user_wire_amount)
2d21ac55
A
4777 return KERN_RESOURCE_SHORTAGE;
4778
4779 /*
4780 * The first time the user wires an entry, we also increment the wired_count and add this to
4781 * the total that has been wired in the map.
4782 */
4783
4784 if (entry->wired_count >= MAX_WIRE_COUNT)
4785 return KERN_FAILURE;
4786
4787 entry->wired_count++;
4788 map->user_wire_size += size;
4789 }
4790
4791 if (entry->user_wired_count >= MAX_WIRE_COUNT)
4792 return KERN_FAILURE;
4793
4794 entry->user_wired_count++;
4795
4796 } else {
4797
4798 /*
4799 * The kernel's wiring the memory. Just bump the count and continue.
4800 */
4801
4802 if (entry->wired_count >= MAX_WIRE_COUNT)
4803 panic("vm_map_wire: too many wirings");
4804
4805 entry->wired_count++;
4806 }
4807
4808 return KERN_SUCCESS;
4809}
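
/*
 * Illustrative worked example, not part of the original file, using
 * hypothetical numbers: a user wiring a 16 MiB entry for the first
 * time (entry->user_wired_count == 0), with
 *
 *	map->user_wire_size  = 48 MiB	(already wired in this map)
 *	map->user_wire_limit = 64 MiB	(per-task rlimit)
 *	vm_user_wire_limit   = 1 GiB	(global per-map cap)
 *
 * fails with KERN_RESOURCE_SHORTAGE, because
 * 16 MiB + 48 MiB > MIN(64 MiB, 1 GiB).  The system-wide checks against
 * vm_global_user_wire_limit and max_mem - vm_global_no_user_wire_amount
 * are applied in the same way.  Kernel wirings skip all of these checks.
 */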
4810
4811/*
4812 * Update the memory wiring accounting now that the given map entry is being unwired.
4813 */
4814
4815static void
4816subtract_wire_counts(
4817 vm_map_t map,
4818 vm_map_entry_t entry,
4819 boolean_t user_wire)
4820{
4821
4822 if (user_wire) {
4823
4824 /*
4825 * We're unwiring memory at the request of the user. See if we're removing the last user wire reference.
4826 */
4827
4828 if (entry->user_wired_count == 1) {
4829
4830 /*
4831 * We're removing the last user wire reference. Decrement the wired_count and the total
4832 * user wired memory for this map.
4833 */
4834
4835 assert(entry->wired_count >= 1);
4836 entry->wired_count--;
4837 map->user_wire_size -= entry->vme_end - entry->vme_start;
4838 }
4839
4840 assert(entry->user_wired_count >= 1);
4841 entry->user_wired_count--;
4842
4843 } else {
4844
4845 /*
4846 * The kernel is unwiring the memory. Just update the count.
4847 */
4848
4849 assert(entry->wired_count >= 1);
4850 entry->wired_count--;
4851 }
4852}
4853
39037602 4854
1c79356b
A
4855/*
4856 * vm_map_wire:
4857 *
4858 * Sets the pageability of the specified address range in the
4859 * target map as wired. Regions specified as not pageable require
4860 * locked-down physical memory and physical page maps. The
4861 * access_type variable indicates types of accesses that must not
4862 * generate page faults. This is checked against protection of
4863 * memory being locked-down.
4864 *
4865 * The map must not be locked, but a reference must remain to the
4866 * map throughout the call.
4867 */
91447636 4868static kern_return_t
1c79356b 4869vm_map_wire_nested(
39037602
A
4870 vm_map_t map,
4871 vm_map_offset_t start,
4872 vm_map_offset_t end,
4873 vm_prot_t caller_prot,
1c79356b 4874 boolean_t user_wire,
9bccf70c 4875 pmap_t map_pmap,
fe8ab488
A
4876 vm_map_offset_t pmap_addr,
4877 ppnum_t *physpage_p)
1c79356b 4878{
39037602
A
4879 vm_map_entry_t entry;
4880 vm_prot_t access_type;
1c79356b 4881 struct vm_map_entry *first_entry, tmp_entry;
91447636 4882 vm_map_t real_map;
39037602 4883 vm_map_offset_t s,e;
1c79356b
A
4884 kern_return_t rc;
4885 boolean_t need_wakeup;
4886 boolean_t main_map = FALSE;
9bccf70c 4887 wait_interrupt_t interruptible_state;
0b4e3aa0 4888 thread_t cur_thread;
1c79356b 4889 unsigned int last_timestamp;
91447636 4890 vm_map_size_t size;
fe8ab488
A
4891 boolean_t wire_and_extract;
4892
3e170ce0
A
4893 access_type = (caller_prot & VM_PROT_ALL);
4894
fe8ab488
A
4895 wire_and_extract = FALSE;
4896 if (physpage_p != NULL) {
4897 /*
4898 * The caller wants the physical page number of the
4899 * wired page. We return only one physical page number
4900 * so this works for only one page at a time.
4901 */
4902 if ((end - start) != PAGE_SIZE) {
4903 return KERN_INVALID_ARGUMENT;
4904 }
4905 wire_and_extract = TRUE;
4906 *physpage_p = 0;
4907 }
1c79356b
A
4908
4909 vm_map_lock(map);
4910 if(map_pmap == NULL)
4911 main_map = TRUE;
4912 last_timestamp = map->timestamp;
4913
4914 VM_MAP_RANGE_CHECK(map, start, end);
4915 assert(page_aligned(start));
4916 assert(page_aligned(end));
39236c6e
A
4917 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
4918 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
0b4e3aa0
A
4919 if (start == end) {
4920 /* We wired what the caller asked for, zero pages */
4921 vm_map_unlock(map);
4922 return KERN_SUCCESS;
4923 }
1c79356b 4924
2d21ac55
A
4925 need_wakeup = FALSE;
4926 cur_thread = current_thread();
4927
4928 s = start;
4929 rc = KERN_SUCCESS;
4930
4931 if (vm_map_lookup_entry(map, s, &first_entry)) {
1c79356b 4932 entry = first_entry;
2d21ac55
A
4933 /*
4934 * vm_map_clip_start will be done later.
4935 * We don't want to unnest any nested submaps here !
4936 */
1c79356b
A
4937 } else {
4938 /* Start address is not in map */
2d21ac55
A
4939 rc = KERN_INVALID_ADDRESS;
4940 goto done;
1c79356b
A
4941 }
4942
2d21ac55
A
4943 while ((entry != vm_map_to_entry(map)) && (s < end)) {
4944 /*
4945 * At this point, we have wired from "start" to "s".
4946 * We still need to wire from "s" to "end".
4947 *
4948 * "entry" hasn't been clipped, so it could start before "s"
4949 * and/or end after "end".
4950 */
4951
4952 /* "e" is how far we want to wire in this entry */
4953 e = entry->vme_end;
4954 if (e > end)
4955 e = end;
4956
1c79356b
A
4957 /*
4958 * If another thread is wiring/unwiring this entry then
4959 * block after informing other thread to wake us up.
4960 */
4961 if (entry->in_transition) {
9bccf70c
A
4962 wait_result_t wait_result;
4963
1c79356b
A
4964 /*
4965 * We have not clipped the entry. Make sure that
4966 * the start address is in range so that the lookup
4967 * below will succeed.
2d21ac55
A
4968 * "s" is the current starting point: we've already
4969 * wired from "start" to "s" and we still have
4970 * to wire from "s" to "end".
1c79356b 4971 */
1c79356b
A
4972
4973 entry->needs_wakeup = TRUE;
4974
4975 /*
4976 * wake up anybody waiting on entries that we have
4977 * already wired.
4978 */
4979 if (need_wakeup) {
4980 vm_map_entry_wakeup(map);
4981 need_wakeup = FALSE;
4982 }
4983 /*
4984 * User wiring is interruptible
4985 */
9bccf70c 4986 wait_result = vm_map_entry_wait(map,
2d21ac55
A
4987 (user_wire) ? THREAD_ABORTSAFE :
4988 THREAD_UNINT);
9bccf70c 4989 if (user_wire && wait_result == THREAD_INTERRUPTED) {
1c79356b
A
4990 /*
4991 * undo the wirings we have done so far
4992 * We do not clear the needs_wakeup flag,
4993 * because we cannot tell if we were the
4994 * only one waiting.
4995 */
2d21ac55
A
4996 rc = KERN_FAILURE;
4997 goto done;
1c79356b
A
4998 }
4999
1c79356b
A
5000 /*
5001 * Cannot avoid a lookup here. reset timestamp.
5002 */
5003 last_timestamp = map->timestamp;
5004
5005 /*
5006 * The entry could have been clipped, look it up again.
5008 * The worst that can happen is that it may not exist anymore.
5008 */
5009 if (!vm_map_lookup_entry(map, s, &first_entry)) {
1c79356b
A
5010 /*
5011 * User: undo everything up to the previous
5012 * entry. Let vm_map_unwire worry about
5013 * checking the validity of the range.
5014 */
2d21ac55
A
5015 rc = KERN_FAILURE;
5016 goto done;
1c79356b
A
5017 }
5018 entry = first_entry;
5019 continue;
5020 }
2d21ac55
A
5021
5022 if (entry->is_sub_map) {
91447636
A
5023 vm_map_offset_t sub_start;
5024 vm_map_offset_t sub_end;
5025 vm_map_offset_t local_start;
5026 vm_map_offset_t local_end;
1c79356b 5027 pmap_t pmap;
2d21ac55 5028
fe8ab488
A
5029 if (wire_and_extract) {
5030 /*
5031 * Wiring would result in copy-on-write
5032 * which would not be compatible with
5033 * the sharing we have with the original
5034 * provider of this memory.
5035 */
5036 rc = KERN_INVALID_ARGUMENT;
5037 goto done;
5038 }
5039
2d21ac55 5040 vm_map_clip_start(map, entry, s);
1c79356b
A
5041 vm_map_clip_end(map, entry, end);
5042
3e170ce0 5043 sub_start = VME_OFFSET(entry);
2d21ac55 5044 sub_end = entry->vme_end;
3e170ce0 5045 sub_end += VME_OFFSET(entry) - entry->vme_start;
2d21ac55 5046
1c79356b
A
5047 local_end = entry->vme_end;
5048 if(map_pmap == NULL) {
2d21ac55
A
5049 vm_object_t object;
5050 vm_object_offset_t offset;
5051 vm_prot_t prot;
5052 boolean_t wired;
5053 vm_map_entry_t local_entry;
5054 vm_map_version_t version;
5055 vm_map_t lookup_map;
5056
1c79356b 5057 if(entry->use_pmap) {
3e170ce0 5058 pmap = VME_SUBMAP(entry)->pmap;
9bccf70c
A
5059 /* ppc implementation requires that */
5060 /* submaps pmap address ranges line */
5061 /* up with parent map */
5062#ifdef notdef
5063 pmap_addr = sub_start;
5064#endif
2d21ac55 5065 pmap_addr = s;
1c79356b
A
5066 } else {
5067 pmap = map->pmap;
2d21ac55 5068 pmap_addr = s;
1c79356b 5069 }
2d21ac55 5070
1c79356b 5071 if (entry->wired_count) {
2d21ac55
A
5072 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
5073 goto done;
5074
5075 /*
5076 * The map was not unlocked:
5077 * no need to goto re-lookup.
5078 * Just go directly to next entry.
5079 */
1c79356b 5080 entry = entry->vme_next;
2d21ac55 5081 s = entry->vme_start;
1c79356b
A
5082 continue;
5083
2d21ac55 5084 }
9bccf70c 5085
2d21ac55
A
5086 /* call vm_map_lookup_locked to */
5087 /* cause any needs copy to be */
5088 /* evaluated */
5089 local_start = entry->vme_start;
5090 lookup_map = map;
5091 vm_map_lock_write_to_read(map);
5092 if(vm_map_lookup_locked(
5093 &lookup_map, local_start,
39037602 5094 access_type | VM_PROT_COPY,
2d21ac55
A
5095 OBJECT_LOCK_EXCLUSIVE,
5096 &version, &object,
5097 &offset, &prot, &wired,
5098 NULL,
5099 &real_map)) {
1c79356b 5100
2d21ac55 5101 vm_map_unlock_read(lookup_map);
4bd07ac2 5102 assert(map_pmap == NULL);
2d21ac55
A
5103 vm_map_unwire(map, start,
5104 s, user_wire);
5105 return(KERN_FAILURE);
5106 }
316670eb 5107 vm_object_unlock(object);
2d21ac55
A
5108 if(real_map != lookup_map)
5109 vm_map_unlock(real_map);
5110 vm_map_unlock_read(lookup_map);
5111 vm_map_lock(map);
1c79356b 5112
2d21ac55
A
5113 /* we unlocked, so must re-lookup */
5114 if (!vm_map_lookup_entry(map,
5115 local_start,
5116 &local_entry)) {
5117 rc = KERN_FAILURE;
5118 goto done;
5119 }
5120
5121 /*
5122 * entry could have been "simplified",
5123 * so re-clip
5124 */
5125 entry = local_entry;
5126 assert(s == local_start);
5127 vm_map_clip_start(map, entry, s);
5128 vm_map_clip_end(map, entry, end);
5129 /* re-compute "e" */
5130 e = entry->vme_end;
5131 if (e > end)
5132 e = end;
5133
5134 /* did we have a change of type? */
5135 if (!entry->is_sub_map) {
5136 last_timestamp = map->timestamp;
5137 continue;
1c79356b
A
5138 }
5139 } else {
9bccf70c 5140 local_start = entry->vme_start;
2d21ac55
A
5141 pmap = map_pmap;
5142 }
5143
5144 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
5145 goto done;
5146
5147 entry->in_transition = TRUE;
5148
5149 vm_map_unlock(map);
3e170ce0 5150 rc = vm_map_wire_nested(VME_SUBMAP(entry),
1c79356b 5151 sub_start, sub_end,
3e170ce0 5152 caller_prot,
fe8ab488
A
5153 user_wire, pmap, pmap_addr,
5154 NULL);
2d21ac55 5155 vm_map_lock(map);
9bccf70c 5156
1c79356b
A
5157 /*
5158 * Find the entry again. It could have been clipped
5159 * after we unlocked the map.
5160 */
9bccf70c
A
5161 if (!vm_map_lookup_entry(map, local_start,
5162 &first_entry))
5163 panic("vm_map_wire: re-lookup failed");
5164 entry = first_entry;
1c79356b 5165
2d21ac55
A
5166 assert(local_start == s);
5167 /* re-compute "e" */
5168 e = entry->vme_end;
5169 if (e > end)
5170 e = end;
5171
1c79356b
A
5172 last_timestamp = map->timestamp;
5173 while ((entry != vm_map_to_entry(map)) &&
2d21ac55 5174 (entry->vme_start < e)) {
1c79356b
A
5175 assert(entry->in_transition);
5176 entry->in_transition = FALSE;
5177 if (entry->needs_wakeup) {
5178 entry->needs_wakeup = FALSE;
5179 need_wakeup = TRUE;
5180 }
5181 if (rc != KERN_SUCCESS) {/* from vm_*_wire */
2d21ac55 5182 subtract_wire_counts(map, entry, user_wire);
1c79356b
A
5183 }
5184 entry = entry->vme_next;
5185 }
5186 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
2d21ac55 5187 goto done;
1c79356b 5188 }
2d21ac55
A
5189
5190 /* no need to relookup again */
5191 s = entry->vme_start;
1c79356b
A
5192 continue;
5193 }
5194
5195 /*
5196 * If this entry is already wired then increment
5197 * the appropriate wire reference count.
5198 */
9bccf70c 5199 if (entry->wired_count) {
fe8ab488
A
5200
5201 if ((entry->protection & access_type) != access_type) {
5202 /* found a protection problem */
5203
5204 /*
5205 * XXX FBDP
5206 * We should always return an error
5207 * in this case but since we didn't
5208 * enforce it before, let's do
5209 * it only for the new "wire_and_extract"
5210 * code path for now...
5211 */
5212 if (wire_and_extract) {
5213 rc = KERN_PROTECTION_FAILURE;
5214 goto done;
5215 }
5216 }
5217
1c79356b
A
5218 /*
5219 * entry is already wired down, get our reference
5220 * after clipping to our range.
5221 */
2d21ac55 5222 vm_map_clip_start(map, entry, s);
1c79356b 5223 vm_map_clip_end(map, entry, end);
1c79356b 5224
2d21ac55
A
5225 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
5226 goto done;
5227
fe8ab488
A
5228 if (wire_and_extract) {
5229 vm_object_t object;
5230 vm_object_offset_t offset;
5231 vm_page_t m;
5232
5233 /*
5234 * We don't have to "wire" the page again
5235 * but we still have to "extract" its
5236 * physical page number, after some sanity
5237 * checks.
5238 */
5239 assert((entry->vme_end - entry->vme_start)
5240 == PAGE_SIZE);
5241 assert(!entry->needs_copy);
5242 assert(!entry->is_sub_map);
3e170ce0 5243 assert(VME_OBJECT(entry));
fe8ab488
A
5244 if (((entry->vme_end - entry->vme_start)
5245 != PAGE_SIZE) ||
5246 entry->needs_copy ||
5247 entry->is_sub_map ||
3e170ce0 5248 VME_OBJECT(entry) == VM_OBJECT_NULL) {
fe8ab488
A
5249 rc = KERN_INVALID_ARGUMENT;
5250 goto done;
5251 }
5252
3e170ce0
A
5253 object = VME_OBJECT(entry);
5254 offset = VME_OFFSET(entry);
fe8ab488
A
5255 /* need exclusive lock to update m->dirty */
5256 if (entry->protection & VM_PROT_WRITE) {
5257 vm_object_lock(object);
5258 } else {
5259 vm_object_lock_shared(object);
5260 }
5261 m = vm_page_lookup(object, offset);
5262 assert(m != VM_PAGE_NULL);
39037602
A
5263 assert(VM_PAGE_WIRED(m));
5264 if (m != VM_PAGE_NULL && VM_PAGE_WIRED(m)) {
5265 *physpage_p = VM_PAGE_GET_PHYS_PAGE(m);
fe8ab488
A
5266 if (entry->protection & VM_PROT_WRITE) {
5267 vm_object_lock_assert_exclusive(
39037602 5268 object);
fe8ab488
A
5269 m->dirty = TRUE;
5270 }
5271 } else {
5272 /* not already wired !? */
5273 *physpage_p = 0;
5274 }
5275 vm_object_unlock(object);
5276 }
5277
2d21ac55 5278 /* map was not unlocked: no need to relookup */
1c79356b 5279 entry = entry->vme_next;
2d21ac55 5280 s = entry->vme_start;
1c79356b
A
5281 continue;
5282 }
5283
5284 /*
5285 * Unwired entry or wire request transmitted via submap
5286 */
5287
5288
39037602 5289
1c79356b
A
5290 /*
5291 * Perform actions of vm_map_lookup that need the write
5292 * lock on the map: create a shadow object for a
5293 * copy-on-write region, or an object for a zero-fill
5294 * region.
5295 */
5296 size = entry->vme_end - entry->vme_start;
5297 /*
5298 * If wiring a copy-on-write page, we need to copy it now
5299 * even if we're only (currently) requesting read access.
5300 * This is aggressive, but once it's wired we can't move it.
5301 */
5302 if (entry->needs_copy) {
fe8ab488
A
5303 if (wire_and_extract) {
5304 /*
5305 * We're supposed to share with the original
5306 * provider so should not be "needs_copy"
5307 */
5308 rc = KERN_INVALID_ARGUMENT;
5309 goto done;
5310 }
3e170ce0
A
5311
5312 VME_OBJECT_SHADOW(entry, size);
1c79356b 5313 entry->needs_copy = FALSE;
3e170ce0 5314 } else if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
fe8ab488
A
5315 if (wire_and_extract) {
5316 /*
5317 * We're supposed to share with the original
5318 * provider so should already have an object.
5319 */
5320 rc = KERN_INVALID_ARGUMENT;
5321 goto done;
5322 }
3e170ce0
A
5323 VME_OBJECT_SET(entry, vm_object_allocate(size));
5324 VME_OFFSET_SET(entry, (vm_object_offset_t)0);
fe8ab488 5325 assert(entry->use_pmap);
1c79356b
A
5326 }
5327
2d21ac55 5328 vm_map_clip_start(map, entry, s);
1c79356b
A
5329 vm_map_clip_end(map, entry, end);
5330
2d21ac55 5331 /* re-compute "e" */
1c79356b 5332 e = entry->vme_end;
2d21ac55
A
5333 if (e > end)
5334 e = end;
1c79356b
A
5335
5336 /*
5337 * Check for holes and protection mismatch.
5338 * Holes: Next entry should be contiguous unless this
5339 * is the end of the region.
5340 * Protection: Access requested must be allowed, unless
5341 * wiring is by protection class
5342 */
2d21ac55
A
5343 if ((entry->vme_end < end) &&
5344 ((entry->vme_next == vm_map_to_entry(map)) ||
5345 (entry->vme_next->vme_start > entry->vme_end))) {
5346 /* found a hole */
5347 rc = KERN_INVALID_ADDRESS;
5348 goto done;
5349 }
5350 if ((entry->protection & access_type) != access_type) {
5351 /* found a protection problem */
5352 rc = KERN_PROTECTION_FAILURE;
5353 goto done;
1c79356b
A
5354 }
5355
5356 assert(entry->wired_count == 0 && entry->user_wired_count == 0);
5357
2d21ac55
A
5358 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
5359 goto done;
1c79356b
A
5360
5361 entry->in_transition = TRUE;
5362
5363 /*
5364 * This entry might get split once we unlock the map.
5365 * In vm_fault_wire(), we need the current range as
5366 * defined by this entry. In order for this to work
5367 * along with a simultaneous clip operation, we make a
5368 * temporary copy of this entry and use that for the
5369 * wiring. Note that the underlying objects do not
5370 * change during a clip.
5371 */
5372 tmp_entry = *entry;
5373
5374 /*
5375 * The in_transition state guarantees that the entry
5376 * (or entries for this range, if a split occurred) will be
5377 * there when the map lock is acquired for the second time.
5378 */
5379 vm_map_unlock(map);
0b4e3aa0 5380
9bccf70c
A
5381 if (!user_wire && cur_thread != THREAD_NULL)
5382 interruptible_state = thread_interrupt_level(THREAD_UNINT);
91447636
A
5383 else
5384 interruptible_state = THREAD_UNINT;
9bccf70c 5385
1c79356b 5386 if(map_pmap)
9bccf70c 5387 rc = vm_fault_wire(map,
3e170ce0 5388 &tmp_entry, caller_prot, map_pmap, pmap_addr,
fe8ab488 5389 physpage_p);
1c79356b 5390 else
9bccf70c 5391 rc = vm_fault_wire(map,
3e170ce0 5392 &tmp_entry, caller_prot, map->pmap,
fe8ab488
A
5393 tmp_entry.vme_start,
5394 physpage_p);
0b4e3aa0
A
5395
5396 if (!user_wire && cur_thread != THREAD_NULL)
9bccf70c 5397 thread_interrupt_level(interruptible_state);
0b4e3aa0 5398
1c79356b
A
5399 vm_map_lock(map);
5400
5401 if (last_timestamp+1 != map->timestamp) {
5402 /*
5403 * Find the entry again. It could have been clipped
5404 * after we unlocked the map.
5405 */
5406 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
2d21ac55 5407 &first_entry))
1c79356b
A
5408 panic("vm_map_wire: re-lookup failed");
5409
5410 entry = first_entry;
5411 }
5412
5413 last_timestamp = map->timestamp;
5414
5415 while ((entry != vm_map_to_entry(map)) &&
5416 (entry->vme_start < tmp_entry.vme_end)) {
5417 assert(entry->in_transition);
5418 entry->in_transition = FALSE;
5419 if (entry->needs_wakeup) {
5420 entry->needs_wakeup = FALSE;
5421 need_wakeup = TRUE;
5422 }
5423 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
2d21ac55 5424 subtract_wire_counts(map, entry, user_wire);
1c79356b
A
5425 }
5426 entry = entry->vme_next;
5427 }
5428
5429 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
2d21ac55 5430 goto done;
1c79356b 5431 }
2d21ac55
A
5432
5433 s = entry->vme_start;
1c79356b 5434 } /* end while loop through map entries */
2d21ac55
A
5435
5436done:
5437 if (rc == KERN_SUCCESS) {
5438 /* repair any damage we may have made to the VM map */
5439 vm_map_simplify_range(map, start, end);
5440 }
5441
1c79356b
A
5442 vm_map_unlock(map);
5443
5444 /*
5445 * wake up anybody waiting on entries we wired.
5446 */
5447 if (need_wakeup)
5448 vm_map_entry_wakeup(map);
5449
2d21ac55
A
5450 if (rc != KERN_SUCCESS) {
5451 /* undo what has been wired so far */
4bd07ac2
A
5452 vm_map_unwire_nested(map, start, s, user_wire,
5453 map_pmap, pmap_addr);
fe8ab488
A
5454 if (physpage_p) {
5455 *physpage_p = 0;
5456 }
2d21ac55
A
5457 }
5458
5459 return rc;
1c79356b
A
5460
5461}
5462
5463kern_return_t
3e170ce0 5464vm_map_wire_external(
39037602
A
5465 vm_map_t map,
5466 vm_map_offset_t start,
5467 vm_map_offset_t end,
5468 vm_prot_t caller_prot,
1c79356b
A
5469 boolean_t user_wire)
5470{
3e170ce0
A
5471 kern_return_t kret;
5472
5473 caller_prot &= ~VM_PROT_MEMORY_TAG_MASK;
5474 caller_prot |= VM_PROT_MEMORY_TAG_MAKE(vm_tag_bt());
5475 kret = vm_map_wire_nested(map, start, end, caller_prot,
5476 user_wire, (pmap_t)NULL, 0, NULL);
5477 return kret;
5478}
1c79356b 5479
3e170ce0
A
5480kern_return_t
5481vm_map_wire(
39037602
A
5482 vm_map_t map,
5483 vm_map_offset_t start,
5484 vm_map_offset_t end,
5485 vm_prot_t caller_prot,
3e170ce0
A
5486 boolean_t user_wire)
5487{
1c79356b
A
5488 kern_return_t kret;
5489
3e170ce0 5490 kret = vm_map_wire_nested(map, start, end, caller_prot,
fe8ab488
A
5491 user_wire, (pmap_t)NULL, 0, NULL);
5492 return kret;
5493}
5494
5495kern_return_t
3e170ce0 5496vm_map_wire_and_extract_external(
fe8ab488
A
5497 vm_map_t map,
5498 vm_map_offset_t start,
3e170ce0 5499 vm_prot_t caller_prot,
fe8ab488
A
5500 boolean_t user_wire,
5501 ppnum_t *physpage_p)
5502{
3e170ce0
A
5503 kern_return_t kret;
5504
5505 caller_prot &= ~VM_PROT_MEMORY_TAG_MASK;
5506 caller_prot |= VM_PROT_MEMORY_TAG_MAKE(vm_tag_bt());
5507 kret = vm_map_wire_nested(map,
5508 start,
5509 start+VM_MAP_PAGE_SIZE(map),
5510 caller_prot,
5511 user_wire,
5512 (pmap_t)NULL,
5513 0,
5514 physpage_p);
5515 if (kret != KERN_SUCCESS &&
5516 physpage_p != NULL) {
5517 *physpage_p = 0;
5518 }
5519 return kret;
5520}
fe8ab488 5521
3e170ce0
A
5522kern_return_t
5523vm_map_wire_and_extract(
5524 vm_map_t map,
5525 vm_map_offset_t start,
5526 vm_prot_t caller_prot,
5527 boolean_t user_wire,
5528 ppnum_t *physpage_p)
5529{
fe8ab488
A
5530 kern_return_t kret;
5531
5532 kret = vm_map_wire_nested(map,
5533 start,
5534 start+VM_MAP_PAGE_SIZE(map),
3e170ce0 5535 caller_prot,
fe8ab488
A
5536 user_wire,
5537 (pmap_t)NULL,
5538 0,
5539 physpage_p);
5540 if (kret != KERN_SUCCESS &&
5541 physpage_p != NULL) {
5542 *physpage_p = 0;
5543 }
1c79356b
A
5544 return kret;
5545}
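
/*
 * Illustrative sketch only (added for exposition, not part of the original
 * source, and guarded by #if 0 so it is never compiled): a kernel caller
 * wiring a single VM-page-sized region, retrieving its physical page
 * number, and unwiring it again.  "some_map", "some_addr" and the function
 * name are hypothetical; only the wire/unwire entry points defined in this
 * file are assumed.
 */
#if 0
static kern_return_t
wire_one_page_example(vm_map_t some_map, vm_map_offset_t some_addr)
{
	ppnum_t		phys_page = 0;
	kern_return_t	kr;

	/* wire exactly one map page and get its physical page number */
	kr = vm_map_wire_and_extract(some_map,
				     some_addr,
				     VM_PROT_READ | VM_PROT_WRITE,
				     FALSE,		/* kernel wiring */
				     &phys_page);
	if (kr != KERN_SUCCESS)
		return kr;

	/* ... use phys_page, e.g. for I/O or diagnostics ... */

	/* undo the wiring when done */
	return vm_map_unwire(some_map,
			     some_addr,
			     some_addr + VM_MAP_PAGE_SIZE(some_map),
			     FALSE);		/* kernel unwire */
}
#endif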
5546
5547/*
5548 * vm_map_unwire:
5549 *
5550 * Marks the specified address range in the target map as
5551 * pageable. Regions specified must have been wired previously.
5552 *
5553 * The map must not be locked, but a reference must remain to the map
5554 * throughout the call.
5555 *
5556 * The kernel will panic on failures. User unwire ignores holes and
5557 * unwired or in-transition entries to avoid losing memory by leaving
5558 * it unwired.
5559 */
91447636 5560static kern_return_t
1c79356b 5561vm_map_unwire_nested(
39037602
A
5562 vm_map_t map,
5563 vm_map_offset_t start,
5564 vm_map_offset_t end,
1c79356b 5565 boolean_t user_wire,
9bccf70c 5566 pmap_t map_pmap,
91447636 5567 vm_map_offset_t pmap_addr)
1c79356b 5568{
39037602 5569 vm_map_entry_t entry;
1c79356b
A
5570 struct vm_map_entry *first_entry, tmp_entry;
5571 boolean_t need_wakeup;
5572 boolean_t main_map = FALSE;
5573 unsigned int last_timestamp;
5574
5575 vm_map_lock(map);
5576 if(map_pmap == NULL)
5577 main_map = TRUE;
5578 last_timestamp = map->timestamp;
5579
5580 VM_MAP_RANGE_CHECK(map, start, end);
5581 assert(page_aligned(start));
5582 assert(page_aligned(end));
39236c6e
A
5583 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
5584 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
1c79356b 5585
2d21ac55
A
5586 if (start == end) {
5587 /* We unwired what the caller asked for: zero pages */
5588 vm_map_unlock(map);
5589 return KERN_SUCCESS;
5590 }
5591
1c79356b
A
5592 if (vm_map_lookup_entry(map, start, &first_entry)) {
5593 entry = first_entry;
2d21ac55
A
5594 /*
5595 * vm_map_clip_start will be done later.
5596 * We don't want to unnest any nested sub maps here !
5597 */
1c79356b
A
5598 }
5599 else {
2d21ac55
A
5600 if (!user_wire) {
5601 panic("vm_map_unwire: start not found");
5602 }
1c79356b
A
5603 /* Start address is not in map. */
5604 vm_map_unlock(map);
5605 return(KERN_INVALID_ADDRESS);
5606 }
5607
b0d623f7
A
5608 if (entry->superpage_size) {
5609 /* superpages are always wired */
5610 vm_map_unlock(map);
5611 return KERN_INVALID_ADDRESS;
5612 }
5613
1c79356b
A
5614 need_wakeup = FALSE;
5615 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
5616 if (entry->in_transition) {
5617 /*
5618 * 1)
5619 * Another thread is wiring down this entry. Note
5620 * that if it were not for the other thread, we would
5621 * be unwiring an unwired entry. This is not
5622 * permitted. If we wait, we will be unwiring memory
5623 * we did not wire.
5624 *
5625 * 2)
5626 * Another thread is unwiring this entry. We did not
5627 * have a reference to it, because if we did, this
5628 * entry will not be getting unwired now.
5629 */
2d21ac55
A
5630 if (!user_wire) {
5631 /*
5632 * XXX FBDP
5633 * This could happen: there could be some
5634 * overlapping vslock/vsunlock operations
5635 * going on.
5636 * We should probably just wait and retry,
5637 * but then we have to be careful that this
5638 * entry could get "simplified" after
5639 * "in_transition" gets unset and before
5640 * we re-lookup the entry, so we would
5641 * have to re-clip the entry to avoid
5642 * re-unwiring what we have already unwired...
5643 * See vm_map_wire_nested().
5644 *
5645 * Or we could just ignore "in_transition"
5646 * here and proceed to decrement the wired
5647 * count(s) on this entry. That should be fine
5648 * as long as "wired_count" doesn't drop all
5649 * the way to 0 (and we should panic if THAT
5650 * happens).
5651 */
1c79356b 5652 panic("vm_map_unwire: in_transition entry");
2d21ac55 5653 }
1c79356b
A
5654
5655 entry = entry->vme_next;
5656 continue;
5657 }
5658
2d21ac55 5659 if (entry->is_sub_map) {
91447636
A
5660 vm_map_offset_t sub_start;
5661 vm_map_offset_t sub_end;
5662 vm_map_offset_t local_end;
1c79356b 5663 pmap_t pmap;
2d21ac55 5664
1c79356b
A
5665 vm_map_clip_start(map, entry, start);
5666 vm_map_clip_end(map, entry, end);
5667
3e170ce0 5668 sub_start = VME_OFFSET(entry);
1c79356b 5669 sub_end = entry->vme_end - entry->vme_start;
3e170ce0 5670 sub_end += VME_OFFSET(entry);
1c79356b
A
5671 local_end = entry->vme_end;
5672 if(map_pmap == NULL) {
2d21ac55 5673 if(entry->use_pmap) {
3e170ce0 5674 pmap = VME_SUBMAP(entry)->pmap;
9bccf70c 5675 pmap_addr = sub_start;
2d21ac55 5676 } else {
1c79356b 5677 pmap = map->pmap;
9bccf70c 5678 pmap_addr = start;
2d21ac55
A
5679 }
5680 if (entry->wired_count == 0 ||
5681 (user_wire && entry->user_wired_count == 0)) {
5682 if (!user_wire)
5683 panic("vm_map_unwire: entry is unwired");
5684 entry = entry->vme_next;
5685 continue;
5686 }
5687
5688 /*
5689 * Check for holes
5690 * Holes: Next entry should be contiguous unless
5691 * this is the end of the region.
5692 */
5693 if (((entry->vme_end < end) &&
5694 ((entry->vme_next == vm_map_to_entry(map)) ||
5695 (entry->vme_next->vme_start
5696 > entry->vme_end)))) {
5697 if (!user_wire)
5698 panic("vm_map_unwire: non-contiguous region");
1c79356b 5699/*
2d21ac55
A
5700 entry = entry->vme_next;
5701 continue;
1c79356b 5702*/
2d21ac55 5703 }
1c79356b 5704
2d21ac55 5705 subtract_wire_counts(map, entry, user_wire);
1c79356b 5706
2d21ac55
A
5707 if (entry->wired_count != 0) {
5708 entry = entry->vme_next;
5709 continue;
5710 }
1c79356b 5711
2d21ac55
A
5712 entry->in_transition = TRUE;
5713 tmp_entry = *entry;/* see comment in vm_map_wire() */
5714
5715 /*
5716 * We can unlock the map now. The in_transition state
5717 * guarantees existence of the entry.
5718 */
5719 vm_map_unlock(map);
3e170ce0 5720 vm_map_unwire_nested(VME_SUBMAP(entry),
2d21ac55
A
5721 sub_start, sub_end, user_wire, pmap, pmap_addr);
5722 vm_map_lock(map);
1c79356b 5723
2d21ac55
A
5724 if (last_timestamp+1 != map->timestamp) {
5725 /*
5726 * Find the entry again. It could have been
5727 * clipped or deleted after we unlocked the map.
5728 */
5729 if (!vm_map_lookup_entry(map,
5730 tmp_entry.vme_start,
5731 &first_entry)) {
5732 if (!user_wire)
5733 panic("vm_map_unwire: re-lookup failed");
5734 entry = first_entry->vme_next;
5735 } else
5736 entry = first_entry;
5737 }
5738 last_timestamp = map->timestamp;
1c79356b 5739
1c79356b 5740 /*
2d21ac55
A
5741 * clear transition bit for all constituent entries
5742 * that were in the original entry (saved in
5743 * tmp_entry). Also check for waiters.
5744 */
5745 while ((entry != vm_map_to_entry(map)) &&
5746 (entry->vme_start < tmp_entry.vme_end)) {
5747 assert(entry->in_transition);
5748 entry->in_transition = FALSE;
5749 if (entry->needs_wakeup) {
5750 entry->needs_wakeup = FALSE;
5751 need_wakeup = TRUE;
5752 }
5753 entry = entry->vme_next;
1c79356b 5754 }
2d21ac55 5755 continue;
1c79356b 5756 } else {
2d21ac55 5757 vm_map_unlock(map);
3e170ce0 5758 vm_map_unwire_nested(VME_SUBMAP(entry),
2d21ac55
A
5759 sub_start, sub_end, user_wire, map_pmap,
5760 pmap_addr);
5761 vm_map_lock(map);
1c79356b 5762
2d21ac55
A
5763 if (last_timestamp+1 != map->timestamp) {
5764 /*
5765 * Find the entry again. It could have been
5766 * clipped or deleted after we unlocked the map.
5767 */
5768 if (!vm_map_lookup_entry(map,
5769 tmp_entry.vme_start,
5770 &first_entry)) {
5771 if (!user_wire)
5772 panic("vm_map_unwire: re-lookup failed");
5773 entry = first_entry->vme_next;
5774 } else
5775 entry = first_entry;
5776 }
5777 last_timestamp = map->timestamp;
1c79356b
A
5778 }
5779 }
5780
5781
9bccf70c 5782 if ((entry->wired_count == 0) ||
2d21ac55 5783 (user_wire && entry->user_wired_count == 0)) {
1c79356b
A
5784 if (!user_wire)
5785 panic("vm_map_unwire: entry is unwired");
5786
5787 entry = entry->vme_next;
5788 continue;
5789 }
2d21ac55 5790
1c79356b 5791 assert(entry->wired_count > 0 &&
2d21ac55 5792 (!user_wire || entry->user_wired_count > 0));
1c79356b
A
5793
5794 vm_map_clip_start(map, entry, start);
5795 vm_map_clip_end(map, entry, end);
5796
5797 /*
5798 * Check for holes
5799 * Holes: Next entry should be contiguous unless
5800 * this is the end of the region.
5801 */
5802 if (((entry->vme_end < end) &&
2d21ac55
A
5803 ((entry->vme_next == vm_map_to_entry(map)) ||
5804 (entry->vme_next->vme_start > entry->vme_end)))) {
1c79356b
A
5805
5806 if (!user_wire)
5807 panic("vm_map_unwire: non-contiguous region");
5808 entry = entry->vme_next;
5809 continue;
5810 }
5811
2d21ac55 5812 subtract_wire_counts(map, entry, user_wire);
1c79356b 5813
9bccf70c 5814 if (entry->wired_count != 0) {
1c79356b
A
5815 entry = entry->vme_next;
5816 continue;
1c79356b
A
5817 }
5818
b0d623f7
A
5819 if(entry->zero_wired_pages) {
5820 entry->zero_wired_pages = FALSE;
5821 }
5822
1c79356b
A
5823 entry->in_transition = TRUE;
5824 tmp_entry = *entry; /* see comment in vm_map_wire() */
5825
5826 /*
5827 * We can unlock the map now. The in_transition state
5828 * guarantees existence of the entry.
5829 */
5830 vm_map_unlock(map);
5831 if(map_pmap) {
9bccf70c 5832 vm_fault_unwire(map,
2d21ac55 5833 &tmp_entry, FALSE, map_pmap, pmap_addr);
1c79356b 5834 } else {
9bccf70c 5835 vm_fault_unwire(map,
2d21ac55
A
5836 &tmp_entry, FALSE, map->pmap,
5837 tmp_entry.vme_start);
1c79356b
A
5838 }
5839 vm_map_lock(map);
5840
5841 if (last_timestamp+1 != map->timestamp) {
5842 /*
5843 * Find the entry again. It could have been clipped
5844 * or deleted after we unlocked the map.
5845 */
5846 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
2d21ac55 5847 &first_entry)) {
1c79356b 5848 if (!user_wire)
2d21ac55 5849 panic("vm_map_unwire: re-lookup failed");
1c79356b
A
5850 entry = first_entry->vme_next;
5851 } else
5852 entry = first_entry;
5853 }
5854 last_timestamp = map->timestamp;
5855
5856 /*
5857 * clear transition bit for all constituent entries that
5858 * were in the original entry (saved in tmp_entry). Also
5859 * check for waiters.
5860 */
5861 while ((entry != vm_map_to_entry(map)) &&
5862 (entry->vme_start < tmp_entry.vme_end)) {
5863 assert(entry->in_transition);
5864 entry->in_transition = FALSE;
5865 if (entry->needs_wakeup) {
5866 entry->needs_wakeup = FALSE;
5867 need_wakeup = TRUE;
5868 }
5869 entry = entry->vme_next;
5870 }
5871 }
91447636
A
5872
5873 /*
5874 * We might have fragmented the address space when we wired this
5875 * range of addresses. Attempt to re-coalesce these VM map entries
5876 * with their neighbors now that they're no longer wired.
5877 * Under some circumstances, address space fragmentation can
5878 * prevent VM object shadow chain collapsing, which can cause
5879 * swap space leaks.
5880 */
5881 vm_map_simplify_range(map, start, end);
5882
1c79356b
A
5883 vm_map_unlock(map);
5884 /*
5885 * wake up anybody waiting on entries that we have unwired.
5886 */
5887 if (need_wakeup)
5888 vm_map_entry_wakeup(map);
5889 return(KERN_SUCCESS);
5890
5891}
5892
5893kern_return_t
5894vm_map_unwire(
39037602
A
5895 vm_map_t map,
5896 vm_map_offset_t start,
5897 vm_map_offset_t end,
1c79356b
A
5898 boolean_t user_wire)
5899{
9bccf70c 5900 return vm_map_unwire_nested(map, start, end,
2d21ac55 5901 user_wire, (pmap_t)NULL, 0);
1c79356b
A
5902}
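
/*
 * Illustrative sketch only (added for exposition, not part of the original
 * source, and guarded by #if 0 so it is never compiled): wiring a
 * page-aligned range on behalf of a user request and unwiring it again.
 * Passing user_wire == TRUE charges the wiring to the entries'
 * user_wired_count, and a later vm_map_unwire() with user_wire == TRUE
 * tolerates holes and already-unwired entries instead of panicking, as
 * described in the comment above.  "map", "start" and "end" are
 * hypothetical arguments.
 */
#if 0
static kern_return_t
user_wire_range_example(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	kern_return_t	kr;

	kr = vm_map_wire_external(map, start, end,
				  VM_PROT_READ,	/* access the wiring must allow */
				  TRUE);	/* user wiring */
	if (kr != KERN_SUCCESS)
		return kr;

	/* ... pages in [start, end) remain resident here ... */

	return vm_map_unwire(map, start, end, TRUE);	/* user unwire */
}
#endif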
5903
5904
5905/*
5906 * vm_map_entry_delete: [ internal use only ]
5907 *
5908 * Deallocate the given entry from the target map.
5909 */
91447636 5910static void
1c79356b 5911vm_map_entry_delete(
39037602
A
5912 vm_map_t map,
5913 vm_map_entry_t entry)
1c79356b 5914{
39037602
A
5915 vm_map_offset_t s, e;
5916 vm_object_t object;
5917 vm_map_t submap;
1c79356b
A
5918
5919 s = entry->vme_start;
5920 e = entry->vme_end;
5921 assert(page_aligned(s));
5922 assert(page_aligned(e));
39236c6e
A
5923 if (entry->map_aligned == TRUE) {
5924 assert(VM_MAP_PAGE_ALIGNED(s, VM_MAP_PAGE_MASK(map)));
5925 assert(VM_MAP_PAGE_ALIGNED(e, VM_MAP_PAGE_MASK(map)));
5926 }
1c79356b
A
5927 assert(entry->wired_count == 0);
5928 assert(entry->user_wired_count == 0);
b0d623f7 5929 assert(!entry->permanent);
1c79356b
A
5930
5931 if (entry->is_sub_map) {
5932 object = NULL;
3e170ce0 5933 submap = VME_SUBMAP(entry);
1c79356b
A
5934 } else {
5935 submap = NULL;
3e170ce0 5936 object = VME_OBJECT(entry);
1c79356b
A
5937 }
5938
6d2010ae 5939 vm_map_store_entry_unlink(map, entry);
1c79356b
A
5940 map->size -= e - s;
5941
5942 vm_map_entry_dispose(map, entry);
5943
5944 vm_map_unlock(map);
5945 /*
5946 * Deallocate the object only after removing all
5947 * pmap entries pointing to its pages.
5948 */
5949 if (submap)
5950 vm_map_deallocate(submap);
5951 else
2d21ac55 5952 vm_object_deallocate(object);
1c79356b
A
5953
5954}
5955
5956void
5957vm_map_submap_pmap_clean(
5958 vm_map_t map,
91447636
A
5959 vm_map_offset_t start,
5960 vm_map_offset_t end,
1c79356b 5961 vm_map_t sub_map,
91447636 5962 vm_map_offset_t offset)
1c79356b 5963{
91447636
A
5964 vm_map_offset_t submap_start;
5965 vm_map_offset_t submap_end;
5966 vm_map_size_t remove_size;
1c79356b
A
5967 vm_map_entry_t entry;
5968
5969 submap_end = offset + (end - start);
5970 submap_start = offset;
b7266188
A
5971
5972 vm_map_lock_read(sub_map);
1c79356b 5973 if(vm_map_lookup_entry(sub_map, offset, &entry)) {
2d21ac55 5974
1c79356b
A
5975 remove_size = (entry->vme_end - entry->vme_start);
5976 if(offset > entry->vme_start)
5977 remove_size -= offset - entry->vme_start;
2d21ac55 5978
1c79356b
A
5979
5980 if(submap_end < entry->vme_end) {
5981 remove_size -=
5982 entry->vme_end - submap_end;
5983 }
5984 if(entry->is_sub_map) {
5985 vm_map_submap_pmap_clean(
5986 sub_map,
5987 start,
5988 start + remove_size,
3e170ce0
A
5989 VME_SUBMAP(entry),
5990 VME_OFFSET(entry));
1c79356b 5991 } else {
9bccf70c 5992
316670eb 5993 if((map->mapped_in_other_pmaps) && (map->ref_count)
3e170ce0
A
5994 && (VME_OBJECT(entry) != NULL)) {
5995 vm_object_pmap_protect_options(
5996 VME_OBJECT(entry),
5997 (VME_OFFSET(entry) +
5998 offset -
5999 entry->vme_start),
9bccf70c
A
6000 remove_size,
6001 PMAP_NULL,
6002 entry->vme_start,
3e170ce0
A
6003 VM_PROT_NONE,
6004 PMAP_OPTIONS_REMOVE);
9bccf70c
A
6005 } else {
6006 pmap_remove(map->pmap,
2d21ac55
A
6007 (addr64_t)start,
6008 (addr64_t)(start + remove_size));
9bccf70c 6009 }
1c79356b
A
6010 }
6011 }
6012
6013 entry = entry->vme_next;
2d21ac55 6014
1c79356b 6015 while((entry != vm_map_to_entry(sub_map))
2d21ac55 6016 && (entry->vme_start < submap_end)) {
1c79356b
A
6017 remove_size = (entry->vme_end - entry->vme_start);
6018 if(submap_end < entry->vme_end) {
6019 remove_size -= entry->vme_end - submap_end;
6020 }
6021 if(entry->is_sub_map) {
6022 vm_map_submap_pmap_clean(
6023 sub_map,
6024 (start + entry->vme_start) - offset,
6025 ((start + entry->vme_start) - offset) + remove_size,
3e170ce0
A
6026 VME_SUBMAP(entry),
6027 VME_OFFSET(entry));
1c79356b 6028 } else {
316670eb 6029 if((map->mapped_in_other_pmaps) && (map->ref_count)
3e170ce0
A
6030 && (VME_OBJECT(entry) != NULL)) {
6031 vm_object_pmap_protect_options(
6032 VME_OBJECT(entry),
6033 VME_OFFSET(entry),
9bccf70c
A
6034 remove_size,
6035 PMAP_NULL,
6036 entry->vme_start,
3e170ce0
A
6037 VM_PROT_NONE,
6038 PMAP_OPTIONS_REMOVE);
9bccf70c
A
6039 } else {
6040 pmap_remove(map->pmap,
2d21ac55
A
6041 (addr64_t)((start + entry->vme_start)
6042 - offset),
6043 (addr64_t)(((start + entry->vme_start)
6044 - offset) + remove_size));
9bccf70c 6045 }
1c79356b
A
6046 }
6047 entry = entry->vme_next;
b7266188
A
6048 }
6049 vm_map_unlock_read(sub_map);
1c79356b
A
6050 return;
6051}
6052
6053/*
6054 * vm_map_delete: [ internal use only ]
6055 *
6056 * Deallocates the given address range from the target map.
6057 * Removes all user wirings. Unwires one kernel wiring if
6058 * VM_MAP_REMOVE_KUNWIRE is set. Waits for kernel wirings to go
6059 * away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set. Sleeps
6060 * interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
6061 *
6062 * This routine is called with map locked and leaves map locked.
6063 */
91447636 6064static kern_return_t
1c79356b 6065vm_map_delete(
91447636
A
6066 vm_map_t map,
6067 vm_map_offset_t start,
6068 vm_map_offset_t end,
6069 int flags,
6070 vm_map_t zap_map)
1c79356b
A
6071{
6072 vm_map_entry_t entry, next;
6073 struct vm_map_entry *first_entry, tmp_entry;
39037602
A
6074 vm_map_offset_t s;
6075 vm_object_t object;
1c79356b
A
6076 boolean_t need_wakeup;
6077 unsigned int last_timestamp = ~0; /* unlikely value */
6078 int interruptible;
1c79356b
A
6079
6080 interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
2d21ac55 6081 THREAD_ABORTSAFE : THREAD_UNINT;
1c79356b
A
6082
6083 /*
6084 * All our DMA I/O operations in IOKit are currently done by
6085 * wiring through the map entries of the task requesting the I/O.
6086 * Because of this, we must always wait for kernel wirings
6087 * to go away on the entries before deleting them.
6088 *
6089 * Any caller who wants to actually remove a kernel wiring
6090 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
6091 * properly remove one wiring instead of blasting through
6092 * them all.
6093 */
6094 flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;
6095
b0d623f7
A
6096 while(1) {
6097 /*
6098 * Find the start of the region, and clip it
6099 */
6100 if (vm_map_lookup_entry(map, start, &first_entry)) {
6101 entry = first_entry;
fe8ab488
A
6102 if (map == kalloc_map &&
6103 (entry->vme_start != start ||
6104 entry->vme_end != end)) {
6105 panic("vm_map_delete(%p,0x%llx,0x%llx): "
6106 "mismatched entry %p [0x%llx:0x%llx]\n",
6107 map,
6108 (uint64_t)start,
6109 (uint64_t)end,
6110 entry,
6111 (uint64_t)entry->vme_start,
6112 (uint64_t)entry->vme_end);
6113 }
b0d623f7
A
6114 if (entry->superpage_size && (start & ~SUPERPAGE_MASK)) { /* extend request to whole entry */
6115 start = SUPERPAGE_ROUND_DOWN(start);
6116 continue;
6117 }
6118 if (start == entry->vme_start) {
6119 /*
6120 * No need to clip. We don't want to cause
6121 * any unnecessary unnesting in this case...
6122 */
6123 } else {
fe8ab488
A
6124 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
6125 entry->map_aligned &&
6126 !VM_MAP_PAGE_ALIGNED(
6127 start,
6128 VM_MAP_PAGE_MASK(map))) {
6129 /*
6130 * The entry will no longer be
6131 * map-aligned after clipping
6132 * and the caller said it's OK.
6133 */
6134 entry->map_aligned = FALSE;
6135 }
6136 if (map == kalloc_map) {
6137 panic("vm_map_delete(%p,0x%llx,0x%llx):"
6138 " clipping %p at 0x%llx\n",
6139 map,
6140 (uint64_t)start,
6141 (uint64_t)end,
6142 entry,
6143 (uint64_t)start);
6144 }
b0d623f7
A
6145 vm_map_clip_start(map, entry, start);
6146 }
6147
2d21ac55 6148 /*
b0d623f7
A
6149 * Fix the lookup hint now, rather than each
6150 * time through the loop.
2d21ac55 6151 */
b0d623f7 6152 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
2d21ac55 6153 } else {
fe8ab488
A
6154 if (map->pmap == kernel_pmap &&
6155 map->ref_count != 0) {
6156 panic("vm_map_delete(%p,0x%llx,0x%llx): "
6157 "no map entry at 0x%llx\n",
6158 map,
6159 (uint64_t)start,
6160 (uint64_t)end,
6161 (uint64_t)start);
6162 }
b0d623f7 6163 entry = first_entry->vme_next;
2d21ac55 6164 }
b0d623f7 6165 break;
1c79356b 6166 }
b0d623f7
A
6167 if (entry->superpage_size)
6168 end = SUPERPAGE_ROUND_UP(end);
1c79356b
A
6169
6170 need_wakeup = FALSE;
6171 /*
6172 * Step through all entries in this region
6173 */
2d21ac55
A
6174 s = entry->vme_start;
6175 while ((entry != vm_map_to_entry(map)) && (s < end)) {
6176 /*
6177 * At this point, we have deleted all the memory entries
6178 * between "start" and "s". We still need to delete
6179 * all memory entries between "s" and "end".
6180 * While we were blocked and the map was unlocked, some
6181 * new memory entries could have been re-allocated between
6182 * "start" and "s" and we don't want to mess with those.
6183 * Some of those entries could even have been re-assembled
6184 * with an entry after "s" (in vm_map_simplify_entry()), so
6185 * we may have to vm_map_clip_start() again.
6186 */
1c79356b 6187
2d21ac55
A
6188 if (entry->vme_start >= s) {
6189 /*
6190 * This entry starts on or after "s"
6191 * so no need to clip its start.
6192 */
6193 } else {
6194 /*
6195 * This entry has been re-assembled by a
6196 * vm_map_simplify_entry(). We need to
6197 * re-clip its start.
6198 */
fe8ab488
A
6199 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
6200 entry->map_aligned &&
6201 !VM_MAP_PAGE_ALIGNED(s,
6202 VM_MAP_PAGE_MASK(map))) {
6203 /*
6204 * The entry will no longer be map-aligned
6205 * after clipping and the caller said it's OK.
6206 */
6207 entry->map_aligned = FALSE;
6208 }
6209 if (map == kalloc_map) {
6210 panic("vm_map_delete(%p,0x%llx,0x%llx): "
6211 "clipping %p at 0x%llx\n",
6212 map,
6213 (uint64_t)start,
6214 (uint64_t)end,
6215 entry,
6216 (uint64_t)s);
6217 }
2d21ac55
A
6218 vm_map_clip_start(map, entry, s);
6219 }
6220 if (entry->vme_end <= end) {
6221 /*
6222 * This entry is going away completely, so no need
6223 * to clip and possibly cause an unnecessary unnesting.
6224 */
6225 } else {
fe8ab488
A
6226 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
6227 entry->map_aligned &&
6228 !VM_MAP_PAGE_ALIGNED(end,
6229 VM_MAP_PAGE_MASK(map))) {
6230 /*
6231 * The entry will no longer be map-aligned
6232 * after clipping and the caller said it's OK.
6233 */
6234 entry->map_aligned = FALSE;
6235 }
6236 if (map == kalloc_map) {
6237 panic("vm_map_delete(%p,0x%llx,0x%llx): "
6238 "clipping %p at 0x%llx\n",
6239 map,
6240 (uint64_t)start,
6241 (uint64_t)end,
6242 entry,
6243 (uint64_t)end);
6244 }
2d21ac55
A
6245 vm_map_clip_end(map, entry, end);
6246 }
b0d623f7
A
6247
6248 if (entry->permanent) {
6249 panic("attempt to remove permanent VM map entry "
6250 "%p [0x%llx:0x%llx]\n",
6251 entry, (uint64_t) s, (uint64_t) end);
6252 }
6253
6254
1c79356b 6255 if (entry->in_transition) {
9bccf70c
A
6256 wait_result_t wait_result;
6257
1c79356b
A
6258 /*
6259 * Another thread is wiring/unwiring this entry.
6260 * Let the other thread know we are waiting.
6261 */
2d21ac55 6262 assert(s == entry->vme_start);
1c79356b
A
6263 entry->needs_wakeup = TRUE;
6264
6265 /*
6266 * wake up anybody waiting on entries that we have
6267 * already unwired/deleted.
6268 */
6269 if (need_wakeup) {
6270 vm_map_entry_wakeup(map);
6271 need_wakeup = FALSE;
6272 }
6273
9bccf70c 6274 wait_result = vm_map_entry_wait(map, interruptible);
1c79356b
A
6275
6276 if (interruptible &&
9bccf70c 6277 wait_result == THREAD_INTERRUPTED) {
1c79356b
A
6278 /*
6279 * We do not clear the needs_wakeup flag,
6280 * since we cannot tell if we were the only one.
6281 */
6282 return KERN_ABORTED;
9bccf70c 6283 }
1c79356b
A
6284
6285 /*
6286 * The entry could have been clipped or it
6287 * may not exist anymore. Look it up again.
6288 */
6289 if (!vm_map_lookup_entry(map, s, &first_entry)) {
1c79356b
A
6290 /*
6291 * User: use the next entry
6292 */
6293 entry = first_entry->vme_next;
2d21ac55 6294 s = entry->vme_start;
1c79356b
A
6295 } else {
6296 entry = first_entry;
0c530ab8 6297 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
1c79356b 6298 }
9bccf70c 6299 last_timestamp = map->timestamp;
1c79356b
A
6300 continue;
6301 } /* end in_transition */
6302
6303 if (entry->wired_count) {
2d21ac55
A
6304 boolean_t user_wire;
6305
6306 user_wire = entry->user_wired_count > 0;
6307
1c79356b 6308 /*
b0d623f7 6309 * Remove a kernel wiring if requested
1c79356b 6310 */
b0d623f7 6311 if (flags & VM_MAP_REMOVE_KUNWIRE) {
1c79356b 6312 entry->wired_count--;
b0d623f7
A
6313 }
6314
6315 /*
6316 * Remove all user wirings for proper accounting
6317 */
6318 if (entry->user_wired_count > 0) {
6319 while (entry->user_wired_count)
6320 subtract_wire_counts(map, entry, user_wire);
6321 }
1c79356b
A
6322
6323 if (entry->wired_count != 0) {
2d21ac55 6324 assert(map != kernel_map);
1c79356b
A
6325 /*
6326 * Cannot continue. Typical case is when
6327 * a user thread has physical I/O pending
6328 * on this page. Either wait for the
6329 * kernel wiring to go away or return an
6330 * error.
6331 */
6332 if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) {
9bccf70c 6333 wait_result_t wait_result;
1c79356b 6334
2d21ac55 6335 assert(s == entry->vme_start);
1c79356b 6336 entry->needs_wakeup = TRUE;
9bccf70c 6337 wait_result = vm_map_entry_wait(map,
2d21ac55 6338 interruptible);
1c79356b
A
6339
6340 if (interruptible &&
2d21ac55 6341 wait_result == THREAD_INTERRUPTED) {
1c79356b 6342 /*
2d21ac55 6343 * We do not clear the
1c79356b
A
6344 * needs_wakeup flag, since we
6345 * cannot tell if we were the
6346 * only one.
2d21ac55 6347 */
1c79356b 6348 return KERN_ABORTED;
9bccf70c 6349 }
1c79356b
A
6350
6351 /*
2d21ac55 6352 * The entry could have been clipped or
1c79356b
A
6353 * it may not exist anymore. Look it
6354 * up again.
2d21ac55 6355 */
1c79356b 6356 if (!vm_map_lookup_entry(map, s,
2d21ac55
A
6357 &first_entry)) {
6358 assert(map != kernel_map);
1c79356b 6359 /*
2d21ac55
A
6360 * User: use the next entry
6361 */
1c79356b 6362 entry = first_entry->vme_next;
2d21ac55 6363 s = entry->vme_start;
1c79356b
A
6364 } else {
6365 entry = first_entry;
0c530ab8 6366 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
1c79356b 6367 }
9bccf70c 6368 last_timestamp = map->timestamp;
1c79356b
A
6369 continue;
6370 }
6371 else {
6372 return KERN_FAILURE;
6373 }
6374 }
6375
6376 entry->in_transition = TRUE;
6377 /*
6378 * copy current entry. see comment in vm_map_wire()
6379 */
6380 tmp_entry = *entry;
2d21ac55 6381 assert(s == entry->vme_start);
1c79356b
A
6382
6383 /*
6384 * We can unlock the map now. The in_transition
6385 * state guarantees existence of the entry.
6386 */
6387 vm_map_unlock(map);
2d21ac55
A
6388
6389 if (tmp_entry.is_sub_map) {
6390 vm_map_t sub_map;
6391 vm_map_offset_t sub_start, sub_end;
6392 pmap_t pmap;
6393 vm_map_offset_t pmap_addr;
6394
6395
3e170ce0
A
6396 sub_map = VME_SUBMAP(&tmp_entry);
6397 sub_start = VME_OFFSET(&tmp_entry);
2d21ac55
A
6398 sub_end = sub_start + (tmp_entry.vme_end -
6399 tmp_entry.vme_start);
6400 if (tmp_entry.use_pmap) {
6401 pmap = sub_map->pmap;
6402 pmap_addr = tmp_entry.vme_start;
6403 } else {
6404 pmap = map->pmap;
6405 pmap_addr = tmp_entry.vme_start;
6406 }
6407 (void) vm_map_unwire_nested(sub_map,
6408 sub_start, sub_end,
6409 user_wire,
6410 pmap, pmap_addr);
6411 } else {
6412
3e170ce0 6413 if (VME_OBJECT(&tmp_entry) == kernel_object) {
39236c6e
A
6414 pmap_protect_options(
6415 map->pmap,
6416 tmp_entry.vme_start,
6417 tmp_entry.vme_end,
6418 VM_PROT_NONE,
6419 PMAP_OPTIONS_REMOVE,
6420 NULL);
6421 }
2d21ac55 6422 vm_fault_unwire(map, &tmp_entry,
3e170ce0 6423 VME_OBJECT(&tmp_entry) == kernel_object,
2d21ac55
A
6424 map->pmap, tmp_entry.vme_start);
6425 }
6426
1c79356b
A
6427 vm_map_lock(map);
6428
6429 if (last_timestamp+1 != map->timestamp) {
6430 /*
6431 * Find the entry again. It could have
6432 * been clipped after we unlocked the map.
6433 */
6434 if (!vm_map_lookup_entry(map, s, &first_entry)){
6435 assert((map != kernel_map) &&
2d21ac55 6436 (!entry->is_sub_map));
1c79356b 6437 first_entry = first_entry->vme_next;
2d21ac55 6438 s = first_entry->vme_start;
1c79356b 6439 } else {
0c530ab8 6440 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
1c79356b
A
6441 }
6442 } else {
0c530ab8 6443 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
1c79356b
A
6444 first_entry = entry;
6445 }
6446
6447 last_timestamp = map->timestamp;
6448
6449 entry = first_entry;
6450 while ((entry != vm_map_to_entry(map)) &&
6451 (entry->vme_start < tmp_entry.vme_end)) {
6452 assert(entry->in_transition);
6453 entry->in_transition = FALSE;
6454 if (entry->needs_wakeup) {
6455 entry->needs_wakeup = FALSE;
6456 need_wakeup = TRUE;
6457 }
6458 entry = entry->vme_next;
6459 }
6460 /*
6461 * We have unwired the entry(s). Go back and
6462 * delete them.
6463 */
6464 entry = first_entry;
6465 continue;
6466 }
6467
6468 /* entry is unwired */
6469 assert(entry->wired_count == 0);
6470 assert(entry->user_wired_count == 0);
6471
2d21ac55
A
6472 assert(s == entry->vme_start);
6473
6474 if (flags & VM_MAP_REMOVE_NO_PMAP_CLEANUP) {
6475 /*
6476 * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
6477 * vm_map_delete(), some map entries might have been
6478 * transferred to a "zap_map", which doesn't have a
6479 * pmap. The original pmap has already been flushed
6480 * in the vm_map_delete() call targeting the original
6481 * map, but when we get to destroying the "zap_map",
6482 * we don't have any pmap to flush, so let's just skip
6483 * all this.
6484 */
6485 } else if (entry->is_sub_map) {
6486 if (entry->use_pmap) {
0c530ab8 6487#ifndef NO_NESTED_PMAP
3e170ce0
A
6488 int pmap_flags;
6489
6490 if (flags & VM_MAP_REMOVE_NO_UNNESTING) {
6491 /*
6492 * This is the final cleanup of the
6493 * address space being terminated.
6494 * No new mappings are expected and
6495 * we don't really need to unnest the
6496 * shared region (and lose the "global"
6497 * pmap mappings, if applicable).
6498 *
6499 * Tell the pmap layer that we're
6500 * "clean" wrt nesting.
6501 */
6502 pmap_flags = PMAP_UNNEST_CLEAN;
6503 } else {
6504 /*
6505 * We're unmapping part of the nested
6506 * shared region, so we can't keep the
6507 * nested pmap.
6508 */
6509 pmap_flags = 0;
6510 }
6511 pmap_unnest_options(
6512 map->pmap,
6513 (addr64_t)entry->vme_start,
6514 entry->vme_end - entry->vme_start,
6515 pmap_flags);
0c530ab8 6516#endif /* NO_NESTED_PMAP */
316670eb 6517 if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
9bccf70c
A
6518 /* clean up parent map/maps */
6519 vm_map_submap_pmap_clean(
6520 map, entry->vme_start,
6521 entry->vme_end,
3e170ce0
A
6522 VME_SUBMAP(entry),
6523 VME_OFFSET(entry));
9bccf70c 6524 }
2d21ac55 6525 } else {
1c79356b
A
6526 vm_map_submap_pmap_clean(
6527 map, entry->vme_start, entry->vme_end,
3e170ce0
A
6528 VME_SUBMAP(entry),
6529 VME_OFFSET(entry));
2d21ac55 6530 }
3e170ce0
A
6531 } else if (VME_OBJECT(entry) != kernel_object &&
6532 VME_OBJECT(entry) != compressor_object) {
6533 object = VME_OBJECT(entry);
39236c6e
A
6534 if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
6535 vm_object_pmap_protect_options(
3e170ce0 6536 object, VME_OFFSET(entry),
55e303ae
A
6537 entry->vme_end - entry->vme_start,
6538 PMAP_NULL,
6539 entry->vme_start,
39236c6e
A
6540 VM_PROT_NONE,
6541 PMAP_OPTIONS_REMOVE);
3e170ce0 6542 } else if ((VME_OBJECT(entry) != VM_OBJECT_NULL) ||
39236c6e
A
6543 (map->pmap == kernel_pmap)) {
6544 /* Remove translations associated
6545 * with this range unless the entry
6546 * does not have an object, or
6547 * it's the kernel map or a descendant
6548 * since the platform could potentially
6549 * create "backdoor" mappings invisible
6550 * to the VM. It is expected that
6551 * objectless, non-kernel ranges
6552 * do not have such VM invisible
6553 * translations.
6554 */
6555 pmap_remove_options(map->pmap,
6556 (addr64_t)entry->vme_start,
6557 (addr64_t)entry->vme_end,
6558 PMAP_OPTIONS_REMOVE);
1c79356b
A
6559 }
6560 }
6561
fe8ab488
A
6562 if (entry->iokit_acct) {
6563 /* alternate accounting */
ecc0ceb4
A
6564 DTRACE_VM4(vm_map_iokit_unmapped_region,
6565 vm_map_t, map,
6566 vm_map_offset_t, entry->vme_start,
6567 vm_map_offset_t, entry->vme_end,
6568 int, VME_ALIAS(entry));
fe8ab488
A
6569 vm_map_iokit_unmapped_region(map,
6570 (entry->vme_end -
6571 entry->vme_start));
6572 entry->iokit_acct = FALSE;
6573 }
6574
91447636
A
6575 /*
6576 * All pmap mappings for this map entry must have been
6577 * cleared by now.
6578 */
fe8ab488 6579#if DEBUG
91447636
A
6580 assert(vm_map_pmap_is_empty(map,
6581 entry->vme_start,
6582 entry->vme_end));
fe8ab488 6583#endif /* DEBUG */
91447636 6584
1c79356b 6585 next = entry->vme_next;
fe8ab488
A
6586
6587 if (map->pmap == kernel_pmap &&
6588 map->ref_count != 0 &&
6589 entry->vme_end < end &&
6590 (next == vm_map_to_entry(map) ||
6591 next->vme_start != entry->vme_end)) {
6592 panic("vm_map_delete(%p,0x%llx,0x%llx): "
6593 "hole after %p at 0x%llx\n",
6594 map,
6595 (uint64_t)start,
6596 (uint64_t)end,
6597 entry,
6598 (uint64_t)entry->vme_end);
6599 }
6600
1c79356b
A
6601 s = next->vme_start;
6602 last_timestamp = map->timestamp;
91447636
A
6603
6604 if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) &&
6605 zap_map != VM_MAP_NULL) {
2d21ac55 6606 vm_map_size_t entry_size;
91447636
A
6607 /*
6608 * The caller wants to save the affected VM map entries
6609 * into the "zap_map". The caller will take care of
6610 * these entries.
6611 */
6612 /* unlink the entry from "map" ... */
6d2010ae 6613 vm_map_store_entry_unlink(map, entry);
91447636 6614 /* ... and add it to the end of the "zap_map" */
6d2010ae 6615 vm_map_store_entry_link(zap_map,
91447636
A
6616 vm_map_last_entry(zap_map),
6617 entry);
2d21ac55
A
6618 entry_size = entry->vme_end - entry->vme_start;
6619 map->size -= entry_size;
6620 zap_map->size += entry_size;
6621 /* we didn't unlock the map, so no timestamp increase */
6622 last_timestamp--;
91447636
A
6623 } else {
6624 vm_map_entry_delete(map, entry);
6625 /* vm_map_entry_delete unlocks the map */
6626 vm_map_lock(map);
6627 }
6628
1c79356b
A
6629 entry = next;
6630
6631 if(entry == vm_map_to_entry(map)) {
6632 break;
6633 }
6634 if (last_timestamp+1 != map->timestamp) {
6635 /*
6636 * We are responsible for deleting everything
6637 * in the given space; if someone has interfered,
6638 * we pick up where we left off. Back-fills should
6639 * be all right for anyone except map_delete, and
6640 * we have to assume that the task has been fully
6641 * disabled before we get here.
6642 */
6643 if (!vm_map_lookup_entry(map, s, &entry)){
6644 entry = entry->vme_next;
2d21ac55 6645 s = entry->vme_start;
1c79356b 6646 } else {
2d21ac55 6647 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
1c79356b
A
6648 }
6649 /*
6650 * others can not only allocate behind us, we can
6651 * also see coalescing while we don't hold the map lock
6652 */
6653 if(entry == vm_map_to_entry(map)) {
6654 break;
6655 }
1c79356b
A
6656 }
6657 last_timestamp = map->timestamp;
6658 }
6659
6660 if (map->wait_for_space)
6661 thread_wakeup((event_t) map);
6662 /*
6663 * wake up anybody waiting on entries that we have already deleted.
6664 */
6665 if (need_wakeup)
6666 vm_map_entry_wakeup(map);
6667
6668 return KERN_SUCCESS;
6669}
6670
6671/*
6672 * vm_map_remove:
6673 *
6674 * Remove the given address range from the target map.
6675 * This is the exported form of vm_map_delete.
6676 */
6677kern_return_t
6678vm_map_remove(
39037602
A
6679 vm_map_t map,
6680 vm_map_offset_t start,
6681 vm_map_offset_t end,
6682 boolean_t flags)
1c79356b 6683{
39037602 6684 kern_return_t result;
9bccf70c 6685
1c79356b
A
6686 vm_map_lock(map);
6687 VM_MAP_RANGE_CHECK(map, start, end);
39236c6e
A
6688 /*
6689 * For the zone_map, the kernel controls the allocation/freeing of memory.
6690 * Any free to the zone_map should be within the bounds of the map and
6691 * should free up memory. If the VM_MAP_RANGE_CHECK() silently converts a
6692 * free to the zone_map into a no-op, there is a problem and we should
6693 * panic.
6694 */
6695 if ((map == zone_map) && (start == end))
6696 panic("Nothing being freed to the zone_map. start = end = %p\n", (void *)start);
91447636 6697 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
1c79356b 6698 vm_map_unlock(map);
91447636 6699
1c79356b
A
6700 return(result);
6701}
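
/*
 * Illustrative sketch only (added for exposition, not part of the original
 * source, and guarded by #if 0 so it is never compiled): tearing down a
 * kernel mapping that holds one kernel wiring.  Without
 * VM_MAP_REMOVE_KUNWIRE, vm_map_delete() waits for kernel wirings to go
 * away rather than removing them, per the comment in vm_map_delete().
 * "map", "addr" and "size" are hypothetical arguments.
 */
#if 0
static void
remove_wired_kernel_range_example(
	vm_map_t	map,
	vm_map_offset_t	addr,
	vm_map_size_t	size)
{
	kern_return_t	kr;

	kr = vm_map_remove(map,
			   vm_map_trunc_page(addr, VM_MAP_PAGE_MASK(map)),
			   vm_map_round_page(addr + size, VM_MAP_PAGE_MASK(map)),
			   VM_MAP_REMOVE_KUNWIRE);	/* drop one kernel wiring */
	assert(kr == KERN_SUCCESS);
}
#endif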
6702
39037602
A
6703/*
6704 * vm_map_remove_locked:
6705 *
6706 * Remove the given address range from the target map, which must already be locked.
6707 * This is the exported form of vm_map_delete.
6708 */
6709kern_return_t
6710vm_map_remove_locked(
6711 vm_map_t map,
6712 vm_map_offset_t start,
6713 vm_map_offset_t end,
6714 boolean_t flags)
6715{
6716 kern_return_t result;
6717
6718 VM_MAP_RANGE_CHECK(map, start, end);
6719 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
6720 return(result);
6721}
6722
1c79356b 6723
1c79356b
A
6724/*
6725 * Routine: vm_map_copy_discard
6726 *
6727 * Description:
6728 * Dispose of a map copy object (returned by
6729 * vm_map_copyin).
6730 */
6731void
6732vm_map_copy_discard(
6733 vm_map_copy_t copy)
6734{
1c79356b
A
6735 if (copy == VM_MAP_COPY_NULL)
6736 return;
6737
6738 switch (copy->type) {
6739 case VM_MAP_COPY_ENTRY_LIST:
6740 while (vm_map_copy_first_entry(copy) !=
2d21ac55 6741 vm_map_copy_to_entry(copy)) {
1c79356b
A
6742 vm_map_entry_t entry = vm_map_copy_first_entry(copy);
6743
6744 vm_map_copy_entry_unlink(copy, entry);
39236c6e 6745 if (entry->is_sub_map) {
3e170ce0 6746 vm_map_deallocate(VME_SUBMAP(entry));
39236c6e 6747 } else {
3e170ce0 6748 vm_object_deallocate(VME_OBJECT(entry));
39236c6e 6749 }
1c79356b
A
6750 vm_map_copy_entry_dispose(copy, entry);
6751 }
6752 break;
6753 case VM_MAP_COPY_OBJECT:
6754 vm_object_deallocate(copy->cpy_object);
6755 break;
1c79356b
A
6756 case VM_MAP_COPY_KERNEL_BUFFER:
6757
6758 /*
6759 * The vm_map_copy_t and possibly the data buffer were
6760 * allocated by a single call to kalloc(), i.e. the
6761 * vm_map_copy_t was not allocated out of the zone.
6762 */
3e170ce0
A
6763 if (copy->size > msg_ool_size_small || copy->offset)
6764 panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
6765 (long long)copy->size, (long long)copy->offset);
6766 kfree(copy, copy->size + cpy_kdata_hdr_sz);
1c79356b
A
6767 return;
6768 }
91447636 6769 zfree(vm_map_copy_zone, copy);
1c79356b
A
6770}
6771
6772/*
6773 * Routine: vm_map_copy_copy
6774 *
6775 * Description:
6776 * Move the information in a map copy object to
6777 * a new map copy object, leaving the old one
6778 * empty.
6779 *
6780 * This is used by kernel routines that need
6781 * to look at out-of-line data (in copyin form)
6782 * before deciding whether to return SUCCESS.
6783 * If the routine returns FAILURE, the original
6784 * copy object will be deallocated; therefore,
6785 * these routines must make a copy of the copy
6786 * object and leave the original empty so that
6787 * deallocation will not fail.
6788 */
6789vm_map_copy_t
6790vm_map_copy_copy(
6791 vm_map_copy_t copy)
6792{
6793 vm_map_copy_t new_copy;
6794
6795 if (copy == VM_MAP_COPY_NULL)
6796 return VM_MAP_COPY_NULL;
6797
6798 /*
6799 * Allocate a new copy object, and copy the information
6800 * from the old one into it.
6801 */
6802
6803 new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
04b8595b 6804 new_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
1c79356b
A
6805 *new_copy = *copy;
6806
6807 if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
6808 /*
6809 * The links in the entry chain must be
6810 * changed to point to the new copy object.
6811 */
6812 vm_map_copy_first_entry(copy)->vme_prev
6813 = vm_map_copy_to_entry(new_copy);
6814 vm_map_copy_last_entry(copy)->vme_next
6815 = vm_map_copy_to_entry(new_copy);
6816 }
6817
6818 /*
6819 * Change the old copy object into one that contains
6820 * nothing to be deallocated.
6821 */
6822 copy->type = VM_MAP_COPY_OBJECT;
6823 copy->cpy_object = VM_OBJECT_NULL;
6824
6825 /*
6826 * Return the new object.
6827 */
6828 return new_copy;
6829}
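
/*
 * Illustrative sketch only (added for exposition, not part of the original
 * source, and guarded by #if 0 so it is never compiled): the usage pattern
 * described in the comment above.  A routine receiving out-of-line data as
 * a vm_map_copy_t moves the contents into a private copy object before
 * validating it; if validation fails, it discards the private copy itself,
 * and the now-empty original can still be deallocated safely by the caller.
 * "validate_payload" is a hypothetical helper.
 */
#if 0
extern boolean_t validate_payload(vm_map_copy_t copy);	/* hypothetical */

static kern_return_t
consume_ool_data_example(vm_map_copy_t copy)
{
	vm_map_copy_t	private_copy;

	private_copy = vm_map_copy_copy(copy);	/* original is left empty */

	if (!validate_payload(private_copy)) {
		/* we own the private copy; get rid of it ourselves */
		vm_map_copy_discard(private_copy);
		/* caller may deallocate the emptied original without harm */
		return KERN_FAILURE;
	}

	/* ... hand private_copy off, e.g. to vm_map_copyout() ... */
	return KERN_SUCCESS;
}
#endif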
6830
91447636 6831static kern_return_t
1c79356b
A
6832vm_map_overwrite_submap_recurse(
6833 vm_map_t dst_map,
91447636
A
6834 vm_map_offset_t dst_addr,
6835 vm_map_size_t dst_size)
1c79356b 6836{
91447636 6837 vm_map_offset_t dst_end;
1c79356b
A
6838 vm_map_entry_t tmp_entry;
6839 vm_map_entry_t entry;
6840 kern_return_t result;
6841 boolean_t encountered_sub_map = FALSE;
6842
6843
6844
6845 /*
6846 * Verify that the destination is all writeable
6847 * initially. We have to trunc the destination
6848 * address and round the copy size or we'll end up
6849 * splitting entries in strange ways.
6850 */
6851
39236c6e
A
6852 dst_end = vm_map_round_page(dst_addr + dst_size,
6853 VM_MAP_PAGE_MASK(dst_map));
9bccf70c 6854 vm_map_lock(dst_map);
1c79356b
A
6855
6856start_pass_1:
1c79356b
A
6857 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
6858 vm_map_unlock(dst_map);
6859 return(KERN_INVALID_ADDRESS);
6860 }
6861
39236c6e
A
6862 vm_map_clip_start(dst_map,
6863 tmp_entry,
6864 vm_map_trunc_page(dst_addr,
6865 VM_MAP_PAGE_MASK(dst_map)));
fe8ab488
A
6866 if (tmp_entry->is_sub_map) {
6867 /* clipping did unnest if needed */
6868 assert(!tmp_entry->use_pmap);
6869 }
1c79356b
A
6870
6871 for (entry = tmp_entry;;) {
6872 vm_map_entry_t next;
6873
6874 next = entry->vme_next;
6875 while(entry->is_sub_map) {
91447636
A
6876 vm_map_offset_t sub_start;
6877 vm_map_offset_t sub_end;
6878 vm_map_offset_t local_end;
1c79356b
A
6879
6880 if (entry->in_transition) {
2d21ac55
A
6881 /*
6882 * Say that we are waiting, and wait for entry.
6883 */
1c79356b
A
6884 entry->needs_wakeup = TRUE;
6885 vm_map_entry_wait(dst_map, THREAD_UNINT);
6886
6887 goto start_pass_1;
6888 }
6889
6890 encountered_sub_map = TRUE;
3e170ce0 6891 sub_start = VME_OFFSET(entry);
1c79356b
A
6892
6893 if(entry->vme_end < dst_end)
6894 sub_end = entry->vme_end;
6895 else
6896 sub_end = dst_end;
6897 sub_end -= entry->vme_start;
3e170ce0 6898 sub_end += VME_OFFSET(entry);
1c79356b
A
6899 local_end = entry->vme_end;
6900 vm_map_unlock(dst_map);
6901
6902 result = vm_map_overwrite_submap_recurse(
3e170ce0 6903 VME_SUBMAP(entry),
2d21ac55
A
6904 sub_start,
6905 sub_end - sub_start);
1c79356b
A
6906
6907 if(result != KERN_SUCCESS)
6908 return result;
6909 if (dst_end <= entry->vme_end)
6910 return KERN_SUCCESS;
6911 vm_map_lock(dst_map);
6912 if(!vm_map_lookup_entry(dst_map, local_end,
6913 &tmp_entry)) {
6914 vm_map_unlock(dst_map);
6915 return(KERN_INVALID_ADDRESS);
6916 }
6917 entry = tmp_entry;
6918 next = entry->vme_next;
6919 }
6920
6921 if ( ! (entry->protection & VM_PROT_WRITE)) {
6922 vm_map_unlock(dst_map);
6923 return(KERN_PROTECTION_FAILURE);
6924 }
6925
6926 /*
6927 * If the entry is in transition, we must wait
6928 * for it to exit that state. Anything could happen
6929 * when we unlock the map, so start over.
6930 */
6931 if (entry->in_transition) {
6932
6933 /*
6934 * Say that we are waiting, and wait for entry.
6935 */
6936 entry->needs_wakeup = TRUE;
6937 vm_map_entry_wait(dst_map, THREAD_UNINT);
6938
6939 goto start_pass_1;
6940 }
6941
6942/*
6943 * our range is contained completely within this map entry
6944 */
6945 if (dst_end <= entry->vme_end) {
6946 vm_map_unlock(dst_map);
6947 return KERN_SUCCESS;
6948 }
6949/*
6950 * check that the specified range is a contiguous region
6951 */
6952 if ((next == vm_map_to_entry(dst_map)) ||
6953 (next->vme_start != entry->vme_end)) {
6954 vm_map_unlock(dst_map);
6955 return(KERN_INVALID_ADDRESS);
6956 }
6957
6958 /*
6959 * Check for permanent objects in the destination.
6960 */
3e170ce0
A
6961 if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
6962 ((!VME_OBJECT(entry)->internal) ||
6963 (VME_OBJECT(entry)->true_share))) {
1c79356b
A
6964 if(encountered_sub_map) {
6965 vm_map_unlock(dst_map);
6966 return(KERN_FAILURE);
6967 }
6968 }
6969
6970
6971 entry = next;
6972 }/* for */
6973 vm_map_unlock(dst_map);
6974 return(KERN_SUCCESS);
6975}
6976
6977/*
6978 * Routine: vm_map_copy_overwrite
6979 *
6980 * Description:
6981 * Copy the memory described by the map copy
6982 * object (copy; returned by vm_map_copyin) onto
6983 * the specified destination region (dst_map, dst_addr).
6984 * The destination must be writeable.
6985 *
6986 * Unlike vm_map_copyout, this routine actually
6987 * writes over previously-mapped memory. If the
6988 * previous mapping was to a permanent (user-supplied)
6989 * memory object, it is preserved.
6990 *
6991 * The attributes (protection and inheritance) of the
6992 * destination region are preserved.
6993 *
6994 * If successful, consumes the copy object.
6995 * Otherwise, the caller is responsible for it.
6996 *
6997 * Implementation notes:
6998 * To overwrite aligned temporary virtual memory, it is
6999 * sufficient to remove the previous mapping and insert
7000 * the new copy. This replacement is done either on
7001 * the whole region (if no permanent virtual memory
7002 * objects are embedded in the destination region) or
7003 * in individual map entries.
7004 *
7005 * To overwrite permanent virtual memory, it is necessary
7006 * to copy each page, as the external memory management
7007 * interface currently does not provide any optimizations.
7008 *
7009 * Unaligned memory also has to be copied. It is possible
7010 * to use 'vm_trickery' to copy the aligned data. This is
7011 * not done, but it would not be hard to implement.
7012 *
7013 * Once a page of permanent memory has been overwritten,
7014 * it is impossible to interrupt this function; otherwise,
7015 * the call would be neither atomic nor location-independent.
7016 * The kernel-state portion of a user thread must be
7017 * interruptible.
7018 *
7019 * It may be expensive to forward all requests that might
7020 * overwrite permanent memory (vm_write, vm_copy) to
7021 * uninterruptible kernel threads. This routine may be
7022 * called by interruptible threads; however, success is
7023 * not guaranteed -- if the request cannot be performed
7024 * atomically and interruptibly, an error indication is
7025 * returned.
7026 */
7027
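
/*
 * Illustrative sketch only (added for exposition, not part of the original
 * source, and guarded by #if 0 so it is never compiled): the caller-level
 * pattern described in the comment above, assuming the usual
 * vm_map_copyin() / vm_map_copy_overwrite() interfaces declared in
 * vm_map.h.  Data is captured from a source range and written over an
 * existing, writeable destination range, preserving the destination's
 * protection and inheritance.  All variable names are hypothetical; the
 * internal helper below implements the nested part of this operation.
 */
#if 0
static kern_return_t
overwrite_range_example(
	vm_map_t	src_map,
	vm_map_offset_t	src_addr,
	vm_map_t	dst_map,
	vm_map_offset_t	dst_addr,
	vm_map_size_t	len)
{
	vm_map_copy_t	copy;
	kern_return_t	kr;

	kr = vm_map_copyin(src_map, src_addr, len,
			   FALSE,	/* don't destroy the source */
			   &copy);
	if (kr != KERN_SUCCESS)
		return kr;

	kr = vm_map_copy_overwrite(dst_map, dst_addr, copy,
				   FALSE);	/* not interruptible */
	if (kr != KERN_SUCCESS) {
		/* on failure the copy object is not consumed; discard it */
		vm_map_copy_discard(copy);
	}
	return kr;
}
#endif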
91447636 7028static kern_return_t
1c79356b 7029vm_map_copy_overwrite_nested(
91447636
A
7030 vm_map_t dst_map,
7031 vm_map_address_t dst_addr,
7032 vm_map_copy_t copy,
7033 boolean_t interruptible,
6d2010ae
A
7034 pmap_t pmap,
7035 boolean_t discard_on_success)
1c79356b 7036{
91447636
A
7037 vm_map_offset_t dst_end;
7038 vm_map_entry_t tmp_entry;
7039 vm_map_entry_t entry;
7040 kern_return_t kr;
7041 boolean_t aligned = TRUE;
7042 boolean_t contains_permanent_objects = FALSE;
7043 boolean_t encountered_sub_map = FALSE;
7044 vm_map_offset_t base_addr;
7045 vm_map_size_t copy_size;
7046 vm_map_size_t total_size;
1c79356b
A
7047
7048
7049 /*
7050 * Check for null copy object.
7051 */
7052
7053 if (copy == VM_MAP_COPY_NULL)
7054 return(KERN_SUCCESS);
7055
7056 /*
7057 * Check for special kernel buffer allocated
7058 * by new_ipc_kmsg_copyin.
7059 */
7060
7061 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
0b4e3aa0 7062 return(vm_map_copyout_kernel_buffer(
2d21ac55 7063 dst_map, &dst_addr,
39037602 7064 copy, copy->size, TRUE, discard_on_success));
1c79356b
A
7065 }
7066
7067 /*
7068 * Only works for entry lists at the moment. Will
7069 * support page lists later.
7070 */
7071
7072 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
7073
7074 if (copy->size == 0) {
6d2010ae
A
7075 if (discard_on_success)
7076 vm_map_copy_discard(copy);
1c79356b
A
7077 return(KERN_SUCCESS);
7078 }
7079
7080 /*
7081 * Verify that the destination is all writeable
7082 * initially. We have to trunc the destination
7083 * address and round the copy size or we'll end up
7084 * splitting entries in strange ways.
7085 */
7086
39236c6e
A
7087 if (!VM_MAP_PAGE_ALIGNED(copy->size,
7088 VM_MAP_PAGE_MASK(dst_map)) ||
7089 !VM_MAP_PAGE_ALIGNED(copy->offset,
7090 VM_MAP_PAGE_MASK(dst_map)) ||
7091 !VM_MAP_PAGE_ALIGNED(dst_addr,
fe8ab488 7092 VM_MAP_PAGE_MASK(dst_map)))
1c79356b
A
7093 {
7094 aligned = FALSE;
39236c6e
A
7095 dst_end = vm_map_round_page(dst_addr + copy->size,
7096 VM_MAP_PAGE_MASK(dst_map));
1c79356b
A
7097 } else {
7098 dst_end = dst_addr + copy->size;
7099 }
7100
1c79356b 7101 vm_map_lock(dst_map);
9bccf70c 7102
91447636
A
7103 /* LP64todo - remove this check when vm_map_commpage64()
7104 * no longer has to stuff in a map_entry for the commpage
7105 * above the map's max_offset.
7106 */
7107 if (dst_addr >= dst_map->max_offset) {
7108 vm_map_unlock(dst_map);
7109 return(KERN_INVALID_ADDRESS);
7110 }
7111
9bccf70c 7112start_pass_1:
1c79356b
A
7113 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
7114 vm_map_unlock(dst_map);
7115 return(KERN_INVALID_ADDRESS);
7116 }
39236c6e
A
7117 vm_map_clip_start(dst_map,
7118 tmp_entry,
7119 vm_map_trunc_page(dst_addr,
7120 VM_MAP_PAGE_MASK(dst_map)));
1c79356b
A
7121 for (entry = tmp_entry;;) {
7122 vm_map_entry_t next = entry->vme_next;
7123
7124 while(entry->is_sub_map) {
91447636
A
7125 vm_map_offset_t sub_start;
7126 vm_map_offset_t sub_end;
7127 vm_map_offset_t local_end;
1c79356b
A
7128
7129 if (entry->in_transition) {
7130
2d21ac55
A
7131 /*
7132 * Say that we are waiting, and wait for entry.
7133 */
1c79356b
A
7134 entry->needs_wakeup = TRUE;
7135 vm_map_entry_wait(dst_map, THREAD_UNINT);
7136
7137 goto start_pass_1;
7138 }
7139
7140 local_end = entry->vme_end;
7141 if (!(entry->needs_copy)) {
7142 /* if needs_copy we are a COW submap; */
7143 /* in such a case we just replace, so */
7144 /* there is no need for the */
7145 /* following check. */
7146 encountered_sub_map = TRUE;
3e170ce0 7147 sub_start = VME_OFFSET(entry);
1c79356b
A
7148
7149 if(entry->vme_end < dst_end)
7150 sub_end = entry->vme_end;
7151 else
7152 sub_end = dst_end;
7153 sub_end -= entry->vme_start;
3e170ce0 7154 sub_end += VME_OFFSET(entry);
1c79356b
A
7155 vm_map_unlock(dst_map);
7156
7157 kr = vm_map_overwrite_submap_recurse(
3e170ce0 7158 VME_SUBMAP(entry),
1c79356b
A
7159 sub_start,
7160 sub_end - sub_start);
7161 if(kr != KERN_SUCCESS)
7162 return kr;
7163 vm_map_lock(dst_map);
7164 }
7165
7166 if (dst_end <= entry->vme_end)
7167 goto start_overwrite;
7168 if(!vm_map_lookup_entry(dst_map, local_end,
7169 &entry)) {
7170 vm_map_unlock(dst_map);
7171 return(KERN_INVALID_ADDRESS);
7172 }
7173 next = entry->vme_next;
7174 }
7175
7176 if ( ! (entry->protection & VM_PROT_WRITE)) {
7177 vm_map_unlock(dst_map);
7178 return(KERN_PROTECTION_FAILURE);
7179 }
7180
7181 /*
7182 * If the entry is in transition, we must wait
7183 * for it to exit that state. Anything could happen
7184 * when we unlock the map, so start over.
7185 */
7186 if (entry->in_transition) {
7187
7188 /*
7189 * Say that we are waiting, and wait for entry.
7190 */
7191 entry->needs_wakeup = TRUE;
7192 vm_map_entry_wait(dst_map, THREAD_UNINT);
7193
7194 goto start_pass_1;
7195 }
7196
7197/*
7198 * our range is contained completely within this map entry
7199 */
7200 if (dst_end <= entry->vme_end)
7201 break;
7202/*
7203 * check that the specified range is a contiguous region
7204 */
7205 if ((next == vm_map_to_entry(dst_map)) ||
7206 (next->vme_start != entry->vme_end)) {
7207 vm_map_unlock(dst_map);
7208 return(KERN_INVALID_ADDRESS);
7209 }
7210
7211
7212 /*
7213 * Check for permanent objects in the destination.
7214 */
3e170ce0
A
7215 if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
7216 ((!VME_OBJECT(entry)->internal) ||
7217 (VME_OBJECT(entry)->true_share))) {
1c79356b
A
7218 contains_permanent_objects = TRUE;
7219 }
7220
7221 entry = next;
7222 }/* for */
7223
7224start_overwrite:
7225 /*
7226 * If there are permanent objects in the destination, then
7227 * the copy cannot be interrupted.
7228 */
7229
7230 if (interruptible && contains_permanent_objects) {
7231 vm_map_unlock(dst_map);
7232 return(KERN_FAILURE); /* XXX */
7233 }
7234
7235 /*
7236 *
7237 * Make a second pass, overwriting the data
7238 * At the beginning of each loop iteration,
7239 * the next entry to be overwritten is "tmp_entry"
7240 * (initially, the value returned from the lookup above),
7241 * and the starting address expected in that entry
7242 * is "start".
7243 */
7244
7245 total_size = copy->size;
7246 if(encountered_sub_map) {
7247 copy_size = 0;
7248 /* re-calculate tmp_entry since we've had the map */
7249 /* unlocked */
7250 if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
7251 vm_map_unlock(dst_map);
7252 return(KERN_INVALID_ADDRESS);
7253 }
7254 } else {
7255 copy_size = copy->size;
7256 }
7257
7258 base_addr = dst_addr;
7259 while(TRUE) {
7260 /* deconstruct the copy object and do it in parts, */
7261 /* only in the sub_map, interruptible case */
7262 vm_map_entry_t copy_entry;
91447636
A
7263 vm_map_entry_t previous_prev = VM_MAP_ENTRY_NULL;
7264 vm_map_entry_t next_copy = VM_MAP_ENTRY_NULL;
1c79356b 7265 int nentries;
91447636 7266 int remaining_entries = 0;
b0d623f7 7267 vm_map_offset_t new_offset = 0;
1c79356b
A
7268
7269 for (entry = tmp_entry; copy_size == 0;) {
7270 vm_map_entry_t next;
7271
7272 next = entry->vme_next;
7273
7274 /* tmp_entry and the base address are moved along */
7275 /* each time we encounter a sub-map. Otherwise, */
7276 /* entry can outpace tmp_entry, and the copy_size */
7277 /* may reflect the distance between them. */
7278 /* If the current entry is found to be in transition, */
7279 /* we will start over at the beginning or at the last */
7280 /* encounter of a submap, as dictated by base_addr, */
7281 /* and we will zero copy_size accordingly. */
7282 if (entry->in_transition) {
7283 /*
7284 * Say that we are waiting, and wait for entry.
7285 */
7286 entry->needs_wakeup = TRUE;
7287 vm_map_entry_wait(dst_map, THREAD_UNINT);
7288
1c79356b 7289 if(!vm_map_lookup_entry(dst_map, base_addr,
2d21ac55 7290 &tmp_entry)) {
1c79356b
A
7291 vm_map_unlock(dst_map);
7292 return(KERN_INVALID_ADDRESS);
7293 }
7294 copy_size = 0;
7295 entry = tmp_entry;
7296 continue;
7297 }
7298 if(entry->is_sub_map) {
91447636
A
7299 vm_map_offset_t sub_start;
7300 vm_map_offset_t sub_end;
7301 vm_map_offset_t local_end;
1c79356b
A
7302
7303 if (entry->needs_copy) {
7304 /* if this is a COW submap */
7305 /* just back the range with an */
7306 /* anonymous entry */
7307 if(entry->vme_end < dst_end)
7308 sub_end = entry->vme_end;
7309 else
7310 sub_end = dst_end;
7311 if(entry->vme_start < base_addr)
7312 sub_start = base_addr;
7313 else
7314 sub_start = entry->vme_start;
7315 vm_map_clip_end(
7316 dst_map, entry, sub_end);
7317 vm_map_clip_start(
7318 dst_map, entry, sub_start);
2d21ac55 7319 assert(!entry->use_pmap);
1c79356b
A
7320 entry->is_sub_map = FALSE;
7321 vm_map_deallocate(
3e170ce0
A
7322 VME_SUBMAP(entry));
7323 VME_SUBMAP_SET(entry, NULL);
1c79356b
A
7324 entry->is_shared = FALSE;
7325 entry->needs_copy = FALSE;
3e170ce0 7326 VME_OFFSET_SET(entry, 0);
2d21ac55
A
7327 /*
7328 * XXX FBDP
7329 * We should propagate the protections
7330 * of the submap entry here instead
7331 * of forcing them to VM_PROT_ALL...
7332 * Or better yet, we should inherit
7333 * the protection of the copy_entry.
7334 */
1c79356b
A
7335 entry->protection = VM_PROT_ALL;
7336 entry->max_protection = VM_PROT_ALL;
7337 entry->wired_count = 0;
7338 entry->user_wired_count = 0;
7339 if(entry->inheritance
2d21ac55
A
7340 == VM_INHERIT_SHARE)
7341 entry->inheritance = VM_INHERIT_COPY;
1c79356b
A
7342 continue;
7343 }
7344 /* first take care of any non-sub_map */
7345 /* entries to send */
7346 if(base_addr < entry->vme_start) {
7347 /* stuff to send */
7348 copy_size =
7349 entry->vme_start - base_addr;
7350 break;
7351 }
3e170ce0 7352 sub_start = VME_OFFSET(entry);
1c79356b
A
7353
7354 if(entry->vme_end < dst_end)
7355 sub_end = entry->vme_end;
7356 else
7357 sub_end = dst_end;
7358 sub_end -= entry->vme_start;
3e170ce0 7359 sub_end += VME_OFFSET(entry);
1c79356b
A
7360 local_end = entry->vme_end;
7361 vm_map_unlock(dst_map);
7362 copy_size = sub_end - sub_start;
7363
7364 /* adjust the copy object */
7365 if (total_size > copy_size) {
91447636
A
7366 vm_map_size_t local_size = 0;
7367 vm_map_size_t entry_size;
1c79356b 7368
2d21ac55
A
7369 nentries = 1;
7370 new_offset = copy->offset;
7371 copy_entry = vm_map_copy_first_entry(copy);
7372 while(copy_entry !=
7373 vm_map_copy_to_entry(copy)){
7374 entry_size = copy_entry->vme_end -
7375 copy_entry->vme_start;
7376 if((local_size < copy_size) &&
7377 ((local_size + entry_size)
7378 >= copy_size)) {
7379 vm_map_copy_clip_end(copy,
7380 copy_entry,
7381 copy_entry->vme_start +
7382 (copy_size - local_size));
7383 entry_size = copy_entry->vme_end -
7384 copy_entry->vme_start;
7385 local_size += entry_size;
7386 new_offset += entry_size;
7387 }
7388 if(local_size >= copy_size) {
7389 next_copy = copy_entry->vme_next;
7390 copy_entry->vme_next =
7391 vm_map_copy_to_entry(copy);
7392 previous_prev =
7393 copy->cpy_hdr.links.prev;
7394 copy->cpy_hdr.links.prev = copy_entry;
7395 copy->size = copy_size;
7396 remaining_entries =
7397 copy->cpy_hdr.nentries;
7398 remaining_entries -= nentries;
7399 copy->cpy_hdr.nentries = nentries;
7400 break;
7401 } else {
7402 local_size += entry_size;
7403 new_offset += entry_size;
7404 nentries++;
7405 }
7406 copy_entry = copy_entry->vme_next;
7407 }
1c79356b
A
7408 }
7409
7410 if((entry->use_pmap) && (pmap == NULL)) {
7411 kr = vm_map_copy_overwrite_nested(
3e170ce0 7412 VME_SUBMAP(entry),
1c79356b
A
7413 sub_start,
7414 copy,
7415 interruptible,
3e170ce0 7416 VME_SUBMAP(entry)->pmap,
6d2010ae 7417 TRUE);
1c79356b
A
7418 } else if (pmap != NULL) {
7419 kr = vm_map_copy_overwrite_nested(
3e170ce0 7420 VME_SUBMAP(entry),
1c79356b
A
7421 sub_start,
7422 copy,
6d2010ae
A
7423 interruptible, pmap,
7424 TRUE);
1c79356b
A
7425 } else {
7426 kr = vm_map_copy_overwrite_nested(
3e170ce0 7427 VME_SUBMAP(entry),
1c79356b
A
7428 sub_start,
7429 copy,
7430 interruptible,
6d2010ae
A
7431 dst_map->pmap,
7432 TRUE);
1c79356b
A
7433 }
7434 if(kr != KERN_SUCCESS) {
7435 if(next_copy != NULL) {
2d21ac55
A
7436 copy->cpy_hdr.nentries +=
7437 remaining_entries;
7438 copy->cpy_hdr.links.prev->vme_next =
7439 next_copy;
7440 copy->cpy_hdr.links.prev
7441 = previous_prev;
7442 copy->size = total_size;
1c79356b
A
7443 }
7444 return kr;
7445 }
7446 if (dst_end <= local_end) {
7447 return(KERN_SUCCESS);
7448 }
7449 /* otherwise copy no longer exists, it was */
7450 /* destroyed after successful copy_overwrite */
7451 copy = (vm_map_copy_t)
2d21ac55 7452 zalloc(vm_map_copy_zone);
04b8595b 7453 copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
1c79356b 7454 vm_map_copy_first_entry(copy) =
2d21ac55
A
7455 vm_map_copy_last_entry(copy) =
7456 vm_map_copy_to_entry(copy);
1c79356b
A
7457 copy->type = VM_MAP_COPY_ENTRY_LIST;
7458 copy->offset = new_offset;
7459
e2d2fc5c
A
7460 /*
7461 * XXX FBDP
7462 * this does not seem to deal with
7463 * the VM map store (R&B tree)
7464 */
7465
1c79356b
A
7466 total_size -= copy_size;
7467 copy_size = 0;
7468 /* put back remainder of copy in container */
7469 if(next_copy != NULL) {
2d21ac55
A
7470 copy->cpy_hdr.nentries = remaining_entries;
7471 copy->cpy_hdr.links.next = next_copy;
7472 copy->cpy_hdr.links.prev = previous_prev;
7473 copy->size = total_size;
7474 next_copy->vme_prev =
7475 vm_map_copy_to_entry(copy);
7476 next_copy = NULL;
1c79356b
A
7477 }
7478 base_addr = local_end;
7479 vm_map_lock(dst_map);
7480 if(!vm_map_lookup_entry(dst_map,
2d21ac55 7481 local_end, &tmp_entry)) {
1c79356b
A
7482 vm_map_unlock(dst_map);
7483 return(KERN_INVALID_ADDRESS);
7484 }
7485 entry = tmp_entry;
7486 continue;
7487 }
7488 if (dst_end <= entry->vme_end) {
7489 copy_size = dst_end - base_addr;
7490 break;
7491 }
7492
7493 if ((next == vm_map_to_entry(dst_map)) ||
2d21ac55 7494 (next->vme_start != entry->vme_end)) {
1c79356b
A
7495 vm_map_unlock(dst_map);
7496 return(KERN_INVALID_ADDRESS);
7497 }
7498
7499 entry = next;
7500 }/* for */
7501
7502 next_copy = NULL;
7503 nentries = 1;
7504
7505 /* adjust the copy object */
7506 if (total_size > copy_size) {
91447636
A
7507 vm_map_size_t local_size = 0;
7508 vm_map_size_t entry_size;
1c79356b
A
7509
7510 new_offset = copy->offset;
7511 copy_entry = vm_map_copy_first_entry(copy);
7512 while(copy_entry != vm_map_copy_to_entry(copy)) {
7513 entry_size = copy_entry->vme_end -
2d21ac55 7514 copy_entry->vme_start;
1c79356b 7515 if((local_size < copy_size) &&
2d21ac55
A
7516 ((local_size + entry_size)
7517 >= copy_size)) {
1c79356b 7518 vm_map_copy_clip_end(copy, copy_entry,
2d21ac55
A
7519 copy_entry->vme_start +
7520 (copy_size - local_size));
1c79356b 7521 entry_size = copy_entry->vme_end -
2d21ac55 7522 copy_entry->vme_start;
1c79356b
A
7523 local_size += entry_size;
7524 new_offset += entry_size;
7525 }
7526 if(local_size >= copy_size) {
7527 next_copy = copy_entry->vme_next;
7528 copy_entry->vme_next =
7529 vm_map_copy_to_entry(copy);
7530 previous_prev =
7531 copy->cpy_hdr.links.prev;
7532 copy->cpy_hdr.links.prev = copy_entry;
7533 copy->size = copy_size;
7534 remaining_entries =
7535 copy->cpy_hdr.nentries;
7536 remaining_entries -= nentries;
7537 copy->cpy_hdr.nentries = nentries;
7538 break;
7539 } else {
7540 local_size += entry_size;
7541 new_offset += entry_size;
7542 nentries++;
7543 }
7544 copy_entry = copy_entry->vme_next;
7545 }
7546 }
7547
7548 if (aligned) {
7549 pmap_t local_pmap;
7550
7551 if(pmap)
7552 local_pmap = pmap;
7553 else
7554 local_pmap = dst_map->pmap;
7555
7556 if ((kr = vm_map_copy_overwrite_aligned(
2d21ac55
A
7557 dst_map, tmp_entry, copy,
7558 base_addr, local_pmap)) != KERN_SUCCESS) {
1c79356b
A
7559 if(next_copy != NULL) {
7560 copy->cpy_hdr.nentries +=
2d21ac55 7561 remaining_entries;
1c79356b 7562 copy->cpy_hdr.links.prev->vme_next =
2d21ac55 7563 next_copy;
1c79356b 7564 copy->cpy_hdr.links.prev =
2d21ac55 7565 previous_prev;
1c79356b
A
7566 copy->size += copy_size;
7567 }
7568 return kr;
7569 }
7570 vm_map_unlock(dst_map);
7571 } else {
2d21ac55
A
7572 /*
7573 * Performance gain:
7574 *
7575 * if the copy and dst address are misaligned but have the same
7576 * offset within the page, we can copy_not_aligned the
7577 * misaligned parts and copy aligned the rest. If they are
7578 * aligned but len is unaligned, we simply need to copy
7579 * the end bit unaligned. We'll need to split the misaligned
7580 * bits of the region in this case!
7581 */
7582 /* ALWAYS UNLOCKS THE dst_map MAP */
39236c6e
A
7583 kr = vm_map_copy_overwrite_unaligned(
7584 dst_map,
7585 tmp_entry,
7586 copy,
7587 base_addr,
7588 discard_on_success);
7589 if (kr != KERN_SUCCESS) {
1c79356b
A
7590 if(next_copy != NULL) {
7591 copy->cpy_hdr.nentries +=
2d21ac55 7592 remaining_entries;
1c79356b 7593 copy->cpy_hdr.links.prev->vme_next =
2d21ac55 7594 next_copy;
1c79356b
A
7595 copy->cpy_hdr.links.prev =
7596 previous_prev;
7597 copy->size += copy_size;
7598 }
7599 return kr;
7600 }
7601 }
7602 total_size -= copy_size;
7603 if(total_size == 0)
7604 break;
7605 base_addr += copy_size;
7606 copy_size = 0;
7607 copy->offset = new_offset;
7608 if(next_copy != NULL) {
7609 copy->cpy_hdr.nentries = remaining_entries;
7610 copy->cpy_hdr.links.next = next_copy;
7611 copy->cpy_hdr.links.prev = previous_prev;
7612 next_copy->vme_prev = vm_map_copy_to_entry(copy);
7613 copy->size = total_size;
7614 }
7615 vm_map_lock(dst_map);
7616 while(TRUE) {
7617 if (!vm_map_lookup_entry(dst_map,
2d21ac55 7618 base_addr, &tmp_entry)) {
1c79356b
A
7619 vm_map_unlock(dst_map);
7620 return(KERN_INVALID_ADDRESS);
7621 }
7622 if (tmp_entry->in_transition) {
7623 entry->needs_wakeup = TRUE;
7624 vm_map_entry_wait(dst_map, THREAD_UNINT);
7625 } else {
7626 break;
7627 }
7628 }
39236c6e
A
7629 vm_map_clip_start(dst_map,
7630 tmp_entry,
7631 vm_map_trunc_page(base_addr,
7632 VM_MAP_PAGE_MASK(dst_map)));
1c79356b
A
7633
7634 entry = tmp_entry;
7635 } /* while */
7636
7637 /*
7638 * Throw away the vm_map_copy object
7639 */
6d2010ae
A
7640 if (discard_on_success)
7641 vm_map_copy_discard(copy);
1c79356b
A
7642
7643 return(KERN_SUCCESS);
7644}/* vm_map_copy_overwrite_nested */
7645
7646kern_return_t
7647vm_map_copy_overwrite(
7648 vm_map_t dst_map,
91447636 7649 vm_map_offset_t dst_addr,
1c79356b
A
7650 vm_map_copy_t copy,
7651 boolean_t interruptible)
7652{
6d2010ae
A
7653 vm_map_size_t head_size, tail_size;
7654 vm_map_copy_t head_copy, tail_copy;
7655 vm_map_offset_t head_addr, tail_addr;
7656 vm_map_entry_t entry;
7657 kern_return_t kr;
7658
7659 head_size = 0;
7660 tail_size = 0;
7661 head_copy = NULL;
7662 tail_copy = NULL;
7663 head_addr = 0;
7664 tail_addr = 0;
7665
7666 if (interruptible ||
7667 copy == VM_MAP_COPY_NULL ||
7668 copy->type != VM_MAP_COPY_ENTRY_LIST) {
7669 /*
7670 * We can't split the "copy" map if we're interruptible
7671 * or if we don't have a "copy" map...
7672 */
7673 blunt_copy:
7674 return vm_map_copy_overwrite_nested(dst_map,
7675 dst_addr,
7676 copy,
7677 interruptible,
7678 (pmap_t) NULL,
7679 TRUE);
7680 }
7681
7682 if (copy->size < 3 * PAGE_SIZE) {
7683 /*
7684 * Too small to bother with optimizing...
7685 */
7686 goto blunt_copy;
7687 }
7688
39236c6e
A
7689 if ((dst_addr & VM_MAP_PAGE_MASK(dst_map)) !=
7690 (copy->offset & VM_MAP_PAGE_MASK(dst_map))) {
6d2010ae
A
7691 /*
7692 * Incompatible mis-alignment of source and destination...
7693 */
7694 goto blunt_copy;
7695 }
7696
7697 /*
7698 * Proper alignment or identical mis-alignment at the beginning.
7699 * Let's try and do a small unaligned copy first (if needed)
7700 * and then an aligned copy for the rest.
7701 */
7702 if (!page_aligned(dst_addr)) {
7703 head_addr = dst_addr;
39236c6e
A
7704 head_size = (VM_MAP_PAGE_SIZE(dst_map) -
7705 (copy->offset & VM_MAP_PAGE_MASK(dst_map)));
6d2010ae
A
7706 }
7707 if (!page_aligned(copy->offset + copy->size)) {
7708 /*
7709 * Mis-alignment at the end.
7710 * Do an aligned copy up to the last page and
7711 * then an unaligned copy for the remaining bytes.
7712 */
39236c6e
A
7713 tail_size = ((copy->offset + copy->size) &
7714 VM_MAP_PAGE_MASK(dst_map));
6d2010ae
A
7715 tail_addr = dst_addr + copy->size - tail_size;
7716 }
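	/*
	 * Illustrative worked example (editorial sketch, not part of the
	 * original source; assumes 4KB pages): with dst_addr 0x1000100,
	 * copy->offset ending in 0x100 and copy->size 0x3000, the checks
	 * above yield head_addr = 0x1000100, head_size = 0xF00 (up to the
	 * 0x1001000 page boundary), tail_size = (0x100 + 0x3000) & 0xFFF
	 * = 0x100 and tail_addr = 0x1003000.  The middle 0x2000 bytes
	 * from 0x1001000 to 0x1003000 can then be copied page-aligned.
	 */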
7717
7718 if (head_size + tail_size == copy->size) {
7719 /*
7720 * It's all unaligned, no optimization possible...
7721 */
7722 goto blunt_copy;
7723 }
7724
7725 /*
7726 * Can't optimize if there are any submaps in the
7727 * destination due to the way we free the "copy" map
7728 * progressively in vm_map_copy_overwrite_nested()
7729 * in that case.
7730 */
7731 vm_map_lock_read(dst_map);
7732 if (! vm_map_lookup_entry(dst_map, dst_addr, &entry)) {
7733 vm_map_unlock_read(dst_map);
7734 goto blunt_copy;
7735 }
7736 for (;
7737 (entry != vm_map_copy_to_entry(copy) &&
7738 entry->vme_start < dst_addr + copy->size);
7739 entry = entry->vme_next) {
7740 if (entry->is_sub_map) {
7741 vm_map_unlock_read(dst_map);
7742 goto blunt_copy;
7743 }
7744 }
7745 vm_map_unlock_read(dst_map);
7746
7747 if (head_size) {
7748 /*
7749 * Unaligned copy of the first "head_size" bytes, to reach
7750 * a page boundary.
7751 */
7752
7753 /*
7754 * Extract "head_copy" out of "copy".
7755 */
7756 head_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
04b8595b 7757 head_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
6d2010ae
A
7758 vm_map_copy_first_entry(head_copy) =
7759 vm_map_copy_to_entry(head_copy);
7760 vm_map_copy_last_entry(head_copy) =
7761 vm_map_copy_to_entry(head_copy);
7762 head_copy->type = VM_MAP_COPY_ENTRY_LIST;
7763 head_copy->cpy_hdr.nentries = 0;
7764 head_copy->cpy_hdr.entries_pageable =
7765 copy->cpy_hdr.entries_pageable;
7766 vm_map_store_init(&head_copy->cpy_hdr);
7767
7768 head_copy->offset = copy->offset;
7769 head_copy->size = head_size;
7770
7771 copy->offset += head_size;
7772 copy->size -= head_size;
7773
7774 entry = vm_map_copy_first_entry(copy);
7775 vm_map_copy_clip_end(copy, entry, copy->offset);
7776 vm_map_copy_entry_unlink(copy, entry);
7777 vm_map_copy_entry_link(head_copy,
7778 vm_map_copy_to_entry(head_copy),
7779 entry);
7780
7781 /*
7782 * Do the unaligned copy.
7783 */
7784 kr = vm_map_copy_overwrite_nested(dst_map,
7785 head_addr,
7786 head_copy,
7787 interruptible,
7788 (pmap_t) NULL,
7789 FALSE);
7790 if (kr != KERN_SUCCESS)
7791 goto done;
7792 }
7793
7794 if (tail_size) {
7795 /*
7796 * Extract "tail_copy" out of "copy".
7797 */
7798 tail_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
04b8595b 7799 tail_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
6d2010ae
A
7800 vm_map_copy_first_entry(tail_copy) =
7801 vm_map_copy_to_entry(tail_copy);
7802 vm_map_copy_last_entry(tail_copy) =
7803 vm_map_copy_to_entry(tail_copy);
7804 tail_copy->type = VM_MAP_COPY_ENTRY_LIST;
7805 tail_copy->cpy_hdr.nentries = 0;
7806 tail_copy->cpy_hdr.entries_pageable =
7807 copy->cpy_hdr.entries_pageable;
7808 vm_map_store_init(&tail_copy->cpy_hdr);
7809
7810 tail_copy->offset = copy->offset + copy->size - tail_size;
7811 tail_copy->size = tail_size;
7812
7813 copy->size -= tail_size;
7814
7815 entry = vm_map_copy_last_entry(copy);
7816 vm_map_copy_clip_start(copy, entry, tail_copy->offset);
7817 entry = vm_map_copy_last_entry(copy);
7818 vm_map_copy_entry_unlink(copy, entry);
7819 vm_map_copy_entry_link(tail_copy,
7820 vm_map_copy_last_entry(tail_copy),
7821 entry);
7822 }
7823
7824 /*
7825 * Copy most (or possibly all) of the data.
7826 */
7827 kr = vm_map_copy_overwrite_nested(dst_map,
7828 dst_addr + head_size,
7829 copy,
7830 interruptible,
7831 (pmap_t) NULL,
7832 FALSE);
7833 if (kr != KERN_SUCCESS) {
7834 goto done;
7835 }
7836
7837 if (tail_size) {
7838 kr = vm_map_copy_overwrite_nested(dst_map,
7839 tail_addr,
7840 tail_copy,
7841 interruptible,
7842 (pmap_t) NULL,
7843 FALSE);
7844 }
7845
7846done:
7847 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
7848 if (kr == KERN_SUCCESS) {
7849 /*
7850 * Discard all the copy maps.
7851 */
7852 if (head_copy) {
7853 vm_map_copy_discard(head_copy);
7854 head_copy = NULL;
7855 }
7856 vm_map_copy_discard(copy);
7857 if (tail_copy) {
7858 vm_map_copy_discard(tail_copy);
7859 tail_copy = NULL;
7860 }
7861 } else {
7862 /*
7863 * Re-assemble the original copy map.
7864 */
7865 if (head_copy) {
7866 entry = vm_map_copy_first_entry(head_copy);
7867 vm_map_copy_entry_unlink(head_copy, entry);
7868 vm_map_copy_entry_link(copy,
7869 vm_map_copy_to_entry(copy),
7870 entry);
7871 copy->offset -= head_size;
7872 copy->size += head_size;
7873 vm_map_copy_discard(head_copy);
7874 head_copy = NULL;
7875 }
7876 if (tail_copy) {
7877 entry = vm_map_copy_last_entry(tail_copy);
7878 vm_map_copy_entry_unlink(tail_copy, entry);
7879 vm_map_copy_entry_link(copy,
7880 vm_map_copy_last_entry(copy),
7881 entry);
7882 copy->size += tail_size;
7883 vm_map_copy_discard(tail_copy);
7884 tail_copy = NULL;
7885 }
7886 }
7887 return kr;
1c79356b
A
7888}
7889
7890
7891/*
91447636 7892 * Routine: vm_map_copy_overwrite_unaligned [internal use only]
1c79356b
A
7893 *
7894 * Description:
7895 * Physically copy unaligned data
7896 *
7897 * Implementation:
7898 * Unaligned parts of pages have to be physically copied. We use
7899 * a modified form of vm_fault_copy (which understands non-aligned
7900 * page offsets and sizes) to do the copy. We attempt to copy as
7901 * much memory in one go as possible; however, vm_fault_copy copies
7902 * within one memory object, so we have to find the smallest of "amount left",
7903 * "source object data size" and "target object data size". With
7904 * unaligned data we don't need to split regions, therefore the source
7905 * (copy) object should be one map entry; the target range may be split
7906 * over multiple map entries, however. In any event we are pessimistic
7907 * about these assumptions.
7908 *
7909 * Assumptions:
7910 * dst_map is locked on entry and is return locked on success,
7911 * unlocked on error.
7912 */
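/*
 * Editorial sketch of the per-iteration sizing below (not part of the
 * original source): each pass through the loop copies
 *
 *	copy_size = MIN(dst_size, src_size, amount_left)
 *
 * where dst_size is what remains of the current destination entry,
 * src_size is what remains of the current source copy entry, and
 * amount_left is what remains of the overall request.  Whichever
 * limit is hit determines whether we advance to the next destination
 * entry, the next copy entry, or finish.
 */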
7913
91447636 7914static kern_return_t
1c79356b
A
7915vm_map_copy_overwrite_unaligned(
7916 vm_map_t dst_map,
7917 vm_map_entry_t entry,
7918 vm_map_copy_t copy,
39236c6e
A
7919 vm_map_offset_t start,
7920 boolean_t discard_on_success)
1c79356b 7921{
39236c6e
A
7922 vm_map_entry_t copy_entry;
7923 vm_map_entry_t copy_entry_next;
1c79356b
A
7924 vm_map_version_t version;
7925 vm_object_t dst_object;
7926 vm_object_offset_t dst_offset;
7927 vm_object_offset_t src_offset;
7928 vm_object_offset_t entry_offset;
91447636
A
7929 vm_map_offset_t entry_end;
7930 vm_map_size_t src_size,
1c79356b
A
7931 dst_size,
7932 copy_size,
7933 amount_left;
7934 kern_return_t kr = KERN_SUCCESS;
7935
39236c6e
A
7936
7937 copy_entry = vm_map_copy_first_entry(copy);
7938
1c79356b
A
7939 vm_map_lock_write_to_read(dst_map);
7940
91447636 7941 src_offset = copy->offset - vm_object_trunc_page(copy->offset);
1c79356b
A
7942 amount_left = copy->size;
7943/*
7944 * unaligned so we never clipped this entry, we need the offset into
7945 * the vm_object not just the data.
7946 */
7947 while (amount_left > 0) {
7948
7949 if (entry == vm_map_to_entry(dst_map)) {
7950 vm_map_unlock_read(dst_map);
7951 return KERN_INVALID_ADDRESS;
7952 }
7953
7954 /* "start" must be within the current map entry */
7955 assert ((start>=entry->vme_start) && (start<entry->vme_end));
7956
7957 dst_offset = start - entry->vme_start;
7958
7959 dst_size = entry->vme_end - start;
7960
7961 src_size = copy_entry->vme_end -
7962 (copy_entry->vme_start + src_offset);
7963
7964 if (dst_size < src_size) {
7965/*
7966 * we can only copy dst_size bytes before
7967 * we have to get the next destination entry
7968 */
7969 copy_size = dst_size;
7970 } else {
7971/*
7972 * we can only copy src_size bytes before
7973 * we have to get the next source copy entry
7974 */
7975 copy_size = src_size;
7976 }
7977
7978 if (copy_size > amount_left) {
7979 copy_size = amount_left;
7980 }
7981/*
7982 * Entry needs copy: create a shadow object for the
7983 * copy-on-write region.
7984 */
7985 if (entry->needs_copy &&
2d21ac55 7986 ((entry->protection & VM_PROT_WRITE) != 0))
1c79356b
A
7987 {
7988 if (vm_map_lock_read_to_write(dst_map)) {
7989 vm_map_lock_read(dst_map);
7990 goto RetryLookup;
7991 }
3e170ce0
A
7992 VME_OBJECT_SHADOW(entry,
7993 (vm_map_size_t)(entry->vme_end
7994 - entry->vme_start));
1c79356b
A
7995 entry->needs_copy = FALSE;
7996 vm_map_lock_write_to_read(dst_map);
7997 }
3e170ce0 7998 dst_object = VME_OBJECT(entry);
1c79356b
A
7999/*
8000 * unlike with the virtual (aligned) copy we're going
8001 * to fault on it therefore we need a target object.
8002 */
8003 if (dst_object == VM_OBJECT_NULL) {
8004 if (vm_map_lock_read_to_write(dst_map)) {
8005 vm_map_lock_read(dst_map);
8006 goto RetryLookup;
8007 }
91447636 8008 dst_object = vm_object_allocate((vm_map_size_t)
2d21ac55 8009 entry->vme_end - entry->vme_start);
3e170ce0
A
8010 VME_OBJECT(entry) = dst_object;
8011 VME_OFFSET_SET(entry, 0);
fe8ab488 8012 assert(entry->use_pmap);
1c79356b
A
8013 vm_map_lock_write_to_read(dst_map);
8014 }
8015/*
8016 * Take an object reference and unlock map. The "entry" may
8017 * disappear or change when the map is unlocked.
8018 */
8019 vm_object_reference(dst_object);
8020 version.main_timestamp = dst_map->timestamp;
3e170ce0 8021 entry_offset = VME_OFFSET(entry);
1c79356b
A
8022 entry_end = entry->vme_end;
8023 vm_map_unlock_read(dst_map);
8024/*
8025 * Copy as much as possible in one pass
8026 */
8027 kr = vm_fault_copy(
3e170ce0
A
8028 VME_OBJECT(copy_entry),
8029 VME_OFFSET(copy_entry) + src_offset,
1c79356b
A
8030 &copy_size,
8031 dst_object,
8032 entry_offset + dst_offset,
8033 dst_map,
8034 &version,
8035 THREAD_UNINT );
8036
8037 start += copy_size;
8038 src_offset += copy_size;
8039 amount_left -= copy_size;
8040/*
8041 * Release the object reference
8042 */
8043 vm_object_deallocate(dst_object);
8044/*
8045 * If a hard error occurred, return it now
8046 */
8047 if (kr != KERN_SUCCESS)
8048 return kr;
8049
8050 if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
2d21ac55 8051 || amount_left == 0)
1c79356b
A
8052 {
8053/*
8054 * all done with this copy entry, dispose.
8055 */
39236c6e
A
8056 copy_entry_next = copy_entry->vme_next;
8057
8058 if (discard_on_success) {
8059 vm_map_copy_entry_unlink(copy, copy_entry);
8060 assert(!copy_entry->is_sub_map);
3e170ce0 8061 vm_object_deallocate(VME_OBJECT(copy_entry));
39236c6e
A
8062 vm_map_copy_entry_dispose(copy, copy_entry);
8063 }
1c79356b 8064
39236c6e
A
8065 if (copy_entry_next == vm_map_copy_to_entry(copy) &&
8066 amount_left) {
1c79356b
A
8067/*
8068 * not finished copying but ran out of source
8069 */
8070 return KERN_INVALID_ADDRESS;
8071 }
39236c6e
A
8072
8073 copy_entry = copy_entry_next;
8074
1c79356b
A
8075 src_offset = 0;
8076 }
8077
8078 if (amount_left == 0)
8079 return KERN_SUCCESS;
8080
8081 vm_map_lock_read(dst_map);
8082 if (version.main_timestamp == dst_map->timestamp) {
8083 if (start == entry_end) {
8084/*
8085 * destination region is split. Use the version
8086 * information to avoid a lookup in the normal
8087 * case.
8088 */
8089 entry = entry->vme_next;
8090/*
8091 * should be contiguous. Fail if we encounter
8092 * a hole in the destination.
8093 */
8094 if (start != entry->vme_start) {
8095 vm_map_unlock_read(dst_map);
8096 return KERN_INVALID_ADDRESS ;
8097 }
8098 }
8099 } else {
8100/*
8101 * Map version check failed.
8102 * we must look up the entry because somebody
8103 * might have changed the map behind our backs.
8104 */
2d21ac55 8105 RetryLookup:
1c79356b
A
8106 if (!vm_map_lookup_entry(dst_map, start, &entry))
8107 {
8108 vm_map_unlock_read(dst_map);
8109 return KERN_INVALID_ADDRESS ;
8110 }
8111 }
8112 }/* while */
8113
1c79356b
A
8114 return KERN_SUCCESS;
8115}/* vm_map_copy_overwrite_unaligned */
8116
8117/*
91447636 8118 * Routine: vm_map_copy_overwrite_aligned [internal use only]
1c79356b
A
8119 *
8120 * Description:
8121 * Does all the vm_trickery possible for whole pages.
8122 *
8123 * Implementation:
8124 *
8125 * If there are no permanent objects in the destination,
8126 * and the source and destination map entry zones match,
8127 * and the destination map entry is not shared,
8128 * then the map entries can be deleted and replaced
8129 * with those from the copy. The following code is the
8130 * basic idea of what to do, but there are lots of annoying
8131 * little details about getting protection and inheritance
8132 * right. Should add protection, inheritance, and sharing checks
8133 * to the above pass and make sure that no wiring is involved.
8134 */
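/*
 * Editorial summary of the two paths below (not part of the original
 * source): when the destination entry is unshared and backed by
 * nothing or by anonymous (internal, !true_share) memory, or has
 * needs_copy set, the copy entry's object is simply substituted for
 * the destination object ("virtual" copy); otherwise, or when one of
 * the tradeoff checks below fires, we fall through to slow_copy and
 * physically copy the pages with vm_fault_copy().
 */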
8135
e2d2fc5c
A
8136int vm_map_copy_overwrite_aligned_src_not_internal = 0;
8137int vm_map_copy_overwrite_aligned_src_not_symmetric = 0;
8138int vm_map_copy_overwrite_aligned_src_large = 0;
8139
91447636 8140static kern_return_t
1c79356b
A
8141vm_map_copy_overwrite_aligned(
8142 vm_map_t dst_map,
8143 vm_map_entry_t tmp_entry,
8144 vm_map_copy_t copy,
91447636 8145 vm_map_offset_t start,
2d21ac55 8146 __unused pmap_t pmap)
1c79356b
A
8147{
8148 vm_object_t object;
8149 vm_map_entry_t copy_entry;
91447636
A
8150 vm_map_size_t copy_size;
8151 vm_map_size_t size;
1c79356b
A
8152 vm_map_entry_t entry;
8153
8154 while ((copy_entry = vm_map_copy_first_entry(copy))
2d21ac55 8155 != vm_map_copy_to_entry(copy))
1c79356b
A
8156 {
8157 copy_size = (copy_entry->vme_end - copy_entry->vme_start);
8158
8159 entry = tmp_entry;
fe8ab488
A
8160 if (entry->is_sub_map) {
8161 /* unnested when clipped earlier */
8162 assert(!entry->use_pmap);
8163 }
1c79356b
A
8164 if (entry == vm_map_to_entry(dst_map)) {
8165 vm_map_unlock(dst_map);
8166 return KERN_INVALID_ADDRESS;
8167 }
8168 size = (entry->vme_end - entry->vme_start);
8169 /*
8170 * Make sure that no holes popped up in the
8171 * address map, and that the protection is
8172 * still valid, in case the map was unlocked
8173 * earlier.
8174 */
8175
8176 if ((entry->vme_start != start) || ((entry->is_sub_map)
2d21ac55 8177 && !entry->needs_copy)) {
1c79356b
A
8178 vm_map_unlock(dst_map);
8179 return(KERN_INVALID_ADDRESS);
8180 }
8181 assert(entry != vm_map_to_entry(dst_map));
8182
8183 /*
8184 * Check protection again
8185 */
8186
8187 if ( ! (entry->protection & VM_PROT_WRITE)) {
8188 vm_map_unlock(dst_map);
8189 return(KERN_PROTECTION_FAILURE);
8190 }
8191
8192 /*
8193 * Adjust to source size first
8194 */
8195
8196 if (copy_size < size) {
fe8ab488
A
8197 if (entry->map_aligned &&
8198 !VM_MAP_PAGE_ALIGNED(entry->vme_start + copy_size,
8199 VM_MAP_PAGE_MASK(dst_map))) {
8200 /* no longer map-aligned */
8201 entry->map_aligned = FALSE;
8202 }
1c79356b
A
8203 vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
8204 size = copy_size;
8205 }
8206
8207 /*
8208 * Adjust to destination size
8209 */
8210
8211 if (size < copy_size) {
8212 vm_map_copy_clip_end(copy, copy_entry,
2d21ac55 8213 copy_entry->vme_start + size);
1c79356b
A
8214 copy_size = size;
8215 }
8216
8217 assert((entry->vme_end - entry->vme_start) == size);
8218 assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
8219 assert((copy_entry->vme_end - copy_entry->vme_start) == size);
8220
8221 /*
8222 * If the destination contains temporary unshared memory,
8223 * we can perform the copy by throwing it away and
8224 * installing the source data.
8225 */
8226
3e170ce0 8227 object = VME_OBJECT(entry);
1c79356b 8228 if ((!entry->is_shared &&
2d21ac55
A
8229 ((object == VM_OBJECT_NULL) ||
8230 (object->internal && !object->true_share))) ||
1c79356b 8231 entry->needs_copy) {
3e170ce0
A
8232 vm_object_t old_object = VME_OBJECT(entry);
8233 vm_object_offset_t old_offset = VME_OFFSET(entry);
1c79356b
A
8234 vm_object_offset_t offset;
8235
8236 /*
8237 * Ensure that the source and destination aren't
8238 * identical
8239 */
3e170ce0
A
8240 if (old_object == VME_OBJECT(copy_entry) &&
8241 old_offset == VME_OFFSET(copy_entry)) {
1c79356b
A
8242 vm_map_copy_entry_unlink(copy, copy_entry);
8243 vm_map_copy_entry_dispose(copy, copy_entry);
8244
8245 if (old_object != VM_OBJECT_NULL)
8246 vm_object_deallocate(old_object);
8247
8248 start = tmp_entry->vme_end;
8249 tmp_entry = tmp_entry->vme_next;
8250 continue;
8251 }
8252
e2d2fc5c
A
8253#define __TRADEOFF1_OBJ_SIZE (64 * 1024 * 1024) /* 64 MB */
8254#define __TRADEOFF1_COPY_SIZE (128 * 1024) /* 128 KB */
3e170ce0
A
8255 if (VME_OBJECT(copy_entry) != VM_OBJECT_NULL &&
8256 VME_OBJECT(copy_entry)->vo_size >= __TRADEOFF1_OBJ_SIZE &&
e2d2fc5c
A
8257 copy_size <= __TRADEOFF1_COPY_SIZE) {
8258 /*
8259 * Virtual vs. Physical copy tradeoff #1.
8260 *
8261 * Copying only a few pages out of a large
8262 * object: do a physical copy instead of
8263 * a virtual copy, to avoid possibly keeping
8264 * the entire large object alive because of
8265 * those few copy-on-write pages.
8266 */
8267 vm_map_copy_overwrite_aligned_src_large++;
8268 goto slow_copy;
8269 }
e2d2fc5c 8270
3e170ce0
A
8271 if ((dst_map->pmap != kernel_pmap) &&
8272 (VME_ALIAS(entry) >= VM_MEMORY_MALLOC) &&
8273 (VME_ALIAS(entry) <= VM_MEMORY_MALLOC_LARGE_REUSED)) {
ebb1b9f4
A
8274 vm_object_t new_object, new_shadow;
8275
8276 /*
8277 * We're about to map something over a mapping
8278 * established by malloc()...
8279 */
3e170ce0 8280 new_object = VME_OBJECT(copy_entry);
ebb1b9f4
A
8281 if (new_object != VM_OBJECT_NULL) {
8282 vm_object_lock_shared(new_object);
8283 }
8284 while (new_object != VM_OBJECT_NULL &&
e2d2fc5c
A
8285 !new_object->true_share &&
8286 new_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
ebb1b9f4
A
8287 new_object->internal) {
8288 new_shadow = new_object->shadow;
8289 if (new_shadow == VM_OBJECT_NULL) {
8290 break;
8291 }
8292 vm_object_lock_shared(new_shadow);
8293 vm_object_unlock(new_object);
8294 new_object = new_shadow;
8295 }
8296 if (new_object != VM_OBJECT_NULL) {
8297 if (!new_object->internal) {
8298 /*
8299 * The new mapping is backed
8300 * by an external object. We
8301 * don't want malloc'ed memory
8302 * to be replaced with such a
8303 * non-anonymous mapping, so
8304 * let's go off the optimized
8305 * path...
8306 */
e2d2fc5c 8307 vm_map_copy_overwrite_aligned_src_not_internal++;
ebb1b9f4
A
8308 vm_object_unlock(new_object);
8309 goto slow_copy;
8310 }
e2d2fc5c
A
8311 if (new_object->true_share ||
8312 new_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
8313 /*
8314 * Same if there's a "true_share"
8315 * object in the shadow chain, or
8316 * an object with a non-default
8317 * (SYMMETRIC) copy strategy.
8318 */
8319 vm_map_copy_overwrite_aligned_src_not_symmetric++;
8320 vm_object_unlock(new_object);
8321 goto slow_copy;
8322 }
ebb1b9f4
A
8323 vm_object_unlock(new_object);
8324 }
8325 /*
8326 * The new mapping is still backed by
8327 * anonymous (internal) memory, so it's
8328 * OK to substitute it for the original
8329 * malloc() mapping.
8330 */
8331 }
8332
1c79356b
A
8333 if (old_object != VM_OBJECT_NULL) {
8334 if(entry->is_sub_map) {
9bccf70c 8335 if(entry->use_pmap) {
0c530ab8 8336#ifndef NO_NESTED_PMAP
9bccf70c 8337 pmap_unnest(dst_map->pmap,
2d21ac55
A
8338 (addr64_t)entry->vme_start,
8339 entry->vme_end - entry->vme_start);
0c530ab8 8340#endif /* NO_NESTED_PMAP */
316670eb 8341 if(dst_map->mapped_in_other_pmaps) {
9bccf70c
A
8342 /* clean up parent */
8343 /* map/maps */
2d21ac55
A
8344 vm_map_submap_pmap_clean(
8345 dst_map, entry->vme_start,
8346 entry->vme_end,
3e170ce0
A
8347 VME_SUBMAP(entry),
8348 VME_OFFSET(entry));
9bccf70c
A
8349 }
8350 } else {
8351 vm_map_submap_pmap_clean(
8352 dst_map, entry->vme_start,
8353 entry->vme_end,
3e170ce0
A
8354 VME_SUBMAP(entry),
8355 VME_OFFSET(entry));
9bccf70c 8356 }
3e170ce0 8357 vm_map_deallocate(VME_SUBMAP(entry));
9bccf70c 8358 } else {
316670eb 8359 if(dst_map->mapped_in_other_pmaps) {
39236c6e 8360 vm_object_pmap_protect_options(
3e170ce0
A
8361 VME_OBJECT(entry),
8362 VME_OFFSET(entry),
9bccf70c 8363 entry->vme_end
2d21ac55 8364 - entry->vme_start,
9bccf70c
A
8365 PMAP_NULL,
8366 entry->vme_start,
39236c6e
A
8367 VM_PROT_NONE,
8368 PMAP_OPTIONS_REMOVE);
9bccf70c 8369 } else {
39236c6e
A
8370 pmap_remove_options(
8371 dst_map->pmap,
8372 (addr64_t)(entry->vme_start),
8373 (addr64_t)(entry->vme_end),
8374 PMAP_OPTIONS_REMOVE);
9bccf70c 8375 }
1c79356b 8376 vm_object_deallocate(old_object);
9bccf70c 8377 }
1c79356b
A
8378 }
8379
8380 entry->is_sub_map = FALSE;
3e170ce0
A
8381 VME_OBJECT_SET(entry, VME_OBJECT(copy_entry));
8382 object = VME_OBJECT(entry);
1c79356b
A
8383 entry->needs_copy = copy_entry->needs_copy;
8384 entry->wired_count = 0;
8385 entry->user_wired_count = 0;
3e170ce0
A
8386 offset = VME_OFFSET(copy_entry);
8387 VME_OFFSET_SET(entry, offset);
1c79356b
A
8388
8389 vm_map_copy_entry_unlink(copy, copy_entry);
8390 vm_map_copy_entry_dispose(copy, copy_entry);
2d21ac55 8391
1c79356b 8392 /*
2d21ac55 8393 * we could try to push pages into the pmap at this point, BUT
1c79356b
A
8394 * this optimization only saved on average 2 us per page if ALL
8395 * the pages in the source were currently mapped
8396 * and ALL the pages in the dest were touched; if fewer
8397 * than 2/3 of the pages were touched, this optimization actually cost more cycles.
2d21ac55 8398 * It also puts a lot of pressure on the pmap layer w.r.t. mapping structures.
1c79356b
A
8399 */
8400
1c79356b
A
8401 /*
8402 * Set up for the next iteration. The map
8403 * has not been unlocked, so the next
8404 * address should be at the end of this
8405 * entry, and the next map entry should be
8406 * the one following it.
8407 */
8408
8409 start = tmp_entry->vme_end;
8410 tmp_entry = tmp_entry->vme_next;
8411 } else {
8412 vm_map_version_t version;
ebb1b9f4
A
8413 vm_object_t dst_object;
8414 vm_object_offset_t dst_offset;
1c79356b
A
8415 kern_return_t r;
8416
ebb1b9f4 8417 slow_copy:
e2d2fc5c 8418 if (entry->needs_copy) {
3e170ce0
A
8419 VME_OBJECT_SHADOW(entry,
8420 (entry->vme_end -
8421 entry->vme_start));
e2d2fc5c
A
8422 entry->needs_copy = FALSE;
8423 }
8424
3e170ce0
A
8425 dst_object = VME_OBJECT(entry);
8426 dst_offset = VME_OFFSET(entry);
ebb1b9f4 8427
1c79356b
A
8428 /*
8429 * Take an object reference, and record
8430 * the map version information so that the
8431 * map can be safely unlocked.
8432 */
8433
ebb1b9f4
A
8434 if (dst_object == VM_OBJECT_NULL) {
8435 /*
8436 * We would usually have just taken the
8437 * optimized path above if the destination
8438 * object has not been allocated yet. But we
8439 * now disable that optimization if the copy
8440 * entry's object is not backed by anonymous
8441 * memory to avoid replacing malloc'ed
8442 * (i.e. re-usable) anonymous memory with a
8443 * not-so-anonymous mapping.
8444 * So we have to handle this case here and
8445 * allocate a new VM object for this map entry.
8446 */
8447 dst_object = vm_object_allocate(
8448 entry->vme_end - entry->vme_start);
8449 dst_offset = 0;
3e170ce0
A
8450 VME_OBJECT_SET(entry, dst_object);
8451 VME_OFFSET_SET(entry, dst_offset);
fe8ab488 8452 assert(entry->use_pmap);
ebb1b9f4
A
8453
8454 }
8455
1c79356b
A
8456 vm_object_reference(dst_object);
8457
9bccf70c
A
8458 /* account for unlock bumping up timestamp */
8459 version.main_timestamp = dst_map->timestamp + 1;
1c79356b
A
8460
8461 vm_map_unlock(dst_map);
8462
8463 /*
8464 * Copy as much as possible in one pass
8465 */
8466
8467 copy_size = size;
8468 r = vm_fault_copy(
3e170ce0
A
8469 VME_OBJECT(copy_entry),
8470 VME_OFFSET(copy_entry),
2d21ac55
A
8471 &copy_size,
8472 dst_object,
8473 dst_offset,
8474 dst_map,
8475 &version,
8476 THREAD_UNINT );
1c79356b
A
8477
8478 /*
8479 * Release the object reference
8480 */
8481
8482 vm_object_deallocate(dst_object);
8483
8484 /*
8485 * If a hard error occurred, return it now
8486 */
8487
8488 if (r != KERN_SUCCESS)
8489 return(r);
8490
8491 if (copy_size != 0) {
8492 /*
8493 * Dispose of the copied region
8494 */
8495
8496 vm_map_copy_clip_end(copy, copy_entry,
2d21ac55 8497 copy_entry->vme_start + copy_size);
1c79356b 8498 vm_map_copy_entry_unlink(copy, copy_entry);
3e170ce0 8499 vm_object_deallocate(VME_OBJECT(copy_entry));
1c79356b
A
8500 vm_map_copy_entry_dispose(copy, copy_entry);
8501 }
8502
8503 /*
8504 * Pick up in the destination map where we left off.
8505 *
8506 * Use the version information to avoid a lookup
8507 * in the normal case.
8508 */
8509
8510 start += copy_size;
8511 vm_map_lock(dst_map);
e2d2fc5c
A
8512 if (version.main_timestamp == dst_map->timestamp &&
8513 copy_size != 0) {
1c79356b
A
8514 /* We can safely use saved tmp_entry value */
8515
fe8ab488
A
8516 if (tmp_entry->map_aligned &&
8517 !VM_MAP_PAGE_ALIGNED(
8518 start,
8519 VM_MAP_PAGE_MASK(dst_map))) {
8520 /* no longer map-aligned */
8521 tmp_entry->map_aligned = FALSE;
8522 }
1c79356b
A
8523 vm_map_clip_end(dst_map, tmp_entry, start);
8524 tmp_entry = tmp_entry->vme_next;
8525 } else {
8526 /* Must do lookup of tmp_entry */
8527
8528 if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
8529 vm_map_unlock(dst_map);
8530 return(KERN_INVALID_ADDRESS);
8531 }
fe8ab488
A
8532 if (tmp_entry->map_aligned &&
8533 !VM_MAP_PAGE_ALIGNED(
8534 start,
8535 VM_MAP_PAGE_MASK(dst_map))) {
8536 /* no longer map-aligned */
8537 tmp_entry->map_aligned = FALSE;
8538 }
1c79356b
A
8539 vm_map_clip_start(dst_map, tmp_entry, start);
8540 }
8541 }
8542 }/* while */
8543
8544 return(KERN_SUCCESS);
8545}/* vm_map_copy_overwrite_aligned */
8546
8547/*
91447636 8548 * Routine: vm_map_copyin_kernel_buffer [internal use only]
1c79356b
A
8549 *
8550 * Description:
8551 * Copy in data to a kernel buffer from space in the
91447636 8552 * source map. The original space may be optionally
1c79356b
A
8553 * deallocated.
8554 *
8555 * If successful, returns a new copy object.
8556 */
91447636 8557static kern_return_t
1c79356b
A
8558vm_map_copyin_kernel_buffer(
8559 vm_map_t src_map,
91447636
A
8560 vm_map_offset_t src_addr,
8561 vm_map_size_t len,
1c79356b
A
8562 boolean_t src_destroy,
8563 vm_map_copy_t *copy_result)
8564{
91447636 8565 kern_return_t kr;
1c79356b 8566 vm_map_copy_t copy;
b0d623f7
A
8567 vm_size_t kalloc_size;
8568
3e170ce0
A
8569 if (len > msg_ool_size_small)
8570 return KERN_INVALID_ARGUMENT;
1c79356b 8571
3e170ce0
A
8572 kalloc_size = (vm_size_t)(cpy_kdata_hdr_sz + len);
8573
8574 copy = (vm_map_copy_t)kalloc(kalloc_size);
8575 if (copy == VM_MAP_COPY_NULL)
1c79356b 8576 return KERN_RESOURCE_SHORTAGE;
1c79356b
A
8577 copy->type = VM_MAP_COPY_KERNEL_BUFFER;
8578 copy->size = len;
8579 copy->offset = 0;
1c79356b 8580
3e170ce0 8581 kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t)len);
91447636
A
8582 if (kr != KERN_SUCCESS) {
8583 kfree(copy, kalloc_size);
8584 return kr;
1c79356b
A
8585 }
8586 if (src_destroy) {
39236c6e
A
8587 (void) vm_map_remove(
8588 src_map,
8589 vm_map_trunc_page(src_addr,
8590 VM_MAP_PAGE_MASK(src_map)),
8591 vm_map_round_page(src_addr + len,
8592 VM_MAP_PAGE_MASK(src_map)),
8593 (VM_MAP_REMOVE_INTERRUPTIBLE |
8594 VM_MAP_REMOVE_WAIT_FOR_KWIRE |
39037602 8595 ((src_map == kernel_map) ? VM_MAP_REMOVE_KUNWIRE : 0)));
1c79356b
A
8596 }
8597 *copy_result = copy;
8598 return KERN_SUCCESS;
8599}
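/*
 * Editorial note (sketch, not part of the original source): for
 * requests no larger than msg_ool_size_small, the data is copied in
 * (via copyinmap()) to a kalloc'ed buffer of cpy_kdata_hdr_sz + len
 * bytes and carried inline in the VM_MAP_COPY_KERNEL_BUFFER copy
 * object rather than being described by map entries;
 * vm_map_copyout_kernel_buffer() below performs the matching
 * copyout() on the destination side.
 */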
8600
8601/*
91447636 8602 * Routine: vm_map_copyout_kernel_buffer [internal use only]
1c79356b
A
8603 *
8604 * Description:
8605 * Copy out data from a kernel buffer into space in the
8606 * destination map. The space may optionally be dynamically
8607 * allocated.
8608 *
8609 * If successful, consumes the copy object.
8610 * Otherwise, the caller is responsible for it.
8611 */
91447636
A
8612static int vm_map_copyout_kernel_buffer_failures = 0;
8613static kern_return_t
1c79356b 8614vm_map_copyout_kernel_buffer(
91447636
A
8615 vm_map_t map,
8616 vm_map_address_t *addr, /* IN/OUT */
8617 vm_map_copy_t copy,
39037602 8618 vm_map_size_t copy_size,
39236c6e
A
8619 boolean_t overwrite,
8620 boolean_t consume_on_success)
1c79356b
A
8621{
8622 kern_return_t kr = KERN_SUCCESS;
91447636 8623 thread_t thread = current_thread();
1c79356b 8624
39037602
A
8625 assert(copy->size == copy_size);
8626
3e170ce0
A
8627 /*
8628 * check for corrupted vm_map_copy structure
8629 */
39037602 8630 if (copy_size > msg_ool_size_small || copy->offset)
3e170ce0
A
8631 panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
8632 (long long)copy->size, (long long)copy->offset);
8633
1c79356b
A
8634 if (!overwrite) {
8635
8636 /*
8637 * Allocate space in the target map for the data
8638 */
8639 *addr = 0;
8640 kr = vm_map_enter(map,
8641 addr,
39037602 8642 vm_map_round_page(copy_size,
39236c6e 8643 VM_MAP_PAGE_MASK(map)),
91447636
A
8644 (vm_map_offset_t) 0,
8645 VM_FLAGS_ANYWHERE,
1c79356b
A
8646 VM_OBJECT_NULL,
8647 (vm_object_offset_t) 0,
8648 FALSE,
8649 VM_PROT_DEFAULT,
8650 VM_PROT_ALL,
8651 VM_INHERIT_DEFAULT);
8652 if (kr != KERN_SUCCESS)
91447636 8653 return kr;
1c79356b
A
8654 }
8655
8656 /*
8657 * Copyout the data from the kernel buffer to the target map.
8658 */
91447636 8659 if (thread->map == map) {
1c79356b
A
8660
8661 /*
8662 * If the target map is the current map, just do
8663 * the copy.
8664 */
39037602
A
8665 assert((vm_size_t)copy_size == copy_size);
8666 if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
91447636 8667 kr = KERN_INVALID_ADDRESS;
1c79356b
A
8668 }
8669 }
8670 else {
8671 vm_map_t oldmap;
8672
8673 /*
8674 * If the target map is another map, assume the
8675 * target's address space identity for the duration
8676 * of the copy.
8677 */
8678 vm_map_reference(map);
8679 oldmap = vm_map_switch(map);
8680
39037602
A
8681 assert((vm_size_t)copy_size == copy_size);
8682 if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
91447636
A
8683 vm_map_copyout_kernel_buffer_failures++;
8684 kr = KERN_INVALID_ADDRESS;
1c79356b
A
8685 }
8686
8687 (void) vm_map_switch(oldmap);
8688 vm_map_deallocate(map);
8689 }
8690
91447636
A
8691 if (kr != KERN_SUCCESS) {
8692 /* the copy failed, clean up */
8693 if (!overwrite) {
8694 /*
8695 * Deallocate the space we allocated in the target map.
8696 */
39236c6e
A
8697 (void) vm_map_remove(
8698 map,
8699 vm_map_trunc_page(*addr,
8700 VM_MAP_PAGE_MASK(map)),
8701 vm_map_round_page((*addr +
39037602 8702 vm_map_round_page(copy_size,
39236c6e
A
8703 VM_MAP_PAGE_MASK(map))),
8704 VM_MAP_PAGE_MASK(map)),
8705 VM_MAP_NO_FLAGS);
91447636
A
8706 *addr = 0;
8707 }
8708 } else {
8709 /* copy was successful, discard the copy structure */
39236c6e 8710 if (consume_on_success) {
39037602 8711 kfree(copy, copy_size + cpy_kdata_hdr_sz);
39236c6e 8712 }
91447636 8713 }
1c79356b 8714
91447636 8715 return kr;
1c79356b
A
8716}
8717
8718/*
8719 * Macro: vm_map_copy_insert
8720 *
8721 * Description:
8722 * Link a copy chain ("copy") into a map at the
8723 * specified location (after "where").
8724 * Side effects:
8725 * The copy chain is destroyed.
8726 * Warning:
8727 * The arguments are evaluated multiple times.
8728 */
8729#define vm_map_copy_insert(map, where, copy) \
8730MACRO_BEGIN \
6d2010ae
A
8731 vm_map_store_copy_insert(map, where, copy); \
8732 zfree(vm_map_copy_zone, copy); \
1c79356b
A
8733MACRO_END
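/*
 * Editorial note (not part of the original source): "copy" is
 * expanded twice above (once for vm_map_store_copy_insert() and once
 * for zfree()), which is why the warning about multiple evaluation
 * matters; callers should not pass an expression with side effects.
 */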
8734
39236c6e
A
8735void
8736vm_map_copy_remap(
8737 vm_map_t map,
8738 vm_map_entry_t where,
8739 vm_map_copy_t copy,
8740 vm_map_offset_t adjustment,
8741 vm_prot_t cur_prot,
8742 vm_prot_t max_prot,
8743 vm_inherit_t inheritance)
8744{
8745 vm_map_entry_t copy_entry, new_entry;
8746
8747 for (copy_entry = vm_map_copy_first_entry(copy);
8748 copy_entry != vm_map_copy_to_entry(copy);
8749 copy_entry = copy_entry->vme_next) {
8750 /* get a new VM map entry for the map */
8751 new_entry = vm_map_entry_create(map,
8752 !map->hdr.entries_pageable);
8753 /* copy the "copy entry" to the new entry */
8754 vm_map_entry_copy(new_entry, copy_entry);
8755 /* adjust "start" and "end" */
8756 new_entry->vme_start += adjustment;
8757 new_entry->vme_end += adjustment;
8758 /* clear some attributes */
8759 new_entry->inheritance = inheritance;
8760 new_entry->protection = cur_prot;
8761 new_entry->max_protection = max_prot;
8762 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
8763 /* take an extra reference on the entry's "object" */
8764 if (new_entry->is_sub_map) {
fe8ab488 8765 assert(!new_entry->use_pmap); /* not nested */
3e170ce0
A
8766 vm_map_lock(VME_SUBMAP(new_entry));
8767 vm_map_reference(VME_SUBMAP(new_entry));
8768 vm_map_unlock(VME_SUBMAP(new_entry));
39236c6e 8769 } else {
3e170ce0 8770 vm_object_reference(VME_OBJECT(new_entry));
39236c6e
A
8771 }
8772 /* insert the new entry in the map */
8773 vm_map_store_entry_link(map, where, new_entry);
8774 /* continue inserting the "copy entries" after the new entry */
8775 where = new_entry;
8776 }
8777}
8778
2dced7af 8779
39037602
A
8780/*
8781 * Returns true if *size matches (or is in the range of) copy->size.
8782 * Upon returning true, the *size field is updated with the actual size of the
8783 * copy object (may be different for VM_MAP_COPY_ENTRY_LIST types)
8784 */
2dced7af
A
8785boolean_t
8786vm_map_copy_validate_size(
8787 vm_map_t dst_map,
8788 vm_map_copy_t copy,
39037602 8789 vm_map_size_t *size)
2dced7af
A
8790{
8791 if (copy == VM_MAP_COPY_NULL)
8792 return FALSE;
39037602
A
8793 vm_map_size_t copy_sz = copy->size;
8794 vm_map_size_t sz = *size;
2dced7af
A
8795 switch (copy->type) {
8796 case VM_MAP_COPY_OBJECT:
8797 case VM_MAP_COPY_KERNEL_BUFFER:
39037602 8798 if (sz == copy_sz)
2dced7af
A
8799 return TRUE;
8800 break;
8801 case VM_MAP_COPY_ENTRY_LIST:
8802 /*
8803 * potential page-size rounding prevents us from exactly
8804 * validating this flavor of vm_map_copy, but we can at least
8805 * assert that it's within a range.
8806 */
39037602
A
8807 if (copy_sz >= sz &&
8808 copy_sz <= vm_map_round_page(sz, VM_MAP_PAGE_MASK(dst_map))) {
8809 *size = copy_sz;
2dced7af 8810 return TRUE;
39037602 8811 }
2dced7af
A
8812 break;
8813 default:
8814 break;
8815 }
8816 return FALSE;
8817}
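/*
 * Minimal usage sketch (editorial, not part of the original source;
 * "user_size" and "dst_addr" are illustrative names):
 *
 *	vm_map_size_t size = user_size;
 *	if (!vm_map_copy_validate_size(dst_map, copy, &size))
 *		return KERN_FAILURE;
 *	kr = vm_map_copyout_size(dst_map, &dst_addr, copy, size);
 *
 * On success, *size has been normalized to the copy object's actual
 * size, which is what vm_map_copyout_size() below expects.
 */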
8818
39037602
A
8819/*
8820 * Routine: vm_map_copyout_size
8821 *
8822 * Description:
8823 * Copy out a copy chain ("copy") into newly-allocated
8824 * space in the destination map. Uses a prevalidated
8825 * size for the copy object (vm_map_copy_validate_size).
8826 *
8827 * If successful, consumes the copy object.
8828 * Otherwise, the caller is responsible for it.
8829 */
8830kern_return_t
8831vm_map_copyout_size(
8832 vm_map_t dst_map,
8833 vm_map_address_t *dst_addr, /* OUT */
8834 vm_map_copy_t copy,
8835 vm_map_size_t copy_size)
8836{
8837 return vm_map_copyout_internal(dst_map, dst_addr, copy, copy_size,
8838 TRUE, /* consume_on_success */
8839 VM_PROT_DEFAULT,
8840 VM_PROT_ALL,
8841 VM_INHERIT_DEFAULT);
8842}
2dced7af 8843
1c79356b
A
8844/*
8845 * Routine: vm_map_copyout
8846 *
8847 * Description:
8848 * Copy out a copy chain ("copy") into newly-allocated
8849 * space in the destination map.
8850 *
8851 * If successful, consumes the copy object.
8852 * Otherwise, the caller is responsible for it.
8853 */
8854kern_return_t
8855vm_map_copyout(
91447636
A
8856 vm_map_t dst_map,
8857 vm_map_address_t *dst_addr, /* OUT */
8858 vm_map_copy_t copy)
39236c6e 8859{
39037602
A
8860 return vm_map_copyout_internal(dst_map, dst_addr, copy, copy ? copy->size : 0,
8861 TRUE, /* consume_on_success */
8862 VM_PROT_DEFAULT,
8863 VM_PROT_ALL,
8864 VM_INHERIT_DEFAULT);
39236c6e
A
8865}
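/*
 * Minimal usage sketch (editorial, not part of the original source):
 *
 *	vm_map_copy_t copy;
 *	vm_map_address_t dst_addr;
 *	kern_return_t kr;
 *
 *	kr = vm_map_copyin(src_map, src_addr, len, FALSE, &copy);
 *	if (kr != KERN_SUCCESS)
 *		return kr;
 *	kr = vm_map_copyout(dst_map, &dst_addr, copy);
 *	if (kr != KERN_SUCCESS)
 *		vm_map_copy_discard(copy);	/* not consumed on failure */
 *
 * On success the copy object is consumed and dst_addr holds the
 * newly allocated range in dst_map.
 */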
8866
8867kern_return_t
8868vm_map_copyout_internal(
8869 vm_map_t dst_map,
8870 vm_map_address_t *dst_addr, /* OUT */
8871 vm_map_copy_t copy,
39037602 8872 vm_map_size_t copy_size,
39236c6e
A
8873 boolean_t consume_on_success,
8874 vm_prot_t cur_protection,
8875 vm_prot_t max_protection,
8876 vm_inherit_t inheritance)
1c79356b 8877{
91447636
A
8878 vm_map_size_t size;
8879 vm_map_size_t adjustment;
8880 vm_map_offset_t start;
1c79356b
A
8881 vm_object_offset_t vm_copy_start;
8882 vm_map_entry_t last;
1c79356b 8883 vm_map_entry_t entry;
3e170ce0 8884 vm_map_entry_t hole_entry;
1c79356b
A
8885
8886 /*
8887 * Check for null copy object.
8888 */
8889
8890 if (copy == VM_MAP_COPY_NULL) {
8891 *dst_addr = 0;
8892 return(KERN_SUCCESS);
8893 }
8894
39037602
A
8895 if (copy->size != copy_size) {
8896 *dst_addr = 0;
8897 return KERN_FAILURE;
8898 }
8899
1c79356b
A
8900 /*
8901 * Check for special copy object, created
8902 * by vm_map_copyin_object.
8903 */
8904
8905 if (copy->type == VM_MAP_COPY_OBJECT) {
8906 vm_object_t object = copy->cpy_object;
8907 kern_return_t kr;
8908 vm_object_offset_t offset;
8909
91447636 8910 offset = vm_object_trunc_page(copy->offset);
39037602 8911 size = vm_map_round_page((copy_size +
39236c6e
A
8912 (vm_map_size_t)(copy->offset -
8913 offset)),
8914 VM_MAP_PAGE_MASK(dst_map));
1c79356b
A
8915 *dst_addr = 0;
8916 kr = vm_map_enter(dst_map, dst_addr, size,
91447636 8917 (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
1c79356b
A
8918 object, offset, FALSE,
8919 VM_PROT_DEFAULT, VM_PROT_ALL,
8920 VM_INHERIT_DEFAULT);
8921 if (kr != KERN_SUCCESS)
8922 return(kr);
8923 /* Account for non-page-aligned copy object */
91447636 8924 *dst_addr += (vm_map_offset_t)(copy->offset - offset);
39236c6e
A
8925 if (consume_on_success)
8926 zfree(vm_map_copy_zone, copy);
1c79356b
A
8927 return(KERN_SUCCESS);
8928 }
8929
8930 /*
8931 * Check for special kernel buffer allocated
8932 * by new_ipc_kmsg_copyin.
8933 */
8934
8935 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
39037602
A
8936 return vm_map_copyout_kernel_buffer(dst_map, dst_addr,
8937 copy, copy_size, FALSE,
39236c6e 8938 consume_on_success);
1c79356b
A
8939 }
8940
39236c6e 8941
1c79356b
A
8942 /*
8943 * Find space for the data
8944 */
8945
39236c6e
A
8946 vm_copy_start = vm_map_trunc_page((vm_map_size_t)copy->offset,
8947 VM_MAP_COPY_PAGE_MASK(copy));
39037602 8948 size = vm_map_round_page((vm_map_size_t)copy->offset + copy_size,
39236c6e 8949 VM_MAP_COPY_PAGE_MASK(copy))
2d21ac55 8950 - vm_copy_start;
1c79356b 8951
39236c6e 8952
2d21ac55 8953StartAgain: ;
1c79356b
A
8954
8955 vm_map_lock(dst_map);
6d2010ae
A
8956 if( dst_map->disable_vmentry_reuse == TRUE) {
8957 VM_MAP_HIGHEST_ENTRY(dst_map, entry, start);
8958 last = entry;
8959 } else {
3e170ce0
A
8960 if (dst_map->holelistenabled) {
8961 hole_entry = (vm_map_entry_t)dst_map->holes_list;
8962
8963 if (hole_entry == NULL) {
8964 /*
8965 * No more space in the map?
8966 */
8967 vm_map_unlock(dst_map);
8968 return(KERN_NO_SPACE);
8969 }
8970
8971 last = hole_entry;
8972 start = last->vme_start;
8973 } else {
8974 assert(first_free_is_valid(dst_map));
8975 start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
8976 vm_map_min(dst_map) : last->vme_end;
8977 }
39236c6e
A
8978 start = vm_map_round_page(start,
8979 VM_MAP_PAGE_MASK(dst_map));
6d2010ae 8980 }
1c79356b
A
8981
8982 while (TRUE) {
8983 vm_map_entry_t next = last->vme_next;
91447636 8984 vm_map_offset_t end = start + size;
1c79356b
A
8985
8986 if ((end > dst_map->max_offset) || (end < start)) {
8987 if (dst_map->wait_for_space) {
8988 if (size <= (dst_map->max_offset - dst_map->min_offset)) {
8989 assert_wait((event_t) dst_map,
8990 THREAD_INTERRUPTIBLE);
8991 vm_map_unlock(dst_map);
91447636 8992 thread_block(THREAD_CONTINUE_NULL);
1c79356b
A
8993 goto StartAgain;
8994 }
8995 }
8996 vm_map_unlock(dst_map);
8997 return(KERN_NO_SPACE);
8998 }
8999
3e170ce0
A
9000 if (dst_map->holelistenabled) {
9001 if (last->vme_end >= end)
9002 break;
9003 } else {
9004 /*
9005 * If there are no more entries, we must win.
9006 *
9007 * OR
9008 *
9009 * If there is another entry, it must be
9010 * after the end of the potential new region.
9011 */
9012
9013 if (next == vm_map_to_entry(dst_map))
9014 break;
9015
9016 if (next->vme_start >= end)
9017 break;
9018 }
1c79356b
A
9019
9020 last = next;
3e170ce0
A
9021
9022 if (dst_map->holelistenabled) {
9023 if (last == (vm_map_entry_t) dst_map->holes_list) {
9024 /*
9025 * Wrapped around
9026 */
9027 vm_map_unlock(dst_map);
9028 return(KERN_NO_SPACE);
9029 }
9030 start = last->vme_start;
9031 } else {
9032 start = last->vme_end;
9033 }
39236c6e
A
9034 start = vm_map_round_page(start,
9035 VM_MAP_PAGE_MASK(dst_map));
9036 }
9037
3e170ce0
A
9038 if (dst_map->holelistenabled) {
9039 if (vm_map_lookup_entry(dst_map, last->vme_start, &last)) {
9040 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", last, (unsigned long long)last->vme_start);
9041 }
9042 }
9043
9044
39236c6e
A
9045 adjustment = start - vm_copy_start;
9046 if (! consume_on_success) {
9047 /*
9048 * We're not allowed to consume "copy", so we'll have to
9049 * copy its map entries into the destination map below.
9050 * No need to re-allocate map entries from the correct
9051 * (pageable or not) zone, since we'll get new map entries
9052 * during the transfer.
9053 * We'll also adjust the map entries's "start" and "end"
9054 * during the transfer, to keep "copy"'s entries consistent
9055 * with its "offset".
9056 */
9057 goto after_adjustments;
1c79356b
A
9058 }
9059
9060 /*
9061 * Since we're going to just drop the map
9062 * entries from the copy into the destination
9063 * map, they must come from the same pool.
9064 */
9065
9066 if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
2d21ac55
A
9067 /*
9068 * Mismatches occur when dealing with the default
9069 * pager.
9070 */
9071 zone_t old_zone;
9072 vm_map_entry_t next, new;
9073
9074 /*
9075 * Find the zone that the copies were allocated from
9076 */
7ddcb079 9077
2d21ac55
A
9078 entry = vm_map_copy_first_entry(copy);
9079
9080 /*
9081 * Reinitialize the copy so that vm_map_copy_entry_link
9082 * will work.
9083 */
6d2010ae 9084 vm_map_store_copy_reset(copy, entry);
2d21ac55 9085 copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;
2d21ac55
A
9086
9087 /*
9088 * Copy each entry.
9089 */
9090 while (entry != vm_map_copy_to_entry(copy)) {
7ddcb079 9091 new = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
2d21ac55 9092 vm_map_entry_copy_full(new, entry);
fe8ab488
A
9093 assert(!new->iokit_acct);
9094 if (new->is_sub_map) {
9095 /* clr address space specifics */
9096 new->use_pmap = FALSE;
9097 }
2d21ac55
A
9098 vm_map_copy_entry_link(copy,
9099 vm_map_copy_last_entry(copy),
9100 new);
9101 next = entry->vme_next;
7ddcb079 9102 old_zone = entry->from_reserved_zone ? vm_map_entry_reserved_zone : vm_map_entry_zone;
2d21ac55
A
9103 zfree(old_zone, entry);
9104 entry = next;
9105 }
1c79356b
A
9106 }
9107
9108 /*
9109 * Adjust the addresses in the copy chain, and
9110 * reset the region attributes.
9111 */
9112
1c79356b
A
9113 for (entry = vm_map_copy_first_entry(copy);
9114 entry != vm_map_copy_to_entry(copy);
9115 entry = entry->vme_next) {
39236c6e
A
9116 if (VM_MAP_PAGE_SHIFT(dst_map) == PAGE_SHIFT) {
9117 /*
9118 * We're injecting this copy entry into a map that
9119 * has the standard page alignment, so clear
9120 * "map_aligned" (which might have been inherited
9121 * from the original map entry).
9122 */
9123 entry->map_aligned = FALSE;
9124 }
9125
1c79356b
A
9126 entry->vme_start += adjustment;
9127 entry->vme_end += adjustment;
9128
39236c6e
A
9129 if (entry->map_aligned) {
9130 assert(VM_MAP_PAGE_ALIGNED(entry->vme_start,
9131 VM_MAP_PAGE_MASK(dst_map)));
9132 assert(VM_MAP_PAGE_ALIGNED(entry->vme_end,
9133 VM_MAP_PAGE_MASK(dst_map)));
9134 }
9135
1c79356b
A
9136 entry->inheritance = VM_INHERIT_DEFAULT;
9137 entry->protection = VM_PROT_DEFAULT;
9138 entry->max_protection = VM_PROT_ALL;
9139 entry->behavior = VM_BEHAVIOR_DEFAULT;
9140
9141 /*
9142 * If the entry is now wired,
9143 * map the pages into the destination map.
9144 */
9145 if (entry->wired_count != 0) {
39037602 9146 vm_map_offset_t va;
2d21ac55 9147 vm_object_offset_t offset;
39037602 9148 vm_object_t object;
2d21ac55
A
9149 vm_prot_t prot;
9150 int type_of_fault;
1c79356b 9151
3e170ce0
A
9152 object = VME_OBJECT(entry);
9153 offset = VME_OFFSET(entry);
2d21ac55 9154 va = entry->vme_start;
1c79356b 9155
2d21ac55
A
9156 pmap_pageable(dst_map->pmap,
9157 entry->vme_start,
9158 entry->vme_end,
9159 TRUE);
1c79356b 9160
2d21ac55 9161 while (va < entry->vme_end) {
39037602 9162 vm_page_t m;
1c79356b 9163
2d21ac55
A
9164 /*
9165 * Look up the page in the object.
9166 * Assert that the page will be found in the
9167 * top object:
9168 * either
9169 * the object was newly created by
9170 * vm_object_copy_slowly, and has
9171 * copies of all of the pages from
9172 * the source object
9173 * or
9174 * the object was moved from the old
9175 * map entry; because the old map
9176 * entry was wired, all of the pages
9177 * were in the top-level object.
9178 * (XXX not true if we wire pages for
9179 * reading)
9180 */
9181 vm_object_lock(object);
91447636 9182
2d21ac55 9183 m = vm_page_lookup(object, offset);
b0d623f7 9184 if (m == VM_PAGE_NULL || !VM_PAGE_WIRED(m) ||
2d21ac55
A
9185 m->absent)
9186 panic("vm_map_copyout: wiring %p", m);
1c79356b 9187
2d21ac55
A
9188 /*
9189 * ENCRYPTED SWAP:
9190 * The page is assumed to be wired here, so it
9191 * shouldn't be encrypted. Otherwise, we
9192 * couldn't enter it in the page table, since
9193 * we don't want the user to see the encrypted
9194 * data.
9195 */
9196 ASSERT_PAGE_DECRYPTED(m);
1c79356b 9197
2d21ac55 9198 prot = entry->protection;
1c79356b 9199
3e170ce0
A
9200 if (override_nx(dst_map, VME_ALIAS(entry)) &&
9201 prot)
2d21ac55 9202 prot |= VM_PROT_EXECUTE;
1c79356b 9203
2d21ac55 9204 type_of_fault = DBG_CACHE_HIT_FAULT;
1c79356b 9205
6d2010ae 9206 vm_fault_enter(m, dst_map->pmap, va, prot, prot,
fe8ab488 9207 VM_PAGE_WIRED(m), FALSE, FALSE,
3e170ce0 9208 FALSE, VME_ALIAS(entry),
fe8ab488
A
9209 ((entry->iokit_acct ||
9210 (!entry->is_sub_map &&
9211 !entry->use_pmap))
9212 ? PMAP_OPTIONS_ALT_ACCT
9213 : 0),
9214 NULL, &type_of_fault);
1c79356b 9215
2d21ac55 9216 vm_object_unlock(object);
1c79356b 9217
2d21ac55
A
9218 offset += PAGE_SIZE_64;
9219 va += PAGE_SIZE;
1c79356b
A
9220 }
9221 }
9222 }
9223
39236c6e
A
9224after_adjustments:
9225
1c79356b
A
9226 /*
9227 * Correct the page alignment for the result
9228 */
9229
9230 *dst_addr = start + (copy->offset - vm_copy_start);
9231
9232 /*
9233 * Update the hints and the map size
9234 */
9235
39236c6e
A
9236 if (consume_on_success) {
9237 SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy));
9238 } else {
9239 SAVE_HINT_MAP_WRITE(dst_map, last);
9240 }
1c79356b
A
9241
9242 dst_map->size += size;
9243
9244 /*
9245 * Link in the copy
9246 */
9247
39236c6e
A
9248 if (consume_on_success) {
9249 vm_map_copy_insert(dst_map, last, copy);
9250 } else {
9251 vm_map_copy_remap(dst_map, last, copy, adjustment,
9252 cur_protection, max_protection,
9253 inheritance);
9254 }
1c79356b
A
9255
9256 vm_map_unlock(dst_map);
9257
9258 /*
9259 * XXX If wiring_required, call vm_map_pageable
9260 */
9261
9262 return(KERN_SUCCESS);
9263}
9264
1c79356b
A
9265/*
9266 * Routine: vm_map_copyin
9267 *
9268 * Description:
2d21ac55
A
9269 * see vm_map_copyin_common. Exported via Unsupported.exports.
9270 *
9271 */
9272
9273#undef vm_map_copyin
9274
9275kern_return_t
9276vm_map_copyin(
9277 vm_map_t src_map,
9278 vm_map_address_t src_addr,
9279 vm_map_size_t len,
9280 boolean_t src_destroy,
9281 vm_map_copy_t *copy_result) /* OUT */
9282{
9283 return(vm_map_copyin_common(src_map, src_addr, len, src_destroy,
9284 FALSE, copy_result, FALSE));
9285}
9286
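/*
 * Illustrative sketch (editorial addition, not part of the original
 * source): a typical round trip that copies a region out of one map
 * and installs it into another using the interfaces in this file.
 * "src_map", "dst_map", "addr" and "len" are assumed to be supplied
 * by the caller; on failure, vm_map_copyout() is assumed not to have
 * consumed the copy, so the caller discards it.
 *
 *	vm_map_copy_t		copy;
 *	vm_map_address_t	dst_addr;
 *	kern_return_t		kr;
 *
 *	kr = vm_map_copyin(src_map, addr, len, FALSE, &copy);
 *	if (kr != KERN_SUCCESS)
 *		return kr;
 *	kr = vm_map_copyout(dst_map, &dst_addr, copy);
 *	if (kr != KERN_SUCCESS)
 *		vm_map_copy_discard(copy);
 *	return kr;
 */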
9287/*
9288 * Routine: vm_map_copyin_common
9289 *
9290 * Description:
1c79356b
A
9291 * Copy the specified region (src_addr, len) from the
9292 * source address space (src_map), possibly removing
9293 * the region from the source address space (src_destroy).
9294 *
9295 * Returns:
9296 * A vm_map_copy_t object (copy_result), suitable for
9297 * insertion into another address space (using vm_map_copyout),
9298 * copying over another address space region (using
9299 * vm_map_copy_overwrite). If the copy is unused, it
9300 * should be destroyed (using vm_map_copy_discard).
9301 *
9302 * In/out conditions:
9303 * The source map should not be locked on entry.
9304 */
9305
9306typedef struct submap_map {
9307 vm_map_t parent_map;
91447636
A
9308 vm_map_offset_t base_start;
9309 vm_map_offset_t base_end;
2d21ac55 9310 vm_map_size_t base_len;
1c79356b
A
9311 struct submap_map *next;
9312} submap_map_t;
9313
9314kern_return_t
9315vm_map_copyin_common(
9316 vm_map_t src_map,
91447636
A
9317 vm_map_address_t src_addr,
9318 vm_map_size_t len,
1c79356b 9319 boolean_t src_destroy,
91447636 9320 __unused boolean_t src_volatile,
1c79356b
A
9321 vm_map_copy_t *copy_result, /* OUT */
9322 boolean_t use_maxprot)
4bd07ac2
A
9323{
9324 int flags;
9325
9326 flags = 0;
9327 if (src_destroy) {
9328 flags |= VM_MAP_COPYIN_SRC_DESTROY;
9329 }
9330 if (use_maxprot) {
9331 flags |= VM_MAP_COPYIN_USE_MAXPROT;
9332 }
9333 return vm_map_copyin_internal(src_map,
9334 src_addr,
9335 len,
9336 flags,
9337 copy_result);
9338}
9339kern_return_t
9340vm_map_copyin_internal(
9341 vm_map_t src_map,
9342 vm_map_address_t src_addr,
9343 vm_map_size_t len,
9344 int flags,
9345 vm_map_copy_t *copy_result) /* OUT */
1c79356b 9346{
1c79356b
A
9347 vm_map_entry_t tmp_entry; /* Result of last map lookup --
9348 * in multi-level lookup, this
9349 * entry contains the actual
9350 * vm_object/offset.
9351 */
1c79356b
A
9352 vm_map_entry_t new_entry = VM_MAP_ENTRY_NULL; /* Map entry for copy */
9353
91447636 9354 vm_map_offset_t src_start; /* Start of current entry --
1c79356b
A
9355 * where copy is taking place now
9356 */
91447636 9357 vm_map_offset_t src_end; /* End of entire region to be
1c79356b 9358 * copied */
2d21ac55 9359 vm_map_offset_t src_base;
91447636 9360 vm_map_t base_map = src_map;
1c79356b
A
9361 boolean_t map_share=FALSE;
9362 submap_map_t *parent_maps = NULL;
9363
1c79356b 9364 vm_map_copy_t copy; /* Resulting copy */
fe8ab488
A
9365 vm_map_address_t copy_addr;
9366 vm_map_size_t copy_size;
4bd07ac2
A
9367 boolean_t src_destroy;
9368 boolean_t use_maxprot;
39037602 9369 boolean_t preserve_purgeable;
4bd07ac2
A
9370
9371 if (flags & ~VM_MAP_COPYIN_ALL_FLAGS) {
9372 return KERN_INVALID_ARGUMENT;
9373 }
9374
9375 src_destroy = (flags & VM_MAP_COPYIN_SRC_DESTROY) ? TRUE : FALSE;
9376 use_maxprot = (flags & VM_MAP_COPYIN_USE_MAXPROT) ? TRUE : FALSE;
39037602
A
9377 preserve_purgeable =
9378 (flags & VM_MAP_COPYIN_PRESERVE_PURGEABLE) ? TRUE : FALSE;
1c79356b
A
9379
9380 /*
9381 * Check for copies of zero bytes.
9382 */
9383
9384 if (len == 0) {
9385 *copy_result = VM_MAP_COPY_NULL;
9386 return(KERN_SUCCESS);
9387 }
9388
4a249263
A
9389 /*
9390 * Check that the end address doesn't overflow
9391 */
9392 src_end = src_addr + len;
9393 if (src_end < src_addr)
9394 return KERN_INVALID_ADDRESS;
9395
39037602
A
9396 /*
9397 * Compute (page aligned) start and end of region
9398 */
9399 src_start = vm_map_trunc_page(src_addr,
9400 VM_MAP_PAGE_MASK(src_map));
9401 src_end = vm_map_round_page(src_end,
9402 VM_MAP_PAGE_MASK(src_map));
9403
1c79356b
A
9404 /*
9405 * If the copy is sufficiently small, use a kernel buffer instead
9406 * of making a virtual copy. The theory being that the cost of
9407 * setting up VM (and taking C-O-W faults) dominates the copy costs
9408 * for small regions.
9409 */
4bd07ac2
A
9410 if ((len < msg_ool_size_small) &&
9411 !use_maxprot &&
39037602
A
9412 !preserve_purgeable &&
9413 !(flags & VM_MAP_COPYIN_ENTRY_LIST) &&
9414 /*
9415 * Since the "msg_ool_size_small" threshold was increased and
9416 * vm_map_copyin_kernel_buffer() doesn't handle accesses beyond the
9417 * address space limits, we revert to doing a virtual copy if the
9418 * copied range goes beyond those limits. Otherwise, mach_vm_read()
9419 * of the commpage would now fail when it used to work.
9420 */
9421 (src_start >= vm_map_min(src_map) &&
9422 src_start < vm_map_max(src_map) &&
9423 src_end >= vm_map_min(src_map) &&
9424 src_end < vm_map_max(src_map)))
2d21ac55
A
9425 return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
9426 src_destroy, copy_result);
1c79356b 9427
b0d623f7 9428 XPR(XPR_VM_MAP, "vm_map_copyin_common map 0x%x addr 0x%x len 0x%x dest %d\n", src_map, src_addr, len, src_destroy, 0);
1c79356b 9429
1c79356b
A
9430 /*
9431 * Allocate a header element for the list.
9432 *
9433 * Use the start and end in the header to
9434 * remember the endpoints prior to rounding.
9435 */
9436
9437 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
04b8595b 9438 copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
1c79356b 9439 vm_map_copy_first_entry(copy) =
2d21ac55 9440 vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
1c79356b
A
9441 copy->type = VM_MAP_COPY_ENTRY_LIST;
9442 copy->cpy_hdr.nentries = 0;
9443 copy->cpy_hdr.entries_pageable = TRUE;
39236c6e
A
9444#if 00
9445 copy->cpy_hdr.page_shift = src_map->hdr.page_shift;
9446#else
9447 /*
9448 * The copy entries can be broken down for a variety of reasons,
9449 * so we can't guarantee that they will remain map-aligned...
9450 * Will need to adjust the first copy_entry's "vme_start" and
9451 * the last copy_entry's "vme_end" to be rounded to PAGE_MASK
9452 * rather than the original map's alignment.
9453 */
9454 copy->cpy_hdr.page_shift = PAGE_SHIFT;
9455#endif
1c79356b 9456
6d2010ae
A
9457 vm_map_store_init( &(copy->cpy_hdr) );
9458
1c79356b
A
9459 copy->offset = src_addr;
9460 copy->size = len;
9461
7ddcb079 9462 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
1c79356b
A
9463
9464#define RETURN(x) \
9465 MACRO_BEGIN \
9466 vm_map_unlock(src_map); \
9bccf70c
A
9467 if(src_map != base_map) \
9468 vm_map_deallocate(src_map); \
1c79356b
A
9469 if (new_entry != VM_MAP_ENTRY_NULL) \
9470 vm_map_copy_entry_dispose(copy,new_entry); \
9471 vm_map_copy_discard(copy); \
9472 { \
91447636 9473 submap_map_t *_ptr; \
1c79356b 9474 \
91447636 9475 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
1c79356b 9476 parent_maps=parent_maps->next; \
91447636
A
9477 if (_ptr->parent_map != base_map) \
9478 vm_map_deallocate(_ptr->parent_map); \
9479 kfree(_ptr, sizeof(submap_map_t)); \
1c79356b
A
9480 } \
9481 } \
9482 MACRO_RETURN(x); \
9483 MACRO_END
9484
9485 /*
9486 * Find the beginning of the region.
9487 */
9488
9489 vm_map_lock(src_map);
9490
fe8ab488
A
9491 /*
9492 * Lookup the original "src_addr" rather than the truncated
9493 * "src_start", in case "src_start" falls in a non-map-aligned
9494 * map entry *before* the map entry that contains "src_addr"...
9495 */
9496 if (!vm_map_lookup_entry(src_map, src_addr, &tmp_entry))
1c79356b
A
9497 RETURN(KERN_INVALID_ADDRESS);
9498 if(!tmp_entry->is_sub_map) {
fe8ab488
A
9499 /*
9500 * ... but clip to the map-rounded "src_start" rather than
9501 * "src_addr" to preserve map-alignment. We'll adjust the
9502 * first copy entry at the end, if needed.
9503 */
1c79356b
A
9504 vm_map_clip_start(src_map, tmp_entry, src_start);
9505 }
fe8ab488
A
9506 if (src_start < tmp_entry->vme_start) {
9507 /*
9508 * Move "src_start" up to the start of the
9509 * first map entry to copy.
9510 */
9511 src_start = tmp_entry->vme_start;
9512 }
1c79356b
A
9513 /* set for later submap fix-up */
9514 copy_addr = src_start;
9515
9516 /*
9517 * Go through entries until we get to the end.
9518 */
9519
9520 while (TRUE) {
1c79356b 9521 vm_map_entry_t src_entry = tmp_entry; /* Top-level entry */
91447636 9522 vm_map_size_t src_size; /* Size of source
1c79356b
A
9523 * map entry (in both
9524 * maps)
9525 */
9526
1c79356b
A
9527 vm_object_t src_object; /* Object to copy */
9528 vm_object_offset_t src_offset;
9529
9530 boolean_t src_needs_copy; /* Should source map
9531 * be made read-only
9532 * for copy-on-write?
9533 */
9534
9535 boolean_t new_entry_needs_copy; /* Will new entry be COW? */
9536
9537 boolean_t was_wired; /* Was source wired? */
9538 vm_map_version_t version; /* Version before locks
9539 * dropped to make copy
9540 */
9541 kern_return_t result; /* Return value from
9542 * copy_strategically.
9543 */
9544 while(tmp_entry->is_sub_map) {
91447636 9545 vm_map_size_t submap_len;
1c79356b
A
9546 submap_map_t *ptr;
9547
9548 ptr = (submap_map_t *)kalloc(sizeof(submap_map_t));
9549 ptr->next = parent_maps;
9550 parent_maps = ptr;
9551 ptr->parent_map = src_map;
9552 ptr->base_start = src_start;
9553 ptr->base_end = src_end;
9554 submap_len = tmp_entry->vme_end - src_start;
9555 if(submap_len > (src_end-src_start))
9556 submap_len = src_end-src_start;
2d21ac55 9557 ptr->base_len = submap_len;
1c79356b
A
9558
9559 src_start -= tmp_entry->vme_start;
3e170ce0 9560 src_start += VME_OFFSET(tmp_entry);
1c79356b 9561 src_end = src_start + submap_len;
3e170ce0 9562 src_map = VME_SUBMAP(tmp_entry);
1c79356b 9563 vm_map_lock(src_map);
9bccf70c
A
9564 /* keep an outstanding reference for all maps in */
 9565 /* the tree of parent maps, except the base map */
9566 vm_map_reference(src_map);
1c79356b
A
9567 vm_map_unlock(ptr->parent_map);
9568 if (!vm_map_lookup_entry(
2d21ac55 9569 src_map, src_start, &tmp_entry))
1c79356b
A
9570 RETURN(KERN_INVALID_ADDRESS);
9571 map_share = TRUE;
9572 if(!tmp_entry->is_sub_map)
2d21ac55 9573 vm_map_clip_start(src_map, tmp_entry, src_start);
1c79356b
A
9574 src_entry = tmp_entry;
9575 }
2d21ac55
A
9576 /* we are now in the lowest level submap... */
9577
3e170ce0
A
9578 if ((VME_OBJECT(tmp_entry) != VM_OBJECT_NULL) &&
9579 (VME_OBJECT(tmp_entry)->phys_contiguous)) {
55e303ae
A
 9580 /* This is not supported for now. In the future */
 9581 /* we will need to detect the phys_contig */
 9582 /* condition and then upgrade copy_slowly */
 9583 /* to do a physical copy from the device-mem- */
 9584 /* based object. We can piggy-back off of */
 9585 /* the "was_wired" boolean to set up the */
 9586 /* proper handling. */
0b4e3aa0
A
9587 RETURN(KERN_PROTECTION_FAILURE);
9588 }
1c79356b
A
9589 /*
9590 * Create a new address map entry to hold the result.
9591 * Fill in the fields from the appropriate source entries.
9592 * We must unlock the source map to do this if we need
9593 * to allocate a map entry.
9594 */
9595 if (new_entry == VM_MAP_ENTRY_NULL) {
2d21ac55
A
9596 version.main_timestamp = src_map->timestamp;
9597 vm_map_unlock(src_map);
1c79356b 9598
7ddcb079 9599 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
1c79356b 9600
2d21ac55
A
9601 vm_map_lock(src_map);
9602 if ((version.main_timestamp + 1) != src_map->timestamp) {
9603 if (!vm_map_lookup_entry(src_map, src_start,
9604 &tmp_entry)) {
9605 RETURN(KERN_INVALID_ADDRESS);
9606 }
9607 if (!tmp_entry->is_sub_map)
9608 vm_map_clip_start(src_map, tmp_entry, src_start);
9609 continue; /* restart w/ new tmp_entry */
1c79356b 9610 }
1c79356b
A
9611 }
9612
9613 /*
9614 * Verify that the region can be read.
9615 */
9616 if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE &&
2d21ac55 9617 !use_maxprot) ||
1c79356b
A
9618 (src_entry->max_protection & VM_PROT_READ) == 0)
9619 RETURN(KERN_PROTECTION_FAILURE);
9620
9621 /*
9622 * Clip against the endpoints of the entire region.
9623 */
9624
9625 vm_map_clip_end(src_map, src_entry, src_end);
9626
9627 src_size = src_entry->vme_end - src_start;
3e170ce0
A
9628 src_object = VME_OBJECT(src_entry);
9629 src_offset = VME_OFFSET(src_entry);
1c79356b
A
9630 was_wired = (src_entry->wired_count != 0);
9631
9632 vm_map_entry_copy(new_entry, src_entry);
fe8ab488
A
9633 if (new_entry->is_sub_map) {
9634 /* clr address space specifics */
9635 new_entry->use_pmap = FALSE;
9636 }
1c79356b
A
9637
9638 /*
9639 * Attempt non-blocking copy-on-write optimizations.
9640 */
9641
9642 if (src_destroy &&
9643 (src_object == VM_OBJECT_NULL ||
2d21ac55
A
9644 (src_object->internal && !src_object->true_share
9645 && !map_share))) {
9646 /*
9647 * If we are destroying the source, and the object
9648 * is internal, we can move the object reference
9649 * from the source to the copy. The copy is
9650 * copy-on-write only if the source is.
9651 * We make another reference to the object, because
9652 * destroying the source entry will deallocate it.
9653 */
9654 vm_object_reference(src_object);
1c79356b 9655
2d21ac55
A
9656 /*
9657 * Copy is always unwired. vm_map_copy_entry
 9658 * sets its wired count to zero.
9659 */
1c79356b 9660
2d21ac55 9661 goto CopySuccessful;
1c79356b
A
9662 }
9663
9664
2d21ac55 9665 RestartCopy:
1c79356b 9666 XPR(XPR_VM_MAP, "vm_map_copyin_common src_obj 0x%x ent 0x%x obj 0x%x was_wired %d\n",
3e170ce0 9667 src_object, new_entry, VME_OBJECT(new_entry),
1c79356b 9668 was_wired, 0);
55e303ae 9669 if ((src_object == VM_OBJECT_NULL ||
2d21ac55
A
9670 (!was_wired && !map_share && !tmp_entry->is_shared)) &&
9671 vm_object_copy_quickly(
3e170ce0 9672 &VME_OBJECT(new_entry),
2d21ac55
A
9673 src_offset,
9674 src_size,
9675 &src_needs_copy,
9676 &new_entry_needs_copy)) {
1c79356b
A
9677
9678 new_entry->needs_copy = new_entry_needs_copy;
9679
9680 /*
9681 * Handle copy-on-write obligations
9682 */
9683
9684 if (src_needs_copy && !tmp_entry->needs_copy) {
0c530ab8
A
9685 vm_prot_t prot;
9686
9687 prot = src_entry->protection & ~VM_PROT_WRITE;
2d21ac55 9688
3e170ce0
A
9689 if (override_nx(src_map, VME_ALIAS(src_entry))
9690 && prot)
0c530ab8 9691 prot |= VM_PROT_EXECUTE;
2d21ac55 9692
55e303ae
A
9693 vm_object_pmap_protect(
9694 src_object,
9695 src_offset,
9696 src_size,
9697 (src_entry->is_shared ?
2d21ac55
A
9698 PMAP_NULL
9699 : src_map->pmap),
55e303ae 9700 src_entry->vme_start,
0c530ab8
A
9701 prot);
9702
3e170ce0 9703 assert(tmp_entry->wired_count == 0);
55e303ae 9704 tmp_entry->needs_copy = TRUE;
1c79356b
A
9705 }
9706
9707 /*
9708 * The map has never been unlocked, so it's safe
9709 * to move to the next entry rather than doing
9710 * another lookup.
9711 */
9712
9713 goto CopySuccessful;
9714 }
9715
1c79356b
A
9716 /*
9717 * Take an object reference, so that we may
9718 * release the map lock(s).
9719 */
9720
9721 assert(src_object != VM_OBJECT_NULL);
9722 vm_object_reference(src_object);
9723
9724 /*
9725 * Record the timestamp for later verification.
9726 * Unlock the map.
9727 */
9728
9729 version.main_timestamp = src_map->timestamp;
9bccf70c 9730 vm_map_unlock(src_map); /* Increments timestamp once! */
1c79356b
A
9731
9732 /*
9733 * Perform the copy
9734 */
9735
9736 if (was_wired) {
55e303ae 9737 CopySlowly:
1c79356b
A
9738 vm_object_lock(src_object);
9739 result = vm_object_copy_slowly(
2d21ac55
A
9740 src_object,
9741 src_offset,
9742 src_size,
9743 THREAD_UNINT,
3e170ce0
A
9744 &VME_OBJECT(new_entry));
9745 VME_OFFSET_SET(new_entry, 0);
1c79356b 9746 new_entry->needs_copy = FALSE;
55e303ae
A
9747
9748 }
9749 else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
2d21ac55 9750 (tmp_entry->is_shared || map_share)) {
55e303ae
A
9751 vm_object_t new_object;
9752
2d21ac55 9753 vm_object_lock_shared(src_object);
55e303ae 9754 new_object = vm_object_copy_delayed(
2d21ac55
A
9755 src_object,
9756 src_offset,
9757 src_size,
9758 TRUE);
55e303ae
A
9759 if (new_object == VM_OBJECT_NULL)
9760 goto CopySlowly;
9761
3e170ce0
A
9762 VME_OBJECT_SET(new_entry, new_object);
9763 assert(new_entry->wired_count == 0);
55e303ae 9764 new_entry->needs_copy = TRUE;
fe8ab488
A
9765 assert(!new_entry->iokit_acct);
9766 assert(new_object->purgable == VM_PURGABLE_DENY);
9767 new_entry->use_pmap = TRUE;
55e303ae
A
9768 result = KERN_SUCCESS;
9769
1c79356b 9770 } else {
3e170ce0
A
9771 vm_object_offset_t new_offset;
9772 new_offset = VME_OFFSET(new_entry);
1c79356b 9773 result = vm_object_copy_strategically(src_object,
2d21ac55
A
9774 src_offset,
9775 src_size,
3e170ce0
A
9776 &VME_OBJECT(new_entry),
9777 &new_offset,
2d21ac55 9778 &new_entry_needs_copy);
3e170ce0
A
9779 if (new_offset != VME_OFFSET(new_entry)) {
9780 VME_OFFSET_SET(new_entry, new_offset);
9781 }
1c79356b
A
9782
9783 new_entry->needs_copy = new_entry_needs_copy;
1c79356b
A
9784 }
9785
39037602
A
9786 if (result == KERN_SUCCESS &&
9787 preserve_purgeable &&
9788 src_object->purgable != VM_PURGABLE_DENY) {
9789 vm_object_t new_object;
9790
9791 new_object = VME_OBJECT(new_entry);
9792 assert(new_object != src_object);
9793 vm_object_lock(new_object);
9794 assert(new_object->ref_count == 1);
9795 assert(new_object->shadow == VM_OBJECT_NULL);
9796 assert(new_object->copy == VM_OBJECT_NULL);
9797 assert(new_object->vo_purgeable_owner == NULL);
9798
9799 new_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
9800 new_object->true_share = TRUE;
9801 /* start as non-volatile with no owner... */
9802 new_object->purgable = VM_PURGABLE_NONVOLATILE;
9803 vm_purgeable_nonvolatile_enqueue(new_object, NULL);
9804 /* ... and move to src_object's purgeable state */
9805 if (src_object->purgable != VM_PURGABLE_NONVOLATILE) {
9806 int state;
9807 state = src_object->purgable;
9808 vm_object_purgable_control(
9809 new_object,
9810 VM_PURGABLE_SET_STATE,
9811 &state);
9812 }
9813 vm_object_unlock(new_object);
9814 new_object = VM_OBJECT_NULL;
9815 }
9816
1c79356b
A
9817 if (result != KERN_SUCCESS &&
9818 result != KERN_MEMORY_RESTART_COPY) {
9819 vm_map_lock(src_map);
9820 RETURN(result);
9821 }
9822
9823 /*
9824 * Throw away the extra reference
9825 */
9826
9827 vm_object_deallocate(src_object);
9828
9829 /*
9830 * Verify that the map has not substantially
9831 * changed while the copy was being made.
9832 */
9833
9bccf70c 9834 vm_map_lock(src_map);
1c79356b
A
9835
9836 if ((version.main_timestamp + 1) == src_map->timestamp)
9837 goto VerificationSuccessful;
9838
9839 /*
9840 * Simple version comparison failed.
9841 *
9842 * Retry the lookup and verify that the
9843 * same object/offset are still present.
9844 *
9845 * [Note: a memory manager that colludes with
9846 * the calling task can detect that we have
9847 * cheated. While the map was unlocked, the
9848 * mapping could have been changed and restored.]
9849 */
9850
9851 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
fe8ab488 9852 if (result != KERN_MEMORY_RESTART_COPY) {
3e170ce0
A
9853 vm_object_deallocate(VME_OBJECT(new_entry));
9854 VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
fe8ab488
A
9855 assert(!new_entry->iokit_acct);
9856 new_entry->use_pmap = TRUE;
9857 }
1c79356b
A
9858 RETURN(KERN_INVALID_ADDRESS);
9859 }
9860
9861 src_entry = tmp_entry;
9862 vm_map_clip_start(src_map, src_entry, src_start);
9863
91447636
A
9864 if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) &&
9865 !use_maxprot) ||
9866 ((src_entry->max_protection & VM_PROT_READ) == 0))
1c79356b
A
9867 goto VerificationFailed;
9868
39236c6e 9869 if (src_entry->vme_end < new_entry->vme_end) {
39037602
A
9870 /*
9871 * This entry might have been shortened
9872 * (vm_map_clip_end) or been replaced with
9873 * an entry that ends closer to "src_start"
9874 * than before.
9875 * Adjust "new_entry" accordingly; copying
9876 * less memory would be correct but we also
9877 * redo the copy (see below) if the new entry
9878 * no longer points at the same object/offset.
9879 */
39236c6e
A
9880 assert(VM_MAP_PAGE_ALIGNED(src_entry->vme_end,
9881 VM_MAP_COPY_PAGE_MASK(copy)));
9882 new_entry->vme_end = src_entry->vme_end;
9883 src_size = new_entry->vme_end - src_start;
39037602
A
9884 } else if (src_entry->vme_end > new_entry->vme_end) {
9885 /*
9886 * This entry might have been extended
9887 * (vm_map_entry_simplify() or coalesce)
9888 * or been replaced with an entry that ends farther
9889 * from "src_start" than before.
9890 *
9891 * We've called vm_object_copy_*() only on
9892 * the previous <start:end> range, so we can't
9893 * just extend new_entry. We have to re-do
9894 * the copy based on the new entry as if it was
9895 * pointing at a different object/offset (see
9896 * "Verification failed" below).
9897 */
39236c6e 9898 }
1c79356b 9899
3e170ce0 9900 if ((VME_OBJECT(src_entry) != src_object) ||
39037602
A
9901 (VME_OFFSET(src_entry) != src_offset) ||
9902 (src_entry->vme_end > new_entry->vme_end)) {
1c79356b
A
9903
9904 /*
9905 * Verification failed.
9906 *
9907 * Start over with this top-level entry.
9908 */
9909
2d21ac55 9910 VerificationFailed: ;
1c79356b 9911
3e170ce0 9912 vm_object_deallocate(VME_OBJECT(new_entry));
1c79356b
A
9913 tmp_entry = src_entry;
9914 continue;
9915 }
9916
9917 /*
9918 * Verification succeeded.
9919 */
9920
2d21ac55 9921 VerificationSuccessful: ;
1c79356b
A
9922
9923 if (result == KERN_MEMORY_RESTART_COPY)
9924 goto RestartCopy;
9925
9926 /*
9927 * Copy succeeded.
9928 */
9929
2d21ac55 9930 CopySuccessful: ;
1c79356b
A
9931
9932 /*
9933 * Link in the new copy entry.
9934 */
9935
9936 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
9937 new_entry);
9938
9939 /*
9940 * Determine whether the entire region
9941 * has been copied.
9942 */
2d21ac55 9943 src_base = src_start;
1c79356b
A
9944 src_start = new_entry->vme_end;
9945 new_entry = VM_MAP_ENTRY_NULL;
9946 while ((src_start >= src_end) && (src_end != 0)) {
fe8ab488
A
9947 submap_map_t *ptr;
9948
9949 if (src_map == base_map) {
9950 /* back to the top */
1c79356b 9951 break;
fe8ab488
A
9952 }
9953
9954 ptr = parent_maps;
9955 assert(ptr != NULL);
9956 parent_maps = parent_maps->next;
9957
9958 /* fix up the damage we did in that submap */
9959 vm_map_simplify_range(src_map,
9960 src_base,
9961 src_end);
9962
9963 vm_map_unlock(src_map);
9964 vm_map_deallocate(src_map);
9965 vm_map_lock(ptr->parent_map);
9966 src_map = ptr->parent_map;
9967 src_base = ptr->base_start;
9968 src_start = ptr->base_start + ptr->base_len;
9969 src_end = ptr->base_end;
9970 if (!vm_map_lookup_entry(src_map,
9971 src_start,
9972 &tmp_entry) &&
9973 (src_end > src_start)) {
9974 RETURN(KERN_INVALID_ADDRESS);
9975 }
9976 kfree(ptr, sizeof(submap_map_t));
9977 if (parent_maps == NULL)
9978 map_share = FALSE;
9979 src_entry = tmp_entry->vme_prev;
9980 }
9981
9982 if ((VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) &&
9983 (src_start >= src_addr + len) &&
9984 (src_addr + len != 0)) {
9985 /*
9986 * Stop copying now, even though we haven't reached
9987 * "src_end". We'll adjust the end of the last copy
9988 * entry at the end, if needed.
9989 *
 9990 * If src_map's alignment is different from the
9991 * system's page-alignment, there could be
9992 * extra non-map-aligned map entries between
9993 * the original (non-rounded) "src_addr + len"
9994 * and the rounded "src_end".
9995 * We do not want to copy those map entries since
9996 * they're not part of the copied range.
9997 */
9998 break;
1c79356b 9999 }
fe8ab488 10000
1c79356b
A
10001 if ((src_start >= src_end) && (src_end != 0))
10002 break;
10003
10004 /*
10005 * Verify that there are no gaps in the region
10006 */
10007
10008 tmp_entry = src_entry->vme_next;
fe8ab488 10009 if ((tmp_entry->vme_start != src_start) ||
39236c6e 10010 (tmp_entry == vm_map_to_entry(src_map))) {
1c79356b 10011 RETURN(KERN_INVALID_ADDRESS);
39236c6e 10012 }
1c79356b
A
10013 }
10014
10015 /*
10016 * If the source should be destroyed, do it now, since the
10017 * copy was successful.
10018 */
10019 if (src_destroy) {
39236c6e
A
10020 (void) vm_map_delete(
10021 src_map,
10022 vm_map_trunc_page(src_addr,
10023 VM_MAP_PAGE_MASK(src_map)),
10024 src_end,
10025 ((src_map == kernel_map) ?
10026 VM_MAP_REMOVE_KUNWIRE :
10027 VM_MAP_NO_FLAGS),
10028 VM_MAP_NULL);
2d21ac55
A
10029 } else {
10030 /* fix up the damage we did in the base map */
39236c6e
A
10031 vm_map_simplify_range(
10032 src_map,
10033 vm_map_trunc_page(src_addr,
10034 VM_MAP_PAGE_MASK(src_map)),
10035 vm_map_round_page(src_end,
10036 VM_MAP_PAGE_MASK(src_map)));
1c79356b
A
10037 }
10038
10039 vm_map_unlock(src_map);
10040
39236c6e 10041 if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) {
fe8ab488
A
10042 vm_map_offset_t original_start, original_offset, original_end;
10043
39236c6e
A
10044 assert(VM_MAP_COPY_PAGE_MASK(copy) == PAGE_MASK);
10045
10046 /* adjust alignment of first copy_entry's "vme_start" */
10047 tmp_entry = vm_map_copy_first_entry(copy);
10048 if (tmp_entry != vm_map_copy_to_entry(copy)) {
10049 vm_map_offset_t adjustment;
fe8ab488
A
10050
10051 original_start = tmp_entry->vme_start;
3e170ce0 10052 original_offset = VME_OFFSET(tmp_entry);
fe8ab488
A
10053
10054 /* map-align the start of the first copy entry... */
10055 adjustment = (tmp_entry->vme_start -
10056 vm_map_trunc_page(
10057 tmp_entry->vme_start,
10058 VM_MAP_PAGE_MASK(src_map)));
10059 tmp_entry->vme_start -= adjustment;
3e170ce0
A
10060 VME_OFFSET_SET(tmp_entry,
10061 VME_OFFSET(tmp_entry) - adjustment);
fe8ab488
A
10062 copy_addr -= adjustment;
10063 assert(tmp_entry->vme_start < tmp_entry->vme_end);
10064 /* ... adjust for mis-aligned start of copy range */
39236c6e
A
10065 adjustment =
10066 (vm_map_trunc_page(copy->offset,
10067 PAGE_MASK) -
10068 vm_map_trunc_page(copy->offset,
10069 VM_MAP_PAGE_MASK(src_map)));
10070 if (adjustment) {
10071 assert(page_aligned(adjustment));
10072 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
10073 tmp_entry->vme_start += adjustment;
3e170ce0
A
10074 VME_OFFSET_SET(tmp_entry,
10075 (VME_OFFSET(tmp_entry) +
10076 adjustment));
39236c6e
A
10077 copy_addr += adjustment;
10078 assert(tmp_entry->vme_start < tmp_entry->vme_end);
10079 }
fe8ab488
A
10080
10081 /*
10082 * Assert that the adjustments haven't exposed
10083 * more than was originally copied...
10084 */
10085 assert(tmp_entry->vme_start >= original_start);
3e170ce0 10086 assert(VME_OFFSET(tmp_entry) >= original_offset);
fe8ab488
A
10087 /*
 10088 * ... and that it did not adjust outside of
 10089 * a single 16K page.
10090 */
10091 assert(vm_map_trunc_page(tmp_entry->vme_start,
10092 VM_MAP_PAGE_MASK(src_map)) ==
10093 vm_map_trunc_page(original_start,
10094 VM_MAP_PAGE_MASK(src_map)));
39236c6e
A
10095 }
10096
10097 /* adjust alignment of last copy_entry's "vme_end" */
10098 tmp_entry = vm_map_copy_last_entry(copy);
10099 if (tmp_entry != vm_map_copy_to_entry(copy)) {
10100 vm_map_offset_t adjustment;
fe8ab488
A
10101
10102 original_end = tmp_entry->vme_end;
10103
10104 /* map-align the end of the last copy entry... */
10105 tmp_entry->vme_end =
10106 vm_map_round_page(tmp_entry->vme_end,
10107 VM_MAP_PAGE_MASK(src_map));
10108 /* ... adjust for mis-aligned end of copy range */
39236c6e
A
10109 adjustment =
10110 (vm_map_round_page((copy->offset +
10111 copy->size),
10112 VM_MAP_PAGE_MASK(src_map)) -
10113 vm_map_round_page((copy->offset +
10114 copy->size),
10115 PAGE_MASK));
10116 if (adjustment) {
10117 assert(page_aligned(adjustment));
10118 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
10119 tmp_entry->vme_end -= adjustment;
10120 assert(tmp_entry->vme_start < tmp_entry->vme_end);
10121 }
fe8ab488
A
10122
10123 /*
10124 * Assert that the adjustments haven't exposed
10125 * more than was originally copied...
10126 */
10127 assert(tmp_entry->vme_end <= original_end);
10128 /*
 10129 * ... and that it did not adjust outside of
 10130 * a single 16K page.
10131 */
10132 assert(vm_map_round_page(tmp_entry->vme_end,
10133 VM_MAP_PAGE_MASK(src_map)) ==
10134 vm_map_round_page(original_end,
10135 VM_MAP_PAGE_MASK(src_map)));
39236c6e
A
10136 }
10137 }
10138
1c79356b
A
10139 /* Fix-up start and end points in copy. This is necessary */
10140 /* when the various entries in the copy object were picked */
10141 /* up from different sub-maps */
10142
10143 tmp_entry = vm_map_copy_first_entry(copy);
fe8ab488 10144 copy_size = 0; /* compute actual size */
1c79356b 10145 while (tmp_entry != vm_map_copy_to_entry(copy)) {
39236c6e
A
10146 assert(VM_MAP_PAGE_ALIGNED(
10147 copy_addr + (tmp_entry->vme_end -
10148 tmp_entry->vme_start),
10149 VM_MAP_COPY_PAGE_MASK(copy)));
10150 assert(VM_MAP_PAGE_ALIGNED(
10151 copy_addr,
10152 VM_MAP_COPY_PAGE_MASK(copy)));
10153
10154 /*
10155 * The copy_entries will be injected directly into the
10156 * destination map and might not be "map aligned" there...
10157 */
10158 tmp_entry->map_aligned = FALSE;
10159
1c79356b
A
10160 tmp_entry->vme_end = copy_addr +
10161 (tmp_entry->vme_end - tmp_entry->vme_start);
10162 tmp_entry->vme_start = copy_addr;
e2d2fc5c 10163 assert(tmp_entry->vme_start < tmp_entry->vme_end);
1c79356b 10164 copy_addr += tmp_entry->vme_end - tmp_entry->vme_start;
fe8ab488 10165 copy_size += tmp_entry->vme_end - tmp_entry->vme_start;
1c79356b
A
10166 tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next;
10167 }
10168
fe8ab488
A
10169 if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT &&
10170 copy_size < copy->size) {
10171 /*
10172 * The actual size of the VM map copy is smaller than what
10173 * was requested by the caller. This must be because some
10174 * PAGE_SIZE-sized pages are missing at the end of the last
10175 * VM_MAP_PAGE_SIZE(src_map)-sized chunk of the range.
10176 * The caller might not have been aware of those missing
10177 * pages and might not want to be aware of it, which is
10178 * fine as long as they don't try to access (and crash on)
10179 * those missing pages.
10180 * Let's adjust the size of the "copy", to avoid failing
10181 * in vm_map_copyout() or vm_map_copy_overwrite().
10182 */
10183 assert(vm_map_round_page(copy_size,
10184 VM_MAP_PAGE_MASK(src_map)) ==
10185 vm_map_round_page(copy->size,
10186 VM_MAP_PAGE_MASK(src_map)));
10187 copy->size = copy_size;
10188 }
10189
1c79356b
A
10190 *copy_result = copy;
10191 return(KERN_SUCCESS);
10192
10193#undef RETURN
10194}
10195
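/*
 * Illustrative sketch (editorial addition): vm_map_copyin_common()
 * above only exposes the SRC_DESTROY and USE_MAXPROT flags, so a
 * caller that wants the purgeable state of the source objects carried
 * over (as the corpse-style fork path below does) calls
 * vm_map_copyin_internal() directly.  "map", "start" and "size" are
 * assumed to be supplied by the caller.
 *
 *	vm_map_copy_t	copy;
 *	kern_return_t	kr;
 *
 *	kr = vm_map_copyin_internal(map, start, size,
 *				    VM_MAP_COPYIN_USE_MAXPROT |
 *				    VM_MAP_COPYIN_PRESERVE_PURGEABLE,
 *				    &copy);
 */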
39236c6e
A
10196kern_return_t
10197vm_map_copy_extract(
10198 vm_map_t src_map,
10199 vm_map_address_t src_addr,
10200 vm_map_size_t len,
10201 vm_map_copy_t *copy_result, /* OUT */
10202 vm_prot_t *cur_prot, /* OUT */
10203 vm_prot_t *max_prot)
10204{
10205 vm_map_offset_t src_start, src_end;
10206 vm_map_copy_t copy;
10207 kern_return_t kr;
10208
10209 /*
10210 * Check for copies of zero bytes.
10211 */
10212
10213 if (len == 0) {
10214 *copy_result = VM_MAP_COPY_NULL;
10215 return(KERN_SUCCESS);
10216 }
10217
10218 /*
10219 * Check that the end address doesn't overflow
10220 */
10221 src_end = src_addr + len;
10222 if (src_end < src_addr)
10223 return KERN_INVALID_ADDRESS;
10224
10225 /*
10226 * Compute (page aligned) start and end of region
10227 */
10228 src_start = vm_map_trunc_page(src_addr, PAGE_MASK);
10229 src_end = vm_map_round_page(src_end, PAGE_MASK);
10230
10231 /*
10232 * Allocate a header element for the list.
10233 *
10234 * Use the start and end in the header to
10235 * remember the endpoints prior to rounding.
10236 */
10237
10238 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
04b8595b 10239 copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
39236c6e
A
10240 vm_map_copy_first_entry(copy) =
10241 vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
10242 copy->type = VM_MAP_COPY_ENTRY_LIST;
10243 copy->cpy_hdr.nentries = 0;
10244 copy->cpy_hdr.entries_pageable = TRUE;
10245
10246 vm_map_store_init(&copy->cpy_hdr);
10247
10248 copy->offset = 0;
10249 copy->size = len;
10250
10251 kr = vm_map_remap_extract(src_map,
10252 src_addr,
10253 len,
10254 FALSE, /* copy */
10255 &copy->cpy_hdr,
10256 cur_prot,
10257 max_prot,
10258 VM_INHERIT_SHARE,
39037602
A
10259 TRUE, /* pageable */
10260 FALSE); /* same_map */
39236c6e
A
10261 if (kr != KERN_SUCCESS) {
10262 vm_map_copy_discard(copy);
10263 return kr;
10264 }
10265
10266 *copy_result = copy;
10267 return KERN_SUCCESS;
10268}
10269
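/*
 * Illustrative sketch (editorial addition): vm_map_copy_extract()
 * hands back the effective protections of the extracted range along
 * with the copy object.  "map", "addr" and "size" are assumed to be
 * supplied by the caller.
 *
 *	vm_map_copy_t	copy;
 *	vm_prot_t	cur_prot, max_prot;
 *	kern_return_t	kr;
 *
 *	kr = vm_map_copy_extract(map, addr, size,
 *				 &copy, &cur_prot, &max_prot);
 *	if (kr != KERN_SUCCESS)
 *		return kr;
 *	... use "copy", then either consume it (e.g. via
 *	    vm_map_copyout) or release it with vm_map_copy_discard() ...
 */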
1c79356b
A
10270/*
10271 * vm_map_copyin_object:
10272 *
10273 * Create a copy object from an object.
10274 * Our caller donates an object reference.
10275 */
10276
10277kern_return_t
10278vm_map_copyin_object(
10279 vm_object_t object,
10280 vm_object_offset_t offset, /* offset of region in object */
10281 vm_object_size_t size, /* size of region in object */
10282 vm_map_copy_t *copy_result) /* OUT */
10283{
10284 vm_map_copy_t copy; /* Resulting copy */
10285
10286 /*
10287 * We drop the object into a special copy object
10288 * that contains the object directly.
10289 */
10290
10291 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
04b8595b 10292 copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
1c79356b
A
10293 copy->type = VM_MAP_COPY_OBJECT;
10294 copy->cpy_object = object;
1c79356b
A
10295 copy->offset = offset;
10296 copy->size = size;
10297
10298 *copy_result = copy;
10299 return(KERN_SUCCESS);
10300}
10301
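/*
 * Illustrative sketch (editorial addition): wrapping an existing VM
 * object in a copy object.  The caller donates an object reference,
 * so one is taken explicitly here; "object" and "size" are assumed
 * to be supplied by the caller.
 *
 *	vm_map_copy_t	copy;
 *
 *	vm_object_reference(object);
 *	(void) vm_map_copyin_object(object, 0, size, &copy);
 */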
91447636 10302static void
1c79356b
A
10303vm_map_fork_share(
10304 vm_map_t old_map,
10305 vm_map_entry_t old_entry,
10306 vm_map_t new_map)
10307{
10308 vm_object_t object;
10309 vm_map_entry_t new_entry;
1c79356b
A
10310
10311 /*
10312 * New sharing code. New map entry
10313 * references original object. Internal
10314 * objects use asynchronous copy algorithm for
10315 * future copies. First make sure we have
10316 * the right object. If we need a shadow,
10317 * or someone else already has one, then
10318 * make a new shadow and share it.
10319 */
10320
3e170ce0 10321 object = VME_OBJECT(old_entry);
1c79356b
A
10322 if (old_entry->is_sub_map) {
10323 assert(old_entry->wired_count == 0);
0c530ab8 10324#ifndef NO_NESTED_PMAP
1c79356b 10325 if(old_entry->use_pmap) {
91447636
A
10326 kern_return_t result;
10327
1c79356b 10328 result = pmap_nest(new_map->pmap,
3e170ce0 10329 (VME_SUBMAP(old_entry))->pmap,
2d21ac55
A
10330 (addr64_t)old_entry->vme_start,
10331 (addr64_t)old_entry->vme_start,
10332 (uint64_t)(old_entry->vme_end - old_entry->vme_start));
1c79356b
A
10333 if(result)
10334 panic("vm_map_fork_share: pmap_nest failed!");
10335 }
0c530ab8 10336#endif /* NO_NESTED_PMAP */
1c79356b 10337 } else if (object == VM_OBJECT_NULL) {
91447636 10338 object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
2d21ac55 10339 old_entry->vme_start));
3e170ce0
A
10340 VME_OFFSET_SET(old_entry, 0);
10341 VME_OBJECT_SET(old_entry, object);
fe8ab488 10342 old_entry->use_pmap = TRUE;
1c79356b
A
10343 assert(!old_entry->needs_copy);
10344 } else if (object->copy_strategy !=
2d21ac55 10345 MEMORY_OBJECT_COPY_SYMMETRIC) {
1c79356b
A
10346
10347 /*
10348 * We are already using an asymmetric
10349 * copy, and therefore we already have
10350 * the right object.
10351 */
10352
10353 assert(! old_entry->needs_copy);
10354 }
10355 else if (old_entry->needs_copy || /* case 1 */
10356 object->shadowed || /* case 2 */
10357 (!object->true_share && /* case 3 */
2d21ac55 10358 !old_entry->is_shared &&
6d2010ae 10359 (object->vo_size >
2d21ac55
A
10360 (vm_map_size_t)(old_entry->vme_end -
10361 old_entry->vme_start)))) {
1c79356b
A
10362
10363 /*
10364 * We need to create a shadow.
10365 * There are three cases here.
10366 * In the first case, we need to
10367 * complete a deferred symmetrical
10368 * copy that we participated in.
10369 * In the second and third cases,
10370 * we need to create the shadow so
10371 * that changes that we make to the
10372 * object do not interfere with
10373 * any symmetrical copies which
 10374 * have occurred (case 2) or which
10375 * might occur (case 3).
10376 *
10377 * The first case is when we had
10378 * deferred shadow object creation
10379 * via the entry->needs_copy mechanism.
10380 * This mechanism only works when
10381 * only one entry points to the source
10382 * object, and we are about to create
10383 * a second entry pointing to the
10384 * same object. The problem is that
10385 * there is no way of mapping from
10386 * an object to the entries pointing
10387 * to it. (Deferred shadow creation
 10388 * works with one entry because it occurs
10389 * at fault time, and we walk from the
10390 * entry to the object when handling
10391 * the fault.)
10392 *
10393 * The second case is when the object
10394 * to be shared has already been copied
10395 * with a symmetric copy, but we point
10396 * directly to the object without
10397 * needs_copy set in our entry. (This
10398 * can happen because different ranges
10399 * of an object can be pointed to by
10400 * different entries. In particular,
10401 * a single entry pointing to an object
10402 * can be split by a call to vm_inherit,
10403 * which, combined with task_create, can
10404 * result in the different entries
10405 * having different needs_copy values.)
10406 * The shadowed flag in the object allows
10407 * us to detect this case. The problem
10408 * with this case is that if this object
10409 * has or will have shadows, then we
10410 * must not perform an asymmetric copy
10411 * of this object, since such a copy
10412 * allows the object to be changed, which
10413 * will break the previous symmetrical
10414 * copies (which rely upon the object
10415 * not changing). In a sense, the shadowed
10416 * flag says "don't change this object".
10417 * We fix this by creating a shadow
10418 * object for this object, and sharing
10419 * that. This works because we are free
10420 * to change the shadow object (and thus
10421 * to use an asymmetric copy strategy);
10422 * this is also semantically correct,
10423 * since this object is temporary, and
10424 * therefore a copy of the object is
10425 * as good as the object itself. (This
10426 * is not true for permanent objects,
10427 * since the pager needs to see changes,
10428 * which won't happen if the changes
10429 * are made to a copy.)
10430 *
10431 * The third case is when the object
10432 * to be shared has parts sticking
10433 * outside of the entry we're working
10434 * with, and thus may in the future
10435 * be subject to a symmetrical copy.
10436 * (This is a preemptive version of
10437 * case 2.)
10438 */
3e170ce0
A
10439 VME_OBJECT_SHADOW(old_entry,
10440 (vm_map_size_t) (old_entry->vme_end -
10441 old_entry->vme_start));
1c79356b
A
10442
10443 /*
10444 * If we're making a shadow for other than
10445 * copy on write reasons, then we have
10446 * to remove write permission.
10447 */
10448
1c79356b
A
10449 if (!old_entry->needs_copy &&
10450 (old_entry->protection & VM_PROT_WRITE)) {
0c530ab8
A
10451 vm_prot_t prot;
10452
10453 prot = old_entry->protection & ~VM_PROT_WRITE;
2d21ac55 10454
3e170ce0 10455 if (override_nx(old_map, VME_ALIAS(old_entry)) && prot)
0c530ab8 10456 prot |= VM_PROT_EXECUTE;
2d21ac55 10457
316670eb 10458 if (old_map->mapped_in_other_pmaps) {
9bccf70c 10459 vm_object_pmap_protect(
3e170ce0
A
10460 VME_OBJECT(old_entry),
10461 VME_OFFSET(old_entry),
9bccf70c 10462 (old_entry->vme_end -
2d21ac55 10463 old_entry->vme_start),
9bccf70c
A
10464 PMAP_NULL,
10465 old_entry->vme_start,
0c530ab8 10466 prot);
1c79356b 10467 } else {
9bccf70c 10468 pmap_protect(old_map->pmap,
2d21ac55
A
10469 old_entry->vme_start,
10470 old_entry->vme_end,
10471 prot);
1c79356b
A
10472 }
10473 }
10474
10475 old_entry->needs_copy = FALSE;
3e170ce0 10476 object = VME_OBJECT(old_entry);
1c79356b 10477 }
6d2010ae 10478
1c79356b
A
10479
10480 /*
10481 * If object was using a symmetric copy strategy,
10482 * change its copy strategy to the default
10483 * asymmetric copy strategy, which is copy_delay
10484 * in the non-norma case and copy_call in the
10485 * norma case. Bump the reference count for the
10486 * new entry.
10487 */
10488
10489 if(old_entry->is_sub_map) {
3e170ce0
A
10490 vm_map_lock(VME_SUBMAP(old_entry));
10491 vm_map_reference(VME_SUBMAP(old_entry));
10492 vm_map_unlock(VME_SUBMAP(old_entry));
1c79356b
A
10493 } else {
10494 vm_object_lock(object);
2d21ac55 10495 vm_object_reference_locked(object);
1c79356b
A
10496 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
10497 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
10498 }
10499 vm_object_unlock(object);
10500 }
10501
10502 /*
10503 * Clone the entry, using object ref from above.
10504 * Mark both entries as shared.
10505 */
10506
7ddcb079
A
10507 new_entry = vm_map_entry_create(new_map, FALSE); /* Never the kernel
10508 * map or descendants */
1c79356b
A
10509 vm_map_entry_copy(new_entry, old_entry);
10510 old_entry->is_shared = TRUE;
10511 new_entry->is_shared = TRUE;
39037602
A
10512
10513 /*
 10514 * If the old entry's inheritance is VM_INHERIT_NONE,
 10515 * the new entry is for a corpse fork; remove the
 10516 * write permission from the new entry.
10517 */
10518 if (old_entry->inheritance == VM_INHERIT_NONE) {
10519
10520 new_entry->protection &= ~VM_PROT_WRITE;
10521 new_entry->max_protection &= ~VM_PROT_WRITE;
10522 }
1c79356b
A
10523
10524 /*
10525 * Insert the entry into the new map -- we
10526 * know we're inserting at the end of the new
10527 * map.
10528 */
10529
6d2010ae 10530 vm_map_store_entry_link(new_map, vm_map_last_entry(new_map), new_entry);
1c79356b
A
10531
10532 /*
10533 * Update the physical map
10534 */
10535
10536 if (old_entry->is_sub_map) {
10537 /* Bill Angell pmap support goes here */
10538 } else {
10539 pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
2d21ac55
A
10540 old_entry->vme_end - old_entry->vme_start,
10541 old_entry->vme_start);
1c79356b
A
10542 }
10543}
10544
91447636 10545static boolean_t
1c79356b
A
10546vm_map_fork_copy(
10547 vm_map_t old_map,
10548 vm_map_entry_t *old_entry_p,
39037602
A
10549 vm_map_t new_map,
10550 int vm_map_copyin_flags)
1c79356b
A
10551{
10552 vm_map_entry_t old_entry = *old_entry_p;
91447636
A
10553 vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start;
10554 vm_map_offset_t start = old_entry->vme_start;
1c79356b
A
10555 vm_map_copy_t copy;
10556 vm_map_entry_t last = vm_map_last_entry(new_map);
10557
10558 vm_map_unlock(old_map);
10559 /*
10560 * Use maxprot version of copyin because we
10561 * care about whether this memory can ever
10562 * be accessed, not just whether it's accessible
10563 * right now.
10564 */
39037602
A
10565 vm_map_copyin_flags |= VM_MAP_COPYIN_USE_MAXPROT;
10566 if (vm_map_copyin_internal(old_map, start, entry_size,
10567 vm_map_copyin_flags, &copy)
1c79356b
A
10568 != KERN_SUCCESS) {
10569 /*
10570 * The map might have changed while it
10571 * was unlocked, check it again. Skip
10572 * any blank space or permanently
10573 * unreadable region.
10574 */
10575 vm_map_lock(old_map);
10576 if (!vm_map_lookup_entry(old_map, start, &last) ||
55e303ae 10577 (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) {
1c79356b
A
10578 last = last->vme_next;
10579 }
10580 *old_entry_p = last;
10581
10582 /*
10583 * XXX For some error returns, want to
10584 * XXX skip to the next element. Note
10585 * that INVALID_ADDRESS and
10586 * PROTECTION_FAILURE are handled above.
10587 */
10588
10589 return FALSE;
10590 }
10591
10592 /*
10593 * Insert the copy into the new map
10594 */
10595
10596 vm_map_copy_insert(new_map, last, copy);
10597
10598 /*
10599 * Pick up the traversal at the end of
10600 * the copied region.
10601 */
10602
10603 vm_map_lock(old_map);
10604 start += entry_size;
10605 if (! vm_map_lookup_entry(old_map, start, &last)) {
10606 last = last->vme_next;
10607 } else {
2d21ac55
A
10608 if (last->vme_start == start) {
10609 /*
10610 * No need to clip here and we don't
10611 * want to cause any unnecessary
10612 * unnesting...
10613 */
10614 } else {
10615 vm_map_clip_start(old_map, last, start);
10616 }
1c79356b
A
10617 }
10618 *old_entry_p = last;
10619
10620 return TRUE;
10621}
10622
10623/*
10624 * vm_map_fork:
10625 *
10626 * Create and return a new map based on the old
10627 * map, according to the inheritance values on the
39037602 10628 * regions in that map and the options.
1c79356b
A
10629 *
10630 * The source map must not be locked.
10631 */
10632vm_map_t
10633vm_map_fork(
316670eb 10634 ledger_t ledger,
39037602
A
10635 vm_map_t old_map,
10636 int options)
1c79356b 10637{
2d21ac55 10638 pmap_t new_pmap;
1c79356b
A
10639 vm_map_t new_map;
10640 vm_map_entry_t old_entry;
91447636 10641 vm_map_size_t new_size = 0, entry_size;
1c79356b
A
10642 vm_map_entry_t new_entry;
10643 boolean_t src_needs_copy;
10644 boolean_t new_entry_needs_copy;
3e170ce0 10645 boolean_t pmap_is64bit;
39037602
A
10646 int vm_map_copyin_flags;
10647
10648 if (options & ~(VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
10649 VM_MAP_FORK_PRESERVE_PURGEABLE)) {
10650 /* unsupported option */
10651 return VM_MAP_NULL;
10652 }
1c79356b 10653
3e170ce0 10654 pmap_is64bit =
b0d623f7 10655#if defined(__i386__) || defined(__x86_64__)
3e170ce0 10656 old_map->pmap->pm_task_map != TASK_MAP_32BIT;
b0d623f7 10657#else
316670eb 10658#error Unknown architecture.
b0d623f7 10659#endif
3e170ce0
A
10660
10661 new_pmap = pmap_create(ledger, (vm_map_size_t) 0, pmap_is64bit);
2d21ac55 10662
1c79356b
A
10663 vm_map_reference_swap(old_map);
10664 vm_map_lock(old_map);
10665
10666 new_map = vm_map_create(new_pmap,
2d21ac55
A
10667 old_map->min_offset,
10668 old_map->max_offset,
10669 old_map->hdr.entries_pageable);
39037602 10670 vm_commit_pagezero_status(new_map);
39236c6e
A
10671 /* inherit the parent map's page size */
10672 vm_map_set_page_shift(new_map, VM_MAP_PAGE_SHIFT(old_map));
1c79356b 10673 for (
2d21ac55
A
10674 old_entry = vm_map_first_entry(old_map);
10675 old_entry != vm_map_to_entry(old_map);
10676 ) {
1c79356b
A
10677
10678 entry_size = old_entry->vme_end - old_entry->vme_start;
10679
10680 switch (old_entry->inheritance) {
10681 case VM_INHERIT_NONE:
39037602
A
10682 /*
10683 * Skip making a share entry if VM_MAP_FORK_SHARE_IF_INHERIT_NONE
 10684 * is not passed or if the entry is backed by a device pager.
10685 */
10686 if ((!(options & VM_MAP_FORK_SHARE_IF_INHERIT_NONE)) ||
10687 (!old_entry->is_sub_map &&
10688 VME_OBJECT(old_entry) != NULL &&
10689 VME_OBJECT(old_entry)->pager != NULL &&
10690 is_device_pager_ops(VME_OBJECT(old_entry)->pager->mo_pager_ops))) {
10691 break;
10692 }
10693 /* FALLTHROUGH */
1c79356b
A
10694
10695 case VM_INHERIT_SHARE:
10696 vm_map_fork_share(old_map, old_entry, new_map);
10697 new_size += entry_size;
10698 break;
10699
10700 case VM_INHERIT_COPY:
10701
10702 /*
10703 * Inline the copy_quickly case;
10704 * upon failure, fall back on call
10705 * to vm_map_fork_copy.
10706 */
10707
10708 if(old_entry->is_sub_map)
10709 break;
9bccf70c 10710 if ((old_entry->wired_count != 0) ||
3e170ce0
A
10711 ((VME_OBJECT(old_entry) != NULL) &&
10712 (VME_OBJECT(old_entry)->true_share))) {
1c79356b
A
10713 goto slow_vm_map_fork_copy;
10714 }
10715
7ddcb079 10716 new_entry = vm_map_entry_create(new_map, FALSE); /* never the kernel map or descendants */
1c79356b 10717 vm_map_entry_copy(new_entry, old_entry);
fe8ab488
A
10718 if (new_entry->is_sub_map) {
10719 /* clear address space specifics */
10720 new_entry->use_pmap = FALSE;
10721 }
1c79356b
A
10722
10723 if (! vm_object_copy_quickly(
3e170ce0
A
10724 &VME_OBJECT(new_entry),
10725 VME_OFFSET(old_entry),
2d21ac55
A
10726 (old_entry->vme_end -
10727 old_entry->vme_start),
10728 &src_needs_copy,
10729 &new_entry_needs_copy)) {
1c79356b
A
10730 vm_map_entry_dispose(new_map, new_entry);
10731 goto slow_vm_map_fork_copy;
10732 }
10733
10734 /*
10735 * Handle copy-on-write obligations
10736 */
10737
10738 if (src_needs_copy && !old_entry->needs_copy) {
0c530ab8
A
10739 vm_prot_t prot;
10740
10741 prot = old_entry->protection & ~VM_PROT_WRITE;
2d21ac55 10742
3e170ce0
A
10743 if (override_nx(old_map, VME_ALIAS(old_entry))
10744 && prot)
0c530ab8 10745 prot |= VM_PROT_EXECUTE;
2d21ac55 10746
1c79356b 10747 vm_object_pmap_protect(
3e170ce0
A
10748 VME_OBJECT(old_entry),
10749 VME_OFFSET(old_entry),
1c79356b 10750 (old_entry->vme_end -
2d21ac55 10751 old_entry->vme_start),
1c79356b 10752 ((old_entry->is_shared
316670eb 10753 || old_map->mapped_in_other_pmaps)
2d21ac55
A
10754 ? PMAP_NULL :
10755 old_map->pmap),
1c79356b 10756 old_entry->vme_start,
0c530ab8 10757 prot);
1c79356b 10758
3e170ce0 10759 assert(old_entry->wired_count == 0);
1c79356b
A
10760 old_entry->needs_copy = TRUE;
10761 }
10762 new_entry->needs_copy = new_entry_needs_copy;
10763
10764 /*
10765 * Insert the entry at the end
10766 * of the map.
10767 */
10768
6d2010ae 10769 vm_map_store_entry_link(new_map, vm_map_last_entry(new_map),
1c79356b
A
10770 new_entry);
10771 new_size += entry_size;
10772 break;
10773
10774 slow_vm_map_fork_copy:
39037602
A
10775 vm_map_copyin_flags = 0;
10776 if (options & VM_MAP_FORK_PRESERVE_PURGEABLE) {
10777 vm_map_copyin_flags |=
10778 VM_MAP_COPYIN_PRESERVE_PURGEABLE;
10779 }
10780 if (vm_map_fork_copy(old_map,
10781 &old_entry,
10782 new_map,
10783 vm_map_copyin_flags)) {
1c79356b
A
10784 new_size += entry_size;
10785 }
10786 continue;
10787 }
10788 old_entry = old_entry->vme_next;
10789 }
10790
fe8ab488 10791
1c79356b
A
10792 new_map->size = new_size;
10793 vm_map_unlock(old_map);
10794 vm_map_deallocate(old_map);
10795
10796 return(new_map);
10797}
10798
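/*
 * Illustrative sketch (editorial addition): the two flavors of fork
 * supported by the options above.  "ledger" and "old_map" are assumed
 * to be supplied by the caller (e.g. the task-creation path).
 *
 *	Ordinary fork, honoring each entry's inheritance:
 *		new_map = vm_map_fork(ledger, old_map, 0);
 *
 *	Corpse-style fork, sharing even VM_INHERIT_NONE entries and
 *	preserving purgeable state:
 *		corpse_map = vm_map_fork(ledger, old_map,
 *			VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
 *			VM_MAP_FORK_PRESERVE_PURGEABLE);
 */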
2d21ac55
A
10799/*
10800 * vm_map_exec:
10801 *
10802 * Setup the "new_map" with the proper execution environment according
10803 * to the type of executable (platform, 64bit, chroot environment).
10804 * Map the comm page and shared region, etc...
10805 */
10806kern_return_t
10807vm_map_exec(
10808 vm_map_t new_map,
10809 task_t task,
39037602 10810 boolean_t is64bit,
2d21ac55
A
10811 void *fsroot,
10812 cpu_type_t cpu)
10813{
10814 SHARED_REGION_TRACE_DEBUG(
10815 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): ->\n",
fe8ab488
A
10816 (void *)VM_KERNEL_ADDRPERM(current_task()),
10817 (void *)VM_KERNEL_ADDRPERM(new_map),
10818 (void *)VM_KERNEL_ADDRPERM(task),
10819 (void *)VM_KERNEL_ADDRPERM(fsroot),
10820 cpu));
39037602
A
10821 (void) vm_commpage_enter(new_map, task, is64bit);
10822 (void) vm_shared_region_enter(new_map, task, is64bit, fsroot, cpu);
2d21ac55
A
10823 SHARED_REGION_TRACE_DEBUG(
10824 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): <-\n",
fe8ab488
A
10825 (void *)VM_KERNEL_ADDRPERM(current_task()),
10826 (void *)VM_KERNEL_ADDRPERM(new_map),
10827 (void *)VM_KERNEL_ADDRPERM(task),
10828 (void *)VM_KERNEL_ADDRPERM(fsroot),
10829 cpu));
2d21ac55
A
10830 return KERN_SUCCESS;
10831}
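/*
 * Illustrative sketch (editorial addition): vm_map_exec() is intended
 * for the exec path, after the new map has been created and before
 * any user code runs in it.  "map", "task", "is64bit", "fsroot" and
 * "cpu" are assumed to be supplied by the image-activation code.
 *
 *	kr = vm_map_exec(map, task, is64bit, fsroot, cpu);
 *	assert(kr == KERN_SUCCESS);
 */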
1c79356b
A
10832
10833/*
10834 * vm_map_lookup_locked:
10835 *
10836 * Finds the VM object, offset, and
10837 * protection for a given virtual address in the
10838 * specified map, assuming a page fault of the
10839 * type specified.
10840 *
10841 * Returns the (object, offset, protection) for
10842 * this address, whether it is wired down, and whether
10843 * this map has the only reference to the data in question.
10844 * In order to later verify this lookup, a "version"
10845 * is returned.
10846 *
10847 * The map MUST be locked by the caller and WILL be
10848 * locked on exit. In order to guarantee the
10849 * existence of the returned object, it is returned
10850 * locked.
10851 *
10852 * If a lookup is requested with "write protection"
10853 * specified, the map may be changed to perform virtual
10854 * copying operations, although the data referenced will
10855 * remain the same.
10856 */
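/*
 * Illustrative sketch (editorial addition): the fault-handling pattern
 * this routine is designed for.  The caller locks the map, performs
 * the lookup against the locked map, and uses the returned "version"
 * to revalidate the lookup if the map lock is later dropped and
 * re-taken.  "map", "vaddr" and "fault_type" are assumed to be
 * supplied by the caller, and "OBJECT_LOCK_EXCLUSIVE" is assumed to
 * be the desired object lock type.
 *
 *	vm_map_lock_read(map);
 *	kr = vm_map_lookup_locked(&map, vaddr, fault_type,
 *				  OBJECT_LOCK_EXCLUSIVE,
 *				  &version, &object, &offset,
 *				  &prot, &wired, &fault_info,
 *				  &real_map);
 *	... handle the fault against (object, offset) ...
 *	vm_object_unlock(object);
 *	if (real_map != map)
 *		vm_map_unlock(real_map);
 *	vm_map_unlock_read(map);
 */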
10857kern_return_t
10858vm_map_lookup_locked(
10859 vm_map_t *var_map, /* IN/OUT */
2d21ac55 10860 vm_map_offset_t vaddr,
91447636 10861 vm_prot_t fault_type,
2d21ac55 10862 int object_lock_type,
1c79356b
A
10863 vm_map_version_t *out_version, /* OUT */
10864 vm_object_t *object, /* OUT */
10865 vm_object_offset_t *offset, /* OUT */
10866 vm_prot_t *out_prot, /* OUT */
10867 boolean_t *wired, /* OUT */
2d21ac55 10868 vm_object_fault_info_t fault_info, /* OUT */
91447636 10869 vm_map_t *real_map)
1c79356b
A
10870{
10871 vm_map_entry_t entry;
39037602 10872 vm_map_t map = *var_map;
1c79356b
A
10873 vm_map_t old_map = *var_map;
10874 vm_map_t cow_sub_map_parent = VM_MAP_NULL;
91447636
A
10875 vm_map_offset_t cow_parent_vaddr = 0;
10876 vm_map_offset_t old_start = 0;
10877 vm_map_offset_t old_end = 0;
39037602 10878 vm_prot_t prot;
6d2010ae 10879 boolean_t mask_protections;
fe8ab488 10880 boolean_t force_copy;
6d2010ae
A
10881 vm_prot_t original_fault_type;
10882
10883 /*
 10884 * VM_PROT_IS_MASK means that the caller wants us to use "fault_type"
10885 * as a mask against the mapping's actual protections, not as an
10886 * absolute value.
10887 */
10888 mask_protections = (fault_type & VM_PROT_IS_MASK) ? TRUE : FALSE;
fe8ab488
A
10889 force_copy = (fault_type & VM_PROT_COPY) ? TRUE : FALSE;
10890 fault_type &= VM_PROT_ALL;
6d2010ae 10891 original_fault_type = fault_type;
1c79356b 10892
91447636 10893 *real_map = map;
6d2010ae
A
10894
10895RetryLookup:
10896 fault_type = original_fault_type;
1c79356b
A
10897
10898 /*
10899 * If the map has an interesting hint, try it before calling
10900 * full blown lookup routine.
10901 */
1c79356b 10902 entry = map->hint;
1c79356b
A
10903
10904 if ((entry == vm_map_to_entry(map)) ||
10905 (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
10906 vm_map_entry_t tmp_entry;
10907
10908 /*
10909 * Entry was either not a valid hint, or the vaddr
10910 * was not contained in the entry, so do a full lookup.
10911 */
10912 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
10913 if((cow_sub_map_parent) && (cow_sub_map_parent != map))
10914 vm_map_unlock(cow_sub_map_parent);
91447636 10915 if((*real_map != map)
2d21ac55 10916 && (*real_map != cow_sub_map_parent))
91447636 10917 vm_map_unlock(*real_map);
1c79356b
A
10918 return KERN_INVALID_ADDRESS;
10919 }
10920
10921 entry = tmp_entry;
10922 }
10923 if(map == old_map) {
10924 old_start = entry->vme_start;
10925 old_end = entry->vme_end;
10926 }
10927
10928 /*
10929 * Handle submaps. Drop lock on upper map, submap is
10930 * returned locked.
10931 */
10932
10933submap_recurse:
10934 if (entry->is_sub_map) {
91447636
A
10935 vm_map_offset_t local_vaddr;
10936 vm_map_offset_t end_delta;
10937 vm_map_offset_t start_delta;
1c79356b
A
10938 vm_map_entry_t submap_entry;
10939 boolean_t mapped_needs_copy=FALSE;
10940
10941 local_vaddr = vaddr;
10942
39037602
A
10943 if ((entry->use_pmap &&
10944 ! ((fault_type & VM_PROT_WRITE) ||
10945 force_copy))) {
91447636
A
10946 /* if real_map equals map we unlock below */
10947 if ((*real_map != map) &&
2d21ac55 10948 (*real_map != cow_sub_map_parent))
91447636 10949 vm_map_unlock(*real_map);
3e170ce0 10950 *real_map = VME_SUBMAP(entry);
1c79356b
A
10951 }
10952
39037602
A
10953 if(entry->needs_copy &&
10954 ((fault_type & VM_PROT_WRITE) ||
10955 force_copy)) {
1c79356b
A
10956 if (!mapped_needs_copy) {
10957 if (vm_map_lock_read_to_write(map)) {
10958 vm_map_lock_read(map);
99c3a104 10959 *real_map = map;
1c79356b
A
10960 goto RetryLookup;
10961 }
3e170ce0
A
10962 vm_map_lock_read(VME_SUBMAP(entry));
10963 *var_map = VME_SUBMAP(entry);
1c79356b
A
10964 cow_sub_map_parent = map;
10965 /* reset base to map before cow object */
10966 /* this is the map which will accept */
10967 /* the new cow object */
10968 old_start = entry->vme_start;
10969 old_end = entry->vme_end;
10970 cow_parent_vaddr = vaddr;
10971 mapped_needs_copy = TRUE;
10972 } else {
3e170ce0
A
10973 vm_map_lock_read(VME_SUBMAP(entry));
10974 *var_map = VME_SUBMAP(entry);
1c79356b 10975 if((cow_sub_map_parent != map) &&
2d21ac55 10976 (*real_map != map))
1c79356b
A
10977 vm_map_unlock(map);
10978 }
10979 } else {
3e170ce0
A
10980 vm_map_lock_read(VME_SUBMAP(entry));
10981 *var_map = VME_SUBMAP(entry);
1c79356b
A
 10982 /* leave the map locked if it is a target */
 10983 /* cow sub_map above; otherwise, just */
 10984 /* follow the maps down to the object. */
 10985 /* Here we unlock, knowing we are not */
 10986 /* revisiting the map. */
91447636 10987 if((*real_map != map) && (map != cow_sub_map_parent))
1c79356b
A
10988 vm_map_unlock_read(map);
10989 }
10990
99c3a104 10991 map = *var_map;
1c79356b
A
10992
10993 /* calculate the offset in the submap for vaddr */
3e170ce0 10994 local_vaddr = (local_vaddr - entry->vme_start) + VME_OFFSET(entry);
1c79356b 10995
2d21ac55 10996 RetrySubMap:
1c79356b
A
10997 if(!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
10998 if((cow_sub_map_parent) && (cow_sub_map_parent != map)){
10999 vm_map_unlock(cow_sub_map_parent);
11000 }
91447636 11001 if((*real_map != map)
2d21ac55 11002 && (*real_map != cow_sub_map_parent)) {
91447636 11003 vm_map_unlock(*real_map);
1c79356b 11004 }
91447636 11005 *real_map = map;
1c79356b
A
11006 return KERN_INVALID_ADDRESS;
11007 }
2d21ac55 11008
1c79356b
A
11009 /* find the attenuated shadow of the underlying object */
11010 /* on our target map */
11011
 11012 /* In plain English: the submap object may extend beyond the */
 11013 /* region mapped by the entry, or may only fill a portion */
11014 /* of it. For our purposes, we only care if the object */
11015 /* doesn't fill. In this case the area which will */
11016 /* ultimately be clipped in the top map will only need */
11017 /* to be as big as the portion of the underlying entry */
11018 /* which is mapped */
3e170ce0
A
11019 start_delta = submap_entry->vme_start > VME_OFFSET(entry) ?
11020 submap_entry->vme_start - VME_OFFSET(entry) : 0;
1c79356b
A
11021
11022 end_delta =
3e170ce0 11023 (VME_OFFSET(entry) + start_delta + (old_end - old_start)) <=
1c79356b 11024 submap_entry->vme_end ?
3e170ce0 11025 0 : (VME_OFFSET(entry) +
2d21ac55
A
11026 (old_end - old_start))
11027 - submap_entry->vme_end;
1c79356b
A
11028
11029 old_start += start_delta;
11030 old_end -= end_delta;
11031
11032 if(submap_entry->is_sub_map) {
11033 entry = submap_entry;
11034 vaddr = local_vaddr;
11035 goto submap_recurse;
11036 }
11037
39037602
A
11038 if (((fault_type & VM_PROT_WRITE) ||
11039 force_copy)
11040 && cow_sub_map_parent) {
1c79356b 11041
2d21ac55
A
11042 vm_object_t sub_object, copy_object;
11043 vm_object_offset_t copy_offset;
91447636
A
11044 vm_map_offset_t local_start;
11045 vm_map_offset_t local_end;
0b4e3aa0 11046 boolean_t copied_slowly = FALSE;
1c79356b
A
11047
11048 if (vm_map_lock_read_to_write(map)) {
11049 vm_map_lock_read(map);
11050 old_start -= start_delta;
11051 old_end += end_delta;
11052 goto RetrySubMap;
11053 }
0b4e3aa0
A
11054
11055
3e170ce0 11056 sub_object = VME_OBJECT(submap_entry);
2d21ac55
A
11057 if (sub_object == VM_OBJECT_NULL) {
11058 sub_object =
1c79356b 11059 vm_object_allocate(
91447636 11060 (vm_map_size_t)
2d21ac55
A
11061 (submap_entry->vme_end -
11062 submap_entry->vme_start));
3e170ce0
A
11063 VME_OBJECT_SET(submap_entry, sub_object);
11064 VME_OFFSET_SET(submap_entry, 0);
1c79356b
A
11065 }
11066 local_start = local_vaddr -
2d21ac55 11067 (cow_parent_vaddr - old_start);
1c79356b 11068 local_end = local_vaddr +
2d21ac55 11069 (old_end - cow_parent_vaddr);
1c79356b
A
11070 vm_map_clip_start(map, submap_entry, local_start);
11071 vm_map_clip_end(map, submap_entry, local_end);
fe8ab488
A
11072 if (submap_entry->is_sub_map) {
11073 /* unnesting was done when clipping */
11074 assert(!submap_entry->use_pmap);
11075 }
1c79356b
A
11076
 11077 /* This is the COW case; let's connect */
11078 /* an entry in our space to the underlying */
11079 /* object in the submap, bypassing the */
11080 /* submap. */
0b4e3aa0
A
11081
11082
2d21ac55 11083 if(submap_entry->wired_count != 0 ||
4a3eedf9
A
11084 (sub_object->copy_strategy ==
11085 MEMORY_OBJECT_COPY_NONE)) {
2d21ac55
A
11086 vm_object_lock(sub_object);
11087 vm_object_copy_slowly(sub_object,
3e170ce0 11088 VME_OFFSET(submap_entry),
2d21ac55
A
11089 (submap_entry->vme_end -
11090 submap_entry->vme_start),
11091 FALSE,
11092 &copy_object);
11093 copied_slowly = TRUE;
0b4e3aa0 11094 } else {
2d21ac55 11095
0b4e3aa0 11096 /* set up shadow object */
2d21ac55 11097 copy_object = sub_object;
39037602
A
11098 vm_object_lock(sub_object);
11099 vm_object_reference_locked(sub_object);
2d21ac55 11100 sub_object->shadowed = TRUE;
39037602
A
11101 vm_object_unlock(sub_object);
11102
3e170ce0 11103 assert(submap_entry->wired_count == 0);
0b4e3aa0 11104 submap_entry->needs_copy = TRUE;
0c530ab8
A
11105
11106 prot = submap_entry->protection & ~VM_PROT_WRITE;
2d21ac55 11107
3e170ce0
A
11108 if (override_nx(old_map,
11109 VME_ALIAS(submap_entry))
11110 && prot)
0c530ab8 11111 prot |= VM_PROT_EXECUTE;
2d21ac55 11112
0b4e3aa0 11113 vm_object_pmap_protect(
2d21ac55 11114 sub_object,
3e170ce0 11115 VME_OFFSET(submap_entry),
1c79356b 11116 submap_entry->vme_end -
2d21ac55 11117 submap_entry->vme_start,
9bccf70c 11118 (submap_entry->is_shared
316670eb 11119 || map->mapped_in_other_pmaps) ?
2d21ac55 11120 PMAP_NULL : map->pmap,
1c79356b 11121 submap_entry->vme_start,
0c530ab8 11122 prot);
0b4e3aa0 11123 }
1c79356b 11124
2d21ac55
A
11125 /*
11126 * Adjust the fault offset to the submap entry.
11127 */
11128 copy_offset = (local_vaddr -
11129 submap_entry->vme_start +
3e170ce0 11130 VME_OFFSET(submap_entry));
1c79356b
A
11131
 11132 /* This works differently from the */
 11133 /* normal submap case. We go back */
 11134 /* to the parent of the cow map and */
 11135 /* clip out the target portion of */
 11136 /* the sub_map, substituting the */
 11137 /* new copy object. */
11138
11139 vm_map_unlock(map);
11140 local_start = old_start;
11141 local_end = old_end;
11142 map = cow_sub_map_parent;
11143 *var_map = cow_sub_map_parent;
11144 vaddr = cow_parent_vaddr;
11145 cow_sub_map_parent = NULL;
11146
2d21ac55
A
11147 if(!vm_map_lookup_entry(map,
11148 vaddr, &entry)) {
11149 vm_object_deallocate(
11150 copy_object);
11151 vm_map_lock_write_to_read(map);
11152 return KERN_INVALID_ADDRESS;
11153 }
11154
11155 /* clip out the portion of space */
11156 /* mapped by the sub map which */
11157 /* corresponds to the underlying */
11158 /* object */
11159
11160 /*
11161 * Clip (and unnest) the smallest nested chunk
11162 * possible around the faulting address...
11163 */
11164 local_start = vaddr & ~(pmap_nesting_size_min - 1);
11165 local_end = local_start + pmap_nesting_size_min;
11166 /*
11167 * ... but don't go beyond the "old_start" to "old_end"
11168 * range, to avoid spanning over another VM region
11169 * with a possibly different VM object and/or offset.
11170 */
11171 if (local_start < old_start) {
11172 local_start = old_start;
11173 }
11174 if (local_end > old_end) {
11175 local_end = old_end;
11176 }
11177 /*
11178 * Adjust copy_offset to the start of the range.
11179 */
11180 copy_offset -= (vaddr - local_start);
11181
1c79356b
A
11182 vm_map_clip_start(map, entry, local_start);
11183 vm_map_clip_end(map, entry, local_end);
fe8ab488
A
11184 if (entry->is_sub_map) {
11185 /* unnesting was done when clipping */
11186 assert(!entry->use_pmap);
11187 }
1c79356b
A
11188
11189 /* substitute copy object for */
11190 /* shared map entry */
3e170ce0 11191 vm_map_deallocate(VME_SUBMAP(entry));
fe8ab488 11192 assert(!entry->iokit_acct);
1c79356b 11193 entry->is_sub_map = FALSE;
fe8ab488 11194 entry->use_pmap = TRUE;
3e170ce0 11195 VME_OBJECT_SET(entry, copy_object);
1c79356b 11196
2d21ac55
A
11197 /* propagate the submap entry's protections */
11198 entry->protection |= submap_entry->protection;
11199 entry->max_protection |= submap_entry->max_protection;
11200
0b4e3aa0 11201 if(copied_slowly) {
3e170ce0 11202 VME_OFFSET_SET(entry, local_start - old_start);
0b4e3aa0
A
11203 entry->needs_copy = FALSE;
11204 entry->is_shared = FALSE;
11205 } else {
3e170ce0
A
11206 VME_OFFSET_SET(entry, copy_offset);
11207 assert(entry->wired_count == 0);
0b4e3aa0
A
11208 entry->needs_copy = TRUE;
11209 if(entry->inheritance == VM_INHERIT_SHARE)
11210 entry->inheritance = VM_INHERIT_COPY;
11211 if (map != old_map)
11212 entry->is_shared = TRUE;
11213 }
1c79356b 11214 if(entry->inheritance == VM_INHERIT_SHARE)
0b4e3aa0 11215 entry->inheritance = VM_INHERIT_COPY;
1c79356b
A
11216
11217 vm_map_lock_write_to_read(map);
11218 } else {
11219 if((cow_sub_map_parent)
2d21ac55
A
11220 && (cow_sub_map_parent != *real_map)
11221 && (cow_sub_map_parent != map)) {
1c79356b
A
11222 vm_map_unlock(cow_sub_map_parent);
11223 }
11224 entry = submap_entry;
11225 vaddr = local_vaddr;
11226 }
11227 }
11228
11229 /*
11230 * Check whether this task is allowed to have
11231 * this page.
11232 */
2d21ac55 11233
6601e61a 11234 prot = entry->protection;
0c530ab8 11235
3e170ce0 11236 if (override_nx(old_map, VME_ALIAS(entry)) && prot) {
0c530ab8 11237 /*
2d21ac55 11238 * HACK -- if not a stack, then allow execution
0c530ab8
A
11239 */
11240 prot |= VM_PROT_EXECUTE;
2d21ac55
A
11241 }
11242
6d2010ae
A
11243 if (mask_protections) {
11244 fault_type &= prot;
11245 if (fault_type == VM_PROT_NONE) {
11246 goto protection_failure;
11247 }
11248 }
39037602
A
11249 if (((fault_type & prot) != fault_type)
11250 ) {
6d2010ae 11251 protection_failure:
2d21ac55
A
11252 if (*real_map != map) {
11253 vm_map_unlock(*real_map);
0c530ab8
A
11254 }
11255 *real_map = map;
11256
11257 if ((fault_type & VM_PROT_EXECUTE) && prot)
2d21ac55 11258 log_stack_execution_failure((addr64_t)vaddr, prot);
0c530ab8 11259
2d21ac55 11260 DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL);
0c530ab8 11261 return KERN_PROTECTION_FAILURE;
1c79356b
A
11262 }
11263
11264 /*
11265 * If this page is not pageable, we have to get
11266 * it for all possible accesses.
11267 */
11268
91447636
A
11269 *wired = (entry->wired_count != 0);
11270 if (*wired)
0c530ab8 11271 fault_type = prot;
1c79356b
A
11272
11273 /*
11274 * If the entry was copy-on-write, we either ...
11275 */
11276
11277 if (entry->needs_copy) {
11278 /*
11279 * If we want to write the page, we may as well
11280 * handle that now since we've got the map locked.
11281 *
11282 * If we don't need to write the page, we just
11283 * demote the permissions allowed.
11284 */
11285
fe8ab488 11286 if ((fault_type & VM_PROT_WRITE) || *wired || force_copy) {
1c79356b
A
11287 /*
11288 * Make a new object, and place it in the
11289 * object chain. Note that no new references
11290 * have appeared -- one just moved from the
11291 * map to the new object.
11292 */
11293
11294 if (vm_map_lock_read_to_write(map)) {
11295 vm_map_lock_read(map);
11296 goto RetryLookup;
11297 }
39037602
A
11298
11299 if (VME_OBJECT(entry)->shadowed == FALSE) {
11300 vm_object_lock(VME_OBJECT(entry));
11301 VME_OBJECT(entry)->shadowed = TRUE;
11302 vm_object_unlock(VME_OBJECT(entry));
11303 }
3e170ce0
A
11304 VME_OBJECT_SHADOW(entry,
11305 (vm_map_size_t) (entry->vme_end -
11306 entry->vme_start));
1c79356b 11307 entry->needs_copy = FALSE;
39037602 11308
1c79356b
A
11309 vm_map_lock_write_to_read(map);
11310 }
39037602 11311 if ((fault_type & VM_PROT_WRITE) == 0 && *wired == 0) {
1c79356b
A
11312 /*
11313 * We're attempting to read a copy-on-write
11314 * page -- don't allow writes.
11315 */
11316
11317 prot &= (~VM_PROT_WRITE);
11318 }
11319 }
11320
11321 /*
11322 * Create an object if necessary.
11323 */
3e170ce0 11324 if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
1c79356b
A
11325
11326 if (vm_map_lock_read_to_write(map)) {
11327 vm_map_lock_read(map);
11328 goto RetryLookup;
11329 }
11330
3e170ce0
A
11331 VME_OBJECT_SET(entry,
11332 vm_object_allocate(
11333 (vm_map_size_t)(entry->vme_end -
11334 entry->vme_start)));
11335 VME_OFFSET_SET(entry, 0);
1c79356b
A
11336 vm_map_lock_write_to_read(map);
11337 }
11338
11339 /*
11340 * Return the object/offset from this entry. If the entry
11341 * was copy-on-write or empty, it has been fixed up. Also
11342 * return the protection.
11343 */
11344
3e170ce0
A
11345 *offset = (vaddr - entry->vme_start) + VME_OFFSET(entry);
11346 *object = VME_OBJECT(entry);
1c79356b 11347 *out_prot = prot;
2d21ac55
A
11348
11349 if (fault_info) {
11350 fault_info->interruptible = THREAD_UNINT; /* for now... */
11351 /* ... the caller will change "interruptible" if needed */
11352 fault_info->cluster_size = 0;
3e170ce0 11353 fault_info->user_tag = VME_ALIAS(entry);
fe8ab488
A
11354 fault_info->pmap_options = 0;
11355 if (entry->iokit_acct ||
11356 (!entry->is_sub_map && !entry->use_pmap)) {
11357 fault_info->pmap_options |= PMAP_OPTIONS_ALT_ACCT;
11358 }
2d21ac55 11359 fault_info->behavior = entry->behavior;
3e170ce0
A
11360 fault_info->lo_offset = VME_OFFSET(entry);
11361 fault_info->hi_offset =
11362 (entry->vme_end - entry->vme_start) + VME_OFFSET(entry);
2d21ac55 11363 fault_info->no_cache = entry->no_cache;
b0d623f7 11364 fault_info->stealth = FALSE;
6d2010ae 11365 fault_info->io_sync = FALSE;
3e170ce0
A
11366 if (entry->used_for_jit ||
11367 entry->vme_resilient_codesign) {
11368 fault_info->cs_bypass = TRUE;
11369 } else {
11370 fault_info->cs_bypass = FALSE;
11371 }
0b4c1975 11372 fault_info->mark_zf_absent = FALSE;
316670eb 11373 fault_info->batch_pmap_op = FALSE;
2d21ac55 11374 }
1c79356b
A
11375
11376 /*
11377 * Lock the object to prevent it from disappearing
11378 */
2d21ac55
A
11379 if (object_lock_type == OBJECT_LOCK_EXCLUSIVE)
11380 vm_object_lock(*object);
11381 else
11382 vm_object_lock_shared(*object);
11383
1c79356b
A
11384 /*
11385 * Save the version number
11386 */
11387
11388 out_version->main_timestamp = map->timestamp;
11389
11390 return KERN_SUCCESS;
11391}
11392
11393
11394/*
11395 * vm_map_verify:
11396 *
11397 * Verifies that the map in question has not changed
11398 * since the given version. If successful, the map
11399 * will not change until vm_map_verify_done() is called.
11400 */
11401boolean_t
11402vm_map_verify(
39037602
A
11403 vm_map_t map,
11404 vm_map_version_t *version) /* REF */
1c79356b
A
11405{
11406 boolean_t result;
11407
11408 vm_map_lock_read(map);
11409 result = (map->timestamp == version->main_timestamp);
11410
11411 if (!result)
11412 vm_map_unlock_read(map);
11413
11414 return(result);
11415}
11416
11417/*
11418 * vm_map_verify_done:
11419 *
11420 * Releases locks acquired by a vm_map_verify.
11421 *
11422 * This is now a macro in vm/vm_map.h. It does a
11423 * vm_map_unlock_read on the map.
11424 */
11425
11426
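/*
 * [Editorial sketch -- not part of the original source.]  Intended usage of
 * vm_map_verify()/vm_map_verify_done(): a caller that saved a
 * vm_map_version_t at lookup time may drop the map locks, do slow work, and
 * later re-validate before consuming the lookup result.  The function name
 * "example_revalidate" is hypothetical; vm_map_verify() is defined just
 * above and vm_map_verify_done() is the vm/vm_map.h macro described above.
 */
static __unused kern_return_t
example_revalidate(vm_map_t map, vm_map_version_t *version)
{
	/* ... long-running work performed with "map" unlocked ... */

	if (!vm_map_verify(map, version)) {
		/* the map changed underneath us: the caller must
		 * redo its lookup and try again */
		return KERN_ABORTED;
	}
	/* "map" is now read-locked and known to be unchanged */
	/* ... safely consume the object/offset captured at lookup time ... */
	vm_map_verify_done(map, version);	/* vm_map_unlock_read(map) */
	return KERN_SUCCESS;
}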
91447636
A
11427/*
11428 * TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
11429 * Goes away after regular vm_region_recurse function migrates to
11430 * 64 bits
11431 * vm_region_recurse: A form of vm_region which follows the
11432 * submaps in a target map
11433 *
11434 */
11435
39037602
A
11436#if DEVELOPMENT || DEBUG
11437int vm_region_footprint = 0;
11438#endif /* DEVELOPMENT || DEBUG */
11439
91447636
A
11440kern_return_t
11441vm_map_region_recurse_64(
11442 vm_map_t map,
11443 vm_map_offset_t *address, /* IN/OUT */
11444 vm_map_size_t *size, /* OUT */
11445 natural_t *nesting_depth, /* IN/OUT */
11446 vm_region_submap_info_64_t submap_info, /* IN/OUT */
11447 mach_msg_type_number_t *count) /* IN/OUT */
11448{
39236c6e 11449 mach_msg_type_number_t original_count;
91447636
A
11450 vm_region_extended_info_data_t extended;
11451 vm_map_entry_t tmp_entry;
11452 vm_map_offset_t user_address;
11453 unsigned int user_max_depth;
11454
11455 /*
11456 * "curr_entry" is the VM map entry preceding or including the
11457 * address we're looking for.
11458 * "curr_map" is the map or sub-map containing "curr_entry".
6d2010ae
A
11459 * "curr_address" is the equivalent of the top map's "user_address"
11460 * in the current map.
91447636
A
 11461 * "curr_offset" is the cumulative offset of "curr_map" in the
11462 * target task's address space.
11463 * "curr_depth" is the depth of "curr_map" in the chain of
11464 * sub-maps.
6d2010ae
A
11465 *
11466 * "curr_max_below" and "curr_max_above" limit the range (around
11467 * "curr_address") we should take into account in the current (sub)map.
11468 * They limit the range to what's visible through the map entries
11469 * we've traversed from the top map to the current map.
11470
91447636
A
11471 */
11472 vm_map_entry_t curr_entry;
6d2010ae 11473 vm_map_address_t curr_address;
91447636
A
11474 vm_map_offset_t curr_offset;
11475 vm_map_t curr_map;
11476 unsigned int curr_depth;
6d2010ae
A
11477 vm_map_offset_t curr_max_below, curr_max_above;
11478 vm_map_offset_t curr_skip;
91447636
A
11479
11480 /*
11481 * "next_" is the same as "curr_" but for the VM region immediately
11482 * after the address we're looking for. We need to keep track of this
11483 * too because we want to return info about that region if the
11484 * address we're looking for is not mapped.
11485 */
11486 vm_map_entry_t next_entry;
11487 vm_map_offset_t next_offset;
6d2010ae 11488 vm_map_offset_t next_address;
91447636
A
11489 vm_map_t next_map;
11490 unsigned int next_depth;
6d2010ae
A
11491 vm_map_offset_t next_max_below, next_max_above;
11492 vm_map_offset_t next_skip;
91447636 11493
2d21ac55
A
11494 boolean_t look_for_pages;
11495 vm_region_submap_short_info_64_t short_info;
11496
91447636
A
11497 if (map == VM_MAP_NULL) {
11498 /* no address space to work on */
11499 return KERN_INVALID_ARGUMENT;
11500 }
11501
39236c6e
A
11502
11503 if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) {
11504 /*
11505 * "info" structure is not big enough and
11506 * would overflow
11507 */
11508 return KERN_INVALID_ARGUMENT;
11509 }
11510
11511 original_count = *count;
11512
11513 if (original_count < VM_REGION_SUBMAP_INFO_V0_COUNT_64) {
11514 *count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
11515 look_for_pages = FALSE;
11516 short_info = (vm_region_submap_short_info_64_t) submap_info;
11517 submap_info = NULL;
2d21ac55
A
11518 } else {
11519 look_for_pages = TRUE;
39236c6e 11520 *count = VM_REGION_SUBMAP_INFO_V0_COUNT_64;
2d21ac55 11521 short_info = NULL;
39236c6e
A
11522
11523 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
11524 *count = VM_REGION_SUBMAP_INFO_V1_COUNT_64;
11525 }
91447636 11526 }
39236c6e 11527
91447636
A
11528 user_address = *address;
11529 user_max_depth = *nesting_depth;
11530
3e170ce0
A
11531 if (not_in_kdp) {
11532 vm_map_lock_read(map);
11533 }
11534
11535recurse_again:
91447636
A
11536 curr_entry = NULL;
11537 curr_map = map;
6d2010ae 11538 curr_address = user_address;
91447636 11539 curr_offset = 0;
6d2010ae 11540 curr_skip = 0;
91447636 11541 curr_depth = 0;
6d2010ae
A
11542 curr_max_above = ((vm_map_offset_t) -1) - curr_address;
11543 curr_max_below = curr_address;
91447636
A
11544
11545 next_entry = NULL;
11546 next_map = NULL;
6d2010ae 11547 next_address = 0;
91447636 11548 next_offset = 0;
6d2010ae 11549 next_skip = 0;
91447636 11550 next_depth = 0;
6d2010ae
A
11551 next_max_above = (vm_map_offset_t) -1;
11552 next_max_below = (vm_map_offset_t) -1;
91447636 11553
91447636
A
11554 for (;;) {
11555 if (vm_map_lookup_entry(curr_map,
6d2010ae 11556 curr_address,
91447636
A
11557 &tmp_entry)) {
11558 /* tmp_entry contains the address we're looking for */
11559 curr_entry = tmp_entry;
11560 } else {
6d2010ae 11561 vm_map_offset_t skip;
91447636
A
11562 /*
11563 * The address is not mapped. "tmp_entry" is the
11564 * map entry preceding the address. We want the next
11565 * one, if it exists.
11566 */
11567 curr_entry = tmp_entry->vme_next;
6d2010ae 11568
91447636 11569 if (curr_entry == vm_map_to_entry(curr_map) ||
6d2010ae
A
11570 (curr_entry->vme_start >=
11571 curr_address + curr_max_above)) {
91447636
A
11572 /* no next entry at this level: stop looking */
11573 if (not_in_kdp) {
11574 vm_map_unlock_read(curr_map);
11575 }
11576 curr_entry = NULL;
11577 curr_map = NULL;
3e170ce0 11578 curr_skip = 0;
91447636
A
11579 curr_offset = 0;
11580 curr_depth = 0;
6d2010ae
A
11581 curr_max_above = 0;
11582 curr_max_below = 0;
91447636
A
11583 break;
11584 }
6d2010ae
A
11585
11586 /* adjust current address and offset */
11587 skip = curr_entry->vme_start - curr_address;
11588 curr_address = curr_entry->vme_start;
3e170ce0 11589 curr_skip += skip;
6d2010ae
A
11590 curr_offset += skip;
11591 curr_max_above -= skip;
11592 curr_max_below = 0;
91447636
A
11593 }
11594
11595 /*
11596 * Is the next entry at this level closer to the address (or
11597 * deeper in the submap chain) than the one we had
11598 * so far ?
11599 */
11600 tmp_entry = curr_entry->vme_next;
11601 if (tmp_entry == vm_map_to_entry(curr_map)) {
11602 /* no next entry at this level */
6d2010ae
A
11603 } else if (tmp_entry->vme_start >=
11604 curr_address + curr_max_above) {
91447636
A
11605 /*
11606 * tmp_entry is beyond the scope of what we mapped of
11607 * this submap in the upper level: ignore it.
11608 */
11609 } else if ((next_entry == NULL) ||
11610 (tmp_entry->vme_start + curr_offset <=
11611 next_entry->vme_start + next_offset)) {
11612 /*
11613 * We didn't have a "next_entry" or this one is
11614 * closer to the address we're looking for:
11615 * use this "tmp_entry" as the new "next_entry".
11616 */
11617 if (next_entry != NULL) {
11618 /* unlock the last "next_map" */
11619 if (next_map != curr_map && not_in_kdp) {
11620 vm_map_unlock_read(next_map);
11621 }
11622 }
11623 next_entry = tmp_entry;
11624 next_map = curr_map;
91447636 11625 next_depth = curr_depth;
6d2010ae
A
11626 next_address = next_entry->vme_start;
11627 next_skip = curr_skip;
3e170ce0 11628 next_skip += (next_address - curr_address);
6d2010ae
A
11629 next_offset = curr_offset;
11630 next_offset += (next_address - curr_address);
11631 next_max_above = MIN(next_max_above, curr_max_above);
11632 next_max_above = MIN(next_max_above,
11633 next_entry->vme_end - next_address);
11634 next_max_below = MIN(next_max_below, curr_max_below);
11635 next_max_below = MIN(next_max_below,
11636 next_address - next_entry->vme_start);
91447636
A
11637 }
11638
6d2010ae
A
11639 /*
11640 * "curr_max_{above,below}" allow us to keep track of the
11641 * portion of the submap that is actually mapped at this level:
11642 * the rest of that submap is irrelevant to us, since it's not
11643 * mapped here.
11644 * The relevant portion of the map starts at
3e170ce0 11645 * "VME_OFFSET(curr_entry)" up to the size of "curr_entry".
6d2010ae
A
11646 */
11647 curr_max_above = MIN(curr_max_above,
11648 curr_entry->vme_end - curr_address);
11649 curr_max_below = MIN(curr_max_below,
11650 curr_address - curr_entry->vme_start);
11651
91447636
A
11652 if (!curr_entry->is_sub_map ||
11653 curr_depth >= user_max_depth) {
11654 /*
11655 * We hit a leaf map or we reached the maximum depth
11656 * we could, so stop looking. Keep the current map
11657 * locked.
11658 */
11659 break;
11660 }
11661
11662 /*
11663 * Get down to the next submap level.
11664 */
11665
11666 /*
11667 * Lock the next level and unlock the current level,
11668 * unless we need to keep it locked to access the "next_entry"
11669 * later.
11670 */
11671 if (not_in_kdp) {
3e170ce0 11672 vm_map_lock_read(VME_SUBMAP(curr_entry));
91447636
A
11673 }
11674 if (curr_map == next_map) {
11675 /* keep "next_map" locked in case we need it */
11676 } else {
11677 /* release this map */
b0d623f7
A
11678 if (not_in_kdp)
11679 vm_map_unlock_read(curr_map);
91447636
A
11680 }
11681
11682 /*
11683 * Adjust the offset. "curr_entry" maps the submap
11684 * at relative address "curr_entry->vme_start" in the
3e170ce0 11685 * curr_map but skips the first "VME_OFFSET(curr_entry)"
91447636
A
11686 * bytes of the submap.
11687 * "curr_offset" always represents the offset of a virtual
11688 * address in the curr_map relative to the absolute address
11689 * space (i.e. the top-level VM map).
11690 */
11691 curr_offset +=
3e170ce0 11692 (VME_OFFSET(curr_entry) - curr_entry->vme_start);
6d2010ae 11693 curr_address = user_address + curr_offset;
91447636 11694 /* switch to the submap */
3e170ce0 11695 curr_map = VME_SUBMAP(curr_entry);
91447636 11696 curr_depth++;
91447636
A
11697 curr_entry = NULL;
11698 }
11699
11700 if (curr_entry == NULL) {
11701 /* no VM region contains the address... */
39037602
A
11702#if DEVELOPMENT || DEBUG
11703 if (vm_region_footprint && /* we want footprint numbers */
11704 look_for_pages && /* & we want page counts */
11705 next_entry == NULL && /* & there are no more regions */
11706 /* & we haven't already provided our fake region: */
11707 user_address == vm_map_last_entry(map)->vme_end) {
11708 ledger_amount_t nonvol, nonvol_compressed;
11709 /*
11710 * Add a fake memory region to account for
11711 * purgeable memory that counts towards this
11712 * task's memory footprint, i.e. the resident
11713 * compressed pages of non-volatile objects
11714 * owned by that task.
11715 */
11716 ledger_get_balance(
11717 map->pmap->ledger,
11718 task_ledgers.purgeable_nonvolatile,
11719 &nonvol);
11720 ledger_get_balance(
11721 map->pmap->ledger,
11722 task_ledgers.purgeable_nonvolatile_compressed,
11723 &nonvol_compressed);
11724 if (nonvol + nonvol_compressed == 0) {
11725 /* no purgeable memory usage to report */
11726 return KERN_FAILURE;
11727 }
11728 /* fake region to show nonvolatile footprint */
11729 submap_info->protection = VM_PROT_DEFAULT;
11730 submap_info->max_protection = VM_PROT_DEFAULT;
11731 submap_info->inheritance = VM_INHERIT_DEFAULT;
11732 submap_info->offset = 0;
11733 submap_info->user_tag = 0;
11734 submap_info->pages_resident = (unsigned int) (nonvol / PAGE_SIZE);
11735 submap_info->pages_shared_now_private = 0;
11736 submap_info->pages_swapped_out = (unsigned int) (nonvol_compressed / PAGE_SIZE);
11737 submap_info->pages_dirtied = submap_info->pages_resident;
11738 submap_info->ref_count = 1;
11739 submap_info->shadow_depth = 0;
11740 submap_info->external_pager = 0;
11741 submap_info->share_mode = SM_PRIVATE;
11742 submap_info->is_submap = 0;
11743 submap_info->behavior = VM_BEHAVIOR_DEFAULT;
11744 submap_info->object_id = 0x11111111;
11745 submap_info->user_wired_count = 0;
11746 submap_info->pages_reusable = 0;
11747 *nesting_depth = 0;
11748 *size = (vm_map_size_t) (nonvol + nonvol_compressed);
11749 *address = user_address;
11750 return KERN_SUCCESS;
11751 }
11752#endif /* DEVELOPMENT || DEBUG */
91447636
A
11753 if (next_entry == NULL) {
11754 /* ... and no VM region follows it either */
11755 return KERN_INVALID_ADDRESS;
11756 }
11757 /* ... gather info about the next VM region */
11758 curr_entry = next_entry;
11759 curr_map = next_map; /* still locked ... */
6d2010ae
A
11760 curr_address = next_address;
11761 curr_skip = next_skip;
91447636
A
11762 curr_offset = next_offset;
11763 curr_depth = next_depth;
6d2010ae
A
11764 curr_max_above = next_max_above;
11765 curr_max_below = next_max_below;
91447636
A
11766 } else {
11767 /* we won't need "next_entry" after all */
11768 if (next_entry != NULL) {
11769 /* release "next_map" */
11770 if (next_map != curr_map && not_in_kdp) {
11771 vm_map_unlock_read(next_map);
11772 }
11773 }
11774 }
11775 next_entry = NULL;
11776 next_map = NULL;
11777 next_offset = 0;
6d2010ae 11778 next_skip = 0;
91447636 11779 next_depth = 0;
6d2010ae
A
11780 next_max_below = -1;
11781 next_max_above = -1;
91447636 11782
3e170ce0
A
11783 if (curr_entry->is_sub_map &&
11784 curr_depth < user_max_depth) {
11785 /*
11786 * We're not as deep as we could be: we must have
11787 * gone back up after not finding anything mapped
 11788 * below the original top-level map entry's range.
11789 * Let's move "curr_address" forward and recurse again.
11790 */
11791 user_address = curr_address;
11792 goto recurse_again;
11793 }
11794
91447636 11795 *nesting_depth = curr_depth;
6d2010ae
A
11796 *size = curr_max_above + curr_max_below;
11797 *address = user_address + curr_skip - curr_max_below;
91447636 11798
b0d623f7
A
11799// LP64todo: all the current tools are 32bit, obviously never worked for 64b
11800// so probably should be a real 32b ID vs. ptr.
11801// Current users just check for equality
39236c6e 11802#define INFO_MAKE_OBJECT_ID(p) ((uint32_t)(uintptr_t)VM_KERNEL_ADDRPERM(p))
b0d623f7 11803
2d21ac55 11804 if (look_for_pages) {
3e170ce0
A
11805 submap_info->user_tag = VME_ALIAS(curr_entry);
11806 submap_info->offset = VME_OFFSET(curr_entry);
2d21ac55
A
11807 submap_info->protection = curr_entry->protection;
11808 submap_info->inheritance = curr_entry->inheritance;
11809 submap_info->max_protection = curr_entry->max_protection;
11810 submap_info->behavior = curr_entry->behavior;
11811 submap_info->user_wired_count = curr_entry->user_wired_count;
11812 submap_info->is_submap = curr_entry->is_sub_map;
3e170ce0 11813 submap_info->object_id = INFO_MAKE_OBJECT_ID(VME_OBJECT(curr_entry));
2d21ac55 11814 } else {
3e170ce0
A
11815 short_info->user_tag = VME_ALIAS(curr_entry);
11816 short_info->offset = VME_OFFSET(curr_entry);
2d21ac55
A
11817 short_info->protection = curr_entry->protection;
11818 short_info->inheritance = curr_entry->inheritance;
11819 short_info->max_protection = curr_entry->max_protection;
11820 short_info->behavior = curr_entry->behavior;
11821 short_info->user_wired_count = curr_entry->user_wired_count;
11822 short_info->is_submap = curr_entry->is_sub_map;
3e170ce0 11823 short_info->object_id = INFO_MAKE_OBJECT_ID(VME_OBJECT(curr_entry));
2d21ac55 11824 }
91447636
A
11825
11826 extended.pages_resident = 0;
11827 extended.pages_swapped_out = 0;
11828 extended.pages_shared_now_private = 0;
11829 extended.pages_dirtied = 0;
39236c6e 11830 extended.pages_reusable = 0;
91447636
A
11831 extended.external_pager = 0;
11832 extended.shadow_depth = 0;
3e170ce0
A
11833 extended.share_mode = SM_EMPTY;
11834 extended.ref_count = 0;
91447636
A
11835
11836 if (not_in_kdp) {
11837 if (!curr_entry->is_sub_map) {
6d2010ae
A
11838 vm_map_offset_t range_start, range_end;
11839 range_start = MAX((curr_address - curr_max_below),
11840 curr_entry->vme_start);
11841 range_end = MIN((curr_address + curr_max_above),
11842 curr_entry->vme_end);
91447636 11843 vm_map_region_walk(curr_map,
6d2010ae 11844 range_start,
91447636 11845 curr_entry,
3e170ce0 11846 (VME_OFFSET(curr_entry) +
6d2010ae
A
11847 (range_start -
11848 curr_entry->vme_start)),
11849 range_end - range_start,
2d21ac55 11850 &extended,
39236c6e 11851 look_for_pages, VM_REGION_EXTENDED_INFO_COUNT);
91447636
A
11852 if (extended.external_pager &&
11853 extended.ref_count == 2 &&
11854 extended.share_mode == SM_SHARED) {
2d21ac55 11855 extended.share_mode = SM_PRIVATE;
91447636 11856 }
91447636
A
11857 } else {
11858 if (curr_entry->use_pmap) {
2d21ac55 11859 extended.share_mode = SM_TRUESHARED;
91447636 11860 } else {
2d21ac55 11861 extended.share_mode = SM_PRIVATE;
91447636 11862 }
3e170ce0 11863 extended.ref_count = VME_SUBMAP(curr_entry)->ref_count;
91447636
A
11864 }
11865 }
11866
2d21ac55
A
11867 if (look_for_pages) {
11868 submap_info->pages_resident = extended.pages_resident;
11869 submap_info->pages_swapped_out = extended.pages_swapped_out;
11870 submap_info->pages_shared_now_private =
11871 extended.pages_shared_now_private;
11872 submap_info->pages_dirtied = extended.pages_dirtied;
11873 submap_info->external_pager = extended.external_pager;
11874 submap_info->shadow_depth = extended.shadow_depth;
11875 submap_info->share_mode = extended.share_mode;
11876 submap_info->ref_count = extended.ref_count;
39236c6e
A
11877
11878 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
11879 submap_info->pages_reusable = extended.pages_reusable;
11880 }
2d21ac55
A
11881 } else {
11882 short_info->external_pager = extended.external_pager;
11883 short_info->shadow_depth = extended.shadow_depth;
11884 short_info->share_mode = extended.share_mode;
11885 short_info->ref_count = extended.ref_count;
11886 }
91447636
A
11887
11888 if (not_in_kdp) {
11889 vm_map_unlock_read(curr_map);
11890 }
11891
11892 return KERN_SUCCESS;
11893}
11894
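/*
 * [Editorial sketch -- not part of the original source.]  User-space view of
 * the routine above: the mach_vm_region_recurse() MIG call lands in
 * vm_map_region_recurse_64().  This is a standalone user-level fragment (it
 * would not compile inside this file); "example_walk_regions" is a
 * hypothetical name.
 */
#include <mach/mach.h>
#include <mach/mach_vm.h>
#include <stdio.h>

static void
example_walk_regions(task_t task)
{
	mach_vm_address_t addr = 0;
	mach_vm_size_t size = 0;
	natural_t depth = 0;

	for (;;) {
		vm_region_submap_info_data_64_t info;
		mach_msg_type_number_t count = VM_REGION_SUBMAP_INFO_COUNT_64;

		if (mach_vm_region_recurse(task, &addr, &size, &depth,
		    (vm_region_recurse_info_t)&info, &count) != KERN_SUCCESS)
			break;
		if (info.is_submap) {
			/* same address, one nesting level deeper */
			depth++;
			continue;
		}
		printf("0x%llx-0x%llx depth=%u tag=%u\n",
		    (unsigned long long)addr,
		    (unsigned long long)(addr + size),
		    depth, info.user_tag);
		addr += size;
	}
}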
1c79356b
A
11895/*
11896 * vm_region:
11897 *
11898 * User call to obtain information about a region in
11899 * a task's address map. Currently, only one flavor is
11900 * supported.
11901 *
11902 * XXX The reserved and behavior fields cannot be filled
11903 * in until the vm merge from the IK is completed, and
11904 * vm_reserve is implemented.
1c79356b
A
11905 */
11906
11907kern_return_t
91447636 11908vm_map_region(
1c79356b 11909 vm_map_t map,
91447636
A
11910 vm_map_offset_t *address, /* IN/OUT */
11911 vm_map_size_t *size, /* OUT */
1c79356b
A
11912 vm_region_flavor_t flavor, /* IN */
11913 vm_region_info_t info, /* OUT */
91447636
A
11914 mach_msg_type_number_t *count, /* IN/OUT */
11915 mach_port_t *object_name) /* OUT */
1c79356b
A
11916{
11917 vm_map_entry_t tmp_entry;
1c79356b 11918 vm_map_entry_t entry;
91447636 11919 vm_map_offset_t start;
1c79356b
A
11920
11921 if (map == VM_MAP_NULL)
11922 return(KERN_INVALID_ARGUMENT);
11923
11924 switch (flavor) {
91447636 11925
1c79356b 11926 case VM_REGION_BASIC_INFO:
2d21ac55 11927 /* legacy for old 32-bit objects info */
1c79356b 11928 {
2d21ac55 11929 vm_region_basic_info_t basic;
91447636 11930
2d21ac55
A
11931 if (*count < VM_REGION_BASIC_INFO_COUNT)
11932 return(KERN_INVALID_ARGUMENT);
1c79356b 11933
2d21ac55
A
11934 basic = (vm_region_basic_info_t) info;
11935 *count = VM_REGION_BASIC_INFO_COUNT;
1c79356b 11936
2d21ac55 11937 vm_map_lock_read(map);
1c79356b 11938
2d21ac55
A
11939 start = *address;
11940 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
11941 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
11942 vm_map_unlock_read(map);
11943 return(KERN_INVALID_ADDRESS);
11944 }
11945 } else {
11946 entry = tmp_entry;
1c79356b 11947 }
1c79356b 11948
2d21ac55 11949 start = entry->vme_start;
1c79356b 11950
3e170ce0 11951 basic->offset = (uint32_t)VME_OFFSET(entry);
2d21ac55
A
11952 basic->protection = entry->protection;
11953 basic->inheritance = entry->inheritance;
11954 basic->max_protection = entry->max_protection;
11955 basic->behavior = entry->behavior;
11956 basic->user_wired_count = entry->user_wired_count;
11957 basic->reserved = entry->is_sub_map;
11958 *address = start;
11959 *size = (entry->vme_end - start);
91447636 11960
2d21ac55
A
11961 if (object_name) *object_name = IP_NULL;
11962 if (entry->is_sub_map) {
11963 basic->shared = FALSE;
11964 } else {
11965 basic->shared = entry->is_shared;
11966 }
91447636 11967
2d21ac55
A
11968 vm_map_unlock_read(map);
11969 return(KERN_SUCCESS);
91447636
A
11970 }
11971
11972 case VM_REGION_BASIC_INFO_64:
11973 {
2d21ac55 11974 vm_region_basic_info_64_t basic;
91447636 11975
2d21ac55
A
11976 if (*count < VM_REGION_BASIC_INFO_COUNT_64)
11977 return(KERN_INVALID_ARGUMENT);
11978
11979 basic = (vm_region_basic_info_64_t) info;
11980 *count = VM_REGION_BASIC_INFO_COUNT_64;
11981
11982 vm_map_lock_read(map);
11983
11984 start = *address;
11985 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
11986 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
11987 vm_map_unlock_read(map);
11988 return(KERN_INVALID_ADDRESS);
11989 }
11990 } else {
11991 entry = tmp_entry;
11992 }
91447636 11993
2d21ac55 11994 start = entry->vme_start;
91447636 11995
3e170ce0 11996 basic->offset = VME_OFFSET(entry);
2d21ac55
A
11997 basic->protection = entry->protection;
11998 basic->inheritance = entry->inheritance;
11999 basic->max_protection = entry->max_protection;
12000 basic->behavior = entry->behavior;
12001 basic->user_wired_count = entry->user_wired_count;
12002 basic->reserved = entry->is_sub_map;
12003 *address = start;
12004 *size = (entry->vme_end - start);
91447636 12005
2d21ac55
A
12006 if (object_name) *object_name = IP_NULL;
12007 if (entry->is_sub_map) {
12008 basic->shared = FALSE;
12009 } else {
12010 basic->shared = entry->is_shared;
91447636 12011 }
2d21ac55
A
12012
12013 vm_map_unlock_read(map);
12014 return(KERN_SUCCESS);
1c79356b
A
12015 }
12016 case VM_REGION_EXTENDED_INFO:
2d21ac55
A
12017 if (*count < VM_REGION_EXTENDED_INFO_COUNT)
12018 return(KERN_INVALID_ARGUMENT);
39236c6e
A
12019 /*fallthru*/
12020 case VM_REGION_EXTENDED_INFO__legacy:
12021 if (*count < VM_REGION_EXTENDED_INFO_COUNT__legacy)
12022 return KERN_INVALID_ARGUMENT;
12023
12024 {
12025 vm_region_extended_info_t extended;
12026 mach_msg_type_number_t original_count;
1c79356b 12027
2d21ac55 12028 extended = (vm_region_extended_info_t) info;
1c79356b 12029
2d21ac55 12030 vm_map_lock_read(map);
1c79356b 12031
2d21ac55
A
12032 start = *address;
12033 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
12034 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
12035 vm_map_unlock_read(map);
12036 return(KERN_INVALID_ADDRESS);
12037 }
12038 } else {
12039 entry = tmp_entry;
1c79356b 12040 }
2d21ac55 12041 start = entry->vme_start;
1c79356b 12042
2d21ac55 12043 extended->protection = entry->protection;
3e170ce0 12044 extended->user_tag = VME_ALIAS(entry);
2d21ac55
A
12045 extended->pages_resident = 0;
12046 extended->pages_swapped_out = 0;
12047 extended->pages_shared_now_private = 0;
12048 extended->pages_dirtied = 0;
12049 extended->external_pager = 0;
12050 extended->shadow_depth = 0;
1c79356b 12051
39236c6e
A
12052 original_count = *count;
12053 if (flavor == VM_REGION_EXTENDED_INFO__legacy) {
12054 *count = VM_REGION_EXTENDED_INFO_COUNT__legacy;
12055 } else {
12056 extended->pages_reusable = 0;
12057 *count = VM_REGION_EXTENDED_INFO_COUNT;
12058 }
12059
3e170ce0 12060 vm_map_region_walk(map, start, entry, VME_OFFSET(entry), entry->vme_end - start, extended, TRUE, *count);
1c79356b 12061
2d21ac55
A
12062 if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED)
12063 extended->share_mode = SM_PRIVATE;
1c79356b 12064
2d21ac55
A
12065 if (object_name)
12066 *object_name = IP_NULL;
12067 *address = start;
12068 *size = (entry->vme_end - start);
1c79356b 12069
2d21ac55
A
12070 vm_map_unlock_read(map);
12071 return(KERN_SUCCESS);
1c79356b
A
12072 }
12073 case VM_REGION_TOP_INFO:
12074 {
2d21ac55 12075 vm_region_top_info_t top;
1c79356b 12076
2d21ac55
A
12077 if (*count < VM_REGION_TOP_INFO_COUNT)
12078 return(KERN_INVALID_ARGUMENT);
1c79356b 12079
2d21ac55
A
12080 top = (vm_region_top_info_t) info;
12081 *count = VM_REGION_TOP_INFO_COUNT;
1c79356b 12082
2d21ac55 12083 vm_map_lock_read(map);
1c79356b 12084
2d21ac55
A
12085 start = *address;
12086 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
12087 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
12088 vm_map_unlock_read(map);
12089 return(KERN_INVALID_ADDRESS);
12090 }
12091 } else {
12092 entry = tmp_entry;
1c79356b 12093
2d21ac55
A
12094 }
12095 start = entry->vme_start;
1c79356b 12096
2d21ac55
A
12097 top->private_pages_resident = 0;
12098 top->shared_pages_resident = 0;
1c79356b 12099
2d21ac55 12100 vm_map_region_top_walk(entry, top);
1c79356b 12101
2d21ac55
A
12102 if (object_name)
12103 *object_name = IP_NULL;
12104 *address = start;
12105 *size = (entry->vme_end - start);
1c79356b 12106
2d21ac55
A
12107 vm_map_unlock_read(map);
12108 return(KERN_SUCCESS);
1c79356b
A
12109 }
12110 default:
2d21ac55 12111 return(KERN_INVALID_ARGUMENT);
1c79356b
A
12112 }
12113}
12114
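/*
 * [Editorial sketch -- not part of the original source.]  The user-space
 * counterpart of the VM_REGION_BASIC_INFO_64 flavor handled above, reached
 * through the mach_vm_region() MIG call.  Standalone user-level fragment,
 * not meant to be compiled into this file; "example_query_region" is a
 * hypothetical name.
 */
#include <mach/mach.h>
#include <mach/mach_vm.h>

static kern_return_t
example_query_region(task_t task, mach_vm_address_t where,
    vm_region_basic_info_data_64_t *out_info)
{
	mach_vm_address_t addr = where;	/* in: lookup address; out: region start */
	mach_vm_size_t size = 0;
	mach_msg_type_number_t count = VM_REGION_BASIC_INFO_COUNT_64;
	mach_port_t object_name = MACH_PORT_NULL;	/* always returned as null, see above */

	kern_return_t kr = mach_vm_region(task, &addr, &size,
	    VM_REGION_BASIC_INFO_64, (vm_region_info_t)out_info,
	    &count, &object_name);
	/* on success, [addr, addr + size) is the region containing (or
	 * following) "where", and *out_info holds its protection, etc. */
	return kr;
}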
b0d623f7
A
12115#define OBJ_RESIDENT_COUNT(obj, entry_size) \
12116 MIN((entry_size), \
12117 ((obj)->all_reusable ? \
12118 (obj)->wired_page_count : \
12119 (obj)->resident_page_count - (obj)->reusable_page_count))
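/*
 * [Editorial note -- not part of the original source.]  OBJ_RESIDENT_COUNT
 * caps the pages charged to an entry at the entry's own size and, unless
 * the whole object is marked all_reusable, discounts reusable pages.  For
 * example, a 32-page entry backed by an object with 10 resident pages, 4
 * of them reusable, contributes MIN(32, 10 - 4) = 6 pages; if the object
 * were all_reusable, only its wired pages would be counted.
 */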
2d21ac55 12120
0c530ab8 12121void
91447636
A
12122vm_map_region_top_walk(
12123 vm_map_entry_t entry,
12124 vm_region_top_info_t top)
1c79356b 12125{
1c79356b 12126
3e170ce0 12127 if (VME_OBJECT(entry) == 0 || entry->is_sub_map) {
2d21ac55
A
12128 top->share_mode = SM_EMPTY;
12129 top->ref_count = 0;
12130 top->obj_id = 0;
12131 return;
1c79356b 12132 }
2d21ac55 12133
91447636 12134 {
2d21ac55
A
12135 struct vm_object *obj, *tmp_obj;
12136 int ref_count;
12137 uint32_t entry_size;
1c79356b 12138
b0d623f7 12139 entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64);
1c79356b 12140
3e170ce0 12141 obj = VME_OBJECT(entry);
1c79356b 12142
2d21ac55
A
12143 vm_object_lock(obj);
12144
12145 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
12146 ref_count--;
12147
b0d623f7 12148 assert(obj->reusable_page_count <= obj->resident_page_count);
2d21ac55
A
12149 if (obj->shadow) {
12150 if (ref_count == 1)
b0d623f7
A
12151 top->private_pages_resident =
12152 OBJ_RESIDENT_COUNT(obj, entry_size);
2d21ac55 12153 else
b0d623f7
A
12154 top->shared_pages_resident =
12155 OBJ_RESIDENT_COUNT(obj, entry_size);
2d21ac55
A
12156 top->ref_count = ref_count;
12157 top->share_mode = SM_COW;
91447636 12158
2d21ac55
A
12159 while ((tmp_obj = obj->shadow)) {
12160 vm_object_lock(tmp_obj);
12161 vm_object_unlock(obj);
12162 obj = tmp_obj;
1c79356b 12163
2d21ac55
A
12164 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
12165 ref_count--;
1c79356b 12166
b0d623f7
A
12167 assert(obj->reusable_page_count <= obj->resident_page_count);
12168 top->shared_pages_resident +=
12169 OBJ_RESIDENT_COUNT(obj, entry_size);
2d21ac55
A
12170 top->ref_count += ref_count - 1;
12171 }
1c79356b 12172 } else {
6d2010ae
A
12173 if (entry->superpage_size) {
12174 top->share_mode = SM_LARGE_PAGE;
12175 top->shared_pages_resident = 0;
12176 top->private_pages_resident = entry_size;
12177 } else if (entry->needs_copy) {
2d21ac55 12178 top->share_mode = SM_COW;
b0d623f7
A
12179 top->shared_pages_resident =
12180 OBJ_RESIDENT_COUNT(obj, entry_size);
2d21ac55
A
12181 } else {
12182 if (ref_count == 1 ||
12183 (ref_count == 2 && !(obj->pager_trusted) && !(obj->internal))) {
12184 top->share_mode = SM_PRIVATE;
39236c6e
A
12185 top->private_pages_resident =
12186 OBJ_RESIDENT_COUNT(obj,
12187 entry_size);
2d21ac55
A
12188 } else {
12189 top->share_mode = SM_SHARED;
b0d623f7
A
12190 top->shared_pages_resident =
12191 OBJ_RESIDENT_COUNT(obj,
12192 entry_size);
2d21ac55
A
12193 }
12194 }
12195 top->ref_count = ref_count;
1c79356b 12196 }
b0d623f7 12197 /* XXX K64: obj_id will be truncated */
39236c6e 12198 top->obj_id = (unsigned int) (uintptr_t)VM_KERNEL_ADDRPERM(obj);
1c79356b 12199
2d21ac55 12200 vm_object_unlock(obj);
1c79356b 12201 }
91447636
A
12202}
12203
0c530ab8 12204void
91447636
A
12205vm_map_region_walk(
12206 vm_map_t map,
2d21ac55
A
12207 vm_map_offset_t va,
12208 vm_map_entry_t entry,
91447636
A
12209 vm_object_offset_t offset,
12210 vm_object_size_t range,
2d21ac55 12211 vm_region_extended_info_t extended,
39236c6e
A
12212 boolean_t look_for_pages,
12213 mach_msg_type_number_t count)
91447636 12214{
39037602
A
12215 struct vm_object *obj, *tmp_obj;
12216 vm_map_offset_t last_offset;
12217 int i;
12218 int ref_count;
91447636
A
12219 struct vm_object *shadow_object;
12220 int shadow_depth;
12221
3e170ce0 12222 if ((VME_OBJECT(entry) == 0) ||
2d21ac55 12223 (entry->is_sub_map) ||
3e170ce0 12224 (VME_OBJECT(entry)->phys_contiguous &&
6d2010ae 12225 !entry->superpage_size)) {
2d21ac55
A
12226 extended->share_mode = SM_EMPTY;
12227 extended->ref_count = 0;
12228 return;
1c79356b 12229 }
6d2010ae
A
12230
12231 if (entry->superpage_size) {
12232 extended->shadow_depth = 0;
12233 extended->share_mode = SM_LARGE_PAGE;
12234 extended->ref_count = 1;
12235 extended->external_pager = 0;
12236 extended->pages_resident = (unsigned int)(range >> PAGE_SHIFT);
12237 extended->shadow_depth = 0;
12238 return;
12239 }
12240
39037602 12241 obj = VME_OBJECT(entry);
2d21ac55 12242
39037602 12243 vm_object_lock(obj);
2d21ac55 12244
39037602
A
12245 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
12246 ref_count--;
2d21ac55 12247
39037602
A
12248 if (look_for_pages) {
12249 for (last_offset = offset + range;
12250 offset < last_offset;
12251 offset += PAGE_SIZE_64, va += PAGE_SIZE) {
12252#if DEVELOPMENT || DEBUG
12253 if (vm_region_footprint) {
12254 if (obj->purgable != VM_PURGABLE_DENY) {
12255 /* alternate accounting */
12256 } else if (entry->iokit_acct) {
12257 /* alternate accounting */
12258 extended->pages_resident++;
12259 extended->pages_dirtied++;
12260 } else {
12261 int disp;
12262
12263 disp = 0;
12264 pmap_query_page_info(map->pmap, va, &disp);
12265 if (disp & PMAP_QUERY_PAGE_PRESENT) {
12266 extended->pages_resident++;
12267 if (disp & PMAP_QUERY_PAGE_REUSABLE) {
12268 extended->pages_reusable++;
12269 } else if (!(disp & PMAP_QUERY_PAGE_INTERNAL) ||
12270 (disp & PMAP_QUERY_PAGE_ALTACCT)) {
12271 /* alternate accounting */
12272 } else {
12273 extended->pages_dirtied++;
12274 }
12275 } else if (disp & PMAP_QUERY_PAGE_COMPRESSED) {
12276 if (disp & PMAP_QUERY_PAGE_COMPRESSED_ALTACCT) {
12277 /* alternate accounting */
12278 } else {
12279 extended->pages_swapped_out++;
12280 }
b0d623f7 12281 }
2d21ac55 12282 }
39037602 12283 continue;
2d21ac55 12284 }
39037602
A
12285#endif /* DEVELOPMENT || DEBUG */
12286 vm_map_region_look_for_page(map, va, obj,
12287 offset, ref_count,
12288 0, extended, count);
2d21ac55 12289 }
39037602
A
12290#if DEVELOPMENT || DEBUG
12291 if (vm_region_footprint) {
12292 goto collect_object_info;
12293 }
12294#endif /* DEVELOPMENT || DEBUG */
12295 } else {
12296#if DEVELOPMENT || DEBUG
12297 collect_object_info:
12298#endif /* DEVELOPMENT || DEBUG */
12299 shadow_object = obj->shadow;
12300 shadow_depth = 0;
2d21ac55 12301
39037602
A
12302 if ( !(obj->pager_trusted) && !(obj->internal))
12303 extended->external_pager = 1;
12304
12305 if (shadow_object != VM_OBJECT_NULL) {
12306 vm_object_lock(shadow_object);
12307 for (;
12308 shadow_object != VM_OBJECT_NULL;
12309 shadow_depth++) {
12310 vm_object_t next_shadow;
12311
12312 if ( !(shadow_object->pager_trusted) &&
12313 !(shadow_object->internal))
12314 extended->external_pager = 1;
12315
12316 next_shadow = shadow_object->shadow;
12317 if (next_shadow) {
12318 vm_object_lock(next_shadow);
12319 }
12320 vm_object_unlock(shadow_object);
12321 shadow_object = next_shadow;
2d21ac55 12322 }
91447636 12323 }
39037602
A
12324 extended->shadow_depth = shadow_depth;
12325 }
1c79356b 12326
39037602
A
12327 if (extended->shadow_depth || entry->needs_copy)
12328 extended->share_mode = SM_COW;
12329 else {
12330 if (ref_count == 1)
12331 extended->share_mode = SM_PRIVATE;
12332 else {
12333 if (obj->true_share)
12334 extended->share_mode = SM_TRUESHARED;
12335 else
12336 extended->share_mode = SM_SHARED;
2d21ac55 12337 }
39037602
A
12338 }
12339 extended->ref_count = ref_count - extended->shadow_depth;
12340
12341 for (i = 0; i < extended->shadow_depth; i++) {
12342 if ((tmp_obj = obj->shadow) == 0)
12343 break;
12344 vm_object_lock(tmp_obj);
2d21ac55 12345 vm_object_unlock(obj);
1c79356b 12346
39037602
A
12347 if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress)
12348 ref_count--;
12349
12350 extended->ref_count += ref_count;
12351 obj = tmp_obj;
12352 }
12353 vm_object_unlock(obj);
91447636 12354
39037602
A
12355 if (extended->share_mode == SM_SHARED) {
12356 vm_map_entry_t cur;
12357 vm_map_entry_t last;
12358 int my_refs;
91447636 12359
39037602
A
12360 obj = VME_OBJECT(entry);
12361 last = vm_map_to_entry(map);
12362 my_refs = 0;
91447636 12363
39037602
A
12364 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
12365 ref_count--;
12366 for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next)
12367 my_refs += vm_map_region_count_obj_refs(cur, obj);
12368
12369 if (my_refs == ref_count)
12370 extended->share_mode = SM_PRIVATE_ALIASED;
12371 else if (my_refs > 1)
12372 extended->share_mode = SM_SHARED_ALIASED;
91447636 12373 }
1c79356b
A
12374}
12375
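/*
 * [Editorial summary -- not part of the original source.]  The share_mode
 * computed above boils down to: SM_EMPTY for null objects, submaps and
 * (non-superpage) physically contiguous objects; SM_LARGE_PAGE for
 * superpage entries; SM_COW when the object has a shadow chain or the
 * entry still needs_copy; SM_PRIVATE for a single-reference object;
 * SM_TRUESHARED when the object is marked true_share; and SM_SHARED
 * otherwise.  An SM_SHARED result is then refined to SM_PRIVATE_ALIASED
 * or SM_SHARED_ALIASED by counting how many of this map's own entries
 * reference the same object (vm_map_region_count_obj_refs below).
 */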
1c79356b 12376
91447636
A
12377/* object is locked on entry and locked on return */
12378
12379
12380static void
12381vm_map_region_look_for_page(
12382 __unused vm_map_t map,
2d21ac55
A
12383 __unused vm_map_offset_t va,
12384 vm_object_t object,
12385 vm_object_offset_t offset,
91447636
A
12386 int max_refcnt,
12387 int depth,
39236c6e
A
12388 vm_region_extended_info_t extended,
12389 mach_msg_type_number_t count)
1c79356b 12390{
39037602
A
12391 vm_page_t p;
12392 vm_object_t shadow;
12393 int ref_count;
12394 vm_object_t caller_object;
12395
91447636
A
12396 shadow = object->shadow;
12397 caller_object = object;
1c79356b 12398
91447636
A
12399
12400 while (TRUE) {
1c79356b 12401
91447636 12402 if ( !(object->pager_trusted) && !(object->internal))
2d21ac55 12403 extended->external_pager = 1;
1c79356b 12404
91447636
A
12405 if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
12406 if (shadow && (max_refcnt == 1))
12407 extended->pages_shared_now_private++;
1c79356b 12408
39236c6e 12409 if (!p->fictitious &&
39037602 12410 (p->dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(p))))
91447636 12411 extended->pages_dirtied++;
39236c6e 12412 else if (count >= VM_REGION_EXTENDED_INFO_COUNT) {
39037602 12413 if (p->reusable || object->all_reusable) {
39236c6e
A
12414 extended->pages_reusable++;
12415 }
12416 }
1c79356b 12417
39236c6e 12418 extended->pages_resident++;
91447636
A
12419
12420 if(object != caller_object)
2d21ac55 12421 vm_object_unlock(object);
91447636
A
12422
12423 return;
1c79356b 12424 }
39236c6e
A
12425 if (object->internal &&
12426 object->alive &&
12427 !object->terminating &&
12428 object->pager_ready) {
12429
39037602
A
12430 if (VM_COMPRESSOR_PAGER_STATE_GET(object, offset)
12431 == VM_EXTERNAL_STATE_EXISTS) {
12432 /* the pager has that page */
12433 extended->pages_swapped_out++;
12434 if (object != caller_object)
12435 vm_object_unlock(object);
12436 return;
2d21ac55 12437 }
1c79356b 12438 }
2d21ac55 12439
91447636 12440 if (shadow) {
2d21ac55 12441 vm_object_lock(shadow);
1c79356b 12442
91447636
A
12443 if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress)
12444 ref_count--;
1c79356b 12445
91447636
A
12446 if (++depth > extended->shadow_depth)
12447 extended->shadow_depth = depth;
1c79356b 12448
91447636
A
12449 if (ref_count > max_refcnt)
12450 max_refcnt = ref_count;
12451
12452 if(object != caller_object)
2d21ac55 12453 vm_object_unlock(object);
91447636 12454
6d2010ae 12455 offset = offset + object->vo_shadow_offset;
91447636
A
12456 object = shadow;
12457 shadow = object->shadow;
12458 continue;
1c79356b 12459 }
91447636 12460 if(object != caller_object)
2d21ac55 12461 vm_object_unlock(object);
91447636
A
12462 break;
12463 }
12464}
1c79356b 12465
91447636
A
12466static int
12467vm_map_region_count_obj_refs(
12468 vm_map_entry_t entry,
12469 vm_object_t object)
12470{
39037602
A
12471 int ref_count;
12472 vm_object_t chk_obj;
12473 vm_object_t tmp_obj;
1c79356b 12474
3e170ce0 12475 if (VME_OBJECT(entry) == 0)
2d21ac55 12476 return(0);
1c79356b 12477
91447636 12478 if (entry->is_sub_map)
2d21ac55 12479 return(0);
91447636 12480 else {
2d21ac55 12481 ref_count = 0;
1c79356b 12482
3e170ce0 12483 chk_obj = VME_OBJECT(entry);
2d21ac55 12484 vm_object_lock(chk_obj);
1c79356b 12485
2d21ac55
A
12486 while (chk_obj) {
12487 if (chk_obj == object)
12488 ref_count++;
12489 tmp_obj = chk_obj->shadow;
12490 if (tmp_obj)
12491 vm_object_lock(tmp_obj);
12492 vm_object_unlock(chk_obj);
1c79356b 12493
2d21ac55
A
12494 chk_obj = tmp_obj;
12495 }
1c79356b 12496 }
91447636 12497 return(ref_count);
1c79356b
A
12498}
12499
12500
12501/*
91447636
A
12502 * Routine: vm_map_simplify
12503 *
12504 * Description:
12505 * Attempt to simplify the map representation in
12506 * the vicinity of the given starting address.
12507 * Note:
12508 * This routine is intended primarily to keep the
12509 * kernel maps more compact -- they generally don't
12510 * benefit from the "expand a map entry" technology
12511 * at allocation time because the adjacent entry
12512 * is often wired down.
1c79356b 12513 */
91447636
A
12514void
12515vm_map_simplify_entry(
12516 vm_map_t map,
12517 vm_map_entry_t this_entry)
1c79356b 12518{
91447636 12519 vm_map_entry_t prev_entry;
1c79356b 12520
91447636 12521 counter(c_vm_map_simplify_entry_called++);
1c79356b 12522
91447636 12523 prev_entry = this_entry->vme_prev;
1c79356b 12524
91447636 12525 if ((this_entry != vm_map_to_entry(map)) &&
2d21ac55 12526 (prev_entry != vm_map_to_entry(map)) &&
1c79356b 12527
91447636 12528 (prev_entry->vme_end == this_entry->vme_start) &&
1c79356b 12529
2d21ac55 12530 (prev_entry->is_sub_map == this_entry->is_sub_map) &&
3e170ce0
A
12531 (VME_OBJECT(prev_entry) == VME_OBJECT(this_entry)) &&
12532 ((VME_OFFSET(prev_entry) + (prev_entry->vme_end -
91447636 12533 prev_entry->vme_start))
3e170ce0 12534 == VME_OFFSET(this_entry)) &&
1c79356b 12535
fe8ab488
A
12536 (prev_entry->behavior == this_entry->behavior) &&
12537 (prev_entry->needs_copy == this_entry->needs_copy) &&
91447636
A
12538 (prev_entry->protection == this_entry->protection) &&
12539 (prev_entry->max_protection == this_entry->max_protection) &&
fe8ab488
A
12540 (prev_entry->inheritance == this_entry->inheritance) &&
12541 (prev_entry->use_pmap == this_entry->use_pmap) &&
3e170ce0 12542 (VME_ALIAS(prev_entry) == VME_ALIAS(this_entry)) &&
2d21ac55 12543 (prev_entry->no_cache == this_entry->no_cache) &&
fe8ab488
A
12544 (prev_entry->permanent == this_entry->permanent) &&
12545 (prev_entry->map_aligned == this_entry->map_aligned) &&
12546 (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) &&
12547 (prev_entry->used_for_jit == this_entry->used_for_jit) &&
12548 /* from_reserved_zone: OK if that field doesn't match */
12549 (prev_entry->iokit_acct == this_entry->iokit_acct) &&
3e170ce0
A
12550 (prev_entry->vme_resilient_codesign ==
12551 this_entry->vme_resilient_codesign) &&
12552 (prev_entry->vme_resilient_media ==
12553 this_entry->vme_resilient_media) &&
fe8ab488 12554
91447636
A
12555 (prev_entry->wired_count == this_entry->wired_count) &&
12556 (prev_entry->user_wired_count == this_entry->user_wired_count) &&
1c79356b 12557
39037602 12558 ((prev_entry->vme_atomic == FALSE) && (this_entry->vme_atomic == FALSE)) &&
91447636
A
12559 (prev_entry->in_transition == FALSE) &&
12560 (this_entry->in_transition == FALSE) &&
12561 (prev_entry->needs_wakeup == FALSE) &&
12562 (this_entry->needs_wakeup == FALSE) &&
12563 (prev_entry->is_shared == FALSE) &&
fe8ab488
A
12564 (this_entry->is_shared == FALSE) &&
12565 (prev_entry->superpage_size == FALSE) &&
12566 (this_entry->superpage_size == FALSE)
2d21ac55 12567 ) {
316670eb 12568 vm_map_store_entry_unlink(map, prev_entry);
e2d2fc5c 12569 assert(prev_entry->vme_start < this_entry->vme_end);
39236c6e
A
12570 if (prev_entry->map_aligned)
12571 assert(VM_MAP_PAGE_ALIGNED(prev_entry->vme_start,
12572 VM_MAP_PAGE_MASK(map)));
91447636 12573 this_entry->vme_start = prev_entry->vme_start;
3e170ce0
A
12574 VME_OFFSET_SET(this_entry, VME_OFFSET(prev_entry));
12575
12576 if (map->holelistenabled) {
12577 vm_map_store_update_first_free(map, this_entry, TRUE);
12578 }
12579
2d21ac55 12580 if (prev_entry->is_sub_map) {
3e170ce0 12581 vm_map_deallocate(VME_SUBMAP(prev_entry));
2d21ac55 12582 } else {
3e170ce0 12583 vm_object_deallocate(VME_OBJECT(prev_entry));
2d21ac55 12584 }
91447636 12585 vm_map_entry_dispose(map, prev_entry);
0c530ab8 12586 SAVE_HINT_MAP_WRITE(map, this_entry);
91447636 12587 counter(c_vm_map_simplified++);
1c79356b 12588 }
91447636 12589}
1c79356b 12590
91447636
A
12591void
12592vm_map_simplify(
12593 vm_map_t map,
12594 vm_map_offset_t start)
12595{
12596 vm_map_entry_t this_entry;
1c79356b 12597
91447636
A
12598 vm_map_lock(map);
12599 if (vm_map_lookup_entry(map, start, &this_entry)) {
12600 vm_map_simplify_entry(map, this_entry);
12601 vm_map_simplify_entry(map, this_entry->vme_next);
12602 }
12603 counter(c_vm_map_simplify_called++);
12604 vm_map_unlock(map);
12605}
1c79356b 12606
91447636
A
12607static void
12608vm_map_simplify_range(
12609 vm_map_t map,
12610 vm_map_offset_t start,
12611 vm_map_offset_t end)
12612{
12613 vm_map_entry_t entry;
1c79356b 12614
91447636
A
12615 /*
12616 * The map should be locked (for "write") by the caller.
12617 */
1c79356b 12618
91447636
A
12619 if (start >= end) {
12620 /* invalid address range */
12621 return;
12622 }
1c79356b 12623
39236c6e
A
12624 start = vm_map_trunc_page(start,
12625 VM_MAP_PAGE_MASK(map));
12626 end = vm_map_round_page(end,
12627 VM_MAP_PAGE_MASK(map));
2d21ac55 12628
91447636
A
12629 if (!vm_map_lookup_entry(map, start, &entry)) {
12630 /* "start" is not mapped and "entry" ends before "start" */
12631 if (entry == vm_map_to_entry(map)) {
12632 /* start with first entry in the map */
12633 entry = vm_map_first_entry(map);
12634 } else {
12635 /* start with next entry */
12636 entry = entry->vme_next;
12637 }
12638 }
12639
12640 while (entry != vm_map_to_entry(map) &&
12641 entry->vme_start <= end) {
12642 /* try and coalesce "entry" with its previous entry */
12643 vm_map_simplify_entry(map, entry);
12644 entry = entry->vme_next;
12645 }
12646}
1c79356b 12647
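/*
 * [Editorial sketch -- not part of the original source.]  The three
 * simplify routines above differ only in locking: vm_map_simplify() takes
 * the map lock itself, while vm_map_simplify_entry() and
 * vm_map_simplify_range() expect the caller to already hold the map lock
 * for write.  "example_coalesce_after_fill" is a hypothetical illustration.
 */
static __unused void
example_coalesce_after_fill(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	/* unlocked caller: vm_map_simplify() locks and unlocks internally */
	vm_map_simplify(map, start);

	/* locked caller: take the write lock around vm_map_simplify_range() */
	vm_map_lock(map);
	vm_map_simplify_range(map, start, end);
	vm_map_unlock(map);
}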
1c79356b 12648
91447636
A
12649/*
12650 * Routine: vm_map_machine_attribute
12651 * Purpose:
12652 * Provide machine-specific attributes to mappings,
 12653 * such as cacheability etc. for machines that provide
12654 * them. NUMA architectures and machines with big/strange
12655 * caches will use this.
12656 * Note:
12657 * Responsibilities for locking and checking are handled here,
12658 * everything else in the pmap module. If any non-volatile
12659 * information must be kept, the pmap module should handle
12660 * it itself. [This assumes that attributes do not
12661 * need to be inherited, which seems ok to me]
12662 */
12663kern_return_t
12664vm_map_machine_attribute(
12665 vm_map_t map,
12666 vm_map_offset_t start,
12667 vm_map_offset_t end,
12668 vm_machine_attribute_t attribute,
12669 vm_machine_attribute_val_t* value) /* IN/OUT */
12670{
12671 kern_return_t ret;
12672 vm_map_size_t sync_size;
12673 vm_map_entry_t entry;
12674
12675 if (start < vm_map_min(map) || end > vm_map_max(map))
12676 return KERN_INVALID_ADDRESS;
1c79356b 12677
91447636
A
12678 /* Figure how much memory we need to flush (in page increments) */
12679 sync_size = end - start;
1c79356b 12680
91447636
A
12681 vm_map_lock(map);
12682
12683 if (attribute != MATTR_CACHE) {
12684 /* If we don't have to find physical addresses, we */
12685 /* don't have to do an explicit traversal here. */
12686 ret = pmap_attribute(map->pmap, start, end-start,
12687 attribute, value);
12688 vm_map_unlock(map);
12689 return ret;
12690 }
1c79356b 12691
91447636 12692 ret = KERN_SUCCESS; /* Assume it all worked */
1c79356b 12693
91447636
A
12694 while(sync_size) {
12695 if (vm_map_lookup_entry(map, start, &entry)) {
12696 vm_map_size_t sub_size;
12697 if((entry->vme_end - start) > sync_size) {
12698 sub_size = sync_size;
12699 sync_size = 0;
12700 } else {
12701 sub_size = entry->vme_end - start;
2d21ac55 12702 sync_size -= sub_size;
91447636
A
12703 }
12704 if(entry->is_sub_map) {
12705 vm_map_offset_t sub_start;
12706 vm_map_offset_t sub_end;
1c79356b 12707
91447636 12708 sub_start = (start - entry->vme_start)
3e170ce0 12709 + VME_OFFSET(entry);
91447636
A
12710 sub_end = sub_start + sub_size;
12711 vm_map_machine_attribute(
3e170ce0 12712 VME_SUBMAP(entry),
91447636
A
12713 sub_start,
12714 sub_end,
12715 attribute, value);
12716 } else {
3e170ce0 12717 if (VME_OBJECT(entry)) {
91447636
A
12718 vm_page_t m;
12719 vm_object_t object;
12720 vm_object_t base_object;
12721 vm_object_t last_object;
12722 vm_object_offset_t offset;
12723 vm_object_offset_t base_offset;
12724 vm_map_size_t range;
12725 range = sub_size;
12726 offset = (start - entry->vme_start)
3e170ce0 12727 + VME_OFFSET(entry);
91447636 12728 base_offset = offset;
3e170ce0 12729 object = VME_OBJECT(entry);
91447636
A
12730 base_object = object;
12731 last_object = NULL;
1c79356b 12732
91447636 12733 vm_object_lock(object);
1c79356b 12734
91447636
A
12735 while (range) {
12736 m = vm_page_lookup(
12737 object, offset);
1c79356b 12738
91447636
A
12739 if (m && !m->fictitious) {
12740 ret =
2d21ac55 12741 pmap_attribute_cache_sync(
39037602 12742 VM_PAGE_GET_PHYS_PAGE(m),
2d21ac55
A
12743 PAGE_SIZE,
12744 attribute, value);
91447636
A
12745
12746 } else if (object->shadow) {
6d2010ae 12747 offset = offset + object->vo_shadow_offset;
91447636
A
12748 last_object = object;
12749 object = object->shadow;
12750 vm_object_lock(last_object->shadow);
12751 vm_object_unlock(last_object);
12752 continue;
12753 }
12754 range -= PAGE_SIZE;
1c79356b 12755
91447636
A
12756 if (base_object != object) {
12757 vm_object_unlock(object);
12758 vm_object_lock(base_object);
12759 object = base_object;
12760 }
12761 /* Bump to the next page */
12762 base_offset += PAGE_SIZE;
12763 offset = base_offset;
12764 }
12765 vm_object_unlock(object);
12766 }
12767 }
12768 start += sub_size;
12769 } else {
12770 vm_map_unlock(map);
12771 return KERN_FAILURE;
12772 }
12773
1c79356b 12774 }
e5568f75 12775
91447636 12776 vm_map_unlock(map);
e5568f75 12777
91447636
A
12778 return ret;
12779}
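
/*
 * Illustrative only: a minimal sketch of asking for a data-cache flush
 * over a mapped range via vm_map_machine_attribute(). The helper name,
 * the range, and the MATTR_VAL_CACHE_FLUSH request value are assumptions
 * for the example, not something this file defines.
 */
#if 0
static kern_return_t
example_flush_mapped_range(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_size_t	size)
{
	vm_machine_attribute_val_t value;

	/* request that any cached data for [start, start+size) be flushed */
	value = MATTR_VAL_CACHE_FLUSH;
	return vm_map_machine_attribute(map, start, start + size,
					MATTR_CACHE, &value);
}
#endif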
e5568f75 12780
91447636
A
12781/*
12782 * vm_map_behavior_set:
12783 *
12784 * Sets the paging reference behavior of the specified address
12785 * range in the target map. Paging reference behavior affects
12786 * how pagein operations resulting from faults on the map will be
12787 * clustered.
12788 */
12789kern_return_t
12790vm_map_behavior_set(
12791 vm_map_t map,
12792 vm_map_offset_t start,
12793 vm_map_offset_t end,
12794 vm_behavior_t new_behavior)
12795{
39037602 12796 vm_map_entry_t entry;
91447636 12797 vm_map_entry_t temp_entry;
e5568f75 12798
91447636 12799 XPR(XPR_VM_MAP,
2d21ac55 12800 "vm_map_behavior_set, 0x%X start 0x%X end 0x%X behavior %d",
b0d623f7 12801 map, start, end, new_behavior, 0);
e5568f75 12802
6d2010ae
A
12803 if (start > end ||
12804 start < vm_map_min(map) ||
12805 end > vm_map_max(map)) {
12806 return KERN_NO_SPACE;
12807 }
12808
91447636 12809 switch (new_behavior) {
b0d623f7
A
12810
12811 /*
12812 * The behaviors in this first block all set a persistent state on the specified
12813 * memory range. All we have to do here is to record the desired behavior
12814 * in the vm_map_entry_t's.
12815 */
12816
91447636
A
12817 case VM_BEHAVIOR_DEFAULT:
12818 case VM_BEHAVIOR_RANDOM:
12819 case VM_BEHAVIOR_SEQUENTIAL:
12820 case VM_BEHAVIOR_RSEQNTL:
b0d623f7
A
12821 case VM_BEHAVIOR_ZERO_WIRED_PAGES:
12822 vm_map_lock(map);
12823
12824 /*
12825 * The entire address range must be valid for the map.
12826 * Note that vm_map_range_check() does a
12827 * vm_map_lookup_entry() internally and returns the
12828 * entry containing the start of the address range if
12829 * the entire range is valid.
12830 */
12831 if (vm_map_range_check(map, start, end, &temp_entry)) {
12832 entry = temp_entry;
12833 vm_map_clip_start(map, entry, start);
12834 }
12835 else {
12836 vm_map_unlock(map);
12837 return(KERN_INVALID_ADDRESS);
12838 }
12839
12840 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
12841 vm_map_clip_end(map, entry, end);
fe8ab488
A
12842 if (entry->is_sub_map) {
12843 assert(!entry->use_pmap);
12844 }
b0d623f7
A
12845
12846 if( new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES ) {
12847 entry->zero_wired_pages = TRUE;
12848 } else {
12849 entry->behavior = new_behavior;
12850 }
12851 entry = entry->vme_next;
12852 }
12853
12854 vm_map_unlock(map);
91447636 12855 break;
b0d623f7
A
12856
12857 /*
12858 * The rest of these are different from the above in that they cause
12859 * an immediate action to take place as opposed to setting a behavior that
12860 * affects future actions.
12861 */
12862
91447636 12863 case VM_BEHAVIOR_WILLNEED:
b0d623f7
A
12864 return vm_map_willneed(map, start, end);
12865
91447636 12866 case VM_BEHAVIOR_DONTNEED:
b0d623f7
A
12867 return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS);
12868
12869 case VM_BEHAVIOR_FREE:
12870 return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS);
12871
12872 case VM_BEHAVIOR_REUSABLE:
12873 return vm_map_reusable_pages(map, start, end);
12874
12875 case VM_BEHAVIOR_REUSE:
12876 return vm_map_reuse_pages(map, start, end);
12877
12878 case VM_BEHAVIOR_CAN_REUSE:
12879 return vm_map_can_reuse(map, start, end);
12880
3e170ce0
A
12881#if MACH_ASSERT
12882 case VM_BEHAVIOR_PAGEOUT:
12883 return vm_map_pageout(map, start, end);
12884#endif /* MACH_ASSERT */
12885
1c79356b 12886 default:
91447636 12887 return(KERN_INVALID_ARGUMENT);
1c79356b 12888 }
1c79356b 12889
b0d623f7
A
12890 return(KERN_SUCCESS);
12891}
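
/*
 * Illustrative only: a hedged sketch of how madvise-style advice lands
 * here. Marking a range VM_BEHAVIOR_SEQUENTIAL records a persistent
 * behavior in the map entries so that later faults cluster page-ins
 * forward. The helper name and range are assumptions for the example.
 */
#if 0
static kern_return_t
example_mark_sequential(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_size_t	size)
{
	return vm_map_behavior_set(map,
				   vm_map_trunc_page(start, VM_MAP_PAGE_MASK(map)),
				   vm_map_round_page(start + size, VM_MAP_PAGE_MASK(map)),
				   VM_BEHAVIOR_SEQUENTIAL);
}
#endif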
12892
12893
12894/*
12895 * Internals for madvise(MADV_WILLNEED) system call.
12896 *
12897 * The present implementation is to do a read-ahead if the mapping corresponds
12898 * to a mapped regular file. If it's an anonymous mapping, then we do nothing
12899 * and basically ignore the "advice" (which we are always free to do).
12900 */
12901
12902
12903static kern_return_t
12904vm_map_willneed(
12905 vm_map_t map,
12906 vm_map_offset_t start,
12907 vm_map_offset_t end
12908)
12909{
12910 vm_map_entry_t entry;
12911 vm_object_t object;
12912 memory_object_t pager;
12913 struct vm_object_fault_info fault_info;
12914 kern_return_t kr;
12915 vm_object_size_t len;
12916 vm_object_offset_t offset;
1c79356b 12917
91447636 12918 /*
b0d623f7
A
12919 * Fill in static values in fault_info. Several fields get ignored by the code
12920 * we call, but we'll fill them in anyway since uninitialized fields are bad
12921 * when it comes to future backwards compatibility.
91447636 12922 */
b0d623f7
A
12923
12924 fault_info.interruptible = THREAD_UNINT; /* ignored value */
12925 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
12926 fault_info.no_cache = FALSE; /* ignored value */
12927 fault_info.stealth = TRUE;
6d2010ae
A
12928 fault_info.io_sync = FALSE;
12929 fault_info.cs_bypass = FALSE;
0b4c1975 12930 fault_info.mark_zf_absent = FALSE;
316670eb 12931 fault_info.batch_pmap_op = FALSE;
b0d623f7
A
12932
12933 /*
12934 * The MADV_WILLNEED operation doesn't require any changes to the
12935 * vm_map_entry_t's, so the read lock is sufficient.
12936 */
12937
12938 vm_map_lock_read(map);
12939
12940 /*
12941 * The madvise semantics require that the address range be fully
12942 * allocated with no holes. Otherwise, we're required to return
12943 * an error.
12944 */
12945
6d2010ae
A
12946 if (! vm_map_range_check(map, start, end, &entry)) {
12947 vm_map_unlock_read(map);
12948 return KERN_INVALID_ADDRESS;
12949 }
b0d623f7 12950
6d2010ae
A
12951 /*
12952 * Examine each vm_map_entry_t in the range.
12953 */
12954 for (; entry != vm_map_to_entry(map) && start < end; ) {
12955
b0d623f7 12956 /*
6d2010ae
A
12957 * The first time through, the start address could be anywhere
12958 * within the vm_map_entry we found. So adjust the offset to
12959 * correspond. After that, the offset will always be zero to
12960 * correspond to the beginning of the current vm_map_entry.
b0d623f7 12961 */
3e170ce0 12962 offset = (start - entry->vme_start) + VME_OFFSET(entry);
b0d623f7 12963
6d2010ae
A
12964 /*
12965 * Set the length so we don't go beyond the end of the
12966 * map_entry or beyond the end of the range we were given.
12967 * This range could also span multiple map entries, all of which
12968 * map different files, so make sure we only do the right amount
12969 * of I/O for each object. Note that it's possible for there
12970 * to be multiple map entries all referring to the same object
12971 * but with different page permissions, but it's not worth
12972 * trying to optimize that case.
12973 */
12974 len = MIN(entry->vme_end - start, end - start);
b0d623f7 12975
6d2010ae
A
12976 if ((vm_size_t) len != len) {
12977 /* 32-bit overflow */
12978 len = (vm_size_t) (0 - PAGE_SIZE);
12979 }
12980 fault_info.cluster_size = (vm_size_t) len;
12981 fault_info.lo_offset = offset;
12982 fault_info.hi_offset = offset + len;
3e170ce0 12983 fault_info.user_tag = VME_ALIAS(entry);
fe8ab488
A
12984 fault_info.pmap_options = 0;
12985 if (entry->iokit_acct ||
12986 (!entry->is_sub_map && !entry->use_pmap)) {
12987 fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
12988 }
b0d623f7 12989
6d2010ae
A
12990 /*
12991 * If there's no read permission to this mapping, then just
12992 * skip it.
12993 */
12994 if ((entry->protection & VM_PROT_READ) == 0) {
12995 entry = entry->vme_next;
12996 start = entry->vme_start;
12997 continue;
12998 }
b0d623f7 12999
6d2010ae
A
13000 /*
13001 * Find the file object backing this map entry. If there is
13002 * none, then we simply ignore the "will need" advice for this
13003 * entry and go on to the next one.
13004 */
13005 if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) {
13006 entry = entry->vme_next;
13007 start = entry->vme_start;
13008 continue;
13009 }
b0d623f7 13010
6d2010ae
A
13011 /*
13012 * The data_request() could take a long time, so let's
13013 * release the map lock to avoid blocking other threads.
13014 */
13015 vm_map_unlock_read(map);
b0d623f7 13016
6d2010ae
A
13017 vm_object_paging_begin(object);
13018 pager = object->pager;
13019 vm_object_unlock(object);
b0d623f7 13020
6d2010ae
A
13021 /*
13022 * Get the data from the object asynchronously.
13023 *
13024 * Note that memory_object_data_request() places limits on the
13025 * amount of I/O it will do. Regardless of the len we
fe8ab488 13026 * specified, it won't do more than MAX_UPL_TRANSFER_BYTES and it
6d2010ae
A
13027 * silently truncates the len to that size. This isn't
13028 * necessarily bad since madvise shouldn't really be used to
13029 * page in unlimited amounts of data. Other Unix variants
13030 * limit the willneed case as well. If this turns out to be an
13031 * issue for developers, then we can always adjust the policy
13032 * here and still be backwards compatible since this is all
13033 * just "advice".
13034 */
13035 kr = memory_object_data_request(
13036 pager,
13037 offset + object->paging_offset,
13038 0, /* ignored */
13039 VM_PROT_READ,
13040 (memory_object_fault_info_t)&fault_info);
b0d623f7 13041
6d2010ae
A
13042 vm_object_lock(object);
13043 vm_object_paging_end(object);
13044 vm_object_unlock(object);
b0d623f7 13045
6d2010ae
A
13046 /*
13047 * If we couldn't do the I/O for some reason, just give up on
13048 * the madvise. We still return success to the user since
13049 * madvise isn't supposed to fail when the advice can't be
13050 * taken.
13051 */
13052 if (kr != KERN_SUCCESS) {
13053 return KERN_SUCCESS;
13054 }
b0d623f7 13055
6d2010ae
A
13056 start += len;
13057 if (start >= end) {
13058 /* done */
13059 return KERN_SUCCESS;
13060 }
b0d623f7 13061
6d2010ae
A
13062 /* look up next entry */
13063 vm_map_lock_read(map);
13064 if (! vm_map_lookup_entry(map, start, &entry)) {
b0d623f7 13065 /*
6d2010ae 13066 * There's a new hole in the address range.
b0d623f7 13067 */
6d2010ae
A
13068 vm_map_unlock_read(map);
13069 return KERN_INVALID_ADDRESS;
b0d623f7 13070 }
6d2010ae 13071 }
b0d623f7
A
13072
13073 vm_map_unlock_read(map);
6d2010ae 13074 return KERN_SUCCESS;
b0d623f7
A
13075}
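
/*
 * Worked example of the per-entry offset/length computation above
 * (numbers are assumed, not taken from this file): with an entry
 * spanning [0x1000, 0x5000) at VME_OFFSET 0, a willneed request for
 * [0x3000, 0x9000) issues its first read-ahead with
 *	offset = (0x3000 - 0x1000) + 0 = 0x2000
 *	len    = MIN(0x5000 - 0x3000, 0x9000 - 0x3000) = 0x2000
 * and then continues at start = 0x5000 in the next map entry.
 */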
13076
13077static boolean_t
13078vm_map_entry_is_reusable(
13079 vm_map_entry_t entry)
13080{
3e170ce0
A
13081 /* Only user map entries */
13082
b0d623f7
A
13083 vm_object_t object;
13084
2dced7af
A
13085 if (entry->is_sub_map) {
13086 return FALSE;
13087 }
13088
3e170ce0 13089 switch (VME_ALIAS(entry)) {
39236c6e
A
13090 case VM_MEMORY_MALLOC:
13091 case VM_MEMORY_MALLOC_SMALL:
13092 case VM_MEMORY_MALLOC_LARGE:
13093 case VM_MEMORY_REALLOC:
13094 case VM_MEMORY_MALLOC_TINY:
13095 case VM_MEMORY_MALLOC_LARGE_REUSABLE:
13096 case VM_MEMORY_MALLOC_LARGE_REUSED:
13097 /*
13098 * This is a malloc() memory region: check if it's still
13099 * in its original state and can be re-used for more
13100 * malloc() allocations.
13101 */
13102 break;
13103 default:
13104 /*
13105 * Not a malloc() memory region: let the caller decide if
13106 * it's re-usable.
13107 */
13108 return TRUE;
13109 }
13110
b0d623f7
A
13111 if (entry->is_shared ||
13112 entry->is_sub_map ||
13113 entry->in_transition ||
13114 entry->protection != VM_PROT_DEFAULT ||
13115 entry->max_protection != VM_PROT_ALL ||
13116 entry->inheritance != VM_INHERIT_DEFAULT ||
13117 entry->no_cache ||
13118 entry->permanent ||
39236c6e 13119 entry->superpage_size != FALSE ||
b0d623f7
A
13120 entry->zero_wired_pages ||
13121 entry->wired_count != 0 ||
13122 entry->user_wired_count != 0) {
13123 return FALSE;
91447636 13124 }
b0d623f7 13125
3e170ce0 13126 object = VME_OBJECT(entry);
b0d623f7
A
13127 if (object == VM_OBJECT_NULL) {
13128 return TRUE;
13129 }
316670eb
A
13130 if (
13131#if 0
13132 /*
13133 * Let's proceed even if the VM object is potentially
13134 * shared.
13135 * We check for this later when processing the actual
13136 * VM pages, so the contents will be safe if shared.
13137 *
13138 * But we can still mark this memory region as "reusable" to
13139 * acknowledge that the caller did let us know that the memory
13140 * could be re-used and should not be penalized for holding
13141 * on to it. This allows its "resident size" to not include
13142 * the reusable range.
13143 */
13144 object->ref_count == 1 &&
13145#endif
b0d623f7
A
13146 object->wired_page_count == 0 &&
13147 object->copy == VM_OBJECT_NULL &&
13148 object->shadow == VM_OBJECT_NULL &&
13149 object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
13150 object->internal &&
13151 !object->true_share &&
6d2010ae 13152 object->wimg_bits == VM_WIMG_USE_DEFAULT &&
b0d623f7
A
13153 !object->code_signed) {
13154 return TRUE;
1c79356b 13155 }
b0d623f7
A
13156 return FALSE;
13157
13158
13159}
1c79356b 13160
b0d623f7
A
13161static kern_return_t
13162vm_map_reuse_pages(
13163 vm_map_t map,
13164 vm_map_offset_t start,
13165 vm_map_offset_t end)
13166{
13167 vm_map_entry_t entry;
13168 vm_object_t object;
13169 vm_object_offset_t start_offset, end_offset;
13170
13171 /*
13172 * The MADV_REUSE operation doesn't require any changes to the
13173 * vm_map_entry_t's, so the read lock is sufficient.
13174 */
0b4e3aa0 13175
b0d623f7 13176 vm_map_lock_read(map);
3e170ce0 13177 assert(map->pmap != kernel_pmap); /* protect alias access */
1c79356b 13178
b0d623f7
A
13179 /*
13180 * The madvise semantics require that the address range be fully
13181 * allocated with no holes. Otherwise, we're required to return
13182 * an error.
13183 */
13184
13185 if (!vm_map_range_check(map, start, end, &entry)) {
13186 vm_map_unlock_read(map);
13187 vm_page_stats_reusable.reuse_pages_failure++;
13188 return KERN_INVALID_ADDRESS;
1c79356b 13189 }
91447636 13190
b0d623f7
A
13191 /*
13192 * Examine each vm_map_entry_t in the range.
13193 */
13194 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
13195 entry = entry->vme_next) {
13196 /*
13197 * Sanity check on the VM map entry.
13198 */
13199 if (! vm_map_entry_is_reusable(entry)) {
13200 vm_map_unlock_read(map);
13201 vm_page_stats_reusable.reuse_pages_failure++;
13202 return KERN_INVALID_ADDRESS;
13203 }
13204
13205 /*
13206 * The first time through, the start address could be anywhere
13207 * within the vm_map_entry we found. So adjust the offset to
13208 * correspond.
13209 */
13210 if (entry->vme_start < start) {
13211 start_offset = start - entry->vme_start;
13212 } else {
13213 start_offset = 0;
13214 }
13215 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
3e170ce0
A
13216 start_offset += VME_OFFSET(entry);
13217 end_offset += VME_OFFSET(entry);
b0d623f7 13218
2dced7af 13219 assert(!entry->is_sub_map);
3e170ce0 13220 object = VME_OBJECT(entry);
b0d623f7
A
13221 if (object != VM_OBJECT_NULL) {
13222 vm_object_lock(object);
13223 vm_object_reuse_pages(object, start_offset, end_offset,
13224 TRUE);
13225 vm_object_unlock(object);
13226 }
13227
3e170ce0 13228 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSABLE) {
b0d623f7
A
13229 /*
13230 * XXX
13231 * We do not hold the VM map exclusively here.
13232 * The "alias" field is not that critical, so it's
13233 * safe to update it here, as long as it is the only
13234 * one that can be modified while holding the VM map
13235 * "shared".
13236 */
3e170ce0 13237 VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSED);
b0d623f7
A
13238 }
13239 }
13240
13241 vm_map_unlock_read(map);
13242 vm_page_stats_reusable.reuse_pages_success++;
13243 return KERN_SUCCESS;
1c79356b
A
13244}
13245
1c79356b 13246
b0d623f7
A
13247static kern_return_t
13248vm_map_reusable_pages(
13249 vm_map_t map,
13250 vm_map_offset_t start,
13251 vm_map_offset_t end)
13252{
13253 vm_map_entry_t entry;
13254 vm_object_t object;
13255 vm_object_offset_t start_offset, end_offset;
3e170ce0 13256 vm_map_offset_t pmap_offset;
b0d623f7
A
13257
13258 /*
13259 * The MADV_REUSABLE operation doesn't require any changes to the
13260 * vm_map_entry_t's, so the read lock is sufficient.
13261 */
13262
13263 vm_map_lock_read(map);
3e170ce0 13264 assert(map->pmap != kernel_pmap); /* protect alias access */
b0d623f7
A
13265
13266 /*
13267 * The madvise semantics require that the address range be fully
13268 * allocated with no holes. Otherwise, we're required to return
13269 * an error.
13270 */
13271
13272 if (!vm_map_range_check(map, start, end, &entry)) {
13273 vm_map_unlock_read(map);
13274 vm_page_stats_reusable.reusable_pages_failure++;
13275 return KERN_INVALID_ADDRESS;
13276 }
13277
13278 /*
13279 * Examine each vm_map_entry_t in the range.
13280 */
13281 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
13282 entry = entry->vme_next) {
13283 int kill_pages = 0;
13284
13285 /*
13286 * Sanity check on the VM map entry.
13287 */
13288 if (! vm_map_entry_is_reusable(entry)) {
13289 vm_map_unlock_read(map);
13290 vm_page_stats_reusable.reusable_pages_failure++;
13291 return KERN_INVALID_ADDRESS;
13292 }
13293
39037602
A
13294 if (! (entry->protection & VM_PROT_WRITE) && !entry->used_for_jit) {
13295 /* not writable: can't discard contents */
13296 vm_map_unlock_read(map);
13297 vm_page_stats_reusable.reusable_nonwritable++;
13298 vm_page_stats_reusable.reusable_pages_failure++;
13299 return KERN_PROTECTION_FAILURE;
13300 }
13301
b0d623f7
A
13302 /*
13303 * The first time through, the start address could be anywhere
13304 * within the vm_map_entry we found. So adjust the offset to
13305 * correspond.
13306 */
13307 if (entry->vme_start < start) {
13308 start_offset = start - entry->vme_start;
3e170ce0 13309 pmap_offset = start;
b0d623f7
A
13310 } else {
13311 start_offset = 0;
3e170ce0 13312 pmap_offset = entry->vme_start;
b0d623f7
A
13313 }
13314 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
3e170ce0
A
13315 start_offset += VME_OFFSET(entry);
13316 end_offset += VME_OFFSET(entry);
b0d623f7 13317
2dced7af 13318 assert(!entry->is_sub_map);
3e170ce0 13319 object = VME_OBJECT(entry);
b0d623f7
A
13320 if (object == VM_OBJECT_NULL)
13321 continue;
13322
13323
13324 vm_object_lock(object);
39037602
A
13325 if (((object->ref_count == 1) ||
13326 (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC &&
13327 object->copy == VM_OBJECT_NULL)) &&
13328 object->shadow == VM_OBJECT_NULL &&
fe8ab488
A
13329 /*
13330 * "iokit_acct" entries are billed for their virtual size
13331 * (rather than for their resident pages only), so they
13332 * wouldn't benefit from making pages reusable, and it
13333 * would be hard to keep track of pages that are both
39037602
A
13334 * "iokit_acct" and "reusable" in the pmap stats and
13335 * ledgers.
fe8ab488
A
13336 */
13337 !(entry->iokit_acct ||
39037602
A
13338 (!entry->is_sub_map && !entry->use_pmap))) {
13339 if (object->ref_count != 1) {
13340 vm_page_stats_reusable.reusable_shared++;
13341 }
b0d623f7 13342 kill_pages = 1;
39037602 13343 } else {
b0d623f7 13344 kill_pages = -1;
39037602 13345 }
b0d623f7
A
13346 if (kill_pages != -1) {
13347 vm_object_deactivate_pages(object,
13348 start_offset,
13349 end_offset - start_offset,
13350 kill_pages,
3e170ce0
A
13351 TRUE /*reusable_pages*/,
13352 map->pmap,
13353 pmap_offset);
b0d623f7
A
13354 } else {
13355 vm_page_stats_reusable.reusable_pages_shared++;
13356 }
13357 vm_object_unlock(object);
13358
3e170ce0
A
13359 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE ||
13360 VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSED) {
b0d623f7
A
13361 /*
13362 * XXX
13363 * We do not hold the VM map exclusively here.
13364 * The "alias" field is not that critical, so it's
13365 * safe to update it here, as long as it is the only
13366 * one that can be modified while holding the VM map
13367 * "shared".
13368 */
3e170ce0 13369 VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSABLE);
b0d623f7
A
13370 }
13371 }
13372
13373 vm_map_unlock_read(map);
13374 vm_page_stats_reusable.reusable_pages_success++;
13375 return KERN_SUCCESS;
13376}
13377
13378
13379static kern_return_t
13380vm_map_can_reuse(
13381 vm_map_t map,
13382 vm_map_offset_t start,
13383 vm_map_offset_t end)
13384{
13385 vm_map_entry_t entry;
13386
13387 /*
13388 * The MADV_REUSABLE operation doesn't require any changes to the
13389 * vm_map_entry_t's, so the read lock is sufficient.
13390 */
13391
13392 vm_map_lock_read(map);
3e170ce0 13393 assert(map->pmap != kernel_pmap); /* protect alias access */
b0d623f7
A
13394
13395 /*
13396 * The madvise semantics require that the address range be fully
13397 * allocated with no holes. Otherwise, we're required to return
13398 * an error.
13399 */
13400
13401 if (!vm_map_range_check(map, start, end, &entry)) {
13402 vm_map_unlock_read(map);
13403 vm_page_stats_reusable.can_reuse_failure++;
13404 return KERN_INVALID_ADDRESS;
13405 }
13406
13407 /*
13408 * Examine each vm_map_entry_t in the range.
13409 */
13410 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
13411 entry = entry->vme_next) {
13412 /*
13413 * Sanity check on the VM map entry.
13414 */
13415 if (! vm_map_entry_is_reusable(entry)) {
13416 vm_map_unlock_read(map);
13417 vm_page_stats_reusable.can_reuse_failure++;
13418 return KERN_INVALID_ADDRESS;
13419 }
13420 }
13421
13422 vm_map_unlock_read(map);
13423 vm_page_stats_reusable.can_reuse_success++;
13424 return KERN_SUCCESS;
13425}
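
/*
 * Illustrative only: the malloc-style lifecycle these routines back.
 * A user-level allocator typically marks a large free block reusable
 * (so its pages stop counting as resident) and re-arms it before
 * handing the memory out again. Sketch under that assumption; the
 * helper name and range are assumptions for the example.
 */
#if 0
static void
example_reusable_cycle(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	/* on free: let the VM reclaim the contents lazily */
	(void) vm_map_reusable_pages(map, start, end);

	/* on re-allocation: the contents matter again */
	(void) vm_map_reuse_pages(map, start, end);
}
#endif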
13426
13427
3e170ce0
A
13428#if MACH_ASSERT
13429static kern_return_t
13430vm_map_pageout(
13431 vm_map_t map,
13432 vm_map_offset_t start,
13433 vm_map_offset_t end)
13434{
13435 vm_map_entry_t entry;
13436
13437 /*
13438 * The MADV_PAGEOUT operation doesn't require any changes to the
13439 * vm_map_entry_t's, so the read lock is sufficient.
13440 */
13441
13442 vm_map_lock_read(map);
13443
13444 /*
13445 * The madvise semantics require that the address range be fully
13446 * allocated with no holes. Otherwise, we're required to return
13447 * an error.
13448 */
13449
13450 if (!vm_map_range_check(map, start, end, &entry)) {
13451 vm_map_unlock_read(map);
13452 return KERN_INVALID_ADDRESS;
13453 }
13454
13455 /*
13456 * Examine each vm_map_entry_t in the range.
13457 */
13458 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
13459 entry = entry->vme_next) {
13460 vm_object_t object;
13461
13462 /*
13463 * Sanity check on the VM map entry.
13464 */
13465 if (entry->is_sub_map) {
13466 vm_map_t submap;
13467 vm_map_offset_t submap_start;
13468 vm_map_offset_t submap_end;
13469 vm_map_entry_t submap_entry;
13470
13471 submap = VME_SUBMAP(entry);
13472 submap_start = VME_OFFSET(entry);
13473 submap_end = submap_start + (entry->vme_end -
13474 entry->vme_start);
13475
13476 vm_map_lock_read(submap);
13477
13478 if (! vm_map_range_check(submap,
13479 submap_start,
13480 submap_end,
13481 &submap_entry)) {
13482 vm_map_unlock_read(submap);
13483 vm_map_unlock_read(map);
13484 return KERN_INVALID_ADDRESS;
13485 }
13486
13487 object = VME_OBJECT(submap_entry);
13488 if (submap_entry->is_sub_map ||
13489 object == VM_OBJECT_NULL ||
13490 !object->internal) {
13491 vm_map_unlock_read(submap);
13492 continue;
13493 }
13494
13495 vm_object_pageout(object);
13496
13497 vm_map_unlock_read(submap);
13498 submap = VM_MAP_NULL;
13499 submap_entry = VM_MAP_ENTRY_NULL;
13500 continue;
13501 }
13502
13503 object = VME_OBJECT(entry);
13504 if (entry->is_sub_map ||
13505 object == VM_OBJECT_NULL ||
13506 !object->internal) {
13507 continue;
13508 }
13509
13510 vm_object_pageout(object);
13511 }
13512
13513 vm_map_unlock_read(map);
13514 return KERN_SUCCESS;
13515}
13516#endif /* MACH_ASSERT */
13517
13518
1c79356b 13519/*
91447636
A
13520 * Routine: vm_map_entry_insert
13521 *
13522 * Description: This routine inserts a new vm_map_entry in a locked map.
1c79356b 13523 */
91447636
A
13524vm_map_entry_t
13525vm_map_entry_insert(
13526 vm_map_t map,
13527 vm_map_entry_t insp_entry,
13528 vm_map_offset_t start,
13529 vm_map_offset_t end,
13530 vm_object_t object,
13531 vm_object_offset_t offset,
13532 boolean_t needs_copy,
13533 boolean_t is_shared,
13534 boolean_t in_transition,
13535 vm_prot_t cur_protection,
13536 vm_prot_t max_protection,
13537 vm_behavior_t behavior,
13538 vm_inherit_t inheritance,
2d21ac55 13539 unsigned wired_count,
b0d623f7
A
13540 boolean_t no_cache,
13541 boolean_t permanent,
39236c6e 13542 unsigned int superpage_size,
fe8ab488
A
13543 boolean_t clear_map_aligned,
13544 boolean_t is_submap)
1c79356b 13545{
91447636 13546 vm_map_entry_t new_entry;
1c79356b 13547
91447636 13548 assert(insp_entry != (vm_map_entry_t)0);
1c79356b 13549
7ddcb079 13550 new_entry = vm_map_entry_create(map, !map->hdr.entries_pageable);
1c79356b 13551
39236c6e
A
13552 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
13553 new_entry->map_aligned = TRUE;
13554 } else {
13555 new_entry->map_aligned = FALSE;
13556 }
13557 if (clear_map_aligned &&
fe8ab488
A
13558 (! VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)) ||
13559 ! VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)))) {
39236c6e
A
13560 new_entry->map_aligned = FALSE;
13561 }
13562
91447636
A
13563 new_entry->vme_start = start;
13564 new_entry->vme_end = end;
13565 assert(page_aligned(new_entry->vme_start));
13566 assert(page_aligned(new_entry->vme_end));
39236c6e 13567 if (new_entry->map_aligned) {
fe8ab488
A
13568 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
13569 VM_MAP_PAGE_MASK(map)));
39236c6e
A
13570 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
13571 VM_MAP_PAGE_MASK(map)));
13572 }
e2d2fc5c 13573 assert(new_entry->vme_start < new_entry->vme_end);
1c79356b 13574
3e170ce0
A
13575 VME_OBJECT_SET(new_entry, object);
13576 VME_OFFSET_SET(new_entry, offset);
91447636 13577 new_entry->is_shared = is_shared;
fe8ab488 13578 new_entry->is_sub_map = is_submap;
91447636
A
13579 new_entry->needs_copy = needs_copy;
13580 new_entry->in_transition = in_transition;
13581 new_entry->needs_wakeup = FALSE;
13582 new_entry->inheritance = inheritance;
13583 new_entry->protection = cur_protection;
13584 new_entry->max_protection = max_protection;
13585 new_entry->behavior = behavior;
13586 new_entry->wired_count = wired_count;
13587 new_entry->user_wired_count = 0;
fe8ab488
A
13588 if (is_submap) {
13589 /*
13590 * submap: "use_pmap" means "nested".
13591 * default: false.
13592 */
13593 new_entry->use_pmap = FALSE;
13594 } else {
13595 /*
13596 * object: "use_pmap" means "use pmap accounting" for footprint.
13597 * default: true.
13598 */
13599 new_entry->use_pmap = TRUE;
13600 }
3e170ce0 13601 VME_ALIAS_SET(new_entry, 0);
b0d623f7 13602 new_entry->zero_wired_pages = FALSE;
2d21ac55 13603 new_entry->no_cache = no_cache;
b0d623f7 13604 new_entry->permanent = permanent;
39236c6e
A
13605 if (superpage_size)
13606 new_entry->superpage_size = TRUE;
13607 else
13608 new_entry->superpage_size = FALSE;
6d2010ae 13609 new_entry->used_for_jit = FALSE;
fe8ab488 13610 new_entry->iokit_acct = FALSE;
3e170ce0
A
13611 new_entry->vme_resilient_codesign = FALSE;
13612 new_entry->vme_resilient_media = FALSE;
39037602 13613 new_entry->vme_atomic = FALSE;
1c79356b 13614
91447636
A
13615 /*
13616 * Insert the new entry into the list.
13617 */
1c79356b 13618
6d2010ae 13619 vm_map_store_entry_link(map, insp_entry, new_entry);
91447636
A
13620 map->size += end - start;
13621
13622 /*
13623 * Update the free space hint and the lookup hint.
13624 */
13625
0c530ab8 13626 SAVE_HINT_MAP_WRITE(map, new_entry);
91447636 13627 return new_entry;
1c79356b
A
13628}
13629
13630/*
91447636
A
13631 * Routine: vm_map_remap_extract
13632 *
13633 * Description: This routine returns a vm_map_entry list from a map.
1c79356b 13634 */
91447636
A
13635static kern_return_t
13636vm_map_remap_extract(
13637 vm_map_t map,
13638 vm_map_offset_t addr,
13639 vm_map_size_t size,
13640 boolean_t copy,
13641 struct vm_map_header *map_header,
13642 vm_prot_t *cur_protection,
13643 vm_prot_t *max_protection,
13644 /* What, no behavior? */
13645 vm_inherit_t inheritance,
39037602
A
13646 boolean_t pageable,
13647 boolean_t same_map)
1c79356b 13648{
91447636
A
13649 kern_return_t result;
13650 vm_map_size_t mapped_size;
13651 vm_map_size_t tmp_size;
13652 vm_map_entry_t src_entry; /* result of last map lookup */
13653 vm_map_entry_t new_entry;
13654 vm_object_offset_t offset;
13655 vm_map_offset_t map_address;
13656 vm_map_offset_t src_start; /* start of entry to map */
13657 vm_map_offset_t src_end; /* end of region to be mapped */
13658 vm_object_t object;
13659 vm_map_version_t version;
13660 boolean_t src_needs_copy;
13661 boolean_t new_entry_needs_copy;
1c79356b 13662
91447636 13663 assert(map != VM_MAP_NULL);
39236c6e
A
13664 assert(size != 0);
13665 assert(size == vm_map_round_page(size, PAGE_MASK));
91447636
A
13666 assert(inheritance == VM_INHERIT_NONE ||
13667 inheritance == VM_INHERIT_COPY ||
13668 inheritance == VM_INHERIT_SHARE);
1c79356b 13669
91447636
A
13670 /*
13671 * Compute start and end of region.
13672 */
39236c6e
A
13673 src_start = vm_map_trunc_page(addr, PAGE_MASK);
13674 src_end = vm_map_round_page(src_start + size, PAGE_MASK);
13675
1c79356b 13676
91447636
A
13677 /*
13678 * Initialize map_header.
13679 */
13680 map_header->links.next = (struct vm_map_entry *)&map_header->links;
13681 map_header->links.prev = (struct vm_map_entry *)&map_header->links;
13682 map_header->nentries = 0;
13683 map_header->entries_pageable = pageable;
39236c6e 13684 map_header->page_shift = PAGE_SHIFT;
1c79356b 13685
6d2010ae
A
13686 vm_map_store_init( map_header );
13687
91447636
A
13688 *cur_protection = VM_PROT_ALL;
13689 *max_protection = VM_PROT_ALL;
1c79356b 13690
91447636
A
13691 map_address = 0;
13692 mapped_size = 0;
13693 result = KERN_SUCCESS;
1c79356b 13694
91447636
A
13695 /*
13696 * The specified source virtual space might correspond to
13697 * multiple map entries, need to loop on them.
13698 */
13699 vm_map_lock(map);
13700 while (mapped_size != size) {
13701 vm_map_size_t entry_size;
1c79356b 13702
91447636
A
13703 /*
13704 * Find the beginning of the region.
13705 */
13706 if (! vm_map_lookup_entry(map, src_start, &src_entry)) {
13707 result = KERN_INVALID_ADDRESS;
13708 break;
13709 }
1c79356b 13710
91447636
A
13711 if (src_start < src_entry->vme_start ||
13712 (mapped_size && src_start != src_entry->vme_start)) {
13713 result = KERN_INVALID_ADDRESS;
13714 break;
13715 }
1c79356b 13716
91447636
A
13717 tmp_size = size - mapped_size;
13718 if (src_end > src_entry->vme_end)
13719 tmp_size -= (src_end - src_entry->vme_end);
1c79356b 13720
91447636 13721 entry_size = (vm_map_size_t)(src_entry->vme_end -
2d21ac55 13722 src_entry->vme_start);
1c79356b 13723
91447636 13724 if(src_entry->is_sub_map) {
3e170ce0 13725 vm_map_reference(VME_SUBMAP(src_entry));
91447636
A
13726 object = VM_OBJECT_NULL;
13727 } else {
3e170ce0 13728 object = VME_OBJECT(src_entry);
fe8ab488
A
13729 if (src_entry->iokit_acct) {
13730 /*
13731 * This entry uses "IOKit accounting".
13732 */
13733 } else if (object != VM_OBJECT_NULL &&
13734 object->purgable != VM_PURGABLE_DENY) {
13735 /*
13736 * Purgeable objects have their own accounting:
13737 * no pmap accounting for them.
13738 */
13739 assert(!src_entry->use_pmap);
13740 } else {
13741 /*
13742 * Not IOKit or purgeable:
13743 * must be accounted by pmap stats.
13744 */
13745 assert(src_entry->use_pmap);
13746 }
55e303ae 13747
91447636
A
13748 if (object == VM_OBJECT_NULL) {
13749 object = vm_object_allocate(entry_size);
3e170ce0
A
13750 VME_OFFSET_SET(src_entry, 0);
13751 VME_OBJECT_SET(src_entry, object);
91447636
A
13752 } else if (object->copy_strategy !=
13753 MEMORY_OBJECT_COPY_SYMMETRIC) {
13754 /*
13755 * We are already using an asymmetric
13756 * copy, and therefore we already have
13757 * the right object.
13758 */
13759 assert(!src_entry->needs_copy);
13760 } else if (src_entry->needs_copy || object->shadowed ||
13761 (object->internal && !object->true_share &&
2d21ac55 13762 !src_entry->is_shared &&
6d2010ae 13763 object->vo_size > entry_size)) {
1c79356b 13764
3e170ce0 13765 VME_OBJECT_SHADOW(src_entry, entry_size);
1c79356b 13766
91447636
A
13767 if (!src_entry->needs_copy &&
13768 (src_entry->protection & VM_PROT_WRITE)) {
0c530ab8
A
13769 vm_prot_t prot;
13770
13771 prot = src_entry->protection & ~VM_PROT_WRITE;
2d21ac55 13772
3e170ce0
A
13773 if (override_nx(map,
13774 VME_ALIAS(src_entry))
13775 && prot)
0c530ab8 13776 prot |= VM_PROT_EXECUTE;
2d21ac55 13777
316670eb 13778 if(map->mapped_in_other_pmaps) {
2d21ac55 13779 vm_object_pmap_protect(
3e170ce0
A
13780 VME_OBJECT(src_entry),
13781 VME_OFFSET(src_entry),
2d21ac55
A
13782 entry_size,
13783 PMAP_NULL,
0c530ab8 13784 src_entry->vme_start,
0c530ab8 13785 prot);
2d21ac55
A
13786 } else {
13787 pmap_protect(vm_map_pmap(map),
13788 src_entry->vme_start,
13789 src_entry->vme_end,
13790 prot);
91447636
A
13791 }
13792 }
1c79356b 13793
3e170ce0 13794 object = VME_OBJECT(src_entry);
91447636
A
13795 src_entry->needs_copy = FALSE;
13796 }
1c79356b 13797
1c79356b 13798
91447636 13799 vm_object_lock(object);
2d21ac55 13800 vm_object_reference_locked(object); /* object ref. for new entry */
91447636 13801 if (object->copy_strategy ==
2d21ac55 13802 MEMORY_OBJECT_COPY_SYMMETRIC) {
91447636
A
13803 object->copy_strategy =
13804 MEMORY_OBJECT_COPY_DELAY;
13805 }
13806 vm_object_unlock(object);
13807 }
1c79356b 13808
3e170ce0
A
13809 offset = (VME_OFFSET(src_entry) +
13810 (src_start - src_entry->vme_start));
1c79356b 13811
7ddcb079 13812 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
91447636 13813 vm_map_entry_copy(new_entry, src_entry);
fe8ab488
A
13814 if (new_entry->is_sub_map) {
13815 /* clr address space specifics */
13816 new_entry->use_pmap = FALSE;
13817 }
1c79356b 13818
39236c6e
A
13819 new_entry->map_aligned = FALSE;
13820
91447636
A
13821 new_entry->vme_start = map_address;
13822 new_entry->vme_end = map_address + tmp_size;
e2d2fc5c 13823 assert(new_entry->vme_start < new_entry->vme_end);
91447636 13824 new_entry->inheritance = inheritance;
3e170ce0 13825 VME_OFFSET_SET(new_entry, offset);
1c79356b 13826
91447636
A
13827 /*
13828 * The new region has to be copied now if required.
13829 */
13830 RestartCopy:
13831 if (!copy) {
316670eb
A
13832 /*
13833 * Cannot allow an entry describing a JIT
13834 * region to be shared across address spaces.
13835 */
39037602 13836 if (src_entry->used_for_jit == TRUE && !same_map) {
316670eb
A
13837 result = KERN_INVALID_ARGUMENT;
13838 break;
13839 }
91447636
A
13840 src_entry->is_shared = TRUE;
13841 new_entry->is_shared = TRUE;
13842 if (!(new_entry->is_sub_map))
13843 new_entry->needs_copy = FALSE;
1c79356b 13844
91447636
A
13845 } else if (src_entry->is_sub_map) {
13846 /* make this a COW sub_map if not already */
3e170ce0 13847 assert(new_entry->wired_count == 0);
91447636
A
13848 new_entry->needs_copy = TRUE;
13849 object = VM_OBJECT_NULL;
13850 } else if (src_entry->wired_count == 0 &&
3e170ce0
A
13851 vm_object_copy_quickly(&VME_OBJECT(new_entry),
13852 VME_OFFSET(new_entry),
2d21ac55
A
13853 (new_entry->vme_end -
13854 new_entry->vme_start),
13855 &src_needs_copy,
13856 &new_entry_needs_copy)) {
55e303ae 13857
91447636
A
13858 new_entry->needs_copy = new_entry_needs_copy;
13859 new_entry->is_shared = FALSE;
1c79356b 13860
91447636
A
13861 /*
13862 * Handle copy_on_write semantics.
13863 */
13864 if (src_needs_copy && !src_entry->needs_copy) {
0c530ab8
A
13865 vm_prot_t prot;
13866
13867 prot = src_entry->protection & ~VM_PROT_WRITE;
2d21ac55 13868
3e170ce0
A
13869 if (override_nx(map,
13870 VME_ALIAS(src_entry))
13871 && prot)
0c530ab8 13872 prot |= VM_PROT_EXECUTE;
2d21ac55 13873
91447636
A
13874 vm_object_pmap_protect(object,
13875 offset,
13876 entry_size,
13877 ((src_entry->is_shared
316670eb 13878 || map->mapped_in_other_pmaps) ?
91447636
A
13879 PMAP_NULL : map->pmap),
13880 src_entry->vme_start,
0c530ab8 13881 prot);
1c79356b 13882
3e170ce0 13883 assert(src_entry->wired_count == 0);
91447636
A
13884 src_entry->needs_copy = TRUE;
13885 }
13886 /*
13887 * Throw away the old object reference of the new entry.
13888 */
13889 vm_object_deallocate(object);
1c79356b 13890
91447636
A
13891 } else {
13892 new_entry->is_shared = FALSE;
1c79356b 13893
91447636
A
13894 /*
13895 * The map can be safely unlocked since we
13896 * already hold a reference on the object.
13897 *
13898 * Record the timestamp of the map for later
13899 * verification, and unlock the map.
13900 */
13901 version.main_timestamp = map->timestamp;
13902 vm_map_unlock(map); /* Increments timestamp once! */
55e303ae 13903
91447636
A
13904 /*
13905 * Perform the copy.
13906 */
13907 if (src_entry->wired_count > 0) {
13908 vm_object_lock(object);
13909 result = vm_object_copy_slowly(
2d21ac55
A
13910 object,
13911 offset,
13912 entry_size,
13913 THREAD_UNINT,
3e170ce0 13914 &VME_OBJECT(new_entry));
1c79356b 13915
3e170ce0 13916 VME_OFFSET_SET(new_entry, 0);
91447636
A
13917 new_entry->needs_copy = FALSE;
13918 } else {
3e170ce0
A
13919 vm_object_offset_t new_offset;
13920
13921 new_offset = VME_OFFSET(new_entry);
91447636 13922 result = vm_object_copy_strategically(
2d21ac55
A
13923 object,
13924 offset,
13925 entry_size,
3e170ce0
A
13926 &VME_OBJECT(new_entry),
13927 &new_offset,
2d21ac55 13928 &new_entry_needs_copy);
3e170ce0
A
13929 if (new_offset != VME_OFFSET(new_entry)) {
13930 VME_OFFSET_SET(new_entry, new_offset);
13931 }
1c79356b 13932
91447636
A
13933 new_entry->needs_copy = new_entry_needs_copy;
13934 }
1c79356b 13935
91447636
A
13936 /*
13937 * Throw away the old object reference of the new entry.
13938 */
13939 vm_object_deallocate(object);
1c79356b 13940
91447636
A
13941 if (result != KERN_SUCCESS &&
13942 result != KERN_MEMORY_RESTART_COPY) {
13943 _vm_map_entry_dispose(map_header, new_entry);
39037602 13944 vm_map_lock(map);
91447636
A
13945 break;
13946 }
1c79356b 13947
91447636
A
13948 /*
13949 * Verify that the map has not substantially
13950 * changed while the copy was being made.
13951 */
1c79356b 13952
91447636
A
13953 vm_map_lock(map);
13954 if (version.main_timestamp + 1 != map->timestamp) {
13955 /*
13956 * Simple version comparison failed.
13957 *
13958 * Retry the lookup and verify that the
13959 * same object/offset are still present.
13960 */
3e170ce0 13961 vm_object_deallocate(VME_OBJECT(new_entry));
91447636
A
13962 _vm_map_entry_dispose(map_header, new_entry);
13963 if (result == KERN_MEMORY_RESTART_COPY)
13964 result = KERN_SUCCESS;
13965 continue;
13966 }
1c79356b 13967
91447636
A
13968 if (result == KERN_MEMORY_RESTART_COPY) {
13969 vm_object_reference(object);
13970 goto RestartCopy;
13971 }
13972 }
1c79356b 13973
6d2010ae 13974 _vm_map_store_entry_link(map_header,
91447636 13975 map_header->links.prev, new_entry);
1c79356b 13976
6d2010ae
A
13977 /* Protections for submap mapping are irrelevant here */
13978 if( !src_entry->is_sub_map ) {
13979 *cur_protection &= src_entry->protection;
13980 *max_protection &= src_entry->max_protection;
13981 }
91447636
A
13982 map_address += tmp_size;
13983 mapped_size += tmp_size;
13984 src_start += tmp_size;
1c79356b 13985
91447636 13986 } /* end while */
1c79356b 13987
91447636
A
13988 vm_map_unlock(map);
13989 if (result != KERN_SUCCESS) {
13990 /*
13991 * Free all allocated elements.
13992 */
13993 for (src_entry = map_header->links.next;
13994 src_entry != (struct vm_map_entry *)&map_header->links;
13995 src_entry = new_entry) {
13996 new_entry = src_entry->vme_next;
6d2010ae 13997 _vm_map_store_entry_unlink(map_header, src_entry);
39236c6e 13998 if (src_entry->is_sub_map) {
3e170ce0 13999 vm_map_deallocate(VME_SUBMAP(src_entry));
39236c6e 14000 } else {
3e170ce0 14001 vm_object_deallocate(VME_OBJECT(src_entry));
39236c6e 14002 }
91447636
A
14003 _vm_map_entry_dispose(map_header, src_entry);
14004 }
14005 }
14006 return result;
1c79356b
A
14007}
14008
14009/*
91447636 14010 * Routine: vm_remap
1c79356b 14011 *
91447636
A
14012 * Map a portion of a task's address space.
14013 * The mapped region must not overlap more than
14014 * one VM memory object. Protections and
14015 * inheritance attributes remain the same
14016 * as in the original task and are out parameters.
14017 * Source and target tasks can be identical.
14018 * Other attributes are identical to those for vm_map().
1c79356b
A
14019 */
14020kern_return_t
91447636
A
14021vm_map_remap(
14022 vm_map_t target_map,
14023 vm_map_address_t *address,
14024 vm_map_size_t size,
14025 vm_map_offset_t mask,
060df5ea 14026 int flags,
91447636
A
14027 vm_map_t src_map,
14028 vm_map_offset_t memory_address,
1c79356b 14029 boolean_t copy,
1c79356b
A
14030 vm_prot_t *cur_protection,
14031 vm_prot_t *max_protection,
91447636 14032 vm_inherit_t inheritance)
1c79356b
A
14033{
14034 kern_return_t result;
91447636 14035 vm_map_entry_t entry;
0c530ab8 14036 vm_map_entry_t insp_entry = VM_MAP_ENTRY_NULL;
1c79356b 14037 vm_map_entry_t new_entry;
91447636 14038 struct vm_map_header map_header;
39236c6e 14039 vm_map_offset_t offset_in_mapping;
1c79356b 14040
91447636
A
14041 if (target_map == VM_MAP_NULL)
14042 return KERN_INVALID_ARGUMENT;
1c79356b 14043
91447636 14044 switch (inheritance) {
2d21ac55
A
14045 case VM_INHERIT_NONE:
14046 case VM_INHERIT_COPY:
14047 case VM_INHERIT_SHARE:
91447636
A
14048 if (size != 0 && src_map != VM_MAP_NULL)
14049 break;
14050 /*FALL THRU*/
2d21ac55 14051 default:
91447636
A
14052 return KERN_INVALID_ARGUMENT;
14053 }
1c79356b 14054
39236c6e
A
14055 /*
14056 * If the user is requesting that we return the address of the
14057 * first byte of the data (rather than the base of the page),
14058 * then we use different rounding semantics: specifically,
14059 * we assume that (memory_address, size) describes a region
14060 * all of whose pages we must cover, rather than a base to be truncated
14061 * down and a size to be added to that base. So we figure out
14062 * the highest page that the requested region includes and make
14063 * sure that the size will cover it.
14064 *
14065 * The key example we're worried about is of the form:
14066 *
14067 * memory_address = 0x1ff0, size = 0x20
14068 *
14069 * With the old semantics, we round down the memory_address to 0x1000
14070 * and round up the size to 0x1000, resulting in our covering *only*
14071 * page 0x1000. With the new semantics, we'd realize that the region covers
14072 * 0x1ff0-0x2010, and compute a size of 0x2000. Thus, we cover both page
14073 * 0x1000 and page 0x2000 in the region we remap.
14074 */
14075 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
14076 offset_in_mapping = memory_address - vm_map_trunc_page(memory_address, PAGE_MASK);
14077 size = vm_map_round_page(memory_address + size - vm_map_trunc_page(memory_address, PAGE_MASK), PAGE_MASK);
14078 } else {
14079 size = vm_map_round_page(size, PAGE_MASK);
14080 }
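	/*
	 * Worked example for the case documented above (values assumed,
	 * 4K pages): memory_address = 0x1ff0, size = 0x20
	 *	offset_in_mapping = 0x1ff0 - 0x1000 = 0xff0
	 *	size = round_page(0x1ff0 + 0x20 - 0x1000)
	 *	     = round_page(0x1010) = 0x2000
	 * so the extracted region covers pages 0x1000 and 0x2000, and
	 * *address is later bumped by 0xff0 to point at the first byte.
	 */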
1c79356b 14081
91447636 14082 result = vm_map_remap_extract(src_map, memory_address,
2d21ac55
A
14083 size, copy, &map_header,
14084 cur_protection,
14085 max_protection,
14086 inheritance,
39037602
A
14087 target_map->hdr.entries_pageable,
14088 src_map == target_map);
1c79356b 14089
91447636
A
14090 if (result != KERN_SUCCESS) {
14091 return result;
14092 }
1c79356b 14093
91447636
A
14094 /*
14095 * Allocate/check a range of free virtual address
14096 * space for the target
1c79356b 14097 */
39236c6e
A
14098 *address = vm_map_trunc_page(*address,
14099 VM_MAP_PAGE_MASK(target_map));
91447636
A
14100 vm_map_lock(target_map);
14101 result = vm_map_remap_range_allocate(target_map, address, size,
060df5ea 14102 mask, flags, &insp_entry);
1c79356b 14103
91447636
A
14104 for (entry = map_header.links.next;
14105 entry != (struct vm_map_entry *)&map_header.links;
14106 entry = new_entry) {
14107 new_entry = entry->vme_next;
6d2010ae 14108 _vm_map_store_entry_unlink(&map_header, entry);
91447636 14109 if (result == KERN_SUCCESS) {
3e170ce0
A
14110 if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
14111 /* no codesigning -> read-only access */
14112 assert(!entry->used_for_jit);
14113 entry->max_protection = VM_PROT_READ;
14114 entry->protection = VM_PROT_READ;
14115 entry->vme_resilient_codesign = TRUE;
14116 }
91447636
A
14117 entry->vme_start += *address;
14118 entry->vme_end += *address;
39236c6e 14119 assert(!entry->map_aligned);
6d2010ae 14120 vm_map_store_entry_link(target_map, insp_entry, entry);
91447636
A
14121 insp_entry = entry;
14122 } else {
14123 if (!entry->is_sub_map) {
3e170ce0 14124 vm_object_deallocate(VME_OBJECT(entry));
91447636 14125 } else {
3e170ce0 14126 vm_map_deallocate(VME_SUBMAP(entry));
2d21ac55 14127 }
91447636 14128 _vm_map_entry_dispose(&map_header, entry);
1c79356b 14129 }
91447636 14130 }
1c79356b 14131
3e170ce0
A
14132 if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
14133 *cur_protection = VM_PROT_READ;
14134 *max_protection = VM_PROT_READ;
14135 }
14136
6d2010ae 14137 if( target_map->disable_vmentry_reuse == TRUE) {
39037602 14138 assert(!target_map->is_nested_map);
6d2010ae
A
14139 if( target_map->highest_entry_end < insp_entry->vme_end ){
14140 target_map->highest_entry_end = insp_entry->vme_end;
14141 }
14142 }
14143
91447636
A
14144 if (result == KERN_SUCCESS) {
14145 target_map->size += size;
0c530ab8 14146 SAVE_HINT_MAP_WRITE(target_map, insp_entry);
91447636
A
14147 }
14148 vm_map_unlock(target_map);
1c79356b 14149
91447636
A
14150 if (result == KERN_SUCCESS && target_map->wiring_required)
14151 result = vm_map_wire(target_map, *address,
3e170ce0
A
14152 *address + size, *cur_protection | VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_MLOCK),
14153 TRUE);
39236c6e
A
14154
14155 /*
14156 * If requested, return the address of the data pointed to by the
14157 * request, rather than the base of the resulting page.
14158 */
14159 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
14160 *address += offset_in_mapping;
14161 }
14162
91447636
A
14163 return result;
14164}
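
/*
 * Illustrative only: a minimal sketch of sharing a source range into a
 * target map with vm_map_remap(), letting the kernel choose the target
 * address. The helper name and parameters are assumptions for the
 * example; error handling beyond the return value is elided.
 */
#if 0
static kern_return_t
example_share_range(
	vm_map_t		target_map,
	vm_map_t		src_map,
	vm_map_offset_t		src_addr,
	vm_map_size_t		size,
	vm_map_address_t	*out_addr)
{
	vm_prot_t	cur_prot;
	vm_prot_t	max_prot;

	*out_addr = 0;
	return vm_map_remap(target_map,
			    out_addr,
			    size,
			    0,			/* mask: no alignment constraint */
			    VM_FLAGS_ANYWHERE,
			    src_map,
			    src_addr,
			    FALSE,		/* copy: share, don't copy */
			    &cur_prot,
			    &max_prot,
			    VM_INHERIT_SHARE);
}
#endif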
1c79356b 14165
91447636
A
14166/*
14167 * Routine: vm_map_remap_range_allocate
14168 *
14169 * Description:
14170 * Allocate a range in the specified virtual address map.
14171 * Returns the address and the map entry just before the allocated
14172 * range
14173 *
14174 * Map must be locked.
14175 */
1c79356b 14176
91447636
A
14177static kern_return_t
14178vm_map_remap_range_allocate(
14179 vm_map_t map,
14180 vm_map_address_t *address, /* IN/OUT */
14181 vm_map_size_t size,
14182 vm_map_offset_t mask,
060df5ea 14183 int flags,
91447636
A
14184 vm_map_entry_t *map_entry) /* OUT */
14185{
060df5ea
A
14186 vm_map_entry_t entry;
14187 vm_map_offset_t start;
14188 vm_map_offset_t end;
14189 kern_return_t kr;
3e170ce0 14190 vm_map_entry_t hole_entry;
1c79356b 14191
2d21ac55 14192StartAgain: ;
1c79356b 14193
2d21ac55 14194 start = *address;
1c79356b 14195
060df5ea 14196 if (flags & VM_FLAGS_ANYWHERE)
2d21ac55 14197 {
39037602
A
14198 if (flags & VM_FLAGS_RANDOM_ADDR)
14199 {
14200 /*
14201 * Get a random start address.
14202 */
14203 kr = vm_map_random_address_for_size(map, address, size);
14204 if (kr != KERN_SUCCESS) {
14205 return(kr);
14206 }
14207 start = *address;
14208 }
14209
2d21ac55
A
14210 /*
14211 * Calculate the first possible address.
14212 */
1c79356b 14213
2d21ac55
A
14214 if (start < map->min_offset)
14215 start = map->min_offset;
14216 if (start > map->max_offset)
14217 return(KERN_NO_SPACE);
91447636 14218
2d21ac55
A
14219 /*
14220 * Look for the first possible address;
14221 * if there's already something at this
14222 * address, we have to start after it.
14223 */
1c79356b 14224
6d2010ae
A
14225 if( map->disable_vmentry_reuse == TRUE) {
14226 VM_MAP_HIGHEST_ENTRY(map, entry, start);
2d21ac55 14227 } else {
3e170ce0
A
14228
14229 if (map->holelistenabled) {
14230 hole_entry = (vm_map_entry_t)map->holes_list;
14231
14232 if (hole_entry == NULL) {
14233 /*
14234 * No more space in the map?
14235 */
14236 return(KERN_NO_SPACE);
14237 } else {
14238
14239 boolean_t found_hole = FALSE;
14240
14241 do {
14242 if (hole_entry->vme_start >= start) {
14243 start = hole_entry->vme_start;
14244 found_hole = TRUE;
14245 break;
14246 }
14247
14248 if (hole_entry->vme_end > start) {
14249 found_hole = TRUE;
14250 break;
14251 }
14252 hole_entry = hole_entry->vme_next;
14253
14254 } while (hole_entry != (vm_map_entry_t) map->holes_list);
14255
14256 if (found_hole == FALSE) {
14257 return (KERN_NO_SPACE);
14258 }
14259
14260 entry = hole_entry;
14261 }
6d2010ae 14262 } else {
3e170ce0
A
14263 assert(first_free_is_valid(map));
14264 if (start == map->min_offset) {
14265 if ((entry = map->first_free) != vm_map_to_entry(map))
14266 start = entry->vme_end;
14267 } else {
14268 vm_map_entry_t tmp_entry;
14269 if (vm_map_lookup_entry(map, start, &tmp_entry))
14270 start = tmp_entry->vme_end;
14271 entry = tmp_entry;
14272 }
6d2010ae 14273 }
39236c6e
A
14274 start = vm_map_round_page(start,
14275 VM_MAP_PAGE_MASK(map));
2d21ac55 14276 }
91447636 14277
2d21ac55
A
14278 /*
14279 * In any case, the "entry" always precedes
14280 * the proposed new region throughout the
14281 * loop:
14282 */
1c79356b 14283
2d21ac55 14284 while (TRUE) {
39037602 14285 vm_map_entry_t next;
2d21ac55
A
14286
14287 /*
14288 * Find the end of the proposed new region.
14289 * Be sure we didn't go beyond the end, or
14290 * wrap around the address.
14291 */
14292
14293 end = ((start + mask) & ~mask);
39236c6e
A
14294 end = vm_map_round_page(end,
14295 VM_MAP_PAGE_MASK(map));
2d21ac55
A
14296 if (end < start)
14297 return(KERN_NO_SPACE);
14298 start = end;
14299 end += size;
14300
14301 if ((end > map->max_offset) || (end < start)) {
14302 if (map->wait_for_space) {
14303 if (size <= (map->max_offset -
14304 map->min_offset)) {
14305 assert_wait((event_t) map, THREAD_INTERRUPTIBLE);
14306 vm_map_unlock(map);
14307 thread_block(THREAD_CONTINUE_NULL);
14308 vm_map_lock(map);
14309 goto StartAgain;
14310 }
14311 }
91447636 14312
2d21ac55
A
14313 return(KERN_NO_SPACE);
14314 }
1c79356b 14315
2d21ac55 14316 next = entry->vme_next;
1c79356b 14317
3e170ce0
A
14318 if (map->holelistenabled) {
14319 if (entry->vme_end >= end)
14320 break;
14321 } else {
14322 /*
14323 * If there are no more entries, we must win.
14324 *
14325 * OR
14326 *
14327 * If there is another entry, it must be
14328 * after the end of the potential new region.
14329 */
1c79356b 14330
3e170ce0
A
14331 if (next == vm_map_to_entry(map))
14332 break;
14333
14334 if (next->vme_start >= end)
14335 break;
14336 }
1c79356b 14337
2d21ac55
A
14338 /*
14339 * Didn't fit -- move to the next entry.
14340 */
1c79356b 14341
2d21ac55 14342 entry = next;
3e170ce0
A
14343
14344 if (map->holelistenabled) {
14345 if (entry == (vm_map_entry_t) map->holes_list) {
14346 /*
14347 * Wrapped around
14348 */
14349 return(KERN_NO_SPACE);
14350 }
14351 start = entry->vme_start;
14352 } else {
14353 start = entry->vme_end;
14354 }
14355 }
14356
14357 if (map->holelistenabled) {
14358
14359 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
14360 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
14361 }
2d21ac55 14362 }
3e170ce0 14363
2d21ac55 14364 *address = start;
3e170ce0 14365
2d21ac55
A
14366 } else {
14367 vm_map_entry_t temp_entry;
91447636 14368
2d21ac55
A
14369 /*
14370 * Verify that:
14371 * the address doesn't itself violate
14372 * the mask requirement.
14373 */
1c79356b 14374
2d21ac55
A
14375 if ((start & mask) != 0)
14376 return(KERN_NO_SPACE);
1c79356b 14377
1c79356b 14378
2d21ac55
A
14379 /*
14380 * ... the address is within bounds
14381 */
1c79356b 14382
2d21ac55 14383 end = start + size;
1c79356b 14384
2d21ac55
A
14385 if ((start < map->min_offset) ||
14386 (end > map->max_offset) ||
14387 (start >= end)) {
14388 return(KERN_INVALID_ADDRESS);
14389 }
1c79356b 14390
060df5ea
A
14391 /*
14392 * If we're asked to overwrite whatever was mapped in that
14393 * range, first deallocate that range.
14394 */
14395 if (flags & VM_FLAGS_OVERWRITE) {
14396 vm_map_t zap_map;
14397
14398 /*
14399 * We use a "zap_map" to avoid having to unlock
14400 * the "map" in vm_map_delete(), which would compromise
14401 * the atomicity of the "deallocate" and then "remap"
14402 * combination.
14403 */
14404 zap_map = vm_map_create(PMAP_NULL,
14405 start,
316670eb 14406 end,
060df5ea
A
14407 map->hdr.entries_pageable);
14408 if (zap_map == VM_MAP_NULL) {
14409 return KERN_RESOURCE_SHORTAGE;
14410 }
39236c6e 14411 vm_map_set_page_shift(zap_map, VM_MAP_PAGE_SHIFT(map));
3e170ce0 14412 vm_map_disable_hole_optimization(zap_map);
060df5ea
A
14413
14414 kr = vm_map_delete(map, start, end,
fe8ab488
A
14415 (VM_MAP_REMOVE_SAVE_ENTRIES |
14416 VM_MAP_REMOVE_NO_MAP_ALIGN),
060df5ea
A
14417 zap_map);
14418 if (kr == KERN_SUCCESS) {
14419 vm_map_destroy(zap_map,
14420 VM_MAP_REMOVE_NO_PMAP_CLEANUP);
14421 zap_map = VM_MAP_NULL;
14422 }
14423 }
14424
2d21ac55
A
14425 /*
14426 * ... the starting address isn't allocated
14427 */
91447636 14428
2d21ac55
A
14429 if (vm_map_lookup_entry(map, start, &temp_entry))
14430 return(KERN_NO_SPACE);
91447636 14431
2d21ac55 14432 entry = temp_entry;
91447636 14433
2d21ac55
A
14434 /*
14435 * ... the next region doesn't overlap the
14436 * end point.
14437 */
1c79356b 14438
2d21ac55
A
14439 if ((entry->vme_next != vm_map_to_entry(map)) &&
14440 (entry->vme_next->vme_start < end))
14441 return(KERN_NO_SPACE);
14442 }
14443 *map_entry = entry;
14444 return(KERN_SUCCESS);
91447636 14445}
1c79356b 14446
91447636
A
14447/*
14448 * vm_map_switch:
14449 *
14450 * Set the address map for the current thread to the specified map
14451 */
1c79356b 14452
91447636
A
14453vm_map_t
14454vm_map_switch(
14455 vm_map_t map)
14456{
14457 int mycpu;
14458 thread_t thread = current_thread();
14459 vm_map_t oldmap = thread->map;
1c79356b 14460
91447636
A
14461 mp_disable_preemption();
14462 mycpu = cpu_number();
1c79356b 14463
91447636
A
14464 /*
14465 * Deactivate the current map and activate the requested map
14466 */
14467 PMAP_SWITCH_USER(thread, map, mycpu);
1c79356b 14468
91447636
A
14469 mp_enable_preemption();
14470 return(oldmap);
14471}
1c79356b 14472
1c79356b 14473
91447636
A
14474/*
14475 * Routine: vm_map_write_user
14476 *
14477 * Description:
14478 * Copy out data from kernel space into space in the
14479 * destination map. The space must already exist in the
14480 * destination map.
14481 * NOTE: This routine should only be called by threads
14482 * which can block on a page fault, i.e. kernel-mode user
14483 * threads.
14484 *
14485 */
14486kern_return_t
14487vm_map_write_user(
14488 vm_map_t map,
14489 void *src_p,
14490 vm_map_address_t dst_addr,
14491 vm_size_t size)
14492{
14493 kern_return_t kr = KERN_SUCCESS;
1c79356b 14494
91447636
A
14495 if(current_map() == map) {
14496 if (copyout(src_p, dst_addr, size)) {
14497 kr = KERN_INVALID_ADDRESS;
14498 }
14499 } else {
14500 vm_map_t oldmap;
1c79356b 14501
91447636
A
14502 /* take on the identity of the target map while doing */
14503 /* the transfer */
1c79356b 14504
91447636
A
14505 vm_map_reference(map);
14506 oldmap = vm_map_switch(map);
14507 if (copyout(src_p, dst_addr, size)) {
14508 kr = KERN_INVALID_ADDRESS;
1c79356b 14509 }
91447636
A
14510 vm_map_switch(oldmap);
14511 vm_map_deallocate(map);
1c79356b 14512 }
91447636 14513 return kr;
1c79356b
A
14514}
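
/*
 * Illustrative only: publishing a kernel-resident record into another
 * task's map. The record and destination address are assumptions for
 * the sketch; the call may fault, so it must come from a thread that
 * can block.
 */
#if 0
static kern_return_t
example_publish_record(
	vm_map_t		user_map,
	vm_map_address_t	user_addr,
	void			*krecord,
	vm_size_t		krecord_size)
{
	return vm_map_write_user(user_map, krecord, user_addr, krecord_size);
}
#endif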
14515
14516/*
91447636
A
14517 * Routine: vm_map_read_user
14518 *
14519 * Description:
14520 * Copy in data from a user space source map into the
14521 * kernel map. The space must already exist in the
14522 * kernel map.
14523 * NOTE: This routine should only be called by threads
14524 * which can block on a page fault, i.e. kernel-mode user
14525 * threads.
1c79356b 14526 *
1c79356b
A
14527 */
14528kern_return_t
91447636
A
14529vm_map_read_user(
14530 vm_map_t map,
14531 vm_map_address_t src_addr,
14532 void *dst_p,
14533 vm_size_t size)
1c79356b 14534{
91447636 14535 kern_return_t kr = KERN_SUCCESS;
1c79356b 14536
91447636
A
14537 if(current_map() == map) {
14538 if (copyin(src_addr, dst_p, size)) {
14539 kr = KERN_INVALID_ADDRESS;
14540 }
14541 } else {
14542 vm_map_t oldmap;
1c79356b 14543
91447636
A
14544 /* take on the identity of the target map while doing */
14545 /* the transfer */
14546
14547 vm_map_reference(map);
14548 oldmap = vm_map_switch(map);
14549 if (copyin(src_addr, dst_p, size)) {
14550 kr = KERN_INVALID_ADDRESS;
14551 }
14552 vm_map_switch(oldmap);
14553 vm_map_deallocate(map);
1c79356b 14554 }
91447636
A
14555 return kr;
14556}
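/*
 * Example (illustrative sketch only): reading a fixed-size blob from a
 * user map into a kernel buffer with vm_map_read_user().  The wrapper
 * name and the 64-byte size are hypothetical; the source range is
 * assumed to be mapped and readable in "user_map".
 */
static __unused kern_return_t
example_read_user_blob(
	vm_map_t		user_map,
	vm_map_address_t	user_src,
	void			*kernel_dst)	/* at least 64 bytes */
{
	return vm_map_read_user(user_map, user_src, kernel_dst, 64);
}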
14557
1c79356b 14558
91447636
A
14559/*
14560 * vm_map_check_protection:
14561 *
14562 * Assert that the target map allows the specified
14563 * privilege on the entire address region given.
14564 * The entire region must be allocated.
14565 */
2d21ac55
A
14566boolean_t
14567vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
14568 vm_map_offset_t end, vm_prot_t protection)
91447636 14569{
2d21ac55
A
14570 vm_map_entry_t entry;
14571 vm_map_entry_t tmp_entry;
1c79356b 14572
91447636 14573 vm_map_lock(map);
1c79356b 14574
2d21ac55 14575 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
91447636 14576 {
2d21ac55
A
14577 vm_map_unlock(map);
14578 return (FALSE);
1c79356b
A
14579 }
14580
91447636
A
14581 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
14582 vm_map_unlock(map);
14583 return(FALSE);
14584 }
1c79356b 14585
91447636
A
14586 entry = tmp_entry;
14587
14588 while (start < end) {
14589 if (entry == vm_map_to_entry(map)) {
14590 vm_map_unlock(map);
14591 return(FALSE);
1c79356b 14592 }
1c79356b 14593
91447636
A
14594 /*
14595 * No holes allowed!
14596 */
1c79356b 14597
91447636
A
14598 if (start < entry->vme_start) {
14599 vm_map_unlock(map);
14600 return(FALSE);
14601 }
14602
14603 /*
14604 * Check protection associated with entry.
14605 */
14606
14607 if ((entry->protection & protection) != protection) {
14608 vm_map_unlock(map);
14609 return(FALSE);
14610 }
14611
14612 /* go to next entry */
14613
14614 start = entry->vme_end;
14615 entry = entry->vme_next;
14616 }
14617 vm_map_unlock(map);
14618 return(TRUE);
1c79356b
A
14619}
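/*
 * Example (illustrative sketch only): verifying that an entire range is
 * mapped read/write before operating on it.  The wrapper name is
 * hypothetical; vm_map_check_protection() takes the map lock itself and
 * returns FALSE on any hole or any entry lacking the requested
 * protection.
 */
static __unused boolean_t
example_range_is_writable(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	return vm_map_check_protection(map, start, end,
				       VM_PROT_READ | VM_PROT_WRITE);
}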
14620
1c79356b 14621kern_return_t
91447636
A
14622vm_map_purgable_control(
14623 vm_map_t map,
14624 vm_map_offset_t address,
14625 vm_purgable_t control,
14626 int *state)
1c79356b 14627{
91447636
A
14628 vm_map_entry_t entry;
14629 vm_object_t object;
14630 kern_return_t kr;
fe8ab488 14631 boolean_t was_nonvolatile;
1c79356b 14632
1c79356b 14633 /*
91447636
A
14634 * Vet all the input parameters and current type and state of the
14635 * underlying object. Return with an error if anything is amiss.
1c79356b 14636 */
91447636
A
14637 if (map == VM_MAP_NULL)
14638 return(KERN_INVALID_ARGUMENT);
1c79356b 14639
91447636 14640 if (control != VM_PURGABLE_SET_STATE &&
b0d623f7
A
14641 control != VM_PURGABLE_GET_STATE &&
14642 control != VM_PURGABLE_PURGE_ALL)
91447636 14643 return(KERN_INVALID_ARGUMENT);
1c79356b 14644
b0d623f7
A
14645 if (control == VM_PURGABLE_PURGE_ALL) {
14646 vm_purgeable_object_purge_all();
14647 return KERN_SUCCESS;
14648 }
14649
91447636 14650 if (control == VM_PURGABLE_SET_STATE &&
b0d623f7 14651 (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) ||
2d21ac55 14652 ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK)))
91447636
A
14653 return(KERN_INVALID_ARGUMENT);
14654
b0d623f7 14655 vm_map_lock_read(map);
91447636
A
14656
14657 if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {
14658
14659 /*
14660 * Must pass a valid non-submap address.
14661 */
b0d623f7 14662 vm_map_unlock_read(map);
91447636
A
14663 return(KERN_INVALID_ADDRESS);
14664 }
14665
14666 if ((entry->protection & VM_PROT_WRITE) == 0) {
14667 /*
14668 * Can't apply purgable controls to something you can't write.
14669 */
b0d623f7 14670 vm_map_unlock_read(map);
91447636
A
14671 return(KERN_PROTECTION_FAILURE);
14672 }
14673
3e170ce0 14674 object = VME_OBJECT(entry);
fe8ab488
A
14675 if (object == VM_OBJECT_NULL ||
14676 object->purgable == VM_PURGABLE_DENY) {
91447636 14677 /*
fe8ab488 14678 * Object must already be present and be purgeable.
91447636 14679 */
b0d623f7 14680 vm_map_unlock_read(map);
91447636
A
14681 return KERN_INVALID_ARGUMENT;
14682 }
14683
14684 vm_object_lock(object);
14685
39236c6e 14686#if 00
3e170ce0 14687 if (VME_OFFSET(entry) != 0 ||
6d2010ae 14688 entry->vme_end - entry->vme_start != object->vo_size) {
91447636
A
14689 /*
14690 * Can only apply purgable controls to the whole (existing)
14691 * object at once.
14692 */
b0d623f7 14693 vm_map_unlock_read(map);
91447636
A
14694 vm_object_unlock(object);
14695 return KERN_INVALID_ARGUMENT;
1c79356b 14696 }
39236c6e 14697#endif
fe8ab488
A
14698
14699 assert(!entry->is_sub_map);
14700 assert(!entry->use_pmap); /* purgeable has its own accounting */
14701
b0d623f7 14702 vm_map_unlock_read(map);
1c79356b 14703
fe8ab488
A
14704 was_nonvolatile = (object->purgable == VM_PURGABLE_NONVOLATILE);
14705
91447636 14706 kr = vm_object_purgable_control(object, control, state);
1c79356b 14707
fe8ab488
A
14708 if (was_nonvolatile &&
14709 object->purgable != VM_PURGABLE_NONVOLATILE &&
14710 map->pmap == kernel_pmap) {
14711#if DEBUG
14712 object->vo_purgeable_volatilizer = kernel_task;
14713#endif /* DEBUG */
14714 }
14715
91447636 14716 vm_object_unlock(object);
1c79356b 14717
91447636
A
14718 return kr;
14719}
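/*
 * Example (illustrative sketch only): marking the purgeable object
 * backing "address" volatile so the VM can reclaim it under memory
 * pressure, then reading the current state back.  The wrapper name is
 * hypothetical; "address" is assumed to map a writable purgeable object
 * in "map".
 */
static __unused kern_return_t
example_make_volatile(
	vm_map_t	map,
	vm_map_offset_t	address)
{
	kern_return_t	kr;
	int		state;

	state = VM_PURGABLE_VOLATILE;
	kr = vm_map_purgable_control(map, address,
				     VM_PURGABLE_SET_STATE, &state);
	if (kr != KERN_SUCCESS)
		return kr;

	/* query the object's purgeable state explicitly */
	return vm_map_purgable_control(map, address,
				       VM_PURGABLE_GET_STATE, &state);
}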
1c79356b 14720
91447636 14721kern_return_t
b0d623f7 14722vm_map_page_query_internal(
2d21ac55 14723 vm_map_t target_map,
91447636 14724 vm_map_offset_t offset,
2d21ac55
A
14725 int *disposition,
14726 int *ref_count)
91447636 14727{
b0d623f7
A
14728 kern_return_t kr;
14729 vm_page_info_basic_data_t info;
14730 mach_msg_type_number_t count;
14731
14732 count = VM_PAGE_INFO_BASIC_COUNT;
14733 kr = vm_map_page_info(target_map,
14734 offset,
14735 VM_PAGE_INFO_BASIC,
14736 (vm_page_info_t) &info,
14737 &count);
14738 if (kr == KERN_SUCCESS) {
14739 *disposition = info.disposition;
14740 *ref_count = info.ref_count;
14741 } else {
14742 *disposition = 0;
14743 *ref_count = 0;
14744 }
2d21ac55 14745
b0d623f7
A
14746 return kr;
14747}
14748
14749kern_return_t
14750vm_map_page_info(
14751 vm_map_t map,
14752 vm_map_offset_t offset,
14753 vm_page_info_flavor_t flavor,
14754 vm_page_info_t info,
14755 mach_msg_type_number_t *count)
14756{
14757 vm_map_entry_t map_entry;
14758 vm_object_t object;
14759 vm_page_t m;
b0d623f7
A
14760 kern_return_t retval = KERN_SUCCESS;
14761 boolean_t top_object;
14762 int disposition;
14763 int ref_count;
b0d623f7
A
14764 vm_page_info_basic_t basic_info;
14765 int depth;
6d2010ae 14766 vm_map_offset_t offset_in_page;
2d21ac55 14767
b0d623f7
A
14768 switch (flavor) {
14769 case VM_PAGE_INFO_BASIC:
14770 if (*count != VM_PAGE_INFO_BASIC_COUNT) {
6d2010ae
A
14771 /*
14772 * The "vm_page_info_basic_data" structure was not
14773 * properly padded, so allow the size to be off by
14774 * one to maintain backwards binary compatibility...
14775 */
14776 if (*count != VM_PAGE_INFO_BASIC_COUNT - 1)
14777 return KERN_INVALID_ARGUMENT;
b0d623f7
A
14778 }
14779 break;
14780 default:
14781 return KERN_INVALID_ARGUMENT;
91447636 14782 }
2d21ac55 14783
b0d623f7
A
14784 disposition = 0;
14785 ref_count = 0;
b0d623f7
A
14786 top_object = TRUE;
14787 depth = 0;
14788
14789 retval = KERN_SUCCESS;
6d2010ae 14790 offset_in_page = offset & PAGE_MASK;
39236c6e 14791 offset = vm_map_trunc_page(offset, PAGE_MASK);
b0d623f7
A
14792
14793 vm_map_lock_read(map);
14794
14795 /*
14796 * First, find the map entry covering "offset", going down
14797 * submaps if necessary.
14798 */
14799 for (;;) {
14800 if (!vm_map_lookup_entry(map, offset, &map_entry)) {
14801 vm_map_unlock_read(map);
14802 return KERN_INVALID_ADDRESS;
14803 }
14804 /* compute offset from this map entry's start */
14805 offset -= map_entry->vme_start;
14806 /* compute offset into this map entry's object (or submap) */
3e170ce0 14807 offset += VME_OFFSET(map_entry);
b0d623f7
A
14808
14809 if (map_entry->is_sub_map) {
14810 vm_map_t sub_map;
2d21ac55 14811
3e170ce0 14812 sub_map = VME_SUBMAP(map_entry);
2d21ac55 14813 vm_map_lock_read(sub_map);
b0d623f7 14814 vm_map_unlock_read(map);
2d21ac55 14815
b0d623f7
A
14816 map = sub_map;
14817
14818 ref_count = MAX(ref_count, map->ref_count);
14819 continue;
1c79356b 14820 }
b0d623f7 14821 break;
91447636 14822 }
b0d623f7 14823
3e170ce0 14824 object = VME_OBJECT(map_entry);
b0d623f7
A
14825 if (object == VM_OBJECT_NULL) {
14826 /* no object -> no page */
14827 vm_map_unlock_read(map);
14828 goto done;
14829 }
14830
91447636 14831 vm_object_lock(object);
b0d623f7
A
14832 vm_map_unlock_read(map);
14833
14834 /*
14835 * Go down the VM object shadow chain until we find the page
14836 * we're looking for.
14837 */
14838 for (;;) {
14839 ref_count = MAX(ref_count, object->ref_count);
2d21ac55 14840
91447636 14841 m = vm_page_lookup(object, offset);
2d21ac55 14842
91447636 14843 if (m != VM_PAGE_NULL) {
b0d623f7 14844 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
91447636
A
14845 break;
14846 } else {
39236c6e
A
14847 if (object->internal &&
14848 object->alive &&
14849 !object->terminating &&
14850 object->pager_ready) {
14851
39037602
A
14852 if (VM_COMPRESSOR_PAGER_STATE_GET(object, offset)
14853 == VM_EXTERNAL_STATE_EXISTS) {
14854 /* the pager has that page */
14855 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
14856 break;
2d21ac55
A
14857 }
14858 }
b0d623f7 14859
2d21ac55
A
14860 if (object->shadow != VM_OBJECT_NULL) {
14861 vm_object_t shadow;
14862
6d2010ae 14863 offset += object->vo_shadow_offset;
2d21ac55
A
14864 shadow = object->shadow;
14865
14866 vm_object_lock(shadow);
14867 vm_object_unlock(object);
14868
14869 object = shadow;
14870 top_object = FALSE;
b0d623f7 14871 depth++;
2d21ac55 14872 } else {
b0d623f7
A
14873// if (!object->internal)
14874// break;
14875// retval = KERN_FAILURE;
14876// goto done_with_object;
14877 break;
91447636 14878 }
91447636
A
14879 }
14880 }
91447636
A
14881 /* The ref_count is not strictly accurate: it measures the number */
14882 /* of entities holding a ref on the object; they may not be mapping */
14883 /* the object or may not be mapping the section holding the */
14884 /* target page, but it's still a ballpark number and, though an */
14885 /* overcount, it picks up the copy-on-write cases. */
1c79356b 14886
91447636
A
14887 /* We could also get a picture of page sharing from pmap_attributes, */
14888 /* but this would undercount as only faulted-in mappings would */
14889 /* show up. */
1c79356b 14890
2d21ac55 14891 if (top_object == TRUE && object->shadow)
b0d623f7
A
14892 disposition |= VM_PAGE_QUERY_PAGE_COPIED;
14893
14894 if (! object->internal)
14895 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
2d21ac55
A
14896
14897 if (m == VM_PAGE_NULL)
b0d623f7 14898 goto done_with_object;
2d21ac55 14899
91447636 14900 if (m->fictitious) {
b0d623f7
A
14901 disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
14902 goto done_with_object;
91447636 14903 }
39037602 14904 if (m->dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(m)))
b0d623f7 14905 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
1c79356b 14906
39037602 14907 if (m->reference || pmap_is_referenced(VM_PAGE_GET_PHYS_PAGE(m)))
b0d623f7 14908 disposition |= VM_PAGE_QUERY_PAGE_REF;
1c79356b 14909
39037602 14910 if (m->vm_page_q_state == VM_PAGE_ON_SPECULATIVE_Q)
b0d623f7 14911 disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE;
1c79356b 14912
593a1d5f 14913 if (m->cs_validated)
b0d623f7 14914 disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED;
593a1d5f 14915 if (m->cs_tainted)
b0d623f7 14916 disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED;
c18c124e
A
14917 if (m->cs_nx)
14918 disposition |= VM_PAGE_QUERY_PAGE_CS_NX;
593a1d5f 14919
b0d623f7 14920done_with_object:
2d21ac55 14921 vm_object_unlock(object);
b0d623f7
A
14922done:
14923
14924 switch (flavor) {
14925 case VM_PAGE_INFO_BASIC:
14926 basic_info = (vm_page_info_basic_t) info;
14927 basic_info->disposition = disposition;
14928 basic_info->ref_count = ref_count;
39236c6e
A
14929 basic_info->object_id = (vm_object_id_t) (uintptr_t)
14930 VM_KERNEL_ADDRPERM(object);
6d2010ae
A
14931 basic_info->offset =
14932 (memory_object_offset_t) offset + offset_in_page;
b0d623f7
A
14933 basic_info->depth = depth;
14934 break;
14935 }
0c530ab8 14936
2d21ac55 14937 return retval;
91447636
A
14938}
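/*
 * Example (illustrative sketch only): querying a single page with the
 * VM_PAGE_INFO_BASIC flavor and testing whether it is resident.  The
 * wrapper name is hypothetical.
 */
static __unused boolean_t
example_page_is_resident(
	vm_map_t	map,
	vm_map_offset_t	offset)
{
	kern_return_t			kr;
	vm_page_info_basic_data_t	info;
	mach_msg_type_number_t		count;

	count = VM_PAGE_INFO_BASIC_COUNT;
	kr = vm_map_page_info(map, offset, VM_PAGE_INFO_BASIC,
			      (vm_page_info_t) &info, &count);
	if (kr != KERN_SUCCESS)
		return FALSE;

	return (info.disposition & VM_PAGE_QUERY_PAGE_PRESENT) ? TRUE : FALSE;
}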
14939
14940/*
14941 * vm_map_msync
14942 *
14943 * Synchronizes the specified memory range with its backing store
14944 * image by either flushing or cleaning the contents to the appropriate
14945 * memory manager, engaging in a memory object synchronize dialog with
14946 * the manager. The client doesn't return until the manager issues an
14947 * m_o_s_completed message. MIG magically converts the user task
14948 * parameter to the task's address map.
14949 *
14950 * interpretation of sync_flags
14951 * VM_SYNC_INVALIDATE - discard pages, only return precious
14952 * pages to manager.
14953 *
14954 * VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
14955 * - discard pages, write dirty or precious
14956 * pages back to memory manager.
14957 *
14958 * VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
14959 * - write dirty or precious pages back to
14960 * the memory manager.
14961 *
14962 * VM_SYNC_CONTIGUOUS - does everything normally, but if there
14963 * is a hole in the region, and we would
14964 * have returned KERN_SUCCESS, return
14965 * KERN_INVALID_ADDRESS instead.
14966 *
14967 * NOTE
14968 * The memory object attributes have not yet been implemented; this
14969 * function will have to deal with the invalidate attribute.
14970 *
14971 * RETURNS
14972 * KERN_INVALID_TASK Bad task parameter
14973 * KERN_INVALID_ARGUMENT both sync and async were specified.
14974 * KERN_SUCCESS The usual.
14975 * KERN_INVALID_ADDRESS There was a hole in the region.
14976 */
14977
14978kern_return_t
14979vm_map_msync(
14980 vm_map_t map,
14981 vm_map_address_t address,
14982 vm_map_size_t size,
14983 vm_sync_t sync_flags)
14984{
14985 msync_req_t msr;
14986 msync_req_t new_msr;
14987 queue_chain_t req_q; /* queue of requests for this msync */
14988 vm_map_entry_t entry;
14989 vm_map_size_t amount_left;
14990 vm_object_offset_t offset;
14991 boolean_t do_sync_req;
91447636 14992 boolean_t had_hole = FALSE;
2d21ac55 14993 memory_object_t pager;
3e170ce0 14994 vm_map_offset_t pmap_offset;
91447636
A
14995
14996 if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
14997 (sync_flags & VM_SYNC_SYNCHRONOUS))
14998 return(KERN_INVALID_ARGUMENT);
1c79356b
A
14999
15000 /*
91447636 15001 * align address and size on page boundaries
1c79356b 15002 */
39236c6e
A
15003 size = (vm_map_round_page(address + size,
15004 VM_MAP_PAGE_MASK(map)) -
15005 vm_map_trunc_page(address,
15006 VM_MAP_PAGE_MASK(map)));
15007 address = vm_map_trunc_page(address,
15008 VM_MAP_PAGE_MASK(map));
1c79356b 15009
91447636
A
15010 if (map == VM_MAP_NULL)
15011 return(KERN_INVALID_TASK);
1c79356b 15012
91447636
A
15013 if (size == 0)
15014 return(KERN_SUCCESS);
1c79356b 15015
91447636
A
15016 queue_init(&req_q);
15017 amount_left = size;
1c79356b 15018
91447636
A
15019 while (amount_left > 0) {
15020 vm_object_size_t flush_size;
15021 vm_object_t object;
1c79356b 15022
91447636
A
15023 vm_map_lock(map);
15024 if (!vm_map_lookup_entry(map,
3e170ce0 15025 address,
39236c6e 15026 &entry)) {
91447636 15027
2d21ac55 15028 vm_map_size_t skip;
91447636
A
15029
15030 /*
15031 * hole in the address map.
15032 */
15033 had_hole = TRUE;
15034
39037602
A
15035 if (sync_flags & VM_SYNC_KILLPAGES) {
15036 /*
15037 * For VM_SYNC_KILLPAGES, there should be
15038 * no holes in the range, since we couldn't
15039 * prevent someone else from allocating in
15040 * that hole and we wouldn't want to "kill"
15041 * their pages.
15042 */
15043 vm_map_unlock(map);
15044 break;
15045 }
15046
91447636
A
15047 /*
15048 * Check for empty map.
15049 */
15050 if (entry == vm_map_to_entry(map) &&
15051 entry->vme_next == entry) {
15052 vm_map_unlock(map);
15053 break;
15054 }
15055 /*
15056 * Check that we don't wrap and that
15057 * we have at least one real map entry.
15058 */
15059 if ((map->hdr.nentries == 0) ||
15060 (entry->vme_next->vme_start < address)) {
15061 vm_map_unlock(map);
15062 break;
15063 }
15064 /*
15065 * Move up to the next entry if needed
15066 */
15067 skip = (entry->vme_next->vme_start - address);
15068 if (skip >= amount_left)
15069 amount_left = 0;
15070 else
15071 amount_left -= skip;
15072 address = entry->vme_next->vme_start;
15073 vm_map_unlock(map);
15074 continue;
15075 }
1c79356b 15076
91447636 15077 offset = address - entry->vme_start;
3e170ce0 15078 pmap_offset = address;
1c79356b 15079
91447636
A
15080 /*
15081 * do we have more to flush than is contained in this
15082 * entry ?
15083 */
15084 if (amount_left + entry->vme_start + offset > entry->vme_end) {
15085 flush_size = entry->vme_end -
2d21ac55 15086 (entry->vme_start + offset);
91447636
A
15087 } else {
15088 flush_size = amount_left;
15089 }
15090 amount_left -= flush_size;
15091 address += flush_size;
1c79356b 15092
91447636
A
15093 if (entry->is_sub_map == TRUE) {
15094 vm_map_t local_map;
15095 vm_map_offset_t local_offset;
1c79356b 15096
3e170ce0
A
15097 local_map = VME_SUBMAP(entry);
15098 local_offset = VME_OFFSET(entry);
91447636
A
15099 vm_map_unlock(map);
15100 if (vm_map_msync(
2d21ac55
A
15101 local_map,
15102 local_offset,
15103 flush_size,
15104 sync_flags) == KERN_INVALID_ADDRESS) {
91447636
A
15105 had_hole = TRUE;
15106 }
15107 continue;
15108 }
3e170ce0 15109 object = VME_OBJECT(entry);
1c79356b 15110
91447636
A
15111 /*
15112 * We can't sync this object if the object has not been
15113 * created yet
15114 */
15115 if (object == VM_OBJECT_NULL) {
15116 vm_map_unlock(map);
15117 continue;
15118 }
3e170ce0 15119 offset += VME_OFFSET(entry);
1c79356b 15120
91447636 15121 vm_object_lock(object);
1c79356b 15122
91447636 15123 if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
b0d623f7
A
15124 int kill_pages = 0;
15125 boolean_t reusable_pages = FALSE;
91447636
A
15126
15127 if (sync_flags & VM_SYNC_KILLPAGES) {
39037602
A
15128 if (((object->ref_count == 1) ||
15129 ((object->copy_strategy !=
15130 MEMORY_OBJECT_COPY_SYMMETRIC) &&
15131 (object->copy == VM_OBJECT_NULL))) &&
15132 (object->shadow == VM_OBJECT_NULL)) {
15133 if (object->ref_count != 1) {
15134 vm_page_stats_reusable.free_shared++;
15135 }
91447636 15136 kill_pages = 1;
39037602 15137 } else {
91447636 15138 kill_pages = -1;
39037602 15139 }
91447636
A
15140 }
15141 if (kill_pages != -1)
3e170ce0
A
15142 vm_object_deactivate_pages(
15143 object,
15144 offset,
15145 (vm_object_size_t) flush_size,
15146 kill_pages,
15147 reusable_pages,
15148 map->pmap,
15149 pmap_offset);
91447636
A
15150 vm_object_unlock(object);
15151 vm_map_unlock(map);
15152 continue;
1c79356b 15153 }
91447636
A
15154 /*
15155 * We can't sync this object if there isn't a pager.
15156 * Don't bother to sync internal objects, since there can't
15157 * be any "permanent" storage for these objects anyway.
15158 */
15159 if ((object->pager == MEMORY_OBJECT_NULL) ||
15160 (object->internal) || (object->private)) {
15161 vm_object_unlock(object);
15162 vm_map_unlock(map);
15163 continue;
15164 }
15165 /*
15166 * keep reference on the object until syncing is done
15167 */
2d21ac55 15168 vm_object_reference_locked(object);
91447636 15169 vm_object_unlock(object);
1c79356b 15170
91447636 15171 vm_map_unlock(map);
1c79356b 15172
91447636 15173 do_sync_req = vm_object_sync(object,
2d21ac55
A
15174 offset,
15175 flush_size,
15176 sync_flags & VM_SYNC_INVALIDATE,
b0d623f7
A
15177 ((sync_flags & VM_SYNC_SYNCHRONOUS) ||
15178 (sync_flags & VM_SYNC_ASYNCHRONOUS)),
2d21ac55 15179 sync_flags & VM_SYNC_SYNCHRONOUS);
91447636
A
15180 /*
15181 * only send an m_o_s if we returned pages or if the entry
15182 * is writable (i.e. dirty pages may have already been sent back)
15183 */
b0d623f7 15184 if (!do_sync_req) {
2d21ac55
A
15185 if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) {
15186 /*
15187 * clear out the clustering and read-ahead hints
15188 */
15189 vm_object_lock(object);
15190
15191 object->pages_created = 0;
15192 object->pages_used = 0;
15193 object->sequential = 0;
15194 object->last_alloc = 0;
15195
15196 vm_object_unlock(object);
15197 }
91447636
A
15198 vm_object_deallocate(object);
15199 continue;
1c79356b 15200 }
91447636 15201 msync_req_alloc(new_msr);
1c79356b 15202
91447636
A
15203 vm_object_lock(object);
15204 offset += object->paging_offset;
1c79356b 15205
91447636
A
15206 new_msr->offset = offset;
15207 new_msr->length = flush_size;
15208 new_msr->object = object;
15209 new_msr->flag = VM_MSYNC_SYNCHRONIZING;
2d21ac55
A
15210 re_iterate:
15211
15212 /*
15213 * We can't sync this object if there isn't a pager. The
15214 * pager can disappear anytime we're not holding the object
15215 * lock. So this has to be checked anytime we goto re_iterate.
15216 */
15217
15218 pager = object->pager;
15219
15220 if (pager == MEMORY_OBJECT_NULL) {
15221 vm_object_unlock(object);
15222 vm_object_deallocate(object);
39236c6e
A
15223 msync_req_free(new_msr);
15224 new_msr = NULL;
2d21ac55
A
15225 continue;
15226 }
15227
91447636
A
15228 queue_iterate(&object->msr_q, msr, msync_req_t, msr_q) {
15229 /*
15230 * need to check for overlapping entry, if found, wait
15231 * on overlapping msr to be done, then reiterate
15232 */
15233 msr_lock(msr);
15234 if (msr->flag == VM_MSYNC_SYNCHRONIZING &&
15235 ((offset >= msr->offset &&
15236 offset < (msr->offset + msr->length)) ||
15237 (msr->offset >= offset &&
15238 msr->offset < (offset + flush_size))))
15239 {
15240 assert_wait((event_t) msr,THREAD_INTERRUPTIBLE);
15241 msr_unlock(msr);
15242 vm_object_unlock(object);
15243 thread_block(THREAD_CONTINUE_NULL);
15244 vm_object_lock(object);
15245 goto re_iterate;
15246 }
15247 msr_unlock(msr);
15248 }/* queue_iterate */
1c79356b 15249
91447636 15250 queue_enter(&object->msr_q, new_msr, msync_req_t, msr_q);
2d21ac55
A
15251
15252 vm_object_paging_begin(object);
91447636 15253 vm_object_unlock(object);
1c79356b 15254
91447636
A
15255 queue_enter(&req_q, new_msr, msync_req_t, req_q);
15256
15257 (void) memory_object_synchronize(
2d21ac55
A
15258 pager,
15259 offset,
15260 flush_size,
15261 sync_flags & ~VM_SYNC_CONTIGUOUS);
15262
15263 vm_object_lock(object);
15264 vm_object_paging_end(object);
15265 vm_object_unlock(object);
91447636
A
15266 }/* while */
15267
15268 /*
15269 * wait for memory_object_synchronize_completed messages from pager(s)
15270 */
15271
15272 while (!queue_empty(&req_q)) {
15273 msr = (msync_req_t)queue_first(&req_q);
15274 msr_lock(msr);
15275 while(msr->flag != VM_MSYNC_DONE) {
15276 assert_wait((event_t) msr, THREAD_INTERRUPTIBLE);
15277 msr_unlock(msr);
15278 thread_block(THREAD_CONTINUE_NULL);
15279 msr_lock(msr);
15280 }/* while */
15281 queue_remove(&req_q, msr, msync_req_t, req_q);
15282 msr_unlock(msr);
15283 vm_object_deallocate(msr->object);
15284 msync_req_free(msr);
15285 }/* queue_iterate */
15286
15287 /* for proper msync() behaviour */
15288 if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS))
15289 return(KERN_INVALID_ADDRESS);
15290
15291 return(KERN_SUCCESS);
15292}/* vm_msync */
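/*
 * Example (illustrative sketch only): synchronously flushing a range of
 * the current task's map back to its backing store, treating any hole
 * in the range as an error.  The wrapper name is hypothetical.
 */
static __unused kern_return_t
example_flush_range(
	vm_map_address_t	address,
	vm_map_size_t		size)
{
	return vm_map_msync(current_map(), address, size,
			    VM_SYNC_SYNCHRONOUS | VM_SYNC_CONTIGUOUS);
}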
1c79356b 15293
1c79356b 15294/*
91447636
A
15295 * Routine: convert_port_entry_to_map
15296 * Purpose:
15297 * Convert from a port specifying an entry or a task
15298 * to a map. Doesn't consume the port ref; produces a map ref,
15299 * which may be null. Unlike convert_port_to_map, the
15300 * port may be either task-backed or named-entry-backed.
15301 * Conditions:
15302 * Nothing locked.
1c79356b 15303 */
1c79356b 15304
1c79356b 15305
91447636
A
15306vm_map_t
15307convert_port_entry_to_map(
15308 ipc_port_t port)
15309{
15310 vm_map_t map;
15311 vm_named_entry_t named_entry;
2d21ac55 15312 uint32_t try_failed_count = 0;
1c79356b 15313
91447636
A
15314 if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
15315 while(TRUE) {
15316 ip_lock(port);
15317 if(ip_active(port) && (ip_kotype(port)
2d21ac55 15318 == IKOT_NAMED_ENTRY)) {
91447636 15319 named_entry =
2d21ac55 15320 (vm_named_entry_t)port->ip_kobject;
b0d623f7 15321 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
91447636 15322 ip_unlock(port);
2d21ac55
A
15323
15324 try_failed_count++;
15325 mutex_pause(try_failed_count);
91447636
A
15326 continue;
15327 }
15328 named_entry->ref_count++;
b0d623f7 15329 lck_mtx_unlock(&(named_entry)->Lock);
91447636
A
15330 ip_unlock(port);
15331 if ((named_entry->is_sub_map) &&
2d21ac55
A
15332 (named_entry->protection
15333 & VM_PROT_WRITE)) {
91447636
A
15334 map = named_entry->backing.map;
15335 } else {
15336 mach_destroy_memory_entry(port);
15337 return VM_MAP_NULL;
15338 }
15339 vm_map_reference_swap(map);
15340 mach_destroy_memory_entry(port);
15341 break;
15342 }
15343 else
15344 return VM_MAP_NULL;
15345 }
1c79356b 15346 }
91447636
A
15347 else
15348 map = convert_port_to_map(port);
1c79356b 15349
91447636
A
15350 return map;
15351}
1c79356b 15352
91447636
A
15353/*
15354 * Routine: convert_port_entry_to_object
15355 * Purpose:
15356 * Convert from a port specifying a named entry to an
15357 * object. Doesn't consume the port ref; produces an object ref,
15358 * which may be null.
15359 * Conditions:
15360 * Nothing locked.
15361 */
1c79356b 15362
1c79356b 15363
91447636
A
15364vm_object_t
15365convert_port_entry_to_object(
15366 ipc_port_t port)
15367{
39236c6e 15368 vm_object_t object = VM_OBJECT_NULL;
91447636 15369 vm_named_entry_t named_entry;
39236c6e
A
15370 uint32_t try_failed_count = 0;
15371
15372 if (IP_VALID(port) &&
15373 (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
15374 try_again:
15375 ip_lock(port);
15376 if (ip_active(port) &&
15377 (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
15378 named_entry = (vm_named_entry_t)port->ip_kobject;
15379 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
91447636 15380 ip_unlock(port);
39236c6e
A
15381 try_failed_count++;
15382 mutex_pause(try_failed_count);
15383 goto try_again;
15384 }
15385 named_entry->ref_count++;
15386 lck_mtx_unlock(&(named_entry)->Lock);
15387 ip_unlock(port);
15388 if (!(named_entry->is_sub_map) &&
15389 !(named_entry->is_pager) &&
15390 !(named_entry->is_copy) &&
15391 (named_entry->protection & VM_PROT_WRITE)) {
15392 object = named_entry->backing.object;
15393 vm_object_reference(object);
91447636 15394 }
39236c6e 15395 mach_destroy_memory_entry(port);
1c79356b 15396 }
1c79356b 15397 }
91447636
A
15398
15399 return object;
1c79356b 15400}
9bccf70c
A
15401
15402/*
91447636
A
15403 * Export routines to other components for the things we access locally through
15404 * macros.
9bccf70c 15405 */
91447636
A
15406#undef current_map
15407vm_map_t
15408current_map(void)
9bccf70c 15409{
91447636 15410 return (current_map_fast());
9bccf70c
A
15411}
15412
15413/*
15414 * vm_map_reference:
15415 *
15416 * Most code internal to the osfmk will go through a
15417 * macro defining this. This is always here for the
15418 * use of other kernel components.
15419 */
15420#undef vm_map_reference
15421void
15422vm_map_reference(
39037602 15423 vm_map_t map)
9bccf70c
A
15424{
15425 if (map == VM_MAP_NULL)
15426 return;
15427
b0d623f7 15428 lck_mtx_lock(&map->s_lock);
9bccf70c
A
15429#if TASK_SWAPPER
15430 assert(map->res_count > 0);
15431 assert(map->ref_count >= map->res_count);
15432 map->res_count++;
15433#endif
15434 map->ref_count++;
b0d623f7 15435 lck_mtx_unlock(&map->s_lock);
9bccf70c
A
15436}
15437
15438/*
15439 * vm_map_deallocate:
15440 *
15441 * Removes a reference from the specified map,
15442 * destroying it if no references remain.
15443 * The map should not be locked.
15444 */
15445void
15446vm_map_deallocate(
39037602 15447 vm_map_t map)
9bccf70c
A
15448{
15449 unsigned int ref;
15450
15451 if (map == VM_MAP_NULL)
15452 return;
15453
b0d623f7 15454 lck_mtx_lock(&map->s_lock);
9bccf70c
A
15455 ref = --map->ref_count;
15456 if (ref > 0) {
15457 vm_map_res_deallocate(map);
b0d623f7 15458 lck_mtx_unlock(&map->s_lock);
9bccf70c
A
15459 return;
15460 }
15461 assert(map->ref_count == 0);
b0d623f7 15462 lck_mtx_unlock(&map->s_lock);
9bccf70c
A
15463
15464#if TASK_SWAPPER
15465 /*
15466 * The map residence count isn't decremented here because
15467 * the vm_map_delete below will traverse the entire map,
15468 * deleting entries, and the residence counts on objects
15469 * and sharing maps will go away then.
15470 */
15471#endif
15472
2d21ac55 15473 vm_map_destroy(map, VM_MAP_NO_FLAGS);
0c530ab8 15474}
91447636 15475
91447636 15476
0c530ab8
A
15477void
15478vm_map_disable_NX(vm_map_t map)
15479{
15480 if (map == NULL)
15481 return;
15482 if (map->pmap == NULL)
15483 return;
15484
15485 pmap_disable_NX(map->pmap);
15486}
15487
6d2010ae
A
15488void
15489vm_map_disallow_data_exec(vm_map_t map)
15490{
15491 if (map == NULL)
15492 return;
15493
15494 map->map_disallow_data_exec = TRUE;
15495}
15496
0c530ab8
A
15497/* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
15498 * more descriptive.
15499 */
15500void
15501vm_map_set_32bit(vm_map_t map)
15502{
15503 map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
15504}
15505
15506
15507void
15508vm_map_set_64bit(vm_map_t map)
15509{
15510 map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
15511}
15512
15513vm_map_offset_t
3e170ce0 15514vm_compute_max_offset(boolean_t is64)
0c530ab8
A
15515{
15516 return (is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS);
15517}
15518
39236c6e
A
15519uint64_t
15520vm_map_get_max_aslr_slide_pages(vm_map_t map)
15521{
15522 return (1 << (vm_map_is_64bit(map) ? 16 : 8));
15523}
15524
0c530ab8 15525boolean_t
2d21ac55
A
15526vm_map_is_64bit(
15527 vm_map_t map)
15528{
15529 return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
15530}
15531
15532boolean_t
316670eb
A
15533vm_map_has_hard_pagezero(
15534 vm_map_t map,
15535 vm_map_offset_t pagezero_size)
0c530ab8
A
15536{
15537 /*
15538 * XXX FBDP
15539 * We should lock the VM map (for read) here but we can get away
15540 * with it for now because there can't really be any race condition:
15541 * the VM map's min_offset is changed only when the VM map is created
15542 * and when the zero page is established (when the binary gets loaded),
15543 * and this routine gets called only when the task terminates and the
15544 * VM map is being torn down, and when a new map is created via
15545 * load_machfile()/execve().
15546 */
316670eb 15547 return (map->min_offset >= pagezero_size);
0c530ab8
A
15548}
15549
316670eb
A
15550/*
15551 * Raise a VM map's maximum offset.
15552 */
15553kern_return_t
15554vm_map_raise_max_offset(
15555 vm_map_t map,
15556 vm_map_offset_t new_max_offset)
15557{
15558 kern_return_t ret;
15559
15560 vm_map_lock(map);
15561 ret = KERN_INVALID_ADDRESS;
15562
15563 if (new_max_offset >= map->max_offset) {
15564 if (!vm_map_is_64bit(map)) {
15565 if (new_max_offset <= (vm_map_offset_t)VM_MAX_ADDRESS) {
15566 map->max_offset = new_max_offset;
15567 ret = KERN_SUCCESS;
15568 }
15569 } else {
15570 if (new_max_offset <= (vm_map_offset_t)MACH_VM_MAX_ADDRESS) {
15571 map->max_offset = new_max_offset;
15572 ret = KERN_SUCCESS;
15573 }
15574 }
15575 }
15576
15577 vm_map_unlock(map);
15578 return ret;
15579}
15580
15581
0c530ab8
A
15582/*
15583 * Raise a VM map's minimum offset.
15584 * To strictly enforce "page zero" reservation.
15585 */
15586kern_return_t
15587vm_map_raise_min_offset(
15588 vm_map_t map,
15589 vm_map_offset_t new_min_offset)
15590{
15591 vm_map_entry_t first_entry;
15592
39236c6e
A
15593 new_min_offset = vm_map_round_page(new_min_offset,
15594 VM_MAP_PAGE_MASK(map));
0c530ab8
A
15595
15596 vm_map_lock(map);
15597
15598 if (new_min_offset < map->min_offset) {
15599 /*
15600 * Can't move min_offset backwards, as that would expose
15601 * a part of the address space that was previously, and for
15602 * possibly good reasons, inaccessible.
15603 */
15604 vm_map_unlock(map);
15605 return KERN_INVALID_ADDRESS;
15606 }
3e170ce0
A
15607 if (new_min_offset >= map->max_offset) {
15608 /* can't go beyond the end of the address space */
15609 vm_map_unlock(map);
15610 return KERN_INVALID_ADDRESS;
15611 }
0c530ab8
A
15612
15613 first_entry = vm_map_first_entry(map);
15614 if (first_entry != vm_map_to_entry(map) &&
15615 first_entry->vme_start < new_min_offset) {
15616 /*
15617 * Some memory was already allocated below the new
15618 * minimum offset. It's too late to change it now...
15619 */
15620 vm_map_unlock(map);
15621 return KERN_NO_SPACE;
15622 }
15623
15624 map->min_offset = new_min_offset;
15625
3e170ce0
A
15626 assert(map->holes_list);
15627 map->holes_list->start = new_min_offset;
15628 assert(new_min_offset < map->holes_list->end);
15629
0c530ab8
A
15630 vm_map_unlock(map);
15631
15632 return KERN_SUCCESS;
15633}
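/*
 * Example (illustrative sketch only): reserving one hard page-zero page
 * in a freshly created map by raising its minimum offset.  The wrapper
 * name and the one-page size are hypothetical; this only succeeds while
 * nothing is mapped below the new minimum.
 */
static __unused kern_return_t
example_reserve_pagezero(
	vm_map_t	map)
{
	return vm_map_raise_min_offset(map, map->min_offset + PAGE_SIZE);
}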
2d21ac55
A
15634
15635/*
15636 * Set the limit on the maximum amount of user wired memory allowed for this map.
15637 * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side of
15638 * the kernel. The limits are checked in the mach VM side, so we keep a copy so we
15639 * don't have to reach over to the BSD data structures.
15640 */
15641
15642void
15643vm_map_set_user_wire_limit(vm_map_t map,
15644 vm_size_t limit)
15645{
15646 map->user_wire_limit = limit;
15647}
593a1d5f 15648
b0d623f7
A
15649
15650void vm_map_switch_protect(vm_map_t map,
15651 boolean_t val)
593a1d5f
A
15652{
15653 vm_map_lock(map);
b0d623f7 15654 map->switch_protect=val;
593a1d5f 15655 vm_map_unlock(map);
b0d623f7 15656}
b7266188 15657
39236c6e
A
15658/*
15659 * IOKit has mapped a region into this map; adjust the pmap's ledgers appropriately.
15660 * phys_footprint is a composite limit consisting of iokit + physmem, so we need to
15661 * bump both counters.
15662 */
15663void
15664vm_map_iokit_mapped_region(vm_map_t map, vm_size_t bytes)
15665{
15666 pmap_t pmap = vm_map_pmap(map);
15667
fe8ab488 15668 ledger_credit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
39236c6e
A
15669 ledger_credit(pmap->ledger, task_ledgers.phys_footprint, bytes);
15670}
15671
15672void
15673vm_map_iokit_unmapped_region(vm_map_t map, vm_size_t bytes)
15674{
15675 pmap_t pmap = vm_map_pmap(map);
15676
fe8ab488 15677 ledger_debit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
39236c6e
A
15678 ledger_debit(pmap->ledger, task_ledgers.phys_footprint, bytes);
15679}
15680
b7266188
A
15681/* Add (generate) code signature for memory range */
15682#if CONFIG_DYNAMIC_CODE_SIGNING
15683kern_return_t vm_map_sign(vm_map_t map,
15684 vm_map_offset_t start,
15685 vm_map_offset_t end)
15686{
15687 vm_map_entry_t entry;
15688 vm_page_t m;
15689 vm_object_t object;
15690
15691 /*
15692 * Vet all the input parameters and current type and state of the
15693 * underlying object. Return with an error if anything is amiss.
15694 */
15695 if (map == VM_MAP_NULL)
15696 return(KERN_INVALID_ARGUMENT);
15697
15698 vm_map_lock_read(map);
15699
15700 if (!vm_map_lookup_entry(map, start, &entry) || entry->is_sub_map) {
15701 /*
15702 * Must pass a valid non-submap address.
15703 */
15704 vm_map_unlock_read(map);
15705 return(KERN_INVALID_ADDRESS);
15706 }
15707
15708 if((entry->vme_start > start) || (entry->vme_end < end)) {
15709 /*
15710 * Map entry doesn't cover the requested range. Not handling
15711 * this situation currently.
15712 */
15713 vm_map_unlock_read(map);
15714 return(KERN_INVALID_ARGUMENT);
15715 }
15716
3e170ce0 15717 object = VME_OBJECT(entry);
b7266188
A
15718 if (object == VM_OBJECT_NULL) {
15719 /*
15720 * Object must already be present or we can't sign.
15721 */
15722 vm_map_unlock_read(map);
15723 return KERN_INVALID_ARGUMENT;
15724 }
15725
15726 vm_object_lock(object);
15727 vm_map_unlock_read(map);
15728
15729 while(start < end) {
15730 uint32_t refmod;
15731
3e170ce0
A
15732 m = vm_page_lookup(object,
15733 start - entry->vme_start + VME_OFFSET(entry));
b7266188
A
15734 if (m==VM_PAGE_NULL) {
15735 /* should we try to fault a page here? we can probably
15736 * demand it exists and is locked for this request */
15737 vm_object_unlock(object);
15738 return KERN_FAILURE;
15739 }
15740 /* deal with special page status */
15741 if (m->busy ||
15742 (m->unusual && (m->error || m->restart || m->private || m->absent))) {
15743 vm_object_unlock(object);
15744 return KERN_FAILURE;
15745 }
15746
15747 /* Page is OK... now "validate" it */
15748 /* This is the place where we'll call out to create a code
15749 * directory, later */
15750 m->cs_validated = TRUE;
15751
15752 /* The page is now "clean" for codesigning purposes. That means
15753 * we don't consider it as modified (wpmapped) anymore. But
15754 * we'll disconnect the page so we note any future modification
15755 * attempts. */
15756 m->wpmapped = FALSE;
39037602 15757 refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
b7266188
A
15758
15759 /* Pull the dirty status from the pmap, since we cleared the
15760 * wpmapped bit */
15761 if ((refmod & VM_MEM_MODIFIED) && !m->dirty) {
316670eb 15762 SET_PAGE_DIRTY(m, FALSE);
b7266188
A
15763 }
15764
15765 /* On to the next page */
15766 start += PAGE_SIZE;
15767 }
15768 vm_object_unlock(object);
15769
15770 return KERN_SUCCESS;
15771}
15772#endif
6d2010ae 15773
fe8ab488
A
15774kern_return_t vm_map_partial_reap(vm_map_t map, unsigned int *reclaimed_resident, unsigned int *reclaimed_compressed)
15775{
15776 vm_map_entry_t entry = VM_MAP_ENTRY_NULL;
15777 vm_map_entry_t next_entry;
15778 kern_return_t kr = KERN_SUCCESS;
15779 vm_map_t zap_map;
15780
15781 vm_map_lock(map);
15782
15783 /*
15784 * We use a "zap_map" to avoid having to unlock
15785 * the "map" in vm_map_delete().
15786 */
15787 zap_map = vm_map_create(PMAP_NULL,
15788 map->min_offset,
15789 map->max_offset,
15790 map->hdr.entries_pageable);
15791
15792 if (zap_map == VM_MAP_NULL) {
15793 return KERN_RESOURCE_SHORTAGE;
15794 }
15795
15796 vm_map_set_page_shift(zap_map,
15797 VM_MAP_PAGE_SHIFT(map));
3e170ce0 15798 vm_map_disable_hole_optimization(zap_map);
fe8ab488
A
15799
15800 for (entry = vm_map_first_entry(map);
15801 entry != vm_map_to_entry(map);
15802 entry = next_entry) {
15803 next_entry = entry->vme_next;
15804
3e170ce0
A
15805 if (VME_OBJECT(entry) &&
15806 !entry->is_sub_map &&
15807 (VME_OBJECT(entry)->internal == TRUE) &&
15808 (VME_OBJECT(entry)->ref_count == 1)) {
fe8ab488 15809
3e170ce0
A
15810 *reclaimed_resident += VME_OBJECT(entry)->resident_page_count;
15811 *reclaimed_compressed += vm_compressor_pager_get_count(VME_OBJECT(entry)->pager);
fe8ab488
A
15812
15813 (void)vm_map_delete(map,
15814 entry->vme_start,
15815 entry->vme_end,
15816 VM_MAP_REMOVE_SAVE_ENTRIES,
15817 zap_map);
15818 }
15819 }
15820
15821 vm_map_unlock(map);
15822
15823 /*
15824 * Get rid of the "zap_maps" and all the map entries that
15825 * they may still contain.
15826 */
15827 if (zap_map != VM_MAP_NULL) {
15828 vm_map_destroy(zap_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
15829 zap_map = VM_MAP_NULL;
15830 }
15831
15832 return kr;
15833}
15834
6d2010ae 15835
39037602
A
15836#if DEVELOPMENT || DEBUG
15837
15838int
15839vm_map_disconnect_page_mappings(
15840 vm_map_t map,
15841 boolean_t do_unnest)
6d2010ae
A
15842{
15843 vm_map_entry_t entry;
39037602
A
15844 int page_count = 0;
15845
15846 if (do_unnest == TRUE) {
15847#ifndef NO_NESTED_PMAP
15848 vm_map_lock(map);
15849
15850 for (entry = vm_map_first_entry(map);
15851 entry != vm_map_to_entry(map);
15852 entry = entry->vme_next) {
15853
15854 if (entry->is_sub_map && entry->use_pmap) {
15855 /*
15856 * Make sure the range between the start of this entry and
15857 * the end of this entry is no longer nested, so that
15858 * we will only remove mappings from the pmap in use by
15859 * this task
15860 */
15861 vm_map_clip_unnest(map, entry, entry->vme_start, entry->vme_end);
15862 }
15863 }
15864 vm_map_unlock(map);
15865#endif
15866 }
6d2010ae 15867 vm_map_lock_read(map);
39037602
A
15868
15869 page_count = map->pmap->stats.resident_count;
15870
6d2010ae
A
15871 for (entry = vm_map_first_entry(map);
15872 entry != vm_map_to_entry(map);
15873 entry = entry->vme_next) {
6d2010ae 15874
39037602
A
15875 if (!entry->is_sub_map && ((VME_OBJECT(entry) == 0) ||
15876 (VME_OBJECT(entry)->phys_contiguous))) {
6d2010ae
A
15877 continue;
15878 }
39037602
A
15879 if (entry->is_sub_map)
15880 assert(!entry->use_pmap);
6d2010ae 15881
39037602 15882 pmap_remove_options(map->pmap, entry->vme_start, entry->vme_end, 0);
6d2010ae 15883 }
6d2010ae
A
15884 vm_map_unlock_read(map);
15885
39037602 15886 return page_count;
6d2010ae
A
15887}
15888
39037602
A
15889#endif
15890
15891
15892#if CONFIG_FREEZE
15893
15894
3e170ce0
A
15895int c_freezer_swapout_count;
15896int c_freezer_compression_count = 0;
15897AbsoluteTime c_freezer_last_yield_ts = 0;
15898
6d2010ae
A
15899kern_return_t vm_map_freeze(
15900 vm_map_t map,
15901 unsigned int *purgeable_count,
15902 unsigned int *wired_count,
15903 unsigned int *clean_count,
15904 unsigned int *dirty_count,
39037602 15905 __unused unsigned int dirty_budget,
6d2010ae
A
15906 boolean_t *has_shared)
15907{
39236c6e
A
15908 vm_map_entry_t entry2 = VM_MAP_ENTRY_NULL;
15909 kern_return_t kr = KERN_SUCCESS;
6d2010ae
A
15910
15911 *purgeable_count = *wired_count = *clean_count = *dirty_count = 0;
15912 *has_shared = FALSE;
15913
6d2010ae
A
15914 /*
15915 * We need the exclusive lock here so that we can
15916 * block any page faults or lookups while we are
15917 * in the middle of freezing this vm map.
15918 */
15919 vm_map_lock(map);
15920
39037602
A
15921 assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
15922
15923 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
15924 kr = KERN_NO_SPACE;
15925 goto done;
6d2010ae 15926 }
39037602 15927
3e170ce0
A
15928 c_freezer_compression_count = 0;
15929 clock_get_uptime(&c_freezer_last_yield_ts);
15930
6d2010ae
A
15931 for (entry2 = vm_map_first_entry(map);
15932 entry2 != vm_map_to_entry(map);
15933 entry2 = entry2->vme_next) {
15934
3e170ce0 15935 vm_object_t src_object = VME_OBJECT(entry2);
6d2010ae 15936
39037602 15937 if (src_object &&
3e170ce0 15938 !entry2->is_sub_map &&
39037602 15939 !src_object->phys_contiguous) {
39236c6e 15940 /* If eligible, scan the entry, moving eligible pages over to our parent object */
6d2010ae 15941
39037602 15942 if (src_object->internal == TRUE) {
3e170ce0 15943
39037602
A
15944 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
15945 /*
15946 * Pages belonging to this object could be swapped to disk.
15947 * Make sure it's not a shared object because we could end
15948 * up just bringing it back in again.
15949 */
15950 if (src_object->ref_count > 1) {
15951 continue;
3e170ce0 15952 }
3e170ce0 15953 }
39037602 15954 vm_object_compressed_freezer_pageout(src_object);
3e170ce0
A
15955
15956 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
15957 kr = KERN_NO_SPACE;
15958 break;
39236c6e 15959 }
6d2010ae
A
15960 }
15961 }
15962 }
6d2010ae
A
15963done:
15964 vm_map_unlock(map);
6d2010ae 15965
39037602
A
15966 vm_object_compressed_freezer_done();
15967
15968 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
3e170ce0
A
15969 /*
15970 * reset the counter tracking the # of swapped c_segs
15971 * because we are now done with this freeze session and task.
15972 */
15973 c_freezer_swapout_count = 0;
15974 }
6d2010ae
A
15975 return kr;
15976}
15977
6d2010ae 15978#endif
e2d2fc5c 15979
e2d2fc5c
A
15980/*
15981 * vm_map_entry_should_cow_for_true_share:
15982 *
15983 * Determines if the map entry should be clipped and setup for copy-on-write
15984 * to avoid applying "true_share" to a large VM object when only a subset is
15985 * targeted.
15986 *
15987 * For now, we target only the map entries created for the Objective C
15988 * Garbage Collector, which initially have the following properties:
15989 * - alias == VM_MEMORY_MALLOC
15990 * - wired_count == 0
15991 * - !needs_copy
15992 * and a VM object with:
15993 * - internal
15994 * - copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC
15995 * - !true_share
15996 * - vo_size == ANON_CHUNK_SIZE
3e170ce0
A
15997 *
15998 * Only non-kernel map entries.
e2d2fc5c
A
15999 */
16000boolean_t
16001vm_map_entry_should_cow_for_true_share(
16002 vm_map_entry_t entry)
16003{
16004 vm_object_t object;
16005
16006 if (entry->is_sub_map) {
16007 /* entry does not point at a VM object */
16008 return FALSE;
16009 }
16010
16011 if (entry->needs_copy) {
16012 /* already set for copy_on_write: done! */
16013 return FALSE;
16014 }
16015
3e170ce0
A
16016 if (VME_ALIAS(entry) != VM_MEMORY_MALLOC &&
16017 VME_ALIAS(entry) != VM_MEMORY_MALLOC_SMALL) {
fe8ab488 16018 /* not a malloc heap or Obj-C Garbage Collector heap */
e2d2fc5c
A
16019 return FALSE;
16020 }
16021
16022 if (entry->wired_count) {
16023 /* wired: can't change the map entry... */
fe8ab488 16024 vm_counters.should_cow_but_wired++;
e2d2fc5c
A
16025 return FALSE;
16026 }
16027
3e170ce0 16028 object = VME_OBJECT(entry);
e2d2fc5c
A
16029
16030 if (object == VM_OBJECT_NULL) {
16031 /* no object yet... */
16032 return FALSE;
16033 }
16034
16035 if (!object->internal) {
16036 /* not an internal object */
16037 return FALSE;
16038 }
16039
16040 if (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
16041 /* not the default copy strategy */
16042 return FALSE;
16043 }
16044
16045 if (object->true_share) {
16046 /* already true_share: too late to avoid it */
16047 return FALSE;
16048 }
16049
3e170ce0 16050 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC &&
fe8ab488
A
16051 object->vo_size != ANON_CHUNK_SIZE) {
16052 /* ... not an object created for the ObjC Garbage Collector */
16053 return FALSE;
16054 }
16055
3e170ce0 16056 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_SMALL &&
fe8ab488
A
16057 object->vo_size != 2048 * 4096) {
16058 /* ... not a "MALLOC_SMALL" heap */
e2d2fc5c
A
16059 return FALSE;
16060 }
16061
16062 /*
16063 * All the criteria match: we have a large object being targeted for "true_share".
16064 * To limit the adverse side-effects linked with "true_share", tell the caller to
16065 * try and avoid setting up the entire object for "true_share" by clipping the
16066 * targeted range and setting it up for copy-on-write.
16067 */
16068 return TRUE;
16069}
39236c6e
A
16070
16071vm_map_offset_t
16072vm_map_round_page_mask(
16073 vm_map_offset_t offset,
16074 vm_map_offset_t mask)
16075{
16076 return VM_MAP_ROUND_PAGE(offset, mask);
16077}
16078
16079vm_map_offset_t
16080vm_map_trunc_page_mask(
16081 vm_map_offset_t offset,
16082 vm_map_offset_t mask)
16083{
16084 return VM_MAP_TRUNC_PAGE(offset, mask);
16085}
16086
3e170ce0
A
16087boolean_t
16088vm_map_page_aligned(
16089 vm_map_offset_t offset,
16090 vm_map_offset_t mask)
16091{
16092 return ((offset) & mask) == 0;
16093}
16094
39236c6e
A
16095int
16096vm_map_page_shift(
16097 vm_map_t map)
16098{
16099 return VM_MAP_PAGE_SHIFT(map);
16100}
16101
16102int
16103vm_map_page_size(
16104 vm_map_t map)
16105{
16106 return VM_MAP_PAGE_SIZE(map);
16107}
16108
3e170ce0 16109vm_map_offset_t
39236c6e
A
16110vm_map_page_mask(
16111 vm_map_t map)
16112{
16113 return VM_MAP_PAGE_MASK(map);
16114}
16115
16116kern_return_t
16117vm_map_set_page_shift(
16118 vm_map_t map,
16119 int pageshift)
16120{
16121 if (map->hdr.nentries != 0) {
16122 /* too late to change page size */
16123 return KERN_FAILURE;
16124 }
16125
16126 map->hdr.page_shift = pageshift;
16127
16128 return KERN_SUCCESS;
16129}
16130
16131kern_return_t
16132vm_map_query_volatile(
16133 vm_map_t map,
16134 mach_vm_size_t *volatile_virtual_size_p,
16135 mach_vm_size_t *volatile_resident_size_p,
3e170ce0
A
16136 mach_vm_size_t *volatile_compressed_size_p,
16137 mach_vm_size_t *volatile_pmap_size_p,
16138 mach_vm_size_t *volatile_compressed_pmap_size_p)
39236c6e
A
16139{
16140 mach_vm_size_t volatile_virtual_size;
16141 mach_vm_size_t volatile_resident_count;
3e170ce0 16142 mach_vm_size_t volatile_compressed_count;
39236c6e 16143 mach_vm_size_t volatile_pmap_count;
3e170ce0 16144 mach_vm_size_t volatile_compressed_pmap_count;
39236c6e
A
16145 mach_vm_size_t resident_count;
16146 vm_map_entry_t entry;
16147 vm_object_t object;
16148
16149 /* map should be locked by caller */
16150
16151 volatile_virtual_size = 0;
16152 volatile_resident_count = 0;
3e170ce0 16153 volatile_compressed_count = 0;
39236c6e 16154 volatile_pmap_count = 0;
3e170ce0 16155 volatile_compressed_pmap_count = 0;
39236c6e
A
16156
16157 for (entry = vm_map_first_entry(map);
16158 entry != vm_map_to_entry(map);
16159 entry = entry->vme_next) {
4bd07ac2
A
16160 mach_vm_size_t pmap_resident_bytes, pmap_compressed_bytes;
16161
39236c6e
A
16162 if (entry->is_sub_map) {
16163 continue;
16164 }
16165 if (! (entry->protection & VM_PROT_WRITE)) {
16166 continue;
16167 }
3e170ce0 16168 object = VME_OBJECT(entry);
39236c6e
A
16169 if (object == VM_OBJECT_NULL) {
16170 continue;
16171 }
3e170ce0
A
16172 if (object->purgable != VM_PURGABLE_VOLATILE &&
16173 object->purgable != VM_PURGABLE_EMPTY) {
39236c6e
A
16174 continue;
16175 }
3e170ce0 16176 if (VME_OFFSET(entry)) {
39236c6e
A
16177 /*
16178 * If the map entry has been split and the object now
16179 * appears several times in the VM map, we don't want
16180 * to count the object's resident_page_count more than
16181 * once. We count it only for the first one, starting
16182 * at offset 0 and ignore the other VM map entries.
16183 */
16184 continue;
16185 }
16186 resident_count = object->resident_page_count;
3e170ce0 16187 if ((VME_OFFSET(entry) / PAGE_SIZE) >= resident_count) {
39236c6e
A
16188 resident_count = 0;
16189 } else {
3e170ce0 16190 resident_count -= (VME_OFFSET(entry) / PAGE_SIZE);
39236c6e
A
16191 }
16192
16193 volatile_virtual_size += entry->vme_end - entry->vme_start;
16194 volatile_resident_count += resident_count;
3e170ce0
A
16195 if (object->pager) {
16196 volatile_compressed_count +=
16197 vm_compressor_pager_get_count(object->pager);
16198 }
4bd07ac2
A
16199 pmap_compressed_bytes = 0;
16200 pmap_resident_bytes =
16201 pmap_query_resident(map->pmap,
16202 entry->vme_start,
16203 entry->vme_end,
16204 &pmap_compressed_bytes);
16205 volatile_pmap_count += (pmap_resident_bytes / PAGE_SIZE);
16206 volatile_compressed_pmap_count += (pmap_compressed_bytes
16207 / PAGE_SIZE);
39236c6e
A
16208 }
16209
16210 /* map is still locked on return */
16211
16212 *volatile_virtual_size_p = volatile_virtual_size;
16213 *volatile_resident_size_p = volatile_resident_count * PAGE_SIZE;
3e170ce0 16214 *volatile_compressed_size_p = volatile_compressed_count * PAGE_SIZE;
39236c6e 16215 *volatile_pmap_size_p = volatile_pmap_count * PAGE_SIZE;
3e170ce0 16216 *volatile_compressed_pmap_size_p = volatile_compressed_pmap_count * PAGE_SIZE;
39236c6e
A
16217
16218 return KERN_SUCCESS;
16219}
fe8ab488 16220
3e170ce0
A
16221void
16222vm_map_sizes(vm_map_t map,
16223 vm_map_size_t * psize,
16224 vm_map_size_t * pfree,
16225 vm_map_size_t * plargest_free)
16226{
16227 vm_map_entry_t entry;
16228 vm_map_offset_t prev;
16229 vm_map_size_t free, total_free, largest_free;
16230 boolean_t end;
16231
39037602
A
16232 if (!map)
16233 {
16234 *psize = *pfree = *plargest_free = 0;
16235 return;
16236 }
3e170ce0
A
16237 total_free = largest_free = 0;
16238
16239 vm_map_lock_read(map);
16240 if (psize) *psize = map->max_offset - map->min_offset;
16241
16242 prev = map->min_offset;
16243 for (entry = vm_map_first_entry(map);; entry = entry->vme_next)
16244 {
16245 end = (entry == vm_map_to_entry(map));
16246
16247 if (end) free = entry->vme_end - prev;
16248 else free = entry->vme_start - prev;
16249
16250 total_free += free;
16251 if (free > largest_free) largest_free = free;
16252
16253 if (end) break;
16254 prev = entry->vme_end;
16255 }
16256 vm_map_unlock_read(map);
16257 if (pfree) *pfree = total_free;
16258 if (plargest_free) *plargest_free = largest_free;
16259}
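/*
 * Example (illustrative sketch only): using vm_map_sizes() to decide
 * whether a map still has a contiguous free region of at least "needed"
 * bytes.  The wrapper name is hypothetical; NULL may be passed for any
 * out-parameter the caller does not care about.
 */
static __unused boolean_t
example_map_has_room(
	vm_map_t	map,
	vm_map_size_t	needed)
{
	vm_map_size_t	largest_free;

	vm_map_sizes(map, NULL, NULL, &largest_free);
	return (largest_free >= needed) ? TRUE : FALSE;
}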
16260
fe8ab488
A
16261#if VM_SCAN_FOR_SHADOW_CHAIN
16262int vm_map_shadow_max(vm_map_t map);
16263int vm_map_shadow_max(
16264 vm_map_t map)
16265{
16266 int shadows, shadows_max;
16267 vm_map_entry_t entry;
16268 vm_object_t object, next_object;
16269
16270 if (map == NULL)
16271 return 0;
16272
16273 shadows_max = 0;
16274
16275 vm_map_lock_read(map);
16276
16277 for (entry = vm_map_first_entry(map);
16278 entry != vm_map_to_entry(map);
16279 entry = entry->vme_next) {
16280 if (entry->is_sub_map) {
16281 continue;
16282 }
3e170ce0 16283 object = VME_OBJECT(entry);
fe8ab488
A
16284 if (object == NULL) {
16285 continue;
16286 }
16287 vm_object_lock_shared(object);
16288 for (shadows = 0;
16289 object->shadow != NULL;
16290 shadows++, object = next_object) {
16291 next_object = object->shadow;
16292 vm_object_lock_shared(next_object);
16293 vm_object_unlock(object);
16294 }
16295 vm_object_unlock(object);
16296 if (shadows > shadows_max) {
16297 shadows_max = shadows;
16298 }
16299 }
16300
16301 vm_map_unlock_read(map);
16302
16303 return shadows_max;
16304}
16305#endif /* VM_SCAN_FOR_SHADOW_CHAIN */
39037602
A
16306
16307void vm_commit_pagezero_status(vm_map_t lmap) {
16308 pmap_advise_pagezero_range(lmap->pmap, lmap->min_offset);
16309}