[apple/xnu.git] / osfmk / vm / vm_map.c
1c79356b 1/*
39236c6e 2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
1c79356b 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
1c79356b 5 *
2d21ac55 6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55 15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b 27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58/*
59 * File: vm/vm_map.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * Virtual memory mapping module.
64 */
65
1c79356b
A
66#include <task_swapper.h>
67#include <mach_assert.h>
fe8ab488
A
68
69#include <vm/vm_options.h>
70
91447636 71#include <libkern/OSAtomic.h>
1c79356b
A
72
73#include <mach/kern_return.h>
74#include <mach/port.h>
75#include <mach/vm_attributes.h>
76#include <mach/vm_param.h>
77#include <mach/vm_behavior.h>
55e303ae 78#include <mach/vm_statistics.h>
91447636 79#include <mach/memory_object.h>
0c530ab8 80#include <mach/mach_vm.h>
91447636 81#include <machine/cpu_capabilities.h>
2d21ac55 82#include <mach/sdt.h>
91447636 83
1c79356b 84#include <kern/assert.h>
39037602 85#include <kern/backtrace.h>
1c79356b 86#include <kern/counters.h>
91447636 87#include <kern/kalloc.h>
1c79356b 88#include <kern/zalloc.h>
91447636
A
89
90#include <vm/cpm.h>
39236c6e 91#include <vm/vm_compressor_pager.h>
1c79356b
A
92#include <vm/vm_init.h>
93#include <vm/vm_fault.h>
94#include <vm/vm_map.h>
95#include <vm/vm_object.h>
96#include <vm/vm_page.h>
b0d623f7 97#include <vm/vm_pageout.h>
1c79356b
A
98#include <vm/vm_kern.h>
99#include <ipc/ipc_port.h>
100#include <kern/sched_prim.h>
101#include <kern/misc_protos.h>
1c79356b
A
102#include <kern/xpr.h>
103
91447636
A
104#include <mach/vm_map_server.h>
105#include <mach/mach_host_server.h>
2d21ac55 106#include <vm/vm_protos.h>
b0d623f7 107#include <vm/vm_purgeable_internal.h>
91447636 108
91447636 109#include <vm/vm_protos.h>
2d21ac55 110#include <vm/vm_shared_region.h>
6d2010ae 111#include <vm/vm_map_store.h>
91447636 112
39037602
A
113extern int proc_selfpid(void);
114extern char *proc_name_address(void *p);
115
116#if VM_MAP_DEBUG_APPLE_PROTECT
117int vm_map_debug_apple_protect = 0;
118#endif /* VM_MAP_DEBUG_APPLE_PROTECT */
119#if VM_MAP_DEBUG_FOURK
120int vm_map_debug_fourk = 0;
121#endif /* VM_MAP_DEBUG_FOURK */
3e170ce0 122
316670eb 123extern u_int32_t random(void); /* from <libkern/libkern.h> */
1c79356b
A
124/* Internal prototypes
125 */
2d21ac55 126
91447636
A
127static void vm_map_simplify_range(
128 vm_map_t map,
129 vm_map_offset_t start,
130 vm_map_offset_t end); /* forward */
131
132static boolean_t vm_map_range_check(
2d21ac55
A
133 vm_map_t map,
134 vm_map_offset_t start,
135 vm_map_offset_t end,
136 vm_map_entry_t *entry);
1c79356b 137
91447636 138static vm_map_entry_t _vm_map_entry_create(
7ddcb079 139 struct vm_map_header *map_header, boolean_t map_locked);
1c79356b 140
91447636 141static void _vm_map_entry_dispose(
2d21ac55
A
142 struct vm_map_header *map_header,
143 vm_map_entry_t entry);
1c79356b 144
91447636 145static void vm_map_pmap_enter(
2d21ac55
A
146 vm_map_t map,
147 vm_map_offset_t addr,
148 vm_map_offset_t end_addr,
149 vm_object_t object,
150 vm_object_offset_t offset,
151 vm_prot_t protection);
1c79356b 152
91447636 153static void _vm_map_clip_end(
2d21ac55
A
154 struct vm_map_header *map_header,
155 vm_map_entry_t entry,
156 vm_map_offset_t end);
91447636
A
157
158static void _vm_map_clip_start(
2d21ac55
A
159 struct vm_map_header *map_header,
160 vm_map_entry_t entry,
161 vm_map_offset_t start);
1c79356b 162
91447636 163static void vm_map_entry_delete(
2d21ac55
A
164 vm_map_t map,
165 vm_map_entry_t entry);
1c79356b 166
91447636 167static kern_return_t vm_map_delete(
2d21ac55
A
168 vm_map_t map,
169 vm_map_offset_t start,
170 vm_map_offset_t end,
171 int flags,
172 vm_map_t zap_map);
1c79356b 173
91447636 174static kern_return_t vm_map_copy_overwrite_unaligned(
2d21ac55
A
175 vm_map_t dst_map,
176 vm_map_entry_t entry,
177 vm_map_copy_t copy,
39236c6e
A
178 vm_map_address_t start,
179 boolean_t discard_on_success);
1c79356b 180
91447636 181static kern_return_t vm_map_copy_overwrite_aligned(
2d21ac55
A
182 vm_map_t dst_map,
183 vm_map_entry_t tmp_entry,
184 vm_map_copy_t copy,
185 vm_map_offset_t start,
186 pmap_t pmap);
1c79356b 187
91447636 188static kern_return_t vm_map_copyin_kernel_buffer(
2d21ac55
A
189 vm_map_t src_map,
190 vm_map_address_t src_addr,
191 vm_map_size_t len,
192 boolean_t src_destroy,
193 vm_map_copy_t *copy_result); /* OUT */
1c79356b 194
91447636 195static kern_return_t vm_map_copyout_kernel_buffer(
2d21ac55
A
196 vm_map_t map,
197 vm_map_address_t *addr, /* IN/OUT */
198 vm_map_copy_t copy,
39037602 199 vm_map_size_t copy_size,
39236c6e
A
200 boolean_t overwrite,
201 boolean_t consume_on_success);
1c79356b 202
91447636 203static void vm_map_fork_share(
2d21ac55
A
204 vm_map_t old_map,
205 vm_map_entry_t old_entry,
206 vm_map_t new_map);
1c79356b 207
91447636 208static boolean_t vm_map_fork_copy(
2d21ac55
A
209 vm_map_t old_map,
210 vm_map_entry_t *old_entry_p,
39037602
A
211 vm_map_t new_map,
212 int vm_map_copyin_flags);
1c79356b 213
0c530ab8 214void vm_map_region_top_walk(
2d21ac55
A
215 vm_map_entry_t entry,
216 vm_region_top_info_t top);
1c79356b 217
0c530ab8 218void vm_map_region_walk(
2d21ac55
A
219 vm_map_t map,
220 vm_map_offset_t va,
221 vm_map_entry_t entry,
222 vm_object_offset_t offset,
223 vm_object_size_t range,
224 vm_region_extended_info_t extended,
39236c6e
A
225 boolean_t look_for_pages,
226 mach_msg_type_number_t count);
91447636
A
227
228static kern_return_t vm_map_wire_nested(
2d21ac55
A
229 vm_map_t map,
230 vm_map_offset_t start,
231 vm_map_offset_t end,
3e170ce0 232 vm_prot_t caller_prot,
2d21ac55
A
233 boolean_t user_wire,
234 pmap_t map_pmap,
fe8ab488
A
235 vm_map_offset_t pmap_addr,
236 ppnum_t *physpage_p);
91447636
A
237
238static kern_return_t vm_map_unwire_nested(
2d21ac55
A
239 vm_map_t map,
240 vm_map_offset_t start,
241 vm_map_offset_t end,
242 boolean_t user_wire,
243 pmap_t map_pmap,
244 vm_map_offset_t pmap_addr);
91447636
A
245
246static kern_return_t vm_map_overwrite_submap_recurse(
2d21ac55
A
247 vm_map_t dst_map,
248 vm_map_offset_t dst_addr,
249 vm_map_size_t dst_size);
91447636
A
250
251static kern_return_t vm_map_copy_overwrite_nested(
2d21ac55
A
252 vm_map_t dst_map,
253 vm_map_offset_t dst_addr,
254 vm_map_copy_t copy,
255 boolean_t interruptible,
6d2010ae
A
256 pmap_t pmap,
257 boolean_t discard_on_success);
91447636
A
258
259static kern_return_t vm_map_remap_extract(
2d21ac55
A
260 vm_map_t map,
261 vm_map_offset_t addr,
262 vm_map_size_t size,
263 boolean_t copy,
264 struct vm_map_header *map_header,
265 vm_prot_t *cur_protection,
266 vm_prot_t *max_protection,
267 vm_inherit_t inheritance,
39037602
A
268 boolean_t pageable,
269 boolean_t same_map);
91447636
A
270
271static kern_return_t vm_map_remap_range_allocate(
2d21ac55
A
272 vm_map_t map,
273 vm_map_address_t *address,
274 vm_map_size_t size,
275 vm_map_offset_t mask,
060df5ea 276 int flags,
2d21ac55 277 vm_map_entry_t *map_entry);
91447636
A
278
279static void vm_map_region_look_for_page(
2d21ac55
A
280 vm_map_t map,
281 vm_map_offset_t va,
282 vm_object_t object,
283 vm_object_offset_t offset,
284 int max_refcnt,
285 int depth,
39236c6e
A
286 vm_region_extended_info_t extended,
287 mach_msg_type_number_t count);
91447636
A
288
289static int vm_map_region_count_obj_refs(
2d21ac55
A
290 vm_map_entry_t entry,
291 vm_object_t object);
1c79356b 292
b0d623f7
A
293
294static kern_return_t vm_map_willneed(
295 vm_map_t map,
296 vm_map_offset_t start,
297 vm_map_offset_t end);
298
299static kern_return_t vm_map_reuse_pages(
300 vm_map_t map,
301 vm_map_offset_t start,
302 vm_map_offset_t end);
303
304static kern_return_t vm_map_reusable_pages(
305 vm_map_t map,
306 vm_map_offset_t start,
307 vm_map_offset_t end);
308
309static kern_return_t vm_map_can_reuse(
310 vm_map_t map,
311 vm_map_offset_t start,
312 vm_map_offset_t end);
313
3e170ce0
A
314#if MACH_ASSERT
315static kern_return_t vm_map_pageout(
316 vm_map_t map,
317 vm_map_offset_t start,
318 vm_map_offset_t end);
319#endif /* MACH_ASSERT */
6d2010ae 320
1c79356b 321/*
322 * Macros to copy a vm_map_entry. We must be careful to correctly
323 * manage the wired page count. vm_map_entry_copy() creates a new
324 * map entry to the same memory - the wired count in the new entry
325 * must be set to zero. vm_map_entry_copy_full() creates a new
326 * entry that is identical to the old entry. This preserves the
327 * wire count; it's used for map splitting and zone changing in
328 * vm_map_copyout.
329 */
316670eb 330
7ddcb079
A
331#define vm_map_entry_copy(NEW,OLD) \
332MACRO_BEGIN \
333boolean_t _vmec_reserved = (NEW)->from_reserved_zone; \
2d21ac55
A
334 *(NEW) = *(OLD); \
335 (NEW)->is_shared = FALSE; \
336 (NEW)->needs_wakeup = FALSE; \
337 (NEW)->in_transition = FALSE; \
338 (NEW)->wired_count = 0; \
339 (NEW)->user_wired_count = 0; \
b0d623f7 340 (NEW)->permanent = FALSE; \
316670eb 341 (NEW)->used_for_jit = FALSE; \
fe8ab488
A
342 (NEW)->from_reserved_zone = _vmec_reserved; \
343 (NEW)->iokit_acct = FALSE; \
3e170ce0
A
344 (NEW)->vme_resilient_codesign = FALSE; \
345 (NEW)->vme_resilient_media = FALSE; \
39037602 346 (NEW)->vme_atomic = FALSE; \
1c79356b
A
347MACRO_END
348
7ddcb079
A
349#define vm_map_entry_copy_full(NEW,OLD) \
350MACRO_BEGIN \
351boolean_t _vmecf_reserved = (NEW)->from_reserved_zone; \
352(*(NEW) = *(OLD)); \
353(NEW)->from_reserved_zone = _vmecf_reserved; \
354MACRO_END
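
/*
 * Illustrative sketch (not part of the original source): how the two copy
 * macros above differ for a wired entry.  "dst" and "src" are hypothetical
 * map entries; only the wired-count behavior is shown.
 */
#if 0
	vm_map_entry_copy(dst, src);	  /* dst->wired_count reset to 0, sharing flags cleared */
	vm_map_entry_copy_full(dst, src); /* dst->wired_count == src->wired_count (identical copy) */
#endif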
1c79356b 355
2d21ac55
A
356/*
357 * Decide if we want to allow processes to execute from their data or stack areas.
358 * override_nx() returns true if we do. Data/stack execution can be enabled independently
359 * for 32 and 64 bit processes. Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
360 * or allow_stack_exec to enable data execution for that type of data area for that particular
361 * ABI (or both by or'ing the flags together). These are initialized in the architecture
362 * specific pmap files since the default behavior varies according to architecture. The
363 * main reason it varies is because of the need to provide binary compatibility with old
364 * applications that were written before these restrictions came into being. In the old
365 * days, an app could execute anything it could read, but this has slowly been tightened
366 * up over time. The default behavior is:
367 *
368 * 32-bit PPC apps may execute from both stack and data areas
 369 * 32-bit Intel apps may execute from data areas but not stack
370 * 64-bit PPC/Intel apps may not execute from either data or stack
371 *
372 * An application on any architecture may override these defaults by explicitly
373 * adding PROT_EXEC permission to the page in question with the mprotect(2)
374 * system call. This code here just determines what happens when an app tries to
375 * execute from a page that lacks execute permission.
376 *
377 * Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
6d2010ae 378 * default behavior for both 32 and 64 bit apps on a system-wide basis. Furthermore,
379 * a Mach-O header flag bit (MH_NO_HEAP_EXECUTION) can be used to forcibly disallow
380 * execution from data areas for a particular binary even if the arch normally permits it. As
381 * a final wrinkle, a posix_spawn attribute flag can be used to negate this opt-in header bit
382 * to support some complicated use cases, notably browsers with out-of-process plugins that
383 * are not all NX-safe.
2d21ac55 384 */
385
386extern int allow_data_exec, allow_stack_exec;
387
388int
389override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
390{
391 int current_abi;
392
3e170ce0
A
393 if (map->pmap == kernel_pmap) return FALSE;
394
2d21ac55
A
395 /*
396 * Determine if the app is running in 32 or 64 bit mode.
397 */
398
399 if (vm_map_is_64bit(map))
400 current_abi = VM_ABI_64;
401 else
402 current_abi = VM_ABI_32;
403
404 /*
405 * Determine if we should allow the execution based on whether it's a
406 * stack or data area and the current architecture.
407 */
408
409 if (user_tag == VM_MEMORY_STACK)
410 return allow_stack_exec & current_abi;
411
6d2010ae 412 return (allow_data_exec & current_abi) && (map->map_disallow_data_exec == FALSE);
2d21ac55
A
413}
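
/*
 * Illustrative sketch (not from the original source): a fault-path caller
 * could consult override_nx() roughly as below to decide whether to grant
 * execute permission despite a missing PROT_EXEC bit.  "map", "user_tag"
 * and "prot" are hypothetical locals standing in for the real fault state.
 */
#if 0
	if ((prot & VM_PROT_EXECUTE) == 0 && override_nx(map, user_tag)) {
		prot |= VM_PROT_EXECUTE;	/* data/stack execution allowed for this ABI */
	}
#endif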
414
415
1c79356b 416/*
417 * Virtual memory maps provide for the mapping, protection,
418 * and sharing of virtual memory objects. In addition,
419 * this module provides for an efficient virtual copy of
420 * memory from one map to another.
421 *
422 * Synchronization is required prior to most operations.
423 *
424 * Maps consist of an ordered doubly-linked list of simple
425 * entries; a single hint is used to speed up lookups.
426 *
427 * Sharing maps have been deleted from this version of Mach.
428 * All shared objects are now mapped directly into the respective
429 * maps. This requires a change in the copy on write strategy;
430 * the asymmetric (delayed) strategy is used for shared temporary
431 * objects instead of the symmetric (shadow) strategy. All maps
432 * are now "top level" maps (either task map, kernel map or submap
433 * of the kernel map).
434 *
 435 * Since portions of maps are specified by start/end addresses,
436 * which may not align with existing map entries, all
437 * routines merely "clip" entries to these start/end values.
438 * [That is, an entry is split into two, bordering at a
439 * start or end value.] Note that these clippings may not
440 * always be necessary (as the two resulting entries are then
441 * not changed); however, the clipping is done for convenience.
442 * No attempt is currently made to "glue back together" two
443 * abutting entries.
444 *
445 * The symmetric (shadow) copy strategy implements virtual copy
446 * by copying VM object references from one map to
447 * another, and then marking both regions as copy-on-write.
448 * It is important to note that only one writeable reference
449 * to a VM object region exists in any map when this strategy
450 * is used -- this means that shadow object creation can be
 451 * delayed until a write operation occurs. The asymmetric (delayed)
452 * strategy allows multiple maps to have writeable references to
453 * the same region of a vm object, and hence cannot delay creating
454 * its copy objects. See vm_object_copy_quickly() in vm_object.c.
455 * Copying of permanent objects is completely different; see
456 * vm_object_copy_strategically() in vm_object.c.
457 */
458
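/*
 * Illustrative sketch (not from the original source): "clipping" an entry at
 * the boundaries of an operation, as described above, is done with the clip
 * helpers used throughout this file; "map", "entry", "start" and "end" are
 * hypothetical.
 */
#if 0
	vm_map_clip_start(map, entry, start);	/* entry now begins at "start" */
	vm_map_clip_end(map, entry, end);	/* entry now ends at "end" */
#endif
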
91447636
A
459static zone_t vm_map_zone; /* zone for vm_map structures */
460static zone_t vm_map_entry_zone; /* zone for vm_map_entry structures */
39037602 461zone_t vm_map_entry_reserved_zone; /* zone with reserve for non-blocking
7ddcb079 462 * allocations */
91447636 463static zone_t vm_map_copy_zone; /* zone for vm_map_copy structures */
3e170ce0 464zone_t vm_map_holes_zone; /* zone for vm map holes (vm_map_links) structures */
1c79356b
A
465
466
467/*
468 * Placeholder object for submap operations. This object is dropped
469 * into the range by a call to vm_map_find, and removed when
470 * vm_map_submap creates the submap.
471 */
472
473vm_object_t vm_submap_object;
474
91447636 475static void *map_data;
b0d623f7 476static vm_size_t map_data_size;
91447636 477static void *kentry_data;
b0d623f7 478static vm_size_t kentry_data_size;
3e170ce0
A
479static void *map_holes_data;
480static vm_size_t map_holes_data_size;
1c79356b 481
b0d623f7 482#define NO_COALESCE_LIMIT ((1024 * 128) - 1)
1c79356b 483
55e303ae 484/* Skip acquiring locks if we're in the midst of a kernel core dump */
b0d623f7 485unsigned int not_in_kdp = 1;
55e303ae 486
6d2010ae
A
487unsigned int vm_map_set_cache_attr_count = 0;
488
489kern_return_t
490vm_map_set_cache_attr(
491 vm_map_t map,
492 vm_map_offset_t va)
493{
494 vm_map_entry_t map_entry;
495 vm_object_t object;
496 kern_return_t kr = KERN_SUCCESS;
497
498 vm_map_lock_read(map);
499
500 if (!vm_map_lookup_entry(map, va, &map_entry) ||
501 map_entry->is_sub_map) {
502 /*
503 * that memory is not properly mapped
504 */
505 kr = KERN_INVALID_ARGUMENT;
506 goto done;
507 }
3e170ce0 508 object = VME_OBJECT(map_entry);
6d2010ae
A
509
510 if (object == VM_OBJECT_NULL) {
511 /*
512 * there should be a VM object here at this point
513 */
514 kr = KERN_INVALID_ARGUMENT;
515 goto done;
516 }
517 vm_object_lock(object);
518 object->set_cache_attr = TRUE;
519 vm_object_unlock(object);
520
521 vm_map_set_cache_attr_count++;
522done:
523 vm_map_unlock_read(map);
524
525 return kr;
526}
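
/*
 * Illustrative sketch (not from the original source): a typical caller marks
 * an already-mapped user page so its cache attributes are honored.  "user_map"
 * and "user_addr" are hypothetical; the real call sites live outside this file.
 */
#if 0
	kern_return_t kr;

	kr = vm_map_set_cache_attr(user_map, user_addr);
	if (kr != KERN_SUCCESS) {
		/* address was not mapped, or was mapped by a submap */
	}
#endif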
527
528
593a1d5f
A
529#if CONFIG_CODE_DECRYPTION
530/*
531 * vm_map_apple_protected:
532 * This remaps the requested part of the object with an object backed by
533 * the decrypting pager.
534 * crypt_info contains entry points and session data for the crypt module.
535 * The crypt_info block will be copied by vm_map_apple_protected. The data structures
536 * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
537 */
0c530ab8
A
538kern_return_t
539vm_map_apple_protected(
3e170ce0
A
540 vm_map_t map,
541 vm_map_offset_t start,
542 vm_map_offset_t end,
543 vm_object_offset_t crypto_backing_offset,
593a1d5f 544 struct pager_crypt_info *crypt_info)
0c530ab8
A
545{
546 boolean_t map_locked;
547 kern_return_t kr;
548 vm_map_entry_t map_entry;
3e170ce0
A
549 struct vm_map_entry tmp_entry;
550 memory_object_t unprotected_mem_obj;
0c530ab8
A
551 vm_object_t protected_object;
552 vm_map_offset_t map_addr;
3e170ce0
A
553 vm_map_offset_t start_aligned, end_aligned;
554 vm_object_offset_t crypto_start, crypto_end;
555 int vm_flags;
0c530ab8 556
3e170ce0
A
557 map_locked = FALSE;
558 unprotected_mem_obj = MEMORY_OBJECT_NULL;
0c530ab8 559
3e170ce0
A
560 start_aligned = vm_map_trunc_page(start, PAGE_MASK_64);
561 end_aligned = vm_map_round_page(end, PAGE_MASK_64);
562 start_aligned = vm_map_trunc_page(start_aligned, VM_MAP_PAGE_MASK(map));
563 end_aligned = vm_map_round_page(end_aligned, VM_MAP_PAGE_MASK(map));
b0d623f7 564
3e170ce0
A
565 assert(start_aligned == start);
566 assert(end_aligned == end);
b0d623f7 567
3e170ce0
A
568 map_addr = start_aligned;
569 for (map_addr = start_aligned;
570 map_addr < end;
571 map_addr = tmp_entry.vme_end) {
572 vm_map_lock(map);
573 map_locked = TRUE;
b0d623f7 574
3e170ce0
A
575 /* lookup the protected VM object */
576 if (!vm_map_lookup_entry(map,
577 map_addr,
578 &map_entry) ||
579 map_entry->is_sub_map ||
580 VME_OBJECT(map_entry) == VM_OBJECT_NULL ||
581 !(map_entry->protection & VM_PROT_EXECUTE)) {
582 /* that memory is not properly mapped */
583 kr = KERN_INVALID_ARGUMENT;
584 goto done;
585 }
b0d623f7 586
3e170ce0
A
587 /* get the protected object to be decrypted */
588 protected_object = VME_OBJECT(map_entry);
589 if (protected_object == VM_OBJECT_NULL) {
590 /* there should be a VM object here at this point */
591 kr = KERN_INVALID_ARGUMENT;
592 goto done;
593 }
594 /* ensure protected object stays alive while map is unlocked */
595 vm_object_reference(protected_object);
596
597 /* limit the map entry to the area we want to cover */
598 vm_map_clip_start(map, map_entry, start_aligned);
599 vm_map_clip_end(map, map_entry, end_aligned);
600
601 tmp_entry = *map_entry;
602 map_entry = VM_MAP_ENTRY_NULL; /* not valid after unlocking map */
603 vm_map_unlock(map);
604 map_locked = FALSE;
605
606 /*
607 * This map entry might be only partially encrypted
608 * (if not fully "page-aligned").
609 */
610 crypto_start = 0;
611 crypto_end = tmp_entry.vme_end - tmp_entry.vme_start;
612 if (tmp_entry.vme_start < start) {
613 if (tmp_entry.vme_start != start_aligned) {
614 kr = KERN_INVALID_ADDRESS;
615 }
616 crypto_start += (start - tmp_entry.vme_start);
617 }
618 if (tmp_entry.vme_end > end) {
619 if (tmp_entry.vme_end != end_aligned) {
620 kr = KERN_INVALID_ADDRESS;
621 }
622 crypto_end -= (tmp_entry.vme_end - end);
623 }
624
625 /*
626 * This "extra backing offset" is needed to get the decryption
627 * routine to use the right key. It adjusts for the possibly
628 * relative offset of an interposed "4K" pager...
629 */
630 if (crypto_backing_offset == (vm_object_offset_t) -1) {
631 crypto_backing_offset = VME_OFFSET(&tmp_entry);
632 }
0c530ab8 633
3e170ce0
A
634 /*
635 * Lookup (and create if necessary) the protected memory object
636 * matching that VM object.
637 * If successful, this also grabs a reference on the memory object,
638 * to guarantee that it doesn't go away before we get a chance to map
639 * it.
640 */
641 unprotected_mem_obj = apple_protect_pager_setup(
642 protected_object,
643 VME_OFFSET(&tmp_entry),
644 crypto_backing_offset,
645 crypt_info,
646 crypto_start,
647 crypto_end);
648
649 /* release extra ref on protected object */
650 vm_object_deallocate(protected_object);
651
652 if (unprotected_mem_obj == NULL) {
653 kr = KERN_FAILURE;
654 goto done;
655 }
656
657 vm_flags = VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
658
659 /* map this memory object in place of the current one */
660 map_addr = tmp_entry.vme_start;
661 kr = vm_map_enter_mem_object(map,
662 &map_addr,
663 (tmp_entry.vme_end -
664 tmp_entry.vme_start),
665 (mach_vm_offset_t) 0,
666 vm_flags,
667 (ipc_port_t) unprotected_mem_obj,
668 0,
669 TRUE,
670 tmp_entry.protection,
671 tmp_entry.max_protection,
672 tmp_entry.inheritance);
673 assert(kr == KERN_SUCCESS);
674 assert(map_addr == tmp_entry.vme_start);
675
676#if VM_MAP_DEBUG_APPLE_PROTECT
39037602
A
677 if (vm_map_debug_apple_protect) {
678 printf("APPLE_PROTECT: map %p [0x%llx:0x%llx] pager %p:"
679 " backing:[object:%p,offset:0x%llx,"
680 "crypto_backing_offset:0x%llx,"
681 "crypto_start:0x%llx,crypto_end:0x%llx]\n",
682 map,
683 (uint64_t) map_addr,
684 (uint64_t) (map_addr + (tmp_entry.vme_end -
685 tmp_entry.vme_start)),
686 unprotected_mem_obj,
687 protected_object,
688 VME_OFFSET(&tmp_entry),
689 crypto_backing_offset,
690 crypto_start,
691 crypto_end);
692 }
3e170ce0
A
693#endif /* VM_MAP_DEBUG_APPLE_PROTECT */
694
695 /*
696 * Release the reference obtained by
697 * apple_protect_pager_setup().
698 * The mapping (if it succeeded) is now holding a reference on
699 * the memory object.
700 */
701 memory_object_deallocate(unprotected_mem_obj);
702 unprotected_mem_obj = MEMORY_OBJECT_NULL;
703
704 /* continue with next map entry */
705 crypto_backing_offset += (tmp_entry.vme_end -
706 tmp_entry.vme_start);
707 crypto_backing_offset -= crypto_start;
708 }
709 kr = KERN_SUCCESS;
0c530ab8
A
710
711done:
712 if (map_locked) {
3e170ce0 713 vm_map_unlock(map);
0c530ab8
A
714 }
715 return kr;
716}
593a1d5f 717#endif /* CONFIG_CODE_DECRYPTION */
0c530ab8
A
718
719
b0d623f7
A
720lck_grp_t vm_map_lck_grp;
721lck_grp_attr_t vm_map_lck_grp_attr;
722lck_attr_t vm_map_lck_attr;
fe8ab488 723lck_attr_t vm_map_lck_rw_attr;
b0d623f7
A
724
725
593a1d5f
A
726/*
727 * vm_map_init:
728 *
729 * Initialize the vm_map module. Must be called before
730 * any other vm_map routines.
731 *
732 * Map and entry structures are allocated from zones -- we must
733 * initialize those zones.
734 *
735 * There are three zones of interest:
736 *
737 * vm_map_zone: used to allocate maps.
738 * vm_map_entry_zone: used to allocate map entries.
7ddcb079 739 * vm_map_entry_reserved_zone: fallback zone for kernel map entries
593a1d5f
A
740 *
741 * The kernel allocates map entries from a special zone that is initially
742 * "crammed" with memory. It would be difficult (perhaps impossible) for
 743 * the kernel to allocate more memory to an entry zone when it became
744 * empty since the very act of allocating memory implies the creation
745 * of a new entry.
746 */
1c79356b
A
747void
748vm_map_init(
749 void)
750{
7ddcb079 751 vm_size_t entry_zone_alloc_size;
316670eb
A
752 const char *mez_name = "VM map entries";
753
2d21ac55
A
754 vm_map_zone = zinit((vm_map_size_t) sizeof(struct _vm_map), 40*1024,
755 PAGE_SIZE, "maps");
0b4c1975 756 zone_change(vm_map_zone, Z_NOENCRYPT, TRUE);
7ddcb079
A
757#if defined(__LP64__)
758 entry_zone_alloc_size = PAGE_SIZE * 5;
759#else
760 entry_zone_alloc_size = PAGE_SIZE * 6;
761#endif
91447636 762 vm_map_entry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
7ddcb079 763 1024*1024, entry_zone_alloc_size,
316670eb 764 mez_name);
0b4c1975 765 zone_change(vm_map_entry_zone, Z_NOENCRYPT, TRUE);
7ddcb079 766 zone_change(vm_map_entry_zone, Z_NOCALLOUT, TRUE);
316670eb 767 zone_change(vm_map_entry_zone, Z_GZALLOC_EXEMPT, TRUE);
1c79356b 768
7ddcb079
A
769 vm_map_entry_reserved_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
770 kentry_data_size * 64, kentry_data_size,
771 "Reserved VM map entries");
772 zone_change(vm_map_entry_reserved_zone, Z_NOENCRYPT, TRUE);
1c79356b 773
91447636 774 vm_map_copy_zone = zinit((vm_map_size_t) sizeof(struct vm_map_copy),
7ddcb079 775 16*1024, PAGE_SIZE, "VM map copies");
0b4c1975 776 zone_change(vm_map_copy_zone, Z_NOENCRYPT, TRUE);
1c79356b 777
3e170ce0
A
778 vm_map_holes_zone = zinit((vm_map_size_t) sizeof(struct vm_map_links),
779 16*1024, PAGE_SIZE, "VM map holes");
780 zone_change(vm_map_holes_zone, Z_NOENCRYPT, TRUE);
781
1c79356b
A
782 /*
783 * Cram the map and kentry zones with initial data.
7ddcb079 784 * Set reserved_zone non-collectible to aid zone_gc().
1c79356b
A
785 */
786 zone_change(vm_map_zone, Z_COLLECT, FALSE);
39037602 787 zone_change(vm_map_zone, Z_FOREIGN, TRUE);
7ddcb079
A
788
789 zone_change(vm_map_entry_reserved_zone, Z_COLLECT, FALSE);
790 zone_change(vm_map_entry_reserved_zone, Z_EXPAND, FALSE);
791 zone_change(vm_map_entry_reserved_zone, Z_FOREIGN, TRUE);
792 zone_change(vm_map_entry_reserved_zone, Z_NOCALLOUT, TRUE);
793 zone_change(vm_map_entry_reserved_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
6d2010ae 794 zone_change(vm_map_copy_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
316670eb 795 zone_change(vm_map_entry_reserved_zone, Z_GZALLOC_EXEMPT, TRUE);
6d2010ae 796
3e170ce0
A
797 zone_change(vm_map_holes_zone, Z_COLLECT, TRUE);
798 zone_change(vm_map_holes_zone, Z_EXPAND, TRUE);
799 zone_change(vm_map_holes_zone, Z_FOREIGN, TRUE);
800 zone_change(vm_map_holes_zone, Z_NOCALLOUT, TRUE);
801 zone_change(vm_map_holes_zone, Z_CALLERACCT, TRUE);
802 zone_change(vm_map_holes_zone, Z_GZALLOC_EXEMPT, TRUE);
803
804 /*
805 * Add the stolen memory to zones, adjust zone size and stolen counts.
806 */
7ddcb079
A
807 zcram(vm_map_zone, (vm_offset_t)map_data, map_data_size);
808 zcram(vm_map_entry_reserved_zone, (vm_offset_t)kentry_data, kentry_data_size);
3e170ce0
A
809 zcram(vm_map_holes_zone, (vm_offset_t)map_holes_data, map_holes_data_size);
810 VM_PAGE_MOVE_STOLEN(atop_64(map_data_size) + atop_64(kentry_data_size) + atop_64(map_holes_data_size));
811
b0d623f7
A
812 lck_grp_attr_setdefault(&vm_map_lck_grp_attr);
813 lck_grp_init(&vm_map_lck_grp, "vm_map", &vm_map_lck_grp_attr);
814 lck_attr_setdefault(&vm_map_lck_attr);
316670eb 815
fe8ab488
A
816 lck_attr_setdefault(&vm_map_lck_rw_attr);
817 lck_attr_cleardebug(&vm_map_lck_rw_attr);
818
39037602
A
819#if VM_MAP_DEBUG_APPLE_PROTECT
820 PE_parse_boot_argn("vm_map_debug_apple_protect",
821 &vm_map_debug_apple_protect,
822 sizeof(vm_map_debug_apple_protect));
823#endif /* VM_MAP_DEBUG_APPLE_PROTECT */
 824#if VM_MAP_DEBUG_FOURK
825 PE_parse_boot_argn("vm_map_debug_fourk",
826 &vm_map_debug_fourk,
827 sizeof(vm_map_debug_fourk));
828#endif /* VM_MAP_DEBUG_FOURK */
1c79356b
A
829}
830
831void
832vm_map_steal_memory(
833 void)
834{
7ddcb079
A
835 uint32_t kentry_initial_pages;
836
b0d623f7 837 map_data_size = round_page(10 * sizeof(struct _vm_map));
1c79356b
A
838 map_data = pmap_steal_memory(map_data_size);
839
1c79356b 840 /*
7ddcb079
A
841 * kentry_initial_pages corresponds to the number of kernel map entries
842 * required during bootstrap until the asynchronous replenishment
843 * scheme is activated and/or entries are available from the general
844 * map entry pool.
1c79356b 845 */
7ddcb079
A
846#if defined(__LP64__)
847 kentry_initial_pages = 10;
848#else
849 kentry_initial_pages = 6;
1c79356b 850#endif
316670eb
A
851
852#if CONFIG_GZALLOC
853 /* If using the guard allocator, reserve more memory for the kernel
854 * reserved map entry pool.
855 */
856 if (gzalloc_enabled())
857 kentry_initial_pages *= 1024;
858#endif
859
7ddcb079 860 kentry_data_size = kentry_initial_pages * PAGE_SIZE;
1c79356b 861 kentry_data = pmap_steal_memory(kentry_data_size);
3e170ce0
A
862
863 map_holes_data_size = kentry_data_size;
864 map_holes_data = pmap_steal_memory(map_holes_data_size);
1c79356b
A
865}
866
3e170ce0
A
867void
868vm_kernel_reserved_entry_init(void) {
7ddcb079 869 zone_prio_refill_configure(vm_map_entry_reserved_zone, (6*PAGE_SIZE)/sizeof(struct vm_map_entry));
3e170ce0
A
870 zone_prio_refill_configure(vm_map_holes_zone, (6*PAGE_SIZE)/sizeof(struct vm_map_links));
871}
872
873void
874vm_map_disable_hole_optimization(vm_map_t map)
875{
876 vm_map_entry_t head_entry, hole_entry, next_hole_entry;
877
878 if (map->holelistenabled) {
879
880 head_entry = hole_entry = (vm_map_entry_t) map->holes_list;
881
882 while (hole_entry != NULL) {
883
884 next_hole_entry = hole_entry->vme_next;
885
886 hole_entry->vme_next = NULL;
887 hole_entry->vme_prev = NULL;
888 zfree(vm_map_holes_zone, hole_entry);
889
890 if (next_hole_entry == head_entry) {
891 hole_entry = NULL;
892 } else {
893 hole_entry = next_hole_entry;
894 }
895 }
896
897 map->holes_list = NULL;
898 map->holelistenabled = FALSE;
899
900 map->first_free = vm_map_first_entry(map);
901 SAVE_HINT_HOLE_WRITE(map, NULL);
902 }
903}
904
905boolean_t
906vm_kernel_map_is_kernel(vm_map_t map) {
907 return (map->pmap == kernel_pmap);
7ddcb079
A
908}
909
1c79356b
A
910/*
911 * vm_map_create:
912 *
913 * Creates and returns a new empty VM map with
914 * the given physical map structure, and having
915 * the given lower and upper address bounds.
916 */
3e170ce0
A
917
918boolean_t vm_map_supports_hole_optimization = TRUE;
919
1c79356b
A
920vm_map_t
921vm_map_create(
91447636
A
922 pmap_t pmap,
923 vm_map_offset_t min,
924 vm_map_offset_t max,
925 boolean_t pageable)
1c79356b 926{
2d21ac55 927 static int color_seed = 0;
39037602 928 vm_map_t result;
3e170ce0 929 struct vm_map_links *hole_entry = NULL;
1c79356b
A
930
931 result = (vm_map_t) zalloc(vm_map_zone);
932 if (result == VM_MAP_NULL)
933 panic("vm_map_create");
934
935 vm_map_first_entry(result) = vm_map_to_entry(result);
936 vm_map_last_entry(result) = vm_map_to_entry(result);
937 result->hdr.nentries = 0;
938 result->hdr.entries_pageable = pageable;
939
6d2010ae
A
940 vm_map_store_init( &(result->hdr) );
941
39236c6e
A
942 result->hdr.page_shift = PAGE_SHIFT;
943
1c79356b 944 result->size = 0;
2d21ac55
A
945 result->user_wire_limit = MACH_VM_MAX_ADDRESS; /* default limit is unlimited */
946 result->user_wire_size = 0;
1c79356b
A
947 result->ref_count = 1;
948#if TASK_SWAPPER
949 result->res_count = 1;
950 result->sw_state = MAP_SW_IN;
951#endif /* TASK_SWAPPER */
952 result->pmap = pmap;
953 result->min_offset = min;
954 result->max_offset = max;
955 result->wiring_required = FALSE;
956 result->no_zero_fill = FALSE;
316670eb 957 result->mapped_in_other_pmaps = FALSE;
1c79356b 958 result->wait_for_space = FALSE;
b0d623f7 959 result->switch_protect = FALSE;
6d2010ae
A
960 result->disable_vmentry_reuse = FALSE;
961 result->map_disallow_data_exec = FALSE;
39037602 962 result->is_nested_map = FALSE;
6d2010ae 963 result->highest_entry_end = 0;
1c79356b
A
964 result->first_free = vm_map_to_entry(result);
965 result->hint = vm_map_to_entry(result);
2d21ac55 966 result->color_rr = (color_seed++) & vm_color_mask;
6d2010ae 967 result->jit_entry_exists = FALSE;
3e170ce0
A
968
969 if (vm_map_supports_hole_optimization && pmap != kernel_pmap) {
970 hole_entry = zalloc(vm_map_holes_zone);
971
972 hole_entry->start = min;
973 hole_entry->end = (max > (vm_map_offset_t)MACH_VM_MAX_ADDRESS) ? max : (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
974 result->holes_list = result->hole_hint = hole_entry;
975 hole_entry->prev = hole_entry->next = (vm_map_entry_t) hole_entry;
976 result->holelistenabled = TRUE;
977
978 } else {
979
980 result->holelistenabled = FALSE;
981 }
982
1c79356b 983 vm_map_lock_init(result);
b0d623f7
A
984 lck_mtx_init_ext(&result->s_lock, &result->s_lock_ext, &vm_map_lck_grp, &vm_map_lck_attr);
985
1c79356b
A
986 return(result);
987}
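
/*
 * Illustrative sketch (not from the original source): creating a pageable
 * map covering an address range, in the style of this file's callers.
 * "new_pmap", "min_addr" and "max_addr" are hypothetical.
 */
#if 0
	vm_map_t new_map;

	new_map = vm_map_create(new_pmap, min_addr, max_addr, TRUE /* pageable */);
#endif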
988
989/*
990 * vm_map_entry_create: [ internal use only ]
991 *
992 * Allocates a VM map entry for insertion in the
993 * given map (or map copy). No fields are filled.
994 */
7ddcb079 995#define vm_map_entry_create(map, map_locked) _vm_map_entry_create(&(map)->hdr, map_locked)
1c79356b 996
7ddcb079
A
997#define vm_map_copy_entry_create(copy, map_locked) \
998 _vm_map_entry_create(&(copy)->cpy_hdr, map_locked)
999unsigned reserved_zalloc_count, nonreserved_zalloc_count;
1c79356b 1000
91447636 1001static vm_map_entry_t
1c79356b 1002_vm_map_entry_create(
7ddcb079 1003 struct vm_map_header *map_header, boolean_t __unused map_locked)
1c79356b 1004{
7ddcb079
A
1005 zone_t zone;
1006 vm_map_entry_t entry;
1c79356b 1007
7ddcb079
A
1008 zone = vm_map_entry_zone;
1009
1010 assert(map_header->entries_pageable ? !map_locked : TRUE);
1011
1012 if (map_header->entries_pageable) {
1013 entry = (vm_map_entry_t) zalloc(zone);
1014 }
1015 else {
1016 entry = (vm_map_entry_t) zalloc_canblock(zone, FALSE);
1017
1018 if (entry == VM_MAP_ENTRY_NULL) {
1019 zone = vm_map_entry_reserved_zone;
1020 entry = (vm_map_entry_t) zalloc(zone);
1021 OSAddAtomic(1, &reserved_zalloc_count);
1022 } else
1023 OSAddAtomic(1, &nonreserved_zalloc_count);
1024 }
1c79356b 1025
1c79356b
A
1026 if (entry == VM_MAP_ENTRY_NULL)
1027 panic("vm_map_entry_create");
7ddcb079
A
1028 entry->from_reserved_zone = (zone == vm_map_entry_reserved_zone);
1029
6d2010ae 1030 vm_map_store_update( (vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE);
316670eb 1031#if MAP_ENTRY_CREATION_DEBUG
39236c6e 1032 entry->vme_creation_maphdr = map_header;
39037602
A
1033 backtrace(&entry->vme_creation_bt[0],
1034 (sizeof(entry->vme_creation_bt)/sizeof(uintptr_t)));
316670eb 1035#endif
1c79356b
A
1036 return(entry);
1037}
1038
1039/*
1040 * vm_map_entry_dispose: [ internal use only ]
1041 *
1042 * Inverse of vm_map_entry_create.
2d21ac55
A
1043 *
1044 * write map lock held so no need to
1045 * do anything special to insure correctness
1046 * of the stores
1c79356b
A
1047 */
1048#define vm_map_entry_dispose(map, entry) \
6d2010ae 1049 _vm_map_entry_dispose(&(map)->hdr, (entry))
1c79356b
A
1050
1051#define vm_map_copy_entry_dispose(map, entry) \
1052 _vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
1053
91447636 1054static void
1c79356b 1055_vm_map_entry_dispose(
39037602
A
1056 struct vm_map_header *map_header,
1057 vm_map_entry_t entry)
1c79356b 1058{
39037602 1059 zone_t zone;
1c79356b 1060
7ddcb079 1061 if (map_header->entries_pageable || !(entry->from_reserved_zone))
2d21ac55 1062 zone = vm_map_entry_zone;
1c79356b 1063 else
7ddcb079
A
1064 zone = vm_map_entry_reserved_zone;
1065
1066 if (!map_header->entries_pageable) {
1067 if (zone == vm_map_entry_zone)
1068 OSAddAtomic(-1, &nonreserved_zalloc_count);
1069 else
1070 OSAddAtomic(-1, &reserved_zalloc_count);
1071 }
1c79356b 1072
91447636 1073 zfree(zone, entry);
1c79356b
A
1074}
1075
91447636 1076#if MACH_ASSERT
91447636 1077static boolean_t first_free_check = FALSE;
6d2010ae 1078boolean_t
1c79356b
A
1079first_free_is_valid(
1080 vm_map_t map)
1081{
1c79356b
A
1082 if (!first_free_check)
1083 return TRUE;
2d21ac55 1084
6d2010ae 1085 return( first_free_is_valid_store( map ));
1c79356b 1086}
91447636 1087#endif /* MACH_ASSERT */
1c79356b 1088
1c79356b
A
1089
1090#define vm_map_copy_entry_link(copy, after_where, entry) \
6d2010ae 1091 _vm_map_store_entry_link(&(copy)->cpy_hdr, after_where, (entry))
1c79356b
A
1092
1093#define vm_map_copy_entry_unlink(copy, entry) \
6d2010ae 1094 _vm_map_store_entry_unlink(&(copy)->cpy_hdr, (entry))
1c79356b 1095
1c79356b 1096#if MACH_ASSERT && TASK_SWAPPER
1c79356b
A
1097/*
1098 * vm_map_res_reference:
1099 *
1100 * Adds another valid residence count to the given map.
1101 *
1102 * Map is locked so this function can be called from
1103 * vm_map_swapin.
1104 *
1105 */
39037602 1106void vm_map_res_reference(vm_map_t map)
1c79356b
A
1107{
1108 /* assert map is locked */
1109 assert(map->res_count >= 0);
1110 assert(map->ref_count >= map->res_count);
1111 if (map->res_count == 0) {
b0d623f7 1112 lck_mtx_unlock(&map->s_lock);
1c79356b
A
1113 vm_map_lock(map);
1114 vm_map_swapin(map);
b0d623f7 1115 lck_mtx_lock(&map->s_lock);
1c79356b
A
1116 ++map->res_count;
1117 vm_map_unlock(map);
1118 } else
1119 ++map->res_count;
1120}
1121
1122/*
1123 * vm_map_reference_swap:
1124 *
1125 * Adds valid reference and residence counts to the given map.
1126 *
1127 * The map may not be in memory (i.e. zero residence count).
1128 *
1129 */
39037602 1130void vm_map_reference_swap(vm_map_t map)
1c79356b
A
1131{
1132 assert(map != VM_MAP_NULL);
b0d623f7 1133 lck_mtx_lock(&map->s_lock);
1c79356b
A
1134 assert(map->res_count >= 0);
1135 assert(map->ref_count >= map->res_count);
1136 map->ref_count++;
1137 vm_map_res_reference(map);
b0d623f7 1138 lck_mtx_unlock(&map->s_lock);
1c79356b
A
1139}
1140
1141/*
1142 * vm_map_res_deallocate:
1143 *
1144 * Decrement residence count on a map; possibly causing swapout.
1145 *
1146 * The map must be in memory (i.e. non-zero residence count).
1147 *
1148 * The map is locked, so this function is callable from vm_map_deallocate.
1149 *
1150 */
39037602 1151void vm_map_res_deallocate(vm_map_t map)
1c79356b
A
1152{
1153 assert(map->res_count > 0);
1154 if (--map->res_count == 0) {
b0d623f7 1155 lck_mtx_unlock(&map->s_lock);
1c79356b
A
1156 vm_map_lock(map);
1157 vm_map_swapout(map);
1158 vm_map_unlock(map);
b0d623f7 1159 lck_mtx_lock(&map->s_lock);
1c79356b
A
1160 }
1161 assert(map->ref_count >= map->res_count);
1162}
1163#endif /* MACH_ASSERT && TASK_SWAPPER */
1164
1c79356b
A
1165/*
1166 * vm_map_destroy:
1167 *
1168 * Actually destroy a map.
1169 */
1170void
1171vm_map_destroy(
2d21ac55
A
1172 vm_map_t map,
1173 int flags)
91447636 1174{
1c79356b 1175 vm_map_lock(map);
2d21ac55 1176
3e170ce0
A
1177 /* final cleanup: no need to unnest shared region */
1178 flags |= VM_MAP_REMOVE_NO_UNNESTING;
1179
2d21ac55
A
1180 /* clean up regular map entries */
1181 (void) vm_map_delete(map, map->min_offset, map->max_offset,
1182 flags, VM_MAP_NULL);
1183 /* clean up leftover special mappings (commpage, etc...) */
2d21ac55
A
1184 (void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL,
1185 flags, VM_MAP_NULL);
6d2010ae 1186
3e170ce0 1187 vm_map_disable_hole_optimization(map);
1c79356b
A
1188 vm_map_unlock(map);
1189
2d21ac55
A
1190 assert(map->hdr.nentries == 0);
1191
55e303ae
A
1192 if(map->pmap)
1193 pmap_destroy(map->pmap);
1c79356b 1194
39037602
A
1195 if (vm_map_lck_attr.lck_attr_val & LCK_ATTR_DEBUG) {
1196 /*
1197 * If lock debugging is enabled the mutexes get tagged as LCK_MTX_TAG_INDIRECT.
1198 * And this is regardless of whether the lck_mtx_ext_t is embedded in the
1199 * structure or kalloc'ed via lck_mtx_init.
1200 * An example is s_lock_ext within struct _vm_map.
1201 *
1202 * A lck_mtx_destroy on such a mutex will attempt a kfree and panic. We
1203 * can add another tag to detect embedded vs alloc'ed indirect external
1204 * mutexes but that'll be additional checks in the lock path and require
1205 * updating dependencies for the old vs new tag.
1206 *
1207 * Since the kfree() is for LCK_MTX_TAG_INDIRECT mutexes and that tag is applied
1208 * just when lock debugging is ON, we choose to forego explicitly destroying
1209 * the vm_map mutex and rw lock and, as a consequence, will overflow the reference
1210 * count on vm_map_lck_grp, which has no serious side-effect.
1211 */
1212 } else {
1213 lck_rw_destroy(&(map)->lock, &vm_map_lck_grp);
1214 lck_mtx_destroy(&(map)->s_lock, &vm_map_lck_grp);
1215 }
1216
91447636 1217 zfree(vm_map_zone, map);
1c79356b
A
1218}
1219
1220#if TASK_SWAPPER
1221/*
1222 * vm_map_swapin/vm_map_swapout
1223 *
1224 * Swap a map in and out, either referencing or releasing its resources.
1225 * These functions are internal use only; however, they must be exported
1226 * because they may be called from macros, which are exported.
1227 *
1228 * In the case of swapout, there could be races on the residence count,
1229 * so if the residence count is up, we return, assuming that a
1230 * vm_map_deallocate() call in the near future will bring us back.
1231 *
1232 * Locking:
1233 * -- We use the map write lock for synchronization among races.
1234 * -- The map write lock, and not the simple s_lock, protects the
1235 * swap state of the map.
1236 * -- If a map entry is a share map, then we hold both locks, in
1237 * hierarchical order.
1238 *
1239 * Synchronization Notes:
1240 * 1) If a vm_map_swapin() call happens while swapout in progress, it
1241 * will block on the map lock and proceed when swapout is through.
1242 * 2) A vm_map_reference() call at this time is illegal, and will
1243 * cause a panic. vm_map_reference() is only allowed on resident
1244 * maps, since it refuses to block.
1245 * 3) A vm_map_swapin() call during a swapin will block, and
 1246 * proceed when the first swapin is done, turning into a nop.
1247 * This is the reason the res_count is not incremented until
1248 * after the swapin is complete.
1249 * 4) There is a timing hole after the checks of the res_count, before
1250 * the map lock is taken, during which a swapin may get the lock
1251 * before a swapout about to happen. If this happens, the swapin
1252 * will detect the state and increment the reference count, causing
1253 * the swapout to be a nop, thereby delaying it until a later
1254 * vm_map_deallocate. If the swapout gets the lock first, then
1255 * the swapin will simply block until the swapout is done, and
1256 * then proceed.
1257 *
1258 * Because vm_map_swapin() is potentially an expensive operation, it
1259 * should be used with caution.
1260 *
1261 * Invariants:
1262 * 1) A map with a residence count of zero is either swapped, or
1263 * being swapped.
1264 * 2) A map with a non-zero residence count is either resident,
1265 * or being swapped in.
1266 */
1267
1268int vm_map_swap_enable = 1;
1269
1270void vm_map_swapin (vm_map_t map)
1271{
39037602 1272 vm_map_entry_t entry;
2d21ac55 1273
1c79356b
A
1274 if (!vm_map_swap_enable) /* debug */
1275 return;
1276
1277 /*
1278 * Map is locked
1279 * First deal with various races.
1280 */
1281 if (map->sw_state == MAP_SW_IN)
1282 /*
1283 * we raced with swapout and won. Returning will incr.
1284 * the res_count, turning the swapout into a nop.
1285 */
1286 return;
1287
1288 /*
1289 * The residence count must be zero. If we raced with another
1290 * swapin, the state would have been IN; if we raced with a
1291 * swapout (after another competing swapin), we must have lost
1292 * the race to get here (see above comment), in which case
1293 * res_count is still 0.
1294 */
1295 assert(map->res_count == 0);
1296
1297 /*
1298 * There are no intermediate states of a map going out or
1299 * coming in, since the map is locked during the transition.
1300 */
1301 assert(map->sw_state == MAP_SW_OUT);
1302
1303 /*
1304 * We now operate upon each map entry. If the entry is a sub-
1305 * or share-map, we call vm_map_res_reference upon it.
1306 * If the entry is an object, we call vm_object_res_reference
1307 * (this may iterate through the shadow chain).
1308 * Note that we hold the map locked the entire time,
1309 * even if we get back here via a recursive call in
1310 * vm_map_res_reference.
1311 */
1312 entry = vm_map_first_entry(map);
1313
1314 while (entry != vm_map_to_entry(map)) {
3e170ce0 1315 if (VME_OBJECT(entry) != VM_OBJECT_NULL) {
1c79356b 1316 if (entry->is_sub_map) {
3e170ce0 1317 vm_map_t lmap = VME_SUBMAP(entry);
b0d623f7 1318 lck_mtx_lock(&lmap->s_lock);
1c79356b 1319 vm_map_res_reference(lmap);
b0d623f7 1320 lck_mtx_unlock(&lmap->s_lock);
1c79356b 1321 } else {
3e170ce0 1322 vm_object_t object = VME_OBJECT(entry);
1c79356b
A
1323 vm_object_lock(object);
1324 /*
1325 * This call may iterate through the
1326 * shadow chain.
1327 */
1328 vm_object_res_reference(object);
1329 vm_object_unlock(object);
1330 }
1331 }
1332 entry = entry->vme_next;
1333 }
1334 assert(map->sw_state == MAP_SW_OUT);
1335 map->sw_state = MAP_SW_IN;
1336}
1337
1338void vm_map_swapout(vm_map_t map)
1339{
39037602 1340 vm_map_entry_t entry;
1c79356b
A
1341
1342 /*
1343 * Map is locked
1344 * First deal with various races.
1345 * If we raced with a swapin and lost, the residence count
1346 * will have been incremented to 1, and we simply return.
1347 */
b0d623f7 1348 lck_mtx_lock(&map->s_lock);
1c79356b 1349 if (map->res_count != 0) {
b0d623f7 1350 lck_mtx_unlock(&map->s_lock);
1c79356b
A
1351 return;
1352 }
b0d623f7 1353 lck_mtx_unlock(&map->s_lock);
1c79356b
A
1354
1355 /*
1356 * There are no intermediate states of a map going out or
1357 * coming in, since the map is locked during the transition.
1358 */
1359 assert(map->sw_state == MAP_SW_IN);
1360
1361 if (!vm_map_swap_enable)
1362 return;
1363
1364 /*
1365 * We now operate upon each map entry. If the entry is a sub-
1366 * or share-map, we call vm_map_res_deallocate upon it.
1367 * If the entry is an object, we call vm_object_res_deallocate
1368 * (this may iterate through the shadow chain).
1369 * Note that we hold the map locked the entire time,
1370 * even if we get back here via a recursive call in
1371 * vm_map_res_deallocate.
1372 */
1373 entry = vm_map_first_entry(map);
1374
1375 while (entry != vm_map_to_entry(map)) {
3e170ce0 1376 if (VME_OBJECT(entry) != VM_OBJECT_NULL) {
1c79356b 1377 if (entry->is_sub_map) {
3e170ce0 1378 vm_map_t lmap = VME_SUBMAP(entry);
b0d623f7 1379 lck_mtx_lock(&lmap->s_lock);
1c79356b 1380 vm_map_res_deallocate(lmap);
b0d623f7 1381 lck_mtx_unlock(&lmap->s_lock);
1c79356b 1382 } else {
3e170ce0 1383 vm_object_t object = VME_OBJECT(entry);
1c79356b
A
1384 vm_object_lock(object);
1385 /*
1386 * This call may take a long time,
1387 * since it could actively push
1388 * out pages (if we implement it
1389 * that way).
1390 */
1391 vm_object_res_deallocate(object);
1392 vm_object_unlock(object);
1393 }
1394 }
1395 entry = entry->vme_next;
1396 }
1397 assert(map->sw_state == MAP_SW_IN);
1398 map->sw_state = MAP_SW_OUT;
1399}
1400
1401#endif /* TASK_SWAPPER */
1402
1c79356b
A
1403/*
1404 * vm_map_lookup_entry: [ internal use only ]
1405 *
6d2010ae
A
1406 * Calls into the vm map store layer to find the map
1407 * entry containing (or immediately preceding) the
1408 * specified address in the given map; the entry is returned
1c79356b
A
1409 * in the "entry" parameter. The boolean
1410 * result indicates whether the address is
1411 * actually contained in the map.
1412 */
1413boolean_t
1414vm_map_lookup_entry(
39037602
A
1415 vm_map_t map,
1416 vm_map_offset_t address,
1c79356b
A
1417 vm_map_entry_t *entry) /* OUT */
1418{
6d2010ae 1419 return ( vm_map_store_lookup_entry( map, address, entry ));
1c79356b
A
1420}
1421
1422/*
1423 * Routine: vm_map_find_space
1424 * Purpose:
1425 * Allocate a range in the specified virtual address map,
1426 * returning the entry allocated for that range.
1427 * Used by kmem_alloc, etc.
1428 *
 1429 * The map must NOT be locked. It will be returned locked
1430 * on KERN_SUCCESS, unlocked on failure.
1431 *
1432 * If an entry is allocated, the object/offset fields
1433 * are initialized to zero.
1434 */
1435kern_return_t
1436vm_map_find_space(
39037602 1437 vm_map_t map,
91447636
A
1438 vm_map_offset_t *address, /* OUT */
1439 vm_map_size_t size,
1440 vm_map_offset_t mask,
0c530ab8 1441 int flags,
1c79356b
A
1442 vm_map_entry_t *o_entry) /* OUT */
1443{
3e170ce0 1444 vm_map_entry_t entry, new_entry;
39037602
A
1445 vm_map_offset_t start;
1446 vm_map_offset_t end;
3e170ce0 1447 vm_map_entry_t hole_entry;
91447636
A
1448
1449 if (size == 0) {
1450 *address = 0;
1451 return KERN_INVALID_ARGUMENT;
1452 }
1c79356b 1453
2d21ac55
A
1454 if (flags & VM_FLAGS_GUARD_AFTER) {
1455 /* account for the back guard page in the size */
39236c6e 1456 size += VM_MAP_PAGE_SIZE(map);
2d21ac55
A
1457 }
1458
7ddcb079 1459 new_entry = vm_map_entry_create(map, FALSE);
1c79356b
A
1460
1461 /*
1462 * Look for the first possible address; if there's already
1463 * something at this address, we have to start after it.
1464 */
1465
1466 vm_map_lock(map);
1467
6d2010ae
A
1468 if( map->disable_vmentry_reuse == TRUE) {
1469 VM_MAP_HIGHEST_ENTRY(map, entry, start);
1470 } else {
3e170ce0
A
1471 if (map->holelistenabled) {
1472 hole_entry = (vm_map_entry_t)map->holes_list;
1473
1474 if (hole_entry == NULL) {
1475 /*
1476 * No more space in the map?
1477 */
1478 vm_map_entry_dispose(map, new_entry);
1479 vm_map_unlock(map);
1480 return(KERN_NO_SPACE);
1481 }
1482
1483 entry = hole_entry;
1484 start = entry->vme_start;
1485 } else {
1486 assert(first_free_is_valid(map));
1487 if ((entry = map->first_free) == vm_map_to_entry(map))
1488 start = map->min_offset;
1489 else
1490 start = entry->vme_end;
1491 }
6d2010ae 1492 }
1c79356b
A
1493
1494 /*
1495 * In any case, the "entry" always precedes
1496 * the proposed new region throughout the loop:
1497 */
1498
1499 while (TRUE) {
39037602 1500 vm_map_entry_t next;
1c79356b
A
1501
1502 /*
1503 * Find the end of the proposed new region.
1504 * Be sure we didn't go beyond the end, or
1505 * wrap around the address.
1506 */
1507
2d21ac55
A
1508 if (flags & VM_FLAGS_GUARD_BEFORE) {
1509 /* reserve space for the front guard page */
39236c6e 1510 start += VM_MAP_PAGE_SIZE(map);
2d21ac55 1511 }
1c79356b 1512 end = ((start + mask) & ~mask);
2d21ac55 1513
1c79356b
A
1514 if (end < start) {
1515 vm_map_entry_dispose(map, new_entry);
1516 vm_map_unlock(map);
1517 return(KERN_NO_SPACE);
1518 }
1519 start = end;
1520 end += size;
1521
1522 if ((end > map->max_offset) || (end < start)) {
1523 vm_map_entry_dispose(map, new_entry);
1524 vm_map_unlock(map);
1525 return(KERN_NO_SPACE);
1526 }
1527
1c79356b 1528 next = entry->vme_next;
1c79356b 1529
3e170ce0
A
1530 if (map->holelistenabled) {
1531 if (entry->vme_end >= end)
1532 break;
1533 } else {
1534 /*
1535 * If there are no more entries, we must win.
1536 *
1537 * OR
1538 *
1539 * If there is another entry, it must be
1540 * after the end of the potential new region.
1541 */
1c79356b 1542
3e170ce0
A
1543 if (next == vm_map_to_entry(map))
1544 break;
1545
1546 if (next->vme_start >= end)
1547 break;
1548 }
1c79356b
A
1549
1550 /*
1551 * Didn't fit -- move to the next entry.
1552 */
1553
1554 entry = next;
3e170ce0
A
1555
1556 if (map->holelistenabled) {
1557 if (entry == (vm_map_entry_t) map->holes_list) {
1558 /*
1559 * Wrapped around
1560 */
1561 vm_map_entry_dispose(map, new_entry);
1562 vm_map_unlock(map);
1563 return(KERN_NO_SPACE);
1564 }
1565 start = entry->vme_start;
1566 } else {
1567 start = entry->vme_end;
1568 }
1569 }
1570
1571 if (map->holelistenabled) {
1572 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
1573 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
1574 }
1c79356b
A
1575 }
1576
1577 /*
1578 * At this point,
1579 * "start" and "end" should define the endpoints of the
1580 * available new range, and
1581 * "entry" should refer to the region before the new
1582 * range, and
1583 *
1584 * the map should be locked.
1585 */
1586
2d21ac55
A
1587 if (flags & VM_FLAGS_GUARD_BEFORE) {
1588 /* go back for the front guard page */
39236c6e 1589 start -= VM_MAP_PAGE_SIZE(map);
2d21ac55 1590 }
1c79356b
A
1591 *address = start;
1592
e2d2fc5c 1593 assert(start < end);
1c79356b
A
1594 new_entry->vme_start = start;
1595 new_entry->vme_end = end;
1596 assert(page_aligned(new_entry->vme_start));
1597 assert(page_aligned(new_entry->vme_end));
39236c6e
A
1598 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
1599 VM_MAP_PAGE_MASK(map)));
1600 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
1601 VM_MAP_PAGE_MASK(map)));
1c79356b
A
1602
1603 new_entry->is_shared = FALSE;
1604 new_entry->is_sub_map = FALSE;
fe8ab488 1605 new_entry->use_pmap = TRUE;
3e170ce0
A
1606 VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
1607 VME_OFFSET_SET(new_entry, (vm_object_offset_t) 0);
1c79356b
A
1608
1609 new_entry->needs_copy = FALSE;
1610
1611 new_entry->inheritance = VM_INHERIT_DEFAULT;
1612 new_entry->protection = VM_PROT_DEFAULT;
1613 new_entry->max_protection = VM_PROT_ALL;
1614 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
1615 new_entry->wired_count = 0;
1616 new_entry->user_wired_count = 0;
1617
1618 new_entry->in_transition = FALSE;
1619 new_entry->needs_wakeup = FALSE;
2d21ac55 1620 new_entry->no_cache = FALSE;
b0d623f7 1621 new_entry->permanent = FALSE;
39236c6e
A
1622 new_entry->superpage_size = FALSE;
1623 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
1624 new_entry->map_aligned = TRUE;
1625 } else {
1626 new_entry->map_aligned = FALSE;
1627 }
2d21ac55 1628
3e170ce0 1629 new_entry->used_for_jit = FALSE;
b0d623f7 1630 new_entry->zero_wired_pages = FALSE;
fe8ab488 1631 new_entry->iokit_acct = FALSE;
3e170ce0
A
1632 new_entry->vme_resilient_codesign = FALSE;
1633 new_entry->vme_resilient_media = FALSE;
39037602
A
1634 if (flags & VM_FLAGS_ATOMIC_ENTRY)
1635 new_entry->vme_atomic = TRUE;
1636 else
1637 new_entry->vme_atomic = FALSE;
1c79356b 1638
3e170ce0
A
1639 int alias;
1640 VM_GET_FLAGS_ALIAS(flags, alias);
1641 VME_ALIAS_SET(new_entry, alias);
0c530ab8 1642
1c79356b
A
1643 /*
1644 * Insert the new entry into the list
1645 */
1646
6d2010ae 1647 vm_map_store_entry_link(map, entry, new_entry);
1c79356b
A
1648
1649 map->size += size;
1650
1651 /*
1652 * Update the lookup hint
1653 */
0c530ab8 1654 SAVE_HINT_MAP_WRITE(map, new_entry);
1c79356b
A
1655
1656 *o_entry = new_entry;
1657 return(KERN_SUCCESS);
1658}
1659
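/*
 * Illustrative sketch (not from the original source): kmem-style usage of
 * vm_map_find_space().  On KERN_SUCCESS the map comes back locked and the new
 * entry's object/offset are zeroed, so the caller fills them in and unlocks.
 * "size" and "some_object" are hypothetical.
 */
#if 0
	vm_map_offset_t addr;
	vm_map_entry_t entry;
	kern_return_t kr;

	kr = vm_map_find_space(kernel_map, &addr, size, (vm_map_offset_t) 0, 0, &entry);
	if (kr == KERN_SUCCESS) {
		VME_OBJECT_SET(entry, some_object);	/* hypothetical backing object */
		vm_map_unlock(kernel_map);
	}
#endif
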
1660int vm_map_pmap_enter_print = FALSE;
1661int vm_map_pmap_enter_enable = FALSE;
1662
1663/*
91447636 1664 * Routine: vm_map_pmap_enter [internal only]
1c79356b
A
1665 *
1666 * Description:
1667 * Force pages from the specified object to be entered into
1668 * the pmap at the specified address if they are present.
 1670 * As soon as a page is not found in the object, the scan ends.
1670 *
1671 * Returns:
1672 * Nothing.
1673 *
1674 * In/out conditions:
1675 * The source map should not be locked on entry.
1676 */
fe8ab488 1677__unused static void
1c79356b
A
1678vm_map_pmap_enter(
1679 vm_map_t map,
39037602
A
1680 vm_map_offset_t addr,
1681 vm_map_offset_t end_addr,
1682 vm_object_t object,
1c79356b
A
1683 vm_object_offset_t offset,
1684 vm_prot_t protection)
1685{
2d21ac55
A
1686 int type_of_fault;
1687 kern_return_t kr;
0b4e3aa0 1688
55e303ae
A
1689 if(map->pmap == 0)
1690 return;
1691
1c79356b 1692 while (addr < end_addr) {
39037602 1693 vm_page_t m;
1c79356b 1694
fe8ab488
A
1695
1696 /*
1697 * TODO:
1698 * From vm_map_enter(), we come into this function without the map
1699 * lock held or the object lock held.
1700 * We haven't taken a reference on the object either.
1701 * We should do a proper lookup on the map to make sure
1702 * that things are sane before we go locking objects that
1703 * could have been deallocated from under us.
1704 */
1705
1c79356b 1706 vm_object_lock(object);
1c79356b
A
1707
1708 m = vm_page_lookup(object, offset);
91447636
A
1709 /*
1710 * ENCRYPTED SWAP:
1711 * The user should never see encrypted data, so do not
1712 * enter an encrypted page in the page table.
1713 */
1714 if (m == VM_PAGE_NULL || m->busy || m->encrypted ||
2d21ac55
A
1715 m->fictitious ||
1716 (m->unusual && ( m->error || m->restart || m->absent))) {
1c79356b
A
1717 vm_object_unlock(object);
1718 return;
1719 }
1720
1c79356b
A
1721 if (vm_map_pmap_enter_print) {
1722 printf("vm_map_pmap_enter:");
2d21ac55
A
1723 printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
1724 map, (unsigned long long)addr, object, (unsigned long long)offset);
1c79356b 1725 }
2d21ac55 1726 type_of_fault = DBG_CACHE_HIT_FAULT;
6d2010ae 1727 kr = vm_fault_enter(m, map->pmap, addr, protection, protection,
fe8ab488
A
1728 VM_PAGE_WIRED(m), FALSE, FALSE, FALSE,
1729 0, /* XXX need user tag / alias? */
1730 0, /* alternate accounting? */
1731 NULL,
2d21ac55 1732 &type_of_fault);
1c79356b 1733
1c79356b
A
1734 vm_object_unlock(object);
1735
1736 offset += PAGE_SIZE_64;
1737 addr += PAGE_SIZE;
1738 }
1739}
1740
91447636
A
1741boolean_t vm_map_pmap_is_empty(
1742 vm_map_t map,
1743 vm_map_offset_t start,
1744 vm_map_offset_t end);
1745boolean_t vm_map_pmap_is_empty(
1746 vm_map_t map,
1747 vm_map_offset_t start,
1748 vm_map_offset_t end)
1749{
2d21ac55
A
1750#ifdef MACHINE_PMAP_IS_EMPTY
1751 return pmap_is_empty(map->pmap, start, end);
1752#else /* MACHINE_PMAP_IS_EMPTY */
91447636
A
1753 vm_map_offset_t offset;
1754 ppnum_t phys_page;
1755
1756 if (map->pmap == NULL) {
1757 return TRUE;
1758 }
2d21ac55 1759
91447636
A
1760 for (offset = start;
1761 offset < end;
1762 offset += PAGE_SIZE) {
1763 phys_page = pmap_find_phys(map->pmap, offset);
1764 if (phys_page) {
1765 kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
1766 "page %d at 0x%llx\n",
2d21ac55
A
1767 map, (long long)start, (long long)end,
1768 phys_page, (long long)offset);
91447636
A
1769 return FALSE;
1770 }
1771 }
1772 return TRUE;
2d21ac55 1773#endif /* MACHINE_PMAP_IS_EMPTY */
91447636
A
1774}
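/*
 * Editor's note (illustrative, not part of the original source): this helper
 * backs the DEBUG-only sanity check in vm_map_enter() below, e.g.
 *	assert(vm_map_pmap_is_empty(map, *address, *address + size));
 * which verifies that no physical pages are already entered in the pmap for
 * a range that is expected to be empty.
 */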
1775
316670eb
A
1776#define MAX_TRIES_TO_GET_RANDOM_ADDRESS 1000
1777kern_return_t
1778vm_map_random_address_for_size(
1779 vm_map_t map,
1780 vm_map_offset_t *address,
1781 vm_map_size_t size)
1782{
1783 kern_return_t kr = KERN_SUCCESS;
1784 int tries = 0;
1785 vm_map_offset_t random_addr = 0;
1786 vm_map_offset_t hole_end;
1787
1788 vm_map_entry_t next_entry = VM_MAP_ENTRY_NULL;
1789 vm_map_entry_t prev_entry = VM_MAP_ENTRY_NULL;
1790 vm_map_size_t vm_hole_size = 0;
1791 vm_map_size_t addr_space_size;
1792
1793 addr_space_size = vm_map_max(map) - vm_map_min(map);
1794
1795 assert(page_aligned(size));
1796
1797 while (tries < MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
1798 random_addr = ((vm_map_offset_t)random()) << PAGE_SHIFT;
39236c6e
A
1799 random_addr = vm_map_trunc_page(
1800 vm_map_min(map) +(random_addr % addr_space_size),
1801 VM_MAP_PAGE_MASK(map));
316670eb
A
1802
1803 if (vm_map_lookup_entry(map, random_addr, &prev_entry) == FALSE) {
1804 if (prev_entry == vm_map_to_entry(map)) {
1805 next_entry = vm_map_first_entry(map);
1806 } else {
1807 next_entry = prev_entry->vme_next;
1808 }
1809 if (next_entry == vm_map_to_entry(map)) {
1810 hole_end = vm_map_max(map);
1811 } else {
1812 hole_end = next_entry->vme_start;
1813 }
1814 vm_hole_size = hole_end - random_addr;
1815 if (vm_hole_size >= size) {
1816 *address = random_addr;
1817 break;
1818 }
1819 }
1820 tries++;
1821 }
1822
1823 if (tries == MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
1824 kr = KERN_NO_SPACE;
1825 }
1826 return kr;
1827}
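/*
 * Illustrative sketch (editor's addition, not part of the original source):
 * one way a caller in this file could use vm_map_random_address_for_size()
 * to pick a randomized start address, falling back to the map's minimum if
 * no suitable hole is found.  The "example_" names are hypothetical and the
 * block is not compiled.
 */
#if 0 /* example only */
static vm_map_offset_t
example_choose_random_start(
	vm_map_t	example_map,
	vm_map_size_t	example_size)	/* must be page-aligned */
{
	vm_map_offset_t addr = 0;

	if (vm_map_random_address_for_size(example_map, &addr,
		example_size) != KERN_SUCCESS) {
		/* no hole of at least "example_size" found after 1000 tries */
		addr = vm_map_min(example_map);
	}
	return addr;
}
#endif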
1828
1c79356b
A
1829/*
1830 * Routine: vm_map_enter
1831 *
1832 * Description:
1833 * Allocate a range in the specified virtual address map.
1834 * The resulting range will refer to memory defined by
1835 * the given memory object and offset into that object.
1836 *
1837 * Arguments are as defined in the vm_map call.
1838 */
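/*
 * Illustrative call (editor's addition, not part of the original source):
 * reserving anonymous, zero-fill memory at a kernel-chosen address with the
 * default protections.  All "example_" names are hypothetical; the real
 * callers in this file (e.g. vm_map_enter_mem_object_helper() below) follow
 * the same pattern.  Not compiled.
 */
#if 0 /* example only */
static kern_return_t
example_vm_map_enter_anywhere(
	vm_map_t	example_map,
	vm_map_size_t	example_size,	/* page-aligned */
	vm_map_offset_t	*example_addr)	/* OUT: chosen address */
{
	*example_addr = 0;
	return vm_map_enter(example_map,
			    example_addr,		/* IN/OUT */
			    example_size,
			    (vm_map_offset_t) 0,	/* no extra alignment mask */
			    VM_FLAGS_ANYWHERE,
			    VM_OBJECT_NULL,		/* lazily allocated object */
			    (vm_object_offset_t) 0,
			    FALSE,			/* needs_copy */
			    VM_PROT_DEFAULT,
			    VM_PROT_ALL,
			    VM_INHERIT_DEFAULT);
}
#endif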
91447636
A
1839int _map_enter_debug = 0;
1840static unsigned int vm_map_enter_restore_successes = 0;
1841static unsigned int vm_map_enter_restore_failures = 0;
1c79356b
A
1842kern_return_t
1843vm_map_enter(
91447636 1844 vm_map_t map,
593a1d5f 1845 vm_map_offset_t *address, /* IN/OUT */
91447636 1846 vm_map_size_t size,
593a1d5f 1847 vm_map_offset_t mask,
1c79356b
A
1848 int flags,
1849 vm_object_t object,
1850 vm_object_offset_t offset,
1851 boolean_t needs_copy,
1852 vm_prot_t cur_protection,
1853 vm_prot_t max_protection,
1854 vm_inherit_t inheritance)
1855{
91447636 1856 vm_map_entry_t entry, new_entry;
2d21ac55 1857 vm_map_offset_t start, tmp_start, tmp_offset;
91447636 1858 vm_map_offset_t end, tmp_end;
b0d623f7
A
1859 vm_map_offset_t tmp2_start, tmp2_end;
1860 vm_map_offset_t step;
1c79356b 1861 kern_return_t result = KERN_SUCCESS;
91447636
A
1862 vm_map_t zap_old_map = VM_MAP_NULL;
1863 vm_map_t zap_new_map = VM_MAP_NULL;
1864 boolean_t map_locked = FALSE;
1865 boolean_t pmap_empty = TRUE;
1866 boolean_t new_mapping_established = FALSE;
fe8ab488 1867 boolean_t keep_map_locked = ((flags & VM_FLAGS_KEEP_MAP_LOCKED) != 0);
91447636
A
1868 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
1869 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
1870 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
2d21ac55
A
1871 boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
1872 boolean_t is_submap = ((flags & VM_FLAGS_SUBMAP) != 0);
b0d623f7 1873 boolean_t permanent = ((flags & VM_FLAGS_PERMANENT) != 0);
316670eb 1874 boolean_t entry_for_jit = ((flags & VM_FLAGS_MAP_JIT) != 0);
fe8ab488 1875 boolean_t iokit_acct = ((flags & VM_FLAGS_IOKIT_ACCT) != 0);
3e170ce0
A
1876 boolean_t resilient_codesign = ((flags & VM_FLAGS_RESILIENT_CODESIGN) != 0);
1877 boolean_t resilient_media = ((flags & VM_FLAGS_RESILIENT_MEDIA) != 0);
39037602 1878 boolean_t random_address = ((flags & VM_FLAGS_RANDOM_ADDR) != 0);
b0d623f7 1879 unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
3e170ce0 1880 vm_tag_t alias, user_alias;
2d21ac55 1881 vm_map_offset_t effective_min_offset, effective_max_offset;
593a1d5f 1882 kern_return_t kr;
39236c6e 1883 boolean_t clear_map_aligned = FALSE;
3e170ce0 1884 vm_map_entry_t hole_entry;
593a1d5f 1885
b0d623f7
A
1886 if (superpage_size) {
1887 switch (superpage_size) {
1888 /*
1889 * Note that the current implementation only supports
1890 * a single size for superpages, SUPERPAGE_SIZE, per
1891 * architecture. As soon as more sizes are to be
1892 * supported, SUPERPAGE_SIZE will have to be replaced
1893 * with a lookup of the size based on superpage_size.
1894 */
1895#ifdef __x86_64__
6d2010ae
A
1896 case SUPERPAGE_SIZE_ANY:
1897 /* handle it like 2 MB and round up to page size */
1898 size = (size + 2*1024*1024 - 1) & ~(2*1024*1024 - 1);
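			/* deliberate fall-through: treat SUPERPAGE_SIZE_ANY as 2 MB */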
b0d623f7
A
1899 case SUPERPAGE_SIZE_2MB:
1900 break;
1901#endif
1902 default:
1903 return KERN_INVALID_ARGUMENT;
1904 }
1905 mask = SUPERPAGE_SIZE-1;
1906 if (size & (SUPERPAGE_SIZE-1))
1907 return KERN_INVALID_ARGUMENT;
1908 inheritance = VM_INHERIT_NONE; /* fork() children won't inherit superpages */
1909 }
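	/*
	 * Editor's note (illustrative): a superpage request reaches this
	 * point with, e.g., VM_FLAGS_ANYWHERE | VM_FLAGS_SUPERPAGE_SIZE_2MB
	 * in "flags"; after the checks above, "size" is a multiple of
	 * SUPERPAGE_SIZE and the resulting entries will not be inherited
	 * by fork() children.
	 */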
1910
6d2010ae 1911
1c79356b 1912
3e170ce0
A
1913 if (resilient_codesign || resilient_media) {
1914 if ((cur_protection & (VM_PROT_WRITE | VM_PROT_EXECUTE)) ||
1915 (max_protection & (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
1916 return KERN_PROTECTION_FAILURE;
1917 }
1918 }
1919
2d21ac55
A
1920 if (is_submap) {
1921 if (purgable) {
1922 /* submaps can not be purgeable */
1923 return KERN_INVALID_ARGUMENT;
1924 }
1925 if (object == VM_OBJECT_NULL) {
1926 /* submaps can not be created lazily */
1927 return KERN_INVALID_ARGUMENT;
1928 }
1929 }
1930 if (flags & VM_FLAGS_ALREADY) {
1931 /*
1932 * is already present. For it to be meaningful, the requested
1933 * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
1934 * we shouldn't try to remove what was mapped there first
1935 * we shouldn't try and remove what was mapped there first
1936 * (!VM_FLAGS_OVERWRITE).
1937 */
1938 if ((flags & VM_FLAGS_ANYWHERE) ||
1939 (flags & VM_FLAGS_OVERWRITE)) {
1940 return KERN_INVALID_ARGUMENT;
1941 }
1942 }
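	/*
	 * Editor's note (illustrative): a caller that wants "map this at a
	 * fixed address, or tell me it is already mapped" would pass
	 * VM_FLAGS_FIXED | VM_FLAGS_ALREADY and treat a KERN_MEMORY_PRESENT
	 * return as success; combining VM_FLAGS_ALREADY with
	 * VM_FLAGS_ANYWHERE or VM_FLAGS_OVERWRITE is rejected above.
	 */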
1943
6d2010ae 1944 effective_min_offset = map->min_offset;
b0d623f7 1945
2d21ac55
A
1946 if (flags & VM_FLAGS_BEYOND_MAX) {
1947 /*
b0d623f7 1948 * Allow an insertion beyond the map's max offset.
2d21ac55
A
1949 */
1950 if (vm_map_is_64bit(map))
1951 effective_max_offset = 0xFFFFFFFFFFFFF000ULL;
1952 else
1953 effective_max_offset = 0x00000000FFFFF000ULL;
1954 } else {
1955 effective_max_offset = map->max_offset;
1956 }
1957
1958 if (size == 0 ||
1959 (offset & PAGE_MASK_64) != 0) {
91447636
A
1960 *address = 0;
1961 return KERN_INVALID_ARGUMENT;
1962 }
1963
1c79356b 1964 VM_GET_FLAGS_ALIAS(flags, alias);
3e170ce0
A
1965 if (map->pmap == kernel_pmap) {
1966 user_alias = VM_KERN_MEMORY_NONE;
1967 } else {
1968 user_alias = alias;
1969 }
2d21ac55 1970
1c79356b
A
1971#define RETURN(value) { result = value; goto BailOut; }
1972
1973 assert(page_aligned(*address));
1974 assert(page_aligned(size));
91447636 1975
39236c6e
A
1976 if (!VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map))) {
1977 /*
1978 * In most cases, the caller rounds the size up to the
1979 * map's page size.
1980 * If we get a size that is explicitly not map-aligned here,
1981 * we'll have to respect the caller's wish and mark the
1982 * mapping as "not map-aligned" to avoid tripping the
1983 * map alignment checks later.
1984 */
1985 clear_map_aligned = TRUE;
1986 }
fe8ab488
A
1987 if (!anywhere &&
1988 !VM_MAP_PAGE_ALIGNED(*address, VM_MAP_PAGE_MASK(map))) {
1989 /*
1990 * We've been asked to map at a fixed address and that
1991 * address is not aligned to the map's specific alignment.
1992 * The caller should know what it's doing (i.e. most likely
1993 * mapping some fragmented copy map, transferring memory from
1994 * a VM map with a different alignment), so clear map_aligned
1995 * for this new VM map entry and proceed.
1996 */
1997 clear_map_aligned = TRUE;
1998 }
39236c6e 1999
91447636
A
2000 /*
2001 * Only zero-fill objects are allowed to be purgable.
2002 * LP64todo - limit purgable objects to 32-bits for now
2003 */
2004 if (purgable &&
2005 (offset != 0 ||
2006 (object != VM_OBJECT_NULL &&
6d2010ae 2007 (object->vo_size != size ||
2d21ac55 2008 object->purgable == VM_PURGABLE_DENY))
b0d623f7 2009 || size > ANON_MAX_SIZE)) /* LP64todo: remove when dp capable */
91447636
A
2010 return KERN_INVALID_ARGUMENT;
2011
2012 if (!anywhere && overwrite) {
2013 /*
2014 * Create a temporary VM map to hold the old mappings in the
2015 * affected area while we create the new one.
2016 * This avoids releasing the VM map lock in
2017 * vm_map_entry_delete() and allows atomicity
2018 * when we want to replace some mappings with a new one.
2019 * It also allows us to restore the old VM mappings if the
2020 * new mapping fails.
2021 */
2022 zap_old_map = vm_map_create(PMAP_NULL,
2023 *address,
2024 *address + size,
b0d623f7 2025 map->hdr.entries_pageable);
39236c6e 2026 vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
3e170ce0 2027 vm_map_disable_hole_optimization(zap_old_map);
91447636
A
2028 }
2029
2d21ac55 2030StartAgain: ;
1c79356b
A
2031
2032 start = *address;
2033
2034 if (anywhere) {
2035 vm_map_lock(map);
91447636 2036 map_locked = TRUE;
6d2010ae 2037
316670eb
A
2038 if (entry_for_jit) {
2039 if (map->jit_entry_exists) {
2040 result = KERN_INVALID_ARGUMENT;
2041 goto BailOut;
2042 }
39037602
A
2043 random_address = TRUE;
2044 }
2045
2046 if (random_address) {
316670eb
A
2047 /*
2048 * Get a random start address.
2049 */
2050 result = vm_map_random_address_for_size(map, address, size);
2051 if (result != KERN_SUCCESS) {
2052 goto BailOut;
2053 }
2054 start = *address;
6d2010ae 2055 }
1c79356b 2056
316670eb 2057
1c79356b
A
2058 /*
2059 * Calculate the first possible address.
2060 */
2061
2d21ac55
A
2062 if (start < effective_min_offset)
2063 start = effective_min_offset;
2064 if (start > effective_max_offset)
1c79356b
A
2065 RETURN(KERN_NO_SPACE);
2066
2067 /*
2068 * Look for the first possible address;
2069 * if there's already something at this
2070 * address, we have to start after it.
2071 */
2072
6d2010ae
A
2073 if( map->disable_vmentry_reuse == TRUE) {
2074 VM_MAP_HIGHEST_ENTRY(map, entry, start);
1c79356b 2075 } else {
6d2010ae 2076
3e170ce0
A
2077 if (map->holelistenabled) {
2078 hole_entry = (vm_map_entry_t)map->holes_list;
2079
2080 if (hole_entry == NULL) {
2081 /*
2082 * No more space in the map?
2083 */
2084 result = KERN_NO_SPACE;
2085 goto BailOut;
2086 } else {
2087
2088 boolean_t found_hole = FALSE;
2089
2090 do {
2091 if (hole_entry->vme_start >= start) {
2092 start = hole_entry->vme_start;
2093 found_hole = TRUE;
2094 break;
2095 }
2096
2097 if (hole_entry->vme_end > start) {
2098 found_hole = TRUE;
2099 break;
2100 }
2101 hole_entry = hole_entry->vme_next;
2102
2103 } while (hole_entry != (vm_map_entry_t) map->holes_list);
2104
2105 if (found_hole == FALSE) {
2106 result = KERN_NO_SPACE;
2107 goto BailOut;
2108 }
2109
2110 entry = hole_entry;
6d2010ae 2111
3e170ce0
A
2112 if (start == 0)
2113 start += PAGE_SIZE_64;
2114 }
6d2010ae 2115 } else {
3e170ce0
A
2116 assert(first_free_is_valid(map));
2117
2118 entry = map->first_free;
2119
2120 if (entry == vm_map_to_entry(map)) {
6d2010ae 2121 entry = NULL;
3e170ce0
A
2122 } else {
2123 if (entry->vme_next == vm_map_to_entry(map)){
2124 /*
2125 * Hole at the end of the map.
2126 */
2127 entry = NULL;
2128 } else {
2129 if (start < (entry->vme_next)->vme_start ) {
2130 start = entry->vme_end;
2131 start = vm_map_round_page(start,
2132 VM_MAP_PAGE_MASK(map));
2133 } else {
2134 /*
2135 * Need to do a lookup.
2136 */
2137 entry = NULL;
2138 }
2139 }
2140 }
2141
2142 if (entry == NULL) {
2143 vm_map_entry_t tmp_entry;
2144 if (vm_map_lookup_entry(map, start, &tmp_entry)) {
2145 assert(!entry_for_jit);
2146 start = tmp_entry->vme_end;
39236c6e
A
2147 start = vm_map_round_page(start,
2148 VM_MAP_PAGE_MASK(map));
6d2010ae 2149 }
3e170ce0 2150 entry = tmp_entry;
316670eb 2151 }
6d2010ae 2152 }
1c79356b
A
2153 }
2154
2155 /*
2156 * In any case, the "entry" always precedes
2157 * the proposed new region throughout the
2158 * loop:
2159 */
2160
2161 while (TRUE) {
39037602 2162 vm_map_entry_t next;
1c79356b 2163
2d21ac55 2164 /*
1c79356b
A
2165 * Find the end of the proposed new region.
2166 * Be sure we didn't go beyond the end, or
2167 * wrap around the address.
2168 */
2169
2170 end = ((start + mask) & ~mask);
39236c6e
A
2171 end = vm_map_round_page(end,
2172 VM_MAP_PAGE_MASK(map));
1c79356b
A
2173 if (end < start)
2174 RETURN(KERN_NO_SPACE);
2175 start = end;
39236c6e
A
2176 assert(VM_MAP_PAGE_ALIGNED(start,
2177 VM_MAP_PAGE_MASK(map)));
1c79356b
A
2178 end += size;
2179
2d21ac55 2180 if ((end > effective_max_offset) || (end < start)) {
1c79356b 2181 if (map->wait_for_space) {
fe8ab488 2182 assert(!keep_map_locked);
2d21ac55
A
2183 if (size <= (effective_max_offset -
2184 effective_min_offset)) {
1c79356b
A
2185 assert_wait((event_t)map,
2186 THREAD_ABORTSAFE);
2187 vm_map_unlock(map);
91447636
A
2188 map_locked = FALSE;
2189 thread_block(THREAD_CONTINUE_NULL);
1c79356b
A
2190 goto StartAgain;
2191 }
2192 }
2193 RETURN(KERN_NO_SPACE);
2194 }
2195
1c79356b 2196 next = entry->vme_next;
1c79356b 2197
3e170ce0
A
2198 if (map->holelistenabled) {
2199 if (entry->vme_end >= end)
2200 break;
2201 } else {
2202 /*
2203 * If there are no more entries, we must win.
2204 *
2205 * OR
2206 *
2207 * If there is another entry, it must be
2208 * after the end of the potential new region.
2209 */
1c79356b 2210
3e170ce0
A
2211 if (next == vm_map_to_entry(map))
2212 break;
2213
2214 if (next->vme_start >= end)
2215 break;
2216 }
1c79356b
A
2217
2218 /*
2219 * Didn't fit -- move to the next entry.
2220 */
2221
2222 entry = next;
3e170ce0
A
2223
2224 if (map->holelistenabled) {
2225 if (entry == (vm_map_entry_t) map->holes_list) {
2226 /*
2227 * Wrapped around
2228 */
2229 result = KERN_NO_SPACE;
2230 goto BailOut;
2231 }
2232 start = entry->vme_start;
2233 } else {
2234 start = entry->vme_end;
2235 }
2236
39236c6e
A
2237 start = vm_map_round_page(start,
2238 VM_MAP_PAGE_MASK(map));
1c79356b 2239 }
3e170ce0
A
2240
2241 if (map->holelistenabled) {
2242 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
2243 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
2244 }
2245 }
2246
1c79356b 2247 *address = start;
39236c6e
A
2248 assert(VM_MAP_PAGE_ALIGNED(*address,
2249 VM_MAP_PAGE_MASK(map)));
1c79356b 2250 } else {
1c79356b
A
2251 /*
2252 * Verify that:
2253 * the address doesn't itself violate
2254 * the mask requirement.
2255 */
2256
2257 vm_map_lock(map);
91447636 2258 map_locked = TRUE;
1c79356b
A
2259 if ((start & mask) != 0)
2260 RETURN(KERN_NO_SPACE);
2261
2262 /*
2263 * ... the address is within bounds
2264 */
2265
2266 end = start + size;
2267
2d21ac55
A
2268 if ((start < effective_min_offset) ||
2269 (end > effective_max_offset) ||
1c79356b
A
2270 (start >= end)) {
2271 RETURN(KERN_INVALID_ADDRESS);
2272 }
2273
91447636
A
2274 if (overwrite && zap_old_map != VM_MAP_NULL) {
2275 /*
2276 * Fixed mapping and "overwrite" flag: attempt to
2277 * remove all existing mappings in the specified
2278 * address range, saving them in our "zap_old_map".
2279 */
2280 (void) vm_map_delete(map, start, end,
fe8ab488
A
2281 (VM_MAP_REMOVE_SAVE_ENTRIES |
2282 VM_MAP_REMOVE_NO_MAP_ALIGN),
91447636
A
2283 zap_old_map);
2284 }
2285
1c79356b
A
2286 /*
2287 * ... the starting address isn't allocated
2288 */
2289
2d21ac55
A
2290 if (vm_map_lookup_entry(map, start, &entry)) {
2291 if (! (flags & VM_FLAGS_ALREADY)) {
2292 RETURN(KERN_NO_SPACE);
2293 }
2294 /*
2295 * Check if what's already there is what we want.
2296 */
2297 tmp_start = start;
2298 tmp_offset = offset;
2299 if (entry->vme_start < start) {
2300 tmp_start -= start - entry->vme_start;
2301 tmp_offset -= start - entry->vme_start;
2302
2303 }
2304 for (; entry->vme_start < end;
2305 entry = entry->vme_next) {
4a3eedf9
A
2306 /*
2307 * Check if the mapping's attributes
2308 * match the existing map entry.
2309 */
2d21ac55
A
2310 if (entry == vm_map_to_entry(map) ||
2311 entry->vme_start != tmp_start ||
2312 entry->is_sub_map != is_submap ||
3e170ce0 2313 VME_OFFSET(entry) != tmp_offset ||
2d21ac55
A
2314 entry->needs_copy != needs_copy ||
2315 entry->protection != cur_protection ||
2316 entry->max_protection != max_protection ||
2317 entry->inheritance != inheritance ||
fe8ab488 2318 entry->iokit_acct != iokit_acct ||
3e170ce0 2319 VME_ALIAS(entry) != alias) {
2d21ac55
A
2320 /* not the same mapping ! */
2321 RETURN(KERN_NO_SPACE);
2322 }
4a3eedf9
A
2323 /*
2324 * Check if the same object is being mapped.
2325 */
2326 if (is_submap) {
3e170ce0 2327 if (VME_SUBMAP(entry) !=
4a3eedf9
A
2328 (vm_map_t) object) {
2329 /* not the same submap */
2330 RETURN(KERN_NO_SPACE);
2331 }
2332 } else {
3e170ce0 2333 if (VME_OBJECT(entry) != object) {
4a3eedf9
A
2334 /* not the same VM object... */
2335 vm_object_t obj2;
2336
3e170ce0 2337 obj2 = VME_OBJECT(entry);
4a3eedf9
A
2338 if ((obj2 == VM_OBJECT_NULL ||
2339 obj2->internal) &&
2340 (object == VM_OBJECT_NULL ||
2341 object->internal)) {
2342 /*
2343 * ... but both are
2344 * anonymous memory,
2345 * so equivalent.
2346 */
2347 } else {
2348 RETURN(KERN_NO_SPACE);
2349 }
2350 }
2351 }
2352
2d21ac55
A
2353 tmp_offset += entry->vme_end - entry->vme_start;
2354 tmp_start += entry->vme_end - entry->vme_start;
2355 if (entry->vme_end >= end) {
2356 /* reached the end of our mapping */
2357 break;
2358 }
2359 }
2360 /* it all matches: let's use what's already there ! */
2361 RETURN(KERN_MEMORY_PRESENT);
2362 }
1c79356b
A
2363
2364 /*
2365 * ... the next region doesn't overlap the
2366 * end point.
2367 */
2368
2369 if ((entry->vme_next != vm_map_to_entry(map)) &&
2370 (entry->vme_next->vme_start < end))
2371 RETURN(KERN_NO_SPACE);
2372 }
2373
2374 /*
2375 * At this point,
2376 * "start" and "end" should define the endpoints of the
2377 * available new range, and
2378 * "entry" should refer to the region before the new
2379 * range, and
2380 *
2381 * the map should be locked.
2382 */
2383
2384 /*
2385 * See whether we can avoid creating a new entry (and object) by
2386 * extending one of our neighbors. [So far, we only attempt to
91447636
A
2387 * extend from below.] Note that we can never extend/join
2388 * purgable objects because they need to remain distinct
2389 * entities in order to implement their "volatile object"
2390 * semantics.
1c79356b
A
2391 */
2392
316670eb 2393 if (purgable || entry_for_jit) {
91447636 2394 if (object == VM_OBJECT_NULL) {
3e170ce0 2395
91447636
A
2396 object = vm_object_allocate(size);
2397 object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
fe8ab488 2398 object->true_share = TRUE;
316670eb 2399 if (purgable) {
fe8ab488 2400 task_t owner;
316670eb 2401 object->purgable = VM_PURGABLE_NONVOLATILE;
fe8ab488
A
2402 if (map->pmap == kernel_pmap) {
2403 /*
2404 * Purgeable mappings made in a kernel
2405 * map are "owned" by the kernel itself
2406 * rather than the current user task
2407 * because they're likely to be used by
2408 * more than this user task (see
2409 * execargs_purgeable_allocate(), for
2410 * example).
2411 */
2412 owner = kernel_task;
2413 } else {
2414 owner = current_task();
2415 }
2416 assert(object->vo_purgeable_owner == NULL);
2417 assert(object->resident_page_count == 0);
2418 assert(object->wired_page_count == 0);
2419 vm_object_lock(object);
2420 vm_purgeable_nonvolatile_enqueue(object, owner);
2421 vm_object_unlock(object);
316670eb 2422 }
91447636
A
2423 offset = (vm_object_offset_t)0;
2424 }
2d21ac55
A
2425 } else if ((is_submap == FALSE) &&
2426 (object == VM_OBJECT_NULL) &&
2427 (entry != vm_map_to_entry(map)) &&
2428 (entry->vme_end == start) &&
2429 (!entry->is_shared) &&
2430 (!entry->is_sub_map) &&
fe8ab488
A
2431 (!entry->in_transition) &&
2432 (!entry->needs_wakeup) &&
2433 (entry->behavior == VM_BEHAVIOR_DEFAULT) &&
2d21ac55
A
2434 (entry->protection == cur_protection) &&
2435 (entry->max_protection == max_protection) &&
fe8ab488 2436 (entry->inheritance == inheritance) &&
3e170ce0
A
2437 ((user_alias == VM_MEMORY_REALLOC) ||
2438 (VME_ALIAS(entry) == alias)) &&
2d21ac55 2439 (entry->no_cache == no_cache) &&
fe8ab488
A
2440 (entry->permanent == permanent) &&
2441 (!entry->superpage_size && !superpage_size) &&
39236c6e
A
2442 /*
2443 * No coalescing if not map-aligned, to avoid propagating
2444 * that condition any further than needed:
2445 */
2446 (!entry->map_aligned || !clear_map_aligned) &&
fe8ab488
A
2447 (!entry->zero_wired_pages) &&
2448 (!entry->used_for_jit && !entry_for_jit) &&
2449 (entry->iokit_acct == iokit_acct) &&
3e170ce0
A
2450 (!entry->vme_resilient_codesign) &&
2451 (!entry->vme_resilient_media) &&
39037602 2452 (!entry->vme_atomic) &&
fe8ab488 2453
b0d623f7 2454 ((entry->vme_end - entry->vme_start) + size <=
3e170ce0 2455 (user_alias == VM_MEMORY_REALLOC ?
b0d623f7
A
2456 ANON_CHUNK_SIZE :
2457 NO_COALESCE_LIMIT)) &&
fe8ab488 2458
2d21ac55 2459 (entry->wired_count == 0)) { /* implies user_wired_count == 0 */
3e170ce0 2460 if (vm_object_coalesce(VME_OBJECT(entry),
2d21ac55 2461 VM_OBJECT_NULL,
3e170ce0 2462 VME_OFFSET(entry),
2d21ac55
A
2463 (vm_object_offset_t) 0,
2464 (vm_map_size_t)(entry->vme_end - entry->vme_start),
2465 (vm_map_size_t)(end - entry->vme_end))) {
1c79356b
A
2466
2467 /*
2468 * Coalesced the two objects - can extend
2469 * the previous map entry to include the
2470 * new range.
2471 */
2472 map->size += (end - entry->vme_end);
e2d2fc5c 2473 assert(entry->vme_start < end);
39236c6e
A
2474 assert(VM_MAP_PAGE_ALIGNED(end,
2475 VM_MAP_PAGE_MASK(map)));
3e170ce0
A
2476 if (__improbable(vm_debug_events))
2477 DTRACE_VM5(map_entry_extend, vm_map_t, map, vm_map_entry_t, entry, vm_address_t, entry->vme_start, vm_address_t, entry->vme_end, vm_address_t, end);
1c79356b 2478 entry->vme_end = end;
3e170ce0
A
2479 if (map->holelistenabled) {
2480 vm_map_store_update_first_free(map, entry, TRUE);
2481 } else {
2482 vm_map_store_update_first_free(map, map->first_free, TRUE);
2483 }
fe8ab488 2484 new_mapping_established = TRUE;
1c79356b
A
2485 RETURN(KERN_SUCCESS);
2486 }
2487 }
2488
b0d623f7
A
2489 step = superpage_size ? SUPERPAGE_SIZE : (end - start);
2490 new_entry = NULL;
2491
2492 for (tmp2_start = start; tmp2_start<end; tmp2_start += step) {
2493 tmp2_end = tmp2_start + step;
2494 /*
2495 * Create a new entry
2496 * LP64todo - for now, we can only allocate 4GB internal objects
2497 * because the default pager can't page bigger ones. Remove this
2498 * when it can.
2499 *
2500 * XXX FBDP
2501 * The reserved "page zero" in each process's address space can
2502 * be arbitrarily large. Splitting it into separate 4GB objects and
2503 * therefore different VM map entries serves no purpose and just
2504 * slows down operations on the VM map, so let's not split the
2505 * allocation into 4GB chunks if the max protection is NONE. That
2506 * memory should never be accessible, so it will never get to the
2507 * default pager.
2508 */
2509 tmp_start = tmp2_start;
2510 if (object == VM_OBJECT_NULL &&
2511 size > (vm_map_size_t)ANON_CHUNK_SIZE &&
2512 max_protection != VM_PROT_NONE &&
2513 superpage_size == 0)
2514 tmp_end = tmp_start + (vm_map_size_t)ANON_CHUNK_SIZE;
2515 else
2516 tmp_end = tmp2_end;
2517 do {
2518 new_entry = vm_map_entry_insert(map, entry, tmp_start, tmp_end,
2519 object, offset, needs_copy,
2520 FALSE, FALSE,
2521 cur_protection, max_protection,
2522 VM_BEHAVIOR_DEFAULT,
316670eb 2523 (entry_for_jit)? VM_INHERIT_NONE: inheritance,
6d2010ae 2524 0, no_cache,
39236c6e
A
2525 permanent,
2526 superpage_size,
fe8ab488
A
2527 clear_map_aligned,
2528 is_submap);
3e170ce0
A
2529
2530 assert((object != kernel_object) || (VM_KERN_MEMORY_NONE != alias));
2531 VME_ALIAS_SET(new_entry, alias);
2532
316670eb 2533 if (entry_for_jit){
6d2010ae
A
2534 if (!(map->jit_entry_exists)){
2535 new_entry->used_for_jit = TRUE;
2536 map->jit_entry_exists = TRUE;
2537 }
2538 }
2539
3e170ce0
A
2540 if (resilient_codesign &&
2541 ! ((cur_protection | max_protection) &
2542 (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
2543 new_entry->vme_resilient_codesign = TRUE;
2544 }
2545
2546 if (resilient_media &&
2547 ! ((cur_protection | max_protection) &
2548 (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
2549 new_entry->vme_resilient_media = TRUE;
2550 }
2551
fe8ab488
A
2552 assert(!new_entry->iokit_acct);
2553 if (!is_submap &&
2554 object != VM_OBJECT_NULL &&
2555 object->purgable != VM_PURGABLE_DENY) {
2556 assert(new_entry->use_pmap);
2557 assert(!new_entry->iokit_acct);
2558 /*
2559 * Turn off pmap accounting since
2560 * purgeable objects have their
2561 * own ledgers.
2562 */
2563 new_entry->use_pmap = FALSE;
2564 } else if (!is_submap &&
ecc0ceb4
A
2565 iokit_acct &&
2566 object != VM_OBJECT_NULL &&
2567 object->internal) {
fe8ab488
A
2568 /* alternate accounting */
2569 assert(!new_entry->iokit_acct);
2570 assert(new_entry->use_pmap);
2571 new_entry->iokit_acct = TRUE;
2572 new_entry->use_pmap = FALSE;
ecc0ceb4
A
2573 DTRACE_VM4(
2574 vm_map_iokit_mapped_region,
2575 vm_map_t, map,
2576 vm_map_offset_t, new_entry->vme_start,
2577 vm_map_offset_t, new_entry->vme_end,
2578 int, VME_ALIAS(new_entry));
fe8ab488
A
2579 vm_map_iokit_mapped_region(
2580 map,
2581 (new_entry->vme_end -
2582 new_entry->vme_start));
2583 } else if (!is_submap) {
2584 assert(!new_entry->iokit_acct);
2585 assert(new_entry->use_pmap);
2586 }
2587
b0d623f7
A
2588 if (is_submap) {
2589 vm_map_t submap;
2590 boolean_t submap_is_64bit;
2591 boolean_t use_pmap;
2592
fe8ab488
A
2593 assert(new_entry->is_sub_map);
2594 assert(!new_entry->use_pmap);
2595 assert(!new_entry->iokit_acct);
b0d623f7
A
2596 submap = (vm_map_t) object;
2597 submap_is_64bit = vm_map_is_64bit(submap);
3e170ce0 2598 use_pmap = (user_alias == VM_MEMORY_SHARED_PMAP);
fe8ab488 2599#ifndef NO_NESTED_PMAP
b0d623f7 2600 if (use_pmap && submap->pmap == NULL) {
316670eb 2601 ledger_t ledger = map->pmap->ledger;
b0d623f7 2602 /* we need a sub pmap to nest... */
316670eb
A
2603 submap->pmap = pmap_create(ledger, 0,
2604 submap_is_64bit);
b0d623f7
A
2605 if (submap->pmap == NULL) {
2606 /* let's proceed without nesting... */
2607 }
2d21ac55 2608 }
b0d623f7
A
2609 if (use_pmap && submap->pmap != NULL) {
2610 kr = pmap_nest(map->pmap,
2611 submap->pmap,
2612 tmp_start,
2613 tmp_start,
2614 tmp_end - tmp_start);
2615 if (kr != KERN_SUCCESS) {
2616 printf("vm_map_enter: "
2617 "pmap_nest(0x%llx,0x%llx) "
2618 "error 0x%x\n",
2619 (long long)tmp_start,
2620 (long long)tmp_end,
2621 kr);
2622 } else {
2623 /* we're now nested ! */
2624 new_entry->use_pmap = TRUE;
2625 pmap_empty = FALSE;
2626 }
2627 }
fe8ab488 2628#endif /* NO_NESTED_PMAP */
2d21ac55 2629 }
b0d623f7
A
2630 entry = new_entry;
2631
2632 if (superpage_size) {
2633 vm_page_t pages, m;
2634 vm_object_t sp_object;
2635
3e170ce0 2636 VME_OFFSET_SET(entry, 0);
b0d623f7
A
2637
2638 /* allocate one superpage */
2639 kr = cpm_allocate(SUPERPAGE_SIZE, &pages, 0, SUPERPAGE_NBASEPAGES-1, TRUE, 0);
2d21ac55 2640 if (kr != KERN_SUCCESS) {
3e170ce0
A
2641 /* deallocate whole range... */
2642 new_mapping_established = TRUE;
2643 /* ... but only up to "tmp_end" */
2644 size -= end - tmp_end;
b0d623f7
A
2645 RETURN(kr);
2646 }
2647
2648 /* create one vm_object per superpage */
2649 sp_object = vm_object_allocate((vm_map_size_t)(entry->vme_end - entry->vme_start));
2650 sp_object->phys_contiguous = TRUE;
39037602 2651 sp_object->vo_shadow_offset = (vm_object_offset_t)VM_PAGE_GET_PHYS_PAGE(pages)*PAGE_SIZE;
3e170ce0 2652 VME_OBJECT_SET(entry, sp_object);
fe8ab488 2653 assert(entry->use_pmap);
b0d623f7
A
2654
2655 /* enter the base pages into the object */
2656 vm_object_lock(sp_object);
2657 for (offset = 0; offset < SUPERPAGE_SIZE; offset += PAGE_SIZE) {
2658 m = pages;
39037602 2659 pmap_zero_page(VM_PAGE_GET_PHYS_PAGE(m));
b0d623f7
A
2660 pages = NEXT_PAGE(m);
2661 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
3e170ce0 2662 vm_page_insert_wired(m, sp_object, offset, VM_KERN_MEMORY_OSFMK);
2d21ac55 2663 }
b0d623f7 2664 vm_object_unlock(sp_object);
2d21ac55 2665 }
b0d623f7
A
2666 } while (tmp_end != tmp2_end &&
2667 (tmp_start = tmp_end) &&
2668 (tmp_end = (tmp2_end - tmp_end > (vm_map_size_t)ANON_CHUNK_SIZE) ?
2669 tmp_end + (vm_map_size_t)ANON_CHUNK_SIZE : tmp2_end));
2670 }
91447636 2671
91447636 2672 new_mapping_established = TRUE;
1c79356b 2673
fe8ab488
A
2674BailOut:
2675 assert(map_locked == TRUE);
2d21ac55 2676
593a1d5f
A
2677 if (result == KERN_SUCCESS) {
2678 vm_prot_t pager_prot;
2679 memory_object_t pager;
91447636 2680
fe8ab488 2681#if DEBUG
593a1d5f
A
2682 if (pmap_empty &&
2683 !(flags & VM_FLAGS_NO_PMAP_CHECK)) {
2684 assert(vm_map_pmap_is_empty(map,
2685 *address,
2686 *address+size));
2687 }
fe8ab488 2688#endif /* DEBUG */
593a1d5f
A
2689
2690 /*
2691 * For "named" VM objects, let the pager know that the
2692 * memory object is being mapped. Some pagers need to keep
2693 * track of this, to know when they can reclaim the memory
2694 * object, for example.
2695 * VM calls memory_object_map() for each mapping (specifying
2696 * the protection of each mapping) and calls
2697 * memory_object_last_unmap() when all the mappings are gone.
2698 */
2699 pager_prot = max_protection;
2700 if (needs_copy) {
2701 /*
2702 * Copy-On-Write mapping: won't modify
2703 * the memory object.
2704 */
2705 pager_prot &= ~VM_PROT_WRITE;
2706 }
2707 if (!is_submap &&
2708 object != VM_OBJECT_NULL &&
2709 object->named &&
2710 object->pager != MEMORY_OBJECT_NULL) {
2711 vm_object_lock(object);
2712 pager = object->pager;
2713 if (object->named &&
2714 pager != MEMORY_OBJECT_NULL) {
2715 assert(object->pager_ready);
2716 vm_object_mapping_wait(object, THREAD_UNINT);
2717 vm_object_mapping_begin(object);
2718 vm_object_unlock(object);
2719
2720 kr = memory_object_map(pager, pager_prot);
2721 assert(kr == KERN_SUCCESS);
2722
2723 vm_object_lock(object);
2724 vm_object_mapping_end(object);
2725 }
2726 vm_object_unlock(object);
2727 }
fe8ab488
A
2728 }
2729
2730 assert(map_locked == TRUE);
2731
2732 if (!keep_map_locked) {
2733 vm_map_unlock(map);
2734 map_locked = FALSE;
2735 }
2736
2737 /*
2738 * We can't hold the map lock if we enter this block.
2739 */
2740
2741 if (result == KERN_SUCCESS) {
2742
2743 /* Wire down the new entry if the user
2744 * requested all new map entries be wired.
2745 */
2746 if ((map->wiring_required)||(superpage_size)) {
2747 assert(!keep_map_locked);
2748 pmap_empty = FALSE; /* pmap won't be empty */
2749 kr = vm_map_wire(map, start, end,
3e170ce0
A
2750 new_entry->protection | VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_MLOCK),
2751 TRUE);
fe8ab488
A
2752 result = kr;
2753 }
2754
2755 }
2756
2757 if (result != KERN_SUCCESS) {
91447636
A
2758 if (new_mapping_established) {
2759 /*
2760 * We have to get rid of the new mappings since we
2761 * won't make them available to the user.
2762 * Try to do that atomically, to minimize the risk
2763 * that someone else creates new mappings in that range.
2764 */
2765 zap_new_map = vm_map_create(PMAP_NULL,
2766 *address,
2767 *address + size,
b0d623f7 2768 map->hdr.entries_pageable);
39236c6e
A
2769 vm_map_set_page_shift(zap_new_map,
2770 VM_MAP_PAGE_SHIFT(map));
3e170ce0
A
2771 vm_map_disable_hole_optimization(zap_new_map);
2772
91447636
A
2773 if (!map_locked) {
2774 vm_map_lock(map);
2775 map_locked = TRUE;
2776 }
2777 (void) vm_map_delete(map, *address, *address+size,
fe8ab488
A
2778 (VM_MAP_REMOVE_SAVE_ENTRIES |
2779 VM_MAP_REMOVE_NO_MAP_ALIGN),
91447636
A
2780 zap_new_map);
2781 }
2782 if (zap_old_map != VM_MAP_NULL &&
2783 zap_old_map->hdr.nentries != 0) {
2784 vm_map_entry_t entry1, entry2;
2785
2786 /*
2787 * The new mapping failed. Attempt to restore
2788 * the old mappings, saved in the "zap_old_map".
2789 */
2790 if (!map_locked) {
2791 vm_map_lock(map);
2792 map_locked = TRUE;
2793 }
2794
2795 /* first check if the coast is still clear */
2796 start = vm_map_first_entry(zap_old_map)->vme_start;
2797 end = vm_map_last_entry(zap_old_map)->vme_end;
2798 if (vm_map_lookup_entry(map, start, &entry1) ||
2799 vm_map_lookup_entry(map, end, &entry2) ||
2800 entry1 != entry2) {
2801 /*
2802 * Part of that range has already been
2803 * re-mapped: we can't restore the old
2804 * mappings...
2805 */
2806 vm_map_enter_restore_failures++;
2807 } else {
2808 /*
2809 * Transfer the saved map entries from
2810 * "zap_old_map" to the original "map",
2811 * inserting them all after "entry1".
2812 */
2813 for (entry2 = vm_map_first_entry(zap_old_map);
2814 entry2 != vm_map_to_entry(zap_old_map);
2815 entry2 = vm_map_first_entry(zap_old_map)) {
2d21ac55
A
2816 vm_map_size_t entry_size;
2817
2818 entry_size = (entry2->vme_end -
2819 entry2->vme_start);
6d2010ae 2820 vm_map_store_entry_unlink(zap_old_map,
91447636 2821 entry2);
2d21ac55 2822 zap_old_map->size -= entry_size;
6d2010ae 2823 vm_map_store_entry_link(map, entry1, entry2);
2d21ac55 2824 map->size += entry_size;
91447636
A
2825 entry1 = entry2;
2826 }
2827 if (map->wiring_required) {
2828 /*
2829 * XXX TODO: we should rewire the
2830 * old pages here...
2831 */
2832 }
2833 vm_map_enter_restore_successes++;
2834 }
2835 }
2836 }
2837
fe8ab488
A
2838 /*
2839 * The caller is responsible for releasing the lock if it requested to
2840 * keep the map locked.
2841 */
2842 if (map_locked && !keep_map_locked) {
91447636
A
2843 vm_map_unlock(map);
2844 }
2845
2846 /*
2847 * Get rid of the "zap_maps" and all the map entries that
2848 * they may still contain.
2849 */
2850 if (zap_old_map != VM_MAP_NULL) {
2d21ac55 2851 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
91447636
A
2852 zap_old_map = VM_MAP_NULL;
2853 }
2854 if (zap_new_map != VM_MAP_NULL) {
2d21ac55 2855 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
91447636
A
2856 zap_new_map = VM_MAP_NULL;
2857 }
2858
2859 return result;
1c79356b
A
2860
2861#undef RETURN
2862}
2863
3e170ce0 2864
fe8ab488
A
2865/*
2866 * Counters for the prefault optimization.
2867 */
2868int64_t vm_prefault_nb_pages = 0;
2869int64_t vm_prefault_nb_bailout = 0;
2870
2871static kern_return_t
2872vm_map_enter_mem_object_helper(
2d21ac55
A
2873 vm_map_t target_map,
2874 vm_map_offset_t *address,
2875 vm_map_size_t initial_size,
2876 vm_map_offset_t mask,
2877 int flags,
2878 ipc_port_t port,
2879 vm_object_offset_t offset,
2880 boolean_t copy,
2881 vm_prot_t cur_protection,
2882 vm_prot_t max_protection,
fe8ab488
A
2883 vm_inherit_t inheritance,
2884 upl_page_list_ptr_t page_list,
2885 unsigned int page_list_count)
91447636 2886{
2d21ac55
A
2887 vm_map_address_t map_addr;
2888 vm_map_size_t map_size;
2889 vm_object_t object;
2890 vm_object_size_t size;
2891 kern_return_t result;
6d2010ae 2892 boolean_t mask_cur_protection, mask_max_protection;
fe8ab488 2893 boolean_t try_prefault = (page_list_count != 0);
3e170ce0 2894 vm_map_offset_t offset_in_mapping = 0;
6d2010ae
A
2895
2896 mask_cur_protection = cur_protection & VM_PROT_IS_MASK;
2897 mask_max_protection = max_protection & VM_PROT_IS_MASK;
2898 cur_protection &= ~VM_PROT_IS_MASK;
2899 max_protection &= ~VM_PROT_IS_MASK;
91447636
A
2900
2901 /*
2d21ac55 2902 * Check arguments for validity
91447636 2903 */
2d21ac55
A
2904 if ((target_map == VM_MAP_NULL) ||
2905 (cur_protection & ~VM_PROT_ALL) ||
2906 (max_protection & ~VM_PROT_ALL) ||
2907 (inheritance > VM_INHERIT_LAST_VALID) ||
fe8ab488 2908 (try_prefault && (copy || !page_list)) ||
3e170ce0 2909 initial_size == 0) {
2d21ac55 2910 return KERN_INVALID_ARGUMENT;
3e170ce0 2911 }
6d2010ae 2912
3e170ce0
A
2913 {
2914 map_addr = vm_map_trunc_page(*address,
2915 VM_MAP_PAGE_MASK(target_map));
2916 map_size = vm_map_round_page(initial_size,
2917 VM_MAP_PAGE_MASK(target_map));
2918 }
39236c6e 2919 size = vm_object_round_page(initial_size);
593a1d5f 2920
2d21ac55
A
2921 /*
2922 * Find the vm object (if any) corresponding to this port.
2923 */
2924 if (!IP_VALID(port)) {
2925 object = VM_OBJECT_NULL;
2926 offset = 0;
2927 copy = FALSE;
2928 } else if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
2929 vm_named_entry_t named_entry;
2930
2931 named_entry = (vm_named_entry_t) port->ip_kobject;
39236c6e 2932
3e170ce0
A
2933 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
2934 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
39236c6e
A
2935 offset += named_entry->data_offset;
2936 }
2937
2d21ac55
A
2938 /* a few checks to make sure user is obeying rules */
2939 if (size == 0) {
2940 if (offset >= named_entry->size)
2941 return KERN_INVALID_RIGHT;
2942 size = named_entry->size - offset;
2943 }
6d2010ae
A
2944 if (mask_max_protection) {
2945 max_protection &= named_entry->protection;
2946 }
2947 if (mask_cur_protection) {
2948 cur_protection &= named_entry->protection;
2949 }
2d21ac55
A
2950 if ((named_entry->protection & max_protection) !=
2951 max_protection)
2952 return KERN_INVALID_RIGHT;
2953 if ((named_entry->protection & cur_protection) !=
2954 cur_protection)
2955 return KERN_INVALID_RIGHT;
22ba694c
A
2956 if (offset + size < offset) {
2957 /* overflow */
2958 return KERN_INVALID_ARGUMENT;
2959 }
3e170ce0 2960 if (named_entry->size < (offset + initial_size)) {
2d21ac55 2961 return KERN_INVALID_ARGUMENT;
3e170ce0 2962 }
2d21ac55 2963
39236c6e
A
2964 if (named_entry->is_copy) {
2965 /* for a vm_map_copy, we can only map it whole */
2966 if ((size != named_entry->size) &&
2967 (vm_map_round_page(size,
2968 VM_MAP_PAGE_MASK(target_map)) ==
2969 named_entry->size)) {
2970 /* XXX FBDP use the rounded size... */
2971 size = vm_map_round_page(
2972 size,
2973 VM_MAP_PAGE_MASK(target_map));
2974 }
2975
fe8ab488
A
2976 if (!(flags & VM_FLAGS_ANYWHERE) &&
2977 (offset != 0 ||
2978 size != named_entry->size)) {
2979 /*
2980 * XXX for a mapping at a "fixed" address,
2981 * we can't trim after mapping the whole
2982 * memory entry, so reject a request for a
2983 * partial mapping.
2984 */
39236c6e
A
2985 return KERN_INVALID_ARGUMENT;
2986 }
2987 }
2988
2d21ac55
A
2989 /* the caller's "offset" parameter is relative to the start of the */
2990 /* named entry; add the named entry's own offset within the object */
2991 offset = offset + named_entry->offset;
2992
39236c6e
A
2993 if (! VM_MAP_PAGE_ALIGNED(size,
2994 VM_MAP_PAGE_MASK(target_map))) {
2995 /*
2996 * Let's not map more than requested;
2997 * vm_map_enter() will handle this "not map-aligned"
2998 * case.
2999 */
3000 map_size = size;
3001 }
3002
2d21ac55
A
3003 named_entry_lock(named_entry);
3004 if (named_entry->is_sub_map) {
3005 vm_map_t submap;
3006
3e170ce0
A
3007 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
3008 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
39236c6e
A
3009 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap.");
3010 }
3011
2d21ac55
A
3012 submap = named_entry->backing.map;
3013 vm_map_lock(submap);
3014 vm_map_reference(submap);
3015 vm_map_unlock(submap);
3016 named_entry_unlock(named_entry);
3017
3018 result = vm_map_enter(target_map,
3019 &map_addr,
3020 map_size,
3021 mask,
3022 flags | VM_FLAGS_SUBMAP,
3023 (vm_object_t) submap,
3024 offset,
3025 copy,
3026 cur_protection,
3027 max_protection,
3028 inheritance);
3029 if (result != KERN_SUCCESS) {
3030 vm_map_deallocate(submap);
3031 } else {
3032 /*
3033 * No need to lock "submap" just to check its
3034 * "mapped" flag: that flag is never reset
3035 * once it's been set and if we race, we'll
3036 * just end up setting it twice, which is OK.
3037 */
316670eb
A
3038 if (submap->mapped_in_other_pmaps == FALSE &&
3039 vm_map_pmap(submap) != PMAP_NULL &&
3040 vm_map_pmap(submap) !=
3041 vm_map_pmap(target_map)) {
2d21ac55 3042 /*
316670eb
A
3043 * This submap is being mapped in a map
3044 * that uses a different pmap.
3045 * Set its "mapped_in_other_pmaps" flag
3046 * to indicate that we now need to
3047 * remove mappings from all pmaps rather
3048 * than just the submap's pmap.
2d21ac55
A
3049 */
3050 vm_map_lock(submap);
316670eb 3051 submap->mapped_in_other_pmaps = TRUE;
2d21ac55
A
3052 vm_map_unlock(submap);
3053 }
3054 *address = map_addr;
3055 }
3056 return result;
3057
3058 } else if (named_entry->is_pager) {
3059 unsigned int access;
3060 vm_prot_t protections;
3061 unsigned int wimg_mode;
2d21ac55
A
3062
3063 protections = named_entry->protection & VM_PROT_ALL;
3064 access = GET_MAP_MEM(named_entry->protection);
3065
3e170ce0
A
3066 if (flags & (VM_FLAGS_RETURN_DATA_ADDR|
3067 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
39236c6e
A
3068 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for pager.");
3069 }
3070
2d21ac55
A
3071 object = vm_object_enter(named_entry->backing.pager,
3072 named_entry->size,
3073 named_entry->internal,
3074 FALSE,
3075 FALSE);
3076 if (object == VM_OBJECT_NULL) {
3077 named_entry_unlock(named_entry);
3078 return KERN_INVALID_OBJECT;
3079 }
3080
3081 /* JMM - drop reference on pager here */
3082
3083 /* create an extra ref for the named entry */
3084 vm_object_lock(object);
3085 vm_object_reference_locked(object);
3086 named_entry->backing.object = object;
3087 named_entry->is_pager = FALSE;
3088 named_entry_unlock(named_entry);
3089
3090 wimg_mode = object->wimg_bits;
6d2010ae 3091
2d21ac55
A
3092 if (access == MAP_MEM_IO) {
3093 wimg_mode = VM_WIMG_IO;
3094 } else if (access == MAP_MEM_COPYBACK) {
3095 wimg_mode = VM_WIMG_USE_DEFAULT;
316670eb
A
3096 } else if (access == MAP_MEM_INNERWBACK) {
3097 wimg_mode = VM_WIMG_INNERWBACK;
2d21ac55
A
3098 } else if (access == MAP_MEM_WTHRU) {
3099 wimg_mode = VM_WIMG_WTHRU;
3100 } else if (access == MAP_MEM_WCOMB) {
3101 wimg_mode = VM_WIMG_WCOMB;
3102 }
2d21ac55
A
3103
3104 /* wait for object (if any) to be ready */
3105 if (!named_entry->internal) {
3106 while (!object->pager_ready) {
3107 vm_object_wait(
3108 object,
3109 VM_OBJECT_EVENT_PAGER_READY,
3110 THREAD_UNINT);
3111 vm_object_lock(object);
3112 }
3113 }
3114
6d2010ae
A
3115 if (object->wimg_bits != wimg_mode)
3116 vm_object_change_wimg_mode(object, wimg_mode);
2d21ac55 3117
fe8ab488
A
3118#if VM_OBJECT_TRACKING_OP_TRUESHARE
3119 if (!object->true_share &&
3120 vm_object_tracking_inited) {
3121 void *bt[VM_OBJECT_TRACKING_BTDEPTH];
3122 int num = 0;
3123
3124 num = OSBacktrace(bt,
3125 VM_OBJECT_TRACKING_BTDEPTH);
3126 btlog_add_entry(vm_object_tracking_btlog,
3127 object,
3128 VM_OBJECT_TRACKING_OP_TRUESHARE,
3129 bt,
3130 num);
3131 }
3132#endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */
3133
2d21ac55 3134 object->true_share = TRUE;
6d2010ae 3135
2d21ac55
A
3136 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
3137 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
3138 vm_object_unlock(object);
39236c6e
A
3139
3140 } else if (named_entry->is_copy) {
3141 kern_return_t kr;
3142 vm_map_copy_t copy_map;
3143 vm_map_entry_t copy_entry;
3144 vm_map_offset_t copy_addr;
3145
3146 if (flags & ~(VM_FLAGS_FIXED |
3147 VM_FLAGS_ANYWHERE |
3148 VM_FLAGS_OVERWRITE |
d190cdc3 3149 VM_FLAGS_IOKIT_ACCT |
3e170ce0 3150 VM_FLAGS_RETURN_4K_DATA_ADDR |
39037602
A
3151 VM_FLAGS_RETURN_DATA_ADDR |
3152 VM_FLAGS_ALIAS_MASK)) {
39236c6e
A
3153 named_entry_unlock(named_entry);
3154 return KERN_INVALID_ARGUMENT;
3155 }
3156
3e170ce0
A
3157 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
3158 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
39236c6e 3159 offset_in_mapping = offset - vm_object_trunc_page(offset);
3e170ce0
A
3160 if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR)
3161 offset_in_mapping &= ~((signed)(0xFFF));
39236c6e
A
3162 offset = vm_object_trunc_page(offset);
3163 map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset;
3164 }
3165
3166 copy_map = named_entry->backing.copy;
3167 assert(copy_map->type == VM_MAP_COPY_ENTRY_LIST);
3168 if (copy_map->type != VM_MAP_COPY_ENTRY_LIST) {
3169 /* unsupported type; should not happen */
3170 printf("vm_map_enter_mem_object: "
3171 "memory_entry->backing.copy "
3172 "unsupported type 0x%x\n",
3173 copy_map->type);
3174 named_entry_unlock(named_entry);
3175 return KERN_INVALID_ARGUMENT;
3176 }
3177
3178 /* reserve a contiguous range */
3179 kr = vm_map_enter(target_map,
3180 &map_addr,
fe8ab488
A
3181 /* map whole mem entry, trim later: */
3182 named_entry->size,
39236c6e
A
3183 mask,
3184 flags & (VM_FLAGS_ANYWHERE |
3185 VM_FLAGS_OVERWRITE |
d190cdc3 3186 VM_FLAGS_IOKIT_ACCT |
3e170ce0 3187 VM_FLAGS_RETURN_4K_DATA_ADDR |
39037602
A
3188 VM_FLAGS_RETURN_DATA_ADDR |
3189 VM_FLAGS_ALIAS_MASK),
39236c6e
A
3190 VM_OBJECT_NULL,
3191 0,
3192 FALSE, /* copy */
3193 cur_protection,
3194 max_protection,
3195 inheritance);
3196 if (kr != KERN_SUCCESS) {
3197 named_entry_unlock(named_entry);
3198 return kr;
3199 }
3200
3201 copy_addr = map_addr;
3202
3203 for (copy_entry = vm_map_copy_first_entry(copy_map);
3204 copy_entry != vm_map_copy_to_entry(copy_map);
3205 copy_entry = copy_entry->vme_next) {
3206 int remap_flags = 0;
3207 vm_map_t copy_submap;
3208 vm_object_t copy_object;
3209 vm_map_size_t copy_size;
3210 vm_object_offset_t copy_offset;
39037602 3211 int copy_vm_alias;
39236c6e 3212
813fb2f6 3213 copy_object = VME_OBJECT(copy_entry);
3e170ce0 3214 copy_offset = VME_OFFSET(copy_entry);
39236c6e
A
3215 copy_size = (copy_entry->vme_end -
3216 copy_entry->vme_start);
39037602
A
3217 VM_GET_FLAGS_ALIAS(flags, copy_vm_alias);
3218 if (copy_vm_alias == 0) {
3219 /*
3220 * Caller does not want a specific
3221 * alias for this new mapping: use
3222 * the alias of the original mapping.
3223 */
3224 copy_vm_alias = VME_ALIAS(copy_entry);
3225 }
39236c6e
A
3226
3227 /* sanity check */
fe8ab488
A
3228 if ((copy_addr + copy_size) >
3229 (map_addr +
3230 named_entry->size /* XXX full size */ )) {
39236c6e
A
3231 /* over-mapping too much !? */
3232 kr = KERN_INVALID_ARGUMENT;
3233 /* abort */
3234 break;
3235 }
3236
3237 /* take a reference on the object */
3238 if (copy_entry->is_sub_map) {
3239 remap_flags |= VM_FLAGS_SUBMAP;
3e170ce0 3240 copy_submap = VME_SUBMAP(copy_entry);
39236c6e
A
3241 vm_map_lock(copy_submap);
3242 vm_map_reference(copy_submap);
3243 vm_map_unlock(copy_submap);
3244 copy_object = (vm_object_t) copy_submap;
813fb2f6
A
3245 } else if (!copy &&
3246 copy_object != VM_OBJECT_NULL &&
3247 (copy_entry->needs_copy ||
3248 copy_object->shadowed ||
3249 (!copy_object->true_share &&
3250 !copy_entry->is_shared &&
3251 copy_object->vo_size > copy_size))) {
3252 /*
3253 * We need to resolve our side of this
3254 * "symmetric" copy-on-write now; we
3255 * need a new object to map and share,
3256 * instead of the current one which
3257 * might still be shared with the
3258 * original mapping.
3259 *
3260 * Note: A "vm_map_copy_t" does not
3261 * have a lock but we're protected by
3262 * the named entry's lock here.
3263 */
3264 // assert(copy_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);
3265 VME_OBJECT_SHADOW(copy_entry, copy_size);
3266 if (!copy_entry->needs_copy &&
3267 copy_entry->protection & VM_PROT_WRITE) {
3268 vm_prot_t prot;
3269
3270 prot = copy_entry->protection & ~VM_PROT_WRITE;
3271 vm_object_pmap_protect(copy_object,
3272 copy_offset,
3273 copy_size,
3274 PMAP_NULL,
3275 0,
3276 prot);
3277 }
3278
3279 copy_entry->needs_copy = FALSE;
3280 copy_entry->is_shared = TRUE;
3281 copy_object = VME_OBJECT(copy_entry);
3282 copy_offset = VME_OFFSET(copy_entry);
3283 vm_object_lock(copy_object);
3284 vm_object_reference_locked(copy_object);
3285 if (copy_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
3286 /* we're about to make a shared mapping of this object */
3287 copy_object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
3288 copy_object->true_share = TRUE;
3289 }
3290 vm_object_unlock(copy_object);
39236c6e 3291 } else {
813fb2f6
A
3292 /*
3293 * We already have the right object
3294 * to map.
3295 */
3e170ce0 3296 copy_object = VME_OBJECT(copy_entry);
39236c6e
A
3297 vm_object_reference(copy_object);
3298 }
3299
3300 /* over-map the object into destination */
3301 remap_flags |= flags;
3302 remap_flags |= VM_FLAGS_FIXED;
3303 remap_flags |= VM_FLAGS_OVERWRITE;
3304 remap_flags &= ~VM_FLAGS_ANYWHERE;
39037602 3305 remap_flags |= VM_MAKE_TAG(copy_vm_alias);
813fb2f6
A
3306 if (!copy && !copy_entry->is_sub_map) {
3307 /*
3308 * copy-on-write should have been
3309 * resolved at this point, or we would
3310 * end up sharing instead of copying.
3311 */
3312 assert(!copy_entry->needs_copy);
3313 }
39236c6e
A
3314 kr = vm_map_enter(target_map,
3315 &copy_addr,
3316 copy_size,
3317 (vm_map_offset_t) 0,
3318 remap_flags,
3319 copy_object,
3320 copy_offset,
3321 copy,
3322 cur_protection,
3323 max_protection,
3324 inheritance);
3325 if (kr != KERN_SUCCESS) {
3326 if (copy_entry->is_sub_map) {
3327 vm_map_deallocate(copy_submap);
3328 } else {
3329 vm_object_deallocate(copy_object);
3330 }
3331 /* abort */
3332 break;
3333 }
3334
3335 /* next mapping */
3336 copy_addr += copy_size;
3337 }
3338
3339 if (kr == KERN_SUCCESS) {
3e170ce0
A
3340 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
3341 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
39236c6e
A
3342 *address = map_addr + offset_in_mapping;
3343 } else {
3344 *address = map_addr;
3345 }
fe8ab488
A
3346
3347 if (offset) {
3348 /*
3349 * Trim in front, from 0 to "offset".
3350 */
3351 vm_map_remove(target_map,
3352 map_addr,
3353 map_addr + offset,
3354 0);
3355 *address += offset;
3356 }
3357 if (offset + map_size < named_entry->size) {
3358 /*
3359 * Trim in back, from
3360 * "offset + map_size" to
3361 * "named_entry->size".
3362 */
3363 vm_map_remove(target_map,
3364 (map_addr +
3365 offset + map_size),
3366 (map_addr +
3367 named_entry->size),
3368 0);
3369 }
39236c6e
A
3370 }
3371 named_entry_unlock(named_entry);
3372
3373 if (kr != KERN_SUCCESS) {
3374 if (! (flags & VM_FLAGS_OVERWRITE)) {
3375 /* deallocate the contiguous range */
3376 (void) vm_deallocate(target_map,
3377 map_addr,
3378 map_size);
3379 }
3380 }
3381
3382 return kr;
3383
2d21ac55
A
3384 } else {
3385 /* This is the case where we are going to map */
3386 /* an already mapped object. If the object is */
3387 /* not ready, it is internal. An external */
3388 /* object cannot be mapped until it is ready, */
3389 /* so we can avoid the ready check */
3390 /* in this case. */
3e170ce0
A
3391 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
3392 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
39236c6e 3393 offset_in_mapping = offset - vm_object_trunc_page(offset);
3e170ce0
A
3394 if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR)
3395 offset_in_mapping &= ~((signed)(0xFFF));
39236c6e
A
3396 offset = vm_object_trunc_page(offset);
3397 map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset;
3398 }
3399
2d21ac55
A
3400 object = named_entry->backing.object;
3401 assert(object != VM_OBJECT_NULL);
3402 named_entry_unlock(named_entry);
3403 vm_object_reference(object);
3404 }
3405 } else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
3406 /*
3407 * JMM - This is temporary until we unify named entries
3408 * and raw memory objects.
3409 *
3410 * Detected fake ip_kotype for a memory object. In
3411 * this case, the port isn't really a port at all, but
3412 * instead is just a raw memory object.
3413 */
3e170ce0
A
3414 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
3415 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
39236c6e
A
3416 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for raw memory object.");
3417 }
3418
2d21ac55
A
3419 object = vm_object_enter((memory_object_t)port,
3420 size, FALSE, FALSE, FALSE);
3421 if (object == VM_OBJECT_NULL)
3422 return KERN_INVALID_OBJECT;
3423
3424 /* wait for object (if any) to be ready */
3425 if (object != VM_OBJECT_NULL) {
3426 if (object == kernel_object) {
3427 printf("Warning: Attempt to map kernel object"
3428 " by a non-private kernel entity\n");
3429 return KERN_INVALID_OBJECT;
3430 }
b0d623f7 3431 if (!object->pager_ready) {
2d21ac55 3432 vm_object_lock(object);
b0d623f7
A
3433
3434 while (!object->pager_ready) {
3435 vm_object_wait(object,
3436 VM_OBJECT_EVENT_PAGER_READY,
3437 THREAD_UNINT);
3438 vm_object_lock(object);
3439 }
3440 vm_object_unlock(object);
2d21ac55 3441 }
2d21ac55
A
3442 }
3443 } else {
3444 return KERN_INVALID_OBJECT;
3445 }
3446
593a1d5f
A
3447 if (object != VM_OBJECT_NULL &&
3448 object->named &&
3449 object->pager != MEMORY_OBJECT_NULL &&
3450 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
3451 memory_object_t pager;
3452 vm_prot_t pager_prot;
3453 kern_return_t kr;
3454
3455 /*
3456 * For "named" VM objects, let the pager know that the
3457 * memory object is being mapped. Some pagers need to keep
3458 * track of this, to know when they can reclaim the memory
3459 * object, for example.
3460 * VM calls memory_object_map() for each mapping (specifying
3461 * the protection of each mapping) and calls
3462 * memory_object_last_unmap() when all the mappings are gone.
3463 */
3464 pager_prot = max_protection;
3465 if (copy) {
3466 /*
3467 * Copy-On-Write mapping: won't modify the
3468 * memory object.
3469 */
3470 pager_prot &= ~VM_PROT_WRITE;
3471 }
3472 vm_object_lock(object);
3473 pager = object->pager;
3474 if (object->named &&
3475 pager != MEMORY_OBJECT_NULL &&
3476 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
3477 assert(object->pager_ready);
3478 vm_object_mapping_wait(object, THREAD_UNINT);
3479 vm_object_mapping_begin(object);
3480 vm_object_unlock(object);
3481
3482 kr = memory_object_map(pager, pager_prot);
3483 assert(kr == KERN_SUCCESS);
3484
3485 vm_object_lock(object);
3486 vm_object_mapping_end(object);
3487 }
3488 vm_object_unlock(object);
3489 }
3490
2d21ac55
A
3491 /*
3492 * Perform the copy if requested
3493 */
3494
3495 if (copy) {
3496 vm_object_t new_object;
3497 vm_object_offset_t new_offset;
3498
3e170ce0
A
3499 result = vm_object_copy_strategically(object, offset,
3500 map_size,
2d21ac55
A
3501 &new_object, &new_offset,
3502 &copy);
3503
3504
3505 if (result == KERN_MEMORY_RESTART_COPY) {
3506 boolean_t success;
3507 boolean_t src_needs_copy;
3508
3509 /*
3510 * XXX
3511 * We currently ignore src_needs_copy.
3512 * This really is the issue of how to make
3513 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
3514 * non-kernel users to use. Solution forthcoming.
3515 * In the meantime, since we don't allow non-kernel
3516 * memory managers to specify symmetric copy,
3517 * we won't run into problems here.
3518 */
3519 new_object = object;
3520 new_offset = offset;
3521 success = vm_object_copy_quickly(&new_object,
3e170ce0
A
3522 new_offset,
3523 map_size,
2d21ac55
A
3524 &src_needs_copy,
3525 &copy);
3526 assert(success);
3527 result = KERN_SUCCESS;
3528 }
3529 /*
3530 * Throw away the reference to the
3531 * original object, as it won't be mapped.
3532 */
3533
3534 vm_object_deallocate(object);
3535
3e170ce0 3536 if (result != KERN_SUCCESS) {
2d21ac55 3537 return result;
3e170ce0 3538 }
2d21ac55
A
3539
3540 object = new_object;
3541 offset = new_offset;
3542 }
3543
fe8ab488
A
3544 /*
3545 * If users want to try to prefault pages, the mapping and prefault
3546 * need to be atomic.
3547 */
3548 if (try_prefault)
3549 flags |= VM_FLAGS_KEEP_MAP_LOCKED;
3e170ce0
A
3550
3551 {
3552 result = vm_map_enter(target_map,
3553 &map_addr, map_size,
3554 (vm_map_offset_t)mask,
3555 flags,
3556 object, offset,
3557 copy,
3558 cur_protection, max_protection,
3559 inheritance);
3560 }
2d21ac55
A
3561 if (result != KERN_SUCCESS)
3562 vm_object_deallocate(object);
39236c6e 3563
fe8ab488
A
3564 /*
3565 * Try to prefault, and do not forget to release the vm map lock.
3566 */
3567 if (result == KERN_SUCCESS && try_prefault) {
3568 mach_vm_address_t va = map_addr;
3569 kern_return_t kr = KERN_SUCCESS;
3570 unsigned int i = 0;
39037602
A
3571 int pmap_options;
3572
3573 pmap_options = PMAP_OPTIONS_NOWAIT;
3574 if (object->internal) {
3575 pmap_options |= PMAP_OPTIONS_INTERNAL;
3576 }
fe8ab488
A
3577
3578 for (i = 0; i < page_list_count; ++i) {
3579 if (UPL_VALID_PAGE(page_list, i)) {
3580 /*
3581 * If this function call fails, we should stop
3582 * trying to optimize; other calls are likely
3583 * going to fail too.
3584 *
3585 * We are not going to report an error for such
3586 * a failure though. That's an optimization, not
3587 * something critical.
3588 */
3589 kr = pmap_enter_options(target_map->pmap,
3590 va, UPL_PHYS_PAGE(page_list, i),
3591 cur_protection, VM_PROT_NONE,
39037602 3592 0, TRUE, pmap_options, NULL);
fe8ab488
A
3593 if (kr != KERN_SUCCESS) {
3594 OSIncrementAtomic64(&vm_prefault_nb_bailout);
3e170ce0 3595 break;
fe8ab488
A
3596 }
3597 OSIncrementAtomic64(&vm_prefault_nb_pages);
3598 }
3599
3600 /* Next virtual address */
3601 va += PAGE_SIZE;
3602 }
fe8ab488
A
3603 vm_map_unlock(target_map);
3604 }
3605
3e170ce0
A
3606 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
3607 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
39236c6e
A
3608 *address = map_addr + offset_in_mapping;
3609 } else {
3610 *address = map_addr;
3611 }
2d21ac55
A
3612 return result;
3613}
3614
fe8ab488
A
3615kern_return_t
3616vm_map_enter_mem_object(
3617 vm_map_t target_map,
3618 vm_map_offset_t *address,
3619 vm_map_size_t initial_size,
3620 vm_map_offset_t mask,
3621 int flags,
3622 ipc_port_t port,
3623 vm_object_offset_t offset,
3624 boolean_t copy,
3625 vm_prot_t cur_protection,
3626 vm_prot_t max_protection,
3627 vm_inherit_t inheritance)
3628{
3629 return vm_map_enter_mem_object_helper(target_map, address, initial_size, mask, flags,
3630 port, offset, copy, cur_protection, max_protection,
3631 inheritance, NULL, 0);
3632}
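
/*
 * Illustrative sketch, not part of the original vm_map.c: a kernel
 * caller mapping a memory-object / named-entry port into a task's map
 * at any free address, with copy == FALSE.  The helper name
 * "example_map_port_anywhere" and the protection choice are assumptions
 * for illustration only; "port" is assumed to be a valid ipc_port_t
 * naming the object, as vm_map_enter_mem_object_helper() expects.
 */
static kern_return_t
example_map_port_anywhere(
	vm_map_t	target_map,
	ipc_port_t	port,
	vm_map_size_t	size,
	vm_map_offset_t	*out_addr)
{
	*out_addr = 0;
	return vm_map_enter_mem_object(target_map,
				       out_addr,
				       size,
				       (vm_map_offset_t)0,	/* mask: no alignment constraint */
				       VM_FLAGS_ANYWHERE,
				       port,
				       (vm_object_offset_t)0,	/* offset into the object */
				       FALSE,			/* copy */
				       VM_PROT_READ | VM_PROT_WRITE,
				       VM_PROT_READ | VM_PROT_WRITE,
				       VM_INHERIT_DEFAULT);
}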
b0d623f7 3633
fe8ab488
A
3634kern_return_t
3635vm_map_enter_mem_object_prefault(
3636 vm_map_t target_map,
3637 vm_map_offset_t *address,
3638 vm_map_size_t initial_size,
3639 vm_map_offset_t mask,
3640 int flags,
3641 ipc_port_t port,
3642 vm_object_offset_t offset,
3643 vm_prot_t cur_protection,
3644 vm_prot_t max_protection,
3645 upl_page_list_ptr_t page_list,
3646 unsigned int page_list_count)
3647{
3648 return vm_map_enter_mem_object_helper(target_map, address, initial_size, mask, flags,
3649 port, offset, FALSE, cur_protection, max_protection,
3650 VM_INHERIT_DEFAULT, page_list, page_list_count);
3651}
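
/*
 * Illustrative sketch, not part of the original vm_map.c: the prefault
 * variant takes a UPL page list so that valid pages can be entered in
 * the pmap while the map lock is still held (see the
 * VM_FLAGS_KEEP_MAP_LOCKED handling in the helper above).  "page_list"
 * and "page_list_count" are assumed to come from the caller's UPL
 * machinery; the helper name below is hypothetical.
 */
static kern_return_t
example_map_port_prefaulted(
	vm_map_t		target_map,
	ipc_port_t		port,
	vm_map_size_t		size,
	upl_page_list_ptr_t	page_list,
	unsigned int		page_list_count,
	vm_map_offset_t		*out_addr)
{
	*out_addr = 0;
	return vm_map_enter_mem_object_prefault(target_map,
						out_addr,
						size,
						(vm_map_offset_t)0,	/* mask */
						VM_FLAGS_ANYWHERE,
						port,
						(vm_object_offset_t)0,	/* offset */
						VM_PROT_READ | VM_PROT_WRITE,
						VM_PROT_READ | VM_PROT_WRITE,
						page_list,
						page_list_count);
}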
b0d623f7
A
3652
3653
3654kern_return_t
3655vm_map_enter_mem_object_control(
3656 vm_map_t target_map,
3657 vm_map_offset_t *address,
3658 vm_map_size_t initial_size,
3659 vm_map_offset_t mask,
3660 int flags,
3661 memory_object_control_t control,
3662 vm_object_offset_t offset,
3663 boolean_t copy,
3664 vm_prot_t cur_protection,
3665 vm_prot_t max_protection,
3666 vm_inherit_t inheritance)
3667{
3668 vm_map_address_t map_addr;
3669 vm_map_size_t map_size;
3670 vm_object_t object;
3671 vm_object_size_t size;
3672 kern_return_t result;
3673 memory_object_t pager;
3674 vm_prot_t pager_prot;
3675 kern_return_t kr;
3676
3677 /*
3678 * Check arguments for validity
3679 */
3680 if ((target_map == VM_MAP_NULL) ||
3681 (cur_protection & ~VM_PROT_ALL) ||
3682 (max_protection & ~VM_PROT_ALL) ||
3683 (inheritance > VM_INHERIT_LAST_VALID) ||
3e170ce0 3684 initial_size == 0) {
b0d623f7 3685 return KERN_INVALID_ARGUMENT;
3e170ce0 3686 }
b0d623f7 3687
3e170ce0
A
3688 {
3689 map_addr = vm_map_trunc_page(*address,
3690 VM_MAP_PAGE_MASK(target_map));
3691 map_size = vm_map_round_page(initial_size,
3692 VM_MAP_PAGE_MASK(target_map));
3693 }
3694 size = vm_object_round_page(initial_size);
b0d623f7
A
3695
3696 object = memory_object_control_to_vm_object(control);
3697
3698 if (object == VM_OBJECT_NULL)
3699 return KERN_INVALID_OBJECT;
3700
3701 if (object == kernel_object) {
3702 printf("Warning: Attempt to map kernel object"
3703 " by a non-private kernel entity\n");
3704 return KERN_INVALID_OBJECT;
3705 }
3706
3707 vm_object_lock(object);
3708 object->ref_count++;
3709 vm_object_res_reference(object);
3710
3711 /*
3712 * For "named" VM objects, let the pager know that the
3713 * memory object is being mapped. Some pagers need to keep
3714 * track of this, to know when they can reclaim the memory
3715 * object, for example.
3716 * VM calls memory_object_map() for each mapping (specifying
3717 * the protection of each mapping) and calls
3718 * memory_object_last_unmap() when all the mappings are gone.
3719 */
3720 pager_prot = max_protection;
3721 if (copy) {
3722 pager_prot &= ~VM_PROT_WRITE;
3723 }
3724 pager = object->pager;
3725 if (object->named &&
3726 pager != MEMORY_OBJECT_NULL &&
3727 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
3728 assert(object->pager_ready);
3729 vm_object_mapping_wait(object, THREAD_UNINT);
3730 vm_object_mapping_begin(object);
3731 vm_object_unlock(object);
3732
3733 kr = memory_object_map(pager, pager_prot);
3734 assert(kr == KERN_SUCCESS);
3735
3736 vm_object_lock(object);
3737 vm_object_mapping_end(object);
3738 }
3739 vm_object_unlock(object);
3740
3741 /*
3742 * Perform the copy if requested
3743 */
3744
3745 if (copy) {
3746 vm_object_t new_object;
3747 vm_object_offset_t new_offset;
3748
3749 result = vm_object_copy_strategically(object, offset, size,
3750 &new_object, &new_offset,
3751 &copy);
3752
3753
3754 if (result == KERN_MEMORY_RESTART_COPY) {
3755 boolean_t success;
3756 boolean_t src_needs_copy;
3757
3758 /*
3759 * XXX
3760 * We currently ignore src_needs_copy.
3761 * This really is the issue of how to make
3762 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
3763 * non-kernel users to use. Solution forthcoming.
3764 * In the meantime, since we don't allow non-kernel
3765 * memory managers to specify symmetric copy,
3766 * we won't run into problems here.
3767 */
3768 new_object = object;
3769 new_offset = offset;
3770 success = vm_object_copy_quickly(&new_object,
3771 new_offset, size,
3772 &src_needs_copy,
3773 &copy);
3774 assert(success);
3775 result = KERN_SUCCESS;
3776 }
3777 /*
3778 * Throw away the reference to the
3779 * original object, as it won't be mapped.
3780 */
3781
3782 vm_object_deallocate(object);
3783
3e170ce0 3784 if (result != KERN_SUCCESS) {
b0d623f7 3785 return result;
3e170ce0 3786 }
b0d623f7
A
3787
3788 object = new_object;
3789 offset = new_offset;
3790 }
3791
3e170ce0
A
3792 {
3793 result = vm_map_enter(target_map,
3794 &map_addr, map_size,
3795 (vm_map_offset_t)mask,
3796 flags,
3797 object, offset,
3798 copy,
3799 cur_protection, max_protection,
3800 inheritance);
3801 }
b0d623f7
A
3802 if (result != KERN_SUCCESS)
3803 vm_object_deallocate(object);
3804 *address = map_addr;
3805
3806 return result;
3807}
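
/*
 * Illustrative sketch, not part of the original vm_map.c: mapping via a
 * memory_object_control_t rather than a port, here as a copy-on-write
 * mapping (copy == TRUE), so the pager is told the mapping will not
 * write the memory object.  The control is assumed to have been
 * obtained elsewhere; the helper name is hypothetical.
 */
static kern_return_t
example_map_control_cow(
	vm_map_t			target_map,
	memory_object_control_t		control,
	vm_map_size_t			size,
	vm_map_offset_t			*out_addr)
{
	*out_addr = 0;
	return vm_map_enter_mem_object_control(target_map,
					       out_addr,
					       size,
					       (vm_map_offset_t)0,	/* mask */
					       VM_FLAGS_ANYWHERE,
					       control,
					       (vm_object_offset_t)0,	/* offset */
					       TRUE,			/* copy */
					       VM_PROT_READ | VM_PROT_WRITE,
					       VM_PROT_READ | VM_PROT_WRITE,
					       VM_INHERIT_DEFAULT);
}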
3808
3809
2d21ac55
A
3810#if VM_CPM
3811
3812#ifdef MACH_ASSERT
3813extern pmap_paddr_t avail_start, avail_end;
3814#endif
3815
3816/*
3817 * Allocate memory in the specified map, with the caveat that
3818 * the memory is physically contiguous. This call may fail
3819 * if the system can't find sufficient contiguous memory.
3820 * This call may cause or lead to heart-stopping amounts of
3821 * paging activity.
3822 *
3823 * Memory obtained from this call should be freed in the
3824 * normal way, viz., via vm_deallocate.
3825 */
3826kern_return_t
3827vm_map_enter_cpm(
3828 vm_map_t map,
3829 vm_map_offset_t *addr,
3830 vm_map_size_t size,
3831 int flags)
3832{
3833 vm_object_t cpm_obj;
3834 pmap_t pmap;
3835 vm_page_t m, pages;
3836 kern_return_t kr;
3837 vm_map_offset_t va, start, end, offset;
3838#if MACH_ASSERT
316670eb 3839 vm_map_offset_t prev_addr = 0;
2d21ac55
A
3840#endif /* MACH_ASSERT */
3841
3842 boolean_t anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
3e170ce0
A
3843 vm_tag_t tag;
3844
3845 VM_GET_FLAGS_ALIAS(flags, tag);
2d21ac55 3846
2d21ac55
A
3847 if (size == 0) {
3848 *addr = 0;
3849 return KERN_SUCCESS;
3850 }
3851 if (anywhere)
3852 *addr = vm_map_min(map);
3853 else
39236c6e
A
3854 *addr = vm_map_trunc_page(*addr,
3855 VM_MAP_PAGE_MASK(map));
3856 size = vm_map_round_page(size,
3857 VM_MAP_PAGE_MASK(map));
2d21ac55
A
3858
3859 /*
3860 * LP64todo - cpm_allocate should probably allow
3861 * allocations of >4GB, but not with the current
3862 * algorithm, so just cast down the size for now.
3863 */
3864 if (size > VM_MAX_ADDRESS)
3865 return KERN_RESOURCE_SHORTAGE;
3866 if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
b0d623f7 3867 &pages, 0, 0, TRUE, flags)) != KERN_SUCCESS)
2d21ac55
A
3868 return kr;
3869
3870 cpm_obj = vm_object_allocate((vm_object_size_t)size);
3871 assert(cpm_obj != VM_OBJECT_NULL);
3872 assert(cpm_obj->internal);
316670eb 3873 assert(cpm_obj->vo_size == (vm_object_size_t)size);
2d21ac55
A
3874 assert(cpm_obj->can_persist == FALSE);
3875 assert(cpm_obj->pager_created == FALSE);
3876 assert(cpm_obj->pageout == FALSE);
3877 assert(cpm_obj->shadow == VM_OBJECT_NULL);
91447636
A
3878
3879 /*
3880 * Insert pages into object.
3881 */
3882
3883 vm_object_lock(cpm_obj);
3884 for (offset = 0; offset < size; offset += PAGE_SIZE) {
3885 m = pages;
3886 pages = NEXT_PAGE(m);
0c530ab8 3887 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
91447636
A
3888
3889 assert(!m->gobbled);
3890 assert(!m->wanted);
3891 assert(!m->pageout);
3892 assert(!m->tabled);
b0d623f7 3893 assert(VM_PAGE_WIRED(m));
91447636
A
3894 /*
3895 * ENCRYPTED SWAP:
3896 * "m" is not supposed to be pageable, so it
3897 * should not be encrypted. It wouldn't be safe
3898 * to enter it in a new VM object while encrypted.
3899 */
3900 ASSERT_PAGE_DECRYPTED(m);
3901 assert(m->busy);
39037602 3902 assert(VM_PAGE_GET_PHYS_PAGE(m)>=(avail_start>>PAGE_SHIFT) && VM_PAGE_GET_PHYS_PAGE(m)<=(avail_end>>PAGE_SHIFT));
91447636
A
3903
3904 m->busy = FALSE;
3905 vm_page_insert(m, cpm_obj, offset);
3906 }
3907 assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
3908 vm_object_unlock(cpm_obj);
3909
3910 /*
3911 * Hang onto a reference on the object in case a
3912 * multi-threaded application for some reason decides
3913 * to deallocate the portion of the address space into
3914 * which we will insert this object.
3915 *
3916 * Unfortunately, we must insert the object now before
3917 * we can talk to the pmap module about which addresses
3918 * must be wired down. Hence, the race with a multi-
3919 * threaded app.
3920 */
3921 vm_object_reference(cpm_obj);
3922
3923 /*
3924 * Insert object into map.
3925 */
3926
3927 kr = vm_map_enter(
2d21ac55
A
3928 map,
3929 addr,
3930 size,
3931 (vm_map_offset_t)0,
3932 flags,
3933 cpm_obj,
3934 (vm_object_offset_t)0,
3935 FALSE,
3936 VM_PROT_ALL,
3937 VM_PROT_ALL,
3938 VM_INHERIT_DEFAULT);
91447636
A
3939
3940 if (kr != KERN_SUCCESS) {
3941 /*
3942 * A CPM object doesn't have can_persist set,
3943 * so all we have to do is deallocate it to
3944 * free up these pages.
3945 */
3946 assert(cpm_obj->pager_created == FALSE);
3947 assert(cpm_obj->can_persist == FALSE);
3948 assert(cpm_obj->pageout == FALSE);
3949 assert(cpm_obj->shadow == VM_OBJECT_NULL);
3950 vm_object_deallocate(cpm_obj); /* kill acquired ref */
3951 vm_object_deallocate(cpm_obj); /* kill creation ref */
3952 }
3953
3954 /*
3955 * Inform the physical mapping system that the
3956 * range of addresses may not fault, so that
3957 * page tables and such can be locked down as well.
3958 */
3959 start = *addr;
3960 end = start + size;
3961 pmap = vm_map_pmap(map);
3962 pmap_pageable(pmap, start, end, FALSE);
3963
3964 /*
3965 * Enter each page into the pmap, to avoid faults.
3966 * Note that this loop could be coded more efficiently,
3967 * if the need arose, rather than looking up each page
3968 * again.
3969 */
3970 for (offset = 0, va = start; offset < size;
3971 va += PAGE_SIZE, offset += PAGE_SIZE) {
2d21ac55
A
3972 int type_of_fault;
3973
91447636
A
3974 vm_object_lock(cpm_obj);
3975 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
91447636 3976 assert(m != VM_PAGE_NULL);
2d21ac55
A
3977
3978 vm_page_zero_fill(m);
3979
3980 type_of_fault = DBG_ZERO_FILL_FAULT;
3981
6d2010ae 3982 vm_fault_enter(m, pmap, va, VM_PROT_ALL, VM_PROT_WRITE,
fe8ab488 3983 VM_PAGE_WIRED(m), FALSE, FALSE, FALSE, 0, NULL,
2d21ac55
A
3984 &type_of_fault);
3985
3986 vm_object_unlock(cpm_obj);
91447636
A
3987 }
3988
3989#if MACH_ASSERT
3990 /*
3991 * Verify ordering in address space.
3992 */
3993 for (offset = 0; offset < size; offset += PAGE_SIZE) {
3994 vm_object_lock(cpm_obj);
3995 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
3996 vm_object_unlock(cpm_obj);
3997 if (m == VM_PAGE_NULL)
316670eb
A
3998 panic("vm_allocate_cpm: obj %p off 0x%llx no page",
3999 cpm_obj, (uint64_t)offset);
91447636
A
4000 assert(m->tabled);
4001 assert(!m->busy);
4002 assert(!m->wanted);
4003 assert(!m->fictitious);
4004 assert(!m->private);
4005 assert(!m->absent);
4006 assert(!m->error);
4007 assert(!m->cleaning);
316670eb 4008 assert(!m->laundry);
91447636
A
4009 assert(!m->precious);
4010 assert(!m->clustered);
4011 if (offset != 0) {
39037602 4012 if (VM_PAGE_GET_PHYS_PAGE(m) != prev_addr + 1) {
316670eb
A
4013 printf("start 0x%llx end 0x%llx va 0x%llx\n",
4014 (uint64_t)start, (uint64_t)end, (uint64_t)va);
4015 printf("obj %p off 0x%llx\n", cpm_obj, (uint64_t)offset);
4016 printf("m %p prev_address 0x%llx\n", m, (uint64_t)prev_addr);
91447636
A
4017 panic("vm_allocate_cpm: pages not contig!");
4018 }
4019 }
39037602 4020 prev_addr = VM_PAGE_GET_PHYS_PAGE(m);
91447636
A
4021 }
4022#endif /* MACH_ASSERT */
4023
4024 vm_object_deallocate(cpm_obj); /* kill extra ref */
4025
4026 return kr;
4027}
4028
4029
4030#else /* VM_CPM */
4031
4032/*
4033 * Interface is defined in all cases, but unless the kernel
4034 * is built explicitly for this option, the interface does
4035 * nothing.
4036 */
4037
4038kern_return_t
4039vm_map_enter_cpm(
4040 __unused vm_map_t map,
4041 __unused vm_map_offset_t *addr,
4042 __unused vm_map_size_t size,
4043 __unused int flags)
4044{
4045 return KERN_FAILURE;
4046}
4047#endif /* VM_CPM */
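
/*
 * Illustrative sketch, not part of the original vm_map.c: requesting a
 * physically contiguous allocation anywhere in "map".  On kernels built
 * without VM_CPM the stub above simply returns KERN_FAILURE, so callers
 * must be prepared to fall back.  The helper name is hypothetical; the
 * memory is released in the normal way (vm_deallocate), per the comment
 * above.
 */
static kern_return_t
example_alloc_contiguous(
	vm_map_t	map,
	vm_map_size_t	size,
	vm_map_offset_t	*addr)
{
	*addr = 0;
	return vm_map_enter_cpm(map, addr, size, VM_FLAGS_ANYWHERE);
}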
4048
b0d623f7
A
4049/* Not used without nested pmaps */
4050#ifndef NO_NESTED_PMAP
2d21ac55
A
4051/*
4052 * Clip and unnest a portion of a nested submap mapping.
4053 */
b0d623f7
A
4054
4055
2d21ac55
A
4056static void
4057vm_map_clip_unnest(
4058 vm_map_t map,
4059 vm_map_entry_t entry,
4060 vm_map_offset_t start_unnest,
4061 vm_map_offset_t end_unnest)
4062{
b0d623f7
A
4063 vm_map_offset_t old_start_unnest = start_unnest;
4064 vm_map_offset_t old_end_unnest = end_unnest;
4065
2d21ac55 4066 assert(entry->is_sub_map);
3e170ce0 4067 assert(VME_SUBMAP(entry) != NULL);
fe8ab488 4068 assert(entry->use_pmap);
2d21ac55 4069
b0d623f7
A
4070 /*
4071 * Query the platform for the optimal unnest range.
4072 * DRK: There's some duplication of effort here, since
4073 * callers may have adjusted the range to some extent. This
4074 * routine was introduced to support 1GiB subtree nesting
4075 * for x86 platforms, which can also nest on 2MiB boundaries
4076 * depending on size/alignment.
4077 */
4078 if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) {
39037602
A
4079 assert(VME_SUBMAP(entry)->is_nested_map);
4080 assert(!VME_SUBMAP(entry)->disable_vmentry_reuse);
4081 log_unnest_badness(map,
4082 old_start_unnest,
4083 old_end_unnest,
4084 VME_SUBMAP(entry)->is_nested_map,
4085 (entry->vme_start +
4086 VME_SUBMAP(entry)->lowest_unnestable_start -
4087 VME_OFFSET(entry)));
b0d623f7
A
4088 }
4089
2d21ac55
A
4090 if (entry->vme_start > start_unnest ||
4091 entry->vme_end < end_unnest) {
4092 panic("vm_map_clip_unnest(0x%llx,0x%llx): "
4093 "bad nested entry: start=0x%llx end=0x%llx\n",
4094 (long long)start_unnest, (long long)end_unnest,
4095 (long long)entry->vme_start, (long long)entry->vme_end);
4096 }
b0d623f7 4097
2d21ac55
A
4098 if (start_unnest > entry->vme_start) {
4099 _vm_map_clip_start(&map->hdr,
4100 entry,
4101 start_unnest);
3e170ce0
A
4102 if (map->holelistenabled) {
4103 vm_map_store_update_first_free(map, NULL, FALSE);
4104 } else {
4105 vm_map_store_update_first_free(map, map->first_free, FALSE);
4106 }
2d21ac55
A
4107 }
4108 if (entry->vme_end > end_unnest) {
4109 _vm_map_clip_end(&map->hdr,
4110 entry,
4111 end_unnest);
3e170ce0
A
4112 if (map->holelistenabled) {
4113 vm_map_store_update_first_free(map, NULL, FALSE);
4114 } else {
4115 vm_map_store_update_first_free(map, map->first_free, FALSE);
4116 }
2d21ac55
A
4117 }
4118
4119 pmap_unnest(map->pmap,
4120 entry->vme_start,
4121 entry->vme_end - entry->vme_start);
316670eb 4122 if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
2d21ac55
A
4123 /* clean up parent map/maps */
4124 vm_map_submap_pmap_clean(
4125 map, entry->vme_start,
4126 entry->vme_end,
3e170ce0
A
4127 VME_SUBMAP(entry),
4128 VME_OFFSET(entry));
2d21ac55
A
4129 }
4130 entry->use_pmap = FALSE;
3e170ce0
A
4131 if ((map->pmap != kernel_pmap) &&
4132 (VME_ALIAS(entry) == VM_MEMORY_SHARED_PMAP)) {
4133 VME_ALIAS_SET(entry, VM_MEMORY_UNSHARED_PMAP);
316670eb 4134 }
2d21ac55 4135}
b0d623f7 4136#endif /* NO_NESTED_PMAP */
2d21ac55 4137
1c79356b
A
4138/*
4139 * vm_map_clip_start: [ internal use only ]
4140 *
4141 * Asserts that the given entry begins at or after
4142 * the specified address; if necessary,
4143 * it splits the entry into two.
4144 */
e2d2fc5c 4145void
2d21ac55
A
4146vm_map_clip_start(
4147 vm_map_t map,
4148 vm_map_entry_t entry,
4149 vm_map_offset_t startaddr)
4150{
0c530ab8 4151#ifndef NO_NESTED_PMAP
fe8ab488
A
4152 if (entry->is_sub_map &&
4153 entry->use_pmap &&
2d21ac55
A
4154 startaddr >= entry->vme_start) {
4155 vm_map_offset_t start_unnest, end_unnest;
4156
4157 /*
4158 * Make sure "startaddr" is no longer in a nested range
4159 * before we clip. Unnest only the minimum range the platform
4160 * can handle.
b0d623f7
A
4161 * vm_map_clip_unnest may perform additional adjustments to
4162 * the unnest range.
2d21ac55
A
4163 */
4164 start_unnest = startaddr & ~(pmap_nesting_size_min - 1);
4165 end_unnest = start_unnest + pmap_nesting_size_min;
4166 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
4167 }
4168#endif /* NO_NESTED_PMAP */
4169 if (startaddr > entry->vme_start) {
3e170ce0 4170 if (VME_OBJECT(entry) &&
2d21ac55 4171 !entry->is_sub_map &&
3e170ce0 4172 VME_OBJECT(entry)->phys_contiguous) {
2d21ac55
A
4173 pmap_remove(map->pmap,
4174 (addr64_t)(entry->vme_start),
4175 (addr64_t)(entry->vme_end));
4176 }
39037602
A
4177 if (entry->vme_atomic) {
4178 panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)\n", map, entry);
4179 }
2d21ac55 4180 _vm_map_clip_start(&map->hdr, entry, startaddr);
3e170ce0
A
4181 if (map->holelistenabled) {
4182 vm_map_store_update_first_free(map, NULL, FALSE);
4183 } else {
4184 vm_map_store_update_first_free(map, map->first_free, FALSE);
4185 }
2d21ac55
A
4186 }
4187}
4188
1c79356b
A
4189
4190#define vm_map_copy_clip_start(copy, entry, startaddr) \
4191 MACRO_BEGIN \
4192 if ((startaddr) > (entry)->vme_start) \
4193 _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \
4194 MACRO_END
4195
4196/*
4197 * This routine is called only when it is known that
4198 * the entry must be split.
4199 */
91447636 4200static void
1c79356b 4201_vm_map_clip_start(
39037602
A
4202 struct vm_map_header *map_header,
4203 vm_map_entry_t entry,
4204 vm_map_offset_t start)
1c79356b 4205{
39037602 4206 vm_map_entry_t new_entry;
1c79356b
A
4207
4208 /*
4209 * Split off the front portion --
4210 * note that we must insert the new
4211 * entry BEFORE this one, so that
4212 * this entry has the specified starting
4213 * address.
4214 */
4215
fe8ab488
A
4216 if (entry->map_aligned) {
4217 assert(VM_MAP_PAGE_ALIGNED(start,
4218 VM_MAP_HDR_PAGE_MASK(map_header)));
4219 }
4220
7ddcb079 4221 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
1c79356b
A
4222 vm_map_entry_copy_full(new_entry, entry);
4223
4224 new_entry->vme_end = start;
e2d2fc5c 4225 assert(new_entry->vme_start < new_entry->vme_end);
3e170ce0 4226 VME_OFFSET_SET(entry, VME_OFFSET(entry) + (start - entry->vme_start));
e2d2fc5c 4227 assert(start < entry->vme_end);
1c79356b
A
4228 entry->vme_start = start;
4229
6d2010ae 4230 _vm_map_store_entry_link(map_header, entry->vme_prev, new_entry);
1c79356b
A
4231
4232 if (entry->is_sub_map)
3e170ce0 4233 vm_map_reference(VME_SUBMAP(new_entry));
1c79356b 4234 else
3e170ce0 4235 vm_object_reference(VME_OBJECT(new_entry));
1c79356b
A
4236}
4237
4238
4239/*
4240 * vm_map_clip_end: [ internal use only ]
4241 *
4242 * Asserts that the given entry ends at or before
4243 * the specified address; if necessary,
4244 * it splits the entry into two.
4245 */
e2d2fc5c 4246void
2d21ac55
A
4247vm_map_clip_end(
4248 vm_map_t map,
4249 vm_map_entry_t entry,
4250 vm_map_offset_t endaddr)
4251{
4252 if (endaddr > entry->vme_end) {
4253 /*
4254 * Within the scope of this clipping, limit "endaddr" to
4255 * the end of this map entry...
4256 */
4257 endaddr = entry->vme_end;
4258 }
4259#ifndef NO_NESTED_PMAP
fe8ab488 4260 if (entry->is_sub_map && entry->use_pmap) {
2d21ac55
A
4261 vm_map_offset_t start_unnest, end_unnest;
4262
4263 /*
4264 * Make sure the range between the start of this entry and
4265 * the new "endaddr" is no longer nested before we clip.
4266 * Unnest only the minimum range the platform can handle.
b0d623f7
A
4267 * vm_map_clip_unnest may perform additional adjustments to
4268 * the unnest range.
2d21ac55
A
4269 */
4270 start_unnest = entry->vme_start;
4271 end_unnest =
4272 (endaddr + pmap_nesting_size_min - 1) &
4273 ~(pmap_nesting_size_min - 1);
4274 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
4275 }
4276#endif /* NO_NESTED_PMAP */
4277 if (endaddr < entry->vme_end) {
3e170ce0 4278 if (VME_OBJECT(entry) &&
2d21ac55 4279 !entry->is_sub_map &&
3e170ce0 4280 VME_OBJECT(entry)->phys_contiguous) {
2d21ac55
A
4281 pmap_remove(map->pmap,
4282 (addr64_t)(entry->vme_start),
4283 (addr64_t)(entry->vme_end));
4284 }
39037602
A
4285 if (entry->vme_atomic) {
4286 panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)\n", map, entry);
4287 }
2d21ac55 4288 _vm_map_clip_end(&map->hdr, entry, endaddr);
3e170ce0
A
4289 if (map->holelistenabled) {
4290 vm_map_store_update_first_free(map, NULL, FALSE);
4291 } else {
4292 vm_map_store_update_first_free(map, map->first_free, FALSE);
4293 }
2d21ac55
A
4294 }
4295}
0c530ab8 4296
1c79356b
A
4297
4298#define vm_map_copy_clip_end(copy, entry, endaddr) \
4299 MACRO_BEGIN \
4300 if ((endaddr) < (entry)->vme_end) \
4301 _vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \
4302 MACRO_END
4303
4304/*
4305 * This routine is called only when it is known that
4306 * the entry must be split.
4307 */
91447636 4308static void
1c79356b 4309_vm_map_clip_end(
39037602
A
4310 struct vm_map_header *map_header,
4311 vm_map_entry_t entry,
4312 vm_map_offset_t end)
1c79356b 4313{
39037602 4314 vm_map_entry_t new_entry;
1c79356b
A
4315
4316 /*
4317 * Create a new entry and insert it
4318 * AFTER the specified entry
4319 */
4320
fe8ab488
A
4321 if (entry->map_aligned) {
4322 assert(VM_MAP_PAGE_ALIGNED(end,
4323 VM_MAP_HDR_PAGE_MASK(map_header)));
4324 }
4325
7ddcb079 4326 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
1c79356b
A
4327 vm_map_entry_copy_full(new_entry, entry);
4328
e2d2fc5c 4329 assert(entry->vme_start < end);
1c79356b 4330 new_entry->vme_start = entry->vme_end = end;
3e170ce0
A
4331 VME_OFFSET_SET(new_entry,
4332 VME_OFFSET(new_entry) + (end - entry->vme_start));
e2d2fc5c 4333 assert(new_entry->vme_start < new_entry->vme_end);
1c79356b 4334
6d2010ae 4335 _vm_map_store_entry_link(map_header, entry, new_entry);
1c79356b
A
4336
4337 if (entry->is_sub_map)
3e170ce0 4338 vm_map_reference(VME_SUBMAP(new_entry));
1c79356b 4339 else
3e170ce0 4340 vm_object_reference(VME_OBJECT(new_entry));
1c79356b
A
4341}
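
/*
 * Illustrative sketch, not part of the original vm_map.c: the clipping
 * pattern used by callers throughout this file.  With the map locked,
 * the entry containing "start" is split so that exactly [start, end)
 * can be modified without disturbing the rest of the mapping; the
 * caller-specific work on "entry" and all error handling are omitted.
 * The helper name is hypothetical.
 */
static void
example_clip_range_locked(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	vm_map_entry_t entry;

	if (vm_map_lookup_entry(map, start, &entry)) {
		vm_map_clip_start(map, entry, start);
		vm_map_clip_end(map, entry, end);
	}
}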
4342
4343
4344/*
4345 * VM_MAP_RANGE_CHECK: [ internal use only ]
4346 *
4347 * Asserts that the starting and ending region
4348 * addresses fall within the valid range of the map.
4349 */
2d21ac55
A
4350#define VM_MAP_RANGE_CHECK(map, start, end) \
4351 MACRO_BEGIN \
4352 if (start < vm_map_min(map)) \
4353 start = vm_map_min(map); \
4354 if (end > vm_map_max(map)) \
4355 end = vm_map_max(map); \
4356 if (start > end) \
4357 start = end; \
4358 MACRO_END
1c79356b
A
4359
4360/*
4361 * vm_map_range_check: [ internal use only ]
4362 *
4363 * Check that the region defined by the specified start and
4364 * end addresses is wholly contained within a single map
4365 * entry or set of adjacent map entries of the specified map,
4366 * i.e. the specified region contains no unmapped space.
4367 * If any or all of the region is unmapped, FALSE is returned.
4368 * Otherwise, TRUE is returned and if the output argument 'entry'
4369 * is not NULL it points to the map entry containing the start
4370 * of the region.
4371 *
4372 * The map is locked for reading on entry and is left locked.
4373 */
91447636 4374static boolean_t
1c79356b 4375vm_map_range_check(
39037602
A
4376 vm_map_t map,
4377 vm_map_offset_t start,
4378 vm_map_offset_t end,
1c79356b
A
4379 vm_map_entry_t *entry)
4380{
4381 vm_map_entry_t cur;
39037602 4382 vm_map_offset_t prev;
1c79356b
A
4383
4384 /*
4385 * Basic sanity checks first
4386 */
4387 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
4388 return (FALSE);
4389
4390 /*
4391 * Check first if the region starts within a valid
4392 * mapping for the map.
4393 */
4394 if (!vm_map_lookup_entry(map, start, &cur))
4395 return (FALSE);
4396
4397 /*
4398 * Optimize for the case that the region is contained
4399 * in a single map entry.
4400 */
4401 if (entry != (vm_map_entry_t *) NULL)
4402 *entry = cur;
4403 if (end <= cur->vme_end)
4404 return (TRUE);
4405
4406 /*
4407 * If the region is not wholly contained within a
4408 * single entry, walk the entries looking for holes.
4409 */
4410 prev = cur->vme_end;
4411 cur = cur->vme_next;
4412 while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
4413 if (end <= cur->vme_end)
4414 return (TRUE);
4415 prev = cur->vme_end;
4416 cur = cur->vme_next;
4417 }
4418 return (FALSE);
4419}
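
/*
 * Illustrative sketch, not part of the original vm_map.c: typical use
 * of vm_map_range_check().  The caller already holds the map lock (at
 * least for reading) and wants to refuse ranges that contain unmapped
 * space before operating on them.  The helper name is hypothetical.
 */
static kern_return_t
example_require_fully_mapped(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	vm_map_entry_t first_entry;

	if (!vm_map_range_check(map, start, end, &first_entry))
		return KERN_INVALID_ADDRESS;
	return KERN_SUCCESS;
}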
4420
4421/*
4422 * vm_map_submap: [ kernel use only ]
4423 *
4424 * Mark the given range as handled by a subordinate map.
4425 *
4426 * This range must have been created with vm_map_find using
4427 * the vm_submap_object, and no other operations may have been
4428 * performed on this range prior to calling vm_map_submap.
4429 *
4430 * Only a limited number of operations can be performed
4431 * within this range after calling vm_map_submap:
4432 * vm_fault
4433 * [Don't try vm_map_copyin!]
4434 *
4435 * To remove a submapping, one must first remove the
4436 * range from the superior map, and then destroy the
4437 * submap (if desired). [Better yet, don't try it.]
4438 */
4439kern_return_t
4440vm_map_submap(
fe8ab488 4441 vm_map_t map,
91447636
A
4442 vm_map_offset_t start,
4443 vm_map_offset_t end,
fe8ab488 4444 vm_map_t submap,
91447636 4445 vm_map_offset_t offset,
0c530ab8 4446#ifdef NO_NESTED_PMAP
91447636 4447 __unused
0c530ab8 4448#endif /* NO_NESTED_PMAP */
fe8ab488 4449 boolean_t use_pmap)
1c79356b
A
4450{
4451 vm_map_entry_t entry;
39037602
A
4452 kern_return_t result = KERN_INVALID_ARGUMENT;
4453 vm_object_t object;
1c79356b
A
4454
4455 vm_map_lock(map);
4456
2d21ac55 4457 if (! vm_map_lookup_entry(map, start, &entry)) {
1c79356b 4458 entry = entry->vme_next;
2d21ac55 4459 }
1c79356b 4460
2d21ac55
A
4461 if (entry == vm_map_to_entry(map) ||
4462 entry->is_sub_map) {
1c79356b
A
4463 vm_map_unlock(map);
4464 return KERN_INVALID_ARGUMENT;
4465 }
4466
2d21ac55 4467 vm_map_clip_start(map, entry, start);
1c79356b
A
4468 vm_map_clip_end(map, entry, end);
4469
4470 if ((entry->vme_start == start) && (entry->vme_end == end) &&
4471 (!entry->is_sub_map) &&
3e170ce0 4472 ((object = VME_OBJECT(entry)) == vm_submap_object) &&
1c79356b
A
4473 (object->resident_page_count == 0) &&
4474 (object->copy == VM_OBJECT_NULL) &&
4475 (object->shadow == VM_OBJECT_NULL) &&
4476 (!object->pager_created)) {
3e170ce0
A
4477 VME_OFFSET_SET(entry, (vm_object_offset_t)offset);
4478 VME_OBJECT_SET(entry, VM_OBJECT_NULL);
2d21ac55
A
4479 vm_object_deallocate(object);
4480 entry->is_sub_map = TRUE;
fe8ab488 4481 entry->use_pmap = FALSE;
3e170ce0 4482 VME_SUBMAP_SET(entry, submap);
2d21ac55 4483 vm_map_reference(submap);
316670eb
A
4484 if (submap->mapped_in_other_pmaps == FALSE &&
4485 vm_map_pmap(submap) != PMAP_NULL &&
4486 vm_map_pmap(submap) != vm_map_pmap(map)) {
4487 /*
4488 * This submap is being mapped in a map
4489 * that uses a different pmap.
4490 * Set its "mapped_in_other_pmaps" flag
4491 * to indicate that we now need to
4492 * remove mappings from all pmaps rather
4493 * than just the submap's pmap.
4494 */
4495 submap->mapped_in_other_pmaps = TRUE;
4496 }
2d21ac55 4497
0c530ab8 4498#ifndef NO_NESTED_PMAP
2d21ac55
A
4499 if (use_pmap) {
4500 /* nest if platform code will allow */
4501 if(submap->pmap == NULL) {
316670eb
A
4502 ledger_t ledger = map->pmap->ledger;
4503 submap->pmap = pmap_create(ledger,
4504 (vm_map_size_t) 0, FALSE);
2d21ac55
A
4505 if(submap->pmap == PMAP_NULL) {
4506 vm_map_unlock(map);
4507 return(KERN_NO_SPACE);
55e303ae 4508 }
55e303ae 4509 }
2d21ac55 4510 result = pmap_nest(map->pmap,
3e170ce0 4511 (VME_SUBMAP(entry))->pmap,
2d21ac55
A
4512 (addr64_t)start,
4513 (addr64_t)start,
4514 (uint64_t)(end - start));
4515 if(result)
4516 panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result);
4517 entry->use_pmap = TRUE;
4518 }
0c530ab8 4519#else /* NO_NESTED_PMAP */
2d21ac55 4520 pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end);
0c530ab8 4521#endif /* NO_NESTED_PMAP */
2d21ac55 4522 result = KERN_SUCCESS;
1c79356b
A
4523 }
4524 vm_map_unlock(map);
4525
4526 return(result);
4527}
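
/*
 * Illustrative sketch, not part of the original vm_map.c: installing a
 * submap over a range of "parent" that was previously reserved with the
 * vm_submap_object, as the comment above requires.  "submap" and the
 * [start, end) range are assumed to have been set up by the caller;
 * use_pmap == TRUE requests nested-pmap sharing where the platform
 * allows it.  The helper name is hypothetical.
 */
static kern_return_t
example_install_submap(
	vm_map_t	parent,
	vm_map_offset_t	start,
	vm_map_offset_t	end,
	vm_map_t	submap)
{
	return vm_map_submap(parent, start, end, submap,
			     (vm_map_offset_t)0,	/* offset into submap */
			     TRUE);			/* use_pmap */
}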
4528
490019cf 4529
1c79356b
A
4530/*
4531 * vm_map_protect:
4532 *
4533 * Sets the protection of the specified address
4534 * region in the target map. If "set_max" is
4535 * specified, the maximum protection is to be set;
4536 * otherwise, only the current protection is affected.
4537 */
4538kern_return_t
4539vm_map_protect(
39037602
A
4540 vm_map_t map,
4541 vm_map_offset_t start,
4542 vm_map_offset_t end,
4543 vm_prot_t new_prot,
4544 boolean_t set_max)
4545{
4546 vm_map_entry_t current;
4547 vm_map_offset_t prev;
1c79356b
A
4548 vm_map_entry_t entry;
4549 vm_prot_t new_max;
1c79356b
A
4550
4551 XPR(XPR_VM_MAP,
2d21ac55 4552 "vm_map_protect, 0x%X start 0x%X end 0x%X, new 0x%X %d",
b0d623f7 4553 map, start, end, new_prot, set_max);
1c79356b
A
4554
4555 vm_map_lock(map);
4556
91447636
A
4557 /* LP64todo - remove this check when vm_map_commpage64()
4558 * no longer has to stuff in a map_entry for the commpage
4559 * above the map's max_offset.
4560 */
4561 if (start >= map->max_offset) {
4562 vm_map_unlock(map);
4563 return(KERN_INVALID_ADDRESS);
4564 }
4565
b0d623f7
A
4566 while(1) {
4567 /*
4568 * Lookup the entry. If it doesn't start in a valid
4569 * entry, return an error.
4570 */
4571 if (! vm_map_lookup_entry(map, start, &entry)) {
4572 vm_map_unlock(map);
4573 return(KERN_INVALID_ADDRESS);
4574 }
4575
4576 if (entry->superpage_size && (start & (SUPERPAGE_SIZE-1))) { /* extend request to whole entry */
4577 start = SUPERPAGE_ROUND_DOWN(start);
4578 continue;
4579 }
4580 break;
4581 }
4582 if (entry->superpage_size)
4583 end = SUPERPAGE_ROUND_UP(end);
1c79356b
A
4584
4585 /*
4586 * Make a first pass to check for protection and address
4587 * violations.
4588 */
4589
4590 current = entry;
4591 prev = current->vme_start;
4592 while ((current != vm_map_to_entry(map)) &&
4593 (current->vme_start < end)) {
4594
4595 /*
4596 * If there is a hole, return an error.
4597 */
4598 if (current->vme_start != prev) {
4599 vm_map_unlock(map);
4600 return(KERN_INVALID_ADDRESS);
4601 }
4602
4603 new_max = current->max_protection;
4604 if(new_prot & VM_PROT_COPY) {
4605 new_max |= VM_PROT_WRITE;
4606 if ((new_prot & (new_max | VM_PROT_COPY)) != new_prot) {
4607 vm_map_unlock(map);
4608 return(KERN_PROTECTION_FAILURE);
4609 }
4610 } else {
4611 if ((new_prot & new_max) != new_prot) {
4612 vm_map_unlock(map);
4613 return(KERN_PROTECTION_FAILURE);
4614 }
4615 }
4616
593a1d5f 4617
1c79356b
A
4618 prev = current->vme_end;
4619 current = current->vme_next;
4620 }
39037602
A
4621
4622
1c79356b
A
4623 if (end > prev) {
4624 vm_map_unlock(map);
4625 return(KERN_INVALID_ADDRESS);
4626 }
4627
4628 /*
4629 * Go back and fix up protections.
4630 * Clip to start here if the range starts within
4631 * the entry.
4632 */
4633
4634 current = entry;
2d21ac55
A
4635 if (current != vm_map_to_entry(map)) {
4636 /* clip and unnest if necessary */
4637 vm_map_clip_start(map, current, start);
1c79356b 4638 }
2d21ac55 4639
1c79356b
A
4640 while ((current != vm_map_to_entry(map)) &&
4641 (current->vme_start < end)) {
4642
4643 vm_prot_t old_prot;
4644
4645 vm_map_clip_end(map, current, end);
4646
fe8ab488
A
4647 if (current->is_sub_map) {
4648 /* clipping did unnest if needed */
4649 assert(!current->use_pmap);
4650 }
2d21ac55 4651
1c79356b
A
4652 old_prot = current->protection;
4653
4654 if(new_prot & VM_PROT_COPY) {
4655 /* caller is asking specifically to copy the */
4656 /* mapped data; this implies that max protection */
4657 /* will include write. Caller must be prepared */
4658 /* for loss of shared memory communication in the */
4659 /* target area after taking this step */
6d2010ae 4660
3e170ce0
A
4661 if (current->is_sub_map == FALSE &&
4662 VME_OBJECT(current) == VM_OBJECT_NULL) {
4663 VME_OBJECT_SET(current,
4664 vm_object_allocate(
4665 (vm_map_size_t)
4666 (current->vme_end -
4667 current->vme_start)));
4668 VME_OFFSET_SET(current, 0);
fe8ab488 4669 assert(current->use_pmap);
6d2010ae 4670 }
3e170ce0 4671 assert(current->wired_count == 0);
1c79356b
A
4672 current->needs_copy = TRUE;
4673 current->max_protection |= VM_PROT_WRITE;
4674 }
4675
4676 if (set_max)
4677 current->protection =
4678 (current->max_protection =
2d21ac55
A
4679 new_prot & ~VM_PROT_COPY) &
4680 old_prot;
1c79356b
A
4681 else
4682 current->protection = new_prot & ~VM_PROT_COPY;
4683
4684 /*
4685 * Update physical map if necessary.
4686 * If the request is to turn off write protection,
4687 * we won't do it for real (in pmap). This is because
4688 * it would cause copy-on-write to fail. We've already
4689 * set the new protection in the map, so if a
4690 * write-protect fault occurred, it will be fixed up
4691 * properly, COW or not.
4692 */
1c79356b 4693 if (current->protection != old_prot) {
1c79356b
A
4694 /* Look one level in we support nested pmaps */
4695 /* from mapped submaps which are direct entries */
4696 /* in our map */
0c530ab8 4697
2d21ac55 4698 vm_prot_t prot;
0c530ab8 4699
39037602
A
4700 prot = current->protection;
4701 if (current->is_sub_map || (VME_OBJECT(current) == NULL) || (VME_OBJECT(current) != compressor_object)) {
4702 prot &= ~VM_PROT_WRITE;
4703 } else {
4704 assert(!VME_OBJECT(current)->code_signed);
4705 assert(VME_OBJECT(current)->copy_strategy == MEMORY_OBJECT_COPY_NONE);
4706 }
2d21ac55 4707
3e170ce0 4708 if (override_nx(map, VME_ALIAS(current)) && prot)
0c530ab8 4709 prot |= VM_PROT_EXECUTE;
2d21ac55 4710
490019cf 4711
0c530ab8 4712 if (current->is_sub_map && current->use_pmap) {
3e170ce0 4713 pmap_protect(VME_SUBMAP(current)->pmap,
2d21ac55
A
4714 current->vme_start,
4715 current->vme_end,
4716 prot);
1c79356b 4717 } else {
2d21ac55
A
4718 pmap_protect(map->pmap,
4719 current->vme_start,
4720 current->vme_end,
4721 prot);
1c79356b 4722 }
1c79356b
A
4723 }
4724 current = current->vme_next;
4725 }
4726
5353443c 4727 current = entry;
91447636
A
4728 while ((current != vm_map_to_entry(map)) &&
4729 (current->vme_start <= end)) {
5353443c
A
4730 vm_map_simplify_entry(map, current);
4731 current = current->vme_next;
4732 }
4733
1c79356b
A
4734 vm_map_unlock(map);
4735 return(KERN_SUCCESS);
4736}
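
/*
 * Illustrative sketch, not part of the original vm_map.c: making a
 * range read-only.  With set_max == FALSE only the current protection
 * changes and can later be raised again up to max_protection; passing
 * TRUE would also lower max_protection irreversibly.  The helper name
 * is hypothetical; [start, end) is assumed to be fully mapped.
 */
static kern_return_t
example_make_range_read_only(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	return vm_map_protect(map, start, end,
			      VM_PROT_READ,	/* new_prot */
			      FALSE);		/* set_max */
}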
4737
4738/*
4739 * vm_map_inherit:
4740 *
4741 * Sets the inheritance of the specified address
4742 * range in the target map. Inheritance
4743 * affects how the map will be shared with
4744 * child maps at the time of vm_map_fork.
4745 */
4746kern_return_t
4747vm_map_inherit(
39037602
A
4748 vm_map_t map,
4749 vm_map_offset_t start,
4750 vm_map_offset_t end,
4751 vm_inherit_t new_inheritance)
1c79356b 4752{
39037602 4753 vm_map_entry_t entry;
1c79356b
A
4754 vm_map_entry_t temp_entry;
4755
4756 vm_map_lock(map);
4757
4758 VM_MAP_RANGE_CHECK(map, start, end);
4759
4760 if (vm_map_lookup_entry(map, start, &temp_entry)) {
4761 entry = temp_entry;
1c79356b
A
4762 }
4763 else {
4764 temp_entry = temp_entry->vme_next;
4765 entry = temp_entry;
4766 }
4767
4768 /* first check entire range for submaps which can't support the */
4769 /* given inheritance. */
4770 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
4771 if(entry->is_sub_map) {
91447636
A
4772 if(new_inheritance == VM_INHERIT_COPY) {
4773 vm_map_unlock(map);
1c79356b 4774 return(KERN_INVALID_ARGUMENT);
91447636 4775 }
1c79356b
A
4776 }
4777
4778 entry = entry->vme_next;
4779 }
4780
4781 entry = temp_entry;
2d21ac55
A
4782 if (entry != vm_map_to_entry(map)) {
4783 /* clip and unnest if necessary */
4784 vm_map_clip_start(map, entry, start);
4785 }
1c79356b
A
4786
4787 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
4788 vm_map_clip_end(map, entry, end);
fe8ab488
A
4789 if (entry->is_sub_map) {
4790 /* clip did unnest if needed */
4791 assert(!entry->use_pmap);
4792 }
1c79356b
A
4793
4794 entry->inheritance = new_inheritance;
4795
4796 entry = entry->vme_next;
4797 }
4798
4799 vm_map_unlock(map);
4800 return(KERN_SUCCESS);
4801}
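
/*
 * Illustrative sketch, not part of the original vm_map.c: marking a
 * range so that a child map created at vm_map_fork() time shares it
 * with the parent instead of receiving a copy.  Note that the loop
 * above rejects VM_INHERIT_COPY on submap entries.  The helper name is
 * hypothetical.
 */
static kern_return_t
example_share_range_with_children(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	return vm_map_inherit(map, start, end, VM_INHERIT_SHARE);
}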
4802
2d21ac55
A
4803/*
4804 * Update the accounting for the amount of wired memory in this map. If the user has
4805 * exceeded the defined limits, then we fail. Wiring on behalf of the kernel never fails.
4806 */
4807
4808static kern_return_t
4809add_wire_counts(
4810 vm_map_t map,
4811 vm_map_entry_t entry,
4812 boolean_t user_wire)
4813{
4814 vm_map_size_t size;
4815
4816 if (user_wire) {
6d2010ae 4817 unsigned int total_wire_count = vm_page_wire_count + vm_lopage_free_count;
2d21ac55
A
4818
4819 /*
4820 * We're wiring memory at the request of the user. Check if this is the first time the user is wiring
4821 * this map entry.
4822 */
4823
4824 if (entry->user_wired_count == 0) {
4825 size = entry->vme_end - entry->vme_start;
4826
4827 /*
4828 * Since this is the first time the user is wiring this map entry, check to see if we're
4829 * exceeding the user wire limits. There is a per map limit which is the smaller of either
4830 * the process's rlimit or the global vm_user_wire_limit which caps this value. There is also
4831 * a system-wide limit on the amount of memory all users can wire. If the user is over either
4832 * limit, then we fail.
4833 */
4834
4835 if(size + map->user_wire_size > MIN(map->user_wire_limit, vm_user_wire_limit) ||
6d2010ae
A
4836 size + ptoa_64(total_wire_count) > vm_global_user_wire_limit ||
4837 size + ptoa_64(total_wire_count) > max_mem - vm_global_no_user_wire_amount)
2d21ac55
A
4838 return KERN_RESOURCE_SHORTAGE;
4839
4840 /*
4841 * The first time the user wires an entry, we also increment the wired_count and add this to
4842 * the total that has been wired in the map.
4843 */
4844
4845 if (entry->wired_count >= MAX_WIRE_COUNT)
4846 return KERN_FAILURE;
4847
4848 entry->wired_count++;
4849 map->user_wire_size += size;
4850 }
4851
4852 if (entry->user_wired_count >= MAX_WIRE_COUNT)
4853 return KERN_FAILURE;
4854
4855 entry->user_wired_count++;
4856
4857 } else {
4858
4859 /*
4860 * The kernel's wiring the memory. Just bump the count and continue.
4861 */
4862
4863 if (entry->wired_count >= MAX_WIRE_COUNT)
4864 panic("vm_map_wire: too many wirings");
4865
4866 entry->wired_count++;
4867 }
4868
4869 return KERN_SUCCESS;
4870}
4871
4872/*
4873 * Update the memory wiring accounting now that the given map entry is being unwired.
4874 */
4875
4876static void
4877subtract_wire_counts(
4878 vm_map_t map,
4879 vm_map_entry_t entry,
4880 boolean_t user_wire)
4881{
4882
4883 if (user_wire) {
4884
4885 /*
4886 * We're unwiring memory at the request of the user. See if we're removing the last user wire reference.
4887 */
4888
4889 if (entry->user_wired_count == 1) {
4890
4891 /*
4892 * We're removing the last user wire reference. Decrement the wired_count and the total
4893 * user wired memory for this map.
4894 */
4895
4896 assert(entry->wired_count >= 1);
4897 entry->wired_count--;
4898 map->user_wire_size -= entry->vme_end - entry->vme_start;
4899 }
4900
4901 assert(entry->user_wired_count >= 1);
4902 entry->user_wired_count--;
4903
4904 } else {
4905
4906 /*
4907 * The kernel is unwiring the memory. Just update the count.
4908 */
4909
4910 assert(entry->wired_count >= 1);
4911 entry->wired_count--;
4912 }
4913}
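
/*
 * Illustrative sketch, not part of the original vm_map.c: the three
 * user-wiring limits that add_wire_counts() checks above, written out
 * as a standalone predicate.  "map" and "size" have the same meaning
 * as in add_wire_counts(); the globals are the ones it references.
 * The helper name is hypothetical.
 */
static boolean_t
example_user_wire_would_exceed_limits(
	vm_map_t	map,
	vm_map_size_t	size)
{
	unsigned int total_wire_count =
		vm_page_wire_count + vm_lopage_free_count;

	/* per-map limit: the smaller of the task rlimit and vm_user_wire_limit */
	if (size + map->user_wire_size >
	    MIN(map->user_wire_limit, vm_user_wire_limit))
		return TRUE;
	/* system-wide cap on user-wired memory */
	if (size + ptoa_64(total_wire_count) > vm_global_user_wire_limit)
		return TRUE;
	/* always leave vm_global_no_user_wire_amount unwired */
	if (size + ptoa_64(total_wire_count) >
	    max_mem - vm_global_no_user_wire_amount)
		return TRUE;
	return FALSE;
}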
4914
39037602 4915
1c79356b
A
4916/*
4917 * vm_map_wire:
4918 *
4919 * Sets the pageability of the specified address range in the
4920 * target map as wired. Regions specified as not pageable require
4921 * locked-down physical memory and physical page maps. The
4922 * access_type variable indicates types of accesses that must not
4923 * generate page faults. This is checked against protection of
4924 * memory being locked-down.
4925 *
4926 * The map must not be locked, but a reference must remain to the
4927 * map throughout the call.
4928 */
91447636 4929static kern_return_t
1c79356b 4930vm_map_wire_nested(
39037602
A
4931 vm_map_t map,
4932 vm_map_offset_t start,
4933 vm_map_offset_t end,
4934 vm_prot_t caller_prot,
1c79356b 4935 boolean_t user_wire,
9bccf70c 4936 pmap_t map_pmap,
fe8ab488
A
4937 vm_map_offset_t pmap_addr,
4938 ppnum_t *physpage_p)
1c79356b 4939{
39037602
A
4940 vm_map_entry_t entry;
4941 vm_prot_t access_type;
1c79356b 4942 struct vm_map_entry *first_entry, tmp_entry;
91447636 4943 vm_map_t real_map;
39037602 4944 vm_map_offset_t s,e;
1c79356b
A
4945 kern_return_t rc;
4946 boolean_t need_wakeup;
4947 boolean_t main_map = FALSE;
9bccf70c 4948 wait_interrupt_t interruptible_state;
0b4e3aa0 4949 thread_t cur_thread;
1c79356b 4950 unsigned int last_timestamp;
91447636 4951 vm_map_size_t size;
fe8ab488
A
4952 boolean_t wire_and_extract;
4953
3e170ce0
A
4954 access_type = (caller_prot & VM_PROT_ALL);
4955
fe8ab488
A
4956 wire_and_extract = FALSE;
4957 if (physpage_p != NULL) {
4958 /*
4959 * The caller wants the physical page number of the
4960 * wired page. We return only one physical page number
4961 * so this works for only one page at a time.
4962 */
4963 if ((end - start) != PAGE_SIZE) {
4964 return KERN_INVALID_ARGUMENT;
4965 }
4966 wire_and_extract = TRUE;
4967 *physpage_p = 0;
4968 }
1c79356b
A
4969
4970 vm_map_lock(map);
4971 if(map_pmap == NULL)
4972 main_map = TRUE;
4973 last_timestamp = map->timestamp;
4974
4975 VM_MAP_RANGE_CHECK(map, start, end);
4976 assert(page_aligned(start));
4977 assert(page_aligned(end));
39236c6e
A
4978 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
4979 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
0b4e3aa0
A
4980 if (start == end) {
4981 /* We wired what the caller asked for, zero pages */
4982 vm_map_unlock(map);
4983 return KERN_SUCCESS;
4984 }
1c79356b 4985
2d21ac55
A
4986 need_wakeup = FALSE;
4987 cur_thread = current_thread();
4988
4989 s = start;
4990 rc = KERN_SUCCESS;
4991
4992 if (vm_map_lookup_entry(map, s, &first_entry)) {
1c79356b 4993 entry = first_entry;
2d21ac55
A
4994 /*
4995 * vm_map_clip_start will be done later.
4996 * We don't want to unnest any nested submaps here !
4997 */
1c79356b
A
4998 } else {
4999 /* Start address is not in map */
2d21ac55
A
5000 rc = KERN_INVALID_ADDRESS;
5001 goto done;
1c79356b
A
5002 }
5003
2d21ac55
A
5004 while ((entry != vm_map_to_entry(map)) && (s < end)) {
5005 /*
5006 * At this point, we have wired from "start" to "s".
5007 * We still need to wire from "s" to "end".
5008 *
5009 * "entry" hasn't been clipped, so it could start before "s"
5010 * and/or end after "end".
5011 */
5012
5013 /* "e" is how far we want to wire in this entry */
5014 e = entry->vme_end;
5015 if (e > end)
5016 e = end;
5017
1c79356b
A
5018 /*
5019 * If another thread is wiring/unwiring this entry then
5020 * block after informing other thread to wake us up.
5021 */
5022 if (entry->in_transition) {
9bccf70c
A
5023 wait_result_t wait_result;
5024
1c79356b
A
5025 /*
5026 * We have not clipped the entry. Make sure that
5027 * the start address is in range so that the lookup
5028 * below will succeed.
2d21ac55
A
5029 * "s" is the current starting point: we've already
5030 * wired from "start" to "s" and we still have
5031 * to wire from "s" to "end".
1c79356b 5032 */
1c79356b
A
5033
5034 entry->needs_wakeup = TRUE;
5035
5036 /*
5037 * wake up anybody waiting on entries that we have
5038 * already wired.
5039 */
5040 if (need_wakeup) {
5041 vm_map_entry_wakeup(map);
5042 need_wakeup = FALSE;
5043 }
5044 /*
5045 * User wiring is interruptible
5046 */
9bccf70c 5047 wait_result = vm_map_entry_wait(map,
2d21ac55
A
5048 (user_wire) ? THREAD_ABORTSAFE :
5049 THREAD_UNINT);
9bccf70c 5050 if (user_wire && wait_result == THREAD_INTERRUPTED) {
1c79356b
A
5051 /*
5052 * undo the wirings we have done so far
5053 * We do not clear the needs_wakeup flag,
5054 * because we cannot tell if we were the
5055 * only one waiting.
5056 */
2d21ac55
A
5057 rc = KERN_FAILURE;
5058 goto done;
1c79356b
A
5059 }
5060
1c79356b
A
5061 /*
5062 * Cannot avoid a lookup here. reset timestamp.
5063 */
5064 last_timestamp = map->timestamp;
5065
5066 /*
5067 * The entry could have been clipped, so look it up again.
5068 * The worst that can happen is that it no longer exists.
5069 */
5070 if (!vm_map_lookup_entry(map, s, &first_entry)) {
1c79356b
A
5071 /*
5072 * User: undo everything up to the previous
5073 * entry. Let vm_map_unwire worry about
5074 * checking the validity of the range.
5075 */
2d21ac55
A
5076 rc = KERN_FAILURE;
5077 goto done;
1c79356b
A
5078 }
5079 entry = first_entry;
5080 continue;
5081 }
2d21ac55
A
5082
5083 if (entry->is_sub_map) {
91447636
A
5084 vm_map_offset_t sub_start;
5085 vm_map_offset_t sub_end;
5086 vm_map_offset_t local_start;
5087 vm_map_offset_t local_end;
1c79356b 5088 pmap_t pmap;
2d21ac55 5089
fe8ab488
A
5090 if (wire_and_extract) {
5091 /*
5092 * Wiring would result in copy-on-write
5093 * which would not be compatible with
5094 * the sharing we have with the original
5095 * provider of this memory.
5096 */
5097 rc = KERN_INVALID_ARGUMENT;
5098 goto done;
5099 }
5100
2d21ac55 5101 vm_map_clip_start(map, entry, s);
1c79356b
A
5102 vm_map_clip_end(map, entry, end);
5103
3e170ce0 5104 sub_start = VME_OFFSET(entry);
2d21ac55 5105 sub_end = entry->vme_end;
3e170ce0 5106 sub_end += VME_OFFSET(entry) - entry->vme_start;
2d21ac55 5107
1c79356b
A
5108 local_end = entry->vme_end;
5109 if(map_pmap == NULL) {
2d21ac55
A
5110 vm_object_t object;
5111 vm_object_offset_t offset;
5112 vm_prot_t prot;
5113 boolean_t wired;
5114 vm_map_entry_t local_entry;
5115 vm_map_version_t version;
5116 vm_map_t lookup_map;
5117
1c79356b 5118 if(entry->use_pmap) {
3e170ce0 5119 pmap = VME_SUBMAP(entry)->pmap;
9bccf70c
A
5120 /* ppc implementation requires that */
5121 /* submaps pmap address ranges line */
5122 /* up with parent map */
5123#ifdef notdef
5124 pmap_addr = sub_start;
5125#endif
2d21ac55 5126 pmap_addr = s;
1c79356b
A
5127 } else {
5128 pmap = map->pmap;
2d21ac55 5129 pmap_addr = s;
1c79356b 5130 }
2d21ac55 5131
1c79356b 5132 if (entry->wired_count) {
2d21ac55
A
5133 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
5134 goto done;
5135
5136 /*
5137 * The map was not unlocked:
5138 * no need to goto re-lookup.
5139 * Just go directly to next entry.
5140 */
1c79356b 5141 entry = entry->vme_next;
2d21ac55 5142 s = entry->vme_start;
1c79356b
A
5143 continue;
5144
2d21ac55 5145 }
9bccf70c 5146
2d21ac55
A
5147 /* call vm_map_lookup_locked to */
5148 /* cause any needs copy to be */
5149 /* evaluated */
5150 local_start = entry->vme_start;
5151 lookup_map = map;
5152 vm_map_lock_write_to_read(map);
5153 if(vm_map_lookup_locked(
5154 &lookup_map, local_start,
39037602 5155 access_type | VM_PROT_COPY,
2d21ac55
A
5156 OBJECT_LOCK_EXCLUSIVE,
5157 &version, &object,
5158 &offset, &prot, &wired,
5159 NULL,
5160 &real_map)) {
1c79356b 5161
2d21ac55 5162 vm_map_unlock_read(lookup_map);
4bd07ac2 5163 assert(map_pmap == NULL);
2d21ac55
A
5164 vm_map_unwire(map, start,
5165 s, user_wire);
5166 return(KERN_FAILURE);
5167 }
316670eb 5168 vm_object_unlock(object);
2d21ac55
A
5169 if(real_map != lookup_map)
5170 vm_map_unlock(real_map);
5171 vm_map_unlock_read(lookup_map);
5172 vm_map_lock(map);
1c79356b 5173
2d21ac55
A
5174 /* we unlocked, so must re-lookup */
5175 if (!vm_map_lookup_entry(map,
5176 local_start,
5177 &local_entry)) {
5178 rc = KERN_FAILURE;
5179 goto done;
5180 }
5181
5182 /*
5183 * entry could have been "simplified",
5184 * so re-clip
5185 */
5186 entry = local_entry;
5187 assert(s == local_start);
5188 vm_map_clip_start(map, entry, s);
5189 vm_map_clip_end(map, entry, end);
5190 /* re-compute "e" */
5191 e = entry->vme_end;
5192 if (e > end)
5193 e = end;
5194
5195 /* did we have a change of type? */
5196 if (!entry->is_sub_map) {
5197 last_timestamp = map->timestamp;
5198 continue;
1c79356b
A
5199 }
5200 } else {
9bccf70c 5201 local_start = entry->vme_start;
2d21ac55
A
5202 pmap = map_pmap;
5203 }
5204
5205 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
5206 goto done;
5207
5208 entry->in_transition = TRUE;
5209
5210 vm_map_unlock(map);
3e170ce0 5211 rc = vm_map_wire_nested(VME_SUBMAP(entry),
1c79356b 5212 sub_start, sub_end,
3e170ce0 5213 caller_prot,
fe8ab488
A
5214 user_wire, pmap, pmap_addr,
5215 NULL);
2d21ac55 5216 vm_map_lock(map);
9bccf70c 5217
1c79356b
A
5218 /*
5219 * Find the entry again. It could have been clipped
5220 * after we unlocked the map.
5221 */
9bccf70c
A
5222 if (!vm_map_lookup_entry(map, local_start,
5223 &first_entry))
5224 panic("vm_map_wire: re-lookup failed");
5225 entry = first_entry;
1c79356b 5226
2d21ac55
A
5227 assert(local_start == s);
5228 /* re-compute "e" */
5229 e = entry->vme_end;
5230 if (e > end)
5231 e = end;
5232
1c79356b
A
5233 last_timestamp = map->timestamp;
5234 while ((entry != vm_map_to_entry(map)) &&
2d21ac55 5235 (entry->vme_start < e)) {
1c79356b
A
5236 assert(entry->in_transition);
5237 entry->in_transition = FALSE;
5238 if (entry->needs_wakeup) {
5239 entry->needs_wakeup = FALSE;
5240 need_wakeup = TRUE;
5241 }
5242 if (rc != KERN_SUCCESS) {/* from vm_*_wire */
2d21ac55 5243 subtract_wire_counts(map, entry, user_wire);
1c79356b
A
5244 }
5245 entry = entry->vme_next;
5246 }
5247 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
2d21ac55 5248 goto done;
1c79356b 5249 }
2d21ac55
A
5250
5251 /* no need to relookup again */
5252 s = entry->vme_start;
1c79356b
A
5253 continue;
5254 }
5255
5256 /*
5257 * If this entry is already wired then increment
5258 * the appropriate wire reference count.
5259 */
9bccf70c 5260 if (entry->wired_count) {
fe8ab488
A
5261
5262 if ((entry->protection & access_type) != access_type) {
5263 /* found a protection problem */
5264
5265 /*
5266 * XXX FBDP
5267 * We should always return an error
5268 * in this case but since we didn't
5269 * enforce it before, let's do
5270 * it only for the new "wire_and_extract"
5271 * code path for now...
5272 */
5273 if (wire_and_extract) {
5274 rc = KERN_PROTECTION_FAILURE;
5275 goto done;
5276 }
5277 }
5278
1c79356b
A
5279 /*
5280 * entry is already wired down, get our reference
5281 * after clipping to our range.
5282 */
2d21ac55 5283 vm_map_clip_start(map, entry, s);
1c79356b 5284 vm_map_clip_end(map, entry, end);
1c79356b 5285
2d21ac55
A
5286 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
5287 goto done;
5288
fe8ab488
A
5289 if (wire_and_extract) {
5290 vm_object_t object;
5291 vm_object_offset_t offset;
5292 vm_page_t m;
5293
5294 /*
5295 * We don't have to "wire" the page again
5296 * but we still have to "extract" its
5297 * physical page number, after some sanity
5298 * checks.
5299 */
5300 assert((entry->vme_end - entry->vme_start)
5301 == PAGE_SIZE);
5302 assert(!entry->needs_copy);
5303 assert(!entry->is_sub_map);
3e170ce0 5304 assert(VME_OBJECT(entry));
fe8ab488
A
5305 if (((entry->vme_end - entry->vme_start)
5306 != PAGE_SIZE) ||
5307 entry->needs_copy ||
5308 entry->is_sub_map ||
3e170ce0 5309 VME_OBJECT(entry) == VM_OBJECT_NULL) {
fe8ab488
A
5310 rc = KERN_INVALID_ARGUMENT;
5311 goto done;
5312 }
5313
3e170ce0
A
5314 object = VME_OBJECT(entry);
5315 offset = VME_OFFSET(entry);
fe8ab488
A
5316 /* need exclusive lock to update m->dirty */
5317 if (entry->protection & VM_PROT_WRITE) {
5318 vm_object_lock(object);
5319 } else {
5320 vm_object_lock_shared(object);
5321 }
5322 m = vm_page_lookup(object, offset);
5323 assert(m != VM_PAGE_NULL);
39037602
A
5324 assert(VM_PAGE_WIRED(m));
5325 if (m != VM_PAGE_NULL && VM_PAGE_WIRED(m)) {
5326 *physpage_p = VM_PAGE_GET_PHYS_PAGE(m);
fe8ab488
A
5327 if (entry->protection & VM_PROT_WRITE) {
5328 vm_object_lock_assert_exclusive(
39037602 5329 object);
fe8ab488
A
5330 m->dirty = TRUE;
5331 }
5332 } else {
5333 /* not already wired !? */
5334 *physpage_p = 0;
5335 }
5336 vm_object_unlock(object);
5337 }
5338
2d21ac55 5339 /* map was not unlocked: no need to relookup */
1c79356b 5340 entry = entry->vme_next;
2d21ac55 5341 s = entry->vme_start;
1c79356b
A
5342 continue;
5343 }
5344
5345 /*
5346 * Unwired entry or wire request transmitted via submap
5347 */
5348
5349
39037602 5350
1c79356b
A
5351 /*
5352 * Perform actions of vm_map_lookup that need the write
5353 * lock on the map: create a shadow object for a
5354 * copy-on-write region, or an object for a zero-fill
5355 * region.
5356 */
5357 size = entry->vme_end - entry->vme_start;
5358 /*
5359 * If wiring a copy-on-write page, we need to copy it now
5360 * even if we're only (currently) requesting read access.
5361 * This is aggressive, but once it's wired we can't move it.
5362 */
5363 if (entry->needs_copy) {
fe8ab488
A
5364 if (wire_and_extract) {
5365 /*
5366 * We're supposed to share with the original
5367 * provider so should not be "needs_copy"
5368 */
5369 rc = KERN_INVALID_ARGUMENT;
5370 goto done;
5371 }
3e170ce0
A
5372
5373 VME_OBJECT_SHADOW(entry, size);
1c79356b 5374 entry->needs_copy = FALSE;
3e170ce0 5375 } else if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
fe8ab488
A
5376 if (wire_and_extract) {
5377 /*
5378 * We're supposed to share with the original
5379 * provider so should already have an object.
5380 */
5381 rc = KERN_INVALID_ARGUMENT;
5382 goto done;
5383 }
3e170ce0
A
5384 VME_OBJECT_SET(entry, vm_object_allocate(size));
5385 VME_OFFSET_SET(entry, (vm_object_offset_t)0);
fe8ab488 5386 assert(entry->use_pmap);
1c79356b
A
5387 }
5388
2d21ac55 5389 vm_map_clip_start(map, entry, s);
1c79356b
A
5390 vm_map_clip_end(map, entry, end);
5391
2d21ac55 5392 /* re-compute "e" */
1c79356b 5393 e = entry->vme_end;
2d21ac55
A
5394 if (e > end)
5395 e = end;
1c79356b
A
5396
5397 /*
5398 * Check for holes and protection mismatch.
5399 * Holes: Next entry should be contiguous unless this
5400 * is the end of the region.
5401 * Protection: Access requested must be allowed, unless
5402 * wiring is by protection class
5403 */
2d21ac55
A
5404 if ((entry->vme_end < end) &&
5405 ((entry->vme_next == vm_map_to_entry(map)) ||
5406 (entry->vme_next->vme_start > entry->vme_end))) {
5407 /* found a hole */
5408 rc = KERN_INVALID_ADDRESS;
5409 goto done;
5410 }
5411 if ((entry->protection & access_type) != access_type) {
5412 /* found a protection problem */
5413 rc = KERN_PROTECTION_FAILURE;
5414 goto done;
1c79356b
A
5415 }
5416
5417 assert(entry->wired_count == 0 && entry->user_wired_count == 0);
5418
2d21ac55
A
5419 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
5420 goto done;
1c79356b
A
5421
5422 entry->in_transition = TRUE;
5423
5424 /*
5425 * This entry might get split once we unlock the map.
5426 * In vm_fault_wire(), we need the current range as
5427 * defined by this entry. In order for this to work
5428 * along with a simultaneous clip operation, we make a
5429 * temporary copy of this entry and use that for the
5430 * wiring. Note that the underlying objects do not
5431 * change during a clip.
5432 */
5433 tmp_entry = *entry;
5434
5435 /*
5436 * The in_transition state guarantees that the entry
5437 * (or entries for this range, if a split occurred) will be
5438 * there when the map lock is acquired for the second time.
5439 */
5440 vm_map_unlock(map);
0b4e3aa0 5441
9bccf70c
A
5442 if (!user_wire && cur_thread != THREAD_NULL)
5443 interruptible_state = thread_interrupt_level(THREAD_UNINT);
91447636
A
5444 else
5445 interruptible_state = THREAD_UNINT;
9bccf70c 5446
1c79356b 5447 if(map_pmap)
9bccf70c 5448 rc = vm_fault_wire(map,
3e170ce0 5449 &tmp_entry, caller_prot, map_pmap, pmap_addr,
fe8ab488 5450 physpage_p);
1c79356b 5451 else
9bccf70c 5452 rc = vm_fault_wire(map,
3e170ce0 5453 &tmp_entry, caller_prot, map->pmap,
fe8ab488
A
5454 tmp_entry.vme_start,
5455 physpage_p);
0b4e3aa0
A
5456
5457 if (!user_wire && cur_thread != THREAD_NULL)
9bccf70c 5458 thread_interrupt_level(interruptible_state);
0b4e3aa0 5459
1c79356b
A
5460 vm_map_lock(map);
5461
5462 if (last_timestamp+1 != map->timestamp) {
5463 /*
5464 * Find the entry again. It could have been clipped
5465 * after we unlocked the map.
5466 */
5467 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
2d21ac55 5468 &first_entry))
1c79356b
A
5469 panic("vm_map_wire: re-lookup failed");
5470
5471 entry = first_entry;
5472 }
5473
5474 last_timestamp = map->timestamp;
5475
5476 while ((entry != vm_map_to_entry(map)) &&
5477 (entry->vme_start < tmp_entry.vme_end)) {
5478 assert(entry->in_transition);
5479 entry->in_transition = FALSE;
5480 if (entry->needs_wakeup) {
5481 entry->needs_wakeup = FALSE;
5482 need_wakeup = TRUE;
5483 }
5484 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
2d21ac55 5485 subtract_wire_counts(map, entry, user_wire);
1c79356b
A
5486 }
5487 entry = entry->vme_next;
5488 }
5489
5490 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
2d21ac55 5491 goto done;
1c79356b 5492 }
2d21ac55 5493
d190cdc3
A
5494 if ((entry != vm_map_to_entry(map)) && /* we still have entries in the map */
5495 (tmp_entry.vme_end != end) && /* AND, we are not at the end of the requested range */
5496 (entry->vme_start != tmp_entry.vme_end)) { /* AND, the next entry is not contiguous. */
5497 /* found a "new" hole */
5498 s = tmp_entry.vme_end;
5499 rc = KERN_INVALID_ADDRESS;
5500 goto done;
5501 }
5502
2d21ac55 5503 s = entry->vme_start;
d190cdc3 5504
1c79356b 5505 } /* end while loop through map entries */
2d21ac55
A
5506
5507done:
5508 if (rc == KERN_SUCCESS) {
5509 /* repair any damage we may have made to the VM map */
5510 vm_map_simplify_range(map, start, end);
5511 }
5512
1c79356b
A
5513 vm_map_unlock(map);
5514
5515 /*
5516 * wake up anybody waiting on entries we wired.
5517 */
5518 if (need_wakeup)
5519 vm_map_entry_wakeup(map);
5520
2d21ac55
A
5521 if (rc != KERN_SUCCESS) {
5522 /* undo what has been wired so far */
4bd07ac2
A
5523 vm_map_unwire_nested(map, start, s, user_wire,
5524 map_pmap, pmap_addr);
fe8ab488
A
5525 if (physpage_p) {
5526 *physpage_p = 0;
5527 }
2d21ac55
A
5528 }
5529
5530 return rc;
1c79356b
A
5531
5532}
5533
5534kern_return_t
3e170ce0 5535vm_map_wire_external(
39037602
A
5536 vm_map_t map,
5537 vm_map_offset_t start,
5538 vm_map_offset_t end,
5539 vm_prot_t caller_prot,
1c79356b
A
5540 boolean_t user_wire)
5541{
3e170ce0
A
5542 kern_return_t kret;
5543
5544 caller_prot &= ~VM_PROT_MEMORY_TAG_MASK;
5545 caller_prot |= VM_PROT_MEMORY_TAG_MAKE(vm_tag_bt());
5546 kret = vm_map_wire_nested(map, start, end, caller_prot,
5547 user_wire, (pmap_t)NULL, 0, NULL);
5548 return kret;
5549}
1c79356b 5550
3e170ce0
A
5551kern_return_t
5552vm_map_wire(
39037602
A
5553 vm_map_t map,
5554 vm_map_offset_t start,
5555 vm_map_offset_t end,
5556 vm_prot_t caller_prot,
3e170ce0
A
5557 boolean_t user_wire)
5558{
1c79356b
A
5559 kern_return_t kret;
5560
3e170ce0 5561 kret = vm_map_wire_nested(map, start, end, caller_prot,
fe8ab488
A
5562 user_wire, (pmap_t)NULL, 0, NULL);
5563 return kret;
5564}
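
/*
 * Illustrative sketch, not a definitive recipe: a minimal hypothetical
 * kernel caller of the wiring entry points above. "wire_example", "addr"
 * and "len" are assumed names; the range is assumed to be page aligned
 * and already mapped in "map". vm_map_wire_external() stamps the memory
 * tag from the caller's backtrace (vm_tag_bt()), whereas vm_map_wire()
 * expects any tag to already be encoded in caller_prot.
 */
#if 0 /* illustrative only */
static kern_return_t
wire_example(vm_map_t map, vm_map_offset_t addr, vm_map_size_t len)
{
	kern_return_t kr;

	/* wire for read/write; user_wire == FALSE makes this a kernel wiring */
	kr = vm_map_wire_external(map, addr, addr + len,
				  VM_PROT_READ | VM_PROT_WRITE,
				  FALSE);
	if (kr != KERN_SUCCESS)
		return kr;

	/* ... touch or DMA into the wired range ... */

	/* balance the kernel wiring established above */
	return vm_map_unwire(map, addr, addr + len, FALSE);
}
#endif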
5565
5566kern_return_t
3e170ce0 5567vm_map_wire_and_extract_external(
fe8ab488
A
5568 vm_map_t map,
5569 vm_map_offset_t start,
3e170ce0 5570 vm_prot_t caller_prot,
fe8ab488
A
5571 boolean_t user_wire,
5572 ppnum_t *physpage_p)
5573{
3e170ce0
A
5574 kern_return_t kret;
5575
5576 caller_prot &= ~VM_PROT_MEMORY_TAG_MASK;
5577 caller_prot |= VM_PROT_MEMORY_TAG_MAKE(vm_tag_bt());
5578 kret = vm_map_wire_nested(map,
5579 start,
5580 start+VM_MAP_PAGE_SIZE(map),
5581 caller_prot,
5582 user_wire,
5583 (pmap_t)NULL,
5584 0,
5585 physpage_p);
5586 if (kret != KERN_SUCCESS &&
5587 physpage_p != NULL) {
5588 *physpage_p = 0;
5589 }
5590 return kret;
5591}
fe8ab488 5592
3e170ce0
A
5593kern_return_t
5594vm_map_wire_and_extract(
5595 vm_map_t map,
5596 vm_map_offset_t start,
5597 vm_prot_t caller_prot,
5598 boolean_t user_wire,
5599 ppnum_t *physpage_p)
5600{
fe8ab488
A
5601 kern_return_t kret;
5602
5603 kret = vm_map_wire_nested(map,
5604 start,
5605 start+VM_MAP_PAGE_SIZE(map),
3e170ce0 5606 caller_prot,
fe8ab488
A
5607 user_wire,
5608 (pmap_t)NULL,
5609 0,
5610 physpage_p);
5611 if (kret != KERN_SUCCESS &&
5612 physpage_p != NULL) {
5613 *physpage_p = 0;
5614 }
1c79356b
A
5615 return kret;
5616}
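
/*
 * Illustrative sketch with hypothetical names ("pin_one_page", "ppnum"):
 * the wire-and-extract entry points wire a single VM-map page starting
 * at "start" and return its physical page number, e.g. for building a
 * DMA descriptor. On failure they zero *physpage_p, as shown above.
 */
#if 0 /* illustrative only */
static kern_return_t
pin_one_page(vm_map_t map, vm_map_offset_t start, ppnum_t *ppnum)
{
	kern_return_t kr;

	kr = vm_map_wire_and_extract_external(map, start,
					      VM_PROT_READ | VM_PROT_WRITE,
					      FALSE,	/* kernel wiring */
					      ppnum);
	if (kr != KERN_SUCCESS)
		return kr;	/* *ppnum was zeroed by the callee */

	/* ... hand *ppnum to hardware ... */
	return KERN_SUCCESS;
}
#endif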
5617
5618/*
5619 * vm_map_unwire:
5620 *
5621 * Sets the pageability of the specified address range in the target
5622 * map as pageable. Regions specified must have been wired previously.
5623 *
5624 * The map must not be locked, but a reference must remain to the map
5625 * throughout the call.
5626 *
5627 * A kernel unwire will panic on failures. User unwire ignores holes and
5628 * unwired and in-transition entries to avoid losing memory by leaving
5629 * it unwired.
5630 */
91447636 5631static kern_return_t
1c79356b 5632vm_map_unwire_nested(
39037602
A
5633 vm_map_t map,
5634 vm_map_offset_t start,
5635 vm_map_offset_t end,
1c79356b 5636 boolean_t user_wire,
9bccf70c 5637 pmap_t map_pmap,
91447636 5638 vm_map_offset_t pmap_addr)
1c79356b 5639{
39037602 5640 vm_map_entry_t entry;
1c79356b
A
5641 struct vm_map_entry *first_entry, tmp_entry;
5642 boolean_t need_wakeup;
5643 boolean_t main_map = FALSE;
5644 unsigned int last_timestamp;
5645
5646 vm_map_lock(map);
5647 if(map_pmap == NULL)
5648 main_map = TRUE;
5649 last_timestamp = map->timestamp;
5650
5651 VM_MAP_RANGE_CHECK(map, start, end);
5652 assert(page_aligned(start));
5653 assert(page_aligned(end));
39236c6e
A
5654 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
5655 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
1c79356b 5656
2d21ac55
A
5657 if (start == end) {
5658 /* We unwired what the caller asked for: zero pages */
5659 vm_map_unlock(map);
5660 return KERN_SUCCESS;
5661 }
5662
1c79356b
A
5663 if (vm_map_lookup_entry(map, start, &first_entry)) {
5664 entry = first_entry;
2d21ac55
A
5665 /*
5666 * vm_map_clip_start will be done later.
5667 * We don't want to unnest any nested sub maps here !
5668 */
1c79356b
A
5669 }
5670 else {
2d21ac55
A
5671 if (!user_wire) {
5672 panic("vm_map_unwire: start not found");
5673 }
1c79356b
A
5674 /* Start address is not in map. */
5675 vm_map_unlock(map);
5676 return(KERN_INVALID_ADDRESS);
5677 }
5678
b0d623f7
A
5679 if (entry->superpage_size) {
5680 /* superpages are always wired */
5681 vm_map_unlock(map);
5682 return KERN_INVALID_ADDRESS;
5683 }
5684
1c79356b
A
5685 need_wakeup = FALSE;
5686 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
5687 if (entry->in_transition) {
5688 /*
5689 * 1)
5690 * Another thread is wiring down this entry. Note
5691 * that if it is not for the other thread we would
5692 * be unwiring an unwired entry. This is not
5693 * permitted. If we wait, we will be unwiring memory
5694 * we did not wire.
5695 *
5696 * 2)
5697 * Another thread is unwiring this entry. We did not
5698 * have a reference to it, because if we did, this
5699 * entry will not be getting unwired now.
5700 */
2d21ac55
A
5701 if (!user_wire) {
5702 /*
5703 * XXX FBDP
5704 * This could happen: there could be some
5705 * overlapping vslock/vsunlock operations
5706 * going on.
5707 * We should probably just wait and retry,
5708 * but then we have to be careful that this
5709 * entry could get "simplified" after
5710 * "in_transition" gets unset and before
5711 * we re-lookup the entry, so we would
5712 * have to re-clip the entry to avoid
5713 * re-unwiring what we have already unwired...
5714 * See vm_map_wire_nested().
5715 *
5716 * Or we could just ignore "in_transition"
5717 * here and proceed to decrement the wired
5718 * count(s) on this entry. That should be fine
5719 * as long as "wired_count" doesn't drop all
5720 * the way to 0 (and we should panic if THAT
5721 * happens).
5722 */
1c79356b 5723 panic("vm_map_unwire: in_transition entry");
2d21ac55 5724 }
1c79356b
A
5725
5726 entry = entry->vme_next;
5727 continue;
5728 }
5729
2d21ac55 5730 if (entry->is_sub_map) {
91447636
A
5731 vm_map_offset_t sub_start;
5732 vm_map_offset_t sub_end;
5733 vm_map_offset_t local_end;
1c79356b 5734 pmap_t pmap;
2d21ac55 5735
1c79356b
A
5736 vm_map_clip_start(map, entry, start);
5737 vm_map_clip_end(map, entry, end);
5738
3e170ce0 5739 sub_start = VME_OFFSET(entry);
1c79356b 5740 sub_end = entry->vme_end - entry->vme_start;
3e170ce0 5741 sub_end += VME_OFFSET(entry);
1c79356b
A
5742 local_end = entry->vme_end;
5743 if(map_pmap == NULL) {
2d21ac55 5744 if(entry->use_pmap) {
3e170ce0 5745 pmap = VME_SUBMAP(entry)->pmap;
9bccf70c 5746 pmap_addr = sub_start;
2d21ac55 5747 } else {
1c79356b 5748 pmap = map->pmap;
9bccf70c 5749 pmap_addr = start;
2d21ac55
A
5750 }
5751 if (entry->wired_count == 0 ||
5752 (user_wire && entry->user_wired_count == 0)) {
5753 if (!user_wire)
5754 panic("vm_map_unwire: entry is unwired");
5755 entry = entry->vme_next;
5756 continue;
5757 }
5758
5759 /*
5760 * Check for holes
5761 * Holes: Next entry should be contiguous unless
5762 * this is the end of the region.
5763 */
5764 if (((entry->vme_end < end) &&
5765 ((entry->vme_next == vm_map_to_entry(map)) ||
5766 (entry->vme_next->vme_start
5767 > entry->vme_end)))) {
5768 if (!user_wire)
5769 panic("vm_map_unwire: non-contiguous region");
1c79356b 5770/*
2d21ac55
A
5771 entry = entry->vme_next;
5772 continue;
1c79356b 5773*/
2d21ac55 5774 }
1c79356b 5775
2d21ac55 5776 subtract_wire_counts(map, entry, user_wire);
1c79356b 5777
2d21ac55
A
5778 if (entry->wired_count != 0) {
5779 entry = entry->vme_next;
5780 continue;
5781 }
1c79356b 5782
2d21ac55
A
5783 entry->in_transition = TRUE;
5784 tmp_entry = *entry;/* see comment in vm_map_wire() */
5785
5786 /*
5787 * We can unlock the map now. The in_transition state
5788 * guarantees existence of the entry.
5789 */
5790 vm_map_unlock(map);
3e170ce0 5791 vm_map_unwire_nested(VME_SUBMAP(entry),
2d21ac55
A
5792 sub_start, sub_end, user_wire, pmap, pmap_addr);
5793 vm_map_lock(map);
1c79356b 5794
2d21ac55
A
5795 if (last_timestamp+1 != map->timestamp) {
5796 /*
5797 * Find the entry again. It could have been
5798 * clipped or deleted after we unlocked the map.
5799 */
5800 if (!vm_map_lookup_entry(map,
5801 tmp_entry.vme_start,
5802 &first_entry)) {
5803 if (!user_wire)
5804 panic("vm_map_unwire: re-lookup failed");
5805 entry = first_entry->vme_next;
5806 } else
5807 entry = first_entry;
5808 }
5809 last_timestamp = map->timestamp;
1c79356b 5810
1c79356b 5811 /*
2d21ac55
A
5812 * clear transition bit for all constituent entries
5813 * that were in the original entry (saved in
5814 * tmp_entry). Also check for waiters.
5815 */
5816 while ((entry != vm_map_to_entry(map)) &&
5817 (entry->vme_start < tmp_entry.vme_end)) {
5818 assert(entry->in_transition);
5819 entry->in_transition = FALSE;
5820 if (entry->needs_wakeup) {
5821 entry->needs_wakeup = FALSE;
5822 need_wakeup = TRUE;
5823 }
5824 entry = entry->vme_next;
1c79356b 5825 }
2d21ac55 5826 continue;
1c79356b 5827 } else {
2d21ac55 5828 vm_map_unlock(map);
3e170ce0 5829 vm_map_unwire_nested(VME_SUBMAP(entry),
2d21ac55
A
5830 sub_start, sub_end, user_wire, map_pmap,
5831 pmap_addr);
5832 vm_map_lock(map);
1c79356b 5833
2d21ac55
A
5834 if (last_timestamp+1 != map->timestamp) {
5835 /*
5836 * Find the entry again. It could have been
5837 * clipped or deleted after we unlocked the map.
5838 */
5839 if (!vm_map_lookup_entry(map,
5840 tmp_entry.vme_start,
5841 &first_entry)) {
5842 if (!user_wire)
5843 panic("vm_map_unwire: re-lookup failed");
5844 entry = first_entry->vme_next;
5845 } else
5846 entry = first_entry;
5847 }
5848 last_timestamp = map->timestamp;
1c79356b
A
5849 }
5850 }
5851
5852
9bccf70c 5853 if ((entry->wired_count == 0) ||
2d21ac55 5854 (user_wire && entry->user_wired_count == 0)) {
1c79356b
A
5855 if (!user_wire)
5856 panic("vm_map_unwire: entry is unwired");
5857
5858 entry = entry->vme_next;
5859 continue;
5860 }
2d21ac55 5861
1c79356b 5862 assert(entry->wired_count > 0 &&
2d21ac55 5863 (!user_wire || entry->user_wired_count > 0));
1c79356b
A
5864
5865 vm_map_clip_start(map, entry, start);
5866 vm_map_clip_end(map, entry, end);
5867
5868 /*
5869 * Check for holes
5870 * Holes: Next entry should be contiguous unless
5871 * this is the end of the region.
5872 */
5873 if (((entry->vme_end < end) &&
2d21ac55
A
5874 ((entry->vme_next == vm_map_to_entry(map)) ||
5875 (entry->vme_next->vme_start > entry->vme_end)))) {
1c79356b
A
5876
5877 if (!user_wire)
5878 panic("vm_map_unwire: non-contiguous region");
5879 entry = entry->vme_next;
5880 continue;
5881 }
5882
2d21ac55 5883 subtract_wire_counts(map, entry, user_wire);
1c79356b 5884
9bccf70c 5885 if (entry->wired_count != 0) {
1c79356b
A
5886 entry = entry->vme_next;
5887 continue;
1c79356b
A
5888 }
5889
b0d623f7
A
5890 if(entry->zero_wired_pages) {
5891 entry->zero_wired_pages = FALSE;
5892 }
5893
1c79356b
A
5894 entry->in_transition = TRUE;
5895 tmp_entry = *entry; /* see comment in vm_map_wire() */
5896
5897 /*
5898 * We can unlock the map now. The in_transition state
5899 * guarantees existence of the entry.
5900 */
5901 vm_map_unlock(map);
5902 if(map_pmap) {
9bccf70c 5903 vm_fault_unwire(map,
2d21ac55 5904 &tmp_entry, FALSE, map_pmap, pmap_addr);
1c79356b 5905 } else {
9bccf70c 5906 vm_fault_unwire(map,
2d21ac55
A
5907 &tmp_entry, FALSE, map->pmap,
5908 tmp_entry.vme_start);
1c79356b
A
5909 }
5910 vm_map_lock(map);
5911
5912 if (last_timestamp+1 != map->timestamp) {
5913 /*
5914 * Find the entry again. It could have been clipped
5915 * or deleted after we unlocked the map.
5916 */
5917 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
2d21ac55 5918 &first_entry)) {
1c79356b 5919 if (!user_wire)
2d21ac55 5920 panic("vm_map_unwire: re-lookup failed");
1c79356b
A
5921 entry = first_entry->vme_next;
5922 } else
5923 entry = first_entry;
5924 }
5925 last_timestamp = map->timestamp;
5926
5927 /*
5928 * clear transition bit for all constituent entries that
5929 * were in the original entry (saved in tmp_entry). Also
5930 * check for waiters.
5931 */
5932 while ((entry != vm_map_to_entry(map)) &&
5933 (entry->vme_start < tmp_entry.vme_end)) {
5934 assert(entry->in_transition);
5935 entry->in_transition = FALSE;
5936 if (entry->needs_wakeup) {
5937 entry->needs_wakeup = FALSE;
5938 need_wakeup = TRUE;
5939 }
5940 entry = entry->vme_next;
5941 }
5942 }
91447636
A
5943
5944 /*
5945 * We might have fragmented the address space when we wired this
5946 * range of addresses. Attempt to re-coalesce these VM map entries
5947 * with their neighbors now that they're no longer wired.
5948 * Under some circumstances, address space fragmentation can
5949 * prevent VM object shadow chain collapsing, which can cause
5950 * swap space leaks.
5951 */
5952 vm_map_simplify_range(map, start, end);
5953
1c79356b
A
5954 vm_map_unlock(map);
5955 /*
5956 * wake up anybody waiting on entries that we have unwired.
5957 */
5958 if (need_wakeup)
5959 vm_map_entry_wakeup(map);
5960 return(KERN_SUCCESS);
5961
5962}
5963
5964kern_return_t
5965vm_map_unwire(
39037602
A
5966 vm_map_t map,
5967 vm_map_offset_t start,
5968 vm_map_offset_t end,
1c79356b
A
5969 boolean_t user_wire)
5970{
9bccf70c 5971 return vm_map_unwire_nested(map, start, end,
2d21ac55 5972 user_wire, (pmap_t)NULL, 0);
1c79356b
A
5973}
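
/*
 * Illustrative sketch (assumed helper name "wire_one_entry_sketch") of
 * the locking idiom shared by the wiring, unwiring and deletion loops in
 * this file: mark the entry in_transition, snapshot it, drop the map
 * lock for the fault work, then re-validate through the map timestamp.
 */
#if 0 /* illustrative only */
static void
wire_one_entry_sketch(vm_map_t map, vm_map_entry_t entry)
{
	struct vm_map_entry	tmp_entry;
	vm_map_entry_t		first_entry;
	unsigned int		last_timestamp;

	/* the map is locked on entry */
	entry->in_transition = TRUE;	/* keeps the entry alive while unlocked */
	tmp_entry = *entry;		/* private snapshot of the current range */
	last_timestamp = map->timestamp;
	vm_map_unlock(map);

	/* ... vm_fault_wire()/vm_fault_unwire() against tmp_entry's range ... */

	vm_map_lock(map);
	if (last_timestamp + 1 != map->timestamp) {
		/* another locker intervened: the entry may have been clipped */
		if (!vm_map_lookup_entry(map, tmp_entry.vme_start, &first_entry))
			panic("wire_one_entry_sketch: re-lookup failed");
		entry = first_entry;
	}
	/* ... clear in_transition on all constituent entries, wake waiters ... */
}
#endif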
5974
5975
5976/*
5977 * vm_map_entry_delete: [ internal use only ]
5978 *
5979 * Deallocate the given entry from the target map.
5980 */
91447636 5981static void
1c79356b 5982vm_map_entry_delete(
39037602
A
5983 vm_map_t map,
5984 vm_map_entry_t entry)
1c79356b 5985{
39037602
A
5986 vm_map_offset_t s, e;
5987 vm_object_t object;
5988 vm_map_t submap;
1c79356b
A
5989
5990 s = entry->vme_start;
5991 e = entry->vme_end;
5992 assert(page_aligned(s));
5993 assert(page_aligned(e));
39236c6e
A
5994 if (entry->map_aligned == TRUE) {
5995 assert(VM_MAP_PAGE_ALIGNED(s, VM_MAP_PAGE_MASK(map)));
5996 assert(VM_MAP_PAGE_ALIGNED(e, VM_MAP_PAGE_MASK(map)));
5997 }
1c79356b
A
5998 assert(entry->wired_count == 0);
5999 assert(entry->user_wired_count == 0);
b0d623f7 6000 assert(!entry->permanent);
1c79356b
A
6001
6002 if (entry->is_sub_map) {
6003 object = NULL;
3e170ce0 6004 submap = VME_SUBMAP(entry);
1c79356b
A
6005 } else {
6006 submap = NULL;
3e170ce0 6007 object = VME_OBJECT(entry);
1c79356b
A
6008 }
6009
6d2010ae 6010 vm_map_store_entry_unlink(map, entry);
1c79356b
A
6011 map->size -= e - s;
6012
6013 vm_map_entry_dispose(map, entry);
6014
6015 vm_map_unlock(map);
6016 /*
6017 * Deallocate the object only after removing all
6018 * pmap entries pointing to its pages.
6019 */
6020 if (submap)
6021 vm_map_deallocate(submap);
6022 else
2d21ac55 6023 vm_object_deallocate(object);
1c79356b
A
6024
6025}
6026
6027void
6028vm_map_submap_pmap_clean(
6029 vm_map_t map,
91447636
A
6030 vm_map_offset_t start,
6031 vm_map_offset_t end,
1c79356b 6032 vm_map_t sub_map,
91447636 6033 vm_map_offset_t offset)
1c79356b 6034{
91447636
A
6035 vm_map_offset_t submap_start;
6036 vm_map_offset_t submap_end;
6037 vm_map_size_t remove_size;
1c79356b
A
6038 vm_map_entry_t entry;
6039
6040 submap_end = offset + (end - start);
6041 submap_start = offset;
b7266188
A
6042
6043 vm_map_lock_read(sub_map);
1c79356b 6044 if(vm_map_lookup_entry(sub_map, offset, &entry)) {
2d21ac55 6045
1c79356b
A
6046 remove_size = (entry->vme_end - entry->vme_start);
6047 if(offset > entry->vme_start)
6048 remove_size -= offset - entry->vme_start;
2d21ac55 6049
1c79356b
A
6050
6051 if(submap_end < entry->vme_end) {
6052 remove_size -=
6053 entry->vme_end - submap_end;
6054 }
6055 if(entry->is_sub_map) {
6056 vm_map_submap_pmap_clean(
6057 sub_map,
6058 start,
6059 start + remove_size,
3e170ce0
A
6060 VME_SUBMAP(entry),
6061 VME_OFFSET(entry));
1c79356b 6062 } else {
9bccf70c 6063
316670eb 6064 if((map->mapped_in_other_pmaps) && (map->ref_count)
3e170ce0
A
6065 && (VME_OBJECT(entry) != NULL)) {
6066 vm_object_pmap_protect_options(
6067 VME_OBJECT(entry),
6068 (VME_OFFSET(entry) +
6069 offset -
6070 entry->vme_start),
9bccf70c
A
6071 remove_size,
6072 PMAP_NULL,
6073 entry->vme_start,
3e170ce0
A
6074 VM_PROT_NONE,
6075 PMAP_OPTIONS_REMOVE);
9bccf70c
A
6076 } else {
6077 pmap_remove(map->pmap,
2d21ac55
A
6078 (addr64_t)start,
6079 (addr64_t)(start + remove_size));
9bccf70c 6080 }
1c79356b
A
6081 }
6082 }
6083
6084 entry = entry->vme_next;
2d21ac55 6085
1c79356b 6086 while((entry != vm_map_to_entry(sub_map))
2d21ac55 6087 && (entry->vme_start < submap_end)) {
1c79356b
A
6088 remove_size = (entry->vme_end - entry->vme_start);
6089 if(submap_end < entry->vme_end) {
6090 remove_size -= entry->vme_end - submap_end;
6091 }
6092 if(entry->is_sub_map) {
6093 vm_map_submap_pmap_clean(
6094 sub_map,
6095 (start + entry->vme_start) - offset,
6096 ((start + entry->vme_start) - offset) + remove_size,
3e170ce0
A
6097 VME_SUBMAP(entry),
6098 VME_OFFSET(entry));
1c79356b 6099 } else {
316670eb 6100 if((map->mapped_in_other_pmaps) && (map->ref_count)
3e170ce0
A
6101 && (VME_OBJECT(entry) != NULL)) {
6102 vm_object_pmap_protect_options(
6103 VME_OBJECT(entry),
6104 VME_OFFSET(entry),
9bccf70c
A
6105 remove_size,
6106 PMAP_NULL,
6107 entry->vme_start,
3e170ce0
A
6108 VM_PROT_NONE,
6109 PMAP_OPTIONS_REMOVE);
9bccf70c
A
6110 } else {
6111 pmap_remove(map->pmap,
2d21ac55
A
6112 (addr64_t)((start + entry->vme_start)
6113 - offset),
6114 (addr64_t)(((start + entry->vme_start)
6115 - offset) + remove_size));
9bccf70c 6116 }
1c79356b
A
6117 }
6118 entry = entry->vme_next;
b7266188
A
6119 }
6120 vm_map_unlock_read(sub_map);
1c79356b
A
6121 return;
6122}
6123
6124/*
6125 * vm_map_delete: [ internal use only ]
6126 *
6127 * Deallocates the given address range from the target map.
6128 * Removes all user wirings. Unwires one kernel wiring if
6129 * VM_MAP_REMOVE_KUNWIRE is set. Waits for kernel wirings to go
6130 * away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set. Sleeps
6131 * interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
6132 *
6133 * This routine is called with map locked and leaves map locked.
6134 */
91447636 6135static kern_return_t
1c79356b 6136vm_map_delete(
91447636
A
6137 vm_map_t map,
6138 vm_map_offset_t start,
6139 vm_map_offset_t end,
6140 int flags,
6141 vm_map_t zap_map)
1c79356b
A
6142{
6143 vm_map_entry_t entry, next;
6144 struct vm_map_entry *first_entry, tmp_entry;
39037602
A
6145 vm_map_offset_t s;
6146 vm_object_t object;
1c79356b
A
6147 boolean_t need_wakeup;
6148 unsigned int last_timestamp = ~0; /* unlikely value */
6149 int interruptible;
1c79356b
A
6150
6151 interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
2d21ac55 6152 THREAD_ABORTSAFE : THREAD_UNINT;
1c79356b
A
6153
6154 /*
6155 * All our DMA I/O operations in IOKit are currently done by
6156 * wiring through the map entries of the task requesting the I/O.
6157 * Because of this, we must always wait for kernel wirings
6158 * to go away on the entries before deleting them.
6159 *
6160 * Any caller who wants to actually remove a kernel wiring
6161 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
6162 * properly remove one wiring instead of blasting through
6163 * them all.
6164 */
6165 flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;
6166
b0d623f7
A
6167 while(1) {
6168 /*
6169 * Find the start of the region, and clip it
6170 */
6171 if (vm_map_lookup_entry(map, start, &first_entry)) {
6172 entry = first_entry;
fe8ab488
A
6173 if (map == kalloc_map &&
6174 (entry->vme_start != start ||
6175 entry->vme_end != end)) {
6176 panic("vm_map_delete(%p,0x%llx,0x%llx): "
6177 "mismatched entry %p [0x%llx:0x%llx]\n",
6178 map,
6179 (uint64_t)start,
6180 (uint64_t)end,
6181 entry,
6182 (uint64_t)entry->vme_start,
6183 (uint64_t)entry->vme_end);
6184 }
b0d623f7
A
6185 if (entry->superpage_size && (start & ~SUPERPAGE_MASK)) { /* extend request to whole entry */
6186 start = SUPERPAGE_ROUND_DOWN(start);
6187 continue;
6188 }
6189 if (start == entry->vme_start) {
6190 /*
6191 * No need to clip. We don't want to cause
6192 * any unnecessary unnesting in this case...
6193 */
6194 } else {
fe8ab488
A
6195 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
6196 entry->map_aligned &&
6197 !VM_MAP_PAGE_ALIGNED(
6198 start,
6199 VM_MAP_PAGE_MASK(map))) {
6200 /*
6201 * The entry will no longer be
6202 * map-aligned after clipping
6203 * and the caller said it's OK.
6204 */
6205 entry->map_aligned = FALSE;
6206 }
6207 if (map == kalloc_map) {
6208 panic("vm_map_delete(%p,0x%llx,0x%llx):"
6209 " clipping %p at 0x%llx\n",
6210 map,
6211 (uint64_t)start,
6212 (uint64_t)end,
6213 entry,
6214 (uint64_t)start);
6215 }
b0d623f7
A
6216 vm_map_clip_start(map, entry, start);
6217 }
6218
2d21ac55 6219 /*
b0d623f7
A
6220 * Fix the lookup hint now, rather than each
6221 * time through the loop.
2d21ac55 6222 */
b0d623f7 6223 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
2d21ac55 6224 } else {
fe8ab488
A
6225 if (map->pmap == kernel_pmap &&
6226 map->ref_count != 0) {
6227 panic("vm_map_delete(%p,0x%llx,0x%llx): "
6228 "no map entry at 0x%llx\n",
6229 map,
6230 (uint64_t)start,
6231 (uint64_t)end,
6232 (uint64_t)start);
6233 }
b0d623f7 6234 entry = first_entry->vme_next;
2d21ac55 6235 }
b0d623f7 6236 break;
1c79356b 6237 }
b0d623f7
A
6238 if (entry->superpage_size)
6239 end = SUPERPAGE_ROUND_UP(end);
1c79356b
A
6240
6241 need_wakeup = FALSE;
6242 /*
6243 * Step through all entries in this region
6244 */
2d21ac55
A
6245 s = entry->vme_start;
6246 while ((entry != vm_map_to_entry(map)) && (s < end)) {
6247 /*
6248 * At this point, we have deleted all the memory entries
6249 * between "start" and "s". We still need to delete
6250 * all memory entries between "s" and "end".
6251 * While we were blocked and the map was unlocked, some
6252 * new memory entries could have been re-allocated between
6253 * "start" and "s" and we don't want to mess with those.
6254 * Some of those entries could even have been re-assembled
6255 * with an entry after "s" (in vm_map_simplify_entry()), so
6256 * we may have to vm_map_clip_start() again.
6257 */
1c79356b 6258
2d21ac55
A
6259 if (entry->vme_start >= s) {
6260 /*
6261 * This entry starts on or after "s"
6262 * so no need to clip its start.
6263 */
6264 } else {
6265 /*
6266 * This entry has been re-assembled by a
6267 * vm_map_simplify_entry(). We need to
6268 * re-clip its start.
6269 */
fe8ab488
A
6270 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
6271 entry->map_aligned &&
6272 !VM_MAP_PAGE_ALIGNED(s,
6273 VM_MAP_PAGE_MASK(map))) {
6274 /*
6275 * The entry will no longer be map-aligned
6276 * after clipping and the caller said it's OK.
6277 */
6278 entry->map_aligned = FALSE;
6279 }
6280 if (map == kalloc_map) {
6281 panic("vm_map_delete(%p,0x%llx,0x%llx): "
6282 "clipping %p at 0x%llx\n",
6283 map,
6284 (uint64_t)start,
6285 (uint64_t)end,
6286 entry,
6287 (uint64_t)s);
6288 }
2d21ac55
A
6289 vm_map_clip_start(map, entry, s);
6290 }
6291 if (entry->vme_end <= end) {
6292 /*
6293 * This entry is going away completely, so no need
6294 * to clip and possibly cause an unnecessary unnesting.
6295 */
6296 } else {
fe8ab488
A
6297 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
6298 entry->map_aligned &&
6299 !VM_MAP_PAGE_ALIGNED(end,
6300 VM_MAP_PAGE_MASK(map))) {
6301 /*
6302 * The entry will no longer be map-aligned
6303 * after clipping and the caller said it's OK.
6304 */
6305 entry->map_aligned = FALSE;
6306 }
6307 if (map == kalloc_map) {
6308 panic("vm_map_delete(%p,0x%llx,0x%llx): "
6309 "clipping %p at 0x%llx\n",
6310 map,
6311 (uint64_t)start,
6312 (uint64_t)end,
6313 entry,
6314 (uint64_t)end);
6315 }
2d21ac55
A
6316 vm_map_clip_end(map, entry, end);
6317 }
b0d623f7
A
6318
6319 if (entry->permanent) {
6320 panic("attempt to remove permanent VM map entry "
6321 "%p [0x%llx:0x%llx]\n",
6322 entry, (uint64_t) s, (uint64_t) end);
6323 }
6324
6325
1c79356b 6326 if (entry->in_transition) {
9bccf70c
A
6327 wait_result_t wait_result;
6328
1c79356b
A
6329 /*
6330 * Another thread is wiring/unwiring this entry.
6331 * Let the other thread know we are waiting.
6332 */
2d21ac55 6333 assert(s == entry->vme_start);
1c79356b
A
6334 entry->needs_wakeup = TRUE;
6335
6336 /*
6337 * wake up anybody waiting on entries that we have
6338 * already unwired/deleted.
6339 */
6340 if (need_wakeup) {
6341 vm_map_entry_wakeup(map);
6342 need_wakeup = FALSE;
6343 }
6344
9bccf70c 6345 wait_result = vm_map_entry_wait(map, interruptible);
1c79356b
A
6346
6347 if (interruptible &&
9bccf70c 6348 wait_result == THREAD_INTERRUPTED) {
1c79356b
A
6349 /*
6350 * We do not clear the needs_wakeup flag,
6351 * since we cannot tell if we were the only one.
6352 */
6353 return KERN_ABORTED;
9bccf70c 6354 }
1c79356b
A
6355
6356 /*
6357 * The entry could have been clipped or it
6358 * may not exist anymore. Look it up again.
6359 */
6360 if (!vm_map_lookup_entry(map, s, &first_entry)) {
1c79356b
A
6361 /*
6362 * User: use the next entry
6363 */
6364 entry = first_entry->vme_next;
2d21ac55 6365 s = entry->vme_start;
1c79356b
A
6366 } else {
6367 entry = first_entry;
0c530ab8 6368 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
1c79356b 6369 }
9bccf70c 6370 last_timestamp = map->timestamp;
1c79356b
A
6371 continue;
6372 } /* end in_transition */
6373
6374 if (entry->wired_count) {
2d21ac55
A
6375 boolean_t user_wire;
6376
6377 user_wire = entry->user_wired_count > 0;
6378
1c79356b 6379 /*
b0d623f7 6380 * Remove a kernel wiring if requested
1c79356b 6381 */
b0d623f7 6382 if (flags & VM_MAP_REMOVE_KUNWIRE) {
1c79356b 6383 entry->wired_count--;
b0d623f7
A
6384 }
6385
6386 /*
6387 * Remove all user wirings for proper accounting
6388 */
6389 if (entry->user_wired_count > 0) {
6390 while (entry->user_wired_count)
6391 subtract_wire_counts(map, entry, user_wire);
6392 }
1c79356b
A
6393
6394 if (entry->wired_count != 0) {
2d21ac55 6395 assert(map != kernel_map);
1c79356b
A
6396 /*
6397 * Cannot continue. Typical case is when
6398 * a user thread has physical io pending on
6399 * on this page. Either wait for the
6400 * kernel wiring to go away or return an
6401 * error.
6402 */
6403 if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) {
9bccf70c 6404 wait_result_t wait_result;
1c79356b 6405
2d21ac55 6406 assert(s == entry->vme_start);
1c79356b 6407 entry->needs_wakeup = TRUE;
9bccf70c 6408 wait_result = vm_map_entry_wait(map,
2d21ac55 6409 interruptible);
1c79356b
A
6410
6411 if (interruptible &&
2d21ac55 6412 wait_result == THREAD_INTERRUPTED) {
1c79356b 6413 /*
2d21ac55 6414 * We do not clear the
1c79356b
A
6415 * needs_wakeup flag, since we
6416 * cannot tell if we were the
6417 * only one.
2d21ac55 6418 */
1c79356b 6419 return KERN_ABORTED;
9bccf70c 6420 }
1c79356b
A
6421
6422 /*
2d21ac55 6423 * The entry could have been clipped or
1c79356b
A
6424 * it may not exist anymore. Look it
6425 * up again.
2d21ac55 6426 */
1c79356b 6427 if (!vm_map_lookup_entry(map, s,
2d21ac55
A
6428 &first_entry)) {
6429 assert(map != kernel_map);
1c79356b 6430 /*
2d21ac55
A
6431 * User: use the next entry
6432 */
1c79356b 6433 entry = first_entry->vme_next;
2d21ac55 6434 s = entry->vme_start;
1c79356b
A
6435 } else {
6436 entry = first_entry;
0c530ab8 6437 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
1c79356b 6438 }
9bccf70c 6439 last_timestamp = map->timestamp;
1c79356b
A
6440 continue;
6441 }
6442 else {
6443 return KERN_FAILURE;
6444 }
6445 }
6446
6447 entry->in_transition = TRUE;
6448 /*
6449 * copy current entry. see comment in vm_map_wire()
6450 */
6451 tmp_entry = *entry;
2d21ac55 6452 assert(s == entry->vme_start);
1c79356b
A
6453
6454 /*
6455 * We can unlock the map now. The in_transition
6456 * state guarantees existence of the entry.
6457 */
6458 vm_map_unlock(map);
2d21ac55
A
6459
6460 if (tmp_entry.is_sub_map) {
6461 vm_map_t sub_map;
6462 vm_map_offset_t sub_start, sub_end;
6463 pmap_t pmap;
6464 vm_map_offset_t pmap_addr;
6465
6466
3e170ce0
A
6467 sub_map = VME_SUBMAP(&tmp_entry);
6468 sub_start = VME_OFFSET(&tmp_entry);
2d21ac55
A
6469 sub_end = sub_start + (tmp_entry.vme_end -
6470 tmp_entry.vme_start);
6471 if (tmp_entry.use_pmap) {
6472 pmap = sub_map->pmap;
6473 pmap_addr = tmp_entry.vme_start;
6474 } else {
6475 pmap = map->pmap;
6476 pmap_addr = tmp_entry.vme_start;
6477 }
6478 (void) vm_map_unwire_nested(sub_map,
6479 sub_start, sub_end,
6480 user_wire,
6481 pmap, pmap_addr);
6482 } else {
6483
3e170ce0 6484 if (VME_OBJECT(&tmp_entry) == kernel_object) {
39236c6e
A
6485 pmap_protect_options(
6486 map->pmap,
6487 tmp_entry.vme_start,
6488 tmp_entry.vme_end,
6489 VM_PROT_NONE,
6490 PMAP_OPTIONS_REMOVE,
6491 NULL);
6492 }
2d21ac55 6493 vm_fault_unwire(map, &tmp_entry,
3e170ce0 6494 VME_OBJECT(&tmp_entry) == kernel_object,
2d21ac55
A
6495 map->pmap, tmp_entry.vme_start);
6496 }
6497
1c79356b
A
6498 vm_map_lock(map);
6499
6500 if (last_timestamp+1 != map->timestamp) {
6501 /*
6502 * Find the entry again. It could have
6503 * been clipped after we unlocked the map.
6504 */
6505 if (!vm_map_lookup_entry(map, s, &first_entry)){
6506 assert((map != kernel_map) &&
2d21ac55 6507 (!entry->is_sub_map));
1c79356b 6508 first_entry = first_entry->vme_next;
2d21ac55 6509 s = first_entry->vme_start;
1c79356b 6510 } else {
0c530ab8 6511 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
1c79356b
A
6512 }
6513 } else {
0c530ab8 6514 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
1c79356b
A
6515 first_entry = entry;
6516 }
6517
6518 last_timestamp = map->timestamp;
6519
6520 entry = first_entry;
6521 while ((entry != vm_map_to_entry(map)) &&
6522 (entry->vme_start < tmp_entry.vme_end)) {
6523 assert(entry->in_transition);
6524 entry->in_transition = FALSE;
6525 if (entry->needs_wakeup) {
6526 entry->needs_wakeup = FALSE;
6527 need_wakeup = TRUE;
6528 }
6529 entry = entry->vme_next;
6530 }
6531 /*
6532 * We have unwired the entry(s). Go back and
6533 * delete them.
6534 */
6535 entry = first_entry;
6536 continue;
6537 }
6538
6539 /* entry is unwired */
6540 assert(entry->wired_count == 0);
6541 assert(entry->user_wired_count == 0);
6542
2d21ac55
A
6543 assert(s == entry->vme_start);
6544
6545 if (flags & VM_MAP_REMOVE_NO_PMAP_CLEANUP) {
6546 /*
6547 * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
6548 * vm_map_delete(), some map entries might have been
6549 * transferred to a "zap_map", which doesn't have a
6550 * pmap. The original pmap has already been flushed
6551 * in the vm_map_delete() call targeting the original
6552 * map, but when we get to destroying the "zap_map",
6553 * we don't have any pmap to flush, so let's just skip
6554 * all this.
6555 */
6556 } else if (entry->is_sub_map) {
6557 if (entry->use_pmap) {
0c530ab8 6558#ifndef NO_NESTED_PMAP
3e170ce0
A
6559 int pmap_flags;
6560
6561 if (flags & VM_MAP_REMOVE_NO_UNNESTING) {
6562 /*
6563 * This is the final cleanup of the
6564 * address space being terminated.
6565 * No new mappings are expected and
6566 * we don't really need to unnest the
6567 * shared region (and lose the "global"
6568 * pmap mappings, if applicable).
6569 *
6570 * Tell the pmap layer that we're
6571 * "clean" wrt nesting.
6572 */
6573 pmap_flags = PMAP_UNNEST_CLEAN;
6574 } else {
6575 /*
6576 * We're unmapping part of the nested
6577 * shared region, so we can't keep the
6578 * nested pmap.
6579 */
6580 pmap_flags = 0;
6581 }
6582 pmap_unnest_options(
6583 map->pmap,
6584 (addr64_t)entry->vme_start,
6585 entry->vme_end - entry->vme_start,
6586 pmap_flags);
0c530ab8 6587#endif /* NO_NESTED_PMAP */
316670eb 6588 if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
9bccf70c
A
6589 /* clean up parent map/maps */
6590 vm_map_submap_pmap_clean(
6591 map, entry->vme_start,
6592 entry->vme_end,
3e170ce0
A
6593 VME_SUBMAP(entry),
6594 VME_OFFSET(entry));
9bccf70c 6595 }
2d21ac55 6596 } else {
1c79356b
A
6597 vm_map_submap_pmap_clean(
6598 map, entry->vme_start, entry->vme_end,
3e170ce0
A
6599 VME_SUBMAP(entry),
6600 VME_OFFSET(entry));
2d21ac55 6601 }
3e170ce0
A
6602 } else if (VME_OBJECT(entry) != kernel_object &&
6603 VME_OBJECT(entry) != compressor_object) {
6604 object = VME_OBJECT(entry);
39236c6e
A
6605 if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
6606 vm_object_pmap_protect_options(
3e170ce0 6607 object, VME_OFFSET(entry),
55e303ae
A
6608 entry->vme_end - entry->vme_start,
6609 PMAP_NULL,
6610 entry->vme_start,
39236c6e
A
6611 VM_PROT_NONE,
6612 PMAP_OPTIONS_REMOVE);
3e170ce0 6613 } else if ((VME_OBJECT(entry) != VM_OBJECT_NULL) ||
39236c6e
A
6614 (map->pmap == kernel_pmap)) {
6615 /* Remove translations associated
6616 * with this range unless the entry
6617 * does not have an object, or
6618 * it's the kernel map or a descendant
6619 * since the platform could potentially
6620 * create "backdoor" mappings invisible
6621 * to the VM. It is expected that
6622 * objectless, non-kernel ranges
6623 * do not have such VM invisible
6624 * translations.
6625 */
6626 pmap_remove_options(map->pmap,
6627 (addr64_t)entry->vme_start,
6628 (addr64_t)entry->vme_end,
6629 PMAP_OPTIONS_REMOVE);
1c79356b
A
6630 }
6631 }
6632
fe8ab488
A
6633 if (entry->iokit_acct) {
6634 /* alternate accounting */
ecc0ceb4
A
6635 DTRACE_VM4(vm_map_iokit_unmapped_region,
6636 vm_map_t, map,
6637 vm_map_offset_t, entry->vme_start,
6638 vm_map_offset_t, entry->vme_end,
6639 int, VME_ALIAS(entry));
fe8ab488
A
6640 vm_map_iokit_unmapped_region(map,
6641 (entry->vme_end -
6642 entry->vme_start));
6643 entry->iokit_acct = FALSE;
6644 }
6645
91447636
A
6646 /*
6647 * All pmap mappings for this map entry must have been
6648 * cleared by now.
6649 */
fe8ab488 6650#if DEBUG
91447636
A
6651 assert(vm_map_pmap_is_empty(map,
6652 entry->vme_start,
6653 entry->vme_end));
fe8ab488 6654#endif /* DEBUG */
91447636 6655
1c79356b 6656 next = entry->vme_next;
fe8ab488
A
6657
6658 if (map->pmap == kernel_pmap &&
6659 map->ref_count != 0 &&
6660 entry->vme_end < end &&
6661 (next == vm_map_to_entry(map) ||
6662 next->vme_start != entry->vme_end)) {
6663 panic("vm_map_delete(%p,0x%llx,0x%llx): "
6664 "hole after %p at 0x%llx\n",
6665 map,
6666 (uint64_t)start,
6667 (uint64_t)end,
6668 entry,
6669 (uint64_t)entry->vme_end);
6670 }
6671
1c79356b
A
6672 s = next->vme_start;
6673 last_timestamp = map->timestamp;
91447636
A
6674
6675 if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) &&
6676 zap_map != VM_MAP_NULL) {
2d21ac55 6677 vm_map_size_t entry_size;
91447636
A
6678 /*
6679 * The caller wants to save the affected VM map entries
6680 * into the "zap_map". The caller will take care of
6681 * these entries.
6682 */
6683 /* unlink the entry from "map" ... */
6d2010ae 6684 vm_map_store_entry_unlink(map, entry);
91447636 6685 /* ... and add it to the end of the "zap_map" */
6d2010ae 6686 vm_map_store_entry_link(zap_map,
91447636
A
6687 vm_map_last_entry(zap_map),
6688 entry);
2d21ac55
A
6689 entry_size = entry->vme_end - entry->vme_start;
6690 map->size -= entry_size;
6691 zap_map->size += entry_size;
6692 /* we didn't unlock the map, so no timestamp increase */
6693 last_timestamp--;
91447636
A
6694 } else {
6695 vm_map_entry_delete(map, entry);
6696 /* vm_map_entry_delete unlocks the map */
6697 vm_map_lock(map);
6698 }
6699
1c79356b
A
6700 entry = next;
6701
6702 if(entry == vm_map_to_entry(map)) {
6703 break;
6704 }
6705 if (last_timestamp+1 != map->timestamp) {
6706 /*
6707 * we are responsible for deleting everything
6708 * from the given space; if someone has interfered,
6709 * we pick up where we left off. Back fills should
6710 * be all right for anyone except map_delete, and
6711 * we have to assume that the task has been fully
6712 * disabled before we get here
6713 */
6714 if (!vm_map_lookup_entry(map, s, &entry)){
6715 entry = entry->vme_next;
2d21ac55 6716 s = entry->vme_start;
1c79356b 6717 } else {
2d21ac55 6718 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
1c79356b
A
6719 }
6720 /*
6721 * others can not only allocate behind us, we can
6722 * also see coalescing while we don't hold the map lock
6723 */
6724 if(entry == vm_map_to_entry(map)) {
6725 break;
6726 }
1c79356b
A
6727 }
6728 last_timestamp = map->timestamp;
6729 }
6730
6731 if (map->wait_for_space)
6732 thread_wakeup((event_t) map);
6733 /*
6734 * wake up anybody waiting on entries that we have already deleted.
6735 */
6736 if (need_wakeup)
6737 vm_map_entry_wakeup(map);
6738
6739 return KERN_SUCCESS;
6740}
6741
6742/*
6743 * vm_map_remove:
6744 *
6745 * Remove the given address range from the target map.
6746 * This is the exported form of vm_map_delete.
6747 */
6748kern_return_t
6749vm_map_remove(
39037602
A
6750 vm_map_t map,
6751 vm_map_offset_t start,
6752 vm_map_offset_t end,
6753 boolean_t flags)
1c79356b 6754{
39037602 6755 kern_return_t result;
9bccf70c 6756
1c79356b
A
6757 vm_map_lock(map);
6758 VM_MAP_RANGE_CHECK(map, start, end);
39236c6e
A
6759 /*
6760 * For the zone_map, the kernel controls the allocation/freeing of memory.
6761 * Any free to the zone_map should be within the bounds of the map and
6762 * should free up memory. If the VM_MAP_RANGE_CHECK() silently converts a
6763 * free to the zone_map into a no-op, there is a problem and we should
6764 * panic.
6765 */
6766 if ((map == zone_map) && (start == end))
6767 panic("Nothing being freed to the zone_map. start = end = %p\n", (void *)start);
91447636 6768 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
1c79356b 6769 vm_map_unlock(map);
91447636 6770
1c79356b
A
6771 return(result);
6772}
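
/*
 * Illustrative sketch with hypothetical values ("addr", "size"): a caller
 * that established exactly one kernel wiring on a kernel_map range and
 * now tears it down passes VM_MAP_REMOVE_KUNWIRE so vm_map_delete()
 * drops that wiring rather than waiting for it; most callers pass
 * VM_MAP_NO_FLAGS and rely on the implicit VM_MAP_REMOVE_WAIT_FOR_KWIRE
 * behaviour documented above vm_map_delete().
 */
#if 0 /* illustrative only */
	kern_return_t kr;

	kr = vm_map_remove(kernel_map,
			   vm_map_trunc_page(addr, VM_MAP_PAGE_MASK(kernel_map)),
			   vm_map_round_page(addr + size, VM_MAP_PAGE_MASK(kernel_map)),
			   VM_MAP_REMOVE_KUNWIRE);
	assert(kr == KERN_SUCCESS);
#endif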
6773
39037602
A
6774/*
6775 * vm_map_remove_locked:
6776 *
6777 * Remove the given address range from the target locked map.
6778 * This is the exported form of vm_map_delete.
6779 */
6780kern_return_t
6781vm_map_remove_locked(
6782 vm_map_t map,
6783 vm_map_offset_t start,
6784 vm_map_offset_t end,
6785 boolean_t flags)
6786{
6787 kern_return_t result;
6788
6789 VM_MAP_RANGE_CHECK(map, start, end);
6790 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
6791 return(result);
6792}
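
/*
 * Illustrative sketch: unlike vm_map_remove(), vm_map_remove_locked()
 * expects the caller to already hold the map lock and leaves it held,
 * so surrounding entries can be examined atomically around the removal.
 * "start" and "end" are assumed, page-aligned bounds.
 */
#if 0 /* illustrative only */
	vm_map_lock(map);
	/* ... inspect or adjust neighbouring entries under the lock ... */
	kr = vm_map_remove_locked(map, start, end, VM_MAP_NO_FLAGS);
	/* the map lock is still held here */
	vm_map_unlock(map);
#endif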
6793
1c79356b 6794
1c79356b
A
6795/*
6796 * Routine: vm_map_copy_discard
6797 *
6798 * Description:
6799 * Dispose of a map copy object (returned by
6800 * vm_map_copyin).
6801 */
6802void
6803vm_map_copy_discard(
6804 vm_map_copy_t copy)
6805{
1c79356b
A
6806 if (copy == VM_MAP_COPY_NULL)
6807 return;
6808
6809 switch (copy->type) {
6810 case VM_MAP_COPY_ENTRY_LIST:
6811 while (vm_map_copy_first_entry(copy) !=
2d21ac55 6812 vm_map_copy_to_entry(copy)) {
1c79356b
A
6813 vm_map_entry_t entry = vm_map_copy_first_entry(copy);
6814
6815 vm_map_copy_entry_unlink(copy, entry);
39236c6e 6816 if (entry->is_sub_map) {
3e170ce0 6817 vm_map_deallocate(VME_SUBMAP(entry));
39236c6e 6818 } else {
3e170ce0 6819 vm_object_deallocate(VME_OBJECT(entry));
39236c6e 6820 }
1c79356b
A
6821 vm_map_copy_entry_dispose(copy, entry);
6822 }
6823 break;
6824 case VM_MAP_COPY_OBJECT:
6825 vm_object_deallocate(copy->cpy_object);
6826 break;
1c79356b
A
6827 case VM_MAP_COPY_KERNEL_BUFFER:
6828
6829 /*
6830 * The vm_map_copy_t and possibly the data buffer were
6831 * allocated by a single call to kalloc(), i.e. the
6832 * vm_map_copy_t was not allocated out of the zone.
6833 */
3e170ce0
A
6834 if (copy->size > msg_ool_size_small || copy->offset)
6835 panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
6836 (long long)copy->size, (long long)copy->offset);
6837 kfree(copy, copy->size + cpy_kdata_hdr_sz);
1c79356b
A
6838 return;
6839 }
91447636 6840 zfree(vm_map_copy_zone, copy);
1c79356b
A
6841}
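
/*
 * Illustrative sketch with a hypothetical consumer ("consume_copy_or_fail"):
 * the ownership rule for vm_map_copy_t. A copy produced by
 * vm_map_copyin() must either be consumed by a successful consumer
 * (e.g. vm_map_copyout() or vm_map_copy_overwrite()) or be released
 * explicitly with vm_map_copy_discard(), as on the error path below.
 */
#if 0 /* illustrative only */
	vm_map_copy_t copy;
	kern_return_t kr;

	kr = vm_map_copyin(src_map, src_addr, len,
			   FALSE,	/* don't destroy the source */
			   &copy);
	if (kr != KERN_SUCCESS)
		return kr;

	kr = consume_copy_or_fail(copy);	/* hypothetical consumer */
	if (kr != KERN_SUCCESS) {
		vm_map_copy_discard(copy);	/* still ours on failure */
		return kr;
	}
	/* on success the consumer took ownership of "copy" */
#endif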
6842
6843/*
6844 * Routine: vm_map_copy_copy
6845 *
6846 * Description:
6847 * Move the information in a map copy object to
6848 * a new map copy object, leaving the old one
6849 * empty.
6850 *
6851 * This is used by kernel routines that need
6852 * to look at out-of-line data (in copyin form)
6853 * before deciding whether to return SUCCESS.
6854 * If the routine returns FAILURE, the original
6855 * copy object will be deallocated; therefore,
6856 * these routines must make a copy of the copy
6857 * object and leave the original empty so that
6858 * deallocation will not fail.
6859 */
6860vm_map_copy_t
6861vm_map_copy_copy(
6862 vm_map_copy_t copy)
6863{
6864 vm_map_copy_t new_copy;
6865
6866 if (copy == VM_MAP_COPY_NULL)
6867 return VM_MAP_COPY_NULL;
6868
6869 /*
6870 * Allocate a new copy object, and copy the information
6871 * from the old one into it.
6872 */
6873
6874 new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
04b8595b 6875 new_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
1c79356b
A
6876 *new_copy = *copy;
6877
6878 if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
6879 /*
6880 * The links in the entry chain must be
6881 * changed to point to the new copy object.
6882 */
6883 vm_map_copy_first_entry(copy)->vme_prev
6884 = vm_map_copy_to_entry(new_copy);
6885 vm_map_copy_last_entry(copy)->vme_next
6886 = vm_map_copy_to_entry(new_copy);
6887 }
6888
6889 /*
6890 * Change the old copy object into one that contains
6891 * nothing to be deallocated.
6892 */
6893 copy->type = VM_MAP_COPY_OBJECT;
6894 copy->cpy_object = VM_OBJECT_NULL;
6895
6896 /*
6897 * Return the new object.
6898 */
6899 return new_copy;
6900}
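
/*
 * Illustrative sketch with hypothetical names ("inspect_ool_data",
 * "examine"): the pattern vm_map_copy_copy() exists for. A routine that
 * must look at out-of-line data before deciding success moves the
 * contents into its own copy object first; the original is left empty,
 * so the caller's deallocation on a FAILURE return is harmless.
 */
#if 0 /* illustrative only */
static kern_return_t
inspect_ool_data(vm_map_copy_t copy)
{
	vm_map_copy_t my_copy;
	kern_return_t kr;

	my_copy = vm_map_copy_copy(copy);	/* "copy" is now empty */

	kr = examine(my_copy);			/* hypothetical check */
	if (kr != KERN_SUCCESS) {
		vm_map_copy_discard(my_copy);	/* we own the moved data */
		return kr;	/* caller may safely discard the emptied "copy" */
	}
	/* proceed to consume my_copy (e.g. via vm_map_copyout()) */
	return KERN_SUCCESS;
}
#endif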
6901
91447636 6902static kern_return_t
1c79356b
A
6903vm_map_overwrite_submap_recurse(
6904 vm_map_t dst_map,
91447636
A
6905 vm_map_offset_t dst_addr,
6906 vm_map_size_t dst_size)
1c79356b 6907{
91447636 6908 vm_map_offset_t dst_end;
1c79356b
A
6909 vm_map_entry_t tmp_entry;
6910 vm_map_entry_t entry;
6911 kern_return_t result;
6912 boolean_t encountered_sub_map = FALSE;
6913
6914
6915
6916 /*
6917 * Verify that the destination is all writeable
6918 * initially. We have to trunc the destination
6919 * address and round the copy size or we'll end up
6920 * splitting entries in strange ways.
6921 */
6922
39236c6e
A
6923 dst_end = vm_map_round_page(dst_addr + dst_size,
6924 VM_MAP_PAGE_MASK(dst_map));
9bccf70c 6925 vm_map_lock(dst_map);
1c79356b
A
6926
6927start_pass_1:
1c79356b
A
6928 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
6929 vm_map_unlock(dst_map);
6930 return(KERN_INVALID_ADDRESS);
6931 }
6932
39236c6e
A
6933 vm_map_clip_start(dst_map,
6934 tmp_entry,
6935 vm_map_trunc_page(dst_addr,
6936 VM_MAP_PAGE_MASK(dst_map)));
fe8ab488
A
6937 if (tmp_entry->is_sub_map) {
6938 /* clipping did unnest if needed */
6939 assert(!tmp_entry->use_pmap);
6940 }
1c79356b
A
6941
6942 for (entry = tmp_entry;;) {
6943 vm_map_entry_t next;
6944
6945 next = entry->vme_next;
6946 while(entry->is_sub_map) {
91447636
A
6947 vm_map_offset_t sub_start;
6948 vm_map_offset_t sub_end;
6949 vm_map_offset_t local_end;
1c79356b
A
6950
6951 if (entry->in_transition) {
2d21ac55
A
6952 /*
6953 * Say that we are waiting, and wait for entry.
6954 */
1c79356b
A
6955 entry->needs_wakeup = TRUE;
6956 vm_map_entry_wait(dst_map, THREAD_UNINT);
6957
6958 goto start_pass_1;
6959 }
6960
6961 encountered_sub_map = TRUE;
3e170ce0 6962 sub_start = VME_OFFSET(entry);
1c79356b
A
6963
6964 if(entry->vme_end < dst_end)
6965 sub_end = entry->vme_end;
6966 else
6967 sub_end = dst_end;
6968 sub_end -= entry->vme_start;
3e170ce0 6969 sub_end += VME_OFFSET(entry);
1c79356b
A
6970 local_end = entry->vme_end;
6971 vm_map_unlock(dst_map);
6972
6973 result = vm_map_overwrite_submap_recurse(
3e170ce0 6974 VME_SUBMAP(entry),
2d21ac55
A
6975 sub_start,
6976 sub_end - sub_start);
1c79356b
A
6977
6978 if(result != KERN_SUCCESS)
6979 return result;
6980 if (dst_end <= entry->vme_end)
6981 return KERN_SUCCESS;
6982 vm_map_lock(dst_map);
6983 if(!vm_map_lookup_entry(dst_map, local_end,
6984 &tmp_entry)) {
6985 vm_map_unlock(dst_map);
6986 return(KERN_INVALID_ADDRESS);
6987 }
6988 entry = tmp_entry;
6989 next = entry->vme_next;
6990 }
6991
6992 if ( ! (entry->protection & VM_PROT_WRITE)) {
6993 vm_map_unlock(dst_map);
6994 return(KERN_PROTECTION_FAILURE);
6995 }
6996
6997 /*
6998 * If the entry is in transition, we must wait
6999 * for it to exit that state. Anything could happen
7000 * when we unlock the map, so start over.
7001 */
7002 if (entry->in_transition) {
7003
7004 /*
7005 * Say that we are waiting, and wait for entry.
7006 */
7007 entry->needs_wakeup = TRUE;
7008 vm_map_entry_wait(dst_map, THREAD_UNINT);
7009
7010 goto start_pass_1;
7011 }
7012
7013/*
7014 * our range is contained completely within this map entry
7015 */
7016 if (dst_end <= entry->vme_end) {
7017 vm_map_unlock(dst_map);
7018 return KERN_SUCCESS;
7019 }
7020/*
7021 * check that range specified is contiguous region
7022 */
7023 if ((next == vm_map_to_entry(dst_map)) ||
7024 (next->vme_start != entry->vme_end)) {
7025 vm_map_unlock(dst_map);
7026 return(KERN_INVALID_ADDRESS);
7027 }
7028
7029 /*
7030 * Check for permanent objects in the destination.
7031 */
3e170ce0
A
7032 if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
7033 ((!VME_OBJECT(entry)->internal) ||
7034 (VME_OBJECT(entry)->true_share))) {
1c79356b
A
7035 if(encountered_sub_map) {
7036 vm_map_unlock(dst_map);
7037 return(KERN_FAILURE);
7038 }
7039 }
7040
7041
7042 entry = next;
7043 }/* for */
7044 vm_map_unlock(dst_map);
7045 return(KERN_SUCCESS);
7046}
7047
7048/*
7049 * Routine: vm_map_copy_overwrite
7050 *
7051 * Description:
7052 * Copy the memory described by the map copy
7053 * object (copy; returned by vm_map_copyin) onto
7054 * the specified destination region (dst_map, dst_addr).
7055 * The destination must be writeable.
7056 *
7057 * Unlike vm_map_copyout, this routine actually
7058 * writes over previously-mapped memory. If the
7059 * previous mapping was to a permanent (user-supplied)
7060 * memory object, it is preserved.
7061 *
7062 * The attributes (protection and inheritance) of the
7063 * destination region are preserved.
7064 *
7065 * If successful, consumes the copy object.
7066 * Otherwise, the caller is responsible for it.
7067 *
7068 * Implementation notes:
7069 * To overwrite aligned temporary virtual memory, it is
7070 * sufficient to remove the previous mapping and insert
7071 * the new copy. This replacement is done either on
7072 * the whole region (if no permanent virtual memory
7073 * objects are embedded in the destination region) or
7074 * in individual map entries.
7075 *
7076 * To overwrite permanent virtual memory, it is necessary
7077 * to copy each page, as the external memory management
7078 * interface currently does not provide any optimizations.
7079 *
7080 * Unaligned memory also has to be copied. It is possible
7081 * to use 'vm_trickery' to copy the aligned data. This is
7082 * not done but not hard to implement.
7083 *
7084 * Once a page of permanent memory has been overwritten,
7085 * it is impossible to interrupt this function; otherwise,
7086 * the call would be neither atomic nor location-independent.
7087 * The kernel-state portion of a user thread must be
7088 * interruptible.
7089 *
7090 * It may be expensive to forward all requests that might
7091 * overwrite permanent memory (vm_write, vm_copy) to
7092 * uninterruptible kernel threads. This routine may be
7093 * called by interruptible threads; however, success is
7094 * not guaranteed -- if the request cannot be performed
7095 * atomically and interruptibly, an error indication is
7096 * returned.
7097 */
7098
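/*
 * Illustrative sketch of the contract described above, with assumed
 * variables ("dst_map", "dst_addr", "copy"): the exported
 * vm_map_copy_overwrite() wrapper consumes the copy object only on
 * success; on failure it remains the caller's to discard.
 */
#if 0 /* illustrative only */
	kr = vm_map_copy_overwrite(dst_map, dst_addr, copy,
				   FALSE);	/* not interruptible */
	if (kr != KERN_SUCCESS) {
		vm_map_copy_discard(copy);	/* still ours on failure */
		return kr;
	}
	/* "copy" has been consumed; do not reference it again */
#endif
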
91447636 7099static kern_return_t
1c79356b 7100vm_map_copy_overwrite_nested(
91447636
A
7101 vm_map_t dst_map,
7102 vm_map_address_t dst_addr,
7103 vm_map_copy_t copy,
7104 boolean_t interruptible,
6d2010ae
A
7105 pmap_t pmap,
7106 boolean_t discard_on_success)
1c79356b 7107{
91447636
A
7108 vm_map_offset_t dst_end;
7109 vm_map_entry_t tmp_entry;
7110 vm_map_entry_t entry;
7111 kern_return_t kr;
7112 boolean_t aligned = TRUE;
7113 boolean_t contains_permanent_objects = FALSE;
7114 boolean_t encountered_sub_map = FALSE;
7115 vm_map_offset_t base_addr;
7116 vm_map_size_t copy_size;
7117 vm_map_size_t total_size;
1c79356b
A
7118
7119
7120 /*
7121 * Check for null copy object.
7122 */
7123
7124 if (copy == VM_MAP_COPY_NULL)
7125 return(KERN_SUCCESS);
7126
7127 /*
7128 * Check for special kernel buffer allocated
7129 * by new_ipc_kmsg_copyin.
7130 */
7131
7132 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
0b4e3aa0 7133 return(vm_map_copyout_kernel_buffer(
2d21ac55 7134 dst_map, &dst_addr,
39037602 7135 copy, copy->size, TRUE, discard_on_success));
1c79356b
A
7136 }
7137
7138 /*
7139 * Only works for entry lists at the moment. Will
7140 * support page lists later.
7141 */
7142
7143 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
7144
7145 if (copy->size == 0) {
6d2010ae
A
7146 if (discard_on_success)
7147 vm_map_copy_discard(copy);
1c79356b
A
7148 return(KERN_SUCCESS);
7149 }
7150
7151 /*
7152 * Verify that the destination is all writeable
7153 * initially. We have to trunc the destination
7154 * address and round the copy size or we'll end up
7155 * splitting entries in strange ways.
7156 */
7157
39236c6e
A
7158 if (!VM_MAP_PAGE_ALIGNED(copy->size,
7159 VM_MAP_PAGE_MASK(dst_map)) ||
7160 !VM_MAP_PAGE_ALIGNED(copy->offset,
7161 VM_MAP_PAGE_MASK(dst_map)) ||
7162 !VM_MAP_PAGE_ALIGNED(dst_addr,
fe8ab488 7163 VM_MAP_PAGE_MASK(dst_map)))
1c79356b
A
7164 {
7165 aligned = FALSE;
39236c6e
A
7166 dst_end = vm_map_round_page(dst_addr + copy->size,
7167 VM_MAP_PAGE_MASK(dst_map));
1c79356b
A
7168 } else {
7169 dst_end = dst_addr + copy->size;
7170 }
7171
1c79356b 7172 vm_map_lock(dst_map);
9bccf70c 7173
91447636
A
7174 /* LP64todo - remove this check when vm_map_commpage64()
7175 * no longer has to stuff in a map_entry for the commpage
7176 * above the map's max_offset.
7177 */
7178 if (dst_addr >= dst_map->max_offset) {
7179 vm_map_unlock(dst_map);
7180 return(KERN_INVALID_ADDRESS);
7181 }
7182
9bccf70c 7183start_pass_1:
1c79356b
A
7184 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
7185 vm_map_unlock(dst_map);
7186 return(KERN_INVALID_ADDRESS);
7187 }
39236c6e
A
7188 vm_map_clip_start(dst_map,
7189 tmp_entry,
7190 vm_map_trunc_page(dst_addr,
7191 VM_MAP_PAGE_MASK(dst_map)));
1c79356b
A
7192 for (entry = tmp_entry;;) {
7193 vm_map_entry_t next = entry->vme_next;
7194
7195 while(entry->is_sub_map) {
91447636
A
7196 vm_map_offset_t sub_start;
7197 vm_map_offset_t sub_end;
7198 vm_map_offset_t local_end;
1c79356b
A
7199
7200 if (entry->in_transition) {
7201
2d21ac55
A
7202 /*
7203 * Say that we are waiting, and wait for entry.
7204 */
1c79356b
A
7205 entry->needs_wakeup = TRUE;
7206 vm_map_entry_wait(dst_map, THREAD_UNINT);
7207
7208 goto start_pass_1;
7209 }
7210
7211 local_end = entry->vme_end;
7212 if (!(entry->needs_copy)) {
7213 /* if needs_copy we are a COW submap */
7214 /* in such a case we just replace so */
7215 /* there is no need for the follow- */
7216 /* ing check. */
7217 encountered_sub_map = TRUE;
3e170ce0 7218 sub_start = VME_OFFSET(entry);
1c79356b
A
7219
7220 if(entry->vme_end < dst_end)
7221 sub_end = entry->vme_end;
7222 else
7223 sub_end = dst_end;
7224 sub_end -= entry->vme_start;
3e170ce0 7225 sub_end += VME_OFFSET(entry);
1c79356b
A
7226 vm_map_unlock(dst_map);
7227
7228 kr = vm_map_overwrite_submap_recurse(
3e170ce0 7229 VME_SUBMAP(entry),
1c79356b
A
7230 sub_start,
7231 sub_end - sub_start);
7232 if(kr != KERN_SUCCESS)
7233 return kr;
7234 vm_map_lock(dst_map);
7235 }
7236
7237 if (dst_end <= entry->vme_end)
7238 goto start_overwrite;
7239 if(!vm_map_lookup_entry(dst_map, local_end,
7240 &entry)) {
7241 vm_map_unlock(dst_map);
7242 return(KERN_INVALID_ADDRESS);
7243 }
7244 next = entry->vme_next;
7245 }
7246
7247 if ( ! (entry->protection & VM_PROT_WRITE)) {
7248 vm_map_unlock(dst_map);
7249 return(KERN_PROTECTION_FAILURE);
7250 }
7251
7252 /*
7253 * If the entry is in transition, we must wait
7254 * for it to exit that state. Anything could happen
7255 * when we unlock the map, so start over.
7256 */
7257 if (entry->in_transition) {
7258
7259 /*
7260 * Say that we are waiting, and wait for entry.
7261 */
7262 entry->needs_wakeup = TRUE;
7263 vm_map_entry_wait(dst_map, THREAD_UNINT);
7264
7265 goto start_pass_1;
7266 }
7267
7268/*
7269 * our range is contained completely within this map entry
7270 */
7271 if (dst_end <= entry->vme_end)
7272 break;
7273/*
7274 * check that range specified is contiguous region
7275 */
7276 if ((next == vm_map_to_entry(dst_map)) ||
7277 (next->vme_start != entry->vme_end)) {
7278 vm_map_unlock(dst_map);
7279 return(KERN_INVALID_ADDRESS);
7280 }
7281
7282
7283 /*
7284 * Check for permanent objects in the destination.
7285 */
3e170ce0
A
7286 if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
7287 ((!VME_OBJECT(entry)->internal) ||
7288 (VME_OBJECT(entry)->true_share))) {
1c79356b
A
7289 contains_permanent_objects = TRUE;
7290 }
7291
7292 entry = next;
7293 }/* for */
7294
7295start_overwrite:
7296 /*
7297 * If there are permanent objects in the destination, then
7298 * the copy cannot be interrupted.
7299 */
7300
7301 if (interruptible && contains_permanent_objects) {
7302 vm_map_unlock(dst_map);
7303 return(KERN_FAILURE); /* XXX */
7304 }
7305
7306 /*
7307 *
7308 * Make a second pass, overwriting the data
7309 * At the beginning of each loop iteration,
7310 * the next entry to be overwritten is "tmp_entry"
7311 * (initially, the value returned from the lookup above),
7312 * and the starting address expected in that entry
7313 * is "start".
7314 */
7315
7316 total_size = copy->size;
7317 if(encountered_sub_map) {
7318 copy_size = 0;
7319 /* re-calculate tmp_entry since we've had the map */
7320 /* unlocked */
7321 if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
7322 vm_map_unlock(dst_map);
7323 return(KERN_INVALID_ADDRESS);
7324 }
7325 } else {
7326 copy_size = copy->size;
7327 }
7328
7329 base_addr = dst_addr;
7330 while(TRUE) {
7331 /* deconstruct the copy object and do in parts */
7332 /* only in sub_map, interruptible case */
7333 vm_map_entry_t copy_entry;
91447636
A
7334 vm_map_entry_t previous_prev = VM_MAP_ENTRY_NULL;
7335 vm_map_entry_t next_copy = VM_MAP_ENTRY_NULL;
1c79356b 7336 int nentries;
91447636 7337 int remaining_entries = 0;
b0d623f7 7338 vm_map_offset_t new_offset = 0;
1c79356b
A
7339
7340 for (entry = tmp_entry; copy_size == 0;) {
7341 vm_map_entry_t next;
7342
7343 next = entry->vme_next;
7344
7345 /* tmp_entry and base address are moved along */
7346 /* each time we encounter a sub-map. Otherwise */
7347 /* entry can outpace tmp_entry, and the copy_size */
7348 /* may reflect the distance between them */
7349 /* if the current entry is found to be in transition */
7350 /* we will start over at the beginning or the last */
7351 /* encounter of a submap as dictated by base_addr */
7352 /* we will zero copy_size accordingly. */
7353 if (entry->in_transition) {
7354 /*
7355 * Say that we are waiting, and wait for entry.
7356 */
7357 entry->needs_wakeup = TRUE;
7358 vm_map_entry_wait(dst_map, THREAD_UNINT);
7359
1c79356b 7360 if(!vm_map_lookup_entry(dst_map, base_addr,
2d21ac55 7361 &tmp_entry)) {
1c79356b
A
7362 vm_map_unlock(dst_map);
7363 return(KERN_INVALID_ADDRESS);
7364 }
7365 copy_size = 0;
7366 entry = tmp_entry;
7367 continue;
7368 }
7369 if(entry->is_sub_map) {
91447636
A
7370 vm_map_offset_t sub_start;
7371 vm_map_offset_t sub_end;
7372 vm_map_offset_t local_end;
1c79356b
A
7373
7374 if (entry->needs_copy) {
7375 /* if this is a COW submap */
 7376 /* just back the range with an */
 7377 /* anonymous entry */
7378 if(entry->vme_end < dst_end)
7379 sub_end = entry->vme_end;
7380 else
7381 sub_end = dst_end;
7382 if(entry->vme_start < base_addr)
7383 sub_start = base_addr;
7384 else
7385 sub_start = entry->vme_start;
7386 vm_map_clip_end(
7387 dst_map, entry, sub_end);
7388 vm_map_clip_start(
7389 dst_map, entry, sub_start);
2d21ac55 7390 assert(!entry->use_pmap);
1c79356b
A
7391 entry->is_sub_map = FALSE;
7392 vm_map_deallocate(
3e170ce0
A
7393 VME_SUBMAP(entry));
7394 VME_SUBMAP_SET(entry, NULL);
1c79356b
A
7395 entry->is_shared = FALSE;
7396 entry->needs_copy = FALSE;
3e170ce0 7397 VME_OFFSET_SET(entry, 0);
2d21ac55
A
7398 /*
7399 * XXX FBDP
7400 * We should propagate the protections
7401 * of the submap entry here instead
7402 * of forcing them to VM_PROT_ALL...
7403 * Or better yet, we should inherit
7404 * the protection of the copy_entry.
7405 */
1c79356b
A
7406 entry->protection = VM_PROT_ALL;
7407 entry->max_protection = VM_PROT_ALL;
7408 entry->wired_count = 0;
7409 entry->user_wired_count = 0;
7410 if(entry->inheritance
2d21ac55
A
7411 == VM_INHERIT_SHARE)
7412 entry->inheritance = VM_INHERIT_COPY;
1c79356b
A
7413 continue;
7414 }
7415 /* first take care of any non-sub_map */
7416 /* entries to send */
7417 if(base_addr < entry->vme_start) {
7418 /* stuff to send */
7419 copy_size =
7420 entry->vme_start - base_addr;
7421 break;
7422 }
3e170ce0 7423 sub_start = VME_OFFSET(entry);
1c79356b
A
7424
7425 if(entry->vme_end < dst_end)
7426 sub_end = entry->vme_end;
7427 else
7428 sub_end = dst_end;
7429 sub_end -= entry->vme_start;
3e170ce0 7430 sub_end += VME_OFFSET(entry);
1c79356b
A
7431 local_end = entry->vme_end;
7432 vm_map_unlock(dst_map);
7433 copy_size = sub_end - sub_start;
7434
7435 /* adjust the copy object */
7436 if (total_size > copy_size) {
91447636
A
7437 vm_map_size_t local_size = 0;
7438 vm_map_size_t entry_size;
1c79356b 7439
2d21ac55
A
7440 nentries = 1;
7441 new_offset = copy->offset;
7442 copy_entry = vm_map_copy_first_entry(copy);
7443 while(copy_entry !=
7444 vm_map_copy_to_entry(copy)){
7445 entry_size = copy_entry->vme_end -
7446 copy_entry->vme_start;
7447 if((local_size < copy_size) &&
7448 ((local_size + entry_size)
7449 >= copy_size)) {
7450 vm_map_copy_clip_end(copy,
7451 copy_entry,
7452 copy_entry->vme_start +
7453 (copy_size - local_size));
7454 entry_size = copy_entry->vme_end -
7455 copy_entry->vme_start;
7456 local_size += entry_size;
7457 new_offset += entry_size;
7458 }
7459 if(local_size >= copy_size) {
7460 next_copy = copy_entry->vme_next;
7461 copy_entry->vme_next =
7462 vm_map_copy_to_entry(copy);
7463 previous_prev =
7464 copy->cpy_hdr.links.prev;
7465 copy->cpy_hdr.links.prev = copy_entry;
7466 copy->size = copy_size;
7467 remaining_entries =
7468 copy->cpy_hdr.nentries;
7469 remaining_entries -= nentries;
7470 copy->cpy_hdr.nentries = nentries;
7471 break;
7472 } else {
7473 local_size += entry_size;
7474 new_offset += entry_size;
7475 nentries++;
7476 }
7477 copy_entry = copy_entry->vme_next;
7478 }
1c79356b
A
7479 }
7480
7481 if((entry->use_pmap) && (pmap == NULL)) {
7482 kr = vm_map_copy_overwrite_nested(
3e170ce0 7483 VME_SUBMAP(entry),
1c79356b
A
7484 sub_start,
7485 copy,
7486 interruptible,
3e170ce0 7487 VME_SUBMAP(entry)->pmap,
6d2010ae 7488 TRUE);
1c79356b
A
7489 } else if (pmap != NULL) {
7490 kr = vm_map_copy_overwrite_nested(
3e170ce0 7491 VME_SUBMAP(entry),
1c79356b
A
7492 sub_start,
7493 copy,
6d2010ae
A
7494 interruptible, pmap,
7495 TRUE);
1c79356b
A
7496 } else {
7497 kr = vm_map_copy_overwrite_nested(
3e170ce0 7498 VME_SUBMAP(entry),
1c79356b
A
7499 sub_start,
7500 copy,
7501 interruptible,
6d2010ae
A
7502 dst_map->pmap,
7503 TRUE);
1c79356b
A
7504 }
7505 if(kr != KERN_SUCCESS) {
7506 if(next_copy != NULL) {
2d21ac55
A
7507 copy->cpy_hdr.nentries +=
7508 remaining_entries;
7509 copy->cpy_hdr.links.prev->vme_next =
7510 next_copy;
7511 copy->cpy_hdr.links.prev
7512 = previous_prev;
7513 copy->size = total_size;
1c79356b
A
7514 }
7515 return kr;
7516 }
7517 if (dst_end <= local_end) {
7518 return(KERN_SUCCESS);
7519 }
7520 /* otherwise copy no longer exists, it was */
7521 /* destroyed after successful copy_overwrite */
7522 copy = (vm_map_copy_t)
2d21ac55 7523 zalloc(vm_map_copy_zone);
04b8595b 7524 copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
1c79356b 7525 vm_map_copy_first_entry(copy) =
2d21ac55
A
7526 vm_map_copy_last_entry(copy) =
7527 vm_map_copy_to_entry(copy);
1c79356b
A
7528 copy->type = VM_MAP_COPY_ENTRY_LIST;
7529 copy->offset = new_offset;
7530
e2d2fc5c
A
7531 /*
7532 * XXX FBDP
7533 * this does not seem to deal with
7534 * the VM map store (R&B tree)
7535 */
7536
1c79356b
A
7537 total_size -= copy_size;
7538 copy_size = 0;
7539 /* put back remainder of copy in container */
7540 if(next_copy != NULL) {
2d21ac55
A
7541 copy->cpy_hdr.nentries = remaining_entries;
7542 copy->cpy_hdr.links.next = next_copy;
7543 copy->cpy_hdr.links.prev = previous_prev;
7544 copy->size = total_size;
7545 next_copy->vme_prev =
7546 vm_map_copy_to_entry(copy);
7547 next_copy = NULL;
1c79356b
A
7548 }
7549 base_addr = local_end;
7550 vm_map_lock(dst_map);
7551 if(!vm_map_lookup_entry(dst_map,
2d21ac55 7552 local_end, &tmp_entry)) {
1c79356b
A
7553 vm_map_unlock(dst_map);
7554 return(KERN_INVALID_ADDRESS);
7555 }
7556 entry = tmp_entry;
7557 continue;
7558 }
7559 if (dst_end <= entry->vme_end) {
7560 copy_size = dst_end - base_addr;
7561 break;
7562 }
7563
7564 if ((next == vm_map_to_entry(dst_map)) ||
2d21ac55 7565 (next->vme_start != entry->vme_end)) {
1c79356b
A
7566 vm_map_unlock(dst_map);
7567 return(KERN_INVALID_ADDRESS);
7568 }
7569
7570 entry = next;
7571 }/* for */
7572
7573 next_copy = NULL;
7574 nentries = 1;
7575
7576 /* adjust the copy object */
7577 if (total_size > copy_size) {
91447636
A
7578 vm_map_size_t local_size = 0;
7579 vm_map_size_t entry_size;
1c79356b
A
7580
7581 new_offset = copy->offset;
7582 copy_entry = vm_map_copy_first_entry(copy);
7583 while(copy_entry != vm_map_copy_to_entry(copy)) {
7584 entry_size = copy_entry->vme_end -
2d21ac55 7585 copy_entry->vme_start;
1c79356b 7586 if((local_size < copy_size) &&
2d21ac55
A
7587 ((local_size + entry_size)
7588 >= copy_size)) {
1c79356b 7589 vm_map_copy_clip_end(copy, copy_entry,
2d21ac55
A
7590 copy_entry->vme_start +
7591 (copy_size - local_size));
1c79356b 7592 entry_size = copy_entry->vme_end -
2d21ac55 7593 copy_entry->vme_start;
1c79356b
A
7594 local_size += entry_size;
7595 new_offset += entry_size;
7596 }
7597 if(local_size >= copy_size) {
7598 next_copy = copy_entry->vme_next;
7599 copy_entry->vme_next =
7600 vm_map_copy_to_entry(copy);
7601 previous_prev =
7602 copy->cpy_hdr.links.prev;
7603 copy->cpy_hdr.links.prev = copy_entry;
7604 copy->size = copy_size;
7605 remaining_entries =
7606 copy->cpy_hdr.nentries;
7607 remaining_entries -= nentries;
7608 copy->cpy_hdr.nentries = nentries;
7609 break;
7610 } else {
7611 local_size += entry_size;
7612 new_offset += entry_size;
7613 nentries++;
7614 }
7615 copy_entry = copy_entry->vme_next;
7616 }
7617 }
7618
7619 if (aligned) {
7620 pmap_t local_pmap;
7621
7622 if(pmap)
7623 local_pmap = pmap;
7624 else
7625 local_pmap = dst_map->pmap;
7626
7627 if ((kr = vm_map_copy_overwrite_aligned(
2d21ac55
A
7628 dst_map, tmp_entry, copy,
7629 base_addr, local_pmap)) != KERN_SUCCESS) {
1c79356b
A
7630 if(next_copy != NULL) {
7631 copy->cpy_hdr.nentries +=
2d21ac55 7632 remaining_entries;
1c79356b 7633 copy->cpy_hdr.links.prev->vme_next =
2d21ac55 7634 next_copy;
1c79356b 7635 copy->cpy_hdr.links.prev =
2d21ac55 7636 previous_prev;
1c79356b
A
7637 copy->size += copy_size;
7638 }
7639 return kr;
7640 }
7641 vm_map_unlock(dst_map);
7642 } else {
2d21ac55
A
7643 /*
7644 * Performance gain:
7645 *
 7646 * If the copy and dst addresses are misaligned but share the same
 7647 * offset within the page, we can copy the misaligned parts with the
 7648 * unaligned path and copy the rest aligned. If they are
 7649 * aligned but len is unaligned, we simply need to copy
 7650 * the end bit unaligned. We'll need to split off the misaligned
 7651 * bits of the region in this case!
7652 */
7653 /* ALWAYS UNLOCKS THE dst_map MAP */
39236c6e
A
7654 kr = vm_map_copy_overwrite_unaligned(
7655 dst_map,
7656 tmp_entry,
7657 copy,
7658 base_addr,
7659 discard_on_success);
7660 if (kr != KERN_SUCCESS) {
1c79356b
A
7661 if(next_copy != NULL) {
7662 copy->cpy_hdr.nentries +=
2d21ac55 7663 remaining_entries;
1c79356b 7664 copy->cpy_hdr.links.prev->vme_next =
2d21ac55 7665 next_copy;
1c79356b
A
7666 copy->cpy_hdr.links.prev =
7667 previous_prev;
7668 copy->size += copy_size;
7669 }
7670 return kr;
7671 }
7672 }
7673 total_size -= copy_size;
7674 if(total_size == 0)
7675 break;
7676 base_addr += copy_size;
7677 copy_size = 0;
7678 copy->offset = new_offset;
7679 if(next_copy != NULL) {
7680 copy->cpy_hdr.nentries = remaining_entries;
7681 copy->cpy_hdr.links.next = next_copy;
7682 copy->cpy_hdr.links.prev = previous_prev;
7683 next_copy->vme_prev = vm_map_copy_to_entry(copy);
7684 copy->size = total_size;
7685 }
7686 vm_map_lock(dst_map);
7687 while(TRUE) {
7688 if (!vm_map_lookup_entry(dst_map,
2d21ac55 7689 base_addr, &tmp_entry)) {
1c79356b
A
7690 vm_map_unlock(dst_map);
7691 return(KERN_INVALID_ADDRESS);
7692 }
7693 if (tmp_entry->in_transition) {
7694 entry->needs_wakeup = TRUE;
7695 vm_map_entry_wait(dst_map, THREAD_UNINT);
7696 } else {
7697 break;
7698 }
7699 }
39236c6e
A
7700 vm_map_clip_start(dst_map,
7701 tmp_entry,
7702 vm_map_trunc_page(base_addr,
7703 VM_MAP_PAGE_MASK(dst_map)));
1c79356b
A
7704
7705 entry = tmp_entry;
7706 } /* while */
7707
7708 /*
7709 * Throw away the vm_map_copy object
7710 */
6d2010ae
A
7711 if (discard_on_success)
7712 vm_map_copy_discard(copy);
1c79356b
A
7713
7714 return(KERN_SUCCESS);
7715}/* vm_map_copy_overwrite_nested */
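/*
 * Illustration (not part of the build): the chunk accounting used by
 * vm_map_copy_overwrite_nested() above when submaps force the copy to
 * be processed in pieces. The helper name is hypothetical; it only
 * restates the bookkeeping done at the bottom of the main loop.
 */
#if 0
static void
vm_map_overwrite_chunk_accounting_example(
	vm_map_size_t	*total_size,	/* bytes still to overwrite overall */
	vm_map_size_t	copy_size,	/* bytes handled by this pass */
	vm_map_offset_t	*base_addr)	/* next destination address */
{
	/* after each pass that overwrites one chunk of the destination */
	*total_size -= copy_size;
	*base_addr  += copy_size;
	/* copy_size is then reset to 0 and recomputed for the next chunk */
}
#endif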
7716
7717kern_return_t
7718vm_map_copy_overwrite(
7719 vm_map_t dst_map,
91447636 7720 vm_map_offset_t dst_addr,
1c79356b
A
7721 vm_map_copy_t copy,
7722 boolean_t interruptible)
7723{
6d2010ae
A
7724 vm_map_size_t head_size, tail_size;
7725 vm_map_copy_t head_copy, tail_copy;
7726 vm_map_offset_t head_addr, tail_addr;
7727 vm_map_entry_t entry;
7728 kern_return_t kr;
7729
7730 head_size = 0;
7731 tail_size = 0;
7732 head_copy = NULL;
7733 tail_copy = NULL;
7734 head_addr = 0;
7735 tail_addr = 0;
7736
7737 if (interruptible ||
7738 copy == VM_MAP_COPY_NULL ||
7739 copy->type != VM_MAP_COPY_ENTRY_LIST) {
7740 /*
7741 * We can't split the "copy" map if we're interruptible
7742 * or if we don't have a "copy" map...
7743 */
7744 blunt_copy:
7745 return vm_map_copy_overwrite_nested(dst_map,
7746 dst_addr,
7747 copy,
7748 interruptible,
7749 (pmap_t) NULL,
7750 TRUE);
7751 }
7752
7753 if (copy->size < 3 * PAGE_SIZE) {
7754 /*
7755 * Too small to bother with optimizing...
7756 */
7757 goto blunt_copy;
7758 }
7759
39236c6e
A
7760 if ((dst_addr & VM_MAP_PAGE_MASK(dst_map)) !=
7761 (copy->offset & VM_MAP_PAGE_MASK(dst_map))) {
6d2010ae
A
7762 /*
7763 * Incompatible mis-alignment of source and destination...
7764 */
7765 goto blunt_copy;
7766 }
7767
7768 /*
7769 * Proper alignment or identical mis-alignment at the beginning.
7770 * Let's try and do a small unaligned copy first (if needed)
7771 * and then an aligned copy for the rest.
7772 */
7773 if (!page_aligned(dst_addr)) {
7774 head_addr = dst_addr;
39236c6e
A
7775 head_size = (VM_MAP_PAGE_SIZE(dst_map) -
7776 (copy->offset & VM_MAP_PAGE_MASK(dst_map)));
6d2010ae
A
7777 }
7778 if (!page_aligned(copy->offset + copy->size)) {
7779 /*
7780 * Mis-alignment at the end.
7781 * Do an aligned copy up to the last page and
7782 * then an unaligned copy for the remaining bytes.
7783 */
39236c6e
A
7784 tail_size = ((copy->offset + copy->size) &
7785 VM_MAP_PAGE_MASK(dst_map));
6d2010ae
A
7786 tail_addr = dst_addr + copy->size - tail_size;
7787 }
7788
7789 if (head_size + tail_size == copy->size) {
7790 /*
7791 * It's all unaligned, no optimization possible...
7792 */
7793 goto blunt_copy;
7794 }
7795
7796 /*
7797 * Can't optimize if there are any submaps in the
7798 * destination due to the way we free the "copy" map
7799 * progressively in vm_map_copy_overwrite_nested()
7800 * in that case.
7801 */
7802 vm_map_lock_read(dst_map);
7803 if (! vm_map_lookup_entry(dst_map, dst_addr, &entry)) {
7804 vm_map_unlock_read(dst_map);
7805 goto blunt_copy;
7806 }
7807 for (;
7808 (entry != vm_map_copy_to_entry(copy) &&
7809 entry->vme_start < dst_addr + copy->size);
7810 entry = entry->vme_next) {
7811 if (entry->is_sub_map) {
7812 vm_map_unlock_read(dst_map);
7813 goto blunt_copy;
7814 }
7815 }
7816 vm_map_unlock_read(dst_map);
7817
7818 if (head_size) {
7819 /*
7820 * Unaligned copy of the first "head_size" bytes, to reach
7821 * a page boundary.
7822 */
7823
7824 /*
7825 * Extract "head_copy" out of "copy".
7826 */
7827 head_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
04b8595b 7828 head_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
6d2010ae
A
7829 vm_map_copy_first_entry(head_copy) =
7830 vm_map_copy_to_entry(head_copy);
7831 vm_map_copy_last_entry(head_copy) =
7832 vm_map_copy_to_entry(head_copy);
7833 head_copy->type = VM_MAP_COPY_ENTRY_LIST;
7834 head_copy->cpy_hdr.nentries = 0;
7835 head_copy->cpy_hdr.entries_pageable =
7836 copy->cpy_hdr.entries_pageable;
7837 vm_map_store_init(&head_copy->cpy_hdr);
7838
7839 head_copy->offset = copy->offset;
7840 head_copy->size = head_size;
7841
7842 copy->offset += head_size;
7843 copy->size -= head_size;
7844
7845 entry = vm_map_copy_first_entry(copy);
7846 vm_map_copy_clip_end(copy, entry, copy->offset);
7847 vm_map_copy_entry_unlink(copy, entry);
7848 vm_map_copy_entry_link(head_copy,
7849 vm_map_copy_to_entry(head_copy),
7850 entry);
7851
7852 /*
7853 * Do the unaligned copy.
7854 */
7855 kr = vm_map_copy_overwrite_nested(dst_map,
7856 head_addr,
7857 head_copy,
7858 interruptible,
7859 (pmap_t) NULL,
7860 FALSE);
7861 if (kr != KERN_SUCCESS)
7862 goto done;
7863 }
7864
7865 if (tail_size) {
7866 /*
7867 * Extract "tail_copy" out of "copy".
7868 */
7869 tail_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
04b8595b 7870 tail_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
6d2010ae
A
7871 vm_map_copy_first_entry(tail_copy) =
7872 vm_map_copy_to_entry(tail_copy);
7873 vm_map_copy_last_entry(tail_copy) =
7874 vm_map_copy_to_entry(tail_copy);
7875 tail_copy->type = VM_MAP_COPY_ENTRY_LIST;
7876 tail_copy->cpy_hdr.nentries = 0;
7877 tail_copy->cpy_hdr.entries_pageable =
7878 copy->cpy_hdr.entries_pageable;
7879 vm_map_store_init(&tail_copy->cpy_hdr);
7880
7881 tail_copy->offset = copy->offset + copy->size - tail_size;
7882 tail_copy->size = tail_size;
7883
7884 copy->size -= tail_size;
7885
7886 entry = vm_map_copy_last_entry(copy);
7887 vm_map_copy_clip_start(copy, entry, tail_copy->offset);
7888 entry = vm_map_copy_last_entry(copy);
7889 vm_map_copy_entry_unlink(copy, entry);
7890 vm_map_copy_entry_link(tail_copy,
7891 vm_map_copy_last_entry(tail_copy),
7892 entry);
7893 }
7894
7895 /*
7896 * Copy most (or possibly all) of the data.
7897 */
7898 kr = vm_map_copy_overwrite_nested(dst_map,
7899 dst_addr + head_size,
7900 copy,
7901 interruptible,
7902 (pmap_t) NULL,
7903 FALSE);
7904 if (kr != KERN_SUCCESS) {
7905 goto done;
7906 }
7907
7908 if (tail_size) {
7909 kr = vm_map_copy_overwrite_nested(dst_map,
7910 tail_addr,
7911 tail_copy,
7912 interruptible,
7913 (pmap_t) NULL,
7914 FALSE);
7915 }
7916
7917done:
7918 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
7919 if (kr == KERN_SUCCESS) {
7920 /*
7921 * Discard all the copy maps.
7922 */
7923 if (head_copy) {
7924 vm_map_copy_discard(head_copy);
7925 head_copy = NULL;
7926 }
7927 vm_map_copy_discard(copy);
7928 if (tail_copy) {
7929 vm_map_copy_discard(tail_copy);
7930 tail_copy = NULL;
7931 }
7932 } else {
7933 /*
7934 * Re-assemble the original copy map.
7935 */
7936 if (head_copy) {
7937 entry = vm_map_copy_first_entry(head_copy);
7938 vm_map_copy_entry_unlink(head_copy, entry);
7939 vm_map_copy_entry_link(copy,
7940 vm_map_copy_to_entry(copy),
7941 entry);
7942 copy->offset -= head_size;
7943 copy->size += head_size;
7944 vm_map_copy_discard(head_copy);
7945 head_copy = NULL;
7946 }
7947 if (tail_copy) {
7948 entry = vm_map_copy_last_entry(tail_copy);
7949 vm_map_copy_entry_unlink(tail_copy, entry);
7950 vm_map_copy_entry_link(copy,
7951 vm_map_copy_last_entry(copy),
7952 entry);
7953 copy->size += tail_size;
7954 vm_map_copy_discard(tail_copy);
7955 tail_copy = NULL;
7956 }
7957 }
7958 return kr;
1c79356b
A
7959}
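/*
 * Illustration (not part of the build): a worked example of the
 * head/middle/tail split performed by vm_map_copy_overwrite() above.
 * It assumes 4 KB pages and that dst_addr and copy->offset share the
 * same offset within the page (the routine checks this before
 * splitting); the concrete numbers are hypothetical.
 */
#if 0
static void
vm_map_copy_overwrite_split_example(void)
{
	vm_map_offset_t	dst_addr  = 0x100800;		/* mid-page start */
	vm_map_size_t	copy_size = 3 * 4096;		/* >= 3 pages: worth splitting */
	vm_map_size_t	head_size, tail_size, mid_size;

	/* unaligned head: bytes up to the next page boundary */
	head_size = 4096 - (dst_addr & (4096 - 1));		/* 0x800 */
	/* unaligned tail: bytes past the last page boundary */
	tail_size = (dst_addr + copy_size) & (4096 - 1);	/* 0x800 */
	/* the middle is overwritten with the aligned fast path */
	mid_size  = copy_size - head_size - tail_size;		/* 2 pages */
	assert(mid_size == 2 * 4096);
}
#endif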
7960
7961
7962/*
91447636 7963 * Routine: vm_map_copy_overwrite_unaligned [internal use only]
1c79356b
A
7964 *
 7965 * Description:
7966 * Physically copy unaligned data
7967 *
7968 * Implementation:
7969 * Unaligned parts of pages have to be physically copied. We use
 7970 * a modified form of vm_fault_copy (which understands non-aligned
 7971 * page offsets and sizes) to do the copy. We attempt to copy as
 7972 * much memory in one go as possible; however, vm_fault_copy copies
 7973 * within one memory object, so we have to take the smallest of "amount
 7974 * left", "source object data size" and "target object data size". With
 7975 * unaligned data we don't need to split regions, so the source
 7976 * (copy) object should be a single map entry; the target range may,
 7977 * however, be split over multiple map entries. In any event we are
 7978 * pessimistic about these assumptions.
7979 *
7980 * Assumptions:
 7981 * dst_map is locked on entry and is returned locked on success,
 7982 * unlocked on error.
7983 */
7984
91447636 7985static kern_return_t
1c79356b
A
7986vm_map_copy_overwrite_unaligned(
7987 vm_map_t dst_map,
7988 vm_map_entry_t entry,
7989 vm_map_copy_t copy,
39236c6e
A
7990 vm_map_offset_t start,
7991 boolean_t discard_on_success)
1c79356b 7992{
39236c6e
A
7993 vm_map_entry_t copy_entry;
7994 vm_map_entry_t copy_entry_next;
1c79356b
A
7995 vm_map_version_t version;
7996 vm_object_t dst_object;
7997 vm_object_offset_t dst_offset;
7998 vm_object_offset_t src_offset;
7999 vm_object_offset_t entry_offset;
91447636
A
8000 vm_map_offset_t entry_end;
8001 vm_map_size_t src_size,
1c79356b
A
8002 dst_size,
8003 copy_size,
8004 amount_left;
8005 kern_return_t kr = KERN_SUCCESS;
8006
39236c6e
A
8007
8008 copy_entry = vm_map_copy_first_entry(copy);
8009
1c79356b
A
8010 vm_map_lock_write_to_read(dst_map);
8011
91447636 8012 src_offset = copy->offset - vm_object_trunc_page(copy->offset);
1c79356b
A
8013 amount_left = copy->size;
8014/*
 8015 * Unaligned, so we never clipped this entry; we need the offset into
 8016 * the vm_object, not just the data.
8017 */
8018 while (amount_left > 0) {
8019
8020 if (entry == vm_map_to_entry(dst_map)) {
8021 vm_map_unlock_read(dst_map);
8022 return KERN_INVALID_ADDRESS;
8023 }
8024
8025 /* "start" must be within the current map entry */
8026 assert ((start>=entry->vme_start) && (start<entry->vme_end));
8027
8028 dst_offset = start - entry->vme_start;
8029
8030 dst_size = entry->vme_end - start;
8031
8032 src_size = copy_entry->vme_end -
8033 (copy_entry->vme_start + src_offset);
8034
8035 if (dst_size < src_size) {
8036/*
8037 * we can only copy dst_size bytes before
8038 * we have to get the next destination entry
8039 */
8040 copy_size = dst_size;
8041 } else {
8042/*
8043 * we can only copy src_size bytes before
8044 * we have to get the next source copy entry
8045 */
8046 copy_size = src_size;
8047 }
8048
8049 if (copy_size > amount_left) {
8050 copy_size = amount_left;
8051 }
8052/*
 8053 * Entry needs copy: create a shadow object for the
 8054 * copy-on-write region.
8055 */
8056 if (entry->needs_copy &&
2d21ac55 8057 ((entry->protection & VM_PROT_WRITE) != 0))
1c79356b
A
8058 {
8059 if (vm_map_lock_read_to_write(dst_map)) {
8060 vm_map_lock_read(dst_map);
8061 goto RetryLookup;
8062 }
3e170ce0
A
8063 VME_OBJECT_SHADOW(entry,
8064 (vm_map_size_t)(entry->vme_end
8065 - entry->vme_start));
1c79356b
A
8066 entry->needs_copy = FALSE;
8067 vm_map_lock_write_to_read(dst_map);
8068 }
3e170ce0 8069 dst_object = VME_OBJECT(entry);
1c79356b
A
8070/*
 8071 * Unlike with the virtual (aligned) copy, we're going
 8072 * to fault on it, therefore we need a target object.
8073 */
8074 if (dst_object == VM_OBJECT_NULL) {
8075 if (vm_map_lock_read_to_write(dst_map)) {
8076 vm_map_lock_read(dst_map);
8077 goto RetryLookup;
8078 }
91447636 8079 dst_object = vm_object_allocate((vm_map_size_t)
2d21ac55 8080 entry->vme_end - entry->vme_start);
3e170ce0
A
8081 VME_OBJECT(entry) = dst_object;
8082 VME_OFFSET_SET(entry, 0);
fe8ab488 8083 assert(entry->use_pmap);
1c79356b
A
8084 vm_map_lock_write_to_read(dst_map);
8085 }
8086/*
8087 * Take an object reference and unlock map. The "entry" may
8088 * disappear or change when the map is unlocked.
8089 */
8090 vm_object_reference(dst_object);
8091 version.main_timestamp = dst_map->timestamp;
3e170ce0 8092 entry_offset = VME_OFFSET(entry);
1c79356b
A
8093 entry_end = entry->vme_end;
8094 vm_map_unlock_read(dst_map);
8095/*
8096 * Copy as much as possible in one pass
8097 */
8098 kr = vm_fault_copy(
3e170ce0
A
8099 VME_OBJECT(copy_entry),
8100 VME_OFFSET(copy_entry) + src_offset,
1c79356b
A
8101 &copy_size,
8102 dst_object,
8103 entry_offset + dst_offset,
8104 dst_map,
8105 &version,
8106 THREAD_UNINT );
8107
8108 start += copy_size;
8109 src_offset += copy_size;
8110 amount_left -= copy_size;
8111/*
8112 * Release the object reference
8113 */
8114 vm_object_deallocate(dst_object);
8115/*
8116 * If a hard error occurred, return it now
8117 */
8118 if (kr != KERN_SUCCESS)
8119 return kr;
8120
8121 if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
2d21ac55 8122 || amount_left == 0)
1c79356b
A
8123 {
8124/*
8125 * all done with this copy entry, dispose.
8126 */
39236c6e
A
8127 copy_entry_next = copy_entry->vme_next;
8128
8129 if (discard_on_success) {
8130 vm_map_copy_entry_unlink(copy, copy_entry);
8131 assert(!copy_entry->is_sub_map);
3e170ce0 8132 vm_object_deallocate(VME_OBJECT(copy_entry));
39236c6e
A
8133 vm_map_copy_entry_dispose(copy, copy_entry);
8134 }
1c79356b 8135
39236c6e
A
8136 if (copy_entry_next == vm_map_copy_to_entry(copy) &&
8137 amount_left) {
1c79356b
A
8138/*
 8139 * not finished copying but we've run out of source
8140 */
8141 return KERN_INVALID_ADDRESS;
8142 }
39236c6e
A
8143
8144 copy_entry = copy_entry_next;
8145
1c79356b
A
8146 src_offset = 0;
8147 }
8148
8149 if (amount_left == 0)
8150 return KERN_SUCCESS;
8151
8152 vm_map_lock_read(dst_map);
8153 if (version.main_timestamp == dst_map->timestamp) {
8154 if (start == entry_end) {
8155/*
8156 * destination region is split. Use the version
8157 * information to avoid a lookup in the normal
8158 * case.
8159 */
8160 entry = entry->vme_next;
8161/*
8162 * should be contiguous. Fail if we encounter
8163 * a hole in the destination.
8164 */
8165 if (start != entry->vme_start) {
8166 vm_map_unlock_read(dst_map);
8167 return KERN_INVALID_ADDRESS ;
8168 }
8169 }
8170 } else {
8171/*
8172 * Map version check failed.
8173 * we must lookup the entry because somebody
8174 * might have changed the map behind our backs.
8175 */
2d21ac55 8176 RetryLookup:
1c79356b
A
8177 if (!vm_map_lookup_entry(dst_map, start, &entry))
8178 {
8179 vm_map_unlock_read(dst_map);
8180 return KERN_INVALID_ADDRESS ;
8181 }
8182 }
8183 }/* while */
8184
1c79356b
A
8185 return KERN_SUCCESS;
8186}/* vm_map_copy_overwrite_unaligned */
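/*
 * Illustration (not part of the build): the per-iteration sizing rule
 * used by vm_map_copy_overwrite_unaligned() above. Each pass copies
 * the smallest of "what's left in the destination entry", "what's left
 * in the source copy entry" and "what's left overall". The helper name
 * is hypothetical.
 */
#if 0
static vm_map_size_t
vm_map_overwrite_unaligned_chunk(
	vm_map_size_t	dst_size,	/* entry->vme_end - start */
	vm_map_size_t	src_size,	/* copy_entry->vme_end - (vme_start + src_offset) */
	vm_map_size_t	amount_left)	/* bytes remaining overall */
{
	vm_map_size_t	copy_size;

	copy_size = (dst_size < src_size) ? dst_size : src_size;
	if (copy_size > amount_left)
		copy_size = amount_left;
	return copy_size;
}
#endif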
8187
8188/*
91447636 8189 * Routine: vm_map_copy_overwrite_aligned [internal use only]
1c79356b
A
8190 *
8191 * Description:
8192 * Does all the vm_trickery possible for whole pages.
8193 *
8194 * Implementation:
8195 *
8196 * If there are no permanent objects in the destination,
8197 * and the source and destination map entry zones match,
8198 * and the destination map entry is not shared,
8199 * then the map entries can be deleted and replaced
8200 * with those from the copy. The following code is the
8201 * basic idea of what to do, but there are lots of annoying
8202 * little details about getting protection and inheritance
8203 * right. Should add protection, inheritance, and sharing checks
8204 * to the above pass and make sure that no wiring is involved.
8205 */
8206
e2d2fc5c
A
8207int vm_map_copy_overwrite_aligned_src_not_internal = 0;
8208int vm_map_copy_overwrite_aligned_src_not_symmetric = 0;
8209int vm_map_copy_overwrite_aligned_src_large = 0;
8210
91447636 8211static kern_return_t
1c79356b
A
8212vm_map_copy_overwrite_aligned(
8213 vm_map_t dst_map,
8214 vm_map_entry_t tmp_entry,
8215 vm_map_copy_t copy,
91447636 8216 vm_map_offset_t start,
2d21ac55 8217 __unused pmap_t pmap)
1c79356b
A
8218{
8219 vm_object_t object;
8220 vm_map_entry_t copy_entry;
91447636
A
8221 vm_map_size_t copy_size;
8222 vm_map_size_t size;
1c79356b
A
8223 vm_map_entry_t entry;
8224
8225 while ((copy_entry = vm_map_copy_first_entry(copy))
2d21ac55 8226 != vm_map_copy_to_entry(copy))
1c79356b
A
8227 {
8228 copy_size = (copy_entry->vme_end - copy_entry->vme_start);
8229
8230 entry = tmp_entry;
fe8ab488
A
8231 if (entry->is_sub_map) {
8232 /* unnested when clipped earlier */
8233 assert(!entry->use_pmap);
8234 }
1c79356b
A
8235 if (entry == vm_map_to_entry(dst_map)) {
8236 vm_map_unlock(dst_map);
8237 return KERN_INVALID_ADDRESS;
8238 }
8239 size = (entry->vme_end - entry->vme_start);
8240 /*
8241 * Make sure that no holes popped up in the
8242 * address map, and that the protection is
8243 * still valid, in case the map was unlocked
8244 * earlier.
8245 */
8246
8247 if ((entry->vme_start != start) || ((entry->is_sub_map)
2d21ac55 8248 && !entry->needs_copy)) {
1c79356b
A
8249 vm_map_unlock(dst_map);
8250 return(KERN_INVALID_ADDRESS);
8251 }
8252 assert(entry != vm_map_to_entry(dst_map));
8253
8254 /*
8255 * Check protection again
8256 */
8257
8258 if ( ! (entry->protection & VM_PROT_WRITE)) {
8259 vm_map_unlock(dst_map);
8260 return(KERN_PROTECTION_FAILURE);
8261 }
8262
8263 /*
8264 * Adjust to source size first
8265 */
8266
8267 if (copy_size < size) {
fe8ab488
A
8268 if (entry->map_aligned &&
8269 !VM_MAP_PAGE_ALIGNED(entry->vme_start + copy_size,
8270 VM_MAP_PAGE_MASK(dst_map))) {
8271 /* no longer map-aligned */
8272 entry->map_aligned = FALSE;
8273 }
1c79356b
A
8274 vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
8275 size = copy_size;
8276 }
8277
8278 /*
8279 * Adjust to destination size
8280 */
8281
8282 if (size < copy_size) {
8283 vm_map_copy_clip_end(copy, copy_entry,
2d21ac55 8284 copy_entry->vme_start + size);
1c79356b
A
8285 copy_size = size;
8286 }
8287
8288 assert((entry->vme_end - entry->vme_start) == size);
8289 assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
8290 assert((copy_entry->vme_end - copy_entry->vme_start) == size);
8291
8292 /*
8293 * If the destination contains temporary unshared memory,
8294 * we can perform the copy by throwing it away and
8295 * installing the source data.
8296 */
8297
3e170ce0 8298 object = VME_OBJECT(entry);
1c79356b 8299 if ((!entry->is_shared &&
2d21ac55
A
8300 ((object == VM_OBJECT_NULL) ||
8301 (object->internal && !object->true_share))) ||
1c79356b 8302 entry->needs_copy) {
3e170ce0
A
8303 vm_object_t old_object = VME_OBJECT(entry);
8304 vm_object_offset_t old_offset = VME_OFFSET(entry);
1c79356b
A
8305 vm_object_offset_t offset;
8306
8307 /*
8308 * Ensure that the source and destination aren't
8309 * identical
8310 */
3e170ce0
A
8311 if (old_object == VME_OBJECT(copy_entry) &&
8312 old_offset == VME_OFFSET(copy_entry)) {
1c79356b
A
8313 vm_map_copy_entry_unlink(copy, copy_entry);
8314 vm_map_copy_entry_dispose(copy, copy_entry);
8315
8316 if (old_object != VM_OBJECT_NULL)
8317 vm_object_deallocate(old_object);
8318
8319 start = tmp_entry->vme_end;
8320 tmp_entry = tmp_entry->vme_next;
8321 continue;
8322 }
8323
e2d2fc5c
A
8324#define __TRADEOFF1_OBJ_SIZE (64 * 1024 * 1024) /* 64 MB */
8325#define __TRADEOFF1_COPY_SIZE (128 * 1024) /* 128 KB */
3e170ce0
A
8326 if (VME_OBJECT(copy_entry) != VM_OBJECT_NULL &&
8327 VME_OBJECT(copy_entry)->vo_size >= __TRADEOFF1_OBJ_SIZE &&
e2d2fc5c
A
8328 copy_size <= __TRADEOFF1_COPY_SIZE) {
8329 /*
8330 * Virtual vs. Physical copy tradeoff #1.
8331 *
8332 * Copying only a few pages out of a large
8333 * object: do a physical copy instead of
8334 * a virtual copy, to avoid possibly keeping
8335 * the entire large object alive because of
8336 * those few copy-on-write pages.
8337 */
8338 vm_map_copy_overwrite_aligned_src_large++;
8339 goto slow_copy;
8340 }
e2d2fc5c 8341
3e170ce0
A
8342 if ((dst_map->pmap != kernel_pmap) &&
8343 (VME_ALIAS(entry) >= VM_MEMORY_MALLOC) &&
8344 (VME_ALIAS(entry) <= VM_MEMORY_MALLOC_LARGE_REUSED)) {
ebb1b9f4
A
8345 vm_object_t new_object, new_shadow;
8346
8347 /*
8348 * We're about to map something over a mapping
8349 * established by malloc()...
8350 */
3e170ce0 8351 new_object = VME_OBJECT(copy_entry);
ebb1b9f4
A
8352 if (new_object != VM_OBJECT_NULL) {
8353 vm_object_lock_shared(new_object);
8354 }
8355 while (new_object != VM_OBJECT_NULL &&
e2d2fc5c
A
8356 !new_object->true_share &&
8357 new_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
ebb1b9f4
A
8358 new_object->internal) {
8359 new_shadow = new_object->shadow;
8360 if (new_shadow == VM_OBJECT_NULL) {
8361 break;
8362 }
8363 vm_object_lock_shared(new_shadow);
8364 vm_object_unlock(new_object);
8365 new_object = new_shadow;
8366 }
8367 if (new_object != VM_OBJECT_NULL) {
8368 if (!new_object->internal) {
8369 /*
8370 * The new mapping is backed
8371 * by an external object. We
8372 * don't want malloc'ed memory
8373 * to be replaced with such a
8374 * non-anonymous mapping, so
8375 * let's go off the optimized
8376 * path...
8377 */
e2d2fc5c 8378 vm_map_copy_overwrite_aligned_src_not_internal++;
ebb1b9f4
A
8379 vm_object_unlock(new_object);
8380 goto slow_copy;
8381 }
e2d2fc5c
A
8382 if (new_object->true_share ||
8383 new_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
8384 /*
8385 * Same if there's a "true_share"
8386 * object in the shadow chain, or
8387 * an object with a non-default
8388 * (SYMMETRIC) copy strategy.
8389 */
8390 vm_map_copy_overwrite_aligned_src_not_symmetric++;
8391 vm_object_unlock(new_object);
8392 goto slow_copy;
8393 }
ebb1b9f4
A
8394 vm_object_unlock(new_object);
8395 }
8396 /*
8397 * The new mapping is still backed by
8398 * anonymous (internal) memory, so it's
8399 * OK to substitute it for the original
8400 * malloc() mapping.
8401 */
8402 }
8403
1c79356b
A
8404 if (old_object != VM_OBJECT_NULL) {
8405 if(entry->is_sub_map) {
9bccf70c 8406 if(entry->use_pmap) {
0c530ab8 8407#ifndef NO_NESTED_PMAP
9bccf70c 8408 pmap_unnest(dst_map->pmap,
2d21ac55
A
8409 (addr64_t)entry->vme_start,
8410 entry->vme_end - entry->vme_start);
0c530ab8 8411#endif /* NO_NESTED_PMAP */
316670eb 8412 if(dst_map->mapped_in_other_pmaps) {
9bccf70c
A
8413 /* clean up parent */
8414 /* map/maps */
2d21ac55
A
8415 vm_map_submap_pmap_clean(
8416 dst_map, entry->vme_start,
8417 entry->vme_end,
3e170ce0
A
8418 VME_SUBMAP(entry),
8419 VME_OFFSET(entry));
9bccf70c
A
8420 }
8421 } else {
8422 vm_map_submap_pmap_clean(
8423 dst_map, entry->vme_start,
8424 entry->vme_end,
3e170ce0
A
8425 VME_SUBMAP(entry),
8426 VME_OFFSET(entry));
9bccf70c 8427 }
3e170ce0 8428 vm_map_deallocate(VME_SUBMAP(entry));
9bccf70c 8429 } else {
316670eb 8430 if(dst_map->mapped_in_other_pmaps) {
39236c6e 8431 vm_object_pmap_protect_options(
3e170ce0
A
8432 VME_OBJECT(entry),
8433 VME_OFFSET(entry),
9bccf70c 8434 entry->vme_end
2d21ac55 8435 - entry->vme_start,
9bccf70c
A
8436 PMAP_NULL,
8437 entry->vme_start,
39236c6e
A
8438 VM_PROT_NONE,
8439 PMAP_OPTIONS_REMOVE);
9bccf70c 8440 } else {
39236c6e
A
8441 pmap_remove_options(
8442 dst_map->pmap,
8443 (addr64_t)(entry->vme_start),
8444 (addr64_t)(entry->vme_end),
8445 PMAP_OPTIONS_REMOVE);
9bccf70c 8446 }
1c79356b 8447 vm_object_deallocate(old_object);
9bccf70c 8448 }
1c79356b
A
8449 }
8450
8451 entry->is_sub_map = FALSE;
3e170ce0
A
8452 VME_OBJECT_SET(entry, VME_OBJECT(copy_entry));
8453 object = VME_OBJECT(entry);
1c79356b
A
8454 entry->needs_copy = copy_entry->needs_copy;
8455 entry->wired_count = 0;
8456 entry->user_wired_count = 0;
3e170ce0
A
8457 offset = VME_OFFSET(copy_entry);
8458 VME_OFFSET_SET(entry, offset);
1c79356b
A
8459
8460 vm_map_copy_entry_unlink(copy, copy_entry);
8461 vm_map_copy_entry_dispose(copy, copy_entry);
2d21ac55 8462
1c79356b 8463 /*
2d21ac55 8464 * We could try to push pages into the pmap at this point, BUT
1c79356b
A
 8465 * this optimization only saved on average 2 us per page if ALL
 8466 * the pages in the source were currently mapped
 8467 * and ALL the pages in the dest were touched. If fewer than
 8468 * 2/3 of the pages were touched, this optimization actually cost more cycles.
2d21ac55 8469 * It also puts a lot of pressure on the pmap layer w.r.t. mapping structures.
1c79356b
A
8470 */
8471
1c79356b
A
8472 /*
8473 * Set up for the next iteration. The map
8474 * has not been unlocked, so the next
8475 * address should be at the end of this
8476 * entry, and the next map entry should be
8477 * the one following it.
8478 */
8479
8480 start = tmp_entry->vme_end;
8481 tmp_entry = tmp_entry->vme_next;
8482 } else {
8483 vm_map_version_t version;
ebb1b9f4
A
8484 vm_object_t dst_object;
8485 vm_object_offset_t dst_offset;
1c79356b
A
8486 kern_return_t r;
8487
ebb1b9f4 8488 slow_copy:
e2d2fc5c 8489 if (entry->needs_copy) {
3e170ce0
A
8490 VME_OBJECT_SHADOW(entry,
8491 (entry->vme_end -
8492 entry->vme_start));
e2d2fc5c
A
8493 entry->needs_copy = FALSE;
8494 }
8495
3e170ce0
A
8496 dst_object = VME_OBJECT(entry);
8497 dst_offset = VME_OFFSET(entry);
ebb1b9f4 8498
1c79356b
A
8499 /*
8500 * Take an object reference, and record
8501 * the map version information so that the
8502 * map can be safely unlocked.
8503 */
8504
ebb1b9f4
A
8505 if (dst_object == VM_OBJECT_NULL) {
8506 /*
8507 * We would usually have just taken the
8508 * optimized path above if the destination
8509 * object has not been allocated yet. But we
8510 * now disable that optimization if the copy
8511 * entry's object is not backed by anonymous
8512 * memory to avoid replacing malloc'ed
8513 * (i.e. re-usable) anonymous memory with a
8514 * not-so-anonymous mapping.
8515 * So we have to handle this case here and
8516 * allocate a new VM object for this map entry.
8517 */
8518 dst_object = vm_object_allocate(
8519 entry->vme_end - entry->vme_start);
8520 dst_offset = 0;
3e170ce0
A
8521 VME_OBJECT_SET(entry, dst_object);
8522 VME_OFFSET_SET(entry, dst_offset);
fe8ab488 8523 assert(entry->use_pmap);
ebb1b9f4
A
8524
8525 }
8526
1c79356b
A
8527 vm_object_reference(dst_object);
8528
9bccf70c
A
8529 /* account for unlock bumping up timestamp */
8530 version.main_timestamp = dst_map->timestamp + 1;
1c79356b
A
8531
8532 vm_map_unlock(dst_map);
8533
8534 /*
8535 * Copy as much as possible in one pass
8536 */
8537
8538 copy_size = size;
8539 r = vm_fault_copy(
3e170ce0
A
8540 VME_OBJECT(copy_entry),
8541 VME_OFFSET(copy_entry),
2d21ac55
A
8542 &copy_size,
8543 dst_object,
8544 dst_offset,
8545 dst_map,
8546 &version,
8547 THREAD_UNINT );
1c79356b
A
8548
8549 /*
8550 * Release the object reference
8551 */
8552
8553 vm_object_deallocate(dst_object);
8554
8555 /*
8556 * If a hard error occurred, return it now
8557 */
8558
8559 if (r != KERN_SUCCESS)
8560 return(r);
8561
8562 if (copy_size != 0) {
8563 /*
8564 * Dispose of the copied region
8565 */
8566
8567 vm_map_copy_clip_end(copy, copy_entry,
2d21ac55 8568 copy_entry->vme_start + copy_size);
1c79356b 8569 vm_map_copy_entry_unlink(copy, copy_entry);
3e170ce0 8570 vm_object_deallocate(VME_OBJECT(copy_entry));
1c79356b
A
8571 vm_map_copy_entry_dispose(copy, copy_entry);
8572 }
8573
8574 /*
8575 * Pick up in the destination map where we left off.
8576 *
8577 * Use the version information to avoid a lookup
8578 * in the normal case.
8579 */
8580
8581 start += copy_size;
8582 vm_map_lock(dst_map);
e2d2fc5c
A
8583 if (version.main_timestamp == dst_map->timestamp &&
8584 copy_size != 0) {
1c79356b
A
8585 /* We can safely use saved tmp_entry value */
8586
fe8ab488
A
8587 if (tmp_entry->map_aligned &&
8588 !VM_MAP_PAGE_ALIGNED(
8589 start,
8590 VM_MAP_PAGE_MASK(dst_map))) {
8591 /* no longer map-aligned */
8592 tmp_entry->map_aligned = FALSE;
8593 }
1c79356b
A
8594 vm_map_clip_end(dst_map, tmp_entry, start);
8595 tmp_entry = tmp_entry->vme_next;
8596 } else {
8597 /* Must do lookup of tmp_entry */
8598
8599 if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
8600 vm_map_unlock(dst_map);
8601 return(KERN_INVALID_ADDRESS);
8602 }
fe8ab488
A
8603 if (tmp_entry->map_aligned &&
8604 !VM_MAP_PAGE_ALIGNED(
8605 start,
8606 VM_MAP_PAGE_MASK(dst_map))) {
8607 /* no longer map-aligned */
8608 tmp_entry->map_aligned = FALSE;
8609 }
1c79356b
A
8610 vm_map_clip_start(dst_map, tmp_entry, start);
8611 }
8612 }
8613 }/* while */
8614
8615 return(KERN_SUCCESS);
8616}/* vm_map_copy_overwrite_aligned */
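/*
 * Illustration (not part of the build): a condensed restatement of the
 * fast-path test in vm_map_copy_overwrite_aligned() above. A destination
 * entry can in principle be replaced outright (virtual copy) when it is
 * temporary, unshared memory; otherwise the data is physically copied
 * via vm_fault_copy(). Additional tradeoff checks may still send an
 * eligible entry down the physical-copy path. The helper name is
 * hypothetical.
 */
#if 0
static boolean_t
vm_map_overwrite_aligned_can_replace(vm_map_entry_t entry)
{
	vm_object_t object = VME_OBJECT(entry);

	return ((!entry->is_shared &&
		 ((object == VM_OBJECT_NULL) ||
		  (object->internal && !object->true_share))) ||
		entry->needs_copy);
}
#endif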
8617
8618/*
91447636 8619 * Routine: vm_map_copyin_kernel_buffer [internal use only]
1c79356b
A
8620 *
8621 * Description:
8622 * Copy in data to a kernel buffer from space in the
91447636 8623 * source map. The original space may be optionally
1c79356b
A
8624 * deallocated.
8625 *
8626 * If successful, returns a new copy object.
8627 */
91447636 8628static kern_return_t
1c79356b
A
8629vm_map_copyin_kernel_buffer(
8630 vm_map_t src_map,
91447636
A
8631 vm_map_offset_t src_addr,
8632 vm_map_size_t len,
1c79356b
A
8633 boolean_t src_destroy,
8634 vm_map_copy_t *copy_result)
8635{
91447636 8636 kern_return_t kr;
1c79356b 8637 vm_map_copy_t copy;
b0d623f7
A
8638 vm_size_t kalloc_size;
8639
3e170ce0
A
8640 if (len > msg_ool_size_small)
8641 return KERN_INVALID_ARGUMENT;
1c79356b 8642
3e170ce0
A
8643 kalloc_size = (vm_size_t)(cpy_kdata_hdr_sz + len);
8644
8645 copy = (vm_map_copy_t)kalloc(kalloc_size);
8646 if (copy == VM_MAP_COPY_NULL)
1c79356b 8647 return KERN_RESOURCE_SHORTAGE;
1c79356b
A
8648 copy->type = VM_MAP_COPY_KERNEL_BUFFER;
8649 copy->size = len;
8650 copy->offset = 0;
1c79356b 8651
3e170ce0 8652 kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t)len);
91447636
A
8653 if (kr != KERN_SUCCESS) {
8654 kfree(copy, kalloc_size);
8655 return kr;
1c79356b
A
8656 }
8657 if (src_destroy) {
39236c6e
A
8658 (void) vm_map_remove(
8659 src_map,
8660 vm_map_trunc_page(src_addr,
8661 VM_MAP_PAGE_MASK(src_map)),
8662 vm_map_round_page(src_addr + len,
8663 VM_MAP_PAGE_MASK(src_map)),
8664 (VM_MAP_REMOVE_INTERRUPTIBLE |
8665 VM_MAP_REMOVE_WAIT_FOR_KWIRE |
39037602 8666 ((src_map == kernel_map) ? VM_MAP_REMOVE_KUNWIRE : 0)));
1c79356b
A
8667 }
8668 *copy_result = copy;
8669 return KERN_SUCCESS;
8670}
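/*
 * Illustration (not part of the build): a minimal use of the
 * kernel-buffer copy-in path above. The 256-byte length is an
 * assumption chosen to stay well under msg_ool_size_small, so the data
 * is copied inline into a kalloc'ed vm_map_copy. The function name is
 * hypothetical.
 */
#if 0
static kern_return_t
vm_map_copyin_kernel_buffer_example(vm_map_t src_map, vm_map_offset_t src_addr)
{
	vm_map_copy_t	copy;
	kern_return_t	kr;

	kr = vm_map_copyin_kernel_buffer(src_map, src_addr, 256,
					 FALSE,		/* don't destroy source */
					 &copy);
	if (kr == KERN_SUCCESS)
		vm_map_copy_discard(copy);	/* caller owns the copy */
	return kr;
}
#endif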
8671
8672/*
91447636 8673 * Routine: vm_map_copyout_kernel_buffer [internal use only]
1c79356b
A
8674 *
8675 * Description:
8676 * Copy out data from a kernel buffer into space in the
 8677 * destination map. The space may optionally be dynamically
8678 * allocated.
8679 *
8680 * If successful, consumes the copy object.
8681 * Otherwise, the caller is responsible for it.
8682 */
91447636
A
8683static int vm_map_copyout_kernel_buffer_failures = 0;
8684static kern_return_t
1c79356b 8685vm_map_copyout_kernel_buffer(
91447636
A
8686 vm_map_t map,
8687 vm_map_address_t *addr, /* IN/OUT */
8688 vm_map_copy_t copy,
39037602 8689 vm_map_size_t copy_size,
39236c6e
A
8690 boolean_t overwrite,
8691 boolean_t consume_on_success)
1c79356b
A
8692{
8693 kern_return_t kr = KERN_SUCCESS;
91447636 8694 thread_t thread = current_thread();
1c79356b 8695
39037602
A
8696 assert(copy->size == copy_size);
8697
3e170ce0
A
8698 /*
8699 * check for corrupted vm_map_copy structure
8700 */
39037602 8701 if (copy_size > msg_ool_size_small || copy->offset)
3e170ce0
A
8702 panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
8703 (long long)copy->size, (long long)copy->offset);
8704
1c79356b
A
8705 if (!overwrite) {
8706
8707 /*
8708 * Allocate space in the target map for the data
8709 */
8710 *addr = 0;
8711 kr = vm_map_enter(map,
8712 addr,
39037602 8713 vm_map_round_page(copy_size,
39236c6e 8714 VM_MAP_PAGE_MASK(map)),
91447636
A
8715 (vm_map_offset_t) 0,
8716 VM_FLAGS_ANYWHERE,
1c79356b
A
8717 VM_OBJECT_NULL,
8718 (vm_object_offset_t) 0,
8719 FALSE,
8720 VM_PROT_DEFAULT,
8721 VM_PROT_ALL,
8722 VM_INHERIT_DEFAULT);
8723 if (kr != KERN_SUCCESS)
91447636 8724 return kr;
1c79356b
A
8725 }
8726
8727 /*
8728 * Copyout the data from the kernel buffer to the target map.
8729 */
91447636 8730 if (thread->map == map) {
1c79356b
A
8731
8732 /*
8733 * If the target map is the current map, just do
8734 * the copy.
8735 */
39037602
A
8736 assert((vm_size_t)copy_size == copy_size);
8737 if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
91447636 8738 kr = KERN_INVALID_ADDRESS;
1c79356b
A
8739 }
8740 }
8741 else {
8742 vm_map_t oldmap;
8743
8744 /*
8745 * If the target map is another map, assume the
8746 * target's address space identity for the duration
8747 * of the copy.
8748 */
8749 vm_map_reference(map);
8750 oldmap = vm_map_switch(map);
8751
39037602
A
8752 assert((vm_size_t)copy_size == copy_size);
8753 if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
91447636
A
8754 vm_map_copyout_kernel_buffer_failures++;
8755 kr = KERN_INVALID_ADDRESS;
1c79356b
A
8756 }
8757
8758 (void) vm_map_switch(oldmap);
8759 vm_map_deallocate(map);
8760 }
8761
91447636
A
8762 if (kr != KERN_SUCCESS) {
8763 /* the copy failed, clean up */
8764 if (!overwrite) {
8765 /*
8766 * Deallocate the space we allocated in the target map.
8767 */
39236c6e
A
8768 (void) vm_map_remove(
8769 map,
8770 vm_map_trunc_page(*addr,
8771 VM_MAP_PAGE_MASK(map)),
8772 vm_map_round_page((*addr +
39037602 8773 vm_map_round_page(copy_size,
39236c6e
A
8774 VM_MAP_PAGE_MASK(map))),
8775 VM_MAP_PAGE_MASK(map)),
8776 VM_MAP_NO_FLAGS);
91447636
A
8777 *addr = 0;
8778 }
8779 } else {
 8780 /* copy was successful, discard the copy structure */
39236c6e 8781 if (consume_on_success) {
39037602 8782 kfree(copy, copy_size + cpy_kdata_hdr_sz);
39236c6e 8783 }
91447636 8784 }
1c79356b 8785
91447636 8786 return kr;
1c79356b
A
8787}
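/*
 * Illustration (not part of the build): the two modes of
 * vm_map_copyout_kernel_buffer() above. When "overwrite" is FALSE the
 * routine allocates fresh space with vm_map_enter(); when TRUE it
 * copies onto an address the caller already owns. The function name is
 * hypothetical and the call assumes a VM_MAP_COPY_KERNEL_BUFFER copy.
 */
#if 0
static kern_return_t
vm_map_copyout_kbuf_example(vm_map_t map, vm_map_copy_t copy)
{
	vm_map_address_t addr = 0;	/* filled in by the allocate path */

	return vm_map_copyout_kernel_buffer(map, &addr, copy,
					    copy->size,
					    FALSE,	/* allocate, don't overwrite */
					    TRUE);	/* consume copy on success */
}
#endif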
8788
8789/*
8790 * Macro: vm_map_copy_insert
8791 *
8792 * Description:
8793 * Link a copy chain ("copy") into a map at the
8794 * specified location (after "where").
8795 * Side effects:
8796 * The copy chain is destroyed.
8797 * Warning:
8798 * The arguments are evaluated multiple times.
8799 */
8800#define vm_map_copy_insert(map, where, copy) \
8801MACRO_BEGIN \
6d2010ae
A
8802 vm_map_store_copy_insert(map, where, copy); \
8803 zfree(vm_map_copy_zone, copy); \
1c79356b
A
8804MACRO_END
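/*
 * Illustration (not part of the build): because vm_map_copy_insert()
 * evaluates its arguments more than once, callers should pass plain
 * variables, not expressions with side effects. The names below are
 * hypothetical.
 */
#if 0
	/* OK: simple lvalues, harmless to evaluate repeatedly */
	vm_map_copy_insert(dst_map, last_entry, copy);

	/* NOT OK: the function call could run more than once */
	/* vm_map_copy_insert(dst_map, pick_insertion_point(dst_map), copy); */
#endif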
8805
39236c6e
A
8806void
8807vm_map_copy_remap(
8808 vm_map_t map,
8809 vm_map_entry_t where,
8810 vm_map_copy_t copy,
8811 vm_map_offset_t adjustment,
8812 vm_prot_t cur_prot,
8813 vm_prot_t max_prot,
8814 vm_inherit_t inheritance)
8815{
8816 vm_map_entry_t copy_entry, new_entry;
8817
8818 for (copy_entry = vm_map_copy_first_entry(copy);
8819 copy_entry != vm_map_copy_to_entry(copy);
8820 copy_entry = copy_entry->vme_next) {
8821 /* get a new VM map entry for the map */
8822 new_entry = vm_map_entry_create(map,
8823 !map->hdr.entries_pageable);
8824 /* copy the "copy entry" to the new entry */
8825 vm_map_entry_copy(new_entry, copy_entry);
8826 /* adjust "start" and "end" */
8827 new_entry->vme_start += adjustment;
8828 new_entry->vme_end += adjustment;
8829 /* clear some attributes */
8830 new_entry->inheritance = inheritance;
8831 new_entry->protection = cur_prot;
8832 new_entry->max_protection = max_prot;
8833 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
8834 /* take an extra reference on the entry's "object" */
8835 if (new_entry->is_sub_map) {
fe8ab488 8836 assert(!new_entry->use_pmap); /* not nested */
3e170ce0
A
8837 vm_map_lock(VME_SUBMAP(new_entry));
8838 vm_map_reference(VME_SUBMAP(new_entry));
8839 vm_map_unlock(VME_SUBMAP(new_entry));
39236c6e 8840 } else {
3e170ce0 8841 vm_object_reference(VME_OBJECT(new_entry));
39236c6e
A
8842 }
8843 /* insert the new entry in the map */
8844 vm_map_store_entry_link(map, where, new_entry);
8845 /* continue inserting the "copy entries" after the new entry */
8846 where = new_entry;
8847 }
8848}
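/*
 * Illustration (not part of the build): unlike vm_map_copy_insert(),
 * vm_map_copy_remap() above does not consume the copy chain; it clones
 * each copy entry into "map", shifted by "adjustment" bytes, taking an
 * extra reference on each backing object or submap. The variables in
 * this hypothetical call (insertion_point, dst_start, copy_start) are
 * assumed to be supplied by the caller.
 */
#if 0
	vm_map_copy_remap(dst_map, insertion_point, copy,
			  dst_start - copy_start,	/* adjustment */
			  VM_PROT_READ | VM_PROT_WRITE,	/* cur_prot */
			  VM_PROT_ALL,			/* max_prot */
			  VM_INHERIT_DEFAULT);
	/* ... later, when the caller is done with the copy ... */
	vm_map_copy_discard(copy);
#endif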
8849
2dced7af 8850
39037602
A
8851/*
8852 * Returns true if *size matches (or is in the range of) copy->size.
8853 * Upon returning true, the *size field is updated with the actual size of the
8854 * copy object (may be different for VM_MAP_COPY_ENTRY_LIST types)
8855 */
2dced7af
A
8856boolean_t
8857vm_map_copy_validate_size(
8858 vm_map_t dst_map,
8859 vm_map_copy_t copy,
39037602 8860 vm_map_size_t *size)
2dced7af
A
8861{
8862 if (copy == VM_MAP_COPY_NULL)
8863 return FALSE;
39037602
A
8864 vm_map_size_t copy_sz = copy->size;
8865 vm_map_size_t sz = *size;
2dced7af
A
8866 switch (copy->type) {
8867 case VM_MAP_COPY_OBJECT:
8868 case VM_MAP_COPY_KERNEL_BUFFER:
39037602 8869 if (sz == copy_sz)
2dced7af
A
8870 return TRUE;
8871 break;
8872 case VM_MAP_COPY_ENTRY_LIST:
8873 /*
8874 * potential page-size rounding prevents us from exactly
8875 * validating this flavor of vm_map_copy, but we can at least
8876 * assert that it's within a range.
8877 */
39037602
A
8878 if (copy_sz >= sz &&
8879 copy_sz <= vm_map_round_page(sz, VM_MAP_PAGE_MASK(dst_map))) {
8880 *size = copy_sz;
2dced7af 8881 return TRUE;
39037602 8882 }
2dced7af
A
8883 break;
8884 default:
8885 break;
8886 }
8887 return FALSE;
8888}
8889
39037602
A
8890/*
8891 * Routine: vm_map_copyout_size
8892 *
8893 * Description:
8894 * Copy out a copy chain ("copy") into newly-allocated
8895 * space in the destination map. Uses a prevalidated
8896 * size for the copy object (vm_map_copy_validate_size).
8897 *
8898 * If successful, consumes the copy object.
8899 * Otherwise, the caller is responsible for it.
8900 */
8901kern_return_t
8902vm_map_copyout_size(
8903 vm_map_t dst_map,
8904 vm_map_address_t *dst_addr, /* OUT */
8905 vm_map_copy_t copy,
8906 vm_map_size_t copy_size)
8907{
8908 return vm_map_copyout_internal(dst_map, dst_addr, copy, copy_size,
8909 TRUE, /* consume_on_success */
8910 VM_PROT_DEFAULT,
8911 VM_PROT_ALL,
8912 VM_INHERIT_DEFAULT);
8913}
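/*
 * Illustration (not part of the build): the intended caller pattern for
 * vm_map_copy_validate_size() together with vm_map_copyout_size()
 * above. The expected size and the function name are assumptions.
 */
#if 0
static kern_return_t
vm_map_copyout_checked_example(
	vm_map_t	 dst_map,
	vm_map_copy_t	 copy,
	vm_map_address_t *dst_addr)
{
	vm_map_size_t size = 4096;	/* size the caller expects */

	if (!vm_map_copy_validate_size(dst_map, copy, &size))
		return KERN_FAILURE;
	/* "size" now holds the copy's actual (possibly rounded) size */
	return vm_map_copyout_size(dst_map, dst_addr, copy, size);
}
#endif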
2dced7af 8914
1c79356b
A
8915/*
8916 * Routine: vm_map_copyout
8917 *
8918 * Description:
8919 * Copy out a copy chain ("copy") into newly-allocated
8920 * space in the destination map.
8921 *
8922 * If successful, consumes the copy object.
8923 * Otherwise, the caller is responsible for it.
8924 */
8925kern_return_t
8926vm_map_copyout(
91447636
A
8927 vm_map_t dst_map,
8928 vm_map_address_t *dst_addr, /* OUT */
8929 vm_map_copy_t copy)
39236c6e 8930{
39037602
A
8931 return vm_map_copyout_internal(dst_map, dst_addr, copy, copy ? copy->size : 0,
8932 TRUE, /* consume_on_success */
8933 VM_PROT_DEFAULT,
8934 VM_PROT_ALL,
8935 VM_INHERIT_DEFAULT);
39236c6e
A
8936}
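/*
 * Illustration (not part of the build): a minimal copyin/copyout round
 * trip using the routines in this file. The map handles and addresses
 * are assumptions supplied by a hypothetical caller.
 */
#if 0
static kern_return_t
vm_map_copy_round_trip_example(
	vm_map_t	 src_map,
	vm_map_offset_t	 src_addr,
	vm_map_size_t	 len,
	vm_map_t	 dst_map,
	vm_map_address_t *dst_addr)
{
	vm_map_copy_t	copy;
	kern_return_t	kr;

	kr = vm_map_copyin(src_map, src_addr, len,
			   FALSE,	/* don't destroy the source */
			   &copy);
	if (kr != KERN_SUCCESS)
		return kr;

	/* allocates space in dst_map and consumes "copy" on success */
	kr = vm_map_copyout(dst_map, dst_addr, copy);
	if (kr != KERN_SUCCESS)
		vm_map_copy_discard(copy);	/* still ours on failure */
	return kr;
}
#endif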
8937
8938kern_return_t
8939vm_map_copyout_internal(
8940 vm_map_t dst_map,
8941 vm_map_address_t *dst_addr, /* OUT */
8942 vm_map_copy_t copy,
39037602 8943 vm_map_size_t copy_size,
39236c6e
A
8944 boolean_t consume_on_success,
8945 vm_prot_t cur_protection,
8946 vm_prot_t max_protection,
8947 vm_inherit_t inheritance)
1c79356b 8948{
91447636
A
8949 vm_map_size_t size;
8950 vm_map_size_t adjustment;
8951 vm_map_offset_t start;
1c79356b
A
8952 vm_object_offset_t vm_copy_start;
8953 vm_map_entry_t last;
1c79356b 8954 vm_map_entry_t entry;
3e170ce0 8955 vm_map_entry_t hole_entry;
1c79356b
A
8956
8957 /*
8958 * Check for null copy object.
8959 */
8960
8961 if (copy == VM_MAP_COPY_NULL) {
8962 *dst_addr = 0;
8963 return(KERN_SUCCESS);
8964 }
8965
39037602
A
8966 if (copy->size != copy_size) {
8967 *dst_addr = 0;
8968 return KERN_FAILURE;
8969 }
8970
1c79356b
A
8971 /*
8972 * Check for special copy object, created
8973 * by vm_map_copyin_object.
8974 */
8975
8976 if (copy->type == VM_MAP_COPY_OBJECT) {
8977 vm_object_t object = copy->cpy_object;
8978 kern_return_t kr;
8979 vm_object_offset_t offset;
8980
91447636 8981 offset = vm_object_trunc_page(copy->offset);
39037602 8982 size = vm_map_round_page((copy_size +
39236c6e
A
8983 (vm_map_size_t)(copy->offset -
8984 offset)),
8985 VM_MAP_PAGE_MASK(dst_map));
1c79356b
A
8986 *dst_addr = 0;
8987 kr = vm_map_enter(dst_map, dst_addr, size,
91447636 8988 (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
1c79356b
A
8989 object, offset, FALSE,
8990 VM_PROT_DEFAULT, VM_PROT_ALL,
8991 VM_INHERIT_DEFAULT);
8992 if (kr != KERN_SUCCESS)
8993 return(kr);
8994 /* Account for non-pagealigned copy object */
91447636 8995 *dst_addr += (vm_map_offset_t)(copy->offset - offset);
39236c6e
A
8996 if (consume_on_success)
8997 zfree(vm_map_copy_zone, copy);
1c79356b
A
8998 return(KERN_SUCCESS);
8999 }
9000
9001 /*
9002 * Check for special kernel buffer allocated
9003 * by new_ipc_kmsg_copyin.
9004 */
9005
9006 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
39037602
A
9007 return vm_map_copyout_kernel_buffer(dst_map, dst_addr,
9008 copy, copy_size, FALSE,
39236c6e 9009 consume_on_success);
1c79356b
A
9010 }
9011
39236c6e 9012
1c79356b
A
9013 /*
9014 * Find space for the data
9015 */
9016
39236c6e
A
9017 vm_copy_start = vm_map_trunc_page((vm_map_size_t)copy->offset,
9018 VM_MAP_COPY_PAGE_MASK(copy));
39037602 9019 size = vm_map_round_page((vm_map_size_t)copy->offset + copy_size,
39236c6e 9020 VM_MAP_COPY_PAGE_MASK(copy))
2d21ac55 9021 - vm_copy_start;
1c79356b 9022
39236c6e 9023
2d21ac55 9024StartAgain: ;
1c79356b
A
9025
9026 vm_map_lock(dst_map);
6d2010ae
A
9027 if( dst_map->disable_vmentry_reuse == TRUE) {
9028 VM_MAP_HIGHEST_ENTRY(dst_map, entry, start);
9029 last = entry;
9030 } else {
3e170ce0
A
9031 if (dst_map->holelistenabled) {
9032 hole_entry = (vm_map_entry_t)dst_map->holes_list;
9033
9034 if (hole_entry == NULL) {
9035 /*
9036 * No more space in the map?
9037 */
9038 vm_map_unlock(dst_map);
9039 return(KERN_NO_SPACE);
9040 }
9041
9042 last = hole_entry;
9043 start = last->vme_start;
9044 } else {
9045 assert(first_free_is_valid(dst_map));
9046 start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
9047 vm_map_min(dst_map) : last->vme_end;
9048 }
39236c6e
A
9049 start = vm_map_round_page(start,
9050 VM_MAP_PAGE_MASK(dst_map));
6d2010ae 9051 }
1c79356b
A
9052
9053 while (TRUE) {
9054 vm_map_entry_t next = last->vme_next;
91447636 9055 vm_map_offset_t end = start + size;
1c79356b
A
9056
9057 if ((end > dst_map->max_offset) || (end < start)) {
9058 if (dst_map->wait_for_space) {
9059 if (size <= (dst_map->max_offset - dst_map->min_offset)) {
9060 assert_wait((event_t) dst_map,
9061 THREAD_INTERRUPTIBLE);
9062 vm_map_unlock(dst_map);
91447636 9063 thread_block(THREAD_CONTINUE_NULL);
1c79356b
A
9064 goto StartAgain;
9065 }
9066 }
9067 vm_map_unlock(dst_map);
9068 return(KERN_NO_SPACE);
9069 }
9070
3e170ce0
A
9071 if (dst_map->holelistenabled) {
9072 if (last->vme_end >= end)
9073 break;
9074 } else {
9075 /*
9076 * If there are no more entries, we must win.
9077 *
9078 * OR
9079 *
9080 * If there is another entry, it must be
9081 * after the end of the potential new region.
9082 */
9083
9084 if (next == vm_map_to_entry(dst_map))
9085 break;
9086
9087 if (next->vme_start >= end)
9088 break;
9089 }
1c79356b
A
9090
9091 last = next;
3e170ce0
A
9092
9093 if (dst_map->holelistenabled) {
9094 if (last == (vm_map_entry_t) dst_map->holes_list) {
9095 /*
9096 * Wrapped around
9097 */
9098 vm_map_unlock(dst_map);
9099 return(KERN_NO_SPACE);
9100 }
9101 start = last->vme_start;
9102 } else {
9103 start = last->vme_end;
9104 }
39236c6e
A
9105 start = vm_map_round_page(start,
9106 VM_MAP_PAGE_MASK(dst_map));
9107 }
9108
3e170ce0
A
9109 if (dst_map->holelistenabled) {
9110 if (vm_map_lookup_entry(dst_map, last->vme_start, &last)) {
9111 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", last, (unsigned long long)last->vme_start);
9112 }
9113 }
9114
9115
39236c6e
A
9116 adjustment = start - vm_copy_start;
9117 if (! consume_on_success) {
9118 /*
9119 * We're not allowed to consume "copy", so we'll have to
9120 * copy its map entries into the destination map below.
9121 * No need to re-allocate map entries from the correct
9122 * (pageable or not) zone, since we'll get new map entries
9123 * during the transfer.
9124 * We'll also adjust the map entries's "start" and "end"
9125 * during the transfer, to keep "copy"'s entries consistent
9126 * with its "offset".
9127 */
9128 goto after_adjustments;
1c79356b
A
9129 }
9130
9131 /*
9132 * Since we're going to just drop the map
9133 * entries from the copy into the destination
9134 * map, they must come from the same pool.
9135 */
9136
9137 if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
2d21ac55
A
9138 /*
9139 * Mismatches occur when dealing with the default
9140 * pager.
9141 */
9142 zone_t old_zone;
9143 vm_map_entry_t next, new;
9144
9145 /*
9146 * Find the zone that the copies were allocated from
9147 */
7ddcb079 9148
2d21ac55
A
9149 entry = vm_map_copy_first_entry(copy);
9150
9151 /*
9152 * Reinitialize the copy so that vm_map_copy_entry_link
9153 * will work.
9154 */
6d2010ae 9155 vm_map_store_copy_reset(copy, entry);
2d21ac55 9156 copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;
2d21ac55
A
9157
9158 /*
9159 * Copy each entry.
9160 */
9161 while (entry != vm_map_copy_to_entry(copy)) {
7ddcb079 9162 new = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
2d21ac55 9163 vm_map_entry_copy_full(new, entry);
fe8ab488
A
9164 assert(!new->iokit_acct);
9165 if (new->is_sub_map) {
9166 /* clr address space specifics */
9167 new->use_pmap = FALSE;
9168 }
2d21ac55
A
9169 vm_map_copy_entry_link(copy,
9170 vm_map_copy_last_entry(copy),
9171 new);
9172 next = entry->vme_next;
7ddcb079 9173 old_zone = entry->from_reserved_zone ? vm_map_entry_reserved_zone : vm_map_entry_zone;
2d21ac55
A
9174 zfree(old_zone, entry);
9175 entry = next;
9176 }
1c79356b
A
9177 }
9178
9179 /*
9180 * Adjust the addresses in the copy chain, and
9181 * reset the region attributes.
9182 */
9183
1c79356b
A
9184 for (entry = vm_map_copy_first_entry(copy);
9185 entry != vm_map_copy_to_entry(copy);
9186 entry = entry->vme_next) {
39236c6e
A
9187 if (VM_MAP_PAGE_SHIFT(dst_map) == PAGE_SHIFT) {
9188 /*
9189 * We're injecting this copy entry into a map that
9190 * has the standard page alignment, so clear
9191 * "map_aligned" (which might have been inherited
9192 * from the original map entry).
9193 */
9194 entry->map_aligned = FALSE;
9195 }
9196
1c79356b
A
9197 entry->vme_start += adjustment;
9198 entry->vme_end += adjustment;
9199
39236c6e
A
9200 if (entry->map_aligned) {
9201 assert(VM_MAP_PAGE_ALIGNED(entry->vme_start,
9202 VM_MAP_PAGE_MASK(dst_map)));
9203 assert(VM_MAP_PAGE_ALIGNED(entry->vme_end,
9204 VM_MAP_PAGE_MASK(dst_map)));
9205 }
9206
1c79356b
A
9207 entry->inheritance = VM_INHERIT_DEFAULT;
9208 entry->protection = VM_PROT_DEFAULT;
9209 entry->max_protection = VM_PROT_ALL;
9210 entry->behavior = VM_BEHAVIOR_DEFAULT;
9211
9212 /*
9213 * If the entry is now wired,
9214 * map the pages into the destination map.
9215 */
9216 if (entry->wired_count != 0) {
39037602 9217 vm_map_offset_t va;
2d21ac55 9218 vm_object_offset_t offset;
39037602 9219 vm_object_t object;
2d21ac55
A
9220 vm_prot_t prot;
9221 int type_of_fault;
1c79356b 9222
3e170ce0
A
9223 object = VME_OBJECT(entry);
9224 offset = VME_OFFSET(entry);
2d21ac55 9225 va = entry->vme_start;
1c79356b 9226
2d21ac55
A
9227 pmap_pageable(dst_map->pmap,
9228 entry->vme_start,
9229 entry->vme_end,
9230 TRUE);
1c79356b 9231
2d21ac55 9232 while (va < entry->vme_end) {
39037602 9233 vm_page_t m;
1c79356b 9234
2d21ac55
A
9235 /*
9236 * Look up the page in the object.
9237 * Assert that the page will be found in the
9238 * top object:
9239 * either
9240 * the object was newly created by
9241 * vm_object_copy_slowly, and has
9242 * copies of all of the pages from
9243 * the source object
9244 * or
9245 * the object was moved from the old
9246 * map entry; because the old map
9247 * entry was wired, all of the pages
9248 * were in the top-level object.
9249 * (XXX not true if we wire pages for
9250 * reading)
9251 */
9252 vm_object_lock(object);
91447636 9253
2d21ac55 9254 m = vm_page_lookup(object, offset);
b0d623f7 9255 if (m == VM_PAGE_NULL || !VM_PAGE_WIRED(m) ||
2d21ac55
A
9256 m->absent)
9257 panic("vm_map_copyout: wiring %p", m);
1c79356b 9258
2d21ac55
A
9259 /*
9260 * ENCRYPTED SWAP:
9261 * The page is assumed to be wired here, so it
9262 * shouldn't be encrypted. Otherwise, we
9263 * couldn't enter it in the page table, since
9264 * we don't want the user to see the encrypted
9265 * data.
9266 */
9267 ASSERT_PAGE_DECRYPTED(m);
1c79356b 9268
2d21ac55 9269 prot = entry->protection;
1c79356b 9270
3e170ce0
A
9271 if (override_nx(dst_map, VME_ALIAS(entry)) &&
9272 prot)
2d21ac55 9273 prot |= VM_PROT_EXECUTE;
1c79356b 9274
2d21ac55 9275 type_of_fault = DBG_CACHE_HIT_FAULT;
1c79356b 9276
6d2010ae 9277 vm_fault_enter(m, dst_map->pmap, va, prot, prot,
fe8ab488 9278 VM_PAGE_WIRED(m), FALSE, FALSE,
3e170ce0 9279 FALSE, VME_ALIAS(entry),
fe8ab488
A
9280 ((entry->iokit_acct ||
9281 (!entry->is_sub_map &&
9282 !entry->use_pmap))
9283 ? PMAP_OPTIONS_ALT_ACCT
9284 : 0),
9285 NULL, &type_of_fault);
1c79356b 9286
2d21ac55 9287 vm_object_unlock(object);
1c79356b 9288
2d21ac55
A
9289 offset += PAGE_SIZE_64;
9290 va += PAGE_SIZE;
1c79356b
A
9291 }
9292 }
9293 }
9294
39236c6e
A
9295after_adjustments:
9296
1c79356b
A
9297 /*
9298 * Correct the page alignment for the result
9299 */
9300
9301 *dst_addr = start + (copy->offset - vm_copy_start);
9302
9303 /*
9304 * Update the hints and the map size
9305 */
9306
39236c6e
A
9307 if (consume_on_success) {
9308 SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy));
9309 } else {
9310 SAVE_HINT_MAP_WRITE(dst_map, last);
9311 }
1c79356b
A
9312
9313 dst_map->size += size;
9314
9315 /*
9316 * Link in the copy
9317 */
9318
39236c6e
A
9319 if (consume_on_success) {
9320 vm_map_copy_insert(dst_map, last, copy);
9321 } else {
9322 vm_map_copy_remap(dst_map, last, copy, adjustment,
9323 cur_protection, max_protection,
9324 inheritance);
9325 }
1c79356b
A
9326
9327 vm_map_unlock(dst_map);
9328
9329 /*
9330 * XXX If wiring_required, call vm_map_pageable
9331 */
9332
9333 return(KERN_SUCCESS);
9334}
9335
1c79356b
A
9336/*
9337 * Routine: vm_map_copyin
9338 *
9339 * Description:
2d21ac55
A
9340 * see vm_map_copyin_common. Exported via Unsupported.exports.
9341 *
9342 */
9343
9344#undef vm_map_copyin
9345
9346kern_return_t
9347vm_map_copyin(
9348 vm_map_t src_map,
9349 vm_map_address_t src_addr,
9350 vm_map_size_t len,
9351 boolean_t src_destroy,
9352 vm_map_copy_t *copy_result) /* OUT */
9353{
9354 return(vm_map_copyin_common(src_map, src_addr, len, src_destroy,
9355 FALSE, copy_result, FALSE));
9356}
9357
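/*
 * Illustrative usage sketch only (not compiled into xnu): a typical
 * copyin/copyout round trip that moves "len" bytes from "src_map" into
 * "dst_map".  "example_move_range" is a hypothetical helper; it assumes
 * the usual vm_map_copyout(dst_map, &dst_addr, copy) prototype from
 * vm_map.h and that the copy object is consumed only on success.
 */
#if 0 /* example only */
static kern_return_t
example_move_range(
	vm_map_t		src_map,
	vm_map_address_t	src_addr,
	vm_map_size_t		len,
	vm_map_t		dst_map,
	vm_map_address_t	*dst_addr)	/* OUT */
{
	vm_map_copy_t	copy;
	kern_return_t	kr;

	/* capture the source range; FALSE == leave the source mapping intact */
	kr = vm_map_copyin(src_map, src_addr, len, FALSE, &copy);
	if (kr != KERN_SUCCESS)
		return kr;

	/* insert the copy at a map-chosen address in the destination */
	kr = vm_map_copyout(dst_map, dst_addr, copy);
	if (kr != KERN_SUCCESS) {
		/* copyout did not consume the copy, so release it */
		vm_map_copy_discard(copy);
	}
	return kr;
}
#endif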
9358/*
9359 * Routine: vm_map_copyin_common
9360 *
9361 * Description:
1c79356b
A
9362 * Copy the specified region (src_addr, len) from the
9363 * source address space (src_map), possibly removing
9364 * the region from the source address space (src_destroy).
9365 *
9366 * Returns:
9367 * A vm_map_copy_t object (copy_result), suitable for
9368 * insertion into another address space (using vm_map_copyout),
9369 * copying over another address space region (using
9370 * vm_map_copy_overwrite). If the copy is unused, it
9371 * should be destroyed (using vm_map_copy_discard).
9372 *
9373 * In/out conditions:
9374 * The source map should not be locked on entry.
9375 */
9376
9377typedef struct submap_map {
9378 vm_map_t parent_map;
91447636
A
9379 vm_map_offset_t base_start;
9380 vm_map_offset_t base_end;
2d21ac55 9381 vm_map_size_t base_len;
1c79356b
A
9382 struct submap_map *next;
9383} submap_map_t;
9384
9385kern_return_t
9386vm_map_copyin_common(
9387 vm_map_t src_map,
91447636
A
9388 vm_map_address_t src_addr,
9389 vm_map_size_t len,
1c79356b 9390 boolean_t src_destroy,
91447636 9391 __unused boolean_t src_volatile,
1c79356b
A
9392 vm_map_copy_t *copy_result, /* OUT */
9393 boolean_t use_maxprot)
4bd07ac2
A
9394{
9395 int flags;
9396
9397 flags = 0;
9398 if (src_destroy) {
9399 flags |= VM_MAP_COPYIN_SRC_DESTROY;
9400 }
9401 if (use_maxprot) {
9402 flags |= VM_MAP_COPYIN_USE_MAXPROT;
9403 }
9404 return vm_map_copyin_internal(src_map,
9405 src_addr,
9406 len,
9407 flags,
9408 copy_result);
9409}
9410kern_return_t
9411vm_map_copyin_internal(
9412 vm_map_t src_map,
9413 vm_map_address_t src_addr,
9414 vm_map_size_t len,
9415 int flags,
9416 vm_map_copy_t *copy_result) /* OUT */
1c79356b 9417{
1c79356b
A
9418 vm_map_entry_t tmp_entry; /* Result of last map lookup --
9419 * in multi-level lookup, this
9420 * entry contains the actual
9421 * vm_object/offset.
9422 */
1c79356b
A
9423 vm_map_entry_t new_entry = VM_MAP_ENTRY_NULL; /* Map entry for copy */
9424
91447636 9425 vm_map_offset_t src_start; /* Start of current entry --
1c79356b
A
9426 * where copy is taking place now
9427 */
91447636 9428 vm_map_offset_t src_end; /* End of entire region to be
1c79356b 9429 * copied */
2d21ac55 9430 vm_map_offset_t src_base;
91447636 9431 vm_map_t base_map = src_map;
1c79356b
A
9432 boolean_t map_share=FALSE;
9433 submap_map_t *parent_maps = NULL;
9434
1c79356b 9435 vm_map_copy_t copy; /* Resulting copy */
fe8ab488
A
9436 vm_map_address_t copy_addr;
9437 vm_map_size_t copy_size;
4bd07ac2
A
9438 boolean_t src_destroy;
9439 boolean_t use_maxprot;
39037602 9440 boolean_t preserve_purgeable;
4bd07ac2
A
9441
9442 if (flags & ~VM_MAP_COPYIN_ALL_FLAGS) {
9443 return KERN_INVALID_ARGUMENT;
9444 }
9445
9446 src_destroy = (flags & VM_MAP_COPYIN_SRC_DESTROY) ? TRUE : FALSE;
9447 use_maxprot = (flags & VM_MAP_COPYIN_USE_MAXPROT) ? TRUE : FALSE;
39037602
A
9448 preserve_purgeable =
9449 (flags & VM_MAP_COPYIN_PRESERVE_PURGEABLE) ? TRUE : FALSE;
1c79356b
A
9450
9451 /*
9452 * Check for copies of zero bytes.
9453 */
9454
9455 if (len == 0) {
9456 *copy_result = VM_MAP_COPY_NULL;
9457 return(KERN_SUCCESS);
9458 }
9459
4a249263
A
9460 /*
9461 * Check that the end address doesn't overflow
9462 */
9463 src_end = src_addr + len;
9464 if (src_end < src_addr)
9465 return KERN_INVALID_ADDRESS;
9466
39037602
A
9467 /*
9468 * Compute (page aligned) start and end of region
9469 */
9470 src_start = vm_map_trunc_page(src_addr,
9471 VM_MAP_PAGE_MASK(src_map));
9472 src_end = vm_map_round_page(src_end,
9473 VM_MAP_PAGE_MASK(src_map));
9474
1c79356b
A
9475 /*
9476 * If the copy is sufficiently small, use a kernel buffer instead
9477 * of making a virtual copy. The theory being that the cost of
9478 * setting up VM (and taking C-O-W faults) dominates the copy costs
9479 * for small regions.
9480 */
4bd07ac2
A
9481 if ((len < msg_ool_size_small) &&
9482 !use_maxprot &&
39037602
A
9483 !preserve_purgeable &&
9484 !(flags & VM_MAP_COPYIN_ENTRY_LIST) &&
9485 /*
9486 * Since the "msg_ool_size_small" threshold was increased and
9487 * vm_map_copyin_kernel_buffer() doesn't handle accesses beyond the
9488 * address space limits, we revert to doing a virtual copy if the
9489 * copied range goes beyond those limits. Otherwise, mach_vm_read()
9490 * of the commpage would now fail when it used to work.
9491 */
9492 (src_start >= vm_map_min(src_map) &&
9493 src_start < vm_map_max(src_map) &&
9494 src_end >= vm_map_min(src_map) &&
9495 src_end < vm_map_max(src_map)))
2d21ac55
A
9496 return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
9497 src_destroy, copy_result);
1c79356b 9498
b0d623f7 9499 XPR(XPR_VM_MAP, "vm_map_copyin_common map 0x%x addr 0x%x len 0x%x dest %d\n", src_map, src_addr, len, src_destroy, 0);
1c79356b 9500
1c79356b
A
9501 /*
9502 * Allocate a header element for the list.
9503 *
9504 * Use the start and end in the header to
9505 * remember the endpoints prior to rounding.
9506 */
9507
9508 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
04b8595b 9509 copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
1c79356b 9510 vm_map_copy_first_entry(copy) =
2d21ac55 9511 vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
1c79356b
A
9512 copy->type = VM_MAP_COPY_ENTRY_LIST;
9513 copy->cpy_hdr.nentries = 0;
9514 copy->cpy_hdr.entries_pageable = TRUE;
39236c6e
A
9515#if 00
9516 copy->cpy_hdr.page_shift = src_map->hdr.page_shift;
9517#else
9518 /*
9519 * The copy entries can be broken down for a variety of reasons,
9520 * so we can't guarantee that they will remain map-aligned...
9521 * Will need to adjust the first copy_entry's "vme_start" and
9522 * the last copy_entry's "vme_end" to be rounded to PAGE_MASK
9523 * rather than the original map's alignment.
9524 */
9525 copy->cpy_hdr.page_shift = PAGE_SHIFT;
9526#endif
1c79356b 9527
6d2010ae
A
9528 vm_map_store_init( &(copy->cpy_hdr) );
9529
1c79356b
A
9530 copy->offset = src_addr;
9531 copy->size = len;
9532
7ddcb079 9533 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
1c79356b
A
9534
9535#define RETURN(x) \
9536 MACRO_BEGIN \
9537 vm_map_unlock(src_map); \
9bccf70c
A
9538 if(src_map != base_map) \
9539 vm_map_deallocate(src_map); \
1c79356b
A
9540 if (new_entry != VM_MAP_ENTRY_NULL) \
9541 vm_map_copy_entry_dispose(copy,new_entry); \
9542 vm_map_copy_discard(copy); \
9543 { \
91447636 9544 submap_map_t *_ptr; \
1c79356b 9545 \
91447636 9546 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
1c79356b 9547 parent_maps=parent_maps->next; \
91447636
A
9548 if (_ptr->parent_map != base_map) \
9549 vm_map_deallocate(_ptr->parent_map); \
9550 kfree(_ptr, sizeof(submap_map_t)); \
1c79356b
A
9551 } \
9552 } \
9553 MACRO_RETURN(x); \
9554 MACRO_END
9555
9556 /*
9557 * Find the beginning of the region.
9558 */
9559
9560 vm_map_lock(src_map);
9561
fe8ab488
A
9562 /*
9563 * Lookup the original "src_addr" rather than the truncated
9564 * "src_start", in case "src_start" falls in a non-map-aligned
9565 * map entry *before* the map entry that contains "src_addr"...
9566 */
9567 if (!vm_map_lookup_entry(src_map, src_addr, &tmp_entry))
1c79356b
A
9568 RETURN(KERN_INVALID_ADDRESS);
9569 if(!tmp_entry->is_sub_map) {
fe8ab488
A
9570 /*
9571 * ... but clip to the map-rounded "src_start" rather than
9572 * "src_addr" to preserve map-alignment. We'll adjust the
9573 * first copy entry at the end, if needed.
9574 */
1c79356b
A
9575 vm_map_clip_start(src_map, tmp_entry, src_start);
9576 }
fe8ab488
A
9577 if (src_start < tmp_entry->vme_start) {
9578 /*
9579 * Move "src_start" up to the start of the
9580 * first map entry to copy.
9581 */
9582 src_start = tmp_entry->vme_start;
9583 }
1c79356b
A
9584 /* set for later submap fix-up */
9585 copy_addr = src_start;
9586
9587 /*
9588 * Go through entries until we get to the end.
9589 */
9590
9591 while (TRUE) {
1c79356b 9592 vm_map_entry_t src_entry = tmp_entry; /* Top-level entry */
91447636 9593 vm_map_size_t src_size; /* Size of source
1c79356b
A
9594 * map entry (in both
9595 * maps)
9596 */
9597
1c79356b
A
9598 vm_object_t src_object; /* Object to copy */
9599 vm_object_offset_t src_offset;
9600
9601 boolean_t src_needs_copy; /* Should source map
9602 * be made read-only
9603 * for copy-on-write?
9604 */
9605
9606 boolean_t new_entry_needs_copy; /* Will new entry be COW? */
9607
9608 boolean_t was_wired; /* Was source wired? */
9609 vm_map_version_t version; /* Version before locks
9610 * dropped to make copy
9611 */
9612 kern_return_t result; /* Return value from
9613 * copy_strategically.
9614 */
9615 while(tmp_entry->is_sub_map) {
91447636 9616 vm_map_size_t submap_len;
1c79356b
A
9617 submap_map_t *ptr;
9618
9619 ptr = (submap_map_t *)kalloc(sizeof(submap_map_t));
9620 ptr->next = parent_maps;
9621 parent_maps = ptr;
9622 ptr->parent_map = src_map;
9623 ptr->base_start = src_start;
9624 ptr->base_end = src_end;
9625 submap_len = tmp_entry->vme_end - src_start;
9626 if(submap_len > (src_end-src_start))
9627 submap_len = src_end-src_start;
2d21ac55 9628 ptr->base_len = submap_len;
1c79356b
A
9629
9630 src_start -= tmp_entry->vme_start;
3e170ce0 9631 src_start += VME_OFFSET(tmp_entry);
1c79356b 9632 src_end = src_start + submap_len;
3e170ce0 9633 src_map = VME_SUBMAP(tmp_entry);
1c79356b 9634 vm_map_lock(src_map);
9bccf70c
A
9635 /* keep an outstanding reference for all maps in */
9636 /* the parents tree except the base map */
9637 vm_map_reference(src_map);
1c79356b
A
9638 vm_map_unlock(ptr->parent_map);
9639 if (!vm_map_lookup_entry(
2d21ac55 9640 src_map, src_start, &tmp_entry))
1c79356b
A
9641 RETURN(KERN_INVALID_ADDRESS);
9642 map_share = TRUE;
9643 if(!tmp_entry->is_sub_map)
2d21ac55 9644 vm_map_clip_start(src_map, tmp_entry, src_start);
1c79356b
A
9645 src_entry = tmp_entry;
9646 }
2d21ac55
A
9647 /* we are now in the lowest level submap... */
9648
3e170ce0
A
9649 if ((VME_OBJECT(tmp_entry) != VM_OBJECT_NULL) &&
9650 (VME_OBJECT(tmp_entry)->phys_contiguous)) {
55e303ae
A
9651 /* This is not supported for now. In the future */
9652 /* we will need to detect the phys_contig */
9653 /* condition and then upgrade copy_slowly */
9654 /* to do a physical copy from the device-memory- */
9655 /* based object. We can piggy-back off of */
9656 /* the "was_wired" boolean to set up the */
9657 /* proper handling. */
0b4e3aa0
A
9658 RETURN(KERN_PROTECTION_FAILURE);
9659 }
1c79356b
A
9660 /*
9661 * Create a new address map entry to hold the result.
9662 * Fill in the fields from the appropriate source entries.
9663 * We must unlock the source map to do this if we need
9664 * to allocate a map entry.
9665 */
9666 if (new_entry == VM_MAP_ENTRY_NULL) {
2d21ac55
A
9667 version.main_timestamp = src_map->timestamp;
9668 vm_map_unlock(src_map);
1c79356b 9669
7ddcb079 9670 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
1c79356b 9671
2d21ac55
A
9672 vm_map_lock(src_map);
9673 if ((version.main_timestamp + 1) != src_map->timestamp) {
9674 if (!vm_map_lookup_entry(src_map, src_start,
9675 &tmp_entry)) {
9676 RETURN(KERN_INVALID_ADDRESS);
9677 }
9678 if (!tmp_entry->is_sub_map)
9679 vm_map_clip_start(src_map, tmp_entry, src_start);
9680 continue; /* restart w/ new tmp_entry */
1c79356b 9681 }
1c79356b
A
9682 }
9683
9684 /*
9685 * Verify that the region can be read.
9686 */
9687 if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE &&
2d21ac55 9688 !use_maxprot) ||
1c79356b
A
9689 (src_entry->max_protection & VM_PROT_READ) == 0)
9690 RETURN(KERN_PROTECTION_FAILURE);
9691
9692 /*
9693 * Clip against the endpoints of the entire region.
9694 */
9695
9696 vm_map_clip_end(src_map, src_entry, src_end);
9697
9698 src_size = src_entry->vme_end - src_start;
3e170ce0
A
9699 src_object = VME_OBJECT(src_entry);
9700 src_offset = VME_OFFSET(src_entry);
1c79356b
A
9701 was_wired = (src_entry->wired_count != 0);
9702
9703 vm_map_entry_copy(new_entry, src_entry);
fe8ab488
A
9704 if (new_entry->is_sub_map) {
9705 /* clr address space specifics */
9706 new_entry->use_pmap = FALSE;
9707 }
1c79356b
A
9708
9709 /*
9710 * Attempt non-blocking copy-on-write optimizations.
9711 */
9712
813fb2f6
A
9713 if (src_destroy &&
9714 (src_object == VM_OBJECT_NULL ||
9715 (src_object->internal &&
9716 src_object->copy_strategy != MEMORY_OBJECT_COPY_DELAY &&
9717 !src_object->true_share &&
9718 !map_share))) {
2d21ac55
A
9719 /*
9720 * If we are destroying the source, and the object
9721 * is internal, we can move the object reference
9722 * from the source to the copy. The copy is
9723 * copy-on-write only if the source is.
9724 * We make another reference to the object, because
9725 * destroying the source entry will deallocate it.
9726 */
9727 vm_object_reference(src_object);
1c79356b 9728
2d21ac55
A
9729 /*
9730 * Copy is always unwired; vm_map_entry_copy()
9731 * set its wired count to zero.
9732 */
1c79356b 9733
2d21ac55 9734 goto CopySuccessful;
1c79356b
A
9735 }
9736
9737
2d21ac55 9738 RestartCopy:
1c79356b 9739 XPR(XPR_VM_MAP, "vm_map_copyin_common src_obj 0x%x ent 0x%x obj 0x%x was_wired %d\n",
3e170ce0 9740 src_object, new_entry, VME_OBJECT(new_entry),
1c79356b 9741 was_wired, 0);
55e303ae 9742 if ((src_object == VM_OBJECT_NULL ||
2d21ac55
A
9743 (!was_wired && !map_share && !tmp_entry->is_shared)) &&
9744 vm_object_copy_quickly(
3e170ce0 9745 &VME_OBJECT(new_entry),
2d21ac55
A
9746 src_offset,
9747 src_size,
9748 &src_needs_copy,
9749 &new_entry_needs_copy)) {
1c79356b
A
9750
9751 new_entry->needs_copy = new_entry_needs_copy;
9752
9753 /*
9754 * Handle copy-on-write obligations
9755 */
9756
9757 if (src_needs_copy && !tmp_entry->needs_copy) {
0c530ab8
A
9758 vm_prot_t prot;
9759
9760 prot = src_entry->protection & ~VM_PROT_WRITE;
2d21ac55 9761
3e170ce0
A
9762 if (override_nx(src_map, VME_ALIAS(src_entry))
9763 && prot)
0c530ab8 9764 prot |= VM_PROT_EXECUTE;
2d21ac55 9765
55e303ae
A
9766 vm_object_pmap_protect(
9767 src_object,
9768 src_offset,
9769 src_size,
9770 (src_entry->is_shared ?
2d21ac55
A
9771 PMAP_NULL
9772 : src_map->pmap),
55e303ae 9773 src_entry->vme_start,
0c530ab8
A
9774 prot);
9775
3e170ce0 9776 assert(tmp_entry->wired_count == 0);
55e303ae 9777 tmp_entry->needs_copy = TRUE;
1c79356b
A
9778 }
9779
9780 /*
9781 * The map has never been unlocked, so it's safe
9782 * to move to the next entry rather than doing
9783 * another lookup.
9784 */
9785
9786 goto CopySuccessful;
9787 }
9788
1c79356b
A
9789 /*
9790 * Take an object reference, so that we may
9791 * release the map lock(s).
9792 */
9793
9794 assert(src_object != VM_OBJECT_NULL);
9795 vm_object_reference(src_object);
9796
9797 /*
9798 * Record the timestamp for later verification.
9799 * Unlock the map.
9800 */
9801
9802 version.main_timestamp = src_map->timestamp;
9bccf70c 9803 vm_map_unlock(src_map); /* Increments timestamp once! */
1c79356b
A
9804
9805 /*
9806 * Perform the copy
9807 */
9808
9809 if (was_wired) {
55e303ae 9810 CopySlowly:
1c79356b
A
9811 vm_object_lock(src_object);
9812 result = vm_object_copy_slowly(
2d21ac55
A
9813 src_object,
9814 src_offset,
9815 src_size,
9816 THREAD_UNINT,
3e170ce0
A
9817 &VME_OBJECT(new_entry));
9818 VME_OFFSET_SET(new_entry, 0);
1c79356b 9819 new_entry->needs_copy = FALSE;
55e303ae
A
9820
9821 }
9822 else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
2d21ac55 9823 (tmp_entry->is_shared || map_share)) {
55e303ae
A
9824 vm_object_t new_object;
9825
2d21ac55 9826 vm_object_lock_shared(src_object);
55e303ae 9827 new_object = vm_object_copy_delayed(
2d21ac55
A
9828 src_object,
9829 src_offset,
9830 src_size,
9831 TRUE);
55e303ae
A
9832 if (new_object == VM_OBJECT_NULL)
9833 goto CopySlowly;
9834
3e170ce0
A
9835 VME_OBJECT_SET(new_entry, new_object);
9836 assert(new_entry->wired_count == 0);
55e303ae 9837 new_entry->needs_copy = TRUE;
fe8ab488
A
9838 assert(!new_entry->iokit_acct);
9839 assert(new_object->purgable == VM_PURGABLE_DENY);
9840 new_entry->use_pmap = TRUE;
55e303ae
A
9841 result = KERN_SUCCESS;
9842
1c79356b 9843 } else {
3e170ce0
A
9844 vm_object_offset_t new_offset;
9845 new_offset = VME_OFFSET(new_entry);
1c79356b 9846 result = vm_object_copy_strategically(src_object,
2d21ac55
A
9847 src_offset,
9848 src_size,
3e170ce0
A
9849 &VME_OBJECT(new_entry),
9850 &new_offset,
2d21ac55 9851 &new_entry_needs_copy);
3e170ce0
A
9852 if (new_offset != VME_OFFSET(new_entry)) {
9853 VME_OFFSET_SET(new_entry, new_offset);
9854 }
1c79356b
A
9855
9856 new_entry->needs_copy = new_entry_needs_copy;
1c79356b
A
9857 }
9858
39037602
A
9859 if (result == KERN_SUCCESS &&
9860 preserve_purgeable &&
9861 src_object->purgable != VM_PURGABLE_DENY) {
9862 vm_object_t new_object;
9863
9864 new_object = VME_OBJECT(new_entry);
9865 assert(new_object != src_object);
9866 vm_object_lock(new_object);
9867 assert(new_object->ref_count == 1);
9868 assert(new_object->shadow == VM_OBJECT_NULL);
9869 assert(new_object->copy == VM_OBJECT_NULL);
9870 assert(new_object->vo_purgeable_owner == NULL);
9871
9872 new_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
9873 new_object->true_share = TRUE;
9874 /* start as non-volatile with no owner... */
9875 new_object->purgable = VM_PURGABLE_NONVOLATILE;
9876 vm_purgeable_nonvolatile_enqueue(new_object, NULL);
9877 /* ... and move to src_object's purgeable state */
9878 if (src_object->purgable != VM_PURGABLE_NONVOLATILE) {
9879 int state;
9880 state = src_object->purgable;
9881 vm_object_purgable_control(
9882 new_object,
9883 VM_PURGABLE_SET_STATE,
9884 &state);
9885 }
9886 vm_object_unlock(new_object);
9887 new_object = VM_OBJECT_NULL;
9888 }
9889
1c79356b
A
9890 if (result != KERN_SUCCESS &&
9891 result != KERN_MEMORY_RESTART_COPY) {
9892 vm_map_lock(src_map);
9893 RETURN(result);
9894 }
9895
9896 /*
9897 * Throw away the extra reference
9898 */
9899
9900 vm_object_deallocate(src_object);
9901
9902 /*
9903 * Verify that the map has not substantially
9904 * changed while the copy was being made.
9905 */
9906
9bccf70c 9907 vm_map_lock(src_map);
1c79356b
A
9908
9909 if ((version.main_timestamp + 1) == src_map->timestamp)
9910 goto VerificationSuccessful;
9911
9912 /*
9913 * Simple version comparison failed.
9914 *
9915 * Retry the lookup and verify that the
9916 * same object/offset are still present.
9917 *
9918 * [Note: a memory manager that colludes with
9919 * the calling task can detect that we have
9920 * cheated. While the map was unlocked, the
9921 * mapping could have been changed and restored.]
9922 */
9923
9924 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
fe8ab488 9925 if (result != KERN_MEMORY_RESTART_COPY) {
3e170ce0
A
9926 vm_object_deallocate(VME_OBJECT(new_entry));
9927 VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
fe8ab488
A
9928 assert(!new_entry->iokit_acct);
9929 new_entry->use_pmap = TRUE;
9930 }
1c79356b
A
9931 RETURN(KERN_INVALID_ADDRESS);
9932 }
9933
9934 src_entry = tmp_entry;
9935 vm_map_clip_start(src_map, src_entry, src_start);
9936
91447636
A
9937 if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) &&
9938 !use_maxprot) ||
9939 ((src_entry->max_protection & VM_PROT_READ) == 0))
1c79356b
A
9940 goto VerificationFailed;
9941
39236c6e 9942 if (src_entry->vme_end < new_entry->vme_end) {
39037602
A
9943 /*
9944 * This entry might have been shortened
9945 * (vm_map_clip_end) or been replaced with
9946 * an entry that ends closer to "src_start"
9947 * than before.
9948 * Adjust "new_entry" accordingly; copying
9949 * less memory would be correct but we also
9950 * redo the copy (see below) if the new entry
9951 * no longer points at the same object/offset.
9952 */
39236c6e
A
9953 assert(VM_MAP_PAGE_ALIGNED(src_entry->vme_end,
9954 VM_MAP_COPY_PAGE_MASK(copy)));
9955 new_entry->vme_end = src_entry->vme_end;
9956 src_size = new_entry->vme_end - src_start;
39037602
A
9957 } else if (src_entry->vme_end > new_entry->vme_end) {
9958 /*
9959 * This entry might have been extended
9960 * (vm_map_entry_simplify() or coalesce)
9961 * or been replaced with an entry that ends farther
9962 * from "src_start" than before.
9963 *
9964 * We've called vm_object_copy_*() only on
9965 * the previous <start:end> range, so we can't
9966 * just extend new_entry. We have to re-do
9967 * the copy based on the new entry as if it was
9968 * pointing at a different object/offset (see
9969 * "Verification failed" below).
9970 */
39236c6e 9971 }
1c79356b 9972
3e170ce0 9973 if ((VME_OBJECT(src_entry) != src_object) ||
39037602
A
9974 (VME_OFFSET(src_entry) != src_offset) ||
9975 (src_entry->vme_end > new_entry->vme_end)) {
1c79356b
A
9976
9977 /*
9978 * Verification failed.
9979 *
9980 * Start over with this top-level entry.
9981 */
9982
2d21ac55 9983 VerificationFailed: ;
1c79356b 9984
3e170ce0 9985 vm_object_deallocate(VME_OBJECT(new_entry));
1c79356b
A
9986 tmp_entry = src_entry;
9987 continue;
9988 }
9989
9990 /*
9991 * Verification succeeded.
9992 */
9993
2d21ac55 9994 VerificationSuccessful: ;
1c79356b
A
9995
9996 if (result == KERN_MEMORY_RESTART_COPY)
9997 goto RestartCopy;
9998
9999 /*
10000 * Copy succeeded.
10001 */
10002
2d21ac55 10003 CopySuccessful: ;
1c79356b
A
10004
10005 /*
10006 * Link in the new copy entry.
10007 */
10008
10009 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
10010 new_entry);
10011
10012 /*
10013 * Determine whether the entire region
10014 * has been copied.
10015 */
2d21ac55 10016 src_base = src_start;
1c79356b
A
10017 src_start = new_entry->vme_end;
10018 new_entry = VM_MAP_ENTRY_NULL;
10019 while ((src_start >= src_end) && (src_end != 0)) {
fe8ab488
A
10020 submap_map_t *ptr;
10021
10022 if (src_map == base_map) {
10023 /* back to the top */
1c79356b 10024 break;
fe8ab488
A
10025 }
10026
10027 ptr = parent_maps;
10028 assert(ptr != NULL);
10029 parent_maps = parent_maps->next;
10030
10031 /* fix up the damage we did in that submap */
10032 vm_map_simplify_range(src_map,
10033 src_base,
10034 src_end);
10035
10036 vm_map_unlock(src_map);
10037 vm_map_deallocate(src_map);
10038 vm_map_lock(ptr->parent_map);
10039 src_map = ptr->parent_map;
10040 src_base = ptr->base_start;
10041 src_start = ptr->base_start + ptr->base_len;
10042 src_end = ptr->base_end;
10043 if (!vm_map_lookup_entry(src_map,
10044 src_start,
10045 &tmp_entry) &&
10046 (src_end > src_start)) {
10047 RETURN(KERN_INVALID_ADDRESS);
10048 }
10049 kfree(ptr, sizeof(submap_map_t));
10050 if (parent_maps == NULL)
10051 map_share = FALSE;
10052 src_entry = tmp_entry->vme_prev;
10053 }
10054
10055 if ((VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) &&
10056 (src_start >= src_addr + len) &&
10057 (src_addr + len != 0)) {
10058 /*
10059 * Stop copying now, even though we haven't reached
10060 * "src_end". We'll adjust the end of the last copy
10061 * entry at the end, if needed.
10062 *
10063 * If src_map's alignment is different from the
10064 * system's page-alignment, there could be
10065 * extra non-map-aligned map entries between
10066 * the original (non-rounded) "src_addr + len"
10067 * and the rounded "src_end".
10068 * We do not want to copy those map entries since
10069 * they're not part of the copied range.
10070 */
10071 break;
1c79356b 10072 }
fe8ab488 10073
1c79356b
A
10074 if ((src_start >= src_end) && (src_end != 0))
10075 break;
10076
10077 /*
10078 * Verify that there are no gaps in the region
10079 */
10080
10081 tmp_entry = src_entry->vme_next;
fe8ab488 10082 if ((tmp_entry->vme_start != src_start) ||
39236c6e 10083 (tmp_entry == vm_map_to_entry(src_map))) {
1c79356b 10084 RETURN(KERN_INVALID_ADDRESS);
39236c6e 10085 }
1c79356b
A
10086 }
10087
10088 /*
10089 * If the source should be destroyed, do it now, since the
10090 * copy was successful.
10091 */
10092 if (src_destroy) {
39236c6e
A
10093 (void) vm_map_delete(
10094 src_map,
10095 vm_map_trunc_page(src_addr,
10096 VM_MAP_PAGE_MASK(src_map)),
10097 src_end,
10098 ((src_map == kernel_map) ?
10099 VM_MAP_REMOVE_KUNWIRE :
10100 VM_MAP_NO_FLAGS),
10101 VM_MAP_NULL);
2d21ac55
A
10102 } else {
10103 /* fix up the damage we did in the base map */
39236c6e
A
10104 vm_map_simplify_range(
10105 src_map,
10106 vm_map_trunc_page(src_addr,
10107 VM_MAP_PAGE_MASK(src_map)),
10108 vm_map_round_page(src_end,
10109 VM_MAP_PAGE_MASK(src_map)));
1c79356b
A
10110 }
10111
10112 vm_map_unlock(src_map);
10113
39236c6e 10114 if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) {
fe8ab488
A
10115 vm_map_offset_t original_start, original_offset, original_end;
10116
39236c6e
A
10117 assert(VM_MAP_COPY_PAGE_MASK(copy) == PAGE_MASK);
10118
10119 /* adjust alignment of first copy_entry's "vme_start" */
10120 tmp_entry = vm_map_copy_first_entry(copy);
10121 if (tmp_entry != vm_map_copy_to_entry(copy)) {
10122 vm_map_offset_t adjustment;
fe8ab488
A
10123
10124 original_start = tmp_entry->vme_start;
3e170ce0 10125 original_offset = VME_OFFSET(tmp_entry);
fe8ab488
A
10126
10127 /* map-align the start of the first copy entry... */
10128 adjustment = (tmp_entry->vme_start -
10129 vm_map_trunc_page(
10130 tmp_entry->vme_start,
10131 VM_MAP_PAGE_MASK(src_map)));
10132 tmp_entry->vme_start -= adjustment;
3e170ce0
A
10133 VME_OFFSET_SET(tmp_entry,
10134 VME_OFFSET(tmp_entry) - adjustment);
fe8ab488
A
10135 copy_addr -= adjustment;
10136 assert(tmp_entry->vme_start < tmp_entry->vme_end);
10137 /* ... adjust for mis-aligned start of copy range */
39236c6e
A
10138 adjustment =
10139 (vm_map_trunc_page(copy->offset,
10140 PAGE_MASK) -
10141 vm_map_trunc_page(copy->offset,
10142 VM_MAP_PAGE_MASK(src_map)));
10143 if (adjustment) {
10144 assert(page_aligned(adjustment));
10145 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
10146 tmp_entry->vme_start += adjustment;
3e170ce0
A
10147 VME_OFFSET_SET(tmp_entry,
10148 (VME_OFFSET(tmp_entry) +
10149 adjustment));
39236c6e
A
10150 copy_addr += adjustment;
10151 assert(tmp_entry->vme_start < tmp_entry->vme_end);
10152 }
fe8ab488
A
10153
10154 /*
10155 * Assert that the adjustments haven't exposed
10156 * more than was originally copied...
10157 */
10158 assert(tmp_entry->vme_start >= original_start);
3e170ce0 10159 assert(VME_OFFSET(tmp_entry) >= original_offset);
fe8ab488
A
10160 /*
10161 * ... and that it did not adjust outside of
10162 * a single 16K page.
10163 */
10164 assert(vm_map_trunc_page(tmp_entry->vme_start,
10165 VM_MAP_PAGE_MASK(src_map)) ==
10166 vm_map_trunc_page(original_start,
10167 VM_MAP_PAGE_MASK(src_map)));
39236c6e
A
10168 }
10169
10170 /* adjust alignment of last copy_entry's "vme_end" */
10171 tmp_entry = vm_map_copy_last_entry(copy);
10172 if (tmp_entry != vm_map_copy_to_entry(copy)) {
10173 vm_map_offset_t adjustment;
fe8ab488
A
10174
10175 original_end = tmp_entry->vme_end;
10176
10177 /* map-align the end of the last copy entry... */
10178 tmp_entry->vme_end =
10179 vm_map_round_page(tmp_entry->vme_end,
10180 VM_MAP_PAGE_MASK(src_map));
10181 /* ... adjust for mis-aligned end of copy range */
39236c6e
A
10182 adjustment =
10183 (vm_map_round_page((copy->offset +
10184 copy->size),
10185 VM_MAP_PAGE_MASK(src_map)) -
10186 vm_map_round_page((copy->offset +
10187 copy->size),
10188 PAGE_MASK));
10189 if (adjustment) {
10190 assert(page_aligned(adjustment));
10191 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
10192 tmp_entry->vme_end -= adjustment;
10193 assert(tmp_entry->vme_start < tmp_entry->vme_end);
10194 }
fe8ab488
A
10195
10196 /*
10197 * Assert that the adjustments haven't exposed
10198 * more than was originally copied...
10199 */
10200 assert(tmp_entry->vme_end <= original_end);
10201 /*
10202 * ... and that it did not adjust outside of
10203 * a single 16K page.
10204 */
10205 assert(vm_map_round_page(tmp_entry->vme_end,
10206 VM_MAP_PAGE_MASK(src_map)) ==
10207 vm_map_round_page(original_end,
10208 VM_MAP_PAGE_MASK(src_map)));
39236c6e
A
10209 }
10210 }
10211
1c79356b
A
10212 /* Fix-up start and end points in copy. This is necessary */
10213 /* when the various entries in the copy object were picked */
10214 /* up from different sub-maps */
10215
10216 tmp_entry = vm_map_copy_first_entry(copy);
fe8ab488 10217 copy_size = 0; /* compute actual size */
1c79356b 10218 while (tmp_entry != vm_map_copy_to_entry(copy)) {
39236c6e
A
10219 assert(VM_MAP_PAGE_ALIGNED(
10220 copy_addr + (tmp_entry->vme_end -
10221 tmp_entry->vme_start),
10222 VM_MAP_COPY_PAGE_MASK(copy)));
10223 assert(VM_MAP_PAGE_ALIGNED(
10224 copy_addr,
10225 VM_MAP_COPY_PAGE_MASK(copy)));
10226
10227 /*
10228 * The copy_entries will be injected directly into the
10229 * destination map and might not be "map aligned" there...
10230 */
10231 tmp_entry->map_aligned = FALSE;
10232
1c79356b
A
10233 tmp_entry->vme_end = copy_addr +
10234 (tmp_entry->vme_end - tmp_entry->vme_start);
10235 tmp_entry->vme_start = copy_addr;
e2d2fc5c 10236 assert(tmp_entry->vme_start < tmp_entry->vme_end);
1c79356b 10237 copy_addr += tmp_entry->vme_end - tmp_entry->vme_start;
fe8ab488 10238 copy_size += tmp_entry->vme_end - tmp_entry->vme_start;
1c79356b
A
10239 tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next;
10240 }
10241
fe8ab488
A
10242 if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT &&
10243 copy_size < copy->size) {
10244 /*
10245 * The actual size of the VM map copy is smaller than what
10246 * was requested by the caller. This must be because some
10247 * PAGE_SIZE-sized pages are missing at the end of the last
10248 * VM_MAP_PAGE_SIZE(src_map)-sized chunk of the range.
10249 * The caller might not have been aware of those missing
10250 * pages and might not want to be aware of it, which is
10251 * fine as long as they don't try to access (and crash on)
10252 * those missing pages.
10253 * Let's adjust the size of the "copy", to avoid failing
10254 * in vm_map_copyout() or vm_map_copy_overwrite().
10255 */
10256 assert(vm_map_round_page(copy_size,
10257 VM_MAP_PAGE_MASK(src_map)) ==
10258 vm_map_round_page(copy->size,
10259 VM_MAP_PAGE_MASK(src_map)));
10260 copy->size = copy_size;
10261 }
10262
1c79356b
A
10263 *copy_result = copy;
10264 return(KERN_SUCCESS);
10265
10266#undef RETURN
10267}
10268
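/*
 * Illustrative sketch only (not compiled into xnu): the flags-based entry
 * point above subsumes the older boolean arguments.  A hypothetical caller
 * that wants a destructive copy which also preserves the source object's
 * purgeable state would combine the flags as shown below.
 */
#if 0 /* example only */
static kern_return_t
example_copyin_purgeable(
	vm_map_t		src_map,
	vm_map_address_t	src_addr,
	vm_map_size_t		len,
	vm_map_copy_t		*copy_result)	/* OUT */
{
	int flags;

	flags = VM_MAP_COPYIN_SRC_DESTROY |
	    VM_MAP_COPYIN_PRESERVE_PURGEABLE;

	return vm_map_copyin_internal(src_map, src_addr, len,
				      flags, copy_result);
}
#endif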
39236c6e
A
10269kern_return_t
10270vm_map_copy_extract(
10271 vm_map_t src_map,
10272 vm_map_address_t src_addr,
10273 vm_map_size_t len,
10274 vm_map_copy_t *copy_result, /* OUT */
10275 vm_prot_t *cur_prot, /* OUT */
10276 vm_prot_t *max_prot)
10277{
10278 vm_map_offset_t src_start, src_end;
10279 vm_map_copy_t copy;
10280 kern_return_t kr;
10281
10282 /*
10283 * Check for copies of zero bytes.
10284 */
10285
10286 if (len == 0) {
10287 *copy_result = VM_MAP_COPY_NULL;
10288 return(KERN_SUCCESS);
10289 }
10290
10291 /*
10292 * Check that the end address doesn't overflow
10293 */
10294 src_end = src_addr + len;
10295 if (src_end < src_addr)
10296 return KERN_INVALID_ADDRESS;
10297
10298 /*
10299 * Compute (page aligned) start and end of region
10300 */
10301 src_start = vm_map_trunc_page(src_addr, PAGE_MASK);
10302 src_end = vm_map_round_page(src_end, PAGE_MASK);
10303
10304 /*
10305 * Allocate a header element for the list.
10306 *
10307 * Use the start and end in the header to
10308 * remember the endpoints prior to rounding.
10309 */
10310
10311 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
04b8595b 10312 copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
39236c6e
A
10313 vm_map_copy_first_entry(copy) =
10314 vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
10315 copy->type = VM_MAP_COPY_ENTRY_LIST;
10316 copy->cpy_hdr.nentries = 0;
10317 copy->cpy_hdr.entries_pageable = TRUE;
10318
10319 vm_map_store_init(&copy->cpy_hdr);
10320
10321 copy->offset = 0;
10322 copy->size = len;
10323
10324 kr = vm_map_remap_extract(src_map,
10325 src_addr,
10326 len,
10327 FALSE, /* copy */
10328 &copy->cpy_hdr,
10329 cur_prot,
10330 max_prot,
10331 VM_INHERIT_SHARE,
39037602
A
10332 TRUE, /* pageable */
10333 FALSE); /* same_map */
39236c6e
A
10334 if (kr != KERN_SUCCESS) {
10335 vm_map_copy_discard(copy);
10336 return kr;
10337 }
10338
10339 *copy_result = copy;
10340 return KERN_SUCCESS;
10341}
10342
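/*
 * Illustrative sketch only (not compiled into xnu): vm_map_copy_extract()
 * shares the source range (it maps the same objects rather than copying
 * them) and reports the effective protections.  A hypothetical caller that
 * requires read access could verify "cur_prot" before using the copy.
 */
#if 0 /* example only */
static kern_return_t
example_extract_readable(
	vm_map_t		src_map,
	vm_map_address_t	src_addr,
	vm_map_size_t		len,
	vm_map_copy_t		*copy_result)	/* OUT */
{
	vm_prot_t	cur_prot, max_prot;
	kern_return_t	kr;

	kr = vm_map_copy_extract(src_map, src_addr, len,
				 copy_result, &cur_prot, &max_prot);
	if (kr != KERN_SUCCESS)
		return kr;

	if ((cur_prot & VM_PROT_READ) == 0) {
		/* not readable: throw the extracted copy away */
		vm_map_copy_discard(*copy_result);
		*copy_result = VM_MAP_COPY_NULL;
		return KERN_PROTECTION_FAILURE;
	}
	return KERN_SUCCESS;
}
#endif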
1c79356b
A
10343/*
10344 * vm_map_copyin_object:
10345 *
10346 * Create a copy object from an object.
10347 * Our caller donates an object reference.
10348 */
10349
10350kern_return_t
10351vm_map_copyin_object(
10352 vm_object_t object,
10353 vm_object_offset_t offset, /* offset of region in object */
10354 vm_object_size_t size, /* size of region in object */
10355 vm_map_copy_t *copy_result) /* OUT */
10356{
10357 vm_map_copy_t copy; /* Resulting copy */
10358
10359 /*
10360 * We drop the object into a special copy object
10361 * that contains the object directly.
10362 */
10363
10364 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
04b8595b 10365 copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
1c79356b
A
10366 copy->type = VM_MAP_COPY_OBJECT;
10367 copy->cpy_object = object;
1c79356b
A
10368 copy->offset = offset;
10369 copy->size = size;
10370
10371 *copy_result = copy;
10372 return(KERN_SUCCESS);
10373}
10374
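/*
 * Illustrative sketch only (not compiled into xnu): wrapping an existing
 * VM object into a VM_MAP_COPY_OBJECT copy.  vm_map_copyin_object()
 * consumes an object reference ("our caller donates an object reference"),
 * so a hypothetical caller that wants to keep its own reference takes an
 * extra one on behalf of the copy first.
 */
#if 0 /* example only */
static kern_return_t
example_object_to_copy(
	vm_object_t		object,
	vm_object_offset_t	offset,
	vm_object_size_t	size,
	vm_map_copy_t		*copy_result)	/* OUT */
{
	/* donate a reference to the copy; the caller keeps its own */
	vm_object_reference(object);
	return vm_map_copyin_object(object, offset, size, copy_result);
}
#endif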
91447636 10375static void
1c79356b
A
10376vm_map_fork_share(
10377 vm_map_t old_map,
10378 vm_map_entry_t old_entry,
10379 vm_map_t new_map)
10380{
10381 vm_object_t object;
10382 vm_map_entry_t new_entry;
1c79356b
A
10383
10384 /*
10385 * New sharing code. New map entry
10386 * references original object. Internal
10387 * objects use asynchronous copy algorithm for
10388 * future copies. First make sure we have
10389 * the right object. If we need a shadow,
10390 * or someone else already has one, then
10391 * make a new shadow and share it.
10392 */
10393
3e170ce0 10394 object = VME_OBJECT(old_entry);
1c79356b
A
10395 if (old_entry->is_sub_map) {
10396 assert(old_entry->wired_count == 0);
0c530ab8 10397#ifndef NO_NESTED_PMAP
1c79356b 10398 if(old_entry->use_pmap) {
91447636
A
10399 kern_return_t result;
10400
1c79356b 10401 result = pmap_nest(new_map->pmap,
3e170ce0 10402 (VME_SUBMAP(old_entry))->pmap,
2d21ac55
A
10403 (addr64_t)old_entry->vme_start,
10404 (addr64_t)old_entry->vme_start,
10405 (uint64_t)(old_entry->vme_end - old_entry->vme_start));
1c79356b
A
10406 if(result)
10407 panic("vm_map_fork_share: pmap_nest failed!");
10408 }
0c530ab8 10409#endif /* NO_NESTED_PMAP */
1c79356b 10410 } else if (object == VM_OBJECT_NULL) {
91447636 10411 object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
2d21ac55 10412 old_entry->vme_start));
3e170ce0
A
10413 VME_OFFSET_SET(old_entry, 0);
10414 VME_OBJECT_SET(old_entry, object);
fe8ab488 10415 old_entry->use_pmap = TRUE;
1c79356b
A
10416 assert(!old_entry->needs_copy);
10417 } else if (object->copy_strategy !=
2d21ac55 10418 MEMORY_OBJECT_COPY_SYMMETRIC) {
1c79356b
A
10419
10420 /*
10421 * We are already using an asymmetric
10422 * copy, and therefore we already have
10423 * the right object.
10424 */
10425
10426 assert(! old_entry->needs_copy);
10427 }
10428 else if (old_entry->needs_copy || /* case 1 */
10429 object->shadowed || /* case 2 */
10430 (!object->true_share && /* case 3 */
2d21ac55 10431 !old_entry->is_shared &&
6d2010ae 10432 (object->vo_size >
2d21ac55
A
10433 (vm_map_size_t)(old_entry->vme_end -
10434 old_entry->vme_start)))) {
1c79356b
A
10435
10436 /*
10437 * We need to create a shadow.
10438 * There are three cases here.
10439 * In the first case, we need to
10440 * complete a deferred symmetrical
10441 * copy that we participated in.
10442 * In the second and third cases,
10443 * we need to create the shadow so
10444 * that changes that we make to the
10445 * object do not interfere with
10446 * any symmetrical copies which
10447 * have occurred (case 2) or which
10448 * might occur (case 3).
10449 *
10450 * The first case is when we had
10451 * deferred shadow object creation
10452 * via the entry->needs_copy mechanism.
10453 * This mechanism only works when
10454 * only one entry points to the source
10455 * object, and we are about to create
10456 * a second entry pointing to the
10457 * same object. The problem is that
10458 * there is no way of mapping from
10459 * an object to the entries pointing
10460 * to it. (Deferred shadow creation
10461 * works with one entry because it occurs
10462 * at fault time, and we walk from the
10463 * entry to the object when handling
10464 * the fault.)
10465 *
10466 * The second case is when the object
10467 * to be shared has already been copied
10468 * with a symmetric copy, but we point
10469 * directly to the object without
10470 * needs_copy set in our entry. (This
10471 * can happen because different ranges
10472 * of an object can be pointed to by
10473 * different entries. In particular,
10474 * a single entry pointing to an object
10475 * can be split by a call to vm_inherit,
10476 * which, combined with task_create, can
10477 * result in the different entries
10478 * having different needs_copy values.)
10479 * The shadowed flag in the object allows
10480 * us to detect this case. The problem
10481 * with this case is that if this object
10482 * has or will have shadows, then we
10483 * must not perform an asymmetric copy
10484 * of this object, since such a copy
10485 * allows the object to be changed, which
10486 * will break the previous symmetrical
10487 * copies (which rely upon the object
10488 * not changing). In a sense, the shadowed
10489 * flag says "don't change this object".
10490 * We fix this by creating a shadow
10491 * object for this object, and sharing
10492 * that. This works because we are free
10493 * to change the shadow object (and thus
10494 * to use an asymmetric copy strategy);
10495 * this is also semantically correct,
10496 * since this object is temporary, and
10497 * therefore a copy of the object is
10498 * as good as the object itself. (This
10499 * is not true for permanent objects,
10500 * since the pager needs to see changes,
10501 * which won't happen if the changes
10502 * are made to a copy.)
10503 *
10504 * The third case is when the object
10505 * to be shared has parts sticking
10506 * outside of the entry we're working
10507 * with, and thus may in the future
10508 * be subject to a symmetrical copy.
10509 * (This is a preemptive version of
10510 * case 2.)
10511 */
3e170ce0
A
10512 VME_OBJECT_SHADOW(old_entry,
10513 (vm_map_size_t) (old_entry->vme_end -
10514 old_entry->vme_start));
1c79356b
A
10515
10516 /*
10517 * If we're making a shadow for other than
10518 * copy on write reasons, then we have
10519 * to remove write permission.
10520 */
10521
1c79356b
A
10522 if (!old_entry->needs_copy &&
10523 (old_entry->protection & VM_PROT_WRITE)) {
0c530ab8
A
10524 vm_prot_t prot;
10525
10526 prot = old_entry->protection & ~VM_PROT_WRITE;
2d21ac55 10527
3e170ce0 10528 if (override_nx(old_map, VME_ALIAS(old_entry)) && prot)
0c530ab8 10529 prot |= VM_PROT_EXECUTE;
2d21ac55 10530
316670eb 10531 if (old_map->mapped_in_other_pmaps) {
9bccf70c 10532 vm_object_pmap_protect(
3e170ce0
A
10533 VME_OBJECT(old_entry),
10534 VME_OFFSET(old_entry),
9bccf70c 10535 (old_entry->vme_end -
2d21ac55 10536 old_entry->vme_start),
9bccf70c
A
10537 PMAP_NULL,
10538 old_entry->vme_start,
0c530ab8 10539 prot);
1c79356b 10540 } else {
9bccf70c 10541 pmap_protect(old_map->pmap,
2d21ac55
A
10542 old_entry->vme_start,
10543 old_entry->vme_end,
10544 prot);
1c79356b
A
10545 }
10546 }
10547
10548 old_entry->needs_copy = FALSE;
3e170ce0 10549 object = VME_OBJECT(old_entry);
1c79356b 10550 }
6d2010ae 10551
1c79356b
A
10552
10553 /*
10554 * If object was using a symmetric copy strategy,
10555 * change its copy strategy to the default
10556 * asymmetric copy strategy, which is copy_delay
10557 * in the non-norma case and copy_call in the
10558 * norma case. Bump the reference count for the
10559 * new entry.
10560 */
10561
10562 if(old_entry->is_sub_map) {
3e170ce0
A
10563 vm_map_lock(VME_SUBMAP(old_entry));
10564 vm_map_reference(VME_SUBMAP(old_entry));
10565 vm_map_unlock(VME_SUBMAP(old_entry));
1c79356b
A
10566 } else {
10567 vm_object_lock(object);
2d21ac55 10568 vm_object_reference_locked(object);
1c79356b
A
10569 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
10570 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
10571 }
10572 vm_object_unlock(object);
10573 }
10574
10575 /*
10576 * Clone the entry, using object ref from above.
10577 * Mark both entries as shared.
10578 */
10579
7ddcb079
A
10580 new_entry = vm_map_entry_create(new_map, FALSE); /* Never the kernel
10581 * map or descendants */
1c79356b
A
10582 vm_map_entry_copy(new_entry, old_entry);
10583 old_entry->is_shared = TRUE;
10584 new_entry->is_shared = TRUE;
39037602
A
10585
10586 /*
10587 * If the old entry's inheritance is VM_INHERIT_NONE,
10588 * the new entry is for a corpse fork; remove the
10589 * write permission from the new entry.
10590 */
10591 if (old_entry->inheritance == VM_INHERIT_NONE) {
10592
10593 new_entry->protection &= ~VM_PROT_WRITE;
10594 new_entry->max_protection &= ~VM_PROT_WRITE;
10595 }
1c79356b
A
10596
10597 /*
10598 * Insert the entry into the new map -- we
10599 * know we're inserting at the end of the new
10600 * map.
10601 */
10602
6d2010ae 10603 vm_map_store_entry_link(new_map, vm_map_last_entry(new_map), new_entry);
1c79356b
A
10604
10605 /*
10606 * Update the physical map
10607 */
10608
10609 if (old_entry->is_sub_map) {
10610 /* Bill Angell pmap support goes here */
10611 } else {
10612 pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
2d21ac55
A
10613 old_entry->vme_end - old_entry->vme_start,
10614 old_entry->vme_start);
1c79356b
A
10615 }
10616}
10617
91447636 10618static boolean_t
1c79356b
A
10619vm_map_fork_copy(
10620 vm_map_t old_map,
10621 vm_map_entry_t *old_entry_p,
39037602
A
10622 vm_map_t new_map,
10623 int vm_map_copyin_flags)
1c79356b
A
10624{
10625 vm_map_entry_t old_entry = *old_entry_p;
91447636
A
10626 vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start;
10627 vm_map_offset_t start = old_entry->vme_start;
1c79356b
A
10628 vm_map_copy_t copy;
10629 vm_map_entry_t last = vm_map_last_entry(new_map);
10630
10631 vm_map_unlock(old_map);
10632 /*
10633 * Use maxprot version of copyin because we
10634 * care about whether this memory can ever
10635 * be accessed, not just whether it's accessible
10636 * right now.
10637 */
39037602
A
10638 vm_map_copyin_flags |= VM_MAP_COPYIN_USE_MAXPROT;
10639 if (vm_map_copyin_internal(old_map, start, entry_size,
10640 vm_map_copyin_flags, &copy)
1c79356b
A
10641 != KERN_SUCCESS) {
10642 /*
10643 * The map might have changed while it
10644 * was unlocked, check it again. Skip
10645 * any blank space or permanently
10646 * unreadable region.
10647 */
10648 vm_map_lock(old_map);
10649 if (!vm_map_lookup_entry(old_map, start, &last) ||
55e303ae 10650 (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) {
1c79356b
A
10651 last = last->vme_next;
10652 }
10653 *old_entry_p = last;
10654
10655 /*
10656 * XXX For some error returns, want to
10657 * XXX skip to the next element. Note
10658 * that INVALID_ADDRESS and
10659 * PROTECTION_FAILURE are handled above.
10660 */
10661
10662 return FALSE;
10663 }
10664
10665 /*
10666 * Insert the copy into the new map
10667 */
10668
10669 vm_map_copy_insert(new_map, last, copy);
10670
10671 /*
10672 * Pick up the traversal at the end of
10673 * the copied region.
10674 */
10675
10676 vm_map_lock(old_map);
10677 start += entry_size;
10678 if (! vm_map_lookup_entry(old_map, start, &last)) {
10679 last = last->vme_next;
10680 } else {
2d21ac55
A
10681 if (last->vme_start == start) {
10682 /*
10683 * No need to clip here and we don't
10684 * want to cause any unnecessary
10685 * unnesting...
10686 */
10687 } else {
10688 vm_map_clip_start(old_map, last, start);
10689 }
1c79356b
A
10690 }
10691 *old_entry_p = last;
10692
10693 return TRUE;
10694}
10695
10696/*
10697 * vm_map_fork:
10698 *
10699 * Create and return a new map based on the old
10700 * map, according to the inheritance values on the
39037602 10701 * regions in that map and the options.
1c79356b
A
10702 *
10703 * The source map must not be locked.
10704 */
10705vm_map_t
10706vm_map_fork(
316670eb 10707 ledger_t ledger,
39037602
A
10708 vm_map_t old_map,
10709 int options)
1c79356b 10710{
2d21ac55 10711 pmap_t new_pmap;
1c79356b
A
10712 vm_map_t new_map;
10713 vm_map_entry_t old_entry;
91447636 10714 vm_map_size_t new_size = 0, entry_size;
1c79356b
A
10715 vm_map_entry_t new_entry;
10716 boolean_t src_needs_copy;
10717 boolean_t new_entry_needs_copy;
3e170ce0 10718 boolean_t pmap_is64bit;
39037602
A
10719 int vm_map_copyin_flags;
10720
10721 if (options & ~(VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
10722 VM_MAP_FORK_PRESERVE_PURGEABLE)) {
10723 /* unsupported option */
10724 return VM_MAP_NULL;
10725 }
1c79356b 10726
3e170ce0 10727 pmap_is64bit =
b0d623f7 10728#if defined(__i386__) || defined(__x86_64__)
3e170ce0 10729 old_map->pmap->pm_task_map != TASK_MAP_32BIT;
b0d623f7 10730#else
316670eb 10731#error Unknown architecture.
b0d623f7 10732#endif
3e170ce0
A
10733
10734 new_pmap = pmap_create(ledger, (vm_map_size_t) 0, pmap_is64bit);
2d21ac55 10735
1c79356b
A
10736 vm_map_reference_swap(old_map);
10737 vm_map_lock(old_map);
10738
10739 new_map = vm_map_create(new_pmap,
2d21ac55
A
10740 old_map->min_offset,
10741 old_map->max_offset,
10742 old_map->hdr.entries_pageable);
39037602 10743 vm_commit_pagezero_status(new_map);
39236c6e
A
10744 /* inherit the parent map's page size */
10745 vm_map_set_page_shift(new_map, VM_MAP_PAGE_SHIFT(old_map));
1c79356b 10746 for (
2d21ac55
A
10747 old_entry = vm_map_first_entry(old_map);
10748 old_entry != vm_map_to_entry(old_map);
10749 ) {
1c79356b
A
10750
10751 entry_size = old_entry->vme_end - old_entry->vme_start;
10752
10753 switch (old_entry->inheritance) {
10754 case VM_INHERIT_NONE:
39037602
A
10755 /*
10756 * Skip making a share entry if VM_MAP_FORK_SHARE_IF_INHERIT_NONE
10757 * is not passed or it is backed by a device pager.
10758 */
10759 if ((!(options & VM_MAP_FORK_SHARE_IF_INHERIT_NONE)) ||
10760 (!old_entry->is_sub_map &&
10761 VME_OBJECT(old_entry) != NULL &&
10762 VME_OBJECT(old_entry)->pager != NULL &&
10763 is_device_pager_ops(VME_OBJECT(old_entry)->pager->mo_pager_ops))) {
10764 break;
10765 }
10766 /* FALLTHROUGH */
1c79356b
A
10767
10768 case VM_INHERIT_SHARE:
10769 vm_map_fork_share(old_map, old_entry, new_map);
10770 new_size += entry_size;
10771 break;
10772
10773 case VM_INHERIT_COPY:
10774
10775 /*
10776 * Inline the copy_quickly case;
10777 * upon failure, fall back on call
10778 * to vm_map_fork_copy.
10779 */
10780
10781 if(old_entry->is_sub_map)
10782 break;
9bccf70c 10783 if ((old_entry->wired_count != 0) ||
3e170ce0
A
10784 ((VME_OBJECT(old_entry) != NULL) &&
10785 (VME_OBJECT(old_entry)->true_share))) {
1c79356b
A
10786 goto slow_vm_map_fork_copy;
10787 }
10788
7ddcb079 10789 new_entry = vm_map_entry_create(new_map, FALSE); /* never the kernel map or descendants */
1c79356b 10790 vm_map_entry_copy(new_entry, old_entry);
fe8ab488
A
10791 if (new_entry->is_sub_map) {
10792 /* clear address space specifics */
10793 new_entry->use_pmap = FALSE;
10794 }
1c79356b
A
10795
10796 if (! vm_object_copy_quickly(
3e170ce0
A
10797 &VME_OBJECT(new_entry),
10798 VME_OFFSET(old_entry),
2d21ac55
A
10799 (old_entry->vme_end -
10800 old_entry->vme_start),
10801 &src_needs_copy,
10802 &new_entry_needs_copy)) {
1c79356b
A
10803 vm_map_entry_dispose(new_map, new_entry);
10804 goto slow_vm_map_fork_copy;
10805 }
10806
10807 /*
10808 * Handle copy-on-write obligations
10809 */
10810
10811 if (src_needs_copy && !old_entry->needs_copy) {
0c530ab8
A
10812 vm_prot_t prot;
10813
10814 prot = old_entry->protection & ~VM_PROT_WRITE;
2d21ac55 10815
3e170ce0
A
10816 if (override_nx(old_map, VME_ALIAS(old_entry))
10817 && prot)
0c530ab8 10818 prot |= VM_PROT_EXECUTE;
2d21ac55 10819
1c79356b 10820 vm_object_pmap_protect(
3e170ce0
A
10821 VME_OBJECT(old_entry),
10822 VME_OFFSET(old_entry),
1c79356b 10823 (old_entry->vme_end -
2d21ac55 10824 old_entry->vme_start),
1c79356b 10825 ((old_entry->is_shared
316670eb 10826 || old_map->mapped_in_other_pmaps)
2d21ac55
A
10827 ? PMAP_NULL :
10828 old_map->pmap),
1c79356b 10829 old_entry->vme_start,
0c530ab8 10830 prot);
1c79356b 10831
3e170ce0 10832 assert(old_entry->wired_count == 0);
1c79356b
A
10833 old_entry->needs_copy = TRUE;
10834 }
10835 new_entry->needs_copy = new_entry_needs_copy;
10836
10837 /*
10838 * Insert the entry at the end
10839 * of the map.
10840 */
10841
6d2010ae 10842 vm_map_store_entry_link(new_map, vm_map_last_entry(new_map),
1c79356b
A
10843 new_entry);
10844 new_size += entry_size;
10845 break;
10846
10847 slow_vm_map_fork_copy:
39037602
A
10848 vm_map_copyin_flags = 0;
10849 if (options & VM_MAP_FORK_PRESERVE_PURGEABLE) {
10850 vm_map_copyin_flags |=
10851 VM_MAP_COPYIN_PRESERVE_PURGEABLE;
10852 }
10853 if (vm_map_fork_copy(old_map,
10854 &old_entry,
10855 new_map,
10856 vm_map_copyin_flags)) {
1c79356b
A
10857 new_size += entry_size;
10858 }
10859 continue;
10860 }
10861 old_entry = old_entry->vme_next;
10862 }
10863
fe8ab488 10864
1c79356b
A
10865 new_map->size = new_size;
10866 vm_map_unlock(old_map);
10867 vm_map_deallocate(old_map);
10868
10869 return(new_map);
10870}
10871
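/*
 * Illustrative sketch only (not compiled into xnu): forking a task's map
 * for a corpse-style snapshot, passing both supported options so that
 * VM_INHERIT_NONE regions are shared (with write permission removed in the
 * new map) and purgeable state is preserved.  The helper name and the use
 * of the parent's ledger are hypothetical.
 */
#if 0 /* example only */
static vm_map_t
example_fork_for_corpse(
	ledger_t	ledger,
	vm_map_t	parent_map)
{
	int options;

	options = VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
	    VM_MAP_FORK_PRESERVE_PURGEABLE;

	/* vm_map_fork() returns VM_MAP_NULL if an unsupported option is set */
	return vm_map_fork(ledger, parent_map, options);
}
#endif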
2d21ac55
A
10872/*
10873 * vm_map_exec:
10874 *
10875 * Setup the "new_map" with the proper execution environment according
10876 * to the type of executable (platform, 64bit, chroot environment).
10877 * Map the comm page and shared region, etc...
10878 */
10879kern_return_t
10880vm_map_exec(
10881 vm_map_t new_map,
10882 task_t task,
39037602 10883 boolean_t is64bit,
2d21ac55
A
10884 void *fsroot,
10885 cpu_type_t cpu)
10886{
10887 SHARED_REGION_TRACE_DEBUG(
10888 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): ->\n",
fe8ab488
A
10889 (void *)VM_KERNEL_ADDRPERM(current_task()),
10890 (void *)VM_KERNEL_ADDRPERM(new_map),
10891 (void *)VM_KERNEL_ADDRPERM(task),
10892 (void *)VM_KERNEL_ADDRPERM(fsroot),
10893 cpu));
39037602
A
10894 (void) vm_commpage_enter(new_map, task, is64bit);
10895 (void) vm_shared_region_enter(new_map, task, is64bit, fsroot, cpu);
2d21ac55
A
10896 SHARED_REGION_TRACE_DEBUG(
10897 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): <-\n",
fe8ab488
A
10898 (void *)VM_KERNEL_ADDRPERM(current_task()),
10899 (void *)VM_KERNEL_ADDRPERM(new_map),
10900 (void *)VM_KERNEL_ADDRPERM(task),
10901 (void *)VM_KERNEL_ADDRPERM(fsroot),
10902 cpu));
2d21ac55
A
10903 return KERN_SUCCESS;
10904}
1c79356b
A
10905
10906/*
10907 * vm_map_lookup_locked:
10908 *
10909 * Finds the VM object, offset, and
10910 * protection for a given virtual address in the
10911 * specified map, assuming a page fault of the
10912 * type specified.
10913 *
10914 * Returns the (object, offset, protection) for
10915 * this address, whether it is wired down, and whether
10916 * this map has the only reference to the data in question.
10917 * In order to later verify this lookup, a "version"
10918 * is returned.
10919 *
10920 * The map MUST be locked by the caller and WILL be
10921 * locked on exit. In order to guarantee the
10922 * existence of the returned object, it is returned
10923 * locked.
10924 *
10925 * If a lookup is requested with "write protection"
10926 * specified, the map may be changed to perform virtual
10927 * copying operations, although the data referenced will
10928 * remain the same.
10929 */
10930kern_return_t
10931vm_map_lookup_locked(
10932 vm_map_t *var_map, /* IN/OUT */
2d21ac55 10933 vm_map_offset_t vaddr,
91447636 10934 vm_prot_t fault_type,
2d21ac55 10935 int object_lock_type,
1c79356b
A
10936 vm_map_version_t *out_version, /* OUT */
10937 vm_object_t *object, /* OUT */
10938 vm_object_offset_t *offset, /* OUT */
10939 vm_prot_t *out_prot, /* OUT */
10940 boolean_t *wired, /* OUT */
2d21ac55 10941 vm_object_fault_info_t fault_info, /* OUT */
91447636 10942 vm_map_t *real_map)
1c79356b
A
10943{
10944 vm_map_entry_t entry;
39037602 10945 vm_map_t map = *var_map;
1c79356b
A
10946 vm_map_t old_map = *var_map;
10947 vm_map_t cow_sub_map_parent = VM_MAP_NULL;
91447636
A
10948 vm_map_offset_t cow_parent_vaddr = 0;
10949 vm_map_offset_t old_start = 0;
10950 vm_map_offset_t old_end = 0;
39037602 10951 vm_prot_t prot;
6d2010ae 10952 boolean_t mask_protections;
fe8ab488 10953 boolean_t force_copy;
6d2010ae
A
10954 vm_prot_t original_fault_type;
10955
10956 /*
10957 * VM_PROT_MASK means that the caller wants us to use "fault_type"
10958 * as a mask against the mapping's actual protections, not as an
10959 * absolute value.
10960 */
10961 mask_protections = (fault_type & VM_PROT_IS_MASK) ? TRUE : FALSE;
fe8ab488
A
10962 force_copy = (fault_type & VM_PROT_COPY) ? TRUE : FALSE;
10963 fault_type &= VM_PROT_ALL;
6d2010ae 10964 original_fault_type = fault_type;
1c79356b 10965
91447636 10966 *real_map = map;
6d2010ae
A
10967
10968RetryLookup:
10969 fault_type = original_fault_type;
1c79356b
A
10970
10971 /*
10972 * If the map has an interesting hint, try it before calling
10973 * full blown lookup routine.
10974 */
1c79356b 10975 entry = map->hint;
1c79356b
A
10976
10977 if ((entry == vm_map_to_entry(map)) ||
10978 (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
10979 vm_map_entry_t tmp_entry;
10980
10981 /*
10982 * Entry was either not a valid hint, or the vaddr
10983 * was not contained in the entry, so do a full lookup.
10984 */
10985 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
10986 if((cow_sub_map_parent) && (cow_sub_map_parent != map))
10987 vm_map_unlock(cow_sub_map_parent);
91447636 10988 if((*real_map != map)
2d21ac55 10989 && (*real_map != cow_sub_map_parent))
91447636 10990 vm_map_unlock(*real_map);
1c79356b
A
10991 return KERN_INVALID_ADDRESS;
10992 }
10993
10994 entry = tmp_entry;
10995 }
10996 if(map == old_map) {
10997 old_start = entry->vme_start;
10998 old_end = entry->vme_end;
10999 }
11000
11001 /*
11002 * Handle submaps. Drop lock on upper map, submap is
11003 * returned locked.
11004 */
11005
11006submap_recurse:
11007 if (entry->is_sub_map) {
91447636
A
11008 vm_map_offset_t local_vaddr;
11009 vm_map_offset_t end_delta;
11010 vm_map_offset_t start_delta;
1c79356b
A
11011 vm_map_entry_t submap_entry;
11012 boolean_t mapped_needs_copy=FALSE;
11013
11014 local_vaddr = vaddr;
11015
39037602
A
11016 if ((entry->use_pmap &&
11017 ! ((fault_type & VM_PROT_WRITE) ||
11018 force_copy))) {
91447636
A
11019 /* if real_map equals map we unlock below */
11020 if ((*real_map != map) &&
2d21ac55 11021 (*real_map != cow_sub_map_parent))
91447636 11022 vm_map_unlock(*real_map);
3e170ce0 11023 *real_map = VME_SUBMAP(entry);
1c79356b
A
11024 }
11025
39037602
A
11026 if(entry->needs_copy &&
11027 ((fault_type & VM_PROT_WRITE) ||
11028 force_copy)) {
1c79356b
A
11029 if (!mapped_needs_copy) {
11030 if (vm_map_lock_read_to_write(map)) {
11031 vm_map_lock_read(map);
99c3a104 11032 *real_map = map;
1c79356b
A
11033 goto RetryLookup;
11034 }
3e170ce0
A
11035 vm_map_lock_read(VME_SUBMAP(entry));
11036 *var_map = VME_SUBMAP(entry);
1c79356b
A
11037 cow_sub_map_parent = map;
11038 /* reset base to map before cow object */
11039 /* this is the map which will accept */
11040 /* the new cow object */
11041 old_start = entry->vme_start;
11042 old_end = entry->vme_end;
11043 cow_parent_vaddr = vaddr;
11044 mapped_needs_copy = TRUE;
11045 } else {
3e170ce0
A
11046 vm_map_lock_read(VME_SUBMAP(entry));
11047 *var_map = VME_SUBMAP(entry);
1c79356b 11048 if((cow_sub_map_parent != map) &&
2d21ac55 11049 (*real_map != map))
1c79356b
A
11050 vm_map_unlock(map);
11051 }
11052 } else {
3e170ce0
A
11053 vm_map_lock_read(VME_SUBMAP(entry));
11054 *var_map = VME_SUBMAP(entry);
1c79356b
A
11055 /* leave the map locked if it is the target */
11056 /* cow sub_map above; otherwise, just */
11057 /* follow the maps down to the object. */
11058 /* Here we unlock knowing we are not */
11059 /* revisiting the map. */
91447636 11060 if((*real_map != map) && (map != cow_sub_map_parent))
1c79356b
A
11061 vm_map_unlock_read(map);
11062 }
11063
99c3a104 11064 map = *var_map;
1c79356b
A
11065
11066 /* calculate the offset in the submap for vaddr */
3e170ce0 11067 local_vaddr = (local_vaddr - entry->vme_start) + VME_OFFSET(entry);
1c79356b 11068
2d21ac55 11069 RetrySubMap:
1c79356b
A
11070 if(!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
11071 if((cow_sub_map_parent) && (cow_sub_map_parent != map)){
11072 vm_map_unlock(cow_sub_map_parent);
11073 }
91447636 11074 if((*real_map != map)
2d21ac55 11075 && (*real_map != cow_sub_map_parent)) {
91447636 11076 vm_map_unlock(*real_map);
1c79356b 11077 }
91447636 11078 *real_map = map;
1c79356b
A
11079 return KERN_INVALID_ADDRESS;
11080 }
2d21ac55 11081
1c79356b
A
11082 /* find the attenuated shadow of the underlying object */
11083 /* on our target map */
11084
11085 /* In plain English: the submap object may extend beyond the */
11086 /* region mapped by the entry, or it may only fill a portion */
11087 /* of it. For our purposes, we only care if the object */
11088 /* doesn't fill the entry. In that case the area which will */
11089 /* ultimately be clipped in the top map only needs */
11090 /* to be as big as the portion of the underlying entry */
11091 /* which is actually mapped. */
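			/*
			 * Worked example (illustrative numbers only, not from
			 * the original source): if VME_OFFSET(entry) is 0x1000,
			 * the entry spans 0x4000 bytes, and the submap entry
			 * covers submap offsets [0x2000, 0x4000), then
			 * start_delta and end_delta both come out to 0x1000,
			 * shrinking [old_start, old_end) to just the portion
			 * actually backed by the submap entry.
			 */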
3e170ce0
A
11092 start_delta = submap_entry->vme_start > VME_OFFSET(entry) ?
11093 submap_entry->vme_start - VME_OFFSET(entry) : 0;
1c79356b
A
11094
11095 end_delta =
3e170ce0 11096 (VME_OFFSET(entry) + start_delta + (old_end - old_start)) <=
1c79356b 11097 submap_entry->vme_end ?
3e170ce0 11098 0 : (VME_OFFSET(entry) +
2d21ac55
A
11099 (old_end - old_start))
11100 - submap_entry->vme_end;
1c79356b
A
11101
11102 old_start += start_delta;
11103 old_end -= end_delta;
11104
11105 if(submap_entry->is_sub_map) {
11106 entry = submap_entry;
11107 vaddr = local_vaddr;
11108 goto submap_recurse;
11109 }
11110
39037602
A
11111 if (((fault_type & VM_PROT_WRITE) ||
11112 force_copy)
11113 && cow_sub_map_parent) {
1c79356b 11114
2d21ac55
A
11115 vm_object_t sub_object, copy_object;
11116 vm_object_offset_t copy_offset;
91447636
A
11117 vm_map_offset_t local_start;
11118 vm_map_offset_t local_end;
0b4e3aa0 11119 boolean_t copied_slowly = FALSE;
1c79356b
A
11120
11121 if (vm_map_lock_read_to_write(map)) {
11122 vm_map_lock_read(map);
11123 old_start -= start_delta;
11124 old_end += end_delta;
11125 goto RetrySubMap;
11126 }
0b4e3aa0
A
11127
11128
3e170ce0 11129 sub_object = VME_OBJECT(submap_entry);
2d21ac55
A
11130 if (sub_object == VM_OBJECT_NULL) {
11131 sub_object =
1c79356b 11132 vm_object_allocate(
91447636 11133 (vm_map_size_t)
2d21ac55
A
11134 (submap_entry->vme_end -
11135 submap_entry->vme_start));
3e170ce0
A
11136 VME_OBJECT_SET(submap_entry, sub_object);
11137 VME_OFFSET_SET(submap_entry, 0);
1c79356b
A
11138 }
11139 local_start = local_vaddr -
2d21ac55 11140 (cow_parent_vaddr - old_start);
1c79356b 11141 local_end = local_vaddr +
2d21ac55 11142 (old_end - cow_parent_vaddr);
1c79356b
A
11143 vm_map_clip_start(map, submap_entry, local_start);
11144 vm_map_clip_end(map, submap_entry, local_end);
fe8ab488
A
11145 if (submap_entry->is_sub_map) {
11146 /* unnesting was done when clipping */
11147 assert(!submap_entry->use_pmap);
11148 }
1c79356b
A
11149
11150 /* This is the COW case; let's connect */
11151 /* an entry in our space to the underlying */
11152 /* object in the submap, bypassing the */
11153 /* submap. */
0b4e3aa0
A
11154
11155
2d21ac55 11156 if(submap_entry->wired_count != 0 ||
4a3eedf9
A
11157 (sub_object->copy_strategy ==
11158 MEMORY_OBJECT_COPY_NONE)) {
2d21ac55
A
11159 vm_object_lock(sub_object);
11160 vm_object_copy_slowly(sub_object,
3e170ce0 11161 VME_OFFSET(submap_entry),
2d21ac55
A
11162 (submap_entry->vme_end -
11163 submap_entry->vme_start),
11164 FALSE,
11165 &copy_object);
11166 copied_slowly = TRUE;
0b4e3aa0 11167 } else {
2d21ac55 11168
0b4e3aa0 11169 /* set up shadow object */
2d21ac55 11170 copy_object = sub_object;
39037602
A
11171 vm_object_lock(sub_object);
11172 vm_object_reference_locked(sub_object);
2d21ac55 11173 sub_object->shadowed = TRUE;
39037602
A
11174 vm_object_unlock(sub_object);
11175
3e170ce0 11176 assert(submap_entry->wired_count == 0);
0b4e3aa0 11177 submap_entry->needs_copy = TRUE;
0c530ab8
A
11178
11179 prot = submap_entry->protection & ~VM_PROT_WRITE;
2d21ac55 11180
3e170ce0
A
11181 if (override_nx(old_map,
11182 VME_ALIAS(submap_entry))
11183 && prot)
0c530ab8 11184 prot |= VM_PROT_EXECUTE;
2d21ac55 11185
0b4e3aa0 11186 vm_object_pmap_protect(
2d21ac55 11187 sub_object,
3e170ce0 11188 VME_OFFSET(submap_entry),
1c79356b 11189 submap_entry->vme_end -
2d21ac55 11190 submap_entry->vme_start,
9bccf70c 11191 (submap_entry->is_shared
316670eb 11192 || map->mapped_in_other_pmaps) ?
2d21ac55 11193 PMAP_NULL : map->pmap,
1c79356b 11194 submap_entry->vme_start,
0c530ab8 11195 prot);
0b4e3aa0 11196 }
1c79356b 11197
2d21ac55
A
11198 /*
11199 * Adjust the fault offset to the submap entry.
11200 */
11201 copy_offset = (local_vaddr -
11202 submap_entry->vme_start +
3e170ce0 11203 VME_OFFSET(submap_entry));
1c79356b
A
11204
11205 /* This works differently than the */
11206 /* normal submap case. We go back */
11207 /* to the parent of the cow map and */
11208 /* clip out the target portion of */
11209 /* the sub_map, substituting the */
11210 /* new copy object. */
11211
11212 vm_map_unlock(map);
11213 local_start = old_start;
11214 local_end = old_end;
11215 map = cow_sub_map_parent;
11216 *var_map = cow_sub_map_parent;
11217 vaddr = cow_parent_vaddr;
11218 cow_sub_map_parent = NULL;
11219
2d21ac55
A
11220 if(!vm_map_lookup_entry(map,
11221 vaddr, &entry)) {
11222 vm_object_deallocate(
11223 copy_object);
11224 vm_map_lock_write_to_read(map);
11225 return KERN_INVALID_ADDRESS;
11226 }
11227
11228 /* clip out the portion of space */
11229 /* mapped by the sub map which */
11230 /* corresponds to the underlying */
11231 /* object */
11232
11233 /*
11234 * Clip (and unnest) the smallest nested chunk
11235 * possible around the faulting address...
11236 */
11237 local_start = vaddr & ~(pmap_nesting_size_min - 1);
11238 local_end = local_start + pmap_nesting_size_min;
11239 /*
11240 * ... but don't go beyond the "old_start" to "old_end"
11241 * range, to avoid spanning over another VM region
11242 * with a possibly different VM object and/or offset.
11243 */
11244 if (local_start < old_start) {
11245 local_start = old_start;
11246 }
11247 if (local_end > old_end) {
11248 local_end = old_end;
11249 }
11250 /*
11251 * Adjust copy_offset to the start of the range.
11252 */
11253 copy_offset -= (vaddr - local_start);
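			/*
			 * Illustrative example (hypothetical numbers): if
			 * pmap_nesting_size_min were 0x10000 and vaddr were
			 * 0x12345000, local_start would be 0x12340000 and
			 * local_end 0x12350000, before being clamped to the
			 * [old_start, old_end) range above.
			 */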
11254
1c79356b
A
11255 vm_map_clip_start(map, entry, local_start);
11256 vm_map_clip_end(map, entry, local_end);
fe8ab488
A
11257 if (entry->is_sub_map) {
11258 /* unnesting was done when clipping */
11259 assert(!entry->use_pmap);
11260 }
1c79356b
A
11261
11262 /* substitute copy object for */
11263 /* shared map entry */
3e170ce0 11264 vm_map_deallocate(VME_SUBMAP(entry));
fe8ab488 11265 assert(!entry->iokit_acct);
1c79356b 11266 entry->is_sub_map = FALSE;
fe8ab488 11267 entry->use_pmap = TRUE;
3e170ce0 11268 VME_OBJECT_SET(entry, copy_object);
1c79356b 11269
2d21ac55
A
11270 /* propagate the submap entry's protections */
11271 entry->protection |= submap_entry->protection;
11272 entry->max_protection |= submap_entry->max_protection;
11273
0b4e3aa0 11274 if(copied_slowly) {
3e170ce0 11275 VME_OFFSET_SET(entry, local_start - old_start);
0b4e3aa0
A
11276 entry->needs_copy = FALSE;
11277 entry->is_shared = FALSE;
11278 } else {
3e170ce0
A
11279 VME_OFFSET_SET(entry, copy_offset);
11280 assert(entry->wired_count == 0);
0b4e3aa0
A
11281 entry->needs_copy = TRUE;
11282 if(entry->inheritance == VM_INHERIT_SHARE)
11283 entry->inheritance = VM_INHERIT_COPY;
11284 if (map != old_map)
11285 entry->is_shared = TRUE;
11286 }
1c79356b 11287 if(entry->inheritance == VM_INHERIT_SHARE)
0b4e3aa0 11288 entry->inheritance = VM_INHERIT_COPY;
1c79356b
A
11289
11290 vm_map_lock_write_to_read(map);
11291 } else {
11292 if((cow_sub_map_parent)
2d21ac55
A
11293 && (cow_sub_map_parent != *real_map)
11294 && (cow_sub_map_parent != map)) {
1c79356b
A
11295 vm_map_unlock(cow_sub_map_parent);
11296 }
11297 entry = submap_entry;
11298 vaddr = local_vaddr;
11299 }
11300 }
11301
11302 /*
11303 * Check whether this task is allowed to have
11304 * this page.
11305 */
2d21ac55 11306
6601e61a 11307 prot = entry->protection;
0c530ab8 11308
3e170ce0 11309 if (override_nx(old_map, VME_ALIAS(entry)) && prot) {
0c530ab8 11310 /*
2d21ac55 11311 * HACK -- if not a stack, then allow execution
0c530ab8
A
11312 */
11313 prot |= VM_PROT_EXECUTE;
2d21ac55
A
11314 }
11315
6d2010ae
A
11316 if (mask_protections) {
11317 fault_type &= prot;
11318 if (fault_type == VM_PROT_NONE) {
11319 goto protection_failure;
11320 }
11321 }
39037602
A
11322 if (((fault_type & prot) != fault_type)
11323 ) {
6d2010ae 11324 protection_failure:
2d21ac55
A
11325 if (*real_map != map) {
11326 vm_map_unlock(*real_map);
0c530ab8
A
11327 }
11328 *real_map = map;
11329
11330 if ((fault_type & VM_PROT_EXECUTE) && prot)
2d21ac55 11331 log_stack_execution_failure((addr64_t)vaddr, prot);
0c530ab8 11332
2d21ac55 11333 DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL);
0c530ab8 11334 return KERN_PROTECTION_FAILURE;
1c79356b
A
11335 }
11336
11337 /*
11338 * If this page is not pageable, we have to get
11339 * it for all possible accesses.
11340 */
11341
91447636
A
11342 *wired = (entry->wired_count != 0);
11343 if (*wired)
0c530ab8 11344 fault_type = prot;
1c79356b
A
11345
11346 /*
11347 * If the entry was copy-on-write, we either shadow the object now or demote the allowed access.
11348 */
11349
11350 if (entry->needs_copy) {
11351 /*
11352 * If we want to write the page, we may as well
11353 * handle that now since we've got the map locked.
11354 *
11355 * If we don't need to write the page, we just
11356 * demote the permissions allowed.
11357 */
11358
fe8ab488 11359 if ((fault_type & VM_PROT_WRITE) || *wired || force_copy) {
1c79356b
A
11360 /*
11361 * Make a new object, and place it in the
11362 * object chain. Note that no new references
11363 * have appeared -- one just moved from the
11364 * map to the new object.
11365 */
11366
11367 if (vm_map_lock_read_to_write(map)) {
11368 vm_map_lock_read(map);
11369 goto RetryLookup;
11370 }
39037602
A
11371
11372 if (VME_OBJECT(entry)->shadowed == FALSE) {
11373 vm_object_lock(VME_OBJECT(entry));
11374 VME_OBJECT(entry)->shadowed = TRUE;
11375 vm_object_unlock(VME_OBJECT(entry));
11376 }
3e170ce0
A
11377 VME_OBJECT_SHADOW(entry,
11378 (vm_map_size_t) (entry->vme_end -
11379 entry->vme_start));
1c79356b 11380 entry->needs_copy = FALSE;
39037602 11381
1c79356b
A
11382 vm_map_lock_write_to_read(map);
11383 }
39037602 11384 if ((fault_type & VM_PROT_WRITE) == 0 && *wired == 0) {
1c79356b
A
11385 /*
11386 * We're attempting to read a copy-on-write
11387 * page -- don't allow writes.
11388 */
11389
11390 prot &= (~VM_PROT_WRITE);
11391 }
11392 }
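	/*
	 * Illustrative note (added, not in the original source): on a read
	 * fault against a needs_copy entry with protection
	 * VM_PROT_READ|VM_PROT_WRITE, the code above leaves the entry alone
	 * and simply clears VM_PROT_WRITE from "prot", so the page is entered
	 * read-only and a later write will fault again and perform the copy
	 * at that point.
	 */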
11393
11394 /*
11395 * Create an object if necessary.
11396 */
3e170ce0 11397 if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
1c79356b
A
11398
11399 if (vm_map_lock_read_to_write(map)) {
11400 vm_map_lock_read(map);
11401 goto RetryLookup;
11402 }
11403
3e170ce0
A
11404 VME_OBJECT_SET(entry,
11405 vm_object_allocate(
11406 (vm_map_size_t)(entry->vme_end -
11407 entry->vme_start)));
11408 VME_OFFSET_SET(entry, 0);
1c79356b
A
11409 vm_map_lock_write_to_read(map);
11410 }
11411
11412 /*
11413 * Return the object/offset from this entry. If the entry
11414 * was copy-on-write or empty, it has been fixed up. Also
11415 * return the protection.
11416 */
11417
3e170ce0
A
11418 *offset = (vaddr - entry->vme_start) + VME_OFFSET(entry);
11419 *object = VME_OBJECT(entry);
1c79356b 11420 *out_prot = prot;
2d21ac55
A
11421
11422 if (fault_info) {
11423 fault_info->interruptible = THREAD_UNINT; /* for now... */
11424 /* ... the caller will change "interruptible" if needed */
11425 fault_info->cluster_size = 0;
3e170ce0 11426 fault_info->user_tag = VME_ALIAS(entry);
fe8ab488
A
11427 fault_info->pmap_options = 0;
11428 if (entry->iokit_acct ||
11429 (!entry->is_sub_map && !entry->use_pmap)) {
11430 fault_info->pmap_options |= PMAP_OPTIONS_ALT_ACCT;
11431 }
2d21ac55 11432 fault_info->behavior = entry->behavior;
3e170ce0
A
11433 fault_info->lo_offset = VME_OFFSET(entry);
11434 fault_info->hi_offset =
11435 (entry->vme_end - entry->vme_start) + VME_OFFSET(entry);
2d21ac55 11436 fault_info->no_cache = entry->no_cache;
b0d623f7 11437 fault_info->stealth = FALSE;
6d2010ae 11438 fault_info->io_sync = FALSE;
3e170ce0
A
11439 if (entry->used_for_jit ||
11440 entry->vme_resilient_codesign) {
11441 fault_info->cs_bypass = TRUE;
11442 } else {
11443 fault_info->cs_bypass = FALSE;
11444 }
0b4c1975 11445 fault_info->mark_zf_absent = FALSE;
316670eb 11446 fault_info->batch_pmap_op = FALSE;
2d21ac55 11447 }
1c79356b
A
11448
11449 /*
11450 * Lock the object to prevent it from disappearing
11451 */
2d21ac55
A
11452 if (object_lock_type == OBJECT_LOCK_EXCLUSIVE)
11453 vm_object_lock(*object);
11454 else
11455 vm_object_lock_shared(*object);
11456
1c79356b
A
11457 /*
11458 * Save the version number
11459 */
11460
11461 out_version->main_timestamp = map->timestamp;
11462
11463 return KERN_SUCCESS;
11464}
11465
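/*
 * A minimal usage sketch (illustrative only, not part of the original file):
 * roughly how a fault handler drives vm_map_lookup_locked().  The caller
 * takes the map's read lock, resolves the faulting address to an
 * <object, offset> pair, and is then responsible for dropping the object,
 * "real_map" and "map" locks that the routine leaves held.  The name
 * "example_resolve_fault" and the omitted page-level work are assumptions.
 */
static kern_return_t
example_resolve_fault(
	vm_map_t	map,
	vm_map_offset_t	vaddr,
	vm_prot_t	fault_type)
{
	vm_map_version_t	version;
	vm_object_t		object;
	vm_object_offset_t	offset;
	vm_prot_t		prot;
	boolean_t		wired;
	vm_map_t		real_map;
	kern_return_t		kr;

	vm_map_lock_read(map);
	kr = vm_map_lookup_locked(&map, vaddr, fault_type,
				  OBJECT_LOCK_EXCLUSIVE, &version,
				  &object, &offset, &prot, &wired,
				  NULL,		/* no fault_info needed here */
				  &real_map);
	if (kr != KERN_SUCCESS) {
		/* "map" may have been updated; it is still read-locked */
		vm_map_unlock_read(map);
		return kr;
	}
	(void) version; (void) prot; (void) wired;	/* unused in this sketch */

	/* ... page lookup / zero-fill against <object, offset> goes here ... */

	vm_object_unlock(object);
	if (real_map != map)
		vm_map_unlock(real_map);
	vm_map_unlock_read(map);
	return KERN_SUCCESS;
}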
11466
11467/*
11468 * vm_map_verify:
11469 *
11470 * Verifies that the map in question has not changed
11471 * since the given version. If successful, the map
11472 * will not change until vm_map_verify_done() is called.
11473 */
11474boolean_t
11475vm_map_verify(
39037602
A
11476 vm_map_t map,
11477 vm_map_version_t *version) /* REF */
1c79356b
A
11478{
11479 boolean_t result;
11480
11481 vm_map_lock_read(map);
11482 result = (map->timestamp == version->main_timestamp);
11483
11484 if (!result)
11485 vm_map_unlock_read(map);
11486
11487 return(result);
11488}
11489
11490/*
11491 * vm_map_verify_done:
11492 *
11493 * Releases locks acquired by a vm_map_verify.
11494 *
11495 * This is now a macro in vm/vm_map.h. It does a
11496 * vm_map_unlock_read on the map.
11497 */
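/*
 * A minimal sketch (illustrative, not from the original source) of the
 * re-validation pattern: the caller holds the read lock and has already
 * captured a version (e.g. from vm_map_lookup_locked()), drops the lock for
 * blocking work, then uses vm_map_verify() to find out whether its cached
 * lookup is still good.  "example_revalidate" and "do_unlocked_work" are
 * hypothetical names.
 */
static boolean_t
example_revalidate(
	vm_map_t		map,
	vm_map_version_t	*version)
{
	boolean_t	still_valid;

	vm_map_unlock_read(map);	/* drop the lock for blocking work */
	/* do_unlocked_work(); */

	still_valid = vm_map_verify(map, version);
	if (still_valid) {
		/*
		 * The map is read-locked again and unchanged; cached entry
		 * data may keep being used here, and the lock is then
		 * released (which is all the vm_map_verify_done() macro does).
		 */
		vm_map_unlock_read(map);
	}
	/* if !still_valid, vm_map_verify() already dropped the read lock */
	return still_valid;
}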
11498
11499
91447636
A
11500/*
11501 * TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
11502 * Goes away after regular vm_region_recurse function migrates to
11503 * 64 bits
11504 * vm_region_recurse: A form of vm_region which follows the
11505 * submaps in a target map
11506 *
11507 */
11508
39037602
A
11509#if DEVELOPMENT || DEBUG
11510int vm_region_footprint = 0;
11511#endif /* DEVELOPMENT || DEBUG */
11512
91447636
A
11513kern_return_t
11514vm_map_region_recurse_64(
11515 vm_map_t map,
11516 vm_map_offset_t *address, /* IN/OUT */
11517 vm_map_size_t *size, /* OUT */
11518 natural_t *nesting_depth, /* IN/OUT */
11519 vm_region_submap_info_64_t submap_info, /* IN/OUT */
11520 mach_msg_type_number_t *count) /* IN/OUT */
11521{
39236c6e 11522 mach_msg_type_number_t original_count;
91447636
A
11523 vm_region_extended_info_data_t extended;
11524 vm_map_entry_t tmp_entry;
11525 vm_map_offset_t user_address;
11526 unsigned int user_max_depth;
11527
11528 /*
11529 * "curr_entry" is the VM map entry preceding or including the
11530 * address we're looking for.
11531 * "curr_map" is the map or sub-map containing "curr_entry".
6d2010ae
A
11532 * "curr_address" is the equivalent of the top map's "user_address"
11533 * in the current map.
91447636
A
11534 * "curr_offset" is the accumulated offset of "curr_map" in the
11535 * target task's address space.
11536 * "curr_depth" is the depth of "curr_map" in the chain of
11537 * sub-maps.
6d2010ae
A
11538 *
11539 * "curr_max_below" and "curr_max_above" limit the range (around
11540 * "curr_address") we should take into account in the current (sub)map.
11541 * They limit the range to what's visible through the map entries
11542 * we've traversed from the top map to the current map.
11543 *
91447636
A
11544 */
11545 vm_map_entry_t curr_entry;
6d2010ae 11546 vm_map_address_t curr_address;
91447636
A
11547 vm_map_offset_t curr_offset;
11548 vm_map_t curr_map;
11549 unsigned int curr_depth;
6d2010ae
A
11550 vm_map_offset_t curr_max_below, curr_max_above;
11551 vm_map_offset_t curr_skip;
91447636
A
11552
11553 /*
11554 * "next_" is the same as "curr_" but for the VM region immediately
11555 * after the address we're looking for. We need to keep track of this
11556 * too because we want to return info about that region if the
11557 * address we're looking for is not mapped.
11558 */
11559 vm_map_entry_t next_entry;
11560 vm_map_offset_t next_offset;
6d2010ae 11561 vm_map_offset_t next_address;
91447636
A
11562 vm_map_t next_map;
11563 unsigned int next_depth;
6d2010ae
A
11564 vm_map_offset_t next_max_below, next_max_above;
11565 vm_map_offset_t next_skip;
91447636 11566
2d21ac55
A
11567 boolean_t look_for_pages;
11568 vm_region_submap_short_info_64_t short_info;
11569
91447636
A
11570 if (map == VM_MAP_NULL) {
11571 /* no address space to work on */
11572 return KERN_INVALID_ARGUMENT;
11573 }
11574
39236c6e
A
11575
11576 if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) {
11577 /*
11578 * "info" structure is not big enough and
11579 * would overflow
11580 */
11581 return KERN_INVALID_ARGUMENT;
11582 }
11583
11584 original_count = *count;
11585
11586 if (original_count < VM_REGION_SUBMAP_INFO_V0_COUNT_64) {
11587 *count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
11588 look_for_pages = FALSE;
11589 short_info = (vm_region_submap_short_info_64_t) submap_info;
11590 submap_info = NULL;
2d21ac55
A
11591 } else {
11592 look_for_pages = TRUE;
39236c6e 11593 *count = VM_REGION_SUBMAP_INFO_V0_COUNT_64;
2d21ac55 11594 short_info = NULL;
39236c6e
A
11595
11596 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
11597 *count = VM_REGION_SUBMAP_INFO_V1_COUNT_64;
11598 }
91447636 11599 }
39236c6e 11600
91447636
A
11601 user_address = *address;
11602 user_max_depth = *nesting_depth;
11603
3e170ce0
A
11604 if (not_in_kdp) {
11605 vm_map_lock_read(map);
11606 }
11607
11608recurse_again:
91447636
A
11609 curr_entry = NULL;
11610 curr_map = map;
6d2010ae 11611 curr_address = user_address;
91447636 11612 curr_offset = 0;
6d2010ae 11613 curr_skip = 0;
91447636 11614 curr_depth = 0;
6d2010ae
A
11615 curr_max_above = ((vm_map_offset_t) -1) - curr_address;
11616 curr_max_below = curr_address;
91447636
A
11617
11618 next_entry = NULL;
11619 next_map = NULL;
6d2010ae 11620 next_address = 0;
91447636 11621 next_offset = 0;
6d2010ae 11622 next_skip = 0;
91447636 11623 next_depth = 0;
6d2010ae
A
11624 next_max_above = (vm_map_offset_t) -1;
11625 next_max_below = (vm_map_offset_t) -1;
91447636 11626
91447636
A
11627 for (;;) {
11628 if (vm_map_lookup_entry(curr_map,
6d2010ae 11629 curr_address,
91447636
A
11630 &tmp_entry)) {
11631 /* tmp_entry contains the address we're looking for */
11632 curr_entry = tmp_entry;
11633 } else {
6d2010ae 11634 vm_map_offset_t skip;
91447636
A
11635 /*
11636 * The address is not mapped. "tmp_entry" is the
11637 * map entry preceding the address. We want the next
11638 * one, if it exists.
11639 */
11640 curr_entry = tmp_entry->vme_next;
6d2010ae 11641
91447636 11642 if (curr_entry == vm_map_to_entry(curr_map) ||
6d2010ae
A
11643 (curr_entry->vme_start >=
11644 curr_address + curr_max_above)) {
91447636
A
11645 /* no next entry at this level: stop looking */
11646 if (not_in_kdp) {
11647 vm_map_unlock_read(curr_map);
11648 }
11649 curr_entry = NULL;
11650 curr_map = NULL;
3e170ce0 11651 curr_skip = 0;
91447636
A
11652 curr_offset = 0;
11653 curr_depth = 0;
6d2010ae
A
11654 curr_max_above = 0;
11655 curr_max_below = 0;
91447636
A
11656 break;
11657 }
6d2010ae
A
11658
11659 /* adjust current address and offset */
11660 skip = curr_entry->vme_start - curr_address;
11661 curr_address = curr_entry->vme_start;
3e170ce0 11662 curr_skip += skip;
6d2010ae
A
11663 curr_offset += skip;
11664 curr_max_above -= skip;
11665 curr_max_below = 0;
91447636
A
11666 }
11667
11668 /*
11669 * Is the next entry at this level closer to the address (or
11670 * deeper in the submap chain) than the one we had
11671 * so far ?
11672 */
11673 tmp_entry = curr_entry->vme_next;
11674 if (tmp_entry == vm_map_to_entry(curr_map)) {
11675 /* no next entry at this level */
6d2010ae
A
11676 } else if (tmp_entry->vme_start >=
11677 curr_address + curr_max_above) {
91447636
A
11678 /*
11679 * tmp_entry is beyond the scope of what we mapped of
11680 * this submap in the upper level: ignore it.
11681 */
11682 } else if ((next_entry == NULL) ||
11683 (tmp_entry->vme_start + curr_offset <=
11684 next_entry->vme_start + next_offset)) {
11685 /*
11686 * We didn't have a "next_entry" or this one is
11687 * closer to the address we're looking for:
11688 * use this "tmp_entry" as the new "next_entry".
11689 */
11690 if (next_entry != NULL) {
11691 /* unlock the last "next_map" */
11692 if (next_map != curr_map && not_in_kdp) {
11693 vm_map_unlock_read(next_map);
11694 }
11695 }
11696 next_entry = tmp_entry;
11697 next_map = curr_map;
91447636 11698 next_depth = curr_depth;
6d2010ae
A
11699 next_address = next_entry->vme_start;
11700 next_skip = curr_skip;
3e170ce0 11701 next_skip += (next_address - curr_address);
6d2010ae
A
11702 next_offset = curr_offset;
11703 next_offset += (next_address - curr_address);
11704 next_max_above = MIN(next_max_above, curr_max_above);
11705 next_max_above = MIN(next_max_above,
11706 next_entry->vme_end - next_address);
11707 next_max_below = MIN(next_max_below, curr_max_below);
11708 next_max_below = MIN(next_max_below,
11709 next_address - next_entry->vme_start);
91447636
A
11710 }
11711
6d2010ae
A
11712 /*
11713 * "curr_max_{above,below}" allow us to keep track of the
11714 * portion of the submap that is actually mapped at this level:
11715 * the rest of that submap is irrelevant to us, since it's not
11716 * mapped here.
11717 * The relevant portion of the map starts at
3e170ce0 11718 * "VME_OFFSET(curr_entry)" and extends for the size of "curr_entry".
6d2010ae
A
11719 */
11720 curr_max_above = MIN(curr_max_above,
11721 curr_entry->vme_end - curr_address);
11722 curr_max_below = MIN(curr_max_below,
11723 curr_address - curr_entry->vme_start);
11724
91447636
A
11725 if (!curr_entry->is_sub_map ||
11726 curr_depth >= user_max_depth) {
11727 /*
11728 * We hit a leaf map or we reached the maximum depth
11729 * we could, so stop looking. Keep the current map
11730 * locked.
11731 */
11732 break;
11733 }
11734
11735 /*
11736 * Get down to the next submap level.
11737 */
11738
11739 /*
11740 * Lock the next level and unlock the current level,
11741 * unless we need to keep it locked to access the "next_entry"
11742 * later.
11743 */
11744 if (not_in_kdp) {
3e170ce0 11745 vm_map_lock_read(VME_SUBMAP(curr_entry));
91447636
A
11746 }
11747 if (curr_map == next_map) {
11748 /* keep "next_map" locked in case we need it */
11749 } else {
11750 /* release this map */
b0d623f7
A
11751 if (not_in_kdp)
11752 vm_map_unlock_read(curr_map);
91447636
A
11753 }
11754
11755 /*
11756 * Adjust the offset. "curr_entry" maps the submap
11757 * at relative address "curr_entry->vme_start" in the
3e170ce0 11758 * curr_map but skips the first "VME_OFFSET(curr_entry)"
91447636
A
11759 * bytes of the submap.
11760 * "curr_offset" always represents the offset of a virtual
11761 * address in the curr_map relative to the absolute address
11762 * space (i.e. the top-level VM map).
11763 */
11764 curr_offset +=
3e170ce0 11765 (VME_OFFSET(curr_entry) - curr_entry->vme_start);
6d2010ae 11766 curr_address = user_address + curr_offset;
91447636 11767 /* switch to the submap */
3e170ce0 11768 curr_map = VME_SUBMAP(curr_entry);
91447636 11769 curr_depth++;
91447636
A
11770 curr_entry = NULL;
11771 }
11772
11773 if (curr_entry == NULL) {
11774 /* no VM region contains the address... */
39037602
A
11775#if DEVELOPMENT || DEBUG
11776 if (vm_region_footprint && /* we want footprint numbers */
11777 look_for_pages && /* & we want page counts */
11778 next_entry == NULL && /* & there are no more regions */
11779 /* & we haven't already provided our fake region: */
11780 user_address == vm_map_last_entry(map)->vme_end) {
11781 ledger_amount_t nonvol, nonvol_compressed;
11782 /*
11783 * Add a fake memory region to account for
11784 * purgeable memory that counts towards this
11785 * task's memory footprint, i.e. the resident
11786 * compressed pages of non-volatile objects
11787 * owned by that task.
11788 */
11789 ledger_get_balance(
11790 map->pmap->ledger,
11791 task_ledgers.purgeable_nonvolatile,
11792 &nonvol);
11793 ledger_get_balance(
11794 map->pmap->ledger,
11795 task_ledgers.purgeable_nonvolatile_compressed,
11796 &nonvol_compressed);
11797 if (nonvol + nonvol_compressed == 0) {
11798 /* no purgeable memory usage to report */
11799 return KERN_FAILURE;
11800 }
11801 /* fake region to show nonvolatile footprint */
11802 submap_info->protection = VM_PROT_DEFAULT;
11803 submap_info->max_protection = VM_PROT_DEFAULT;
11804 submap_info->inheritance = VM_INHERIT_DEFAULT;
11805 submap_info->offset = 0;
11806 submap_info->user_tag = 0;
11807 submap_info->pages_resident = (unsigned int) (nonvol / PAGE_SIZE);
11808 submap_info->pages_shared_now_private = 0;
11809 submap_info->pages_swapped_out = (unsigned int) (nonvol_compressed / PAGE_SIZE);
11810 submap_info->pages_dirtied = submap_info->pages_resident;
11811 submap_info->ref_count = 1;
11812 submap_info->shadow_depth = 0;
11813 submap_info->external_pager = 0;
11814 submap_info->share_mode = SM_PRIVATE;
11815 submap_info->is_submap = 0;
11816 submap_info->behavior = VM_BEHAVIOR_DEFAULT;
11817 submap_info->object_id = 0x11111111;
11818 submap_info->user_wired_count = 0;
11819 submap_info->pages_reusable = 0;
11820 *nesting_depth = 0;
11821 *size = (vm_map_size_t) (nonvol + nonvol_compressed);
11822 *address = user_address;
11823 return KERN_SUCCESS;
11824 }
11825#endif /* DEVELOPMENT || DEBUG */
91447636
A
11826 if (next_entry == NULL) {
11827 /* ... and no VM region follows it either */
11828 return KERN_INVALID_ADDRESS;
11829 }
11830 /* ... gather info about the next VM region */
11831 curr_entry = next_entry;
11832 curr_map = next_map; /* still locked ... */
6d2010ae
A
11833 curr_address = next_address;
11834 curr_skip = next_skip;
91447636
A
11835 curr_offset = next_offset;
11836 curr_depth = next_depth;
6d2010ae
A
11837 curr_max_above = next_max_above;
11838 curr_max_below = next_max_below;
91447636
A
11839 } else {
11840 /* we won't need "next_entry" after all */
11841 if (next_entry != NULL) {
11842 /* release "next_map" */
11843 if (next_map != curr_map && not_in_kdp) {
11844 vm_map_unlock_read(next_map);
11845 }
11846 }
11847 }
11848 next_entry = NULL;
11849 next_map = NULL;
11850 next_offset = 0;
6d2010ae 11851 next_skip = 0;
91447636 11852 next_depth = 0;
6d2010ae
A
11853 next_max_below = -1;
11854 next_max_above = -1;
91447636 11855
3e170ce0
A
11856 if (curr_entry->is_sub_map &&
11857 curr_depth < user_max_depth) {
11858 /*
11859 * We're not as deep as we could be: we must have
11860 * gone back up after not finding anything mapped
11861 * below the original top-level map entry.
11862 * Let's move "curr_address" forward and recurse again.
11863 */
11864 user_address = curr_address;
11865 goto recurse_again;
11866 }
11867
91447636 11868 *nesting_depth = curr_depth;
6d2010ae
A
11869 *size = curr_max_above + curr_max_below;
11870 *address = user_address + curr_skip - curr_max_below;
91447636 11871
b0d623f7
A
11872// LP64todo: all the current tools are 32-bit, so this obviously never worked for 64-bit;
11873// it should probably be a real 32-bit ID rather than a pointer.
11874// Current users just check for equality.
39236c6e 11875#define INFO_MAKE_OBJECT_ID(p) ((uint32_t)(uintptr_t)VM_KERNEL_ADDRPERM(p))
b0d623f7 11876
2d21ac55 11877 if (look_for_pages) {
3e170ce0
A
11878 submap_info->user_tag = VME_ALIAS(curr_entry);
11879 submap_info->offset = VME_OFFSET(curr_entry);
2d21ac55
A
11880 submap_info->protection = curr_entry->protection;
11881 submap_info->inheritance = curr_entry->inheritance;
11882 submap_info->max_protection = curr_entry->max_protection;
11883 submap_info->behavior = curr_entry->behavior;
11884 submap_info->user_wired_count = curr_entry->user_wired_count;
11885 submap_info->is_submap = curr_entry->is_sub_map;
3e170ce0 11886 submap_info->object_id = INFO_MAKE_OBJECT_ID(VME_OBJECT(curr_entry));
2d21ac55 11887 } else {
3e170ce0
A
11888 short_info->user_tag = VME_ALIAS(curr_entry);
11889 short_info->offset = VME_OFFSET(curr_entry);
2d21ac55
A
11890 short_info->protection = curr_entry->protection;
11891 short_info->inheritance = curr_entry->inheritance;
11892 short_info->max_protection = curr_entry->max_protection;
11893 short_info->behavior = curr_entry->behavior;
11894 short_info->user_wired_count = curr_entry->user_wired_count;
11895 short_info->is_submap = curr_entry->is_sub_map;
3e170ce0 11896 short_info->object_id = INFO_MAKE_OBJECT_ID(VME_OBJECT(curr_entry));
2d21ac55 11897 }
91447636
A
11898
11899 extended.pages_resident = 0;
11900 extended.pages_swapped_out = 0;
11901 extended.pages_shared_now_private = 0;
11902 extended.pages_dirtied = 0;
39236c6e 11903 extended.pages_reusable = 0;
91447636
A
11904 extended.external_pager = 0;
11905 extended.shadow_depth = 0;
3e170ce0
A
11906 extended.share_mode = SM_EMPTY;
11907 extended.ref_count = 0;
91447636
A
11908
11909 if (not_in_kdp) {
11910 if (!curr_entry->is_sub_map) {
6d2010ae
A
11911 vm_map_offset_t range_start, range_end;
11912 range_start = MAX((curr_address - curr_max_below),
11913 curr_entry->vme_start);
11914 range_end = MIN((curr_address + curr_max_above),
11915 curr_entry->vme_end);
91447636 11916 vm_map_region_walk(curr_map,
6d2010ae 11917 range_start,
91447636 11918 curr_entry,
3e170ce0 11919 (VME_OFFSET(curr_entry) +
6d2010ae
A
11920 (range_start -
11921 curr_entry->vme_start)),
11922 range_end - range_start,
2d21ac55 11923 &extended,
39236c6e 11924 look_for_pages, VM_REGION_EXTENDED_INFO_COUNT);
91447636
A
11925 if (extended.external_pager &&
11926 extended.ref_count == 2 &&
11927 extended.share_mode == SM_SHARED) {
2d21ac55 11928 extended.share_mode = SM_PRIVATE;
91447636 11929 }
91447636
A
11930 } else {
11931 if (curr_entry->use_pmap) {
2d21ac55 11932 extended.share_mode = SM_TRUESHARED;
91447636 11933 } else {
2d21ac55 11934 extended.share_mode = SM_PRIVATE;
91447636 11935 }
3e170ce0 11936 extended.ref_count = VME_SUBMAP(curr_entry)->ref_count;
91447636
A
11937 }
11938 }
11939
2d21ac55
A
11940 if (look_for_pages) {
11941 submap_info->pages_resident = extended.pages_resident;
11942 submap_info->pages_swapped_out = extended.pages_swapped_out;
11943 submap_info->pages_shared_now_private =
11944 extended.pages_shared_now_private;
11945 submap_info->pages_dirtied = extended.pages_dirtied;
11946 submap_info->external_pager = extended.external_pager;
11947 submap_info->shadow_depth = extended.shadow_depth;
11948 submap_info->share_mode = extended.share_mode;
11949 submap_info->ref_count = extended.ref_count;
39236c6e
A
11950
11951 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
11952 submap_info->pages_reusable = extended.pages_reusable;
11953 }
2d21ac55
A
11954 } else {
11955 short_info->external_pager = extended.external_pager;
11956 short_info->shadow_depth = extended.shadow_depth;
11957 short_info->share_mode = extended.share_mode;
11958 short_info->ref_count = extended.ref_count;
11959 }
91447636
A
11960
11961 if (not_in_kdp) {
11962 vm_map_unlock_read(curr_map);
11963 }
11964
11965 return KERN_SUCCESS;
11966}
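/*
 * A minimal usage sketch (illustrative only, not part of the original file):
 * iterate over the regions of a map the way the vm_region_recurse() path
 * does, asking for the V1 64-bit submap info.  "example_walk_regions" is a
 * hypothetical helper; real callers typically arrive here through the MIG
 * routines in vm_user.c.
 */
static kern_return_t
example_walk_regions(
	vm_map_t	map)
{
	vm_map_offset_t			address = 0;
	vm_map_size_t			size = 0;
	natural_t			depth;
	vm_region_submap_info_data_64_t	info;
	mach_msg_type_number_t		count;
	kern_return_t			kr;

	for (;;) {
		depth = 0;	/* don't descend into submaps in this sketch */
		count = VM_REGION_SUBMAP_INFO_V1_COUNT_64;
		kr = vm_map_region_recurse_64(map, &address, &size,
					      &depth, &info, &count);
		if (kr != KERN_SUCCESS) {
			/* KERN_INVALID_ADDRESS: walked past the last region */
			break;
		}
		/* "address".."address + size" describes one region here */
		address += size;
	}
	return (kr == KERN_INVALID_ADDRESS) ? KERN_SUCCESS : kr;
}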
11967
1c79356b
A
11968/*
11969 * vm_region:
11970 *
11971 * User call to obtain information about a region in
11972 * a task's address map. Currently, only one flavor is
11973 * supported.
11974 *
11975 * XXX The reserved and behavior fields cannot be filled
11976 * in until the vm merge from the IK is completed, and
11977 * vm_reserve is implemented.
1c79356b
A
11978 */
11979
11980kern_return_t
91447636 11981vm_map_region(
1c79356b 11982 vm_map_t map,
91447636
A
11983 vm_map_offset_t *address, /* IN/OUT */
11984 vm_map_size_t *size, /* OUT */
1c79356b
A
11985 vm_region_flavor_t flavor, /* IN */
11986 vm_region_info_t info, /* OUT */
91447636
A
11987 mach_msg_type_number_t *count, /* IN/OUT */
11988 mach_port_t *object_name) /* OUT */
1c79356b
A
11989{
11990 vm_map_entry_t tmp_entry;
1c79356b 11991 vm_map_entry_t entry;
91447636 11992 vm_map_offset_t start;
1c79356b
A
11993
11994 if (map == VM_MAP_NULL)
11995 return(KERN_INVALID_ARGUMENT);
11996
11997 switch (flavor) {
91447636 11998
1c79356b 11999 case VM_REGION_BASIC_INFO:
2d21ac55 12000 /* legacy for old 32-bit objects info */
1c79356b 12001 {
2d21ac55 12002 vm_region_basic_info_t basic;
91447636 12003
2d21ac55
A
12004 if (*count < VM_REGION_BASIC_INFO_COUNT)
12005 return(KERN_INVALID_ARGUMENT);
1c79356b 12006
2d21ac55
A
12007 basic = (vm_region_basic_info_t) info;
12008 *count = VM_REGION_BASIC_INFO_COUNT;
1c79356b 12009
2d21ac55 12010 vm_map_lock_read(map);
1c79356b 12011
2d21ac55
A
12012 start = *address;
12013 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
12014 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
12015 vm_map_unlock_read(map);
12016 return(KERN_INVALID_ADDRESS);
12017 }
12018 } else {
12019 entry = tmp_entry;
1c79356b 12020 }
1c79356b 12021
2d21ac55 12022 start = entry->vme_start;
1c79356b 12023
3e170ce0 12024 basic->offset = (uint32_t)VME_OFFSET(entry);
2d21ac55
A
12025 basic->protection = entry->protection;
12026 basic->inheritance = entry->inheritance;
12027 basic->max_protection = entry->max_protection;
12028 basic->behavior = entry->behavior;
12029 basic->user_wired_count = entry->user_wired_count;
12030 basic->reserved = entry->is_sub_map;
12031 *address = start;
12032 *size = (entry->vme_end - start);
91447636 12033
2d21ac55
A
12034 if (object_name) *object_name = IP_NULL;
12035 if (entry->is_sub_map) {
12036 basic->shared = FALSE;
12037 } else {
12038 basic->shared = entry->is_shared;
12039 }
91447636 12040
2d21ac55
A
12041 vm_map_unlock_read(map);
12042 return(KERN_SUCCESS);
91447636
A
12043 }
12044
12045 case VM_REGION_BASIC_INFO_64:
12046 {
2d21ac55 12047 vm_region_basic_info_64_t basic;
91447636 12048
2d21ac55
A
12049 if (*count < VM_REGION_BASIC_INFO_COUNT_64)
12050 return(KERN_INVALID_ARGUMENT);
12051
12052 basic = (vm_region_basic_info_64_t) info;
12053 *count = VM_REGION_BASIC_INFO_COUNT_64;
12054
12055 vm_map_lock_read(map);
12056
12057 start = *address;
12058 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
12059 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
12060 vm_map_unlock_read(map);
12061 return(KERN_INVALID_ADDRESS);
12062 }
12063 } else {
12064 entry = tmp_entry;
12065 }
91447636 12066
2d21ac55 12067 start = entry->vme_start;
91447636 12068
3e170ce0 12069 basic->offset = VME_OFFSET(entry);
2d21ac55
A
12070 basic->protection = entry->protection;
12071 basic->inheritance = entry->inheritance;
12072 basic->max_protection = entry->max_protection;
12073 basic->behavior = entry->behavior;
12074 basic->user_wired_count = entry->user_wired_count;
12075 basic->reserved = entry->is_sub_map;
12076 *address = start;
12077 *size = (entry->vme_end - start);
91447636 12078
2d21ac55
A
12079 if (object_name) *object_name = IP_NULL;
12080 if (entry->is_sub_map) {
12081 basic->shared = FALSE;
12082 } else {
12083 basic->shared = entry->is_shared;
91447636 12084 }
2d21ac55
A
12085
12086 vm_map_unlock_read(map);
12087 return(KERN_SUCCESS);
1c79356b
A
12088 }
12089 case VM_REGION_EXTENDED_INFO:
2d21ac55
A
12090 if (*count < VM_REGION_EXTENDED_INFO_COUNT)
12091 return(KERN_INVALID_ARGUMENT);
39236c6e
A
12092 /*fallthru*/
12093 case VM_REGION_EXTENDED_INFO__legacy:
12094 if (*count < VM_REGION_EXTENDED_INFO_COUNT__legacy)
12095 return KERN_INVALID_ARGUMENT;
12096
12097 {
12098 vm_region_extended_info_t extended;
12099 mach_msg_type_number_t original_count;
1c79356b 12100
2d21ac55 12101 extended = (vm_region_extended_info_t) info;
1c79356b 12102
2d21ac55 12103 vm_map_lock_read(map);
1c79356b 12104
2d21ac55
A
12105 start = *address;
12106 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
12107 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
12108 vm_map_unlock_read(map);
12109 return(KERN_INVALID_ADDRESS);
12110 }
12111 } else {
12112 entry = tmp_entry;
1c79356b 12113 }
2d21ac55 12114 start = entry->vme_start;
1c79356b 12115
2d21ac55 12116 extended->protection = entry->protection;
3e170ce0 12117 extended->user_tag = VME_ALIAS(entry);
2d21ac55
A
12118 extended->pages_resident = 0;
12119 extended->pages_swapped_out = 0;
12120 extended->pages_shared_now_private = 0;
12121 extended->pages_dirtied = 0;
12122 extended->external_pager = 0;
12123 extended->shadow_depth = 0;
1c79356b 12124
39236c6e
A
12125 original_count = *count;
12126 if (flavor == VM_REGION_EXTENDED_INFO__legacy) {
12127 *count = VM_REGION_EXTENDED_INFO_COUNT__legacy;
12128 } else {
12129 extended->pages_reusable = 0;
12130 *count = VM_REGION_EXTENDED_INFO_COUNT;
12131 }
12132
3e170ce0 12133 vm_map_region_walk(map, start, entry, VME_OFFSET(entry), entry->vme_end - start, extended, TRUE, *count);
1c79356b 12134
2d21ac55
A
12135 if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED)
12136 extended->share_mode = SM_PRIVATE;
1c79356b 12137
2d21ac55
A
12138 if (object_name)
12139 *object_name = IP_NULL;
12140 *address = start;
12141 *size = (entry->vme_end - start);
1c79356b 12142
2d21ac55
A
12143 vm_map_unlock_read(map);
12144 return(KERN_SUCCESS);
1c79356b
A
12145 }
12146 case VM_REGION_TOP_INFO:
12147 {
2d21ac55 12148 vm_region_top_info_t top;
1c79356b 12149
2d21ac55
A
12150 if (*count < VM_REGION_TOP_INFO_COUNT)
12151 return(KERN_INVALID_ARGUMENT);
1c79356b 12152
2d21ac55
A
12153 top = (vm_region_top_info_t) info;
12154 *count = VM_REGION_TOP_INFO_COUNT;
1c79356b 12155
2d21ac55 12156 vm_map_lock_read(map);
1c79356b 12157
2d21ac55
A
12158 start = *address;
12159 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
12160 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
12161 vm_map_unlock_read(map);
12162 return(KERN_INVALID_ADDRESS);
12163 }
12164 } else {
12165 entry = tmp_entry;
1c79356b 12166
2d21ac55
A
12167 }
12168 start = entry->vme_start;
1c79356b 12169
2d21ac55
A
12170 top->private_pages_resident = 0;
12171 top->shared_pages_resident = 0;
1c79356b 12172
2d21ac55 12173 vm_map_region_top_walk(entry, top);
1c79356b 12174
2d21ac55
A
12175 if (object_name)
12176 *object_name = IP_NULL;
12177 *address = start;
12178 *size = (entry->vme_end - start);
1c79356b 12179
2d21ac55
A
12180 vm_map_unlock_read(map);
12181 return(KERN_SUCCESS);
1c79356b
A
12182 }
12183 default:
2d21ac55 12184 return(KERN_INVALID_ARGUMENT);
1c79356b
A
12185 }
12186}
12187
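/*
 * A minimal sketch (illustrative, not part of the original file): look up the
 * basic attributes of the region containing (or following) "addr" using the
 * 64-bit flavor.  "example_region_protection" is a hypothetical helper name.
 */
static kern_return_t
example_region_protection(
	vm_map_t	map,
	vm_map_offset_t	addr,
	vm_prot_t	*prot)
{
	vm_map_offset_t			address = addr;
	vm_map_size_t			size;
	vm_region_basic_info_data_64_t	basic;
	mach_msg_type_number_t		count = VM_REGION_BASIC_INFO_COUNT_64;
	kern_return_t			kr;

	kr = vm_map_region(map, &address, &size, VM_REGION_BASIC_INFO_64,
			   (vm_region_info_t) &basic, &count,
			   NULL);	/* object_name not wanted */
	if (kr == KERN_SUCCESS) {
		/* "address"/"size" now describe the region that was found */
		*prot = basic.protection;
	}
	return kr;
}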
b0d623f7
A
12188#define OBJ_RESIDENT_COUNT(obj, entry_size) \
12189 MIN((entry_size), \
12190 ((obj)->all_reusable ? \
12191 (obj)->wired_page_count : \
12192 (obj)->resident_page_count - (obj)->reusable_page_count))
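/*
 * Descriptive note (added for clarity): for an "all_reusable" object only the
 * wired pages are charged as resident; otherwise the reusable pages are
 * subtracted from the resident count.  The MIN() against the entry size keeps
 * a shared object from being counted beyond the portion this entry maps.
 */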
2d21ac55 12193
0c530ab8 12194void
91447636
A
12195vm_map_region_top_walk(
12196 vm_map_entry_t entry,
12197 vm_region_top_info_t top)
1c79356b 12198{
1c79356b 12199
3e170ce0 12200 if (VME_OBJECT(entry) == 0 || entry->is_sub_map) {
2d21ac55
A
12201 top->share_mode = SM_EMPTY;
12202 top->ref_count = 0;
12203 top->obj_id = 0;
12204 return;
1c79356b 12205 }
2d21ac55 12206
91447636 12207 {
2d21ac55
A
12208 struct vm_object *obj, *tmp_obj;
12209 int ref_count;
12210 uint32_t entry_size;
1c79356b 12211
b0d623f7 12212 entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64);
1c79356b 12213
3e170ce0 12214 obj = VME_OBJECT(entry);
1c79356b 12215
2d21ac55
A
12216 vm_object_lock(obj);
12217
12218 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
12219 ref_count--;
12220
b0d623f7 12221 assert(obj->reusable_page_count <= obj->resident_page_count);
2d21ac55
A
12222 if (obj->shadow) {
12223 if (ref_count == 1)
b0d623f7
A
12224 top->private_pages_resident =
12225 OBJ_RESIDENT_COUNT(obj, entry_size);
2d21ac55 12226 else
b0d623f7
A
12227 top->shared_pages_resident =
12228 OBJ_RESIDENT_COUNT(obj, entry_size);
2d21ac55
A
12229 top->ref_count = ref_count;
12230 top->share_mode = SM_COW;
91447636 12231
2d21ac55
A
12232 while ((tmp_obj = obj->shadow)) {
12233 vm_object_lock(tmp_obj);
12234 vm_object_unlock(obj);
12235 obj = tmp_obj;
1c79356b 12236
2d21ac55
A
12237 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
12238 ref_count--;
1c79356b 12239
b0d623f7
A
12240 assert(obj->reusable_page_count <= obj->resident_page_count);
12241 top->shared_pages_resident +=
12242 OBJ_RESIDENT_COUNT(obj, entry_size);
2d21ac55
A
12243 top->ref_count += ref_count - 1;
12244 }
1c79356b 12245 } else {
6d2010ae
A
12246 if (entry->superpage_size) {
12247 top->share_mode = SM_LARGE_PAGE;
12248 top->shared_pages_resident = 0;
12249 top->private_pages_resident = entry_size;
12250 } else if (entry->needs_copy) {
2d21ac55 12251 top->share_mode = SM_COW;
b0d623f7
A
12252 top->shared_pages_resident =
12253 OBJ_RESIDENT_COUNT(obj, entry_size);
2d21ac55
A
12254 } else {
12255 if (ref_count == 1 ||
12256 (ref_count == 2 && !(obj->pager_trusted) && !(obj->internal))) {
12257 top->share_mode = SM_PRIVATE;
39236c6e
A
12258 top->private_pages_resident =
12259 OBJ_RESIDENT_COUNT(obj,
12260 entry_size);
2d21ac55
A
12261 } else {
12262 top->share_mode = SM_SHARED;
b0d623f7
A
12263 top->shared_pages_resident =
12264 OBJ_RESIDENT_COUNT(obj,
12265 entry_size);
2d21ac55
A
12266 }
12267 }
12268 top->ref_count = ref_count;
1c79356b 12269 }
b0d623f7 12270 /* XXX K64: obj_id will be truncated */
39236c6e 12271 top->obj_id = (unsigned int) (uintptr_t)VM_KERNEL_ADDRPERM(obj);
1c79356b 12272
2d21ac55 12273 vm_object_unlock(obj);
1c79356b 12274 }
91447636
A
12275}
12276
0c530ab8 12277void
91447636
A
12278vm_map_region_walk(
12279 vm_map_t map,
2d21ac55
A
12280 vm_map_offset_t va,
12281 vm_map_entry_t entry,
91447636
A
12282 vm_object_offset_t offset,
12283 vm_object_size_t range,
2d21ac55 12284 vm_region_extended_info_t extended,
39236c6e
A
12285 boolean_t look_for_pages,
12286 mach_msg_type_number_t count)
91447636 12287{
39037602
A
12288 struct vm_object *obj, *tmp_obj;
12289 vm_map_offset_t last_offset;
12290 int i;
12291 int ref_count;
91447636
A
12292 struct vm_object *shadow_object;
12293 int shadow_depth;
12294
3e170ce0 12295 if ((VME_OBJECT(entry) == 0) ||
2d21ac55 12296 (entry->is_sub_map) ||
3e170ce0 12297 (VME_OBJECT(entry)->phys_contiguous &&
6d2010ae 12298 !entry->superpage_size)) {
2d21ac55
A
12299 extended->share_mode = SM_EMPTY;
12300 extended->ref_count = 0;
12301 return;
1c79356b 12302 }
6d2010ae
A
12303
12304 if (entry->superpage_size) {
12305 extended->shadow_depth = 0;
12306 extended->share_mode = SM_LARGE_PAGE;
12307 extended->ref_count = 1;
12308 extended->external_pager = 0;
12309 extended->pages_resident = (unsigned int)(range >> PAGE_SHIFT);
12310 extended->shadow_depth = 0;
12311 return;
12312 }
12313
39037602 12314 obj = VME_OBJECT(entry);
2d21ac55 12315
39037602 12316 vm_object_lock(obj);
2d21ac55 12317
39037602
A
12318 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
12319 ref_count--;
2d21ac55 12320
39037602
A
12321 if (look_for_pages) {
12322 for (last_offset = offset + range;
12323 offset < last_offset;
12324 offset += PAGE_SIZE_64, va += PAGE_SIZE) {
12325#if DEVELOPMENT || DEBUG
12326 if (vm_region_footprint) {
12327 if (obj->purgable != VM_PURGABLE_DENY) {
12328 /* alternate accounting */
12329 } else if (entry->iokit_acct) {
12330 /* alternate accounting */
12331 extended->pages_resident++;
12332 extended->pages_dirtied++;
12333 } else {
12334 int disp;
12335
12336 disp = 0;
12337 pmap_query_page_info(map->pmap, va, &disp);
12338 if (disp & PMAP_QUERY_PAGE_PRESENT) {
12339 extended->pages_resident++;
12340 if (disp & PMAP_QUERY_PAGE_REUSABLE) {
12341 extended->pages_reusable++;
12342 } else if (!(disp & PMAP_QUERY_PAGE_INTERNAL) ||
12343 (disp & PMAP_QUERY_PAGE_ALTACCT)) {
12344 /* alternate accounting */
12345 } else {
12346 extended->pages_dirtied++;
12347 }
12348 } else if (disp & PMAP_QUERY_PAGE_COMPRESSED) {
12349 if (disp & PMAP_QUERY_PAGE_COMPRESSED_ALTACCT) {
12350 /* alternate accounting */
12351 } else {
12352 extended->pages_swapped_out++;
12353 }
b0d623f7 12354 }
2d21ac55 12355 }
39037602 12356 continue;
2d21ac55 12357 }
39037602
A
12358#endif /* DEVELOPMENT || DEBUG */
12359 vm_map_region_look_for_page(map, va, obj,
12360 offset, ref_count,
12361 0, extended, count);
2d21ac55 12362 }
39037602
A
12363#if DEVELOPMENT || DEBUG
12364 if (vm_region_footprint) {
12365 goto collect_object_info;
12366 }
12367#endif /* DEVELOPMENT || DEBUG */
12368 } else {
12369#if DEVELOPMENT || DEBUG
12370 collect_object_info:
12371#endif /* DEVELOPMENT || DEBUG */
12372 shadow_object = obj->shadow;
12373 shadow_depth = 0;
2d21ac55 12374
39037602
A
12375 if ( !(obj->pager_trusted) && !(obj->internal))
12376 extended->external_pager = 1;
12377
12378 if (shadow_object != VM_OBJECT_NULL) {
12379 vm_object_lock(shadow_object);
12380 for (;
12381 shadow_object != VM_OBJECT_NULL;
12382 shadow_depth++) {
12383 vm_object_t next_shadow;
12384
12385 if ( !(shadow_object->pager_trusted) &&
12386 !(shadow_object->internal))
12387 extended->external_pager = 1;
12388
12389 next_shadow = shadow_object->shadow;
12390 if (next_shadow) {
12391 vm_object_lock(next_shadow);
12392 }
12393 vm_object_unlock(shadow_object);
12394 shadow_object = next_shadow;
2d21ac55 12395 }
91447636 12396 }
39037602
A
12397 extended->shadow_depth = shadow_depth;
12398 }
1c79356b 12399
39037602
A
12400 if (extended->shadow_depth || entry->needs_copy)
12401 extended->share_mode = SM_COW;
12402 else {
12403 if (ref_count == 1)
12404 extended->share_mode = SM_PRIVATE;
12405 else {
12406 if (obj->true_share)
12407 extended->share_mode = SM_TRUESHARED;
12408 else
12409 extended->share_mode = SM_SHARED;
2d21ac55 12410 }
39037602
A
12411 }
12412 extended->ref_count = ref_count - extended->shadow_depth;
12413
12414 for (i = 0; i < extended->shadow_depth; i++) {
12415 if ((tmp_obj = obj->shadow) == 0)
12416 break;
12417 vm_object_lock(tmp_obj);
2d21ac55 12418 vm_object_unlock(obj);
1c79356b 12419
39037602
A
12420 if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress)
12421 ref_count--;
12422
12423 extended->ref_count += ref_count;
12424 obj = tmp_obj;
12425 }
12426 vm_object_unlock(obj);
91447636 12427
39037602
A
12428 if (extended->share_mode == SM_SHARED) {
12429 vm_map_entry_t cur;
12430 vm_map_entry_t last;
12431 int my_refs;
91447636 12432
39037602
A
12433 obj = VME_OBJECT(entry);
12434 last = vm_map_to_entry(map);
12435 my_refs = 0;
91447636 12436
39037602
A
12437 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
12438 ref_count--;
12439 for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next)
12440 my_refs += vm_map_region_count_obj_refs(cur, obj);
12441
12442 if (my_refs == ref_count)
12443 extended->share_mode = SM_PRIVATE_ALIASED;
12444 else if (my_refs > 1)
12445 extended->share_mode = SM_SHARED_ALIASED;
91447636 12446 }
1c79356b
A
12447}
12448
1c79356b 12449
91447636
A
12450/* object is locked on entry and locked on return */
12451
12452
12453static void
12454vm_map_region_look_for_page(
12455 __unused vm_map_t map,
2d21ac55
A
12456 __unused vm_map_offset_t va,
12457 vm_object_t object,
12458 vm_object_offset_t offset,
91447636
A
12459 int max_refcnt,
12460 int depth,
39236c6e
A
12461 vm_region_extended_info_t extended,
12462 mach_msg_type_number_t count)
1c79356b 12463{
39037602
A
12464 vm_page_t p;
12465 vm_object_t shadow;
12466 int ref_count;
12467 vm_object_t caller_object;
12468
91447636
A
12469 shadow = object->shadow;
12470 caller_object = object;
1c79356b 12471
91447636
A
12472
12473 while (TRUE) {
1c79356b 12474
91447636 12475 if ( !(object->pager_trusted) && !(object->internal))
2d21ac55 12476 extended->external_pager = 1;
1c79356b 12477
91447636
A
12478 if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
12479 if (shadow && (max_refcnt == 1))
12480 extended->pages_shared_now_private++;
1c79356b 12481
39236c6e 12482 if (!p->fictitious &&
39037602 12483 (p->dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(p))))
91447636 12484 extended->pages_dirtied++;
39236c6e 12485 else if (count >= VM_REGION_EXTENDED_INFO_COUNT) {
39037602 12486 if (p->reusable || object->all_reusable) {
39236c6e
A
12487 extended->pages_reusable++;
12488 }
12489 }
1c79356b 12490
39236c6e 12491 extended->pages_resident++;
91447636
A
12492
12493 if(object != caller_object)
2d21ac55 12494 vm_object_unlock(object);
91447636
A
12495
12496 return;
1c79356b 12497 }
39236c6e
A
12498 if (object->internal &&
12499 object->alive &&
12500 !object->terminating &&
12501 object->pager_ready) {
12502
39037602
A
12503 if (VM_COMPRESSOR_PAGER_STATE_GET(object, offset)
12504 == VM_EXTERNAL_STATE_EXISTS) {
12505 /* the pager has that page */
12506 extended->pages_swapped_out++;
12507 if (object != caller_object)
12508 vm_object_unlock(object);
12509 return;
2d21ac55 12510 }
1c79356b 12511 }
2d21ac55 12512
91447636 12513 if (shadow) {
2d21ac55 12514 vm_object_lock(shadow);
1c79356b 12515
91447636
A
12516 if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress)
12517 ref_count--;
1c79356b 12518
91447636
A
12519 if (++depth > extended->shadow_depth)
12520 extended->shadow_depth = depth;
1c79356b 12521
91447636
A
12522 if (ref_count > max_refcnt)
12523 max_refcnt = ref_count;
12524
12525 if(object != caller_object)
2d21ac55 12526 vm_object_unlock(object);
91447636 12527
6d2010ae 12528 offset = offset + object->vo_shadow_offset;
91447636
A
12529 object = shadow;
12530 shadow = object->shadow;
12531 continue;
1c79356b 12532 }
91447636 12533 if(object != caller_object)
2d21ac55 12534 vm_object_unlock(object);
91447636
A
12535 break;
12536 }
12537}
1c79356b 12538
91447636
A
12539static int
12540vm_map_region_count_obj_refs(
12541 vm_map_entry_t entry,
12542 vm_object_t object)
12543{
39037602
A
12544 int ref_count;
12545 vm_object_t chk_obj;
12546 vm_object_t tmp_obj;
1c79356b 12547
3e170ce0 12548 if (VME_OBJECT(entry) == 0)
2d21ac55 12549 return(0);
1c79356b 12550
91447636 12551 if (entry->is_sub_map)
2d21ac55 12552 return(0);
91447636 12553 else {
2d21ac55 12554 ref_count = 0;
1c79356b 12555
3e170ce0 12556 chk_obj = VME_OBJECT(entry);
2d21ac55 12557 vm_object_lock(chk_obj);
1c79356b 12558
2d21ac55
A
12559 while (chk_obj) {
12560 if (chk_obj == object)
12561 ref_count++;
12562 tmp_obj = chk_obj->shadow;
12563 if (tmp_obj)
12564 vm_object_lock(tmp_obj);
12565 vm_object_unlock(chk_obj);
1c79356b 12566
2d21ac55
A
12567 chk_obj = tmp_obj;
12568 }
1c79356b 12569 }
91447636 12570 return(ref_count);
1c79356b
A
12571}
12572
12573
12574/*
91447636
A
12575 * Routine: vm_map_simplify
12576 *
12577 * Description:
12578 * Attempt to simplify the map representation in
12579 * the vicinity of the given starting address.
12580 * Note:
12581 * This routine is intended primarily to keep the
12582 * kernel maps more compact -- they generally don't
12583 * benefit from the "expand a map entry" technology
12584 * at allocation time because the adjacent entry
12585 * is often wired down.
1c79356b 12586 */
91447636
A
12587void
12588vm_map_simplify_entry(
12589 vm_map_t map,
12590 vm_map_entry_t this_entry)
1c79356b 12591{
91447636 12592 vm_map_entry_t prev_entry;
1c79356b 12593
91447636 12594 counter(c_vm_map_simplify_entry_called++);
1c79356b 12595
91447636 12596 prev_entry = this_entry->vme_prev;
1c79356b 12597
91447636 12598 if ((this_entry != vm_map_to_entry(map)) &&
2d21ac55 12599 (prev_entry != vm_map_to_entry(map)) &&
1c79356b 12600
91447636 12601 (prev_entry->vme_end == this_entry->vme_start) &&
1c79356b 12602
2d21ac55 12603 (prev_entry->is_sub_map == this_entry->is_sub_map) &&
3e170ce0
A
12604 (VME_OBJECT(prev_entry) == VME_OBJECT(this_entry)) &&
12605 ((VME_OFFSET(prev_entry) + (prev_entry->vme_end -
91447636 12606 prev_entry->vme_start))
3e170ce0 12607 == VME_OFFSET(this_entry)) &&
1c79356b 12608
fe8ab488
A
12609 (prev_entry->behavior == this_entry->behavior) &&
12610 (prev_entry->needs_copy == this_entry->needs_copy) &&
91447636
A
12611 (prev_entry->protection == this_entry->protection) &&
12612 (prev_entry->max_protection == this_entry->max_protection) &&
fe8ab488
A
12613 (prev_entry->inheritance == this_entry->inheritance) &&
12614 (prev_entry->use_pmap == this_entry->use_pmap) &&
3e170ce0 12615 (VME_ALIAS(prev_entry) == VME_ALIAS(this_entry)) &&
2d21ac55 12616 (prev_entry->no_cache == this_entry->no_cache) &&
fe8ab488
A
12617 (prev_entry->permanent == this_entry->permanent) &&
12618 (prev_entry->map_aligned == this_entry->map_aligned) &&
12619 (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) &&
12620 (prev_entry->used_for_jit == this_entry->used_for_jit) &&
12621 /* from_reserved_zone: OK if that field doesn't match */
12622 (prev_entry->iokit_acct == this_entry->iokit_acct) &&
3e170ce0
A
12623 (prev_entry->vme_resilient_codesign ==
12624 this_entry->vme_resilient_codesign) &&
12625 (prev_entry->vme_resilient_media ==
12626 this_entry->vme_resilient_media) &&
fe8ab488 12627
91447636
A
12628 (prev_entry->wired_count == this_entry->wired_count) &&
12629 (prev_entry->user_wired_count == this_entry->user_wired_count) &&
1c79356b 12630
39037602 12631 ((prev_entry->vme_atomic == FALSE) && (this_entry->vme_atomic == FALSE)) &&
91447636
A
12632 (prev_entry->in_transition == FALSE) &&
12633 (this_entry->in_transition == FALSE) &&
12634 (prev_entry->needs_wakeup == FALSE) &&
12635 (this_entry->needs_wakeup == FALSE) &&
12636 (prev_entry->is_shared == FALSE) &&
fe8ab488
A
12637 (this_entry->is_shared == FALSE) &&
12638 (prev_entry->superpage_size == FALSE) &&
12639 (this_entry->superpage_size == FALSE)
2d21ac55 12640 ) {
316670eb 12641 vm_map_store_entry_unlink(map, prev_entry);
e2d2fc5c 12642 assert(prev_entry->vme_start < this_entry->vme_end);
39236c6e
A
12643 if (prev_entry->map_aligned)
12644 assert(VM_MAP_PAGE_ALIGNED(prev_entry->vme_start,
12645 VM_MAP_PAGE_MASK(map)));
91447636 12646 this_entry->vme_start = prev_entry->vme_start;
3e170ce0
A
12647 VME_OFFSET_SET(this_entry, VME_OFFSET(prev_entry));
12648
12649 if (map->holelistenabled) {
12650 vm_map_store_update_first_free(map, this_entry, TRUE);
12651 }
12652
2d21ac55 12653 if (prev_entry->is_sub_map) {
3e170ce0 12654 vm_map_deallocate(VME_SUBMAP(prev_entry));
2d21ac55 12655 } else {
3e170ce0 12656 vm_object_deallocate(VME_OBJECT(prev_entry));
2d21ac55 12657 }
91447636 12658 vm_map_entry_dispose(map, prev_entry);
0c530ab8 12659 SAVE_HINT_MAP_WRITE(map, this_entry);
91447636 12660 counter(c_vm_map_simplified++);
1c79356b 12661 }
91447636 12662}
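/*
 * Illustrative example (not in the original source): two adjacent entries
 * [0x1000, 0x2000) and [0x2000, 0x3000) that reference the same VM object at
 * offsets 0x0 and 0x1000, with identical protections and attributes, are
 * coalesced by vm_map_simplify_entry() into a single [0x1000, 0x3000) entry,
 * disposing of one map entry and dropping one reference on the object.
 */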
1c79356b 12663
91447636
A
12664void
12665vm_map_simplify(
12666 vm_map_t map,
12667 vm_map_offset_t start)
12668{
12669 vm_map_entry_t this_entry;
1c79356b 12670
91447636
A
12671 vm_map_lock(map);
12672 if (vm_map_lookup_entry(map, start, &this_entry)) {
12673 vm_map_simplify_entry(map, this_entry);
12674 vm_map_simplify_entry(map, this_entry->vme_next);
12675 }
12676 counter(c_vm_map_simplify_called++);
12677 vm_map_unlock(map);
12678}
1c79356b 12679
91447636
A
12680static void
12681vm_map_simplify_range(
12682 vm_map_t map,
12683 vm_map_offset_t start,
12684 vm_map_offset_t end)
12685{
12686 vm_map_entry_t entry;
1c79356b 12687
91447636
A
12688 /*
12689 * The map should be locked (for "write") by the caller.
12690 */
1c79356b 12691
91447636
A
12692 if (start >= end) {
12693 /* invalid address range */
12694 return;
12695 }
1c79356b 12696
39236c6e
A
12697 start = vm_map_trunc_page(start,
12698 VM_MAP_PAGE_MASK(map));
12699 end = vm_map_round_page(end,
12700 VM_MAP_PAGE_MASK(map));
2d21ac55 12701
91447636
A
12702 if (!vm_map_lookup_entry(map, start, &entry)) {
12703 /* "start" is not mapped and "entry" ends before "start" */
12704 if (entry == vm_map_to_entry(map)) {
12705 /* start with first entry in the map */
12706 entry = vm_map_first_entry(map);
12707 } else {
12708 /* start with next entry */
12709 entry = entry->vme_next;
12710 }
12711 }
12712
12713 while (entry != vm_map_to_entry(map) &&
12714 entry->vme_start <= end) {
12715 /* try and coalesce "entry" with its previous entry */
12716 vm_map_simplify_entry(map, entry);
12717 entry = entry->vme_next;
12718 }
12719}
1c79356b 12720
1c79356b 12721
91447636
A
12722/*
12723 * Routine: vm_map_machine_attribute
12724 * Purpose:
12725 * Provide machine-specific attributes to mappings,
12726 * such as cacheability etc. for machines that provide
12727 * them. NUMA architectures and machines with big/strange
12728 * caches will use this.
12729 * Note:
12730 * Responsibilities for locking and checking are handled here;
12731 * everything else is done in the pmap module. If any non-volatile
12732 * information must be kept, the pmap module should handle
12733 * it itself. [This assumes that attributes do not
12734 * need to be inherited, which seems ok to me]
12735 */
12736kern_return_t
12737vm_map_machine_attribute(
12738 vm_map_t map,
12739 vm_map_offset_t start,
12740 vm_map_offset_t end,
12741 vm_machine_attribute_t attribute,
12742 vm_machine_attribute_val_t* value) /* IN/OUT */
12743{
12744 kern_return_t ret;
12745 vm_map_size_t sync_size;
12746 vm_map_entry_t entry;
12747
12748 if (start < vm_map_min(map) || end > vm_map_max(map))
12749 return KERN_INVALID_ADDRESS;
1c79356b 12750
91447636
A
12751 /* Figure how much memory we need to flush (in page increments) */
12752 sync_size = end - start;
1c79356b 12753
91447636
A
12754 vm_map_lock(map);
12755
12756 if (attribute != MATTR_CACHE) {
12757 /* If we don't have to find physical addresses, we */
12758 /* don't have to do an explicit traversal here. */
12759 ret = pmap_attribute(map->pmap, start, end-start,
12760 attribute, value);
12761 vm_map_unlock(map);
12762 return ret;
12763 }
1c79356b 12764
91447636 12765 ret = KERN_SUCCESS; /* Assume it all worked */
1c79356b 12766
91447636
A
12767 while(sync_size) {
12768 if (vm_map_lookup_entry(map, start, &entry)) {
12769 vm_map_size_t sub_size;
12770 if((entry->vme_end - start) > sync_size) {
12771 sub_size = sync_size;
12772 sync_size = 0;
12773 } else {
12774 sub_size = entry->vme_end - start;
2d21ac55 12775 sync_size -= sub_size;
91447636
A
12776 }
12777 if(entry->is_sub_map) {
12778 vm_map_offset_t sub_start;
12779 vm_map_offset_t sub_end;
1c79356b 12780
91447636 12781 sub_start = (start - entry->vme_start)
3e170ce0 12782 + VME_OFFSET(entry);
91447636
A
12783 sub_end = sub_start + sub_size;
12784 vm_map_machine_attribute(
3e170ce0 12785 VME_SUBMAP(entry),
91447636
A
12786 sub_start,
12787 sub_end,
12788 attribute, value);
12789 } else {
3e170ce0 12790 if (VME_OBJECT(entry)) {
91447636
A
12791 vm_page_t m;
12792 vm_object_t object;
12793 vm_object_t base_object;
12794 vm_object_t last_object;
12795 vm_object_offset_t offset;
12796 vm_object_offset_t base_offset;
12797 vm_map_size_t range;
12798 range = sub_size;
12799 offset = (start - entry->vme_start)
3e170ce0 12800 + VME_OFFSET(entry);
91447636 12801 base_offset = offset;
3e170ce0 12802 object = VME_OBJECT(entry);
91447636
A
12803 base_object = object;
12804 last_object = NULL;
1c79356b 12805
91447636 12806 vm_object_lock(object);
1c79356b 12807
91447636
A
12808 while (range) {
12809 m = vm_page_lookup(
12810 object, offset);
1c79356b 12811
91447636
A
12812 if (m && !m->fictitious) {
12813 ret =
2d21ac55 12814 pmap_attribute_cache_sync(
39037602 12815 VM_PAGE_GET_PHYS_PAGE(m),
2d21ac55
A
12816 PAGE_SIZE,
12817 attribute, value);
91447636
A
12818
12819 } else if (object->shadow) {
6d2010ae 12820 offset = offset + object->vo_shadow_offset;
91447636
A
12821 last_object = object;
12822 object = object->shadow;
12823 vm_object_lock(last_object->shadow);
12824 vm_object_unlock(last_object);
12825 continue;
12826 }
12827 range -= PAGE_SIZE;
1c79356b 12828
91447636
A
12829 if (base_object != object) {
12830 vm_object_unlock(object);
12831 vm_object_lock(base_object);
12832 object = base_object;
12833 }
12834 /* Bump to the next page */
12835 base_offset += PAGE_SIZE;
12836 offset = base_offset;
12837 }
12838 vm_object_unlock(object);
12839 }
12840 }
12841 start += sub_size;
12842 } else {
12843 vm_map_unlock(map);
12844 return KERN_FAILURE;
12845 }
12846
1c79356b 12847 }
e5568f75 12848
91447636 12849 vm_map_unlock(map);
e5568f75 12850
91447636
A
12851 return ret;
12852}
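#if 0	/* A minimal user-space sketch (illustrative only): flushing the data
	 * cache for a range via the Mach vm_machine_attribute() call, which is
	 * the usual front end for the kernel routine above.  The routine name
	 * and the MATTR_* constants come from <mach/mach.h> and
	 * <mach/vm_attributes.h>; treat the exact routing as an assumption. */
#include <mach/mach.h>
#include <mach/vm_attributes.h>

static kern_return_t
flush_cache_for_range(vm_address_t addr, vm_size_t size)
{
	vm_machine_attribute_val_t value = MATTR_VAL_CACHE_FLUSH;

	/* Ask the kernel to flush the cache for [addr, addr + size). */
	return vm_machine_attribute(mach_task_self(), addr, size,
				    MATTR_CACHE, &value);
}
#endif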
e5568f75 12853
91447636
A
12854/*
12855 * vm_map_behavior_set:
12856 *
12857 * Sets the paging reference behavior of the specified address
12858 * range in the target map. Paging reference behavior affects
12859 * how pagein operations resulting from faults on the map will be
12860 * clustered.
12861 */
12862kern_return_t
12863vm_map_behavior_set(
12864 vm_map_t map,
12865 vm_map_offset_t start,
12866 vm_map_offset_t end,
12867 vm_behavior_t new_behavior)
12868{
39037602 12869 vm_map_entry_t entry;
91447636 12870 vm_map_entry_t temp_entry;
e5568f75 12871
91447636 12872 XPR(XPR_VM_MAP,
2d21ac55 12873 "vm_map_behavior_set, 0x%X start 0x%X end 0x%X behavior %d",
b0d623f7 12874 map, start, end, new_behavior, 0);
e5568f75 12875
6d2010ae
A
12876 if (start > end ||
12877 start < vm_map_min(map) ||
12878 end > vm_map_max(map)) {
12879 return KERN_NO_SPACE;
12880 }
12881
91447636 12882 switch (new_behavior) {
b0d623f7
A
12883
12884 /*
12885 * This first block of behaviors all set a persistent state on the specified
12886 * memory range. All we have to do here is to record the desired behavior
12887 * in the vm_map_entry_t's.
12888 */
12889
91447636
A
12890 case VM_BEHAVIOR_DEFAULT:
12891 case VM_BEHAVIOR_RANDOM:
12892 case VM_BEHAVIOR_SEQUENTIAL:
12893 case VM_BEHAVIOR_RSEQNTL:
b0d623f7
A
12894 case VM_BEHAVIOR_ZERO_WIRED_PAGES:
12895 vm_map_lock(map);
12896
12897 /*
12898 * The entire address range must be valid for the map.
12899 * Note that vm_map_range_check() does a
12900 * vm_map_lookup_entry() internally and returns the
12901 * entry containing the start of the address range if
12902 * the entire range is valid.
12903 */
12904 if (vm_map_range_check(map, start, end, &temp_entry)) {
12905 entry = temp_entry;
12906 vm_map_clip_start(map, entry, start);
12907 }
12908 else {
12909 vm_map_unlock(map);
12910 return(KERN_INVALID_ADDRESS);
12911 }
12912
12913 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
12914 vm_map_clip_end(map, entry, end);
fe8ab488
A
12915 if (entry->is_sub_map) {
12916 assert(!entry->use_pmap);
12917 }
b0d623f7
A
12918
12919 if( new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES ) {
12920 entry->zero_wired_pages = TRUE;
12921 } else {
12922 entry->behavior = new_behavior;
12923 }
12924 entry = entry->vme_next;
12925 }
12926
12927 vm_map_unlock(map);
91447636 12928 break;
b0d623f7
A
12929
12930 /*
12931 * The rest of these are different from the above in that they cause
12932 * an immediate action to take place as opposed to setting a behavior that
12933 * affects future actions.
12934 */
12935
91447636 12936 case VM_BEHAVIOR_WILLNEED:
b0d623f7
A
12937 return vm_map_willneed(map, start, end);
12938
91447636 12939 case VM_BEHAVIOR_DONTNEED:
b0d623f7
A
12940 return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS);
12941
12942 case VM_BEHAVIOR_FREE:
12943 return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS);
12944
12945 case VM_BEHAVIOR_REUSABLE:
12946 return vm_map_reusable_pages(map, start, end);
12947
12948 case VM_BEHAVIOR_REUSE:
12949 return vm_map_reuse_pages(map, start, end);
12950
12951 case VM_BEHAVIOR_CAN_REUSE:
12952 return vm_map_can_reuse(map, start, end);
12953
3e170ce0
A
12954#if MACH_ASSERT
12955 case VM_BEHAVIOR_PAGEOUT:
12956 return vm_map_pageout(map, start, end);
12957#endif /* MACH_ASSERT */
12958
1c79356b 12959 default:
91447636 12960 return(KERN_INVALID_ARGUMENT);
1c79356b 12961 }
1c79356b 12962
b0d623f7
A
12963 return(KERN_SUCCESS);
12964}
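#if 0	/* A minimal user-space sketch (illustrative only): the Mach
	 * vm_behavior_set() call is one path into vm_map_behavior_set() above.
	 * The constants come from <mach/vm_behavior.h>; treat the exact
	 * routing as an assumption. */
#include <mach/mach.h>

static kern_return_t
hint_sequential(vm_address_t addr, vm_size_t size)
{
	/* Persistently mark [addr, addr + size) for sequential-access
	 * clustering of future page-ins. */
	return vm_behavior_set(mach_task_self(), addr, size,
			       VM_BEHAVIOR_SEQUENTIAL);
}
#endif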
12965
12966
12967/*
12968 * Internals for madvise(MADV_WILLNEED) system call.
12969 *
12970 * The present implementation is to do a read-ahead if the mapping corresponds
12971 * to a mapped regular file. If it's an anonymous mapping, then we do nothing
12972 * and basically ignore the "advice" (which we are always free to do).
12973 */
12974
12975
12976static kern_return_t
12977vm_map_willneed(
12978 vm_map_t map,
12979 vm_map_offset_t start,
12980 vm_map_offset_t end
12981)
12982{
12983 vm_map_entry_t entry;
12984 vm_object_t object;
12985 memory_object_t pager;
12986 struct vm_object_fault_info fault_info;
12987 kern_return_t kr;
12988 vm_object_size_t len;
12989 vm_object_offset_t offset;
1c79356b 12990
91447636 12991 /*
b0d623f7
A
12992 * Fill in static values in fault_info. Several fields get ignored by the code
12993 * we call, but we'll fill them in anyway since uninitialized fields are bad
12994 * when it comes to future backwards compatibility.
91447636 12995 */
b0d623f7
A
12996
12997 fault_info.interruptible = THREAD_UNINT; /* ignored value */
12998 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
12999 fault_info.no_cache = FALSE; /* ignored value */
13000 fault_info.stealth = TRUE;
6d2010ae
A
13001 fault_info.io_sync = FALSE;
13002 fault_info.cs_bypass = FALSE;
0b4c1975 13003 fault_info.mark_zf_absent = FALSE;
316670eb 13004 fault_info.batch_pmap_op = FALSE;
b0d623f7
A
13005
13006 /*
13007 * The MADV_WILLNEED operation doesn't require any changes to the
13008 * vm_map_entry_t's, so the read lock is sufficient.
13009 */
13010
13011 vm_map_lock_read(map);
13012
13013 /*
13014 * The madvise semantics require that the address range be fully
13015 * allocated with no holes. Otherwise, we're required to return
13016 * an error.
13017 */
13018
6d2010ae
A
13019 if (! vm_map_range_check(map, start, end, &entry)) {
13020 vm_map_unlock_read(map);
13021 return KERN_INVALID_ADDRESS;
13022 }
b0d623f7 13023
6d2010ae
A
13024 /*
13025 * Examine each vm_map_entry_t in the range.
13026 */
13027 for (; entry != vm_map_to_entry(map) && start < end; ) {
13028
b0d623f7 13029 /*
6d2010ae
A
13030 * The first time through, the start address could be anywhere
13031 * within the vm_map_entry we found. So adjust the offset to
13032 * correspond. After that, the offset will always be zero to
13033 * correspond to the beginning of the current vm_map_entry.
b0d623f7 13034 */
3e170ce0 13035 offset = (start - entry->vme_start) + VME_OFFSET(entry);
b0d623f7 13036
6d2010ae
A
13037 /*
13038 * Set the length so we don't go beyond the end of the
13039 * map_entry or beyond the end of the range we were given.
13040 * This range could also span multiple map entries, all of which
13041 * map different files, so make sure we only do the right amount
13042 * of I/O for each object. Note that it's possible for there
13043 * to be multiple map entries all referring to the same object
13044 * but with different page permissions, but it's not worth
13045 * trying to optimize that case.
13046 */
13047 len = MIN(entry->vme_end - start, end - start);
b0d623f7 13048
6d2010ae
A
13049 if ((vm_size_t) len != len) {
13050 /* 32-bit overflow */
13051 len = (vm_size_t) (0 - PAGE_SIZE);
13052 }
13053 fault_info.cluster_size = (vm_size_t) len;
13054 fault_info.lo_offset = offset;
13055 fault_info.hi_offset = offset + len;
3e170ce0 13056 fault_info.user_tag = VME_ALIAS(entry);
fe8ab488
A
13057 fault_info.pmap_options = 0;
13058 if (entry->iokit_acct ||
13059 (!entry->is_sub_map && !entry->use_pmap)) {
13060 fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
13061 }
b0d623f7 13062
6d2010ae
A
13063 /*
13064 * If there's no read permission to this mapping, then just
13065 * skip it.
13066 */
13067 if ((entry->protection & VM_PROT_READ) == 0) {
13068 entry = entry->vme_next;
13069 start = entry->vme_start;
13070 continue;
13071 }
b0d623f7 13072
6d2010ae
A
13073 /*
13074 * Find the file object backing this map entry. If there is
13075 * none, then we simply ignore the "will need" advice for this
13076 * entry and go on to the next one.
13077 */
13078 if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) {
13079 entry = entry->vme_next;
13080 start = entry->vme_start;
13081 continue;
13082 }
b0d623f7 13083
6d2010ae
A
13084 /*
13085 * The data_request() could take a long time, so let's
13086 * release the map lock to avoid blocking other threads.
13087 */
13088 vm_map_unlock_read(map);
b0d623f7 13089
6d2010ae
A
13090 vm_object_paging_begin(object);
13091 pager = object->pager;
13092 vm_object_unlock(object);
b0d623f7 13093
6d2010ae
A
13094 /*
13095 * Get the data from the object asynchronously.
13096 *
13097 * Note that memory_object_data_request() places limits on the
13098 * amount of I/O it will do. Regardless of the len we
fe8ab488 13099 * specified, it won't do more than MAX_UPL_TRANSFER_BYTES and it
6d2010ae
A
13100 * silently truncates the len to that size. This isn't
13101 * necessarily bad since madvise shouldn't really be used to
13102 * page in unlimited amounts of data. Other Unix variants
13103 * limit the willneed case as well. If this turns out to be an
13104 * issue for developers, then we can always adjust the policy
13105 * here and still be backwards compatible since this is all
13106 * just "advice".
13107 */
13108 kr = memory_object_data_request(
13109 pager,
13110 offset + object->paging_offset,
13111 0, /* ignored */
13112 VM_PROT_READ,
13113 (memory_object_fault_info_t)&fault_info);
b0d623f7 13114
6d2010ae
A
13115 vm_object_lock(object);
13116 vm_object_paging_end(object);
13117 vm_object_unlock(object);
b0d623f7 13118
6d2010ae
A
13119 /*
13120 * If we couldn't do the I/O for some reason, just give up on
13121 * the madvise. We still return success to the user since
13122 * madvise isn't supposed to fail when the advice can't be
13123 * taken.
13124 */
13125 if (kr != KERN_SUCCESS) {
13126 return KERN_SUCCESS;
13127 }
b0d623f7 13128
6d2010ae
A
13129 start += len;
13130 if (start >= end) {
13131 /* done */
13132 return KERN_SUCCESS;
13133 }
b0d623f7 13134
6d2010ae
A
13135 /* look up next entry */
13136 vm_map_lock_read(map);
13137 if (! vm_map_lookup_entry(map, start, &entry)) {
b0d623f7 13138 /*
6d2010ae 13139 * There's a new hole in the address range.
b0d623f7 13140 */
6d2010ae
A
13141 vm_map_unlock_read(map);
13142 return KERN_INVALID_ADDRESS;
b0d623f7 13143 }
6d2010ae 13144 }
b0d623f7
A
13145
13146 vm_map_unlock_read(map);
6d2010ae 13147 return KERN_SUCCESS;
b0d623f7
A
13148}
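#if 0	/* A minimal user-space sketch (illustrative only): madvise(2) with
	 * MADV_WILLNEED is the usual way to reach vm_map_willneed() above.
	 * As the comment before the routine notes, the advice only triggers
	 * read-ahead for file-backed mappings and is silently ignored for
	 * anonymous memory. */
#include <sys/mman.h>

static void
prefetch_mapped_file(void *addr, size_t len)
{
	/* Best effort: madvise() advice may be ignored and is never fatal. */
	(void)madvise(addr, len, MADV_WILLNEED);
}
#endif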
13149
13150static boolean_t
13151vm_map_entry_is_reusable(
13152 vm_map_entry_t entry)
13153{
3e170ce0
A
13154 /* Only user map entries */
13155
b0d623f7
A
13156 vm_object_t object;
13157
2dced7af
A
13158 if (entry->is_sub_map) {
13159 return FALSE;
13160 }
13161
3e170ce0 13162 switch (VME_ALIAS(entry)) {
39236c6e
A
13163 case VM_MEMORY_MALLOC:
13164 case VM_MEMORY_MALLOC_SMALL:
13165 case VM_MEMORY_MALLOC_LARGE:
13166 case VM_MEMORY_REALLOC:
13167 case VM_MEMORY_MALLOC_TINY:
13168 case VM_MEMORY_MALLOC_LARGE_REUSABLE:
13169 case VM_MEMORY_MALLOC_LARGE_REUSED:
13170 /*
13171 * This is a malloc() memory region: check if it's still
13172 * in its original state and can be re-used for more
13173 * malloc() allocations.
13174 */
13175 break;
13176 default:
13177 /*
13178 * Not a malloc() memory region: let the caller decide if
13179 * it's re-usable.
13180 */
13181 return TRUE;
13182 }
13183
b0d623f7
A
13184 if (entry->is_shared ||
13185 entry->is_sub_map ||
13186 entry->in_transition ||
13187 entry->protection != VM_PROT_DEFAULT ||
13188 entry->max_protection != VM_PROT_ALL ||
13189 entry->inheritance != VM_INHERIT_DEFAULT ||
13190 entry->no_cache ||
13191 entry->permanent ||
39236c6e 13192 entry->superpage_size != FALSE ||
b0d623f7
A
13193 entry->zero_wired_pages ||
13194 entry->wired_count != 0 ||
13195 entry->user_wired_count != 0) {
13196 return FALSE;
91447636 13197 }
b0d623f7 13198
3e170ce0 13199 object = VME_OBJECT(entry);
b0d623f7
A
13200 if (object == VM_OBJECT_NULL) {
13201 return TRUE;
13202 }
316670eb
A
13203 if (
13204#if 0
13205 /*
13206 * Let's proceed even if the VM object is potentially
13207 * shared.
13208 * We check for this later when processing the actual
13209 * VM pages, so the contents will be safe if shared.
13210 *
13211 * But we can still mark this memory region as "reusable" to
13212 * acknowledge that the caller did let us know that the memory
13213 * could be re-used and should not be penalized for holding
13214 * on to it. This allows its "resident size" to not include
13215 * the reusable range.
13216 */
13217 object->ref_count == 1 &&
13218#endif
b0d623f7
A
13219 object->wired_page_count == 0 &&
13220 object->copy == VM_OBJECT_NULL &&
13221 object->shadow == VM_OBJECT_NULL &&
13222 object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
13223 object->internal &&
13224 !object->true_share &&
6d2010ae 13225 object->wimg_bits == VM_WIMG_USE_DEFAULT &&
b0d623f7
A
13226 !object->code_signed) {
13227 return TRUE;
1c79356b 13228 }
b0d623f7
A
13229 return FALSE;
13230
13231
13232}
1c79356b 13233
b0d623f7
A
13234static kern_return_t
13235vm_map_reuse_pages(
13236 vm_map_t map,
13237 vm_map_offset_t start,
13238 vm_map_offset_t end)
13239{
13240 vm_map_entry_t entry;
13241 vm_object_t object;
13242 vm_object_offset_t start_offset, end_offset;
13243
13244 /*
13245 * The MADV_REUSE operation doesn't require any changes to the
13246 * vm_map_entry_t's, so the read lock is sufficient.
13247 */
0b4e3aa0 13248
b0d623f7 13249 vm_map_lock_read(map);
3e170ce0 13250 assert(map->pmap != kernel_pmap); /* protect alias access */
1c79356b 13251
b0d623f7
A
13252 /*
13253 * The madvise semantics require that the address range be fully
13254 * allocated with no holes. Otherwise, we're required to return
13255 * an error.
13256 */
13257
13258 if (!vm_map_range_check(map, start, end, &entry)) {
13259 vm_map_unlock_read(map);
13260 vm_page_stats_reusable.reuse_pages_failure++;
13261 return KERN_INVALID_ADDRESS;
1c79356b 13262 }
91447636 13263
b0d623f7
A
13264 /*
13265 * Examine each vm_map_entry_t in the range.
13266 */
13267 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
13268 entry = entry->vme_next) {
13269 /*
13270 * Sanity check on the VM map entry.
13271 */
13272 if (! vm_map_entry_is_reusable(entry)) {
13273 vm_map_unlock_read(map);
13274 vm_page_stats_reusable.reuse_pages_failure++;
13275 return KERN_INVALID_ADDRESS;
13276 }
13277
13278 /*
13279 * The first time through, the start address could be anywhere
13280 * within the vm_map_entry we found. So adjust the offset to
13281 * correspond.
13282 */
13283 if (entry->vme_start < start) {
13284 start_offset = start - entry->vme_start;
13285 } else {
13286 start_offset = 0;
13287 }
13288 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
3e170ce0
A
13289 start_offset += VME_OFFSET(entry);
13290 end_offset += VME_OFFSET(entry);
b0d623f7 13291
2dced7af 13292 assert(!entry->is_sub_map);
3e170ce0 13293 object = VME_OBJECT(entry);
b0d623f7
A
13294 if (object != VM_OBJECT_NULL) {
13295 vm_object_lock(object);
13296 vm_object_reuse_pages(object, start_offset, end_offset,
13297 TRUE);
13298 vm_object_unlock(object);
13299 }
13300
3e170ce0 13301 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSABLE) {
b0d623f7
A
13302 /*
13303 * XXX
13304 * We do not hold the VM map exclusively here.
13305 * The "alias" field is not that critical, so it's
13306 * safe to update it here, as long as it is the only
13307 * one that can be modified while holding the VM map
13308 * "shared".
13309 */
3e170ce0 13310 VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSED);
b0d623f7
A
13311 }
13312 }
13313
13314 vm_map_unlock_read(map);
13315 vm_page_stats_reusable.reuse_pages_success++;
13316 return KERN_SUCCESS;
1c79356b
A
13317}
13318
1c79356b 13319
b0d623f7
A
13320static kern_return_t
13321vm_map_reusable_pages(
13322 vm_map_t map,
13323 vm_map_offset_t start,
13324 vm_map_offset_t end)
13325{
13326 vm_map_entry_t entry;
13327 vm_object_t object;
13328 vm_object_offset_t start_offset, end_offset;
3e170ce0 13329 vm_map_offset_t pmap_offset;
b0d623f7
A
13330
13331 /*
13332 * The MADV_REUSABLE operation doesn't require any changes to the
13333 * vm_map_entry_t's, so the read lock is sufficient.
13334 */
13335
13336 vm_map_lock_read(map);
3e170ce0 13337 assert(map->pmap != kernel_pmap); /* protect alias access */
b0d623f7
A
13338
13339 /*
13340 * The madvise semantics require that the address range be fully
13341 * allocated with no holes. Otherwise, we're required to return
13342 * an error.
13343 */
13344
13345 if (!vm_map_range_check(map, start, end, &entry)) {
13346 vm_map_unlock_read(map);
13347 vm_page_stats_reusable.reusable_pages_failure++;
13348 return KERN_INVALID_ADDRESS;
13349 }
13350
13351 /*
13352 * Examine each vm_map_entry_t in the range.
13353 */
13354 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
13355 entry = entry->vme_next) {
13356 int kill_pages = 0;
13357
13358 /*
13359 * Sanity check on the VM map entry.
13360 */
13361 if (! vm_map_entry_is_reusable(entry)) {
13362 vm_map_unlock_read(map);
13363 vm_page_stats_reusable.reusable_pages_failure++;
13364 return KERN_INVALID_ADDRESS;
13365 }
13366
39037602
A
13367 if (! (entry->protection & VM_PROT_WRITE) && !entry->used_for_jit) {
13368 /* not writable: can't discard contents */
13369 vm_map_unlock_read(map);
13370 vm_page_stats_reusable.reusable_nonwritable++;
13371 vm_page_stats_reusable.reusable_pages_failure++;
13372 return KERN_PROTECTION_FAILURE;
13373 }
13374
b0d623f7
A
13375 /*
13376 * The first time through, the start address could be anywhere
13377 * within the vm_map_entry we found. So adjust the offset to
13378 * correspond.
13379 */
13380 if (entry->vme_start < start) {
13381 start_offset = start - entry->vme_start;
3e170ce0 13382 pmap_offset = start;
b0d623f7
A
13383 } else {
13384 start_offset = 0;
3e170ce0 13385 pmap_offset = entry->vme_start;
b0d623f7
A
13386 }
13387 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
3e170ce0
A
13388 start_offset += VME_OFFSET(entry);
13389 end_offset += VME_OFFSET(entry);
b0d623f7 13390
2dced7af 13391 assert(!entry->is_sub_map);
3e170ce0 13392 object = VME_OBJECT(entry);
b0d623f7
A
13393 if (object == VM_OBJECT_NULL)
13394 continue;
13395
13396
13397 vm_object_lock(object);
39037602
A
13398 if (((object->ref_count == 1) ||
13399 (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC &&
13400 object->copy == VM_OBJECT_NULL)) &&
13401 object->shadow == VM_OBJECT_NULL &&
fe8ab488
A
13402 /*
13403 * "iokit_acct" entries are billed for their virtual size
13404 * (rather than for their resident pages only), so they
13405 * wouldn't benefit from making pages reusable, and it
13406 * would be hard to keep track of pages that are both
39037602
A
13407 * "iokit_acct" and "reusable" in the pmap stats and
13408 * ledgers.
fe8ab488
A
13409 */
13410 !(entry->iokit_acct ||
39037602
A
13411 (!entry->is_sub_map && !entry->use_pmap))) {
13412 if (object->ref_count != 1) {
13413 vm_page_stats_reusable.reusable_shared++;
13414 }
b0d623f7 13415 kill_pages = 1;
39037602 13416 } else {
b0d623f7 13417 kill_pages = -1;
39037602 13418 }
b0d623f7
A
13419 if (kill_pages != -1) {
13420 vm_object_deactivate_pages(object,
13421 start_offset,
13422 end_offset - start_offset,
13423 kill_pages,
3e170ce0
A
13424 TRUE /*reusable_pages*/,
13425 map->pmap,
13426 pmap_offset);
b0d623f7
A
13427 } else {
13428 vm_page_stats_reusable.reusable_pages_shared++;
13429 }
13430 vm_object_unlock(object);
13431
3e170ce0
A
13432 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE ||
13433 VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSED) {
b0d623f7
A
13434 /*
13435 * XXX
13436 * We do not hold the VM map exclusively here.
13437 * The "alias" field is not that critical, so it's
13438 * safe to update it here, as long as it is the only
13439 * one that can be modified while holding the VM map
13440 * "shared".
13441 */
3e170ce0 13442 VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSABLE);
b0d623f7
A
13443 }
13444 }
13445
13446 vm_map_unlock_read(map);
13447 vm_page_stats_reusable.reusable_pages_success++;
13448 return KERN_SUCCESS;
13449}
13450
13451
13452static kern_return_t
13453vm_map_can_reuse(
13454 vm_map_t map,
13455 vm_map_offset_t start,
13456 vm_map_offset_t end)
13457{
13458 vm_map_entry_t entry;
13459
13460 /*
13461 * The MADV_REUSABLE operation doesn't require any changes to the
13462 * vm_map_entry_t's, so the read lock is sufficient.
13463 */
13464
13465 vm_map_lock_read(map);
3e170ce0 13466 assert(map->pmap != kernel_pmap); /* protect alias access */
b0d623f7
A
13467
13468 /*
13469 * The madvise semantics require that the address range be fully
13470 * allocated with no holes. Otherwise, we're required to return
13471 * an error.
13472 */
13473
13474 if (!vm_map_range_check(map, start, end, &entry)) {
13475 vm_map_unlock_read(map);
13476 vm_page_stats_reusable.can_reuse_failure++;
13477 return KERN_INVALID_ADDRESS;
13478 }
13479
13480 /*
13481 * Examine each vm_map_entry_t in the range.
13482 */
13483 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
13484 entry = entry->vme_next) {
13485 /*
13486 * Sanity check on the VM map entry.
13487 */
13488 if (! vm_map_entry_is_reusable(entry)) {
13489 vm_map_unlock_read(map);
13490 vm_page_stats_reusable.can_reuse_failure++;
13491 return KERN_INVALID_ADDRESS;
13492 }
13493 }
13494
13495 vm_map_unlock_read(map);
13496 vm_page_stats_reusable.can_reuse_success++;
13497 return KERN_SUCCESS;
13498}
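#if 0	/* A minimal user-space sketch (illustrative only) of the reusable-pages
	 * protocol served by vm_map_reusable_pages(), vm_map_reuse_pages() and
	 * vm_map_can_reuse() above.  It assumes the Darwin-specific madvise(2)
	 * advice values MADV_FREE_REUSABLE / MADV_FREE_REUSE from <sys/mman.h>
	 * and treats the advice-to-routine pairing as an assumption. */
#include <sys/mman.h>

static void
recycle_free_chunk(void *addr, size_t len)
{
	/* Contents are disposable; the pages stop counting against the
	 * task's resident footprint until they are reused. */
	(void)madvise(addr, len, MADV_FREE_REUSABLE);

	/* ... later, before touching the chunk again ... */
	(void)madvise(addr, len, MADV_FREE_REUSE);
}
#endif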
13499
13500
3e170ce0
A
13501#if MACH_ASSERT
13502static kern_return_t
13503vm_map_pageout(
13504 vm_map_t map,
13505 vm_map_offset_t start,
13506 vm_map_offset_t end)
13507{
13508 vm_map_entry_t entry;
13509
13510 /*
13511 * The MADV_PAGEOUT operation doesn't require any changes to the
13512 * vm_map_entry_t's, so the read lock is sufficient.
13513 */
13514
13515 vm_map_lock_read(map);
13516
13517 /*
13518 * The madvise semantics require that the address range be fully
13519 * allocated with no holes. Otherwise, we're required to return
13520 * an error.
13521 */
13522
13523 if (!vm_map_range_check(map, start, end, &entry)) {
13524 vm_map_unlock_read(map);
13525 return KERN_INVALID_ADDRESS;
13526 }
13527
13528 /*
13529 * Examine each vm_map_entry_t in the range.
13530 */
13531 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
13532 entry = entry->vme_next) {
13533 vm_object_t object;
13534
13535 /*
13536 * Sanity check on the VM map entry.
13537 */
13538 if (entry->is_sub_map) {
13539 vm_map_t submap;
13540 vm_map_offset_t submap_start;
13541 vm_map_offset_t submap_end;
13542 vm_map_entry_t submap_entry;
13543
13544 submap = VME_SUBMAP(entry);
13545 submap_start = VME_OFFSET(entry);
13546 submap_end = submap_start + (entry->vme_end -
13547 entry->vme_start);
13548
13549 vm_map_lock_read(submap);
13550
13551 if (! vm_map_range_check(submap,
13552 submap_start,
13553 submap_end,
13554 &submap_entry)) {
13555 vm_map_unlock_read(submap);
13556 vm_map_unlock_read(map);
13557 return KERN_INVALID_ADDRESS;
13558 }
13559
13560 object = VME_OBJECT(submap_entry);
13561 if (submap_entry->is_sub_map ||
13562 object == VM_OBJECT_NULL ||
13563 !object->internal) {
13564 vm_map_unlock_read(submap);
13565 continue;
13566 }
13567
13568 vm_object_pageout(object);
13569
13570 vm_map_unlock_read(submap);
13571 submap = VM_MAP_NULL;
13572 submap_entry = VM_MAP_ENTRY_NULL;
13573 continue;
13574 }
13575
13576 object = VME_OBJECT(entry);
13577 if (entry->is_sub_map ||
13578 object == VM_OBJECT_NULL ||
13579 !object->internal) {
13580 continue;
13581 }
13582
13583 vm_object_pageout(object);
13584 }
13585
13586 vm_map_unlock_read(map);
13587 return KERN_SUCCESS;
13588}
13589#endif /* MACH_ASSERT */
13590
13591
1c79356b 13592/*
91447636
A
13593 * Routine: vm_map_entry_insert
13594 *
13595 * Description: This routine inserts a new vm_map_entry into a locked map.
1c79356b 13596 */
91447636
A
13597vm_map_entry_t
13598vm_map_entry_insert(
13599 vm_map_t map,
13600 vm_map_entry_t insp_entry,
13601 vm_map_offset_t start,
13602 vm_map_offset_t end,
13603 vm_object_t object,
13604 vm_object_offset_t offset,
13605 boolean_t needs_copy,
13606 boolean_t is_shared,
13607 boolean_t in_transition,
13608 vm_prot_t cur_protection,
13609 vm_prot_t max_protection,
13610 vm_behavior_t behavior,
13611 vm_inherit_t inheritance,
2d21ac55 13612 unsigned wired_count,
b0d623f7
A
13613 boolean_t no_cache,
13614 boolean_t permanent,
39236c6e 13615 unsigned int superpage_size,
fe8ab488
A
13616 boolean_t clear_map_aligned,
13617 boolean_t is_submap)
1c79356b 13618{
91447636 13619 vm_map_entry_t new_entry;
1c79356b 13620
91447636 13621 assert(insp_entry != (vm_map_entry_t)0);
1c79356b 13622
7ddcb079 13623 new_entry = vm_map_entry_create(map, !map->hdr.entries_pageable);
1c79356b 13624
39236c6e
A
13625 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
13626 new_entry->map_aligned = TRUE;
13627 } else {
13628 new_entry->map_aligned = FALSE;
13629 }
13630 if (clear_map_aligned &&
fe8ab488
A
13631 (! VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)) ||
13632 ! VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)))) {
39236c6e
A
13633 new_entry->map_aligned = FALSE;
13634 }
13635
91447636
A
13636 new_entry->vme_start = start;
13637 new_entry->vme_end = end;
13638 assert(page_aligned(new_entry->vme_start));
13639 assert(page_aligned(new_entry->vme_end));
39236c6e 13640 if (new_entry->map_aligned) {
fe8ab488
A
13641 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
13642 VM_MAP_PAGE_MASK(map)));
39236c6e
A
13643 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
13644 VM_MAP_PAGE_MASK(map)));
13645 }
e2d2fc5c 13646 assert(new_entry->vme_start < new_entry->vme_end);
1c79356b 13647
3e170ce0
A
13648 VME_OBJECT_SET(new_entry, object);
13649 VME_OFFSET_SET(new_entry, offset);
91447636 13650 new_entry->is_shared = is_shared;
fe8ab488 13651 new_entry->is_sub_map = is_submap;
91447636
A
13652 new_entry->needs_copy = needs_copy;
13653 new_entry->in_transition = in_transition;
13654 new_entry->needs_wakeup = FALSE;
13655 new_entry->inheritance = inheritance;
13656 new_entry->protection = cur_protection;
13657 new_entry->max_protection = max_protection;
13658 new_entry->behavior = behavior;
13659 new_entry->wired_count = wired_count;
13660 new_entry->user_wired_count = 0;
fe8ab488
A
13661 if (is_submap) {
13662 /*
13663 * submap: "use_pmap" means "nested".
13664 * default: false.
13665 */
13666 new_entry->use_pmap = FALSE;
13667 } else {
13668 /*
13669 * object: "use_pmap" means "use pmap accounting" for footprint.
13670 * default: true.
13671 */
13672 new_entry->use_pmap = TRUE;
13673 }
3e170ce0 13674 VME_ALIAS_SET(new_entry, 0);
b0d623f7 13675 new_entry->zero_wired_pages = FALSE;
2d21ac55 13676 new_entry->no_cache = no_cache;
b0d623f7 13677 new_entry->permanent = permanent;
39236c6e
A
13678 if (superpage_size)
13679 new_entry->superpage_size = TRUE;
13680 else
13681 new_entry->superpage_size = FALSE;
6d2010ae 13682 new_entry->used_for_jit = FALSE;
fe8ab488 13683 new_entry->iokit_acct = FALSE;
3e170ce0
A
13684 new_entry->vme_resilient_codesign = FALSE;
13685 new_entry->vme_resilient_media = FALSE;
39037602 13686 new_entry->vme_atomic = FALSE;
1c79356b 13687
91447636
A
13688 /*
13689 * Insert the new entry into the list.
13690 */
1c79356b 13691
6d2010ae 13692 vm_map_store_entry_link(map, insp_entry, new_entry);
91447636
A
13693 map->size += end - start;
13694
13695 /*
13696 * Update the free space hint and the lookup hint.
13697 */
13698
0c530ab8 13699 SAVE_HINT_MAP_WRITE(map, new_entry);
91447636 13700 return new_entry;
1c79356b
A
13701}
13702
13703/*
91447636
A
13704 * Routine: vm_map_remap_extract
13705 *
13706 * Description: This routine extracts a list of vm_map entries from a map.
1c79356b 13707 */
91447636
A
13708static kern_return_t
13709vm_map_remap_extract(
13710 vm_map_t map,
13711 vm_map_offset_t addr,
13712 vm_map_size_t size,
13713 boolean_t copy,
13714 struct vm_map_header *map_header,
13715 vm_prot_t *cur_protection,
13716 vm_prot_t *max_protection,
13717 /* What, no behavior? */
13718 vm_inherit_t inheritance,
39037602
A
13719 boolean_t pageable,
13720 boolean_t same_map)
1c79356b 13721{
91447636
A
13722 kern_return_t result;
13723 vm_map_size_t mapped_size;
13724 vm_map_size_t tmp_size;
13725 vm_map_entry_t src_entry; /* result of last map lookup */
13726 vm_map_entry_t new_entry;
13727 vm_object_offset_t offset;
13728 vm_map_offset_t map_address;
13729 vm_map_offset_t src_start; /* start of entry to map */
13730 vm_map_offset_t src_end; /* end of region to be mapped */
13731 vm_object_t object;
13732 vm_map_version_t version;
13733 boolean_t src_needs_copy;
13734 boolean_t new_entry_needs_copy;
1c79356b 13735
91447636 13736 assert(map != VM_MAP_NULL);
39236c6e
A
13737 assert(size != 0);
13738 assert(size == vm_map_round_page(size, PAGE_MASK));
91447636
A
13739 assert(inheritance == VM_INHERIT_NONE ||
13740 inheritance == VM_INHERIT_COPY ||
13741 inheritance == VM_INHERIT_SHARE);
1c79356b 13742
91447636
A
13743 /*
13744 * Compute start and end of region.
13745 */
39236c6e
A
13746 src_start = vm_map_trunc_page(addr, PAGE_MASK);
13747 src_end = vm_map_round_page(src_start + size, PAGE_MASK);
13748
1c79356b 13749
91447636
A
13750 /*
13751 * Initialize map_header.
13752 */
13753 map_header->links.next = (struct vm_map_entry *)&map_header->links;
13754 map_header->links.prev = (struct vm_map_entry *)&map_header->links;
13755 map_header->nentries = 0;
13756 map_header->entries_pageable = pageable;
39236c6e 13757 map_header->page_shift = PAGE_SHIFT;
1c79356b 13758
6d2010ae
A
13759 vm_map_store_init( map_header );
13760
91447636
A
13761 *cur_protection = VM_PROT_ALL;
13762 *max_protection = VM_PROT_ALL;
1c79356b 13763
91447636
A
13764 map_address = 0;
13765 mapped_size = 0;
13766 result = KERN_SUCCESS;
1c79356b 13767
91447636
A
13768 /*
13769 * The specified source virtual space might correspond to
13770 * multiple map entries, need to loop on them.
13771 */
13772 vm_map_lock(map);
13773 while (mapped_size != size) {
13774 vm_map_size_t entry_size;
1c79356b 13775
91447636
A
13776 /*
13777 * Find the beginning of the region.
13778 */
13779 if (! vm_map_lookup_entry(map, src_start, &src_entry)) {
13780 result = KERN_INVALID_ADDRESS;
13781 break;
13782 }
1c79356b 13783
91447636
A
13784 if (src_start < src_entry->vme_start ||
13785 (mapped_size && src_start != src_entry->vme_start)) {
13786 result = KERN_INVALID_ADDRESS;
13787 break;
13788 }
1c79356b 13789
91447636
A
13790 tmp_size = size - mapped_size;
13791 if (src_end > src_entry->vme_end)
13792 tmp_size -= (src_end - src_entry->vme_end);
1c79356b 13793
91447636 13794 entry_size = (vm_map_size_t)(src_entry->vme_end -
2d21ac55 13795 src_entry->vme_start);
1c79356b 13796
91447636 13797 if(src_entry->is_sub_map) {
3e170ce0 13798 vm_map_reference(VME_SUBMAP(src_entry));
91447636
A
13799 object = VM_OBJECT_NULL;
13800 } else {
3e170ce0 13801 object = VME_OBJECT(src_entry);
fe8ab488
A
13802 if (src_entry->iokit_acct) {
13803 /*
13804 * This entry uses "IOKit accounting".
13805 */
13806 } else if (object != VM_OBJECT_NULL &&
13807 object->purgable != VM_PURGABLE_DENY) {
13808 /*
13809 * Purgeable objects have their own accounting:
13810 * no pmap accounting for them.
13811 */
13812 assert(!src_entry->use_pmap);
13813 } else {
13814 /*
13815 * Not IOKit or purgeable:
13816 * must be accounted by pmap stats.
13817 */
13818 assert(src_entry->use_pmap);
13819 }
55e303ae 13820
91447636
A
13821 if (object == VM_OBJECT_NULL) {
13822 object = vm_object_allocate(entry_size);
3e170ce0
A
13823 VME_OFFSET_SET(src_entry, 0);
13824 VME_OBJECT_SET(src_entry, object);
91447636
A
13825 } else if (object->copy_strategy !=
13826 MEMORY_OBJECT_COPY_SYMMETRIC) {
13827 /*
13828 * We are already using an asymmetric
13829 * copy, and therefore we already have
13830 * the right object.
13831 */
13832 assert(!src_entry->needs_copy);
13833 } else if (src_entry->needs_copy || object->shadowed ||
13834 (object->internal && !object->true_share &&
2d21ac55 13835 !src_entry->is_shared &&
6d2010ae 13836 object->vo_size > entry_size)) {
1c79356b 13837
3e170ce0 13838 VME_OBJECT_SHADOW(src_entry, entry_size);
1c79356b 13839
91447636
A
13840 if (!src_entry->needs_copy &&
13841 (src_entry->protection & VM_PROT_WRITE)) {
0c530ab8
A
13842 vm_prot_t prot;
13843
13844 prot = src_entry->protection & ~VM_PROT_WRITE;
2d21ac55 13845
3e170ce0
A
13846 if (override_nx(map,
13847 VME_ALIAS(src_entry))
13848 && prot)
0c530ab8 13849 prot |= VM_PROT_EXECUTE;
2d21ac55 13850
316670eb 13851 if(map->mapped_in_other_pmaps) {
2d21ac55 13852 vm_object_pmap_protect(
3e170ce0
A
13853 VME_OBJECT(src_entry),
13854 VME_OFFSET(src_entry),
2d21ac55
A
13855 entry_size,
13856 PMAP_NULL,
0c530ab8 13857 src_entry->vme_start,
0c530ab8 13858 prot);
2d21ac55
A
13859 } else {
13860 pmap_protect(vm_map_pmap(map),
13861 src_entry->vme_start,
13862 src_entry->vme_end,
13863 prot);
91447636
A
13864 }
13865 }
1c79356b 13866
3e170ce0 13867 object = VME_OBJECT(src_entry);
91447636
A
13868 src_entry->needs_copy = FALSE;
13869 }
1c79356b 13870
1c79356b 13871
91447636 13872 vm_object_lock(object);
2d21ac55 13873 vm_object_reference_locked(object); /* object ref. for new entry */
91447636 13874 if (object->copy_strategy ==
2d21ac55 13875 MEMORY_OBJECT_COPY_SYMMETRIC) {
91447636
A
13876 object->copy_strategy =
13877 MEMORY_OBJECT_COPY_DELAY;
13878 }
13879 vm_object_unlock(object);
13880 }
1c79356b 13881
3e170ce0
A
13882 offset = (VME_OFFSET(src_entry) +
13883 (src_start - src_entry->vme_start));
1c79356b 13884
7ddcb079 13885 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
91447636 13886 vm_map_entry_copy(new_entry, src_entry);
fe8ab488
A
13887 if (new_entry->is_sub_map) {
13888 /* clr address space specifics */
13889 new_entry->use_pmap = FALSE;
13890 }
1c79356b 13891
39236c6e
A
13892 new_entry->map_aligned = FALSE;
13893
91447636
A
13894 new_entry->vme_start = map_address;
13895 new_entry->vme_end = map_address + tmp_size;
e2d2fc5c 13896 assert(new_entry->vme_start < new_entry->vme_end);
91447636 13897 new_entry->inheritance = inheritance;
3e170ce0 13898 VME_OFFSET_SET(new_entry, offset);
1c79356b 13899
91447636
A
13900 /*
13901 * The new region has to be copied now if required.
13902 */
13903 RestartCopy:
13904 if (!copy) {
316670eb
A
13905 /*
13906 * Cannot allow an entry describing a JIT
13907 * region to be shared across address spaces.
13908 */
39037602 13909 if (src_entry->used_for_jit == TRUE && !same_map) {
316670eb
A
13910 result = KERN_INVALID_ARGUMENT;
13911 break;
13912 }
91447636
A
13913 src_entry->is_shared = TRUE;
13914 new_entry->is_shared = TRUE;
13915 if (!(new_entry->is_sub_map))
13916 new_entry->needs_copy = FALSE;
1c79356b 13917
91447636
A
13918 } else if (src_entry->is_sub_map) {
13919 /* make this a COW sub_map if not already */
3e170ce0 13920 assert(new_entry->wired_count == 0);
91447636
A
13921 new_entry->needs_copy = TRUE;
13922 object = VM_OBJECT_NULL;
13923 } else if (src_entry->wired_count == 0 &&
3e170ce0
A
13924 vm_object_copy_quickly(&VME_OBJECT(new_entry),
13925 VME_OFFSET(new_entry),
2d21ac55
A
13926 (new_entry->vme_end -
13927 new_entry->vme_start),
13928 &src_needs_copy,
13929 &new_entry_needs_copy)) {
55e303ae 13930
91447636
A
13931 new_entry->needs_copy = new_entry_needs_copy;
13932 new_entry->is_shared = FALSE;
1c79356b 13933
91447636
A
13934 /*
13935 * Handle copy_on_write semantics.
13936 */
13937 if (src_needs_copy && !src_entry->needs_copy) {
0c530ab8
A
13938 vm_prot_t prot;
13939
13940 prot = src_entry->protection & ~VM_PROT_WRITE;
2d21ac55 13941
3e170ce0
A
13942 if (override_nx(map,
13943 VME_ALIAS(src_entry))
13944 && prot)
0c530ab8 13945 prot |= VM_PROT_EXECUTE;
2d21ac55 13946
91447636
A
13947 vm_object_pmap_protect(object,
13948 offset,
13949 entry_size,
13950 ((src_entry->is_shared
316670eb 13951 || map->mapped_in_other_pmaps) ?
91447636
A
13952 PMAP_NULL : map->pmap),
13953 src_entry->vme_start,
0c530ab8 13954 prot);
1c79356b 13955
3e170ce0 13956 assert(src_entry->wired_count == 0);
91447636
A
13957 src_entry->needs_copy = TRUE;
13958 }
13959 /*
13960 * Throw away the old object reference of the new entry.
13961 */
13962 vm_object_deallocate(object);
1c79356b 13963
91447636
A
13964 } else {
13965 new_entry->is_shared = FALSE;
1c79356b 13966
91447636
A
13967 /*
13968 * The map can be safely unlocked since we
13969 * already hold a reference on the object.
13970 *
13971 * Record the timestamp of the map for later
13972 * verification, and unlock the map.
13973 */
13974 version.main_timestamp = map->timestamp;
13975 vm_map_unlock(map); /* Increments timestamp once! */
55e303ae 13976
91447636
A
13977 /*
13978 * Perform the copy.
13979 */
13980 if (src_entry->wired_count > 0) {
13981 vm_object_lock(object);
13982 result = vm_object_copy_slowly(
2d21ac55
A
13983 object,
13984 offset,
13985 entry_size,
13986 THREAD_UNINT,
3e170ce0 13987 &VME_OBJECT(new_entry));
1c79356b 13988
3e170ce0 13989 VME_OFFSET_SET(new_entry, 0);
91447636
A
13990 new_entry->needs_copy = FALSE;
13991 } else {
3e170ce0
A
13992 vm_object_offset_t new_offset;
13993
13994 new_offset = VME_OFFSET(new_entry);
91447636 13995 result = vm_object_copy_strategically(
2d21ac55
A
13996 object,
13997 offset,
13998 entry_size,
3e170ce0
A
13999 &VME_OBJECT(new_entry),
14000 &new_offset,
2d21ac55 14001 &new_entry_needs_copy);
3e170ce0
A
14002 if (new_offset != VME_OFFSET(new_entry)) {
14003 VME_OFFSET_SET(new_entry, new_offset);
14004 }
1c79356b 14005
91447636
A
14006 new_entry->needs_copy = new_entry_needs_copy;
14007 }
1c79356b 14008
91447636
A
14009 /*
14010 * Throw away the old object reference of the new entry.
14011 */
14012 vm_object_deallocate(object);
1c79356b 14013
91447636
A
14014 if (result != KERN_SUCCESS &&
14015 result != KERN_MEMORY_RESTART_COPY) {
14016 _vm_map_entry_dispose(map_header, new_entry);
39037602 14017 vm_map_lock(map);
91447636
A
14018 break;
14019 }
1c79356b 14020
91447636
A
14021 /*
14022 * Verify that the map has not substantially
14023 * changed while the copy was being made.
14024 */
1c79356b 14025
91447636
A
14026 vm_map_lock(map);
14027 if (version.main_timestamp + 1 != map->timestamp) {
14028 /*
14029 * Simple version comparison failed.
14030 *
14031 * Retry the lookup and verify that the
14032 * same object/offset are still present.
14033 */
3e170ce0 14034 vm_object_deallocate(VME_OBJECT(new_entry));
91447636
A
14035 _vm_map_entry_dispose(map_header, new_entry);
14036 if (result == KERN_MEMORY_RESTART_COPY)
14037 result = KERN_SUCCESS;
14038 continue;
14039 }
1c79356b 14040
91447636
A
14041 if (result == KERN_MEMORY_RESTART_COPY) {
14042 vm_object_reference(object);
14043 goto RestartCopy;
14044 }
14045 }
1c79356b 14046
6d2010ae 14047 _vm_map_store_entry_link(map_header,
91447636 14048 map_header->links.prev, new_entry);
1c79356b 14049
6d2010ae
A
14050 /* Protections for submap mapping are irrelevant here */
14051 if( !src_entry->is_sub_map ) {
14052 *cur_protection &= src_entry->protection;
14053 *max_protection &= src_entry->max_protection;
14054 }
91447636
A
14055 map_address += tmp_size;
14056 mapped_size += tmp_size;
14057 src_start += tmp_size;
1c79356b 14058
91447636 14059 } /* end while */
1c79356b 14060
91447636
A
14061 vm_map_unlock(map);
14062 if (result != KERN_SUCCESS) {
14063 /*
14064 * Free all allocated elements.
14065 */
14066 for (src_entry = map_header->links.next;
14067 src_entry != (struct vm_map_entry *)&map_header->links;
14068 src_entry = new_entry) {
14069 new_entry = src_entry->vme_next;
6d2010ae 14070 _vm_map_store_entry_unlink(map_header, src_entry);
39236c6e 14071 if (src_entry->is_sub_map) {
3e170ce0 14072 vm_map_deallocate(VME_SUBMAP(src_entry));
39236c6e 14073 } else {
3e170ce0 14074 vm_object_deallocate(VME_OBJECT(src_entry));
39236c6e 14075 }
91447636
A
14076 _vm_map_entry_dispose(map_header, src_entry);
14077 }
14078 }
14079 return result;
1c79356b
A
14080}
14081
14082/*
91447636 14083 * Routine: vm_remap
1c79356b 14084 *
91447636
A
14085 * Map portion of a task's address space.
14086 * Mapped region must not overlap more than
14087 * one vm memory object. Protections and
14088 * inheritance attributes remain the same
14089 * as in the original task and are out parameters.
14090 * Source and target tasks can be identical.
14091 * Other attributes are the same as for vm_map().
1c79356b
A
14092 */
14093kern_return_t
91447636
A
14094vm_map_remap(
14095 vm_map_t target_map,
14096 vm_map_address_t *address,
14097 vm_map_size_t size,
14098 vm_map_offset_t mask,
060df5ea 14099 int flags,
91447636
A
14100 vm_map_t src_map,
14101 vm_map_offset_t memory_address,
1c79356b 14102 boolean_t copy,
1c79356b
A
14103 vm_prot_t *cur_protection,
14104 vm_prot_t *max_protection,
91447636 14105 vm_inherit_t inheritance)
1c79356b
A
14106{
14107 kern_return_t result;
91447636 14108 vm_map_entry_t entry;
0c530ab8 14109 vm_map_entry_t insp_entry = VM_MAP_ENTRY_NULL;
1c79356b 14110 vm_map_entry_t new_entry;
91447636 14111 struct vm_map_header map_header;
39236c6e 14112 vm_map_offset_t offset_in_mapping;
1c79356b 14113
91447636
A
14114 if (target_map == VM_MAP_NULL)
14115 return KERN_INVALID_ARGUMENT;
1c79356b 14116
91447636 14117 switch (inheritance) {
2d21ac55
A
14118 case VM_INHERIT_NONE:
14119 case VM_INHERIT_COPY:
14120 case VM_INHERIT_SHARE:
91447636
A
14121 if (size != 0 && src_map != VM_MAP_NULL)
14122 break;
14123 /*FALL THRU*/
2d21ac55 14124 default:
91447636
A
14125 return KERN_INVALID_ARGUMENT;
14126 }
1c79356b 14127
39236c6e
A
14128 /*
14129 * If the user is requesting that we return the address of the
14130 * first byte of the data (rather than the base of the page),
14131 * then we use different rounding semantics: specifically,
14132 * we assume that (memory_address, size) describes a region
14133 * all of whose pages we must cover, rather than a base to be truncated
14134 * down and a size to be added to that base. So we figure out
14135 * the highest page that the requested region includes and make
14136 * sure that the size will cover it.
14137 *
14138 * The key example we're worried about is of the form:
14139 *
14140 * memory_address = 0x1ff0, size = 0x20
14141 *
14142 * With the old semantics, we round down the memory_address to 0x1000
14143 * and round up the size to 0x1000, resulting in our covering *only*
14144 * page 0x1000. With the new semantics, we'd realize that the region covers
14145 * 0x1ff0-0x2010, and compute a size of 0x2000. Thus, we cover both page
14146 * 0x1000 and page 0x2000 in the region we remap.
14147 */
14148 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
14149 offset_in_mapping = memory_address - vm_map_trunc_page(memory_address, PAGE_MASK);
14150 size = vm_map_round_page(memory_address + size - vm_map_trunc_page(memory_address, PAGE_MASK), PAGE_MASK);
14151 } else {
14152 size = vm_map_round_page(size, PAGE_MASK);
14153 }
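	/*
	 * Worked instance of the example above (illustrative, assuming 4K
	 * pages, i.e. PAGE_MASK == 0xfff): with VM_FLAGS_RETURN_DATA_ADDR set,
	 * memory_address = 0x1ff0 and size = 0x20 give
	 *	offset_in_mapping = 0x1ff0 - 0x1000        = 0xff0
	 *	size              = round(0x2010 - 0x1000) = 0x2000
	 * so pages 0x1000 and 0x2000 are both covered, and the caller later
	 * gets back *address + 0xff0, the address of the first data byte.
	 */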
1c79356b 14154
91447636 14155 result = vm_map_remap_extract(src_map, memory_address,
2d21ac55
A
14156 size, copy, &map_header,
14157 cur_protection,
14158 max_protection,
14159 inheritance,
39037602
A
14160 target_map->hdr.entries_pageable,
14161 src_map == target_map);
1c79356b 14162
91447636
A
14163 if (result != KERN_SUCCESS) {
14164 return result;
14165 }
1c79356b 14166
91447636
A
14167 /*
14168 * Allocate/check a range of free virtual address
14169 * space for the target
1c79356b 14170 */
39236c6e
A
14171 *address = vm_map_trunc_page(*address,
14172 VM_MAP_PAGE_MASK(target_map));
91447636
A
14173 vm_map_lock(target_map);
14174 result = vm_map_remap_range_allocate(target_map, address, size,
060df5ea 14175 mask, flags, &insp_entry);
1c79356b 14176
91447636
A
14177 for (entry = map_header.links.next;
14178 entry != (struct vm_map_entry *)&map_header.links;
14179 entry = new_entry) {
14180 new_entry = entry->vme_next;
6d2010ae 14181 _vm_map_store_entry_unlink(&map_header, entry);
91447636 14182 if (result == KERN_SUCCESS) {
3e170ce0
A
14183 if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
14184 /* no codesigning -> read-only access */
14185 assert(!entry->used_for_jit);
14186 entry->max_protection = VM_PROT_READ;
14187 entry->protection = VM_PROT_READ;
14188 entry->vme_resilient_codesign = TRUE;
14189 }
91447636
A
14190 entry->vme_start += *address;
14191 entry->vme_end += *address;
39236c6e 14192 assert(!entry->map_aligned);
6d2010ae 14193 vm_map_store_entry_link(target_map, insp_entry, entry);
91447636
A
14194 insp_entry = entry;
14195 } else {
14196 if (!entry->is_sub_map) {
3e170ce0 14197 vm_object_deallocate(VME_OBJECT(entry));
91447636 14198 } else {
3e170ce0 14199 vm_map_deallocate(VME_SUBMAP(entry));
2d21ac55 14200 }
91447636 14201 _vm_map_entry_dispose(&map_header, entry);
1c79356b 14202 }
91447636 14203 }
1c79356b 14204
3e170ce0
A
14205 if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
14206 *cur_protection = VM_PROT_READ;
14207 *max_protection = VM_PROT_READ;
14208 }
14209
6d2010ae 14210 if( target_map->disable_vmentry_reuse == TRUE) {
39037602 14211 assert(!target_map->is_nested_map);
6d2010ae
A
14212 if( target_map->highest_entry_end < insp_entry->vme_end ){
14213 target_map->highest_entry_end = insp_entry->vme_end;
14214 }
14215 }
14216
91447636
A
14217 if (result == KERN_SUCCESS) {
14218 target_map->size += size;
0c530ab8 14219 SAVE_HINT_MAP_WRITE(target_map, insp_entry);
91447636
A
14220 }
14221 vm_map_unlock(target_map);
1c79356b 14222
91447636
A
14223 if (result == KERN_SUCCESS && target_map->wiring_required)
14224 result = vm_map_wire(target_map, *address,
3e170ce0
A
14225 *address + size, *cur_protection | VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_MLOCK),
14226 TRUE);
39236c6e
A
14227
14228 /*
14229 * If requested, return the address of the data pointed to by the
14230 * request, rather than the base of the resulting page.
14231 */
14232 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
14233 *address += offset_in_mapping;
14234 }
14235
91447636
A
14236 return result;
14237}
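#if 0	/* A minimal user-space sketch (illustrative only): mach_vm_remap() is
	 * the MIG routine that ultimately lands in vm_map_remap() above.  This
	 * aliases a region of the caller's own address space at a kernel-chosen
	 * address; the prototype is the one from <mach/mach_vm.h>, but treat
	 * the exact routing as an assumption. */
#include <mach/mach.h>
#include <mach/mach_vm.h>

static kern_return_t
alias_region(mach_vm_address_t src, mach_vm_size_t size,
	     mach_vm_address_t *alias)
{
	vm_prot_t cur_prot, max_prot;

	*alias = 0;
	return mach_vm_remap(mach_task_self(), alias, size,
			     0,			/* no alignment mask */
			     VM_FLAGS_ANYWHERE,	/* let the kernel choose */
			     mach_task_self(), src,
			     FALSE,		/* share rather than copy */
			     &cur_prot, &max_prot,
			     VM_INHERIT_SHARE);
}
#endif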
1c79356b 14238
91447636
A
14239/*
14240 * Routine: vm_map_remap_range_allocate
14241 *
14242 * Description:
14243 * Allocate a range in the specified virtual address map.
14244 * Returns the address and the map entry just before the allocated
14245 * range
14246 *
14247 * Map must be locked.
14248 */
1c79356b 14249
91447636
A
14250static kern_return_t
14251vm_map_remap_range_allocate(
14252 vm_map_t map,
14253 vm_map_address_t *address, /* IN/OUT */
14254 vm_map_size_t size,
14255 vm_map_offset_t mask,
060df5ea 14256 int flags,
91447636
A
14257 vm_map_entry_t *map_entry) /* OUT */
14258{
060df5ea
A
14259 vm_map_entry_t entry;
14260 vm_map_offset_t start;
14261 vm_map_offset_t end;
14262 kern_return_t kr;
3e170ce0 14263 vm_map_entry_t hole_entry;
1c79356b 14264
2d21ac55 14265StartAgain: ;
1c79356b 14266
2d21ac55 14267 start = *address;
1c79356b 14268
060df5ea 14269 if (flags & VM_FLAGS_ANYWHERE)
2d21ac55 14270 {
39037602
A
14271 if (flags & VM_FLAGS_RANDOM_ADDR)
14272 {
14273 /*
14274 * Get a random start address.
14275 */
14276 kr = vm_map_random_address_for_size(map, address, size);
14277 if (kr != KERN_SUCCESS) {
14278 return(kr);
14279 }
14280 start = *address;
14281 }
14282
2d21ac55
A
14283 /*
14284 * Calculate the first possible address.
14285 */
1c79356b 14286
2d21ac55
A
14287 if (start < map->min_offset)
14288 start = map->min_offset;
14289 if (start > map->max_offset)
14290 return(KERN_NO_SPACE);
91447636 14291
2d21ac55
A
14292 /*
14293 * Look for the first possible address;
14294 * if there's already something at this
14295 * address, we have to start after it.
14296 */
1c79356b 14297
6d2010ae
A
14298 if( map->disable_vmentry_reuse == TRUE) {
14299 VM_MAP_HIGHEST_ENTRY(map, entry, start);
2d21ac55 14300 } else {
3e170ce0
A
14301
14302 if (map->holelistenabled) {
14303 hole_entry = (vm_map_entry_t)map->holes_list;
14304
14305 if (hole_entry == NULL) {
14306 /*
14307 * No more space in the map?
14308 */
14309 return(KERN_NO_SPACE);
14310 } else {
14311
14312 boolean_t found_hole = FALSE;
14313
14314 do {
14315 if (hole_entry->vme_start >= start) {
14316 start = hole_entry->vme_start;
14317 found_hole = TRUE;
14318 break;
14319 }
14320
14321 if (hole_entry->vme_end > start) {
14322 found_hole = TRUE;
14323 break;
14324 }
14325 hole_entry = hole_entry->vme_next;
14326
14327 } while (hole_entry != (vm_map_entry_t) map->holes_list);
14328
14329 if (found_hole == FALSE) {
14330 return (KERN_NO_SPACE);
14331 }
14332
14333 entry = hole_entry;
14334 }
6d2010ae 14335 } else {
3e170ce0
A
14336 assert(first_free_is_valid(map));
14337 if (start == map->min_offset) {
14338 if ((entry = map->first_free) != vm_map_to_entry(map))
14339 start = entry->vme_end;
14340 } else {
14341 vm_map_entry_t tmp_entry;
14342 if (vm_map_lookup_entry(map, start, &tmp_entry))
14343 start = tmp_entry->vme_end;
14344 entry = tmp_entry;
14345 }
6d2010ae 14346 }
39236c6e
A
14347 start = vm_map_round_page(start,
14348 VM_MAP_PAGE_MASK(map));
2d21ac55 14349 }
91447636 14350
2d21ac55
A
14351 /*
14352 * In any case, the "entry" always precedes
14353 * the proposed new region throughout the
14354 * loop:
14355 */
1c79356b 14356
2d21ac55 14357 while (TRUE) {
39037602 14358 vm_map_entry_t next;
2d21ac55
A
14359
14360 /*
14361 * Find the end of the proposed new region.
14362 * Be sure we didn't go beyond the end, or
14363 * wrap around the address.
14364 */
14365
14366 end = ((start + mask) & ~mask);
39236c6e
A
14367 end = vm_map_round_page(end,
14368 VM_MAP_PAGE_MASK(map));
2d21ac55
A
14369 if (end < start)
14370 return(KERN_NO_SPACE);
14371 start = end;
14372 end += size;
14373
14374 if ((end > map->max_offset) || (end < start)) {
14375 if (map->wait_for_space) {
14376 if (size <= (map->max_offset -
14377 map->min_offset)) {
14378 assert_wait((event_t) map, THREAD_INTERRUPTIBLE);
14379 vm_map_unlock(map);
14380 thread_block(THREAD_CONTINUE_NULL);
14381 vm_map_lock(map);
14382 goto StartAgain;
14383 }
14384 }
91447636 14385
2d21ac55
A
14386 return(KERN_NO_SPACE);
14387 }
1c79356b 14388
2d21ac55 14389 next = entry->vme_next;
1c79356b 14390
3e170ce0
A
14391 if (map->holelistenabled) {
14392 if (entry->vme_end >= end)
14393 break;
14394 } else {
14395 /*
14396 * If there are no more entries, we must win.
14397 *
14398 * OR
14399 *
14400 * If there is another entry, it must be
14401 * after the end of the potential new region.
14402 */
1c79356b 14403
3e170ce0
A
14404 if (next == vm_map_to_entry(map))
14405 break;
14406
14407 if (next->vme_start >= end)
14408 break;
14409 }
1c79356b 14410
2d21ac55
A
14411 /*
14412 * Didn't fit -- move to the next entry.
14413 */
1c79356b 14414
2d21ac55 14415 entry = next;
3e170ce0
A
14416
14417 if (map->holelistenabled) {
14418 if (entry == (vm_map_entry_t) map->holes_list) {
14419 /*
14420 * Wrapped around
14421 */
14422 return(KERN_NO_SPACE);
14423 }
14424 start = entry->vme_start;
14425 } else {
14426 start = entry->vme_end;
14427 }
14428 }
14429
14430 if (map->holelistenabled) {
14431
14432 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
14433 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
14434 }
2d21ac55 14435 }
3e170ce0 14436
2d21ac55 14437 *address = start;
3e170ce0 14438
2d21ac55
A
14439 } else {
14440 vm_map_entry_t temp_entry;
91447636 14441
2d21ac55
A
14442 /*
14443 * Verify that:
14444 * the address doesn't itself violate
14445 * the mask requirement.
14446 */
1c79356b 14447
2d21ac55
A
14448 if ((start & mask) != 0)
14449 return(KERN_NO_SPACE);
1c79356b 14450
1c79356b 14451
2d21ac55
A
14452 /*
14453 * ... the address is within bounds
14454 */
1c79356b 14455
2d21ac55 14456 end = start + size;
1c79356b 14457
2d21ac55
A
14458 if ((start < map->min_offset) ||
14459 (end > map->max_offset) ||
14460 (start >= end)) {
14461 return(KERN_INVALID_ADDRESS);
14462 }
1c79356b 14463
060df5ea
A
14464 /*
14465 * If we're asked to overwrite whatever was mapped in that
14466 * range, first deallocate that range.
14467 */
14468 if (flags & VM_FLAGS_OVERWRITE) {
14469 vm_map_t zap_map;
14470
14471 /*
14472 * We use a "zap_map" to avoid having to unlock
14473 * the "map" in vm_map_delete(), which would compromise
14474 * the atomicity of the "deallocate" and then "remap"
14475 * combination.
14476 */
14477 zap_map = vm_map_create(PMAP_NULL,
14478 start,
316670eb 14479 end,
060df5ea
A
14480 map->hdr.entries_pageable);
14481 if (zap_map == VM_MAP_NULL) {
14482 return KERN_RESOURCE_SHORTAGE;
14483 }
39236c6e 14484 vm_map_set_page_shift(zap_map, VM_MAP_PAGE_SHIFT(map));
3e170ce0 14485 vm_map_disable_hole_optimization(zap_map);
060df5ea
A
14486
14487 kr = vm_map_delete(map, start, end,
fe8ab488
A
14488 (VM_MAP_REMOVE_SAVE_ENTRIES |
14489 VM_MAP_REMOVE_NO_MAP_ALIGN),
060df5ea
A
14490 zap_map);
14491 if (kr == KERN_SUCCESS) {
14492 vm_map_destroy(zap_map,
14493 VM_MAP_REMOVE_NO_PMAP_CLEANUP);
14494 zap_map = VM_MAP_NULL;
14495 }
14496 }
14497
2d21ac55
A
14498 /*
14499 * ... the starting address isn't allocated
14500 */
91447636 14501
2d21ac55
A
14502 if (vm_map_lookup_entry(map, start, &temp_entry))
14503 return(KERN_NO_SPACE);
91447636 14504
2d21ac55 14505 entry = temp_entry;
91447636 14506
2d21ac55
A
14507 /*
14508 * ... the next region doesn't overlap the
14509 * end point.
14510 */
1c79356b 14511
2d21ac55
A
14512 if ((entry->vme_next != vm_map_to_entry(map)) &&
14513 (entry->vme_next->vme_start < end))
14514 return(KERN_NO_SPACE);
14515 }
14516 *map_entry = entry;
14517 return(KERN_SUCCESS);
91447636 14518}
1c79356b 14519
91447636
A
14520/*
14521 * vm_map_switch:
14522 *
14523 * Set the address map for the current thread to the specified map
14524 */
1c79356b 14525
91447636
A
14526vm_map_t
14527vm_map_switch(
14528 vm_map_t map)
14529{
14530 int mycpu;
14531 thread_t thread = current_thread();
14532 vm_map_t oldmap = thread->map;
1c79356b 14533
91447636
A
14534 mp_disable_preemption();
14535 mycpu = cpu_number();
1c79356b 14536
91447636
A
14537 /*
14538 * Deactivate the current map and activate the requested map
14539 */
14540 PMAP_SWITCH_USER(thread, map, mycpu);
1c79356b 14541
91447636
A
14542 mp_enable_preemption();
14543 return(oldmap);
14544}
1c79356b 14545
1c79356b 14546
91447636
A
14547/*
14548 * Routine: vm_map_write_user
14549 *
14550 * Description:
14551 *	Copy out data from kernel space into a range of the
14552 *	destination map. The destination range must already
14553 *	exist in the destination map.
14554 *	NOTE:  This routine should only be called by threads
14555 *	that can block on a page fault, i.e. kernel-mode
14556 *	user threads.
14557 *
14558 */
14559kern_return_t
14560vm_map_write_user(
14561 vm_map_t map,
14562 void *src_p,
14563 vm_map_address_t dst_addr,
14564 vm_size_t size)
14565{
14566 kern_return_t kr = KERN_SUCCESS;
1c79356b 14567
91447636
A
14568 if(current_map() == map) {
14569 if (copyout(src_p, dst_addr, size)) {
14570 kr = KERN_INVALID_ADDRESS;
14571 }
14572 } else {
14573 vm_map_t oldmap;
1c79356b 14574
91447636
A
14575 /* take on the identity of the target map while doing */
14576 /* the transfer */
1c79356b 14577
91447636
A
14578 vm_map_reference(map);
14579 oldmap = vm_map_switch(map);
14580 if (copyout(src_p, dst_addr, size)) {
14581 kr = KERN_INVALID_ADDRESS;
1c79356b 14582 }
91447636
A
14583 vm_map_switch(oldmap);
14584 vm_map_deallocate(map);
1c79356b 14585 }
91447636 14586 return kr;
1c79356b
A
14587}
14588
14589/*
91447636
A
14590 * Routine: vm_map_read_user
14591 *
14592 * Description:
14593 *	Copy in data from a user-space source map into the
14594 *	kernel map. The destination range must already
14595 *	exist in the kernel map.
14596 *	NOTE:  This routine should only be called by threads
14597 *	that can block on a page fault, i.e. kernel-mode
14598 *	user threads.
1c79356b 14599 *
1c79356b
A
14600 */
14601kern_return_t
91447636
A
14602vm_map_read_user(
14603 vm_map_t map,
14604 vm_map_address_t src_addr,
14605 void *dst_p,
14606 vm_size_t size)
1c79356b 14607{
91447636 14608 kern_return_t kr = KERN_SUCCESS;
1c79356b 14609
91447636
A
14610 if(current_map() == map) {
14611 if (copyin(src_addr, dst_p, size)) {
14612 kr = KERN_INVALID_ADDRESS;
14613 }
14614 } else {
14615 vm_map_t oldmap;
1c79356b 14616
91447636
A
14617 /* take on the identity of the target map while doing */
14618 /* the transfer */
14619
14620 vm_map_reference(map);
14621 oldmap = vm_map_switch(map);
14622 if (copyin(src_addr, dst_p, size)) {
14623 kr = KERN_INVALID_ADDRESS;
14624 }
14625 vm_map_switch(oldmap);
14626 vm_map_deallocate(map);
1c79356b 14627 }
91447636
A
14628 return kr;
14629}
14630
1c79356b 14631
91447636
A
14632/*
14633 * vm_map_check_protection:
14634 *
14635 * Assert that the target map allows the specified
14636 * privilege on the entire address region given.
14637 * The entire region must be allocated.
14638 */
2d21ac55
A
14639boolean_t
14640vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
14641 vm_map_offset_t end, vm_prot_t protection)
91447636 14642{
2d21ac55
A
14643 vm_map_entry_t entry;
14644 vm_map_entry_t tmp_entry;
1c79356b 14645
91447636 14646 vm_map_lock(map);
1c79356b 14647
2d21ac55 14648 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
91447636 14649 {
2d21ac55
A
14650 vm_map_unlock(map);
14651 return (FALSE);
1c79356b
A
14652 }
14653
91447636
A
14654 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
14655 vm_map_unlock(map);
14656 return(FALSE);
14657 }
1c79356b 14658
91447636
A
14659 entry = tmp_entry;
14660
14661 while (start < end) {
14662 if (entry == vm_map_to_entry(map)) {
14663 vm_map_unlock(map);
14664 return(FALSE);
1c79356b 14665 }
1c79356b 14666
91447636
A
14667 /*
14668 * No holes allowed!
14669 */
1c79356b 14670
91447636
A
14671 if (start < entry->vme_start) {
14672 vm_map_unlock(map);
14673 return(FALSE);
14674 }
14675
14676 /*
14677 * Check protection associated with entry.
14678 */
14679
14680 if ((entry->protection & protection) != protection) {
14681 vm_map_unlock(map);
14682 return(FALSE);
14683 }
14684
14685 /* go to next entry */
14686
14687 start = entry->vme_end;
14688 entry = entry->vme_next;
14689 }
14690 vm_map_unlock(map);
14691 return(TRUE);
1c79356b
A
14692}
14693
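
/*
 * Illustrative usage (not part of the original source): a minimal sketch
 * showing how a caller might use vm_map_check_protection() to verify that
 * an entire range is mapped with read access. The helper name and wrapper
 * are assumptions for illustration only.
 */
#if 0 /* example only, not compiled */
static boolean_t
example_range_is_readable(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_size_t	size)
{
	/*
	 * TRUE only if every byte of [start, start + size) is allocated
	 * in "map" and allows at least VM_PROT_READ.
	 */
	return vm_map_check_protection(map, start, start + size,
				       VM_PROT_READ);
}
#endif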
1c79356b 14694kern_return_t
91447636
A
14695vm_map_purgable_control(
14696 vm_map_t map,
14697 vm_map_offset_t address,
14698 vm_purgable_t control,
14699 int *state)
1c79356b 14700{
91447636
A
14701 vm_map_entry_t entry;
14702 vm_object_t object;
14703 kern_return_t kr;
fe8ab488 14704 boolean_t was_nonvolatile;
1c79356b 14705
1c79356b 14706 /*
91447636
A
14707 * Vet all the input parameters and current type and state of the
14708	 * underlying object.  Return with an error if anything is amiss.
1c79356b 14709 */
91447636
A
14710 if (map == VM_MAP_NULL)
14711 return(KERN_INVALID_ARGUMENT);
1c79356b 14712
91447636 14713 if (control != VM_PURGABLE_SET_STATE &&
b0d623f7
A
14714 control != VM_PURGABLE_GET_STATE &&
14715 control != VM_PURGABLE_PURGE_ALL)
91447636 14716 return(KERN_INVALID_ARGUMENT);
1c79356b 14717
b0d623f7
A
14718 if (control == VM_PURGABLE_PURGE_ALL) {
14719 vm_purgeable_object_purge_all();
14720 return KERN_SUCCESS;
14721 }
14722
91447636 14723 if (control == VM_PURGABLE_SET_STATE &&
b0d623f7 14724 (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) ||
2d21ac55 14725 ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK)))
91447636
A
14726 return(KERN_INVALID_ARGUMENT);
14727
b0d623f7 14728 vm_map_lock_read(map);
91447636
A
14729
14730 if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {
14731
14732 /*
14733 * Must pass a valid non-submap address.
14734 */
b0d623f7 14735 vm_map_unlock_read(map);
91447636
A
14736 return(KERN_INVALID_ADDRESS);
14737 }
14738
14739 if ((entry->protection & VM_PROT_WRITE) == 0) {
14740 /*
14741 * Can't apply purgable controls to something you can't write.
14742 */
b0d623f7 14743 vm_map_unlock_read(map);
91447636
A
14744 return(KERN_PROTECTION_FAILURE);
14745 }
14746
3e170ce0 14747 object = VME_OBJECT(entry);
fe8ab488
A
14748 if (object == VM_OBJECT_NULL ||
14749 object->purgable == VM_PURGABLE_DENY) {
91447636 14750 /*
fe8ab488 14751 * Object must already be present and be purgeable.
91447636 14752 */
b0d623f7 14753 vm_map_unlock_read(map);
91447636
A
14754 return KERN_INVALID_ARGUMENT;
14755 }
14756
14757 vm_object_lock(object);
14758
39236c6e 14759#if 00
3e170ce0 14760 if (VME_OFFSET(entry) != 0 ||
6d2010ae 14761 entry->vme_end - entry->vme_start != object->vo_size) {
91447636
A
14762 /*
14763 * Can only apply purgable controls to the whole (existing)
14764 * object at once.
14765 */
b0d623f7 14766 vm_map_unlock_read(map);
91447636
A
14767 vm_object_unlock(object);
14768 return KERN_INVALID_ARGUMENT;
1c79356b 14769 }
39236c6e 14770#endif
fe8ab488
A
14771
14772 assert(!entry->is_sub_map);
14773 assert(!entry->use_pmap); /* purgeable has its own accounting */
14774
b0d623f7 14775 vm_map_unlock_read(map);
1c79356b 14776
fe8ab488
A
14777 was_nonvolatile = (object->purgable == VM_PURGABLE_NONVOLATILE);
14778
91447636 14779 kr = vm_object_purgable_control(object, control, state);
1c79356b 14780
fe8ab488
A
14781 if (was_nonvolatile &&
14782 object->purgable != VM_PURGABLE_NONVOLATILE &&
14783 map->pmap == kernel_pmap) {
14784#if DEBUG
14785 object->vo_purgeable_volatilizer = kernel_task;
14786#endif /* DEBUG */
14787 }
14788
91447636 14789 vm_object_unlock(object);
1c79356b 14790
91447636
A
14791 return kr;
14792}
1c79356b 14793
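
/*
 * Illustrative usage (not part of the original source): a minimal sketch of
 * marking a purgeable region volatile with vm_map_purgable_control(). The
 * helper name is an assumption; "address" must fall within a writable entry
 * backed by an existing purgeable object, as checked above.
 */
#if 0 /* example only, not compiled */
static kern_return_t
example_make_region_volatile(
	vm_map_t	map,
	vm_map_offset_t	address)
{
	int	state = VM_PURGABLE_VOLATILE;

	/* "state" is in/out: the requested state on entry */
	return vm_map_purgable_control(map, address,
				       VM_PURGABLE_SET_STATE, &state);
}
#endif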
91447636 14794kern_return_t
b0d623f7 14795vm_map_page_query_internal(
2d21ac55 14796 vm_map_t target_map,
91447636 14797 vm_map_offset_t offset,
2d21ac55
A
14798 int *disposition,
14799 int *ref_count)
91447636 14800{
b0d623f7
A
14801 kern_return_t kr;
14802 vm_page_info_basic_data_t info;
14803 mach_msg_type_number_t count;
14804
14805 count = VM_PAGE_INFO_BASIC_COUNT;
14806 kr = vm_map_page_info(target_map,
14807 offset,
14808 VM_PAGE_INFO_BASIC,
14809 (vm_page_info_t) &info,
14810 &count);
14811 if (kr == KERN_SUCCESS) {
14812 *disposition = info.disposition;
14813 *ref_count = info.ref_count;
14814 } else {
14815 *disposition = 0;
14816 *ref_count = 0;
14817 }
2d21ac55 14818
b0d623f7
A
14819 return kr;
14820}
14821
14822kern_return_t
14823vm_map_page_info(
14824 vm_map_t map,
14825 vm_map_offset_t offset,
14826 vm_page_info_flavor_t flavor,
14827 vm_page_info_t info,
14828 mach_msg_type_number_t *count)
14829{
14830 vm_map_entry_t map_entry;
14831 vm_object_t object;
14832 vm_page_t m;
b0d623f7
A
14833 kern_return_t retval = KERN_SUCCESS;
14834 boolean_t top_object;
14835 int disposition;
14836 int ref_count;
b0d623f7
A
14837 vm_page_info_basic_t basic_info;
14838 int depth;
6d2010ae 14839 vm_map_offset_t offset_in_page;
2d21ac55 14840
b0d623f7
A
14841 switch (flavor) {
14842 case VM_PAGE_INFO_BASIC:
14843 if (*count != VM_PAGE_INFO_BASIC_COUNT) {
6d2010ae
A
14844 /*
14845 * The "vm_page_info_basic_data" structure was not
14846 * properly padded, so allow the size to be off by
14847 * one to maintain backwards binary compatibility...
14848 */
14849 if (*count != VM_PAGE_INFO_BASIC_COUNT - 1)
14850 return KERN_INVALID_ARGUMENT;
b0d623f7
A
14851 }
14852 break;
14853 default:
14854 return KERN_INVALID_ARGUMENT;
91447636 14855 }
2d21ac55 14856
b0d623f7
A
14857 disposition = 0;
14858 ref_count = 0;
b0d623f7
A
14859 top_object = TRUE;
14860 depth = 0;
14861
14862 retval = KERN_SUCCESS;
6d2010ae 14863 offset_in_page = offset & PAGE_MASK;
39236c6e 14864 offset = vm_map_trunc_page(offset, PAGE_MASK);
b0d623f7
A
14865
14866 vm_map_lock_read(map);
14867
14868 /*
14869 * First, find the map entry covering "offset", going down
14870 * submaps if necessary.
14871 */
14872 for (;;) {
14873 if (!vm_map_lookup_entry(map, offset, &map_entry)) {
14874 vm_map_unlock_read(map);
14875 return KERN_INVALID_ADDRESS;
14876 }
14877 /* compute offset from this map entry's start */
14878 offset -= map_entry->vme_start;
14879 /* compute offset into this map entry's object (or submap) */
3e170ce0 14880 offset += VME_OFFSET(map_entry);
b0d623f7
A
14881
14882 if (map_entry->is_sub_map) {
14883 vm_map_t sub_map;
2d21ac55 14884
3e170ce0 14885 sub_map = VME_SUBMAP(map_entry);
2d21ac55 14886 vm_map_lock_read(sub_map);
b0d623f7 14887 vm_map_unlock_read(map);
2d21ac55 14888
b0d623f7
A
14889 map = sub_map;
14890
14891 ref_count = MAX(ref_count, map->ref_count);
14892 continue;
1c79356b 14893 }
b0d623f7 14894 break;
91447636 14895 }
b0d623f7 14896
3e170ce0 14897 object = VME_OBJECT(map_entry);
b0d623f7
A
14898 if (object == VM_OBJECT_NULL) {
14899 /* no object -> no page */
14900 vm_map_unlock_read(map);
14901 goto done;
14902 }
14903
91447636 14904 vm_object_lock(object);
b0d623f7
A
14905 vm_map_unlock_read(map);
14906
14907 /*
14908 * Go down the VM object shadow chain until we find the page
14909 * we're looking for.
14910 */
14911 for (;;) {
14912 ref_count = MAX(ref_count, object->ref_count);
2d21ac55 14913
91447636 14914 m = vm_page_lookup(object, offset);
2d21ac55 14915
91447636 14916 if (m != VM_PAGE_NULL) {
b0d623f7 14917 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
91447636
A
14918 break;
14919 } else {
39236c6e
A
14920 if (object->internal &&
14921 object->alive &&
14922 !object->terminating &&
14923 object->pager_ready) {
14924
39037602
A
14925 if (VM_COMPRESSOR_PAGER_STATE_GET(object, offset)
14926 == VM_EXTERNAL_STATE_EXISTS) {
14927 /* the pager has that page */
14928 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
14929 break;
2d21ac55
A
14930 }
14931 }
b0d623f7 14932
2d21ac55
A
14933 if (object->shadow != VM_OBJECT_NULL) {
14934 vm_object_t shadow;
14935
6d2010ae 14936 offset += object->vo_shadow_offset;
2d21ac55
A
14937 shadow = object->shadow;
14938
14939 vm_object_lock(shadow);
14940 vm_object_unlock(object);
14941
14942 object = shadow;
14943 top_object = FALSE;
b0d623f7 14944 depth++;
2d21ac55 14945 } else {
b0d623f7
A
14946// if (!object->internal)
14947// break;
14948// retval = KERN_FAILURE;
14949// goto done_with_object;
14950 break;
91447636 14951 }
91447636
A
14952 }
14953 }
91447636
A
 14954	/* The ref_count is not strictly accurate: it measures the number   */
 14955	/* of entities holding a reference on the object; they may not be   */
 14956	/* mapping the object, or may not be mapping the section holding    */
 14957	/* the target page.  But it's still a ballpark number and, though   */
 14958	/* an over-count, it picks up the copy-on-write cases.              */
1c79356b 14959
91447636
A
14960 /* We could also get a picture of page sharing from pmap_attributes */
 14961	/* but this would undercount, as only faulted-in mappings would     */
14962 /* show up. */
1c79356b 14963
2d21ac55 14964 if (top_object == TRUE && object->shadow)
b0d623f7
A
14965 disposition |= VM_PAGE_QUERY_PAGE_COPIED;
14966
14967 if (! object->internal)
14968 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
2d21ac55
A
14969
14970 if (m == VM_PAGE_NULL)
b0d623f7 14971 goto done_with_object;
2d21ac55 14972
91447636 14973 if (m->fictitious) {
b0d623f7
A
14974 disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
14975 goto done_with_object;
91447636 14976 }
39037602 14977 if (m->dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(m)))
b0d623f7 14978 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
1c79356b 14979
39037602 14980 if (m->reference || pmap_is_referenced(VM_PAGE_GET_PHYS_PAGE(m)))
b0d623f7 14981 disposition |= VM_PAGE_QUERY_PAGE_REF;
1c79356b 14982
39037602 14983 if (m->vm_page_q_state == VM_PAGE_ON_SPECULATIVE_Q)
b0d623f7 14984 disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE;
1c79356b 14985
593a1d5f 14986 if (m->cs_validated)
b0d623f7 14987 disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED;
593a1d5f 14988 if (m->cs_tainted)
b0d623f7 14989 disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED;
c18c124e
A
14990 if (m->cs_nx)
14991 disposition |= VM_PAGE_QUERY_PAGE_CS_NX;
593a1d5f 14992
b0d623f7 14993done_with_object:
2d21ac55 14994 vm_object_unlock(object);
b0d623f7
A
14995done:
14996
14997 switch (flavor) {
14998 case VM_PAGE_INFO_BASIC:
14999 basic_info = (vm_page_info_basic_t) info;
15000 basic_info->disposition = disposition;
15001 basic_info->ref_count = ref_count;
39236c6e
A
15002 basic_info->object_id = (vm_object_id_t) (uintptr_t)
15003 VM_KERNEL_ADDRPERM(object);
6d2010ae
A
15004 basic_info->offset =
15005 (memory_object_offset_t) offset + offset_in_page;
b0d623f7
A
15006 basic_info->depth = depth;
15007 break;
15008 }
0c530ab8 15009
2d21ac55 15010 return retval;
91447636
A
15011}
15012
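
/*
 * Illustrative usage (not part of the original source): a minimal sketch of
 * querying a single page's disposition with vm_map_page_info(), mirroring
 * what vm_map_page_query_internal() does above. The helper name is an
 * assumption for illustration only.
 */
#if 0 /* example only, not compiled */
static boolean_t
example_page_is_resident(
	vm_map_t	map,
	vm_map_offset_t	addr)
{
	vm_page_info_basic_data_t	info;
	mach_msg_type_number_t		count = VM_PAGE_INFO_BASIC_COUNT;

	if (vm_map_page_info(map, addr, VM_PAGE_INFO_BASIC,
			     (vm_page_info_t) &info, &count) != KERN_SUCCESS)
		return FALSE;

	return (info.disposition & VM_PAGE_QUERY_PAGE_PRESENT) ? TRUE : FALSE;
}
#endif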
15013/*
15014 * vm_map_msync
15015 *
 15016 *	Synchronises the specified memory range with its backing store
 15017 *	image by either flushing or cleaning the contents to the
 15018 *	appropriate memory manager, engaging in a memory object
 15019 *	synchronize dialog with that manager.  The client does not return
 15020 *	until the manager issues an m_o_s_completed message.  MIG magically
 15021 *	converts the user task parameter to the task's address map.
15022 *
15023 * interpretation of sync_flags
15024 * VM_SYNC_INVALIDATE - discard pages, only return precious
15025 * pages to manager.
15026 *
15027 * VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
15028 * - discard pages, write dirty or precious
15029 * pages back to memory manager.
15030 *
15031 * VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
15032 * - write dirty or precious pages back to
15033 * the memory manager.
15034 *
15035 * VM_SYNC_CONTIGUOUS - does everything normally, but if there
15036 * is a hole in the region, and we would
15037 * have returned KERN_SUCCESS, return
15038 * KERN_INVALID_ADDRESS instead.
15039 *
15040 * NOTE
 15041 *	The memory object attributes have not yet been implemented;
 15042 *	this function will have to deal with the invalidate attribute.
15043 *
15044 * RETURNS
15045 * KERN_INVALID_TASK Bad task parameter
15046 * KERN_INVALID_ARGUMENT both sync and async were specified.
15047 * KERN_SUCCESS The usual.
15048 * KERN_INVALID_ADDRESS There was a hole in the region.
15049 */
15050
15051kern_return_t
15052vm_map_msync(
15053 vm_map_t map,
15054 vm_map_address_t address,
15055 vm_map_size_t size,
15056 vm_sync_t sync_flags)
15057{
15058 msync_req_t msr;
15059 msync_req_t new_msr;
15060 queue_chain_t req_q; /* queue of requests for this msync */
15061 vm_map_entry_t entry;
15062 vm_map_size_t amount_left;
15063 vm_object_offset_t offset;
15064 boolean_t do_sync_req;
91447636 15065 boolean_t had_hole = FALSE;
2d21ac55 15066 memory_object_t pager;
3e170ce0 15067 vm_map_offset_t pmap_offset;
91447636
A
15068
15069 if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
15070 (sync_flags & VM_SYNC_SYNCHRONOUS))
15071 return(KERN_INVALID_ARGUMENT);
1c79356b
A
15072
15073 /*
91447636 15074 * align address and size on page boundaries
1c79356b 15075 */
39236c6e
A
15076 size = (vm_map_round_page(address + size,
15077 VM_MAP_PAGE_MASK(map)) -
15078 vm_map_trunc_page(address,
15079 VM_MAP_PAGE_MASK(map)));
15080 address = vm_map_trunc_page(address,
15081 VM_MAP_PAGE_MASK(map));
1c79356b 15082
91447636
A
15083 if (map == VM_MAP_NULL)
15084 return(KERN_INVALID_TASK);
1c79356b 15085
91447636
A
15086 if (size == 0)
15087 return(KERN_SUCCESS);
1c79356b 15088
91447636
A
15089 queue_init(&req_q);
15090 amount_left = size;
1c79356b 15091
91447636
A
15092 while (amount_left > 0) {
15093 vm_object_size_t flush_size;
15094 vm_object_t object;
1c79356b 15095
91447636
A
15096 vm_map_lock(map);
15097 if (!vm_map_lookup_entry(map,
3e170ce0 15098 address,
39236c6e 15099 &entry)) {
91447636 15100
2d21ac55 15101 vm_map_size_t skip;
91447636
A
15102
15103 /*
15104 * hole in the address map.
15105 */
15106 had_hole = TRUE;
15107
39037602
A
15108 if (sync_flags & VM_SYNC_KILLPAGES) {
15109 /*
15110 * For VM_SYNC_KILLPAGES, there should be
15111 * no holes in the range, since we couldn't
15112 * prevent someone else from allocating in
15113 * that hole and we wouldn't want to "kill"
15114 * their pages.
15115 */
15116 vm_map_unlock(map);
15117 break;
15118 }
15119
91447636
A
15120 /*
15121 * Check for empty map.
15122 */
15123 if (entry == vm_map_to_entry(map) &&
15124 entry->vme_next == entry) {
15125 vm_map_unlock(map);
15126 break;
15127 }
15128 /*
15129 * Check that we don't wrap and that
15130 * we have at least one real map entry.
15131 */
15132 if ((map->hdr.nentries == 0) ||
15133 (entry->vme_next->vme_start < address)) {
15134 vm_map_unlock(map);
15135 break;
15136 }
15137 /*
15138 * Move up to the next entry if needed
15139 */
15140 skip = (entry->vme_next->vme_start - address);
15141 if (skip >= amount_left)
15142 amount_left = 0;
15143 else
15144 amount_left -= skip;
15145 address = entry->vme_next->vme_start;
15146 vm_map_unlock(map);
15147 continue;
15148 }
1c79356b 15149
91447636 15150 offset = address - entry->vme_start;
3e170ce0 15151 pmap_offset = address;
1c79356b 15152
91447636
A
15153 /*
15154 * do we have more to flush than is contained in this
 15155		 * entry?
15156 */
15157 if (amount_left + entry->vme_start + offset > entry->vme_end) {
15158 flush_size = entry->vme_end -
2d21ac55 15159 (entry->vme_start + offset);
91447636
A
15160 } else {
15161 flush_size = amount_left;
15162 }
15163 amount_left -= flush_size;
15164 address += flush_size;
1c79356b 15165
91447636
A
15166 if (entry->is_sub_map == TRUE) {
15167 vm_map_t local_map;
15168 vm_map_offset_t local_offset;
1c79356b 15169
3e170ce0
A
15170 local_map = VME_SUBMAP(entry);
15171 local_offset = VME_OFFSET(entry);
91447636
A
15172 vm_map_unlock(map);
15173 if (vm_map_msync(
2d21ac55
A
15174 local_map,
15175 local_offset,
15176 flush_size,
15177 sync_flags) == KERN_INVALID_ADDRESS) {
91447636
A
15178 had_hole = TRUE;
15179 }
15180 continue;
15181 }
3e170ce0 15182 object = VME_OBJECT(entry);
1c79356b 15183
91447636
A
15184 /*
15185 * We can't sync this object if the object has not been
15186 * created yet
15187 */
15188 if (object == VM_OBJECT_NULL) {
15189 vm_map_unlock(map);
15190 continue;
15191 }
3e170ce0 15192 offset += VME_OFFSET(entry);
1c79356b 15193
91447636 15194 vm_object_lock(object);
1c79356b 15195
91447636 15196 if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
b0d623f7
A
15197 int kill_pages = 0;
15198 boolean_t reusable_pages = FALSE;
91447636
A
15199
15200 if (sync_flags & VM_SYNC_KILLPAGES) {
39037602
A
15201 if (((object->ref_count == 1) ||
15202 ((object->copy_strategy !=
15203 MEMORY_OBJECT_COPY_SYMMETRIC) &&
15204 (object->copy == VM_OBJECT_NULL))) &&
15205 (object->shadow == VM_OBJECT_NULL)) {
15206 if (object->ref_count != 1) {
15207 vm_page_stats_reusable.free_shared++;
15208 }
91447636 15209 kill_pages = 1;
39037602 15210 } else {
91447636 15211 kill_pages = -1;
39037602 15212 }
91447636
A
15213 }
15214 if (kill_pages != -1)
3e170ce0
A
15215 vm_object_deactivate_pages(
15216 object,
15217 offset,
15218 (vm_object_size_t) flush_size,
15219 kill_pages,
15220 reusable_pages,
15221 map->pmap,
15222 pmap_offset);
91447636
A
15223 vm_object_unlock(object);
15224 vm_map_unlock(map);
15225 continue;
1c79356b 15226 }
91447636
A
15227 /*
15228 * We can't sync this object if there isn't a pager.
15229 * Don't bother to sync internal objects, since there can't
15230 * be any "permanent" storage for these objects anyway.
15231 */
15232 if ((object->pager == MEMORY_OBJECT_NULL) ||
15233 (object->internal) || (object->private)) {
15234 vm_object_unlock(object);
15235 vm_map_unlock(map);
15236 continue;
15237 }
15238 /*
15239 * keep reference on the object until syncing is done
15240 */
2d21ac55 15241 vm_object_reference_locked(object);
91447636 15242 vm_object_unlock(object);
1c79356b 15243
91447636 15244 vm_map_unlock(map);
1c79356b 15245
91447636 15246 do_sync_req = vm_object_sync(object,
2d21ac55
A
15247 offset,
15248 flush_size,
15249 sync_flags & VM_SYNC_INVALIDATE,
b0d623f7
A
15250 ((sync_flags & VM_SYNC_SYNCHRONOUS) ||
15251 (sync_flags & VM_SYNC_ASYNCHRONOUS)),
2d21ac55 15252 sync_flags & VM_SYNC_SYNCHRONOUS);
91447636
A
15253 /*
 15254		 * only send an m_o_s if we returned pages or if the entry
 15255		 * is writable (i.e. dirty pages may have already been sent back)
15256 */
b0d623f7 15257 if (!do_sync_req) {
2d21ac55
A
15258 if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) {
15259 /*
15260 * clear out the clustering and read-ahead hints
15261 */
15262 vm_object_lock(object);
15263
15264 object->pages_created = 0;
15265 object->pages_used = 0;
15266 object->sequential = 0;
15267 object->last_alloc = 0;
15268
15269 vm_object_unlock(object);
15270 }
91447636
A
15271 vm_object_deallocate(object);
15272 continue;
1c79356b 15273 }
91447636 15274 msync_req_alloc(new_msr);
1c79356b 15275
91447636
A
15276 vm_object_lock(object);
15277 offset += object->paging_offset;
1c79356b 15278
91447636
A
15279 new_msr->offset = offset;
15280 new_msr->length = flush_size;
15281 new_msr->object = object;
15282 new_msr->flag = VM_MSYNC_SYNCHRONIZING;
2d21ac55
A
15283 re_iterate:
15284
15285 /*
15286 * We can't sync this object if there isn't a pager. The
15287 * pager can disappear anytime we're not holding the object
15288 * lock. So this has to be checked anytime we goto re_iterate.
15289 */
15290
15291 pager = object->pager;
15292
15293 if (pager == MEMORY_OBJECT_NULL) {
15294 vm_object_unlock(object);
15295 vm_object_deallocate(object);
39236c6e
A
15296 msync_req_free(new_msr);
15297 new_msr = NULL;
2d21ac55
A
15298 continue;
15299 }
15300
91447636
A
15301 queue_iterate(&object->msr_q, msr, msync_req_t, msr_q) {
15302 /*
 15303			 * need to check for an overlapping entry; if one is found,
 15304			 * wait for the overlapping msr to be done, then reiterate
15305 */
15306 msr_lock(msr);
15307 if (msr->flag == VM_MSYNC_SYNCHRONIZING &&
15308 ((offset >= msr->offset &&
15309 offset < (msr->offset + msr->length)) ||
15310 (msr->offset >= offset &&
15311 msr->offset < (offset + flush_size))))
15312 {
15313 assert_wait((event_t) msr,THREAD_INTERRUPTIBLE);
15314 msr_unlock(msr);
15315 vm_object_unlock(object);
15316 thread_block(THREAD_CONTINUE_NULL);
15317 vm_object_lock(object);
15318 goto re_iterate;
15319 }
15320 msr_unlock(msr);
15321 }/* queue_iterate */
1c79356b 15322
91447636 15323 queue_enter(&object->msr_q, new_msr, msync_req_t, msr_q);
2d21ac55
A
15324
15325 vm_object_paging_begin(object);
91447636 15326 vm_object_unlock(object);
1c79356b 15327
91447636
A
15328 queue_enter(&req_q, new_msr, msync_req_t, req_q);
15329
15330 (void) memory_object_synchronize(
2d21ac55
A
15331 pager,
15332 offset,
15333 flush_size,
15334 sync_flags & ~VM_SYNC_CONTIGUOUS);
15335
15336 vm_object_lock(object);
15337 vm_object_paging_end(object);
15338 vm_object_unlock(object);
91447636
A
15339 }/* while */
15340
15341 /*
 15342	 * wait for memory_object_synchronize_completed messages from pager(s)
15343 */
15344
15345 while (!queue_empty(&req_q)) {
15346 msr = (msync_req_t)queue_first(&req_q);
15347 msr_lock(msr);
15348 while(msr->flag != VM_MSYNC_DONE) {
15349 assert_wait((event_t) msr, THREAD_INTERRUPTIBLE);
15350 msr_unlock(msr);
15351 thread_block(THREAD_CONTINUE_NULL);
15352 msr_lock(msr);
15353 }/* while */
15354 queue_remove(&req_q, msr, msync_req_t, req_q);
15355 msr_unlock(msr);
15356 vm_object_deallocate(msr->object);
15357 msync_req_free(msr);
15358 }/* queue_iterate */
15359
15360 /* for proper msync() behaviour */
15361 if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS))
15362 return(KERN_INVALID_ADDRESS);
15363
15364 return(KERN_SUCCESS);
15365}/* vm_msync */
1c79356b 15366
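
/*
 * Illustrative usage (not part of the original source): a minimal sketch of
 * a synchronous flush with vm_map_msync(), using the sync_flags described
 * in the block comment above. The helper name is an assumption.
 */
#if 0 /* example only, not compiled */
static kern_return_t
example_flush_range(
	vm_map_t	 map,
	vm_map_address_t addr,
	vm_map_size_t	 size)
{
	/*
	 * Write dirty or precious pages back to the memory manager and,
	 * because of VM_SYNC_CONTIGUOUS, report KERN_INVALID_ADDRESS if
	 * the range contains a hole.
	 */
	return vm_map_msync(map, addr, size,
			    VM_SYNC_SYNCHRONOUS | VM_SYNC_CONTIGUOUS);
}
#endif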
1c79356b 15367/*
91447636
A
15368 * Routine: convert_port_entry_to_map
15369 * Purpose:
15370 * Convert from a port specifying an entry or a task
15371 * to a map. Doesn't consume the port ref; produces a map ref,
15372 * which may be null. Unlike convert_port_to_map, the
 15373 *		port may be backed by either a task or a named entry.
15374 * Conditions:
15375 * Nothing locked.
1c79356b 15376 */
1c79356b 15377
1c79356b 15378
91447636
A
15379vm_map_t
15380convert_port_entry_to_map(
15381 ipc_port_t port)
15382{
15383 vm_map_t map;
15384 vm_named_entry_t named_entry;
2d21ac55 15385 uint32_t try_failed_count = 0;
1c79356b 15386
91447636
A
15387 if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
15388 while(TRUE) {
15389 ip_lock(port);
15390 if(ip_active(port) && (ip_kotype(port)
2d21ac55 15391 == IKOT_NAMED_ENTRY)) {
91447636 15392 named_entry =
2d21ac55 15393 (vm_named_entry_t)port->ip_kobject;
b0d623f7 15394 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
91447636 15395 ip_unlock(port);
2d21ac55
A
15396
15397 try_failed_count++;
15398 mutex_pause(try_failed_count);
91447636
A
15399 continue;
15400 }
15401 named_entry->ref_count++;
b0d623f7 15402 lck_mtx_unlock(&(named_entry)->Lock);
91447636
A
15403 ip_unlock(port);
15404 if ((named_entry->is_sub_map) &&
2d21ac55
A
15405 (named_entry->protection
15406 & VM_PROT_WRITE)) {
91447636
A
15407 map = named_entry->backing.map;
15408 } else {
15409 mach_destroy_memory_entry(port);
15410 return VM_MAP_NULL;
15411 }
15412 vm_map_reference_swap(map);
15413 mach_destroy_memory_entry(port);
15414 break;
15415 }
15416 else
15417 return VM_MAP_NULL;
15418 }
1c79356b 15419 }
91447636
A
15420 else
15421 map = convert_port_to_map(port);
1c79356b 15422
91447636
A
15423 return map;
15424}
1c79356b 15425
91447636
A
15426/*
15427 * Routine: convert_port_entry_to_object
15428 * Purpose:
15429 * Convert from a port specifying a named entry to an
 15431 *		object.  Doesn't consume the port ref; produces an object ref,
 15432 *		which may be null.
15432 * Conditions:
15433 * Nothing locked.
15434 */
1c79356b 15435
1c79356b 15436
91447636
A
15437vm_object_t
15438convert_port_entry_to_object(
15439 ipc_port_t port)
15440{
39236c6e 15441 vm_object_t object = VM_OBJECT_NULL;
91447636 15442 vm_named_entry_t named_entry;
39236c6e
A
15443 uint32_t try_failed_count = 0;
15444
15445 if (IP_VALID(port) &&
15446 (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
15447 try_again:
15448 ip_lock(port);
15449 if (ip_active(port) &&
15450 (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
15451 named_entry = (vm_named_entry_t)port->ip_kobject;
15452 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
91447636 15453 ip_unlock(port);
39236c6e
A
15454 try_failed_count++;
15455 mutex_pause(try_failed_count);
15456 goto try_again;
15457 }
15458 named_entry->ref_count++;
15459 lck_mtx_unlock(&(named_entry)->Lock);
15460 ip_unlock(port);
15461 if (!(named_entry->is_sub_map) &&
15462 !(named_entry->is_pager) &&
15463 !(named_entry->is_copy) &&
15464 (named_entry->protection & VM_PROT_WRITE)) {
15465 object = named_entry->backing.object;
15466 vm_object_reference(object);
91447636 15467 }
39236c6e 15468 mach_destroy_memory_entry(port);
1c79356b 15469 }
1c79356b 15470 }
91447636
A
15471
15472 return object;
1c79356b 15473}
9bccf70c
A
15474
15475/*
91447636
A
15476 * Export routines to other components for the things we access locally through
15477 * macros.
9bccf70c 15478 */
91447636
A
15479#undef current_map
15480vm_map_t
15481current_map(void)
9bccf70c 15482{
91447636 15483 return (current_map_fast());
9bccf70c
A
15484}
15485
15486/*
15487 * vm_map_reference:
15488 *
15489 * Most code internal to the osfmk will go through a
15490 * macro defining this. This is always here for the
15491 * use of other kernel components.
15492 */
15493#undef vm_map_reference
15494void
15495vm_map_reference(
39037602 15496 vm_map_t map)
9bccf70c
A
15497{
15498 if (map == VM_MAP_NULL)
15499 return;
15500
b0d623f7 15501 lck_mtx_lock(&map->s_lock);
9bccf70c
A
15502#if TASK_SWAPPER
15503 assert(map->res_count > 0);
15504 assert(map->ref_count >= map->res_count);
15505 map->res_count++;
15506#endif
15507 map->ref_count++;
b0d623f7 15508 lck_mtx_unlock(&map->s_lock);
9bccf70c
A
15509}
15510
15511/*
15512 * vm_map_deallocate:
15513 *
15514 * Removes a reference from the specified map,
15515 * destroying it if no references remain.
15516 * The map should not be locked.
15517 */
15518void
15519vm_map_deallocate(
39037602 15520 vm_map_t map)
9bccf70c
A
15521{
15522 unsigned int ref;
15523
15524 if (map == VM_MAP_NULL)
15525 return;
15526
b0d623f7 15527 lck_mtx_lock(&map->s_lock);
9bccf70c
A
15528 ref = --map->ref_count;
15529 if (ref > 0) {
15530 vm_map_res_deallocate(map);
b0d623f7 15531 lck_mtx_unlock(&map->s_lock);
9bccf70c
A
15532 return;
15533 }
15534 assert(map->ref_count == 0);
b0d623f7 15535 lck_mtx_unlock(&map->s_lock);
9bccf70c
A
15536
15537#if TASK_SWAPPER
15538 /*
15539 * The map residence count isn't decremented here because
15540 * the vm_map_delete below will traverse the entire map,
15541 * deleting entries, and the residence counts on objects
15542 * and sharing maps will go away then.
15543 */
15544#endif
15545
2d21ac55 15546 vm_map_destroy(map, VM_MAP_NO_FLAGS);
0c530ab8 15547}
91447636 15548
91447636 15549
0c530ab8
A
15550void
15551vm_map_disable_NX(vm_map_t map)
15552{
15553 if (map == NULL)
15554 return;
15555 if (map->pmap == NULL)
15556 return;
15557
15558 pmap_disable_NX(map->pmap);
15559}
15560
6d2010ae
A
15561void
15562vm_map_disallow_data_exec(vm_map_t map)
15563{
15564 if (map == NULL)
15565 return;
15566
15567 map->map_disallow_data_exec = TRUE;
15568}
15569
0c530ab8
A
15570/* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
15571 * more descriptive.
15572 */
15573void
15574vm_map_set_32bit(vm_map_t map)
15575{
15576 map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
15577}
15578
15579
15580void
15581vm_map_set_64bit(vm_map_t map)
15582{
15583 map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
15584}
15585
813fb2f6
A
15586/*
15587 * Expand the maximum size of an existing map.
15588 */
15589void
15590vm_map_set_jumbo(vm_map_t map)
15591{
15592 (void) map;
15593}
15594
0c530ab8 15595vm_map_offset_t
3e170ce0 15596vm_compute_max_offset(boolean_t is64)
0c530ab8
A
15597{
15598 return (is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS);
15599}
15600
39236c6e
A
15601uint64_t
15602vm_map_get_max_aslr_slide_pages(vm_map_t map)
15603{
15604 return (1 << (vm_map_is_64bit(map) ? 16 : 8));
15605}
15606
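
/*
 * Illustrative usage (not part of the original source): the value above is
 * a page count, so the maximum slide expressed in bytes can be obtained by
 * scaling it by the map's page size. The helper name is an assumption for
 * illustration only.
 */
#if 0 /* example only, not compiled */
static uint64_t
example_max_aslr_slide_bytes(
	vm_map_t	map)
{
	/* pages * page size = maximum slide, in bytes */
	return vm_map_get_max_aslr_slide_pages(map) *
	       (uint64_t) vm_map_page_size(map);
}
#endif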
0c530ab8 15607boolean_t
2d21ac55
A
15608vm_map_is_64bit(
15609 vm_map_t map)
15610{
15611 return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
15612}
15613
15614boolean_t
316670eb
A
15615vm_map_has_hard_pagezero(
15616 vm_map_t map,
15617 vm_map_offset_t pagezero_size)
0c530ab8
A
15618{
15619 /*
15620 * XXX FBDP
15621 * We should lock the VM map (for read) here but we can get away
15622 * with it for now because there can't really be any race condition:
15623 * the VM map's min_offset is changed only when the VM map is created
15624 * and when the zero page is established (when the binary gets loaded),
15625 * and this routine gets called only when the task terminates and the
15626 * VM map is being torn down, and when a new map is created via
15627 * load_machfile()/execve().
15628 */
316670eb 15629 return (map->min_offset >= pagezero_size);
0c530ab8
A
15630}
15631
316670eb
A
15632/*
 15633 *	Raise a VM map's maximum offset.
15634 */
15635kern_return_t
15636vm_map_raise_max_offset(
15637 vm_map_t map,
15638 vm_map_offset_t new_max_offset)
15639{
15640 kern_return_t ret;
15641
15642 vm_map_lock(map);
15643 ret = KERN_INVALID_ADDRESS;
15644
15645 if (new_max_offset >= map->max_offset) {
15646 if (!vm_map_is_64bit(map)) {
15647 if (new_max_offset <= (vm_map_offset_t)VM_MAX_ADDRESS) {
15648 map->max_offset = new_max_offset;
15649 ret = KERN_SUCCESS;
15650 }
15651 } else {
15652 if (new_max_offset <= (vm_map_offset_t)MACH_VM_MAX_ADDRESS) {
15653 map->max_offset = new_max_offset;
15654 ret = KERN_SUCCESS;
15655 }
15656 }
15657 }
15658
15659 vm_map_unlock(map);
15660 return ret;
15661}
15662
15663
0c530ab8
A
15664/*
15665 * Raise a VM map's minimum offset.
15666 * To strictly enforce "page zero" reservation.
15667 */
15668kern_return_t
15669vm_map_raise_min_offset(
15670 vm_map_t map,
15671 vm_map_offset_t new_min_offset)
15672{
15673 vm_map_entry_t first_entry;
15674
39236c6e
A
15675 new_min_offset = vm_map_round_page(new_min_offset,
15676 VM_MAP_PAGE_MASK(map));
0c530ab8
A
15677
15678 vm_map_lock(map);
15679
15680 if (new_min_offset < map->min_offset) {
15681 /*
15682 * Can't move min_offset backwards, as that would expose
15683 * a part of the address space that was previously, and for
15684 * possibly good reasons, inaccessible.
15685 */
15686 vm_map_unlock(map);
15687 return KERN_INVALID_ADDRESS;
15688 }
3e170ce0
A
15689 if (new_min_offset >= map->max_offset) {
15690 /* can't go beyond the end of the address space */
15691 vm_map_unlock(map);
15692 return KERN_INVALID_ADDRESS;
15693 }
0c530ab8
A
15694
15695 first_entry = vm_map_first_entry(map);
15696 if (first_entry != vm_map_to_entry(map) &&
15697 first_entry->vme_start < new_min_offset) {
15698 /*
15699 * Some memory was already allocated below the new
 15700		 * minimum offset.  It's too late to change it now...
15701 */
15702 vm_map_unlock(map);
15703 return KERN_NO_SPACE;
15704 }
15705
15706 map->min_offset = new_min_offset;
15707
3e170ce0
A
15708 assert(map->holes_list);
15709 map->holes_list->start = new_min_offset;
15710 assert(new_min_offset < map->holes_list->end);
15711
0c530ab8
A
15712 vm_map_unlock(map);
15713
15714 return KERN_SUCCESS;
15715}
2d21ac55
A
15716
15717/*
15718 * Set the limit on the maximum amount of user wired memory allowed for this map.
15719 * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side of
15720 * the kernel. The limits are checked in the mach VM side, so we keep a copy so we
15721 * don't have to reach over to the BSD data structures.
15722 */
15723
15724void
15725vm_map_set_user_wire_limit(vm_map_t map,
15726 vm_size_t limit)
15727{
15728 map->user_wire_limit = limit;
15729}
593a1d5f 15730
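
/*
 * Illustrative usage (not part of the original source): a minimal sketch of
 * how the MEMLOCK-style limit described above could be propagated into the
 * Mach layer. The helper name and the source of "rlim_cur" are assumptions.
 */
#if 0 /* example only, not compiled */
static void
example_apply_memlock_limit(
	vm_map_t	map,
	uint64_t	rlim_cur)
{
	/* keep the Mach-side copy of the user wired-memory limit in sync */
	vm_map_set_user_wire_limit(map, (vm_size_t) rlim_cur);
}
#endif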
b0d623f7
A
15731
15732void vm_map_switch_protect(vm_map_t map,
15733 boolean_t val)
593a1d5f
A
15734{
15735 vm_map_lock(map);
b0d623f7 15736 map->switch_protect=val;
593a1d5f 15737 vm_map_unlock(map);
b0d623f7 15738}
b7266188 15739
39236c6e
A
15740/*
15741 * IOKit has mapped a region into this map; adjust the pmap's ledgers appropriately.
15742 * phys_footprint is a composite limit consisting of iokit + physmem, so we need to
15743 * bump both counters.
15744 */
15745void
15746vm_map_iokit_mapped_region(vm_map_t map, vm_size_t bytes)
15747{
15748 pmap_t pmap = vm_map_pmap(map);
15749
fe8ab488 15750 ledger_credit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
39236c6e
A
15751 ledger_credit(pmap->ledger, task_ledgers.phys_footprint, bytes);
15752}
15753
15754void
15755vm_map_iokit_unmapped_region(vm_map_t map, vm_size_t bytes)
15756{
15757 pmap_t pmap = vm_map_pmap(map);
15758
fe8ab488 15759 ledger_debit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
39236c6e
A
15760 ledger_debit(pmap->ledger, task_ledgers.phys_footprint, bytes);
15761}
15762
b7266188
A
15763/* Add (generate) code signature for memory range */
15764#if CONFIG_DYNAMIC_CODE_SIGNING
15765kern_return_t vm_map_sign(vm_map_t map,
15766 vm_map_offset_t start,
15767 vm_map_offset_t end)
15768{
15769 vm_map_entry_t entry;
15770 vm_page_t m;
15771 vm_object_t object;
15772
15773 /*
15774 * Vet all the input parameters and current type and state of the
 15775	 * underlying object.  Return with an error if anything is amiss.
15776 */
15777 if (map == VM_MAP_NULL)
15778 return(KERN_INVALID_ARGUMENT);
15779
15780 vm_map_lock_read(map);
15781
15782 if (!vm_map_lookup_entry(map, start, &entry) || entry->is_sub_map) {
15783 /*
15784 * Must pass a valid non-submap address.
15785 */
15786 vm_map_unlock_read(map);
15787 return(KERN_INVALID_ADDRESS);
15788 }
15789
15790 if((entry->vme_start > start) || (entry->vme_end < end)) {
15791 /*
15792 * Map entry doesn't cover the requested range. Not handling
15793 * this situation currently.
15794 */
15795 vm_map_unlock_read(map);
15796 return(KERN_INVALID_ARGUMENT);
15797 }
15798
3e170ce0 15799 object = VME_OBJECT(entry);
b7266188
A
15800 if (object == VM_OBJECT_NULL) {
15801 /*
15802 * Object must already be present or we can't sign.
15803 */
15804 vm_map_unlock_read(map);
15805 return KERN_INVALID_ARGUMENT;
15806 }
15807
15808 vm_object_lock(object);
15809 vm_map_unlock_read(map);
15810
15811 while(start < end) {
15812 uint32_t refmod;
15813
3e170ce0
A
15814 m = vm_page_lookup(object,
15815 start - entry->vme_start + VME_OFFSET(entry));
b7266188
A
15816 if (m==VM_PAGE_NULL) {
 15817		if (m==VM_PAGE_NULL) {
 15818			/* should we try to fault a page here? we can probably
15818 * demand it exists and is locked for this request */
15819 vm_object_unlock(object);
15820 return KERN_FAILURE;
15821 }
15822 /* deal with special page status */
15823 if (m->busy ||
15824 (m->unusual && (m->error || m->restart || m->private || m->absent))) {
15825 vm_object_unlock(object);
15826 return KERN_FAILURE;
15827 }
15828
15829 /* Page is OK... now "validate" it */
15830 /* This is the place where we'll call out to create a code
15831 * directory, later */
15832 m->cs_validated = TRUE;
15833
15834 /* The page is now "clean" for codesigning purposes. That means
15835 * we don't consider it as modified (wpmapped) anymore. But
15836 * we'll disconnect the page so we note any future modification
15837 * attempts. */
15838 m->wpmapped = FALSE;
39037602 15839 refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
b7266188
A
15840
15841 /* Pull the dirty status from the pmap, since we cleared the
15842 * wpmapped bit */
15843 if ((refmod & VM_MEM_MODIFIED) && !m->dirty) {
316670eb 15844 SET_PAGE_DIRTY(m, FALSE);
b7266188
A
15845 }
15846
15847 /* On to the next page */
15848 start += PAGE_SIZE;
15849 }
15850 vm_object_unlock(object);
15851
15852 return KERN_SUCCESS;
15853}
15854#endif
6d2010ae 15855
fe8ab488
A
15856kern_return_t vm_map_partial_reap(vm_map_t map, unsigned int *reclaimed_resident, unsigned int *reclaimed_compressed)
15857{
15858 vm_map_entry_t entry = VM_MAP_ENTRY_NULL;
15859 vm_map_entry_t next_entry;
15860 kern_return_t kr = KERN_SUCCESS;
15861 vm_map_t zap_map;
15862
15863 vm_map_lock(map);
15864
15865 /*
15866 * We use a "zap_map" to avoid having to unlock
15867 * the "map" in vm_map_delete().
15868 */
15869 zap_map = vm_map_create(PMAP_NULL,
15870 map->min_offset,
15871 map->max_offset,
15872 map->hdr.entries_pageable);
15873
15874 if (zap_map == VM_MAP_NULL) {
15875 return KERN_RESOURCE_SHORTAGE;
15876 }
15877
15878 vm_map_set_page_shift(zap_map,
15879 VM_MAP_PAGE_SHIFT(map));
3e170ce0 15880 vm_map_disable_hole_optimization(zap_map);
fe8ab488
A
15881
15882 for (entry = vm_map_first_entry(map);
15883 entry != vm_map_to_entry(map);
15884 entry = next_entry) {
15885 next_entry = entry->vme_next;
15886
3e170ce0
A
15887 if (VME_OBJECT(entry) &&
15888 !entry->is_sub_map &&
15889 (VME_OBJECT(entry)->internal == TRUE) &&
15890 (VME_OBJECT(entry)->ref_count == 1)) {
fe8ab488 15891
3e170ce0
A
15892 *reclaimed_resident += VME_OBJECT(entry)->resident_page_count;
15893 *reclaimed_compressed += vm_compressor_pager_get_count(VME_OBJECT(entry)->pager);
fe8ab488
A
15894
15895 (void)vm_map_delete(map,
15896 entry->vme_start,
15897 entry->vme_end,
15898 VM_MAP_REMOVE_SAVE_ENTRIES,
15899 zap_map);
15900 }
15901 }
15902
15903 vm_map_unlock(map);
15904
15905 /*
15906 * Get rid of the "zap_maps" and all the map entries that
15907 * they may still contain.
15908 */
15909 if (zap_map != VM_MAP_NULL) {
15910 vm_map_destroy(zap_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
15911 zap_map = VM_MAP_NULL;
15912 }
15913
15914 return kr;
15915}
15916
6d2010ae 15917
39037602
A
15918#if DEVELOPMENT || DEBUG
15919
15920int
15921vm_map_disconnect_page_mappings(
15922 vm_map_t map,
15923 boolean_t do_unnest)
6d2010ae
A
15924{
15925 vm_map_entry_t entry;
39037602
A
15926 int page_count = 0;
15927
15928 if (do_unnest == TRUE) {
15929#ifndef NO_NESTED_PMAP
15930 vm_map_lock(map);
15931
15932 for (entry = vm_map_first_entry(map);
15933 entry != vm_map_to_entry(map);
15934 entry = entry->vme_next) {
15935
15936 if (entry->is_sub_map && entry->use_pmap) {
15937 /*
15938 * Make sure the range between the start of this entry and
15939 * the end of this entry is no longer nested, so that
15940 * we will only remove mappings from the pmap in use by this
15941 * this task
15942 */
15943 vm_map_clip_unnest(map, entry, entry->vme_start, entry->vme_end);
15944 }
15945 }
15946 vm_map_unlock(map);
15947#endif
15948 }
6d2010ae 15949 vm_map_lock_read(map);
39037602
A
15950
15951 page_count = map->pmap->stats.resident_count;
15952
6d2010ae
A
15953 for (entry = vm_map_first_entry(map);
15954 entry != vm_map_to_entry(map);
15955 entry = entry->vme_next) {
6d2010ae 15956
39037602
A
15957 if (!entry->is_sub_map && ((VME_OBJECT(entry) == 0) ||
15958 (VME_OBJECT(entry)->phys_contiguous))) {
6d2010ae
A
15959 continue;
15960 }
39037602
A
15961 if (entry->is_sub_map)
15962 assert(!entry->use_pmap);
6d2010ae 15963
39037602 15964 pmap_remove_options(map->pmap, entry->vme_start, entry->vme_end, 0);
6d2010ae 15965 }
6d2010ae
A
15966 vm_map_unlock_read(map);
15967
39037602 15968 return page_count;
6d2010ae
A
15969}
15970
39037602
A
15971#endif
15972
15973
15974#if CONFIG_FREEZE
15975
15976
3e170ce0
A
15977int c_freezer_swapout_count;
15978int c_freezer_compression_count = 0;
15979AbsoluteTime c_freezer_last_yield_ts = 0;
15980
6d2010ae
A
15981kern_return_t vm_map_freeze(
15982 vm_map_t map,
15983 unsigned int *purgeable_count,
15984 unsigned int *wired_count,
15985 unsigned int *clean_count,
15986 unsigned int *dirty_count,
39037602 15987 __unused unsigned int dirty_budget,
6d2010ae
A
15988 boolean_t *has_shared)
15989{
39236c6e
A
15990 vm_map_entry_t entry2 = VM_MAP_ENTRY_NULL;
15991 kern_return_t kr = KERN_SUCCESS;
6d2010ae
A
15992
15993 *purgeable_count = *wired_count = *clean_count = *dirty_count = 0;
15994 *has_shared = FALSE;
15995
6d2010ae
A
15996 /*
15997 * We need the exclusive lock here so that we can
15998 * block any page faults or lookups while we are
15999 * in the middle of freezing this vm map.
16000 */
16001 vm_map_lock(map);
16002
39037602
A
16003 assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
16004
16005 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
16006 kr = KERN_NO_SPACE;
16007 goto done;
6d2010ae 16008 }
39037602 16009
3e170ce0
A
16010 c_freezer_compression_count = 0;
16011 clock_get_uptime(&c_freezer_last_yield_ts);
16012
6d2010ae
A
16013 for (entry2 = vm_map_first_entry(map);
16014 entry2 != vm_map_to_entry(map);
16015 entry2 = entry2->vme_next) {
16016
3e170ce0 16017 vm_object_t src_object = VME_OBJECT(entry2);
6d2010ae 16018
39037602 16019 if (src_object &&
3e170ce0 16020 !entry2->is_sub_map &&
39037602 16021 !src_object->phys_contiguous) {
39236c6e 16022 /* If eligible, scan the entry, moving eligible pages over to our parent object */
6d2010ae 16023
39037602 16024 if (src_object->internal == TRUE) {
3e170ce0 16025
39037602
A
16026 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
16027 /*
16028 * Pages belonging to this object could be swapped to disk.
16029 * Make sure it's not a shared object because we could end
16030 * up just bringing it back in again.
16031 */
16032 if (src_object->ref_count > 1) {
16033 continue;
3e170ce0 16034 }
3e170ce0 16035 }
39037602 16036 vm_object_compressed_freezer_pageout(src_object);
3e170ce0
A
16037
16038 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
16039 kr = KERN_NO_SPACE;
16040 break;
39236c6e 16041 }
6d2010ae
A
16042 }
16043 }
16044 }
6d2010ae
A
16045done:
16046 vm_map_unlock(map);
6d2010ae 16047
39037602
A
16048 vm_object_compressed_freezer_done();
16049
16050 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
3e170ce0
A
16051 /*
16052 * reset the counter tracking the # of swapped c_segs
16053 * because we are now done with this freeze session and task.
16054 */
16055 c_freezer_swapout_count = 0;
16056 }
6d2010ae
A
16057 return kr;
16058}
16059
6d2010ae 16060#endif
e2d2fc5c 16061
e2d2fc5c
A
16062/*
16063 * vm_map_entry_should_cow_for_true_share:
16064 *
 16066 * Determines if the map entry should be clipped and set up for copy-on-write
16066 * to avoid applying "true_share" to a large VM object when only a subset is
16067 * targeted.
16068 *
16069 * For now, we target only the map entries created for the Objective C
16070 * Garbage Collector, which initially have the following properties:
16071 * - alias == VM_MEMORY_MALLOC
16072 * - wired_count == 0
16073 * - !needs_copy
16074 * and a VM object with:
16075 * - internal
16076 * - copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC
16077 * - !true_share
16078 * - vo_size == ANON_CHUNK_SIZE
3e170ce0
A
16079 *
16080 * Only non-kernel map entries.
e2d2fc5c
A
16081 */
16082boolean_t
16083vm_map_entry_should_cow_for_true_share(
16084 vm_map_entry_t entry)
16085{
16086 vm_object_t object;
16087
16088 if (entry->is_sub_map) {
16089 /* entry does not point at a VM object */
16090 return FALSE;
16091 }
16092
16093 if (entry->needs_copy) {
16094 /* already set for copy_on_write: done! */
16095 return FALSE;
16096 }
16097
3e170ce0
A
16098 if (VME_ALIAS(entry) != VM_MEMORY_MALLOC &&
16099 VME_ALIAS(entry) != VM_MEMORY_MALLOC_SMALL) {
fe8ab488 16100 /* not a malloc heap or Obj-C Garbage Collector heap */
e2d2fc5c
A
16101 return FALSE;
16102 }
16103
16104 if (entry->wired_count) {
16105 /* wired: can't change the map entry... */
fe8ab488 16106 vm_counters.should_cow_but_wired++;
e2d2fc5c
A
16107 return FALSE;
16108 }
16109
3e170ce0 16110 object = VME_OBJECT(entry);
e2d2fc5c
A
16111
16112 if (object == VM_OBJECT_NULL) {
16113 /* no object yet... */
16114 return FALSE;
16115 }
16116
16117 if (!object->internal) {
16118 /* not an internal object */
16119 return FALSE;
16120 }
16121
16122 if (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
16123 /* not the default copy strategy */
16124 return FALSE;
16125 }
16126
16127 if (object->true_share) {
16128 /* already true_share: too late to avoid it */
16129 return FALSE;
16130 }
16131
3e170ce0 16132 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC &&
fe8ab488
A
16133 object->vo_size != ANON_CHUNK_SIZE) {
16134 /* ... not an object created for the ObjC Garbage Collector */
16135 return FALSE;
16136 }
16137
3e170ce0 16138 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_SMALL &&
fe8ab488
A
16139 object->vo_size != 2048 * 4096) {
16140 /* ... not a "MALLOC_SMALL" heap */
e2d2fc5c
A
16141 return FALSE;
16142 }
16143
16144 /*
16145 * All the criteria match: we have a large object being targeted for "true_share".
16146 * To limit the adverse side-effects linked with "true_share", tell the caller to
16147 * try and avoid setting up the entire object for "true_share" by clipping the
16148 * targeted range and setting it up for copy-on-write.
16149 */
16150 return TRUE;
16151}
39236c6e
A
16152
16153vm_map_offset_t
16154vm_map_round_page_mask(
16155 vm_map_offset_t offset,
16156 vm_map_offset_t mask)
16157{
16158 return VM_MAP_ROUND_PAGE(offset, mask);
16159}
16160
16161vm_map_offset_t
16162vm_map_trunc_page_mask(
16163 vm_map_offset_t offset,
16164 vm_map_offset_t mask)
16165{
16166 return VM_MAP_TRUNC_PAGE(offset, mask);
16167}
16168
3e170ce0
A
16169boolean_t
16170vm_map_page_aligned(
16171 vm_map_offset_t offset,
16172 vm_map_offset_t mask)
16173{
16174 return ((offset) & mask) == 0;
16175}
16176
39236c6e
A
16177int
16178vm_map_page_shift(
16179 vm_map_t map)
16180{
16181 return VM_MAP_PAGE_SHIFT(map);
16182}
16183
16184int
16185vm_map_page_size(
16186 vm_map_t map)
16187{
16188 return VM_MAP_PAGE_SIZE(map);
16189}
16190
3e170ce0 16191vm_map_offset_t
39236c6e
A
16192vm_map_page_mask(
16193 vm_map_t map)
16194{
16195 return VM_MAP_PAGE_MASK(map);
16196}
16197
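
/*
 * Illustrative usage (not part of the original source): a minimal sketch
 * combining the exported page helpers to align an arbitrary range to the
 * map's page size. The helper name is an assumption for illustration only.
 */
#if 0 /* example only, not compiled */
static void
example_align_range(
	vm_map_t	map,
	vm_map_offset_t	*start,
	vm_map_size_t	*size)
{
	vm_map_offset_t	mask = vm_map_page_mask(map);
	vm_map_offset_t	end  = vm_map_round_page_mask(*start + *size, mask);

	*start = vm_map_trunc_page_mask(*start, mask);
	*size  = end - *start;
	assert(vm_map_page_aligned(*start, mask));
}
#endif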
16198kern_return_t
16199vm_map_set_page_shift(
16200 vm_map_t map,
16201 int pageshift)
16202{
16203 if (map->hdr.nentries != 0) {
16204 /* too late to change page size */
16205 return KERN_FAILURE;
16206 }
16207
16208 map->hdr.page_shift = pageshift;
16209
16210 return KERN_SUCCESS;
16211}
16212
16213kern_return_t
16214vm_map_query_volatile(
16215 vm_map_t map,
16216 mach_vm_size_t *volatile_virtual_size_p,
16217 mach_vm_size_t *volatile_resident_size_p,
3e170ce0
A
16218 mach_vm_size_t *volatile_compressed_size_p,
16219 mach_vm_size_t *volatile_pmap_size_p,
16220 mach_vm_size_t *volatile_compressed_pmap_size_p)
39236c6e
A
16221{
16222 mach_vm_size_t volatile_virtual_size;
16223 mach_vm_size_t volatile_resident_count;
3e170ce0 16224 mach_vm_size_t volatile_compressed_count;
39236c6e 16225 mach_vm_size_t volatile_pmap_count;
3e170ce0 16226 mach_vm_size_t volatile_compressed_pmap_count;
39236c6e
A
16227 mach_vm_size_t resident_count;
16228 vm_map_entry_t entry;
16229 vm_object_t object;
16230
16231 /* map should be locked by caller */
16232
16233 volatile_virtual_size = 0;
16234 volatile_resident_count = 0;
3e170ce0 16235 volatile_compressed_count = 0;
39236c6e 16236 volatile_pmap_count = 0;
3e170ce0 16237 volatile_compressed_pmap_count = 0;
39236c6e
A
16238
16239 for (entry = vm_map_first_entry(map);
16240 entry != vm_map_to_entry(map);
16241 entry = entry->vme_next) {
4bd07ac2
A
16242 mach_vm_size_t pmap_resident_bytes, pmap_compressed_bytes;
16243
39236c6e
A
16244 if (entry->is_sub_map) {
16245 continue;
16246 }
16247 if (! (entry->protection & VM_PROT_WRITE)) {
16248 continue;
16249 }
3e170ce0 16250 object = VME_OBJECT(entry);
39236c6e
A
16251 if (object == VM_OBJECT_NULL) {
16252 continue;
16253 }
3e170ce0
A
16254 if (object->purgable != VM_PURGABLE_VOLATILE &&
16255 object->purgable != VM_PURGABLE_EMPTY) {
39236c6e
A
16256 continue;
16257 }
3e170ce0 16258 if (VME_OFFSET(entry)) {
39236c6e
A
16259 /*
16260 * If the map entry has been split and the object now
16261 * appears several times in the VM map, we don't want
16262 * to count the object's resident_page_count more than
16263 * once. We count it only for the first one, starting
16264 * at offset 0 and ignore the other VM map entries.
16265 */
16266 continue;
16267 }
16268 resident_count = object->resident_page_count;
3e170ce0 16269 if ((VME_OFFSET(entry) / PAGE_SIZE) >= resident_count) {
39236c6e
A
16270 resident_count = 0;
16271 } else {
3e170ce0 16272 resident_count -= (VME_OFFSET(entry) / PAGE_SIZE);
39236c6e
A
16273 }
16274
16275 volatile_virtual_size += entry->vme_end - entry->vme_start;
16276 volatile_resident_count += resident_count;
3e170ce0
A
16277 if (object->pager) {
16278 volatile_compressed_count +=
16279 vm_compressor_pager_get_count(object->pager);
16280 }
4bd07ac2
A
16281 pmap_compressed_bytes = 0;
16282 pmap_resident_bytes =
16283 pmap_query_resident(map->pmap,
16284 entry->vme_start,
16285 entry->vme_end,
16286 &pmap_compressed_bytes);
16287 volatile_pmap_count += (pmap_resident_bytes / PAGE_SIZE);
16288 volatile_compressed_pmap_count += (pmap_compressed_bytes
16289 / PAGE_SIZE);
39236c6e
A
16290 }
16291
16292 /* map is still locked on return */
16293
16294 *volatile_virtual_size_p = volatile_virtual_size;
16295 *volatile_resident_size_p = volatile_resident_count * PAGE_SIZE;
3e170ce0 16296 *volatile_compressed_size_p = volatile_compressed_count * PAGE_SIZE;
39236c6e 16297 *volatile_pmap_size_p = volatile_pmap_count * PAGE_SIZE;
3e170ce0 16298 *volatile_compressed_pmap_size_p = volatile_compressed_pmap_count * PAGE_SIZE;
39236c6e
A
16299
16300 return KERN_SUCCESS;
16301}
fe8ab488 16302
3e170ce0
A
16303void
16304vm_map_sizes(vm_map_t map,
16305 vm_map_size_t * psize,
16306 vm_map_size_t * pfree,
16307 vm_map_size_t * plargest_free)
16308{
16309 vm_map_entry_t entry;
16310 vm_map_offset_t prev;
16311 vm_map_size_t free, total_free, largest_free;
16312 boolean_t end;
16313
39037602
A
16314 if (!map)
16315 {
16316 *psize = *pfree = *plargest_free = 0;
16317 return;
16318 }
3e170ce0
A
16319 total_free = largest_free = 0;
16320
16321 vm_map_lock_read(map);
16322 if (psize) *psize = map->max_offset - map->min_offset;
16323
16324 prev = map->min_offset;
16325 for (entry = vm_map_first_entry(map);; entry = entry->vme_next)
16326 {
16327 end = (entry == vm_map_to_entry(map));
16328
16329 if (end) free = entry->vme_end - prev;
16330 else free = entry->vme_start - prev;
16331
16332 total_free += free;
16333 if (free > largest_free) largest_free = free;
16334
16335 if (end) break;
16336 prev = entry->vme_end;
16337 }
16338 vm_map_unlock_read(map);
16339 if (pfree) *pfree = total_free;
16340 if (plargest_free) *plargest_free = largest_free;
16341}
16342
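
/*
 * Illustrative usage (not part of the original source): a minimal sketch of
 * reporting a map's total, free and largest-free sizes via vm_map_sizes().
 * The helper name is an assumption for illustration only.
 */
#if 0 /* example only, not compiled */
static void
example_log_map_usage(
	vm_map_t	map)
{
	vm_map_size_t	size, free, largest;

	vm_map_sizes(map, &size, &free, &largest);
	printf("map %p: size 0x%llx free 0x%llx largest free 0x%llx\n",
	       map, (uint64_t) size, (uint64_t) free, (uint64_t) largest);
}
#endif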
fe8ab488
A
16343#if VM_SCAN_FOR_SHADOW_CHAIN
16344int vm_map_shadow_max(vm_map_t map);
16345int vm_map_shadow_max(
16346 vm_map_t map)
16347{
16348 int shadows, shadows_max;
16349 vm_map_entry_t entry;
16350 vm_object_t object, next_object;
16351
16352 if (map == NULL)
16353 return 0;
16354
16355 shadows_max = 0;
16356
16357 vm_map_lock_read(map);
16358
16359 for (entry = vm_map_first_entry(map);
16360 entry != vm_map_to_entry(map);
16361 entry = entry->vme_next) {
16362 if (entry->is_sub_map) {
16363 continue;
16364 }
3e170ce0 16365 object = VME_OBJECT(entry);
fe8ab488
A
16366 if (object == NULL) {
16367 continue;
16368 }
16369 vm_object_lock_shared(object);
16370 for (shadows = 0;
16371 object->shadow != NULL;
16372 shadows++, object = next_object) {
16373 next_object = object->shadow;
16374 vm_object_lock_shared(next_object);
16375 vm_object_unlock(object);
16376 }
16377 vm_object_unlock(object);
16378 if (shadows > shadows_max) {
16379 shadows_max = shadows;
16380 }
16381 }
16382
16383 vm_map_unlock_read(map);
16384
16385 return shadows_max;
16386}
16387#endif /* VM_SCAN_FOR_SHADOW_CHAIN */
39037602
A
16388
16389void vm_commit_pagezero_status(vm_map_t lmap) {
16390 pmap_advise_pagezero_range(lmap->pmap, lmap->min_offset);
16391}