[apple/xnu.git] / osfmk / vm / vm_map.c (xnu-2422.100.13)
1c79356b 1/*
39236c6e 2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
1c79356b 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
1c79356b 5 *
2d21ac55 6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55 15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b 27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58/*
59 * File: vm/vm_map.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * Virtual memory mapping module.
64 */
65
1c79356b
A
66#include <task_swapper.h>
67#include <mach_assert.h>
91447636 68#include <libkern/OSAtomic.h>
1c79356b
A
69
70#include <mach/kern_return.h>
71#include <mach/port.h>
72#include <mach/vm_attributes.h>
73#include <mach/vm_param.h>
74#include <mach/vm_behavior.h>
55e303ae 75#include <mach/vm_statistics.h>
91447636 76#include <mach/memory_object.h>
0c530ab8 77#include <mach/mach_vm.h>
91447636 78#include <machine/cpu_capabilities.h>
2d21ac55 79#include <mach/sdt.h>
91447636 80
1c79356b
A
81#include <kern/assert.h>
82#include <kern/counters.h>
91447636 83#include <kern/kalloc.h>
1c79356b 84#include <kern/zalloc.h>
91447636
A
85
86#include <vm/cpm.h>
39236c6e 87#include <vm/vm_compressor_pager.h>
1c79356b
A
88#include <vm/vm_init.h>
89#include <vm/vm_fault.h>
90#include <vm/vm_map.h>
91#include <vm/vm_object.h>
92#include <vm/vm_page.h>
b0d623f7 93#include <vm/vm_pageout.h>
1c79356b
A
94#include <vm/vm_kern.h>
95#include <ipc/ipc_port.h>
96#include <kern/sched_prim.h>
97#include <kern/misc_protos.h>
1c79356b
A
98#include <kern/xpr.h>
99
91447636
A
100#include <mach/vm_map_server.h>
101#include <mach/mach_host_server.h>
2d21ac55 102#include <vm/vm_protos.h>
b0d623f7 103#include <vm/vm_purgeable_internal.h>
91447636 104
91447636 105#include <vm/vm_protos.h>
2d21ac55 106#include <vm/vm_shared_region.h>
6d2010ae 107#include <vm/vm_map_store.h>
91447636 108
316670eb 109extern u_int32_t random(void); /* from <libkern/libkern.h> */
1c79356b
A
110/* Internal prototypes
111 */
2d21ac55 112
91447636
A
113static void vm_map_simplify_range(
114 vm_map_t map,
115 vm_map_offset_t start,
116 vm_map_offset_t end); /* forward */
117
118static boolean_t vm_map_range_check(
2d21ac55
A
119 vm_map_t map,
120 vm_map_offset_t start,
121 vm_map_offset_t end,
122 vm_map_entry_t *entry);
1c79356b 123
91447636 124static vm_map_entry_t _vm_map_entry_create(
7ddcb079 125 struct vm_map_header *map_header, boolean_t map_locked);
1c79356b 126
91447636 127static void _vm_map_entry_dispose(
2d21ac55
A
128 struct vm_map_header *map_header,
129 vm_map_entry_t entry);
1c79356b 130
91447636 131static void vm_map_pmap_enter(
2d21ac55
A
132 vm_map_t map,
133 vm_map_offset_t addr,
134 vm_map_offset_t end_addr,
135 vm_object_t object,
136 vm_object_offset_t offset,
137 vm_prot_t protection);
1c79356b 138
91447636 139static void _vm_map_clip_end(
2d21ac55
A
140 struct vm_map_header *map_header,
141 vm_map_entry_t entry,
142 vm_map_offset_t end);
91447636
A
143
144static void _vm_map_clip_start(
2d21ac55
A
145 struct vm_map_header *map_header,
146 vm_map_entry_t entry,
147 vm_map_offset_t start);
1c79356b 148
91447636 149static void vm_map_entry_delete(
2d21ac55
A
150 vm_map_t map,
151 vm_map_entry_t entry);
1c79356b 152
91447636 153static kern_return_t vm_map_delete(
2d21ac55
A
154 vm_map_t map,
155 vm_map_offset_t start,
156 vm_map_offset_t end,
157 int flags,
158 vm_map_t zap_map);
1c79356b 159
91447636 160static kern_return_t vm_map_copy_overwrite_unaligned(
2d21ac55
A
161 vm_map_t dst_map,
162 vm_map_entry_t entry,
163 vm_map_copy_t copy,
39236c6e
A
164 vm_map_address_t start,
165 boolean_t discard_on_success);
1c79356b 166
91447636 167static kern_return_t vm_map_copy_overwrite_aligned(
2d21ac55
A
168 vm_map_t dst_map,
169 vm_map_entry_t tmp_entry,
170 vm_map_copy_t copy,
171 vm_map_offset_t start,
172 pmap_t pmap);
1c79356b 173
91447636 174static kern_return_t vm_map_copyin_kernel_buffer(
2d21ac55
A
175 vm_map_t src_map,
176 vm_map_address_t src_addr,
177 vm_map_size_t len,
178 boolean_t src_destroy,
179 vm_map_copy_t *copy_result); /* OUT */
1c79356b 180
91447636 181static kern_return_t vm_map_copyout_kernel_buffer(
2d21ac55
A
182 vm_map_t map,
183 vm_map_address_t *addr, /* IN/OUT */
184 vm_map_copy_t copy,
39236c6e
A
185 boolean_t overwrite,
186 boolean_t consume_on_success);
1c79356b 187
91447636 188static void vm_map_fork_share(
2d21ac55
A
189 vm_map_t old_map,
190 vm_map_entry_t old_entry,
191 vm_map_t new_map);
1c79356b 192
91447636 193static boolean_t vm_map_fork_copy(
2d21ac55
A
194 vm_map_t old_map,
195 vm_map_entry_t *old_entry_p,
196 vm_map_t new_map);
1c79356b 197
0c530ab8 198void vm_map_region_top_walk(
2d21ac55
A
199 vm_map_entry_t entry,
200 vm_region_top_info_t top);
1c79356b 201
0c530ab8 202void vm_map_region_walk(
2d21ac55
A
203 vm_map_t map,
204 vm_map_offset_t va,
205 vm_map_entry_t entry,
206 vm_object_offset_t offset,
207 vm_object_size_t range,
208 vm_region_extended_info_t extended,
39236c6e
A
209 boolean_t look_for_pages,
210 mach_msg_type_number_t count);
91447636
A
211
212static kern_return_t vm_map_wire_nested(
2d21ac55
A
213 vm_map_t map,
214 vm_map_offset_t start,
215 vm_map_offset_t end,
216 vm_prot_t access_type,
217 boolean_t user_wire,
218 pmap_t map_pmap,
219 vm_map_offset_t pmap_addr);
91447636
A
220
221static kern_return_t vm_map_unwire_nested(
2d21ac55
A
222 vm_map_t map,
223 vm_map_offset_t start,
224 vm_map_offset_t end,
225 boolean_t user_wire,
226 pmap_t map_pmap,
227 vm_map_offset_t pmap_addr);
91447636
A
228
229static kern_return_t vm_map_overwrite_submap_recurse(
2d21ac55
A
230 vm_map_t dst_map,
231 vm_map_offset_t dst_addr,
232 vm_map_size_t dst_size);
91447636
A
233
234static kern_return_t vm_map_copy_overwrite_nested(
2d21ac55
A
235 vm_map_t dst_map,
236 vm_map_offset_t dst_addr,
237 vm_map_copy_t copy,
238 boolean_t interruptible,
6d2010ae
A
239 pmap_t pmap,
240 boolean_t discard_on_success);
91447636
A
241
242static kern_return_t vm_map_remap_extract(
2d21ac55
A
243 vm_map_t map,
244 vm_map_offset_t addr,
245 vm_map_size_t size,
246 boolean_t copy,
247 struct vm_map_header *map_header,
248 vm_prot_t *cur_protection,
249 vm_prot_t *max_protection,
250 vm_inherit_t inheritance,
251 boolean_t pageable);
91447636
A
252
253static kern_return_t vm_map_remap_range_allocate(
2d21ac55
A
254 vm_map_t map,
255 vm_map_address_t *address,
256 vm_map_size_t size,
257 vm_map_offset_t mask,
060df5ea 258 int flags,
2d21ac55 259 vm_map_entry_t *map_entry);
91447636
A
260
261static void vm_map_region_look_for_page(
2d21ac55
A
262 vm_map_t map,
263 vm_map_offset_t va,
264 vm_object_t object,
265 vm_object_offset_t offset,
266 int max_refcnt,
267 int depth,
39236c6e
A
268 vm_region_extended_info_t extended,
269 mach_msg_type_number_t count);
91447636
A
270
271static int vm_map_region_count_obj_refs(
2d21ac55
A
272 vm_map_entry_t entry,
273 vm_object_t object);
1c79356b 274
b0d623f7
A
275
276static kern_return_t vm_map_willneed(
277 vm_map_t map,
278 vm_map_offset_t start,
279 vm_map_offset_t end);
280
281static kern_return_t vm_map_reuse_pages(
282 vm_map_t map,
283 vm_map_offset_t start,
284 vm_map_offset_t end);
285
286static kern_return_t vm_map_reusable_pages(
287 vm_map_t map,
288 vm_map_offset_t start,
289 vm_map_offset_t end);
290
291static kern_return_t vm_map_can_reuse(
292 vm_map_t map,
293 vm_map_offset_t start,
294 vm_map_offset_t end);
295
6d2010ae 296
1c79356b
A
297/*
298 * Macros to copy a vm_map_entry. We must be careful to correctly
299 * manage the wired page count. vm_map_entry_copy() creates a new
300 * map entry for the same memory - the wired count in the new entry
301 * must be set to zero. vm_map_entry_copy_full() creates a new
302 * entry that is identical to the old entry. This preserves the
303 * wire count; it's used for map splitting and zone changing in
304 * vm_map_copyout.
305 */
316670eb 306
7ddcb079
A
307#define vm_map_entry_copy(NEW,OLD) \
308MACRO_BEGIN \
309boolean_t _vmec_reserved = (NEW)->from_reserved_zone; \
2d21ac55
A
310 *(NEW) = *(OLD); \
311 (NEW)->is_shared = FALSE; \
312 (NEW)->needs_wakeup = FALSE; \
313 (NEW)->in_transition = FALSE; \
314 (NEW)->wired_count = 0; \
315 (NEW)->user_wired_count = 0; \
b0d623f7 316 (NEW)->permanent = FALSE; \
316670eb 317 (NEW)->used_for_jit = FALSE; \
7ddcb079 318 (NEW)->from_reserved_zone = _vmec_reserved; \
1c79356b
A
319MACRO_END
320
7ddcb079
A
321#define vm_map_entry_copy_full(NEW,OLD) \
322MACRO_BEGIN \
323boolean_t _vmecf_reserved = (NEW)->from_reserved_zone; \
324(*(NEW) = *(OLD)); \
325(NEW)->from_reserved_zone = _vmecf_reserved; \
326MACRO_END
1c79356b 327
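/*
 * [Illustrative sketch -- not part of the original file.]
 * Intended use of the two macros above: vm_map_entry_copy() makes an
 * additional mapping of the same memory (so the new entry's wired
 * counts start at zero), while vm_map_entry_copy_full() moves an
 * entry between headers without disturbing its wire state. A
 * hypothetical clone helper built on them would look roughly like:
 */
#if 0	/* illustrative example, not compiled */
static vm_map_entry_t
example_clone_entry(
	struct vm_map_header	*dst_hdr,
	vm_map_entry_t		src,
	boolean_t		dst_map_locked)
{
	vm_map_entry_t	clone;

	/* fresh entry; _vm_map_entry_create() sets from_reserved_zone */
	clone = _vm_map_entry_create(dst_hdr, dst_map_locked);
	/* copy the fields but reset the new mapping's wiring/sharing state */
	vm_map_entry_copy(clone, src);
	assert(clone->wired_count == 0);
	return clone;	/* caller links it into dst_hdr via the store layer */
}
#endif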
2d21ac55 328/*
329 * Decide if we want to allow processes to execute from their data or stack areas.
330 * override_nx() returns true if we do. Data/stack execution can be enabled independently
331 * for 32 and 64 bit processes. Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
332 * or allow_stack_exec to enable data execution for that type of data area for that particular
333 * ABI (or both by or'ing the flags together). These are initialized in the architecture
334 * specific pmap files since the default behavior varies according to architecture. The
335 * main reason it varies is because of the need to provide binary compatibility with old
336 * applications that were written before these restrictions came into being. In the old
337 * days, an app could execute anything it could read, but this has slowly been tightened
338 * up over time. The default behavior is:
339 *
340 * 32-bit PPC apps may execute from both stack and data areas
341 * 32-bit Intel apps may execute from data areas but not stack
342 * 64-bit PPC/Intel apps may not execute from either data or stack
343 *
344 * An application on any architecture may override these defaults by explicitly
345 * adding PROT_EXEC permission to the page in question with the mprotect(2)
346 * system call. This code here just determines what happens when an app tries to
347 * execute from a page that lacks execute permission.
348 *
349 * Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
6d2010ae 350 * default behavior for both 32 and 64 bit apps on a system-wide basis. Furthermore,
351 * a Mach-O header flag bit (MH_NO_HEAP_EXECUTION) can be used to forcibly disallow
352 * execution from data areas for a particular binary even if the arch normally permits it. As
353 * a final wrinkle, a posix_spawn attribute flag can be used to negate this opt-in header bit
354 * to support some complicated use cases, notably browsers with out-of-process plugins that
355 * are not all NX-safe.
2d21ac55 356 */
357
358extern int allow_data_exec, allow_stack_exec;
359
360int
361override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
362{
363 int current_abi;
364
365 /*
366 * Determine if the app is running in 32 or 64 bit mode.
367 */
368
369 if (vm_map_is_64bit(map))
370 current_abi = VM_ABI_64;
371 else
372 current_abi = VM_ABI_32;
373
374 /*
375 * Determine if we should allow the execution based on whether it's a
376 * stack or data area and the current architecture.
377 */
378
379 if (user_tag == VM_MEMORY_STACK)
380 return allow_stack_exec & current_abi;
381
6d2010ae 382 return (allow_data_exec & current_abi) && (map->map_disallow_data_exec == FALSE);
2d21ac55 383}
384
385
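/*
 * [Illustrative sketch -- not part of the original file.]
 * A fault path that needs execute permission on a page whose mapping
 * lacks it would consult override_nx() along these lines; the
 * surrounding names (fault_type, entry) are assumptions here, not
 * code from this file:
 */
#if 0	/* illustrative example, not compiled */
static kern_return_t
example_check_exec_fault(
	vm_map_t	map,
	vm_map_entry_t	entry,
	vm_prot_t	fault_type)
{
	if ((fault_type & VM_PROT_EXECUTE) &&
	    !(entry->protection & VM_PROT_EXECUTE) &&
	    !override_nx(map, entry->alias)) {
		/* neither the mapping nor the NX policy allows execution */
		return KERN_PROTECTION_FAILURE;
	}
	return KERN_SUCCESS;
}
#endif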
1c79356b 386/*
387 * Virtual memory maps provide for the mapping, protection,
388 * and sharing of virtual memory objects. In addition,
389 * this module provides for an efficient virtual copy of
390 * memory from one map to another.
391 *
392 * Synchronization is required prior to most operations.
393 *
394 * Maps consist of an ordered doubly-linked list of simple
395 * entries; a single hint is used to speed up lookups.
396 *
397 * Sharing maps have been deleted from this version of Mach.
398 * All shared objects are now mapped directly into the respective
399 * maps. This requires a change in the copy on write strategy;
400 * the asymmetric (delayed) strategy is used for shared temporary
401 * objects instead of the symmetric (shadow) strategy. All maps
402 * are now "top level" maps (either task map, kernel map or submap
403 * of the kernel map).
404 *
405 * Since portions of maps are specified by start/end addresses,
406 * which may not align with existing map entries, all
407 * routines merely "clip" entries to these start/end values.
408 * [That is, an entry is split into two, bordering at a
409 * start or end value.] Note that these clippings may not
410 * always be necessary (as the two resulting entries are then
411 * not changed); however, the clipping is done for convenience.
412 * No attempt is currently made to "glue back together" two
413 * abutting entries.
414 *
415 * The symmetric (shadow) copy strategy implements virtual copy
416 * by copying VM object references from one map to
417 * another, and then marking both regions as copy-on-write.
418 * It is important to note that only one writeable reference
419 * to a VM object region exists in any map when this strategy
420 * is used -- this means that shadow object creation can be
421 * delayed until a write operation occurs. The asymmetric (delayed)
422 * strategy allows multiple maps to have writeable references to
423 * the same region of a vm object, and hence cannot delay creating
424 * its copy objects. See vm_object_copy_quickly() in vm_object.c.
425 * Copying of permanent objects is completely different; see
426 * vm_object_copy_strategically() in vm_object.c.
427 */
428
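/*
 * [Illustrative sketch -- not part of the original file.]
 * The "clipping" described above, in practice: a routine that must
 * affect only [start, end) first splits any entry straddling those
 * boundaries, then visits whole entries. Most range operations in
 * this file follow roughly this shape:
 */
#if 0	/* illustrative example, not compiled */
static void
example_operate_on_range(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	vm_map_entry_t	entry;

	vm_map_lock(map);
	if (vm_map_lookup_entry(map, start, &entry)) {
		vm_map_clip_start(map, entry, start);	/* split at "start" */
	} else {
		entry = entry->vme_next;	/* first entry past "start" */
	}
	while (entry != vm_map_to_entry(map) && entry->vme_start < end) {
		vm_map_clip_end(map, entry, end);	/* split at "end" */
		/* ... operate on this whole, in-range entry ... */
		entry = entry->vme_next;
	}
	vm_map_unlock(map);
}
#endif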
91447636
A
429static zone_t vm_map_zone; /* zone for vm_map structures */
430static zone_t vm_map_entry_zone; /* zone for vm_map_entry structures */
7ddcb079
A
431static zone_t vm_map_entry_reserved_zone; /* zone with reserve for non-blocking
432 * allocations */
91447636 433static zone_t vm_map_copy_zone; /* zone for vm_map_copy structures */
1c79356b
A
434
435
436/*
437 * Placeholder object for submap operations. This object is dropped
438 * into the range by a call to vm_map_find, and removed when
439 * vm_map_submap creates the submap.
440 */
441
442vm_object_t vm_submap_object;
443
91447636 444static void *map_data;
b0d623f7 445static vm_size_t map_data_size;
91447636 446static void *kentry_data;
b0d623f7 447static vm_size_t kentry_data_size;
1c79356b 448
b0d623f7 449#define NO_COALESCE_LIMIT ((1024 * 128) - 1)
1c79356b 450
55e303ae 451/* Skip acquiring locks if we're in the midst of a kernel core dump */
b0d623f7 452unsigned int not_in_kdp = 1;
55e303ae 453
6d2010ae
A
454unsigned int vm_map_set_cache_attr_count = 0;
455
456kern_return_t
457vm_map_set_cache_attr(
458 vm_map_t map,
459 vm_map_offset_t va)
460{
461 vm_map_entry_t map_entry;
462 vm_object_t object;
463 kern_return_t kr = KERN_SUCCESS;
464
465 vm_map_lock_read(map);
466
467 if (!vm_map_lookup_entry(map, va, &map_entry) ||
468 map_entry->is_sub_map) {
469 /*
470 * that memory is not properly mapped
471 */
472 kr = KERN_INVALID_ARGUMENT;
473 goto done;
474 }
475 object = map_entry->object.vm_object;
476
477 if (object == VM_OBJECT_NULL) {
478 /*
479 * there should be a VM object here at this point
480 */
481 kr = KERN_INVALID_ARGUMENT;
482 goto done;
483 }
484 vm_object_lock(object);
485 object->set_cache_attr = TRUE;
486 vm_object_unlock(object);
487
488 vm_map_set_cache_attr_count++;
489done:
490 vm_map_unlock_read(map);
491
492 return kr;
493}
494
495
593a1d5f
A
496#if CONFIG_CODE_DECRYPTION
497/*
498 * vm_map_apple_protected:
499 * This remaps the requested part of the object with an object backed by
500 * the decrypting pager.
501 * crypt_info contains entry points and session data for the crypt module.
502 * The crypt_info block will be copied by vm_map_apple_protected. The data structures
503 * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
504 */
0c530ab8
A
505kern_return_t
506vm_map_apple_protected(
507 vm_map_t map,
508 vm_map_offset_t start,
593a1d5f
A
509 vm_map_offset_t end,
510 struct pager_crypt_info *crypt_info)
0c530ab8
A
511{
512 boolean_t map_locked;
513 kern_return_t kr;
514 vm_map_entry_t map_entry;
515 memory_object_t protected_mem_obj;
516 vm_object_t protected_object;
517 vm_map_offset_t map_addr;
518
519 vm_map_lock_read(map);
520 map_locked = TRUE;
521
522 /* lookup the protected VM object */
523 if (!vm_map_lookup_entry(map,
524 start,
525 &map_entry) ||
593a1d5f 526 map_entry->vme_end < end ||
0c530ab8
A
527 map_entry->is_sub_map) {
528 /* that memory is not properly mapped */
529 kr = KERN_INVALID_ARGUMENT;
530 goto done;
531 }
532 protected_object = map_entry->object.vm_object;
533 if (protected_object == VM_OBJECT_NULL) {
534 /* there should be a VM object here at this point */
535 kr = KERN_INVALID_ARGUMENT;
536 goto done;
537 }
538
b0d623f7
A
539 /* make sure protected object stays alive while map is unlocked */
540 vm_object_reference(protected_object);
541
542 vm_map_unlock_read(map);
543 map_locked = FALSE;
544
0c530ab8
A
545 /*
546 * Lookup (and create if necessary) the protected memory object
547 * matching that VM object.
548 * If successful, this also grabs a reference on the memory object,
549 * to guarantee that it doesn't go away before we get a chance to map
550 * it.
551 */
593a1d5f 552 protected_mem_obj = apple_protect_pager_setup(protected_object, crypt_info);
b0d623f7
A
553
554 /* release extra ref on protected object */
555 vm_object_deallocate(protected_object);
556
0c530ab8
A
557 if (protected_mem_obj == NULL) {
558 kr = KERN_FAILURE;
559 goto done;
560 }
561
0c530ab8
A
562 /* map this memory object in place of the current one */
563 map_addr = start;
2d21ac55
A
564 kr = vm_map_enter_mem_object(map,
565 &map_addr,
566 end - start,
567 (mach_vm_offset_t) 0,
568 VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
569 (ipc_port_t) protected_mem_obj,
570 (map_entry->offset +
571 (start - map_entry->vme_start)),
572 TRUE,
573 map_entry->protection,
574 map_entry->max_protection,
575 map_entry->inheritance);
0c530ab8 576 assert(map_addr == start);
0c530ab8
A
577 /*
578 * Release the reference obtained by apple_protect_pager_setup().
579 * The mapping (if it succeeded) is now holding a reference on the
580 * memory object.
581 */
582 memory_object_deallocate(protected_mem_obj);
583
584done:
585 if (map_locked) {
586 vm_map_unlock_read(map);
587 }
588 return kr;
589}
593a1d5f 590#endif /* CONFIG_CODE_DECRYPTION */
0c530ab8
A
591
592
b0d623f7
A
593lck_grp_t vm_map_lck_grp;
594lck_grp_attr_t vm_map_lck_grp_attr;
595lck_attr_t vm_map_lck_attr;
596
597
593a1d5f
A
598/*
599 * vm_map_init:
600 *
601 * Initialize the vm_map module. Must be called before
602 * any other vm_map routines.
603 *
604 * Map and entry structures are allocated from zones -- we must
605 * initialize those zones.
606 *
607 * There are three zones of interest:
608 *
609 * vm_map_zone: used to allocate maps.
610 * vm_map_entry_zone: used to allocate map entries.
7ddcb079 611 * vm_map_entry_reserved_zone: fallback zone for kernel map entries
593a1d5f
A
612 *
613 * The kernel allocates map entries from a special zone that is initially
614 * "crammed" with memory. It would be difficult (perhaps impossible) for
615 * the kernel to allocate more memory to an entry zone when it became
616 * empty since the very act of allocating memory implies the creation
617 * of a new entry.
618 */
1c79356b
A
619void
620vm_map_init(
621 void)
622{
7ddcb079 623 vm_size_t entry_zone_alloc_size;
316670eb
A
624 const char *mez_name = "VM map entries";
625
2d21ac55
A
626 vm_map_zone = zinit((vm_map_size_t) sizeof(struct _vm_map), 40*1024,
627 PAGE_SIZE, "maps");
0b4c1975 628 zone_change(vm_map_zone, Z_NOENCRYPT, TRUE);
7ddcb079
A
629#if defined(__LP64__)
630 entry_zone_alloc_size = PAGE_SIZE * 5;
631#else
632 entry_zone_alloc_size = PAGE_SIZE * 6;
633#endif
91447636 634 vm_map_entry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
7ddcb079 635 1024*1024, entry_zone_alloc_size,
316670eb 636 mez_name);
0b4c1975 637 zone_change(vm_map_entry_zone, Z_NOENCRYPT, TRUE);
7ddcb079 638 zone_change(vm_map_entry_zone, Z_NOCALLOUT, TRUE);
316670eb 639 zone_change(vm_map_entry_zone, Z_GZALLOC_EXEMPT, TRUE);
1c79356b 640
7ddcb079
A
641 vm_map_entry_reserved_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
642 kentry_data_size * 64, kentry_data_size,
643 "Reserved VM map entries");
644 zone_change(vm_map_entry_reserved_zone, Z_NOENCRYPT, TRUE);
1c79356b 645
91447636 646 vm_map_copy_zone = zinit((vm_map_size_t) sizeof(struct vm_map_copy),
7ddcb079 647 16*1024, PAGE_SIZE, "VM map copies");
0b4c1975 648 zone_change(vm_map_copy_zone, Z_NOENCRYPT, TRUE);
1c79356b
A
649
650 /*
651 * Cram the map and kentry zones with initial data.
7ddcb079 652 * Set reserved_zone non-collectible to aid zone_gc().
1c79356b
A
653 */
654 zone_change(vm_map_zone, Z_COLLECT, FALSE);
7ddcb079
A
655
656 zone_change(vm_map_entry_reserved_zone, Z_COLLECT, FALSE);
657 zone_change(vm_map_entry_reserved_zone, Z_EXPAND, FALSE);
658 zone_change(vm_map_entry_reserved_zone, Z_FOREIGN, TRUE);
659 zone_change(vm_map_entry_reserved_zone, Z_NOCALLOUT, TRUE);
660 zone_change(vm_map_entry_reserved_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
6d2010ae 661 zone_change(vm_map_copy_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
316670eb 662 zone_change(vm_map_entry_reserved_zone, Z_GZALLOC_EXEMPT, TRUE);
6d2010ae 663
7ddcb079
A
664 zcram(vm_map_zone, (vm_offset_t)map_data, map_data_size);
665 zcram(vm_map_entry_reserved_zone, (vm_offset_t)kentry_data, kentry_data_size);
b0d623f7
A
666
667 lck_grp_attr_setdefault(&vm_map_lck_grp_attr);
668 lck_grp_init(&vm_map_lck_grp, "vm_map", &vm_map_lck_grp_attr);
669 lck_attr_setdefault(&vm_map_lck_attr);
316670eb
A
670
671#if CONFIG_FREEZE
672 default_freezer_init();
673#endif /* CONFIG_FREEZE */
1c79356b
A
674}
675
676void
677vm_map_steal_memory(
678 void)
679{
7ddcb079
A
680 uint32_t kentry_initial_pages;
681
b0d623f7 682 map_data_size = round_page(10 * sizeof(struct _vm_map));
1c79356b
A
683 map_data = pmap_steal_memory(map_data_size);
684
1c79356b 685 /*
7ddcb079
A
686 * kentry_initial_pages corresponds to the number of kernel map entries
687 * required during bootstrap until the asynchronous replenishment
688 * scheme is activated and/or entries are available from the general
689 * map entry pool.
1c79356b 690 */
7ddcb079
A
691#if defined(__LP64__)
692 kentry_initial_pages = 10;
693#else
694 kentry_initial_pages = 6;
1c79356b 695#endif
316670eb
A
696
697#if CONFIG_GZALLOC
698 /* If using the guard allocator, reserve more memory for the kernel
699 * reserved map entry pool.
700 */
701 if (gzalloc_enabled())
702 kentry_initial_pages *= 1024;
703#endif
704
7ddcb079 705 kentry_data_size = kentry_initial_pages * PAGE_SIZE;
1c79356b
A
706 kentry_data = pmap_steal_memory(kentry_data_size);
707}
708
7ddcb079
A
709void vm_kernel_reserved_entry_init(void) {
710 zone_prio_refill_configure(vm_map_entry_reserved_zone, (6*PAGE_SIZE)/sizeof(struct vm_map_entry));
711}
712
1c79356b
A
713/*
714 * vm_map_create:
715 *
716 * Creates and returns a new empty VM map with
717 * the given physical map structure, and having
718 * the given lower and upper address bounds.
719 */
720vm_map_t
721vm_map_create(
91447636
A
722 pmap_t pmap,
723 vm_map_offset_t min,
724 vm_map_offset_t max,
725 boolean_t pageable)
1c79356b 726{
2d21ac55 727 static int color_seed = 0;
1c79356b
A
728 register vm_map_t result;
729
730 result = (vm_map_t) zalloc(vm_map_zone);
731 if (result == VM_MAP_NULL)
732 panic("vm_map_create");
733
734 vm_map_first_entry(result) = vm_map_to_entry(result);
735 vm_map_last_entry(result) = vm_map_to_entry(result);
736 result->hdr.nentries = 0;
737 result->hdr.entries_pageable = pageable;
738
6d2010ae
A
739 vm_map_store_init( &(result->hdr) );
740
39236c6e
A
741 result->hdr.page_shift = PAGE_SHIFT;
742
1c79356b 743 result->size = 0;
2d21ac55
A
744 result->user_wire_limit = MACH_VM_MAX_ADDRESS; /* default limit is unlimited */
745 result->user_wire_size = 0;
1c79356b
A
746 result->ref_count = 1;
747#if TASK_SWAPPER
748 result->res_count = 1;
749 result->sw_state = MAP_SW_IN;
750#endif /* TASK_SWAPPER */
751 result->pmap = pmap;
752 result->min_offset = min;
753 result->max_offset = max;
754 result->wiring_required = FALSE;
755 result->no_zero_fill = FALSE;
316670eb 756 result->mapped_in_other_pmaps = FALSE;
1c79356b 757 result->wait_for_space = FALSE;
b0d623f7 758 result->switch_protect = FALSE;
6d2010ae
A
759 result->disable_vmentry_reuse = FALSE;
760 result->map_disallow_data_exec = FALSE;
761 result->highest_entry_end = 0;
1c79356b
A
762 result->first_free = vm_map_to_entry(result);
763 result->hint = vm_map_to_entry(result);
2d21ac55 764 result->color_rr = (color_seed++) & vm_color_mask;
6d2010ae
A
765 result->jit_entry_exists = FALSE;
766#if CONFIG_FREEZE
316670eb 767 result->default_freezer_handle = NULL;
6d2010ae 768#endif
1c79356b 769 vm_map_lock_init(result);
b0d623f7
A
770 lck_mtx_init_ext(&result->s_lock, &result->s_lock_ext, &vm_map_lck_grp, &vm_map_lck_attr);
771
1c79356b
A
772 return(result);
773}
774
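/*
 * [Illustrative sketch -- not part of the original file.]
 * vm_map_create() is paired with vm_map_destroy(). The temporary
 * "zap" maps used later in vm_map_enter() are a good model: a
 * bookkeeping-only map (PMAP_NULL) covering just the range of
 * interest, torn down once the operation completes. "start" and
 * "size" below are assumed to be supplied by the caller.
 */
#if 0	/* illustrative example, not compiled */
	vm_map_t	zap_map;

	zap_map = vm_map_create(PMAP_NULL,
				start,
				start + size,
				map->hdr.entries_pageable);
	/* ... move doomed entries into zap_map ... */
	vm_map_destroy(zap_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
#endif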
775/*
776 * vm_map_entry_create: [ internal use only ]
777 *
778 * Allocates a VM map entry for insertion in the
779 * given map (or map copy). No fields are filled.
780 */
7ddcb079 781#define vm_map_entry_create(map, map_locked) _vm_map_entry_create(&(map)->hdr, map_locked)
1c79356b 782
7ddcb079
A
783#define vm_map_copy_entry_create(copy, map_locked) \
784 _vm_map_entry_create(&(copy)->cpy_hdr, map_locked)
785unsigned reserved_zalloc_count, nonreserved_zalloc_count;
1c79356b 786
91447636 787static vm_map_entry_t
1c79356b 788_vm_map_entry_create(
7ddcb079 789 struct vm_map_header *map_header, boolean_t __unused map_locked)
1c79356b 790{
7ddcb079
A
791 zone_t zone;
792 vm_map_entry_t entry;
1c79356b 793
7ddcb079
A
794 zone = vm_map_entry_zone;
795
796 assert(map_header->entries_pageable ? !map_locked : TRUE);
797
798 if (map_header->entries_pageable) {
799 entry = (vm_map_entry_t) zalloc(zone);
800 }
801 else {
802 entry = (vm_map_entry_t) zalloc_canblock(zone, FALSE);
803
804 if (entry == VM_MAP_ENTRY_NULL) {
805 zone = vm_map_entry_reserved_zone;
806 entry = (vm_map_entry_t) zalloc(zone);
807 OSAddAtomic(1, &reserved_zalloc_count);
808 } else
809 OSAddAtomic(1, &nonreserved_zalloc_count);
810 }
1c79356b 811
1c79356b
A
812 if (entry == VM_MAP_ENTRY_NULL)
813 panic("vm_map_entry_create");
7ddcb079
A
814 entry->from_reserved_zone = (zone == vm_map_entry_reserved_zone);
815
6d2010ae 816 vm_map_store_update( (vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE);
316670eb 817#if MAP_ENTRY_CREATION_DEBUG
39236c6e
A
818 entry->vme_creation_maphdr = map_header;
819 fastbacktrace(&entry->vme_creation_bt[0],
820 (sizeof(entry->vme_creation_bt)/sizeof(uintptr_t)));
316670eb 821#endif
1c79356b
A
822 return(entry);
823}
824
825/*
826 * vm_map_entry_dispose: [ internal use only ]
827 *
828 * Inverse of vm_map_entry_create.
2d21ac55
A
829 *
830 * write map lock held so no need to
831 * do anything special to ensure correctness
832 * of the stores
1c79356b
A
833 */
834#define vm_map_entry_dispose(map, entry) \
6d2010ae 835 _vm_map_entry_dispose(&(map)->hdr, (entry))
1c79356b
A
836
837#define vm_map_copy_entry_dispose(map, entry) \
838 _vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
839
91447636 840static void
1c79356b
A
841_vm_map_entry_dispose(
842 register struct vm_map_header *map_header,
843 register vm_map_entry_t entry)
844{
845 register zone_t zone;
846
7ddcb079 847 if (map_header->entries_pageable || !(entry->from_reserved_zone))
2d21ac55 848 zone = vm_map_entry_zone;
1c79356b 849 else
7ddcb079
A
850 zone = vm_map_entry_reserved_zone;
851
852 if (!map_header->entries_pageable) {
853 if (zone == vm_map_entry_zone)
854 OSAddAtomic(-1, &nonreserved_zalloc_count);
855 else
856 OSAddAtomic(-1, &reserved_zalloc_count);
857 }
1c79356b 858
91447636 859 zfree(zone, entry);
1c79356b
A
860}
861
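/*
 * [Illustrative sketch -- not part of the original file.]
 * Creation and disposal are paired: a routine that pre-allocates an
 * entry and then fails must give it back, as vm_map_find_space()
 * below does on KERN_NO_SPACE. The failure condition here is a
 * placeholder:
 */
#if 0	/* illustrative example, not compiled */
	new_entry = vm_map_entry_create(map, FALSE);
	vm_map_lock(map);
	if (no_suitable_hole) {		/* hypothetical condition */
		vm_map_entry_dispose(map, new_entry);
		vm_map_unlock(map);
		return KERN_NO_SPACE;
	}
#endif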
91447636 862#if MACH_ASSERT
91447636 863static boolean_t first_free_check = FALSE;
6d2010ae 864boolean_t
1c79356b
A
865first_free_is_valid(
866 vm_map_t map)
867{
1c79356b
A
868 if (!first_free_check)
869 return TRUE;
2d21ac55 870
6d2010ae 871 return( first_free_is_valid_store( map ));
1c79356b 872}
91447636 873#endif /* MACH_ASSERT */
1c79356b 874
1c79356b
A
875
876#define vm_map_copy_entry_link(copy, after_where, entry) \
6d2010ae 877 _vm_map_store_entry_link(&(copy)->cpy_hdr, after_where, (entry))
1c79356b
A
878
879#define vm_map_copy_entry_unlink(copy, entry) \
6d2010ae 880 _vm_map_store_entry_unlink(&(copy)->cpy_hdr, (entry))
1c79356b 881
1c79356b 882#if MACH_ASSERT && TASK_SWAPPER
1c79356b
A
883/*
884 * vm_map_res_reference:
885 *
886 * Adds another valid residence count to the given map.
887 *
888 * Map is locked so this function can be called from
889 * vm_map_swapin.
890 *
891 */
892void vm_map_res_reference(register vm_map_t map)
893{
894 /* assert map is locked */
895 assert(map->res_count >= 0);
896 assert(map->ref_count >= map->res_count);
897 if (map->res_count == 0) {
b0d623f7 898 lck_mtx_unlock(&map->s_lock);
1c79356b
A
899 vm_map_lock(map);
900 vm_map_swapin(map);
b0d623f7 901 lck_mtx_lock(&map->s_lock);
1c79356b
A
902 ++map->res_count;
903 vm_map_unlock(map);
904 } else
905 ++map->res_count;
906}
907
908/*
909 * vm_map_reference_swap:
910 *
911 * Adds valid reference and residence counts to the given map.
912 *
913 * The map may not be in memory (i.e. zero residence count).
914 *
915 */
916void vm_map_reference_swap(register vm_map_t map)
917{
918 assert(map != VM_MAP_NULL);
b0d623f7 919 lck_mtx_lock(&map->s_lock);
1c79356b
A
920 assert(map->res_count >= 0);
921 assert(map->ref_count >= map->res_count);
922 map->ref_count++;
923 vm_map_res_reference(map);
b0d623f7 924 lck_mtx_unlock(&map->s_lock);
1c79356b
A
925}
926
927/*
928 * vm_map_res_deallocate:
929 *
930 * Decrement residence count on a map; possibly causing swapout.
931 *
932 * The map must be in memory (i.e. non-zero residence count).
933 *
934 * The map is locked, so this function is callable from vm_map_deallocate.
935 *
936 */
937void vm_map_res_deallocate(register vm_map_t map)
938{
939 assert(map->res_count > 0);
940 if (--map->res_count == 0) {
b0d623f7 941 lck_mtx_unlock(&map->s_lock);
1c79356b
A
942 vm_map_lock(map);
943 vm_map_swapout(map);
944 vm_map_unlock(map);
b0d623f7 945 lck_mtx_lock(&map->s_lock);
1c79356b
A
946 }
947 assert(map->ref_count >= map->res_count);
948}
949#endif /* MACH_ASSERT && TASK_SWAPPER */
950
1c79356b
A
951/*
952 * vm_map_destroy:
953 *
954 * Actually destroy a map.
955 */
956void
957vm_map_destroy(
2d21ac55
A
958 vm_map_t map,
959 int flags)
91447636 960{
1c79356b 961 vm_map_lock(map);
2d21ac55
A
962
963 /* clean up regular map entries */
964 (void) vm_map_delete(map, map->min_offset, map->max_offset,
965 flags, VM_MAP_NULL);
966 /* clean up leftover special mappings (commpage, etc...) */
2d21ac55
A
967 (void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL,
968 flags, VM_MAP_NULL);
6d2010ae
A
969
970#if CONFIG_FREEZE
316670eb
A
971 if (map->default_freezer_handle) {
972 default_freezer_handle_deallocate(map->default_freezer_handle);
973 map->default_freezer_handle = NULL;
6d2010ae
A
974 }
975#endif
1c79356b
A
976 vm_map_unlock(map);
977
2d21ac55
A
978 assert(map->hdr.nentries == 0);
979
55e303ae
A
980 if(map->pmap)
981 pmap_destroy(map->pmap);
1c79356b 982
91447636 983 zfree(vm_map_zone, map);
1c79356b
A
984}
985
986#if TASK_SWAPPER
987/*
988 * vm_map_swapin/vm_map_swapout
989 *
990 * Swap a map in and out, either referencing or releasing its resources.
992 * These functions are for internal use only; however, they must be exported
992 * because they may be called from macros, which are exported.
993 *
994 * In the case of swapout, there could be races on the residence count,
995 * so if the residence count is up, we return, assuming that a
996 * vm_map_deallocate() call in the near future will bring us back.
997 *
998 * Locking:
999 * -- We use the map write lock for synchronization among races.
1000 * -- The map write lock, and not the simple s_lock, protects the
1001 * swap state of the map.
1002 * -- If a map entry is a share map, then we hold both locks, in
1003 * hierarchical order.
1004 *
1005 * Synchronization Notes:
1006 * 1) If a vm_map_swapin() call happens while swapout in progress, it
1007 * will block on the map lock and proceed when swapout is through.
1008 * 2) A vm_map_reference() call at this time is illegal, and will
1009 * cause a panic. vm_map_reference() is only allowed on resident
1010 * maps, since it refuses to block.
1011 * 3) A vm_map_swapin() call during a swapin will block, and
1012 * proceed when the first swapin is done, turning into a nop.
1013 * This is the reason the res_count is not incremented until
1014 * after the swapin is complete.
1015 * 4) There is a timing hole after the checks of the res_count, before
1016 * the map lock is taken, during which a swapin may get the lock
1017 * before a swapout about to happen. If this happens, the swapin
1018 * will detect the state and increment the reference count, causing
1019 * the swapout to be a nop, thereby delaying it until a later
1020 * vm_map_deallocate. If the swapout gets the lock first, then
1021 * the swapin will simply block until the swapout is done, and
1022 * then proceed.
1023 *
1024 * Because vm_map_swapin() is potentially an expensive operation, it
1025 * should be used with caution.
1026 *
1027 * Invariants:
1028 * 1) A map with a residence count of zero is either swapped, or
1029 * being swapped.
1030 * 2) A map with a non-zero residence count is either resident,
1031 * or being swapped in.
1032 */
1033
1034int vm_map_swap_enable = 1;
1035
1036void vm_map_swapin (vm_map_t map)
1037{
1038 register vm_map_entry_t entry;
2d21ac55 1039
1c79356b
A
1040 if (!vm_map_swap_enable) /* debug */
1041 return;
1042
1043 /*
1044 * Map is locked
1045 * First deal with various races.
1046 */
1047 if (map->sw_state == MAP_SW_IN)
1048 /*
1049 * we raced with swapout and won. Returning will incr.
1050 * the res_count, turning the swapout into a nop.
1051 */
1052 return;
1053
1054 /*
1055 * The residence count must be zero. If we raced with another
1056 * swapin, the state would have been IN; if we raced with a
1057 * swapout (after another competing swapin), we must have lost
1058 * the race to get here (see above comment), in which case
1059 * res_count is still 0.
1060 */
1061 assert(map->res_count == 0);
1062
1063 /*
1064 * There are no intermediate states of a map going out or
1065 * coming in, since the map is locked during the transition.
1066 */
1067 assert(map->sw_state == MAP_SW_OUT);
1068
1069 /*
1070 * We now operate upon each map entry. If the entry is a sub-
1071 * or share-map, we call vm_map_res_reference upon it.
1072 * If the entry is an object, we call vm_object_res_reference
1073 * (this may iterate through the shadow chain).
1074 * Note that we hold the map locked the entire time,
1075 * even if we get back here via a recursive call in
1076 * vm_map_res_reference.
1077 */
1078 entry = vm_map_first_entry(map);
1079
1080 while (entry != vm_map_to_entry(map)) {
1081 if (entry->object.vm_object != VM_OBJECT_NULL) {
1082 if (entry->is_sub_map) {
1083 vm_map_t lmap = entry->object.sub_map;
b0d623f7 1084 lck_mtx_lock(&lmap->s_lock);
1c79356b 1085 vm_map_res_reference(lmap);
b0d623f7 1086 lck_mtx_unlock(&lmap->s_lock);
1c79356b
A
1087 } else {
1088 vm_object_t object = entry->object.vm_object;
1089 vm_object_lock(object);
1090 /*
1091 * This call may iterate through the
1092 * shadow chain.
1093 */
1094 vm_object_res_reference(object);
1095 vm_object_unlock(object);
1096 }
1097 }
1098 entry = entry->vme_next;
1099 }
1100 assert(map->sw_state == MAP_SW_OUT);
1101 map->sw_state = MAP_SW_IN;
1102}
1103
1104void vm_map_swapout(vm_map_t map)
1105{
1106 register vm_map_entry_t entry;
1107
1108 /*
1109 * Map is locked
1110 * First deal with various races.
1111 * If we raced with a swapin and lost, the residence count
1112 * will have been incremented to 1, and we simply return.
1113 */
b0d623f7 1114 lck_mtx_lock(&map->s_lock);
1c79356b 1115 if (map->res_count != 0) {
b0d623f7 1116 lck_mtx_unlock(&map->s_lock);
1c79356b
A
1117 return;
1118 }
b0d623f7 1119 lck_mtx_unlock(&map->s_lock);
1c79356b
A
1120
1121 /*
1122 * There are no intermediate states of a map going out or
1123 * coming in, since the map is locked during the transition.
1124 */
1125 assert(map->sw_state == MAP_SW_IN);
1126
1127 if (!vm_map_swap_enable)
1128 return;
1129
1130 /*
1131 * We now operate upon each map entry. If the entry is a sub-
1132 * or share-map, we call vm_map_res_deallocate upon it.
1133 * If the entry is an object, we call vm_object_res_deallocate
1134 * (this may iterate through the shadow chain).
1135 * Note that we hold the map locked the entire time,
1136 * even if we get back here via a recursive call in
1137 * vm_map_res_deallocate.
1138 */
1139 entry = vm_map_first_entry(map);
1140
1141 while (entry != vm_map_to_entry(map)) {
1142 if (entry->object.vm_object != VM_OBJECT_NULL) {
1143 if (entry->is_sub_map) {
1144 vm_map_t lmap = entry->object.sub_map;
b0d623f7 1145 lck_mtx_lock(&lmap->s_lock);
1c79356b 1146 vm_map_res_deallocate(lmap);
b0d623f7 1147 lck_mtx_unlock(&lmap->s_lock);
1c79356b
A
1148 } else {
1149 vm_object_t object = entry->object.vm_object;
1150 vm_object_lock(object);
1151 /*
1152 * This call may take a long time,
1153 * since it could actively push
1154 * out pages (if we implement it
1155 * that way).
1156 */
1157 vm_object_res_deallocate(object);
1158 vm_object_unlock(object);
1159 }
1160 }
1161 entry = entry->vme_next;
1162 }
1163 assert(map->sw_state == MAP_SW_IN);
1164 map->sw_state = MAP_SW_OUT;
1165}
1166
1167#endif /* TASK_SWAPPER */
1168
1c79356b
A
1169/*
1170 * vm_map_lookup_entry: [ internal use only ]
1171 *
6d2010ae
A
1172 * Calls into the vm map store layer to find the map
1173 * entry containing (or immediately preceding) the
1174 * specified address in the given map; the entry is returned
1c79356b
A
1175 * in the "entry" parameter. The boolean
1176 * result indicates whether the address is
1177 * actually contained in the map.
1178 */
1179boolean_t
1180vm_map_lookup_entry(
91447636
A
1181 register vm_map_t map,
1182 register vm_map_offset_t address,
1c79356b
A
1183 vm_map_entry_t *entry) /* OUT */
1184{
6d2010ae 1185 return ( vm_map_store_lookup_entry( map, address, entry ));
1c79356b
A
1186}
1187
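/*
 * [Illustrative sketch -- not part of the original file.]
 * The OUT entry is only meaningful while the map lock is held;
 * vm_map_set_cache_attr() above shows the usual pattern ("addr" is a
 * caller-supplied address here):
 */
#if 0	/* illustrative example, not compiled */
	vm_map_entry_t	entry;

	vm_map_lock_read(map);
	if (vm_map_lookup_entry(map, addr, &entry) && !entry->is_sub_map) {
		/* "addr" falls within "entry"; inspect it while locked */
	}
	vm_map_unlock_read(map);
#endif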
1188/*
1189 * Routine: vm_map_find_space
1190 * Purpose:
1191 * Allocate a range in the specified virtual address map,
1192 * returning the entry allocated for that range.
1193 * Used by kmem_alloc, etc.
1194 *
1195 * The map must NOT be locked. It will be returned locked
1196 * on KERN_SUCCESS, unlocked on failure.
1197 *
1198 * If an entry is allocated, the object/offset fields
1199 * are initialized to zero.
1200 */
1201kern_return_t
1202vm_map_find_space(
1203 register vm_map_t map,
91447636
A
1204 vm_map_offset_t *address, /* OUT */
1205 vm_map_size_t size,
1206 vm_map_offset_t mask,
0c530ab8 1207 int flags,
1c79356b
A
1208 vm_map_entry_t *o_entry) /* OUT */
1209{
1210 register vm_map_entry_t entry, new_entry;
91447636
A
1211 register vm_map_offset_t start;
1212 register vm_map_offset_t end;
1213
1214 if (size == 0) {
1215 *address = 0;
1216 return KERN_INVALID_ARGUMENT;
1217 }
1c79356b 1218
2d21ac55
A
1219 if (flags & VM_FLAGS_GUARD_AFTER) {
1220 /* account for the back guard page in the size */
39236c6e 1221 size += VM_MAP_PAGE_SIZE(map);
2d21ac55
A
1222 }
1223
7ddcb079 1224 new_entry = vm_map_entry_create(map, FALSE);
1c79356b
A
1225
1226 /*
1227 * Look for the first possible address; if there's already
1228 * something at this address, we have to start after it.
1229 */
1230
1231 vm_map_lock(map);
1232
6d2010ae
A
1233 if( map->disable_vmentry_reuse == TRUE) {
1234 VM_MAP_HIGHEST_ENTRY(map, entry, start);
1235 } else {
1236 assert(first_free_is_valid(map));
1237 if ((entry = map->first_free) == vm_map_to_entry(map))
1238 start = map->min_offset;
1239 else
1240 start = entry->vme_end;
1241 }
1c79356b
A
1242
1243 /*
1244 * In any case, the "entry" always precedes
1245 * the proposed new region throughout the loop:
1246 */
1247
1248 while (TRUE) {
1249 register vm_map_entry_t next;
1250
1251 /*
1252 * Find the end of the proposed new region.
1253 * Be sure we didn't go beyond the end, or
1254 * wrap around the address.
1255 */
1256
2d21ac55
A
1257 if (flags & VM_FLAGS_GUARD_BEFORE) {
1258 /* reserve space for the front guard page */
39236c6e 1259 start += VM_MAP_PAGE_SIZE(map);
2d21ac55 1260 }
1c79356b 1261 end = ((start + mask) & ~mask);
2d21ac55 1262
1c79356b
A
1263 if (end < start) {
1264 vm_map_entry_dispose(map, new_entry);
1265 vm_map_unlock(map);
1266 return(KERN_NO_SPACE);
1267 }
1268 start = end;
1269 end += size;
1270
1271 if ((end > map->max_offset) || (end < start)) {
1272 vm_map_entry_dispose(map, new_entry);
1273 vm_map_unlock(map);
1274 return(KERN_NO_SPACE);
1275 }
1276
1277 /*
1278 * If there are no more entries, we must win.
1279 */
1280
1281 next = entry->vme_next;
1282 if (next == vm_map_to_entry(map))
1283 break;
1284
1285 /*
1286 * If there is another entry, it must be
1287 * after the end of the potential new region.
1288 */
1289
1290 if (next->vme_start >= end)
1291 break;
1292
1293 /*
1294 * Didn't fit -- move to the next entry.
1295 */
1296
1297 entry = next;
1298 start = entry->vme_end;
1299 }
1300
1301 /*
1302 * At this point,
1303 * "start" and "end" should define the endpoints of the
1304 * available new range, and
1305 * "entry" should refer to the region before the new
1306 * range, and
1307 *
1308 * the map should be locked.
1309 */
1310
2d21ac55
A
1311 if (flags & VM_FLAGS_GUARD_BEFORE) {
1312 /* go back for the front guard page */
39236c6e 1313 start -= VM_MAP_PAGE_SIZE(map);
2d21ac55 1314 }
1c79356b
A
1315 *address = start;
1316
e2d2fc5c 1317 assert(start < end);
1c79356b
A
1318 new_entry->vme_start = start;
1319 new_entry->vme_end = end;
1320 assert(page_aligned(new_entry->vme_start));
1321 assert(page_aligned(new_entry->vme_end));
39236c6e
A
1322 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
1323 VM_MAP_PAGE_MASK(map)));
1324 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
1325 VM_MAP_PAGE_MASK(map)));
1c79356b
A
1326
1327 new_entry->is_shared = FALSE;
1328 new_entry->is_sub_map = FALSE;
1329 new_entry->use_pmap = FALSE;
1330 new_entry->object.vm_object = VM_OBJECT_NULL;
1331 new_entry->offset = (vm_object_offset_t) 0;
1332
1333 new_entry->needs_copy = FALSE;
1334
1335 new_entry->inheritance = VM_INHERIT_DEFAULT;
1336 new_entry->protection = VM_PROT_DEFAULT;
1337 new_entry->max_protection = VM_PROT_ALL;
1338 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
1339 new_entry->wired_count = 0;
1340 new_entry->user_wired_count = 0;
1341
1342 new_entry->in_transition = FALSE;
1343 new_entry->needs_wakeup = FALSE;
2d21ac55 1344 new_entry->no_cache = FALSE;
b0d623f7 1345 new_entry->permanent = FALSE;
39236c6e
A
1346 new_entry->superpage_size = FALSE;
1347 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
1348 new_entry->map_aligned = TRUE;
1349 } else {
1350 new_entry->map_aligned = FALSE;
1351 }
2d21ac55 1352
316670eb
A
1353 new_entry->used_for_jit = 0;
1354
2d21ac55 1355 new_entry->alias = 0;
b0d623f7 1356 new_entry->zero_wired_pages = FALSE;
1c79356b 1357
0c530ab8
A
1358 VM_GET_FLAGS_ALIAS(flags, new_entry->alias);
1359
1c79356b
A
1360 /*
1361 * Insert the new entry into the list
1362 */
1363
6d2010ae 1364 vm_map_store_entry_link(map, entry, new_entry);
1c79356b
A
1365
1366 map->size += size;
1367
1368 /*
1369 * Update the lookup hint
1370 */
0c530ab8 1371 SAVE_HINT_MAP_WRITE(map, new_entry);
1c79356b
A
1372
1373 *o_entry = new_entry;
1374 return(KERN_SUCCESS);
1375}
1376
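/*
 * [Illustrative sketch -- not part of the original file.]
 * On KERN_SUCCESS the map comes back locked with a fresh entry whose
 * object/offset are zero; kmem_alloc-style callers install their
 * object and then unlock. "size" and "object" are assumed to be set
 * up by the caller (with a reference held on the object):
 */
#if 0	/* illustrative example, not compiled */
	vm_map_offset_t	addr;
	vm_map_entry_t	entry;
	kern_return_t	kr;

	kr = vm_map_find_space(kernel_map, &addr, size, 0, 0, &entry);
	if (kr == KERN_SUCCESS) {
		entry->object.vm_object = object;
		entry->offset = (vm_object_offset_t) 0;
		vm_map_unlock(kernel_map);
	}
#endif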
1377int vm_map_pmap_enter_print = FALSE;
1378int vm_map_pmap_enter_enable = FALSE;
1379
1380/*
91447636 1381 * Routine: vm_map_pmap_enter [internal only]
1c79356b
A
1382 *
1383 * Description:
1384 * Force pages from the specified object to be entered into
1385 * the pmap at the specified address if they are present.
1386 * As soon as a page is not found in the object, the scan ends.
1387 *
1388 * Returns:
1389 * Nothing.
1390 *
1391 * In/out conditions:
1392 * The source map should not be locked on entry.
1393 */
91447636 1394static void
1c79356b
A
1395vm_map_pmap_enter(
1396 vm_map_t map,
91447636
A
1397 register vm_map_offset_t addr,
1398 register vm_map_offset_t end_addr,
1c79356b
A
1399 register vm_object_t object,
1400 vm_object_offset_t offset,
1401 vm_prot_t protection)
1402{
2d21ac55
A
1403 int type_of_fault;
1404 kern_return_t kr;
0b4e3aa0 1405
55e303ae
A
1406 if(map->pmap == 0)
1407 return;
1408
1c79356b
A
1409 while (addr < end_addr) {
1410 register vm_page_t m;
1411
1412 vm_object_lock(object);
1c79356b
A
1413
1414 m = vm_page_lookup(object, offset);
91447636
A
1415 /*
1416 * ENCRYPTED SWAP:
1417 * The user should never see encrypted data, so do not
1418 * enter an encrypted page in the page table.
1419 */
1420 if (m == VM_PAGE_NULL || m->busy || m->encrypted ||
2d21ac55
A
1421 m->fictitious ||
1422 (m->unusual && ( m->error || m->restart || m->absent))) {
1c79356b
A
1423 vm_object_unlock(object);
1424 return;
1425 }
1426
1c79356b
A
1427 if (vm_map_pmap_enter_print) {
1428 printf("vm_map_pmap_enter:");
2d21ac55
A
1429 printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
1430 map, (unsigned long long)addr, object, (unsigned long long)offset);
1c79356b 1431 }
2d21ac55 1432 type_of_fault = DBG_CACHE_HIT_FAULT;
6d2010ae 1433 kr = vm_fault_enter(m, map->pmap, addr, protection, protection,
316670eb 1434 VM_PAGE_WIRED(m), FALSE, FALSE, FALSE, NULL,
2d21ac55 1435 &type_of_fault);
1c79356b 1436
1c79356b
A
1437 vm_object_unlock(object);
1438
1439 offset += PAGE_SIZE_64;
1440 addr += PAGE_SIZE;
1441 }
1442}
1443
91447636
A
1444boolean_t vm_map_pmap_is_empty(
1445 vm_map_t map,
1446 vm_map_offset_t start,
1447 vm_map_offset_t end);
1448boolean_t vm_map_pmap_is_empty(
1449 vm_map_t map,
1450 vm_map_offset_t start,
1451 vm_map_offset_t end)
1452{
2d21ac55
A
1453#ifdef MACHINE_PMAP_IS_EMPTY
1454 return pmap_is_empty(map->pmap, start, end);
1455#else /* MACHINE_PMAP_IS_EMPTY */
91447636
A
1456 vm_map_offset_t offset;
1457 ppnum_t phys_page;
1458
1459 if (map->pmap == NULL) {
1460 return TRUE;
1461 }
2d21ac55 1462
91447636
A
1463 for (offset = start;
1464 offset < end;
1465 offset += PAGE_SIZE) {
1466 phys_page = pmap_find_phys(map->pmap, offset);
1467 if (phys_page) {
1468 kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
1469 "page %d at 0x%llx\n",
2d21ac55
A
1470 map, (long long)start, (long long)end,
1471 phys_page, (long long)offset);
91447636
A
1472 return FALSE;
1473 }
1474 }
1475 return TRUE;
2d21ac55 1476#endif /* MACHINE_PMAP_IS_EMPTY */
91447636
A
1477}
1478
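/*
 * [Illustrative sketch -- not part of the original file.]
 * This is a debugging aid: vm_map_enter() below uses it to assert
 * that a freshly reserved range has no stale pmap translations,
 * roughly:
 */
#if 0	/* illustrative example, not compiled */
	if (!(flags & VM_FLAGS_NO_PMAP_CHECK)) {
		assert(vm_map_pmap_is_empty(map, *address, *address + size));
	}
#endif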
316670eb
A
1479#define MAX_TRIES_TO_GET_RANDOM_ADDRESS 1000
1480kern_return_t
1481vm_map_random_address_for_size(
1482 vm_map_t map,
1483 vm_map_offset_t *address,
1484 vm_map_size_t size)
1485{
1486 kern_return_t kr = KERN_SUCCESS;
1487 int tries = 0;
1488 vm_map_offset_t random_addr = 0;
1489 vm_map_offset_t hole_end;
1490
1491 vm_map_entry_t next_entry = VM_MAP_ENTRY_NULL;
1492 vm_map_entry_t prev_entry = VM_MAP_ENTRY_NULL;
1493 vm_map_size_t vm_hole_size = 0;
1494 vm_map_size_t addr_space_size;
1495
1496 addr_space_size = vm_map_max(map) - vm_map_min(map);
1497
1498 assert(page_aligned(size));
1499
1500 while (tries < MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
1501 random_addr = ((vm_map_offset_t)random()) << PAGE_SHIFT;
39236c6e
A
1502 random_addr = vm_map_trunc_page(
1503 vm_map_min(map) +(random_addr % addr_space_size),
1504 VM_MAP_PAGE_MASK(map));
316670eb
A
1505
1506 if (vm_map_lookup_entry(map, random_addr, &prev_entry) == FALSE) {
1507 if (prev_entry == vm_map_to_entry(map)) {
1508 next_entry = vm_map_first_entry(map);
1509 } else {
1510 next_entry = prev_entry->vme_next;
1511 }
1512 if (next_entry == vm_map_to_entry(map)) {
1513 hole_end = vm_map_max(map);
1514 } else {
1515 hole_end = next_entry->vme_start;
1516 }
1517 vm_hole_size = hole_end - random_addr;
1518 if (vm_hole_size >= size) {
1519 *address = random_addr;
1520 break;
1521 }
1522 }
1523 tries++;
1524 }
1525
1526 if (tries == MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
1527 kr = KERN_NO_SPACE;
1528 }
1529 return kr;
1530}
1531
1c79356b
A
1532/*
1533 * Routine: vm_map_enter
1534 *
1535 * Description:
1536 * Allocate a range in the specified virtual address map.
1537 * The resulting range will refer to memory defined by
1538 * the given memory object and offset into that object.
1539 *
1540 * Arguments are as defined in the vm_map call.
1541 */
91447636
A
1542int _map_enter_debug = 0;
1543static unsigned int vm_map_enter_restore_successes = 0;
1544static unsigned int vm_map_enter_restore_failures = 0;
1c79356b
A
1545kern_return_t
1546vm_map_enter(
91447636 1547 vm_map_t map,
593a1d5f 1548 vm_map_offset_t *address, /* IN/OUT */
91447636 1549 vm_map_size_t size,
593a1d5f 1550 vm_map_offset_t mask,
1c79356b
A
1551 int flags,
1552 vm_object_t object,
1553 vm_object_offset_t offset,
1554 boolean_t needs_copy,
1555 vm_prot_t cur_protection,
1556 vm_prot_t max_protection,
1557 vm_inherit_t inheritance)
1558{
91447636 1559 vm_map_entry_t entry, new_entry;
2d21ac55 1560 vm_map_offset_t start, tmp_start, tmp_offset;
91447636 1561 vm_map_offset_t end, tmp_end;
b0d623f7
A
1562 vm_map_offset_t tmp2_start, tmp2_end;
1563 vm_map_offset_t step;
1c79356b 1564 kern_return_t result = KERN_SUCCESS;
91447636
A
1565 vm_map_t zap_old_map = VM_MAP_NULL;
1566 vm_map_t zap_new_map = VM_MAP_NULL;
1567 boolean_t map_locked = FALSE;
1568 boolean_t pmap_empty = TRUE;
1569 boolean_t new_mapping_established = FALSE;
1570 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
1571 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
1572 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
2d21ac55
A
1573 boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
1574 boolean_t is_submap = ((flags & VM_FLAGS_SUBMAP) != 0);
b0d623f7 1575 boolean_t permanent = ((flags & VM_FLAGS_PERMANENT) != 0);
316670eb 1576 boolean_t entry_for_jit = ((flags & VM_FLAGS_MAP_JIT) != 0);
b0d623f7 1577 unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
1c79356b 1578 char alias;
2d21ac55 1579 vm_map_offset_t effective_min_offset, effective_max_offset;
593a1d5f 1580 kern_return_t kr;
39236c6e 1581 boolean_t clear_map_aligned = FALSE;
593a1d5f 1582
b0d623f7
A
1583 if (superpage_size) {
1584 switch (superpage_size) {
1585 /*
1586 * Note that the current implementation only supports
1587 * a single size for superpages, SUPERPAGE_SIZE, per
1588 * architecture. As soon as more sizes are
1589 * to be supported, SUPERPAGE_SIZE has to be replaced
1590 * with a lookup of the size depending on superpage_size.
1591 */
1592#ifdef __x86_64__
6d2010ae
A
1593 case SUPERPAGE_SIZE_ANY:
1594 /* handle it like 2 MB and round up to page size */
1595 size = (size + 2*1024*1024 - 1) & ~(2*1024*1024 - 1);
b0d623f7
A
1596 case SUPERPAGE_SIZE_2MB:
1597 break;
1598#endif
1599 default:
1600 return KERN_INVALID_ARGUMENT;
1601 }
1602 mask = SUPERPAGE_SIZE-1;
1603 if (size & (SUPERPAGE_SIZE-1))
1604 return KERN_INVALID_ARGUMENT;
1605 inheritance = VM_INHERIT_NONE; /* fork() children won't inherit superpages */
1606 }
1607
6d2010ae 1608
1c79356b 1609
2d21ac55
A
1610 if (is_submap) {
1611 if (purgable) {
1612 /* submaps can not be purgeable */
1613 return KERN_INVALID_ARGUMENT;
1614 }
1615 if (object == VM_OBJECT_NULL) {
1616 /* submaps can not be created lazily */
1617 return KERN_INVALID_ARGUMENT;
1618 }
1619 }
1620 if (flags & VM_FLAGS_ALREADY) {
1621 /*
1622 * VM_FLAGS_ALREADY says that it's OK if the same mapping
1623 * is already present. For it to be meaningful, the requested
1624 * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
1625 * we shouldn't try to remove what was mapped there first
1626 * (!VM_FLAGS_OVERWRITE).
1627 */
1628 if ((flags & VM_FLAGS_ANYWHERE) ||
1629 (flags & VM_FLAGS_OVERWRITE)) {
1630 return KERN_INVALID_ARGUMENT;
1631 }
1632 }
1633
6d2010ae 1634 effective_min_offset = map->min_offset;
b0d623f7 1635
2d21ac55
A
1636 if (flags & VM_FLAGS_BEYOND_MAX) {
1637 /*
b0d623f7 1638 * Allow an insertion beyond the map's max offset.
2d21ac55
A
1639 */
1640 if (vm_map_is_64bit(map))
1641 effective_max_offset = 0xFFFFFFFFFFFFF000ULL;
1642 else
1643 effective_max_offset = 0x00000000FFFFF000ULL;
1644 } else {
1645 effective_max_offset = map->max_offset;
1646 }
1647
1648 if (size == 0 ||
1649 (offset & PAGE_MASK_64) != 0) {
91447636
A
1650 *address = 0;
1651 return KERN_INVALID_ARGUMENT;
1652 }
1653
1c79356b 1654 VM_GET_FLAGS_ALIAS(flags, alias);
2d21ac55 1655
1c79356b
A
1656#define RETURN(value) { result = value; goto BailOut; }
1657
1658 assert(page_aligned(*address));
1659 assert(page_aligned(size));
91447636 1660
39236c6e
A
1661 if (!VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map))) {
1662 /*
1663 * In most cases, the caller rounds the size up to the
1664 * map's page size.
1665 * If we get a size that is explicitly not map-aligned here,
1666 * we'll have to respect the caller's wish and mark the
1667 * mapping as "not map-aligned" to avoid tripping the
1668 * map alignment checks later.
1669 */
1670 clear_map_aligned = TRUE;
1671 }
1672
91447636
A
1673 /*
1674 * Only zero-fill objects are allowed to be purgable.
1675 * LP64todo - limit purgable objects to 32-bits for now
1676 */
1677 if (purgable &&
1678 (offset != 0 ||
1679 (object != VM_OBJECT_NULL &&
6d2010ae 1680 (object->vo_size != size ||
2d21ac55 1681 object->purgable == VM_PURGABLE_DENY))
b0d623f7 1682 || size > ANON_MAX_SIZE)) /* LP64todo: remove when dp capable */
91447636
A
1683 return KERN_INVALID_ARGUMENT;
1684
1685 if (!anywhere && overwrite) {
1686 /*
1687 * Create a temporary VM map to hold the old mappings in the
1688 * affected area while we create the new one.
1689 * This avoids releasing the VM map lock in
1690 * vm_map_entry_delete() and allows atomicity
1691 * when we want to replace some mappings with a new one.
1692 * It also allows us to restore the old VM mappings if the
1693 * new mapping fails.
1694 */
1695 zap_old_map = vm_map_create(PMAP_NULL,
1696 *address,
1697 *address + size,
b0d623f7 1698 map->hdr.entries_pageable);
39236c6e 1699 vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
91447636
A
1700 }
1701
2d21ac55 1702StartAgain: ;
1c79356b
A
1703
1704 start = *address;
1705
1706 if (anywhere) {
1707 vm_map_lock(map);
91447636 1708 map_locked = TRUE;
6d2010ae 1709
316670eb
A
1710 if (entry_for_jit) {
1711 if (map->jit_entry_exists) {
1712 result = KERN_INVALID_ARGUMENT;
1713 goto BailOut;
1714 }
1715 /*
1716 * Get a random start address.
1717 */
1718 result = vm_map_random_address_for_size(map, address, size);
1719 if (result != KERN_SUCCESS) {
1720 goto BailOut;
1721 }
1722 start = *address;
6d2010ae 1723 }
1c79356b 1724
316670eb 1725
1c79356b
A
1726 /*
1727 * Calculate the first possible address.
1728 */
1729
2d21ac55
A
1730 if (start < effective_min_offset)
1731 start = effective_min_offset;
1732 if (start > effective_max_offset)
1c79356b
A
1733 RETURN(KERN_NO_SPACE);
1734
1735 /*
1736 * Look for the first possible address;
1737 * if there's already something at this
1738 * address, we have to start after it.
1739 */
1740
6d2010ae
A
1741 if( map->disable_vmentry_reuse == TRUE) {
1742 VM_MAP_HIGHEST_ENTRY(map, entry, start);
1c79356b 1743 } else {
6d2010ae
A
1744 assert(first_free_is_valid(map));
1745
1746 entry = map->first_free;
1747
1748 if (entry == vm_map_to_entry(map)) {
1749 entry = NULL;
1750 } else {
1751 if (entry->vme_next == vm_map_to_entry(map)){
1752 /*
1753 * Hole at the end of the map.
1754 */
1755 entry = NULL;
1756 } else {
1757 if (start < (entry->vme_next)->vme_start ) {
1758 start = entry->vme_end;
39236c6e
A
1759 start = vm_map_round_page(start,
1760 VM_MAP_PAGE_MASK(map));
6d2010ae
A
1761 } else {
1762 /*
1763 * Need to do a lookup.
1764 */
1765 entry = NULL;
1766 }
1767 }
1768 }
1769
1770 if (entry == NULL) {
1771 vm_map_entry_t tmp_entry;
316670eb
A
1772 if (vm_map_lookup_entry(map, start, &tmp_entry)) {
1773 assert(!entry_for_jit);
6d2010ae 1774 start = tmp_entry->vme_end;
39236c6e
A
1775 start = vm_map_round_page(start,
1776 VM_MAP_PAGE_MASK(map));
316670eb 1777 }
6d2010ae
A
1778 entry = tmp_entry;
1779 }
1c79356b
A
1780 }
1781
1782 /*
1783 * In any case, the "entry" always precedes
1784 * the proposed new region throughout the
1785 * loop:
1786 */
1787
1788 while (TRUE) {
1789 register vm_map_entry_t next;
1790
2d21ac55 1791 /*
1c79356b
A
1792 * Find the end of the proposed new region.
1793 * Be sure we didn't go beyond the end, or
1794 * wrap around the address.
1795 */
1796
1797 end = ((start + mask) & ~mask);
39236c6e
A
1798 end = vm_map_round_page(end,
1799 VM_MAP_PAGE_MASK(map));
1c79356b
A
1800 if (end < start)
1801 RETURN(KERN_NO_SPACE);
1802 start = end;
39236c6e
A
1803 assert(VM_MAP_PAGE_ALIGNED(start,
1804 VM_MAP_PAGE_MASK(map)));
1c79356b
A
1805 end += size;
1806
2d21ac55 1807 if ((end > effective_max_offset) || (end < start)) {
1c79356b 1808 if (map->wait_for_space) {
2d21ac55
A
1809 if (size <= (effective_max_offset -
1810 effective_min_offset)) {
1c79356b
A
1811 assert_wait((event_t)map,
1812 THREAD_ABORTSAFE);
1813 vm_map_unlock(map);
91447636
A
1814 map_locked = FALSE;
1815 thread_block(THREAD_CONTINUE_NULL);
1c79356b
A
1816 goto StartAgain;
1817 }
1818 }
1819 RETURN(KERN_NO_SPACE);
1820 }
1821
1822 /*
1823 * If there are no more entries, we must win.
1824 */
1825
1826 next = entry->vme_next;
1827 if (next == vm_map_to_entry(map))
1828 break;
1829
1830 /*
1831 * If there is another entry, it must be
1832 * after the end of the potential new region.
1833 */
1834
1835 if (next->vme_start >= end)
1836 break;
1837
1838 /*
1839 * Didn't fit -- move to the next entry.
1840 */
1841
1842 entry = next;
1843 start = entry->vme_end;
39236c6e
A
1844 start = vm_map_round_page(start,
1845 VM_MAP_PAGE_MASK(map));
1c79356b
A
1846 }
1847 *address = start;
39236c6e
A
1848 assert(VM_MAP_PAGE_ALIGNED(*address,
1849 VM_MAP_PAGE_MASK(map)));
1c79356b 1850 } else {
1c79356b
A
1851 /*
1852 * Verify that:
1853 * the address doesn't itself violate
1854 * the mask requirement.
1855 */
1856
1857 vm_map_lock(map);
91447636 1858 map_locked = TRUE;
1c79356b
A
1859 if ((start & mask) != 0)
1860 RETURN(KERN_NO_SPACE);
1861
1862 /*
1863 * ... the address is within bounds
1864 */
1865
1866 end = start + size;
1867
2d21ac55
A
1868 if ((start < effective_min_offset) ||
1869 (end > effective_max_offset) ||
1c79356b
A
1870 (start >= end)) {
1871 RETURN(KERN_INVALID_ADDRESS);
1872 }
1873
91447636
A
1874 if (overwrite && zap_old_map != VM_MAP_NULL) {
1875 /*
1876 * Fixed mapping and "overwrite" flag: attempt to
1877 * remove all existing mappings in the specified
1878 * address range, saving them in our "zap_old_map".
1879 */
1880 (void) vm_map_delete(map, start, end,
1881 VM_MAP_REMOVE_SAVE_ENTRIES,
1882 zap_old_map);
1883 }
1884
1c79356b
A
1885 /*
1886 * ... the starting address isn't allocated
1887 */
1888
2d21ac55
A
1889 if (vm_map_lookup_entry(map, start, &entry)) {
1890 if (! (flags & VM_FLAGS_ALREADY)) {
1891 RETURN(KERN_NO_SPACE);
1892 }
1893 /*
1894 * Check if what's already there is what we want.
1895 */
1896 tmp_start = start;
1897 tmp_offset = offset;
1898 if (entry->vme_start < start) {
1899 tmp_start -= start - entry->vme_start;
1900 tmp_offset -= start - entry->vme_start;
1901
1902 }
1903 for (; entry->vme_start < end;
1904 entry = entry->vme_next) {
4a3eedf9
A
1905 /*
1906 * Check if the mapping's attributes
1907 * match the existing map entry.
1908 */
2d21ac55
A
1909 if (entry == vm_map_to_entry(map) ||
1910 entry->vme_start != tmp_start ||
1911 entry->is_sub_map != is_submap ||
2d21ac55
A
1912 entry->offset != tmp_offset ||
1913 entry->needs_copy != needs_copy ||
1914 entry->protection != cur_protection ||
1915 entry->max_protection != max_protection ||
1916 entry->inheritance != inheritance ||
1917 entry->alias != alias) {
1918 /* not the same mapping! */
1919 RETURN(KERN_NO_SPACE);
1920 }
4a3eedf9
A
1921 /*
1922 * Check if the same object is being mapped.
1923 */
1924 if (is_submap) {
1925 if (entry->object.sub_map !=
1926 (vm_map_t) object) {
1927 /* not the same submap */
1928 RETURN(KERN_NO_SPACE);
1929 }
1930 } else {
1931 if (entry->object.vm_object != object) {
1932 /* not the same VM object... */
1933 vm_object_t obj2;
1934
1935 obj2 = entry->object.vm_object;
1936 if ((obj2 == VM_OBJECT_NULL ||
1937 obj2->internal) &&
1938 (object == VM_OBJECT_NULL ||
1939 object->internal)) {
1940 /*
1941 * ... but both are
1942 * anonymous memory,
1943 * so equivalent.
1944 */
1945 } else {
1946 RETURN(KERN_NO_SPACE);
1947 }
1948 }
1949 }
1950
2d21ac55
A
1951 tmp_offset += entry->vme_end - entry->vme_start;
1952 tmp_start += entry->vme_end - entry->vme_start;
1953 if (entry->vme_end >= end) {
1954 /* reached the end of our mapping */
1955 break;
1956 }
1957 }
1958 /* it all matches: let's use what's already there! */
1959 RETURN(KERN_MEMORY_PRESENT);
1960 }
1c79356b
A
1961
1962 /*
1963 * ... the next region doesn't overlap the
1964 * end point.
1965 */
1966
1967 if ((entry->vme_next != vm_map_to_entry(map)) &&
1968 (entry->vme_next->vme_start < end))
1969 RETURN(KERN_NO_SPACE);
1970 }
1971
1972 /*
1973 * At this point,
1974 * "start" and "end" should define the endpoints of the
1975 * available new range, and
1976 * "entry" should refer to the region before the new
1977 * range, and
1978 *
1979 * the map should be locked.
1980 */
1981
1982 /*
1983 * See whether we can avoid creating a new entry (and object) by
1984 * extending one of our neighbors. [So far, we only attempt to
91447636
A
1985 * extend from below.] Note that we can never extend/join
1986 * purgable objects because they need to remain distinct
1987 * entities in order to implement their "volatile object"
1988 * semantics.
1c79356b
A
1989 */
1990
316670eb 1991 if (purgable || entry_for_jit) {
91447636
A
1992 if (object == VM_OBJECT_NULL) {
1993 object = vm_object_allocate(size);
1994 object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
316670eb
A
1995 if (purgable) {
1996 object->purgable = VM_PURGABLE_NONVOLATILE;
1997 }
91447636
A
1998 offset = (vm_object_offset_t)0;
1999 }
2d21ac55
A
2000 } else if ((is_submap == FALSE) &&
2001 (object == VM_OBJECT_NULL) &&
2002 (entry != vm_map_to_entry(map)) &&
2003 (entry->vme_end == start) &&
2004 (!entry->is_shared) &&
2005 (!entry->is_sub_map) &&
6d2010ae 2006 ((alias == VM_MEMORY_REALLOC) || (entry->alias == alias)) &&
2d21ac55
A
2007 (entry->inheritance == inheritance) &&
2008 (entry->protection == cur_protection) &&
2009 (entry->max_protection == max_protection) &&
2010 (entry->behavior == VM_BEHAVIOR_DEFAULT) &&
2011 (entry->in_transition == 0) &&
2012 (entry->no_cache == no_cache) &&
39236c6e
A
2013 /*
2014 * No coalescing if not map-aligned, to avoid propagating
2015 * that condition any further than needed:
2016 */
2017 (!entry->map_aligned || !clear_map_aligned) &&
b0d623f7
A
2018 ((entry->vme_end - entry->vme_start) + size <=
2019 (alias == VM_MEMORY_REALLOC ?
2020 ANON_CHUNK_SIZE :
2021 NO_COALESCE_LIMIT)) &&
2d21ac55 2022 (entry->wired_count == 0)) { /* implies user_wired_count == 0 */
1c79356b 2023 if (vm_object_coalesce(entry->object.vm_object,
2d21ac55
A
2024 VM_OBJECT_NULL,
2025 entry->offset,
2026 (vm_object_offset_t) 0,
2027 (vm_map_size_t)(entry->vme_end - entry->vme_start),
2028 (vm_map_size_t)(end - entry->vme_end))) {
1c79356b
A
2029
2030 /*
2031 * Coalesced the two objects - can extend
2032 * the previous map entry to include the
2033 * new range.
2034 */
2035 map->size += (end - entry->vme_end);
e2d2fc5c 2036 assert(entry->vme_start < end);
39236c6e
A
2037 assert(VM_MAP_PAGE_ALIGNED(end,
2038 VM_MAP_PAGE_MASK(map)));
1c79356b 2039 entry->vme_end = end;
6d2010ae 2040 vm_map_store_update_first_free(map, map->first_free);
1c79356b
A
2041 RETURN(KERN_SUCCESS);
2042 }
2043 }
2044
b0d623f7
A
2045 step = superpage_size ? SUPERPAGE_SIZE : (end - start);
2046 new_entry = NULL;
2047
2048 for (tmp2_start = start; tmp2_start<end; tmp2_start += step) {
2049 tmp2_end = tmp2_start + step;
2050 /*
2051 * Create a new entry
2052 * LP64todo - for now, we can only allocate 4GB internal objects
2053 * because the default pager can't page bigger ones. Remove this
2054 * when it can.
2055 *
2056 * XXX FBDP
2057 * The reserved "page zero" in each process's address space can
2058 * be arbitrarily large. Splitting it into separate 4GB objects and
2059 * therefore different VM map entries serves no purpose and just
2060 * slows down operations on the VM map, so let's not split the
2061 * allocation into 4GB chunks if the max protection is NONE. That
2062 * memory should never be accessible, so it will never get to the
2063 * default pager.
2064 */
2065 tmp_start = tmp2_start;
2066 if (object == VM_OBJECT_NULL &&
2067 size > (vm_map_size_t)ANON_CHUNK_SIZE &&
2068 max_protection != VM_PROT_NONE &&
2069 superpage_size == 0)
2070 tmp_end = tmp_start + (vm_map_size_t)ANON_CHUNK_SIZE;
2071 else
2072 tmp_end = tmp2_end;
2073 do {
2074 new_entry = vm_map_entry_insert(map, entry, tmp_start, tmp_end,
2075 object, offset, needs_copy,
2076 FALSE, FALSE,
2077 cur_protection, max_protection,
2078 VM_BEHAVIOR_DEFAULT,
316670eb 2079 (entry_for_jit)? VM_INHERIT_NONE: inheritance,
6d2010ae 2080 0, no_cache,
39236c6e
A
2081 permanent,
2082 superpage_size,
2083 clear_map_aligned);
b0d623f7 2084 new_entry->alias = alias;
316670eb 2085 if (entry_for_jit){
6d2010ae
A
2086 if (!(map->jit_entry_exists)){
2087 new_entry->used_for_jit = TRUE;
2088 map->jit_entry_exists = TRUE;
2089 }
2090 }
2091
b0d623f7
A
2092 if (is_submap) {
2093 vm_map_t submap;
2094 boolean_t submap_is_64bit;
2095 boolean_t use_pmap;
2096
2097 new_entry->is_sub_map = TRUE;
2098 submap = (vm_map_t) object;
2099 submap_is_64bit = vm_map_is_64bit(submap);
2100 use_pmap = (alias == VM_MEMORY_SHARED_PMAP);
2101 #ifndef NO_NESTED_PMAP
2102 if (use_pmap && submap->pmap == NULL) {
316670eb 2103 ledger_t ledger = map->pmap->ledger;
b0d623f7 2104 /* we need a sub pmap to nest... */
316670eb
A
2105 submap->pmap = pmap_create(ledger, 0,
2106 submap_is_64bit);
b0d623f7
A
2107 if (submap->pmap == NULL) {
2108 /* let's proceed without nesting... */
2109 }
2d21ac55 2110 }
b0d623f7
A
2111 if (use_pmap && submap->pmap != NULL) {
2112 kr = pmap_nest(map->pmap,
2113 submap->pmap,
2114 tmp_start,
2115 tmp_start,
2116 tmp_end - tmp_start);
2117 if (kr != KERN_SUCCESS) {
2118 printf("vm_map_enter: "
2119 "pmap_nest(0x%llx,0x%llx) "
2120 "error 0x%x\n",
2121 (long long)tmp_start,
2122 (long long)tmp_end,
2123 kr);
2124 } else {
2125 /* we're now nested ! */
2126 new_entry->use_pmap = TRUE;
2127 pmap_empty = FALSE;
2128 }
2129 }
2130 #endif /* NO_NESTED_PMAP */
2d21ac55 2131 }
b0d623f7
A
2132 entry = new_entry;
2133
2134 if (superpage_size) {
2135 vm_page_t pages, m;
2136 vm_object_t sp_object;
2137
2138 entry->offset = 0;
2139
2140 /* allocate one superpage */
2141 kr = cpm_allocate(SUPERPAGE_SIZE, &pages, 0, SUPERPAGE_NBASEPAGES-1, TRUE, 0);
2d21ac55 2142 if (kr != KERN_SUCCESS) {
b0d623f7
A
2143 new_mapping_established = TRUE; /* will cause deallocation of whole range */
2144 RETURN(kr);
2145 }
2146
2147 /* create one vm_object per superpage */
2148 sp_object = vm_object_allocate((vm_map_size_t)(entry->vme_end - entry->vme_start));
2149 sp_object->phys_contiguous = TRUE;
6d2010ae 2150 sp_object->vo_shadow_offset = (vm_object_offset_t)pages->phys_page*PAGE_SIZE;
b0d623f7
A
2151 entry->object.vm_object = sp_object;
2152
2153 /* enter the base pages into the object */
2154 vm_object_lock(sp_object);
2155 for (offset = 0; offset < SUPERPAGE_SIZE; offset += PAGE_SIZE) {
2156 m = pages;
2157 pmap_zero_page(m->phys_page);
2158 pages = NEXT_PAGE(m);
2159 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
2160 vm_page_insert(m, sp_object, offset);
2d21ac55 2161 }
b0d623f7 2162 vm_object_unlock(sp_object);
2d21ac55 2163 }
b0d623f7
A
2164 } while (tmp_end != tmp2_end &&
2165 (tmp_start = tmp_end) &&
2166 (tmp_end = (tmp2_end - tmp_end > (vm_map_size_t)ANON_CHUNK_SIZE) ?
2167 tmp_end + (vm_map_size_t)ANON_CHUNK_SIZE : tmp2_end));
2168 }
91447636 2169
1c79356b 2170 vm_map_unlock(map);
91447636
A
2171 map_locked = FALSE;
2172
2173 new_mapping_established = TRUE;
1c79356b
A
2174
2175 /* Wire down the new entry if the user
2176 * requested all new map entries be wired.
2177 */
b0d623f7 2178 if ((map->wiring_required)||(superpage_size)) {
91447636 2179 pmap_empty = FALSE; /* pmap won't be empty */
316670eb 2180 kr = vm_map_wire(map, start, end,
91447636 2181 new_entry->protection, TRUE);
316670eb 2182 RETURN(kr);
1c79356b
A
2183 }
2184
2185 if ((object != VM_OBJECT_NULL) &&
2186 (vm_map_pmap_enter_enable) &&
2187 (!anywhere) &&
2188 (!needs_copy) &&
2189 (size < (128*1024))) {
91447636 2190 pmap_empty = FALSE; /* pmap won't be empty */
0c530ab8 2191
2d21ac55 2192 if (override_nx(map, alias) && cur_protection)
0c530ab8 2193 cur_protection |= VM_PROT_EXECUTE;
2d21ac55 2194
1c79356b
A
2195 vm_map_pmap_enter(map, start, end,
2196 object, offset, cur_protection);
2197 }
2198
2d21ac55 2199BailOut: ;
593a1d5f
A
2200 if (result == KERN_SUCCESS) {
2201 vm_prot_t pager_prot;
2202 memory_object_t pager;
91447636 2203
593a1d5f
A
2204 if (pmap_empty &&
2205 !(flags & VM_FLAGS_NO_PMAP_CHECK)) {
2206 assert(vm_map_pmap_is_empty(map,
2207 *address,
2208 *address+size));
2209 }
2210
2211 /*
2212 * For "named" VM objects, let the pager know that the
2213 * memory object is being mapped. Some pagers need to keep
2214 * track of this, to know when they can reclaim the memory
2215 * object, for example.
2216 * VM calls memory_object_map() for each mapping (specifying
2217 * the protection of each mapping) and calls
2218 * memory_object_last_unmap() when all the mappings are gone.
2219 */
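		/*
		 * A rough sketch of the pager-side bookkeeping this protocol
		 * enables.  The pager type, lookup routine and field below are
		 * hypothetical; only the memory_object_map() /
		 * memory_object_last_unmap() pairing comes from the protocol
		 * described above.
		 *
		 *	kern_return_t
		 *	example_pager_map(memory_object_t mem_obj,
		 *			  __unused vm_prot_t prot)
		 *	{
		 *		example_pager_t p = example_pager_lookup(mem_obj);
		 *		p->is_mapped = TRUE;
		 *		return KERN_SUCCESS;
		 *	}
		 *
		 *	kern_return_t
		 *	example_pager_last_unmap(memory_object_t mem_obj)
		 *	{
		 *		example_pager_t p = example_pager_lookup(mem_obj);
		 *		p->is_mapped = FALSE;	// safe to reclaim now
		 *		return KERN_SUCCESS;
		 *	}
		 */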
2220 pager_prot = max_protection;
2221 if (needs_copy) {
2222 /*
2223 * Copy-On-Write mapping: won't modify
2224 * the memory object.
2225 */
2226 pager_prot &= ~VM_PROT_WRITE;
2227 }
2228 if (!is_submap &&
2229 object != VM_OBJECT_NULL &&
2230 object->named &&
2231 object->pager != MEMORY_OBJECT_NULL) {
2232 vm_object_lock(object);
2233 pager = object->pager;
2234 if (object->named &&
2235 pager != MEMORY_OBJECT_NULL) {
2236 assert(object->pager_ready);
2237 vm_object_mapping_wait(object, THREAD_UNINT);
2238 vm_object_mapping_begin(object);
2239 vm_object_unlock(object);
2240
2241 kr = memory_object_map(pager, pager_prot);
2242 assert(kr == KERN_SUCCESS);
2243
2244 vm_object_lock(object);
2245 vm_object_mapping_end(object);
2246 }
2247 vm_object_unlock(object);
2248 }
2249 } else {
91447636
A
2250 if (new_mapping_established) {
2251 /*
2252 * We have to get rid of the new mappings since we
2253 * won't make them available to the user.
2254 * Try to do that atomically, to minimize the risk
2255 * that someone else creates new mappings in that range.
2256 */
2257 zap_new_map = vm_map_create(PMAP_NULL,
2258 *address,
2259 *address + size,
b0d623f7 2260 map->hdr.entries_pageable);
39236c6e
A
2261 vm_map_set_page_shift(zap_new_map,
2262 VM_MAP_PAGE_SHIFT(map));
91447636
A
2263 if (!map_locked) {
2264 vm_map_lock(map);
2265 map_locked = TRUE;
2266 }
2267 (void) vm_map_delete(map, *address, *address+size,
2268 VM_MAP_REMOVE_SAVE_ENTRIES,
2269 zap_new_map);
2270 }
2271 if (zap_old_map != VM_MAP_NULL &&
2272 zap_old_map->hdr.nentries != 0) {
2273 vm_map_entry_t entry1, entry2;
2274
2275 /*
2276 * The new mapping failed. Attempt to restore
2277 * the old mappings, saved in the "zap_old_map".
2278 */
2279 if (!map_locked) {
2280 vm_map_lock(map);
2281 map_locked = TRUE;
2282 }
2283
2284 /* first check if the coast is still clear */
2285 start = vm_map_first_entry(zap_old_map)->vme_start;
2286 end = vm_map_last_entry(zap_old_map)->vme_end;
2287 if (vm_map_lookup_entry(map, start, &entry1) ||
2288 vm_map_lookup_entry(map, end, &entry2) ||
2289 entry1 != entry2) {
2290 /*
2291 * Part of that range has already been
2292 * re-mapped: we can't restore the old
2293 * mappings...
2294 */
2295 vm_map_enter_restore_failures++;
2296 } else {
2297 /*
2298 * Transfer the saved map entries from
2299 * "zap_old_map" to the original "map",
2300 * inserting them all after "entry1".
2301 */
2302 for (entry2 = vm_map_first_entry(zap_old_map);
2303 entry2 != vm_map_to_entry(zap_old_map);
2304 entry2 = vm_map_first_entry(zap_old_map)) {
2d21ac55
A
2305 vm_map_size_t entry_size;
2306
2307 entry_size = (entry2->vme_end -
2308 entry2->vme_start);
6d2010ae 2309 vm_map_store_entry_unlink(zap_old_map,
91447636 2310 entry2);
2d21ac55 2311 zap_old_map->size -= entry_size;
6d2010ae 2312 vm_map_store_entry_link(map, entry1, entry2);
2d21ac55 2313 map->size += entry_size;
91447636
A
2314 entry1 = entry2;
2315 }
2316 if (map->wiring_required) {
2317 /*
2318 * XXX TODO: we should rewire the
2319 * old pages here...
2320 */
2321 }
2322 vm_map_enter_restore_successes++;
2323 }
2324 }
2325 }
2326
2327 if (map_locked) {
2328 vm_map_unlock(map);
2329 }
2330
2331 /*
2332 * Get rid of the "zap_maps" and all the map entries that
2333 * they may still contain.
2334 */
2335 if (zap_old_map != VM_MAP_NULL) {
2d21ac55 2336 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
91447636
A
2337 zap_old_map = VM_MAP_NULL;
2338 }
2339 if (zap_new_map != VM_MAP_NULL) {
2d21ac55 2340 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
91447636
A
2341 zap_new_map = VM_MAP_NULL;
2342 }
2343
2344 return result;
1c79356b
A
2345
2346#undef RETURN
2347}
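/*
 * A minimal user-space sketch of the "fixed + overwrite" path handled
 * above (the address and size are hypothetical).  Whatever was mapped in
 * [addr, addr + size) is replaced; on failure, vm_map_enter() attempts to
 * restore the old mappings saved in "zap_old_map".
 *
 *	mach_vm_address_t addr = 0x150000000ULL;
 *	kern_return_t kr;
 *
 *	kr = mach_vm_map(mach_task_self(), &addr, 0x10000, 0,
 *			 VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
 *			 MACH_PORT_NULL, 0, FALSE,
 *			 VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
 */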
2348
91447636 2349kern_return_t
2d21ac55
A
2350vm_map_enter_mem_object(
2351 vm_map_t target_map,
2352 vm_map_offset_t *address,
2353 vm_map_size_t initial_size,
2354 vm_map_offset_t mask,
2355 int flags,
2356 ipc_port_t port,
2357 vm_object_offset_t offset,
2358 boolean_t copy,
2359 vm_prot_t cur_protection,
2360 vm_prot_t max_protection,
2361 vm_inherit_t inheritance)
91447636 2362{
2d21ac55
A
2363 vm_map_address_t map_addr;
2364 vm_map_size_t map_size;
2365 vm_object_t object;
2366 vm_object_size_t size;
2367 kern_return_t result;
6d2010ae 2368 boolean_t mask_cur_protection, mask_max_protection;
39236c6e 2369 vm_map_offset_t offset_in_mapping;
6d2010ae
A
2370
2371 mask_cur_protection = cur_protection & VM_PROT_IS_MASK;
2372 mask_max_protection = max_protection & VM_PROT_IS_MASK;
2373 cur_protection &= ~VM_PROT_IS_MASK;
2374 max_protection &= ~VM_PROT_IS_MASK;
91447636
A
2375
2376 /*
2d21ac55 2377 * Check arguments for validity
91447636 2378 */
2d21ac55
A
2379 if ((target_map == VM_MAP_NULL) ||
2380 (cur_protection & ~VM_PROT_ALL) ||
2381 (max_protection & ~VM_PROT_ALL) ||
2382 (inheritance > VM_INHERIT_LAST_VALID) ||
2383 initial_size == 0)
2384 return KERN_INVALID_ARGUMENT;
6d2010ae 2385
39236c6e
A
2386 map_addr = vm_map_trunc_page(*address,
2387 VM_MAP_PAGE_MASK(target_map));
2388 map_size = vm_map_round_page(initial_size,
2389 VM_MAP_PAGE_MASK(target_map));
2390 size = vm_object_round_page(initial_size);
593a1d5f 2391
2d21ac55
A
2392 /*
2393 * Find the vm object (if any) corresponding to this port.
2394 */
2395 if (!IP_VALID(port)) {
2396 object = VM_OBJECT_NULL;
2397 offset = 0;
2398 copy = FALSE;
2399 } else if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
2400 vm_named_entry_t named_entry;
2401
2402 named_entry = (vm_named_entry_t) port->ip_kobject;
39236c6e
A
2403
2404 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
2405 offset += named_entry->data_offset;
2406 }
2407
2d21ac55
A
2408 /* a few checks to make sure user is obeying rules */
2409 if (size == 0) {
2410 if (offset >= named_entry->size)
2411 return KERN_INVALID_RIGHT;
2412 size = named_entry->size - offset;
2413 }
6d2010ae
A
2414 if (mask_max_protection) {
2415 max_protection &= named_entry->protection;
2416 }
2417 if (mask_cur_protection) {
2418 cur_protection &= named_entry->protection;
2419 }
2d21ac55
A
2420 if ((named_entry->protection & max_protection) !=
2421 max_protection)
2422 return KERN_INVALID_RIGHT;
2423 if ((named_entry->protection & cur_protection) !=
2424 cur_protection)
2425 return KERN_INVALID_RIGHT;
22ba694c
A
2426 if (offset + size < offset) {
2427 /* overflow */
2428 return KERN_INVALID_ARGUMENT;
2429 }
2d21ac55
A
2430 if (named_entry->size < (offset + size))
2431 return KERN_INVALID_ARGUMENT;
2432
39236c6e
A
2433 if (named_entry->is_copy) {
2434 /* for a vm_map_copy, we can only map it whole */
2435 if ((size != named_entry->size) &&
2436 (vm_map_round_page(size,
2437 VM_MAP_PAGE_MASK(target_map)) ==
2438 named_entry->size)) {
2439 /* XXX FBDP use the rounded size... */
2440 size = vm_map_round_page(
2441 size,
2442 VM_MAP_PAGE_MASK(target_map));
2443 }
2444
2445 if (offset != 0 ||
2446 size != named_entry->size) {
2447 return KERN_INVALID_ARGUMENT;
2448 }
2449 }
2450
2d21ac55
A
2451 /* the caller's parameter offset is defined to be the */
2452 /* offset from the beginning of the named entry's range in the object */
2453 offset = offset + named_entry->offset;
2454
39236c6e
A
2455 if (! VM_MAP_PAGE_ALIGNED(size,
2456 VM_MAP_PAGE_MASK(target_map))) {
2457 /*
2458 * Let's not map more than requested;
2459 * vm_map_enter() will handle this "not map-aligned"
2460 * case.
2461 */
2462 map_size = size;
2463 }
2464
2d21ac55
A
2465 named_entry_lock(named_entry);
2466 if (named_entry->is_sub_map) {
2467 vm_map_t submap;
2468
39236c6e
A
2469 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
2470 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap.");
2471 }
2472
2d21ac55
A
2473 submap = named_entry->backing.map;
2474 vm_map_lock(submap);
2475 vm_map_reference(submap);
2476 vm_map_unlock(submap);
2477 named_entry_unlock(named_entry);
2478
2479 result = vm_map_enter(target_map,
2480 &map_addr,
2481 map_size,
2482 mask,
2483 flags | VM_FLAGS_SUBMAP,
2484 (vm_object_t) submap,
2485 offset,
2486 copy,
2487 cur_protection,
2488 max_protection,
2489 inheritance);
2490 if (result != KERN_SUCCESS) {
2491 vm_map_deallocate(submap);
2492 } else {
2493 /*
2494 * No need to lock "submap" just to check its
2495 * "mapped" flag: that flag is never reset
2496 * once it's been set and if we race, we'll
2497 * just end up setting it twice, which is OK.
2498 */
316670eb
A
2499 if (submap->mapped_in_other_pmaps == FALSE &&
2500 vm_map_pmap(submap) != PMAP_NULL &&
2501 vm_map_pmap(submap) !=
2502 vm_map_pmap(target_map)) {
2d21ac55 2503 /*
316670eb
A
2504 * This submap is being mapped in a map
2505 * that uses a different pmap.
2506 * Set its "mapped_in_other_pmaps" flag
2507 * to indicate that we now need to
2508 * remove mappings from all pmaps rather
2509 * than just the submap's pmap.
2d21ac55
A
2510 */
2511 vm_map_lock(submap);
316670eb 2512 submap->mapped_in_other_pmaps = TRUE;
2d21ac55
A
2513 vm_map_unlock(submap);
2514 }
2515 *address = map_addr;
2516 }
2517 return result;
2518
2519 } else if (named_entry->is_pager) {
2520 unsigned int access;
2521 vm_prot_t protections;
2522 unsigned int wimg_mode;
2d21ac55
A
2523
2524 protections = named_entry->protection & VM_PROT_ALL;
2525 access = GET_MAP_MEM(named_entry->protection);
2526
39236c6e
A
2527 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
2528 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap.");
2529 }
2530
2d21ac55
A
2531 object = vm_object_enter(named_entry->backing.pager,
2532 named_entry->size,
2533 named_entry->internal,
2534 FALSE,
2535 FALSE);
2536 if (object == VM_OBJECT_NULL) {
2537 named_entry_unlock(named_entry);
2538 return KERN_INVALID_OBJECT;
2539 }
2540
2541 /* JMM - drop reference on pager here */
2542
2543 /* create an extra ref for the named entry */
2544 vm_object_lock(object);
2545 vm_object_reference_locked(object);
2546 named_entry->backing.object = object;
2547 named_entry->is_pager = FALSE;
2548 named_entry_unlock(named_entry);
2549
2550 wimg_mode = object->wimg_bits;
6d2010ae 2551
2d21ac55
A
2552 if (access == MAP_MEM_IO) {
2553 wimg_mode = VM_WIMG_IO;
2554 } else if (access == MAP_MEM_COPYBACK) {
2555 wimg_mode = VM_WIMG_USE_DEFAULT;
316670eb
A
2556 } else if (access == MAP_MEM_INNERWBACK) {
2557 wimg_mode = VM_WIMG_INNERWBACK;
2d21ac55
A
2558 } else if (access == MAP_MEM_WTHRU) {
2559 wimg_mode = VM_WIMG_WTHRU;
2560 } else if (access == MAP_MEM_WCOMB) {
2561 wimg_mode = VM_WIMG_WCOMB;
2562 }
2d21ac55
A
2563
2564 /* wait for object (if any) to be ready */
2565 if (!named_entry->internal) {
2566 while (!object->pager_ready) {
2567 vm_object_wait(
2568 object,
2569 VM_OBJECT_EVENT_PAGER_READY,
2570 THREAD_UNINT);
2571 vm_object_lock(object);
2572 }
2573 }
2574
6d2010ae
A
2575 if (object->wimg_bits != wimg_mode)
2576 vm_object_change_wimg_mode(object, wimg_mode);
2d21ac55 2577
2d21ac55 2578 object->true_share = TRUE;
6d2010ae 2579
2d21ac55
A
2580 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
2581 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
2582 vm_object_unlock(object);
39236c6e
A
2583
2584 } else if (named_entry->is_copy) {
2585 kern_return_t kr;
2586 vm_map_copy_t copy_map;
2587 vm_map_entry_t copy_entry;
2588 vm_map_offset_t copy_addr;
2589
2590 if (flags & ~(VM_FLAGS_FIXED |
2591 VM_FLAGS_ANYWHERE |
2592 VM_FLAGS_OVERWRITE |
2593 VM_FLAGS_RETURN_DATA_ADDR)) {
2594 named_entry_unlock(named_entry);
2595 return KERN_INVALID_ARGUMENT;
2596 }
2597
2598 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
2599 offset_in_mapping = offset - vm_object_trunc_page(offset);
2600 offset = vm_object_trunc_page(offset);
2601 map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset;
2602 }
2603
2604 copy_map = named_entry->backing.copy;
2605 assert(copy_map->type == VM_MAP_COPY_ENTRY_LIST);
2606 if (copy_map->type != VM_MAP_COPY_ENTRY_LIST) {
2607 /* unsupported type; should not happen */
2608 printf("vm_map_enter_mem_object: "
2609 "memory_entry->backing.copy "
2610 "unsupported type 0x%x\n",
2611 copy_map->type);
2612 named_entry_unlock(named_entry);
2613 return KERN_INVALID_ARGUMENT;
2614 }
2615
2616 /* reserve a contiguous range */
2617 kr = vm_map_enter(target_map,
2618 &map_addr,
2619 map_size,
2620 mask,
2621 flags & (VM_FLAGS_ANYWHERE |
2622 VM_FLAGS_OVERWRITE |
2623 VM_FLAGS_RETURN_DATA_ADDR),
2624 VM_OBJECT_NULL,
2625 0,
2626 FALSE, /* copy */
2627 cur_protection,
2628 max_protection,
2629 inheritance);
2630 if (kr != KERN_SUCCESS) {
2631 named_entry_unlock(named_entry);
2632 return kr;
2633 }
2634
2635 copy_addr = map_addr;
2636
2637 for (copy_entry = vm_map_copy_first_entry(copy_map);
2638 copy_entry != vm_map_copy_to_entry(copy_map);
2639 copy_entry = copy_entry->vme_next) {
2640 int remap_flags = 0;
2641 vm_map_t copy_submap;
2642 vm_object_t copy_object;
2643 vm_map_size_t copy_size;
2644 vm_object_offset_t copy_offset;
2645
2646 copy_offset = copy_entry->offset;
2647 copy_size = (copy_entry->vme_end -
2648 copy_entry->vme_start);
2649
2650 /* sanity check */
2651 if (copy_addr + copy_size >
2652 map_addr + map_size) {
2653 /* over-mapping too much!? */
2654 kr = KERN_INVALID_ARGUMENT;
2655 /* abort */
2656 break;
2657 }
2658
2659 /* take a reference on the object */
2660 if (copy_entry->is_sub_map) {
2661 remap_flags |= VM_FLAGS_SUBMAP;
2662 copy_submap =
2663 copy_entry->object.sub_map;
2664 vm_map_lock(copy_submap);
2665 vm_map_reference(copy_submap);
2666 vm_map_unlock(copy_submap);
2667 copy_object = (vm_object_t) copy_submap;
2668 } else {
2669 copy_object =
2670 copy_entry->object.vm_object;
2671 vm_object_reference(copy_object);
2672 }
2673
2674 /* over-map the object into destination */
2675 remap_flags |= flags;
2676 remap_flags |= VM_FLAGS_FIXED;
2677 remap_flags |= VM_FLAGS_OVERWRITE;
2678 remap_flags &= ~VM_FLAGS_ANYWHERE;
2679 kr = vm_map_enter(target_map,
2680 &copy_addr,
2681 copy_size,
2682 (vm_map_offset_t) 0,
2683 remap_flags,
2684 copy_object,
2685 copy_offset,
2686 copy,
2687 cur_protection,
2688 max_protection,
2689 inheritance);
2690 if (kr != KERN_SUCCESS) {
2691 if (copy_entry->is_sub_map) {
2692 vm_map_deallocate(copy_submap);
2693 } else {
2694 vm_object_deallocate(copy_object);
2695 }
2696 /* abort */
2697 break;
2698 }
2699
2700 /* next mapping */
2701 copy_addr += copy_size;
2702 }
2703
2704 if (kr == KERN_SUCCESS) {
2705 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
2706 *address = map_addr + offset_in_mapping;
2707 } else {
2708 *address = map_addr;
2709 }
2710 }
2711 named_entry_unlock(named_entry);
2712
2713 if (kr != KERN_SUCCESS) {
2714 if (! (flags & VM_FLAGS_OVERWRITE)) {
2715 /* deallocate the contiguous range */
2716 (void) vm_deallocate(target_map,
2717 map_addr,
2718 map_size);
2719 }
2720 }
2721
2722 return kr;
2723
2d21ac55
A
2724 } else {
2725 /* This is the case where we are going to map */
2726 /* an already mapped object. If the object is */
2727 /* not ready, it is internal. An external */
2728 /* object cannot be mapped until it is ready, */
2729 /* so we can avoid the ready check */
2730 /* in this case. */
39236c6e
A
2731 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
2732 offset_in_mapping = offset - vm_object_trunc_page(offset);
2733 offset = vm_object_trunc_page(offset);
2734 map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset;
2735 }
2736
2d21ac55
A
2737 object = named_entry->backing.object;
2738 assert(object != VM_OBJECT_NULL);
2739 named_entry_unlock(named_entry);
2740 vm_object_reference(object);
2741 }
2742 } else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
2743 /*
2744 * JMM - This is temporary until we unify named entries
2745 * and raw memory objects.
2746 *
2747 * Detected fake ip_kotype for a memory object. In
2748 * this case, the port isn't really a port at all, but
2749 * instead is just a raw memory object.
2750 */
39236c6e
A
2751 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
2752 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for raw memory object.");
2753 }
2754
2d21ac55
A
2755 object = vm_object_enter((memory_object_t)port,
2756 size, FALSE, FALSE, FALSE);
2757 if (object == VM_OBJECT_NULL)
2758 return KERN_INVALID_OBJECT;
2759
2760 /* wait for object (if any) to be ready */
2761 if (object != VM_OBJECT_NULL) {
2762 if (object == kernel_object) {
2763 printf("Warning: Attempt to map kernel object"
2764 " by a non-private kernel entity\n");
2765 return KERN_INVALID_OBJECT;
2766 }
b0d623f7 2767 if (!object->pager_ready) {
2d21ac55 2768 vm_object_lock(object);
b0d623f7
A
2769
2770 while (!object->pager_ready) {
2771 vm_object_wait(object,
2772 VM_OBJECT_EVENT_PAGER_READY,
2773 THREAD_UNINT);
2774 vm_object_lock(object);
2775 }
2776 vm_object_unlock(object);
2d21ac55 2777 }
2d21ac55
A
2778 }
2779 } else {
2780 return KERN_INVALID_OBJECT;
2781 }
2782
593a1d5f
A
2783 if (object != VM_OBJECT_NULL &&
2784 object->named &&
2785 object->pager != MEMORY_OBJECT_NULL &&
2786 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
2787 memory_object_t pager;
2788 vm_prot_t pager_prot;
2789 kern_return_t kr;
2790
2791 /*
2792 * For "named" VM objects, let the pager know that the
2793 * memory object is being mapped. Some pagers need to keep
2794 * track of this, to know when they can reclaim the memory
2795 * object, for example.
2796 * VM calls memory_object_map() for each mapping (specifying
2797 * the protection of each mapping) and calls
2798 * memory_object_last_unmap() when all the mappings are gone.
2799 */
2800 pager_prot = max_protection;
2801 if (copy) {
2802 /*
2803 * Copy-On-Write mapping: won't modify the
2804 * memory object.
2805 */
2806 pager_prot &= ~VM_PROT_WRITE;
2807 }
2808 vm_object_lock(object);
2809 pager = object->pager;
2810 if (object->named &&
2811 pager != MEMORY_OBJECT_NULL &&
2812 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
2813 assert(object->pager_ready);
2814 vm_object_mapping_wait(object, THREAD_UNINT);
2815 vm_object_mapping_begin(object);
2816 vm_object_unlock(object);
2817
2818 kr = memory_object_map(pager, pager_prot);
2819 assert(kr == KERN_SUCCESS);
2820
2821 vm_object_lock(object);
2822 vm_object_mapping_end(object);
2823 }
2824 vm_object_unlock(object);
2825 }
2826
2d21ac55
A
2827 /*
2828 * Perform the copy if requested
2829 */
2830
2831 if (copy) {
2832 vm_object_t new_object;
2833 vm_object_offset_t new_offset;
2834
2835 result = vm_object_copy_strategically(object, offset, size,
2836 &new_object, &new_offset,
2837 &copy);
2838
2839
2840 if (result == KERN_MEMORY_RESTART_COPY) {
2841 boolean_t success;
2842 boolean_t src_needs_copy;
2843
2844 /*
2845 * XXX
2846 * We currently ignore src_needs_copy.
2847 * This really is the issue of how to make
2848 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
2849 * non-kernel users to use. Solution forthcoming.
2850 * In the meantime, since we don't allow non-kernel
2851 * memory managers to specify symmetric copy,
2852 * we won't run into problems here.
2853 */
2854 new_object = object;
2855 new_offset = offset;
2856 success = vm_object_copy_quickly(&new_object,
2857 new_offset, size,
2858 &src_needs_copy,
2859 &copy);
2860 assert(success);
2861 result = KERN_SUCCESS;
2862 }
2863 /*
2864 * Throw away the reference to the
2865 * original object, as it won't be mapped.
2866 */
2867
2868 vm_object_deallocate(object);
2869
2870 if (result != KERN_SUCCESS)
2871 return result;
2872
2873 object = new_object;
2874 offset = new_offset;
2875 }
2876
2877 result = vm_map_enter(target_map,
2878 &map_addr, map_size,
2879 (vm_map_offset_t)mask,
2880 flags,
2881 object, offset,
2882 copy,
2883 cur_protection, max_protection, inheritance);
2884 if (result != KERN_SUCCESS)
2885 vm_object_deallocate(object);
39236c6e
A
2886
2887 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
2888 *address = map_addr + offset_in_mapping;
2889 } else {
2890 *address = map_addr;
2891 }
2d21ac55
A
2892 return result;
2893}
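/*
 * A minimal user-space sketch of the named-entry path above: create a
 * memory entry covering an existing region, then map it a second time
 * elsewhere in the same task.  Sizes and protections are illustrative.
 *
 *	mach_vm_address_t orig = 0, alias = 0;
 *	memory_object_size_t entry_size = 0x4000;
 *	mach_port_t mem_entry = MACH_PORT_NULL;
 *	kern_return_t kr;
 *
 *	kr = mach_vm_allocate(mach_task_self(), &orig, entry_size,
 *			      VM_FLAGS_ANYWHERE);
 *	kr = mach_make_memory_entry_64(mach_task_self(), &entry_size, orig,
 *				       VM_PROT_READ | VM_PROT_WRITE,
 *				       &mem_entry, MACH_PORT_NULL);
 *	kr = mach_vm_map(mach_task_self(), &alias, entry_size, 0,
 *			 VM_FLAGS_ANYWHERE, mem_entry, 0, FALSE,
 *			 VM_PROT_READ | VM_PROT_WRITE,
 *			 VM_PROT_READ | VM_PROT_WRITE,
 *			 VM_INHERIT_DEFAULT);
 */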
2894
b0d623f7
A
2895
2896
2897
2898kern_return_t
2899vm_map_enter_mem_object_control(
2900 vm_map_t target_map,
2901 vm_map_offset_t *address,
2902 vm_map_size_t initial_size,
2903 vm_map_offset_t mask,
2904 int flags,
2905 memory_object_control_t control,
2906 vm_object_offset_t offset,
2907 boolean_t copy,
2908 vm_prot_t cur_protection,
2909 vm_prot_t max_protection,
2910 vm_inherit_t inheritance)
2911{
2912 vm_map_address_t map_addr;
2913 vm_map_size_t map_size;
2914 vm_object_t object;
2915 vm_object_size_t size;
2916 kern_return_t result;
2917 memory_object_t pager;
2918 vm_prot_t pager_prot;
2919 kern_return_t kr;
2920
2921 /*
2922 * Check arguments for validity
2923 */
2924 if ((target_map == VM_MAP_NULL) ||
2925 (cur_protection & ~VM_PROT_ALL) ||
2926 (max_protection & ~VM_PROT_ALL) ||
2927 (inheritance > VM_INHERIT_LAST_VALID) ||
2928 initial_size == 0)
2929 return KERN_INVALID_ARGUMENT;
2930
39236c6e
A
2931 map_addr = vm_map_trunc_page(*address,
2932 VM_MAP_PAGE_MASK(target_map));
2933 map_size = vm_map_round_page(initial_size,
2934 VM_MAP_PAGE_MASK(target_map));
b0d623f7
A
2935 size = vm_object_round_page(initial_size);
2936
2937 object = memory_object_control_to_vm_object(control);
2938
2939 if (object == VM_OBJECT_NULL)
2940 return KERN_INVALID_OBJECT;
2941
2942 if (object == kernel_object) {
2943 printf("Warning: Attempt to map kernel object"
2944 " by a non-private kernel entity\n");
2945 return KERN_INVALID_OBJECT;
2946 }
2947
2948 vm_object_lock(object);
2949 object->ref_count++;
2950 vm_object_res_reference(object);
2951
2952 /*
2953 * For "named" VM objects, let the pager know that the
2954 * memory object is being mapped. Some pagers need to keep
2955 * track of this, to know when they can reclaim the memory
2956 * object, for example.
2957 * VM calls memory_object_map() for each mapping (specifying
2958 * the protection of each mapping) and calls
2959 * memory_object_last_unmap() when all the mappings are gone.
2960 */
2961 pager_prot = max_protection;
2962 if (copy) {
2963 pager_prot &= ~VM_PROT_WRITE;
2964 }
2965 pager = object->pager;
2966 if (object->named &&
2967 pager != MEMORY_OBJECT_NULL &&
2968 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
2969 assert(object->pager_ready);
2970 vm_object_mapping_wait(object, THREAD_UNINT);
2971 vm_object_mapping_begin(object);
2972 vm_object_unlock(object);
2973
2974 kr = memory_object_map(pager, pager_prot);
2975 assert(kr == KERN_SUCCESS);
2976
2977 vm_object_lock(object);
2978 vm_object_mapping_end(object);
2979 }
2980 vm_object_unlock(object);
2981
2982 /*
2983 * Perform the copy if requested
2984 */
2985
2986 if (copy) {
2987 vm_object_t new_object;
2988 vm_object_offset_t new_offset;
2989
2990 result = vm_object_copy_strategically(object, offset, size,
2991 &new_object, &new_offset,
2992 &copy);
2993
2994
2995 if (result == KERN_MEMORY_RESTART_COPY) {
2996 boolean_t success;
2997 boolean_t src_needs_copy;
2998
2999 /*
3000 * XXX
3001 * We currently ignore src_needs_copy.
3002 * This really is the issue of how to make
3003 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
3004 * non-kernel users to use. Solution forthcoming.
3005 * In the meantime, since we don't allow non-kernel
3006 * memory managers to specify symmetric copy,
3007 * we won't run into problems here.
3008 */
3009 new_object = object;
3010 new_offset = offset;
3011 success = vm_object_copy_quickly(&new_object,
3012 new_offset, size,
3013 &src_needs_copy,
3014 &copy);
3015 assert(success);
3016 result = KERN_SUCCESS;
3017 }
3018 /*
3019 * Throw away the reference to the
3020 * original object, as it won't be mapped.
3021 */
3022
3023 vm_object_deallocate(object);
3024
3025 if (result != KERN_SUCCESS)
3026 return result;
3027
3028 object = new_object;
3029 offset = new_offset;
3030 }
3031
3032 result = vm_map_enter(target_map,
3033 &map_addr, map_size,
3034 (vm_map_offset_t)mask,
3035 flags,
3036 object, offset,
3037 copy,
3038 cur_protection, max_protection, inheritance);
3039 if (result != KERN_SUCCESS)
3040 vm_object_deallocate(object);
3041 *address = map_addr;
3042
3043 return result;
3044}
3045
3046
2d21ac55
A
3047#if VM_CPM
3048
3049#ifdef MACH_ASSERT
3050extern pmap_paddr_t avail_start, avail_end;
3051#endif
3052
3053/*
3054 * Allocate memory in the specified map, with the caveat that
3055 * the memory is physically contiguous. This call may fail
3056 * if the system can't find sufficient contiguous memory.
3057 * This call may cause or lead to heart-stopping amounts of
3058 * paging activity.
3059 *
3060 * Memory obtained from this call should be freed in the
3061 * normal way, viz., via vm_deallocate.
3062 */
3063kern_return_t
3064vm_map_enter_cpm(
3065 vm_map_t map,
3066 vm_map_offset_t *addr,
3067 vm_map_size_t size,
3068 int flags)
3069{
3070 vm_object_t cpm_obj;
3071 pmap_t pmap;
3072 vm_page_t m, pages;
3073 kern_return_t kr;
3074 vm_map_offset_t va, start, end, offset;
3075#if MACH_ASSERT
316670eb 3076 vm_map_offset_t prev_addr = 0;
2d21ac55
A
3077#endif /* MACH_ASSERT */
3078
3079 boolean_t anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
3080
2d21ac55
A
3081 if (size == 0) {
3082 *addr = 0;
3083 return KERN_SUCCESS;
3084 }
3085 if (anywhere)
3086 *addr = vm_map_min(map);
3087 else
39236c6e
A
3088 *addr = vm_map_trunc_page(*addr,
3089 VM_MAP_PAGE_MASK(map));
3090 size = vm_map_round_page(size,
3091 VM_MAP_PAGE_MASK(map));
2d21ac55
A
3092
3093 /*
3094 * LP64todo - cpm_allocate should probably allow
3095 * allocations of >4GB, but not with the current
3096 * algorithm, so just cast down the size for now.
3097 */
3098 if (size > VM_MAX_ADDRESS)
3099 return KERN_RESOURCE_SHORTAGE;
3100 if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
b0d623f7 3101 &pages, 0, 0, TRUE, flags)) != KERN_SUCCESS)
2d21ac55
A
3102 return kr;
3103
3104 cpm_obj = vm_object_allocate((vm_object_size_t)size);
3105 assert(cpm_obj != VM_OBJECT_NULL);
3106 assert(cpm_obj->internal);
316670eb 3107 assert(cpm_obj->vo_size == (vm_object_size_t)size);
2d21ac55
A
3108 assert(cpm_obj->can_persist == FALSE);
3109 assert(cpm_obj->pager_created == FALSE);
3110 assert(cpm_obj->pageout == FALSE);
3111 assert(cpm_obj->shadow == VM_OBJECT_NULL);
91447636
A
3112
3113 /*
3114 * Insert pages into object.
3115 */
3116
3117 vm_object_lock(cpm_obj);
3118 for (offset = 0; offset < size; offset += PAGE_SIZE) {
3119 m = pages;
3120 pages = NEXT_PAGE(m);
0c530ab8 3121 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
91447636
A
3122
3123 assert(!m->gobbled);
3124 assert(!m->wanted);
3125 assert(!m->pageout);
3126 assert(!m->tabled);
b0d623f7 3127 assert(VM_PAGE_WIRED(m));
91447636
A
3128 /*
3129 * ENCRYPTED SWAP:
3130 * "m" is not supposed to be pageable, so it
3131 * should not be encrypted. It wouldn't be safe
3132 * to enter it in a new VM object while encrypted.
3133 */
3134 ASSERT_PAGE_DECRYPTED(m);
3135 assert(m->busy);
0c530ab8 3136 assert(m->phys_page>=(avail_start>>PAGE_SHIFT) && m->phys_page<=(avail_end>>PAGE_SHIFT));
91447636
A
3137
3138 m->busy = FALSE;
3139 vm_page_insert(m, cpm_obj, offset);
3140 }
3141 assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
3142 vm_object_unlock(cpm_obj);
3143
3144 /*
3145 * Hang onto a reference on the object in case a
3146 * multi-threaded application for some reason decides
3147 * to deallocate the portion of the address space into
3148 * which we will insert this object.
3149 *
3150 * Unfortunately, we must insert the object now before
3151 * we can talk to the pmap module about which addresses
3152 * must be wired down. Hence, the race with a multi-
3153 * threaded app.
3154 */
3155 vm_object_reference(cpm_obj);
3156
3157 /*
3158 * Insert object into map.
3159 */
3160
3161 kr = vm_map_enter(
2d21ac55
A
3162 map,
3163 addr,
3164 size,
3165 (vm_map_offset_t)0,
3166 flags,
3167 cpm_obj,
3168 (vm_object_offset_t)0,
3169 FALSE,
3170 VM_PROT_ALL,
3171 VM_PROT_ALL,
3172 VM_INHERIT_DEFAULT);
91447636
A
3173
3174 if (kr != KERN_SUCCESS) {
3175 /*
3176 * A CPM object doesn't have can_persist set,
3177 * so all we have to do is deallocate it to
3178 * free up these pages.
3179 */
3180 assert(cpm_obj->pager_created == FALSE);
3181 assert(cpm_obj->can_persist == FALSE);
3182 assert(cpm_obj->pageout == FALSE);
3183 assert(cpm_obj->shadow == VM_OBJECT_NULL);
3184 vm_object_deallocate(cpm_obj); /* kill acquired ref */
3185 vm_object_deallocate(cpm_obj); /* kill creation ref */
3186 }
3187
3188 /*
3189 * Inform the physical mapping system that the
3190 * range of addresses may not fault, so that
3191 * page tables and such can be locked down as well.
3192 */
3193 start = *addr;
3194 end = start + size;
3195 pmap = vm_map_pmap(map);
3196 pmap_pageable(pmap, start, end, FALSE);
3197
3198 /*
3199 * Enter each page into the pmap, to avoid faults.
3200 * Note that this loop could be coded more efficiently,
3201 * if the need arose, rather than looking up each page
3202 * again.
3203 */
3204 for (offset = 0, va = start; offset < size;
3205 va += PAGE_SIZE, offset += PAGE_SIZE) {
2d21ac55
A
3206 int type_of_fault;
3207
91447636
A
3208 vm_object_lock(cpm_obj);
3209 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
91447636 3210 assert(m != VM_PAGE_NULL);
2d21ac55
A
3211
3212 vm_page_zero_fill(m);
3213
3214 type_of_fault = DBG_ZERO_FILL_FAULT;
3215
6d2010ae 3216 vm_fault_enter(m, pmap, va, VM_PROT_ALL, VM_PROT_WRITE,
316670eb 3217 VM_PAGE_WIRED(m), FALSE, FALSE, FALSE, NULL,
2d21ac55
A
3218 &type_of_fault);
3219
3220 vm_object_unlock(cpm_obj);
91447636
A
3221 }
3222
3223#if MACH_ASSERT
3224 /*
3225 * Verify ordering in address space.
3226 */
3227 for (offset = 0; offset < size; offset += PAGE_SIZE) {
3228 vm_object_lock(cpm_obj);
3229 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
3230 vm_object_unlock(cpm_obj);
3231 if (m == VM_PAGE_NULL)
316670eb
A
3232 panic("vm_allocate_cpm: obj %p off 0x%llx no page",
3233 cpm_obj, (uint64_t)offset);
91447636
A
3234 assert(m->tabled);
3235 assert(!m->busy);
3236 assert(!m->wanted);
3237 assert(!m->fictitious);
3238 assert(!m->private);
3239 assert(!m->absent);
3240 assert(!m->error);
3241 assert(!m->cleaning);
316670eb 3242 assert(!m->laundry);
91447636
A
3243 assert(!m->precious);
3244 assert(!m->clustered);
3245 if (offset != 0) {
3246 if (m->phys_page != prev_addr + 1) {
316670eb
A
3247 printf("start 0x%llx end 0x%llx va 0x%llx\n",
3248 (uint64_t)start, (uint64_t)end, (uint64_t)va);
3249 printf("obj %p off 0x%llx\n", cpm_obj, (uint64_t)offset);
3250 printf("m %p prev_address 0x%llx\n", m, (uint64_t)prev_addr);
91447636
A
3251 panic("vm_allocate_cpm: pages not contig!");
3252 }
3253 }
3254 prev_addr = m->phys_page;
3255 }
3256#endif /* MACH_ASSERT */
3257
3258 vm_object_deallocate(cpm_obj); /* kill extra ref */
3259
3260 return kr;
3261}
3262
3263
3264#else /* VM_CPM */
3265
3266/*
3267 * Interface is defined in all cases, but unless the kernel
3268 * is built explicitly for this option, the interface does
3269 * nothing.
3270 */
3271
3272kern_return_t
3273vm_map_enter_cpm(
3274 __unused vm_map_t map,
3275 __unused vm_map_offset_t *addr,
3276 __unused vm_map_size_t size,
3277 __unused int flags)
3278{
3279 return KERN_FAILURE;
3280}
3281#endif /* VM_CPM */
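/*
 * A minimal kernel-side sketch of calling the routine above.  This is
 * only meaningful when the kernel is built with VM_CPM; otherwise the
 * stub above returns KERN_FAILURE.  The size is illustrative.
 *
 *	vm_map_offset_t cpm_addr = 0;
 *	vm_map_size_t cpm_size = 256 * PAGE_SIZE;
 *	kern_return_t kr;
 *
 *	kr = vm_map_enter_cpm(kernel_map, &cpm_addr, cpm_size,
 *			      VM_FLAGS_ANYWHERE);
 *	if (kr == KERN_SUCCESS) {
 *		// ... use the physically contiguous range ...
 *		vm_deallocate(kernel_map, cpm_addr, cpm_size);
 *	}
 */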
3282
b0d623f7
A
3283/* Not used without nested pmaps */
3284#ifndef NO_NESTED_PMAP
2d21ac55
A
3285/*
3286 * Clip and unnest a portion of a nested submap mapping.
3287 */
b0d623f7
A
3288
3289
2d21ac55
A
3290static void
3291vm_map_clip_unnest(
3292 vm_map_t map,
3293 vm_map_entry_t entry,
3294 vm_map_offset_t start_unnest,
3295 vm_map_offset_t end_unnest)
3296{
b0d623f7
A
3297 vm_map_offset_t old_start_unnest = start_unnest;
3298 vm_map_offset_t old_end_unnest = end_unnest;
3299
2d21ac55
A
3300 assert(entry->is_sub_map);
3301 assert(entry->object.sub_map != NULL);
3302
b0d623f7
A
3303 /*
3304 * Query the platform for the optimal unnest range.
3305 * DRK: There's some duplication of effort here, since
3306 * callers may have adjusted the range to some extent. This
3307 * routine was introduced to support 1GiB subtree nesting
3308 * for x86 platforms, which can also nest on 2MiB boundaries
3309 * depending on size/alignment.
3310 */
3311 if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) {
3312 log_unnest_badness(map, old_start_unnest, old_end_unnest);
3313 }
3314
2d21ac55
A
3315 if (entry->vme_start > start_unnest ||
3316 entry->vme_end < end_unnest) {
3317 panic("vm_map_clip_unnest(0x%llx,0x%llx): "
3318 "bad nested entry: start=0x%llx end=0x%llx\n",
3319 (long long)start_unnest, (long long)end_unnest,
3320 (long long)entry->vme_start, (long long)entry->vme_end);
3321 }
b0d623f7 3322
2d21ac55
A
3323 if (start_unnest > entry->vme_start) {
3324 _vm_map_clip_start(&map->hdr,
3325 entry,
3326 start_unnest);
6d2010ae 3327 vm_map_store_update_first_free(map, map->first_free);
2d21ac55
A
3328 }
3329 if (entry->vme_end > end_unnest) {
3330 _vm_map_clip_end(&map->hdr,
3331 entry,
3332 end_unnest);
6d2010ae 3333 vm_map_store_update_first_free(map, map->first_free);
2d21ac55
A
3334 }
3335
3336 pmap_unnest(map->pmap,
3337 entry->vme_start,
3338 entry->vme_end - entry->vme_start);
316670eb 3339 if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
2d21ac55
A
3340 /* clean up parent map/maps */
3341 vm_map_submap_pmap_clean(
3342 map, entry->vme_start,
3343 entry->vme_end,
3344 entry->object.sub_map,
3345 entry->offset);
3346 }
3347 entry->use_pmap = FALSE;
316670eb
A
3348 if (entry->alias == VM_MEMORY_SHARED_PMAP) {
3349 entry->alias = VM_MEMORY_UNSHARED_PMAP;
3350 }
2d21ac55 3351}
b0d623f7 3352#endif /* NO_NESTED_PMAP */
2d21ac55 3353
1c79356b
A
3354/*
3355 * vm_map_clip_start: [ internal use only ]
3356 *
3357 * Asserts that the given entry begins at or after
3358 * the specified address; if necessary,
3359 * it splits the entry into two.
3360 */
e2d2fc5c 3361void
2d21ac55
A
3362vm_map_clip_start(
3363 vm_map_t map,
3364 vm_map_entry_t entry,
3365 vm_map_offset_t startaddr)
3366{
0c530ab8 3367#ifndef NO_NESTED_PMAP
2d21ac55
A
3368 if (entry->use_pmap &&
3369 startaddr >= entry->vme_start) {
3370 vm_map_offset_t start_unnest, end_unnest;
3371
3372 /*
3373 * Make sure "startaddr" is no longer in a nested range
3374 * before we clip. Unnest only the minimum range the platform
3375 * can handle.
b0d623f7
A
3376 * vm_map_clip_unnest may perform additional adjustments to
3377 * the unnest range.
2d21ac55
A
3378 */
3379 start_unnest = startaddr & ~(pmap_nesting_size_min - 1);
3380 end_unnest = start_unnest + pmap_nesting_size_min;
3381 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
3382 }
3383#endif /* NO_NESTED_PMAP */
3384 if (startaddr > entry->vme_start) {
3385 if (entry->object.vm_object &&
3386 !entry->is_sub_map &&
3387 entry->object.vm_object->phys_contiguous) {
3388 pmap_remove(map->pmap,
3389 (addr64_t)(entry->vme_start),
3390 (addr64_t)(entry->vme_end));
3391 }
3392 _vm_map_clip_start(&map->hdr, entry, startaddr);
6d2010ae 3393 vm_map_store_update_first_free(map, map->first_free);
2d21ac55
A
3394 }
3395}
3396
1c79356b
A
3397
3398#define vm_map_copy_clip_start(copy, entry, startaddr) \
3399 MACRO_BEGIN \
3400 if ((startaddr) > (entry)->vme_start) \
3401 _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \
3402 MACRO_END
3403
3404/*
3405 * This routine is called only when it is known that
3406 * the entry must be split.
3407 */
91447636 3408static void
1c79356b
A
3409_vm_map_clip_start(
3410 register struct vm_map_header *map_header,
3411 register vm_map_entry_t entry,
91447636 3412 register vm_map_offset_t start)
1c79356b
A
3413{
3414 register vm_map_entry_t new_entry;
3415
3416 /*
3417 * Split off the front portion --
3418 * note that we must insert the new
3419 * entry BEFORE this one, so that
3420 * this entry has the specified starting
3421 * address.
3422 */
3423
7ddcb079 3424 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
1c79356b
A
3425 vm_map_entry_copy_full(new_entry, entry);
3426
39236c6e
A
3427 assert(VM_MAP_PAGE_ALIGNED(start,
3428 VM_MAP_HDR_PAGE_MASK(map_header)));
1c79356b 3429 new_entry->vme_end = start;
e2d2fc5c 3430 assert(new_entry->vme_start < new_entry->vme_end);
1c79356b 3431 entry->offset += (start - entry->vme_start);
e2d2fc5c 3432 assert(start < entry->vme_end);
39236c6e
A
3433 assert(VM_MAP_PAGE_ALIGNED(start,
3434 VM_MAP_HDR_PAGE_MASK(map_header)));
1c79356b
A
3435 entry->vme_start = start;
3436
6d2010ae 3437 _vm_map_store_entry_link(map_header, entry->vme_prev, new_entry);
1c79356b
A
3438
3439 if (entry->is_sub_map)
2d21ac55 3440 vm_map_reference(new_entry->object.sub_map);
1c79356b
A
3441 else
3442 vm_object_reference(new_entry->object.vm_object);
3443}
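/*
 * A sketch of the split performed above for an entry [A, B) with object
 * offset O, clipped at start address S (A < S < B):
 *
 *	before:	 entry:     [A ....................... B)   offset O
 *	after:	 new_entry: [A ....... S)                   offset O
 *		 entry:                 [S .............. B)   offset O + (S - A)
 *
 * new_entry is linked immediately before entry, and an extra reference
 * is taken on the shared object (or submap).
 */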
3444
3445
3446/*
3447 * vm_map_clip_end: [ internal use only ]
3448 *
3449 * Asserts that the given entry ends at or before
3450 * the specified address; if necessary,
3451 * it splits the entry into two.
3452 */
e2d2fc5c 3453void
2d21ac55
A
3454vm_map_clip_end(
3455 vm_map_t map,
3456 vm_map_entry_t entry,
3457 vm_map_offset_t endaddr)
3458{
3459 if (endaddr > entry->vme_end) {
3460 /*
3461 * Within the scope of this clipping, limit "endaddr" to
3462 * the end of this map entry...
3463 */
3464 endaddr = entry->vme_end;
3465 }
3466#ifndef NO_NESTED_PMAP
3467 if (entry->use_pmap) {
3468 vm_map_offset_t start_unnest, end_unnest;
3469
3470 /*
3471 * Make sure the range between the start of this entry and
3472 * the new "endaddr" is no longer nested before we clip.
3473 * Unnest only the minimum range the platform can handle.
b0d623f7
A
3474 * vm_map_clip_unnest may perform additional adjustments to
3475 * the unnest range.
2d21ac55
A
3476 */
3477 start_unnest = entry->vme_start;
3478 end_unnest =
3479 (endaddr + pmap_nesting_size_min - 1) &
3480 ~(pmap_nesting_size_min - 1);
3481 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
3482 }
3483#endif /* NO_NESTED_PMAP */
3484 if (endaddr < entry->vme_end) {
3485 if (entry->object.vm_object &&
3486 !entry->is_sub_map &&
3487 entry->object.vm_object->phys_contiguous) {
3488 pmap_remove(map->pmap,
3489 (addr64_t)(entry->vme_start),
3490 (addr64_t)(entry->vme_end));
3491 }
3492 _vm_map_clip_end(&map->hdr, entry, endaddr);
6d2010ae 3493 vm_map_store_update_first_free(map, map->first_free);
2d21ac55
A
3494 }
3495}
0c530ab8 3496
1c79356b
A
3497
3498#define vm_map_copy_clip_end(copy, entry, endaddr) \
3499 MACRO_BEGIN \
3500 if ((endaddr) < (entry)->vme_end) \
3501 _vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \
3502 MACRO_END
3503
3504/*
3505 * This routine is called only when it is known that
3506 * the entry must be split.
3507 */
91447636 3508static void
1c79356b
A
3509_vm_map_clip_end(
3510 register struct vm_map_header *map_header,
3511 register vm_map_entry_t entry,
2d21ac55 3512 register vm_map_offset_t end)
1c79356b
A
3513{
3514 register vm_map_entry_t new_entry;
3515
3516 /*
3517 * Create a new entry and insert it
3518 * AFTER the specified entry
3519 */
3520
7ddcb079 3521 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
1c79356b
A
3522 vm_map_entry_copy_full(new_entry, entry);
3523
e2d2fc5c 3524 assert(entry->vme_start < end);
39236c6e
A
3525 assert(VM_MAP_PAGE_ALIGNED(end,
3526 VM_MAP_HDR_PAGE_MASK(map_header)));
1c79356b
A
3527 new_entry->vme_start = entry->vme_end = end;
3528 new_entry->offset += (end - entry->vme_start);
e2d2fc5c 3529 assert(new_entry->vme_start < new_entry->vme_end);
1c79356b 3530
6d2010ae 3531 _vm_map_store_entry_link(map_header, entry, new_entry);
1c79356b
A
3532
3533 if (entry->is_sub_map)
2d21ac55 3534 vm_map_reference(new_entry->object.sub_map);
1c79356b
A
3535 else
3536 vm_object_reference(new_entry->object.vm_object);
3537}
3538
3539
3540/*
3541 * VM_MAP_RANGE_CHECK: [ internal use only ]
3542 *
3543 * Asserts that the starting and ending region
3544 * addresses fall within the valid range of the map.
3545 */
2d21ac55
A
3546#define VM_MAP_RANGE_CHECK(map, start, end) \
3547 MACRO_BEGIN \
3548 if (start < vm_map_min(map)) \
3549 start = vm_map_min(map); \
3550 if (end > vm_map_max(map)) \
3551 end = vm_map_max(map); \
3552 if (start > end) \
3553 start = end; \
3554 MACRO_END
1c79356b
A
3555
3556/*
3557 * vm_map_range_check: [ internal use only ]
3558 *
3559 * Check that the region defined by the specified start and
3560 * end addresses is wholly contained within a single map
3561 * entry or set of adjacent map entries of the specified map,
3562 * i.e. the specified region contains no unmapped space.
3563 * If any or all of the region is unmapped, FALSE is returned.
3564 * Otherwise, TRUE is returned and if the output argument 'entry'
3565 * is not NULL it points to the map entry containing the start
3566 * of the region.
3567 *
3568 * The map is locked for reading on entry and is left locked.
3569 */
91447636 3570static boolean_t
1c79356b
A
3571vm_map_range_check(
3572 register vm_map_t map,
91447636
A
3573 register vm_map_offset_t start,
3574 register vm_map_offset_t end,
1c79356b
A
3575 vm_map_entry_t *entry)
3576{
3577 vm_map_entry_t cur;
91447636 3578 register vm_map_offset_t prev;
1c79356b
A
3579
3580 /*
3581 * Basic sanity checks first
3582 */
3583 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
3584 return (FALSE);
3585
3586 /*
3587 * Check first if the region starts within a valid
3588 * mapping for the map.
3589 */
3590 if (!vm_map_lookup_entry(map, start, &cur))
3591 return (FALSE);
3592
3593 /*
3594 * Optimize for the case that the region is contained
3595 * in a single map entry.
3596 */
3597 if (entry != (vm_map_entry_t *) NULL)
3598 *entry = cur;
3599 if (end <= cur->vme_end)
3600 return (TRUE);
3601
3602 /*
3603 * If the region is not wholly contained within a
3604 * single entry, walk the entries looking for holes.
3605 */
3606 prev = cur->vme_end;
3607 cur = cur->vme_next;
3608 while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
3609 if (end <= cur->vme_end)
3610 return (TRUE);
3611 prev = cur->vme_end;
3612 cur = cur->vme_next;
3613 }
3614 return (FALSE);
3615}
3616
3617/*
3618 * vm_map_submap: [ kernel use only ]
3619 *
3620 * Mark the given range as handled by a subordinate map.
3621 *
3622 * This range must have been created with vm_map_find using
3623 * the vm_submap_object, and no other operations may have been
3624 * performed on this range prior to calling vm_map_submap.
3625 *
3626 * Only a limited number of operations can be performed
 3627 * within this range after calling vm_map_submap:
3628 * vm_fault
3629 * [Don't try vm_map_copyin!]
3630 *
3631 * To remove a submapping, one must first remove the
3632 * range from the superior map, and then destroy the
3633 * submap (if desired). [Better yet, don't try it.]
3634 */
3635kern_return_t
3636vm_map_submap(
91447636
A
3637 vm_map_t map,
3638 vm_map_offset_t start,
3639 vm_map_offset_t end,
1c79356b 3640 vm_map_t submap,
91447636 3641 vm_map_offset_t offset,
0c530ab8 3642#ifdef NO_NESTED_PMAP
91447636 3643 __unused
0c530ab8 3644#endif /* NO_NESTED_PMAP */
1c79356b
A
3645 boolean_t use_pmap)
3646{
3647 vm_map_entry_t entry;
3648 register kern_return_t result = KERN_INVALID_ARGUMENT;
3649 register vm_object_t object;
3650
3651 vm_map_lock(map);
3652
2d21ac55 3653 if (! vm_map_lookup_entry(map, start, &entry)) {
1c79356b 3654 entry = entry->vme_next;
2d21ac55 3655 }
1c79356b 3656
2d21ac55
A
3657 if (entry == vm_map_to_entry(map) ||
3658 entry->is_sub_map) {
1c79356b
A
3659 vm_map_unlock(map);
3660 return KERN_INVALID_ARGUMENT;
3661 }
3662
2d21ac55
A
3663 assert(!entry->use_pmap); /* we don't want to unnest anything here */
3664 vm_map_clip_start(map, entry, start);
1c79356b
A
3665 vm_map_clip_end(map, entry, end);
3666
3667 if ((entry->vme_start == start) && (entry->vme_end == end) &&
3668 (!entry->is_sub_map) &&
3669 ((object = entry->object.vm_object) == vm_submap_object) &&
3670 (object->resident_page_count == 0) &&
3671 (object->copy == VM_OBJECT_NULL) &&
3672 (object->shadow == VM_OBJECT_NULL) &&
3673 (!object->pager_created)) {
2d21ac55
A
3674 entry->offset = (vm_object_offset_t)offset;
3675 entry->object.vm_object = VM_OBJECT_NULL;
3676 vm_object_deallocate(object);
3677 entry->is_sub_map = TRUE;
3678 entry->object.sub_map = submap;
3679 vm_map_reference(submap);
316670eb
A
3680 if (submap->mapped_in_other_pmaps == FALSE &&
3681 vm_map_pmap(submap) != PMAP_NULL &&
3682 vm_map_pmap(submap) != vm_map_pmap(map)) {
3683 /*
3684 * This submap is being mapped in a map
3685 * that uses a different pmap.
3686 * Set its "mapped_in_other_pmaps" flag
3687 * to indicate that we now need to
3688 * remove mappings from all pmaps rather
3689 * than just the submap's pmap.
3690 */
3691 submap->mapped_in_other_pmaps = TRUE;
3692 }
2d21ac55 3693
0c530ab8 3694#ifndef NO_NESTED_PMAP
2d21ac55
A
3695 if (use_pmap) {
3696 /* nest if platform code will allow */
3697 if(submap->pmap == NULL) {
316670eb
A
3698 ledger_t ledger = map->pmap->ledger;
3699 submap->pmap = pmap_create(ledger,
3700 (vm_map_size_t) 0, FALSE);
2d21ac55
A
3701 if(submap->pmap == PMAP_NULL) {
3702 vm_map_unlock(map);
3703 return(KERN_NO_SPACE);
55e303ae 3704 }
55e303ae 3705 }
2d21ac55
A
3706 result = pmap_nest(map->pmap,
3707 (entry->object.sub_map)->pmap,
3708 (addr64_t)start,
3709 (addr64_t)start,
3710 (uint64_t)(end - start));
3711 if(result)
3712 panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result);
3713 entry->use_pmap = TRUE;
3714 }
0c530ab8 3715#else /* NO_NESTED_PMAP */
2d21ac55 3716 pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end);
0c530ab8 3717#endif /* NO_NESTED_PMAP */
2d21ac55 3718 result = KERN_SUCCESS;
1c79356b
A
3719 }
3720 vm_map_unlock(map);
3721
3722 return(result);
3723}
3724
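#if 0	/* illustrative sketch -- editorial addition, not part of the original source */
/*
 * Example (hypothetical): install "submap" over [start, end) of "map",
 * mapping from offset 0 of the submap and requesting pmap nesting where
 * the platform allows it.  Per the comment above, the range must already
 * have been created against vm_submap_object before this call.
 */
static kern_return_t
vm_map_submap_example(
	vm_map_t		map,
	vm_map_offset_t		start,
	vm_map_offset_t		end,
	vm_map_t		submap)
{
	return vm_map_submap(map, start, end, submap,
			     (vm_map_offset_t) 0, TRUE);
}
#endif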
3725/*
3726 * vm_map_protect:
3727 *
3728 * Sets the protection of the specified address
3729 * region in the target map. If "set_max" is
3730 * specified, the maximum protection is to be set;
3731 * otherwise, only the current protection is affected.
3732 */
3733kern_return_t
3734vm_map_protect(
3735 register vm_map_t map,
91447636
A
3736 register vm_map_offset_t start,
3737 register vm_map_offset_t end,
1c79356b
A
3738 register vm_prot_t new_prot,
3739 register boolean_t set_max)
3740{
3741 register vm_map_entry_t current;
2d21ac55 3742 register vm_map_offset_t prev;
1c79356b
A
3743 vm_map_entry_t entry;
3744 vm_prot_t new_max;
1c79356b
A
3745
3746 XPR(XPR_VM_MAP,
2d21ac55 3747 "vm_map_protect, 0x%X start 0x%X end 0x%X, new 0x%X %d",
b0d623f7 3748 map, start, end, new_prot, set_max);
1c79356b
A
3749
3750 vm_map_lock(map);
3751
91447636
A
3752 /* LP64todo - remove this check when vm_map_commpage64()
3753 * no longer has to stuff in a map_entry for the commpage
3754 * above the map's max_offset.
3755 */
3756 if (start >= map->max_offset) {
3757 vm_map_unlock(map);
3758 return(KERN_INVALID_ADDRESS);
3759 }
3760
b0d623f7
A
3761 while(1) {
3762 /*
3763 * Lookup the entry. If it doesn't start in a valid
3764 * entry, return an error.
3765 */
3766 if (! vm_map_lookup_entry(map, start, &entry)) {
3767 vm_map_unlock(map);
3768 return(KERN_INVALID_ADDRESS);
3769 }
3770
3771 if (entry->superpage_size && (start & (SUPERPAGE_SIZE-1))) { /* extend request to whole entry */
3772 start = SUPERPAGE_ROUND_DOWN(start);
3773 continue;
3774 }
3775 break;
3776 }
3777 if (entry->superpage_size)
3778 end = SUPERPAGE_ROUND_UP(end);
1c79356b
A
3779
3780 /*
3781 * Make a first pass to check for protection and address
3782 * violations.
3783 */
3784
3785 current = entry;
3786 prev = current->vme_start;
3787 while ((current != vm_map_to_entry(map)) &&
3788 (current->vme_start < end)) {
3789
3790 /*
3791 * If there is a hole, return an error.
3792 */
3793 if (current->vme_start != prev) {
3794 vm_map_unlock(map);
3795 return(KERN_INVALID_ADDRESS);
3796 }
3797
3798 new_max = current->max_protection;
3799 if(new_prot & VM_PROT_COPY) {
3800 new_max |= VM_PROT_WRITE;
3801 if ((new_prot & (new_max | VM_PROT_COPY)) != new_prot) {
3802 vm_map_unlock(map);
3803 return(KERN_PROTECTION_FAILURE);
3804 }
3805 } else {
3806 if ((new_prot & new_max) != new_prot) {
3807 vm_map_unlock(map);
3808 return(KERN_PROTECTION_FAILURE);
3809 }
3810 }
3811
593a1d5f 3812
1c79356b
A
3813 prev = current->vme_end;
3814 current = current->vme_next;
3815 }
3816 if (end > prev) {
3817 vm_map_unlock(map);
3818 return(KERN_INVALID_ADDRESS);
3819 }
3820
3821 /*
3822 * Go back and fix up protections.
3823 * Clip to start here if the range starts within
3824 * the entry.
3825 */
3826
3827 current = entry;
2d21ac55
A
3828 if (current != vm_map_to_entry(map)) {
3829 /* clip and unnest if necessary */
3830 vm_map_clip_start(map, current, start);
1c79356b 3831 }
2d21ac55 3832
1c79356b
A
3833 while ((current != vm_map_to_entry(map)) &&
3834 (current->vme_start < end)) {
3835
3836 vm_prot_t old_prot;
3837
3838 vm_map_clip_end(map, current, end);
3839
2d21ac55
A
3840 assert(!current->use_pmap); /* clipping did unnest if needed */
3841
1c79356b
A
3842 old_prot = current->protection;
3843
3844 if(new_prot & VM_PROT_COPY) {
3845 /* caller is asking specifically to copy the */
3846 /* mapped data, this implies that max protection */
3847 /* will include write. Caller must be prepared */
3848 /* for loss of shared memory communication in the */
3849 /* target area after taking this step */
6d2010ae
A
3850
3851 if (current->is_sub_map == FALSE && current->object.vm_object == VM_OBJECT_NULL){
3852 current->object.vm_object = vm_object_allocate((vm_map_size_t)(current->vme_end - current->vme_start));
3853 current->offset = 0;
3854 }
1c79356b
A
3855 current->needs_copy = TRUE;
3856 current->max_protection |= VM_PROT_WRITE;
3857 }
3858
3859 if (set_max)
3860 current->protection =
3861 (current->max_protection =
2d21ac55
A
3862 new_prot & ~VM_PROT_COPY) &
3863 old_prot;
1c79356b
A
3864 else
3865 current->protection = new_prot & ~VM_PROT_COPY;
3866
3867 /*
3868 * Update physical map if necessary.
3869 * If the request is to turn off write protection,
3870 * we won't do it for real (in pmap). This is because
3871 * it would cause copy-on-write to fail. We've already
 3872 * set the new protection in the map, so if a
3873 * write-protect fault occurred, it will be fixed up
3874 * properly, COW or not.
3875 */
1c79356b 3876 if (current->protection != old_prot) {
1c79356b
A
 3877 /* Look one level in: we support nested pmaps */
3878 /* from mapped submaps which are direct entries */
3879 /* in our map */
0c530ab8 3880
2d21ac55 3881 vm_prot_t prot;
0c530ab8 3882
2d21ac55
A
3883 prot = current->protection & ~VM_PROT_WRITE;
3884
3885 if (override_nx(map, current->alias) && prot)
0c530ab8 3886 prot |= VM_PROT_EXECUTE;
2d21ac55 3887
0c530ab8 3888 if (current->is_sub_map && current->use_pmap) {
1c79356b 3889 pmap_protect(current->object.sub_map->pmap,
2d21ac55
A
3890 current->vme_start,
3891 current->vme_end,
3892 prot);
1c79356b 3893 } else {
2d21ac55
A
3894 pmap_protect(map->pmap,
3895 current->vme_start,
3896 current->vme_end,
3897 prot);
1c79356b 3898 }
1c79356b
A
3899 }
3900 current = current->vme_next;
3901 }
3902
5353443c 3903 current = entry;
91447636
A
3904 while ((current != vm_map_to_entry(map)) &&
3905 (current->vme_start <= end)) {
5353443c
A
3906 vm_map_simplify_entry(map, current);
3907 current = current->vme_next;
3908 }
3909
1c79356b
A
3910 vm_map_unlock(map);
3911 return(KERN_SUCCESS);
3912}
3913
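#if 0	/* illustrative sketch -- editorial addition, not part of the original source */
/*
 * Example (hypothetical): make [start, end) read-only in the current
 * protection without touching the maximum protection.  Passing
 * set_max == TRUE instead would lower the maximum protection as well.
 */
static kern_return_t
vm_map_protect_example(
	vm_map_t		map,
	vm_map_offset_t		start,
	vm_map_offset_t		end)
{
	return vm_map_protect(map, start, end, VM_PROT_READ, FALSE);
}
#endif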
3914/*
3915 * vm_map_inherit:
3916 *
3917 * Sets the inheritance of the specified address
3918 * range in the target map. Inheritance
3919 * affects how the map will be shared with
3920 * child maps at the time of vm_map_fork.
3921 */
3922kern_return_t
3923vm_map_inherit(
3924 register vm_map_t map,
91447636
A
3925 register vm_map_offset_t start,
3926 register vm_map_offset_t end,
1c79356b
A
3927 register vm_inherit_t new_inheritance)
3928{
3929 register vm_map_entry_t entry;
3930 vm_map_entry_t temp_entry;
3931
3932 vm_map_lock(map);
3933
3934 VM_MAP_RANGE_CHECK(map, start, end);
3935
3936 if (vm_map_lookup_entry(map, start, &temp_entry)) {
3937 entry = temp_entry;
1c79356b
A
3938 }
3939 else {
3940 temp_entry = temp_entry->vme_next;
3941 entry = temp_entry;
3942 }
3943
3944 /* first check entire range for submaps which can't support the */
3945 /* given inheritance. */
3946 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
3947 if(entry->is_sub_map) {
91447636
A
3948 if(new_inheritance == VM_INHERIT_COPY) {
3949 vm_map_unlock(map);
1c79356b 3950 return(KERN_INVALID_ARGUMENT);
91447636 3951 }
1c79356b
A
3952 }
3953
3954 entry = entry->vme_next;
3955 }
3956
3957 entry = temp_entry;
2d21ac55
A
3958 if (entry != vm_map_to_entry(map)) {
3959 /* clip and unnest if necessary */
3960 vm_map_clip_start(map, entry, start);
3961 }
1c79356b
A
3962
3963 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
3964 vm_map_clip_end(map, entry, end);
2d21ac55 3965 assert(!entry->use_pmap); /* clip did unnest if needed */
1c79356b
A
3966
3967 entry->inheritance = new_inheritance;
3968
3969 entry = entry->vme_next;
3970 }
3971
3972 vm_map_unlock(map);
3973 return(KERN_SUCCESS);
3974}
3975
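#if 0	/* illustrative sketch -- editorial addition, not part of the original source */
/*
 * Example (hypothetical): share [start, end) with child maps created at
 * vm_map_fork() time.  Note from the code above that VM_INHERIT_COPY is
 * rejected for ranges that contain submaps.
 */
static kern_return_t
vm_map_inherit_example(
	vm_map_t		map,
	vm_map_offset_t		start,
	vm_map_offset_t		end)
{
	return vm_map_inherit(map, start, end, VM_INHERIT_SHARE);
}
#endif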
2d21ac55
A
3976/*
3977 * Update the accounting for the amount of wired memory in this map. If the user has
3978 * exceeded the defined limits, then we fail. Wiring on behalf of the kernel never fails.
3979 */
3980
3981static kern_return_t
3982add_wire_counts(
3983 vm_map_t map,
3984 vm_map_entry_t entry,
3985 boolean_t user_wire)
3986{
3987 vm_map_size_t size;
3988
3989 if (user_wire) {
6d2010ae 3990 unsigned int total_wire_count = vm_page_wire_count + vm_lopage_free_count;
2d21ac55
A
3991
3992 /*
3993 * We're wiring memory at the request of the user. Check if this is the first time the user is wiring
3994 * this map entry.
3995 */
3996
3997 if (entry->user_wired_count == 0) {
3998 size = entry->vme_end - entry->vme_start;
3999
4000 /*
4001 * Since this is the first time the user is wiring this map entry, check to see if we're
4002 * exceeding the user wire limits. There is a per map limit which is the smaller of either
4003 * the process's rlimit or the global vm_user_wire_limit which caps this value. There is also
4004 * a system-wide limit on the amount of memory all users can wire. If the user is over either
4005 * limit, then we fail.
4006 */
4007
4008 if(size + map->user_wire_size > MIN(map->user_wire_limit, vm_user_wire_limit) ||
6d2010ae
A
4009 size + ptoa_64(total_wire_count) > vm_global_user_wire_limit ||
4010 size + ptoa_64(total_wire_count) > max_mem - vm_global_no_user_wire_amount)
2d21ac55
A
4011 return KERN_RESOURCE_SHORTAGE;
4012
4013 /*
4014 * The first time the user wires an entry, we also increment the wired_count and add this to
4015 * the total that has been wired in the map.
4016 */
4017
4018 if (entry->wired_count >= MAX_WIRE_COUNT)
4019 return KERN_FAILURE;
4020
4021 entry->wired_count++;
4022 map->user_wire_size += size;
4023 }
4024
4025 if (entry->user_wired_count >= MAX_WIRE_COUNT)
4026 return KERN_FAILURE;
4027
4028 entry->user_wired_count++;
4029
4030 } else {
4031
4032 /*
4033 * The kernel's wiring the memory. Just bump the count and continue.
4034 */
4035
4036 if (entry->wired_count >= MAX_WIRE_COUNT)
4037 panic("vm_map_wire: too many wirings");
4038
4039 entry->wired_count++;
4040 }
4041
4042 return KERN_SUCCESS;
4043}
4044
4045/*
4046 * Update the memory wiring accounting now that the given map entry is being unwired.
4047 */
4048
4049static void
4050subtract_wire_counts(
4051 vm_map_t map,
4052 vm_map_entry_t entry,
4053 boolean_t user_wire)
4054{
4055
4056 if (user_wire) {
4057
4058 /*
4059 * We're unwiring memory at the request of the user. See if we're removing the last user wire reference.
4060 */
4061
4062 if (entry->user_wired_count == 1) {
4063
4064 /*
4065 * We're removing the last user wire reference. Decrement the wired_count and the total
4066 * user wired memory for this map.
4067 */
4068
4069 assert(entry->wired_count >= 1);
4070 entry->wired_count--;
4071 map->user_wire_size -= entry->vme_end - entry->vme_start;
4072 }
4073
4074 assert(entry->user_wired_count >= 1);
4075 entry->user_wired_count--;
4076
4077 } else {
4078
4079 /*
4080 * The kernel is unwiring the memory. Just update the count.
4081 */
4082
4083 assert(entry->wired_count >= 1);
4084 entry->wired_count--;
4085 }
4086}
4087
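/*
 * Worked example of the accounting above (editorial addition, illustrative
 * only): if a user wires the same entry twice,
 *
 *	1st wire:	user_wired_count 0 -> 1, wired_count +1,
 *			map->user_wire_size += entry size (limits checked);
 *	2nd wire:	user_wired_count 1 -> 2, nothing else changes.
 *
 * Unwiring reverses this: only when user_wired_count drops from 1 to 0
 * are wired_count and map->user_wire_size decremented again.
 */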
1c79356b
A
4088/*
4089 * vm_map_wire:
4090 *
4091 * Sets the pageability of the specified address range in the
4092 * target map as wired. Regions specified as not pageable require
4093 * locked-down physical memory and physical page maps. The
4094 * access_type variable indicates types of accesses that must not
4095 * generate page faults. This is checked against protection of
4096 * memory being locked-down.
4097 *
4098 * The map must not be locked, but a reference must remain to the
4099 * map throughout the call.
4100 */
91447636 4101static kern_return_t
1c79356b
A
4102vm_map_wire_nested(
4103 register vm_map_t map,
91447636
A
4104 register vm_map_offset_t start,
4105 register vm_map_offset_t end,
1c79356b
A
4106 register vm_prot_t access_type,
4107 boolean_t user_wire,
9bccf70c 4108 pmap_t map_pmap,
91447636 4109 vm_map_offset_t pmap_addr)
1c79356b
A
4110{
4111 register vm_map_entry_t entry;
4112 struct vm_map_entry *first_entry, tmp_entry;
91447636
A
4113 vm_map_t real_map;
4114 register vm_map_offset_t s,e;
1c79356b
A
4115 kern_return_t rc;
4116 boolean_t need_wakeup;
4117 boolean_t main_map = FALSE;
9bccf70c 4118 wait_interrupt_t interruptible_state;
0b4e3aa0 4119 thread_t cur_thread;
1c79356b 4120 unsigned int last_timestamp;
91447636 4121 vm_map_size_t size;
1c79356b
A
4122
4123 vm_map_lock(map);
4124 if(map_pmap == NULL)
4125 main_map = TRUE;
4126 last_timestamp = map->timestamp;
4127
4128 VM_MAP_RANGE_CHECK(map, start, end);
4129 assert(page_aligned(start));
4130 assert(page_aligned(end));
39236c6e
A
4131 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
4132 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
0b4e3aa0
A
4133 if (start == end) {
4134 /* We wired what the caller asked for, zero pages */
4135 vm_map_unlock(map);
4136 return KERN_SUCCESS;
4137 }
1c79356b 4138
2d21ac55
A
4139 need_wakeup = FALSE;
4140 cur_thread = current_thread();
4141
4142 s = start;
4143 rc = KERN_SUCCESS;
4144
4145 if (vm_map_lookup_entry(map, s, &first_entry)) {
1c79356b 4146 entry = first_entry;
2d21ac55
A
4147 /*
4148 * vm_map_clip_start will be done later.
4149 * We don't want to unnest any nested submaps here !
4150 */
1c79356b
A
4151 } else {
4152 /* Start address is not in map */
2d21ac55
A
4153 rc = KERN_INVALID_ADDRESS;
4154 goto done;
1c79356b
A
4155 }
4156
2d21ac55
A
4157 while ((entry != vm_map_to_entry(map)) && (s < end)) {
4158 /*
4159 * At this point, we have wired from "start" to "s".
4160 * We still need to wire from "s" to "end".
4161 *
4162 * "entry" hasn't been clipped, so it could start before "s"
4163 * and/or end after "end".
4164 */
4165
4166 /* "e" is how far we want to wire in this entry */
4167 e = entry->vme_end;
4168 if (e > end)
4169 e = end;
4170
1c79356b
A
4171 /*
4172 * If another thread is wiring/unwiring this entry then
4173 * block after informing other thread to wake us up.
4174 */
4175 if (entry->in_transition) {
9bccf70c
A
4176 wait_result_t wait_result;
4177
1c79356b
A
4178 /*
4179 * We have not clipped the entry. Make sure that
4180 * the start address is in range so that the lookup
4181 * below will succeed.
2d21ac55
A
4182 * "s" is the current starting point: we've already
4183 * wired from "start" to "s" and we still have
4184 * to wire from "s" to "end".
1c79356b 4185 */
1c79356b
A
4186
4187 entry->needs_wakeup = TRUE;
4188
4189 /*
4190 * wake up anybody waiting on entries that we have
4191 * already wired.
4192 */
4193 if (need_wakeup) {
4194 vm_map_entry_wakeup(map);
4195 need_wakeup = FALSE;
4196 }
4197 /*
4198 * User wiring is interruptible
4199 */
9bccf70c 4200 wait_result = vm_map_entry_wait(map,
2d21ac55
A
4201 (user_wire) ? THREAD_ABORTSAFE :
4202 THREAD_UNINT);
9bccf70c 4203 if (user_wire && wait_result == THREAD_INTERRUPTED) {
1c79356b
A
4204 /*
4205 * undo the wirings we have done so far
4206 * We do not clear the needs_wakeup flag,
4207 * because we cannot tell if we were the
4208 * only one waiting.
4209 */
2d21ac55
A
4210 rc = KERN_FAILURE;
4211 goto done;
1c79356b
A
4212 }
4213
1c79356b
A
4214 /*
4215 * Cannot avoid a lookup here. reset timestamp.
4216 */
4217 last_timestamp = map->timestamp;
4218
4219 /*
4220 * The entry could have been clipped, look it up again.
 4221 * The worst that can happen is that it may not exist anymore.
4222 */
4223 if (!vm_map_lookup_entry(map, s, &first_entry)) {
1c79356b
A
4224 /*
 4225 * User: undo everything up to the previous
 4226 * entry. Let vm_map_unwire worry about
4227 * checking the validity of the range.
4228 */
2d21ac55
A
4229 rc = KERN_FAILURE;
4230 goto done;
1c79356b
A
4231 }
4232 entry = first_entry;
4233 continue;
4234 }
2d21ac55
A
4235
4236 if (entry->is_sub_map) {
91447636
A
4237 vm_map_offset_t sub_start;
4238 vm_map_offset_t sub_end;
4239 vm_map_offset_t local_start;
4240 vm_map_offset_t local_end;
1c79356b 4241 pmap_t pmap;
2d21ac55
A
4242
4243 vm_map_clip_start(map, entry, s);
1c79356b
A
4244 vm_map_clip_end(map, entry, end);
4245
9bccf70c 4246 sub_start = entry->offset;
2d21ac55
A
4247 sub_end = entry->vme_end;
4248 sub_end += entry->offset - entry->vme_start;
4249
1c79356b
A
4250 local_end = entry->vme_end;
4251 if(map_pmap == NULL) {
2d21ac55
A
4252 vm_object_t object;
4253 vm_object_offset_t offset;
4254 vm_prot_t prot;
4255 boolean_t wired;
4256 vm_map_entry_t local_entry;
4257 vm_map_version_t version;
4258 vm_map_t lookup_map;
4259
1c79356b
A
4260 if(entry->use_pmap) {
4261 pmap = entry->object.sub_map->pmap;
9bccf70c
A
4262 /* ppc implementation requires that */
 4263 /* submap's pmap address ranges line */
4264 /* up with parent map */
4265#ifdef notdef
4266 pmap_addr = sub_start;
4267#endif
2d21ac55 4268 pmap_addr = s;
1c79356b
A
4269 } else {
4270 pmap = map->pmap;
2d21ac55 4271 pmap_addr = s;
1c79356b 4272 }
2d21ac55 4273
1c79356b 4274 if (entry->wired_count) {
2d21ac55
A
4275 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4276 goto done;
4277
4278 /*
4279 * The map was not unlocked:
4280 * no need to goto re-lookup.
4281 * Just go directly to next entry.
4282 */
1c79356b 4283 entry = entry->vme_next;
2d21ac55 4284 s = entry->vme_start;
1c79356b
A
4285 continue;
4286
2d21ac55 4287 }
9bccf70c 4288
2d21ac55
A
4289 /* call vm_map_lookup_locked to */
4290 /* cause any needs copy to be */
4291 /* evaluated */
4292 local_start = entry->vme_start;
4293 lookup_map = map;
4294 vm_map_lock_write_to_read(map);
4295 if(vm_map_lookup_locked(
4296 &lookup_map, local_start,
4297 access_type,
4298 OBJECT_LOCK_EXCLUSIVE,
4299 &version, &object,
4300 &offset, &prot, &wired,
4301 NULL,
4302 &real_map)) {
1c79356b 4303
2d21ac55
A
4304 vm_map_unlock_read(lookup_map);
4305 vm_map_unwire(map, start,
4306 s, user_wire);
4307 return(KERN_FAILURE);
4308 }
316670eb 4309 vm_object_unlock(object);
2d21ac55
A
4310 if(real_map != lookup_map)
4311 vm_map_unlock(real_map);
4312 vm_map_unlock_read(lookup_map);
4313 vm_map_lock(map);
1c79356b 4314
2d21ac55
A
4315 /* we unlocked, so must re-lookup */
4316 if (!vm_map_lookup_entry(map,
4317 local_start,
4318 &local_entry)) {
4319 rc = KERN_FAILURE;
4320 goto done;
4321 }
4322
4323 /*
4324 * entry could have been "simplified",
4325 * so re-clip
4326 */
4327 entry = local_entry;
4328 assert(s == local_start);
4329 vm_map_clip_start(map, entry, s);
4330 vm_map_clip_end(map, entry, end);
4331 /* re-compute "e" */
4332 e = entry->vme_end;
4333 if (e > end)
4334 e = end;
4335
4336 /* did we have a change of type? */
4337 if (!entry->is_sub_map) {
4338 last_timestamp = map->timestamp;
4339 continue;
1c79356b
A
4340 }
4341 } else {
9bccf70c 4342 local_start = entry->vme_start;
2d21ac55
A
4343 pmap = map_pmap;
4344 }
4345
4346 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4347 goto done;
4348
4349 entry->in_transition = TRUE;
4350
4351 vm_map_unlock(map);
4352 rc = vm_map_wire_nested(entry->object.sub_map,
1c79356b
A
4353 sub_start, sub_end,
4354 access_type,
2d21ac55
A
4355 user_wire, pmap, pmap_addr);
4356 vm_map_lock(map);
9bccf70c 4357
1c79356b
A
4358 /*
4359 * Find the entry again. It could have been clipped
4360 * after we unlocked the map.
4361 */
9bccf70c
A
4362 if (!vm_map_lookup_entry(map, local_start,
4363 &first_entry))
4364 panic("vm_map_wire: re-lookup failed");
4365 entry = first_entry;
1c79356b 4366
2d21ac55
A
4367 assert(local_start == s);
4368 /* re-compute "e" */
4369 e = entry->vme_end;
4370 if (e > end)
4371 e = end;
4372
1c79356b
A
4373 last_timestamp = map->timestamp;
4374 while ((entry != vm_map_to_entry(map)) &&
2d21ac55 4375 (entry->vme_start < e)) {
1c79356b
A
4376 assert(entry->in_transition);
4377 entry->in_transition = FALSE;
4378 if (entry->needs_wakeup) {
4379 entry->needs_wakeup = FALSE;
4380 need_wakeup = TRUE;
4381 }
4382 if (rc != KERN_SUCCESS) {/* from vm_*_wire */
2d21ac55 4383 subtract_wire_counts(map, entry, user_wire);
1c79356b
A
4384 }
4385 entry = entry->vme_next;
4386 }
4387 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
2d21ac55 4388 goto done;
1c79356b 4389 }
2d21ac55
A
4390
4391 /* no need to relookup again */
4392 s = entry->vme_start;
1c79356b
A
4393 continue;
4394 }
4395
4396 /*
4397 * If this entry is already wired then increment
4398 * the appropriate wire reference count.
4399 */
9bccf70c 4400 if (entry->wired_count) {
1c79356b
A
4401 /*
4402 * entry is already wired down, get our reference
4403 * after clipping to our range.
4404 */
2d21ac55 4405 vm_map_clip_start(map, entry, s);
1c79356b 4406 vm_map_clip_end(map, entry, end);
1c79356b 4407
2d21ac55
A
4408 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4409 goto done;
4410
4411 /* map was not unlocked: no need to relookup */
1c79356b 4412 entry = entry->vme_next;
2d21ac55 4413 s = entry->vme_start;
1c79356b
A
4414 continue;
4415 }
4416
4417 /*
4418 * Unwired entry or wire request transmitted via submap
4419 */
4420
4421
4422 /*
4423 * Perform actions of vm_map_lookup that need the write
4424 * lock on the map: create a shadow object for a
4425 * copy-on-write region, or an object for a zero-fill
4426 * region.
4427 */
4428 size = entry->vme_end - entry->vme_start;
4429 /*
4430 * If wiring a copy-on-write page, we need to copy it now
4431 * even if we're only (currently) requesting read access.
4432 * This is aggressive, but once it's wired we can't move it.
4433 */
4434 if (entry->needs_copy) {
4435 vm_object_shadow(&entry->object.vm_object,
4436 &entry->offset, size);
4437 entry->needs_copy = FALSE;
4438 } else if (entry->object.vm_object == VM_OBJECT_NULL) {
4439 entry->object.vm_object = vm_object_allocate(size);
4440 entry->offset = (vm_object_offset_t)0;
4441 }
4442
2d21ac55 4443 vm_map_clip_start(map, entry, s);
1c79356b
A
4444 vm_map_clip_end(map, entry, end);
4445
2d21ac55 4446 /* re-compute "e" */
1c79356b 4447 e = entry->vme_end;
2d21ac55
A
4448 if (e > end)
4449 e = end;
1c79356b
A
4450
4451 /*
4452 * Check for holes and protection mismatch.
4453 * Holes: Next entry should be contiguous unless this
4454 * is the end of the region.
4455 * Protection: Access requested must be allowed, unless
4456 * wiring is by protection class
4457 */
2d21ac55
A
4458 if ((entry->vme_end < end) &&
4459 ((entry->vme_next == vm_map_to_entry(map)) ||
4460 (entry->vme_next->vme_start > entry->vme_end))) {
4461 /* found a hole */
4462 rc = KERN_INVALID_ADDRESS;
4463 goto done;
4464 }
4465 if ((entry->protection & access_type) != access_type) {
4466 /* found a protection problem */
4467 rc = KERN_PROTECTION_FAILURE;
4468 goto done;
1c79356b
A
4469 }
4470
4471 assert(entry->wired_count == 0 && entry->user_wired_count == 0);
4472
2d21ac55
A
4473 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4474 goto done;
1c79356b
A
4475
4476 entry->in_transition = TRUE;
4477
4478 /*
4479 * This entry might get split once we unlock the map.
4480 * In vm_fault_wire(), we need the current range as
4481 * defined by this entry. In order for this to work
4482 * along with a simultaneous clip operation, we make a
4483 * temporary copy of this entry and use that for the
4484 * wiring. Note that the underlying objects do not
4485 * change during a clip.
4486 */
4487 tmp_entry = *entry;
4488
4489 /*
 4490 * The in_transition state guarantees that the entry
 4491 * (or entries for this range, if a split occurred) will be
4492 * there when the map lock is acquired for the second time.
4493 */
4494 vm_map_unlock(map);
0b4e3aa0 4495
9bccf70c
A
4496 if (!user_wire && cur_thread != THREAD_NULL)
4497 interruptible_state = thread_interrupt_level(THREAD_UNINT);
91447636
A
4498 else
4499 interruptible_state = THREAD_UNINT;
9bccf70c 4500
1c79356b 4501 if(map_pmap)
9bccf70c 4502 rc = vm_fault_wire(map,
2d21ac55 4503 &tmp_entry, map_pmap, pmap_addr);
1c79356b 4504 else
9bccf70c 4505 rc = vm_fault_wire(map,
2d21ac55
A
4506 &tmp_entry, map->pmap,
4507 tmp_entry.vme_start);
0b4e3aa0
A
4508
4509 if (!user_wire && cur_thread != THREAD_NULL)
9bccf70c 4510 thread_interrupt_level(interruptible_state);
0b4e3aa0 4511
1c79356b
A
4512 vm_map_lock(map);
4513
4514 if (last_timestamp+1 != map->timestamp) {
4515 /*
4516 * Find the entry again. It could have been clipped
4517 * after we unlocked the map.
4518 */
4519 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
2d21ac55 4520 &first_entry))
1c79356b
A
4521 panic("vm_map_wire: re-lookup failed");
4522
4523 entry = first_entry;
4524 }
4525
4526 last_timestamp = map->timestamp;
4527
4528 while ((entry != vm_map_to_entry(map)) &&
4529 (entry->vme_start < tmp_entry.vme_end)) {
4530 assert(entry->in_transition);
4531 entry->in_transition = FALSE;
4532 if (entry->needs_wakeup) {
4533 entry->needs_wakeup = FALSE;
4534 need_wakeup = TRUE;
4535 }
4536 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
2d21ac55 4537 subtract_wire_counts(map, entry, user_wire);
1c79356b
A
4538 }
4539 entry = entry->vme_next;
4540 }
4541
4542 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
2d21ac55 4543 goto done;
1c79356b 4544 }
2d21ac55
A
4545
4546 s = entry->vme_start;
1c79356b 4547 } /* end while loop through map entries */
2d21ac55
A
4548
4549done:
4550 if (rc == KERN_SUCCESS) {
4551 /* repair any damage we may have made to the VM map */
4552 vm_map_simplify_range(map, start, end);
4553 }
4554
1c79356b
A
4555 vm_map_unlock(map);
4556
4557 /*
4558 * wake up anybody waiting on entries we wired.
4559 */
4560 if (need_wakeup)
4561 vm_map_entry_wakeup(map);
4562
2d21ac55
A
4563 if (rc != KERN_SUCCESS) {
4564 /* undo what has been wired so far */
4565 vm_map_unwire(map, start, s, user_wire);
4566 }
4567
4568 return rc;
1c79356b
A
4569
4570}
4571
4572kern_return_t
4573vm_map_wire(
4574 register vm_map_t map,
91447636
A
4575 register vm_map_offset_t start,
4576 register vm_map_offset_t end,
1c79356b
A
4577 register vm_prot_t access_type,
4578 boolean_t user_wire)
4579{
4580
4581 kern_return_t kret;
4582
1c79356b 4583 kret = vm_map_wire_nested(map, start, end, access_type,
2d21ac55 4584 user_wire, (pmap_t)NULL, 0);
1c79356b
A
4585 return kret;
4586}
4587
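#if 0	/* illustrative sketch -- editorial addition, not part of the original source */
/*
 * Example (hypothetical): wire [start, end) for read/write access on
 * behalf of the kernel (user_wire == FALSE), so the pages stay resident
 * and must not fault for those access types.
 */
static kern_return_t
vm_map_wire_example(
	vm_map_t		map,
	vm_map_offset_t		start,
	vm_map_offset_t		end)
{
	return vm_map_wire(map, start, end,
			   VM_PROT_READ | VM_PROT_WRITE, FALSE);
}
#endif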
4588/*
4589 * vm_map_unwire:
4590 *
4591 * Sets the pageability of the specified address range in the target
4592 * as pageable. Regions specified must have been wired previously.
4593 *
4594 * The map must not be locked, but a reference must remain to the map
4595 * throughout the call.
4596 *
4597 * Kernel will panic on failures. User unwire ignores holes and
 4598 * unwired and in-transition entries to avoid losing memory by leaving
4599 * it unwired.
4600 */
91447636 4601static kern_return_t
1c79356b
A
4602vm_map_unwire_nested(
4603 register vm_map_t map,
91447636
A
4604 register vm_map_offset_t start,
4605 register vm_map_offset_t end,
1c79356b 4606 boolean_t user_wire,
9bccf70c 4607 pmap_t map_pmap,
91447636 4608 vm_map_offset_t pmap_addr)
1c79356b
A
4609{
4610 register vm_map_entry_t entry;
4611 struct vm_map_entry *first_entry, tmp_entry;
4612 boolean_t need_wakeup;
4613 boolean_t main_map = FALSE;
4614 unsigned int last_timestamp;
4615
4616 vm_map_lock(map);
4617 if(map_pmap == NULL)
4618 main_map = TRUE;
4619 last_timestamp = map->timestamp;
4620
4621 VM_MAP_RANGE_CHECK(map, start, end);
4622 assert(page_aligned(start));
4623 assert(page_aligned(end));
39236c6e
A
4624 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
4625 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
1c79356b 4626
2d21ac55
A
4627 if (start == end) {
4628 /* We unwired what the caller asked for: zero pages */
4629 vm_map_unlock(map);
4630 return KERN_SUCCESS;
4631 }
4632
1c79356b
A
4633 if (vm_map_lookup_entry(map, start, &first_entry)) {
4634 entry = first_entry;
2d21ac55
A
4635 /*
4636 * vm_map_clip_start will be done later.
4637 * We don't want to unnest any nested sub maps here !
4638 */
1c79356b
A
4639 }
4640 else {
2d21ac55
A
4641 if (!user_wire) {
4642 panic("vm_map_unwire: start not found");
4643 }
1c79356b
A
4644 /* Start address is not in map. */
4645 vm_map_unlock(map);
4646 return(KERN_INVALID_ADDRESS);
4647 }
4648
b0d623f7
A
4649 if (entry->superpage_size) {
4650 /* superpages are always wired */
4651 vm_map_unlock(map);
4652 return KERN_INVALID_ADDRESS;
4653 }
4654
1c79356b
A
4655 need_wakeup = FALSE;
4656 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
4657 if (entry->in_transition) {
4658 /*
4659 * 1)
4660 * Another thread is wiring down this entry. Note
 4661 * that if it were not for the other thread we would
4662 * be unwiring an unwired entry. This is not
4663 * permitted. If we wait, we will be unwiring memory
4664 * we did not wire.
4665 *
4666 * 2)
4667 * Another thread is unwiring this entry. We did not
4668 * have a reference to it, because if we did, this
4669 * entry will not be getting unwired now.
4670 */
2d21ac55
A
4671 if (!user_wire) {
4672 /*
4673 * XXX FBDP
4674 * This could happen: there could be some
4675 * overlapping vslock/vsunlock operations
4676 * going on.
4677 * We should probably just wait and retry,
4678 * but then we have to be careful that this
4679 * entry could get "simplified" after
4680 * "in_transition" gets unset and before
4681 * we re-lookup the entry, so we would
4682 * have to re-clip the entry to avoid
4683 * re-unwiring what we have already unwired...
4684 * See vm_map_wire_nested().
4685 *
4686 * Or we could just ignore "in_transition"
 4687 * here and proceed to decrement the wired
4688 * count(s) on this entry. That should be fine
4689 * as long as "wired_count" doesn't drop all
4690 * the way to 0 (and we should panic if THAT
4691 * happens).
4692 */
1c79356b 4693 panic("vm_map_unwire: in_transition entry");
2d21ac55 4694 }
1c79356b
A
4695
4696 entry = entry->vme_next;
4697 continue;
4698 }
4699
2d21ac55 4700 if (entry->is_sub_map) {
91447636
A
4701 vm_map_offset_t sub_start;
4702 vm_map_offset_t sub_end;
4703 vm_map_offset_t local_end;
1c79356b 4704 pmap_t pmap;
2d21ac55 4705
1c79356b
A
4706 vm_map_clip_start(map, entry, start);
4707 vm_map_clip_end(map, entry, end);
4708
4709 sub_start = entry->offset;
4710 sub_end = entry->vme_end - entry->vme_start;
4711 sub_end += entry->offset;
4712 local_end = entry->vme_end;
4713 if(map_pmap == NULL) {
2d21ac55 4714 if(entry->use_pmap) {
1c79356b 4715 pmap = entry->object.sub_map->pmap;
9bccf70c 4716 pmap_addr = sub_start;
2d21ac55 4717 } else {
1c79356b 4718 pmap = map->pmap;
9bccf70c 4719 pmap_addr = start;
2d21ac55
A
4720 }
4721 if (entry->wired_count == 0 ||
4722 (user_wire && entry->user_wired_count == 0)) {
4723 if (!user_wire)
4724 panic("vm_map_unwire: entry is unwired");
4725 entry = entry->vme_next;
4726 continue;
4727 }
4728
4729 /*
4730 * Check for holes
4731 * Holes: Next entry should be contiguous unless
4732 * this is the end of the region.
4733 */
4734 if (((entry->vme_end < end) &&
4735 ((entry->vme_next == vm_map_to_entry(map)) ||
4736 (entry->vme_next->vme_start
4737 > entry->vme_end)))) {
4738 if (!user_wire)
4739 panic("vm_map_unwire: non-contiguous region");
1c79356b 4740/*
2d21ac55
A
4741 entry = entry->vme_next;
4742 continue;
1c79356b 4743*/
2d21ac55 4744 }
1c79356b 4745
2d21ac55 4746 subtract_wire_counts(map, entry, user_wire);
1c79356b 4747
2d21ac55
A
4748 if (entry->wired_count != 0) {
4749 entry = entry->vme_next;
4750 continue;
4751 }
1c79356b 4752
2d21ac55
A
4753 entry->in_transition = TRUE;
4754 tmp_entry = *entry;/* see comment in vm_map_wire() */
4755
4756 /*
4757 * We can unlock the map now. The in_transition state
 4758 * guarantees existence of the entry.
4759 */
4760 vm_map_unlock(map);
4761 vm_map_unwire_nested(entry->object.sub_map,
4762 sub_start, sub_end, user_wire, pmap, pmap_addr);
4763 vm_map_lock(map);
1c79356b 4764
2d21ac55
A
4765 if (last_timestamp+1 != map->timestamp) {
4766 /*
4767 * Find the entry again. It could have been
4768 * clipped or deleted after we unlocked the map.
4769 */
4770 if (!vm_map_lookup_entry(map,
4771 tmp_entry.vme_start,
4772 &first_entry)) {
4773 if (!user_wire)
4774 panic("vm_map_unwire: re-lookup failed");
4775 entry = first_entry->vme_next;
4776 } else
4777 entry = first_entry;
4778 }
4779 last_timestamp = map->timestamp;
1c79356b 4780
1c79356b 4781 /*
2d21ac55
A
4782 * clear transition bit for all constituent entries
4783 * that were in the original entry (saved in
4784 * tmp_entry). Also check for waiters.
4785 */
4786 while ((entry != vm_map_to_entry(map)) &&
4787 (entry->vme_start < tmp_entry.vme_end)) {
4788 assert(entry->in_transition);
4789 entry->in_transition = FALSE;
4790 if (entry->needs_wakeup) {
4791 entry->needs_wakeup = FALSE;
4792 need_wakeup = TRUE;
4793 }
4794 entry = entry->vme_next;
1c79356b 4795 }
2d21ac55 4796 continue;
1c79356b 4797 } else {
2d21ac55
A
4798 vm_map_unlock(map);
4799 vm_map_unwire_nested(entry->object.sub_map,
4800 sub_start, sub_end, user_wire, map_pmap,
4801 pmap_addr);
4802 vm_map_lock(map);
1c79356b 4803
2d21ac55
A
4804 if (last_timestamp+1 != map->timestamp) {
4805 /*
4806 * Find the entry again. It could have been
4807 * clipped or deleted after we unlocked the map.
4808 */
4809 if (!vm_map_lookup_entry(map,
4810 tmp_entry.vme_start,
4811 &first_entry)) {
4812 if (!user_wire)
4813 panic("vm_map_unwire: re-lookup failed");
4814 entry = first_entry->vme_next;
4815 } else
4816 entry = first_entry;
4817 }
4818 last_timestamp = map->timestamp;
1c79356b
A
4819 }
4820 }
4821
4822
9bccf70c 4823 if ((entry->wired_count == 0) ||
2d21ac55 4824 (user_wire && entry->user_wired_count == 0)) {
1c79356b
A
4825 if (!user_wire)
4826 panic("vm_map_unwire: entry is unwired");
4827
4828 entry = entry->vme_next;
4829 continue;
4830 }
2d21ac55 4831
1c79356b 4832 assert(entry->wired_count > 0 &&
2d21ac55 4833 (!user_wire || entry->user_wired_count > 0));
1c79356b
A
4834
4835 vm_map_clip_start(map, entry, start);
4836 vm_map_clip_end(map, entry, end);
4837
4838 /*
4839 * Check for holes
4840 * Holes: Next entry should be contiguous unless
4841 * this is the end of the region.
4842 */
4843 if (((entry->vme_end < end) &&
2d21ac55
A
4844 ((entry->vme_next == vm_map_to_entry(map)) ||
4845 (entry->vme_next->vme_start > entry->vme_end)))) {
1c79356b
A
4846
4847 if (!user_wire)
4848 panic("vm_map_unwire: non-contiguous region");
4849 entry = entry->vme_next;
4850 continue;
4851 }
4852
2d21ac55 4853 subtract_wire_counts(map, entry, user_wire);
1c79356b 4854
9bccf70c 4855 if (entry->wired_count != 0) {
1c79356b
A
4856 entry = entry->vme_next;
4857 continue;
1c79356b
A
4858 }
4859
b0d623f7
A
4860 if(entry->zero_wired_pages) {
4861 entry->zero_wired_pages = FALSE;
4862 }
4863
1c79356b
A
4864 entry->in_transition = TRUE;
4865 tmp_entry = *entry; /* see comment in vm_map_wire() */
4866
4867 /*
4868 * We can unlock the map now. The in_transition state
 4869 * guarantees existence of the entry.
4870 */
4871 vm_map_unlock(map);
4872 if(map_pmap) {
9bccf70c 4873 vm_fault_unwire(map,
2d21ac55 4874 &tmp_entry, FALSE, map_pmap, pmap_addr);
1c79356b 4875 } else {
9bccf70c 4876 vm_fault_unwire(map,
2d21ac55
A
4877 &tmp_entry, FALSE, map->pmap,
4878 tmp_entry.vme_start);
1c79356b
A
4879 }
4880 vm_map_lock(map);
4881
4882 if (last_timestamp+1 != map->timestamp) {
4883 /*
4884 * Find the entry again. It could have been clipped
4885 * or deleted after we unlocked the map.
4886 */
4887 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
2d21ac55 4888 &first_entry)) {
1c79356b 4889 if (!user_wire)
2d21ac55 4890 panic("vm_map_unwire: re-lookup failed");
1c79356b
A
4891 entry = first_entry->vme_next;
4892 } else
4893 entry = first_entry;
4894 }
4895 last_timestamp = map->timestamp;
4896
4897 /*
4898 * clear transition bit for all constituent entries that
4899 * were in the original entry (saved in tmp_entry). Also
4900 * check for waiters.
4901 */
4902 while ((entry != vm_map_to_entry(map)) &&
4903 (entry->vme_start < tmp_entry.vme_end)) {
4904 assert(entry->in_transition);
4905 entry->in_transition = FALSE;
4906 if (entry->needs_wakeup) {
4907 entry->needs_wakeup = FALSE;
4908 need_wakeup = TRUE;
4909 }
4910 entry = entry->vme_next;
4911 }
4912 }
91447636
A
4913
4914 /*
4915 * We might have fragmented the address space when we wired this
4916 * range of addresses. Attempt to re-coalesce these VM map entries
4917 * with their neighbors now that they're no longer wired.
4918 * Under some circumstances, address space fragmentation can
4919 * prevent VM object shadow chain collapsing, which can cause
4920 * swap space leaks.
4921 */
4922 vm_map_simplify_range(map, start, end);
4923
1c79356b
A
4924 vm_map_unlock(map);
4925 /*
4926 * wake up anybody waiting on entries that we have unwired.
4927 */
4928 if (need_wakeup)
4929 vm_map_entry_wakeup(map);
4930 return(KERN_SUCCESS);
4931
4932}
4933
4934kern_return_t
4935vm_map_unwire(
4936 register vm_map_t map,
91447636
A
4937 register vm_map_offset_t start,
4938 register vm_map_offset_t end,
1c79356b
A
4939 boolean_t user_wire)
4940{
9bccf70c 4941 return vm_map_unwire_nested(map, start, end,
2d21ac55 4942 user_wire, (pmap_t)NULL, 0);
1c79356b
A
4943}
4944
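#if 0	/* illustrative sketch -- editorial addition, not part of the original source */
/*
 * Example (hypothetical): release a kernel wiring previously taken with
 * vm_map_wire() on the same range.  The user_wire flag must match the
 * one used when wiring, or the counts maintained above will not balance.
 */
static kern_return_t
vm_map_unwire_example(
	vm_map_t		map,
	vm_map_offset_t		start,
	vm_map_offset_t		end)
{
	return vm_map_unwire(map, start, end, FALSE);
}
#endif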
4945
4946/*
4947 * vm_map_entry_delete: [ internal use only ]
4948 *
4949 * Deallocate the given entry from the target map.
4950 */
91447636 4951static void
1c79356b
A
4952vm_map_entry_delete(
4953 register vm_map_t map,
4954 register vm_map_entry_t entry)
4955{
91447636 4956 register vm_map_offset_t s, e;
1c79356b
A
4957 register vm_object_t object;
4958 register vm_map_t submap;
1c79356b
A
4959
4960 s = entry->vme_start;
4961 e = entry->vme_end;
4962 assert(page_aligned(s));
4963 assert(page_aligned(e));
39236c6e
A
4964 if (entry->map_aligned == TRUE) {
4965 assert(VM_MAP_PAGE_ALIGNED(s, VM_MAP_PAGE_MASK(map)));
4966 assert(VM_MAP_PAGE_ALIGNED(e, VM_MAP_PAGE_MASK(map)));
4967 }
1c79356b
A
4968 assert(entry->wired_count == 0);
4969 assert(entry->user_wired_count == 0);
b0d623f7 4970 assert(!entry->permanent);
1c79356b
A
4971
4972 if (entry->is_sub_map) {
4973 object = NULL;
4974 submap = entry->object.sub_map;
4975 } else {
4976 submap = NULL;
4977 object = entry->object.vm_object;
4978 }
4979
6d2010ae 4980 vm_map_store_entry_unlink(map, entry);
1c79356b
A
4981 map->size -= e - s;
4982
4983 vm_map_entry_dispose(map, entry);
4984
4985 vm_map_unlock(map);
4986 /*
4987 * Deallocate the object only after removing all
4988 * pmap entries pointing to its pages.
4989 */
4990 if (submap)
4991 vm_map_deallocate(submap);
4992 else
2d21ac55 4993 vm_object_deallocate(object);
1c79356b
A
4994
4995}
4996
4997void
4998vm_map_submap_pmap_clean(
4999 vm_map_t map,
91447636
A
5000 vm_map_offset_t start,
5001 vm_map_offset_t end,
1c79356b 5002 vm_map_t sub_map,
91447636 5003 vm_map_offset_t offset)
1c79356b 5004{
91447636
A
5005 vm_map_offset_t submap_start;
5006 vm_map_offset_t submap_end;
5007 vm_map_size_t remove_size;
1c79356b
A
5008 vm_map_entry_t entry;
5009
5010 submap_end = offset + (end - start);
5011 submap_start = offset;
b7266188
A
5012
5013 vm_map_lock_read(sub_map);
1c79356b 5014 if(vm_map_lookup_entry(sub_map, offset, &entry)) {
2d21ac55 5015
1c79356b
A
5016 remove_size = (entry->vme_end - entry->vme_start);
5017 if(offset > entry->vme_start)
5018 remove_size -= offset - entry->vme_start;
2d21ac55 5019
1c79356b
A
5020
5021 if(submap_end < entry->vme_end) {
5022 remove_size -=
5023 entry->vme_end - submap_end;
5024 }
5025 if(entry->is_sub_map) {
5026 vm_map_submap_pmap_clean(
5027 sub_map,
5028 start,
5029 start + remove_size,
5030 entry->object.sub_map,
5031 entry->offset);
5032 } else {
9bccf70c 5033
316670eb 5034 if((map->mapped_in_other_pmaps) && (map->ref_count)
2d21ac55 5035 && (entry->object.vm_object != NULL)) {
9bccf70c
A
5036 vm_object_pmap_protect(
5037 entry->object.vm_object,
6d2010ae 5038 entry->offset+(offset-entry->vme_start),
9bccf70c
A
5039 remove_size,
5040 PMAP_NULL,
5041 entry->vme_start,
5042 VM_PROT_NONE);
5043 } else {
5044 pmap_remove(map->pmap,
2d21ac55
A
5045 (addr64_t)start,
5046 (addr64_t)(start + remove_size));
9bccf70c 5047 }
1c79356b
A
5048 }
5049 }
5050
5051 entry = entry->vme_next;
2d21ac55 5052
1c79356b 5053 while((entry != vm_map_to_entry(sub_map))
2d21ac55 5054 && (entry->vme_start < submap_end)) {
1c79356b
A
5055 remove_size = (entry->vme_end - entry->vme_start);
5056 if(submap_end < entry->vme_end) {
5057 remove_size -= entry->vme_end - submap_end;
5058 }
5059 if(entry->is_sub_map) {
5060 vm_map_submap_pmap_clean(
5061 sub_map,
5062 (start + entry->vme_start) - offset,
5063 ((start + entry->vme_start) - offset) + remove_size,
5064 entry->object.sub_map,
5065 entry->offset);
5066 } else {
316670eb 5067 if((map->mapped_in_other_pmaps) && (map->ref_count)
2d21ac55 5068 && (entry->object.vm_object != NULL)) {
9bccf70c
A
5069 vm_object_pmap_protect(
5070 entry->object.vm_object,
5071 entry->offset,
5072 remove_size,
5073 PMAP_NULL,
5074 entry->vme_start,
5075 VM_PROT_NONE);
5076 } else {
5077 pmap_remove(map->pmap,
2d21ac55
A
5078 (addr64_t)((start + entry->vme_start)
5079 - offset),
5080 (addr64_t)(((start + entry->vme_start)
5081 - offset) + remove_size));
9bccf70c 5082 }
1c79356b
A
5083 }
5084 entry = entry->vme_next;
b7266188
A
5085 }
5086 vm_map_unlock_read(sub_map);
1c79356b
A
5087 return;
5088}
5089
5090/*
5091 * vm_map_delete: [ internal use only ]
5092 *
5093 * Deallocates the given address range from the target map.
5094 * Removes all user wirings. Unwires one kernel wiring if
5095 * VM_MAP_REMOVE_KUNWIRE is set. Waits for kernel wirings to go
5096 * away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set. Sleeps
5097 * interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
5098 *
5099 * This routine is called with map locked and leaves map locked.
5100 */
91447636 5101static kern_return_t
1c79356b 5102vm_map_delete(
91447636
A
5103 vm_map_t map,
5104 vm_map_offset_t start,
5105 vm_map_offset_t end,
5106 int flags,
5107 vm_map_t zap_map)
1c79356b
A
5108{
5109 vm_map_entry_t entry, next;
5110 struct vm_map_entry *first_entry, tmp_entry;
2d21ac55 5111 register vm_map_offset_t s;
1c79356b
A
5112 register vm_object_t object;
5113 boolean_t need_wakeup;
5114 unsigned int last_timestamp = ~0; /* unlikely value */
5115 int interruptible;
1c79356b
A
5116
5117 interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
2d21ac55 5118 THREAD_ABORTSAFE : THREAD_UNINT;
1c79356b
A
5119
5120 /*
5121 * All our DMA I/O operations in IOKit are currently done by
5122 * wiring through the map entries of the task requesting the I/O.
5123 * Because of this, we must always wait for kernel wirings
5124 * to go away on the entries before deleting them.
5125 *
5126 * Any caller who wants to actually remove a kernel wiring
5127 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
5128 * properly remove one wiring instead of blasting through
5129 * them all.
5130 */
5131 flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;
5132
b0d623f7
A
5133 while(1) {
5134 /*
5135 * Find the start of the region, and clip it
5136 */
5137 if (vm_map_lookup_entry(map, start, &first_entry)) {
5138 entry = first_entry;
 5139 			if (entry->superpage_size && (start & ~SUPERPAGE_MASK)) { /* extend request to whole entry */
5140 start = SUPERPAGE_ROUND_DOWN(start);
5141 continue;
5142 }
5143 if (start == entry->vme_start) {
5144 /*
5145 * No need to clip. We don't want to cause
5146 * any unnecessary unnesting in this case...
5147 */
5148 } else {
5149 vm_map_clip_start(map, entry, start);
5150 }
5151
2d21ac55 5152 /*
b0d623f7
A
5153 * Fix the lookup hint now, rather than each
5154 * time through the loop.
2d21ac55 5155 */
b0d623f7 5156 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
2d21ac55 5157 } else {
b0d623f7 5158 entry = first_entry->vme_next;
2d21ac55 5159 }
b0d623f7 5160 break;
1c79356b 5161 }
b0d623f7
A
5162 if (entry->superpage_size)
5163 end = SUPERPAGE_ROUND_UP(end);
1c79356b
A
5164
5165 need_wakeup = FALSE;
5166 /*
5167 * Step through all entries in this region
5168 */
2d21ac55
A
5169 s = entry->vme_start;
5170 while ((entry != vm_map_to_entry(map)) && (s < end)) {
5171 /*
5172 * At this point, we have deleted all the memory entries
5173 * between "start" and "s". We still need to delete
5174 * all memory entries between "s" and "end".
5175 * While we were blocked and the map was unlocked, some
5176 * new memory entries could have been re-allocated between
5177 * "start" and "s" and we don't want to mess with those.
5178 * Some of those entries could even have been re-assembled
5179 * with an entry after "s" (in vm_map_simplify_entry()), so
5180 * we may have to vm_map_clip_start() again.
5181 */
1c79356b 5182
2d21ac55
A
5183 if (entry->vme_start >= s) {
5184 /*
5185 * This entry starts on or after "s"
5186 * so no need to clip its start.
5187 */
5188 } else {
5189 /*
5190 * This entry has been re-assembled by a
5191 * vm_map_simplify_entry(). We need to
5192 * re-clip its start.
5193 */
5194 vm_map_clip_start(map, entry, s);
5195 }
5196 if (entry->vme_end <= end) {
5197 /*
5198 * This entry is going away completely, so no need
5199 * to clip and possibly cause an unnecessary unnesting.
5200 */
5201 } else {
5202 vm_map_clip_end(map, entry, end);
5203 }
b0d623f7
A
5204
5205 if (entry->permanent) {
5206 panic("attempt to remove permanent VM map entry "
5207 "%p [0x%llx:0x%llx]\n",
5208 entry, (uint64_t) s, (uint64_t) end);
5209 }
5210
5211
1c79356b 5212 if (entry->in_transition) {
9bccf70c
A
5213 wait_result_t wait_result;
5214
1c79356b
A
5215 /*
5216 * Another thread is wiring/unwiring this entry.
5217 * Let the other thread know we are waiting.
5218 */
2d21ac55 5219 assert(s == entry->vme_start);
1c79356b
A
5220 entry->needs_wakeup = TRUE;
5221
5222 /*
5223 * wake up anybody waiting on entries that we have
5224 * already unwired/deleted.
5225 */
5226 if (need_wakeup) {
5227 vm_map_entry_wakeup(map);
5228 need_wakeup = FALSE;
5229 }
5230
9bccf70c 5231 wait_result = vm_map_entry_wait(map, interruptible);
1c79356b
A
5232
5233 if (interruptible &&
9bccf70c 5234 wait_result == THREAD_INTERRUPTED) {
1c79356b
A
5235 /*
5236 * We do not clear the needs_wakeup flag,
5237 * since we cannot tell if we were the only one.
5238 */
9bccf70c 5239 vm_map_unlock(map);
1c79356b 5240 return KERN_ABORTED;
9bccf70c 5241 }
1c79356b
A
5242
5243 /*
5244 * The entry could have been clipped or it
5245 * may not exist anymore. Look it up again.
5246 */
5247 if (!vm_map_lookup_entry(map, s, &first_entry)) {
5248 assert((map != kernel_map) &&
5249 (!entry->is_sub_map));
5250 /*
5251 * User: use the next entry
5252 */
5253 entry = first_entry->vme_next;
2d21ac55 5254 s = entry->vme_start;
1c79356b
A
5255 } else {
5256 entry = first_entry;
0c530ab8 5257 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
1c79356b 5258 }
9bccf70c 5259 last_timestamp = map->timestamp;
1c79356b
A
5260 continue;
5261 } /* end in_transition */
5262
5263 if (entry->wired_count) {
2d21ac55
A
5264 boolean_t user_wire;
5265
5266 user_wire = entry->user_wired_count > 0;
5267
1c79356b 5268 /*
b0d623f7 5269 * Remove a kernel wiring if requested
1c79356b 5270 */
b0d623f7 5271 if (flags & VM_MAP_REMOVE_KUNWIRE) {
1c79356b 5272 entry->wired_count--;
b0d623f7
A
5273 }
5274
5275 /*
5276 * Remove all user wirings for proper accounting
5277 */
5278 if (entry->user_wired_count > 0) {
5279 while (entry->user_wired_count)
5280 subtract_wire_counts(map, entry, user_wire);
5281 }
1c79356b
A
5282
5283 if (entry->wired_count != 0) {
2d21ac55 5284 assert(map != kernel_map);
1c79356b
A
5285 /*
5286 * Cannot continue. Typical case is when
5287 * a user thread has physical io pending on
 5288 * this page. Either wait for the
5289 * kernel wiring to go away or return an
5290 * error.
5291 */
5292 if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) {
9bccf70c 5293 wait_result_t wait_result;
1c79356b 5294
2d21ac55 5295 assert(s == entry->vme_start);
1c79356b 5296 entry->needs_wakeup = TRUE;
9bccf70c 5297 wait_result = vm_map_entry_wait(map,
2d21ac55 5298 interruptible);
1c79356b
A
5299
5300 if (interruptible &&
2d21ac55 5301 wait_result == THREAD_INTERRUPTED) {
1c79356b 5302 /*
2d21ac55 5303 * We do not clear the
1c79356b
A
5304 * needs_wakeup flag, since we
5305 * cannot tell if we were the
5306 * only one.
2d21ac55 5307 */
9bccf70c 5308 vm_map_unlock(map);
1c79356b 5309 return KERN_ABORTED;
9bccf70c 5310 }
1c79356b
A
5311
5312 /*
2d21ac55 5313 * The entry could have been clipped or
1c79356b
A
5314 * it may not exist anymore. Look it
5315 * up again.
2d21ac55 5316 */
1c79356b 5317 if (!vm_map_lookup_entry(map, s,
2d21ac55
A
5318 &first_entry)) {
5319 assert(map != kernel_map);
1c79356b 5320 /*
2d21ac55
A
5321 * User: use the next entry
5322 */
1c79356b 5323 entry = first_entry->vme_next;
2d21ac55 5324 s = entry->vme_start;
1c79356b
A
5325 } else {
5326 entry = first_entry;
0c530ab8 5327 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
1c79356b 5328 }
9bccf70c 5329 last_timestamp = map->timestamp;
1c79356b
A
5330 continue;
5331 }
5332 else {
5333 return KERN_FAILURE;
5334 }
5335 }
5336
5337 entry->in_transition = TRUE;
5338 /*
5339 * copy current entry. see comment in vm_map_wire()
5340 */
5341 tmp_entry = *entry;
2d21ac55 5342 assert(s == entry->vme_start);
1c79356b
A
5343
5344 /*
5345 * We can unlock the map now. The in_transition
 5346 * state guarantees existence of the entry.
5347 */
5348 vm_map_unlock(map);
2d21ac55
A
5349
5350 if (tmp_entry.is_sub_map) {
5351 vm_map_t sub_map;
5352 vm_map_offset_t sub_start, sub_end;
5353 pmap_t pmap;
5354 vm_map_offset_t pmap_addr;
5355
5356
5357 sub_map = tmp_entry.object.sub_map;
5358 sub_start = tmp_entry.offset;
5359 sub_end = sub_start + (tmp_entry.vme_end -
5360 tmp_entry.vme_start);
5361 if (tmp_entry.use_pmap) {
5362 pmap = sub_map->pmap;
5363 pmap_addr = tmp_entry.vme_start;
5364 } else {
5365 pmap = map->pmap;
5366 pmap_addr = tmp_entry.vme_start;
5367 }
5368 (void) vm_map_unwire_nested(sub_map,
5369 sub_start, sub_end,
5370 user_wire,
5371 pmap, pmap_addr);
5372 } else {
5373
39236c6e
A
5374 if (tmp_entry.object.vm_object == kernel_object) {
5375 pmap_protect_options(
5376 map->pmap,
5377 tmp_entry.vme_start,
5378 tmp_entry.vme_end,
5379 VM_PROT_NONE,
5380 PMAP_OPTIONS_REMOVE,
5381 NULL);
5382 }
2d21ac55
A
5383 vm_fault_unwire(map, &tmp_entry,
5384 tmp_entry.object.vm_object == kernel_object,
5385 map->pmap, tmp_entry.vme_start);
5386 }
5387
1c79356b
A
5388 vm_map_lock(map);
5389
5390 if (last_timestamp+1 != map->timestamp) {
5391 /*
5392 * Find the entry again. It could have
5393 * been clipped after we unlocked the map.
5394 */
5395 if (!vm_map_lookup_entry(map, s, &first_entry)){
5396 assert((map != kernel_map) &&
2d21ac55 5397 (!entry->is_sub_map));
1c79356b 5398 first_entry = first_entry->vme_next;
2d21ac55 5399 s = first_entry->vme_start;
1c79356b 5400 } else {
0c530ab8 5401 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
1c79356b
A
5402 }
5403 } else {
0c530ab8 5404 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
1c79356b
A
5405 first_entry = entry;
5406 }
5407
5408 last_timestamp = map->timestamp;
5409
5410 entry = first_entry;
5411 while ((entry != vm_map_to_entry(map)) &&
5412 (entry->vme_start < tmp_entry.vme_end)) {
5413 assert(entry->in_transition);
5414 entry->in_transition = FALSE;
5415 if (entry->needs_wakeup) {
5416 entry->needs_wakeup = FALSE;
5417 need_wakeup = TRUE;
5418 }
5419 entry = entry->vme_next;
5420 }
5421 /*
5422 * We have unwired the entry(s). Go back and
5423 * delete them.
5424 */
5425 entry = first_entry;
5426 continue;
5427 }
5428
5429 /* entry is unwired */
5430 assert(entry->wired_count == 0);
5431 assert(entry->user_wired_count == 0);
5432
2d21ac55
A
5433 assert(s == entry->vme_start);
5434
5435 if (flags & VM_MAP_REMOVE_NO_PMAP_CLEANUP) {
5436 /*
5437 * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
5438 * vm_map_delete(), some map entries might have been
5439 * transferred to a "zap_map", which doesn't have a
5440 * pmap. The original pmap has already been flushed
5441 * in the vm_map_delete() call targeting the original
5442 * map, but when we get to destroying the "zap_map",
5443 * we don't have any pmap to flush, so let's just skip
5444 * all this.
5445 */
5446 } else if (entry->is_sub_map) {
5447 if (entry->use_pmap) {
0c530ab8
A
5448#ifndef NO_NESTED_PMAP
5449 pmap_unnest(map->pmap,
2d21ac55
A
5450 (addr64_t)entry->vme_start,
5451 entry->vme_end - entry->vme_start);
0c530ab8 5452#endif /* NO_NESTED_PMAP */
316670eb 5453 if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
9bccf70c
A
5454 /* clean up parent map/maps */
5455 vm_map_submap_pmap_clean(
5456 map, entry->vme_start,
5457 entry->vme_end,
5458 entry->object.sub_map,
5459 entry->offset);
5460 }
2d21ac55 5461 } else {
1c79356b
A
5462 vm_map_submap_pmap_clean(
5463 map, entry->vme_start, entry->vme_end,
5464 entry->object.sub_map,
5465 entry->offset);
2d21ac55 5466 }
39236c6e
A
5467 } else if (entry->object.vm_object != kernel_object &&
5468 entry->object.vm_object != compressor_object) {
2d21ac55 5469 object = entry->object.vm_object;
39236c6e
A
5470 if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
5471 vm_object_pmap_protect_options(
55e303ae
A
5472 object, entry->offset,
5473 entry->vme_end - entry->vme_start,
5474 PMAP_NULL,
5475 entry->vme_start,
39236c6e
A
5476 VM_PROT_NONE,
5477 PMAP_OPTIONS_REMOVE);
5478 } else if ((entry->object.vm_object !=
5479 VM_OBJECT_NULL) ||
5480 (map->pmap == kernel_pmap)) {
5481 /* Remove translations associated
5482 * with this range unless the entry
5483 * does not have an object, or
5484 * it's the kernel map or a descendant
5485 * since the platform could potentially
5486 * create "backdoor" mappings invisible
5487 * to the VM. It is expected that
5488 * objectless, non-kernel ranges
5489 * do not have such VM invisible
5490 * translations.
5491 */
5492 pmap_remove_options(map->pmap,
5493 (addr64_t)entry->vme_start,
5494 (addr64_t)entry->vme_end,
5495 PMAP_OPTIONS_REMOVE);
1c79356b
A
5496 }
5497 }
5498
91447636
A
5499 /*
5500 * All pmap mappings for this map entry must have been
5501 * cleared by now.
5502 */
5503 assert(vm_map_pmap_is_empty(map,
5504 entry->vme_start,
5505 entry->vme_end));
5506
1c79356b
A
5507 next = entry->vme_next;
5508 s = next->vme_start;
5509 last_timestamp = map->timestamp;
91447636
A
5510
5511 if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) &&
5512 zap_map != VM_MAP_NULL) {
2d21ac55 5513 vm_map_size_t entry_size;
91447636
A
5514 /*
5515 * The caller wants to save the affected VM map entries
5516 * into the "zap_map". The caller will take care of
5517 * these entries.
5518 */
5519 /* unlink the entry from "map" ... */
6d2010ae 5520 vm_map_store_entry_unlink(map, entry);
91447636 5521 /* ... and add it to the end of the "zap_map" */
6d2010ae 5522 vm_map_store_entry_link(zap_map,
91447636
A
5523 vm_map_last_entry(zap_map),
5524 entry);
2d21ac55
A
5525 entry_size = entry->vme_end - entry->vme_start;
5526 map->size -= entry_size;
5527 zap_map->size += entry_size;
5528 /* we didn't unlock the map, so no timestamp increase */
5529 last_timestamp--;
91447636
A
5530 } else {
5531 vm_map_entry_delete(map, entry);
5532 /* vm_map_entry_delete unlocks the map */
5533 vm_map_lock(map);
5534 }
5535
1c79356b
A
5536 entry = next;
5537
5538 if(entry == vm_map_to_entry(map)) {
5539 break;
5540 }
5541 if (last_timestamp+1 != map->timestamp) {
5542 /*
5543 * We are responsible for deleting everything
5544 * in the given range. If someone has interfered,
5545 * we pick up where we left off; back-fills should
5546 * be all right for anyone except map_delete, and
5547 * we have to assume that the task has been fully
5548 * disabled before we get here.
5549 */
5550 if (!vm_map_lookup_entry(map, s, &entry)){
5551 entry = entry->vme_next;
2d21ac55 5552 s = entry->vme_start;
1c79356b 5553 } else {
2d21ac55 5554 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
1c79356b
A
5555 }
5556 /*
5557 * Others can not only allocate behind us; we can
5558 * also see entries coalesce while we don't hold the map lock.
5559 */
5560 if(entry == vm_map_to_entry(map)) {
5561 break;
5562 }
1c79356b
A
5563 }
5564 last_timestamp = map->timestamp;
5565 }
5566
5567 if (map->wait_for_space)
5568 thread_wakeup((event_t) map);
5569 /*
5570 * wake up anybody waiting on entries that we have already deleted.
5571 */
5572 if (need_wakeup)
5573 vm_map_entry_wakeup(map);
5574
5575 return KERN_SUCCESS;
5576}
5577
5578/*
5579 * vm_map_remove:
5580 *
5581 * Remove the given address range from the target map.
5582 * This is the exported form of vm_map_delete.
5583 */
5584kern_return_t
5585vm_map_remove(
5586 register vm_map_t map,
91447636
A
5587 register vm_map_offset_t start,
5588 register vm_map_offset_t end,
1c79356b
A
5589 register boolean_t flags)
5590{
5591 register kern_return_t result;
9bccf70c 5592
1c79356b
A
5593 vm_map_lock(map);
5594 VM_MAP_RANGE_CHECK(map, start, end);
39236c6e
A
5595 /*
5596 * For the zone_map, the kernel controls the allocation/freeing of memory.
5597 * Any free to the zone_map should be within the bounds of the map and
5598 * should free up memory. If the VM_MAP_RANGE_CHECK() silently converts a
5599 * free to the zone_map into a no-op, there is a problem and we should
5600 * panic.
5601 */
5602 if ((map == zone_map) && (start == end))
5603 panic("Nothing being freed to the zone_map. start = end = %p\n", (void *)start);
91447636 5604 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
1c79356b 5605 vm_map_unlock(map);
91447636 5606
1c79356b
A
5607 return(result);
5608}
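/*
 * Illustrative caller sketch, not part of this file: a minimal example
 * of how a kernel subsystem might release a range it had mapped into
 * kernel_map, assuming the usual osfmk headers (<vm/vm_map.h>,
 * <kern/assert.h>) are already included. "my_addr" and "my_size" are
 * hypothetical names.
 */
static void
example_release_range(vm_map_offset_t my_addr, vm_map_size_t my_size)
{
        kern_return_t kr;

        /* vm_map_remove() takes the map lock and calls vm_map_delete() */
        kr = vm_map_remove(kernel_map,
                           vm_map_trunc_page(my_addr,
                                             VM_MAP_PAGE_MASK(kernel_map)),
                           vm_map_round_page(my_addr + my_size,
                                             VM_MAP_PAGE_MASK(kernel_map)),
                           VM_MAP_NO_FLAGS);
        assert(kr == KERN_SUCCESS);
}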
5609
5610
1c79356b
A
5611/*
5612 * Routine: vm_map_copy_discard
5613 *
5614 * Description:
5615 * Dispose of a map copy object (returned by
5616 * vm_map_copyin).
5617 */
5618void
5619vm_map_copy_discard(
5620 vm_map_copy_t copy)
5621{
1c79356b
A
5622 if (copy == VM_MAP_COPY_NULL)
5623 return;
5624
5625 switch (copy->type) {
5626 case VM_MAP_COPY_ENTRY_LIST:
5627 while (vm_map_copy_first_entry(copy) !=
2d21ac55 5628 vm_map_copy_to_entry(copy)) {
1c79356b
A
5629 vm_map_entry_t entry = vm_map_copy_first_entry(copy);
5630
5631 vm_map_copy_entry_unlink(copy, entry);
39236c6e
A
5632 if (entry->is_sub_map) {
5633 vm_map_deallocate(entry->object.sub_map);
5634 } else {
5635 vm_object_deallocate(entry->object.vm_object);
5636 }
1c79356b
A
5637 vm_map_copy_entry_dispose(copy, entry);
5638 }
5639 break;
5640 case VM_MAP_COPY_OBJECT:
5641 vm_object_deallocate(copy->cpy_object);
5642 break;
1c79356b
A
5643 case VM_MAP_COPY_KERNEL_BUFFER:
5644
5645 /*
5646 * The vm_map_copy_t and possibly the data buffer were
5647 * allocated by a single call to kalloc(), i.e. the
5648 * vm_map_copy_t was not allocated out of the zone.
5649 */
91447636 5650 kfree(copy, copy->cpy_kalloc_size);
1c79356b
A
5651 return;
5652 }
91447636 5653 zfree(vm_map_copy_zone, copy);
1c79356b
A
5654}
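/*
 * Illustrative sketch, not part of this file: a caller that obtains a
 * copy object from vm_map_copyin() owns it until some consumer succeeds,
 * so the error path must discard it explicitly with vm_map_copy_discard().
 * "example_consume" is a hypothetical consumer standing in for routines
 * such as vm_map_copyout() or vm_map_copy_overwrite().
 */
extern kern_return_t example_consume(vm_map_copy_t copy);     /* hypothetical */

static kern_return_t
example_copyin_then_consume(vm_map_t src_map,
                            vm_map_offset_t src_addr,
                            vm_map_size_t len)
{
        vm_map_copy_t copy;
        kern_return_t kr;

        kr = vm_map_copyin(src_map, src_addr, len,
                           FALSE,               /* src_destroy */
                           &copy);
        if (kr != KERN_SUCCESS)
                return kr;

        kr = example_consume(copy);
        if (kr != KERN_SUCCESS) {
                /* the consumer did not take ownership: clean up */
                vm_map_copy_discard(copy);
        }
        return kr;
}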
5655
5656/*
5657 * Routine: vm_map_copy_copy
5658 *
5659 * Description:
5660 * Move the information in a map copy object to
5661 * a new map copy object, leaving the old one
5662 * empty.
5663 *
5664 * This is used by kernel routines that need
5665 * to look at out-of-line data (in copyin form)
5666 * before deciding whether to return SUCCESS.
5667 * If the routine returns FAILURE, the original
5668 * copy object will be deallocated; therefore,
5669 * these routines must make a copy of the copy
5670 * object and leave the original empty so that
5671 * deallocation will not fail.
5672 */
5673vm_map_copy_t
5674vm_map_copy_copy(
5675 vm_map_copy_t copy)
5676{
5677 vm_map_copy_t new_copy;
5678
5679 if (copy == VM_MAP_COPY_NULL)
5680 return VM_MAP_COPY_NULL;
5681
5682 /*
5683 * Allocate a new copy object, and copy the information
5684 * from the old one into it.
5685 */
5686
5687 new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
5688 *new_copy = *copy;
5689
5690 if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
5691 /*
5692 * The links in the entry chain must be
5693 * changed to point to the new copy object.
5694 */
5695 vm_map_copy_first_entry(copy)->vme_prev
5696 = vm_map_copy_to_entry(new_copy);
5697 vm_map_copy_last_entry(copy)->vme_next
5698 = vm_map_copy_to_entry(new_copy);
5699 }
5700
5701 /*
5702 * Change the old copy object into one that contains
5703 * nothing to be deallocated.
5704 */
5705 copy->type = VM_MAP_COPY_OBJECT;
5706 copy->cpy_object = VM_OBJECT_NULL;
5707
5708 /*
5709 * Return the new object.
5710 */
5711 return new_copy;
5712}
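/*
 * Illustrative sketch, not part of this file: the pattern described in
 * the comment above, as it might appear in a kernel service routine that
 * receives out-of-line data. On failure the caller (e.g. the MIG glue)
 * discards the original "copy"; because vm_map_copy_copy() left it empty,
 * that discard is harmless and the data lives on in "mine".
 * "example_validate" is a hypothetical validation step.
 */
extern boolean_t example_validate(vm_map_copy_t copy);        /* hypothetical */

static kern_return_t
example_service_routine(vm_map_copy_t copy)
{
        vm_map_copy_t mine;

        /* take over the contents; "copy" becomes an empty object */
        mine = vm_map_copy_copy(copy);

        if (!example_validate(mine)) {
                vm_map_copy_discard(mine);
                return KERN_INVALID_ARGUMENT;   /* caller discards "copy" */
        }

        /* keep or consume "mine" from here on */
        return KERN_SUCCESS;
}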
5713
91447636 5714static kern_return_t
1c79356b
A
5715vm_map_overwrite_submap_recurse(
5716 vm_map_t dst_map,
91447636
A
5717 vm_map_offset_t dst_addr,
5718 vm_map_size_t dst_size)
1c79356b 5719{
91447636 5720 vm_map_offset_t dst_end;
1c79356b
A
5721 vm_map_entry_t tmp_entry;
5722 vm_map_entry_t entry;
5723 kern_return_t result;
5724 boolean_t encountered_sub_map = FALSE;
5725
5726
5727
5728 /*
5729 * Verify that the destination is all writeable
5730 * initially. We have to trunc the destination
5731 * address and round the copy size or we'll end up
5732 * splitting entries in strange ways.
5733 */
5734
39236c6e
A
5735 dst_end = vm_map_round_page(dst_addr + dst_size,
5736 VM_MAP_PAGE_MASK(dst_map));
9bccf70c 5737 vm_map_lock(dst_map);
1c79356b
A
5738
5739start_pass_1:
1c79356b
A
5740 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
5741 vm_map_unlock(dst_map);
5742 return(KERN_INVALID_ADDRESS);
5743 }
5744
39236c6e
A
5745 vm_map_clip_start(dst_map,
5746 tmp_entry,
5747 vm_map_trunc_page(dst_addr,
5748 VM_MAP_PAGE_MASK(dst_map)));
2d21ac55 5749 assert(!tmp_entry->use_pmap); /* clipping did unnest if needed */
1c79356b
A
5750
5751 for (entry = tmp_entry;;) {
5752 vm_map_entry_t next;
5753
5754 next = entry->vme_next;
5755 while(entry->is_sub_map) {
91447636
A
5756 vm_map_offset_t sub_start;
5757 vm_map_offset_t sub_end;
5758 vm_map_offset_t local_end;
1c79356b
A
5759
5760 if (entry->in_transition) {
2d21ac55
A
5761 /*
5762 * Say that we are waiting, and wait for entry.
5763 */
1c79356b
A
5764 entry->needs_wakeup = TRUE;
5765 vm_map_entry_wait(dst_map, THREAD_UNINT);
5766
5767 goto start_pass_1;
5768 }
5769
5770 encountered_sub_map = TRUE;
5771 sub_start = entry->offset;
5772
5773 if(entry->vme_end < dst_end)
5774 sub_end = entry->vme_end;
5775 else
5776 sub_end = dst_end;
5777 sub_end -= entry->vme_start;
5778 sub_end += entry->offset;
5779 local_end = entry->vme_end;
5780 vm_map_unlock(dst_map);
5781
5782 result = vm_map_overwrite_submap_recurse(
2d21ac55
A
5783 entry->object.sub_map,
5784 sub_start,
5785 sub_end - sub_start);
1c79356b
A
5786
5787 if(result != KERN_SUCCESS)
5788 return result;
5789 if (dst_end <= entry->vme_end)
5790 return KERN_SUCCESS;
5791 vm_map_lock(dst_map);
5792 if(!vm_map_lookup_entry(dst_map, local_end,
5793 &tmp_entry)) {
5794 vm_map_unlock(dst_map);
5795 return(KERN_INVALID_ADDRESS);
5796 }
5797 entry = tmp_entry;
5798 next = entry->vme_next;
5799 }
5800
5801 if ( ! (entry->protection & VM_PROT_WRITE)) {
5802 vm_map_unlock(dst_map);
5803 return(KERN_PROTECTION_FAILURE);
5804 }
5805
5806 /*
5807 * If the entry is in transition, we must wait
5808 * for it to exit that state. Anything could happen
5809 * when we unlock the map, so start over.
5810 */
5811 if (entry->in_transition) {
5812
5813 /*
5814 * Say that we are waiting, and wait for entry.
5815 */
5816 entry->needs_wakeup = TRUE;
5817 vm_map_entry_wait(dst_map, THREAD_UNINT);
5818
5819 goto start_pass_1;
5820 }
5821
5822/*
5823 * our range is contained completely within this map entry
5824 */
5825 if (dst_end <= entry->vme_end) {
5826 vm_map_unlock(dst_map);
5827 return KERN_SUCCESS;
5828 }
5829/*
5830 * check that the specified range is a contiguous region
5831 */
5832 if ((next == vm_map_to_entry(dst_map)) ||
5833 (next->vme_start != entry->vme_end)) {
5834 vm_map_unlock(dst_map);
5835 return(KERN_INVALID_ADDRESS);
5836 }
5837
5838 /*
5839 * Check for permanent objects in the destination.
5840 */
5841 if ((entry->object.vm_object != VM_OBJECT_NULL) &&
2d21ac55
A
5842 ((!entry->object.vm_object->internal) ||
5843 (entry->object.vm_object->true_share))) {
1c79356b
A
5844 if(encountered_sub_map) {
5845 vm_map_unlock(dst_map);
5846 return(KERN_FAILURE);
5847 }
5848 }
5849
5850
5851 entry = next;
5852 }/* for */
5853 vm_map_unlock(dst_map);
5854 return(KERN_SUCCESS);
5855}
5856
5857/*
5858 * Routine: vm_map_copy_overwrite
5859 *
5860 * Description:
5861 * Copy the memory described by the map copy
5862 * object (copy; returned by vm_map_copyin) onto
5863 * the specified destination region (dst_map, dst_addr).
5864 * The destination must be writeable.
5865 *
5866 * Unlike vm_map_copyout, this routine actually
5867 * writes over previously-mapped memory. If the
5868 * previous mapping was to a permanent (user-supplied)
5869 * memory object, it is preserved.
5870 *
5871 * The attributes (protection and inheritance) of the
5872 * destination region are preserved.
5873 *
5874 * If successful, consumes the copy object.
5875 * Otherwise, the caller is responsible for it.
5876 *
5877 * Implementation notes:
5878 * To overwrite aligned temporary virtual memory, it is
5879 * sufficient to remove the previous mapping and insert
5880 * the new copy. This replacement is done either on
5881 * the whole region (if no permanent virtual memory
5882 * objects are embedded in the destination region) or
5883 * in individual map entries.
5884 *
5885 * To overwrite permanent virtual memory, it is necessary
5886 * to copy each page, as the external memory management
5887 * interface currently does not provide any optimizations.
5888 *
5889 * Unaligned memory also has to be copied. It is possible
5890 * to use 'vm_trickery' to copy the aligned data. This is
5891 * not done but not hard to implement.
5892 *
5893 * Once a page of permanent memory has been overwritten,
5894 * it is impossible to interrupt this function; otherwise,
5895 * the call would be neither atomic nor location-independent.
5896 * The kernel-state portion of a user thread must be
5897 * interruptible.
5898 *
5899 * It may be expensive to forward all requests that might
5900 * overwrite permanent memory (vm_write, vm_copy) to
5901 * uninterruptible kernel threads. This routine may be
5902 * called by interruptible threads; however, success is
5903 * not guaranteed -- if the request cannot be performed
5904 * atomically and interruptibly, an error indication is
5905 * returned.
5906 */
5907
91447636 5908static kern_return_t
1c79356b 5909vm_map_copy_overwrite_nested(
91447636
A
5910 vm_map_t dst_map,
5911 vm_map_address_t dst_addr,
5912 vm_map_copy_t copy,
5913 boolean_t interruptible,
6d2010ae
A
5914 pmap_t pmap,
5915 boolean_t discard_on_success)
1c79356b 5916{
91447636
A
5917 vm_map_offset_t dst_end;
5918 vm_map_entry_t tmp_entry;
5919 vm_map_entry_t entry;
5920 kern_return_t kr;
5921 boolean_t aligned = TRUE;
5922 boolean_t contains_permanent_objects = FALSE;
5923 boolean_t encountered_sub_map = FALSE;
5924 vm_map_offset_t base_addr;
5925 vm_map_size_t copy_size;
5926 vm_map_size_t total_size;
1c79356b
A
5927
5928
5929 /*
5930 * Check for null copy object.
5931 */
5932
5933 if (copy == VM_MAP_COPY_NULL)
5934 return(KERN_SUCCESS);
5935
5936 /*
5937 * Check for special kernel buffer allocated
5938 * by new_ipc_kmsg_copyin.
5939 */
5940
5941 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
0b4e3aa0 5942 return(vm_map_copyout_kernel_buffer(
2d21ac55 5943 dst_map, &dst_addr,
39236c6e 5944 copy, TRUE, discard_on_success));
1c79356b
A
5945 }
5946
5947 /*
5948 * Only works for entry lists at the moment. Will
5949 * support page lists later.
5950 */
5951
5952 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
5953
5954 if (copy->size == 0) {
6d2010ae
A
5955 if (discard_on_success)
5956 vm_map_copy_discard(copy);
1c79356b
A
5957 return(KERN_SUCCESS);
5958 }
5959
5960 /*
5961 * Verify that the destination is all writeable
5962 * initially. We have to trunc the destination
5963 * address and round the copy size or we'll end up
5964 * splitting entries in strange ways.
5965 */
5966
39236c6e
A
5967 if (!VM_MAP_PAGE_ALIGNED(copy->size,
5968 VM_MAP_PAGE_MASK(dst_map)) ||
5969 !VM_MAP_PAGE_ALIGNED(copy->offset,
5970 VM_MAP_PAGE_MASK(dst_map)) ||
5971 !VM_MAP_PAGE_ALIGNED(dst_addr,
5972 VM_MAP_PAGE_MASK(dst_map)) ||
5973 dst_map->hdr.page_shift != copy->cpy_hdr.page_shift)
1c79356b
A
5974 {
5975 aligned = FALSE;
39236c6e
A
5976 dst_end = vm_map_round_page(dst_addr + copy->size,
5977 VM_MAP_PAGE_MASK(dst_map));
1c79356b
A
5978 } else {
5979 dst_end = dst_addr + copy->size;
5980 }
5981
1c79356b 5982 vm_map_lock(dst_map);
9bccf70c 5983
91447636
A
5984 /* LP64todo - remove this check when vm_map_commpage64()
5985 * no longer has to stuff in a map_entry for the commpage
5986 * above the map's max_offset.
5987 */
5988 if (dst_addr >= dst_map->max_offset) {
5989 vm_map_unlock(dst_map);
5990 return(KERN_INVALID_ADDRESS);
5991 }
5992
9bccf70c 5993start_pass_1:
1c79356b
A
5994 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
5995 vm_map_unlock(dst_map);
5996 return(KERN_INVALID_ADDRESS);
5997 }
39236c6e
A
5998 vm_map_clip_start(dst_map,
5999 tmp_entry,
6000 vm_map_trunc_page(dst_addr,
6001 VM_MAP_PAGE_MASK(dst_map)));
1c79356b
A
6002 for (entry = tmp_entry;;) {
6003 vm_map_entry_t next = entry->vme_next;
6004
6005 while(entry->is_sub_map) {
91447636
A
6006 vm_map_offset_t sub_start;
6007 vm_map_offset_t sub_end;
6008 vm_map_offset_t local_end;
1c79356b
A
6009
6010 if (entry->in_transition) {
6011
2d21ac55
A
6012 /*
6013 * Say that we are waiting, and wait for entry.
6014 */
1c79356b
A
6015 entry->needs_wakeup = TRUE;
6016 vm_map_entry_wait(dst_map, THREAD_UNINT);
6017
6018 goto start_pass_1;
6019 }
6020
6021 local_end = entry->vme_end;
6022 if (!(entry->needs_copy)) {
6023 /* if needs_copy is set we are a COW submap; */
6024 /* in such a case we just replace it, so */
6025 /* there is no need for the following */
6026 /* check. */
6027 encountered_sub_map = TRUE;
6028 sub_start = entry->offset;
6029
6030 if(entry->vme_end < dst_end)
6031 sub_end = entry->vme_end;
6032 else
6033 sub_end = dst_end;
6034 sub_end -= entry->vme_start;
6035 sub_end += entry->offset;
6036 vm_map_unlock(dst_map);
6037
6038 kr = vm_map_overwrite_submap_recurse(
6039 entry->object.sub_map,
6040 sub_start,
6041 sub_end - sub_start);
6042 if(kr != KERN_SUCCESS)
6043 return kr;
6044 vm_map_lock(dst_map);
6045 }
6046
6047 if (dst_end <= entry->vme_end)
6048 goto start_overwrite;
6049 if(!vm_map_lookup_entry(dst_map, local_end,
6050 &entry)) {
6051 vm_map_unlock(dst_map);
6052 return(KERN_INVALID_ADDRESS);
6053 }
6054 next = entry->vme_next;
6055 }
6056
6057 if ( ! (entry->protection & VM_PROT_WRITE)) {
6058 vm_map_unlock(dst_map);
6059 return(KERN_PROTECTION_FAILURE);
6060 }
6061
6062 /*
6063 * If the entry is in transition, we must wait
6064 * for it to exit that state. Anything could happen
6065 * when we unlock the map, so start over.
6066 */
6067 if (entry->in_transition) {
6068
6069 /*
6070 * Say that we are waiting, and wait for entry.
6071 */
6072 entry->needs_wakeup = TRUE;
6073 vm_map_entry_wait(dst_map, THREAD_UNINT);
6074
6075 goto start_pass_1;
6076 }
6077
6078/*
6079 * our range is contained completely within this map entry
6080 */
6081 if (dst_end <= entry->vme_end)
6082 break;
6083/*
6084 * check that the specified range is a contiguous region
6085 */
6086 if ((next == vm_map_to_entry(dst_map)) ||
6087 (next->vme_start != entry->vme_end)) {
6088 vm_map_unlock(dst_map);
6089 return(KERN_INVALID_ADDRESS);
6090 }
6091
6092
6093 /*
6094 * Check for permanent objects in the destination.
6095 */
6096 if ((entry->object.vm_object != VM_OBJECT_NULL) &&
2d21ac55
A
6097 ((!entry->object.vm_object->internal) ||
6098 (entry->object.vm_object->true_share))) {
1c79356b
A
6099 contains_permanent_objects = TRUE;
6100 }
6101
6102 entry = next;
6103 }/* for */
6104
6105start_overwrite:
6106 /*
6107 * If there are permanent objects in the destination, then
6108 * the copy cannot be interrupted.
6109 */
6110
6111 if (interruptible && contains_permanent_objects) {
6112 vm_map_unlock(dst_map);
6113 return(KERN_FAILURE); /* XXX */
6114 }
6115
6116 /*
6117 *
6118 * Make a second pass, overwriting the data
6119 * At the beginning of each loop iteration,
6120 * the next entry to be overwritten is "tmp_entry"
6121 * (initially, the value returned from the lookup above),
6122 * and the starting address expected in that entry
6123 * is "start".
6124 */
6125
6126 total_size = copy->size;
6127 if(encountered_sub_map) {
6128 copy_size = 0;
6129 /* re-calculate tmp_entry since we've had the map */
6130 /* unlocked */
6131 if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
6132 vm_map_unlock(dst_map);
6133 return(KERN_INVALID_ADDRESS);
6134 }
6135 } else {
6136 copy_size = copy->size;
6137 }
6138
6139 base_addr = dst_addr;
6140 while(TRUE) {
6141 /* deconstruct the copy object and do in parts */
6142 /* only in sub_map, interruptable case */
6143 vm_map_entry_t copy_entry;
91447636
A
6144 vm_map_entry_t previous_prev = VM_MAP_ENTRY_NULL;
6145 vm_map_entry_t next_copy = VM_MAP_ENTRY_NULL;
1c79356b 6146 int nentries;
91447636 6147 int remaining_entries = 0;
b0d623f7 6148 vm_map_offset_t new_offset = 0;
1c79356b
A
6149
6150 for (entry = tmp_entry; copy_size == 0;) {
6151 vm_map_entry_t next;
6152
6153 next = entry->vme_next;
6154
6155 /* tmp_entry and base address are moved along */
6156 /* each time we encounter a sub-map. Otherwise */
6157 /* entry can outpace tmp_entry, and the copy_size */
6158 /* may reflect the distance between them. */
6159 /* If the current entry is found to be in transition, */
6160 /* we will start over at the beginning or at the last */
6161 /* encountered submap, as dictated by base_addr, */
6162 /* and we will zero copy_size accordingly. */
6163 if (entry->in_transition) {
6164 /*
6165 * Say that we are waiting, and wait for entry.
6166 */
6167 entry->needs_wakeup = TRUE;
6168 vm_map_entry_wait(dst_map, THREAD_UNINT);
6169
1c79356b 6170 if(!vm_map_lookup_entry(dst_map, base_addr,
2d21ac55 6171 &tmp_entry)) {
1c79356b
A
6172 vm_map_unlock(dst_map);
6173 return(KERN_INVALID_ADDRESS);
6174 }
6175 copy_size = 0;
6176 entry = tmp_entry;
6177 continue;
6178 }
6179 if(entry->is_sub_map) {
91447636
A
6180 vm_map_offset_t sub_start;
6181 vm_map_offset_t sub_end;
6182 vm_map_offset_t local_end;
1c79356b
A
6183
6184 if (entry->needs_copy) {
6185 /* if this is a COW submap */
6186 /* just back the range with an */
6187 /* anonymous entry */
6188 if(entry->vme_end < dst_end)
6189 sub_end = entry->vme_end;
6190 else
6191 sub_end = dst_end;
6192 if(entry->vme_start < base_addr)
6193 sub_start = base_addr;
6194 else
6195 sub_start = entry->vme_start;
6196 vm_map_clip_end(
6197 dst_map, entry, sub_end);
6198 vm_map_clip_start(
6199 dst_map, entry, sub_start);
2d21ac55 6200 assert(!entry->use_pmap);
1c79356b
A
6201 entry->is_sub_map = FALSE;
6202 vm_map_deallocate(
6203 entry->object.sub_map);
6204 entry->object.sub_map = NULL;
6205 entry->is_shared = FALSE;
6206 entry->needs_copy = FALSE;
6207 entry->offset = 0;
2d21ac55
A
6208 /*
6209 * XXX FBDP
6210 * We should propagate the protections
6211 * of the submap entry here instead
6212 * of forcing them to VM_PROT_ALL...
6213 * Or better yet, we should inherit
6214 * the protection of the copy_entry.
6215 */
1c79356b
A
6216 entry->protection = VM_PROT_ALL;
6217 entry->max_protection = VM_PROT_ALL;
6218 entry->wired_count = 0;
6219 entry->user_wired_count = 0;
6220 if(entry->inheritance
2d21ac55
A
6221 == VM_INHERIT_SHARE)
6222 entry->inheritance = VM_INHERIT_COPY;
1c79356b
A
6223 continue;
6224 }
6225 /* first take care of any non-sub_map */
6226 /* entries to send */
6227 if(base_addr < entry->vme_start) {
6228 /* stuff to send */
6229 copy_size =
6230 entry->vme_start - base_addr;
6231 break;
6232 }
6233 sub_start = entry->offset;
6234
6235 if(entry->vme_end < dst_end)
6236 sub_end = entry->vme_end;
6237 else
6238 sub_end = dst_end;
6239 sub_end -= entry->vme_start;
6240 sub_end += entry->offset;
6241 local_end = entry->vme_end;
6242 vm_map_unlock(dst_map);
6243 copy_size = sub_end - sub_start;
6244
6245 /* adjust the copy object */
6246 if (total_size > copy_size) {
91447636
A
6247 vm_map_size_t local_size = 0;
6248 vm_map_size_t entry_size;
1c79356b 6249
2d21ac55
A
6250 nentries = 1;
6251 new_offset = copy->offset;
6252 copy_entry = vm_map_copy_first_entry(copy);
6253 while(copy_entry !=
6254 vm_map_copy_to_entry(copy)){
6255 entry_size = copy_entry->vme_end -
6256 copy_entry->vme_start;
6257 if((local_size < copy_size) &&
6258 ((local_size + entry_size)
6259 >= copy_size)) {
6260 vm_map_copy_clip_end(copy,
6261 copy_entry,
6262 copy_entry->vme_start +
6263 (copy_size - local_size));
6264 entry_size = copy_entry->vme_end -
6265 copy_entry->vme_start;
6266 local_size += entry_size;
6267 new_offset += entry_size;
6268 }
6269 if(local_size >= copy_size) {
6270 next_copy = copy_entry->vme_next;
6271 copy_entry->vme_next =
6272 vm_map_copy_to_entry(copy);
6273 previous_prev =
6274 copy->cpy_hdr.links.prev;
6275 copy->cpy_hdr.links.prev = copy_entry;
6276 copy->size = copy_size;
6277 remaining_entries =
6278 copy->cpy_hdr.nentries;
6279 remaining_entries -= nentries;
6280 copy->cpy_hdr.nentries = nentries;
6281 break;
6282 } else {
6283 local_size += entry_size;
6284 new_offset += entry_size;
6285 nentries++;
6286 }
6287 copy_entry = copy_entry->vme_next;
6288 }
1c79356b
A
6289 }
6290
6291 if((entry->use_pmap) && (pmap == NULL)) {
6292 kr = vm_map_copy_overwrite_nested(
6293 entry->object.sub_map,
6294 sub_start,
6295 copy,
6296 interruptible,
6d2010ae
A
6297 entry->object.sub_map->pmap,
6298 TRUE);
1c79356b
A
6299 } else if (pmap != NULL) {
6300 kr = vm_map_copy_overwrite_nested(
6301 entry->object.sub_map,
6302 sub_start,
6303 copy,
6d2010ae
A
6304 interruptible, pmap,
6305 TRUE);
1c79356b
A
6306 } else {
6307 kr = vm_map_copy_overwrite_nested(
6308 entry->object.sub_map,
6309 sub_start,
6310 copy,
6311 interruptible,
6d2010ae
A
6312 dst_map->pmap,
6313 TRUE);
1c79356b
A
6314 }
6315 if(kr != KERN_SUCCESS) {
6316 if(next_copy != NULL) {
2d21ac55
A
6317 copy->cpy_hdr.nentries +=
6318 remaining_entries;
6319 copy->cpy_hdr.links.prev->vme_next =
6320 next_copy;
6321 copy->cpy_hdr.links.prev
6322 = previous_prev;
6323 copy->size = total_size;
1c79356b
A
6324 }
6325 return kr;
6326 }
6327 if (dst_end <= local_end) {
6328 return(KERN_SUCCESS);
6329 }
6330 /* otherwise copy no longer exists, it was */
6331 /* destroyed after successful copy_overwrite */
6332 copy = (vm_map_copy_t)
2d21ac55 6333 zalloc(vm_map_copy_zone);
1c79356b 6334 vm_map_copy_first_entry(copy) =
2d21ac55
A
6335 vm_map_copy_last_entry(copy) =
6336 vm_map_copy_to_entry(copy);
1c79356b
A
6337 copy->type = VM_MAP_COPY_ENTRY_LIST;
6338 copy->offset = new_offset;
6339
e2d2fc5c
A
6340 /*
6341 * XXX FBDP
6342 * this does not seem to deal with
6343 * the VM map store (R&B tree)
6344 */
6345
1c79356b
A
6346 total_size -= copy_size;
6347 copy_size = 0;
6348 /* put back remainder of copy in container */
6349 if(next_copy != NULL) {
2d21ac55
A
6350 copy->cpy_hdr.nentries = remaining_entries;
6351 copy->cpy_hdr.links.next = next_copy;
6352 copy->cpy_hdr.links.prev = previous_prev;
6353 copy->size = total_size;
6354 next_copy->vme_prev =
6355 vm_map_copy_to_entry(copy);
6356 next_copy = NULL;
1c79356b
A
6357 }
6358 base_addr = local_end;
6359 vm_map_lock(dst_map);
6360 if(!vm_map_lookup_entry(dst_map,
2d21ac55 6361 local_end, &tmp_entry)) {
1c79356b
A
6362 vm_map_unlock(dst_map);
6363 return(KERN_INVALID_ADDRESS);
6364 }
6365 entry = tmp_entry;
6366 continue;
6367 }
6368 if (dst_end <= entry->vme_end) {
6369 copy_size = dst_end - base_addr;
6370 break;
6371 }
6372
6373 if ((next == vm_map_to_entry(dst_map)) ||
2d21ac55 6374 (next->vme_start != entry->vme_end)) {
1c79356b
A
6375 vm_map_unlock(dst_map);
6376 return(KERN_INVALID_ADDRESS);
6377 }
6378
6379 entry = next;
6380 }/* for */
6381
6382 next_copy = NULL;
6383 nentries = 1;
6384
6385 /* adjust the copy object */
6386 if (total_size > copy_size) {
91447636
A
6387 vm_map_size_t local_size = 0;
6388 vm_map_size_t entry_size;
1c79356b
A
6389
6390 new_offset = copy->offset;
6391 copy_entry = vm_map_copy_first_entry(copy);
6392 while(copy_entry != vm_map_copy_to_entry(copy)) {
6393 entry_size = copy_entry->vme_end -
2d21ac55 6394 copy_entry->vme_start;
1c79356b 6395 if((local_size < copy_size) &&
2d21ac55
A
6396 ((local_size + entry_size)
6397 >= copy_size)) {
1c79356b 6398 vm_map_copy_clip_end(copy, copy_entry,
2d21ac55
A
6399 copy_entry->vme_start +
6400 (copy_size - local_size));
1c79356b 6401 entry_size = copy_entry->vme_end -
2d21ac55 6402 copy_entry->vme_start;
1c79356b
A
6403 local_size += entry_size;
6404 new_offset += entry_size;
6405 }
6406 if(local_size >= copy_size) {
6407 next_copy = copy_entry->vme_next;
6408 copy_entry->vme_next =
6409 vm_map_copy_to_entry(copy);
6410 previous_prev =
6411 copy->cpy_hdr.links.prev;
6412 copy->cpy_hdr.links.prev = copy_entry;
6413 copy->size = copy_size;
6414 remaining_entries =
6415 copy->cpy_hdr.nentries;
6416 remaining_entries -= nentries;
6417 copy->cpy_hdr.nentries = nentries;
6418 break;
6419 } else {
6420 local_size += entry_size;
6421 new_offset += entry_size;
6422 nentries++;
6423 }
6424 copy_entry = copy_entry->vme_next;
6425 }
6426 }
6427
6428 if (aligned) {
6429 pmap_t local_pmap;
6430
6431 if(pmap)
6432 local_pmap = pmap;
6433 else
6434 local_pmap = dst_map->pmap;
6435
6436 if ((kr = vm_map_copy_overwrite_aligned(
2d21ac55
A
6437 dst_map, tmp_entry, copy,
6438 base_addr, local_pmap)) != KERN_SUCCESS) {
1c79356b
A
6439 if(next_copy != NULL) {
6440 copy->cpy_hdr.nentries +=
2d21ac55 6441 remaining_entries;
1c79356b 6442 copy->cpy_hdr.links.prev->vme_next =
2d21ac55 6443 next_copy;
1c79356b 6444 copy->cpy_hdr.links.prev =
2d21ac55 6445 previous_prev;
1c79356b
A
6446 copy->size += copy_size;
6447 }
6448 return kr;
6449 }
6450 vm_map_unlock(dst_map);
6451 } else {
2d21ac55
A
6452 /*
6453 * Performance gain:
6454 *
6455 * if the copy and dst address are misaligned but at the same
6456 * offset within the page, we can copy_not_aligned the
6457 * misaligned parts and copy aligned the rest. If they are
6458 * aligned but len is unaligned, we simply need to copy
6459 * the end bit unaligned. We'll need to split the misaligned
6460 * bits of the region in this case!
6461 */
6462 /* ALWAYS UNLOCKS THE dst_map MAP */
39236c6e
A
6463 kr = vm_map_copy_overwrite_unaligned(
6464 dst_map,
6465 tmp_entry,
6466 copy,
6467 base_addr,
6468 discard_on_success);
6469 if (kr != KERN_SUCCESS) {
1c79356b
A
6470 if(next_copy != NULL) {
6471 copy->cpy_hdr.nentries +=
2d21ac55 6472 remaining_entries;
1c79356b 6473 copy->cpy_hdr.links.prev->vme_next =
2d21ac55 6474 next_copy;
1c79356b
A
6475 copy->cpy_hdr.links.prev =
6476 previous_prev;
6477 copy->size += copy_size;
6478 }
6479 return kr;
6480 }
6481 }
6482 total_size -= copy_size;
6483 if(total_size == 0)
6484 break;
6485 base_addr += copy_size;
6486 copy_size = 0;
6487 copy->offset = new_offset;
6488 if(next_copy != NULL) {
6489 copy->cpy_hdr.nentries = remaining_entries;
6490 copy->cpy_hdr.links.next = next_copy;
6491 copy->cpy_hdr.links.prev = previous_prev;
6492 next_copy->vme_prev = vm_map_copy_to_entry(copy);
6493 copy->size = total_size;
6494 }
6495 vm_map_lock(dst_map);
6496 while(TRUE) {
6497 if (!vm_map_lookup_entry(dst_map,
2d21ac55 6498 base_addr, &tmp_entry)) {
1c79356b
A
6499 vm_map_unlock(dst_map);
6500 return(KERN_INVALID_ADDRESS);
6501 }
6502 if (tmp_entry->in_transition) {
6503 entry->needs_wakeup = TRUE;
6504 vm_map_entry_wait(dst_map, THREAD_UNINT);
6505 } else {
6506 break;
6507 }
6508 }
39236c6e
A
6509 vm_map_clip_start(dst_map,
6510 tmp_entry,
6511 vm_map_trunc_page(base_addr,
6512 VM_MAP_PAGE_MASK(dst_map)));
1c79356b
A
6513
6514 entry = tmp_entry;
6515 } /* while */
6516
6517 /*
6518 * Throw away the vm_map_copy object
6519 */
6d2010ae
A
6520 if (discard_on_success)
6521 vm_map_copy_discard(copy);
1c79356b
A
6522
6523 return(KERN_SUCCESS);
6524}/* vm_map_copy_overwrite */
6525
6526kern_return_t
6527vm_map_copy_overwrite(
6528 vm_map_t dst_map,
91447636 6529 vm_map_offset_t dst_addr,
1c79356b
A
6530 vm_map_copy_t copy,
6531 boolean_t interruptible)
6532{
6d2010ae
A
6533 vm_map_size_t head_size, tail_size;
6534 vm_map_copy_t head_copy, tail_copy;
6535 vm_map_offset_t head_addr, tail_addr;
6536 vm_map_entry_t entry;
6537 kern_return_t kr;
6538
6539 head_size = 0;
6540 tail_size = 0;
6541 head_copy = NULL;
6542 tail_copy = NULL;
6543 head_addr = 0;
6544 tail_addr = 0;
6545
6546 if (interruptible ||
6547 copy == VM_MAP_COPY_NULL ||
6548 copy->type != VM_MAP_COPY_ENTRY_LIST) {
6549 /*
6550 * We can't split the "copy" map if we're interruptible
6551 * or if we don't have a "copy" map...
6552 */
6553 blunt_copy:
6554 return vm_map_copy_overwrite_nested(dst_map,
6555 dst_addr,
6556 copy,
6557 interruptible,
6558 (pmap_t) NULL,
6559 TRUE);
6560 }
6561
6562 if (copy->size < 3 * PAGE_SIZE) {
6563 /*
6564 * Too small to bother with optimizing...
6565 */
6566 goto blunt_copy;
6567 }
6568
39236c6e
A
6569 if ((dst_addr & VM_MAP_PAGE_MASK(dst_map)) !=
6570 (copy->offset & VM_MAP_PAGE_MASK(dst_map))) {
6d2010ae
A
6571 /*
6572 * Incompatible mis-alignment of source and destination...
6573 */
6574 goto blunt_copy;
6575 }
6576
6577 /*
6578 * Proper alignment or identical mis-alignment at the beginning.
6579 * Let's try and do a small unaligned copy first (if needed)
6580 * and then an aligned copy for the rest.
6581 */
6582 if (!page_aligned(dst_addr)) {
6583 head_addr = dst_addr;
39236c6e
A
6584 head_size = (VM_MAP_PAGE_SIZE(dst_map) -
6585 (copy->offset & VM_MAP_PAGE_MASK(dst_map)));
6d2010ae
A
6586 }
6587 if (!page_aligned(copy->offset + copy->size)) {
6588 /*
6589 * Mis-alignment at the end.
6590 * Do an aligned copy up to the last page and
6591 * then an unaligned copy for the remaining bytes.
6592 */
39236c6e
A
6593 tail_size = ((copy->offset + copy->size) &
6594 VM_MAP_PAGE_MASK(dst_map));
6d2010ae
A
6595 tail_addr = dst_addr + copy->size - tail_size;
6596 }
6597
6598 if (head_size + tail_size == copy->size) {
6599 /*
6600 * It's all unaligned, no optimization possible...
6601 */
6602 goto blunt_copy;
6603 }
6604
6605 /*
6606 * Can't optimize if there are any submaps in the
6607 * destination due to the way we free the "copy" map
6608 * progressively in vm_map_copy_overwrite_nested()
6609 * in that case.
6610 */
6611 vm_map_lock_read(dst_map);
6612 if (! vm_map_lookup_entry(dst_map, dst_addr, &entry)) {
6613 vm_map_unlock_read(dst_map);
6614 goto blunt_copy;
6615 }
6616 for (;
6617 (entry != vm_map_copy_to_entry(copy) &&
6618 entry->vme_start < dst_addr + copy->size);
6619 entry = entry->vme_next) {
6620 if (entry->is_sub_map) {
6621 vm_map_unlock_read(dst_map);
6622 goto blunt_copy;
6623 }
6624 }
6625 vm_map_unlock_read(dst_map);
6626
6627 if (head_size) {
6628 /*
6629 * Unaligned copy of the first "head_size" bytes, to reach
6630 * a page boundary.
6631 */
6632
6633 /*
6634 * Extract "head_copy" out of "copy".
6635 */
6636 head_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
6637 vm_map_copy_first_entry(head_copy) =
6638 vm_map_copy_to_entry(head_copy);
6639 vm_map_copy_last_entry(head_copy) =
6640 vm_map_copy_to_entry(head_copy);
6641 head_copy->type = VM_MAP_COPY_ENTRY_LIST;
6642 head_copy->cpy_hdr.nentries = 0;
6643 head_copy->cpy_hdr.entries_pageable =
6644 copy->cpy_hdr.entries_pageable;
6645 vm_map_store_init(&head_copy->cpy_hdr);
6646
6647 head_copy->offset = copy->offset;
6648 head_copy->size = head_size;
6649
6650 copy->offset += head_size;
6651 copy->size -= head_size;
6652
6653 entry = vm_map_copy_first_entry(copy);
6654 vm_map_copy_clip_end(copy, entry, copy->offset);
6655 vm_map_copy_entry_unlink(copy, entry);
6656 vm_map_copy_entry_link(head_copy,
6657 vm_map_copy_to_entry(head_copy),
6658 entry);
6659
6660 /*
6661 * Do the unaligned copy.
6662 */
6663 kr = vm_map_copy_overwrite_nested(dst_map,
6664 head_addr,
6665 head_copy,
6666 interruptible,
6667 (pmap_t) NULL,
6668 FALSE);
6669 if (kr != KERN_SUCCESS)
6670 goto done;
6671 }
6672
6673 if (tail_size) {
6674 /*
6675 * Extract "tail_copy" out of "copy".
6676 */
6677 tail_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
6678 vm_map_copy_first_entry(tail_copy) =
6679 vm_map_copy_to_entry(tail_copy);
6680 vm_map_copy_last_entry(tail_copy) =
6681 vm_map_copy_to_entry(tail_copy);
6682 tail_copy->type = VM_MAP_COPY_ENTRY_LIST;
6683 tail_copy->cpy_hdr.nentries = 0;
6684 tail_copy->cpy_hdr.entries_pageable =
6685 copy->cpy_hdr.entries_pageable;
6686 vm_map_store_init(&tail_copy->cpy_hdr);
6687
6688 tail_copy->offset = copy->offset + copy->size - tail_size;
6689 tail_copy->size = tail_size;
6690
6691 copy->size -= tail_size;
6692
6693 entry = vm_map_copy_last_entry(copy);
6694 vm_map_copy_clip_start(copy, entry, tail_copy->offset);
6695 entry = vm_map_copy_last_entry(copy);
6696 vm_map_copy_entry_unlink(copy, entry);
6697 vm_map_copy_entry_link(tail_copy,
6698 vm_map_copy_last_entry(tail_copy),
6699 entry);
6700 }
6701
6702 /*
6703 * Copy most (or possibly all) of the data.
6704 */
6705 kr = vm_map_copy_overwrite_nested(dst_map,
6706 dst_addr + head_size,
6707 copy,
6708 interruptible,
6709 (pmap_t) NULL,
6710 FALSE);
6711 if (kr != KERN_SUCCESS) {
6712 goto done;
6713 }
6714
6715 if (tail_size) {
6716 kr = vm_map_copy_overwrite_nested(dst_map,
6717 tail_addr,
6718 tail_copy,
6719 interruptible,
6720 (pmap_t) NULL,
6721 FALSE);
6722 }
6723
6724done:
6725 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
6726 if (kr == KERN_SUCCESS) {
6727 /*
6728 * Discard all the copy maps.
6729 */
6730 if (head_copy) {
6731 vm_map_copy_discard(head_copy);
6732 head_copy = NULL;
6733 }
6734 vm_map_copy_discard(copy);
6735 if (tail_copy) {
6736 vm_map_copy_discard(tail_copy);
6737 tail_copy = NULL;
6738 }
6739 } else {
6740 /*
6741 * Re-assemble the original copy map.
6742 */
6743 if (head_copy) {
6744 entry = vm_map_copy_first_entry(head_copy);
6745 vm_map_copy_entry_unlink(head_copy, entry);
6746 vm_map_copy_entry_link(copy,
6747 vm_map_copy_to_entry(copy),
6748 entry);
6749 copy->offset -= head_size;
6750 copy->size += head_size;
6751 vm_map_copy_discard(head_copy);
6752 head_copy = NULL;
6753 }
6754 if (tail_copy) {
6755 entry = vm_map_copy_last_entry(tail_copy);
6756 vm_map_copy_entry_unlink(tail_copy, entry);
6757 vm_map_copy_entry_link(copy,
6758 vm_map_copy_last_entry(copy),
6759 entry);
6760 copy->size += tail_size;
6761 vm_map_copy_discard(tail_copy);
6762 tail_copy = NULL;
6763 }
6764 }
6765 return kr;
1c79356b
A
6766}
6767
6768
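/*
 * Illustrative sketch, not part of this file: overwriting an existing,
 * writeable mapping in a destination map with data taken from a source
 * map. On success vm_map_copy_overwrite() consumes "copy"; on failure
 * the caller still owns it and must discard it. The function name and
 * parameters are hypothetical.
 */
static kern_return_t
example_overwrite_range(vm_map_t src_map, vm_map_offset_t src_addr,
                        vm_map_t dst_map, vm_map_offset_t dst_addr,
                        vm_map_size_t len)
{
        vm_map_copy_t copy;
        kern_return_t kr;

        kr = vm_map_copyin(src_map, src_addr, len,
                           FALSE,               /* src_destroy */
                           &copy);
        if (kr != KERN_SUCCESS)
                return kr;

        kr = vm_map_copy_overwrite(dst_map, dst_addr, copy,
                                   FALSE);      /* interruptible */
        if (kr != KERN_SUCCESS)
                vm_map_copy_discard(copy);
        return kr;
}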
6769/*
91447636 6770 * Routine: vm_map_copy_overwrite_unaligned [internal use only]
1c79356b
A
6771 *
6772 * Description:
6773 * Physically copy unaligned data
6774 *
6775 * Implementation:
6776 * Unaligned parts of pages have to be physically copied. We use
6777 * a modified form of vm_fault_copy (which understands non-aligned
6778 * page offsets and sizes) to do the copy. We attempt to copy as
6779 * much memory in one go as possible; however, vm_fault_copy copies
6780 * within one memory object, so we have to find the smallest of "amount left",
6781 * "source object data size" and "target object data size". With
6782 * unaligned data we don't need to split regions; therefore the source
6783 * (copy) object should be one map entry, while the target range may be split
6784 * over multiple map entries. In any event we are pessimistic
6785 * about these assumptions.
6786 *
6787 * Assumptions:
6788 * dst_map is locked on entry and is return locked on success,
6789 * unlocked on error.
6790 */
6791
91447636 6792static kern_return_t
1c79356b
A
6793vm_map_copy_overwrite_unaligned(
6794 vm_map_t dst_map,
6795 vm_map_entry_t entry,
6796 vm_map_copy_t copy,
39236c6e
A
6797 vm_map_offset_t start,
6798 boolean_t discard_on_success)
1c79356b 6799{
39236c6e
A
6800 vm_map_entry_t copy_entry;
6801 vm_map_entry_t copy_entry_next;
1c79356b
A
6802 vm_map_version_t version;
6803 vm_object_t dst_object;
6804 vm_object_offset_t dst_offset;
6805 vm_object_offset_t src_offset;
6806 vm_object_offset_t entry_offset;
91447636
A
6807 vm_map_offset_t entry_end;
6808 vm_map_size_t src_size,
1c79356b
A
6809 dst_size,
6810 copy_size,
6811 amount_left;
6812 kern_return_t kr = KERN_SUCCESS;
6813
39236c6e
A
6814
6815 copy_entry = vm_map_copy_first_entry(copy);
6816
1c79356b
A
6817 vm_map_lock_write_to_read(dst_map);
6818
91447636 6819 src_offset = copy->offset - vm_object_trunc_page(copy->offset);
1c79356b
A
6820 amount_left = copy->size;
6821/*
6822 * unaligned, so we never clipped this entry; we need the offset into
6823 * the vm_object, not just the data.
6824 */
6825 while (amount_left > 0) {
6826
6827 if (entry == vm_map_to_entry(dst_map)) {
6828 vm_map_unlock_read(dst_map);
6829 return KERN_INVALID_ADDRESS;
6830 }
6831
6832 /* "start" must be within the current map entry */
6833 assert ((start>=entry->vme_start) && (start<entry->vme_end));
6834
6835 dst_offset = start - entry->vme_start;
6836
6837 dst_size = entry->vme_end - start;
6838
6839 src_size = copy_entry->vme_end -
6840 (copy_entry->vme_start + src_offset);
6841
6842 if (dst_size < src_size) {
6843/*
6844 * we can only copy dst_size bytes before
6845 * we have to get the next destination entry
6846 */
6847 copy_size = dst_size;
6848 } else {
6849/*
6850 * we can only copy src_size bytes before
6851 * we have to get the next source copy entry
6852 */
6853 copy_size = src_size;
6854 }
6855
6856 if (copy_size > amount_left) {
6857 copy_size = amount_left;
6858 }
6859/*
6860 * Entry needs copy: create a shadow object for the
6861 * copy-on-write region.
6862 */
6863 if (entry->needs_copy &&
2d21ac55 6864 ((entry->protection & VM_PROT_WRITE) != 0))
1c79356b
A
6865 {
6866 if (vm_map_lock_read_to_write(dst_map)) {
6867 vm_map_lock_read(dst_map);
6868 goto RetryLookup;
6869 }
6870 vm_object_shadow(&entry->object.vm_object,
2d21ac55
A
6871 &entry->offset,
6872 (vm_map_size_t)(entry->vme_end
6873 - entry->vme_start));
1c79356b
A
6874 entry->needs_copy = FALSE;
6875 vm_map_lock_write_to_read(dst_map);
6876 }
6877 dst_object = entry->object.vm_object;
6878/*
6879 * unlike with the virtual (aligned) copy, we're going
6880 * to fault on it; therefore we need a target object.
6881 */
6882 if (dst_object == VM_OBJECT_NULL) {
6883 if (vm_map_lock_read_to_write(dst_map)) {
6884 vm_map_lock_read(dst_map);
6885 goto RetryLookup;
6886 }
91447636 6887 dst_object = vm_object_allocate((vm_map_size_t)
2d21ac55 6888 entry->vme_end - entry->vme_start);
1c79356b
A
6889 entry->object.vm_object = dst_object;
6890 entry->offset = 0;
6891 vm_map_lock_write_to_read(dst_map);
6892 }
6893/*
6894 * Take an object reference and unlock map. The "entry" may
6895 * disappear or change when the map is unlocked.
6896 */
6897 vm_object_reference(dst_object);
6898 version.main_timestamp = dst_map->timestamp;
6899 entry_offset = entry->offset;
6900 entry_end = entry->vme_end;
6901 vm_map_unlock_read(dst_map);
6902/*
6903 * Copy as much as possible in one pass
6904 */
6905 kr = vm_fault_copy(
6906 copy_entry->object.vm_object,
6907 copy_entry->offset + src_offset,
6908 &copy_size,
6909 dst_object,
6910 entry_offset + dst_offset,
6911 dst_map,
6912 &version,
6913 THREAD_UNINT );
6914
6915 start += copy_size;
6916 src_offset += copy_size;
6917 amount_left -= copy_size;
6918/*
6919 * Release the object reference
6920 */
6921 vm_object_deallocate(dst_object);
6922/*
6923 * If a hard error occurred, return it now
6924 */
6925 if (kr != KERN_SUCCESS)
6926 return kr;
6927
6928 if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
2d21ac55 6929 || amount_left == 0)
1c79356b
A
6930 {
6931/*
6932 * all done with this copy entry, dispose.
6933 */
39236c6e
A
6934 copy_entry_next = copy_entry->vme_next;
6935
6936 if (discard_on_success) {
6937 vm_map_copy_entry_unlink(copy, copy_entry);
6938 assert(!copy_entry->is_sub_map);
6939 vm_object_deallocate(
6940 copy_entry->object.vm_object);
6941 vm_map_copy_entry_dispose(copy, copy_entry);
6942 }
1c79356b 6943
39236c6e
A
6944 if (copy_entry_next == vm_map_copy_to_entry(copy) &&
6945 amount_left) {
1c79356b
A
6946/*
6947 * not finished copying but ran out of source
6948 */
6949 return KERN_INVALID_ADDRESS;
6950 }
39236c6e
A
6951
6952 copy_entry = copy_entry_next;
6953
1c79356b
A
6954 src_offset = 0;
6955 }
6956
6957 if (amount_left == 0)
6958 return KERN_SUCCESS;
6959
6960 vm_map_lock_read(dst_map);
6961 if (version.main_timestamp == dst_map->timestamp) {
6962 if (start == entry_end) {
6963/*
6964 * destination region is split. Use the version
6965 * information to avoid a lookup in the normal
6966 * case.
6967 */
6968 entry = entry->vme_next;
6969/*
6970 * should be contiguous. Fail if we encounter
6971 * a hole in the destination.
6972 */
6973 if (start != entry->vme_start) {
6974 vm_map_unlock_read(dst_map);
6975 return KERN_INVALID_ADDRESS ;
6976 }
6977 }
6978 } else {
6979/*
6980 * Map version check failed.
6981 * we must lookup the entry because somebody
6982 * might have changed the map behind our backs.
6983 */
2d21ac55 6984 RetryLookup:
1c79356b
A
6985 if (!vm_map_lookup_entry(dst_map, start, &entry))
6986 {
6987 vm_map_unlock_read(dst_map);
6988 return KERN_INVALID_ADDRESS ;
6989 }
6990 }
6991 }/* while */
6992
1c79356b
A
6993 return KERN_SUCCESS;
6994}/* vm_map_copy_overwrite_unaligned */
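/*
 * Illustrative sketch, not part of this file: the per-iteration size used
 * by vm_map_copy_overwrite_unaligned() above is effectively the minimum of
 * the bytes still to copy, the bytes left in the current source (copy)
 * entry, and the bytes left in the current destination entry. The helper
 * name is hypothetical.
 */
static vm_map_size_t
example_unaligned_chunk(vm_map_size_t amount_left,
                        vm_map_size_t src_left,
                        vm_map_size_t dst_left)
{
        vm_map_size_t chunk;

        chunk = (src_left < dst_left) ? src_left : dst_left;
        if (chunk > amount_left)
                chunk = amount_left;
        return chunk;
}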
6995
6996/*
91447636 6997 * Routine: vm_map_copy_overwrite_aligned [internal use only]
1c79356b
A
6998 *
6999 * Description:
7000 * Does all the vm_trickery possible for whole pages.
7001 *
7002 * Implementation:
7003 *
7004 * If there are no permanent objects in the destination,
7005 * and the source and destination map entry zones match,
7006 * and the destination map entry is not shared,
7007 * then the map entries can be deleted and replaced
7008 * with those from the copy. The following code is the
7009 * basic idea of what to do, but there are lots of annoying
7010 * little details about getting protection and inheritance
7011 * right. Should add protection, inheritance, and sharing checks
7012 * to the above pass and make sure that no wiring is involved.
7013 */
7014
e2d2fc5c
A
7015int vm_map_copy_overwrite_aligned_src_not_internal = 0;
7016int vm_map_copy_overwrite_aligned_src_not_symmetric = 0;
7017int vm_map_copy_overwrite_aligned_src_large = 0;
7018
91447636 7019static kern_return_t
1c79356b
A
7020vm_map_copy_overwrite_aligned(
7021 vm_map_t dst_map,
7022 vm_map_entry_t tmp_entry,
7023 vm_map_copy_t copy,
91447636 7024 vm_map_offset_t start,
2d21ac55 7025 __unused pmap_t pmap)
1c79356b
A
7026{
7027 vm_object_t object;
7028 vm_map_entry_t copy_entry;
91447636
A
7029 vm_map_size_t copy_size;
7030 vm_map_size_t size;
1c79356b
A
7031 vm_map_entry_t entry;
7032
7033 while ((copy_entry = vm_map_copy_first_entry(copy))
2d21ac55 7034 != vm_map_copy_to_entry(copy))
1c79356b
A
7035 {
7036 copy_size = (copy_entry->vme_end - copy_entry->vme_start);
7037
7038 entry = tmp_entry;
2d21ac55 7039 assert(!entry->use_pmap); /* unnested when clipped earlier */
1c79356b
A
7040 if (entry == vm_map_to_entry(dst_map)) {
7041 vm_map_unlock(dst_map);
7042 return KERN_INVALID_ADDRESS;
7043 }
7044 size = (entry->vme_end - entry->vme_start);
7045 /*
7046 * Make sure that no holes popped up in the
7047 * address map, and that the protection is
7048 * still valid, in case the map was unlocked
7049 * earlier.
7050 */
7051
7052 if ((entry->vme_start != start) || ((entry->is_sub_map)
2d21ac55 7053 && !entry->needs_copy)) {
1c79356b
A
7054 vm_map_unlock(dst_map);
7055 return(KERN_INVALID_ADDRESS);
7056 }
7057 assert(entry != vm_map_to_entry(dst_map));
7058
7059 /*
7060 * Check protection again
7061 */
7062
7063 if ( ! (entry->protection & VM_PROT_WRITE)) {
7064 vm_map_unlock(dst_map);
7065 return(KERN_PROTECTION_FAILURE);
7066 }
7067
7068 /*
7069 * Adjust to source size first
7070 */
7071
7072 if (copy_size < size) {
7073 vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
7074 size = copy_size;
7075 }
7076
7077 /*
7078 * Adjust to destination size
7079 */
7080
7081 if (size < copy_size) {
7082 vm_map_copy_clip_end(copy, copy_entry,
2d21ac55 7083 copy_entry->vme_start + size);
1c79356b
A
7084 copy_size = size;
7085 }
7086
7087 assert((entry->vme_end - entry->vme_start) == size);
7088 assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
7089 assert((copy_entry->vme_end - copy_entry->vme_start) == size);
7090
7091 /*
7092 * If the destination contains temporary unshared memory,
7093 * we can perform the copy by throwing it away and
7094 * installing the source data.
7095 */
7096
7097 object = entry->object.vm_object;
7098 if ((!entry->is_shared &&
2d21ac55
A
7099 ((object == VM_OBJECT_NULL) ||
7100 (object->internal && !object->true_share))) ||
1c79356b
A
7101 entry->needs_copy) {
7102 vm_object_t old_object = entry->object.vm_object;
7103 vm_object_offset_t old_offset = entry->offset;
7104 vm_object_offset_t offset;
7105
7106 /*
7107 * Ensure that the source and destination aren't
7108 * identical
7109 */
7110 if (old_object == copy_entry->object.vm_object &&
7111 old_offset == copy_entry->offset) {
7112 vm_map_copy_entry_unlink(copy, copy_entry);
7113 vm_map_copy_entry_dispose(copy, copy_entry);
7114
7115 if (old_object != VM_OBJECT_NULL)
7116 vm_object_deallocate(old_object);
7117
7118 start = tmp_entry->vme_end;
7119 tmp_entry = tmp_entry->vme_next;
7120 continue;
7121 }
7122
e2d2fc5c
A
7123#define __TRADEOFF1_OBJ_SIZE (64 * 1024 * 1024) /* 64 MB */
7124#define __TRADEOFF1_COPY_SIZE (128 * 1024) /* 128 KB */
7125 if (copy_entry->object.vm_object != VM_OBJECT_NULL &&
7126 copy_entry->object.vm_object->vo_size >= __TRADEOFF1_OBJ_SIZE &&
7127 copy_size <= __TRADEOFF1_COPY_SIZE) {
7128 /*
7129 * Virtual vs. Physical copy tradeoff #1.
7130 *
7131 * Copying only a few pages out of a large
7132 * object: do a physical copy instead of
7133 * a virtual copy, to avoid possibly keeping
7134 * the entire large object alive because of
7135 * those few copy-on-write pages.
7136 */
7137 vm_map_copy_overwrite_aligned_src_large++;
7138 goto slow_copy;
7139 }
e2d2fc5c 7140
ebb1b9f4
A
7141 if (entry->alias >= VM_MEMORY_MALLOC &&
7142 entry->alias <= VM_MEMORY_MALLOC_LARGE_REUSED) {
7143 vm_object_t new_object, new_shadow;
7144
7145 /*
7146 * We're about to map something over a mapping
7147 * established by malloc()...
7148 */
7149 new_object = copy_entry->object.vm_object;
7150 if (new_object != VM_OBJECT_NULL) {
7151 vm_object_lock_shared(new_object);
7152 }
7153 while (new_object != VM_OBJECT_NULL &&
e2d2fc5c
A
7154 !new_object->true_share &&
7155 new_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
ebb1b9f4
A
7156 new_object->internal) {
7157 new_shadow = new_object->shadow;
7158 if (new_shadow == VM_OBJECT_NULL) {
7159 break;
7160 }
7161 vm_object_lock_shared(new_shadow);
7162 vm_object_unlock(new_object);
7163 new_object = new_shadow;
7164 }
7165 if (new_object != VM_OBJECT_NULL) {
7166 if (!new_object->internal) {
7167 /*
7168 * The new mapping is backed
7169 * by an external object. We
7170 * don't want malloc'ed memory
7171 * to be replaced with such a
7172 * non-anonymous mapping, so
7173 * let's go off the optimized
7174 * path...
7175 */
e2d2fc5c 7176 vm_map_copy_overwrite_aligned_src_not_internal++;
ebb1b9f4
A
7177 vm_object_unlock(new_object);
7178 goto slow_copy;
7179 }
e2d2fc5c
A
7180 if (new_object->true_share ||
7181 new_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
7182 /*
7183 * Same if there's a "true_share"
7184 * object in the shadow chain, or
7185 * an object with a non-default
7186 * (SYMMETRIC) copy strategy.
7187 */
7188 vm_map_copy_overwrite_aligned_src_not_symmetric++;
7189 vm_object_unlock(new_object);
7190 goto slow_copy;
7191 }
ebb1b9f4
A
7192 vm_object_unlock(new_object);
7193 }
7194 /*
7195 * The new mapping is still backed by
7196 * anonymous (internal) memory, so it's
7197 * OK to substitute it for the original
7198 * malloc() mapping.
7199 */
7200 }
7201
1c79356b
A
7202 if (old_object != VM_OBJECT_NULL) {
7203 if(entry->is_sub_map) {
9bccf70c 7204 if(entry->use_pmap) {
0c530ab8 7205#ifndef NO_NESTED_PMAP
9bccf70c 7206 pmap_unnest(dst_map->pmap,
2d21ac55
A
7207 (addr64_t)entry->vme_start,
7208 entry->vme_end - entry->vme_start);
0c530ab8 7209#endif /* NO_NESTED_PMAP */
316670eb 7210 if(dst_map->mapped_in_other_pmaps) {
9bccf70c
A
7211 /* clean up parent */
7212 /* map/maps */
2d21ac55
A
7213 vm_map_submap_pmap_clean(
7214 dst_map, entry->vme_start,
7215 entry->vme_end,
7216 entry->object.sub_map,
7217 entry->offset);
9bccf70c
A
7218 }
7219 } else {
7220 vm_map_submap_pmap_clean(
7221 dst_map, entry->vme_start,
7222 entry->vme_end,
7223 entry->object.sub_map,
7224 entry->offset);
7225 }
7226 vm_map_deallocate(
1c79356b 7227 entry->object.sub_map);
9bccf70c 7228 } else {
316670eb 7229 if(dst_map->mapped_in_other_pmaps) {
39236c6e 7230 vm_object_pmap_protect_options(
9bccf70c
A
7231 entry->object.vm_object,
7232 entry->offset,
7233 entry->vme_end
2d21ac55 7234 - entry->vme_start,
9bccf70c
A
7235 PMAP_NULL,
7236 entry->vme_start,
39236c6e
A
7237 VM_PROT_NONE,
7238 PMAP_OPTIONS_REMOVE);
9bccf70c 7239 } else {
39236c6e
A
7240 pmap_remove_options(
7241 dst_map->pmap,
7242 (addr64_t)(entry->vme_start),
7243 (addr64_t)(entry->vme_end),
7244 PMAP_OPTIONS_REMOVE);
9bccf70c 7245 }
1c79356b 7246 vm_object_deallocate(old_object);
9bccf70c 7247 }
1c79356b
A
7248 }
7249
7250 entry->is_sub_map = FALSE;
7251 entry->object = copy_entry->object;
7252 object = entry->object.vm_object;
7253 entry->needs_copy = copy_entry->needs_copy;
7254 entry->wired_count = 0;
7255 entry->user_wired_count = 0;
7256 offset = entry->offset = copy_entry->offset;
7257
7258 vm_map_copy_entry_unlink(copy, copy_entry);
7259 vm_map_copy_entry_dispose(copy, copy_entry);
2d21ac55 7260
1c79356b 7261 /*
2d21ac55 7262 * we could try to push pages into the pmap at this point, BUT
1c79356b
A
7263 * this optimization only saved on average 2 us per page if ALL
7264 * the pages in the source were currently mapped
7265 * and ALL the pages in the dest were touched, if there were fewer
7266 * than 2/3 of the pages touched, this optimization actually cost more cycles
2d21ac55 7267 * it also puts a lot of pressure on the pmap layer w/r to mapping structures
1c79356b
A
7268 */
7269
1c79356b
A
7270 /*
7271 * Set up for the next iteration. The map
7272 * has not been unlocked, so the next
7273 * address should be at the end of this
7274 * entry, and the next map entry should be
7275 * the one following it.
7276 */
7277
7278 start = tmp_entry->vme_end;
7279 tmp_entry = tmp_entry->vme_next;
7280 } else {
7281 vm_map_version_t version;
ebb1b9f4
A
7282 vm_object_t dst_object;
7283 vm_object_offset_t dst_offset;
1c79356b
A
7284 kern_return_t r;
7285
ebb1b9f4 7286 slow_copy:
e2d2fc5c
A
7287 if (entry->needs_copy) {
7288 vm_object_shadow(&entry->object.vm_object,
7289 &entry->offset,
7290 (entry->vme_end -
7291 entry->vme_start));
7292 entry->needs_copy = FALSE;
7293 }
7294
ebb1b9f4
A
7295 dst_object = entry->object.vm_object;
7296 dst_offset = entry->offset;
7297
1c79356b
A
7298 /*
7299 * Take an object reference, and record
7300 * the map version information so that the
7301 * map can be safely unlocked.
7302 */
7303
ebb1b9f4
A
7304 if (dst_object == VM_OBJECT_NULL) {
7305 /*
7306 * We would usually have just taken the
7307 * optimized path above if the destination
7308 * object has not been allocated yet. But we
7309 * now disable that optimization if the copy
7310 * entry's object is not backed by anonymous
7311 * memory to avoid replacing malloc'ed
7312 * (i.e. re-usable) anonymous memory with a
7313 * not-so-anonymous mapping.
7314 * So we have to handle this case here and
7315 * allocate a new VM object for this map entry.
7316 */
7317 dst_object = vm_object_allocate(
7318 entry->vme_end - entry->vme_start);
7319 dst_offset = 0;
7320 entry->object.vm_object = dst_object;
7321 entry->offset = dst_offset;
7322
7323 }
7324
1c79356b
A
7325 vm_object_reference(dst_object);
7326
9bccf70c
A
7327 /* account for unlock bumping up timestamp */
7328 version.main_timestamp = dst_map->timestamp + 1;
1c79356b
A
7329
7330 vm_map_unlock(dst_map);
7331
7332 /*
7333 * Copy as much as possible in one pass
7334 */
7335
7336 copy_size = size;
7337 r = vm_fault_copy(
2d21ac55
A
7338 copy_entry->object.vm_object,
7339 copy_entry->offset,
7340 &copy_size,
7341 dst_object,
7342 dst_offset,
7343 dst_map,
7344 &version,
7345 THREAD_UNINT );
1c79356b
A
7346
7347 /*
7348 * Release the object reference
7349 */
7350
7351 vm_object_deallocate(dst_object);
7352
7353 /*
7354 * If a hard error occurred, return it now
7355 */
7356
7357 if (r != KERN_SUCCESS)
7358 return(r);
7359
7360 if (copy_size != 0) {
7361 /*
7362 * Dispose of the copied region
7363 */
7364
7365 vm_map_copy_clip_end(copy, copy_entry,
2d21ac55 7366 copy_entry->vme_start + copy_size);
1c79356b
A
7367 vm_map_copy_entry_unlink(copy, copy_entry);
7368 vm_object_deallocate(copy_entry->object.vm_object);
7369 vm_map_copy_entry_dispose(copy, copy_entry);
7370 }
7371
7372 /*
7373 * Pick up in the destination map where we left off.
7374 *
7375 * Use the version information to avoid a lookup
7376 * in the normal case.
7377 */
7378
7379 start += copy_size;
7380 vm_map_lock(dst_map);
e2d2fc5c
A
7381 if (version.main_timestamp == dst_map->timestamp &&
7382 copy_size != 0) {
1c79356b
A
7383 /* We can safely use saved tmp_entry value */
7384
7385 vm_map_clip_end(dst_map, tmp_entry, start);
7386 tmp_entry = tmp_entry->vme_next;
7387 } else {
7388 /* Must do lookup of tmp_entry */
7389
7390 if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
7391 vm_map_unlock(dst_map);
7392 return(KERN_INVALID_ADDRESS);
7393 }
7394 vm_map_clip_start(dst_map, tmp_entry, start);
7395 }
7396 }
7397 }/* while */
7398
7399 return(KERN_SUCCESS);
7400}/* vm_map_copy_overwrite_aligned */
7401
7402/*
91447636 7403 * Routine: vm_map_copyin_kernel_buffer [internal use only]
1c79356b
A
7404 *
7405 * Description:
7406 * Copy in data to a kernel buffer from space in the
91447636 7407 * source map. The original space may be optionally
1c79356b
A
7408 * deallocated.
7409 *
7410 * If successful, returns a new copy object.
7411 */
91447636 7412static kern_return_t
1c79356b
A
7413vm_map_copyin_kernel_buffer(
7414 vm_map_t src_map,
91447636
A
7415 vm_map_offset_t src_addr,
7416 vm_map_size_t len,
1c79356b
A
7417 boolean_t src_destroy,
7418 vm_map_copy_t *copy_result)
7419{
91447636 7420 kern_return_t kr;
1c79356b 7421 vm_map_copy_t copy;
b0d623f7
A
7422 vm_size_t kalloc_size;
7423
7424 if ((vm_size_t) len != len) {
7425 /* "len" is too big and doesn't fit in a "vm_size_t" */
7426 return KERN_RESOURCE_SHORTAGE;
7427 }
7428 kalloc_size = (vm_size_t) (sizeof(struct vm_map_copy) + len);
7429 assert((vm_map_size_t) kalloc_size == sizeof (struct vm_map_copy) + len);
1c79356b
A
7430
7431 copy = (vm_map_copy_t) kalloc(kalloc_size);
7432 if (copy == VM_MAP_COPY_NULL) {
7433 return KERN_RESOURCE_SHORTAGE;
7434 }
7435 copy->type = VM_MAP_COPY_KERNEL_BUFFER;
7436 copy->size = len;
7437 copy->offset = 0;
91447636 7438 copy->cpy_kdata = (void *) (copy + 1);
1c79356b
A
7439 copy->cpy_kalloc_size = kalloc_size;
7440
b0d623f7 7441 kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t) len);
91447636
A
7442 if (kr != KERN_SUCCESS) {
7443 kfree(copy, kalloc_size);
7444 return kr;
1c79356b
A
7445 }
7446 if (src_destroy) {
39236c6e
A
7447 (void) vm_map_remove(
7448 src_map,
7449 vm_map_trunc_page(src_addr,
7450 VM_MAP_PAGE_MASK(src_map)),
7451 vm_map_round_page(src_addr + len,
7452 VM_MAP_PAGE_MASK(src_map)),
7453 (VM_MAP_REMOVE_INTERRUPTIBLE |
7454 VM_MAP_REMOVE_WAIT_FOR_KWIRE |
7455 (src_map == kernel_map) ? VM_MAP_REMOVE_KUNWIRE : 0));
1c79356b
A
7456 }
7457 *copy_result = copy;
7458 return KERN_SUCCESS;
7459}
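The following is a minimal illustrative sketch (not part of the original source) of how this small-copy path is driven: hand in a source map, address and length, and get back a VM_MAP_COPY_KERNEL_BUFFER-type copy object. The helper name "example_small_copyin" and its parameters are hypothetical.

static kern_return_t
example_small_copyin(
	vm_map_t	some_map,
	vm_map_offset_t	some_addr,
	vm_map_size_t	some_len,
	vm_map_copy_t	*out_copy)
{
	/*
	 * src_destroy == FALSE: leave the source region mapped.
	 * On success, *out_copy holds both the copy header and the data.
	 */
	return vm_map_copyin_kernel_buffer(some_map, some_addr, some_len,
					   FALSE, out_copy);
}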
7460
7461/*
91447636 7462 * Routine: vm_map_copyout_kernel_buffer [internal use only]
1c79356b
A
7463 *
7464 * Description:
7465 * Copy out data from a kernel buffer into space in the
7466 * destination map. The space may be optionally dynamically
7467 * allocated.
7468 *
7469 * If successful, consumes the copy object.
7470 * Otherwise, the caller is responsible for it.
7471 */
91447636
A
7472static int vm_map_copyout_kernel_buffer_failures = 0;
7473static kern_return_t
1c79356b 7474vm_map_copyout_kernel_buffer(
91447636
A
7475 vm_map_t map,
7476 vm_map_address_t *addr, /* IN/OUT */
7477 vm_map_copy_t copy,
39236c6e
A
7478 boolean_t overwrite,
7479 boolean_t consume_on_success)
1c79356b
A
7480{
7481 kern_return_t kr = KERN_SUCCESS;
91447636 7482 thread_t thread = current_thread();
1c79356b
A
7483
7484 if (!overwrite) {
7485
7486 /*
7487 * Allocate space in the target map for the data
7488 */
7489 *addr = 0;
7490 kr = vm_map_enter(map,
7491 addr,
39236c6e
A
7492 vm_map_round_page(copy->size,
7493 VM_MAP_PAGE_MASK(map)),
91447636
A
7494 (vm_map_offset_t) 0,
7495 VM_FLAGS_ANYWHERE,
1c79356b
A
7496 VM_OBJECT_NULL,
7497 (vm_object_offset_t) 0,
7498 FALSE,
7499 VM_PROT_DEFAULT,
7500 VM_PROT_ALL,
7501 VM_INHERIT_DEFAULT);
7502 if (kr != KERN_SUCCESS)
91447636 7503 return kr;
1c79356b
A
7504 }
7505
7506 /*
7507 * Copyout the data from the kernel buffer to the target map.
7508 */
91447636 7509 if (thread->map == map) {
1c79356b
A
7510
7511 /*
7512 * If the target map is the current map, just do
7513 * the copy.
7514 */
b0d623f7
A
7515 assert((vm_size_t) copy->size == copy->size);
7516 if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) {
91447636 7517 kr = KERN_INVALID_ADDRESS;
1c79356b
A
7518 }
7519 }
7520 else {
7521 vm_map_t oldmap;
7522
7523 /*
7524 * If the target map is another map, assume the
7525 * target's address space identity for the duration
7526 * of the copy.
7527 */
7528 vm_map_reference(map);
7529 oldmap = vm_map_switch(map);
7530
b0d623f7
A
7531 assert((vm_size_t) copy->size == copy->size);
7532 if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) {
91447636
A
7533 vm_map_copyout_kernel_buffer_failures++;
7534 kr = KERN_INVALID_ADDRESS;
1c79356b
A
7535 }
7536
7537 (void) vm_map_switch(oldmap);
7538 vm_map_deallocate(map);
7539 }
7540
91447636
A
7541 if (kr != KERN_SUCCESS) {
7542 /* the copy failed, clean up */
7543 if (!overwrite) {
7544 /*
7545 * Deallocate the space we allocated in the target map.
7546 */
39236c6e
A
7547 (void) vm_map_remove(
7548 map,
7549 vm_map_trunc_page(*addr,
7550 VM_MAP_PAGE_MASK(map)),
7551 vm_map_round_page((*addr +
7552 vm_map_round_page(copy->size,
7553 VM_MAP_PAGE_MASK(map))),
7554 VM_MAP_PAGE_MASK(map)),
7555 VM_MAP_NO_FLAGS);
91447636
A
7556 *addr = 0;
7557 }
7558 } else {
7559 /* copy was successful, discard the copy structure */
39236c6e
A
7560 if (consume_on_success) {
7561 kfree(copy, copy->cpy_kalloc_size);
7562 }
91447636 7563 }
1c79356b 7564
91447636 7565 return kr;
1c79356b
A
7566}
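A minimal sketch (not part of the original source) of driving this routine to paste a kernel-buffer copy into a task's map at a freshly allocated address. The helper "example_kernel_buffer_copyout" and its parameters are hypothetical.

static kern_return_t
example_kernel_buffer_copyout(
	vm_map_t		target_map,
	vm_map_copy_t		kbuf_copy,
	vm_map_address_t	*out_addr)
{
	*out_addr = 0;
	/*
	 * overwrite == FALSE: let the routine allocate fresh space.
	 * consume_on_success == TRUE: the copy object is freed if the
	 * copyout succeeds; otherwise it remains the caller's to clean up.
	 */
	return vm_map_copyout_kernel_buffer(target_map, out_addr, kbuf_copy,
					    FALSE, TRUE);
}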
7567
7568/*
7569 * Macro: vm_map_copy_insert
7570 *
7571 * Description:
7572 * Link a copy chain ("copy") into a map at the
7573 * specified location (after "where").
7574 * Side effects:
7575 * The copy chain is destroyed.
7576 * Warning:
7577 * The arguments are evaluated multiple times.
7578 */
7579#define vm_map_copy_insert(map, where, copy) \
7580MACRO_BEGIN \
6d2010ae
A
7581 vm_map_store_copy_insert(map, where, copy); \
7582 zfree(vm_map_copy_zone, copy); \
1c79356b
A
7583MACRO_END
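A short sketch (not part of the original source) of how the macro is typically invoked: with the destination map locked, and with the understanding that the copy header is freed by the macro. "dst_map" and "after_entry" are hypothetical.

static void
example_copy_insert(
	vm_map_t	dst_map,
	vm_map_entry_t	after_entry,
	vm_map_copy_t	copy)
{
	vm_map_lock(dst_map);
	/*
	 * Link every entry of "copy" into dst_map after "after_entry";
	 * the vm_map_copy_t header itself is freed, so "copy" must not
	 * be referenced once the macro returns.
	 */
	vm_map_copy_insert(dst_map, after_entry, copy);
	vm_map_unlock(dst_map);
}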
7584
39236c6e
A
7585void
7586vm_map_copy_remap(
7587 vm_map_t map,
7588 vm_map_entry_t where,
7589 vm_map_copy_t copy,
7590 vm_map_offset_t adjustment,
7591 vm_prot_t cur_prot,
7592 vm_prot_t max_prot,
7593 vm_inherit_t inheritance)
7594{
7595 vm_map_entry_t copy_entry, new_entry;
7596
7597 for (copy_entry = vm_map_copy_first_entry(copy);
7598 copy_entry != vm_map_copy_to_entry(copy);
7599 copy_entry = copy_entry->vme_next) {
7600 /* get a new VM map entry for the map */
7601 new_entry = vm_map_entry_create(map,
7602 !map->hdr.entries_pageable);
7603 /* copy the "copy entry" to the new entry */
7604 vm_map_entry_copy(new_entry, copy_entry);
7605 /* adjust "start" and "end" */
7606 new_entry->vme_start += adjustment;
7607 new_entry->vme_end += adjustment;
7608 /* clear some attributes */
7609 new_entry->inheritance = inheritance;
7610 new_entry->protection = cur_prot;
7611 new_entry->max_protection = max_prot;
7612 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
7613 /* take an extra reference on the entry's "object" */
7614 if (new_entry->is_sub_map) {
7615 vm_map_lock(new_entry->object.sub_map);
7616 vm_map_reference(new_entry->object.sub_map);
7617 vm_map_unlock(new_entry->object.sub_map);
7618 } else {
7619 vm_object_reference(new_entry->object.vm_object);
7620 }
7621 /* insert the new entry in the map */
7622 vm_map_store_entry_link(map, where, new_entry);
7623 /* continue inserting the "copy entries" after the new entry */
7624 where = new_entry;
7625 }
7626}
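A minimal sketch (not part of the original source) of a non-consuming remap, mirroring how vm_map_copyout_internal uses this routine when consume_on_success is FALSE. "dst_map", "after_entry" and "delta" are hypothetical, and dst_map is assumed to be locked by the caller.

static void
example_copy_remap(
	vm_map_t	dst_map,
	vm_map_entry_t	after_entry,
	vm_map_copy_t	copy,
	vm_map_offset_t	delta)
{
	/*
	 * Clone each copy entry into dst_map, shifted by "delta", with
	 * default protections and inheritance; "copy" itself is left
	 * intact for later reuse.
	 */
	vm_map_copy_remap(dst_map, after_entry, copy, delta,
			  VM_PROT_DEFAULT, VM_PROT_ALL,
			  VM_INHERIT_DEFAULT);
}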
7627
1c79356b
A
7628/*
7629 * Routine: vm_map_copyout
7630 *
7631 * Description:
7632 * Copy out a copy chain ("copy") into newly-allocated
7633 * space in the destination map.
7634 *
7635 * If successful, consumes the copy object.
7636 * Otherwise, the caller is responsible for it.
7637 */
39236c6e 7638
1c79356b
A
7639kern_return_t
7640vm_map_copyout(
91447636
A
7641 vm_map_t dst_map,
7642 vm_map_address_t *dst_addr, /* OUT */
7643 vm_map_copy_t copy)
39236c6e
A
7644{
7645 return vm_map_copyout_internal(dst_map, dst_addr, copy,
7646 TRUE, /* consume_on_success */
7647 VM_PROT_DEFAULT,
7648 VM_PROT_ALL,
7649 VM_INHERIT_DEFAULT);
7650}
7651
7652kern_return_t
7653vm_map_copyout_internal(
7654 vm_map_t dst_map,
7655 vm_map_address_t *dst_addr, /* OUT */
7656 vm_map_copy_t copy,
7657 boolean_t consume_on_success,
7658 vm_prot_t cur_protection,
7659 vm_prot_t max_protection,
7660 vm_inherit_t inheritance)
1c79356b 7661{
91447636
A
7662 vm_map_size_t size;
7663 vm_map_size_t adjustment;
7664 vm_map_offset_t start;
1c79356b
A
7665 vm_object_offset_t vm_copy_start;
7666 vm_map_entry_t last;
1c79356b
A
7667 vm_map_entry_t entry;
7668
7669 /*
7670 * Check for null copy object.
7671 */
7672
7673 if (copy == VM_MAP_COPY_NULL) {
7674 *dst_addr = 0;
7675 return(KERN_SUCCESS);
7676 }
7677
7678 /*
7679 * Check for special copy object, created
7680 * by vm_map_copyin_object.
7681 */
7682
7683 if (copy->type == VM_MAP_COPY_OBJECT) {
7684 vm_object_t object = copy->cpy_object;
7685 kern_return_t kr;
7686 vm_object_offset_t offset;
7687
91447636 7688 offset = vm_object_trunc_page(copy->offset);
39236c6e
A
7689 size = vm_map_round_page((copy->size +
7690 (vm_map_size_t)(copy->offset -
7691 offset)),
7692 VM_MAP_PAGE_MASK(dst_map));
1c79356b
A
7693 *dst_addr = 0;
7694 kr = vm_map_enter(dst_map, dst_addr, size,
91447636 7695 (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
1c79356b
A
7696 object, offset, FALSE,
7697 VM_PROT_DEFAULT, VM_PROT_ALL,
7698 VM_INHERIT_DEFAULT);
7699 if (kr != KERN_SUCCESS)
7700 return(kr);
7701 /* Account for non-pagealigned copy object */
91447636 7702 *dst_addr += (vm_map_offset_t)(copy->offset - offset);
39236c6e
A
7703 if (consume_on_success)
7704 zfree(vm_map_copy_zone, copy);
1c79356b
A
7705 return(KERN_SUCCESS);
7706 }
7707
7708 /*
7709 * Check for special kernel buffer allocated
7710 * by new_ipc_kmsg_copyin.
7711 */
7712
7713 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
39236c6e
A
7714 return vm_map_copyout_kernel_buffer(dst_map, dst_addr,
7715 copy, FALSE,
7716 consume_on_success);
1c79356b
A
7717 }
7718
39236c6e 7719
1c79356b
A
7720 /*
7721 * Find space for the data
7722 */
7723
39236c6e
A
7724 vm_copy_start = vm_map_trunc_page((vm_map_size_t)copy->offset,
7725 VM_MAP_COPY_PAGE_MASK(copy));
7726 size = vm_map_round_page((vm_map_size_t)copy->offset + copy->size,
7727 VM_MAP_COPY_PAGE_MASK(copy))
2d21ac55 7728 - vm_copy_start;
1c79356b 7729
39236c6e 7730
2d21ac55 7731StartAgain: ;
1c79356b
A
7732
7733 vm_map_lock(dst_map);
6d2010ae
A
7734 if( dst_map->disable_vmentry_reuse == TRUE) {
7735 VM_MAP_HIGHEST_ENTRY(dst_map, entry, start);
7736 last = entry;
7737 } else {
7738 assert(first_free_is_valid(dst_map));
7739 start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
1c79356b 7740 vm_map_min(dst_map) : last->vme_end;
39236c6e
A
7741 start = vm_map_round_page(start,
7742 VM_MAP_PAGE_MASK(dst_map));
6d2010ae 7743 }
1c79356b
A
7744
7745 while (TRUE) {
7746 vm_map_entry_t next = last->vme_next;
91447636 7747 vm_map_offset_t end = start + size;
1c79356b
A
7748
7749 if ((end > dst_map->max_offset) || (end < start)) {
7750 if (dst_map->wait_for_space) {
7751 if (size <= (dst_map->max_offset - dst_map->min_offset)) {
7752 assert_wait((event_t) dst_map,
7753 THREAD_INTERRUPTIBLE);
7754 vm_map_unlock(dst_map);
91447636 7755 thread_block(THREAD_CONTINUE_NULL);
1c79356b
A
7756 goto StartAgain;
7757 }
7758 }
7759 vm_map_unlock(dst_map);
7760 return(KERN_NO_SPACE);
7761 }
7762
7763 if ((next == vm_map_to_entry(dst_map)) ||
7764 (next->vme_start >= end))
7765 break;
7766
7767 last = next;
7768 start = last->vme_end;
39236c6e
A
7769 start = vm_map_round_page(start,
7770 VM_MAP_PAGE_MASK(dst_map));
7771 }
7772
7773 adjustment = start - vm_copy_start;
7774 if (! consume_on_success) {
7775 /*
7776 * We're not allowed to consume "copy", so we'll have to
7777 * copy its map entries into the destination map below.
7778 * No need to re-allocate map entries from the correct
7779 * (pageable or not) zone, since we'll get new map entries
7780 * during the transfer.
7781 * We'll also adjust the map entries' "start" and "end"
7782 * during the transfer, to keep "copy"'s entries consistent
7783 * with its "offset".
7784 */
7785 goto after_adjustments;
1c79356b
A
7786 }
7787
7788 /*
7789 * Since we're going to just drop the map
7790 * entries from the copy into the destination
7791 * map, they must come from the same pool.
7792 */
7793
7794 if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
2d21ac55
A
7795 /*
7796 * Mismatches occur when dealing with the default
7797 * pager.
7798 */
7799 zone_t old_zone;
7800 vm_map_entry_t next, new;
7801
7802 /*
7803 * Find the zone that the copies were allocated from
7804 */
7ddcb079 7805
2d21ac55
A
7806 entry = vm_map_copy_first_entry(copy);
7807
7808 /*
7809 * Reinitialize the copy so that vm_map_copy_entry_link
7810 * will work.
7811 */
6d2010ae 7812 vm_map_store_copy_reset(copy, entry);
2d21ac55 7813 copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;
2d21ac55
A
7814
7815 /*
7816 * Copy each entry.
7817 */
7818 while (entry != vm_map_copy_to_entry(copy)) {
7ddcb079 7819 new = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
2d21ac55
A
7820 vm_map_entry_copy_full(new, entry);
7821 new->use_pmap = FALSE; /* clr address space specifics */
7822 vm_map_copy_entry_link(copy,
7823 vm_map_copy_last_entry(copy),
7824 new);
7825 next = entry->vme_next;
7ddcb079 7826 old_zone = entry->from_reserved_zone ? vm_map_entry_reserved_zone : vm_map_entry_zone;
2d21ac55
A
7827 zfree(old_zone, entry);
7828 entry = next;
7829 }
1c79356b
A
7830 }
7831
7832 /*
7833 * Adjust the addresses in the copy chain, and
7834 * reset the region attributes.
7835 */
7836
1c79356b
A
7837 for (entry = vm_map_copy_first_entry(copy);
7838 entry != vm_map_copy_to_entry(copy);
7839 entry = entry->vme_next) {
39236c6e
A
7840 if (VM_MAP_PAGE_SHIFT(dst_map) == PAGE_SHIFT) {
7841 /*
7842 * We're injecting this copy entry into a map that
7843 * has the standard page alignment, so clear
7844 * "map_aligned" (which might have been inherited
7845 * from the original map entry).
7846 */
7847 entry->map_aligned = FALSE;
7848 }
7849
1c79356b
A
7850 entry->vme_start += adjustment;
7851 entry->vme_end += adjustment;
7852
39236c6e
A
7853 if (entry->map_aligned) {
7854 assert(VM_MAP_PAGE_ALIGNED(entry->vme_start,
7855 VM_MAP_PAGE_MASK(dst_map)));
7856 assert(VM_MAP_PAGE_ALIGNED(entry->vme_end,
7857 VM_MAP_PAGE_MASK(dst_map)));
7858 }
7859
1c79356b
A
7860 entry->inheritance = VM_INHERIT_DEFAULT;
7861 entry->protection = VM_PROT_DEFAULT;
7862 entry->max_protection = VM_PROT_ALL;
7863 entry->behavior = VM_BEHAVIOR_DEFAULT;
7864
7865 /*
7866 * If the entry is now wired,
7867 * map the pages into the destination map.
7868 */
7869 if (entry->wired_count != 0) {
2d21ac55
A
7870 register vm_map_offset_t va;
7871 vm_object_offset_t offset;
7872 register vm_object_t object;
7873 vm_prot_t prot;
7874 int type_of_fault;
1c79356b 7875
2d21ac55
A
7876 object = entry->object.vm_object;
7877 offset = entry->offset;
7878 va = entry->vme_start;
1c79356b 7879
2d21ac55
A
7880 pmap_pageable(dst_map->pmap,
7881 entry->vme_start,
7882 entry->vme_end,
7883 TRUE);
1c79356b 7884
2d21ac55
A
7885 while (va < entry->vme_end) {
7886 register vm_page_t m;
1c79356b 7887
2d21ac55
A
7888 /*
7889 * Look up the page in the object.
7890 * Assert that the page will be found in the
7891 * top object:
7892 * either
7893 * the object was newly created by
7894 * vm_object_copy_slowly, and has
7895 * copies of all of the pages from
7896 * the source object
7897 * or
7898 * the object was moved from the old
7899 * map entry; because the old map
7900 * entry was wired, all of the pages
7901 * were in the top-level object.
7902 * (XXX not true if we wire pages for
7903 * reading)
7904 */
7905 vm_object_lock(object);
91447636 7906
2d21ac55 7907 m = vm_page_lookup(object, offset);
b0d623f7 7908 if (m == VM_PAGE_NULL || !VM_PAGE_WIRED(m) ||
2d21ac55
A
7909 m->absent)
7910 panic("vm_map_copyout: wiring %p", m);
1c79356b 7911
2d21ac55
A
7912 /*
7913 * ENCRYPTED SWAP:
7914 * The page is assumed to be wired here, so it
7915 * shouldn't be encrypted. Otherwise, we
7916 * couldn't enter it in the page table, since
7917 * we don't want the user to see the encrypted
7918 * data.
7919 */
7920 ASSERT_PAGE_DECRYPTED(m);
1c79356b 7921
2d21ac55 7922 prot = entry->protection;
1c79356b 7923
2d21ac55
A
7924 if (override_nx(dst_map, entry->alias) && prot)
7925 prot |= VM_PROT_EXECUTE;
1c79356b 7926
2d21ac55 7927 type_of_fault = DBG_CACHE_HIT_FAULT;
1c79356b 7928
6d2010ae 7929 vm_fault_enter(m, dst_map->pmap, va, prot, prot,
316670eb 7930 VM_PAGE_WIRED(m), FALSE, FALSE, FALSE, NULL,
2d21ac55 7931 &type_of_fault);
1c79356b 7932
2d21ac55 7933 vm_object_unlock(object);
1c79356b 7934
2d21ac55
A
7935 offset += PAGE_SIZE_64;
7936 va += PAGE_SIZE;
1c79356b
A
7937 }
7938 }
7939 }
7940
39236c6e
A
7941after_adjustments:
7942
1c79356b
A
7943 /*
7944 * Correct the page alignment for the result
7945 */
7946
7947 *dst_addr = start + (copy->offset - vm_copy_start);
7948
7949 /*
7950 * Update the hints and the map size
7951 */
7952
39236c6e
A
7953 if (consume_on_success) {
7954 SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy));
7955 } else {
7956 SAVE_HINT_MAP_WRITE(dst_map, last);
7957 }
1c79356b
A
7958
7959 dst_map->size += size;
7960
7961 /*
7962 * Link in the copy
7963 */
7964
39236c6e
A
7965 if (consume_on_success) {
7966 vm_map_copy_insert(dst_map, last, copy);
7967 } else {
7968 vm_map_copy_remap(dst_map, last, copy, adjustment,
7969 cur_protection, max_protection,
7970 inheritance);
7971 }
1c79356b
A
7972
7973 vm_map_unlock(dst_map);
7974
7975 /*
7976 * XXX If wiring_required, call vm_map_pageable
7977 */
7978
7979 return(KERN_SUCCESS);
7980}
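A minimal sketch (not part of the original source) of a copyout that keeps the copy object alive, i.e. the consume_on_success == FALSE path that vm_map_copyout itself never takes. The helper name and parameters are hypothetical.

static kern_return_t
example_copyout_keep_copy(
	vm_map_t		dst_map,
	vm_map_address_t	*dst_addr,	/* OUT */
	vm_map_copy_t		copy)
{
	/*
	 * Map the copy chain with default protections but do not consume
	 * "copy", so the same chain can be copied out again later.
	 */
	return vm_map_copyout_internal(dst_map, dst_addr, copy,
				       FALSE,	/* consume_on_success */
				       VM_PROT_DEFAULT,
				       VM_PROT_ALL,
				       VM_INHERIT_DEFAULT);
}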
7981
1c79356b
A
7982/*
7983 * Routine: vm_map_copyin
7984 *
7985 * Description:
2d21ac55
A
7986 * see vm_map_copyin_common. Exported via Unsupported.exports.
7987 *
7988 */
7989
7990#undef vm_map_copyin
7991
7992kern_return_t
7993vm_map_copyin(
7994 vm_map_t src_map,
7995 vm_map_address_t src_addr,
7996 vm_map_size_t len,
7997 boolean_t src_destroy,
7998 vm_map_copy_t *copy_result) /* OUT */
7999{
8000 return(vm_map_copyin_common(src_map, src_addr, len, src_destroy,
8001 FALSE, copy_result, FALSE));
8002}
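A minimal sketch (not part of the original source) of the usual round trip: snapshot a region with vm_map_copyin and paste it into another map with vm_map_copyout. "src_map", "dst_map", "src_addr" and "nbytes" are hypothetical.

static kern_return_t
example_copy_between_maps(
	vm_map_t		src_map,
	vm_map_address_t	src_addr,
	vm_map_size_t		nbytes,
	vm_map_t		dst_map,
	vm_map_address_t	*dst_addr)	/* OUT */
{
	vm_map_copy_t	copy;
	kern_return_t	kr;

	/* snapshot the source region without destroying it */
	kr = vm_map_copyin(src_map, src_addr, nbytes, FALSE, &copy);
	if (kr != KERN_SUCCESS)
		return kr;

	/*
	 * Drop the snapshot into newly allocated space in dst_map;
	 * on success the copy object is consumed, on failure we must
	 * discard it ourselves.
	 */
	kr = vm_map_copyout(dst_map, dst_addr, copy);
	if (kr != KERN_SUCCESS)
		vm_map_copy_discard(copy);
	return kr;
}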
8003
8004/*
8005 * Routine: vm_map_copyin_common
8006 *
8007 * Description:
1c79356b
A
8008 * Copy the specified region (src_addr, len) from the
8009 * source address space (src_map), possibly removing
8010 * the region from the source address space (src_destroy).
8011 *
8012 * Returns:
8013 * A vm_map_copy_t object (copy_result), suitable for
8014 * insertion into another address space (using vm_map_copyout),
8015 * copying over another address space region (using
8016 * vm_map_copy_overwrite). If the copy is unused, it
8017 * should be destroyed (using vm_map_copy_discard).
8018 *
8019 * In/out conditions:
8020 * The source map should not be locked on entry.
8021 */
8022
8023typedef struct submap_map {
8024 vm_map_t parent_map;
91447636
A
8025 vm_map_offset_t base_start;
8026 vm_map_offset_t base_end;
2d21ac55 8027 vm_map_size_t base_len;
1c79356b
A
8028 struct submap_map *next;
8029} submap_map_t;
8030
8031kern_return_t
8032vm_map_copyin_common(
8033 vm_map_t src_map,
91447636
A
8034 vm_map_address_t src_addr,
8035 vm_map_size_t len,
1c79356b 8036 boolean_t src_destroy,
91447636 8037 __unused boolean_t src_volatile,
1c79356b
A
8038 vm_map_copy_t *copy_result, /* OUT */
8039 boolean_t use_maxprot)
8040{
1c79356b
A
8041 vm_map_entry_t tmp_entry; /* Result of last map lookup --
8042 * in multi-level lookup, this
8043 * entry contains the actual
8044 * vm_object/offset.
8045 */
8046 register
8047 vm_map_entry_t new_entry = VM_MAP_ENTRY_NULL; /* Map entry for copy */
8048
91447636 8049 vm_map_offset_t src_start; /* Start of current entry --
1c79356b
A
8050 * where copy is taking place now
8051 */
91447636 8052 vm_map_offset_t src_end; /* End of entire region to be
1c79356b 8053 * copied */
2d21ac55 8054 vm_map_offset_t src_base;
91447636 8055 vm_map_t base_map = src_map;
1c79356b
A
8056 boolean_t map_share=FALSE;
8057 submap_map_t *parent_maps = NULL;
8058
8059 register
8060 vm_map_copy_t copy; /* Resulting copy */
91447636 8061 vm_map_address_t copy_addr;
1c79356b
A
8062
8063 /*
8064 * Check for copies of zero bytes.
8065 */
8066
8067 if (len == 0) {
8068 *copy_result = VM_MAP_COPY_NULL;
8069 return(KERN_SUCCESS);
8070 }
8071
4a249263
A
8072 /*
8073 * Check that the end address doesn't overflow
8074 */
8075 src_end = src_addr + len;
8076 if (src_end < src_addr)
8077 return KERN_INVALID_ADDRESS;
8078
1c79356b
A
8079 /*
8080 * If the copy is sufficiently small, use a kernel buffer instead
8081 * of making a virtual copy. The theory being that the cost of
8082 * setting up VM (and taking C-O-W faults) dominates the copy costs
8083 * for small regions.
8084 */
8085 if ((len < msg_ool_size_small) && !use_maxprot)
2d21ac55
A
8086 return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
8087 src_destroy, copy_result);
1c79356b
A
8088
8089 /*
4a249263 8090 * Compute (page aligned) start and end of region
1c79356b 8091 */
39236c6e
A
8092 src_start = vm_map_trunc_page(src_addr,
8093 VM_MAP_PAGE_MASK(src_map));
8094 src_end = vm_map_round_page(src_end,
8095 VM_MAP_PAGE_MASK(src_map));
1c79356b 8096
b0d623f7 8097 XPR(XPR_VM_MAP, "vm_map_copyin_common map 0x%x addr 0x%x len 0x%x dest %d\n", src_map, src_addr, len, src_destroy, 0);
1c79356b 8098
1c79356b
A
8099 /*
8100 * Allocate a header element for the list.
8101 *
8102 * Use the start and end in the header to
8103 * remember the endpoints prior to rounding.
8104 */
8105
8106 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
8107 vm_map_copy_first_entry(copy) =
2d21ac55 8108 vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
1c79356b
A
8109 copy->type = VM_MAP_COPY_ENTRY_LIST;
8110 copy->cpy_hdr.nentries = 0;
8111 copy->cpy_hdr.entries_pageable = TRUE;
39236c6e
A
8112#if 00
8113 copy->cpy_hdr.page_shift = src_map->hdr.page_shift;
8114#else
8115 /*
8116 * The copy entries can be broken down for a variety of reasons,
8117 * so we can't guarantee that they will remain map-aligned...
8118 * Will need to adjust the first copy_entry's "vme_start" and
8119 * the last copy_entry's "vme_end" to be rounded to PAGE_MASK
8120 * rather than the original map's alignment.
8121 */
8122 copy->cpy_hdr.page_shift = PAGE_SHIFT;
8123#endif
1c79356b 8124
6d2010ae
A
8125 vm_map_store_init( &(copy->cpy_hdr) );
8126
1c79356b
A
8127 copy->offset = src_addr;
8128 copy->size = len;
8129
7ddcb079 8130 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
1c79356b
A
8131
8132#define RETURN(x) \
8133 MACRO_BEGIN \
8134 vm_map_unlock(src_map); \
9bccf70c
A
8135 if(src_map != base_map) \
8136 vm_map_deallocate(src_map); \
1c79356b
A
8137 if (new_entry != VM_MAP_ENTRY_NULL) \
8138 vm_map_copy_entry_dispose(copy,new_entry); \
8139 vm_map_copy_discard(copy); \
8140 { \
91447636 8141 submap_map_t *_ptr; \
1c79356b 8142 \
91447636 8143 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
1c79356b 8144 parent_maps=parent_maps->next; \
91447636
A
8145 if (_ptr->parent_map != base_map) \
8146 vm_map_deallocate(_ptr->parent_map); \
8147 kfree(_ptr, sizeof(submap_map_t)); \
1c79356b
A
8148 } \
8149 } \
8150 MACRO_RETURN(x); \
8151 MACRO_END
8152
8153 /*
8154 * Find the beginning of the region.
8155 */
8156
8157 vm_map_lock(src_map);
8158
8159 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry))
8160 RETURN(KERN_INVALID_ADDRESS);
8161 if(!tmp_entry->is_sub_map) {
8162 vm_map_clip_start(src_map, tmp_entry, src_start);
8163 }
8164 /* set for later submap fix-up */
8165 copy_addr = src_start;
8166
8167 /*
8168 * Go through entries until we get to the end.
8169 */
8170
8171 while (TRUE) {
8172 register
8173 vm_map_entry_t src_entry = tmp_entry; /* Top-level entry */
91447636 8174 vm_map_size_t src_size; /* Size of source
1c79356b
A
8175 * map entry (in both
8176 * maps)
8177 */
8178
8179 register
8180 vm_object_t src_object; /* Object to copy */
8181 vm_object_offset_t src_offset;
8182
8183 boolean_t src_needs_copy; /* Should source map
8184 * be made read-only
8185 * for copy-on-write?
8186 */
8187
8188 boolean_t new_entry_needs_copy; /* Will new entry be COW? */
8189
8190 boolean_t was_wired; /* Was source wired? */
8191 vm_map_version_t version; /* Version before locks
8192 * dropped to make copy
8193 */
8194 kern_return_t result; /* Return value from
8195 * copy_strategically.
8196 */
8197 while(tmp_entry->is_sub_map) {
91447636 8198 vm_map_size_t submap_len;
1c79356b
A
8199 submap_map_t *ptr;
8200
8201 ptr = (submap_map_t *)kalloc(sizeof(submap_map_t));
8202 ptr->next = parent_maps;
8203 parent_maps = ptr;
8204 ptr->parent_map = src_map;
8205 ptr->base_start = src_start;
8206 ptr->base_end = src_end;
8207 submap_len = tmp_entry->vme_end - src_start;
8208 if(submap_len > (src_end-src_start))
8209 submap_len = src_end-src_start;
2d21ac55 8210 ptr->base_len = submap_len;
1c79356b
A
8211
8212 src_start -= tmp_entry->vme_start;
8213 src_start += tmp_entry->offset;
8214 src_end = src_start + submap_len;
8215 src_map = tmp_entry->object.sub_map;
8216 vm_map_lock(src_map);
9bccf70c
A
8217 /* keep an outstanding reference for all maps in */
8218 /* the parents' tree except the base map */
8219 vm_map_reference(src_map);
1c79356b
A
8220 vm_map_unlock(ptr->parent_map);
8221 if (!vm_map_lookup_entry(
2d21ac55 8222 src_map, src_start, &tmp_entry))
1c79356b
A
8223 RETURN(KERN_INVALID_ADDRESS);
8224 map_share = TRUE;
8225 if(!tmp_entry->is_sub_map)
2d21ac55 8226 vm_map_clip_start(src_map, tmp_entry, src_start);
1c79356b
A
8227 src_entry = tmp_entry;
8228 }
2d21ac55
A
8229 /* we are now in the lowest level submap... */
8230
0b4e3aa0 8231 if ((tmp_entry->object.vm_object != VM_OBJECT_NULL) &&
55e303ae
A
8232 (tmp_entry->object.vm_object->phys_contiguous)) {
8233 /* This is not supported for now. In the future */
8234 /* we will need to detect the phys_contig */
8235 /* condition and then upgrade copy_slowly */
8236 /* to do a physical copy from the device- */
8237 /* memory-based object. We can piggy-back */
8238 /* off of the was_wired boolean to set up */
8239 /* the proper handling. */
0b4e3aa0
A
8240 RETURN(KERN_PROTECTION_FAILURE);
8241 }
1c79356b
A
8242 /*
8243 * Create a new address map entry to hold the result.
8244 * Fill in the fields from the appropriate source entries.
8245 * We must unlock the source map to do this if we need
8246 * to allocate a map entry.
8247 */
8248 if (new_entry == VM_MAP_ENTRY_NULL) {
2d21ac55
A
8249 version.main_timestamp = src_map->timestamp;
8250 vm_map_unlock(src_map);
1c79356b 8251
7ddcb079 8252 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
1c79356b 8253
2d21ac55
A
8254 vm_map_lock(src_map);
8255 if ((version.main_timestamp + 1) != src_map->timestamp) {
8256 if (!vm_map_lookup_entry(src_map, src_start,
8257 &tmp_entry)) {
8258 RETURN(KERN_INVALID_ADDRESS);
8259 }
8260 if (!tmp_entry->is_sub_map)
8261 vm_map_clip_start(src_map, tmp_entry, src_start);
8262 continue; /* restart w/ new tmp_entry */
1c79356b 8263 }
1c79356b
A
8264 }
8265
8266 /*
8267 * Verify that the region can be read.
8268 */
8269 if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE &&
2d21ac55 8270 !use_maxprot) ||
1c79356b
A
8271 (src_entry->max_protection & VM_PROT_READ) == 0)
8272 RETURN(KERN_PROTECTION_FAILURE);
8273
8274 /*
8275 * Clip against the endpoints of the entire region.
8276 */
8277
8278 vm_map_clip_end(src_map, src_entry, src_end);
8279
8280 src_size = src_entry->vme_end - src_start;
8281 src_object = src_entry->object.vm_object;
8282 src_offset = src_entry->offset;
8283 was_wired = (src_entry->wired_count != 0);
8284
8285 vm_map_entry_copy(new_entry, src_entry);
8286 new_entry->use_pmap = FALSE; /* clr address space specifics */
8287
8288 /*
8289 * Attempt non-blocking copy-on-write optimizations.
8290 */
8291
8292 if (src_destroy &&
8293 (src_object == VM_OBJECT_NULL ||
2d21ac55
A
8294 (src_object->internal && !src_object->true_share
8295 && !map_share))) {
8296 /*
8297 * If we are destroying the source, and the object
8298 * is internal, we can move the object reference
8299 * from the source to the copy. The copy is
8300 * copy-on-write only if the source is.
8301 * We make another reference to the object, because
8302 * destroying the source entry will deallocate it.
8303 */
8304 vm_object_reference(src_object);
1c79356b 8305
2d21ac55
A
8306 /*
8307 * Copy is always unwired. vm_map_copy_entry
8308 * sets its wired count to zero.
8309 */
1c79356b 8310
2d21ac55 8311 goto CopySuccessful;
1c79356b
A
8312 }
8313
8314
2d21ac55 8315 RestartCopy:
1c79356b
A
8316 XPR(XPR_VM_MAP, "vm_map_copyin_common src_obj 0x%x ent 0x%x obj 0x%x was_wired %d\n",
8317 src_object, new_entry, new_entry->object.vm_object,
8318 was_wired, 0);
55e303ae 8319 if ((src_object == VM_OBJECT_NULL ||
2d21ac55
A
8320 (!was_wired && !map_share && !tmp_entry->is_shared)) &&
8321 vm_object_copy_quickly(
8322 &new_entry->object.vm_object,
8323 src_offset,
8324 src_size,
8325 &src_needs_copy,
8326 &new_entry_needs_copy)) {
1c79356b
A
8327
8328 new_entry->needs_copy = new_entry_needs_copy;
8329
8330 /*
8331 * Handle copy-on-write obligations
8332 */
8333
8334 if (src_needs_copy && !tmp_entry->needs_copy) {
0c530ab8
A
8335 vm_prot_t prot;
8336
8337 prot = src_entry->protection & ~VM_PROT_WRITE;
2d21ac55
A
8338
8339 if (override_nx(src_map, src_entry->alias) && prot)
0c530ab8 8340 prot |= VM_PROT_EXECUTE;
2d21ac55 8341
55e303ae
A
8342 vm_object_pmap_protect(
8343 src_object,
8344 src_offset,
8345 src_size,
8346 (src_entry->is_shared ?
2d21ac55
A
8347 PMAP_NULL
8348 : src_map->pmap),
55e303ae 8349 src_entry->vme_start,
0c530ab8
A
8350 prot);
8351
55e303ae 8352 tmp_entry->needs_copy = TRUE;
1c79356b
A
8353 }
8354
8355 /*
8356 * The map has never been unlocked, so it's safe
8357 * to move to the next entry rather than doing
8358 * another lookup.
8359 */
8360
8361 goto CopySuccessful;
8362 }
8363
1c79356b
A
8364 /*
8365 * Take an object reference, so that we may
8366 * release the map lock(s).
8367 */
8368
8369 assert(src_object != VM_OBJECT_NULL);
8370 vm_object_reference(src_object);
8371
8372 /*
8373 * Record the timestamp for later verification.
8374 * Unlock the map.
8375 */
8376
8377 version.main_timestamp = src_map->timestamp;
9bccf70c 8378 vm_map_unlock(src_map); /* Increments timestamp once! */
1c79356b
A
8379
8380 /*
8381 * Perform the copy
8382 */
8383
8384 if (was_wired) {
55e303ae 8385 CopySlowly:
1c79356b
A
8386 vm_object_lock(src_object);
8387 result = vm_object_copy_slowly(
2d21ac55
A
8388 src_object,
8389 src_offset,
8390 src_size,
8391 THREAD_UNINT,
8392 &new_entry->object.vm_object);
1c79356b
A
8393 new_entry->offset = 0;
8394 new_entry->needs_copy = FALSE;
55e303ae
A
8395
8396 }
8397 else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
2d21ac55 8398 (tmp_entry->is_shared || map_share)) {
55e303ae
A
8399 vm_object_t new_object;
8400
2d21ac55 8401 vm_object_lock_shared(src_object);
55e303ae 8402 new_object = vm_object_copy_delayed(
2d21ac55
A
8403 src_object,
8404 src_offset,
8405 src_size,
8406 TRUE);
55e303ae
A
8407 if (new_object == VM_OBJECT_NULL)
8408 goto CopySlowly;
8409
8410 new_entry->object.vm_object = new_object;
8411 new_entry->needs_copy = TRUE;
8412 result = KERN_SUCCESS;
8413
1c79356b
A
8414 } else {
8415 result = vm_object_copy_strategically(src_object,
2d21ac55
A
8416 src_offset,
8417 src_size,
8418 &new_entry->object.vm_object,
8419 &new_entry->offset,
8420 &new_entry_needs_copy);
1c79356b
A
8421
8422 new_entry->needs_copy = new_entry_needs_copy;
1c79356b
A
8423 }
8424
8425 if (result != KERN_SUCCESS &&
8426 result != KERN_MEMORY_RESTART_COPY) {
8427 vm_map_lock(src_map);
8428 RETURN(result);
8429 }
8430
8431 /*
8432 * Throw away the extra reference
8433 */
8434
8435 vm_object_deallocate(src_object);
8436
8437 /*
8438 * Verify that the map has not substantially
8439 * changed while the copy was being made.
8440 */
8441
9bccf70c 8442 vm_map_lock(src_map);
1c79356b
A
8443
8444 if ((version.main_timestamp + 1) == src_map->timestamp)
8445 goto VerificationSuccessful;
8446
8447 /*
8448 * Simple version comparison failed.
8449 *
8450 * Retry the lookup and verify that the
8451 * same object/offset are still present.
8452 *
8453 * [Note: a memory manager that colludes with
8454 * the calling task can detect that we have
8455 * cheated. While the map was unlocked, the
8456 * mapping could have been changed and restored.]
8457 */
8458
8459 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
8460 RETURN(KERN_INVALID_ADDRESS);
8461 }
8462
8463 src_entry = tmp_entry;
8464 vm_map_clip_start(src_map, src_entry, src_start);
8465
91447636
A
8466 if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) &&
8467 !use_maxprot) ||
8468 ((src_entry->max_protection & VM_PROT_READ) == 0))
1c79356b
A
8469 goto VerificationFailed;
8470
39236c6e
A
8471 if (src_entry->vme_end < new_entry->vme_end) {
8472 assert(VM_MAP_PAGE_ALIGNED(src_entry->vme_end,
8473 VM_MAP_COPY_PAGE_MASK(copy)));
8474 new_entry->vme_end = src_entry->vme_end;
8475 src_size = new_entry->vme_end - src_start;
8476 }
1c79356b
A
8477
8478 if ((src_entry->object.vm_object != src_object) ||
8479 (src_entry->offset != src_offset) ) {
8480
8481 /*
8482 * Verification failed.
8483 *
8484 * Start over with this top-level entry.
8485 */
8486
2d21ac55 8487 VerificationFailed: ;
1c79356b
A
8488
8489 vm_object_deallocate(new_entry->object.vm_object);
8490 tmp_entry = src_entry;
8491 continue;
8492 }
8493
8494 /*
8495 * Verification succeeded.
8496 */
8497
2d21ac55 8498 VerificationSuccessful: ;
1c79356b
A
8499
8500 if (result == KERN_MEMORY_RESTART_COPY)
8501 goto RestartCopy;
8502
8503 /*
8504 * Copy succeeded.
8505 */
8506
2d21ac55 8507 CopySuccessful: ;
1c79356b
A
8508
8509 /*
8510 * Link in the new copy entry.
8511 */
8512
8513 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
8514 new_entry);
8515
8516 /*
8517 * Determine whether the entire region
8518 * has been copied.
8519 */
2d21ac55 8520 src_base = src_start;
1c79356b
A
8521 src_start = new_entry->vme_end;
8522 new_entry = VM_MAP_ENTRY_NULL;
8523 while ((src_start >= src_end) && (src_end != 0)) {
8524 if (src_map != base_map) {
8525 submap_map_t *ptr;
8526
8527 ptr = parent_maps;
8528 assert(ptr != NULL);
8529 parent_maps = parent_maps->next;
2d21ac55
A
8530
8531 /* fix up the damage we did in that submap */
8532 vm_map_simplify_range(src_map,
8533 src_base,
8534 src_end);
8535
1c79356b 8536 vm_map_unlock(src_map);
9bccf70c
A
8537 vm_map_deallocate(src_map);
8538 vm_map_lock(ptr->parent_map);
1c79356b 8539 src_map = ptr->parent_map;
2d21ac55
A
8540 src_base = ptr->base_start;
8541 src_start = ptr->base_start + ptr->base_len;
1c79356b
A
8542 src_end = ptr->base_end;
8543 if ((src_end > src_start) &&
2d21ac55
A
8544 !vm_map_lookup_entry(
8545 src_map, src_start, &tmp_entry))
1c79356b 8546 RETURN(KERN_INVALID_ADDRESS);
91447636 8547 kfree(ptr, sizeof(submap_map_t));
1c79356b
A
8548 if(parent_maps == NULL)
8549 map_share = FALSE;
8550 src_entry = tmp_entry->vme_prev;
8551 } else
8552 break;
8553 }
8554 if ((src_start >= src_end) && (src_end != 0))
8555 break;
8556
8557 /*
8558 * Verify that there are no gaps in the region
8559 */
8560
8561 tmp_entry = src_entry->vme_next;
8562 if ((tmp_entry->vme_start != src_start) ||
39236c6e
A
8563 (tmp_entry == vm_map_to_entry(src_map))) {
8564
8565 if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT &&
8566 (vm_map_round_page(src_entry->vme_end,
8567 VM_MAP_PAGE_MASK(src_map)) ==
8568 src_end)) {
8569 vm_map_entry_t last_copy_entry;
8570 vm_map_offset_t adjustment;
8571
8572 /*
8573 * This is the last entry in the range we
8574 * want and it happens to miss a few pages
8575 * because it is not map-aligned (must have
8576 * been imported from a differently-aligned
8577 * map).
8578 * Let's say we're done, but first we have
8579 * to compensate for the alignment adjustment
8580 * we're about to do before returning.
8581 */
8582
8583 last_copy_entry = vm_map_copy_last_entry(copy);
8584 assert(last_copy_entry !=
8585 vm_map_copy_to_entry(copy));
8586 adjustment =
8587 (vm_map_round_page((copy->offset +
8588 copy->size),
8589 VM_MAP_PAGE_MASK(src_map)) -
8590 vm_map_round_page((copy->offset +
8591 copy->size),
8592 PAGE_MASK));
8593 last_copy_entry->vme_end += adjustment;
8594 last_copy_entry->map_aligned = FALSE;
8595 /* ... and we're done */
8596 break;
8597 }
8598
1c79356b 8599 RETURN(KERN_INVALID_ADDRESS);
39236c6e 8600 }
1c79356b
A
8601 }
8602
8603 /*
8604 * If the source should be destroyed, do it now, since the
8605 * copy was successful.
8606 */
8607 if (src_destroy) {
39236c6e
A
8608 (void) vm_map_delete(
8609 src_map,
8610 vm_map_trunc_page(src_addr,
8611 VM_MAP_PAGE_MASK(src_map)),
8612 src_end,
8613 ((src_map == kernel_map) ?
8614 VM_MAP_REMOVE_KUNWIRE :
8615 VM_MAP_NO_FLAGS),
8616 VM_MAP_NULL);
2d21ac55
A
8617 } else {
8618 /* fix up the damage we did in the base map */
39236c6e
A
8619 vm_map_simplify_range(
8620 src_map,
8621 vm_map_trunc_page(src_addr,
8622 VM_MAP_PAGE_MASK(src_map)),
8623 vm_map_round_page(src_end,
8624 VM_MAP_PAGE_MASK(src_map)));
1c79356b
A
8625 }
8626
8627 vm_map_unlock(src_map);
8628
39236c6e
A
8629 if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) {
8630 assert(VM_MAP_COPY_PAGE_MASK(copy) == PAGE_MASK);
8631
8632 /* adjust alignment of first copy_entry's "vme_start" */
8633 tmp_entry = vm_map_copy_first_entry(copy);
8634 if (tmp_entry != vm_map_copy_to_entry(copy)) {
8635 vm_map_offset_t adjustment;
8636 adjustment =
8637 (vm_map_trunc_page(copy->offset,
8638 PAGE_MASK) -
8639 vm_map_trunc_page(copy->offset,
8640 VM_MAP_PAGE_MASK(src_map)));
8641 if (adjustment) {
8642 assert(page_aligned(adjustment));
8643 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
8644 tmp_entry->vme_start += adjustment;
8645 tmp_entry->offset += adjustment;
8646 copy_addr += adjustment;
8647 assert(tmp_entry->vme_start < tmp_entry->vme_end);
8648 }
8649 }
8650
8651 /* adjust alignment of last copy_entry's "vme_end" */
8652 tmp_entry = vm_map_copy_last_entry(copy);
8653 if (tmp_entry != vm_map_copy_to_entry(copy)) {
8654 vm_map_offset_t adjustment;
8655 adjustment =
8656 (vm_map_round_page((copy->offset +
8657 copy->size),
8658 VM_MAP_PAGE_MASK(src_map)) -
8659 vm_map_round_page((copy->offset +
8660 copy->size),
8661 PAGE_MASK));
8662 if (adjustment) {
8663 assert(page_aligned(adjustment));
8664 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
8665 tmp_entry->vme_end -= adjustment;
8666 assert(tmp_entry->vme_start < tmp_entry->vme_end);
8667 }
8668 }
8669 }
8670
1c79356b
A
8671 /* Fix-up start and end points in copy. This is necessary */
8672 /* when the various entries in the copy object were picked */
8673 /* up from different sub-maps */
8674
8675 tmp_entry = vm_map_copy_first_entry(copy);
8676 while (tmp_entry != vm_map_copy_to_entry(copy)) {
39236c6e
A
8677 assert(VM_MAP_PAGE_ALIGNED(
8678 copy_addr + (tmp_entry->vme_end -
8679 tmp_entry->vme_start),
8680 VM_MAP_COPY_PAGE_MASK(copy)));
8681 assert(VM_MAP_PAGE_ALIGNED(
8682 copy_addr,
8683 VM_MAP_COPY_PAGE_MASK(copy)));
8684
8685 /*
8686 * The copy_entries will be injected directly into the
8687 * destination map and might not be "map aligned" there...
8688 */
8689 tmp_entry->map_aligned = FALSE;
8690
1c79356b
A
8691 tmp_entry->vme_end = copy_addr +
8692 (tmp_entry->vme_end - tmp_entry->vme_start);
8693 tmp_entry->vme_start = copy_addr;
e2d2fc5c 8694 assert(tmp_entry->vme_start < tmp_entry->vme_end);
1c79356b
A
8695 copy_addr += tmp_entry->vme_end - tmp_entry->vme_start;
8696 tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next;
8697 }
8698
8699 *copy_result = copy;
8700 return(KERN_SUCCESS);
8701
8702#undef RETURN
8703}
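A minimal sketch (not part of the original source) of the "maxprot" flavour that vm_map_fork_copy relies on further below: calling the common routine with use_maxprot == TRUE so that a region which is mappable, but not currently readable, can still be captured. The helper name is hypothetical.

static kern_return_t
example_copyin_maxprot(
	vm_map_t		src_map,
	vm_map_address_t	addr,
	vm_map_size_t		len,
	vm_map_copy_t		*copy_result)	/* OUT */
{
	return vm_map_copyin_common(src_map, addr, len,
				    FALSE,	/* src_destroy */
				    FALSE,	/* src_volatile (unused) */
				    copy_result,
				    TRUE);	/* use_maxprot */
}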
8704
39236c6e
A
8705kern_return_t
8706vm_map_copy_extract(
8707 vm_map_t src_map,
8708 vm_map_address_t src_addr,
8709 vm_map_size_t len,
8710 vm_map_copy_t *copy_result, /* OUT */
8711 vm_prot_t *cur_prot, /* OUT */
8712 vm_prot_t *max_prot)
8713{
8714 vm_map_offset_t src_start, src_end;
8715 vm_map_copy_t copy;
8716 kern_return_t kr;
8717
8718 /*
8719 * Check for copies of zero bytes.
8720 */
8721
8722 if (len == 0) {
8723 *copy_result = VM_MAP_COPY_NULL;
8724 return(KERN_SUCCESS);
8725 }
8726
8727 /*
8728 * Check that the end address doesn't overflow
8729 */
8730 src_end = src_addr + len;
8731 if (src_end < src_addr)
8732 return KERN_INVALID_ADDRESS;
8733
8734 /*
8735 * Compute (page aligned) start and end of region
8736 */
8737 src_start = vm_map_trunc_page(src_addr, PAGE_MASK);
8738 src_end = vm_map_round_page(src_end, PAGE_MASK);
8739
8740 /*
8741 * Allocate a header element for the list.
8742 *
8743 * Use the start and end in the header to
8744 * remember the endpoints prior to rounding.
8745 */
8746
8747 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
8748 vm_map_copy_first_entry(copy) =
8749 vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
8750 copy->type = VM_MAP_COPY_ENTRY_LIST;
8751 copy->cpy_hdr.nentries = 0;
8752 copy->cpy_hdr.entries_pageable = TRUE;
8753
8754 vm_map_store_init(&copy->cpy_hdr);
8755
8756 copy->offset = 0;
8757 copy->size = len;
8758
8759 kr = vm_map_remap_extract(src_map,
8760 src_addr,
8761 len,
8762 FALSE, /* copy */
8763 &copy->cpy_hdr,
8764 cur_prot,
8765 max_prot,
8766 VM_INHERIT_SHARE,
8767 TRUE); /* pageable */
8768 if (kr != KERN_SUCCESS) {
8769 vm_map_copy_discard(copy);
8770 return kr;
8771 }
8772
8773 *copy_result = copy;
8774 return KERN_SUCCESS;
8775}
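A minimal sketch (not part of the original source) of extracting a range's mappings without copying the data, followed by a hypothetical caller-side policy check on the returned protections.

static kern_return_t
example_copy_extract_readable(
	vm_map_t		src_map,
	vm_map_address_t	addr,
	vm_map_size_t		len,
	vm_map_copy_t		*copy_result)	/* OUT */
{
	vm_prot_t	cur_prot, max_prot;
	kern_return_t	kr;

	/* build a pageable entry list that shares the source mappings */
	kr = vm_map_copy_extract(src_map, addr, len, copy_result,
				 &cur_prot, &max_prot);
	if (kr != KERN_SUCCESS)
		return kr;

	/* hypothetical policy: require that the range was readable */
	if ((cur_prot & VM_PROT_READ) == VM_PROT_NONE) {
		vm_map_copy_discard(*copy_result);
		*copy_result = VM_MAP_COPY_NULL;
		return KERN_PROTECTION_FAILURE;
	}
	return KERN_SUCCESS;
}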
8776
1c79356b
A
8777/*
8778 * vm_map_copyin_object:
8779 *
8780 * Create a copy object from an object.
8781 * Our caller donates an object reference.
8782 */
8783
8784kern_return_t
8785vm_map_copyin_object(
8786 vm_object_t object,
8787 vm_object_offset_t offset, /* offset of region in object */
8788 vm_object_size_t size, /* size of region in object */
8789 vm_map_copy_t *copy_result) /* OUT */
8790{
8791 vm_map_copy_t copy; /* Resulting copy */
8792
8793 /*
8794 * We drop the object into a special copy object
8795 * that contains the object directly.
8796 */
8797
8798 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
8799 copy->type = VM_MAP_COPY_OBJECT;
8800 copy->cpy_object = object;
1c79356b
A
8801 copy->offset = offset;
8802 copy->size = size;
8803
8804 *copy_result = copy;
8805 return(KERN_SUCCESS);
8806}
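A minimal sketch (not part of the original source) of wrapping a VM object the caller already holds a reference on into a VM_MAP_COPY_OBJECT copy and mapping it into a destination map. The helper name and parameters are hypothetical.

static kern_return_t
example_map_object_copy(
	vm_object_t		object,		/* caller donates a reference */
	vm_object_offset_t	obj_offset,
	vm_object_size_t	obj_size,
	vm_map_t		dst_map,
	vm_map_address_t	*dst_addr)	/* OUT */
{
	vm_map_copy_t	copy;
	kern_return_t	kr;

	/* the object reference is donated to the copy object */
	kr = vm_map_copyin_object(object, obj_offset, obj_size, &copy);
	if (kr != KERN_SUCCESS)
		return kr;

	/* vm_map_copyout consumes the copy only on success */
	kr = vm_map_copyout(dst_map, dst_addr, copy);
	if (kr != KERN_SUCCESS)
		vm_map_copy_discard(copy);
	return kr;
}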
8807
91447636 8808static void
1c79356b
A
8809vm_map_fork_share(
8810 vm_map_t old_map,
8811 vm_map_entry_t old_entry,
8812 vm_map_t new_map)
8813{
8814 vm_object_t object;
8815 vm_map_entry_t new_entry;
1c79356b
A
8816
8817 /*
8818 * New sharing code. New map entry
8819 * references original object. Internal
8820 * objects use asynchronous copy algorithm for
8821 * future copies. First make sure we have
8822 * the right object. If we need a shadow,
8823 * or someone else already has one, then
8824 * make a new shadow and share it.
8825 */
8826
8827 object = old_entry->object.vm_object;
8828 if (old_entry->is_sub_map) {
8829 assert(old_entry->wired_count == 0);
0c530ab8 8830#ifndef NO_NESTED_PMAP
1c79356b 8831 if(old_entry->use_pmap) {
91447636
A
8832 kern_return_t result;
8833
1c79356b 8834 result = pmap_nest(new_map->pmap,
2d21ac55
A
8835 (old_entry->object.sub_map)->pmap,
8836 (addr64_t)old_entry->vme_start,
8837 (addr64_t)old_entry->vme_start,
8838 (uint64_t)(old_entry->vme_end - old_entry->vme_start));
1c79356b
A
8839 if(result)
8840 panic("vm_map_fork_share: pmap_nest failed!");
8841 }
0c530ab8 8842#endif /* NO_NESTED_PMAP */
1c79356b 8843 } else if (object == VM_OBJECT_NULL) {
91447636 8844 object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
2d21ac55 8845 old_entry->vme_start));
1c79356b
A
8846 old_entry->offset = 0;
8847 old_entry->object.vm_object = object;
8848 assert(!old_entry->needs_copy);
8849 } else if (object->copy_strategy !=
2d21ac55 8850 MEMORY_OBJECT_COPY_SYMMETRIC) {
1c79356b
A
8851
8852 /*
8853 * We are already using an asymmetric
8854 * copy, and therefore we already have
8855 * the right object.
8856 */
8857
8858 assert(! old_entry->needs_copy);
8859 }
8860 else if (old_entry->needs_copy || /* case 1 */
8861 object->shadowed || /* case 2 */
8862 (!object->true_share && /* case 3 */
2d21ac55 8863 !old_entry->is_shared &&
6d2010ae 8864 (object->vo_size >
2d21ac55
A
8865 (vm_map_size_t)(old_entry->vme_end -
8866 old_entry->vme_start)))) {
1c79356b
A
8867
8868 /*
8869 * We need to create a shadow.
8870 * There are three cases here.
8871 * In the first case, we need to
8872 * complete a deferred symmetrical
8873 * copy that we participated in.
8874 * In the second and third cases,
8875 * we need to create the shadow so
8876 * that changes that we make to the
8877 * object do not interfere with
8878 * any symmetrical copies which
8879 * have occurred (case 2) or which
8880 * might occur (case 3).
8881 *
8882 * The first case is when we had
8883 * deferred shadow object creation
8884 * via the entry->needs_copy mechanism.
8885 * This mechanism only works when
8886 * only one entry points to the source
8887 * object, and we are about to create
8888 * a second entry pointing to the
8889 * same object. The problem is that
8890 * there is no way of mapping from
8891 * an object to the entries pointing
8892 * to it. (Deferred shadow creation
8893 * works with one entry because it occurs
8894 * at fault time, and we walk from the
8895 * entry to the object when handling
8896 * the fault.)
8897 *
8898 * The second case is when the object
8899 * to be shared has already been copied
8900 * with a symmetric copy, but we point
8901 * directly to the object without
8902 * needs_copy set in our entry. (This
8903 * can happen because different ranges
8904 * of an object can be pointed to by
8905 * different entries. In particular,
8906 * a single entry pointing to an object
8907 * can be split by a call to vm_inherit,
8908 * which, combined with task_create, can
8909 * result in the different entries
8910 * having different needs_copy values.)
8911 * The shadowed flag in the object allows
8912 * us to detect this case. The problem
8913 * with this case is that if this object
8914 * has or will have shadows, then we
8915 * must not perform an asymmetric copy
8916 * of this object, since such a copy
8917 * allows the object to be changed, which
8918 * will break the previous symmetrical
8919 * copies (which rely upon the object
8920 * not changing). In a sense, the shadowed
8921 * flag says "don't change this object".
8922 * We fix this by creating a shadow
8923 * object for this object, and sharing
8924 * that. This works because we are free
8925 * to change the shadow object (and thus
8926 * to use an asymmetric copy strategy);
8927 * this is also semantically correct,
8928 * since this object is temporary, and
8929 * therefore a copy of the object is
8930 * as good as the object itself. (This
8931 * is not true for permanent objects,
8932 * since the pager needs to see changes,
8933 * which won't happen if the changes
8934 * are made to a copy.)
8935 *
8936 * The third case is when the object
8937 * to be shared has parts sticking
8938 * outside of the entry we're working
8939 * with, and thus may in the future
8940 * be subject to a symmetrical copy.
8941 * (This is a preemptive version of
8942 * case 2.)
8943 */
1c79356b
A
8944 vm_object_shadow(&old_entry->object.vm_object,
8945 &old_entry->offset,
91447636 8946 (vm_map_size_t) (old_entry->vme_end -
2d21ac55 8947 old_entry->vme_start));
1c79356b
A
8948
8949 /*
8950 * If we're making a shadow for other than
8951 * copy on write reasons, then we have
8952 * to remove write permission.
8953 */
8954
1c79356b
A
8955 if (!old_entry->needs_copy &&
8956 (old_entry->protection & VM_PROT_WRITE)) {
0c530ab8
A
8957 vm_prot_t prot;
8958
8959 prot = old_entry->protection & ~VM_PROT_WRITE;
2d21ac55
A
8960
8961 if (override_nx(old_map, old_entry->alias) && prot)
0c530ab8 8962 prot |= VM_PROT_EXECUTE;
2d21ac55 8963
316670eb 8964 if (old_map->mapped_in_other_pmaps) {
9bccf70c
A
8965 vm_object_pmap_protect(
8966 old_entry->object.vm_object,
8967 old_entry->offset,
8968 (old_entry->vme_end -
2d21ac55 8969 old_entry->vme_start),
9bccf70c
A
8970 PMAP_NULL,
8971 old_entry->vme_start,
0c530ab8 8972 prot);
1c79356b 8973 } else {
9bccf70c 8974 pmap_protect(old_map->pmap,
2d21ac55
A
8975 old_entry->vme_start,
8976 old_entry->vme_end,
8977 prot);
1c79356b
A
8978 }
8979 }
8980
8981 old_entry->needs_copy = FALSE;
8982 object = old_entry->object.vm_object;
8983 }
6d2010ae 8984
1c79356b
A
8985
8986 /*
8987 * If object was using a symmetric copy strategy,
8988 * change its copy strategy to the default
8989 * asymmetric copy strategy, which is copy_delay
8990 * in the non-norma case and copy_call in the
8991 * norma case. Bump the reference count for the
8992 * new entry.
8993 */
8994
8995 if(old_entry->is_sub_map) {
8996 vm_map_lock(old_entry->object.sub_map);
8997 vm_map_reference(old_entry->object.sub_map);
8998 vm_map_unlock(old_entry->object.sub_map);
8999 } else {
9000 vm_object_lock(object);
2d21ac55 9001 vm_object_reference_locked(object);
1c79356b
A
9002 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
9003 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
9004 }
9005 vm_object_unlock(object);
9006 }
9007
9008 /*
9009 * Clone the entry, using object ref from above.
9010 * Mark both entries as shared.
9011 */
9012
7ddcb079
A
9013 new_entry = vm_map_entry_create(new_map, FALSE); /* Never the kernel
9014 * map or descendants */
1c79356b
A
9015 vm_map_entry_copy(new_entry, old_entry);
9016 old_entry->is_shared = TRUE;
9017 new_entry->is_shared = TRUE;
9018
9019 /*
9020 * Insert the entry into the new map -- we
9021 * know we're inserting at the end of the new
9022 * map.
9023 */
9024
6d2010ae 9025 vm_map_store_entry_link(new_map, vm_map_last_entry(new_map), new_entry);
1c79356b
A
9026
9027 /*
9028 * Update the physical map
9029 */
9030
9031 if (old_entry->is_sub_map) {
9032 /* Bill Angell pmap support goes here */
9033 } else {
9034 pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
2d21ac55
A
9035 old_entry->vme_end - old_entry->vme_start,
9036 old_entry->vme_start);
1c79356b
A
9037 }
9038}
9039
91447636 9040static boolean_t
1c79356b
A
9041vm_map_fork_copy(
9042 vm_map_t old_map,
9043 vm_map_entry_t *old_entry_p,
9044 vm_map_t new_map)
9045{
9046 vm_map_entry_t old_entry = *old_entry_p;
91447636
A
9047 vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start;
9048 vm_map_offset_t start = old_entry->vme_start;
1c79356b
A
9049 vm_map_copy_t copy;
9050 vm_map_entry_t last = vm_map_last_entry(new_map);
9051
9052 vm_map_unlock(old_map);
9053 /*
9054 * Use maxprot version of copyin because we
9055 * care about whether this memory can ever
9056 * be accessed, not just whether it's accessible
9057 * right now.
9058 */
9059 if (vm_map_copyin_maxprot(old_map, start, entry_size, FALSE, &copy)
9060 != KERN_SUCCESS) {
9061 /*
9062 * The map might have changed while it
9063 * was unlocked, check it again. Skip
9064 * any blank space or permanently
9065 * unreadable region.
9066 */
9067 vm_map_lock(old_map);
9068 if (!vm_map_lookup_entry(old_map, start, &last) ||
55e303ae 9069 (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) {
1c79356b
A
9070 last = last->vme_next;
9071 }
9072 *old_entry_p = last;
9073
9074 /*
9075 * XXX For some error returns, want to
9076 * XXX skip to the next element. Note
9077 * that INVALID_ADDRESS and
9078 * PROTECTION_FAILURE are handled above.
9079 */
9080
9081 return FALSE;
9082 }
9083
9084 /*
9085 * Insert the copy into the new map
9086 */
9087
9088 vm_map_copy_insert(new_map, last, copy);
9089
9090 /*
9091 * Pick up the traversal at the end of
9092 * the copied region.
9093 */
9094
9095 vm_map_lock(old_map);
9096 start += entry_size;
9097 if (! vm_map_lookup_entry(old_map, start, &last)) {
9098 last = last->vme_next;
9099 } else {
2d21ac55
A
9100 if (last->vme_start == start) {
9101 /*
9102 * No need to clip here and we don't
9103 * want to cause any unnecessary
9104 * unnesting...
9105 */
9106 } else {
9107 vm_map_clip_start(old_map, last, start);
9108 }
1c79356b
A
9109 }
9110 *old_entry_p = last;
9111
9112 return TRUE;
9113}
9114
9115/*
9116 * vm_map_fork:
9117 *
9118 * Create and return a new map based on the old
9119 * map, according to the inheritance values on the
9120 * regions in that map.
9121 *
9122 * The source map must not be locked.
9123 */
9124vm_map_t
9125vm_map_fork(
316670eb 9126 ledger_t ledger,
1c79356b
A
9127 vm_map_t old_map)
9128{
2d21ac55 9129 pmap_t new_pmap;
1c79356b
A
9130 vm_map_t new_map;
9131 vm_map_entry_t old_entry;
91447636 9132 vm_map_size_t new_size = 0, entry_size;
1c79356b
A
9133 vm_map_entry_t new_entry;
9134 boolean_t src_needs_copy;
9135 boolean_t new_entry_needs_copy;
9136
316670eb 9137 new_pmap = pmap_create(ledger, (vm_map_size_t) 0,
b0d623f7
A
9138#if defined(__i386__) || defined(__x86_64__)
9139 old_map->pmap->pm_task_map != TASK_MAP_32BIT
9140#else
316670eb 9141#error Unknown architecture.
b0d623f7
A
9142#endif
9143 );
2d21ac55 9144
1c79356b
A
9145 vm_map_reference_swap(old_map);
9146 vm_map_lock(old_map);
9147
9148 new_map = vm_map_create(new_pmap,
2d21ac55
A
9149 old_map->min_offset,
9150 old_map->max_offset,
9151 old_map->hdr.entries_pageable);
39236c6e
A
9152 /* inherit the parent map's page size */
9153 vm_map_set_page_shift(new_map, VM_MAP_PAGE_SHIFT(old_map));
1c79356b 9154 for (
2d21ac55
A
9155 old_entry = vm_map_first_entry(old_map);
9156 old_entry != vm_map_to_entry(old_map);
9157 ) {
1c79356b
A
9158
9159 entry_size = old_entry->vme_end - old_entry->vme_start;
9160
9161 switch (old_entry->inheritance) {
9162 case VM_INHERIT_NONE:
9163 break;
9164
9165 case VM_INHERIT_SHARE:
9166 vm_map_fork_share(old_map, old_entry, new_map);
9167 new_size += entry_size;
9168 break;
9169
9170 case VM_INHERIT_COPY:
9171
9172 /*
9173 * Inline the copy_quickly case;
9174 * upon failure, fall back on call
9175 * to vm_map_fork_copy.
9176 */
9177
9178 if(old_entry->is_sub_map)
9179 break;
9bccf70c 9180 if ((old_entry->wired_count != 0) ||
2d21ac55
A
9181 ((old_entry->object.vm_object != NULL) &&
9182 (old_entry->object.vm_object->true_share))) {
1c79356b
A
9183 goto slow_vm_map_fork_copy;
9184 }
9185
7ddcb079 9186 new_entry = vm_map_entry_create(new_map, FALSE); /* never the kernel map or descendants */
1c79356b
A
9187 vm_map_entry_copy(new_entry, old_entry);
9188 /* clear address space specifics */
9189 new_entry->use_pmap = FALSE;
9190
9191 if (! vm_object_copy_quickly(
2d21ac55
A
9192 &new_entry->object.vm_object,
9193 old_entry->offset,
9194 (old_entry->vme_end -
9195 old_entry->vme_start),
9196 &src_needs_copy,
9197 &new_entry_needs_copy)) {
1c79356b
A
9198 vm_map_entry_dispose(new_map, new_entry);
9199 goto slow_vm_map_fork_copy;
9200 }
9201
9202 /*
9203 * Handle copy-on-write obligations
9204 */
9205
9206 if (src_needs_copy && !old_entry->needs_copy) {
0c530ab8
A
9207 vm_prot_t prot;
9208
9209 prot = old_entry->protection & ~VM_PROT_WRITE;
2d21ac55
A
9210
9211 if (override_nx(old_map, old_entry->alias) && prot)
0c530ab8 9212 prot |= VM_PROT_EXECUTE;
2d21ac55 9213
1c79356b
A
9214 vm_object_pmap_protect(
9215 old_entry->object.vm_object,
9216 old_entry->offset,
9217 (old_entry->vme_end -
2d21ac55 9218 old_entry->vme_start),
1c79356b 9219 ((old_entry->is_shared
316670eb 9220 || old_map->mapped_in_other_pmaps)
2d21ac55
A
9221 ? PMAP_NULL :
9222 old_map->pmap),
1c79356b 9223 old_entry->vme_start,
0c530ab8 9224 prot);
1c79356b
A
9225
9226 old_entry->needs_copy = TRUE;
9227 }
9228 new_entry->needs_copy = new_entry_needs_copy;
9229
9230 /*
9231 * Insert the entry at the end
9232 * of the map.
9233 */
9234
6d2010ae 9235 vm_map_store_entry_link(new_map, vm_map_last_entry(new_map),
1c79356b
A
9236 new_entry);
9237 new_size += entry_size;
9238 break;
9239
9240 slow_vm_map_fork_copy:
9241 if (vm_map_fork_copy(old_map, &old_entry, new_map)) {
9242 new_size += entry_size;
9243 }
9244 continue;
9245 }
9246 old_entry = old_entry->vme_next;
9247 }
9248
9249 new_map->size = new_size;
9250 vm_map_unlock(old_map);
9251 vm_map_deallocate(old_map);
9252
9253 return(new_map);
9254}
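A brief sketch (not part of the original source) of how a fork path might duplicate a parent's address space; "child_ledger" and "parent_map" are hypothetical.

static vm_map_t
example_fork_address_space(
	ledger_t	child_ledger,
	vm_map_t	parent_map)
{
	/*
	 * vm_map_fork takes (and later drops) its own reference on
	 * parent_map and walks its entries, honouring each entry's
	 * inheritance value (NONE / SHARE / COPY).
	 */
	return vm_map_fork(child_ledger, parent_map);
}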
9255
2d21ac55
A
9256/*
9257 * vm_map_exec:
9258 *
9259 * Setup the "new_map" with the proper execution environment according
9260 * to the type of executable (platform, 64bit, chroot environment).
9261 * Map the comm page and shared region, etc...
9262 */
9263kern_return_t
9264vm_map_exec(
9265 vm_map_t new_map,
9266 task_t task,
9267 void *fsroot,
9268 cpu_type_t cpu)
9269{
9270 SHARED_REGION_TRACE_DEBUG(
9271 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): ->\n",
9272 current_task(), new_map, task, fsroot, cpu));
9273 (void) vm_commpage_enter(new_map, task);
9274 (void) vm_shared_region_enter(new_map, task, fsroot, cpu);
9275 SHARED_REGION_TRACE_DEBUG(
9276 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): <-\n",
9277 current_task(), new_map, task, fsroot, cpu));
9278 return KERN_SUCCESS;
9279}
1c79356b
A
9280
9281/*
9282 * vm_map_lookup_locked:
9283 *
9284 * Finds the VM object, offset, and
9285 * protection for a given virtual address in the
9286 * specified map, assuming a page fault of the
9287 * type specified.
9288 *
9289 * Returns the (object, offset, protection) for
9290 * this address, whether it is wired down, and whether
9291 * this map has the only reference to the data in question.
9292 * In order to later verify this lookup, a "version"
9293 * is returned.
9294 *
9295 * The map MUST be locked by the caller and WILL be
9296 * locked on exit. In order to guarantee the
9297 * existence of the returned object, it is returned
9298 * locked.
9299 *
9300 * If a lookup is requested with "write protection"
9301 * specified, the map may be changed to perform virtual
9302 * copying operations, although the data referenced will
9303 * remain the same.
9304 */
9305kern_return_t
9306vm_map_lookup_locked(
9307 vm_map_t *var_map, /* IN/OUT */
2d21ac55 9308 vm_map_offset_t vaddr,
91447636 9309 vm_prot_t fault_type,
2d21ac55 9310 int object_lock_type,
1c79356b
A
9311 vm_map_version_t *out_version, /* OUT */
9312 vm_object_t *object, /* OUT */
9313 vm_object_offset_t *offset, /* OUT */
9314 vm_prot_t *out_prot, /* OUT */
9315 boolean_t *wired, /* OUT */
2d21ac55 9316 vm_object_fault_info_t fault_info, /* OUT */
91447636 9317 vm_map_t *real_map)
1c79356b
A
9318{
9319 vm_map_entry_t entry;
9320 register vm_map_t map = *var_map;
9321 vm_map_t old_map = *var_map;
9322 vm_map_t cow_sub_map_parent = VM_MAP_NULL;
91447636
A
9323 vm_map_offset_t cow_parent_vaddr = 0;
9324 vm_map_offset_t old_start = 0;
9325 vm_map_offset_t old_end = 0;
1c79356b 9326 register vm_prot_t prot;
6d2010ae
A
9327 boolean_t mask_protections;
9328 vm_prot_t original_fault_type;
9329
9330 /*
9331 * VM_PROT_MASK means that the caller wants us to use "fault_type"
9332 * as a mask against the mapping's actual protections, not as an
9333 * absolute value.
9334 */
9335 mask_protections = (fault_type & VM_PROT_IS_MASK) ? TRUE : FALSE;
9336 fault_type &= ~VM_PROT_IS_MASK;
9337 original_fault_type = fault_type;
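	/*
	 * Example of the mask semantics (illustration only): a caller passing
	 * (VM_PROT_READ | VM_PROT_WRITE | VM_PROT_IS_MASK) against a
	 * read-only mapping proceeds below with the intersection,
	 * VM_PROT_READ, whereas the same request without the flag would fail
	 * the protection check with KERN_PROTECTION_FAILURE.
	 */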
1c79356b 9338
91447636 9339 *real_map = map;
6d2010ae
A
9340
9341RetryLookup:
9342 fault_type = original_fault_type;
1c79356b
A
9343
9344 /*
9345 * If the map has an interesting hint, try it before calling
9346 * full blown lookup routine.
9347 */
1c79356b 9348 entry = map->hint;
1c79356b
A
9349
9350 if ((entry == vm_map_to_entry(map)) ||
9351 (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
9352 vm_map_entry_t tmp_entry;
9353
9354 /*
9355 * Entry was either not a valid hint, or the vaddr
9356 * was not contained in the entry, so do a full lookup.
9357 */
9358 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
9359 if((cow_sub_map_parent) && (cow_sub_map_parent != map))
9360 vm_map_unlock(cow_sub_map_parent);
91447636 9361 if((*real_map != map)
2d21ac55 9362 && (*real_map != cow_sub_map_parent))
91447636 9363 vm_map_unlock(*real_map);
1c79356b
A
9364 return KERN_INVALID_ADDRESS;
9365 }
9366
9367 entry = tmp_entry;
9368 }
9369 if(map == old_map) {
9370 old_start = entry->vme_start;
9371 old_end = entry->vme_end;
9372 }
9373
9374 /*
9375 * Handle submaps. Drop lock on upper map, submap is
9376 * returned locked.
9377 */
9378
9379submap_recurse:
9380 if (entry->is_sub_map) {
91447636
A
9381 vm_map_offset_t local_vaddr;
9382 vm_map_offset_t end_delta;
9383 vm_map_offset_t start_delta;
1c79356b
A
9384 vm_map_entry_t submap_entry;
9385 boolean_t mapped_needs_copy=FALSE;
9386
9387 local_vaddr = vaddr;
9388
2d21ac55 9389 if ((entry->use_pmap && !(fault_type & VM_PROT_WRITE))) {
91447636
A
9390 /* if real_map equals map we unlock below */
9391 if ((*real_map != map) &&
2d21ac55 9392 (*real_map != cow_sub_map_parent))
91447636
A
9393 vm_map_unlock(*real_map);
9394 *real_map = entry->object.sub_map;
1c79356b
A
9395 }
9396
2d21ac55 9397 if(entry->needs_copy && (fault_type & VM_PROT_WRITE)) {
1c79356b
A
9398 if (!mapped_needs_copy) {
9399 if (vm_map_lock_read_to_write(map)) {
9400 vm_map_lock_read(map);
99c3a104 9401 *real_map = map;
1c79356b
A
9402 goto RetryLookup;
9403 }
9404 vm_map_lock_read(entry->object.sub_map);
99c3a104 9405 *var_map = entry->object.sub_map;
1c79356b
A
9406 cow_sub_map_parent = map;
9407 /* reset base to map before cow object */
9408 /* this is the map which will accept */
9409 /* the new cow object */
9410 old_start = entry->vme_start;
9411 old_end = entry->vme_end;
9412 cow_parent_vaddr = vaddr;
9413 mapped_needs_copy = TRUE;
9414 } else {
9415 vm_map_lock_read(entry->object.sub_map);
99c3a104 9416 *var_map = entry->object.sub_map;
1c79356b 9417 if((cow_sub_map_parent != map) &&
2d21ac55 9418 (*real_map != map))
1c79356b
A
9419 vm_map_unlock(map);
9420 }
9421 } else {
9422 vm_map_lock_read(entry->object.sub_map);
99c3a104 9423 *var_map = entry->object.sub_map;
1c79356b
A
9424 /* leave map locked if it is a target */
9425 /* cow sub_map above; otherwise, just */
9426 /* follow the maps down to the object. */
9427 /* Here we unlock, knowing we are not */
9428 /* revisiting the map. */
91447636 9429 if((*real_map != map) && (map != cow_sub_map_parent))
1c79356b
A
9430 vm_map_unlock_read(map);
9431 }
9432
99c3a104 9433 map = *var_map;
1c79356b
A
9434
9435 /* calculate the offset in the submap for vaddr */
9436 local_vaddr = (local_vaddr - entry->vme_start) + entry->offset;
9437
2d21ac55 9438 RetrySubMap:
1c79356b
A
9439 if(!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
9440 if((cow_sub_map_parent) && (cow_sub_map_parent != map)){
9441 vm_map_unlock(cow_sub_map_parent);
9442 }
91447636 9443 if((*real_map != map)
2d21ac55 9444 && (*real_map != cow_sub_map_parent)) {
91447636 9445 vm_map_unlock(*real_map);
1c79356b 9446 }
91447636 9447 *real_map = map;
1c79356b
A
9448 return KERN_INVALID_ADDRESS;
9449 }
2d21ac55 9450
1c79356b
A
9451 /* find the attenuated shadow of the underlying object */
9452 /* on our target map */
9453
9454 /* In plain English: the submap object may extend beyond the */
9455 /* region mapped by the entry, or may fill only a portion */
9456 /* of it. For our purposes, we only care if the object */
9457 /* doesn't fill it. In that case the area which will */
9458 /* ultimately be clipped in the top map only needs */
9459 /* to be as big as the portion of the underlying entry */
9460 /* which is actually mapped. */
9461 start_delta = submap_entry->vme_start > entry->offset ?
2d21ac55 9462 submap_entry->vme_start - entry->offset : 0;
1c79356b
A
9463
9464 end_delta =
2d21ac55 9465 (entry->offset + start_delta + (old_end - old_start)) <=
1c79356b 9466 submap_entry->vme_end ?
2d21ac55
A
9467 0 : (entry->offset +
9468 (old_end - old_start))
9469 - submap_entry->vme_end;
1c79356b
A
9470
9471 old_start += start_delta;
9472 old_end -= end_delta;
9473
9474 if(submap_entry->is_sub_map) {
9475 entry = submap_entry;
9476 vaddr = local_vaddr;
9477 goto submap_recurse;
9478 }
9479
9480 if(((fault_type & VM_PROT_WRITE) && cow_sub_map_parent)) {
9481
2d21ac55
A
9482 vm_object_t sub_object, copy_object;
9483 vm_object_offset_t copy_offset;
91447636
A
9484 vm_map_offset_t local_start;
9485 vm_map_offset_t local_end;
0b4e3aa0 9486 boolean_t copied_slowly = FALSE;
1c79356b
A
9487
9488 if (vm_map_lock_read_to_write(map)) {
9489 vm_map_lock_read(map);
9490 old_start -= start_delta;
9491 old_end += end_delta;
9492 goto RetrySubMap;
9493 }
0b4e3aa0
A
9494
9495
2d21ac55
A
9496 sub_object = submap_entry->object.vm_object;
9497 if (sub_object == VM_OBJECT_NULL) {
9498 sub_object =
1c79356b 9499 vm_object_allocate(
91447636 9500 (vm_map_size_t)
2d21ac55
A
9501 (submap_entry->vme_end -
9502 submap_entry->vme_start));
9503 submap_entry->object.vm_object = sub_object;
91447636 9504 submap_entry->offset = 0;
1c79356b
A
9505 }
9506 local_start = local_vaddr -
2d21ac55 9507 (cow_parent_vaddr - old_start);
1c79356b 9508 local_end = local_vaddr +
2d21ac55 9509 (old_end - cow_parent_vaddr);
1c79356b
A
9510 vm_map_clip_start(map, submap_entry, local_start);
9511 vm_map_clip_end(map, submap_entry, local_end);
2d21ac55
A
9512 /* unnesting was done in vm_map_clip_start/end() */
9513 assert(!submap_entry->use_pmap);
1c79356b
A
9514
9515 /* This is the COW case; let's connect */
9516 /* an entry in our space to the underlying */
9517 /* object in the submap, bypassing the */
9518 /* submap. */
0b4e3aa0
A
9519
9520
2d21ac55 9521 if(submap_entry->wired_count != 0 ||
4a3eedf9
A
9522 (sub_object->copy_strategy ==
9523 MEMORY_OBJECT_COPY_NONE)) {
2d21ac55
A
9524 vm_object_lock(sub_object);
9525 vm_object_copy_slowly(sub_object,
9526 submap_entry->offset,
9527 (submap_entry->vme_end -
9528 submap_entry->vme_start),
9529 FALSE,
9530 &copy_object);
9531 copied_slowly = TRUE;
0b4e3aa0 9532 } else {
2d21ac55 9533
0b4e3aa0 9534 /* set up shadow object */
2d21ac55 9535 copy_object = sub_object;
0b4e3aa0 9536 vm_object_reference(copy_object);
2d21ac55 9537 sub_object->shadowed = TRUE;
0b4e3aa0 9538 submap_entry->needs_copy = TRUE;
0c530ab8
A
9539
9540 prot = submap_entry->protection & ~VM_PROT_WRITE;
2d21ac55 9541
316670eb 9542 if (override_nx(old_map, submap_entry->alias) && prot)
0c530ab8 9543 prot |= VM_PROT_EXECUTE;
2d21ac55 9544
0b4e3aa0 9545 vm_object_pmap_protect(
2d21ac55 9546 sub_object,
1c79356b
A
9547 submap_entry->offset,
9548 submap_entry->vme_end -
2d21ac55 9549 submap_entry->vme_start,
9bccf70c 9550 (submap_entry->is_shared
316670eb 9551 || map->mapped_in_other_pmaps) ?
2d21ac55 9552 PMAP_NULL : map->pmap,
1c79356b 9553 submap_entry->vme_start,
0c530ab8 9554 prot);
0b4e3aa0 9555 }
1c79356b 9556
2d21ac55
A
9557 /*
9558 * Adjust the fault offset to the submap entry.
9559 */
9560 copy_offset = (local_vaddr -
9561 submap_entry->vme_start +
9562 submap_entry->offset);
1c79356b
A
9563
9564 /* This works differently from the */
9565 /* normal submap case. We go back */
9566 /* to the parent of the cow map and */
9567 /* clip out the target portion of */
9568 /* the sub_map, substituting the */
9569 /* new copy object. */
9570
9571 vm_map_unlock(map);
9572 local_start = old_start;
9573 local_end = old_end;
9574 map = cow_sub_map_parent;
9575 *var_map = cow_sub_map_parent;
9576 vaddr = cow_parent_vaddr;
9577 cow_sub_map_parent = NULL;
9578
2d21ac55
A
9579 if(!vm_map_lookup_entry(map,
9580 vaddr, &entry)) {
9581 vm_object_deallocate(
9582 copy_object);
9583 vm_map_lock_write_to_read(map);
9584 return KERN_INVALID_ADDRESS;
9585 }
9586
9587 /* clip out the portion of space */
9588 /* mapped by the sub map which */
9589 /* corresponds to the underlying */
9590 /* object */
9591
9592 /*
9593 * Clip (and unnest) the smallest nested chunk
9594 * possible around the faulting address...
9595 */
9596 local_start = vaddr & ~(pmap_nesting_size_min - 1);
9597 local_end = local_start + pmap_nesting_size_min;
9598 /*
9599 * ... but don't go beyond the "old_start" to "old_end"
9600 * range, to avoid spanning over another VM region
9601 * with a possibly different VM object and/or offset.
9602 */
9603 if (local_start < old_start) {
9604 local_start = old_start;
9605 }
9606 if (local_end > old_end) {
9607 local_end = old_end;
9608 }
9609 /*
9610 * Adjust copy_offset to the start of the range.
9611 */
9612 copy_offset -= (vaddr - local_start);
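			/*
			 * Worked example (illustrative values only; the real
			 * pmap_nesting_size_min is platform-dependent): with a
			 * nesting chunk of 0x200000 and vaddr = 0x7fff5fb01234,
			 * local_start rounds down to 0x7fff5fa00000 and
			 * local_end up to 0x7fff5fc00000; both are then pulled
			 * in to [old_start, old_end] if that range is smaller,
			 * and copy_offset is moved back by (vaddr - local_start)
			 * so it describes the start of the clipped range.
			 */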
9613
1c79356b
A
9614 vm_map_clip_start(map, entry, local_start);
9615 vm_map_clip_end(map, entry, local_end);
2d21ac55
A
9616 /* unnesting was done in vm_map_clip_start/end() */
9617 assert(!entry->use_pmap);
1c79356b
A
9618
9619 /* substitute copy object for */
9620 /* shared map entry */
9621 vm_map_deallocate(entry->object.sub_map);
9622 entry->is_sub_map = FALSE;
1c79356b 9623 entry->object.vm_object = copy_object;
1c79356b 9624
2d21ac55
A
9625 /* propagate the submap entry's protections */
9626 entry->protection |= submap_entry->protection;
9627 entry->max_protection |= submap_entry->max_protection;
9628
0b4e3aa0 9629 if(copied_slowly) {
4a3eedf9 9630 entry->offset = local_start - old_start;
0b4e3aa0
A
9631 entry->needs_copy = FALSE;
9632 entry->is_shared = FALSE;
9633 } else {
2d21ac55 9634 entry->offset = copy_offset;
0b4e3aa0
A
9635 entry->needs_copy = TRUE;
9636 if(entry->inheritance == VM_INHERIT_SHARE)
9637 entry->inheritance = VM_INHERIT_COPY;
9638 if (map != old_map)
9639 entry->is_shared = TRUE;
9640 }
1c79356b 9641 if(entry->inheritance == VM_INHERIT_SHARE)
0b4e3aa0 9642 entry->inheritance = VM_INHERIT_COPY;
1c79356b
A
9643
9644 vm_map_lock_write_to_read(map);
9645 } else {
9646 if((cow_sub_map_parent)
2d21ac55
A
9647 && (cow_sub_map_parent != *real_map)
9648 && (cow_sub_map_parent != map)) {
1c79356b
A
9649 vm_map_unlock(cow_sub_map_parent);
9650 }
9651 entry = submap_entry;
9652 vaddr = local_vaddr;
9653 }
9654 }
9655
9656 /*
9657 * Check whether this task is allowed to have
9658 * this page.
9659 */
2d21ac55 9660
6601e61a 9661 prot = entry->protection;
0c530ab8 9662
316670eb 9663 if (override_nx(old_map, entry->alias) && prot) {
0c530ab8 9664 /*
2d21ac55 9665 * HACK -- if not a stack, then allow execution
0c530ab8
A
9666 */
9667 prot |= VM_PROT_EXECUTE;
2d21ac55
A
9668 }
9669
6d2010ae
A
9670 if (mask_protections) {
9671 fault_type &= prot;
9672 if (fault_type == VM_PROT_NONE) {
9673 goto protection_failure;
9674 }
9675 }
1c79356b 9676 if ((fault_type & (prot)) != fault_type) {
6d2010ae 9677 protection_failure:
2d21ac55
A
9678 if (*real_map != map) {
9679 vm_map_unlock(*real_map);
0c530ab8
A
9680 }
9681 *real_map = map;
9682
9683 if ((fault_type & VM_PROT_EXECUTE) && prot)
2d21ac55 9684 log_stack_execution_failure((addr64_t)vaddr, prot);
0c530ab8 9685
2d21ac55 9686 DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL);
0c530ab8 9687 return KERN_PROTECTION_FAILURE;
1c79356b
A
9688 }
9689
9690 /*
9691 * If this page is not pageable, we have to get
9692 * it for all possible accesses.
9693 */
9694
91447636
A
9695 *wired = (entry->wired_count != 0);
9696 if (*wired)
0c530ab8 9697 fault_type = prot;
1c79356b
A
9698
9699 /*
9700 * If the entry was copy-on-write, we either ...
9701 */
9702
9703 if (entry->needs_copy) {
9704 /*
9705 * If we want to write the page, we may as well
9706 * handle that now since we've got the map locked.
9707 *
9708 * If we don't need to write the page, we just
9709 * demote the permissions allowed.
9710 */
9711
91447636 9712 if ((fault_type & VM_PROT_WRITE) || *wired) {
1c79356b
A
9713 /*
9714 * Make a new object, and place it in the
9715 * object chain. Note that no new references
9716 * have appeared -- one just moved from the
9717 * map to the new object.
9718 */
9719
9720 if (vm_map_lock_read_to_write(map)) {
9721 vm_map_lock_read(map);
9722 goto RetryLookup;
9723 }
9724 vm_object_shadow(&entry->object.vm_object,
9725 &entry->offset,
91447636 9726 (vm_map_size_t) (entry->vme_end -
2d21ac55 9727 entry->vme_start));
1c79356b
A
9728
9729 entry->object.vm_object->shadowed = TRUE;
9730 entry->needs_copy = FALSE;
9731 vm_map_lock_write_to_read(map);
9732 }
9733 else {
9734 /*
9735 * We're attempting to read a copy-on-write
9736 * page -- don't allow writes.
9737 */
9738
9739 prot &= (~VM_PROT_WRITE);
9740 }
9741 }
9742
9743 /*
9744 * Create an object if necessary.
9745 */
9746 if (entry->object.vm_object == VM_OBJECT_NULL) {
9747
9748 if (vm_map_lock_read_to_write(map)) {
9749 vm_map_lock_read(map);
9750 goto RetryLookup;
9751 }
9752
9753 entry->object.vm_object = vm_object_allocate(
91447636 9754 (vm_map_size_t)(entry->vme_end - entry->vme_start));
1c79356b
A
9755 entry->offset = 0;
9756 vm_map_lock_write_to_read(map);
9757 }
9758
9759 /*
9760 * Return the object/offset from this entry. If the entry
9761 * was copy-on-write or empty, it has been fixed up. Also
9762 * return the protection.
9763 */
9764
9765 *offset = (vaddr - entry->vme_start) + entry->offset;
9766 *object = entry->object.vm_object;
9767 *out_prot = prot;
2d21ac55
A
9768
9769 if (fault_info) {
9770 fault_info->interruptible = THREAD_UNINT; /* for now... */
9771 /* ... the caller will change "interruptible" if needed */
9772 fault_info->cluster_size = 0;
9773 fault_info->user_tag = entry->alias;
9774 fault_info->behavior = entry->behavior;
9775 fault_info->lo_offset = entry->offset;
9776 fault_info->hi_offset = (entry->vme_end - entry->vme_start) + entry->offset;
9777 fault_info->no_cache = entry->no_cache;
b0d623f7 9778 fault_info->stealth = FALSE;
6d2010ae
A
9779 fault_info->io_sync = FALSE;
9780 fault_info->cs_bypass = (entry->used_for_jit)? TRUE : FALSE;
0b4c1975 9781 fault_info->mark_zf_absent = FALSE;
316670eb 9782 fault_info->batch_pmap_op = FALSE;
2d21ac55 9783 }
1c79356b
A
9784
9785 /*
9786 * Lock the object to prevent it from disappearing
9787 */
2d21ac55
A
9788 if (object_lock_type == OBJECT_LOCK_EXCLUSIVE)
9789 vm_object_lock(*object);
9790 else
9791 vm_object_lock_shared(*object);
9792
1c79356b
A
9793 /*
9794 * Save the version number
9795 */
9796
9797 out_version->main_timestamp = map->timestamp;
9798
9799 return KERN_SUCCESS;
9800}
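/*
 * A minimal sketch of the lookup/verify protocol implemented above, loosely
 * modeled on a fault handler.  resolve_one_page() and its error handling are
 * hypothetical; the locking discipline, the NULL fault_info, and the
 * vm_map_verify()/vm_map_verify_done() pairing follow this file.
 */
#if 0
static kern_return_t
resolve_one_page(
	vm_map_t	map,
	vm_map_offset_t	vaddr,
	vm_prot_t	fault_type)
{
	vm_map_version_t	version;
	vm_object_t		object;
	vm_object_offset_t	offset;
	vm_prot_t		prot;
	boolean_t		wired;
	vm_map_t		real_map;
	kern_return_t		kr;

	vm_map_lock_read(map);
	kr = vm_map_lookup_locked(&map, vaddr, fault_type,
				  OBJECT_LOCK_EXCLUSIVE, &version,
				  &object, &offset, &prot, &wired,
				  NULL,			/* no fault_info needed */
				  &real_map);
	if (kr != KERN_SUCCESS) {
		vm_map_unlock_read(map);
		return kr;
	}

	/* ... resolve the page from (object, offset) here ... */

	vm_object_unlock(object);
	vm_map_unlock_read(map);
	if (real_map != map)
		vm_map_unlock(real_map);

	/* later, before trusting the result, re-check the map version */
	if (!vm_map_verify(map, &version)) {
		/* the map changed underneath us; the caller must retry */
		return KERN_ABORTED;
	}
	vm_map_verify_done(map, &version);	/* drops the read lock */
	return KERN_SUCCESS;
}
#endif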
9801
9802
9803/*
9804 * vm_map_verify:
9805 *
9806 * Verifies that the map in question has not changed
9807 * since the given version. If successful, the map
9808 * will not change until vm_map_verify_done() is called.
9809 */
9810boolean_t
9811vm_map_verify(
9812 register vm_map_t map,
9813 register vm_map_version_t *version) /* REF */
9814{
9815 boolean_t result;
9816
9817 vm_map_lock_read(map);
9818 result = (map->timestamp == version->main_timestamp);
9819
9820 if (!result)
9821 vm_map_unlock_read(map);
9822
9823 return(result);
9824}
9825
9826/*
9827 * vm_map_verify_done:
9828 *
9829 * Releases locks acquired by a vm_map_verify.
9830 *
9831 * This is now a macro in vm/vm_map.h. It does a
9832 * vm_map_unlock_read on the map.
9833 */
9834
9835
91447636
A
9836/*
9837 * TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
9838 * Goes away after regular vm_region_recurse function migrates to
9839 * 64 bits
9840 * vm_region_recurse: A form of vm_region which follows the
9841 * submaps in a target map
9842 *
9843 */
9844
9845kern_return_t
9846vm_map_region_recurse_64(
9847 vm_map_t map,
9848 vm_map_offset_t *address, /* IN/OUT */
9849 vm_map_size_t *size, /* OUT */
9850 natural_t *nesting_depth, /* IN/OUT */
9851 vm_region_submap_info_64_t submap_info, /* IN/OUT */
9852 mach_msg_type_number_t *count) /* IN/OUT */
9853{
39236c6e 9854 mach_msg_type_number_t original_count;
91447636
A
9855 vm_region_extended_info_data_t extended;
9856 vm_map_entry_t tmp_entry;
9857 vm_map_offset_t user_address;
9858 unsigned int user_max_depth;
9859
9860 /*
9861 * "curr_entry" is the VM map entry preceding or including the
9862 * address we're looking for.
9863 * "curr_map" is the map or sub-map containing "curr_entry".
6d2010ae
A
9864 * "curr_address" is the equivalent of the top map's "user_address"
9865 * in the current map.
91447636
A
9866 * "curr_offset" is the cumulative offset of "curr_map" in the
9867 * target task's address space.
9868 * "curr_depth" is the depth of "curr_map" in the chain of
9869 * sub-maps.
6d2010ae
A
9870 *
9871 * "curr_max_below" and "curr_max_above" limit the range (around
9872 * "curr_address") we should take into account in the current (sub)map.
9873 * They limit the range to what's visible through the map entries
9874 * we've traversed from the top map to the current map.
9875
91447636
A
9876 */
9877 vm_map_entry_t curr_entry;
6d2010ae 9878 vm_map_address_t curr_address;
91447636
A
9879 vm_map_offset_t curr_offset;
9880 vm_map_t curr_map;
9881 unsigned int curr_depth;
6d2010ae
A
9882 vm_map_offset_t curr_max_below, curr_max_above;
9883 vm_map_offset_t curr_skip;
91447636
A
9884
9885 /*
9886 * "next_" is the same as "curr_" but for the VM region immediately
9887 * after the address we're looking for. We need to keep track of this
9888 * too because we want to return info about that region if the
9889 * address we're looking for is not mapped.
9890 */
9891 vm_map_entry_t next_entry;
9892 vm_map_offset_t next_offset;
6d2010ae 9893 vm_map_offset_t next_address;
91447636
A
9894 vm_map_t next_map;
9895 unsigned int next_depth;
6d2010ae
A
9896 vm_map_offset_t next_max_below, next_max_above;
9897 vm_map_offset_t next_skip;
91447636 9898
2d21ac55
A
9899 boolean_t look_for_pages;
9900 vm_region_submap_short_info_64_t short_info;
9901
91447636
A
9902 if (map == VM_MAP_NULL) {
9903 /* no address space to work on */
9904 return KERN_INVALID_ARGUMENT;
9905 }
9906
39236c6e
A
9907
9908 if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) {
9909 /*
9910 * "info" structure is not big enough and
9911 * would overflow
9912 */
9913 return KERN_INVALID_ARGUMENT;
9914 }
9915
9916 original_count = *count;
9917
9918 if (original_count < VM_REGION_SUBMAP_INFO_V0_COUNT_64) {
9919 *count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
9920 look_for_pages = FALSE;
9921 short_info = (vm_region_submap_short_info_64_t) submap_info;
9922 submap_info = NULL;
2d21ac55
A
9923 } else {
9924 look_for_pages = TRUE;
39236c6e 9925 *count = VM_REGION_SUBMAP_INFO_V0_COUNT_64;
2d21ac55 9926 short_info = NULL;
39236c6e
A
9927
9928 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
9929 *count = VM_REGION_SUBMAP_INFO_V1_COUNT_64;
9930 }
91447636 9931 }
39236c6e 9932
91447636
A
9933 user_address = *address;
9934 user_max_depth = *nesting_depth;
9935
9936 curr_entry = NULL;
9937 curr_map = map;
6d2010ae 9938 curr_address = user_address;
91447636 9939 curr_offset = 0;
6d2010ae 9940 curr_skip = 0;
91447636 9941 curr_depth = 0;
6d2010ae
A
9942 curr_max_above = ((vm_map_offset_t) -1) - curr_address;
9943 curr_max_below = curr_address;
91447636
A
9944
9945 next_entry = NULL;
9946 next_map = NULL;
6d2010ae 9947 next_address = 0;
91447636 9948 next_offset = 0;
6d2010ae 9949 next_skip = 0;
91447636 9950 next_depth = 0;
6d2010ae
A
9951 next_max_above = (vm_map_offset_t) -1;
9952 next_max_below = (vm_map_offset_t) -1;
91447636
A
9953
9954 if (not_in_kdp) {
9955 vm_map_lock_read(curr_map);
9956 }
9957
9958 for (;;) {
9959 if (vm_map_lookup_entry(curr_map,
6d2010ae 9960 curr_address,
91447636
A
9961 &tmp_entry)) {
9962 /* tmp_entry contains the address we're looking for */
9963 curr_entry = tmp_entry;
9964 } else {
6d2010ae 9965 vm_map_offset_t skip;
91447636
A
9966 /*
9967 * The address is not mapped. "tmp_entry" is the
9968 * map entry preceding the address. We want the next
9969 * one, if it exists.
9970 */
9971 curr_entry = tmp_entry->vme_next;
6d2010ae 9972
91447636 9973 if (curr_entry == vm_map_to_entry(curr_map) ||
6d2010ae
A
9974 (curr_entry->vme_start >=
9975 curr_address + curr_max_above)) {
91447636
A
9976 /* no next entry at this level: stop looking */
9977 if (not_in_kdp) {
9978 vm_map_unlock_read(curr_map);
9979 }
9980 curr_entry = NULL;
9981 curr_map = NULL;
9982 curr_offset = 0;
9983 curr_depth = 0;
6d2010ae
A
9984 curr_max_above = 0;
9985 curr_max_below = 0;
91447636
A
9986 break;
9987 }
6d2010ae
A
9988
9989 /* adjust current address and offset */
9990 skip = curr_entry->vme_start - curr_address;
9991 curr_address = curr_entry->vme_start;
9992 curr_skip = skip;
9993 curr_offset += skip;
9994 curr_max_above -= skip;
9995 curr_max_below = 0;
91447636
A
9996 }
9997
9998 /*
9999 * Is the next entry at this level closer to the address (or
10000 * deeper in the submap chain) than the one we had
10001 * so far ?
10002 */
10003 tmp_entry = curr_entry->vme_next;
10004 if (tmp_entry == vm_map_to_entry(curr_map)) {
10005 /* no next entry at this level */
6d2010ae
A
10006 } else if (tmp_entry->vme_start >=
10007 curr_address + curr_max_above) {
91447636
A
10008 /*
10009 * tmp_entry is beyond the scope of what we mapped of
10010 * this submap in the upper level: ignore it.
10011 */
10012 } else if ((next_entry == NULL) ||
10013 (tmp_entry->vme_start + curr_offset <=
10014 next_entry->vme_start + next_offset)) {
10015 /*
10016 * We didn't have a "next_entry" or this one is
10017 * closer to the address we're looking for:
10018 * use this "tmp_entry" as the new "next_entry".
10019 */
10020 if (next_entry != NULL) {
10021 /* unlock the last "next_map" */
10022 if (next_map != curr_map && not_in_kdp) {
10023 vm_map_unlock_read(next_map);
10024 }
10025 }
10026 next_entry = tmp_entry;
10027 next_map = curr_map;
91447636 10028 next_depth = curr_depth;
6d2010ae
A
10029 next_address = next_entry->vme_start;
10030 next_skip = curr_skip;
10031 next_offset = curr_offset;
10032 next_offset += (next_address - curr_address);
10033 next_max_above = MIN(next_max_above, curr_max_above);
10034 next_max_above = MIN(next_max_above,
10035 next_entry->vme_end - next_address);
10036 next_max_below = MIN(next_max_below, curr_max_below);
10037 next_max_below = MIN(next_max_below,
10038 next_address - next_entry->vme_start);
91447636
A
10039 }
10040
6d2010ae
A
10041 /*
10042 * "curr_max_{above,below}" allow us to keep track of the
10043 * portion of the submap that is actually mapped at this level:
10044 * the rest of that submap is irrelevant to us, since it's not
10045 * mapped here.
10046 * The relevant portion of the map runs from
10047 * "curr_entry->offset" for the size of "curr_entry".
10048 */
10049 curr_max_above = MIN(curr_max_above,
10050 curr_entry->vme_end - curr_address);
10051 curr_max_below = MIN(curr_max_below,
10052 curr_address - curr_entry->vme_start);
10053
91447636
A
10054 if (!curr_entry->is_sub_map ||
10055 curr_depth >= user_max_depth) {
10056 /*
10057 * We hit a leaf map or we reached the maximum depth
10058 * we could, so stop looking. Keep the current map
10059 * locked.
10060 */
10061 break;
10062 }
10063
10064 /*
10065 * Get down to the next submap level.
10066 */
10067
10068 /*
10069 * Lock the next level and unlock the current level,
10070 * unless we need to keep it locked to access the "next_entry"
10071 * later.
10072 */
10073 if (not_in_kdp) {
10074 vm_map_lock_read(curr_entry->object.sub_map);
10075 }
10076 if (curr_map == next_map) {
10077 /* keep "next_map" locked in case we need it */
10078 } else {
10079 /* release this map */
b0d623f7
A
10080 if (not_in_kdp)
10081 vm_map_unlock_read(curr_map);
91447636
A
10082 }
10083
10084 /*
10085 * Adjust the offset. "curr_entry" maps the submap
10086 * at relative address "curr_entry->vme_start" in the
10087 * curr_map but skips the first "curr_entry->offset"
10088 * bytes of the submap.
10089 * "curr_offset" always represents the offset of a virtual
10090 * address in the curr_map relative to the absolute address
10091 * space (i.e. the top-level VM map).
10092 */
10093 curr_offset +=
6d2010ae
A
10094 (curr_entry->offset - curr_entry->vme_start);
10095 curr_address = user_address + curr_offset;
91447636
A
10096 /* switch to the submap */
10097 curr_map = curr_entry->object.sub_map;
10098 curr_depth++;
91447636
A
10099 curr_entry = NULL;
10100 }
10101
10102 if (curr_entry == NULL) {
10103 /* no VM region contains the address... */
10104 if (next_entry == NULL) {
10105 /* ... and no VM region follows it either */
10106 return KERN_INVALID_ADDRESS;
10107 }
10108 /* ... gather info about the next VM region */
10109 curr_entry = next_entry;
10110 curr_map = next_map; /* still locked ... */
6d2010ae
A
10111 curr_address = next_address;
10112 curr_skip = next_skip;
91447636
A
10113 curr_offset = next_offset;
10114 curr_depth = next_depth;
6d2010ae
A
10115 curr_max_above = next_max_above;
10116 curr_max_below = next_max_below;
10117 if (curr_map == map) {
10118 user_address = curr_address;
10119 }
91447636
A
10120 } else {
10121 /* we won't need "next_entry" after all */
10122 if (next_entry != NULL) {
10123 /* release "next_map" */
10124 if (next_map != curr_map && not_in_kdp) {
10125 vm_map_unlock_read(next_map);
10126 }
10127 }
10128 }
10129 next_entry = NULL;
10130 next_map = NULL;
10131 next_offset = 0;
6d2010ae 10132 next_skip = 0;
91447636 10133 next_depth = 0;
6d2010ae
A
10134 next_max_below = -1;
10135 next_max_above = -1;
91447636
A
10136
10137 *nesting_depth = curr_depth;
6d2010ae
A
10138 *size = curr_max_above + curr_max_below;
10139 *address = user_address + curr_skip - curr_max_below;
91447636 10140
b0d623f7
A
10141// LP64todo: all the current tools are 32bit, obviously never worked for 64b
10142// so probably should be a real 32b ID vs. ptr.
10143// Current users just check for equality
39236c6e 10144#define INFO_MAKE_OBJECT_ID(p) ((uint32_t)(uintptr_t)VM_KERNEL_ADDRPERM(p))
b0d623f7 10145
2d21ac55
A
10146 if (look_for_pages) {
10147 submap_info->user_tag = curr_entry->alias;
10148 submap_info->offset = curr_entry->offset;
10149 submap_info->protection = curr_entry->protection;
10150 submap_info->inheritance = curr_entry->inheritance;
10151 submap_info->max_protection = curr_entry->max_protection;
10152 submap_info->behavior = curr_entry->behavior;
10153 submap_info->user_wired_count = curr_entry->user_wired_count;
10154 submap_info->is_submap = curr_entry->is_sub_map;
b0d623f7 10155 submap_info->object_id = INFO_MAKE_OBJECT_ID(curr_entry->object.vm_object);
2d21ac55
A
10156 } else {
10157 short_info->user_tag = curr_entry->alias;
10158 short_info->offset = curr_entry->offset;
10159 short_info->protection = curr_entry->protection;
10160 short_info->inheritance = curr_entry->inheritance;
10161 short_info->max_protection = curr_entry->max_protection;
10162 short_info->behavior = curr_entry->behavior;
10163 short_info->user_wired_count = curr_entry->user_wired_count;
10164 short_info->is_submap = curr_entry->is_sub_map;
b0d623f7 10165 short_info->object_id = INFO_MAKE_OBJECT_ID(curr_entry->object.vm_object);
2d21ac55 10166 }
91447636
A
10167
10168 extended.pages_resident = 0;
10169 extended.pages_swapped_out = 0;
10170 extended.pages_shared_now_private = 0;
10171 extended.pages_dirtied = 0;
39236c6e 10172 extended.pages_reusable = 0;
91447636
A
10173 extended.external_pager = 0;
10174 extended.shadow_depth = 0;
10175
10176 if (not_in_kdp) {
10177 if (!curr_entry->is_sub_map) {
6d2010ae
A
10178 vm_map_offset_t range_start, range_end;
10179 range_start = MAX((curr_address - curr_max_below),
10180 curr_entry->vme_start);
10181 range_end = MIN((curr_address + curr_max_above),
10182 curr_entry->vme_end);
91447636 10183 vm_map_region_walk(curr_map,
6d2010ae 10184 range_start,
91447636 10185 curr_entry,
6d2010ae
A
10186 (curr_entry->offset +
10187 (range_start -
10188 curr_entry->vme_start)),
10189 range_end - range_start,
2d21ac55 10190 &extended,
39236c6e 10191 look_for_pages, VM_REGION_EXTENDED_INFO_COUNT);
91447636
A
10192 if (extended.external_pager &&
10193 extended.ref_count == 2 &&
10194 extended.share_mode == SM_SHARED) {
2d21ac55 10195 extended.share_mode = SM_PRIVATE;
91447636 10196 }
91447636
A
10197 } else {
10198 if (curr_entry->use_pmap) {
2d21ac55 10199 extended.share_mode = SM_TRUESHARED;
91447636 10200 } else {
2d21ac55 10201 extended.share_mode = SM_PRIVATE;
91447636 10202 }
2d21ac55 10203 extended.ref_count =
91447636
A
10204 curr_entry->object.sub_map->ref_count;
10205 }
10206 }
10207
2d21ac55
A
10208 if (look_for_pages) {
10209 submap_info->pages_resident = extended.pages_resident;
10210 submap_info->pages_swapped_out = extended.pages_swapped_out;
10211 submap_info->pages_shared_now_private =
10212 extended.pages_shared_now_private;
10213 submap_info->pages_dirtied = extended.pages_dirtied;
10214 submap_info->external_pager = extended.external_pager;
10215 submap_info->shadow_depth = extended.shadow_depth;
10216 submap_info->share_mode = extended.share_mode;
10217 submap_info->ref_count = extended.ref_count;
39236c6e
A
10218
10219 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
10220 submap_info->pages_reusable = extended.pages_reusable;
10221 }
2d21ac55
A
10222 } else {
10223 short_info->external_pager = extended.external_pager;
10224 short_info->shadow_depth = extended.shadow_depth;
10225 short_info->share_mode = extended.share_mode;
10226 short_info->ref_count = extended.ref_count;
10227 }
91447636
A
10228
10229 if (not_in_kdp) {
10230 vm_map_unlock_read(curr_map);
10231 }
10232
10233 return KERN_SUCCESS;
10234}
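/*
 * User-space view of the count negotiation above (illustrative fragment;
 * compile it in a userland tool, not in the kernel).  Passing the full
 * VM_REGION_SUBMAP_INFO_COUNT_64 selects the "look_for_pages" path; a count
 * of VM_REGION_SUBMAP_SHORT_INFO_COUNT_64 would select the short variant.
 */
#if 0	/* user-space example */
#include <mach/mach.h>
#include <mach/mach_vm.h>
#include <stdio.h>

static void
dump_one_region(mach_vm_address_t addr)
{
	vm_region_submap_info_data_64_t	info;
	mach_msg_type_number_t		count = VM_REGION_SUBMAP_INFO_COUNT_64;
	mach_vm_size_t			size = 0;
	natural_t			depth = 16;	/* follow nested submaps */
	kern_return_t			kr;

	kr = mach_vm_region_recurse(mach_task_self(), &addr, &size,
				    &depth, (vm_region_recurse_info_t)&info,
				    &count);
	if (kr == KERN_SUCCESS)
		printf("0x%llx-0x%llx depth %u prot 0x%x resident %u\n",
		       (unsigned long long)addr,
		       (unsigned long long)(addr + size), depth,
		       info.protection, info.pages_resident);
}
#endif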
10235
1c79356b
A
10236/*
10237 * vm_region:
10238 *
10239 * User call to obtain information about a region in
10240 * a task's address map. Several info flavors are
10241 * supported (basic, 64-bit basic, extended, and top).
10242 *
10243 * XXX The reserved and behavior fields cannot be filled
10244 * in until the vm merge from the IK is completed, and
10245 * vm_reserve is implemented.
1c79356b
A
10246 */
10247
10248kern_return_t
91447636 10249vm_map_region(
1c79356b 10250 vm_map_t map,
91447636
A
10251 vm_map_offset_t *address, /* IN/OUT */
10252 vm_map_size_t *size, /* OUT */
1c79356b
A
10253 vm_region_flavor_t flavor, /* IN */
10254 vm_region_info_t info, /* OUT */
91447636
A
10255 mach_msg_type_number_t *count, /* IN/OUT */
10256 mach_port_t *object_name) /* OUT */
1c79356b
A
10257{
10258 vm_map_entry_t tmp_entry;
1c79356b 10259 vm_map_entry_t entry;
91447636 10260 vm_map_offset_t start;
1c79356b
A
10261
10262 if (map == VM_MAP_NULL)
10263 return(KERN_INVALID_ARGUMENT);
10264
10265 switch (flavor) {
91447636 10266
1c79356b 10267 case VM_REGION_BASIC_INFO:
2d21ac55 10268 /* legacy for old 32-bit objects info */
1c79356b 10269 {
2d21ac55 10270 vm_region_basic_info_t basic;
91447636 10271
2d21ac55
A
10272 if (*count < VM_REGION_BASIC_INFO_COUNT)
10273 return(KERN_INVALID_ARGUMENT);
1c79356b 10274
2d21ac55
A
10275 basic = (vm_region_basic_info_t) info;
10276 *count = VM_REGION_BASIC_INFO_COUNT;
1c79356b 10277
2d21ac55 10278 vm_map_lock_read(map);
1c79356b 10279
2d21ac55
A
10280 start = *address;
10281 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
10282 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
10283 vm_map_unlock_read(map);
10284 return(KERN_INVALID_ADDRESS);
10285 }
10286 } else {
10287 entry = tmp_entry;
1c79356b 10288 }
1c79356b 10289
2d21ac55 10290 start = entry->vme_start;
1c79356b 10291
2d21ac55
A
10292 basic->offset = (uint32_t)entry->offset;
10293 basic->protection = entry->protection;
10294 basic->inheritance = entry->inheritance;
10295 basic->max_protection = entry->max_protection;
10296 basic->behavior = entry->behavior;
10297 basic->user_wired_count = entry->user_wired_count;
10298 basic->reserved = entry->is_sub_map;
10299 *address = start;
10300 *size = (entry->vme_end - start);
91447636 10301
2d21ac55
A
10302 if (object_name) *object_name = IP_NULL;
10303 if (entry->is_sub_map) {
10304 basic->shared = FALSE;
10305 } else {
10306 basic->shared = entry->is_shared;
10307 }
91447636 10308
2d21ac55
A
10309 vm_map_unlock_read(map);
10310 return(KERN_SUCCESS);
91447636
A
10311 }
10312
10313 case VM_REGION_BASIC_INFO_64:
10314 {
2d21ac55 10315 vm_region_basic_info_64_t basic;
91447636 10316
2d21ac55
A
10317 if (*count < VM_REGION_BASIC_INFO_COUNT_64)
10318 return(KERN_INVALID_ARGUMENT);
10319
10320 basic = (vm_region_basic_info_64_t) info;
10321 *count = VM_REGION_BASIC_INFO_COUNT_64;
10322
10323 vm_map_lock_read(map);
10324
10325 start = *address;
10326 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
10327 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
10328 vm_map_unlock_read(map);
10329 return(KERN_INVALID_ADDRESS);
10330 }
10331 } else {
10332 entry = tmp_entry;
10333 }
91447636 10334
2d21ac55 10335 start = entry->vme_start;
91447636 10336
2d21ac55
A
10337 basic->offset = entry->offset;
10338 basic->protection = entry->protection;
10339 basic->inheritance = entry->inheritance;
10340 basic->max_protection = entry->max_protection;
10341 basic->behavior = entry->behavior;
10342 basic->user_wired_count = entry->user_wired_count;
10343 basic->reserved = entry->is_sub_map;
10344 *address = start;
10345 *size = (entry->vme_end - start);
91447636 10346
2d21ac55
A
10347 if (object_name) *object_name = IP_NULL;
10348 if (entry->is_sub_map) {
10349 basic->shared = FALSE;
10350 } else {
10351 basic->shared = entry->is_shared;
91447636 10352 }
2d21ac55
A
10353
10354 vm_map_unlock_read(map);
10355 return(KERN_SUCCESS);
1c79356b
A
10356 }
10357 case VM_REGION_EXTENDED_INFO:
2d21ac55
A
10358 if (*count < VM_REGION_EXTENDED_INFO_COUNT)
10359 return(KERN_INVALID_ARGUMENT);
39236c6e
A
10360 /*fallthru*/
10361 case VM_REGION_EXTENDED_INFO__legacy:
10362 if (*count < VM_REGION_EXTENDED_INFO_COUNT__legacy)
10363 return KERN_INVALID_ARGUMENT;
10364
10365 {
10366 vm_region_extended_info_t extended;
10367 mach_msg_type_number_t original_count;
1c79356b 10368
2d21ac55 10369 extended = (vm_region_extended_info_t) info;
1c79356b 10370
2d21ac55 10371 vm_map_lock_read(map);
1c79356b 10372
2d21ac55
A
10373 start = *address;
10374 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
10375 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
10376 vm_map_unlock_read(map);
10377 return(KERN_INVALID_ADDRESS);
10378 }
10379 } else {
10380 entry = tmp_entry;
1c79356b 10381 }
2d21ac55 10382 start = entry->vme_start;
1c79356b 10383
2d21ac55
A
10384 extended->protection = entry->protection;
10385 extended->user_tag = entry->alias;
10386 extended->pages_resident = 0;
10387 extended->pages_swapped_out = 0;
10388 extended->pages_shared_now_private = 0;
10389 extended->pages_dirtied = 0;
10390 extended->external_pager = 0;
10391 extended->shadow_depth = 0;
1c79356b 10392
39236c6e
A
10393 original_count = *count;
10394 if (flavor == VM_REGION_EXTENDED_INFO__legacy) {
10395 *count = VM_REGION_EXTENDED_INFO_COUNT__legacy;
10396 } else {
10397 extended->pages_reusable = 0;
10398 *count = VM_REGION_EXTENDED_INFO_COUNT;
10399 }
10400
10401 vm_map_region_walk(map, start, entry, entry->offset, entry->vme_end - start, extended, TRUE, *count);
1c79356b 10402
2d21ac55
A
10403 if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED)
10404 extended->share_mode = SM_PRIVATE;
1c79356b 10405
2d21ac55
A
10406 if (object_name)
10407 *object_name = IP_NULL;
10408 *address = start;
10409 *size = (entry->vme_end - start);
1c79356b 10410
2d21ac55
A
10411 vm_map_unlock_read(map);
10412 return(KERN_SUCCESS);
1c79356b
A
10413 }
10414 case VM_REGION_TOP_INFO:
10415 {
2d21ac55 10416 vm_region_top_info_t top;
1c79356b 10417
2d21ac55
A
10418 if (*count < VM_REGION_TOP_INFO_COUNT)
10419 return(KERN_INVALID_ARGUMENT);
1c79356b 10420
2d21ac55
A
10421 top = (vm_region_top_info_t) info;
10422 *count = VM_REGION_TOP_INFO_COUNT;
1c79356b 10423
2d21ac55 10424 vm_map_lock_read(map);
1c79356b 10425
2d21ac55
A
10426 start = *address;
10427 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
10428 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
10429 vm_map_unlock_read(map);
10430 return(KERN_INVALID_ADDRESS);
10431 }
10432 } else {
10433 entry = tmp_entry;
1c79356b 10434
2d21ac55
A
10435 }
10436 start = entry->vme_start;
1c79356b 10437
2d21ac55
A
10438 top->private_pages_resident = 0;
10439 top->shared_pages_resident = 0;
1c79356b 10440
2d21ac55 10441 vm_map_region_top_walk(entry, top);
1c79356b 10442
2d21ac55
A
10443 if (object_name)
10444 *object_name = IP_NULL;
10445 *address = start;
10446 *size = (entry->vme_end - start);
1c79356b 10447
2d21ac55
A
10448 vm_map_unlock_read(map);
10449 return(KERN_SUCCESS);
1c79356b
A
10450 }
10451 default:
2d21ac55 10452 return(KERN_INVALID_ARGUMENT);
1c79356b
A
10453 }
10454}
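/*
 * Corresponding user-space call (illustrative fragment for a userland tool).
 * VM_REGION_BASIC_INFO_64 is the flavor most callers want today; the 32-bit
 * VM_REGION_BASIC_INFO flavor above is kept only for legacy clients.
 */
#if 0	/* user-space example */
#include <mach/mach.h>
#include <mach/mach_vm.h>

static kern_return_t
query_region(
	mach_vm_address_t		*addr,
	mach_vm_size_t			*size,
	vm_region_basic_info_data_64_t	*info)
{
	mach_msg_type_number_t	count = VM_REGION_BASIC_INFO_COUNT_64;
	mach_port_t		object_name = MACH_PORT_NULL;	/* always IP_NULL */

	return mach_vm_region(mach_task_self(), addr, size,
			      VM_REGION_BASIC_INFO_64,
			      (vm_region_info_t)info, &count, &object_name);
}
#endif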
10455
b0d623f7
A
10456#define OBJ_RESIDENT_COUNT(obj, entry_size) \
10457 MIN((entry_size), \
10458 ((obj)->all_reusable ? \
10459 (obj)->wired_page_count : \
10460 (obj)->resident_page_count - (obj)->reusable_page_count))
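/*
 * Illustrative reading of the macro above (numbers are made up): for an
 * object with 100 resident pages, 30 of them reusable, backing an 8-page
 * entry, OBJ_RESIDENT_COUNT() reports MIN(8, 100 - 30) = 8.  If the whole
 * object is marked all_reusable, only its wired pages are counted instead,
 * still capped at the entry size.
 */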
2d21ac55 10461
0c530ab8 10462void
91447636
A
10463vm_map_region_top_walk(
10464 vm_map_entry_t entry,
10465 vm_region_top_info_t top)
1c79356b 10466{
1c79356b 10467
91447636 10468 if (entry->object.vm_object == 0 || entry->is_sub_map) {
2d21ac55
A
10469 top->share_mode = SM_EMPTY;
10470 top->ref_count = 0;
10471 top->obj_id = 0;
10472 return;
1c79356b 10473 }
2d21ac55 10474
91447636 10475 {
2d21ac55
A
10476 struct vm_object *obj, *tmp_obj;
10477 int ref_count;
10478 uint32_t entry_size;
1c79356b 10479
b0d623f7 10480 entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64);
1c79356b 10481
2d21ac55 10482 obj = entry->object.vm_object;
1c79356b 10483
2d21ac55
A
10484 vm_object_lock(obj);
10485
10486 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
10487 ref_count--;
10488
b0d623f7 10489 assert(obj->reusable_page_count <= obj->resident_page_count);
2d21ac55
A
10490 if (obj->shadow) {
10491 if (ref_count == 1)
b0d623f7
A
10492 top->private_pages_resident =
10493 OBJ_RESIDENT_COUNT(obj, entry_size);
2d21ac55 10494 else
b0d623f7
A
10495 top->shared_pages_resident =
10496 OBJ_RESIDENT_COUNT(obj, entry_size);
2d21ac55
A
10497 top->ref_count = ref_count;
10498 top->share_mode = SM_COW;
91447636 10499
2d21ac55
A
10500 while ((tmp_obj = obj->shadow)) {
10501 vm_object_lock(tmp_obj);
10502 vm_object_unlock(obj);
10503 obj = tmp_obj;
1c79356b 10504
2d21ac55
A
10505 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
10506 ref_count--;
1c79356b 10507
b0d623f7
A
10508 assert(obj->reusable_page_count <= obj->resident_page_count);
10509 top->shared_pages_resident +=
10510 OBJ_RESIDENT_COUNT(obj, entry_size);
2d21ac55
A
10511 top->ref_count += ref_count - 1;
10512 }
1c79356b 10513 } else {
6d2010ae
A
10514 if (entry->superpage_size) {
10515 top->share_mode = SM_LARGE_PAGE;
10516 top->shared_pages_resident = 0;
10517 top->private_pages_resident = entry_size;
10518 } else if (entry->needs_copy) {
2d21ac55 10519 top->share_mode = SM_COW;
b0d623f7
A
10520 top->shared_pages_resident =
10521 OBJ_RESIDENT_COUNT(obj, entry_size);
2d21ac55
A
10522 } else {
10523 if (ref_count == 1 ||
10524 (ref_count == 2 && !(obj->pager_trusted) && !(obj->internal))) {
10525 top->share_mode = SM_PRIVATE;
39236c6e
A
10526 top->private_pages_resident =
10527 OBJ_RESIDENT_COUNT(obj,
10528 entry_size);
2d21ac55
A
10529 } else {
10530 top->share_mode = SM_SHARED;
b0d623f7
A
10531 top->shared_pages_resident =
10532 OBJ_RESIDENT_COUNT(obj,
10533 entry_size);
2d21ac55
A
10534 }
10535 }
10536 top->ref_count = ref_count;
1c79356b 10537 }
b0d623f7 10538 /* XXX K64: obj_id will be truncated */
39236c6e 10539 top->obj_id = (unsigned int) (uintptr_t)VM_KERNEL_ADDRPERM(obj);
1c79356b 10540
2d21ac55 10541 vm_object_unlock(obj);
1c79356b 10542 }
91447636
A
10543}
10544
0c530ab8 10545void
91447636
A
10546vm_map_region_walk(
10547 vm_map_t map,
2d21ac55
A
10548 vm_map_offset_t va,
10549 vm_map_entry_t entry,
91447636
A
10550 vm_object_offset_t offset,
10551 vm_object_size_t range,
2d21ac55 10552 vm_region_extended_info_t extended,
39236c6e
A
10553 boolean_t look_for_pages,
10554 mach_msg_type_number_t count)
91447636
A
10555{
10556 register struct vm_object *obj, *tmp_obj;
10557 register vm_map_offset_t last_offset;
10558 register int i;
10559 register int ref_count;
10560 struct vm_object *shadow_object;
10561 int shadow_depth;
10562
10563 if ((entry->object.vm_object == 0) ||
2d21ac55 10564 (entry->is_sub_map) ||
6d2010ae
A
10565 (entry->object.vm_object->phys_contiguous &&
10566 !entry->superpage_size)) {
2d21ac55
A
10567 extended->share_mode = SM_EMPTY;
10568 extended->ref_count = 0;
10569 return;
1c79356b 10570 }
6d2010ae
A
10571
10572 if (entry->superpage_size) {
10573 extended->shadow_depth = 0;
10574 extended->share_mode = SM_LARGE_PAGE;
10575 extended->ref_count = 1;
10576 extended->external_pager = 0;
10577 extended->pages_resident = (unsigned int)(range >> PAGE_SHIFT);
10578 extended->shadow_depth = 0;
10579 return;
10580 }
10581
91447636 10582 {
2d21ac55
A
10583 obj = entry->object.vm_object;
10584
10585 vm_object_lock(obj);
10586
10587 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
10588 ref_count--;
10589
10590 if (look_for_pages) {
10591 for (last_offset = offset + range;
10592 offset < last_offset;
39236c6e
A
10593 offset += PAGE_SIZE_64, va += PAGE_SIZE) {
10594 vm_map_region_look_for_page(map, va, obj,
10595 offset, ref_count,
10596 0, extended, count);
10597 }
b0d623f7
A
10598 } else {
10599 shadow_object = obj->shadow;
10600 shadow_depth = 0;
10601
10602 if ( !(obj->pager_trusted) && !(obj->internal))
10603 extended->external_pager = 1;
10604
10605 if (shadow_object != VM_OBJECT_NULL) {
10606 vm_object_lock(shadow_object);
10607 for (;
10608 shadow_object != VM_OBJECT_NULL;
10609 shadow_depth++) {
10610 vm_object_t next_shadow;
10611
10612 if ( !(shadow_object->pager_trusted) &&
10613 !(shadow_object->internal))
10614 extended->external_pager = 1;
10615
10616 next_shadow = shadow_object->shadow;
10617 if (next_shadow) {
10618 vm_object_lock(next_shadow);
10619 }
10620 vm_object_unlock(shadow_object);
10621 shadow_object = next_shadow;
2d21ac55 10622 }
2d21ac55 10623 }
b0d623f7 10624 extended->shadow_depth = shadow_depth;
2d21ac55 10625 }
2d21ac55
A
10626
10627 if (extended->shadow_depth || entry->needs_copy)
10628 extended->share_mode = SM_COW;
91447636 10629 else {
2d21ac55
A
10630 if (ref_count == 1)
10631 extended->share_mode = SM_PRIVATE;
10632 else {
10633 if (obj->true_share)
10634 extended->share_mode = SM_TRUESHARED;
10635 else
10636 extended->share_mode = SM_SHARED;
10637 }
91447636 10638 }
2d21ac55 10639 extended->ref_count = ref_count - extended->shadow_depth;
91447636 10640
2d21ac55
A
10641 for (i = 0; i < extended->shadow_depth; i++) {
10642 if ((tmp_obj = obj->shadow) == 0)
10643 break;
10644 vm_object_lock(tmp_obj);
10645 vm_object_unlock(obj);
1c79356b 10646
2d21ac55
A
10647 if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress)
10648 ref_count--;
1c79356b 10649
2d21ac55
A
10650 extended->ref_count += ref_count;
10651 obj = tmp_obj;
10652 }
10653 vm_object_unlock(obj);
1c79356b 10654
2d21ac55
A
10655 if (extended->share_mode == SM_SHARED) {
10656 register vm_map_entry_t cur;
10657 register vm_map_entry_t last;
10658 int my_refs;
91447636 10659
2d21ac55
A
10660 obj = entry->object.vm_object;
10661 last = vm_map_to_entry(map);
10662 my_refs = 0;
91447636 10663
2d21ac55
A
10664 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
10665 ref_count--;
10666 for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next)
10667 my_refs += vm_map_region_count_obj_refs(cur, obj);
91447636 10668
2d21ac55
A
10669 if (my_refs == ref_count)
10670 extended->share_mode = SM_PRIVATE_ALIASED;
10671 else if (my_refs > 1)
10672 extended->share_mode = SM_SHARED_ALIASED;
10673 }
91447636 10674 }
1c79356b
A
10675}
10676
1c79356b 10677
91447636
A
10678/* object is locked on entry and locked on return */
10679
10680
10681static void
10682vm_map_region_look_for_page(
10683 __unused vm_map_t map,
2d21ac55
A
10684 __unused vm_map_offset_t va,
10685 vm_object_t object,
10686 vm_object_offset_t offset,
91447636
A
10687 int max_refcnt,
10688 int depth,
39236c6e
A
10689 vm_region_extended_info_t extended,
10690 mach_msg_type_number_t count)
1c79356b 10691{
2d21ac55
A
10692 register vm_page_t p;
10693 register vm_object_t shadow;
10694 register int ref_count;
10695 vm_object_t caller_object;
2d21ac55 10696 kern_return_t kr;
91447636
A
10697 shadow = object->shadow;
10698 caller_object = object;
1c79356b 10699
91447636
A
10700
10701 while (TRUE) {
1c79356b 10702
91447636 10703 if ( !(object->pager_trusted) && !(object->internal))
2d21ac55 10704 extended->external_pager = 1;
1c79356b 10705
91447636
A
10706 if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
10707 if (shadow && (max_refcnt == 1))
10708 extended->pages_shared_now_private++;
1c79356b 10709
39236c6e 10710 if (!p->fictitious &&
91447636
A
10711 (p->dirty || pmap_is_modified(p->phys_page)))
10712 extended->pages_dirtied++;
39236c6e
A
10713 else if (count >= VM_REGION_EXTENDED_INFO_COUNT) {
10714 if (p->reusable || p->object->all_reusable) {
10715 extended->pages_reusable++;
10716 }
10717 }
1c79356b 10718
39236c6e 10719 extended->pages_resident++;
91447636
A
10720
10721 if(object != caller_object)
2d21ac55 10722 vm_object_unlock(object);
91447636
A
10723
10724 return;
1c79356b 10725 }
2d21ac55 10726#if MACH_PAGEMAP
91447636
A
10727 if (object->existence_map) {
10728 if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_EXISTS) {
1c79356b 10729
91447636 10730 extended->pages_swapped_out++;
1c79356b 10731
91447636 10732 if(object != caller_object)
2d21ac55 10733 vm_object_unlock(object);
1c79356b 10734
91447636
A
10735 return;
10736 }
39236c6e
A
10737 } else
10738#endif /* MACH_PAGEMAP */
10739 if (object->internal &&
10740 object->alive &&
10741 !object->terminating &&
10742 object->pager_ready) {
10743
10744 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
10745 if (VM_COMPRESSOR_PAGER_STATE_GET(object,
10746 offset)
10747 == VM_EXTERNAL_STATE_EXISTS) {
10748 /* the pager has that page */
10749 extended->pages_swapped_out++;
10750 if (object != caller_object)
10751 vm_object_unlock(object);
10752 return;
10753 }
10754 } else {
10755 memory_object_t pager;
2d21ac55 10756
39236c6e
A
10757 vm_object_paging_begin(object);
10758 pager = object->pager;
10759 vm_object_unlock(object);
2d21ac55 10760
39236c6e
A
10761 kr = memory_object_data_request(
10762 pager,
10763 offset + object->paging_offset,
10764 0, /* just poke the pager */
10765 VM_PROT_READ,
10766 NULL);
2d21ac55 10767
39236c6e
A
10768 vm_object_lock(object);
10769 vm_object_paging_end(object);
10770
10771 if (kr == KERN_SUCCESS) {
10772 /* the pager has that page */
10773 extended->pages_swapped_out++;
10774 if (object != caller_object)
10775 vm_object_unlock(object);
10776 return;
10777 }
2d21ac55 10778 }
1c79356b 10779 }
2d21ac55 10780
91447636 10781 if (shadow) {
2d21ac55 10782 vm_object_lock(shadow);
1c79356b 10783
91447636
A
10784 if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress)
10785 ref_count--;
1c79356b 10786
91447636
A
10787 if (++depth > extended->shadow_depth)
10788 extended->shadow_depth = depth;
1c79356b 10789
91447636
A
10790 if (ref_count > max_refcnt)
10791 max_refcnt = ref_count;
10792
10793 if(object != caller_object)
2d21ac55 10794 vm_object_unlock(object);
91447636 10795
6d2010ae 10796 offset = offset + object->vo_shadow_offset;
91447636
A
10797 object = shadow;
10798 shadow = object->shadow;
10799 continue;
1c79356b 10800 }
91447636 10801 if(object != caller_object)
2d21ac55 10802 vm_object_unlock(object);
91447636
A
10803 break;
10804 }
10805}
1c79356b 10806
91447636
A
10807static int
10808vm_map_region_count_obj_refs(
10809 vm_map_entry_t entry,
10810 vm_object_t object)
10811{
10812 register int ref_count;
10813 register vm_object_t chk_obj;
10814 register vm_object_t tmp_obj;
1c79356b 10815
91447636 10816 if (entry->object.vm_object == 0)
2d21ac55 10817 return(0);
1c79356b 10818
91447636 10819 if (entry->is_sub_map)
2d21ac55 10820 return(0);
91447636 10821 else {
2d21ac55 10822 ref_count = 0;
1c79356b 10823
2d21ac55
A
10824 chk_obj = entry->object.vm_object;
10825 vm_object_lock(chk_obj);
1c79356b 10826
2d21ac55
A
10827 while (chk_obj) {
10828 if (chk_obj == object)
10829 ref_count++;
10830 tmp_obj = chk_obj->shadow;
10831 if (tmp_obj)
10832 vm_object_lock(tmp_obj);
10833 vm_object_unlock(chk_obj);
1c79356b 10834
2d21ac55
A
10835 chk_obj = tmp_obj;
10836 }
1c79356b 10837 }
91447636 10838 return(ref_count);
1c79356b
A
10839}
10840
10841
10842/*
91447636
A
10843 * Routine: vm_map_simplify
10844 *
10845 * Description:
10846 * Attempt to simplify the map representation in
10847 * the vicinity of the given starting address.
10848 * Note:
10849 * This routine is intended primarily to keep the
10850 * kernel maps more compact -- they generally don't
10851 * benefit from the "expand a map entry" technology
10852 * at allocation time because the adjacent entry
10853 * is often wired down.
1c79356b 10854 */
91447636
A
10855void
10856vm_map_simplify_entry(
10857 vm_map_t map,
10858 vm_map_entry_t this_entry)
1c79356b 10859{
91447636 10860 vm_map_entry_t prev_entry;
1c79356b 10861
91447636 10862 counter(c_vm_map_simplify_entry_called++);
1c79356b 10863
91447636 10864 prev_entry = this_entry->vme_prev;
1c79356b 10865
91447636 10866 if ((this_entry != vm_map_to_entry(map)) &&
2d21ac55 10867 (prev_entry != vm_map_to_entry(map)) &&
1c79356b 10868
91447636 10869 (prev_entry->vme_end == this_entry->vme_start) &&
1c79356b 10870
2d21ac55 10871 (prev_entry->is_sub_map == this_entry->is_sub_map) &&
1c79356b 10872
91447636
A
10873 (prev_entry->object.vm_object == this_entry->object.vm_object) &&
10874 ((prev_entry->offset + (prev_entry->vme_end -
10875 prev_entry->vme_start))
10876 == this_entry->offset) &&
1c79356b 10877
39236c6e 10878 (prev_entry->map_aligned == this_entry->map_aligned) &&
91447636
A
10879 (prev_entry->inheritance == this_entry->inheritance) &&
10880 (prev_entry->protection == this_entry->protection) &&
10881 (prev_entry->max_protection == this_entry->max_protection) &&
10882 (prev_entry->behavior == this_entry->behavior) &&
10883 (prev_entry->alias == this_entry->alias) &&
b0d623f7 10884 (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) &&
2d21ac55 10885 (prev_entry->no_cache == this_entry->no_cache) &&
91447636
A
10886 (prev_entry->wired_count == this_entry->wired_count) &&
10887 (prev_entry->user_wired_count == this_entry->user_wired_count) &&
1c79356b 10888
91447636 10889 (prev_entry->needs_copy == this_entry->needs_copy) &&
b0d623f7 10890 (prev_entry->permanent == this_entry->permanent) &&
1c79356b 10891
91447636
A
10892 (prev_entry->use_pmap == FALSE) &&
10893 (this_entry->use_pmap == FALSE) &&
10894 (prev_entry->in_transition == FALSE) &&
10895 (this_entry->in_transition == FALSE) &&
10896 (prev_entry->needs_wakeup == FALSE) &&
10897 (this_entry->needs_wakeup == FALSE) &&
10898 (prev_entry->is_shared == FALSE) &&
10899 (this_entry->is_shared == FALSE)
2d21ac55 10900 ) {
316670eb 10901 vm_map_store_entry_unlink(map, prev_entry);
e2d2fc5c 10902 assert(prev_entry->vme_start < this_entry->vme_end);
39236c6e
A
10903 if (prev_entry->map_aligned)
10904 assert(VM_MAP_PAGE_ALIGNED(prev_entry->vme_start,
10905 VM_MAP_PAGE_MASK(map)));
91447636
A
10906 this_entry->vme_start = prev_entry->vme_start;
10907 this_entry->offset = prev_entry->offset;
2d21ac55
A
10908 if (prev_entry->is_sub_map) {
10909 vm_map_deallocate(prev_entry->object.sub_map);
10910 } else {
10911 vm_object_deallocate(prev_entry->object.vm_object);
10912 }
91447636 10913 vm_map_entry_dispose(map, prev_entry);
0c530ab8 10914 SAVE_HINT_MAP_WRITE(map, this_entry);
91447636 10915 counter(c_vm_map_simplified++);
1c79356b 10916 }
91447636 10917}
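/*
 * Illustrative example (made-up addresses): given two adjacent entries
 *   [0x1000, 0x3000) offset 0x0  and  [0x3000, 0x5000) offset 0x2000
 * over the same object with identical protections, inheritance, wiring and
 * flags, the checks above are all satisfied, so the first entry is unlinked
 * and the second grows to [0x1000, 0x5000) with offset 0x0.
 */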
1c79356b 10918
91447636
A
10919void
10920vm_map_simplify(
10921 vm_map_t map,
10922 vm_map_offset_t start)
10923{
10924 vm_map_entry_t this_entry;
1c79356b 10925
91447636
A
10926 vm_map_lock(map);
10927 if (vm_map_lookup_entry(map, start, &this_entry)) {
10928 vm_map_simplify_entry(map, this_entry);
10929 vm_map_simplify_entry(map, this_entry->vme_next);
10930 }
10931 counter(c_vm_map_simplify_called++);
10932 vm_map_unlock(map);
10933}
1c79356b 10934
91447636
A
10935static void
10936vm_map_simplify_range(
10937 vm_map_t map,
10938 vm_map_offset_t start,
10939 vm_map_offset_t end)
10940{
10941 vm_map_entry_t entry;
1c79356b 10942
91447636
A
10943 /*
10944 * The map should be locked (for "write") by the caller.
10945 */
1c79356b 10946
91447636
A
10947 if (start >= end) {
10948 /* invalid address range */
10949 return;
10950 }
1c79356b 10951
39236c6e
A
10952 start = vm_map_trunc_page(start,
10953 VM_MAP_PAGE_MASK(map));
10954 end = vm_map_round_page(end,
10955 VM_MAP_PAGE_MASK(map));
2d21ac55 10956
91447636
A
10957 if (!vm_map_lookup_entry(map, start, &entry)) {
10958 /* "start" is not mapped and "entry" ends before "start" */
10959 if (entry == vm_map_to_entry(map)) {
10960 /* start with first entry in the map */
10961 entry = vm_map_first_entry(map);
10962 } else {
10963 /* start with next entry */
10964 entry = entry->vme_next;
10965 }
10966 }
10967
10968 while (entry != vm_map_to_entry(map) &&
10969 entry->vme_start <= end) {
10970 /* try and coalesce "entry" with its previous entry */
10971 vm_map_simplify_entry(map, entry);
10972 entry = entry->vme_next;
10973 }
10974}
1c79356b 10975
1c79356b 10976
91447636
A
10977/*
10978 * Routine: vm_map_machine_attribute
10979 * Purpose:
10980 * Provide machine-specific attributes to mappings,
10981 * such as cacheability, etc., for machines that provide
10982 * them. NUMA architectures and machines with big/strange
10983 * caches will use this.
10984 * Note:
10985 * Responsibilities for locking and checking are handled here;
10986 * everything else is handled in the pmap module. If any non-volatile
10987 * information must be kept, the pmap module should handle
10988 * it itself. [This assumes that attributes do not
10989 * need to be inherited, which seems ok to me]
10990 */
10991kern_return_t
10992vm_map_machine_attribute(
10993 vm_map_t map,
10994 vm_map_offset_t start,
10995 vm_map_offset_t end,
10996 vm_machine_attribute_t attribute,
10997 vm_machine_attribute_val_t* value) /* IN/OUT */
10998{
10999 kern_return_t ret;
11000 vm_map_size_t sync_size;
11001 vm_map_entry_t entry;
11002
11003 if (start < vm_map_min(map) || end > vm_map_max(map))
11004 return KERN_INVALID_ADDRESS;
1c79356b 11005
91447636
A
11006 /* Figure how much memory we need to flush (in page increments) */
11007 sync_size = end - start;
1c79356b 11008
91447636
A
11009 vm_map_lock(map);
11010
11011 if (attribute != MATTR_CACHE) {
11012 /* If we don't have to find physical addresses, we */
11013 /* don't have to do an explicit traversal here. */
11014 ret = pmap_attribute(map->pmap, start, end-start,
11015 attribute, value);
11016 vm_map_unlock(map);
11017 return ret;
11018 }
1c79356b 11019
91447636 11020 ret = KERN_SUCCESS; /* Assume it all worked */
1c79356b 11021
91447636
A
11022 while(sync_size) {
11023 if (vm_map_lookup_entry(map, start, &entry)) {
11024 vm_map_size_t sub_size;
11025 if((entry->vme_end - start) > sync_size) {
11026 sub_size = sync_size;
11027 sync_size = 0;
11028 } else {
11029 sub_size = entry->vme_end - start;
2d21ac55 11030 sync_size -= sub_size;
91447636
A
11031 }
11032 if(entry->is_sub_map) {
11033 vm_map_offset_t sub_start;
11034 vm_map_offset_t sub_end;
1c79356b 11035
91447636 11036 sub_start = (start - entry->vme_start)
2d21ac55 11037 + entry->offset;
91447636
A
11038 sub_end = sub_start + sub_size;
11039 vm_map_machine_attribute(
11040 entry->object.sub_map,
11041 sub_start,
11042 sub_end,
11043 attribute, value);
11044 } else {
11045 if(entry->object.vm_object) {
11046 vm_page_t m;
11047 vm_object_t object;
11048 vm_object_t base_object;
11049 vm_object_t last_object;
11050 vm_object_offset_t offset;
11051 vm_object_offset_t base_offset;
11052 vm_map_size_t range;
11053 range = sub_size;
11054 offset = (start - entry->vme_start)
2d21ac55 11055 + entry->offset;
91447636
A
11056 base_offset = offset;
11057 object = entry->object.vm_object;
11058 base_object = object;
11059 last_object = NULL;
1c79356b 11060
91447636 11061 vm_object_lock(object);
1c79356b 11062
91447636
A
11063 while (range) {
11064 m = vm_page_lookup(
11065 object, offset);
1c79356b 11066
91447636
A
11067 if (m && !m->fictitious) {
11068 ret =
2d21ac55
A
11069 pmap_attribute_cache_sync(
11070 m->phys_page,
11071 PAGE_SIZE,
11072 attribute, value);
91447636
A
11073
11074 } else if (object->shadow) {
6d2010ae 11075 offset = offset + object->vo_shadow_offset;
91447636
A
11076 last_object = object;
11077 object = object->shadow;
11078 vm_object_lock(last_object->shadow);
11079 vm_object_unlock(last_object);
11080 continue;
11081 }
11082 range -= PAGE_SIZE;
1c79356b 11083
91447636
A
11084 if (base_object != object) {
11085 vm_object_unlock(object);
11086 vm_object_lock(base_object);
11087 object = base_object;
11088 }
11089 /* Bump to the next page */
11090 base_offset += PAGE_SIZE;
11091 offset = base_offset;
11092 }
11093 vm_object_unlock(object);
11094 }
11095 }
11096 start += sub_size;
11097 } else {
11098 vm_map_unlock(map);
11099 return KERN_FAILURE;
11100 }
11101
1c79356b 11102 }
e5568f75 11103
91447636 11104 vm_map_unlock(map);
e5568f75 11105
91447636
A
11106 return ret;
11107}
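/*
 * Illustrative sketch (assumed caller, not part of the original source):
 * after modifying code or DMA buffers in a user mapping, a caller could
 * request a cache synchronization over the affected range; the
 * MATTR_VAL_CACHE_SYNC value used below is an assumption.
 *
 *	vm_machine_attribute_val_t val = MATTR_VAL_CACHE_SYNC;
 *	kern_return_t kr;
 *
 *	kr = vm_map_machine_attribute(map, start, end, MATTR_CACHE, &val);
 *
 * KERN_INVALID_ADDRESS means the range was out of bounds, and
 * KERN_FAILURE means a lookup failed partway through the range.
 */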
e5568f75 11108
91447636
A
11109/*
11110 * vm_map_behavior_set:
11111 *
11112 * Sets the paging reference behavior of the specified address
11113 * range in the target map. Paging reference behavior affects
11114 * how pagein operations resulting from faults on the map will be
11115 * clustered.
11116 */
11117kern_return_t
11118vm_map_behavior_set(
11119 vm_map_t map,
11120 vm_map_offset_t start,
11121 vm_map_offset_t end,
11122 vm_behavior_t new_behavior)
11123{
11124 register vm_map_entry_t entry;
11125 vm_map_entry_t temp_entry;
e5568f75 11126
91447636 11127 XPR(XPR_VM_MAP,
2d21ac55 11128 "vm_map_behavior_set, 0x%X start 0x%X end 0x%X behavior %d",
b0d623f7 11129 map, start, end, new_behavior, 0);
e5568f75 11130
6d2010ae
A
11131 if (start > end ||
11132 start < vm_map_min(map) ||
11133 end > vm_map_max(map)) {
11134 return KERN_NO_SPACE;
11135 }
11136
91447636 11137 switch (new_behavior) {
b0d623f7
A
11138
11139 /*
11140 * This first block of behaviors all set a persistent state on the specified
11141 * memory range. All we have to do here is to record the desired behavior
11142 * in the vm_map_entry_t's.
11143 */
11144
91447636
A
11145 case VM_BEHAVIOR_DEFAULT:
11146 case VM_BEHAVIOR_RANDOM:
11147 case VM_BEHAVIOR_SEQUENTIAL:
11148 case VM_BEHAVIOR_RSEQNTL:
b0d623f7
A
11149 case VM_BEHAVIOR_ZERO_WIRED_PAGES:
11150 vm_map_lock(map);
11151
11152 /*
11153 * The entire address range must be valid for the map.
11154 * Note that vm_map_range_check() does a
11155 * vm_map_lookup_entry() internally and returns the
11156 * entry containing the start of the address range if
11157 * the entire range is valid.
11158 */
11159 if (vm_map_range_check(map, start, end, &temp_entry)) {
11160 entry = temp_entry;
11161 vm_map_clip_start(map, entry, start);
11162 }
11163 else {
11164 vm_map_unlock(map);
11165 return(KERN_INVALID_ADDRESS);
11166 }
11167
11168 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
11169 vm_map_clip_end(map, entry, end);
11170 assert(!entry->use_pmap);
11171
11172 if( new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES ) {
11173 entry->zero_wired_pages = TRUE;
11174 } else {
11175 entry->behavior = new_behavior;
11176 }
11177 entry = entry->vme_next;
11178 }
11179
11180 vm_map_unlock(map);
91447636 11181 break;
b0d623f7
A
11182
11183 /*
11184 * The rest of these are different from the above in that they cause
11185 * an immediate action to take place as opposed to setting a behavior that
11186 * affects future actions.
11187 */
11188
91447636 11189 case VM_BEHAVIOR_WILLNEED:
b0d623f7
A
11190 return vm_map_willneed(map, start, end);
11191
91447636 11192 case VM_BEHAVIOR_DONTNEED:
b0d623f7
A
11193 return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS);
11194
11195 case VM_BEHAVIOR_FREE:
11196 return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS);
11197
11198 case VM_BEHAVIOR_REUSABLE:
11199 return vm_map_reusable_pages(map, start, end);
11200
11201 case VM_BEHAVIOR_REUSE:
11202 return vm_map_reuse_pages(map, start, end);
11203
11204 case VM_BEHAVIOR_CAN_REUSE:
11205 return vm_map_can_reuse(map, start, end);
11206
1c79356b 11207 default:
91447636 11208 return(KERN_INVALID_ARGUMENT);
1c79356b 11209 }
1c79356b 11210
b0d623f7
A
11211 return(KERN_SUCCESS);
11212}
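/*
 * Illustrative sketch (assumed caller, not part of the original source):
 * madvise() ultimately lands here, so advising the kernel that a
 * page-aligned range is no longer needed looks roughly like:
 *
 *	kern_return_t kr;
 *
 *	kr = vm_map_behavior_set(current_map(), start, end,
 *				 VM_BEHAVIOR_DONTNEED);
 *
 * The first group of behaviors above only records state in the map
 * entries; the WILLNEED/DONTNEED/FREE/REUSABLE/REUSE/CAN_REUSE cases
 * act immediately and return their own status.
 */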
11213
11214
11215/*
11216 * Internals for madvise(MADV_WILLNEED) system call.
11217 *
11218 * The present implementation is to do a read-ahead if the mapping corresponds
11219 * to a mapped regular file. If it's an anonymous mapping, then we do nothing
11220 * and basically ignore the "advice" (which we are always free to do).
11221 */
11222
11223
11224static kern_return_t
11225vm_map_willneed(
11226 vm_map_t map,
11227 vm_map_offset_t start,
11228 vm_map_offset_t end
11229)
11230{
11231 vm_map_entry_t entry;
11232 vm_object_t object;
11233 memory_object_t pager;
11234 struct vm_object_fault_info fault_info;
11235 kern_return_t kr;
11236 vm_object_size_t len;
11237 vm_object_offset_t offset;
1c79356b 11238
91447636 11239 /*
b0d623f7
A
11240 * Fill in static values in fault_info. Several fields get ignored by the code
11241 * we call, but we'll fill them in anyway since uninitialized fields are bad
11242 * when it comes to future backwards compatibility.
91447636 11243 */
b0d623f7
A
11244
11245 fault_info.interruptible = THREAD_UNINT; /* ignored value */
11246 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
11247 fault_info.no_cache = FALSE; /* ignored value */
11248 fault_info.stealth = TRUE;
6d2010ae
A
11249 fault_info.io_sync = FALSE;
11250 fault_info.cs_bypass = FALSE;
0b4c1975 11251 fault_info.mark_zf_absent = FALSE;
316670eb 11252 fault_info.batch_pmap_op = FALSE;
b0d623f7
A
11253
11254 /*
11255 * The MADV_WILLNEED operation doesn't require any changes to the
11256 * vm_map_entry_t's, so the read lock is sufficient.
11257 */
11258
11259 vm_map_lock_read(map);
11260
11261 /*
11262 * The madvise semantics require that the address range be fully
11263 * allocated with no holes. Otherwise, we're required to return
11264 * an error.
11265 */
11266
6d2010ae
A
11267 if (! vm_map_range_check(map, start, end, &entry)) {
11268 vm_map_unlock_read(map);
11269 return KERN_INVALID_ADDRESS;
11270 }
b0d623f7 11271
6d2010ae
A
11272 /*
11273 * Examine each vm_map_entry_t in the range.
11274 */
11275 for (; entry != vm_map_to_entry(map) && start < end; ) {
11276
b0d623f7 11277 /*
6d2010ae
A
11278 * The first time through, the start address could be anywhere
11279 * within the vm_map_entry we found. So adjust the offset to
11280 * correspond. After that, the offset will always be zero to
11281 * correspond to the beginning of the current vm_map_entry.
b0d623f7 11282 */
6d2010ae 11283 offset = (start - entry->vme_start) + entry->offset;
b0d623f7 11284
6d2010ae
A
11285 /*
11286 * Set the length so we don't go beyond the end of the
11287 * map_entry or beyond the end of the range we were given.
11288 * This range could also span multiple map entries, each of which
11289 * may map a different file, so make sure we only do the right amount
11290 * of I/O for each object. Note that it's possible for there
11291 * to be multiple map entries all referring to the same object
11292 * but with different page permissions, but it's not worth
11293 * trying to optimize that case.
11294 */
11295 len = MIN(entry->vme_end - start, end - start);
b0d623f7 11296
6d2010ae
A
11297 if ((vm_size_t) len != len) {
11298 /* 32-bit overflow */
11299 len = (vm_size_t) (0 - PAGE_SIZE);
11300 }
11301 fault_info.cluster_size = (vm_size_t) len;
11302 fault_info.lo_offset = offset;
11303 fault_info.hi_offset = offset + len;
11304 fault_info.user_tag = entry->alias;
b0d623f7 11305
6d2010ae
A
11306 /*
11307 * If there's no read permission to this mapping, then just
11308 * skip it.
11309 */
11310 if ((entry->protection & VM_PROT_READ) == 0) {
11311 entry = entry->vme_next;
11312 start = entry->vme_start;
11313 continue;
11314 }
b0d623f7 11315
6d2010ae
A
11316 /*
11317 * Find the file object backing this map entry. If there is
11318 * none, then we simply ignore the "will need" advice for this
11319 * entry and go on to the next one.
11320 */
11321 if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) {
11322 entry = entry->vme_next;
11323 start = entry->vme_start;
11324 continue;
11325 }
b0d623f7 11326
6d2010ae
A
11327 /*
11328 * The data_request() could take a long time, so let's
11329 * release the map lock to avoid blocking other threads.
11330 */
11331 vm_map_unlock_read(map);
b0d623f7 11332
6d2010ae
A
11333 vm_object_paging_begin(object);
11334 pager = object->pager;
11335 vm_object_unlock(object);
b0d623f7 11336
6d2010ae
A
11337 /*
11338 * Get the data from the object asynchronously.
11339 *
11340 * Note that memory_object_data_request() places limits on the
11341 * amount of I/O it will do. Regardless of the len we
11342 * specified, it won't do more than MAX_UPL_TRANSFER and it
11343 * silently truncates the len to that size. This isn't
11344 * necessarily bad since madvise shouldn't really be used to
11345 * page in unlimited amounts of data. Other Unix variants
11346 * limit the willneed case as well. If this turns out to be an
11347 * issue for developers, then we can always adjust the policy
11348 * here and still be backwards compatible since this is all
11349 * just "advice".
11350 */
11351 kr = memory_object_data_request(
11352 pager,
11353 offset + object->paging_offset,
11354 0, /* ignored */
11355 VM_PROT_READ,
11356 (memory_object_fault_info_t)&fault_info);
b0d623f7 11357
6d2010ae
A
11358 vm_object_lock(object);
11359 vm_object_paging_end(object);
11360 vm_object_unlock(object);
b0d623f7 11361
6d2010ae
A
11362 /*
11363 * If we couldn't do the I/O for some reason, just give up on
11364 * the madvise. We still return success to the user since
11365 * madvise isn't supposed to fail when the advice can't be
11366 * taken.
11367 */
11368 if (kr != KERN_SUCCESS) {
11369 return KERN_SUCCESS;
11370 }
b0d623f7 11371
6d2010ae
A
11372 start += len;
11373 if (start >= end) {
11374 /* done */
11375 return KERN_SUCCESS;
11376 }
b0d623f7 11377
6d2010ae
A
11378 /* look up next entry */
11379 vm_map_lock_read(map);
11380 if (! vm_map_lookup_entry(map, start, &entry)) {
b0d623f7 11381 /*
6d2010ae 11382 * There's a new hole in the address range.
b0d623f7 11383 */
6d2010ae
A
11384 vm_map_unlock_read(map);
11385 return KERN_INVALID_ADDRESS;
b0d623f7 11386 }
6d2010ae 11387 }
b0d623f7
A
11388
11389 vm_map_unlock_read(map);
6d2010ae 11390 return KERN_SUCCESS;
b0d623f7
A
11391}
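/*
 * Illustrative note (not part of the original source): the routine
 * above is what services a user-level
 *
 *	madvise(buf, len, MADV_WILLNEED);
 *
 * on a file-backed mapping: for each map entry in the range, the
 * backing pager is poked with an asynchronous
 * memory_object_data_request() so the pages can stream in before the
 * process faults on them.  Anonymous mappings are skipped, as noted in
 * the header comment.
 */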
11392
11393static boolean_t
11394vm_map_entry_is_reusable(
11395 vm_map_entry_t entry)
11396{
11397 vm_object_t object;
11398
39236c6e
A
11399 switch (entry->alias) {
11400 case VM_MEMORY_MALLOC:
11401 case VM_MEMORY_MALLOC_SMALL:
11402 case VM_MEMORY_MALLOC_LARGE:
11403 case VM_MEMORY_REALLOC:
11404 case VM_MEMORY_MALLOC_TINY:
11405 case VM_MEMORY_MALLOC_LARGE_REUSABLE:
11406 case VM_MEMORY_MALLOC_LARGE_REUSED:
11407 /*
11408 * This is a malloc() memory region: check if it's still
11409 * in its original state and can be re-used for more
11410 * malloc() allocations.
11411 */
11412 break;
11413 default:
11414 /*
11415 * Not a malloc() memory region: let the caller decide if
11416 * it's re-usable.
11417 */
11418 return TRUE;
11419 }
11420
b0d623f7
A
11421 if (entry->is_shared ||
11422 entry->is_sub_map ||
11423 entry->in_transition ||
11424 entry->protection != VM_PROT_DEFAULT ||
11425 entry->max_protection != VM_PROT_ALL ||
11426 entry->inheritance != VM_INHERIT_DEFAULT ||
11427 entry->no_cache ||
11428 entry->permanent ||
39236c6e 11429 entry->superpage_size != FALSE ||
b0d623f7
A
11430 entry->zero_wired_pages ||
11431 entry->wired_count != 0 ||
11432 entry->user_wired_count != 0) {
11433 return FALSE;
91447636 11434 }
b0d623f7
A
11435
11436 object = entry->object.vm_object;
11437 if (object == VM_OBJECT_NULL) {
11438 return TRUE;
11439 }
316670eb
A
11440 if (
11441#if 0
11442 /*
11443 * Let's proceed even if the VM object is potentially
11444 * shared.
11445 * We check for this later when processing the actual
11446 * VM pages, so the contents will be safe if shared.
11447 *
11448 * But we can still mark this memory region as "reusable" to
11449 * acknowledge that the caller did let us know that the memory
11450 * could be re-used and should not be penalized for holding
11451 * on to it. This allows its "resident size" to not include
11452 * the reusable range.
11453 */
11454 object->ref_count == 1 &&
11455#endif
b0d623f7
A
11456 object->wired_page_count == 0 &&
11457 object->copy == VM_OBJECT_NULL &&
11458 object->shadow == VM_OBJECT_NULL &&
11459 object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
11460 object->internal &&
11461 !object->true_share &&
6d2010ae 11462 object->wimg_bits == VM_WIMG_USE_DEFAULT &&
b0d623f7
A
11463 !object->code_signed) {
11464 return TRUE;
1c79356b 11465 }
b0d623f7
A
11466 return FALSE;
11467
11468
11469}
1c79356b 11470
b0d623f7
A
11471static kern_return_t
11472vm_map_reuse_pages(
11473 vm_map_t map,
11474 vm_map_offset_t start,
11475 vm_map_offset_t end)
11476{
11477 vm_map_entry_t entry;
11478 vm_object_t object;
11479 vm_object_offset_t start_offset, end_offset;
11480
11481 /*
11482 * The MADV_REUSE operation doesn't require any changes to the
11483 * vm_map_entry_t's, so the read lock is sufficient.
11484 */
0b4e3aa0 11485
b0d623f7 11486 vm_map_lock_read(map);
1c79356b 11487
b0d623f7
A
11488 /*
11489 * The madvise semantics require that the address range be fully
11490 * allocated with no holes. Otherwise, we're required to return
11491 * an error.
11492 */
11493
11494 if (!vm_map_range_check(map, start, end, &entry)) {
11495 vm_map_unlock_read(map);
11496 vm_page_stats_reusable.reuse_pages_failure++;
11497 return KERN_INVALID_ADDRESS;
1c79356b 11498 }
91447636 11499
b0d623f7
A
11500 /*
11501 * Examine each vm_map_entry_t in the range.
11502 */
11503 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
11504 entry = entry->vme_next) {
11505 /*
11506 * Sanity check on the VM map entry.
11507 */
11508 if (! vm_map_entry_is_reusable(entry)) {
11509 vm_map_unlock_read(map);
11510 vm_page_stats_reusable.reuse_pages_failure++;
11511 return KERN_INVALID_ADDRESS;
11512 }
11513
11514 /*
11515 * The first time through, the start address could be anywhere
11516 * within the vm_map_entry we found. So adjust the offset to
11517 * correspond.
11518 */
11519 if (entry->vme_start < start) {
11520 start_offset = start - entry->vme_start;
11521 } else {
11522 start_offset = 0;
11523 }
11524 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
11525 start_offset += entry->offset;
11526 end_offset += entry->offset;
11527
11528 object = entry->object.vm_object;
11529 if (object != VM_OBJECT_NULL) {
39236c6e
A
11530 /* tell pmap to not count this range as "reusable" */
11531 pmap_reusable(map->pmap,
11532 MAX(start, entry->vme_start),
11533 MIN(end, entry->vme_end),
11534 FALSE);
b0d623f7
A
11535 vm_object_lock(object);
11536 vm_object_reuse_pages(object, start_offset, end_offset,
11537 TRUE);
11538 vm_object_unlock(object);
11539 }
11540
11541 if (entry->alias == VM_MEMORY_MALLOC_LARGE_REUSABLE) {
11542 /*
11543 * XXX
11544 * We do not hold the VM map exclusively here.
11545 * The "alias" field is not that critical, so it's
11546 * safe to update it here, as long as it is the only
11547 * one that can be modified while holding the VM map
11548 * "shared".
11549 */
11550 entry->alias = VM_MEMORY_MALLOC_LARGE_REUSED;
11551 }
11552 }
11553
11554 vm_map_unlock_read(map);
11555 vm_page_stats_reusable.reuse_pages_success++;
11556 return KERN_SUCCESS;
1c79356b
A
11557}
11558
1c79356b 11559
b0d623f7
A
11560static kern_return_t
11561vm_map_reusable_pages(
11562 vm_map_t map,
11563 vm_map_offset_t start,
11564 vm_map_offset_t end)
11565{
11566 vm_map_entry_t entry;
11567 vm_object_t object;
11568 vm_object_offset_t start_offset, end_offset;
11569
11570 /*
11571 * The MADV_REUSABLE operation doesn't require any changes to the
11572 * vm_map_entry_t's, so the read lock is sufficient.
11573 */
11574
11575 vm_map_lock_read(map);
11576
11577 /*
11578 * The madvise semantics require that the address range be fully
11579 * allocated with no holes. Otherwise, we're required to return
11580 * an error.
11581 */
11582
11583 if (!vm_map_range_check(map, start, end, &entry)) {
11584 vm_map_unlock_read(map);
11585 vm_page_stats_reusable.reusable_pages_failure++;
11586 return KERN_INVALID_ADDRESS;
11587 }
11588
11589 /*
11590 * Examine each vm_map_entry_t in the range.
11591 */
11592 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
11593 entry = entry->vme_next) {
11594 int kill_pages = 0;
11595
11596 /*
11597 * Sanity check on the VM map entry.
11598 */
11599 if (! vm_map_entry_is_reusable(entry)) {
11600 vm_map_unlock_read(map);
11601 vm_page_stats_reusable.reusable_pages_failure++;
11602 return KERN_INVALID_ADDRESS;
11603 }
11604
11605 /*
11606 * The first time through, the start address could be anywhere
11607 * within the vm_map_entry we found. So adjust the offset to
11608 * correspond.
11609 */
11610 if (entry->vme_start < start) {
11611 start_offset = start - entry->vme_start;
11612 } else {
11613 start_offset = 0;
11614 }
11615 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
11616 start_offset += entry->offset;
11617 end_offset += entry->offset;
11618
11619 object = entry->object.vm_object;
11620 if (object == VM_OBJECT_NULL)
11621 continue;
11622
11623
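		/*
		 * Only discard the page contents (kill_pages == 1) when this
		 * mapping is the sole user of the object: a single reference
		 * and no shadow.  Otherwise another mapping may still depend
		 * on the data, so the range is merely accounted as "shared"
		 * below and the pages are left intact.
		 */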
11624 vm_object_lock(object);
11625 if (object->ref_count == 1 && !object->shadow)
11626 kill_pages = 1;
11627 else
11628 kill_pages = -1;
11629 if (kill_pages != -1) {
39236c6e
A
11630 /* tell pmap to count this range as "reusable" */
11631 pmap_reusable(map->pmap,
11632 MAX(start, entry->vme_start),
11633 MIN(end, entry->vme_end),
11634 TRUE);
b0d623f7
A
11635 vm_object_deactivate_pages(object,
11636 start_offset,
11637 end_offset - start_offset,
11638 kill_pages,
11639 TRUE /*reusable_pages*/);
11640 } else {
11641 vm_page_stats_reusable.reusable_pages_shared++;
11642 }
11643 vm_object_unlock(object);
11644
11645 if (entry->alias == VM_MEMORY_MALLOC_LARGE ||
11646 entry->alias == VM_MEMORY_MALLOC_LARGE_REUSED) {
11647 /*
11648 * XXX
11649 * We do not hold the VM map exclusively here.
11650 * The "alias" field is not that critical, so it's
11651 * safe to update it here, as long as it is the only
11652 * one that can be modified while holding the VM map
11653 * "shared".
11654 */
11655 entry->alias = VM_MEMORY_MALLOC_LARGE_REUSABLE;
11656 }
11657 }
11658
11659 vm_map_unlock_read(map);
11660 vm_page_stats_reusable.reusable_pages_success++;
11661 return KERN_SUCCESS;
11662}
11663
11664
11665static kern_return_t
11666vm_map_can_reuse(
11667 vm_map_t map,
11668 vm_map_offset_t start,
11669 vm_map_offset_t end)
11670{
11671 vm_map_entry_t entry;
11672
11673 /*
11674 * The MADV_REUSABLE operation doesn't require any changes to the
11675 * vm_map_entry_t's, so the read lock is sufficient.
11676 */
11677
11678 vm_map_lock_read(map);
11679
11680 /*
11681 * The madvise semantics require that the address range be fully
11682 * allocated with no holes. Otherwise, we're required to return
11683 * an error.
11684 */
11685
11686 if (!vm_map_range_check(map, start, end, &entry)) {
11687 vm_map_unlock_read(map);
11688 vm_page_stats_reusable.can_reuse_failure++;
11689 return KERN_INVALID_ADDRESS;
11690 }
11691
11692 /*
11693 * Examine each vm_map_entry_t in the range.
11694 */
11695 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
11696 entry = entry->vme_next) {
11697 /*
11698 * Sanity check on the VM map entry.
11699 */
11700 if (! vm_map_entry_is_reusable(entry)) {
11701 vm_map_unlock_read(map);
11702 vm_page_stats_reusable.can_reuse_failure++;
11703 return KERN_INVALID_ADDRESS;
11704 }
11705 }
11706
11707 vm_map_unlock_read(map);
11708 vm_page_stats_reusable.can_reuse_success++;
11709 return KERN_SUCCESS;
11710}
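/*
 * Illustrative note (not part of the original source): the three entry
 * points above back the madvise() advice values used by malloc for
 * "reusable" memory; the user-level advice names below are assumptions:
 *
 *	madvise(buf, len, MADV_FREE_REUSABLE);	-> vm_map_reusable_pages()
 *	...
 *	madvise(buf, len, MADV_FREE_REUSE);	-> vm_map_reuse_pages()
 *
 * with MADV_CAN_REUSE -> vm_map_can_reuse() acting as a probe that the
 * whole range would pass the vm_map_entry_is_reusable() checks.
 */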
11711
11712
1c79356b 11713/*
91447636
A
11714 * Routine: vm_map_entry_insert
11715 *
11716 * Description: This routine inserts a new vm_map_entry in a locked map.
1c79356b 11717 */
91447636
A
11718vm_map_entry_t
11719vm_map_entry_insert(
11720 vm_map_t map,
11721 vm_map_entry_t insp_entry,
11722 vm_map_offset_t start,
11723 vm_map_offset_t end,
11724 vm_object_t object,
11725 vm_object_offset_t offset,
11726 boolean_t needs_copy,
11727 boolean_t is_shared,
11728 boolean_t in_transition,
11729 vm_prot_t cur_protection,
11730 vm_prot_t max_protection,
11731 vm_behavior_t behavior,
11732 vm_inherit_t inheritance,
2d21ac55 11733 unsigned wired_count,
b0d623f7
A
11734 boolean_t no_cache,
11735 boolean_t permanent,
39236c6e
A
11736 unsigned int superpage_size,
11737 boolean_t clear_map_aligned)
1c79356b 11738{
91447636 11739 vm_map_entry_t new_entry;
1c79356b 11740
91447636 11741 assert(insp_entry != (vm_map_entry_t)0);
1c79356b 11742
7ddcb079 11743 new_entry = vm_map_entry_create(map, !map->hdr.entries_pageable);
1c79356b 11744
39236c6e
A
11745 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
11746 new_entry->map_aligned = TRUE;
11747 } else {
11748 new_entry->map_aligned = FALSE;
11749 }
11750 if (clear_map_aligned &&
11751 ! VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map))) {
11752 new_entry->map_aligned = FALSE;
11753 }
11754
91447636
A
11755 new_entry->vme_start = start;
11756 new_entry->vme_end = end;
11757 assert(page_aligned(new_entry->vme_start));
11758 assert(page_aligned(new_entry->vme_end));
39236c6e
A
11759 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
11760 VM_MAP_PAGE_MASK(map)));
11761 if (new_entry->map_aligned) {
11762 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
11763 VM_MAP_PAGE_MASK(map)));
11764 }
e2d2fc5c 11765 assert(new_entry->vme_start < new_entry->vme_end);
1c79356b 11766
91447636
A
11767 new_entry->object.vm_object = object;
11768 new_entry->offset = offset;
11769 new_entry->is_shared = is_shared;
11770 new_entry->is_sub_map = FALSE;
11771 new_entry->needs_copy = needs_copy;
11772 new_entry->in_transition = in_transition;
11773 new_entry->needs_wakeup = FALSE;
11774 new_entry->inheritance = inheritance;
11775 new_entry->protection = cur_protection;
11776 new_entry->max_protection = max_protection;
11777 new_entry->behavior = behavior;
11778 new_entry->wired_count = wired_count;
11779 new_entry->user_wired_count = 0;
11780 new_entry->use_pmap = FALSE;
0c530ab8 11781 new_entry->alias = 0;
b0d623f7 11782 new_entry->zero_wired_pages = FALSE;
2d21ac55 11783 new_entry->no_cache = no_cache;
b0d623f7 11784 new_entry->permanent = permanent;
39236c6e
A
11785 if (superpage_size)
11786 new_entry->superpage_size = TRUE;
11787 else
11788 new_entry->superpage_size = FALSE;
6d2010ae 11789 new_entry->used_for_jit = FALSE;
1c79356b 11790
91447636
A
11791 /*
11792 * Insert the new entry into the list.
11793 */
1c79356b 11794
6d2010ae 11795 vm_map_store_entry_link(map, insp_entry, new_entry);
91447636
A
11796 map->size += end - start;
11797
11798 /*
11799 * Update the free space hint and the lookup hint.
11800 */
11801
0c530ab8 11802 SAVE_HINT_MAP_WRITE(map, new_entry);
91447636 11803 return new_entry;
1c79356b
A
11804}
11805
11806/*
91447636
A
11807 * Routine: vm_map_remap_extract
11808 *
11809 * Description: This routine returns a vm_map_entry list from a map.
1c79356b 11810 */
91447636
A
11811static kern_return_t
11812vm_map_remap_extract(
11813 vm_map_t map,
11814 vm_map_offset_t addr,
11815 vm_map_size_t size,
11816 boolean_t copy,
11817 struct vm_map_header *map_header,
11818 vm_prot_t *cur_protection,
11819 vm_prot_t *max_protection,
11820 /* What, no behavior? */
11821 vm_inherit_t inheritance,
11822 boolean_t pageable)
1c79356b 11823{
91447636
A
11824 kern_return_t result;
11825 vm_map_size_t mapped_size;
11826 vm_map_size_t tmp_size;
11827 vm_map_entry_t src_entry; /* result of last map lookup */
11828 vm_map_entry_t new_entry;
11829 vm_object_offset_t offset;
11830 vm_map_offset_t map_address;
11831 vm_map_offset_t src_start; /* start of entry to map */
11832 vm_map_offset_t src_end; /* end of region to be mapped */
11833 vm_object_t object;
11834 vm_map_version_t version;
11835 boolean_t src_needs_copy;
11836 boolean_t new_entry_needs_copy;
1c79356b 11837
91447636 11838 assert(map != VM_MAP_NULL);
39236c6e
A
11839 assert(size != 0);
11840 assert(size == vm_map_round_page(size, PAGE_MASK));
91447636
A
11841 assert(inheritance == VM_INHERIT_NONE ||
11842 inheritance == VM_INHERIT_COPY ||
11843 inheritance == VM_INHERIT_SHARE);
1c79356b 11844
91447636
A
11845 /*
11846 * Compute start and end of region.
11847 */
39236c6e
A
11848 src_start = vm_map_trunc_page(addr, PAGE_MASK);
11849 src_end = vm_map_round_page(src_start + size, PAGE_MASK);
11850
1c79356b 11851
91447636
A
11852 /*
11853 * Initialize map_header.
11854 */
11855 map_header->links.next = (struct vm_map_entry *)&map_header->links;
11856 map_header->links.prev = (struct vm_map_entry *)&map_header->links;
11857 map_header->nentries = 0;
11858 map_header->entries_pageable = pageable;
39236c6e 11859 map_header->page_shift = PAGE_SHIFT;
1c79356b 11860
6d2010ae
A
11861 vm_map_store_init( map_header );
11862
91447636
A
11863 *cur_protection = VM_PROT_ALL;
11864 *max_protection = VM_PROT_ALL;
1c79356b 11865
91447636
A
11866 map_address = 0;
11867 mapped_size = 0;
11868 result = KERN_SUCCESS;
1c79356b 11869
91447636
A
11870 /*
11871 * The specified source virtual space might correspond to
11872 * multiple map entries, so we need to loop over them.
11873 */
11874 vm_map_lock(map);
11875 while (mapped_size != size) {
11876 vm_map_size_t entry_size;
1c79356b 11877
91447636
A
11878 /*
11879 * Find the beginning of the region.
11880 */
11881 if (! vm_map_lookup_entry(map, src_start, &src_entry)) {
11882 result = KERN_INVALID_ADDRESS;
11883 break;
11884 }
1c79356b 11885
91447636
A
11886 if (src_start < src_entry->vme_start ||
11887 (mapped_size && src_start != src_entry->vme_start)) {
11888 result = KERN_INVALID_ADDRESS;
11889 break;
11890 }
1c79356b 11891
91447636
A
11892 tmp_size = size - mapped_size;
11893 if (src_end > src_entry->vme_end)
11894 tmp_size -= (src_end - src_entry->vme_end);
1c79356b 11895
91447636 11896 entry_size = (vm_map_size_t)(src_entry->vme_end -
2d21ac55 11897 src_entry->vme_start);
1c79356b 11898
91447636
A
11899 if(src_entry->is_sub_map) {
11900 vm_map_reference(src_entry->object.sub_map);
11901 object = VM_OBJECT_NULL;
11902 } else {
11903 object = src_entry->object.vm_object;
55e303ae 11904
91447636
A
11905 if (object == VM_OBJECT_NULL) {
11906 object = vm_object_allocate(entry_size);
11907 src_entry->offset = 0;
11908 src_entry->object.vm_object = object;
11909 } else if (object->copy_strategy !=
11910 MEMORY_OBJECT_COPY_SYMMETRIC) {
11911 /*
11912 * We are already using an asymmetric
11913 * copy, and therefore we already have
11914 * the right object.
11915 */
11916 assert(!src_entry->needs_copy);
11917 } else if (src_entry->needs_copy || object->shadowed ||
11918 (object->internal && !object->true_share &&
2d21ac55 11919 !src_entry->is_shared &&
6d2010ae 11920 object->vo_size > entry_size)) {
1c79356b 11921
91447636
A
11922 vm_object_shadow(&src_entry->object.vm_object,
11923 &src_entry->offset,
11924 entry_size);
1c79356b 11925
91447636
A
11926 if (!src_entry->needs_copy &&
11927 (src_entry->protection & VM_PROT_WRITE)) {
0c530ab8
A
11928 vm_prot_t prot;
11929
11930 prot = src_entry->protection & ~VM_PROT_WRITE;
2d21ac55
A
11931
11932 if (override_nx(map, src_entry->alias) && prot)
0c530ab8 11933 prot |= VM_PROT_EXECUTE;
2d21ac55 11934
316670eb 11935 if(map->mapped_in_other_pmaps) {
2d21ac55
A
11936 vm_object_pmap_protect(
11937 src_entry->object.vm_object,
11938 src_entry->offset,
11939 entry_size,
11940 PMAP_NULL,
0c530ab8 11941 src_entry->vme_start,
0c530ab8 11942 prot);
2d21ac55
A
11943 } else {
11944 pmap_protect(vm_map_pmap(map),
11945 src_entry->vme_start,
11946 src_entry->vme_end,
11947 prot);
91447636
A
11948 }
11949 }
1c79356b 11950
91447636
A
11951 object = src_entry->object.vm_object;
11952 src_entry->needs_copy = FALSE;
11953 }
1c79356b 11954
1c79356b 11955
91447636 11956 vm_object_lock(object);
2d21ac55 11957 vm_object_reference_locked(object); /* object ref. for new entry */
91447636 11958 if (object->copy_strategy ==
2d21ac55 11959 MEMORY_OBJECT_COPY_SYMMETRIC) {
91447636
A
11960 object->copy_strategy =
11961 MEMORY_OBJECT_COPY_DELAY;
11962 }
11963 vm_object_unlock(object);
11964 }
1c79356b 11965
91447636 11966 offset = src_entry->offset + (src_start - src_entry->vme_start);
1c79356b 11967
7ddcb079 11968 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
91447636
A
11969 vm_map_entry_copy(new_entry, src_entry);
11970 new_entry->use_pmap = FALSE; /* clr address space specifics */
1c79356b 11971
39236c6e
A
11972 new_entry->map_aligned = FALSE;
11973
91447636
A
11974 new_entry->vme_start = map_address;
11975 new_entry->vme_end = map_address + tmp_size;
e2d2fc5c 11976 assert(new_entry->vme_start < new_entry->vme_end);
91447636
A
11977 new_entry->inheritance = inheritance;
11978 new_entry->offset = offset;
1c79356b 11979
91447636
A
11980 /*
11981 * The new region has to be copied now if required.
11982 */
11983 RestartCopy:
11984 if (!copy) {
316670eb
A
11985 /*
11986 * Cannot allow an entry describing a JIT
11987 * region to be shared across address spaces.
11988 */
11989 if (src_entry->used_for_jit == TRUE) {
11990 result = KERN_INVALID_ARGUMENT;
11991 break;
11992 }
91447636
A
11993 src_entry->is_shared = TRUE;
11994 new_entry->is_shared = TRUE;
11995 if (!(new_entry->is_sub_map))
11996 new_entry->needs_copy = FALSE;
1c79356b 11997
91447636
A
11998 } else if (src_entry->is_sub_map) {
11999 /* make this a COW sub_map if not already */
12000 new_entry->needs_copy = TRUE;
12001 object = VM_OBJECT_NULL;
12002 } else if (src_entry->wired_count == 0 &&
2d21ac55
A
12003 vm_object_copy_quickly(&new_entry->object.vm_object,
12004 new_entry->offset,
12005 (new_entry->vme_end -
12006 new_entry->vme_start),
12007 &src_needs_copy,
12008 &new_entry_needs_copy)) {
55e303ae 12009
91447636
A
12010 new_entry->needs_copy = new_entry_needs_copy;
12011 new_entry->is_shared = FALSE;
1c79356b 12012
91447636
A
12013 /*
12014 * Handle copy_on_write semantics.
12015 */
12016 if (src_needs_copy && !src_entry->needs_copy) {
0c530ab8
A
12017 vm_prot_t prot;
12018
12019 prot = src_entry->protection & ~VM_PROT_WRITE;
2d21ac55
A
12020
12021 if (override_nx(map, src_entry->alias) && prot)
0c530ab8 12022 prot |= VM_PROT_EXECUTE;
2d21ac55 12023
91447636
A
12024 vm_object_pmap_protect(object,
12025 offset,
12026 entry_size,
12027 ((src_entry->is_shared
316670eb 12028 || map->mapped_in_other_pmaps) ?
91447636
A
12029 PMAP_NULL : map->pmap),
12030 src_entry->vme_start,
0c530ab8 12031 prot);
1c79356b 12032
91447636
A
12033 src_entry->needs_copy = TRUE;
12034 }
12035 /*
12036 * Throw away the old object reference of the new entry.
12037 */
12038 vm_object_deallocate(object);
1c79356b 12039
91447636
A
12040 } else {
12041 new_entry->is_shared = FALSE;
1c79356b 12042
91447636
A
12043 /*
12044 * The map can be safely unlocked since we
12045 * already hold a reference on the object.
12046 *
12047 * Record the timestamp of the map for later
12048 * verification, and unlock the map.
12049 */
12050 version.main_timestamp = map->timestamp;
12051 vm_map_unlock(map); /* Increments timestamp once! */
55e303ae 12052
91447636
A
12053 /*
12054 * Perform the copy.
12055 */
12056 if (src_entry->wired_count > 0) {
12057 vm_object_lock(object);
12058 result = vm_object_copy_slowly(
2d21ac55
A
12059 object,
12060 offset,
12061 entry_size,
12062 THREAD_UNINT,
12063 &new_entry->object.vm_object);
1c79356b 12064
91447636
A
12065 new_entry->offset = 0;
12066 new_entry->needs_copy = FALSE;
12067 } else {
12068 result = vm_object_copy_strategically(
2d21ac55
A
12069 object,
12070 offset,
12071 entry_size,
12072 &new_entry->object.vm_object,
12073 &new_entry->offset,
12074 &new_entry_needs_copy);
1c79356b 12075
91447636
A
12076 new_entry->needs_copy = new_entry_needs_copy;
12077 }
1c79356b 12078
91447636
A
12079 /*
12080 * Throw away the old object reference of the new entry.
12081 */
12082 vm_object_deallocate(object);
1c79356b 12083
91447636
A
12084 if (result != KERN_SUCCESS &&
12085 result != KERN_MEMORY_RESTART_COPY) {
12086 _vm_map_entry_dispose(map_header, new_entry);
12087 break;
12088 }
1c79356b 12089
91447636
A
12090 /*
12091 * Verify that the map has not substantially
12092 * changed while the copy was being made.
12093 */
1c79356b 12094
91447636
A
12095 vm_map_lock(map);
12096 if (version.main_timestamp + 1 != map->timestamp) {
12097 /*
12098 * Simple version comparison failed.
12099 *
12100 * Retry the lookup and verify that the
12101 * same object/offset are still present.
12102 */
12103 vm_object_deallocate(new_entry->
12104 object.vm_object);
12105 _vm_map_entry_dispose(map_header, new_entry);
12106 if (result == KERN_MEMORY_RESTART_COPY)
12107 result = KERN_SUCCESS;
12108 continue;
12109 }
1c79356b 12110
91447636
A
12111 if (result == KERN_MEMORY_RESTART_COPY) {
12112 vm_object_reference(object);
12113 goto RestartCopy;
12114 }
12115 }
1c79356b 12116
6d2010ae 12117 _vm_map_store_entry_link(map_header,
91447636 12118 map_header->links.prev, new_entry);
1c79356b 12119
6d2010ae
A
12120 /* Protections for a submap mapping are irrelevant here */
12121 if( !src_entry->is_sub_map ) {
12122 *cur_protection &= src_entry->protection;
12123 *max_protection &= src_entry->max_protection;
12124 }
91447636
A
12125 map_address += tmp_size;
12126 mapped_size += tmp_size;
12127 src_start += tmp_size;
1c79356b 12128
91447636 12129 } /* end while */
1c79356b 12130
91447636
A
12131 vm_map_unlock(map);
12132 if (result != KERN_SUCCESS) {
12133 /*
12134 * Free all allocated elements.
12135 */
12136 for (src_entry = map_header->links.next;
12137 src_entry != (struct vm_map_entry *)&map_header->links;
12138 src_entry = new_entry) {
12139 new_entry = src_entry->vme_next;
6d2010ae 12140 _vm_map_store_entry_unlink(map_header, src_entry);
39236c6e
A
12141 if (src_entry->is_sub_map) {
12142 vm_map_deallocate(src_entry->object.sub_map);
12143 } else {
12144 vm_object_deallocate(src_entry->object.vm_object);
12145 }
91447636
A
12146 _vm_map_entry_dispose(map_header, src_entry);
12147 }
12148 }
12149 return result;
1c79356b
A
12150}
12151
12152/*
91447636 12153 * Routine: vm_remap
1c79356b 12154 *
91447636
A
12155 * Map a portion of a task's address space.
12156 * The mapped region must not overlap more than
12157 * one VM memory object. Protections and
12158 * inheritance attributes remain the same
12159 * as in the original task and are out parameters.
12160 * Source and target tasks can be identical.
12161 * Other attributes are identical to those for vm_map().
1c79356b
A
12162 */
12163kern_return_t
91447636
A
12164vm_map_remap(
12165 vm_map_t target_map,
12166 vm_map_address_t *address,
12167 vm_map_size_t size,
12168 vm_map_offset_t mask,
060df5ea 12169 int flags,
91447636
A
12170 vm_map_t src_map,
12171 vm_map_offset_t memory_address,
1c79356b 12172 boolean_t copy,
1c79356b
A
12173 vm_prot_t *cur_protection,
12174 vm_prot_t *max_protection,
91447636 12175 vm_inherit_t inheritance)
1c79356b
A
12176{
12177 kern_return_t result;
91447636 12178 vm_map_entry_t entry;
0c530ab8 12179 vm_map_entry_t insp_entry = VM_MAP_ENTRY_NULL;
1c79356b 12180 vm_map_entry_t new_entry;
91447636 12181 struct vm_map_header map_header;
39236c6e 12182 vm_map_offset_t offset_in_mapping;
1c79356b 12183
91447636
A
12184 if (target_map == VM_MAP_NULL)
12185 return KERN_INVALID_ARGUMENT;
1c79356b 12186
91447636 12187 switch (inheritance) {
2d21ac55
A
12188 case VM_INHERIT_NONE:
12189 case VM_INHERIT_COPY:
12190 case VM_INHERIT_SHARE:
91447636
A
12191 if (size != 0 && src_map != VM_MAP_NULL)
12192 break;
12193 /*FALL THRU*/
2d21ac55 12194 default:
91447636
A
12195 return KERN_INVALID_ARGUMENT;
12196 }
1c79356b 12197
39236c6e
A
12198 /*
12199 * If the user is requesting that we return the address of the
12200 * first byte of the data (rather than the base of the page),
12201 * then we use different rounding semantics: specifically,
12202 * we assume that (memory_address, size) describes a region
12203 * all of whose pages we must cover, rather than a base to be truncated
12204 * down and a size to be added to that base. So we figure out
12205 * the highest page that the requested region includes and make
12206 * sure that the size will cover it.
12207 *
12208 * The key example we're worried about is of the form:
12209 *
12210 * memory_address = 0x1ff0, size = 0x20
12211 *
12212 * With the old semantics, we round down the memory_address to 0x1000
12213 * and round up the size to 0x1000, resulting in our covering *only*
12214 * page 0x1000. With the new semantics, we'd realize that the region covers
12215 * 0x1ff0-0x2010, and compute a size of 0x2000. Thus, we cover both page
12216 * 0x1000 and page 0x2000 in the region we remap.
12217 */
12218 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
12219 offset_in_mapping = memory_address - vm_map_trunc_page(memory_address, PAGE_MASK);
12220 size = vm_map_round_page(memory_address + size - vm_map_trunc_page(memory_address, PAGE_MASK), PAGE_MASK);
12221 } else {
12222 size = vm_map_round_page(size, PAGE_MASK);
12223 }
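	/*
	 * Worked example of the arithmetic above (4KB pages assumed):
	 * with memory_address = 0x1ff0 and size = 0x20,
	 * offset_in_mapping = 0x1ff0 - 0x1000 = 0xff0 and
	 * size = round_page(0x1ff0 + 0x20 - 0x1000) = round_page(0x1010)
	 *      = 0x2000, so both page 0x1000 and page 0x2000 are covered,
	 * as described in the comment above.
	 */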
1c79356b 12224
91447636 12225 result = vm_map_remap_extract(src_map, memory_address,
2d21ac55
A
12226 size, copy, &map_header,
12227 cur_protection,
12228 max_protection,
12229 inheritance,
39236c6e 12230 target_map->hdr.entries_pageable);
1c79356b 12231
91447636
A
12232 if (result != KERN_SUCCESS) {
12233 return result;
12234 }
1c79356b 12235
91447636
A
12236 /*
12237 * Allocate/check a range of free virtual address
12238 * space for the target
1c79356b 12239 */
39236c6e
A
12240 *address = vm_map_trunc_page(*address,
12241 VM_MAP_PAGE_MASK(target_map));
91447636
A
12242 vm_map_lock(target_map);
12243 result = vm_map_remap_range_allocate(target_map, address, size,
060df5ea 12244 mask, flags, &insp_entry);
1c79356b 12245
91447636
A
12246 for (entry = map_header.links.next;
12247 entry != (struct vm_map_entry *)&map_header.links;
12248 entry = new_entry) {
12249 new_entry = entry->vme_next;
6d2010ae 12250 _vm_map_store_entry_unlink(&map_header, entry);
91447636
A
12251 if (result == KERN_SUCCESS) {
12252 entry->vme_start += *address;
12253 entry->vme_end += *address;
39236c6e 12254 assert(!entry->map_aligned);
6d2010ae 12255 vm_map_store_entry_link(target_map, insp_entry, entry);
91447636
A
12256 insp_entry = entry;
12257 } else {
12258 if (!entry->is_sub_map) {
12259 vm_object_deallocate(entry->object.vm_object);
12260 } else {
12261 vm_map_deallocate(entry->object.sub_map);
2d21ac55 12262 }
91447636 12263 _vm_map_entry_dispose(&map_header, entry);
1c79356b 12264 }
91447636 12265 }
1c79356b 12266
6d2010ae
A
12267 if( target_map->disable_vmentry_reuse == TRUE) {
12268 if( target_map->highest_entry_end < insp_entry->vme_end ){
12269 target_map->highest_entry_end = insp_entry->vme_end;
12270 }
12271 }
12272
91447636
A
12273 if (result == KERN_SUCCESS) {
12274 target_map->size += size;
0c530ab8 12275 SAVE_HINT_MAP_WRITE(target_map, insp_entry);
91447636
A
12276 }
12277 vm_map_unlock(target_map);
1c79356b 12278
91447636
A
12279 if (result == KERN_SUCCESS && target_map->wiring_required)
12280 result = vm_map_wire(target_map, *address,
12281 *address + size, *cur_protection, TRUE);
39236c6e
A
12282
12283 /*
12284 * If requested, return the address of the data pointed to by the
12285 * request, rather than the base of the resulting page.
12286 */
12287 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
12288 *address += offset_in_mapping;
12289 }
12290
91447636
A
12291 return result;
12292}
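/*
 * Illustrative sketch (assumed user-level caller, not part of the
 * original source): vm_map_remap() backs the Mach mach_vm_remap()
 * interface, so a task can alias one of its own ranges roughly like
 * this, passing copy == FALSE so the new mapping shares the original
 * pages:
 *
 *	mach_vm_address_t target = 0;
 *	vm_prot_t cur_prot, max_prot;
 *	kern_return_t kr;
 *
 *	kr = mach_vm_remap(mach_task_self(), &target, size, 0,
 *			   VM_FLAGS_ANYWHERE,
 *			   mach_task_self(), source_addr, FALSE,
 *			   &cur_prot, &max_prot, VM_INHERIT_SHARE);
 */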
1c79356b 12293
91447636
A
12294/*
12295 * Routine: vm_map_remap_range_allocate
12296 *
12297 * Description:
12298 * Allocate a range in the specified virtual address map.
12299 * Returns the address and the map entry just before the allocated
12300 * range.
12301 *
12302 * Map must be locked.
12303 */
1c79356b 12304
91447636
A
12305static kern_return_t
12306vm_map_remap_range_allocate(
12307 vm_map_t map,
12308 vm_map_address_t *address, /* IN/OUT */
12309 vm_map_size_t size,
12310 vm_map_offset_t mask,
060df5ea 12311 int flags,
91447636
A
12312 vm_map_entry_t *map_entry) /* OUT */
12313{
060df5ea
A
12314 vm_map_entry_t entry;
12315 vm_map_offset_t start;
12316 vm_map_offset_t end;
12317 kern_return_t kr;
1c79356b 12318
2d21ac55 12319StartAgain: ;
1c79356b 12320
2d21ac55 12321 start = *address;
1c79356b 12322
060df5ea 12323 if (flags & VM_FLAGS_ANYWHERE)
2d21ac55
A
12324 {
12325 /*
12326 * Calculate the first possible address.
12327 */
1c79356b 12328
2d21ac55
A
12329 if (start < map->min_offset)
12330 start = map->min_offset;
12331 if (start > map->max_offset)
12332 return(KERN_NO_SPACE);
91447636 12333
2d21ac55
A
12334 /*
12335 * Look for the first possible address;
12336 * if there's already something at this
12337 * address, we have to start after it.
12338 */
1c79356b 12339
6d2010ae
A
12340 if( map->disable_vmentry_reuse == TRUE) {
12341 VM_MAP_HIGHEST_ENTRY(map, entry, start);
2d21ac55 12342 } else {
6d2010ae
A
12343 assert(first_free_is_valid(map));
12344 if (start == map->min_offset) {
12345 if ((entry = map->first_free) != vm_map_to_entry(map))
12346 start = entry->vme_end;
12347 } else {
12348 vm_map_entry_t tmp_entry;
12349 if (vm_map_lookup_entry(map, start, &tmp_entry))
12350 start = tmp_entry->vme_end;
12351 entry = tmp_entry;
12352 }
39236c6e
A
12353 start = vm_map_round_page(start,
12354 VM_MAP_PAGE_MASK(map));
2d21ac55 12355 }
91447636 12356
2d21ac55
A
12357 /*
12358 * In any case, the "entry" always precedes
12359 * the proposed new region throughout the
12360 * loop:
12361 */
1c79356b 12362
2d21ac55
A
12363 while (TRUE) {
12364 register vm_map_entry_t next;
12365
12366 /*
12367 * Find the end of the proposed new region.
12368 * Be sure we didn't go beyond the end, or
12369 * wrap around the address.
12370 */
12371
12372 end = ((start + mask) & ~mask);
39236c6e
A
12373 end = vm_map_round_page(end,
12374 VM_MAP_PAGE_MASK(map));
2d21ac55
A
12375 if (end < start)
12376 return(KERN_NO_SPACE);
12377 start = end;
12378 end += size;
12379
12380 if ((end > map->max_offset) || (end < start)) {
12381 if (map->wait_for_space) {
12382 if (size <= (map->max_offset -
12383 map->min_offset)) {
12384 assert_wait((event_t) map, THREAD_INTERRUPTIBLE);
12385 vm_map_unlock(map);
12386 thread_block(THREAD_CONTINUE_NULL);
12387 vm_map_lock(map);
12388 goto StartAgain;
12389 }
12390 }
91447636 12391
2d21ac55
A
12392 return(KERN_NO_SPACE);
12393 }
1c79356b 12394
2d21ac55
A
12395 /*
12396 * If there are no more entries, we must win.
12397 */
1c79356b 12398
2d21ac55
A
12399 next = entry->vme_next;
12400 if (next == vm_map_to_entry(map))
12401 break;
1c79356b 12402
2d21ac55
A
12403 /*
12404 * If there is another entry, it must be
12405 * after the end of the potential new region.
12406 */
1c79356b 12407
2d21ac55
A
12408 if (next->vme_start >= end)
12409 break;
1c79356b 12410
2d21ac55
A
12411 /*
12412 * Didn't fit -- move to the next entry.
12413 */
1c79356b 12414
2d21ac55
A
12415 entry = next;
12416 start = entry->vme_end;
12417 }
12418 *address = start;
12419 } else {
12420 vm_map_entry_t temp_entry;
91447636 12421
2d21ac55
A
12422 /*
12423 * Verify that:
12424 * the address doesn't itself violate
12425 * the mask requirement.
12426 */
1c79356b 12427
2d21ac55
A
12428 if ((start & mask) != 0)
12429 return(KERN_NO_SPACE);
1c79356b 12430
1c79356b 12431
2d21ac55
A
12432 /*
12433 * ... the address is within bounds
12434 */
1c79356b 12435
2d21ac55 12436 end = start + size;
1c79356b 12437
2d21ac55
A
12438 if ((start < map->min_offset) ||
12439 (end > map->max_offset) ||
12440 (start >= end)) {
12441 return(KERN_INVALID_ADDRESS);
12442 }
1c79356b 12443
060df5ea
A
12444 /*
12445 * If we're asked to overwrite whatever was mapped in that
12446 * range, first deallocate that range.
12447 */
12448 if (flags & VM_FLAGS_OVERWRITE) {
12449 vm_map_t zap_map;
12450
12451 /*
12452 * We use a "zap_map" to avoid having to unlock
12453 * the "map" in vm_map_delete(), which would compromise
12454 * the atomicity of the "deallocate" and then "remap"
12455 * combination.
12456 */
12457 zap_map = vm_map_create(PMAP_NULL,
12458 start,
316670eb 12459 end,
060df5ea
A
12460 map->hdr.entries_pageable);
12461 if (zap_map == VM_MAP_NULL) {
12462 return KERN_RESOURCE_SHORTAGE;
12463 }
39236c6e 12464 vm_map_set_page_shift(zap_map, VM_MAP_PAGE_SHIFT(map));
060df5ea
A
12465
12466 kr = vm_map_delete(map, start, end,
12467 VM_MAP_REMOVE_SAVE_ENTRIES,
12468 zap_map);
12469 if (kr == KERN_SUCCESS) {
12470 vm_map_destroy(zap_map,
12471 VM_MAP_REMOVE_NO_PMAP_CLEANUP);
12472 zap_map = VM_MAP_NULL;
12473 }
12474 }
12475
2d21ac55
A
12476 /*
12477 * ... the starting address isn't allocated
12478 */
91447636 12479
2d21ac55
A
12480 if (vm_map_lookup_entry(map, start, &temp_entry))
12481 return(KERN_NO_SPACE);
91447636 12482
2d21ac55 12483 entry = temp_entry;
91447636 12484
2d21ac55
A
12485 /*
12486 * ... the next region doesn't overlap the
12487 * end point.
12488 */
1c79356b 12489
2d21ac55
A
12490 if ((entry->vme_next != vm_map_to_entry(map)) &&
12491 (entry->vme_next->vme_start < end))
12492 return(KERN_NO_SPACE);
12493 }
12494 *map_entry = entry;
12495 return(KERN_SUCCESS);
91447636 12496}
1c79356b 12497
91447636
A
12498/*
12499 * vm_map_switch:
12500 *
12501 * Set the address map for the current thread to the specified map
12502 */
1c79356b 12503
91447636
A
12504vm_map_t
12505vm_map_switch(
12506 vm_map_t map)
12507{
12508 int mycpu;
12509 thread_t thread = current_thread();
12510 vm_map_t oldmap = thread->map;
1c79356b 12511
91447636
A
12512 mp_disable_preemption();
12513 mycpu = cpu_number();
1c79356b 12514
91447636
A
12515 /*
12516 * Deactivate the current map and activate the requested map
12517 */
12518 PMAP_SWITCH_USER(thread, map, mycpu);
1c79356b 12519
91447636
A
12520 mp_enable_preemption();
12521 return(oldmap);
12522}
1c79356b 12523
1c79356b 12524
91447636
A
12525/*
12526 * Routine: vm_map_write_user
12527 *
12528 * Description:
12529 * Copy out data from a kernel space into space in the
12530 * destination map. The space must already exist in the
12531 * destination map.
12532 * NOTE: This routine should only be called by threads
12533 * which can block on a page fault, i.e. kernel-mode user
12534 * threads.
12535 *
12536 */
12537kern_return_t
12538vm_map_write_user(
12539 vm_map_t map,
12540 void *src_p,
12541 vm_map_address_t dst_addr,
12542 vm_size_t size)
12543{
12544 kern_return_t kr = KERN_SUCCESS;
1c79356b 12545
91447636
A
12546 if(current_map() == map) {
12547 if (copyout(src_p, dst_addr, size)) {
12548 kr = KERN_INVALID_ADDRESS;
12549 }
12550 } else {
12551 vm_map_t oldmap;
1c79356b 12552
91447636
A
12553 /* take on the identity of the target map while doing */
12554 /* the transfer */
1c79356b 12555
91447636
A
12556 vm_map_reference(map);
12557 oldmap = vm_map_switch(map);
12558 if (copyout(src_p, dst_addr, size)) {
12559 kr = KERN_INVALID_ADDRESS;
1c79356b 12560 }
91447636
A
12561 vm_map_switch(oldmap);
12562 vm_map_deallocate(map);
1c79356b 12563 }
91447636 12564 return kr;
1c79356b
A
12565}
12566
12567/*
91447636
A
12568 * Routine: vm_map_read_user
12569 *
12570 * Description:
12571 * Copy in data from a user space source map into the
12572 * kernel map. The space must already exist in the
12573 * kernel map.
12574 * NOTE: This routine should only be called by threads
12575 * which can block on a page fault, i.e. kernel-mode user
12576 * threads.
1c79356b 12577 *
1c79356b
A
12578 */
12579kern_return_t
91447636
A
12580vm_map_read_user(
12581 vm_map_t map,
12582 vm_map_address_t src_addr,
12583 void *dst_p,
12584 vm_size_t size)
1c79356b 12585{
91447636 12586 kern_return_t kr = KERN_SUCCESS;
1c79356b 12587
91447636
A
12588 if(current_map() == map) {
12589 if (copyin(src_addr, dst_p, size)) {
12590 kr = KERN_INVALID_ADDRESS;
12591 }
12592 } else {
12593 vm_map_t oldmap;
1c79356b 12594
91447636
A
12595 /* take on the identity of the target map while doing */
12596 /* the transfer */
12597
12598 vm_map_reference(map);
12599 oldmap = vm_map_switch(map);
12600 if (copyin(src_addr, dst_p, size)) {
12601 kr = KERN_INVALID_ADDRESS;
12602 }
12603 vm_map_switch(oldmap);
12604 vm_map_deallocate(map);
1c79356b 12605 }
91447636
A
12606 return kr;
12607}
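/*
 * Illustrative sketch (assumed caller, not part of the original source):
 * a kernel thread that must move data to and from another task's
 * address space can pair the two helpers above, remembering that both
 * may fault and therefore block:
 *
 *	kern_return_t kr;
 *
 *	kr = vm_map_read_user(task_map, user_src, kbuf, len);
 *	if (kr == KERN_SUCCESS)
 *		kr = vm_map_write_user(task_map, kbuf, user_dst, len);
 */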
12608
1c79356b 12609
91447636
A
12610/*
12611 * vm_map_check_protection:
12612 *
12613 * Assert that the target map allows the specified
12614 * privilege on the entire address region given.
12615 * The entire region must be allocated.
12616 */
2d21ac55
A
12617boolean_t
12618vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
12619 vm_map_offset_t end, vm_prot_t protection)
91447636 12620{
2d21ac55
A
12621 vm_map_entry_t entry;
12622 vm_map_entry_t tmp_entry;
1c79356b 12623
91447636 12624 vm_map_lock(map);
1c79356b 12625
2d21ac55 12626 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
91447636 12627 {
2d21ac55
A
12628 vm_map_unlock(map);
12629 return (FALSE);
1c79356b
A
12630 }
12631
91447636
A
12632 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
12633 vm_map_unlock(map);
12634 return(FALSE);
12635 }
1c79356b 12636
91447636
A
12637 entry = tmp_entry;
12638
12639 while (start < end) {
12640 if (entry == vm_map_to_entry(map)) {
12641 vm_map_unlock(map);
12642 return(FALSE);
1c79356b 12643 }
1c79356b 12644
91447636
A
12645 /*
12646 * No holes allowed!
12647 */
1c79356b 12648
91447636
A
12649 if (start < entry->vme_start) {
12650 vm_map_unlock(map);
12651 return(FALSE);
12652 }
12653
12654 /*
12655 * Check protection associated with entry.
12656 */
12657
12658 if ((entry->protection & protection) != protection) {
12659 vm_map_unlock(map);
12660 return(FALSE);
12661 }
12662
12663 /* go to next entry */
12664
12665 start = entry->vme_end;
12666 entry = entry->vme_next;
12667 }
12668 vm_map_unlock(map);
12669 return(TRUE);
1c79356b
A
12670}
12671
1c79356b 12672kern_return_t
91447636
A
12673vm_map_purgable_control(
12674 vm_map_t map,
12675 vm_map_offset_t address,
12676 vm_purgable_t control,
12677 int *state)
1c79356b 12678{
91447636
A
12679 vm_map_entry_t entry;
12680 vm_object_t object;
12681 kern_return_t kr;
1c79356b 12682
1c79356b 12683 /*
91447636
A
12684 * Vet all the input parameters and current type and state of the
12685 * underlying object. Return with an error if anything is amiss.
1c79356b 12686 */
91447636
A
12687 if (map == VM_MAP_NULL)
12688 return(KERN_INVALID_ARGUMENT);
1c79356b 12689
91447636 12690 if (control != VM_PURGABLE_SET_STATE &&
b0d623f7
A
12691 control != VM_PURGABLE_GET_STATE &&
12692 control != VM_PURGABLE_PURGE_ALL)
91447636 12693 return(KERN_INVALID_ARGUMENT);
1c79356b 12694
b0d623f7
A
12695 if (control == VM_PURGABLE_PURGE_ALL) {
12696 vm_purgeable_object_purge_all();
12697 return KERN_SUCCESS;
12698 }
12699
91447636 12700 if (control == VM_PURGABLE_SET_STATE &&
b0d623f7 12701 (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) ||
2d21ac55 12702 ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK)))
91447636
A
12703 return(KERN_INVALID_ARGUMENT);
12704
b0d623f7 12705 vm_map_lock_read(map);
91447636
A
12706
12707 if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {
12708
12709 /*
12710 * Must pass a valid non-submap address.
12711 */
b0d623f7 12712 vm_map_unlock_read(map);
91447636
A
12713 return(KERN_INVALID_ADDRESS);
12714 }
12715
12716 if ((entry->protection & VM_PROT_WRITE) == 0) {
12717 /*
12718 * Can't apply purgable controls to something you can't write.
12719 */
b0d623f7 12720 vm_map_unlock_read(map);
91447636
A
12721 return(KERN_PROTECTION_FAILURE);
12722 }
12723
12724 object = entry->object.vm_object;
12725 if (object == VM_OBJECT_NULL) {
12726 /*
12727 * Object must already be present or it can't be purgable.
12728 */
b0d623f7 12729 vm_map_unlock_read(map);
91447636
A
12730 return KERN_INVALID_ARGUMENT;
12731 }
12732
12733 vm_object_lock(object);
12734
39236c6e 12735#if 00
91447636 12736 if (entry->offset != 0 ||
6d2010ae 12737 entry->vme_end - entry->vme_start != object->vo_size) {
91447636
A
12738 /*
12739 * Can only apply purgable controls to the whole (existing)
12740 * object at once.
12741 */
b0d623f7 12742 vm_map_unlock_read(map);
91447636
A
12743 vm_object_unlock(object);
12744 return KERN_INVALID_ARGUMENT;
1c79356b 12745 }
39236c6e 12746#endif
1c79356b 12747
b0d623f7 12748 vm_map_unlock_read(map);
1c79356b 12749
91447636 12750 kr = vm_object_purgable_control(object, control, state);
1c79356b 12751
91447636 12752 vm_object_unlock(object);
1c79356b 12753
91447636
A
12754 return kr;
12755}
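/*
 * Illustrative sketch (assumed user-level usage, not part of the
 * original source): the task-level interface built on this routine
 * lets a cache owner mark a region volatile and later try to take it
 * back:
 *
 *	int state = VM_PURGABLE_VOLATILE;
 *	kern_return_t kr;
 *
 *	kr = vm_purgable_control(mach_task_self(), (vm_address_t) buf,
 *				 VM_PURGABLE_SET_STATE, &state);
 *	...
 *	state = VM_PURGABLE_NONVOLATILE;
 *	kr = vm_purgable_control(mach_task_self(), (vm_address_t) buf,
 *				 VM_PURGABLE_SET_STATE, &state);
 *
 * On the second call the returned state is expected to come back as
 * VM_PURGABLE_EMPTY if the pages were purged in the meantime.
 */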
1c79356b 12756
91447636 12757kern_return_t
b0d623f7 12758vm_map_page_query_internal(
2d21ac55 12759 vm_map_t target_map,
91447636 12760 vm_map_offset_t offset,
2d21ac55
A
12761 int *disposition,
12762 int *ref_count)
91447636 12763{
b0d623f7
A
12764 kern_return_t kr;
12765 vm_page_info_basic_data_t info;
12766 mach_msg_type_number_t count;
12767
12768 count = VM_PAGE_INFO_BASIC_COUNT;
12769 kr = vm_map_page_info(target_map,
12770 offset,
12771 VM_PAGE_INFO_BASIC,
12772 (vm_page_info_t) &info,
12773 &count);
12774 if (kr == KERN_SUCCESS) {
12775 *disposition = info.disposition;
12776 *ref_count = info.ref_count;
12777 } else {
12778 *disposition = 0;
12779 *ref_count = 0;
12780 }
2d21ac55 12781
b0d623f7
A
12782 return kr;
12783}
12784
12785kern_return_t
12786vm_map_page_info(
12787 vm_map_t map,
12788 vm_map_offset_t offset,
12789 vm_page_info_flavor_t flavor,
12790 vm_page_info_t info,
12791 mach_msg_type_number_t *count)
12792{
12793 vm_map_entry_t map_entry;
12794 vm_object_t object;
12795 vm_page_t m;
12796 kern_return_t kr;
12797 kern_return_t retval = KERN_SUCCESS;
12798 boolean_t top_object;
12799 int disposition;
12800 int ref_count;
b0d623f7
A
12801 vm_page_info_basic_t basic_info;
12802 int depth;
6d2010ae 12803 vm_map_offset_t offset_in_page;
2d21ac55 12804
b0d623f7
A
12805 switch (flavor) {
12806 case VM_PAGE_INFO_BASIC:
12807 if (*count != VM_PAGE_INFO_BASIC_COUNT) {
6d2010ae
A
12808 /*
12809 * The "vm_page_info_basic_data" structure was not
12810 * properly padded, so allow the size to be off by
12811 * one to maintain backwards binary compatibility...
12812 */
12813 if (*count != VM_PAGE_INFO_BASIC_COUNT - 1)
12814 return KERN_INVALID_ARGUMENT;
b0d623f7
A
12815 }
12816 break;
12817 default:
12818 return KERN_INVALID_ARGUMENT;
91447636 12819 }
2d21ac55 12820
b0d623f7
A
12821 disposition = 0;
12822 ref_count = 0;
b0d623f7
A
12823 top_object = TRUE;
12824 depth = 0;
12825
12826 retval = KERN_SUCCESS;
6d2010ae 12827 offset_in_page = offset & PAGE_MASK;
39236c6e 12828 offset = vm_map_trunc_page(offset, PAGE_MASK);
b0d623f7
A
12829
12830 vm_map_lock_read(map);
12831
12832 /*
12833 * First, find the map entry covering "offset", going down
12834 * submaps if necessary.
12835 */
12836 for (;;) {
12837 if (!vm_map_lookup_entry(map, offset, &map_entry)) {
12838 vm_map_unlock_read(map);
12839 return KERN_INVALID_ADDRESS;
12840 }
12841 /* compute offset from this map entry's start */
12842 offset -= map_entry->vme_start;
12843 /* compute offset into this map entry's object (or submap) */
12844 offset += map_entry->offset;
12845
12846 if (map_entry->is_sub_map) {
12847 vm_map_t sub_map;
2d21ac55
A
12848
12849 sub_map = map_entry->object.sub_map;
12850 vm_map_lock_read(sub_map);
b0d623f7 12851 vm_map_unlock_read(map);
2d21ac55 12852
b0d623f7
A
12853 map = sub_map;
12854
12855 ref_count = MAX(ref_count, map->ref_count);
12856 continue;
1c79356b 12857 }
b0d623f7 12858 break;
91447636 12859 }
b0d623f7
A
12860
12861 object = map_entry->object.vm_object;
12862 if (object == VM_OBJECT_NULL) {
12863 /* no object -> no page */
12864 vm_map_unlock_read(map);
12865 goto done;
12866 }
12867
91447636 12868 vm_object_lock(object);
b0d623f7
A
12869 vm_map_unlock_read(map);
12870
12871 /*
12872 * Go down the VM object shadow chain until we find the page
12873 * we're looking for.
12874 */
12875 for (;;) {
12876 ref_count = MAX(ref_count, object->ref_count);
2d21ac55 12877
91447636 12878 m = vm_page_lookup(object, offset);
2d21ac55 12879
91447636 12880 if (m != VM_PAGE_NULL) {
b0d623f7 12881 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
91447636
A
12882 break;
12883 } else {
2d21ac55
A
12884#if MACH_PAGEMAP
12885 if (object->existence_map) {
b0d623f7
A
12886 if (vm_external_state_get(object->existence_map,
12887 offset) ==
12888 VM_EXTERNAL_STATE_EXISTS) {
2d21ac55
A
12889 /*
12890 * this page has been paged out
12891 */
b0d623f7 12892 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
2d21ac55
A
12893 break;
12894 }
12895 } else
12896#endif
39236c6e
A
12897 if (object->internal &&
12898 object->alive &&
12899 !object->terminating &&
12900 object->pager_ready) {
12901
12902 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
12903 if (VM_COMPRESSOR_PAGER_STATE_GET(
12904 object,
12905 offset)
12906 == VM_EXTERNAL_STATE_EXISTS) {
12907 /* the pager has that page */
12908 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
12909 break;
12910 }
12911 } else {
b0d623f7 12912 memory_object_t pager;
2d21ac55 12913
b0d623f7
A
12914 vm_object_paging_begin(object);
12915 pager = object->pager;
12916 vm_object_unlock(object);
2d21ac55 12917
2d21ac55 12918 /*
b0d623f7
A
12919 * Ask the default pager if
12920 * it has this page.
2d21ac55 12921 */
b0d623f7
A
12922 kr = memory_object_data_request(
12923 pager,
12924 offset + object->paging_offset,
12925 0, /* just poke the pager */
12926 VM_PROT_READ,
12927 NULL);
12928
12929 vm_object_lock(object);
12930 vm_object_paging_end(object);
12931
12932 if (kr == KERN_SUCCESS) {
12933 /* the default pager has it */
12934 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
12935 break;
12936 }
2d21ac55
A
12937 }
12938 }
b0d623f7 12939
2d21ac55
A
12940 if (object->shadow != VM_OBJECT_NULL) {
12941 vm_object_t shadow;
12942
6d2010ae 12943 offset += object->vo_shadow_offset;
2d21ac55
A
12944 shadow = object->shadow;
12945
12946 vm_object_lock(shadow);
12947 vm_object_unlock(object);
12948
12949 object = shadow;
12950 top_object = FALSE;
b0d623f7 12951 depth++;
2d21ac55 12952 } else {
b0d623f7
A
12953// if (!object->internal)
12954// break;
12955// retval = KERN_FAILURE;
12956// goto done_with_object;
12957 break;
91447636 12958 }
91447636
A
12959 }
12960 }
91447636
A
12961 /* The ref_count is not strictly accurate: it measures the number */
12962 /* of entities holding a ref on the object; they may not be mapping */
12963 /* the object, or may not be mapping the section holding the */
12964 /* target page, but it's still a ballpark number and, though an */
12965 /* overcount, it picks up the copy-on-write cases. */
1c79356b 12966
91447636
A
12967 /* We could also get a picture of page sharing from pmap_attributes, */
12968 /* but this would undercount, as only faulted-in mappings would */
12969 /* show up. */
1c79356b 12970
2d21ac55 12971 if (top_object == TRUE && object->shadow)
b0d623f7
A
12972 disposition |= VM_PAGE_QUERY_PAGE_COPIED;
12973
12974 if (! object->internal)
12975 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
2d21ac55
A
12976
12977 if (m == VM_PAGE_NULL)
b0d623f7 12978 goto done_with_object;
2d21ac55 12979
91447636 12980 if (m->fictitious) {
b0d623f7
A
12981 disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
12982 goto done_with_object;
91447636 12983 }
2d21ac55 12984 if (m->dirty || pmap_is_modified(m->phys_page))
b0d623f7 12985 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
1c79356b 12986
2d21ac55 12987 if (m->reference || pmap_is_referenced(m->phys_page))
b0d623f7 12988 disposition |= VM_PAGE_QUERY_PAGE_REF;
1c79356b 12989
2d21ac55 12990 if (m->speculative)
b0d623f7 12991 disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE;
1c79356b 12992
593a1d5f 12993 if (m->cs_validated)
b0d623f7 12994 disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED;
593a1d5f 12995 if (m->cs_tainted)
b0d623f7 12996 disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED;
593a1d5f 12997
b0d623f7 12998done_with_object:
2d21ac55 12999 vm_object_unlock(object);
b0d623f7
A
13000done:
13001
13002 switch (flavor) {
13003 case VM_PAGE_INFO_BASIC:
13004 basic_info = (vm_page_info_basic_t) info;
13005 basic_info->disposition = disposition;
13006 basic_info->ref_count = ref_count;
39236c6e
A
13007 basic_info->object_id = (vm_object_id_t) (uintptr_t)
13008 VM_KERNEL_ADDRPERM(object);
6d2010ae
A
13009 basic_info->offset =
13010 (memory_object_offset_t) offset + offset_in_page;
b0d623f7
A
13011 basic_info->depth = depth;
13012 break;
13013 }
0c530ab8 13014
2d21ac55 13015 return retval;
91447636
A
13016}
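/*
 * Illustrative user-space sketch, not part of this file: how the
 * VM_PAGE_INFO_BASIC flavor handled above is typically consumed through the
 * MIG routine mach_vm_page_info().  It assumes the usual user-level Mach
 * headers expose that routine, the basic-info structure and the
 * VM_PAGE_QUERY_PAGE_* bits; the surrounding function is illustrative only.
 */
#include <mach/mach.h>
#include <mach/mach_vm.h>
#include <stdio.h>

static void
query_page_example(mach_vm_address_t addr)
{
	vm_page_info_basic_data_t info;
	mach_msg_type_number_t count = VM_PAGE_INFO_BASIC_COUNT;
	kern_return_t kr;

	kr = mach_vm_page_info(mach_task_self(), addr, VM_PAGE_INFO_BASIC,
			       (vm_page_info_t)&info, &count);
	if (kr != KERN_SUCCESS)
		return;

	/* "disposition" carries the VM_PAGE_QUERY_PAGE_* bits set above */
	if (info.disposition & VM_PAGE_QUERY_PAGE_PRESENT)
		printf("resident (shadow depth %d, ref_count %d)\n",
		       info.depth, info.ref_count);
	else if (info.disposition & VM_PAGE_QUERY_PAGE_PAGED_OUT)
		printf("paged out or held by the compressor\n");
	else
		printf("no page\n");
}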
13017
13018/*
13019 * vm_map_msync
13020 *
13021 * Synchronizes the specified memory range with its backing store
13022 * image by either flushing or cleaning the contents to the appropriate
13023 * memory manager, engaging in a memory_object_synchronize dialog with
13024 * that manager. The client does not return until the manager issues
13025 * an m_o_s_completed message. MIG magically converts the user task
13026 * parameter to the task's address map.
13027 *
13028 * interpretation of sync_flags
13029 * VM_SYNC_INVALIDATE - discard pages, only return precious
13030 * pages to manager.
13031 *
13032 * VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
13033 * - discard pages, write dirty or precious
13034 * pages back to memory manager.
13035 *
13036 * VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
13037 * - write dirty or precious pages back to
13038 * the memory manager.
13039 *
13040 * VM_SYNC_CONTIGUOUS - does everything normally, but if there
13041 * is a hole in the region, and we would
13042 * have returned KERN_SUCCESS, return
13043 * KERN_INVALID_ADDRESS instead.
13044 *
13045 * NOTE
13046 * The memory object attributes have not yet been implemented; this
13047 * function will have to deal with the invalidate attribute.
13048 *
13049 * RETURNS
13050 * KERN_INVALID_TASK Bad task parameter
13051 * KERN_INVALID_ARGUMENT both sync and async were specified.
13052 * KERN_SUCCESS The usual.
13053 * KERN_INVALID_ADDRESS There was a hole in the region.
13054 */
13055
13056kern_return_t
13057vm_map_msync(
13058 vm_map_t map,
13059 vm_map_address_t address,
13060 vm_map_size_t size,
13061 vm_sync_t sync_flags)
13062{
13063 msync_req_t msr;
13064 msync_req_t new_msr;
13065 queue_chain_t req_q; /* queue of requests for this msync */
13066 vm_map_entry_t entry;
13067 vm_map_size_t amount_left;
13068 vm_object_offset_t offset;
13069 boolean_t do_sync_req;
91447636 13070 boolean_t had_hole = FALSE;
2d21ac55 13071 memory_object_t pager;
91447636
A
13072
13073 if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
13074 (sync_flags & VM_SYNC_SYNCHRONOUS))
13075 return(KERN_INVALID_ARGUMENT);
1c79356b
A
13076
13077 /*
91447636 13078 * align address and size on page boundaries
1c79356b 13079 */
39236c6e
A
13080 size = (vm_map_round_page(address + size,
13081 VM_MAP_PAGE_MASK(map)) -
13082 vm_map_trunc_page(address,
13083 VM_MAP_PAGE_MASK(map)));
13084 address = vm_map_trunc_page(address,
13085 VM_MAP_PAGE_MASK(map));
1c79356b 13086
91447636
A
13087 if (map == VM_MAP_NULL)
13088 return(KERN_INVALID_TASK);
1c79356b 13089
91447636
A
13090 if (size == 0)
13091 return(KERN_SUCCESS);
1c79356b 13092
91447636
A
13093 queue_init(&req_q);
13094 amount_left = size;
1c79356b 13095
91447636
A
13096 while (amount_left > 0) {
13097 vm_object_size_t flush_size;
13098 vm_object_t object;
1c79356b 13099
91447636
A
13100 vm_map_lock(map);
13101 if (!vm_map_lookup_entry(map,
39236c6e
A
13102 vm_map_trunc_page(
13103 address,
13104 VM_MAP_PAGE_MASK(map)),
13105 &entry)) {
91447636 13106
2d21ac55 13107 vm_map_size_t skip;
91447636
A
13108
13109 /*
13110 * hole in the address map.
13111 */
13112 had_hole = TRUE;
13113
13114 /*
13115 * Check for empty map.
13116 */
13117 if (entry == vm_map_to_entry(map) &&
13118 entry->vme_next == entry) {
13119 vm_map_unlock(map);
13120 break;
13121 }
13122 /*
13123 * Check that we don't wrap and that
13124 * we have at least one real map entry.
13125 */
13126 if ((map->hdr.nentries == 0) ||
13127 (entry->vme_next->vme_start < address)) {
13128 vm_map_unlock(map);
13129 break;
13130 }
13131 /*
13132 * Move up to the next entry if needed
13133 */
13134 skip = (entry->vme_next->vme_start - address);
13135 if (skip >= amount_left)
13136 amount_left = 0;
13137 else
13138 amount_left -= skip;
13139 address = entry->vme_next->vme_start;
13140 vm_map_unlock(map);
13141 continue;
13142 }
1c79356b 13143
91447636 13144 offset = address - entry->vme_start;
1c79356b 13145
91447636
A
13146 /*
13147 * do we have more to flush than is contained in this
13148 * entry ?
13149 */
13150 if (amount_left + entry->vme_start + offset > entry->vme_end) {
13151 flush_size = entry->vme_end -
2d21ac55 13152 (entry->vme_start + offset);
91447636
A
13153 } else {
13154 flush_size = amount_left;
13155 }
13156 amount_left -= flush_size;
13157 address += flush_size;
1c79356b 13158
91447636
A
13159 if (entry->is_sub_map == TRUE) {
13160 vm_map_t local_map;
13161 vm_map_offset_t local_offset;
1c79356b 13162
91447636
A
13163 local_map = entry->object.sub_map;
13164 local_offset = entry->offset;
13165 vm_map_unlock(map);
13166 if (vm_map_msync(
2d21ac55
A
13167 local_map,
13168 local_offset,
13169 flush_size,
13170 sync_flags) == KERN_INVALID_ADDRESS) {
91447636
A
13171 had_hole = TRUE;
13172 }
13173 continue;
13174 }
13175 object = entry->object.vm_object;
1c79356b 13176
91447636
A
13177 /*
13178 * We can't sync this object if the object has not been
13179 * created yet
13180 */
13181 if (object == VM_OBJECT_NULL) {
13182 vm_map_unlock(map);
13183 continue;
13184 }
13185 offset += entry->offset;
1c79356b 13186
91447636 13187 vm_object_lock(object);
1c79356b 13188
91447636 13189 if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
b0d623f7
A
13190 int kill_pages = 0;
13191 boolean_t reusable_pages = FALSE;
91447636
A
13192
13193 if (sync_flags & VM_SYNC_KILLPAGES) {
b0d623f7 13194 if (object->ref_count == 1 && !object->shadow)
91447636
A
13195 kill_pages = 1;
13196 else
13197 kill_pages = -1;
13198 }
13199 if (kill_pages != -1)
13200 vm_object_deactivate_pages(object, offset,
b0d623f7 13201 (vm_object_size_t)flush_size, kill_pages, reusable_pages);
91447636
A
13202 vm_object_unlock(object);
13203 vm_map_unlock(map);
13204 continue;
1c79356b 13205 }
91447636
A
13206 /*
13207 * We can't sync this object if there isn't a pager.
13208 * Don't bother to sync internal objects, since there can't
13209 * be any "permanent" storage for these objects anyway.
13210 */
13211 if ((object->pager == MEMORY_OBJECT_NULL) ||
13212 (object->internal) || (object->private)) {
13213 vm_object_unlock(object);
13214 vm_map_unlock(map);
13215 continue;
13216 }
13217 /*
13218 * keep reference on the object until syncing is done
13219 */
2d21ac55 13220 vm_object_reference_locked(object);
91447636 13221 vm_object_unlock(object);
1c79356b 13222
91447636 13223 vm_map_unlock(map);
1c79356b 13224
91447636 13225 do_sync_req = vm_object_sync(object,
2d21ac55
A
13226 offset,
13227 flush_size,
13228 sync_flags & VM_SYNC_INVALIDATE,
b0d623f7
A
13229 ((sync_flags & VM_SYNC_SYNCHRONOUS) ||
13230 (sync_flags & VM_SYNC_ASYNCHRONOUS)),
2d21ac55 13231 sync_flags & VM_SYNC_SYNCHRONOUS);
91447636
A
13232 /*
13233 * only send an m_o_s if we returned pages or if the entry
13234 * is writable (i.e. dirty pages may have already been sent back)
13235 */
b0d623f7 13236 if (!do_sync_req) {
2d21ac55
A
13237 if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) {
13238 /*
13239 * clear out the clustering and read-ahead hints
13240 */
13241 vm_object_lock(object);
13242
13243 object->pages_created = 0;
13244 object->pages_used = 0;
13245 object->sequential = 0;
13246 object->last_alloc = 0;
13247
13248 vm_object_unlock(object);
13249 }
91447636
A
13250 vm_object_deallocate(object);
13251 continue;
1c79356b 13252 }
91447636 13253 msync_req_alloc(new_msr);
1c79356b 13254
91447636
A
13255 vm_object_lock(object);
13256 offset += object->paging_offset;
1c79356b 13257
91447636
A
13258 new_msr->offset = offset;
13259 new_msr->length = flush_size;
13260 new_msr->object = object;
13261 new_msr->flag = VM_MSYNC_SYNCHRONIZING;
2d21ac55
A
13262 re_iterate:
13263
13264 /*
13265 * We can't sync this object if there isn't a pager. The
13266 * pager can disappear anytime we're not holding the object
13267 * lock. So this has to be checked anytime we goto re_iterate.
13268 */
13269
13270 pager = object->pager;
13271
13272 if (pager == MEMORY_OBJECT_NULL) {
13273 vm_object_unlock(object);
13274 vm_object_deallocate(object);
39236c6e
A
13275 msync_req_free(new_msr);
13276 new_msr = NULL;
2d21ac55
A
13277 continue;
13278 }
13279
91447636
A
13280 queue_iterate(&object->msr_q, msr, msync_req_t, msr_q) {
13281 /*
13282 * need to check for overlapping entry, if found, wait
13283 * on overlapping msr to be done, then reiterate
13284 */
13285 msr_lock(msr);
13286 if (msr->flag == VM_MSYNC_SYNCHRONIZING &&
13287 ((offset >= msr->offset &&
13288 offset < (msr->offset + msr->length)) ||
13289 (msr->offset >= offset &&
13290 msr->offset < (offset + flush_size))))
13291 {
13292 assert_wait((event_t) msr,THREAD_INTERRUPTIBLE);
13293 msr_unlock(msr);
13294 vm_object_unlock(object);
13295 thread_block(THREAD_CONTINUE_NULL);
13296 vm_object_lock(object);
13297 goto re_iterate;
13298 }
13299 msr_unlock(msr);
13300 }/* queue_iterate */
1c79356b 13301
91447636 13302 queue_enter(&object->msr_q, new_msr, msync_req_t, msr_q);
2d21ac55
A
13303
13304 vm_object_paging_begin(object);
91447636 13305 vm_object_unlock(object);
1c79356b 13306
91447636
A
13307 queue_enter(&req_q, new_msr, msync_req_t, req_q);
13308
13309 (void) memory_object_synchronize(
2d21ac55
A
13310 pager,
13311 offset,
13312 flush_size,
13313 sync_flags & ~VM_SYNC_CONTIGUOUS);
13314
13315 vm_object_lock(object);
13316 vm_object_paging_end(object);
13317 vm_object_unlock(object);
91447636
A
13318 }/* while */
13319
13320 /*
13321 * wait for memory_object_synchronize_completed messages from pager(s)
13322 */
13323
13324 while (!queue_empty(&req_q)) {
13325 msr = (msync_req_t)queue_first(&req_q);
13326 msr_lock(msr);
13327 while(msr->flag != VM_MSYNC_DONE) {
13328 assert_wait((event_t) msr, THREAD_INTERRUPTIBLE);
13329 msr_unlock(msr);
13330 thread_block(THREAD_CONTINUE_NULL);
13331 msr_lock(msr);
13332 }/* while */
13333 queue_remove(&req_q, msr, msync_req_t, req_q);
13334 msr_unlock(msr);
13335 vm_object_deallocate(msr->object);
13336 msync_req_free(msr);
13337 }/* while */
13338
13339 /* for proper msync() behaviour */
13340 if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS))
13341 return(KERN_INVALID_ADDRESS);
13342
13343 return(KERN_SUCCESS);
13344}/* vm_msync */
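/*
 * Minimal kernel-side sketch of the sync_flags contract documented above
 * (illustrative, not an existing caller in this file): VM_SYNC_SYNCHRONOUS
 * makes the call wait until dirty/precious pages have been pushed to the
 * memory manager, and VM_SYNC_CONTIGUOUS turns a hole in the range into
 * KERN_INVALID_ADDRESS instead of KERN_SUCCESS.
 */
static kern_return_t
msync_range_example(
	vm_map_t		map,
	vm_map_address_t	addr,
	vm_map_size_t		len)
{
	/* vm_map_msync() rounds "addr"/"len" to the map's page size itself */
	return vm_map_msync(map, addr, len,
			    VM_SYNC_SYNCHRONOUS | VM_SYNC_CONTIGUOUS);
}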
1c79356b 13345
1c79356b 13346/*
91447636
A
13347 * Routine: convert_port_entry_to_map
13348 * Purpose:
13349 * Convert from a port specifying an entry or a task
13350 * to a map. Doesn't consume the port ref; produces a map ref,
13351 * which may be null. Unlike convert_port_to_map, the
13352 * port may be task or a named entry backed.
13353 * Conditions:
13354 * Nothing locked.
1c79356b 13355 */
1c79356b 13356
1c79356b 13357
91447636
A
13358vm_map_t
13359convert_port_entry_to_map(
13360 ipc_port_t port)
13361{
13362 vm_map_t map;
13363 vm_named_entry_t named_entry;
2d21ac55 13364 uint32_t try_failed_count = 0;
1c79356b 13365
91447636
A
13366 if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
13367 while(TRUE) {
13368 ip_lock(port);
13369 if(ip_active(port) && (ip_kotype(port)
2d21ac55 13370 == IKOT_NAMED_ENTRY)) {
91447636 13371 named_entry =
2d21ac55 13372 (vm_named_entry_t)port->ip_kobject;
b0d623f7 13373 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
91447636 13374 ip_unlock(port);
2d21ac55
A
13375
13376 try_failed_count++;
13377 mutex_pause(try_failed_count);
91447636
A
13378 continue;
13379 }
13380 named_entry->ref_count++;
b0d623f7 13381 lck_mtx_unlock(&(named_entry)->Lock);
91447636
A
13382 ip_unlock(port);
13383 if ((named_entry->is_sub_map) &&
2d21ac55
A
13384 (named_entry->protection
13385 & VM_PROT_WRITE)) {
91447636
A
13386 map = named_entry->backing.map;
13387 } else {
13388 mach_destroy_memory_entry(port);
13389 return VM_MAP_NULL;
13390 }
13391 vm_map_reference_swap(map);
13392 mach_destroy_memory_entry(port);
13393 break;
13394 }
13395 else
13396 return VM_MAP_NULL;
13397 }
1c79356b 13398 }
91447636
A
13399 else
13400 map = convert_port_to_map(port);
1c79356b 13401
91447636
A
13402 return map;
13403}
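/*
 * Hedged sketch of the reference contract documented above: the port
 * reference is left untouched, while a successful conversion produces a map
 * reference that the caller must drop.  The caller shown here is
 * illustrative only.
 */
static void
port_entry_map_example(ipc_port_t port)
{
	vm_map_t map;

	map = convert_port_entry_to_map(port);	/* may return VM_MAP_NULL */
	if (map == VM_MAP_NULL)
		return;
	/* ... use the map ... */
	vm_map_deallocate(map);			/* drop the produced map ref */
	/* the caller's reference on "port" is still held, released elsewhere */
}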
1c79356b 13404
91447636
A
13405/*
13406 * Routine: convert_port_entry_to_object
13407 * Purpose:
13408 * Convert from a port specifying a named entry to an
13409 * object. Doesn't consume the port ref; produces a map ref,
13410 * which may be null.
13411 * Conditions:
13412 * Nothing locked.
13413 */
1c79356b 13414
1c79356b 13415
91447636
A
13416vm_object_t
13417convert_port_entry_to_object(
13418 ipc_port_t port)
13419{
39236c6e 13420 vm_object_t object = VM_OBJECT_NULL;
91447636 13421 vm_named_entry_t named_entry;
39236c6e
A
13422 uint32_t try_failed_count = 0;
13423
13424 if (IP_VALID(port) &&
13425 (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
13426 try_again:
13427 ip_lock(port);
13428 if (ip_active(port) &&
13429 (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
13430 named_entry = (vm_named_entry_t)port->ip_kobject;
13431 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
91447636 13432 ip_unlock(port);
39236c6e
A
13433 try_failed_count++;
13434 mutex_pause(try_failed_count);
13435 goto try_again;
13436 }
13437 named_entry->ref_count++;
13438 lck_mtx_unlock(&(named_entry)->Lock);
13439 ip_unlock(port);
13440 if (!(named_entry->is_sub_map) &&
13441 !(named_entry->is_pager) &&
13442 !(named_entry->is_copy) &&
13443 (named_entry->protection & VM_PROT_WRITE)) {
13444 object = named_entry->backing.object;
13445 vm_object_reference(object);
91447636 13446 }
39236c6e 13447 mach_destroy_memory_entry(port);
1c79356b 13448 }
1c79356b 13449 }
91447636
A
13450
13451 return object;
1c79356b 13452}
9bccf70c
A
13453
13454/*
91447636
A
13455 * Export routines to other components for the things we access locally through
13456 * macros.
9bccf70c 13457 */
91447636
A
13458#undef current_map
13459vm_map_t
13460current_map(void)
9bccf70c 13461{
91447636 13462 return (current_map_fast());
9bccf70c
A
13463}
13464
13465/*
13466 * vm_map_reference:
13467 *
13468 * Most code internal to the osfmk will go through a
13469 * macro defining this. This is always here for the
13470 * use of other kernel components.
13471 */
13472#undef vm_map_reference
13473void
13474vm_map_reference(
13475 register vm_map_t map)
13476{
13477 if (map == VM_MAP_NULL)
13478 return;
13479
b0d623f7 13480 lck_mtx_lock(&map->s_lock);
9bccf70c
A
13481#if TASK_SWAPPER
13482 assert(map->res_count > 0);
13483 assert(map->ref_count >= map->res_count);
13484 map->res_count++;
13485#endif
13486 map->ref_count++;
b0d623f7 13487 lck_mtx_unlock(&map->s_lock);
9bccf70c
A
13488}
13489
13490/*
13491 * vm_map_deallocate:
13492 *
13493 * Removes a reference from the specified map,
13494 * destroying it if no references remain.
13495 * The map should not be locked.
13496 */
13497void
13498vm_map_deallocate(
13499 register vm_map_t map)
13500{
13501 unsigned int ref;
13502
13503 if (map == VM_MAP_NULL)
13504 return;
13505
b0d623f7 13506 lck_mtx_lock(&map->s_lock);
9bccf70c
A
13507 ref = --map->ref_count;
13508 if (ref > 0) {
13509 vm_map_res_deallocate(map);
b0d623f7 13510 lck_mtx_unlock(&map->s_lock);
9bccf70c
A
13511 return;
13512 }
13513 assert(map->ref_count == 0);
b0d623f7 13514 lck_mtx_unlock(&map->s_lock);
9bccf70c
A
13515
13516#if TASK_SWAPPER
13517 /*
13518 * The map residence count isn't decremented here because
13519 * the vm_map_delete below will traverse the entire map,
13520 * deleting entries, and the residence counts on objects
13521 * and sharing maps will go away then.
13522 */
13523#endif
13524
2d21ac55 13525 vm_map_destroy(map, VM_MAP_NO_FLAGS);
0c530ab8 13526}
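/*
 * Hedged usage sketch for the two exported routines above: any component
 * that stashes a vm_map_t takes its own reference and drops it when done.
 * The function below is illustrative, not an existing caller.
 */
static void
hold_map_briefly_example(vm_map_t map)
{
	if (map == VM_MAP_NULL)
		return;
	vm_map_reference(map);		/* keep the map (and, with TASK_SWAPPER,
					 * its residence count) alive */
	/* ... operate on "map" ... */
	vm_map_deallocate(map);		/* may destroy the map on last release */
}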
91447636 13527
91447636 13528
0c530ab8
A
13529void
13530vm_map_disable_NX(vm_map_t map)
13531{
13532 if (map == NULL)
13533 return;
13534 if (map->pmap == NULL)
13535 return;
13536
13537 pmap_disable_NX(map->pmap);
13538}
13539
6d2010ae
A
13540void
13541vm_map_disallow_data_exec(vm_map_t map)
13542{
13543 if (map == NULL)
13544 return;
13545
13546 map->map_disallow_data_exec = TRUE;
13547}
13548
0c530ab8
A
13549/* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
13550 * more descriptive.
13551 */
13552void
13553vm_map_set_32bit(vm_map_t map)
13554{
13555 map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
13556}
13557
13558
13559void
13560vm_map_set_64bit(vm_map_t map)
13561{
13562 map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
13563}
13564
13565vm_map_offset_t
13566vm_compute_max_offset(unsigned is64)
13567{
13568 return (is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS);
13569}
13570
39236c6e
A
13571uint64_t
13572vm_map_get_max_aslr_slide_pages(vm_map_t map)
13573{
13574 return (1 << (vm_map_is_64bit(map) ? 16 : 8));
13575}
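/*
 * Rough scale of the value returned above, assuming 4 KiB pages
 * (PAGE_SIZE == 4096):
 *	64-bit map: (1 << 16) pages * 4 KiB = 256 MiB of possible ASLR slide
 *	32-bit map: (1 << 8)  pages * 4 KiB =   1 MiB of possible ASLR slide
 */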
13576
0c530ab8 13577boolean_t
2d21ac55
A
13578vm_map_is_64bit(
13579 vm_map_t map)
13580{
13581 return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
13582}
13583
13584boolean_t
316670eb
A
13585vm_map_has_hard_pagezero(
13586 vm_map_t map,
13587 vm_map_offset_t pagezero_size)
0c530ab8
A
13588{
13589 /*
13590 * XXX FBDP
13591 * We should lock the VM map (for read) here but we can get away
13592 * with it for now because there can't really be any race condition:
13593 * the VM map's min_offset is changed only when the VM map is created
13594 * and when the zero page is established (when the binary gets loaded),
13595 * and this routine gets called only when the task terminates and the
13596 * VM map is being torn down, and when a new map is created via
13597 * load_machfile()/execve().
13598 */
316670eb 13599 return (map->min_offset >= pagezero_size);
0c530ab8
A
13600}
13601
13602void
13603vm_map_set_4GB_pagezero(vm_map_t map)
13604{
b0d623f7 13605#pragma unused(map)
b0d623f7 13606
0c530ab8
A
13607}
13608
13609void
13610vm_map_clear_4GB_pagezero(vm_map_t map)
13611{
b0d623f7 13612#pragma unused(map)
0c530ab8
A
13613}
13614
316670eb
A
13615/*
13616 * Raise a VM map's maximum offset.
13617 */
13618kern_return_t
13619vm_map_raise_max_offset(
13620 vm_map_t map,
13621 vm_map_offset_t new_max_offset)
13622{
13623 kern_return_t ret;
13624
13625 vm_map_lock(map);
13626 ret = KERN_INVALID_ADDRESS;
13627
13628 if (new_max_offset >= map->max_offset) {
13629 if (!vm_map_is_64bit(map)) {
13630 if (new_max_offset <= (vm_map_offset_t)VM_MAX_ADDRESS) {
13631 map->max_offset = new_max_offset;
13632 ret = KERN_SUCCESS;
13633 }
13634 } else {
13635 if (new_max_offset <= (vm_map_offset_t)MACH_VM_MAX_ADDRESS) {
13636 map->max_offset = new_max_offset;
13637 ret = KERN_SUCCESS;
13638 }
13639 }
13640 }
13641
13642 vm_map_unlock(map);
13643 return ret;
13644}
13645
13646
0c530ab8
A
13647/*
13648 * Raise a VM map's minimum offset.
13649 * To strictly enforce "page zero" reservation.
13650 */
13651kern_return_t
13652vm_map_raise_min_offset(
13653 vm_map_t map,
13654 vm_map_offset_t new_min_offset)
13655{
13656 vm_map_entry_t first_entry;
13657
39236c6e
A
13658 new_min_offset = vm_map_round_page(new_min_offset,
13659 VM_MAP_PAGE_MASK(map));
0c530ab8
A
13660
13661 vm_map_lock(map);
13662
13663 if (new_min_offset < map->min_offset) {
13664 /*
13665 * Can't move min_offset backwards, as that would expose
13666 * a part of the address space that was previously, and for
13667 * possibly good reasons, inaccessible.
13668 */
13669 vm_map_unlock(map);
13670 return KERN_INVALID_ADDRESS;
13671 }
13672
13673 first_entry = vm_map_first_entry(map);
13674 if (first_entry != vm_map_to_entry(map) &&
13675 first_entry->vme_start < new_min_offset) {
13676 /*
13677 * Some memory was already allocated below the new
13678 * minimum offset. It's too late to change it now...
13679 */
13680 vm_map_unlock(map);
13681 return KERN_NO_SPACE;
13682 }
13683
13684 map->min_offset = new_min_offset;
13685
13686 vm_map_unlock(map);
13687
13688 return KERN_SUCCESS;
13689}
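/*
 * Illustrative sketch of the routine above: reserving a 4 GB "page zero" by
 * refusing to map anything below 4 GB.  The constant is an assumption of
 * this sketch; the real caller (typically the Mach-O loader) derives the
 * size from the binary's __PAGEZERO segment.
 */
static kern_return_t
reserve_4gb_pagezero_example(vm_map_t map)
{
	return vm_map_raise_min_offset(map,
				       (vm_map_offset_t)0x100000000ULL);
}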
2d21ac55
A
13690
13691/*
13692 * Set the limit on the maximum amount of user wired memory allowed for this map.
13693 * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side of
13694 * the kernel. The limits are checked on the Mach VM side, so we keep a
13695 * copy here rather than reaching over to the BSD data structures.
13696 */
13697
13698void
13699vm_map_set_user_wire_limit(vm_map_t map,
13700 vm_size_t limit)
13701{
13702 map->user_wire_limit = limit;
13703}
593a1d5f 13704
b0d623f7
A
13705
13706void vm_map_switch_protect(vm_map_t map,
13707 boolean_t val)
593a1d5f
A
13708{
13709 vm_map_lock(map);
b0d623f7 13710 map->switch_protect = val;
593a1d5f 13711 vm_map_unlock(map);
b0d623f7 13712}
b7266188 13713
39236c6e
A
13714/*
13715 * IOKit has mapped a region into this map; adjust the pmap's ledgers appropriately.
13716 * phys_footprint is a composite limit consisting of iokit + physmem, so we need to
13717 * bump both counters.
13718 */
13719void
13720vm_map_iokit_mapped_region(vm_map_t map, vm_size_t bytes)
13721{
13722 pmap_t pmap = vm_map_pmap(map);
13723
13724 ledger_credit(pmap->ledger, task_ledgers.iokit_mem, bytes);
13725 ledger_credit(pmap->ledger, task_ledgers.phys_footprint, bytes);
13726}
13727
13728void
13729vm_map_iokit_unmapped_region(vm_map_t map, vm_size_t bytes)
13730{
13731 pmap_t pmap = vm_map_pmap(map);
13732
13733 ledger_debit(pmap->ledger, task_ledgers.iokit_mem, bytes);
13734 ledger_debit(pmap->ledger, task_ledgers.phys_footprint, bytes);
13735}
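/*
 * Hedged note on the two routines above: credits and debits must be issued
 * in balanced pairs with the same byte count, since phys_footprint is a
 * composite ledger.  The pairing below is illustrative only.
 */
static void
iokit_ledger_pairing_example(vm_map_t map, vm_size_t bytes)
{
	vm_map_iokit_mapped_region(map, bytes);	  /* +iokit_mem, +phys_footprint */
	/* ... the IOKit mapping exists here ... */
	vm_map_iokit_unmapped_region(map, bytes); /* -iokit_mem, -phys_footprint */
}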
13736
b7266188
A
13737/* Add (generate) code signature for memory range */
13738#if CONFIG_DYNAMIC_CODE_SIGNING
13739kern_return_t vm_map_sign(vm_map_t map,
13740 vm_map_offset_t start,
13741 vm_map_offset_t end)
13742{
13743 vm_map_entry_t entry;
13744 vm_page_t m;
13745 vm_object_t object;
13746
13747 /*
13748 * Vet all the input parameters and current type and state of the
13749 * underlying object. Return with an error if anything is amiss.
13750 */
13751 if (map == VM_MAP_NULL)
13752 return(KERN_INVALID_ARGUMENT);
13753
13754 vm_map_lock_read(map);
13755
13756 if (!vm_map_lookup_entry(map, start, &entry) || entry->is_sub_map) {
13757 /*
13758 * Must pass a valid non-submap address.
13759 */
13760 vm_map_unlock_read(map);
13761 return(KERN_INVALID_ADDRESS);
13762 }
13763
13764 if((entry->vme_start > start) || (entry->vme_end < end)) {
13765 /*
13766 * Map entry doesn't cover the requested range. Not handling
13767 * this situation currently.
13768 */
13769 vm_map_unlock_read(map);
13770 return(KERN_INVALID_ARGUMENT);
13771 }
13772
13773 object = entry->object.vm_object;
13774 if (object == VM_OBJECT_NULL) {
13775 /*
13776 * Object must already be present or we can't sign.
13777 */
13778 vm_map_unlock_read(map);
13779 return KERN_INVALID_ARGUMENT;
13780 }
13781
13782 vm_object_lock(object);
13783 vm_map_unlock_read(map);
13784
13785 while(start < end) {
13786 uint32_t refmod;
13787
13788 m = vm_page_lookup(object, start - entry->vme_start + entry->offset );
13789 if (m==VM_PAGE_NULL) {
13790 /* shoud we try to fault a page here? we can probably
13791 * demand it exists and is locked for this request */
13792 vm_object_unlock(object);
13793 return KERN_FAILURE;
13794 }
13795 /* deal with special page status */
13796 if (m->busy ||
13797 (m->unusual && (m->error || m->restart || m->private || m->absent))) {
13798 vm_object_unlock(object);
13799 return KERN_FAILURE;
13800 }
13801
13802 /* Page is OK... now "validate" it */
13803 /* This is the place where we'll call out to create a code
13804 * directory, later */
13805 m->cs_validated = TRUE;
13806
13807 /* The page is now "clean" for codesigning purposes. That means
13808 * we don't consider it as modified (wpmapped) anymore. But
13809 * we'll disconnect the page so we note any future modification
13810 * attempts. */
13811 m->wpmapped = FALSE;
13812 refmod = pmap_disconnect(m->phys_page);
13813
13814 /* Pull the dirty status from the pmap, since we cleared the
13815 * wpmapped bit */
13816 if ((refmod & VM_MEM_MODIFIED) && !m->dirty) {
316670eb 13817 SET_PAGE_DIRTY(m, FALSE);
b7266188
A
13818 }
13819
13820 /* On to the next page */
13821 start += PAGE_SIZE;
13822 }
13823 vm_object_unlock(object);
13824
13825 return KERN_SUCCESS;
13826}
13827#endif
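/*
 * Hedged usage sketch for vm_map_sign() above (CONFIG_DYNAMIC_CODE_SIGNING
 * only): the range must be fully covered by one non-submap entry whose
 * pages are already resident, otherwise the routine returns an error.  The
 * single-page caller below is illustrative.
 */
#if CONFIG_DYNAMIC_CODE_SIGNING
static kern_return_t
sign_one_page_example(vm_map_t map, vm_map_offset_t page_start)
{
	return vm_map_sign(map, page_start, page_start + PAGE_SIZE);
}
#endif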
6d2010ae
A
13828
13829#if CONFIG_FREEZE
13830
13831kern_return_t vm_map_freeze_walk(
13832 vm_map_t map,
13833 unsigned int *purgeable_count,
13834 unsigned int *wired_count,
13835 unsigned int *clean_count,
13836 unsigned int *dirty_count,
316670eb 13837 unsigned int dirty_budget,
6d2010ae
A
13838 boolean_t *has_shared)
13839{
13840 vm_map_entry_t entry;
13841
13842 vm_map_lock_read(map);
13843
13844 *purgeable_count = *wired_count = *clean_count = *dirty_count = 0;
13845 *has_shared = FALSE;
13846
13847 for (entry = vm_map_first_entry(map);
13848 entry != vm_map_to_entry(map);
13849 entry = entry->vme_next) {
13850 unsigned int purgeable, clean, dirty, wired;
13851 boolean_t shared;
13852
13853 if ((entry->object.vm_object == 0) ||
13854 (entry->is_sub_map) ||
13855 (entry->object.vm_object->phys_contiguous)) {
13856 continue;
13857 }
13858
316670eb 13859 default_freezer_pack(&purgeable, &wired, &clean, &dirty, dirty_budget, &shared, entry->object.vm_object, NULL);
6d2010ae
A
13860
13861 *purgeable_count += purgeable;
13862 *wired_count += wired;
13863 *clean_count += clean;
13864 *dirty_count += dirty;
13865
13866 if (shared) {
13867 *has_shared = TRUE;
13868 }
316670eb
A
13869
13870 /* Adjust pageout budget and finish up if reached */
13871 if (dirty_budget) {
13872 dirty_budget -= dirty;
13873 if (dirty_budget == 0) {
13874 break;
13875 }
13876 }
6d2010ae
A
13877 }
13878
13879 vm_map_unlock_read(map);
13880
13881 return KERN_SUCCESS;
13882}
13883
13884kern_return_t vm_map_freeze(
13885 vm_map_t map,
13886 unsigned int *purgeable_count,
13887 unsigned int *wired_count,
13888 unsigned int *clean_count,
13889 unsigned int *dirty_count,
316670eb 13890 unsigned int dirty_budget,
6d2010ae
A
13891 boolean_t *has_shared)
13892{
39236c6e
A
13893 vm_map_entry_t entry2 = VM_MAP_ENTRY_NULL;
13894 kern_return_t kr = KERN_SUCCESS;
13895 boolean_t default_freezer_active = TRUE;
6d2010ae
A
13896
13897 *purgeable_count = *wired_count = *clean_count = *dirty_count = 0;
13898 *has_shared = FALSE;
13899
6d2010ae
A
13900 /*
13901 * We need the exclusive lock here so that we can
13902 * block any page faults or lookups while we are
13903 * in the middle of freezing this vm map.
13904 */
13905 vm_map_lock(map);
13906
39236c6e
A
13907 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
13908 default_freezer_active = FALSE;
316670eb
A
13909 }
13910
39236c6e
A
13911 if (default_freezer_active) {
13912 if (map->default_freezer_handle == NULL) {
13913 map->default_freezer_handle = default_freezer_handle_allocate();
13914 }
13915
13916 if ((kr = default_freezer_handle_init(map->default_freezer_handle)) != KERN_SUCCESS) {
13917 /*
13918 * Can happen if default_freezer_handle passed in is NULL
13919 * Or, a table has already been allocated and associated
13920 * with this handle, i.e. the map is already frozen.
13921 */
13922 goto done;
13923 }
6d2010ae 13924 }
316670eb 13925
6d2010ae
A
13926 for (entry2 = vm_map_first_entry(map);
13927 entry2 != vm_map_to_entry(map);
13928 entry2 = entry2->vme_next) {
13929
13930 vm_object_t src_object = entry2->object.vm_object;
13931
6d2010ae 13932 if (entry2->object.vm_object && !entry2->is_sub_map && !entry2->object.vm_object->phys_contiguous) {
39236c6e
A
13933 /* If eligible, scan the entry, moving eligible pages over to our parent object */
13934 if (default_freezer_active) {
13935 unsigned int purgeable, clean, dirty, wired;
13936 boolean_t shared;
316670eb 13937
39236c6e
A
13938 default_freezer_pack(&purgeable, &wired, &clean, &dirty, dirty_budget, &shared,
13939 src_object, map->default_freezer_handle);
13940
13941 *purgeable_count += purgeable;
13942 *wired_count += wired;
13943 *clean_count += clean;
13944 *dirty_count += dirty;
13945
13946 /* Adjust pageout budget and finish up if reached */
13947 if (dirty_budget) {
13948 dirty_budget -= dirty;
13949 if (dirty_budget == 0) {
13950 break;
13951 }
316670eb 13952 }
6d2010ae 13953
39236c6e
A
13954 if (shared) {
13955 *has_shared = TRUE;
13956 }
13957 } else {
13958 /*
13959 * To the compressor.
13960 */
13961 if (entry2->object.vm_object->internal == TRUE) {
13962 vm_object_pageout(entry2->object.vm_object);
13963 }
6d2010ae
A
13964 }
13965 }
13966 }
13967
39236c6e
A
13968 if (default_freezer_active) {
13969 /* Finally, throw out the pages to swap */
13970 default_freezer_pageout(map->default_freezer_handle);
13971 }
6d2010ae
A
13972
13973done:
13974 vm_map_unlock(map);
6d2010ae
A
13975
13976 return kr;
13977}
13978
316670eb 13979kern_return_t
6d2010ae
A
13980vm_map_thaw(
13981 vm_map_t map)
13982{
316670eb 13983 kern_return_t kr = KERN_SUCCESS;
6d2010ae 13984
39236c6e
A
13985 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
13986 /*
13987 * We thaw on demand in the presence of the compressed pager.
13988 */
13989 return kr;
13990 }
13991
6d2010ae
A
13992 vm_map_lock(map);
13993
316670eb 13994 if (map->default_freezer_handle == NULL) {
6d2010ae
A
13995 /*
13996 * This map is not in a frozen state.
13997 */
316670eb 13998 kr = KERN_FAILURE;
6d2010ae
A
13999 goto out;
14000 }
6d2010ae 14001
39236c6e 14002 kr = default_freezer_unpack(map->default_freezer_handle);
6d2010ae
A
14003out:
14004 vm_map_unlock(map);
316670eb
A
14005
14006 return kr;
6d2010ae
A
14007}
14008#endif
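/*
 * Hedged sketch of the freeze/thaw pairing above (CONFIG_FREEZE only).  Per
 * the budget check in the loop above, a dirty_budget of 0 disables the
 * early-out, so this illustrative caller freezes the whole map.
 */
#if CONFIG_FREEZE
static kern_return_t
freeze_then_thaw_example(vm_map_t map)
{
	unsigned int purgeable, wired, clean, dirty;
	boolean_t has_shared;
	kern_return_t kr;

	kr = vm_map_freeze(map, &purgeable, &wired, &clean, &dirty,
			   0 /* dirty_budget */, &has_shared);
	if (kr != KERN_SUCCESS)
		return kr;

	/* later, before the task runs again (no-op with the compressed pager) */
	return vm_map_thaw(map);
}
#endif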
e2d2fc5c 14009
e2d2fc5c
A
14010/*
14011 * vm_map_entry_should_cow_for_true_share:
14012 *
14013 * Determines if the map entry should be clipped and setup for copy-on-write
14014 * to avoid applying "true_share" to a large VM object when only a subset is
14015 * targeted.
14016 *
14017 * For now, we target only the map entries created for the Objective C
14018 * Garbage Collector, which initially have the following properties:
14019 * - alias == VM_MEMORY_MALLOC
14020 * - wired_count == 0
14021 * - !needs_copy
14022 * and a VM object with:
14023 * - internal
14024 * - copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC
14025 * - !true_share
14026 * - vo_size == ANON_CHUNK_SIZE
14027 */
14028boolean_t
14029vm_map_entry_should_cow_for_true_share(
14030 vm_map_entry_t entry)
14031{
14032 vm_object_t object;
14033
14034 if (entry->is_sub_map) {
14035 /* entry does not point at a VM object */
14036 return FALSE;
14037 }
14038
14039 if (entry->needs_copy) {
14040 /* already set for copy_on_write: done! */
14041 return FALSE;
14042 }
14043
14044 if (entry->alias != VM_MEMORY_MALLOC) {
14045 /* not tagged as an Objective-C Garbage Collector entry */
14046 return FALSE;
14047 }
14048
14049 if (entry->wired_count) {
14050 /* wired: can't change the map entry... */
14051 return FALSE;
14052 }
14053
14054 object = entry->object.vm_object;
14055
14056 if (object == VM_OBJECT_NULL) {
14057 /* no object yet... */
14058 return FALSE;
14059 }
14060
14061 if (!object->internal) {
14062 /* not an internal object */
14063 return FALSE;
14064 }
14065
14066 if (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
14067 /* not the default copy strategy */
14068 return FALSE;
14069 }
14070
14071 if (object->true_share) {
14072 /* already true_share: too late to avoid it */
14073 return FALSE;
14074 }
14075
14076 if (object->vo_size != ANON_CHUNK_SIZE) {
14077 /* not an object created for the ObjC Garbage Collector */
14078 return FALSE;
14079 }
14080
14081 /*
14082 * All the criteria match: we have a large object being targeted for "true_share".
14083 * To limit the adverse side-effects linked with "true_share", tell the caller to
14084 * try and avoid setting up the entire object for "true_share" by clipping the
14085 * targeted range and setting it up for copy-on-write.
14086 */
14087 return TRUE;
14088}
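/*
 * Simplified sketch of how a caller acts on the predicate above: clip the
 * entry down to the range actually being targeted and mark it for
 * copy-on-write, so "true_share" is never applied to the whole object.
 * Real callers elsewhere in this file also re-validate the entry after
 * upgrading to the map write lock; that is omitted here.
 */
static void
cow_clip_example(
	vm_map_t	map,		/* locked for writing by the caller */
	vm_map_entry_t	entry,
	vm_map_offset_t	start,		/* page-aligned range inside the entry */
	vm_map_offset_t	end)
{
	if (!vm_map_entry_should_cow_for_true_share(entry))
		return;
	vm_map_clip_start(map, entry, start);
	vm_map_clip_end(map, entry, end);
	entry->needs_copy = TRUE;	/* copy-on-write just this range */
}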
39236c6e
A
14089
14090vm_map_offset_t
14091vm_map_round_page_mask(
14092 vm_map_offset_t offset,
14093 vm_map_offset_t mask)
14094{
14095 return VM_MAP_ROUND_PAGE(offset, mask);
14096}
14097
14098vm_map_offset_t
14099vm_map_trunc_page_mask(
14100 vm_map_offset_t offset,
14101 vm_map_offset_t mask)
14102{
14103 return VM_MAP_TRUNC_PAGE(offset, mask);
14104}
14105
14106int
14107vm_map_page_shift(
14108 vm_map_t map)
14109{
14110 return VM_MAP_PAGE_SHIFT(map);
14111}
14112
14113int
14114vm_map_page_size(
14115 vm_map_t map)
14116{
14117 return VM_MAP_PAGE_SIZE(map);
14118}
14119
14120int
14121vm_map_page_mask(
14122 vm_map_t map)
14123{
14124 return VM_MAP_PAGE_MASK(map);
14125}
14126
14127kern_return_t
14128vm_map_set_page_shift(
14129 vm_map_t map,
14130 int pageshift)
14131{
14132 if (map->hdr.nentries != 0) {
14133 /* too late to change page size */
14134 return KERN_FAILURE;
14135 }
14136
14137 map->hdr.page_shift = pageshift;
14138
14139 return KERN_SUCCESS;
14140}
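/*
 * Worked example for the helpers above, assuming a map whose page shift has
 * been set to 14 (16 KiB pages), so VM_MAP_PAGE_MASK(map) == 0x3fff:
 *	vm_map_trunc_page_mask(0x6001, 0x3fff) == 0x4000
 *	vm_map_round_page_mask(0x6001, 0x3fff) == 0x8000
 *	vm_map_page_size(map)                  == 16384
 */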
14141
14142kern_return_t
14143vm_map_query_volatile(
14144 vm_map_t map,
14145 mach_vm_size_t *volatile_virtual_size_p,
14146 mach_vm_size_t *volatile_resident_size_p,
14147 mach_vm_size_t *volatile_pmap_size_p)
14148{
14149 mach_vm_size_t volatile_virtual_size;
14150 mach_vm_size_t volatile_resident_count;
14151 mach_vm_size_t volatile_pmap_count;
14152 mach_vm_size_t resident_count;
14153 vm_map_entry_t entry;
14154 vm_object_t object;
14155
14156 /* map should be locked by caller */
14157
14158 volatile_virtual_size = 0;
14159 volatile_resident_count = 0;
14160 volatile_pmap_count = 0;
14161
14162 for (entry = vm_map_first_entry(map);
14163 entry != vm_map_to_entry(map);
14164 entry = entry->vme_next) {
14165 if (entry->is_sub_map) {
14166 continue;
14167 }
14168 if (! (entry->protection & VM_PROT_WRITE)) {
14169 continue;
14170 }
14171 object = entry->object.vm_object;
14172 if (object == VM_OBJECT_NULL) {
14173 continue;
14174 }
14175 if (object->purgable != VM_PURGABLE_VOLATILE) {
14176 continue;
14177 }
14178 if (entry->offset != 0) {
14179 /*
14180 * If the map entry has been split and the object now
14181 * appears several times in the VM map, we don't want
14182 * to count the object's resident_page_count more than
14183 * once. We count it only for the first one, starting
14184 * at offset 0 and ignore the other VM map entries.
14185 */
14186 continue;
14187 }
14188 resident_count = object->resident_page_count;
14189 if ((entry->offset / PAGE_SIZE) >= resident_count) {
14190 resident_count = 0;
14191 } else {
14192 resident_count -= (entry->offset / PAGE_SIZE);
14193 }
14194
14195 volatile_virtual_size += entry->vme_end - entry->vme_start;
14196 volatile_resident_count += resident_count;
14197 volatile_pmap_count += pmap_query_resident(map->pmap,
14198 entry->vme_start,
14199 entry->vme_end);
14200 }
14201
14202 /* map is still locked on return */
14203
14204 *volatile_virtual_size_p = volatile_virtual_size;
14205 *volatile_resident_size_p = volatile_resident_count * PAGE_SIZE;
14206 *volatile_pmap_size_p = volatile_pmap_count * PAGE_SIZE;
14207
14208 return KERN_SUCCESS;
14209}
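/*
 * Hedged usage sketch for vm_map_query_volatile() above: the routine neither
 * takes nor drops the map lock, so the caller must hold it across the call
 * (a read lock is assumed here).  The reporting caller below is
 * illustrative.
 */
static void
report_volatile_example(vm_map_t map)
{
	mach_vm_size_t vsize, rsize, psize;

	vm_map_lock_read(map);
	(void) vm_map_query_volatile(map, &vsize, &rsize, &psize);
	vm_map_unlock_read(map);
	/* vsize: writable volatile VA; rsize/psize: resident and pmap bytes */
}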