]> git.saurik.com Git - apple/xnu.git/blob - osfmk/vm/vm_map.c
xnu-3248.40.184.tar.gz
[apple/xnu.git] / osfmk / vm / vm_map.c
1 /*
2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_map.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * Virtual memory mapping module.
64 */
65
66 #include <task_swapper.h>
67 #include <mach_assert.h>
68
69 #include <vm/vm_options.h>
70
71 #include <libkern/OSAtomic.h>
72
73 #include <mach/kern_return.h>
74 #include <mach/port.h>
75 #include <mach/vm_attributes.h>
76 #include <mach/vm_param.h>
77 #include <mach/vm_behavior.h>
78 #include <mach/vm_statistics.h>
79 #include <mach/memory_object.h>
80 #include <mach/mach_vm.h>
81 #include <machine/cpu_capabilities.h>
82 #include <mach/sdt.h>
83
84 #include <kern/assert.h>
85 #include <kern/counters.h>
86 #include <kern/kalloc.h>
87 #include <kern/zalloc.h>
88
89 #include <vm/cpm.h>
90 #include <vm/vm_compressor_pager.h>
91 #include <vm/vm_init.h>
92 #include <vm/vm_fault.h>
93 #include <vm/vm_map.h>
94 #include <vm/vm_object.h>
95 #include <vm/vm_page.h>
96 #include <vm/vm_pageout.h>
97 #include <vm/vm_kern.h>
98 #include <ipc/ipc_port.h>
99 #include <kern/sched_prim.h>
100 #include <kern/misc_protos.h>
101 #include <kern/xpr.h>
102
103 #include <mach/vm_map_server.h>
104 #include <mach/mach_host_server.h>
105 #include <vm/vm_protos.h>
106 #include <vm/vm_purgeable_internal.h>
107
108 #include <vm/vm_protos.h>
109 #include <vm/vm_shared_region.h>
110 #include <vm/vm_map_store.h>
111
112
113 extern u_int32_t random(void); /* from <libkern/libkern.h> */
114 /* Internal prototypes
115 */
116
117 static void vm_map_simplify_range(
118 vm_map_t map,
119 vm_map_offset_t start,
120 vm_map_offset_t end); /* forward */
121
122 static boolean_t vm_map_range_check(
123 vm_map_t map,
124 vm_map_offset_t start,
125 vm_map_offset_t end,
126 vm_map_entry_t *entry);
127
128 static vm_map_entry_t _vm_map_entry_create(
129 struct vm_map_header *map_header, boolean_t map_locked);
130
131 static void _vm_map_entry_dispose(
132 struct vm_map_header *map_header,
133 vm_map_entry_t entry);
134
135 static void vm_map_pmap_enter(
136 vm_map_t map,
137 vm_map_offset_t addr,
138 vm_map_offset_t end_addr,
139 vm_object_t object,
140 vm_object_offset_t offset,
141 vm_prot_t protection);
142
143 static void _vm_map_clip_end(
144 struct vm_map_header *map_header,
145 vm_map_entry_t entry,
146 vm_map_offset_t end);
147
148 static void _vm_map_clip_start(
149 struct vm_map_header *map_header,
150 vm_map_entry_t entry,
151 vm_map_offset_t start);
152
153 static void vm_map_entry_delete(
154 vm_map_t map,
155 vm_map_entry_t entry);
156
157 static kern_return_t vm_map_delete(
158 vm_map_t map,
159 vm_map_offset_t start,
160 vm_map_offset_t end,
161 int flags,
162 vm_map_t zap_map);
163
164 static kern_return_t vm_map_copy_overwrite_unaligned(
165 vm_map_t dst_map,
166 vm_map_entry_t entry,
167 vm_map_copy_t copy,
168 vm_map_address_t start,
169 boolean_t discard_on_success);
170
171 static kern_return_t vm_map_copy_overwrite_aligned(
172 vm_map_t dst_map,
173 vm_map_entry_t tmp_entry,
174 vm_map_copy_t copy,
175 vm_map_offset_t start,
176 pmap_t pmap);
177
178 static kern_return_t vm_map_copyin_kernel_buffer(
179 vm_map_t src_map,
180 vm_map_address_t src_addr,
181 vm_map_size_t len,
182 boolean_t src_destroy,
183 vm_map_copy_t *copy_result); /* OUT */
184
185 static kern_return_t vm_map_copyout_kernel_buffer(
186 vm_map_t map,
187 vm_map_address_t *addr, /* IN/OUT */
188 vm_map_copy_t copy,
189 boolean_t overwrite,
190 boolean_t consume_on_success);
191
192 static void vm_map_fork_share(
193 vm_map_t old_map,
194 vm_map_entry_t old_entry,
195 vm_map_t new_map);
196
197 static boolean_t vm_map_fork_copy(
198 vm_map_t old_map,
199 vm_map_entry_t *old_entry_p,
200 vm_map_t new_map);
201
202 void vm_map_region_top_walk(
203 vm_map_entry_t entry,
204 vm_region_top_info_t top);
205
206 void vm_map_region_walk(
207 vm_map_t map,
208 vm_map_offset_t va,
209 vm_map_entry_t entry,
210 vm_object_offset_t offset,
211 vm_object_size_t range,
212 vm_region_extended_info_t extended,
213 boolean_t look_for_pages,
214 mach_msg_type_number_t count);
215
216 static kern_return_t vm_map_wire_nested(
217 vm_map_t map,
218 vm_map_offset_t start,
219 vm_map_offset_t end,
220 vm_prot_t caller_prot,
221 boolean_t user_wire,
222 pmap_t map_pmap,
223 vm_map_offset_t pmap_addr,
224 ppnum_t *physpage_p);
225
226 static kern_return_t vm_map_unwire_nested(
227 vm_map_t map,
228 vm_map_offset_t start,
229 vm_map_offset_t end,
230 boolean_t user_wire,
231 pmap_t map_pmap,
232 vm_map_offset_t pmap_addr);
233
234 static kern_return_t vm_map_overwrite_submap_recurse(
235 vm_map_t dst_map,
236 vm_map_offset_t dst_addr,
237 vm_map_size_t dst_size);
238
239 static kern_return_t vm_map_copy_overwrite_nested(
240 vm_map_t dst_map,
241 vm_map_offset_t dst_addr,
242 vm_map_copy_t copy,
243 boolean_t interruptible,
244 pmap_t pmap,
245 boolean_t discard_on_success);
246
247 static kern_return_t vm_map_remap_extract(
248 vm_map_t map,
249 vm_map_offset_t addr,
250 vm_map_size_t size,
251 boolean_t copy,
252 struct vm_map_header *map_header,
253 vm_prot_t *cur_protection,
254 vm_prot_t *max_protection,
255 vm_inherit_t inheritance,
256 boolean_t pageable);
257
258 static kern_return_t vm_map_remap_range_allocate(
259 vm_map_t map,
260 vm_map_address_t *address,
261 vm_map_size_t size,
262 vm_map_offset_t mask,
263 int flags,
264 vm_map_entry_t *map_entry);
265
266 static void vm_map_region_look_for_page(
267 vm_map_t map,
268 vm_map_offset_t va,
269 vm_object_t object,
270 vm_object_offset_t offset,
271 int max_refcnt,
272 int depth,
273 vm_region_extended_info_t extended,
274 mach_msg_type_number_t count);
275
276 static int vm_map_region_count_obj_refs(
277 vm_map_entry_t entry,
278 vm_object_t object);
279
280
281 static kern_return_t vm_map_willneed(
282 vm_map_t map,
283 vm_map_offset_t start,
284 vm_map_offset_t end);
285
286 static kern_return_t vm_map_reuse_pages(
287 vm_map_t map,
288 vm_map_offset_t start,
289 vm_map_offset_t end);
290
291 static kern_return_t vm_map_reusable_pages(
292 vm_map_t map,
293 vm_map_offset_t start,
294 vm_map_offset_t end);
295
296 static kern_return_t vm_map_can_reuse(
297 vm_map_t map,
298 vm_map_offset_t start,
299 vm_map_offset_t end);
300
301 #if MACH_ASSERT
302 static kern_return_t vm_map_pageout(
303 vm_map_t map,
304 vm_map_offset_t start,
305 vm_map_offset_t end);
306 #endif /* MACH_ASSERT */
307
308 /*
309 * Macros to copy a vm_map_entry. We must be careful to correctly
310 * manage the wired page count. vm_map_entry_copy() creates a new
311 * map entry to the same memory - the wired count in the new entry
312 * must be set to zero. vm_map_entry_copy_full() creates a new
313 * entry that is identical to the old entry. This preserves the
314 * wire count; it's used for map splitting and zone changing in
315 * vm_map_copyout.
316 */
317
/*
 * vm_map_entry_copy(NEW, OLD):
 *	Make *NEW a second mapping of the memory described by *OLD.
 *	Everything is copied, then the per-mapping state (sharing,
 *	transition/wakeup flags, both wire counts, permanence, JIT,
 *	IOKit accounting and the "resilient" flags) is cleared.
 *	NEW keeps its own "from_reserved_zone" flag.
 */
#define vm_map_entry_copy(NEW,OLD)				\
MACRO_BEGIN							\
	boolean_t _vmec_from_reserved;				\
								\
	_vmec_from_reserved = (NEW)->from_reserved_zone;	\
	*(NEW) = *(OLD);					\
	(NEW)->from_reserved_zone = _vmec_from_reserved;	\
	(NEW)->wired_count = 0;					\
	(NEW)->user_wired_count = 0;				\
	(NEW)->is_shared = FALSE;				\
	(NEW)->needs_wakeup = FALSE;				\
	(NEW)->in_transition = FALSE;				\
	(NEW)->permanent = FALSE;				\
	(NEW)->used_for_jit = FALSE;				\
	(NEW)->iokit_acct = FALSE;				\
	(NEW)->vme_resilient_codesign = FALSE;			\
	(NEW)->vme_resilient_media = FALSE;			\
MACRO_END

/*
 * vm_map_entry_copy_full(NEW, OLD):
 *	Make *NEW an exact duplicate of *OLD (wire counts included),
 *	except that NEW keeps its own "from_reserved_zone" flag.
 */
#define vm_map_entry_copy_full(NEW,OLD)				\
MACRO_BEGIN							\
	boolean_t _vmecf_from_reserved;				\
								\
	_vmecf_from_reserved = (NEW)->from_reserved_zone;	\
	(*(NEW) = *(OLD));					\
	(NEW)->from_reserved_zone = _vmecf_from_reserved;	\
MACRO_END
341
342 /*
343 * Decide if we want to allow processes to execute from their data or stack areas.
344 * override_nx() returns true if we do. Data/stack execution can be enabled independently
345 * for 32 and 64 bit processes. Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
346 * or allow_stack_exec to enable data execution for that type of data area for that particular
347 * ABI (or both by or'ing the flags together). These are initialized in the architecture
348 * specific pmap files since the default behavior varies according to architecture. The
349 * main reason it varies is because of the need to provide binary compatibility with old
350 * applications that were written before these restrictions came into being. In the old
351 * days, an app could execute anything it could read, but this has slowly been tightened
352 * up over time. The default behavior is:
353 *
354 * 32-bit PPC apps may execute from both stack and data areas
355 * 32-bit Intel apps may execute from data areas but not stack
356 * 64-bit PPC/Intel apps may not execute from either data or stack
357 *
358 * An application on any architecture may override these defaults by explicitly
359 * adding PROT_EXEC permission to the page in question with the mprotect(2)
360 * system call. This code here just determines what happens when an app tries to
361 * execute from a page that lacks execute permission.
362 *
363 * Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
364 * default behavior for both 32 and 64 bit apps on a system-wide basis. Furthermore,
365 * a Mach-O header flag bit (MH_NO_HEAP_EXECUTION) can be used to forcibly disallow
366 * execution from data areas for a particular binary even if the arch normally permits it. As
367 * a final wrinkle, a posix_spawn attribute flag can be used to negate this opt-in header bit
368 * to support some complicated use cases, notably browsers with out-of-process plugins that
369 * are not all NX-safe.
370 */
371
372 extern int allow_data_exec, allow_stack_exec;
373
374 int
375 override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
376 {
377 int current_abi;
378
379 if (map->pmap == kernel_pmap) return FALSE;
380
381 /*
382 * Determine if the app is running in 32 or 64 bit mode.
383 */
384
385 if (vm_map_is_64bit(map))
386 current_abi = VM_ABI_64;
387 else
388 current_abi = VM_ABI_32;
389
390 /*
391 * Determine if we should allow the execution based on whether it's a
392 * stack or data area and the current architecture.
393 */
394
395 if (user_tag == VM_MEMORY_STACK)
396 return allow_stack_exec & current_abi;
397
398 return (allow_data_exec & current_abi) && (map->map_disallow_data_exec == FALSE);
399 }
400
401
402 /*
403 * Virtual memory maps provide for the mapping, protection,
404 * and sharing of virtual memory objects. In addition,
405 * this module provides for an efficient virtual copy of
406 * memory from one map to another.
407 *
408 * Synchronization is required prior to most operations.
409 *
410 * Maps consist of an ordered doubly-linked list of simple
411 * entries; a single hint is used to speed up lookups.
412 *
413 * Sharing maps have been deleted from this version of Mach.
414 * All shared objects are now mapped directly into the respective
415 * maps. This requires a change in the copy on write strategy;
416 * the asymmetric (delayed) strategy is used for shared temporary
417 * objects instead of the symmetric (shadow) strategy. All maps
418 * are now "top level" maps (either task map, kernel map or submap
419 * of the kernel map).
420 *
421 * Since portions of maps are specified by start/end addresses,
422 * which may not align with existing map entries, all
423 * routines merely "clip" entries to these start/end values.
424 * [That is, an entry is split into two, bordering at a
425 * start or end value.] Note that these clippings may not
426 * always be necessary (as the two resulting entries are then
427 * not changed); however, the clipping is done for convenience.
428 * No attempt is currently made to "glue back together" two
429 * abutting entries.
430 *
431 * The symmetric (shadow) copy strategy implements virtual copy
432 * by copying VM object references from one map to
433 * another, and then marking both regions as copy-on-write.
434 * It is important to note that only one writeable reference
435 * to a VM object region exists in any map when this strategy
436 * is used -- this means that shadow object creation can be
437 * delayed until a write operation occurs. The asymmetric (delayed)
438 * strategy allows multiple maps to have writeable references to
439 * the same region of a vm object, and hence cannot delay creating
440 * its copy objects. See vm_object_copy_quickly() in vm_object.c.
441 * Copying of permanent objects is completely different; see
442 * vm_object_copy_strategically() in vm_object.c.
443 */
444
445 static zone_t vm_map_zone; /* zone for vm_map structures */
446 static zone_t vm_map_entry_zone; /* zone for vm_map_entry structures */
447 static zone_t vm_map_entry_reserved_zone; /* zone with reserve for non-blocking
448 * allocations */
449 static zone_t vm_map_copy_zone; /* zone for vm_map_copy structures */
450 zone_t vm_map_holes_zone; /* zone for vm map holes (vm_map_links) structures */
451
452
453 /*
454 * Placeholder object for submap operations. This object is dropped
455 * into the range by a call to vm_map_find, and removed when
456 * vm_map_submap creates the submap.
457 */
458
459 vm_object_t vm_submap_object;
460
461 static void *map_data;
462 static vm_size_t map_data_size;
463 static void *kentry_data;
464 static vm_size_t kentry_data_size;
465 static void *map_holes_data;
466 static vm_size_t map_holes_data_size;
467
468 #define NO_COALESCE_LIMIT ((1024 * 128) - 1)
469
470 /* Skip acquiring locks if we're in the midst of a kernel core dump */
471 unsigned int not_in_kdp = 1;
472
473 unsigned int vm_map_set_cache_attr_count = 0;
474
475 kern_return_t
476 vm_map_set_cache_attr(
477 vm_map_t map,
478 vm_map_offset_t va)
479 {
480 vm_map_entry_t map_entry;
481 vm_object_t object;
482 kern_return_t kr = KERN_SUCCESS;
483
484 vm_map_lock_read(map);
485
486 if (!vm_map_lookup_entry(map, va, &map_entry) ||
487 map_entry->is_sub_map) {
488 /*
489 * that memory is not properly mapped
490 */
491 kr = KERN_INVALID_ARGUMENT;
492 goto done;
493 }
494 object = VME_OBJECT(map_entry);
495
496 if (object == VM_OBJECT_NULL) {
497 /*
498 * there should be a VM object here at this point
499 */
500 kr = KERN_INVALID_ARGUMENT;
501 goto done;
502 }
503 vm_object_lock(object);
504 object->set_cache_attr = TRUE;
505 vm_object_unlock(object);
506
507 vm_map_set_cache_attr_count++;
508 done:
509 vm_map_unlock_read(map);
510
511 return kr;
512 }
513
514
515 #if CONFIG_CODE_DECRYPTION
516 /*
517 * vm_map_apple_protected:
518 * This remaps the requested part of the object with an object backed by
519 * the decrypting pager.
520 * crypt_info contains entry points and session data for the crypt module.
521 * The crypt_info block will be copied by vm_map_apple_protected. The data structures
522 * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
523 */
524 kern_return_t
525 vm_map_apple_protected(
526 vm_map_t map,
527 vm_map_offset_t start,
528 vm_map_offset_t end,
529 vm_object_offset_t crypto_backing_offset,
530 struct pager_crypt_info *crypt_info)
531 {
532 boolean_t map_locked;
533 kern_return_t kr;
534 vm_map_entry_t map_entry;
535 struct vm_map_entry tmp_entry;
536 memory_object_t unprotected_mem_obj;
537 vm_object_t protected_object;
538 vm_map_offset_t map_addr;
539 vm_map_offset_t start_aligned, end_aligned;
540 vm_object_offset_t crypto_start, crypto_end;
541 int vm_flags;
542
543 map_locked = FALSE;
544 unprotected_mem_obj = MEMORY_OBJECT_NULL;
545
546 start_aligned = vm_map_trunc_page(start, PAGE_MASK_64);
547 end_aligned = vm_map_round_page(end, PAGE_MASK_64);
548 start_aligned = vm_map_trunc_page(start_aligned, VM_MAP_PAGE_MASK(map));
549 end_aligned = vm_map_round_page(end_aligned, VM_MAP_PAGE_MASK(map));
550
551 assert(start_aligned == start);
552 assert(end_aligned == end);
553
554 map_addr = start_aligned;
555 for (map_addr = start_aligned;
556 map_addr < end;
557 map_addr = tmp_entry.vme_end) {
558 vm_map_lock(map);
559 map_locked = TRUE;
560
561 /* lookup the protected VM object */
562 if (!vm_map_lookup_entry(map,
563 map_addr,
564 &map_entry) ||
565 map_entry->is_sub_map ||
566 VME_OBJECT(map_entry) == VM_OBJECT_NULL ||
567 !(map_entry->protection & VM_PROT_EXECUTE)) {
568 /* that memory is not properly mapped */
569 kr = KERN_INVALID_ARGUMENT;
570 goto done;
571 }
572
573 /* get the protected object to be decrypted */
574 protected_object = VME_OBJECT(map_entry);
575 if (protected_object == VM_OBJECT_NULL) {
576 /* there should be a VM object here at this point */
577 kr = KERN_INVALID_ARGUMENT;
578 goto done;
579 }
580 /* ensure protected object stays alive while map is unlocked */
581 vm_object_reference(protected_object);
582
583 /* limit the map entry to the area we want to cover */
584 vm_map_clip_start(map, map_entry, start_aligned);
585 vm_map_clip_end(map, map_entry, end_aligned);
586
587 tmp_entry = *map_entry;
588 map_entry = VM_MAP_ENTRY_NULL; /* not valid after unlocking map */
589 vm_map_unlock(map);
590 map_locked = FALSE;
591
592 /*
593 * This map entry might be only partially encrypted
594 * (if not fully "page-aligned").
595 */
596 crypto_start = 0;
597 crypto_end = tmp_entry.vme_end - tmp_entry.vme_start;
598 if (tmp_entry.vme_start < start) {
599 if (tmp_entry.vme_start != start_aligned) {
600 kr = KERN_INVALID_ADDRESS;
601 }
602 crypto_start += (start - tmp_entry.vme_start);
603 }
604 if (tmp_entry.vme_end > end) {
605 if (tmp_entry.vme_end != end_aligned) {
606 kr = KERN_INVALID_ADDRESS;
607 }
608 crypto_end -= (tmp_entry.vme_end - end);
609 }
610
611 /*
612 * This "extra backing offset" is needed to get the decryption
613 * routine to use the right key. It adjusts for the possibly
614 * relative offset of an interposed "4K" pager...
615 */
616 if (crypto_backing_offset == (vm_object_offset_t) -1) {
617 crypto_backing_offset = VME_OFFSET(&tmp_entry);
618 }
619
620 /*
621 * Lookup (and create if necessary) the protected memory object
622 * matching that VM object.
623 * If successful, this also grabs a reference on the memory object,
624 * to guarantee that it doesn't go away before we get a chance to map
625 * it.
626 */
627 unprotected_mem_obj = apple_protect_pager_setup(
628 protected_object,
629 VME_OFFSET(&tmp_entry),
630 crypto_backing_offset,
631 crypt_info,
632 crypto_start,
633 crypto_end);
634
635 /* release extra ref on protected object */
636 vm_object_deallocate(protected_object);
637
638 if (unprotected_mem_obj == NULL) {
639 kr = KERN_FAILURE;
640 goto done;
641 }
642
643 vm_flags = VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
644
645 /* map this memory object in place of the current one */
646 map_addr = tmp_entry.vme_start;
647 kr = vm_map_enter_mem_object(map,
648 &map_addr,
649 (tmp_entry.vme_end -
650 tmp_entry.vme_start),
651 (mach_vm_offset_t) 0,
652 vm_flags,
653 (ipc_port_t) unprotected_mem_obj,
654 0,
655 TRUE,
656 tmp_entry.protection,
657 tmp_entry.max_protection,
658 tmp_entry.inheritance);
659 assert(kr == KERN_SUCCESS);
660 assert(map_addr == tmp_entry.vme_start);
661
662 #if VM_MAP_DEBUG_APPLE_PROTECT
663 printf("APPLE_PROTECT: map %p [0x%llx:0x%llx] pager %p: "
664 "backing:[object:%p,offset:0x%llx,"
665 "crypto_backing_offset:0x%llx,"
666 "crypto_start:0x%llx,crypto_end:0x%llx]\n",
667 map,
668 (uint64_t) map_addr,
669 (uint64_t) (map_addr + (tmp_entry.vme_end -
670 tmp_entry.vme_start)),
671 unprotected_mem_obj,
672 protected_object,
673 VME_OFFSET(&tmp_entry),
674 crypto_backing_offset,
675 crypto_start,
676 crypto_end);
677 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
678
679 /*
680 * Release the reference obtained by
681 * apple_protect_pager_setup().
682 * The mapping (if it succeeded) is now holding a reference on
683 * the memory object.
684 */
685 memory_object_deallocate(unprotected_mem_obj);
686 unprotected_mem_obj = MEMORY_OBJECT_NULL;
687
688 /* continue with next map entry */
689 crypto_backing_offset += (tmp_entry.vme_end -
690 tmp_entry.vme_start);
691 crypto_backing_offset -= crypto_start;
692 }
693 kr = KERN_SUCCESS;
694
695 done:
696 if (map_locked) {
697 vm_map_unlock(map);
698 }
699 return kr;
700 }
701 #endif /* CONFIG_CODE_DECRYPTION */
702
703
704 lck_grp_t vm_map_lck_grp;
705 lck_grp_attr_t vm_map_lck_grp_attr;
706 lck_attr_t vm_map_lck_attr;
707 lck_attr_t vm_map_lck_rw_attr;
708
709
710 /*
711 * vm_map_init:
712 *
713 * Initialize the vm_map module. Must be called before
714 * any other vm_map routines.
715 *
716 * Map and entry structures are allocated from zones -- we must
717 * initialize those zones.
718 *
719 * There are three zones of interest:
720 *
721 * vm_map_zone: used to allocate maps.
722 * vm_map_entry_zone: used to allocate map entries.
723 * vm_map_entry_reserved_zone: fallback zone for kernel map entries
724 *
725 * The kernel allocates map entries from a special zone that is initially
726 * "crammed" with memory. It would be difficult (perhaps impossible) for
727 * the kernel to allocate more memory to a entry zone when it became
728 * empty since the very act of allocating memory implies the creation
729 * of a new entry.
730 */
731 void
732 vm_map_init(
733 void)
734 {
735 vm_size_t entry_zone_alloc_size;
736 const char *mez_name = "VM map entries";
737
738 vm_map_zone = zinit((vm_map_size_t) sizeof(struct _vm_map), 40*1024,
739 PAGE_SIZE, "maps");
740 zone_change(vm_map_zone, Z_NOENCRYPT, TRUE);
741 #if defined(__LP64__)
742 entry_zone_alloc_size = PAGE_SIZE * 5;
743 #else
744 entry_zone_alloc_size = PAGE_SIZE * 6;
745 #endif
746 vm_map_entry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
747 1024*1024, entry_zone_alloc_size,
748 mez_name);
749 zone_change(vm_map_entry_zone, Z_NOENCRYPT, TRUE);
750 zone_change(vm_map_entry_zone, Z_NOCALLOUT, TRUE);
751 zone_change(vm_map_entry_zone, Z_GZALLOC_EXEMPT, TRUE);
752
753 vm_map_entry_reserved_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
754 kentry_data_size * 64, kentry_data_size,
755 "Reserved VM map entries");
756 zone_change(vm_map_entry_reserved_zone, Z_NOENCRYPT, TRUE);
757
758 vm_map_copy_zone = zinit((vm_map_size_t) sizeof(struct vm_map_copy),
759 16*1024, PAGE_SIZE, "VM map copies");
760 zone_change(vm_map_copy_zone, Z_NOENCRYPT, TRUE);
761
762 vm_map_holes_zone = zinit((vm_map_size_t) sizeof(struct vm_map_links),
763 16*1024, PAGE_SIZE, "VM map holes");
764 zone_change(vm_map_holes_zone, Z_NOENCRYPT, TRUE);
765
766 /*
767 * Cram the map and kentry zones with initial data.
768 * Set reserved_zone non-collectible to aid zone_gc().
769 */
770 zone_change(vm_map_zone, Z_COLLECT, FALSE);
771
772 zone_change(vm_map_entry_reserved_zone, Z_COLLECT, FALSE);
773 zone_change(vm_map_entry_reserved_zone, Z_EXPAND, FALSE);
774 zone_change(vm_map_entry_reserved_zone, Z_FOREIGN, TRUE);
775 zone_change(vm_map_entry_reserved_zone, Z_NOCALLOUT, TRUE);
776 zone_change(vm_map_entry_reserved_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
777 zone_change(vm_map_copy_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
778 zone_change(vm_map_entry_reserved_zone, Z_GZALLOC_EXEMPT, TRUE);
779
780 zone_change(vm_map_holes_zone, Z_COLLECT, TRUE);
781 zone_change(vm_map_holes_zone, Z_EXPAND, TRUE);
782 zone_change(vm_map_holes_zone, Z_FOREIGN, TRUE);
783 zone_change(vm_map_holes_zone, Z_NOCALLOUT, TRUE);
784 zone_change(vm_map_holes_zone, Z_CALLERACCT, TRUE);
785 zone_change(vm_map_holes_zone, Z_GZALLOC_EXEMPT, TRUE);
786
787 /*
788 * Add the stolen memory to zones, adjust zone size and stolen counts.
789 */
790 zcram(vm_map_zone, (vm_offset_t)map_data, map_data_size);
791 zcram(vm_map_entry_reserved_zone, (vm_offset_t)kentry_data, kentry_data_size);
792 zcram(vm_map_holes_zone, (vm_offset_t)map_holes_data, map_holes_data_size);
793 VM_PAGE_MOVE_STOLEN(atop_64(map_data_size) + atop_64(kentry_data_size) + atop_64(map_holes_data_size));
794
795 lck_grp_attr_setdefault(&vm_map_lck_grp_attr);
796 lck_grp_init(&vm_map_lck_grp, "vm_map", &vm_map_lck_grp_attr);
797 lck_attr_setdefault(&vm_map_lck_attr);
798
799 lck_attr_setdefault(&vm_map_lck_rw_attr);
800 lck_attr_cleardebug(&vm_map_lck_rw_attr);
801
802 #if CONFIG_FREEZE
803 default_freezer_init();
804 #endif /* CONFIG_FREEZE */
805 }
806
807 void
808 vm_map_steal_memory(
809 void)
810 {
811 uint32_t kentry_initial_pages;
812
813 map_data_size = round_page(10 * sizeof(struct _vm_map));
814 map_data = pmap_steal_memory(map_data_size);
815
816 /*
817 * kentry_initial_pages corresponds to the number of kernel map entries
818 * required during bootstrap until the asynchronous replenishment
819 * scheme is activated and/or entries are available from the general
820 * map entry pool.
821 */
822 #if defined(__LP64__)
823 kentry_initial_pages = 10;
824 #else
825 kentry_initial_pages = 6;
826 #endif
827
828 #if CONFIG_GZALLOC
829 /* If using the guard allocator, reserve more memory for the kernel
830 * reserved map entry pool.
831 */
832 if (gzalloc_enabled())
833 kentry_initial_pages *= 1024;
834 #endif
835
836 kentry_data_size = kentry_initial_pages * PAGE_SIZE;
837 kentry_data = pmap_steal_memory(kentry_data_size);
838
839 map_holes_data_size = kentry_data_size;
840 map_holes_data = pmap_steal_memory(map_holes_data_size);
841 }
842
843 void
844 vm_kernel_reserved_entry_init(void) {
845 zone_prio_refill_configure(vm_map_entry_reserved_zone, (6*PAGE_SIZE)/sizeof(struct vm_map_entry));
846 zone_prio_refill_configure(vm_map_holes_zone, (6*PAGE_SIZE)/sizeof(struct vm_map_links));
847 }
848
849 void
850 vm_map_disable_hole_optimization(vm_map_t map)
851 {
852 vm_map_entry_t head_entry, hole_entry, next_hole_entry;
853
854 if (map->holelistenabled) {
855
856 head_entry = hole_entry = (vm_map_entry_t) map->holes_list;
857
858 while (hole_entry != NULL) {
859
860 next_hole_entry = hole_entry->vme_next;
861
862 hole_entry->vme_next = NULL;
863 hole_entry->vme_prev = NULL;
864 zfree(vm_map_holes_zone, hole_entry);
865
866 if (next_hole_entry == head_entry) {
867 hole_entry = NULL;
868 } else {
869 hole_entry = next_hole_entry;
870 }
871 }
872
873 map->holes_list = NULL;
874 map->holelistenabled = FALSE;
875
876 map->first_free = vm_map_first_entry(map);
877 SAVE_HINT_HOLE_WRITE(map, NULL);
878 }
879 }
880
881 boolean_t
882 vm_kernel_map_is_kernel(vm_map_t map) {
883 return (map->pmap == kernel_pmap);
884 }
885
886 /*
887 * vm_map_create:
888 *
889 * Creates and returns a new empty VM map with
890 * the given physical map structure, and having
891 * the given lower and upper address bounds.
892 */
893
894 boolean_t vm_map_supports_hole_optimization = TRUE;
895
896 vm_map_t
897 vm_map_create(
898 pmap_t pmap,
899 vm_map_offset_t min,
900 vm_map_offset_t max,
901 boolean_t pageable)
902 {
903 static int color_seed = 0;
904 register vm_map_t result;
905 struct vm_map_links *hole_entry = NULL;
906
907 result = (vm_map_t) zalloc(vm_map_zone);
908 if (result == VM_MAP_NULL)
909 panic("vm_map_create");
910
911 vm_map_first_entry(result) = vm_map_to_entry(result);
912 vm_map_last_entry(result) = vm_map_to_entry(result);
913 result->hdr.nentries = 0;
914 result->hdr.entries_pageable = pageable;
915
916 vm_map_store_init( &(result->hdr) );
917
918 result->hdr.page_shift = PAGE_SHIFT;
919
920 result->size = 0;
921 result->user_wire_limit = MACH_VM_MAX_ADDRESS; /* default limit is unlimited */
922 result->user_wire_size = 0;
923 result->ref_count = 1;
924 #if TASK_SWAPPER
925 result->res_count = 1;
926 result->sw_state = MAP_SW_IN;
927 #endif /* TASK_SWAPPER */
928 result->pmap = pmap;
929 result->min_offset = min;
930 result->max_offset = max;
931 result->wiring_required = FALSE;
932 result->no_zero_fill = FALSE;
933 result->mapped_in_other_pmaps = FALSE;
934 result->wait_for_space = FALSE;
935 result->switch_protect = FALSE;
936 result->disable_vmentry_reuse = FALSE;
937 result->map_disallow_data_exec = FALSE;
938 result->highest_entry_end = 0;
939 result->first_free = vm_map_to_entry(result);
940 result->hint = vm_map_to_entry(result);
941 result->color_rr = (color_seed++) & vm_color_mask;
942 result->jit_entry_exists = FALSE;
943
944 if (vm_map_supports_hole_optimization && pmap != kernel_pmap) {
945 hole_entry = zalloc(vm_map_holes_zone);
946
947 hole_entry->start = min;
948 hole_entry->end = (max > (vm_map_offset_t)MACH_VM_MAX_ADDRESS) ? max : (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
949 result->holes_list = result->hole_hint = hole_entry;
950 hole_entry->prev = hole_entry->next = (vm_map_entry_t) hole_entry;
951 result->holelistenabled = TRUE;
952
953 } else {
954
955 result->holelistenabled = FALSE;
956 }
957
958 #if CONFIG_FREEZE
959 result->default_freezer_handle = NULL;
960 #endif
961 vm_map_lock_init(result);
962 lck_mtx_init_ext(&result->s_lock, &result->s_lock_ext, &vm_map_lck_grp, &vm_map_lck_attr);
963
964 return(result);
965 }
966
967 /*
968 * vm_map_entry_create: [ internal use only ]
969 *
970 * Allocates a VM map entry for insertion in the
971 * given map (or map copy). No fields are filled.
972 */
973 #define vm_map_entry_create(map, map_locked) _vm_map_entry_create(&(map)->hdr, map_locked)
974
975 #define vm_map_copy_entry_create(copy, map_locked) \
976 _vm_map_entry_create(&(copy)->cpy_hdr, map_locked)
977 unsigned reserved_zalloc_count, nonreserved_zalloc_count;
978
979 static vm_map_entry_t
980 _vm_map_entry_create(
981 struct vm_map_header *map_header, boolean_t __unused map_locked)
982 {
983 zone_t zone;
984 vm_map_entry_t entry;
985
986 zone = vm_map_entry_zone;
987
988 assert(map_header->entries_pageable ? !map_locked : TRUE);
989
990 if (map_header->entries_pageable) {
991 entry = (vm_map_entry_t) zalloc(zone);
992 }
993 else {
994 entry = (vm_map_entry_t) zalloc_canblock(zone, FALSE);
995
996 if (entry == VM_MAP_ENTRY_NULL) {
997 zone = vm_map_entry_reserved_zone;
998 entry = (vm_map_entry_t) zalloc(zone);
999 OSAddAtomic(1, &reserved_zalloc_count);
1000 } else
1001 OSAddAtomic(1, &nonreserved_zalloc_count);
1002 }
1003
1004 if (entry == VM_MAP_ENTRY_NULL)
1005 panic("vm_map_entry_create");
1006 entry->from_reserved_zone = (zone == vm_map_entry_reserved_zone);
1007
1008 vm_map_store_update( (vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE);
1009 #if MAP_ENTRY_CREATION_DEBUG
1010 entry->vme_creation_maphdr = map_header;
1011 fastbacktrace(&entry->vme_creation_bt[0],
1012 (sizeof(entry->vme_creation_bt)/sizeof(uintptr_t)));
1013 #endif
1014 return(entry);
1015 }
1016
1017 /*
1018 * vm_map_entry_dispose: [ internal use only ]
1019 *
1020 * Inverse of vm_map_entry_create.
1021 *
1022 * write map lock held so no need to
1023 * do anything special to insure correctness
1024 * of the stores
1025 */
1026 #define vm_map_entry_dispose(map, entry) \
1027 _vm_map_entry_dispose(&(map)->hdr, (entry))
1028
1029 #define vm_map_copy_entry_dispose(map, entry) \
1030 _vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
1031
1032 static void
1033 _vm_map_entry_dispose(
1034 register struct vm_map_header *map_header,
1035 register vm_map_entry_t entry)
1036 {
1037 register zone_t zone;
1038
1039 if (map_header->entries_pageable || !(entry->from_reserved_zone))
1040 zone = vm_map_entry_zone;
1041 else
1042 zone = vm_map_entry_reserved_zone;
1043
1044 if (!map_header->entries_pageable) {
1045 if (zone == vm_map_entry_zone)
1046 OSAddAtomic(-1, &nonreserved_zalloc_count);
1047 else
1048 OSAddAtomic(-1, &reserved_zalloc_count);
1049 }
1050
1051 zfree(zone, entry);
1052 }
1053
#if	MACH_ASSERT
/* Debug switch: the first_free consistency check only runs when TRUE. */
static boolean_t first_free_check = FALSE;

/*
 * Report whether the map's "first_free" hint is consistent, as judged
 * by first_free_is_valid_store().  While first_free_check is FALSE the
 * check is skipped and TRUE is returned unconditionally.
 */
boolean_t
first_free_is_valid(
	vm_map_t	map)
{
	if (first_free_check) {
		return first_free_is_valid_store(map);
	}
	return TRUE;
}
#endif /* MACH_ASSERT */
1066
1067
/*
 * Link / unlink an entry on a map copy's entry list.  Both macros simply
 * pass the copy's header (cpy_hdr) on to the corresponding
 * _vm_map_store_entry_* routine.
 */
1068 #define vm_map_copy_entry_link(copy, after_where, entry) \
1069 _vm_map_store_entry_link(&(copy)->cpy_hdr, after_where, (entry))
1070
1071 #define vm_map_copy_entry_unlink(copy, entry) \
1072 _vm_map_store_entry_unlink(&(copy)->cpy_hdr, (entry))
1073
1074 #if MACH_ASSERT && TASK_SWAPPER
1075 /*
1076 * vm_map_res_reference:
1077 *
1078 * Adds another valid residence count to the given map.
1079 *
1080 * Map is locked so this function can be called from
1081 * vm_map_swapin.
1082 *
1083 */
1084 void vm_map_res_reference(register vm_map_t map)
1085 {
1086 /* assert map is locked */
1087 assert(map->res_count >= 0);
1088 assert(map->ref_count >= map->res_count);
1089 if (map->res_count == 0) {
1090 lck_mtx_unlock(&map->s_lock);
1091 vm_map_lock(map);
1092 vm_map_swapin(map);
1093 lck_mtx_lock(&map->s_lock);
1094 ++map->res_count;
1095 vm_map_unlock(map);
1096 } else
1097 ++map->res_count;
1098 }
1099
1100 /*
1101 * vm_map_reference_swap:
1102 *
1103 * Adds valid reference and residence counts to the given map.
1104 *
1105 * The map may not be in memory (i.e. zero residence count).
1106 *
1107 */
1108 void vm_map_reference_swap(register vm_map_t map)
1109 {
1110 assert(map != VM_MAP_NULL);
1111 lck_mtx_lock(&map->s_lock);
1112 assert(map->res_count >= 0);
1113 assert(map->ref_count >= map->res_count);
1114 map->ref_count++;
1115 vm_map_res_reference(map);
1116 lck_mtx_unlock(&map->s_lock);
1117 }
1118
1119 /*
1120 * vm_map_res_deallocate:
1121 *
1122 * Decrement residence count on a map; possibly causing swapout.
1123 *
1124 * The map must be in memory (i.e. non-zero residence count).
1125 *
1126 * The map is locked, so this function is callable from vm_map_deallocate.
1127 *
1128 */
1129 void vm_map_res_deallocate(register vm_map_t map)
1130 {
1131 assert(map->res_count > 0);
1132 if (--map->res_count == 0) {
1133 lck_mtx_unlock(&map->s_lock);
1134 vm_map_lock(map);
1135 vm_map_swapout(map);
1136 vm_map_unlock(map);
1137 lck_mtx_lock(&map->s_lock);
1138 }
1139 assert(map->ref_count >= map->res_count);
1140 }
1141 #endif /* MACH_ASSERT && TASK_SWAPPER */
1142
1143 /*
1144 * vm_map_destroy:
1145 *
1146 * Actually destroy a map.
1147 */
1148 void
1149 vm_map_destroy(
1150 vm_map_t map,
1151 int flags)
1152 {
1153 vm_map_lock(map);
1154
1155 /* final cleanup: no need to unnest shared region */
1156 flags |= VM_MAP_REMOVE_NO_UNNESTING;
1157
1158 /* clean up regular map entries */
1159 (void) vm_map_delete(map, map->min_offset, map->max_offset,
1160 flags, VM_MAP_NULL);
1161 /* clean up leftover special mappings (commpage, etc...) */
1162 (void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL,
1163 flags, VM_MAP_NULL);
1164
1165 #if CONFIG_FREEZE
1166 if (map->default_freezer_handle) {
1167 default_freezer_handle_deallocate(map->default_freezer_handle);
1168 map->default_freezer_handle = NULL;
1169 }
1170 #endif
1171 vm_map_disable_hole_optimization(map);
1172 vm_map_unlock(map);
1173
1174 assert(map->hdr.nentries == 0);
1175
1176 if(map->pmap)
1177 pmap_destroy(map->pmap);
1178
1179 zfree(vm_map_zone, map);
1180 }
1181
1182 #if TASK_SWAPPER
1183 /*
1184 * vm_map_swapin/vm_map_swapout
1185 *
1186 * Swap a map in and out, either referencing or releasing its resources.
1187 * These functions are internal use only; however, they must be exported
1188 * because they may be called from macros, which are exported.
1189 *
1190 * In the case of swapout, there could be races on the residence count,
1191 * so if the residence count is up, we return, assuming that a
1192 * vm_map_deallocate() call in the near future will bring us back.
1193 *
1194 * Locking:
1195 * -- We use the map write lock for synchronization among races.
1196 * -- The map write lock, and not the simple s_lock, protects the
1197 * swap state of the map.
1198 * -- If a map entry is a share map, then we hold both locks, in
1199 * hierarchical order.
1200 *
1201 * Synchronization Notes:
1202 * 1) If a vm_map_swapin() call happens while swapout in progress, it
1203 * will block on the map lock and proceed when swapout is through.
1204 * 2) A vm_map_reference() call at this time is illegal, and will
1205 * cause a panic. vm_map_reference() is only allowed on resident
1206 * maps, since it refuses to block.
1207 * 3) A vm_map_swapin() call during a swapin will block, and
1208 * proceed when the first swapin is done, turning into a nop.
1209 * This is the reason the res_count is not incremented until
1210 * after the swapin is complete.
1211 * 4) There is a timing hole after the checks of the res_count, before
1212 * the map lock is taken, during which a swapin may get the lock
1213 * before a swapout about to happen. If this happens, the swapin
1214 * will detect the state and increment the reference count, causing
1215 * the swapout to be a nop, thereby delaying it until a later
1216 * vm_map_deallocate. If the swapout gets the lock first, then
1217 * the swapin will simply block until the swapout is done, and
1218 * then proceed.
1219 *
1220 * Because vm_map_swapin() is potentially an expensive operation, it
1221 * should be used with caution.
1222 *
1223 * Invariants:
1224 * 1) A map with a residence count of zero is either swapped, or
1225 * being swapped.
1226 * 2) A map with a non-zero residence count is either resident,
1227 * or being swapped in.
1228 */
1229
1230 int vm_map_swap_enable = 1;
1231
1232 void vm_map_swapin (vm_map_t map)
1233 {
1234 register vm_map_entry_t entry;
1235
1236 if (!vm_map_swap_enable) /* debug */
1237 return;
1238
1239 /*
1240 * Map is locked
1241 * First deal with various races.
1242 */
1243 if (map->sw_state == MAP_SW_IN)
1244 /*
1245 * we raced with swapout and won. Returning will incr.
1246 * the res_count, turning the swapout into a nop.
1247 */
1248 return;
1249
1250 /*
1251 * The residence count must be zero. If we raced with another
1252 * swapin, the state would have been IN; if we raced with a
1253 * swapout (after another competing swapin), we must have lost
1254 * the race to get here (see above comment), in which case
1255 * res_count is still 0.
1256 */
1257 assert(map->res_count == 0);
1258
1259 /*
1260 * There are no intermediate states of a map going out or
1261 * coming in, since the map is locked during the transition.
1262 */
1263 assert(map->sw_state == MAP_SW_OUT);
1264
1265 /*
1266 * We now operate upon each map entry. If the entry is a sub-
1267 * or share-map, we call vm_map_res_reference upon it.
1268 * If the entry is an object, we call vm_object_res_reference
1269 * (this may iterate through the shadow chain).
1270 * Note that we hold the map locked the entire time,
1271 * even if we get back here via a recursive call in
1272 * vm_map_res_reference.
1273 */
1274 entry = vm_map_first_entry(map);
1275
1276 while (entry != vm_map_to_entry(map)) {
1277 if (VME_OBJECT(entry) != VM_OBJECT_NULL) {
1278 if (entry->is_sub_map) {
1279 vm_map_t lmap = VME_SUBMAP(entry);
1280 lck_mtx_lock(&lmap->s_lock);
1281 vm_map_res_reference(lmap);
1282 lck_mtx_unlock(&lmap->s_lock);
1283 } else {
1284 vm_object_t object = VME_OBEJCT(entry);
1285 vm_object_lock(object);
1286 /*
1287 * This call may iterate through the
1288 * shadow chain.
1289 */
1290 vm_object_res_reference(object);
1291 vm_object_unlock(object);
1292 }
1293 }
1294 entry = entry->vme_next;
1295 }
1296 assert(map->sw_state == MAP_SW_OUT);
1297 map->sw_state = MAP_SW_IN;
1298 }
1299
1300 void vm_map_swapout(vm_map_t map)
1301 {
1302 register vm_map_entry_t entry;
1303
1304 /*
1305 * Map is locked
1306 * First deal with various races.
1307 * If we raced with a swapin and lost, the residence count
1308 * will have been incremented to 1, and we simply return.
1309 */
1310 lck_mtx_lock(&map->s_lock);
1311 if (map->res_count != 0) {
1312 lck_mtx_unlock(&map->s_lock);
1313 return;
1314 }
1315 lck_mtx_unlock(&map->s_lock);
1316
1317 /*
1318 * There are no intermediate states of a map going out or
1319 * coming in, since the map is locked during the transition.
1320 */
1321 assert(map->sw_state == MAP_SW_IN);
1322
1323 if (!vm_map_swap_enable)
1324 return;
1325
1326 /*
1327 * We now operate upon each map entry. If the entry is a sub-
1328 * or share-map, we call vm_map_res_deallocate upon it.
1329 * If the entry is an object, we call vm_object_res_deallocate
1330 * (this may iterate through the shadow chain).
1331 * Note that we hold the map locked the entire time,
1332 * even if we get back here via a recursive call in
1333 * vm_map_res_deallocate.
1334 */
1335 entry = vm_map_first_entry(map);
1336
1337 while (entry != vm_map_to_entry(map)) {
1338 if (VME_OBJECT(entry) != VM_OBJECT_NULL) {
1339 if (entry->is_sub_map) {
1340 vm_map_t lmap = VME_SUBMAP(entry);
1341 lck_mtx_lock(&lmap->s_lock);
1342 vm_map_res_deallocate(lmap);
1343 lck_mtx_unlock(&lmap->s_lock);
1344 } else {
1345 vm_object_t object = VME_OBJECT(entry);
1346 vm_object_lock(object);
1347 /*
1348 * This call may take a long time,
1349 * since it could actively push
1350 * out pages (if we implement it
1351 * that way).
1352 */
1353 vm_object_res_deallocate(object);
1354 vm_object_unlock(object);
1355 }
1356 }
1357 entry = entry->vme_next;
1358 }
1359 assert(map->sw_state == MAP_SW_IN);
1360 map->sw_state = MAP_SW_OUT;
1361 }
1362
1363 #endif /* TASK_SWAPPER */
1364
1365 /*
1366 * vm_map_lookup_entry: [ internal use only ]
1367 *
1368 * Calls into the vm map store layer to find the map
1369 * entry containing (or immediately preceding) the
1370 * specified address in the given map; the entry is returned
1371 * in the "entry" parameter. The boolean
1372 * result indicates whether the address is
1373 * actually contained in the map.
1374 */
1375 boolean_t
1376 vm_map_lookup_entry(
1377 register vm_map_t map,
1378 register vm_map_offset_t address,
1379 vm_map_entry_t *entry) /* OUT */
1380 {
1381 return ( vm_map_store_lookup_entry( map, address, entry ));
1382 }
1383
1384 /*
1385 * Routine: vm_map_find_space
1386 * Purpose:
1387 * Allocate a range in the specified virtual address map,
1388 * returning the entry allocated for that range.
1389 * Used by kmem_alloc, etc.
1390 *
1391 * The map must NOT be locked. It will be returned locked
1392 * on KERN_SUCCESS, unlocked on failure.
1393 *
1394 * If an entry is allocated, the object/offset fields
1395 * are initialized to zero.
 *
 * Parameters (as used below):
 *	address	OUT: start of the new range, including the front
 *		guard page if one was requested; set to 0 when
 *		size is 0.
 *	size	length requested; 0 is rejected.  One map page is
 *		added when VM_FLAGS_GUARD_AFTER is set.
 *	mask	alignment mask; the start is rounded up with
 *		(start + mask) & ~mask.
 *	flags	VM_FLAGS_GUARD_BEFORE / VM_FLAGS_GUARD_AFTER are
 *		honored; the entry's alias is extracted with
 *		VM_GET_FLAGS_ALIAS.
 *	o_entry	OUT: the new entry, already linked into the map.
 *
 * Returns:
 *	KERN_SUCCESS, KERN_INVALID_ARGUMENT (size == 0) or
 *	KERN_NO_SPACE.
1396 */
1397 kern_return_t
1398 vm_map_find_space(
1399 register vm_map_t map,
1400 vm_map_offset_t *address, /* OUT */
1401 vm_map_size_t size,
1402 vm_map_offset_t mask,
1403 int flags,
1404 vm_map_entry_t *o_entry) /* OUT */
1405 {
1406 vm_map_entry_t entry, new_entry;
1407 register vm_map_offset_t start;
1408 register vm_map_offset_t end;
1409 vm_map_entry_t hole_entry;
1410
1411 if (size == 0) {
1412 *address = 0;
1413 return KERN_INVALID_ARGUMENT;
1414 }
1415
1416 if (flags & VM_FLAGS_GUARD_AFTER) {
1417 /* account for the back guard page in the size */
1418 size += VM_MAP_PAGE_SIZE(map);
1419 }
1420
/*
 * The entry is allocated before the map lock is taken; every
 * failure path below disposes of it again before returning.
 */
1421 new_entry = vm_map_entry_create(map, FALSE);
1422
1423 /*
1424 * Look for the first possible address; if there's already
1425 * something at this address, we have to start after it.
1426 */
1427
1428 vm_map_lock(map);
1429
/*
 * Three ways to seed "entry" and "start": VM_MAP_HIGHEST_ENTRY when
 * entry reuse is disabled, the first hole when the hole list is
 * enabled, and the first_free hint otherwise.
 */
1430 if( map->disable_vmentry_reuse == TRUE) {
1431 VM_MAP_HIGHEST_ENTRY(map, entry, start);
1432 } else {
1433 if (map->holelistenabled) {
1434 hole_entry = (vm_map_entry_t)map->holes_list;
1435
1436 if (hole_entry == NULL) {
1437 /*
1438 * No more space in the map?
1439 */
1440 vm_map_entry_dispose(map, new_entry);
1441 vm_map_unlock(map);
1442 return(KERN_NO_SPACE);
1443 }
1444
/* Hole-list mode: "entry" is a hole and its vme_start is the candidate. */
1445 entry = hole_entry;
1446 start = entry->vme_start;
1447 } else {
1448 assert(first_free_is_valid(map));
1449 if ((entry = map->first_free) == vm_map_to_entry(map))
1450 start = map->min_offset;
1451 else
1452 start = entry->vme_end;
1453 }
1454 }
1455
1456 /*
1457 * In any case, the "entry" always precedes
1458 * the proposed new region throughout the loop:
1459 */
1460
1461 while (TRUE) {
1462 register vm_map_entry_t next;
1463
1464 /*
1465 * Find the end of the proposed new region.
1466 * Be sure we didn't go beyond the end, or
1467 * wrap around the address.
1468 */
1469
1470 if (flags & VM_FLAGS_GUARD_BEFORE) {
1471 /* reserve space for the front guard page */
1472 start += VM_MAP_PAGE_SIZE(map);
1473 }
/* Round "start" up to the requested alignment; end < start means it wrapped. */
1474 end = ((start + mask) & ~mask);
1475
1476 if (end < start) {
1477 vm_map_entry_dispose(map, new_entry);
1478 vm_map_unlock(map);
1479 return(KERN_NO_SPACE);
1480 }
1481 start = end;
1482 end += size;
1483
1484 if ((end > map->max_offset) || (end < start)) {
1485 vm_map_entry_dispose(map, new_entry);
1486 vm_map_unlock(map);
1487 return(KERN_NO_SPACE);
1488 }
1489
1490 next = entry->vme_next;
1491
/* Hole-list mode: the range fits if the current hole reaches at least "end". */
1492 if (map->holelistenabled) {
1493 if (entry->vme_end >= end)
1494 break;
1495 } else {
1496 /*
1497 * If there are no more entries, we must win.
1498 *
1499 * OR
1500 *
1501 * If there is another entry, it must be
1502 * after the end of the potential new region.
1503 */
1504
1505 if (next == vm_map_to_entry(map))
1506 break;
1507
1508 if (next->vme_start >= end)
1509 break;
1510 }
1511
1512 /*
1513 * Didn't fit -- move to the next entry.
1514 */
1515
1516 entry = next;
1517
1518 if (map->holelistenabled) {
1519 if (entry == (vm_map_entry_t) map->holes_list) {
1520 /*
1521 * Wrapped around
1522 */
1523 vm_map_entry_dispose(map, new_entry);
1524 vm_map_unlock(map);
1525 return(KERN_NO_SPACE);
1526 }
1527 start = entry->vme_start;
1528 } else {
1529 start = entry->vme_end;
1530 }
1531 }
1532
/*
 * Hole-list mode: "entry" still names a hole.  Look up the hole's
 * start address so that "entry" becomes the map entry preceding the
 * new range; finding a real entry at that address is fatal.
 */
1533 if (map->holelistenabled) {
1534 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
1535 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
1536 }
1537 }
1538
1539 /*
1540 * At this point,
1541 * "start" and "end" should define the endpoints of the
1542 * available new range, and
1543 * "entry" should refer to the region before the new
1544 * range, and
1545 *
1546 * the map should be locked.
1547 */
1548
1549 if (flags & VM_FLAGS_GUARD_BEFORE) {
1550 /* go back for the front guard page */
1551 start -= VM_MAP_PAGE_SIZE(map);
1552 }
1553 *address = start;
1554
1555 assert(start < end);
1556 new_entry->vme_start = start;
1557 new_entry->vme_end = end;
1558 assert(page_aligned(new_entry->vme_start));
1559 assert(page_aligned(new_entry->vme_end));
1560 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
1561 VM_MAP_PAGE_MASK(map)));
1562 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
1563 VM_MAP_PAGE_MASK(map)));
1564
/* Fill in the remaining entry fields; no object is attached yet. */
1565 new_entry->is_shared = FALSE;
1566 new_entry->is_sub_map = FALSE;
1567 new_entry->use_pmap = TRUE;
1568 VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
1569 VME_OFFSET_SET(new_entry, (vm_object_offset_t) 0);
1570
1571 new_entry->needs_copy = FALSE;
1572
1573 new_entry->inheritance = VM_INHERIT_DEFAULT;
1574 new_entry->protection = VM_PROT_DEFAULT;
1575 new_entry->max_protection = VM_PROT_ALL;
1576 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
1577 new_entry->wired_count = 0;
1578 new_entry->user_wired_count = 0;
1579
1580 new_entry->in_transition = FALSE;
1581 new_entry->needs_wakeup = FALSE;
1582 new_entry->no_cache = FALSE;
1583 new_entry->permanent = FALSE;
1584 new_entry->superpage_size = FALSE;
/* map_aligned is set only when the map's page shift differs from PAGE_SHIFT. */
1585 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
1586 new_entry->map_aligned = TRUE;
1587 } else {
1588 new_entry->map_aligned = FALSE;
1589 }
1590
1591 new_entry->used_for_jit = FALSE;
1592 new_entry->zero_wired_pages = FALSE;
1593 new_entry->iokit_acct = FALSE;
1594 new_entry->vme_resilient_codesign = FALSE;
1595 new_entry->vme_resilient_media = FALSE;
1596
1597 int alias;
1598 VM_GET_FLAGS_ALIAS(flags, alias);
1599 VME_ALIAS_SET(new_entry, alias);
1600
1601 /*
1602 * Insert the new entry into the list
1603 */
1604
1605 vm_map_store_entry_link(map, entry, new_entry);
1606
/* "size" already includes the back guard page, if one was requested. */
1607 map->size += size;
1608
1609 /*
1610 * Update the lookup hint
1611 */
1612 SAVE_HINT_MAP_WRITE(map, new_entry);
1613
/* Success: the map is returned still locked. */
1614 *o_entry = new_entry;
1615 return(KERN_SUCCESS);
1616 }
1617
1618 int vm_map_pmap_enter_print = FALSE;
1619 int vm_map_pmap_enter_enable = FALSE;
1620
1621 /*
1622 * Routine: vm_map_pmap_enter [internal only]
1623 *
1624 * Description:
1625 * Force pages from the specified object to be entered into
1626 * the pmap at the specified address if they are present.
1627 * As soon as a page not found in the object the scan ends.
1628 *
1629 * Returns:
1630 * Nothing.
1631 *
1632 * In/out conditions:
1633 * The source map should not be locked on entry.
1634 */
1635 __unused static void
1636 vm_map_pmap_enter(
1637 vm_map_t map,
1638 register vm_map_offset_t addr,
1639 register vm_map_offset_t end_addr,
1640 register vm_object_t object,
1641 vm_object_offset_t offset,
1642 vm_prot_t protection)
1643 {
1644 int type_of_fault;
1645 kern_return_t kr;
1646
1647 if(map->pmap == 0)
1648 return;
1649
1650 while (addr < end_addr) {
1651 register vm_page_t m;
1652
1653
1654 /*
1655 * TODO:
1656 * From vm_map_enter(), we come into this function without the map
1657 * lock held or the object lock held.
1658 * We haven't taken a reference on the object either.
1659 * We should do a proper lookup on the map to make sure
1660 * that things are sane before we go locking objects that
1661 * could have been deallocated from under us.
1662 */
1663
1664 vm_object_lock(object);
1665
1666 m = vm_page_lookup(object, offset);
1667 /*
1668 * ENCRYPTED SWAP:
1669 * The user should never see encrypted data, so do not
1670 * enter an encrypted page in the page table.
1671 */
1672 if (m == VM_PAGE_NULL || m->busy || m->encrypted ||
1673 m->fictitious ||
1674 (m->unusual && ( m->error || m->restart || m->absent))) {
1675 vm_object_unlock(object);
1676 return;
1677 }
1678
1679 if (vm_map_pmap_enter_print) {
1680 printf("vm_map_pmap_enter:");
1681 printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
1682 map, (unsigned long long)addr, object, (unsigned long long)offset);
1683 }
1684 type_of_fault = DBG_CACHE_HIT_FAULT;
1685 kr = vm_fault_enter(m, map->pmap, addr, protection, protection,
1686 VM_PAGE_WIRED(m), FALSE, FALSE, FALSE,
1687 0, /* XXX need user tag / alias? */
1688 0, /* alternate accounting? */
1689 NULL,
1690 &type_of_fault);
1691
1692 vm_object_unlock(object);
1693
1694 offset += PAGE_SIZE_64;
1695 addr += PAGE_SIZE;
1696 }
1697 }
1698
1699 boolean_t vm_map_pmap_is_empty(
1700 vm_map_t map,
1701 vm_map_offset_t start,
1702 vm_map_offset_t end);
1703 boolean_t vm_map_pmap_is_empty(
1704 vm_map_t map,
1705 vm_map_offset_t start,
1706 vm_map_offset_t end)
1707 {
1708 #ifdef MACHINE_PMAP_IS_EMPTY
1709 return pmap_is_empty(map->pmap, start, end);
1710 #else /* MACHINE_PMAP_IS_EMPTY */
1711 vm_map_offset_t offset;
1712 ppnum_t phys_page;
1713
1714 if (map->pmap == NULL) {
1715 return TRUE;
1716 }
1717
1718 for (offset = start;
1719 offset < end;
1720 offset += PAGE_SIZE) {
1721 phys_page = pmap_find_phys(map->pmap, offset);
1722 if (phys_page) {
1723 kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
1724 "page %d at 0x%llx\n",
1725 map, (long long)start, (long long)end,
1726 phys_page, (long long)offset);
1727 return FALSE;
1728 }
1729 }
1730 return TRUE;
1731 #endif /* MACHINE_PMAP_IS_EMPTY */
1732 }
1733
1734 #define MAX_TRIES_TO_GET_RANDOM_ADDRESS 1000
1735 kern_return_t
1736 vm_map_random_address_for_size(
1737 vm_map_t map,
1738 vm_map_offset_t *address,
1739 vm_map_size_t size)
1740 {
1741 kern_return_t kr = KERN_SUCCESS;
1742 int tries = 0;
1743 vm_map_offset_t random_addr = 0;
1744 vm_map_offset_t hole_end;
1745
1746 vm_map_entry_t next_entry = VM_MAP_ENTRY_NULL;
1747 vm_map_entry_t prev_entry = VM_MAP_ENTRY_NULL;
1748 vm_map_size_t vm_hole_size = 0;
1749 vm_map_size_t addr_space_size;
1750
1751 addr_space_size = vm_map_max(map) - vm_map_min(map);
1752
1753 assert(page_aligned(size));
1754
1755 while (tries < MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
1756 random_addr = ((vm_map_offset_t)random()) << PAGE_SHIFT;
1757 random_addr = vm_map_trunc_page(
1758 vm_map_min(map) +(random_addr % addr_space_size),
1759 VM_MAP_PAGE_MASK(map));
1760
1761 if (vm_map_lookup_entry(map, random_addr, &prev_entry) == FALSE) {
1762 if (prev_entry == vm_map_to_entry(map)) {
1763 next_entry = vm_map_first_entry(map);
1764 } else {
1765 next_entry = prev_entry->vme_next;
1766 }
1767 if (next_entry == vm_map_to_entry(map)) {
1768 hole_end = vm_map_max(map);
1769 } else {
1770 hole_end = next_entry->vme_start;
1771 }
1772 vm_hole_size = hole_end - random_addr;
1773 if (vm_hole_size >= size) {
1774 *address = random_addr;
1775 break;
1776 }
1777 }
1778 tries++;
1779 }
1780
1781 if (tries == MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
1782 kr = KERN_NO_SPACE;
1783 }
1784 return kr;
1785 }
1786
1787 /*
1788 * Routine: vm_map_enter
1789 *
1790 * Description:
1791 * Allocate a range in the specified virtual address map.
1792 * The resulting range will refer to memory defined by
1793 * the given memory object and offset into that object.
1794 *
1795 * Arguments are as defined in the vm_map call.
1796 */
1797 int _map_enter_debug = 0;
1798 static unsigned int vm_map_enter_restore_successes = 0;
1799 static unsigned int vm_map_enter_restore_failures = 0;
1800 kern_return_t
1801 vm_map_enter(
1802 vm_map_t map,
1803 vm_map_offset_t *address, /* IN/OUT */
1804 vm_map_size_t size,
1805 vm_map_offset_t mask,
1806 int flags,
1807 vm_object_t object,
1808 vm_object_offset_t offset,
1809 boolean_t needs_copy,
1810 vm_prot_t cur_protection,
1811 vm_prot_t max_protection,
1812 vm_inherit_t inheritance)
1813 {
1814 vm_map_entry_t entry, new_entry;
1815 vm_map_offset_t start, tmp_start, tmp_offset;
1816 vm_map_offset_t end, tmp_end;
1817 vm_map_offset_t tmp2_start, tmp2_end;
1818 vm_map_offset_t step;
1819 kern_return_t result = KERN_SUCCESS;
1820 vm_map_t zap_old_map = VM_MAP_NULL;
1821 vm_map_t zap_new_map = VM_MAP_NULL;
1822 boolean_t map_locked = FALSE;
1823 boolean_t pmap_empty = TRUE;
1824 boolean_t new_mapping_established = FALSE;
1825 boolean_t keep_map_locked = ((flags & VM_FLAGS_KEEP_MAP_LOCKED) != 0);
1826 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
1827 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
1828 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
1829 boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
1830 boolean_t is_submap = ((flags & VM_FLAGS_SUBMAP) != 0);
1831 boolean_t permanent = ((flags & VM_FLAGS_PERMANENT) != 0);
1832 boolean_t entry_for_jit = ((flags & VM_FLAGS_MAP_JIT) != 0);
1833 boolean_t iokit_acct = ((flags & VM_FLAGS_IOKIT_ACCT) != 0);
1834 boolean_t resilient_codesign = ((flags & VM_FLAGS_RESILIENT_CODESIGN) != 0);
1835 boolean_t resilient_media = ((flags & VM_FLAGS_RESILIENT_MEDIA) != 0);
1836 unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
1837 vm_tag_t alias, user_alias;
1838 vm_map_offset_t effective_min_offset, effective_max_offset;
1839 kern_return_t kr;
1840 boolean_t clear_map_aligned = FALSE;
1841 vm_map_entry_t hole_entry;
1842
1843 if (superpage_size) {
1844 switch (superpage_size) {
1845 /*
1846 * Note that the current implementation only supports
1847 * a single size for superpages, SUPERPAGE_SIZE, per
1848 * architecture. As soon as more sizes are supposed
1849 * to be supported, SUPERPAGE_SIZE has to be replaced
1850 * with a lookup of the size depending on superpage_size.
1851 */
1852 #ifdef __x86_64__
1853 case SUPERPAGE_SIZE_ANY:
1854 /* handle it like 2 MB and round up to page size */
1855 size = (size + 2*1024*1024 - 1) & ~(2*1024*1024 - 1);
1856 case SUPERPAGE_SIZE_2MB:
1857 break;
1858 #endif
1859 default:
1860 return KERN_INVALID_ARGUMENT;
1861 }
1862 mask = SUPERPAGE_SIZE-1;
1863 if (size & (SUPERPAGE_SIZE-1))
1864 return KERN_INVALID_ARGUMENT;
1865 inheritance = VM_INHERIT_NONE; /* fork() children won't inherit superpages */
1866 }
1867
1868
1869
1870 if (resilient_codesign || resilient_media) {
1871 if ((cur_protection & (VM_PROT_WRITE | VM_PROT_EXECUTE)) ||
1872 (max_protection & (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
1873 return KERN_PROTECTION_FAILURE;
1874 }
1875 }
1876
1877 if (is_submap) {
1878 if (purgable) {
1879 /* submaps can not be purgeable */
1880 return KERN_INVALID_ARGUMENT;
1881 }
1882 if (object == VM_OBJECT_NULL) {
1883 /* submaps can not be created lazily */
1884 return KERN_INVALID_ARGUMENT;
1885 }
1886 }
1887 if (flags & VM_FLAGS_ALREADY) {
1888 /*
1889 * VM_FLAGS_ALREADY says that it's OK if the same mapping
1890 * is already present. For it to be meaningful, the requested
1891 * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
1892 * we shouldn't try and remove what was mapped there first
1893 * (!VM_FLAGS_OVERWRITE).
1894 */
1895 if ((flags & VM_FLAGS_ANYWHERE) ||
1896 (flags & VM_FLAGS_OVERWRITE)) {
1897 return KERN_INVALID_ARGUMENT;
1898 }
1899 }
1900
1901 effective_min_offset = map->min_offset;
1902
1903 if (flags & VM_FLAGS_BEYOND_MAX) {
1904 /*
1905 * Allow an insertion beyond the map's max offset.
1906 */
1907 if (vm_map_is_64bit(map))
1908 effective_max_offset = 0xFFFFFFFFFFFFF000ULL;
1909 else
1910 effective_max_offset = 0x00000000FFFFF000ULL;
1911 } else {
1912 effective_max_offset = map->max_offset;
1913 }
1914
1915 if (size == 0 ||
1916 (offset & PAGE_MASK_64) != 0) {
1917 *address = 0;
1918 return KERN_INVALID_ARGUMENT;
1919 }
1920
1921 VM_GET_FLAGS_ALIAS(flags, alias);
1922 if (map->pmap == kernel_pmap) {
1923 user_alias = VM_KERN_MEMORY_NONE;
1924 } else {
1925 user_alias = alias;
1926 }
1927
1928 #define RETURN(value) { result = value; goto BailOut; }
1929
1930 assert(page_aligned(*address));
1931 assert(page_aligned(size));
1932
1933 if (!VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map))) {
1934 /*
1935 * In most cases, the caller rounds the size up to the
1936 * map's page size.
1937 * If we get a size that is explicitly not map-aligned here,
1938 * we'll have to respect the caller's wish and mark the
1939 * mapping as "not map-aligned" to avoid tripping the
1940 * map alignment checks later.
1941 */
1942 clear_map_aligned = TRUE;
1943 }
1944 if (!anywhere &&
1945 !VM_MAP_PAGE_ALIGNED(*address, VM_MAP_PAGE_MASK(map))) {
1946 /*
1947 * We've been asked to map at a fixed address and that
1948 * address is not aligned to the map's specific alignment.
1949 * The caller should know what it's doing (i.e. most likely
1950 * mapping some fragmented copy map, transferring memory from
1951 * a VM map with a different alignment), so clear map_aligned
1952 * for this new VM map entry and proceed.
1953 */
1954 clear_map_aligned = TRUE;
1955 }
1956
1957 /*
1958 * Only zero-fill objects are allowed to be purgable.
1959 * LP64todo - limit purgable objects to 32-bits for now
1960 */
1961 if (purgable &&
1962 (offset != 0 ||
1963 (object != VM_OBJECT_NULL &&
1964 (object->vo_size != size ||
1965 object->purgable == VM_PURGABLE_DENY))
1966 || size > ANON_MAX_SIZE)) /* LP64todo: remove when dp capable */
1967 return KERN_INVALID_ARGUMENT;
1968
1969 if (!anywhere && overwrite) {
1970 /*
1971 * Create a temporary VM map to hold the old mappings in the
1972 * affected area while we create the new one.
1973 * This avoids releasing the VM map lock in
1974 * vm_map_entry_delete() and allows atomicity
1975 * when we want to replace some mappings with a new one.
1976 * It also allows us to restore the old VM mappings if the
1977 * new mapping fails.
1978 */
1979 zap_old_map = vm_map_create(PMAP_NULL,
1980 *address,
1981 *address + size,
1982 map->hdr.entries_pageable);
1983 vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
1984 vm_map_disable_hole_optimization(zap_old_map);
1985 }
1986
1987 StartAgain: ;
1988
1989 start = *address;
1990
1991 if (anywhere) {
1992 vm_map_lock(map);
1993 map_locked = TRUE;
1994
1995 if (entry_for_jit) {
1996 if (map->jit_entry_exists) {
1997 result = KERN_INVALID_ARGUMENT;
1998 goto BailOut;
1999 }
2000 /*
2001 * Get a random start address.
2002 */
2003 result = vm_map_random_address_for_size(map, address, size);
2004 if (result != KERN_SUCCESS) {
2005 goto BailOut;
2006 }
2007 start = *address;
2008 }
2009
2010
2011 /*
2012 * Calculate the first possible address.
2013 */
2014
2015 if (start < effective_min_offset)
2016 start = effective_min_offset;
2017 if (start > effective_max_offset)
2018 RETURN(KERN_NO_SPACE);
2019
2020 /*
2021 * Look for the first possible address;
2022 * if there's already something at this
2023 * address, we have to start after it.
2024 */
2025
2026 if( map->disable_vmentry_reuse == TRUE) {
2027 VM_MAP_HIGHEST_ENTRY(map, entry, start);
2028 } else {
2029
2030 if (map->holelistenabled) {
2031 hole_entry = (vm_map_entry_t)map->holes_list;
2032
2033 if (hole_entry == NULL) {
2034 /*
2035 * No more space in the map?
2036 */
2037 result = KERN_NO_SPACE;
2038 goto BailOut;
2039 } else {
2040
2041 boolean_t found_hole = FALSE;
2042
2043 do {
2044 if (hole_entry->vme_start >= start) {
2045 start = hole_entry->vme_start;
2046 found_hole = TRUE;
2047 break;
2048 }
2049
2050 if (hole_entry->vme_end > start) {
2051 found_hole = TRUE;
2052 break;
2053 }
2054 hole_entry = hole_entry->vme_next;
2055
2056 } while (hole_entry != (vm_map_entry_t) map->holes_list);
2057
2058 if (found_hole == FALSE) {
2059 result = KERN_NO_SPACE;
2060 goto BailOut;
2061 }
2062
2063 entry = hole_entry;
2064
2065 if (start == 0)
2066 start += PAGE_SIZE_64;
2067 }
2068 } else {
2069 assert(first_free_is_valid(map));
2070
2071 entry = map->first_free;
2072
2073 if (entry == vm_map_to_entry(map)) {
2074 entry = NULL;
2075 } else {
2076 if (entry->vme_next == vm_map_to_entry(map)){
2077 /*
2078 * Hole at the end of the map.
2079 */
2080 entry = NULL;
2081 } else {
2082 if (start < (entry->vme_next)->vme_start ) {
2083 start = entry->vme_end;
2084 start = vm_map_round_page(start,
2085 VM_MAP_PAGE_MASK(map));
2086 } else {
2087 /*
2088 * Need to do a lookup.
2089 */
2090 entry = NULL;
2091 }
2092 }
2093 }
2094
2095 if (entry == NULL) {
2096 vm_map_entry_t tmp_entry;
2097 if (vm_map_lookup_entry(map, start, &tmp_entry)) {
2098 assert(!entry_for_jit);
2099 start = tmp_entry->vme_end;
2100 start = vm_map_round_page(start,
2101 VM_MAP_PAGE_MASK(map));
2102 }
2103 entry = tmp_entry;
2104 }
2105 }
2106 }
2107
2108 /*
2109 * In any case, the "entry" always precedes
2110 * the proposed new region throughout the
2111 * loop:
2112 */
2113
2114 while (TRUE) {
2115 register vm_map_entry_t next;
2116
2117 /*
2118 * Find the end of the proposed new region.
2119 * Be sure we didn't go beyond the end, or
2120 * wrap around the address.
2121 */
2122
2123 end = ((start + mask) & ~mask);
2124 end = vm_map_round_page(end,
2125 VM_MAP_PAGE_MASK(map));
2126 if (end < start)
2127 RETURN(KERN_NO_SPACE);
2128 start = end;
2129 assert(VM_MAP_PAGE_ALIGNED(start,
2130 VM_MAP_PAGE_MASK(map)));
2131 end += size;
2132
2133 if ((end > effective_max_offset) || (end < start)) {
2134 if (map->wait_for_space) {
2135 assert(!keep_map_locked);
2136 if (size <= (effective_max_offset -
2137 effective_min_offset)) {
2138 assert_wait((event_t)map,
2139 THREAD_ABORTSAFE);
2140 vm_map_unlock(map);
2141 map_locked = FALSE;
2142 thread_block(THREAD_CONTINUE_NULL);
2143 goto StartAgain;
2144 }
2145 }
2146 RETURN(KERN_NO_SPACE);
2147 }
2148
2149 next = entry->vme_next;
2150
2151 if (map->holelistenabled) {
2152 if (entry->vme_end >= end)
2153 break;
2154 } else {
2155 /*
2156 * If there are no more entries, we must win.
2157 *
2158 * OR
2159 *
2160 * If there is another entry, it must be
2161 * after the end of the potential new region.
2162 */
2163
2164 if (next == vm_map_to_entry(map))
2165 break;
2166
2167 if (next->vme_start >= end)
2168 break;
2169 }
2170
2171 /*
2172 * Didn't fit -- move to the next entry.
2173 */
2174
2175 entry = next;
2176
2177 if (map->holelistenabled) {
2178 if (entry == (vm_map_entry_t) map->holes_list) {
2179 /*
2180 * Wrapped around
2181 */
2182 result = KERN_NO_SPACE;
2183 goto BailOut;
2184 }
2185 start = entry->vme_start;
2186 } else {
2187 start = entry->vme_end;
2188 }
2189
2190 start = vm_map_round_page(start,
2191 VM_MAP_PAGE_MASK(map));
2192 }
2193
2194 if (map->holelistenabled) {
2195 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
2196 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
2197 }
2198 }
2199
2200 *address = start;
2201 assert(VM_MAP_PAGE_ALIGNED(*address,
2202 VM_MAP_PAGE_MASK(map)));
2203 } else {
2204 /*
2205 * Verify that:
2206 * the address doesn't itself violate
2207 * the mask requirement.
2208 */
2209
2210 vm_map_lock(map);
2211 map_locked = TRUE;
2212 if ((start & mask) != 0)
2213 RETURN(KERN_NO_SPACE);
2214
2215 /*
2216 * ... the address is within bounds
2217 */
2218
2219 end = start + size;
2220
2221 if ((start < effective_min_offset) ||
2222 (end > effective_max_offset) ||
2223 (start >= end)) {
2224 RETURN(KERN_INVALID_ADDRESS);
2225 }
2226
2227 if (overwrite && zap_old_map != VM_MAP_NULL) {
2228 /*
2229 * Fixed mapping and "overwrite" flag: attempt to
2230 * remove all existing mappings in the specified
2231 * address range, saving them in our "zap_old_map".
2232 */
2233 (void) vm_map_delete(map, start, end,
2234 (VM_MAP_REMOVE_SAVE_ENTRIES |
2235 VM_MAP_REMOVE_NO_MAP_ALIGN),
2236 zap_old_map);
2237 }
2238
2239 /*
2240 * ... the starting address isn't allocated
2241 */
2242
2243 if (vm_map_lookup_entry(map, start, &entry)) {
2244 if (! (flags & VM_FLAGS_ALREADY)) {
2245 RETURN(KERN_NO_SPACE);
2246 }
2247 /*
2248 * Check if what's already there is what we want.
2249 */
2250 tmp_start = start;
2251 tmp_offset = offset;
2252 if (entry->vme_start < start) {
2253 tmp_start -= start - entry->vme_start;
2254 tmp_offset -= start - entry->vme_start;
2255
2256 }
2257 for (; entry->vme_start < end;
2258 entry = entry->vme_next) {
2259 /*
2260 * Check if the mapping's attributes
2261 * match the existing map entry.
2262 */
2263 if (entry == vm_map_to_entry(map) ||
2264 entry->vme_start != tmp_start ||
2265 entry->is_sub_map != is_submap ||
2266 VME_OFFSET(entry) != tmp_offset ||
2267 entry->needs_copy != needs_copy ||
2268 entry->protection != cur_protection ||
2269 entry->max_protection != max_protection ||
2270 entry->inheritance != inheritance ||
2271 entry->iokit_acct != iokit_acct ||
2272 VME_ALIAS(entry) != alias) {
2273 /* not the same mapping ! */
2274 RETURN(KERN_NO_SPACE);
2275 }
2276 /*
2277 * Check if the same object is being mapped.
2278 */
2279 if (is_submap) {
2280 if (VME_SUBMAP(entry) !=
2281 (vm_map_t) object) {
2282 /* not the same submap */
2283 RETURN(KERN_NO_SPACE);
2284 }
2285 } else {
2286 if (VME_OBJECT(entry) != object) {
2287 /* not the same VM object... */
2288 vm_object_t obj2;
2289
2290 obj2 = VME_OBJECT(entry);
2291 if ((obj2 == VM_OBJECT_NULL ||
2292 obj2->internal) &&
2293 (object == VM_OBJECT_NULL ||
2294 object->internal)) {
2295 /*
2296 * ... but both are
2297 * anonymous memory,
2298 * so equivalent.
2299 */
2300 } else {
2301 RETURN(KERN_NO_SPACE);
2302 }
2303 }
2304 }
2305
2306 tmp_offset += entry->vme_end - entry->vme_start;
2307 tmp_start += entry->vme_end - entry->vme_start;
2308 if (entry->vme_end >= end) {
2309 /* reached the end of our mapping */
2310 break;
2311 }
2312 }
2313 /* it all matches: let's use what's already there ! */
2314 RETURN(KERN_MEMORY_PRESENT);
2315 }
2316
2317 /*
2318 * ... the next region doesn't overlap the
2319 * end point.
2320 */
2321
2322 if ((entry->vme_next != vm_map_to_entry(map)) &&
2323 (entry->vme_next->vme_start < end))
2324 RETURN(KERN_NO_SPACE);
2325 }
2326
2327 /*
2328 * At this point,
2329 * "start" and "end" should define the endpoints of the
2330 * available new range, and
2331 * "entry" should refer to the region before the new
2332 * range, and
2333 *
2334 * the map should be locked.
2335 */
2336
2337 /*
2338 * See whether we can avoid creating a new entry (and object) by
2339 * extending one of our neighbors. [So far, we only attempt to
2340 * extend from below.] Note that we can never extend/join
2341 * purgable objects because they need to remain distinct
2342 * entities in order to implement their "volatile object"
2343 * semantics.
2344 */
2345
2346 if (purgable || entry_for_jit) {
2347 if (object == VM_OBJECT_NULL) {
2348
2349 object = vm_object_allocate(size);
2350 object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
2351 object->true_share = TRUE;
2352 if (purgable) {
2353 task_t owner;
2354 object->purgable = VM_PURGABLE_NONVOLATILE;
2355 if (map->pmap == kernel_pmap) {
2356 /*
2357 * Purgeable mappings made in a kernel
2358 * map are "owned" by the kernel itself
2359 * rather than the current user task
2360 * because they're likely to be used by
2361 * more than this user task (see
2362 * execargs_purgeable_allocate(), for
2363 * example).
2364 */
2365 owner = kernel_task;
2366 } else {
2367 owner = current_task();
2368 }
2369 assert(object->vo_purgeable_owner == NULL);
2370 assert(object->resident_page_count == 0);
2371 assert(object->wired_page_count == 0);
2372 vm_object_lock(object);
2373 vm_purgeable_nonvolatile_enqueue(object, owner);
2374 vm_object_unlock(object);
2375 }
2376 offset = (vm_object_offset_t)0;
2377 }
2378 } else if ((is_submap == FALSE) &&
2379 (object == VM_OBJECT_NULL) &&
2380 (entry != vm_map_to_entry(map)) &&
2381 (entry->vme_end == start) &&
2382 (!entry->is_shared) &&
2383 (!entry->is_sub_map) &&
2384 (!entry->in_transition) &&
2385 (!entry->needs_wakeup) &&
2386 (entry->behavior == VM_BEHAVIOR_DEFAULT) &&
2387 (entry->protection == cur_protection) &&
2388 (entry->max_protection == max_protection) &&
2389 (entry->inheritance == inheritance) &&
2390 ((user_alias == VM_MEMORY_REALLOC) ||
2391 (VME_ALIAS(entry) == alias)) &&
2392 (entry->no_cache == no_cache) &&
2393 (entry->permanent == permanent) &&
2394 (!entry->superpage_size && !superpage_size) &&
2395 /*
2396 * No coalescing if not map-aligned, to avoid propagating
2397 * that condition any further than needed:
2398 */
2399 (!entry->map_aligned || !clear_map_aligned) &&
2400 (!entry->zero_wired_pages) &&
2401 (!entry->used_for_jit && !entry_for_jit) &&
2402 (entry->iokit_acct == iokit_acct) &&
2403 (!entry->vme_resilient_codesign) &&
2404 (!entry->vme_resilient_media) &&
2405
2406 ((entry->vme_end - entry->vme_start) + size <=
2407 (user_alias == VM_MEMORY_REALLOC ?
2408 ANON_CHUNK_SIZE :
2409 NO_COALESCE_LIMIT)) &&
2410
2411 (entry->wired_count == 0)) { /* implies user_wired_count == 0 */
2412 if (vm_object_coalesce(VME_OBJECT(entry),
2413 VM_OBJECT_NULL,
2414 VME_OFFSET(entry),
2415 (vm_object_offset_t) 0,
2416 (vm_map_size_t)(entry->vme_end - entry->vme_start),
2417 (vm_map_size_t)(end - entry->vme_end))) {
2418
2419 /*
2420 * Coalesced the two objects - can extend
2421 * the previous map entry to include the
2422 * new range.
2423 */
2424 map->size += (end - entry->vme_end);
2425 assert(entry->vme_start < end);
2426 assert(VM_MAP_PAGE_ALIGNED(end,
2427 VM_MAP_PAGE_MASK(map)));
2428 if (__improbable(vm_debug_events))
2429 DTRACE_VM5(map_entry_extend, vm_map_t, map, vm_map_entry_t, entry, vm_address_t, entry->vme_start, vm_address_t, entry->vme_end, vm_address_t, end);
2430 entry->vme_end = end;
2431 if (map->holelistenabled) {
2432 vm_map_store_update_first_free(map, entry, TRUE);
2433 } else {
2434 vm_map_store_update_first_free(map, map->first_free, TRUE);
2435 }
2436 new_mapping_established = TRUE;
2437 RETURN(KERN_SUCCESS);
2438 }
2439 }
2440
2441 step = superpage_size ? SUPERPAGE_SIZE : (end - start);
2442 new_entry = NULL;
2443
2444 for (tmp2_start = start; tmp2_start<end; tmp2_start += step) {
2445 tmp2_end = tmp2_start + step;
2446 /*
2447 * Create a new entry
2448 * LP64todo - for now, we can only allocate 4GB internal objects
2449 * because the default pager can't page bigger ones. Remove this
2450 * when it can.
2451 *
2452 * XXX FBDP
2453 * The reserved "page zero" in each process's address space can
2454 * be arbitrarily large. Splitting it into separate 4GB objects and
2455 * therefore different VM map entries serves no purpose and just
2456 * slows down operations on the VM map, so let's not split the
2457 * allocation into 4GB chunks if the max protection is NONE. That
2458 * memory should never be accessible, so it will never get to the
2459 * default pager.
2460 */
2461 tmp_start = tmp2_start;
2462 if (object == VM_OBJECT_NULL &&
2463 size > (vm_map_size_t)ANON_CHUNK_SIZE &&
2464 max_protection != VM_PROT_NONE &&
2465 superpage_size == 0)
2466 tmp_end = tmp_start + (vm_map_size_t)ANON_CHUNK_SIZE;
2467 else
2468 tmp_end = tmp2_end;
2469 do {
2470 new_entry = vm_map_entry_insert(map, entry, tmp_start, tmp_end,
2471 object, offset, needs_copy,
2472 FALSE, FALSE,
2473 cur_protection, max_protection,
2474 VM_BEHAVIOR_DEFAULT,
2475 (entry_for_jit)? VM_INHERIT_NONE: inheritance,
2476 0, no_cache,
2477 permanent,
2478 superpage_size,
2479 clear_map_aligned,
2480 is_submap);
2481
2482 assert((object != kernel_object) || (VM_KERN_MEMORY_NONE != alias));
2483 VME_ALIAS_SET(new_entry, alias);
2484
2485 if (entry_for_jit){
2486 if (!(map->jit_entry_exists)){
2487 new_entry->used_for_jit = TRUE;
2488 map->jit_entry_exists = TRUE;
2489 }
2490 }
2491
2492 if (resilient_codesign &&
2493 ! ((cur_protection | max_protection) &
2494 (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
2495 new_entry->vme_resilient_codesign = TRUE;
2496 }
2497
2498 if (resilient_media &&
2499 ! ((cur_protection | max_protection) &
2500 (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
2501 new_entry->vme_resilient_media = TRUE;
2502 }
2503
2504 assert(!new_entry->iokit_acct);
2505 if (!is_submap &&
2506 object != VM_OBJECT_NULL &&
2507 object->purgable != VM_PURGABLE_DENY) {
2508 assert(new_entry->use_pmap);
2509 assert(!new_entry->iokit_acct);
2510 /*
2511 * Turn off pmap accounting since
2512 * purgeable objects have their
2513 * own ledgers.
2514 */
2515 new_entry->use_pmap = FALSE;
2516 } else if (!is_submap &&
2517 iokit_acct) {
2518 /* alternate accounting */
2519 assert(!new_entry->iokit_acct);
2520 assert(new_entry->use_pmap);
2521 new_entry->iokit_acct = TRUE;
2522 new_entry->use_pmap = FALSE;
2523 vm_map_iokit_mapped_region(
2524 map,
2525 (new_entry->vme_end -
2526 new_entry->vme_start));
2527 } else if (!is_submap) {
2528 assert(!new_entry->iokit_acct);
2529 assert(new_entry->use_pmap);
2530 }
2531
2532 if (is_submap) {
2533 vm_map_t submap;
2534 boolean_t submap_is_64bit;
2535 boolean_t use_pmap;
2536
2537 assert(new_entry->is_sub_map);
2538 assert(!new_entry->use_pmap);
2539 assert(!new_entry->iokit_acct);
2540 submap = (vm_map_t) object;
2541 submap_is_64bit = vm_map_is_64bit(submap);
2542 use_pmap = (user_alias == VM_MEMORY_SHARED_PMAP);
2543 #ifndef NO_NESTED_PMAP
2544 if (use_pmap && submap->pmap == NULL) {
2545 ledger_t ledger = map->pmap->ledger;
2546 /* we need a sub pmap to nest... */
2547 submap->pmap = pmap_create(ledger, 0,
2548 submap_is_64bit);
2549 if (submap->pmap == NULL) {
2550 /* let's proceed without nesting... */
2551 }
2552 }
2553 if (use_pmap && submap->pmap != NULL) {
2554 kr = pmap_nest(map->pmap,
2555 submap->pmap,
2556 tmp_start,
2557 tmp_start,
2558 tmp_end - tmp_start);
2559 if (kr != KERN_SUCCESS) {
2560 printf("vm_map_enter: "
2561 "pmap_nest(0x%llx,0x%llx) "
2562 "error 0x%x\n",
2563 (long long)tmp_start,
2564 (long long)tmp_end,
2565 kr);
2566 } else {
2567 /* we're now nested ! */
2568 new_entry->use_pmap = TRUE;
2569 pmap_empty = FALSE;
2570 }
2571 }
2572 #endif /* NO_NESTED_PMAP */
2573 }
2574 entry = new_entry;
2575
2576 if (superpage_size) {
2577 vm_page_t pages, m;
2578 vm_object_t sp_object;
2579
2580 VME_OFFSET_SET(entry, 0);
2581
2582 /* allocate one superpage */
2583 kr = cpm_allocate(SUPERPAGE_SIZE, &pages, 0, SUPERPAGE_NBASEPAGES-1, TRUE, 0);
2584 if (kr != KERN_SUCCESS) {
2585 /* deallocate whole range... */
2586 new_mapping_established = TRUE;
2587 /* ... but only up to "tmp_end" */
2588 size -= end - tmp_end;
2589 RETURN(kr);
2590 }
2591
2592 /* create one vm_object per superpage */
2593 sp_object = vm_object_allocate((vm_map_size_t)(entry->vme_end - entry->vme_start));
2594 sp_object->phys_contiguous = TRUE;
2595 sp_object->vo_shadow_offset = (vm_object_offset_t)pages->phys_page*PAGE_SIZE;
2596 VME_OBJECT_SET(entry, sp_object);
2597 assert(entry->use_pmap);
2598
2599 /* enter the base pages into the object */
2600 vm_object_lock(sp_object);
2601 for (offset = 0; offset < SUPERPAGE_SIZE; offset += PAGE_SIZE) {
2602 m = pages;
2603 pmap_zero_page(m->phys_page);
2604 pages = NEXT_PAGE(m);
2605 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
2606 vm_page_insert_wired(m, sp_object, offset, VM_KERN_MEMORY_OSFMK);
2607 }
2608 vm_object_unlock(sp_object);
2609 }
2610 } while (tmp_end != tmp2_end &&
2611 (tmp_start = tmp_end) &&
2612 (tmp_end = (tmp2_end - tmp_end > (vm_map_size_t)ANON_CHUNK_SIZE) ?
2613 tmp_end + (vm_map_size_t)ANON_CHUNK_SIZE : tmp2_end));
2614 }
2615
2616 new_mapping_established = TRUE;
2617
2618 BailOut:
2619 assert(map_locked == TRUE);
2620
2621 if (result == KERN_SUCCESS) {
2622 vm_prot_t pager_prot;
2623 memory_object_t pager;
2624
2625 #if DEBUG
2626 if (pmap_empty &&
2627 !(flags & VM_FLAGS_NO_PMAP_CHECK)) {
2628 assert(vm_map_pmap_is_empty(map,
2629 *address,
2630 *address+size));
2631 }
2632 #endif /* DEBUG */
2633
2634 /*
2635 * For "named" VM objects, let the pager know that the
2636 * memory object is being mapped. Some pagers need to keep
2637 * track of this, to know when they can reclaim the memory
2638 * object, for example.
2639 * VM calls memory_object_map() for each mapping (specifying
2640 * the protection of each mapping) and calls
2641 * memory_object_last_unmap() when all the mappings are gone.
2642 */
2643 pager_prot = max_protection;
2644 if (needs_copy) {
2645 /*
2646 * Copy-On-Write mapping: won't modify
2647 * the memory object.
2648 */
2649 pager_prot &= ~VM_PROT_WRITE;
2650 }
2651 if (!is_submap &&
2652 object != VM_OBJECT_NULL &&
2653 object->named &&
2654 object->pager != MEMORY_OBJECT_NULL) {
2655 vm_object_lock(object);
2656 pager = object->pager;
2657 if (object->named &&
2658 pager != MEMORY_OBJECT_NULL) {
2659 assert(object->pager_ready);
2660 vm_object_mapping_wait(object, THREAD_UNINT);
2661 vm_object_mapping_begin(object);
2662 vm_object_unlock(object);
2663
2664 kr = memory_object_map(pager, pager_prot);
2665 assert(kr == KERN_SUCCESS);
2666
2667 vm_object_lock(object);
2668 vm_object_mapping_end(object);
2669 }
2670 vm_object_unlock(object);
2671 }
2672 }
2673
2674 assert(map_locked == TRUE);
2675
2676 if (!keep_map_locked) {
2677 vm_map_unlock(map);
2678 map_locked = FALSE;
2679 }
2680
2681 /*
2682 * We can't hold the map lock if we enter this block.
2683 */
2684
2685 if (result == KERN_SUCCESS) {
2686
2687 /* Wire down the new entry if the user
2688 * requested all new map entries be wired.
2689 */
2690 if ((map->wiring_required)||(superpage_size)) {
2691 assert(!keep_map_locked);
2692 pmap_empty = FALSE; /* pmap won't be empty */
2693 kr = vm_map_wire(map, start, end,
2694 new_entry->protection | VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_MLOCK),
2695 TRUE);
2696 result = kr;
2697 }
2698
2699 }
2700
2701 if (result != KERN_SUCCESS) {
2702 if (new_mapping_established) {
2703 /*
2704 * We have to get rid of the new mappings since we
2705 * won't make them available to the user.
2706 * Try and do that atomically, to minimize the risk
2707 * that someone else creates new mappings in that range.
2708 */
2709 zap_new_map = vm_map_create(PMAP_NULL,
2710 *address,
2711 *address + size,
2712 map->hdr.entries_pageable);
2713 vm_map_set_page_shift(zap_new_map,
2714 VM_MAP_PAGE_SHIFT(map));
2715 vm_map_disable_hole_optimization(zap_new_map);
2716
2717 if (!map_locked) {
2718 vm_map_lock(map);
2719 map_locked = TRUE;
2720 }
2721 (void) vm_map_delete(map, *address, *address+size,
2722 (VM_MAP_REMOVE_SAVE_ENTRIES |
2723 VM_MAP_REMOVE_NO_MAP_ALIGN),
2724 zap_new_map);
2725 }
2726 if (zap_old_map != VM_MAP_NULL &&
2727 zap_old_map->hdr.nentries != 0) {
2728 vm_map_entry_t entry1, entry2;
2729
2730 /*
2731 * The new mapping failed. Attempt to restore
2732 * the old mappings, saved in the "zap_old_map".
2733 */
2734 if (!map_locked) {
2735 vm_map_lock(map);
2736 map_locked = TRUE;
2737 }
2738
2739 /* first check if the coast is still clear */
2740 start = vm_map_first_entry(zap_old_map)->vme_start;
2741 end = vm_map_last_entry(zap_old_map)->vme_end;
2742 if (vm_map_lookup_entry(map, start, &entry1) ||
2743 vm_map_lookup_entry(map, end, &entry2) ||
2744 entry1 != entry2) {
2745 /*
2746 * Part of that range has already been
2747 * re-mapped: we can't restore the old
2748 * mappings...
2749 */
2750 vm_map_enter_restore_failures++;
2751 } else {
2752 /*
2753 * Transfer the saved map entries from
2754 * "zap_old_map" to the original "map",
2755 * inserting them all after "entry1".
2756 */
2757 for (entry2 = vm_map_first_entry(zap_old_map);
2758 entry2 != vm_map_to_entry(zap_old_map);
2759 entry2 = vm_map_first_entry(zap_old_map)) {
2760 vm_map_size_t entry_size;
2761
2762 entry_size = (entry2->vme_end -
2763 entry2->vme_start);
2764 vm_map_store_entry_unlink(zap_old_map,
2765 entry2);
2766 zap_old_map->size -= entry_size;
2767 vm_map_store_entry_link(map, entry1, entry2);
2768 map->size += entry_size;
2769 entry1 = entry2;
2770 }
2771 if (map->wiring_required) {
2772 /*
2773 * XXX TODO: we should rewire the
2774 * old pages here...
2775 */
2776 }
2777 vm_map_enter_restore_successes++;
2778 }
2779 }
2780 }
2781
2782 /*
2783 * The caller is responsible for releasing the lock if it requested to
2784 * keep the map locked.
2785 */
2786 if (map_locked && !keep_map_locked) {
2787 vm_map_unlock(map);
2788 }
2789
2790 /*
2791 * Get rid of the "zap_maps" and all the map entries that
2792 * they may still contain.
2793 */
2794 if (zap_old_map != VM_MAP_NULL) {
2795 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
2796 zap_old_map = VM_MAP_NULL;
2797 }
2798 if (zap_new_map != VM_MAP_NULL) {
2799 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
2800 zap_new_map = VM_MAP_NULL;
2801 }
2802
2803 return result;
2804
2805 #undef RETURN
2806 }
2807
2808
2809 /*
2810 * Counters for the prefault optimization (file-scope, non-static, both start at zero).
2811 */
2812 int64_t vm_prefault_nb_pages = 0; /* NOTE(review): presumably the number of pages entered by prefaulting -- confirm at the update site */
2813 int64_t vm_prefault_nb_bailout = 0; /* NOTE(review): presumably the number of prefault attempts abandoned early -- confirm at the update site */
2814
2815 static kern_return_t
2816 vm_map_enter_mem_object_helper(
2817 vm_map_t target_map,
2818 vm_map_offset_t *address,
2819 vm_map_size_t initial_size,
2820 vm_map_offset_t mask,
2821 int flags,
2822 ipc_port_t port,
2823 vm_object_offset_t offset,
2824 boolean_t copy,
2825 vm_prot_t cur_protection,
2826 vm_prot_t max_protection,
2827 vm_inherit_t inheritance,
2828 upl_page_list_ptr_t page_list,
2829 unsigned int page_list_count)
2830 {
2831 vm_map_address_t map_addr;
2832 vm_map_size_t map_size;
2833 vm_object_t object;
2834 vm_object_size_t size;
2835 kern_return_t result;
2836 boolean_t mask_cur_protection, mask_max_protection;
2837 boolean_t try_prefault = (page_list_count != 0);
2838 vm_map_offset_t offset_in_mapping = 0;
2839
2840 mask_cur_protection = cur_protection & VM_PROT_IS_MASK;
2841 mask_max_protection = max_protection & VM_PROT_IS_MASK;
2842 cur_protection &= ~VM_PROT_IS_MASK;
2843 max_protection &= ~VM_PROT_IS_MASK;
2844
2845 /*
2846 * Check arguments for validity
2847 */
2848 if ((target_map == VM_MAP_NULL) ||
2849 (cur_protection & ~VM_PROT_ALL) ||
2850 (max_protection & ~VM_PROT_ALL) ||
2851 (inheritance > VM_INHERIT_LAST_VALID) ||
2852 (try_prefault && (copy || !page_list)) ||
2853 initial_size == 0) {
2854 return KERN_INVALID_ARGUMENT;
2855 }
2856
2857 {
2858 map_addr = vm_map_trunc_page(*address,
2859 VM_MAP_PAGE_MASK(target_map));
2860 map_size = vm_map_round_page(initial_size,
2861 VM_MAP_PAGE_MASK(target_map));
2862 }
2863 size = vm_object_round_page(initial_size);
2864
2865 /*
2866 * Find the vm object (if any) corresponding to this port.
2867 */
2868 if (!IP_VALID(port)) {
2869 object = VM_OBJECT_NULL;
2870 offset = 0;
2871 copy = FALSE;
2872 } else if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
2873 vm_named_entry_t named_entry;
2874
2875 named_entry = (vm_named_entry_t) port->ip_kobject;
2876
2877 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
2878 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
2879 offset += named_entry->data_offset;
2880 }
2881
2882 /* a few checks to make sure user is obeying rules */
2883 if (size == 0) {
2884 if (offset >= named_entry->size)
2885 return KERN_INVALID_RIGHT;
2886 size = named_entry->size - offset;
2887 }
2888 if (mask_max_protection) {
2889 max_protection &= named_entry->protection;
2890 }
2891 if (mask_cur_protection) {
2892 cur_protection &= named_entry->protection;
2893 }
2894 if ((named_entry->protection & max_protection) !=
2895 max_protection)
2896 return KERN_INVALID_RIGHT;
2897 if ((named_entry->protection & cur_protection) !=
2898 cur_protection)
2899 return KERN_INVALID_RIGHT;
2900 if (offset + size < offset) {
2901 /* overflow */
2902 return KERN_INVALID_ARGUMENT;
2903 }
2904 if (named_entry->size < (offset + initial_size)) {
2905 return KERN_INVALID_ARGUMENT;
2906 }
2907
2908 if (named_entry->is_copy) {
2909 /* for a vm_map_copy, we can only map it whole */
2910 if ((size != named_entry->size) &&
2911 (vm_map_round_page(size,
2912 VM_MAP_PAGE_MASK(target_map)) ==
2913 named_entry->size)) {
2914 /* XXX FBDP use the rounded size... */
2915 size = vm_map_round_page(
2916 size,
2917 VM_MAP_PAGE_MASK(target_map));
2918 }
2919
2920 if (!(flags & VM_FLAGS_ANYWHERE) &&
2921 (offset != 0 ||
2922 size != named_entry->size)) {
2923 /*
2924 * XXX for a mapping at a "fixed" address,
2925 * we can't trim after mapping the whole
2926 * memory entry, so reject a request for a
2927 * partial mapping.
2928 */
2929 return KERN_INVALID_ARGUMENT;
2930 }
2931 }
2932
2933 /* the callers parameter offset is defined to be the */
2934 /* offset from beginning of named entry offset in object */
2935 offset = offset + named_entry->offset;
2936
2937 if (! VM_MAP_PAGE_ALIGNED(size,
2938 VM_MAP_PAGE_MASK(target_map))) {
2939 /*
2940 * Let's not map more than requested;
2941 * vm_map_enter() will handle this "not map-aligned"
2942 * case.
2943 */
2944 map_size = size;
2945 }
2946
2947 named_entry_lock(named_entry);
2948 if (named_entry->is_sub_map) {
2949 vm_map_t submap;
2950
2951 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
2952 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
2953 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap.");
2954 }
2955
2956 submap = named_entry->backing.map;
2957 vm_map_lock(submap);
2958 vm_map_reference(submap);
2959 vm_map_unlock(submap);
2960 named_entry_unlock(named_entry);
2961
2962 result = vm_map_enter(target_map,
2963 &map_addr,
2964 map_size,
2965 mask,
2966 flags | VM_FLAGS_SUBMAP,
2967 (vm_object_t) submap,
2968 offset,
2969 copy,
2970 cur_protection,
2971 max_protection,
2972 inheritance);
2973 if (result != KERN_SUCCESS) {
2974 vm_map_deallocate(submap);
2975 } else {
2976 /*
2977 * No need to lock "submap" just to check its
2978 * "mapped" flag: that flag is never reset
2979 * once it's been set and if we race, we'll
2980 * just end up setting it twice, which is OK.
2981 */
2982 if (submap->mapped_in_other_pmaps == FALSE &&
2983 vm_map_pmap(submap) != PMAP_NULL &&
2984 vm_map_pmap(submap) !=
2985 vm_map_pmap(target_map)) {
2986 /*
2987 * This submap is being mapped in a map
2988 * that uses a different pmap.
2989 * Set its "mapped_in_other_pmaps" flag
2990 * to indicate that we now need to
2991 * remove mappings from all pmaps rather
2992 * than just the submap's pmap.
2993 */
2994 vm_map_lock(submap);
2995 submap->mapped_in_other_pmaps = TRUE;
2996 vm_map_unlock(submap);
2997 }
2998 *address = map_addr;
2999 }
3000 return result;
3001
3002 } else if (named_entry->is_pager) {
3003 unsigned int access;
3004 vm_prot_t protections;
3005 unsigned int wimg_mode;
3006
3007 protections = named_entry->protection & VM_PROT_ALL;
3008 access = GET_MAP_MEM(named_entry->protection);
3009
3010 if (flags & (VM_FLAGS_RETURN_DATA_ADDR|
3011 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
3012 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap.");
3013 }
3014
3015 object = vm_object_enter(named_entry->backing.pager,
3016 named_entry->size,
3017 named_entry->internal,
3018 FALSE,
3019 FALSE);
3020 if (object == VM_OBJECT_NULL) {
3021 named_entry_unlock(named_entry);
3022 return KERN_INVALID_OBJECT;
3023 }
3024
3025 /* JMM - drop reference on pager here */
3026
3027 /* create an extra ref for the named entry */
3028 vm_object_lock(object);
3029 vm_object_reference_locked(object);
3030 named_entry->backing.object = object;
3031 named_entry->is_pager = FALSE;
3032 named_entry_unlock(named_entry);
3033
3034 wimg_mode = object->wimg_bits;
3035
3036 if (access == MAP_MEM_IO) {
3037 wimg_mode = VM_WIMG_IO;
3038 } else if (access == MAP_MEM_COPYBACK) {
3039 wimg_mode = VM_WIMG_USE_DEFAULT;
3040 } else if (access == MAP_MEM_INNERWBACK) {
3041 wimg_mode = VM_WIMG_INNERWBACK;
3042 } else if (access == MAP_MEM_WTHRU) {
3043 wimg_mode = VM_WIMG_WTHRU;
3044 } else if (access == MAP_MEM_WCOMB) {
3045 wimg_mode = VM_WIMG_WCOMB;
3046 }
3047
3048 /* wait for object (if any) to be ready */
3049 if (!named_entry->internal) {
3050 while (!object->pager_ready) {
3051 vm_object_wait(
3052 object,
3053 VM_OBJECT_EVENT_PAGER_READY,
3054 THREAD_UNINT);
3055 vm_object_lock(object);
3056 }
3057 }
3058
3059 if (object->wimg_bits != wimg_mode)
3060 vm_object_change_wimg_mode(object, wimg_mode);
3061
3062 #if VM_OBJECT_TRACKING_OP_TRUESHARE
3063 if (!object->true_share &&
3064 vm_object_tracking_inited) {
3065 void *bt[VM_OBJECT_TRACKING_BTDEPTH];
3066 int num = 0;
3067
3068 num = OSBacktrace(bt,
3069 VM_OBJECT_TRACKING_BTDEPTH);
3070 btlog_add_entry(vm_object_tracking_btlog,
3071 object,
3072 VM_OBJECT_TRACKING_OP_TRUESHARE,
3073 bt,
3074 num);
3075 }
3076 #endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */
3077
3078 object->true_share = TRUE;
3079
3080 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
3081 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
3082 vm_object_unlock(object);
3083
3084 } else if (named_entry->is_copy) {
3085 kern_return_t kr;
3086 vm_map_copy_t copy_map;
3087 vm_map_entry_t copy_entry;
3088 vm_map_offset_t copy_addr;
3089
3090 if (flags & ~(VM_FLAGS_FIXED |
3091 VM_FLAGS_ANYWHERE |
3092 VM_FLAGS_OVERWRITE |
3093 VM_FLAGS_RETURN_4K_DATA_ADDR |
3094 VM_FLAGS_RETURN_DATA_ADDR)) {
3095 named_entry_unlock(named_entry);
3096 return KERN_INVALID_ARGUMENT;
3097 }
3098
3099 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
3100 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
3101 offset_in_mapping = offset - vm_object_trunc_page(offset);
3102 if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR)
3103 offset_in_mapping &= ~((signed)(0xFFF));
3104 offset = vm_object_trunc_page(offset);
3105 map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset;
3106 }
3107
3108 copy_map = named_entry->backing.copy;
3109 assert(copy_map->type == VM_MAP_COPY_ENTRY_LIST);
3110 if (copy_map->type != VM_MAP_COPY_ENTRY_LIST) {
3111 /* unsupported type; should not happen */
3112 printf("vm_map_enter_mem_object: "
3113 "memory_entry->backing.copy "
3114 "unsupported type 0x%x\n",
3115 copy_map->type);
3116 named_entry_unlock(named_entry);
3117 return KERN_INVALID_ARGUMENT;
3118 }
3119
3120 /* reserve a contiguous range */
3121 kr = vm_map_enter(target_map,
3122 &map_addr,
3123 /* map whole mem entry, trim later: */
3124 named_entry->size,
3125 mask,
3126 flags & (VM_FLAGS_ANYWHERE |
3127 VM_FLAGS_OVERWRITE |
3128 VM_FLAGS_RETURN_4K_DATA_ADDR |
3129 VM_FLAGS_RETURN_DATA_ADDR),
3130 VM_OBJECT_NULL,
3131 0,
3132 FALSE, /* copy */
3133 cur_protection,
3134 max_protection,
3135 inheritance);
3136 if (kr != KERN_SUCCESS) {
3137 named_entry_unlock(named_entry);
3138 return kr;
3139 }
3140
3141 copy_addr = map_addr;
3142
3143 for (copy_entry = vm_map_copy_first_entry(copy_map);
3144 copy_entry != vm_map_copy_to_entry(copy_map);
3145 copy_entry = copy_entry->vme_next) {
3146 int remap_flags = 0;
3147 vm_map_t copy_submap;
3148 vm_object_t copy_object;
3149 vm_map_size_t copy_size;
3150 vm_object_offset_t copy_offset;
3151
3152 copy_offset = VME_OFFSET(copy_entry);
3153 copy_size = (copy_entry->vme_end -
3154 copy_entry->vme_start);
3155
3156 /* sanity check */
3157 if ((copy_addr + copy_size) >
3158 (map_addr +
3159 named_entry->size /* XXX full size */ )) {
3160 /* over-mapping too much !? */
3161 kr = KERN_INVALID_ARGUMENT;
3162 /* abort */
3163 break;
3164 }
3165
3166 /* take a reference on the object */
3167 if (copy_entry->is_sub_map) {
3168 remap_flags |= VM_FLAGS_SUBMAP;
3169 copy_submap = VME_SUBMAP(copy_entry);
3170 vm_map_lock(copy_submap);
3171 vm_map_reference(copy_submap);
3172 vm_map_unlock(copy_submap);
3173 copy_object = (vm_object_t) copy_submap;
3174 } else {
3175 copy_object = VME_OBJECT(copy_entry);
3176 vm_object_reference(copy_object);
3177 }
3178
3179 /* over-map the object into destination */
3180 remap_flags |= flags;
3181 remap_flags |= VM_FLAGS_FIXED;
3182 remap_flags |= VM_FLAGS_OVERWRITE;
3183 remap_flags &= ~VM_FLAGS_ANYWHERE;
3184 kr = vm_map_enter(target_map,
3185 &copy_addr,
3186 copy_size,
3187 (vm_map_offset_t) 0,
3188 remap_flags,
3189 copy_object,
3190 copy_offset,
3191 copy,
3192 cur_protection,
3193 max_protection,
3194 inheritance);
3195 if (kr != KERN_SUCCESS) {
3196 if (copy_entry->is_sub_map) {
3197 vm_map_deallocate(copy_submap);
3198 } else {
3199 vm_object_deallocate(copy_object);
3200 }
3201 /* abort */
3202 break;
3203 }
3204
3205 /* next mapping */
3206 copy_addr += copy_size;
3207 }
3208
3209 if (kr == KERN_SUCCESS) {
3210 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
3211 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
3212 *address = map_addr + offset_in_mapping;
3213 } else {
3214 *address = map_addr;
3215 }
3216
3217 if (offset) {
3218 /*
3219 * Trim in front, from 0 to "offset".
3220 */
3221 vm_map_remove(target_map,
3222 map_addr,
3223 map_addr + offset,
3224 0);
3225 *address += offset;
3226 }
3227 if (offset + map_size < named_entry->size) {
3228 /*
3229 * Trim in back, from
3230 * "offset + map_size" to
3231 * "named_entry->size".
3232 */
3233 vm_map_remove(target_map,
3234 (map_addr +
3235 offset + map_size),
3236 (map_addr +
3237 named_entry->size),
3238 0);
3239 }
3240 }
3241 named_entry_unlock(named_entry);
3242
3243 if (kr != KERN_SUCCESS) {
3244 if (! (flags & VM_FLAGS_OVERWRITE)) {
3245 /* deallocate the contiguous range */
3246 (void) vm_deallocate(target_map,
3247 map_addr,
3248 map_size);
3249 }
3250 }
3251
3252 return kr;
3253
3254 } else {
3255 /* This is the case where we are going to map */
3256 /* an already mapped object. If the object is */
3257 /* not ready it is internal. An external */
3258 /* object cannot be mapped until it is ready */
3259 /* we can therefore avoid the ready check */
3260 /* in this case. */
3261 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
3262 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
3263 offset_in_mapping = offset - vm_object_trunc_page(offset);
3264 if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR)
3265 offset_in_mapping &= ~((signed)(0xFFF));
3266 offset = vm_object_trunc_page(offset);
3267 map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset;
3268 }
3269
3270 object = named_entry->backing.object;
3271 assert(object != VM_OBJECT_NULL);
3272 named_entry_unlock(named_entry);
3273 vm_object_reference(object);
3274 }
3275 } else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
3276 /*
3277 * JMM - This is temporary until we unify named entries
3278 * and raw memory objects.
3279 *
3280 * Detected fake ip_kotype for a memory object. In
3281 * this case, the port isn't really a port at all, but
3282 * instead is just a raw memory object.
3283 */
3284 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
3285 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
3286 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for raw memory object.");
3287 }
3288
3289 object = vm_object_enter((memory_object_t)port,
3290 size, FALSE, FALSE, FALSE);
3291 if (object == VM_OBJECT_NULL)
3292 return KERN_INVALID_OBJECT;
3293
3294 /* wait for object (if any) to be ready */
3295 if (object != VM_OBJECT_NULL) {
3296 if (object == kernel_object) {
3297 printf("Warning: Attempt to map kernel object"
3298 " by a non-private kernel entity\n");
3299 return KERN_INVALID_OBJECT;
3300 }
3301 if (!object->pager_ready) {
3302 vm_object_lock(object);
3303
3304 while (!object->pager_ready) {
3305 vm_object_wait(object,
3306 VM_OBJECT_EVENT_PAGER_READY,
3307 THREAD_UNINT);
3308 vm_object_lock(object);
3309 }
3310 vm_object_unlock(object);
3311 }
3312 }
3313 } else {
3314 return KERN_INVALID_OBJECT;
3315 }
3316
3317 if (object != VM_OBJECT_NULL &&
3318 object->named &&
3319 object->pager != MEMORY_OBJECT_NULL &&
3320 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
3321 memory_object_t pager;
3322 vm_prot_t pager_prot;
3323 kern_return_t kr;
3324
3325 /*
3326 * For "named" VM objects, let the pager know that the
3327 * memory object is being mapped. Some pagers need to keep
3328 * track of this, to know when they can reclaim the memory
3329 * object, for example.
3330 * VM calls memory_object_map() for each mapping (specifying
3331 * the protection of each mapping) and calls
3332 * memory_object_last_unmap() when all the mappings are gone.
3333 */
3334 pager_prot = max_protection;
3335 if (copy) {
3336 /*
3337 * Copy-On-Write mapping: won't modify the
3338 * memory object.
3339 */
3340 pager_prot &= ~VM_PROT_WRITE;
3341 }
3342 vm_object_lock(object);
3343 pager = object->pager;
3344 if (object->named &&
3345 pager != MEMORY_OBJECT_NULL &&
3346 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
3347 assert(object->pager_ready);
3348 vm_object_mapping_wait(object, THREAD_UNINT);
3349 vm_object_mapping_begin(object);
3350 vm_object_unlock(object);
3351
3352 kr = memory_object_map(pager, pager_prot);
3353 assert(kr == KERN_SUCCESS);
3354
3355 vm_object_lock(object);
3356 vm_object_mapping_end(object);
3357 }
3358 vm_object_unlock(object);
3359 }
3360
3361 /*
3362 * Perform the copy if requested
3363 */
3364
3365 if (copy) {
3366 vm_object_t new_object;
3367 vm_object_offset_t new_offset;
3368
3369 result = vm_object_copy_strategically(object, offset,
3370 map_size,
3371 &new_object, &new_offset,
3372 &copy);
3373
3374
3375 if (result == KERN_MEMORY_RESTART_COPY) {
3376 boolean_t success;
3377 boolean_t src_needs_copy;
3378
3379 /*
3380 * XXX
3381 * We currently ignore src_needs_copy.
3382 * This really is the issue of how to make
3383 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
3384 * non-kernel users to use. Solution forthcoming.
3385 * In the meantime, since we don't allow non-kernel
3386 * memory managers to specify symmetric copy,
3387 * we won't run into problems here.
3388 */
3389 new_object = object;
3390 new_offset = offset;
3391 success = vm_object_copy_quickly(&new_object,
3392 new_offset,
3393 map_size,
3394 &src_needs_copy,
3395 &copy);
3396 assert(success);
3397 result = KERN_SUCCESS;
3398 }
3399 /*
3400 * Throw away the reference to the
3401 * original object, as it won't be mapped.
3402 */
3403
3404 vm_object_deallocate(object);
3405
3406 if (result != KERN_SUCCESS) {
3407 return result;
3408 }
3409
3410 object = new_object;
3411 offset = new_offset;
3412 }
3413
3414 /*
3415 * If users want to try to prefault pages, the mapping and prefault
3416 * needs to be atomic.
3417 */
3418 if (try_prefault)
3419 flags |= VM_FLAGS_KEEP_MAP_LOCKED;
3420
3421 {
3422 result = vm_map_enter(target_map,
3423 &map_addr, map_size,
3424 (vm_map_offset_t)mask,
3425 flags,
3426 object, offset,
3427 copy,
3428 cur_protection, max_protection,
3429 inheritance);
3430 }
3431 if (result != KERN_SUCCESS)
3432 vm_object_deallocate(object);
3433
3434 /*
3435 * Try to prefault, and do not forget to release the vm map lock.
3436 */
3437 if (result == KERN_SUCCESS && try_prefault) {
3438 mach_vm_address_t va = map_addr;
3439 kern_return_t kr = KERN_SUCCESS;
3440 unsigned int i = 0;
3441
3442 for (i = 0; i < page_list_count; ++i) {
3443 if (UPL_VALID_PAGE(page_list, i)) {
3444 /*
3445 * If this function call failed, we should stop
3446 * trying to optimize, other calls are likely
3447 * going to fail too.
3448 *
3449 * We are not gonna report an error for such
3450 * failure though. That's an optimization, not
3451 * something critical.
3452 */
3453 kr = pmap_enter_options(target_map->pmap,
3454 va, UPL_PHYS_PAGE(page_list, i),
3455 cur_protection, VM_PROT_NONE,
3456 0, TRUE, PMAP_OPTIONS_NOWAIT, NULL);
3457 if (kr != KERN_SUCCESS) {
3458 OSIncrementAtomic64(&vm_prefault_nb_bailout);
3459 break;
3460 }
3461 OSIncrementAtomic64(&vm_prefault_nb_pages);
3462 }
3463
3464 /* Next virtual address */
3465 va += PAGE_SIZE;
3466 }
3467 vm_map_unlock(target_map);
3468 }
3469
3470 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
3471 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
3472 *address = map_addr + offset_in_mapping;
3473 } else {
3474 *address = map_addr;
3475 }
3476 return result;
3477 }
3478
3479 kern_return_t
3480 vm_map_enter_mem_object(
3481 vm_map_t target_map,
3482 vm_map_offset_t *address,
3483 vm_map_size_t initial_size,
3484 vm_map_offset_t mask,
3485 int flags,
3486 ipc_port_t port,
3487 vm_object_offset_t offset,
3488 boolean_t copy,
3489 vm_prot_t cur_protection,
3490 vm_prot_t max_protection,
3491 vm_inherit_t inheritance)
3492 {
3493 return vm_map_enter_mem_object_helper(target_map, address, initial_size, mask, flags,
3494 port, offset, copy, cur_protection, max_protection,
3495 inheritance, NULL, 0);
3496 }
3497
3498 kern_return_t
3499 vm_map_enter_mem_object_prefault(
3500 vm_map_t target_map,
3501 vm_map_offset_t *address,
3502 vm_map_size_t initial_size,
3503 vm_map_offset_t mask,
3504 int flags,
3505 ipc_port_t port,
3506 vm_object_offset_t offset,
3507 vm_prot_t cur_protection,
3508 vm_prot_t max_protection,
3509 upl_page_list_ptr_t page_list,
3510 unsigned int page_list_count)
3511 {
3512 return vm_map_enter_mem_object_helper(target_map, address, initial_size, mask, flags,
3513 port, offset, FALSE, cur_protection, max_protection,
3514 VM_INHERIT_DEFAULT, page_list, page_list_count);
3515 }
3516
3517
3518 kern_return_t
3519 vm_map_enter_mem_object_control(
3520 vm_map_t target_map,
3521 vm_map_offset_t *address,
3522 vm_map_size_t initial_size,
3523 vm_map_offset_t mask,
3524 int flags,
3525 memory_object_control_t control,
3526 vm_object_offset_t offset,
3527 boolean_t copy,
3528 vm_prot_t cur_protection,
3529 vm_prot_t max_protection,
3530 vm_inherit_t inheritance)
3531 {
3532 vm_map_address_t map_addr;
3533 vm_map_size_t map_size;
3534 vm_object_t object;
3535 vm_object_size_t size;
3536 kern_return_t result;
3537 memory_object_t pager;
3538 vm_prot_t pager_prot;
3539 kern_return_t kr;
3540
3541 /*
3542 * Check arguments for validity
3543 */
3544 if ((target_map == VM_MAP_NULL) ||
3545 (cur_protection & ~VM_PROT_ALL) ||
3546 (max_protection & ~VM_PROT_ALL) ||
3547 (inheritance > VM_INHERIT_LAST_VALID) ||
3548 initial_size == 0) {
3549 return KERN_INVALID_ARGUMENT;
3550 }
3551
3552 {
3553 map_addr = vm_map_trunc_page(*address,
3554 VM_MAP_PAGE_MASK(target_map));
3555 map_size = vm_map_round_page(initial_size,
3556 VM_MAP_PAGE_MASK(target_map));
3557 }
3558 size = vm_object_round_page(initial_size);
3559
3560 object = memory_object_control_to_vm_object(control);
3561
3562 if (object == VM_OBJECT_NULL)
3563 return KERN_INVALID_OBJECT;
3564
3565 if (object == kernel_object) {
3566 printf("Warning: Attempt to map kernel object"
3567 " by a non-private kernel entity\n");
3568 return KERN_INVALID_OBJECT;
3569 }
3570
3571 vm_object_lock(object);
3572 object->ref_count++;
3573 vm_object_res_reference(object);
3574
3575 /*
3576 * For "named" VM objects, let the pager know that the
3577 * memory object is being mapped. Some pagers need to keep
3578 * track of this, to know when they can reclaim the memory
3579 * object, for example.
3580 * VM calls memory_object_map() for each mapping (specifying
3581 * the protection of each mapping) and calls
3582 * memory_object_last_unmap() when all the mappings are gone.
3583 */
3584 pager_prot = max_protection;
3585 if (copy) {
3586 pager_prot &= ~VM_PROT_WRITE;
3587 }
3588 pager = object->pager;
3589 if (object->named &&
3590 pager != MEMORY_OBJECT_NULL &&
3591 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
3592 assert(object->pager_ready);
3593 vm_object_mapping_wait(object, THREAD_UNINT);
3594 vm_object_mapping_begin(object);
3595 vm_object_unlock(object);
3596
3597 kr = memory_object_map(pager, pager_prot);
3598 assert(kr == KERN_SUCCESS);
3599
3600 vm_object_lock(object);
3601 vm_object_mapping_end(object);
3602 }
3603 vm_object_unlock(object);
3604
3605 /*
3606 * Perform the copy if requested
3607 */
3608
3609 if (copy) {
3610 vm_object_t new_object;
3611 vm_object_offset_t new_offset;
3612
3613 result = vm_object_copy_strategically(object, offset, size,
3614 &new_object, &new_offset,
3615 &copy);
3616
3617
3618 if (result == KERN_MEMORY_RESTART_COPY) {
3619 boolean_t success;
3620 boolean_t src_needs_copy;
3621
3622 /*
3623 * XXX
3624 * We currently ignore src_needs_copy.
3625 * This really is the issue of how to make
3626 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
3627 * non-kernel users to use. Solution forthcoming.
3628 * In the meantime, since we don't allow non-kernel
3629 * memory managers to specify symmetric copy,
3630 * we won't run into problems here.
3631 */
3632 new_object = object;
3633 new_offset = offset;
3634 success = vm_object_copy_quickly(&new_object,
3635 new_offset, size,
3636 &src_needs_copy,
3637 &copy);
3638 assert(success);
3639 result = KERN_SUCCESS;
3640 }
3641 /*
3642 * Throw away the reference to the
3643 * original object, as it won't be mapped.
3644 */
3645
3646 vm_object_deallocate(object);
3647
3648 if (result != KERN_SUCCESS) {
3649 return result;
3650 }
3651
3652 object = new_object;
3653 offset = new_offset;
3654 }
3655
3656 {
3657 result = vm_map_enter(target_map,
3658 &map_addr, map_size,
3659 (vm_map_offset_t)mask,
3660 flags,
3661 object, offset,
3662 copy,
3663 cur_protection, max_protection,
3664 inheritance);
3665 }
3666 if (result != KERN_SUCCESS)
3667 vm_object_deallocate(object);
3668 *address = map_addr;
3669
3670 return result;
3671 }
3672
3673
3674 #if VM_CPM
3675
3676 #ifdef MACH_ASSERT
3677 extern pmap_paddr_t avail_start, avail_end;
3678 #endif
3679
3680 /*
3681 * Allocate memory in the specified map, with the caveat that
3682 * the memory is physically contiguous. This call may fail
3683 * if the system can't find sufficient contiguous memory.
3684 * This call may cause or lead to heart-stopping amounts of
3685 * paging activity.
3686 *
3687 * Memory obtained from this call should be freed in the
3688 * normal way, viz., via vm_deallocate.
3689 */
3690 kern_return_t
3691 vm_map_enter_cpm(
3692 vm_map_t map,
3693 vm_map_offset_t *addr,
3694 vm_map_size_t size,
3695 int flags)
3696 {
3697 vm_object_t cpm_obj;
3698 pmap_t pmap;
3699 vm_page_t m, pages;
3700 kern_return_t kr;
3701 vm_map_offset_t va, start, end, offset;
3702 #if MACH_ASSERT
3703 vm_map_offset_t prev_addr = 0;
3704 #endif /* MACH_ASSERT */
3705
3706 boolean_t anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
3707 vm_tag_t tag;
3708
3709 VM_GET_FLAGS_ALIAS(flags, tag);
3710
3711 if (size == 0) {
3712 *addr = 0;
3713 return KERN_SUCCESS;
3714 }
3715 if (anywhere)
3716 *addr = vm_map_min(map);
3717 else
3718 *addr = vm_map_trunc_page(*addr,
3719 VM_MAP_PAGE_MASK(map));
3720 size = vm_map_round_page(size,
3721 VM_MAP_PAGE_MASK(map));
3722
3723 /*
3724 * LP64todo - cpm_allocate should probably allow
3725 * allocations of >4GB, but not with the current
3726 * algorithm, so just cast down the size for now.
3727 */
3728 if (size > VM_MAX_ADDRESS)
3729 return KERN_RESOURCE_SHORTAGE;
3730 if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
3731 &pages, 0, 0, TRUE, flags)) != KERN_SUCCESS)
3732 return kr;
3733
3734 cpm_obj = vm_object_allocate((vm_object_size_t)size);
3735 assert(cpm_obj != VM_OBJECT_NULL);
3736 assert(cpm_obj->internal);
3737 assert(cpm_obj->vo_size == (vm_object_size_t)size);
3738 assert(cpm_obj->can_persist == FALSE);
3739 assert(cpm_obj->pager_created == FALSE);
3740 assert(cpm_obj->pageout == FALSE);
3741 assert(cpm_obj->shadow == VM_OBJECT_NULL);
3742
3743 /*
3744 * Insert pages into object.
3745 */
3746
3747 vm_object_lock(cpm_obj);
3748 for (offset = 0; offset < size; offset += PAGE_SIZE) {
3749 m = pages;
3750 pages = NEXT_PAGE(m);
3751 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
3752
3753 assert(!m->gobbled);
3754 assert(!m->wanted);
3755 assert(!m->pageout);
3756 assert(!m->tabled);
3757 assert(VM_PAGE_WIRED(m));
3758 /*
3759 * ENCRYPTED SWAP:
3760 * "m" is not supposed to be pageable, so it
3761 * should not be encrypted. It wouldn't be safe
3762 * to enter it in a new VM object while encrypted.
3763 */
3764 ASSERT_PAGE_DECRYPTED(m);
3765 assert(m->busy);
3766 assert(m->phys_page>=(avail_start>>PAGE_SHIFT) && m->phys_page<=(avail_end>>PAGE_SHIFT));
3767
3768 m->busy = FALSE;
3769 vm_page_insert(m, cpm_obj, offset);
3770 }
3771 assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
3772 vm_object_unlock(cpm_obj);
3773
3774 /*
3775 * Hang onto a reference on the object in case a
3776 * multi-threaded application for some reason decides
3777 * to deallocate the portion of the address space into
3778 * which we will insert this object.
3779 *
3780 * Unfortunately, we must insert the object now before
3781 * we can talk to the pmap module about which addresses
3782 * must be wired down. Hence, the race with a multi-
3783 * threaded app.
3784 */
3785 vm_object_reference(cpm_obj);
3786
3787 /*
3788 * Insert object into map.
3789 */
3790
3791 kr = vm_map_enter(
3792 map,
3793 addr,
3794 size,
3795 (vm_map_offset_t)0,
3796 flags,
3797 cpm_obj,
3798 (vm_object_offset_t)0,
3799 FALSE,
3800 VM_PROT_ALL,
3801 VM_PROT_ALL,
3802 VM_INHERIT_DEFAULT);
3803
3804 if (kr != KERN_SUCCESS) {
3805 /*
3806 * A CPM object doesn't have can_persist set,
3807 * so all we have to do is deallocate it to
3808 * free up these pages.
3809 */
3810 assert(cpm_obj->pager_created == FALSE);
3811 assert(cpm_obj->can_persist == FALSE);
3812 assert(cpm_obj->pageout == FALSE);
3813 assert(cpm_obj->shadow == VM_OBJECT_NULL);
3814 vm_object_deallocate(cpm_obj); /* kill acquired ref */
3815 vm_object_deallocate(cpm_obj); /* kill creation ref */
3816 }
3817
3818 /*
3819 * Inform the physical mapping system that the
3820 * range of addresses may not fault, so that
3821 * page tables and such can be locked down as well.
3822 */
3823 start = *addr;
3824 end = start + size;
3825 pmap = vm_map_pmap(map);
3826 pmap_pageable(pmap, start, end, FALSE);
3827
3828 /*
3829 * Enter each page into the pmap, to avoid faults.
3830 * Note that this loop could be coded more efficiently,
3831 * if the need arose, rather than looking up each page
3832 * again.
3833 */
3834 for (offset = 0, va = start; offset < size;
3835 va += PAGE_SIZE, offset += PAGE_SIZE) {
3836 int type_of_fault;
3837
3838 vm_object_lock(cpm_obj);
3839 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
3840 assert(m != VM_PAGE_NULL);
3841
3842 vm_page_zero_fill(m);
3843
3844 type_of_fault = DBG_ZERO_FILL_FAULT;
3845
3846 vm_fault_enter(m, pmap, va, VM_PROT_ALL, VM_PROT_WRITE,
3847 VM_PAGE_WIRED(m), FALSE, FALSE, FALSE, 0, NULL,
3848 &type_of_fault);
3849
3850 vm_object_unlock(cpm_obj);
3851 }
3852
3853 #if MACH_ASSERT
3854 /*
3855 * Verify ordering in address space.
3856 */
3857 for (offset = 0; offset < size; offset += PAGE_SIZE) {
3858 vm_object_lock(cpm_obj);
3859 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
3860 vm_object_unlock(cpm_obj);
3861 if (m == VM_PAGE_NULL)
3862 panic("vm_allocate_cpm: obj %p off 0x%llx no page",
3863 cpm_obj, (uint64_t)offset);
3864 assert(m->tabled);
3865 assert(!m->busy);
3866 assert(!m->wanted);
3867 assert(!m->fictitious);
3868 assert(!m->private);
3869 assert(!m->absent);
3870 assert(!m->error);
3871 assert(!m->cleaning);
3872 assert(!m->laundry);
3873 assert(!m->precious);
3874 assert(!m->clustered);
3875 if (offset != 0) {
3876 if (m->phys_page != prev_addr + 1) {
3877 printf("start 0x%llx end 0x%llx va 0x%llx\n",
3878 (uint64_t)start, (uint64_t)end, (uint64_t)va);
3879 printf("obj %p off 0x%llx\n", cpm_obj, (uint64_t)offset);
3880 printf("m %p prev_address 0x%llx\n", m, (uint64_t)prev_addr);
3881 panic("vm_allocate_cpm: pages not contig!");
3882 }
3883 }
3884 prev_addr = m->phys_page;
3885 }
3886 #endif /* MACH_ASSERT */
3887
3888 vm_object_deallocate(cpm_obj); /* kill extra ref */
3889
3890 return kr;
3891 }
3892
3893
3894 #else /* VM_CPM */
3895
3896 /*
3897 * Interface is defined in all cases, but unless the kernel
3898 * is built explicitly for this option, the interface does
3899 * nothing.
3900 */
3901
3902 kern_return_t
3903 vm_map_enter_cpm(
3904 __unused vm_map_t map,
3905 __unused vm_map_offset_t *addr,
3906 __unused vm_map_size_t size,
3907 __unused int flags)
3908 {
3909 return KERN_FAILURE;
3910 }
3911 #endif /* VM_CPM */
3912
3913 /* Not used without nested pmaps */
3914 #ifndef NO_NESTED_PMAP
3915 /*
3916 * Clip and unnest a portion of a nested submap mapping.
3917 */
3918
3919
3920 static void
3921 vm_map_clip_unnest(
3922 vm_map_t map,
3923 vm_map_entry_t entry,
3924 vm_map_offset_t start_unnest,
3925 vm_map_offset_t end_unnest)
3926 {
3927 vm_map_offset_t old_start_unnest = start_unnest;
3928 vm_map_offset_t old_end_unnest = end_unnest;
3929
3930 assert(entry->is_sub_map);
3931 assert(VME_SUBMAP(entry) != NULL);
3932 assert(entry->use_pmap);
3933
3934 /*
3935 * Query the platform for the optimal unnest range.
3936 * DRK: There's some duplication of effort here, since
3937 * callers may have adjusted the range to some extent. This
3938 * routine was introduced to support 1GiB subtree nesting
3939 * for x86 platforms, which can also nest on 2MiB boundaries
3940 * depending on size/alignment.
3941 */
3942 if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) {
3943 log_unnest_badness(map, old_start_unnest, old_end_unnest);
3944 }
3945
3946 if (entry->vme_start > start_unnest ||
3947 entry->vme_end < end_unnest) {
3948 panic("vm_map_clip_unnest(0x%llx,0x%llx): "
3949 "bad nested entry: start=0x%llx end=0x%llx\n",
3950 (long long)start_unnest, (long long)end_unnest,
3951 (long long)entry->vme_start, (long long)entry->vme_end);
3952 }
3953
3954 if (start_unnest > entry->vme_start) {
3955 _vm_map_clip_start(&map->hdr,
3956 entry,
3957 start_unnest);
3958 if (map->holelistenabled) {
3959 vm_map_store_update_first_free(map, NULL, FALSE);
3960 } else {
3961 vm_map_store_update_first_free(map, map->first_free, FALSE);
3962 }
3963 }
3964 if (entry->vme_end > end_unnest) {
3965 _vm_map_clip_end(&map->hdr,
3966 entry,
3967 end_unnest);
3968 if (map->holelistenabled) {
3969 vm_map_store_update_first_free(map, NULL, FALSE);
3970 } else {
3971 vm_map_store_update_first_free(map, map->first_free, FALSE);
3972 }
3973 }
3974
3975 pmap_unnest(map->pmap,
3976 entry->vme_start,
3977 entry->vme_end - entry->vme_start);
3978 if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
3979 /* clean up parent map/maps */
3980 vm_map_submap_pmap_clean(
3981 map, entry->vme_start,
3982 entry->vme_end,
3983 VME_SUBMAP(entry),
3984 VME_OFFSET(entry));
3985 }
3986 entry->use_pmap = FALSE;
3987 if ((map->pmap != kernel_pmap) &&
3988 (VME_ALIAS(entry) == VM_MEMORY_SHARED_PMAP)) {
3989 VME_ALIAS_SET(entry, VM_MEMORY_UNSHARED_PMAP);
3990 }
3991 }
3992 #endif /* NO_NESTED_PMAP */
3993
3994 /*
3995 * vm_map_clip_start: [ internal use only ]
3996 *
3997 * Asserts that the given entry begins at or after
3998 * the specified address; if necessary,
3999 * it splits the entry into two.
4000 */
4001 void
4002 vm_map_clip_start(
4003 vm_map_t map,
4004 vm_map_entry_t entry,
4005 vm_map_offset_t startaddr)
4006 {
4007 #ifndef NO_NESTED_PMAP
4008 if (entry->is_sub_map &&
4009 entry->use_pmap &&
4010 startaddr >= entry->vme_start) {
4011 vm_map_offset_t start_unnest, end_unnest;
4012
4013 /*
4014 * Make sure "startaddr" is no longer in a nested range
4015 * before we clip. Unnest only the minimum range the platform
4016 * can handle.
4017 * vm_map_clip_unnest may perform additional adjustments to
4018 * the unnest range.
4019 */
4020 start_unnest = startaddr & ~(pmap_nesting_size_min - 1);
4021 end_unnest = start_unnest + pmap_nesting_size_min;
4022 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
4023 }
4024 #endif /* NO_NESTED_PMAP */
4025 if (startaddr > entry->vme_start) {
4026 if (VME_OBJECT(entry) &&
4027 !entry->is_sub_map &&
4028 VME_OBJECT(entry)->phys_contiguous) {
4029 pmap_remove(map->pmap,
4030 (addr64_t)(entry->vme_start),
4031 (addr64_t)(entry->vme_end));
4032 }
4033 _vm_map_clip_start(&map->hdr, entry, startaddr);
4034 if (map->holelistenabled) {
4035 vm_map_store_update_first_free(map, NULL, FALSE);
4036 } else {
4037 vm_map_store_update_first_free(map, map->first_free, FALSE);
4038 }
4039 }
4040 }
4041
4042
/*
 *	vm_map_copy_clip_start:
 *
 *	Same idea as vm_map_clip_start(), but for an entry that lives in
 *	a vm_map_copy: the split is done against the copy's header
 *	("cpy_hdr") and only when "startaddr" lies past the entry's start.
 *	Note that "entry" and "startaddr" are evaluated more than once.
 */
#define vm_map_copy_clip_start(copy, entry, startaddr)			\
	MACRO_BEGIN							\
	if ((entry)->vme_start < (startaddr)) {				\
		_vm_map_clip_start(&(copy)->cpy_hdr,			\
				   (entry), (startaddr));		\
	}								\
	MACRO_END
4048
4049 /*
4050 * This routine is called only when it is known that
4051 * the entry must be split.
4052 */
4053 static void
4054 _vm_map_clip_start(
4055 register struct vm_map_header *map_header,
4056 register vm_map_entry_t entry,
4057 register vm_map_offset_t start)
4058 {
4059 register vm_map_entry_t new_entry;
4060
4061 /*
4062 * Split off the front portion --
4063 * note that we must insert the new
4064 * entry BEFORE this one, so that
4065 * this entry has the specified starting
4066 * address.
4067 */
4068
4069 if (entry->map_aligned) {
4070 assert(VM_MAP_PAGE_ALIGNED(start,
4071 VM_MAP_HDR_PAGE_MASK(map_header)));
4072 }
4073
4074 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
4075 vm_map_entry_copy_full(new_entry, entry);
4076
4077 new_entry->vme_end = start;
4078 assert(new_entry->vme_start < new_entry->vme_end);
4079 VME_OFFSET_SET(entry, VME_OFFSET(entry) + (start - entry->vme_start));
4080 assert(start < entry->vme_end);
4081 entry->vme_start = start;
4082
4083 _vm_map_store_entry_link(map_header, entry->vme_prev, new_entry);
4084
4085 if (entry->is_sub_map)
4086 vm_map_reference(VME_SUBMAP(new_entry));
4087 else
4088 vm_object_reference(VME_OBJECT(new_entry));
4089 }
4090
4091
4092 /*
4093 * vm_map_clip_end: [ internal use only ]
4094 *
4095 * Asserts that the given entry ends at or before
4096 * the specified address; if necessary,
4097 * it splits the entry into two.
4098 */
4099 void
4100 vm_map_clip_end(
4101 vm_map_t map,
4102 vm_map_entry_t entry,
4103 vm_map_offset_t endaddr)
4104 {
4105 if (endaddr > entry->vme_end) {
4106 /*
4107 * Within the scope of this clipping, limit "endaddr" to
4108 * the end of this map entry...
4109 */
4110 endaddr = entry->vme_end;
4111 }
4112 #ifndef NO_NESTED_PMAP
4113 if (entry->is_sub_map && entry->use_pmap) {
4114 vm_map_offset_t start_unnest, end_unnest;
4115
4116 /*
4117 * Make sure the range between the start of this entry and
4118 * the new "endaddr" is no longer nested before we clip.
4119 * Unnest only the minimum range the platform can handle.
4120 * vm_map_clip_unnest may perform additional adjustments to
4121 * the unnest range.
4122 */
4123 start_unnest = entry->vme_start;
4124 end_unnest =
4125 (endaddr + pmap_nesting_size_min - 1) &
4126 ~(pmap_nesting_size_min - 1);
4127 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
4128 }
4129 #endif /* NO_NESTED_PMAP */
4130 if (endaddr < entry->vme_end) {
4131 if (VME_OBJECT(entry) &&
4132 !entry->is_sub_map &&
4133 VME_OBJECT(entry)->phys_contiguous) {
4134 pmap_remove(map->pmap,
4135 (addr64_t)(entry->vme_start),
4136 (addr64_t)(entry->vme_end));
4137 }
4138 _vm_map_clip_end(&map->hdr, entry, endaddr);
4139 if (map->holelistenabled) {
4140 vm_map_store_update_first_free(map, NULL, FALSE);
4141 } else {
4142 vm_map_store_update_first_free(map, map->first_free, FALSE);
4143 }
4144 }
4145 }
4146
4147
/*
 *	vm_map_copy_clip_end:
 *
 *	Same idea as vm_map_clip_end(), but for an entry that lives in
 *	a vm_map_copy: the split is done against the copy's header
 *	("cpy_hdr") and only when "endaddr" lies before the entry's end.
 *	Note that "entry" and "endaddr" are evaluated more than once.
 */
#define vm_map_copy_clip_end(copy, entry, endaddr)			\
	MACRO_BEGIN							\
	if ((entry)->vme_end > (endaddr)) {				\
		_vm_map_clip_end(&(copy)->cpy_hdr,			\
				 (entry), (endaddr));			\
	}								\
	MACRO_END
4153
4154 /*
4155 * This routine is called only when it is known that
4156 * the entry must be split.
4157 */
4158 static void
4159 _vm_map_clip_end(
4160 register struct vm_map_header *map_header,
4161 register vm_map_entry_t entry,
4162 register vm_map_offset_t end)
4163 {
4164 register vm_map_entry_t new_entry;
4165
4166 /*
4167 * Create a new entry and insert it
4168 * AFTER the specified entry
4169 */
4170
4171 if (entry->map_aligned) {
4172 assert(VM_MAP_PAGE_ALIGNED(end,
4173 VM_MAP_HDR_PAGE_MASK(map_header)));
4174 }
4175
4176 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
4177 vm_map_entry_copy_full(new_entry, entry);
4178
4179 assert(entry->vme_start < end);
4180 new_entry->vme_start = entry->vme_end = end;
4181 VME_OFFSET_SET(new_entry,
4182 VME_OFFSET(new_entry) + (end - entry->vme_start));
4183 assert(new_entry->vme_start < new_entry->vme_end);
4184
4185 _vm_map_store_entry_link(map_header, entry, new_entry);
4186
4187 if (entry->is_sub_map)
4188 vm_map_reference(VME_SUBMAP(new_entry));
4189 else
4190 vm_object_reference(VME_OBJECT(new_entry));
4191 }
4192
4193
/*
 *	VM_MAP_RANGE_CHECK:	[ internal use only ]
 *
 *	Clamps the starting and ending region addresses, in place, to
 *	the valid range of the map: "start" is raised to the map's
 *	minimum, "end" is lowered to the map's maximum, and if "start"
 *	still lies beyond "end" it is pulled back to "end".
 *
 *	"start" and "end" must be modifiable lvalues.
 */
#define	VM_MAP_RANGE_CHECK(map, start, end)	\
	MACRO_BEGIN				\
	if (vm_map_min(map) > (start)) {	\
		(start) = vm_map_min(map);	\
	}					\
	if (vm_map_max(map) < (end)) {		\
		(end) = vm_map_max(map);	\
	}					\
	if ((end) < (start)) {			\
		(start) = (end);		\
	}					\
	MACRO_END
4209
4210 /*
4211 * vm_map_range_check: [ internal use only ]
4212 *
4213 * Check that the region defined by the specified start and
4214 * end addresses are wholly contained within a single map
4215 * entry or set of adjacent map entries of the spacified map,
4216 * i.e. the specified region contains no unmapped space.
4217 * If any or all of the region is unmapped, FALSE is returned.
4218 * Otherwise, TRUE is returned and if the output argument 'entry'
4219 * is not NULL it points to the map entry containing the start
4220 * of the region.
4221 *
4222 * The map is locked for reading on entry and is left locked.
4223 */
4224 static boolean_t
4225 vm_map_range_check(
4226 register vm_map_t map,
4227 register vm_map_offset_t start,
4228 register vm_map_offset_t end,
4229 vm_map_entry_t *entry)
4230 {
4231 vm_map_entry_t cur;
4232 register vm_map_offset_t prev;
4233
4234 /*
4235 * Basic sanity checks first
4236 */
4237 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
4238 return (FALSE);
4239
4240 /*
4241 * Check first if the region starts within a valid
4242 * mapping for the map.
4243 */
4244 if (!vm_map_lookup_entry(map, start, &cur))
4245 return (FALSE);
4246
4247 /*
4248 * Optimize for the case that the region is contained
4249 * in a single map entry.
4250 */
4251 if (entry != (vm_map_entry_t *) NULL)
4252 *entry = cur;
4253 if (end <= cur->vme_end)
4254 return (TRUE);
4255
4256 /*
4257 * If the region is not wholly contained within a
4258 * single entry, walk the entries looking for holes.
4259 */
4260 prev = cur->vme_end;
4261 cur = cur->vme_next;
4262 while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
4263 if (end <= cur->vme_end)
4264 return (TRUE);
4265 prev = cur->vme_end;
4266 cur = cur->vme_next;
4267 }
4268 return (FALSE);
4269 }
4270
4271 /*
4272 * vm_map_submap: [ kernel use only ]
4273 *
4274 * Mark the given range as handled by a subordinate map.
4275 *
4276 * This range must have been created with vm_map_find using
4277 * the vm_submap_object, and no other operations may have been
4278 * performed on this range prior to calling vm_map_submap.
4279 *
4280 * Only a limited number of operations can be performed
4281 * within this rage after calling vm_map_submap:
4282 * vm_fault
4283 * [Don't try vm_map_copyin!]
4284 *
4285 * To remove a submapping, one must first remove the
4286 * range from the superior map, and then destroy the
4287 * submap (if desired). [Better yet, don't try it.]
4288 */
4289 kern_return_t
4290 vm_map_submap(
4291 vm_map_t map,
4292 vm_map_offset_t start,
4293 vm_map_offset_t end,
4294 vm_map_t submap,
4295 vm_map_offset_t offset,
4296 #ifdef NO_NESTED_PMAP
4297 __unused
4298 #endif /* NO_NESTED_PMAP */
4299 boolean_t use_pmap)
4300 {
4301 vm_map_entry_t entry;
4302 register kern_return_t result = KERN_INVALID_ARGUMENT;
4303 register vm_object_t object;
4304
4305 vm_map_lock(map);
4306
4307 if (! vm_map_lookup_entry(map, start, &entry)) {
4308 entry = entry->vme_next;
4309 }
4310
4311 if (entry == vm_map_to_entry(map) ||
4312 entry->is_sub_map) {
4313 vm_map_unlock(map);
4314 return KERN_INVALID_ARGUMENT;
4315 }
4316
4317 vm_map_clip_start(map, entry, start);
4318 vm_map_clip_end(map, entry, end);
4319
4320 if ((entry->vme_start == start) && (entry->vme_end == end) &&
4321 (!entry->is_sub_map) &&
4322 ((object = VME_OBJECT(entry)) == vm_submap_object) &&
4323 (object->resident_page_count == 0) &&
4324 (object->copy == VM_OBJECT_NULL) &&
4325 (object->shadow == VM_OBJECT_NULL) &&
4326 (!object->pager_created)) {
4327 VME_OFFSET_SET(entry, (vm_object_offset_t)offset);
4328 VME_OBJECT_SET(entry, VM_OBJECT_NULL);
4329 vm_object_deallocate(object);
4330 entry->is_sub_map = TRUE;
4331 entry->use_pmap = FALSE;
4332 VME_SUBMAP_SET(entry, submap);
4333 vm_map_reference(submap);
4334 if (submap->mapped_in_other_pmaps == FALSE &&
4335 vm_map_pmap(submap) != PMAP_NULL &&
4336 vm_map_pmap(submap) != vm_map_pmap(map)) {
4337 /*
4338 * This submap is being mapped in a map
4339 * that uses a different pmap.
4340 * Set its "mapped_in_other_pmaps" flag
4341 * to indicate that we now need to
4342 * remove mappings from all pmaps rather
4343 * than just the submap's pmap.
4344 */
4345 submap->mapped_in_other_pmaps = TRUE;
4346 }
4347
4348 #ifndef NO_NESTED_PMAP
4349 if (use_pmap) {
4350 /* nest if platform code will allow */
4351 if(submap->pmap == NULL) {
4352 ledger_t ledger = map->pmap->ledger;
4353 submap->pmap = pmap_create(ledger,
4354 (vm_map_size_t) 0, FALSE);
4355 if(submap->pmap == PMAP_NULL) {
4356 vm_map_unlock(map);
4357 return(KERN_NO_SPACE);
4358 }
4359 }
4360 result = pmap_nest(map->pmap,
4361 (VME_SUBMAP(entry))->pmap,
4362 (addr64_t)start,
4363 (addr64_t)start,
4364 (uint64_t)(end - start));
4365 if(result)
4366 panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result);
4367 entry->use_pmap = TRUE;
4368 }
4369 #else /* NO_NESTED_PMAP */
4370 pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end);
4371 #endif /* NO_NESTED_PMAP */
4372 result = KERN_SUCCESS;
4373 }
4374 vm_map_unlock(map);
4375
4376 return(result);
4377 }
4378
4379
4380 /*
4381 * vm_map_protect:
4382 *
4383 * Sets the protection of the specified address
4384 * region in the target map. If "set_max" is
4385 * specified, the maximum protection is to be set;
4386 * otherwise, only the current protection is affected.
4387 */
4388 kern_return_t
4389 vm_map_protect(
4390 register vm_map_t map,
4391 register vm_map_offset_t start,
4392 register vm_map_offset_t end,
4393 register vm_prot_t new_prot,
4394 register boolean_t set_max)
4395 {
4396 register vm_map_entry_t current;
4397 register vm_map_offset_t prev;
4398 vm_map_entry_t entry;
4399 vm_prot_t new_max;
4400
4401 XPR(XPR_VM_MAP,
4402 "vm_map_protect, 0x%X start 0x%X end 0x%X, new 0x%X %d",
4403 map, start, end, new_prot, set_max);
4404
4405 vm_map_lock(map);
4406
4407 /* LP64todo - remove this check when vm_map_commpage64()
4408 * no longer has to stuff in a map_entry for the commpage
4409 * above the map's max_offset.
4410 */
4411 if (start >= map->max_offset) {
4412 vm_map_unlock(map);
4413 return(KERN_INVALID_ADDRESS);
4414 }
4415
4416 while(1) {
4417 /*
4418 * Lookup the entry. If it doesn't start in a valid
4419 * entry, return an error.
4420 */
4421 if (! vm_map_lookup_entry(map, start, &entry)) {
4422 vm_map_unlock(map);
4423 return(KERN_INVALID_ADDRESS);
4424 }
4425
4426 if (entry->superpage_size && (start & (SUPERPAGE_SIZE-1))) { /* extend request to whole entry */
4427 start = SUPERPAGE_ROUND_DOWN(start);
4428 continue;
4429 }
4430 break;
4431 }
4432 if (entry->superpage_size)
4433 end = SUPERPAGE_ROUND_UP(end);
4434
4435 /*
4436 * Make a first pass to check for protection and address
4437 * violations.
4438 */
4439
4440 current = entry;
4441 prev = current->vme_start;
4442 while ((current != vm_map_to_entry(map)) &&
4443 (current->vme_start < end)) {
4444
4445 /*
4446 * If there is a hole, return an error.
4447 */
4448 if (current->vme_start != prev) {
4449 vm_map_unlock(map);
4450 return(KERN_INVALID_ADDRESS);
4451 }
4452
4453 new_max = current->max_protection;
4454 if(new_prot & VM_PROT_COPY) {
4455 new_max |= VM_PROT_WRITE;
4456 if ((new_prot & (new_max | VM_PROT_COPY)) != new_prot) {
4457 vm_map_unlock(map);
4458 return(KERN_PROTECTION_FAILURE);
4459 }
4460 } else {
4461 if ((new_prot & new_max) != new_prot) {
4462 vm_map_unlock(map);
4463 return(KERN_PROTECTION_FAILURE);
4464 }
4465 }
4466
4467
4468 prev = current->vme_end;
4469 current = current->vme_next;
4470 }
4471 if (end > prev) {
4472 vm_map_unlock(map);
4473 return(KERN_INVALID_ADDRESS);
4474 }
4475
4476 /*
4477 * Go back and fix up protections.
4478 * Clip to start here if the range starts within
4479 * the entry.
4480 */
4481
4482 current = entry;
4483 if (current != vm_map_to_entry(map)) {
4484 /* clip and unnest if necessary */
4485 vm_map_clip_start(map, current, start);
4486 }
4487
4488 while ((current != vm_map_to_entry(map)) &&
4489 (current->vme_start < end)) {
4490
4491 vm_prot_t old_prot;
4492
4493 vm_map_clip_end(map, current, end);
4494
4495 if (current->is_sub_map) {
4496 /* clipping did unnest if needed */
4497 assert(!current->use_pmap);
4498 }
4499
4500 old_prot = current->protection;
4501
4502 if(new_prot & VM_PROT_COPY) {
4503 /* caller is asking specifically to copy the */
4504 /* mapped data, this implies that max protection */
4505 /* will include write. Caller must be prepared */
4506 /* for loss of shared memory communication in the */
4507 /* target area after taking this step */
4508
4509 if (current->is_sub_map == FALSE &&
4510 VME_OBJECT(current) == VM_OBJECT_NULL) {
4511 VME_OBJECT_SET(current,
4512 vm_object_allocate(
4513 (vm_map_size_t)
4514 (current->vme_end -
4515 current->vme_start)));
4516 VME_OFFSET_SET(current, 0);
4517 assert(current->use_pmap);
4518 }
4519 assert(current->wired_count == 0);
4520 current->needs_copy = TRUE;
4521 current->max_protection |= VM_PROT_WRITE;
4522 }
4523
4524 if (set_max)
4525 current->protection =
4526 (current->max_protection =
4527 new_prot & ~VM_PROT_COPY) &
4528 old_prot;
4529 else
4530 current->protection = new_prot & ~VM_PROT_COPY;
4531
4532 /*
4533 * Update physical map if necessary.
4534 * If the request is to turn off write protection,
4535 * we won't do it for real (in pmap). This is because
4536 * it would cause copy-on-write to fail. We've already
4537 * set, the new protection in the map, so if a
4538 * write-protect fault occurred, it will be fixed up
4539 * properly, COW or not.
4540 */
4541 if (current->protection != old_prot) {
4542 /* Look one level in we support nested pmaps */
4543 /* from mapped submaps which are direct entries */
4544 /* in our map */
4545
4546 vm_prot_t prot;
4547
4548 prot = current->protection & ~VM_PROT_WRITE;
4549
4550 if (override_nx(map, VME_ALIAS(current)) && prot)
4551 prot |= VM_PROT_EXECUTE;
4552
4553
4554 if (current->is_sub_map && current->use_pmap) {
4555 pmap_protect(VME_SUBMAP(current)->pmap,
4556 current->vme_start,
4557 current->vme_end,
4558 prot);
4559 } else {
4560 pmap_protect(map->pmap,
4561 current->vme_start,
4562 current->vme_end,
4563 prot);
4564 }
4565 }
4566 current = current->vme_next;
4567 }
4568
4569 current = entry;
4570 while ((current != vm_map_to_entry(map)) &&
4571 (current->vme_start <= end)) {
4572 vm_map_simplify_entry(map, current);
4573 current = current->vme_next;
4574 }
4575
4576 vm_map_unlock(map);
4577 return(KERN_SUCCESS);
4578 }
4579
4580 /*
4581 * vm_map_inherit:
4582 *
4583 * Sets the inheritance of the specified address
4584 * range in the target map. Inheritance
4585 * affects how the map will be shared with
4586 * child maps at the time of vm_map_fork.
4587 */
4588 kern_return_t
4589 vm_map_inherit(
4590 register vm_map_t map,
4591 register vm_map_offset_t start,
4592 register vm_map_offset_t end,
4593 register vm_inherit_t new_inheritance)
4594 {
4595 register vm_map_entry_t entry;
4596 vm_map_entry_t temp_entry;
4597
4598 vm_map_lock(map);
4599
4600 VM_MAP_RANGE_CHECK(map, start, end);
4601
4602 if (vm_map_lookup_entry(map, start, &temp_entry)) {
4603 entry = temp_entry;
4604 }
4605 else {
4606 temp_entry = temp_entry->vme_next;
4607 entry = temp_entry;
4608 }
4609
4610 /* first check entire range for submaps which can't support the */
4611 /* given inheritance. */
4612 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
4613 if(entry->is_sub_map) {
4614 if(new_inheritance == VM_INHERIT_COPY) {
4615 vm_map_unlock(map);
4616 return(KERN_INVALID_ARGUMENT);
4617 }
4618 }
4619
4620 entry = entry->vme_next;
4621 }
4622
4623 entry = temp_entry;
4624 if (entry != vm_map_to_entry(map)) {
4625 /* clip and unnest if necessary */
4626 vm_map_clip_start(map, entry, start);
4627 }
4628
4629 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
4630 vm_map_clip_end(map, entry, end);
4631 if (entry->is_sub_map) {
4632 /* clip did unnest if needed */
4633 assert(!entry->use_pmap);
4634 }
4635
4636 entry->inheritance = new_inheritance;
4637
4638 entry = entry->vme_next;
4639 }
4640
4641 vm_map_unlock(map);
4642 return(KERN_SUCCESS);
4643 }
4644
4645 /*
4646 * Update the accounting for the amount of wired memory in this map. If the user has
4647 * exceeded the defined limits, then we fail. Wiring on behalf of the kernel never fails.
4648 */
4649
4650 static kern_return_t
4651 add_wire_counts(
4652 vm_map_t map,
4653 vm_map_entry_t entry,
4654 boolean_t user_wire)
4655 {
4656 vm_map_size_t size;
4657
4658 if (user_wire) {
4659 unsigned int total_wire_count = vm_page_wire_count + vm_lopage_free_count;
4660
4661 /*
4662 * We're wiring memory at the request of the user. Check if this is the first time the user is wiring
4663 * this map entry.
4664 */
4665
4666 if (entry->user_wired_count == 0) {
4667 size = entry->vme_end - entry->vme_start;
4668
4669 /*
4670 * Since this is the first time the user is wiring this map entry, check to see if we're
4671 * exceeding the user wire limits. There is a per map limit which is the smaller of either
4672 * the process's rlimit or the global vm_user_wire_limit which caps this value. There is also
4673 * a system-wide limit on the amount of memory all users can wire. If the user is over either
4674 * limit, then we fail.
4675 */
4676
4677 if(size + map->user_wire_size > MIN(map->user_wire_limit, vm_user_wire_limit) ||
4678 size + ptoa_64(total_wire_count) > vm_global_user_wire_limit ||
4679 size + ptoa_64(total_wire_count) > max_mem - vm_global_no_user_wire_amount)
4680 return KERN_RESOURCE_SHORTAGE;
4681
4682 /*
4683 * The first time the user wires an entry, we also increment the wired_count and add this to
4684 * the total that has been wired in the map.
4685 */
4686
4687 if (entry->wired_count >= MAX_WIRE_COUNT)
4688 return KERN_FAILURE;
4689
4690 entry->wired_count++;
4691 map->user_wire_size += size;
4692 }
4693
4694 if (entry->user_wired_count >= MAX_WIRE_COUNT)
4695 return KERN_FAILURE;
4696
4697 entry->user_wired_count++;
4698
4699 } else {
4700
4701 /*
4702 * The kernel's wiring the memory. Just bump the count and continue.
4703 */
4704
4705 if (entry->wired_count >= MAX_WIRE_COUNT)
4706 panic("vm_map_wire: too many wirings");
4707
4708 entry->wired_count++;
4709 }
4710
4711 return KERN_SUCCESS;
4712 }
4713
4714 /*
4715 * Update the memory wiring accounting now that the given map entry is being unwired.
4716 */
4717
4718 static void
4719 subtract_wire_counts(
4720 vm_map_t map,
4721 vm_map_entry_t entry,
4722 boolean_t user_wire)
4723 {
4724
4725 if (user_wire) {
4726
4727 /*
4728 * We're unwiring memory at the request of the user. See if we're removing the last user wire reference.
4729 */
4730
4731 if (entry->user_wired_count == 1) {
4732
4733 /*
4734 * We're removing the last user wire reference. Decrement the wired_count and the total
4735 * user wired memory for this map.
4736 */
4737
4738 assert(entry->wired_count >= 1);
4739 entry->wired_count--;
4740 map->user_wire_size -= entry->vme_end - entry->vme_start;
4741 }
4742
4743 assert(entry->user_wired_count >= 1);
4744 entry->user_wired_count--;
4745
4746 } else {
4747
4748 /*
4749 * The kernel is unwiring the memory. Just update the count.
4750 */
4751
4752 assert(entry->wired_count >= 1);
4753 entry->wired_count--;
4754 }
4755 }
4756
4757 /*
4758 * vm_map_wire:
4759 *
4760 * Sets the pageability of the specified address range in the
4761 * target map as wired. Regions specified as not pageable require
4762 * locked-down physical memory and physical page maps. The
4763 * access_type variable indicates types of accesses that must not
4764 * generate page faults. This is checked against protection of
4765 * memory being locked-down.
4766 *
4767 * The map must not be locked, but a reference must remain to the
4768 * map throughout the call.
4769 */
/*
 * vm_map_wire_nested is the worker behind vm_map_wire(),
 * vm_map_wire_external() and the two wire_and_extract variants, all of
 * which pass map_pmap == NULL.  It also calls itself for submap entries.
 *
 * Parameters:
 *	map		map whose range [start, end) is to be wired
 *	caller_prot	masked with VM_PROT_ALL to form access_type for the
 *			protection checks; the unmasked value is handed to
 *			vm_fault_wire() and to the recursive call
 *	user_wire	forwarded to add_wire_counts()/subtract_wire_counts();
 *			when TRUE, waiting on an in-transition entry uses
 *			THREAD_ABORTSAFE and an interrupted wait fails the call
 *	map_pmap	NULL for a top-level call; when non-NULL it is passed
 *			to vm_fault_wire() together with pmap_addr instead of
 *			map->pmap and the entry's start address
 *	pmap_addr	see map_pmap; overwritten with "s" on the submap path
 *			when map_pmap is NULL
 *	physpage_p	optional; when non-NULL the range must be exactly
 *			PAGE_SIZE long (else KERN_INVALID_ARGUMENT is returned
 *			before the map is locked).  It is zeroed up front and
 *			again on failure.
 *
 * Returns KERN_SUCCESS or an error code.  On error paths that reach the
 * "done" label, the range [start, s) is unwired again through
 * vm_map_unwire_nested().
 */
4770 static kern_return_t
4771 vm_map_wire_nested(
4772 register vm_map_t map,
4773 register vm_map_offset_t start,
4774 register vm_map_offset_t end,
4775 register vm_prot_t caller_prot,
4776 boolean_t user_wire,
4777 pmap_t map_pmap,
4778 vm_map_offset_t pmap_addr,
4779 ppnum_t *physpage_p)
4780 {
4781 register vm_map_entry_t entry;
4782 register vm_prot_t access_type;
4783 struct vm_map_entry *first_entry, tmp_entry;
4784 vm_map_t real_map;
4785 register vm_map_offset_t s,e;
4786 kern_return_t rc;
4787 boolean_t need_wakeup;
4788 boolean_t main_map = FALSE;
4789 wait_interrupt_t interruptible_state;
4790 thread_t cur_thread;
4791 unsigned int last_timestamp;
4792 vm_map_size_t size;
4793 boolean_t wire_and_extract;
4794
4795 access_type = (caller_prot & VM_PROT_ALL);
4796
4797 wire_and_extract = FALSE;
4798 if (physpage_p != NULL) {
4799 /*
4800 * The caller wants the physical page number of the
4801 * wired page. We return only one physical page number
4802 * so this works for only one page at a time.
4803 */
4804 if ((end - start) != PAGE_SIZE) {
4805 return KERN_INVALID_ARGUMENT;
4806 }
4807 wire_and_extract = TRUE;
4808 *physpage_p = 0;
4809 }
4810
4811 vm_map_lock(map);
/* NOTE(review): main_map is set here but not read again in this function. */
4812 if(map_pmap == NULL)
4813 main_map = TRUE;
4814 last_timestamp = map->timestamp;
4815
4816 VM_MAP_RANGE_CHECK(map, start, end);
4817 assert(page_aligned(start));
4818 assert(page_aligned(end));
4819 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
4820 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
4821 if (start == end) {
4822 /* We wired what the caller asked for, zero pages */
4823 vm_map_unlock(map);
4824 return KERN_SUCCESS;
4825 }
4826
4827 need_wakeup = FALSE;
4828 cur_thread = current_thread();
4829
/* "s" tracks progress: everything in [start, s) has been wired so far. */
4830 s = start;
4831 rc = KERN_SUCCESS;
4832
4833 if (vm_map_lookup_entry(map, s, &first_entry)) {
4834 entry = first_entry;
4835 /*
4836 * vm_map_clip_start will be done later.
4837 * We don't want to unnest any nested submaps here !
4838 */
4839 } else {
4840 /* Start address is not in map */
4841 rc = KERN_INVALID_ADDRESS;
4842 goto done;
4843 }
4844
4845 while ((entry != vm_map_to_entry(map)) && (s < end)) {
4846 /*
4847 * At this point, we have wired from "start" to "s".
4848 * We still need to wire from "s" to "end".
4849 *
4850 * "entry" hasn't been clipped, so it could start before "s"
4851 * and/or end after "end".
4852 */
4853
4854 /* "e" is how far we want to wire in this entry */
4855 e = entry->vme_end;
4856 if (e > end)
4857 e = end;
4858
4859 /*
4860 * If another thread is wiring/unwiring this entry then
4861 * block after informing other thread to wake us up.
4862 */
4863 if (entry->in_transition) {
4864 wait_result_t wait_result;
4865
4866 /*
4867 * We have not clipped the entry. Make sure that
4868 * the start address is in range so that the lookup
4869 * below will succeed.
4870 * "s" is the current starting point: we've already
4871 * wired from "start" to "s" and we still have
4872 * to wire from "s" to "end".
4873 */
4874
4875 entry->needs_wakeup = TRUE;
4876
4877 /*
4878 * wake up anybody waiting on entries that we have
4879 * already wired.
4880 */
4881 if (need_wakeup) {
4882 vm_map_entry_wakeup(map);
4883 need_wakeup = FALSE;
4884 }
4885 /*
4886 * User wiring is interruptible
4887 */
4888 wait_result = vm_map_entry_wait(map,
4889 (user_wire) ? THREAD_ABORTSAFE :
4890 THREAD_UNINT);
4891 if (user_wire && wait_result == THREAD_INTERRUPTED) {
4892 /*
4893 * undo the wirings we have done so far
4894 * We do not clear the needs_wakeup flag,
4895 * because we cannot tell if we were the
4896 * only one waiting.
4897 */
4898 rc = KERN_FAILURE;
4899 goto done;
4900 }
4901
4902 /*
4903 * Cannot avoid a lookup here. reset timestamp.
4904 */
4905 last_timestamp = map->timestamp;
4906
4907 /*
4908 * The entry could have been clipped, look it up again.
4909 * The worst that can happen is, it may not exist anymore.
4910 */
4911 if (!vm_map_lookup_entry(map, s, &first_entry)) {
4912 /*
4913 * User: undo everything up to the previous
4914 * entry. let vm_map_unwire worry about
4915 * checking the validity of the range.
4916 */
4917 rc = KERN_FAILURE;
4918 goto done;
4919 }
4920 entry = first_entry;
4921 continue;
4922 }
4923
4924 if (entry->is_sub_map) {
4925 vm_map_offset_t sub_start;
4926 vm_map_offset_t sub_end;
4927 vm_map_offset_t local_start;
4928 vm_map_offset_t local_end;
4929 pmap_t pmap;
4930
4931 if (wire_and_extract) {
4932 /*
4933 * Wiring would result in copy-on-write
4934 * which would not be compatible with
4935 * the sharing we have with the original
4936 * provider of this memory.
4937 */
4938 rc = KERN_INVALID_ARGUMENT;
4939 goto done;
4940 }
4941
4942 vm_map_clip_start(map, entry, s);
4943 vm_map_clip_end(map, entry, end);
4944
/*
 * Translate the clipped entry's range into submap offsets:
 * [VME_OFFSET(entry), VME_OFFSET(entry) + (vme_end - vme_start)).
 */
4945 sub_start = VME_OFFSET(entry);
4946 sub_end = entry->vme_end;
4947 sub_end += VME_OFFSET(entry) - entry->vme_start;
4948
/* NOTE(review): local_end is assigned but not read afterwards in this function. */
4949 local_end = entry->vme_end;
4950 if(map_pmap == NULL) {
4951 vm_object_t object;
4952 vm_object_offset_t offset;
4953 vm_prot_t prot;
4954 boolean_t wired;
4955 vm_map_entry_t local_entry;
4956 vm_map_version_t version;
4957 vm_map_t lookup_map;
4958
4959 if(entry->use_pmap) {
4960 pmap = VME_SUBMAP(entry)->pmap;
4961 /* ppc implementation requires that */
4962 /* submaps pmap address ranges line */
4963 /* up with parent map */
4964 #ifdef notdef
4965 pmap_addr = sub_start;
4966 #endif
4967 pmap_addr = s;
4968 } else {
4969 pmap = map->pmap;
4970 pmap_addr = s;
4971 }
4972
/* Submap entry that is already wired: only take another count on it. */
4973 if (entry->wired_count) {
4974 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4975 goto done;
4976
4977 /*
4978 * The map was not unlocked:
4979 * no need to goto re-lookup.
4980 * Just go directly to next entry.
4981 */
4982 entry = entry->vme_next;
4983 s = entry->vme_start;
4984 continue;
4985
4986 }
4987
4988 /* call vm_map_lookup_locked to */
4989 /* cause any needs copy to be */
4990 /* evaluated */
4991 local_start = entry->vme_start;
4992 lookup_map = map;
4993 vm_map_lock_write_to_read(map);
4994 if(vm_map_lookup_locked(
4995 &lookup_map, local_start,
4996 access_type,
4997 OBJECT_LOCK_EXCLUSIVE,
4998 &version, &object,
4999 &offset, &prot, &wired,
5000 NULL,
5001 &real_map)) {
5002
/*
 * NOTE(review): this failure path returns directly instead of going
 * through "done", so a pending need_wakeup is not delivered here and
 * the undo uses vm_map_unwire() rather than vm_map_unwire_nested()
 * -- confirm that is intended.
 */
5003 vm_map_unlock_read(lookup_map);
5004 assert(map_pmap == NULL);
5005 vm_map_unwire(map, start,
5006 s, user_wire);
5007 return(KERN_FAILURE);
5008 }
5009 vm_object_unlock(object);
5010 if(real_map != lookup_map)
5011 vm_map_unlock(real_map);
5012 vm_map_unlock_read(lookup_map);
5013 vm_map_lock(map);
5014
5015 /* we unlocked, so must re-lookup */
5016 if (!vm_map_lookup_entry(map,
5017 local_start,
5018 &local_entry)) {
5019 rc = KERN_FAILURE;
5020 goto done;
5021 }
5022
5023 /*
5024 * entry could have been "simplified",
5025 * so re-clip
5026 */
5027 entry = local_entry;
5028 assert(s == local_start);
5029 vm_map_clip_start(map, entry, s);
5030 vm_map_clip_end(map, entry, end);
5031 /* re-compute "e" */
5032 e = entry->vme_end;
5033 if (e > end)
5034 e = end;
5035
5036 /* did we have a change of type? */
5037 if (!entry->is_sub_map) {
5038 last_timestamp = map->timestamp;
5039 continue;
5040 }
5041 } else {
5042 local_start = entry->vme_start;
5043 pmap = map_pmap;
5044 }
5045
5046 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
5047 goto done;
5048
/* Mark the entry busy before the map lock is dropped for the recursive call. */
5049 entry->in_transition = TRUE;
5050
5051 vm_map_unlock(map);
5052 rc = vm_map_wire_nested(VME_SUBMAP(entry),
5053 sub_start, sub_end,
5054 caller_prot,
5055 user_wire, pmap, pmap_addr,
5056 NULL);
5057 vm_map_lock(map);
5058
5059 /*
5060 * Find the entry again. It could have been clipped
5061 * after we unlocked the map.
5062 */
5063 if (!vm_map_lookup_entry(map, local_start,
5064 &first_entry))
5065 panic("vm_map_wire: re-lookup failed");
5066 entry = first_entry;
5067
5068 assert(local_start == s);
5069 /* re-compute "e" */
5070 e = entry->vme_end;
5071 if (e > end)
5072 e = end;
5073
5074 last_timestamp = map->timestamp;
/*
 * Clear in_transition on every entry starting below "e", note any
 * waiters, and give back the wire counts if the recursive call failed.
 */
5075 while ((entry != vm_map_to_entry(map)) &&
5076 (entry->vme_start < e)) {
5077 assert(entry->in_transition);
5078 entry->in_transition = FALSE;
5079 if (entry->needs_wakeup) {
5080 entry->needs_wakeup = FALSE;
5081 need_wakeup = TRUE;
5082 }
5083 if (rc != KERN_SUCCESS) {/* from vm_*_wire */
5084 subtract_wire_counts(map, entry, user_wire);
5085 }
5086 entry = entry->vme_next;
5087 }
5088 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
5089 goto done;
5090 }
5091
5092 /* no need to relookup again */
5093 s = entry->vme_start;
5094 continue;
5095 }
5096
5097 /*
5098 * If this entry is already wired then increment
5099 * the appropriate wire reference count.
5100 */
5101 if (entry->wired_count) {
5102
5103 if ((entry->protection & access_type) != access_type) {
5104 /* found a protection problem */
5105
5106 /*
5107 * XXX FBDP
5108 * We should always return an error
5109 * in this case but since we didn't
5110 * enforce it before, let's do
5111 * it only for the new "wire_and_extract"
5112 * code path for now...
5113 */
5114 if (wire_and_extract) {
5115 rc = KERN_PROTECTION_FAILURE;
5116 goto done;
5117 }
5118 }
5119
5120 /*
5121 * entry is already wired down, get our reference
5122 * after clipping to our range.
5123 */
5124 vm_map_clip_start(map, entry, s);
5125 vm_map_clip_end(map, entry, end);
5126
5127 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
5128 goto done;
5129
5130 if (wire_and_extract) {
5131 vm_object_t object;
5132 vm_object_offset_t offset;
5133 vm_page_t m;
5134
5135 /*
5136 * We don't have to "wire" the page again
5137 * but we still have to "extract" its
5138 * physical page number, after some sanity
5139 * checks.
5140 */
5141 assert((entry->vme_end - entry->vme_start)
5142 == PAGE_SIZE);
5143 assert(!entry->needs_copy);
5144 assert(!entry->is_sub_map);
5145 assert(VME_OBJECT(entry));
5146 if (((entry->vme_end - entry->vme_start)
5147 != PAGE_SIZE) ||
5148 entry->needs_copy ||
5149 entry->is_sub_map ||
5150 VME_OBJECT(entry) == VM_OBJECT_NULL) {
5151 rc = KERN_INVALID_ARGUMENT;
5152 goto done;
5153 }
5154
5155 object = VME_OBJECT(entry);
5156 offset = VME_OFFSET(entry);
5157 /* need exclusive lock to update m->dirty */
5158 if (entry->protection & VM_PROT_WRITE) {
5159 vm_object_lock(object);
5160 } else {
5161 vm_object_lock_shared(object);
5162 }
5163 m = vm_page_lookup(object, offset);
5164 assert(m != VM_PAGE_NULL);
5165 assert(m->wire_count);
5166 if (m != VM_PAGE_NULL && m->wire_count) {
5167 *physpage_p = m->phys_page;
5168 if (entry->protection & VM_PROT_WRITE) {
5169 vm_object_lock_assert_exclusive(
5170 m->object);
5171 m->dirty = TRUE;
5172 }
5173 } else {
5174 /* not already wired !? */
5175 *physpage_p = 0;
5176 }
5177 vm_object_unlock(object);
5178 }
5179
5180 /* map was not unlocked: no need to relookup */
5181 entry = entry->vme_next;
5182 s = entry->vme_start;
5183 continue;
5184 }
5185
5186 /*
5187 * Unwired entry or wire request transmitted via submap
5188 */
5189
5190
5191 /*
5192 * Perform actions of vm_map_lookup that need the write
5193 * lock on the map: create a shadow object for a
5194 * copy-on-write region, or an object for a zero-fill
5195 * region.
5196 */
5197 size = entry->vme_end - entry->vme_start;
5198 /*
5199 * If wiring a copy-on-write page, we need to copy it now
5200 * even if we're only (currently) requesting read access.
5201 * This is aggressive, but once it's wired we can't move it.
5202 */
5203 if (entry->needs_copy) {
5204 if (wire_and_extract) {
5205 /*
5206 * We're supposed to share with the original
5207 * provider so should not be "needs_copy"
5208 */
5209 rc = KERN_INVALID_ARGUMENT;
5210 goto done;
5211 }
5212
5213 VME_OBJECT_SHADOW(entry, size);
5214 entry->needs_copy = FALSE;
5215 } else if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
5216 if (wire_and_extract) {
5217 /*
5218 * We're supposed to share with the original
5219 * provider so should already have an object.
5220 */
5221 rc = KERN_INVALID_ARGUMENT;
5222 goto done;
5223 }
5224 VME_OBJECT_SET(entry, vm_object_allocate(size));
5225 VME_OFFSET_SET(entry, (vm_object_offset_t)0);
5226 assert(entry->use_pmap);
5227 }
5228
5229 vm_map_clip_start(map, entry, s);
5230 vm_map_clip_end(map, entry, end);
5231
5232 /* re-compute "e" */
5233 e = entry->vme_end;
5234 if (e > end)
5235 e = end;
5236
5237 /*
5238 * Check for holes and protection mismatch.
5239 * Holes: Next entry should be contiguous unless this
5240 * is the end of the region.
5241 * Protection: Access requested must be allowed, unless
5242 * wiring is by protection class
5243 */
5244 if ((entry->vme_end < end) &&
5245 ((entry->vme_next == vm_map_to_entry(map)) ||
5246 (entry->vme_next->vme_start > entry->vme_end))) {
5247 /* found a hole */
5248 rc = KERN_INVALID_ADDRESS;
5249 goto done;
5250 }
5251 if ((entry->protection & access_type) != access_type) {
5252 /* found a protection problem */
5253 rc = KERN_PROTECTION_FAILURE;
5254 goto done;
5255 }
5256
5257 assert(entry->wired_count == 0 && entry->user_wired_count == 0);
5258
5259 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
5260 goto done;
5261
5262 entry->in_transition = TRUE;
5263
5264 /*
5265 * This entry might get split once we unlock the map.
5266 * In vm_fault_wire(), we need the current range as
5267 * defined by this entry. In order for this to work
5268 * along with a simultaneous clip operation, we make a
5269 * temporary copy of this entry and use that for the
5270 * wiring. Note that the underlying objects do not
5271 * change during a clip.
5272 */
5273 tmp_entry = *entry;
5274
5275 /*
5276 * The in_transition state guarantees that the entry
5277 * (or entries for this range, if split occurred) will be
5278 * there when the map lock is acquired for the second time.
5279 */
5280 vm_map_unlock(map);
5281
/*
 * For a kernel wiring with a current thread, switch to THREAD_UNINT
 * around vm_fault_wire(); the value returned here (presumably the
 * previous interrupt level -- TODO confirm) is passed back to
 * thread_interrupt_level() afterwards.
 */
5282 if (!user_wire && cur_thread != THREAD_NULL)
5283 interruptible_state = thread_interrupt_level(THREAD_UNINT);
5284 else
5285 interruptible_state = THREAD_UNINT;
5286
5287 if(map_pmap)
5288 rc = vm_fault_wire(map,
5289 &tmp_entry, caller_prot, map_pmap, pmap_addr,
5290 physpage_p);
5291 else
5292 rc = vm_fault_wire(map,
5293 &tmp_entry, caller_prot, map->pmap,
5294 tmp_entry.vme_start,
5295 physpage_p);
5296
5297 if (!user_wire && cur_thread != THREAD_NULL)
5298 thread_interrupt_level(interruptible_state);
5299
5300 vm_map_lock(map);
5301
/*
 * Re-lookup is skipped only when the timestamp is exactly one past the
 * value recorded earlier; presumably that means nobody else modified
 * the map in between -- TODO confirm how map->timestamp is advanced.
 */
5302 if (last_timestamp+1 != map->timestamp) {
5303 /*
5304 * Find the entry again. It could have been clipped
5305 * after we unlocked the map.
5306 */
5307 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
5308 &first_entry))
5309 panic("vm_map_wire: re-lookup failed");
5310
5311 entry = first_entry;
5312 }
5313
5314 last_timestamp = map->timestamp;
5315
/*
 * Clear in_transition on every entry starting inside the range of the
 * saved copy, note any waiters, and give back the wire counts if
 * vm_fault_wire() failed.
 */
5316 while ((entry != vm_map_to_entry(map)) &&
5317 (entry->vme_start < tmp_entry.vme_end)) {
5318 assert(entry->in_transition);
5319 entry->in_transition = FALSE;
5320 if (entry->needs_wakeup) {
5321 entry->needs_wakeup = FALSE;
5322 need_wakeup = TRUE;
5323 }
5324 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
5325 subtract_wire_counts(map, entry, user_wire);
5326 }
5327 entry = entry->vme_next;
5328 }
5329
5330 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
5331 goto done;
5332 }
5333
5334 s = entry->vme_start;
5335 } /* end while loop through map entries */
5336
/* Common exit: reached with the map locked, for success and for most failures. */
5337 done:
5338 if (rc == KERN_SUCCESS) {
5339 /* repair any damage we may have made to the VM map */
5340 vm_map_simplify_range(map, start, end);
5341 }
5342
5343 vm_map_unlock(map);
5344
5345 /*
5346 * wake up anybody waiting on entries we wired.
5347 */
5348 if (need_wakeup)
5349 vm_map_entry_wakeup(map);
5350
5351 if (rc != KERN_SUCCESS) {
5352 /* undo what has been wired so far */
5353 vm_map_unwire_nested(map, start, s, user_wire,
5354 map_pmap, pmap_addr);
5355 if (physpage_p) {
5356 *physpage_p = 0;
5357 }
5358 }
5359
5360 return rc;
5361
5362 }
5363
5364 kern_return_t
5365 vm_map_wire_external(
5366 register vm_map_t map,
5367 register vm_map_offset_t start,
5368 register vm_map_offset_t end,
5369 register vm_prot_t caller_prot,
5370 boolean_t user_wire)
5371 {
5372 kern_return_t kret;
5373
5374 caller_prot &= ~VM_PROT_MEMORY_TAG_MASK;
5375 caller_prot |= VM_PROT_MEMORY_TAG_MAKE(vm_tag_bt());
5376 kret = vm_map_wire_nested(map, start, end, caller_prot,
5377 user_wire, (pmap_t)NULL, 0, NULL);
5378 return kret;
5379 }
5380
5381 kern_return_t
5382 vm_map_wire(
5383 register vm_map_t map,
5384 register vm_map_offset_t start,
5385 register vm_map_offset_t end,
5386 register vm_prot_t caller_prot,
5387 boolean_t user_wire)
5388 {
5389 kern_return_t kret;
5390
5391 kret = vm_map_wire_nested(map, start, end, caller_prot,
5392 user_wire, (pmap_t)NULL, 0, NULL);
5393 return kret;
5394 }
5395
5396 kern_return_t
5397 vm_map_wire_and_extract_external(
5398 vm_map_t map,
5399 vm_map_offset_t start,
5400 vm_prot_t caller_prot,
5401 boolean_t user_wire,
5402 ppnum_t *physpage_p)
5403 {
5404 kern_return_t kret;
5405
5406 caller_prot &= ~VM_PROT_MEMORY_TAG_MASK;
5407 caller_prot |= VM_PROT_MEMORY_TAG_MAKE(vm_tag_bt());
5408 kret = vm_map_wire_nested(map,
5409 start,
5410 start+VM_MAP_PAGE_SIZE(map),
5411 caller_prot,
5412 user_wire,
5413 (pmap_t)NULL,
5414 0,
5415 physpage_p);
5416 if (kret != KERN_SUCCESS &&
5417 physpage_p != NULL) {
5418 *physpage_p = 0;
5419 }
5420 return kret;
5421 }
5422
5423 kern_return_t
5424 vm_map_wire_and_extract(
5425 vm_map_t map,
5426 vm_map_offset_t start,
5427 vm_prot_t caller_prot,
5428 boolean_t user_wire,
5429 ppnum_t *physpage_p)
5430 {
5431 kern_return_t kret;
5432
5433 kret = vm_map_wire_nested(map,
5434 start,
5435 start+VM_MAP_PAGE_SIZE(map),
5436 caller_prot,
5437 user_wire,
5438 (pmap_t)NULL,
5439 0,
5440 physpage_p);
5441 if (kret != KERN_SUCCESS &&
5442 physpage_p != NULL) {
5443 *physpage_p = 0;
5444 }
5445 return kret;
5446 }
5447
5448 /*
5449 * vm_map_unwire:
5450 *
5451 * Sets the pageability of the specified address range in the target
5452 * as pageable. Regions specified must have been wired previously.
5453 *
5454 * The map must not be locked, but a reference must remain to the map
5455 * throughout the call.
5456 *
5457 * Kernel will panic on failures. User unwire ignores holes and
5458 * unwired and intransition entries to avoid losing memory by leaving
5459 * it unwired.
5460 */
5461 static kern_return_t
5462 vm_map_unwire_nested(
5463 register vm_map_t map,
5464 register vm_map_offset_t start,
5465 register vm_map_offset_t end,
5466 boolean_t user_wire,
5467 pmap_t map_pmap,
5468 vm_map_offset_t pmap_addr)
5469 {
5470 register vm_map_entry_t entry;
5471 struct vm_map_entry *first_entry, tmp_entry;
5472 boolean_t need_wakeup;
5473 boolean_t main_map = FALSE;
5474 unsigned int last_timestamp;
5475
5476 vm_map_lock(map);
5477 if(map_pmap == NULL)
5478 main_map = TRUE;
5479 last_timestamp = map->timestamp;
5480
5481 VM_MAP_RANGE_CHECK(map, start, end);
5482 assert(page_aligned(start));
5483 assert(page_aligned(end));
5484 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
5485 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
5486
5487 if (start == end) {
5488 /* We unwired what the caller asked for: zero pages */
5489 vm_map_unlock(map);
5490 return KERN_SUCCESS;
5491 }
5492
5493 if (vm_map_lookup_entry(map, start, &first_entry)) {
5494 entry = first_entry;
5495 /*
5496 * vm_map_clip_start will be done later.
5497 * We don't want to unnest any nested sub maps here !
5498 */
5499 }
5500 else {
5501 if (!user_wire) {
5502 panic("vm_map_unwire: start not found");
5503 }
5504 /* Start address is not in map. */
5505 vm_map_unlock(map);
5506 return(KERN_INVALID_ADDRESS);
5507 }
5508
5509 if (entry->superpage_size) {
5510 /* superpages are always wired */
5511 vm_map_unlock(map);
5512 return KERN_INVALID_ADDRESS;
5513 }
5514
5515 need_wakeup = FALSE;
5516 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
5517 if (entry->in_transition) {
5518 /*
5519 * 1)
5520 * Another thread is wiring down this entry. Note
5521 * that if it is not for the other thread we would
5522 * be unwiring an unwired entry. This is not
5523 * permitted. If we wait, we will be unwiring memory
5524 * we did not wire.
5525 *
5526 * 2)
5527 * Another thread is unwiring this entry. We did not
5528 * have a reference to it, because if we did, this
5529 * entry will not be getting unwired now.
5530 */
5531 if (!user_wire) {
5532 /*
5533 * XXX FBDP
5534 * This could happen: there could be some
5535 * overlapping vslock/vsunlock operations
5536 * going on.
5537 * We should probably just wait and retry,
5538 * but then we have to be careful that this
5539 * entry could get "simplified" after
5540 * "in_transition" gets unset and before
5541 * we re-lookup the entry, so we would
5542 * have to re-clip the entry to avoid
5543 * re-unwiring what we have already unwired...
5544 * See vm_map_wire_nested().
5545 *
5546 * Or we could just ignore "in_transition"
5547 here and proceed to decrement the wired
5548 * count(s) on this entry. That should be fine
5549 * as long as "wired_count" doesn't drop all
5550 * the way to 0 (and we should panic if THAT
5551 * happens).
5552 */
5553 panic("vm_map_unwire: in_transition entry");
5554 }
5555
5556 entry = entry->vme_next;
5557 continue;
5558 }
5559
5560 if (entry->is_sub_map) {
5561 vm_map_offset_t sub_start;
5562 vm_map_offset_t sub_end;
5563 vm_map_offset_t local_end;
5564 pmap_t pmap;
5565
5566 vm_map_clip_start(map, entry, start);
5567 vm_map_clip_end(map, entry, end);
5568
5569 sub_start = VME_OFFSET(entry);
5570 sub_end = entry->vme_end - entry->vme_start;
5571 sub_end += VME_OFFSET(entry);
5572 local_end = entry->vme_end;
5573 if(map_pmap == NULL) {
5574 if(entry->use_pmap) {
5575 pmap = VME_SUBMAP(entry)->pmap;
5576 pmap_addr = sub_start;
5577 } else {
5578 pmap = map->pmap;
5579 pmap_addr = start;
5580 }
5581 if (entry->wired_count == 0 ||
5582 (user_wire && entry->user_wired_count == 0)) {
5583 if (!user_wire)
5584 panic("vm_map_unwire: entry is unwired");
5585 entry = entry->vme_next;
5586 continue;
5587 }
5588
5589 /*
5590 * Check for holes
5591 * Holes: Next entry should be contiguous unless
5592 * this is the end of the region.
5593 */
5594 if (((entry->vme_end < end) &&
5595 ((entry->vme_next == vm_map_to_entry(map)) ||
5596 (entry->vme_next->vme_start
5597 > entry->vme_end)))) {
5598 if (!user_wire)
5599 panic("vm_map_unwire: non-contiguous region");
5600 /*
5601 entry = entry->vme_next;
5602 continue;
5603 */
5604 }
5605
5606 subtract_wire_counts(map, entry, user_wire);
5607
5608 if (entry->wired_count != 0) {
5609 entry = entry->vme_next;
5610 continue;
5611 }
5612
5613 entry->in_transition = TRUE;
5614 tmp_entry = *entry;/* see comment in vm_map_wire() */
5615
5616 /*
5617 * We can unlock the map now. The in_transition state
5618 * guarantees existence of the entry.
5619 */
5620 vm_map_unlock(map);
5621 vm_map_unwire_nested(VME_SUBMAP(entry),
5622 sub_start, sub_end, user_wire, pmap, pmap_addr);
5623 vm_map_lock(map);
5624
5625 if (last_timestamp+1 != map->timestamp) {
5626 /*
5627 * Find the entry again. It could have been
5628 * clipped or deleted after we unlocked the map.
5629 */
5630 if (!vm_map_lookup_entry(map,
5631 tmp_entry.vme_start,
5632 &first_entry)) {
5633 if (!user_wire)
5634 panic("vm_map_unwire: re-lookup failed");
5635 entry = first_entry->vme_next;
5636 } else
5637 entry = first_entry;
5638 }
5639 last_timestamp = map->timestamp;
5640
5641 /*
5642 * clear transition bit for all constituent entries
5643 * that were in the original entry (saved in
5644 * tmp_entry). Also check for waiters.
5645 */
5646 while ((entry != vm_map_to_entry(map)) &&
5647 (entry->vme_start < tmp_entry.vme_end)) {
5648 assert(entry->in_transition);
5649 entry->in_transition = FALSE;
5650 if (entry->needs_wakeup) {
5651 entry->needs_wakeup = FALSE;
5652 need_wakeup = TRUE;
5653 }
5654 entry = entry->vme_next;
5655 }
5656 continue;
5657 } else {
5658 vm_map_unlock(map);
5659 vm_map_unwire_nested(VME_SUBMAP(entry),
5660 sub_start, sub_end, user_wire, map_pmap,
5661 pmap_addr);
5662 vm_map_lock(map);
5663
5664 if (last_timestamp+1 != map->timestamp) {
5665 /*
5666 * Find the entry again. It could have been
5667 * clipped or deleted after we unlocked the map.
5668 */
5669 if (!vm_map_lookup_entry(map,
5670 tmp_entry.vme_start,
5671 &first_entry)) {
5672 if (!user_wire)
5673 panic("vm_map_unwire: re-lookup failed");
5674 entry = first_entry->vme_next;
5675 } else
5676 entry = first_entry;
5677 }
5678 last_timestamp = map->timestamp;
5679 }
5680 }
5681
5682
5683 if ((entry->wired_count == 0) ||
5684 (user_wire && entry->user_wired_count == 0)) {
5685 if (!user_wire)
5686 panic("vm_map_unwire: entry is unwired");
5687
5688 entry = entry->vme_next;
5689 continue;
5690 }
5691
5692 assert(entry->wired_count > 0 &&
5693 (!user_wire || entry->user_wired_count > 0));
5694
5695 vm_map_clip_start(map, entry, start);
5696 vm_map_clip_end(map, entry, end);
5697
5698 /*
5699 * Check for holes
5700 * Holes: Next entry should be contiguous unless
5701 * this is the end of the region.
5702 */
5703 if (((entry->vme_end < end) &&
5704 ((entry->vme_next == vm_map_to_entry(map)) ||
5705 (entry->vme_next->vme_start > entry->vme_end)))) {
5706
5707 if (!user_wire)
5708 panic("vm_map_unwire: non-contiguous region");
5709 entry = entry->vme_next;
5710 continue;
5711 }
5712
5713 subtract_wire_counts(map, entry, user_wire);
5714
5715 if (entry->wired_count != 0) {
5716 entry = entry->vme_next;
5717 continue;
5718 }
5719
5720 if(entry->zero_wired_pages) {
5721 entry->zero_wired_pages = FALSE;
5722 }
5723
5724 entry->in_transition = TRUE;
5725 tmp_entry = *entry; /* see comment in vm_map_wire() */
5726
5727 /*
5728 * We can unlock the map now. The in_transition state
5729 * guarantees existence of the entry.
5730 */
5731 vm_map_unlock(map);
5732 if(map_pmap) {
5733 vm_fault_unwire(map,
5734 &tmp_entry, FALSE, map_pmap, pmap_addr);
5735 } else {
5736 vm_fault_unwire(map,
5737 &tmp_entry, FALSE, map->pmap,
5738 tmp_entry.vme_start);
5739 }
5740 vm_map_lock(map);
5741
5742 if (last_timestamp+1 != map->timestamp) {
5743 /*
5744 * Find the entry again. It could have been clipped
5745 * or deleted after we unlocked the map.
5746 */
5747 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
5748 &first_entry)) {
5749 if (!user_wire)
5750 panic("vm_map_unwire: re-lookup failed");
5751 entry = first_entry->vme_next;
5752 } else
5753 entry = first_entry;
5754 }
5755 last_timestamp = map->timestamp;
5756
5757 /*
5758 * clear transition bit for all constituent entries that
5759 * were in the original entry (saved in tmp_entry). Also
5760 * check for waiters.
5761 */
5762 while ((entry != vm_map_to_entry(map)) &&
5763 (entry->vme_start < tmp_entry.vme_end)) {
5764 assert(entry->in_transition);
5765 entry->in_transition = FALSE;
5766 if (entry->needs_wakeup) {
5767 entry->needs_wakeup = FALSE;
5768 need_wakeup = TRUE;
5769 }
5770 entry = entry->vme_next;
5771 }
5772 }
5773
5774 /*
5775 * We might have fragmented the address space when we wired this
5776 * range of addresses. Attempt to re-coalesce these VM map entries
5777 * with their neighbors now that they're no longer wired.
5778 * Under some circumstances, address space fragmentation can
5779 * prevent VM object shadow chain collapsing, which can cause
5780 * swap space leaks.
5781 */
5782 vm_map_simplify_range(map, start, end);
5783
5784 vm_map_unlock(map);
5785 /*
5786 * wake up anybody waiting on entries that we have unwired.
5787 */
5788 if (need_wakeup)
5789 vm_map_entry_wakeup(map);
5790 return(KERN_SUCCESS);
5791
5792 }
5793
5794 kern_return_t
5795 vm_map_unwire(
5796 register vm_map_t map,
5797 register vm_map_offset_t start,
5798 register vm_map_offset_t end,
5799 boolean_t user_wire)
5800 {
5801 return vm_map_unwire_nested(map, start, end,
5802 user_wire, (pmap_t)NULL, 0);
5803 }
5804
5805
5806 /*
5807 * vm_map_entry_delete: [ internal use only ]
5808 *
5809 * Deallocate the given entry from the target map.
5810 */
5811 static void
5812 vm_map_entry_delete(
5813 register vm_map_t map,
5814 register vm_map_entry_t entry)
5815 {
5816 register vm_map_offset_t s, e;
5817 register vm_object_t object;
5818 register vm_map_t submap;
5819
5820 s = entry->vme_start;
5821 e = entry->vme_end;
5822 assert(page_aligned(s));
5823 assert(page_aligned(e));
5824 if (entry->map_aligned == TRUE) {
5825 assert(VM_MAP_PAGE_ALIGNED(s, VM_MAP_PAGE_MASK(map)));
5826 assert(VM_MAP_PAGE_ALIGNED(e, VM_MAP_PAGE_MASK(map)));
5827 }
5828 assert(entry->wired_count == 0);
5829 assert(entry->user_wired_count == 0);
5830 assert(!entry->permanent);
5831
5832 if (entry->is_sub_map) {
5833 object = NULL;
5834 submap = VME_SUBMAP(entry);
5835 } else {
5836 submap = NULL;
5837 object = VME_OBJECT(entry);
5838 }
5839
5840 vm_map_store_entry_unlink(map, entry);
5841 map->size -= e - s;
5842
5843 vm_map_entry_dispose(map, entry);
5844
5845 vm_map_unlock(map);
5846 /*
5847 * Deallocate the object only after removing all
5848 * pmap entries pointing to its pages.
5849 */
5850 if (submap)
5851 vm_map_deallocate(submap);
5852 else
5853 vm_object_deallocate(object);
5854
5855 }
5856
5857 void
5858 vm_map_submap_pmap_clean(
5859 vm_map_t map,
5860 vm_map_offset_t start,
5861 vm_map_offset_t end,
5862 vm_map_t sub_map,
5863 vm_map_offset_t offset)
5864 {
5865 vm_map_offset_t submap_start;
5866 vm_map_offset_t submap_end;
5867 vm_map_size_t remove_size;
5868 vm_map_entry_t entry;
5869
5870 submap_end = offset + (end - start);
5871 submap_start = offset;
5872
5873 vm_map_lock_read(sub_map);
5874 if(vm_map_lookup_entry(sub_map, offset, &entry)) {
5875
5876 remove_size = (entry->vme_end - entry->vme_start);
5877 if(offset > entry->vme_start)
5878 remove_size -= offset - entry->vme_start;
5879
5880
5881 if(submap_end < entry->vme_end) {
5882 remove_size -=
5883 entry->vme_end - submap_end;
5884 }
5885 if(entry->is_sub_map) {
5886 vm_map_submap_pmap_clean(
5887 sub_map,
5888 start,
5889 start + remove_size,
5890 VME_SUBMAP(entry),
5891 VME_OFFSET(entry));
5892 } else {
5893
5894 if((map->mapped_in_other_pmaps) && (map->ref_count)
5895 && (VME_OBJECT(entry) != NULL)) {
5896 vm_object_pmap_protect_options(
5897 VME_OBJECT(entry),
5898 (VME_OFFSET(entry) +
5899 offset -
5900 entry->vme_start),
5901 remove_size,
5902 PMAP_NULL,
5903 entry->vme_start,
5904 VM_PROT_NONE,
5905 PMAP_OPTIONS_REMOVE);
5906 } else {
5907 pmap_remove(map->pmap,
5908 (addr64_t)start,
5909 (addr64_t)(start + remove_size));
5910 }
5911 }
5912 }
5913
5914 entry = entry->vme_next;
5915
5916 while((entry != vm_map_to_entry(sub_map))
5917 && (entry->vme_start < submap_end)) {
5918 remove_size = (entry->vme_end - entry->vme_start);
5919 if(submap_end < entry->vme_end) {
5920 remove_size -= entry->vme_end - submap_end;
5921 }
5922 if(entry->is_sub_map) {
5923 vm_map_submap_pmap_clean(
5924 sub_map,
5925 (start + entry->vme_start) - offset,
5926 ((start + entry->vme_start) - offset) + remove_size,
5927 VME_SUBMAP(entry),
5928 VME_OFFSET(entry));
5929 } else {
5930 if((map->mapped_in_other_pmaps) && (map->ref_count)
5931 && (VME_OBJECT(entry) != NULL)) {
5932 vm_object_pmap_protect_options(
5933 VME_OBJECT(entry),
5934 VME_OFFSET(entry),
5935 remove_size,
5936 PMAP_NULL,
5937 entry->vme_start,
5938 VM_PROT_NONE,
5939 PMAP_OPTIONS_REMOVE);
5940 } else {
5941 pmap_remove(map->pmap,
5942 (addr64_t)((start + entry->vme_start)
5943 - offset),
5944 (addr64_t)(((start + entry->vme_start)
5945 - offset) + remove_size));
5946 }
5947 }
5948 entry = entry->vme_next;
5949 }
5950 vm_map_unlock_read(sub_map);
5951 return;
5952 }
5953
5954 /*
5955 * vm_map_delete: [ internal use only ]
5956 *
5957 * Deallocates the given address range from the target map.
5958 * Removes all user wirings. Unwires one kernel wiring if
5959 * VM_MAP_REMOVE_KUNWIRE is set. Waits for kernel wirings to go
5960 * away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set. Sleeps
5961 * interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
5962 *
5963 * This routine is called with map locked and leaves map locked.
5964 */
5965 static kern_return_t
5966 vm_map_delete(
5967 vm_map_t map,
5968 vm_map_offset_t start,
5969 vm_map_offset_t end,
5970 int flags,
5971 vm_map_t zap_map)
5972 {
5973 vm_map_entry_t entry, next;
5974 struct vm_map_entry *first_entry, tmp_entry;
5975 register vm_map_offset_t s;
5976 register vm_object_t object;
5977 boolean_t need_wakeup;
5978 unsigned int last_timestamp = ~0; /* unlikely value */
5979 int interruptible;
5980
5981 interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
5982 THREAD_ABORTSAFE : THREAD_UNINT;
5983
5984 /*
5985 * All our DMA I/O operations in IOKit are currently done by
5986 * wiring through the map entries of the task requesting the I/O.
5987 * Because of this, we must always wait for kernel wirings
5988 * to go away on the entries before deleting them.
5989 *
5990 * Any caller who wants to actually remove a kernel wiring
5991 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
5992 * properly remove one wiring instead of blasting through
5993 * them all.
5994 */
5995 flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;
5996
5997 while(1) {
5998 /*
5999 * Find the start of the region, and clip it
6000 */
6001 if (vm_map_lookup_entry(map, start, &first_entry)) {
6002 entry = first_entry;
6003 if (map == kalloc_map &&
6004 (entry->vme_start != start ||
6005 entry->vme_end != end)) {
6006 panic("vm_map_delete(%p,0x%llx,0x%llx): "
6007 "mismatched entry %p [0x%llx:0x%llx]\n",
6008 map,
6009 (uint64_t)start,
6010 (uint64_t)end,
6011 entry,
6012 (uint64_t)entry->vme_start,
6013 (uint64_t)entry->vme_end);
6014 }
6015 if (entry->superpage_size && (start & ~SUPERPAGE_MASK)) { /* extend request to whole entry */ start = SUPERPAGE_ROUND_DOWN(start);
6016 start = SUPERPAGE_ROUND_DOWN(start);
6017 continue;
6018 }
6019 if (start == entry->vme_start) {
6020 /*
6021 * No need to clip. We don't want to cause
6022 * any unnecessary unnesting in this case...
6023 */
6024 } else {
6025 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
6026 entry->map_aligned &&
6027 !VM_MAP_PAGE_ALIGNED(
6028 start,
6029 VM_MAP_PAGE_MASK(map))) {
6030 /*
6031 * The entry will no longer be
6032 * map-aligned after clipping
6033 * and the caller said it's OK.
6034 */
6035 entry->map_aligned = FALSE;
6036 }
6037 if (map == kalloc_map) {
6038 panic("vm_map_delete(%p,0x%llx,0x%llx):"
6039 " clipping %p at 0x%llx\n",
6040 map,
6041 (uint64_t)start,
6042 (uint64_t)end,
6043 entry,
6044 (uint64_t)start);
6045 }
6046 vm_map_clip_start(map, entry, start);
6047 }
6048
6049 /*
6050 * Fix the lookup hint now, rather than each
6051 * time through the loop.
6052 */
6053 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
6054 } else {
6055 if (map->pmap == kernel_pmap &&
6056 map->ref_count != 0) {
6057 panic("vm_map_delete(%p,0x%llx,0x%llx): "
6058 "no map entry at 0x%llx\n",
6059 map,
6060 (uint64_t)start,
6061 (uint64_t)end,
6062 (uint64_t)start);
6063 }
6064 entry = first_entry->vme_next;
6065 }
6066 break;
6067 }
6068 if (entry->superpage_size)
6069 end = SUPERPAGE_ROUND_UP(end);
6070
6071 need_wakeup = FALSE;
6072 /*
6073 * Step through all entries in this region
6074 */
6075 s = entry->vme_start;
6076 while ((entry != vm_map_to_entry(map)) && (s < end)) {
6077 /*
6078 * At this point, we have deleted all the memory entries
6079 * between "start" and "s". We still need to delete
6080 * all memory entries between "s" and "end".
6081 * While we were blocked and the map was unlocked, some
6082 * new memory entries could have been re-allocated between
6083 * "start" and "s" and we don't want to mess with those.
6084 * Some of those entries could even have been re-assembled
6085 * with an entry after "s" (in vm_map_simplify_entry()), so
6086 * we may have to vm_map_clip_start() again.
6087 */
6088
6089 if (entry->vme_start >= s) {
6090 /*
6091 * This entry starts on or after "s"
6092 * so no need to clip its start.
6093 */
6094 } else {
6095 /*
6096 * This entry has been re-assembled by a
6097 * vm_map_simplify_entry(). We need to
6098 * re-clip its start.
6099 */
6100 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
6101 entry->map_aligned &&
6102 !VM_MAP_PAGE_ALIGNED(s,
6103 VM_MAP_PAGE_MASK(map))) {
6104 /*
6105 * The entry will no longer be map-aligned
6106 * after clipping and the caller said it's OK.
6107 */
6108 entry->map_aligned = FALSE;
6109 }
6110 if (map == kalloc_map) {
6111 panic("vm_map_delete(%p,0x%llx,0x%llx): "
6112 "clipping %p at 0x%llx\n",
6113 map,
6114 (uint64_t)start,
6115 (uint64_t)end,
6116 entry,
6117 (uint64_t)s);
6118 }
6119 vm_map_clip_start(map, entry, s);
6120 }
6121 if (entry->vme_end <= end) {
6122 /*
6123 * This entry is going away completely, so no need
6124 * to clip and possibly cause an unnecessary unnesting.
6125 */
6126 } else {
6127 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
6128 entry->map_aligned &&
6129 !VM_MAP_PAGE_ALIGNED(end,
6130 VM_MAP_PAGE_MASK(map))) {
6131 /*
6132 * The entry will no longer be map-aligned
6133 * after clipping and the caller said it's OK.
6134 */
6135 entry->map_aligned = FALSE;
6136 }
6137 if (map == kalloc_map) {
6138 panic("vm_map_delete(%p,0x%llx,0x%llx): "
6139 "clipping %p at 0x%llx\n",
6140 map,
6141 (uint64_t)start,
6142 (uint64_t)end,
6143 entry,
6144 (uint64_t)end);
6145 }
6146 vm_map_clip_end(map, entry, end);
6147 }
6148
6149 if (entry->permanent) {
6150 panic("attempt to remove permanent VM map entry "
6151 "%p [0x%llx:0x%llx]\n",
6152 entry, (uint64_t) s, (uint64_t) end);
6153 }
6154
6155
6156 if (entry->in_transition) {
6157 wait_result_t wait_result;
6158
6159 /*
6160 * Another thread is wiring/unwiring this entry.
6161 * Let the other thread know we are waiting.
6162 */
6163 assert(s == entry->vme_start);
6164 entry->needs_wakeup = TRUE;
6165
6166 /*
6167 * wake up anybody waiting on entries that we have
6168 * already unwired/deleted.
6169 */
6170 if (need_wakeup) {
6171 vm_map_entry_wakeup(map);
6172 need_wakeup = FALSE;
6173 }
6174
6175 wait_result = vm_map_entry_wait(map, interruptible);
6176
6177 if (interruptible &&
6178 wait_result == THREAD_INTERRUPTED) {
6179 /*
6180 * We do not clear the needs_wakeup flag,
6181 * since we cannot tell if we were the only one.
6182 */
6183 return KERN_ABORTED;
6184 }
6185
6186 /*
6187 * The entry could have been clipped or it
6188 * may not exist anymore. Look it up again.
6189 */
6190 if (!vm_map_lookup_entry(map, s, &first_entry)) {
6191 /*
6192 * User: use the next entry
6193 */
6194 entry = first_entry->vme_next;
6195 s = entry->vme_start;
6196 } else {
6197 entry = first_entry;
6198 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
6199 }
6200 last_timestamp = map->timestamp;
6201 continue;
6202 } /* end in_transition */
6203
6204 if (entry->wired_count) {
6205 boolean_t user_wire;
6206
6207 user_wire = entry->user_wired_count > 0;
6208
6209 /*
6210 * Remove a kernel wiring if requested
6211 */
6212 if (flags & VM_MAP_REMOVE_KUNWIRE) {
6213 entry->wired_count--;
6214 }
6215
6216 /*
6217 * Remove all user wirings for proper accounting
6218 */
6219 if (entry->user_wired_count > 0) {
6220 while (entry->user_wired_count)
6221 subtract_wire_counts(map, entry, user_wire);
6222 }
6223
6224 if (entry->wired_count != 0) {
6225 assert(map != kernel_map);
6226 /*
6227 * Cannot continue. Typical case is when
6228 * a user thread has physical io pending on
6229 * on this page. Either wait for the
6230 * kernel wiring to go away or return an
6231 * error.
6232 */
6233 if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) {
6234 wait_result_t wait_result;
6235
6236 assert(s == entry->vme_start);
6237 entry->needs_wakeup = TRUE;
6238 wait_result = vm_map_entry_wait(map,
6239 interruptible);
6240
6241 if (interruptible &&
6242 wait_result == THREAD_INTERRUPTED) {
6243 /*
6244 * We do not clear the
6245 * needs_wakeup flag, since we
6246 * cannot tell if we were the
6247 * only one.
6248 */
6249 return KERN_ABORTED;
6250 }
6251
6252 /*
6253 * The entry could have been clipped or
6254 * it may not exist anymore. Look it
6255 * up again.
6256 */
6257 if (!vm_map_lookup_entry(map, s,
6258 &first_entry)) {
6259 assert(map != kernel_map);
6260 /*
6261 * User: use the next entry
6262 */
6263 entry = first_entry->vme_next;
6264 s = entry->vme_start;
6265 } else {
6266 entry = first_entry;
6267 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
6268 }
6269 last_timestamp = map->timestamp;
6270 continue;
6271 }
6272 else {
6273 return KERN_FAILURE;
6274 }
6275 }
6276
6277 entry->in_transition = TRUE;
6278 /*
6279 * copy current entry. see comment in vm_map_wire()
6280 */
6281 tmp_entry = *entry;
6282 assert(s == entry->vme_start);
6283
6284 /*
6285 * We can unlock the map now. The in_transition
6286 * state guarentees existance of the entry.
6287 */
6288 vm_map_unlock(map);
6289
6290 if (tmp_entry.is_sub_map) {
6291 vm_map_t sub_map;
6292 vm_map_offset_t sub_start, sub_end;
6293 pmap_t pmap;
6294 vm_map_offset_t pmap_addr;
6295
6296
6297 sub_map = VME_SUBMAP(&tmp_entry);
6298 sub_start = VME_OFFSET(&tmp_entry);
6299 sub_end = sub_start + (tmp_entry.vme_end -
6300 tmp_entry.vme_start);
6301 if (tmp_entry.use_pmap) {
6302 pmap = sub_map->pmap;
6303 pmap_addr = tmp_entry.vme_start;
6304 } else {
6305 pmap = map->pmap;
6306 pmap_addr = tmp_entry.vme_start;
6307 }
6308 (void) vm_map_unwire_nested(sub_map,
6309 sub_start, sub_end,
6310 user_wire,
6311 pmap, pmap_addr);
6312 } else {
6313
6314 if (VME_OBJECT(&tmp_entry) == kernel_object) {
6315 pmap_protect_options(
6316 map->pmap,
6317 tmp_entry.vme_start,
6318 tmp_entry.vme_end,
6319 VM_PROT_NONE,
6320 PMAP_OPTIONS_REMOVE,
6321 NULL);
6322 }
6323 vm_fault_unwire(map, &tmp_entry,
6324 VME_OBJECT(&tmp_entry) == kernel_object,
6325 map->pmap, tmp_entry.vme_start);
6326 }
6327
6328 vm_map_lock(map);
6329
6330 if (last_timestamp+1 != map->timestamp) {
6331 /*
6332 * Find the entry again. It could have
6333 * been clipped after we unlocked the map.
6334 */
6335 if (!vm_map_lookup_entry(map, s, &first_entry)){
6336 assert((map != kernel_map) &&
6337 (!entry->is_sub_map));
6338 first_entry = first_entry->vme_next;
6339 s = first_entry->vme_start;
6340 } else {
6341 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
6342 }
6343 } else {
6344 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
6345 first_entry = entry;
6346 }
6347
6348 last_timestamp = map->timestamp;
6349
6350 entry = first_entry;
6351 while ((entry != vm_map_to_entry(map)) &&
6352 (entry->vme_start < tmp_entry.vme_end)) {
6353 assert(entry->in_transition);
6354 entry->in_transition = FALSE;
6355 if (entry->needs_wakeup) {
6356 entry->needs_wakeup = FALSE;
6357 need_wakeup = TRUE;
6358 }
6359 entry = entry->vme_next;
6360 }
6361 /*
6362 * We have unwired the entry(s). Go back and
6363 * delete them.
6364 */
6365 entry = first_entry;
6366 continue;
6367 }
6368
6369 /* entry is unwired */
6370 assert(entry->wired_count == 0);
6371 assert(entry->user_wired_count == 0);
6372
6373 assert(s == entry->vme_start);
6374
6375 if (flags & VM_MAP_REMOVE_NO_PMAP_CLEANUP) {
6376 /*
6377 * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
6378 * vm_map_delete(), some map entries might have been
6379 * transferred to a "zap_map", which doesn't have a
6380 * pmap. The original pmap has already been flushed
6381 * in the vm_map_delete() call targeting the original
6382 * map, but when we get to destroying the "zap_map",
6383 * we don't have any pmap to flush, so let's just skip
6384 * all this.
6385 */
6386 } else if (entry->is_sub_map) {
6387 if (entry->use_pmap) {
6388 #ifndef NO_NESTED_PMAP
6389 int pmap_flags;
6390
6391 if (flags & VM_MAP_REMOVE_NO_UNNESTING) {
6392 /*
6393 * This is the final cleanup of the
6394 * address space being terminated.
6395 * No new mappings are expected and
6396 * we don't really need to unnest the
6397 * shared region (and lose the "global"
6398 * pmap mappings, if applicable).
6399 *
6400 * Tell the pmap layer that we're
6401 * "clean" wrt nesting.
6402 */
6403 pmap_flags = PMAP_UNNEST_CLEAN;
6404 } else {
6405 /*
6406 * We're unmapping part of the nested
6407 * shared region, so we can't keep the
6408 * nested pmap.
6409 */
6410 pmap_flags = 0;
6411 }
6412 pmap_unnest_options(
6413 map->pmap,
6414 (addr64_t)entry->vme_start,
6415 entry->vme_end - entry->vme_start,
6416 pmap_flags);
6417 #endif /* NO_NESTED_PMAP */
6418 if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
6419 /* clean up parent map/maps */
6420 vm_map_submap_pmap_clean(
6421 map, entry->vme_start,
6422 entry->vme_end,
6423 VME_SUBMAP(entry),
6424 VME_OFFSET(entry));
6425 }
6426 } else {
6427 vm_map_submap_pmap_clean(
6428 map, entry->vme_start, entry->vme_end,
6429 VME_SUBMAP(entry),
6430 VME_OFFSET(entry));
6431 }
6432 } else if (VME_OBJECT(entry) != kernel_object &&
6433 VME_OBJECT(entry) != compressor_object) {
6434 object = VME_OBJECT(entry);
6435 if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
6436 vm_object_pmap_protect_options(
6437 object, VME_OFFSET(entry),
6438 entry->vme_end - entry->vme_start,
6439 PMAP_NULL,
6440 entry->vme_start,
6441 VM_PROT_NONE,
6442 PMAP_OPTIONS_REMOVE);
6443 } else if ((VME_OBJECT(entry) != VM_OBJECT_NULL) ||
6444 (map->pmap == kernel_pmap)) {
6445 /* Remove translations associated
6446 * with this range unless the entry
6447 * does not have an object, or
6448 * it's the kernel map or a descendant
6449 * since the platform could potentially
6450 * create "backdoor" mappings invisible
6451 * to the VM. It is expected that
6452 * objectless, non-kernel ranges
6453 * do not have such VM invisible
6454 * translations.
6455 */
6456 pmap_remove_options(map->pmap,
6457 (addr64_t)entry->vme_start,
6458 (addr64_t)entry->vme_end,
6459 PMAP_OPTIONS_REMOVE);
6460 }
6461 }
6462
6463 if (entry->iokit_acct) {
6464 /* alternate accounting */
6465 vm_map_iokit_unmapped_region(map,
6466 (entry->vme_end -
6467 entry->vme_start));
6468 entry->iokit_acct = FALSE;
6469 }
6470
6471 /*
6472 * All pmap mappings for this map entry must have been
6473 * cleared by now.
6474 */
6475 #if DEBUG
6476 assert(vm_map_pmap_is_empty(map,
6477 entry->vme_start,
6478 entry->vme_end));
6479 #endif /* DEBUG */
6480
6481 next = entry->vme_next;
6482
6483 if (map->pmap == kernel_pmap &&
6484 map->ref_count != 0 &&
6485 entry->vme_end < end &&
6486 (next == vm_map_to_entry(map) ||
6487 next->vme_start != entry->vme_end)) {
6488 panic("vm_map_delete(%p,0x%llx,0x%llx): "
6489 "hole after %p at 0x%llx\n",
6490 map,
6491 (uint64_t)start,
6492 (uint64_t)end,
6493 entry,
6494 (uint64_t)entry->vme_end);
6495 }
6496
6497 s = next->vme_start;
6498 last_timestamp = map->timestamp;
6499
6500 if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) &&
6501 zap_map != VM_MAP_NULL) {
6502 vm_map_size_t entry_size;
6503 /*
6504 * The caller wants to save the affected VM map entries
6505 * into the "zap_map". The caller will take care of
6506 * these entries.
6507 */
6508 /* unlink the entry from "map" ... */
6509 vm_map_store_entry_unlink(map, entry);
6510 /* ... and add it to the end of the "zap_map" */
6511 vm_map_store_entry_link(zap_map,
6512 vm_map_last_entry(zap_map),
6513 entry);
6514 entry_size = entry->vme_end - entry->vme_start;
6515 map->size -= entry_size;
6516 zap_map->size += entry_size;
6517 /* we didn't unlock the map, so no timestamp increase */
6518 last_timestamp--;
6519 } else {
6520 vm_map_entry_delete(map, entry);
6521 /* vm_map_entry_delete unlocks the map */
6522 vm_map_lock(map);
6523 }
6524
6525 entry = next;
6526
6527 if(entry == vm_map_to_entry(map)) {
6528 break;
6529 }
6530 if (last_timestamp+1 != map->timestamp) {
6531 /*
6532 * we are responsible for deleting everything
6533 * from the give space, if someone has interfered
6534 * we pick up where we left off, back fills should
6535 * be all right for anyone except map_delete and
6536 * we have to assume that the task has been fully
6537 * disabled before we get here
6538 */
6539 if (!vm_map_lookup_entry(map, s, &entry)){
6540 entry = entry->vme_next;
6541 s = entry->vme_start;
6542 } else {
6543 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
6544 }
6545 /*
6546 * others can not only allocate behind us, we can
6547 * also see coalesce while we don't have the map lock
6548 */
6549 if(entry == vm_map_to_entry(map)) {
6550 break;
6551 }
6552 }
6553 last_timestamp = map->timestamp;
6554 }
6555
6556 if (map->wait_for_space)
6557 thread_wakeup((event_t) map);
6558 /*
6559 * wake up anybody waiting on entries that we have already deleted.
6560 */
6561 if (need_wakeup)
6562 vm_map_entry_wakeup(map);
6563
6564 return KERN_SUCCESS;
6565 }
6566
6567 /*
6568 * vm_map_remove:
6569 *
6570 * Remove the given address range from the target map.
6571 * This is the exported form of vm_map_delete.
6572 */
6573 kern_return_t
6574 vm_map_remove(
6575 register vm_map_t map,
6576 register vm_map_offset_t start,
6577 register vm_map_offset_t end,
6578 register boolean_t flags)
6579 {
6580 register kern_return_t result;
6581
6582 vm_map_lock(map);
6583 VM_MAP_RANGE_CHECK(map, start, end);
6584 /*
6585 * For the zone_map, the kernel controls the allocation/freeing of memory.
6586 * Any free to the zone_map should be within the bounds of the map and
6587 * should free up memory. If the VM_MAP_RANGE_CHECK() silently converts a
6588 * free to the zone_map into a no-op, there is a problem and we should
6589 * panic.
6590 */
6591 if ((map == zone_map) && (start == end))
6592 panic("Nothing being freed to the zone_map. start = end = %p\n", (void *)start);
6593 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
6594 vm_map_unlock(map);
6595
6596 return(result);
6597 }
6598
6599
6600 /*
6601 * Routine: vm_map_copy_discard
6602 *
6603 * Description:
6604 * Dispose of a map copy object (returned by
6605 * vm_map_copyin).
6606 */
6607 void
6608 vm_map_copy_discard(
6609 vm_map_copy_t copy)
6610 {
6611 if (copy == VM_MAP_COPY_NULL)
6612 return;
6613
6614 switch (copy->type) {
6615 case VM_MAP_COPY_ENTRY_LIST:
6616 while (vm_map_copy_first_entry(copy) !=
6617 vm_map_copy_to_entry(copy)) {
6618 vm_map_entry_t entry = vm_map_copy_first_entry(copy);
6619
6620 vm_map_copy_entry_unlink(copy, entry);
6621 if (entry->is_sub_map) {
6622 vm_map_deallocate(VME_SUBMAP(entry));
6623 } else {
6624 vm_object_deallocate(VME_OBJECT(entry));
6625 }
6626 vm_map_copy_entry_dispose(copy, entry);
6627 }
6628 break;
6629 case VM_MAP_COPY_OBJECT:
6630 vm_object_deallocate(copy->cpy_object);
6631 break;
6632 case VM_MAP_COPY_KERNEL_BUFFER:
6633
6634 /*
6635 * The vm_map_copy_t and possibly the data buffer were
6636 * allocated by a single call to kalloc(), i.e. the
6637 * vm_map_copy_t was not allocated out of the zone.
6638 */
6639 if (copy->size > msg_ool_size_small || copy->offset)
6640 panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
6641 (long long)copy->size, (long long)copy->offset);
6642 kfree(copy, copy->size + cpy_kdata_hdr_sz);
6643 return;
6644 }
6645 zfree(vm_map_copy_zone, copy);
6646 }
6647
6648 /*
6649 * Routine: vm_map_copy_copy
6650 *
6651 * Description:
6652 * Move the information in a map copy object to
6653 * a new map copy object, leaving the old one
6654 * empty.
6655 *
6656 * This is used by kernel routines that need
6657 * to look at out-of-line data (in copyin form)
6658 * before deciding whether to return SUCCESS.
6659 * If the routine returns FAILURE, the original
6660 * copy object will be deallocated; therefore,
6661 * these routines must make a copy of the copy
6662 * object and leave the original empty so that
6663 * deallocation will not fail.
6664 */
6665 vm_map_copy_t
6666 vm_map_copy_copy(
6667 vm_map_copy_t copy)
6668 {
6669 vm_map_copy_t new_copy;
6670
6671 if (copy == VM_MAP_COPY_NULL)
6672 return VM_MAP_COPY_NULL;
6673
6674 /*
6675 * Allocate a new copy object, and copy the information
6676 * from the old one into it.
6677 */
6678
6679 new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
6680 new_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
6681 *new_copy = *copy;
6682
6683 if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
6684 /*
6685 * The links in the entry chain must be
6686 * changed to point to the new copy object.
6687 */
6688 vm_map_copy_first_entry(copy)->vme_prev
6689 = vm_map_copy_to_entry(new_copy);
6690 vm_map_copy_last_entry(copy)->vme_next
6691 = vm_map_copy_to_entry(new_copy);
6692 }
6693
6694 /*
6695 * Change the old copy object into one that contains
6696 * nothing to be deallocated.
6697 */
6698 copy->type = VM_MAP_COPY_OBJECT;
6699 copy->cpy_object = VM_OBJECT_NULL;
6700
6701 /*
6702 * Return the new object.
6703 */
6704 return new_copy;
6705 }
6706
6707 static kern_return_t
6708 vm_map_overwrite_submap_recurse(
6709 vm_map_t dst_map,
6710 vm_map_offset_t dst_addr,
6711 vm_map_size_t dst_size)
6712 {
6713 vm_map_offset_t dst_end;
6714 vm_map_entry_t tmp_entry;
6715 vm_map_entry_t entry;
6716 kern_return_t result;
6717 boolean_t encountered_sub_map = FALSE;
6718
6719
6720
6721 /*
6722 * Verify that the destination is all writeable
6723 * initially. We have to trunc the destination
6724 * address and round the copy size or we'll end up
6725 * splitting entries in strange ways.
6726 */
6727
6728 dst_end = vm_map_round_page(dst_addr + dst_size,
6729 VM_MAP_PAGE_MASK(dst_map));
6730 vm_map_lock(dst_map);
6731
6732 start_pass_1:
6733 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
6734 vm_map_unlock(dst_map);
6735 return(KERN_INVALID_ADDRESS);
6736 }
6737
6738 vm_map_clip_start(dst_map,
6739 tmp_entry,
6740 vm_map_trunc_page(dst_addr,
6741 VM_MAP_PAGE_MASK(dst_map)));
6742 if (tmp_entry->is_sub_map) {
6743 /* clipping did unnest if needed */
6744 assert(!tmp_entry->use_pmap);
6745 }
6746
6747 for (entry = tmp_entry;;) {
6748 vm_map_entry_t next;
6749
6750 next = entry->vme_next;
6751 while(entry->is_sub_map) {
6752 vm_map_offset_t sub_start;
6753 vm_map_offset_t sub_end;
6754 vm_map_offset_t local_end;
6755
6756 if (entry->in_transition) {
6757 /*
6758 * Say that we are waiting, and wait for entry.
6759 */
6760 entry->needs_wakeup = TRUE;
6761 vm_map_entry_wait(dst_map, THREAD_UNINT);
6762
6763 goto start_pass_1;
6764 }
6765
6766 encountered_sub_map = TRUE;
6767 sub_start = VME_OFFSET(entry);
6768
6769 if(entry->vme_end < dst_end)
6770 sub_end = entry->vme_end;
6771 else
6772 sub_end = dst_end;
6773 sub_end -= entry->vme_start;
6774 sub_end += VME_OFFSET(entry);
6775 local_end = entry->vme_end;
6776 vm_map_unlock(dst_map);
6777
6778 result = vm_map_overwrite_submap_recurse(
6779 VME_SUBMAP(entry),
6780 sub_start,
6781 sub_end - sub_start);
6782
6783 if(result != KERN_SUCCESS)
6784 return result;
6785 if (dst_end <= entry->vme_end)
6786 return KERN_SUCCESS;
6787 vm_map_lock(dst_map);
6788 if(!vm_map_lookup_entry(dst_map, local_end,
6789 &tmp_entry)) {
6790 vm_map_unlock(dst_map);
6791 return(KERN_INVALID_ADDRESS);
6792 }
6793 entry = tmp_entry;
6794 next = entry->vme_next;
6795 }
6796
6797 if ( ! (entry->protection & VM_PROT_WRITE)) {
6798 vm_map_unlock(dst_map);
6799 return(KERN_PROTECTION_FAILURE);
6800 }
6801
6802 /*
6803 * If the entry is in transition, we must wait
6804 * for it to exit that state. Anything could happen
6805 * when we unlock the map, so start over.
6806 */
6807 if (entry->in_transition) {
6808
6809 /*
6810 * Say that we are waiting, and wait for entry.
6811 */
6812 entry->needs_wakeup = TRUE;
6813 vm_map_entry_wait(dst_map, THREAD_UNINT);
6814
6815 goto start_pass_1;
6816 }
6817
6818 /*
6819 * our range is contained completely within this map entry
6820 */
6821 if (dst_end <= entry->vme_end) {
6822 vm_map_unlock(dst_map);
6823 return KERN_SUCCESS;
6824 }
6825 /*
6826 * check that range specified is contiguous region
6827 */
6828 if ((next == vm_map_to_entry(dst_map)) ||
6829 (next->vme_start != entry->vme_end)) {
6830 vm_map_unlock(dst_map);
6831 return(KERN_INVALID_ADDRESS);
6832 }
6833
6834 /*
6835 * Check for permanent objects in the destination.
6836 */
6837 if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
6838 ((!VME_OBJECT(entry)->internal) ||
6839 (VME_OBJECT(entry)->true_share))) {
6840 if(encountered_sub_map) {
6841 vm_map_unlock(dst_map);
6842 return(KERN_FAILURE);
6843 }
6844 }
6845
6846
6847 entry = next;
6848 }/* for */
6849 vm_map_unlock(dst_map);
6850 return(KERN_SUCCESS);
6851 }
6852
6853 /*
6854 * Routine: vm_map_copy_overwrite
6855 *
6856 * Description:
6857 * Copy the memory described by the map copy
6858 * object (copy; returned by vm_map_copyin) onto
6859 * the specified destination region (dst_map, dst_addr).
6860 * The destination must be writeable.
6861 *
6862 * Unlike vm_map_copyout, this routine actually
6863 * writes over previously-mapped memory. If the
6864 * previous mapping was to a permanent (user-supplied)
6865 * memory object, it is preserved.
6866 *
6867 * The attributes (protection and inheritance) of the
6868 * destination region are preserved.
6869 *
6870 * If successful, consumes the copy object.
6871 * Otherwise, the caller is responsible for it.
6872 *
6873 * Implementation notes:
6874 * To overwrite aligned temporary virtual memory, it is
6875 * sufficient to remove the previous mapping and insert
6876 * the new copy. This replacement is done either on
6877 * the whole region (if no permanent virtual memory
6878 * objects are embedded in the destination region) or
6879 * in individual map entries.
6880 *
6881 * To overwrite permanent virtual memory, it is necessary
6882 * to copy each page, as the external memory management
6883 * interface currently does not provide any optimizations.
6884 *
6885 * Unaligned memory also has to be copied. It is possible
6886 * to use 'vm_trickery' to copy the aligned data. This is
6887 * not done but not hard to implement.
6888 *
6889 * Once a page of permanent memory has been overwritten,
6890 * it is impossible to interrupt this function; otherwise,
6891 * the call would be neither atomic nor location-independent.
6892 * The kernel-state portion of a user thread must be
6893 * interruptible.
6894 *
6895 * It may be expensive to forward all requests that might
6896 * overwrite permanent memory (vm_write, vm_copy) to
6897 * uninterruptible kernel threads. This routine may be
6898 * called by interruptible threads; however, success is
6899 * not guaranteed -- if the request cannot be performed
6900 * atomically and interruptibly, an error indication is
6901 * returned.
6902 */
6903
6904 static kern_return_t
6905 vm_map_copy_overwrite_nested(
6906 vm_map_t dst_map,
6907 vm_map_address_t dst_addr,
6908 vm_map_copy_t copy,
6909 boolean_t interruptible,
6910 pmap_t pmap,
6911 boolean_t discard_on_success)
6912 {
6913 vm_map_offset_t dst_end;
6914 vm_map_entry_t tmp_entry;
6915 vm_map_entry_t entry;
6916 kern_return_t kr;
6917 boolean_t aligned = TRUE;
6918 boolean_t contains_permanent_objects = FALSE;
6919 boolean_t encountered_sub_map = FALSE;
6920 vm_map_offset_t base_addr;
6921 vm_map_size_t copy_size;
6922 vm_map_size_t total_size;
6923
6924
6925 /*
6926 * Check for null copy object.
6927 */
6928
6929 if (copy == VM_MAP_COPY_NULL)
6930 return(KERN_SUCCESS);
6931
6932 /*
6933 * Check for special kernel buffer allocated
6934 * by new_ipc_kmsg_copyin.
6935 */
6936
6937 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
6938 return(vm_map_copyout_kernel_buffer(
6939 dst_map, &dst_addr,
6940 copy, TRUE, discard_on_success));
6941 }
6942
6943 /*
6944 * Only works for entry lists at the moment. Will
6945 * support page lists later.
6946 */
6947
6948 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
6949
6950 if (copy->size == 0) {
6951 if (discard_on_success)
6952 vm_map_copy_discard(copy);
6953 return(KERN_SUCCESS);
6954 }
6955
6956 /*
6957 * Verify that the destination is all writeable
6958 * initially. We have to trunc the destination
6959 * address and round the copy size or we'll end up
6960 * splitting entries in strange ways.
6961 */
6962
6963 if (!VM_MAP_PAGE_ALIGNED(copy->size,
6964 VM_MAP_PAGE_MASK(dst_map)) ||
6965 !VM_MAP_PAGE_ALIGNED(copy->offset,
6966 VM_MAP_PAGE_MASK(dst_map)) ||
6967 !VM_MAP_PAGE_ALIGNED(dst_addr,
6968 VM_MAP_PAGE_MASK(dst_map)))
6969 {
6970 aligned = FALSE;
6971 dst_end = vm_map_round_page(dst_addr + copy->size,
6972 VM_MAP_PAGE_MASK(dst_map));
6973 } else {
6974 dst_end = dst_addr + copy->size;
6975 }
6976
6977 vm_map_lock(dst_map);
6978
6979 /* LP64todo - remove this check when vm_map_commpage64()
6980 * no longer has to stuff in a map_entry for the commpage
6981 * above the map's max_offset.
6982 */
6983 if (dst_addr >= dst_map->max_offset) {
6984 vm_map_unlock(dst_map);
6985 return(KERN_INVALID_ADDRESS);
6986 }
6987
6988 start_pass_1:
6989 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
6990 vm_map_unlock(dst_map);
6991 return(KERN_INVALID_ADDRESS);
6992 }
6993 vm_map_clip_start(dst_map,
6994 tmp_entry,
6995 vm_map_trunc_page(dst_addr,
6996 VM_MAP_PAGE_MASK(dst_map)));
6997 for (entry = tmp_entry;;) {
6998 vm_map_entry_t next = entry->vme_next;
6999
7000 while(entry->is_sub_map) {
7001 vm_map_offset_t sub_start;
7002 vm_map_offset_t sub_end;
7003 vm_map_offset_t local_end;
7004
7005 if (entry->in_transition) {
7006
7007 /*
7008 * Say that we are waiting, and wait for entry.
7009 */
7010 entry->needs_wakeup = TRUE;
7011 vm_map_entry_wait(dst_map, THREAD_UNINT);
7012
7013 goto start_pass_1;
7014 }
7015
7016 local_end = entry->vme_end;
7017 if (!(entry->needs_copy)) {
7018 /* if needs_copy we are a COW submap */
7019 /* in such a case we just replace so */
7020 /* there is no need for the follow- */
7021 /* ing check. */
7022 encountered_sub_map = TRUE;
7023 sub_start = VME_OFFSET(entry);
7024
7025 if(entry->vme_end < dst_end)
7026 sub_end = entry->vme_end;
7027 else
7028 sub_end = dst_end;
7029 sub_end -= entry->vme_start;
7030 sub_end += VME_OFFSET(entry);
7031 vm_map_unlock(dst_map);
7032
7033 kr = vm_map_overwrite_submap_recurse(
7034 VME_SUBMAP(entry),
7035 sub_start,
7036 sub_end - sub_start);
7037 if(kr != KERN_SUCCESS)
7038 return kr;
7039 vm_map_lock(dst_map);
7040 }
7041
7042 if (dst_end <= entry->vme_end)
7043 goto start_overwrite;
7044 if(!vm_map_lookup_entry(dst_map, local_end,
7045 &entry)) {
7046 vm_map_unlock(dst_map);
7047 return(KERN_INVALID_ADDRESS);
7048 }
7049 next = entry->vme_next;
7050 }
7051
7052 if ( ! (entry->protection & VM_PROT_WRITE)) {
7053 vm_map_unlock(dst_map);
7054 return(KERN_PROTECTION_FAILURE);
7055 }
7056
7057 /*
7058 * If the entry is in transition, we must wait
7059 * for it to exit that state. Anything could happen
7060 * when we unlock the map, so start over.
7061 */
7062 if (entry->in_transition) {
7063
7064 /*
7065 * Say that we are waiting, and wait for entry.
7066 */
7067 entry->needs_wakeup = TRUE;
7068 vm_map_entry_wait(dst_map, THREAD_UNINT);
7069
7070 goto start_pass_1;
7071 }
7072
7073 /*
7074 * our range is contained completely within this map entry
7075 */
7076 if (dst_end <= entry->vme_end)
7077 break;
7078 /*
7079 * check that range specified is contiguous region
7080 */
7081 if ((next == vm_map_to_entry(dst_map)) ||
7082 (next->vme_start != entry->vme_end)) {
7083 vm_map_unlock(dst_map);
7084 return(KERN_INVALID_ADDRESS);
7085 }
7086
7087
7088 /*
7089 * Check for permanent objects in the destination.
7090 */
7091 if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
7092 ((!VME_OBJECT(entry)->internal) ||
7093 (VME_OBJECT(entry)->true_share))) {
7094 contains_permanent_objects = TRUE;
7095 }
7096
7097 entry = next;
7098 }/* for */
7099
7100 start_overwrite:
7101 /*
7102 * If there are permanent objects in the destination, then
7103 * the copy cannot be interrupted.
7104 */
7105
7106 if (interruptible && contains_permanent_objects) {
7107 vm_map_unlock(dst_map);
7108 return(KERN_FAILURE); /* XXX */
7109 }
7110
7111 /*
7112 *
7113 * Make a second pass, overwriting the data
7114 * At the beginning of each loop iteration,
7115 * the next entry to be overwritten is "tmp_entry"
7116 * (initially, the value returned from the lookup above),
7117 * and the starting address expected in that entry
7118 * is "start".
7119 */
7120
7121 total_size = copy->size;
7122 if(encountered_sub_map) {
7123 copy_size = 0;
7124 /* re-calculate tmp_entry since we've had the map */
7125 /* unlocked */
7126 if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
7127 vm_map_unlock(dst_map);
7128 return(KERN_INVALID_ADDRESS);
7129 }
7130 } else {
7131 copy_size = copy->size;
7132 }
7133
7134 base_addr = dst_addr;
7135 while(TRUE) {
7136 /* deconstruct the copy object and do in parts */
7137 /* only in sub_map, interruptable case */
7138 vm_map_entry_t copy_entry;
7139 vm_map_entry_t previous_prev = VM_MAP_ENTRY_NULL;
7140 vm_map_entry_t next_copy = VM_MAP_ENTRY_NULL;
7141 int nentries;
7142 int remaining_entries = 0;
7143 vm_map_offset_t new_offset = 0;
7144
7145 for (entry = tmp_entry; copy_size == 0;) {
7146 vm_map_entry_t next;
7147
7148 next = entry->vme_next;
7149
7150 /* tmp_entry and base address are moved along */
7151 /* each time we encounter a sub-map. Otherwise */
7152 /* entry can outpase tmp_entry, and the copy_size */
7153 /* may reflect the distance between them */
7154 /* if the current entry is found to be in transition */
7155 /* we will start over at the beginning or the last */
7156 /* encounter of a submap as dictated by base_addr */
7157 /* we will zero copy_size accordingly. */
7158 if (entry->in_transition) {
7159 /*
7160 * Say that we are waiting, and wait for entry.
7161 */
7162 entry->needs_wakeup = TRUE;
7163 vm_map_entry_wait(dst_map, THREAD_UNINT);
7164
7165 if(!vm_map_lookup_entry(dst_map, base_addr,
7166 &tmp_entry)) {
7167 vm_map_unlock(dst_map);
7168 return(KERN_INVALID_ADDRESS);
7169 }
7170 copy_size = 0;
7171 entry = tmp_entry;
7172 continue;
7173 }
7174 if(entry->is_sub_map) {
7175 vm_map_offset_t sub_start;
7176 vm_map_offset_t sub_end;
7177 vm_map_offset_t local_end;
7178
7179 if (entry->needs_copy) {
7180 /* if this is a COW submap */
7181 /* just back the range with a */
7182 /* anonymous entry */
7183 if(entry->vme_end < dst_end)
7184 sub_end = entry->vme_end;
7185 else
7186 sub_end = dst_end;
7187 if(entry->vme_start < base_addr)
7188 sub_start = base_addr;
7189 else
7190 sub_start = entry->vme_start;
7191 vm_map_clip_end(
7192 dst_map, entry, sub_end);
7193 vm_map_clip_start(
7194 dst_map, entry, sub_start);
7195 assert(!entry->use_pmap);
7196 entry->is_sub_map = FALSE;
7197 vm_map_deallocate(
7198 VME_SUBMAP(entry));
7199 VME_SUBMAP_SET(entry, NULL);
7200 entry->is_shared = FALSE;
7201 entry->needs_copy = FALSE;
7202 VME_OFFSET_SET(entry, 0);
7203 /*
7204 * XXX FBDP
7205 * We should propagate the protections
7206 * of the submap entry here instead
7207 * of forcing them to VM_PROT_ALL...
7208 * Or better yet, we should inherit
7209 * the protection of the copy_entry.
7210 */
7211 entry->protection = VM_PROT_ALL;
7212 entry->max_protection = VM_PROT_ALL;
7213 entry->wired_count = 0;
7214 entry->user_wired_count = 0;
7215 if(entry->inheritance
7216 == VM_INHERIT_SHARE)
7217 entry->inheritance = VM_INHERIT_COPY;
7218 continue;
7219 }
7220 /* first take care of any non-sub_map */
7221 /* entries to send */
7222 if(base_addr < entry->vme_start) {
7223 /* stuff to send */
7224 copy_size =
7225 entry->vme_start - base_addr;
7226 break;
7227 }
7228 sub_start = VME_OFFSET(entry);
7229
7230 if(entry->vme_end < dst_end)
7231 sub_end = entry->vme_end;
7232 else
7233 sub_end = dst_end;
7234 sub_end -= entry->vme_start;
7235 sub_end += VME_OFFSET(entry);
7236 local_end = entry->vme_end;
7237 vm_map_unlock(dst_map);
7238 copy_size = sub_end - sub_start;
7239
7240 /* adjust the copy object */
7241 if (total_size > copy_size) {
7242 vm_map_size_t local_size = 0;
7243 vm_map_size_t entry_size;
7244
7245 nentries = 1;
7246 new_offset = copy->offset;
7247 copy_entry = vm_map_copy_first_entry(copy);
7248 while(copy_entry !=
7249 vm_map_copy_to_entry(copy)){
7250 entry_size = copy_entry->vme_end -
7251 copy_entry->vme_start;
7252 if((local_size < copy_size) &&
7253 ((local_size + entry_size)
7254 >= copy_size)) {
7255 vm_map_copy_clip_end(copy,
7256 copy_entry,
7257 copy_entry->vme_start +
7258 (copy_size - local_size));
7259 entry_size = copy_entry->vme_end -
7260 copy_entry->vme_start;
7261 local_size += entry_size;
7262 new_offset += entry_size;
7263 }
7264 if(local_size >= copy_size) {
7265 next_copy = copy_entry->vme_next;
7266 copy_entry->vme_next =
7267 vm_map_copy_to_entry(copy);
7268 previous_prev =
7269 copy->cpy_hdr.links.prev;
7270 copy->cpy_hdr.links.prev = copy_entry;
7271 copy->size = copy_size;
7272 remaining_entries =
7273 copy->cpy_hdr.nentries;
7274 remaining_entries -= nentries;
7275 copy->cpy_hdr.nentries = nentries;
7276 break;
7277 } else {
7278 local_size += entry_size;
7279 new_offset += entry_size;
7280 nentries++;
7281 }
7282 copy_entry = copy_entry->vme_next;
7283 }
7284 }
7285
7286 if((entry->use_pmap) && (pmap == NULL)) {
7287 kr = vm_map_copy_overwrite_nested(
7288 VME_SUBMAP(entry),
7289 sub_start,
7290 copy,
7291 interruptible,
7292 VME_SUBMAP(entry)->pmap,
7293 TRUE);
7294 } else if (pmap != NULL) {
7295 kr = vm_map_copy_overwrite_nested(
7296 VME_SUBMAP(entry),
7297 sub_start,
7298 copy,
7299 interruptible, pmap,
7300 TRUE);
7301 } else {
7302 kr = vm_map_copy_overwrite_nested(
7303 VME_SUBMAP(entry),
7304 sub_start,
7305 copy,
7306 interruptible,
7307 dst_map->pmap,
7308 TRUE);
7309 }
7310 if(kr != KERN_SUCCESS) {
7311 if(next_copy != NULL) {
7312 copy->cpy_hdr.nentries +=
7313 remaining_entries;
7314 copy->cpy_hdr.links.prev->vme_next =
7315 next_copy;
7316 copy->cpy_hdr.links.prev
7317 = previous_prev;
7318 copy->size = total_size;
7319 }
7320 return kr;
7321 }
7322 if (dst_end <= local_end) {
7323 return(KERN_SUCCESS);
7324 }
7325 /* otherwise copy no longer exists, it was */
7326 /* destroyed after successful copy_overwrite */
7327 copy = (vm_map_copy_t)
7328 zalloc(vm_map_copy_zone);
7329 copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
7330 vm_map_copy_first_entry(copy) =
7331 vm_map_copy_last_entry(copy) =
7332 vm_map_copy_to_entry(copy);
7333 copy->type = VM_MAP_COPY_ENTRY_LIST;
7334 copy->offset = new_offset;
7335
7336 /*
7337 * XXX FBDP
7338 * this does not seem to deal with
7339 * the VM map store (R&B tree)
7340 */
7341
7342 total_size -= copy_size;
7343 copy_size = 0;
7344 /* put back remainder of copy in container */
7345 if(next_copy != NULL) {
7346 copy->cpy_hdr.nentries = remaining_entries;
7347 copy->cpy_hdr.links.next = next_copy;
7348 copy->cpy_hdr.links.prev = previous_prev;
7349 copy->size = total_size;
7350 next_copy->vme_prev =
7351 vm_map_copy_to_entry(copy);
7352 next_copy = NULL;
7353 }
7354 base_addr = local_end;
7355 vm_map_lock(dst_map);
7356 if(!vm_map_lookup_entry(dst_map,
7357 local_end, &tmp_entry)) {
7358 vm_map_unlock(dst_map);
7359 return(KERN_INVALID_ADDRESS);
7360 }
7361 entry = tmp_entry;
7362 continue;
7363 }
7364 if (dst_end <= entry->vme_end) {
7365 copy_size = dst_end - base_addr;
7366 break;
7367 }
7368
7369 if ((next == vm_map_to_entry(dst_map)) ||
7370 (next->vme_start != entry->vme_end)) {
7371 vm_map_unlock(dst_map);
7372 return(KERN_INVALID_ADDRESS);
7373 }
7374
7375 entry = next;
7376 }/* for */
7377
7378 next_copy = NULL;
7379 nentries = 1;
7380
7381 /* adjust the copy object */
7382 if (total_size > copy_size) {
7383 vm_map_size_t local_size = 0;
7384 vm_map_size_t entry_size;
7385
7386 new_offset = copy->offset;
7387 copy_entry = vm_map_copy_first_entry(copy);
7388 while(copy_entry != vm_map_copy_to_entry(copy)) {
7389 entry_size = copy_entry->vme_end -
7390 copy_entry->vme_start;
7391 if((local_size < copy_size) &&
7392 ((local_size + entry_size)
7393 >= copy_size)) {
7394 vm_map_copy_clip_end(copy, copy_entry,
7395 copy_entry->vme_start +
7396 (copy_size - local_size));
7397 entry_size = copy_entry->vme_end -
7398 copy_entry->vme_start;
7399 local_size += entry_size;
7400 new_offset += entry_size;
7401 }
7402 if(local_size >= copy_size) {
7403 next_copy = copy_entry->vme_next;
7404 copy_entry->vme_next =
7405 vm_map_copy_to_entry(copy);
7406 previous_prev =
7407 copy->cpy_hdr.links.prev;
7408 copy->cpy_hdr.links.prev = copy_entry;
7409 copy->size = copy_size;
7410 remaining_entries =
7411 copy->cpy_hdr.nentries;
7412 remaining_entries -= nentries;
7413 copy->cpy_hdr.nentries = nentries;
7414 break;
7415 } else {
7416 local_size += entry_size;
7417 new_offset += entry_size;
7418 nentries++;
7419 }
7420 copy_entry = copy_entry->vme_next;
7421 }
7422 }
7423
7424 if (aligned) {
7425 pmap_t local_pmap;
7426
7427 if(pmap)
7428 local_pmap = pmap;
7429 else
7430 local_pmap = dst_map->pmap;
7431
7432 if ((kr = vm_map_copy_overwrite_aligned(
7433 dst_map, tmp_entry, copy,
7434 base_addr, local_pmap)) != KERN_SUCCESS) {
7435 if(next_copy != NULL) {
7436 copy->cpy_hdr.nentries +=
7437 remaining_entries;
7438 copy->cpy_hdr.links.prev->vme_next =
7439 next_copy;
7440 copy->cpy_hdr.links.prev =
7441 previous_prev;
7442 copy->size += copy_size;
7443 }
7444 return kr;
7445 }
7446 vm_map_unlock(dst_map);
7447 } else {
7448 /*
7449 * Performance gain:
7450 *
7451 * if the copy and dst address are misaligned but the same
7452 * offset within the page we can copy_not_aligned the
7453 * misaligned parts and copy aligned the rest. If they are
7454 * aligned but len is unaligned we simply need to copy
7455 * the end bit unaligned. We'll need to split the misaligned
7456 * bits of the region in this case !
7457 */
7458 /* ALWAYS UNLOCKS THE dst_map MAP */
7459 kr = vm_map_copy_overwrite_unaligned(
7460 dst_map,
7461 tmp_entry,
7462 copy,
7463 base_addr,
7464 discard_on_success);
7465 if (kr != KERN_SUCCESS) {
7466 if(next_copy != NULL) {
7467 copy->cpy_hdr.nentries +=
7468 remaining_entries;
7469 copy->cpy_hdr.links.prev->vme_next =
7470 next_copy;
7471 copy->cpy_hdr.links.prev =
7472 previous_prev;
7473 copy->size += copy_size;
7474 }
7475 return kr;
7476 }
7477 }
7478 total_size -= copy_size;
7479 if(total_size == 0)
7480 break;
7481 base_addr += copy_size;
7482 copy_size = 0;
7483 copy->offset = new_offset;
7484 if(next_copy != NULL) {
7485 copy->cpy_hdr.nentries = remaining_entries;
7486 copy->cpy_hdr.links.next = next_copy;
7487 copy->cpy_hdr.links.prev = previous_prev;
7488 next_copy->vme_prev = vm_map_copy_to_entry(copy);
7489 copy->size = total_size;
7490 }
7491 vm_map_lock(dst_map);
7492 while(TRUE) {
7493 if (!vm_map_lookup_entry(dst_map,
7494 base_addr, &tmp_entry)) {
7495 vm_map_unlock(dst_map);
7496 return(KERN_INVALID_ADDRESS);
7497 }
7498 if (tmp_entry->in_transition) {
7499 entry->needs_wakeup = TRUE;
7500 vm_map_entry_wait(dst_map, THREAD_UNINT);
7501 } else {
7502 break;
7503 }
7504 }
7505 vm_map_clip_start(dst_map,
7506 tmp_entry,
7507 vm_map_trunc_page(base_addr,
7508 VM_MAP_PAGE_MASK(dst_map)));
7509
7510 entry = tmp_entry;
7511 } /* while */
7512
7513 /*
7514 * Throw away the vm_map_copy object
7515 */
7516 if (discard_on_success)
7517 vm_map_copy_discard(copy);
7518
7519 return(KERN_SUCCESS);
7520 }/* vm_map_copy_overwrite */
7521
7522 kern_return_t
7523 vm_map_copy_overwrite(
7524 vm_map_t dst_map,
7525 vm_map_offset_t dst_addr,
7526 vm_map_copy_t copy,
7527 boolean_t interruptible)
7528 {
7529 vm_map_size_t head_size, tail_size;
7530 vm_map_copy_t head_copy, tail_copy;
7531 vm_map_offset_t head_addr, tail_addr;
7532 vm_map_entry_t entry;
7533 kern_return_t kr;
7534
7535 head_size = 0;
7536 tail_size = 0;
7537 head_copy = NULL;
7538 tail_copy = NULL;
7539 head_addr = 0;
7540 tail_addr = 0;
7541
7542 if (interruptible ||
7543 copy == VM_MAP_COPY_NULL ||
7544 copy->type != VM_MAP_COPY_ENTRY_LIST) {
7545 /*
7546 * We can't split the "copy" map if we're interruptible
7547 * or if we don't have a "copy" map...
7548 */
7549 blunt_copy:
7550 return vm_map_copy_overwrite_nested(dst_map,
7551 dst_addr,
7552 copy,
7553 interruptible,
7554 (pmap_t) NULL,
7555 TRUE);
7556 }
7557
7558 if (copy->size < 3 * PAGE_SIZE) {
7559 /*
7560 * Too small to bother with optimizing...
7561 */
7562 goto blunt_copy;
7563 }
7564
7565 if ((dst_addr & VM_MAP_PAGE_MASK(dst_map)) !=
7566 (copy->offset & VM_MAP_PAGE_MASK(dst_map))) {
7567 /*
7568 * Incompatible mis-alignment of source and destination...
7569 */
7570 goto blunt_copy;
7571 }
7572
7573 /*
7574 * Proper alignment or identical mis-alignment at the beginning.
7575 * Let's try and do a small unaligned copy first (if needed)
7576 * and then an aligned copy for the rest.
7577 */
7578 if (!page_aligned(dst_addr)) {
7579 head_addr = dst_addr;
7580 head_size = (VM_MAP_PAGE_SIZE(dst_map) -
7581 (copy->offset & VM_MAP_PAGE_MASK(dst_map)));
7582 }
7583 if (!page_aligned(copy->offset + copy->size)) {
7584 /*
7585 * Mis-alignment at the end.
7586 * Do an aligned copy up to the last page and
7587 * then an unaligned copy for the remaining bytes.
7588 */
7589 tail_size = ((copy->offset + copy->size) &
7590 VM_MAP_PAGE_MASK(dst_map));
7591 tail_addr = dst_addr + copy->size - tail_size;
7592 }
7593
7594 if (head_size + tail_size == copy->size) {
7595 /*
7596 * It's all unaligned, no optimization possible...
7597 */
7598 goto blunt_copy;
7599 }
7600
7601 /*
7602 * Can't optimize if there are any submaps in the
7603 * destination due to the way we free the "copy" map
7604 * progressively in vm_map_copy_overwrite_nested()
7605 * in that case.
7606 */
7607 vm_map_lock_read(dst_map);
7608 if (! vm_map_lookup_entry(dst_map, dst_addr, &entry)) {
7609 vm_map_unlock_read(dst_map);
7610 goto blunt_copy;
7611 }
7612 for (;
7613 (entry != vm_map_copy_to_entry(copy) &&
7614 entry->vme_start < dst_addr + copy->size);
7615 entry = entry->vme_next) {
7616 if (entry->is_sub_map) {
7617 vm_map_unlock_read(dst_map);
7618 goto blunt_copy;
7619 }
7620 }
7621 vm_map_unlock_read(dst_map);
7622
7623 if (head_size) {
7624 /*
7625 * Unaligned copy of the first "head_size" bytes, to reach
7626 * a page boundary.
7627 */
7628
7629 /*
7630 * Extract "head_copy" out of "copy".
7631 */
7632 head_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
7633 head_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
7634 vm_map_copy_first_entry(head_copy) =
7635 vm_map_copy_to_entry(head_copy);
7636 vm_map_copy_last_entry(head_copy) =
7637 vm_map_copy_to_entry(head_copy);
7638 head_copy->type = VM_MAP_COPY_ENTRY_LIST;
7639 head_copy->cpy_hdr.nentries = 0;
7640 head_copy->cpy_hdr.entries_pageable =
7641 copy->cpy_hdr.entries_pageable;
7642 vm_map_store_init(&head_copy->cpy_hdr);
7643
7644 head_copy->offset = copy->offset;
7645 head_copy->size = head_size;
7646
7647 copy->offset += head_size;
7648 copy->size -= head_size;
7649
7650 entry = vm_map_copy_first_entry(copy);
7651 vm_map_copy_clip_end(copy, entry, copy->offset);
7652 vm_map_copy_entry_unlink(copy, entry);
7653 vm_map_copy_entry_link(head_copy,
7654 vm_map_copy_to_entry(head_copy),
7655 entry);
7656
7657 /*
7658 * Do the unaligned copy.
7659 */
7660 kr = vm_map_copy_overwrite_nested(dst_map,
7661 head_addr,
7662 head_copy,
7663 interruptible,
7664 (pmap_t) NULL,
7665 FALSE);
7666 if (kr != KERN_SUCCESS)
7667 goto done;
7668 }
7669
7670 if (tail_size) {
7671 /*
7672 * Extract "tail_copy" out of "copy".
7673 */
7674 tail_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
7675 tail_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
7676 vm_map_copy_first_entry(tail_copy) =
7677 vm_map_copy_to_entry(tail_copy);
7678 vm_map_copy_last_entry(tail_copy) =
7679 vm_map_copy_to_entry(tail_copy);
7680 tail_copy->type = VM_MAP_COPY_ENTRY_LIST;
7681 tail_copy->cpy_hdr.nentries = 0;
7682 tail_copy->cpy_hdr.entries_pageable =
7683 copy->cpy_hdr.entries_pageable;
7684 vm_map_store_init(&tail_copy->cpy_hdr);
7685
7686 tail_copy->offset = copy->offset + copy->size - tail_size;
7687 tail_copy->size = tail_size;
7688
7689 copy->size -= tail_size;
7690
7691 entry = vm_map_copy_last_entry(copy);
7692 vm_map_copy_clip_start(copy, entry, tail_copy->offset);
7693 entry = vm_map_copy_last_entry(copy);
7694 vm_map_copy_entry_unlink(copy, entry);
7695 vm_map_copy_entry_link(tail_copy,
7696 vm_map_copy_last_entry(tail_copy),
7697 entry);
7698 }
7699
7700 /*
7701 * Copy most (or possibly all) of the data.
7702 */
7703 kr = vm_map_copy_overwrite_nested(dst_map,
7704 dst_addr + head_size,
7705 copy,
7706 interruptible,
7707 (pmap_t) NULL,
7708 FALSE);
7709 if (kr != KERN_SUCCESS) {
7710 goto done;
7711 }
7712
7713 if (tail_size) {
7714 kr = vm_map_copy_overwrite_nested(dst_map,
7715 tail_addr,
7716 tail_copy,
7717 interruptible,
7718 (pmap_t) NULL,
7719 FALSE);
7720 }
7721
7722 done:
7723 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
7724 if (kr == KERN_SUCCESS) {
7725 /*
7726 * Discard all the copy maps.
7727 */
7728 if (head_copy) {
7729 vm_map_copy_discard(head_copy);
7730 head_copy = NULL;
7731 }
7732 vm_map_copy_discard(copy);
7733 if (tail_copy) {
7734 vm_map_copy_discard(tail_copy);
7735 tail_copy = NULL;
7736 }
7737 } else {
7738 /*
7739 * Re-assemble the original copy map.
7740 */
7741 if (head_copy) {
7742 entry = vm_map_copy_first_entry(head_copy);
7743 vm_map_copy_entry_unlink(head_copy, entry);
7744 vm_map_copy_entry_link(copy,
7745 vm_map_copy_to_entry(copy),
7746 entry);
7747 copy->offset -= head_size;
7748 copy->size += head_size;
7749 vm_map_copy_discard(head_copy);
7750 head_copy = NULL;
7751 }
7752 if (tail_copy) {
7753 entry = vm_map_copy_last_entry(tail_copy);
7754 vm_map_copy_entry_unlink(tail_copy, entry);
7755 vm_map_copy_entry_link(copy,
7756 vm_map_copy_last_entry(copy),
7757 entry);
7758 copy->size += tail_size;
7759 vm_map_copy_discard(tail_copy);
7760 tail_copy = NULL;
7761 }
7762 }
7763 return kr;
7764 }
7765
7766
7767 /*
7768 * Routine: vm_map_copy_overwrite_unaligned [internal use only]
7769 *
7770 * Description:
7771 * Physically copy unaligned data
7772 *
7773 * Implementation:
7774 * Unaligned parts of pages have to be physically copied. We use
7775 * a modified form of vm_fault_copy (which understands non-aligned
7776 * page offsets and sizes) to do the copy. We attempt to copy as
7777 * much memory in one go as possible, however vm_fault_copy copies
7778 * within 1 memory object so we have to find the smaller of "amount left"
7779 * "source object data size" and "target object data size". With
7780 * unaligned data we don't need to split regions, therefore the source
7781 * (copy) object should be one map entry, the target range may be split
7782 * over multiple map entries however. In any event we are pessimistic
7783 * about these assumptions.
7784 *
7785 * Assumptions:
7786 * dst_map is locked on entry and is return locked on success,
7787 * unlocked on error.
7788 */
7789
7790 static kern_return_t
7791 vm_map_copy_overwrite_unaligned(
7792 vm_map_t dst_map,
7793 vm_map_entry_t entry,
7794 vm_map_copy_t copy,
7795 vm_map_offset_t start,
7796 boolean_t discard_on_success)
7797 {
7798 vm_map_entry_t copy_entry;
7799 vm_map_entry_t copy_entry_next;
7800 vm_map_version_t version;
7801 vm_object_t dst_object;
7802 vm_object_offset_t dst_offset;
7803 vm_object_offset_t src_offset;
7804 vm_object_offset_t entry_offset;
7805 vm_map_offset_t entry_end;
7806 vm_map_size_t src_size,
7807 dst_size,
7808 copy_size,
7809 amount_left;
7810 kern_return_t kr = KERN_SUCCESS;
7811
7812
7813 copy_entry = vm_map_copy_first_entry(copy);
7814
7815 vm_map_lock_write_to_read(dst_map);
7816
7817 src_offset = copy->offset - vm_object_trunc_page(copy->offset);
7818 amount_left = copy->size;
7819 /*
7820 * unaligned so we never clipped this entry, we need the offset into
7821 * the vm_object not just the data.
7822 */
7823 while (amount_left > 0) {
7824
7825 if (entry == vm_map_to_entry(dst_map)) {
7826 vm_map_unlock_read(dst_map);
7827 return KERN_INVALID_ADDRESS;
7828 }
7829
7830 /* "start" must be within the current map entry */
7831 assert ((start>=entry->vme_start) && (start<entry->vme_end));
7832
7833 dst_offset = start - entry->vme_start;
7834
7835 dst_size = entry->vme_end - start;
7836
7837 src_size = copy_entry->vme_end -
7838 (copy_entry->vme_start + src_offset);
7839
7840 if (dst_size < src_size) {
7841 /*
7842 * we can only copy dst_size bytes before
7843 * we have to get the next destination entry
7844 */
7845 copy_size = dst_size;
7846 } else {
7847 /*
7848 * we can only copy src_size bytes before
7849 * we have to get the next source copy entry
7850 */
7851 copy_size = src_size;
7852 }
7853
7854 if (copy_size > amount_left) {
7855 copy_size = amount_left;
7856 }
7857 /*
7858 * Entry needs copy, create a shadow shadow object for
7859 * Copy on write region.
7860 */
7861 if (entry->needs_copy &&
7862 ((entry->protection & VM_PROT_WRITE) != 0))
7863 {
7864 if (vm_map_lock_read_to_write(dst_map)) {
7865 vm_map_lock_read(dst_map);
7866 goto RetryLookup;
7867 }
7868 VME_OBJECT_SHADOW(entry,
7869 (vm_map_size_t)(entry->vme_end
7870 - entry->vme_start));
7871 entry->needs_copy = FALSE;
7872 vm_map_lock_write_to_read(dst_map);
7873 }
7874 dst_object = VME_OBJECT(entry);
7875 /*
7876 * unlike with the virtual (aligned) copy we're going
7877 * to fault on it therefore we need a target object.
7878 */
7879 if (dst_object == VM_OBJECT_NULL) {
7880 if (vm_map_lock_read_to_write(dst_map)) {
7881 vm_map_lock_read(dst_map);
7882 goto RetryLookup;
7883 }
7884 dst_object = vm_object_allocate((vm_map_size_t)
7885 entry->vme_end - entry->vme_start);
7886 VME_OBJECT(entry) = dst_object;
7887 VME_OFFSET_SET(entry, 0);
7888 assert(entry->use_pmap);
7889 vm_map_lock_write_to_read(dst_map);
7890 }
7891 /*
7892 * Take an object reference and unlock map. The "entry" may
7893 * disappear or change when the map is unlocked.
7894 */
7895 vm_object_reference(dst_object);
7896 version.main_timestamp = dst_map->timestamp;
7897 entry_offset = VME_OFFSET(entry);
7898 entry_end = entry->vme_end;
7899 vm_map_unlock_read(dst_map);
7900 /*
7901 * Copy as much as possible in one pass
7902 */
7903 kr = vm_fault_copy(
7904 VME_OBJECT(copy_entry),
7905 VME_OFFSET(copy_entry) + src_offset,
7906 &copy_size,
7907 dst_object,
7908 entry_offset + dst_offset,
7909 dst_map,
7910 &version,
7911 THREAD_UNINT );
7912
7913 start += copy_size;
7914 src_offset += copy_size;
7915 amount_left -= copy_size;
7916 /*
7917 * Release the object reference
7918 */
7919 vm_object_deallocate(dst_object);
7920 /*
7921 * If a hard error occurred, return it now
7922 */
7923 if (kr != KERN_SUCCESS)
7924 return kr;
7925
7926 if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
7927 || amount_left == 0)
7928 {
7929 /*
7930 * all done with this copy entry, dispose.
7931 */
7932 copy_entry_next = copy_entry->vme_next;
7933
7934 if (discard_on_success) {
7935 vm_map_copy_entry_unlink(copy, copy_entry);
7936 assert(!copy_entry->is_sub_map);
7937 vm_object_deallocate(VME_OBJECT(copy_entry));
7938 vm_map_copy_entry_dispose(copy, copy_entry);
7939 }
7940
7941 if (copy_entry_next == vm_map_copy_to_entry(copy) &&
7942 amount_left) {
7943 /*
7944 * not finished copying but run out of source
7945 */
7946 return KERN_INVALID_ADDRESS;
7947 }
7948
7949 copy_entry = copy_entry_next;
7950
7951 src_offset = 0;
7952 }
7953
7954 if (amount_left == 0)
7955 return KERN_SUCCESS;
7956
7957 vm_map_lock_read(dst_map);
7958 if (version.main_timestamp == dst_map->timestamp) {
7959 if (start == entry_end) {
7960 /*
7961 * destination region is split. Use the version
7962 * information to avoid a lookup in the normal
7963 * case.
7964 */
7965 entry = entry->vme_next;
7966 /*
7967 * should be contiguous. Fail if we encounter
7968 * a hole in the destination.
7969 */
7970 if (start != entry->vme_start) {
7971 vm_map_unlock_read(dst_map);
7972 return KERN_INVALID_ADDRESS ;
7973 }
7974 }
7975 } else {
7976 /*
7977 * Map version check failed.
7978 * we must lookup the entry because somebody
7979 * might have changed the map behind our backs.
7980 */
7981 RetryLookup:
7982 if (!vm_map_lookup_entry(dst_map, start, &entry))
7983 {
7984 vm_map_unlock_read(dst_map);
7985 return KERN_INVALID_ADDRESS ;
7986 }
7987 }
7988 }/* while */
7989
7990 return KERN_SUCCESS;
7991 }/* vm_map_copy_overwrite_unaligned */
7992
/*
 *	Routine:	vm_map_copy_overwrite_aligned	[internal use only]
 *
 *	Description:
 *	Does all the vm_trickery possible for whole pages.
 *
 *	Walks the copy's entry list from the front.  Each copy entry is
 *	matched against the destination entry "tmp_entry" (which must begin
 *	exactly at "start"); whichever of the two is larger is clipped so
 *	both cover the same number of bytes, and the data is then moved
 *	either by substituting the source object into the destination
 *	entry (fast path) or by a physical copy via vm_fault_copy()
 *	(slow path).  Copy entries are unlinked and disposed of as they
 *	are consumed.
 *
 *	Parameters:
 *	dst_map		destination map; locked on entry (every early
 *			error return below unlocks it)
 *	tmp_entry	destination entry expected to start at "start"
 *	copy		entry-list copy object supplying the data
 *	start		current destination address
 *	pmap		unused
 *
 *	Returns:
 *	KERN_SUCCESS		all copy entries consumed; dst_map is
 *				still locked
 *	KERN_INVALID_ADDRESS	hole, unexpected submap or failed lookup
 *				in the destination; dst_map unlocked
 *	KERN_PROTECTION_FAILURE	destination entry not writable;
 *				dst_map unlocked
 *	other			error from vm_fault_copy(); dst_map
 *				unlocked
 *
 *	Implementation:
 *
 *	If there are no permanent objects in the destination,
 *	and the source and destination map entry zones match,
 *	and the destination map entry is not shared,
 *	then the map entries can be deleted and replaced
 *	with those from the copy.  The following code is the
 *	basic idea of what to do, but there are lots of annoying
 *	little details about getting protection and inheritance
 *	right.  Should add protection, inheritance, and sharing checks
 *	to the above pass and make sure that no wiring is involved.
 */

/* Counters: number of times the fast path was abandoned, by reason. */
int vm_map_copy_overwrite_aligned_src_not_internal = 0;
int vm_map_copy_overwrite_aligned_src_not_symmetric = 0;
int vm_map_copy_overwrite_aligned_src_large = 0;

static kern_return_t
vm_map_copy_overwrite_aligned(
	vm_map_t	dst_map,
	vm_map_entry_t	tmp_entry,
	vm_map_copy_t	copy,
	vm_map_offset_t	start,
	__unused pmap_t	pmap)
{
	vm_object_t	object;
	vm_map_entry_t	copy_entry;
	vm_map_size_t	copy_size;
	vm_map_size_t	size;
	vm_map_entry_t	entry;

	/*
	 * Always work on the first remaining copy entry; the loop ends
	 * once the copy's entry list is empty.
	 */
	while ((copy_entry = vm_map_copy_first_entry(copy))
		!= vm_map_copy_to_entry(copy))
	{
		copy_size = (copy_entry->vme_end - copy_entry->vme_start);

		entry = tmp_entry;
		if (entry->is_sub_map) {
			/* unnested when clipped earlier */
			assert(!entry->use_pmap);
		}
		/* Ran off the end of the destination map. */
		if (entry == vm_map_to_entry(dst_map)) {
			vm_map_unlock(dst_map);
			return KERN_INVALID_ADDRESS;
		}
		size = (entry->vme_end - entry->vme_start);
		/*
		 *	Make sure that no holes popped up in the
		 *	address map, and that the protection is
		 *	still valid, in case the map was unlocked
		 *	earlier.
		 */

		if ((entry->vme_start != start) || ((entry->is_sub_map)
				&& !entry->needs_copy)) {
			vm_map_unlock(dst_map);
			return(KERN_INVALID_ADDRESS);
		}
		assert(entry != vm_map_to_entry(dst_map));

		/*
		 *	Check protection again
		 */

		if ( ! (entry->protection & VM_PROT_WRITE)) {
			vm_map_unlock(dst_map);
			return(KERN_PROTECTION_FAILURE);
		}

		/*
		 *	Adjust to source size first
		 */

		if (copy_size < size) {
			if (entry->map_aligned &&
			    !VM_MAP_PAGE_ALIGNED(entry->vme_start + copy_size,
						 VM_MAP_PAGE_MASK(dst_map))) {
				/* no longer map-aligned */
				entry->map_aligned = FALSE;
			}
			vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
			size = copy_size;
		}

		/*
		 *	Adjust to destination size
		 */

		if (size < copy_size) {
			vm_map_copy_clip_end(copy, copy_entry,
					     copy_entry->vme_start + size);
			copy_size = size;
		}

		/* Source and destination entries now span the same size. */
		assert((entry->vme_end - entry->vme_start) == size);
		assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
		assert((copy_entry->vme_end - copy_entry->vme_start) == size);

		/*
		 *	If the destination contains temporary unshared memory,
		 *	we can perform the copy by throwing it away and
		 *	installing the source data.
		 */

		object = VME_OBJECT(entry);
		if ((!entry->is_shared &&
		     ((object == VM_OBJECT_NULL) ||
		      (object->internal && !object->true_share))) ||
		    entry->needs_copy) {
			vm_object_t		old_object = VME_OBJECT(entry);
			vm_object_offset_t	old_offset = VME_OFFSET(entry);
			vm_object_offset_t	offset;

			/*
			 * Ensure that the source and destination aren't
			 * identical
			 */
			if (old_object == VME_OBJECT(copy_entry) &&
			    old_offset == VME_OFFSET(copy_entry)) {
				/*
				 * Nothing to copy: just discard the copy
				 * entry.  The deallocate below presumably
				 * drops the object reference that the
				 * disposed copy entry was holding.
				 */
				vm_map_copy_entry_unlink(copy, copy_entry);
				vm_map_copy_entry_dispose(copy, copy_entry);

				if (old_object != VM_OBJECT_NULL)
					vm_object_deallocate(old_object);

				start = tmp_entry->vme_end;
				tmp_entry = tmp_entry->vme_next;
				continue;
			}

#define __TRADEOFF1_OBJ_SIZE (64 * 1024 * 1024)	/* 64 MB */
#define __TRADEOFF1_COPY_SIZE (128 * 1024)	/* 128 KB */
			if (VME_OBJECT(copy_entry) != VM_OBJECT_NULL &&
			    VME_OBJECT(copy_entry)->vo_size >= __TRADEOFF1_OBJ_SIZE &&
			    copy_size <= __TRADEOFF1_COPY_SIZE) {
				/*
				 * Virtual vs. Physical copy tradeoff #1.
				 *
				 * Copying only a few pages out of a large
				 * object:  do a physical copy instead of
				 * a virtual copy, to avoid possibly keeping
				 * the entire large object alive because of
				 * those few copy-on-write pages.
				 */
				vm_map_copy_overwrite_aligned_src_large++;
				goto slow_copy;
			}

			if ((dst_map->pmap != kernel_pmap) &&
			    (VME_ALIAS(entry) >= VM_MEMORY_MALLOC) &&
			    (VME_ALIAS(entry) <= VM_MEMORY_MALLOC_LARGE_REUSED)) {
				vm_object_t new_object, new_shadow;

				/*
				 * We're about to map something over a mapping
				 * established by malloc()...
				 */
				new_object = VME_OBJECT(copy_entry);
				if (new_object != VM_OBJECT_NULL) {
					vm_object_lock_shared(new_object);
				}
				/*
				 * Walk down the shadow chain, holding only
				 * one object lock at a time, until we reach
				 * the bottom or an object that disqualifies
				 * the fast path.
				 */
				while (new_object != VM_OBJECT_NULL &&
				       !new_object->true_share &&
				       new_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
				       new_object->internal) {
					new_shadow = new_object->shadow;
					if (new_shadow == VM_OBJECT_NULL) {
						break;
					}
					vm_object_lock_shared(new_shadow);
					vm_object_unlock(new_object);
					new_object = new_shadow;
				}
				if (new_object != VM_OBJECT_NULL) {
					if (!new_object->internal) {
						/*
						 * The new mapping is backed
						 * by an external object.  We
						 * don't want malloc'ed memory
						 * to be replaced with such a
						 * non-anonymous mapping, so
						 * let's go off the optimized
						 * path...
						 */
						vm_map_copy_overwrite_aligned_src_not_internal++;
						vm_object_unlock(new_object);
						goto slow_copy;
					}
					if (new_object->true_share ||
					    new_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
						/*
						 * Same if there's a "true_share"
						 * object in the shadow chain, or
						 * an object with a non-default
						 * (SYMMETRIC) copy strategy.
						 */
						vm_map_copy_overwrite_aligned_src_not_symmetric++;
						vm_object_unlock(new_object);
						goto slow_copy;
					}
					vm_object_unlock(new_object);
				}
				/*
				 * The new mapping is still backed by
				 * anonymous (internal) memory, so it's
				 * OK to substitute it for the original
				 * malloc() mapping.
				 */
			}

			/*
			 * Tear down the existing destination mapping
			 * (pmap translations plus the reference on the old
			 * object or submap) before installing the source.
			 */
			if (old_object != VM_OBJECT_NULL) {
				if(entry->is_sub_map) {
					if(entry->use_pmap) {
#ifndef NO_NESTED_PMAP
						pmap_unnest(dst_map->pmap,
							    (addr64_t)entry->vme_start,
							    entry->vme_end - entry->vme_start);
#endif	/* NO_NESTED_PMAP */
						if(dst_map->mapped_in_other_pmaps) {
							/* clean up parent */
							/* map/maps */
							vm_map_submap_pmap_clean(
								dst_map, entry->vme_start,
								entry->vme_end,
								VME_SUBMAP(entry),
								VME_OFFSET(entry));
						}
					} else {
						vm_map_submap_pmap_clean(
							dst_map, entry->vme_start,
							entry->vme_end,
							VME_SUBMAP(entry),
							VME_OFFSET(entry));
					}
					vm_map_deallocate(VME_SUBMAP(entry));
				} else {
					if(dst_map->mapped_in_other_pmaps) {
						vm_object_pmap_protect_options(
							VME_OBJECT(entry),
							VME_OFFSET(entry),
							entry->vme_end
							- entry->vme_start,
							PMAP_NULL,
							entry->vme_start,
							VM_PROT_NONE,
							PMAP_OPTIONS_REMOVE);
					} else {
						pmap_remove_options(
							dst_map->pmap,
							(addr64_t)(entry->vme_start),
							(addr64_t)(entry->vme_end),
							PMAP_OPTIONS_REMOVE);
					}
					vm_object_deallocate(old_object);
				}
			}

			/*
			 * Hand the copy entry's object, offset and
			 * needs_copy state over to the destination entry;
			 * the copy entry itself is then discarded without
			 * deallocating the object.
			 */
			entry->is_sub_map = FALSE;
			VME_OBJECT_SET(entry, VME_OBJECT(copy_entry));
			object = VME_OBJECT(entry);
			entry->needs_copy = copy_entry->needs_copy;
			entry->wired_count = 0;
			entry->user_wired_count = 0;
			offset = VME_OFFSET(copy_entry);
			VME_OFFSET_SET(entry, offset);

			vm_map_copy_entry_unlink(copy, copy_entry);
			vm_map_copy_entry_dispose(copy, copy_entry);

			/*
			 * we could try to push pages into the pmap at this point, BUT
			 * this optimization only saved on average 2 us per page if ALL
			 * the pages in the source were currently mapped
			 * and ALL the pages in the dest were touched, if there were fewer
			 * than 2/3 of the pages touched, this optimization actually cost more cycles
			 * it also puts a lot of pressure on the pmap layer w/r to mapping structures
			 */

			/*
			 *	Set up for the next iteration.  The map
			 *	has not been unlocked, so the next
			 *	address should be at the end of this
			 *	entry, and the next map entry should be
			 *	the one following it.
			 */

			start = tmp_entry->vme_end;
			tmp_entry = tmp_entry->vme_next;
		} else {
			vm_map_version_t	version;
			vm_object_t		dst_object;
			vm_object_offset_t	dst_offset;
			kern_return_t		r;

			/*
			 * Physical copy.  Also reached via "goto" from the
			 * fast path above when one of its tradeoff checks
			 * rejects a virtual copy.
			 */
		slow_copy:
			if (entry->needs_copy) {
				VME_OBJECT_SHADOW(entry,
						  (entry->vme_end -
						   entry->vme_start));
				entry->needs_copy = FALSE;
			}

			dst_object = VME_OBJECT(entry);
			dst_offset = VME_OFFSET(entry);

			/*
			 *	Take an object reference, and record
			 *	the map version information so that the
			 *	map can be safely unlocked.
			 */

			if (dst_object == VM_OBJECT_NULL) {
				/*
				 * We would usually have just taken the
				 * optimized path above if the destination
				 * object has not been allocated yet.  But we
				 * now disable that optimization if the copy
				 * entry's object is not backed by anonymous
				 * memory to avoid replacing malloc'ed
				 * (i.e. re-usable) anonymous memory with a
				 * not-so-anonymous mapping.
				 * So we have to handle this case here and
				 * allocate a new VM object for this map entry.
				 */
				dst_object = vm_object_allocate(
					entry->vme_end - entry->vme_start);
				dst_offset = 0;
				VME_OBJECT_SET(entry, dst_object);
				VME_OFFSET_SET(entry, dst_offset);
				assert(entry->use_pmap);

			}

			vm_object_reference(dst_object);

			/* account for unlock bumping up timestamp */
			version.main_timestamp = dst_map->timestamp + 1;

			vm_map_unlock(dst_map);

			/*
			 *	Copy as much as possible in one pass
			 */

			/*
			 * copy_size is in/out: on return it is treated as
			 * the number of bytes actually copied.
			 */
			copy_size = size;
			r = vm_fault_copy(
				VME_OBJECT(copy_entry),
				VME_OFFSET(copy_entry),
				&copy_size,
				dst_object,
				dst_offset,
				dst_map,
				&version,
				THREAD_UNINT );

			/*
			 *	Release the object reference
			 */

			vm_object_deallocate(dst_object);

			/*
			 *	If a hard error occurred, return it now
			 *	(the map is not locked at this point).
			 */

			if (r != KERN_SUCCESS)
				return(r);

			if (copy_size != 0) {
				/*
				 *	Dispose of the copied region
				 */

				vm_map_copy_clip_end(copy, copy_entry,
						     copy_entry->vme_start + copy_size);
				vm_map_copy_entry_unlink(copy, copy_entry);
				vm_object_deallocate(VME_OBJECT(copy_entry));
				vm_map_copy_entry_dispose(copy, copy_entry);
			}

			/*
			 *	Pick up in the destination map where we left off.
			 *
			 *	Use the version information to avoid a lookup
			 *	in the normal case.
			 */

			start += copy_size;
			vm_map_lock(dst_map);
			if (version.main_timestamp == dst_map->timestamp &&
			    copy_size != 0) {
				/* We can safely use saved tmp_entry value */

				if (tmp_entry->map_aligned &&
				    !VM_MAP_PAGE_ALIGNED(
					    start,
					    VM_MAP_PAGE_MASK(dst_map))) {
					/* no longer map-aligned */
					tmp_entry->map_aligned = FALSE;
				}
				vm_map_clip_end(dst_map, tmp_entry, start);
				tmp_entry = tmp_entry->vme_next;
			} else {
				/* Must do lookup of tmp_entry */

				if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
					vm_map_unlock(dst_map);
					return(KERN_INVALID_ADDRESS);
				}
				if (tmp_entry->map_aligned &&
				    !VM_MAP_PAGE_ALIGNED(
					    start,
					    VM_MAP_PAGE_MASK(dst_map))) {
					/* no longer map-aligned */
					tmp_entry->map_aligned = FALSE;
				}
				vm_map_clip_start(dst_map, tmp_entry, start);
			}
		}
	}/* while */

	return(KERN_SUCCESS);
}/* vm_map_copy_overwrite_aligned */
8422
8423 /*
8424 * Routine: vm_map_copyin_kernel_buffer [internal use only]
8425 *
8426 * Description:
8427 * Copy in data to a kernel buffer from space in the
8428 * source map. The original space may be optionally
8429 * deallocated.
8430 *
8431 * If successful, returns a new copy object.
8432 */
8433 static kern_return_t
8434 vm_map_copyin_kernel_buffer(
8435 vm_map_t src_map,
8436 vm_map_offset_t src_addr,
8437 vm_map_size_t len,
8438 boolean_t src_destroy,
8439 vm_map_copy_t *copy_result)
8440 {
8441 kern_return_t kr;
8442 vm_map_copy_t copy;
8443 vm_size_t kalloc_size;
8444
8445 if (len > msg_ool_size_small)
8446 return KERN_INVALID_ARGUMENT;
8447
8448 kalloc_size = (vm_size_t)(cpy_kdata_hdr_sz + len);
8449
8450 copy = (vm_map_copy_t)kalloc(kalloc_size);
8451 if (copy == VM_MAP_COPY_NULL)
8452 return KERN_RESOURCE_SHORTAGE;
8453 copy->type = VM_MAP_COPY_KERNEL_BUFFER;
8454 copy->size = len;
8455 copy->offset = 0;
8456
8457 kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t)len);
8458 if (kr != KERN_SUCCESS) {
8459 kfree(copy, kalloc_size);
8460 return kr;
8461 }
8462 if (src_destroy) {
8463 (void) vm_map_remove(
8464 src_map,
8465 vm_map_trunc_page(src_addr,
8466 VM_MAP_PAGE_MASK(src_map)),
8467 vm_map_round_page(src_addr + len,
8468 VM_MAP_PAGE_MASK(src_map)),
8469 (VM_MAP_REMOVE_INTERRUPTIBLE |
8470 VM_MAP_REMOVE_WAIT_FOR_KWIRE |
8471 (src_map == kernel_map) ? VM_MAP_REMOVE_KUNWIRE : 0));
8472 }
8473 *copy_result = copy;
8474 return KERN_SUCCESS;
8475 }
8476
8477 /*
8478 * Routine: vm_map_copyout_kernel_buffer [internal use only]
8479 *
8480 * Description:
8481 * Copy out data from a kernel buffer into space in the
8482 * destination map. The space may be otpionally dynamically
8483 * allocated.
8484 *
8485 * If successful, consumes the copy object.
8486 * Otherwise, the caller is responsible for it.
8487 */
8488 static int vm_map_copyout_kernel_buffer_failures = 0;
8489 static kern_return_t
8490 vm_map_copyout_kernel_buffer(
8491 vm_map_t map,
8492 vm_map_address_t *addr, /* IN/OUT */
8493 vm_map_copy_t copy,
8494 boolean_t overwrite,
8495 boolean_t consume_on_success)
8496 {
8497 kern_return_t kr = KERN_SUCCESS;
8498 thread_t thread = current_thread();
8499
8500 /*
8501 * check for corrupted vm_map_copy structure
8502 */
8503 if (copy->size > msg_ool_size_small || copy->offset)
8504 panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
8505 (long long)copy->size, (long long)copy->offset);
8506
8507 if (!overwrite) {
8508
8509 /*
8510 * Allocate space in the target map for the data
8511 */
8512 *addr = 0;
8513 kr = vm_map_enter(map,
8514 addr,
8515 vm_map_round_page(copy->size,
8516 VM_MAP_PAGE_MASK(map)),
8517 (vm_map_offset_t) 0,
8518 VM_FLAGS_ANYWHERE,
8519 VM_OBJECT_NULL,
8520 (vm_object_offset_t) 0,
8521 FALSE,
8522 VM_PROT_DEFAULT,
8523 VM_PROT_ALL,
8524 VM_INHERIT_DEFAULT);
8525 if (kr != KERN_SUCCESS)
8526 return kr;
8527 }
8528
8529 /*
8530 * Copyout the data from the kernel buffer to the target map.
8531 */
8532 if (thread->map == map) {
8533
8534 /*
8535 * If the target map is the current map, just do
8536 * the copy.
8537 */
8538 assert((vm_size_t) copy->size == copy->size);
8539 if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) {
8540 kr = KERN_INVALID_ADDRESS;
8541 }
8542 }
8543 else {
8544 vm_map_t oldmap;
8545
8546 /*
8547 * If the target map is another map, assume the
8548 * target's address space identity for the duration
8549 * of the copy.
8550 */
8551 vm_map_reference(map);
8552 oldmap = vm_map_switch(map);
8553
8554 assert((vm_size_t) copy->size == copy->size);
8555 if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) {
8556 vm_map_copyout_kernel_buffer_failures++;
8557 kr = KERN_INVALID_ADDRESS;
8558 }
8559
8560 (void) vm_map_switch(oldmap);
8561 vm_map_deallocate(map);
8562 }
8563
8564 if (kr != KERN_SUCCESS) {
8565 /* the copy failed, clean up */
8566 if (!overwrite) {
8567 /*
8568 * Deallocate the space we allocated in the target map.
8569 */
8570 (void) vm_map_remove(
8571 map,
8572 vm_map_trunc_page(*addr,
8573 VM_MAP_PAGE_MASK(map)),
8574 vm_map_round_page((*addr +
8575 vm_map_round_page(copy->size,
8576 VM_MAP_PAGE_MASK(map))),
8577 VM_MAP_PAGE_MASK(map)),
8578 VM_MAP_NO_FLAGS);
8579 *addr = 0;
8580 }
8581 } else {
8582 /* copy was successful, dicard the copy structure */
8583 if (consume_on_success) {
8584 kfree(copy, copy->size + cpy_kdata_hdr_sz);
8585 }
8586 }
8587
8588 return kr;
8589 }
8590
/*
 *	Macro:		vm_map_copy_insert
 *
 *	Description:
 *		Link a copy chain ("copy") into a map at the
 *		specified location (after "where").
 *	Side effects:
 *		The copy chain is destroyed: after its entries have been
 *		handed to vm_map_store_copy_insert(), the copy header
 *		itself is freed back to vm_map_copy_zone.
 *	Warning:
 *		The arguments are evaluated multiple times ("copy"
 *		is expanded twice), so pass side-effect-free expressions.
 */
#define	vm_map_copy_insert(map, where, copy)				\
MACRO_BEGIN								\
	vm_map_store_copy_insert(map, where, copy);	  \
	zfree(vm_map_copy_zone, copy);		\
MACRO_END
8607
8608 void
8609 vm_map_copy_remap(
8610 vm_map_t map,
8611 vm_map_entry_t where,
8612 vm_map_copy_t copy,
8613 vm_map_offset_t adjustment,
8614 vm_prot_t cur_prot,
8615 vm_prot_t max_prot,
8616 vm_inherit_t inheritance)
8617 {
8618 vm_map_entry_t copy_entry, new_entry;
8619
8620 for (copy_entry = vm_map_copy_first_entry(copy);
8621 copy_entry != vm_map_copy_to_entry(copy);
8622 copy_entry = copy_entry->vme_next) {
8623 /* get a new VM map entry for the map */
8624 new_entry = vm_map_entry_create(map,
8625 !map->hdr.entries_pageable);
8626 /* copy the "copy entry" to the new entry */
8627 vm_map_entry_copy(new_entry, copy_entry);
8628 /* adjust "start" and "end" */
8629 new_entry->vme_start += adjustment;
8630 new_entry->vme_end += adjustment;
8631 /* clear some attributes */
8632 new_entry->inheritance = inheritance;
8633 new_entry->protection = cur_prot;
8634 new_entry->max_protection = max_prot;
8635 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
8636 /* take an extra reference on the entry's "object" */
8637 if (new_entry->is_sub_map) {
8638 assert(!new_entry->use_pmap); /* not nested */
8639 vm_map_lock(VME_SUBMAP(new_entry));
8640 vm_map_reference(VME_SUBMAP(new_entry));
8641 vm_map_unlock(VME_SUBMAP(new_entry));
8642 } else {
8643 vm_object_reference(VME_OBJECT(new_entry));
8644 }
8645 /* insert the new entry in the map */
8646 vm_map_store_entry_link(map, where, new_entry);
8647 /* continue inserting the "copy entries" after the new entry */
8648 where = new_entry;
8649 }
8650 }
8651
8652
8653 boolean_t
8654 vm_map_copy_validate_size(
8655 vm_map_t dst_map,
8656 vm_map_copy_t copy,
8657 vm_map_size_t size)
8658 {
8659 if (copy == VM_MAP_COPY_NULL)
8660 return FALSE;
8661 switch (copy->type) {
8662 case VM_MAP_COPY_OBJECT:
8663 case VM_MAP_COPY_KERNEL_BUFFER:
8664 if (size == copy->size)
8665 return TRUE;
8666 break;
8667 case VM_MAP_COPY_ENTRY_LIST:
8668 /*
8669 * potential page-size rounding prevents us from exactly
8670 * validating this flavor of vm_map_copy, but we can at least
8671 * assert that it's within a range.
8672 */
8673 if (copy->size >= size &&
8674 copy->size <= vm_map_round_page(size,
8675 VM_MAP_PAGE_MASK(dst_map)))
8676 return TRUE;
8677 break;
8678 default:
8679 break;
8680 }
8681 return FALSE;
8682 }
8683
8684
8685 /*
8686 * Routine: vm_map_copyout
8687 *
8688 * Description:
8689 * Copy out a copy chain ("copy") into newly-allocated
8690 * space in the destination map.
8691 *
8692 * If successful, consumes the copy object.
8693 * Otherwise, the caller is responsible for it.
8694 */
8695
8696 kern_return_t
8697 vm_map_copyout(
8698 vm_map_t dst_map,
8699 vm_map_address_t *dst_addr, /* OUT */
8700 vm_map_copy_t copy)
8701 {
8702 return vm_map_copyout_internal(dst_map, dst_addr, copy,
8703 TRUE, /* consume_on_success */
8704 VM_PROT_DEFAULT,
8705 VM_PROT_ALL,
8706 VM_INHERIT_DEFAULT);
8707 }
8708
/*
 *	Routine:	vm_map_copyout_internal
 *
 *	Description:
 *		Copy out a copy object ("copy") into newly-allocated space
 *		in "dst_map" and return the resulting address in "*dst_addr".
 *
 *		VM_MAP_COPY_OBJECT copies are entered with vm_map_enter();
 *		VM_MAP_COPY_KERNEL_BUFFER copies are handed to
 *		vm_map_copyout_kernel_buffer(); entry-list copies get a
 *		free range found for them and are then either linked
 *		directly into the map (consume_on_success) or duplicated
 *		into it with vm_map_copy_remap().
 *
 *	Parameters:
 *		dst_map			destination map
 *		dst_addr		OUT: address of the data in dst_map
 *					(set to 0 for a null copy)
 *		copy			copy object; may be VM_MAP_COPY_NULL
 *		consume_on_success	if TRUE, "copy" is consumed on success
 *		cur_protection,
 *		max_protection,
 *		inheritance		NOTE: as written, these are only
 *					applied on the entry-list path when
 *					consume_on_success is FALSE (they are
 *					passed to vm_map_copy_remap()).  The
 *					other paths use VM_PROT_DEFAULT /
 *					VM_PROT_ALL / VM_INHERIT_DEFAULT.
 *
 *	Returns:
 *		KERN_SUCCESS	data is mapped at *dst_addr
 *		KERN_NO_SPACE	no suitable free range in dst_map
 *		other		error from vm_map_enter() or
 *				vm_map_copyout_kernel_buffer()
 */
kern_return_t
vm_map_copyout_internal(
	vm_map_t		dst_map,
	vm_map_address_t	*dst_addr,	/* OUT */
	vm_map_copy_t		copy,
	boolean_t		consume_on_success,
	vm_prot_t		cur_protection,
	vm_prot_t		max_protection,
	vm_inherit_t		inheritance)
{
	vm_map_size_t		size;
	vm_map_size_t		adjustment;
	vm_map_offset_t		start;
	vm_object_offset_t	vm_copy_start;
	vm_map_entry_t		last;
	vm_map_entry_t		entry;
	vm_map_entry_t		hole_entry;

	/*
	 *	Check for null copy object.
	 */

	if (copy == VM_MAP_COPY_NULL) {
		*dst_addr = 0;
		return(KERN_SUCCESS);
	}

	/*
	 *	Check for special copy object, created
	 *	by vm_map_copyin_object.
	 */

	if (copy->type == VM_MAP_COPY_OBJECT) {
		vm_object_t		object = copy->cpy_object;
		kern_return_t		kr;
		vm_object_offset_t	offset;

		/*
		 * Map whole pages: start at the page containing
		 * copy->offset and cover through the end of the data.
		 */
		offset = vm_object_trunc_page(copy->offset);
		size = vm_map_round_page((copy->size +
					  (vm_map_size_t)(copy->offset -
							  offset)),
					 VM_MAP_PAGE_MASK(dst_map));
		*dst_addr = 0;
		kr = vm_map_enter(dst_map, dst_addr, size,
				  (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
				  object, offset, FALSE,
				  VM_PROT_DEFAULT, VM_PROT_ALL,
				  VM_INHERIT_DEFAULT);
		if (kr != KERN_SUCCESS)
			return(kr);
		/* Account for non-pagealigned copy object */
		*dst_addr += (vm_map_offset_t)(copy->offset - offset);
		if (consume_on_success)
			zfree(vm_map_copy_zone, copy);
		return(KERN_SUCCESS);
	}

	/*
	 *	Check for special kernel buffer allocated
	 *	by new_ipc_kmsg_copyin.
	 */

	if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
		return vm_map_copyout_kernel_buffer(dst_map, dst_addr,
						    copy, FALSE,
						    consume_on_success);
	}


	/*
	 *	Find space for the data
	 */

	/*
	 * "size" is the page-rounded span (in the copy's own page size)
	 * covering [copy->offset, copy->offset + copy->size).
	 */
	vm_copy_start = vm_map_trunc_page((vm_map_size_t)copy->offset,
					  VM_MAP_COPY_PAGE_MASK(copy));
	size =	vm_map_round_page((vm_map_size_t)copy->offset + copy->size,
				  VM_MAP_COPY_PAGE_MASK(copy))
		- vm_copy_start;


StartAgain: ;

	vm_map_lock(dst_map);
	/* Choose where the search for a free range begins. */
	if( dst_map->disable_vmentry_reuse == TRUE) {
		VM_MAP_HIGHEST_ENTRY(dst_map, entry, start);
		last = entry;
	} else {
		if (dst_map->holelistenabled) {
			hole_entry = (vm_map_entry_t)dst_map->holes_list;

			if (hole_entry == NULL) {
				/*
				 * No more space in the map?
				 */
				vm_map_unlock(dst_map);
				return(KERN_NO_SPACE);
			}

			last = hole_entry;
			start = last->vme_start;
		} else {
			assert(first_free_is_valid(dst_map));
			start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
				vm_map_min(dst_map) : last->vme_end;
		}
		start = vm_map_round_page(start,
					  VM_MAP_PAGE_MASK(dst_map));
	}

	/*
	 * Scan forward until [start, start + size) fits.  With the hole
	 * list enabled, "last" walks the holes; otherwise it walks the
	 * map entries and the candidate range is the gap after "last".
	 */
	while (TRUE) {
		vm_map_entry_t	next = last->vme_next;
		vm_map_offset_t	end = start + size;

		/* Past the end of the map, or the address wrapped around. */
		if ((end > dst_map->max_offset) || (end < start)) {
			if (dst_map->wait_for_space) {
				if (size <= (dst_map->max_offset - dst_map->min_offset)) {
					/*
					 * The request could fit in this map
					 * in principle: block on the map and
					 * retry from scratch once woken.
					 */
					assert_wait((event_t) dst_map,
						    THREAD_INTERRUPTIBLE);
					vm_map_unlock(dst_map);
					thread_block(THREAD_CONTINUE_NULL);
					goto StartAgain;
				}
			}
			vm_map_unlock(dst_map);
			return(KERN_NO_SPACE);
		}

		if (dst_map->holelistenabled) {
			/* The current hole is big enough. */
			if (last->vme_end >= end)
				break;
		} else {
			/*
			 *	If there are no more entries, we must win.
			 *
			 *	OR
			 *
			 *	If there is another entry, it must be
			 *	after the end of the potential new region.
			 */

			if (next == vm_map_to_entry(dst_map))
				break;

			if (next->vme_start >= end)
				break;
		}

		last = next;

		if (dst_map->holelistenabled) {
			if (last == (vm_map_entry_t) dst_map->holes_list) {
				/*
				 * Wrapped around
				 */
				vm_map_unlock(dst_map);
				return(KERN_NO_SPACE);
			}
			start = last->vme_start;
		} else {
			start = last->vme_end;
		}
		start = vm_map_round_page(start,
					  VM_MAP_PAGE_MASK(dst_map));
	}

	/*
	 * In hole-list mode "last" is a hole, not a map entry: look up
	 * the map position for the hole's start.  The lookup is expected
	 * to fail (no entry there) and leave "last" pointing at the
	 * entry to insert after.
	 */
	if (dst_map->holelistenabled) {
		if (vm_map_lookup_entry(dst_map, last->vme_start, &last)) {
			panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", last, (unsigned long long)last->vme_start);
		}
	}


	/* Distance by which the copy's addresses must be shifted. */
	adjustment = start - vm_copy_start;
	if (! consume_on_success) {
		/*
		 * We're not allowed to consume "copy", so we'll have to
		 * copy its map entries into the destination map below.
		 * No need to re-allocate map entries from the correct
		 * (pageable or not) zone, since we'll get new map entries
		 * during the transfer.
		 * We'll also adjust the map entries's "start" and "end"
		 * during the transfer, to keep "copy"'s entries consistent
		 * with its "offset".
		 */
		goto after_adjustments;
	}

	/*
	 *	Since we're going to just drop the map
	 *	entries from the copy into the destination
	 *	map, they must come from the same pool.
	 */

	if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
		/*
		 * Mismatches occur when dealing with the default
		 * pager.
		 */
		zone_t		old_zone;
		vm_map_entry_t	next, new;

		/*
		 * Find the zone that the copies were allocated from
		 */

		entry = vm_map_copy_first_entry(copy);

		/*
		 * Reinitialize the copy so that vm_map_copy_entry_link
		 * will work.
		 */
		vm_map_store_copy_reset(copy, entry);
		copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;

		/*
		 * Copy each entry.
		 */
		while (entry != vm_map_copy_to_entry(copy)) {
			new = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
			vm_map_entry_copy_full(new, entry);
			assert(!new->iokit_acct);
			if (new->is_sub_map) {
				/* clr address space specifics */
				new->use_pmap = FALSE;
			}
			vm_map_copy_entry_link(copy,
					       vm_map_copy_last_entry(copy),
					       new);
			/* Free the old entry back to the zone it came from. */
			next = entry->vme_next;
			old_zone = entry->from_reserved_zone ? vm_map_entry_reserved_zone : vm_map_entry_zone;
			zfree(old_zone, entry);
			entry = next;
		}
	}

	/*
	 *	Adjust the addresses in the copy chain, and
	 *	reset the region attributes.
	 */

	for (entry = vm_map_copy_first_entry(copy);
	     entry != vm_map_copy_to_entry(copy);
	     entry = entry->vme_next) {
		if (VM_MAP_PAGE_SHIFT(dst_map) == PAGE_SHIFT) {
			/*
			 * We're injecting this copy entry into a map that
			 * has the standard page alignment, so clear
			 * "map_aligned" (which might have been inherited
			 * from the original map entry).
			 */
			entry->map_aligned = FALSE;
		}

		entry->vme_start += adjustment;
		entry->vme_end += adjustment;

		if (entry->map_aligned) {
			assert(VM_MAP_PAGE_ALIGNED(entry->vme_start,
						   VM_MAP_PAGE_MASK(dst_map)));
			assert(VM_MAP_PAGE_ALIGNED(entry->vme_end,
						   VM_MAP_PAGE_MASK(dst_map)));
		}

		/*
		 * NOTE: the consuming path always installs the default
		 * attributes; cur_protection, max_protection and
		 * inheritance are not consulted here.
		 */
		entry->inheritance = VM_INHERIT_DEFAULT;
		entry->protection = VM_PROT_DEFAULT;
		entry->max_protection = VM_PROT_ALL;
		entry->behavior = VM_BEHAVIOR_DEFAULT;

		/*
		 * If the entry is now wired,
		 * map the pages into the destination map.
		 */
		if (entry->wired_count != 0) {
			register vm_map_offset_t va;
			vm_object_offset_t	 offset;
			register vm_object_t object;
			vm_prot_t prot;
			int	type_of_fault;

			object = VME_OBJECT(entry);
			offset = VME_OFFSET(entry);
			va = entry->vme_start;

			pmap_pageable(dst_map->pmap,
				      entry->vme_start,
				      entry->vme_end,
				      TRUE);

			/* Enter every page of the entry, one at a time. */
			while (va < entry->vme_end) {
				register vm_page_t	m;

				/*
				 * Look up the page in the object.
				 * Assert that the page will be found in the
				 * top object:
				 * either
				 *	the object was newly created by
				 *	vm_object_copy_slowly, and has
				 *	copies of all of the pages from
				 *	the source object
				 * or
				 *	the object was moved from the old
				 *	map entry; because the old map
				 *	entry was wired, all of the pages
				 *	were in the top-level object.
				 *	(XXX not true if we wire pages for
				 *	 reading)
				 */
				vm_object_lock(object);

				m = vm_page_lookup(object, offset);
				if (m == VM_PAGE_NULL || !VM_PAGE_WIRED(m) ||
				    m->absent)
					panic("vm_map_copyout: wiring %p", m);

				/*
				 * ENCRYPTED SWAP:
				 * The page is assumed to be wired here, so it
				 * shouldn't be encrypted.  Otherwise, we
				 * couldn't enter it in the page table, since
				 * we don't want the user to see the encrypted
				 * data.
				 */
				ASSERT_PAGE_DECRYPTED(m);

				prot = entry->protection;

				if (override_nx(dst_map, VME_ALIAS(entry)) &&
				    prot)
					prot |= VM_PROT_EXECUTE;

				type_of_fault = DBG_CACHE_HIT_FAULT;

				vm_fault_enter(m, dst_map->pmap, va, prot, prot,
					       VM_PAGE_WIRED(m), FALSE, FALSE,
					       FALSE, VME_ALIAS(entry),
					       ((entry->iokit_acct ||
						 (!entry->is_sub_map &&
						  !entry->use_pmap))
						? PMAP_OPTIONS_ALT_ACCT
						: 0),
					       NULL, &type_of_fault);

				vm_object_unlock(object);

				offset += PAGE_SIZE_64;
				va += PAGE_SIZE;
			}
		}
	}

after_adjustments:

	/*
	 *	Correct the page alignment for the result
	 */

	*dst_addr = start + (copy->offset - vm_copy_start);

	/*
	 *	Update the hints and the map size
	 */

	if (consume_on_success) {
		SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy));
	} else {
		SAVE_HINT_MAP_WRITE(dst_map, last);
	}

	dst_map->size += size;

	/*
	 *	Link in the copy
	 */

	if (consume_on_success) {
		/* Moves the entries into the map and frees the copy header. */
		vm_map_copy_insert(dst_map, last, copy);
	} else {
		/* Duplicates the entries; "copy" stays with the caller. */
		vm_map_copy_remap(dst_map, last, copy, adjustment,
				  cur_protection, max_protection,
				  inheritance);
	}

	vm_map_unlock(dst_map);

	/*
	 * XXX	If wiring_required, call vm_map_pageable
	 */

	return(KERN_SUCCESS);
}
9100
9101 /*
9102 * Routine: vm_map_copyin
9103 *
9104 * Description:
9105 * see vm_map_copyin_common. Exported via Unsupported.exports.
9106 *
9107 */
9108
9109 #undef vm_map_copyin
9110
9111 kern_return_t
9112 vm_map_copyin(
9113 vm_map_t src_map,
9114 vm_map_address_t src_addr,
9115 vm_map_size_t len,
9116 boolean_t src_destroy,
9117 vm_map_copy_t *copy_result) /* OUT */
9118 {
9119 return(vm_map_copyin_common(src_map, src_addr, len, src_destroy,
9120 FALSE, copy_result, FALSE));
9121 }
9122
9123 /*
9124 * Routine: vm_map_copyin_common
9125 *
9126 * Description:
9127 * Copy the specified region (src_addr, len) from the
9128 * source address space (src_map), possibly removing
9129 * the region from the source address space (src_destroy).
9130 *
9131 * Returns:
9132 * A vm_map_copy_t object (copy_result), suitable for
9133 * insertion into another address space (using vm_map_copyout),
9134 * copying over another address space region (using
9135 * vm_map_copy_overwrite). If the copy is unused, it
9136 * should be destroyed (using vm_map_copy_discard).
9137 *
9138 * In/out conditions:
9139 * The source map should not be locked on entry.
9140 */
9141
9142 typedef struct submap_map {
9143 vm_map_t parent_map;
9144 vm_map_offset_t base_start;
9145 vm_map_offset_t base_end;
9146 vm_map_size_t base_len;
9147 struct submap_map *next;
9148 } submap_map_t;
9149
9150 kern_return_t
9151 vm_map_copyin_common(
9152 vm_map_t src_map,
9153 vm_map_address_t src_addr,
9154 vm_map_size_t len,
9155 boolean_t src_destroy,
9156 __unused boolean_t src_volatile,
9157 vm_map_copy_t *copy_result, /* OUT */
9158 boolean_t use_maxprot)
9159 {
9160 int flags;
9161
9162 flags = 0;
9163 if (src_destroy) {
9164 flags |= VM_MAP_COPYIN_SRC_DESTROY;
9165 }
9166 if (use_maxprot) {
9167 flags |= VM_MAP_COPYIN_USE_MAXPROT;
9168 }
9169 return vm_map_copyin_internal(src_map,
9170 src_addr,
9171 len,
9172 flags,
9173 copy_result);
9174 }
/*
 * Routine: vm_map_copyin_internal
 *
 * Description:
 * Worker behind vm_map_copyin_common. Builds a vm_map_copy_t
 * describing the range (src_addr, len) of src_map.
 *
 * "flags" is a mask of VM_MAP_COPYIN_* bits; the ones examined here are
 * VM_MAP_COPYIN_SRC_DESTROY (delete the source range once the copy
 * has been built), VM_MAP_COPYIN_USE_MAXPROT (do not require
 * VM_PROT_READ in the current protection, only in max_protection)
 * and VM_MAP_COPYIN_ENTRY_LIST (never take the kernel-buffer path).
 *
 * Small requests (len < msg_ool_size_small, without USE_MAXPROT or
 * ENTRY_LIST) are handed to vm_map_copyin_kernel_buffer. Everything
 * else produces a VM_MAP_COPY_ENTRY_LIST copy, one copy entry per
 * source map entry, descending into submaps as needed.
 *
 * Returns:
 * KERN_SUCCESS - *copy_result holds the copy
 * (VM_MAP_COPY_NULL when len == 0).
 * KERN_INVALID_ARGUMENT - unknown bits set in "flags".
 * KERN_INVALID_ADDRESS - src_addr + len wraps, a lookup failed,
 * or there is a gap in the range.
 * KERN_PROTECTION_FAILURE - an entry is not readable, or is backed
 * by a phys_contiguous object.
 * Any other failure code from the object copy routines is returned
 * unchanged.
 *
 * In/out conditions:
 * src_map is locked and unlocked by this routine.
 */
9175 kern_return_t
9176 vm_map_copyin_internal(
9177 vm_map_t src_map,
9178 vm_map_address_t src_addr,
9179 vm_map_size_t len,
9180 int flags,
9181 vm_map_copy_t *copy_result) /* OUT */
9182 {
9183 vm_map_entry_t tmp_entry; /* Result of last map lookup --
9184 * in multi-level lookup, this
9185 * entry contains the actual
9186 * vm_object/offset.
9187 */
9188 vm_map_entry_t new_entry = VM_MAP_ENTRY_NULL; /* Map entry for copy */
9189
9190 vm_map_offset_t src_start; /* Start of current entry --
9191 * where copy is taking place now
9192 */
9193 vm_map_offset_t src_end; /* End of entire region to be
9194 * copied */
9195 vm_map_offset_t src_base;
9196 vm_map_t base_map = src_map;
9197 boolean_t map_share=FALSE;
9198 submap_map_t *parent_maps = NULL;
9199
9200 vm_map_copy_t copy; /* Resulting copy */
9201 vm_map_address_t copy_addr;
9202 vm_map_size_t copy_size;
9203 boolean_t src_destroy;
9204 boolean_t use_maxprot;
9205
9206 if (flags & ~VM_MAP_COPYIN_ALL_FLAGS) {
9207 return KERN_INVALID_ARGUMENT;
9208 }
9209
9210 src_destroy = (flags & VM_MAP_COPYIN_SRC_DESTROY) ? TRUE : FALSE;
9211 use_maxprot = (flags & VM_MAP_COPYIN_USE_MAXPROT) ? TRUE : FALSE;
9212
9213 /*
9214 * Check for copies of zero bytes.
9215 */
9216
9217 if (len == 0) {
9218 *copy_result = VM_MAP_COPY_NULL;
9219 return(KERN_SUCCESS);
9220 }
9221
9222 /*
9223 * Check that the end address doesn't overflow
9224 */
9225 src_end = src_addr + len;
9226 if (src_end < src_addr)
9227 return KERN_INVALID_ADDRESS;
9228
9229 /*
9230 * If the copy is sufficiently small, use a kernel buffer instead
9231 * of making a virtual copy. The theory being that the cost of
9232 * setting up VM (and taking C-O-W faults) dominates the copy costs
9233 * for small regions.
9234 */
9235 if ((len < msg_ool_size_small) &&
9236 !use_maxprot &&
9237 !(flags & VM_MAP_COPYIN_ENTRY_LIST))
9238 return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
9239 src_destroy, copy_result);
9240
9241 /*
9242 * Compute (page aligned) start and end of region
9243 */
9244 src_start = vm_map_trunc_page(src_addr,
9245 VM_MAP_PAGE_MASK(src_map));
9246 src_end = vm_map_round_page(src_end,
9247 VM_MAP_PAGE_MASK(src_map));
9248
9249 XPR(XPR_VM_MAP, "vm_map_copyin_common map 0x%x addr 0x%x len 0x%x dest %d\n", src_map, src_addr, len, src_destroy, 0);
9250
9251 /*
9252 * Allocate a header element for the list.
9253 *
9254 * Use the start and end in the header to
9255 * remember the endpoints prior to rounding.
9256 */
9257
9258 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
9259 copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
9260 vm_map_copy_first_entry(copy) =
9261 vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
9262 copy->type = VM_MAP_COPY_ENTRY_LIST;
9263 copy->cpy_hdr.nentries = 0;
9264 copy->cpy_hdr.entries_pageable = TRUE;
9265 #if 00
9266 copy->cpy_hdr.page_shift = src_map->hdr.page_shift;
9267 #else
9268 /*
9269 * The copy entries can be broken down for a variety of reasons,
9270 * so we can't guarantee that they will remain map-aligned...
9271 * Will need to adjust the first copy_entry's "vme_start" and
9272 * the last copy_entry's "vme_end" to be rounded to PAGE_MASK
9273 * rather than the original map's alignment.
9274 */
9275 copy->cpy_hdr.page_shift = PAGE_SHIFT;
9276 #endif
9277
9278 vm_map_store_init( &(copy->cpy_hdr) );
9279
9280 copy->offset = src_addr;
9281 copy->size = len;
9282
9283 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
9284
/*
 * RETURN(x): common failure exit.
 * Unlocks src_map (so src_map must be locked when it is used), drops
 * the reference on src_map if it is not the base map, disposes of a
 * pending new_entry, discards the partially built copy, frees every
 * record still on the parent_maps list (dropping the reference on
 * each parent map other than the base map), then returns x.
 */
9285 #define RETURN(x) \
9286 MACRO_BEGIN \
9287 vm_map_unlock(src_map); \
9288 if(src_map != base_map) \
9289 vm_map_deallocate(src_map); \
9290 if (new_entry != VM_MAP_ENTRY_NULL) \
9291 vm_map_copy_entry_dispose(copy,new_entry); \
9292 vm_map_copy_discard(copy); \
9293 { \
9294 submap_map_t *_ptr; \
9295 \
9296 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
9297 parent_maps=parent_maps->next; \
9298 if (_ptr->parent_map != base_map) \
9299 vm_map_deallocate(_ptr->parent_map); \
9300 kfree(_ptr, sizeof(submap_map_t)); \
9301 } \
9302 } \
9303 MACRO_RETURN(x); \
9304 MACRO_END
9305
9306 /*
9307 * Find the beginning of the region.
9308 */
9309
9310 vm_map_lock(src_map);
9311
9312 /*
9313 * Lookup the original "src_addr" rather than the truncated
9314 * "src_start", in case "src_start" falls in a non-map-aligned
9315 * map entry *before* the map entry that contains "src_addr"...
9316 */
9317 if (!vm_map_lookup_entry(src_map, src_addr, &tmp_entry))
9318 RETURN(KERN_INVALID_ADDRESS);
9319 if(!tmp_entry->is_sub_map) {
9320 /*
9321 * ... but clip to the map-rounded "src_start" rather than
9322 * "src_addr" to preserve map-alignment. We'll adjust the
9323 * first copy entry at the end, if needed.
9324 */
9325 vm_map_clip_start(src_map, tmp_entry, src_start);
9326 }
9327 if (src_start < tmp_entry->vme_start) {
9328 /*
9329 * Move "src_start" up to the start of the
9330 * first map entry to copy.
9331 */
9332 src_start = tmp_entry->vme_start;
9333 }
9334 /* set for later submap fix-up */
9335 copy_addr = src_start;
9336
9337 /*
9338 * Go through entries until we get to the end.
9339 */
9340
9341 while (TRUE) {
9342 register
9343 vm_map_entry_t src_entry = tmp_entry; /* Top-level entry */
9344 vm_map_size_t src_size; /* Size of source
9345 * map entry (in both
9346 * maps)
9347 */
9348
9349 register
9350 vm_object_t src_object; /* Object to copy */
9351 vm_object_offset_t src_offset;
9352
9353 boolean_t src_needs_copy; /* Should source map
9354 * be made read-only
9355 * for copy-on-write?
9356 */
9357
9358 boolean_t new_entry_needs_copy; /* Will new entry be COW? */
9359
9360 boolean_t was_wired; /* Was source wired? */
9361 vm_map_version_t version; /* Version before locks
9362 * dropped to make copy
9363 */
9364 kern_return_t result; /* Return value from
9365 * copy_strategically.
9366 */
/*
 * Descend through submap entries. For each level, push a
 * record on parent_maps remembering the parent map and the
 * range we were working on there, rebase src_start/src_end
 * into the submap's address space (via the entry's offset),
 * lock the submap, take a reference on it, and unlock the parent.
 */
9367 while(tmp_entry->is_sub_map) {
9368 vm_map_size_t submap_len;
9369 submap_map_t *ptr;
9370
9371 ptr = (submap_map_t *)kalloc(sizeof(submap_map_t));
9372 ptr->next = parent_maps;
9373 parent_maps = ptr;
9374 ptr->parent_map = src_map;
9375 ptr->base_start = src_start;
9376 ptr->base_end = src_end;
9377 submap_len = tmp_entry->vme_end - src_start;
9378 if(submap_len > (src_end-src_start))
9379 submap_len = src_end-src_start;
9380 ptr->base_len = submap_len;
9381
9382 src_start -= tmp_entry->vme_start;
9383 src_start += VME_OFFSET(tmp_entry);
9384 src_end = src_start + submap_len;
9385 src_map = VME_SUBMAP(tmp_entry);
9386 vm_map_lock(src_map);
9387 /* keep an outstanding reference for all maps in */
9388 /* the parents tree except the base map */
9389 vm_map_reference(src_map);
9390 vm_map_unlock(ptr->parent_map);
9391 if (!vm_map_lookup_entry(
9392 src_map, src_start, &tmp_entry))
9393 RETURN(KERN_INVALID_ADDRESS);
9394 map_share = TRUE;
9395 if(!tmp_entry->is_sub_map)
9396 vm_map_clip_start(src_map, tmp_entry, src_start);
9397 src_entry = tmp_entry;
9398 }
9399 /* we are now in the lowest level submap... */
9400
9401 if ((VME_OBJECT(tmp_entry) != VM_OBJECT_NULL) &&
9402 (VME_OBJECT(tmp_entry)->phys_contiguous)) {
9403 /* This is not supported for now. In future */
9404 /* we will need to detect the phys_contig */
9405 /* condition and then upgrade copy_slowly */
9406 /* to do physical copy from the device mem */
9407 /* based object. We can piggy-back off of */
9408 /* the was wired boolean to set-up the */
9409 /* proper handling */
9410 RETURN(KERN_PROTECTION_FAILURE);
9411 }
9412 /*
9413 * Create a new address map entry to hold the result.
9414 * Fill in the fields from the appropriate source entries.
9415 * We must unlock the source map to do this if we need
9416 * to allocate a map entry.
9417 */
9418 if (new_entry == VM_MAP_ENTRY_NULL) {
9419 version.main_timestamp = src_map->timestamp;
9420 vm_map_unlock(src_map);
9421
9422 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
9423
9424 vm_map_lock(src_map);
9425 if ((version.main_timestamp + 1) != src_map->timestamp) {
9426 if (!vm_map_lookup_entry(src_map, src_start,
9427 &tmp_entry)) {
9428 RETURN(KERN_INVALID_ADDRESS);
9429 }
9430 if (!tmp_entry->is_sub_map)
9431 vm_map_clip_start(src_map, tmp_entry, src_start);
9432 continue; /* restart w/ new tmp_entry */
9433 }
9434 }
9435
9436 /*
9437 * Verify that the region can be read.
9438 */
9439 if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE &&
9440 !use_maxprot) ||
9441 (src_entry->max_protection & VM_PROT_READ) == 0)
9442 RETURN(KERN_PROTECTION_FAILURE);
9443
9444 /*
9445 * Clip against the endpoints of the entire region.
9446 */
9447
9448 vm_map_clip_end(src_map, src_entry, src_end);
9449
9450 src_size = src_entry->vme_end - src_start;
9451 src_object = VME_OBJECT(src_entry);
9452 src_offset = VME_OFFSET(src_entry);
9453 was_wired = (src_entry->wired_count != 0);
9454
9455 vm_map_entry_copy(new_entry, src_entry);
9456 if (new_entry->is_sub_map) {
9457 /* clr address space specifics */
9458 new_entry->use_pmap = FALSE;
9459 }
9460
9461 /*
9462 * Attempt non-blocking copy-on-write optimizations.
9463 */
9464
9465 if (src_destroy &&
9466 (src_object == VM_OBJECT_NULL ||
9467 (src_object->internal && !src_object->true_share
9468 && !map_share))) {
9469 /*
9470 * If we are destroying the source, and the object
9471 * is internal, we can move the object reference
9472 * from the source to the copy. The copy is
9473 * copy-on-write only if the source is.
9474 * We make another reference to the object, because
9475 * destroying the source entry will deallocate it.
9476 */
9477 vm_object_reference(src_object);
9478
9479 /*
9480 * Copy is always unwired. vm_map_copy_entry
9481 * set its wired count to zero.
9482 */
9483
9484 goto CopySuccessful;
9485 }
9486
9487
9488 RestartCopy:
9489 XPR(XPR_VM_MAP, "vm_map_copyin_common src_obj 0x%x ent 0x%x obj 0x%x was_wired %d\n",
9490 src_object, new_entry, VME_OBJECT(new_entry),
9491 was_wired, 0);
9492 if ((src_object == VM_OBJECT_NULL ||
9493 (!was_wired && !map_share && !tmp_entry->is_shared)) &&
9494 vm_object_copy_quickly(
9495 &VME_OBJECT(new_entry),
9496 src_offset,
9497 src_size,
9498 &src_needs_copy,
9499 &new_entry_needs_copy)) {
9500
9501 new_entry->needs_copy = new_entry_needs_copy;
9502
9503 /*
9504 * Handle copy-on-write obligations
9505 */
9506
9507 if (src_needs_copy && !tmp_entry->needs_copy) {
9508 vm_prot_t prot;
9509
9510 prot = src_entry->protection & ~VM_PROT_WRITE;
9511
9512 if (override_nx(src_map, VME_ALIAS(src_entry))
9513 && prot)
9514 prot |= VM_PROT_EXECUTE;
9515
9516 vm_object_pmap_protect(
9517 src_object,
9518 src_offset,
9519 src_size,
9520 (src_entry->is_shared ?
9521 PMAP_NULL
9522 : src_map->pmap),
9523 src_entry->vme_start,
9524 prot);
9525
9526 assert(tmp_entry->wired_count == 0);
9527 tmp_entry->needs_copy = TRUE;
9528 }
9529
9530 /*
9531 * The map has never been unlocked, so it's safe
9532 * to move to the next entry rather than doing
9533 * another lookup.
9534 */
9535
9536 goto CopySuccessful;
9537 }
9538
9539 /*
9540 * Take an object reference, so that we may
9541 * release the map lock(s).
9542 */
9543
9544 assert(src_object != VM_OBJECT_NULL);
9545 vm_object_reference(src_object);
9546
9547 /*
9548 * Record the timestamp for later verification.
9549 * Unlock the map.
9550 */
9551
9552 version.main_timestamp = src_map->timestamp;
9553 vm_map_unlock(src_map); /* Increments timestamp once! */
9554
9555 /*
9556 * Perform the copy
9557 */
9558
9559 if (was_wired) {
9560 CopySlowly:
9561 vm_object_lock(src_object);
9562 result = vm_object_copy_slowly(
9563 src_object,
9564 src_offset,
9565 src_size,
9566 THREAD_UNINT,
9567 &VME_OBJECT(new_entry));
9568 VME_OFFSET_SET(new_entry, 0);
9569 new_entry->needs_copy = FALSE;
9570
9571 }
9572 else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
9573 (tmp_entry->is_shared || map_share)) {
9574 vm_object_t new_object;
9575
9576 vm_object_lock_shared(src_object);
9577 new_object = vm_object_copy_delayed(
9578 src_object,
9579 src_offset,
9580 src_size,
9581 TRUE);
9582 if (new_object == VM_OBJECT_NULL)
9583 goto CopySlowly;
9584
9585 VME_OBJECT_SET(new_entry, new_object);
9586 assert(new_entry->wired_count == 0);
9587 new_entry->needs_copy = TRUE;
9588 assert(!new_entry->iokit_acct);
9589 assert(new_object->purgable == VM_PURGABLE_DENY);
9590 new_entry->use_pmap = TRUE;
9591 result = KERN_SUCCESS;
9592
9593 } else {
9594 vm_object_offset_t new_offset;
9595 new_offset = VME_OFFSET(new_entry);
9596 result = vm_object_copy_strategically(src_object,
9597 src_offset,
9598 src_size,
9599 &VME_OBJECT(new_entry),
9600 &new_offset,
9601 &new_entry_needs_copy);
9602 if (new_offset != VME_OFFSET(new_entry)) {
9603 VME_OFFSET_SET(new_entry, new_offset);
9604 }
9605
9606 new_entry->needs_copy = new_entry_needs_copy;
9607 }
9608
/*
 * The map was unlocked above and RETURN() starts by unlocking
 * src_map, so the lock is re-taken before bailing out.
 */
9609 if (result != KERN_SUCCESS &&
9610 result != KERN_MEMORY_RESTART_COPY) {
9611 vm_map_lock(src_map);
9612 RETURN(result);
9613 }
9614
9615 /*
9616 * Throw away the extra reference
9617 */
9618
9619 vm_object_deallocate(src_object);
9620
9621 /*
9622 * Verify that the map has not substantially
9623 * changed while the copy was being made.
9624 */
9625
9626 vm_map_lock(src_map);
9627
9628 if ((version.main_timestamp + 1) == src_map->timestamp)
9629 goto VerificationSuccessful;
9630
9631 /*
9632 * Simple version comparison failed.
9633 *
9634 * Retry the lookup and verify that the
9635 * same object/offset are still present.
9636 *
9637 * [Note: a memory manager that colludes with
9638 * the calling task can detect that we have
9639 * cheated. While the map was unlocked, the
9640 * mapping could have been changed and restored.]
9641 */
9642
9643 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
9644 if (result != KERN_MEMORY_RESTART_COPY) {
9645 vm_object_deallocate(VME_OBJECT(new_entry));
9646 VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
9647 assert(!new_entry->iokit_acct);
9648 new_entry->use_pmap = TRUE;
9649 }
9650 RETURN(KERN_INVALID_ADDRESS);
9651 }
9652
9653 src_entry = tmp_entry;
9654 vm_map_clip_start(src_map, src_entry, src_start);
9655
9656 if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) &&
9657 !use_maxprot) ||
9658 ((src_entry->max_protection & VM_PROT_READ) == 0))
9659 goto VerificationFailed;
9660
9661 if (src_entry->vme_end < new_entry->vme_end) {
9662 assert(VM_MAP_PAGE_ALIGNED(src_entry->vme_end,
9663 VM_MAP_COPY_PAGE_MASK(copy)));
9664 new_entry->vme_end = src_entry->vme_end;
9665 src_size = new_entry->vme_end - src_start;
9666 }
9667
9668 if ((VME_OBJECT(src_entry) != src_object) ||
9669 (VME_OFFSET(src_entry) != src_offset) ) {
9670
9671 /*
9672 * Verification failed.
9673 *
9674 * Start over with this top-level entry.
9675 */
9676
9677 VerificationFailed: ;
9678
9679 vm_object_deallocate(VME_OBJECT(new_entry));
9680 tmp_entry = src_entry;
9681 continue;
9682 }
9683
9684 /*
9685 * Verification succeeded.
9686 */
9687
9688 VerificationSuccessful: ;
9689
9690 if (result == KERN_MEMORY_RESTART_COPY)
9691 goto RestartCopy;
9692
9693 /*
9694 * Copy succeeded.
9695 */
9696
9697 CopySuccessful: ;
9698
9699 /*
9700 * Link in the new copy entry.
9701 */
9702
9703 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
9704 new_entry);
9705
9706 /*
9707 * Determine whether the entire region
9708 * has been copied.
9709 */
9710 src_base = src_start;
9711 src_start = new_entry->vme_end;
9712 new_entry = VM_MAP_ENTRY_NULL;
/*
 * If the range inside the current submap is exhausted, pop
 * back up through parent_maps: simplify and release the
 * submap, re-lock the parent, and resume just past the part
 * of the parent's range that the submap covered.
 */
9713 while ((src_start >= src_end) && (src_end != 0)) {
9714 submap_map_t *ptr;
9715
9716 if (src_map == base_map) {
9717 /* back to the top */
9718 break;
9719 }
9720
9721 ptr = parent_maps;
9722 assert(ptr != NULL);
9723 parent_maps = parent_maps->next;
9724
9725 /* fix up the damage we did in that submap */
9726 vm_map_simplify_range(src_map,
9727 src_base,
9728 src_end);
9729
9730 vm_map_unlock(src_map);
9731 vm_map_deallocate(src_map);
9732 vm_map_lock(ptr->parent_map);
9733 src_map = ptr->parent_map;
9734 src_base = ptr->base_start;
9735 src_start = ptr->base_start + ptr->base_len;
9736 src_end = ptr->base_end;
9737 if (!vm_map_lookup_entry(src_map,
9738 src_start,
9739 &tmp_entry) &&
9740 (src_end > src_start)) {
9741 RETURN(KERN_INVALID_ADDRESS);
9742 }
9743 kfree(ptr, sizeof(submap_map_t));
9744 if (parent_maps == NULL)
9745 map_share = FALSE;
9746 src_entry = tmp_entry->vme_prev;
9747 }
9748
9749 if ((VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) &&
9750 (src_start >= src_addr + len) &&
9751 (src_addr + len != 0)) {
9752 /*
9753 * Stop copying now, even though we haven't reached
9754 * "src_end". We'll adjust the end of the last copy
9755 * entry at the end, if needed.
9756 *
9757 * If src_map's alignment is different from the
9758 * system's page-alignment, there could be
9759 * extra non-map-aligned map entries between
9760 * the original (non-rounded) "src_addr + len"
9761 * and the rounded "src_end".
9762 * We do not want to copy those map entries since
9763 * they're not part of the copied range.
9764 */
9765 break;
9766 }
9767
9768 if ((src_start >= src_end) && (src_end != 0))
9769 break;
9770
9771 /*
9772 * Verify that there are no gaps in the region
9773 */
9774
9775 tmp_entry = src_entry->vme_next;
9776 if ((tmp_entry->vme_start != src_start) ||
9777 (tmp_entry == vm_map_to_entry(src_map))) {
9778 RETURN(KERN_INVALID_ADDRESS);
9779 }
9780 }
9781
9782 /*
9783 * If the source should be destroyed, do it now, since the
9784 * copy was successful.
9785 */
9786 if (src_destroy) {
9787 (void) vm_map_delete(
9788 src_map,
9789 vm_map_trunc_page(src_addr,
9790 VM_MAP_PAGE_MASK(src_map)),
9791 src_end,
9792 ((src_map == kernel_map) ?
9793 VM_MAP_REMOVE_KUNWIRE :
9794 VM_MAP_NO_FLAGS),
9795 VM_MAP_NULL);
9796 } else {
9797 /* fix up the damage we did in the base map */
9798 vm_map_simplify_range(
9799 src_map,
9800 vm_map_trunc_page(src_addr,
9801 VM_MAP_PAGE_MASK(src_map)),
9802 vm_map_round_page(src_end,
9803 VM_MAP_PAGE_MASK(src_map)));
9804 }
9805
9806 vm_map_unlock(src_map);
9807
9808 if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) {
9809 vm_map_offset_t original_start, original_offset, original_end;
9810
9811 assert(VM_MAP_COPY_PAGE_MASK(copy) == PAGE_MASK);
9812
9813 /* adjust alignment of first copy_entry's "vme_start" */
9814 tmp_entry = vm_map_copy_first_entry(copy);
9815 if (tmp_entry != vm_map_copy_to_entry(copy)) {
9816 vm_map_offset_t adjustment;
9817
9818 original_start = tmp_entry->vme_start;
9819 original_offset = VME_OFFSET(tmp_entry);
9820
9821 /* map-align the start of the first copy entry... */
9822 adjustment = (tmp_entry->vme_start -
9823 vm_map_trunc_page(
9824 tmp_entry->vme_start,
9825 VM_MAP_PAGE_MASK(src_map)));
9826 tmp_entry->vme_start -= adjustment;
9827 VME_OFFSET_SET(tmp_entry,
9828 VME_OFFSET(tmp_entry) - adjustment);
9829 copy_addr -= adjustment;
9830 assert(tmp_entry->vme_start < tmp_entry->vme_end);
9831 /* ... adjust for mis-aligned start of copy range */
9832 adjustment =
9833 (vm_map_trunc_page(copy->offset,
9834 PAGE_MASK) -
9835 vm_map_trunc_page(copy->offset,
9836 VM_MAP_PAGE_MASK(src_map)));
9837 if (adjustment) {
9838 assert(page_aligned(adjustment));
9839 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
9840 tmp_entry->vme_start += adjustment;
9841 VME_OFFSET_SET(tmp_entry,
9842 (VME_OFFSET(tmp_entry) +
9843 adjustment));
9844 copy_addr += adjustment;
9845 assert(tmp_entry->vme_start < tmp_entry->vme_end);
9846 }
9847
9848 /*
9849 * Assert that the adjustments haven't exposed
9850 * more than was originally copied...
9851 */
9852 assert(tmp_entry->vme_start >= original_start);
9853 assert(VME_OFFSET(tmp_entry) >= original_offset);
9854 /*
9855 * ... and that it did not adjust outside of a
9856 * single 16K page.
9857 */
9858 assert(vm_map_trunc_page(tmp_entry->vme_start,
9859 VM_MAP_PAGE_MASK(src_map)) ==
9860 vm_map_trunc_page(original_start,
9861 VM_MAP_PAGE_MASK(src_map)));
9862 }
9863
9864 /* adjust alignment of last copy_entry's "vme_end" */
9865 tmp_entry = vm_map_copy_last_entry(copy);
9866 if (tmp_entry != vm_map_copy_to_entry(copy)) {
9867 vm_map_offset_t adjustment;
9868
9869 original_end = tmp_entry->vme_end;
9870
9871 /* map-align the end of the last copy entry... */
9872 tmp_entry->vme_end =
9873 vm_map_round_page(tmp_entry->vme_end,
9874 VM_MAP_PAGE_MASK(src_map));
9875 /* ... adjust for mis-aligned end of copy range */
9876 adjustment =
9877 (vm_map_round_page((copy->offset +
9878 copy->size),
9879 VM_MAP_PAGE_MASK(src_map)) -
9880 vm_map_round_page((copy->offset +
9881 copy->size),
9882 PAGE_MASK));
9883 if (adjustment) {
9884 assert(page_aligned(adjustment));
9885 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
9886 tmp_entry->vme_end -= adjustment;
9887 assert(tmp_entry->vme_start < tmp_entry->vme_end);
9888 }
9889
9890 /*
9891 * Assert that the adjustments haven't exposed
9892 * more than was originally copied...
9893 */
9894 assert(tmp_entry->vme_end <= original_end);
9895 /*
9896 * ... and that it did not adjust outside of a
9897 * single 16K page.
9898 */
9899 assert(vm_map_round_page(tmp_entry->vme_end,
9900 VM_MAP_PAGE_MASK(src_map)) ==
9901 vm_map_round_page(original_end,
9902 VM_MAP_PAGE_MASK(src_map)));
9903 }
9904 }
9905
9906 /* Fix-up start and end points in copy. This is necessary */
9907 /* when the various entries in the copy object were picked */
9908 /* up from different sub-maps */
9909
9910 tmp_entry = vm_map_copy_first_entry(copy);
9911 copy_size = 0; /* compute actual size */
9912 while (tmp_entry != vm_map_copy_to_entry(copy)) {
9913 assert(VM_MAP_PAGE_ALIGNED(
9914 copy_addr + (tmp_entry->vme_end -
9915 tmp_entry->vme_start),
9916 VM_MAP_COPY_PAGE_MASK(copy)));
9917 assert(VM_MAP_PAGE_ALIGNED(
9918 copy_addr,
9919 VM_MAP_COPY_PAGE_MASK(copy)));
9920
9921 /*
9922 * The copy_entries will be injected directly into the
9923 * destination map and might not be "map aligned" there...
9924 */
9925 tmp_entry->map_aligned = FALSE;
9926
9927 tmp_entry->vme_end = copy_addr +
9928 (tmp_entry->vme_end - tmp_entry->vme_start);
9929 tmp_entry->vme_start = copy_addr;
9930 assert(tmp_entry->vme_start < tmp_entry->vme_end);
9931 copy_addr += tmp_entry->vme_end - tmp_entry->vme_start;
9932 copy_size += tmp_entry->vme_end - tmp_entry->vme_start;
9933 tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next;
9934 }
9935
9936 if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT &&
9937 copy_size < copy->size) {
9938 /*
9939 * The actual size of the VM map copy is smaller than what
9940 * was requested by the caller. This must be because some
9941 * PAGE_SIZE-sized pages are missing at the end of the last
9942 * VM_MAP_PAGE_SIZE(src_map)-sized chunk of the range.
9943 * The caller might not have been aware of those missing
9944 * pages and might not want to be aware of it, which is
9945 * fine as long as they don't try to access (and crash on)
9946 * those missing pages.
9947 * Let's adjust the size of the "copy", to avoid failing
9948 * in vm_map_copyout() or vm_map_copy_overwrite().
9949 */
9950 assert(vm_map_round_page(copy_size,
9951 VM_MAP_PAGE_MASK(src_map)) ==
9952 vm_map_round_page(copy->size,
9953 VM_MAP_PAGE_MASK(src_map)));
9954 copy->size = copy_size;
9955 }
9956
9957 *copy_result = copy;
9958 return(KERN_SUCCESS);
9959
9960 #undef RETURN
9961 }
9962
9963 kern_return_t
9964 vm_map_copy_extract(
9965 vm_map_t src_map,
9966 vm_map_address_t src_addr,
9967 vm_map_size_t len,
9968 vm_map_copy_t *copy_result, /* OUT */
9969 vm_prot_t *cur_prot, /* OUT */
9970 vm_prot_t *max_prot)
9971 {
9972 vm_map_offset_t src_start, src_end;
9973 vm_map_copy_t copy;
9974 kern_return_t kr;
9975
9976 /*
9977 * Check for copies of zero bytes.
9978 */
9979
9980 if (len == 0) {
9981 *copy_result = VM_MAP_COPY_NULL;
9982 return(KERN_SUCCESS);
9983 }
9984
9985 /*
9986 * Check that the end address doesn't overflow
9987 */
9988 src_end = src_addr + len;
9989 if (src_end < src_addr)
9990 return KERN_INVALID_ADDRESS;
9991
9992 /*
9993 * Compute (page aligned) start and end of region
9994 */
9995 src_start = vm_map_trunc_page(src_addr, PAGE_MASK);
9996 src_end = vm_map_round_page(src_end, PAGE_MASK);
9997
9998 /*
9999 * Allocate a header element for the list.
10000 *
10001 * Use the start and end in the header to
10002 * remember the endpoints prior to rounding.
10003 */
10004
10005 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
10006 copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
10007 vm_map_copy_first_entry(copy) =
10008 vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
10009 copy->type = VM_MAP_COPY_ENTRY_LIST;
10010 copy->cpy_hdr.nentries = 0;
10011 copy->cpy_hdr.entries_pageable = TRUE;
10012
10013 vm_map_store_init(&copy->cpy_hdr);
10014
10015 copy->offset = 0;
10016 copy->size = len;
10017
10018 kr = vm_map_remap_extract(src_map,
10019 src_addr,
10020 len,
10021 FALSE, /* copy */
10022 &copy->cpy_hdr,
10023 cur_prot,
10024 max_prot,
10025 VM_INHERIT_SHARE,
10026 TRUE); /* pageable */
10027 if (kr != KERN_SUCCESS) {
10028 vm_map_copy_discard(copy);
10029 return kr;
10030 }
10031
10032 *copy_result = copy;
10033 return KERN_SUCCESS;
10034 }
10035
10036 /*
10037 * vm_map_copyin_object:
10038 *
10039 * Create a copy object from an object.
10040 * Our caller donates an object reference.
10041 */
10042
10043 kern_return_t
10044 vm_map_copyin_object(
10045 vm_object_t object,
10046 vm_object_offset_t offset, /* offset of region in object */
10047 vm_object_size_t size, /* size of region in object */
10048 vm_map_copy_t *copy_result) /* OUT */
10049 {
10050 vm_map_copy_t copy; /* Resulting copy */
10051
10052 /*
10053 * We drop the object into a special copy object
10054 * that contains the object directly.
10055 */
10056
10057 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
10058 copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
10059 copy->type = VM_MAP_COPY_OBJECT;
10060 copy->cpy_object = object;
10061 copy->offset = offset;
10062 copy->size = size;
10063
10064 *copy_result = copy;
10065 return(KERN_SUCCESS);
10066 }
10067
10068 static void
10069 vm_map_fork_share(
10070 vm_map_t old_map,
10071 vm_map_entry_t old_entry,
10072 vm_map_t new_map)
10073 {
10074 vm_object_t object;
10075 vm_map_entry_t new_entry;
10076
10077 /*
10078 * New sharing code. New map entry
10079 * references original object. Internal
10080 * objects use asynchronous copy algorithm for
10081 * future copies. First make sure we have
10082 * the right object. If we need a shadow,
10083 * or someone else already has one, then
10084 * make a new shadow and share it.
10085 */
10086
10087 object = VME_OBJECT(old_entry);
10088 if (old_entry->is_sub_map) {
10089 assert(old_entry->wired_count == 0);
10090 #ifndef NO_NESTED_PMAP
10091 if(old_entry->use_pmap) {
10092 kern_return_t result;
10093
10094 result = pmap_nest(new_map->pmap,
10095 (VME_SUBMAP(old_entry))->pmap,
10096 (addr64_t)old_entry->vme_start,
10097 (addr64_t)old_entry->vme_start,
10098 (uint64_t)(old_entry->vme_end - old_entry->vme_start));
10099 if(result)
10100 panic("vm_map_fork_share: pmap_nest failed!");
10101 }
10102 #endif /* NO_NESTED_PMAP */
10103 } else if (object == VM_OBJECT_NULL) {
10104 object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
10105 old_entry->vme_start));
10106 VME_OFFSET_SET(old_entry, 0);
10107 VME_OBJECT_SET(old_entry, object);
10108 old_entry->use_pmap = TRUE;
10109 assert(!old_entry->needs_copy);
10110 } else if (object->copy_strategy !=
10111 MEMORY_OBJECT_COPY_SYMMETRIC) {
10112
10113 /*
10114 * We are already using an asymmetric
10115 * copy, and therefore we already have
10116 * the right object.
10117 */
10118
10119 assert(! old_entry->needs_copy);
10120 }
10121 else if (old_entry->needs_copy || /* case 1 */
10122 object->shadowed || /* case 2 */
10123 (!object->true_share && /* case 3 */
10124 !old_entry->is_shared &&
10125 (object->vo_size >
10126 (vm_map_size_t)(old_entry->vme_end -
10127 old_entry->vme_start)))) {
10128
10129 /*
10130 * We need to create a shadow.
10131 * There are three cases here.
10132 * In the first case, we need to
10133 * complete a deferred symmetrical
10134 * copy that we participated in.
10135 * In the second and third cases,
10136 * we need to create the shadow so
10137 * that changes that we make to the
10138 * object do not interfere with
10139 * any symmetrical copies which
10140 * have occured (case 2) or which
10141 * might occur (case 3).
10142 *
10143 * The first case is when we had
10144 * deferred shadow object creation
10145 * via the entry->needs_copy mechanism.
10146 * This mechanism only works when
10147 * only one entry points to the source
10148 * object, and we are about to create
10149 * a second entry pointing to the
10150 * same object. The problem is that
10151 * there is no way of mapping from
10152 * an object to the entries pointing
10153 * to it. (Deferred shadow creation
10154 * works with one entry because occurs
10155 * at fault time, and we walk from the
10156 * entry to the object when handling
10157 * the fault.)
10158 *
10159 * The second case is when the object
10160 * to be shared has already been copied
10161 * with a symmetric copy, but we point
10162 * directly to the object without
10163 * needs_copy set in our entry. (This
10164 * can happen because different ranges
10165 * of an object can be pointed to by
10166 * different entries. In particular,
10167 * a single entry pointing to an object
10168 * can be split by a call to vm_inherit,
10169 * which, combined with task_create, can
10170 * result in the different entries
10171 * having different needs_copy values.)
10172 * The shadowed flag in the object allows
10173 * us to detect this case. The problem
10174 * with this case is that if this object
10175 * has or will have shadows, then we
10176 * must not perform an asymmetric copy
10177 * of this object, since such a copy
10178 * allows the object to be changed, which
10179 * will break the previous symmetrical
10180 * copies (which rely upon the object
10181 * not changing). In a sense, the shadowed
10182 * flag says "don't change this object".
10183 * We fix this by creating a shadow
10184 * object for this object, and sharing
10185 * that. This works because we are free
10186 * to change the shadow object (and thus
10187 * to use an asymmetric copy strategy);
10188 * this is also semantically correct,
10189 * since this object is temporary, and
10190 * therefore a copy of the object is
10191 * as good as the object itself. (This
10192 * is not true for permanent objects,
10193 * since the pager needs to see changes,
10194 * which won't happen if the changes
10195 * are made to a copy.)
10196 *
10197 * The third case is when the object
10198 * to be shared has parts sticking
10199 * outside of the entry we're working
10200 * with, and thus may in the future
10201 * be subject to a symmetrical copy.
10202 * (This is a preemptive version of
10203 * case 2.)
10204 */
10205 VME_OBJECT_SHADOW(old_entry,
10206 (vm_map_size_t) (old_entry->vme_end -
10207 old_entry->vme_start));
10208
10209 /*
10210 * If we're making a shadow for other than
10211 * copy on write reasons, then we have
10212 * to remove write permission.
10213 */
10214
10215 if (!old_entry->needs_copy &&
10216 (old_entry->protection & VM_PROT_WRITE)) {
10217 vm_prot_t prot;
10218
10219 prot = old_entry->protection & ~VM_PROT_WRITE;
10220
10221 if (override_nx(old_map, VME_ALIAS(old_entry)) && prot)
10222 prot |= VM_PROT_EXECUTE;
10223
10224 if (old_map->mapped_in_other_pmaps) {
10225 vm_object_pmap_protect(
10226 VME_OBJECT(old_entry),
10227 VME_OFFSET(old_entry),
10228 (old_entry->vme_end -
10229 old_entry->vme_start),
10230 PMAP_NULL,
10231 old_entry->vme_start,
10232 prot);
10233 } else {
10234 pmap_protect(old_map->pmap,
10235 old_entry->vme_start,
10236 old_entry->vme_end,
10237 prot);
10238 }
10239 }
10240
10241 old_entry->needs_copy = FALSE;
10242 object = VME_OBJECT(old_entry);
10243 }
10244
10245
10246 /*
10247 * If object was using a symmetric copy strategy,
10248 * change its copy strategy to the default
10249 * asymmetric copy strategy, which is copy_delay
10250 * in the non-norma case and copy_call in the
10251 * norma case. Bump the reference count for the
10252 * new entry.
10253 */
10254
10255 if(old_entry->is_sub_map) {
10256 vm_map_lock(VME_SUBMAP(old_entry));
10257 vm_map_reference(VME_SUBMAP(old_entry));
10258 vm_map_unlock(VME_SUBMAP(old_entry));
10259 } else {
10260 vm_object_lock(object);
10261 vm_object_reference_locked(object);
10262 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
10263 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
10264 }
10265 vm_object_unlock(object);
10266 }
10267
10268 /*
10269 * Clone the entry, using object ref from above.
10270 * Mark both entries as shared.
10271 */
10272
10273 new_entry = vm_map_entry_create(new_map, FALSE); /* Never the kernel
10274 * map or descendants */
10275 vm_map_entry_copy(new_entry, old_entry);
10276 old_entry->is_shared = TRUE;
10277 new_entry->is_shared = TRUE;
10278
10279 /*
10280 * Insert the entry into the new map -- we
10281 * know we're inserting at the end of the new
10282 * map.
10283 */
10284
10285 vm_map_store_entry_link(new_map, vm_map_last_entry(new_map), new_entry);
10286
10287 /*
10288 * Update the physical map
10289 */
10290
10291 if (old_entry->is_sub_map) {
10292 /* Bill Angell pmap support goes here */
10293 } else {
10294 pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
10295 old_entry->vme_end - old_entry->vme_start,
10296 old_entry->vme_start);
10297 }
10298 }
10299
10300 static boolean_t
10301 vm_map_fork_copy(
10302 vm_map_t old_map,
10303 vm_map_entry_t *old_entry_p,
10304 vm_map_t new_map)
10305 {
10306 vm_map_entry_t old_entry = *old_entry_p;
10307 vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start;
10308 vm_map_offset_t start = old_entry->vme_start;
10309 vm_map_copy_t copy;
10310 vm_map_entry_t last = vm_map_last_entry(new_map);
10311
10312 vm_map_unlock(old_map);
10313 /*
10314 * Use maxprot version of copyin because we
10315 * care about whether this memory can ever
10316 * be accessed, not just whether it's accessible
10317 * right now.
10318 */
10319 if (vm_map_copyin_maxprot(old_map, start, entry_size, FALSE, &copy)
10320 != KERN_SUCCESS) {
10321 /*
10322 * The map might have changed while it
10323 * was unlocked, check it again. Skip
10324 * any blank space or permanently
10325 * unreadable region.
10326 */
10327 vm_map_lock(old_map);
10328 if (!vm_map_lookup_entry(old_map, start, &last) ||
10329 (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) {
10330 last = last->vme_next;
10331 }
10332 *old_entry_p = last;
10333
10334 /*
10335 * XXX For some error returns, want to
10336 * XXX skip to the next element. Note
10337 * that INVALID_ADDRESS and
10338 * PROTECTION_FAILURE are handled above.
10339 */
10340
10341 return FALSE;
10342 }
10343
10344 /*
10345 * Insert the copy into the new map
10346 */
10347
10348 vm_map_copy_insert(new_map, last, copy);
10349
10350 /*
10351 * Pick up the traversal at the end of
10352 * the copied region.
10353 */
10354
10355 vm_map_lock(old_map);
10356 start += entry_size;
10357 if (! vm_map_lookup_entry(old_map, start, &last)) {
10358 last = last->vme_next;
10359 } else {
10360 if (last->vme_start == start) {
10361 /*
10362 * No need to clip here and we don't
10363 * want to cause any unnecessary
10364 * unnesting...
10365 */
10366 } else {
10367 vm_map_clip_start(old_map, last, start);
10368 }
10369 }
10370 *old_entry_p = last;
10371
10372 return TRUE;
10373 }
10374
10375 /*
10376 * vm_map_fork:
10377 *
10378 * Create and return a new map based on the old
10379 * map, according to the inheritance values on the
10380 * regions in that map.
10381 *
10382 * The source map must not be locked.
10383 */
10384 vm_map_t
10385 vm_map_fork(
10386 ledger_t ledger,
10387 vm_map_t old_map)
10388 {
10389 pmap_t new_pmap;
10390 vm_map_t new_map;
10391 vm_map_entry_t old_entry;
10392 vm_map_size_t new_size = 0, entry_size;
10393 vm_map_entry_t new_entry;
10394 boolean_t src_needs_copy;
10395 boolean_t new_entry_needs_copy;
10396 boolean_t pmap_is64bit;
10397
10398 pmap_is64bit =
10399 #if defined(__i386__) || defined(__x86_64__)
10400 old_map->pmap->pm_task_map != TASK_MAP_32BIT;
10401 #else
10402 #error Unknown architecture.
10403 #endif
10404
10405 new_pmap = pmap_create(ledger, (vm_map_size_t) 0, pmap_is64bit);
10406
10407 vm_map_reference_swap(old_map);
10408 vm_map_lock(old_map);
10409
10410 new_map = vm_map_create(new_pmap,
10411 old_map->min_offset,
10412 old_map->max_offset,
10413 old_map->hdr.entries_pageable);
10414 /* inherit the parent map's page size */
10415 vm_map_set_page_shift(new_map, VM_MAP_PAGE_SHIFT(old_map));
10416 for (
10417 old_entry = vm_map_first_entry(old_map);
10418 old_entry != vm_map_to_entry(old_map);
10419 ) {
10420
10421 entry_size = old_entry->vme_end - old_entry->vme_start;
10422
10423 switch (old_entry->inheritance) {
10424 case VM_INHERIT_NONE:
10425 break;
10426
10427 case VM_INHERIT_SHARE:
10428 vm_map_fork_share(old_map, old_entry, new_map);
10429 new_size += entry_size;
10430 break;
10431
10432 case VM_INHERIT_COPY:
10433
10434 /*
10435 * Inline the copy_quickly case;
10436 * upon failure, fall back on call
10437 * to vm_map_fork_copy.
10438 */
10439
10440 if(old_entry->is_sub_map)
10441 break;
10442 if ((old_entry->wired_count != 0) ||
10443 ((VME_OBJECT(old_entry) != NULL) &&
10444 (VME_OBJECT(old_entry)->true_share))) {
10445 goto slow_vm_map_fork_copy;
10446 }
10447
10448 new_entry = vm_map_entry_create(new_map, FALSE); /* never the kernel map or descendants */
10449 vm_map_entry_copy(new_entry, old_entry);
10450 if (new_entry->is_sub_map) {
10451 /* clear address space specifics */
10452 new_entry->use_pmap = FALSE;
10453 }
10454
10455 if (! vm_object_copy_quickly(
10456 &VME_OBJECT(new_entry),
10457 VME_OFFSET(old_entry),
10458 (old_entry->vme_end -
10459 old_entry->vme_start),
10460 &src_needs_copy,
10461 &new_entry_needs_copy)) {
10462 vm_map_entry_dispose(new_map, new_entry);
10463 goto slow_vm_map_fork_copy;
10464 }
10465
10466 /*
10467 * Handle copy-on-write obligations
10468 */
10469
10470 if (src_needs_copy && !old_entry->needs_copy) {
10471 vm_prot_t prot;
10472
10473 prot = old_entry->protection & ~VM_PROT_WRITE;
10474
10475 if (override_nx(old_map, VME_ALIAS(old_entry))
10476 && prot)
10477 prot |= VM_PROT_EXECUTE;
10478
10479 vm_object_pmap_protect(
10480 VME_OBJECT(old_entry),
10481 VME_OFFSET(old_entry),
10482 (old_entry->vme_end -
10483 old_entry->vme_start),
10484 ((old_entry->is_shared
10485 || old_map->mapped_in_other_pmaps)
10486 ? PMAP_NULL :
10487 old_map->pmap),
10488 old_entry->vme_start,
10489 prot);
10490
10491 assert(old_entry->wired_count == 0);
10492 old_entry->needs_copy = TRUE;
10493 }
10494 new_entry->needs_copy = new_entry_needs_copy;
10495
10496 /*
10497 * Insert the entry at the end
10498 * of the map.
10499 */
10500
10501 vm_map_store_entry_link(new_map, vm_map_last_entry(new_map),
10502 new_entry);
10503 new_size += entry_size;
10504 break;
10505
10506 slow_vm_map_fork_copy:
10507 if (vm_map_fork_copy(old_map, &old_entry, new_map)) {
10508 new_size += entry_size;
10509 }
10510 continue;
10511 }
10512 old_entry = old_entry->vme_next;
10513 }
10514
10515
10516 new_map->size = new_size;
10517 vm_map_unlock(old_map);
10518 vm_map_deallocate(old_map);
10519
10520 return(new_map);
10521 }
10522
10523 /*
10524 * vm_map_exec:
10525 *
10526 * Setup the "new_map" with the proper execution environment according
10527 * to the type of executable (platform, 64bit, chroot environment).
10528 * Map the comm page and shared region, etc...
10529 */
10530 kern_return_t
10531 vm_map_exec(
10532 vm_map_t new_map,
10533 task_t task,
10534 void *fsroot,
10535 cpu_type_t cpu)
10536 {
10537 SHARED_REGION_TRACE_DEBUG(
10538 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): ->\n",
10539 (void *)VM_KERNEL_ADDRPERM(current_task()),
10540 (void *)VM_KERNEL_ADDRPERM(new_map),
10541 (void *)VM_KERNEL_ADDRPERM(task),
10542 (void *)VM_KERNEL_ADDRPERM(fsroot),
10543 cpu));
10544 (void) vm_commpage_enter(new_map, task);
10545 (void) vm_shared_region_enter(new_map, task, fsroot, cpu);
10546 SHARED_REGION_TRACE_DEBUG(
10547 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): <-\n",
10548 (void *)VM_KERNEL_ADDRPERM(current_task()),
10549 (void *)VM_KERNEL_ADDRPERM(new_map),
10550 (void *)VM_KERNEL_ADDRPERM(task),
10551 (void *)VM_KERNEL_ADDRPERM(fsroot),
10552 cpu));
10553 return KERN_SUCCESS;
10554 }
10555
10556 /*
10557 * vm_map_lookup_locked:
10558 *
10559 * Finds the VM object, offset, and
10560 * protection for a given virtual address in the
10561 * specified map, assuming a page fault of the
10562 * type specified.
10563 *
10564 * Returns the (object, offset, protection) for
10565 * this address, whether it is wired down, and whether
10566 * this map has the only reference to the data in question.
10567 * In order to later verify this lookup, a "version"
10568 * is returned.
10569 *
10570 * The map MUST be locked by the caller and WILL be
10571 * locked on exit. In order to guarantee the
10572 * existence of the returned object, it is returned
10573 * locked.
10574 *
10575 * If a lookup is requested with "write protection"
10576 * specified, the map may be changed to perform virtual
10577 * copying operations, although the data referenced will
10578 * remain the same.
10579 */
10580 kern_return_t
10581 vm_map_lookup_locked(
10582 vm_map_t *var_map, /* IN/OUT */
10583 vm_map_offset_t vaddr,
10584 vm_prot_t fault_type,
10585 int object_lock_type,
10586 vm_map_version_t *out_version, /* OUT */
10587 vm_object_t *object, /* OUT */
10588 vm_object_offset_t *offset, /* OUT */
10589 vm_prot_t *out_prot, /* OUT */
10590 boolean_t *wired, /* OUT */
10591 vm_object_fault_info_t fault_info, /* OUT */
10592 vm_map_t *real_map)
10593 {
10594 vm_map_entry_t entry;
10595 register vm_map_t map = *var_map;
10596 vm_map_t old_map = *var_map;
10597 vm_map_t cow_sub_map_parent = VM_MAP_NULL;
10598 vm_map_offset_t cow_parent_vaddr = 0;
10599 vm_map_offset_t old_start = 0;
10600 vm_map_offset_t old_end = 0;
10601 register vm_prot_t prot;
10602 boolean_t mask_protections;
10603 boolean_t force_copy;
10604 vm_prot_t original_fault_type;
10605
10606 /*
10607 * VM_PROT_MASK means that the caller wants us to use "fault_type"
10608 * as a mask against the mapping's actual protections, not as an
10609 * absolute value.
10610 */
10611 mask_protections = (fault_type & VM_PROT_IS_MASK) ? TRUE : FALSE;
10612 force_copy = (fault_type & VM_PROT_COPY) ? TRUE : FALSE;
10613 fault_type &= VM_PROT_ALL;
10614 original_fault_type = fault_type;
10615
10616 *real_map = map;
10617
10618 RetryLookup:
10619 fault_type = original_fault_type;
10620
10621 /*
10622 * If the map has an interesting hint, try it before calling
10623 * full blown lookup routine.
10624 */
10625 entry = map->hint;
10626
10627 if ((entry == vm_map_to_entry(map)) ||
10628 (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
10629 vm_map_entry_t tmp_entry;
10630
10631 /*
10632 * Entry was either not a valid hint, or the vaddr
10633 * was not contained in the entry, so do a full lookup.
10634 */
10635 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
10636 if((cow_sub_map_parent) && (cow_sub_map_parent != map))
10637 vm_map_unlock(cow_sub_map_parent);
10638 if((*real_map != map)
10639 && (*real_map != cow_sub_map_parent))
10640 vm_map_unlock(*real_map);
10641 return KERN_INVALID_ADDRESS;
10642 }
10643
10644 entry = tmp_entry;
10645 }
10646 if(map == old_map) {
10647 old_start = entry->vme_start;
10648 old_end = entry->vme_end;
10649 }
10650
10651 /*
10652 * Handle submaps. Drop lock on upper map, submap is
10653 * returned locked.
10654 */
10655
10656 submap_recurse:
10657 if (entry->is_sub_map) {
10658 vm_map_offset_t local_vaddr;
10659 vm_map_offset_t end_delta;
10660 vm_map_offset_t start_delta;
10661 vm_map_entry_t submap_entry;
10662 boolean_t mapped_needs_copy=FALSE;
10663
10664 local_vaddr = vaddr;
10665
10666 if ((entry->use_pmap && !(fault_type & VM_PROT_WRITE))) {
10667 /* if real_map equals map we unlock below */
10668 if ((*real_map != map) &&
10669 (*real_map != cow_sub_map_parent))
10670 vm_map_unlock(*real_map);
10671 *real_map = VME_SUBMAP(entry);
10672 }
10673
10674 if(entry->needs_copy && (fault_type & VM_PROT_WRITE)) {
10675 if (!mapped_needs_copy) {
10676 if (vm_map_lock_read_to_write(map)) {
10677 vm_map_lock_read(map);
10678 *real_map = map;
10679 goto RetryLookup;
10680 }
10681 vm_map_lock_read(VME_SUBMAP(entry));
10682 *var_map = VME_SUBMAP(entry);
10683 cow_sub_map_parent = map;
10684 /* reset base to map before cow object */
10685 /* this is the map which will accept */
10686 /* the new cow object */
10687 old_start = entry->vme_start;
10688 old_end = entry->vme_end;
10689 cow_parent_vaddr = vaddr;
10690 mapped_needs_copy = TRUE;
10691 } else {
10692 vm_map_lock_read(VME_SUBMAP(entry));
10693 *var_map = VME_SUBMAP(entry);
10694 if((cow_sub_map_parent != map) &&
10695 (*real_map != map))
10696 vm_map_unlock(map);
10697 }
10698 } else {
10699 vm_map_lock_read(VME_SUBMAP(entry));
10700 *var_map = VME_SUBMAP(entry);
10701 /* leave map locked if it is a target */
10702 /* cow sub_map above otherwise, just */
10703 /* follow the maps down to the object */
10704 /* here we unlock knowing we are not */
10705 /* revisiting the map. */
10706 if((*real_map != map) && (map != cow_sub_map_parent))
10707 vm_map_unlock_read(map);
10708 }
10709
10710 map = *var_map;
10711
10712 /* calculate the offset in the submap for vaddr */
10713 local_vaddr = (local_vaddr - entry->vme_start) + VME_OFFSET(entry);
10714
10715 RetrySubMap:
10716 if(!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
10717 if((cow_sub_map_parent) && (cow_sub_map_parent != map)){
10718 vm_map_unlock(cow_sub_map_parent);
10719 }
10720 if((*real_map != map)
10721 && (*real_map != cow_sub_map_parent)) {
10722 vm_map_unlock(*real_map);
10723 }
10724 *real_map = map;
10725 return KERN_INVALID_ADDRESS;
10726 }
10727
10728 /* find the attenuated shadow of the underlying object */
10729 /* on our target map */
10730
10731 /* in english the submap object may extend beyond the */
10732 /* region mapped by the entry or, may only fill a portion */
10733 /* of it. For our purposes, we only care if the object */
10734 /* doesn't fill. In this case the area which will */
10735 /* ultimately be clipped in the top map will only need */
10736 /* to be as big as the portion of the underlying entry */
10737 /* which is mapped */
10738 start_delta = submap_entry->vme_start > VME_OFFSET(entry) ?
10739 submap_entry->vme_start - VME_OFFSET(entry) : 0;
10740
10741 end_delta =
10742 (VME_OFFSET(entry) + start_delta + (old_end - old_start)) <=
10743 submap_entry->vme_end ?
10744 0 : (VME_OFFSET(entry) +
10745 (old_end - old_start))
10746 - submap_entry->vme_end;
10747
10748 old_start += start_delta;
10749 old_end -= end_delta;
10750
10751 if(submap_entry->is_sub_map) {
10752 entry = submap_entry;
10753 vaddr = local_vaddr;
10754 goto submap_recurse;
10755 }
10756
10757 if(((fault_type & VM_PROT_WRITE) && cow_sub_map_parent)) {
10758
10759 vm_object_t sub_object, copy_object;
10760 vm_object_offset_t copy_offset;
10761 vm_map_offset_t local_start;
10762 vm_map_offset_t local_end;
10763 boolean_t copied_slowly = FALSE;
10764
10765 if (vm_map_lock_read_to_write(map)) {
10766 vm_map_lock_read(map);
10767 old_start -= start_delta;
10768 old_end += end_delta;
10769 goto RetrySubMap;
10770 }
10771
10772
10773 sub_object = VME_OBJECT(submap_entry);
10774 if (sub_object == VM_OBJECT_NULL) {
10775 sub_object =
10776 vm_object_allocate(
10777 (vm_map_size_t)
10778 (submap_entry->vme_end -
10779 submap_entry->vme_start));
10780 VME_OBJECT_SET(submap_entry, sub_object);
10781 VME_OFFSET_SET(submap_entry, 0);
10782 }
10783 local_start = local_vaddr -
10784 (cow_parent_vaddr - old_start);
10785 local_end = local_vaddr +
10786 (old_end - cow_parent_vaddr);
10787 vm_map_clip_start(map, submap_entry, local_start);
10788 vm_map_clip_end(map, submap_entry, local_end);
10789 if (submap_entry->is_sub_map) {
10790 /* unnesting was done when clipping */
10791 assert(!submap_entry->use_pmap);
10792 }
10793
10794 /* This is the COW case, lets connect */
10795 /* an entry in our space to the underlying */
10796 /* object in the submap, bypassing the */
10797 /* submap. */
10798
10799
10800 if(submap_entry->wired_count != 0 ||
10801 (sub_object->copy_strategy ==
10802 MEMORY_OBJECT_COPY_NONE)) {
10803 vm_object_lock(sub_object);
10804 vm_object_copy_slowly(sub_object,
10805 VME_OFFSET(submap_entry),
10806 (submap_entry->vme_end -
10807 submap_entry->vme_start),
10808 FALSE,
10809 &copy_object);
10810 copied_slowly = TRUE;
10811 } else {
10812
10813 /* set up shadow object */
10814 copy_object = sub_object;
10815 vm_object_reference(copy_object);
10816 sub_object->shadowed = TRUE;
10817 assert(submap_entry->wired_count == 0);
10818 submap_entry->needs_copy = TRUE;
10819
10820 prot = submap_entry->protection & ~VM_PROT_WRITE;
10821
10822 if (override_nx(old_map,
10823 VME_ALIAS(submap_entry))
10824 && prot)
10825 prot |= VM_PROT_EXECUTE;
10826
10827 vm_object_pmap_protect(
10828 sub_object,
10829 VME_OFFSET(submap_entry),
10830 submap_entry->vme_end -
10831 submap_entry->vme_start,
10832 (submap_entry->is_shared
10833 || map->mapped_in_other_pmaps) ?
10834 PMAP_NULL : map->pmap,
10835 submap_entry->vme_start,
10836 prot);
10837 }
10838
10839 /*
10840 * Adjust the fault offset to the submap entry.
10841 */
10842 copy_offset = (local_vaddr -
10843 submap_entry->vme_start +
10844 VME_OFFSET(submap_entry));
10845
10846 /* This works diffently than the */
10847 /* normal submap case. We go back */
10848 /* to the parent of the cow map and*/
10849 /* clip out the target portion of */
10850 /* the sub_map, substituting the */
10851 /* new copy object, */
10852
10853 vm_map_unlock(map);
10854 local_start = old_start;
10855 local_end = old_end;
10856 map = cow_sub_map_parent;
10857 *var_map = cow_sub_map_parent;
10858 vaddr = cow_parent_vaddr;
10859 cow_sub_map_parent = NULL;
10860
10861 if(!vm_map_lookup_entry(map,
10862 vaddr, &entry)) {
10863 vm_object_deallocate(
10864 copy_object);
10865 vm_map_lock_write_to_read(map);
10866 return KERN_INVALID_ADDRESS;
10867 }
10868
10869 /* clip out the portion of space */
10870 /* mapped by the sub map which */
10871 /* corresponds to the underlying */
10872 /* object */
10873
10874 /*
10875 * Clip (and unnest) the smallest nested chunk
10876 * possible around the faulting address...
10877 */
10878 local_start = vaddr & ~(pmap_nesting_size_min - 1);
10879 local_end = local_start + pmap_nesting_size_min;
10880 /*
10881 * ... but don't go beyond the "old_start" to "old_end"
10882 * range, to avoid spanning over another VM region
10883 * with a possibly different VM object and/or offset.
10884 */
10885 if (local_start < old_start) {
10886 local_start = old_start;
10887 }
10888 if (local_end > old_end) {
10889 local_end = old_end;
10890 }
10891 /*
10892 * Adjust copy_offset to the start of the range.
10893 */
10894 copy_offset -= (vaddr - local_start);
10895
10896 vm_map_clip_start(map, entry, local_start);
10897 vm_map_clip_end(map, entry, local_end);
10898 if (entry->is_sub_map) {
10899 /* unnesting was done when clipping */
10900 assert(!entry->use_pmap);
10901 }
10902
10903 /* substitute copy object for */
10904 /* shared map entry */
10905 vm_map_deallocate(VME_SUBMAP(entry));
10906 assert(!entry->iokit_acct);
10907 entry->is_sub_map = FALSE;
10908 entry->use_pmap = TRUE;
10909 VME_OBJECT_SET(entry, copy_object);
10910
10911 /* propagate the submap entry's protections */
10912 entry->protection |= submap_entry->protection;
10913 entry->max_protection |= submap_entry->max_protection;
10914
10915 if(copied_slowly) {
10916 VME_OFFSET_SET(entry, local_start - old_start);
10917 entry->needs_copy = FALSE;
10918 entry->is_shared = FALSE;
10919 } else {
10920 VME_OFFSET_SET(entry, copy_offset);
10921 assert(entry->wired_count == 0);
10922 entry->needs_copy = TRUE;
10923 if(entry->inheritance == VM_INHERIT_SHARE)
10924 entry->inheritance = VM_INHERIT_COPY;
10925 if (map != old_map)
10926 entry->is_shared = TRUE;
10927 }
10928 if(entry->inheritance == VM_INHERIT_SHARE)
10929 entry->inheritance = VM_INHERIT_COPY;
10930
10931 vm_map_lock_write_to_read(map);
10932 } else {
10933 if((cow_sub_map_parent)
10934 && (cow_sub_map_parent != *real_map)
10935 && (cow_sub_map_parent != map)) {
10936 vm_map_unlock(cow_sub_map_parent);
10937 }
10938 entry = submap_entry;
10939 vaddr = local_vaddr;
10940 }
10941 }
10942
10943 /*
10944 * Check whether this task is allowed to have
10945 * this page.
10946 */
10947
10948 prot = entry->protection;
10949
10950 if (override_nx(old_map, VME_ALIAS(entry)) && prot) {
10951 /*
10952 * HACK -- if not a stack, then allow execution
10953 */
10954 prot |= VM_PROT_EXECUTE;
10955 }
10956
10957 if (mask_protections) {
10958 fault_type &= prot;
10959 if (fault_type == VM_PROT_NONE) {
10960 goto protection_failure;
10961 }
10962 }
10963 if ((fault_type & (prot)) != fault_type) {
10964 protection_failure:
10965 if (*real_map != map) {
10966 vm_map_unlock(*real_map);
10967 }
10968 *real_map = map;
10969
10970 if ((fault_type & VM_PROT_EXECUTE) && prot)
10971 log_stack_execution_failure((addr64_t)vaddr, prot);
10972
10973 DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL);
10974 return KERN_PROTECTION_FAILURE;
10975 }
10976
10977 /*
10978 * If this page is not pageable, we have to get
10979 * it for all possible accesses.
10980 */
10981
10982 *wired = (entry->wired_count != 0);
10983 if (*wired)
10984 fault_type = prot;
10985
10986 /*
10987 * If the entry was copy-on-write, we either ...
10988 */
10989
10990 if (entry->needs_copy) {
10991 /*
10992 * If we want to write the page, we may as well
10993 * handle that now since we've got the map locked.
10994 *
10995 * If we don't need to write the page, we just
10996 * demote the permissions allowed.
10997 */
10998
10999 if ((fault_type & VM_PROT_WRITE) || *wired || force_copy) {
11000 /*
11001 * Make a new object, and place it in the
11002 * object chain. Note that no new references
11003 * have appeared -- one just moved from the
11004 * map to the new object.
11005 */
11006
11007 if (vm_map_lock_read_to_write(map)) {
11008 vm_map_lock_read(map);
11009 goto RetryLookup;
11010 }
11011 VME_OBJECT_SHADOW(entry,
11012 (vm_map_size_t) (entry->vme_end -
11013 entry->vme_start));
11014
11015 VME_OBJECT(entry)->shadowed = TRUE;
11016 entry->needs_copy = FALSE;
11017 vm_map_lock_write_to_read(map);
11018 }
11019 else {
11020 /*
11021 * We're attempting to read a copy-on-write
11022 * page -- don't allow writes.
11023 */
11024
11025 prot &= (~VM_PROT_WRITE);
11026 }
11027 }
11028
11029 /*
11030 * Create an object if necessary.
11031 */
11032 if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
11033
11034 if (vm_map_lock_read_to_write(map)) {
11035 vm_map_lock_read(map);
11036 goto RetryLookup;
11037 }
11038
11039 VME_OBJECT_SET(entry,
11040 vm_object_allocate(
11041 (vm_map_size_t)(entry->vme_end -
11042 entry->vme_start)));
11043 VME_OFFSET_SET(entry, 0);
11044 vm_map_lock_write_to_read(map);
11045 }
11046
11047 /*
11048 * Return the object/offset from this entry. If the entry
11049 * was copy-on-write or empty, it has been fixed up. Also
11050 * return the protection.
11051 */
11052
11053 *offset = (vaddr - entry->vme_start) + VME_OFFSET(entry);
11054 *object = VME_OBJECT(entry);
11055 *out_prot = prot;
11056
11057 if (fault_info) {
11058 fault_info->interruptible = THREAD_UNINT; /* for now... */
11059 /* ... the caller will change "interruptible" if needed */
11060 fault_info->cluster_size = 0;
11061 fault_info->user_tag = VME_ALIAS(entry);
11062 fault_info->pmap_options = 0;
11063 if (entry->iokit_acct ||
11064 (!entry->is_sub_map && !entry->use_pmap)) {
11065 fault_info->pmap_options |= PMAP_OPTIONS_ALT_ACCT;
11066 }
11067 fault_info->behavior = entry->behavior;
11068 fault_info->lo_offset = VME_OFFSET(entry);
11069 fault_info->hi_offset =
11070 (entry->vme_end - entry->vme_start) + VME_OFFSET(entry);
11071 fault_info->no_cache = entry->no_cache;
11072 fault_info->stealth = FALSE;
11073 fault_info->io_sync = FALSE;
11074 if (entry->used_for_jit ||
11075 entry->vme_resilient_codesign) {
11076 fault_info->cs_bypass = TRUE;
11077 } else {
11078 fault_info->cs_bypass = FALSE;
11079 }
11080 fault_info->mark_zf_absent = FALSE;
11081 fault_info->batch_pmap_op = FALSE;
11082 }
11083
11084 /*
11085 * Lock the object to prevent it from disappearing
11086 */
11087 if (object_lock_type == OBJECT_LOCK_EXCLUSIVE)
11088 vm_object_lock(*object);
11089 else
11090 vm_object_lock_shared(*object);
11091
11092 /*
11093 * Save the version number
11094 */
11095
11096 out_version->main_timestamp = map->timestamp;
11097
11098 return KERN_SUCCESS;
11099 }
11100
11101
11102 /*
11103 * vm_map_verify:
11104 *
11105 * Verifies that the map in question has not changed
11106 * since the given version. If successful, the map
11107 * will not change until vm_map_verify_done() is called.
11108 */
11109 boolean_t
11110 vm_map_verify(
11111 register vm_map_t map,
11112 register vm_map_version_t *version) /* REF */
11113 {
11114 boolean_t result;
11115
11116 vm_map_lock_read(map);
11117 result = (map->timestamp == version->main_timestamp);
11118
11119 if (!result)
11120 vm_map_unlock_read(map);
11121
11122 return(result);
11123 }
11124
11125 /*
11126 * vm_map_verify_done:
11127 *
11128 * Releases locks acquired by a vm_map_verify.
11129 *
11130 * This is now a macro in vm/vm_map.h. It does a
11131 * vm_map_unlock_read on the map.
11132 */
11133
11134
11135 /*
11136 * TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
11137 * Goes away after regular vm_region_recurse function migrates to
11138 * 64 bits
11139 * vm_region_recurse: A form of vm_region which follows the
11140 * submaps in a target map
11141 *
11142 */
11143
11144 kern_return_t
11145 vm_map_region_recurse_64(
11146 vm_map_t map,
11147 vm_map_offset_t *address, /* IN/OUT */
11148 vm_map_size_t *size, /* OUT */
11149 natural_t *nesting_depth, /* IN/OUT */
11150 vm_region_submap_info_64_t submap_info, /* IN/OUT */
11151 mach_msg_type_number_t *count) /* IN/OUT */
11152 {
11153 mach_msg_type_number_t original_count;
11154 vm_region_extended_info_data_t extended;
11155 vm_map_entry_t tmp_entry;
11156 vm_map_offset_t user_address;
11157 unsigned int user_max_depth;
11158
11159 /*
11160 * "curr_entry" is the VM map entry preceding or including the
11161 * address we're looking for.
11162 * "curr_map" is the map or sub-map containing "curr_entry".
11163 * "curr_address" is the equivalent of the top map's "user_address"
11164 * in the current map.
11165 * "curr_offset" is the cumulated offset of "curr_map" in the
11166 * target task's address space.
11167 * "curr_depth" is the depth of "curr_map" in the chain of
11168 * sub-maps.
11169 *
11170 * "curr_max_below" and "curr_max_above" limit the range (around
11171 * "curr_address") we should take into account in the current (sub)map.
11172 * They limit the range to what's visible through the map entries
11173 * we've traversed from the top map to the current map.
11174
11175 */
11176 vm_map_entry_t curr_entry;
11177 vm_map_address_t curr_address;
11178 vm_map_offset_t curr_offset;
11179 vm_map_t curr_map;
11180 unsigned int curr_depth;
11181 vm_map_offset_t curr_max_below, curr_max_above;
11182 vm_map_offset_t curr_skip;
11183
11184 /*
11185 * "next_" is the same as "curr_" but for the VM region immediately
11186 * after the address we're looking for. We need to keep track of this
11187 * too because we want to return info about that region if the
11188 * address we're looking for is not mapped.
11189 */
11190 vm_map_entry_t next_entry;
11191 vm_map_offset_t next_offset;
11192 vm_map_offset_t next_address;
11193 vm_map_t next_map;
11194 unsigned int next_depth;
11195 vm_map_offset_t next_max_below, next_max_above;
11196 vm_map_offset_t next_skip;
11197
11198 boolean_t look_for_pages;
11199 vm_region_submap_short_info_64_t short_info;
11200
11201 if (map == VM_MAP_NULL) {
11202 /* no address space to work on */
11203 return KERN_INVALID_ARGUMENT;
11204 }
11205
11206
11207 if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) {
11208 /*
11209 * "info" structure is not big enough and
11210 * would overflow
11211 */
11212 return KERN_INVALID_ARGUMENT;
11213 }
11214
11215 original_count = *count;
11216
11217 if (original_count < VM_REGION_SUBMAP_INFO_V0_COUNT_64) {
11218 *count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
11219 look_for_pages = FALSE;
11220 short_info = (vm_region_submap_short_info_64_t) submap_info;
11221 submap_info = NULL;
11222 } else {
11223 look_for_pages = TRUE;
11224 *count = VM_REGION_SUBMAP_INFO_V0_COUNT_64;
11225 short_info = NULL;
11226
11227 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
11228 *count = VM_REGION_SUBMAP_INFO_V1_COUNT_64;
11229 }
11230 }
11231
11232 user_address = *address;
11233 user_max_depth = *nesting_depth;
11234
11235 if (not_in_kdp) {
11236 vm_map_lock_read(map);
11237 }
11238
11239 recurse_again:
11240 curr_entry = NULL;
11241 curr_map = map;
11242 curr_address = user_address;
11243 curr_offset = 0;
11244 curr_skip = 0;
11245 curr_depth = 0;
11246 curr_max_above = ((vm_map_offset_t) -1) - curr_address;
11247 curr_max_below = curr_address;
11248
11249 next_entry = NULL;
11250 next_map = NULL;
11251 next_address = 0;
11252 next_offset = 0;
11253 next_skip = 0;
11254 next_depth = 0;
11255 next_max_above = (vm_map_offset_t) -1;
11256 next_max_below = (vm_map_offset_t) -1;
11257
11258 for (;;) {
11259 if (vm_map_lookup_entry(curr_map,
11260 curr_address,
11261 &tmp_entry)) {
11262 /* tmp_entry contains the address we're looking for */
11263 curr_entry = tmp_entry;
11264 } else {
11265 vm_map_offset_t skip;
11266 /*
11267 * The address is not mapped. "tmp_entry" is the
11268 * map entry preceding the address. We want the next
11269 * one, if it exists.
11270 */
11271 curr_entry = tmp_entry->vme_next;
11272
11273 if (curr_entry == vm_map_to_entry(curr_map) ||
11274 (curr_entry->vme_start >=
11275 curr_address + curr_max_above)) {
11276 /* no next entry at this level: stop looking */
11277 if (not_in_kdp) {
11278 vm_map_unlock_read(curr_map);
11279 }
11280 curr_entry = NULL;
11281 curr_map = NULL;
11282 curr_skip = 0;
11283 curr_offset = 0;
11284 curr_depth = 0;
11285 curr_max_above = 0;
11286 curr_max_below = 0;
11287 break;
11288 }
11289
11290 /* adjust current address and offset */
11291 skip = curr_entry->vme_start - curr_address;
11292 curr_address = curr_entry->vme_start;
11293 curr_skip += skip;
11294 curr_offset += skip;
11295 curr_max_above -= skip;
11296 curr_max_below = 0;
11297 }
11298
11299 /*
11300 * Is the next entry at this level closer to the address (or
11301 * deeper in the submap chain) than the one we had
11302 * so far ?
11303 */
11304 tmp_entry = curr_entry->vme_next;
11305 if (tmp_entry == vm_map_to_entry(curr_map)) {
11306 /* no next entry at this level */
11307 } else if (tmp_entry->vme_start >=
11308 curr_address + curr_max_above) {
11309 /*
11310 * tmp_entry is beyond the scope of what we mapped of
11311 * this submap in the upper level: ignore it.
11312 */
11313 } else if ((next_entry == NULL) ||
11314 (tmp_entry->vme_start + curr_offset <=
11315 next_entry->vme_start + next_offset)) {
11316 /*
11317 * We didn't have a "next_entry" or this one is
11318 * closer to the address we're looking for:
11319 * use this "tmp_entry" as the new "next_entry".
11320 */
11321 if (next_entry != NULL) {
11322 /* unlock the last "next_map" */
11323 if (next_map != curr_map && not_in_kdp) {
11324 vm_map_unlock_read(next_map);
11325 }
11326 }
11327 next_entry = tmp_entry;
11328 next_map = curr_map;
11329 next_depth = curr_depth;
11330 next_address = next_entry->vme_start;
11331 next_skip = curr_skip;
11332 next_skip += (next_address - curr_address);
11333 next_offset = curr_offset;
11334 next_offset += (next_address - curr_address);
11335 next_max_above = MIN(next_max_above, curr_max_above);
11336 next_max_above = MIN(next_max_above,
11337 next_entry->vme_end - next_address);
11338 next_max_below = MIN(next_max_below, curr_max_below);
11339 next_max_below = MIN(next_max_below,
11340 next_address - next_entry->vme_start);
11341 }
11342
11343 /*
11344 * "curr_max_{above,below}" allow us to keep track of the
11345 * portion of the submap that is actually mapped at this level:
11346 * the rest of that submap is irrelevant to us, since it's not
11347 * mapped here.
11348 * The relevant portion of the map starts at
11349 * "VME_OFFSET(curr_entry)" up to the size of "curr_entry".
11350 */
11351 curr_max_above = MIN(curr_max_above,
11352 curr_entry->vme_end - curr_address);
11353 curr_max_below = MIN(curr_max_below,
11354 curr_address - curr_entry->vme_start);
11355
11356 if (!curr_entry->is_sub_map ||
11357 curr_depth >= user_max_depth) {
11358 /*
11359 * We hit a leaf map or we reached the maximum depth
11360 * we could, so stop looking. Keep the current map
11361 * locked.
11362 */
11363 break;
11364 }
11365
11366 /*
11367 * Get down to the next submap level.
11368 */
11369
11370 /*
11371 * Lock the next level and unlock the current level,
11372 * unless we need to keep it locked to access the "next_entry"
11373 * later.
11374 */
11375 if (not_in_kdp) {
11376 vm_map_lock_read(VME_SUBMAP(curr_entry));
11377 }
11378 if (curr_map == next_map) {
11379 /* keep "next_map" locked in case we need it */
11380 } else {
11381 /* release this map */
11382 if (not_in_kdp)
11383 vm_map_unlock_read(curr_map);
11384 }
11385
11386 /*
11387 * Adjust the offset. "curr_entry" maps the submap
11388 * at relative address "curr_entry->vme_start" in the
11389 * curr_map but skips the first "VME_OFFSET(curr_entry)"
11390 * bytes of the submap.
11391 * "curr_offset" always represents the offset of a virtual
11392 * address in the curr_map relative to the absolute address
11393 * space (i.e. the top-level VM map).
11394 */
11395 curr_offset +=
11396 (VME_OFFSET(curr_entry) - curr_entry->vme_start);
11397 curr_address = user_address + curr_offset;
11398 /* switch to the submap */
11399 curr_map = VME_SUBMAP(curr_entry);
11400 curr_depth++;
11401 curr_entry = NULL;
11402 }
11403
11404 if (curr_entry == NULL) {
11405 /* no VM region contains the address... */
11406 if (next_entry == NULL) {
11407 /* ... and no VM region follows it either */
11408 return KERN_INVALID_ADDRESS;
11409 }
11410 /* ... gather info about the next VM region */
11411 curr_entry = next_entry;
11412 curr_map = next_map; /* still locked ... */
11413 curr_address = next_address;
11414 curr_skip = next_skip;
11415 curr_offset = next_offset;
11416 curr_depth = next_depth;
11417 curr_max_above = next_max_above;
11418 curr_max_below = next_max_below;
11419 } else {
11420 /* we won't need "next_entry" after all */
11421 if (next_entry != NULL) {
11422 /* release "next_map" */
11423 if (next_map != curr_map && not_in_kdp) {
11424 vm_map_unlock_read(next_map);
11425 }
11426 }
11427 }
11428 next_entry = NULL;
11429 next_map = NULL;
11430 next_offset = 0;
11431 next_skip = 0;
11432 next_depth = 0;
11433 next_max_below = -1;
11434 next_max_above = -1;
11435
11436 if (curr_entry->is_sub_map &&
11437 curr_depth < user_max_depth) {
11438 /*
11439 * We're not as deep as we could be: we must have
11440 * gone back up after not finding anything mapped
11441 * below the original top-level map entry's.
11442 * Let's move "curr_address" forward and recurse again.
11443 */
11444 user_address = curr_address;
11445 goto recurse_again;
11446 }
11447
11448 *nesting_depth = curr_depth;
11449 *size = curr_max_above + curr_max_below;
11450 *address = user_address + curr_skip - curr_max_below;
11451
11452 // LP64todo: all the current tools are 32bit, obviously never worked for 64b
11453 // so probably should be a real 32b ID vs. ptr.
11454 // Current users just check for equality
11455 #define INFO_MAKE_OBJECT_ID(p) ((uint32_t)(uintptr_t)VM_KERNEL_ADDRPERM(p))
11456
11457 if (look_for_pages) {
11458 submap_info->user_tag = VME_ALIAS(curr_entry);
11459 submap_info->offset = VME_OFFSET(curr_entry);
11460 submap_info->protection = curr_entry->protection;
11461 submap_info->inheritance = curr_entry->inheritance;
11462 submap_info->max_protection = curr_entry->max_protection;
11463 submap_info->behavior = curr_entry->behavior;
11464 submap_info->user_wired_count = curr_entry->user_wired_count;
11465 submap_info->is_submap = curr_entry->is_sub_map;
11466 submap_info->object_id = INFO_MAKE_OBJECT_ID(VME_OBJECT(curr_entry));
11467 } else {
11468 short_info->user_tag = VME_ALIAS(curr_entry);
11469 short_info->offset = VME_OFFSET(curr_entry);
11470 short_info->protection = curr_entry->protection;
11471 short_info->inheritance = curr_entry->inheritance;
11472 short_info->max_protection = curr_entry->max_protection;
11473 short_info->behavior = curr_entry->behavior;
11474 short_info->user_wired_count = curr_entry->user_wired_count;
11475 short_info->is_submap = curr_entry->is_sub_map;
11476 short_info->object_id = INFO_MAKE_OBJECT_ID(VME_OBJECT(curr_entry));
11477 }
11478
11479 extended.pages_resident = 0;
11480 extended.pages_swapped_out = 0;
11481 extended.pages_shared_now_private = 0;
11482 extended.pages_dirtied = 0;
11483 extended.pages_reusable = 0;
11484 extended.external_pager = 0;
11485 extended.shadow_depth = 0;
11486 extended.share_mode = SM_EMPTY;
11487 extended.ref_count = 0;
11488
11489 if (not_in_kdp) {
11490 if (!curr_entry->is_sub_map) {
11491 vm_map_offset_t range_start, range_end;
11492 range_start = MAX((curr_address - curr_max_below),
11493 curr_entry->vme_start);
11494 range_end = MIN((curr_address + curr_max_above),
11495 curr_entry->vme_end);
11496 vm_map_region_walk(curr_map,
11497 range_start,
11498 curr_entry,
11499 (VME_OFFSET(curr_entry) +
11500 (range_start -
11501 curr_entry->vme_start)),
11502 range_end - range_start,
11503 &extended,
11504 look_for_pages, VM_REGION_EXTENDED_INFO_COUNT);
11505 if (extended.external_pager &&
11506 extended.ref_count == 2 &&
11507 extended.share_mode == SM_SHARED) {
11508 extended.share_mode = SM_PRIVATE;
11509 }
11510 } else {
11511 if (curr_entry->use_pmap) {
11512 extended.share_mode = SM_TRUESHARED;
11513 } else {
11514 extended.share_mode = SM_PRIVATE;
11515 }
11516 extended.ref_count = VME_SUBMAP(curr_entry)->ref_count;
11517 }
11518 }
11519
11520 if (look_for_pages) {
11521 submap_info->pages_resident = extended.pages_resident;
11522 submap_info->pages_swapped_out = extended.pages_swapped_out;
11523 submap_info->pages_shared_now_private =
11524 extended.pages_shared_now_private;
11525 submap_info->pages_dirtied = extended.pages_dirtied;
11526 submap_info->external_pager = extended.external_pager;
11527 submap_info->shadow_depth = extended.shadow_depth;
11528 submap_info->share_mode = extended.share_mode;
11529 submap_info->ref_count = extended.ref_count;
11530
11531 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
11532 submap_info->pages_reusable = extended.pages_reusable;
11533 }
11534 } else {
11535 short_info->external_pager = extended.external_pager;
11536 short_info->shadow_depth = extended.shadow_depth;
11537 short_info->share_mode = extended.share_mode;
11538 short_info->ref_count = extended.ref_count;
11539 }
11540
11541 if (not_in_kdp) {
11542 vm_map_unlock_read(curr_map);
11543 }
11544
11545 return KERN_SUCCESS;
11546 }
11547
11548 /*
11549 * vm_region:
11550 *
11551 * User call to obtain information about a region in
11552 * a task's address map. Currently, only one flavor is
11553 * supported.
11554 *
11555 * XXX The reserved and behavior fields cannot be filled
11556 * in until the vm merge from the IK is completed, and
11557 * vm_reserve is implemented.
11558 */
11559
11560 kern_return_t
11561 vm_map_region(
11562 vm_map_t map,
11563 vm_map_offset_t *address, /* IN/OUT */
11564 vm_map_size_t *size, /* OUT */
11565 vm_region_flavor_t flavor, /* IN */
11566 vm_region_info_t info, /* OUT */
11567 mach_msg_type_number_t *count, /* IN/OUT */
11568 mach_port_t *object_name) /* OUT */
11569 {
11570 vm_map_entry_t tmp_entry;
11571 vm_map_entry_t entry;
11572 vm_map_offset_t start;
11573
11574 if (map == VM_MAP_NULL)
11575 return(KERN_INVALID_ARGUMENT);
11576
11577 switch (flavor) {
11578
11579 case VM_REGION_BASIC_INFO:
11580 /* legacy for old 32-bit objects info */
11581 {
11582 vm_region_basic_info_t basic;
11583
11584 if (*count < VM_REGION_BASIC_INFO_COUNT)
11585 return(KERN_INVALID_ARGUMENT);
11586
11587 basic = (vm_region_basic_info_t) info;
11588 *count = VM_REGION_BASIC_INFO_COUNT;
11589
11590 vm_map_lock_read(map);
11591
11592 start = *address;
11593 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
11594 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
11595 vm_map_unlock_read(map);
11596 return(KERN_INVALID_ADDRESS);
11597 }
11598 } else {
11599 entry = tmp_entry;
11600 }
11601
11602 start = entry->vme_start;
11603
11604 basic->offset = (uint32_t)VME_OFFSET(entry);
11605 basic->protection = entry->protection;
11606 basic->inheritance = entry->inheritance;
11607 basic->max_protection = entry->max_protection;
11608 basic->behavior = entry->behavior;
11609 basic->user_wired_count = entry->user_wired_count;
11610 basic->reserved = entry->is_sub_map;
11611 *address = start;
11612 *size = (entry->vme_end - start);
11613
11614 if (object_name) *object_name = IP_NULL;
11615 if (entry->is_sub_map) {
11616 basic->shared = FALSE;
11617 } else {
11618 basic->shared = entry->is_shared;
11619 }
11620
11621 vm_map_unlock_read(map);
11622 return(KERN_SUCCESS);
11623 }
11624
11625 case VM_REGION_BASIC_INFO_64:
11626 {
11627 vm_region_basic_info_64_t basic;
11628
11629 if (*count < VM_REGION_BASIC_INFO_COUNT_64)
11630 return(KERN_INVALID_ARGUMENT);
11631
11632 basic = (vm_region_basic_info_64_t) info;
11633 *count = VM_REGION_BASIC_INFO_COUNT_64;
11634
11635 vm_map_lock_read(map);
11636
11637 start = *address;
11638 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
11639 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
11640 vm_map_unlock_read(map);
11641 return(KERN_INVALID_ADDRESS);
11642 }
11643 } else {
11644 entry = tmp_entry;
11645 }
11646
11647 start = entry->vme_start;
11648
11649 basic->offset = VME_OFFSET(entry);
11650 basic->protection = entry->protection;
11651 basic->inheritance = entry->inheritance;
11652 basic->max_protection = entry->max_protection;
11653 basic->behavior = entry->behavior;
11654 basic->user_wired_count = entry->user_wired_count;
11655 basic->reserved = entry->is_sub_map;
11656 *address = start;
11657 *size = (entry->vme_end - start);
11658
11659 if (object_name) *object_name = IP_NULL;
11660 if (entry->is_sub_map) {
11661 basic->shared = FALSE;
11662 } else {
11663 basic->shared = entry->is_shared;
11664 }
11665
11666 vm_map_unlock_read(map);
11667 return(KERN_SUCCESS);
11668 }
11669 case VM_REGION_EXTENDED_INFO:
11670 if (*count < VM_REGION_EXTENDED_INFO_COUNT)
11671 return(KERN_INVALID_ARGUMENT);
11672 /*fallthru*/
11673 case VM_REGION_EXTENDED_INFO__legacy:
11674 if (*count < VM_REGION_EXTENDED_INFO_COUNT__legacy)
11675 return KERN_INVALID_ARGUMENT;
11676
11677 {
11678 vm_region_extended_info_t extended;
11679 mach_msg_type_number_t original_count;
11680
11681 extended = (vm_region_extended_info_t) info;
11682
11683 vm_map_lock_read(map);
11684
11685 start = *address;
11686 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
11687 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
11688 vm_map_unlock_read(map);
11689 return(KERN_INVALID_ADDRESS);
11690 }
11691 } else {
11692 entry = tmp_entry;
11693 }
11694 start = entry->vme_start;
11695
11696 extended->protection = entry->protection;
11697 extended->user_tag = VME_ALIAS(entry);
11698 extended->pages_resident = 0;
11699 extended->pages_swapped_out = 0;
11700 extended->pages_shared_now_private = 0;
11701 extended->pages_dirtied = 0;
11702 extended->external_pager = 0;
11703 extended->shadow_depth = 0;
11704
11705 original_count = *count;
11706 if (flavor == VM_REGION_EXTENDED_INFO__legacy) {
11707 *count = VM_REGION_EXTENDED_INFO_COUNT__legacy;
11708 } else {
11709 extended->pages_reusable = 0;
11710 *count = VM_REGION_EXTENDED_INFO_COUNT;
11711 }
11712
11713 vm_map_region_walk(map, start, entry, VME_OFFSET(entry), entry->vme_end - start, extended, TRUE, *count);
11714
11715 if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED)
11716 extended->share_mode = SM_PRIVATE;
11717
11718 if (object_name)
11719 *object_name = IP_NULL;
11720 *address = start;
11721 *size = (entry->vme_end - start);
11722
11723 vm_map_unlock_read(map);
11724 return(KERN_SUCCESS);
11725 }
11726 case VM_REGION_TOP_INFO:
11727 {
11728 vm_region_top_info_t top;
11729
11730 if (*count < VM_REGION_TOP_INFO_COUNT)
11731 return(KERN_INVALID_ARGUMENT);
11732
11733 top = (vm_region_top_info_t) info;
11734 *count = VM_REGION_TOP_INFO_COUNT;
11735
11736 vm_map_lock_read(map);
11737
11738 start = *address;
11739 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
11740 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
11741 vm_map_unlock_read(map);
11742 return(KERN_INVALID_ADDRESS);
11743 }
11744 } else {
11745 entry = tmp_entry;
11746
11747 }
11748 start = entry->vme_start;
11749
11750 top->private_pages_resident = 0;
11751 top->shared_pages_resident = 0;
11752
11753 vm_map_region_top_walk(entry, top);
11754
11755 if (object_name)
11756 *object_name = IP_NULL;
11757 *address = start;
11758 *size = (entry->vme_end - start);
11759
11760 vm_map_unlock_read(map);
11761 return(KERN_SUCCESS);
11762 }
11763 default:
11764 return(KERN_INVALID_ARGUMENT);
11765 }
11766 }
11767
/*
 * Resident page count to report for "obj", capped at "entry_size":
 * the wired page count if the whole object is marked reusable,
 * otherwise the resident pages that are not reusable.
 */
#define OBJ_RESIDENT_COUNT(obj, entry_size)				\
	MIN((entry_size),						\
	    ((obj)->all_reusable ?					\
	     (obj)->wired_page_count :					\
	     (obj)->resident_page_count - (obj)->reusable_page_count))
11773
11774 void
11775 vm_map_region_top_walk(
11776 vm_map_entry_t entry,
11777 vm_region_top_info_t top)
11778 {
11779
11780 if (VME_OBJECT(entry) == 0 || entry->is_sub_map) {
11781 top->share_mode = SM_EMPTY;
11782 top->ref_count = 0;
11783 top->obj_id = 0;
11784 return;
11785 }
11786
11787 {
11788 struct vm_object *obj, *tmp_obj;
11789 int ref_count;
11790 uint32_t entry_size;
11791
11792 entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64);
11793
11794 obj = VME_OBJECT(entry);
11795
11796 vm_object_lock(obj);
11797
11798 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
11799 ref_count--;
11800
11801 assert(obj->reusable_page_count <= obj->resident_page_count);
11802 if (obj->shadow) {
11803 if (ref_count == 1)
11804 top->private_pages_resident =
11805 OBJ_RESIDENT_COUNT(obj, entry_size);
11806 else
11807 top->shared_pages_resident =
11808 OBJ_RESIDENT_COUNT(obj, entry_size);
11809 top->ref_count = ref_count;
11810 top->share_mode = SM_COW;
11811
11812 while ((tmp_obj = obj->shadow)) {
11813 vm_object_lock(tmp_obj);
11814 vm_object_unlock(obj);
11815 obj = tmp_obj;
11816
11817 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
11818 ref_count--;
11819
11820 assert(obj->reusable_page_count <= obj->resident_page_count);
11821 top->shared_pages_resident +=
11822 OBJ_RESIDENT_COUNT(obj, entry_size);
11823 top->ref_count += ref_count - 1;
11824 }
11825 } else {
11826 if (entry->superpage_size) {
11827 top->share_mode = SM_LARGE_PAGE;
11828 top->shared_pages_resident = 0;
11829 top->private_pages_resident = entry_size;
11830 } else if (entry->needs_copy) {
11831 top->share_mode = SM_COW;
11832 top->shared_pages_resident =
11833 OBJ_RESIDENT_COUNT(obj, entry_size);
11834 } else {
11835 if (ref_count == 1 ||
11836 (ref_count == 2 && !(obj->pager_trusted) && !(obj->internal))) {
11837 top->share_mode = SM_PRIVATE;
11838 top->private_pages_resident =
11839 OBJ_RESIDENT_COUNT(obj,
11840 entry_size);
11841 } else {
11842 top->share_mode = SM_SHARED;
11843 top->shared_pages_resident =
11844 OBJ_RESIDENT_COUNT(obj,
11845 entry_size);
11846 }
11847 }
11848 top->ref_count = ref_count;
11849 }
11850 /* XXX K64: obj_id will be truncated */
11851 top->obj_id = (unsigned int) (uintptr_t)VM_KERNEL_ADDRPERM(obj);
11852
11853 vm_object_unlock(obj);
11854 }
11855 }
11856
11857 void
11858 vm_map_region_walk(
11859 vm_map_t map,
11860 vm_map_offset_t va,
11861 vm_map_entry_t entry,
11862 vm_object_offset_t offset,
11863 vm_object_size_t range,
11864 vm_region_extended_info_t extended,
11865 boolean_t look_for_pages,
11866 mach_msg_type_number_t count)
11867 {
11868 register struct vm_object *obj, *tmp_obj;
11869 register vm_map_offset_t last_offset;
11870 register int i;
11871 register int ref_count;
11872 struct vm_object *shadow_object;
11873 int shadow_depth;
11874
11875 if ((VME_OBJECT(entry) == 0) ||
11876 (entry->is_sub_map) ||
11877 (VME_OBJECT(entry)->phys_contiguous &&
11878 !entry->superpage_size)) {
11879 extended->share_mode = SM_EMPTY;
11880 extended->ref_count = 0;
11881 return;
11882 }
11883
11884 if (entry->superpage_size) {
11885 extended->shadow_depth = 0;
11886 extended->share_mode = SM_LARGE_PAGE;
11887 extended->ref_count = 1;
11888 extended->external_pager = 0;
11889 extended->pages_resident = (unsigned int)(range >> PAGE_SHIFT);
11890 extended->shadow_depth = 0;
11891 return;
11892 }
11893
11894 {
11895 obj = VME_OBJECT(entry);
11896
11897 vm_object_lock(obj);
11898
11899 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
11900 ref_count--;
11901
11902 if (look_for_pages) {
11903 for (last_offset = offset + range;
11904 offset < last_offset;
11905 offset += PAGE_SIZE_64, va += PAGE_SIZE) {
11906 vm_map_region_look_for_page(map, va, obj,
11907 offset, ref_count,
11908 0, extended, count);
11909 }
11910 } else {
11911 shadow_object = obj->shadow;
11912 shadow_depth = 0;
11913
11914 if ( !(obj->pager_trusted) && !(obj->internal))
11915 extended->external_pager = 1;
11916
11917 if (shadow_object != VM_OBJECT_NULL) {
11918 vm_object_lock(shadow_object);
11919 for (;
11920 shadow_object != VM_OBJECT_NULL;
11921 shadow_depth++) {
11922 vm_object_t next_shadow;
11923
11924 if ( !(shadow_object->pager_trusted) &&
11925 !(shadow_object->internal))
11926 extended->external_pager = 1;
11927
11928 next_shadow = shadow_object->shadow;
11929 if (next_shadow) {
11930 vm_object_lock(next_shadow);
11931 }
11932 vm_object_unlock(shadow_object);
11933 shadow_object = next_shadow;
11934 }
11935 }
11936 extended->shadow_depth = shadow_depth;
11937 }
11938
11939 if (extended->shadow_depth || entry->needs_copy)
11940 extended->share_mode = SM_COW;
11941 else {
11942 if (ref_count == 1)
11943 extended->share_mode = SM_PRIVATE;
11944 else {
11945 if (obj->true_share)
11946 extended->share_mode = SM_TRUESHARED;
11947 else
11948 extended->share_mode = SM_SHARED;
11949 }
11950 }
11951 extended->ref_count = ref_count - extended->shadow_depth;
11952
11953 for (i = 0; i < extended->shadow_depth; i++) {
11954 if ((tmp_obj = obj->shadow) == 0)
11955 break;
11956 vm_object_lock(tmp_obj);
11957 vm_object_unlock(obj);
11958
11959 if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress)
11960 ref_count--;
11961
11962 extended->ref_count += ref_count;
11963 obj = tmp_obj;
11964 }
11965 vm_object_unlock(obj);
11966
11967 if (extended->share_mode == SM_SHARED) {
11968 register vm_map_entry_t cur;
11969 register vm_map_entry_t last;
11970 int my_refs;
11971
11972 obj = VME_OBJECT(entry);
11973 last = vm_map_to_entry(map);
11974 my_refs = 0;
11975
11976 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
11977 ref_count--;
11978 for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next)
11979 my_refs += vm_map_region_count_obj_refs(cur, obj);
11980
11981 if (my_refs == ref_count)
11982 extended->share_mode = SM_PRIVATE_ALIASED;
11983 else if (my_refs > 1)
11984 extended->share_mode = SM_SHARED_ALIASED;
11985 }
11986 }
11987 }
11988
11989
11990 /* object is locked on entry and locked on return */
11991
11992
11993 static void
11994 vm_map_region_look_for_page(
11995 __unused vm_map_t map,
11996 __unused vm_map_offset_t va,
11997 vm_object_t object,
11998 vm_object_offset_t offset,
11999 int max_refcnt,
12000 int depth,
12001 vm_region_extended_info_t extended,
12002 mach_msg_type_number_t count)
12003 {
12004 register vm_page_t p;
12005 register vm_object_t shadow;
12006 register int ref_count;
12007 vm_object_t caller_object;
12008 kern_return_t kr;
12009 shadow = object->shadow;
12010 caller_object = object;
12011
12012
12013 while (TRUE) {
12014
12015 if ( !(object->pager_trusted) && !(object->internal))
12016 extended->external_pager = 1;
12017
12018 if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
12019 if (shadow && (max_refcnt == 1))
12020 extended->pages_shared_now_private++;
12021
12022 if (!p->fictitious &&
12023 (p->dirty || pmap_is_modified(p->phys_page)))
12024 extended->pages_dirtied++;
12025 else if (count >= VM_REGION_EXTENDED_INFO_COUNT) {
12026 if (p->reusable || p->object->all_reusable) {
12027 extended->pages_reusable++;
12028 }
12029 }
12030
12031 extended->pages_resident++;
12032
12033 if(object != caller_object)
12034 vm_object_unlock(object);
12035
12036 return;
12037 }
12038 #if MACH_PAGEMAP
12039 if (object->existence_map) {
12040 if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_EXISTS) {
12041
12042 extended->pages_swapped_out++;
12043
12044 if(object != caller_object)
12045 vm_object_unlock(object);
12046
12047 return;
12048 }
12049 } else
12050 #endif /* MACH_PAGEMAP */
12051 if (object->internal &&
12052 object->alive &&
12053 !object->terminating &&
12054 object->pager_ready) {
12055
12056 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
12057 if (VM_COMPRESSOR_PAGER_STATE_GET(object,
12058 offset)
12059 == VM_EXTERNAL_STATE_EXISTS) {
12060 /* the pager has that page */
12061 extended->pages_swapped_out++;
12062 if (object != caller_object)
12063 vm_object_unlock(object);
12064 return;
12065 }
12066 } else {
12067 memory_object_t pager;
12068
12069 vm_object_paging_begin(object);
12070 pager = object->pager;
12071 vm_object_unlock(object);
12072
12073 kr = memory_object_data_request(
12074 pager,
12075 offset + object->paging_offset,
12076 0, /* just poke the pager */
12077 VM_PROT_READ,
12078 NULL);
12079
12080 vm_object_lock(object);
12081 vm_object_paging_end(object);
12082
12083 if (kr == KERN_SUCCESS) {
12084 /* the pager has that page */
12085 extended->pages_swapped_out++;
12086 if (object != caller_object)
12087 vm_object_unlock(object);
12088 return;
12089 }
12090 }
12091 }
12092
12093 if (shadow) {
12094 vm_object_lock(shadow);
12095
12096 if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress)
12097 ref_count--;
12098
12099 if (++depth > extended->shadow_depth)
12100 extended->shadow_depth = depth;
12101
12102 if (ref_count > max_refcnt)
12103 max_refcnt = ref_count;
12104
12105 if(object != caller_object)
12106 vm_object_unlock(object);
12107
12108 offset = offset + object->vo_shadow_offset;
12109 object = shadow;
12110 shadow = object->shadow;
12111 continue;
12112 }
12113 if(object != caller_object)
12114 vm_object_unlock(object);
12115 break;
12116 }
12117 }
12118
12119 static int
12120 vm_map_region_count_obj_refs(
12121 vm_map_entry_t entry,
12122 vm_object_t object)
12123 {
12124 register int ref_count;
12125 register vm_object_t chk_obj;
12126 register vm_object_t tmp_obj;
12127
12128 if (VME_OBJECT(entry) == 0)
12129 return(0);
12130
12131 if (entry->is_sub_map)
12132 return(0);
12133 else {
12134 ref_count = 0;
12135
12136 chk_obj = VME_OBJECT(entry);
12137 vm_object_lock(chk_obj);
12138
12139 while (chk_obj) {
12140 if (chk_obj == object)
12141 ref_count++;
12142 tmp_obj = chk_obj->shadow;
12143 if (tmp_obj)
12144 vm_object_lock(tmp_obj);
12145 vm_object_unlock(chk_obj);
12146
12147 chk_obj = tmp_obj;
12148 }
12149 }
12150 return(ref_count);
12151 }
12152
12153
12154 /*
12155 * Routine: vm_map_simplify
12156 *
12157 * Description:
12158 * Attempt to simplify the map representation in
12159 * the vicinity of the given starting address.
12160 * Note:
12161 * This routine is intended primarily to keep the
12162 * kernel maps more compact -- they generally don't
12163 * benefit from the "expand a map entry" technology
12164 * at allocation time because the adjacent entry
12165 * is often wired down.
12166 */
12167 void
12168 vm_map_simplify_entry(
12169 vm_map_t map,
12170 vm_map_entry_t this_entry)
12171 {
12172 vm_map_entry_t prev_entry;
12173
12174 counter(c_vm_map_simplify_entry_called++);
12175
12176 prev_entry = this_entry->vme_prev;
12177
12178 if ((this_entry != vm_map_to_entry(map)) &&
12179 (prev_entry != vm_map_to_entry(map)) &&
12180
12181 (prev_entry->vme_end == this_entry->vme_start) &&
12182
12183 (prev_entry->is_sub_map == this_entry->is_sub_map) &&
12184 (VME_OBJECT(prev_entry) == VME_OBJECT(this_entry)) &&
12185 ((VME_OFFSET(prev_entry) + (prev_entry->vme_end -
12186 prev_entry->vme_start))
12187 == VME_OFFSET(this_entry)) &&
12188
12189 (prev_entry->behavior == this_entry->behavior) &&
12190 (prev_entry->needs_copy == this_entry->needs_copy) &&
12191 (prev_entry->protection == this_entry->protection) &&
12192 (prev_entry->max_protection == this_entry->max_protection) &&
12193 (prev_entry->inheritance == this_entry->inheritance) &&
12194 (prev_entry->use_pmap == this_entry->use_pmap) &&
12195 (VME_ALIAS(prev_entry) == VME_ALIAS(this_entry)) &&
12196 (prev_entry->no_cache == this_entry->no_cache) &&
12197 (prev_entry->permanent == this_entry->permanent) &&
12198 (prev_entry->map_aligned == this_entry->map_aligned) &&
12199 (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) &&
12200 (prev_entry->used_for_jit == this_entry->used_for_jit) &&
12201 /* from_reserved_zone: OK if that field doesn't match */
12202 (prev_entry->iokit_acct == this_entry->iokit_acct) &&
12203 (prev_entry->vme_resilient_codesign ==
12204 this_entry->vme_resilient_codesign) &&
12205 (prev_entry->vme_resilient_media ==
12206 this_entry->vme_resilient_media) &&
12207
12208 (prev_entry->wired_count == this_entry->wired_count) &&
12209 (prev_entry->user_wired_count == this_entry->user_wired_count) &&
12210
12211 (prev_entry->in_transition == FALSE) &&
12212 (this_entry->in_transition == FALSE) &&
12213 (prev_entry->needs_wakeup == FALSE) &&
12214 (this_entry->needs_wakeup == FALSE) &&
12215 (prev_entry->is_shared == FALSE) &&
12216 (this_entry->is_shared == FALSE) &&
12217 (prev_entry->superpage_size == FALSE) &&
12218 (this_entry->superpage_size == FALSE)
12219 ) {
12220 vm_map_store_entry_unlink(map, prev_entry);
12221 assert(prev_entry->vme_start < this_entry->vme_end);
12222 if (prev_entry->map_aligned)
12223 assert(VM_MAP_PAGE_ALIGNED(prev_entry->vme_start,
12224 VM_MAP_PAGE_MASK(map)));
12225 this_entry->vme_start = prev_entry->vme_start;
12226 VME_OFFSET_SET(this_entry, VME_OFFSET(prev_entry));
12227
12228 if (map->holelistenabled) {
12229 vm_map_store_update_first_free(map, this_entry, TRUE);
12230 }
12231
12232 if (prev_entry->is_sub_map) {
12233 vm_map_deallocate(VME_SUBMAP(prev_entry));
12234 } else {
12235 vm_object_deallocate(VME_OBJECT(prev_entry));
12236 }
12237 vm_map_entry_dispose(map, prev_entry);
12238 SAVE_HINT_MAP_WRITE(map, this_entry);
12239 counter(c_vm_map_simplified++);
12240 }
12241 }
12242
12243 void
12244 vm_map_simplify(
12245 vm_map_t map,
12246 vm_map_offset_t start)
12247 {
12248 vm_map_entry_t this_entry;
12249
12250 vm_map_lock(map);
12251 if (vm_map_lookup_entry(map, start, &this_entry)) {
12252 vm_map_simplify_entry(map, this_entry);
12253 vm_map_simplify_entry(map, this_entry->vme_next);
12254 }
12255 counter(c_vm_map_simplify_called++);
12256 vm_map_unlock(map);
12257 }
12258
12259 static void
12260 vm_map_simplify_range(
12261 vm_map_t map,
12262 vm_map_offset_t start,
12263 vm_map_offset_t end)
12264 {
12265 vm_map_entry_t entry;
12266
12267 /*
12268 * The map should be locked (for "write") by the caller.
12269 */
12270
12271 if (start >= end) {
12272 /* invalid address range */
12273 return;
12274 }
12275
12276 start = vm_map_trunc_page(start,
12277 VM_MAP_PAGE_MASK(map));
12278 end = vm_map_round_page(end,
12279 VM_MAP_PAGE_MASK(map));
12280
12281 if (!vm_map_lookup_entry(map, start, &entry)) {
12282 /* "start" is not mapped and "entry" ends before "start" */
12283 if (entry == vm_map_to_entry(map)) {
12284 /* start with first entry in the map */
12285 entry = vm_map_first_entry(map);
12286 } else {
12287 /* start with next entry */
12288 entry = entry->vme_next;
12289 }
12290 }
12291
12292 while (entry != vm_map_to_entry(map) &&
12293 entry->vme_start <= end) {
12294 /* try and coalesce "entry" with its previous entry */
12295 vm_map_simplify_entry(map, entry);
12296 entry = entry->vme_next;
12297 }
12298 }
12299
12300
12301 /*
12302 * Routine: vm_map_machine_attribute
12303 * Purpose:
12304 * Provide machine-specific attributes to mappings,
12305 * such as cachability etc. for machines that provide
12306 * them. NUMA architectures and machines with big/strange
12307 * caches will use this.
12308 * Note:
12309 * Responsibilities for locking and checking are handled here,
12310 * everything else in the pmap module. If any non-volatile
12311 * information must be kept, the pmap module should handle
12312 * it itself. [This assumes that attributes do not
12313 * need to be inherited, which seems ok to me]
 * Returns:
 * KERN_INVALID_ADDRESS if "start" is below the map's minimum or
 * "end" is above the map's maximum.
 * For attributes other than MATTR_CACHE: whatever pmap_attribute()
 * returns for the whole range.
 * For MATTR_CACHE: KERN_FAILURE as soon as an address in the range
 * is found to be unmapped; otherwise the result of the last
 * pmap_attribute_cache_sync() call, or KERN_SUCCESS if no page
 * was synced.
12314 */
12315 kern_return_t
12316 vm_map_machine_attribute(
12317 vm_map_t map,
12318 vm_map_offset_t start,
12319 vm_map_offset_t end,
12320 vm_machine_attribute_t attribute,
12321 vm_machine_attribute_val_t* value) /* IN/OUT */
12322 {
12323 kern_return_t ret;
12324 vm_map_size_t sync_size;
12325 vm_map_entry_t entry;
12326
12327 if (start < vm_map_min(map) || end > vm_map_max(map))
12328 return KERN_INVALID_ADDRESS;
12329
12330 /* Figure how much memory we need to flush (in page increments) */
/* NOTE(review): "start <= end" is not verified before this subtraction -- confirm that callers guarantee it. */
12331 sync_size = end - start;
12332
12333 vm_map_lock(map);
12334
12335 if (attribute != MATTR_CACHE) {
12336 /* If we don't have to find physical addresses, we */
12337 /* don't have to do an explicit traversal here. */
12338 ret = pmap_attribute(map->pmap, start, end-start,
12339 attribute, value);
12340 vm_map_unlock(map);
12341 return ret;
12342 }
12343
12344 ret = KERN_SUCCESS; /* Assume it all worked */
12345
/* Walk the range one map entry at a time; "sync_size" is what is left to process. */
12346 while(sync_size) {
12347 if (vm_map_lookup_entry(map, start, &entry)) {
12348 vm_map_size_t sub_size;
/* "sub_size" is the part of the remaining range that lies within this entry. */
12349 if((entry->vme_end - start) > sync_size) {
12350 sub_size = sync_size;
12351 sync_size = 0;
12352 } else {
12353 sub_size = entry->vme_end - start;
12354 sync_size -= sub_size;
12355 }
12356 if(entry->is_sub_map) {
12357 vm_map_offset_t sub_start;
12358 vm_map_offset_t sub_end;
12359
/* Translate the range into the submap's address space and recurse. */
12360 sub_start = (start - entry->vme_start)
12361 + VME_OFFSET(entry);
12362 sub_end = sub_start + sub_size;
/* NOTE(review): the result of the recursive call is discarded, so a failure inside the submap does not change "ret". */
12363 vm_map_machine_attribute(
12364 VME_SUBMAP(entry),
12365 sub_start,
12366 sub_end,
12367 attribute, value);
12368 } else {
12369 if (VME_OBJECT(entry)) {
12370 vm_page_t m;
12371 vm_object_t object;
12372 vm_object_t base_object;
12373 vm_object_t last_object;
12374 vm_object_offset_t offset;
12375 vm_object_offset_t base_offset;
12376 vm_map_size_t range;
12377 range = sub_size;
12378 offset = (start - entry->vme_start)
12379 + VME_OFFSET(entry);
12380 base_offset = offset;
12381 object = VME_OBJECT(entry);
12382 base_object = object;
12383 last_object = NULL;
12384
12385 vm_object_lock(object);
12386
/* Process the sub-range page by page; "object"/"offset" track the current position in the shadow chain, "base_object"/"base_offset" the entry's own object. */
12387 while (range) {
12388 m = vm_page_lookup(
12389 object, offset);
12390
12391 if (m && !m->fictitious) {
/* "ret" is overwritten on every sync, so only the last result is kept. */
12392 ret =
12393 pmap_attribute_cache_sync(
12394 m->phys_page,
12395 PAGE_SIZE,
12396 attribute, value);
12397
12398 } else if (object->shadow) {
/* No usable page in this object: retry the same page in the shadow object, locking the shadow before unlocking the current object. */
12399 offset = offset + object->vo_shadow_offset;
12400 last_object = object;
12401 object = object->shadow;
12402 vm_object_lock(last_object->shadow);
12403 vm_object_unlock(last_object);
12404 continue;
12405 }
/* Page synced, or no page and no further shadow: this page is done. */
12406 range -= PAGE_SIZE;
12407
12408 if (base_object != object) {
/* We descended the shadow chain: go back to the entry's own object for the next page. */
12409 vm_object_unlock(object);
12410 vm_object_lock(base_object);
12411 object = base_object;
12412 }
12413 /* Bump to the next page */
12414 base_offset += PAGE_SIZE;
12415 offset = base_offset;
12416 }
12417 vm_object_unlock(object);
12418 }
12419 }
12420 start += sub_size;
12421 } else {
/* "start" is not mapped: stop and report failure. */
12422 vm_map_unlock(map);
12423 return KERN_FAILURE;
12424 }
12425
12426 }
12427
12428 vm_map_unlock(map);
12429
12430 return ret;
12431 }
12432
12433 /*
12434 * vm_map_behavior_set:
12435 *
12436 * Sets the paging reference behavior of the specified address
12437 * range in the target map. Paging reference behavior affects
12438 * how pagein operations resulting from faults on the map will be
12439 * clustered.
12440 */
12441 kern_return_t
12442 vm_map_behavior_set(
12443 vm_map_t map,
12444 vm_map_offset_t start,
12445 vm_map_offset_t end,
12446 vm_behavior_t new_behavior)
12447 {
12448 register vm_map_entry_t entry;
12449 vm_map_entry_t temp_entry;
12450
12451 XPR(XPR_VM_MAP,
12452 "vm_map_behavior_set, 0x%X start 0x%X end 0x%X behavior %d",
12453 map, start, end, new_behavior, 0);
12454
12455 if (start > end ||
12456 start < vm_map_min(map) ||
12457 end > vm_map_max(map)) {
12458 return KERN_NO_SPACE;
12459 }
12460
12461 switch (new_behavior) {
12462
12463 /*
12464 * This first block of behaviors all set a persistent state on the specified
12465 * memory range. All we have to do here is to record the desired behavior
12466 * in the vm_map_entry_t's.
12467 */
12468
12469 case VM_BEHAVIOR_DEFAULT:
12470 case VM_BEHAVIOR_RANDOM:
12471 case VM_BEHAVIOR_SEQUENTIAL:
12472 case VM_BEHAVIOR_RSEQNTL:
12473 case VM_BEHAVIOR_ZERO_WIRED_PAGES:
12474 vm_map_lock(map);
12475
12476 /*
12477 * The entire address range must be valid for the map.
12478 * Note that vm_map_range_check() does a
12479 * vm_map_lookup_entry() internally and returns the
12480 * entry containing the start of the address range if
12481 * the entire range is valid.
12482 */
12483 if (vm_map_range_check(map, start, end, &temp_entry)) {
12484 entry = temp_entry;
12485 vm_map_clip_start(map, entry, start);
12486 }
12487 else {
12488 vm_map_unlock(map);
12489 return(KERN_INVALID_ADDRESS);
12490 }
12491
12492 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
12493 vm_map_clip_end(map, entry, end);
12494 if (entry->is_sub_map) {
12495 assert(!entry->use_pmap);
12496 }
12497
12498 if( new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES ) {
12499 entry->zero_wired_pages = TRUE;
12500 } else {
12501 entry->behavior = new_behavior;
12502 }
12503 entry = entry->vme_next;
12504 }
12505
12506 vm_map_unlock(map);
12507 break;
12508
12509 /*
12510 * The rest of these are different from the above in that they cause
12511 * an immediate action to take place as opposed to setting a behavior that
12512 * affects future actions.
12513 */
12514
12515 case VM_BEHAVIOR_WILLNEED:
12516 return vm_map_willneed(map, start, end);
12517
12518 case VM_BEHAVIOR_DONTNEED:
12519 return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS);
12520
12521 case VM_BEHAVIOR_FREE:
12522 return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS);
12523
12524 case VM_BEHAVIOR_REUSABLE:
12525 return vm_map_reusable_pages(map, start, end);
12526
12527 case VM_BEHAVIOR_REUSE:
12528 return vm_map_reuse_pages(map, start, end);
12529
12530 case VM_BEHAVIOR_CAN_REUSE:
12531 return vm_map_can_reuse(map, start, end);
12532
12533 #if MACH_ASSERT
12534 case VM_BEHAVIOR_PAGEOUT:
12535 return vm_map_pageout(map, start, end);
12536 #endif /* MACH_ASSERT */
12537
12538 default:
12539 return(KERN_INVALID_ARGUMENT);
12540 }
12541
12542 return(KERN_SUCCESS);
12543 }
12544
12545
12546 /*
12547 * Internals for madvise(MADV_WILLNEED) system call.
12548 *
12549 * The present implementation is to do a read-ahead if the mapping corresponds
12550 * to a mapped regular file. If it's an anonymous mapping, then we do nothing
12551 * and basically ignore the "advice" (which we are always free to do).
 *
 * Returns KERN_INVALID_ADDRESS if vm_map_range_check() rejects [start, end)
 * or if, after the map lock has been dropped and retaken, the next address to
 * process is no longer mapped. Returns KERN_SUCCESS in every other case,
 * including when a data request to the pager fails.
12552 */
12553
12554
12555 static kern_return_t
12556 vm_map_willneed(
12557 vm_map_t map,
12558 vm_map_offset_t start,
12559 vm_map_offset_t end
12560 )
12561 {
12562 vm_map_entry_t entry;
12563 vm_object_t object;
12564 memory_object_t pager;
12565 struct vm_object_fault_info fault_info;
12566 kern_return_t kr;
12567 vm_object_size_t len;
12568 vm_object_offset_t offset;
12569
12570 /*
12571 * Fill in static values in fault_info. Several fields get ignored by the code
12572 * we call, but we'll fill them in anyway since uninitialized fields are bad
12573 * when it comes to future backwards compatibility.
12574 */
12575
12576 fault_info.interruptible = THREAD_UNINT; /* ignored value */
12577 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
12578 fault_info.no_cache = FALSE; /* ignored value */
12579 fault_info.stealth = TRUE;
12580 fault_info.io_sync = FALSE;
12581 fault_info.cs_bypass = FALSE;
12582 fault_info.mark_zf_absent = FALSE;
12583 fault_info.batch_pmap_op = FALSE;
12584
12585 /*
12586 * The MADV_WILLNEED operation doesn't require any changes to the
12587 * vm_map_entry_t's, so the read lock is sufficient.
12588 */
12589
12590 vm_map_lock_read(map);
12591
12592 /*
12593 * The madvise semantics require that the address range be fully
12594 * allocated with no holes. Otherwise, we're required to return
12595 * an error.
12596 */
12597
12598 if (! vm_map_range_check(map, start, end, &entry)) {
12599 vm_map_unlock_read(map);
12600 return KERN_INVALID_ADDRESS;
12601 }
12602
12603 /*
12604 * Examine each vm_map_entry_t in the range.
12605 */
/* The map read lock is held at the top of every iteration; "start" is advanced as the range is consumed. */
12606 for (; entry != vm_map_to_entry(map) && start < end; ) {
12607
12608 /*
12609 * The first time through, the start address could be anywhere
12610 * within the vm_map_entry we found. So adjust the offset to
12611 * correspond. After that, the offset will always be zero to
12612 * correspond to the beginning of the current vm_map_entry.
12613 */
12614 offset = (start - entry->vme_start) + VME_OFFSET(entry);
12615
12616 /*
12617 * Set the length so we don't go beyond the end of the
12618 * map_entry or beyond the end of the range we were given.
12619 * This range could span also multiple map entries all of which
12620 * map different files, so make sure we only do the right amount
12621 * of I/O for each object. Note that it's possible for there
12622 * to be multiple map entries all referring to the same object
12623 * but with different page permissions, but it's not worth
12624 * trying to optimize that case.
12625 */
12626 len = MIN(entry->vme_end - start, end - start);
12627
/* If "len" does not survive the round trip through vm_size_t, substitute (vm_size_t)(0 - PAGE_SIZE). */
12628 if ((vm_size_t) len != len) {
12629 /* 32-bit overflow */
12630 len = (vm_size_t) (0 - PAGE_SIZE);
12631 }
/* Per-entry part of fault_info: the window to read and the entry's alias and accounting options. */
12632 fault_info.cluster_size = (vm_size_t) len;
12633 fault_info.lo_offset = offset;
12634 fault_info.hi_offset = offset + len;
12635 fault_info.user_tag = VME_ALIAS(entry);
12636 fault_info.pmap_options = 0;
12637 if (entry->iokit_acct ||
12638 (!entry->is_sub_map && !entry->use_pmap)) {
12639 fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
12640 }
12641
12642 /*
12643 * If there's no read permission to this mapping, then just
12644 * skip it.
12645 */
12646 if ((entry->protection & VM_PROT_READ) == 0) {
/* Move on with the map lock still held; if the next entry is the map header, the loop condition ends the walk. */
12647 entry = entry->vme_next;
12648 start = entry->vme_start;
12649 continue;
12650 }
12651
12652 /*
12653 * Find the file object backing this map entry. If there is
12654 * none, then we simply ignore the "will need" advice for this
12655 * entry and go on to the next one.
12656 */
/* NOTE(review): the object is unlocked below without being locked in this function, so find_vnode_object() presumably returns it locked -- confirm. */
12657 if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) {
12658 entry = entry->vme_next;
12659 start = entry->vme_start;
12660 continue;
12661 }
12662
12663 /*
12664 * The data_request() could take a long time, so let's
12665 * release the map lock to avoid blocking other threads.
12666 */
/* From here until the map lock is retaken at the bottom of the loop, "entry" is not used again. */
12667 vm_map_unlock_read(map);
12668
/* The paging reference taken here is dropped by vm_object_paging_end() after the request. */
12669 vm_object_paging_begin(object);
12670 pager = object->pager;
12671 vm_object_unlock(object);
12672
12673 /*
12674 * Get the data from the object asynchronously.
12675 *
12676 * Note that memory_object_data_request() places limits on the
12677 * amount of I/O it will do. Regardless of the len we
12678 * specified, it won't do more than MAX_UPL_TRANSFER_BYTES and it
12679 * silently truncates the len to that size. This isn't
12680 * necessarily bad since madvise shouldn't really be used to
12681 * page in unlimited amounts of data. Other Unix variants
12682 * limit the willneed case as well. If this turns out to be an
12683 * issue for developers, then we can always adjust the policy
12684 * here and still be backwards compatible since this is all
12685 * just "advice".
12686 */
12687 kr = memory_object_data_request(
12688 pager,
12689 offset + object->paging_offset,
12690 0, /* ignored */
12691 VM_PROT_READ,
12692 (memory_object_fault_info_t)&fault_info);
12693
12694 vm_object_lock(object);
12695 vm_object_paging_end(object);
12696 vm_object_unlock(object);
12697
12698 /*
12699 * If we couldn't do the I/O for some reason, just give up on
12700 * the madvise. We still return success to the user since
12701 * madvise isn't supposed to fail when the advice can't be
12702 * taken.
12703 */
12704 if (kr != KERN_SUCCESS) {
/* The map lock was already released above, so there is nothing to unlock here. */
12705 return KERN_SUCCESS;
12706 }
12707
12708 start += len;
12709 if (start >= end) {
12710 /* done */
/* Likewise, the map lock is not held at this point. */
12711 return KERN_SUCCESS;
12712 }
12713
12714 /* look up next entry */
/* The map may have changed while it was unlocked, so the entry is looked up again by address. */
12715 vm_map_lock_read(map);
12716 if (! vm_map_lookup_entry(map, start, &entry)) {
12717 /*
12718 * There's a new hole in the address range.
12719 */
12720 vm_map_unlock_read(map);
12721 return KERN_INVALID_ADDRESS;
12722 }
12723 }
12724
/* Reached when the loop exits at its top, where the read lock is held. */
12725 vm_map_unlock_read(map);
12726 return KERN_SUCCESS;
12727 }
12728
12729 static boolean_t
12730 vm_map_entry_is_reusable(
12731 vm_map_entry_t entry)
12732 {
12733 /* Only user map entries */
12734
12735 vm_object_t object;
12736
12737 if (entry->is_sub_map) {
12738 return FALSE;
12739 }
12740
12741 switch (VME_ALIAS(entry)) {
12742 case VM_MEMORY_MALLOC:
12743 case VM_MEMORY_MALLOC_SMALL:
12744 case VM_MEMORY_MALLOC_LARGE:
12745 case VM_MEMORY_REALLOC:
12746 case VM_MEMORY_MALLOC_TINY:
12747 case VM_MEMORY_MALLOC_LARGE_REUSABLE:
12748 case VM_MEMORY_MALLOC_LARGE_REUSED:
12749 /*
12750 * This is a malloc() memory region: check if it's still
12751 * in its original state and can be re-used for more
12752 * malloc() allocations.
12753 */
12754 break;
12755 default:
12756 /*
12757 * Not a malloc() memory region: let the caller decide if
12758 * it's re-usable.
12759 */
12760 return TRUE;
12761 }
12762
12763 if (entry->is_shared ||
12764 entry->is_sub_map ||
12765 entry->in_transition ||
12766 entry->protection != VM_PROT_DEFAULT ||
12767 entry->max_protection != VM_PROT_ALL ||
12768 entry->inheritance != VM_INHERIT_DEFAULT ||
12769 entry->no_cache ||
12770 entry->permanent ||
12771 entry->superpage_size != FALSE ||
12772 entry->zero_wired_pages ||
12773 entry->wired_count != 0 ||
12774 entry->user_wired_count != 0) {
12775 return FALSE;
12776 }
12777
12778 object = VME_OBJECT(entry);
12779 if (object == VM_OBJECT_NULL) {
12780 return TRUE;
12781 }
12782 if (
12783 #if 0
12784 /*
12785 * Let's proceed even if the VM object is potentially
12786 * shared.
12787 * We check for this later when processing the actual
12788 * VM pages, so the contents will be safe if shared.
12789 *
12790 * But we can still mark this memory region as "reusable" to
12791 * acknowledge that the caller did let us know that the memory
12792 * could be re-used and should not be penalized for holding
12793 * on to it. This allows its "resident size" to not include
12794 * the reusable range.
12795 */
12796 object->ref_count == 1 &&
12797 #endif
12798 object->wired_page_count == 0 &&
12799 object->copy == VM_OBJECT_NULL &&
12800 object->shadow == VM_OBJECT_NULL &&
12801 object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
12802 object->internal &&
12803 !object->true_share &&
12804 object->wimg_bits == VM_WIMG_USE_DEFAULT &&
12805 !object->code_signed) {
12806 return TRUE;
12807 }
12808 return FALSE;
12809
12810
12811 }
12812
12813 static kern_return_t
12814 vm_map_reuse_pages(
12815 vm_map_t map,
12816 vm_map_offset_t start,
12817 vm_map_offset_t end)
12818 {
12819 vm_map_entry_t entry;
12820 vm_object_t object;
12821 vm_object_offset_t start_offset, end_offset;
12822
12823 /*
12824 * The MADV_REUSE operation doesn't require any changes to the
12825 * vm_map_entry_t's, so the read lock is sufficient.
12826 */
12827
12828 vm_map_lock_read(map);
12829 assert(map->pmap != kernel_pmap); /* protect alias access */
12830
12831 /*
12832 * The madvise semantics require that the address range be fully
12833 * allocated with no holes. Otherwise, we're required to return
12834 * an error.
12835 */
12836
12837 if (!vm_map_range_check(map, start, end, &entry)) {
12838 vm_map_unlock_read(map);
12839 vm_page_stats_reusable.reuse_pages_failure++;
12840 return KERN_INVALID_ADDRESS;
12841 }
12842
12843 /*
12844 * Examine each vm_map_entry_t in the range.
12845 */
12846 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
12847 entry = entry->vme_next) {
12848 /*
12849 * Sanity check on the VM map entry.
12850 */
12851 if (! vm_map_entry_is_reusable(entry)) {
12852 vm_map_unlock_read(map);
12853 vm_page_stats_reusable.reuse_pages_failure++;
12854 return KERN_INVALID_ADDRESS;
12855 }
12856
12857 /*
12858 * The first time through, the start address could be anywhere
12859 * within the vm_map_entry we found. So adjust the offset to
12860 * correspond.
12861 */
12862 if (entry->vme_start < start) {
12863 start_offset = start - entry->vme_start;
12864 } else {
12865 start_offset = 0;
12866 }
12867 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
12868 start_offset += VME_OFFSET(entry);
12869 end_offset += VME_OFFSET(entry);
12870
12871 assert(!entry->is_sub_map);
12872 object = VME_OBJECT(entry);
12873 if (object != VM_OBJECT_NULL) {
12874 vm_object_lock(object);
12875 vm_object_reuse_pages(object, start_offset, end_offset,
12876 TRUE);
12877 vm_object_unlock(object);
12878 }
12879
12880 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSABLE) {
12881 /*
12882 * XXX
12883 * We do not hold the VM map exclusively here.
12884 * The "alias" field is not that critical, so it's
12885 * safe to update it here, as long as it is the only
12886 * one that can be modified while holding the VM map
12887 * "shared".
12888 */
12889 VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSED);
12890 }
12891 }
12892
12893 vm_map_unlock_read(map);
12894 vm_page_stats_reusable.reuse_pages_success++;
12895 return KERN_SUCCESS;
12896 }
12897
12898
12899 static kern_return_t
12900 vm_map_reusable_pages(
12901 vm_map_t map,
12902 vm_map_offset_t start,
12903 vm_map_offset_t end)
12904 {
12905 vm_map_entry_t entry;
12906 vm_object_t object;
12907 vm_object_offset_t start_offset, end_offset;
12908 vm_map_offset_t pmap_offset;
12909
12910 /*
12911 * The MADV_REUSABLE operation doesn't require any changes to the
12912 * vm_map_entry_t's, so the read lock is sufficient.
12913 */
12914
12915 vm_map_lock_read(map);
12916 assert(map->pmap != kernel_pmap); /* protect alias access */
12917
12918 /*
12919 * The madvise semantics require that the address range be fully
12920 * allocated with no holes. Otherwise, we're required to return
12921 * an error.
12922 */
12923
12924 if (!vm_map_range_check(map, start, end, &entry)) {
12925 vm_map_unlock_read(map);
12926 vm_page_stats_reusable.reusable_pages_failure++;
12927 return KERN_INVALID_ADDRESS;
12928 }
12929
12930 /*
12931 * Examine each vm_map_entry_t in the range.
12932 */
12933 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
12934 entry = entry->vme_next) {
12935 int kill_pages = 0;
12936
12937 /*
12938 * Sanity check on the VM map entry.
12939 */
12940 if (! vm_map_entry_is_reusable(entry)) {
12941 vm_map_unlock_read(map);
12942 vm_page_stats_reusable.reusable_pages_failure++;
12943 return KERN_INVALID_ADDRESS;
12944 }
12945
12946 /*
12947 * The first time through, the start address could be anywhere
12948 * within the vm_map_entry we found. So adjust the offset to
12949 * correspond.
12950 */
12951 if (entry->vme_start < start) {
12952 start_offset = start - entry->vme_start;
12953 pmap_offset = start;
12954 } else {
12955 start_offset = 0;
12956 pmap_offset = entry->vme_start;
12957 }
12958 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
12959 start_offset += VME_OFFSET(entry);
12960 end_offset += VME_OFFSET(entry);
12961
12962 assert(!entry->is_sub_map);
12963 object = VME_OBJECT(entry);
12964 if (object == VM_OBJECT_NULL)
12965 continue;
12966
12967
12968 vm_object_lock(object);
12969 if (object->ref_count == 1 &&
12970 !object->shadow &&
12971 /*
12972 * "iokit_acct" entries are billed for their virtual size
12973 * (rather than for their resident pages only), so they
12974 * wouldn't benefit from making pages reusable, and it
12975 * would be hard to keep track of pages that are both
12976 * "iokit_acct" and "reusable" in the pmap stats and ledgers.
12977 */
12978 !(entry->iokit_acct ||
12979 (!entry->is_sub_map && !entry->use_pmap)))
12980 kill_pages = 1;
12981 else
12982 kill_pages = -1;
12983 if (kill_pages != -1) {
12984 vm_object_deactivate_pages(object,
12985 start_offset,
12986 end_offset - start_offset,
12987 kill_pages,
12988 TRUE /*reusable_pages*/,
12989 map->pmap,
12990 pmap_offset);
12991 } else {
12992 vm_page_stats_reusable.reusable_pages_shared++;
12993 }
12994 vm_object_unlock(object);
12995
12996 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE ||
12997 VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSED) {
12998 /*
12999 * XXX
13000 * We do not hold the VM map exclusively here.
13001 * The "alias" field is not that critical, so it's
13002 * safe to update it here, as long as it is the only
13003 * one that can be modified while holding the VM map
13004 * "shared".
13005 */
13006 VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSABLE);
13007 }
13008 }
13009
13010 vm_map_unlock_read(map);
13011 vm_page_stats_reusable.reusable_pages_success++;
13012 return KERN_SUCCESS;
13013 }
13014
13015
13016 static kern_return_t
13017 vm_map_can_reuse(
13018 vm_map_t map,
13019 vm_map_offset_t start,
13020 vm_map_offset_t end)
13021 {
13022 vm_map_entry_t entry;
13023
13024 /*
13025 * The MADV_REUSABLE operation doesn't require any changes to the
13026 * vm_map_entry_t's, so the read lock is sufficient.
13027 */
13028
13029 vm_map_lock_read(map);
13030 assert(map->pmap != kernel_pmap); /* protect alias access */
13031
13032 /*
13033 * The madvise semantics require that the address range be fully
13034 * allocated with no holes. Otherwise, we're required to return
13035 * an error.
13036 */
13037
13038 if (!vm_map_range_check(map, start, end, &entry)) {
13039 vm_map_unlock_read(map);
13040 vm_page_stats_reusable.can_reuse_failure++;
13041 return KERN_INVALID_ADDRESS;
13042 }
13043
13044 /*
13045 * Examine each vm_map_entry_t in the range.
13046 */
13047 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
13048 entry = entry->vme_next) {
13049 /*
13050 * Sanity check on the VM map entry.
13051 */
13052 if (! vm_map_entry_is_reusable(entry)) {
13053 vm_map_unlock_read(map);
13054 vm_page_stats_reusable.can_reuse_failure++;
13055 return KERN_INVALID_ADDRESS;
13056 }
13057 }
13058
13059 vm_map_unlock_read(map);
13060 vm_page_stats_reusable.can_reuse_success++;
13061 return KERN_SUCCESS;
13062 }
13063
13064
#if MACH_ASSERT
/*
 *	Routine:	vm_map_pageout
 *
 *	Description:
 *		Back end of the VM_BEHAVIOR_PAGEOUT advice (MACH_ASSERT
 *		kernels only): call vm_object_pageout() on the internal VM
 *		object of every map entry overlapping [start, end).
 *
 *		For a submap entry, the range the entry maps is validated in
 *		the submap and only the submap entry returned by
 *		vm_map_range_check() for that range is considered.
 *
 *		Returns KERN_INVALID_ADDRESS if [start, end), or the range
 *		mapped from a submap, fails vm_map_range_check();
 *		KERN_SUCCESS otherwise.
 */
static kern_return_t
vm_map_pageout(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	vm_map_entry_t	cur;

	/* the map entries are only examined: the read lock is enough */
	vm_map_lock_read(map);

	/*
	 * madvise semantics: the address range has to be fully
	 * allocated, without holes, or the request is an error.
	 */
	if (!vm_map_range_check(map, start, end, &cur)) {
		vm_map_unlock_read(map);
		return KERN_INVALID_ADDRESS;
	}

	for (;
	     cur != vm_map_to_entry(map) && cur->vme_start < end;
	     cur = cur->vme_next) {
		vm_object_t	object;

		if (!cur->is_sub_map) {
			/* regular entry: page out its internal object */
			object = VME_OBJECT(cur);
			if (object != VM_OBJECT_NULL && object->internal) {
				vm_object_pageout(object);
			}
			continue;
		}

		/*
		 * Submap entry: look at what it maps, with the submap
		 * read-locked for the duration.
		 */
		{
			vm_map_t	submap;
			vm_map_offset_t	submap_start;
			vm_map_offset_t	submap_end;
			vm_map_entry_t	submap_entry;

			submap = VME_SUBMAP(cur);
			submap_start = VME_OFFSET(cur);
			submap_end = submap_start +
				(cur->vme_end - cur->vme_start);

			vm_map_lock_read(submap);

			if (!vm_map_range_check(submap,
						submap_start,
						submap_end,
						&submap_entry)) {
				vm_map_unlock_read(submap);
				vm_map_unlock_read(map);
				return KERN_INVALID_ADDRESS;
			}

			object = VME_OBJECT(submap_entry);
			if (!submap_entry->is_sub_map &&
			    object != VM_OBJECT_NULL &&
			    object->internal) {
				vm_object_pageout(object);
			}

			vm_map_unlock_read(submap);
		}
	}

	vm_map_unlock_read(map);
	return KERN_SUCCESS;
}
#endif /* MACH_ASSERT */
13154
13155
13156 /*
13157 * Routine: vm_map_entry_insert
13158 *
13159 * Descritpion: This routine inserts a new vm_entry in a locked map.
13160 */
13161 vm_map_entry_t
13162 vm_map_entry_insert(
13163 vm_map_t map,
13164 vm_map_entry_t insp_entry,
13165 vm_map_offset_t start,
13166 vm_map_offset_t end,
13167 vm_object_t object,
13168 vm_object_offset_t offset,
13169 boolean_t needs_copy,
13170 boolean_t is_shared,
13171 boolean_t in_transition,
13172 vm_prot_t cur_protection,
13173 vm_prot_t max_protection,
13174 vm_behavior_t behavior,
13175 vm_inherit_t inheritance,
13176 unsigned wired_count,
13177 boolean_t no_cache,
13178 boolean_t permanent,
13179 unsigned int superpage_size,
13180 boolean_t clear_map_aligned,
13181 boolean_t is_submap)
13182 {
13183 vm_map_entry_t new_entry;
13184
13185 assert(insp_entry != (vm_map_entry_t)0);
13186
13187 new_entry = vm_map_entry_create(map, !map->hdr.entries_pageable);
13188
13189 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
13190 new_entry->map_aligned = TRUE;
13191 } else {
13192 new_entry->map_aligned = FALSE;
13193 }
13194 if (clear_map_aligned &&
13195 (! VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)) ||
13196 ! VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)))) {
13197 new_entry->map_aligned = FALSE;
13198 }
13199
13200 new_entry->vme_start = start;
13201 new_entry->vme_end = end;
13202 assert(page_aligned(new_entry->vme_start));
13203 assert(page_aligned(new_entry->vme_end));
13204 if (new_entry->map_aligned) {
13205 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
13206 VM_MAP_PAGE_MASK(map)));
13207 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
13208 VM_MAP_PAGE_MASK(map)));
13209 }
13210 assert(new_entry->vme_start < new_entry->vme_end);
13211
13212 VME_OBJECT_SET(new_entry, object);
13213 VME_OFFSET_SET(new_entry, offset);
13214 new_entry->is_shared = is_shared;
13215 new_entry->is_sub_map = is_submap;
13216 new_entry->needs_copy = needs_copy;
13217 new_entry->in_transition = in_transition;
13218 new_entry->needs_wakeup = FALSE;
13219 new_entry->inheritance = inheritance;
13220 new_entry->protection = cur_protection;
13221 new_entry->max_protection = max_protection;
13222 new_entry->behavior = behavior;
13223 new_entry->wired_count = wired_count;
13224 new_entry->user_wired_count = 0;
13225 if (is_submap) {
13226 /*
13227 * submap: "use_pmap" means "nested".
13228 * default: false.
13229 */
13230 new_entry->use_pmap = FALSE;
13231 } else {
13232 /*
13233 * object: "use_pmap" means "use pmap accounting" for footprint.
13234 * default: true.
13235 */
13236 new_entry->use_pmap = TRUE;
13237 }
13238 VME_ALIAS_SET(new_entry, 0);
13239 new_entry->zero_wired_pages = FALSE;
13240 new_entry->no_cache = no_cache;
13241 new_entry->permanent = permanent;
13242 if (superpage_size)
13243 new_entry->superpage_size = TRUE;
13244 else
13245 new_entry->superpage_size = FALSE;
13246 new_entry->used_for_jit = FALSE;
13247 new_entry->iokit_acct = FALSE;
13248 new_entry->vme_resilient_codesign = FALSE;
13249 new_entry->vme_resilient_media = FALSE;
13250
13251 /*
13252 * Insert the new entry into the list.
13253 */
13254
13255 vm_map_store_entry_link(map, insp_entry, new_entry);
13256 map->size += end - start;
13257
13258 /*
13259 * Update the free space hint and the lookup hint.
13260 */
13261
13262 SAVE_HINT_MAP_WRITE(map, new_entry);
13263 return new_entry;
13264 }
13265
13266 /*
13267 * Routine: vm_map_remap_extract
13268 *
13269 * Description: This routine returns a vm_entry list from a map.
13270 */
13271 static kern_return_t
13272 vm_map_remap_extract(
13273 vm_map_t map,
13274 vm_map_offset_t addr,
13275 vm_map_size_t size,
13276 boolean_t copy,
13277 struct vm_map_header *map_header,
13278 vm_prot_t *cur_protection,
13279 vm_prot_t *max_protection,
13280 /* What, no behavior? */
13281 vm_inherit_t inheritance,
13282 boolean_t pageable)
13283 {
13284 kern_return_t result;
13285 vm_map_size_t mapped_size;
13286 vm_map_size_t tmp_size;
13287 vm_map_entry_t src_entry; /* result of last map lookup */
13288 vm_map_entry_t new_entry;
13289 vm_object_offset_t offset;
13290 vm_map_offset_t map_address;
13291 vm_map_offset_t src_start; /* start of entry to map */
13292 vm_map_offset_t src_end; /* end of region to be mapped */
13293 vm_object_t object;
13294 vm_map_version_t version;
13295 boolean_t src_needs_copy;
13296 boolean_t new_entry_needs_copy;
13297
13298 assert(map != VM_MAP_NULL);
13299 assert(size != 0);
13300 assert(size == vm_map_round_page(size, PAGE_MASK));
13301 assert(inheritance == VM_INHERIT_NONE ||
13302 inheritance == VM_INHERIT_COPY ||
13303 inheritance == VM_INHERIT_SHARE);
13304
13305 /*
13306 * Compute start and end of region.
13307 */
13308 src_start = vm_map_trunc_page(addr, PAGE_MASK);
13309 src_end = vm_map_round_page(src_start + size, PAGE_MASK);
13310
13311
13312 /*
13313 * Initialize map_header.
13314 */
13315 map_header->links.next = (struct vm_map_entry *)&map_header->links;
13316 map_header->links.prev = (struct vm_map_entry *)&map_header->links;
13317 map_header->nentries = 0;
13318 map_header->entries_pageable = pageable;
13319 map_header->page_shift = PAGE_SHIFT;
13320
13321 vm_map_store_init( map_header );
13322
13323 *cur_protection = VM_PROT_ALL;
13324 *max_protection = VM_PROT_ALL;
13325
13326 map_address = 0;
13327 mapped_size = 0;
13328 result = KERN_SUCCESS;
13329
13330 /*
13331 * The specified source virtual space might correspond to
13332 * multiple map entries, need to loop on them.
13333 */
13334 vm_map_lock(map);
13335 while (mapped_size != size) {
13336 vm_map_size_t entry_size;
13337
13338 /*
13339 * Find the beginning of the region.
13340 */
13341 if (! vm_map_lookup_entry(map, src_start, &src_entry)) {
13342 result = KERN_INVALID_ADDRESS;
13343 break;
13344 }
13345
13346 if (src_start < src_entry->vme_start ||
13347 (mapped_size && src_start != src_entry->vme_start)) {
13348 result = KERN_INVALID_ADDRESS;
13349 break;
13350 }
13351
13352 tmp_size = size - mapped_size;
13353 if (src_end > src_entry->vme_end)
13354 tmp_size -= (src_end - src_entry->vme_end);
13355
13356 entry_size = (vm_map_size_t)(src_entry->vme_end -
13357 src_entry->vme_start);
13358
13359 if(src_entry->is_sub_map) {
13360 vm_map_reference(VME_SUBMAP(src_entry));
13361 object = VM_OBJECT_NULL;
13362 } else {
13363 object = VME_OBJECT(src_entry);
13364 if (src_entry->iokit_acct) {
13365 /*
13366 * This entry uses "IOKit accounting".
13367 */
13368 } else if (object != VM_OBJECT_NULL &&
13369 object->purgable != VM_PURGABLE_DENY) {
13370 /*
13371 * Purgeable objects have their own accounting:
13372 * no pmap accounting for them.
13373 */
13374 assert(!src_entry->use_pmap);
13375 } else {
13376 /*
13377 * Not IOKit or purgeable:
13378 * must be accounted by pmap stats.
13379 */
13380 assert(src_entry->use_pmap);
13381 }
13382
13383 if (object == VM_OBJECT_NULL) {
13384 object = vm_object_allocate(entry_size);
13385 VME_OFFSET_SET(src_entry, 0);
13386 VME_OBJECT_SET(src_entry, object);
13387 } else if (object->copy_strategy !=
13388 MEMORY_OBJECT_COPY_SYMMETRIC) {
13389 /*
13390 * We are already using an asymmetric
13391 * copy, and therefore we already have
13392 * the right object.
13393 */
13394 assert(!src_entry->needs_copy);
13395 } else if (src_entry->needs_copy || object->shadowed ||
13396 (object->internal && !object->true_share &&
13397 !src_entry->is_shared &&
13398 object->vo_size > entry_size)) {
13399
13400 VME_OBJECT_SHADOW(src_entry, entry_size);
13401
13402 if (!src_entry->needs_copy &&
13403 (src_entry->protection & VM_PROT_WRITE)) {
13404 vm_prot_t prot;
13405
13406 prot = src_entry->protection & ~VM_PROT_WRITE;
13407
13408 if (override_nx(map,
13409 VME_ALIAS(src_entry))
13410 && prot)
13411 prot |= VM_PROT_EXECUTE;
13412
13413 if(map->mapped_in_other_pmaps) {
13414 vm_object_pmap_protect(
13415 VME_OBJECT(src_entry),
13416 VME_OFFSET(src_entry),
13417 entry_size,
13418 PMAP_NULL,
13419 src_entry->vme_start,
13420 prot);
13421 } else {
13422 pmap_protect(vm_map_pmap(map),
13423 src_entry->vme_start,
13424 src_entry->vme_end,
13425 prot);
13426 }
13427 }
13428
13429 object = VME_OBJECT(src_entry);
13430 src_entry->needs_copy = FALSE;
13431 }
13432
13433
13434 vm_object_lock(object);
13435 vm_object_reference_locked(object); /* object ref. for new entry */
13436 if (object->copy_strategy ==
13437 MEMORY_OBJECT_COPY_SYMMETRIC) {
13438 object->copy_strategy =
13439 MEMORY_OBJECT_COPY_DELAY;
13440 }
13441 vm_object_unlock(object);
13442 }
13443
13444 offset = (VME_OFFSET(src_entry) +
13445 (src_start - src_entry->vme_start));
13446
13447 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
13448 vm_map_entry_copy(new_entry, src_entry);
13449 if (new_entry->is_sub_map) {
13450 /* clr address space specifics */
13451 new_entry->use_pmap = FALSE;
13452 }
13453
13454 new_entry->map_aligned = FALSE;
13455
13456 new_entry->vme_start = map_address;
13457 new_entry->vme_end = map_address + tmp_size;
13458 assert(new_entry->vme_start < new_entry->vme_end);
13459 new_entry->inheritance = inheritance;
13460 VME_OFFSET_SET(new_entry, offset);
13461
13462 /*
13463 * The new region has to be copied now if required.
13464 */
13465 RestartCopy:
13466 if (!copy) {
13467 /*
13468 * Cannot allow an entry describing a JIT
13469 * region to be shared across address spaces.
13470 */
13471 if (src_entry->used_for_jit == TRUE) {
13472 result = KERN_INVALID_ARGUMENT;
13473 break;
13474 }
13475 src_entry->is_shared = TRUE;
13476 new_entry->is_shared = TRUE;
13477 if (!(new_entry->is_sub_map))
13478 new_entry->needs_copy = FALSE;
13479
13480 } else if (src_entry->is_sub_map) {
13481 /* make this a COW sub_map if not already */
13482 assert(new_entry->wired_count == 0);
13483 new_entry->needs_copy = TRUE;
13484 object = VM_OBJECT_NULL;
13485 } else if (src_entry->wired_count == 0 &&
13486 vm_object_copy_quickly(&VME_OBJECT(new_entry),
13487 VME_OFFSET(new_entry),
13488 (new_entry->vme_end -
13489 new_entry->vme_start),
13490 &src_needs_copy,
13491 &new_entry_needs_copy)) {
13492
13493 new_entry->needs_copy = new_entry_needs_copy;
13494 new_entry->is_shared = FALSE;
13495
13496 /*
13497 * Handle copy_on_write semantics.
13498 */
13499 if (src_needs_copy && !src_entry->needs_copy) {
13500 vm_prot_t prot;
13501
13502 prot = src_entry->protection & ~VM_PROT_WRITE;
13503
13504 if (override_nx(map,
13505 VME_ALIAS(src_entry))
13506 && prot)
13507 prot |= VM_PROT_EXECUTE;
13508
13509 vm_object_pmap_protect(object,
13510 offset,
13511 entry_size,
13512 ((src_entry->is_shared
13513 || map->mapped_in_other_pmaps) ?
13514 PMAP_NULL : map->pmap),
13515 src_entry->vme_start,
13516 prot);
13517
13518 assert(src_entry->wired_count == 0);
13519 src_entry->needs_copy = TRUE;
13520 }
13521 /*
13522 * Throw away the old object reference of the new entry.
13523 */
13524 vm_object_deallocate(object);
13525
13526 } else {
13527 new_entry->is_shared = FALSE;
13528
13529 /*
13530 * The map can be safely unlocked since we
13531 * already hold a reference on the object.
13532 *
13533 * Record the timestamp of the map for later
13534 * verification, and unlock the map.
13535 */
13536 version.main_timestamp = map->timestamp;
13537 vm_map_unlock(map); /* Increments timestamp once! */
13538
13539 /*
13540 * Perform the copy.
13541 */
13542 if (src_entry->wired_count > 0) {
13543 vm_object_lock(object);
13544 result = vm_object_copy_slowly(
13545 object,
13546 offset,
13547 entry_size,
13548 THREAD_UNINT,
13549 &VME_OBJECT(new_entry));
13550
13551 VME_OFFSET_SET(new_entry, 0);
13552 new_entry->needs_copy = FALSE;
13553 } else {
13554 vm_object_offset_t new_offset;
13555
13556 new_offset = VME_OFFSET(new_entry);
13557 result = vm_object_copy_strategically(
13558 object,
13559 offset,
13560 entry_size,
13561 &VME_OBJECT(new_entry),
13562 &new_offset,
13563 &new_entry_needs_copy);
13564 if (new_offset != VME_OFFSET(new_entry)) {
13565 VME_OFFSET_SET(new_entry, new_offset);
13566 }
13567
13568 new_entry->needs_copy = new_entry_needs_copy;
13569 }
13570
13571 /*
13572 * Throw away the old object reference of the new entry.
13573 */
13574 vm_object_deallocate(object);
13575
13576 if (result != KERN_SUCCESS &&
13577 result != KERN_MEMORY_RESTART_COPY) {
13578 _vm_map_entry_dispose(map_header, new_entry);
13579 break;
13580 }
13581
13582 /*
13583 * Verify that the map has not substantially
13584 * changed while the copy was being made.
13585 */
13586
13587 vm_map_lock(map);
13588 if (version.main_timestamp + 1 != map->timestamp) {
13589 /*
13590 * Simple version comparison failed.
13591 *
13592 * Retry the lookup and verify that the
13593 * same object/offset are still present.
13594 */
13595 vm_object_deallocate(VME_OBJECT(new_entry));
13596 _vm_map_entry_dispose(map_header, new_entry);
13597 if (result == KERN_MEMORY_RESTART_COPY)
13598 result = KERN_SUCCESS;
13599 continue;
13600 }
13601
13602 if (result == KERN_MEMORY_RESTART_COPY) {
13603 vm_object_reference(object);
13604 goto RestartCopy;
13605 }
13606 }
13607
13608 _vm_map_store_entry_link(map_header,
13609 map_header->links.prev, new_entry);
13610
13611 /*Protections for submap mapping are irrelevant here*/
13612 if( !src_entry->is_sub_map ) {
13613 *cur_protection &= src_entry->protection;
13614 *max_protection &= src_entry->max_protection;
13615 }
13616 map_address += tmp_size;
13617 mapped_size += tmp_size;
13618 src_start += tmp_size;
13619
13620 } /* end while */
13621
13622 vm_map_unlock(map);
13623 if (result != KERN_SUCCESS) {
13624 /*
13625 * Free all allocated elements.
13626 */
13627 for (src_entry = map_header->links.next;
13628 src_entry != (struct vm_map_entry *)&map_header->links;
13629 src_entry = new_entry) {
13630 new_entry = src_entry->vme_next;
13631 _vm_map_store_entry_unlink(map_header, src_entry);
13632 if (src_entry->is_sub_map) {
13633 vm_map_deallocate(VME_SUBMAP(src_entry));
13634 } else {
13635 vm_object_deallocate(VME_OBJECT(src_entry));
13636 }
13637 _vm_map_entry_dispose(map_header, src_entry);
13638 }
13639 }
13640 return result;
13641 }
13642
13643 /*
13644 * Routine: vm_remap
13645 *
13646 * Map portion of a task's address space.
13647 * Mapped region must not overlap more than
13648 * one vm memory object. Protections and
13649 * inheritance attributes remain the same
13650 * as in the original task and are out parameters.
13651 * Source and Target task can be identical
13652 * Other attributes are identical as for vm_map()
13653 */
13654 kern_return_t
13655 vm_map_remap(
13656 vm_map_t target_map,
13657 vm_map_address_t *address,
13658 vm_map_size_t size,
13659 vm_map_offset_t mask,
13660 int flags,
13661 vm_map_t src_map,
13662 vm_map_offset_t memory_address,
13663 boolean_t copy,
13664 vm_prot_t *cur_protection,
13665 vm_prot_t *max_protection,
13666 vm_inherit_t inheritance)
13667 {
13668 kern_return_t result;
13669 vm_map_entry_t entry;
13670 vm_map_entry_t insp_entry = VM_MAP_ENTRY_NULL;
13671 vm_map_entry_t new_entry;
13672 struct vm_map_header map_header;
13673 vm_map_offset_t offset_in_mapping;
13674
13675 if (target_map == VM_MAP_NULL)
13676 return KERN_INVALID_ARGUMENT;
13677
13678 switch (inheritance) {
13679 case VM_INHERIT_NONE:
13680 case VM_INHERIT_COPY:
13681 case VM_INHERIT_SHARE:
13682 if (size != 0 && src_map != VM_MAP_NULL)
13683 break;
13684 /*FALL THRU*/
13685 default:
13686 return KERN_INVALID_ARGUMENT;
13687 }
13688
13689 /*
13690 * If the user is requesting that we return the address of the
13691 * first byte of the data (rather than the base of the page),
13692 * then we use different rounding semantics: specifically,
13693 * we assume that (memory_address, size) describes a region
13694 * all of whose pages we must cover, rather than a base to be truncated
13695 * down and a size to be added to that base. So we figure out
13696 * the highest page that the requested region includes and make
13697 * sure that the size will cover it.
13698 *
13699 * The key example we're worried about it is of the form:
13700 *
13701 * memory_address = 0x1ff0, size = 0x20
13702 *
13703 * With the old semantics, we round down the memory_address to 0x1000
13704 * and round up the size to 0x1000, resulting in our covering *only*
13705 * page 0x1000. With the new semantics, we'd realize that the region covers
13706 * 0x1ff0-0x2010, and compute a size of 0x2000. Thus, we cover both page
13707 * 0x1000 and page 0x2000 in the region we remap.
13708 */
13709 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
13710 offset_in_mapping = memory_address - vm_map_trunc_page(memory_address, PAGE_MASK);
13711 size = vm_map_round_page(memory_address + size - vm_map_trunc_page(memory_address, PAGE_MASK), PAGE_MASK);
13712 } else {
13713 size = vm_map_round_page(size, PAGE_MASK);
13714 }
13715
13716 result = vm_map_remap_extract(src_map, memory_address,
13717 size, copy, &map_header,
13718 cur_protection,
13719 max_protection,
13720 inheritance,
13721 target_map->hdr.entries_pageable);
13722
13723 if (result != KERN_SUCCESS) {
13724 return result;
13725 }
13726
13727 /*
13728 * Allocate/check a range of free virtual address
13729 * space for the target
13730 */
13731 *address = vm_map_trunc_page(*address,
13732 VM_MAP_PAGE_MASK(target_map));
13733 vm_map_lock(target_map);
13734 result = vm_map_remap_range_allocate(target_map, address, size,
13735 mask, flags, &insp_entry);
13736
13737 for (entry = map_header.links.next;
13738 entry != (struct vm_map_entry *)&map_header.links;
13739 entry = new_entry) {
13740 new_entry = entry->vme_next;
13741 _vm_map_store_entry_unlink(&map_header, entry);
13742 if (result == KERN_SUCCESS) {
13743 if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
13744 /* no codesigning -> read-only access */
13745 assert(!entry->used_for_jit);
13746 entry->max_protection = VM_PROT_READ;
13747 entry->protection = VM_PROT_READ;
13748 entry->vme_resilient_codesign = TRUE;
13749 }
13750 entry->vme_start += *address;
13751 entry->vme_end += *address;
13752 assert(!entry->map_aligned);
13753 vm_map_store_entry_link(target_map, insp_entry, entry);
13754 insp_entry = entry;
13755 } else {
13756 if (!entry->is_sub_map) {
13757 vm_object_deallocate(VME_OBJECT(entry));
13758 } else {
13759 vm_map_deallocate(VME_SUBMAP(entry));
13760 }
13761 _vm_map_entry_dispose(&map_header, entry);
13762 }
13763 }
13764
13765 if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
13766 *cur_protection = VM_PROT_READ;
13767 *max_protection = VM_PROT_READ;
13768 }
13769
13770 if( target_map->disable_vmentry_reuse == TRUE) {
13771 if( target_map->highest_entry_end < insp_entry->vme_end ){
13772 target_map->highest_entry_end = insp_entry->vme_end;
13773 }
13774 }
13775
13776 if (result == KERN_SUCCESS) {
13777 target_map->size += size;
13778 SAVE_HINT_MAP_WRITE(target_map, insp_entry);
13779 }
13780 vm_map_unlock(target_map);
13781
13782 if (result == KERN_SUCCESS && target_map->wiring_required)
13783 result = vm_map_wire(target_map, *address,
13784 *address + size, *cur_protection | VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_MLOCK),
13785 TRUE);
13786
13787 /*
13788 * If requested, return the address of the data pointed to by the
13789 * request, rather than the base of the resulting page.
13790 */
13791 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
13792 *address += offset_in_mapping;
13793 }
13794
13795 return result;
13796 }
13797
13798 /*
13799 * Routine: vm_map_remap_range_allocate
13800 *
13801 * Description:
13802 * Allocate a range in the specified virtual address map.
13803 * returns the address and the map entry just before the allocated
13804 * range
13805 *
13806 * Map must be locked.
13807 */
13808
13809 static kern_return_t
13810 vm_map_remap_range_allocate(
13811 vm_map_t map,
13812 vm_map_address_t *address, /* IN/OUT */
13813 vm_map_size_t size,
13814 vm_map_offset_t mask,
13815 int flags,
13816 vm_map_entry_t *map_entry) /* OUT */
13817 {
13818 vm_map_entry_t entry;
13819 vm_map_offset_t start;
13820 vm_map_offset_t end;
13821 kern_return_t kr;
13822 vm_map_entry_t hole_entry;
13823
13824 StartAgain: ;
13825
13826 start = *address;
13827
13828 if (flags & VM_FLAGS_ANYWHERE)
13829 {
13830 /*
13831 * Calculate the first possible address.
13832 */
13833
13834 if (start < map->min_offset)
13835 start = map->min_offset;
13836 if (start > map->max_offset)
13837 return(KERN_NO_SPACE);
13838
13839 /*
13840 * Look for the first possible address;
13841 * if there's already something at this
13842 * address, we have to start after it.
13843 */
13844
13845 if( map->disable_vmentry_reuse == TRUE) {
13846 VM_MAP_HIGHEST_ENTRY(map, entry, start);
13847 } else {
13848
13849 if (map->holelistenabled) {
13850 hole_entry = (vm_map_entry_t)map->holes_list;
13851
13852 if (hole_entry == NULL) {
13853 /*
13854 * No more space in the map?
13855 */
13856 return(KERN_NO_SPACE);
13857 } else {
13858
13859 boolean_t found_hole = FALSE;
13860
13861 do {
13862 if (hole_entry->vme_start >= start) {
13863 start = hole_entry->vme_start;
13864 found_hole = TRUE;
13865 break;
13866 }
13867
13868 if (hole_entry->vme_end > start) {
13869 found_hole = TRUE;
13870 break;
13871 }
13872 hole_entry = hole_entry->vme_next;
13873
13874 } while (hole_entry != (vm_map_entry_t) map->holes_list);
13875
13876 if (found_hole == FALSE) {
13877 return (KERN_NO_SPACE);
13878 }
13879
13880 entry = hole_entry;
13881 }
13882 } else {
13883 assert(first_free_is_valid(map));
13884 if (start == map->min_offset) {
13885 if ((entry = map->first_free) != vm_map_to_entry(map))
13886 start = entry->vme_end;
13887 } else {
13888 vm_map_entry_t tmp_entry;
13889 if (vm_map_lookup_entry(map, start, &tmp_entry))
13890 start = tmp_entry->vme_end;
13891 entry = tmp_entry;
13892 }
13893 }
13894 start = vm_map_round_page(start,
13895 VM_MAP_PAGE_MASK(map));
13896 }
13897
13898 /*
13899 * In any case, the "entry" always precedes
13900 * the proposed new region throughout the
13901 * loop:
13902 */
13903
13904 while (TRUE) {
13905 register vm_map_entry_t next;
13906
13907 /*
13908 * Find the end of the proposed new region.
13909 * Be sure we didn't go beyond the end, or
13910 * wrap around the address.
13911 */
13912
13913 end = ((start + mask) & ~mask);
13914 end = vm_map_round_page(end,
13915 VM_MAP_PAGE_MASK(map));
13916 if (end < start)
13917 return(KERN_NO_SPACE);
13918 start = end;
13919 end += size;
13920
13921 if ((end > map->max_offset) || (end < start)) {
13922 if (map->wait_for_space) {
13923 if (size <= (map->max_offset -
13924 map->min_offset)) {
13925 assert_wait((event_t) map, THREAD_INTERRUPTIBLE);
13926 vm_map_unlock(map);
13927 thread_block(THREAD_CONTINUE_NULL);
13928 vm_map_lock(map);
13929 goto StartAgain;
13930 }
13931 }
13932
13933 return(KERN_NO_SPACE);
13934 }
13935
13936 next = entry->vme_next;
13937
13938 if (map->holelistenabled) {
13939 if (entry->vme_end >= end)
13940 break;
13941 } else {
13942 /*
13943 * If there are no more entries, we must win.
13944 *
13945 * OR
13946 *
13947 * If there is another entry, it must be
13948 * after the end of the potential new region.
13949 */
13950
13951 if (next == vm_map_to_entry(map))
13952 break;
13953
13954 if (next->vme_start >= end)
13955 break;
13956 }
13957
13958 /*
13959 * Didn't fit -- move to the next entry.
13960 */
13961
13962 entry = next;
13963
13964 if (map->holelistenabled) {
13965 if (entry == (vm_map_entry_t) map->holes_list) {
13966 /*
13967 * Wrapped around
13968 */
13969 return(KERN_NO_SPACE);
13970 }
13971 start = entry->vme_start;
13972 } else {
13973 start = entry->vme_end;
13974 }
13975 }
13976
13977 if (map->holelistenabled) {
13978
13979 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
13980 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
13981 }
13982 }
13983
13984 *address = start;
13985
13986 } else {
13987 vm_map_entry_t temp_entry;
13988
13989 /*
13990 * Verify that:
13991 * the address doesn't itself violate
13992 * the mask requirement.
13993 */
13994
13995 if ((start & mask) != 0)
13996 return(KERN_NO_SPACE);
13997
13998
13999 /*
14000 * ... the address is within bounds
14001 */
14002
14003 end = start + size;
14004
14005 if ((start < map->min_offset) ||
14006 (end > map->max_offset) ||
14007 (start >= end)) {
14008 return(KERN_INVALID_ADDRESS);
14009 }
14010
14011 /*
14012 * If we're asked to overwrite whatever was mapped in that
14013 * range, first deallocate that range.
14014 */
14015 if (flags & VM_FLAGS_OVERWRITE) {
14016 vm_map_t zap_map;
14017
14018 /*
14019 * We use a "zap_map" to avoid having to unlock
14020 * the "map" in vm_map_delete(), which would compromise
14021 * the atomicity of the "deallocate" and then "remap"
14022 * combination.
14023 */
14024 zap_map = vm_map_create(PMAP_NULL,
14025 start,
14026 end,
14027 map->hdr.entries_pageable);
14028 if (zap_map == VM_MAP_NULL) {
14029 return KERN_RESOURCE_SHORTAGE;
14030 }
14031 vm_map_set_page_shift(zap_map, VM_MAP_PAGE_SHIFT(map));
14032 vm_map_disable_hole_optimization(zap_map);
14033
14034 kr = vm_map_delete(map, start, end,
14035 (VM_MAP_REMOVE_SAVE_ENTRIES |
14036 VM_MAP_REMOVE_NO_MAP_ALIGN),
14037 zap_map);
14038 if (kr == KERN_SUCCESS) {
14039 vm_map_destroy(zap_map,
14040 VM_MAP_REMOVE_NO_PMAP_CLEANUP);
14041 zap_map = VM_MAP_NULL;
14042 }
14043 }
14044
14045 /*
14046 * ... the starting address isn't allocated
14047 */
14048
14049 if (vm_map_lookup_entry(map, start, &temp_entry))
14050 return(KERN_NO_SPACE);
14051
14052 entry = temp_entry;
14053
14054 /*
14055 * ... the next region doesn't overlap the
14056 * end point.
14057 */
14058
14059 if ((entry->vme_next != vm_map_to_entry(map)) &&
14060 (entry->vme_next->vme_start < end))
14061 return(KERN_NO_SPACE);
14062 }
14063 *map_entry = entry;
14064 return(KERN_SUCCESS);
14065 }
14066
14067 /*
14068 * vm_map_switch:
14069 *
14070 * Set the address map for the current thread to the specified map
14071 */
14072
14073 vm_map_t
14074 vm_map_switch(
14075 vm_map_t map)
14076 {
14077 int mycpu;
14078 thread_t thread = current_thread();
14079 vm_map_t oldmap = thread->map;
14080
14081 mp_disable_preemption();
14082 mycpu = cpu_number();
14083
14084 /*
14085 * Deactivate the current map and activate the requested map
14086 */
14087 PMAP_SWITCH_USER(thread, map, mycpu);
14088
14089 mp_enable_preemption();
14090 return(oldmap);
14091 }
14092
14093
14094 /*
14095 * Routine: vm_map_write_user
14096 *
14097 * Description:
14098 * Copy out data from a kernel space into space in the
14099 * destination map. The space must already exist in the
14100 * destination map.
14101 * NOTE: This routine should only be called by threads
14102 * which can block on a page fault. i.e. kernel mode user
14103 * threads.
14104 *
14105 */
14106 kern_return_t
14107 vm_map_write_user(
14108 vm_map_t map,
14109 void *src_p,
14110 vm_map_address_t dst_addr,
14111 vm_size_t size)
14112 {
14113 kern_return_t kr = KERN_SUCCESS;
14114
14115 if(current_map() == map) {
14116 if (copyout(src_p, dst_addr, size)) {
14117 kr = KERN_INVALID_ADDRESS;
14118 }
14119 } else {
14120 vm_map_t oldmap;
14121
14122 /* take on the identity of the target map while doing */
14123 /* the transfer */
14124
14125 vm_map_reference(map);
14126 oldmap = vm_map_switch(map);
14127 if (copyout(src_p, dst_addr, size)) {
14128 kr = KERN_INVALID_ADDRESS;
14129 }
14130 vm_map_switch(oldmap);
14131 vm_map_deallocate(map);
14132 }
14133 return kr;
14134 }
14135
14136 /*
14137 * Routine: vm_map_read_user
14138 *
14139 * Description:
14140 * Copy in data from a user space source map into the
14141 * kernel map. The space must already exist in the
14142 * kernel map.
14143 * NOTE: This routine should only be called by threads
14144 * which can block on a page fault. i.e. kernel mode user
14145 * threads.
14146 *
14147 */
14148 kern_return_t
14149 vm_map_read_user(
14150 vm_map_t map,
14151 vm_map_address_t src_addr,
14152 void *dst_p,
14153 vm_size_t size)
14154 {
14155 kern_return_t kr = KERN_SUCCESS;
14156
14157 if(current_map() == map) {
14158 if (copyin(src_addr, dst_p, size)) {
14159 kr = KERN_INVALID_ADDRESS;
14160 }
14161 } else {
14162 vm_map_t oldmap;
14163
14164 /* take on the identity of the target map while doing */
14165 /* the transfer */
14166
14167 vm_map_reference(map);
14168 oldmap = vm_map_switch(map);
14169 if (copyin(src_addr, dst_p, size)) {
14170 kr = KERN_INVALID_ADDRESS;
14171 }
14172 vm_map_switch(oldmap);
14173 vm_map_deallocate(map);
14174 }
14175 return kr;
14176 }
14177
14178
14179 /*
14180 * vm_map_check_protection:
14181 *
14182 * Assert that the target map allows the specified
14183 * privilege on the entire address region given.
14184 * The entire region must be allocated.
14185 */
14186 boolean_t
14187 vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
14188 vm_map_offset_t end, vm_prot_t protection)
14189 {
14190 vm_map_entry_t entry;
14191 vm_map_entry_t tmp_entry;
14192
14193 vm_map_lock(map);
14194
14195 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
14196 {
14197 vm_map_unlock(map);
14198 return (FALSE);
14199 }
14200
14201 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
14202 vm_map_unlock(map);
14203 return(FALSE);
14204 }
14205
14206 entry = tmp_entry;
14207
14208 while (start < end) {
14209 if (entry == vm_map_to_entry(map)) {
14210 vm_map_unlock(map);
14211 return(FALSE);
14212 }
14213
14214 /*
14215 * No holes allowed!
14216 */
14217
14218 if (start < entry->vme_start) {
14219 vm_map_unlock(map);
14220 return(FALSE);
14221 }
14222
14223 /*
14224 * Check protection associated with entry.
14225 */
14226
14227 if ((entry->protection & protection) != protection) {
14228 vm_map_unlock(map);
14229 return(FALSE);
14230 }
14231
14232 /* go to next entry */
14233
14234 start = entry->vme_end;
14235 entry = entry->vme_next;
14236 }
14237 vm_map_unlock(map);
14238 return(TRUE);
14239 }
14240
14241 kern_return_t
14242 vm_map_purgable_control(
14243 vm_map_t map,
14244 vm_map_offset_t address,
14245 vm_purgable_t control,
14246 int *state)
14247 {
14248 vm_map_entry_t entry;
14249 vm_object_t object;
14250 kern_return_t kr;
14251 boolean_t was_nonvolatile;
14252
14253 /*
14254 * Vet all the input parameters and current type and state of the
14255 * underlaying object. Return with an error if anything is amiss.
14256 */
14257 if (map == VM_MAP_NULL)
14258 return(KERN_INVALID_ARGUMENT);
14259
14260 if (control != VM_PURGABLE_SET_STATE &&
14261 control != VM_PURGABLE_GET_STATE &&
14262 control != VM_PURGABLE_PURGE_ALL)
14263 return(KERN_INVALID_ARGUMENT);
14264
14265 if (control == VM_PURGABLE_PURGE_ALL) {
14266 vm_purgeable_object_purge_all();
14267 return KERN_SUCCESS;
14268 }
14269
14270 if (control == VM_PURGABLE_SET_STATE &&
14271 (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) ||
14272 ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK)))
14273 return(KERN_INVALID_ARGUMENT);
14274
14275 vm_map_lock_read(map);
14276
14277 if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {
14278
14279 /*
14280 * Must pass a valid non-submap address.
14281 */
14282 vm_map_unlock_read(map);
14283 return(KERN_INVALID_ADDRESS);
14284 }
14285
14286 if ((entry->protection & VM_PROT_WRITE) == 0) {
14287 /*
14288 * Can't apply purgable controls to something you can't write.
14289 */
14290 vm_map_unlock_read(map);
14291 return(KERN_PROTECTION_FAILURE);
14292 }
14293
14294 object = VME_OBJECT(entry);
14295 if (object == VM_OBJECT_NULL ||
14296 object->purgable == VM_PURGABLE_DENY) {
14297 /*
14298 * Object must already be present and be purgeable.
14299 */
14300 vm_map_unlock_read(map);
14301 return KERN_INVALID_ARGUMENT;
14302 }
14303
14304 vm_object_lock(object);
14305
14306 #if 00
14307 if (VME_OFFSET(entry) != 0 ||
14308 entry->vme_end - entry->vme_start != object->vo_size) {
14309 /*
14310 * Can only apply purgable controls to the whole (existing)
14311 * object at once.
14312 */
14313 vm_map_unlock_read(map);
14314 vm_object_unlock(object);
14315 return KERN_INVALID_ARGUMENT;
14316 }
14317 #endif
14318
14319 assert(!entry->is_sub_map);
14320 assert(!entry->use_pmap); /* purgeable has its own accounting */
14321
14322 vm_map_unlock_read(map);
14323
14324 was_nonvolatile = (object->purgable == VM_PURGABLE_NONVOLATILE);
14325
14326 kr = vm_object_purgable_control(object, control, state);
14327
14328 if (was_nonvolatile &&
14329 object->purgable != VM_PURGABLE_NONVOLATILE &&
14330 map->pmap == kernel_pmap) {
14331 #if DEBUG
14332 object->vo_purgeable_volatilizer = kernel_task;
14333 #endif /* DEBUG */
14334 }
14335
14336 vm_object_unlock(object);
14337
14338 return kr;
14339 }
14340
14341 kern_return_t
14342 vm_map_page_query_internal(
14343 vm_map_t target_map,
14344 vm_map_offset_t offset,
14345 int *disposition,
14346 int *ref_count)
14347 {
14348 kern_return_t kr;
14349 vm_page_info_basic_data_t info;
14350 mach_msg_type_number_t count;
14351
14352 count = VM_PAGE_INFO_BASIC_COUNT;
14353 kr = vm_map_page_info(target_map,
14354 offset,
14355 VM_PAGE_INFO_BASIC,
14356 (vm_page_info_t) &info,
14357 &count);
14358 if (kr == KERN_SUCCESS) {
14359 *disposition = info.disposition;
14360 *ref_count = info.ref_count;
14361 } else {
14362 *disposition = 0;
14363 *ref_count = 0;
14364 }
14365
14366 return kr;
14367 }
14368
14369 kern_return_t
14370 vm_map_page_info(
14371 vm_map_t map,
14372 vm_map_offset_t offset,
14373 vm_page_info_flavor_t flavor,
14374 vm_page_info_t info,
14375 mach_msg_type_number_t *count)
14376 {
14377 vm_map_entry_t map_entry;
14378 vm_object_t object;
14379 vm_page_t m;
14380 kern_return_t kr;
14381 kern_return_t retval = KERN_SUCCESS;
14382 boolean_t top_object;
14383 int disposition;
14384 int ref_count;
14385 vm_page_info_basic_t basic_info;
14386 int depth;
14387 vm_map_offset_t offset_in_page;
14388
14389 switch (flavor) {
14390 case VM_PAGE_INFO_BASIC:
14391 if (*count != VM_PAGE_INFO_BASIC_COUNT) {
14392 /*
14393 * The "vm_page_info_basic_data" structure was not
14394 * properly padded, so allow the size to be off by
14395 * one to maintain backwards binary compatibility...
14396 */
14397 if (*count != VM_PAGE_INFO_BASIC_COUNT - 1)
14398 return KERN_INVALID_ARGUMENT;
14399 }
14400 break;
14401 default:
14402 return KERN_INVALID_ARGUMENT;
14403 }
14404
14405 disposition = 0;
14406 ref_count = 0;
14407 top_object = TRUE;
14408 depth = 0;
14409
14410 retval = KERN_SUCCESS;
14411 offset_in_page = offset & PAGE_MASK;
14412 offset = vm_map_trunc_page(offset, PAGE_MASK);
14413
14414 vm_map_lock_read(map);
14415
14416 /*
14417 * First, find the map entry covering "offset", going down
14418 * submaps if necessary.
14419 */
14420 for (;;) {
14421 if (!vm_map_lookup_entry(map, offset, &map_entry)) {
14422 vm_map_unlock_read(map);
14423 return KERN_INVALID_ADDRESS;
14424 }
14425 /* compute offset from this map entry's start */
14426 offset -= map_entry->vme_start;
14427 /* compute offset into this map entry's object (or submap) */
14428 offset += VME_OFFSET(map_entry);
14429
14430 if (map_entry->is_sub_map) {
14431 vm_map_t sub_map;
14432
14433 sub_map = VME_SUBMAP(map_entry);
14434 vm_map_lock_read(sub_map);
14435 vm_map_unlock_read(map);
14436
14437 map = sub_map;
14438
14439 ref_count = MAX(ref_count, map->ref_count);
14440 continue;
14441 }
14442 break;
14443 }
14444
14445 object = VME_OBJECT(map_entry);
14446 if (object == VM_OBJECT_NULL) {
14447 /* no object -> no page */
14448 vm_map_unlock_read(map);
14449 goto done;
14450 }
14451
14452 vm_object_lock(object);
14453 vm_map_unlock_read(map);
14454
14455 /*
14456 * Go down the VM object shadow chain until we find the page
14457 * we're looking for.
14458 */
14459 for (;;) {
14460 ref_count = MAX(ref_count, object->ref_count);
14461
14462 m = vm_page_lookup(object, offset);
14463
14464 if (m != VM_PAGE_NULL) {
14465 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
14466 break;
14467 } else {
14468 #if MACH_PAGEMAP
14469 if (object->existence_map) {
14470 if (vm_external_state_get(object->existence_map,
14471 offset) ==
14472 VM_EXTERNAL_STATE_EXISTS) {
14473 /*
14474 * this page has been paged out
14475 */
14476 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
14477 break;
14478 }
14479 } else
14480 #endif
14481 if (object->internal &&
14482 object->alive &&
14483 !object->terminating &&
14484 object->pager_ready) {
14485
14486 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
14487 if (VM_COMPRESSOR_PAGER_STATE_GET(
14488 object,
14489 offset)
14490 == VM_EXTERNAL_STATE_EXISTS) {
14491 /* the pager has that page */
14492 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
14493 break;
14494 }
14495 } else {
14496 memory_object_t pager;
14497
14498 vm_object_paging_begin(object);
14499 pager = object->pager;
14500 vm_object_unlock(object);
14501
14502 /*
14503 * Ask the default pager if
14504 * it has this page.
14505 */
14506 kr = memory_object_data_request(
14507 pager,
14508 offset + object->paging_offset,
14509 0, /* just poke the pager */
14510 VM_PROT_READ,
14511 NULL);
14512
14513 vm_object_lock(object);
14514 vm_object_paging_end(object);
14515
14516 if (kr == KERN_SUCCESS) {
14517 /* the default pager has it */
14518 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
14519 break;
14520 }
14521 }
14522 }
14523
14524 if (object->shadow != VM_OBJECT_NULL) {
14525 vm_object_t shadow;
14526
14527 offset += object->vo_shadow_offset;
14528 shadow = object->shadow;
14529
14530 vm_object_lock(shadow);
14531 vm_object_unlock(object);
14532
14533 object = shadow;
14534 top_object = FALSE;
14535 depth++;
14536 } else {
14537 // if (!object->internal)
14538 // break;
14539 // retval = KERN_FAILURE;
14540 // goto done_with_object;
14541 break;
14542 }
14543 }
14544 }
14545 /* The ref_count is not strictly accurate, it measures the number */
14546 /* of entities holding a ref on the object, they may not be mapping */
14547 /* the object or may not be mapping the section holding the */
14548 /* target page but its still a ball park number and though an over- */
14549 /* count, it picks up the copy-on-write cases */
14550
14551 /* We could also get a picture of page sharing from pmap_attributes */
14552 /* but this would under count as only faulted-in mappings would */
14553 /* show up. */
14554
14555 if (top_object == TRUE && object->shadow)
14556 disposition |= VM_PAGE_QUERY_PAGE_COPIED;
14557
14558 if (! object->internal)
14559 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
14560
14561 if (m == VM_PAGE_NULL)
14562 goto done_with_object;
14563
14564 if (m->fictitious) {
14565 disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
14566 goto done_with_object;
14567 }
14568 if (m->dirty || pmap_is_modified(m->phys_page))
14569 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
14570
14571 if (m->reference || pmap_is_referenced(m->phys_page))
14572 disposition |= VM_PAGE_QUERY_PAGE_REF;
14573
14574 if (m->speculative)
14575 disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE;
14576
14577 if (m->cs_validated)
14578 disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED;
14579 if (m->cs_tainted)
14580 disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED;
14581 if (m->cs_nx)
14582 disposition |= VM_PAGE_QUERY_PAGE_CS_NX;
14583
14584 done_with_object:
14585 vm_object_unlock(object);
14586 done:
14587
14588 switch (flavor) {
14589 case VM_PAGE_INFO_BASIC:
14590 basic_info = (vm_page_info_basic_t) info;
14591 basic_info->disposition = disposition;
14592 basic_info->ref_count = ref_count;
14593 basic_info->object_id = (vm_object_id_t) (uintptr_t)
14594 VM_KERNEL_ADDRPERM(object);
14595 basic_info->offset =
14596 (memory_object_offset_t) offset + offset_in_page;
14597 basic_info->depth = depth;
14598 break;
14599 }
14600
14601 return retval;
14602 }
14603
14604 /*
14605 * vm_map_msync
14606 *
14607 * Synchronises the memory range specified with its backing store
14608 * image by either flushing or cleaning the contents to the appropriate
14609 * memory manager engaging in a memory object synchronize dialog with
14610 * the manager. The client doesn't return until the manager issues
14611 * m_o_s_completed message. MIG Magically converts user task parameter
14612 * to the task's address map.
14613 *
14614 * interpretation of sync_flags
14615 * VM_SYNC_INVALIDATE - discard pages, only return precious
14616 * pages to manager.
14617 *
14618 * VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
14619 * - discard pages, write dirty or precious
14620 * pages back to memory manager.
14621 *
14622 * VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
14623 * - write dirty or precious pages back to
14624 * the memory manager.
14625 *
14626 * VM_SYNC_CONTIGUOUS - does everything normally, but if there
14627 * is a hole in the region, and we would
14628 * have returned KERN_SUCCESS, return
14629 * KERN_INVALID_ADDRESS instead.
14630 *
14631 * NOTE
14632 * The memory object attributes have not yet been implemented, this
14633 * function will have to deal with the invalidate attribute
14634 *
14635 * RETURNS
14636 * KERN_INVALID_TASK Bad task parameter
14637 * KERN_INVALID_ARGUMENT both sync and async were specified.
14638 * KERN_SUCCESS The usual.
14639 * KERN_INVALID_ADDRESS There was a hole in the region.
14640 */
14641
14642 kern_return_t
14643 vm_map_msync(
14644 vm_map_t map,
14645 vm_map_address_t address,
14646 vm_map_size_t size,
14647 vm_sync_t sync_flags)
14648 {
14649 msync_req_t msr;
14650 msync_req_t new_msr;
14651 queue_chain_t req_q; /* queue of requests for this msync */
14652 vm_map_entry_t entry;
14653 vm_map_size_t amount_left;
14654 vm_object_offset_t offset;
14655 boolean_t do_sync_req;
14656 boolean_t had_hole = FALSE;
14657 memory_object_t pager;
14658 vm_map_offset_t pmap_offset;
14659
14660 if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
14661 (sync_flags & VM_SYNC_SYNCHRONOUS))
14662 return(KERN_INVALID_ARGUMENT);
14663
14664 /*
14665 * align address and size on page boundaries
14666 */
14667 size = (vm_map_round_page(address + size,
14668 VM_MAP_PAGE_MASK(map)) -
14669 vm_map_trunc_page(address,
14670 VM_MAP_PAGE_MASK(map)));
14671 address = vm_map_trunc_page(address,
14672 VM_MAP_PAGE_MASK(map));
14673
14674 if (map == VM_MAP_NULL)
14675 return(KERN_INVALID_TASK);
14676
14677 if (size == 0)
14678 return(KERN_SUCCESS);
14679
14680 queue_init(&req_q);
14681 amount_left = size;
14682
14683 while (amount_left > 0) {
14684 vm_object_size_t flush_size;
14685 vm_object_t object;
14686
14687 vm_map_lock(map);
14688 if (!vm_map_lookup_entry(map,
14689 address,
14690 &entry)) {
14691
14692 vm_map_size_t skip;
14693
14694 /*
14695 * hole in the address map.
14696 */
14697 had_hole = TRUE;
14698
14699 /*
14700 * Check for empty map.
14701 */
14702 if (entry == vm_map_to_entry(map) &&
14703 entry->vme_next == entry) {
14704 vm_map_unlock(map);
14705 break;
14706 }
14707 /*
14708 * Check that we don't wrap and that
14709 * we have at least one real map entry.
14710 */
14711 if ((map->hdr.nentries == 0) ||
14712 (entry->vme_next->vme_start < address)) {
14713 vm_map_unlock(map);
14714 break;
14715 }
14716 /*
14717 * Move up to the next entry if needed
14718 */
14719 skip = (entry->vme_next->vme_start - address);
14720 if (skip >= amount_left)
14721 amount_left = 0;
14722 else
14723 amount_left -= skip;
14724 address = entry->vme_next->vme_start;
14725 vm_map_unlock(map);
14726 continue;
14727 }
14728
14729 offset = address - entry->vme_start;
14730 pmap_offset = address;
14731
14732 /*
14733 * do we have more to flush than is contained in this
14734 * entry ?
14735 */
14736 if (amount_left + entry->vme_start + offset > entry->vme_end) {
14737 flush_size = entry->vme_end -
14738 (entry->vme_start + offset);
14739 } else {
14740 flush_size = amount_left;
14741 }
14742 amount_left -= flush_size;
14743 address += flush_size;
14744
14745 if (entry->is_sub_map == TRUE) {
14746 vm_map_t local_map;
14747 vm_map_offset_t local_offset;
14748
14749 local_map = VME_SUBMAP(entry);
14750 local_offset = VME_OFFSET(entry);
14751 vm_map_unlock(map);
14752 if (vm_map_msync(
14753 local_map,
14754 local_offset,
14755 flush_size,
14756 sync_flags) == KERN_INVALID_ADDRESS) {
14757 had_hole = TRUE;
14758 }
14759 continue;
14760 }
14761 object = VME_OBJECT(entry);
14762
14763 /*
14764 * We can't sync this object if the object has not been
14765 * created yet
14766 */
14767 if (object == VM_OBJECT_NULL) {
14768 vm_map_unlock(map);
14769 continue;
14770 }
14771 offset += VME_OFFSET(entry);
14772
14773 vm_object_lock(object);
14774
14775 if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
14776 int kill_pages = 0;
14777 boolean_t reusable_pages = FALSE;
14778
14779 if (sync_flags & VM_SYNC_KILLPAGES) {
14780 if (object->ref_count == 1 && !object->shadow)
14781 kill_pages = 1;
14782 else
14783 kill_pages = -1;
14784 }
14785 if (kill_pages != -1)
14786 vm_object_deactivate_pages(
14787 object,
14788 offset,
14789 (vm_object_size_t) flush_size,
14790 kill_pages,
14791 reusable_pages,
14792 map->pmap,
14793 pmap_offset);
14794 vm_object_unlock(object);
14795 vm_map_unlock(map);
14796 continue;
14797 }
14798 /*
14799 * We can't sync this object if there isn't a pager.
14800 * Don't bother to sync internal objects, since there can't
14801 * be any "permanent" storage for these objects anyway.
14802 */
14803 if ((object->pager == MEMORY_OBJECT_NULL) ||
14804 (object->internal) || (object->private)) {
14805 vm_object_unlock(object);
14806 vm_map_unlock(map);
14807 continue;
14808 }
14809 /*
14810 * keep reference on the object until syncing is done
14811 */
14812 vm_object_reference_locked(object);
14813 vm_object_unlock(object);
14814
14815 vm_map_unlock(map);
14816
14817 do_sync_req = vm_object_sync(object,
14818 offset,
14819 flush_size,
14820 sync_flags & VM_SYNC_INVALIDATE,
14821 ((sync_flags & VM_SYNC_SYNCHRONOUS) ||
14822 (sync_flags & VM_SYNC_ASYNCHRONOUS)),
14823 sync_flags & VM_SYNC_SYNCHRONOUS);
14824 /*
14825 * only send a m_o_s if we returned pages or if the entry
14826 * is writable (ie dirty pages may have already been sent back)
14827 */
14828 if (!do_sync_req) {
14829 if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) {
14830 /*
14831 * clear out the clustering and read-ahead hints
14832 */
14833 vm_object_lock(object);
14834
14835 object->pages_created = 0;
14836 object->pages_used = 0;
14837 object->sequential = 0;
14838 object->last_alloc = 0;
14839
14840 vm_object_unlock(object);
14841 }
14842 vm_object_deallocate(object);
14843 continue;
14844 }
14845 msync_req_alloc(new_msr);
14846
14847 vm_object_lock(object);
14848 offset += object->paging_offset;
14849
14850 new_msr->offset = offset;
14851 new_msr->length = flush_size;
14852 new_msr->object = object;
14853 new_msr->flag = VM_MSYNC_SYNCHRONIZING;
14854 re_iterate:
14855
14856 /*
14857 * We can't sync this object if there isn't a pager. The
14858 * pager can disappear anytime we're not holding the object
14859 * lock. So this has to be checked anytime we goto re_iterate.
14860 */
14861
14862 pager = object->pager;
14863
14864 if (pager == MEMORY_OBJECT_NULL) {
14865 vm_object_unlock(object);
14866 vm_object_deallocate(object);
14867 msync_req_free(new_msr);
14868 new_msr = NULL;
14869 continue;
14870 }
14871
14872 queue_iterate(&object->msr_q, msr, msync_req_t, msr_q) {
14873 /*
14874 * need to check for overlapping entry, if found, wait
14875 * on overlapping msr to be done, then reiterate
14876 */
14877 msr_lock(msr);
14878 if (msr->flag == VM_MSYNC_SYNCHRONIZING &&
14879 ((offset >= msr->offset &&
14880 offset < (msr->offset + msr->length)) ||
14881 (msr->offset >= offset &&
14882 msr->offset < (offset + flush_size))))
14883 {
14884 assert_wait((event_t) msr,THREAD_INTERRUPTIBLE);
14885 msr_unlock(msr);
14886 vm_object_unlock(object);
14887 thread_block(THREAD_CONTINUE_NULL);
14888 vm_object_lock(object);
14889 goto re_iterate;
14890 }
14891 msr_unlock(msr);
14892 }/* queue_iterate */
14893
14894 queue_enter(&object->msr_q, new_msr, msync_req_t, msr_q);
14895
14896 vm_object_paging_begin(object);
14897 vm_object_unlock(object);
14898
14899 queue_enter(&req_q, new_msr, msync_req_t, req_q);
14900
14901 (void) memory_object_synchronize(
14902 pager,
14903 offset,
14904 flush_size,
14905 sync_flags & ~VM_SYNC_CONTIGUOUS);
14906
14907 vm_object_lock(object);
14908 vm_object_paging_end(object);
14909 vm_object_unlock(object);
14910 }/* while */
14911
14912 /*
14913 * wait for memory_object_sychronize_completed messages from pager(s)
14914 */
14915
14916 while (!queue_empty(&req_q)) {
14917 msr = (msync_req_t)queue_first(&req_q);
14918 msr_lock(msr);
14919 while(msr->flag != VM_MSYNC_DONE) {
14920 assert_wait((event_t) msr, THREAD_INTERRUPTIBLE);
14921 msr_unlock(msr);
14922 thread_block(THREAD_CONTINUE_NULL);
14923 msr_lock(msr);
14924 }/* while */
14925 queue_remove(&req_q, msr, msync_req_t, req_q);
14926 msr_unlock(msr);
14927 vm_object_deallocate(msr->object);
14928 msync_req_free(msr);
14929 }/* queue_iterate */
14930
14931 /* for proper msync() behaviour */
14932 if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS))
14933 return(KERN_INVALID_ADDRESS);
14934
14935 return(KERN_SUCCESS);
14936 }/* vm_msync */
14937
14938 /*
14939 * Routine: convert_port_entry_to_map
14940 * Purpose:
14941 * Convert from a port specifying an entry or a task
14942 * to a map. Doesn't consume the port ref; produces a map ref,
14943 * which may be null. Unlike convert_port_to_map, the
14944 * port may be task or a named entry backed.
14945 * Conditions:
14946 * Nothing locked.
14947 */
14948
14949
14950 vm_map_t
14951 convert_port_entry_to_map(
14952 ipc_port_t port)
14953 {
14954 vm_map_t map;
14955 vm_named_entry_t named_entry;
14956 uint32_t try_failed_count = 0;
14957
14958 if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
14959 while(TRUE) {
14960 ip_lock(port);
14961 if(ip_active(port) && (ip_kotype(port)
14962 == IKOT_NAMED_ENTRY)) {
14963 named_entry =
14964 (vm_named_entry_t)port->ip_kobject;
14965 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
14966 ip_unlock(port);
14967
14968 try_failed_count++;
14969 mutex_pause(try_failed_count);
14970 continue;
14971 }
14972 named_entry->ref_count++;
14973 lck_mtx_unlock(&(named_entry)->Lock);
14974 ip_unlock(port);
14975 if ((named_entry->is_sub_map) &&
14976 (named_entry->protection
14977 & VM_PROT_WRITE)) {
14978 map = named_entry->backing.map;
14979 } else {
14980 mach_destroy_memory_entry(port);
14981 return VM_MAP_NULL;
14982 }
14983 vm_map_reference_swap(map);
14984 mach_destroy_memory_entry(port);
14985 break;
14986 }
14987 else
14988 return VM_MAP_NULL;
14989 }
14990 }
14991 else
14992 map = convert_port_to_map(port);
14993
14994 return map;
14995 }
14996
14997 /*
14998 * Routine: convert_port_entry_to_object
14999 * Purpose:
15000 * Convert from a port specifying a named entry to an
15001 * object. Doesn't consume the port ref; produces a map ref,
15002 * which may be null.
15003 * Conditions:
15004 * Nothing locked.
15005 */
15006
15007
15008 vm_object_t
15009 convert_port_entry_to_object(
15010 ipc_port_t port)
15011 {
15012 vm_object_t object = VM_OBJECT_NULL;
15013 vm_named_entry_t named_entry;
15014 uint32_t try_failed_count = 0;
15015
15016 if (IP_VALID(port) &&
15017 (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
15018 try_again:
15019 ip_lock(port);
15020 if (ip_active(port) &&
15021 (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
15022 named_entry = (vm_named_entry_t)port->ip_kobject;
15023 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
15024 ip_unlock(port);
15025 try_failed_count++;
15026 mutex_pause(try_failed_count);
15027 goto try_again;
15028 }
15029 named_entry->ref_count++;
15030 lck_mtx_unlock(&(named_entry)->Lock);
15031 ip_unlock(port);
15032 if (!(named_entry->is_sub_map) &&
15033 !(named_entry->is_pager) &&
15034 !(named_entry->is_copy) &&
15035 (named_entry->protection & VM_PROT_WRITE)) {
15036 object = named_entry->backing.object;
15037 vm_object_reference(object);
15038 }
15039 mach_destroy_memory_entry(port);
15040 }
15041 }
15042
15043 return object;
15044 }
15045
15046 /*
15047 * Export routines to other components for the things we access locally through
15048 * macros.
15049 */
15050 #undef current_map
15051 vm_map_t
15052 current_map(void)
15053 {
15054 return (current_map_fast());
15055 }
15056
15057 /*
15058 * vm_map_reference:
15059 *
15060 * Most code internal to the osfmk will go through a
15061 * macro defining this. This is always here for the
15062 * use of other kernel components.
15063 */
15064 #undef vm_map_reference
15065 void
15066 vm_map_reference(
15067 register vm_map_t map)
15068 {
15069 if (map == VM_MAP_NULL)
15070 return;
15071
15072 lck_mtx_lock(&map->s_lock);
15073 #if TASK_SWAPPER
15074 assert(map->res_count > 0);
15075 assert(map->ref_count >= map->res_count);
15076 map->res_count++;
15077 #endif
15078 map->ref_count++;
15079 lck_mtx_unlock(&map->s_lock);
15080 }
15081
15082 /*
15083 * vm_map_deallocate:
15084 *
15085 * Removes a reference from the specified map,
15086 * destroying it if no references remain.
15087 * The map should not be locked.
15088 */
15089 void
15090 vm_map_deallocate(
15091 register vm_map_t map)
15092 {
15093 unsigned int ref;
15094
15095 if (map == VM_MAP_NULL)
15096 return;
15097
15098 lck_mtx_lock(&map->s_lock);
15099 ref = --map->ref_count;
15100 if (ref > 0) {
15101 vm_map_res_deallocate(map);
15102 lck_mtx_unlock(&map->s_lock);
15103 return;
15104 }
15105 assert(map->ref_count == 0);
15106 lck_mtx_unlock(&map->s_lock);
15107
15108 #if TASK_SWAPPER
15109 /*
15110 * The map residence count isn't decremented here because
15111 * the vm_map_delete below will traverse the entire map,
15112 * deleting entries, and the residence counts on objects
15113 * and sharing maps will go away then.
15114 */
15115 #endif
15116
15117 vm_map_destroy(map, VM_MAP_NO_FLAGS);
15118 }
15119
15120
15121 void
15122 vm_map_disable_NX(vm_map_t map)
15123 {
15124 if (map == NULL)
15125 return;
15126 if (map->pmap == NULL)
15127 return;
15128
15129 pmap_disable_NX(map->pmap);
15130 }
15131
15132 void
15133 vm_map_disallow_data_exec(vm_map_t map)
15134 {
15135 if (map == NULL)
15136 return;
15137
15138 map->map_disallow_data_exec = TRUE;
15139 }
15140
15141 /* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
15142 * more descriptive.
15143 */
15144 void
15145 vm_map_set_32bit(vm_map_t map)
15146 {
15147 map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
15148 }
15149
15150
15151 void
15152 vm_map_set_64bit(vm_map_t map)
15153 {
15154 map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
15155 }
15156
15157 vm_map_offset_t
15158 vm_compute_max_offset(boolean_t is64)
15159 {
15160 return (is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS);
15161 }
15162
15163 uint64_t
15164 vm_map_get_max_aslr_slide_pages(vm_map_t map)
15165 {
15166 return (1 << (vm_map_is_64bit(map) ? 16 : 8));
15167 }
15168
15169 boolean_t
15170 vm_map_is_64bit(
15171 vm_map_t map)
15172 {
15173 return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
15174 }
15175
15176 boolean_t
15177 vm_map_has_hard_pagezero(
15178 vm_map_t map,
15179 vm_map_offset_t pagezero_size)
15180 {
15181 /*
15182 * XXX FBDP
15183 * We should lock the VM map (for read) here but we can get away
15184 * with it for now because there can't really be any race condition:
15185 * the VM map's min_offset is changed only when the VM map is created
15186 * and when the zero page is established (when the binary gets loaded),
15187 * and this routine gets called only when the task terminates and the
15188 * VM map is being torn down, and when a new map is created via
15189 * load_machfile()/execve().
15190 */
15191 return (map->min_offset >= pagezero_size);
15192 }
15193
15194 /*
15195 * Raise a VM map's maximun offset.
15196 */
15197 kern_return_t
15198 vm_map_raise_max_offset(
15199 vm_map_t map,
15200 vm_map_offset_t new_max_offset)
15201 {
15202 kern_return_t ret;
15203
15204 vm_map_lock(map);
15205 ret = KERN_INVALID_ADDRESS;
15206
15207 if (new_max_offset >= map->max_offset) {
15208 if (!vm_map_is_64bit(map)) {
15209 if (new_max_offset <= (vm_map_offset_t)VM_MAX_ADDRESS) {
15210 map->max_offset = new_max_offset;
15211 ret = KERN_SUCCESS;
15212 }
15213 } else {
15214 if (new_max_offset <= (vm_map_offset_t)MACH_VM_MAX_ADDRESS) {
15215 map->max_offset = new_max_offset;
15216 ret = KERN_SUCCESS;
15217 }
15218 }
15219 }
15220
15221 vm_map_unlock(map);
15222 return ret;
15223 }
15224
15225
15226 /*
15227 * Raise a VM map's minimum offset.
15228 * To strictly enforce "page zero" reservation.
15229 */
15230 kern_return_t
15231 vm_map_raise_min_offset(
15232 vm_map_t map,
15233 vm_map_offset_t new_min_offset)
15234 {
15235 vm_map_entry_t first_entry;
15236
15237 new_min_offset = vm_map_round_page(new_min_offset,
15238 VM_MAP_PAGE_MASK(map));
15239
15240 vm_map_lock(map);
15241
15242 if (new_min_offset < map->min_offset) {
15243 /*
15244 * Can't move min_offset backwards, as that would expose
15245 * a part of the address space that was previously, and for
15246 * possibly good reasons, inaccessible.
15247 */
15248 vm_map_unlock(map);
15249 return KERN_INVALID_ADDRESS;
15250 }
15251 if (new_min_offset >= map->max_offset) {
15252 /* can't go beyond the end of the address space */
15253 vm_map_unlock(map);
15254 return KERN_INVALID_ADDRESS;
15255 }
15256
15257 first_entry = vm_map_first_entry(map);
15258 if (first_entry != vm_map_to_entry(map) &&
15259 first_entry->vme_start < new_min_offset) {
15260 /*
15261 * Some memory was already allocated below the new
15262 * minimun offset. It's too late to change it now...
15263 */
15264 vm_map_unlock(map);
15265 return KERN_NO_SPACE;
15266 }
15267
15268 map->min_offset = new_min_offset;
15269
15270 assert(map->holes_list);
15271 map->holes_list->start = new_min_offset;
15272 assert(new_min_offset < map->holes_list->end);
15273
15274 vm_map_unlock(map);
15275
15276 return KERN_SUCCESS;
15277 }
15278
15279 /*
15280 * Set the limit on the maximum amount of user wired memory allowed for this map.
15281 * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side of
15282 * the kernel. The limits are checked in the mach VM side, so we keep a copy so we
15283 * don't have to reach over to the BSD data structures.
15284 */
15285
15286 void
15287 vm_map_set_user_wire_limit(vm_map_t map,
15288 vm_size_t limit)
15289 {
15290 map->user_wire_limit = limit;
15291 }
15292
15293
15294 void vm_map_switch_protect(vm_map_t map,
15295 boolean_t val)
15296 {
15297 vm_map_lock(map);
15298 map->switch_protect=val;
15299 vm_map_unlock(map);
15300 }
15301
15302 /*
15303 * IOKit has mapped a region into this map; adjust the pmap's ledgers appropriately.
15304 * phys_footprint is a composite limit consisting of iokit + physmem, so we need to
15305 * bump both counters.
15306 */
15307 void
15308 vm_map_iokit_mapped_region(vm_map_t map, vm_size_t bytes)
15309 {
15310 pmap_t pmap = vm_map_pmap(map);
15311
15312 ledger_credit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
15313 ledger_credit(pmap->ledger, task_ledgers.phys_footprint, bytes);
15314 }
15315
15316 void
15317 vm_map_iokit_unmapped_region(vm_map_t map, vm_size_t bytes)
15318 {
15319 pmap_t pmap = vm_map_pmap(map);
15320
15321 ledger_debit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
15322 ledger_debit(pmap->ledger, task_ledgers.phys_footprint, bytes);
15323 }
15324
15325 /* Add (generate) code signature for memory range */
15326 #if CONFIG_DYNAMIC_CODE_SIGNING
15327 kern_return_t vm_map_sign(vm_map_t map,
15328 vm_map_offset_t start,
15329 vm_map_offset_t end)
15330 {
15331 vm_map_entry_t entry;
15332 vm_page_t m;
15333 vm_object_t object;
15334
15335 /*
15336 * Vet all the input parameters and current type and state of the
15337 * underlaying object. Return with an error if anything is amiss.
15338 */
15339 if (map == VM_MAP_NULL)
15340 return(KERN_INVALID_ARGUMENT);
15341
15342 vm_map_lock_read(map);
15343
15344 if (!vm_map_lookup_entry(map, start, &entry) || entry->is_sub_map) {
15345 /*
15346 * Must pass a valid non-submap address.
15347 */
15348 vm_map_unlock_read(map);
15349 return(KERN_INVALID_ADDRESS);
15350 }
15351
15352 if((entry->vme_start > start) || (entry->vme_end < end)) {
15353 /*
15354 * Map entry doesn't cover the requested range. Not handling
15355 * this situation currently.
15356 */
15357 vm_map_unlock_read(map);
15358 return(KERN_INVALID_ARGUMENT);
15359 }
15360
15361 object = VME_OBJECT(entry);
15362 if (object == VM_OBJECT_NULL) {
15363 /*
15364 * Object must already be present or we can't sign.
15365 */
15366 vm_map_unlock_read(map);
15367 return KERN_INVALID_ARGUMENT;
15368 }
15369
15370 vm_object_lock(object);
15371 vm_map_unlock_read(map);
15372
15373 while(start < end) {
15374 uint32_t refmod;
15375
15376 m = vm_page_lookup(object,
15377 start - entry->vme_start + VME_OFFSET(entry));
15378 if (m==VM_PAGE_NULL) {
15379 /* shoud we try to fault a page here? we can probably
15380 * demand it exists and is locked for this request */
15381 vm_object_unlock(object);
15382 return KERN_FAILURE;
15383 }
15384 /* deal with special page status */
15385 if (m->busy ||
15386 (m->unusual && (m->error || m->restart || m->private || m->absent))) {
15387 vm_object_unlock(object);
15388 return KERN_FAILURE;
15389 }
15390
15391 /* Page is OK... now "validate" it */
15392 /* This is the place where we'll call out to create a code
15393 * directory, later */
15394 m->cs_validated = TRUE;
15395
15396 /* The page is now "clean" for codesigning purposes. That means
15397 * we don't consider it as modified (wpmapped) anymore. But
15398 * we'll disconnect the page so we note any future modification
15399 * attempts. */
15400 m->wpmapped = FALSE;
15401 refmod = pmap_disconnect(m->phys_page);
15402
15403 /* Pull the dirty status from the pmap, since we cleared the
15404 * wpmapped bit */
15405 if ((refmod & VM_MEM_MODIFIED) && !m->dirty) {
15406 SET_PAGE_DIRTY(m, FALSE);
15407 }
15408
15409 /* On to the next page */
15410 start += PAGE_SIZE;
15411 }
15412 vm_object_unlock(object);
15413
15414 return KERN_SUCCESS;
15415 }
15416 #endif
15417
15418 kern_return_t vm_map_partial_reap(vm_map_t map, unsigned int *reclaimed_resident, unsigned int *reclaimed_compressed)
15419 {
15420 vm_map_entry_t entry = VM_MAP_ENTRY_NULL;
15421 vm_map_entry_t next_entry;
15422 kern_return_t kr = KERN_SUCCESS;
15423 vm_map_t zap_map;
15424
15425 vm_map_lock(map);
15426
15427 /*
15428 * We use a "zap_map" to avoid having to unlock
15429 * the "map" in vm_map_delete().
15430 */
15431 zap_map = vm_map_create(PMAP_NULL,
15432 map->min_offset,
15433 map->max_offset,
15434 map->hdr.entries_pageable);
15435
15436 if (zap_map == VM_MAP_NULL) {
15437 return KERN_RESOURCE_SHORTAGE;
15438 }
15439
15440 vm_map_set_page_shift(zap_map,
15441 VM_MAP_PAGE_SHIFT(map));
15442 vm_map_disable_hole_optimization(zap_map);
15443
15444 for (entry = vm_map_first_entry(map);
15445 entry != vm_map_to_entry(map);
15446 entry = next_entry) {
15447 next_entry = entry->vme_next;
15448
15449 if (VME_OBJECT(entry) &&
15450 !entry->is_sub_map &&
15451 (VME_OBJECT(entry)->internal == TRUE) &&
15452 (VME_OBJECT(entry)->ref_count == 1)) {
15453
15454 *reclaimed_resident += VME_OBJECT(entry)->resident_page_count;
15455 *reclaimed_compressed += vm_compressor_pager_get_count(VME_OBJECT(entry)->pager);
15456
15457 (void)vm_map_delete(map,
15458 entry->vme_start,
15459 entry->vme_end,
15460 VM_MAP_REMOVE_SAVE_ENTRIES,
15461 zap_map);
15462 }
15463 }
15464
15465 vm_map_unlock(map);
15466
15467 /*
15468 * Get rid of the "zap_maps" and all the map entries that
15469 * they may still contain.
15470 */
15471 if (zap_map != VM_MAP_NULL) {
15472 vm_map_destroy(zap_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
15473 zap_map = VM_MAP_NULL;
15474 }
15475
15476 return kr;
15477 }
15478
15479 #if CONFIG_FREEZE
15480
15481 kern_return_t vm_map_freeze_walk(
15482 vm_map_t map,
15483 unsigned int *purgeable_count,
15484 unsigned int *wired_count,
15485 unsigned int *clean_count,
15486 unsigned int *dirty_count,
15487 unsigned int dirty_budget,
15488 boolean_t *has_shared)
15489 {
15490 vm_map_entry_t entry;
15491
15492 vm_map_lock_read(map);
15493
15494 *purgeable_count = *wired_count = *clean_count = *dirty_count = 0;
15495 *has_shared = FALSE;
15496
15497 for (entry = vm_map_first_entry(map);
15498 entry != vm_map_to_entry(map);
15499 entry = entry->vme_next) {
15500 unsigned int purgeable, clean, dirty, wired;
15501 boolean_t shared;
15502
15503 if ((VME_OBJECT(entry) == 0) ||
15504 (entry->is_sub_map) ||
15505 (VME_OBJECT(entry)->phys_contiguous)) {
15506 continue;
15507 }
15508
15509 default_freezer_pack(&purgeable, &wired, &clean, &dirty, dirty_budget, &shared, VME_OBJECT(entry), NULL);
15510
15511 *purgeable_count += purgeable;
15512 *wired_count += wired;
15513 *clean_count += clean;
15514 *dirty_count += dirty;
15515
15516 if (shared) {
15517 *has_shared = TRUE;
15518 }
15519
15520 /* Adjust pageout budget and finish up if reached */
15521 if (dirty_budget) {
15522 dirty_budget -= dirty;
15523 if (dirty_budget == 0) {
15524 break;
15525 }
15526 }
15527 }
15528
15529 vm_map_unlock_read(map);
15530
15531 return KERN_SUCCESS;
15532 }
15533
/*
 * Freezer bookkeeping used by vm_map_freeze().
 */
/* reset to 0 at the end of vm_map_freeze() when the compressed pager is swap-backed */
int c_freezer_swapout_count;
/* reset to 0 by vm_map_freeze() before it starts walking the map */
int c_freezer_compression_count = 0;
/* set from clock_get_uptime() by vm_map_freeze() before it starts walking the map */
AbsoluteTime c_freezer_last_yield_ts = 0;
15537
15538 kern_return_t vm_map_freeze(
15539 vm_map_t map,
15540 unsigned int *purgeable_count,
15541 unsigned int *wired_count,
15542 unsigned int *clean_count,
15543 unsigned int *dirty_count,
15544 unsigned int dirty_budget,
15545 boolean_t *has_shared)
15546 {
15547 vm_map_entry_t entry2 = VM_MAP_ENTRY_NULL;
15548 kern_return_t kr = KERN_SUCCESS;
15549 boolean_t default_freezer_active = TRUE;
15550
15551 *purgeable_count = *wired_count = *clean_count = *dirty_count = 0;
15552 *has_shared = FALSE;
15553
15554 /*
15555 * We need the exclusive lock here so that we can
15556 * block any page faults or lookups while we are
15557 * in the middle of freezing this vm map.
15558 */
15559 vm_map_lock(map);
15560
15561 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
15562 default_freezer_active = FALSE;
15563
15564 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
15565 kr = KERN_NO_SPACE;
15566 goto done;
15567 }
15568 }
15569 assert(default_freezer_active == FALSE);
15570
15571 if (default_freezer_active) {
15572 if (map->default_freezer_handle == NULL) {
15573 map->default_freezer_handle = default_freezer_handle_allocate();
15574 }
15575
15576 if ((kr = default_freezer_handle_init(map->default_freezer_handle)) != KERN_SUCCESS) {
15577 /*
15578 * Can happen if default_freezer_handle passed in is NULL
15579 * Or, a table has already been allocated and associated
15580 * with this handle, i.e. the map is already frozen.
15581 */
15582 goto done;
15583 }
15584 }
15585 c_freezer_compression_count = 0;
15586 clock_get_uptime(&c_freezer_last_yield_ts);
15587
15588 for (entry2 = vm_map_first_entry(map);
15589 entry2 != vm_map_to_entry(map);
15590 entry2 = entry2->vme_next) {
15591
15592 vm_object_t src_object = VME_OBJECT(entry2);
15593
15594 if (VME_OBJECT(entry2) &&
15595 !entry2->is_sub_map &&
15596 !VME_OBJECT(entry2)->phys_contiguous) {
15597 /* If eligible, scan the entry, moving eligible pages over to our parent object */
15598 if (default_freezer_active) {
15599 unsigned int purgeable, clean, dirty, wired;
15600 boolean_t shared;
15601
15602 default_freezer_pack(&purgeable, &wired, &clean, &dirty, dirty_budget, &shared,
15603 src_object, map->default_freezer_handle);
15604
15605 *purgeable_count += purgeable;
15606 *wired_count += wired;
15607 *clean_count += clean;
15608 *dirty_count += dirty;
15609
15610 /* Adjust pageout budget and finish up if reached */
15611 if (dirty_budget) {
15612 dirty_budget -= dirty;
15613 if (dirty_budget == 0) {
15614 break;
15615 }
15616 }
15617
15618 if (shared) {
15619 *has_shared = TRUE;
15620 }
15621 } else {
15622 if (VME_OBJECT(entry2)->internal == TRUE) {
15623
15624 if (DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPBACKED) {
15625 /*
15626 * Pages belonging to this object could be swapped to disk.
15627 * Make sure it's not a shared object because we could end
15628 * up just bringing it back in again.
15629 */
15630 if (VME_OBJECT(entry2)->ref_count > 1) {
15631 continue;
15632 }
15633 }
15634 vm_object_compressed_freezer_pageout(VME_OBJECT(entry2));
15635 }
15636
15637 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
15638 kr = KERN_NO_SPACE;
15639 break;
15640 }
15641 }
15642 }
15643 }
15644
15645 if (default_freezer_active) {
15646 /* Finally, throw out the pages to swap */
15647 default_freezer_pageout(map->default_freezer_handle);
15648 }
15649
15650 done:
15651 vm_map_unlock(map);
15652
15653 if (!default_freezer_active) {
15654 vm_object_compressed_freezer_done();
15655 }
15656 if (DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPBACKED) {
15657 /*
15658 * reset the counter tracking the # of swapped c_segs
15659 * because we are now done with this freeze session and task.
15660 */
15661 c_freezer_swapout_count = 0;
15662 }
15663 return kr;
15664 }
15665
15666 kern_return_t
15667 vm_map_thaw(
15668 vm_map_t map)
15669 {
15670 kern_return_t kr = KERN_SUCCESS;
15671
15672 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
15673 /*
15674 * We will on-demand thaw in the presence of the compressed pager.
15675 */
15676 return kr;
15677 }
15678
15679 vm_map_lock(map);
15680
15681 if (map->default_freezer_handle == NULL) {
15682 /*
15683 * This map is not in a frozen state.
15684 */
15685 kr = KERN_FAILURE;
15686 goto out;
15687 }
15688
15689 kr = default_freezer_unpack(map->default_freezer_handle);
15690 out:
15691 vm_map_unlock(map);
15692
15693 return kr;
15694 }
15695 #endif
15696
15697 /*
15698 * vm_map_entry_should_cow_for_true_share:
15699 *
15700 * Determines if the map entry should be clipped and setup for copy-on-write
15701 * to avoid applying "true_share" to a large VM object when only a subset is
15702 * targeted.
15703 *
15704 * For now, we target only the map entries created for the Objective C
15705 * Garbage Collector, which initially have the following properties:
15706 * - alias == VM_MEMORY_MALLOC
15707 * - wired_count == 0
15708 * - !needs_copy
15709 * and a VM object with:
15710 * - internal
15711 * - copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC
15712 * - !true_share
15713 * - vo_size == ANON_CHUNK_SIZE
15714 *
15715 * Only non-kernel map entries.
15716 */
15717 boolean_t
15718 vm_map_entry_should_cow_for_true_share(
15719 vm_map_entry_t entry)
15720 {
15721 vm_object_t object;
15722
15723 if (entry->is_sub_map) {
15724 /* entry does not point at a VM object */
15725 return FALSE;
15726 }
15727
15728 if (entry->needs_copy) {
15729 /* already set for copy_on_write: done! */
15730 return FALSE;
15731 }
15732
15733 if (VME_ALIAS(entry) != VM_MEMORY_MALLOC &&
15734 VME_ALIAS(entry) != VM_MEMORY_MALLOC_SMALL) {
15735 /* not a malloc heap or Obj-C Garbage Collector heap */
15736 return FALSE;
15737 }
15738
15739 if (entry->wired_count) {
15740 /* wired: can't change the map entry... */
15741 vm_counters.should_cow_but_wired++;
15742 return FALSE;
15743 }
15744
15745 object = VME_OBJECT(entry);
15746
15747 if (object == VM_OBJECT_NULL) {
15748 /* no object yet... */
15749 return FALSE;
15750 }
15751
15752 if (!object->internal) {
15753 /* not an internal object */
15754 return FALSE;
15755 }
15756
15757 if (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
15758 /* not the default copy strategy */
15759 return FALSE;
15760 }
15761
15762 if (object->true_share) {
15763 /* already true_share: too late to avoid it */
15764 return FALSE;
15765 }
15766
15767 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC &&
15768 object->vo_size != ANON_CHUNK_SIZE) {
15769 /* ... not an object created for the ObjC Garbage Collector */
15770 return FALSE;
15771 }
15772
15773 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_SMALL &&
15774 object->vo_size != 2048 * 4096) {
15775 /* ... not a "MALLOC_SMALL" heap */
15776 return FALSE;
15777 }
15778
15779 /*
15780 * All the criteria match: we have a large object being targeted for "true_share".
15781 * To limit the adverse side-effects linked with "true_share", tell the caller to
15782 * try and avoid setting up the entire object for "true_share" by clipping the
15783 * targeted range and setting it up for copy-on-write.
15784 */
15785 return TRUE;
15786 }
15787
15788 vm_map_offset_t
15789 vm_map_round_page_mask(
15790 vm_map_offset_t offset,
15791 vm_map_offset_t mask)
15792 {
15793 return VM_MAP_ROUND_PAGE(offset, mask);
15794 }
15795
15796 vm_map_offset_t
15797 vm_map_trunc_page_mask(
15798 vm_map_offset_t offset,
15799 vm_map_offset_t mask)
15800 {
15801 return VM_MAP_TRUNC_PAGE(offset, mask);
15802 }
15803
15804 boolean_t
15805 vm_map_page_aligned(
15806 vm_map_offset_t offset,
15807 vm_map_offset_t mask)
15808 {
15809 return ((offset) & mask) == 0;
15810 }
15811
15812 int
15813 vm_map_page_shift(
15814 vm_map_t map)
15815 {
15816 return VM_MAP_PAGE_SHIFT(map);
15817 }
15818
15819 int
15820 vm_map_page_size(
15821 vm_map_t map)
15822 {
15823 return VM_MAP_PAGE_SIZE(map);
15824 }
15825
15826 vm_map_offset_t
15827 vm_map_page_mask(
15828 vm_map_t map)
15829 {
15830 return VM_MAP_PAGE_MASK(map);
15831 }
15832
15833 kern_return_t
15834 vm_map_set_page_shift(
15835 vm_map_t map,
15836 int pageshift)
15837 {
15838 if (map->hdr.nentries != 0) {
15839 /* too late to change page size */
15840 return KERN_FAILURE;
15841 }
15842
15843 map->hdr.page_shift = pageshift;
15844
15845 return KERN_SUCCESS;
15846 }
15847
15848 int
15849 vm_map_purge(
15850 vm_map_t map)
15851 {
15852 int num_object_purged;
15853 vm_map_entry_t entry;
15854 vm_map_offset_t next_address;
15855 vm_object_t object;
15856 int state;
15857 kern_return_t kr;
15858
15859 num_object_purged = 0;
15860
15861 vm_map_lock_read(map);
15862 entry = vm_map_first_entry(map);
15863 while (entry != vm_map_to_entry(map)) {
15864 if (entry->is_sub_map) {
15865 goto next;
15866 }
15867 if (! (entry->protection & VM_PROT_WRITE)) {
15868 goto next;
15869 }
15870 object = VME_OBJECT(entry);
15871 if (object == VM_OBJECT_NULL) {
15872 goto next;
15873 }
15874 if (object->purgable != VM_PURGABLE_VOLATILE) {
15875 goto next;
15876 }
15877
15878 vm_object_lock(object);
15879 #if 00
15880 if (VME_OFFSET(entry) != 0 ||
15881 (entry->vme_end - entry->vme_start) != object->vo_size) {
15882 vm_object_unlock(object);
15883 goto next;
15884 }
15885 #endif
15886 next_address = entry->vme_end;
15887 vm_map_unlock_read(map);
15888 state = VM_PURGABLE_EMPTY;
15889 kr = vm_object_purgable_control(object,
15890 VM_PURGABLE_SET_STATE,
15891 &state);
15892 if (kr == KERN_SUCCESS) {
15893 num_object_purged++;
15894 }
15895 vm_object_unlock(object);
15896
15897 vm_map_lock_read(map);
15898 if (vm_map_lookup_entry(map, next_address, &entry)) {
15899 continue;
15900 }
15901 next:
15902 entry = entry->vme_next;
15903 }
15904 vm_map_unlock_read(map);
15905
15906 return num_object_purged;
15907 }
15908
15909 kern_return_t
15910 vm_map_query_volatile(
15911 vm_map_t map,
15912 mach_vm_size_t *volatile_virtual_size_p,
15913 mach_vm_size_t *volatile_resident_size_p,
15914 mach_vm_size_t *volatile_compressed_size_p,
15915 mach_vm_size_t *volatile_pmap_size_p,
15916 mach_vm_size_t *volatile_compressed_pmap_size_p)
15917 {
15918 mach_vm_size_t volatile_virtual_size;
15919 mach_vm_size_t volatile_resident_count;
15920 mach_vm_size_t volatile_compressed_count;
15921 mach_vm_size_t volatile_pmap_count;
15922 mach_vm_size_t volatile_compressed_pmap_count;
15923 mach_vm_size_t resident_count;
15924 vm_map_entry_t entry;
15925 vm_object_t object;
15926
15927 /* map should be locked by caller */
15928
15929 volatile_virtual_size = 0;
15930 volatile_resident_count = 0;
15931 volatile_compressed_count = 0;
15932 volatile_pmap_count = 0;
15933 volatile_compressed_pmap_count = 0;
15934
15935 for (entry = vm_map_first_entry(map);
15936 entry != vm_map_to_entry(map);
15937 entry = entry->vme_next) {
15938 mach_vm_size_t pmap_resident_bytes, pmap_compressed_bytes;
15939
15940 if (entry->is_sub_map) {
15941 continue;
15942 }
15943 if (! (entry->protection & VM_PROT_WRITE)) {
15944 continue;
15945 }
15946 object = VME_OBJECT(entry);
15947 if (object == VM_OBJECT_NULL) {
15948 continue;
15949 }
15950 if (object->purgable != VM_PURGABLE_VOLATILE &&
15951 object->purgable != VM_PURGABLE_EMPTY) {
15952 continue;
15953 }
15954 if (VME_OFFSET(entry)) {
15955 /*
15956 * If the map entry has been split and the object now
15957 * appears several times in the VM map, we don't want
15958 * to count the object's resident_page_count more than
15959 * once. We count it only for the first one, starting
15960 * at offset 0 and ignore the other VM map entries.
15961 */
15962 continue;
15963 }
15964 resident_count = object->resident_page_count;
15965 if ((VME_OFFSET(entry) / PAGE_SIZE) >= resident_count) {
15966 resident_count = 0;
15967 } else {
15968 resident_count -= (VME_OFFSET(entry) / PAGE_SIZE);
15969 }
15970
15971 volatile_virtual_size += entry->vme_end - entry->vme_start;
15972 volatile_resident_count += resident_count;
15973 if (object->pager) {
15974 volatile_compressed_count +=
15975 vm_compressor_pager_get_count(object->pager);
15976 }
15977 pmap_compressed_bytes = 0;
15978 pmap_resident_bytes =
15979 pmap_query_resident(map->pmap,
15980 entry->vme_start,
15981 entry->vme_end,
15982 &pmap_compressed_bytes);
15983 volatile_pmap_count += (pmap_resident_bytes / PAGE_SIZE);
15984 volatile_compressed_pmap_count += (pmap_compressed_bytes
15985 / PAGE_SIZE);
15986 }
15987
15988 /* map is still locked on return */
15989
15990 *volatile_virtual_size_p = volatile_virtual_size;
15991 *volatile_resident_size_p = volatile_resident_count * PAGE_SIZE;
15992 *volatile_compressed_size_p = volatile_compressed_count * PAGE_SIZE;
15993 *volatile_pmap_size_p = volatile_pmap_count * PAGE_SIZE;
15994 *volatile_compressed_pmap_size_p = volatile_compressed_pmap_count * PAGE_SIZE;
15995
15996 return KERN_SUCCESS;
15997 }
15998
15999 void
16000 vm_map_sizes(vm_map_t map,
16001 vm_map_size_t * psize,
16002 vm_map_size_t * pfree,
16003 vm_map_size_t * plargest_free)
16004 {
16005 vm_map_entry_t entry;
16006 vm_map_offset_t prev;
16007 vm_map_size_t free, total_free, largest_free;
16008 boolean_t end;
16009
16010 total_free = largest_free = 0;
16011
16012 vm_map_lock_read(map);
16013 if (psize) *psize = map->max_offset - map->min_offset;
16014
16015 prev = map->min_offset;
16016 for (entry = vm_map_first_entry(map);; entry = entry->vme_next)
16017 {
16018 end = (entry == vm_map_to_entry(map));
16019
16020 if (end) free = entry->vme_end - prev;
16021 else free = entry->vme_start - prev;
16022
16023 total_free += free;
16024 if (free > largest_free) largest_free = free;
16025
16026 if (end) break;
16027 prev = entry->vme_end;
16028 }
16029 vm_map_unlock_read(map);
16030 if (pfree) *pfree = total_free;
16031 if (plargest_free) *plargest_free = largest_free;
16032 }
16033
#if VM_SCAN_FOR_SHADOW_CHAIN
int vm_map_shadow_max(vm_map_t map);

/*
 * vm_map_shadow_max:
 *
 * Returns the length of the longest chain of "shadow" links found
 * behind any non-sub-map entry of "map", or 0 when "map" is NULL or no
 * entry's object has a shadow.
 *
 * The map is read-locked for the duration of the scan.  Each chain is
 * walked with shared object locks, taking the lock on the next object
 * before releasing the current one.
 */
int vm_map_shadow_max(
	vm_map_t map)
{
	int		depth, deepest;
	vm_map_entry_t	cur;
	vm_object_t	obj, shadow_obj;

	if (map == NULL) {
		return 0;
	}

	deepest = 0;

	vm_map_lock_read(map);

	cur = vm_map_first_entry(map);
	while (cur != vm_map_to_entry(map)) {
		if (!cur->is_sub_map) {
			obj = VME_OBJECT(cur);
			if (obj != NULL) {
				vm_object_lock_shared(obj);
				depth = 0;
				while (obj->shadow != NULL) {
					/* lock the next link before dropping this one */
					shadow_obj = obj->shadow;
					vm_object_lock_shared(shadow_obj);
					vm_object_unlock(obj);
					obj = shadow_obj;
					depth++;
				}
				vm_object_unlock(obj);

				if (depth > deepest) {
					deepest = depth;
				}
			}
		}
		cur = cur->vme_next;
	}

	vm_map_unlock_read(map);

	return deepest;
}
#endif /* VM_SCAN_FOR_SHADOW_CHAIN */