1 /*
2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_map.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * Virtual memory mapping module.
64 */
65
66 #include <task_swapper.h>
67 #include <mach_assert.h>
68
69 #include <vm/vm_options.h>
70
71 #include <libkern/OSAtomic.h>
72
73 #include <mach/kern_return.h>
74 #include <mach/port.h>
75 #include <mach/vm_attributes.h>
76 #include <mach/vm_param.h>
77 #include <mach/vm_behavior.h>
78 #include <mach/vm_statistics.h>
79 #include <mach/memory_object.h>
80 #include <mach/mach_vm.h>
81 #include <machine/cpu_capabilities.h>
82 #include <mach/sdt.h>
83
84 #include <kern/assert.h>
85 #include <kern/counters.h>
86 #include <kern/kalloc.h>
87 #include <kern/zalloc.h>
88
89 #include <vm/cpm.h>
90 #include <vm/vm_compressor_pager.h>
91 #include <vm/vm_init.h>
92 #include <vm/vm_fault.h>
93 #include <vm/vm_map.h>
94 #include <vm/vm_object.h>
95 #include <vm/vm_page.h>
96 #include <vm/vm_pageout.h>
97 #include <vm/vm_kern.h>
98 #include <ipc/ipc_port.h>
99 #include <kern/sched_prim.h>
100 #include <kern/misc_protos.h>
101 #include <kern/xpr.h>
102
103 #include <mach/vm_map_server.h>
104 #include <mach/mach_host_server.h>
105 #include <vm/vm_protos.h>
106 #include <vm/vm_purgeable_internal.h>
107
108 #include <vm/vm_protos.h>
109 #include <vm/vm_shared_region.h>
110 #include <vm/vm_map_store.h>
111
112 extern u_int32_t random(void); /* from <libkern/libkern.h> */
113 /* Internal prototypes
114 */
115
116 static void vm_map_simplify_range(
117 vm_map_t map,
118 vm_map_offset_t start,
119 vm_map_offset_t end); /* forward */
120
121 static boolean_t vm_map_range_check(
122 vm_map_t map,
123 vm_map_offset_t start,
124 vm_map_offset_t end,
125 vm_map_entry_t *entry);
126
127 static vm_map_entry_t _vm_map_entry_create(
128 struct vm_map_header *map_header, boolean_t map_locked);
129
130 static void _vm_map_entry_dispose(
131 struct vm_map_header *map_header,
132 vm_map_entry_t entry);
133
134 static void vm_map_pmap_enter(
135 vm_map_t map,
136 vm_map_offset_t addr,
137 vm_map_offset_t end_addr,
138 vm_object_t object,
139 vm_object_offset_t offset,
140 vm_prot_t protection);
141
142 static void _vm_map_clip_end(
143 struct vm_map_header *map_header,
144 vm_map_entry_t entry,
145 vm_map_offset_t end);
146
147 static void _vm_map_clip_start(
148 struct vm_map_header *map_header,
149 vm_map_entry_t entry,
150 vm_map_offset_t start);
151
152 static void vm_map_entry_delete(
153 vm_map_t map,
154 vm_map_entry_t entry);
155
156 static kern_return_t vm_map_delete(
157 vm_map_t map,
158 vm_map_offset_t start,
159 vm_map_offset_t end,
160 int flags,
161 vm_map_t zap_map);
162
163 static kern_return_t vm_map_copy_overwrite_unaligned(
164 vm_map_t dst_map,
165 vm_map_entry_t entry,
166 vm_map_copy_t copy,
167 vm_map_address_t start,
168 boolean_t discard_on_success);
169
170 static kern_return_t vm_map_copy_overwrite_aligned(
171 vm_map_t dst_map,
172 vm_map_entry_t tmp_entry,
173 vm_map_copy_t copy,
174 vm_map_offset_t start,
175 pmap_t pmap);
176
177 static kern_return_t vm_map_copyin_kernel_buffer(
178 vm_map_t src_map,
179 vm_map_address_t src_addr,
180 vm_map_size_t len,
181 boolean_t src_destroy,
182 vm_map_copy_t *copy_result); /* OUT */
183
184 static kern_return_t vm_map_copyout_kernel_buffer(
185 vm_map_t map,
186 vm_map_address_t *addr, /* IN/OUT */
187 vm_map_copy_t copy,
188 boolean_t overwrite,
189 boolean_t consume_on_success);
190
191 static void vm_map_fork_share(
192 vm_map_t old_map,
193 vm_map_entry_t old_entry,
194 vm_map_t new_map);
195
196 static boolean_t vm_map_fork_copy(
197 vm_map_t old_map,
198 vm_map_entry_t *old_entry_p,
199 vm_map_t new_map);
200
201 void vm_map_region_top_walk(
202 vm_map_entry_t entry,
203 vm_region_top_info_t top);
204
205 void vm_map_region_walk(
206 vm_map_t map,
207 vm_map_offset_t va,
208 vm_map_entry_t entry,
209 vm_object_offset_t offset,
210 vm_object_size_t range,
211 vm_region_extended_info_t extended,
212 boolean_t look_for_pages,
213 mach_msg_type_number_t count);
214
215 static kern_return_t vm_map_wire_nested(
216 vm_map_t map,
217 vm_map_offset_t start,
218 vm_map_offset_t end,
219 vm_prot_t access_type,
220 boolean_t user_wire,
221 pmap_t map_pmap,
222 vm_map_offset_t pmap_addr,
223 ppnum_t *physpage_p);
224
225 static kern_return_t vm_map_unwire_nested(
226 vm_map_t map,
227 vm_map_offset_t start,
228 vm_map_offset_t end,
229 boolean_t user_wire,
230 pmap_t map_pmap,
231 vm_map_offset_t pmap_addr);
232
233 static kern_return_t vm_map_overwrite_submap_recurse(
234 vm_map_t dst_map,
235 vm_map_offset_t dst_addr,
236 vm_map_size_t dst_size);
237
238 static kern_return_t vm_map_copy_overwrite_nested(
239 vm_map_t dst_map,
240 vm_map_offset_t dst_addr,
241 vm_map_copy_t copy,
242 boolean_t interruptible,
243 pmap_t pmap,
244 boolean_t discard_on_success);
245
246 static kern_return_t vm_map_remap_extract(
247 vm_map_t map,
248 vm_map_offset_t addr,
249 vm_map_size_t size,
250 boolean_t copy,
251 struct vm_map_header *map_header,
252 vm_prot_t *cur_protection,
253 vm_prot_t *max_protection,
254 vm_inherit_t inheritance,
255 boolean_t pageable);
256
257 static kern_return_t vm_map_remap_range_allocate(
258 vm_map_t map,
259 vm_map_address_t *address,
260 vm_map_size_t size,
261 vm_map_offset_t mask,
262 int flags,
263 vm_map_entry_t *map_entry);
264
265 static void vm_map_region_look_for_page(
266 vm_map_t map,
267 vm_map_offset_t va,
268 vm_object_t object,
269 vm_object_offset_t offset,
270 int max_refcnt,
271 int depth,
272 vm_region_extended_info_t extended,
273 mach_msg_type_number_t count);
274
275 static int vm_map_region_count_obj_refs(
276 vm_map_entry_t entry,
277 vm_object_t object);
278
279
280 static kern_return_t vm_map_willneed(
281 vm_map_t map,
282 vm_map_offset_t start,
283 vm_map_offset_t end);
284
285 static kern_return_t vm_map_reuse_pages(
286 vm_map_t map,
287 vm_map_offset_t start,
288 vm_map_offset_t end);
289
290 static kern_return_t vm_map_reusable_pages(
291 vm_map_t map,
292 vm_map_offset_t start,
293 vm_map_offset_t end);
294
295 static kern_return_t vm_map_can_reuse(
296 vm_map_t map,
297 vm_map_offset_t start,
298 vm_map_offset_t end);
299
300
301 /*
302 * Macros to copy a vm_map_entry. We must be careful to correctly
303  * manage the wired page count.  vm_map_entry_copy() creates a new
304  * map entry referring to the same memory - the wired count in the new entry
305 * must be set to zero. vm_map_entry_copy_full() creates a new
306 * entry that is identical to the old entry. This preserves the
307 * wire count; it's used for map splitting and zone changing in
308 * vm_map_copyout.
309 */
310
311 #define vm_map_entry_copy(NEW,OLD) \
312 MACRO_BEGIN \
313 boolean_t _vmec_reserved = (NEW)->from_reserved_zone; \
314 *(NEW) = *(OLD); \
315 (NEW)->is_shared = FALSE; \
316 (NEW)->needs_wakeup = FALSE; \
317 (NEW)->in_transition = FALSE; \
318 (NEW)->wired_count = 0; \
319 (NEW)->user_wired_count = 0; \
320 (NEW)->permanent = FALSE; \
321 (NEW)->used_for_jit = FALSE; \
322 (NEW)->from_reserved_zone = _vmec_reserved; \
323 (NEW)->iokit_acct = FALSE; \
324 MACRO_END
325
326 #define vm_map_entry_copy_full(NEW,OLD) \
327 MACRO_BEGIN \
328 boolean_t _vmecf_reserved = (NEW)->from_reserved_zone; \
329 (*(NEW) = *(OLD)); \
330 (NEW)->from_reserved_zone = _vmecf_reserved; \
331 MACRO_END
332
333 /*
334 * Decide if we want to allow processes to execute from their data or stack areas.
335 * override_nx() returns true if we do. Data/stack execution can be enabled independently
336 * for 32 and 64 bit processes. Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
337 * or allow_stack_exec to enable data execution for that type of data area for that particular
338 * ABI (or both by or'ing the flags together). These are initialized in the architecture
339 * specific pmap files since the default behavior varies according to architecture. The
340 * main reason it varies is because of the need to provide binary compatibility with old
341 * applications that were written before these restrictions came into being. In the old
342 * days, an app could execute anything it could read, but this has slowly been tightened
343 * up over time. The default behavior is:
344 *
345 * 32-bit PPC apps may execute from both stack and data areas
346  * 32-bit Intel apps may execute from data areas but not stack
347 * 64-bit PPC/Intel apps may not execute from either data or stack
348 *
349 * An application on any architecture may override these defaults by explicitly
350 * adding PROT_EXEC permission to the page in question with the mprotect(2)
351 * system call. This code here just determines what happens when an app tries to
352 * execute from a page that lacks execute permission.
353 *
354 * Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
355 * default behavior for both 32 and 64 bit apps on a system-wide basis. Furthermore,
356 * a Mach-O header flag bit (MH_NO_HEAP_EXECUTION) can be used to forcibly disallow
357 * execution from data areas for a particular binary even if the arch normally permits it. As
358 * a final wrinkle, a posix_spawn attribute flag can be used to negate this opt-in header bit
359 * to support some complicated use cases, notably browsers with out-of-process plugins that
360 * are not all NX-safe.
361 */
362
363 extern int allow_data_exec, allow_stack_exec;
364
365 int
366 override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
367 {
368 int current_abi;
369
370 /*
371 * Determine if the app is running in 32 or 64 bit mode.
372 */
373
374 if (vm_map_is_64bit(map))
375 current_abi = VM_ABI_64;
376 else
377 current_abi = VM_ABI_32;
378
379 /*
380 * Determine if we should allow the execution based on whether it's a
381 * stack or data area and the current architecture.
382 */
383
384 if (user_tag == VM_MEMORY_STACK)
385 return allow_stack_exec & current_abi;
386
387 return (allow_data_exec & current_abi) && (map->map_disallow_data_exec == FALSE);
388 }
389
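/*
 * Illustrative example of the flag arithmetic above: suppose the pmap
 * layer initialized allow_data_exec to VM_ABI_32 (the 32-bit Intel
 * default described above).  For a 32-bit process executing from a
 * non-stack page that lacks execute permission, override_nx() evaluates
 *
 *	(allow_data_exec & VM_ABI_32) && !map->map_disallow_data_exec
 *
 * which is normally non-zero, so the missing execute permission is
 * overridden; for a 64-bit process the corresponding test is
 * (VM_ABI_32 & VM_ABI_64) == 0, so the execution attempt is refused.
 */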
390
391 /*
392 * Virtual memory maps provide for the mapping, protection,
393 * and sharing of virtual memory objects. In addition,
394 * this module provides for an efficient virtual copy of
395 * memory from one map to another.
396 *
397 * Synchronization is required prior to most operations.
398 *
399 * Maps consist of an ordered doubly-linked list of simple
400 * entries; a single hint is used to speed up lookups.
401 *
402 * Sharing maps have been deleted from this version of Mach.
403 * All shared objects are now mapped directly into the respective
404 * maps. This requires a change in the copy on write strategy;
405 * the asymmetric (delayed) strategy is used for shared temporary
406 * objects instead of the symmetric (shadow) strategy. All maps
407 * are now "top level" maps (either task map, kernel map or submap
408 * of the kernel map).
409 *
410  *	Since portions of maps are specified by start/end addresses,
411 * which may not align with existing map entries, all
412 * routines merely "clip" entries to these start/end values.
413 * [That is, an entry is split into two, bordering at a
414 * start or end value.] Note that these clippings may not
415 * always be necessary (as the two resulting entries are then
416 * not changed); however, the clipping is done for convenience.
417 * No attempt is currently made to "glue back together" two
418 * abutting entries.
419 *
420 * The symmetric (shadow) copy strategy implements virtual copy
421 * by copying VM object references from one map to
422 * another, and then marking both regions as copy-on-write.
423 * It is important to note that only one writeable reference
424 * to a VM object region exists in any map when this strategy
425 * is used -- this means that shadow object creation can be
426  *	delayed until a write operation occurs.  The asymmetric (delayed)
427 * strategy allows multiple maps to have writeable references to
428 * the same region of a vm object, and hence cannot delay creating
429 * its copy objects. See vm_object_copy_quickly() in vm_object.c.
430 * Copying of permanent objects is completely different; see
431 * vm_object_copy_strategically() in vm_object.c.
432 */
433
434 static zone_t vm_map_zone; /* zone for vm_map structures */
435 static zone_t vm_map_entry_zone; /* zone for vm_map_entry structures */
436 static zone_t vm_map_entry_reserved_zone; /* zone with reserve for non-blocking
437 * allocations */
438 static zone_t vm_map_copy_zone; /* zone for vm_map_copy structures */
439
440
441 /*
442 * Placeholder object for submap operations. This object is dropped
443 * into the range by a call to vm_map_find, and removed when
444 * vm_map_submap creates the submap.
445 */
446
447 vm_object_t vm_submap_object;
448
449 static void *map_data;
450 static vm_size_t map_data_size;
451 static void *kentry_data;
452 static vm_size_t kentry_data_size;
453
454 #define NO_COALESCE_LIMIT ((1024 * 128) - 1)
455
456 /* Skip acquiring locks if we're in the midst of a kernel core dump */
457 unsigned int not_in_kdp = 1;
458
459 unsigned int vm_map_set_cache_attr_count = 0;
460
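/*
 * vm_map_set_cache_attr:
 *
 * Look up the map entry containing "va" and set the "set_cache_attr"
 * flag on its backing VM object, bumping vm_map_set_cache_attr_count.
 * Returns KERN_INVALID_ARGUMENT if "va" is not mapped, is mapped via a
 * submap, or has no VM object yet.
 */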
461 kern_return_t
462 vm_map_set_cache_attr(
463 vm_map_t map,
464 vm_map_offset_t va)
465 {
466 vm_map_entry_t map_entry;
467 vm_object_t object;
468 kern_return_t kr = KERN_SUCCESS;
469
470 vm_map_lock_read(map);
471
472 if (!vm_map_lookup_entry(map, va, &map_entry) ||
473 map_entry->is_sub_map) {
474 /*
475 * that memory is not properly mapped
476 */
477 kr = KERN_INVALID_ARGUMENT;
478 goto done;
479 }
480 object = map_entry->object.vm_object;
481
482 if (object == VM_OBJECT_NULL) {
483 /*
484 * there should be a VM object here at this point
485 */
486 kr = KERN_INVALID_ARGUMENT;
487 goto done;
488 }
489 vm_object_lock(object);
490 object->set_cache_attr = TRUE;
491 vm_object_unlock(object);
492
493 vm_map_set_cache_attr_count++;
494 done:
495 vm_map_unlock_read(map);
496
497 return kr;
498 }
499
500
501 #if CONFIG_CODE_DECRYPTION
502 /*
503 * vm_map_apple_protected:
504 * This remaps the requested part of the object with an object backed by
505 * the decrypting pager.
506 * crypt_info contains entry points and session data for the crypt module.
507 * The crypt_info block will be copied by vm_map_apple_protected. The data structures
508 * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
509 */
510 kern_return_t
511 vm_map_apple_protected(
512 vm_map_t map,
513 vm_map_offset_t start,
514 vm_map_offset_t end,
515 struct pager_crypt_info *crypt_info)
516 {
517 boolean_t map_locked;
518 kern_return_t kr;
519 vm_map_entry_t map_entry;
520 memory_object_t protected_mem_obj;
521 vm_object_t protected_object;
522 vm_map_offset_t map_addr;
523
524 vm_map_lock_read(map);
525 map_locked = TRUE;
526
527 /* lookup the protected VM object */
528 if (!vm_map_lookup_entry(map,
529 start,
530 &map_entry) ||
531 map_entry->vme_end < end ||
532 map_entry->is_sub_map ||
533 !(map_entry->protection & VM_PROT_EXECUTE)) {
534 /* that memory is not properly mapped */
535 kr = KERN_INVALID_ARGUMENT;
536 goto done;
537 }
538 protected_object = map_entry->object.vm_object;
539 if (protected_object == VM_OBJECT_NULL) {
540 /* there should be a VM object here at this point */
541 kr = KERN_INVALID_ARGUMENT;
542 goto done;
543 }
544
545 /* make sure protected object stays alive while map is unlocked */
546 vm_object_reference(protected_object);
547
548 vm_map_unlock_read(map);
549 map_locked = FALSE;
550
551 /*
552 * Lookup (and create if necessary) the protected memory object
553 * matching that VM object.
554 * If successful, this also grabs a reference on the memory object,
555 * to guarantee that it doesn't go away before we get a chance to map
556 * it.
557 */
558 protected_mem_obj = apple_protect_pager_setup(protected_object, crypt_info);
559
560 /* release extra ref on protected object */
561 vm_object_deallocate(protected_object);
562
563 if (protected_mem_obj == NULL) {
564 kr = KERN_FAILURE;
565 goto done;
566 }
567
568 /* map this memory object in place of the current one */
569 map_addr = start;
570 kr = vm_map_enter_mem_object(map,
571 &map_addr,
572 end - start,
573 (mach_vm_offset_t) 0,
574 VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
575 (ipc_port_t) protected_mem_obj,
576 (map_entry->offset +
577 (start - map_entry->vme_start)),
578 TRUE,
579 map_entry->protection,
580 map_entry->max_protection,
581 map_entry->inheritance);
582 assert(map_addr == start);
583 /*
584 * Release the reference obtained by apple_protect_pager_setup().
585 * The mapping (if it succeeded) is now holding a reference on the
586 * memory object.
587 */
588 memory_object_deallocate(protected_mem_obj);
589
590 done:
591 if (map_locked) {
592 vm_map_unlock_read(map);
593 }
594 return kr;
595 }
596 #endif /* CONFIG_CODE_DECRYPTION */
597
598
599 lck_grp_t vm_map_lck_grp;
600 lck_grp_attr_t vm_map_lck_grp_attr;
601 lck_attr_t vm_map_lck_attr;
602 lck_attr_t vm_map_lck_rw_attr;
603
604
605 /*
606 * vm_map_init:
607 *
608 * Initialize the vm_map module. Must be called before
609 * any other vm_map routines.
610 *
611 * Map and entry structures are allocated from zones -- we must
612 * initialize those zones.
613 *
614 * There are three zones of interest:
615 *
616 * vm_map_zone: used to allocate maps.
617 * vm_map_entry_zone: used to allocate map entries.
618 * vm_map_entry_reserved_zone: fallback zone for kernel map entries
619 *
620 * The kernel allocates map entries from a special zone that is initially
621 * "crammed" with memory. It would be difficult (perhaps impossible) for
622  *	the kernel to allocate more memory to an entry zone when it became
623 * empty since the very act of allocating memory implies the creation
624 * of a new entry.
625 */
626 void
627 vm_map_init(
628 void)
629 {
630 vm_size_t entry_zone_alloc_size;
631 const char *mez_name = "VM map entries";
632
633 vm_map_zone = zinit((vm_map_size_t) sizeof(struct _vm_map), 40*1024,
634 PAGE_SIZE, "maps");
635 zone_change(vm_map_zone, Z_NOENCRYPT, TRUE);
636 #if defined(__LP64__)
637 entry_zone_alloc_size = PAGE_SIZE * 5;
638 #else
639 entry_zone_alloc_size = PAGE_SIZE * 6;
640 #endif
641 vm_map_entry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
642 1024*1024, entry_zone_alloc_size,
643 mez_name);
644 zone_change(vm_map_entry_zone, Z_NOENCRYPT, TRUE);
645 zone_change(vm_map_entry_zone, Z_NOCALLOUT, TRUE);
646 zone_change(vm_map_entry_zone, Z_GZALLOC_EXEMPT, TRUE);
647
648 vm_map_entry_reserved_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
649 kentry_data_size * 64, kentry_data_size,
650 "Reserved VM map entries");
651 zone_change(vm_map_entry_reserved_zone, Z_NOENCRYPT, TRUE);
652
653 vm_map_copy_zone = zinit((vm_map_size_t) sizeof(struct vm_map_copy),
654 16*1024, PAGE_SIZE, "VM map copies");
655 zone_change(vm_map_copy_zone, Z_NOENCRYPT, TRUE);
656
657 /*
658 * Cram the map and kentry zones with initial data.
659 * Set reserved_zone non-collectible to aid zone_gc().
660 */
661 zone_change(vm_map_zone, Z_COLLECT, FALSE);
662
663 zone_change(vm_map_entry_reserved_zone, Z_COLLECT, FALSE);
664 zone_change(vm_map_entry_reserved_zone, Z_EXPAND, FALSE);
665 zone_change(vm_map_entry_reserved_zone, Z_FOREIGN, TRUE);
666 zone_change(vm_map_entry_reserved_zone, Z_NOCALLOUT, TRUE);
667 zone_change(vm_map_entry_reserved_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
668 zone_change(vm_map_copy_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
669 zone_change(vm_map_entry_reserved_zone, Z_GZALLOC_EXEMPT, TRUE);
670
671 zcram(vm_map_zone, (vm_offset_t)map_data, map_data_size);
672 zcram(vm_map_entry_reserved_zone, (vm_offset_t)kentry_data, kentry_data_size);
673
674 lck_grp_attr_setdefault(&vm_map_lck_grp_attr);
675 lck_grp_init(&vm_map_lck_grp, "vm_map", &vm_map_lck_grp_attr);
676 lck_attr_setdefault(&vm_map_lck_attr);
677
678 lck_attr_setdefault(&vm_map_lck_rw_attr);
679 lck_attr_cleardebug(&vm_map_lck_rw_attr);
680
681 #if CONFIG_FREEZE
682 default_freezer_init();
683 #endif /* CONFIG_FREEZE */
684 }
685
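/*
 * vm_map_steal_memory:
 *
 * Grab boot-time memory, via pmap_steal_memory(), for the initial map
 * structures and for the reserved kernel map entry pool; vm_map_init()
 * later crams this memory into the corresponding zones.
 */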
686 void
687 vm_map_steal_memory(
688 void)
689 {
690 uint32_t kentry_initial_pages;
691
692 map_data_size = round_page(10 * sizeof(struct _vm_map));
693 map_data = pmap_steal_memory(map_data_size);
694
695 /*
696 * kentry_initial_pages corresponds to the number of kernel map entries
697 * required during bootstrap until the asynchronous replenishment
698 * scheme is activated and/or entries are available from the general
699 * map entry pool.
700 */
701 #if defined(__LP64__)
702 kentry_initial_pages = 10;
703 #else
704 kentry_initial_pages = 6;
705 #endif
706
707 #if CONFIG_GZALLOC
708 /* If using the guard allocator, reserve more memory for the kernel
709 * reserved map entry pool.
710 */
711 if (gzalloc_enabled())
712 kentry_initial_pages *= 1024;
713 #endif
714
715 kentry_data_size = kentry_initial_pages * PAGE_SIZE;
716 kentry_data = pmap_steal_memory(kentry_data_size);
717 }
718
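/*
 * Enable the asynchronous replenishment scheme (see above) for the
 * reserved kernel map entry zone; the level requested here corresponds
 * to roughly six pages' worth of map entries.
 */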
719 void vm_kernel_reserved_entry_init(void) {
720 zone_prio_refill_configure(vm_map_entry_reserved_zone, (6*PAGE_SIZE)/sizeof(struct vm_map_entry));
721 }
722
723 /*
724 * vm_map_create:
725 *
726 * Creates and returns a new empty VM map with
727 * the given physical map structure, and having
728 * the given lower and upper address bounds.
729 */
730 vm_map_t
731 vm_map_create(
732 pmap_t pmap,
733 vm_map_offset_t min,
734 vm_map_offset_t max,
735 boolean_t pageable)
736 {
737 static int color_seed = 0;
738 register vm_map_t result;
739
740 result = (vm_map_t) zalloc(vm_map_zone);
741 if (result == VM_MAP_NULL)
742 panic("vm_map_create");
743
744 vm_map_first_entry(result) = vm_map_to_entry(result);
745 vm_map_last_entry(result) = vm_map_to_entry(result);
746 result->hdr.nentries = 0;
747 result->hdr.entries_pageable = pageable;
748
749 vm_map_store_init( &(result->hdr) );
750
751 result->hdr.page_shift = PAGE_SHIFT;
752
753 result->size = 0;
754 result->user_wire_limit = MACH_VM_MAX_ADDRESS; /* default limit is unlimited */
755 result->user_wire_size = 0;
756 result->ref_count = 1;
757 #if TASK_SWAPPER
758 result->res_count = 1;
759 result->sw_state = MAP_SW_IN;
760 #endif /* TASK_SWAPPER */
761 result->pmap = pmap;
762 result->min_offset = min;
763 result->max_offset = max;
764 result->wiring_required = FALSE;
765 result->no_zero_fill = FALSE;
766 result->mapped_in_other_pmaps = FALSE;
767 result->wait_for_space = FALSE;
768 result->switch_protect = FALSE;
769 result->disable_vmentry_reuse = FALSE;
770 result->map_disallow_data_exec = FALSE;
771 result->highest_entry_end = 0;
772 result->first_free = vm_map_to_entry(result);
773 result->hint = vm_map_to_entry(result);
774 result->color_rr = (color_seed++) & vm_color_mask;
775 result->jit_entry_exists = FALSE;
776 #if CONFIG_FREEZE
777 result->default_freezer_handle = NULL;
778 #endif
779 vm_map_lock_init(result);
780 lck_mtx_init_ext(&result->s_lock, &result->s_lock_ext, &vm_map_lck_grp, &vm_map_lck_attr);
781
782 return(result);
783 }
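
/*
 * Illustrative use (not taken verbatim from any caller): creating a
 * pageable map covering the user address range for a freshly created
 * pmap "new_pmap":
 *
 *	vm_map_t new_map;
 *
 *	new_map = vm_map_create(new_pmap,
 *				(vm_map_offset_t) VM_MIN_ADDRESS,
 *				(vm_map_offset_t) VM_MAX_ADDRESS,
 *				TRUE);
 */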
784
785 /*
786 * vm_map_entry_create: [ internal use only ]
787 *
788 * Allocates a VM map entry for insertion in the
789 * given map (or map copy). No fields are filled.
790 */
791 #define vm_map_entry_create(map, map_locked) _vm_map_entry_create(&(map)->hdr, map_locked)
792
793 #define vm_map_copy_entry_create(copy, map_locked) \
794 _vm_map_entry_create(&(copy)->cpy_hdr, map_locked)
795 unsigned reserved_zalloc_count, nonreserved_zalloc_count;
796
797 static vm_map_entry_t
798 _vm_map_entry_create(
799 struct vm_map_header *map_header, boolean_t __unused map_locked)
800 {
801 zone_t zone;
802 vm_map_entry_t entry;
803
804 zone = vm_map_entry_zone;
805
806 assert(map_header->entries_pageable ? !map_locked : TRUE);
807
808 if (map_header->entries_pageable) {
809 entry = (vm_map_entry_t) zalloc(zone);
810 }
811 else {
812 entry = (vm_map_entry_t) zalloc_canblock(zone, FALSE);
813
814 if (entry == VM_MAP_ENTRY_NULL) {
815 zone = vm_map_entry_reserved_zone;
816 entry = (vm_map_entry_t) zalloc(zone);
817 OSAddAtomic(1, &reserved_zalloc_count);
818 } else
819 OSAddAtomic(1, &nonreserved_zalloc_count);
820 }
821
822 if (entry == VM_MAP_ENTRY_NULL)
823 panic("vm_map_entry_create");
824 entry->from_reserved_zone = (zone == vm_map_entry_reserved_zone);
825
826 vm_map_store_update( (vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE);
827 #if MAP_ENTRY_CREATION_DEBUG
828 entry->vme_creation_maphdr = map_header;
829 fastbacktrace(&entry->vme_creation_bt[0],
830 (sizeof(entry->vme_creation_bt)/sizeof(uintptr_t)));
831 #endif
832 return(entry);
833 }
834
835 /*
836 * vm_map_entry_dispose: [ internal use only ]
837 *
838 * Inverse of vm_map_entry_create.
839 *
840 * write map lock held so no need to
841 * do anything special to insure correctness
842 * of the stores
843 */
844 #define vm_map_entry_dispose(map, entry) \
845 _vm_map_entry_dispose(&(map)->hdr, (entry))
846
847 #define vm_map_copy_entry_dispose(copy, entry)			\
848 _vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
849
850 static void
851 _vm_map_entry_dispose(
852 register struct vm_map_header *map_header,
853 register vm_map_entry_t entry)
854 {
855 register zone_t zone;
856
857 if (map_header->entries_pageable || !(entry->from_reserved_zone))
858 zone = vm_map_entry_zone;
859 else
860 zone = vm_map_entry_reserved_zone;
861
862 if (!map_header->entries_pageable) {
863 if (zone == vm_map_entry_zone)
864 OSAddAtomic(-1, &nonreserved_zalloc_count);
865 else
866 OSAddAtomic(-1, &reserved_zalloc_count);
867 }
868
869 zfree(zone, entry);
870 }
871
872 #if MACH_ASSERT
873 static boolean_t first_free_check = FALSE;
874 boolean_t
875 first_free_is_valid(
876 vm_map_t map)
877 {
878 if (!first_free_check)
879 return TRUE;
880
881 return( first_free_is_valid_store( map ));
882 }
883 #endif /* MACH_ASSERT */
884
885
886 #define vm_map_copy_entry_link(copy, after_where, entry) \
887 _vm_map_store_entry_link(&(copy)->cpy_hdr, after_where, (entry))
888
889 #define vm_map_copy_entry_unlink(copy, entry) \
890 _vm_map_store_entry_unlink(&(copy)->cpy_hdr, (entry))
891
892 #if MACH_ASSERT && TASK_SWAPPER
893 /*
894 * vm_map_res_reference:
895 *
896 * Adds another valid residence count to the given map.
897 *
898 * Map is locked so this function can be called from
899 * vm_map_swapin.
900 *
901 */
902 void vm_map_res_reference(register vm_map_t map)
903 {
904 /* assert map is locked */
905 assert(map->res_count >= 0);
906 assert(map->ref_count >= map->res_count);
907 if (map->res_count == 0) {
908 lck_mtx_unlock(&map->s_lock);
909 vm_map_lock(map);
910 vm_map_swapin(map);
911 lck_mtx_lock(&map->s_lock);
912 ++map->res_count;
913 vm_map_unlock(map);
914 } else
915 ++map->res_count;
916 }
917
918 /*
919 * vm_map_reference_swap:
920 *
921 * Adds valid reference and residence counts to the given map.
922 *
923 * The map may not be in memory (i.e. zero residence count).
924 *
925 */
926 void vm_map_reference_swap(register vm_map_t map)
927 {
928 assert(map != VM_MAP_NULL);
929 lck_mtx_lock(&map->s_lock);
930 assert(map->res_count >= 0);
931 assert(map->ref_count >= map->res_count);
932 map->ref_count++;
933 vm_map_res_reference(map);
934 lck_mtx_unlock(&map->s_lock);
935 }
936
937 /*
938 * vm_map_res_deallocate:
939 *
940 * Decrement residence count on a map; possibly causing swapout.
941 *
942 * The map must be in memory (i.e. non-zero residence count).
943 *
944 * The map is locked, so this function is callable from vm_map_deallocate.
945 *
946 */
947 void vm_map_res_deallocate(register vm_map_t map)
948 {
949 assert(map->res_count > 0);
950 if (--map->res_count == 0) {
951 lck_mtx_unlock(&map->s_lock);
952 vm_map_lock(map);
953 vm_map_swapout(map);
954 vm_map_unlock(map);
955 lck_mtx_lock(&map->s_lock);
956 }
957 assert(map->ref_count >= map->res_count);
958 }
959 #endif /* MACH_ASSERT && TASK_SWAPPER */
960
961 /*
962 * vm_map_destroy:
963 *
964 * Actually destroy a map.
965 */
966 void
967 vm_map_destroy(
968 vm_map_t map,
969 int flags)
970 {
971 vm_map_lock(map);
972
973 /* clean up regular map entries */
974 (void) vm_map_delete(map, map->min_offset, map->max_offset,
975 flags, VM_MAP_NULL);
976 /* clean up leftover special mappings (commpage, etc...) */
977 (void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL,
978 flags, VM_MAP_NULL);
979
980 #if CONFIG_FREEZE
981 if (map->default_freezer_handle) {
982 default_freezer_handle_deallocate(map->default_freezer_handle);
983 map->default_freezer_handle = NULL;
984 }
985 #endif
986 vm_map_unlock(map);
987
988 assert(map->hdr.nentries == 0);
989
990 if(map->pmap)
991 pmap_destroy(map->pmap);
992
993 zfree(vm_map_zone, map);
994 }
995
996 #if TASK_SWAPPER
997 /*
998 * vm_map_swapin/vm_map_swapout
999 *
1000 * Swap a map in and out, either referencing or releasing its resources.
1001  * These functions are for internal use only; however, they must be exported
1002 * because they may be called from macros, which are exported.
1003 *
1004 * In the case of swapout, there could be races on the residence count,
1005 * so if the residence count is up, we return, assuming that a
1006 * vm_map_deallocate() call in the near future will bring us back.
1007 *
1008 * Locking:
1009 * -- We use the map write lock for synchronization among races.
1010 * -- The map write lock, and not the simple s_lock, protects the
1011 * swap state of the map.
1012 * -- If a map entry is a share map, then we hold both locks, in
1013 * hierarchical order.
1014 *
1015 * Synchronization Notes:
1016 * 1) If a vm_map_swapin() call happens while swapout in progress, it
1017 * will block on the map lock and proceed when swapout is through.
1018 * 2) A vm_map_reference() call at this time is illegal, and will
1019 * cause a panic. vm_map_reference() is only allowed on resident
1020 * maps, since it refuses to block.
1021 * 3) A vm_map_swapin() call during a swapin will block, and
1022  *	proceed when the first swapin is done, turning into a nop.
1023 * This is the reason the res_count is not incremented until
1024 * after the swapin is complete.
1025 * 4) There is a timing hole after the checks of the res_count, before
1026 * the map lock is taken, during which a swapin may get the lock
1027 * before a swapout about to happen. If this happens, the swapin
1028 * will detect the state and increment the reference count, causing
1029 * the swapout to be a nop, thereby delaying it until a later
1030 * vm_map_deallocate. If the swapout gets the lock first, then
1031 * the swapin will simply block until the swapout is done, and
1032 * then proceed.
1033 *
1034 * Because vm_map_swapin() is potentially an expensive operation, it
1035 * should be used with caution.
1036 *
1037 * Invariants:
1038 * 1) A map with a residence count of zero is either swapped, or
1039 * being swapped.
1040 * 2) A map with a non-zero residence count is either resident,
1041 * or being swapped in.
1042 */
1043
1044 int vm_map_swap_enable = 1;
1045
1046 void vm_map_swapin (vm_map_t map)
1047 {
1048 register vm_map_entry_t entry;
1049
1050 if (!vm_map_swap_enable) /* debug */
1051 return;
1052
1053 /*
1054 * Map is locked
1055 * First deal with various races.
1056 */
1057 if (map->sw_state == MAP_SW_IN)
1058 /*
1059 * we raced with swapout and won. Returning will incr.
1060 * the res_count, turning the swapout into a nop.
1061 */
1062 return;
1063
1064 /*
1065 * The residence count must be zero. If we raced with another
1066 * swapin, the state would have been IN; if we raced with a
1067 * swapout (after another competing swapin), we must have lost
1068 * the race to get here (see above comment), in which case
1069 * res_count is still 0.
1070 */
1071 assert(map->res_count == 0);
1072
1073 /*
1074 * There are no intermediate states of a map going out or
1075 * coming in, since the map is locked during the transition.
1076 */
1077 assert(map->sw_state == MAP_SW_OUT);
1078
1079 /*
1080 * We now operate upon each map entry. If the entry is a sub-
1081 * or share-map, we call vm_map_res_reference upon it.
1082 * If the entry is an object, we call vm_object_res_reference
1083 * (this may iterate through the shadow chain).
1084 * Note that we hold the map locked the entire time,
1085 * even if we get back here via a recursive call in
1086 * vm_map_res_reference.
1087 */
1088 entry = vm_map_first_entry(map);
1089
1090 while (entry != vm_map_to_entry(map)) {
1091 if (entry->object.vm_object != VM_OBJECT_NULL) {
1092 if (entry->is_sub_map) {
1093 vm_map_t lmap = entry->object.sub_map;
1094 lck_mtx_lock(&lmap->s_lock);
1095 vm_map_res_reference(lmap);
1096 lck_mtx_unlock(&lmap->s_lock);
1097 } else {
1098 vm_object_t object = entry->object.vm_object;
1099 vm_object_lock(object);
1100 /*
1101 * This call may iterate through the
1102 * shadow chain.
1103 */
1104 vm_object_res_reference(object);
1105 vm_object_unlock(object);
1106 }
1107 }
1108 entry = entry->vme_next;
1109 }
1110 assert(map->sw_state == MAP_SW_OUT);
1111 map->sw_state = MAP_SW_IN;
1112 }
1113
1114 void vm_map_swapout(vm_map_t map)
1115 {
1116 register vm_map_entry_t entry;
1117
1118 /*
1119 * Map is locked
1120 * First deal with various races.
1121 * If we raced with a swapin and lost, the residence count
1122 * will have been incremented to 1, and we simply return.
1123 */
1124 lck_mtx_lock(&map->s_lock);
1125 if (map->res_count != 0) {
1126 lck_mtx_unlock(&map->s_lock);
1127 return;
1128 }
1129 lck_mtx_unlock(&map->s_lock);
1130
1131 /*
1132 * There are no intermediate states of a map going out or
1133 * coming in, since the map is locked during the transition.
1134 */
1135 assert(map->sw_state == MAP_SW_IN);
1136
1137 if (!vm_map_swap_enable)
1138 return;
1139
1140 /*
1141 * We now operate upon each map entry. If the entry is a sub-
1142 * or share-map, we call vm_map_res_deallocate upon it.
1143 * If the entry is an object, we call vm_object_res_deallocate
1144 * (this may iterate through the shadow chain).
1145 * Note that we hold the map locked the entire time,
1146 * even if we get back here via a recursive call in
1147 * vm_map_res_deallocate.
1148 */
1149 entry = vm_map_first_entry(map);
1150
1151 while (entry != vm_map_to_entry(map)) {
1152 if (entry->object.vm_object != VM_OBJECT_NULL) {
1153 if (entry->is_sub_map) {
1154 vm_map_t lmap = entry->object.sub_map;
1155 lck_mtx_lock(&lmap->s_lock);
1156 vm_map_res_deallocate(lmap);
1157 lck_mtx_unlock(&lmap->s_lock);
1158 } else {
1159 vm_object_t object = entry->object.vm_object;
1160 vm_object_lock(object);
1161 /*
1162 * This call may take a long time,
1163 * since it could actively push
1164 * out pages (if we implement it
1165 * that way).
1166 */
1167 vm_object_res_deallocate(object);
1168 vm_object_unlock(object);
1169 }
1170 }
1171 entry = entry->vme_next;
1172 }
1173 assert(map->sw_state == MAP_SW_IN);
1174 map->sw_state = MAP_SW_OUT;
1175 }
1176
1177 #endif /* TASK_SWAPPER */
1178
1179 /*
1180 * vm_map_lookup_entry: [ internal use only ]
1181 *
1182 * Calls into the vm map store layer to find the map
1183 * entry containing (or immediately preceding) the
1184 * specified address in the given map; the entry is returned
1185 * in the "entry" parameter. The boolean
1186 * result indicates whether the address is
1187 * actually contained in the map.
1188 */
1189 boolean_t
1190 vm_map_lookup_entry(
1191 register vm_map_t map,
1192 register vm_map_offset_t address,
1193 vm_map_entry_t *entry) /* OUT */
1194 {
1195 return ( vm_map_store_lookup_entry( map, address, entry ));
1196 }
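
/*
 * Typical usage pattern (see vm_map_set_cache_attr() above, for
 * example): hold at least the read lock across the lookup, since the
 * returned entry is only stable while the map stays locked:
 *
 *	vm_map_lock_read(map);
 *	if (vm_map_lookup_entry(map, addr, &entry)) {
 *		... "entry" contains "addr" ...
 *	} else {
 *		... "entry" immediately precedes the hole containing "addr" ...
 *	}
 *	vm_map_unlock_read(map);
 */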
1197
1198 /*
1199 * Routine: vm_map_find_space
1200 * Purpose:
1201 * Allocate a range in the specified virtual address map,
1202 * returning the entry allocated for that range.
1203 * Used by kmem_alloc, etc.
1204 *
1205  *	The map must NOT be locked. It will be returned locked
1206 * on KERN_SUCCESS, unlocked on failure.
1207 *
1208 * If an entry is allocated, the object/offset fields
1209 * are initialized to zero.
1210 */
1211 kern_return_t
1212 vm_map_find_space(
1213 register vm_map_t map,
1214 vm_map_offset_t *address, /* OUT */
1215 vm_map_size_t size,
1216 vm_map_offset_t mask,
1217 int flags,
1218 vm_map_entry_t *o_entry) /* OUT */
1219 {
1220 register vm_map_entry_t entry, new_entry;
1221 register vm_map_offset_t start;
1222 register vm_map_offset_t end;
1223
1224 if (size == 0) {
1225 *address = 0;
1226 return KERN_INVALID_ARGUMENT;
1227 }
1228
1229 if (flags & VM_FLAGS_GUARD_AFTER) {
1230 /* account for the back guard page in the size */
1231 size += VM_MAP_PAGE_SIZE(map);
1232 }
1233
1234 new_entry = vm_map_entry_create(map, FALSE);
1235
1236 /*
1237 * Look for the first possible address; if there's already
1238 * something at this address, we have to start after it.
1239 */
1240
1241 vm_map_lock(map);
1242
1243 if( map->disable_vmentry_reuse == TRUE) {
1244 VM_MAP_HIGHEST_ENTRY(map, entry, start);
1245 } else {
1246 assert(first_free_is_valid(map));
1247 if ((entry = map->first_free) == vm_map_to_entry(map))
1248 start = map->min_offset;
1249 else
1250 start = entry->vme_end;
1251 }
1252
1253 /*
1254 * In any case, the "entry" always precedes
1255 * the proposed new region throughout the loop:
1256 */
1257
1258 while (TRUE) {
1259 register vm_map_entry_t next;
1260
1261 /*
1262 * Find the end of the proposed new region.
1263 * Be sure we didn't go beyond the end, or
1264 * wrap around the address.
1265 */
1266
1267 if (flags & VM_FLAGS_GUARD_BEFORE) {
1268 /* reserve space for the front guard page */
1269 start += VM_MAP_PAGE_SIZE(map);
1270 }
1271 end = ((start + mask) & ~mask);
1272
1273 if (end < start) {
1274 vm_map_entry_dispose(map, new_entry);
1275 vm_map_unlock(map);
1276 return(KERN_NO_SPACE);
1277 }
1278 start = end;
1279 end += size;
1280
1281 if ((end > map->max_offset) || (end < start)) {
1282 vm_map_entry_dispose(map, new_entry);
1283 vm_map_unlock(map);
1284 return(KERN_NO_SPACE);
1285 }
1286
1287 /*
1288 * If there are no more entries, we must win.
1289 */
1290
1291 next = entry->vme_next;
1292 if (next == vm_map_to_entry(map))
1293 break;
1294
1295 /*
1296 * If there is another entry, it must be
1297 * after the end of the potential new region.
1298 */
1299
1300 if (next->vme_start >= end)
1301 break;
1302
1303 /*
1304 * Didn't fit -- move to the next entry.
1305 */
1306
1307 entry = next;
1308 start = entry->vme_end;
1309 }
1310
1311 /*
1312 * At this point,
1313 * "start" and "end" should define the endpoints of the
1314 * available new range, and
1315 * "entry" should refer to the region before the new
1316 * range, and
1317 *
1318 * the map should be locked.
1319 */
1320
1321 if (flags & VM_FLAGS_GUARD_BEFORE) {
1322 /* go back for the front guard page */
1323 start -= VM_MAP_PAGE_SIZE(map);
1324 }
1325 *address = start;
1326
1327 assert(start < end);
1328 new_entry->vme_start = start;
1329 new_entry->vme_end = end;
1330 assert(page_aligned(new_entry->vme_start));
1331 assert(page_aligned(new_entry->vme_end));
1332 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
1333 VM_MAP_PAGE_MASK(map)));
1334 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
1335 VM_MAP_PAGE_MASK(map)));
1336
1337 new_entry->is_shared = FALSE;
1338 new_entry->is_sub_map = FALSE;
1339 new_entry->use_pmap = TRUE;
1340 new_entry->object.vm_object = VM_OBJECT_NULL;
1341 new_entry->offset = (vm_object_offset_t) 0;
1342
1343 new_entry->needs_copy = FALSE;
1344
1345 new_entry->inheritance = VM_INHERIT_DEFAULT;
1346 new_entry->protection = VM_PROT_DEFAULT;
1347 new_entry->max_protection = VM_PROT_ALL;
1348 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
1349 new_entry->wired_count = 0;
1350 new_entry->user_wired_count = 0;
1351
1352 new_entry->in_transition = FALSE;
1353 new_entry->needs_wakeup = FALSE;
1354 new_entry->no_cache = FALSE;
1355 new_entry->permanent = FALSE;
1356 new_entry->superpage_size = FALSE;
1357 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
1358 new_entry->map_aligned = TRUE;
1359 } else {
1360 new_entry->map_aligned = FALSE;
1361 }
1362
1363 new_entry->used_for_jit = 0;
1364
1365 new_entry->alias = 0;
1366 new_entry->zero_wired_pages = FALSE;
1367 new_entry->iokit_acct = FALSE;
1368
1369 VM_GET_FLAGS_ALIAS(flags, new_entry->alias);
1370
1371 /*
1372 * Insert the new entry into the list
1373 */
1374
1375 vm_map_store_entry_link(map, entry, new_entry);
1376
1377 map->size += size;
1378
1379 /*
1380 * Update the lookup hint
1381 */
1382 SAVE_HINT_MAP_WRITE(map, new_entry);
1383
1384 *o_entry = new_entry;
1385 return(KERN_SUCCESS);
1386 }
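
/*
 * Sketch of the usual caller pattern (cf. kmem_alloc and friends),
 * assuming "object" is a VM object the caller already holds: since the
 * map is returned locked on success, the caller installs the object in
 * the new entry and only then drops the lock:
 *
 *	kr = vm_map_find_space(map, &addr, size, 0, 0, &entry);
 *	if (kr == KERN_SUCCESS) {
 *		entry->object.vm_object = object;
 *		entry->offset = (vm_object_offset_t) 0;
 *		vm_map_unlock(map);
 *	}
 */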
1387
1388 int vm_map_pmap_enter_print = FALSE;
1389 int vm_map_pmap_enter_enable = FALSE;
1390
1391 /*
1392 * Routine: vm_map_pmap_enter [internal only]
1393 *
1394 * Description:
1395 * Force pages from the specified object to be entered into
1396 * the pmap at the specified address if they are present.
1397  *	As soon as a page is not found in the object, the scan ends.
1398 *
1399 * Returns:
1400 * Nothing.
1401 *
1402 * In/out conditions:
1403 * The source map should not be locked on entry.
1404 */
1405 __unused static void
1406 vm_map_pmap_enter(
1407 vm_map_t map,
1408 register vm_map_offset_t addr,
1409 register vm_map_offset_t end_addr,
1410 register vm_object_t object,
1411 vm_object_offset_t offset,
1412 vm_prot_t protection)
1413 {
1414 int type_of_fault;
1415 kern_return_t kr;
1416
1417 if(map->pmap == 0)
1418 return;
1419
1420 while (addr < end_addr) {
1421 register vm_page_t m;
1422
1423
1424 /*
1425 * TODO:
1426 * From vm_map_enter(), we come into this function without the map
1427 * lock held or the object lock held.
1428 * We haven't taken a reference on the object either.
1429 * We should do a proper lookup on the map to make sure
1430 * that things are sane before we go locking objects that
1431 * could have been deallocated from under us.
1432 */
1433
1434 vm_object_lock(object);
1435
1436 m = vm_page_lookup(object, offset);
1437 /*
1438 * ENCRYPTED SWAP:
1439 * The user should never see encrypted data, so do not
1440 * enter an encrypted page in the page table.
1441 */
1442 if (m == VM_PAGE_NULL || m->busy || m->encrypted ||
1443 m->fictitious ||
1444 (m->unusual && ( m->error || m->restart || m->absent))) {
1445 vm_object_unlock(object);
1446 return;
1447 }
1448
1449 if (vm_map_pmap_enter_print) {
1450 printf("vm_map_pmap_enter:");
1451 printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
1452 map, (unsigned long long)addr, object, (unsigned long long)offset);
1453 }
1454 type_of_fault = DBG_CACHE_HIT_FAULT;
1455 kr = vm_fault_enter(m, map->pmap, addr, protection, protection,
1456 VM_PAGE_WIRED(m), FALSE, FALSE, FALSE,
1457 0, /* XXX need user tag / alias? */
1458 0, /* alternate accounting? */
1459 NULL,
1460 &type_of_fault);
1461
1462 vm_object_unlock(object);
1463
1464 offset += PAGE_SIZE_64;
1465 addr += PAGE_SIZE;
1466 }
1467 }
1468
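/*
 * vm_map_pmap_is_empty:
 *
 * Sanity check: returns TRUE if no physical page is mapped in the map's
 * pmap anywhere in [start, end).  Without a machine-specific
 * pmap_is_empty(), the range is walked one page at a time with
 * pmap_find_phys() and the first translation found is reported.
 */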
1469 boolean_t vm_map_pmap_is_empty(
1470 vm_map_t map,
1471 vm_map_offset_t start,
1472 vm_map_offset_t end);
1473 boolean_t vm_map_pmap_is_empty(
1474 vm_map_t map,
1475 vm_map_offset_t start,
1476 vm_map_offset_t end)
1477 {
1478 #ifdef MACHINE_PMAP_IS_EMPTY
1479 return pmap_is_empty(map->pmap, start, end);
1480 #else /* MACHINE_PMAP_IS_EMPTY */
1481 vm_map_offset_t offset;
1482 ppnum_t phys_page;
1483
1484 if (map->pmap == NULL) {
1485 return TRUE;
1486 }
1487
1488 for (offset = start;
1489 offset < end;
1490 offset += PAGE_SIZE) {
1491 phys_page = pmap_find_phys(map->pmap, offset);
1492 if (phys_page) {
1493 kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
1494 "page %d at 0x%llx\n",
1495 map, (long long)start, (long long)end,
1496 phys_page, (long long)offset);
1497 return FALSE;
1498 }
1499 }
1500 return TRUE;
1501 #endif /* MACHINE_PMAP_IS_EMPTY */
1502 }
1503
1504 #define MAX_TRIES_TO_GET_RANDOM_ADDRESS 1000
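/*
 * vm_map_random_address_for_size:
 *
 * Pick a page-aligned address at random within the map such that a hole
 * of at least "size" bytes starts there, giving up with KERN_NO_SPACE
 * after MAX_TRIES_TO_GET_RANDOM_ADDRESS attempts.  Called with the map
 * locked (see the VM_FLAGS_MAP_JIT path in vm_map_enter() below).
 */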
1505 kern_return_t
1506 vm_map_random_address_for_size(
1507 vm_map_t map,
1508 vm_map_offset_t *address,
1509 vm_map_size_t size)
1510 {
1511 kern_return_t kr = KERN_SUCCESS;
1512 int tries = 0;
1513 vm_map_offset_t random_addr = 0;
1514 vm_map_offset_t hole_end;
1515
1516 vm_map_entry_t next_entry = VM_MAP_ENTRY_NULL;
1517 vm_map_entry_t prev_entry = VM_MAP_ENTRY_NULL;
1518 vm_map_size_t vm_hole_size = 0;
1519 vm_map_size_t addr_space_size;
1520
1521 addr_space_size = vm_map_max(map) - vm_map_min(map);
1522
1523 assert(page_aligned(size));
1524
1525 while (tries < MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
1526 random_addr = ((vm_map_offset_t)random()) << PAGE_SHIFT;
1527 random_addr = vm_map_trunc_page(
1528 vm_map_min(map) +(random_addr % addr_space_size),
1529 VM_MAP_PAGE_MASK(map));
1530
1531 if (vm_map_lookup_entry(map, random_addr, &prev_entry) == FALSE) {
1532 if (prev_entry == vm_map_to_entry(map)) {
1533 next_entry = vm_map_first_entry(map);
1534 } else {
1535 next_entry = prev_entry->vme_next;
1536 }
1537 if (next_entry == vm_map_to_entry(map)) {
1538 hole_end = vm_map_max(map);
1539 } else {
1540 hole_end = next_entry->vme_start;
1541 }
1542 vm_hole_size = hole_end - random_addr;
1543 if (vm_hole_size >= size) {
1544 *address = random_addr;
1545 break;
1546 }
1547 }
1548 tries++;
1549 }
1550
1551 if (tries == MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
1552 kr = KERN_NO_SPACE;
1553 }
1554 return kr;
1555 }
1556
1557 /*
1558 * Routine: vm_map_enter
1559 *
1560 * Description:
1561 * Allocate a range in the specified virtual address map.
1562 * The resulting range will refer to memory defined by
1563 * the given memory object and offset into that object.
1564 *
1565 * Arguments are as defined in the vm_map call.
1566 */
1567 int _map_enter_debug = 0;
1568 static unsigned int vm_map_enter_restore_successes = 0;
1569 static unsigned int vm_map_enter_restore_failures = 0;
1570 kern_return_t
1571 vm_map_enter(
1572 vm_map_t map,
1573 vm_map_offset_t *address, /* IN/OUT */
1574 vm_map_size_t size,
1575 vm_map_offset_t mask,
1576 int flags,
1577 vm_object_t object,
1578 vm_object_offset_t offset,
1579 boolean_t needs_copy,
1580 vm_prot_t cur_protection,
1581 vm_prot_t max_protection,
1582 vm_inherit_t inheritance)
1583 {
1584 vm_map_entry_t entry, new_entry;
1585 vm_map_offset_t start, tmp_start, tmp_offset;
1586 vm_map_offset_t end, tmp_end;
1587 vm_map_offset_t tmp2_start, tmp2_end;
1588 vm_map_offset_t step;
1589 kern_return_t result = KERN_SUCCESS;
1590 vm_map_t zap_old_map = VM_MAP_NULL;
1591 vm_map_t zap_new_map = VM_MAP_NULL;
1592 boolean_t map_locked = FALSE;
1593 boolean_t pmap_empty = TRUE;
1594 boolean_t new_mapping_established = FALSE;
1595 boolean_t keep_map_locked = ((flags & VM_FLAGS_KEEP_MAP_LOCKED) != 0);
1596 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
1597 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
1598 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
1599 boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
1600 boolean_t is_submap = ((flags & VM_FLAGS_SUBMAP) != 0);
1601 boolean_t permanent = ((flags & VM_FLAGS_PERMANENT) != 0);
1602 boolean_t entry_for_jit = ((flags & VM_FLAGS_MAP_JIT) != 0);
1603 boolean_t iokit_acct = ((flags & VM_FLAGS_IOKIT_ACCT) != 0);
1604 unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
1605 char alias;
1606 vm_map_offset_t effective_min_offset, effective_max_offset;
1607 kern_return_t kr;
1608 boolean_t clear_map_aligned = FALSE;
1609
1610 if (superpage_size) {
1611 switch (superpage_size) {
1612 /*
1613 * Note that the current implementation only supports
1614 * a single size for superpages, SUPERPAGE_SIZE, per
1615  * architecture. As soon as more sizes are to be
1616  * supported, SUPERPAGE_SIZE has to be replaced
1617 * with a lookup of the size depending on superpage_size.
1618 */
1619 #ifdef __x86_64__
1620 case SUPERPAGE_SIZE_ANY:
1621 /* handle it like 2 MB and round up to page size */
1622 size = (size + 2*1024*1024 - 1) & ~(2*1024*1024 - 1);
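			/* fall through: treated the same as SUPERPAGE_SIZE_2MB */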
1623 case SUPERPAGE_SIZE_2MB:
1624 break;
1625 #endif
1626 default:
1627 return KERN_INVALID_ARGUMENT;
1628 }
1629 mask = SUPERPAGE_SIZE-1;
1630 if (size & (SUPERPAGE_SIZE-1))
1631 return KERN_INVALID_ARGUMENT;
1632 inheritance = VM_INHERIT_NONE; /* fork() children won't inherit superpages */
1633 }
1634
1635
1636
1637 if (is_submap) {
1638 if (purgable) {
1639 /* submaps can not be purgeable */
1640 return KERN_INVALID_ARGUMENT;
1641 }
1642 if (object == VM_OBJECT_NULL) {
1643 /* submaps can not be created lazily */
1644 return KERN_INVALID_ARGUMENT;
1645 }
1646 }
1647 if (flags & VM_FLAGS_ALREADY) {
1648 /*
1649 * VM_FLAGS_ALREADY says that it's OK if the same mapping
1650  * is already present.  For it to be meaningful, the requested
1651  * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
1652  * we shouldn't try to remove what was mapped there first
1653 * (!VM_FLAGS_OVERWRITE).
1654 */
1655 if ((flags & VM_FLAGS_ANYWHERE) ||
1656 (flags & VM_FLAGS_OVERWRITE)) {
1657 return KERN_INVALID_ARGUMENT;
1658 }
1659 }
1660
1661 effective_min_offset = map->min_offset;
1662
1663 if (flags & VM_FLAGS_BEYOND_MAX) {
1664 /*
1665 * Allow an insertion beyond the map's max offset.
1666 */
1667 if (vm_map_is_64bit(map))
1668 effective_max_offset = 0xFFFFFFFFFFFFF000ULL;
1669 else
1670 effective_max_offset = 0x00000000FFFFF000ULL;
1671 } else {
1672 effective_max_offset = map->max_offset;
1673 }
1674
1675 if (size == 0 ||
1676 (offset & PAGE_MASK_64) != 0) {
1677 *address = 0;
1678 return KERN_INVALID_ARGUMENT;
1679 }
1680
1681 VM_GET_FLAGS_ALIAS(flags, alias);
1682
1683 #define RETURN(value) { result = value; goto BailOut; }
1684
1685 assert(page_aligned(*address));
1686 assert(page_aligned(size));
1687
1688 if (!VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map))) {
1689 /*
1690 * In most cases, the caller rounds the size up to the
1691 * map's page size.
1692 * If we get a size that is explicitly not map-aligned here,
1693 * we'll have to respect the caller's wish and mark the
1694 * mapping as "not map-aligned" to avoid tripping the
1695 * map alignment checks later.
1696 */
1697 clear_map_aligned = TRUE;
1698 }
1699 if (!anywhere &&
1700 !VM_MAP_PAGE_ALIGNED(*address, VM_MAP_PAGE_MASK(map))) {
1701 /*
1702 * We've been asked to map at a fixed address and that
1703 * address is not aligned to the map's specific alignment.
1704 * The caller should know what it's doing (i.e. most likely
1705 * mapping some fragmented copy map, transferring memory from
1706 * a VM map with a different alignment), so clear map_aligned
1707 * for this new VM map entry and proceed.
1708 */
1709 clear_map_aligned = TRUE;
1710 }
1711
1712 /*
1713 * Only zero-fill objects are allowed to be purgable.
1714 * LP64todo - limit purgable objects to 32-bits for now
1715 */
1716 if (purgable &&
1717 (offset != 0 ||
1718 (object != VM_OBJECT_NULL &&
1719 (object->vo_size != size ||
1720 object->purgable == VM_PURGABLE_DENY))
1721 || size > ANON_MAX_SIZE)) /* LP64todo: remove when dp capable */
1722 return KERN_INVALID_ARGUMENT;
1723
1724 if (!anywhere && overwrite) {
1725 /*
1726 * Create a temporary VM map to hold the old mappings in the
1727 * affected area while we create the new one.
1728 * This avoids releasing the VM map lock in
1729 * vm_map_entry_delete() and allows atomicity
1730 * when we want to replace some mappings with a new one.
1731 * It also allows us to restore the old VM mappings if the
1732 * new mapping fails.
1733 */
1734 zap_old_map = vm_map_create(PMAP_NULL,
1735 *address,
1736 *address + size,
1737 map->hdr.entries_pageable);
1738 vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
1739 }
1740
1741 StartAgain: ;
1742
1743 start = *address;
1744
1745 if (anywhere) {
1746 vm_map_lock(map);
1747 map_locked = TRUE;
1748
1749 if (entry_for_jit) {
1750 if (map->jit_entry_exists) {
1751 result = KERN_INVALID_ARGUMENT;
1752 goto BailOut;
1753 }
1754 /*
1755 * Get a random start address.
1756 */
1757 result = vm_map_random_address_for_size(map, address, size);
1758 if (result != KERN_SUCCESS) {
1759 goto BailOut;
1760 }
1761 start = *address;
1762 }
1763
1764
1765 /*
1766 * Calculate the first possible address.
1767 */
1768
1769 if (start < effective_min_offset)
1770 start = effective_min_offset;
1771 if (start > effective_max_offset)
1772 RETURN(KERN_NO_SPACE);
1773
1774 /*
1775 * Look for the first possible address;
1776 * if there's already something at this
1777 * address, we have to start after it.
1778 */
1779
1780 if( map->disable_vmentry_reuse == TRUE) {
1781 VM_MAP_HIGHEST_ENTRY(map, entry, start);
1782 } else {
1783 assert(first_free_is_valid(map));
1784
1785 entry = map->first_free;
1786
1787 if (entry == vm_map_to_entry(map)) {
1788 entry = NULL;
1789 } else {
1790 if (entry->vme_next == vm_map_to_entry(map)){
1791 /*
1792 * Hole at the end of the map.
1793 */
1794 entry = NULL;
1795 } else {
1796 if (start < (entry->vme_next)->vme_start ) {
1797 start = entry->vme_end;
1798 start = vm_map_round_page(start,
1799 VM_MAP_PAGE_MASK(map));
1800 } else {
1801 /*
1802 * Need to do a lookup.
1803 */
1804 entry = NULL;
1805 }
1806 }
1807 }
1808
1809 if (entry == NULL) {
1810 vm_map_entry_t tmp_entry;
1811 if (vm_map_lookup_entry(map, start, &tmp_entry)) {
1812 assert(!entry_for_jit);
1813 start = tmp_entry->vme_end;
1814 start = vm_map_round_page(start,
1815 VM_MAP_PAGE_MASK(map));
1816 }
1817 entry = tmp_entry;
1818 }
1819 }
1820
1821 /*
1822 * In any case, the "entry" always precedes
1823 * the proposed new region throughout the
1824 * loop:
1825 */
1826
1827 while (TRUE) {
1828 register vm_map_entry_t next;
1829
1830 /*
1831 * Find the end of the proposed new region.
1832 * Be sure we didn't go beyond the end, or
1833 * wrap around the address.
1834 */
1835
1836 end = ((start + mask) & ~mask);
1837 end = vm_map_round_page(end,
1838 VM_MAP_PAGE_MASK(map));
1839 if (end < start)
1840 RETURN(KERN_NO_SPACE);
1841 start = end;
1842 assert(VM_MAP_PAGE_ALIGNED(start,
1843 VM_MAP_PAGE_MASK(map)));
1844 end += size;
1845
1846 if ((end > effective_max_offset) || (end < start)) {
1847 if (map->wait_for_space) {
1848 assert(!keep_map_locked);
1849 if (size <= (effective_max_offset -
1850 effective_min_offset)) {
1851 assert_wait((event_t)map,
1852 THREAD_ABORTSAFE);
1853 vm_map_unlock(map);
1854 map_locked = FALSE;
1855 thread_block(THREAD_CONTINUE_NULL);
1856 goto StartAgain;
1857 }
1858 }
1859 RETURN(KERN_NO_SPACE);
1860 }
1861
1862 /*
1863 * If there are no more entries, we must win.
1864 */
1865
1866 next = entry->vme_next;
1867 if (next == vm_map_to_entry(map))
1868 break;
1869
1870 /*
1871 * If there is another entry, it must be
1872 * after the end of the potential new region.
1873 */
1874
1875 if (next->vme_start >= end)
1876 break;
1877
1878 /*
1879 * Didn't fit -- move to the next entry.
1880 */
1881
1882 entry = next;
1883 start = entry->vme_end;
1884 start = vm_map_round_page(start,
1885 VM_MAP_PAGE_MASK(map));
1886 }
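/*
 * Descriptive note on the search above: this is a first-fit scan.
 * Each candidate start address is rounded up to satisfy both "mask"
 * and the map's page mask, then the range [start, start + size) is
 * checked against the following entry; on a collision the scan
 * resumes at that entry's end. As an illustrative example with
 * hypothetical values, mask == 0xFFF and a hole starting at 0x1234
 * would yield a candidate start of 0x2000 before the size check.
 */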
1887 *address = start;
1888 assert(VM_MAP_PAGE_ALIGNED(*address,
1889 VM_MAP_PAGE_MASK(map)));
1890 } else {
1891 /*
1892 * Verify that:
1893 * the address doesn't itself violate
1894 * the mask requirement.
1895 */
1896
1897 vm_map_lock(map);
1898 map_locked = TRUE;
1899 if ((start & mask) != 0)
1900 RETURN(KERN_NO_SPACE);
1901
1902 /*
1903 * ... the address is within bounds
1904 */
1905
1906 end = start + size;
1907
1908 if ((start < effective_min_offset) ||
1909 (end > effective_max_offset) ||
1910 (start >= end)) {
1911 RETURN(KERN_INVALID_ADDRESS);
1912 }
1913
1914 if (overwrite && zap_old_map != VM_MAP_NULL) {
1915 /*
1916 * Fixed mapping and "overwrite" flag: attempt to
1917 * remove all existing mappings in the specified
1918 * address range, saving them in our "zap_old_map".
1919 */
1920 (void) vm_map_delete(map, start, end,
1921 (VM_MAP_REMOVE_SAVE_ENTRIES |
1922 VM_MAP_REMOVE_NO_MAP_ALIGN),
1923 zap_old_map);
1924 }
1925
1926 /*
1927 * ... the starting address isn't allocated
1928 */
1929
1930 if (vm_map_lookup_entry(map, start, &entry)) {
1931 if (! (flags & VM_FLAGS_ALREADY)) {
1932 RETURN(KERN_NO_SPACE);
1933 }
1934 /*
1935 * Check if what's already there is what we want.
1936 */
1937 tmp_start = start;
1938 tmp_offset = offset;
1939 if (entry->vme_start < start) {
1940 tmp_start -= start - entry->vme_start;
1941 tmp_offset -= start - entry->vme_start;
1942
1943 }
1944 for (; entry->vme_start < end;
1945 entry = entry->vme_next) {
1946 /*
1947 * Check if the mapping's attributes
1948 * match the existing map entry.
1949 */
1950 if (entry == vm_map_to_entry(map) ||
1951 entry->vme_start != tmp_start ||
1952 entry->is_sub_map != is_submap ||
1953 entry->offset != tmp_offset ||
1954 entry->needs_copy != needs_copy ||
1955 entry->protection != cur_protection ||
1956 entry->max_protection != max_protection ||
1957 entry->inheritance != inheritance ||
1958 entry->iokit_acct != iokit_acct ||
1959 entry->alias != alias) {
1960 /* not the same mapping ! */
1961 RETURN(KERN_NO_SPACE);
1962 }
1963 /*
1964 * Check if the same object is being mapped.
1965 */
1966 if (is_submap) {
1967 if (entry->object.sub_map !=
1968 (vm_map_t) object) {
1969 /* not the same submap */
1970 RETURN(KERN_NO_SPACE);
1971 }
1972 } else {
1973 if (entry->object.vm_object != object) {
1974 /* not the same VM object... */
1975 vm_object_t obj2;
1976
1977 obj2 = entry->object.vm_object;
1978 if ((obj2 == VM_OBJECT_NULL ||
1979 obj2->internal) &&
1980 (object == VM_OBJECT_NULL ||
1981 object->internal)) {
1982 /*
1983 * ... but both are
1984 * anonymous memory,
1985 * so equivalent.
1986 */
1987 } else {
1988 RETURN(KERN_NO_SPACE);
1989 }
1990 }
1991 }
1992
1993 tmp_offset += entry->vme_end - entry->vme_start;
1994 tmp_start += entry->vme_end - entry->vme_start;
1995 if (entry->vme_end >= end) {
1996 /* reached the end of our mapping */
1997 break;
1998 }
1999 }
2000 /* it all matches: let's use what's already there ! */
2001 RETURN(KERN_MEMORY_PRESENT);
2002 }
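/*
 * Note on the VM_FLAGS_ALREADY path above: a fixed-address request
 * can succeed with KERN_MEMORY_PRESENT when the requested range is
 * already mapped with identical attributes and the same backing
 * object (or when both sides are anonymous memory), instead of
 * failing with KERN_NO_SPACE.
 */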
2003
2004 /*
2005 * ... the next region doesn't overlap the
2006 * end point.
2007 */
2008
2009 if ((entry->vme_next != vm_map_to_entry(map)) &&
2010 (entry->vme_next->vme_start < end))
2011 RETURN(KERN_NO_SPACE);
2012 }
2013
2014 /*
2015 * At this point,
2016 * "start" and "end" should define the endpoints of the
2017 * available new range, and
2018 * "entry" should refer to the region before the new
2019 * range, and
2020 *
2021 * the map should be locked.
2022 */
2023
2024 /*
2025 * See whether we can avoid creating a new entry (and object) by
2026 * extending one of our neighbors. [So far, we only attempt to
2027 * extend from below.] Note that we can never extend/join
2028 * purgeable objects because they need to remain distinct
2029 * entities in order to implement their "volatile object"
2030 * semantics.
2031 */
2032
2033 if (purgable || entry_for_jit) {
2034 if (object == VM_OBJECT_NULL) {
2035 object = vm_object_allocate(size);
2036 object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
2037 object->true_share = TRUE;
2038 if (purgable) {
2039 task_t owner;
2040 object->purgable = VM_PURGABLE_NONVOLATILE;
2041 if (map->pmap == kernel_pmap) {
2042 /*
2043 * Purgeable mappings made in a kernel
2044 * map are "owned" by the kernel itself
2045 * rather than the current user task
2046 * because they're likely to be used by
2047 * more than this user task (see
2048 * execargs_purgeable_allocate(), for
2049 * example).
2050 */
2051 owner = kernel_task;
2052 } else {
2053 owner = current_task();
2054 }
2055 assert(object->vo_purgeable_owner == NULL);
2056 assert(object->resident_page_count == 0);
2057 assert(object->wired_page_count == 0);
2058 vm_object_lock(object);
2059 vm_purgeable_nonvolatile_enqueue(object, owner);
2060 vm_object_unlock(object);
2061 }
2062 offset = (vm_object_offset_t)0;
2063 }
2064 } else if ((is_submap == FALSE) &&
2065 (object == VM_OBJECT_NULL) &&
2066 (entry != vm_map_to_entry(map)) &&
2067 (entry->vme_end == start) &&
2068 (!entry->is_shared) &&
2069 (!entry->is_sub_map) &&
2070 (!entry->in_transition) &&
2071 (!entry->needs_wakeup) &&
2072 (entry->behavior == VM_BEHAVIOR_DEFAULT) &&
2073 (entry->protection == cur_protection) &&
2074 (entry->max_protection == max_protection) &&
2075 (entry->inheritance == inheritance) &&
2076 ((alias == VM_MEMORY_REALLOC) || (entry->alias == alias)) &&
2077 (entry->no_cache == no_cache) &&
2078 (entry->permanent == permanent) &&
2079 (!entry->superpage_size && !superpage_size) &&
2080 /*
2081 * No coalescing if not map-aligned, to avoid propagating
2082 * that condition any further than needed:
2083 */
2084 (!entry->map_aligned || !clear_map_aligned) &&
2085 (!entry->zero_wired_pages) &&
2086 (!entry->used_for_jit && !entry_for_jit) &&
2087 (entry->iokit_acct == iokit_acct) &&
2088
2089 ((entry->vme_end - entry->vme_start) + size <=
2090 (alias == VM_MEMORY_REALLOC ?
2091 ANON_CHUNK_SIZE :
2092 NO_COALESCE_LIMIT)) &&
2093
2094 (entry->wired_count == 0)) { /* implies user_wired_count == 0 */
2095 if (vm_object_coalesce(entry->object.vm_object,
2096 VM_OBJECT_NULL,
2097 entry->offset,
2098 (vm_object_offset_t) 0,
2099 (vm_map_size_t)(entry->vme_end - entry->vme_start),
2100 (vm_map_size_t)(end - entry->vme_end))) {
2101
2102 /*
2103 * Coalesced the two objects - can extend
2104 * the previous map entry to include the
2105 * new range.
2106 */
2107 map->size += (end - entry->vme_end);
2108 assert(entry->vme_start < end);
2109 assert(VM_MAP_PAGE_ALIGNED(end,
2110 VM_MAP_PAGE_MASK(map)));
2111 entry->vme_end = end;
2112 vm_map_store_update_first_free(map, map->first_free);
2113 new_mapping_established = TRUE;
2114 RETURN(KERN_SUCCESS);
2115 }
2116 }
2117
2118 step = superpage_size ? SUPERPAGE_SIZE : (end - start);
2119 new_entry = NULL;
2120
2121 for (tmp2_start = start; tmp2_start<end; tmp2_start += step) {
2122 tmp2_end = tmp2_start + step;
2123 /*
2124 * Create a new entry
2125 * LP64todo - for now, we can only allocate 4GB internal objects
2126 * because the default pager can't page bigger ones. Remove this
2127 * when it can.
2128 *
2129 * XXX FBDP
2130 * The reserved "page zero" in each process's address space can
2131 * be arbitrarily large. Splitting it into separate 4GB objects and
2132 * therefore different VM map entries serves no purpose and just
2133 * slows down operations on the VM map, so let's not split the
2134 * allocation into 4GB chunks if the max protection is NONE. That
2135 * memory should never be accessible, so it will never get to the
2136 * default pager.
2137 */
2138 tmp_start = tmp2_start;
2139 if (object == VM_OBJECT_NULL &&
2140 size > (vm_map_size_t)ANON_CHUNK_SIZE &&
2141 max_protection != VM_PROT_NONE &&
2142 superpage_size == 0)
2143 tmp_end = tmp_start + (vm_map_size_t)ANON_CHUNK_SIZE;
2144 else
2145 tmp_end = tmp2_end;
2146 do {
2147 new_entry = vm_map_entry_insert(map, entry, tmp_start, tmp_end,
2148 object, offset, needs_copy,
2149 FALSE, FALSE,
2150 cur_protection, max_protection,
2151 VM_BEHAVIOR_DEFAULT,
2152 (entry_for_jit)? VM_INHERIT_NONE: inheritance,
2153 0, no_cache,
2154 permanent,
2155 superpage_size,
2156 clear_map_aligned,
2157 is_submap);
2158 new_entry->alias = alias;
2159 if (entry_for_jit){
2160 if (!(map->jit_entry_exists)){
2161 new_entry->used_for_jit = TRUE;
2162 map->jit_entry_exists = TRUE;
2163 }
2164 }
2165
2166 assert(!new_entry->iokit_acct);
2167 if (!is_submap &&
2168 object != VM_OBJECT_NULL &&
2169 object->purgable != VM_PURGABLE_DENY) {
2170 assert(new_entry->use_pmap);
2171 assert(!new_entry->iokit_acct);
2172 /*
2173 * Turn off pmap accounting since
2174 * purgeable objects have their
2175 * own ledgers.
2176 */
2177 new_entry->use_pmap = FALSE;
2178 } else if (!is_submap &&
2179 iokit_acct) {
2180 /* alternate accounting */
2181 assert(!new_entry->iokit_acct);
2182 assert(new_entry->use_pmap);
2183 new_entry->iokit_acct = TRUE;
2184 new_entry->use_pmap = FALSE;
2185 vm_map_iokit_mapped_region(
2186 map,
2187 (new_entry->vme_end -
2188 new_entry->vme_start));
2189 } else if (!is_submap) {
2190 assert(!new_entry->iokit_acct);
2191 assert(new_entry->use_pmap);
2192 }
2193
2194 if (is_submap) {
2195 vm_map_t submap;
2196 boolean_t submap_is_64bit;
2197 boolean_t use_pmap;
2198
2199 assert(new_entry->is_sub_map);
2200 assert(!new_entry->use_pmap);
2201 assert(!new_entry->iokit_acct);
2202 submap = (vm_map_t) object;
2203 submap_is_64bit = vm_map_is_64bit(submap);
2204 use_pmap = (alias == VM_MEMORY_SHARED_PMAP);
2205 #ifndef NO_NESTED_PMAP
2206 if (use_pmap && submap->pmap == NULL) {
2207 ledger_t ledger = map->pmap->ledger;
2208 /* we need a sub pmap to nest... */
2209 submap->pmap = pmap_create(ledger, 0,
2210 submap_is_64bit);
2211 if (submap->pmap == NULL) {
2212 /* let's proceed without nesting... */
2213 }
2214 }
2215 if (use_pmap && submap->pmap != NULL) {
2216 kr = pmap_nest(map->pmap,
2217 submap->pmap,
2218 tmp_start,
2219 tmp_start,
2220 tmp_end - tmp_start);
2221 if (kr != KERN_SUCCESS) {
2222 printf("vm_map_enter: "
2223 "pmap_nest(0x%llx,0x%llx) "
2224 "error 0x%x\n",
2225 (long long)tmp_start,
2226 (long long)tmp_end,
2227 kr);
2228 } else {
2229 /* we're now nested ! */
2230 new_entry->use_pmap = TRUE;
2231 pmap_empty = FALSE;
2232 }
2233 }
2234 #endif /* NO_NESTED_PMAP */
2235 }
2236 entry = new_entry;
2237
2238 if (superpage_size) {
2239 vm_page_t pages, m;
2240 vm_object_t sp_object;
2241
2242 entry->offset = 0;
2243
2244 /* allocate one superpage */
2245 kr = cpm_allocate(SUPERPAGE_SIZE, &pages, 0, SUPERPAGE_NBASEPAGES-1, TRUE, 0);
2246 if (kr != KERN_SUCCESS) {
2247 new_mapping_established = TRUE; /* will cause deallocation of whole range */
2248 RETURN(kr);
2249 }
2250
2251 /* create one vm_object per superpage */
2252 sp_object = vm_object_allocate((vm_map_size_t)(entry->vme_end - entry->vme_start));
2253 sp_object->phys_contiguous = TRUE;
2254 sp_object->vo_shadow_offset = (vm_object_offset_t)pages->phys_page*PAGE_SIZE;
2255 entry->object.vm_object = sp_object;
2256 assert(entry->use_pmap);
2257
2258 /* enter the base pages into the object */
2259 vm_object_lock(sp_object);
2260 for (offset = 0; offset < SUPERPAGE_SIZE; offset += PAGE_SIZE) {
2261 m = pages;
2262 pmap_zero_page(m->phys_page);
2263 pages = NEXT_PAGE(m);
2264 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
2265 vm_page_insert(m, sp_object, offset);
2266 }
2267 vm_object_unlock(sp_object);
2268 }
2269 } while (tmp_end != tmp2_end &&
2270 (tmp_start = tmp_end) &&
2271 (tmp_end = (tmp2_end - tmp_end > (vm_map_size_t)ANON_CHUNK_SIZE) ?
2272 tmp_end + (vm_map_size_t)ANON_CHUNK_SIZE : tmp2_end));
2273 }
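/*
 * Descriptive note on the loop above: for anonymous memory with no
 * backing object, a non-NONE max protection and no superpage, the
 * inner do/while carves the range into ANON_CHUNK_SIZE pieces so
 * that no single internal object exceeds that size; for superpage
 * mappings the outer loop steps by SUPERPAGE_SIZE instead. As a
 * hypothetical example, with a 4GB ANON_CHUNK_SIZE a 10GB anonymous
 * request would produce three map entries (4GB, 4GB and 2GB), each
 * with its own VM object.
 */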
2274
2275 new_mapping_established = TRUE;
2276
2277 BailOut:
2278 assert(map_locked == TRUE);
2279
2280 if (result == KERN_SUCCESS) {
2281 vm_prot_t pager_prot;
2282 memory_object_t pager;
2283
2284 #if DEBUG
2285 if (pmap_empty &&
2286 !(flags & VM_FLAGS_NO_PMAP_CHECK)) {
2287 assert(vm_map_pmap_is_empty(map,
2288 *address,
2289 *address+size));
2290 }
2291 #endif /* DEBUG */
2292
2293 /*
2294 * For "named" VM objects, let the pager know that the
2295 * memory object is being mapped. Some pagers need to keep
2296 * track of this, to know when they can reclaim the memory
2297 * object, for example.
2298 * VM calls memory_object_map() for each mapping (specifying
2299 * the protection of each mapping) and calls
2300 * memory_object_last_unmap() when all the mappings are gone.
2301 */
2302 pager_prot = max_protection;
2303 if (needs_copy) {
2304 /*
2305 * Copy-On-Write mapping: won't modify
2306 * the memory object.
2307 */
2308 pager_prot &= ~VM_PROT_WRITE;
2309 }
2310 if (!is_submap &&
2311 object != VM_OBJECT_NULL &&
2312 object->named &&
2313 object->pager != MEMORY_OBJECT_NULL) {
2314 vm_object_lock(object);
2315 pager = object->pager;
2316 if (object->named &&
2317 pager != MEMORY_OBJECT_NULL) {
2318 assert(object->pager_ready);
2319 vm_object_mapping_wait(object, THREAD_UNINT);
2320 vm_object_mapping_begin(object);
2321 vm_object_unlock(object);
2322
2323 kr = memory_object_map(pager, pager_prot);
2324 assert(kr == KERN_SUCCESS);
2325
2326 vm_object_lock(object);
2327 vm_object_mapping_end(object);
2328 }
2329 vm_object_unlock(object);
2330 }
2331 }
2332
2333 assert(map_locked == TRUE);
2334
2335 if (!keep_map_locked) {
2336 vm_map_unlock(map);
2337 map_locked = FALSE;
2338 }
2339
2340 /*
2341 * We can't hold the map lock if we enter this block.
2342 */
2343
2344 if (result == KERN_SUCCESS) {
2345
2346 /* Wire down the new entry if the user
2347 * requested all new map entries be wired.
2348 */
2349 if ((map->wiring_required)||(superpage_size)) {
2350 assert(!keep_map_locked);
2351 pmap_empty = FALSE; /* pmap won't be empty */
2352 kr = vm_map_wire(map, start, end,
2353 new_entry->protection, TRUE);
2354 result = kr;
2355 }
2356
2357 }
2358
2359 if (result != KERN_SUCCESS) {
2360 if (new_mapping_established) {
2361 /*
2362 * We have to get rid of the new mappings since we
2363 * won't make them available to the user.
2364 * Try to do that atomically, to minimize the risk
2365 * that someone else creates new mappings in that range.
2366 */
2367 zap_new_map = vm_map_create(PMAP_NULL,
2368 *address,
2369 *address + size,
2370 map->hdr.entries_pageable);
2371 vm_map_set_page_shift(zap_new_map,
2372 VM_MAP_PAGE_SHIFT(map));
2373 if (!map_locked) {
2374 vm_map_lock(map);
2375 map_locked = TRUE;
2376 }
2377 (void) vm_map_delete(map, *address, *address+size,
2378 (VM_MAP_REMOVE_SAVE_ENTRIES |
2379 VM_MAP_REMOVE_NO_MAP_ALIGN),
2380 zap_new_map);
2381 }
2382 if (zap_old_map != VM_MAP_NULL &&
2383 zap_old_map->hdr.nentries != 0) {
2384 vm_map_entry_t entry1, entry2;
2385
2386 /*
2387 * The new mapping failed. Attempt to restore
2388 * the old mappings, saved in the "zap_old_map".
2389 */
2390 if (!map_locked) {
2391 vm_map_lock(map);
2392 map_locked = TRUE;
2393 }
2394
2395 /* first check if the coast is still clear */
2396 start = vm_map_first_entry(zap_old_map)->vme_start;
2397 end = vm_map_last_entry(zap_old_map)->vme_end;
2398 if (vm_map_lookup_entry(map, start, &entry1) ||
2399 vm_map_lookup_entry(map, end, &entry2) ||
2400 entry1 != entry2) {
2401 /*
2402 * Part of that range has already been
2403 * re-mapped: we can't restore the old
2404 * mappings...
2405 */
2406 vm_map_enter_restore_failures++;
2407 } else {
2408 /*
2409 * Transfer the saved map entries from
2410 * "zap_old_map" to the original "map",
2411 * inserting them all after "entry1".
2412 */
2413 for (entry2 = vm_map_first_entry(zap_old_map);
2414 entry2 != vm_map_to_entry(zap_old_map);
2415 entry2 = vm_map_first_entry(zap_old_map)) {
2416 vm_map_size_t entry_size;
2417
2418 entry_size = (entry2->vme_end -
2419 entry2->vme_start);
2420 vm_map_store_entry_unlink(zap_old_map,
2421 entry2);
2422 zap_old_map->size -= entry_size;
2423 vm_map_store_entry_link(map, entry1, entry2);
2424 map->size += entry_size;
2425 entry1 = entry2;
2426 }
2427 if (map->wiring_required) {
2428 /*
2429 * XXX TODO: we should rewire the
2430 * old pages here...
2431 */
2432 }
2433 vm_map_enter_restore_successes++;
2434 }
2435 }
2436 }
2437
2438 /*
2439 * The caller is responsible for releasing the lock if it requested to
2440 * keep the map locked.
2441 */
2442 if (map_locked && !keep_map_locked) {
2443 vm_map_unlock(map);
2444 }
2445
2446 /*
2447 * Get rid of the "zap_maps" and all the map entries that
2448 * they may still contain.
2449 */
2450 if (zap_old_map != VM_MAP_NULL) {
2451 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
2452 zap_old_map = VM_MAP_NULL;
2453 }
2454 if (zap_new_map != VM_MAP_NULL) {
2455 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
2456 zap_new_map = VM_MAP_NULL;
2457 }
2458
2459 return result;
2460
2461 #undef RETURN
2462 }
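/*
 * Illustrative sketch (hypothetical variable names) of a typical
 * in-kernel call to vm_map_enter(), allocating anonymous pageable
 * memory at any address:
 *
 *	vm_map_offset_t	addr = 0;
 *	kern_return_t	kr;
 *
 *	kr = vm_map_enter(map,
 *			  &addr,		   (out: chosen address)
 *			  size,			   (map-page-rounded size)
 *			  (vm_map_offset_t) 0,	   (no extra alignment mask)
 *			  VM_FLAGS_ANYWHERE,
 *			  VM_OBJECT_NULL,	   (anonymous memory)
 *			  (vm_object_offset_t) 0,
 *			  FALSE,		   (needs_copy)
 *			  VM_PROT_DEFAULT,
 *			  VM_PROT_ALL,
 *			  VM_INHERIT_DEFAULT);
 */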
2463
2464 /*
2465 * Counters for the prefault optimization.
2466 */
2467 int64_t vm_prefault_nb_pages = 0;
2468 int64_t vm_prefault_nb_bailout = 0;
2469
2470 static kern_return_t
2471 vm_map_enter_mem_object_helper(
2472 vm_map_t target_map,
2473 vm_map_offset_t *address,
2474 vm_map_size_t initial_size,
2475 vm_map_offset_t mask,
2476 int flags,
2477 ipc_port_t port,
2478 vm_object_offset_t offset,
2479 boolean_t copy,
2480 vm_prot_t cur_protection,
2481 vm_prot_t max_protection,
2482 vm_inherit_t inheritance,
2483 upl_page_list_ptr_t page_list,
2484 unsigned int page_list_count)
2485 {
2486 vm_map_address_t map_addr;
2487 vm_map_size_t map_size;
2488 vm_object_t object;
2489 vm_object_size_t size;
2490 kern_return_t result;
2491 boolean_t mask_cur_protection, mask_max_protection;
2492 boolean_t try_prefault = (page_list_count != 0);
2493 vm_map_offset_t offset_in_mapping;
2494
2495 mask_cur_protection = cur_protection & VM_PROT_IS_MASK;
2496 mask_max_protection = max_protection & VM_PROT_IS_MASK;
2497 cur_protection &= ~VM_PROT_IS_MASK;
2498 max_protection &= ~VM_PROT_IS_MASK;
2499
2500 /*
2501 * Check arguments for validity
2502 */
2503 if ((target_map == VM_MAP_NULL) ||
2504 (cur_protection & ~VM_PROT_ALL) ||
2505 (max_protection & ~VM_PROT_ALL) ||
2506 (inheritance > VM_INHERIT_LAST_VALID) ||
2507 (try_prefault && (copy || !page_list)) ||
2508 initial_size == 0)
2509 return KERN_INVALID_ARGUMENT;
2510
2511 map_addr = vm_map_trunc_page(*address,
2512 VM_MAP_PAGE_MASK(target_map));
2513 map_size = vm_map_round_page(initial_size,
2514 VM_MAP_PAGE_MASK(target_map));
2515 size = vm_object_round_page(initial_size);
2516
2517 /*
2518 * Find the vm object (if any) corresponding to this port.
2519 */
2520 if (!IP_VALID(port)) {
2521 object = VM_OBJECT_NULL;
2522 offset = 0;
2523 copy = FALSE;
2524 } else if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
2525 vm_named_entry_t named_entry;
2526
2527 named_entry = (vm_named_entry_t) port->ip_kobject;
2528
2529 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
2530 offset += named_entry->data_offset;
2531 }
2532
2533 /* a few checks to make sure user is obeying rules */
2534 if (size == 0) {
2535 if (offset >= named_entry->size)
2536 return KERN_INVALID_RIGHT;
2537 size = named_entry->size - offset;
2538 }
2539 if (mask_max_protection) {
2540 max_protection &= named_entry->protection;
2541 }
2542 if (mask_cur_protection) {
2543 cur_protection &= named_entry->protection;
2544 }
2545 if ((named_entry->protection & max_protection) !=
2546 max_protection)
2547 return KERN_INVALID_RIGHT;
2548 if ((named_entry->protection & cur_protection) !=
2549 cur_protection)
2550 return KERN_INVALID_RIGHT;
2551 if (offset + size < offset) {
2552 /* overflow */
2553 return KERN_INVALID_ARGUMENT;
2554 }
2555 if (named_entry->size < (offset + size))
2556 return KERN_INVALID_ARGUMENT;
2557
2558 if (named_entry->is_copy) {
2559 /* for a vm_map_copy, we can only map it whole */
2560 if ((size != named_entry->size) &&
2561 (vm_map_round_page(size,
2562 VM_MAP_PAGE_MASK(target_map)) ==
2563 named_entry->size)) {
2564 /* XXX FBDP use the rounded size... */
2565 size = vm_map_round_page(
2566 size,
2567 VM_MAP_PAGE_MASK(target_map));
2568 }
2569
2570 if (!(flags & VM_FLAGS_ANYWHERE) &&
2571 (offset != 0 ||
2572 size != named_entry->size)) {
2573 /*
2574 * XXX for a mapping at a "fixed" address,
2575 * we can't trim after mapping the whole
2576 * memory entry, so reject a request for a
2577 * partial mapping.
2578 */
2579 return KERN_INVALID_ARGUMENT;
2580 }
2581 }
2582
2583 /* The caller's "offset" parameter is relative to the start of the */
2584 /* named entry; convert it to an offset within the backing object. */
2585 offset = offset + named_entry->offset;
2586
2587 if (! VM_MAP_PAGE_ALIGNED(size,
2588 VM_MAP_PAGE_MASK(target_map))) {
2589 /*
2590 * Let's not map more than requested;
2591 * vm_map_enter() will handle this "not map-aligned"
2592 * case.
2593 */
2594 map_size = size;
2595 }
2596
2597 named_entry_lock(named_entry);
2598 if (named_entry->is_sub_map) {
2599 vm_map_t submap;
2600
2601 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
2602 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap.");
2603 }
2604
2605 submap = named_entry->backing.map;
2606 vm_map_lock(submap);
2607 vm_map_reference(submap);
2608 vm_map_unlock(submap);
2609 named_entry_unlock(named_entry);
2610
2611 result = vm_map_enter(target_map,
2612 &map_addr,
2613 map_size,
2614 mask,
2615 flags | VM_FLAGS_SUBMAP,
2616 (vm_object_t) submap,
2617 offset,
2618 copy,
2619 cur_protection,
2620 max_protection,
2621 inheritance);
2622 if (result != KERN_SUCCESS) {
2623 vm_map_deallocate(submap);
2624 } else {
2625 /*
2626 * No need to lock "submap" just to check its
2627 * "mapped" flag: that flag is never reset
2628 * once it's been set and if we race, we'll
2629 * just end up setting it twice, which is OK.
2630 */
2631 if (submap->mapped_in_other_pmaps == FALSE &&
2632 vm_map_pmap(submap) != PMAP_NULL &&
2633 vm_map_pmap(submap) !=
2634 vm_map_pmap(target_map)) {
2635 /*
2636 * This submap is being mapped in a map
2637 * that uses a different pmap.
2638 * Set its "mapped_in_other_pmaps" flag
2639 * to indicate that we now need to
2640 * remove mappings from all pmaps rather
2641 * than just the submap's pmap.
2642 */
2643 vm_map_lock(submap);
2644 submap->mapped_in_other_pmaps = TRUE;
2645 vm_map_unlock(submap);
2646 }
2647 *address = map_addr;
2648 }
2649 return result;
2650
2651 } else if (named_entry->is_pager) {
2652 unsigned int access;
2653 vm_prot_t protections;
2654 unsigned int wimg_mode;
2655
2656 protections = named_entry->protection & VM_PROT_ALL;
2657 access = GET_MAP_MEM(named_entry->protection);
2658
2659 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
2660 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap.");
2661 }
2662
2663 object = vm_object_enter(named_entry->backing.pager,
2664 named_entry->size,
2665 named_entry->internal,
2666 FALSE,
2667 FALSE);
2668 if (object == VM_OBJECT_NULL) {
2669 named_entry_unlock(named_entry);
2670 return KERN_INVALID_OBJECT;
2671 }
2672
2673 /* JMM - drop reference on pager here */
2674
2675 /* create an extra ref for the named entry */
2676 vm_object_lock(object);
2677 vm_object_reference_locked(object);
2678 named_entry->backing.object = object;
2679 named_entry->is_pager = FALSE;
2680 named_entry_unlock(named_entry);
2681
2682 wimg_mode = object->wimg_bits;
2683
2684 if (access == MAP_MEM_IO) {
2685 wimg_mode = VM_WIMG_IO;
2686 } else if (access == MAP_MEM_COPYBACK) {
2687 wimg_mode = VM_WIMG_USE_DEFAULT;
2688 } else if (access == MAP_MEM_INNERWBACK) {
2689 wimg_mode = VM_WIMG_INNERWBACK;
2690 } else if (access == MAP_MEM_WTHRU) {
2691 wimg_mode = VM_WIMG_WTHRU;
2692 } else if (access == MAP_MEM_WCOMB) {
2693 wimg_mode = VM_WIMG_WCOMB;
2694 }
2695
2696 /* wait for object (if any) to be ready */
2697 if (!named_entry->internal) {
2698 while (!object->pager_ready) {
2699 vm_object_wait(
2700 object,
2701 VM_OBJECT_EVENT_PAGER_READY,
2702 THREAD_UNINT);
2703 vm_object_lock(object);
2704 }
2705 }
2706
2707 if (object->wimg_bits != wimg_mode)
2708 vm_object_change_wimg_mode(object, wimg_mode);
2709
2710 #if VM_OBJECT_TRACKING_OP_TRUESHARE
2711 if (!object->true_share &&
2712 vm_object_tracking_inited) {
2713 void *bt[VM_OBJECT_TRACKING_BTDEPTH];
2714 int num = 0;
2715
2716 num = OSBacktrace(bt,
2717 VM_OBJECT_TRACKING_BTDEPTH);
2718 btlog_add_entry(vm_object_tracking_btlog,
2719 object,
2720 VM_OBJECT_TRACKING_OP_TRUESHARE,
2721 bt,
2722 num);
2723 }
2724 #endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */
2725
2726 object->true_share = TRUE;
2727
2728 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
2729 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
2730 vm_object_unlock(object);
2731
2732 } else if (named_entry->is_copy) {
2733 kern_return_t kr;
2734 vm_map_copy_t copy_map;
2735 vm_map_entry_t copy_entry;
2736 vm_map_offset_t copy_addr;
2737
2738 if (flags & ~(VM_FLAGS_FIXED |
2739 VM_FLAGS_ANYWHERE |
2740 VM_FLAGS_OVERWRITE |
2741 VM_FLAGS_RETURN_DATA_ADDR)) {
2742 named_entry_unlock(named_entry);
2743 return KERN_INVALID_ARGUMENT;
2744 }
2745
2746 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
2747 offset_in_mapping = offset - vm_object_trunc_page(offset);
2748 offset = vm_object_trunc_page(offset);
2749 map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset;
2750 }
2751
2752 copy_map = named_entry->backing.copy;
2753 assert(copy_map->type == VM_MAP_COPY_ENTRY_LIST);
2754 if (copy_map->type != VM_MAP_COPY_ENTRY_LIST) {
2755 /* unsupported type; should not happen */
2756 printf("vm_map_enter_mem_object: "
2757 "memory_entry->backing.copy "
2758 "unsupported type 0x%x\n",
2759 copy_map->type);
2760 named_entry_unlock(named_entry);
2761 return KERN_INVALID_ARGUMENT;
2762 }
2763
2764 /* reserve a contiguous range */
2765 kr = vm_map_enter(target_map,
2766 &map_addr,
2767 /* map whole mem entry, trim later: */
2768 named_entry->size,
2769 mask,
2770 flags & (VM_FLAGS_ANYWHERE |
2771 VM_FLAGS_OVERWRITE |
2772 VM_FLAGS_RETURN_DATA_ADDR),
2773 VM_OBJECT_NULL,
2774 0,
2775 FALSE, /* copy */
2776 cur_protection,
2777 max_protection,
2778 inheritance);
2779 if (kr != KERN_SUCCESS) {
2780 named_entry_unlock(named_entry);
2781 return kr;
2782 }
2783
2784 copy_addr = map_addr;
2785
2786 for (copy_entry = vm_map_copy_first_entry(copy_map);
2787 copy_entry != vm_map_copy_to_entry(copy_map);
2788 copy_entry = copy_entry->vme_next) {
2789 int remap_flags = 0;
2790 vm_map_t copy_submap;
2791 vm_object_t copy_object;
2792 vm_map_size_t copy_size;
2793 vm_object_offset_t copy_offset;
2794
2795 copy_offset = copy_entry->offset;
2796 copy_size = (copy_entry->vme_end -
2797 copy_entry->vme_start);
2798
2799 /* sanity check */
2800 if ((copy_addr + copy_size) >
2801 (map_addr +
2802 named_entry->size /* XXX full size */ )) {
2803 /* over-mapping too much !? */
2804 kr = KERN_INVALID_ARGUMENT;
2805 /* abort */
2806 break;
2807 }
2808
2809 /* take a reference on the object */
2810 if (copy_entry->is_sub_map) {
2811 remap_flags |= VM_FLAGS_SUBMAP;
2812 copy_submap =
2813 copy_entry->object.sub_map;
2814 vm_map_lock(copy_submap);
2815 vm_map_reference(copy_submap);
2816 vm_map_unlock(copy_submap);
2817 copy_object = (vm_object_t) copy_submap;
2818 } else {
2819 copy_object =
2820 copy_entry->object.vm_object;
2821 vm_object_reference(copy_object);
2822 }
2823
2824 /* over-map the object into destination */
2825 remap_flags |= flags;
2826 remap_flags |= VM_FLAGS_FIXED;
2827 remap_flags |= VM_FLAGS_OVERWRITE;
2828 remap_flags &= ~VM_FLAGS_ANYWHERE;
2829 kr = vm_map_enter(target_map,
2830 &copy_addr,
2831 copy_size,
2832 (vm_map_offset_t) 0,
2833 remap_flags,
2834 copy_object,
2835 copy_offset,
2836 copy,
2837 cur_protection,
2838 max_protection,
2839 inheritance);
2840 if (kr != KERN_SUCCESS) {
2841 if (copy_entry->is_sub_map) {
2842 vm_map_deallocate(copy_submap);
2843 } else {
2844 vm_object_deallocate(copy_object);
2845 }
2846 /* abort */
2847 break;
2848 }
2849
2850 /* next mapping */
2851 copy_addr += copy_size;
2852 }
2853
2854 if (kr == KERN_SUCCESS) {
2855 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
2856 *address = map_addr + offset_in_mapping;
2857 } else {
2858 *address = map_addr;
2859 }
2860
2861 if (offset) {
2862 /*
2863 * Trim in front, from 0 to "offset".
2864 */
2865 vm_map_remove(target_map,
2866 map_addr,
2867 map_addr + offset,
2868 0);
2869 *address += offset;
2870 }
2871 if (offset + map_size < named_entry->size) {
2872 /*
2873 * Trim in back, from
2874 * "offset + map_size" to
2875 * "named_entry->size".
2876 */
2877 vm_map_remove(target_map,
2878 (map_addr +
2879 offset + map_size),
2880 (map_addr +
2881 named_entry->size),
2882 0);
2883 }
2884 }
2885 named_entry_unlock(named_entry);
2886
2887 if (kr != KERN_SUCCESS) {
2888 if (! (flags & VM_FLAGS_OVERWRITE)) {
2889 /* deallocate the contiguous range */
2890 (void) vm_deallocate(target_map,
2891 map_addr,
2892 map_size);
2893 }
2894 }
2895
2896 return kr;
2897
2898 } else {
2899 /* This is the case where we are going to map */
2900 /* an already mapped object. If the object is */
2901 /* not ready, it is internal. An external */
2902 /* object cannot be mapped until it is ready, */
2903 /* so we can avoid the ready check in */
2904 /* this case. */
2905 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
2906 offset_in_mapping = offset - vm_object_trunc_page(offset);
2907 offset = vm_object_trunc_page(offset);
2908 map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset;
2909 }
2910
2911 object = named_entry->backing.object;
2912 assert(object != VM_OBJECT_NULL);
2913 named_entry_unlock(named_entry);
2914 vm_object_reference(object);
2915 }
2916 } else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
2917 /*
2918 * JMM - This is temporary until we unify named entries
2919 * and raw memory objects.
2920 *
2921 * Detected fake ip_kotype for a memory object. In
2922 * this case, the port isn't really a port at all, but
2923 * instead is just a raw memory object.
2924 */
2925 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
2926 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for raw memory object.");
2927 }
2928
2929 object = vm_object_enter((memory_object_t)port,
2930 size, FALSE, FALSE, FALSE);
2931 if (object == VM_OBJECT_NULL)
2932 return KERN_INVALID_OBJECT;
2933
2934 /* wait for object (if any) to be ready */
2935 if (object != VM_OBJECT_NULL) {
2936 if (object == kernel_object) {
2937 printf("Warning: Attempt to map kernel object"
2938 " by a non-private kernel entity\n");
2939 return KERN_INVALID_OBJECT;
2940 }
2941 if (!object->pager_ready) {
2942 vm_object_lock(object);
2943
2944 while (!object->pager_ready) {
2945 vm_object_wait(object,
2946 VM_OBJECT_EVENT_PAGER_READY,
2947 THREAD_UNINT);
2948 vm_object_lock(object);
2949 }
2950 vm_object_unlock(object);
2951 }
2952 }
2953 } else {
2954 return KERN_INVALID_OBJECT;
2955 }
2956
2957 if (object != VM_OBJECT_NULL &&
2958 object->named &&
2959 object->pager != MEMORY_OBJECT_NULL &&
2960 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
2961 memory_object_t pager;
2962 vm_prot_t pager_prot;
2963 kern_return_t kr;
2964
2965 /*
2966 * For "named" VM objects, let the pager know that the
2967 * memory object is being mapped. Some pagers need to keep
2968 * track of this, to know when they can reclaim the memory
2969 * object, for example.
2970 * VM calls memory_object_map() for each mapping (specifying
2971 * the protection of each mapping) and calls
2972 * memory_object_last_unmap() when all the mappings are gone.
2973 */
2974 pager_prot = max_protection;
2975 if (copy) {
2976 /*
2977 * Copy-On-Write mapping: won't modify the
2978 * memory object.
2979 */
2980 pager_prot &= ~VM_PROT_WRITE;
2981 }
2982 vm_object_lock(object);
2983 pager = object->pager;
2984 if (object->named &&
2985 pager != MEMORY_OBJECT_NULL &&
2986 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
2987 assert(object->pager_ready);
2988 vm_object_mapping_wait(object, THREAD_UNINT);
2989 vm_object_mapping_begin(object);
2990 vm_object_unlock(object);
2991
2992 kr = memory_object_map(pager, pager_prot);
2993 assert(kr == KERN_SUCCESS);
2994
2995 vm_object_lock(object);
2996 vm_object_mapping_end(object);
2997 }
2998 vm_object_unlock(object);
2999 }
3000
3001 /*
3002 * Perform the copy if requested
3003 */
3004
3005 if (copy) {
3006 vm_object_t new_object;
3007 vm_object_offset_t new_offset;
3008
3009 result = vm_object_copy_strategically(object, offset, size,
3010 &new_object, &new_offset,
3011 &copy);
3012
3013
3014 if (result == KERN_MEMORY_RESTART_COPY) {
3015 boolean_t success;
3016 boolean_t src_needs_copy;
3017
3018 /*
3019 * XXX
3020 * We currently ignore src_needs_copy.
3021 * This really is the issue of how to make
3022 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
3023 * non-kernel users to use. Solution forthcoming.
3024 * In the meantime, since we don't allow non-kernel
3025 * memory managers to specify symmetric copy,
3026 * we won't run into problems here.
3027 */
3028 new_object = object;
3029 new_offset = offset;
3030 success = vm_object_copy_quickly(&new_object,
3031 new_offset, size,
3032 &src_needs_copy,
3033 &copy);
3034 assert(success);
3035 result = KERN_SUCCESS;
3036 }
3037 /*
3038 * Throw away the reference to the
3039 * original object, as it won't be mapped.
3040 */
3041
3042 vm_object_deallocate(object);
3043
3044 if (result != KERN_SUCCESS)
3045 return result;
3046
3047 object = new_object;
3048 offset = new_offset;
3049 }
3050
3051 /*
3052 * If users want to try to prefault pages, the mapping and prefault
3053 * needs to be atomic.
3054 */
3055 if (try_prefault)
3056 flags |= VM_FLAGS_KEEP_MAP_LOCKED;
3057 result = vm_map_enter(target_map,
3058 &map_addr, map_size,
3059 (vm_map_offset_t)mask,
3060 flags,
3061 object, offset,
3062 copy,
3063 cur_protection, max_protection, inheritance);
3064 if (result != KERN_SUCCESS)
3065 vm_object_deallocate(object);
3066
3067 /*
3068 * Try to prefault, and do not forget to release the vm map lock.
3069 */
3070 if (result == KERN_SUCCESS && try_prefault) {
3071 mach_vm_address_t va = map_addr;
3072 kern_return_t kr = KERN_SUCCESS;
3073 unsigned int i = 0;
3074
3075 for (i = 0; i < page_list_count; ++i) {
3076 if (UPL_VALID_PAGE(page_list, i)) {
3077 /*
3078 * If this call fails, we should stop trying
3079 * to optimize; subsequent calls are likely
3080 * to fail too.
3081 *
3082 * We do not report an error for such a
3083 * failure, though: prefaulting is an
3084 * optimization, not something critical.
3085 */
3086 kr = pmap_enter_options(target_map->pmap,
3087 va, UPL_PHYS_PAGE(page_list, i),
3088 cur_protection, VM_PROT_NONE,
3089 0, TRUE, PMAP_OPTIONS_NOWAIT, NULL);
3090 if (kr != KERN_SUCCESS) {
3091 OSIncrementAtomic64(&vm_prefault_nb_bailout);
3092 goto BailOut;
3093 }
3094 OSIncrementAtomic64(&vm_prefault_nb_pages);
3095 }
3096
3097 /* Next virtual address */
3098 va += PAGE_SIZE;
3099 }
3100 BailOut:
3101 vm_map_unlock(target_map);
3102 }
3103
3104 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
3105 *address = map_addr + offset_in_mapping;
3106 } else {
3107 *address = map_addr;
3108 }
3109 return result;
3110 }
3111
3112 kern_return_t
3113 vm_map_enter_mem_object(
3114 vm_map_t target_map,
3115 vm_map_offset_t *address,
3116 vm_map_size_t initial_size,
3117 vm_map_offset_t mask,
3118 int flags,
3119 ipc_port_t port,
3120 vm_object_offset_t offset,
3121 boolean_t copy,
3122 vm_prot_t cur_protection,
3123 vm_prot_t max_protection,
3124 vm_inherit_t inheritance)
3125 {
3126 return vm_map_enter_mem_object_helper(target_map, address, initial_size, mask, flags,
3127 port, offset, copy, cur_protection, max_protection,
3128 inheritance, NULL, 0);
3129 }
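/*
 * Illustrative sketch (hypothetical variable names): this entry
 * point is typically reached from mach_vm_map()-style callers that
 * name memory by an ipc_port_t (a named entry or raw memory object).
 * Mapping a named entry read-only at any address might look like:
 *
 *	vm_map_offset_t	addr = 0;
 *	kern_return_t	kr;
 *
 *	kr = vm_map_enter_mem_object(map, &addr, size,
 *				     (vm_map_offset_t) 0,
 *				     VM_FLAGS_ANYWHERE,
 *				     mem_entry_port,
 *				     (vm_object_offset_t) 0,
 *				     FALSE,	      (copy)
 *				     VM_PROT_READ,
 *				     VM_PROT_READ,
 *				     VM_INHERIT_DEFAULT);
 */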
3130
3131 kern_return_t
3132 vm_map_enter_mem_object_prefault(
3133 vm_map_t target_map,
3134 vm_map_offset_t *address,
3135 vm_map_size_t initial_size,
3136 vm_map_offset_t mask,
3137 int flags,
3138 ipc_port_t port,
3139 vm_object_offset_t offset,
3140 vm_prot_t cur_protection,
3141 vm_prot_t max_protection,
3142 upl_page_list_ptr_t page_list,
3143 unsigned int page_list_count)
3144 {
3145 return vm_map_enter_mem_object_helper(target_map, address, initial_size, mask, flags,
3146 port, offset, FALSE, cur_protection, max_protection,
3147 VM_INHERIT_DEFAULT, page_list, page_list_count);
3148 }
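/*
 * Descriptive note: the prefault variant differs from
 * vm_map_enter_mem_object() only in that it never copies, always
 * uses VM_INHERIT_DEFAULT, and supplies a UPL page list so the
 * helper can opportunistically enter those physical pages into the
 * pmap while the map lock is still held.
 */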
3149
3150
3151 kern_return_t
3152 vm_map_enter_mem_object_control(
3153 vm_map_t target_map,
3154 vm_map_offset_t *address,
3155 vm_map_size_t initial_size,
3156 vm_map_offset_t mask,
3157 int flags,
3158 memory_object_control_t control,
3159 vm_object_offset_t offset,
3160 boolean_t copy,
3161 vm_prot_t cur_protection,
3162 vm_prot_t max_protection,
3163 vm_inherit_t inheritance)
3164 {
3165 vm_map_address_t map_addr;
3166 vm_map_size_t map_size;
3167 vm_object_t object;
3168 vm_object_size_t size;
3169 kern_return_t result;
3170 memory_object_t pager;
3171 vm_prot_t pager_prot;
3172 kern_return_t kr;
3173
3174 /*
3175 * Check arguments for validity
3176 */
3177 if ((target_map == VM_MAP_NULL) ||
3178 (cur_protection & ~VM_PROT_ALL) ||
3179 (max_protection & ~VM_PROT_ALL) ||
3180 (inheritance > VM_INHERIT_LAST_VALID) ||
3181 initial_size == 0)
3182 return KERN_INVALID_ARGUMENT;
3183
3184 map_addr = vm_map_trunc_page(*address,
3185 VM_MAP_PAGE_MASK(target_map));
3186 map_size = vm_map_round_page(initial_size,
3187 VM_MAP_PAGE_MASK(target_map));
3188 size = vm_object_round_page(initial_size);
3189
3190 object = memory_object_control_to_vm_object(control);
3191
3192 if (object == VM_OBJECT_NULL)
3193 return KERN_INVALID_OBJECT;
3194
3195 if (object == kernel_object) {
3196 printf("Warning: Attempt to map kernel object"
3197 " by a non-private kernel entity\n");
3198 return KERN_INVALID_OBJECT;
3199 }
3200
3201 vm_object_lock(object);
3202 object->ref_count++;
3203 vm_object_res_reference(object);
3204
3205 /*
3206 * For "named" VM objects, let the pager know that the
3207 * memory object is being mapped. Some pagers need to keep
3208 * track of this, to know when they can reclaim the memory
3209 * object, for example.
3210 * VM calls memory_object_map() for each mapping (specifying
3211 * the protection of each mapping) and calls
3212 * memory_object_last_unmap() when all the mappings are gone.
3213 */
3214 pager_prot = max_protection;
3215 if (copy) {
3216 pager_prot &= ~VM_PROT_WRITE;
3217 }
3218 pager = object->pager;
3219 if (object->named &&
3220 pager != MEMORY_OBJECT_NULL &&
3221 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
3222 assert(object->pager_ready);
3223 vm_object_mapping_wait(object, THREAD_UNINT);
3224 vm_object_mapping_begin(object);
3225 vm_object_unlock(object);
3226
3227 kr = memory_object_map(pager, pager_prot);
3228 assert(kr == KERN_SUCCESS);
3229
3230 vm_object_lock(object);
3231 vm_object_mapping_end(object);
3232 }
3233 vm_object_unlock(object);
3234
3235 /*
3236 * Perform the copy if requested
3237 */
3238
3239 if (copy) {
3240 vm_object_t new_object;
3241 vm_object_offset_t new_offset;
3242
3243 result = vm_object_copy_strategically(object, offset, size,
3244 &new_object, &new_offset,
3245 &copy);
3246
3247
3248 if (result == KERN_MEMORY_RESTART_COPY) {
3249 boolean_t success;
3250 boolean_t src_needs_copy;
3251
3252 /*
3253 * XXX
3254 * We currently ignore src_needs_copy.
3255 * This really is the issue of how to make
3256 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
3257 * non-kernel users to use. Solution forthcoming.
3258 * In the meantime, since we don't allow non-kernel
3259 * memory managers to specify symmetric copy,
3260 * we won't run into problems here.
3261 */
3262 new_object = object;
3263 new_offset = offset;
3264 success = vm_object_copy_quickly(&new_object,
3265 new_offset, size,
3266 &src_needs_copy,
3267 &copy);
3268 assert(success);
3269 result = KERN_SUCCESS;
3270 }
3271 /*
3272 * Throw away the reference to the
3273 * original object, as it won't be mapped.
3274 */
3275
3276 vm_object_deallocate(object);
3277
3278 if (result != KERN_SUCCESS)
3279 return result;
3280
3281 object = new_object;
3282 offset = new_offset;
3283 }
3284
3285 result = vm_map_enter(target_map,
3286 &map_addr, map_size,
3287 (vm_map_offset_t)mask,
3288 flags,
3289 object, offset,
3290 copy,
3291 cur_protection, max_protection, inheritance);
3292 if (result != KERN_SUCCESS)
3293 vm_object_deallocate(object);
3294 *address = map_addr;
3295
3296 return result;
3297 }
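/*
 * Descriptive note: this variant mirrors vm_map_enter_mem_object(),
 * except that the memory is named by a memory_object_control_t
 * rather than an ipc_port_t, so the VM object is obtained directly
 * from the control and given an extra reference before mapping.
 */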
3298
3299
3300 #if VM_CPM
3301
3302 #ifdef MACH_ASSERT
3303 extern pmap_paddr_t avail_start, avail_end;
3304 #endif
3305
3306 /*
3307 * Allocate memory in the specified map, with the caveat that
3308 * the memory is physically contiguous. This call may fail
3309 * if the system can't find sufficient contiguous memory.
3310 * This call may cause or lead to heart-stopping amounts of
3311 * paging activity.
3312 *
3313 * Memory obtained from this call should be freed in the
3314 * normal way, viz., via vm_deallocate.
3315 */
3316 kern_return_t
3317 vm_map_enter_cpm(
3318 vm_map_t map,
3319 vm_map_offset_t *addr,
3320 vm_map_size_t size,
3321 int flags)
3322 {
3323 vm_object_t cpm_obj;
3324 pmap_t pmap;
3325 vm_page_t m, pages;
3326 kern_return_t kr;
3327 vm_map_offset_t va, start, end, offset;
3328 #if MACH_ASSERT
3329 vm_map_offset_t prev_addr = 0;
3330 #endif /* MACH_ASSERT */
3331
3332 boolean_t anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
3333
3334 if (size == 0) {
3335 *addr = 0;
3336 return KERN_SUCCESS;
3337 }
3338 if (anywhere)
3339 *addr = vm_map_min(map);
3340 else
3341 *addr = vm_map_trunc_page(*addr,
3342 VM_MAP_PAGE_MASK(map));
3343 size = vm_map_round_page(size,
3344 VM_MAP_PAGE_MASK(map));
3345
3346 /*
3347 * LP64todo - cpm_allocate should probably allow
3348 * allocations of >4GB, but not with the current
3349 * algorithm, so just cast down the size for now.
3350 */
3351 if (size > VM_MAX_ADDRESS)
3352 return KERN_RESOURCE_SHORTAGE;
3353 if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
3354 &pages, 0, 0, TRUE, flags)) != KERN_SUCCESS)
3355 return kr;
3356
3357 cpm_obj = vm_object_allocate((vm_object_size_t)size);
3358 assert(cpm_obj != VM_OBJECT_NULL);
3359 assert(cpm_obj->internal);
3360 assert(cpm_obj->vo_size == (vm_object_size_t)size);
3361 assert(cpm_obj->can_persist == FALSE);
3362 assert(cpm_obj->pager_created == FALSE);
3363 assert(cpm_obj->pageout == FALSE);
3364 assert(cpm_obj->shadow == VM_OBJECT_NULL);
3365
3366 /*
3367 * Insert pages into object.
3368 */
3369
3370 vm_object_lock(cpm_obj);
3371 for (offset = 0; offset < size; offset += PAGE_SIZE) {
3372 m = pages;
3373 pages = NEXT_PAGE(m);
3374 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
3375
3376 assert(!m->gobbled);
3377 assert(!m->wanted);
3378 assert(!m->pageout);
3379 assert(!m->tabled);
3380 assert(VM_PAGE_WIRED(m));
3381 /*
3382 * ENCRYPTED SWAP:
3383 * "m" is not supposed to be pageable, so it
3384 * should not be encrypted. It wouldn't be safe
3385 * to enter it in a new VM object while encrypted.
3386 */
3387 ASSERT_PAGE_DECRYPTED(m);
3388 assert(m->busy);
3389 assert(m->phys_page>=(avail_start>>PAGE_SHIFT) && m->phys_page<=(avail_end>>PAGE_SHIFT));
3390
3391 m->busy = FALSE;
3392 vm_page_insert(m, cpm_obj, offset);
3393 }
3394 assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
3395 vm_object_unlock(cpm_obj);
3396
3397 /*
3398 * Hang onto a reference on the object in case a
3399 * multi-threaded application for some reason decides
3400 * to deallocate the portion of the address space into
3401 * which we will insert this object.
3402 *
3403 * Unfortunately, we must insert the object now before
3404 * we can talk to the pmap module about which addresses
3405 * must be wired down. Hence, the race with a multi-
3406 * threaded app.
3407 */
3408 vm_object_reference(cpm_obj);
3409
3410 /*
3411 * Insert object into map.
3412 */
3413
3414 kr = vm_map_enter(
3415 map,
3416 addr,
3417 size,
3418 (vm_map_offset_t)0,
3419 flags,
3420 cpm_obj,
3421 (vm_object_offset_t)0,
3422 FALSE,
3423 VM_PROT_ALL,
3424 VM_PROT_ALL,
3425 VM_INHERIT_DEFAULT);
3426
3427 if (kr != KERN_SUCCESS) {
3428 /*
3429 * A CPM object doesn't have can_persist set,
3430 * so all we have to do is deallocate it to
3431 * free up these pages.
3432 */
3433 assert(cpm_obj->pager_created == FALSE);
3434 assert(cpm_obj->can_persist == FALSE);
3435 assert(cpm_obj->pageout == FALSE);
3436 assert(cpm_obj->shadow == VM_OBJECT_NULL);
3437 vm_object_deallocate(cpm_obj); /* kill acquired ref */
3438 vm_object_deallocate(cpm_obj); /* kill creation ref */
3439 }
3440
3441 /*
3442 * Inform the physical mapping system that the
3443 * range of addresses may not fault, so that
3444 * page tables and such can be locked down as well.
3445 */
3446 start = *addr;
3447 end = start + size;
3448 pmap = vm_map_pmap(map);
3449 pmap_pageable(pmap, start, end, FALSE);
3450
3451 /*
3452 * Enter each page into the pmap, to avoid faults.
3453 * Note that this loop could be coded more efficiently,
3454 * if the need arose, rather than looking up each page
3455 * again.
3456 */
3457 for (offset = 0, va = start; offset < size;
3458 va += PAGE_SIZE, offset += PAGE_SIZE) {
3459 int type_of_fault;
3460
3461 vm_object_lock(cpm_obj);
3462 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
3463 assert(m != VM_PAGE_NULL);
3464
3465 vm_page_zero_fill(m);
3466
3467 type_of_fault = DBG_ZERO_FILL_FAULT;
3468
3469 vm_fault_enter(m, pmap, va, VM_PROT_ALL, VM_PROT_WRITE,
3470 VM_PAGE_WIRED(m), FALSE, FALSE, FALSE, 0, NULL,
3471 &type_of_fault);
3472
3473 vm_object_unlock(cpm_obj);
3474 }
3475
3476 #if MACH_ASSERT
3477 /*
3478 * Verify ordering in address space.
3479 */
3480 for (offset = 0; offset < size; offset += PAGE_SIZE) {
3481 vm_object_lock(cpm_obj);
3482 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
3483 vm_object_unlock(cpm_obj);
3484 if (m == VM_PAGE_NULL)
3485 panic("vm_allocate_cpm: obj %p off 0x%llx no page",
3486 cpm_obj, (uint64_t)offset);
3487 assert(m->tabled);
3488 assert(!m->busy);
3489 assert(!m->wanted);
3490 assert(!m->fictitious);
3491 assert(!m->private);
3492 assert(!m->absent);
3493 assert(!m->error);
3494 assert(!m->cleaning);
3495 assert(!m->laundry);
3496 assert(!m->precious);
3497 assert(!m->clustered);
3498 if (offset != 0) {
3499 if (m->phys_page != prev_addr + 1) {
3500 printf("start 0x%llx end 0x%llx va 0x%llx\n",
3501 (uint64_t)start, (uint64_t)end, (uint64_t)va);
3502 printf("obj %p off 0x%llx\n", cpm_obj, (uint64_t)offset);
3503 printf("m %p prev_address 0x%llx\n", m, (uint64_t)prev_addr);
3504 panic("vm_allocate_cpm: pages not contig!");
3505 }
3506 }
3507 prev_addr = m->phys_page;
3508 }
3509 #endif /* MACH_ASSERT */
3510
3511 vm_object_deallocate(cpm_obj); /* kill extra ref */
3512
3513 return kr;
3514 }
3515
3516
3517 #else /* VM_CPM */
3518
3519 /*
3520 * Interface is defined in all cases, but unless the kernel
3521 * is built explicitly for this option, the interface does
3522 * nothing.
3523 */
3524
3525 kern_return_t
3526 vm_map_enter_cpm(
3527 __unused vm_map_t map,
3528 __unused vm_map_offset_t *addr,
3529 __unused vm_map_size_t size,
3530 __unused int flags)
3531 {
3532 return KERN_FAILURE;
3533 }
3534 #endif /* VM_CPM */
3535
3536 /* Not used without nested pmaps */
3537 #ifndef NO_NESTED_PMAP
3538 /*
3539 * Clip and unnest a portion of a nested submap mapping.
3540 */
3541
3542
3543 static void
3544 vm_map_clip_unnest(
3545 vm_map_t map,
3546 vm_map_entry_t entry,
3547 vm_map_offset_t start_unnest,
3548 vm_map_offset_t end_unnest)
3549 {
3550 vm_map_offset_t old_start_unnest = start_unnest;
3551 vm_map_offset_t old_end_unnest = end_unnest;
3552
3553 assert(entry->is_sub_map);
3554 assert(entry->object.sub_map != NULL);
3555 assert(entry->use_pmap);
3556
3557 /*
3558 * Query the platform for the optimal unnest range.
3559 * DRK: There's some duplication of effort here, since
3560 * callers may have adjusted the range to some extent. This
3561 * routine was introduced to support 1GiB subtree nesting
3562 * for x86 platforms, which can also nest on 2MiB boundaries
3563 * depending on size/alignment.
3564 */
3565 if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) {
3566 log_unnest_badness(map, old_start_unnest, old_end_unnest);
3567 }
3568
3569 if (entry->vme_start > start_unnest ||
3570 entry->vme_end < end_unnest) {
3571 panic("vm_map_clip_unnest(0x%llx,0x%llx): "
3572 "bad nested entry: start=0x%llx end=0x%llx\n",
3573 (long long)start_unnest, (long long)end_unnest,
3574 (long long)entry->vme_start, (long long)entry->vme_end);
3575 }
3576
3577 if (start_unnest > entry->vme_start) {
3578 _vm_map_clip_start(&map->hdr,
3579 entry,
3580 start_unnest);
3581 vm_map_store_update_first_free(map, map->first_free);
3582 }
3583 if (entry->vme_end > end_unnest) {
3584 _vm_map_clip_end(&map->hdr,
3585 entry,
3586 end_unnest);
3587 vm_map_store_update_first_free(map, map->first_free);
3588 }
3589
3590 pmap_unnest(map->pmap,
3591 entry->vme_start,
3592 entry->vme_end - entry->vme_start);
3593 if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
3594 /* clean up parent map/maps */
3595 vm_map_submap_pmap_clean(
3596 map, entry->vme_start,
3597 entry->vme_end,
3598 entry->object.sub_map,
3599 entry->offset);
3600 }
3601 entry->use_pmap = FALSE;
3602 if (entry->alias == VM_MEMORY_SHARED_PMAP) {
3603 entry->alias = VM_MEMORY_UNSHARED_PMAP;
3604 }
3605 }
3606 #endif /* NO_NESTED_PMAP */
3607
3608 /*
3609 * vm_map_clip_start: [ internal use only ]
3610 *
3611 * Asserts that the given entry begins at or after
3612 * the specified address; if necessary,
3613 * it splits the entry into two.
3614 */
3615 void
3616 vm_map_clip_start(
3617 vm_map_t map,
3618 vm_map_entry_t entry,
3619 vm_map_offset_t startaddr)
3620 {
3621 #ifndef NO_NESTED_PMAP
3622 if (entry->is_sub_map &&
3623 entry->use_pmap &&
3624 startaddr >= entry->vme_start) {
3625 vm_map_offset_t start_unnest, end_unnest;
3626
3627 /*
3628 * Make sure "startaddr" is no longer in a nested range
3629 * before we clip. Unnest only the minimum range the platform
3630 * can handle.
3631 * vm_map_clip_unnest may perform additional adjustments to
3632 * the unnest range.
3633 */
3634 start_unnest = startaddr & ~(pmap_nesting_size_min - 1);
3635 end_unnest = start_unnest + pmap_nesting_size_min;
3636 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
3637 }
3638 #endif /* NO_NESTED_PMAP */
3639 if (startaddr > entry->vme_start) {
3640 if (entry->object.vm_object &&
3641 !entry->is_sub_map &&
3642 entry->object.vm_object->phys_contiguous) {
3643 pmap_remove(map->pmap,
3644 (addr64_t)(entry->vme_start),
3645 (addr64_t)(entry->vme_end));
3646 }
3647 _vm_map_clip_start(&map->hdr, entry, startaddr);
3648 vm_map_store_update_first_free(map, map->first_free);
3649 }
3650 }
3651
3652
3653 #define vm_map_copy_clip_start(copy, entry, startaddr) \
3654 MACRO_BEGIN \
3655 if ((startaddr) > (entry)->vme_start) \
3656 _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \
3657 MACRO_END
3658
3659 /*
3660 * This routine is called only when it is known that
3661 * the entry must be split.
3662 */
3663 static void
3664 _vm_map_clip_start(
3665 register struct vm_map_header *map_header,
3666 register vm_map_entry_t entry,
3667 register vm_map_offset_t start)
3668 {
3669 register vm_map_entry_t new_entry;
3670
3671 /*
3672 * Split off the front portion --
3673 * note that we must insert the new
3674 * entry BEFORE this one, so that
3675 * this entry has the specified starting
3676 * address.
3677 */
3678
3679 if (entry->map_aligned) {
3680 assert(VM_MAP_PAGE_ALIGNED(start,
3681 VM_MAP_HDR_PAGE_MASK(map_header)));
3682 }
3683
3684 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
3685 vm_map_entry_copy_full(new_entry, entry);
3686
3687 new_entry->vme_end = start;
3688 assert(new_entry->vme_start < new_entry->vme_end);
3689 entry->offset += (start - entry->vme_start);
3690 assert(start < entry->vme_end);
3691 entry->vme_start = start;
3692
3693 _vm_map_store_entry_link(map_header, entry->vme_prev, new_entry);
3694
3695 if (entry->is_sub_map)
3696 vm_map_reference(new_entry->object.sub_map);
3697 else
3698 vm_object_reference(new_entry->object.vm_object);
3699 }
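/*
 * Worked example with hypothetical addresses: clipping an entry that
 * covers [0x1000, 0x5000) at start == 0x3000 inserts a new entry for
 * [0x1000, 0x3000) BEFORE the original, while the original entry is
 * trimmed to [0x3000, 0x5000) and its object offset is advanced by
 * 0x2000. Both entries reference the same object (or submap), hence
 * the extra reference taken above.
 */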
3700
3701
3702 /*
3703 * vm_map_clip_end: [ internal use only ]
3704 *
3705 * Asserts that the given entry ends at or before
3706 * the specified address; if necessary,
3707 * it splits the entry into two.
3708 */
3709 void
3710 vm_map_clip_end(
3711 vm_map_t map,
3712 vm_map_entry_t entry,
3713 vm_map_offset_t endaddr)
3714 {
3715 if (endaddr > entry->vme_end) {
3716 /*
3717 * Within the scope of this clipping, limit "endaddr" to
3718 * the end of this map entry...
3719 */
3720 endaddr = entry->vme_end;
3721 }
3722 #ifndef NO_NESTED_PMAP
3723 if (entry->is_sub_map && entry->use_pmap) {
3724 vm_map_offset_t start_unnest, end_unnest;
3725
3726 /*
3727 * Make sure the range between the start of this entry and
3728 * the new "endaddr" is no longer nested before we clip.
3729 * Unnest only the minimum range the platform can handle.
3730 * vm_map_clip_unnest may perform additional adjustments to
3731 * the unnest range.
3732 */
3733 start_unnest = entry->vme_start;
3734 end_unnest =
3735 (endaddr + pmap_nesting_size_min - 1) &
3736 ~(pmap_nesting_size_min - 1);
3737 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
3738 }
3739 #endif /* NO_NESTED_PMAP */
3740 if (endaddr < entry->vme_end) {
3741 if (entry->object.vm_object &&
3742 !entry->is_sub_map &&
3743 entry->object.vm_object->phys_contiguous) {
3744 pmap_remove(map->pmap,
3745 (addr64_t)(entry->vme_start),
3746 (addr64_t)(entry->vme_end));
3747 }
3748 _vm_map_clip_end(&map->hdr, entry, endaddr);
3749 vm_map_store_update_first_free(map, map->first_free);
3750 }
3751 }
3752
3753
3754 #define vm_map_copy_clip_end(copy, entry, endaddr) \
3755 MACRO_BEGIN \
3756 if ((endaddr) < (entry)->vme_end) \
3757 _vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \
3758 MACRO_END
3759
3760 /*
3761 * This routine is called only when it is known that
3762 * the entry must be split.
3763 */
3764 static void
3765 _vm_map_clip_end(
3766 register struct vm_map_header *map_header,
3767 register vm_map_entry_t entry,
3768 register vm_map_offset_t end)
3769 {
3770 register vm_map_entry_t new_entry;
3771
3772 /*
3773 * Create a new entry and insert it
3774 * AFTER the specified entry
3775 */
3776
3777 if (entry->map_aligned) {
3778 assert(VM_MAP_PAGE_ALIGNED(end,
3779 VM_MAP_HDR_PAGE_MASK(map_header)));
3780 }
3781
3782 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
3783 vm_map_entry_copy_full(new_entry, entry);
3784
3785 assert(entry->vme_start < end);
3786 new_entry->vme_start = entry->vme_end = end;
3787 new_entry->offset += (end - entry->vme_start);
3788 assert(new_entry->vme_start < new_entry->vme_end);
3789
3790 _vm_map_store_entry_link(map_header, entry, new_entry);
3791
3792 if (entry->is_sub_map)
3793 vm_map_reference(new_entry->object.sub_map);
3794 else
3795 vm_object_reference(new_entry->object.vm_object);
3796 }
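/*
 * Worked example with hypothetical addresses: clipping an entry that
 * covers [0x1000, 0x5000) at end == 0x3000 trims the original entry
 * to [0x1000, 0x3000) and inserts a new entry for [0x3000, 0x5000)
 * AFTER it, with the new entry's object offset advanced by 0x2000;
 * the shared object or submap gains an extra reference, as above.
 */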
3797
3798
3799 /*
3800 * VM_MAP_RANGE_CHECK: [ internal use only ]
3801 *
3802 * Asserts that the starting and ending region
3803 * addresses fall within the valid range of the map.
3804 */
3805 #define VM_MAP_RANGE_CHECK(map, start, end) \
3806 MACRO_BEGIN \
3807 if (start < vm_map_min(map)) \
3808 start = vm_map_min(map); \
3809 if (end > vm_map_max(map)) \
3810 end = vm_map_max(map); \
3811 if (start > end) \
3812 start = end; \
3813 MACRO_END
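/*
 * Illustrative note with hypothetical values: for a map spanning
 * [0x1000, 0x100000), VM_MAP_RANGE_CHECK(map, 0x0, 0x200000) clamps
 * the pair to (0x1000, 0x100000), while an inverted request such as
 * (0x5000, 0x2000) degenerates to the empty range (0x2000, 0x2000).
 */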
3814
3815 /*
3816 * vm_map_range_check: [ internal use only ]
3817 *
3818 * Check that the region defined by the specified start and
3819 * end addresses is wholly contained within a single map
3820 * entry or set of adjacent map entries of the specified map,
3821 * i.e. the specified region contains no unmapped space.
3822 * If any or all of the region is unmapped, FALSE is returned.
3823 * Otherwise, TRUE is returned and if the output argument 'entry'
3824 * is not NULL it points to the map entry containing the start
3825 * of the region.
3826 *
3827 * The map is locked for reading on entry and is left locked.
3828 */
3829 static boolean_t
3830 vm_map_range_check(
3831 register vm_map_t map,
3832 register vm_map_offset_t start,
3833 register vm_map_offset_t end,
3834 vm_map_entry_t *entry)
3835 {
3836 vm_map_entry_t cur;
3837 register vm_map_offset_t prev;
3838
3839 /*
3840 * Basic sanity checks first
3841 */
3842 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
3843 return (FALSE);
3844
3845 /*
3846 * Check first if the region starts within a valid
3847 * mapping for the map.
3848 */
3849 if (!vm_map_lookup_entry(map, start, &cur))
3850 return (FALSE);
3851
3852 /*
3853 * Optimize for the case that the region is contained
3854 * in a single map entry.
3855 */
3856 if (entry != (vm_map_entry_t *) NULL)
3857 *entry = cur;
3858 if (end <= cur->vme_end)
3859 return (TRUE);
3860
3861 /*
3862 * If the region is not wholly contained within a
3863 * single entry, walk the entries looking for holes.
3864 */
3865 prev = cur->vme_end;
3866 cur = cur->vme_next;
3867 while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
3868 if (end <= cur->vme_end)
3869 return (TRUE);
3870 prev = cur->vme_end;
3871 cur = cur->vme_next;
3872 }
3873 return (FALSE);
3874 }
3875
3876 /*
3877 * vm_map_submap: [ kernel use only ]
3878 *
3879 * Mark the given range as handled by a subordinate map.
3880 *
3881 * This range must have been created with vm_map_find using
3882 * the vm_submap_object, and no other operations may have been
3883 * performed on this range prior to calling vm_map_submap.
3884 *
3885 * Only a limited number of operations can be performed
3886 * within this range after calling vm_map_submap:
3887 * vm_fault
3888 * [Don't try vm_map_copyin!]
3889 *
3890 * To remove a submapping, one must first remove the
3891 * range from the superior map, and then destroy the
3892 * submap (if desired). [Better yet, don't try it.]
3893 */
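/*
 * Call sketch (illustrative): once the range has been reserved as described
 * above, nesting a previously created submap looks like:
 *
 *	kr = vm_map_submap(parent_map, start, end, child_map,
 *			   (vm_map_offset_t)0, TRUE);
 *
 * "parent_map" and "child_map" are hypothetical names; passing TRUE for
 * use_pmap requests pmap nesting where the platform supports it.
 */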
3894 kern_return_t
3895 vm_map_submap(
3896 vm_map_t map,
3897 vm_map_offset_t start,
3898 vm_map_offset_t end,
3899 vm_map_t submap,
3900 vm_map_offset_t offset,
3901 #ifdef NO_NESTED_PMAP
3902 __unused
3903 #endif /* NO_NESTED_PMAP */
3904 boolean_t use_pmap)
3905 {
3906 vm_map_entry_t entry;
3907 register kern_return_t result = KERN_INVALID_ARGUMENT;
3908 register vm_object_t object;
3909
3910 vm_map_lock(map);
3911
3912 if (! vm_map_lookup_entry(map, start, &entry)) {
3913 entry = entry->vme_next;
3914 }
3915
3916 if (entry == vm_map_to_entry(map) ||
3917 entry->is_sub_map) {
3918 vm_map_unlock(map);
3919 return KERN_INVALID_ARGUMENT;
3920 }
3921
3922 vm_map_clip_start(map, entry, start);
3923 vm_map_clip_end(map, entry, end);
3924
3925 if ((entry->vme_start == start) && (entry->vme_end == end) &&
3926 (!entry->is_sub_map) &&
3927 ((object = entry->object.vm_object) == vm_submap_object) &&
3928 (object->resident_page_count == 0) &&
3929 (object->copy == VM_OBJECT_NULL) &&
3930 (object->shadow == VM_OBJECT_NULL) &&
3931 (!object->pager_created)) {
3932 entry->offset = (vm_object_offset_t)offset;
3933 entry->object.vm_object = VM_OBJECT_NULL;
3934 vm_object_deallocate(object);
3935 entry->is_sub_map = TRUE;
3936 entry->use_pmap = FALSE;
3937 entry->object.sub_map = submap;
3938 vm_map_reference(submap);
3939 if (submap->mapped_in_other_pmaps == FALSE &&
3940 vm_map_pmap(submap) != PMAP_NULL &&
3941 vm_map_pmap(submap) != vm_map_pmap(map)) {
3942 /*
3943 * This submap is being mapped in a map
3944 * that uses a different pmap.
3945 * Set its "mapped_in_other_pmaps" flag
3946 * to indicate that we now need to
3947 * remove mappings from all pmaps rather
3948 * than just the submap's pmap.
3949 */
3950 submap->mapped_in_other_pmaps = TRUE;
3951 }
3952
3953 #ifndef NO_NESTED_PMAP
3954 if (use_pmap) {
3955 /* nest if platform code will allow */
3956 if(submap->pmap == NULL) {
3957 ledger_t ledger = map->pmap->ledger;
3958 submap->pmap = pmap_create(ledger,
3959 (vm_map_size_t) 0, FALSE);
3960 if(submap->pmap == PMAP_NULL) {
3961 vm_map_unlock(map);
3962 return(KERN_NO_SPACE);
3963 }
3964 }
3965 result = pmap_nest(map->pmap,
3966 (entry->object.sub_map)->pmap,
3967 (addr64_t)start,
3968 (addr64_t)start,
3969 (uint64_t)(end - start));
3970 if(result)
3971 panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result);
3972 entry->use_pmap = TRUE;
3973 }
3974 #else /* NO_NESTED_PMAP */
3975 pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end);
3976 #endif /* NO_NESTED_PMAP */
3977 result = KERN_SUCCESS;
3978 }
3979 vm_map_unlock(map);
3980
3981 return(result);
3982 }
3983
3984 /*
3985 * vm_map_protect:
3986 *
3987 * Sets the protection of the specified address
3988 * region in the target map. If "set_max" is
3989 * specified, the maximum protection is to be set;
3990 * otherwise, only the current protection is affected.
3991 */
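/*
 * Usage sketch (illustrative): make a range read-only without touching its
 * maximum protection:
 *
 *	kr = vm_map_protect(map, start, end, VM_PROT_READ, FALSE);
 *
 * With set_max == TRUE the new value becomes the maximum protection and the
 * current protection is intersected with its old value.
 */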
3992 kern_return_t
3993 vm_map_protect(
3994 register vm_map_t map,
3995 register vm_map_offset_t start,
3996 register vm_map_offset_t end,
3997 register vm_prot_t new_prot,
3998 register boolean_t set_max)
3999 {
4000 register vm_map_entry_t current;
4001 register vm_map_offset_t prev;
4002 vm_map_entry_t entry;
4003 vm_prot_t new_max;
4004
4005 XPR(XPR_VM_MAP,
4006 "vm_map_protect, 0x%X start 0x%X end 0x%X, new 0x%X %d",
4007 map, start, end, new_prot, set_max);
4008
4009 vm_map_lock(map);
4010
4011 /* LP64todo - remove this check when vm_map_commpage64()
4012 * no longer has to stuff in a map_entry for the commpage
4013 * above the map's max_offset.
4014 */
4015 if (start >= map->max_offset) {
4016 vm_map_unlock(map);
4017 return(KERN_INVALID_ADDRESS);
4018 }
4019
4020 while(1) {
4021 /*
4022 * Lookup the entry. If it doesn't start in a valid
4023 * entry, return an error.
4024 */
4025 if (! vm_map_lookup_entry(map, start, &entry)) {
4026 vm_map_unlock(map);
4027 return(KERN_INVALID_ADDRESS);
4028 }
4029
4030 if (entry->superpage_size && (start & (SUPERPAGE_SIZE-1))) { /* extend request to whole entry */
4031 start = SUPERPAGE_ROUND_DOWN(start);
4032 continue;
4033 }
4034 break;
4035 }
4036 if (entry->superpage_size)
4037 end = SUPERPAGE_ROUND_UP(end);
4038
4039 /*
4040 * Make a first pass to check for protection and address
4041 * violations.
4042 */
4043
4044 current = entry;
4045 prev = current->vme_start;
4046 while ((current != vm_map_to_entry(map)) &&
4047 (current->vme_start < end)) {
4048
4049 /*
4050 * If there is a hole, return an error.
4051 */
4052 if (current->vme_start != prev) {
4053 vm_map_unlock(map);
4054 return(KERN_INVALID_ADDRESS);
4055 }
4056
4057 new_max = current->max_protection;
4058 if(new_prot & VM_PROT_COPY) {
4059 new_max |= VM_PROT_WRITE;
4060 if ((new_prot & (new_max | VM_PROT_COPY)) != new_prot) {
4061 vm_map_unlock(map);
4062 return(KERN_PROTECTION_FAILURE);
4063 }
4064 } else {
4065 if ((new_prot & new_max) != new_prot) {
4066 vm_map_unlock(map);
4067 return(KERN_PROTECTION_FAILURE);
4068 }
4069 }
4070
4071
4072 prev = current->vme_end;
4073 current = current->vme_next;
4074 }
4075 if (end > prev) {
4076 vm_map_unlock(map);
4077 return(KERN_INVALID_ADDRESS);
4078 }
4079
4080 /*
4081 * Go back and fix up protections.
4082 * Clip to start here if the range starts within
4083 * the entry.
4084 */
4085
4086 current = entry;
4087 if (current != vm_map_to_entry(map)) {
4088 /* clip and unnest if necessary */
4089 vm_map_clip_start(map, current, start);
4090 }
4091
4092 while ((current != vm_map_to_entry(map)) &&
4093 (current->vme_start < end)) {
4094
4095 vm_prot_t old_prot;
4096
4097 vm_map_clip_end(map, current, end);
4098
4099 if (current->is_sub_map) {
4100 /* clipping did unnest if needed */
4101 assert(!current->use_pmap);
4102 }
4103
4104 old_prot = current->protection;
4105
4106 if(new_prot & VM_PROT_COPY) {
4107 /* The caller is asking specifically to copy the */
4108 /* mapped data; this implies that max protection */
4109 /* will include write. Caller must be prepared */
4110 /* for loss of shared memory communication in the */
4111 /* target area after taking this step */
4112
4113 if (current->is_sub_map == FALSE && current->object.vm_object == VM_OBJECT_NULL){
4114 current->object.vm_object = vm_object_allocate((vm_map_size_t)(current->vme_end - current->vme_start));
4115 current->offset = 0;
4116 assert(current->use_pmap);
4117 }
4118 current->needs_copy = TRUE;
4119 current->max_protection |= VM_PROT_WRITE;
4120 }
4121
4122 if (set_max)
4123 current->protection =
4124 (current->max_protection =
4125 new_prot & ~VM_PROT_COPY) &
4126 old_prot;
4127 else
4128 current->protection = new_prot & ~VM_PROT_COPY;
4129
4130 /*
4131 * Update physical map if necessary.
4132 * If the request is to turn off write protection,
4133 * we won't do it for real (in pmap). This is because
4134 * it would cause copy-on-write to fail. We've already
4135 * set the new protection in the map, so if a
4136 * write-protect fault occurs, it will be fixed up
4137 * properly, COW or not.
4138 */
4139 if (current->protection != old_prot) {
4140 /* Look one level in: we support nested pmaps */
4141 /* from mapped submaps which are direct entries */
4142 /* in our map */
4143
4144 vm_prot_t prot;
4145
4146 prot = current->protection & ~VM_PROT_WRITE;
4147
4148 if (override_nx(map, current->alias) && prot)
4149 prot |= VM_PROT_EXECUTE;
4150
4151 if (current->is_sub_map && current->use_pmap) {
4152 pmap_protect(current->object.sub_map->pmap,
4153 current->vme_start,
4154 current->vme_end,
4155 prot);
4156 } else {
4157 pmap_protect(map->pmap,
4158 current->vme_start,
4159 current->vme_end,
4160 prot);
4161 }
4162 }
4163 current = current->vme_next;
4164 }
4165
4166 current = entry;
4167 while ((current != vm_map_to_entry(map)) &&
4168 (current->vme_start <= end)) {
4169 vm_map_simplify_entry(map, current);
4170 current = current->vme_next;
4171 }
4172
4173 vm_map_unlock(map);
4174 return(KERN_SUCCESS);
4175 }
4176
4177 /*
4178 * vm_map_inherit:
4179 *
4180 * Sets the inheritance of the specified address
4181 * range in the target map. Inheritance
4182 * affects how the map will be shared with
4183 * child maps at the time of vm_map_fork.
4184 */
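/*
 * Usage sketch (illustrative): mark a range to be shared with children
 * created by vm_map_fork():
 *
 *	kr = vm_map_inherit(map, start, end, VM_INHERIT_SHARE);
 *
 * Note that VM_INHERIT_COPY is rejected below for ranges backed by submaps.
 */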
4185 kern_return_t
4186 vm_map_inherit(
4187 register vm_map_t map,
4188 register vm_map_offset_t start,
4189 register vm_map_offset_t end,
4190 register vm_inherit_t new_inheritance)
4191 {
4192 register vm_map_entry_t entry;
4193 vm_map_entry_t temp_entry;
4194
4195 vm_map_lock(map);
4196
4197 VM_MAP_RANGE_CHECK(map, start, end);
4198
4199 if (vm_map_lookup_entry(map, start, &temp_entry)) {
4200 entry = temp_entry;
4201 }
4202 else {
4203 temp_entry = temp_entry->vme_next;
4204 entry = temp_entry;
4205 }
4206
4207 /* first check entire range for submaps which can't support the */
4208 /* given inheritance. */
4209 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
4210 if(entry->is_sub_map) {
4211 if(new_inheritance == VM_INHERIT_COPY) {
4212 vm_map_unlock(map);
4213 return(KERN_INVALID_ARGUMENT);
4214 }
4215 }
4216
4217 entry = entry->vme_next;
4218 }
4219
4220 entry = temp_entry;
4221 if (entry != vm_map_to_entry(map)) {
4222 /* clip and unnest if necessary */
4223 vm_map_clip_start(map, entry, start);
4224 }
4225
4226 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
4227 vm_map_clip_end(map, entry, end);
4228 if (entry->is_sub_map) {
4229 /* clip did unnest if needed */
4230 assert(!entry->use_pmap);
4231 }
4232
4233 entry->inheritance = new_inheritance;
4234
4235 entry = entry->vme_next;
4236 }
4237
4238 vm_map_unlock(map);
4239 return(KERN_SUCCESS);
4240 }
4241
4242 /*
4243 * Update the accounting for the amount of wired memory in this map. If the user has
4244 * exceeded the defined limits, then we fail. Wiring on behalf of the kernel never fails.
4245 */
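/*
 * Illustrative example of the per-map check below (numbers invented):
 * with map->user_wire_limit = 64 MB, vm_user_wire_limit = 1 GB and
 * map->user_wire_size already at 60 MB, a request to wire another 8 MB
 * fails with KERN_RESOURCE_SHORTAGE because 68 MB > MIN(64 MB, 1 GB).
 */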
4246
4247 static kern_return_t
4248 add_wire_counts(
4249 vm_map_t map,
4250 vm_map_entry_t entry,
4251 boolean_t user_wire)
4252 {
4253 vm_map_size_t size;
4254
4255 if (user_wire) {
4256 unsigned int total_wire_count = vm_page_wire_count + vm_lopage_free_count;
4257
4258 /*
4259 * We're wiring memory at the request of the user. Check if this is the first time the user is wiring
4260 * this map entry.
4261 */
4262
4263 if (entry->user_wired_count == 0) {
4264 size = entry->vme_end - entry->vme_start;
4265
4266 /*
4267 * Since this is the first time the user is wiring this map entry, check to see if we're
4268 * exceeding the user wire limits. There is a per map limit which is the smaller of either
4269 * the process's rlimit or the global vm_user_wire_limit which caps this value. There is also
4270 * a system-wide limit on the amount of memory all users can wire. If the user is over either
4271 * limit, then we fail.
4272 */
4273
4274 if(size + map->user_wire_size > MIN(map->user_wire_limit, vm_user_wire_limit) ||
4275 size + ptoa_64(total_wire_count) > vm_global_user_wire_limit ||
4276 size + ptoa_64(total_wire_count) > max_mem - vm_global_no_user_wire_amount)
4277 return KERN_RESOURCE_SHORTAGE;
4278
4279 /*
4280 * The first time the user wires an entry, we also increment the wired_count and add this to
4281 * the total that has been wired in the map.
4282 */
4283
4284 if (entry->wired_count >= MAX_WIRE_COUNT)
4285 return KERN_FAILURE;
4286
4287 entry->wired_count++;
4288 map->user_wire_size += size;
4289 }
4290
4291 if (entry->user_wired_count >= MAX_WIRE_COUNT)
4292 return KERN_FAILURE;
4293
4294 entry->user_wired_count++;
4295
4296 } else {
4297
4298 /*
4299 * The kernel's wiring the memory. Just bump the count and continue.
4300 */
4301
4302 if (entry->wired_count >= MAX_WIRE_COUNT)
4303 panic("vm_map_wire: too many wirings");
4304
4305 entry->wired_count++;
4306 }
4307
4308 return KERN_SUCCESS;
4309 }
4310
4311 /*
4312 * Update the memory wiring accounting now that the given map entry is being unwired.
4313 */
4314
4315 static void
4316 subtract_wire_counts(
4317 vm_map_t map,
4318 vm_map_entry_t entry,
4319 boolean_t user_wire)
4320 {
4321
4322 if (user_wire) {
4323
4324 /*
4325 * We're unwiring memory at the request of the user. See if we're removing the last user wire reference.
4326 */
4327
4328 if (entry->user_wired_count == 1) {
4329
4330 /*
4331 * We're removing the last user wire reference. Decrement the wired_count and the total
4332 * user wired memory for this map.
4333 */
4334
4335 assert(entry->wired_count >= 1);
4336 entry->wired_count--;
4337 map->user_wire_size -= entry->vme_end - entry->vme_start;
4338 }
4339
4340 assert(entry->user_wired_count >= 1);
4341 entry->user_wired_count--;
4342
4343 } else {
4344
4345 /*
4346 * The kernel is unwiring the memory. Just update the count.
4347 */
4348
4349 assert(entry->wired_count >= 1);
4350 entry->wired_count--;
4351 }
4352 }
4353
4354 /*
4355 * vm_map_wire:
4356 *
4357 * Sets the pageability of the specified address range in the
4358 * target map as wired. Regions specified as not pageable require
4359 * locked-down physical memory and physical page maps. The
4360 * access_type variable indicates types of accesses that must not
4361 * generate page faults. This is checked against protection of
4362 * memory being locked-down.
4363 *
4364 * The map must not be locked, but a reference must remain to the
4365 * map throughout the call.
4366 */
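/*
 * Typical call (an illustrative sketch): wire a page-aligned range for
 * read/write access on behalf of the user, so it is charged against the
 * user wire limits:
 *
 *	kr = vm_map_wire(map, start, end,
 *			 VM_PROT_READ | VM_PROT_WRITE, TRUE);
 *
 * The _nested variant below additionally threads a target pmap and base
 * address through recursive calls into submaps.
 */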
4367 static kern_return_t
4368 vm_map_wire_nested(
4369 register vm_map_t map,
4370 register vm_map_offset_t start,
4371 register vm_map_offset_t end,
4372 register vm_prot_t access_type,
4373 boolean_t user_wire,
4374 pmap_t map_pmap,
4375 vm_map_offset_t pmap_addr,
4376 ppnum_t *physpage_p)
4377 {
4378 register vm_map_entry_t entry;
4379 struct vm_map_entry *first_entry, tmp_entry;
4380 vm_map_t real_map;
4381 register vm_map_offset_t s,e;
4382 kern_return_t rc;
4383 boolean_t need_wakeup;
4384 boolean_t main_map = FALSE;
4385 wait_interrupt_t interruptible_state;
4386 thread_t cur_thread;
4387 unsigned int last_timestamp;
4388 vm_map_size_t size;
4389 boolean_t wire_and_extract;
4390
4391 wire_and_extract = FALSE;
4392 if (physpage_p != NULL) {
4393 /*
4394 * The caller wants the physical page number of the
4395 * wired page. We return only one physical page number
4396 * so this works for only one page at a time.
4397 */
4398 if ((end - start) != PAGE_SIZE) {
4399 return KERN_INVALID_ARGUMENT;
4400 }
4401 wire_and_extract = TRUE;
4402 *physpage_p = 0;
4403 }
4404
4405 vm_map_lock(map);
4406 if(map_pmap == NULL)
4407 main_map = TRUE;
4408 last_timestamp = map->timestamp;
4409
4410 VM_MAP_RANGE_CHECK(map, start, end);
4411 assert(page_aligned(start));
4412 assert(page_aligned(end));
4413 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
4414 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
4415 if (start == end) {
4416 /* We wired what the caller asked for, zero pages */
4417 vm_map_unlock(map);
4418 return KERN_SUCCESS;
4419 }
4420
4421 need_wakeup = FALSE;
4422 cur_thread = current_thread();
4423
4424 s = start;
4425 rc = KERN_SUCCESS;
4426
4427 if (vm_map_lookup_entry(map, s, &first_entry)) {
4428 entry = first_entry;
4429 /*
4430 * vm_map_clip_start will be done later.
4431 * We don't want to unnest any nested submaps here !
4432 */
4433 } else {
4434 /* Start address is not in map */
4435 rc = KERN_INVALID_ADDRESS;
4436 goto done;
4437 }
4438
4439 while ((entry != vm_map_to_entry(map)) && (s < end)) {
4440 /*
4441 * At this point, we have wired from "start" to "s".
4442 * We still need to wire from "s" to "end".
4443 *
4444 * "entry" hasn't been clipped, so it could start before "s"
4445 * and/or end after "end".
4446 */
4447
4448 /* "e" is how far we want to wire in this entry */
4449 e = entry->vme_end;
4450 if (e > end)
4451 e = end;
4452
4453 /*
4454 * If another thread is wiring/unwiring this entry then
4455 * block after informing the other thread to wake us up.
4456 */
4457 if (entry->in_transition) {
4458 wait_result_t wait_result;
4459
4460 /*
4461 * We have not clipped the entry. Make sure that
4462 * the start address is in range so that the lookup
4463 * below will succeed.
4464 * "s" is the current starting point: we've already
4465 * wired from "start" to "s" and we still have
4466 * to wire from "s" to "end".
4467 */
4468
4469 entry->needs_wakeup = TRUE;
4470
4471 /*
4472 * wake up anybody waiting on entries that we have
4473 * already wired.
4474 */
4475 if (need_wakeup) {
4476 vm_map_entry_wakeup(map);
4477 need_wakeup = FALSE;
4478 }
4479 /*
4480 * User wiring is interruptible
4481 */
4482 wait_result = vm_map_entry_wait(map,
4483 (user_wire) ? THREAD_ABORTSAFE :
4484 THREAD_UNINT);
4485 if (user_wire && wait_result == THREAD_INTERRUPTED) {
4486 /*
4487 * undo the wirings we have done so far
4488 * We do not clear the needs_wakeup flag,
4489 * because we cannot tell if we were the
4490 * only one waiting.
4491 */
4492 rc = KERN_FAILURE;
4493 goto done;
4494 }
4495
4496 /*
4497 * Cannot avoid a lookup here. Reset the timestamp.
4498 */
4499 last_timestamp = map->timestamp;
4500
4501 /*
4502 * The entry could have been clipped, look it up again.
4503 * The worst that can happen is that it no longer exists.
4504 */
4505 if (!vm_map_lookup_entry(map, s, &first_entry)) {
4506 /*
4507 * User: undo everything up to the previous
4508 * entry. Let vm_map_unwire worry about
4509 * checking the validity of the range.
4510 */
4511 rc = KERN_FAILURE;
4512 goto done;
4513 }
4514 entry = first_entry;
4515 continue;
4516 }
4517
4518 if (entry->is_sub_map) {
4519 vm_map_offset_t sub_start;
4520 vm_map_offset_t sub_end;
4521 vm_map_offset_t local_start;
4522 vm_map_offset_t local_end;
4523 pmap_t pmap;
4524
4525 if (wire_and_extract) {
4526 /*
4527 * Wiring would result in copy-on-write
4528 * which would not be compatible with
4529 * the sharing we have with the original
4530 * provider of this memory.
4531 */
4532 rc = KERN_INVALID_ARGUMENT;
4533 goto done;
4534 }
4535
4536 vm_map_clip_start(map, entry, s);
4537 vm_map_clip_end(map, entry, end);
4538
4539 sub_start = entry->offset;
4540 sub_end = entry->vme_end;
4541 sub_end += entry->offset - entry->vme_start;
4542
4543 local_end = entry->vme_end;
4544 if(map_pmap == NULL) {
4545 vm_object_t object;
4546 vm_object_offset_t offset;
4547 vm_prot_t prot;
4548 boolean_t wired;
4549 vm_map_entry_t local_entry;
4550 vm_map_version_t version;
4551 vm_map_t lookup_map;
4552
4553 if(entry->use_pmap) {
4554 pmap = entry->object.sub_map->pmap;
4555 /* ppc implementation requires that */
4556 /* submaps pmap address ranges line */
4557 /* up with parent map */
4558 #ifdef notdef
4559 pmap_addr = sub_start;
4560 #endif
4561 pmap_addr = s;
4562 } else {
4563 pmap = map->pmap;
4564 pmap_addr = s;
4565 }
4566
4567 if (entry->wired_count) {
4568 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4569 goto done;
4570
4571 /*
4572 * The map was not unlocked:
4573 * no need to goto re-lookup.
4574 * Just go directly to next entry.
4575 */
4576 entry = entry->vme_next;
4577 s = entry->vme_start;
4578 continue;
4579
4580 }
4581
4582 /* call vm_map_lookup_locked to */
4583 /* cause any needs copy to be */
4584 /* evaluated */
4585 local_start = entry->vme_start;
4586 lookup_map = map;
4587 vm_map_lock_write_to_read(map);
4588 if(vm_map_lookup_locked(
4589 &lookup_map, local_start,
4590 access_type,
4591 OBJECT_LOCK_EXCLUSIVE,
4592 &version, &object,
4593 &offset, &prot, &wired,
4594 NULL,
4595 &real_map)) {
4596
4597 vm_map_unlock_read(lookup_map);
4598 vm_map_unwire(map, start,
4599 s, user_wire);
4600 return(KERN_FAILURE);
4601 }
4602 vm_object_unlock(object);
4603 if(real_map != lookup_map)
4604 vm_map_unlock(real_map);
4605 vm_map_unlock_read(lookup_map);
4606 vm_map_lock(map);
4607
4608 /* we unlocked, so must re-lookup */
4609 if (!vm_map_lookup_entry(map,
4610 local_start,
4611 &local_entry)) {
4612 rc = KERN_FAILURE;
4613 goto done;
4614 }
4615
4616 /*
4617 * entry could have been "simplified",
4618 * so re-clip
4619 */
4620 entry = local_entry;
4621 assert(s == local_start);
4622 vm_map_clip_start(map, entry, s);
4623 vm_map_clip_end(map, entry, end);
4624 /* re-compute "e" */
4625 e = entry->vme_end;
4626 if (e > end)
4627 e = end;
4628
4629 /* did we have a change of type? */
4630 if (!entry->is_sub_map) {
4631 last_timestamp = map->timestamp;
4632 continue;
4633 }
4634 } else {
4635 local_start = entry->vme_start;
4636 pmap = map_pmap;
4637 }
4638
4639 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4640 goto done;
4641
4642 entry->in_transition = TRUE;
4643
4644 vm_map_unlock(map);
4645 rc = vm_map_wire_nested(entry->object.sub_map,
4646 sub_start, sub_end,
4647 access_type,
4648 user_wire, pmap, pmap_addr,
4649 NULL);
4650 vm_map_lock(map);
4651
4652 /*
4653 * Find the entry again. It could have been clipped
4654 * after we unlocked the map.
4655 */
4656 if (!vm_map_lookup_entry(map, local_start,
4657 &first_entry))
4658 panic("vm_map_wire: re-lookup failed");
4659 entry = first_entry;
4660
4661 assert(local_start == s);
4662 /* re-compute "e" */
4663 e = entry->vme_end;
4664 if (e > end)
4665 e = end;
4666
4667 last_timestamp = map->timestamp;
4668 while ((entry != vm_map_to_entry(map)) &&
4669 (entry->vme_start < e)) {
4670 assert(entry->in_transition);
4671 entry->in_transition = FALSE;
4672 if (entry->needs_wakeup) {
4673 entry->needs_wakeup = FALSE;
4674 need_wakeup = TRUE;
4675 }
4676 if (rc != KERN_SUCCESS) {/* from vm_*_wire */
4677 subtract_wire_counts(map, entry, user_wire);
4678 }
4679 entry = entry->vme_next;
4680 }
4681 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
4682 goto done;
4683 }
4684
4685 /* no need to relookup again */
4686 s = entry->vme_start;
4687 continue;
4688 }
4689
4690 /*
4691 * If this entry is already wired then increment
4692 * the appropriate wire reference count.
4693 */
4694 if (entry->wired_count) {
4695
4696 if ((entry->protection & access_type) != access_type) {
4697 /* found a protection problem */
4698
4699 /*
4700 * XXX FBDP
4701 * We should always return an error
4702 * in this case but since we didn't
4703 * enforce it before, let's do
4704 * it only for the new "wire_and_extract"
4705 * code path for now...
4706 */
4707 if (wire_and_extract) {
4708 rc = KERN_PROTECTION_FAILURE;
4709 goto done;
4710 }
4711 }
4712
4713 /*
4714 * entry is already wired down, get our reference
4715 * after clipping to our range.
4716 */
4717 vm_map_clip_start(map, entry, s);
4718 vm_map_clip_end(map, entry, end);
4719
4720 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4721 goto done;
4722
4723 if (wire_and_extract) {
4724 vm_object_t object;
4725 vm_object_offset_t offset;
4726 vm_page_t m;
4727
4728 /*
4729 * We don't have to "wire" the page again
4730 * but we still have to "extract" its
4731 * physical page number, after some sanity
4732 * checks.
4733 */
4734 assert((entry->vme_end - entry->vme_start)
4735 == PAGE_SIZE);
4736 assert(!entry->needs_copy);
4737 assert(!entry->is_sub_map);
4738 assert(entry->object.vm_object);
4739 if (((entry->vme_end - entry->vme_start)
4740 != PAGE_SIZE) ||
4741 entry->needs_copy ||
4742 entry->is_sub_map ||
4743 entry->object.vm_object == VM_OBJECT_NULL) {
4744 rc = KERN_INVALID_ARGUMENT;
4745 goto done;
4746 }
4747
4748 object = entry->object.vm_object;
4749 offset = entry->offset;
4750 /* need exclusive lock to update m->dirty */
4751 if (entry->protection & VM_PROT_WRITE) {
4752 vm_object_lock(object);
4753 } else {
4754 vm_object_lock_shared(object);
4755 }
4756 m = vm_page_lookup(object, offset);
4757 assert(m != VM_PAGE_NULL);
4758 assert(m->wire_count);
4759 if (m != VM_PAGE_NULL && m->wire_count) {
4760 *physpage_p = m->phys_page;
4761 if (entry->protection & VM_PROT_WRITE) {
4762 vm_object_lock_assert_exclusive(
4763 m->object);
4764 m->dirty = TRUE;
4765 }
4766 } else {
4767 /* not already wired !? */
4768 *physpage_p = 0;
4769 }
4770 vm_object_unlock(object);
4771 }
4772
4773 /* map was not unlocked: no need to relookup */
4774 entry = entry->vme_next;
4775 s = entry->vme_start;
4776 continue;
4777 }
4778
4779 /*
4780 * Unwired entry or wire request transmitted via submap
4781 */
4782
4783
4784 /*
4785 * Perform actions of vm_map_lookup that need the write
4786 * lock on the map: create a shadow object for a
4787 * copy-on-write region, or an object for a zero-fill
4788 * region.
4789 */
4790 size = entry->vme_end - entry->vme_start;
4791 /*
4792 * If wiring a copy-on-write page, we need to copy it now
4793 * even if we're only (currently) requesting read access.
4794 * This is aggressive, but once it's wired we can't move it.
4795 */
4796 if (entry->needs_copy) {
4797 if (wire_and_extract) {
4798 /*
4799 * We're supposed to share with the original
4800 * provider so should not be "needs_copy"
4801 */
4802 rc = KERN_INVALID_ARGUMENT;
4803 goto done;
4804 }
4805
4806 vm_object_shadow(&entry->object.vm_object,
4807 &entry->offset, size);
4808 entry->needs_copy = FALSE;
4809 } else if (entry->object.vm_object == VM_OBJECT_NULL) {
4810 if (wire_and_extract) {
4811 /*
4812 * We're supposed to share with the original
4813 * provider so should already have an object.
4814 */
4815 rc = KERN_INVALID_ARGUMENT;
4816 goto done;
4817 }
4818 entry->object.vm_object = vm_object_allocate(size);
4819 entry->offset = (vm_object_offset_t)0;
4820 assert(entry->use_pmap);
4821 }
4822
4823 vm_map_clip_start(map, entry, s);
4824 vm_map_clip_end(map, entry, end);
4825
4826 /* re-compute "e" */
4827 e = entry->vme_end;
4828 if (e > end)
4829 e = end;
4830
4831 /*
4832 * Check for holes and protection mismatch.
4833 * Holes: Next entry should be contiguous unless this
4834 * is the end of the region.
4835 * Protection: Access requested must be allowed, unless
4836 * wiring is by protection class
4837 */
4838 if ((entry->vme_end < end) &&
4839 ((entry->vme_next == vm_map_to_entry(map)) ||
4840 (entry->vme_next->vme_start > entry->vme_end))) {
4841 /* found a hole */
4842 rc = KERN_INVALID_ADDRESS;
4843 goto done;
4844 }
4845 if ((entry->protection & access_type) != access_type) {
4846 /* found a protection problem */
4847 rc = KERN_PROTECTION_FAILURE;
4848 goto done;
4849 }
4850
4851 assert(entry->wired_count == 0 && entry->user_wired_count == 0);
4852
4853 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4854 goto done;
4855
4856 entry->in_transition = TRUE;
4857
4858 /*
4859 * This entry might get split once we unlock the map.
4860 * In vm_fault_wire(), we need the current range as
4861 * defined by this entry. In order for this to work
4862 * along with a simultaneous clip operation, we make a
4863 * temporary copy of this entry and use that for the
4864 * wiring. Note that the underlying objects do not
4865 * change during a clip.
4866 */
4867 tmp_entry = *entry;
4868
4869 /*
4870 * The in_transition state guarantees that the entry
4871 * (or entries for this range, if a split occurred) will be
4872 * there when the map lock is acquired for the second time.
4873 */
4874 vm_map_unlock(map);
4875
4876 if (!user_wire && cur_thread != THREAD_NULL)
4877 interruptible_state = thread_interrupt_level(THREAD_UNINT);
4878 else
4879 interruptible_state = THREAD_UNINT;
4880
4881 if(map_pmap)
4882 rc = vm_fault_wire(map,
4883 &tmp_entry, map_pmap, pmap_addr,
4884 physpage_p);
4885 else
4886 rc = vm_fault_wire(map,
4887 &tmp_entry, map->pmap,
4888 tmp_entry.vme_start,
4889 physpage_p);
4890
4891 if (!user_wire && cur_thread != THREAD_NULL)
4892 thread_interrupt_level(interruptible_state);
4893
4894 vm_map_lock(map);
4895
4896 if (last_timestamp+1 != map->timestamp) {
4897 /*
4898 * Find the entry again. It could have been clipped
4899 * after we unlocked the map.
4900 */
4901 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
4902 &first_entry))
4903 panic("vm_map_wire: re-lookup failed");
4904
4905 entry = first_entry;
4906 }
4907
4908 last_timestamp = map->timestamp;
4909
4910 while ((entry != vm_map_to_entry(map)) &&
4911 (entry->vme_start < tmp_entry.vme_end)) {
4912 assert(entry->in_transition);
4913 entry->in_transition = FALSE;
4914 if (entry->needs_wakeup) {
4915 entry->needs_wakeup = FALSE;
4916 need_wakeup = TRUE;
4917 }
4918 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
4919 subtract_wire_counts(map, entry, user_wire);
4920 }
4921 entry = entry->vme_next;
4922 }
4923
4924 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
4925 goto done;
4926 }
4927
4928 s = entry->vme_start;
4929 } /* end while loop through map entries */
4930
4931 done:
4932 if (rc == KERN_SUCCESS) {
4933 /* repair any damage we may have made to the VM map */
4934 vm_map_simplify_range(map, start, end);
4935 }
4936
4937 vm_map_unlock(map);
4938
4939 /*
4940 * wake up anybody waiting on entries we wired.
4941 */
4942 if (need_wakeup)
4943 vm_map_entry_wakeup(map);
4944
4945 if (rc != KERN_SUCCESS) {
4946 /* undo what has been wired so far */
4947 vm_map_unwire(map, start, s, user_wire);
4948 if (physpage_p) {
4949 *physpage_p = 0;
4950 }
4951 }
4952
4953 return rc;
4954
4955 }
4956
4957 kern_return_t
4958 vm_map_wire(
4959 register vm_map_t map,
4960 register vm_map_offset_t start,
4961 register vm_map_offset_t end,
4962 register vm_prot_t access_type,
4963 boolean_t user_wire)
4964 {
4965
4966 kern_return_t kret;
4967
4968 kret = vm_map_wire_nested(map, start, end, access_type,
4969 user_wire, (pmap_t)NULL, 0, NULL);
4970 return kret;
4971 }
4972
4973 kern_return_t
4974 vm_map_wire_and_extract(
4975 vm_map_t map,
4976 vm_map_offset_t start,
4977 vm_prot_t access_type,
4978 boolean_t user_wire,
4979 ppnum_t *physpage_p)
4980 {
4981
4982 kern_return_t kret;
4983
4984 kret = vm_map_wire_nested(map,
4985 start,
4986 start+VM_MAP_PAGE_SIZE(map),
4987 access_type,
4988 user_wire,
4989 (pmap_t)NULL,
4990 0,
4991 physpage_p);
4992 if (kret != KERN_SUCCESS &&
4993 physpage_p != NULL) {
4994 *physpage_p = 0;
4995 }
4996 return kret;
4997 }
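/*
 * Usage sketch (illustrative): wire exactly one VM page and retrieve the
 * physical page number backing it:
 *
 *	ppnum_t pn;
 *	kern_return_t kr;
 *
 *	kr = vm_map_wire_and_extract(map, addr,
 *				     VM_PROT_READ | VM_PROT_WRITE,
 *				     FALSE, &pn);
 *
 * On success "pn" holds the physical page number; on failure it is set
 * to 0, as is any physpage_p passed to the nested call above.
 */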
4998
4999 /*
5000 * vm_map_unwire:
5001 *
5002 * Sets the pageability of the specified address range in the target
5003 * as pageable. Regions specified must have been wired previously.
5004 *
5005 * The map must not be locked, but a reference must remain to the map
5006 * throughout the call.
5007 *
5008 * The kernel will panic on failures. User unwire ignores holes and
5009 * unwired or in-transition entries to avoid losing memory by leaving
5010 * it unwired.
5011 */
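/*
 * Usage sketch (illustrative): release a range previously wired with
 * vm_map_wire() on behalf of the user:
 *
 *	kr = vm_map_unwire(map, start, end, TRUE);
 */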
5012 static kern_return_t
5013 vm_map_unwire_nested(
5014 register vm_map_t map,
5015 register vm_map_offset_t start,
5016 register vm_map_offset_t end,
5017 boolean_t user_wire,
5018 pmap_t map_pmap,
5019 vm_map_offset_t pmap_addr)
5020 {
5021 register vm_map_entry_t entry;
5022 struct vm_map_entry *first_entry, tmp_entry;
5023 boolean_t need_wakeup;
5024 boolean_t main_map = FALSE;
5025 unsigned int last_timestamp;
5026
5027 vm_map_lock(map);
5028 if(map_pmap == NULL)
5029 main_map = TRUE;
5030 last_timestamp = map->timestamp;
5031
5032 VM_MAP_RANGE_CHECK(map, start, end);
5033 assert(page_aligned(start));
5034 assert(page_aligned(end));
5035 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
5036 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
5037
5038 if (start == end) {
5039 /* We unwired what the caller asked for: zero pages */
5040 vm_map_unlock(map);
5041 return KERN_SUCCESS;
5042 }
5043
5044 if (vm_map_lookup_entry(map, start, &first_entry)) {
5045 entry = first_entry;
5046 /*
5047 * vm_map_clip_start will be done later.
5048 * We don't want to unnest any nested sub maps here !
5049 */
5050 }
5051 else {
5052 if (!user_wire) {
5053 panic("vm_map_unwire: start not found");
5054 }
5055 /* Start address is not in map. */
5056 vm_map_unlock(map);
5057 return(KERN_INVALID_ADDRESS);
5058 }
5059
5060 if (entry->superpage_size) {
5061 /* superpages are always wired */
5062 vm_map_unlock(map);
5063 return KERN_INVALID_ADDRESS;
5064 }
5065
5066 need_wakeup = FALSE;
5067 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
5068 if (entry->in_transition) {
5069 /*
5070 * 1)
5071 * Another thread is wiring down this entry. Note
5072 * that if it were not for the other thread, we would
5073 * be unwiring an unwired entry. This is not
5074 * permitted. If we wait, we will be unwiring memory
5075 * we did not wire.
5076 *
5077 * 2)
5078 * Another thread is unwiring this entry. We did not
5079 * have a reference to it, because if we did, this
5080 * entry will not be getting unwired now.
5081 */
5082 if (!user_wire) {
5083 /*
5084 * XXX FBDP
5085 * This could happen: there could be some
5086 * overlapping vslock/vsunlock operations
5087 * going on.
5088 * We should probably just wait and retry,
5089 * but then we have to be careful that this
5090 * entry could get "simplified" after
5091 * "in_transition" gets unset and before
5092 * we re-lookup the entry, so we would
5093 * have to re-clip the entry to avoid
5094 * re-unwiring what we have already unwired...
5095 * See vm_map_wire_nested().
5096 *
5097 * Or we could just ignore "in_transition"
5098 * here and proceed to decrement the wired
5099 * count(s) on this entry. That should be fine
5100 * as long as "wired_count" doesn't drop all
5101 * the way to 0 (and we should panic if THAT
5102 * happens).
5103 */
5104 panic("vm_map_unwire: in_transition entry");
5105 }
5106
5107 entry = entry->vme_next;
5108 continue;
5109 }
5110
5111 if (entry->is_sub_map) {
5112 vm_map_offset_t sub_start;
5113 vm_map_offset_t sub_end;
5114 vm_map_offset_t local_end;
5115 pmap_t pmap;
5116
5117 vm_map_clip_start(map, entry, start);
5118 vm_map_clip_end(map, entry, end);
5119
5120 sub_start = entry->offset;
5121 sub_end = entry->vme_end - entry->vme_start;
5122 sub_end += entry->offset;
5123 local_end = entry->vme_end;
5124 if(map_pmap == NULL) {
5125 if(entry->use_pmap) {
5126 pmap = entry->object.sub_map->pmap;
5127 pmap_addr = sub_start;
5128 } else {
5129 pmap = map->pmap;
5130 pmap_addr = start;
5131 }
5132 if (entry->wired_count == 0 ||
5133 (user_wire && entry->user_wired_count == 0)) {
5134 if (!user_wire)
5135 panic("vm_map_unwire: entry is unwired");
5136 entry = entry->vme_next;
5137 continue;
5138 }
5139
5140 /*
5141 * Check for holes
5142 * Holes: Next entry should be contiguous unless
5143 * this is the end of the region.
5144 */
5145 if (((entry->vme_end < end) &&
5146 ((entry->vme_next == vm_map_to_entry(map)) ||
5147 (entry->vme_next->vme_start
5148 > entry->vme_end)))) {
5149 if (!user_wire)
5150 panic("vm_map_unwire: non-contiguous region");
5151 /*
5152 entry = entry->vme_next;
5153 continue;
5154 */
5155 }
5156
5157 subtract_wire_counts(map, entry, user_wire);
5158
5159 if (entry->wired_count != 0) {
5160 entry = entry->vme_next;
5161 continue;
5162 }
5163
5164 entry->in_transition = TRUE;
5165 tmp_entry = *entry;/* see comment in vm_map_wire() */
5166
5167 /*
5168 * We can unlock the map now. The in_transition state
5169 * guarantees the existence of the entry.
5170 */
5171 vm_map_unlock(map);
5172 vm_map_unwire_nested(entry->object.sub_map,
5173 sub_start, sub_end, user_wire, pmap, pmap_addr);
5174 vm_map_lock(map);
5175
5176 if (last_timestamp+1 != map->timestamp) {
5177 /*
5178 * Find the entry again. It could have been
5179 * clipped or deleted after we unlocked the map.
5180 */
5181 if (!vm_map_lookup_entry(map,
5182 tmp_entry.vme_start,
5183 &first_entry)) {
5184 if (!user_wire)
5185 panic("vm_map_unwire: re-lookup failed");
5186 entry = first_entry->vme_next;
5187 } else
5188 entry = first_entry;
5189 }
5190 last_timestamp = map->timestamp;
5191
5192 /*
5193 * clear transition bit for all constituent entries
5194 * that were in the original entry (saved in
5195 * tmp_entry). Also check for waiters.
5196 */
5197 while ((entry != vm_map_to_entry(map)) &&
5198 (entry->vme_start < tmp_entry.vme_end)) {
5199 assert(entry->in_transition);
5200 entry->in_transition = FALSE;
5201 if (entry->needs_wakeup) {
5202 entry->needs_wakeup = FALSE;
5203 need_wakeup = TRUE;
5204 }
5205 entry = entry->vme_next;
5206 }
5207 continue;
5208 } else {
5209 vm_map_unlock(map);
5210 vm_map_unwire_nested(entry->object.sub_map,
5211 sub_start, sub_end, user_wire, map_pmap,
5212 pmap_addr);
5213 vm_map_lock(map);
5214
5215 if (last_timestamp+1 != map->timestamp) {
5216 /*
5217 * Find the entry again. It could have been
5218 * clipped or deleted after we unlocked the map.
5219 */
5220 if (!vm_map_lookup_entry(map,
5221 tmp_entry.vme_start,
5222 &first_entry)) {
5223 if (!user_wire)
5224 panic("vm_map_unwire: re-lookup failed");
5225 entry = first_entry->vme_next;
5226 } else
5227 entry = first_entry;
5228 }
5229 last_timestamp = map->timestamp;
5230 }
5231 }
5232
5233
5234 if ((entry->wired_count == 0) ||
5235 (user_wire && entry->user_wired_count == 0)) {
5236 if (!user_wire)
5237 panic("vm_map_unwire: entry is unwired");
5238
5239 entry = entry->vme_next;
5240 continue;
5241 }
5242
5243 assert(entry->wired_count > 0 &&
5244 (!user_wire || entry->user_wired_count > 0));
5245
5246 vm_map_clip_start(map, entry, start);
5247 vm_map_clip_end(map, entry, end);
5248
5249 /*
5250 * Check for holes
5251 * Holes: Next entry should be contiguous unless
5252 * this is the end of the region.
5253 */
5254 if (((entry->vme_end < end) &&
5255 ((entry->vme_next == vm_map_to_entry(map)) ||
5256 (entry->vme_next->vme_start > entry->vme_end)))) {
5257
5258 if (!user_wire)
5259 panic("vm_map_unwire: non-contiguous region");
5260 entry = entry->vme_next;
5261 continue;
5262 }
5263
5264 subtract_wire_counts(map, entry, user_wire);
5265
5266 if (entry->wired_count != 0) {
5267 entry = entry->vme_next;
5268 continue;
5269 }
5270
5271 if(entry->zero_wired_pages) {
5272 entry->zero_wired_pages = FALSE;
5273 }
5274
5275 entry->in_transition = TRUE;
5276 tmp_entry = *entry; /* see comment in vm_map_wire() */
5277
5278 /*
5279 * We can unlock the map now. The in_transition state
5280 * guarantees the existence of the entry.
5281 */
5282 vm_map_unlock(map);
5283 if(map_pmap) {
5284 vm_fault_unwire(map,
5285 &tmp_entry, FALSE, map_pmap, pmap_addr);
5286 } else {
5287 vm_fault_unwire(map,
5288 &tmp_entry, FALSE, map->pmap,
5289 tmp_entry.vme_start);
5290 }
5291 vm_map_lock(map);
5292
5293 if (last_timestamp+1 != map->timestamp) {
5294 /*
5295 * Find the entry again. It could have been clipped
5296 * or deleted after we unlocked the map.
5297 */
5298 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
5299 &first_entry)) {
5300 if (!user_wire)
5301 panic("vm_map_unwire: re-lookup failed");
5302 entry = first_entry->vme_next;
5303 } else
5304 entry = first_entry;
5305 }
5306 last_timestamp = map->timestamp;
5307
5308 /*
5309 * clear transition bit for all constituent entries that
5310 * were in the original entry (saved in tmp_entry). Also
5311 * check for waiters.
5312 */
5313 while ((entry != vm_map_to_entry(map)) &&
5314 (entry->vme_start < tmp_entry.vme_end)) {
5315 assert(entry->in_transition);
5316 entry->in_transition = FALSE;
5317 if (entry->needs_wakeup) {
5318 entry->needs_wakeup = FALSE;
5319 need_wakeup = TRUE;
5320 }
5321 entry = entry->vme_next;
5322 }
5323 }
5324
5325 /*
5326 * We might have fragmented the address space when we wired this
5327 * range of addresses. Attempt to re-coalesce these VM map entries
5328 * with their neighbors now that they're no longer wired.
5329 * Under some circumstances, address space fragmentation can
5330 * prevent VM object shadow chain collapsing, which can cause
5331 * swap space leaks.
5332 */
5333 vm_map_simplify_range(map, start, end);
5334
5335 vm_map_unlock(map);
5336 /*
5337 * wake up anybody waiting on entries that we have unwired.
5338 */
5339 if (need_wakeup)
5340 vm_map_entry_wakeup(map);
5341 return(KERN_SUCCESS);
5342
5343 }
5344
5345 kern_return_t
5346 vm_map_unwire(
5347 register vm_map_t map,
5348 register vm_map_offset_t start,
5349 register vm_map_offset_t end,
5350 boolean_t user_wire)
5351 {
5352 return vm_map_unwire_nested(map, start, end,
5353 user_wire, (pmap_t)NULL, 0);
5354 }
5355
5356
5357 /*
5358 * vm_map_entry_delete: [ internal use only ]
5359 *
5360 * Deallocate the given entry from the target map.
5361 */
5362 static void
5363 vm_map_entry_delete(
5364 register vm_map_t map,
5365 register vm_map_entry_t entry)
5366 {
5367 register vm_map_offset_t s, e;
5368 register vm_object_t object;
5369 register vm_map_t submap;
5370
5371 s = entry->vme_start;
5372 e = entry->vme_end;
5373 assert(page_aligned(s));
5374 assert(page_aligned(e));
5375 if (entry->map_aligned == TRUE) {
5376 assert(VM_MAP_PAGE_ALIGNED(s, VM_MAP_PAGE_MASK(map)));
5377 assert(VM_MAP_PAGE_ALIGNED(e, VM_MAP_PAGE_MASK(map)));
5378 }
5379 assert(entry->wired_count == 0);
5380 assert(entry->user_wired_count == 0);
5381 assert(!entry->permanent);
5382
5383 if (entry->is_sub_map) {
5384 object = NULL;
5385 submap = entry->object.sub_map;
5386 } else {
5387 submap = NULL;
5388 object = entry->object.vm_object;
5389 }
5390
5391 vm_map_store_entry_unlink(map, entry);
5392 map->size -= e - s;
5393
5394 vm_map_entry_dispose(map, entry);
5395
5396 vm_map_unlock(map);
5397 /*
5398 * Deallocate the object only after removing all
5399 * pmap entries pointing to its pages.
5400 */
5401 if (submap)
5402 vm_map_deallocate(submap);
5403 else
5404 vm_object_deallocate(object);
5405
5406 }
5407
5408 void
5409 vm_map_submap_pmap_clean(
5410 vm_map_t map,
5411 vm_map_offset_t start,
5412 vm_map_offset_t end,
5413 vm_map_t sub_map,
5414 vm_map_offset_t offset)
5415 {
5416 vm_map_offset_t submap_start;
5417 vm_map_offset_t submap_end;
5418 vm_map_size_t remove_size;
5419 vm_map_entry_t entry;
5420
5421 submap_end = offset + (end - start);
5422 submap_start = offset;
5423
5424 vm_map_lock_read(sub_map);
5425 if(vm_map_lookup_entry(sub_map, offset, &entry)) {
5426
5427 remove_size = (entry->vme_end - entry->vme_start);
5428 if(offset > entry->vme_start)
5429 remove_size -= offset - entry->vme_start;
5430
5431
5432 if(submap_end < entry->vme_end) {
5433 remove_size -=
5434 entry->vme_end - submap_end;
5435 }
5436 if(entry->is_sub_map) {
5437 vm_map_submap_pmap_clean(
5438 sub_map,
5439 start,
5440 start + remove_size,
5441 entry->object.sub_map,
5442 entry->offset);
5443 } else {
5444
5445 if((map->mapped_in_other_pmaps) && (map->ref_count)
5446 && (entry->object.vm_object != NULL)) {
5447 vm_object_pmap_protect(
5448 entry->object.vm_object,
5449 entry->offset+(offset-entry->vme_start),
5450 remove_size,
5451 PMAP_NULL,
5452 entry->vme_start,
5453 VM_PROT_NONE);
5454 } else {
5455 pmap_remove(map->pmap,
5456 (addr64_t)start,
5457 (addr64_t)(start + remove_size));
5458 }
5459 }
5460 }
5461
5462 entry = entry->vme_next;
5463
5464 while((entry != vm_map_to_entry(sub_map))
5465 && (entry->vme_start < submap_end)) {
5466 remove_size = (entry->vme_end - entry->vme_start);
5467 if(submap_end < entry->vme_end) {
5468 remove_size -= entry->vme_end - submap_end;
5469 }
5470 if(entry->is_sub_map) {
5471 vm_map_submap_pmap_clean(
5472 sub_map,
5473 (start + entry->vme_start) - offset,
5474 ((start + entry->vme_start) - offset) + remove_size,
5475 entry->object.sub_map,
5476 entry->offset);
5477 } else {
5478 if((map->mapped_in_other_pmaps) && (map->ref_count)
5479 && (entry->object.vm_object != NULL)) {
5480 vm_object_pmap_protect(
5481 entry->object.vm_object,
5482 entry->offset,
5483 remove_size,
5484 PMAP_NULL,
5485 entry->vme_start,
5486 VM_PROT_NONE);
5487 } else {
5488 pmap_remove(map->pmap,
5489 (addr64_t)((start + entry->vme_start)
5490 - offset),
5491 (addr64_t)(((start + entry->vme_start)
5492 - offset) + remove_size));
5493 }
5494 }
5495 entry = entry->vme_next;
5496 }
5497 vm_map_unlock_read(sub_map);
5498 return;
5499 }
5500
5501 /*
5502 * vm_map_delete: [ internal use only ]
5503 *
5504 * Deallocates the given address range from the target map.
5505 * Removes all user wirings. Unwires one kernel wiring if
5506 * VM_MAP_REMOVE_KUNWIRE is set. Waits for kernel wirings to go
5507 * away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set. Sleeps
5508 * interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
5509 *
5510 * This routine is called with map locked and leaves map locked.
5511 */
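/*
 * Usage sketch (illustrative): with the map already locked, remove a range
 * and drop one kernel wiring on the way out:
 *
 *	kr = vm_map_delete(map, start, end,
 *			   VM_MAP_REMOVE_KUNWIRE, VM_MAP_NULL);
 *
 * Passing a non-NULL "zap_map" together with VM_MAP_REMOVE_SAVE_ENTRIES
 * transfers the affected entries to that map instead of destroying them.
 */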
5512 static kern_return_t
5513 vm_map_delete(
5514 vm_map_t map,
5515 vm_map_offset_t start,
5516 vm_map_offset_t end,
5517 int flags,
5518 vm_map_t zap_map)
5519 {
5520 vm_map_entry_t entry, next;
5521 struct vm_map_entry *first_entry, tmp_entry;
5522 register vm_map_offset_t s;
5523 register vm_object_t object;
5524 boolean_t need_wakeup;
5525 unsigned int last_timestamp = ~0; /* unlikely value */
5526 int interruptible;
5527
5528 interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
5529 THREAD_ABORTSAFE : THREAD_UNINT;
5530
5531 /*
5532 * All our DMA I/O operations in IOKit are currently done by
5533 * wiring through the map entries of the task requesting the I/O.
5534 * Because of this, we must always wait for kernel wirings
5535 * to go away on the entries before deleting them.
5536 *
5537 * Any caller who wants to actually remove a kernel wiring
5538 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
5539 * properly remove one wiring instead of blasting through
5540 * them all.
5541 */
5542 flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;
5543
5544 while(1) {
5545 /*
5546 * Find the start of the region, and clip it
5547 */
5548 if (vm_map_lookup_entry(map, start, &first_entry)) {
5549 entry = first_entry;
5550 if (map == kalloc_map &&
5551 (entry->vme_start != start ||
5552 entry->vme_end != end)) {
5553 panic("vm_map_delete(%p,0x%llx,0x%llx): "
5554 "mismatched entry %p [0x%llx:0x%llx]\n",
5555 map,
5556 (uint64_t)start,
5557 (uint64_t)end,
5558 entry,
5559 (uint64_t)entry->vme_start,
5560 (uint64_t)entry->vme_end);
5561 }
5562 if (entry->superpage_size && (start & ~SUPERPAGE_MASK)) { /* extend request to whole entry */
5563 start = SUPERPAGE_ROUND_DOWN(start);
5564 continue;
5565 }
5566 if (start == entry->vme_start) {
5567 /*
5568 * No need to clip. We don't want to cause
5569 * any unnecessary unnesting in this case...
5570 */
5571 } else {
5572 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
5573 entry->map_aligned &&
5574 !VM_MAP_PAGE_ALIGNED(
5575 start,
5576 VM_MAP_PAGE_MASK(map))) {
5577 /*
5578 * The entry will no longer be
5579 * map-aligned after clipping
5580 * and the caller said it's OK.
5581 */
5582 entry->map_aligned = FALSE;
5583 }
5584 if (map == kalloc_map) {
5585 panic("vm_map_delete(%p,0x%llx,0x%llx):"
5586 " clipping %p at 0x%llx\n",
5587 map,
5588 (uint64_t)start,
5589 (uint64_t)end,
5590 entry,
5591 (uint64_t)start);
5592 }
5593 vm_map_clip_start(map, entry, start);
5594 }
5595
5596 /*
5597 * Fix the lookup hint now, rather than each
5598 * time through the loop.
5599 */
5600 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
5601 } else {
5602 if (map->pmap == kernel_pmap &&
5603 map->ref_count != 0) {
5604 panic("vm_map_delete(%p,0x%llx,0x%llx): "
5605 "no map entry at 0x%llx\n",
5606 map,
5607 (uint64_t)start,
5608 (uint64_t)end,
5609 (uint64_t)start);
5610 }
5611 entry = first_entry->vme_next;
5612 }
5613 break;
5614 }
5615 if (entry->superpage_size)
5616 end = SUPERPAGE_ROUND_UP(end);
5617
5618 need_wakeup = FALSE;
5619 /*
5620 * Step through all entries in this region
5621 */
5622 s = entry->vme_start;
5623 while ((entry != vm_map_to_entry(map)) && (s < end)) {
5624 /*
5625 * At this point, we have deleted all the memory entries
5626 * between "start" and "s". We still need to delete
5627 * all memory entries between "s" and "end".
5628 * While we were blocked and the map was unlocked, some
5629 * new memory entries could have been re-allocated between
5630 * "start" and "s" and we don't want to mess with those.
5631 * Some of those entries could even have been re-assembled
5632 * with an entry after "s" (in vm_map_simplify_entry()), so
5633 * we may have to vm_map_clip_start() again.
5634 */
5635
5636 if (entry->vme_start >= s) {
5637 /*
5638 * This entry starts on or after "s"
5639 * so no need to clip its start.
5640 */
5641 } else {
5642 /*
5643 * This entry has been re-assembled by a
5644 * vm_map_simplify_entry(). We need to
5645 * re-clip its start.
5646 */
5647 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
5648 entry->map_aligned &&
5649 !VM_MAP_PAGE_ALIGNED(s,
5650 VM_MAP_PAGE_MASK(map))) {
5651 /*
5652 * The entry will no longer be map-aligned
5653 * after clipping and the caller said it's OK.
5654 */
5655 entry->map_aligned = FALSE;
5656 }
5657 if (map == kalloc_map) {
5658 panic("vm_map_delete(%p,0x%llx,0x%llx): "
5659 "clipping %p at 0x%llx\n",
5660 map,
5661 (uint64_t)start,
5662 (uint64_t)end,
5663 entry,
5664 (uint64_t)s);
5665 }
5666 vm_map_clip_start(map, entry, s);
5667 }
5668 if (entry->vme_end <= end) {
5669 /*
5670 * This entry is going away completely, so no need
5671 * to clip and possibly cause an unnecessary unnesting.
5672 */
5673 } else {
5674 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
5675 entry->map_aligned &&
5676 !VM_MAP_PAGE_ALIGNED(end,
5677 VM_MAP_PAGE_MASK(map))) {
5678 /*
5679 * The entry will no longer be map-aligned
5680 * after clipping and the caller said it's OK.
5681 */
5682 entry->map_aligned = FALSE;
5683 }
5684 if (map == kalloc_map) {
5685 panic("vm_map_delete(%p,0x%llx,0x%llx): "
5686 "clipping %p at 0x%llx\n",
5687 map,
5688 (uint64_t)start,
5689 (uint64_t)end,
5690 entry,
5691 (uint64_t)end);
5692 }
5693 vm_map_clip_end(map, entry, end);
5694 }
5695
5696 if (entry->permanent) {
5697 panic("attempt to remove permanent VM map entry "
5698 "%p [0x%llx:0x%llx]\n",
5699 entry, (uint64_t) s, (uint64_t) end);
5700 }
5701
5702
5703 if (entry->in_transition) {
5704 wait_result_t wait_result;
5705
5706 /*
5707 * Another thread is wiring/unwiring this entry.
5708 * Let the other thread know we are waiting.
5709 */
5710 assert(s == entry->vme_start);
5711 entry->needs_wakeup = TRUE;
5712
5713 /*
5714 * wake up anybody waiting on entries that we have
5715 * already unwired/deleted.
5716 */
5717 if (need_wakeup) {
5718 vm_map_entry_wakeup(map);
5719 need_wakeup = FALSE;
5720 }
5721
5722 wait_result = vm_map_entry_wait(map, interruptible);
5723
5724 if (interruptible &&
5725 wait_result == THREAD_INTERRUPTED) {
5726 /*
5727 * We do not clear the needs_wakeup flag,
5728 * since we cannot tell if we were the only one.
5729 */
5730 return KERN_ABORTED;
5731 }
5732
5733 /*
5734 * The entry could have been clipped or it
5735 * may not exist anymore. Look it up again.
5736 */
5737 if (!vm_map_lookup_entry(map, s, &first_entry)) {
5738 assert((map != kernel_map) &&
5739 (!entry->is_sub_map));
5740 /*
5741 * User: use the next entry
5742 */
5743 entry = first_entry->vme_next;
5744 s = entry->vme_start;
5745 } else {
5746 entry = first_entry;
5747 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
5748 }
5749 last_timestamp = map->timestamp;
5750 continue;
5751 } /* end in_transition */
5752
5753 if (entry->wired_count) {
5754 boolean_t user_wire;
5755
5756 user_wire = entry->user_wired_count > 0;
5757
5758 /*
5759 * Remove a kernel wiring if requested
5760 */
5761 if (flags & VM_MAP_REMOVE_KUNWIRE) {
5762 entry->wired_count--;
5763 }
5764
5765 /*
5766 * Remove all user wirings for proper accounting
5767 */
5768 if (entry->user_wired_count > 0) {
5769 while (entry->user_wired_count)
5770 subtract_wire_counts(map, entry, user_wire);
5771 }
5772
5773 if (entry->wired_count != 0) {
5774 assert(map != kernel_map);
5775 /*
5776 * Cannot continue. Typical case is when
5777 * a user thread has physical I/O pending
5778 * on this page. Either wait for the
5779 * kernel wiring to go away or return an
5780 * error.
5781 */
5782 if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) {
5783 wait_result_t wait_result;
5784
5785 assert(s == entry->vme_start);
5786 entry->needs_wakeup = TRUE;
5787 wait_result = vm_map_entry_wait(map,
5788 interruptible);
5789
5790 if (interruptible &&
5791 wait_result == THREAD_INTERRUPTED) {
5792 /*
5793 * We do not clear the
5794 * needs_wakeup flag, since we
5795 * cannot tell if we were the
5796 * only one.
5797 */
5798 return KERN_ABORTED;
5799 }
5800
5801 /*
5802 * The entry could have been clipped or
5803 * it may not exist anymore. Look it
5804 * up again.
5805 */
5806 if (!vm_map_lookup_entry(map, s,
5807 &first_entry)) {
5808 assert(map != kernel_map);
5809 /*
5810 * User: use the next entry
5811 */
5812 entry = first_entry->vme_next;
5813 s = entry->vme_start;
5814 } else {
5815 entry = first_entry;
5816 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
5817 }
5818 last_timestamp = map->timestamp;
5819 continue;
5820 }
5821 else {
5822 return KERN_FAILURE;
5823 }
5824 }
5825
5826 entry->in_transition = TRUE;
5827 /*
5828 * copy current entry. see comment in vm_map_wire()
5829 */
5830 tmp_entry = *entry;
5831 assert(s == entry->vme_start);
5832
5833 /*
5834 * We can unlock the map now. The in_transition
5835 * state guarantees the existence of the entry.
5836 */
5837 vm_map_unlock(map);
5838
5839 if (tmp_entry.is_sub_map) {
5840 vm_map_t sub_map;
5841 vm_map_offset_t sub_start, sub_end;
5842 pmap_t pmap;
5843 vm_map_offset_t pmap_addr;
5844
5845
5846 sub_map = tmp_entry.object.sub_map;
5847 sub_start = tmp_entry.offset;
5848 sub_end = sub_start + (tmp_entry.vme_end -
5849 tmp_entry.vme_start);
5850 if (tmp_entry.use_pmap) {
5851 pmap = sub_map->pmap;
5852 pmap_addr = tmp_entry.vme_start;
5853 } else {
5854 pmap = map->pmap;
5855 pmap_addr = tmp_entry.vme_start;
5856 }
5857 (void) vm_map_unwire_nested(sub_map,
5858 sub_start, sub_end,
5859 user_wire,
5860 pmap, pmap_addr);
5861 } else {
5862
5863 if (tmp_entry.object.vm_object == kernel_object) {
5864 pmap_protect_options(
5865 map->pmap,
5866 tmp_entry.vme_start,
5867 tmp_entry.vme_end,
5868 VM_PROT_NONE,
5869 PMAP_OPTIONS_REMOVE,
5870 NULL);
5871 }
5872 vm_fault_unwire(map, &tmp_entry,
5873 tmp_entry.object.vm_object == kernel_object,
5874 map->pmap, tmp_entry.vme_start);
5875 }
5876
5877 vm_map_lock(map);
5878
5879 if (last_timestamp+1 != map->timestamp) {
5880 /*
5881 * Find the entry again. It could have
5882 * been clipped after we unlocked the map.
5883 */
5884 if (!vm_map_lookup_entry(map, s, &first_entry)){
5885 assert((map != kernel_map) &&
5886 (!entry->is_sub_map));
5887 first_entry = first_entry->vme_next;
5888 s = first_entry->vme_start;
5889 } else {
5890 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
5891 }
5892 } else {
5893 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
5894 first_entry = entry;
5895 }
5896
5897 last_timestamp = map->timestamp;
5898
5899 entry = first_entry;
5900 while ((entry != vm_map_to_entry(map)) &&
5901 (entry->vme_start < tmp_entry.vme_end)) {
5902 assert(entry->in_transition);
5903 entry->in_transition = FALSE;
5904 if (entry->needs_wakeup) {
5905 entry->needs_wakeup = FALSE;
5906 need_wakeup = TRUE;
5907 }
5908 entry = entry->vme_next;
5909 }
5910 /*
5911 * We have unwired the entry(s). Go back and
5912 * delete them.
5913 */
5914 entry = first_entry;
5915 continue;
5916 }
5917
5918 /* entry is unwired */
5919 assert(entry->wired_count == 0);
5920 assert(entry->user_wired_count == 0);
5921
5922 assert(s == entry->vme_start);
5923
5924 if (flags & VM_MAP_REMOVE_NO_PMAP_CLEANUP) {
5925 /*
5926 * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
5927 * vm_map_delete(), some map entries might have been
5928 * transferred to a "zap_map", which doesn't have a
5929 * pmap. The original pmap has already been flushed
5930 * in the vm_map_delete() call targeting the original
5931 * map, but when we get to destroying the "zap_map",
5932 * we don't have any pmap to flush, so let's just skip
5933 * all this.
5934 */
5935 } else if (entry->is_sub_map) {
5936 if (entry->use_pmap) {
5937 #ifndef NO_NESTED_PMAP
5938 pmap_unnest(map->pmap,
5939 (addr64_t)entry->vme_start,
5940 entry->vme_end - entry->vme_start);
5941 #endif /* NO_NESTED_PMAP */
5942 if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
5943 /* clean up parent map/maps */
5944 vm_map_submap_pmap_clean(
5945 map, entry->vme_start,
5946 entry->vme_end,
5947 entry->object.sub_map,
5948 entry->offset);
5949 }
5950 } else {
5951 vm_map_submap_pmap_clean(
5952 map, entry->vme_start, entry->vme_end,
5953 entry->object.sub_map,
5954 entry->offset);
5955 }
5956 } else if (entry->object.vm_object != kernel_object &&
5957 entry->object.vm_object != compressor_object) {
5958 object = entry->object.vm_object;
5959 if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
5960 vm_object_pmap_protect_options(
5961 object, entry->offset,
5962 entry->vme_end - entry->vme_start,
5963 PMAP_NULL,
5964 entry->vme_start,
5965 VM_PROT_NONE,
5966 PMAP_OPTIONS_REMOVE);
5967 } else if ((entry->object.vm_object !=
5968 VM_OBJECT_NULL) ||
5969 (map->pmap == kernel_pmap)) {
5970 /* Remove translations associated
5971 * with this range unless the entry
5972 * does not have an object. For the
5973 * kernel map or a descendant, remove
5974 * them even without an object, since
5975 * the platform could potentially create
5976 * "backdoor" mappings invisible to the
5977 * VM. It is expected that objectless,
5978 * non-kernel ranges do not have such
5979 * VM-invisible translations.
5980 */
5981 pmap_remove_options(map->pmap,
5982 (addr64_t)entry->vme_start,
5983 (addr64_t)entry->vme_end,
5984 PMAP_OPTIONS_REMOVE);
5985 }
5986 }
5987
5988 if (entry->iokit_acct) {
5989 /* alternate accounting */
5990 vm_map_iokit_unmapped_region(map,
5991 (entry->vme_end -
5992 entry->vme_start));
5993 entry->iokit_acct = FALSE;
5994 }
5995
5996 /*
5997 * All pmap mappings for this map entry must have been
5998 * cleared by now.
5999 */
6000 #if DEBUG
6001 assert(vm_map_pmap_is_empty(map,
6002 entry->vme_start,
6003 entry->vme_end));
6004 #endif /* DEBUG */
6005
6006 next = entry->vme_next;
6007
6008 if (map->pmap == kernel_pmap &&
6009 map->ref_count != 0 &&
6010 entry->vme_end < end &&
6011 (next == vm_map_to_entry(map) ||
6012 next->vme_start != entry->vme_end)) {
6013 panic("vm_map_delete(%p,0x%llx,0x%llx): "
6014 "hole after %p at 0x%llx\n",
6015 map,
6016 (uint64_t)start,
6017 (uint64_t)end,
6018 entry,
6019 (uint64_t)entry->vme_end);
6020 }
6021
6022 s = next->vme_start;
6023 last_timestamp = map->timestamp;
6024
6025 if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) &&
6026 zap_map != VM_MAP_NULL) {
6027 vm_map_size_t entry_size;
6028 /*
6029 * The caller wants to save the affected VM map entries
6030 * into the "zap_map". The caller will take care of
6031 * these entries.
6032 */
6033 /* unlink the entry from "map" ... */
6034 vm_map_store_entry_unlink(map, entry);
6035 /* ... and add it to the end of the "zap_map" */
6036 vm_map_store_entry_link(zap_map,
6037 vm_map_last_entry(zap_map),
6038 entry);
6039 entry_size = entry->vme_end - entry->vme_start;
6040 map->size -= entry_size;
6041 zap_map->size += entry_size;
6042 /* we didn't unlock the map, so no timestamp increase */
6043 last_timestamp--;
6044 } else {
6045 vm_map_entry_delete(map, entry);
6046 /* vm_map_entry_delete unlocks the map */
6047 vm_map_lock(map);
6048 }
6049
6050 entry = next;
6051
6052 if(entry == vm_map_to_entry(map)) {
6053 break;
6054 }
6055 if (last_timestamp+1 != map->timestamp) {
6056 /*
6057 * We are responsible for deleting everything
6058 * from the given space. If someone has interfered,
6059 * we pick up where we left off; back fills should
6060 * be all right for anyone except map_delete, and
6061 * we have to assume that the task has been fully
6062 * disabled before we get here.
6063 */
6064 if (!vm_map_lookup_entry(map, s, &entry)){
6065 entry = entry->vme_next;
6066 s = entry->vme_start;
6067 } else {
6068 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
6069 }
6070 /*
6071 * Others can not only allocate behind us, we can
6072 * also see entries coalesce while we don't have the map lock.
6073 */
6074 if(entry == vm_map_to_entry(map)) {
6075 break;
6076 }
6077 }
6078 last_timestamp = map->timestamp;
6079 }
6080
6081 if (map->wait_for_space)
6082 thread_wakeup((event_t) map);
6083 /*
6084 * wake up anybody waiting on entries that we have already deleted.
6085 */
6086 if (need_wakeup)
6087 vm_map_entry_wakeup(map);
6088
6089 return KERN_SUCCESS;
6090 }
6091
6092 /*
6093 * vm_map_remove:
6094 *
6095 * Remove the given address range from the target map.
6096 * This is the exported form of vm_map_delete.
6097 */
6098 kern_return_t
6099 vm_map_remove(
6100 register vm_map_t map,
6101 register vm_map_offset_t start,
6102 register vm_map_offset_t end,
6103 register boolean_t flags)
6104 {
6105 register kern_return_t result;
6106
6107 vm_map_lock(map);
6108 VM_MAP_RANGE_CHECK(map, start, end);
6109 /*
6110 * For the zone_map, the kernel controls the allocation/freeing of memory.
6111 * Any free to the zone_map should be within the bounds of the map and
6112 * should free up memory. If the VM_MAP_RANGE_CHECK() silently converts a
6113 * free to the zone_map into a no-op, there is a problem and we should
6114 * panic.
6115 */
6116 if ((map == zone_map) && (start == end))
6117 panic("Nothing being freed to the zone_map. start = end = %p\n", (void *)start);
6118 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
6119 vm_map_unlock(map);
6120
6121 return(result);
6122 }
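/*
 * Editorial example (not part of the original source): a minimal sketch of
 * how a kernel client typically tears down a mapping with vm_map_remove().
 * The helper name and the assumption that "addr"/"size" describe an existing
 * mapping are hypothetical.
 */
#if 0 /* illustrative only */
static void
example_remove_range(vm_map_t map, vm_map_offset_t addr, vm_map_size_t size)
{
	kern_return_t kr;

	/* round to the map's page size, as callers in this file do */
	kr = vm_map_remove(map,
			   vm_map_trunc_page(addr, VM_MAP_PAGE_MASK(map)),
			   vm_map_round_page(addr + size, VM_MAP_PAGE_MASK(map)),
			   VM_MAP_NO_FLAGS);
	assert(kr == KERN_SUCCESS);
}
#endif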
6123
6124
6125 /*
6126 * Routine: vm_map_copy_discard
6127 *
6128 * Description:
6129 * Dispose of a map copy object (returned by
6130 * vm_map_copyin).
6131 */
6132 void
6133 vm_map_copy_discard(
6134 vm_map_copy_t copy)
6135 {
6136 if (copy == VM_MAP_COPY_NULL)
6137 return;
6138
6139 switch (copy->type) {
6140 case VM_MAP_COPY_ENTRY_LIST:
6141 while (vm_map_copy_first_entry(copy) !=
6142 vm_map_copy_to_entry(copy)) {
6143 vm_map_entry_t entry = vm_map_copy_first_entry(copy);
6144
6145 vm_map_copy_entry_unlink(copy, entry);
6146 if (entry->is_sub_map) {
6147 vm_map_deallocate(entry->object.sub_map);
6148 } else {
6149 vm_object_deallocate(entry->object.vm_object);
6150 }
6151 vm_map_copy_entry_dispose(copy, entry);
6152 }
6153 break;
6154 case VM_MAP_COPY_OBJECT:
6155 vm_object_deallocate(copy->cpy_object);
6156 break;
6157 case VM_MAP_COPY_KERNEL_BUFFER:
6158
6159 /*
6160 * The vm_map_copy_t and possibly the data buffer were
6161 * allocated by a single call to kalloc(), i.e. the
6162 * vm_map_copy_t was not allocated out of the zone.
6163 */
6164 kfree(copy, copy->cpy_kalloc_size);
6165 return;
6166 }
6167 zfree(vm_map_copy_zone, copy);
6168 }
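/*
 * Editorial example (not part of the original source): the ownership rule
 * this routine supports. A routine that receives a vm_map_copy_t (e.g.
 * out-of-line IPC data) and decides not to use it must discard it itself;
 * on success the copy is consumed by vm_map_copyout() or
 * vm_map_copy_overwrite(). The names example_wants_data and user_addr are
 * hypothetical.
 */
#if 0 /* illustrative fragment; "copy" is the received out-of-line data */
	vm_map_address_t	user_addr = 0;
	kern_return_t		kr;

	if (!example_wants_data) {		/* hypothetical condition */
		vm_map_copy_discard(copy);	/* we still own it */
		return KERN_SUCCESS;
	}
	kr = vm_map_copyout(current_map(), &user_addr, copy);
	/* on success, "copy" has been consumed; do not discard it again */
	return kr;
#endif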
6169
6170 /*
6171 * Routine: vm_map_copy_copy
6172 *
6173 * Description:
6174 * Move the information in a map copy object to
6175 * a new map copy object, leaving the old one
6176 * empty.
6177 *
6178 * This is used by kernel routines that need
6179 * to look at out-of-line data (in copyin form)
6180 * before deciding whether to return SUCCESS.
6181 * If the routine returns FAILURE, the original
6182 * copy object will be deallocated; therefore,
6183 * these routines must make a copy of the copy
6184 * object and leave the original empty so that
6185 * deallocation will not fail.
6186 */
6187 vm_map_copy_t
6188 vm_map_copy_copy(
6189 vm_map_copy_t copy)
6190 {
6191 vm_map_copy_t new_copy;
6192
6193 if (copy == VM_MAP_COPY_NULL)
6194 return VM_MAP_COPY_NULL;
6195
6196 /*
6197 * Allocate a new copy object, and copy the information
6198 * from the old one into it.
6199 */
6200
6201 new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
6202 *new_copy = *copy;
6203
6204 if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
6205 /*
6206 * The links in the entry chain must be
6207 * changed to point to the new copy object.
6208 */
6209 vm_map_copy_first_entry(copy)->vme_prev
6210 = vm_map_copy_to_entry(new_copy);
6211 vm_map_copy_last_entry(copy)->vme_next
6212 = vm_map_copy_to_entry(new_copy);
6213 }
6214
6215 /*
6216 * Change the old copy object into one that contains
6217 * nothing to be deallocated.
6218 */
6219 copy->type = VM_MAP_COPY_OBJECT;
6220 copy->cpy_object = VM_OBJECT_NULL;
6221
6222 /*
6223 * Return the new object.
6224 */
6225 return new_copy;
6226 }
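/*
 * Editorial example (not part of the original source): the pattern the
 * comment above describes. A server routine that may fail after looking at
 * out-of-line data clones the copy object first, so the caller's later
 * deallocation of the (now empty) original is harmless. The validation
 * routine is hypothetical.
 */
#if 0 /* illustrative fragment; "copy" is the received out-of-line data */
	vm_map_copy_t clone;

	clone = vm_map_copy_copy(copy);		/* "copy" is left empty */
	if (!example_validate(clone)) {		/* hypothetical check */
		vm_map_copy_discard(clone);
		/* caller will discard the empty "copy"; that is now a no-op */
		return KERN_INVALID_ARGUMENT;
	}
	/* keep "clone"; the data now lives there */
#endif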
6227
6228 static kern_return_t
6229 vm_map_overwrite_submap_recurse(
6230 vm_map_t dst_map,
6231 vm_map_offset_t dst_addr,
6232 vm_map_size_t dst_size)
6233 {
6234 vm_map_offset_t dst_end;
6235 vm_map_entry_t tmp_entry;
6236 vm_map_entry_t entry;
6237 kern_return_t result;
6238 boolean_t encountered_sub_map = FALSE;
6239
6240
6241
6242 /*
6243 * Verify that the destination is all writeable
6244 * initially. We have to trunc the destination
6245 * address and round the copy size or we'll end up
6246 * splitting entries in strange ways.
6247 */
6248
6249 dst_end = vm_map_round_page(dst_addr + dst_size,
6250 VM_MAP_PAGE_MASK(dst_map));
6251 vm_map_lock(dst_map);
6252
6253 start_pass_1:
6254 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
6255 vm_map_unlock(dst_map);
6256 return(KERN_INVALID_ADDRESS);
6257 }
6258
6259 vm_map_clip_start(dst_map,
6260 tmp_entry,
6261 vm_map_trunc_page(dst_addr,
6262 VM_MAP_PAGE_MASK(dst_map)));
6263 if (tmp_entry->is_sub_map) {
6264 /* clipping did unnest if needed */
6265 assert(!tmp_entry->use_pmap);
6266 }
6267
6268 for (entry = tmp_entry;;) {
6269 vm_map_entry_t next;
6270
6271 next = entry->vme_next;
6272 while(entry->is_sub_map) {
6273 vm_map_offset_t sub_start;
6274 vm_map_offset_t sub_end;
6275 vm_map_offset_t local_end;
6276
6277 if (entry->in_transition) {
6278 /*
6279 * Say that we are waiting, and wait for entry.
6280 */
6281 entry->needs_wakeup = TRUE;
6282 vm_map_entry_wait(dst_map, THREAD_UNINT);
6283
6284 goto start_pass_1;
6285 }
6286
6287 encountered_sub_map = TRUE;
6288 sub_start = entry->offset;
6289
6290 if(entry->vme_end < dst_end)
6291 sub_end = entry->vme_end;
6292 else
6293 sub_end = dst_end;
6294 sub_end -= entry->vme_start;
6295 sub_end += entry->offset;
6296 local_end = entry->vme_end;
6297 vm_map_unlock(dst_map);
6298
6299 result = vm_map_overwrite_submap_recurse(
6300 entry->object.sub_map,
6301 sub_start,
6302 sub_end - sub_start);
6303
6304 if(result != KERN_SUCCESS)
6305 return result;
6306 if (dst_end <= entry->vme_end)
6307 return KERN_SUCCESS;
6308 vm_map_lock(dst_map);
6309 if(!vm_map_lookup_entry(dst_map, local_end,
6310 &tmp_entry)) {
6311 vm_map_unlock(dst_map);
6312 return(KERN_INVALID_ADDRESS);
6313 }
6314 entry = tmp_entry;
6315 next = entry->vme_next;
6316 }
6317
6318 if ( ! (entry->protection & VM_PROT_WRITE)) {
6319 vm_map_unlock(dst_map);
6320 return(KERN_PROTECTION_FAILURE);
6321 }
6322
6323 /*
6324 * If the entry is in transition, we must wait
6325 * for it to exit that state. Anything could happen
6326 * when we unlock the map, so start over.
6327 */
6328 if (entry->in_transition) {
6329
6330 /*
6331 * Say that we are waiting, and wait for entry.
6332 */
6333 entry->needs_wakeup = TRUE;
6334 vm_map_entry_wait(dst_map, THREAD_UNINT);
6335
6336 goto start_pass_1;
6337 }
6338
6339 /*
6340 * our range is contained completely within this map entry
6341 */
6342 if (dst_end <= entry->vme_end) {
6343 vm_map_unlock(dst_map);
6344 return KERN_SUCCESS;
6345 }
6346 /*
6347 * check that range specified is contiguous region
6348 */
6349 if ((next == vm_map_to_entry(dst_map)) ||
6350 (next->vme_start != entry->vme_end)) {
6351 vm_map_unlock(dst_map);
6352 return(KERN_INVALID_ADDRESS);
6353 }
6354
6355 /*
6356 * Check for permanent objects in the destination.
6357 */
6358 if ((entry->object.vm_object != VM_OBJECT_NULL) &&
6359 ((!entry->object.vm_object->internal) ||
6360 (entry->object.vm_object->true_share))) {
6361 if(encountered_sub_map) {
6362 vm_map_unlock(dst_map);
6363 return(KERN_FAILURE);
6364 }
6365 }
6366
6367
6368 entry = next;
6369 }/* for */
6370 vm_map_unlock(dst_map);
6371 return(KERN_SUCCESS);
6372 }
6373
6374 /*
6375 * Routine: vm_map_copy_overwrite
6376 *
6377 * Description:
6378 * Copy the memory described by the map copy
6379 * object (copy; returned by vm_map_copyin) onto
6380 * the specified destination region (dst_map, dst_addr).
6381 * The destination must be writeable.
6382 *
6383 * Unlike vm_map_copyout, this routine actually
6384 * writes over previously-mapped memory. If the
6385 * previous mapping was to a permanent (user-supplied)
6386 * memory object, it is preserved.
6387 *
6388 * The attributes (protection and inheritance) of the
6389 * destination region are preserved.
6390 *
6391 * If successful, consumes the copy object.
6392 * Otherwise, the caller is responsible for it.
6393 *
6394 * Implementation notes:
6395 * To overwrite aligned temporary virtual memory, it is
6396 * sufficient to remove the previous mapping and insert
6397 * the new copy. This replacement is done either on
6398 * the whole region (if no permanent virtual memory
6399 * objects are embedded in the destination region) or
6400 * in individual map entries.
6401 *
6402 * To overwrite permanent virtual memory, it is necessary
6403 * to copy each page, as the external memory management
6404 * interface currently does not provide any optimizations.
6405 *
6406 * Unaligned memory also has to be copied. It is possible
6407 * to use 'vm_trickery' to copy the aligned data. This is
6408 * not done but not hard to implement.
6409 *
6410 * Once a page of permanent memory has been overwritten,
6411 * it is impossible to interrupt this function; otherwise,
6412 * the call would be neither atomic nor location-independent.
6413 * The kernel-state portion of a user thread must be
6414 * interruptible.
6415 *
6416 * It may be expensive to forward all requests that might
6417 * overwrite permanent memory (vm_write, vm_copy) to
6418 * uninterruptible kernel threads. This routine may be
6419 * called by interruptible threads; however, success is
6420 * not guaranteed -- if the request cannot be performed
6421 * atomically and interruptibly, an error indication is
6422 * returned.
6423 */
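/*
 * Editorial example (not part of the original source): a minimal sketch of
 * the usual copyin/overwrite pairing described above. The helper name is
 * hypothetical; on success the copy object is consumed, on failure it is
 * still owned by the caller.
 */
#if 0 /* illustrative only */
static kern_return_t
example_overwrite_range(
	vm_map_t		src_map,
	vm_map_address_t	src_addr,
	vm_map_t		dst_map,
	vm_map_address_t	dst_addr,
	vm_map_size_t		len)
{
	vm_map_copy_t	copy;
	kern_return_t	kr;

	kr = vm_map_copyin(src_map, src_addr, len,
			   FALSE,		/* don't destroy the source */
			   &copy);
	if (kr != KERN_SUCCESS)
		return kr;

	kr = vm_map_copy_overwrite(dst_map, dst_addr, copy,
				   FALSE);	/* not interruptible */
	if (kr != KERN_SUCCESS)
		vm_map_copy_discard(copy);	/* failure: copy is still ours */
	return kr;				/* success: copy was consumed */
}
#endif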
6424
6425 static kern_return_t
6426 vm_map_copy_overwrite_nested(
6427 vm_map_t dst_map,
6428 vm_map_address_t dst_addr,
6429 vm_map_copy_t copy,
6430 boolean_t interruptible,
6431 pmap_t pmap,
6432 boolean_t discard_on_success)
6433 {
6434 vm_map_offset_t dst_end;
6435 vm_map_entry_t tmp_entry;
6436 vm_map_entry_t entry;
6437 kern_return_t kr;
6438 boolean_t aligned = TRUE;
6439 boolean_t contains_permanent_objects = FALSE;
6440 boolean_t encountered_sub_map = FALSE;
6441 vm_map_offset_t base_addr;
6442 vm_map_size_t copy_size;
6443 vm_map_size_t total_size;
6444
6445
6446 /*
6447 * Check for null copy object.
6448 */
6449
6450 if (copy == VM_MAP_COPY_NULL)
6451 return(KERN_SUCCESS);
6452
6453 /*
6454 * Check for special kernel buffer allocated
6455 * by new_ipc_kmsg_copyin.
6456 */
6457
6458 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
6459 return(vm_map_copyout_kernel_buffer(
6460 dst_map, &dst_addr,
6461 copy, TRUE, discard_on_success));
6462 }
6463
6464 /*
6465 * Only works for entry lists at the moment. Will
6466 * support page lists later.
6467 */
6468
6469 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
6470
6471 if (copy->size == 0) {
6472 if (discard_on_success)
6473 vm_map_copy_discard(copy);
6474 return(KERN_SUCCESS);
6475 }
6476
6477 /*
6478 * Verify that the destination is all writeable
6479 * initially. We have to trunc the destination
6480 * address and round the copy size or we'll end up
6481 * splitting entries in strange ways.
6482 */
6483
6484 if (!VM_MAP_PAGE_ALIGNED(copy->size,
6485 VM_MAP_PAGE_MASK(dst_map)) ||
6486 !VM_MAP_PAGE_ALIGNED(copy->offset,
6487 VM_MAP_PAGE_MASK(dst_map)) ||
6488 !VM_MAP_PAGE_ALIGNED(dst_addr,
6489 VM_MAP_PAGE_MASK(dst_map)))
6490 {
6491 aligned = FALSE;
6492 dst_end = vm_map_round_page(dst_addr + copy->size,
6493 VM_MAP_PAGE_MASK(dst_map));
6494 } else {
6495 dst_end = dst_addr + copy->size;
6496 }
6497
6498 vm_map_lock(dst_map);
6499
6500 /* LP64todo - remove this check when vm_map_commpage64()
6501 * no longer has to stuff in a map_entry for the commpage
6502 * above the map's max_offset.
6503 */
6504 if (dst_addr >= dst_map->max_offset) {
6505 vm_map_unlock(dst_map);
6506 return(KERN_INVALID_ADDRESS);
6507 }
6508
6509 start_pass_1:
6510 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
6511 vm_map_unlock(dst_map);
6512 return(KERN_INVALID_ADDRESS);
6513 }
6514 vm_map_clip_start(dst_map,
6515 tmp_entry,
6516 vm_map_trunc_page(dst_addr,
6517 VM_MAP_PAGE_MASK(dst_map)));
6518 for (entry = tmp_entry;;) {
6519 vm_map_entry_t next = entry->vme_next;
6520
6521 while(entry->is_sub_map) {
6522 vm_map_offset_t sub_start;
6523 vm_map_offset_t sub_end;
6524 vm_map_offset_t local_end;
6525
6526 if (entry->in_transition) {
6527
6528 /*
6529 * Say that we are waiting, and wait for entry.
6530 */
6531 entry->needs_wakeup = TRUE;
6532 vm_map_entry_wait(dst_map, THREAD_UNINT);
6533
6534 goto start_pass_1;
6535 }
6536
6537 local_end = entry->vme_end;
6538 if (!(entry->needs_copy)) {
6539 /* if needs_copy we are a COW submap */
6540 /* in such a case we just replace so */
6541 /* there is no need for the follow- */
6542 /* ing check. */
6543 encountered_sub_map = TRUE;
6544 sub_start = entry->offset;
6545
6546 if(entry->vme_end < dst_end)
6547 sub_end = entry->vme_end;
6548 else
6549 sub_end = dst_end;
6550 sub_end -= entry->vme_start;
6551 sub_end += entry->offset;
6552 vm_map_unlock(dst_map);
6553
6554 kr = vm_map_overwrite_submap_recurse(
6555 entry->object.sub_map,
6556 sub_start,
6557 sub_end - sub_start);
6558 if(kr != KERN_SUCCESS)
6559 return kr;
6560 vm_map_lock(dst_map);
6561 }
6562
6563 if (dst_end <= entry->vme_end)
6564 goto start_overwrite;
6565 if(!vm_map_lookup_entry(dst_map, local_end,
6566 &entry)) {
6567 vm_map_unlock(dst_map);
6568 return(KERN_INVALID_ADDRESS);
6569 }
6570 next = entry->vme_next;
6571 }
6572
6573 if ( ! (entry->protection & VM_PROT_WRITE)) {
6574 vm_map_unlock(dst_map);
6575 return(KERN_PROTECTION_FAILURE);
6576 }
6577
6578 /*
6579 * If the entry is in transition, we must wait
6580 * for it to exit that state. Anything could happen
6581 * when we unlock the map, so start over.
6582 */
6583 if (entry->in_transition) {
6584
6585 /*
6586 * Say that we are waiting, and wait for entry.
6587 */
6588 entry->needs_wakeup = TRUE;
6589 vm_map_entry_wait(dst_map, THREAD_UNINT);
6590
6591 goto start_pass_1;
6592 }
6593
6594 /*
6595 * our range is contained completely within this map entry
6596 */
6597 if (dst_end <= entry->vme_end)
6598 break;
6599 /*
6600 * check that range specified is contiguous region
6601 */
6602 if ((next == vm_map_to_entry(dst_map)) ||
6603 (next->vme_start != entry->vme_end)) {
6604 vm_map_unlock(dst_map);
6605 return(KERN_INVALID_ADDRESS);
6606 }
6607
6608
6609 /*
6610 * Check for permanent objects in the destination.
6611 */
6612 if ((entry->object.vm_object != VM_OBJECT_NULL) &&
6613 ((!entry->object.vm_object->internal) ||
6614 (entry->object.vm_object->true_share))) {
6615 contains_permanent_objects = TRUE;
6616 }
6617
6618 entry = next;
6619 }/* for */
6620
6621 start_overwrite:
6622 /*
6623 * If there are permanent objects in the destination, then
6624 * the copy cannot be interrupted.
6625 */
6626
6627 if (interruptible && contains_permanent_objects) {
6628 vm_map_unlock(dst_map);
6629 return(KERN_FAILURE); /* XXX */
6630 }
6631
6632 /*
6633 *
6634 * Make a second pass, overwriting the data.
6635 * At the beginning of each loop iteration,
6636 * the next entry to be overwritten is "tmp_entry"
6637 * (initially, the value returned from the lookup above),
6638 * and the starting address expected in that entry
6639 * is "start".
6640 */
6641
6642 total_size = copy->size;
6643 if(encountered_sub_map) {
6644 copy_size = 0;
6645 /* re-calculate tmp_entry since we've had the map */
6646 /* unlocked */
6647 if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
6648 vm_map_unlock(dst_map);
6649 return(KERN_INVALID_ADDRESS);
6650 }
6651 } else {
6652 copy_size = copy->size;
6653 }
6654
6655 base_addr = dst_addr;
6656 while(TRUE) {
6657 /* deconstruct the copy object and do in parts */
6658 /* only in sub_map, interruptible case */
6659 vm_map_entry_t copy_entry;
6660 vm_map_entry_t previous_prev = VM_MAP_ENTRY_NULL;
6661 vm_map_entry_t next_copy = VM_MAP_ENTRY_NULL;
6662 int nentries;
6663 int remaining_entries = 0;
6664 vm_map_offset_t new_offset = 0;
6665
6666 for (entry = tmp_entry; copy_size == 0;) {
6667 vm_map_entry_t next;
6668
6669 next = entry->vme_next;
6670
6671 /* tmp_entry and base address are moved along */
6672 /* each time we encounter a sub-map. Otherwise */
6673 /* entry can outpace tmp_entry, and the copy_size */
6674 /* may reflect the distance between them. */
6675 /* If the current entry is found to be in transition, */
6676 /* we will start over at the beginning or at the last */
6677 /* encounter of a submap, as dictated by base_addr; */
6678 /* we will zero copy_size accordingly. */
6679 if (entry->in_transition) {
6680 /*
6681 * Say that we are waiting, and wait for entry.
6682 */
6683 entry->needs_wakeup = TRUE;
6684 vm_map_entry_wait(dst_map, THREAD_UNINT);
6685
6686 if(!vm_map_lookup_entry(dst_map, base_addr,
6687 &tmp_entry)) {
6688 vm_map_unlock(dst_map);
6689 return(KERN_INVALID_ADDRESS);
6690 }
6691 copy_size = 0;
6692 entry = tmp_entry;
6693 continue;
6694 }
6695 if(entry->is_sub_map) {
6696 vm_map_offset_t sub_start;
6697 vm_map_offset_t sub_end;
6698 vm_map_offset_t local_end;
6699
6700 if (entry->needs_copy) {
6701 /* if this is a COW submap */
6702 /* just back the range with an */
6703 /* anonymous entry */
6704 if(entry->vme_end < dst_end)
6705 sub_end = entry->vme_end;
6706 else
6707 sub_end = dst_end;
6708 if(entry->vme_start < base_addr)
6709 sub_start = base_addr;
6710 else
6711 sub_start = entry->vme_start;
6712 vm_map_clip_end(
6713 dst_map, entry, sub_end);
6714 vm_map_clip_start(
6715 dst_map, entry, sub_start);
6716 assert(!entry->use_pmap);
6717 entry->is_sub_map = FALSE;
6718 vm_map_deallocate(
6719 entry->object.sub_map);
6720 entry->object.sub_map = NULL;
6721 entry->is_shared = FALSE;
6722 entry->needs_copy = FALSE;
6723 entry->offset = 0;
6724 /*
6725 * XXX FBDP
6726 * We should propagate the protections
6727 * of the submap entry here instead
6728 * of forcing them to VM_PROT_ALL...
6729 * Or better yet, we should inherit
6730 * the protection of the copy_entry.
6731 */
6732 entry->protection = VM_PROT_ALL;
6733 entry->max_protection = VM_PROT_ALL;
6734 entry->wired_count = 0;
6735 entry->user_wired_count = 0;
6736 if(entry->inheritance
6737 == VM_INHERIT_SHARE)
6738 entry->inheritance = VM_INHERIT_COPY;
6739 continue;
6740 }
6741 /* first take care of any non-sub_map */
6742 /* entries to send */
6743 if(base_addr < entry->vme_start) {
6744 /* stuff to send */
6745 copy_size =
6746 entry->vme_start - base_addr;
6747 break;
6748 }
6749 sub_start = entry->offset;
6750
6751 if(entry->vme_end < dst_end)
6752 sub_end = entry->vme_end;
6753 else
6754 sub_end = dst_end;
6755 sub_end -= entry->vme_start;
6756 sub_end += entry->offset;
6757 local_end = entry->vme_end;
6758 vm_map_unlock(dst_map);
6759 copy_size = sub_end - sub_start;
6760
6761 /* adjust the copy object */
6762 if (total_size > copy_size) {
6763 vm_map_size_t local_size = 0;
6764 vm_map_size_t entry_size;
6765
6766 nentries = 1;
6767 new_offset = copy->offset;
6768 copy_entry = vm_map_copy_first_entry(copy);
6769 while(copy_entry !=
6770 vm_map_copy_to_entry(copy)){
6771 entry_size = copy_entry->vme_end -
6772 copy_entry->vme_start;
6773 if((local_size < copy_size) &&
6774 ((local_size + entry_size)
6775 >= copy_size)) {
6776 vm_map_copy_clip_end(copy,
6777 copy_entry,
6778 copy_entry->vme_start +
6779 (copy_size - local_size));
6780 entry_size = copy_entry->vme_end -
6781 copy_entry->vme_start;
6782 local_size += entry_size;
6783 new_offset += entry_size;
6784 }
6785 if(local_size >= copy_size) {
6786 next_copy = copy_entry->vme_next;
6787 copy_entry->vme_next =
6788 vm_map_copy_to_entry(copy);
6789 previous_prev =
6790 copy->cpy_hdr.links.prev;
6791 copy->cpy_hdr.links.prev = copy_entry;
6792 copy->size = copy_size;
6793 remaining_entries =
6794 copy->cpy_hdr.nentries;
6795 remaining_entries -= nentries;
6796 copy->cpy_hdr.nentries = nentries;
6797 break;
6798 } else {
6799 local_size += entry_size;
6800 new_offset += entry_size;
6801 nentries++;
6802 }
6803 copy_entry = copy_entry->vme_next;
6804 }
6805 }
6806
6807 if((entry->use_pmap) && (pmap == NULL)) {
6808 kr = vm_map_copy_overwrite_nested(
6809 entry->object.sub_map,
6810 sub_start,
6811 copy,
6812 interruptible,
6813 entry->object.sub_map->pmap,
6814 TRUE);
6815 } else if (pmap != NULL) {
6816 kr = vm_map_copy_overwrite_nested(
6817 entry->object.sub_map,
6818 sub_start,
6819 copy,
6820 interruptible, pmap,
6821 TRUE);
6822 } else {
6823 kr = vm_map_copy_overwrite_nested(
6824 entry->object.sub_map,
6825 sub_start,
6826 copy,
6827 interruptible,
6828 dst_map->pmap,
6829 TRUE);
6830 }
6831 if(kr != KERN_SUCCESS) {
6832 if(next_copy != NULL) {
6833 copy->cpy_hdr.nentries +=
6834 remaining_entries;
6835 copy->cpy_hdr.links.prev->vme_next =
6836 next_copy;
6837 copy->cpy_hdr.links.prev
6838 = previous_prev;
6839 copy->size = total_size;
6840 }
6841 return kr;
6842 }
6843 if (dst_end <= local_end) {
6844 return(KERN_SUCCESS);
6845 }
6846 /* otherwise copy no longer exists, it was */
6847 /* destroyed after successful copy_overwrite */
6848 copy = (vm_map_copy_t)
6849 zalloc(vm_map_copy_zone);
6850 vm_map_copy_first_entry(copy) =
6851 vm_map_copy_last_entry(copy) =
6852 vm_map_copy_to_entry(copy);
6853 copy->type = VM_MAP_COPY_ENTRY_LIST;
6854 copy->offset = new_offset;
6855
6856 /*
6857 * XXX FBDP
6858 * this does not seem to deal with
6859 * the VM map store (R&B tree)
6860 */
6861
6862 total_size -= copy_size;
6863 copy_size = 0;
6864 /* put back remainder of copy in container */
6865 if(next_copy != NULL) {
6866 copy->cpy_hdr.nentries = remaining_entries;
6867 copy->cpy_hdr.links.next = next_copy;
6868 copy->cpy_hdr.links.prev = previous_prev;
6869 copy->size = total_size;
6870 next_copy->vme_prev =
6871 vm_map_copy_to_entry(copy);
6872 next_copy = NULL;
6873 }
6874 base_addr = local_end;
6875 vm_map_lock(dst_map);
6876 if(!vm_map_lookup_entry(dst_map,
6877 local_end, &tmp_entry)) {
6878 vm_map_unlock(dst_map);
6879 return(KERN_INVALID_ADDRESS);
6880 }
6881 entry = tmp_entry;
6882 continue;
6883 }
6884 if (dst_end <= entry->vme_end) {
6885 copy_size = dst_end - base_addr;
6886 break;
6887 }
6888
6889 if ((next == vm_map_to_entry(dst_map)) ||
6890 (next->vme_start != entry->vme_end)) {
6891 vm_map_unlock(dst_map);
6892 return(KERN_INVALID_ADDRESS);
6893 }
6894
6895 entry = next;
6896 }/* for */
6897
6898 next_copy = NULL;
6899 nentries = 1;
6900
6901 /* adjust the copy object */
6902 if (total_size > copy_size) {
6903 vm_map_size_t local_size = 0;
6904 vm_map_size_t entry_size;
6905
6906 new_offset = copy->offset;
6907 copy_entry = vm_map_copy_first_entry(copy);
6908 while(copy_entry != vm_map_copy_to_entry(copy)) {
6909 entry_size = copy_entry->vme_end -
6910 copy_entry->vme_start;
6911 if((local_size < copy_size) &&
6912 ((local_size + entry_size)
6913 >= copy_size)) {
6914 vm_map_copy_clip_end(copy, copy_entry,
6915 copy_entry->vme_start +
6916 (copy_size - local_size));
6917 entry_size = copy_entry->vme_end -
6918 copy_entry->vme_start;
6919 local_size += entry_size;
6920 new_offset += entry_size;
6921 }
6922 if(local_size >= copy_size) {
6923 next_copy = copy_entry->vme_next;
6924 copy_entry->vme_next =
6925 vm_map_copy_to_entry(copy);
6926 previous_prev =
6927 copy->cpy_hdr.links.prev;
6928 copy->cpy_hdr.links.prev = copy_entry;
6929 copy->size = copy_size;
6930 remaining_entries =
6931 copy->cpy_hdr.nentries;
6932 remaining_entries -= nentries;
6933 copy->cpy_hdr.nentries = nentries;
6934 break;
6935 } else {
6936 local_size += entry_size;
6937 new_offset += entry_size;
6938 nentries++;
6939 }
6940 copy_entry = copy_entry->vme_next;
6941 }
6942 }
6943
6944 if (aligned) {
6945 pmap_t local_pmap;
6946
6947 if(pmap)
6948 local_pmap = pmap;
6949 else
6950 local_pmap = dst_map->pmap;
6951
6952 if ((kr = vm_map_copy_overwrite_aligned(
6953 dst_map, tmp_entry, copy,
6954 base_addr, local_pmap)) != KERN_SUCCESS) {
6955 if(next_copy != NULL) {
6956 copy->cpy_hdr.nentries +=
6957 remaining_entries;
6958 copy->cpy_hdr.links.prev->vme_next =
6959 next_copy;
6960 copy->cpy_hdr.links.prev =
6961 previous_prev;
6962 copy->size += copy_size;
6963 }
6964 return kr;
6965 }
6966 vm_map_unlock(dst_map);
6967 } else {
6968 /*
6969 * Performance gain:
6970 *
6971 * if the copy and dst address are misaligned but the same
6972 * offset within the page we can copy_not_aligned the
6973 * misaligned parts and copy aligned the rest. If they are
6974 * aligned but len is unaligned we simply need to copy
6975 * the end bit unaligned. We'll need to split the misaligned
6976 * bits of the region in this case!
6977 */
6978 /* ALWAYS UNLOCKS THE dst_map MAP */
6979 kr = vm_map_copy_overwrite_unaligned(
6980 dst_map,
6981 tmp_entry,
6982 copy,
6983 base_addr,
6984 discard_on_success);
6985 if (kr != KERN_SUCCESS) {
6986 if(next_copy != NULL) {
6987 copy->cpy_hdr.nentries +=
6988 remaining_entries;
6989 copy->cpy_hdr.links.prev->vme_next =
6990 next_copy;
6991 copy->cpy_hdr.links.prev =
6992 previous_prev;
6993 copy->size += copy_size;
6994 }
6995 return kr;
6996 }
6997 }
6998 total_size -= copy_size;
6999 if(total_size == 0)
7000 break;
7001 base_addr += copy_size;
7002 copy_size = 0;
7003 copy->offset = new_offset;
7004 if(next_copy != NULL) {
7005 copy->cpy_hdr.nentries = remaining_entries;
7006 copy->cpy_hdr.links.next = next_copy;
7007 copy->cpy_hdr.links.prev = previous_prev;
7008 next_copy->vme_prev = vm_map_copy_to_entry(copy);
7009 copy->size = total_size;
7010 }
7011 vm_map_lock(dst_map);
7012 while(TRUE) {
7013 if (!vm_map_lookup_entry(dst_map,
7014 base_addr, &tmp_entry)) {
7015 vm_map_unlock(dst_map);
7016 return(KERN_INVALID_ADDRESS);
7017 }
7018 if (tmp_entry->in_transition) {
7019 entry->needs_wakeup = TRUE;
7020 vm_map_entry_wait(dst_map, THREAD_UNINT);
7021 } else {
7022 break;
7023 }
7024 }
7025 vm_map_clip_start(dst_map,
7026 tmp_entry,
7027 vm_map_trunc_page(base_addr,
7028 VM_MAP_PAGE_MASK(dst_map)));
7029
7030 entry = tmp_entry;
7031 } /* while */
7032
7033 /*
7034 * Throw away the vm_map_copy object
7035 */
7036 if (discard_on_success)
7037 vm_map_copy_discard(copy);
7038
7039 return(KERN_SUCCESS);
7040 }/* vm_map_copy_overwrite */
7041
7042 kern_return_t
7043 vm_map_copy_overwrite(
7044 vm_map_t dst_map,
7045 vm_map_offset_t dst_addr,
7046 vm_map_copy_t copy,
7047 boolean_t interruptible)
7048 {
7049 vm_map_size_t head_size, tail_size;
7050 vm_map_copy_t head_copy, tail_copy;
7051 vm_map_offset_t head_addr, tail_addr;
7052 vm_map_entry_t entry;
7053 kern_return_t kr;
7054
7055 head_size = 0;
7056 tail_size = 0;
7057 head_copy = NULL;
7058 tail_copy = NULL;
7059 head_addr = 0;
7060 tail_addr = 0;
7061
7062 if (interruptible ||
7063 copy == VM_MAP_COPY_NULL ||
7064 copy->type != VM_MAP_COPY_ENTRY_LIST) {
7065 /*
7066 * We can't split the "copy" map if we're interruptible
7067 * or if we don't have a "copy" map...
7068 */
7069 blunt_copy:
7070 return vm_map_copy_overwrite_nested(dst_map,
7071 dst_addr,
7072 copy,
7073 interruptible,
7074 (pmap_t) NULL,
7075 TRUE);
7076 }
7077
7078 if (copy->size < 3 * PAGE_SIZE) {
7079 /*
7080 * Too small to bother with optimizing...
7081 */
7082 goto blunt_copy;
7083 }
7084
7085 if ((dst_addr & VM_MAP_PAGE_MASK(dst_map)) !=
7086 (copy->offset & VM_MAP_PAGE_MASK(dst_map))) {
7087 /*
7088 * Incompatible mis-alignment of source and destination...
7089 */
7090 goto blunt_copy;
7091 }
7092
7093 /*
7094 * Proper alignment or identical mis-alignment at the beginning.
7095 * Let's try and do a small unaligned copy first (if needed)
7096 * and then an aligned copy for the rest.
7097 */
7098 if (!page_aligned(dst_addr)) {
7099 head_addr = dst_addr;
7100 head_size = (VM_MAP_PAGE_SIZE(dst_map) -
7101 (copy->offset & VM_MAP_PAGE_MASK(dst_map)));
7102 }
7103 if (!page_aligned(copy->offset + copy->size)) {
7104 /*
7105 * Mis-alignment at the end.
7106 * Do an aligned copy up to the last page and
7107 * then an unaligned copy for the remaining bytes.
7108 */
7109 tail_size = ((copy->offset + copy->size) &
7110 VM_MAP_PAGE_MASK(dst_map));
7111 tail_addr = dst_addr + copy->size - tail_size;
7112 }
7113
7114 if (head_size + tail_size == copy->size) {
7115 /*
7116 * It's all unaligned, no optimization possible...
7117 */
7118 goto blunt_copy;
7119 }
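	/*
	 * Editorial worked example (hypothetical numbers, 4KB pages):
	 * with dst_addr = 0x5200, copy->offset = 0x200, copy->size = 0x4f00:
	 *   head_addr = 0x5200, head_size = 0x1000 - 0x200 = 0xe00
	 *   tail_size = (0x200 + 0x4f00) & 0xfff = 0x100
	 *   tail_addr = 0x5200 + 0x4f00 - 0x100 = 0xa000
	 * The aligned middle [0x6000, 0xa000) goes through the fast path
	 * below; the 0xe00-byte head and the 0x100-byte tail are copied
	 * with the unaligned path.
	 */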
7120
7121 /*
7122 * Can't optimize if there are any submaps in the
7123 * destination due to the way we free the "copy" map
7124 * progressively in vm_map_copy_overwrite_nested()
7125 * in that case.
7126 */
7127 vm_map_lock_read(dst_map);
7128 if (! vm_map_lookup_entry(dst_map, dst_addr, &entry)) {
7129 vm_map_unlock_read(dst_map);
7130 goto blunt_copy;
7131 }
7132 for (;
7133 (entry != vm_map_copy_to_entry(copy) &&
7134 entry->vme_start < dst_addr + copy->size);
7135 entry = entry->vme_next) {
7136 if (entry->is_sub_map) {
7137 vm_map_unlock_read(dst_map);
7138 goto blunt_copy;
7139 }
7140 }
7141 vm_map_unlock_read(dst_map);
7142
7143 if (head_size) {
7144 /*
7145 * Unaligned copy of the first "head_size" bytes, to reach
7146 * a page boundary.
7147 */
7148
7149 /*
7150 * Extract "head_copy" out of "copy".
7151 */
7152 head_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
7153 vm_map_copy_first_entry(head_copy) =
7154 vm_map_copy_to_entry(head_copy);
7155 vm_map_copy_last_entry(head_copy) =
7156 vm_map_copy_to_entry(head_copy);
7157 head_copy->type = VM_MAP_COPY_ENTRY_LIST;
7158 head_copy->cpy_hdr.nentries = 0;
7159 head_copy->cpy_hdr.entries_pageable =
7160 copy->cpy_hdr.entries_pageable;
7161 vm_map_store_init(&head_copy->cpy_hdr);
7162
7163 head_copy->offset = copy->offset;
7164 head_copy->size = head_size;
7165
7166 copy->offset += head_size;
7167 copy->size -= head_size;
7168
7169 entry = vm_map_copy_first_entry(copy);
7170 vm_map_copy_clip_end(copy, entry, copy->offset);
7171 vm_map_copy_entry_unlink(copy, entry);
7172 vm_map_copy_entry_link(head_copy,
7173 vm_map_copy_to_entry(head_copy),
7174 entry);
7175
7176 /*
7177 * Do the unaligned copy.
7178 */
7179 kr = vm_map_copy_overwrite_nested(dst_map,
7180 head_addr,
7181 head_copy,
7182 interruptible,
7183 (pmap_t) NULL,
7184 FALSE);
7185 if (kr != KERN_SUCCESS)
7186 goto done;
7187 }
7188
7189 if (tail_size) {
7190 /*
7191 * Extract "tail_copy" out of "copy".
7192 */
7193 tail_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
7194 vm_map_copy_first_entry(tail_copy) =
7195 vm_map_copy_to_entry(tail_copy);
7196 vm_map_copy_last_entry(tail_copy) =
7197 vm_map_copy_to_entry(tail_copy);
7198 tail_copy->type = VM_MAP_COPY_ENTRY_LIST;
7199 tail_copy->cpy_hdr.nentries = 0;
7200 tail_copy->cpy_hdr.entries_pageable =
7201 copy->cpy_hdr.entries_pageable;
7202 vm_map_store_init(&tail_copy->cpy_hdr);
7203
7204 tail_copy->offset = copy->offset + copy->size - tail_size;
7205 tail_copy->size = tail_size;
7206
7207 copy->size -= tail_size;
7208
7209 entry = vm_map_copy_last_entry(copy);
7210 vm_map_copy_clip_start(copy, entry, tail_copy->offset);
7211 entry = vm_map_copy_last_entry(copy);
7212 vm_map_copy_entry_unlink(copy, entry);
7213 vm_map_copy_entry_link(tail_copy,
7214 vm_map_copy_last_entry(tail_copy),
7215 entry);
7216 }
7217
7218 /*
7219 * Copy most (or possibly all) of the data.
7220 */
7221 kr = vm_map_copy_overwrite_nested(dst_map,
7222 dst_addr + head_size,
7223 copy,
7224 interruptible,
7225 (pmap_t) NULL,
7226 FALSE);
7227 if (kr != KERN_SUCCESS) {
7228 goto done;
7229 }
7230
7231 if (tail_size) {
7232 kr = vm_map_copy_overwrite_nested(dst_map,
7233 tail_addr,
7234 tail_copy,
7235 interruptible,
7236 (pmap_t) NULL,
7237 FALSE);
7238 }
7239
7240 done:
7241 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
7242 if (kr == KERN_SUCCESS) {
7243 /*
7244 * Discard all the copy maps.
7245 */
7246 if (head_copy) {
7247 vm_map_copy_discard(head_copy);
7248 head_copy = NULL;
7249 }
7250 vm_map_copy_discard(copy);
7251 if (tail_copy) {
7252 vm_map_copy_discard(tail_copy);
7253 tail_copy = NULL;
7254 }
7255 } else {
7256 /*
7257 * Re-assemble the original copy map.
7258 */
7259 if (head_copy) {
7260 entry = vm_map_copy_first_entry(head_copy);
7261 vm_map_copy_entry_unlink(head_copy, entry);
7262 vm_map_copy_entry_link(copy,
7263 vm_map_copy_to_entry(copy),
7264 entry);
7265 copy->offset -= head_size;
7266 copy->size += head_size;
7267 vm_map_copy_discard(head_copy);
7268 head_copy = NULL;
7269 }
7270 if (tail_copy) {
7271 entry = vm_map_copy_last_entry(tail_copy);
7272 vm_map_copy_entry_unlink(tail_copy, entry);
7273 vm_map_copy_entry_link(copy,
7274 vm_map_copy_last_entry(copy),
7275 entry);
7276 copy->size += tail_size;
7277 vm_map_copy_discard(tail_copy);
7278 tail_copy = NULL;
7279 }
7280 }
7281 return kr;
7282 }
7283
7284
7285 /*
7286 * Routine: vm_map_copy_overwrite_unaligned [internal use only]
7287 *
7288 * Description:
7289 * Physically copy unaligned data
7290 *
7291 * Implementation:
7292 * Unaligned parts of pages have to be physically copied. We use
7293 * a modified form of vm_fault_copy (which understands non-aligned
7294 * page offsets and sizes) to do the copy. We attempt to copy as
7295 * much memory in one go as possible; however, vm_fault_copy copies
7296 * within one memory object, so we have to find the smallest of
7297 * "amount left", "source object data size" and "target object data
7298 * size". With unaligned data we don't need to split regions, so the
7299 * source (copy) object should be one map entry; the target range may
7300 * be split over multiple map entries, however. In any event we are
7301 * pessimistic about these assumptions.
7302 *
7303 * Assumptions:
7304 * dst_map is locked on entry and is returned locked on success,
7305 * unlocked on error.
7306 */
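/*
 * Editorial worked example (hypothetical numbers): with amount_left =
 * 0x1800, a destination entry offering dst_size = 0xc00 from "start", and a
 * source copy entry offering src_size = 0x2000, one pass through the loop
 * below calls vm_fault_copy() for copy_size = min(0xc00, 0x2000, 0x1800) =
 * 0xc00 bytes; "start" and src_offset then advance by 0xc00 and the next
 * destination entry is picked up.
 */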
7307
7308 static kern_return_t
7309 vm_map_copy_overwrite_unaligned(
7310 vm_map_t dst_map,
7311 vm_map_entry_t entry,
7312 vm_map_copy_t copy,
7313 vm_map_offset_t start,
7314 boolean_t discard_on_success)
7315 {
7316 vm_map_entry_t copy_entry;
7317 vm_map_entry_t copy_entry_next;
7318 vm_map_version_t version;
7319 vm_object_t dst_object;
7320 vm_object_offset_t dst_offset;
7321 vm_object_offset_t src_offset;
7322 vm_object_offset_t entry_offset;
7323 vm_map_offset_t entry_end;
7324 vm_map_size_t src_size,
7325 dst_size,
7326 copy_size,
7327 amount_left;
7328 kern_return_t kr = KERN_SUCCESS;
7329
7330
7331 copy_entry = vm_map_copy_first_entry(copy);
7332
7333 vm_map_lock_write_to_read(dst_map);
7334
7335 src_offset = copy->offset - vm_object_trunc_page(copy->offset);
7336 amount_left = copy->size;
7337 /*
7338 * Unaligned, so we never clipped this entry; we need the offset into
7339 * the vm_object, not just the data.
7340 */
7341 while (amount_left > 0) {
7342
7343 if (entry == vm_map_to_entry(dst_map)) {
7344 vm_map_unlock_read(dst_map);
7345 return KERN_INVALID_ADDRESS;
7346 }
7347
7348 /* "start" must be within the current map entry */
7349 assert ((start>=entry->vme_start) && (start<entry->vme_end));
7350
7351 dst_offset = start - entry->vme_start;
7352
7353 dst_size = entry->vme_end - start;
7354
7355 src_size = copy_entry->vme_end -
7356 (copy_entry->vme_start + src_offset);
7357
7358 if (dst_size < src_size) {
7359 /*
7360 * we can only copy dst_size bytes before
7361 * we have to get the next destination entry
7362 */
7363 copy_size = dst_size;
7364 } else {
7365 /*
7366 * we can only copy src_size bytes before
7367 * we have to get the next source copy entry
7368 */
7369 copy_size = src_size;
7370 }
7371
7372 if (copy_size > amount_left) {
7373 copy_size = amount_left;
7374 }
7375 /*
7376 * Entry needs copy; create a shadow object for the
7377 * copy-on-write region.
7378 */
7379 if (entry->needs_copy &&
7380 ((entry->protection & VM_PROT_WRITE) != 0))
7381 {
7382 if (vm_map_lock_read_to_write(dst_map)) {
7383 vm_map_lock_read(dst_map);
7384 goto RetryLookup;
7385 }
7386 vm_object_shadow(&entry->object.vm_object,
7387 &entry->offset,
7388 (vm_map_size_t)(entry->vme_end
7389 - entry->vme_start));
7390 entry->needs_copy = FALSE;
7391 vm_map_lock_write_to_read(dst_map);
7392 }
7393 dst_object = entry->object.vm_object;
7394 /*
7395 * Unlike with the virtual (aligned) copy, we're going
7396 * to fault on it; therefore we need a target object.
7397 */
7398 if (dst_object == VM_OBJECT_NULL) {
7399 if (vm_map_lock_read_to_write(dst_map)) {
7400 vm_map_lock_read(dst_map);
7401 goto RetryLookup;
7402 }
7403 dst_object = vm_object_allocate((vm_map_size_t)
7404 entry->vme_end - entry->vme_start);
7405 entry->object.vm_object = dst_object;
7406 entry->offset = 0;
7407 assert(entry->use_pmap);
7408 vm_map_lock_write_to_read(dst_map);
7409 }
7410 /*
7411 * Take an object reference and unlock map. The "entry" may
7412 * disappear or change when the map is unlocked.
7413 */
7414 vm_object_reference(dst_object);
7415 version.main_timestamp = dst_map->timestamp;
7416 entry_offset = entry->offset;
7417 entry_end = entry->vme_end;
7418 vm_map_unlock_read(dst_map);
7419 /*
7420 * Copy as much as possible in one pass
7421 */
7422 kr = vm_fault_copy(
7423 copy_entry->object.vm_object,
7424 copy_entry->offset + src_offset,
7425 &copy_size,
7426 dst_object,
7427 entry_offset + dst_offset,
7428 dst_map,
7429 &version,
7430 THREAD_UNINT );
7431
7432 start += copy_size;
7433 src_offset += copy_size;
7434 amount_left -= copy_size;
7435 /*
7436 * Release the object reference
7437 */
7438 vm_object_deallocate(dst_object);
7439 /*
7440 * If a hard error occurred, return it now
7441 */
7442 if (kr != KERN_SUCCESS)
7443 return kr;
7444
7445 if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
7446 || amount_left == 0)
7447 {
7448 /*
7449 * all done with this copy entry, dispose.
7450 */
7451 copy_entry_next = copy_entry->vme_next;
7452
7453 if (discard_on_success) {
7454 vm_map_copy_entry_unlink(copy, copy_entry);
7455 assert(!copy_entry->is_sub_map);
7456 vm_object_deallocate(
7457 copy_entry->object.vm_object);
7458 vm_map_copy_entry_dispose(copy, copy_entry);
7459 }
7460
7461 if (copy_entry_next == vm_map_copy_to_entry(copy) &&
7462 amount_left) {
7463 /*
7464 * not finished copying but ran out of source
7465 */
7466 return KERN_INVALID_ADDRESS;
7467 }
7468
7469 copy_entry = copy_entry_next;
7470
7471 src_offset = 0;
7472 }
7473
7474 if (amount_left == 0)
7475 return KERN_SUCCESS;
7476
7477 vm_map_lock_read(dst_map);
7478 if (version.main_timestamp == dst_map->timestamp) {
7479 if (start == entry_end) {
7480 /*
7481 * destination region is split. Use the version
7482 * information to avoid a lookup in the normal
7483 * case.
7484 */
7485 entry = entry->vme_next;
7486 /*
7487 * should be contiguous. Fail if we encounter
7488 * a hole in the destination.
7489 */
7490 if (start != entry->vme_start) {
7491 vm_map_unlock_read(dst_map);
7492 return KERN_INVALID_ADDRESS ;
7493 }
7494 }
7495 } else {
7496 /*
7497 * Map version check failed.
7498 * we must lookup the entry because somebody
7499 * might have changed the map behind our backs.
7500 */
7501 RetryLookup:
7502 if (!vm_map_lookup_entry(dst_map, start, &entry))
7503 {
7504 vm_map_unlock_read(dst_map);
7505 return KERN_INVALID_ADDRESS ;
7506 }
7507 }
7508 }/* while */
7509
7510 return KERN_SUCCESS;
7511 }/* vm_map_copy_overwrite_unaligned */
7512
7513 /*
7514 * Routine: vm_map_copy_overwrite_aligned [internal use only]
7515 *
7516 * Description:
7517 * Does all the vm_trickery possible for whole pages.
7518 *
7519 * Implementation:
7520 *
7521 * If there are no permanent objects in the destination,
7522 * and the source and destination map entry zones match,
7523 * and the destination map entry is not shared,
7524 * then the map entries can be deleted and replaced
7525 * with those from the copy. The following code is the
7526 * basic idea of what to do, but there are lots of annoying
7527 * little details about getting protection and inheritance
7528 * right. Should add protection, inheritance, and sharing checks
7529 * to the above pass and make sure that no wiring is involved.
7530 */
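/*
 * Editorial sketch (not part of the original source): the core of the fast
 * path described above, with clipping, pmap cleanup and the malloc()/large-
 * object heuristics omitted. The destination entry is assumed to be
 * temporary, unshared, anonymous memory.
 */
#if 0 /* illustrative fragment; "entry", "copy" and "copy_entry" as below */
	vm_object_deallocate(entry->object.vm_object);	/* drop old backing */
	entry->object = copy_entry->object;		/* install source object */
	entry->offset = copy_entry->offset;
	entry->needs_copy = copy_entry->needs_copy;
	vm_map_copy_entry_unlink(copy, copy_entry);
	vm_map_copy_entry_dispose(copy, copy_entry);
#endif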
7531
7532 int vm_map_copy_overwrite_aligned_src_not_internal = 0;
7533 int vm_map_copy_overwrite_aligned_src_not_symmetric = 0;
7534 int vm_map_copy_overwrite_aligned_src_large = 0;
7535
7536 static kern_return_t
7537 vm_map_copy_overwrite_aligned(
7538 vm_map_t dst_map,
7539 vm_map_entry_t tmp_entry,
7540 vm_map_copy_t copy,
7541 vm_map_offset_t start,
7542 __unused pmap_t pmap)
7543 {
7544 vm_object_t object;
7545 vm_map_entry_t copy_entry;
7546 vm_map_size_t copy_size;
7547 vm_map_size_t size;
7548 vm_map_entry_t entry;
7549
7550 while ((copy_entry = vm_map_copy_first_entry(copy))
7551 != vm_map_copy_to_entry(copy))
7552 {
7553 copy_size = (copy_entry->vme_end - copy_entry->vme_start);
7554
7555 entry = tmp_entry;
7556 if (entry->is_sub_map) {
7557 /* unnested when clipped earlier */
7558 assert(!entry->use_pmap);
7559 }
7560 if (entry == vm_map_to_entry(dst_map)) {
7561 vm_map_unlock(dst_map);
7562 return KERN_INVALID_ADDRESS;
7563 }
7564 size = (entry->vme_end - entry->vme_start);
7565 /*
7566 * Make sure that no holes popped up in the
7567 * address map, and that the protection is
7568 * still valid, in case the map was unlocked
7569 * earlier.
7570 */
7571
7572 if ((entry->vme_start != start) || ((entry->is_sub_map)
7573 && !entry->needs_copy)) {
7574 vm_map_unlock(dst_map);
7575 return(KERN_INVALID_ADDRESS);
7576 }
7577 assert(entry != vm_map_to_entry(dst_map));
7578
7579 /*
7580 * Check protection again
7581 */
7582
7583 if ( ! (entry->protection & VM_PROT_WRITE)) {
7584 vm_map_unlock(dst_map);
7585 return(KERN_PROTECTION_FAILURE);
7586 }
7587
7588 /*
7589 * Adjust to source size first
7590 */
7591
7592 if (copy_size < size) {
7593 if (entry->map_aligned &&
7594 !VM_MAP_PAGE_ALIGNED(entry->vme_start + copy_size,
7595 VM_MAP_PAGE_MASK(dst_map))) {
7596 /* no longer map-aligned */
7597 entry->map_aligned = FALSE;
7598 }
7599 vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
7600 size = copy_size;
7601 }
7602
7603 /*
7604 * Adjust to destination size
7605 */
7606
7607 if (size < copy_size) {
7608 vm_map_copy_clip_end(copy, copy_entry,
7609 copy_entry->vme_start + size);
7610 copy_size = size;
7611 }
7612
7613 assert((entry->vme_end - entry->vme_start) == size);
7614 assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
7615 assert((copy_entry->vme_end - copy_entry->vme_start) == size);
7616
7617 /*
7618 * If the destination contains temporary unshared memory,
7619 * we can perform the copy by throwing it away and
7620 * installing the source data.
7621 */
7622
7623 object = entry->object.vm_object;
7624 if ((!entry->is_shared &&
7625 ((object == VM_OBJECT_NULL) ||
7626 (object->internal && !object->true_share))) ||
7627 entry->needs_copy) {
7628 vm_object_t old_object = entry->object.vm_object;
7629 vm_object_offset_t old_offset = entry->offset;
7630 vm_object_offset_t offset;
7631
7632 /*
7633 * Ensure that the source and destination aren't
7634 * identical
7635 */
7636 if (old_object == copy_entry->object.vm_object &&
7637 old_offset == copy_entry->offset) {
7638 vm_map_copy_entry_unlink(copy, copy_entry);
7639 vm_map_copy_entry_dispose(copy, copy_entry);
7640
7641 if (old_object != VM_OBJECT_NULL)
7642 vm_object_deallocate(old_object);
7643
7644 start = tmp_entry->vme_end;
7645 tmp_entry = tmp_entry->vme_next;
7646 continue;
7647 }
7648
7649 #define __TRADEOFF1_OBJ_SIZE (64 * 1024 * 1024) /* 64 MB */
7650 #define __TRADEOFF1_COPY_SIZE (128 * 1024) /* 128 KB */
7651 if (copy_entry->object.vm_object != VM_OBJECT_NULL &&
7652 copy_entry->object.vm_object->vo_size >= __TRADEOFF1_OBJ_SIZE &&
7653 copy_size <= __TRADEOFF1_COPY_SIZE) {
7654 /*
7655 * Virtual vs. Physical copy tradeoff #1.
7656 *
7657 * Copying only a few pages out of a large
7658 * object: do a physical copy instead of
7659 * a virtual copy, to avoid possibly keeping
7660 * the entire large object alive because of
7661 * those few copy-on-write pages.
7662 */
7663 vm_map_copy_overwrite_aligned_src_large++;
7664 goto slow_copy;
7665 }
7666
7667 if (entry->alias >= VM_MEMORY_MALLOC &&
7668 entry->alias <= VM_MEMORY_MALLOC_LARGE_REUSED) {
7669 vm_object_t new_object, new_shadow;
7670
7671 /*
7672 * We're about to map something over a mapping
7673 * established by malloc()...
7674 */
7675 new_object = copy_entry->object.vm_object;
7676 if (new_object != VM_OBJECT_NULL) {
7677 vm_object_lock_shared(new_object);
7678 }
7679 while (new_object != VM_OBJECT_NULL &&
7680 !new_object->true_share &&
7681 new_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
7682 new_object->internal) {
7683 new_shadow = new_object->shadow;
7684 if (new_shadow == VM_OBJECT_NULL) {
7685 break;
7686 }
7687 vm_object_lock_shared(new_shadow);
7688 vm_object_unlock(new_object);
7689 new_object = new_shadow;
7690 }
7691 if (new_object != VM_OBJECT_NULL) {
7692 if (!new_object->internal) {
7693 /*
7694 * The new mapping is backed
7695 * by an external object. We
7696 * don't want malloc'ed memory
7697 * to be replaced with such a
7698 * non-anonymous mapping, so
7699 * let's go off the optimized
7700 * path...
7701 */
7702 vm_map_copy_overwrite_aligned_src_not_internal++;
7703 vm_object_unlock(new_object);
7704 goto slow_copy;
7705 }
7706 if (new_object->true_share ||
7707 new_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
7708 /*
7709 * Same if there's a "true_share"
7710 * object in the shadow chain, or
7711 * an object with a non-default
7712 * (SYMMETRIC) copy strategy.
7713 */
7714 vm_map_copy_overwrite_aligned_src_not_symmetric++;
7715 vm_object_unlock(new_object);
7716 goto slow_copy;
7717 }
7718 vm_object_unlock(new_object);
7719 }
7720 /*
7721 * The new mapping is still backed by
7722 * anonymous (internal) memory, so it's
7723 * OK to substitute it for the original
7724 * malloc() mapping.
7725 */
7726 }
7727
7728 if (old_object != VM_OBJECT_NULL) {
7729 if(entry->is_sub_map) {
7730 if(entry->use_pmap) {
7731 #ifndef NO_NESTED_PMAP
7732 pmap_unnest(dst_map->pmap,
7733 (addr64_t)entry->vme_start,
7734 entry->vme_end - entry->vme_start);
7735 #endif /* NO_NESTED_PMAP */
7736 if(dst_map->mapped_in_other_pmaps) {
7737 /* clean up parent */
7738 /* map/maps */
7739 vm_map_submap_pmap_clean(
7740 dst_map, entry->vme_start,
7741 entry->vme_end,
7742 entry->object.sub_map,
7743 entry->offset);
7744 }
7745 } else {
7746 vm_map_submap_pmap_clean(
7747 dst_map, entry->vme_start,
7748 entry->vme_end,
7749 entry->object.sub_map,
7750 entry->offset);
7751 }
7752 vm_map_deallocate(
7753 entry->object.sub_map);
7754 } else {
7755 if(dst_map->mapped_in_other_pmaps) {
7756 vm_object_pmap_protect_options(
7757 entry->object.vm_object,
7758 entry->offset,
7759 entry->vme_end
7760 - entry->vme_start,
7761 PMAP_NULL,
7762 entry->vme_start,
7763 VM_PROT_NONE,
7764 PMAP_OPTIONS_REMOVE);
7765 } else {
7766 pmap_remove_options(
7767 dst_map->pmap,
7768 (addr64_t)(entry->vme_start),
7769 (addr64_t)(entry->vme_end),
7770 PMAP_OPTIONS_REMOVE);
7771 }
7772 vm_object_deallocate(old_object);
7773 }
7774 }
7775
7776 entry->is_sub_map = FALSE;
7777 entry->object = copy_entry->object;
7778 object = entry->object.vm_object;
7779 entry->needs_copy = copy_entry->needs_copy;
7780 entry->wired_count = 0;
7781 entry->user_wired_count = 0;
7782 offset = entry->offset = copy_entry->offset;
7783
7784 vm_map_copy_entry_unlink(copy, copy_entry);
7785 vm_map_copy_entry_dispose(copy, copy_entry);
7786
7787 /*
7788 * We could try to push pages into the pmap at this point, BUT
7789 * this optimization only saved on average 2 us per page if ALL
7790 * the pages in the source were currently mapped
7791 * and ALL the pages in the dest were touched. If fewer than 2/3
7792 * of the pages were touched, this optimization actually cost more cycles.
7793 * It also puts a lot of pressure on the pmap layer w.r.t. mapping structures.
7794 */
7795
7796 /*
7797 * Set up for the next iteration. The map
7798 * has not been unlocked, so the next
7799 * address should be at the end of this
7800 * entry, and the next map entry should be
7801 * the one following it.
7802 */
7803
7804 start = tmp_entry->vme_end;
7805 tmp_entry = tmp_entry->vme_next;
7806 } else {
7807 vm_map_version_t version;
7808 vm_object_t dst_object;
7809 vm_object_offset_t dst_offset;
7810 kern_return_t r;
7811
7812 slow_copy:
7813 if (entry->needs_copy) {
7814 vm_object_shadow(&entry->object.vm_object,
7815 &entry->offset,
7816 (entry->vme_end -
7817 entry->vme_start));
7818 entry->needs_copy = FALSE;
7819 }
7820
7821 dst_object = entry->object.vm_object;
7822 dst_offset = entry->offset;
7823
7824 /*
7825 * Take an object reference, and record
7826 * the map version information so that the
7827 * map can be safely unlocked.
7828 */
7829
7830 if (dst_object == VM_OBJECT_NULL) {
7831 /*
7832 * We would usually have just taken the
7833 * optimized path above if the destination
7834 * object has not been allocated yet. But we
7835 * now disable that optimization if the copy
7836 * entry's object is not backed by anonymous
7837 * memory to avoid replacing malloc'ed
7838 * (i.e. re-usable) anonymous memory with a
7839 * not-so-anonymous mapping.
7840 * So we have to handle this case here and
7841 * allocate a new VM object for this map entry.
7842 */
7843 dst_object = vm_object_allocate(
7844 entry->vme_end - entry->vme_start);
7845 dst_offset = 0;
7846 entry->object.vm_object = dst_object;
7847 entry->offset = dst_offset;
7848 assert(entry->use_pmap);
7849
7850 }
7851
7852 vm_object_reference(dst_object);
7853
7854 /* account for unlock bumping up timestamp */
7855 version.main_timestamp = dst_map->timestamp + 1;
7856
7857 vm_map_unlock(dst_map);
7858
7859 /*
7860 * Copy as much as possible in one pass
7861 */
7862
7863 copy_size = size;
7864 r = vm_fault_copy(
7865 copy_entry->object.vm_object,
7866 copy_entry->offset,
7867 &copy_size,
7868 dst_object,
7869 dst_offset,
7870 dst_map,
7871 &version,
7872 THREAD_UNINT );
7873
7874 /*
7875 * Release the object reference
7876 */
7877
7878 vm_object_deallocate(dst_object);
7879
7880 /*
7881 * If a hard error occurred, return it now
7882 */
7883
7884 if (r != KERN_SUCCESS)
7885 return(r);
7886
7887 if (copy_size != 0) {
7888 /*
7889 * Dispose of the copied region
7890 */
7891
7892 vm_map_copy_clip_end(copy, copy_entry,
7893 copy_entry->vme_start + copy_size);
7894 vm_map_copy_entry_unlink(copy, copy_entry);
7895 vm_object_deallocate(copy_entry->object.vm_object);
7896 vm_map_copy_entry_dispose(copy, copy_entry);
7897 }
7898
7899 /*
7900 * Pick up in the destination map where we left off.
7901 *
7902 * Use the version information to avoid a lookup
7903 * in the normal case.
7904 */
7905
7906 start += copy_size;
7907 vm_map_lock(dst_map);
7908 if (version.main_timestamp == dst_map->timestamp &&
7909 copy_size != 0) {
7910 /* We can safely use saved tmp_entry value */
7911
7912 if (tmp_entry->map_aligned &&
7913 !VM_MAP_PAGE_ALIGNED(
7914 start,
7915 VM_MAP_PAGE_MASK(dst_map))) {
7916 /* no longer map-aligned */
7917 tmp_entry->map_aligned = FALSE;
7918 }
7919 vm_map_clip_end(dst_map, tmp_entry, start);
7920 tmp_entry = tmp_entry->vme_next;
7921 } else {
7922 /* Must do lookup of tmp_entry */
7923
7924 if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
7925 vm_map_unlock(dst_map);
7926 return(KERN_INVALID_ADDRESS);
7927 }
7928 if (tmp_entry->map_aligned &&
7929 !VM_MAP_PAGE_ALIGNED(
7930 start,
7931 VM_MAP_PAGE_MASK(dst_map))) {
7932 /* no longer map-aligned */
7933 tmp_entry->map_aligned = FALSE;
7934 }
7935 vm_map_clip_start(dst_map, tmp_entry, start);
7936 }
7937 }
7938 }/* while */
7939
7940 return(KERN_SUCCESS);
7941 }/* vm_map_copy_overwrite_aligned */
7942
7943 /*
7944 * Routine: vm_map_copyin_kernel_buffer [internal use only]
7945 *
7946 * Description:
7947 * Copy in data to a kernel buffer from space in the
7948 * source map. The original space may be optionally
7949 * deallocated.
7950 *
7951 * If successful, returns a new copy object.
7952 */
7953 static kern_return_t
7954 vm_map_copyin_kernel_buffer(
7955 vm_map_t src_map,
7956 vm_map_offset_t src_addr,
7957 vm_map_size_t len,
7958 boolean_t src_destroy,
7959 vm_map_copy_t *copy_result)
7960 {
7961 kern_return_t kr;
7962 vm_map_copy_t copy;
7963 vm_size_t kalloc_size;
7964
7965 if ((vm_size_t) len != len) {
7966 /* "len" is too big and doesn't fit in a "vm_size_t" */
7967 return KERN_RESOURCE_SHORTAGE;
7968 }
7969 kalloc_size = (vm_size_t) (sizeof(struct vm_map_copy) + len);
7970 assert((vm_map_size_t) kalloc_size == sizeof (struct vm_map_copy) + len);
7971
7972 copy = (vm_map_copy_t) kalloc(kalloc_size);
7973 if (copy == VM_MAP_COPY_NULL) {
7974 return KERN_RESOURCE_SHORTAGE;
7975 }
7976 copy->type = VM_MAP_COPY_KERNEL_BUFFER;
7977 copy->size = len;
7978 copy->offset = 0;
7979 copy->cpy_kdata = (void *) (copy + 1);
7980 copy->cpy_kalloc_size = kalloc_size;
7981
7982 kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t) len);
7983 if (kr != KERN_SUCCESS) {
7984 kfree(copy, kalloc_size);
7985 return kr;
7986 }
7987 if (src_destroy) {
7988 (void) vm_map_remove(
7989 src_map,
7990 vm_map_trunc_page(src_addr,
7991 VM_MAP_PAGE_MASK(src_map)),
7992 vm_map_round_page(src_addr + len,
7993 VM_MAP_PAGE_MASK(src_map)),
7994 (VM_MAP_REMOVE_INTERRUPTIBLE |
7995 VM_MAP_REMOVE_WAIT_FOR_KWIRE |
7996 (src_map == kernel_map) ? VM_MAP_REMOVE_KUNWIRE : 0));
7997 }
7998 *copy_result = copy;
7999 return KERN_SUCCESS;
8000 }
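/*
 * Editorial note (not part of the original source): for
 * VM_MAP_COPY_KERNEL_BUFFER copies the payload lives immediately after the
 * vm_map_copy header in the same kalloc() allocation, e.g. for len = 256:
 *
 *	copy = kalloc(sizeof (struct vm_map_copy) + 256);
 *	copy->cpy_kdata = (void *) (copy + 1);	// first payload byte
 *	copy->cpy_kalloc_size = sizeof (struct vm_map_copy) + 256;
 *
 * which is why vm_map_copy_discard() frees such copies with a single
 * kfree(copy, copy->cpy_kalloc_size) rather than zfree().
 */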
8001
8002 /*
8003 * Routine: vm_map_copyout_kernel_buffer [internal use only]
8004 *
8005 * Description:
8006 * Copy out data from a kernel buffer into space in the
8007 * destination map. The space may optionally be dynamically
8008 * allocated.
8009 *
8010 * If successful, consumes the copy object.
8011 * Otherwise, the caller is responsible for it.
8012 */
8013 static int vm_map_copyout_kernel_buffer_failures = 0;
8014 static kern_return_t
8015 vm_map_copyout_kernel_buffer(
8016 vm_map_t map,
8017 vm_map_address_t *addr, /* IN/OUT */
8018 vm_map_copy_t copy,
8019 boolean_t overwrite,
8020 boolean_t consume_on_success)
8021 {
8022 kern_return_t kr = KERN_SUCCESS;
8023 thread_t thread = current_thread();
8024
8025 if (!overwrite) {
8026
8027 /*
8028 * Allocate space in the target map for the data
8029 */
8030 *addr = 0;
8031 kr = vm_map_enter(map,
8032 addr,
8033 vm_map_round_page(copy->size,
8034 VM_MAP_PAGE_MASK(map)),
8035 (vm_map_offset_t) 0,
8036 VM_FLAGS_ANYWHERE,
8037 VM_OBJECT_NULL,
8038 (vm_object_offset_t) 0,
8039 FALSE,
8040 VM_PROT_DEFAULT,
8041 VM_PROT_ALL,
8042 VM_INHERIT_DEFAULT);
8043 if (kr != KERN_SUCCESS)
8044 return kr;
8045 }
8046
8047 /*
8048 * Copyout the data from the kernel buffer to the target map.
8049 */
8050 if (thread->map == map) {
8051
8052 /*
8053 * If the target map is the current map, just do
8054 * the copy.
8055 */
8056 assert((vm_size_t) copy->size == copy->size);
8057 if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) {
8058 kr = KERN_INVALID_ADDRESS;
8059 }
8060 }
8061 else {
8062 vm_map_t oldmap;
8063
8064 /*
8065 * If the target map is another map, assume the
8066 * target's address space identity for the duration
8067 * of the copy.
8068 */
8069 vm_map_reference(map);
8070 oldmap = vm_map_switch(map);
8071
8072 assert((vm_size_t) copy->size == copy->size);
8073 if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) {
8074 vm_map_copyout_kernel_buffer_failures++;
8075 kr = KERN_INVALID_ADDRESS;
8076 }
8077
8078 (void) vm_map_switch(oldmap);
8079 vm_map_deallocate(map);
8080 }
8081
8082 if (kr != KERN_SUCCESS) {
8083 /* the copy failed, clean up */
8084 if (!overwrite) {
8085 /*
8086 * Deallocate the space we allocated in the target map.
8087 */
8088 (void) vm_map_remove(
8089 map,
8090 vm_map_trunc_page(*addr,
8091 VM_MAP_PAGE_MASK(map)),
8092 vm_map_round_page((*addr +
8093 vm_map_round_page(copy->size,
8094 VM_MAP_PAGE_MASK(map))),
8095 VM_MAP_PAGE_MASK(map)),
8096 VM_MAP_NO_FLAGS);
8097 *addr = 0;
8098 }
8099 } else {
8100 /* copy was successful, discard the copy structure */
8101 if (consume_on_success) {
8102 kfree(copy, copy->cpy_kalloc_size);
8103 }
8104 }
8105
8106 return kr;
8107 }
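/*
 * Illustrative sketch (not part of the original source): the
 * "assume the target's address space identity" technique used above,
 * shown in a stand-alone form.  The helper name is hypothetical; it
 * simply shows the reference / switch / copyout / switch-back pairing.
 */
#if 0	/* example only, not compiled */
static kern_return_t
example_copyout_to_other_map(
	vm_map_t	target_map,
	const void	*kdata,
	user_addr_t	uaddr,
	vm_size_t	size)
{
	vm_map_t	oldmap;
	kern_return_t	kr = KERN_SUCCESS;

	/* keep the target map alive while we are switched to it */
	vm_map_reference(target_map);
	oldmap = vm_map_switch(target_map);

	if (copyout(kdata, uaddr, size))
		kr = KERN_INVALID_ADDRESS;

	/* restore the original address space and drop our reference */
	(void) vm_map_switch(oldmap);
	vm_map_deallocate(target_map);

	return kr;
}
#endif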
8108
8109 /*
8110 * Macro: vm_map_copy_insert
8111 *
8112 * Description:
8113 * Link a copy chain ("copy") into a map at the
8114 * specified location (after "where").
8115 * Side effects:
8116 * The copy chain is destroyed.
8117 * Warning:
8118 * The arguments are evaluated multiple times.
8119 */
8120 #define vm_map_copy_insert(map, where, copy) \
8121 MACRO_BEGIN \
8122 vm_map_store_copy_insert(map, where, copy); \
8123 zfree(vm_map_copy_zone, copy); \
8124 MACRO_END
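/*
 * Illustrative sketch (not part of the original source): because the
 * macro above expands its arguments more than once, callers should pass
 * simple lvalues rather than expressions with side effects.  The names
 * "dst_map", "insert_after" and "next_pending_copy()" are hypothetical.
 */
#if 0	/* example only, not compiled */
	/* fine: every argument is a plain variable */
	vm_map_copy_insert(dst_map, insert_after, copy);

	/* not fine: the function call would be expanded (and run) twice */
	/* vm_map_copy_insert(dst_map, insert_after, next_pending_copy()); */
#endif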
8125
8126 void
8127 vm_map_copy_remap(
8128 vm_map_t map,
8129 vm_map_entry_t where,
8130 vm_map_copy_t copy,
8131 vm_map_offset_t adjustment,
8132 vm_prot_t cur_prot,
8133 vm_prot_t max_prot,
8134 vm_inherit_t inheritance)
8135 {
8136 vm_map_entry_t copy_entry, new_entry;
8137
8138 for (copy_entry = vm_map_copy_first_entry(copy);
8139 copy_entry != vm_map_copy_to_entry(copy);
8140 copy_entry = copy_entry->vme_next) {
8141 /* get a new VM map entry for the map */
8142 new_entry = vm_map_entry_create(map,
8143 !map->hdr.entries_pageable);
8144 /* copy the "copy entry" to the new entry */
8145 vm_map_entry_copy(new_entry, copy_entry);
8146 /* adjust "start" and "end" */
8147 new_entry->vme_start += adjustment;
8148 new_entry->vme_end += adjustment;
8149 /* clear some attributes */
8150 new_entry->inheritance = inheritance;
8151 new_entry->protection = cur_prot;
8152 new_entry->max_protection = max_prot;
8153 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
8154 /* take an extra reference on the entry's "object" */
8155 if (new_entry->is_sub_map) {
8156 assert(!new_entry->use_pmap); /* not nested */
8157 vm_map_lock(new_entry->object.sub_map);
8158 vm_map_reference(new_entry->object.sub_map);
8159 vm_map_unlock(new_entry->object.sub_map);
8160 } else {
8161 vm_object_reference(new_entry->object.vm_object);
8162 }
8163 /* insert the new entry in the map */
8164 vm_map_store_entry_link(map, where, new_entry);
8165 /* continue inserting the "copy entries" after the new entry */
8166 where = new_entry;
8167 }
8168 }
8169
8170 /*
8171 * Routine: vm_map_copyout
8172 *
8173 * Description:
8174 * Copy out a copy chain ("copy") into newly-allocated
8175 * space in the destination map.
8176 *
8177 * If successful, consumes the copy object.
8178 * Otherwise, the caller is responsible for it.
8179 */
8180
8181 kern_return_t
8182 vm_map_copyout(
8183 vm_map_t dst_map,
8184 vm_map_address_t *dst_addr, /* OUT */
8185 vm_map_copy_t copy)
8186 {
8187 return vm_map_copyout_internal(dst_map, dst_addr, copy,
8188 TRUE, /* consume_on_success */
8189 VM_PROT_DEFAULT,
8190 VM_PROT_ALL,
8191 VM_INHERIT_DEFAULT);
8192 }
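/*
 * Illustrative sketch (not part of the original source): the common
 * pairing of vm_map_copyin() with vm_map_copyout() to move a range
 * from one map into newly allocated space in another.  The helper name
 * and its parameters are hypothetical.
 */
#if 0	/* example only, not compiled */
static kern_return_t
example_transfer_range(
	vm_map_t		src_map,
	vm_map_address_t	src_addr,
	vm_map_size_t		len,
	vm_map_t		dst_map,
	vm_map_address_t	*dst_addr)	/* OUT */
{
	vm_map_copy_t	copy;
	kern_return_t	kr;

	kr = vm_map_copyin(src_map, src_addr, len,
			   FALSE,	/* don't destroy the source */
			   &copy);
	if (kr != KERN_SUCCESS)
		return kr;

	kr = vm_map_copyout(dst_map, dst_addr, copy);
	if (kr != KERN_SUCCESS) {
		/* on failure the copy is not consumed; discard it */
		vm_map_copy_discard(copy);
	}
	return kr;
}
#endif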
8193
8194 kern_return_t
8195 vm_map_copyout_internal(
8196 vm_map_t dst_map,
8197 vm_map_address_t *dst_addr, /* OUT */
8198 vm_map_copy_t copy,
8199 boolean_t consume_on_success,
8200 vm_prot_t cur_protection,
8201 vm_prot_t max_protection,
8202 vm_inherit_t inheritance)
8203 {
8204 vm_map_size_t size;
8205 vm_map_size_t adjustment;
8206 vm_map_offset_t start;
8207 vm_object_offset_t vm_copy_start;
8208 vm_map_entry_t last;
8209 vm_map_entry_t entry;
8210
8211 /*
8212 * Check for null copy object.
8213 */
8214
8215 if (copy == VM_MAP_COPY_NULL) {
8216 *dst_addr = 0;
8217 return(KERN_SUCCESS);
8218 }
8219
8220 /*
8221 * Check for special copy object, created
8222 * by vm_map_copyin_object.
8223 */
8224
8225 if (copy->type == VM_MAP_COPY_OBJECT) {
8226 vm_object_t object = copy->cpy_object;
8227 kern_return_t kr;
8228 vm_object_offset_t offset;
8229
8230 offset = vm_object_trunc_page(copy->offset);
8231 size = vm_map_round_page((copy->size +
8232 (vm_map_size_t)(copy->offset -
8233 offset)),
8234 VM_MAP_PAGE_MASK(dst_map));
8235 *dst_addr = 0;
8236 kr = vm_map_enter(dst_map, dst_addr, size,
8237 (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
8238 object, offset, FALSE,
8239 VM_PROT_DEFAULT, VM_PROT_ALL,
8240 VM_INHERIT_DEFAULT);
8241 if (kr != KERN_SUCCESS)
8242 return(kr);
8243 /* Account for non-pagealigned copy object */
8244 *dst_addr += (vm_map_offset_t)(copy->offset - offset);
8245 if (consume_on_success)
8246 zfree(vm_map_copy_zone, copy);
8247 return(KERN_SUCCESS);
8248 }
8249
8250 /*
8251 * Check for special kernel buffer allocated
8252 * by new_ipc_kmsg_copyin.
8253 */
8254
8255 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
8256 return vm_map_copyout_kernel_buffer(dst_map, dst_addr,
8257 copy, FALSE,
8258 consume_on_success);
8259 }
8260
8261
8262 /*
8263 * Find space for the data
8264 */
8265
8266 vm_copy_start = vm_map_trunc_page((vm_map_size_t)copy->offset,
8267 VM_MAP_COPY_PAGE_MASK(copy));
8268 size = vm_map_round_page((vm_map_size_t)copy->offset + copy->size,
8269 VM_MAP_COPY_PAGE_MASK(copy))
8270 - vm_copy_start;
8271
8272
8273 StartAgain: ;
8274
8275 vm_map_lock(dst_map);
8276 if( dst_map->disable_vmentry_reuse == TRUE) {
8277 VM_MAP_HIGHEST_ENTRY(dst_map, entry, start);
8278 last = entry;
8279 } else {
8280 assert(first_free_is_valid(dst_map));
8281 start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
8282 vm_map_min(dst_map) : last->vme_end;
8283 start = vm_map_round_page(start,
8284 VM_MAP_PAGE_MASK(dst_map));
8285 }
8286
8287 while (TRUE) {
8288 vm_map_entry_t next = last->vme_next;
8289 vm_map_offset_t end = start + size;
8290
8291 if ((end > dst_map->max_offset) || (end < start)) {
8292 if (dst_map->wait_for_space) {
8293 if (size <= (dst_map->max_offset - dst_map->min_offset)) {
8294 assert_wait((event_t) dst_map,
8295 THREAD_INTERRUPTIBLE);
8296 vm_map_unlock(dst_map);
8297 thread_block(THREAD_CONTINUE_NULL);
8298 goto StartAgain;
8299 }
8300 }
8301 vm_map_unlock(dst_map);
8302 return(KERN_NO_SPACE);
8303 }
8304
8305 if ((next == vm_map_to_entry(dst_map)) ||
8306 (next->vme_start >= end))
8307 break;
8308
8309 last = next;
8310 start = last->vme_end;
8311 start = vm_map_round_page(start,
8312 VM_MAP_PAGE_MASK(dst_map));
8313 }
8314
8315 adjustment = start - vm_copy_start;
8316 if (! consume_on_success) {
8317 /*
8318 * We're not allowed to consume "copy", so we'll have to
8319 * copy its map entries into the destination map below.
8320 * No need to re-allocate map entries from the correct
8321 * (pageable or not) zone, since we'll get new map entries
8322 * during the transfer.
8323 * We'll also adjust the map entries' "start" and "end"
8324 * during the transfer, to keep "copy"'s entries consistent
8325 * with its "offset".
8326 */
8327 goto after_adjustments;
8328 }
8329
8330 /*
8331 * Since we're going to just drop the map
8332 * entries from the copy into the destination
8333 * map, they must come from the same pool.
8334 */
8335
8336 if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
8337 /*
8338 * Mismatches occur when dealing with the default
8339 * pager.
8340 */
8341 zone_t old_zone;
8342 vm_map_entry_t next, new;
8343
8344 /*
8345 * Find the zone that the copies were allocated from
8346 */
8347
8348 entry = vm_map_copy_first_entry(copy);
8349
8350 /*
8351 * Reinitialize the copy so that vm_map_copy_entry_link
8352 * will work.
8353 */
8354 vm_map_store_copy_reset(copy, entry);
8355 copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;
8356
8357 /*
8358 * Copy each entry.
8359 */
8360 while (entry != vm_map_copy_to_entry(copy)) {
8361 new = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
8362 vm_map_entry_copy_full(new, entry);
8363 assert(!new->iokit_acct);
8364 if (new->is_sub_map) {
8365 /* clr address space specifics */
8366 new->use_pmap = FALSE;
8367 }
8368 vm_map_copy_entry_link(copy,
8369 vm_map_copy_last_entry(copy),
8370 new);
8371 next = entry->vme_next;
8372 old_zone = entry->from_reserved_zone ? vm_map_entry_reserved_zone : vm_map_entry_zone;
8373 zfree(old_zone, entry);
8374 entry = next;
8375 }
8376 }
8377
8378 /*
8379 * Adjust the addresses in the copy chain, and
8380 * reset the region attributes.
8381 */
8382
8383 for (entry = vm_map_copy_first_entry(copy);
8384 entry != vm_map_copy_to_entry(copy);
8385 entry = entry->vme_next) {
8386 if (VM_MAP_PAGE_SHIFT(dst_map) == PAGE_SHIFT) {
8387 /*
8388 * We're injecting this copy entry into a map that
8389 * has the standard page alignment, so clear
8390 * "map_aligned" (which might have been inherited
8391 * from the original map entry).
8392 */
8393 entry->map_aligned = FALSE;
8394 }
8395
8396 entry->vme_start += adjustment;
8397 entry->vme_end += adjustment;
8398
8399 if (entry->map_aligned) {
8400 assert(VM_MAP_PAGE_ALIGNED(entry->vme_start,
8401 VM_MAP_PAGE_MASK(dst_map)));
8402 assert(VM_MAP_PAGE_ALIGNED(entry->vme_end,
8403 VM_MAP_PAGE_MASK(dst_map)));
8404 }
8405
8406 entry->inheritance = VM_INHERIT_DEFAULT;
8407 entry->protection = VM_PROT_DEFAULT;
8408 entry->max_protection = VM_PROT_ALL;
8409 entry->behavior = VM_BEHAVIOR_DEFAULT;
8410
8411 /*
8412 * If the entry is now wired,
8413 * map the pages into the destination map.
8414 */
8415 if (entry->wired_count != 0) {
8416 register vm_map_offset_t va;
8417 vm_object_offset_t offset;
8418 register vm_object_t object;
8419 vm_prot_t prot;
8420 int type_of_fault;
8421
8422 object = entry->object.vm_object;
8423 offset = entry->offset;
8424 va = entry->vme_start;
8425
8426 pmap_pageable(dst_map->pmap,
8427 entry->vme_start,
8428 entry->vme_end,
8429 TRUE);
8430
8431 while (va < entry->vme_end) {
8432 register vm_page_t m;
8433
8434 /*
8435 * Look up the page in the object.
8436 * Assert that the page will be found in the
8437 * top object:
8438 * either
8439 * the object was newly created by
8440 * vm_object_copy_slowly, and has
8441 * copies of all of the pages from
8442 * the source object
8443 * or
8444 * the object was moved from the old
8445 * map entry; because the old map
8446 * entry was wired, all of the pages
8447 * were in the top-level object.
8448 * (XXX not true if we wire pages for
8449 * reading)
8450 */
8451 vm_object_lock(object);
8452
8453 m = vm_page_lookup(object, offset);
8454 if (m == VM_PAGE_NULL || !VM_PAGE_WIRED(m) ||
8455 m->absent)
8456 panic("vm_map_copyout: wiring %p", m);
8457
8458 /*
8459 * ENCRYPTED SWAP:
8460 * The page is assumed to be wired here, so it
8461 * shouldn't be encrypted. Otherwise, we
8462 * couldn't enter it in the page table, since
8463 * we don't want the user to see the encrypted
8464 * data.
8465 */
8466 ASSERT_PAGE_DECRYPTED(m);
8467
8468 prot = entry->protection;
8469
8470 if (override_nx(dst_map, entry->alias) && prot)
8471 prot |= VM_PROT_EXECUTE;
8472
8473 type_of_fault = DBG_CACHE_HIT_FAULT;
8474
8475 vm_fault_enter(m, dst_map->pmap, va, prot, prot,
8476 VM_PAGE_WIRED(m), FALSE, FALSE,
8477 FALSE, entry->alias,
8478 ((entry->iokit_acct ||
8479 (!entry->is_sub_map &&
8480 !entry->use_pmap))
8481 ? PMAP_OPTIONS_ALT_ACCT
8482 : 0),
8483 NULL, &type_of_fault);
8484
8485 vm_object_unlock(object);
8486
8487 offset += PAGE_SIZE_64;
8488 va += PAGE_SIZE;
8489 }
8490 }
8491 }
8492
8493 after_adjustments:
8494
8495 /*
8496 * Correct the page alignment for the result
8497 */
8498
8499 *dst_addr = start + (copy->offset - vm_copy_start);
8500
8501 /*
8502 * Update the hints and the map size
8503 */
8504
8505 if (consume_on_success) {
8506 SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy));
8507 } else {
8508 SAVE_HINT_MAP_WRITE(dst_map, last);
8509 }
8510
8511 dst_map->size += size;
8512
8513 /*
8514 * Link in the copy
8515 */
8516
8517 if (consume_on_success) {
8518 vm_map_copy_insert(dst_map, last, copy);
8519 } else {
8520 vm_map_copy_remap(dst_map, last, copy, adjustment,
8521 cur_protection, max_protection,
8522 inheritance);
8523 }
8524
8525 vm_map_unlock(dst_map);
8526
8527 /*
8528 * XXX If wiring_required, call vm_map_pageable
8529 */
8530
8531 return(KERN_SUCCESS);
8532 }
8533
8534 /*
8535 * Routine: vm_map_copyin
8536 *
8537 * Description:
8538 * see vm_map_copyin_common. Exported via Unsupported.exports.
8539 *
8540 */
8541
8542 #undef vm_map_copyin
8543
8544 kern_return_t
8545 vm_map_copyin(
8546 vm_map_t src_map,
8547 vm_map_address_t src_addr,
8548 vm_map_size_t len,
8549 boolean_t src_destroy,
8550 vm_map_copy_t *copy_result) /* OUT */
8551 {
8552 return(vm_map_copyin_common(src_map, src_addr, len, src_destroy,
8553 FALSE, copy_result, FALSE));
8554 }
8555
8556 /*
8557 * Routine: vm_map_copyin_common
8558 *
8559 * Description:
8560 * Copy the specified region (src_addr, len) from the
8561 * source address space (src_map), possibly removing
8562 * the region from the source address space (src_destroy).
8563 *
8564 * Returns:
8565 * A vm_map_copy_t object (copy_result), suitable for
8566 * insertion into another address space (using vm_map_copyout),
8567 * copying over another address space region (using
8568 * vm_map_copy_overwrite). If the copy is unused, it
8569 * should be destroyed (using vm_map_copy_discard).
8570 *
8571 * In/out conditions:
8572 * The source map should not be locked on entry.
8573 */
8574
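/*
 * Illustrative sketch (not part of the original source): a copy made by
 * vm_map_copyin_common() below can also be used to overwrite an already
 * mapped destination range via vm_map_copy_overwrite(), instead of
 * allocating new space with vm_map_copyout().  The helper name and its
 * parameters are hypothetical; the vm_map_copy_overwrite() prototype is
 * assumed to be the one declared in vm_map.h.
 */
#if 0	/* example only, not compiled */
static kern_return_t
example_overwrite_range(
	vm_map_t		src_map,
	vm_map_address_t	src_addr,
	vm_map_t		dst_map,
	vm_map_address_t	dst_addr,
	vm_map_size_t		len)
{
	vm_map_copy_t	copy;
	kern_return_t	kr;

	kr = vm_map_copyin(src_map, src_addr, len, FALSE, &copy);
	if (kr != KERN_SUCCESS)
		return kr;

	/* overwrite the existing mapping at [dst_addr, dst_addr + len) */
	kr = vm_map_copy_overwrite(dst_map, dst_addr, copy,
				   FALSE);	/* not interruptible */
	if (kr != KERN_SUCCESS)
		vm_map_copy_discard(copy);
	return kr;
}
#endif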
8575 typedef struct submap_map {
8576 vm_map_t parent_map;
8577 vm_map_offset_t base_start;
8578 vm_map_offset_t base_end;
8579 vm_map_size_t base_len;
8580 struct submap_map *next;
8581 } submap_map_t;
8582
8583 kern_return_t
8584 vm_map_copyin_common(
8585 vm_map_t src_map,
8586 vm_map_address_t src_addr,
8587 vm_map_size_t len,
8588 boolean_t src_destroy,
8589 __unused boolean_t src_volatile,
8590 vm_map_copy_t *copy_result, /* OUT */
8591 boolean_t use_maxprot)
8592 {
8593 vm_map_entry_t tmp_entry; /* Result of last map lookup --
8594 * in multi-level lookup, this
8595 * entry contains the actual
8596 * vm_object/offset.
8597 */
8598 register
8599 vm_map_entry_t new_entry = VM_MAP_ENTRY_NULL; /* Map entry for copy */
8600
8601 vm_map_offset_t src_start; /* Start of current entry --
8602 * where copy is taking place now
8603 */
8604 vm_map_offset_t src_end; /* End of entire region to be
8605 * copied */
8606 vm_map_offset_t src_base;
8607 vm_map_t base_map = src_map;
8608 boolean_t map_share=FALSE;
8609 submap_map_t *parent_maps = NULL;
8610
8611 register
8612 vm_map_copy_t copy; /* Resulting copy */
8613 vm_map_address_t copy_addr;
8614 vm_map_size_t copy_size;
8615
8616 /*
8617 * Check for copies of zero bytes.
8618 */
8619
8620 if (len == 0) {
8621 *copy_result = VM_MAP_COPY_NULL;
8622 return(KERN_SUCCESS);
8623 }
8624
8625 /*
8626 * Check that the end address doesn't overflow
8627 */
8628 src_end = src_addr + len;
8629 if (src_end < src_addr)
8630 return KERN_INVALID_ADDRESS;
8631
8632 /*
8633 * If the copy is sufficiently small, use a kernel buffer instead
8634 * of making a virtual copy. The theory being that the cost of
8635 * setting up VM (and taking C-O-W faults) dominates the copy costs
8636 * for small regions.
8637 */
8638 if ((len < msg_ool_size_small) && !use_maxprot)
8639 return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
8640 src_destroy, copy_result);
8641
8642 /*
8643 * Compute (page aligned) start and end of region
8644 */
8645 src_start = vm_map_trunc_page(src_addr,
8646 VM_MAP_PAGE_MASK(src_map));
8647 src_end = vm_map_round_page(src_end,
8648 VM_MAP_PAGE_MASK(src_map));
8649
8650 XPR(XPR_VM_MAP, "vm_map_copyin_common map 0x%x addr 0x%x len 0x%x dest %d\n", src_map, src_addr, len, src_destroy, 0);
8651
8652 /*
8653 * Allocate a header element for the list.
8654 *
8655 * Use the start and end in the header to
8656 * remember the endpoints prior to rounding.
8657 */
8658
8659 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
8660 vm_map_copy_first_entry(copy) =
8661 vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
8662 copy->type = VM_MAP_COPY_ENTRY_LIST;
8663 copy->cpy_hdr.nentries = 0;
8664 copy->cpy_hdr.entries_pageable = TRUE;
8665 #if 00
8666 copy->cpy_hdr.page_shift = src_map->hdr.page_shift;
8667 #else
8668 /*
8669 * The copy entries can be broken down for a variety of reasons,
8670 * so we can't guarantee that they will remain map-aligned...
8671 * Will need to adjust the first copy_entry's "vme_start" and
8672 * the last copy_entry's "vme_end" to be rounded to PAGE_MASK
8673 * rather than the original map's alignment.
8674 */
8675 copy->cpy_hdr.page_shift = PAGE_SHIFT;
8676 #endif
8677
8678 vm_map_store_init( &(copy->cpy_hdr) );
8679
8680 copy->offset = src_addr;
8681 copy->size = len;
8682
8683 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
8684
8685 #define RETURN(x) \
8686 MACRO_BEGIN \
8687 vm_map_unlock(src_map); \
8688 if(src_map != base_map) \
8689 vm_map_deallocate(src_map); \
8690 if (new_entry != VM_MAP_ENTRY_NULL) \
8691 vm_map_copy_entry_dispose(copy,new_entry); \
8692 vm_map_copy_discard(copy); \
8693 { \
8694 submap_map_t *_ptr; \
8695 \
8696 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
8697 parent_maps=parent_maps->next; \
8698 if (_ptr->parent_map != base_map) \
8699 vm_map_deallocate(_ptr->parent_map); \
8700 kfree(_ptr, sizeof(submap_map_t)); \
8701 } \
8702 } \
8703 MACRO_RETURN(x); \
8704 MACRO_END
8705
8706 /*
8707 * Find the beginning of the region.
8708 */
8709
8710 vm_map_lock(src_map);
8711
8712 /*
8713 * Lookup the original "src_addr" rather than the truncated
8714 * "src_start", in case "src_start" falls in a non-map-aligned
8715 * map entry *before* the map entry that contains "src_addr"...
8716 */
8717 if (!vm_map_lookup_entry(src_map, src_addr, &tmp_entry))
8718 RETURN(KERN_INVALID_ADDRESS);
8719 if(!tmp_entry->is_sub_map) {
8720 /*
8721 * ... but clip to the map-rounded "src_start" rather than
8722 * "src_addr" to preserve map-alignment. We'll adjust the
8723 * first copy entry at the end, if needed.
8724 */
8725 vm_map_clip_start(src_map, tmp_entry, src_start);
8726 }
8727 if (src_start < tmp_entry->vme_start) {
8728 /*
8729 * Move "src_start" up to the start of the
8730 * first map entry to copy.
8731 */
8732 src_start = tmp_entry->vme_start;
8733 }
8734 /* set for later submap fix-up */
8735 copy_addr = src_start;
8736
8737 /*
8738 * Go through entries until we get to the end.
8739 */
8740
8741 while (TRUE) {
8742 register
8743 vm_map_entry_t src_entry = tmp_entry; /* Top-level entry */
8744 vm_map_size_t src_size; /* Size of source
8745 * map entry (in both
8746 * maps)
8747 */
8748
8749 register
8750 vm_object_t src_object; /* Object to copy */
8751 vm_object_offset_t src_offset;
8752
8753 boolean_t src_needs_copy; /* Should source map
8754 * be made read-only
8755 * for copy-on-write?
8756 */
8757
8758 boolean_t new_entry_needs_copy; /* Will new entry be COW? */
8759
8760 boolean_t was_wired; /* Was source wired? */
8761 vm_map_version_t version; /* Version before locks
8762 * dropped to make copy
8763 */
8764 kern_return_t result; /* Return value from
8765 * copy_strategically.
8766 */
8767 while(tmp_entry->is_sub_map) {
8768 vm_map_size_t submap_len;
8769 submap_map_t *ptr;
8770
8771 ptr = (submap_map_t *)kalloc(sizeof(submap_map_t));
8772 ptr->next = parent_maps;
8773 parent_maps = ptr;
8774 ptr->parent_map = src_map;
8775 ptr->base_start = src_start;
8776 ptr->base_end = src_end;
8777 submap_len = tmp_entry->vme_end - src_start;
8778 if(submap_len > (src_end-src_start))
8779 submap_len = src_end-src_start;
8780 ptr->base_len = submap_len;
8781
8782 src_start -= tmp_entry->vme_start;
8783 src_start += tmp_entry->offset;
8784 src_end = src_start + submap_len;
8785 src_map = tmp_entry->object.sub_map;
8786 vm_map_lock(src_map);
8787 /* keep an outstanding reference for all maps in */
8788 /* the parent's tree except the base map */
8789 vm_map_reference(src_map);
8790 vm_map_unlock(ptr->parent_map);
8791 if (!vm_map_lookup_entry(
8792 src_map, src_start, &tmp_entry))
8793 RETURN(KERN_INVALID_ADDRESS);
8794 map_share = TRUE;
8795 if(!tmp_entry->is_sub_map)
8796 vm_map_clip_start(src_map, tmp_entry, src_start);
8797 src_entry = tmp_entry;
8798 }
8799 /* we are now in the lowest level submap... */
8800
8801 if ((tmp_entry->object.vm_object != VM_OBJECT_NULL) &&
8802 (tmp_entry->object.vm_object->phys_contiguous)) {
8803 /* This is not supported for now. In future */
8804 /* we will need to detect the phys_contig */
8805 /* condition and then upgrade copy_slowly */
8806 /* to do physical copy from the device mem */
8807 /* based object. We can piggy-back off of */
8808 /* the was_wired boolean to set up the */
8809 /* proper handling */
8810 RETURN(KERN_PROTECTION_FAILURE);
8811 }
8812 /*
8813 * Create a new address map entry to hold the result.
8814 * Fill in the fields from the appropriate source entries.
8815 * We must unlock the source map to do this if we need
8816 * to allocate a map entry.
8817 */
8818 if (new_entry == VM_MAP_ENTRY_NULL) {
8819 version.main_timestamp = src_map->timestamp;
8820 vm_map_unlock(src_map);
8821
8822 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
8823
8824 vm_map_lock(src_map);
8825 if ((version.main_timestamp + 1) != src_map->timestamp) {
8826 if (!vm_map_lookup_entry(src_map, src_start,
8827 &tmp_entry)) {
8828 RETURN(KERN_INVALID_ADDRESS);
8829 }
8830 if (!tmp_entry->is_sub_map)
8831 vm_map_clip_start(src_map, tmp_entry, src_start);
8832 continue; /* restart w/ new tmp_entry */
8833 }
8834 }
8835
8836 /*
8837 * Verify that the region can be read.
8838 */
8839 if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE &&
8840 !use_maxprot) ||
8841 (src_entry->max_protection & VM_PROT_READ) == 0)
8842 RETURN(KERN_PROTECTION_FAILURE);
8843
8844 /*
8845 * Clip against the endpoints of the entire region.
8846 */
8847
8848 vm_map_clip_end(src_map, src_entry, src_end);
8849
8850 src_size = src_entry->vme_end - src_start;
8851 src_object = src_entry->object.vm_object;
8852 src_offset = src_entry->offset;
8853 was_wired = (src_entry->wired_count != 0);
8854
8855 vm_map_entry_copy(new_entry, src_entry);
8856 if (new_entry->is_sub_map) {
8857 /* clr address space specifics */
8858 new_entry->use_pmap = FALSE;
8859 }
8860
8861 /*
8862 * Attempt non-blocking copy-on-write optimizations.
8863 */
8864
8865 if (src_destroy &&
8866 (src_object == VM_OBJECT_NULL ||
8867 (src_object->internal && !src_object->true_share
8868 && !map_share))) {
8869 /*
8870 * If we are destroying the source, and the object
8871 * is internal, we can move the object reference
8872 * from the source to the copy. The copy is
8873 * copy-on-write only if the source is.
8874 * We make another reference to the object, because
8875 * destroying the source entry will deallocate it.
8876 */
8877 vm_object_reference(src_object);
8878
8879 /*
8880 * Copy is always unwired. vm_map_copy_entry
8881 * set its wired count to zero.
8882 */
8883
8884 goto CopySuccessful;
8885 }
8886
8887
8888 RestartCopy:
8889 XPR(XPR_VM_MAP, "vm_map_copyin_common src_obj 0x%x ent 0x%x obj 0x%x was_wired %d\n",
8890 src_object, new_entry, new_entry->object.vm_object,
8891 was_wired, 0);
8892 if ((src_object == VM_OBJECT_NULL ||
8893 (!was_wired && !map_share && !tmp_entry->is_shared)) &&
8894 vm_object_copy_quickly(
8895 &new_entry->object.vm_object,
8896 src_offset,
8897 src_size,
8898 &src_needs_copy,
8899 &new_entry_needs_copy)) {
8900
8901 new_entry->needs_copy = new_entry_needs_copy;
8902
8903 /*
8904 * Handle copy-on-write obligations
8905 */
8906
8907 if (src_needs_copy && !tmp_entry->needs_copy) {
8908 vm_prot_t prot;
8909
8910 prot = src_entry->protection & ~VM_PROT_WRITE;
8911
8912 if (override_nx(src_map, src_entry->alias) && prot)
8913 prot |= VM_PROT_EXECUTE;
8914
8915 vm_object_pmap_protect(
8916 src_object,
8917 src_offset,
8918 src_size,
8919 (src_entry->is_shared ?
8920 PMAP_NULL
8921 : src_map->pmap),
8922 src_entry->vme_start,
8923 prot);
8924
8925 tmp_entry->needs_copy = TRUE;
8926 }
8927
8928 /*
8929 * The map has never been unlocked, so it's safe
8930 * to move to the next entry rather than doing
8931 * another lookup.
8932 */
8933
8934 goto CopySuccessful;
8935 }
8936
8937 /*
8938 * Take an object reference, so that we may
8939 * release the map lock(s).
8940 */
8941
8942 assert(src_object != VM_OBJECT_NULL);
8943 vm_object_reference(src_object);
8944
8945 /*
8946 * Record the timestamp for later verification.
8947 * Unlock the map.
8948 */
8949
8950 version.main_timestamp = src_map->timestamp;
8951 vm_map_unlock(src_map); /* Increments timestamp once! */
8952
8953 /*
8954 * Perform the copy
8955 */
8956
8957 if (was_wired) {
8958 CopySlowly:
8959 vm_object_lock(src_object);
8960 result = vm_object_copy_slowly(
8961 src_object,
8962 src_offset,
8963 src_size,
8964 THREAD_UNINT,
8965 &new_entry->object.vm_object);
8966 new_entry->offset = 0;
8967 new_entry->needs_copy = FALSE;
8968
8969 }
8970 else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
8971 (tmp_entry->is_shared || map_share)) {
8972 vm_object_t new_object;
8973
8974 vm_object_lock_shared(src_object);
8975 new_object = vm_object_copy_delayed(
8976 src_object,
8977 src_offset,
8978 src_size,
8979 TRUE);
8980 if (new_object == VM_OBJECT_NULL)
8981 goto CopySlowly;
8982
8983 new_entry->object.vm_object = new_object;
8984 new_entry->needs_copy = TRUE;
8985 assert(!new_entry->iokit_acct);
8986 assert(new_object->purgable == VM_PURGABLE_DENY);
8987 new_entry->use_pmap = TRUE;
8988 result = KERN_SUCCESS;
8989
8990 } else {
8991 result = vm_object_copy_strategically(src_object,
8992 src_offset,
8993 src_size,
8994 &new_entry->object.vm_object,
8995 &new_entry->offset,
8996 &new_entry_needs_copy);
8997
8998 new_entry->needs_copy = new_entry_needs_copy;
8999 }
9000
9001 if (result != KERN_SUCCESS &&
9002 result != KERN_MEMORY_RESTART_COPY) {
9003 vm_map_lock(src_map);
9004 RETURN(result);
9005 }
9006
9007 /*
9008 * Throw away the extra reference
9009 */
9010
9011 vm_object_deallocate(src_object);
9012
9013 /*
9014 * Verify that the map has not substantially
9015 * changed while the copy was being made.
9016 */
9017
9018 vm_map_lock(src_map);
9019
9020 if ((version.main_timestamp + 1) == src_map->timestamp)
9021 goto VerificationSuccessful;
9022
9023 /*
9024 * Simple version comparison failed.
9025 *
9026 * Retry the lookup and verify that the
9027 * same object/offset are still present.
9028 *
9029 * [Note: a memory manager that colludes with
9030 * the calling task can detect that we have
9031 * cheated. While the map was unlocked, the
9032 * mapping could have been changed and restored.]
9033 */
9034
9035 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
9036 if (result != KERN_MEMORY_RESTART_COPY) {
9037 vm_object_deallocate(new_entry->object.vm_object);
9038 new_entry->object.vm_object = VM_OBJECT_NULL;
9039 assert(!new_entry->iokit_acct);
9040 new_entry->use_pmap = TRUE;
9041 }
9042 RETURN(KERN_INVALID_ADDRESS);
9043 }
9044
9045 src_entry = tmp_entry;
9046 vm_map_clip_start(src_map, src_entry, src_start);
9047
9048 if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) &&
9049 !use_maxprot) ||
9050 ((src_entry->max_protection & VM_PROT_READ) == 0))
9051 goto VerificationFailed;
9052
9053 if (src_entry->vme_end < new_entry->vme_end) {
9054 assert(VM_MAP_PAGE_ALIGNED(src_entry->vme_end,
9055 VM_MAP_COPY_PAGE_MASK(copy)));
9056 new_entry->vme_end = src_entry->vme_end;
9057 src_size = new_entry->vme_end - src_start;
9058 }
9059
9060 if ((src_entry->object.vm_object != src_object) ||
9061 (src_entry->offset != src_offset) ) {
9062
9063 /*
9064 * Verification failed.
9065 *
9066 * Start over with this top-level entry.
9067 */
9068
9069 VerificationFailed: ;
9070
9071 vm_object_deallocate(new_entry->object.vm_object);
9072 tmp_entry = src_entry;
9073 continue;
9074 }
9075
9076 /*
9077 * Verification succeeded.
9078 */
9079
9080 VerificationSuccessful: ;
9081
9082 if (result == KERN_MEMORY_RESTART_COPY)
9083 goto RestartCopy;
9084
9085 /*
9086 * Copy succeeded.
9087 */
9088
9089 CopySuccessful: ;
9090
9091 /*
9092 * Link in the new copy entry.
9093 */
9094
9095 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
9096 new_entry);
9097
9098 /*
9099 * Determine whether the entire region
9100 * has been copied.
9101 */
9102 src_base = src_start;
9103 src_start = new_entry->vme_end;
9104 new_entry = VM_MAP_ENTRY_NULL;
9105 while ((src_start >= src_end) && (src_end != 0)) {
9106 submap_map_t *ptr;
9107
9108 if (src_map == base_map) {
9109 /* back to the top */
9110 break;
9111 }
9112
9113 ptr = parent_maps;
9114 assert(ptr != NULL);
9115 parent_maps = parent_maps->next;
9116
9117 /* fix up the damage we did in that submap */
9118 vm_map_simplify_range(src_map,
9119 src_base,
9120 src_end);
9121
9122 vm_map_unlock(src_map);
9123 vm_map_deallocate(src_map);
9124 vm_map_lock(ptr->parent_map);
9125 src_map = ptr->parent_map;
9126 src_base = ptr->base_start;
9127 src_start = ptr->base_start + ptr->base_len;
9128 src_end = ptr->base_end;
9129 if (!vm_map_lookup_entry(src_map,
9130 src_start,
9131 &tmp_entry) &&
9132 (src_end > src_start)) {
9133 RETURN(KERN_INVALID_ADDRESS);
9134 }
9135 kfree(ptr, sizeof(submap_map_t));
9136 if (parent_maps == NULL)
9137 map_share = FALSE;
9138 src_entry = tmp_entry->vme_prev;
9139 }
9140
9141 if ((VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) &&
9142 (src_start >= src_addr + len) &&
9143 (src_addr + len != 0)) {
9144 /*
9145 * Stop copying now, even though we haven't reached
9146 * "src_end". We'll adjust the end of the last copy
9147 * entry at the end, if needed.
9148 *
9149 * If src_map's alignment is different from the
9150 * system's page-alignment, there could be
9151 * extra non-map-aligned map entries between
9152 * the original (non-rounded) "src_addr + len"
9153 * and the rounded "src_end".
9154 * We do not want to copy those map entries since
9155 * they're not part of the copied range.
9156 */
9157 break;
9158 }
9159
9160 if ((src_start >= src_end) && (src_end != 0))
9161 break;
9162
9163 /*
9164 * Verify that there are no gaps in the region
9165 */
9166
9167 tmp_entry = src_entry->vme_next;
9168 if ((tmp_entry->vme_start != src_start) ||
9169 (tmp_entry == vm_map_to_entry(src_map))) {
9170 RETURN(KERN_INVALID_ADDRESS);
9171 }
9172 }
9173
9174 /*
9175 * If the source should be destroyed, do it now, since the
9176 * copy was successful.
9177 */
9178 if (src_destroy) {
9179 (void) vm_map_delete(
9180 src_map,
9181 vm_map_trunc_page(src_addr,
9182 VM_MAP_PAGE_MASK(src_map)),
9183 src_end,
9184 ((src_map == kernel_map) ?
9185 VM_MAP_REMOVE_KUNWIRE :
9186 VM_MAP_NO_FLAGS),
9187 VM_MAP_NULL);
9188 } else {
9189 /* fix up the damage we did in the base map */
9190 vm_map_simplify_range(
9191 src_map,
9192 vm_map_trunc_page(src_addr,
9193 VM_MAP_PAGE_MASK(src_map)),
9194 vm_map_round_page(src_end,
9195 VM_MAP_PAGE_MASK(src_map)));
9196 }
9197
9198 vm_map_unlock(src_map);
9199
9200 if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) {
9201 vm_map_offset_t original_start, original_offset, original_end;
9202
9203 assert(VM_MAP_COPY_PAGE_MASK(copy) == PAGE_MASK);
9204
9205 /* adjust alignment of first copy_entry's "vme_start" */
9206 tmp_entry = vm_map_copy_first_entry(copy);
9207 if (tmp_entry != vm_map_copy_to_entry(copy)) {
9208 vm_map_offset_t adjustment;
9209
9210 original_start = tmp_entry->vme_start;
9211 original_offset = tmp_entry->offset;
9212
9213 /* map-align the start of the first copy entry... */
9214 adjustment = (tmp_entry->vme_start -
9215 vm_map_trunc_page(
9216 tmp_entry->vme_start,
9217 VM_MAP_PAGE_MASK(src_map)));
9218 tmp_entry->vme_start -= adjustment;
9219 tmp_entry->offset -= adjustment;
9220 copy_addr -= adjustment;
9221 assert(tmp_entry->vme_start < tmp_entry->vme_end);
9222 /* ... adjust for mis-aligned start of copy range */
9223 adjustment =
9224 (vm_map_trunc_page(copy->offset,
9225 PAGE_MASK) -
9226 vm_map_trunc_page(copy->offset,
9227 VM_MAP_PAGE_MASK(src_map)));
9228 if (adjustment) {
9229 assert(page_aligned(adjustment));
9230 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
9231 tmp_entry->vme_start += adjustment;
9232 tmp_entry->offset += adjustment;
9233 copy_addr += adjustment;
9234 assert(tmp_entry->vme_start < tmp_entry->vme_end);
9235 }
9236
9237 /*
9238 * Assert that the adjustments haven't exposed
9239 * more than was originally copied...
9240 */
9241 assert(tmp_entry->vme_start >= original_start);
9242 assert(tmp_entry->offset >= original_offset);
9243 /*
9244 * ... and that it did not adjust outside of
9245 * a single 16K page.
9246 */
9247 assert(vm_map_trunc_page(tmp_entry->vme_start,
9248 VM_MAP_PAGE_MASK(src_map)) ==
9249 vm_map_trunc_page(original_start,
9250 VM_MAP_PAGE_MASK(src_map)));
9251 }
9252
9253 /* adjust alignment of last copy_entry's "vme_end" */
9254 tmp_entry = vm_map_copy_last_entry(copy);
9255 if (tmp_entry != vm_map_copy_to_entry(copy)) {
9256 vm_map_offset_t adjustment;
9257
9258 original_end = tmp_entry->vme_end;
9259
9260 /* map-align the end of the last copy entry... */
9261 tmp_entry->vme_end =
9262 vm_map_round_page(tmp_entry->vme_end,
9263 VM_MAP_PAGE_MASK(src_map));
9264 /* ... adjust for mis-aligned end of copy range */
9265 adjustment =
9266 (vm_map_round_page((copy->offset +
9267 copy->size),
9268 VM_MAP_PAGE_MASK(src_map)) -
9269 vm_map_round_page((copy->offset +
9270 copy->size),
9271 PAGE_MASK));
9272 if (adjustment) {
9273 assert(page_aligned(adjustment));
9274 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
9275 tmp_entry->vme_end -= adjustment;
9276 assert(tmp_entry->vme_start < tmp_entry->vme_end);
9277 }
9278
9279 /*
9280 * Assert that the adjustments haven't exposed
9281 * more than was originally copied...
9282 */
9283 assert(tmp_entry->vme_end <= original_end);
9284 /*
9285 * ... and that it did not adjust outside of
9286 * a single 16K page.
9287 */
9288 assert(vm_map_round_page(tmp_entry->vme_end,
9289 VM_MAP_PAGE_MASK(src_map)) ==
9290 vm_map_round_page(original_end,
9291 VM_MAP_PAGE_MASK(src_map)));
9292 }
9293 }
9294
9295 /* Fix-up start and end points in copy. This is necessary */
9296 /* when the various entries in the copy object were picked */
9297 /* up from different sub-maps */
9298
9299 tmp_entry = vm_map_copy_first_entry(copy);
9300 copy_size = 0; /* compute actual size */
9301 while (tmp_entry != vm_map_copy_to_entry(copy)) {
9302 assert(VM_MAP_PAGE_ALIGNED(
9303 copy_addr + (tmp_entry->vme_end -
9304 tmp_entry->vme_start),
9305 VM_MAP_COPY_PAGE_MASK(copy)));
9306 assert(VM_MAP_PAGE_ALIGNED(
9307 copy_addr,
9308 VM_MAP_COPY_PAGE_MASK(copy)));
9309
9310 /*
9311 * The copy_entries will be injected directly into the
9312 * destination map and might not be "map aligned" there...
9313 */
9314 tmp_entry->map_aligned = FALSE;
9315
9316 tmp_entry->vme_end = copy_addr +
9317 (tmp_entry->vme_end - tmp_entry->vme_start);
9318 tmp_entry->vme_start = copy_addr;
9319 assert(tmp_entry->vme_start < tmp_entry->vme_end);
9320 copy_addr += tmp_entry->vme_end - tmp_entry->vme_start;
9321 copy_size += tmp_entry->vme_end - tmp_entry->vme_start;
9322 tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next;
9323 }
9324
9325 if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT &&
9326 copy_size < copy->size) {
9327 /*
9328 * The actual size of the VM map copy is smaller than what
9329 * was requested by the caller. This must be because some
9330 * PAGE_SIZE-sized pages are missing at the end of the last
9331 * VM_MAP_PAGE_SIZE(src_map)-sized chunk of the range.
9332 * The caller might not have been aware of those missing
9333 * pages and might not want to be aware of it, which is
9334 * fine as long as they don't try to access (and crash on)
9335 * those missing pages.
9336 * Let's adjust the size of the "copy", to avoid failing
9337 * in vm_map_copyout() or vm_map_copy_overwrite().
9338 */
9339 assert(vm_map_round_page(copy_size,
9340 VM_MAP_PAGE_MASK(src_map)) ==
9341 vm_map_round_page(copy->size,
9342 VM_MAP_PAGE_MASK(src_map)));
9343 copy->size = copy_size;
9344 }
9345
9346 *copy_result = copy;
9347 return(KERN_SUCCESS);
9348
9349 #undef RETURN
9350 }
9351
9352 kern_return_t
9353 vm_map_copy_extract(
9354 vm_map_t src_map,
9355 vm_map_address_t src_addr,
9356 vm_map_size_t len,
9357 vm_map_copy_t *copy_result, /* OUT */
9358 vm_prot_t *cur_prot, /* OUT */
9359 vm_prot_t *max_prot)
9360 {
9361 vm_map_offset_t src_start, src_end;
9362 vm_map_copy_t copy;
9363 kern_return_t kr;
9364
9365 /*
9366 * Check for copies of zero bytes.
9367 */
9368
9369 if (len == 0) {
9370 *copy_result = VM_MAP_COPY_NULL;
9371 return(KERN_SUCCESS);
9372 }
9373
9374 /*
9375 * Check that the end address doesn't overflow
9376 */
9377 src_end = src_addr + len;
9378 if (src_end < src_addr)
9379 return KERN_INVALID_ADDRESS;
9380
9381 /*
9382 * Compute (page aligned) start and end of region
9383 */
9384 src_start = vm_map_trunc_page(src_addr, PAGE_MASK);
9385 src_end = vm_map_round_page(src_end, PAGE_MASK);
9386
9387 /*
9388 * Allocate a header element for the list.
9389 *
9390 * Use the start and end in the header to
9391 * remember the endpoints prior to rounding.
9392 */
9393
9394 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
9395 vm_map_copy_first_entry(copy) =
9396 vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
9397 copy->type = VM_MAP_COPY_ENTRY_LIST;
9398 copy->cpy_hdr.nentries = 0;
9399 copy->cpy_hdr.entries_pageable = TRUE;
9400
9401 vm_map_store_init(&copy->cpy_hdr);
9402
9403 copy->offset = 0;
9404 copy->size = len;
9405
9406 kr = vm_map_remap_extract(src_map,
9407 src_addr,
9408 len,
9409 FALSE, /* copy */
9410 &copy->cpy_hdr,
9411 cur_prot,
9412 max_prot,
9413 VM_INHERIT_SHARE,
9414 TRUE); /* pageable */
9415 if (kr != KERN_SUCCESS) {
9416 vm_map_copy_discard(copy);
9417 return kr;
9418 }
9419
9420 *copy_result = copy;
9421 return KERN_SUCCESS;
9422 }
9423
9424 /*
9425 * vm_map_copyin_object:
9426 *
9427 * Create a copy object from an object.
9428 * Our caller donates an object reference.
9429 */
9430
9431 kern_return_t
9432 vm_map_copyin_object(
9433 vm_object_t object,
9434 vm_object_offset_t offset, /* offset of region in object */
9435 vm_object_size_t size, /* size of region in object */
9436 vm_map_copy_t *copy_result) /* OUT */
9437 {
9438 vm_map_copy_t copy; /* Resulting copy */
9439
9440 /*
9441 * We drop the object into a special copy object
9442 * that contains the object directly.
9443 */
9444
9445 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
9446 copy->type = VM_MAP_COPY_OBJECT;
9447 copy->cpy_object = object;
9448 copy->offset = offset;
9449 copy->size = size;
9450
9451 *copy_result = copy;
9452 return(KERN_SUCCESS);
9453 }
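/*
 * Illustrative sketch (not part of the original source): wrapping a VM
 * object in a copy object and then mapping it.  vm_map_copyin_object()
 * consumes the object reference donated by the caller, and
 * vm_map_copyout() handles the resulting VM_MAP_COPY_OBJECT copy via
 * the special case earlier in this file.  The helper name is
 * hypothetical and error handling is kept minimal.
 */
#if 0	/* example only, not compiled */
static kern_return_t
example_map_anonymous_object(
	vm_map_t		dst_map,
	vm_object_size_t	size,
	vm_map_address_t	*dst_addr)	/* OUT */
{
	vm_object_t	object;
	vm_map_copy_t	copy;
	kern_return_t	kr;

	/* this allocation provides the reference that copyin_object consumes */
	object = vm_object_allocate(size);

	kr = vm_map_copyin_object(object, 0, size, &copy);
	if (kr != KERN_SUCCESS) {
		vm_object_deallocate(object);
		return kr;
	}

	kr = vm_map_copyout(dst_map, dst_addr, copy);
	if (kr != KERN_SUCCESS)
		vm_map_copy_discard(copy);
	return kr;
}
#endif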
9454
9455 static void
9456 vm_map_fork_share(
9457 vm_map_t old_map,
9458 vm_map_entry_t old_entry,
9459 vm_map_t new_map)
9460 {
9461 vm_object_t object;
9462 vm_map_entry_t new_entry;
9463
9464 /*
9465 * New sharing code. New map entry
9466 * references original object. Internal
9467 * objects use asynchronous copy algorithm for
9468 * future copies. First make sure we have
9469 * the right object. If we need a shadow,
9470 * or someone else already has one, then
9471 * make a new shadow and share it.
9472 */
9473
9474 object = old_entry->object.vm_object;
9475 if (old_entry->is_sub_map) {
9476 assert(old_entry->wired_count == 0);
9477 #ifndef NO_NESTED_PMAP
9478 if(old_entry->use_pmap) {
9479 kern_return_t result;
9480
9481 result = pmap_nest(new_map->pmap,
9482 (old_entry->object.sub_map)->pmap,
9483 (addr64_t)old_entry->vme_start,
9484 (addr64_t)old_entry->vme_start,
9485 (uint64_t)(old_entry->vme_end - old_entry->vme_start));
9486 if(result)
9487 panic("vm_map_fork_share: pmap_nest failed!");
9488 }
9489 #endif /* NO_NESTED_PMAP */
9490 } else if (object == VM_OBJECT_NULL) {
9491 object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
9492 old_entry->vme_start));
9493 old_entry->offset = 0;
9494 old_entry->object.vm_object = object;
9495 old_entry->use_pmap = TRUE;
9496 assert(!old_entry->needs_copy);
9497 } else if (object->copy_strategy !=
9498 MEMORY_OBJECT_COPY_SYMMETRIC) {
9499
9500 /*
9501 * We are already using an asymmetric
9502 * copy, and therefore we already have
9503 * the right object.
9504 */
9505
9506 assert(! old_entry->needs_copy);
9507 }
9508 else if (old_entry->needs_copy || /* case 1 */
9509 object->shadowed || /* case 2 */
9510 (!object->true_share && /* case 3 */
9511 !old_entry->is_shared &&
9512 (object->vo_size >
9513 (vm_map_size_t)(old_entry->vme_end -
9514 old_entry->vme_start)))) {
9515
9516 /*
9517 * We need to create a shadow.
9518 * There are three cases here.
9519 * In the first case, we need to
9520 * complete a deferred symmetrical
9521 * copy that we participated in.
9522 * In the second and third cases,
9523 * we need to create the shadow so
9524 * that changes that we make to the
9525 * object do not interfere with
9526 * any symmetrical copies which
9527 * have occurred (case 2) or which
9528 * might occur (case 3).
9529 *
9530 * The first case is when we had
9531 * deferred shadow object creation
9532 * via the entry->needs_copy mechanism.
9533 * This mechanism only works when
9534 * only one entry points to the source
9535 * object, and we are about to create
9536 * a second entry pointing to the
9537 * same object. The problem is that
9538 * there is no way of mapping from
9539 * an object to the entries pointing
9540 * to it. (Deferred shadow creation
9541 * works with one entry because it occurs
9542 * at fault time, and we walk from the
9543 * entry to the object when handling
9544 * the fault.)
9545 *
9546 * The second case is when the object
9547 * to be shared has already been copied
9548 * with a symmetric copy, but we point
9549 * directly to the object without
9550 * needs_copy set in our entry. (This
9551 * can happen because different ranges
9552 * of an object can be pointed to by
9553 * different entries. In particular,
9554 * a single entry pointing to an object
9555 * can be split by a call to vm_inherit,
9556 * which, combined with task_create, can
9557 * result in the different entries
9558 * having different needs_copy values.)
9559 * The shadowed flag in the object allows
9560 * us to detect this case. The problem
9561 * with this case is that if this object
9562 * has or will have shadows, then we
9563 * must not perform an asymmetric copy
9564 * of this object, since such a copy
9565 * allows the object to be changed, which
9566 * will break the previous symmetrical
9567 * copies (which rely upon the object
9568 * not changing). In a sense, the shadowed
9569 * flag says "don't change this object".
9570 * We fix this by creating a shadow
9571 * object for this object, and sharing
9572 * that. This works because we are free
9573 * to change the shadow object (and thus
9574 * to use an asymmetric copy strategy);
9575 * this is also semantically correct,
9576 * since this object is temporary, and
9577 * therefore a copy of the object is
9578 * as good as the object itself. (This
9579 * is not true for permanent objects,
9580 * since the pager needs to see changes,
9581 * which won't happen if the changes
9582 * are made to a copy.)
9583 *
9584 * The third case is when the object
9585 * to be shared has parts sticking
9586 * outside of the entry we're working
9587 * with, and thus may in the future
9588 * be subject to a symmetrical copy.
9589 * (This is a preemptive version of
9590 * case 2.)
9591 */
9592 vm_object_shadow(&old_entry->object.vm_object,
9593 &old_entry->offset,
9594 (vm_map_size_t) (old_entry->vme_end -
9595 old_entry->vme_start));
9596
9597 /*
9598 * If we're making a shadow for other than
9599 * copy on write reasons, then we have
9600 * to remove write permission.
9601 */
9602
9603 if (!old_entry->needs_copy &&
9604 (old_entry->protection & VM_PROT_WRITE)) {
9605 vm_prot_t prot;
9606
9607 prot = old_entry->protection & ~VM_PROT_WRITE;
9608
9609 if (override_nx(old_map, old_entry->alias) && prot)
9610 prot |= VM_PROT_EXECUTE;
9611
9612 if (old_map->mapped_in_other_pmaps) {
9613 vm_object_pmap_protect(
9614 old_entry->object.vm_object,
9615 old_entry->offset,
9616 (old_entry->vme_end -
9617 old_entry->vme_start),
9618 PMAP_NULL,
9619 old_entry->vme_start,
9620 prot);
9621 } else {
9622 pmap_protect(old_map->pmap,
9623 old_entry->vme_start,
9624 old_entry->vme_end,
9625 prot);
9626 }
9627 }
9628
9629 old_entry->needs_copy = FALSE;
9630 object = old_entry->object.vm_object;
9631 }
9632
9633
9634 /*
9635 * If object was using a symmetric copy strategy,
9636 * change its copy strategy to the default
9637 * asymmetric copy strategy, which is copy_delay
9638 * in the non-norma case and copy_call in the
9639 * norma case. Bump the reference count for the
9640 * new entry.
9641 */
9642
9643 if(old_entry->is_sub_map) {
9644 vm_map_lock(old_entry->object.sub_map);
9645 vm_map_reference(old_entry->object.sub_map);
9646 vm_map_unlock(old_entry->object.sub_map);
9647 } else {
9648 vm_object_lock(object);
9649 vm_object_reference_locked(object);
9650 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
9651 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
9652 }
9653 vm_object_unlock(object);
9654 }
9655
9656 /*
9657 * Clone the entry, using object ref from above.
9658 * Mark both entries as shared.
9659 */
9660
9661 new_entry = vm_map_entry_create(new_map, FALSE); /* Never the kernel
9662 * map or descendants */
9663 vm_map_entry_copy(new_entry, old_entry);
9664 old_entry->is_shared = TRUE;
9665 new_entry->is_shared = TRUE;
9666
9667 /*
9668 * Insert the entry into the new map -- we
9669 * know we're inserting at the end of the new
9670 * map.
9671 */
9672
9673 vm_map_store_entry_link(new_map, vm_map_last_entry(new_map), new_entry);
9674
9675 /*
9676 * Update the physical map
9677 */
9678
9679 if (old_entry->is_sub_map) {
9680 /* Bill Angell pmap support goes here */
9681 } else {
9682 pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
9683 old_entry->vme_end - old_entry->vme_start,
9684 old_entry->vme_start);
9685 }
9686 }
9687
9688 static boolean_t
9689 vm_map_fork_copy(
9690 vm_map_t old_map,
9691 vm_map_entry_t *old_entry_p,
9692 vm_map_t new_map)
9693 {
9694 vm_map_entry_t old_entry = *old_entry_p;
9695 vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start;
9696 vm_map_offset_t start = old_entry->vme_start;
9697 vm_map_copy_t copy;
9698 vm_map_entry_t last = vm_map_last_entry(new_map);
9699
9700 vm_map_unlock(old_map);
9701 /*
9702 * Use maxprot version of copyin because we
9703 * care about whether this memory can ever
9704 * be accessed, not just whether it's accessible
9705 * right now.
9706 */
9707 if (vm_map_copyin_maxprot(old_map, start, entry_size, FALSE, &copy)
9708 != KERN_SUCCESS) {
9709 /*
9710 * The map might have changed while it
9711 * was unlocked, check it again. Skip
9712 * any blank space or permanently
9713 * unreadable region.
9714 */
9715 vm_map_lock(old_map);
9716 if (!vm_map_lookup_entry(old_map, start, &last) ||
9717 (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) {
9718 last = last->vme_next;
9719 }
9720 *old_entry_p = last;
9721
9722 /*
9723 * XXX For some error returns, want to
9724 * XXX skip to the next element. Note
9725 * that INVALID_ADDRESS and
9726 * PROTECTION_FAILURE are handled above.
9727 */
9728
9729 return FALSE;
9730 }
9731
9732 /*
9733 * Insert the copy into the new map
9734 */
9735
9736 vm_map_copy_insert(new_map, last, copy);
9737
9738 /*
9739 * Pick up the traversal at the end of
9740 * the copied region.
9741 */
9742
9743 vm_map_lock(old_map);
9744 start += entry_size;
9745 if (! vm_map_lookup_entry(old_map, start, &last)) {
9746 last = last->vme_next;
9747 } else {
9748 if (last->vme_start == start) {
9749 /*
9750 * No need to clip here and we don't
9751 * want to cause any unnecessary
9752 * unnesting...
9753 */
9754 } else {
9755 vm_map_clip_start(old_map, last, start);
9756 }
9757 }
9758 *old_entry_p = last;
9759
9760 return TRUE;
9761 }
9762
9763 /*
9764 * vm_map_fork:
9765 *
9766 * Create and return a new map based on the old
9767 * map, according to the inheritance values on the
9768 * regions in that map.
9769 *
9770 * The source map must not be locked.
9771 */
9772 vm_map_t
9773 vm_map_fork(
9774 ledger_t ledger,
9775 vm_map_t old_map)
9776 {
9777 pmap_t new_pmap;
9778 vm_map_t new_map;
9779 vm_map_entry_t old_entry;
9780 vm_map_size_t new_size = 0, entry_size;
9781 vm_map_entry_t new_entry;
9782 boolean_t src_needs_copy;
9783 boolean_t new_entry_needs_copy;
9784
9785 new_pmap = pmap_create(ledger, (vm_map_size_t) 0,
9786 #if defined(__i386__) || defined(__x86_64__)
9787 old_map->pmap->pm_task_map != TASK_MAP_32BIT
9788 #else
9789 #error Unknown architecture.
9790 #endif
9791 );
9792
9793 vm_map_reference_swap(old_map);
9794 vm_map_lock(old_map);
9795
9796 new_map = vm_map_create(new_pmap,
9797 old_map->min_offset,
9798 old_map->max_offset,
9799 old_map->hdr.entries_pageable);
9800 /* inherit the parent map's page size */
9801 vm_map_set_page_shift(new_map, VM_MAP_PAGE_SHIFT(old_map));
9802 for (
9803 old_entry = vm_map_first_entry(old_map);
9804 old_entry != vm_map_to_entry(old_map);
9805 ) {
9806
9807 entry_size = old_entry->vme_end - old_entry->vme_start;
9808
9809 switch (old_entry->inheritance) {
9810 case VM_INHERIT_NONE:
9811 break;
9812
9813 case VM_INHERIT_SHARE:
9814 vm_map_fork_share(old_map, old_entry, new_map);
9815 new_size += entry_size;
9816 break;
9817
9818 case VM_INHERIT_COPY:
9819
9820 /*
9821 * Inline the copy_quickly case;
9822 * upon failure, fall back on call
9823 * to vm_map_fork_copy.
9824 */
9825
9826 if(old_entry->is_sub_map)
9827 break;
9828 if ((old_entry->wired_count != 0) ||
9829 ((old_entry->object.vm_object != NULL) &&
9830 (old_entry->object.vm_object->true_share))) {
9831 goto slow_vm_map_fork_copy;
9832 }
9833
9834 new_entry = vm_map_entry_create(new_map, FALSE); /* never the kernel map or descendants */
9835 vm_map_entry_copy(new_entry, old_entry);
9836 if (new_entry->is_sub_map) {
9837 /* clear address space specifics */
9838 new_entry->use_pmap = FALSE;
9839 }
9840
9841 if (! vm_object_copy_quickly(
9842 &new_entry->object.vm_object,
9843 old_entry->offset,
9844 (old_entry->vme_end -
9845 old_entry->vme_start),
9846 &src_needs_copy,
9847 &new_entry_needs_copy)) {
9848 vm_map_entry_dispose(new_map, new_entry);
9849 goto slow_vm_map_fork_copy;
9850 }
9851
9852 /*
9853 * Handle copy-on-write obligations
9854 */
9855
9856 if (src_needs_copy && !old_entry->needs_copy) {
9857 vm_prot_t prot;
9858
9859 prot = old_entry->protection & ~VM_PROT_WRITE;
9860
9861 if (override_nx(old_map, old_entry->alias) && prot)
9862 prot |= VM_PROT_EXECUTE;
9863
9864 vm_object_pmap_protect(
9865 old_entry->object.vm_object,
9866 old_entry->offset,
9867 (old_entry->vme_end -
9868 old_entry->vme_start),
9869 ((old_entry->is_shared
9870 || old_map->mapped_in_other_pmaps)
9871 ? PMAP_NULL :
9872 old_map->pmap),
9873 old_entry->vme_start,
9874 prot);
9875
9876 old_entry->needs_copy = TRUE;
9877 }
9878 new_entry->needs_copy = new_entry_needs_copy;
9879
9880 /*
9881 * Insert the entry at the end
9882 * of the map.
9883 */
9884
9885 vm_map_store_entry_link(new_map, vm_map_last_entry(new_map),
9886 new_entry);
9887 new_size += entry_size;
9888 break;
9889
9890 slow_vm_map_fork_copy:
9891 if (vm_map_fork_copy(old_map, &old_entry, new_map)) {
9892 new_size += entry_size;
9893 }
9894 continue;
9895 }
9896 old_entry = old_entry->vme_next;
9897 }
9898
9899
9900 new_map->size = new_size;
9901 vm_map_unlock(old_map);
9902 vm_map_deallocate(old_map);
9903
9904 return(new_map);
9905 }
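/*
 * Illustrative sketch (not part of the original source): the per-entry
 * inheritance values consulted by vm_map_fork() above are normally set
 * ahead of time with vm_map_inherit().  The helper name is hypothetical
 * and the ledger handling is elided.
 */
#if 0	/* example only, not compiled */
static vm_map_t
example_fork_without_range(
	ledger_t	ledger,
	vm_map_t	parent_map,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	/*
	 * Mark [start, end) as VM_INHERIT_NONE so the forked map gets
	 * no entries for that range; VM_INHERIT_SHARE and
	 * VM_INHERIT_COPY regions are handled by vm_map_fork_share()
	 * and vm_map_fork_copy() respectively.
	 */
	(void) vm_map_inherit(parent_map, start, end, VM_INHERIT_NONE);

	return vm_map_fork(ledger, parent_map);
}
#endif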
9906
9907 /*
9908 * vm_map_exec:
9909 *
9910 * Setup the "new_map" with the proper execution environment according
9911 * to the type of executable (platform, 64bit, chroot environment).
9912 * Map the comm page and shared region, etc...
9913 */
9914 kern_return_t
9915 vm_map_exec(
9916 vm_map_t new_map,
9917 task_t task,
9918 void *fsroot,
9919 cpu_type_t cpu)
9920 {
9921 SHARED_REGION_TRACE_DEBUG(
9922 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): ->\n",
9923 (void *)VM_KERNEL_ADDRPERM(current_task()),
9924 (void *)VM_KERNEL_ADDRPERM(new_map),
9925 (void *)VM_KERNEL_ADDRPERM(task),
9926 (void *)VM_KERNEL_ADDRPERM(fsroot),
9927 cpu));
9928 (void) vm_commpage_enter(new_map, task);
9929 (void) vm_shared_region_enter(new_map, task, fsroot, cpu);
9930 SHARED_REGION_TRACE_DEBUG(
9931 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): <-\n",
9932 (void *)VM_KERNEL_ADDRPERM(current_task()),
9933 (void *)VM_KERNEL_ADDRPERM(new_map),
9934 (void *)VM_KERNEL_ADDRPERM(task),
9935 (void *)VM_KERNEL_ADDRPERM(fsroot),
9936 cpu));
9937 return KERN_SUCCESS;
9938 }
9939
9940 /*
9941 * vm_map_lookup_locked:
9942 *
9943 * Finds the VM object, offset, and
9944 * protection for a given virtual address in the
9945 * specified map, assuming a page fault of the
9946 * type specified.
9947 *
9948 * Returns the (object, offset, protection) for
9949 * this address, whether it is wired down, and whether
9950 * this map has the only reference to the data in question.
9951 * In order to later verify this lookup, a "version"
9952 * is returned.
9953 *
9954 * The map MUST be locked by the caller and WILL be
9955 * locked on exit. In order to guarantee the
9956 * existence of the returned object, it is returned
9957 * locked.
9958 *
9959 * If a lookup is requested with "write protection"
9960 * specified, the map may be changed to perform virtual
9961 * copying operations, although the data referenced will
9962 * remain the same.
9963 */
9964 kern_return_t
9965 vm_map_lookup_locked(
9966 vm_map_t *var_map, /* IN/OUT */
9967 vm_map_offset_t vaddr,
9968 vm_prot_t fault_type,
9969 int object_lock_type,
9970 vm_map_version_t *out_version, /* OUT */
9971 vm_object_t *object, /* OUT */
9972 vm_object_offset_t *offset, /* OUT */
9973 vm_prot_t *out_prot, /* OUT */
9974 boolean_t *wired, /* OUT */
9975 vm_object_fault_info_t fault_info, /* OUT */
9976 vm_map_t *real_map)
9977 {
9978 vm_map_entry_t entry;
9979 register vm_map_t map = *var_map;
9980 vm_map_t old_map = *var_map;
9981 vm_map_t cow_sub_map_parent = VM_MAP_NULL;
9982 vm_map_offset_t cow_parent_vaddr = 0;
9983 vm_map_offset_t old_start = 0;
9984 vm_map_offset_t old_end = 0;
9985 register vm_prot_t prot;
9986 boolean_t mask_protections;
9987 boolean_t force_copy;
9988 vm_prot_t original_fault_type;
9989
9990 /*
9991 * VM_PROT_MASK means that the caller wants us to use "fault_type"
9992 * as a mask against the mapping's actual protections, not as an
9993 * absolute value.
9994 */
9995 mask_protections = (fault_type & VM_PROT_IS_MASK) ? TRUE : FALSE;
9996 force_copy = (fault_type & VM_PROT_COPY) ? TRUE : FALSE;
9997 fault_type &= VM_PROT_ALL;
9998 original_fault_type = fault_type;
9999
10000 *real_map = map;
10001
10002 RetryLookup:
10003 fault_type = original_fault_type;
10004
10005 /*
10006 * If the map has an interesting hint, try it before calling
10007 * full blown lookup routine.
10008 */
10009 entry = map->hint;
10010
10011 if ((entry == vm_map_to_entry(map)) ||
10012 (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
10013 vm_map_entry_t tmp_entry;
10014
10015 /*
10016 * Entry was either not a valid hint, or the vaddr
10017 * was not contained in the entry, so do a full lookup.
10018 */
10019 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
10020 if((cow_sub_map_parent) && (cow_sub_map_parent != map))
10021 vm_map_unlock(cow_sub_map_parent);
10022 if((*real_map != map)
10023 && (*real_map != cow_sub_map_parent))
10024 vm_map_unlock(*real_map);
10025 return KERN_INVALID_ADDRESS;
10026 }
10027
10028 entry = tmp_entry;
10029 }
10030 if(map == old_map) {
10031 old_start = entry->vme_start;
10032 old_end = entry->vme_end;
10033 }
10034
10035 /*
10036 * Handle submaps. Drop lock on upper map, submap is
10037 * returned locked.
10038 */
10039
10040 submap_recurse:
10041 if (entry->is_sub_map) {
10042 vm_map_offset_t local_vaddr;
10043 vm_map_offset_t end_delta;
10044 vm_map_offset_t start_delta;
10045 vm_map_entry_t submap_entry;
10046 boolean_t mapped_needs_copy=FALSE;
10047
10048 local_vaddr = vaddr;
10049
10050 if ((entry->use_pmap && !(fault_type & VM_PROT_WRITE))) {
10051 /* if real_map equals map we unlock below */
10052 if ((*real_map != map) &&
10053 (*real_map != cow_sub_map_parent))
10054 vm_map_unlock(*real_map);
10055 *real_map = entry->object.sub_map;
10056 }
10057
10058 if(entry->needs_copy && (fault_type & VM_PROT_WRITE)) {
10059 if (!mapped_needs_copy) {
10060 if (vm_map_lock_read_to_write(map)) {
10061 vm_map_lock_read(map);
10062 *real_map = map;
10063 goto RetryLookup;
10064 }
10065 vm_map_lock_read(entry->object.sub_map);
10066 *var_map = entry->object.sub_map;
10067 cow_sub_map_parent = map;
10068 /* reset base to map before cow object */
10069 /* this is the map which will accept */
10070 /* the new cow object */
10071 old_start = entry->vme_start;
10072 old_end = entry->vme_end;
10073 cow_parent_vaddr = vaddr;
10074 mapped_needs_copy = TRUE;
10075 } else {
10076 vm_map_lock_read(entry->object.sub_map);
10077 *var_map = entry->object.sub_map;
10078 if((cow_sub_map_parent != map) &&
10079 (*real_map != map))
10080 vm_map_unlock(map);
10081 }
10082 } else {
10083 vm_map_lock_read(entry->object.sub_map);
10084 *var_map = entry->object.sub_map;
10085 /* leave the map locked if it is the target */
10086 /* COW sub_map above; otherwise, just */
10087 /* follow the maps down to the object. */
10088 /* Here we unlock, knowing we are not */
10089 /* revisiting the map. */
10090 if((*real_map != map) && (map != cow_sub_map_parent))
10091 vm_map_unlock_read(map);
10092 }
10093
10094 map = *var_map;
10095
10096 /* calculate the offset in the submap for vaddr */
10097 local_vaddr = (local_vaddr - entry->vme_start) + entry->offset;
10098
10099 RetrySubMap:
10100 if(!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
10101 if((cow_sub_map_parent) && (cow_sub_map_parent != map)){
10102 vm_map_unlock(cow_sub_map_parent);
10103 }
10104 if((*real_map != map)
10105 && (*real_map != cow_sub_map_parent)) {
10106 vm_map_unlock(*real_map);
10107 }
10108 *real_map = map;
10109 return KERN_INVALID_ADDRESS;
10110 }
10111
10112 /* find the attenuated shadow of the underlying object */
10113 /* on our target map */
10114
10115 /* In plain English: the submap object may extend beyond the */
10116 /* region mapped by the entry, or may only fill a portion */
10117 /* of it. For our purposes, we only care if the object */
10118 /* doesn't fill the entry. In that case, the area which will */
10119 /* ultimately be clipped in the top map only needs */
10120 /* to be as big as the portion of the underlying entry */
10121 /* which is actually mapped. */
10122 start_delta = submap_entry->vme_start > entry->offset ?
10123 submap_entry->vme_start - entry->offset : 0;
10124
10125 end_delta =
10126 (entry->offset + start_delta + (old_end - old_start)) <=
10127 submap_entry->vme_end ?
10128 0 : (entry->offset +
10129 (old_end - old_start))
10130 - submap_entry->vme_end;
10131
10132 old_start += start_delta;
10133 old_end -= end_delta;
10134
10135 if(submap_entry->is_sub_map) {
10136 entry = submap_entry;
10137 vaddr = local_vaddr;
10138 goto submap_recurse;
10139 }
10140
10141 if(((fault_type & VM_PROT_WRITE) && cow_sub_map_parent)) {
10142
10143 vm_object_t sub_object, copy_object;
10144 vm_object_offset_t copy_offset;
10145 vm_map_offset_t local_start;
10146 vm_map_offset_t local_end;
10147 boolean_t copied_slowly = FALSE;
10148
10149 if (vm_map_lock_read_to_write(map)) {
10150 vm_map_lock_read(map);
10151 old_start -= start_delta;
10152 old_end += end_delta;
10153 goto RetrySubMap;
10154 }
10155
10156
10157 sub_object = submap_entry->object.vm_object;
10158 if (sub_object == VM_OBJECT_NULL) {
10159 sub_object =
10160 vm_object_allocate(
10161 (vm_map_size_t)
10162 (submap_entry->vme_end -
10163 submap_entry->vme_start));
10164 submap_entry->object.vm_object = sub_object;
10165 submap_entry->offset = 0;
10166 }
10167 local_start = local_vaddr -
10168 (cow_parent_vaddr - old_start);
10169 local_end = local_vaddr +
10170 (old_end - cow_parent_vaddr);
10171 vm_map_clip_start(map, submap_entry, local_start);
10172 vm_map_clip_end(map, submap_entry, local_end);
10173 if (submap_entry->is_sub_map) {
10174 /* unnesting was done when clipping */
10175 assert(!submap_entry->use_pmap);
10176 }
10177
10178 /* This is the COW case: let's connect */
10179 /* an entry in our space to the underlying */
10180 /* object in the submap, bypassing the */
10181 /* submap. */
10182
10183
10184 if(submap_entry->wired_count != 0 ||
10185 (sub_object->copy_strategy ==
10186 MEMORY_OBJECT_COPY_NONE)) {
10187 vm_object_lock(sub_object);
10188 vm_object_copy_slowly(sub_object,
10189 submap_entry->offset,
10190 (submap_entry->vme_end -
10191 submap_entry->vme_start),
10192 FALSE,
10193 &copy_object);
10194 copied_slowly = TRUE;
10195 } else {
10196
10197 /* set up shadow object */
10198 copy_object = sub_object;
10199 vm_object_reference(copy_object);
10200 sub_object->shadowed = TRUE;
10201 submap_entry->needs_copy = TRUE;
10202
10203 prot = submap_entry->protection & ~VM_PROT_WRITE;
10204
10205 if (override_nx(old_map, submap_entry->alias) && prot)
10206 prot |= VM_PROT_EXECUTE;
10207
10208 vm_object_pmap_protect(
10209 sub_object,
10210 submap_entry->offset,
10211 submap_entry->vme_end -
10212 submap_entry->vme_start,
10213 (submap_entry->is_shared
10214 || map->mapped_in_other_pmaps) ?
10215 PMAP_NULL : map->pmap,
10216 submap_entry->vme_start,
10217 prot);
10218 }
10219
10220 /*
10221 * Adjust the fault offset to the submap entry.
10222 */
10223 copy_offset = (local_vaddr -
10224 submap_entry->vme_start +
10225 submap_entry->offset);
10226
10227 /* This works differently from the */
10228 /* normal submap case. We go back */
10229 /* to the parent of the COW map and */
10230 /* clip out the target portion of */
10231 /* the sub_map, substituting the */
10232 /* new copy object. */
10233
10234 vm_map_unlock(map);
10235 local_start = old_start;
10236 local_end = old_end;
10237 map = cow_sub_map_parent;
10238 *var_map = cow_sub_map_parent;
10239 vaddr = cow_parent_vaddr;
10240 cow_sub_map_parent = NULL;
10241
10242 if(!vm_map_lookup_entry(map,
10243 vaddr, &entry)) {
10244 vm_object_deallocate(
10245 copy_object);
10246 vm_map_lock_write_to_read(map);
10247 return KERN_INVALID_ADDRESS;
10248 }
10249
10250 /* clip out the portion of space */
10251 /* mapped by the sub map which */
10252 /* corresponds to the underlying */
10253 /* object */
10254
10255 /*
10256 * Clip (and unnest) the smallest nested chunk
10257 * possible around the faulting address...
10258 */
10259 local_start = vaddr & ~(pmap_nesting_size_min - 1);
10260 local_end = local_start + pmap_nesting_size_min;
10261 /*
10262 * ... but don't go beyond the "old_start" to "old_end"
10263 * range, to avoid spanning over another VM region
10264 * with a possibly different VM object and/or offset.
10265 */
10266 if (local_start < old_start) {
10267 local_start = old_start;
10268 }
10269 if (local_end > old_end) {
10270 local_end = old_end;
10271 }
10272 /*
10273 * Adjust copy_offset to the start of the range.
10274 */
10275 copy_offset -= (vaddr - local_start);
10276
10277 vm_map_clip_start(map, entry, local_start);
10278 vm_map_clip_end(map, entry, local_end);
10279 if (entry->is_sub_map) {
10280 /* unnesting was done when clipping */
10281 assert(!entry->use_pmap);
10282 }
10283
10284 /* substitute copy object for */
10285 /* shared map entry */
10286 vm_map_deallocate(entry->object.sub_map);
10287 assert(!entry->iokit_acct);
10288 entry->is_sub_map = FALSE;
10289 entry->use_pmap = TRUE;
10290 entry->object.vm_object = copy_object;
10291
10292 /* propagate the submap entry's protections */
10293 entry->protection |= submap_entry->protection;
10294 entry->max_protection |= submap_entry->max_protection;
10295
10296 if(copied_slowly) {
10297 entry->offset = local_start - old_start;
10298 entry->needs_copy = FALSE;
10299 entry->is_shared = FALSE;
10300 } else {
10301 entry->offset = copy_offset;
10302 entry->needs_copy = TRUE;
10303 if(entry->inheritance == VM_INHERIT_SHARE)
10304 entry->inheritance = VM_INHERIT_COPY;
10305 if (map != old_map)
10306 entry->is_shared = TRUE;
10307 }
10308 if(entry->inheritance == VM_INHERIT_SHARE)
10309 entry->inheritance = VM_INHERIT_COPY;
10310
10311 vm_map_lock_write_to_read(map);
10312 } else {
10313 if((cow_sub_map_parent)
10314 && (cow_sub_map_parent != *real_map)
10315 && (cow_sub_map_parent != map)) {
10316 vm_map_unlock(cow_sub_map_parent);
10317 }
10318 entry = submap_entry;
10319 vaddr = local_vaddr;
10320 }
10321 }
10322
10323 /*
10324 * Check whether this task is allowed to have
10325 * this page.
10326 */
10327
10328 prot = entry->protection;
10329
10330 if (override_nx(old_map, entry->alias) && prot) {
10331 /*
10332 * HACK -- if not a stack, then allow execution
10333 */
10334 prot |= VM_PROT_EXECUTE;
10335 }
10336
10337 if (mask_protections) {
10338 fault_type &= prot;
10339 if (fault_type == VM_PROT_NONE) {
10340 goto protection_failure;
10341 }
10342 }
10343 if ((fault_type & (prot)) != fault_type) {
10344 protection_failure:
10345 if (*real_map != map) {
10346 vm_map_unlock(*real_map);
10347 }
10348 *real_map = map;
10349
10350 if ((fault_type & VM_PROT_EXECUTE) && prot)
10351 log_stack_execution_failure((addr64_t)vaddr, prot);
10352
10353 DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL);
10354 return KERN_PROTECTION_FAILURE;
10355 }
10356
10357 /*
10358 * If this page is not pageable, we have to get
10359 * it for all possible accesses.
10360 */
10361
10362 *wired = (entry->wired_count != 0);
10363 if (*wired)
10364 fault_type = prot;
10365
10366 /*
10367 * If the entry is copy-on-write, we either make a copy now or demote access; see below.
10368 */
10369
10370 if (entry->needs_copy) {
10371 /*
10372 * If we want to write the page, we may as well
10373 * handle that now since we've got the map locked.
10374 *
10375 * If we don't need to write the page, we just
10376 * demote the permissions allowed.
10377 */
10378
10379 if ((fault_type & VM_PROT_WRITE) || *wired || force_copy) {
10380 /*
10381 * Make a new object, and place it in the
10382 * object chain. Note that no new references
10383 * have appeared -- one just moved from the
10384 * map to the new object.
10385 */
10386
10387 if (vm_map_lock_read_to_write(map)) {
10388 vm_map_lock_read(map);
10389 goto RetryLookup;
10390 }
10391 vm_object_shadow(&entry->object.vm_object,
10392 &entry->offset,
10393 (vm_map_size_t) (entry->vme_end -
10394 entry->vme_start));
10395
10396 entry->object.vm_object->shadowed = TRUE;
10397 entry->needs_copy = FALSE;
10398 vm_map_lock_write_to_read(map);
10399 }
10400 else {
10401 /*
10402 * We're attempting to read a copy-on-write
10403 * page -- don't allow writes.
10404 */
10405
10406 prot &= (~VM_PROT_WRITE);
10407 }
10408 }
10409
10410 /*
10411 * Create an object if necessary.
10412 */
10413 if (entry->object.vm_object == VM_OBJECT_NULL) {
10414
10415 if (vm_map_lock_read_to_write(map)) {
10416 vm_map_lock_read(map);
10417 goto RetryLookup;
10418 }
10419
10420 entry->object.vm_object = vm_object_allocate(
10421 (vm_map_size_t)(entry->vme_end - entry->vme_start));
10422 entry->offset = 0;
10423 vm_map_lock_write_to_read(map);
10424 }
10425
10426 /*
10427 * Return the object/offset from this entry. If the entry
10428 * was copy-on-write or empty, it has been fixed up. Also
10429 * return the protection.
10430 */
10431
10432 *offset = (vaddr - entry->vme_start) + entry->offset;
10433 *object = entry->object.vm_object;
10434 *out_prot = prot;
10435
10436 if (fault_info) {
10437 fault_info->interruptible = THREAD_UNINT; /* for now... */
10438 /* ... the caller will change "interruptible" if needed */
10439 fault_info->cluster_size = 0;
10440 fault_info->user_tag = entry->alias;
10441 fault_info->pmap_options = 0;
10442 if (entry->iokit_acct ||
10443 (!entry->is_sub_map && !entry->use_pmap)) {
10444 fault_info->pmap_options |= PMAP_OPTIONS_ALT_ACCT;
10445 }
10446 fault_info->behavior = entry->behavior;
10447 fault_info->lo_offset = entry->offset;
10448 fault_info->hi_offset = (entry->vme_end - entry->vme_start) + entry->offset;
10449 fault_info->no_cache = entry->no_cache;
10450 fault_info->stealth = FALSE;
10451 fault_info->io_sync = FALSE;
10452 fault_info->cs_bypass = (entry->used_for_jit)? TRUE : FALSE;
10453 fault_info->mark_zf_absent = FALSE;
10454 fault_info->batch_pmap_op = FALSE;
10455 }
10456
10457 /*
10458 * Lock the object to prevent it from disappearing
10459 */
10460 if (object_lock_type == OBJECT_LOCK_EXCLUSIVE)
10461 vm_object_lock(*object);
10462 else
10463 vm_object_lock_shared(*object);
10464
10465 /*
10466 * Save the version number
10467 */
10468
10469 out_version->main_timestamp = map->timestamp;
10470
10471 return KERN_SUCCESS;
10472 }
10473
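/*
 * Illustrative sketch only (not part of the build): how a fault-style
 * caller is expected to drive vm_map_lookup_locked().  The helper name
 * "example_lookup" and the choice of VM_PROT_READ / OBJECT_LOCK_EXCLUSIVE
 * are assumptions for the example; see the real fault path for the
 * authoritative usage.
 */
#if 0
static kern_return_t
example_lookup(vm_map_t map, vm_map_offset_t vaddr)
{
	vm_map_t		real_map;
	vm_map_version_t	version;
	vm_object_t		object;
	vm_object_offset_t	offset;
	vm_prot_t		prot;
	boolean_t		wired;
	kern_return_t		kr;

	vm_map_lock_read(map);		/* the map MUST be locked by the caller */
	kr = vm_map_lookup_locked(&map, vaddr, VM_PROT_READ,
				  OBJECT_LOCK_EXCLUSIVE, &version,
				  &object, &offset, &prot, &wired,
				  NULL,		/* fault_info is optional */
				  &real_map);
	if (kr != KERN_SUCCESS) {
		vm_map_unlock_read(map);
		return kr;
	}
	/* "object" comes back locked; "map" may now point at a submap
	 * and is still read-locked. */
	vm_object_unlock(object);
	if (real_map != map)
		vm_map_unlock(real_map);
	vm_map_unlock_read(map);
	return KERN_SUCCESS;
}
#endif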
10474
10475 /*
10476 * vm_map_verify:
10477 *
10478 * Verifies that the map in question has not changed
10479 * since the given version. If successful, the map
10480 * will not change until vm_map_verify_done() is called.
10481 */
10482 boolean_t
10483 vm_map_verify(
10484 register vm_map_t map,
10485 register vm_map_version_t *version) /* REF */
10486 {
10487 boolean_t result;
10488
10489 vm_map_lock_read(map);
10490 result = (map->timestamp == version->main_timestamp);
10491
10492 if (!result)
10493 vm_map_unlock_read(map);
10494
10495 return(result);
10496 }
10497
10498 /*
10499 * vm_map_verify_done:
10500 *
10501 * Releases locks acquired by a vm_map_verify.
10502 *
10503 * This is now a macro in vm/vm_map.h. It does a
10504 * vm_map_unlock_read on the map.
10505 */
10506
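/*
 * Illustrative sketch only (assumption, not kernel code): the intended
 * lookup/verify pattern.  A caller saves the "version" returned by
 * vm_map_lookup_locked(), drops the map lock to do slow work, and later
 * uses vm_map_verify()/vm_map_verify_done() to find out whether the map
 * changed in the meantime.  "map" and "version" are placeholders here.
 */
#if 0
	vm_map_version_t	version;	/* filled in by vm_map_lookup_locked() */

	/* ... map lock dropped while slow work is done ... */

	if (vm_map_verify(map, &version)) {
		/* map unchanged: it is read-locked again and the earlier
		 * lookup results can be reused */
		vm_map_verify_done(map, &version);	/* vm_map_unlock_read() */
	} else {
		/* map changed: the lookup must be redone */
	}
#endif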
10507
10508 /*
10509 * TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
10510 * Goes away after the regular vm_region_recurse function migrates to
10511 * 64 bits.
10512 * vm_region_recurse: a form of vm_region which follows the
10513 * submaps in a target map.
10514 *
10515 */
10516
10517 kern_return_t
10518 vm_map_region_recurse_64(
10519 vm_map_t map,
10520 vm_map_offset_t *address, /* IN/OUT */
10521 vm_map_size_t *size, /* OUT */
10522 natural_t *nesting_depth, /* IN/OUT */
10523 vm_region_submap_info_64_t submap_info, /* IN/OUT */
10524 mach_msg_type_number_t *count) /* IN/OUT */
10525 {
10526 mach_msg_type_number_t original_count;
10527 vm_region_extended_info_data_t extended;
10528 vm_map_entry_t tmp_entry;
10529 vm_map_offset_t user_address;
10530 unsigned int user_max_depth;
10531
10532 /*
10533 * "curr_entry" is the VM map entry preceding or including the
10534 * address we're looking for.
10535 * "curr_map" is the map or sub-map containing "curr_entry".
10536 * "curr_address" is the equivalent of the top map's "user_address"
10537 * in the current map.
10538 * "curr_offset" is the cumulated offset of "curr_map" in the
10539 * target task's address space.
10540 * "curr_depth" is the depth of "curr_map" in the chain of
10541 * sub-maps.
10542 *
10543 * "curr_max_below" and "curr_max_above" limit the range (around
10544 * "curr_address") we should take into account in the current (sub)map.
10545 * They limit the range to what's visible through the map entries
10546 * we've traversed from the top map to the current map.
10547 *
10548 */
10549 vm_map_entry_t curr_entry;
10550 vm_map_address_t curr_address;
10551 vm_map_offset_t curr_offset;
10552 vm_map_t curr_map;
10553 unsigned int curr_depth;
10554 vm_map_offset_t curr_max_below, curr_max_above;
10555 vm_map_offset_t curr_skip;
10556
10557 /*
10558 * "next_" is the same as "curr_" but for the VM region immediately
10559 * after the address we're looking for. We need to keep track of this
10560 * too because we want to return info about that region if the
10561 * address we're looking for is not mapped.
10562 */
10563 vm_map_entry_t next_entry;
10564 vm_map_offset_t next_offset;
10565 vm_map_offset_t next_address;
10566 vm_map_t next_map;
10567 unsigned int next_depth;
10568 vm_map_offset_t next_max_below, next_max_above;
10569 vm_map_offset_t next_skip;
10570
10571 boolean_t look_for_pages;
10572 vm_region_submap_short_info_64_t short_info;
10573
10574 if (map == VM_MAP_NULL) {
10575 /* no address space to work on */
10576 return KERN_INVALID_ARGUMENT;
10577 }
10578
10579
10580 if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) {
10581 /*
10582 * "info" structure is not big enough and
10583 * would overflow
10584 */
10585 return KERN_INVALID_ARGUMENT;
10586 }
10587
10588 original_count = *count;
10589
10590 if (original_count < VM_REGION_SUBMAP_INFO_V0_COUNT_64) {
10591 *count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
10592 look_for_pages = FALSE;
10593 short_info = (vm_region_submap_short_info_64_t) submap_info;
10594 submap_info = NULL;
10595 } else {
10596 look_for_pages = TRUE;
10597 *count = VM_REGION_SUBMAP_INFO_V0_COUNT_64;
10598 short_info = NULL;
10599
10600 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
10601 *count = VM_REGION_SUBMAP_INFO_V1_COUNT_64;
10602 }
10603 }
10604
10605 user_address = *address;
10606 user_max_depth = *nesting_depth;
10607
10608 curr_entry = NULL;
10609 curr_map = map;
10610 curr_address = user_address;
10611 curr_offset = 0;
10612 curr_skip = 0;
10613 curr_depth = 0;
10614 curr_max_above = ((vm_map_offset_t) -1) - curr_address;
10615 curr_max_below = curr_address;
10616
10617 next_entry = NULL;
10618 next_map = NULL;
10619 next_address = 0;
10620 next_offset = 0;
10621 next_skip = 0;
10622 next_depth = 0;
10623 next_max_above = (vm_map_offset_t) -1;
10624 next_max_below = (vm_map_offset_t) -1;
10625
10626 if (not_in_kdp) {
10627 vm_map_lock_read(curr_map);
10628 }
10629
10630 for (;;) {
10631 if (vm_map_lookup_entry(curr_map,
10632 curr_address,
10633 &tmp_entry)) {
10634 /* tmp_entry contains the address we're looking for */
10635 curr_entry = tmp_entry;
10636 } else {
10637 vm_map_offset_t skip;
10638 /*
10639 * The address is not mapped. "tmp_entry" is the
10640 * map entry preceding the address. We want the next
10641 * one, if it exists.
10642 */
10643 curr_entry = tmp_entry->vme_next;
10644
10645 if (curr_entry == vm_map_to_entry(curr_map) ||
10646 (curr_entry->vme_start >=
10647 curr_address + curr_max_above)) {
10648 /* no next entry at this level: stop looking */
10649 if (not_in_kdp) {
10650 vm_map_unlock_read(curr_map);
10651 }
10652 curr_entry = NULL;
10653 curr_map = NULL;
10654 curr_offset = 0;
10655 curr_depth = 0;
10656 curr_max_above = 0;
10657 curr_max_below = 0;
10658 break;
10659 }
10660
10661 /* adjust current address and offset */
10662 skip = curr_entry->vme_start - curr_address;
10663 curr_address = curr_entry->vme_start;
10664 curr_skip = skip;
10665 curr_offset += skip;
10666 curr_max_above -= skip;
10667 curr_max_below = 0;
10668 }
10669
10670 /*
10671 * Is the next entry at this level closer to the address (or
10672 * deeper in the submap chain) than the one we had
10673 * so far ?
10674 */
10675 tmp_entry = curr_entry->vme_next;
10676 if (tmp_entry == vm_map_to_entry(curr_map)) {
10677 /* no next entry at this level */
10678 } else if (tmp_entry->vme_start >=
10679 curr_address + curr_max_above) {
10680 /*
10681 * tmp_entry is beyond the scope of what we mapped of
10682 * this submap in the upper level: ignore it.
10683 */
10684 } else if ((next_entry == NULL) ||
10685 (tmp_entry->vme_start + curr_offset <=
10686 next_entry->vme_start + next_offset)) {
10687 /*
10688 * We didn't have a "next_entry" or this one is
10689 * closer to the address we're looking for:
10690 * use this "tmp_entry" as the new "next_entry".
10691 */
10692 if (next_entry != NULL) {
10693 /* unlock the last "next_map" */
10694 if (next_map != curr_map && not_in_kdp) {
10695 vm_map_unlock_read(next_map);
10696 }
10697 }
10698 next_entry = tmp_entry;
10699 next_map = curr_map;
10700 next_depth = curr_depth;
10701 next_address = next_entry->vme_start;
10702 next_skip = curr_skip;
10703 next_offset = curr_offset;
10704 next_offset += (next_address - curr_address);
10705 next_max_above = MIN(next_max_above, curr_max_above);
10706 next_max_above = MIN(next_max_above,
10707 next_entry->vme_end - next_address);
10708 next_max_below = MIN(next_max_below, curr_max_below);
10709 next_max_below = MIN(next_max_below,
10710 next_address - next_entry->vme_start);
10711 }
10712
10713 /*
10714 * "curr_max_{above,below}" allow us to keep track of the
10715 * portion of the submap that is actually mapped at this level:
10716 * the rest of that submap is irrelevant to us, since it's not
10717 * mapped here.
10718 * The relevant portion of the map starts at
10719 * "curr_entry->offset" up to the size of "curr_entry".
10720 */
10721 curr_max_above = MIN(curr_max_above,
10722 curr_entry->vme_end - curr_address);
10723 curr_max_below = MIN(curr_max_below,
10724 curr_address - curr_entry->vme_start);
10725
10726 if (!curr_entry->is_sub_map ||
10727 curr_depth >= user_max_depth) {
10728 /*
10729 * We hit a leaf map or we reached the maximum depth
10730 * we could, so stop looking. Keep the current map
10731 * locked.
10732 */
10733 break;
10734 }
10735
10736 /*
10737 * Get down to the next submap level.
10738 */
10739
10740 /*
10741 * Lock the next level and unlock the current level,
10742 * unless we need to keep it locked to access the "next_entry"
10743 * later.
10744 */
10745 if (not_in_kdp) {
10746 vm_map_lock_read(curr_entry->object.sub_map);
10747 }
10748 if (curr_map == next_map) {
10749 /* keep "next_map" locked in case we need it */
10750 } else {
10751 /* release this map */
10752 if (not_in_kdp)
10753 vm_map_unlock_read(curr_map);
10754 }
10755
10756 /*
10757 * Adjust the offset. "curr_entry" maps the submap
10758 * at relative address "curr_entry->vme_start" in the
10759 * curr_map but skips the first "curr_entry->offset"
10760 * bytes of the submap.
10761 * "curr_offset" always represents the offset of a virtual
10762 * address in the curr_map relative to the absolute address
10763 * space (i.e. the top-level VM map).
10764 */
10765 curr_offset +=
10766 (curr_entry->offset - curr_entry->vme_start);
10767 curr_address = user_address + curr_offset;
10768 /* switch to the submap */
10769 curr_map = curr_entry->object.sub_map;
10770 curr_depth++;
10771 curr_entry = NULL;
10772 }
10773
10774 if (curr_entry == NULL) {
10775 /* no VM region contains the address... */
10776 if (next_entry == NULL) {
10777 /* ... and no VM region follows it either */
10778 return KERN_INVALID_ADDRESS;
10779 }
10780 /* ... gather info about the next VM region */
10781 curr_entry = next_entry;
10782 curr_map = next_map; /* still locked ... */
10783 curr_address = next_address;
10784 curr_skip = next_skip;
10785 curr_offset = next_offset;
10786 curr_depth = next_depth;
10787 curr_max_above = next_max_above;
10788 curr_max_below = next_max_below;
10789 if (curr_map == map) {
10790 user_address = curr_address;
10791 }
10792 } else {
10793 /* we won't need "next_entry" after all */
10794 if (next_entry != NULL) {
10795 /* release "next_map" */
10796 if (next_map != curr_map && not_in_kdp) {
10797 vm_map_unlock_read(next_map);
10798 }
10799 }
10800 }
10801 next_entry = NULL;
10802 next_map = NULL;
10803 next_offset = 0;
10804 next_skip = 0;
10805 next_depth = 0;
10806 next_max_below = -1;
10807 next_max_above = -1;
10808
10809 *nesting_depth = curr_depth;
10810 *size = curr_max_above + curr_max_below;
10811 *address = user_address + curr_skip - curr_max_below;
10812
10813 // LP64todo: all the current tools are 32-bit, so this obviously never worked for 64-bit;
10814 // it should probably be a real 32-bit ID rather than a truncated pointer.
10815 // Current users just check for equality.
10816 #define INFO_MAKE_OBJECT_ID(p) ((uint32_t)(uintptr_t)VM_KERNEL_ADDRPERM(p))
10817
10818 if (look_for_pages) {
10819 submap_info->user_tag = curr_entry->alias;
10820 submap_info->offset = curr_entry->offset;
10821 submap_info->protection = curr_entry->protection;
10822 submap_info->inheritance = curr_entry->inheritance;
10823 submap_info->max_protection = curr_entry->max_protection;
10824 submap_info->behavior = curr_entry->behavior;
10825 submap_info->user_wired_count = curr_entry->user_wired_count;
10826 submap_info->is_submap = curr_entry->is_sub_map;
10827 submap_info->object_id = INFO_MAKE_OBJECT_ID(curr_entry->object.vm_object);
10828 } else {
10829 short_info->user_tag = curr_entry->alias;
10830 short_info->offset = curr_entry->offset;
10831 short_info->protection = curr_entry->protection;
10832 short_info->inheritance = curr_entry->inheritance;
10833 short_info->max_protection = curr_entry->max_protection;
10834 short_info->behavior = curr_entry->behavior;
10835 short_info->user_wired_count = curr_entry->user_wired_count;
10836 short_info->is_submap = curr_entry->is_sub_map;
10837 short_info->object_id = INFO_MAKE_OBJECT_ID(curr_entry->object.vm_object);
10838 }
10839
10840 extended.pages_resident = 0;
10841 extended.pages_swapped_out = 0;
10842 extended.pages_shared_now_private = 0;
10843 extended.pages_dirtied = 0;
10844 extended.pages_reusable = 0;
10845 extended.external_pager = 0;
10846 extended.shadow_depth = 0;
10847
10848 if (not_in_kdp) {
10849 if (!curr_entry->is_sub_map) {
10850 vm_map_offset_t range_start, range_end;
10851 range_start = MAX((curr_address - curr_max_below),
10852 curr_entry->vme_start);
10853 range_end = MIN((curr_address + curr_max_above),
10854 curr_entry->vme_end);
10855 vm_map_region_walk(curr_map,
10856 range_start,
10857 curr_entry,
10858 (curr_entry->offset +
10859 (range_start -
10860 curr_entry->vme_start)),
10861 range_end - range_start,
10862 &extended,
10863 look_for_pages, VM_REGION_EXTENDED_INFO_COUNT);
10864 if (extended.external_pager &&
10865 extended.ref_count == 2 &&
10866 extended.share_mode == SM_SHARED) {
10867 extended.share_mode = SM_PRIVATE;
10868 }
10869 } else {
10870 if (curr_entry->use_pmap) {
10871 extended.share_mode = SM_TRUESHARED;
10872 } else {
10873 extended.share_mode = SM_PRIVATE;
10874 }
10875 extended.ref_count =
10876 curr_entry->object.sub_map->ref_count;
10877 }
10878 }
10879
10880 if (look_for_pages) {
10881 submap_info->pages_resident = extended.pages_resident;
10882 submap_info->pages_swapped_out = extended.pages_swapped_out;
10883 submap_info->pages_shared_now_private =
10884 extended.pages_shared_now_private;
10885 submap_info->pages_dirtied = extended.pages_dirtied;
10886 submap_info->external_pager = extended.external_pager;
10887 submap_info->shadow_depth = extended.shadow_depth;
10888 submap_info->share_mode = extended.share_mode;
10889 submap_info->ref_count = extended.ref_count;
10890
10891 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
10892 submap_info->pages_reusable = extended.pages_reusable;
10893 }
10894 } else {
10895 short_info->external_pager = extended.external_pager;
10896 short_info->shadow_depth = extended.shadow_depth;
10897 short_info->share_mode = extended.share_mode;
10898 short_info->ref_count = extended.ref_count;
10899 }
10900
10901 if (not_in_kdp) {
10902 vm_map_unlock_read(curr_map);
10903 }
10904
10905 return KERN_SUCCESS;
10906 }
10907
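/*
 * Illustrative sketch only (assumption): a minimal in-kernel call of
 * vm_map_region_recurse_64() asking for the full (V1) flavor of submap
 * info.  "map" is a placeholder; this routine is normally reached via
 * the mach_vm_region_recurse() path.
 */
#if 0
	vm_map_offset_t			address = 0;
	vm_map_size_t			size = 0;
	natural_t			depth = 999;	/* descend through all submaps */
	vm_region_submap_info_data_64_t	info;
	mach_msg_type_number_t		count = VM_REGION_SUBMAP_INFO_V1_COUNT_64;
	kern_return_t			kr;

	kr = vm_map_region_recurse_64(map, &address, &size, &depth,
				      (vm_region_submap_info_64_t)&info,
				      &count);
	/* On success, [address, address + size) is the region found at or
	 * after the requested address and "depth" is the submap depth. */
#endif
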
10908 /*
10909 * vm_region:
10910 *
10911 * User call to obtain information about a region in
10912 * a task's address map. Currently, only one flavor is
10913 * supported.
10914 *
10915 * XXX The reserved and behavior fields cannot be filled
10916 * in until the vm merge from the IK is completed, and
10917 * vm_reserve is implemented.
10918 */
10919
10920 kern_return_t
10921 vm_map_region(
10922 vm_map_t map,
10923 vm_map_offset_t *address, /* IN/OUT */
10924 vm_map_size_t *size, /* OUT */
10925 vm_region_flavor_t flavor, /* IN */
10926 vm_region_info_t info, /* OUT */
10927 mach_msg_type_number_t *count, /* IN/OUT */
10928 mach_port_t *object_name) /* OUT */
10929 {
10930 vm_map_entry_t tmp_entry;
10931 vm_map_entry_t entry;
10932 vm_map_offset_t start;
10933
10934 if (map == VM_MAP_NULL)
10935 return(KERN_INVALID_ARGUMENT);
10936
10937 switch (flavor) {
10938
10939 case VM_REGION_BASIC_INFO:
10940 /* legacy for old 32-bit objects info */
10941 {
10942 vm_region_basic_info_t basic;
10943
10944 if (*count < VM_REGION_BASIC_INFO_COUNT)
10945 return(KERN_INVALID_ARGUMENT);
10946
10947 basic = (vm_region_basic_info_t) info;
10948 *count = VM_REGION_BASIC_INFO_COUNT;
10949
10950 vm_map_lock_read(map);
10951
10952 start = *address;
10953 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
10954 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
10955 vm_map_unlock_read(map);
10956 return(KERN_INVALID_ADDRESS);
10957 }
10958 } else {
10959 entry = tmp_entry;
10960 }
10961
10962 start = entry->vme_start;
10963
10964 basic->offset = (uint32_t)entry->offset;
10965 basic->protection = entry->protection;
10966 basic->inheritance = entry->inheritance;
10967 basic->max_protection = entry->max_protection;
10968 basic->behavior = entry->behavior;
10969 basic->user_wired_count = entry->user_wired_count;
10970 basic->reserved = entry->is_sub_map;
10971 *address = start;
10972 *size = (entry->vme_end - start);
10973
10974 if (object_name) *object_name = IP_NULL;
10975 if (entry->is_sub_map) {
10976 basic->shared = FALSE;
10977 } else {
10978 basic->shared = entry->is_shared;
10979 }
10980
10981 vm_map_unlock_read(map);
10982 return(KERN_SUCCESS);
10983 }
10984
10985 case VM_REGION_BASIC_INFO_64:
10986 {
10987 vm_region_basic_info_64_t basic;
10988
10989 if (*count < VM_REGION_BASIC_INFO_COUNT_64)
10990 return(KERN_INVALID_ARGUMENT);
10991
10992 basic = (vm_region_basic_info_64_t) info;
10993 *count = VM_REGION_BASIC_INFO_COUNT_64;
10994
10995 vm_map_lock_read(map);
10996
10997 start = *address;
10998 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
10999 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
11000 vm_map_unlock_read(map);
11001 return(KERN_INVALID_ADDRESS);
11002 }
11003 } else {
11004 entry = tmp_entry;
11005 }
11006
11007 start = entry->vme_start;
11008
11009 basic->offset = entry->offset;
11010 basic->protection = entry->protection;
11011 basic->inheritance = entry->inheritance;
11012 basic->max_protection = entry->max_protection;
11013 basic->behavior = entry->behavior;
11014 basic->user_wired_count = entry->user_wired_count;
11015 basic->reserved = entry->is_sub_map;
11016 *address = start;
11017 *size = (entry->vme_end - start);
11018
11019 if (object_name) *object_name = IP_NULL;
11020 if (entry->is_sub_map) {
11021 basic->shared = FALSE;
11022 } else {
11023 basic->shared = entry->is_shared;
11024 }
11025
11026 vm_map_unlock_read(map);
11027 return(KERN_SUCCESS);
11028 }
11029 case VM_REGION_EXTENDED_INFO:
11030 if (*count < VM_REGION_EXTENDED_INFO_COUNT)
11031 return(KERN_INVALID_ARGUMENT);
11032 /*fallthru*/
11033 case VM_REGION_EXTENDED_INFO__legacy:
11034 if (*count < VM_REGION_EXTENDED_INFO_COUNT__legacy)
11035 return KERN_INVALID_ARGUMENT;
11036
11037 {
11038 vm_region_extended_info_t extended;
11039 mach_msg_type_number_t original_count;
11040
11041 extended = (vm_region_extended_info_t) info;
11042
11043 vm_map_lock_read(map);
11044
11045 start = *address;
11046 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
11047 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
11048 vm_map_unlock_read(map);
11049 return(KERN_INVALID_ADDRESS);
11050 }
11051 } else {
11052 entry = tmp_entry;
11053 }
11054 start = entry->vme_start;
11055
11056 extended->protection = entry->protection;
11057 extended->user_tag = entry->alias;
11058 extended->pages_resident = 0;
11059 extended->pages_swapped_out = 0;
11060 extended->pages_shared_now_private = 0;
11061 extended->pages_dirtied = 0;
11062 extended->external_pager = 0;
11063 extended->shadow_depth = 0;
11064
11065 original_count = *count;
11066 if (flavor == VM_REGION_EXTENDED_INFO__legacy) {
11067 *count = VM_REGION_EXTENDED_INFO_COUNT__legacy;
11068 } else {
11069 extended->pages_reusable = 0;
11070 *count = VM_REGION_EXTENDED_INFO_COUNT;
11071 }
11072
11073 vm_map_region_walk(map, start, entry, entry->offset, entry->vme_end - start, extended, TRUE, *count);
11074
11075 if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED)
11076 extended->share_mode = SM_PRIVATE;
11077
11078 if (object_name)
11079 *object_name = IP_NULL;
11080 *address = start;
11081 *size = (entry->vme_end - start);
11082
11083 vm_map_unlock_read(map);
11084 return(KERN_SUCCESS);
11085 }
11086 case VM_REGION_TOP_INFO:
11087 {
11088 vm_region_top_info_t top;
11089
11090 if (*count < VM_REGION_TOP_INFO_COUNT)
11091 return(KERN_INVALID_ARGUMENT);
11092
11093 top = (vm_region_top_info_t) info;
11094 *count = VM_REGION_TOP_INFO_COUNT;
11095
11096 vm_map_lock_read(map);
11097
11098 start = *address;
11099 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
11100 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
11101 vm_map_unlock_read(map);
11102 return(KERN_INVALID_ADDRESS);
11103 }
11104 } else {
11105 entry = tmp_entry;
11106
11107 }
11108 start = entry->vme_start;
11109
11110 top->private_pages_resident = 0;
11111 top->shared_pages_resident = 0;
11112
11113 vm_map_region_top_walk(entry, top);
11114
11115 if (object_name)
11116 *object_name = IP_NULL;
11117 *address = start;
11118 *size = (entry->vme_end - start);
11119
11120 vm_map_unlock_read(map);
11121 return(KERN_SUCCESS);
11122 }
11123 default:
11124 return(KERN_INVALID_ARGUMENT);
11125 }
11126 }
11127
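/*
 * Illustrative sketch only (assumption, user-space view): the flavors
 * handled above ultimately service calls such as the following
 * mach_vm_region() request for VM_REGION_BASIC_INFO_64.  "some_pointer"
 * is a placeholder address.
 */
#if 0
	mach_vm_address_t		addr = (mach_vm_address_t)some_pointer;
	mach_vm_size_t			size = 0;
	vm_region_basic_info_data_64_t	info;
	mach_msg_type_number_t		count = VM_REGION_BASIC_INFO_COUNT_64;
	mach_port_t			object_name = MACH_PORT_NULL;
	kern_return_t			kr;

	kr = mach_vm_region(mach_task_self(), &addr, &size,
			    VM_REGION_BASIC_INFO_64,
			    (vm_region_info_t)&info,
			    &count, &object_name);
	/* On success, [addr, addr + size) is the region containing (or
	 * following) some_pointer, with info.protection etc. filled in. */
#endif
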
11128 #define OBJ_RESIDENT_COUNT(obj, entry_size) \
11129 MIN((entry_size), \
11130 ((obj)->all_reusable ? \
11131 (obj)->wired_page_count : \
11132 (obj)->resident_page_count - (obj)->reusable_page_count))
11133
11134 void
11135 vm_map_region_top_walk(
11136 vm_map_entry_t entry,
11137 vm_region_top_info_t top)
11138 {
11139
11140 if (entry->object.vm_object == 0 || entry->is_sub_map) {
11141 top->share_mode = SM_EMPTY;
11142 top->ref_count = 0;
11143 top->obj_id = 0;
11144 return;
11145 }
11146
11147 {
11148 struct vm_object *obj, *tmp_obj;
11149 int ref_count;
11150 uint32_t entry_size;
11151
11152 entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64);
11153
11154 obj = entry->object.vm_object;
11155
11156 vm_object_lock(obj);
11157
11158 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
11159 ref_count--;
11160
11161 assert(obj->reusable_page_count <= obj->resident_page_count);
11162 if (obj->shadow) {
11163 if (ref_count == 1)
11164 top->private_pages_resident =
11165 OBJ_RESIDENT_COUNT(obj, entry_size);
11166 else
11167 top->shared_pages_resident =
11168 OBJ_RESIDENT_COUNT(obj, entry_size);
11169 top->ref_count = ref_count;
11170 top->share_mode = SM_COW;
11171
11172 while ((tmp_obj = obj->shadow)) {
11173 vm_object_lock(tmp_obj);
11174 vm_object_unlock(obj);
11175 obj = tmp_obj;
11176
11177 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
11178 ref_count--;
11179
11180 assert(obj->reusable_page_count <= obj->resident_page_count);
11181 top->shared_pages_resident +=
11182 OBJ_RESIDENT_COUNT(obj, entry_size);
11183 top->ref_count += ref_count - 1;
11184 }
11185 } else {
11186 if (entry->superpage_size) {
11187 top->share_mode = SM_LARGE_PAGE;
11188 top->shared_pages_resident = 0;
11189 top->private_pages_resident = entry_size;
11190 } else if (entry->needs_copy) {
11191 top->share_mode = SM_COW;
11192 top->shared_pages_resident =
11193 OBJ_RESIDENT_COUNT(obj, entry_size);
11194 } else {
11195 if (ref_count == 1 ||
11196 (ref_count == 2 && !(obj->pager_trusted) && !(obj->internal))) {
11197 top->share_mode = SM_PRIVATE;
11198 top->private_pages_resident =
11199 OBJ_RESIDENT_COUNT(obj,
11200 entry_size);
11201 } else {
11202 top->share_mode = SM_SHARED;
11203 top->shared_pages_resident =
11204 OBJ_RESIDENT_COUNT(obj,
11205 entry_size);
11206 }
11207 }
11208 top->ref_count = ref_count;
11209 }
11210 /* XXX K64: obj_id will be truncated */
11211 top->obj_id = (unsigned int) (uintptr_t)VM_KERNEL_ADDRPERM(obj);
11212
11213 vm_object_unlock(obj);
11214 }
11215 }
11216
11217 void
11218 vm_map_region_walk(
11219 vm_map_t map,
11220 vm_map_offset_t va,
11221 vm_map_entry_t entry,
11222 vm_object_offset_t offset,
11223 vm_object_size_t range,
11224 vm_region_extended_info_t extended,
11225 boolean_t look_for_pages,
11226 mach_msg_type_number_t count)
11227 {
11228 register struct vm_object *obj, *tmp_obj;
11229 register vm_map_offset_t last_offset;
11230 register int i;
11231 register int ref_count;
11232 struct vm_object *shadow_object;
11233 int shadow_depth;
11234
11235 if ((entry->object.vm_object == 0) ||
11236 (entry->is_sub_map) ||
11237 (entry->object.vm_object->phys_contiguous &&
11238 !entry->superpage_size)) {
11239 extended->share_mode = SM_EMPTY;
11240 extended->ref_count = 0;
11241 return;
11242 }
11243
11244 if (entry->superpage_size) {
11245 extended->shadow_depth = 0;
11246 extended->share_mode = SM_LARGE_PAGE;
11247 extended->ref_count = 1;
11248 extended->external_pager = 0;
11249 extended->pages_resident = (unsigned int)(range >> PAGE_SHIFT);
11250 extended->shadow_depth = 0;
11251 return;
11252 }
11253
11254 {
11255 obj = entry->object.vm_object;
11256
11257 vm_object_lock(obj);
11258
11259 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
11260 ref_count--;
11261
11262 if (look_for_pages) {
11263 for (last_offset = offset + range;
11264 offset < last_offset;
11265 offset += PAGE_SIZE_64, va += PAGE_SIZE) {
11266 vm_map_region_look_for_page(map, va, obj,
11267 offset, ref_count,
11268 0, extended, count);
11269 }
11270 } else {
11271 shadow_object = obj->shadow;
11272 shadow_depth = 0;
11273
11274 if ( !(obj->pager_trusted) && !(obj->internal))
11275 extended->external_pager = 1;
11276
11277 if (shadow_object != VM_OBJECT_NULL) {
11278 vm_object_lock(shadow_object);
11279 for (;
11280 shadow_object != VM_OBJECT_NULL;
11281 shadow_depth++) {
11282 vm_object_t next_shadow;
11283
11284 if ( !(shadow_object->pager_trusted) &&
11285 !(shadow_object->internal))
11286 extended->external_pager = 1;
11287
11288 next_shadow = shadow_object->shadow;
11289 if (next_shadow) {
11290 vm_object_lock(next_shadow);
11291 }
11292 vm_object_unlock(shadow_object);
11293 shadow_object = next_shadow;
11294 }
11295 }
11296 extended->shadow_depth = shadow_depth;
11297 }
11298
11299 if (extended->shadow_depth || entry->needs_copy)
11300 extended->share_mode = SM_COW;
11301 else {
11302 if (ref_count == 1)
11303 extended->share_mode = SM_PRIVATE;
11304 else {
11305 if (obj->true_share)
11306 extended->share_mode = SM_TRUESHARED;
11307 else
11308 extended->share_mode = SM_SHARED;
11309 }
11310 }
11311 extended->ref_count = ref_count - extended->shadow_depth;
11312
11313 for (i = 0; i < extended->shadow_depth; i++) {
11314 if ((tmp_obj = obj->shadow) == 0)
11315 break;
11316 vm_object_lock(tmp_obj);
11317 vm_object_unlock(obj);
11318
11319 if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress)
11320 ref_count--;
11321
11322 extended->ref_count += ref_count;
11323 obj = tmp_obj;
11324 }
11325 vm_object_unlock(obj);
11326
11327 if (extended->share_mode == SM_SHARED) {
11328 register vm_map_entry_t cur;
11329 register vm_map_entry_t last;
11330 int my_refs;
11331
11332 obj = entry->object.vm_object;
11333 last = vm_map_to_entry(map);
11334 my_refs = 0;
11335
11336 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
11337 ref_count--;
11338 for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next)
11339 my_refs += vm_map_region_count_obj_refs(cur, obj);
11340
11341 if (my_refs == ref_count)
11342 extended->share_mode = SM_PRIVATE_ALIASED;
11343 else if (my_refs > 1)
11344 extended->share_mode = SM_SHARED_ALIASED;
11345 }
11346 }
11347 }
11348
11349
11350 /* object is locked on entry and locked on return */
11351
11352
11353 static void
11354 vm_map_region_look_for_page(
11355 __unused vm_map_t map,
11356 __unused vm_map_offset_t va,
11357 vm_object_t object,
11358 vm_object_offset_t offset,
11359 int max_refcnt,
11360 int depth,
11361 vm_region_extended_info_t extended,
11362 mach_msg_type_number_t count)
11363 {
11364 register vm_page_t p;
11365 register vm_object_t shadow;
11366 register int ref_count;
11367 vm_object_t caller_object;
11368 kern_return_t kr;
11369 shadow = object->shadow;
11370 caller_object = object;
11371
11372
11373 while (TRUE) {
11374
11375 if ( !(object->pager_trusted) && !(object->internal))
11376 extended->external_pager = 1;
11377
11378 if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
11379 if (shadow && (max_refcnt == 1))
11380 extended->pages_shared_now_private++;
11381
11382 if (!p->fictitious &&
11383 (p->dirty || pmap_is_modified(p->phys_page)))
11384 extended->pages_dirtied++;
11385 else if (count >= VM_REGION_EXTENDED_INFO_COUNT) {
11386 if (p->reusable || p->object->all_reusable) {
11387 extended->pages_reusable++;
11388 }
11389 }
11390
11391 extended->pages_resident++;
11392
11393 if(object != caller_object)
11394 vm_object_unlock(object);
11395
11396 return;
11397 }
11398 #if MACH_PAGEMAP
11399 if (object->existence_map) {
11400 if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_EXISTS) {
11401
11402 extended->pages_swapped_out++;
11403
11404 if(object != caller_object)
11405 vm_object_unlock(object);
11406
11407 return;
11408 }
11409 } else
11410 #endif /* MACH_PAGEMAP */
11411 if (object->internal &&
11412 object->alive &&
11413 !object->terminating &&
11414 object->pager_ready) {
11415
11416 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
11417 if (VM_COMPRESSOR_PAGER_STATE_GET(object,
11418 offset)
11419 == VM_EXTERNAL_STATE_EXISTS) {
11420 /* the pager has that page */
11421 extended->pages_swapped_out++;
11422 if (object != caller_object)
11423 vm_object_unlock(object);
11424 return;
11425 }
11426 } else {
11427 memory_object_t pager;
11428
11429 vm_object_paging_begin(object);
11430 pager = object->pager;
11431 vm_object_unlock(object);
11432
11433 kr = memory_object_data_request(
11434 pager,
11435 offset + object->paging_offset,
11436 0, /* just poke the pager */
11437 VM_PROT_READ,
11438 NULL);
11439
11440 vm_object_lock(object);
11441 vm_object_paging_end(object);
11442
11443 if (kr == KERN_SUCCESS) {
11444 /* the pager has that page */
11445 extended->pages_swapped_out++;
11446 if (object != caller_object)
11447 vm_object_unlock(object);
11448 return;
11449 }
11450 }
11451 }
11452
11453 if (shadow) {
11454 vm_object_lock(shadow);
11455
11456 if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress)
11457 ref_count--;
11458
11459 if (++depth > extended->shadow_depth)
11460 extended->shadow_depth = depth;
11461
11462 if (ref_count > max_refcnt)
11463 max_refcnt = ref_count;
11464
11465 if(object != caller_object)
11466 vm_object_unlock(object);
11467
11468 offset = offset + object->vo_shadow_offset;
11469 object = shadow;
11470 shadow = object->shadow;
11471 continue;
11472 }
11473 if(object != caller_object)
11474 vm_object_unlock(object);
11475 break;
11476 }
11477 }
11478
11479 static int
11480 vm_map_region_count_obj_refs(
11481 vm_map_entry_t entry,
11482 vm_object_t object)
11483 {
11484 register int ref_count;
11485 register vm_object_t chk_obj;
11486 register vm_object_t tmp_obj;
11487
11488 if (entry->object.vm_object == 0)
11489 return(0);
11490
11491 if (entry->is_sub_map)
11492 return(0);
11493 else {
11494 ref_count = 0;
11495
11496 chk_obj = entry->object.vm_object;
11497 vm_object_lock(chk_obj);
11498
11499 while (chk_obj) {
11500 if (chk_obj == object)
11501 ref_count++;
11502 tmp_obj = chk_obj->shadow;
11503 if (tmp_obj)
11504 vm_object_lock(tmp_obj);
11505 vm_object_unlock(chk_obj);
11506
11507 chk_obj = tmp_obj;
11508 }
11509 }
11510 return(ref_count);
11511 }
11512
11513
11514 /*
11515 * Routine: vm_map_simplify
11516 *
11517 * Description:
11518 * Attempt to simplify the map representation in
11519 * the vicinity of the given starting address.
11520 * Note:
11521 * This routine is intended primarily to keep the
11522 * kernel maps more compact -- they generally don't
11523 * benefit from the "expand a map entry" technology
11524 * at allocation time because the adjacent entry
11525 * is often wired down.
11526 */
11527 void
11528 vm_map_simplify_entry(
11529 vm_map_t map,
11530 vm_map_entry_t this_entry)
11531 {
11532 vm_map_entry_t prev_entry;
11533
11534 counter(c_vm_map_simplify_entry_called++);
11535
11536 prev_entry = this_entry->vme_prev;
11537
11538 if ((this_entry != vm_map_to_entry(map)) &&
11539 (prev_entry != vm_map_to_entry(map)) &&
11540
11541 (prev_entry->vme_end == this_entry->vme_start) &&
11542
11543 (prev_entry->is_sub_map == this_entry->is_sub_map) &&
11544 (prev_entry->object.vm_object == this_entry->object.vm_object) &&
11545 ((prev_entry->offset + (prev_entry->vme_end -
11546 prev_entry->vme_start))
11547 == this_entry->offset) &&
11548
11549 (prev_entry->behavior == this_entry->behavior) &&
11550 (prev_entry->needs_copy == this_entry->needs_copy) &&
11551 (prev_entry->protection == this_entry->protection) &&
11552 (prev_entry->max_protection == this_entry->max_protection) &&
11553 (prev_entry->inheritance == this_entry->inheritance) &&
11554 (prev_entry->use_pmap == this_entry->use_pmap) &&
11555 (prev_entry->alias == this_entry->alias) &&
11556 (prev_entry->no_cache == this_entry->no_cache) &&
11557 (prev_entry->permanent == this_entry->permanent) &&
11558 (prev_entry->map_aligned == this_entry->map_aligned) &&
11559 (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) &&
11560 (prev_entry->used_for_jit == this_entry->used_for_jit) &&
11561 /* from_reserved_zone: OK if that field doesn't match */
11562 (prev_entry->iokit_acct == this_entry->iokit_acct) &&
11563
11564 (prev_entry->wired_count == this_entry->wired_count) &&
11565 (prev_entry->user_wired_count == this_entry->user_wired_count) &&
11566
11567 (prev_entry->in_transition == FALSE) &&
11568 (this_entry->in_transition == FALSE) &&
11569 (prev_entry->needs_wakeup == FALSE) &&
11570 (this_entry->needs_wakeup == FALSE) &&
11571 (prev_entry->is_shared == FALSE) &&
11572 (this_entry->is_shared == FALSE) &&
11573 (prev_entry->superpage_size == FALSE) &&
11574 (this_entry->superpage_size == FALSE)
11575 ) {
11576 vm_map_store_entry_unlink(map, prev_entry);
11577 assert(prev_entry->vme_start < this_entry->vme_end);
11578 if (prev_entry->map_aligned)
11579 assert(VM_MAP_PAGE_ALIGNED(prev_entry->vme_start,
11580 VM_MAP_PAGE_MASK(map)));
11581 this_entry->vme_start = prev_entry->vme_start;
11582 this_entry->offset = prev_entry->offset;
11583 if (prev_entry->is_sub_map) {
11584 vm_map_deallocate(prev_entry->object.sub_map);
11585 } else {
11586 vm_object_deallocate(prev_entry->object.vm_object);
11587 }
11588 vm_map_entry_dispose(map, prev_entry);
11589 SAVE_HINT_MAP_WRITE(map, this_entry);
11590 counter(c_vm_map_simplified++);
11591 }
11592 }
11593
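/*
 * Illustrative example (assumption, for exposition): the kind of layout
 * vm_map_simplify_entry() collapses.  Two adjacent entries backed by the
 * same object at contiguous offsets,
 *
 *	[0x1000, 0x2000) -> object A, offset 0x0
 *	[0x2000, 0x3000) -> object A, offset 0x1000
 *
 * become a single entry [0x1000, 0x3000) -> object A, offset 0x0,
 * provided all of the protection/inheritance/wiring attributes compared
 * above also match.
 */
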
11594 void
11595 vm_map_simplify(
11596 vm_map_t map,
11597 vm_map_offset_t start)
11598 {
11599 vm_map_entry_t this_entry;
11600
11601 vm_map_lock(map);
11602 if (vm_map_lookup_entry(map, start, &this_entry)) {
11603 vm_map_simplify_entry(map, this_entry);
11604 vm_map_simplify_entry(map, this_entry->vme_next);
11605 }
11606 counter(c_vm_map_simplify_called++);
11607 vm_map_unlock(map);
11608 }
11609
11610 static void
11611 vm_map_simplify_range(
11612 vm_map_t map,
11613 vm_map_offset_t start,
11614 vm_map_offset_t end)
11615 {
11616 vm_map_entry_t entry;
11617
11618 /*
11619 * The map should be locked (for "write") by the caller.
11620 */
11621
11622 if (start >= end) {
11623 /* invalid address range */
11624 return;
11625 }
11626
11627 start = vm_map_trunc_page(start,
11628 VM_MAP_PAGE_MASK(map));
11629 end = vm_map_round_page(end,
11630 VM_MAP_PAGE_MASK(map));
11631
11632 if (!vm_map_lookup_entry(map, start, &entry)) {
11633 /* "start" is not mapped and "entry" ends before "start" */
11634 if (entry == vm_map_to_entry(map)) {
11635 /* start with first entry in the map */
11636 entry = vm_map_first_entry(map);
11637 } else {
11638 /* start with next entry */
11639 entry = entry->vme_next;
11640 }
11641 }
11642
11643 while (entry != vm_map_to_entry(map) &&
11644 entry->vme_start <= end) {
11645 /* try and coalesce "entry" with its previous entry */
11646 vm_map_simplify_entry(map, entry);
11647 entry = entry->vme_next;
11648 }
11649 }
11650
11651
11652 /*
11653 * Routine: vm_map_machine_attribute
11654 * Purpose:
11655 * Provide machine-specific attributes to mappings,
11656 * such as cacheability, etc., for machines that provide
11657 * them. NUMA architectures and machines with big/strange
11658 * caches will use this.
11659 * Note:
11660 * Responsibilities for locking and checking are handled here;
11661 * everything else is in the pmap module. If any non-volatile
11662 * information must be kept, the pmap module should handle
11663 * it itself. [This assumes that attributes do not
11664 * need to be inherited, which seems OK to me.]
11665 */
11666 kern_return_t
11667 vm_map_machine_attribute(
11668 vm_map_t map,
11669 vm_map_offset_t start,
11670 vm_map_offset_t end,
11671 vm_machine_attribute_t attribute,
11672 vm_machine_attribute_val_t* value) /* IN/OUT */
11673 {
11674 kern_return_t ret;
11675 vm_map_size_t sync_size;
11676 vm_map_entry_t entry;
11677
11678 if (start < vm_map_min(map) || end > vm_map_max(map))
11679 return KERN_INVALID_ADDRESS;
11680
11681 /* Figure how much memory we need to flush (in page increments) */
11682 sync_size = end - start;
11683
11684 vm_map_lock(map);
11685
11686 if (attribute != MATTR_CACHE) {
11687 /* If we don't have to find physical addresses, we */
11688 /* don't have to do an explicit traversal here. */
11689 ret = pmap_attribute(map->pmap, start, end-start,
11690 attribute, value);
11691 vm_map_unlock(map);
11692 return ret;
11693 }
11694
11695 ret = KERN_SUCCESS; /* Assume it all worked */
11696
11697 while(sync_size) {
11698 if (vm_map_lookup_entry(map, start, &entry)) {
11699 vm_map_size_t sub_size;
11700 if((entry->vme_end - start) > sync_size) {
11701 sub_size = sync_size;
11702 sync_size = 0;
11703 } else {
11704 sub_size = entry->vme_end - start;
11705 sync_size -= sub_size;
11706 }
11707 if(entry->is_sub_map) {
11708 vm_map_offset_t sub_start;
11709 vm_map_offset_t sub_end;
11710
11711 sub_start = (start - entry->vme_start)
11712 + entry->offset;
11713 sub_end = sub_start + sub_size;
11714 vm_map_machine_attribute(
11715 entry->object.sub_map,
11716 sub_start,
11717 sub_end,
11718 attribute, value);
11719 } else {
11720 if(entry->object.vm_object) {
11721 vm_page_t m;
11722 vm_object_t object;
11723 vm_object_t base_object;
11724 vm_object_t last_object;
11725 vm_object_offset_t offset;
11726 vm_object_offset_t base_offset;
11727 vm_map_size_t range;
11728 range = sub_size;
11729 offset = (start - entry->vme_start)
11730 + entry->offset;
11731 base_offset = offset;
11732 object = entry->object.vm_object;
11733 base_object = object;
11734 last_object = NULL;
11735
11736 vm_object_lock(object);
11737
11738 while (range) {
11739 m = vm_page_lookup(
11740 object, offset);
11741
11742 if (m && !m->fictitious) {
11743 ret =
11744 pmap_attribute_cache_sync(
11745 m->phys_page,
11746 PAGE_SIZE,
11747 attribute, value);
11748
11749 } else if (object->shadow) {
11750 offset = offset + object->vo_shadow_offset;
11751 last_object = object;
11752 object = object->shadow;
11753 vm_object_lock(last_object->shadow);
11754 vm_object_unlock(last_object);
11755 continue;
11756 }
11757 range -= PAGE_SIZE;
11758
11759 if (base_object != object) {
11760 vm_object_unlock(object);
11761 vm_object_lock(base_object);
11762 object = base_object;
11763 }
11764 /* Bump to the next page */
11765 base_offset += PAGE_SIZE;
11766 offset = base_offset;
11767 }
11768 vm_object_unlock(object);
11769 }
11770 }
11771 start += sub_size;
11772 } else {
11773 vm_map_unlock(map);
11774 return KERN_FAILURE;
11775 }
11776
11777 }
11778
11779 vm_map_unlock(map);
11780
11781 return ret;
11782 }
11783
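/*
 * Illustrative sketch only (assumption, user-space view): the classic
 * use of this path is flushing caches after generating code, via the
 * MATTR_CACHE attribute.  "code_buffer" and "code_size" are placeholders.
 */
#if 0
	vm_machine_attribute_val_t	value = MATTR_VAL_CACHE_FLUSH;
	kern_return_t			kr;

	kr = vm_machine_attribute(mach_task_self(),
				  (vm_address_t)code_buffer,
				  code_size,
				  MATTR_CACHE,
				  &value);
#endif
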
11784 /*
11785 * vm_map_behavior_set:
11786 *
11787 * Sets the paging reference behavior of the specified address
11788 * range in the target map. Paging reference behavior affects
11789 * how pagein operations resulting from faults on the map will be
11790 * clustered.
11791 */
11792 kern_return_t
11793 vm_map_behavior_set(
11794 vm_map_t map,
11795 vm_map_offset_t start,
11796 vm_map_offset_t end,
11797 vm_behavior_t new_behavior)
11798 {
11799 register vm_map_entry_t entry;
11800 vm_map_entry_t temp_entry;
11801
11802 XPR(XPR_VM_MAP,
11803 "vm_map_behavior_set, 0x%X start 0x%X end 0x%X behavior %d",
11804 map, start, end, new_behavior, 0);
11805
11806 if (start > end ||
11807 start < vm_map_min(map) ||
11808 end > vm_map_max(map)) {
11809 return KERN_NO_SPACE;
11810 }
11811
11812 switch (new_behavior) {
11813
11814 /*
11815 * This first block of behaviors all set a persistent state on the specified
11816 * memory range. All we have to do here is to record the desired behavior
11817 * in the vm_map_entry_t's.
11818 */
11819
11820 case VM_BEHAVIOR_DEFAULT:
11821 case VM_BEHAVIOR_RANDOM:
11822 case VM_BEHAVIOR_SEQUENTIAL:
11823 case VM_BEHAVIOR_RSEQNTL:
11824 case VM_BEHAVIOR_ZERO_WIRED_PAGES:
11825 vm_map_lock(map);
11826
11827 /*
11828 * The entire address range must be valid for the map.
11829 * Note that vm_map_range_check() does a
11830 * vm_map_lookup_entry() internally and returns the
11831 * entry containing the start of the address range if
11832 * the entire range is valid.
11833 */
11834 if (vm_map_range_check(map, start, end, &temp_entry)) {
11835 entry = temp_entry;
11836 vm_map_clip_start(map, entry, start);
11837 }
11838 else {
11839 vm_map_unlock(map);
11840 return(KERN_INVALID_ADDRESS);
11841 }
11842
11843 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
11844 vm_map_clip_end(map, entry, end);
11845 if (entry->is_sub_map) {
11846 assert(!entry->use_pmap);
11847 }
11848
11849 if( new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES ) {
11850 entry->zero_wired_pages = TRUE;
11851 } else {
11852 entry->behavior = new_behavior;
11853 }
11854 entry = entry->vme_next;
11855 }
11856
11857 vm_map_unlock(map);
11858 break;
11859
11860 /*
11861 * The rest of these are different from the above in that they cause
11862 * an immediate action to take place as opposed to setting a behavior that
11863 * affects future actions.
11864 */
11865
11866 case VM_BEHAVIOR_WILLNEED:
11867 return vm_map_willneed(map, start, end);
11868
11869 case VM_BEHAVIOR_DONTNEED:
11870 return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS);
11871
11872 case VM_BEHAVIOR_FREE:
11873 return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS);
11874
11875 case VM_BEHAVIOR_REUSABLE:
11876 return vm_map_reusable_pages(map, start, end);
11877
11878 case VM_BEHAVIOR_REUSE:
11879 return vm_map_reuse_pages(map, start, end);
11880
11881 case VM_BEHAVIOR_CAN_REUSE:
11882 return vm_map_can_reuse(map, start, end);
11883
11884 default:
11885 return(KERN_INVALID_ARGUMENT);
11886 }
11887
11888 return(KERN_SUCCESS);
11889 }
11890
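/*
 * Illustrative sketch only (assumption): an in-kernel caller marking a
 * range for sequential read-ahead.  "map", "start" and "end" are
 * placeholders for the example.
 */
#if 0
	kern_return_t	kr;

	kr = vm_map_behavior_set(map, start, end, VM_BEHAVIOR_SEQUENTIAL);
	if (kr != KERN_SUCCESS) {
		/* KERN_NO_SPACE: range outside the map bounds;
		 * KERN_INVALID_ADDRESS: a hole in the requested range */
	}
#endif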
11891
11892 /*
11893 * Internals for madvise(MADV_WILLNEED) system call.
11894 *
11895 * The present implementation does a read-ahead if the mapping corresponds
11896 * to a mapped regular file. If it's an anonymous mapping, then we do nothing
11897 * and basically ignore the "advice" (which we are always free to do).
11898 */
11899
11900
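/*
 * Illustrative sketch only (assumption, user-space view, <sys/mman.h>):
 * the advice handled here originates from a call like the following.
 * "length" and "fd" are placeholders.
 */
#if 0
	void	*buf;

	buf = mmap(NULL, length, PROT_READ, MAP_PRIVATE, fd, 0);
	if (buf != MAP_FAILED) {
		(void) madvise(buf, length, MADV_WILLNEED);	/* request read-ahead */
	}
#endif
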
11901 static kern_return_t
11902 vm_map_willneed(
11903 vm_map_t map,
11904 vm_map_offset_t start,
11905 vm_map_offset_t end
11906 )
11907 {
11908 vm_map_entry_t entry;
11909 vm_object_t object;
11910 memory_object_t pager;
11911 struct vm_object_fault_info fault_info;
11912 kern_return_t kr;
11913 vm_object_size_t len;
11914 vm_object_offset_t offset;
11915
11916 /*
11917 * Fill in static values in fault_info. Several fields get ignored by the code
11918 * we call, but we'll fill them in anyway since uninitialized fields are bad
11919 * when it comes to future backwards compatibility.
11920 */
11921
11922 fault_info.interruptible = THREAD_UNINT; /* ignored value */
11923 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
11924 fault_info.no_cache = FALSE; /* ignored value */
11925 fault_info.stealth = TRUE;
11926 fault_info.io_sync = FALSE;
11927 fault_info.cs_bypass = FALSE;
11928 fault_info.mark_zf_absent = FALSE;
11929 fault_info.batch_pmap_op = FALSE;
11930
11931 /*
11932 * The MADV_WILLNEED operation doesn't require any changes to the
11933 * vm_map_entry_t's, so the read lock is sufficient.
11934 */
11935
11936 vm_map_lock_read(map);
11937
11938 /*
11939 * The madvise semantics require that the address range be fully
11940 * allocated with no holes. Otherwise, we're required to return
11941 * an error.
11942 */
11943
11944 if (! vm_map_range_check(map, start, end, &entry)) {
11945 vm_map_unlock_read(map);
11946 return KERN_INVALID_ADDRESS;
11947 }
11948
11949 /*
11950 * Examine each vm_map_entry_t in the range.
11951 */
11952 for (; entry != vm_map_to_entry(map) && start < end; ) {
11953
11954 /*
11955 * The first time through, the start address could be anywhere
11956 * within the vm_map_entry we found. So adjust the offset to
11957 * correspond. After that, the offset will always be zero to
11958 * correspond to the beginning of the current vm_map_entry.
11959 */
11960 offset = (start - entry->vme_start) + entry->offset;
11961
11962 /*
11963 * Set the length so we don't go beyond the end of the
11964 * map_entry or beyond the end of the range we were given.
11965 * This range could also span multiple map entries, all of which
11966 * map different files, so make sure we only do the right amount
11967 * of I/O for each object. Note that it's possible for there
11968 * to be multiple map entries all referring to the same object
11969 * but with different page permissions, but it's not worth
11970 * trying to optimize that case.
11971 */
11972 len = MIN(entry->vme_end - start, end - start);
11973
11974 if ((vm_size_t) len != len) {
11975 /* 32-bit overflow */
11976 len = (vm_size_t) (0 - PAGE_SIZE);
11977 }
11978 fault_info.cluster_size = (vm_size_t) len;
11979 fault_info.lo_offset = offset;
11980 fault_info.hi_offset = offset + len;
11981 fault_info.user_tag = entry->alias;
11982 fault_info.pmap_options = 0;
11983 if (entry->iokit_acct ||
11984 (!entry->is_sub_map && !entry->use_pmap)) {
11985 fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
11986 }
11987
11988 /*
11989 * If there's no read permission to this mapping, then just
11990 * skip it.
11991 */
11992 if ((entry->protection & VM_PROT_READ) == 0) {
11993 entry = entry->vme_next;
11994 start = entry->vme_start;
11995 continue;
11996 }
11997
11998 /*
11999 * Find the file object backing this map entry. If there is
12000 * none, then we simply ignore the "will need" advice for this
12001 * entry and go on to the next one.
12002 */
12003 if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) {
12004 entry = entry->vme_next;
12005 start = entry->vme_start;
12006 continue;
12007 }
12008
12009 /*
12010 * The data_request() could take a long time, so let's
12011 * release the map lock to avoid blocking other threads.
12012 */
12013 vm_map_unlock_read(map);
12014
12015 vm_object_paging_begin(object);
12016 pager = object->pager;
12017 vm_object_unlock(object);
12018
12019 /*
12020 * Get the data from the object asynchronously.
12021 *
12022 * Note that memory_object_data_request() places limits on the
12023 * amount of I/O it will do. Regardless of the len we
12024 * specified, it won't do more than MAX_UPL_TRANSFER_BYTES and it
12025 * silently truncates the len to that size. This isn't
12026 * necessarily bad since madvise shouldn't really be used to
12027 * page in unlimited amounts of data. Other Unix variants
12028 * limit the willneed case as well. If this turns out to be an
12029 * issue for developers, then we can always adjust the policy
12030 * here and still be backwards compatible since this is all
12031 * just "advice".
12032 */
12033 kr = memory_object_data_request(
12034 pager,
12035 offset + object->paging_offset,
12036 0, /* ignored */
12037 VM_PROT_READ,
12038 (memory_object_fault_info_t)&fault_info);
12039
12040 vm_object_lock(object);
12041 vm_object_paging_end(object);
12042 vm_object_unlock(object);
12043
12044 /*
12045 * If we couldn't do the I/O for some reason, just give up on
12046 * the madvise. We still return success to the user since
12047 * madvise isn't supposed to fail when the advice can't be
12048 * taken.
12049 */
12050 if (kr != KERN_SUCCESS) {
12051 return KERN_SUCCESS;
12052 }
12053
12054 start += len;
12055 if (start >= end) {
12056 /* done */
12057 return KERN_SUCCESS;
12058 }
12059
12060 /* look up next entry */
12061 vm_map_lock_read(map);
12062 if (! vm_map_lookup_entry(map, start, &entry)) {
12063 /*
12064 * There's a new hole in the address range.
12065 */
12066 vm_map_unlock_read(map);
12067 return KERN_INVALID_ADDRESS;
12068 }
12069 }
12070
12071 vm_map_unlock_read(map);
12072 return KERN_SUCCESS;
12073 }
12074
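/*
 * A small illustrative sketch of the per-entry windowing computed in the
 * loop above: for the current "start" cursor, the object offset and the
 * I/O length are clipped to both the map entry and the requested range.
 * The helper name and standalone form are hypothetical.
 */
#if 0 /* illustrative only */
static void
example_willneed_window(
	vm_map_entry_t		entry,
	vm_map_offset_t		start,	/* current cursor, >= entry->vme_start */
	vm_map_offset_t		end,	/* end of the advised range */
	vm_object_offset_t	*offset_p,
	vm_object_size_t	*len_p)
{
	/* translate the map address into an offset within the backing object */
	*offset_p = (start - entry->vme_start) + entry->offset;
	/* don't read past the entry or past the advised range */
	*len_p = MIN(entry->vme_end - start, end - start);
}
#endif
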
12075 static boolean_t
12076 vm_map_entry_is_reusable(
12077 vm_map_entry_t entry)
12078 {
12079 vm_object_t object;
12080
12081 switch (entry->alias) {
12082 case VM_MEMORY_MALLOC:
12083 case VM_MEMORY_MALLOC_SMALL:
12084 case VM_MEMORY_MALLOC_LARGE:
12085 case VM_MEMORY_REALLOC:
12086 case VM_MEMORY_MALLOC_TINY:
12087 case VM_MEMORY_MALLOC_LARGE_REUSABLE:
12088 case VM_MEMORY_MALLOC_LARGE_REUSED:
12089 /*
12090 * This is a malloc() memory region: check if it's still
12091 * in its original state and can be re-used for more
12092 * malloc() allocations.
12093 */
12094 break;
12095 default:
12096 /*
12097 * Not a malloc() memory region: let the caller decide if
12098 * it's re-usable.
12099 */
12100 return TRUE;
12101 }
12102
12103 if (entry->is_shared ||
12104 entry->is_sub_map ||
12105 entry->in_transition ||
12106 entry->protection != VM_PROT_DEFAULT ||
12107 entry->max_protection != VM_PROT_ALL ||
12108 entry->inheritance != VM_INHERIT_DEFAULT ||
12109 entry->no_cache ||
12110 entry->permanent ||
12111 entry->superpage_size != FALSE ||
12112 entry->zero_wired_pages ||
12113 entry->wired_count != 0 ||
12114 entry->user_wired_count != 0) {
12115 return FALSE;
12116 }
12117
12118 object = entry->object.vm_object;
12119 if (object == VM_OBJECT_NULL) {
12120 return TRUE;
12121 }
12122 if (
12123 #if 0
12124 /*
12125 * Let's proceed even if the VM object is potentially
12126 * shared.
12127 * We check for this later when processing the actual
12128 * VM pages, so the contents will be safe if shared.
12129 *
12130 * But we can still mark this memory region as "reusable" to
12131 * acknowledge that the caller did let us know that the memory
12132 * could be re-used and should not be penalized for holding
12133 * on to it. This allows its "resident size" to not include
12134 * the reusable range.
12135 */
12136 object->ref_count == 1 &&
12137 #endif
12138 object->wired_page_count == 0 &&
12139 object->copy == VM_OBJECT_NULL &&
12140 object->shadow == VM_OBJECT_NULL &&
12141 object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
12142 object->internal &&
12143 !object->true_share &&
12144 object->wimg_bits == VM_WIMG_USE_DEFAULT &&
12145 !object->code_signed) {
12146 return TRUE;
12147 }
12148 return FALSE;
12149
12150
12151 }
12152
12153 static kern_return_t
12154 vm_map_reuse_pages(
12155 vm_map_t map,
12156 vm_map_offset_t start,
12157 vm_map_offset_t end)
12158 {
12159 vm_map_entry_t entry;
12160 vm_object_t object;
12161 vm_object_offset_t start_offset, end_offset;
12162
12163 /*
12164 * The MADV_REUSE operation doesn't require any changes to the
12165 * vm_map_entry_t's, so the read lock is sufficient.
12166 */
12167
12168 vm_map_lock_read(map);
12169
12170 /*
12171 * The madvise semantics require that the address range be fully
12172 * allocated with no holes. Otherwise, we're required to return
12173 * an error.
12174 */
12175
12176 if (!vm_map_range_check(map, start, end, &entry)) {
12177 vm_map_unlock_read(map);
12178 vm_page_stats_reusable.reuse_pages_failure++;
12179 return KERN_INVALID_ADDRESS;
12180 }
12181
12182 /*
12183 * Examine each vm_map_entry_t in the range.
12184 */
12185 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
12186 entry = entry->vme_next) {
12187 /*
12188 * Sanity check on the VM map entry.
12189 */
12190 if (! vm_map_entry_is_reusable(entry)) {
12191 vm_map_unlock_read(map);
12192 vm_page_stats_reusable.reuse_pages_failure++;
12193 return KERN_INVALID_ADDRESS;
12194 }
12195
12196 /*
12197 * The first time through, the start address could be anywhere
12198 * within the vm_map_entry we found. So adjust the offset to
12199 * correspond.
12200 */
12201 if (entry->vme_start < start) {
12202 start_offset = start - entry->vme_start;
12203 } else {
12204 start_offset = 0;
12205 }
12206 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
12207 start_offset += entry->offset;
12208 end_offset += entry->offset;
12209
12210 object = entry->object.vm_object;
12211 if (object != VM_OBJECT_NULL) {
12212 vm_object_lock(object);
12213 vm_object_reuse_pages(object, start_offset, end_offset,
12214 TRUE);
12215 vm_object_unlock(object);
12216 }
12217
12218 if (entry->alias == VM_MEMORY_MALLOC_LARGE_REUSABLE) {
12219 /*
12220 * XXX
12221 * We do not hold the VM map exclusively here.
12222 * The "alias" field is not that critical, so it's
12223 * safe to update it here, as long as it is the only
12224 * one that can be modified while holding the VM map
12225 * "shared".
12226 */
12227 entry->alias = VM_MEMORY_MALLOC_LARGE_REUSED;
12228 }
12229 }
12230
12231 vm_map_unlock_read(map);
12232 vm_page_stats_reusable.reuse_pages_success++;
12233 return KERN_SUCCESS;
12234 }
12235
12236
12237 static kern_return_t
12238 vm_map_reusable_pages(
12239 vm_map_t map,
12240 vm_map_offset_t start,
12241 vm_map_offset_t end)
12242 {
12243 vm_map_entry_t entry;
12244 vm_object_t object;
12245 vm_object_offset_t start_offset, end_offset;
12246
12247 /*
12248 * The MADV_REUSABLE operation doesn't require any changes to the
12249 * vm_map_entry_t's, so the read lock is sufficient.
12250 */
12251
12252 vm_map_lock_read(map);
12253
12254 /*
12255 * The madvise semantics require that the address range be fully
12256 * allocated with no holes. Otherwise, we're required to return
12257 * an error.
12258 */
12259
12260 if (!vm_map_range_check(map, start, end, &entry)) {
12261 vm_map_unlock_read(map);
12262 vm_page_stats_reusable.reusable_pages_failure++;
12263 return KERN_INVALID_ADDRESS;
12264 }
12265
12266 /*
12267 * Examine each vm_map_entry_t in the range.
12268 */
12269 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
12270 entry = entry->vme_next) {
12271 int kill_pages = 0;
12272
12273 /*
12274 * Sanity check on the VM map entry.
12275 */
12276 if (! vm_map_entry_is_reusable(entry)) {
12277 vm_map_unlock_read(map);
12278 vm_page_stats_reusable.reusable_pages_failure++;
12279 return KERN_INVALID_ADDRESS;
12280 }
12281
12282 /*
12283 * The first time through, the start address could be anywhere
12284 * within the vm_map_entry we found. So adjust the offset to
12285 * correspond.
12286 */
12287 if (entry->vme_start < start) {
12288 start_offset = start - entry->vme_start;
12289 } else {
12290 start_offset = 0;
12291 }
12292 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
12293 start_offset += entry->offset;
12294 end_offset += entry->offset;
12295
12296 object = entry->object.vm_object;
12297 if (object == VM_OBJECT_NULL)
12298 continue;
12299
12300
12301 vm_object_lock(object);
12302 if (object->ref_count == 1 &&
12303 !object->shadow &&
12304 /*
12305 * "iokit_acct" entries are billed for their virtual size
12306 * (rather than for their resident pages only), so they
12307 * wouldn't benefit from making pages reusable, and it
12308 * would be hard to keep track of pages that are both
12309 * "iokit_acct" and "reusable" in the pmap stats and ledgers.
12310 */
12311 !(entry->iokit_acct ||
12312 (!entry->is_sub_map && !entry->use_pmap)))
12313 kill_pages = 1;
12314 else
12315 kill_pages = -1;
12316 if (kill_pages != -1) {
12317 vm_object_deactivate_pages(object,
12318 start_offset,
12319 end_offset - start_offset,
12320 kill_pages,
12321 TRUE /*reusable_pages*/);
12322 } else {
12323 vm_page_stats_reusable.reusable_pages_shared++;
12324 }
12325 vm_object_unlock(object);
12326
12327 if (entry->alias == VM_MEMORY_MALLOC_LARGE ||
12328 entry->alias == VM_MEMORY_MALLOC_LARGE_REUSED) {
12329 /*
12330 * XXX
12331 * We do not hold the VM map exclusively here.
12332 * The "alias" field is not that critical, so it's
12333 * safe to update it here, as long as it is the only
12334 * one that can be modified while holding the VM map
12335 * "shared".
12336 */
12337 entry->alias = VM_MEMORY_MALLOC_LARGE_REUSABLE;
12338 }
12339 }
12340
12341 vm_map_unlock_read(map);
12342 vm_page_stats_reusable.reusable_pages_success++;
12343 return KERN_SUCCESS;
12344 }
12345
12346
12347 static kern_return_t
12348 vm_map_can_reuse(
12349 vm_map_t map,
12350 vm_map_offset_t start,
12351 vm_map_offset_t end)
12352 {
12353 vm_map_entry_t entry;
12354
12355 /*
12356 * The MADV_REUSABLE operation doesn't require any changes to the
12357 * vm_map_entry_t's, so the read lock is sufficient.
12358 */
12359
12360 vm_map_lock_read(map);
12361
12362 /*
12363 * The madvise semantics require that the address range be fully
12364 * allocated with no holes. Otherwise, we're required to return
12365 * an error.
12366 */
12367
12368 if (!vm_map_range_check(map, start, end, &entry)) {
12369 vm_map_unlock_read(map);
12370 vm_page_stats_reusable.can_reuse_failure++;
12371 return KERN_INVALID_ADDRESS;
12372 }
12373
12374 /*
12375 * Examine each vm_map_entry_t in the range.
12376 */
12377 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
12378 entry = entry->vme_next) {
12379 /*
12380 * Sanity check on the VM map entry.
12381 */
12382 if (! vm_map_entry_is_reusable(entry)) {
12383 vm_map_unlock_read(map);
12384 vm_page_stats_reusable.can_reuse_failure++;
12385 return KERN_INVALID_ADDRESS;
12386 }
12387 }
12388
12389 vm_map_unlock_read(map);
12390 vm_page_stats_reusable.can_reuse_success++;
12391 return KERN_SUCCESS;
12392 }
12393
12394
12395 /*
12396 * Routine: vm_map_entry_insert
12397 *
12398 * Description: This routine inserts a new vm_map_entry in a locked map.
12399 */
12400 vm_map_entry_t
12401 vm_map_entry_insert(
12402 vm_map_t map,
12403 vm_map_entry_t insp_entry,
12404 vm_map_offset_t start,
12405 vm_map_offset_t end,
12406 vm_object_t object,
12407 vm_object_offset_t offset,
12408 boolean_t needs_copy,
12409 boolean_t is_shared,
12410 boolean_t in_transition,
12411 vm_prot_t cur_protection,
12412 vm_prot_t max_protection,
12413 vm_behavior_t behavior,
12414 vm_inherit_t inheritance,
12415 unsigned wired_count,
12416 boolean_t no_cache,
12417 boolean_t permanent,
12418 unsigned int superpage_size,
12419 boolean_t clear_map_aligned,
12420 boolean_t is_submap)
12421 {
12422 vm_map_entry_t new_entry;
12423
12424 assert(insp_entry != (vm_map_entry_t)0);
12425
12426 new_entry = vm_map_entry_create(map, !map->hdr.entries_pageable);
12427
12428 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
12429 new_entry->map_aligned = TRUE;
12430 } else {
12431 new_entry->map_aligned = FALSE;
12432 }
12433 if (clear_map_aligned &&
12434 (! VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)) ||
12435 ! VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)))) {
12436 new_entry->map_aligned = FALSE;
12437 }
12438
12439 new_entry->vme_start = start;
12440 new_entry->vme_end = end;
12441 assert(page_aligned(new_entry->vme_start));
12442 assert(page_aligned(new_entry->vme_end));
12443 if (new_entry->map_aligned) {
12444 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
12445 VM_MAP_PAGE_MASK(map)));
12446 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
12447 VM_MAP_PAGE_MASK(map)));
12448 }
12449 assert(new_entry->vme_start < new_entry->vme_end);
12450
12451 new_entry->object.vm_object = object;
12452 new_entry->offset = offset;
12453 new_entry->is_shared = is_shared;
12454 new_entry->is_sub_map = is_submap;
12455 new_entry->needs_copy = needs_copy;
12456 new_entry->in_transition = in_transition;
12457 new_entry->needs_wakeup = FALSE;
12458 new_entry->inheritance = inheritance;
12459 new_entry->protection = cur_protection;
12460 new_entry->max_protection = max_protection;
12461 new_entry->behavior = behavior;
12462 new_entry->wired_count = wired_count;
12463 new_entry->user_wired_count = 0;
12464 if (is_submap) {
12465 /*
12466 * submap: "use_pmap" means "nested".
12467 * default: false.
12468 */
12469 new_entry->use_pmap = FALSE;
12470 } else {
12471 /*
12472 * object: "use_pmap" means "use pmap accounting" for footprint.
12473 * default: true.
12474 */
12475 new_entry->use_pmap = TRUE;
12476 }
12477 new_entry->alias = 0;
12478 new_entry->zero_wired_pages = FALSE;
12479 new_entry->no_cache = no_cache;
12480 new_entry->permanent = permanent;
12481 if (superpage_size)
12482 new_entry->superpage_size = TRUE;
12483 else
12484 new_entry->superpage_size = FALSE;
12485 new_entry->used_for_jit = FALSE;
12486 new_entry->iokit_acct = FALSE;
12487
12488 /*
12489 * Insert the new entry into the list.
12490 */
12491
12492 vm_map_store_entry_link(map, insp_entry, new_entry);
12493 map->size += end - start;
12494
12495 /*
12496 * Update the free space hint and the lookup hint.
12497 */
12498
12499 SAVE_HINT_MAP_WRITE(map, new_entry);
12500 return new_entry;
12501 }
12502
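/*
 * A hedged sketch of a typical call, mirroring the parameter list above;
 * the surrounding context (a locked map, an insertion point and a range)
 * is assumed, not taken from an actual call site in this file.
 */
#if 0 /* illustrative only */
	new_entry = vm_map_entry_insert(
		map, insp_entry,		/* locked map, entry to insert after */
		start, end,			/* address range */
		VM_OBJECT_NULL, 0,		/* object, offset: allocate lazily */
		FALSE,				/* needs_copy */
		FALSE,				/* is_shared */
		FALSE,				/* in_transition */
		VM_PROT_DEFAULT,		/* cur_protection */
		VM_PROT_ALL,			/* max_protection */
		VM_BEHAVIOR_DEFAULT,
		VM_INHERIT_DEFAULT,
		0,				/* wired_count */
		FALSE,				/* no_cache */
		FALSE,				/* permanent */
		0,				/* superpage_size */
		FALSE,				/* clear_map_aligned */
		FALSE);				/* is_submap */
#endif
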
12503 /*
12504 * Routine: vm_map_remap_extract
12505 *
12506 * Description: This routine returns a vm_map_entry list from a map.
12507 */
12508 static kern_return_t
12509 vm_map_remap_extract(
12510 vm_map_t map,
12511 vm_map_offset_t addr,
12512 vm_map_size_t size,
12513 boolean_t copy,
12514 struct vm_map_header *map_header,
12515 vm_prot_t *cur_protection,
12516 vm_prot_t *max_protection,
12517 /* What, no behavior? */
12518 vm_inherit_t inheritance,
12519 boolean_t pageable)
12520 {
12521 kern_return_t result;
12522 vm_map_size_t mapped_size;
12523 vm_map_size_t tmp_size;
12524 vm_map_entry_t src_entry; /* result of last map lookup */
12525 vm_map_entry_t new_entry;
12526 vm_object_offset_t offset;
12527 vm_map_offset_t map_address;
12528 vm_map_offset_t src_start; /* start of entry to map */
12529 vm_map_offset_t src_end; /* end of region to be mapped */
12530 vm_object_t object;
12531 vm_map_version_t version;
12532 boolean_t src_needs_copy;
12533 boolean_t new_entry_needs_copy;
12534
12535 assert(map != VM_MAP_NULL);
12536 assert(size != 0);
12537 assert(size == vm_map_round_page(size, PAGE_MASK));
12538 assert(inheritance == VM_INHERIT_NONE ||
12539 inheritance == VM_INHERIT_COPY ||
12540 inheritance == VM_INHERIT_SHARE);
12541
12542 /*
12543 * Compute start and end of region.
12544 */
12545 src_start = vm_map_trunc_page(addr, PAGE_MASK);
12546 src_end = vm_map_round_page(src_start + size, PAGE_MASK);
12547
12548
12549 /*
12550 * Initialize map_header.
12551 */
12552 map_header->links.next = (struct vm_map_entry *)&map_header->links;
12553 map_header->links.prev = (struct vm_map_entry *)&map_header->links;
12554 map_header->nentries = 0;
12555 map_header->entries_pageable = pageable;
12556 map_header->page_shift = PAGE_SHIFT;
12557
12558 vm_map_store_init( map_header );
12559
12560 *cur_protection = VM_PROT_ALL;
12561 *max_protection = VM_PROT_ALL;
12562
12563 map_address = 0;
12564 mapped_size = 0;
12565 result = KERN_SUCCESS;
12566
12567 /*
12568 * The specified source virtual space might correspond to
12569 * multiple map entries, so we need to loop over them.
12570 */
12571 vm_map_lock(map);
12572 while (mapped_size != size) {
12573 vm_map_size_t entry_size;
12574
12575 /*
12576 * Find the beginning of the region.
12577 */
12578 if (! vm_map_lookup_entry(map, src_start, &src_entry)) {
12579 result = KERN_INVALID_ADDRESS;
12580 break;
12581 }
12582
12583 if (src_start < src_entry->vme_start ||
12584 (mapped_size && src_start != src_entry->vme_start)) {
12585 result = KERN_INVALID_ADDRESS;
12586 break;
12587 }
12588
12589 tmp_size = size - mapped_size;
12590 if (src_end > src_entry->vme_end)
12591 tmp_size -= (src_end - src_entry->vme_end);
12592
12593 entry_size = (vm_map_size_t)(src_entry->vme_end -
12594 src_entry->vme_start);
12595
12596 if(src_entry->is_sub_map) {
12597 vm_map_reference(src_entry->object.sub_map);
12598 object = VM_OBJECT_NULL;
12599 } else {
12600 object = src_entry->object.vm_object;
12601 if (src_entry->iokit_acct) {
12602 /*
12603 * This entry uses "IOKit accounting".
12604 */
12605 } else if (object != VM_OBJECT_NULL &&
12606 object->purgable != VM_PURGABLE_DENY) {
12607 /*
12608 * Purgeable objects have their own accounting:
12609 * no pmap accounting for them.
12610 */
12611 assert(!src_entry->use_pmap);
12612 } else {
12613 /*
12614 * Not IOKit or purgeable:
12615 * must be accounted by pmap stats.
12616 */
12617 assert(src_entry->use_pmap);
12618 }
12619
12620 if (object == VM_OBJECT_NULL) {
12621 object = vm_object_allocate(entry_size);
12622 src_entry->offset = 0;
12623 src_entry->object.vm_object = object;
12624 } else if (object->copy_strategy !=
12625 MEMORY_OBJECT_COPY_SYMMETRIC) {
12626 /*
12627 * We are already using an asymmetric
12628 * copy, and therefore we already have
12629 * the right object.
12630 */
12631 assert(!src_entry->needs_copy);
12632 } else if (src_entry->needs_copy || object->shadowed ||
12633 (object->internal && !object->true_share &&
12634 !src_entry->is_shared &&
12635 object->vo_size > entry_size)) {
12636
12637 vm_object_shadow(&src_entry->object.vm_object,
12638 &src_entry->offset,
12639 entry_size);
12640
12641 if (!src_entry->needs_copy &&
12642 (src_entry->protection & VM_PROT_WRITE)) {
12643 vm_prot_t prot;
12644
12645 prot = src_entry->protection & ~VM_PROT_WRITE;
12646
12647 if (override_nx(map, src_entry->alias) && prot)
12648 prot |= VM_PROT_EXECUTE;
12649
12650 if(map->mapped_in_other_pmaps) {
12651 vm_object_pmap_protect(
12652 src_entry->object.vm_object,
12653 src_entry->offset,
12654 entry_size,
12655 PMAP_NULL,
12656 src_entry->vme_start,
12657 prot);
12658 } else {
12659 pmap_protect(vm_map_pmap(map),
12660 src_entry->vme_start,
12661 src_entry->vme_end,
12662 prot);
12663 }
12664 }
12665
12666 object = src_entry->object.vm_object;
12667 src_entry->needs_copy = FALSE;
12668 }
12669
12670
12671 vm_object_lock(object);
12672 vm_object_reference_locked(object); /* object ref. for new entry */
12673 if (object->copy_strategy ==
12674 MEMORY_OBJECT_COPY_SYMMETRIC) {
12675 object->copy_strategy =
12676 MEMORY_OBJECT_COPY_DELAY;
12677 }
12678 vm_object_unlock(object);
12679 }
12680
12681 offset = src_entry->offset + (src_start - src_entry->vme_start);
12682
12683 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
12684 vm_map_entry_copy(new_entry, src_entry);
12685 if (new_entry->is_sub_map) {
12686 /* clr address space specifics */
12687 new_entry->use_pmap = FALSE;
12688 }
12689
12690 new_entry->map_aligned = FALSE;
12691
12692 new_entry->vme_start = map_address;
12693 new_entry->vme_end = map_address + tmp_size;
12694 assert(new_entry->vme_start < new_entry->vme_end);
12695 new_entry->inheritance = inheritance;
12696 new_entry->offset = offset;
12697
12698 /*
12699 * The new region has to be copied now if required.
12700 */
12701 RestartCopy:
12702 if (!copy) {
12703 /*
12704 * Cannot allow an entry describing a JIT
12705 * region to be shared across address spaces.
12706 */
12707 if (src_entry->used_for_jit == TRUE) {
12708 result = KERN_INVALID_ARGUMENT;
12709 break;
12710 }
12711 src_entry->is_shared = TRUE;
12712 new_entry->is_shared = TRUE;
12713 if (!(new_entry->is_sub_map))
12714 new_entry->needs_copy = FALSE;
12715
12716 } else if (src_entry->is_sub_map) {
12717 /* make this a COW sub_map if not already */
12718 new_entry->needs_copy = TRUE;
12719 object = VM_OBJECT_NULL;
12720 } else if (src_entry->wired_count == 0 &&
12721 vm_object_copy_quickly(&new_entry->object.vm_object,
12722 new_entry->offset,
12723 (new_entry->vme_end -
12724 new_entry->vme_start),
12725 &src_needs_copy,
12726 &new_entry_needs_copy)) {
12727
12728 new_entry->needs_copy = new_entry_needs_copy;
12729 new_entry->is_shared = FALSE;
12730
12731 /*
12732 * Handle copy_on_write semantics.
12733 */
12734 if (src_needs_copy && !src_entry->needs_copy) {
12735 vm_prot_t prot;
12736
12737 prot = src_entry->protection & ~VM_PROT_WRITE;
12738
12739 if (override_nx(map, src_entry->alias) && prot)
12740 prot |= VM_PROT_EXECUTE;
12741
12742 vm_object_pmap_protect(object,
12743 offset,
12744 entry_size,
12745 ((src_entry->is_shared
12746 || map->mapped_in_other_pmaps) ?
12747 PMAP_NULL : map->pmap),
12748 src_entry->vme_start,
12749 prot);
12750
12751 src_entry->needs_copy = TRUE;
12752 }
12753 /*
12754 * Throw away the old object reference of the new entry.
12755 */
12756 vm_object_deallocate(object);
12757
12758 } else {
12759 new_entry->is_shared = FALSE;
12760
12761 /*
12762 * The map can be safely unlocked since we
12763 * already hold a reference on the object.
12764 *
12765 * Record the timestamp of the map for later
12766 * verification, and unlock the map.
12767 */
12768 version.main_timestamp = map->timestamp;
12769 vm_map_unlock(map); /* Increments timestamp once! */
12770
12771 /*
12772 * Perform the copy.
12773 */
12774 if (src_entry->wired_count > 0) {
12775 vm_object_lock(object);
12776 result = vm_object_copy_slowly(
12777 object,
12778 offset,
12779 entry_size,
12780 THREAD_UNINT,
12781 &new_entry->object.vm_object);
12782
12783 new_entry->offset = 0;
12784 new_entry->needs_copy = FALSE;
12785 } else {
12786 result = vm_object_copy_strategically(
12787 object,
12788 offset,
12789 entry_size,
12790 &new_entry->object.vm_object,
12791 &new_entry->offset,
12792 &new_entry_needs_copy);
12793
12794 new_entry->needs_copy = new_entry_needs_copy;
12795 }
12796
12797 /*
12798 * Throw away the old object reference of the new entry.
12799 */
12800 vm_object_deallocate(object);
12801
12802 if (result != KERN_SUCCESS &&
12803 result != KERN_MEMORY_RESTART_COPY) {
12804 _vm_map_entry_dispose(map_header, new_entry);
12805 break;
12806 }
12807
12808 /*
12809 * Verify that the map has not substantially
12810 * changed while the copy was being made.
12811 */
12812
12813 vm_map_lock(map);
12814 if (version.main_timestamp + 1 != map->timestamp) {
12815 /*
12816 * Simple version comparison failed.
12817 *
12818 * Retry the lookup and verify that the
12819 * same object/offset are still present.
12820 */
12821 vm_object_deallocate(new_entry->
12822 object.vm_object);
12823 _vm_map_entry_dispose(map_header, new_entry);
12824 if (result == KERN_MEMORY_RESTART_COPY)
12825 result = KERN_SUCCESS;
12826 continue;
12827 }
12828
12829 if (result == KERN_MEMORY_RESTART_COPY) {
12830 vm_object_reference(object);
12831 goto RestartCopy;
12832 }
12833 }
12834
12835 _vm_map_store_entry_link(map_header,
12836 map_header->links.prev, new_entry);
12837
12838 /* Protections for submap mapping are irrelevant here */
12839 if( !src_entry->is_sub_map ) {
12840 *cur_protection &= src_entry->protection;
12841 *max_protection &= src_entry->max_protection;
12842 }
12843 map_address += tmp_size;
12844 mapped_size += tmp_size;
12845 src_start += tmp_size;
12846
12847 } /* end while */
12848
12849 vm_map_unlock(map);
12850 if (result != KERN_SUCCESS) {
12851 /*
12852 * Free all allocated elements.
12853 */
12854 for (src_entry = map_header->links.next;
12855 src_entry != (struct vm_map_entry *)&map_header->links;
12856 src_entry = new_entry) {
12857 new_entry = src_entry->vme_next;
12858 _vm_map_store_entry_unlink(map_header, src_entry);
12859 if (src_entry->is_sub_map) {
12860 vm_map_deallocate(src_entry->object.sub_map);
12861 } else {
12862 vm_object_deallocate(src_entry->object.vm_object);
12863 }
12864 _vm_map_entry_dispose(map_header, src_entry);
12865 }
12866 }
12867 return result;
12868 }
12869
12870 /*
12871 * Routine: vm_remap
12872 *
12873 * Map portion of a task's address space.
12874 * Mapped region must not overlap more than
12875 * one vm memory object. Protections and
12876 * inheritance attributes remain the same
12877 * as in the original task and are out parameters.
12878 * Source and target tasks can be identical.
12879 * Other attributes are identical to those of vm_map()
12880 */
12881 kern_return_t
12882 vm_map_remap(
12883 vm_map_t target_map,
12884 vm_map_address_t *address,
12885 vm_map_size_t size,
12886 vm_map_offset_t mask,
12887 int flags,
12888 vm_map_t src_map,
12889 vm_map_offset_t memory_address,
12890 boolean_t copy,
12891 vm_prot_t *cur_protection,
12892 vm_prot_t *max_protection,
12893 vm_inherit_t inheritance)
12894 {
12895 kern_return_t result;
12896 vm_map_entry_t entry;
12897 vm_map_entry_t insp_entry = VM_MAP_ENTRY_NULL;
12898 vm_map_entry_t new_entry;
12899 struct vm_map_header map_header;
12900 vm_map_offset_t offset_in_mapping;
12901
12902 if (target_map == VM_MAP_NULL)
12903 return KERN_INVALID_ARGUMENT;
12904
12905 switch (inheritance) {
12906 case VM_INHERIT_NONE:
12907 case VM_INHERIT_COPY:
12908 case VM_INHERIT_SHARE:
12909 if (size != 0 && src_map != VM_MAP_NULL)
12910 break;
12911 /*FALL THRU*/
12912 default:
12913 return KERN_INVALID_ARGUMENT;
12914 }
12915
12916 /*
12917 * If the user is requesting that we return the address of the
12918 * first byte of the data (rather than the base of the page),
12919 * then we use different rounding semantics: specifically,
12920 * we assume that (memory_address, size) describes a region
12921 * all of whose pages we must cover, rather than a base to be truncated
12922 * down and a size to be added to that base. So we figure out
12923 * the highest page that the requested region includes and make
12924 * sure that the size will cover it.
12925 *
12926 * The key example we're worried about is of the form:
12927 *
12928 * memory_address = 0x1ff0, size = 0x20
12929 *
12930 * With the old semantics, we round down the memory_address to 0x1000
12931 * and round up the size to 0x1000, resulting in our covering *only*
12932 * page 0x1000. With the new semantics, we'd realize that the region covers
12933 * 0x1ff0-0x2010, and compute a size of 0x2000. Thus, we cover both page
12934 * 0x1000 and page 0x2000 in the region we remap.
12935 */
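/*
 * Worked arithmetic for the example above, assuming 4K pages
 * (PAGE_MASK == 0xFFF):
 *   trunc(0x1ff0)                    = 0x1000
 *   offset_in_mapping                = 0x1ff0 - 0x1000 = 0x0ff0
 *   round(0x1ff0 + 0x20 - 0x1000)    = round(0x1010)   = 0x2000
 * so the remapped region starts at 0x1000 with size 0x2000 and covers
 * both page 0x1000 and page 0x2000, as intended.
 */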
12936 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
12937 offset_in_mapping = memory_address - vm_map_trunc_page(memory_address, PAGE_MASK);
12938 size = vm_map_round_page(memory_address + size - vm_map_trunc_page(memory_address, PAGE_MASK), PAGE_MASK);
12939 } else {
12940 size = vm_map_round_page(size, PAGE_MASK);
12941 }
12942
12943 result = vm_map_remap_extract(src_map, memory_address,
12944 size, copy, &map_header,
12945 cur_protection,
12946 max_protection,
12947 inheritance,
12948 target_map->hdr.entries_pageable);
12949
12950 if (result != KERN_SUCCESS) {
12951 return result;
12952 }
12953
12954 /*
12955 * Allocate/check a range of free virtual address
12956 * space for the target
12957 */
12958 *address = vm_map_trunc_page(*address,
12959 VM_MAP_PAGE_MASK(target_map));
12960 vm_map_lock(target_map);
12961 result = vm_map_remap_range_allocate(target_map, address, size,
12962 mask, flags, &insp_entry);
12963
12964 for (entry = map_header.links.next;
12965 entry != (struct vm_map_entry *)&map_header.links;
12966 entry = new_entry) {
12967 new_entry = entry->vme_next;
12968 _vm_map_store_entry_unlink(&map_header, entry);
12969 if (result == KERN_SUCCESS) {
12970 entry->vme_start += *address;
12971 entry->vme_end += *address;
12972 assert(!entry->map_aligned);
12973 vm_map_store_entry_link(target_map, insp_entry, entry);
12974 insp_entry = entry;
12975 } else {
12976 if (!entry->is_sub_map) {
12977 vm_object_deallocate(entry->object.vm_object);
12978 } else {
12979 vm_map_deallocate(entry->object.sub_map);
12980 }
12981 _vm_map_entry_dispose(&map_header, entry);
12982 }
12983 }
12984
12985 if( target_map->disable_vmentry_reuse == TRUE) {
12986 if( target_map->highest_entry_end < insp_entry->vme_end ){
12987 target_map->highest_entry_end = insp_entry->vme_end;
12988 }
12989 }
12990
12991 if (result == KERN_SUCCESS) {
12992 target_map->size += size;
12993 SAVE_HINT_MAP_WRITE(target_map, insp_entry);
12994 }
12995 vm_map_unlock(target_map);
12996
12997 if (result == KERN_SUCCESS && target_map->wiring_required)
12998 result = vm_map_wire(target_map, *address,
12999 *address + size, *cur_protection, TRUE);
13000
13001 /*
13002 * If requested, return the address of the data pointed to by the
13003 * request, rather than the base of the resulting page.
13004 */
13005 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
13006 *address += offset_in_mapping;
13007 }
13008
13009 return result;
13010 }
13011
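/*
 * A minimal usage sketch for vm_map_remap(): share "size" bytes starting
 * at "src_addr" in "src_map" into "target_map" at an address chosen by
 * the kernel. The wrapper name is hypothetical; error handling is elided.
 */
#if 0 /* illustrative only */
static kern_return_t
example_share_range(
	vm_map_t		target_map,
	vm_map_t		src_map,
	vm_map_offset_t		src_addr,
	vm_map_size_t		size,
	vm_map_address_t	*target_addr)	/* out */
{
	vm_prot_t	cur_prot, max_prot;

	*target_addr = 0;
	return vm_map_remap(target_map,
			    target_addr,	/* in/out: chosen address */
			    size,
			    0,			/* mask: no alignment constraint */
			    VM_FLAGS_ANYWHERE,
			    src_map,
			    src_addr,		/* memory_address in src_map */
			    FALSE,		/* copy: share, don't copy */
			    &cur_prot,		/* out: resulting protections */
			    &max_prot,
			    VM_INHERIT_SHARE);
}
#endif
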
13012 /*
13013 * Routine: vm_map_remap_range_allocate
13014 *
13015 * Description:
13016 * Allocate a range in the specified virtual address map.
13017 * Returns the address and the map entry just before the allocated
13018 * range.
13019 *
13020 * Map must be locked.
13021 */
13022
13023 static kern_return_t
13024 vm_map_remap_range_allocate(
13025 vm_map_t map,
13026 vm_map_address_t *address, /* IN/OUT */
13027 vm_map_size_t size,
13028 vm_map_offset_t mask,
13029 int flags,
13030 vm_map_entry_t *map_entry) /* OUT */
13031 {
13032 vm_map_entry_t entry;
13033 vm_map_offset_t start;
13034 vm_map_offset_t end;
13035 kern_return_t kr;
13036
13037 StartAgain: ;
13038
13039 start = *address;
13040
13041 if (flags & VM_FLAGS_ANYWHERE)
13042 {
13043 /*
13044 * Calculate the first possible address.
13045 */
13046
13047 if (start < map->min_offset)
13048 start = map->min_offset;
13049 if (start > map->max_offset)
13050 return(KERN_NO_SPACE);
13051
13052 /*
13053 * Look for the first possible address;
13054 * if there's already something at this
13055 * address, we have to start after it.
13056 */
13057
13058 if( map->disable_vmentry_reuse == TRUE) {
13059 VM_MAP_HIGHEST_ENTRY(map, entry, start);
13060 } else {
13061 assert(first_free_is_valid(map));
13062 if (start == map->min_offset) {
13063 if ((entry = map->first_free) != vm_map_to_entry(map))
13064 start = entry->vme_end;
13065 } else {
13066 vm_map_entry_t tmp_entry;
13067 if (vm_map_lookup_entry(map, start, &tmp_entry))
13068 start = tmp_entry->vme_end;
13069 entry = tmp_entry;
13070 }
13071 start = vm_map_round_page(start,
13072 VM_MAP_PAGE_MASK(map));
13073 }
13074
13075 /*
13076 * In any case, the "entry" always precedes
13077 * the proposed new region throughout the
13078 * loop:
13079 */
13080
13081 while (TRUE) {
13082 register vm_map_entry_t next;
13083
13084 /*
13085 * Find the end of the proposed new region.
13086 * Be sure we didn't go beyond the end, or
13087 * wrap around the address.
13088 */
13089
13090 end = ((start + mask) & ~mask);
13091 end = vm_map_round_page(end,
13092 VM_MAP_PAGE_MASK(map));
13093 if (end < start)
13094 return(KERN_NO_SPACE);
13095 start = end;
13096 end += size;
13097
13098 if ((end > map->max_offset) || (end < start)) {
13099 if (map->wait_for_space) {
13100 if (size <= (map->max_offset -
13101 map->min_offset)) {
13102 assert_wait((event_t) map, THREAD_INTERRUPTIBLE);
13103 vm_map_unlock(map);
13104 thread_block(THREAD_CONTINUE_NULL);
13105 vm_map_lock(map);
13106 goto StartAgain;
13107 }
13108 }
13109
13110 return(KERN_NO_SPACE);
13111 }
13112
13113 /*
13114 * If there are no more entries, we must win.
13115 */
13116
13117 next = entry->vme_next;
13118 if (next == vm_map_to_entry(map))
13119 break;
13120
13121 /*
13122 * If there is another entry, it must be
13123 * after the end of the potential new region.
13124 */
13125
13126 if (next->vme_start >= end)
13127 break;
13128
13129 /*
13130 * Didn't fit -- move to the next entry.
13131 */
13132
13133 entry = next;
13134 start = entry->vme_end;
13135 }
13136 *address = start;
13137 } else {
13138 vm_map_entry_t temp_entry;
13139
13140 /*
13141 * Verify that:
13142 * the address doesn't itself violate
13143 * the mask requirement.
13144 */
13145
13146 if ((start & mask) != 0)
13147 return(KERN_NO_SPACE);
13148
13149
13150 /*
13151 * ... the address is within bounds
13152 */
13153
13154 end = start + size;
13155
13156 if ((start < map->min_offset) ||
13157 (end > map->max_offset) ||
13158 (start >= end)) {
13159 return(KERN_INVALID_ADDRESS);
13160 }
13161
13162 /*
13163 * If we're asked to overwrite whatever was mapped in that
13164 * range, first deallocate that range.
13165 */
13166 if (flags & VM_FLAGS_OVERWRITE) {
13167 vm_map_t zap_map;
13168
13169 /*
13170 * We use a "zap_map" to avoid having to unlock
13171 * the "map" in vm_map_delete(), which would compromise
13172 * the atomicity of the "deallocate" and then "remap"
13173 * combination.
13174 */
13175 zap_map = vm_map_create(PMAP_NULL,
13176 start,
13177 end,
13178 map->hdr.entries_pageable);
13179 if (zap_map == VM_MAP_NULL) {
13180 return KERN_RESOURCE_SHORTAGE;
13181 }
13182 vm_map_set_page_shift(zap_map, VM_MAP_PAGE_SHIFT(map));
13183
13184 kr = vm_map_delete(map, start, end,
13185 (VM_MAP_REMOVE_SAVE_ENTRIES |
13186 VM_MAP_REMOVE_NO_MAP_ALIGN),
13187 zap_map);
13188 if (kr == KERN_SUCCESS) {
13189 vm_map_destroy(zap_map,
13190 VM_MAP_REMOVE_NO_PMAP_CLEANUP);
13191 zap_map = VM_MAP_NULL;
13192 }
13193 }
13194
13195 /*
13196 * ... the starting address isn't allocated
13197 */
13198
13199 if (vm_map_lookup_entry(map, start, &temp_entry))
13200 return(KERN_NO_SPACE);
13201
13202 entry = temp_entry;
13203
13204 /*
13205 * ... the next region doesn't overlap the
13206 * end point.
13207 */
13208
13209 if ((entry->vme_next != vm_map_to_entry(map)) &&
13210 (entry->vme_next->vme_start < end))
13211 return(KERN_NO_SPACE);
13212 }
13213 *map_entry = entry;
13214 return(KERN_SUCCESS);
13215 }
13216
13217 /*
13218 * vm_map_switch:
13219 *
13220 * Set the address map for the current thread to the specified map
13221 */
13222
13223 vm_map_t
13224 vm_map_switch(
13225 vm_map_t map)
13226 {
13227 int mycpu;
13228 thread_t thread = current_thread();
13229 vm_map_t oldmap = thread->map;
13230
13231 mp_disable_preemption();
13232 mycpu = cpu_number();
13233
13234 /*
13235 * Deactivate the current map and activate the requested map
13236 */
13237 PMAP_SWITCH_USER(thread, map, mycpu);
13238
13239 mp_enable_preemption();
13240 return(oldmap);
13241 }
13242
13243
13244 /*
13245 * Routine: vm_map_write_user
13246 *
13247 * Description:
13248 * Copy out data from kernel space into space in the
13249 * destination map. The space must already exist in the
13250 * destination map.
13251 * NOTE: This routine should only be called by threads
13252 * which can block on a page fault, i.e. kernel-mode user
13253 * threads.
13254 *
13255 */
13256 kern_return_t
13257 vm_map_write_user(
13258 vm_map_t map,
13259 void *src_p,
13260 vm_map_address_t dst_addr,
13261 vm_size_t size)
13262 {
13263 kern_return_t kr = KERN_SUCCESS;
13264
13265 if(current_map() == map) {
13266 if (copyout(src_p, dst_addr, size)) {
13267 kr = KERN_INVALID_ADDRESS;
13268 }
13269 } else {
13270 vm_map_t oldmap;
13271
13272 /* take on the identity of the target map while doing */
13273 /* the transfer */
13274
13275 vm_map_reference(map);
13276 oldmap = vm_map_switch(map);
13277 if (copyout(src_p, dst_addr, size)) {
13278 kr = KERN_INVALID_ADDRESS;
13279 }
13280 vm_map_switch(oldmap);
13281 vm_map_deallocate(map);
13282 }
13283 return kr;
13284 }
13285
13286 /*
13287 * Routine: vm_map_read_user
13288 *
13289 * Description:
13290 * Copy in data from a user space source map into the
13291 * kernel map. The space must already exist in the
13292 * kernel map.
13293 * NOTE: This routine should only be called by threads
13294 * which can block on a page fault, i.e. kernel-mode user
13295 * threads.
13296 *
13297 */
13298 kern_return_t
13299 vm_map_read_user(
13300 vm_map_t map,
13301 vm_map_address_t src_addr,
13302 void *dst_p,
13303 vm_size_t size)
13304 {
13305 kern_return_t kr = KERN_SUCCESS;
13306
13307 if(current_map() == map) {
13308 if (copyin(src_addr, dst_p, size)) {
13309 kr = KERN_INVALID_ADDRESS;
13310 }
13311 } else {
13312 vm_map_t oldmap;
13313
13314 /* take on the identity of the target map while doing */
13315 /* the transfer */
13316
13317 vm_map_reference(map);
13318 oldmap = vm_map_switch(map);
13319 if (copyin(src_addr, dst_p, size)) {
13320 kr = KERN_INVALID_ADDRESS;
13321 }
13322 vm_map_switch(oldmap);
13323 vm_map_deallocate(map);
13324 }
13325 return kr;
13326 }
13327
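/*
 * A hedged sketch of the two helpers above: copy a small kernel value out
 * to a user map and read it back. The wrapper name is hypothetical and
 * "uaddr" is assumed to describe an existing, writable user mapping.
 */
#if 0 /* illustrative only */
static kern_return_t
example_user_roundtrip(
	vm_map_t		umap,
	vm_map_address_t	uaddr)
{
	uint32_t	value = 0x12345678;
	kern_return_t	kr;

	/* kernel -> user */
	kr = vm_map_write_user(umap, &value, uaddr, sizeof (value));
	if (kr != KERN_SUCCESS)
		return kr;
	/* user -> kernel */
	return vm_map_read_user(umap, uaddr, &value, sizeof (value));
}
#endif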
13328
13329 /*
13330 * vm_map_check_protection:
13331 *
13332 * Assert that the target map allows the specified
13333 * privilege on the entire address region given.
13334 * The entire region must be allocated.
13335 */
13336 boolean_t
13337 vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
13338 vm_map_offset_t end, vm_prot_t protection)
13339 {
13340 vm_map_entry_t entry;
13341 vm_map_entry_t tmp_entry;
13342
13343 vm_map_lock(map);
13344
13345 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
13346 {
13347 vm_map_unlock(map);
13348 return (FALSE);
13349 }
13350
13351 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
13352 vm_map_unlock(map);
13353 return(FALSE);
13354 }
13355
13356 entry = tmp_entry;
13357
13358 while (start < end) {
13359 if (entry == vm_map_to_entry(map)) {
13360 vm_map_unlock(map);
13361 return(FALSE);
13362 }
13363
13364 /*
13365 * No holes allowed!
13366 */
13367
13368 if (start < entry->vme_start) {
13369 vm_map_unlock(map);
13370 return(FALSE);
13371 }
13372
13373 /*
13374 * Check protection associated with entry.
13375 */
13376
13377 if ((entry->protection & protection) != protection) {
13378 vm_map_unlock(map);
13379 return(FALSE);
13380 }
13381
13382 /* go to next entry */
13383
13384 start = entry->vme_end;
13385 entry = entry->vme_next;
13386 }
13387 vm_map_unlock(map);
13388 return(TRUE);
13389 }
13390
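/*
 * A minimal sketch of vm_map_check_protection(): assert that an entire
 * range is both readable and writable before touching it. The surrounding
 * wrapper is hypothetical.
 */
#if 0 /* illustrative only */
static kern_return_t
example_require_rw(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	if (!vm_map_check_protection(map, start, end,
				     VM_PROT_READ | VM_PROT_WRITE))
		return KERN_PROTECTION_FAILURE;
	return KERN_SUCCESS;
}
#endif
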
13391 kern_return_t
13392 vm_map_purgable_control(
13393 vm_map_t map,
13394 vm_map_offset_t address,
13395 vm_purgable_t control,
13396 int *state)
13397 {
13398 vm_map_entry_t entry;
13399 vm_object_t object;
13400 kern_return_t kr;
13401 boolean_t was_nonvolatile;
13402
13403 /*
13404 * Vet all the input parameters and current type and state of the
13405 * underlying object. Return with an error if anything is amiss.
13406 */
13407 if (map == VM_MAP_NULL)
13408 return(KERN_INVALID_ARGUMENT);
13409
13410 if (control != VM_PURGABLE_SET_STATE &&
13411 control != VM_PURGABLE_GET_STATE &&
13412 control != VM_PURGABLE_PURGE_ALL)
13413 return(KERN_INVALID_ARGUMENT);
13414
13415 if (control == VM_PURGABLE_PURGE_ALL) {
13416 vm_purgeable_object_purge_all();
13417 return KERN_SUCCESS;
13418 }
13419
13420 if (control == VM_PURGABLE_SET_STATE &&
13421 (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) ||
13422 ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK)))
13423 return(KERN_INVALID_ARGUMENT);
13424
13425 vm_map_lock_read(map);
13426
13427 if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {
13428
13429 /*
13430 * Must pass a valid non-submap address.
13431 */
13432 vm_map_unlock_read(map);
13433 return(KERN_INVALID_ADDRESS);
13434 }
13435
13436 if ((entry->protection & VM_PROT_WRITE) == 0) {
13437 /*
13438 * Can't apply purgable controls to something you can't write.
13439 */
13440 vm_map_unlock_read(map);
13441 return(KERN_PROTECTION_FAILURE);
13442 }
13443
13444 object = entry->object.vm_object;
13445 if (object == VM_OBJECT_NULL ||
13446 object->purgable == VM_PURGABLE_DENY) {
13447 /*
13448 * Object must already be present and be purgeable.
13449 */
13450 vm_map_unlock_read(map);
13451 return KERN_INVALID_ARGUMENT;
13452 }
13453
13454 vm_object_lock(object);
13455
13456 #if 00
13457 if (entry->offset != 0 ||
13458 entry->vme_end - entry->vme_start != object->vo_size) {
13459 /*
13460 * Can only apply purgable controls to the whole (existing)
13461 * object at once.
13462 */
13463 vm_map_unlock_read(map);
13464 vm_object_unlock(object);
13465 return KERN_INVALID_ARGUMENT;
13466 }
13467 #endif
13468
13469 assert(!entry->is_sub_map);
13470 assert(!entry->use_pmap); /* purgeable has its own accounting */
13471
13472 vm_map_unlock_read(map);
13473
13474 was_nonvolatile = (object->purgable == VM_PURGABLE_NONVOLATILE);
13475
13476 kr = vm_object_purgable_control(object, control, state);
13477
13478 if (was_nonvolatile &&
13479 object->purgable != VM_PURGABLE_NONVOLATILE &&
13480 map->pmap == kernel_pmap) {
13481 #if DEBUG
13482 object->vo_purgeable_volatilizer = kernel_task;
13483 #endif /* DEBUG */
13484 }
13485
13486 vm_object_unlock(object);
13487
13488 return kr;
13489 }
13490
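/*
 * A hedged sketch of vm_map_purgable_control(): mark the purgeable object
 * backing "addr" volatile, then read back its current state. The wrapper
 * is hypothetical and assumes "addr" maps an existing purgeable object.
 */
#if 0 /* illustrative only */
static kern_return_t
example_make_volatile(
	vm_map_t	map,
	vm_map_offset_t	addr,
	int		*state)		/* out: current purgeable state */
{
	kern_return_t	kr;

	*state = VM_PURGABLE_VOLATILE;
	kr = vm_map_purgable_control(map, addr, VM_PURGABLE_SET_STATE, state);
	if (kr != KERN_SUCCESS)
		return kr;
	return vm_map_purgable_control(map, addr, VM_PURGABLE_GET_STATE, state);
}
#endif
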
13491 kern_return_t
13492 vm_map_page_query_internal(
13493 vm_map_t target_map,
13494 vm_map_offset_t offset,
13495 int *disposition,
13496 int *ref_count)
13497 {
13498 kern_return_t kr;
13499 vm_page_info_basic_data_t info;
13500 mach_msg_type_number_t count;
13501
13502 count = VM_PAGE_INFO_BASIC_COUNT;
13503 kr = vm_map_page_info(target_map,
13504 offset,
13505 VM_PAGE_INFO_BASIC,
13506 (vm_page_info_t) &info,
13507 &count);
13508 if (kr == KERN_SUCCESS) {
13509 *disposition = info.disposition;
13510 *ref_count = info.ref_count;
13511 } else {
13512 *disposition = 0;
13513 *ref_count = 0;
13514 }
13515
13516 return kr;
13517 }
13518
13519 kern_return_t
13520 vm_map_page_info(
13521 vm_map_t map,
13522 vm_map_offset_t offset,
13523 vm_page_info_flavor_t flavor,
13524 vm_page_info_t info,
13525 mach_msg_type_number_t *count)
13526 {
13527 vm_map_entry_t map_entry;
13528 vm_object_t object;
13529 vm_page_t m;
13530 kern_return_t kr;
13531 kern_return_t retval = KERN_SUCCESS;
13532 boolean_t top_object;
13533 int disposition;
13534 int ref_count;
13535 vm_page_info_basic_t basic_info;
13536 int depth;
13537 vm_map_offset_t offset_in_page;
13538
13539 switch (flavor) {
13540 case VM_PAGE_INFO_BASIC:
13541 if (*count != VM_PAGE_INFO_BASIC_COUNT) {
13542 /*
13543 * The "vm_page_info_basic_data" structure was not
13544 * properly padded, so allow the size to be off by
13545 * one to maintain backwards binary compatibility...
13546 */
13547 if (*count != VM_PAGE_INFO_BASIC_COUNT - 1)
13548 return KERN_INVALID_ARGUMENT;
13549 }
13550 break;
13551 default:
13552 return KERN_INVALID_ARGUMENT;
13553 }
13554
13555 disposition = 0;
13556 ref_count = 0;
13557 top_object = TRUE;
13558 depth = 0;
13559
13560 retval = KERN_SUCCESS;
13561 offset_in_page = offset & PAGE_MASK;
13562 offset = vm_map_trunc_page(offset, PAGE_MASK);
13563
13564 vm_map_lock_read(map);
13565
13566 /*
13567 * First, find the map entry covering "offset", going down
13568 * submaps if necessary.
13569 */
13570 for (;;) {
13571 if (!vm_map_lookup_entry(map, offset, &map_entry)) {
13572 vm_map_unlock_read(map);
13573 return KERN_INVALID_ADDRESS;
13574 }
13575 /* compute offset from this map entry's start */
13576 offset -= map_entry->vme_start;
13577 /* compute offset into this map entry's object (or submap) */
13578 offset += map_entry->offset;
13579
13580 if (map_entry->is_sub_map) {
13581 vm_map_t sub_map;
13582
13583 sub_map = map_entry->object.sub_map;
13584 vm_map_lock_read(sub_map);
13585 vm_map_unlock_read(map);
13586
13587 map = sub_map;
13588
13589 ref_count = MAX(ref_count, map->ref_count);
13590 continue;
13591 }
13592 break;
13593 }
13594
13595 object = map_entry->object.vm_object;
13596 if (object == VM_OBJECT_NULL) {
13597 /* no object -> no page */
13598 vm_map_unlock_read(map);
13599 goto done;
13600 }
13601
13602 vm_object_lock(object);
13603 vm_map_unlock_read(map);
13604
13605 /*
13606 * Go down the VM object shadow chain until we find the page
13607 * we're looking for.
13608 */
13609 for (;;) {
13610 ref_count = MAX(ref_count, object->ref_count);
13611
13612 m = vm_page_lookup(object, offset);
13613
13614 if (m != VM_PAGE_NULL) {
13615 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
13616 break;
13617 } else {
13618 #if MACH_PAGEMAP
13619 if (object->existence_map) {
13620 if (vm_external_state_get(object->existence_map,
13621 offset) ==
13622 VM_EXTERNAL_STATE_EXISTS) {
13623 /*
13624 * this page has been paged out
13625 */
13626 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
13627 break;
13628 }
13629 } else
13630 #endif
13631 if (object->internal &&
13632 object->alive &&
13633 !object->terminating &&
13634 object->pager_ready) {
13635
13636 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
13637 if (VM_COMPRESSOR_PAGER_STATE_GET(
13638 object,
13639 offset)
13640 == VM_EXTERNAL_STATE_EXISTS) {
13641 /* the pager has that page */
13642 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
13643 break;
13644 }
13645 } else {
13646 memory_object_t pager;
13647
13648 vm_object_paging_begin(object);
13649 pager = object->pager;
13650 vm_object_unlock(object);
13651
13652 /*
13653 * Ask the default pager if
13654 * it has this page.
13655 */
13656 kr = memory_object_data_request(
13657 pager,
13658 offset + object->paging_offset,
13659 0, /* just poke the pager */
13660 VM_PROT_READ,
13661 NULL);
13662
13663 vm_object_lock(object);
13664 vm_object_paging_end(object);
13665
13666 if (kr == KERN_SUCCESS) {
13667 /* the default pager has it */
13668 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
13669 break;
13670 }
13671 }
13672 }
13673
13674 if (object->shadow != VM_OBJECT_NULL) {
13675 vm_object_t shadow;
13676
13677 offset += object->vo_shadow_offset;
13678 shadow = object->shadow;
13679
13680 vm_object_lock(shadow);
13681 vm_object_unlock(object);
13682
13683 object = shadow;
13684 top_object = FALSE;
13685 depth++;
13686 } else {
13687 // if (!object->internal)
13688 // break;
13689 // retval = KERN_FAILURE;
13690 // goto done_with_object;
13691 break;
13692 }
13693 }
13694 }
13695 /* The ref_count is not strictly accurate: it measures the number */
13696 /* of entities holding a ref on the object; they may not be mapping */
13697 /* the object, or may not be mapping the section holding the */
13698 /* target page, but it's still a ballpark number and, though an over- */
13699 /* count, it picks up the copy-on-write cases. */
13700
13701 /* We could also get a picture of page sharing from pmap_attributes */
13702 /* but this would undercount, as only faulted-in mappings would */
13703 /* show up. */
13704
13705 if (top_object == TRUE && object->shadow)
13706 disposition |= VM_PAGE_QUERY_PAGE_COPIED;
13707
13708 if (! object->internal)
13709 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
13710
13711 if (m == VM_PAGE_NULL)
13712 goto done_with_object;
13713
13714 if (m->fictitious) {
13715 disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
13716 goto done_with_object;
13717 }
13718 if (m->dirty || pmap_is_modified(m->phys_page))
13719 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
13720
13721 if (m->reference || pmap_is_referenced(m->phys_page))
13722 disposition |= VM_PAGE_QUERY_PAGE_REF;
13723
13724 if (m->speculative)
13725 disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE;
13726
13727 if (m->cs_validated)
13728 disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED;
13729 if (m->cs_tainted)
13730 disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED;
13731
13732 done_with_object:
13733 vm_object_unlock(object);
13734 done:
13735
13736 switch (flavor) {
13737 case VM_PAGE_INFO_BASIC:
13738 basic_info = (vm_page_info_basic_t) info;
13739 basic_info->disposition = disposition;
13740 basic_info->ref_count = ref_count;
13741 basic_info->object_id = (vm_object_id_t) (uintptr_t)
13742 VM_KERNEL_ADDRPERM(object);
13743 basic_info->offset =
13744 (memory_object_offset_t) offset + offset_in_page;
13745 basic_info->depth = depth;
13746 break;
13747 }
13748
13749 return retval;
13750 }
13751
13752 /*
13753 * vm_map_msync
13754 *
13755 * Synchronises the memory range specified with its backing store
13756 * image by either flushing or cleaning the contents to the appropriate
13757 * memory manager, engaging in a memory object synchronize dialog with
13758 * the manager. The client doesn't return until the manager issues
13759 * m_o_s_completed message. MIG Magically converts user task parameter
13760 * to the task's address map.
13761 *
13762 * interpretation of sync_flags
13763 * VM_SYNC_INVALIDATE - discard pages, only return precious
13764 * pages to manager.
13765 *
13766 * VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
13767 * - discard pages, write dirty or precious
13768 * pages back to memory manager.
13769 *
13770 * VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
13771 * - write dirty or precious pages back to
13772 * the memory manager.
13773 *
13774 * VM_SYNC_CONTIGUOUS - does everything normally, but if there
13775 * is a hole in the region, and we would
13776 * have returned KERN_SUCCESS, return
13777 * KERN_INVALID_ADDRESS instead.
13778 *
13779 * NOTE
13780 * The memory object attributes have not yet been implemented; this
13781 * function will have to deal with the invalidate attribute.
13782 *
13783 * RETURNS
13784 * KERN_INVALID_TASK Bad task parameter
13785 * KERN_INVALID_ARGUMENT both sync and async were specified.
13786 * KERN_SUCCESS The usual.
13787 * KERN_INVALID_ADDRESS There was a hole in the region.
13788 */
13789
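/*
 * The VM_BEHAVIOR_DONTNEED and VM_BEHAVIOR_FREE cases earlier in this file
 * are the in-file examples of the sync_flags combinations documented above.
 * A hedged sketch of both, with a hypothetical wrapper:
 */
#if 0 /* illustrative only */
static kern_return_t
example_msync_advice(
	vm_map_t		map,
	vm_map_address_t	addr,
	vm_map_size_t		len,
	boolean_t		discard)
{
	if (discard) {
		/* MADV_FREE-style: kill clean and dirty pages in the range */
		return vm_map_msync(map, addr, len,
				    VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS);
	}
	/* MADV_DONTNEED-style: deactivate; fail if the range has a hole */
	return vm_map_msync(map, addr, len,
			    VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS);
}
#endif
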
13790 kern_return_t
13791 vm_map_msync(
13792 vm_map_t map,
13793 vm_map_address_t address,
13794 vm_map_size_t size,
13795 vm_sync_t sync_flags)
13796 {
13797 msync_req_t msr;
13798 msync_req_t new_msr;
13799 queue_chain_t req_q; /* queue of requests for this msync */
13800 vm_map_entry_t entry;
13801 vm_map_size_t amount_left;
13802 vm_object_offset_t offset;
13803 boolean_t do_sync_req;
13804 boolean_t had_hole = FALSE;
13805 memory_object_t pager;
13806
13807 if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
13808 (sync_flags & VM_SYNC_SYNCHRONOUS))
13809 return(KERN_INVALID_ARGUMENT);
13810
13811 /*
13812 * align address and size on page boundaries
13813 */
13814 size = (vm_map_round_page(address + size,
13815 VM_MAP_PAGE_MASK(map)) -
13816 vm_map_trunc_page(address,
13817 VM_MAP_PAGE_MASK(map)));
13818 address = vm_map_trunc_page(address,
13819 VM_MAP_PAGE_MASK(map));
13820
13821 if (map == VM_MAP_NULL)
13822 return(KERN_INVALID_TASK);
13823
13824 if (size == 0)
13825 return(KERN_SUCCESS);
13826
13827 queue_init(&req_q);
13828 amount_left = size;
13829
13830 while (amount_left > 0) {
13831 vm_object_size_t flush_size;
13832 vm_object_t object;
13833
13834 vm_map_lock(map);
13835 if (!vm_map_lookup_entry(map,
13836 vm_map_trunc_page(
13837 address,
13838 VM_MAP_PAGE_MASK(map)),
13839 &entry)) {
13840
13841 vm_map_size_t skip;
13842
13843 /*
13844 * hole in the address map.
13845 */
13846 had_hole = TRUE;
13847
13848 /*
13849 * Check for empty map.
13850 */
13851 if (entry == vm_map_to_entry(map) &&
13852 entry->vme_next == entry) {
13853 vm_map_unlock(map);
13854 break;
13855 }
13856 /*
13857 * Check that we don't wrap and that
13858 * we have at least one real map entry.
13859 */
13860 if ((map->hdr.nentries == 0) ||
13861 (entry->vme_next->vme_start < address)) {
13862 vm_map_unlock(map);
13863 break;
13864 }
13865 /*
13866 * Move up to the next entry if needed
13867 */
13868 skip = (entry->vme_next->vme_start - address);
13869 if (skip >= amount_left)
13870 amount_left = 0;
13871 else
13872 amount_left -= skip;
13873 address = entry->vme_next->vme_start;
13874 vm_map_unlock(map);
13875 continue;
13876 }
13877
13878 offset = address - entry->vme_start;
13879
13880 /*
13881 * do we have more to flush than is contained in this
13882 * entry?
13883 */
13884 if (amount_left + entry->vme_start + offset > entry->vme_end) {
13885 flush_size = entry->vme_end -
13886 (entry->vme_start + offset);
13887 } else {
13888 flush_size = amount_left;
13889 }
13890 amount_left -= flush_size;
13891 address += flush_size;
13892
13893 if (entry->is_sub_map == TRUE) {
13894 vm_map_t local_map;
13895 vm_map_offset_t local_offset;
13896
13897 local_map = entry->object.sub_map;
13898 local_offset = entry->offset;
13899 vm_map_unlock(map);
13900 if (vm_map_msync(
13901 local_map,
13902 local_offset,
13903 flush_size,
13904 sync_flags) == KERN_INVALID_ADDRESS) {
13905 had_hole = TRUE;
13906 }
13907 continue;
13908 }
13909 object = entry->object.vm_object;
13910
13911 /*
13912 * We can't sync this object if the object has not been
13913 * created yet
13914 */
13915 if (object == VM_OBJECT_NULL) {
13916 vm_map_unlock(map);
13917 continue;
13918 }
13919 offset += entry->offset;
13920
13921 vm_object_lock(object);
13922
13923 if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
13924 int kill_pages = 0;
13925 boolean_t reusable_pages = FALSE;
13926
13927 if (sync_flags & VM_SYNC_KILLPAGES) {
13928 if (object->ref_count == 1 && !object->shadow)
13929 kill_pages = 1;
13930 else
13931 kill_pages = -1;
13932 }
13933 if (kill_pages != -1)
13934 vm_object_deactivate_pages(object, offset,
13935 (vm_object_size_t)flush_size, kill_pages, reusable_pages);
13936 vm_object_unlock(object);
13937 vm_map_unlock(map);
13938 continue;
13939 }
13940 /*
13941 * We can't sync this object if there isn't a pager.
13942 * Don't bother to sync internal objects, since there can't
13943 * be any "permanent" storage for these objects anyway.
13944 */
13945 if ((object->pager == MEMORY_OBJECT_NULL) ||
13946 (object->internal) || (object->private)) {
13947 vm_object_unlock(object);
13948 vm_map_unlock(map);
13949 continue;
13950 }
13951 /*
13952 * keep reference on the object until syncing is done
13953 */
13954 vm_object_reference_locked(object);
13955 vm_object_unlock(object);
13956
13957 vm_map_unlock(map);
13958
13959 do_sync_req = vm_object_sync(object,
13960 offset,
13961 flush_size,
13962 sync_flags & VM_SYNC_INVALIDATE,
13963 ((sync_flags & VM_SYNC_SYNCHRONOUS) ||
13964 (sync_flags & VM_SYNC_ASYNCHRONOUS)),
13965 sync_flags & VM_SYNC_SYNCHRONOUS);
13966 /*
13967 * only send a memory_object_synchronize() if vm_object_sync() returned
13968 * pages or if the entry is writable (i.e. dirty pages may already have been sent back)
13969 */
13970 if (!do_sync_req) {
13971 if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) {
13972 /*
13973 * clear out the clustering and read-ahead hints
13974 */
13975 vm_object_lock(object);
13976
13977 object->pages_created = 0;
13978 object->pages_used = 0;
13979 object->sequential = 0;
13980 object->last_alloc = 0;
13981
13982 vm_object_unlock(object);
13983 }
13984 vm_object_deallocate(object);
13985 continue;
13986 }
13987 msync_req_alloc(new_msr);
13988
13989 vm_object_lock(object);
13990 offset += object->paging_offset;
13991
13992 new_msr->offset = offset;
13993 new_msr->length = flush_size;
13994 new_msr->object = object;
13995 new_msr->flag = VM_MSYNC_SYNCHRONIZING;
13996 re_iterate:
13997
13998 /*
13999 * We can't sync this object if there isn't a pager. The
14000 * pager can disappear anytime we're not holding the object
14001 * lock. So this has to be checked anytime we goto re_iterate.
14002 */
14003
14004 pager = object->pager;
14005
14006 if (pager == MEMORY_OBJECT_NULL) {
14007 vm_object_unlock(object);
14008 vm_object_deallocate(object);
14009 msync_req_free(new_msr);
14010 new_msr = NULL;
14011 continue;
14012 }
14013
14014 queue_iterate(&object->msr_q, msr, msync_req_t, msr_q) {
14015 /*
14016 * need to check for overlapping entry, if found, wait
14017 * on overlapping msr to be done, then reiterate
14018 */
14019 msr_lock(msr);
14020 if (msr->flag == VM_MSYNC_SYNCHRONIZING &&
14021 ((offset >= msr->offset &&
14022 offset < (msr->offset + msr->length)) ||
14023 (msr->offset >= offset &&
14024 msr->offset < (offset + flush_size))))
14025 {
14026 assert_wait((event_t) msr,THREAD_INTERRUPTIBLE);
14027 msr_unlock(msr);
14028 vm_object_unlock(object);
14029 thread_block(THREAD_CONTINUE_NULL);
14030 vm_object_lock(object);
14031 goto re_iterate;
14032 }
14033 msr_unlock(msr);
14034 }/* queue_iterate */
14035
14036 queue_enter(&object->msr_q, new_msr, msync_req_t, msr_q);
14037
14038 vm_object_paging_begin(object);
14039 vm_object_unlock(object);
14040
14041 queue_enter(&req_q, new_msr, msync_req_t, req_q);
14042
14043 (void) memory_object_synchronize(
14044 pager,
14045 offset,
14046 flush_size,
14047 sync_flags & ~VM_SYNC_CONTIGUOUS);
14048
14049 vm_object_lock(object);
14050 vm_object_paging_end(object);
14051 vm_object_unlock(object);
14052 }/* while */
14053
14054 /*
14055 * wait for memory_object_synchronize_completed messages from the pager(s)
14056 */
14057
14058 while (!queue_empty(&req_q)) {
14059 msr = (msync_req_t)queue_first(&req_q);
14060 msr_lock(msr);
14061 while(msr->flag != VM_MSYNC_DONE) {
14062 assert_wait((event_t) msr, THREAD_INTERRUPTIBLE);
14063 msr_unlock(msr);
14064 thread_block(THREAD_CONTINUE_NULL);
14065 msr_lock(msr);
14066 }/* while */
14067 queue_remove(&req_q, msr, msync_req_t, req_q);
14068 msr_unlock(msr);
14069 vm_object_deallocate(msr->object);
14070 msync_req_free(msr);
14071 }/* while */
14072
14073 /* for proper msync() behaviour */
14074 if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS))
14075 return(KERN_INVALID_ADDRESS);
14076
14077 return(KERN_SUCCESS);
14078 }/* vm_msync */
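/*
 * Example (minimal sketch): one way a caller could drive vm_map_msync() for an
 * msync()-style request.  The MS_* translation and "example_msync" are
 * illustrative assumptions, not the actual BSD msync() path.
 */
#if 0 /* illustrative sketch only; not built */
static kern_return_t
example_msync(vm_map_t map, mach_vm_offset_t addr, mach_vm_size_t len, int ms_flags)
{
    vm_sync_t sync_flags = 0;

    if (ms_flags & MS_SYNC)
        sync_flags |= VM_SYNC_SYNCHRONOUS;
    else
        sync_flags |= VM_SYNC_ASYNCHRONOUS;
    if (ms_flags & MS_INVALIDATE)
        sync_flags |= VM_SYNC_INVALIDATE;
    /* ask for KERN_INVALID_ADDRESS if the range has holes */
    sync_flags |= VM_SYNC_CONTIGUOUS;

    return vm_map_msync(map, (vm_map_address_t)addr, len, sync_flags);
}
#endif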
14079
14080 /*
14081 * Routine: convert_port_entry_to_map
14082 * Purpose:
14083 * Convert from a port specifying an entry or a task
14084 * to a map. Doesn't consume the port ref; produces a map ref,
14085 * which may be null. Unlike convert_port_to_map, the
14086 * port may be either task backed or named-entry backed.
14087 * Conditions:
14088 * Nothing locked.
14089 */
14090
14091
14092 vm_map_t
14093 convert_port_entry_to_map(
14094 ipc_port_t port)
14095 {
14096 vm_map_t map;
14097 vm_named_entry_t named_entry;
14098 uint32_t try_failed_count = 0;
14099
14100 if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
14101 while(TRUE) {
14102 ip_lock(port);
14103 if(ip_active(port) && (ip_kotype(port)
14104 == IKOT_NAMED_ENTRY)) {
14105 named_entry =
14106 (vm_named_entry_t)port->ip_kobject;
14107 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
14108 ip_unlock(port);
14109
14110 try_failed_count++;
14111 mutex_pause(try_failed_count);
14112 continue;
14113 }
14114 named_entry->ref_count++;
14115 lck_mtx_unlock(&(named_entry)->Lock);
14116 ip_unlock(port);
14117 if ((named_entry->is_sub_map) &&
14118 (named_entry->protection
14119 & VM_PROT_WRITE)) {
14120 map = named_entry->backing.map;
14121 } else {
14122 mach_destroy_memory_entry(port);
14123 return VM_MAP_NULL;
14124 }
14125 vm_map_reference_swap(map);
14126 mach_destroy_memory_entry(port);
14127 break;
14128 }
14129 else
14130 return VM_MAP_NULL;
14131 }
14132 }
14133 else
14134 map = convert_port_to_map(port);
14135
14136 return map;
14137 }
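/*
 * Example (minimal sketch): the map reference produced above must be dropped
 * by the caller with vm_map_deallocate(); the port reference is untouched.
 * "example_use_map_port" is a hypothetical caller, not an existing routine.
 */
#if 0 /* illustrative sketch only; not built */
static kern_return_t
example_use_map_port(ipc_port_t port)
{
    vm_map_t map;

    map = convert_port_entry_to_map(port);
    if (map == VM_MAP_NULL)
        return KERN_INVALID_ARGUMENT;

    /* ... operate on "map" ... */

    vm_map_deallocate(map); /* release the reference produced above */
    return KERN_SUCCESS;
}
#endif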
14138
14139 /*
14140 * Routine: convert_port_entry_to_object
14141 * Purpose:
14142 * Convert from a port specifying a named entry to an
14143 * object. Doesn't consume the port ref; produces an object ref,
14144 * which may be null.
14145 * Conditions:
14146 * Nothing locked.
14147 */
14148
14149
14150 vm_object_t
14151 convert_port_entry_to_object(
14152 ipc_port_t port)
14153 {
14154 vm_object_t object = VM_OBJECT_NULL;
14155 vm_named_entry_t named_entry;
14156 uint32_t try_failed_count = 0;
14157
14158 if (IP_VALID(port) &&
14159 (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
14160 try_again:
14161 ip_lock(port);
14162 if (ip_active(port) &&
14163 (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
14164 named_entry = (vm_named_entry_t)port->ip_kobject;
14165 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
14166 ip_unlock(port);
14167 try_failed_count++;
14168 mutex_pause(try_failed_count);
14169 goto try_again;
14170 }
14171 named_entry->ref_count++;
14172 lck_mtx_unlock(&(named_entry)->Lock);
14173 ip_unlock(port);
14174 if (!(named_entry->is_sub_map) &&
14175 !(named_entry->is_pager) &&
14176 !(named_entry->is_copy) &&
14177 (named_entry->protection & VM_PROT_WRITE)) {
14178 object = named_entry->backing.object;
14179 vm_object_reference(object);
14180 }
14181 mach_destroy_memory_entry(port);
14182 }
14183 }
14184
14185 return object;
14186 }
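/*
 * Example (minimal sketch): same pattern for the object variant; the object
 * reference, when one is produced, must be released with vm_object_deallocate().
 * "example_use_entry_object" is a hypothetical caller.
 */
#if 0 /* illustrative sketch only; not built */
static void
example_use_entry_object(ipc_port_t port)
{
    vm_object_t object;

    object = convert_port_entry_to_object(port);
    if (object == VM_OBJECT_NULL)
        return; /* not a writable, non-submap, non-pager named entry */

    /* ... operate on "object" ... */

    vm_object_deallocate(object);
}
#endif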
14187
14188 /*
14189 * Export routines to other components for the things we access locally through
14190 * macros.
14191 */
14192 #undef current_map
14193 vm_map_t
14194 current_map(void)
14195 {
14196 return (current_map_fast());
14197 }
14198
14199 /*
14200 * vm_map_reference:
14201 *
14202 * Most code internal to the osfmk will go through a
14203 * macro defining this. This is always here for the
14204 * use of other kernel components.
14205 */
14206 #undef vm_map_reference
14207 void
14208 vm_map_reference(
14209 register vm_map_t map)
14210 {
14211 if (map == VM_MAP_NULL)
14212 return;
14213
14214 lck_mtx_lock(&map->s_lock);
14215 #if TASK_SWAPPER
14216 assert(map->res_count > 0);
14217 assert(map->ref_count >= map->res_count);
14218 map->res_count++;
14219 #endif
14220 map->ref_count++;
14221 lck_mtx_unlock(&map->s_lock);
14222 }
14223
14224 /*
14225 * vm_map_deallocate:
14226 *
14227 * Removes a reference from the specified map,
14228 * destroying it if no references remain.
14229 * The map should not be locked.
14230 */
14231 void
14232 vm_map_deallocate(
14233 register vm_map_t map)
14234 {
14235 unsigned int ref;
14236
14237 if (map == VM_MAP_NULL)
14238 return;
14239
14240 lck_mtx_lock(&map->s_lock);
14241 ref = --map->ref_count;
14242 if (ref > 0) {
14243 vm_map_res_deallocate(map);
14244 lck_mtx_unlock(&map->s_lock);
14245 return;
14246 }
14247 assert(map->ref_count == 0);
14248 lck_mtx_unlock(&map->s_lock);
14249
14250 #if TASK_SWAPPER
14251 /*
14252 * The map residence count isn't decremented here because
14253 * the vm_map_delete below will traverse the entire map,
14254 * deleting entries, and the residence counts on objects
14255 * and sharing maps will go away then.
14256 */
14257 #endif
14258
14259 vm_map_destroy(map, VM_MAP_NO_FLAGS);
14260 }
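/*
 * Example (minimal sketch): vm_map_reference() and vm_map_deallocate() must be
 * paired; a component that caches a map pointer holds a reference for as long
 * as it keeps the pointer.  The cache below is hypothetical.
 */
#if 0 /* illustrative sketch only; not built */
static vm_map_t example_cached_map = VM_MAP_NULL;

static void
example_cache_map(vm_map_t map)
{
    vm_map_reference(map);      /* +1 ref while cached */
    example_cached_map = map;
}

static void
example_uncache_map(void)
{
    vm_map_t map = example_cached_map;

    example_cached_map = VM_MAP_NULL;
    if (map != VM_MAP_NULL)
        vm_map_deallocate(map); /* may tear the map down if this was the last ref */
}
#endif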
14261
14262
14263 void
14264 vm_map_disable_NX(vm_map_t map)
14265 {
14266 if (map == NULL)
14267 return;
14268 if (map->pmap == NULL)
14269 return;
14270
14271 pmap_disable_NX(map->pmap);
14272 }
14273
14274 void
14275 vm_map_disallow_data_exec(vm_map_t map)
14276 {
14277 if (map == NULL)
14278 return;
14279
14280 map->map_disallow_data_exec = TRUE;
14281 }
14282
14283 /* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
14284 * more descriptive.
14285 */
14286 void
14287 vm_map_set_32bit(vm_map_t map)
14288 {
14289 map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
14290 }
14291
14292
14293 void
14294 vm_map_set_64bit(vm_map_t map)
14295 {
14296 map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
14297 }
14298
14299 vm_map_offset_t
14300 vm_compute_max_offset(unsigned is64)
14301 {
14302 return (is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS);
14303 }
14304
14305 uint64_t
14306 vm_map_get_max_aslr_slide_pages(vm_map_t map)
14307 {
14308 return (1 << (vm_map_is_64bit(map) ? 16 : 8));
14309 }
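/*
 * Example (minimal sketch): turning the page count above into a byte-sized,
 * page-aligned ASLR slide.  The entropy source and the modulo policy are
 * assumptions for illustration; the real policy lives in the Mach-O load path.
 */
#if 0 /* illustrative sketch only; not built */
static vm_map_offset_t
example_pick_aslr_slide(vm_map_t map, uint64_t entropy)
{
    uint64_t slide_pages = vm_map_get_max_aslr_slide_pages(map);

    /* a page-aligned slide in [0, slide_pages) pages */
    return (vm_map_offset_t)((entropy % slide_pages) << VM_MAP_PAGE_SHIFT(map));
}
#endif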
14310
14311 boolean_t
14312 vm_map_is_64bit(
14313 vm_map_t map)
14314 {
14315 return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
14316 }
14317
14318 boolean_t
14319 vm_map_has_hard_pagezero(
14320 vm_map_t map,
14321 vm_map_offset_t pagezero_size)
14322 {
14323 /*
14324 * XXX FBDP
14325 * We should lock the VM map (for read) here but we can get away
14326 * with it for now because there can't really be any race condition:
14327 * the VM map's min_offset is changed only when the VM map is created
14328 * and when the zero page is established (when the binary gets loaded),
14329 * and this routine gets called only when the task terminates and the
14330 * VM map is being torn down, and when a new map is created via
14331 * load_machfile()/execve().
14332 */
14333 return (map->min_offset >= pagezero_size);
14334 }
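/*
 * Example (minimal sketch): checking whether a 64-bit map still has the
 * conventional 4GB "page zero" reservation.  The 4GB constant is an
 * assumption here; this file does not define it.
 */
#if 0 /* illustrative sketch only; not built */
static boolean_t
example_has_4gb_pagezero(vm_map_t map)
{
    return vm_map_has_hard_pagezero(map, (vm_map_offset_t)0x100000000ULL);
}
#endif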
14335
14336 /*
14337 * Raise a VM map's maximum offset.
14338 */
14339 kern_return_t
14340 vm_map_raise_max_offset(
14341 vm_map_t map,
14342 vm_map_offset_t new_max_offset)
14343 {
14344 kern_return_t ret;
14345
14346 vm_map_lock(map);
14347 ret = KERN_INVALID_ADDRESS;
14348
14349 if (new_max_offset >= map->max_offset) {
14350 if (!vm_map_is_64bit(map)) {
14351 if (new_max_offset <= (vm_map_offset_t)VM_MAX_ADDRESS) {
14352 map->max_offset = new_max_offset;
14353 ret = KERN_SUCCESS;
14354 }
14355 } else {
14356 if (new_max_offset <= (vm_map_offset_t)MACH_VM_MAX_ADDRESS) {
14357 map->max_offset = new_max_offset;
14358 ret = KERN_SUCCESS;
14359 }
14360 }
14361 }
14362
14363 vm_map_unlock(map);
14364 return ret;
14365 }
14366
14367
14368 /*
14369 * Raise a VM map's minimum offset.
14370 * To strictly enforce "page zero" reservation.
14371 */
14372 kern_return_t
14373 vm_map_raise_min_offset(
14374 vm_map_t map,
14375 vm_map_offset_t new_min_offset)
14376 {
14377 vm_map_entry_t first_entry;
14378
14379 new_min_offset = vm_map_round_page(new_min_offset,
14380 VM_MAP_PAGE_MASK(map));
14381
14382 vm_map_lock(map);
14383
14384 if (new_min_offset < map->min_offset) {
14385 /*
14386 * Can't move min_offset backwards, as that would expose
14387 * a part of the address space that was previously, and for
14388 * possibly good reasons, inaccessible.
14389 */
14390 vm_map_unlock(map);
14391 return KERN_INVALID_ADDRESS;
14392 }
14393
14394 first_entry = vm_map_first_entry(map);
14395 if (first_entry != vm_map_to_entry(map) &&
14396 first_entry->vme_start < new_min_offset) {
14397 /*
14398 * Some memory was already allocated below the new
14399 * minimum offset. It's too late to change it now...
14400 */
14401 vm_map_unlock(map);
14402 return KERN_NO_SPACE;
14403 }
14404
14405 map->min_offset = new_min_offset;
14406
14407 vm_map_unlock(map);
14408
14409 return KERN_SUCCESS;
14410 }
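/*
 * Example (minimal sketch): reserving a hard page zero on a freshly created
 * map, before anything has been mapped into it.  Using PAGE_SIZE as the
 * reservation size is an illustrative assumption; the loader picks the real value.
 */
#if 0 /* illustrative sketch only; not built */
static void
example_reserve_pagezero(vm_map_t new_map)
{
    kern_return_t kr;

    kr = vm_map_raise_min_offset(new_map, (vm_map_offset_t)PAGE_SIZE);
    /* can only fail if something is already mapped below the new minimum */
    assert(kr == KERN_SUCCESS);
}
#endif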
14411
14412 /*
14413 * Set the limit on the maximum amount of user wired memory allowed for this map.
14414 * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side of
14415 * the kernel. The limit is checked on the Mach VM side, so we keep a copy here
14416 * to avoid having to reach over to the BSD data structures.
14417 */
14418
14419 void
14420 vm_map_set_user_wire_limit(vm_map_t map,
14421 vm_size_t limit)
14422 {
14423 map->user_wire_limit = limit;
14424 }
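/*
 * Example (minimal sketch): mirroring an RLIMIT_MEMLOCK-style value into the
 * map, as the BSD side would when the resource limit changes.  The rlimit
 * plumbing is assumed for illustration.
 */
#if 0 /* illustrative sketch only; not built */
static void
example_sync_memlock_limit(vm_map_t map, uint64_t rlim_cur)
{
    vm_map_set_user_wire_limit(map, (vm_size_t)rlim_cur);
}
#endif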
14425
14426
14427 void vm_map_switch_protect(vm_map_t map,
14428 boolean_t val)
14429 {
14430 vm_map_lock(map);
14431 map->switch_protect=val;
14432 vm_map_unlock(map);
14433 }
14434
14435 /*
14436 * IOKit has mapped a region into this map; adjust the pmap's ledgers appropriately.
14437 * phys_footprint is a composite limit consisting of iokit + physmem, so we need to
14438 * bump both counters.
14439 */
14440 void
14441 vm_map_iokit_mapped_region(vm_map_t map, vm_size_t bytes)
14442 {
14443 pmap_t pmap = vm_map_pmap(map);
14444
14445 ledger_credit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
14446 ledger_credit(pmap->ledger, task_ledgers.phys_footprint, bytes);
14447 }
14448
14449 void
14450 vm_map_iokit_unmapped_region(vm_map_t map, vm_size_t bytes)
14451 {
14452 pmap_t pmap = vm_map_pmap(map);
14453
14454 ledger_debit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
14455 ledger_debit(pmap->ledger, task_ledgers.phys_footprint, bytes);
14456 }
14457
14458 /* Add (generate) code signature for memory range */
14459 #if CONFIG_DYNAMIC_CODE_SIGNING
14460 kern_return_t vm_map_sign(vm_map_t map,
14461 vm_map_offset_t start,
14462 vm_map_offset_t end)
14463 {
14464 vm_map_entry_t entry;
14465 vm_page_t m;
14466 vm_object_t object;
14467
14468 /*
14469 * Vet all the input parameters and current type and state of the
14470 * underlying object. Return with an error if anything is amiss.
14471 */
14472 if (map == VM_MAP_NULL)
14473 return(KERN_INVALID_ARGUMENT);
14474
14475 vm_map_lock_read(map);
14476
14477 if (!vm_map_lookup_entry(map, start, &entry) || entry->is_sub_map) {
14478 /*
14479 * Must pass a valid non-submap address.
14480 */
14481 vm_map_unlock_read(map);
14482 return(KERN_INVALID_ADDRESS);
14483 }
14484
14485 if((entry->vme_start > start) || (entry->vme_end < end)) {
14486 /*
14487 * Map entry doesn't cover the requested range. Not handling
14488 * this situation currently.
14489 */
14490 vm_map_unlock_read(map);
14491 return(KERN_INVALID_ARGUMENT);
14492 }
14493
14494 object = entry->object.vm_object;
14495 if (object == VM_OBJECT_NULL) {
14496 /*
14497 * Object must already be present or we can't sign.
14498 */
14499 vm_map_unlock_read(map);
14500 return KERN_INVALID_ARGUMENT;
14501 }
14502
14503 vm_object_lock(object);
14504 vm_map_unlock_read(map);
14505
14506 while(start < end) {
14507 uint32_t refmod;
14508
14509 m = vm_page_lookup(object, start - entry->vme_start + entry->offset );
14510 if (m==VM_PAGE_NULL) {
14511 /* should we try to fault a page here? we can probably
14512 * demand it exists and is locked for this request */
14513 vm_object_unlock(object);
14514 return KERN_FAILURE;
14515 }
14516 /* deal with special page status */
14517 if (m->busy ||
14518 (m->unusual && (m->error || m->restart || m->private || m->absent))) {
14519 vm_object_unlock(object);
14520 return KERN_FAILURE;
14521 }
14522
14523 /* Page is OK... now "validate" it */
14524 /* This is the place where we'll call out to create a code
14525 * directory, later */
14526 m->cs_validated = TRUE;
14527
14528 /* The page is now "clean" for codesigning purposes. That means
14529 * we don't consider it as modified (wpmapped) anymore. But
14530 * we'll disconnect the page so we note any future modification
14531 * attempts. */
14532 m->wpmapped = FALSE;
14533 refmod = pmap_disconnect(m->phys_page);
14534
14535 /* Pull the dirty status from the pmap, since we cleared the
14536 * wpmapped bit */
14537 if ((refmod & VM_MEM_MODIFIED) && !m->dirty) {
14538 SET_PAGE_DIRTY(m, FALSE);
14539 }
14540
14541 /* On to the next page */
14542 start += PAGE_SIZE;
14543 }
14544 vm_object_unlock(object);
14545
14546 return KERN_SUCCESS;
14547 }
14548 #endif
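/*
 * Example (minimal sketch): vm_map_sign() expects a range that is fully
 * covered by a single, already-populated map entry; the caller aligns the
 * range to the map's page size first.  "example_sign_region" is hypothetical
 * and only meaningful when CONFIG_DYNAMIC_CODE_SIGNING is configured.
 */
#if 0 /* illustrative sketch only; not built */
static kern_return_t
example_sign_region(vm_map_t map, vm_map_offset_t start, vm_map_size_t size)
{
    vm_map_offset_t s = vm_map_trunc_page(start, VM_MAP_PAGE_MASK(map));
    vm_map_offset_t e = vm_map_round_page(start + size, VM_MAP_PAGE_MASK(map));

    return vm_map_sign(map, s, e);
}
#endif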
14549
14550 kern_return_t vm_map_partial_reap(vm_map_t map, unsigned int *reclaimed_resident, unsigned int *reclaimed_compressed)
14551 {
14552 vm_map_entry_t entry = VM_MAP_ENTRY_NULL;
14553 vm_map_entry_t next_entry;
14554 kern_return_t kr = KERN_SUCCESS;
14555 vm_map_t zap_map;
14556
14557 vm_map_lock(map);
14558
14559 /*
14560 * We use a "zap_map" to avoid having to unlock
14561 * the "map" in vm_map_delete().
14562 */
14563 zap_map = vm_map_create(PMAP_NULL,
14564 map->min_offset, map->max_offset,
14565 map->hdr.entries_pageable);
14566 
14567 if (zap_map == VM_MAP_NULL) {
14568 vm_map_unlock(map); /* don't return with the map still locked */
14569 return KERN_RESOURCE_SHORTAGE;
14570 }
14571
14572 vm_map_set_page_shift(zap_map,
14573 VM_MAP_PAGE_SHIFT(map));
14574
14575 for (entry = vm_map_first_entry(map);
14576 entry != vm_map_to_entry(map);
14577 entry = next_entry) {
14578 next_entry = entry->vme_next;
14579
14580 if (entry->object.vm_object && !entry->is_sub_map && (entry->object.vm_object->internal == TRUE)
14581 && (entry->object.vm_object->ref_count == 1)) {
14582
14583 *reclaimed_resident += entry->object.vm_object->resident_page_count;
14584 *reclaimed_compressed += vm_compressor_pager_get_count(entry->object.vm_object->pager);
14585
14586 (void)vm_map_delete(map,
14587 entry->vme_start,
14588 entry->vme_end,
14589 VM_MAP_REMOVE_SAVE_ENTRIES,
14590 zap_map);
14591 }
14592 }
14593
14594 vm_map_unlock(map);
14595
14596 /*
14597 * Get rid of the "zap_maps" and all the map entries that
14598 * they may still contain.
14599 */
14600 if (zap_map != VM_MAP_NULL) {
14601 vm_map_destroy(zap_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
14602 zap_map = VM_MAP_NULL;
14603 }
14604
14605 return kr;
14606 }
14607
14608 #if CONFIG_FREEZE
14609
14610 kern_return_t vm_map_freeze_walk(
14611 vm_map_t map,
14612 unsigned int *purgeable_count,
14613 unsigned int *wired_count,
14614 unsigned int *clean_count,
14615 unsigned int *dirty_count,
14616 unsigned int dirty_budget,
14617 boolean_t *has_shared)
14618 {
14619 vm_map_entry_t entry;
14620
14621 vm_map_lock_read(map);
14622
14623 *purgeable_count = *wired_count = *clean_count = *dirty_count = 0;
14624 *has_shared = FALSE;
14625
14626 for (entry = vm_map_first_entry(map);
14627 entry != vm_map_to_entry(map);
14628 entry = entry->vme_next) {
14629 unsigned int purgeable, clean, dirty, wired;
14630 boolean_t shared;
14631
14632 if ((entry->object.vm_object == 0) ||
14633 (entry->is_sub_map) ||
14634 (entry->object.vm_object->phys_contiguous)) {
14635 continue;
14636 }
14637
14638 default_freezer_pack(&purgeable, &wired, &clean, &dirty, dirty_budget, &shared, entry->object.vm_object, NULL);
14639
14640 *purgeable_count += purgeable;
14641 *wired_count += wired;
14642 *clean_count += clean;
14643 *dirty_count += dirty;
14644
14645 if (shared) {
14646 *has_shared = TRUE;
14647 }
14648
14649 /* Adjust pageout budget and finish up if reached */
14650 if (dirty_budget) {
14651 dirty_budget -= dirty;
14652 if (dirty_budget == 0) {
14653 break;
14654 }
14655 }
14656 }
14657
14658 vm_map_unlock_read(map);
14659
14660 return KERN_SUCCESS;
14661 }
14662
14663 kern_return_t vm_map_freeze(
14664 vm_map_t map,
14665 unsigned int *purgeable_count,
14666 unsigned int *wired_count,
14667 unsigned int *clean_count,
14668 unsigned int *dirty_count,
14669 unsigned int dirty_budget,
14670 boolean_t *has_shared)
14671 {
14672 vm_map_entry_t entry2 = VM_MAP_ENTRY_NULL;
14673 kern_return_t kr = KERN_SUCCESS;
14674 boolean_t default_freezer_active = TRUE;
14675
14676 *purgeable_count = *wired_count = *clean_count = *dirty_count = 0;
14677 *has_shared = FALSE;
14678
14679 /*
14680 * We need the exclusive lock here so that we can
14681 * block any page faults or lookups while we are
14682 * in the middle of freezing this vm map.
14683 */
14684 vm_map_lock(map);
14685
14686 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
14687 default_freezer_active = FALSE;
14688 }
14689
14690 if (default_freezer_active) {
14691 if (map->default_freezer_handle == NULL) {
14692 map->default_freezer_handle = default_freezer_handle_allocate();
14693 }
14694
14695 if ((kr = default_freezer_handle_init(map->default_freezer_handle)) != KERN_SUCCESS) {
14696 /*
14697 * Can happen if default_freezer_handle passed in is NULL
14698 * Or, a table has already been allocated and associated
14699 * with this handle, i.e. the map is already frozen.
14700 */
14701 goto done;
14702 }
14703 }
14704
14705 for (entry2 = vm_map_first_entry(map);
14706 entry2 != vm_map_to_entry(map);
14707 entry2 = entry2->vme_next) {
14708
14709 vm_object_t src_object = entry2->object.vm_object;
14710
14711 if (entry2->object.vm_object && !entry2->is_sub_map && !entry2->object.vm_object->phys_contiguous) {
14712 /* If eligible, scan the entry, moving eligible pages over to our parent object */
14713 if (default_freezer_active) {
14714 unsigned int purgeable, clean, dirty, wired;
14715 boolean_t shared;
14716
14717 default_freezer_pack(&purgeable, &wired, &clean, &dirty, dirty_budget, &shared,
14718 src_object, map->default_freezer_handle);
14719
14720 *purgeable_count += purgeable;
14721 *wired_count += wired;
14722 *clean_count += clean;
14723 *dirty_count += dirty;
14724
14725 /* Adjust pageout budget and finish up if reached */
14726 if (dirty_budget) {
14727 dirty_budget -= dirty;
14728 if (dirty_budget == 0) {
14729 break;
14730 }
14731 }
14732
14733 if (shared) {
14734 *has_shared = TRUE;
14735 }
14736 } else {
14737 /*
14738 * To the compressor.
14739 */
14740 if (entry2->object.vm_object->internal == TRUE) {
14741 vm_object_pageout(entry2->object.vm_object);
14742 }
14743 }
14744 }
14745 }
14746
14747 if (default_freezer_active) {
14748 /* Finally, throw out the pages to swap */
14749 default_freezer_pageout(map->default_freezer_handle);
14750 }
14751
14752 done:
14753 vm_map_unlock(map);
14754
14755 return kr;
14756 }
14757
14758 kern_return_t
14759 vm_map_thaw(
14760 vm_map_t map)
14761 {
14762 kern_return_t kr = KERN_SUCCESS;
14763
14764 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
14765 /*
14766 * We thaw on demand in the presence of the compressed pager.
14767 */
14768 return kr;
14769 }
14770
14771 vm_map_lock(map);
14772
14773 if (map->default_freezer_handle == NULL) {
14774 /*
14775 * This map is not in a frozen state.
14776 */
14777 kr = KERN_FAILURE;
14778 goto out;
14779 }
14780
14781 kr = default_freezer_unpack(map->default_freezer_handle);
14782 out:
14783 vm_map_unlock(map);
14784
14785 return kr;
14786 }
14787 #endif
14788
14789 /*
14790 * vm_map_entry_should_cow_for_true_share:
14791 *
14792 * Determines if the map entry should be clipped and setup for copy-on-write
14793 * to avoid applying "true_share" to a large VM object when only a subset is
14794 * targeted.
14795 *
14796 * For now, we target only the map entries created for the Objective C
14797 * Garbage Collector, which initially have the following properties:
14798 * - alias == VM_MEMORY_MALLOC
14799 * - wired_count == 0
14800 * - !needs_copy
14801 * and a VM object with:
14802 * - internal
14803 * - copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC
14804 * - !true_share
14805 * - vo_size == ANON_CHUNK_SIZE
14806 */
14807 boolean_t
14808 vm_map_entry_should_cow_for_true_share(
14809 vm_map_entry_t entry)
14810 {
14811 vm_object_t object;
14812
14813 if (entry->is_sub_map) {
14814 /* entry does not point at a VM object */
14815 return FALSE;
14816 }
14817
14818 if (entry->needs_copy) {
14819 /* already set for copy_on_write: done! */
14820 return FALSE;
14821 }
14822
14823 if (entry->alias != VM_MEMORY_MALLOC &&
14824 entry->alias != VM_MEMORY_MALLOC_SMALL) {
14825 /* not a malloc heap or Obj-C Garbage Collector heap */
14826 return FALSE;
14827 }
14828
14829 if (entry->wired_count) {
14830 /* wired: can't change the map entry... */
14831 vm_counters.should_cow_but_wired++;
14832 return FALSE;
14833 }
14834
14835 object = entry->object.vm_object;
14836
14837 if (object == VM_OBJECT_NULL) {
14838 /* no object yet... */
14839 return FALSE;
14840 }
14841
14842 if (!object->internal) {
14843 /* not an internal object */
14844 return FALSE;
14845 }
14846
14847 if (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
14848 /* not the default copy strategy */
14849 return FALSE;
14850 }
14851
14852 if (object->true_share) {
14853 /* already true_share: too late to avoid it */
14854 return FALSE;
14855 }
14856
14857 if (entry->alias == VM_MEMORY_MALLOC &&
14858 object->vo_size != ANON_CHUNK_SIZE) {
14859 /* ... not an object created for the ObjC Garbage Collector */
14860 return FALSE;
14861 }
14862
14863 if (entry->alias == VM_MEMORY_MALLOC_SMALL &&
14864 object->vo_size != 2048 * 4096) {
14865 /* ... not a "MALLOC_SMALL" heap */
14866 return FALSE;
14867 }
14868
14869 /*
14870 * All the criteria match: we have a large object being targeted for "true_share".
14871 * To limit the adverse side-effects linked with "true_share", tell the caller to
14872 * try and avoid setting up the entire object for "true_share" by clipping the
14873 * targeted range and setting it up for copy-on-write.
14874 */
14875 return TRUE;
14876 }
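/*
 * Example (minimal sketch): how a caller typically acts on a TRUE result:
 * clip the entry down to the range of interest and mark it copy-on-write so
 * that only that range, not the whole large object, becomes "true_share".
 * This mirrors the callers in this file but omits their locking; the map
 * must be locked for write around the clipping.
 */
#if 0 /* illustrative sketch only; not built */
static void
example_limit_true_share(vm_map_t map, vm_map_entry_t entry,
                         vm_map_offset_t start, vm_map_offset_t end)
{
    if (vm_map_entry_should_cow_for_true_share(entry)) {
        vm_map_clip_start(map, entry,
            vm_map_trunc_page(start, VM_MAP_PAGE_MASK(map)));
        vm_map_clip_end(map, entry,
            vm_map_round_page(end, VM_MAP_PAGE_MASK(map)));
        entry->needs_copy = TRUE;   /* force copy-on-write on the clipped range */
    }
}
#endif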
14877
14878 vm_map_offset_t
14879 vm_map_round_page_mask(
14880 vm_map_offset_t offset,
14881 vm_map_offset_t mask)
14882 {
14883 return VM_MAP_ROUND_PAGE(offset, mask);
14884 }
14885
14886 vm_map_offset_t
14887 vm_map_trunc_page_mask(
14888 vm_map_offset_t offset,
14889 vm_map_offset_t mask)
14890 {
14891 return VM_MAP_TRUNC_PAGE(offset, mask);
14892 }
14893
14894 int
14895 vm_map_page_shift(
14896 vm_map_t map)
14897 {
14898 return VM_MAP_PAGE_SHIFT(map);
14899 }
14900
14901 int
14902 vm_map_page_size(
14903 vm_map_t map)
14904 {
14905 return VM_MAP_PAGE_SIZE(map);
14906 }
14907
14908 int
14909 vm_map_page_mask(
14910 vm_map_t map)
14911 {
14912 return VM_MAP_PAGE_MASK(map);
14913 }
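/*
 * Example (worked arithmetic): for a 16KB-page map (page shift 14,
 * page mask 0x3fff):
 *
 *    vm_map_trunc_page_mask(0x6003, 0x3fff) == 0x4000
 *    vm_map_round_page_mask(0x6003, 0x3fff) == 0x8000
 *    vm_map_page_size(map)                  == 16384
 */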
14914
14915 kern_return_t
14916 vm_map_set_page_shift(
14917 vm_map_t map,
14918 int pageshift)
14919 {
14920 if (map->hdr.nentries != 0) {
14921 /* too late to change page size */
14922 return KERN_FAILURE;
14923 }
14924
14925 map->hdr.page_shift = pageshift;
14926
14927 return KERN_SUCCESS;
14928 }
14929
14930 int
14931 vm_map_purge(
14932 vm_map_t map)
14933 {
14934 int num_object_purged;
14935 vm_map_entry_t entry;
14936 vm_map_offset_t next_address;
14937 vm_object_t object;
14938 int state;
14939 kern_return_t kr;
14940
14941 num_object_purged = 0;
14942
14943 vm_map_lock_read(map);
14944 entry = vm_map_first_entry(map);
14945 while (entry != vm_map_to_entry(map)) {
14946 if (entry->is_sub_map) {
14947 goto next;
14948 }
14949 if (! (entry->protection & VM_PROT_WRITE)) {
14950 goto next;
14951 }
14952 object = entry->object.vm_object;
14953 if (object == VM_OBJECT_NULL) {
14954 goto next;
14955 }
14956 if (object->purgable != VM_PURGABLE_VOLATILE) {
14957 goto next;
14958 }
14959
14960 vm_object_lock(object);
14961 #if 00
14962 if (entry->offset != 0 ||
14963 (entry->vme_end - entry->vme_start) != object->vo_size) {
14964 vm_object_unlock(object);
14965 goto next;
14966 }
14967 #endif
14968 next_address = entry->vme_end;
14969 vm_map_unlock_read(map);
14970 state = VM_PURGABLE_EMPTY;
14971 kr = vm_object_purgable_control(object,
14972 VM_PURGABLE_SET_STATE,
14973 &state);
14974 if (kr == KERN_SUCCESS) {
14975 num_object_purged++;
14976 }
14977 vm_object_unlock(object);
14978
14979 vm_map_lock_read(map);
14980 if (vm_map_lookup_entry(map, next_address, &entry)) {
14981 continue;
14982 }
14983 next:
14984 entry = entry->vme_next;
14985 }
14986 vm_map_unlock_read(map);
14987
14988 return num_object_purged;
14989 }
14990
14991 kern_return_t
14992 vm_map_query_volatile(
14993 vm_map_t map,
14994 mach_vm_size_t *volatile_virtual_size_p,
14995 mach_vm_size_t *volatile_resident_size_p,
14996 mach_vm_size_t *volatile_pmap_size_p)
14997 {
14998 mach_vm_size_t volatile_virtual_size;
14999 mach_vm_size_t volatile_resident_count;
15000 mach_vm_size_t volatile_pmap_count;
15001 mach_vm_size_t resident_count;
15002 vm_map_entry_t entry;
15003 vm_object_t object;
15004
15005 /* map should be locked by caller */
15006
15007 volatile_virtual_size = 0;
15008 volatile_resident_count = 0;
15009 volatile_pmap_count = 0;
15010
15011 for (entry = vm_map_first_entry(map);
15012 entry != vm_map_to_entry(map);
15013 entry = entry->vme_next) {
15014 if (entry->is_sub_map) {
15015 continue;
15016 }
15017 if (! (entry->protection & VM_PROT_WRITE)) {
15018 continue;
15019 }
15020 object = entry->object.vm_object;
15021 if (object == VM_OBJECT_NULL) {
15022 continue;
15023 }
15024 if (object->purgable != VM_PURGABLE_VOLATILE) {
15025 continue;
15026 }
15027 if (entry->offset != 0) {
15028 /*
15029 * If the map entry has been split and the object now
15030 * appears several times in the VM map, we don't want
15031 * to count the object's resident_page_count more than
15032 * once. We count it only for the first one, starting
15033 * at offset 0 and ignore the other VM map entries.
15034 */
15035 continue;
15036 }
15037 resident_count = object->resident_page_count;
15038 if ((entry->offset / PAGE_SIZE) >= resident_count) {
15039 resident_count = 0;
15040 } else {
15041 resident_count -= (entry->offset / PAGE_SIZE);
15042 }
15043
15044 volatile_virtual_size += entry->vme_end - entry->vme_start;
15045 volatile_resident_count += resident_count;
15046 volatile_pmap_count += pmap_query_resident(map->pmap,
15047 entry->vme_start,
15048 entry->vme_end);
15049 }
15050
15051 /* map is still locked on return */
15052
15053 *volatile_virtual_size_p = volatile_virtual_size;
15054 *volatile_resident_size_p = volatile_resident_count * PAGE_SIZE;
15055 *volatile_pmap_size_p = volatile_pmap_count * PAGE_SIZE;
15056
15057 return KERN_SUCCESS;
15058 }
15059
15060 #if VM_SCAN_FOR_SHADOW_CHAIN
15061 int vm_map_shadow_max(vm_map_t map);
15062 int vm_map_shadow_max(
15063 vm_map_t map)
15064 {
15065 int shadows, shadows_max;
15066 vm_map_entry_t entry;
15067 vm_object_t object, next_object;
15068
15069 if (map == NULL)
15070 return 0;
15071
15072 shadows_max = 0;
15073
15074 vm_map_lock_read(map);
15075
15076 for (entry = vm_map_first_entry(map);
15077 entry != vm_map_to_entry(map);
15078 entry = entry->vme_next) {
15079 if (entry->is_sub_map) {
15080 continue;
15081 }
15082 object = entry->object.vm_object;
15083 if (object == NULL) {
15084 continue;
15085 }
15086 vm_object_lock_shared(object);
15087 for (shadows = 0;
15088 object->shadow != NULL;
15089 shadows++, object = next_object) {
15090 next_object = object->shadow;
15091 vm_object_lock_shared(next_object);
15092 vm_object_unlock(object);
15093 }
15094 vm_object_unlock(object);
15095 if (shadows > shadows_max) {
15096 shadows_max = shadows;
15097 }
15098 }
15099
15100 vm_map_unlock_read(map);
15101
15102 return shadows_max;
15103 }
15104 #endif /* VM_SCAN_FOR_SHADOW_CHAIN */