1 /*
2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_OSREFERENCE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the
10 * License may not be used to create, or enable the creation or
11 * redistribution of, unlawful or unlicensed copies of an Apple operating
12 * system, or to circumvent, violate, or enable the circumvention or
13 * violation of, any terms of an Apple operating system software license
14 * agreement.
15 *
16 * Please obtain a copy of the License at
17 * http://www.opensource.apple.com/apsl/ and read it before using this
18 * file.
19 *
20 * The Original Code and all software distributed under the License are
21 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
22 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
23 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
24 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
25 * Please see the License for the specific language governing rights and
26 * limitations under the License.
27 *
28 * @APPLE_LICENSE_OSREFERENCE_HEADER_END@
29 */
30 /*
31 * @OSF_COPYRIGHT@
32 */
33 /*
34 * Mach Operating System
35 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
36 * All Rights Reserved.
37 *
38 * Permission to use, copy, modify and distribute this software and its
39 * documentation is hereby granted, provided that both the copyright
40 * notice and this permission notice appear in all copies of the
41 * software, derivative works or modified versions, and any portions
42 * thereof, and that both notices appear in supporting documentation.
43 *
44 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
45 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
46 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
47 *
48 * Carnegie Mellon requests users of this software to return to
49 *
50 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
51 * School of Computer Science
52 * Carnegie Mellon University
53 * Pittsburgh PA 15213-3890
54 *
55 * any improvements or extensions that they make and grant Carnegie Mellon
56 * the rights to redistribute these changes.
57 */
58 /*
59 */
60 /*
61 * File: vm/vm_object.c
62 * Author: Avadis Tevanian, Jr., Michael Wayne Young
63 *
64 * Virtual memory object module.
65 */
66
67 #include <mach_pagemap.h>
68 #include <task_swapper.h>
69
70 #include <mach/mach_types.h>
71 #include <mach/memory_object.h>
72 #include <mach/memory_object_default.h>
73 #include <mach/memory_object_control_server.h>
74 #include <mach/vm_param.h>
75
76 #include <ipc/ipc_types.h>
77 #include <ipc/ipc_port.h>
78
79 #include <kern/kern_types.h>
80 #include <kern/assert.h>
81 #include <kern/lock.h>
82 #include <kern/queue.h>
83 #include <kern/xpr.h>
84 #include <kern/zalloc.h>
85 #include <kern/host.h>
86 #include <kern/host_statistics.h>
87 #include <kern/processor.h>
88 #include <kern/misc_protos.h>
89
90 #include <vm/memory_object.h>
91 #include <vm/vm_fault.h>
92 #include <vm/vm_map.h>
93 #include <vm/vm_object.h>
94 #include <vm/vm_page.h>
95 #include <vm/vm_pageout.h>
96 #include <vm/vm_protos.h>
97
98 /*
99 * Virtual memory objects maintain the actual data
100 * associated with allocated virtual memory. A given
101 * page of memory exists within exactly one object.
102 *
103 * An object is only deallocated when all "references"
104 * are given up.
105 *
106 * Associated with each object is a list of all resident
107 * memory pages belonging to that object; this list is
108 * maintained by the "vm_page" module, but locked by the object's
109 * lock.
110 *
111 * Each object also records the memory object reference
112 * that is used by the kernel to request and write
113 * back data (the memory object, field "pager"), etc...
114 *
115 * Virtual memory objects are allocated to provide
116 * zero-filled memory (vm_allocate) or map a user-defined
117 * memory object into a virtual address space (vm_map).
118 *
119 * Virtual memory objects that refer to a user-defined
120 * memory object are called "permanent", because all changes
121 * made in virtual memory are reflected back to the
122  * memory manager, which may then store them permanently.
123 * Other virtual memory objects are called "temporary",
124 * meaning that changes need be written back only when
125 * necessary to reclaim pages, and that storage associated
126 * with the object can be discarded once it is no longer
127 * mapped.
128 *
129 * A permanent memory object may be mapped into more
130 * than one virtual address space. Moreover, two threads
131 * may attempt to make the first mapping of a memory
132 * object concurrently. Only one thread is allowed to
133  * complete this mapping; all others wait until the
134 * "pager_initialized" field is asserted, indicating
135 * that the first thread has initialized all of the
136 * necessary fields in the virtual memory object structure.
137 *
138 * The kernel relies on a *default memory manager* to
139 * provide backing storage for the zero-filled virtual
140 * memory objects. The pager memory objects associated
141 * with these temporary virtual memory objects are only
142 * requested from the default memory manager when it
143 * becomes necessary. Virtual memory objects
144 * that depend on the default memory manager are called
145 * "internal". The "pager_created" field is provided to
146 * indicate whether these ports have ever been allocated.
147 *
148 * The kernel may also create virtual memory objects to
149 * hold changed pages after a copy-on-write operation.
150 * In this case, the virtual memory object (and its
151 * backing storage -- its memory object) only contain
152 * those pages that have been changed. The "shadow"
153 * field refers to the virtual memory object that contains
154 * the remainder of the contents. The "shadow_offset"
155 * field indicates where in the "shadow" these contents begin.
156 * The "copy" field refers to a virtual memory object
157 * to which changed pages must be copied before changing
158 * this object, in order to implement another form
159 * of copy-on-write optimization.
160 *
161 * The virtual memory object structure also records
162 * the attributes associated with its memory object.
163 * The "pager_ready", "can_persist" and "copy_strategy"
164 * fields represent those attributes. The "cached_list"
165 * field is used in the implementation of the persistence
166 * attribute.
167 *
168 * ZZZ Continue this comment.
169 */
170
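/*
 * A minimal, purely illustrative sketch of the reference rules described
 * above (a hypothetical caller, not one of the call sites in this file):
 * every vm_object_allocate() or vm_object_reference() must eventually be
 * matched by a vm_object_deallocate().
 */
#if 0	/* illustrative sketch only */
static void
vm_object_lifecycle_example(void)
{
	vm_object_t	object;

	/* Allocate a temporary, internal object one page in size. */
	object = vm_object_allocate((vm_object_size_t) PAGE_SIZE);
	if (object == VM_OBJECT_NULL)
		return;				/* zone exhausted */

	/* Take an extra reference, as a second mapping would. */
	vm_object_reference(object);

	vm_object_deallocate(object);		/* drop the extra reference */
	vm_object_deallocate(object);		/* drop the allocation reference;
						 * storage may now be reclaimed */
}
#endif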
171 /* Forward declarations for internal functions. */
172 static kern_return_t vm_object_terminate(
173 vm_object_t object);
174
175 extern void vm_object_remove(
176 vm_object_t object);
177
178 static vm_object_t vm_object_cache_trim(
179 boolean_t called_from_vm_object_deallocate);
180
181 static void vm_object_deactivate_all_pages(
182 vm_object_t object);
183
184 static kern_return_t vm_object_copy_call(
185 vm_object_t src_object,
186 vm_object_offset_t src_offset,
187 vm_object_size_t size,
188 vm_object_t *_result_object);
189
190 static void vm_object_do_collapse(
191 vm_object_t object,
192 vm_object_t backing_object);
193
194 static void vm_object_do_bypass(
195 vm_object_t object,
196 vm_object_t backing_object);
197
198 static void vm_object_release_pager(
199 memory_object_t pager);
200
201 static zone_t vm_object_zone; /* vm backing store zone */
202
203 /*
204 * All wired-down kernel memory belongs to a single virtual
205 * memory object (kernel_object) to avoid wasting data structures.
206 */
207 static struct vm_object kernel_object_store;
208 vm_object_t kernel_object;
209
210 /*
211 * The submap object is used as a placeholder for vm_map_submap
212 * operations. The object is declared in vm_map.c because it
213 * is exported by the vm_map module. The storage is declared
214 * here because it must be initialized here.
215 */
216 static struct vm_object vm_submap_object_store;
217
218 /*
219 * Virtual memory objects are initialized from
220 * a template (see vm_object_allocate).
221 *
222 * When adding a new field to the virtual memory
223 * object structure, be sure to add initialization
224 * (see _vm_object_allocate()).
225 */
226 static struct vm_object vm_object_template;
227
228 /*
229 * Virtual memory objects that are not referenced by
230 * any address maps, but that are allowed to persist
231 * (an attribute specified by the associated memory manager),
232 * are kept in a queue (vm_object_cached_list).
233 *
234 * When an object from this queue is referenced again,
235 * for example to make another address space mapping,
236 * it must be removed from the queue. That is, the
237 * queue contains *only* objects with zero references.
238 *
239 * The kernel may choose to terminate objects from this
240 * queue in order to reclaim storage. The current policy
241 * is to permit a fixed maximum number of unreferenced
242 * objects (vm_object_cached_max).
243 *
244  * A mutex (accessed by routines
245 * vm_object_cache_{lock,lock_try,unlock}) governs the
246 * object cache. It must be held when objects are
247 * added to or removed from the cache (in vm_object_terminate).
248 * The routines that acquire a reference to a virtual
249 * memory object based on one of the memory object ports
250 * must also lock the cache.
251 *
252 * Ideally, the object cache should be more isolated
253 * from the reference mechanism, so that the lock need
254 * not be held to make simple references.
255 */
256 static queue_head_t vm_object_cached_list;
257 static int vm_object_cached_count=0;
258 static int vm_object_cached_high; /* highest # cached objects */
259 static int vm_object_cached_max = 512; /* may be patched */
260
261 static decl_mutex_data(,vm_object_cached_lock_data)
262
263 #define vm_object_cache_lock() \
264 mutex_lock(&vm_object_cached_lock_data)
265 #define vm_object_cache_lock_try() \
266 mutex_try(&vm_object_cached_lock_data)
267 #define vm_object_cache_unlock() \
268 mutex_unlock(&vm_object_cached_lock_data)
269
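/*
 * A minimal sketch (hypothetical helper, not used elsewhere in this file)
 * of the locking rule stated above: the cache lock must bracket every
 * update of vm_object_cached_list / vm_object_cached_count, exactly as
 * vm_object_deallocate() and vm_object_cache_trim() do below.
 */
#if 0	/* illustrative sketch only */
static void
vm_object_cache_insert_example(
	vm_object_t	object)
{
	vm_object_cache_lock();
	queue_enter(&vm_object_cached_list, object,
		    vm_object_t, cached_list);
	vm_object_cached_count++;
	vm_object_cache_unlock();
}
#endif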
270 #define VM_OBJECT_HASH_COUNT 1024
271 static queue_head_t vm_object_hashtable[VM_OBJECT_HASH_COUNT];
272 static struct zone *vm_object_hash_zone;
273
274 struct vm_object_hash_entry {
275 queue_chain_t hash_link; /* hash chain link */
276 memory_object_t pager; /* pager we represent */
277 vm_object_t object; /* corresponding object */
278 boolean_t waiting; /* someone waiting for
279 * termination */
280 };
281
282 typedef struct vm_object_hash_entry *vm_object_hash_entry_t;
283 #define VM_OBJECT_HASH_ENTRY_NULL ((vm_object_hash_entry_t) 0)
284
285 #define VM_OBJECT_HASH_SHIFT 8
286 #define vm_object_hash(pager) \
287 ((((unsigned)pager) >> VM_OBJECT_HASH_SHIFT) % VM_OBJECT_HASH_COUNT)
288
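/*
 * Worked example of the hash above (illustrative address only): a pager at
 * 0x12345000 maps to ((0x12345000 >> 8) % 1024) = 0x123450 % 1024 = 80,
 * so its entry is chained on vm_object_hashtable[80].
 */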
289 void vm_object_hash_entry_free(
290 vm_object_hash_entry_t entry);
291
292 /*
293 * vm_object_hash_lookup looks up a pager in the hashtable
294 * and returns the corresponding entry, with optional removal.
295 */
296
297 static vm_object_hash_entry_t
298 vm_object_hash_lookup(
299 memory_object_t pager,
300 boolean_t remove_entry)
301 {
302 register queue_t bucket;
303 register vm_object_hash_entry_t entry;
304
305 bucket = &vm_object_hashtable[vm_object_hash(pager)];
306
307 entry = (vm_object_hash_entry_t)queue_first(bucket);
308 while (!queue_end(bucket, (queue_entry_t)entry)) {
309 if (entry->pager == pager && !remove_entry)
310 return(entry);
311 else if (entry->pager == pager) {
312 queue_remove(bucket, entry,
313 vm_object_hash_entry_t, hash_link);
314 return(entry);
315 }
316
317 entry = (vm_object_hash_entry_t)queue_next(&entry->hash_link);
318 }
319
320 return(VM_OBJECT_HASH_ENTRY_NULL);
321 }
322
323 /*
324  * vm_object_hash_insert enters the specified
325  * pager / cache object association in the hashtable.
326 */
327
328 static void
329 vm_object_hash_insert(
330 vm_object_hash_entry_t entry)
331 {
332 register queue_t bucket;
333
334 bucket = &vm_object_hashtable[vm_object_hash(entry->pager)];
335
336 queue_enter(bucket, entry, vm_object_hash_entry_t, hash_link);
337 }
338
339 static vm_object_hash_entry_t
340 vm_object_hash_entry_alloc(
341 memory_object_t pager)
342 {
343 vm_object_hash_entry_t entry;
344
345 entry = (vm_object_hash_entry_t)zalloc(vm_object_hash_zone);
346 entry->pager = pager;
347 entry->object = VM_OBJECT_NULL;
348 entry->waiting = FALSE;
349
350 return(entry);
351 }
352
353 void
354 vm_object_hash_entry_free(
355 vm_object_hash_entry_t entry)
356 {
357 zfree(vm_object_hash_zone, entry);
358 }
359
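/*
 * A minimal sketch (hypothetical helper) of how the routines above fit
 * together when a pager/object association is recorded and later torn
 * down; per the cache-lock rule, the hash table is only touched with the
 * cache lock held.
 */
#if 0	/* illustrative sketch only */
static void
vm_object_hash_example(
	memory_object_t	pager,
	vm_object_t	object)
{
	vm_object_hash_entry_t	entry;

	/* Record the association. */
	entry = vm_object_hash_entry_alloc(pager);
	entry->object = object;
	vm_object_cache_lock();
	vm_object_hash_insert(entry);
	vm_object_cache_unlock();

	/* ... later: look it up again, removing it from its bucket ... */
	vm_object_cache_lock();
	entry = vm_object_hash_lookup(pager, TRUE);
	vm_object_cache_unlock();
	if (entry != VM_OBJECT_HASH_ENTRY_NULL)
		vm_object_hash_entry_free(entry);
}
#endif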
360 /*
361 * vm_object_allocate:
362 *
363 * Returns a new object with the given size.
364 */
365
366 __private_extern__ void
367 _vm_object_allocate(
368 vm_object_size_t size,
369 vm_object_t object)
370 {
371 XPR(XPR_VM_OBJECT,
372 "vm_object_allocate, object 0x%X size 0x%X\n",
373 (integer_t)object, size, 0,0,0);
374
375 *object = vm_object_template;
376 queue_init(&object->memq);
377 queue_init(&object->msr_q);
378 #ifdef UPL_DEBUG
379 queue_init(&object->uplq);
380 #endif /* UPL_DEBUG */
381 vm_object_lock_init(object);
382 object->size = size;
383 }
384
385 __private_extern__ vm_object_t
386 vm_object_allocate(
387 vm_object_size_t size)
388 {
389 register vm_object_t object;
390
391 object = (vm_object_t) zalloc(vm_object_zone);
392
393 // dbgLog(object, size, 0, 2); /* (TEST/DEBUG) */
394
395 if (object != VM_OBJECT_NULL)
396 _vm_object_allocate(size, object);
397
398 return object;
399 }
400
401 /*
402 * vm_object_bootstrap:
403 *
404 * Initialize the VM objects module.
405 */
406 __private_extern__ void
407 vm_object_bootstrap(void)
408 {
409 register int i;
410
411 vm_object_zone = zinit((vm_size_t) sizeof(struct vm_object),
412 round_page_32(512*1024),
413 round_page_32(12*1024),
414 "vm objects");
415
416 queue_init(&vm_object_cached_list);
417 mutex_init(&vm_object_cached_lock_data, 0);
418
419 vm_object_hash_zone =
420 zinit((vm_size_t) sizeof (struct vm_object_hash_entry),
421 round_page_32(512*1024),
422 round_page_32(12*1024),
423 "vm object hash entries");
424
425 for (i = 0; i < VM_OBJECT_HASH_COUNT; i++)
426 queue_init(&vm_object_hashtable[i]);
427
428 /*
429 * Fill in a template object, for quick initialization
430 */
431
432 /* memq; Lock; init after allocation */
433 vm_object_template.size = 0;
434 vm_object_template.memq_hint = VM_PAGE_NULL;
435 vm_object_template.ref_count = 1;
436 #if TASK_SWAPPER
437 vm_object_template.res_count = 1;
438 #endif /* TASK_SWAPPER */
439 vm_object_template.resident_page_count = 0;
440 vm_object_template.copy = VM_OBJECT_NULL;
441 vm_object_template.shadow = VM_OBJECT_NULL;
442 vm_object_template.shadow_offset = (vm_object_offset_t) 0;
443 vm_object_template.cow_hint = ~(vm_offset_t)0;
444 vm_object_template.true_share = FALSE;
445
446 vm_object_template.pager = MEMORY_OBJECT_NULL;
447 vm_object_template.paging_offset = 0;
448 vm_object_template.pager_control = MEMORY_OBJECT_CONTROL_NULL;
449 /* msr_q; init after allocation */
450
451 vm_object_template.copy_strategy = MEMORY_OBJECT_COPY_SYMMETRIC;
452 vm_object_template.absent_count = 0;
453 vm_object_template.paging_in_progress = 0;
454
455 /* Begin bitfields */
456 vm_object_template.all_wanted = 0; /* all bits FALSE */
457 vm_object_template.pager_created = FALSE;
458 vm_object_template.pager_initialized = FALSE;
459 vm_object_template.pager_ready = FALSE;
460 vm_object_template.pager_trusted = FALSE;
461 vm_object_template.can_persist = FALSE;
462 vm_object_template.internal = TRUE;
463 vm_object_template.temporary = TRUE;
464 vm_object_template.private = FALSE;
465 vm_object_template.pageout = FALSE;
466 vm_object_template.alive = TRUE;
467 vm_object_template.purgable = VM_OBJECT_NONPURGABLE;
468 vm_object_template.silent_overwrite = FALSE;
469 vm_object_template.advisory_pageout = FALSE;
470 vm_object_template.shadowed = FALSE;
471 vm_object_template.terminating = FALSE;
472 vm_object_template.shadow_severed = FALSE;
473 vm_object_template.phys_contiguous = FALSE;
474 vm_object_template.nophyscache = FALSE;
475 /* End bitfields */
476
477 /* cache bitfields */
478 vm_object_template.wimg_bits = VM_WIMG_DEFAULT;
479
480 /* cached_list; init after allocation */
481 vm_object_template.last_alloc = (vm_object_offset_t) 0;
482 vm_object_template.cluster_size = 0;
483 #if MACH_PAGEMAP
484 vm_object_template.existence_map = VM_EXTERNAL_NULL;
485 #endif /* MACH_PAGEMAP */
486 #if MACH_ASSERT
487 vm_object_template.paging_object = VM_OBJECT_NULL;
488 #endif /* MACH_ASSERT */
489
490 /*
491 * Initialize the "kernel object"
492 */
493
494 kernel_object = &kernel_object_store;
495
496 /*
497 * Note that in the following size specifications, we need to add 1 because
498 * VM_MAX_KERNEL_ADDRESS (vm_last_addr) is a maximum address, not a size.
499 */
500
501 #ifdef ppc
502 _vm_object_allocate((vm_last_addr - VM_MIN_KERNEL_ADDRESS) + 1,
503 kernel_object);
504 #else
505 _vm_object_allocate((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) + 1,
506 kernel_object);
507 #endif
508 kernel_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
509
510 /*
511 * Initialize the "submap object". Make it as large as the
512 * kernel object so that no limit is imposed on submap sizes.
513 */
514
515 vm_submap_object = &vm_submap_object_store;
516 #ifdef ppc
517 _vm_object_allocate((vm_last_addr - VM_MIN_KERNEL_ADDRESS) + 1,
518 vm_submap_object);
519 #else
520 _vm_object_allocate((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) + 1,
521 vm_submap_object);
522 #endif
523 vm_submap_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
524
525 /*
526 * Create an "extra" reference to this object so that we never
527 * try to deallocate it; zfree doesn't like to be called with
528 * non-zone memory.
529 */
530 vm_object_reference(vm_submap_object);
531
532 #if MACH_PAGEMAP
533 vm_external_module_initialize();
534 #endif /* MACH_PAGEMAP */
535 }
536
537 __private_extern__ void
538 vm_object_init(void)
539 {
540 /*
541 * Finish initializing the kernel object.
542 */
543 }
544
545 /* remove the typedef below when emergency work-around is taken out */
546 typedef struct vnode_pager {
547 memory_object_t pager;
548 memory_object_t pager_handle; /* pager */
549 memory_object_control_t control_handle; /* memory object's control handle */
550 void *vnode_handle; /* vnode handle */
551 } *vnode_pager_t;
552
553 #define MIGHT_NOT_CACHE_SHADOWS 1
554 #if MIGHT_NOT_CACHE_SHADOWS
555 static int cache_shadows = TRUE;
556 #endif /* MIGHT_NOT_CACHE_SHADOWS */
557
558 /*
559 * vm_object_deallocate:
560 *
561 * Release a reference to the specified object,
562 * gained either through a vm_object_allocate
563 * or a vm_object_reference call. When all references
564 * are gone, storage associated with this object
565 * may be relinquished.
566 *
567 * No object may be locked.
568 */
569 __private_extern__ void
570 vm_object_deallocate(
571 register vm_object_t object)
572 {
573 boolean_t retry_cache_trim = FALSE;
574 vm_object_t shadow = VM_OBJECT_NULL;
575
576 // if(object)dbgLog(object, object->ref_count, object->can_persist, 3); /* (TEST/DEBUG) */
577 // else dbgLog(object, 0, 0, 3); /* (TEST/DEBUG) */
578
579
580 while (object != VM_OBJECT_NULL) {
581
582 /*
583 * The cache holds a reference (uncounted) to
584 * the object; we must lock it before removing
585 * the object.
586 */
587 for (;;) {
588 vm_object_cache_lock();
589
590 /*
591 * if we try to take a regular lock here
592 * we risk deadlocking against someone
593 * holding a lock on this object while
594 * trying to vm_object_deallocate a different
595 * object
596 */
597 if (vm_object_lock_try(object))
598 break;
599 vm_object_cache_unlock();
600 mutex_pause(); /* wait a bit */
601 }
602 assert(object->ref_count > 0);
603
604 /*
605 * If the object has a named reference, and only
606 * that reference would remain, inform the pager
607 * about the last "mapping" reference going away.
608 */
609 if ((object->ref_count == 2) && (object->named)) {
610 memory_object_t pager = object->pager;
611
612 /* Notify the Pager that there are no */
613 /* more mappers for this object */
614
615 if (pager != MEMORY_OBJECT_NULL) {
616 vm_object_unlock(object);
617 vm_object_cache_unlock();
618
619 memory_object_unmap(pager);
620
621 for (;;) {
622 vm_object_cache_lock();
623
624 /*
625 * if we try to take a regular lock here
626 * we risk deadlocking against someone
627 * holding a lock on this object while
628 * trying to vm_object_deallocate a different
629 * object
630 */
631 if (vm_object_lock_try(object))
632 break;
633 vm_object_cache_unlock();
634 mutex_pause(); /* wait a bit */
635 }
636 assert(object->ref_count > 0);
637 }
638 }
639
640 /*
641 * Lose the reference. If other references
642 * remain, then we are done, unless we need
643 * to retry a cache trim.
644 * If it is the last reference, then keep it
645 * until any pending initialization is completed.
646 */
647
648 /* if the object is terminating, it cannot go into */
649 /* the cache and we obviously should not call */
650 /* terminate again. */
651
652 if ((object->ref_count > 1) || object->terminating) {
653 object->ref_count--;
654 vm_object_res_deallocate(object);
655 vm_object_cache_unlock();
656
657 if (object->ref_count == 1 &&
658 object->shadow != VM_OBJECT_NULL) {
659 /*
660 * There's only one reference left on this
661 * VM object. We can't tell if it's a valid
662 * one (from a mapping for example) or if this
663 * object is just part of a possibly stale and
664 * useless shadow chain.
665 * We would like to try and collapse it into
666 * its parent, but we don't have any pointers
667 * back to this parent object.
668 * But we can try and collapse this object with
669 * its own shadows, in case these are useless
670 * too...
671 * We can't bypass this object though, since we
672 * don't know if this last reference on it is
673 * meaningful or not.
674 */
675 vm_object_collapse(object, 0, FALSE);
676 }
677
678 vm_object_unlock(object);
679 if (retry_cache_trim &&
680 ((object = vm_object_cache_trim(TRUE)) !=
681 VM_OBJECT_NULL)) {
682 continue;
683 }
684 return;
685 }
686
687 /*
688 * We have to wait for initialization
689 * before destroying or caching the object.
690 */
691
692 if (object->pager_created && ! object->pager_initialized) {
693 assert(! object->can_persist);
694 vm_object_assert_wait(object,
695 VM_OBJECT_EVENT_INITIALIZED,
696 THREAD_UNINT);
697 vm_object_unlock(object);
698 vm_object_cache_unlock();
699 thread_block(THREAD_CONTINUE_NULL);
700 continue;
701 }
702
703 /*
704 * If this object can persist, then enter it in
705 * the cache. Otherwise, terminate it.
706 *
707 * NOTE: Only permanent objects are cached, and
708 * permanent objects cannot have shadows. This
709 * affects the residence counting logic in a minor
710 * way (can do it in-line, mostly).
711 */
712
713 if ((object->can_persist) && (object->alive)) {
714 /*
715 * Now it is safe to decrement reference count,
716 * and to return if reference count is > 0.
717 */
718 if (--object->ref_count > 0) {
719 vm_object_res_deallocate(object);
720 vm_object_unlock(object);
721 vm_object_cache_unlock();
722 if (retry_cache_trim &&
723 ((object = vm_object_cache_trim(TRUE)) !=
724 VM_OBJECT_NULL)) {
725 continue;
726 }
727 return;
728 }
729
730 #if MIGHT_NOT_CACHE_SHADOWS
731 /*
732 * Remove shadow now if we don't
733 * want to cache shadows.
734 */
735 if (! cache_shadows) {
736 shadow = object->shadow;
737 object->shadow = VM_OBJECT_NULL;
738 }
739 #endif /* MIGHT_NOT_CACHE_SHADOWS */
740
741 /*
742 * Enter the object onto the queue of
743 * cached objects, and deactivate
744 * all of its pages.
745 */
746 assert(object->shadow == VM_OBJECT_NULL);
747 VM_OBJ_RES_DECR(object);
748 XPR(XPR_VM_OBJECT,
749 "vm_o_deallocate: adding %x to cache, queue = (%x, %x)\n",
750 (integer_t)object,
751 (integer_t)vm_object_cached_list.next,
752 (integer_t)vm_object_cached_list.prev,0,0);
753
754 vm_object_cached_count++;
755 if (vm_object_cached_count > vm_object_cached_high)
756 vm_object_cached_high = vm_object_cached_count;
757 queue_enter(&vm_object_cached_list, object,
758 vm_object_t, cached_list);
759 vm_object_cache_unlock();
760 vm_object_deactivate_all_pages(object);
761 vm_object_unlock(object);
762
763 #if MIGHT_NOT_CACHE_SHADOWS
764 /*
765 * If we have a shadow that we need
766 * to deallocate, do so now, remembering
767 * to trim the cache later.
768 */
769 if (! cache_shadows && shadow != VM_OBJECT_NULL) {
770 object = shadow;
771 retry_cache_trim = TRUE;
772 continue;
773 }
774 #endif /* MIGHT_NOT_CACHE_SHADOWS */
775
776 /*
777 * Trim the cache. If the cache trim
778 * returns with a shadow for us to deallocate,
779 * then remember to retry the cache trim
780 * when we are done deallocating the shadow.
781 * Otherwise, we are done.
782 */
783
784 object = vm_object_cache_trim(TRUE);
785 if (object == VM_OBJECT_NULL) {
786 return;
787 }
788 retry_cache_trim = TRUE;
789
790 } else {
791 /*
792 * This object is not cachable; terminate it.
793 */
794 XPR(XPR_VM_OBJECT,
795 "vm_o_deallocate: !cacheable 0x%X res %d paging_ops %d thread 0x%p ref %d\n",
796 (integer_t)object, object->resident_page_count,
797 object->paging_in_progress,
798 (void *)current_thread(),object->ref_count);
799
800 VM_OBJ_RES_DECR(object); /* XXX ? */
801 /*
802 * Terminate this object. If it had a shadow,
803 * then deallocate it; otherwise, if we need
804 * to retry a cache trim, do so now; otherwise,
805 * we are done. "pageout" objects have a shadow,
806 * but maintain a "paging reference" rather than
807 * a normal reference.
808 */
809 shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
810 if(vm_object_terminate(object) != KERN_SUCCESS) {
811 return;
812 }
813 if (shadow != VM_OBJECT_NULL) {
814 object = shadow;
815 continue;
816 }
817 if (retry_cache_trim &&
818 ((object = vm_object_cache_trim(TRUE)) !=
819 VM_OBJECT_NULL)) {
820 continue;
821 }
822 return;
823 }
824 }
825 assert(! retry_cache_trim);
826 }
827
828 /*
829 * Check to see whether we really need to trim
830 * down the cache. If so, remove an object from
831 * the cache, terminate it, and repeat.
832 *
833 * Called with, and returns with, cache lock unlocked.
834 */
835 vm_object_t
836 vm_object_cache_trim(
837 boolean_t called_from_vm_object_deallocate)
838 {
839 register vm_object_t object = VM_OBJECT_NULL;
840 vm_object_t shadow;
841
842 for (;;) {
843
844 /*
845 * If we no longer need to trim the cache,
846 * then we are done.
847 */
848
849 vm_object_cache_lock();
850 if (vm_object_cached_count <= vm_object_cached_max) {
851 vm_object_cache_unlock();
852 return VM_OBJECT_NULL;
853 }
854
855 /*
856 * We must trim down the cache, so remove
857 * the first object in the cache.
858 */
859 XPR(XPR_VM_OBJECT,
860 "vm_object_cache_trim: removing from front of cache (%x, %x)\n",
861 (integer_t)vm_object_cached_list.next,
862 (integer_t)vm_object_cached_list.prev, 0, 0, 0);
863
864 object = (vm_object_t) queue_first(&vm_object_cached_list);
865 if(object == (vm_object_t) &vm_object_cached_list) {
866 /* something's wrong with the calling parameter or */
867 /* the value of vm_object_cached_count, just fix */
868 /* and return */
869 if(vm_object_cached_max < 0)
870 vm_object_cached_max = 0;
871 vm_object_cached_count = 0;
872 vm_object_cache_unlock();
873 return VM_OBJECT_NULL;
874 }
875 vm_object_lock(object);
876 queue_remove(&vm_object_cached_list, object, vm_object_t,
877 cached_list);
878 vm_object_cached_count--;
879
880 /*
881 * Since this object is in the cache, we know
882 * that it is initialized and has no references.
883 * Take a reference to avoid recursive deallocations.
884 */
885
886 assert(object->pager_initialized);
887 assert(object->ref_count == 0);
888 object->ref_count++;
889
890 /*
891 * Terminate the object.
892 * If the object had a shadow, we let vm_object_deallocate
893 * deallocate it. "pageout" objects have a shadow, but
894 * maintain a "paging reference" rather than a normal
895 * reference.
896 * (We are careful here to limit recursion.)
897 */
898 shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
899 if(vm_object_terminate(object) != KERN_SUCCESS)
900 continue;
901 if (shadow != VM_OBJECT_NULL) {
902 if (called_from_vm_object_deallocate) {
903 return shadow;
904 } else {
905 vm_object_deallocate(shadow);
906 }
907 }
908 }
909 }
910
911 boolean_t vm_object_terminate_remove_all = FALSE;
912
913 /*
914 * Routine: vm_object_terminate
915 * Purpose:
916 * Free all resources associated with a vm_object.
917 * In/out conditions:
918 * Upon entry, the object must be locked,
919 * and the object must have exactly one reference.
920 *
921 * The shadow object reference is left alone.
922 *
923  * The object must be unlocked if it's found that pages
924 * must be flushed to a backing object. If someone
925 * manages to map the object while it is being flushed
926 * the object is returned unlocked and unchanged. Otherwise,
927 * upon exit, the cache will be unlocked, and the
928 * object will cease to exist.
929 */
930 static kern_return_t
931 vm_object_terminate(
932 register vm_object_t object)
933 {
934 memory_object_t pager;
935 register vm_page_t p;
936 vm_object_t shadow_object;
937
938 XPR(XPR_VM_OBJECT, "vm_object_terminate, object 0x%X ref %d\n",
939 (integer_t)object, object->ref_count, 0, 0, 0);
940
941 if (!object->pageout && (!object->temporary || object->can_persist)
942 && (object->pager != NULL || object->shadow_severed)) {
943 vm_object_cache_unlock();
944 while (!queue_empty(&object->memq)) {
945 /*
946 * Clear pager_trusted bit so that the pages get yanked
947 * out of the object instead of cleaned in place. This
948 * prevents a deadlock in XMM and makes more sense anyway.
949 */
950 object->pager_trusted = FALSE;
951
952 p = (vm_page_t) queue_first(&object->memq);
953
954 VM_PAGE_CHECK(p);
955
956 if (p->busy || p->cleaning) {
957 if(p->cleaning || p->absent) {
958 vm_object_paging_wait(object, THREAD_UNINT);
959 continue;
960 } else {
961 panic("vm_object_terminate.3 0x%x 0x%x", object, p);
962 }
963 }
964
965 vm_page_lock_queues();
966 p->busy = TRUE;
967 VM_PAGE_QUEUES_REMOVE(p);
968 vm_page_unlock_queues();
969
970 if (p->absent || p->private) {
971
972 /*
973 * For private pages, VM_PAGE_FREE just
974 * leaves the page structure around for
975 * its owner to clean up. For absent
976 * pages, the structure is returned to
977 * the appropriate pool.
978 */
979
980 goto free_page;
981 }
982
983 if (p->fictitious)
984 panic("vm_object_terminate.4 0x%x 0x%x", object, p);
985
986 if (!p->dirty)
987 p->dirty = pmap_is_modified(p->phys_page);
988
989 if ((p->dirty || p->precious) && !p->error && object->alive) {
990 vm_pageout_cluster(p); /* flush page */
991 vm_object_paging_wait(object, THREAD_UNINT);
992 XPR(XPR_VM_OBJECT,
993 "vm_object_terminate restart, object 0x%X ref %d\n",
994 (integer_t)object, object->ref_count, 0, 0, 0);
995 } else {
996 free_page:
997 VM_PAGE_FREE(p);
998 }
999 }
1000 vm_object_unlock(object);
1001 vm_object_cache_lock();
1002 vm_object_lock(object);
1003 }
1004
1005 /*
1006 * Make sure the object isn't already being terminated
1007 */
1008 if(object->terminating) {
1009 object->ref_count -= 1;
1010 assert(object->ref_count > 0);
1011 vm_object_cache_unlock();
1012 vm_object_unlock(object);
1013 return KERN_FAILURE;
1014 }
1015
1016 /*
1017 * Did somebody get a reference to the object while we were
1018 * cleaning it?
1019 */
1020 if(object->ref_count != 1) {
1021 object->ref_count -= 1;
1022 assert(object->ref_count > 0);
1023 vm_object_res_deallocate(object);
1024 vm_object_cache_unlock();
1025 vm_object_unlock(object);
1026 return KERN_FAILURE;
1027 }
1028
1029 /*
1030 * Make sure no one can look us up now.
1031 */
1032
1033 object->terminating = TRUE;
1034 object->alive = FALSE;
1035 vm_object_remove(object);
1036
1037 /*
1038 * Detach the object from its shadow if we are the shadow's
1039 * copy. The reference we hold on the shadow must be dropped
1040 * by our caller.
1041 */
1042 if (((shadow_object = object->shadow) != VM_OBJECT_NULL) &&
1043 !(object->pageout)) {
1044 vm_object_lock(shadow_object);
1045 if (shadow_object->copy == object)
1046 shadow_object->copy = VM_OBJECT_NULL;
1047 vm_object_unlock(shadow_object);
1048 }
1049
1050 /*
1051 * The pageout daemon might be playing with our pages.
1052 * Now that the object is dead, it won't touch any more
1053 * pages, but some pages might already be on their way out.
1054 * Hence, we wait until the active paging activities have ceased
1055 * before we break the association with the pager itself.
1056 */
1057 while (object->paging_in_progress != 0) {
1058 vm_object_cache_unlock();
1059 vm_object_wait(object,
1060 VM_OBJECT_EVENT_PAGING_IN_PROGRESS,
1061 THREAD_UNINT);
1062 vm_object_cache_lock();
1063 vm_object_lock(object);
1064 }
1065
1066 pager = object->pager;
1067 object->pager = MEMORY_OBJECT_NULL;
1068
1069 if (pager != MEMORY_OBJECT_NULL)
1070 memory_object_control_disable(object->pager_control);
1071 vm_object_cache_unlock();
1072
1073 object->ref_count--;
1074 #if TASK_SWAPPER
1075 assert(object->res_count == 0);
1076 #endif /* TASK_SWAPPER */
1077
1078 assert (object->ref_count == 0);
1079
1080 /*
1081 * Clean or free the pages, as appropriate.
1082 * It is possible for us to find busy/absent pages,
1083 * if some faults on this object were aborted.
1084 */
1085 if (object->pageout) {
1086 assert(shadow_object != VM_OBJECT_NULL);
1087 assert(shadow_object == object->shadow);
1088
1089 vm_pageout_object_terminate(object);
1090
1091 } else if ((object->temporary && !object->can_persist) ||
1092 (pager == MEMORY_OBJECT_NULL)) {
1093 while (!queue_empty(&object->memq)) {
1094 p = (vm_page_t) queue_first(&object->memq);
1095
1096 VM_PAGE_CHECK(p);
1097 VM_PAGE_FREE(p);
1098 }
1099 } else if (!queue_empty(&object->memq)) {
1100 panic("vm_object_terminate: queue just emptied isn't");
1101 }
1102
1103 assert(object->paging_in_progress == 0);
1104 assert(object->ref_count == 0);
1105
1106 /*
1107 * If the pager has not already been released by
1108 * vm_object_destroy, we need to terminate it and
1109 * release our reference to it here.
1110 */
1111 if (pager != MEMORY_OBJECT_NULL) {
1112 vm_object_unlock(object);
1113 vm_object_release_pager(pager);
1114 vm_object_lock(object);
1115 }
1116
1117 /* kick off anyone waiting on terminating */
1118 object->terminating = FALSE;
1119 vm_object_paging_begin(object);
1120 vm_object_paging_end(object);
1121 vm_object_unlock(object);
1122
1123 #if MACH_PAGEMAP
1124 vm_external_destroy(object->existence_map, object->size);
1125 #endif /* MACH_PAGEMAP */
1126
1127 /*
1128 * Free the space for the object.
1129 */
1130 zfree(vm_object_zone, object);
1131 return KERN_SUCCESS;
1132 }
1133
1134 /*
1135 * Routine: vm_object_pager_wakeup
1136 * Purpose: Wake up anyone waiting for termination of a pager.
1137 */
1138
1139 static void
1140 vm_object_pager_wakeup(
1141 memory_object_t pager)
1142 {
1143 vm_object_hash_entry_t entry;
1144 boolean_t waiting = FALSE;
1145
1146 /*
1147 * If anyone was waiting for the memory_object_terminate
1148 * to be queued, wake them up now.
1149 */
1150 vm_object_cache_lock();
1151 entry = vm_object_hash_lookup(pager, TRUE);
1152 if (entry != VM_OBJECT_HASH_ENTRY_NULL)
1153 waiting = entry->waiting;
1154 vm_object_cache_unlock();
1155 if (entry != VM_OBJECT_HASH_ENTRY_NULL) {
1156 if (waiting)
1157 thread_wakeup((event_t) pager);
1158 vm_object_hash_entry_free(entry);
1159 }
1160 }
1161
1162 /*
1163 * Routine: vm_object_release_pager
1164 * Purpose: Terminate the pager and, upon completion,
1165 * release our last reference to it.
1166  * Just like memory_object_terminate, except
1167  * that we wake up anyone blocked in vm_object_enter
1168  * waiting for the termination message to be queued
1169 * before calling memory_object_init.
1170 */
1171 static void
1172 vm_object_release_pager(
1173 memory_object_t pager)
1174 {
1175
1176 /*
1177 * Terminate the pager.
1178 */
1179
1180 (void) memory_object_terminate(pager);
1181
1182 /*
1183 * Wakeup anyone waiting for this terminate
1184 */
1185 vm_object_pager_wakeup(pager);
1186
1187 /*
1188 * Release reference to pager.
1189 */
1190 memory_object_deallocate(pager);
1191 }
1192
1193 /*
1194 * Routine: vm_object_destroy
1195 * Purpose:
1196 * Shut down a VM object, despite the
1197 * presence of address map (or other) references
1198 * to the vm_object.
1199 */
1200 kern_return_t
1201 vm_object_destroy(
1202 vm_object_t object,
1203 __unused kern_return_t reason)
1204 {
1205 memory_object_t old_pager;
1206
1207 if (object == VM_OBJECT_NULL)
1208 return(KERN_SUCCESS);
1209
1210 /*
1211 * Remove the pager association immediately.
1212 *
1213 * This will prevent the memory manager from further
1214 * meddling. [If it wanted to flush data or make
1215 * other changes, it should have done so before performing
1216 * the destroy call.]
1217 */
1218
1219 vm_object_cache_lock();
1220 vm_object_lock(object);
1221 object->can_persist = FALSE;
1222 object->named = FALSE;
1223 object->alive = FALSE;
1224
1225 /*
1226 * Rip out the pager from the vm_object now...
1227 */
1228
1229 vm_object_remove(object);
1230 old_pager = object->pager;
1231 object->pager = MEMORY_OBJECT_NULL;
1232 if (old_pager != MEMORY_OBJECT_NULL)
1233 memory_object_control_disable(object->pager_control);
1234 vm_object_cache_unlock();
1235
1236 /*
1237 * Wait for the existing paging activity (that got
1238 * through before we nulled out the pager) to subside.
1239 */
1240
1241 vm_object_paging_wait(object, THREAD_UNINT);
1242 vm_object_unlock(object);
1243
1244 /*
1245 * Terminate the object now.
1246 */
1247 if (old_pager != MEMORY_OBJECT_NULL) {
1248 vm_object_release_pager(old_pager);
1249
1250 /*
1251 * JMM - Release the caller's reference. This assumes the
1252 * caller had a reference to release, which is a big (but
1253 * currently valid) assumption if this is driven from the
1254 * vnode pager (it is holding a named reference when making
1255 * this call)..
1256 */
1257 vm_object_deallocate(object);
1258
1259 }
1260 return(KERN_SUCCESS);
1261 }
1262
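/*
 * A sketch of the shutdown path just described (hypothetical caller): the
 * caller is assumed to hold a reference -- typically a named one from the
 * vnode pager -- which vm_object_destroy() releases on its behalf.
 */
#if 0	/* illustrative sketch only */
static void
vm_object_destroy_example(
	vm_object_t	object)
{
	kern_return_t	kr;

	kr = vm_object_destroy(object, KERN_SUCCESS);
	assert(kr == KERN_SUCCESS);	/* currently always succeeds */
}
#endif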
1263 /*
1264  * vm_object_deactivate_all_pages
1265 *
1266 * Deactivate all pages in the specified object. (Keep its pages
1267 * in memory even though it is no longer referenced.)
1268 *
1269 * The object must be locked.
1270 */
1271 static void
1272 vm_object_deactivate_all_pages(
1273 register vm_object_t object)
1274 {
1275 register vm_page_t p;
1276
1277 queue_iterate(&object->memq, p, vm_page_t, listq) {
1278 vm_page_lock_queues();
1279 if (!p->busy)
1280 vm_page_deactivate(p);
1281 vm_page_unlock_queues();
1282 }
1283 }
1284
1285 __private_extern__ void
1286 vm_object_deactivate_pages(
1287 vm_object_t object,
1288 vm_object_offset_t offset,
1289 vm_object_size_t size,
1290 boolean_t kill_page)
1291 {
1292 vm_object_t orig_object;
1293 int pages_moved = 0;
1294 int pages_found = 0;
1295
1296 /*
1297 * entered with object lock held, acquire a paging reference to
1298 * prevent the memory_object and control ports from
1299 * being destroyed.
1300 */
1301 orig_object = object;
1302
1303 for (;;) {
1304 register vm_page_t m;
1305 vm_object_offset_t toffset;
1306 vm_object_size_t tsize;
1307
1308 vm_object_paging_begin(object);
1309 vm_page_lock_queues();
1310
1311 for (tsize = size, toffset = offset; tsize; tsize -= PAGE_SIZE, toffset += PAGE_SIZE) {
1312
1313 if ((m = vm_page_lookup(object, toffset)) != VM_PAGE_NULL) {
1314
1315 pages_found++;
1316
1317 if ((m->wire_count == 0) && (!m->private) && (!m->gobbled) && (!m->busy)) {
1318
1319 assert(!m->laundry);
1320
1321 m->reference = FALSE;
1322 pmap_clear_reference(m->phys_page);
1323
1324 if ((kill_page) && (object->internal)) {
1325 m->precious = FALSE;
1326 m->dirty = FALSE;
1327 pmap_clear_modify(m->phys_page);
1328 vm_external_state_clr(object->existence_map, offset);
1329 }
1330 VM_PAGE_QUEUES_REMOVE(m);
1331
1332 assert(!m->laundry);
1333 assert(m->object != kernel_object);
1334 assert(m->pageq.next == NULL &&
1335 m->pageq.prev == NULL);
1336 if(m->zero_fill) {
1337 queue_enter_first(
1338 &vm_page_queue_zf,
1339 m, vm_page_t, pageq);
1340 } else {
1341 queue_enter_first(
1342 &vm_page_queue_inactive,
1343 m, vm_page_t, pageq);
1344 }
1345
1346 m->inactive = TRUE;
1347 if (!m->fictitious)
1348 vm_page_inactive_count++;
1349
1350 pages_moved++;
1351 }
1352 }
1353 }
1354 vm_page_unlock_queues();
1355 vm_object_paging_end(object);
1356
1357 if (object->shadow) {
1358 vm_object_t tmp_object;
1359
1360 kill_page = 0;
1361
1362 offset += object->shadow_offset;
1363
1364 tmp_object = object->shadow;
1365 vm_object_lock(tmp_object);
1366
1367 if (object != orig_object)
1368 vm_object_unlock(object);
1369 object = tmp_object;
1370 } else
1371 break;
1372 }
1373 if (object != orig_object)
1374 vm_object_unlock(object);
1375 }
1376
1377 /*
1378 * Routine: vm_object_pmap_protect
1379 *
1380 * Purpose:
1381 * Reduces the permission for all physical
1382 * pages in the specified object range.
1383 *
1384 * If removing write permission only, it is
1385 * sufficient to protect only the pages in
1386 * the top-level object; only those pages may
1387 * have write permission.
1388 *
1389 * If removing all access, we must follow the
1390 * shadow chain from the top-level object to
1391 * remove access to all pages in shadowed objects.
1392 *
1393 * The object must *not* be locked. The object must
1394 * be temporary/internal.
1395 *
1396 * If pmap is not NULL, this routine assumes that
1397 * the only mappings for the pages are in that
1398 * pmap.
1399 */
1400
1401 __private_extern__ void
1402 vm_object_pmap_protect(
1403 register vm_object_t object,
1404 register vm_object_offset_t offset,
1405 vm_object_size_t size,
1406 pmap_t pmap,
1407 vm_map_offset_t pmap_start,
1408 vm_prot_t prot)
1409 {
1410 if (object == VM_OBJECT_NULL)
1411 return;
1412 size = vm_object_round_page(size);
1413 offset = vm_object_trunc_page(offset);
1414
1415 vm_object_lock(object);
1416
1417 assert(object->internal);
1418
1419 while (TRUE) {
1420 if (ptoa_64(object->resident_page_count) > size/2 && pmap != PMAP_NULL) {
1421 vm_object_unlock(object);
1422 pmap_protect(pmap, pmap_start, pmap_start + size, prot);
1423 return;
1424 }
1425
1426 	/* if we are doing large ranges with respect to resident */
1427 	/* page count then we should iterate over pages; otherwise */
1428 	/* inverse page look-up will be faster */
1429 if (ptoa_64(object->resident_page_count / 4) < size) {
1430 vm_page_t p;
1431 vm_object_offset_t end;
1432
1433 end = offset + size;
1434
1435 if (pmap != PMAP_NULL) {
1436 queue_iterate(&object->memq, p, vm_page_t, listq) {
1437 if (!p->fictitious &&
1438 (offset <= p->offset) && (p->offset < end)) {
1439 vm_map_offset_t start;
1440
1441 start = pmap_start + p->offset - offset;
1442 pmap_protect(pmap, start, start + PAGE_SIZE_64, prot);
1443 }
1444 }
1445 } else {
1446 queue_iterate(&object->memq, p, vm_page_t, listq) {
1447 if (!p->fictitious &&
1448 (offset <= p->offset) && (p->offset < end)) {
1449
1450 pmap_page_protect(p->phys_page,
1451 prot & ~p->page_lock);
1452 }
1453 }
1454 }
1455 } else {
1456 vm_page_t p;
1457 vm_object_offset_t end;
1458 vm_object_offset_t target_off;
1459
1460 end = offset + size;
1461
1462 if (pmap != PMAP_NULL) {
1463 for(target_off = offset;
1464 target_off < end;
1465 target_off += PAGE_SIZE) {
1466 p = vm_page_lookup(object, target_off);
1467 if (p != VM_PAGE_NULL) {
1468 vm_offset_t start;
1469 start = pmap_start +
1470 (vm_offset_t)(p->offset - offset);
1471 pmap_protect(pmap, start,
1472 start + PAGE_SIZE, prot);
1473 }
1474 }
1475 } else {
1476 for(target_off = offset;
1477 target_off < end; target_off += PAGE_SIZE) {
1478 p = vm_page_lookup(object, target_off);
1479 if (p != VM_PAGE_NULL) {
1480 pmap_page_protect(p->phys_page,
1481 prot & ~p->page_lock);
1482 }
1483 }
1484 }
1485 }
1486
1487 if (prot == VM_PROT_NONE) {
1488 /*
1489 * Must follow shadow chain to remove access
1490 * to pages in shadowed objects.
1491 */
1492 register vm_object_t next_object;
1493
1494 next_object = object->shadow;
1495 if (next_object != VM_OBJECT_NULL) {
1496 offset += object->shadow_offset;
1497 vm_object_lock(next_object);
1498 vm_object_unlock(object);
1499 object = next_object;
1500 }
1501 else {
1502 /*
1503 * End of chain - we are done.
1504 */
1505 break;
1506 }
1507 }
1508 else {
1509 /*
1510 * Pages in shadowed objects may never have
1511 * write permission - we may stop here.
1512 */
1513 break;
1514 }
1515 }
1516
1517 vm_object_unlock(object);
1518 }
1519
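/*
 * A sketch of a typical call as described above (hypothetical wrapper):
 * downgrading to read-only only needs the top-level object, whereas
 * passing VM_PROT_NONE would also walk the shadow chain.
 */
#if 0	/* illustrative sketch only */
static void
vm_object_write_protect_example(
	vm_object_t		object,		/* temporary/internal object */
	vm_object_offset_t	offset,		/* start within the object */
	vm_object_size_t	size,		/* length of the range */
	pmap_t			pmap,		/* the only pmap mapping it */
	vm_map_offset_t		va)		/* where it is mapped there */
{
	/* Object must be unlocked; write permission is removed. */
	vm_object_pmap_protect(object, offset, size, pmap, va,
			       VM_PROT_READ);
}
#endif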
1520 /*
1521 * Routine: vm_object_copy_slowly
1522 *
1523 * Description:
1524 * Copy the specified range of the source
1525 * virtual memory object without using
1526 * protection-based optimizations (such
1527 * as copy-on-write). The pages in the
1528 * region are actually copied.
1529 *
1530 * In/out conditions:
1531 * The caller must hold a reference and a lock
1532 * for the source virtual memory object. The source
1533 * object will be returned *unlocked*.
1534 *
1535 * Results:
1536 * If the copy is completed successfully, KERN_SUCCESS is
1537 * returned. If the caller asserted the interruptible
1538 * argument, and an interruption occurred while waiting
1539 * for a user-generated event, MACH_SEND_INTERRUPTED is
1540 * returned. Other values may be returned to indicate
1541 * hard errors during the copy operation.
1542 *
1543 * A new virtual memory object is returned in a
1544 * parameter (_result_object). The contents of this
1545 * new object, starting at a zero offset, are a copy
1546 * of the source memory region. In the event of
1547 * an error, this parameter will contain the value
1548 * VM_OBJECT_NULL.
1549 */
1550 __private_extern__ kern_return_t
1551 vm_object_copy_slowly(
1552 register vm_object_t src_object,
1553 vm_object_offset_t src_offset,
1554 vm_object_size_t size,
1555 boolean_t interruptible,
1556 vm_object_t *_result_object) /* OUT */
1557 {
1558 vm_object_t new_object;
1559 vm_object_offset_t new_offset;
1560
1561 vm_object_offset_t src_lo_offset = src_offset;
1562 vm_object_offset_t src_hi_offset = src_offset + size;
1563
1564 XPR(XPR_VM_OBJECT, "v_o_c_slowly obj 0x%x off 0x%x size 0x%x\n",
1565 src_object, src_offset, size, 0, 0);
1566
1567 if (size == 0) {
1568 vm_object_unlock(src_object);
1569 *_result_object = VM_OBJECT_NULL;
1570 return(KERN_INVALID_ARGUMENT);
1571 }
1572
1573 /*
1574 * Prevent destruction of the source object while we copy.
1575 */
1576
1577 assert(src_object->ref_count > 0);
1578 src_object->ref_count++;
1579 VM_OBJ_RES_INCR(src_object);
1580 vm_object_unlock(src_object);
1581
1582 /*
1583 * Create a new object to hold the copied pages.
1584 * A few notes:
1585 * We fill the new object starting at offset 0,
1586 * regardless of the input offset.
1587 * We don't bother to lock the new object within
1588 * this routine, since we have the only reference.
1589 */
1590
1591 new_object = vm_object_allocate(size);
1592 new_offset = 0;
1593 vm_object_lock(new_object);
1594
1595 assert(size == trunc_page_64(size)); /* Will the loop terminate? */
1596
1597 for ( ;
1598 size != 0 ;
1599 src_offset += PAGE_SIZE_64,
1600 new_offset += PAGE_SIZE_64, size -= PAGE_SIZE_64
1601 ) {
1602 vm_page_t new_page;
1603 vm_fault_return_t result;
1604
1605 while ((new_page = vm_page_alloc(new_object, new_offset))
1606 == VM_PAGE_NULL) {
1607 if (!vm_page_wait(interruptible)) {
1608 vm_object_unlock(new_object);
1609 vm_object_deallocate(new_object);
1610 vm_object_deallocate(src_object);
1611 *_result_object = VM_OBJECT_NULL;
1612 return(MACH_SEND_INTERRUPTED);
1613 }
1614 }
1615
1616 do {
1617 vm_prot_t prot = VM_PROT_READ;
1618 vm_page_t _result_page;
1619 vm_page_t top_page;
1620 register
1621 vm_page_t result_page;
1622 kern_return_t error_code;
1623
1624 vm_object_lock(src_object);
1625 vm_object_paging_begin(src_object);
1626
1627 XPR(XPR_VM_FAULT,"vm_object_copy_slowly -> vm_fault_page",0,0,0,0,0);
1628 result = vm_fault_page(src_object, src_offset,
1629 VM_PROT_READ, FALSE, interruptible,
1630 src_lo_offset, src_hi_offset,
1631 VM_BEHAVIOR_SEQUENTIAL,
1632 &prot, &_result_page, &top_page,
1633 (int *)0,
1634 &error_code, FALSE, FALSE, NULL, 0);
1635
1636 switch(result) {
1637 case VM_FAULT_SUCCESS:
1638 result_page = _result_page;
1639
1640 /*
1641 * We don't need to hold the object
1642 * lock -- the busy page will be enough.
1643 * [We don't care about picking up any
1644 * new modifications.]
1645 *
1646 * Copy the page to the new object.
1647 *
1648 * POLICY DECISION:
1649 * If result_page is clean,
1650 * we could steal it instead
1651 * of copying.
1652 */
1653
1654 vm_object_unlock(result_page->object);
1655 vm_page_copy(result_page, new_page);
1656
1657 /*
1658 * Let go of both pages (make them
1659 * not busy, perform wakeup, activate).
1660 */
1661
1662 new_page->busy = FALSE;
1663 new_page->dirty = TRUE;
1664 vm_object_lock(result_page->object);
1665 PAGE_WAKEUP_DONE(result_page);
1666
1667 vm_page_lock_queues();
1668 if (!result_page->active &&
1669 !result_page->inactive)
1670 vm_page_activate(result_page);
1671 vm_page_activate(new_page);
1672 vm_page_unlock_queues();
1673
1674 /*
1675 * Release paging references and
1676 * top-level placeholder page, if any.
1677 */
1678
1679 vm_fault_cleanup(result_page->object,
1680 top_page);
1681
1682 break;
1683
1684 case VM_FAULT_RETRY:
1685 break;
1686
1687 case VM_FAULT_FICTITIOUS_SHORTAGE:
1688 vm_page_more_fictitious();
1689 break;
1690
1691 case VM_FAULT_MEMORY_SHORTAGE:
1692 if (vm_page_wait(interruptible))
1693 break;
1694 /* fall thru */
1695
1696 case VM_FAULT_INTERRUPTED:
1697 vm_page_free(new_page);
1698 vm_object_unlock(new_object);
1699 vm_object_deallocate(new_object);
1700 vm_object_deallocate(src_object);
1701 *_result_object = VM_OBJECT_NULL;
1702 return(MACH_SEND_INTERRUPTED);
1703
1704 case VM_FAULT_MEMORY_ERROR:
1705 /*
1706 * A policy choice:
1707 * (a) ignore pages that we can't
1708 * copy
1709 * (b) return the null object if
1710 * any page fails [chosen]
1711 */
1712
1713 vm_page_lock_queues();
1714 vm_page_free(new_page);
1715 vm_page_unlock_queues();
1716 vm_object_unlock(new_object);
1717 vm_object_deallocate(new_object);
1718 vm_object_deallocate(src_object);
1719 *_result_object = VM_OBJECT_NULL;
1720 return(error_code ? error_code:
1721 KERN_MEMORY_ERROR);
1722 }
1723 } while (result != VM_FAULT_SUCCESS);
1724 }
1725
1726 /*
1727 * Lose the extra reference, and return our object.
1728 */
1729
1730 vm_object_unlock(new_object);
1731 vm_object_deallocate(src_object);
1732 *_result_object = new_object;
1733 return(KERN_SUCCESS);
1734 }
1735
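/*
 * A sketch of the calling convention spelled out above (hypothetical
 * wrapper): the source object is passed in locked with a reference held,
 * and vm_object_copy_slowly() returns it unlocked; the copy, if any,
 * starts at offset 0.
 */
#if 0	/* illustrative sketch only */
static kern_return_t
vm_object_copy_slowly_example(
	vm_object_t		src_object,
	vm_object_offset_t	src_offset,
	vm_object_size_t	size,
	vm_object_t		*copy_p)	/* OUT */
{
	kern_return_t	kr;

	/* Caller already holds a reference on src_object. */
	vm_object_lock(src_object);

	kr = vm_object_copy_slowly(src_object, src_offset, size,
				   FALSE,	/* not interruptible */
				   copy_p);
	if (kr != KERN_SUCCESS)
		assert(*copy_p == VM_OBJECT_NULL);
	return kr;
}
#endif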
1736 /*
1737 * Routine: vm_object_copy_quickly
1738 *
1739 * Purpose:
1740 * Copy the specified range of the source virtual
1741 * memory object, if it can be done without waiting
1742 * for user-generated events.
1743 *
1744 * Results:
1745 * If the copy is successful, the copy is returned in
1746 * the arguments; otherwise, the arguments are not
1747 * affected.
1748 *
1749 * In/out conditions:
1750 * The object should be unlocked on entry and exit.
1751 */
1752
1753 /*ARGSUSED*/
1754 __private_extern__ boolean_t
1755 vm_object_copy_quickly(
1756 vm_object_t *_object, /* INOUT */
1757 __unused vm_object_offset_t offset, /* IN */
1758 __unused vm_object_size_t size, /* IN */
1759 boolean_t *_src_needs_copy, /* OUT */
1760 boolean_t *_dst_needs_copy) /* OUT */
1761 {
1762 vm_object_t object = *_object;
1763 memory_object_copy_strategy_t copy_strategy;
1764
1765 XPR(XPR_VM_OBJECT, "v_o_c_quickly obj 0x%x off 0x%x size 0x%x\n",
1766 *_object, offset, size, 0, 0);
1767 if (object == VM_OBJECT_NULL) {
1768 *_src_needs_copy = FALSE;
1769 *_dst_needs_copy = FALSE;
1770 return(TRUE);
1771 }
1772
1773 vm_object_lock(object);
1774
1775 copy_strategy = object->copy_strategy;
1776
1777 switch (copy_strategy) {
1778 case MEMORY_OBJECT_COPY_SYMMETRIC:
1779
1780 /*
1781 * Symmetric copy strategy.
1782 * Make another reference to the object.
1783 * Leave object/offset unchanged.
1784 */
1785
1786 assert(object->ref_count > 0);
1787 object->ref_count++;
1788 vm_object_res_reference(object);
1789 object->shadowed = TRUE;
1790 vm_object_unlock(object);
1791
1792 /*
1793 * Both source and destination must make
1794 * shadows, and the source must be made
1795 * read-only if not already.
1796 */
1797
1798 *_src_needs_copy = TRUE;
1799 *_dst_needs_copy = TRUE;
1800
1801 break;
1802
1803 case MEMORY_OBJECT_COPY_DELAY:
1804 vm_object_unlock(object);
1805 return(FALSE);
1806
1807 default:
1808 vm_object_unlock(object);
1809 return(FALSE);
1810 }
1811 return(TRUE);
1812 }
1813
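/*
 * A sketch (hypothetical wrapper) of how a caller reacts to the two
 * "needs copy" results above; the follow-up bookkeeping mentioned in the
 * comment lives in the vm_map layer, not in this file.
 */
#if 0	/* illustrative sketch only */
static boolean_t
vm_object_copy_quickly_example(
	vm_object_t		*object_p,	/* INOUT */
	vm_object_offset_t	offset,
	vm_object_size_t	size)
{
	boolean_t	src_needs_copy;
	boolean_t	dst_needs_copy;

	/* Objects are unlocked on entry and exit. */
	if (!vm_object_copy_quickly(object_p, offset, size,
				    &src_needs_copy, &dst_needs_copy))
		return FALSE;		/* fall back to another strategy */

	/*
	 * With the symmetric strategy both flags come back TRUE: the
	 * caller would mark the source entry needs_copy/read-only and
	 * shadow the destination entry.
	 */
	(void) src_needs_copy;
	(void) dst_needs_copy;
	return TRUE;
}
#endif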
1814 static int copy_call_count = 0;
1815 static int copy_call_sleep_count = 0;
1816 static int copy_call_restart_count = 0;
1817
1818 /*
1819 * Routine: vm_object_copy_call [internal]
1820 *
1821 * Description:
1822 * Copy the source object (src_object), using the
1823 * user-managed copy algorithm.
1824 *
1825 * In/out conditions:
1826 * The source object must be locked on entry. It
1827 * will be *unlocked* on exit.
1828 *
1829 * Results:
1830 * If the copy is successful, KERN_SUCCESS is returned.
1831 * A new object that represents the copied virtual
1832 * memory is returned in a parameter (*_result_object).
1833 * If the return value indicates an error, this parameter
1834 * is not valid.
1835 */
1836 static kern_return_t
1837 vm_object_copy_call(
1838 vm_object_t src_object,
1839 vm_object_offset_t src_offset,
1840 vm_object_size_t size,
1841 vm_object_t *_result_object) /* OUT */
1842 {
1843 kern_return_t kr;
1844 vm_object_t copy;
1845 boolean_t check_ready = FALSE;
1846
1847 /*
1848 * If a copy is already in progress, wait and retry.
1849 *
1850 * XXX
1851  * Consider making this call interruptible, as Mike
1852 * intended it to be.
1853 *
1854 * XXXO
1855 * Need a counter or version or something to allow
1856 * us to use the copy that the currently requesting
1857 * thread is obtaining -- is it worth adding to the
1858  * vm object structure? Depends on how common this case is.
1859 */
1860 copy_call_count++;
1861 while (vm_object_wanted(src_object, VM_OBJECT_EVENT_COPY_CALL)) {
1862 vm_object_sleep(src_object, VM_OBJECT_EVENT_COPY_CALL,
1863 THREAD_UNINT);
1864 copy_call_restart_count++;
1865 }
1866
1867 /*
1868 * Indicate (for the benefit of memory_object_create_copy)
1869 * that we want a copy for src_object. (Note that we cannot
1870 * do a real assert_wait before calling memory_object_copy,
1871 * so we simply set the flag.)
1872 */
1873
1874 vm_object_set_wanted(src_object, VM_OBJECT_EVENT_COPY_CALL);
1875 vm_object_unlock(src_object);
1876
1877 /*
1878 * Ask the memory manager to give us a memory object
1879 * which represents a copy of the src object.
1880 * The memory manager may give us a memory object
1881 * which we already have, or it may give us a
1882 * new memory object. This memory object will arrive
1883 * via memory_object_create_copy.
1884 */
1885
1886 kr = KERN_FAILURE; /* XXX need to change memory_object.defs */
1887 if (kr != KERN_SUCCESS) {
1888 return kr;
1889 }
1890
1891 /*
1892 * Wait for the copy to arrive.
1893 */
1894 vm_object_lock(src_object);
1895 while (vm_object_wanted(src_object, VM_OBJECT_EVENT_COPY_CALL)) {
1896 vm_object_sleep(src_object, VM_OBJECT_EVENT_COPY_CALL,
1897 THREAD_UNINT);
1898 copy_call_sleep_count++;
1899 }
1900 Retry:
1901 assert(src_object->copy != VM_OBJECT_NULL);
1902 copy = src_object->copy;
1903 if (!vm_object_lock_try(copy)) {
1904 vm_object_unlock(src_object);
1905 mutex_pause(); /* wait a bit */
1906 vm_object_lock(src_object);
1907 goto Retry;
1908 }
1909 if (copy->size < src_offset+size)
1910 copy->size = src_offset+size;
1911
1912 if (!copy->pager_ready)
1913 check_ready = TRUE;
1914
1915 /*
1916 * Return the copy.
1917 */
1918 *_result_object = copy;
1919 vm_object_unlock(copy);
1920 vm_object_unlock(src_object);
1921
1922 /* Wait for the copy to be ready. */
1923 if (check_ready == TRUE) {
1924 vm_object_lock(copy);
1925 while (!copy->pager_ready) {
1926 vm_object_sleep(copy, VM_OBJECT_EVENT_PAGER_READY, THREAD_UNINT);
1927 }
1928 vm_object_unlock(copy);
1929 }
1930
1931 return KERN_SUCCESS;
1932 }
1933
1934 static int copy_delayed_lock_collisions = 0;
1935 static int copy_delayed_max_collisions = 0;
1936 static int copy_delayed_lock_contention = 0;
1937 static int copy_delayed_protect_iterate = 0;
1938
1939 /*
1940 * Routine: vm_object_copy_delayed [internal]
1941 *
1942 * Description:
1943 * Copy the specified virtual memory object, using
1944 * the asymmetric copy-on-write algorithm.
1945 *
1946 * In/out conditions:
1947 * The src_object must be locked on entry. It will be unlocked
1948 * on exit - so the caller must also hold a reference to it.
1949 *
1950 * This routine will not block waiting for user-generated
1951 * events. It is not interruptible.
1952 */
1953 __private_extern__ vm_object_t
1954 vm_object_copy_delayed(
1955 vm_object_t src_object,
1956 vm_object_offset_t src_offset,
1957 vm_object_size_t size)
1958 {
1959 vm_object_t new_copy = VM_OBJECT_NULL;
1960 vm_object_t old_copy;
1961 vm_page_t p;
1962 vm_object_size_t copy_size = src_offset + size;
1963
1964 int collisions = 0;
1965 /*
1966 * The user-level memory manager wants to see all of the changes
1967 * to this object, but it has promised not to make any changes on
1968 * its own.
1969 *
1970 * Perform an asymmetric copy-on-write, as follows:
1971 * Create a new object, called a "copy object" to hold
1972 * pages modified by the new mapping (i.e., the copy,
1973 * not the original mapping).
1974 * Record the original object as the backing object for
1975 * the copy object. If the original mapping does not
1976 * change a page, it may be used read-only by the copy.
1977 * Record the copy object in the original object.
1978 * When the original mapping causes a page to be modified,
1979 * it must be copied to a new page that is "pushed" to
1980 * the copy object.
1981 * Mark the new mapping (the copy object) copy-on-write.
1982 * This makes the copy object itself read-only, allowing
1983 * it to be reused if the original mapping makes no
1984 * changes, and simplifying the synchronization required
1985 * in the "push" operation described above.
1986 *
1987 * The copy-on-write is said to be asymmetric because the original
1988 * object is *not* marked copy-on-write. A copied page is pushed
1989 * to the copy object, regardless of which party attempted to modify
1990 * the page.
1991 *
1992 * Repeated asymmetric copy operations may be done. If the
1993 * original object has not been changed since the last copy, its
1994 * copy object can be reused. Otherwise, a new copy object can be
1995 * inserted between the original object and its previous copy
1996 * object. Since any copy object is read-only, this cannot affect
1997 * affect the contents of the previous copy object.
1998 *
1999 * Note that a copy object is higher in the object tree than the
2000 * original object; therefore, use of the copy object recorded in
2001 * the original object must be done carefully, to avoid deadlock.
2002 */
2003
2004 Retry:
2005
2006 /*
2007 * Wait for paging in progress.
2008 */
2009 if (!src_object->true_share)
2010 vm_object_paging_wait(src_object, THREAD_UNINT);
2011
2012 /*
2013 * See whether we can reuse the result of a previous
2014 * copy operation.
2015 */
2016
2017 old_copy = src_object->copy;
2018 if (old_copy != VM_OBJECT_NULL) {
2019 /*
2020 * Try to get the locks (out of order)
2021 */
2022 if (!vm_object_lock_try(old_copy)) {
2023 vm_object_unlock(src_object);
2024 mutex_pause();
2025
2026 /* Heisenberg Rules */
2027 copy_delayed_lock_collisions++;
2028 if (collisions++ == 0)
2029 copy_delayed_lock_contention++;
2030
2031 if (collisions > copy_delayed_max_collisions)
2032 copy_delayed_max_collisions = collisions;
2033
2034 vm_object_lock(src_object);
2035 goto Retry;
2036 }
2037
2038 /*
2039 * Determine whether the old copy object has
2040 * been modified.
2041 */
2042
2043 if (old_copy->resident_page_count == 0 &&
2044 !old_copy->pager_created) {
2045 /*
2046 * It has not been modified.
2047 *
2048 * Return another reference to
2049 * the existing copy-object if
2050 * we can safely grow it (if
2051 * needed).
2052 */
2053
2054 if (old_copy->size < copy_size) {
2055 /*
2056 * We can't perform a delayed copy if any of the
2057 * pages in the extended range are wired (because
2058 * we can't safely take write permission away from
2059 * wired pages). If the pages aren't wired, then
2060 * go ahead and protect them.
2061 */
2062 copy_delayed_protect_iterate++;
2063 queue_iterate(&src_object->memq, p, vm_page_t, listq) {
2064 if (!p->fictitious &&
2065 p->offset >= old_copy->size &&
2066 p->offset < copy_size) {
2067 if (p->wire_count > 0) {
2068 vm_object_unlock(old_copy);
2069 vm_object_unlock(src_object);
2070
2071 if (new_copy != VM_OBJECT_NULL) {
2072 vm_object_unlock(new_copy);
2073 vm_object_deallocate(new_copy);
2074 }
2075
2076 return VM_OBJECT_NULL;
2077 } else {
2078 pmap_page_protect(p->phys_page,
2079 (VM_PROT_ALL & ~VM_PROT_WRITE &
2080 ~p->page_lock));
2081 }
2082 }
2083 }
2084 old_copy->size = copy_size;
2085 }
2086
2087 vm_object_reference_locked(old_copy);
2088 vm_object_unlock(old_copy);
2089 vm_object_unlock(src_object);
2090
2091 if (new_copy != VM_OBJECT_NULL) {
2092 vm_object_unlock(new_copy);
2093 vm_object_deallocate(new_copy);
2094 }
2095
2096 return(old_copy);
2097 }
2098
2099 /*
2100 * Adjust the size argument so that the newly-created
2101 * copy object will be large enough to back either the
2102 * old copy object or the new mapping.
2103 */
2104 if (old_copy->size > copy_size)
2105 copy_size = old_copy->size;
2106
2107 if (new_copy == VM_OBJECT_NULL) {
2108 vm_object_unlock(old_copy);
2109 vm_object_unlock(src_object);
2110 new_copy = vm_object_allocate(copy_size);
2111 vm_object_lock(src_object);
2112 vm_object_lock(new_copy);
2113 goto Retry;
2114 }
2115 new_copy->size = copy_size;
2116
2117 /*
2118 * The copy-object is always made large enough to
2119 * completely shadow the original object, since
2120 * it may have several users who want to shadow
2121 * the original object at different points.
2122 */
2123
2124 assert((old_copy->shadow == src_object) &&
2125 (old_copy->shadow_offset == (vm_object_offset_t) 0));
2126
2127 } else if (new_copy == VM_OBJECT_NULL) {
2128 vm_object_unlock(src_object);
2129 new_copy = vm_object_allocate(copy_size);
2130 vm_object_lock(src_object);
2131 vm_object_lock(new_copy);
2132 goto Retry;
2133 }
2134
2135 /*
2136 * We now have the src object locked, and the new copy object
2137 * allocated and locked (and potentially the old copy locked).
2138 * Before we go any further, make sure we can still perform
2139 * a delayed copy, as the situation may have changed.
2140 *
2141 * Specifically, we can't perform a delayed copy if any of the
2142 * pages in the range are wired (because we can't safely take
2143 * write permission away from wired pages). If the pages aren't
2144 * wired, then go ahead and protect them.
2145 */
2146 copy_delayed_protect_iterate++;
2147 queue_iterate(&src_object->memq, p, vm_page_t, listq) {
2148 if (!p->fictitious && p->offset < copy_size) {
2149 if (p->wire_count > 0) {
2150 if (old_copy)
2151 vm_object_unlock(old_copy);
2152 vm_object_unlock(src_object);
2153 vm_object_unlock(new_copy);
2154 vm_object_deallocate(new_copy);
2155 return VM_OBJECT_NULL;
2156 } else {
2157 pmap_page_protect(p->phys_page,
2158 (VM_PROT_ALL & ~VM_PROT_WRITE &
2159 ~p->page_lock));
2160 }
2161 }
2162 }
2163
2164 if (old_copy != VM_OBJECT_NULL) {
2165 /*
2166 * Make the old copy-object shadow the new one.
2167 * It will receive no more pages from the original
2168 * object.
2169 */
2170
2171 src_object->ref_count--; /* remove ref. from old_copy */
2172 assert(src_object->ref_count > 0);
2173 old_copy->shadow = new_copy;
2174 assert(new_copy->ref_count > 0);
2175 new_copy->ref_count++; /* for old_copy->shadow ref. */
2176
2177 #if TASK_SWAPPER
2178 if (old_copy->res_count) {
2179 VM_OBJ_RES_INCR(new_copy);
2180 VM_OBJ_RES_DECR(src_object);
2181 }
2182 #endif
2183
2184 vm_object_unlock(old_copy); /* done with old_copy */
2185 }
2186
2187 /*
2188 * Point the new copy at the existing object.
2189 */
2190 new_copy->shadow = src_object;
2191 new_copy->shadow_offset = 0;
2192 new_copy->shadowed = TRUE; /* caller must set needs_copy */
2193 assert(src_object->ref_count > 0);
2194 src_object->ref_count++;
2195 VM_OBJ_RES_INCR(src_object);
2196 src_object->copy = new_copy;
2197 vm_object_unlock(src_object);
2198 vm_object_unlock(new_copy);
2199
2200 XPR(XPR_VM_OBJECT,
2201 "vm_object_copy_delayed: used copy object %X for source %X\n",
2202 (integer_t)new_copy, (integer_t)src_object, 0, 0, 0);
2203
2204 return(new_copy);
2205 }
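
/*
 * Illustrative sketch, not part of the original file: one way a
 * hypothetical caller might drive vm_object_copy_delayed() above.
 * The caller holds a reference on src_object and locks it before the
 * call; the routine unlocks it.  A VM_OBJECT_NULL result means wired
 * pages prevented the delayed copy, and the caller must fall back to
 * another strategy (compare the MEMORY_OBJECT_COPY_DELAY case in
 * vm_object_copy_strategically() below).
 */
#if 0
static kern_return_t
vm_object_copy_delayed_example(
	vm_object_t		src_object,	/* referenced by caller */
	vm_object_offset_t	src_offset,
	vm_object_size_t	size,
	vm_object_t		*copy_object)	/* OUT */
{
	vm_object_lock(src_object);
	*copy_object = vm_object_copy_delayed(src_object, src_offset, size);
	/* src_object has been unlocked by vm_object_copy_delayed() */

	if (*copy_object == VM_OBJECT_NULL)
		return KERN_FAILURE;	/* wired pages: delayed copy refused */

	/*
	 * The mapping that uses *copy_object must be marked needs_copy
	 * by the caller (see "caller must set needs_copy" above).
	 */
	return KERN_SUCCESS;
}
#endif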
2206
2207 /*
2208 * Routine: vm_object_copy_strategically
2209 *
2210 * Purpose:
2211 * Perform a copy according to the source object's
2212 * declared strategy. This operation may block,
2213 * and may be interrupted.
2214 */
2215 __private_extern__ kern_return_t
2216 vm_object_copy_strategically(
2217 register vm_object_t src_object,
2218 vm_object_offset_t src_offset,
2219 vm_object_size_t size,
2220 vm_object_t *dst_object, /* OUT */
2221 vm_object_offset_t *dst_offset, /* OUT */
2222 boolean_t *dst_needs_copy) /* OUT */
2223 {
2224 kern_return_t result;
2225 boolean_t interruptible = THREAD_ABORTSAFE; /* XXX */
2226 memory_object_copy_strategy_t copy_strategy;
2227
2228 assert(src_object != VM_OBJECT_NULL);
2229
2230 vm_object_lock(src_object);
2231
2232 /*
2233 * The copy strategy is only valid if the memory manager
2234 * is "ready". Internal objects are always ready.
2235 */
2236
2237 while (!src_object->internal && !src_object->pager_ready) {
2238 wait_result_t wait_result;
2239
2240 wait_result = vm_object_sleep( src_object,
2241 VM_OBJECT_EVENT_PAGER_READY,
2242 interruptible);
2243 if (wait_result != THREAD_AWAKENED) {
2244 vm_object_unlock(src_object);
2245 *dst_object = VM_OBJECT_NULL;
2246 *dst_offset = 0;
2247 *dst_needs_copy = FALSE;
2248 return(MACH_SEND_INTERRUPTED);
2249 }
2250 }
2251
2252 copy_strategy = src_object->copy_strategy;
2253
2254 /*
2255 * Use the appropriate copy strategy.
2256 */
2257
2258 switch (copy_strategy) {
2259 case MEMORY_OBJECT_COPY_DELAY:
2260 *dst_object = vm_object_copy_delayed(src_object,
2261 src_offset, size);
2262 if (*dst_object != VM_OBJECT_NULL) {
2263 *dst_offset = src_offset;
2264 *dst_needs_copy = TRUE;
2265 result = KERN_SUCCESS;
2266 break;
2267 }
2268 vm_object_lock(src_object);
2269 /* fall thru when delayed copy not allowed */
2270
2271 case MEMORY_OBJECT_COPY_NONE:
2272 result = vm_object_copy_slowly(src_object, src_offset, size,
2273 interruptible, dst_object);
2274 if (result == KERN_SUCCESS) {
2275 *dst_offset = 0;
2276 *dst_needs_copy = FALSE;
2277 }
2278 break;
2279
2280 case MEMORY_OBJECT_COPY_CALL:
2281 result = vm_object_copy_call(src_object, src_offset, size,
2282 dst_object);
2283 if (result == KERN_SUCCESS) {
2284 *dst_offset = src_offset;
2285 *dst_needs_copy = TRUE;
2286 }
2287 break;
2288
2289 case MEMORY_OBJECT_COPY_SYMMETRIC:
2290 XPR(XPR_VM_OBJECT, "v_o_c_strategically obj 0x%x off 0x%x size 0x%x\n",(natural_t)src_object, src_offset, size, 0, 0);
2291 vm_object_unlock(src_object);
2292 result = KERN_MEMORY_RESTART_COPY;
2293 break;
2294
2295 default:
2296 panic("copy_strategically: bad strategy");
2297 result = KERN_INVALID_ARGUMENT;
2298 }
2299 return(result);
2300 }
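
/*
 * Illustrative sketch, not part of the original file: how a hypothetical
 * caller might consume the OUT parameters of vm_object_copy_strategically()
 * above.  KERN_MEMORY_RESTART_COPY is returned for the symmetric strategy,
 * in which case the caller must perform the copy by some other means.
 */
#if 0
static kern_return_t
copy_strategically_example(
	vm_object_t		src_object,
	vm_object_offset_t	src_offset,
	vm_object_size_t	size)
{
	vm_object_t		dst_object;
	vm_object_offset_t	dst_offset;
	boolean_t		dst_needs_copy;
	kern_return_t		kr;

	kr = vm_object_copy_strategically(src_object, src_offset, size,
					  &dst_object, &dst_offset,
					  &dst_needs_copy);
	if (kr != KERN_SUCCESS)
		return kr;	/* includes KERN_MEMORY_RESTART_COPY */

	/*
	 * Map (dst_object, dst_offset); if dst_needs_copy is TRUE the
	 * new mapping must be entered copy-on-write.
	 */
	(void) dst_needs_copy;
	return KERN_SUCCESS;
}
#endif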
2301
2302 /*
2303 * vm_object_shadow:
2304 *
2305 * Create a new object which is backed by the
2306 * specified existing object range. The source
2307 * object reference is deallocated.
2308 *
2309 * The new object and offset into that object
2310 * are returned in the source parameters.
2311 */
2312 boolean_t vm_object_shadow_check = FALSE;
2313
2314 __private_extern__ boolean_t
2315 vm_object_shadow(
2316 vm_object_t *object, /* IN/OUT */
2317 vm_object_offset_t *offset, /* IN/OUT */
2318 vm_object_size_t length)
2319 {
2320 register vm_object_t source;
2321 register vm_object_t result;
2322
2323 source = *object;
2324 assert(source->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);
2325
2326 /*
2327 * Determine if we really need a shadow.
2328 */
2329
2330 if (vm_object_shadow_check && source->ref_count == 1 &&
2331 (source->shadow == VM_OBJECT_NULL ||
2332 source->shadow->copy == VM_OBJECT_NULL))
2333 {
2334 source->shadowed = FALSE;
2335 return FALSE;
2336 }
2337
2338 /*
2339 * Allocate a new object with the given length
2340 */
2341
2342 if ((result = vm_object_allocate(length)) == VM_OBJECT_NULL)
2343 panic("vm_object_shadow: no object for shadowing");
2344
2345 /*
2346 * The new object shadows the source object, adding
2347 * a reference to it. Our caller changes his reference
2348 * to point to the new object, removing a reference to
2349 * the source object. Net result: no change of reference
2350 * count.
2351 */
2352 result->shadow = source;
2353
2354 /*
2355 * Store the offset into the source object,
2356 * and fix up the offset into the new object.
2357 */
2358
2359 result->shadow_offset = *offset;
2360
2361 /*
2362 * Return the new object and offset.
2363 */
2364
2365 *offset = 0;
2366 *object = result;
2367 return TRUE;
2368 }
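
/*
 * Illustrative sketch, not part of the original file: the IN/OUT
 * convention of vm_object_shadow() above.  The caller's reference to the
 * source object is, in effect, donated to the new object's shadow
 * pointer, so no additional reference counting is needed here.
 */
#if 0
static void
vm_object_shadow_example(
	vm_object_t		*objectp,	/* IN/OUT: caller's object */
	vm_object_offset_t	*offsetp,	/* IN/OUT */
	vm_object_size_t	length)
{
	if (vm_object_shadow(objectp, offsetp, length)) {
		/*
		 * *objectp is now a new object shadowing the old one at
		 * the old *offsetp, and *offsetp has been reset to 0.
		 */
	} else {
		/*
		 * No shadow was needed (vm_object_shadow_check path);
		 * *objectp and *offsetp are unchanged.
		 */
	}
}
#endif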
2369
2370 /*
2371 * The relationship between vm_object structures and
2372 * the memory_object requires careful synchronization.
2373 *
2374 * All associations are created by memory_object_create_named
2375 * for external pagers and vm_object_pager_create for internal
2376 * objects as follows:
2377 *
2378 * pager: the memory_object itself, supplied by
2379 * the user requesting a mapping (or the kernel,
2380 * when initializing internal objects); the
2381 * kernel simulates holding send rights by keeping
2382 * a port reference;
2383 *
2384 * pager_request:
2385 * the memory object control port,
2386 * created by the kernel; the kernel holds
2387 * receive (and ownership) rights to this
2388 * port, but no other references.
2389 *
2390 * When initialization is complete, the "initialized" field
2391 * is asserted. Other mappings using a particular memory object,
2392 * and any references to the vm_object gained through the
2393 * port association must wait for this initialization to occur.
2394 *
2395 * In order to allow the memory manager to set attributes before
2396 * requests (notably virtual copy operations, but also data or
2397 * unlock requests) are made, a "ready" attribute is made available.
2398 * Only the memory manager may affect the value of this attribute.
2399 * Its value does not affect critical kernel functions, such as
2400 * internal object initialization or destruction. [Furthermore,
2401 * memory objects created by the kernel are assumed to be ready
2402 * immediately; the default memory manager need not explicitly
2403 * set the "ready" attribute.]
2404 *
2405 * [Both the "initialized" and "ready" attribute wait conditions
2406 * use the "pager" field as the wait event.]
2407 *
2408 * The port associations can be broken down by any of the
2409 * following routines:
2410 * vm_object_terminate:
2411 * No references to the vm_object remain, and
2412 * the object cannot (or will not) be cached.
2413 * This is the normal case, and is done even
2414 * though one of the other cases has already been
2415 * done.
2416 * memory_object_destroy:
2417 * The memory manager has requested that the
2418 * kernel relinquish references to the memory
2419 * object. [The memory manager may not want to
2420 * destroy the memory object, but may wish to
2421 * refuse or tear down existing memory mappings.]
2422 *
2423 * Each routine that breaks an association must break all of
2424 * them at once. At some later time, that routine must clear
2425 * the pager field and release the memory object references.
2426 * [Furthermore, each routine must cope with the simultaneous
2427 * or previous operations of the others.]
2428 *
2429 * In addition to the lock on the object, the vm_object_cache_lock
2430 * governs the associations. References gained through the
2431 * association require use of the cache lock.
2432 *
2433 * Because the pager field may be cleared spontaneously, it
2434 * cannot be used to determine whether a memory object has
2435 * ever been associated with a particular vm_object. [This
2436 * knowledge is important to the shadow object mechanism.]
2437 * For this reason, an additional "created" attribute is
2438 * provided.
2439 *
2440 * During various paging operations, the pager reference found in the
2441 * vm_object must be valid. To prevent this reference from being released
2442 * (other than being removed, i.e., made null), routines may use
2443 * the vm_object_paging_begin/end routines [actually, macros].
2444 * The implementation uses the "paging_in_progress" and "wanted" fields.
2445 * [Operations that alter the validity of the pager values include the
2446 * termination routines and vm_object_collapse.]
2447 */
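
/*
 * Illustrative sketch, not part of the original file: the wait
 * conventions described above, as they appear throughout this module.
 * A thread holding the object lock blocks on the "initialized" and
 * "ready" attributes with vm_object_sleep(); both waits use the pager
 * field as the underlying wait event.
 */
#if 0
static void
pager_attribute_wait_example(
	vm_object_t	object)
{
	vm_object_lock(object);

	/* wait for vm_object_enter()/vm_object_pager_create() to finish */
	while (!object->pager_initialized) {
		vm_object_sleep(object,
				VM_OBJECT_EVENT_INITIALIZED,
				THREAD_UNINT);
	}

	/* wait for the memory manager to assert the "ready" attribute */
	while (!object->pager_ready) {
		vm_object_sleep(object,
				VM_OBJECT_EVENT_PAGER_READY,
				THREAD_UNINT);
	}

	vm_object_unlock(object);
}
#endif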
2448
2449 #if 0
2450 static void vm_object_abort_activity(
2451 vm_object_t object);
2452
2453 /*
2454 * Routine: vm_object_abort_activity [internal use only]
2455 * Purpose:
2456 * Abort paging requests pending on this object.
2457 * In/out conditions:
2458 * The object is locked on entry and exit.
2459 */
2460 static void
2461 vm_object_abort_activity(
2462 vm_object_t object)
2463 {
2464 register
2465 vm_page_t p;
2466 vm_page_t next;
2467
2468 XPR(XPR_VM_OBJECT, "vm_object_abort_activity, object 0x%X\n",
2469 (integer_t)object, 0, 0, 0, 0);
2470
2471 /*
2472 * Abort all activity that would be waiting
2473 * for a result on this memory object.
2474 *
2475 * We could also choose to destroy all pages
2476 * that we have in memory for this object, but
2477 * we don't.
2478 */
2479
2480 p = (vm_page_t) queue_first(&object->memq);
2481 while (!queue_end(&object->memq, (queue_entry_t) p)) {
2482 next = (vm_page_t) queue_next(&p->listq);
2483
2484 /*
2485 * If it's being paged in, destroy it.
2486 * If an unlock has been requested, start it again.
2487 */
2488
2489 if (p->busy && p->absent) {
2490 VM_PAGE_FREE(p);
2491 }
2492 else {
2493 if (p->unlock_request != VM_PROT_NONE)
2494 p->unlock_request = VM_PROT_NONE;
2495 PAGE_WAKEUP(p);
2496 }
2497
2498 p = next;
2499 }
2500
2501 /*
2502 * Wake up threads waiting for the memory object to
2503 * become ready.
2504 */
2505
2506 object->pager_ready = TRUE;
2507 vm_object_wakeup(object, VM_OBJECT_EVENT_PAGER_READY);
2508 }
2509
2510 /*
2511 * Routine: vm_object_pager_dead
2512 *
2513 * Purpose:
2514 * A port is being destroyed, and the IPC kobject code
2515 * can't tell if it represents a pager port or not.
2516 * So this function is called each time it sees a port
2517 * die.
2518 * THIS IS HORRIBLY INEFFICIENT. We should only call
2519 * this routine if we had requested a notification on
2520 * the port.
2521 */
2522
2523 __private_extern__ void
2524 vm_object_pager_dead(
2525 ipc_port_t pager)
2526 {
2527 vm_object_t object;
2528 vm_object_hash_entry_t entry;
2529
2530 /*
2531 * Perform essentially the same operations as in vm_object_lookup,
2532 * except that this time we look up based on the memory_object
2533 * port, not the control port.
2534 */
2535 vm_object_cache_lock();
2536 entry = vm_object_hash_lookup(pager, FALSE);
2537 if (entry == VM_OBJECT_HASH_ENTRY_NULL ||
2538 entry->object == VM_OBJECT_NULL) {
2539 vm_object_cache_unlock();
2540 return;
2541 }
2542
2543 object = entry->object;
2544 entry->object = VM_OBJECT_NULL;
2545
2546 vm_object_lock(object);
2547 if (object->ref_count == 0) {
2548 XPR(XPR_VM_OBJECT_CACHE,
2549 "vm_object_destroy: removing %x from cache, head (%x, %x)\n",
2550 (integer_t)object,
2551 (integer_t)vm_object_cached_list.next,
2552 (integer_t)vm_object_cached_list.prev, 0,0);
2553
2554 queue_remove(&vm_object_cached_list, object,
2555 vm_object_t, cached_list);
2556 vm_object_cached_count--;
2557 }
2558 object->ref_count++;
2559 vm_object_res_reference(object);
2560
2561 object->can_persist = FALSE;
2562
2563 assert(object->pager == pager);
2564
2565 /*
2566 * Remove the pager association.
2567 *
2568 * Note that the memory_object itself is dead, so
2569 * we don't bother with it.
2570 */
2571
2572 object->pager = MEMORY_OBJECT_NULL;
2573
2574 vm_object_unlock(object);
2575 vm_object_cache_unlock();
2576
2577 vm_object_pager_wakeup(pager);
2578
2579 /*
2580 * Release the pager reference. Note that there's no
2581 * point in trying the memory_object_terminate call
2582 * because the memory_object itself is dead. Also
2583 * release the memory_object_control reference, since
2584 * the pager didn't do that either.
2585 */
2586
2587 memory_object_deallocate(pager);
2588 memory_object_control_deallocate(object->pager_request);
2589
2590
2591 /*
2592 * Restart pending page requests
2593 */
2594 vm_object_lock(object);
2595 vm_object_abort_activity(object);
2596 vm_object_unlock(object);
2597
2598 /*
2599 * Lose the object reference.
2600 */
2601
2602 vm_object_deallocate(object);
2603 }
2604 #endif
2605
2606 /*
2607 * Routine: vm_object_enter
2608 * Purpose:
2609 * Find a VM object corresponding to the given
2610 * pager; if no such object exists, create one,
2611 * and initialize the pager.
2612 */
2613 vm_object_t
2614 vm_object_enter(
2615 memory_object_t pager,
2616 vm_object_size_t size,
2617 boolean_t internal,
2618 boolean_t init,
2619 boolean_t named)
2620 {
2621 register vm_object_t object;
2622 vm_object_t new_object;
2623 boolean_t must_init;
2624 vm_object_hash_entry_t entry, new_entry;
2625
2626 if (pager == MEMORY_OBJECT_NULL)
2627 return(vm_object_allocate(size));
2628
2629 new_object = VM_OBJECT_NULL;
2630 new_entry = VM_OBJECT_HASH_ENTRY_NULL;
2631 must_init = init;
2632
2633 /*
2634 * Look for an object associated with this port.
2635 */
2636
2637 vm_object_cache_lock();
2638 do {
2639 entry = vm_object_hash_lookup(pager, FALSE);
2640
2641 if (entry == VM_OBJECT_HASH_ENTRY_NULL) {
2642 if (new_object == VM_OBJECT_NULL) {
2643 /*
2644 * We must unlock to create a new object;
2645 * if we do so, we must try the lookup again.
2646 */
2647 vm_object_cache_unlock();
2648 assert(new_entry == VM_OBJECT_HASH_ENTRY_NULL);
2649 new_entry = vm_object_hash_entry_alloc(pager);
2650 new_object = vm_object_allocate(size);
2651 vm_object_cache_lock();
2652 } else {
2653 /*
2654 * Lookup failed twice, and we have something
2655 * to insert; set the object.
2656 */
2657 vm_object_hash_insert(new_entry);
2658 entry = new_entry;
2659 entry->object = new_object;
2660 new_entry = VM_OBJECT_HASH_ENTRY_NULL;
2661 new_object = VM_OBJECT_NULL;
2662 must_init = TRUE;
2663 }
2664 } else if (entry->object == VM_OBJECT_NULL) {
2665 /*
2666 * If a previous object is being terminated,
2667 * we must wait for the termination message
2668 * to be queued (and lookup the entry again).
2669 */
2670 entry->waiting = TRUE;
2671 entry = VM_OBJECT_HASH_ENTRY_NULL;
2672 assert_wait((event_t) pager, THREAD_UNINT);
2673 vm_object_cache_unlock();
2674 thread_block(THREAD_CONTINUE_NULL);
2675 vm_object_cache_lock();
2676 }
2677 } while (entry == VM_OBJECT_HASH_ENTRY_NULL);
2678
2679 object = entry->object;
2680 assert(object != VM_OBJECT_NULL);
2681
2682 if (!must_init) {
2683 vm_object_lock(object);
2684 assert(!internal || object->internal);
2685 if (named) {
2686 assert(!object->named);
2687 object->named = TRUE;
2688 }
2689 if (object->ref_count == 0) {
2690 XPR(XPR_VM_OBJECT_CACHE,
2691 "vm_object_enter: removing %x from cache, head (%x, %x)\n",
2692 (integer_t)object,
2693 (integer_t)vm_object_cached_list.next,
2694 (integer_t)vm_object_cached_list.prev, 0,0);
2695 queue_remove(&vm_object_cached_list, object,
2696 vm_object_t, cached_list);
2697 vm_object_cached_count--;
2698 }
2699 object->ref_count++;
2700 vm_object_res_reference(object);
2701 vm_object_unlock(object);
2702
2703 VM_STAT(hits++);
2704 }
2705 assert(object->ref_count > 0);
2706
2707 VM_STAT(lookups++);
2708
2709 vm_object_cache_unlock();
2710
2711 XPR(XPR_VM_OBJECT,
2712 "vm_o_enter: pager 0x%x obj 0x%x must_init %d\n",
2713 (integer_t)pager, (integer_t)object, must_init, 0, 0);
2714
2715 /*
2716 * If we raced to create a vm_object but lost, let's
2717 * throw away ours.
2718 */
2719
2720 if (new_object != VM_OBJECT_NULL)
2721 vm_object_deallocate(new_object);
2722
2723 if (new_entry != VM_OBJECT_HASH_ENTRY_NULL)
2724 vm_object_hash_entry_free(new_entry);
2725
2726 if (must_init) {
2727 memory_object_control_t control;
2728
2729 /*
2730 * Allocate request port.
2731 */
2732
2733 control = memory_object_control_allocate(object);
2734 assert (control != MEMORY_OBJECT_CONTROL_NULL);
2735
2736 vm_object_lock(object);
2737 assert(object != kernel_object);
2738
2739 /*
2740 * Copy the reference we were given.
2741 */
2742
2743 memory_object_reference(pager);
2744 object->pager_created = TRUE;
2745 object->pager = pager;
2746 object->internal = internal;
2747 object->pager_trusted = internal;
2748 if (!internal) {
2749 /* copy strategy invalid until set by memory manager */
2750 object->copy_strategy = MEMORY_OBJECT_COPY_INVALID;
2751 }
2752 object->pager_control = control;
2753 object->pager_ready = FALSE;
2754
2755 vm_object_unlock(object);
2756
2757 /*
2758 * Let the pager know we're using it.
2759 */
2760
2761 (void) memory_object_init(pager,
2762 object->pager_control,
2763 PAGE_SIZE);
2764
2765 vm_object_lock(object);
2766 if (named)
2767 object->named = TRUE;
2768 if (internal) {
2769 object->pager_ready = TRUE;
2770 vm_object_wakeup(object, VM_OBJECT_EVENT_PAGER_READY);
2771 }
2772
2773 object->pager_initialized = TRUE;
2774 vm_object_wakeup(object, VM_OBJECT_EVENT_INITIALIZED);
2775 } else {
2776 vm_object_lock(object);
2777 }
2778
2779 /*
2780 * [At this point, the object must be locked]
2781 */
2782
2783 /*
2784 * Wait for the work above to be done by the first
2785 * thread to map this object.
2786 */
2787
2788 while (!object->pager_initialized) {
2789 vm_object_sleep(object,
2790 VM_OBJECT_EVENT_INITIALIZED,
2791 THREAD_UNINT);
2792 }
2793 vm_object_unlock(object);
2794
2795 XPR(XPR_VM_OBJECT,
2796 "vm_object_enter: vm_object %x, memory_object %x, internal %d\n",
2797 (integer_t)object, (integer_t)object->pager, internal, 0,0);
2798 return(object);
2799 }
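
/*
 * Illustrative sketch, not part of the original file: the calling
 * convention vm_object_pager_create() below uses when it hands a freshly
 * created internal pager to vm_object_enter().  The pager reference is
 * copied inside vm_object_enter() ("Copy the reference we were given"
 * above), so the caller drops the one it was handed afterwards.
 */
#if 0
static void
vm_object_enter_internal_example(
	vm_object_t		object,	/* internal object, unlocked */
	memory_object_t		pager)	/* from memory_object_create() */
{
	if (vm_object_enter(pager, object->size,
			    TRUE,	/* internal */
			    TRUE,	/* init */
			    FALSE)	/* not named */
	    != object)
		panic("vm_object_enter_internal_example: mismatch");

	/* drop the reference memory_object_create() returned */
	memory_object_deallocate(pager);
}
#endif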
2800
2801 /*
2802 * Routine: vm_object_pager_create
2803 * Purpose:
2804 * Create a memory object for an internal object.
2805 * In/out conditions:
2806 * The object is locked on entry and exit;
2807 * it may be unlocked within this call.
2808 * Limitations:
2809 * Only one thread may be performing a
2810 * vm_object_pager_create on an object at
2811 * a time. Presumably, only the pageout
2812 * daemon will be using this routine.
2813 */
2814
2815 void
2816 vm_object_pager_create(
2817 register vm_object_t object)
2818 {
2819 memory_object_t pager;
2820 vm_object_hash_entry_t entry;
2821 #if MACH_PAGEMAP
2822 vm_object_size_t size;
2823 vm_external_map_t map;
2824 #endif /* MACH_PAGEMAP */
2825
2826 XPR(XPR_VM_OBJECT, "vm_object_pager_create, object 0x%X\n",
2827 (integer_t)object, 0,0,0,0);
2828
2829 assert(object != kernel_object);
2830
2831 if (memory_manager_default_check() != KERN_SUCCESS)
2832 return;
2833
2834 /*
2835 * Prevent collapse or termination by holding a paging reference
2836 */
2837
2838 vm_object_paging_begin(object);
2839 if (object->pager_created) {
2840 /*
2841 * Someone else got to it first...
2842 * wait for them to finish initializing the ports
2843 */
2844 while (!object->pager_initialized) {
2845 vm_object_sleep(object,
2846 VM_OBJECT_EVENT_INITIALIZED,
2847 THREAD_UNINT);
2848 }
2849 vm_object_paging_end(object);
2850 return;
2851 }
2852
2853 /*
2854 * Indicate that a memory object has been assigned
2855 * before dropping the lock, to prevent a race.
2856 */
2857
2858 object->pager_created = TRUE;
2859 object->paging_offset = 0;
2860
2861 #if MACH_PAGEMAP
2862 size = object->size;
2863 #endif /* MACH_PAGEMAP */
2864 vm_object_unlock(object);
2865
2866 #if MACH_PAGEMAP
2867 map = vm_external_create(size);
2868 vm_object_lock(object);
2869 assert(object->size == size);
2870 object->existence_map = map;
2871 vm_object_unlock(object);
2872 #endif /* MACH_PAGEMAP */
2873
2874 /*
2875 * Create the [internal] pager, and associate it with this object.
2876 *
2877 * We make the association here so that vm_object_enter()
2878 * can look up the object to complete initializing it. No
2879 * user will ever map this object.
2880 */
2881 {
2882 memory_object_default_t dmm;
2883 vm_size_t cluster_size;
2884
2885 /* acquire a reference for the default memory manager */
2886 dmm = memory_manager_default_reference(&cluster_size);
2887 assert(cluster_size >= PAGE_SIZE);
2888
2889 object->cluster_size = cluster_size; /* XXX ??? */
2890 assert(object->temporary);
2891
2892 /* create our new memory object */
2893 (void) memory_object_create(dmm, object->size, &pager);
2894
2895 memory_object_default_deallocate(dmm);
2896 }
2897
2898 entry = vm_object_hash_entry_alloc(pager);
2899
2900 vm_object_cache_lock();
2901 vm_object_hash_insert(entry);
2902
2903 entry->object = object;
2904 vm_object_cache_unlock();
2905
2906 /*
2907 * A reference was returned by
2908 * memory_object_create(), and it is
2909 * copied by vm_object_enter().
2910 */
2911
2912 if (vm_object_enter(pager, object->size, TRUE, TRUE, FALSE) != object)
2913 panic("vm_object_pager_create: mismatch");
2914
2915 /*
2916 * Drop the reference we were passed.
2917 */
2918 memory_object_deallocate(pager);
2919
2920 vm_object_lock(object);
2921
2922 /*
2923 * Release the paging reference
2924 */
2925 vm_object_paging_end(object);
2926 }
2927
2928 /*
2929 * Routine: vm_object_remove
2930 * Purpose:
2931 * Eliminate the pager/object association
2932 * for this pager.
2933 * Conditions:
2934 * The object cache must be locked.
2935 */
2936 __private_extern__ void
2937 vm_object_remove(
2938 vm_object_t object)
2939 {
2940 memory_object_t pager;
2941
2942 if ((pager = object->pager) != MEMORY_OBJECT_NULL) {
2943 vm_object_hash_entry_t entry;
2944
2945 entry = vm_object_hash_lookup(pager, FALSE);
2946 if (entry != VM_OBJECT_HASH_ENTRY_NULL)
2947 entry->object = VM_OBJECT_NULL;
2948 }
2949
2950 }
2951
2952 /*
2953 * Global variables for vm_object_collapse():
2954 *
2955 * Counts for normal collapses and bypasses.
2956 * Debugging variables, to watch or disable collapse.
2957 */
2958 static long object_collapses = 0;
2959 static long object_bypasses = 0;
2960
2961 static boolean_t vm_object_collapse_allowed = TRUE;
2962 static boolean_t vm_object_bypass_allowed = TRUE;
2963
2964 static int vm_external_discarded;
2965 static int vm_external_collapsed;
2966
2967 unsigned long vm_object_collapse_encrypted = 0;
2968
2969 /*
2970 * Routine: vm_object_do_collapse
2971 * Purpose:
2972 * Collapse an object with the object backing it.
2973 * Pages in the backing object are moved into the
2974 * parent, and the backing object is deallocated.
2975 * Conditions:
2976 * Both objects and the cache are locked; the page
2977 * queues are unlocked.
2978 *
2979 */
2980 static void
2981 vm_object_do_collapse(
2982 vm_object_t object,
2983 vm_object_t backing_object)
2984 {
2985 vm_page_t p, pp;
2986 vm_object_offset_t new_offset, backing_offset;
2987 vm_object_size_t size;
2988
2989 backing_offset = object->shadow_offset;
2990 size = object->size;
2991
2992 /*
2993 * Move all in-memory pages from backing_object
2994 * to the parent. Pages that have been paged out
2995 * will be overwritten by any of the parent's
2996 * pages that shadow them.
2997 */
2998
2999 while (!queue_empty(&backing_object->memq)) {
3000
3001 p = (vm_page_t) queue_first(&backing_object->memq);
3002
3003 new_offset = (p->offset - backing_offset);
3004
3005 assert(!p->busy || p->absent);
3006
3007 /*
3008 * If the parent has a page here, or if
3009 * this page falls outside the parent,
3010 * dispose of it.
3011 *
3012 * Otherwise, move it as planned.
3013 */
3014
3015 if (p->offset < backing_offset || new_offset >= size) {
3016 VM_PAGE_FREE(p);
3017 } else {
3018 /*
3019 * ENCRYPTED SWAP:
3020 * The encryption key includes the "pager" and the
3021 * "paging_offset". These might not be the same in
3022 * the new object, so we can't just move an encrypted
3023 * page from one object to the other. We can't just
3024 * decrypt the page here either, because that would drop
3025 * the object lock.
3026 * The caller should check for encrypted pages before
3027 * attempting to collapse.
3028 */
3029 ASSERT_PAGE_DECRYPTED(p);
3030
3031 pp = vm_page_lookup(object, new_offset);
3032 if (pp == VM_PAGE_NULL) {
3033
3034 /*
3035 * Parent now has no page.
3036 * Move the backing object's page up.
3037 */
3038
3039 vm_page_rename(p, object, new_offset);
3040 #if MACH_PAGEMAP
3041 } else if (pp->absent) {
3042
3043 /*
3044 * Parent has an absent page...
3045 * it's not being paged in, so
3046 * it must really be missing from
3047 * the parent.
3048 *
3049 * Throw out the absent page...
3050 * any faults looking for that
3051 * page will restart with the new
3052 * one.
3053 */
3054
3055 VM_PAGE_FREE(pp);
3056 vm_page_rename(p, object, new_offset);
3057 #endif /* MACH_PAGEMAP */
3058 } else {
3059 assert(! pp->absent);
3060
3061 /*
3062 * Parent object has a real page.
3063 * Throw away the backing object's
3064 * page.
3065 */
3066 VM_PAGE_FREE(p);
3067 }
3068 }
3069 }
3070
3071 #if !MACH_PAGEMAP
3072 assert((!object->pager_created && object->pager == MEMORY_OBJECT_NULL)
3073 || (!backing_object->pager_created
3074 && backing_object->pager == MEMORY_OBJECT_NULL));
3075 #else
3076 assert(!object->pager_created && object->pager == MEMORY_OBJECT_NULL);
3077 #endif /* !MACH_PAGEMAP */
3078
3079 if (backing_object->pager != MEMORY_OBJECT_NULL) {
3080 vm_object_hash_entry_t entry;
3081
3082 /*
3083 * Move the pager from backing_object to object.
3084 *
3085 * XXX We're only using part of the paging space
3086 * for keeps now... we ought to discard the
3087 * unused portion.
3088 */
3089
3090 assert(!object->paging_in_progress);
3091 object->pager = backing_object->pager;
3092 entry = vm_object_hash_lookup(object->pager, FALSE);
3093 assert(entry != VM_OBJECT_HASH_ENTRY_NULL);
3094 entry->object = object;
3095 object->pager_created = backing_object->pager_created;
3096 object->pager_control = backing_object->pager_control;
3097 object->pager_ready = backing_object->pager_ready;
3098 object->pager_initialized = backing_object->pager_initialized;
3099 object->cluster_size = backing_object->cluster_size;
3100 object->paging_offset =
3101 backing_object->paging_offset + backing_offset;
3102 if (object->pager_control != MEMORY_OBJECT_CONTROL_NULL) {
3103 memory_object_control_collapse(object->pager_control,
3104 object);
3105 }
3106 }
3107
3108 vm_object_cache_unlock();
3109
3110 #if MACH_PAGEMAP
3111 /*
3112 * If the shadow offset is 0, then use the existence map from
3113 * the backing object if there is one. If the shadow offset is
3114 * not zero, toss it.
3115 *
3116 * XXX - If the shadow offset is not 0 then a bit copy is needed
3117 * if the map is to be salvaged. For now, we just toss the
3118 * old map, giving the collapsed object no map. This means that
3119 * the pager is invoked for zero fill pages. If analysis shows
3120 * that this happens frequently and is a performance hit, then
3121 * this code should be fixed to salvage the map.
3122 */
3123 assert(object->existence_map == VM_EXTERNAL_NULL);
3124 if (backing_offset || (size != backing_object->size)) {
3125 vm_external_discarded++;
3126 vm_external_destroy(backing_object->existence_map,
3127 backing_object->size);
3128 }
3129 else {
3130 vm_external_collapsed++;
3131 object->existence_map = backing_object->existence_map;
3132 }
3133 backing_object->existence_map = VM_EXTERNAL_NULL;
3134 #endif /* MACH_PAGEMAP */
3135
3136 /*
3137 * Object now shadows whatever backing_object did.
3138 * Note that the reference to backing_object->shadow
3139 * moves from within backing_object to within object.
3140 */
3141
3142 assert(!object->phys_contiguous);
3143 assert(!backing_object->phys_contiguous);
3144 object->shadow = backing_object->shadow;
3145 if (object->shadow) {
3146 object->shadow_offset += backing_object->shadow_offset;
3147 } else {
3148 /* no shadow, therefore no shadow offset... */
3149 object->shadow_offset = 0;
3150 }
3151 assert((object->shadow == VM_OBJECT_NULL) ||
3152 (object->shadow->copy != backing_object));
3153
3154 /*
3155 * Discard backing_object.
3156 *
3157 * Since the backing object has no pages, no
3158 * pager left, and no object references within it,
3159 * all that is necessary is to dispose of it.
3160 */
3161
3162 assert((backing_object->ref_count == 1) &&
3163 (backing_object->resident_page_count == 0) &&
3164 (backing_object->paging_in_progress == 0));
3165
3166 backing_object->alive = FALSE;
3167 vm_object_unlock(backing_object);
3168
3169 XPR(XPR_VM_OBJECT, "vm_object_collapse, collapsed 0x%X\n",
3170 (integer_t)backing_object, 0,0,0,0);
3171
3172 zfree(vm_object_zone, backing_object);
3173
3174 object_collapses++;
3175 }
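
/*
 * Illustrative sketch, not part of the original file: the page
 * disposition rule applied by the collapse loop above to each page of
 * the backing object.  The helper name and enum are hypothetical, and
 * the MACH_PAGEMAP absent-page case is omitted; the real loop frees or
 * renames the page directly.
 */
#if 0
typedef enum {
	COLLAPSE_FREE_BACKING_PAGE,	/* VM_PAGE_FREE(p)               */
	COLLAPSE_MOVE_TO_PARENT		/* vm_page_rename(p, object, ..) */
} collapse_disposition_t;

static collapse_disposition_t
collapse_page_disposition(
	vm_object_t		object,		/* parent, locked */
	vm_object_offset_t	backing_offset,	/* object->shadow_offset */
	vm_object_size_t	size,		/* object->size */
	vm_page_t		p)		/* page of backing object */
{
	vm_object_offset_t	new_offset = p->offset - backing_offset;

	/* page falls outside the parent's window onto the backing object */
	if (p->offset < backing_offset || new_offset >= size)
		return COLLAPSE_FREE_BACKING_PAGE;

	/* parent already covers this offset with a page of its own */
	if (vm_page_lookup(object, new_offset) != VM_PAGE_NULL)
		return COLLAPSE_FREE_BACKING_PAGE;

	/* parent has no page here: promote the backing object's page */
	return COLLAPSE_MOVE_TO_PARENT;
}
#endif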
3176
3177 static void
3178 vm_object_do_bypass(
3179 vm_object_t object,
3180 vm_object_t backing_object)
3181 {
3182 /*
3183 * Make the parent shadow the next object
3184 * in the chain.
3185 */
3186
3187 #if TASK_SWAPPER
3188 /*
3189 * Do object reference in-line to
3190 * conditionally increment shadow's
3191 * residence count. If object is not
3192 * resident, leave residence count
3193 * on shadow alone.
3194 */
3195 if (backing_object->shadow != VM_OBJECT_NULL) {
3196 vm_object_lock(backing_object->shadow);
3197 backing_object->shadow->ref_count++;
3198 if (object->res_count != 0)
3199 vm_object_res_reference(backing_object->shadow);
3200 vm_object_unlock(backing_object->shadow);
3201 }
3202 #else /* TASK_SWAPPER */
3203 vm_object_reference(backing_object->shadow);
3204 #endif /* TASK_SWAPPER */
3205
3206 assert(!object->phys_contiguous);
3207 assert(!backing_object->phys_contiguous);
3208 object->shadow = backing_object->shadow;
3209 if (object->shadow) {
3210 object->shadow_offset += backing_object->shadow_offset;
3211 } else {
3212 /* no shadow, therefore no shadow offset... */
3213 object->shadow_offset = 0;
3214 }
3215
3216 /*
3217 * Backing object might have had a copy pointer
3218 * to us. If it did, clear it.
3219 */
3220 if (backing_object->copy == object) {
3221 backing_object->copy = VM_OBJECT_NULL;
3222 }
3223
3224 /*
3225 * Drop the reference count on backing_object.
3226 #if TASK_SWAPPER
3227 * Since its ref_count was at least 2, it
3228 * will not vanish; so we don't need to call
3229 * vm_object_deallocate.
3230 * [FBDP: that doesn't seem to be true any more]
3231 *
3232 * The res_count on the backing object is
3233 * conditionally decremented. It's possible
3234 * (via vm_pageout_scan) to get here with
3235 * a "swapped" object, which has a 0 res_count,
3236 * in which case, the backing object res_count
3237 * is already down by one.
3238 #else
3239 * Don't call vm_object_deallocate unless
3240 * ref_count drops to zero.
3241 *
3242 * The ref_count can drop to zero here if the
3243 * backing object could be bypassed but not
3244 * collapsed, such as when the backing object
3245 * is temporary and cacheable.
3246 #endif
3247 */
3248 if (backing_object->ref_count > 1) {
3249 backing_object->ref_count--;
3250 #if TASK_SWAPPER
3251 if (object->res_count != 0)
3252 vm_object_res_deallocate(backing_object);
3253 assert(backing_object->ref_count > 0);
3254 #endif /* TASK_SWAPPER */
3255 vm_object_unlock(backing_object);
3256 } else {
3257
3258 /*
3259 * Drop locks so that we can deallocate
3260 * the backing object.
3261 */
3262
3263 #if TASK_SWAPPER
3264 if (object->res_count == 0) {
3265 /* XXX get a reference for the deallocate below */
3266 vm_object_res_reference(backing_object);
3267 }
3268 #endif /* TASK_SWAPPER */
3269 vm_object_unlock(object);
3270 vm_object_unlock(backing_object);
3271 vm_object_deallocate(backing_object);
3272
3273 /*
3274 * Relock object. We don't have to reverify
3275 * its state since vm_object_collapse will
3276 * do that for us as it starts at the
3277 * top of its loop.
3278 */
3279
3280 vm_object_lock(object);
3281 }
3282
3283 object_bypasses++;
3284 }
3285
3286
3287 /*
3288 * vm_object_collapse:
3289 *
3290 * Perform an object collapse or an object bypass if appropriate.
3291 * The real work of collapsing and bypassing is performed in
3292 * the routines vm_object_do_collapse and vm_object_do_bypass.
3293 *
3294 * Requires that the object be locked and the page queues be unlocked.
3295 *
3296 */
3297 static unsigned long vm_object_collapse_calls = 0;
3298 static unsigned long vm_object_collapse_objects = 0;
3299 static unsigned long vm_object_collapse_do_collapse = 0;
3300 static unsigned long vm_object_collapse_do_bypass = 0;
3301 __private_extern__ void
3302 vm_object_collapse(
3303 register vm_object_t object,
3304 register vm_object_offset_t hint_offset,
3305 boolean_t can_bypass)
3306 {
3307 register vm_object_t backing_object;
3308 register unsigned int rcount;
3309 register unsigned int size;
3310 vm_object_offset_t collapse_min_offset;
3311 vm_object_offset_t collapse_max_offset;
3312 vm_page_t page;
3313 vm_object_t original_object;
3314
3315 vm_object_collapse_calls++;
3316
3317 if (! vm_object_collapse_allowed &&
3318 ! (can_bypass && vm_object_bypass_allowed)) {
3319 return;
3320 }
3321
3322 XPR(XPR_VM_OBJECT, "vm_object_collapse, obj 0x%X\n",
3323 (integer_t)object, 0,0,0,0);
3324
3325 if (object == VM_OBJECT_NULL)
3326 return;
3327
3328 original_object = object;
3329
3330 while (TRUE) {
3331 vm_object_collapse_objects++;
3332 /*
3333 * Verify that the conditions are right for either
3334 * collapse or bypass:
3335 */
3336
3337 /*
3338 * There is a backing object, and
3339 */
3340
3341 backing_object = object->shadow;
3342 if (backing_object == VM_OBJECT_NULL) {
3343 if (object != original_object) {
3344 vm_object_unlock(object);
3345 }
3346 return;
3347 }
3348
3349 /*
3350 * No pages in the object are currently
3351 * being paged out, and
3352 */
3353 if (object->paging_in_progress != 0 ||
3354 object->absent_count != 0) {
3355 /* try and collapse the rest of the shadow chain */
3356 vm_object_lock(backing_object);
3357 if (object != original_object) {
3358 vm_object_unlock(object);
3359 }
3360 object = backing_object;
3361 continue;
3362 }
3363
3364 vm_object_lock(backing_object);
3365
3366 /*
3367 * ...
3368 * The backing object is not read_only,
3369 * and no pages in the backing object are
3370 * currently being paged out.
3371 * The backing object is internal.
3372 *
3373 */
3374
3375 if (!backing_object->internal ||
3376 backing_object->paging_in_progress != 0) {
3377 /* try and collapse the rest of the shadow chain */
3378 if (object != original_object) {
3379 vm_object_unlock(object);
3380 }
3381 object = backing_object;
3382 continue;
3383 }
3384
3385 /*
3386 * The backing object can't be a copy-object:
3387 * the shadow_offset for the copy-object must stay
3388 * as 0. Furthermore (for the 'we have all the
3389 * pages' case), if we bypass backing_object and
3390 * just shadow the next object in the chain, old
3391 * pages from that object would then have to be copied
3392 * BOTH into the (former) backing_object and into the
3393 * parent object.
3394 */
3395 if (backing_object->shadow != VM_OBJECT_NULL &&
3396 backing_object->shadow->copy == backing_object) {
3397 /* try and collapse the rest of the shadow chain */
3398 if (object != original_object) {
3399 vm_object_unlock(object);
3400 }
3401 object = backing_object;
3402 continue;
3403 }
3404
3405 /*
3406 * We can now try to either collapse the backing
3407 * object (if the parent is the only reference to
3408 * it) or (perhaps) remove the parent's reference
3409 * to it.
3410 *
3411 * If there is exactly one reference to the backing
3412 * object, we may be able to collapse it into the
3413 * parent.
3414 *
3415 * If MACH_PAGEMAP is defined:
3416 * The parent must not have a pager created for it,
3417 * since collapsing a backing_object dumps new pages
3418 * into the parent that its pager doesn't know about
3419 * (and the collapse code can't merge the existence
3420 * maps).
3421 * Otherwise:
3422 * As long as one of the objects is still not known
3423 * to the pager, we can collapse them.
3424 */
3425 if (backing_object->ref_count == 1 &&
3426 (!object->pager_created
3427 #if !MACH_PAGEMAP
3428 || !backing_object->pager_created
3429 #endif /*!MACH_PAGEMAP */
3430 ) && vm_object_collapse_allowed) {
3431
3432 XPR(XPR_VM_OBJECT,
3433 "vm_object_collapse: %x to %x, pager %x, pager_control %x\n",
3434 (integer_t)backing_object, (integer_t)object,
3435 (integer_t)backing_object->pager,
3436 (integer_t)backing_object->pager_control, 0);
3437
3438 /*
3439 * We need the cache lock for collapsing,
3440 * but we must not deadlock.
3441 */
3442
3443 if (! vm_object_cache_lock_try()) {
3444 if (object != original_object) {
3445 vm_object_unlock(object);
3446 }
3447 vm_object_unlock(backing_object);
3448 return;
3449 }
3450
3451 /*
3452 * ENCRYPTED SWAP
3453 * We can't collapse the object if it contains
3454 * any encrypted page, because the encryption key
3455 * includes the <object,offset> info. We can't
3456 * drop the object lock in vm_object_do_collapse()
3457 * so we can't decrypt the page there either.
3458 */
3459 if (vm_pages_encrypted) {
3460 collapse_min_offset = object->shadow_offset;
3461 collapse_max_offset =
3462 object->shadow_offset + object->size;
3463 queue_iterate(&backing_object->memq,
3464 page, vm_page_t, listq) {
3465 if (page->encrypted &&
3466 (page->offset >=
3467 collapse_min_offset) &&
3468 (page->offset <
3469 collapse_max_offset)) {
3470 /*
3471 * We found an encrypted page
3472 * in the backing object,
3473 * within the range covered
3474 * by the parent object: we can
3475 * not collapse them.
3476 */
3477 vm_object_collapse_encrypted++;
3478 vm_object_cache_unlock();
3479 goto try_bypass;
3480 }
3481 }
3482 }
3483
3484 /*
3485 * Collapse the object with its backing
3486 * object, and try again with the object's
3487 * new backing object.
3488 */
3489
3490 vm_object_do_collapse(object, backing_object);
3491 vm_object_collapse_do_collapse++;
3492 continue;
3493 }
3494
3495 try_bypass:
3496 /*
3497 * Collapsing the backing object was not possible
3498 * or permitted, so let's try bypassing it.
3499 */
3500
3501 if (! (can_bypass && vm_object_bypass_allowed)) {
3502 /* try and collapse the rest of the shadow chain */
3503 if (object != original_object) {
3504 vm_object_unlock(object);
3505 }
3506 object = backing_object;
3507 continue;
3508 }
3509
3510
3511 /*
3512 * If the object doesn't have all its pages present,
3513 * we have to make sure no pages in the backing object
3514 * "show through" before bypassing it.
3515 */
3516 size = atop(object->size);
3517 rcount = object->resident_page_count;
3518 if (rcount != size) {
3519 vm_object_offset_t offset;
3520 vm_object_offset_t backing_offset;
3521 unsigned int backing_rcount;
3522 unsigned int lookups = 0;
3523
3524 /*
3525 * If the backing object has a pager but no pagemap,
3526 * then we cannot bypass it, because we don't know
3527 * what pages it has.
3528 */
3529 if (backing_object->pager_created
3530 #if MACH_PAGEMAP
3531 && (backing_object->existence_map == VM_EXTERNAL_NULL)
3532 #endif /* MACH_PAGEMAP */
3533 ) {
3534 /* try and collapse the rest of the shadow chain */
3535 if (object != original_object) {
3536 vm_object_unlock(object);
3537 }
3538 object = backing_object;
3539 continue;
3540 }
3541
3542 /*
3543 * If the object has a pager but no pagemap,
3544 * then we cannot bypass it, because we don't know
3545 * what pages it has.
3546 */
3547 if (object->pager_created
3548 #if MACH_PAGEMAP
3549 && (object->existence_map == VM_EXTERNAL_NULL)
3550 #endif /* MACH_PAGEMAP */
3551 ) {
3552 /* try and collapse the rest of the shadow chain */
3553 if (object != original_object) {
3554 vm_object_unlock(object);
3555 }
3556 object = backing_object;
3557 continue;
3558 }
3559
3560 /*
3561 * If all of the pages in the backing object are
3562 * shadowed by the parent object, the parent
3563 * object no longer has to shadow the backing
3564 * object; it can shadow the next one in the
3565 * chain.
3566 *
3567 * If the backing object has existence info,
3568 * we must examine its existence info
3569 * as well.
3570 *
3571 */
3572
3573 backing_offset = object->shadow_offset;
3574 backing_rcount = backing_object->resident_page_count;
3575
3576 #define EXISTS_IN_OBJECT(obj, off, rc) \
3577 (vm_external_state_get((obj)->existence_map, \
3578 (vm_offset_t)(off)) == VM_EXTERNAL_STATE_EXISTS || \
3579 ((rc) && ++lookups && vm_page_lookup((obj), (off)) != VM_PAGE_NULL && (rc)--))
3580
3581 /*
3582 * Check the hint location first
3583 * (since it is often the quickest way out of here).
3584 */
3585 if (object->cow_hint != ~(vm_offset_t)0)
3586 hint_offset = (vm_object_offset_t)object->cow_hint;
3587 else
3588 hint_offset = (hint_offset > 8 * PAGE_SIZE_64) ?
3589 (hint_offset - 8 * PAGE_SIZE_64) : 0;
3590
3591 if (EXISTS_IN_OBJECT(backing_object, hint_offset +
3592 backing_offset, backing_rcount) &&
3593 !EXISTS_IN_OBJECT(object, hint_offset, rcount)) {
3594 /* dependency right at the hint */
3595 object->cow_hint = (vm_offset_t)hint_offset;
3596 /* try and collapse the rest of the shadow chain */
3597 if (object != original_object) {
3598 vm_object_unlock(object);
3599 }
3600 object = backing_object;
3601 continue;
3602 }
3603
3604 /*
3605 * If the object's window onto the backing_object
3606 * is large compared to the number of resident
3607 * pages in the backing object, it makes sense to
3608 * walk the backing_object's resident pages first.
3609 *
3610 * NOTE: Pages may be in both the existence map and
3611 * resident. So, we can't permanently decrement
3612 * the rcount here because the second loop may
3613 * find the same pages in the backing object's
3614 * existence map that we found here and we would
3615 * double-decrement the rcount. We also may or
3616 * may not have found the
3617 */
3618 if (backing_rcount && size >
3619 ((backing_object->existence_map) ?
3620 backing_rcount : (backing_rcount >> 1))) {
3621 unsigned int rc = rcount;
3622 vm_page_t p;
3623
3624 backing_rcount = backing_object->resident_page_count;
3625 p = (vm_page_t)queue_first(&backing_object->memq);
3626 do {
3627 /* Until we get more than one lookup lock */
3628 if (lookups > 256) {
3629 lookups = 0;
3630 delay(1);
3631 }
3632
3633 offset = (p->offset - backing_offset);
3634 if (offset < object->size &&
3635 offset != hint_offset &&
3636 !EXISTS_IN_OBJECT(object, offset, rc)) {
3637 /* found a dependency */
3638 object->cow_hint = (vm_offset_t)offset;
3639 break;
3640 }
3641 p = (vm_page_t) queue_next(&p->listq);
3642
3643 } while (--backing_rcount);
3644 if (backing_rcount != 0 ) {
3645 /* try and collapse the rest of the shadow chain */
3646 if (object != original_object) {
3647 vm_object_unlock(object);
3648 }
3649 object = backing_object;
3650 continue;
3651 }
3652 }
3653
3654 /*
3655 * Walk through the offsets looking for pages in the
3656 * backing object that show through to the object.
3657 */
3658 if (backing_rcount || backing_object->existence_map) {
3659 offset = hint_offset;
3660
3661 while((offset =
3662 (offset + PAGE_SIZE_64 < object->size) ?
3663 (offset + PAGE_SIZE_64) : 0) != hint_offset) {
3664
3665 /* Until we get more than one lookup lock */
3666 if (lookups > 256) {
3667 lookups = 0;
3668 delay(1);
3669 }
3670
3671 if (EXISTS_IN_OBJECT(backing_object, offset +
3672 backing_offset, backing_rcount) &&
3673 !EXISTS_IN_OBJECT(object, offset, rcount)) {
3674 /* found a dependency */
3675 object->cow_hint = (vm_offset_t)offset;
3676 break;
3677 }
3678 }
3679 if (offset != hint_offset) {
3680 /* try and collapse the rest of the shadow chain */
3681 if (object != original_object) {
3682 vm_object_unlock(object);
3683 }
3684 object = backing_object;
3685 continue;
3686 }
3687 }
3688 }
3689
3690 /* reset the offset hint for any objects deeper in the chain */
3691 object->cow_hint = (vm_offset_t)0;
3692
3693 /*
3694 * All interesting pages in the backing object
3695 * already live in the parent or its pager.
3696 * Thus we can bypass the backing object.
3697 */
3698
3699 vm_object_do_bypass(object, backing_object);
3700 vm_object_collapse_do_bypass++;
3701
3702 /*
3703 * Try again with this object's new backing object.
3704 */
3705
3706 continue;
3707 }
3708
3709 if (object != original_object) {
3710 vm_object_unlock(object);
3711 }
3712 }
3713
3714 /*
3715 * Routine: vm_object_page_remove: [internal]
3716 * Purpose:
3717 * Removes all physical pages in the specified
3718 * object range from the object's list of pages.
3719 *
3720 * In/out conditions:
3721 * The object must be locked.
3722 * The object must not have paging_in_progress, usually
3723 * guaranteed by not having a pager.
3724 */
3725 unsigned int vm_object_page_remove_lookup = 0;
3726 unsigned int vm_object_page_remove_iterate = 0;
3727
3728 __private_extern__ void
3729 vm_object_page_remove(
3730 register vm_object_t object,
3731 register vm_object_offset_t start,
3732 register vm_object_offset_t end)
3733 {
3734 register vm_page_t p, next;
3735
3736 /*
3737 * One and two page removals are most popular.
3738 * The factor of 16 here is somewhat arbitrary.
3739 * It balances vm_object_lookup vs iteration.
3740 */
3741
3742 if (atop_64(end - start) < (unsigned)object->resident_page_count/16) {
3743 vm_object_page_remove_lookup++;
3744
3745 for (; start < end; start += PAGE_SIZE_64) {
3746 p = vm_page_lookup(object, start);
3747 if (p != VM_PAGE_NULL) {
3748 assert(!p->cleaning && !p->pageout);
3749 if (!p->fictitious)
3750 pmap_disconnect(p->phys_page);
3751 VM_PAGE_FREE(p);
3752 }
3753 }
3754 } else {
3755 vm_object_page_remove_iterate++;
3756
3757 p = (vm_page_t) queue_first(&object->memq);
3758 while (!queue_end(&object->memq, (queue_entry_t) p)) {
3759 next = (vm_page_t) queue_next(&p->listq);
3760 if ((start <= p->offset) && (p->offset < end)) {
3761 assert(!p->cleaning && !p->pageout);
3762 if (!p->fictitious)
3763 pmap_disconnect(p->phys_page);
3764 VM_PAGE_FREE(p);
3765 }
3766 p = next;
3767 }
3768 }
3769 }
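
/*
 * Illustrative sketch, not part of the original file: the heuristic
 * vm_object_page_remove() above uses to choose between per-offset
 * lookups and a single walk of the object's page queue.  The helper
 * name is hypothetical.
 */
#if 0
static boolean_t
page_remove_should_use_lookup(
	vm_object_t		object,
	vm_object_offset_t	start,
	vm_object_offset_t	end)
{
	/*
	 * Removing a few pages from a populous object: per-offset
	 * vm_page_lookup() calls are cheaper than visiting every
	 * resident page.  The divisor of 16 is the arbitrary balance
	 * point noted above.
	 */
	return atop_64(end - start) <
	    (unsigned) object->resident_page_count / 16;
}
#endif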
3770
3771
3772 /*
3773 * Routine: vm_object_coalesce
3774 * Function: Coalesces two objects backing up adjoining
3775 * regions of memory into a single object.
3776 *
3777 * returns TRUE if objects were combined.
3778 *
3779 * NOTE: Only works at the moment if the second object is NULL -
3780 * if it's not, which object do we lock first?
3781 *
3782 * Parameters:
3783 * prev_object First object to coalesce
3784 * prev_offset Offset into prev_object
3785 * next_object Second object to coalesce
3786 * next_offset Offset into next_object
3787 *
3788 * prev_size Size of reference to prev_object
3789 * next_size Size of reference to next_object
3790 *
3791 * Conditions:
3792 * The object(s) must *not* be locked. The map must be locked
3793 * to preserve the reference to the object(s).
3794 */
3795 static int vm_object_coalesce_count = 0;
3796
3797 __private_extern__ boolean_t
3798 vm_object_coalesce(
3799 register vm_object_t prev_object,
3800 vm_object_t next_object,
3801 vm_object_offset_t prev_offset,
3802 __unused vm_object_offset_t next_offset,
3803 vm_object_size_t prev_size,
3804 vm_object_size_t next_size)
3805 {
3806 vm_object_size_t newsize;
3807
3808 #ifdef lint
3809 next_offset++;
3810 #endif /* lint */
3811
3812 if (next_object != VM_OBJECT_NULL) {
3813 return(FALSE);
3814 }
3815
3816 if (prev_object == VM_OBJECT_NULL) {
3817 return(TRUE);
3818 }
3819
3820 XPR(XPR_VM_OBJECT,
3821 "vm_object_coalesce: 0x%X prev_off 0x%X prev_size 0x%X next_size 0x%X\n",
3822 (integer_t)prev_object, prev_offset, prev_size, next_size, 0);
3823
3824 vm_object_lock(prev_object);
3825
3826 /*
3827 * Try to collapse the object first
3828 */
3829 vm_object_collapse(prev_object, prev_offset, TRUE);
3830
3831 /*
3832 * Can't coalesce if pages not mapped to
3833 * prev_entry may be in use in any way:
3834 * . more than one reference
3835 * . paged out
3836 * . shadows another object
3837 * . has a copy elsewhere
3838 * . is purgable
3839 * . paging references (pages might be in page-list)
3840 */
3841
3842 if ((prev_object->ref_count > 1) ||
3843 prev_object->pager_created ||
3844 (prev_object->shadow != VM_OBJECT_NULL) ||
3845 (prev_object->copy != VM_OBJECT_NULL) ||
3846 (prev_object->true_share != FALSE) ||
3847 (prev_object->purgable != VM_OBJECT_NONPURGABLE) ||
3848 (prev_object->paging_in_progress != 0)) {
3849 vm_object_unlock(prev_object);
3850 return(FALSE);
3851 }
3852
3853 vm_object_coalesce_count++;
3854
3855 /*
3856 * Remove any pages that may still be in the object from
3857 * a previous deallocation.
3858 */
3859 vm_object_page_remove(prev_object,
3860 prev_offset + prev_size,
3861 prev_offset + prev_size + next_size);
3862
3863 /*
3864 * Extend the object if necessary.
3865 */
3866 newsize = prev_offset + prev_size + next_size;
3867 if (newsize > prev_object->size) {
3868 #if MACH_PAGEMAP
3869 /*
3870 * We cannot extend an object that has existence info,
3871 * since the existence info might then fail to cover
3872 * the entire object.
3873 *
3874 * This assertion must be true because the object
3875 * has no pager, and we only create existence info
3876 * for objects with pagers.
3877 */
3878 assert(prev_object->existence_map == VM_EXTERNAL_NULL);
3879 #endif /* MACH_PAGEMAP */
3880 prev_object->size = newsize;
3881 }
3882
3883 vm_object_unlock(prev_object);
3884 return(TRUE);
3885 }
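/*
 * Illustrative sketch (not part of the build): how a map layer might
 * call vm_object_coalesce() when extending an existing entry.  The
 * entry_* and grow_size names below are hypothetical; the call itself
 * follows the signature and locking conditions documented above
 * (objects unlocked, map locked by the caller, next_object NULL).
 *
 *	if (vm_object_coalesce(entry_object,		// prev_object
 *			       VM_OBJECT_NULL,		// next_object
 *			       entry_offset,		// prev_offset
 *			       (vm_object_offset_t) 0,	// next_offset (unused)
 *			       entry_size,		// prev_size
 *			       grow_size)) {		// next_size
 *		// prev_object now covers prev_offset through
 *		// prev_offset + prev_size + next_size; extend the map
 *		// entry in place
 *	} else {
 *		// allocate a separate object for the new range instead
 *	}
 */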
3886
3887 /*
3888 * Attach a set of physical pages to an object, so that they can
3889 * be mapped by mapping the object. Typically used to map IO memory.
3890 *
3891 * The mapping function and its private data are used to obtain the
3892 * physical addresses for each page to be mapped.
3893 */
3894 void
3895 vm_object_page_map(
3896 vm_object_t object,
3897 vm_object_offset_t offset,
3898 vm_object_size_t size,
3899 vm_object_offset_t (*map_fn)(void *map_fn_data,
3900 vm_object_offset_t offset),
3901 void *map_fn_data) /* private to map_fn */
3902 {
3903 int num_pages;
3904 int i;
3905 vm_page_t m;
3906 vm_page_t old_page;
3907 vm_object_offset_t addr;
3908
3909 num_pages = atop_64(size);
3910
3911 for (i = 0; i < num_pages; i++, offset += PAGE_SIZE_64) {
3912
3913 addr = (*map_fn)(map_fn_data, offset);
3914
3915 while ((m = vm_page_grab_fictitious()) == VM_PAGE_NULL)
3916 vm_page_more_fictitious();
3917
3918 vm_object_lock(object);
3919 if ((old_page = vm_page_lookup(object, offset))
3920 != VM_PAGE_NULL)
3921 {
3922 vm_page_lock_queues();
3923 vm_page_free(old_page);
3924 vm_page_unlock_queues();
3925 }
3926
3927 vm_page_init(m, addr);
3928 /* private normally requires lock_queues but since we */
3929 /* are initializing the page, it's not necessary here */
3930 m->private = TRUE; /* don't free page */
3931 m->wire_count = 1;
3932 vm_page_insert(m, object, offset);
3933
3934 PAGE_WAKEUP_DONE(m);
3935 vm_object_unlock(object);
3936 }
3937 }
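/*
 * Illustrative sketch (hypothetical, not part of the build): a map_fn
 * suitable for vm_object_page_map().  The function is handed the
 * caller's private data pointer and an offset into the object, and must
 * return the physical address backing that page.  Here the private data
 * is assumed to point at the base physical address of a linear device
 * aperture.
 *
 *	static vm_object_offset_t
 *	example_map_fn(void *map_fn_data, vm_object_offset_t offset)
 *	{
 *		vm_object_offset_t base = *(vm_object_offset_t *) map_fn_data;
 *
 *		return base + offset;
 *	}
 *
 *	vm_object_page_map(object, 0, size, example_map_fn, &aperture_base);
 */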
3938
3939 #include <mach_kdb.h>
3940
3941 #if MACH_KDB
3942 #include <ddb/db_output.h>
3943 #include <vm/vm_print.h>
3944
3945 #define printf kdbprintf
3946
3947 extern boolean_t vm_object_cached(
3948 vm_object_t object);
3949
3950 extern void print_bitstring(
3951 char byte);
3952
3953 boolean_t vm_object_print_pages = FALSE;
3954
3955 void
3956 print_bitstring(
3957 char byte)
3958 {
3959 printf("%c%c%c%c%c%c%c%c",
3960 ((byte & (1 << 0)) ? '1' : '0'),
3961 ((byte & (1 << 1)) ? '1' : '0'),
3962 ((byte & (1 << 2)) ? '1' : '0'),
3963 ((byte & (1 << 3)) ? '1' : '0'),
3964 ((byte & (1 << 4)) ? '1' : '0'),
3965 ((byte & (1 << 5)) ? '1' : '0'),
3966 ((byte & (1 << 6)) ? '1' : '0'),
3967 ((byte & (1 << 7)) ? '1' : '0'));
3968 }
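/*
 * Note that bit 0 is printed first, so the output reads low bit to high
 * bit from left to right; e.g. print_bitstring(0x05) prints "10100000".
 */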
3969
3970 boolean_t
3971 vm_object_cached(
3972 register vm_object_t object)
3973 {
3974 register vm_object_t o;
3975
3976 queue_iterate(&vm_object_cached_list, o, vm_object_t, cached_list) {
3977 if (object == o) {
3978 return TRUE;
3979 }
3980 }
3981 return FALSE;
3982 }
3983
3984 #if MACH_PAGEMAP
3985 /*
3986 * vm_external_print: [ debug ]
3987 */
3988 void
3989 vm_external_print(
3990 vm_external_map_t emap,
3991 vm_size_t size)
3992 {
3993 if (emap == VM_EXTERNAL_NULL) {
3994 printf("0 ");
3995 } else {
3996 vm_size_t existence_size = stob(size);
3997 printf("{ size=%d, map=[", existence_size);
3998 if (existence_size > 0) {
3999 print_bitstring(emap[0]);
4000 }
4001 if (existence_size > 1) {
4002 print_bitstring(emap[1]);
4003 }
4004 if (existence_size > 2) {
4005 printf("...");
4006 print_bitstring(emap[existence_size-1]);
4007 }
4008 printf("] }\n");
4009 }
4010 return;
4011 }
4012 #endif /* MACH_PAGEMAP */
4013
4014 int
4015 vm_follow_object(
4016 vm_object_t object)
4017 {
4018 int count = 0;
4019 int orig_db_indent = db_indent;
4020
4021 while (TRUE) {
4022 if (object == VM_OBJECT_NULL) {
4023 db_indent = orig_db_indent;
4024 return count;
4025 }
4026
4027 count += 1;
4028
4029 iprintf("object 0x%x", object);
4030 printf(", shadow=0x%x", object->shadow);
4031 printf(", copy=0x%x", object->copy);
4032 printf(", pager=0x%x", object->pager);
4033 printf(", ref=%d\n", object->ref_count);
4034
4035 db_indent += 2;
4036 object = object->shadow;
4037 }
4038
4039 }
4040
4041 /*
4042 * vm_object_print: [ debug ]
4043 */
4044 void
4045 vm_object_print(
4046 db_addr_t db_addr,
4047 __unused boolean_t have_addr,
4048 __unused int arg_count,
4049 __unused char *modif)
4050 {
4051 vm_object_t object;
4052 register vm_page_t p;
4053 const char *s;
4054
4055 register int count;
4056
4057 object = (vm_object_t) (long) db_addr;
4058 if (object == VM_OBJECT_NULL)
4059 return;
4060
4061 iprintf("object 0x%x\n", object);
4062
4063 db_indent += 2;
4064
4065 iprintf("size=0x%x", object->size);
4066 printf(", cluster=0x%x", object->cluster_size);
4067 printf(", memq_hint=%p", object->memq_hint);
4068 printf(", ref_count=%d\n", object->ref_count);
4069 iprintf("");
4070 #if TASK_SWAPPER
4071 printf("res_count=%d, ", object->res_count);
4072 #endif /* TASK_SWAPPER */
4073 printf("resident_page_count=%d\n", object->resident_page_count);
4074
4075 iprintf("shadow=0x%x", object->shadow);
4076 if (object->shadow) {
4077 register int i = 0;
4078 vm_object_t shadow = object;
4079 while((shadow = shadow->shadow))
4080 i++;
4081 printf(" (depth %d)", i);
4082 }
4083 printf(", copy=0x%x", object->copy);
4084 printf(", shadow_offset=0x%x", object->shadow_offset);
4085 printf(", last_alloc=0x%x\n", object->last_alloc);
4086
4087 iprintf("pager=0x%x", object->pager);
4088 printf(", paging_offset=0x%x", object->paging_offset);
4089 printf(", pager_control=0x%x\n", object->pager_control);
4090
4091 iprintf("copy_strategy=%d[", object->copy_strategy);
4092 switch (object->copy_strategy) {
4093 case MEMORY_OBJECT_COPY_NONE:
4094 printf("copy_none");
4095 break;
4096
4097 case MEMORY_OBJECT_COPY_CALL:
4098 printf("copy_call");
4099 break;
4100
4101 case MEMORY_OBJECT_COPY_DELAY:
4102 printf("copy_delay");
4103 break;
4104
4105 case MEMORY_OBJECT_COPY_SYMMETRIC:
4106 printf("copy_symmetric");
4107 break;
4108
4109 case MEMORY_OBJECT_COPY_INVALID:
4110 printf("copy_invalid");
4111 break;
4112
4113 default:
4114 printf("?");
4115 }
4116 printf("]");
4117 printf(", absent_count=%d\n", object->absent_count);
4118
4119 iprintf("all_wanted=0x%x<", object->all_wanted);
4120 s = "";
4121 if (vm_object_wanted(object, VM_OBJECT_EVENT_INITIALIZED)) {
4122 printf("%sinit", s);
4123 s = ",";
4124 }
4125 if (vm_object_wanted(object, VM_OBJECT_EVENT_PAGER_READY)) {
4126 printf("%sready", s);
4127 s = ",";
4128 }
4129 if (vm_object_wanted(object, VM_OBJECT_EVENT_PAGING_IN_PROGRESS)) {
4130 printf("%spaging", s);
4131 s = ",";
4132 }
4133 if (vm_object_wanted(object, VM_OBJECT_EVENT_ABSENT_COUNT)) {
4134 printf("%sabsent", s);
4135 s = ",";
4136 }
4137 if (vm_object_wanted(object, VM_OBJECT_EVENT_LOCK_IN_PROGRESS)) {
4138 printf("%slock", s);
4139 s = ",";
4140 }
4141 if (vm_object_wanted(object, VM_OBJECT_EVENT_UNCACHING)) {
4142 printf("%suncaching", s);
4143 s = ",";
4144 }
4145 if (vm_object_wanted(object, VM_OBJECT_EVENT_COPY_CALL)) {
4146 printf("%scopy_call", s);
4147 s = ",";
4148 }
4149 if (vm_object_wanted(object, VM_OBJECT_EVENT_CACHING)) {
4150 printf("%scaching", s);
4151 s = ",";
4152 }
4153 printf(">");
4154 printf(", paging_in_progress=%d\n", object->paging_in_progress);
4155
4156 iprintf("%screated, %sinit, %sready, %spersist, %strusted, %spageout, %s, %s\n",
4157 (object->pager_created ? "" : "!"),
4158 (object->pager_initialized ? "" : "!"),
4159 (object->pager_ready ? "" : "!"),
4160 (object->can_persist ? "" : "!"),
4161 (object->pager_trusted ? "" : "!"),
4162 (object->pageout ? "" : "!"),
4163 (object->internal ? "internal" : "external"),
4164 (object->temporary ? "temporary" : "permanent"));
4165 iprintf("%salive, %spurgable, %spurgable_volatile, %spurgable_empty, %sshadowed, %scached, %sprivate\n",
4166 (object->alive ? "" : "!"),
4167 ((object->purgable != VM_OBJECT_NONPURGABLE) ? "" : "!"),
4168 ((object->purgable == VM_OBJECT_PURGABLE_VOLATILE) ? "" : "!"),
4169 ((object->purgable == VM_OBJECT_PURGABLE_EMPTY) ? "" : "!"),
4170 (object->shadowed ? "" : "!"),
4171 (vm_object_cached(object) ? "" : "!"),
4172 (object->private ? "" : "!"));
4173 iprintf("%sadvisory_pageout, %ssilent_overwrite\n",
4174 (object->advisory_pageout ? "" : "!"),
4175 (object->silent_overwrite ? "" : "!"));
4176
4177 #if MACH_PAGEMAP
4178 iprintf("existence_map=");
4179 vm_external_print(object->existence_map, object->size);
4180 #endif /* MACH_PAGEMAP */
4181 #if MACH_ASSERT
4182 iprintf("paging_object=0x%x\n", object->paging_object);
4183 #endif /* MACH_ASSERT */
4184
4185 if (vm_object_print_pages) {
4186 count = 0;
4187 p = (vm_page_t) queue_first(&object->memq);
4188 while (!queue_end(&object->memq, (queue_entry_t) p)) {
4189 if (count == 0) {
4190 iprintf("memory:=");
4191 } else if (count == 2) {
4192 printf("\n");
4193 iprintf(" ...");
4194 count = 0;
4195 } else {
4196 printf(",");
4197 }
4198 count++;
4199
4200 printf("(off=0x%llX,page=%p)", p->offset, p);
4201 p = (vm_page_t) queue_next(&p->listq);
4202 }
4203 if (count != 0) {
4204 printf("\n");
4205 }
4206 }
4207 db_indent -= 2;
4208 }
4209
4210
4211 /*
4212 * vm_object_find [ debug ]
4213 *
4214 * Find all tasks which reference the given vm_object.
4215 */
4216
4217 boolean_t vm_object_find(vm_object_t object);
4218 boolean_t vm_object_print_verbose = FALSE;
4219
4220 boolean_t
4221 vm_object_find(
4222 vm_object_t object)
4223 {
4224 task_t task;
4225 vm_map_t map;
4226 vm_map_entry_t entry;
4227 processor_set_t pset = &default_pset;
4228 boolean_t found = FALSE;
4229
4230 queue_iterate(&pset->tasks, task, task_t, pset_tasks) {
4231 map = task->map;
4232 for (entry = vm_map_first_entry(map);
4233 entry && entry != vm_map_to_entry(map);
4234 entry = entry->vme_next) {
4235
4236 vm_object_t obj;
4237
4238 /*
4239 * For the time being, skip submaps.
4240 * Only the kernel can have submaps,
4241 * so unless we are interested in
4242 * kernel objects we can simply skip
4243 * them. See sb/dejan/nmk18b7/src/mach_kernel/vm
4244 * for a full solution.
4245 */
4246 if (entry->is_sub_map)
4247 continue;
4248 if (entry)
4249 obj = entry->object.vm_object;
4250 else
4251 continue;
4252
4253 while (obj != VM_OBJECT_NULL) {
4254 if (obj == object) {
4255 if (!found) {
4256 printf("TASK\t\tMAP\t\tENTRY\n");
4257 found = TRUE;
4258 }
4259 printf("0x%x\t0x%x\t0x%x\n",
4260 task, map, entry);
4261 }
4262 obj = obj->shadow;
4263 }
4264 }
4265 }
4266
4267 return(found);
4268 }
4269
4270 #endif /* MACH_KDB */
4271
4272 kern_return_t
4273 vm_object_populate_with_private(
4274 vm_object_t object,
4275 vm_object_offset_t offset,
4276 ppnum_t phys_page,
4277 vm_size_t size)
4278 {
4279 ppnum_t base_page;
4280 vm_object_offset_t base_offset;
4281
4282
4283 if(!object->private)
4284 return KERN_FAILURE;
4285
4286 base_page = phys_page;
4287
4288 vm_object_lock(object);
4289 if(!object->phys_contiguous) {
4290 vm_page_t m;
4291 if((base_offset = trunc_page_64(offset)) != offset) {
4292 vm_object_unlock(object);
4293 return KERN_FAILURE;
4294 }
4295 base_offset += object->paging_offset;
4296 while(size) {
4297 m = vm_page_lookup(object, base_offset);
4298 if(m != VM_PAGE_NULL) {
4299 if(m->fictitious) {
4300 vm_page_lock_queues();
4301 m->fictitious = FALSE;
4302 m->private = TRUE;
4303 m->phys_page = base_page;
4304 if(!m->busy) {
4305 m->busy = TRUE;
4306 }
4307 if(!m->absent) {
4308 m->absent = TRUE;
4309 object->absent_count++;
4310 }
4311 m->list_req_pending = TRUE;
4312 vm_page_unlock_queues();
4313 } else if (m->phys_page != base_page) {
4314 /* pmap call to clear old mapping */
4315 pmap_disconnect(m->phys_page);
4316 m->phys_page = base_page;
4317 }
4318
4319 /*
4320 * ENCRYPTED SWAP:
4321 * We're not pointing to the same
4322 * physical page any longer and the
4323 * contents of the new one are not
4324 * supposed to be encrypted.
4325 * XXX What happens to the original
4326 * physical page? Is it lost?
4327 */
4328 m->encrypted = FALSE;
4329
4330 } else {
4331 while ((m = vm_page_grab_fictitious())
4332 == VM_PAGE_NULL)
4333 vm_page_more_fictitious();
4334 vm_page_lock_queues();
4335 m->fictitious = FALSE;
4336 m->private = TRUE;
4337 m->phys_page = base_page;
4338 m->list_req_pending = TRUE;
4339 m->absent = TRUE;
4340 m->unusual = TRUE;
4341 object->absent_count++;
4342 vm_page_unlock_queues();
4343 vm_page_insert(m, object, base_offset);
4344 }
4345 base_page++; /* Go to the next physical page */
4346 base_offset += PAGE_SIZE;
4347 size -= PAGE_SIZE;
4348 }
4349 } else {
4350 /* NOTE: we should check the original settings here */
4351 /* if we have a size > zero a pmap call should be made */
4352 /* to disable the range */
4353
4354 /* pmap_? */
4355
4356 /* shadows on contiguous memory are not allowed */
4357 /* we therefore can use the offset field */
4358 object->shadow_offset = (vm_object_offset_t)(phys_page << 12);
4359 object->size = size;
4360 }
4361 vm_object_unlock(object);
4362 return KERN_SUCCESS;
4363 }
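/*
 * Illustrative sketch (hypothetical): backing a private VM object with
 * a device's physical pages.  "dev_object" is assumed to have been
 * created with its "private" flag set, and "dev_ppnum" is the first
 * physical page number of the device range; the routine above rejects
 * non-private objects and unaligned offsets.
 *
 *	kr = vm_object_populate_with_private(dev_object,
 *					     (vm_object_offset_t) 0,
 *					     dev_ppnum,
 *					     round_page(dev_len));
 *	if (kr != KERN_SUCCESS) {
 *		// object was not private, or the offset was not page aligned
 *	}
 */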
4364
4365 /*
4366 * memory_object_free_from_cache:
4367 *
4368 * Walk the vm_object cache list, removing and freeing vm_objects
4369 * which are backed by the pager identified by the caller (pager_ops).
4370 * Remove up to "count" objects, if that many are available
4371 * in the cache.
4372 *
4373 * Walk the list at most once, return the number of vm_objects
4374 * actually freed.
4375 */
4376
4377 __private_extern__ kern_return_t
4378 memory_object_free_from_cache(
4379 __unused host_t host,
4380 memory_object_pager_ops_t pager_ops,
4381 int *count)
4382 {
4383
4384 int object_released = 0;
4385
4386 register vm_object_t object = VM_OBJECT_NULL;
4387 vm_object_t shadow;
4388
4389 /*
4390 if(host == HOST_NULL)
4391 return(KERN_INVALID_ARGUMENT);
4392 */
4393
4394 try_again:
4395 vm_object_cache_lock();
4396
4397 queue_iterate(&vm_object_cached_list, object,
4398 vm_object_t, cached_list) {
4399 if (object->pager &&
4400 (pager_ops == object->pager->mo_pager_ops)) {
4401 vm_object_lock(object);
4402 queue_remove(&vm_object_cached_list, object,
4403 vm_object_t, cached_list);
4404 vm_object_cached_count--;
4405
4406 /*
4407 * Since this object is in the cache, we know
4408 * that it is initialized and has only a pager's
4409 * (implicit) reference. Take a reference to avoid
4410 * recursive deallocations.
4411 */
4412
4413 assert(object->pager_initialized);
4414 assert(object->ref_count == 0);
4415 object->ref_count++;
4416
4417 /*
4418 * Terminate the object.
4419 * If the object had a shadow, we let
4420 * vm_object_deallocate deallocate it.
4421 * "pageout" objects have a shadow, but
4422 * maintain a "paging reference" rather
4423 * than a normal reference.
4424 * (We are careful here to limit recursion.)
4425 */
4426 shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
4427 if ((vm_object_terminate(object) == KERN_SUCCESS)
4428 && (shadow != VM_OBJECT_NULL)) {
4429 vm_object_deallocate(shadow);
4430 }
4431
4432 if(object_released++ == *count)
4433 return KERN_SUCCESS;
4434 goto try_again;
4435 }
4436 }
4437 vm_object_cache_unlock();
4438 *count = object_released;
4439 return KERN_SUCCESS;
4440 }
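/*
 * Illustrative sketch (hypothetical): releasing up to 32 cached objects
 * backed by a particular pager.  The host argument is unused by the
 * implementation above; "ops" stands for a hypothetical
 * memory_object_pager_ops_t describing the pager of interest.
 *
 *	int count = 32;
 *
 *	(void) memory_object_free_from_cache(HOST_NULL, ops, &count);
 *	// per the comment above, count reports the number of objects
 *	// actually freed on the full-walk path
 */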
4441
4442
4443
4444 kern_return_t
4445 memory_object_create_named(
4446 memory_object_t pager,
4447 memory_object_offset_t size,
4448 memory_object_control_t *control)
4449 {
4450 vm_object_t object;
4451 vm_object_hash_entry_t entry;
4452
4453 *control = MEMORY_OBJECT_CONTROL_NULL;
4454 if (pager == MEMORY_OBJECT_NULL)
4455 return KERN_INVALID_ARGUMENT;
4456
4457 vm_object_cache_lock();
4458 entry = vm_object_hash_lookup(pager, FALSE);
4459 if ((entry != VM_OBJECT_HASH_ENTRY_NULL) &&
4460 (entry->object != VM_OBJECT_NULL)) {
4461 if (entry->object->named == TRUE)
4462 panic("memory_object_create_named: caller already holds the right");
4463 }
4464 vm_object_cache_unlock();
4465 if ((object = vm_object_enter(pager, size, FALSE, FALSE, TRUE))
4466 == VM_OBJECT_NULL) {
4467 return(KERN_INVALID_OBJECT);
4468 }
4469
4470 /* wait for object (if any) to be ready */
4471 if (object != VM_OBJECT_NULL) {
4472 vm_object_lock(object);
4473 object->named = TRUE;
4474 while (!object->pager_ready) {
4475 vm_object_sleep(object,
4476 VM_OBJECT_EVENT_PAGER_READY,
4477 THREAD_UNINT);
4478 }
4479 *control = object->pager_control;
4480 vm_object_unlock(object);
4481 }
4482 return (KERN_SUCCESS);
4483 }
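/*
 * Illustrative sketch (hypothetical): a pager claiming the "named"
 * right on its memory object at setup time.  "pager" and "object_size"
 * are hypothetical; on success the returned control can later be used
 * with memory_object_recover_named() below.
 *
 *	memory_object_control_t control;
 *
 *	kr = memory_object_create_named(pager, object_size, &control);
 *	if (kr != KERN_SUCCESS)
 *		return kr;	// bad pager, or vm_object_enter() failed
 */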
4484
4485
4486 /*
4487 * Routine: memory_object_recover_named [user interface]
4488 * Purpose:
4489 * Attempt to recover a named reference for a VM object.
4490 * VM will verify that the object has not already started
4491 * down the termination path, and if it has, will optionally
4492 * wait for that to finish.
4493 * Returns:
4494 * KERN_SUCCESS - we recovered a named reference on the object
4495 * KERN_FAILURE - we could not recover a reference (object dead)
4496 * KERN_INVALID_ARGUMENT - bad memory object control
4497 */
4498 kern_return_t
4499 memory_object_recover_named(
4500 memory_object_control_t control,
4501 boolean_t wait_on_terminating)
4502 {
4503 vm_object_t object;
4504
4505 vm_object_cache_lock();
4506 object = memory_object_control_to_vm_object(control);
4507 if (object == VM_OBJECT_NULL) {
4508 vm_object_cache_unlock();
4509 return (KERN_INVALID_ARGUMENT);
4510 }
4511
4512 restart:
4513 vm_object_lock(object);
4514
4515 if (object->terminating && wait_on_terminating) {
4516 vm_object_cache_unlock();
4517 vm_object_wait(object,
4518 VM_OBJECT_EVENT_PAGING_IN_PROGRESS,
4519 THREAD_UNINT);
4520 vm_object_cache_lock();
4521 goto restart;
4522 }
4523
4524 if (!object->alive) {
4525 vm_object_cache_unlock();
4526 vm_object_unlock(object);
4527 return KERN_FAILURE;
4528 }
4529
4530 if (object->named == TRUE) {
4531 vm_object_cache_unlock();
4532 vm_object_unlock(object);
4533 return KERN_SUCCESS;
4534 }
4535
4536 if((object->ref_count == 0) && (!object->terminating)){
4537 queue_remove(&vm_object_cached_list, object,
4538 vm_object_t, cached_list);
4539 vm_object_cached_count--;
4540 XPR(XPR_VM_OBJECT_CACHE,
4541 "memory_object_recover_named: removing %X, head (%X, %X)\n",
4542 (integer_t)object,
4543 (integer_t)vm_object_cached_list.next,
4544 (integer_t)vm_object_cached_list.prev, 0,0);
4545 }
4546
4547 vm_object_cache_unlock();
4548
4549 object->named = TRUE;
4550 object->ref_count++;
4551 vm_object_res_reference(object);
4552 while (!object->pager_ready) {
4553 vm_object_sleep(object,
4554 VM_OBJECT_EVENT_PAGER_READY,
4555 THREAD_UNINT);
4556 }
4557 vm_object_unlock(object);
4558 return (KERN_SUCCESS);
4559 }
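/*
 * Illustrative sketch (hypothetical): re-acquiring the named reference
 * for a control port, waiting out any termination in progress.
 *
 *	kr = memory_object_recover_named(control, TRUE);
 *	switch (kr) {
 *	case KERN_SUCCESS:		// named reference recovered
 *		break;
 *	case KERN_FAILURE:		// object already dead
 *		break;
 *	case KERN_INVALID_ARGUMENT:	// bad memory object control
 *		break;
 *	}
 */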
4560
4561
4562 /*
4563 * vm_object_release_name:
4564 *
4565 * Enforces name semantic on memory_object reference count decrement
4566 * This routine should not be called unless the caller holds a name
4567 * reference gained through the memory_object_create_named.
4568 *
4569 * If the TERMINATE_IDLE flag is set, the call will return if the
4570 * reference count is not 1, i.e. if the object is not idle with the only
4571 * remaining reference being the name.
4572 * If the decision is made to proceed, the named flag is set to
4573 * FALSE and the reference count is decremented. If the RESPECT_CACHE
4574 * flag is set and the reference count has gone to zero, the
4575 * memory_object is checked to see if it is cacheable; otherwise, when
4576 * the reference count is zero, it is simply terminated.
4577 */
4578
4579 __private_extern__ kern_return_t
4580 vm_object_release_name(
4581 vm_object_t object,
4582 int flags)
4583 {
4584 vm_object_t shadow;
4585 boolean_t original_object = TRUE;
4586
4587 while (object != VM_OBJECT_NULL) {
4588
4589 /*
4590 * The cache holds a reference (uncounted) to
4591 * the object. We must lock it before removing
4592 * the object.
4593 *
4594 */
4595
4596 vm_object_cache_lock();
4597 vm_object_lock(object);
4598 assert(object->alive);
4599 if(original_object)
4600 assert(object->named);
4601 assert(object->ref_count > 0);
4602
4603 /*
4604 * We have to wait for initialization before
4605 * destroying or caching the object.
4606 */
4607
4608 if (object->pager_created && !object->pager_initialized) {
4609 assert(!object->can_persist);
4610 vm_object_assert_wait(object,
4611 VM_OBJECT_EVENT_INITIALIZED,
4612 THREAD_UNINT);
4613 vm_object_unlock(object);
4614 vm_object_cache_unlock();
4615 thread_block(THREAD_CONTINUE_NULL);
4616 continue;
4617 }
4618
4619 if (((object->ref_count > 1)
4620 && (flags & MEMORY_OBJECT_TERMINATE_IDLE))
4621 || (object->terminating)) {
4622 vm_object_unlock(object);
4623 vm_object_cache_unlock();
4624 return KERN_FAILURE;
4625 } else {
4626 if (flags & MEMORY_OBJECT_RELEASE_NO_OP) {
4627 vm_object_unlock(object);
4628 vm_object_cache_unlock();
4629 return KERN_SUCCESS;
4630 }
4631 }
4632
4633 if ((flags & MEMORY_OBJECT_RESPECT_CACHE) &&
4634 (object->ref_count == 1)) {
4635 if(original_object)
4636 object->named = FALSE;
4637 vm_object_unlock(object);
4638 vm_object_cache_unlock();
4639 /* let vm_object_deallocate push this thing into */
4640 /* the cache, if that is where it is bound */
4641 vm_object_deallocate(object);
4642 return KERN_SUCCESS;
4643 }
4644 VM_OBJ_RES_DECR(object);
4645 shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
4646 if(object->ref_count == 1) {
4647 if(vm_object_terminate(object) != KERN_SUCCESS) {
4648 if(original_object) {
4649 return KERN_FAILURE;
4650 } else {
4651 return KERN_SUCCESS;
4652 }
4653 }
4654 if (shadow != VM_OBJECT_NULL) {
4655 original_object = FALSE;
4656 object = shadow;
4657 continue;
4658 }
4659 return KERN_SUCCESS;
4660 } else {
4661 object->ref_count--;
4662 assert(object->ref_count > 0);
4663 if(original_object)
4664 object->named = FALSE;
4665 vm_object_unlock(object);
4666 vm_object_cache_unlock();
4667 return KERN_SUCCESS;
4668 }
4669 }
4670 /*NOTREACHED*/
4671 assert(0);
4672 return KERN_FAILURE;
4673 }
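/*
 * Illustrative sketch (hypothetical): dropping a named reference only
 * if the object is otherwise idle, and letting the cache keep it if it
 * is cacheable.  The flag names are the ones tested above.
 *
 *	kr = vm_object_release_name(object,
 *				    MEMORY_OBJECT_TERMINATE_IDLE |
 *				    MEMORY_OBJECT_RESPECT_CACHE);
 *	if (kr == KERN_FAILURE) {
 *		// object still has other references (or is terminating);
 *		// the name reference was not released
 *	}
 */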
4674
4675
4676 __private_extern__ kern_return_t
4677 vm_object_lock_request(
4678 vm_object_t object,
4679 vm_object_offset_t offset,
4680 vm_object_size_t size,
4681 memory_object_return_t should_return,
4682 int flags,
4683 vm_prot_t prot)
4684 {
4685 __unused boolean_t should_flush;
4686
4687 should_flush = flags & MEMORY_OBJECT_DATA_FLUSH;
4688
4689 XPR(XPR_MEMORY_OBJECT,
4690 "vm_o_lock_request, obj 0x%X off 0x%X size 0x%X flags %X prot %X\n",
4691 (integer_t)object, offset, size,
4692 (((should_return&1)<<1)|should_flush), prot);
4693
4694 /*
4695 * Check for bogus arguments.
4696 */
4697 if (object == VM_OBJECT_NULL)
4698 return (KERN_INVALID_ARGUMENT);
4699
4700 if ((prot & ~VM_PROT_ALL) != 0 && prot != VM_PROT_NO_CHANGE)
4701 return (KERN_INVALID_ARGUMENT);
4702
4703 size = round_page_64(size);
4704
4705 /*
4706 * Lock the object, and acquire a paging reference to
4707 * prevent the memory_object reference from being released.
4708 */
4709 vm_object_lock(object);
4710 vm_object_paging_begin(object);
4711
4712 (void)vm_object_update(object,
4713 offset, size, NULL, NULL, should_return, flags, prot);
4714
4715 vm_object_paging_end(object);
4716 vm_object_unlock(object);
4717
4718 return (KERN_SUCCESS);
4719 }
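/*
 * Illustrative sketch (hypothetical): returning a range of the object
 * to its pager and flushing it from memory.  MEMORY_OBJECT_DATA_FLUSH
 * is the flag tested above; MEMORY_OBJECT_RETURN_ALL is assumed from
 * the standard memory_object_return_t values, and VM_PROT_NO_CHANGE
 * leaves the protection untouched.
 *
 *	kr = vm_object_lock_request(object, offset, size,
 *				    MEMORY_OBJECT_RETURN_ALL,
 *				    MEMORY_OBJECT_DATA_FLUSH,
 *				    VM_PROT_NO_CHANGE);
 */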
4720
4721 /*
4722 * Empty a purgable object by grabbing the physical pages assigned to it and
4723 * putting them on the free queue without writing them to backing store, etc.
4724 * When the pages are next touched they will be demand zero-fill pages. We
4725 * skip pages which are busy, being paged in/out, wired, etc. We do _not_
4726 * skip referenced/dirty pages, pages on the active queue, etc. We're more
4727 * than happy to grab these since this is a purgable object. We mark the
4728 * object as "empty" after reaping its pages.
4729 *
4730 * On entry the object and page queues are locked, the object must be a
4731 * purgable object with no delayed copies pending.
4732 */
4733 unsigned int
4734 vm_object_purge(vm_object_t object)
4735 {
4736 vm_page_t p, next;
4737 unsigned int num_purged_pages;
4738 vm_page_t local_freeq;
4739 unsigned long local_freed;
4740 int purge_loop_quota;
4741 /* free pages as soon as we gather PURGE_BATCH_FREE_LIMIT pages to free */
4742 #define PURGE_BATCH_FREE_LIMIT 50
4743 /* release page queues lock every PURGE_LOOP_QUOTA iterations */
4744 #define PURGE_LOOP_QUOTA 100
4745
4746 num_purged_pages = 0;
4747 if (object->purgable == VM_OBJECT_NONPURGABLE)
4748 return num_purged_pages;
4749
4750 object->purgable = VM_OBJECT_PURGABLE_EMPTY;
4751
4752 assert(object->copy == VM_OBJECT_NULL);
4753 assert(object->copy_strategy == MEMORY_OBJECT_COPY_NONE);
4754 purge_loop_quota = PURGE_LOOP_QUOTA;
4755
4756 local_freeq = VM_PAGE_NULL;
4757 local_freed = 0;
4758
4759 /*
4760 * Go through the object's resident pages and try and discard them.
4761 */
4762 next = (vm_page_t)queue_first(&object->memq);
4763 while (!queue_end(&object->memq, (queue_entry_t)next)) {
4764 p = next;
4765 next = (vm_page_t)queue_next(&next->listq);
4766
4767 if (purge_loop_quota-- == 0) {
4768 /*
4769 * Avoid holding the page queues lock for too long.
4770 * Let someone else take it for a while if needed.
4771 * Keep holding the object's lock to guarantee that
4772 * the object's page list doesn't change under us
4773 * while we yield.
4774 */
4775 if (local_freeq != VM_PAGE_NULL) {
4776 /*
4777 * Flush our queue of pages to free.
4778 */
4779 vm_page_free_list(local_freeq);
4780 local_freeq = VM_PAGE_NULL;
4781 local_freed = 0;
4782 }
4783 vm_page_unlock_queues();
4784 mutex_pause();
4785 vm_page_lock_queues();
4786
4787 /* resume with the current page and a new quota */
4788 purge_loop_quota = PURGE_LOOP_QUOTA;
4789 }
4790
4791
4792 if (p->busy || p->cleaning || p->laundry ||
4793 p->list_req_pending) {
4794 /* page is being acted upon, so don't mess with it */
4795 continue;
4796 }
4797 if (p->wire_count) {
4798 /* don't discard a wired page */
4799 continue;
4800 }
4801
4802 if (p->tabled) {
4803 /* clean up the object/offset table */
4804 vm_page_remove(p);
4805 }
4806 if (p->absent) {
4807 /* update the object's count of absent pages */
4808 vm_object_absent_release(object);
4809 }
4810
4811 /* we can discard this page */
4812
4813 /* advertise that this page is in a transition state */
4814 p->busy = TRUE;
4815
4816 if (p->no_isync == TRUE) {
4817 /* the page hasn't been mapped yet */
4818 /* (optimization to delay the i-cache sync) */
4819 } else {
4820 /* unmap the page */
4821 int refmod_state;
4822
4823 refmod_state = pmap_disconnect(p->phys_page);
4824 if (refmod_state & VM_MEM_MODIFIED) {
4825 p->dirty = TRUE;
4826 }
4827 }
4828
4829 if (p->dirty || p->precious) {
4830 /* we saved the cost of cleaning this page! */
4831 num_purged_pages++;
4832 vm_page_purged_count++;
4833 }
4834
4835 /* remove page from active or inactive queue... */
4836 VM_PAGE_QUEUES_REMOVE(p);
4837
4838 /* ... and put it on our queue of pages to free */
4839 assert(!p->laundry);
4840 assert(p->object != kernel_object);
4841 assert(p->pageq.next == NULL &&
4842 p->pageq.prev == NULL);
4843 p->pageq.next = (queue_entry_t) local_freeq;
4844 local_freeq = p;
4845 if (++local_freed >= PURGE_BATCH_FREE_LIMIT) {
4846 /* flush our queue of pages to free */
4847 vm_page_free_list(local_freeq);
4848 local_freeq = VM_PAGE_NULL;
4849 local_freed = 0;
4850 }
4851 }
4852
4853 /* flush our local queue of pages to free one last time */
4854 if (local_freeq != VM_PAGE_NULL) {
4855 vm_page_free_list(local_freeq);
4856 local_freeq = VM_PAGE_NULL;
4857 local_freed = 0;
4858 }
4859
4860 return num_purged_pages;
4861 }
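/*
 * Illustrative sketch (hypothetical): emptying a volatile purgable
 * object directly.  Per the comment above, both the object lock and
 * the page queues lock must be held on entry, and the object must be
 * a purgable object with no delayed copies pending.
 *
 *	vm_object_lock(object);
 *	vm_page_lock_queues();
 *	purged = vm_object_purge(object);
 *	vm_page_unlock_queues();
 *	vm_object_unlock(object);
 *	// "purged" counts the dirty/precious pages whose cleaning we avoided
 */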
4862
4863 /*
4864 * vm_object_purgable_control() allows the caller to control and investigate the
4865 * state of a purgable object. A purgable object is created via a call to
4866 * vm_allocate() with VM_FLAGS_PURGABLE specified. A purgable object will
4867 * never be coalesced with any other object -- even other purgable objects --
4868 * and will thus always remain a distinct object. A purgable object has
4869 * special semantics when its reference count is exactly 1. If its reference
4870 * count is greater than 1, then a purgable object will behave like a normal
4871 * object and attempts to use this interface will result in an error return
4872 * of KERN_INVALID_ARGUMENT.
4873 *
4874 * A purgable object may be put into a "volatile" state which will make the
4875 * object's pages eligible for being reclaimed without paging to backing
4876 * store if the system runs low on memory. If the pages in a volatile
4877 * purgable object are reclaimed, the purgable object is said to have been
4878 * "emptied." When a purgable object is emptied the system will reclaim as
4879 * many pages from the object as it can in a convenient manner (pages already
4880 * en route to backing store or busy for other reasons are left as is). When
4881 * a purgable object is made volatile, its pages will generally be reclaimed
4882 * before other pages in the application's working set. This semantic is
4883 * generally used by applications which can recreate the data in the object
4884 * faster than it can be paged in. One such example might be media assets
4885 * which can be reread from a much faster RAID volume.
4886 *
4887 * A purgable object may be designated as "non-volatile" which means it will
4888 * behave like all other objects in the system with pages being written to and
4889 * read from backing store as needed to satisfy system memory needs. If the
4890 * object was emptied before the object was made non-volatile, that fact will
4891 * be returned as the old state of the purgable object (see
4892 * VM_PURGABLE_SET_STATE below). In this case, any pages of the object which
4893 * were reclaimed as part of emptying the object will be refaulted in as
4894 * zero-fill on demand. It is up to the application to note that an object
4895 * was emptied and recreate the object's contents if necessary. When a
4896 * purgable object is made non-volatile, its pages will generally not be paged
4897 * out to backing store in the immediate future. A purgable object may also
4898 * be manually emptied.
4899 *
4900 * Finally, the current state (non-volatile, volatile, volatile & empty) of a
4901 * purgable object may be queried at any time. This information may
4902 * be used as a control input to let the application know when the system is
4903 * experiencing memory pressure and is reclaiming memory.
4904 *
4905 * The specified address may be any address within the purgable object. If
4906 * the specified address does not represent any object in the target task's
4907 * virtual address space, then KERN_INVALID_ADDRESS will be returned. If the
4908 * object containing the specified address is not a purgable object, then
4909 * KERN_INVALID_ARGUMENT will be returned. Otherwise, KERN_SUCCESS will be
4910 * returned.
4911 *
4912 * The control parameter may be any one of VM_PURGABLE_SET_STATE or
4913 * VM_PURGABLE_GET_STATE. For VM_PURGABLE_SET_STATE, the in/out parameter
4914 * state is used to set the new state of the purgable object and return its
4915 * old state. For VM_PURGABLE_GET_STATE, the current state of the purgable
4916 * object is returned in the parameter state.
4917 *
4918 * The in/out parameter state may be one of VM_PURGABLE_NONVOLATILE,
4919 * VM_PURGABLE_VOLATILE or VM_PURGABLE_EMPTY. These, respectively, represent
4920 * the non-volatile, volatile and volatile/empty states described above.
4921 * Setting the state of a purgable object to VM_PURGABLE_EMPTY will
4922 * immediately reclaim as many pages in the object as can be conveniently
4923 * collected (some may have already been written to backing store or be
4924 * otherwise busy).
4925 *
4926 * The process of making a purgable object non-volatile and determining its
4927 * previous state is atomic. Thus, if a purgable object is made
4928 * VM_PURGABLE_NONVOLATILE and the old state is returned as
4929 * VM_PURGABLE_VOLATILE, then the purgable object's previous contents are
4930 * completely intact and will remain so until the object is made volatile
4931 * again. If the old state is returned as VM_PURGABLE_EMPTY then the object
4932 * was reclaimed while it was in a volatile state and its previous contents
4933 * have been lost.
4934 */
4935 /*
4936 * The object must be locked.
4937 */
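/*
 * Illustrative sketch (hypothetical caller, e.g. the layer servicing a
 * user's vm_purgable_control() request): making a purgable object
 * non-volatile and learning its previous state.  The object lock is
 * held across the call, as required above.
 *
 *	int state = VM_PURGABLE_NONVOLATILE;
 *
 *	vm_object_lock(object);
 *	kr = vm_object_purgable_control(object, VM_PURGABLE_SET_STATE, &state);
 *	vm_object_unlock(object);
 *
 *	if (kr == KERN_SUCCESS && state == VM_PURGABLE_EMPTY) {
 *		// the object was emptied while volatile; its previous
 *		// contents are lost and must be recreated
 *	}
 */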
4938 kern_return_t
4939 vm_object_purgable_control(
4940 vm_object_t object,
4941 vm_purgable_t control,
4942 int *state)
4943 {
4944 int old_state;
4945 vm_page_t p;
4946
4947 if (object == VM_OBJECT_NULL) {
4948 /*
4949 * Object must already be present or it can't be purgable.
4950 */
4951 return KERN_INVALID_ARGUMENT;
4952 }
4953
4954 /*
4955 * Get current state of the purgable object.
4956 */
4957 switch (object->purgable) {
4958 case VM_OBJECT_NONPURGABLE:
4959 return KERN_INVALID_ARGUMENT;
4960
4961 case VM_OBJECT_PURGABLE_NONVOLATILE:
4962 old_state = VM_PURGABLE_NONVOLATILE;
4963 break;
4964
4965 case VM_OBJECT_PURGABLE_VOLATILE:
4966 old_state = VM_PURGABLE_VOLATILE;
4967 break;
4968
4969 case VM_OBJECT_PURGABLE_EMPTY:
4970 old_state = VM_PURGABLE_EMPTY;
4971 break;
4972
4973 default:
4974 old_state = VM_PURGABLE_NONVOLATILE;
4975 panic("Bad state (%d) for purgable object!\n",
4976 object->purgable);
4977 /*NOTREACHED*/
4978 }
4979
4980 /* purgable can't have delayed copies - now or in the future */
4981 assert(object->copy == VM_OBJECT_NULL);
4982 assert(object->copy_strategy == MEMORY_OBJECT_COPY_NONE);
4983
4984 /*
4985 * Execute the desired operation.
4986 */
4987 if (control == VM_PURGABLE_GET_STATE) {
4988 *state = old_state;
4989 return KERN_SUCCESS;
4990 }
4991
4992 switch (*state) {
4993 case VM_PURGABLE_NONVOLATILE:
4994 vm_page_lock_queues();
4995 if (object->purgable != VM_OBJECT_PURGABLE_NONVOLATILE) {
4996 assert(vm_page_purgeable_count >=
4997 object->resident_page_count);
4998 vm_page_purgeable_count -= object->resident_page_count;
4999 }
5000
5001 object->purgable = VM_OBJECT_PURGABLE_NONVOLATILE;
5002
5003 /*
5004 * If the object wasn't emptied, then mark all pages of the
5005 * object as referenced in order to give them a complete turn
5006 * of the virtual memory "clock" before becoming candidates
5007 * for paging out (if the system is suffering from memory
5008 * pressure). We don't really need to set the pmap reference
5009 * bits (which would be expensive) since the software copies
5010 * are trusted whenever they are set to TRUE.
5011 */
5012 if (old_state != VM_PURGABLE_EMPTY) {
5013 for (p = (vm_page_t)queue_first(&object->memq);
5014 !queue_end(&object->memq, (queue_entry_t)p);
5015 p = (vm_page_t)queue_next(&p->listq))
5016 p->reference = TRUE;
5017 }
5018
5019 vm_page_unlock_queues();
5020
5021 break;
5022
5023 case VM_PURGABLE_VOLATILE:
5024 vm_page_lock_queues();
5025
5026 if (object->purgable != VM_OBJECT_PURGABLE_VOLATILE &&
5027 object->purgable != VM_OBJECT_PURGABLE_EMPTY) {
5028 vm_page_purgeable_count += object->resident_page_count;
5029 }
5030
5031 object->purgable = VM_OBJECT_PURGABLE_VOLATILE;
5032
5033 /*
5034 * We want the newly volatile purgable object to be a
5035 * candidate for the pageout scan before other pages in the
5036 * application if the system is suffering from memory
5037 * pressure. To do this, we move a page of the object from
5038 * the active queue onto the inactive queue in order to
5039 * promote the object for early reclaim. We only need to move
5040 * a single page since the pageout scan will reap the entire
5041 * purgable object if it finds a single page in a volatile
5042 * state. Obviously we don't do this if there are no pages
5043 * associated with the object or we find a page of the object
5044 * already on the inactive queue.
5045 */
5046 for (p = (vm_page_t)queue_first(&object->memq);
5047 !queue_end(&object->memq, (queue_entry_t)p);
5048 p = (vm_page_t)queue_next(&p->listq)) {
5049 if (p->inactive) {
5050 /* already a page on the inactive queue */
5051 break;
5052 }
5053 if (p->active && !p->busy) {
5054 /* found one we can move */
5055 vm_page_deactivate(p);
5056 break;
5057 }
5058 }
5059 vm_page_unlock_queues();
5060
5061 break;
5062
5063
5064 case VM_PURGABLE_EMPTY:
5065 vm_page_lock_queues();
5066 if (object->purgable != VM_OBJECT_PURGABLE_VOLATILE &&
5067 object->purgable != VM_OBJECT_PURGABLE_EMPTY) {
5068 vm_page_purgeable_count += object->resident_page_count;
5069 }
5070 (void) vm_object_purge(object);
5071 vm_page_unlock_queues();
5072 break;
5073
5074 }
5075 *state = old_state;
5076
5077 return KERN_SUCCESS;
5078 }
5079
5080 #if TASK_SWAPPER
5081 /*
5082 * vm_object_res_deallocate
5083 *
5084 * (recursively) decrement residence counts on vm objects and their shadows.
5085 * Called from vm_object_deallocate and when swapping out an object.
5086 *
5087 * The object is locked, and remains locked throughout the function,
5088 * even as we iterate down the shadow chain. Locks on intermediate objects
5089 * will be dropped, but not the original object.
5090 *
5091 * NOTE: this function used to use recursion, rather than iteration.
5092 */
5093
5094 __private_extern__ void
5095 vm_object_res_deallocate(
5096 vm_object_t object)
5097 {
5098 vm_object_t orig_object = object;
5099 /*
5100 * Object is locked so it can be called directly
5101 * from vm_object_deallocate. Original object is never
5102 * unlocked.
5103 */
5104 assert(object->res_count > 0);
5105 while (--object->res_count == 0) {
5106 assert(object->ref_count >= object->res_count);
5107 vm_object_deactivate_all_pages(object);
5108 /* iterate on shadow, if present */
5109 if (object->shadow != VM_OBJECT_NULL) {
5110 vm_object_t tmp_object = object->shadow;
5111 vm_object_lock(tmp_object);
5112 if (object != orig_object)
5113 vm_object_unlock(object);
5114 object = tmp_object;
5115 assert(object->res_count > 0);
5116 } else
5117 break;
5118 }
5119 if (object != orig_object)
5120 vm_object_unlock(object);
5121 }
5122
5123 /*
5124 * vm_object_res_reference
5125 *
5126 * Internal function to increment residence count on a vm object
5127 * and its shadows. It is called only from vm_object_reference, and
5128 * when swapping in a vm object, via vm_map_swap.
5129 *
5130 * The object is locked, and remains locked throughout the function,
5131 * even as we iterate down the shadow chain. Locks on intermediate objects
5132 * will be dropped, but not the original object.
5133 *
5134 * NOTE: this function used to use recursion, rather than iteration.
5135 */
5136
5137 __private_extern__ void
5138 vm_object_res_reference(
5139 vm_object_t object)
5140 {
5141 vm_object_t orig_object = object;
5142 /*
5143 * Object is locked, so this can be called directly
5144 * from vm_object_reference. This lock is never released.
5145 */
5146 while ((++object->res_count == 1) &&
5147 (object->shadow != VM_OBJECT_NULL)) {
5148 vm_object_t tmp_object = object->shadow;
5149
5150 assert(object->ref_count >= object->res_count);
5151 vm_object_lock(tmp_object);
5152 if (object != orig_object)
5153 vm_object_unlock(object);
5154 object = tmp_object;
5155 }
5156 if (object != orig_object)
5157 vm_object_unlock(object);
5158 assert(orig_object->ref_count >= orig_object->res_count);
5159 }
5160 #endif /* TASK_SWAPPER */
5161
5162 /*
5163 * vm_object_reference:
5164 *
5165 * Gets another reference to the given object.
5166 */
5167 #ifdef vm_object_reference
5168 #undef vm_object_reference
5169 #endif
5170 __private_extern__ void
5171 vm_object_reference(
5172 register vm_object_t object)
5173 {
5174 if (object == VM_OBJECT_NULL)
5175 return;
5176
5177 vm_object_lock(object);
5178 assert(object->ref_count > 0);
5179 vm_object_reference_locked(object);
5180 vm_object_unlock(object);
5181 }
5182
5183 #ifdef MACH_BSD
5184 /*
5185 * Scale the vm_object_cache
5186 * This is required to make sure that the vm_object_cache is big
5187 * enough to effectively cache the mapped file.
5188 * This is really important with UBC as all the regular file vnodes
5189 * have a memory object associated with them. Having this cache too
5190 * small results in rapid reclaim of vnodes and hurts performance a LOT!
5191 *
5192 * This is also needed as number of vnodes can be dynamically scaled.
5193 */
5194 kern_return_t
5195 adjust_vm_object_cache(
5196 __unused vm_size_t oval,
5197 vm_size_t nval)
5198 {
5199 vm_object_cached_max = nval;
5200 vm_object_cache_trim(FALSE);
5201 return (KERN_SUCCESS);
5202 }
5203 #endif /* MACH_BSD */
5204
5205
5206 /*
5207 * vm_object_transpose
5208 *
5209 * This routine takes two VM objects of the same size and exchanges
5210 * their backing store.
5211 * The objects should be "quiesced" via a UPL operation with UPL_SET_IO_WIRE
5212 * and UPL_BLOCK_ACCESS if they are referenced anywhere.
5213 *
5214 * The VM objects must not be locked by caller.
5215 */
5216 kern_return_t
5217 vm_object_transpose(
5218 vm_object_t object1,
5219 vm_object_t object2,
5220 vm_object_size_t transpose_size)
5221 {
5222 vm_object_t tmp_object;
5223 kern_return_t retval;
5224 boolean_t object1_locked, object2_locked;
5225 boolean_t object1_paging, object2_paging;
5226 vm_page_t page;
5227 vm_object_offset_t page_offset;
5228
5229 tmp_object = VM_OBJECT_NULL;
5230 object1_locked = FALSE; object2_locked = FALSE;
5231 object1_paging = FALSE; object2_paging = FALSE;
5232
5233 if (object1 == object2 ||
5234 object1 == VM_OBJECT_NULL ||
5235 object2 == VM_OBJECT_NULL) {
5236 /*
5237 * If the 2 VM objects are the same, there's
5238 * no point in exchanging their backing store.
5239 */
5240 retval = KERN_INVALID_VALUE;
5241 goto done;
5242 }
5243
5244 vm_object_lock(object1);
5245 object1_locked = TRUE;
5246 if (object1->copy || object1->shadow || object1->shadowed ||
5247 object1->purgable != VM_OBJECT_NONPURGABLE) {
5248 /*
5249 * We don't deal with copy or shadow objects (yet).
5250 */
5251 retval = KERN_INVALID_VALUE;
5252 goto done;
5253 }
5254 /*
5255 * Since we're about to mess with the object's backing store,
5256 * mark it as "paging_in_progress". Note that this is not enough
5257 * to prevent any paging activity on this object, so the caller should
5258 * have "quiesced" the objects beforehand, via a UPL operation with
5259 * UPL_SET_IO_WIRE (to make sure all the pages are there and wired)
5260 * and UPL_BLOCK_ACCESS (to mark the pages "busy").
5261 */
5262 vm_object_paging_begin(object1);
5263 object1_paging = TRUE;
5264 vm_object_unlock(object1);
5265 object1_locked = FALSE;
5266
5267 /*
5268 * Same as above for the 2nd object...
5269 */
5270 vm_object_lock(object2);
5271 object2_locked = TRUE;
5272 if (object2->copy || object2->shadow || object2->shadowed ||
5273 object2->purgable != VM_OBJECT_NONPURGABLE) {
5274 retval = KERN_INVALID_VALUE;
5275 goto done;
5276 }
5277 vm_object_paging_begin(object2);
5278 object2_paging = TRUE;
5279 vm_object_unlock(object2);
5280 object2_locked = FALSE;
5281
5282 /*
5283 * Allocate a temporary VM object to hold object1's contents
5284 * while we copy object2 to object1.
5285 */
5286 tmp_object = vm_object_allocate(transpose_size);
5287 vm_object_lock(tmp_object);
5288 vm_object_paging_begin(tmp_object);
5289 tmp_object->can_persist = FALSE;
5290
5291 /*
5292 * Since we need to lock both objects at the same time,
5293 * make sure we always lock them in the same order to
5294 * avoid deadlocks.
5295 */
5296 if (object1 < object2) {
5297 vm_object_lock(object1);
5298 vm_object_lock(object2);
5299 } else {
5300 vm_object_lock(object2);
5301 vm_object_lock(object1);
5302 }
5303 object1_locked = TRUE;
5304 object2_locked = TRUE;
5305
5306 if (object1->size != object2->size ||
5307 object1->size != transpose_size) {
5308 /*
5309 * If the 2 objects don't have the same size, we can't
5310 * exchange their backing stores or one would overflow.
5311 * If their size doesn't match the caller's
5312 * "transpose_size", we can't do it either because the
5313 * transpose operation will affect the entire span of
5314 * the objects.
5315 */
5316 retval = KERN_INVALID_VALUE;
5317 goto done;
5318 }
5319
5320
5321 /*
5322 * Transpose the lists of resident pages.
5323 */
5324 if (object1->phys_contiguous || queue_empty(&object1->memq)) {
5325 /*
5326 * No pages in object1, just transfer pages
5327 * from object2 to object1. No need to go through
5328 * an intermediate object.
5329 */
5330 while (!queue_empty(&object2->memq)) {
5331 page = (vm_page_t) queue_first(&object2->memq);
5332 vm_page_rename(page, object1, page->offset);
5333 }
5334 assert(queue_empty(&object2->memq));
5335 } else if (object2->phys_contiguous || queue_empty(&object2->memq)) {
5336 /*
5337 * No pages in object2, just transfer pages
5338 * from object1 to object2. No need to go through
5339 * an intermediate object.
5340 */
5341 while (!queue_empty(&object1->memq)) {
5342 page = (vm_page_t) queue_first(&object1->memq);
5343 vm_page_rename(page, object2, page->offset);
5344 }
5345 assert(queue_empty(&object1->memq));
5346 } else {
5347 /* transfer object1's pages to tmp_object */
5348 vm_page_lock_queues();
5349 while (!queue_empty(&object1->memq)) {
5350 page = (vm_page_t) queue_first(&object1->memq);
5351 page_offset = page->offset;
5352 vm_page_remove(page);
5353 page->offset = page_offset;
5354 queue_enter(&tmp_object->memq, page, vm_page_t, listq);
5355 }
5356 vm_page_unlock_queues();
5357 assert(queue_empty(&object1->memq));
5358 /* transfer object2's pages to object1 */
5359 while (!queue_empty(&object2->memq)) {
5360 page = (vm_page_t) queue_first(&object2->memq);
5361 vm_page_rename(page, object1, page->offset);
5362 }
5363 assert(queue_empty(&object2->memq));
5364 /* transfer tmp_object's pages to object1 */
5365 while (!queue_empty(&tmp_object->memq)) {
5366 page = (vm_page_t) queue_first(&tmp_object->memq);
5367 queue_remove(&tmp_object->memq, page,
5368 vm_page_t, listq);
5369 vm_page_insert(page, object2, page->offset);
5370 }
5371 assert(queue_empty(&tmp_object->memq));
5372 }
5373
5374 /* no need to transpose the size: they should be identical */
5375 assert(object1->size == object2->size);
5376
5377 #define __TRANSPOSE_FIELD(field) \
5378 MACRO_BEGIN \
5379 tmp_object->field = object1->field; \
5380 object1->field = object2->field; \
5381 object2->field = tmp_object->field; \
5382 MACRO_END
5383
5384 assert(!object1->copy);
5385 assert(!object2->copy);
5386
5387 assert(!object1->shadow);
5388 assert(!object2->shadow);
5389
5390 __TRANSPOSE_FIELD(shadow_offset); /* used by phys_contiguous objects */
5391 __TRANSPOSE_FIELD(pager);
5392 __TRANSPOSE_FIELD(paging_offset);
5393
5394 __TRANSPOSE_FIELD(pager_control);
5395 /* update the memory_objects' pointers back to the VM objects */
5396 if (object1->pager_control != MEMORY_OBJECT_CONTROL_NULL) {
5397 memory_object_control_collapse(object1->pager_control,
5398 object1);
5399 }
5400 if (object2->pager_control != MEMORY_OBJECT_CONTROL_NULL) {
5401 memory_object_control_collapse(object2->pager_control,
5402 object2);
5403 }
5404
5405 __TRANSPOSE_FIELD(absent_count);
5406
5407 assert(object1->paging_in_progress);
5408 assert(object2->paging_in_progress);
5409
5410 __TRANSPOSE_FIELD(pager_created);
5411 __TRANSPOSE_FIELD(pager_initialized);
5412 __TRANSPOSE_FIELD(pager_ready);
5413 __TRANSPOSE_FIELD(pager_trusted);
5414 __TRANSPOSE_FIELD(internal);
5415 __TRANSPOSE_FIELD(temporary);
5416 __TRANSPOSE_FIELD(private);
5417 __TRANSPOSE_FIELD(pageout);
5418 __TRANSPOSE_FIELD(true_share);
5419 __TRANSPOSE_FIELD(phys_contiguous);
5420 __TRANSPOSE_FIELD(nophyscache);
5421 __TRANSPOSE_FIELD(last_alloc);
5422 __TRANSPOSE_FIELD(sequential);
5423 __TRANSPOSE_FIELD(cluster_size);
5424 __TRANSPOSE_FIELD(existence_map);
5425 __TRANSPOSE_FIELD(cow_hint);
5426 __TRANSPOSE_FIELD(wimg_bits);
5427
5428 #undef __TRANSPOSE_FIELD
5429
5430 retval = KERN_SUCCESS;
5431
5432 done:
5433 /*
5434 * Cleanup.
5435 */
5436 if (tmp_object != VM_OBJECT_NULL) {
5437 vm_object_paging_end(tmp_object);
5438 vm_object_unlock(tmp_object);
5439 /*
5440 * Re-initialize the temporary object to avoid
5441 * deallocating a real pager.
5442 */
5443 _vm_object_allocate(transpose_size, tmp_object);
5444 vm_object_deallocate(tmp_object);
5445 tmp_object = VM_OBJECT_NULL;
5446 }
5447
5448 if (object1_locked) {
5449 vm_object_unlock(object1);
5450 object1_locked = FALSE;
5451 }
5452 if (object2_locked) {
5453 vm_object_unlock(object2);
5454 object2_locked = FALSE;
5455 }
5456 if (object1_paging) {
5457 vm_object_lock(object1);
5458 vm_object_paging_end(object1);
5459 vm_object_unlock(object1);
5460 object1_paging = FALSE;
5461 }
5462 if (object2_paging) {
5463 vm_object_lock(object2);
5464 vm_object_paging_end(object2);
5465 vm_object_unlock(object2);
5466 object2_paging = FALSE;
5467 }
5468
5469 return retval;
5470 }
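/*
 * Illustrative sketch (hypothetical): exchanging the backing store of
 * two equally sized objects.  Per the comment above, the caller must
 * not hold either object lock and should have quiesced both objects
 * beforehand via a UPL created with UPL_SET_IO_WIRE and
 * UPL_BLOCK_ACCESS.  "transpose_size" must match both objects' sizes.
 *
 *	kr = vm_object_transpose(object1, object2, transpose_size);
 *	if (kr == KERN_INVALID_VALUE) {
 *		// identical/NULL objects, size mismatch, or
 *		// copy/shadow/purgable objects are refused
 *	}
 */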
5471
5472
5473 /* Allow manipulation of individual page state. This is actually part of */
5474 /* the UPL regimen but takes place on the VM object rather than on a UPL */
5475
5476 kern_return_t
5477 vm_object_page_op(
5478 vm_object_t object,
5479 vm_object_offset_t offset,
5480 int ops,
5481 ppnum_t *phys_entry,
5482 int *flags)
5483 {
5484 vm_page_t dst_page;
5485
5486 vm_object_lock(object);
5487
5488 if(ops & UPL_POP_PHYSICAL) {
5489 if(object->phys_contiguous) {
5490 if (phys_entry) {
5491 *phys_entry = (ppnum_t)
5492 (object->shadow_offset >> 12);
5493 }
5494 vm_object_unlock(object);
5495 return KERN_SUCCESS;
5496 } else {
5497 vm_object_unlock(object);
5498 return KERN_INVALID_OBJECT;
5499 }
5500 }
5501 if(object->phys_contiguous) {
5502 vm_object_unlock(object);
5503 return KERN_INVALID_OBJECT;
5504 }
5505
5506 while(TRUE) {
5507 if((dst_page = vm_page_lookup(object,offset)) == VM_PAGE_NULL) {
5508 vm_object_unlock(object);
5509 return KERN_FAILURE;
5510 }
5511
5512 /* Sync up on getting the busy bit */
5513 if((dst_page->busy || dst_page->cleaning) &&
5514 (((ops & UPL_POP_SET) &&
5515 (ops & UPL_POP_BUSY)) || (ops & UPL_POP_DUMP))) {
5516 /* someone else is playing with the page, we will */
5517 /* have to wait */
5518 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
5519 continue;
5520 }
5521
5522 if (ops & UPL_POP_DUMP) {
5523 vm_page_lock_queues();
5524
5525 if (dst_page->no_isync == FALSE)
5526 pmap_disconnect(dst_page->phys_page);
5527 vm_page_free(dst_page);
5528
5529 vm_page_unlock_queues();
5530 break;
5531 }
5532
5533 if (flags) {
5534 *flags = 0;
5535
5536 /* Get the condition of flags before requested ops */
5537 /* are undertaken */
5538
5539 if(dst_page->dirty) *flags |= UPL_POP_DIRTY;
5540 if(dst_page->pageout) *flags |= UPL_POP_PAGEOUT;
5541 if(dst_page->precious) *flags |= UPL_POP_PRECIOUS;
5542 if(dst_page->absent) *flags |= UPL_POP_ABSENT;
5543 if(dst_page->busy) *flags |= UPL_POP_BUSY;
5544 }
5545
5546 /* The caller should have made a call, either concurrent with */
5547 /* or prior to this one, to set UPL_POP_BUSY */
5548 if(ops & UPL_POP_SET) {
5549 /* The protection granted with this assert will */
5550 /* not be complete. If the caller violates the */
5551 /* convention and attempts to change page state */
5552 /* without first setting busy we may not see it */
5553 /* because the page may already be busy. However */
5554 /* if such violations occur we will assert sooner */
5555 /* or later. */
5556 assert(dst_page->busy || (ops & UPL_POP_BUSY));
5557 if (ops & UPL_POP_DIRTY) dst_page->dirty = TRUE;
5558 if (ops & UPL_POP_PAGEOUT) dst_page->pageout = TRUE;
5559 if (ops & UPL_POP_PRECIOUS) dst_page->precious = TRUE;
5560 if (ops & UPL_POP_ABSENT) dst_page->absent = TRUE;
5561 if (ops & UPL_POP_BUSY) dst_page->busy = TRUE;
5562 }
5563
5564 if(ops & UPL_POP_CLR) {
5565 assert(dst_page->busy);
5566 if (ops & UPL_POP_DIRTY) dst_page->dirty = FALSE;
5567 if (ops & UPL_POP_PAGEOUT) dst_page->pageout = FALSE;
5568 if (ops & UPL_POP_PRECIOUS) dst_page->precious = FALSE;
5569 if (ops & UPL_POP_ABSENT) dst_page->absent = FALSE;
5570 if (ops & UPL_POP_BUSY) {
5571 dst_page->busy = FALSE;
5572 PAGE_WAKEUP(dst_page);
5573 }
5574 }
5575
5576 if (dst_page->encrypted) {
5577 /*
5578 * ENCRYPTED SWAP:
5579 * We need to decrypt this encrypted page before the
5580 * caller can access its contents.
5581 * But if the caller really wants to access the page's
5582 * contents, they have to keep the page "busy".
5583 * Otherwise, the page could get recycled or re-encrypted
5584 * at any time.
5585 */
5586 if ((ops & UPL_POP_SET) && (ops & UPL_POP_BUSY) &&
5587 dst_page->busy) {
5588 /*
5589 * The page is stable enough to be accessed by
5590 * the caller, so make sure its contents are
5591 * not encrypted.
5592 */
5593 vm_page_decrypt(dst_page, 0);
5594 } else {
5595 /*
5596 * The page is not busy, so don't bother
5597 * decrypting it, since anything could
5598 * happen to it between now and when the
5599 * caller wants to access it.
5600 * We should not give the caller access
5601 * to this page.
5602 */
5603 assert(!phys_entry);
5604 }
5605 }
5606
5607 if (phys_entry) {
5608 /*
5609 * The physical page number will remain valid
5610 * only if the page is kept busy.
5611 * ENCRYPTED SWAP: make sure we don't let the
5612 * caller access an encrypted page.
5613 */
5614 assert(dst_page->busy);
5615 assert(!dst_page->encrypted);
5616 *phys_entry = dst_page->phys_page;
5617 }
5618
5619 break;
5620 }
5621
5622 vm_object_unlock(object);
5623 return KERN_SUCCESS;
5624
5625 }
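/*
 * Illustrative sketch (hypothetical): pinning a single resident page
 * busy and retrieving its physical page number, then releasing it.
 * The UPL_POP_* flags are the ones handled above; NULL may be passed
 * for out-parameters the caller does not need.
 *
 *	ppnum_t	pnum;
 *	int	pflags;
 *
 *	kr = vm_object_page_op(object, offset,
 *			       UPL_POP_SET | UPL_POP_BUSY,
 *			       &pnum, &pflags);
 *	if (kr == KERN_SUCCESS) {
 *		// ... use pnum; it stays valid while the page is busy ...
 *		(void) vm_object_page_op(object, offset,
 *					 UPL_POP_CLR | UPL_POP_BUSY,
 *					 NULL, NULL);
 *	}
 */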
5626
5627 /*
5628 * vm_object_range_op offers a performance enhancement over
5629 * vm_object_page_op for page_op operations which do not require page
5630 * level state to be returned from the call. Page_op was created to provide
5631 * a low-cost alternative to page manipulation via UPLs when only a single
5632 * page was involved. The range_op call establishes the ability in the _op
5633 * family of functions to work on multiple pages where the lack of page level
5634 * state handling allows the caller to avoid the overhead of the upl structures.
5635 */
5636
5637 kern_return_t
5638 vm_object_range_op(
5639 vm_object_t object,
5640 vm_object_offset_t offset_beg,
5641 vm_object_offset_t offset_end,
5642 int ops,
5643 int *range)
5644 {
5645 vm_object_offset_t offset;
5646 vm_page_t dst_page;
5647
5648 if (object->resident_page_count == 0) {
5649 if (range) {
5650 if (ops & UPL_ROP_PRESENT)
5651 *range = 0;
5652 else
5653 *range = offset_end - offset_beg;
5654 }
5655 return KERN_SUCCESS;
5656 }
5657 vm_object_lock(object);
5658
5659 if (object->phys_contiguous) {
5660 vm_object_unlock(object);
5661 return KERN_INVALID_OBJECT;
5662 }
5663
5664 offset = offset_beg;
5665
5666 while (offset < offset_end) {
5667 dst_page = vm_page_lookup(object, offset);
5668 if (dst_page != VM_PAGE_NULL) {
5669 if (ops & UPL_ROP_DUMP) {
5670 if (dst_page->busy || dst_page->cleaning) {
5671 /*
5672 * someone else is playing with the
5673 * page, we will have to wait
5674 */
5675 PAGE_SLEEP(object,
5676 dst_page, THREAD_UNINT);
5677 /*
5678 * need to look the page up again since its
5679 * state may have changed while we slept;
5680 * it might even belong to a different object
5681 * at this point
5682 */
5683 continue;
5684 }
5685 vm_page_lock_queues();
5686
5687 if (dst_page->no_isync == FALSE)
5688 pmap_disconnect(dst_page->phys_page);
5689 vm_page_free(dst_page);
5690
5691 vm_page_unlock_queues();
5692 } else if (ops & UPL_ROP_ABSENT)
5693 break;
5694 } else if (ops & UPL_ROP_PRESENT)
5695 break;
5696
5697 offset += PAGE_SIZE;
5698 }
5699 vm_object_unlock(object);
5700
5701 if (range)
5702 *range = offset - offset_beg;
5703
5704 return KERN_SUCCESS;
5705 }
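/*
 * Illustrative sketch (hypothetical): measuring the leading run of
 * resident pages in [offset_beg, offset_end).  With UPL_ROP_PRESENT the
 * walk above stops at the first non-resident page, so "range" comes
 * back as the number of bytes of contiguous resident pages starting at
 * offset_beg.
 *
 *	int resident_bytes;
 *
 *	kr = vm_object_range_op(object, offset_beg, offset_end,
 *				UPL_ROP_PRESENT, &resident_bytes);
 */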