/*
 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	File:	vm/vm_object.c
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *
 *	Virtual memory object module.
 */
#include <mach_pagemap.h>
#include <task_swapper.h>

#include <mach/mach_types.h>
#include <mach/memory_object.h>
#include <mach/memory_object_default.h>
#include <mach/memory_object_control_server.h>
#include <mach/vm_param.h>

#include <ipc/ipc_types.h>
#include <ipc/ipc_port.h>

#include <kern/kern_types.h>
#include <kern/assert.h>
#include <kern/lock.h>
#include <kern/queue.h>
#include <kern/zalloc.h>
#include <kern/host.h>
#include <kern/host_statistics.h>
#include <kern/processor.h>
#include <kern/misc_protos.h>

#include <vm/memory_object.h>
#include <vm/vm_fault.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_protos.h>
/*
 *	Virtual memory objects maintain the actual data
 *	associated with allocated virtual memory.  A given
 *	page of memory exists within exactly one object.
 *
 *	An object is only deallocated when all "references"
 *	are given up.
 *
 *	Associated with each object is a list of all resident
 *	memory pages belonging to that object; this list is
 *	maintained by the "vm_page" module, but locked by the object's
 *	lock.
 *
 *	Each object also records the memory object reference
 *	that is used by the kernel to request and write
 *	back data (the memory object, field "pager"), etc...
 *
 *	Virtual memory objects are allocated to provide
 *	zero-filled memory (vm_allocate) or map a user-defined
 *	memory object into a virtual address space (vm_map).
 *
 *	Virtual memory objects that refer to a user-defined
 *	memory object are called "permanent", because all changes
 *	made in virtual memory are reflected back to the
 *	memory manager, which may then store it permanently.
 *	Other virtual memory objects are called "temporary",
 *	meaning that changes need be written back only when
 *	necessary to reclaim pages, and that storage associated
 *	with the object can be discarded once it is no longer
 *	needed.
 *
 *	A permanent memory object may be mapped into more
 *	than one virtual address space.  Moreover, two threads
 *	may attempt to make the first mapping of a memory
 *	object concurrently.  Only one thread is allowed to
 *	complete this mapping; all others wait for the
 *	"pager_initialized" field to be asserted, indicating
 *	that the first thread has initialized all of the
 *	necessary fields in the virtual memory object structure.
 *
 *	The kernel relies on a *default memory manager* to
 *	provide backing storage for the zero-filled virtual
 *	memory objects.  The pager memory objects associated
 *	with these temporary virtual memory objects are only
 *	requested from the default memory manager when it
 *	becomes necessary.  Virtual memory objects
 *	that depend on the default memory manager are called
 *	"internal".  The "pager_created" field is provided to
 *	indicate whether these ports have ever been allocated.
 *
 *	The kernel may also create virtual memory objects to
 *	hold changed pages after a copy-on-write operation.
 *	In this case, the virtual memory object (and its
 *	backing storage -- its memory object) only contain
 *	those pages that have been changed.  The "shadow"
 *	field refers to the virtual memory object that contains
 *	the remainder of the contents.  The "shadow_offset"
 *	field indicates where in the "shadow" these contents begin.
 *	The "copy" field refers to a virtual memory object
 *	to which changed pages must be copied before changing
 *	this object, in order to implement another form
 *	of copy-on-write optimization.
 *
 *	The virtual memory object structure also records
 *	the attributes associated with its memory object.
 *	The "pager_ready", "can_persist" and "copy_strategy"
 *	fields represent those attributes.  The "cached_list"
 *	field is used in the implementation of the persistence
 *	mechanism described below.
 *
 *	ZZZ Continue this comment.
 */
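/*
 * Illustrative sketch (not part of the original source): how a page lookup
 * conceptually follows the "shadow" / "shadow_offset" fields described
 * above.  The helper name is hypothetical; the real logic lives in
 * vm_fault_page() and also takes locking and busy pages into account.
 */
#if 0	/* illustration only -- never compiled */
static vm_page_t
example_shadow_chain_lookup(
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	vm_page_t	m;

	while (object != VM_OBJECT_NULL) {
		/* a resident page lives in exactly one object ... */
		m = vm_page_lookup(object, offset);
		if (m != VM_PAGE_NULL)
			return m;
		/* ... otherwise look in the backing (shadow) object */
		offset += object->shadow_offset;
		object = object->shadow;
	}
	return VM_PAGE_NULL;	/* would be zero-filled on demand */
}
#endif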
/* Forward declarations for internal functions. */

static kern_return_t	vm_object_terminate(
				vm_object_t	object);

extern void		vm_object_remove(
				vm_object_t	object);

static vm_object_t	vm_object_cache_trim(
				boolean_t	called_from_vm_object_deallocate);

static void		vm_object_deactivate_all_pages(
				vm_object_t	object);

static kern_return_t	vm_object_copy_call(
				vm_object_t		src_object,
				vm_object_offset_t	src_offset,
				vm_object_size_t	size,
				vm_object_t		*_result_object);

static void		vm_object_do_collapse(
				vm_object_t	object,
				vm_object_t	backing_object);

static void		vm_object_do_bypass(
				vm_object_t	object,
				vm_object_t	backing_object);

static void		vm_object_release_pager(
				memory_object_t	pager);

static zone_t		vm_object_zone;		/* vm backing store zone */
/*
 *	All wired-down kernel memory belongs to a single virtual
 *	memory object (kernel_object) to avoid wasting data structures.
 */
static struct vm_object			kernel_object_store;
__private_extern__ vm_object_t		kernel_object = &kernel_object_store;

/*
 *	The submap object is used as a placeholder for vm_map_submap
 *	operations.  The object is declared in vm_map.c because it
 *	is exported by the vm_map module.  The storage is declared
 *	here because it must be initialized here.
 */
static struct vm_object			vm_submap_object_store;

/*
 *	Virtual memory objects are initialized from
 *	a template (see vm_object_allocate).
 *
 *	When adding a new field to the virtual memory
 *	object structure, be sure to add initialization
 *	(see _vm_object_allocate()).
 */
static struct vm_object			vm_object_template;
/*
 *	Virtual memory objects that are not referenced by
 *	any address maps, but that are allowed to persist
 *	(an attribute specified by the associated memory manager),
 *	are kept in a queue (vm_object_cached_list).
 *
 *	When an object from this queue is referenced again,
 *	for example to make another address space mapping,
 *	it must be removed from the queue.  That is, the
 *	queue contains *only* objects with zero references.
 *
 *	The kernel may choose to terminate objects from this
 *	queue in order to reclaim storage.  The current policy
 *	is to permit a fixed maximum number of unreferenced
 *	objects (vm_object_cached_max).
 *
 *	A spin lock (accessed by routines
 *	vm_object_cache_{lock,lock_try,unlock}) governs the
 *	object cache.  It must be held when objects are
 *	added to or removed from the cache (in vm_object_terminate).
 *	The routines that acquire a reference to a virtual
 *	memory object based on one of the memory object ports
 *	must also lock the cache.
 *
 *	Ideally, the object cache should be more isolated
 *	from the reference mechanism, so that the lock need
 *	not be held to make simple references.
 */
static queue_head_t	vm_object_cached_list;
static int		vm_object_cached_count = 0;
static int		vm_object_cached_high;	/* highest # cached objects */
static int		vm_object_cached_max = 512;	/* may be patched */

static decl_mutex_data(,vm_object_cached_lock_data)

#define vm_object_cache_lock()		\
		mutex_lock(&vm_object_cached_lock_data)
#define vm_object_cache_lock_try()	\
		mutex_try(&vm_object_cached_lock_data)
#define vm_object_cache_unlock()	\
		mutex_unlock(&vm_object_cached_lock_data)
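/*
 * Illustrative sketch (not in the original source): the lock discipline the
 * comment above describes.  Any code that walks or edits
 * vm_object_cached_list takes the cache lock first; the count-versus-max
 * check mirrors the one done in vm_object_cache_trim() below.
 */
#if 0	/* illustration only -- never compiled */
	vm_object_cache_lock();
	if (vm_object_cached_count > vm_object_cached_max) {
		/* remove a victim from the head of vm_object_cached_list
		 * and terminate it -- see vm_object_cache_trim() */
	}
	vm_object_cache_unlock();
#endif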
#define VM_OBJECT_HASH_COUNT		1024
static queue_head_t	vm_object_hashtable[VM_OBJECT_HASH_COUNT];
static struct zone	*vm_object_hash_zone;

struct vm_object_hash_entry {
	queue_chain_t	hash_link;	/* hash chain link */
	memory_object_t	pager;		/* pager we represent */
	vm_object_t	object;		/* corresponding object */
	boolean_t	waiting;	/* someone waiting for
					 * termination */
};

typedef struct vm_object_hash_entry	*vm_object_hash_entry_t;
#define VM_OBJECT_HASH_ENTRY_NULL	((vm_object_hash_entry_t) 0)

#define VM_OBJECT_HASH_SHIFT	8
#define vm_object_hash(pager) \
	((((unsigned)pager) >> VM_OBJECT_HASH_SHIFT) % VM_OBJECT_HASH_COUNT)
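/*
 * Worked example (not in the original source): with VM_OBJECT_HASH_SHIFT == 8
 * and VM_OBJECT_HASH_COUNT == 1024, a pager whose pointer value is 0x12345600
 * hashes to
 *	(0x12345600 >> 8) % 1024  ==  0x123456 % 1024  ==  0x56  ==  bucket 86.
 * Shifting first discards the low-order bits, which carry little information
 * because pager structures are aligned allocations.
 */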
void vm_object_hash_entry_free(
	vm_object_hash_entry_t	entry);
/*
 *	vm_object_hash_lookup looks up a pager in the hashtable
 *	and returns the corresponding entry, with optional removal.
 */

static vm_object_hash_entry_t
vm_object_hash_lookup(
	memory_object_t	pager,
	boolean_t	remove_entry)
{
	register queue_t		bucket;
	register vm_object_hash_entry_t	entry;

	bucket = &vm_object_hashtable[vm_object_hash(pager)];

	entry = (vm_object_hash_entry_t)queue_first(bucket);
	while (!queue_end(bucket, (queue_entry_t)entry)) {
		if (entry->pager == pager && !remove_entry)
			return(entry);
		else if (entry->pager == pager) {
			queue_remove(bucket, entry,
					vm_object_hash_entry_t, hash_link);
			return(entry);
		}

		entry = (vm_object_hash_entry_t)queue_next(&entry->hash_link);
	}

	return(VM_OBJECT_HASH_ENTRY_NULL);
}
/*
 *	vm_object_hash_enter enters the specified
 *	pager / cache object association in the hashtable.
 */

static void
vm_object_hash_insert(
	vm_object_hash_entry_t	entry)
{
	register queue_t	bucket;

	bucket = &vm_object_hashtable[vm_object_hash(entry->pager)];

	queue_enter(bucket, entry, vm_object_hash_entry_t, hash_link);
}
static vm_object_hash_entry_t
vm_object_hash_entry_alloc(
	memory_object_t	pager)
{
	vm_object_hash_entry_t	entry;

	entry = (vm_object_hash_entry_t)zalloc(vm_object_hash_zone);
	entry->pager = pager;
	entry->object = VM_OBJECT_NULL;
	entry->waiting = FALSE;

	return(entry);
}

void
vm_object_hash_entry_free(
	vm_object_hash_entry_t	entry)
{
	zfree(vm_object_hash_zone, entry);
}
/*
 *	vm_object_allocate:
 *
 *	Returns a new object with the given size.
 */

__private_extern__ void
_vm_object_allocate(
	vm_object_size_t	size,
	vm_object_t		object)
{
	XPR(XPR_VM_OBJECT,
		"vm_object_allocate, object 0x%X size 0x%X\n",
		(integer_t)object, size, 0,0,0);

	*object = vm_object_template;
	queue_init(&object->memq);
	queue_init(&object->msr_q);
#ifdef UPL_DEBUG
	queue_init(&object->uplq);
#endif /* UPL_DEBUG */
	vm_object_lock_init(object);
	object->size = size;
}
__private_extern__ vm_object_t
vm_object_allocate(
	vm_object_size_t	size)
{
	register vm_object_t object;

	object = (vm_object_t) zalloc(vm_object_zone);

//	dbgLog(object, size, 0, 2);	/* (TEST/DEBUG) */

	if (object != VM_OBJECT_NULL)
		_vm_object_allocate(size, object);

	return object;
}
/*
 *	vm_object_bootstrap:
 *
 *	Initialize the VM objects module.
 */
__private_extern__ void
vm_object_bootstrap(void)
{
	register int	i;

	vm_object_zone = zinit((vm_size_t) sizeof(struct vm_object),
				round_page_32(512*1024),
				round_page_32(12*1024),
				"vm objects");

	queue_init(&vm_object_cached_list);
	mutex_init(&vm_object_cached_lock_data, 0);

	vm_object_hash_zone =
		zinit((vm_size_t) sizeof (struct vm_object_hash_entry),
		      round_page_32(512*1024),
		      round_page_32(12*1024),
		      "vm object hash entries");

	for (i = 0; i < VM_OBJECT_HASH_COUNT; i++)
		queue_init(&vm_object_hashtable[i]);
	/*
	 *	Fill in a template object, for quick initialization
	 */

	/* memq; Lock; init after allocation */
	vm_object_template.size = 0;
	vm_object_template.memq_hint = VM_PAGE_NULL;
	vm_object_template.ref_count = 1;
#if	TASK_SWAPPER
	vm_object_template.res_count = 1;
#endif	/* TASK_SWAPPER */
	vm_object_template.resident_page_count = 0;
	vm_object_template.copy = VM_OBJECT_NULL;
	vm_object_template.shadow = VM_OBJECT_NULL;
	vm_object_template.shadow_offset = (vm_object_offset_t) 0;
	vm_object_template.cow_hint = ~(vm_offset_t)0;
	vm_object_template.true_share = FALSE;

	vm_object_template.pager = MEMORY_OBJECT_NULL;
	vm_object_template.paging_offset = 0;
	vm_object_template.pager_control = MEMORY_OBJECT_CONTROL_NULL;
	/* msr_q; init after allocation */

	vm_object_template.copy_strategy = MEMORY_OBJECT_COPY_SYMMETRIC;
	vm_object_template.absent_count = 0;
	vm_object_template.paging_in_progress = 0;

	/* Begin bitfields */
	vm_object_template.all_wanted = 0; /* all bits FALSE */
	vm_object_template.pager_created = FALSE;
	vm_object_template.pager_initialized = FALSE;
	vm_object_template.pager_ready = FALSE;
	vm_object_template.pager_trusted = FALSE;
	vm_object_template.can_persist = FALSE;
	vm_object_template.internal = TRUE;
	vm_object_template.temporary = TRUE;
	vm_object_template.private = FALSE;
	vm_object_template.pageout = FALSE;
	vm_object_template.alive = TRUE;
	vm_object_template.purgable = VM_OBJECT_NONPURGABLE;
	vm_object_template.silent_overwrite = FALSE;
	vm_object_template.advisory_pageout = FALSE;
	vm_object_template.shadowed = FALSE;
	vm_object_template.terminating = FALSE;
	vm_object_template.shadow_severed = FALSE;
	vm_object_template.phys_contiguous = FALSE;
	vm_object_template.nophyscache = FALSE;
	/* End bitfields */

	/* cache bitfields */
	vm_object_template.wimg_bits = VM_WIMG_DEFAULT;

	/* cached_list; init after allocation */
	vm_object_template.last_alloc = (vm_object_offset_t) 0;
	vm_object_template.cluster_size = 0;
#if	MACH_PAGEMAP
	vm_object_template.existence_map = VM_EXTERNAL_NULL;
#endif	/* MACH_PAGEMAP */
#if	MACH_ASSERT
	vm_object_template.paging_object = VM_OBJECT_NULL;
#endif	/* MACH_ASSERT */
	/*
	 *	Initialize the "kernel object"
	 */

	kernel_object = &kernel_object_store;

/*
 * Note that in the following size specifications, we need to add 1 because
 * VM_MAX_KERNEL_ADDRESS (vm_last_addr) is a maximum address, not a size.
 */

#ifdef ppc
	_vm_object_allocate((vm_last_addr - VM_MIN_KERNEL_ADDRESS) + 1,
			kernel_object);
#else
	_vm_object_allocate((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) + 1,
			kernel_object);
#endif
	kernel_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;

	/*
	 *	Initialize the "submap object".  Make it as large as the
	 *	kernel object so that no limit is imposed on submap sizes.
	 */

	vm_submap_object = &vm_submap_object_store;
#ifdef ppc
	_vm_object_allocate((vm_last_addr - VM_MIN_KERNEL_ADDRESS) + 1,
			vm_submap_object);
#else
	_vm_object_allocate((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) + 1,
			vm_submap_object);
#endif
	vm_submap_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;

	/*
	 * Create an "extra" reference to this object so that we never
	 * try to deallocate it; zfree doesn't like to be called with
	 * non-zone memory.
	 */
	vm_object_reference(vm_submap_object);

#if	MACH_PAGEMAP
	vm_external_module_initialize();
#endif	/* MACH_PAGEMAP */
}
__private_extern__ void
vm_object_init(void)
{
	/*
	 *	Finish initializing the kernel object.
	 */
}

/* remove the typedef below when emergency work-around is taken out */
typedef struct vnode_pager {
	memory_object_t			pager;
	memory_object_t			pager_handle;	/* pager */
	memory_object_control_t		control_handle;	/* memory object's control handle */
	void				*vnode_handle;	/* vnode handle */
} *vnode_pager_t;
#define MIGHT_NOT_CACHE_SHADOWS		1
#if	MIGHT_NOT_CACHE_SHADOWS
static int cache_shadows = TRUE;
#endif	/* MIGHT_NOT_CACHE_SHADOWS */
/*
 *	vm_object_deallocate:
 *
 *	Release a reference to the specified object,
 *	gained either through a vm_object_allocate
 *	or a vm_object_reference call.  When all references
 *	are gone, storage associated with this object
 *	may be relinquished.
 *
 *	No object may be locked.
 */
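/*
 * Illustrative sketch (not part of the original source): the intended pairing
 * of the reference-count operations this comment describes.  "some_size" and
 * "use_object()" are hypothetical placeholders.
 */
#if 0	/* illustration only -- never compiled */
	vm_object_t obj;

	obj = vm_object_allocate(some_size);	/* returns obj with one reference */
	vm_object_reference(obj);		/* extra reference for another user */
	use_object(obj);
	vm_object_deallocate(obj);		/* drop the extra reference */
	vm_object_deallocate(obj);		/* last reference: obj may be cached
						 * or terminated */
#endif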
__private_extern__ void
vm_object_deallocate(
	register vm_object_t	object)
{
	boolean_t retry_cache_trim = FALSE;
	vm_object_t shadow = VM_OBJECT_NULL;

//	if(object)dbgLog(object, object->ref_count, object->can_persist, 3);	/* (TEST/DEBUG) */
//	else dbgLog(object, 0, 0, 3);	/* (TEST/DEBUG) */

	while (object != VM_OBJECT_NULL) {

		/*
		 *	The cache holds a reference (uncounted) to
		 *	the object; we must lock it before removing
		 *	the object.
		 */
		for (;;) {
			vm_object_cache_lock();

			/*
			 * if we try to take a regular lock here
			 * we risk deadlocking against someone
			 * holding a lock on this object while
			 * trying to vm_object_deallocate a different
			 * object
			 */
			if (vm_object_lock_try(object))
				break;
			vm_object_cache_unlock();
			mutex_pause();	/* wait a bit */
		}
		assert(object->ref_count > 0);

		/*
		 *	If the object has a named reference, and only
		 *	that reference would remain, inform the pager
		 *	about the last "mapping" reference going away.
		 */
		if ((object->ref_count == 2) && (object->named)) {
			memory_object_t	pager = object->pager;

			/* Notify the Pager that there are no */
			/* more mappers for this object */

			if (pager != MEMORY_OBJECT_NULL) {
				vm_object_unlock(object);
				vm_object_cache_unlock();

				memory_object_unmap(pager);

				for (;;) {
					vm_object_cache_lock();

					/*
					 * if we try to take a regular lock here
					 * we risk deadlocking against someone
					 * holding a lock on this object while
					 * trying to vm_object_deallocate a different
					 * object
					 */
					if (vm_object_lock_try(object))
						break;
					vm_object_cache_unlock();
					mutex_pause();	/* wait a bit */
				}
				assert(object->ref_count > 0);
			}
		}

		/*
		 *	Lose the reference. If other references
		 *	remain, then we are done, unless we need
		 *	to retry a cache trim.
		 *	If it is the last reference, then keep it
		 *	until any pending initialization is completed.
		 */

		/* if the object is terminating, it cannot go into */
		/* the cache and we obviously should not call      */
		/* terminate again. */

		if ((object->ref_count > 1) || object->terminating) {
			object->ref_count--;
			vm_object_res_deallocate(object);
			vm_object_cache_unlock();

			if (object->ref_count == 1 &&
			    object->shadow != VM_OBJECT_NULL) {
				/*
				 * We don't use this VM object anymore.  We
				 * would like to collapse it into its parent(s),
				 * but we don't have any pointers back to these
				 * parents.
				 * But we can try and collapse this object with
				 * its own shadows, in case these are useless
				 * too...
				 */
				vm_object_collapse(object, 0);
			}

			vm_object_unlock(object);
			if (retry_cache_trim &&
			    ((object = vm_object_cache_trim(TRUE)) !=
			      VM_OBJECT_NULL)) {
				continue;
			}
			return;
		}

		/*
		 *	We have to wait for initialization
		 *	before destroying or caching the object.
		 */

		if (object->pager_created && ! object->pager_initialized) {
			assert(! object->can_persist);
			vm_object_assert_wait(object,
					      VM_OBJECT_EVENT_INITIALIZED,
					      THREAD_UNINT);
			vm_object_unlock(object);
			vm_object_cache_unlock();
			thread_block(THREAD_CONTINUE_NULL);
			continue;
		}

		/*
		 *	If this object can persist, then enter it in
		 *	the cache. Otherwise, terminate it.
		 *
		 *	NOTE:  Only permanent objects are cached, and
		 *	permanent objects cannot have shadows.  This
		 *	affects the residence counting logic in a minor
		 *	way (can do it in-line, mostly).
		 */

		if ((object->can_persist) && (object->alive)) {
			/*
			 *	Now it is safe to decrement reference count,
			 *	and to return if reference count is > 0.
			 */
			if (--object->ref_count > 0) {
				vm_object_res_deallocate(object);
				vm_object_unlock(object);
				vm_object_cache_unlock();
				if (retry_cache_trim &&
				    ((object = vm_object_cache_trim(TRUE)) !=
				      VM_OBJECT_NULL)) {
					continue;
				}
				return;
			}

#if	MIGHT_NOT_CACHE_SHADOWS
			/*
			 *	Remove shadow now if we don't
			 *	want to cache shadows.
			 */
			if (! cache_shadows) {
				shadow = object->shadow;
				object->shadow = VM_OBJECT_NULL;
			}
#endif	/* MIGHT_NOT_CACHE_SHADOWS */

			/*
			 *	Enter the object onto the queue of
			 *	cached objects, and deactivate
			 *	its pages.
			 */
			assert(object->shadow == VM_OBJECT_NULL);
			VM_OBJ_RES_DECR(object);
			XPR(XPR_VM_OBJECT,
		      "vm_o_deallocate: adding %x to cache, queue = (%x, %x)\n",
				(integer_t)object,
				(integer_t)vm_object_cached_list.next,
				(integer_t)vm_object_cached_list.prev,0,0);

			vm_object_cached_count++;
			if (vm_object_cached_count > vm_object_cached_high)
				vm_object_cached_high = vm_object_cached_count;
			queue_enter(&vm_object_cached_list, object,
				vm_object_t, cached_list);
			vm_object_cache_unlock();
			vm_object_deactivate_all_pages(object);
			vm_object_unlock(object);

#if	MIGHT_NOT_CACHE_SHADOWS
			/*
			 *	If we have a shadow that we need
			 *	to deallocate, do so now, remembering
			 *	to trim the cache later.
			 */
			if (! cache_shadows && shadow != VM_OBJECT_NULL) {
				vm_object_deallocate(shadow);
				retry_cache_trim = TRUE;
			}
#endif	/* MIGHT_NOT_CACHE_SHADOWS */

			/*
			 *	Trim the cache. If the cache trim
			 *	returns with a shadow for us to deallocate,
			 *	then remember to retry the cache trim
			 *	when we are done deallocating the shadow.
			 *	Otherwise, we are done.
			 */

			object = vm_object_cache_trim(TRUE);
			if (object == VM_OBJECT_NULL) {
				return;
			}
			retry_cache_trim = TRUE;

		} else {
			/*
			 *	This object is not cachable; terminate it.
			 */
			XPR(XPR_VM_OBJECT,
	 "vm_o_deallocate: !cacheable 0x%X res %d paging_ops %d thread 0x%p ref %d\n",
				(integer_t)object, object->resident_page_count,
				object->paging_in_progress,
				(void *)current_thread(),object->ref_count);

			VM_OBJ_RES_DECR(object);	/* XXX ? */
			/*
			 *	Terminate this object. If it had a shadow,
			 *	then deallocate it; otherwise, if we need
			 *	to retry a cache trim, do so now; otherwise,
			 *	we are done. "pageout" objects have a shadow,
			 *	but maintain a "paging reference" rather than
			 *	a normal reference.
			 */
			shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
			if(vm_object_terminate(object) != KERN_SUCCESS) {
				return;
			}
			if (shadow != VM_OBJECT_NULL) {
				object = shadow;
				continue;
			}
			if (retry_cache_trim &&
			    ((object = vm_object_cache_trim(TRUE)) !=
			      VM_OBJECT_NULL)) {
				continue;
			}
			return;
		}
	}
	assert(! retry_cache_trim);
}
/*
 *	Check to see whether we really need to trim
 *	down the cache. If so, remove an object from
 *	the cache, terminate it, and repeat.
 *
 *	Called with, and returns with, cache lock unlocked.
 */
static vm_object_t
vm_object_cache_trim(
	boolean_t called_from_vm_object_deallocate)
{
	register vm_object_t object = VM_OBJECT_NULL;
	vm_object_t shadow;

	for (;;) {

		/*
		 *	If we no longer need to trim the cache,
		 *	then we are done.
		 */

		vm_object_cache_lock();
		if (vm_object_cached_count <= vm_object_cached_max) {
			vm_object_cache_unlock();
			return VM_OBJECT_NULL;
		}

		/*
		 *	We must trim down the cache, so remove
		 *	the first object in the cache.
		 */
		XPR(XPR_VM_OBJECT,
		"vm_object_cache_trim: removing from front of cache (%x, %x)\n",
			(integer_t)vm_object_cached_list.next,
			(integer_t)vm_object_cached_list.prev, 0, 0, 0);

		object = (vm_object_t) queue_first(&vm_object_cached_list);
		if(object == (vm_object_t) &vm_object_cached_list) {
			/* something's wrong with the calling parameter or */
			/* the value of vm_object_cached_count, just fix   */
			/* and return                                      */
			if(vm_object_cached_max < 0)
				vm_object_cached_max = 0;
			vm_object_cached_count = 0;
			vm_object_cache_unlock();
			return VM_OBJECT_NULL;
		}
		vm_object_lock(object);
		queue_remove(&vm_object_cached_list, object, vm_object_t,
			     cached_list);
		vm_object_cached_count--;

		/*
		 *	Since this object is in the cache, we know
		 *	that it is initialized and has no references.
		 *	Take a reference to avoid recursive deallocations.
		 */

		assert(object->pager_initialized);
		assert(object->ref_count == 0);
		object->ref_count++;

		/*
		 *	Terminate the object.
		 *	If the object had a shadow, we let vm_object_deallocate
		 *	deallocate it. "pageout" objects have a shadow, but
		 *	maintain a "paging reference" rather than a normal
		 *	reference.
		 *	(We are careful here to limit recursion.)
		 */
		shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
		if(vm_object_terminate(object) != KERN_SUCCESS)
			continue;
		if (shadow != VM_OBJECT_NULL) {
			if (called_from_vm_object_deallocate) {
				return shadow;
			} else {
				vm_object_deallocate(shadow);
			}
		}
	}
}
boolean_t	vm_object_terminate_remove_all = FALSE;

/*
 *	Routine:	vm_object_terminate
 *
 *	Free all resources associated with a vm_object.
 *
 *	Upon entry, the object must be locked,
 *	and the object must have exactly one reference.
 *
 *	The shadow object reference is left alone.
 *
 *	The object must be unlocked if it's found that pages
 *	must be flushed to a backing object.  If someone
 *	manages to map the object while it is being flushed
 *	the object is returned unlocked and unchanged.  Otherwise,
 *	upon exit, the cache will be unlocked, and the
 *	object will cease to exist.
 */
static kern_return_t
vm_object_terminate(
	register vm_object_t	object)
{
	memory_object_t		pager;
	register vm_page_t	p;
	vm_object_t		shadow_object;

	XPR(XPR_VM_OBJECT, "vm_object_terminate, object 0x%X ref %d\n",
		(integer_t)object, object->ref_count, 0, 0, 0);

	if (!object->pageout && (!object->temporary || object->can_persist)
			&& (object->pager != NULL || object->shadow_severed)) {
	   vm_object_cache_unlock();
	   while (!queue_empty(&object->memq)) {
		/*
		 * Clear pager_trusted bit so that the pages get yanked
		 * out of the object instead of cleaned in place.  This
		 * prevents a deadlock in XMM and makes more sense anyway.
		 */
		object->pager_trusted = FALSE;

		p = (vm_page_t) queue_first(&object->memq);

		if (p->busy || p->cleaning) {
			if(p->cleaning || p->absent) {
				vm_object_paging_wait(object, THREAD_UNINT);
				continue;
			} else {
				panic("vm_object_terminate.3 0x%x 0x%x", object, p);
			}
		}

		vm_page_lock_queues();
		VM_PAGE_QUEUES_REMOVE(p);
		vm_page_unlock_queues();

		if (p->absent || p->private) {

			/*
			 *	For private pages, VM_PAGE_FREE just
			 *	leaves the page structure around for
			 *	its owner to clean up.  For absent
			 *	pages, the structure is returned to
			 *	the appropriate pool.
			 */

			goto free_page;
		}

		if (p->fictitious)
			panic("vm_object_terminate.4 0x%x 0x%x", object, p);

		if (!p->dirty)
			p->dirty = pmap_is_modified(p->phys_page);

		if ((p->dirty || p->precious) && !p->error && object->alive) {
			vm_pageout_cluster(p); /* flush page */
			vm_object_paging_wait(object, THREAD_UNINT);
			XPR(XPR_VM_OBJECT,
			    "vm_object_terminate restart, object 0x%X ref %d\n",
			    (integer_t)object, object->ref_count, 0, 0, 0);
		} else {
		    free_page:
			VM_PAGE_FREE(p);
		}
	   }
	   vm_object_unlock(object);
	   vm_object_cache_lock();
	   vm_object_lock(object);
	}

	/*
	 *	Make sure the object isn't already being terminated
	 */
	if(object->terminating) {
		object->ref_count -= 1;
		assert(object->ref_count > 0);
		vm_object_cache_unlock();
		vm_object_unlock(object);
		return KERN_FAILURE;
	}

	/*
	 *	Did somebody get a reference to the object while we were
	 *	terminating it?
	 */
	if(object->ref_count != 1) {
		object->ref_count -= 1;
		assert(object->ref_count > 0);
		vm_object_res_deallocate(object);
		vm_object_cache_unlock();
		vm_object_unlock(object);
		return KERN_FAILURE;
	}

	/*
	 *	Make sure no one can look us up now.
	 */

	object->terminating = TRUE;
	object->alive = FALSE;
	vm_object_remove(object);

	/*
	 *	Detach the object from its shadow if we are the shadow's
	 *	copy. The reference we hold on the shadow must be dropped
	 *	by our caller.
	 */
	if (((shadow_object = object->shadow) != VM_OBJECT_NULL) &&
	    !(object->pageout)) {
		vm_object_lock(shadow_object);
		if (shadow_object->copy == object)
			shadow_object->copy = VM_OBJECT_NULL;
		vm_object_unlock(shadow_object);
	}

	/*
	 *	The pageout daemon might be playing with our pages.
	 *	Now that the object is dead, it won't touch any more
	 *	pages, but some pages might already be on their way out.
	 *	Hence, we wait until the active paging activities have ceased
	 *	before we break the association with the pager itself.
	 */
	while (object->paging_in_progress != 0) {
		vm_object_cache_unlock();
		vm_object_wait(object,
			       VM_OBJECT_EVENT_PAGING_IN_PROGRESS,
			       THREAD_UNINT);
		vm_object_cache_lock();
		vm_object_lock(object);
	}

	pager = object->pager;
	object->pager = MEMORY_OBJECT_NULL;

	if (pager != MEMORY_OBJECT_NULL)
		memory_object_control_disable(object->pager_control);
	vm_object_cache_unlock();

	object->ref_count--;
#if	TASK_SWAPPER
	assert(object->res_count == 0);
#endif	/* TASK_SWAPPER */

	assert (object->ref_count == 0);

	/*
	 *	Clean or free the pages, as appropriate.
	 *	It is possible for us to find busy/absent pages,
	 *	if some faults on this object were aborted.
	 */
	if (object->pageout) {
		assert(shadow_object != VM_OBJECT_NULL);
		assert(shadow_object == object->shadow);

		vm_pageout_object_terminate(object);

	} else if ((object->temporary && !object->can_persist) ||
		   (pager == MEMORY_OBJECT_NULL)) {
		while (!queue_empty(&object->memq)) {
			p = (vm_page_t) queue_first(&object->memq);

			VM_PAGE_FREE(p);
		}
	} else if (!queue_empty(&object->memq)) {
		panic("vm_object_terminate: queue just emptied isn't");
	}

	assert(object->paging_in_progress == 0);
	assert(object->ref_count == 0);

	/*
	 *	If the pager has not already been released by
	 *	vm_object_destroy, we need to terminate it and
	 *	release our reference to it here.
	 */
	if (pager != MEMORY_OBJECT_NULL) {
		vm_object_unlock(object);
		vm_object_release_pager(pager);
		vm_object_lock(object);
	}

	/* kick off anyone waiting on terminating */
	object->terminating = FALSE;
	vm_object_paging_begin(object);
	vm_object_paging_end(object);
	vm_object_unlock(object);

#if	MACH_PAGEMAP
	vm_external_destroy(object->existence_map, object->size);
#endif	/* MACH_PAGEMAP */

	/*
	 *	Free the space for the object.
	 */
	zfree(vm_object_zone, object);
	return KERN_SUCCESS;
}
/*
 *	Routine:	vm_object_pager_wakeup
 *	Purpose:	Wake up anyone waiting for termination of a pager.
 */

static void
vm_object_pager_wakeup(
	memory_object_t	pager)
{
	vm_object_hash_entry_t	entry;
	boolean_t		waiting = FALSE;

	/*
	 *	If anyone was waiting for the memory_object_terminate
	 *	to be queued, wake them up now.
	 */
	vm_object_cache_lock();
	entry = vm_object_hash_lookup(pager, TRUE);
	if (entry != VM_OBJECT_HASH_ENTRY_NULL)
		waiting = entry->waiting;
	vm_object_cache_unlock();

	if (entry != VM_OBJECT_HASH_ENTRY_NULL) {
		if (waiting)
			thread_wakeup((event_t) pager);
		vm_object_hash_entry_free(entry);
	}
}
/*
 *	Routine:	vm_object_release_pager
 *	Purpose:	Terminate the pager and, upon completion,
 *			release our last reference to it.
 *			just like memory_object_terminate, except
 *			that we wake up anyone blocked in vm_object_enter
 *			waiting for termination message to be queued
 *			before calling memory_object_init.
 */
static void
vm_object_release_pager(
	memory_object_t	pager)
{

	/*
	 *	Terminate the pager.
	 */

	(void) memory_object_terminate(pager);

	/*
	 *	Wakeup anyone waiting for this terminate
	 */

	vm_object_pager_wakeup(pager);

	/*
	 *	Release reference to pager.
	 */

	memory_object_deallocate(pager);
}
/*
 *	Routine:	vm_object_destroy
 *
 *	Shut down a VM object, despite the
 *	presence of address map (or other) references
 *	to the vm_object.
 */
kern_return_t
vm_object_destroy(
	vm_object_t		object,
	__unused kern_return_t	reason)
{
	memory_object_t		old_pager;

	if (object == VM_OBJECT_NULL)
		return(KERN_SUCCESS);

	/*
	 *	Remove the pager association immediately.
	 *
	 *	This will prevent the memory manager from further
	 *	meddling.  [If it wanted to flush data or make
	 *	other changes, it should have done so before performing
	 *	the destroy call.]
	 */

	vm_object_cache_lock();
	vm_object_lock(object);
	object->can_persist = FALSE;
	object->named = FALSE;
	object->alive = FALSE;

	/*
	 *	Rip out the pager from the vm_object now...
	 */

	vm_object_remove(object);
	old_pager = object->pager;
	object->pager = MEMORY_OBJECT_NULL;
	if (old_pager != MEMORY_OBJECT_NULL)
		memory_object_control_disable(object->pager_control);
	vm_object_cache_unlock();

	/*
	 * Wait for the existing paging activity (that got
	 * through before we nulled out the pager) to subside.
	 */

	vm_object_paging_wait(object, THREAD_UNINT);
	vm_object_unlock(object);

	/*
	 *	Terminate the object now.
	 */
	if (old_pager != MEMORY_OBJECT_NULL) {
		vm_object_release_pager(old_pager);

		/*
		 * JMM - Release the caller's reference.  This assumes the
		 * caller had a reference to release, which is a big (but
		 * currently valid) assumption if this is driven from the
		 * vnode pager (it is holding a named reference when making
		 * this call)..
		 */
		vm_object_deallocate(object);
	}
	return(KERN_SUCCESS);
}
/*
 *	vm_object_deactivate_pages
 *
 *	Deactivate all pages in the specified object.  (Keep its pages
 *	in memory even though it is no longer referenced.)
 *
 *	The object must be locked.
 */
static void
vm_object_deactivate_all_pages(
	register vm_object_t	object)
{
	register vm_page_t	p;

	queue_iterate(&object->memq, p, vm_page_t, listq) {
		vm_page_lock_queues();
		if (!p->busy)
			vm_page_deactivate(p);
		vm_page_unlock_queues();
	}
}
__private_extern__ void
vm_object_deactivate_pages(
	vm_object_t		object,
	vm_object_offset_t	offset,
	vm_object_size_t	size,
	boolean_t		kill_page)
{
	vm_object_t	orig_object;
	int pages_moved = 0;
	int pages_found = 0;

	/*
	 * entered with object lock held, acquire a paging reference to
	 * prevent the memory_object and control ports from
	 * being destroyed.
	 */
	orig_object = object;

	for (;;) {
		register vm_page_t	m;
		vm_object_offset_t	toffset;
		vm_object_size_t	tsize;

		vm_object_paging_begin(object);
		vm_page_lock_queues();

		for (tsize = size, toffset = offset; tsize; tsize -= PAGE_SIZE, toffset += PAGE_SIZE) {

			if ((m = vm_page_lookup(object, toffset)) != VM_PAGE_NULL) {

				pages_found++;

				if ((m->wire_count == 0) && (!m->private) && (!m->gobbled) && (!m->busy)) {

					assert(!m->laundry);

					m->reference = FALSE;
					pmap_clear_reference(m->phys_page);

					if ((kill_page) && (object->internal)) {
						m->precious = FALSE;
						m->dirty = FALSE;
						pmap_clear_modify(m->phys_page);
						vm_external_state_clr(object->existence_map, offset);
					}
					VM_PAGE_QUEUES_REMOVE(m);

					assert(!m->laundry);
					assert(m->object != kernel_object);
					assert(m->pageq.next == NULL &&
					       m->pageq.prev == NULL);
					if(m->zero_fill) {
						queue_enter_first(
							&vm_page_queue_zf,
							m, vm_page_t, pageq);
					} else {
						queue_enter_first(
							&vm_page_queue_inactive,
							m, vm_page_t, pageq);
					}

					m->inactive = TRUE;
					if (!m->fictitious)
						vm_page_inactive_count++;

					pages_moved++;
				}
			}
		}
		vm_page_unlock_queues();
		vm_object_paging_end(object);

		if (object->shadow) {
			vm_object_t	tmp_object;

			kill_page = 0;

			offset += object->shadow_offset;

			tmp_object = object->shadow;
			vm_object_lock(tmp_object);

			if (object != orig_object)
				vm_object_unlock(object);
			object = tmp_object;
		} else
			break;
	}
	if (object != orig_object)
		vm_object_unlock(object);
}
/*
 *	Routine:	vm_object_pmap_protect
 *
 *	Purpose:
 *		Reduces the permission for all physical
 *		pages in the specified object range.
 *
 *		If removing write permission only, it is
 *		sufficient to protect only the pages in
 *		the top-level object; only those pages may
 *		have write permission.
 *
 *		If removing all access, we must follow the
 *		shadow chain from the top-level object to
 *		remove access to all pages in shadowed objects.
 *
 *		The object must *not* be locked.  The object must
 *		be temporary/internal.
 *
 *		If pmap is not NULL, this routine assumes that
 *		the only mappings for the pages are in that
 *		pmap.
 */
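/*
 * Editor's note (not in the original source): the implementation below picks
 * among three strategies.  If a pmap is supplied and more than half of the
 * range is resident, a single pmap_protect() over [pmap_start, pmap_start +
 * size) handles everything.  Otherwise, if the range is large relative to the
 * resident page count, it walks the object's resident page list and protects
 * each page that falls in the range; if not, it probes each page-sized offset
 * with vm_page_lookup().  When prot is VM_PROT_NONE it then repeats the work
 * down the shadow chain, as the comment above requires.
 */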
__private_extern__ void
vm_object_pmap_protect(
	register vm_object_t		object,
	register vm_object_offset_t	offset,
	vm_object_size_t		size,
	pmap_t				pmap,
	vm_map_offset_t			pmap_start,
	vm_prot_t			prot)
{
	if (object == VM_OBJECT_NULL)
		return;
	size = vm_object_round_page(size);
	offset = vm_object_trunc_page(offset);

	vm_object_lock(object);

	assert(object->internal);

	while (TRUE) {
	    if (ptoa_64(object->resident_page_count) > size/2 && pmap != PMAP_NULL) {
		vm_object_unlock(object);
		pmap_protect(pmap, pmap_start, pmap_start + size, prot);
		return;
	    }

	    /* if we are doing large ranges with respect to resident */
	    /* page count then we should iterate over pages otherwise */
	    /* inverse page look-up will be faster */
	    if (ptoa_64(object->resident_page_count / 4) < size) {
		vm_page_t		p;
		vm_object_offset_t	end;

		end = offset + size;

		if (pmap != PMAP_NULL) {
			queue_iterate(&object->memq, p, vm_page_t, listq) {
				if (!p->fictitious &&
				    (offset <= p->offset) && (p->offset < end)) {
					vm_map_offset_t start;

					start = pmap_start + p->offset - offset;
					pmap_protect(pmap, start, start + PAGE_SIZE_64, prot);
				}
			}
		} else {
			queue_iterate(&object->memq, p, vm_page_t, listq) {
				if (!p->fictitious &&
				    (offset <= p->offset) && (p->offset < end)) {

					pmap_page_protect(p->phys_page,
							  prot & ~p->page_lock);
				}
			}
		}
	    } else {
		vm_page_t		p;
		vm_object_offset_t	end;
		vm_object_offset_t	target_off;

		end = offset + size;

		if (pmap != PMAP_NULL) {
			for(target_off = offset;
			    target_off < end;
			    target_off += PAGE_SIZE) {
				p = vm_page_lookup(object, target_off);
				if (p != VM_PAGE_NULL) {
					vm_map_offset_t start;

					start = pmap_start +
						(vm_offset_t)(p->offset - offset);
					pmap_protect(pmap, start,
						     start + PAGE_SIZE, prot);
				}
			}
		} else {
			for(target_off = offset;
			    target_off < end; target_off += PAGE_SIZE) {
				p = vm_page_lookup(object, target_off);
				if (p != VM_PAGE_NULL) {
					pmap_page_protect(p->phys_page,
							  prot & ~p->page_lock);
				}
			}
		}
	    }

	    if (prot == VM_PROT_NONE) {
		/*
		 * Must follow shadow chain to remove access
		 * to pages in shadowed objects.
		 */
		register vm_object_t	next_object;

		next_object = object->shadow;
		if (next_object != VM_OBJECT_NULL) {
			offset += object->shadow_offset;
			vm_object_lock(next_object);
			vm_object_unlock(object);
			object = next_object;
		}
		else {
			/*
			 * End of chain - we are done.
			 */
			break;
		}
	    }
	    else {
		/*
		 * Pages in shadowed objects may never have
		 * write permission - we may stop here.
		 */
		break;
	    }
	}

	vm_object_unlock(object);
}
/*
 *	Routine:	vm_object_copy_slowly
 *
 *		Copy the specified range of the source
 *		virtual memory object without using
 *		protection-based optimizations (such
 *		as copy-on-write).  The pages in the
 *		region are actually copied.
 *
 *	In/out conditions:
 *		The caller must hold a reference and a lock
 *		for the source virtual memory object.  The source
 *		object will be returned *unlocked*.
 *
 *		If the copy is completed successfully, KERN_SUCCESS is
 *		returned.  If the caller asserted the interruptible
 *		argument, and an interruption occurred while waiting
 *		for a user-generated event, MACH_SEND_INTERRUPTED is
 *		returned.  Other values may be returned to indicate
 *		hard errors during the copy operation.
 *
 *		A new virtual memory object is returned in a
 *		parameter (_result_object).  The contents of this
 *		new object, starting at a zero offset, are a copy
 *		of the source memory region.  In the event of
 *		an error, this parameter will contain the value
 *		VM_OBJECT_NULL.
 */
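/*
 * Illustrative sketch (not part of the original source): how a caller is
 * expected to drive vm_object_copy_slowly() given the conditions above.
 * "src", "off" and "len" are hypothetical locals.
 */
#if 0	/* illustration only -- never compiled */
	vm_object_t	copy;
	kern_return_t	kr;

	vm_object_reference(src);		/* caller holds a reference ... */
	vm_object_lock(src);			/* ... and the lock on entry    */
	kr = vm_object_copy_slowly(src, off, len, FALSE, &copy);
	/* src comes back unlocked; the caller's reference is untouched */
	if (kr == KERN_SUCCESS) {
		/* "copy" holds the copied pages, starting at offset 0 */
	}
#endif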
__private_extern__ kern_return_t
vm_object_copy_slowly(
	register vm_object_t	src_object,
	vm_object_offset_t	src_offset,
	vm_object_size_t	size,
	boolean_t		interruptible,
	vm_object_t		*_result_object)	/* OUT */
{
	vm_object_t		new_object;
	vm_object_offset_t	new_offset;

	vm_object_offset_t	src_lo_offset = src_offset;
	vm_object_offset_t	src_hi_offset = src_offset + size;

	XPR(XPR_VM_OBJECT, "v_o_c_slowly obj 0x%x off 0x%x size 0x%x\n",
	    src_object, src_offset, size, 0, 0);

	if (size == 0) {
		vm_object_unlock(src_object);
		*_result_object = VM_OBJECT_NULL;
		return(KERN_INVALID_ARGUMENT);
	}

	/*
	 *	Prevent destruction of the source object while we copy.
	 */

	assert(src_object->ref_count > 0);
	src_object->ref_count++;
	VM_OBJ_RES_INCR(src_object);
	vm_object_unlock(src_object);

	/*
	 *	Create a new object to hold the copied pages.
	 *	We fill the new object starting at offset 0,
	 *	regardless of the input offset.
	 *	We don't bother to lock the new object within
	 *	this routine, since we have the only reference.
	 */

	new_object = vm_object_allocate(size);
	new_offset = 0;
	vm_object_lock(new_object);

	assert(size == trunc_page_64(size));	/* Will the loop terminate? */

	for ( ;
	    size != 0 ;
	    src_offset += PAGE_SIZE_64,
	    new_offset += PAGE_SIZE_64, size -= PAGE_SIZE_64
	    ) {
		vm_page_t		new_page;
		vm_fault_return_t	result;

		while ((new_page = vm_page_alloc(new_object, new_offset))
				== VM_PAGE_NULL) {
			if (!vm_page_wait(interruptible)) {
				vm_object_unlock(new_object);
				vm_object_deallocate(new_object);
				vm_object_deallocate(src_object);
				*_result_object = VM_OBJECT_NULL;
				return(MACH_SEND_INTERRUPTED);
			}
		}

		do {
			vm_prot_t	prot = VM_PROT_READ;
			vm_page_t	_result_page;
			vm_page_t	top_page;
			vm_page_t	result_page;
			kern_return_t	error_code;

			vm_object_lock(src_object);
			vm_object_paging_begin(src_object);

			XPR(XPR_VM_FAULT,"vm_object_copy_slowly -> vm_fault_page",0,0,0,0,0);
			result = vm_fault_page(src_object, src_offset,
				VM_PROT_READ, FALSE, interruptible,
				src_lo_offset, src_hi_offset,
				VM_BEHAVIOR_SEQUENTIAL,
				&prot, &_result_page, &top_page,
				&error_code, FALSE, FALSE, NULL, 0);

			switch(result) {
				case VM_FAULT_SUCCESS:
					result_page = _result_page;

					/*
					 *	We don't need to hold the object
					 *	lock -- the busy page will be enough.
					 *	[We don't care about picking up any
					 *	new modifications.]
					 *
					 *	Copy the page to the new object.
					 *
					 *	If result_page is clean,
					 *	we could steal it instead
					 *	of copying.
					 */

					vm_object_unlock(result_page->object);
					vm_page_copy(result_page, new_page);

					/*
					 *	Let go of both pages (make them
					 *	not busy, perform wakeup, activate).
					 */

					new_page->busy = FALSE;
					new_page->dirty = TRUE;
					vm_object_lock(result_page->object);
					PAGE_WAKEUP_DONE(result_page);

					vm_page_lock_queues();
					if (!result_page->active &&
					    !result_page->inactive)
						vm_page_activate(result_page);
					vm_page_activate(new_page);
					vm_page_unlock_queues();

					/*
					 *	Release paging references and
					 *	top-level placeholder page, if any.
					 */

					vm_fault_cleanup(result_page->object,
							 top_page);

					break;

				case VM_FAULT_RETRY:
					break;

				case VM_FAULT_FICTITIOUS_SHORTAGE:
					vm_page_more_fictitious();
					break;

				case VM_FAULT_MEMORY_SHORTAGE:
					if (vm_page_wait(interruptible))
						break;
					/* fall thru */

				case VM_FAULT_INTERRUPTED:
					vm_page_free(new_page);
					vm_object_unlock(new_object);
					vm_object_deallocate(new_object);
					vm_object_deallocate(src_object);
					*_result_object = VM_OBJECT_NULL;
					return(MACH_SEND_INTERRUPTED);

				case VM_FAULT_MEMORY_ERROR:
					/*
					 * A policy choice:
					 *	(a) ignore pages that we can't
					 *	    copy
					 *	(b) return the null object if
					 *	    any page fails [chosen]
					 */

					vm_page_lock_queues();
					vm_page_free(new_page);
					vm_page_unlock_queues();
					vm_object_unlock(new_object);
					vm_object_deallocate(new_object);
					vm_object_deallocate(src_object);
					*_result_object = VM_OBJECT_NULL;
					return(error_code ? error_code :
						KERN_MEMORY_ERROR);
			}
		} while (result != VM_FAULT_SUCCESS);
	}

	/*
	 *	Lose the extra reference, and return our object.
	 */

	vm_object_unlock(new_object);
	vm_object_deallocate(src_object);
	*_result_object = new_object;
	return(KERN_SUCCESS);
}
/*
 *	Routine:	vm_object_copy_quickly
 *
 *		Copy the specified range of the source virtual
 *		memory object, if it can be done without waiting
 *		for user-generated events.
 *
 *		If the copy is successful, the copy is returned in
 *		the arguments; otherwise, the arguments are not
 *		affected.
 *
 *	In/out conditions:
 *		The object should be unlocked on entry and exit.
 */

__private_extern__ boolean_t
vm_object_copy_quickly(
	vm_object_t			*_object,	/* INOUT */
	__unused vm_object_offset_t	offset,		/* IN */
	__unused vm_object_size_t	size,		/* IN */
	boolean_t			*_src_needs_copy,	/* OUT */
	boolean_t			*_dst_needs_copy)	/* OUT */
{
	vm_object_t	object = *_object;
	memory_object_copy_strategy_t copy_strategy;

	XPR(XPR_VM_OBJECT, "v_o_c_quickly obj 0x%x off 0x%x size 0x%x\n",
	    *_object, offset, size, 0, 0);
	if (object == VM_OBJECT_NULL) {
		*_src_needs_copy = FALSE;
		*_dst_needs_copy = FALSE;
		return(TRUE);
	}

	vm_object_lock(object);

	copy_strategy = object->copy_strategy;

	switch (copy_strategy) {
	case MEMORY_OBJECT_COPY_SYMMETRIC:

		/*
		 *	Symmetric copy strategy.
		 *	Make another reference to the object.
		 *	Leave object/offset unchanged.
		 */

		assert(object->ref_count > 0);
		object->ref_count++;
		vm_object_res_reference(object);
		object->shadowed = TRUE;
		vm_object_unlock(object);

		/*
		 *	Both source and destination must make
		 *	shadows, and the source must be made
		 *	read-only if not already.
		 */

		*_src_needs_copy = TRUE;
		*_dst_needs_copy = TRUE;

		break;

	case MEMORY_OBJECT_COPY_DELAY:
		vm_object_unlock(object);
		return(FALSE);

	default:
		vm_object_unlock(object);
		return(FALSE);
	}

	return(TRUE);
}
static int	copy_call_count = 0;
static int	copy_call_sleep_count = 0;
static int	copy_call_restart_count = 0;

/*
 *	Routine:	vm_object_copy_call [internal]
 *
 *		Copy the source object (src_object), using the
 *		user-managed copy algorithm.
 *
 *	In/out conditions:
 *		The source object must be locked on entry.  It
 *		will be *unlocked* on exit.
 *
 *		If the copy is successful, KERN_SUCCESS is returned.
 *		A new object that represents the copied virtual
 *		memory is returned in a parameter (*_result_object).
 *		If the return value indicates an error, this parameter
 *		is not valid.
 */
static kern_return_t
vm_object_copy_call(
	vm_object_t		src_object,
	vm_object_offset_t	src_offset,
	vm_object_size_t	size,
	vm_object_t		*_result_object)	/* OUT */
{
	kern_return_t	kr;
	vm_object_t	copy;
	boolean_t	check_ready = FALSE;

	/*
	 *	If a copy is already in progress, wait and retry.
	 *
	 *	Consider making this call interruptable, as Mike
	 *	intended it to be.
	 *
	 *	Need a counter or version or something to allow
	 *	us to use the copy that the currently requesting
	 *	thread is obtaining -- is it worth adding to the
	 *	vm object structure? Depends on how common this case is.
	 */
	copy_call_count++;
	while (vm_object_wanted(src_object, VM_OBJECT_EVENT_COPY_CALL)) {
		vm_object_sleep(src_object, VM_OBJECT_EVENT_COPY_CALL,
			       THREAD_UNINT);
		copy_call_restart_count++;
	}

	/*
	 *	Indicate (for the benefit of memory_object_create_copy)
	 *	that we want a copy for src_object. (Note that we cannot
	 *	do a real assert_wait before calling memory_object_copy,
	 *	so we simply set the flag.)
	 */

	vm_object_set_wanted(src_object, VM_OBJECT_EVENT_COPY_CALL);
	vm_object_unlock(src_object);

	/*
	 *	Ask the memory manager to give us a memory object
	 *	which represents a copy of the src object.
	 *	The memory manager may give us a memory object
	 *	which we already have, or it may give us a
	 *	new memory object. This memory object will arrive
	 *	via memory_object_create_copy.
	 */

	kr = KERN_FAILURE;	/* XXX need to change memory_object.defs */
	if (kr != KERN_SUCCESS) {
		return kr;
	}

	/*
	 *	Wait for the copy to arrive.
	 */
	vm_object_lock(src_object);
	while (vm_object_wanted(src_object, VM_OBJECT_EVENT_COPY_CALL)) {
		vm_object_sleep(src_object, VM_OBJECT_EVENT_COPY_CALL,
			       THREAD_UNINT);
		copy_call_sleep_count++;
	}
Retry:
	assert(src_object->copy != VM_OBJECT_NULL);
	copy = src_object->copy;
	if (!vm_object_lock_try(copy)) {
		vm_object_unlock(src_object);
		mutex_pause();	/* wait a bit */
		vm_object_lock(src_object);
		goto Retry;
	}
	if (copy->size < src_offset+size)
		copy->size = src_offset+size;

	if (!copy->pager_ready)
		check_ready = TRUE;

	*_result_object = copy;
	vm_object_unlock(copy);
	vm_object_unlock(src_object);

	/* Wait for the copy to be ready. */
	if (check_ready == TRUE) {
		vm_object_lock(copy);
		while (!copy->pager_ready) {
			vm_object_sleep(copy, VM_OBJECT_EVENT_PAGER_READY, THREAD_UNINT);
		}
		vm_object_unlock(copy);
	}

	return KERN_SUCCESS;
}
static int	copy_delayed_lock_collisions = 0;
static int	copy_delayed_max_collisions = 0;
static int	copy_delayed_lock_contention = 0;
static int	copy_delayed_protect_iterate = 0;

/*
 *	Routine:	vm_object_copy_delayed [internal]
 *
 *		Copy the specified virtual memory object, using
 *		the asymmetric copy-on-write algorithm.
 *
 *	In/out conditions:
 *		The src_object must be locked on entry.  It will be unlocked
 *		on exit - so the caller must also hold a reference to it.
 *
 *		This routine will not block waiting for user-generated
 *		events.  It is not interruptible.
 */
__private_extern__ vm_object_t
vm_object_copy_delayed(
	vm_object_t		src_object,
	vm_object_offset_t	src_offset,
	vm_object_size_t	size)
{
	vm_object_t		new_copy = VM_OBJECT_NULL;
	vm_object_t		old_copy;
	vm_page_t		p;
	vm_object_size_t	copy_size = src_offset + size;

	int collisions = 0;
	/*
	 *	The user-level memory manager wants to see all of the changes
	 *	to this object, but it has promised not to make any changes on
	 *	its own.
	 *
	 *	Perform an asymmetric copy-on-write, as follows:
	 *		Create a new object, called a "copy object" to hold
	 *		pages modified by the new mapping (i.e., the copy,
	 *		not the original mapping).
	 *		Record the original object as the backing object for
	 *		the copy object.  If the original mapping does not
	 *		change a page, it may be used read-only by the copy.
	 *		Record the copy object in the original object.
	 *		When the original mapping causes a page to be modified,
	 *		it must be copied to a new page that is "pushed" to
	 *		the copy object.
	 *		Mark the new mapping (the copy object) copy-on-write.
	 *		This makes the copy object itself read-only, allowing
	 *		it to be reused if the original mapping makes no
	 *		changes, and simplifying the synchronization required
	 *		in the "push" operation described above.
	 *
	 *	The copy-on-write is said to be asymmetric because the original
	 *	object is *not* marked copy-on-write. A copied page is pushed
	 *	to the copy object, regardless of which party attempted to
	 *	modify the page.
	 *
	 *	Repeated asymmetric copy operations may be done. If the
	 *	original object has not been changed since the last copy, its
	 *	copy object can be reused. Otherwise, a new copy object can be
	 *	inserted between the original object and its previous copy
	 *	object.  Since any copy object is read-only, this cannot
	 *	affect the contents of the previous copy object.
	 *
	 *	Note that a copy object is higher in the object tree than the
	 *	original object; therefore, use of the copy object recorded in
	 *	the original object must be done carefully, to avoid deadlock.
	 */
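	/*
	 * Editor's illustration (not in the original source): the object
	 * graph produced below, after one delayed copy.  The copy object
	 * shadows the original; pages modified through the original mapping
	 * are pushed up into the copy object before being changed.
	 *
	 *	new_copy   (read-only; shadowed = TRUE)
	 *	    |  shadow, shadow_offset = 0
	 *	    v
	 *	src_object (src_object->copy == new_copy)
	 */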
 Retry:

	/*
	 * Wait for paging in progress.
	 */
	if (!src_object->true_share)
		vm_object_paging_wait(src_object, THREAD_UNINT);

	/*
	 *	See whether we can reuse the result of a previous
	 *	copy operation.
	 */

	old_copy = src_object->copy;
	if (old_copy != VM_OBJECT_NULL) {
		/*
		 *	Try to get the locks (out of order)
		 */
		if (!vm_object_lock_try(old_copy)) {
			vm_object_unlock(src_object);
			mutex_pause();

			/* Heisenberg Rules */
			copy_delayed_lock_collisions++;
			if (collisions++ == 0)
				copy_delayed_lock_contention++;

			if (collisions > copy_delayed_max_collisions)
				copy_delayed_max_collisions = collisions;

			vm_object_lock(src_object);
			goto Retry;
		}

		/*
		 *	Determine whether the old copy object has
		 *	been modified.
		 */

		if (old_copy->resident_page_count == 0 &&
		    !old_copy->pager_created) {
			/*
			 *	It has not been modified.
			 *
			 *	Return another reference to
			 *	the existing copy-object if
			 *	we can safely grow it (if
			 *	needed).
			 */

			if (old_copy->size < copy_size) {
				/*
				 * We can't perform a delayed copy if any of the
				 * pages in the extended range are wired (because
				 * we can't safely take write permission away from
				 * wired pages).  If the pages aren't wired, then
				 * go ahead and protect them.
				 */
				copy_delayed_protect_iterate++;
				queue_iterate(&src_object->memq, p, vm_page_t, listq) {
					if (!p->fictitious &&
					    p->offset >= old_copy->size &&
					    p->offset < copy_size) {
						if (p->wire_count > 0) {
							vm_object_unlock(old_copy);
							vm_object_unlock(src_object);

							if (new_copy != VM_OBJECT_NULL) {
								vm_object_unlock(new_copy);
								vm_object_deallocate(new_copy);
							}

							return VM_OBJECT_NULL;
						} else {
							pmap_page_protect(p->phys_page,
									  (VM_PROT_ALL & ~VM_PROT_WRITE &
									   ~p->page_lock));
						}
					}
				}
				old_copy->size = copy_size;
			}

			vm_object_reference_locked(old_copy);
			vm_object_unlock(old_copy);
			vm_object_unlock(src_object);

			if (new_copy != VM_OBJECT_NULL) {
				vm_object_unlock(new_copy);
				vm_object_deallocate(new_copy);
			}

			return(old_copy);
		}

		/*
		 * Adjust the size argument so that the newly-created
		 * copy object will be large enough to back either the
		 * old copy object or the new mapping.
		 */
		if (old_copy->size > copy_size)
			copy_size = old_copy->size;

		if (new_copy == VM_OBJECT_NULL) {
			vm_object_unlock(old_copy);
			vm_object_unlock(src_object);
			new_copy = vm_object_allocate(copy_size);
			vm_object_lock(src_object);
			vm_object_lock(new_copy);
			goto Retry;
		}
		new_copy->size = copy_size;

		/*
		 *	The copy-object is always made large enough to
		 *	completely shadow the original object, since
		 *	it may have several users who want to shadow
		 *	the original object at different points.
		 */

		assert((old_copy->shadow == src_object) &&
		    (old_copy->shadow_offset == (vm_object_offset_t) 0));

	} else if (new_copy == VM_OBJECT_NULL) {
		vm_object_unlock(src_object);
		new_copy = vm_object_allocate(copy_size);
		vm_object_lock(src_object);
		vm_object_lock(new_copy);
		goto Retry;
	}

	/*
	 * We now have the src object locked, and the new copy object
	 * allocated and locked (and potentially the old copy locked).
	 * Before we go any further, make sure we can still perform
	 * a delayed copy, as the situation may have changed.
	 *
	 * Specifically, we can't perform a delayed copy if any of the
	 * pages in the range are wired (because we can't safely take
	 * write permission away from wired pages).  If the pages aren't
	 * wired, then go ahead and protect them.
	 */
	copy_delayed_protect_iterate++;
	queue_iterate(&src_object->memq, p, vm_page_t, listq) {
		if (!p->fictitious && p->offset < copy_size) {
			if (p->wire_count > 0) {
				if (old_copy)
					vm_object_unlock(old_copy);
				vm_object_unlock(src_object);
				vm_object_unlock(new_copy);
				vm_object_deallocate(new_copy);
				return VM_OBJECT_NULL;
			} else {
				pmap_page_protect(p->phys_page,
						  (VM_PROT_ALL & ~VM_PROT_WRITE &
						   ~p->page_lock));
			}
		}
	}

	if (old_copy != VM_OBJECT_NULL) {
		/*
		 *	Make the old copy-object shadow the new one.
		 *	It will receive no more pages from the original
		 *	object.
		 */

		src_object->ref_count--;	/* remove ref. from old_copy */
		assert(src_object->ref_count > 0);
		old_copy->shadow = new_copy;
		assert(new_copy->ref_count > 0);
		new_copy->ref_count++;		/* for old_copy->shadow ref. */

#if	TASK_SWAPPER
		if (old_copy->res_count) {
			VM_OBJ_RES_INCR(new_copy);
			VM_OBJ_RES_DECR(src_object);
		}
#endif	/* TASK_SWAPPER */

		vm_object_unlock(old_copy);	/* done with old_copy */
	}

	/*
	 *	Point the new copy at the existing object.
	 */
	new_copy->shadow = src_object;
	new_copy->shadow_offset = 0;
	new_copy->shadowed = TRUE;	/* caller must set needs_copy */
	assert(src_object->ref_count > 0);
	src_object->ref_count++;
	VM_OBJ_RES_INCR(src_object);
	src_object->copy = new_copy;
	vm_object_unlock(src_object);
	vm_object_unlock(new_copy);

	XPR(XPR_VM_OBJECT,
		"vm_object_copy_delayed: used copy object %X for source %X\n",
		(integer_t)new_copy, (integer_t)src_object, 0, 0, 0);

	return(new_copy);
}
/*
 *	Routine:	vm_object_copy_strategically
 *
 *	Perform a copy according to the source object's
 *	declared strategy.  This operation may block,
 *	and may be interrupted.
 */
__private_extern__ kern_return_t
vm_object_copy_strategically(
	register vm_object_t	src_object,
	vm_object_offset_t	src_offset,
	vm_object_size_t	size,
	vm_object_t		*dst_object,	/* OUT */
	vm_object_offset_t	*dst_offset,	/* OUT */
	boolean_t		*dst_needs_copy) /* OUT */
{
	boolean_t	interruptible = THREAD_ABORTSAFE; /* XXX */
	memory_object_copy_strategy_t copy_strategy;

	assert(src_object != VM_OBJECT_NULL);

	vm_object_lock(src_object);

	/*
	 *	The copy strategy is only valid if the memory manager
	 *	is "ready".  Internal objects are always ready.
	 */
	while (!src_object->internal && !src_object->pager_ready) {
		wait_result_t wait_result;

		wait_result = vm_object_sleep(	src_object,
						VM_OBJECT_EVENT_PAGER_READY,
		if (wait_result != THREAD_AWAKENED) {
			vm_object_unlock(src_object);
			*dst_object = VM_OBJECT_NULL;
			*dst_needs_copy = FALSE;
			return(MACH_SEND_INTERRUPTED);

	copy_strategy = src_object->copy_strategy;

	/*
	 *	Use the appropriate copy strategy.
	 */
	switch (copy_strategy) {
	    case MEMORY_OBJECT_COPY_DELAY:
		*dst_object = vm_object_copy_delayed(src_object,
		if (*dst_object != VM_OBJECT_NULL) {
			*dst_offset = src_offset;
			*dst_needs_copy = TRUE;
			result = KERN_SUCCESS;
		vm_object_lock(src_object);
		/* fall thru when delayed copy not allowed */

	    case MEMORY_OBJECT_COPY_NONE:
		result = vm_object_copy_slowly(src_object, src_offset, size,
					       interruptible, dst_object);
		if (result == KERN_SUCCESS) {
			*dst_needs_copy = FALSE;

	    case MEMORY_OBJECT_COPY_CALL:
		result = vm_object_copy_call(src_object, src_offset, size,
		if (result == KERN_SUCCESS) {
			*dst_offset = src_offset;
			*dst_needs_copy = TRUE;

	    case MEMORY_OBJECT_COPY_SYMMETRIC:
		XPR(XPR_VM_OBJECT, "v_o_c_strategically obj 0x%x off 0x%x size 0x%x\n", (natural_t)src_object, src_offset, size, 0, 0);
		vm_object_unlock(src_object);
		result = KERN_MEMORY_RESTART_COPY;

		panic("copy_strategically: bad strategy");
		result = KERN_INVALID_ARGUMENT;
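/*
 *	Illustrative sketch (not part of the original source): how a caller
 *	might drive vm_object_copy_strategically().  The local names below
 *	("copy_object", "copy_offset", "needs_copy", "kr") are hypothetical.
 */
#if 0
	vm_object_t		copy_object;
	vm_object_offset_t	copy_offset;
	boolean_t		needs_copy;
	kern_return_t		kr;

	kr = vm_object_copy_strategically(src_object, src_offset, size,
					  &copy_object, &copy_offset,
					  &needs_copy);
	if (kr == KERN_SUCCESS && needs_copy) {
		/* the caller must mark its mapping copy-on-write */
	}
#endif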
/*
 *	Create a new object which is backed by the
 *	specified existing object range.  The source
 *	object reference is deallocated.
 *
 *	The new object and offset into that object
 *	are returned in the source parameters.
 */
boolean_t vm_object_shadow_check = FALSE;
__private_extern__ boolean_t
vm_object_shadow(
	vm_object_t		*object,	/* IN/OUT */
	vm_object_offset_t	*offset,	/* IN/OUT */
	vm_object_size_t	length)
{
	register vm_object_t	source;
	register vm_object_t	result;

	assert(source->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);

	/*
	 *	Determine if we really need a shadow.
	 */
	if (vm_object_shadow_check && source->ref_count == 1 &&
	    (source->shadow == VM_OBJECT_NULL ||
	     source->shadow->copy == VM_OBJECT_NULL))
		source->shadowed = FALSE;

	/*
	 *	Allocate a new object with the given length
	 */
	if ((result = vm_object_allocate(length)) == VM_OBJECT_NULL)
		panic("vm_object_shadow: no object for shadowing");

	/*
	 *	The new object shadows the source object, adding
	 *	a reference to it.  Our caller changes his reference
	 *	to point to the new object, removing a reference to
	 *	the source object.  Net result: no change of reference
	 */
	result->shadow = source;

	/*
	 *	Store the offset into the source object,
	 *	and fix up the offset into the new object.
	 */
	result->shadow_offset = *offset;

	/*
	 *	Return the new things
	 */
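/*
 *	Illustrative sketch (not part of the original source): the usual
 *	caller pattern for vm_object_shadow().  "map_object", "map_offset"
 *	and "map_size" are hypothetical names for a map entry's object
 *	reference, offset and size.
 */
#if 0
	/* map_object holds a reference to the original object */
	vm_object_shadow(&map_object, &map_offset, map_size);
	/* on return, map_object and map_offset describe the new shadow */
#endif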
/*
 *	The relationship between vm_object structures and
 *	the memory_object requires careful synchronization.
 *
 *	All associations are created by memory_object_create_named
 *	for external pagers and vm_object_pager_create for internal
 *	objects as follows:
 *
 *		pager:	the memory_object itself, supplied by
 *			the user requesting a mapping (or the kernel,
 *			when initializing internal objects); the
 *			kernel simulates holding send rights by keeping
 *
 *			the memory object control port,
 *			created by the kernel; the kernel holds
 *			receive (and ownership) rights to this
 *			port, but no other references.
 *
 *	When initialization is complete, the "initialized" field
 *	is asserted.  Other mappings using a particular memory object,
 *	and any references to the vm_object gained through the
 *	port association must wait for this initialization to occur.
 *
 *	In order to allow the memory manager to set attributes before
 *	requests (notably virtual copy operations, but also data or
 *	unlock requests) are made, a "ready" attribute is made available.
 *	Only the memory manager may affect the value of this attribute.
 *	Its value does not affect critical kernel functions, such as
 *	internal object initialization or destruction.  [Furthermore,
 *	memory objects created by the kernel are assumed to be ready
 *	immediately; the default memory manager need not explicitly
 *	set the "ready" attribute.]
 *
 *	[Both the "initialized" and "ready" attribute wait conditions
 *	use the "pager" field as the wait event.]
 *
 *	The port associations can be broken down by any of the
 *	following routines:
 *		vm_object_terminate:
 *			No references to the vm_object remain, and
 *			the object cannot (or will not) be cached.
 *			This is the normal case, and is done even
 *			though one of the other cases has already been
 *		memory_object_destroy:
 *			The memory manager has requested that the
 *			kernel relinquish references to the memory
 *			object.  [The memory manager may not want to
 *			destroy the memory object, but may wish to
 *			refuse or tear down existing memory mappings.]
 *
 *	Each routine that breaks an association must break all of
 *	them at once.  At some later time, that routine must clear
 *	the pager field and release the memory object references.
 *	[Furthermore, each routine must cope with the simultaneous
 *	or previous operations of the others.]
 *
 *	In addition to the lock on the object, the vm_object_cache_lock
 *	governs the associations.  References gained through the
 *	association require use of the cache lock.
 *
 *	Because the pager field may be cleared spontaneously, it
 *	cannot be used to determine whether a memory object has
 *	ever been associated with a particular vm_object.  [This
 *	knowledge is important to the shadow object mechanism.]
 *	For this reason, an additional "created" attribute is
 *
 *	During various paging operations, the pager reference found in the
 *	vm_object must be valid.  To prevent this from being released,
 *	(other than being removed, i.e., made null), routines may use
 *	the vm_object_paging_begin/end routines [actually, macros].
 *	The implementation uses the "paging_in_progress" and "wanted" fields.
 *	[Operations that alter the validity of the pager values include the
 *	termination routines and vm_object_collapse.]
 */
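/*
 *	Illustrative sketch (not part of the original source): the
 *	paging-reference pattern described above.  vm_object_paging_begin/end
 *	keep the pager association valid while the object lock is dropped.
 */
#if 0
	vm_object_lock(object);
	vm_object_paging_begin(object);		/* pin the pager association */
	vm_object_unlock(object);

	/* ... issue requests against the object's pager ... */

	vm_object_lock(object);
	vm_object_paging_end(object);		/* may wake up waiters */
	vm_object_unlock(object);
#endif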
static void	vm_object_abort_activity(
			vm_object_t	object);

/*
 *	Routine:	vm_object_abort_activity [internal use only]
 *
 *		Abort paging requests pending on this object.
 *	In/out conditions:
 *		The object is locked on entry and exit.
 */
vm_object_abort_activity(
	XPR(XPR_VM_OBJECT, "vm_object_abort_activity, object 0x%X\n",
		(integer_t)object, 0, 0, 0, 0);

	/*
	 *	Abort all activity that would be waiting
	 *	for a result on this memory object.
	 *
	 *	We could also choose to destroy all pages
	 *	that we have in memory for this object, but
	 */

	p = (vm_page_t) queue_first(&object->memq);
	while (!queue_end(&object->memq, (queue_entry_t) p)) {
		next = (vm_page_t) queue_next(&p->listq);

		/*
		 *	If it's being paged in, destroy it.
		 *	If an unlock has been requested, start it again.
		 */
		if (p->busy && p->absent) {

		if (p->unlock_request != VM_PROT_NONE)
			p->unlock_request = VM_PROT_NONE;

	/*
	 *	Wake up threads waiting for the memory object to
	 */
	object->pager_ready = TRUE;
	vm_object_wakeup(object, VM_OBJECT_EVENT_PAGER_READY);
/*
 *	Routine:	vm_object_pager_dead
 *
 *		A port is being destroyed, and the IPC kobject code
 *		can't tell if it represents a pager port or not.
 *		So this function is called each time it sees a port
 *
 *		THIS IS HORRIBLY INEFFICIENT.  We should only call
 *		this routine if we had requested a notification on
 */
__private_extern__ void
vm_object_pager_dead(
	vm_object_hash_entry_t	entry;

	/*
	 *	Perform essentially the same operations as in vm_object_lookup,
	 *	except that this time we look up based on the memory_object
	 *	port, not the control port.
	 */
	vm_object_cache_lock();
	entry = vm_object_hash_lookup(pager, FALSE);
	if (entry == VM_OBJECT_HASH_ENTRY_NULL ||
	    entry->object == VM_OBJECT_NULL) {
		vm_object_cache_unlock();

	object = entry->object;
	entry->object = VM_OBJECT_NULL;

	vm_object_lock(object);
	if (object->ref_count == 0) {
		XPR(XPR_VM_OBJECT_CACHE,
		    "vm_object_destroy: removing %x from cache, head (%x, %x)\n",
		    (integer_t)vm_object_cached_list.next,
		    (integer_t)vm_object_cached_list.prev, 0,0);

		queue_remove(&vm_object_cached_list, object,
			     vm_object_t, cached_list);
		vm_object_cached_count--;
	object->ref_count++;
	vm_object_res_reference(object);

	object->can_persist = FALSE;

	assert(object->pager == pager);

	/*
	 *	Remove the pager association.
	 *
	 *	Note that the memory_object itself is dead, so
	 *	we don't bother with it.
	 */
	object->pager = MEMORY_OBJECT_NULL;

	vm_object_unlock(object);
	vm_object_cache_unlock();

	vm_object_pager_wakeup(pager);

	/*
	 *	Release the pager reference.  Note that there's no
	 *	point in trying the memory_object_terminate call
	 *	because the memory_object itself is dead.  Also
	 *	release the memory_object_control reference, since
	 *	the pager didn't do that either.
	 */
	memory_object_deallocate(pager);
	memory_object_control_deallocate(object->pager_request);

	/*
	 *	Restart pending page requests
	 */
	vm_object_lock(object);
	vm_object_abort_activity(object);
	vm_object_unlock(object);

	/*
	 *	Lose the object reference.
	 */
	vm_object_deallocate(object);
/*
 *	Routine:	vm_object_enter
 *
 *		Find a VM object corresponding to the given
 *		pager; if no such object exists, create one,
 *		and initialize the pager.
 */
vm_object_enter(
	memory_object_t		pager,
	vm_object_size_t	size,
	register vm_object_t	object;
	vm_object_t		new_object;
	boolean_t		must_init;
	vm_object_hash_entry_t	entry, new_entry;

	if (pager == MEMORY_OBJECT_NULL)
		return(vm_object_allocate(size));

	new_object = VM_OBJECT_NULL;
	new_entry = VM_OBJECT_HASH_ENTRY_NULL;

	/*
	 *	Look for an object associated with this port.
	 */
	vm_object_cache_lock();

	entry = vm_object_hash_lookup(pager, FALSE);

	if (entry == VM_OBJECT_HASH_ENTRY_NULL) {
		if (new_object == VM_OBJECT_NULL) {
			/*
			 *	We must unlock to create a new object;
			 *	if we do so, we must try the lookup again.
			 */
			vm_object_cache_unlock();
			assert(new_entry == VM_OBJECT_HASH_ENTRY_NULL);
			new_entry = vm_object_hash_entry_alloc(pager);
			new_object = vm_object_allocate(size);
			vm_object_cache_lock();
			/*
			 *	Lookup failed twice, and we have something
			 *	to insert; set the object.
			 */
			vm_object_hash_insert(new_entry);
			entry->object = new_object;
			new_entry = VM_OBJECT_HASH_ENTRY_NULL;
			new_object = VM_OBJECT_NULL;

	} else if (entry->object == VM_OBJECT_NULL) {
		/*
		 *	If a previous object is being terminated,
		 *	we must wait for the termination message
		 *	to be queued (and lookup the entry again).
		 */
		entry->waiting = TRUE;
		entry = VM_OBJECT_HASH_ENTRY_NULL;
		assert_wait((event_t) pager, THREAD_UNINT);
		vm_object_cache_unlock();
		thread_block(THREAD_CONTINUE_NULL);
		vm_object_cache_lock();
	} while (entry == VM_OBJECT_HASH_ENTRY_NULL);

	object = entry->object;
	assert(object != VM_OBJECT_NULL);

	vm_object_lock(object);
	assert(!internal || object->internal);
	assert(!object->named);
	object->named = TRUE;
	if (object->ref_count == 0) {
		XPR(XPR_VM_OBJECT_CACHE,
		    "vm_object_enter: removing %x from cache, head (%x, %x)\n",
		    (integer_t)vm_object_cached_list.next,
		    (integer_t)vm_object_cached_list.prev, 0,0);
		queue_remove(&vm_object_cached_list, object,
			     vm_object_t, cached_list);
		vm_object_cached_count--;
	object->ref_count++;
	vm_object_res_reference(object);
	vm_object_unlock(object);

	assert(object->ref_count > 0);

	vm_object_cache_unlock();

	    "vm_o_enter: pager 0x%x obj 0x%x must_init %d\n",
	    (integer_t)pager, (integer_t)object, must_init, 0, 0);

	/*
	 *	If we raced to create a vm_object but lost, let's
	 */
	if (new_object != VM_OBJECT_NULL)
		vm_object_deallocate(new_object);

	if (new_entry != VM_OBJECT_HASH_ENTRY_NULL)
		vm_object_hash_entry_free(new_entry);

		memory_object_control_t control;

		/*
		 *	Allocate request port.
		 */
		control = memory_object_control_allocate(object);
		assert (control != MEMORY_OBJECT_CONTROL_NULL);

		vm_object_lock(object);
		assert(object != kernel_object);

		/*
		 *	Copy the reference we were given.
		 */
		memory_object_reference(pager);
		object->pager_created = TRUE;
		object->pager = pager;
		object->internal = internal;
		object->pager_trusted = internal;
		/* copy strategy invalid until set by memory manager */
		object->copy_strategy = MEMORY_OBJECT_COPY_INVALID;
		object->pager_control = control;
		object->pager_ready = FALSE;

		vm_object_unlock(object);

		/*
		 *	Let the pager know we're using it.
		 */
		(void) memory_object_init(pager,
			object->pager_control,

		vm_object_lock(object);
		object->named = TRUE;
		object->pager_ready = TRUE;
		vm_object_wakeup(object, VM_OBJECT_EVENT_PAGER_READY);

		object->pager_initialized = TRUE;
		vm_object_wakeup(object, VM_OBJECT_EVENT_INITIALIZED);

		vm_object_lock(object);

	/*
	 *	[At this point, the object must be locked]
	 */

	/*
	 *	Wait for the work above to be done by the first
	 *	thread to map this object.
	 */
	while (!object->pager_initialized) {
		vm_object_sleep(object,
				VM_OBJECT_EVENT_INITIALIZED,
	vm_object_unlock(object);

	    "vm_object_enter: vm_object %x, memory_object %x, internal %d\n",
	    (integer_t)object, (integer_t)object->pager, internal, 0,0);
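/*
 *	Illustrative sketch (not part of the original source): the two ways
 *	vm_object_enter() is invoked later in this file.  The remaining
 *	arguments are elided in the fragment above; the boolean values shown
 *	here are copied from those call sites.
 */
#if 0
	/* from vm_object_pager_create(), for an internal object: */
	if (vm_object_enter(pager, object->size, TRUE, TRUE, FALSE) != object)
		panic("vm_object_pager_create: mismatch");

	/* from memory_object_create_named(), for an external pager: */
	object = vm_object_enter(pager, size, FALSE, FALSE, TRUE);
#endif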
/*
 *	Routine:	vm_object_pager_create
 *
 *		Create a memory object for an internal object.
 *	In/out conditions:
 *		The object is locked on entry and exit;
 *		it may be unlocked within this call.
 *
 *		Only one thread may be performing a
 *		vm_object_pager_create on an object at
 *		a time.  Presumably, only the pageout
 *		daemon will be using this routine.
 */
vm_object_pager_create(
	register vm_object_t	object)
{
	memory_object_t		pager;
	vm_object_hash_entry_t	entry;
	vm_object_size_t	size;
	vm_external_map_t	map;
#endif	/* MACH_PAGEMAP */

	XPR(XPR_VM_OBJECT, "vm_object_pager_create, object 0x%X\n",
		(integer_t)object, 0,0,0,0);

	assert(object != kernel_object);

	if (memory_manager_default_check() != KERN_SUCCESS)

	/*
	 *	Prevent collapse or termination by holding a paging reference
	 */
	vm_object_paging_begin(object);
	if (object->pager_created) {
		/*
		 *	Someone else got to it first...
		 *	wait for them to finish initializing the ports
		 */
		while (!object->pager_initialized) {
			vm_object_sleep(object,
					VM_OBJECT_EVENT_INITIALIZED,
		vm_object_paging_end(object);

	/*
	 *	Indicate that a memory object has been assigned
	 *	before dropping the lock, to prevent a race.
	 */
	object->pager_created = TRUE;
	object->paging_offset = 0;

	size = object->size;
#endif	/* MACH_PAGEMAP */
	vm_object_unlock(object);

	map = vm_external_create(size);
	vm_object_lock(object);
	assert(object->size == size);
	object->existence_map = map;
	vm_object_unlock(object);
#endif	/* MACH_PAGEMAP */

	/*
	 *	Create the [internal] pager, and associate it with this object.
	 *
	 *	We make the association here so that vm_object_enter()
	 *	can look up the object to complete initializing it.  No
	 *	user will ever map this object.
	 */
		memory_object_default_t		dmm;
		vm_size_t	cluster_size;

		/* acquire a reference for the default memory manager */
		dmm = memory_manager_default_reference(&cluster_size);
		assert(cluster_size >= PAGE_SIZE);

		object->cluster_size = cluster_size; /* XXX ??? */
		assert(object->temporary);

		/* create our new memory object */
		(void) memory_object_create(dmm, object->size, &pager);

		memory_object_default_deallocate(dmm);

	entry = vm_object_hash_entry_alloc(pager);

	vm_object_cache_lock();
	vm_object_hash_insert(entry);
	entry->object = object;
	vm_object_cache_unlock();

	/*
	 *	A reference was returned by
	 *	memory_object_create(), and it is
	 *	copied by vm_object_enter().
	 */
	if (vm_object_enter(pager, object->size, TRUE, TRUE, FALSE) != object)
		panic("vm_object_pager_create: mismatch");

	/*
	 *	Drop the reference we were passed.
	 */
	memory_object_deallocate(pager);

	vm_object_lock(object);

	/*
	 *	Release the paging reference
	 */
	vm_object_paging_end(object);
/*
 *	Routine:	vm_object_remove
 *
 *		Eliminate the pager/object association
 *
 *		The object cache must be locked.
 */
__private_extern__ void
vm_object_remove(
	vm_object_t	object)
{
	memory_object_t pager;

	if ((pager = object->pager) != MEMORY_OBJECT_NULL) {
		vm_object_hash_entry_t	entry;

		entry = vm_object_hash_lookup(pager, FALSE);
		if (entry != VM_OBJECT_HASH_ENTRY_NULL)
			entry->object = VM_OBJECT_NULL;
/*
 *	Global variables for vm_object_collapse():
 *
 *		Counts for normal collapses and bypasses.
 *		Debugging variables, to watch or disable collapse.
 */
static long	object_collapses = 0;
static long	object_bypasses  = 0;

static boolean_t	vm_object_collapse_allowed = TRUE;
static boolean_t	vm_object_bypass_allowed = TRUE;

static int	vm_external_discarded;
static int	vm_external_collapsed;

unsigned long vm_object_collapse_encrypted = 0;
/*
 *	Routine:	vm_object_do_collapse
 *
 *		Collapse an object with the object backing it.
 *		Pages in the backing object are moved into the
 *		parent, and the backing object is deallocated.
 *
 *		Both objects and the cache are locked; the page
 *		queues are unlocked.
 */
vm_object_do_collapse(
	vm_object_t backing_object)
{
	vm_object_offset_t new_offset, backing_offset;
	vm_object_size_t size;

	backing_offset = object->shadow_offset;
	size = object->size;

	/*
	 *	Move all in-memory pages from backing_object
	 *	to the parent.  Pages that have been paged out
	 *	will be overwritten by any of the parent's
	 *	pages that shadow them.
	 */
	while (!queue_empty(&backing_object->memq)) {

		p = (vm_page_t) queue_first(&backing_object->memq);

		new_offset = (p->offset - backing_offset);

		assert(!p->busy || p->absent);

		/*
		 *	If the parent has a page here, or if
		 *	this page falls outside the parent,
		 *
		 *	Otherwise, move it as planned.
		 */
		if (p->offset < backing_offset || new_offset >= size) {

			/*
			 * The encryption key includes the "pager" and the
			 * "paging_offset".  These might not be the same in
			 * the new object, so we can't just move an encrypted
			 * page from one object to the other.  We can't just
			 * decrypt the page here either, because that would drop
			 * The caller should check for encrypted pages before
			 * attempting to collapse.
			 */
			ASSERT_PAGE_DECRYPTED(p);

			pp = vm_page_lookup(object, new_offset);
			if (pp == VM_PAGE_NULL) {

				/*
				 *	Parent now has no page.
				 *	Move the backing object's page up.
				 */
				vm_page_rename(p, object, new_offset);
			} else if (pp->absent) {

				/*
				 *	Parent has an absent page...
				 *	it's not being paged in, so
				 *	it must really be missing from
				 *
				 *	Throw out the absent page...
				 *	any faults looking for that
				 *	page will restart with the new
				 */
				vm_page_rename(p, object, new_offset);
#endif	/* MACH_PAGEMAP */
				assert(! pp->absent);

				/*
				 *	Parent object has a real page.
				 *	Throw away the backing object's
				 */

	assert(!object->pager_created && object->pager == MEMORY_OBJECT_NULL
	       || (!backing_object->pager_created
	       && backing_object->pager == MEMORY_OBJECT_NULL));
	assert(!object->pager_created && object->pager == MEMORY_OBJECT_NULL);
#endif	/* !MACH_PAGEMAP */

	if (backing_object->pager != MEMORY_OBJECT_NULL) {
		vm_object_hash_entry_t	entry;

		/*
		 *	Move the pager from backing_object to object.
		 *
		 *	XXX We're only using part of the paging space
		 *	for keeps now... we ought to discard the
		 */
		assert(!object->paging_in_progress);
		object->pager = backing_object->pager;
		entry = vm_object_hash_lookup(object->pager, FALSE);
		assert(entry != VM_OBJECT_HASH_ENTRY_NULL);
		entry->object = object;
		object->pager_created = backing_object->pager_created;
		object->pager_control = backing_object->pager_control;
		object->pager_ready = backing_object->pager_ready;
		object->pager_initialized = backing_object->pager_initialized;
		object->cluster_size = backing_object->cluster_size;
		object->paging_offset =
		    backing_object->paging_offset + backing_offset;
		if (object->pager_control != MEMORY_OBJECT_CONTROL_NULL) {
			memory_object_control_collapse(object->pager_control,

	vm_object_cache_unlock();

	/*
	 *	If the shadow offset is 0, then use the existence map from
	 *	the backing object if there is one.  If the shadow offset is
	 *	not zero, toss it.
	 *
	 *	XXX - If the shadow offset is not 0 then a bit copy is needed
	 *	if the map is to be salvaged.  For now, we just toss the
	 *	old map, giving the collapsed object no map.  This means that
	 *	the pager is invoked for zero fill pages.  If analysis shows
	 *	that this happens frequently and is a performance hit, then
	 *	this code should be fixed to salvage the map.
	 */
	assert(object->existence_map == VM_EXTERNAL_NULL);
	if (backing_offset || (size != backing_object->size)) {
		vm_external_discarded++;
		vm_external_destroy(backing_object->existence_map,
				    backing_object->size);
		vm_external_collapsed++;
		object->existence_map = backing_object->existence_map;
	backing_object->existence_map = VM_EXTERNAL_NULL;
#endif	/* MACH_PAGEMAP */

	/*
	 *	Object now shadows whatever backing_object did.
	 *	Note that the reference to backing_object->shadow
	 *	moves from within backing_object to within object.
	 */
	assert(!object->phys_contiguous);
	assert(!backing_object->phys_contiguous);
	object->shadow = backing_object->shadow;
	if (object->shadow) {
		object->shadow_offset += backing_object->shadow_offset;
		/* no shadow, therefore no shadow offset... */
		object->shadow_offset = 0;
	assert((object->shadow == VM_OBJECT_NULL) ||
	       (object->shadow->copy != backing_object));

	/*
	 *	Discard backing_object.
	 *
	 *	Since the backing object has no pages, no
	 *	pager left, and no object references within it,
	 *	all that is necessary is to dispose of it.
	 */
	assert((backing_object->ref_count == 1) &&
	       (backing_object->resident_page_count == 0) &&
	       (backing_object->paging_in_progress == 0));

	backing_object->alive = FALSE;
	vm_object_unlock(backing_object);

	XPR(XPR_VM_OBJECT, "vm_object_collapse, collapsed 0x%X\n",
		(integer_t)backing_object, 0,0,0,0);

	zfree(vm_object_zone, backing_object);
vm_object_do_bypass(
	vm_object_t backing_object)
{
	/*
	 *	Make the parent shadow the next object
	 */

	/*
	 *	Do object reference in-line to
	 *	conditionally increment shadow's
	 *	residence count.  If object is not
	 *	resident, leave residence count
	 */
	if (backing_object->shadow != VM_OBJECT_NULL) {
		vm_object_lock(backing_object->shadow);
		backing_object->shadow->ref_count++;
		if (object->res_count != 0)
			vm_object_res_reference(backing_object->shadow);
		vm_object_unlock(backing_object->shadow);
#else	/* TASK_SWAPPER */
	vm_object_reference(backing_object->shadow);
#endif	/* TASK_SWAPPER */

	assert(!object->phys_contiguous);
	assert(!backing_object->phys_contiguous);
	object->shadow = backing_object->shadow;
	if (object->shadow) {
		object->shadow_offset += backing_object->shadow_offset;
		/* no shadow, therefore no shadow offset... */
		object->shadow_offset = 0;

	/*
	 *	Backing object might have had a copy pointer
	 *	to us.  If it did, clear it.
	 */
	if (backing_object->copy == object) {
		backing_object->copy = VM_OBJECT_NULL;

	/*
	 *	Drop the reference count on backing_object.
	 *
	 *	Since its ref_count was at least 2, it
	 *	will not vanish; so we don't need to call
	 *	vm_object_deallocate.
	 *	[FBDP: that doesn't seem to be true any more]
	 *
	 *	The res_count on the backing object is
	 *	conditionally decremented.  It's possible
	 *	(via vm_pageout_scan) to get here with
	 *	a "swapped" object, which has a 0 res_count,
	 *	in which case, the backing object res_count
	 *	is already down by one.
	 *
	 *	Don't call vm_object_deallocate unless
	 *	ref_count drops to zero.
	 *
	 *	The ref_count can drop to zero here if the
	 *	backing object could be bypassed but not
	 *	collapsed, such as when the backing object
	 *	is temporary and cachable.
	 */
	if (backing_object->ref_count > 1) {
		backing_object->ref_count--;
		if (object->res_count != 0)
			vm_object_res_deallocate(backing_object);
		assert(backing_object->ref_count > 0);
#endif	/* TASK_SWAPPER */
		vm_object_unlock(backing_object);

		/*
		 *	Drop locks so that we can deallocate
		 *	the backing object.
		 */
		if (object->res_count == 0) {
			/* XXX get a reference for the deallocate below */
			vm_object_res_reference(backing_object);
#endif	/* TASK_SWAPPER */
		vm_object_unlock(object);
		vm_object_unlock(backing_object);
		vm_object_deallocate(backing_object);

		/*
		 *	Relock object.  We don't have to reverify
		 *	its state since vm_object_collapse will
		 *	do that for us as it starts at the
		 */
		vm_object_lock(object);
/*
 *	vm_object_collapse:
 *
 *	Perform an object collapse or an object bypass if appropriate.
 *	The real work of collapsing and bypassing is performed in
 *	the routines vm_object_do_collapse and vm_object_do_bypass.
 *
 *	Requires that the object be locked and the page queues be unlocked.
 */
static unsigned long vm_object_collapse_calls = 0;
static unsigned long vm_object_collapse_objects = 0;
static unsigned long vm_object_collapse_do_collapse = 0;
static unsigned long vm_object_collapse_do_bypass = 0;

__private_extern__ void
vm_object_collapse(
	register vm_object_t			object,
	register vm_object_offset_t		hint_offset)
{
	register vm_object_t			backing_object;
	register unsigned int			rcount;
	register unsigned int			size;
	vm_object_offset_t			collapse_min_offset;
	vm_object_offset_t			collapse_max_offset;
	vm_object_t				original_object;

	vm_object_collapse_calls++;

	if (! vm_object_collapse_allowed && ! vm_object_bypass_allowed) {

	XPR(XPR_VM_OBJECT, "vm_object_collapse, obj 0x%X\n",
		(integer_t)object, 0,0,0,0);

	if (object == VM_OBJECT_NULL)

	original_object = object;

		vm_object_collapse_objects++;
		/*
		 *	Verify that the conditions are right for either
		 *	collapse or bypass:
		 */

		/*
		 *	There is a backing object, and
		 */
		backing_object = object->shadow;
		if (backing_object == VM_OBJECT_NULL) {
			if (object != original_object) {
				vm_object_unlock(object);

		/*
		 *	No pages in the object are currently
		 *	being paged out, and
		 */
		if (object->paging_in_progress != 0 ||
		    object->absent_count != 0) {
			/* try and collapse the rest of the shadow chain */
			vm_object_lock(backing_object);
			if (object != original_object) {
				vm_object_unlock(object);
			object = backing_object;

		vm_object_lock(backing_object);

		/*
		 *	The backing object is not read_only,
		 *	and no pages in the backing object are
		 *	currently being paged out.
		 *	The backing object is internal.
		 */
		if (!backing_object->internal ||
		    backing_object->paging_in_progress != 0) {
			/* try and collapse the rest of the shadow chain */
			if (object != original_object) {
				vm_object_unlock(object);
			object = backing_object;

		/*
		 *	The backing object can't be a copy-object:
		 *	the shadow_offset for the copy-object must stay
		 *	as 0.  Furthermore (for the 'we have all the
		 *	pages' case), if we bypass backing_object and
		 *	just shadow the next object in the chain, old
		 *	pages from that object would then have to be copied
		 *	BOTH into the (former) backing_object and into the
		 */
		if (backing_object->shadow != VM_OBJECT_NULL &&
		    backing_object->shadow->copy == backing_object) {
			/* try and collapse the rest of the shadow chain */
			if (object != original_object) {
				vm_object_unlock(object);
			object = backing_object;

		/*
		 *	We can now try to either collapse the backing
		 *	object (if the parent is the only reference to
		 *	it) or (perhaps) remove the parent's reference
		 *
		 *	If there is exactly one reference to the backing
		 *	object, we may be able to collapse it into the
		 *
		 *	If MACH_PAGEMAP is defined:
		 *	The parent must not have a pager created for it,
		 *	since collapsing a backing_object dumps new pages
		 *	into the parent that its pager doesn't know about
		 *	(and the collapse code can't merge the existence
		 *
		 *	As long as one of the objects is still not known
		 *	to the pager, we can collapse them.
		 */
		if (backing_object->ref_count == 1 &&
		    (!object->pager_created
		     || !backing_object->pager_created
#endif	/*!MACH_PAGEMAP */
		    ) && vm_object_collapse_allowed) {

			    "vm_object_collapse: %x to %x, pager %x, pager_control %x\n",
			    (integer_t)backing_object, (integer_t)object,
			    (integer_t)backing_object->pager,
			    (integer_t)backing_object->pager_control, 0);

			/*
			 *	We need the cache lock for collapsing,
			 *	but we must not deadlock.
			 */
			if (! vm_object_cache_lock_try()) {
				if (object != original_object) {
					vm_object_unlock(object);
				vm_object_unlock(backing_object);

			/*
			 * We can't collapse the object if it contains
			 * any encrypted page, because the encryption key
			 * includes the <object,offset> info.  We can't
			 * drop the object lock in vm_object_do_collapse()
			 * so we can't decrypt the page there either.
			 */
			if (vm_pages_encrypted) {
				collapse_min_offset = object->shadow_offset;
				collapse_max_offset =
					object->shadow_offset + object->size;
				queue_iterate(&backing_object->memq,
					      page, vm_page_t, listq) {
					if (page->encrypted &&
						      collapse_min_offset) &&
						      collapse_max_offset)) {
						/*
						 * We found an encrypted page
						 * in the backing object,
						 * within the range covered
						 * by the parent object: we can
						 * not collapse them.
						 */
						vm_object_collapse_encrypted++;
						vm_object_cache_unlock();

			/*
			 *	Collapse the object with its backing
			 *	object, and try again with the object's
			 *	new backing object.
			 */
			vm_object_do_collapse(object, backing_object);
			vm_object_collapse_do_collapse++;

		/*
		 *	Collapsing the backing object was not possible
		 *	or permitted, so let's try bypassing it.
		 */
		if (! vm_object_bypass_allowed) {
			/* try and collapse the rest of the shadow chain */
			if (object != original_object) {
				vm_object_unlock(object);
			object = backing_object;

		/*
		 *	If the object doesn't have all its pages present,
		 *	we have to make sure no pages in the backing object
		 *	"show through" before bypassing it.
		 */
		size = atop(object->size);
		rcount = object->resident_page_count;
		if (rcount != size) {
			vm_object_offset_t	offset;
			vm_object_offset_t	backing_offset;
			unsigned int		backing_rcount;
			unsigned int		lookups = 0;

			/*
			 *	If the backing object has a pager but no pagemap,
			 *	then we cannot bypass it, because we don't know
			 *	what pages it has.
			 */
			if (backing_object->pager_created
			    && (backing_object->existence_map == VM_EXTERNAL_NULL)
#endif	/* MACH_PAGEMAP */
				/* try and collapse the rest of the shadow chain */
				if (object != original_object) {
					vm_object_unlock(object);
				object = backing_object;

			/*
			 *	If the object has a pager but no pagemap,
			 *	then we cannot bypass it, because we don't know
			 *	what pages it has.
			 */
			if (object->pager_created
			    && (object->existence_map == VM_EXTERNAL_NULL)
#endif	/* MACH_PAGEMAP */
				/* try and collapse the rest of the shadow chain */
				if (object != original_object) {
					vm_object_unlock(object);
				object = backing_object;

			/*
			 *	If all of the pages in the backing object are
			 *	shadowed by the parent object, the parent
			 *	object no longer has to shadow the backing
			 *	object; it can shadow the next one in the
			 *
			 *	If the backing object has existence info,
			 *	we must examine its existence info
			 */

			backing_offset = object->shadow_offset;
			backing_rcount = backing_object->resident_page_count;

#define EXISTS_IN_OBJECT(obj, off, rc) \
	(vm_external_state_get((obj)->existence_map, \
	 (vm_offset_t)(off)) == VM_EXTERNAL_STATE_EXISTS || \
	((rc) && ++lookups && vm_page_lookup((obj), (off)) != VM_PAGE_NULL && (rc)--))
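/*
 *	(Descriptive note, not in the original source.)  EXISTS_IN_OBJECT
 *	reports whether "obj" is known to hold data at offset "off": either
 *	the existence map records a page there, or, while "rc" resident
 *	pages remain unaccounted for, an actual vm_page_lookup() finds a
 *	resident page (counting the lookup and decrementing "rc" on a hit).
 */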
			/*
			 * Check the hint location first
			 * (since it is often the quickest way out of here).
			 */
			if (object->cow_hint != ~(vm_offset_t)0)
				hint_offset = (vm_object_offset_t)object->cow_hint;
				hint_offset = (hint_offset > 8 * PAGE_SIZE_64) ?
					      (hint_offset - 8 * PAGE_SIZE_64) : 0;

			if (EXISTS_IN_OBJECT(backing_object, hint_offset +
					     backing_offset, backing_rcount) &&
			    !EXISTS_IN_OBJECT(object, hint_offset, rcount)) {
				/* dependency right at the hint */
				object->cow_hint = (vm_offset_t)hint_offset;
				/* try and collapse the rest of the shadow chain */
				if (object != original_object) {
					vm_object_unlock(object);
				object = backing_object;

			/*
			 * If the object's window onto the backing_object
			 * is large compared to the number of resident
			 * pages in the backing object, it makes sense to
			 * walk the backing_object's resident pages first.
			 *
			 * NOTE: Pages may be in both the existence map and
			 * resident.  So, we can't permanently decrement
			 * the rcount here because the second loop may
			 * find the same pages in the backing object's
			 * existence map that we found here and we would
			 * double-decrement the rcount.  We also may or
			 * may not have found the
			 */
			if (backing_rcount && size >
			    ((backing_object->existence_map) ?
			     backing_rcount : (backing_rcount >> 1))) {
				unsigned int rc = rcount;

				backing_rcount = backing_object->resident_page_count;
				p = (vm_page_t)queue_first(&backing_object->memq);

					/* Until we get more than one lookup lock */
					if (lookups > 256) {

					offset = (p->offset - backing_offset);
					if (offset < object->size &&
					    offset != hint_offset &&
					    !EXISTS_IN_OBJECT(object, offset, rc)) {
						/* found a dependency */
						object->cow_hint = (vm_offset_t)offset;
					p = (vm_page_t) queue_next(&p->listq);
				} while (--backing_rcount);
				if (backing_rcount != 0 ) {
					/* try and collapse the rest of the shadow chain */
					if (object != original_object) {
						vm_object_unlock(object);
					object = backing_object;

			/*
			 * Walk through the offsets looking for pages in the
			 * backing object that show through to the object.
			 */
			if (backing_rcount || backing_object->existence_map) {
				offset = hint_offset;

				       (offset + PAGE_SIZE_64 < object->size) ?
				       (offset + PAGE_SIZE_64) : 0) != hint_offset) {

					/* Until we get more than one lookup lock */
					if (lookups > 256) {

					if (EXISTS_IN_OBJECT(backing_object, offset +
					    backing_offset, backing_rcount) &&
					    !EXISTS_IN_OBJECT(object, offset, rcount)) {
						/* found a dependency */
						object->cow_hint = (vm_offset_t)offset;
					if (offset != hint_offset) {
						/* try and collapse the rest of the shadow chain */
						if (object != original_object) {
							vm_object_unlock(object);
						object = backing_object;

		/* reset the offset hint for any objects deeper in the chain */
		object->cow_hint = (vm_offset_t)0;

		/*
		 *	All interesting pages in the backing object
		 *	already live in the parent or its pager.
		 *	Thus we can bypass the backing object.
		 */
		vm_object_do_bypass(object, backing_object);
		vm_object_collapse_do_bypass++;

		/*
		 *	Try again with this object's new backing object.
		 */

	if (object != original_object) {
		vm_object_unlock(object);
/*
 *	Routine:	vm_object_page_remove: [internal]
 *
 *		Removes all physical pages in the specified
 *		object range from the object's list of pages.
 *
 *	In/out conditions:
 *		The object must be locked.
 *		The object must not have paging_in_progress, usually
 *		guaranteed by not having a pager.
 */
unsigned int vm_object_page_remove_lookup = 0;
unsigned int vm_object_page_remove_iterate = 0;

__private_extern__ void
vm_object_page_remove(
	register vm_object_t		object,
	register vm_object_offset_t	start,
	register vm_object_offset_t	end)
{
	register vm_page_t	p, next;

	/*
	 *	One and two page removals are most popular.
	 *	The factor of 16 here is somewhat arbitrary.
	 *	It balances vm_object_lookup vs iteration.
	 */
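	/*
	 *	(Worked example, not in the original source.)  Removing two
	 *	pages from an object with 64 resident pages: atop_64(end - start)
	 *	is 2 and resident_page_count/16 is 4, so the per-page lookup
	 *	path below is taken instead of scanning the whole memq.
	 */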
	if (atop_64(end - start) < (unsigned)object->resident_page_count/16) {
		vm_object_page_remove_lookup++;

		for (; start < end; start += PAGE_SIZE_64) {
			p = vm_page_lookup(object, start);
			if (p != VM_PAGE_NULL) {
				assert(!p->cleaning && !p->pageout);
				pmap_disconnect(p->phys_page);
		vm_object_page_remove_iterate++;

		p = (vm_page_t) queue_first(&object->memq);
		while (!queue_end(&object->memq, (queue_entry_t) p)) {
			next = (vm_page_t) queue_next(&p->listq);
			if ((start <= p->offset) && (p->offset < end)) {
				assert(!p->cleaning && !p->pageout);
				pmap_disconnect(p->phys_page);
/*
 *	Routine:	vm_object_coalesce
 *	Function:	Coalesces two objects backing up adjoining
 *			regions of memory into a single object.
 *
 *	returns TRUE if objects were combined.
 *
 *	NOTE:	Only works at the moment if the second object is NULL -
 *		if it's not, which object do we lock first?
 *
 *		prev_object	First object to coalesce
 *		prev_offset	Offset into prev_object
 *		next_object	Second object to coalesce
 *		next_offset	Offset into next_object
 *
 *		prev_size	Size of reference to prev_object
 *		next_size	Size of reference to next_object
 *
 *		The object(s) must *not* be locked.  The map must be locked
 *		to preserve the reference to the object(s).
 */
static int vm_object_coalesce_count = 0;
__private_extern__ boolean_t
vm_object_coalesce(
	register vm_object_t		prev_object,
	vm_object_t			next_object,
	vm_object_offset_t		prev_offset,
	__unused vm_object_offset_t	next_offset,
	vm_object_size_t		prev_size,
	vm_object_size_t		next_size)
{
	vm_object_size_t	newsize;

	if (next_object != VM_OBJECT_NULL) {

	if (prev_object == VM_OBJECT_NULL) {

	    "vm_object_coalesce: 0x%X prev_off 0x%X prev_size 0x%X next_size 0x%X\n",
	    (integer_t)prev_object, prev_offset, prev_size, next_size, 0);

	vm_object_lock(prev_object);

	/*
	 *	Try to collapse the object first
	 */
	vm_object_collapse(prev_object, prev_offset);

	/*
	 *	Can't coalesce if pages not mapped to
	 *	prev_entry may be in use in any way:
	 *	. more than one reference
	 *	. shadows another object
	 *	. has a copy elsewhere
	 *	. paging references (pages might be in page-list)
	 */
	if ((prev_object->ref_count > 1) ||
	    prev_object->pager_created ||
	    (prev_object->shadow != VM_OBJECT_NULL) ||
	    (prev_object->copy != VM_OBJECT_NULL) ||
	    (prev_object->true_share != FALSE) ||
	    (prev_object->purgable != VM_OBJECT_NONPURGABLE) ||
	    (prev_object->paging_in_progress != 0)) {
		vm_object_unlock(prev_object);

	vm_object_coalesce_count++;

	/*
	 *	Remove any pages that may still be in the object from
	 *	a previous deallocation.
	 */
	vm_object_page_remove(prev_object,
		prev_offset + prev_size,
		prev_offset + prev_size + next_size);

	/*
	 *	Extend the object if necessary.
	 */
	newsize = prev_offset + prev_size + next_size;
	if (newsize > prev_object->size) {
		/*
		 *	We cannot extend an object that has existence info,
		 *	since the existence info might then fail to cover
		 *	the entire object.
		 *
		 *	This assertion must be true because the object
		 *	has no pager, and we only create existence info
		 *	for objects with pagers.
		 */
		assert(prev_object->existence_map == VM_EXTERNAL_NULL);
#endif	/* MACH_PAGEMAP */
		prev_object->size = newsize;

	vm_object_unlock(prev_object);
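/*
 *	Illustrative sketch (not part of the original source): a typical
 *	vm_object_coalesce() call when growing a mapping by "new_size" bytes
 *	immediately after an existing entry; the second object must be
 *	VM_OBJECT_NULL, as noted above.  The "prev_entry_*" names are
 *	hypothetical.
 */
#if 0
	if (vm_object_coalesce(prev_entry_object, VM_OBJECT_NULL,
			       prev_entry_offset, (vm_object_offset_t) 0,
			       prev_entry_size, new_size)) {
		/* prev_entry_object now also backs the new range */
	}
#endif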
/*
 *	Attach a set of physical pages to an object, so that they can
 *	be mapped by mapping the object.  Typically used to map IO memory.
 *
 *	The mapping function and its private data are used to obtain the
 *	physical addresses for each page to be mapped.
 */
	vm_object_offset_t	offset,
	vm_object_size_t	size,
	vm_object_offset_t	(*map_fn)(void *map_fn_data,
					  vm_object_offset_t offset),
	void			*map_fn_data)	/* private to map_fn */
{
	vm_object_offset_t	addr;

	num_pages = atop_64(size);

	for (i = 0; i < num_pages; i++, offset += PAGE_SIZE_64) {

	    addr = (*map_fn)(map_fn_data, offset);

	    while ((m = vm_page_grab_fictitious()) == VM_PAGE_NULL)
		vm_page_more_fictitious();

	    vm_object_lock(object);
	    if ((old_page = vm_page_lookup(object, offset))
		vm_page_lock_queues();
		vm_page_free(old_page);
		vm_page_unlock_queues();

	    vm_page_init(m, addr);
	    /* private normally requires lock_queues but since we */
	    /* are initializing the page, it's not necessary here */
	    m->private = TRUE;		/* don't free page */

	    vm_page_insert(m, object, offset);

	    PAGE_WAKEUP_DONE(m);
	    vm_object_unlock(object);
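/*
 *	Illustrative sketch (not part of the original source): a map_fn
 *	callback matching the signature taken by the attach routine above,
 *	returning physical addresses for a linear block of device memory.
 *	"example_linear_map_fn" and "base_paddr" are hypothetical names.
 */
#if 0
static vm_object_offset_t
example_linear_map_fn(
	void			*map_fn_data,
	vm_object_offset_t	offset)
{
	vm_object_offset_t	base_paddr = *(vm_object_offset_t *) map_fn_data;

	return base_paddr + offset;	/* one page per PAGE_SIZE_64 step */
}
#endif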
#include <mach_kdb.h>

#include <ddb/db_output.h>
#include <vm/vm_print.h>

#define printf	kdbprintf

extern boolean_t	vm_object_cached(
				vm_object_t object);

extern void		print_bitstring(

boolean_t	vm_object_print_pages = FALSE;

	printf("%c%c%c%c%c%c%c%c",
	       ((byte & (1 << 0)) ? '1' : '0'),
	       ((byte & (1 << 1)) ? '1' : '0'),
	       ((byte & (1 << 2)) ? '1' : '0'),
	       ((byte & (1 << 3)) ? '1' : '0'),
	       ((byte & (1 << 4)) ? '1' : '0'),
	       ((byte & (1 << 5)) ? '1' : '0'),
	       ((byte & (1 << 6)) ? '1' : '0'),
	       ((byte & (1 << 7)) ? '1' : '0'));

	register vm_object_t	object)
{
	register vm_object_t	o;

	queue_iterate(&vm_object_cached_list, o, vm_object_t, cached_list) {

/*
 *	vm_external_print:	[ debug ]
 */
	vm_external_map_t	emap,
	if (emap == VM_EXTERNAL_NULL) {
		vm_size_t	existence_size = stob(size);
		printf("{ size=%d, map=[", existence_size);
		if (existence_size > 0) {
			print_bitstring(emap[0]);
		if (existence_size > 1) {
			print_bitstring(emap[1]);
		if (existence_size > 2) {
			print_bitstring(emap[existence_size-1]);
#endif	/* MACH_PAGEMAP */
	int orig_db_indent = db_indent;

	if (object == VM_OBJECT_NULL) {
		db_indent = orig_db_indent;

	iprintf("object 0x%x", object);
	printf(", shadow=0x%x", object->shadow);
	printf(", copy=0x%x", object->copy);
	printf(", pager=0x%x", object->pager);
	printf(", ref=%d\n", object->ref_count);

	object = object->shadow;

/*
 *	vm_object_print:	[ debug ]
 */
	__unused boolean_t	have_addr,
	__unused int		arg_count,
	__unused char		*modif)
{
	register vm_page_t	p;

	object = (vm_object_t) (long) db_addr;
	if (object == VM_OBJECT_NULL)

	iprintf("object 0x%x\n", object);

	iprintf("size=0x%x", object->size);
	printf(", cluster=0x%x", object->cluster_size);
	printf(", memq_hint=%p", object->memq_hint);
	printf(", ref_count=%d\n", object->ref_count);
	printf("res_count=%d, ", object->res_count);
#endif	/* TASK_SWAPPER */
	printf("resident_page_count=%d\n", object->resident_page_count);

	iprintf("shadow=0x%x", object->shadow);
	if (object->shadow) {
		vm_object_t shadow = object;
		while((shadow = shadow->shadow))
		printf(" (depth %d)", i);
	printf(", copy=0x%x", object->copy);
	printf(", shadow_offset=0x%x", object->shadow_offset);
	printf(", last_alloc=0x%x\n", object->last_alloc);

	iprintf("pager=0x%x", object->pager);
	printf(", paging_offset=0x%x", object->paging_offset);
	printf(", pager_control=0x%x\n", object->pager_control);

	iprintf("copy_strategy=%d[", object->copy_strategy);
	switch (object->copy_strategy) {
	case MEMORY_OBJECT_COPY_NONE:
		printf("copy_none");

	case MEMORY_OBJECT_COPY_CALL:
		printf("copy_call");

	case MEMORY_OBJECT_COPY_DELAY:
		printf("copy_delay");

	case MEMORY_OBJECT_COPY_SYMMETRIC:
		printf("copy_symmetric");

	case MEMORY_OBJECT_COPY_INVALID:
		printf("copy_invalid");

	printf(", absent_count=%d\n", object->absent_count);

	iprintf("all_wanted=0x%x<", object->all_wanted);

	if (vm_object_wanted(object, VM_OBJECT_EVENT_INITIALIZED)) {
		printf("%sinit", s);
	if (vm_object_wanted(object, VM_OBJECT_EVENT_PAGER_READY)) {
		printf("%sready", s);
	if (vm_object_wanted(object, VM_OBJECT_EVENT_PAGING_IN_PROGRESS)) {
		printf("%spaging", s);
	if (vm_object_wanted(object, VM_OBJECT_EVENT_ABSENT_COUNT)) {
		printf("%sabsent", s);
	if (vm_object_wanted(object, VM_OBJECT_EVENT_LOCK_IN_PROGRESS)) {
		printf("%slock", s);
	if (vm_object_wanted(object, VM_OBJECT_EVENT_UNCACHING)) {
		printf("%suncaching", s);
	if (vm_object_wanted(object, VM_OBJECT_EVENT_COPY_CALL)) {
		printf("%scopy_call", s);
	if (vm_object_wanted(object, VM_OBJECT_EVENT_CACHING)) {
		printf("%scaching", s);
	printf(", paging_in_progress=%d\n", object->paging_in_progress);

	iprintf("%screated, %sinit, %sready, %spersist, %strusted, %spageout, %s, %s\n",
		(object->pager_created ? "" : "!"),
		(object->pager_initialized ? "" : "!"),
		(object->pager_ready ? "" : "!"),
		(object->can_persist ? "" : "!"),
		(object->pager_trusted ? "" : "!"),
		(object->pageout ? "" : "!"),
		(object->internal ? "internal" : "external"),
		(object->temporary ? "temporary" : "permanent"));
	iprintf("%salive, %spurgable, %spurgable_volatile, %spurgable_empty, %sshadowed, %scached, %sprivate\n",
		(object->alive ? "" : "!"),
		((object->purgable != VM_OBJECT_NONPURGABLE) ? "" : "!"),
		((object->purgable == VM_OBJECT_PURGABLE_VOLATILE) ? "" : "!"),
		((object->purgable == VM_OBJECT_PURGABLE_EMPTY) ? "" : "!"),
		(object->shadowed ? "" : "!"),
		(vm_object_cached(object) ? "" : "!"),
		(object->private ? "" : "!"));
	iprintf("%sadvisory_pageout, %ssilent_overwrite\n",
		(object->advisory_pageout ? "" : "!"),
		(object->silent_overwrite ? "" : "!"));

	iprintf("existence_map=");
	vm_external_print(object->existence_map, object->size);
#endif	/* MACH_PAGEMAP */
	iprintf("paging_object=0x%x\n", object->paging_object);
#endif	/* MACH_ASSERT */

	if (vm_object_print_pages) {
		p = (vm_page_t) queue_first(&object->memq);
		while (!queue_end(&object->memq, (queue_entry_t) p)) {
				iprintf("memory:=");
			} else if (count == 2) {
			printf("(off=0x%llX,page=%p)", p->offset, p);
			p = (vm_page_t) queue_next(&p->listq);
/*
 *	vm_object_find		[ debug ]
 *
 *	Find all tasks which reference the given vm_object.
 */
boolean_t vm_object_find(vm_object_t object);
boolean_t vm_object_print_verbose = FALSE;

	vm_map_entry_t		entry;
	processor_set_t		pset = &default_pset;
	boolean_t		found = FALSE;

	queue_iterate(&pset->tasks, task, task_t, pset_tasks) {
		for (entry = vm_map_first_entry(map);
		     entry && entry != vm_map_to_entry(map);
		     entry = entry->vme_next) {

			/*
			 * For the time being skip submaps,
			 * only the kernel can have submaps,
			 * and unless we are interested in
			 * kernel objects, we can simply skip
			 * submaps.  See sb/dejan/nmk18b7/src/mach_kernel/vm
			 * for a full solution.
			 */
			if (entry->is_sub_map)

			obj = entry->object.vm_object;

			while (obj != VM_OBJECT_NULL) {
				if (obj == object) {
						printf("TASK\t\tMAP\t\tENTRY\n");
					printf("0x%x\t0x%x\t0x%x\n",
#endif	/* MACH_KDB */
vm_object_populate_with_private(
	vm_object_offset_t	offset,
	vm_object_offset_t	base_offset;

	if(!object->private)
		return KERN_FAILURE;

	base_page = phys_page;

	vm_object_lock(object);
	if(!object->phys_contiguous) {
		if((base_offset = trunc_page_64(offset)) != offset) {
			vm_object_unlock(object);
			return KERN_FAILURE;
		base_offset += object->paging_offset;
			m = vm_page_lookup(object, base_offset);
			if(m != VM_PAGE_NULL) {
					vm_page_lock_queues();
					m->fictitious = FALSE;
					m->phys_page = base_page;
						object->absent_count++;
					m->list_req_pending = TRUE;
					vm_page_unlock_queues();
				} else if (m->phys_page != base_page) {
					/* pmap call to clear old mapping */
					pmap_disconnect(m->phys_page);
					m->phys_page = base_page;

				/*
				 * We're not pointing to the same
				 * physical page any longer and the
				 * contents of the new one are not
				 * supposed to be encrypted.
				 * XXX What happens to the original
				 * physical page?  Is it lost?
				 */
				m->encrypted = FALSE;

				while ((m = vm_page_grab_fictitious())
					vm_page_more_fictitious();
				vm_page_lock_queues();
				m->fictitious = FALSE;
				m->phys_page = base_page;
				m->list_req_pending = TRUE;
					object->absent_count++;
				vm_page_unlock_queues();
				vm_page_insert(m, object, base_offset);
			base_page++;	/* Go to the next physical page */
			base_offset += PAGE_SIZE;
		/* NOTE: we should check the original settings here */
		/* if we have a size > zero a pmap call should be made */
		/* to disable the range */

		/* shadows on contiguous memory are not allowed */
		/* we therefore can use the offset field */
		object->shadow_offset = (vm_object_offset_t)(phys_page << 12);
		object->size = size;
	vm_object_unlock(object);
	return KERN_SUCCESS;
/*
 *	memory_object_free_from_cache:
 *
 *	Walk the vm_object cache list, removing and freeing vm_objects
 *	which are backed by the pager identified by the caller, (pager_id).
 *	Remove up to "count" objects, if that many are available.
 *
 *	Walk the list at most once, return the number of vm_objects
 */
__private_extern__ kern_return_t
memory_object_free_from_cache(
	__unused host_t		host,
	int	object_released = 0;

	register vm_object_t object = VM_OBJECT_NULL;

	if(host == HOST_NULL)
		return(KERN_INVALID_ARGUMENT);

	vm_object_cache_lock();

	queue_iterate(&vm_object_cached_list, object,
		      vm_object_t, cached_list) {
		if (object->pager && (pager_id == object->pager->pager)) {
			vm_object_lock(object);
			queue_remove(&vm_object_cached_list, object,
				     vm_object_t, cached_list);
			vm_object_cached_count--;

			/*
			 *	Since this object is in the cache, we know
			 *	that it is initialized and has only a pager's
			 *	(implicit) reference.  Take a reference to avoid
			 *	recursive deallocations.
			 */
			assert(object->pager_initialized);
			assert(object->ref_count == 0);
			object->ref_count++;

			/*
			 *	Terminate the object.
			 *	If the object had a shadow, we let
			 *	vm_object_deallocate deallocate it.
			 *	"pageout" objects have a shadow, but
			 *	maintain a "paging reference" rather
			 *	than a normal reference.
			 *	(We are careful here to limit recursion.)
			 */
			shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
			if ((vm_object_terminate(object) == KERN_SUCCESS)
			    && (shadow != VM_OBJECT_NULL)) {
				vm_object_deallocate(shadow);

			if(object_released++ == *count)
				return KERN_SUCCESS;
	vm_object_cache_unlock();
	*count = object_released;
	return KERN_SUCCESS;
memory_object_create_named(
	memory_object_t		pager,
	memory_object_offset_t	size,
	memory_object_control_t	*control)
{
	vm_object_hash_entry_t	entry;

	*control = MEMORY_OBJECT_CONTROL_NULL;
	if (pager == MEMORY_OBJECT_NULL)
		return KERN_INVALID_ARGUMENT;

	vm_object_cache_lock();
	entry = vm_object_hash_lookup(pager, FALSE);
	if ((entry != VM_OBJECT_HASH_ENTRY_NULL) &&
	    (entry->object != VM_OBJECT_NULL)) {
		if (entry->object->named == TRUE)
			panic("memory_object_create_named: caller already holds the right");
	}
	vm_object_cache_unlock();
	if ((object = vm_object_enter(pager, size, FALSE, FALSE, TRUE))
	    == VM_OBJECT_NULL) {
		return(KERN_INVALID_OBJECT);

	/* wait for object (if any) to be ready */
	if (object != VM_OBJECT_NULL) {
		vm_object_lock(object);
		object->named = TRUE;
		while (!object->pager_ready) {
			vm_object_sleep(object,
					VM_OBJECT_EVENT_PAGER_READY,
		*control = object->pager_control;
		vm_object_unlock(object);
	return (KERN_SUCCESS);
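/*
 *	Illustrative sketch (not part of the original source): obtaining a
 *	named control for a pager.  "my_pager" and "my_size" are hypothetical.
 */
#if 0
	memory_object_control_t	control;
	kern_return_t		kr;

	kr = memory_object_create_named(my_pager, my_size, &control);
	if (kr == KERN_SUCCESS) {
		/* "control" now identifies the named VM object */
	}
#endif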
/*
 *	Routine:	memory_object_recover_named [user interface]
 *
 *		Attempt to recover a named reference for a VM object.
 *		VM will verify that the object has not already started
 *		down the termination path, and if it has, will optionally
 *		wait for that to finish.
 *
 *		KERN_SUCCESS - we recovered a named reference on the object
 *		KERN_FAILURE - we could not recover a reference (object dead)
 *		KERN_INVALID_ARGUMENT - bad memory object control
 */
memory_object_recover_named(
	memory_object_control_t	control,
	boolean_t		wait_on_terminating)
{
	vm_object_cache_lock();
	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL) {
		vm_object_cache_unlock();
		return (KERN_INVALID_ARGUMENT);

	vm_object_lock(object);

	if (object->terminating && wait_on_terminating) {
		vm_object_cache_unlock();
		vm_object_wait(object,
			VM_OBJECT_EVENT_PAGING_IN_PROGRESS,
		vm_object_cache_lock();

	if (!object->alive) {
		vm_object_cache_unlock();
		vm_object_unlock(object);
		return KERN_FAILURE;

	if (object->named == TRUE) {
		vm_object_cache_unlock();
		vm_object_unlock(object);
		return KERN_SUCCESS;

	if((object->ref_count == 0) && (!object->terminating)){
		queue_remove(&vm_object_cached_list, object,
			     vm_object_t, cached_list);
		vm_object_cached_count--;
		XPR(XPR_VM_OBJECT_CACHE,
		    "memory_object_recover_named: removing %X, head (%X, %X)\n",
		    (integer_t)vm_object_cached_list.next,
		    (integer_t)vm_object_cached_list.prev, 0,0);

	vm_object_cache_unlock();

	object->named = TRUE;
	object->ref_count++;
	vm_object_res_reference(object);
	while (!object->pager_ready) {
		vm_object_sleep(object,
				VM_OBJECT_EVENT_PAGER_READY,
	vm_object_unlock(object);
	return (KERN_SUCCESS);
/*
 *	vm_object_release_name:
 *
 *	Enforces name semantic on memory_object reference count decrement
 *	This routine should not be called unless the caller holds a name
 *	reference gained through the memory_object_create_named.
 *
 *	If the TERMINATE_IDLE flag is set, the call will return if the
 *	reference count is not 1. i.e. idle with the only remaining reference
 *
 *	If the decision is made to proceed the name field flag is set to
 *	false and the reference count is decremented.  If the RESPECT_CACHE
 *	flag is set and the reference count has gone to zero, the
 *	memory_object is checked to see if it is cacheable otherwise when
 *	the reference count is zero, it is simply terminated.
 */
4562 __private_extern__ kern_return_t
4563 vm_object_release_name(
4568 boolean_t original_object
= TRUE
;
4570 while (object
!= VM_OBJECT_NULL
) {
4573 * The cache holds a reference (uncounted) to
4574 * the object. We must locke it before removing
4579 vm_object_cache_lock();
4580 vm_object_lock(object
);
4581 assert(object
->alive
);
4583 assert(object
->named
);
4584 assert(object
->ref_count
> 0);
4587 * We have to wait for initialization before
4588 * destroying or caching the object.
4591 if (object
->pager_created
&& !object
->pager_initialized
) {
4592 assert(!object
->can_persist
);
4593 vm_object_assert_wait(object
,
4594 VM_OBJECT_EVENT_INITIALIZED
,
4596 vm_object_unlock(object
);
4597 vm_object_cache_unlock();
4598 thread_block(THREAD_CONTINUE_NULL
);
4602 if (((object
->ref_count
> 1)
4603 && (flags
& MEMORY_OBJECT_TERMINATE_IDLE
))
4604 || (object
->terminating
)) {
4605 vm_object_unlock(object
);
4606 vm_object_cache_unlock();
4607 return KERN_FAILURE
;
4609 if (flags
& MEMORY_OBJECT_RELEASE_NO_OP
) {
4610 vm_object_unlock(object
);
4611 vm_object_cache_unlock();
4612 return KERN_SUCCESS
;
4616 if ((flags
& MEMORY_OBJECT_RESPECT_CACHE
) &&
4617 (object
->ref_count
== 1)) {
4619 object
->named
= FALSE
;
4620 vm_object_unlock(object
);
4621 vm_object_cache_unlock();
4622 /* let vm_object_deallocate push this thing into */
4623 /* the cache, if that it is where it is bound */
4624 vm_object_deallocate(object
);
4625 return KERN_SUCCESS
;
4627 VM_OBJ_RES_DECR(object
);
4628 shadow
= object
->pageout
?VM_OBJECT_NULL
:object
->shadow
;
4629 if(object
->ref_count
== 1) {
4630 if(vm_object_terminate(object
) != KERN_SUCCESS
) {
4631 if(original_object
) {
4632 return KERN_FAILURE
;
4634 return KERN_SUCCESS
;
4637 if (shadow
!= VM_OBJECT_NULL
) {
4638 original_object
= FALSE
;
4642 return KERN_SUCCESS
;
4644 object
->ref_count
--;
4645 assert(object
->ref_count
> 0);
4647 object
->named
= FALSE
;
4648 vm_object_unlock(object
);
4649 vm_object_cache_unlock();
4650 return KERN_SUCCESS
;
4655 return KERN_FAILURE
;
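
/*
 * A minimal usage sketch, assuming a caller whose name reference came from
 * memory_object_create_named(); the helper name and the "named_object"
 * argument are hypothetical.  It shows how the flag bits described in the
 * comment block above combine: drop the name only if the object is
 * otherwise idle, and respect its cacheability.
 */
#if 0	/* example only */
static boolean_t
example_release_idle_name(vm_object_t named_object)
{
	kern_return_t kr;

	kr = vm_object_release_name(named_object,
	    MEMORY_OBJECT_TERMINATE_IDLE | MEMORY_OBJECT_RESPECT_CACHE);

	/* KERN_FAILURE: other references or a termination kept it alive */
	return (kr == KERN_SUCCESS);
}
#endif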
__private_extern__ kern_return_t
vm_object_lock_request(
	vm_object_t			object,
	vm_object_offset_t		offset,
	vm_object_size_t		size,
	memory_object_return_t		should_return,
	int				flags,
	vm_prot_t			prot)
{
	__unused boolean_t	should_flush;

	should_flush = flags & MEMORY_OBJECT_DATA_FLUSH;

	XPR(XPR_MEMORY_OBJECT,
	    "vm_o_lock_request, obj 0x%X off 0x%X size 0x%X flags %X prot %X\n",
	    (integer_t)object, offset, size,
	    (((should_return&1)<<1)|should_flush), prot);

	/*
	 *	Check for bogus arguments.
	 */
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	if ((prot & ~VM_PROT_ALL) != 0 && prot != VM_PROT_NO_CHANGE)
		return (KERN_INVALID_ARGUMENT);

	size = round_page_64(size);

	/*
	 *	Lock the object, and acquire a paging reference to
	 *	prevent the memory_object reference from being released.
	 */
	vm_object_lock(object);
	vm_object_paging_begin(object);

	(void)vm_object_update(object,
		offset, size, NULL, NULL, should_return, flags, prot);

	vm_object_paging_end(object);
	vm_object_unlock(object);

	return (KERN_SUCCESS);
}
/*
 * Empty a purgable object by grabbing the physical pages assigned to it and
 * putting them on the free queue without writing them to backing store, etc.
 * When the pages are next touched they will be demand zero-fill pages.  We
 * skip pages which are busy, being paged in/out, wired, etc.  We do _not_
 * skip referenced/dirty pages, pages on the active queue, etc.  We're more
 * than happy to grab these since this is a purgable object.  We mark the
 * object as "empty" after reaping its pages.
 *
 * On entry the object and page queues are locked, the object must be a
 * purgable object with no delayed copies pending.
 */
unsigned int
vm_object_purge(vm_object_t object)
{
	vm_page_t	p, next;
	unsigned int	num_purged_pages;
	vm_page_t	local_freeq;
	unsigned long	local_freed;
	int		purge_loop_quota;
/* free pages as soon as we gather PURGE_BATCH_FREE_LIMIT pages to free */
#define PURGE_BATCH_FREE_LIMIT	50
/* release page queues lock every PURGE_LOOP_QUOTA iterations */
#define PURGE_LOOP_QUOTA	100

	num_purged_pages = 0;
	if (object->purgable == VM_OBJECT_NONPURGABLE)
		return num_purged_pages;

	object->purgable = VM_OBJECT_PURGABLE_EMPTY;

	assert(object->copy == VM_OBJECT_NULL);
	assert(object->copy_strategy == MEMORY_OBJECT_COPY_NONE);
	purge_loop_quota = PURGE_LOOP_QUOTA;

	local_freeq = VM_PAGE_NULL;
	local_freed = 0;

	/*
	 * Go through the object's resident pages and try to discard them.
	 */
	next = (vm_page_t)queue_first(&object->memq);
	while (!queue_end(&object->memq, (queue_entry_t)next)) {
		p = next;
		next = (vm_page_t)queue_next(&next->listq);

		if (purge_loop_quota-- == 0) {
			/*
			 * Avoid holding the page queues lock for too long.
			 * Let someone else take it for a while if needed.
			 * Keep holding the object's lock to guarantee that
			 * the object's page list doesn't change under us.
			 */
			if (local_freeq != VM_PAGE_NULL) {
				/*
				 * Flush our queue of pages to free.
				 */
				vm_page_free_list(local_freeq);
				local_freeq = VM_PAGE_NULL;
				local_freed = 0;
			}
			vm_page_unlock_queues();
			mutex_pause();
			vm_page_lock_queues();

			/* resume with the current page and a new quota */
			purge_loop_quota = PURGE_LOOP_QUOTA;
		}

		if (p->busy || p->cleaning || p->laundry ||
		    p->list_req_pending) {
			/* page is being acted upon, so don't mess with it */
			continue;
		}
		if (p->wire_count) {
			/* don't discard a wired page */
			continue;
		}

		if (p->tabled) {
			/* clean up the object/offset table */
			vm_page_remove(p);
		}
		if (p->absent) {
			/* update the object's count of absent pages */
			vm_object_absent_release(object);
		}

		/* we can discard this page */

		/* advertise that this page is in a transition state */
		p->busy = TRUE;

		if (p->no_isync == TRUE) {
			/* the page hasn't been mapped yet */
			/* (optimization to delay the i-cache sync) */
		} else {
			/* unmap the page */
			int refmod_state;

			refmod_state = pmap_disconnect(p->phys_page);
			if (refmod_state & VM_MEM_MODIFIED) {
				p->dirty = TRUE;
			}
		}

		if (p->dirty || p->precious) {
			/* we saved the cost of cleaning this page! */
			num_purged_pages++;
			vm_page_purged_count++;
		}

		/* remove page from active or inactive queue... */
		VM_PAGE_QUEUES_REMOVE(p);

		/* ... and put it on our queue of pages to free */
		assert(!p->laundry);
		assert(p->object != kernel_object);
		assert(p->pageq.next == NULL &&
		       p->pageq.prev == NULL);
		p->pageq.next = (queue_entry_t) local_freeq;
		local_freeq = p;
		if (++local_freed >= PURGE_BATCH_FREE_LIMIT) {
			/* flush our queue of pages to free */
			vm_page_free_list(local_freeq);
			local_freeq = VM_PAGE_NULL;
			local_freed = 0;
		}
	}

	/* flush our local queue of pages to free one last time */
	if (local_freeq != VM_PAGE_NULL) {
		vm_page_free_list(local_freeq);
		local_freeq = VM_PAGE_NULL;
		local_freed = 0;
	}

	return num_purged_pages;
}
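
/*
 * A minimal sketch of the locking discipline the comment above requires of
 * vm_object_purge() callers: the object lock and the page queues lock must
 * both be held on entry, and the object must already be a purgable object
 * with no delayed copies pending.  The helper name and "volatile_object"
 * are hypothetical.
 */
#if 0	/* example only */
static unsigned int
example_purge_volatile_object(vm_object_t volatile_object)
{
	unsigned int	reclaimed;

	vm_object_lock(volatile_object);
	vm_page_lock_queues();
	reclaimed = vm_object_purge(volatile_object);
	vm_page_unlock_queues();
	vm_object_unlock(volatile_object);

	return reclaimed;
}
#endif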
/*
 * vm_object_purgable_control() allows the caller to control and investigate the
 * state of a purgable object.  A purgable object is created via a call to
 * vm_allocate() with VM_FLAGS_PURGABLE specified.  A purgable object will
 * never be coalesced with any other object -- even other purgable objects --
 * and will thus always remain a distinct object.  A purgable object has
 * special semantics when its reference count is exactly 1.  If its reference
 * count is greater than 1, then a purgable object will behave like a normal
 * object and attempts to use this interface will result in an error return
 * of KERN_INVALID_ARGUMENT.
 *
 * A purgable object may be put into a "volatile" state which will make the
 * object's pages eligible to be reclaimed without paging to backing
 * store if the system runs low on memory.  If the pages in a volatile
 * purgable object are reclaimed, the purgable object is said to have been
 * "emptied."  When a purgable object is emptied the system will reclaim as
 * many pages from the object as it can in a convenient manner (pages already
 * en route to backing store or busy for other reasons are left as is).  When
 * a purgable object is made volatile, its pages will generally be reclaimed
 * before other pages in the application's working set.  This semantic is
 * generally used by applications which can recreate the data in the object
 * faster than it can be paged in.  One such example might be media assets
 * which can be reread from a much faster RAID volume.
 *
 * A purgable object may be designated as "non-volatile" which means it will
 * behave like all other objects in the system with pages being written to and
 * read from backing store as needed to satisfy system memory needs.  If the
 * object was emptied before the object was made non-volatile, that fact will
 * be returned as the old state of the purgable object (see
 * VM_PURGABLE_SET_STATE below).  In this case, any pages of the object which
 * were reclaimed as part of emptying the object will be refaulted in as
 * zero-fill on demand.  It is up to the application to note that an object
 * was emptied and recreate the object's contents if necessary.  When a
 * purgable object is made non-volatile, its pages will generally not be paged
 * out to backing store in the immediate future.  A purgable object may also
 * be manually emptied.
 *
 * Finally, the current state (non-volatile, volatile, volatile & empty) of a
 * volatile purgable object may be queried at any time.  This information may
 * be used as a control input to let the application know when the system is
 * experiencing memory pressure and is reclaiming memory.
 *
 * The specified address may be any address within the purgable object.  If
 * the specified address does not represent any object in the target task's
 * virtual address space, then KERN_INVALID_ADDRESS will be returned.  If the
 * object containing the specified address is not a purgable object, then
 * KERN_INVALID_ARGUMENT will be returned.  Otherwise, KERN_SUCCESS will be
 * returned.
 *
 * The control parameter may be any one of VM_PURGABLE_SET_STATE or
 * VM_PURGABLE_GET_STATE.  For VM_PURGABLE_SET_STATE, the in/out parameter
 * state is used to set the new state of the purgable object and return its
 * old state.  For VM_PURGABLE_GET_STATE, the current state of the purgable
 * object is returned in the parameter state.
 *
 * The in/out parameter state may be one of VM_PURGABLE_NONVOLATILE,
 * VM_PURGABLE_VOLATILE or VM_PURGABLE_EMPTY.  These, respectively, represent
 * the non-volatile, volatile and volatile/empty states described above.
 * Setting the state of a purgable object to VM_PURGABLE_EMPTY will
 * immediately reclaim as many pages in the object as can be conveniently
 * collected (some may have already been written to backing store or be
 * busy).
 *
 * The process of making a purgable object non-volatile and determining its
 * previous state is atomic.  Thus, if a purgable object is made
 * VM_PURGABLE_NONVOLATILE and the old state is returned as
 * VM_PURGABLE_VOLATILE, then the purgable object's previous contents are
 * completely intact and will remain so until the object is made volatile
 * again.  If the old state is returned as VM_PURGABLE_EMPTY then the object
 * was reclaimed while it was in a volatile state and its previous contents
 * have been lost.
 *
 * The object must be locked.
 */
kern_return_t
vm_object_purgable_control(
	vm_object_t	object,
	vm_purgable_t	control,
	int		*state)
{
	int		old_state;
	vm_page_t	p;

	if (object == VM_OBJECT_NULL) {
		/*
		 * Object must already be present or it can't be purgable.
		 */
		return KERN_INVALID_ARGUMENT;
	}

	/*
	 * Get current state of the purgable object.
	 */
	switch (object->purgable) {
	case VM_OBJECT_NONPURGABLE:
		return KERN_INVALID_ARGUMENT;

	case VM_OBJECT_PURGABLE_NONVOLATILE:
		old_state = VM_PURGABLE_NONVOLATILE;
		break;

	case VM_OBJECT_PURGABLE_VOLATILE:
		old_state = VM_PURGABLE_VOLATILE;
		break;

	case VM_OBJECT_PURGABLE_EMPTY:
		old_state = VM_PURGABLE_EMPTY;
		break;

	default:
		old_state = VM_PURGABLE_NONVOLATILE;
		panic("Bad state (%d) for purgable object!\n",
		      object->purgable);
	}

	/* purgable can't have delayed copies - now or in the future */
	assert(object->copy == VM_OBJECT_NULL);
	assert(object->copy_strategy == MEMORY_OBJECT_COPY_NONE);

	/*
	 * Execute the desired operation.
	 */
	if (control == VM_PURGABLE_GET_STATE) {
		*state = old_state;
		return KERN_SUCCESS;
	}

	switch (*state) {
	case VM_PURGABLE_NONVOLATILE:
		vm_page_lock_queues();
		if (object->purgable != VM_OBJECT_PURGABLE_NONVOLATILE) {
			assert(vm_page_purgeable_count >=
			       object->resident_page_count);
			vm_page_purgeable_count -= object->resident_page_count;
		}

		object->purgable = VM_OBJECT_PURGABLE_NONVOLATILE;

		/*
		 * If the object wasn't emptied, then mark all pages of the
		 * object as referenced in order to give them a complete turn
		 * of the virtual memory "clock" before becoming candidates
		 * for paging out (if the system is suffering from memory
		 * pressure).  We don't really need to set the pmap reference
		 * bits (which would be expensive) since the software copies
		 * are believed if they're set to true ...
		 */
		if (old_state != VM_PURGABLE_EMPTY) {
			for (p = (vm_page_t)queue_first(&object->memq);
			     !queue_end(&object->memq, (queue_entry_t)p);
			     p = (vm_page_t)queue_next(&p->listq))
				p->reference = TRUE;
		}

		vm_page_unlock_queues();

		break;

	case VM_PURGABLE_VOLATILE:
		vm_page_lock_queues();

		if (object->purgable != VM_OBJECT_PURGABLE_VOLATILE &&
		    object->purgable != VM_OBJECT_PURGABLE_EMPTY) {
			vm_page_purgeable_count += object->resident_page_count;
		}

		object->purgable = VM_OBJECT_PURGABLE_VOLATILE;

		/*
		 * We want the newly volatile purgable object to be a
		 * candidate for the pageout scan before other pages in the
		 * application if the system is suffering from memory
		 * pressure.  To do this, we move a page of the object from
		 * the active queue onto the inactive queue in order to
		 * promote the object for early reclaim.  We only need to move
		 * a single page since the pageout scan will reap the entire
		 * purgable object if it finds a single page in a volatile
		 * state.  Obviously we don't do this if there are no pages
		 * associated with the object or we find a page of the object
		 * already on the inactive queue.
		 */
		for (p = (vm_page_t)queue_first(&object->memq);
		     !queue_end(&object->memq, (queue_entry_t)p);
		     p = (vm_page_t)queue_next(&p->listq)) {
			if (p->inactive) {
				/* already a page on the inactive queue */
				break;
			}
			if (p->active && !p->busy) {
				/* found one we can move */
				vm_page_deactivate(p);
				break;
			}
		}
		vm_page_unlock_queues();

		break;

	case VM_PURGABLE_EMPTY:
		vm_page_lock_queues();
		if (object->purgable != VM_OBJECT_PURGABLE_VOLATILE &&
		    object->purgable != VM_OBJECT_PURGABLE_EMPTY) {
			vm_page_purgeable_count += object->resident_page_count;
		}
		(void) vm_object_purge(object);
		vm_page_unlock_queues();
		break;
	}

	*state = old_state;

	return KERN_SUCCESS;
}
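
/*
 * A minimal sketch of the VM_PURGABLE_SET_STATE in/out convention described
 * in the comment block above: the new state goes in, the old state comes
 * back out.  The helper name and "purgable_object" are hypothetical; the
 * object is assumed to be locked by the caller, as required.
 */
#if 0	/* example only */
static boolean_t
example_volatile_then_restore(vm_object_t purgable_object)
{
	int	state;

	/* make the contents reclaimable under memory pressure */
	state = VM_PURGABLE_VOLATILE;
	(void) vm_object_purgable_control(purgable_object,
					  VM_PURGABLE_SET_STATE, &state);

	/* ... later, pin the contents down again ... */
	state = VM_PURGABLE_NONVOLATILE;
	(void) vm_object_purgable_control(purgable_object,
					  VM_PURGABLE_SET_STATE, &state);

	/* the returned old state tells us whether the contents survived */
	return (state != VM_PURGABLE_EMPTY);
}
#endif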
#if	TASK_SWAPPER
/*
 * vm_object_res_deallocate
 *
 * (recursively) decrement residence counts on vm objects and their shadows.
 * Called from vm_object_deallocate and when swapping out an object.
 *
 * The object is locked, and remains locked throughout the function,
 * even as we iterate down the shadow chain.  Locks on intermediate objects
 * will be dropped, but not the original object.
 *
 * NOTE: this function used to use recursion, rather than iteration.
 */
__private_extern__ void
vm_object_res_deallocate(
	vm_object_t	object)
{
	vm_object_t orig_object = object;
	/*
	 * Object is locked so it can be called directly
	 * from vm_object_deallocate.  Original object is never
	 * unlocked.
	 */
	assert(object->res_count > 0);
	while (--object->res_count == 0) {
		assert(object->ref_count >= object->res_count);
		vm_object_deactivate_all_pages(object);
		/* iterate on shadow, if present */
		if (object->shadow != VM_OBJECT_NULL) {
			vm_object_t tmp_object = object->shadow;
			vm_object_lock(tmp_object);
			if (object != orig_object)
				vm_object_unlock(object);
			object = tmp_object;
			assert(object->res_count > 0);
		} else
			break;
	}
	if (object != orig_object)
		vm_object_unlock(object);
}
/*
 * vm_object_res_reference
 *
 * Internal function to increment residence count on a vm object
 * and its shadows.  It is called only from vm_object_reference, and
 * when swapping in a vm object, via vm_map_swap.
 *
 * The object is locked, and remains locked throughout the function,
 * even as we iterate down the shadow chain.  Locks on intermediate objects
 * will be dropped, but not the original object.
 *
 * NOTE: this function used to use recursion, rather than iteration.
 */
__private_extern__ void
vm_object_res_reference(
	vm_object_t	object)
{
	vm_object_t orig_object = object;
	/*
	 * Object is locked, so this can be called directly
	 * from vm_object_reference.  This lock is never released.
	 */
	while ((++object->res_count == 1) &&
	       (object->shadow != VM_OBJECT_NULL)) {
		vm_object_t tmp_object = object->shadow;

		assert(object->ref_count >= object->res_count);
		vm_object_lock(tmp_object);
		if (object != orig_object)
			vm_object_unlock(object);
		object = tmp_object;
	}
	if (object != orig_object)
		vm_object_unlock(object);
	assert(orig_object->ref_count >= orig_object->res_count);
}
#endif	/* TASK_SWAPPER */
/*
 *	vm_object_reference:
 *
 *	Gets another reference to the given object.
 */
#ifdef vm_object_reference
#undef vm_object_reference
#endif
__private_extern__ void
vm_object_reference(
	register vm_object_t	object)
{
	if (object == VM_OBJECT_NULL)
		return;

	vm_object_lock(object);
	assert(object->ref_count > 0);
	vm_object_reference_locked(object);
	vm_object_unlock(object);
}
#if MACH_BSD
/*
 * Scale the vm_object_cache
 * This is required to make sure that the vm_object_cache is big
 * enough to effectively cache the mapped file.
 * This is really important with UBC as all the regular file vnodes
 * have a memory object associated with them. Having this cache too
 * small results in rapid reclaim of vnodes and hurts performance a LOT!
 *
 * This is also needed as the number of vnodes can be dynamically scaled.
 */
kern_return_t
adjust_vm_object_cache(
	__unused vm_size_t	oval,
	vm_size_t		nval)
{
	vm_object_cached_max = nval;
	vm_object_cache_trim(FALSE);
	return (KERN_SUCCESS);
}
#endif /* MACH_BSD */
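
/*
 * A minimal usage sketch, assuming the vnode limit has just been rescaled to
 * the hypothetical "new_vnode_limit": the old-size argument is ignored by
 * adjust_vm_object_cache(), so 0 is passed for it.
 */
#if 0	/* example only */
static void
example_rescale_object_cache(vm_size_t new_vnode_limit)
{
	(void) adjust_vm_object_cache(0, new_vnode_limit);
}
#endif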
/*
 * vm_object_transpose
 *
 * This routine takes two VM objects of the same size and exchanges
 * their backing store.
 * The objects should be "quiesced" via a UPL operation with UPL_SET_IO_WIRE
 * and UPL_BLOCK_ACCESS if they are referenced anywhere.
 *
 * The VM objects must not be locked by caller.
 */
kern_return_t
vm_object_transpose(
	vm_object_t		object1,
	vm_object_t		object2,
	vm_object_size_t	transpose_size)
{
	vm_object_t		tmp_object;
	kern_return_t		retval;
	boolean_t		object1_locked, object2_locked;
	boolean_t		object1_paging, object2_paging;
	vm_page_t		page;
	vm_object_offset_t	page_offset;

	tmp_object = VM_OBJECT_NULL;
	object1_locked = FALSE; object2_locked = FALSE;
	object1_paging = FALSE; object2_paging = FALSE;

	if (object1 == object2 ||
	    object1 == VM_OBJECT_NULL ||
	    object2 == VM_OBJECT_NULL) {
		/*
		 * If the 2 VM objects are the same, there's
		 * no point in exchanging their backing store.
		 */
		retval = KERN_INVALID_VALUE;
		goto done;
	}

	vm_object_lock(object1);
	object1_locked = TRUE;
	if (object1->copy || object1->shadow || object1->shadowed ||
	    object1->purgable != VM_OBJECT_NONPURGABLE) {
		/*
		 * We don't deal with copy or shadow objects (yet).
		 */
		retval = KERN_INVALID_VALUE;
		goto done;
	}
	/*
	 * Since we're about to mess with the object's backing store,
	 * mark it as "paging_in_progress".  Note that this is not enough
	 * to prevent any paging activity on this object, so the caller should
	 * have "quiesced" the objects beforehand, via a UPL operation with
	 * UPL_SET_IO_WIRE (to make sure all the pages are there and wired)
	 * and UPL_BLOCK_ACCESS (to mark the pages "busy").
	 */
	vm_object_paging_begin(object1);
	object1_paging = TRUE;
	vm_object_unlock(object1);
	object1_locked = FALSE;

	/*
	 * Same as above for the 2nd object...
	 */
	vm_object_lock(object2);
	object2_locked = TRUE;
	if (object2->copy || object2->shadow || object2->shadowed ||
	    object2->purgable != VM_OBJECT_NONPURGABLE) {
		retval = KERN_INVALID_VALUE;
		goto done;
	}
	vm_object_paging_begin(object2);
	object2_paging = TRUE;
	vm_object_unlock(object2);
	object2_locked = FALSE;

	/*
	 * Allocate a temporary VM object to hold object1's contents
	 * while we copy object2 to object1.
	 */
	tmp_object = vm_object_allocate(transpose_size);
	vm_object_lock(tmp_object);
	vm_object_paging_begin(tmp_object);
	tmp_object->can_persist = FALSE;

	/*
	 * Since we need to lock both objects at the same time,
	 * make sure we always lock them in the same order to
	 * avoid deadlocks.
	 */
	if (object1 < object2) {
		vm_object_lock(object1);
		vm_object_lock(object2);
	} else {
		vm_object_lock(object2);
		vm_object_lock(object1);
	}
	object1_locked = TRUE;
	object2_locked = TRUE;

	if (object1->size != object2->size ||
	    object1->size != transpose_size) {
		/*
		 * If the 2 objects don't have the same size, we can't
		 * exchange their backing stores or one would overflow.
		 * If their size doesn't match the caller's
		 * "transpose_size", we can't do it either because the
		 * transpose operation will affect the entire span of
		 * the objects.
		 */
		retval = KERN_INVALID_VALUE;
		goto done;
	}

	/*
	 * Transpose the lists of resident pages.
	 */
	if (object1->phys_contiguous || queue_empty(&object1->memq)) {
		/*
		 * No pages in object1, just transfer pages
		 * from object2 to object1.  No need to go through
		 * an intermediate object.
		 */
		while (!queue_empty(&object2->memq)) {
			page = (vm_page_t) queue_first(&object2->memq);
			vm_page_rename(page, object1, page->offset);
		}
		assert(queue_empty(&object2->memq));
	} else if (object2->phys_contiguous || queue_empty(&object2->memq)) {
		/*
		 * No pages in object2, just transfer pages
		 * from object1 to object2.  No need to go through
		 * an intermediate object.
		 */
		while (!queue_empty(&object1->memq)) {
			page = (vm_page_t) queue_first(&object1->memq);
			vm_page_rename(page, object2, page->offset);
		}
		assert(queue_empty(&object1->memq));
	} else {
		/* transfer object1's pages to tmp_object */
		vm_page_lock_queues();
		while (!queue_empty(&object1->memq)) {
			page = (vm_page_t) queue_first(&object1->memq);
			page_offset = page->offset;
			vm_page_remove(page);
			page->offset = page_offset;
			queue_enter(&tmp_object->memq, page, vm_page_t, listq);
		}
		vm_page_unlock_queues();
		assert(queue_empty(&object1->memq));
		/* transfer object2's pages to object1 */
		while (!queue_empty(&object2->memq)) {
			page = (vm_page_t) queue_first(&object2->memq);
			vm_page_rename(page, object1, page->offset);
		}
		assert(queue_empty(&object2->memq));
		/* transfer tmp_object's pages to object2 */
		while (!queue_empty(&tmp_object->memq)) {
			page = (vm_page_t) queue_first(&tmp_object->memq);
			queue_remove(&tmp_object->memq, page,
				     vm_page_t, listq);
			vm_page_insert(page, object2, page->offset);
		}
		assert(queue_empty(&tmp_object->memq));
	}

	/* no need to transpose the size: they should be identical */
	assert(object1->size == object2->size);

#define __TRANSPOSE_FIELD(field)				\
MACRO_BEGIN							\
	tmp_object->field = object1->field;			\
	object1->field = object2->field;			\
	object2->field = tmp_object->field;			\
MACRO_END

	assert(!object1->copy);
	assert(!object2->copy);

	assert(!object1->shadow);
	assert(!object2->shadow);

	__TRANSPOSE_FIELD(shadow_offset); /* used by phys_contiguous objects */
	__TRANSPOSE_FIELD(pager);
	__TRANSPOSE_FIELD(paging_offset);

	__TRANSPOSE_FIELD(pager_control);
	/* update the memory_objects' pointers back to the VM objects */
	if (object1->pager_control != MEMORY_OBJECT_CONTROL_NULL) {
		memory_object_control_collapse(object1->pager_control,
					       object1);
	}
	if (object2->pager_control != MEMORY_OBJECT_CONTROL_NULL) {
		memory_object_control_collapse(object2->pager_control,
					       object2);
	}

	__TRANSPOSE_FIELD(absent_count);

	assert(object1->paging_in_progress);
	assert(object2->paging_in_progress);

	__TRANSPOSE_FIELD(pager_created);
	__TRANSPOSE_FIELD(pager_initialized);
	__TRANSPOSE_FIELD(pager_ready);
	__TRANSPOSE_FIELD(pager_trusted);
	__TRANSPOSE_FIELD(internal);
	__TRANSPOSE_FIELD(temporary);
	__TRANSPOSE_FIELD(private);
	__TRANSPOSE_FIELD(pageout);
	__TRANSPOSE_FIELD(true_share);
	__TRANSPOSE_FIELD(phys_contiguous);
	__TRANSPOSE_FIELD(nophyscache);
	__TRANSPOSE_FIELD(last_alloc);
	__TRANSPOSE_FIELD(sequential);
	__TRANSPOSE_FIELD(cluster_size);
	__TRANSPOSE_FIELD(existence_map);
	__TRANSPOSE_FIELD(cow_hint);
	__TRANSPOSE_FIELD(wimg_bits);

#undef __TRANSPOSE_FIELD

	retval = KERN_SUCCESS;

done:
	/*
	 * Cleanup.
	 */
	if (tmp_object != VM_OBJECT_NULL) {
		vm_object_paging_end(tmp_object);
		vm_object_unlock(tmp_object);
		/*
		 * Re-initialize the temporary object to avoid
		 * deallocating a real pager.
		 */
		_vm_object_allocate(transpose_size, tmp_object);
		vm_object_deallocate(tmp_object);
		tmp_object = VM_OBJECT_NULL;
	}

	if (object1_locked) {
		vm_object_unlock(object1);
		object1_locked = FALSE;
	}
	if (object2_locked) {
		vm_object_unlock(object2);
		object2_locked = FALSE;
	}
	if (object1_paging) {
		vm_object_lock(object1);
		vm_object_paging_end(object1);
		vm_object_unlock(object1);
		object1_paging = FALSE;
	}
	if (object2_paging) {
		vm_object_lock(object2);
		vm_object_paging_end(object2);
		vm_object_unlock(object2);
		object2_paging = FALSE;