/*
 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
46 * Carnegie Mellon requests users of this software to return to
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
59 * File: vm/vm_object.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
62 * Virtual memory object module.
66 #include <mach_pagemap.h>
67 #include <task_swapper.h>
69 #include <mach/mach_types.h>
70 #include <mach/memory_object.h>
71 #include <mach/memory_object_default.h>
72 #include <mach/memory_object_control_server.h>
73 #include <mach/vm_param.h>
75 #include <ipc/ipc_types.h>
76 #include <ipc/ipc_port.h>
78 #include <kern/kern_types.h>
79 #include <kern/assert.h>
80 #include <kern/lock.h>
81 #include <kern/queue.h>
83 #include <kern/zalloc.h>
84 #include <kern/host.h>
85 #include <kern/host_statistics.h>
86 #include <kern/processor.h>
87 #include <kern/misc_protos.h>
89 #include <vm/memory_object.h>
90 #include <vm/vm_fault.h>
91 #include <vm/vm_map.h>
92 #include <vm/vm_object.h>
93 #include <vm/vm_page.h>
94 #include <vm/vm_pageout.h>
95 #include <vm/vm_protos.h>
96 #include <vm/vm_purgeable_internal.h>
99 * Virtual memory objects maintain the actual data
100 * associated with allocated virtual memory. A given
101 * page of memory exists within exactly one object.
103 * An object is only deallocated when all "references"
106 * Associated with each object is a list of all resident
107 * memory pages belonging to that object; this list is
108 * maintained by the "vm_page" module, but locked by the object's
111 * Each object also records the memory object reference
112 * that is used by the kernel to request and write
113 * back data (the memory object, field "pager"), etc...
115 * Virtual memory objects are allocated to provide
116 * zero-filled memory (vm_allocate) or map a user-defined
117 * memory object into a virtual address space (vm_map).
119 * Virtual memory objects that refer to a user-defined
120 * memory object are called "permanent", because all changes
121 * made in virtual memory are reflected back to the
122 * memory manager, which may then store it permanently.
123 * Other virtual memory objects are called "temporary",
124 * meaning that changes need be written back only when
125 * necessary to reclaim pages, and that storage associated
126 * with the object can be discarded once it is no longer
129 * A permanent memory object may be mapped into more
130 * than one virtual address space. Moreover, two threads
131 * may attempt to make the first mapping of a memory
132 * object concurrently. Only one thread is allowed to
133 * complete this mapping; all others wait for the
134 * "pager_initialized" field is asserted, indicating
135 * that the first thread has initialized all of the
136 * necessary fields in the virtual memory object structure.
138 * The kernel relies on a *default memory manager* to
139 * provide backing storage for the zero-filled virtual
140 * memory objects. The pager memory objects associated
141 * with these temporary virtual memory objects are only
142 * requested from the default memory manager when it
143 * becomes necessary. Virtual memory objects
144 * that depend on the default memory manager are called
145 * "internal". The "pager_created" field is provided to
146 * indicate whether these ports have ever been allocated.
148 * The kernel may also create virtual memory objects to
149 * hold changed pages after a copy-on-write operation.
150 * In this case, the virtual memory object (and its
151 * backing storage -- its memory object) only contain
152 * those pages that have been changed. The "shadow"
153 * field refers to the virtual memory object that contains
154 * the remainder of the contents. The "shadow_offset"
155 * field indicates where in the "shadow" these contents begin.
156 * The "copy" field refers to a virtual memory object
157 * to which changed pages must be copied before changing
158 * this object, in order to implement another form
159 * of copy-on-write optimization.
161 * The virtual memory object structure also records
162 * the attributes associated with its memory object.
163 * The "pager_ready", "can_persist" and "copy_strategy"
164 * fields represent those attributes. The "cached_list"
165 * field is used in the implementation of the persistence
168 * ZZZ Continue this comment.
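/*
 * Editor's illustrative sketch (not part of the original code, kept under
 * "#if 0"): how the "shadow" and "shadow_offset" fields described above are
 * used conceptually.  A page that is not resident in an object is looked for
 * in its shadow at the translated offset, and so on down the chain.  The
 * real lookup, with all of the locking and copy-on-write handling, lives in
 * vm_fault_page().
 */
#if 0	/* example only */
static vm_page_t
vm_object_shadow_lookup_sketch(
    vm_object_t         object,
    vm_object_offset_t  offset)
{
    vm_page_t   m;

    while (object != VM_OBJECT_NULL) {
        m = vm_page_lookup(object, offset);
        if (m != VM_PAGE_NULL)
            return m;
        /* translate the offset into the backing object and keep walking */
        offset += object->shadow_offset;
        object = object->shadow;
    }
    return VM_PAGE_NULL;
}
#endif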
/* Forward declarations for internal functions. */
static kern_return_t	vm_object_terminate(
				vm_object_t		object);

extern void		vm_object_remove(
				vm_object_t		object);

static vm_object_t	vm_object_cache_trim(
				boolean_t		called_from_vm_object_deallocate);

static void		vm_object_deactivate_all_pages(
				vm_object_t		object);

static kern_return_t	vm_object_copy_call(
				vm_object_t		src_object,
				vm_object_offset_t	src_offset,
				vm_object_size_t	size,
				vm_object_t		*_result_object);

static void		vm_object_do_collapse(
				vm_object_t		object,
				vm_object_t		backing_object);

static void		vm_object_do_bypass(
				vm_object_t		object,
				vm_object_t		backing_object);

static void		vm_object_release_pager(
				memory_object_t		pager);

static zone_t		vm_object_zone;		/* vm backing store zone */
/*
 *  All wired-down kernel memory belongs to a single virtual
 *  memory object (kernel_object) to avoid wasting data structures.
 */
static struct vm_object		kernel_object_store;
vm_object_t			kernel_object;

/*
 *  The submap object is used as a placeholder for vm_map_submap
 *  operations.  The object is declared in vm_map.c because it
 *  is exported by the vm_map module.  The storage is declared
 *  here because it must be initialized here.
 */
static struct vm_object		vm_submap_object_store;

/*
 *  Virtual memory objects are initialized from
 *  a template (see vm_object_allocate).
 *
 *  When adding a new field to the virtual memory
 *  object structure, be sure to add initialization
 *  (see _vm_object_allocate()).
 */
static struct vm_object		vm_object_template;
/*
 *  Virtual memory objects that are not referenced by
 *  any address maps, but that are allowed to persist
 *  (an attribute specified by the associated memory manager),
 *  are kept in a queue (vm_object_cached_list).
 *
 *  When an object from this queue is referenced again,
 *  for example to make another address space mapping,
 *  it must be removed from the queue.  That is, the
 *  queue contains *only* objects with zero references.
 *
 *  The kernel may choose to terminate objects from this
 *  queue in order to reclaim storage.  The current policy
 *  is to permit a fixed maximum number of unreferenced
 *  objects (vm_object_cached_max).
 *
 *  A spin lock (accessed by routines
 *  vm_object_cache_{lock,lock_try,unlock}) governs the
 *  object cache.  It must be held when objects are
 *  added to or removed from the cache (in vm_object_terminate).
 *  The routines that acquire a reference to a virtual
 *  memory object based on one of the memory object ports
 *  must also lock the cache.
 *
 *  Ideally, the object cache should be more isolated
 *  from the reference mechanism, so that the lock need
 *  not be held to make simple references.
 */
static queue_head_t	vm_object_cached_list;
static int		vm_object_cached_count = 0;
static int		vm_object_cached_high;		/* highest # cached objects */
static int		vm_object_cached_max = 512;	/* may be patched */

static decl_mutex_data(,vm_object_cached_lock_data)

#define vm_object_cache_lock()		\
		mutex_lock(&vm_object_cached_lock_data)
#define vm_object_cache_lock_try()	\
		mutex_try(&vm_object_cached_lock_data)
#define vm_object_cache_unlock()	\
		mutex_unlock(&vm_object_cached_lock_data)
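/*
 * Editor's illustrative sketch (not original code): the cache lock defined
 * above brackets every manipulation of vm_object_cached_list and of the
 * pager-to-object hash table below; the "try" variant lets a caller that
 * must not block simply attempt to take it.
 */
#if 0	/* example only */
	vm_object_cache_lock();
	/* ... add or remove a cached object or a hash entry ... */
	vm_object_cache_unlock();

	if (vm_object_cache_lock_try()) {
		/* ... work that could not afford to block ... */
		vm_object_cache_unlock();
	}
#endif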
#define VM_OBJECT_HASH_COUNT	1024
static queue_head_t	vm_object_hashtable[VM_OBJECT_HASH_COUNT];
static struct zone	*vm_object_hash_zone;

struct vm_object_hash_entry {
	queue_chain_t	hash_link;	/* hash chain link */
	memory_object_t	pager;		/* pager we represent */
	vm_object_t	object;		/* corresponding object */
	boolean_t	waiting;	/* someone waiting for
					 * termination */
};

typedef struct vm_object_hash_entry	*vm_object_hash_entry_t;
#define VM_OBJECT_HASH_ENTRY_NULL	((vm_object_hash_entry_t) 0)

#define VM_OBJECT_HASH_SHIFT	8
#define vm_object_hash(pager) \
	((((unsigned)pager) >> VM_OBJECT_HASH_SHIFT) % VM_OBJECT_HASH_COUNT)
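/*
 * Editor's illustrative sketch (not original code): vm_object_hash() above
 * simply shifts the pager pointer right by VM_OBJECT_HASH_SHIFT and reduces
 * it modulo VM_OBJECT_HASH_COUNT, so every entry for the same pager lands in
 * the same bucket queue.
 */
#if 0	/* example only */
static queue_t
vm_object_hash_bucket_sketch(
    memory_object_t	pager)
{
	/* same bucket selection as vm_object_hash_lookup()/_insert() below */
	return &vm_object_hashtable[vm_object_hash(pager)];
}
#endif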
void	vm_object_hash_entry_free(
		vm_object_hash_entry_t	entry);

static void	vm_object_reap(vm_object_t object);
static void	vm_object_reap_async(vm_object_t object);
static void	vm_object_reaper_thread(void);
static queue_head_t	vm_object_reaper_queue;	/* protected by vm_object_cache_lock() */
unsigned int	vm_object_reap_count = 0;
unsigned int	vm_object_reap_count_async = 0;
/*
 *  vm_object_hash_lookup looks up a pager in the hashtable
 *  and returns the corresponding entry, with optional removal.
 */
static vm_object_hash_entry_t
vm_object_hash_lookup(
	memory_object_t	pager,
	boolean_t	remove_entry)
{
	register queue_t			bucket;
	register vm_object_hash_entry_t		entry;

	bucket = &vm_object_hashtable[vm_object_hash(pager)];

	entry = (vm_object_hash_entry_t)queue_first(bucket);
	while (!queue_end(bucket, (queue_entry_t)entry)) {
		if (entry->pager == pager && !remove_entry)
			return(entry);
		else if (entry->pager == pager) {
			queue_remove(bucket, entry,
				     vm_object_hash_entry_t, hash_link);
			return(entry);
		}

		entry = (vm_object_hash_entry_t)queue_next(&entry->hash_link);
	}

	return(VM_OBJECT_HASH_ENTRY_NULL);
}
/*
 *  vm_object_hash_insert enters the specified
 *  pager / cache object association in the hashtable.
 */
static void
vm_object_hash_insert(
	vm_object_hash_entry_t	entry)
{
	register queue_t	bucket;

	bucket = &vm_object_hashtable[vm_object_hash(entry->pager)];

	queue_enter(bucket, entry, vm_object_hash_entry_t, hash_link);
}
static vm_object_hash_entry_t
vm_object_hash_entry_alloc(
	memory_object_t	pager)
{
	vm_object_hash_entry_t	entry;

	entry = (vm_object_hash_entry_t)zalloc(vm_object_hash_zone);
	entry->pager = pager;
	entry->object = VM_OBJECT_NULL;
	entry->waiting = FALSE;

	return(entry);
}

void
vm_object_hash_entry_free(
	vm_object_hash_entry_t	entry)
{
	zfree(vm_object_hash_zone, entry);
}
/*
 *  vm_object_allocate:
 *
 *  Returns a new object with the given size.
 */
__private_extern__ void
_vm_object_allocate(
	vm_object_size_t	size,
	vm_object_t		object)
{
	XPR(XPR_VM_OBJECT,
	    "vm_object_allocate, object 0x%X size 0x%X\n",
	    (integer_t)object, size, 0, 0, 0);

	*object = vm_object_template;
	queue_init(&object->memq);
	queue_init(&object->msr_q);
#if UPL_DEBUG
	queue_init(&object->uplq);
#endif /* UPL_DEBUG */
	vm_object_lock_init(object);
	object->size = size;
}
__private_extern__ vm_object_t
vm_object_allocate(
	vm_object_size_t	size)
{
	register vm_object_t	object;

	object = (vm_object_t) zalloc(vm_object_zone);

//	dbgLog(object, size, 0, 2);	/* (TEST/DEBUG) */

	if (object != VM_OBJECT_NULL)
		_vm_object_allocate(size, object);

	return object;
}
lck_grp_t	vm_object_lck_grp;
lck_grp_attr_t	vm_object_lck_grp_attr;
lck_attr_t	vm_object_lck_attr;
lck_attr_t	kernel_object_lck_attr;
/*
 *  vm_object_bootstrap:
 *
 *  Initialize the VM objects module.
 */
__private_extern__ void
vm_object_bootstrap(void)
{
	register int	i;

	vm_object_zone = zinit((vm_size_t) sizeof(struct vm_object),
			       round_page_32(512*1024),
			       round_page_32(12*1024),
			       "vm objects");

	queue_init(&vm_object_reaper_queue);
	queue_init(&vm_object_cached_list);
	mutex_init(&vm_object_cached_lock_data, 0);

	vm_object_hash_zone =
		zinit((vm_size_t) sizeof (struct vm_object_hash_entry),
		      round_page_32(512*1024),
		      round_page_32(12*1024),
		      "vm object hash entries");

	for (i = 0; i < VM_OBJECT_HASH_COUNT; i++)
		queue_init(&vm_object_hashtable[i]);
	vm_object_init_lck_grp();

	/*
	 *  Fill in a template object, for quick initialization.
	 */

	/* memq; Lock; init after allocation */
	vm_object_template.memq.prev = NULL;
	vm_object_template.memq.next = NULL;
#if 0
	/*
	 * We can't call vm_object_lock_init() here because that will
	 * allocate some memory and VM is not fully initialized yet.
	 * The lock will be initialized for each allocated object in
	 * _vm_object_allocate(), so we don't need to initialize it in
	 * the vm_object_template.
	 */
	vm_object_lock_init(&vm_object_template);
#endif
	vm_object_template.size = 0;
	vm_object_template.memq_hint = VM_PAGE_NULL;
	vm_object_template.ref_count = 1;
#if	TASK_SWAPPER
	vm_object_template.res_count = 1;
#endif	/* TASK_SWAPPER */
	vm_object_template.resident_page_count = 0;
	vm_object_template.copy = VM_OBJECT_NULL;
	vm_object_template.shadow = VM_OBJECT_NULL;
	vm_object_template.shadow_offset = (vm_object_offset_t) 0;

	vm_object_template.pager = MEMORY_OBJECT_NULL;
	vm_object_template.paging_offset = 0;
	vm_object_template.pager_control = MEMORY_OBJECT_CONTROL_NULL;
	vm_object_template.copy_strategy = MEMORY_OBJECT_COPY_SYMMETRIC;
	vm_object_template.paging_in_progress = 0;
	/* Begin bitfields */
	vm_object_template.all_wanted = 0; /* all bits FALSE */
	vm_object_template.pager_created = FALSE;
	vm_object_template.pager_initialized = FALSE;
	vm_object_template.pager_ready = FALSE;
	vm_object_template.pager_trusted = FALSE;
	vm_object_template.can_persist = FALSE;
	vm_object_template.internal = TRUE;
	vm_object_template.temporary = TRUE;
	vm_object_template.private = FALSE;
	vm_object_template.pageout = FALSE;
	vm_object_template.alive = TRUE;
	vm_object_template.purgable = VM_PURGABLE_DENY;
	vm_object_template.shadowed = FALSE;
	vm_object_template.silent_overwrite = FALSE;
	vm_object_template.advisory_pageout = FALSE;
	vm_object_template.true_share = FALSE;
	vm_object_template.terminating = FALSE;
	vm_object_template.named = FALSE;
	vm_object_template.shadow_severed = FALSE;
	vm_object_template.phys_contiguous = FALSE;
	vm_object_template.nophyscache = FALSE;
	vm_object_template.cached_list.prev = NULL;
	vm_object_template.cached_list.next = NULL;
	vm_object_template.msr_q.prev = NULL;
	vm_object_template.msr_q.next = NULL;

	vm_object_template.last_alloc = (vm_object_offset_t) 0;
	vm_object_template.sequential = (vm_object_offset_t) 0;
	vm_object_template.pages_created = 0;
	vm_object_template.pages_used = 0;
#if	MACH_PAGEMAP
	vm_object_template.existence_map = VM_EXTERNAL_NULL;
#endif	/* MACH_PAGEMAP */
	vm_object_template.cow_hint = ~(vm_offset_t)0;
#if	MACH_ASSERT
	vm_object_template.paging_object = VM_OBJECT_NULL;
#endif	/* MACH_ASSERT */

	/* cache bitfields */
	vm_object_template.wimg_bits = VM_WIMG_DEFAULT;
	vm_object_template.code_signed = FALSE;
	vm_object_template.not_in_use = 0;
#if	UPL_DEBUG
	vm_object_template.uplq.prev = NULL;
	vm_object_template.uplq.next = NULL;
#endif /* UPL_DEBUG */
#if	VM_PIP_DEBUG
	bzero(&vm_object_template.pip_holders,
	      sizeof (vm_object_template.pip_holders));
#endif /* VM_PIP_DEBUG */

	vm_object_template.objq.next = NULL;
	vm_object_template.objq.prev = NULL;
	/*
	 *  Initialize the "kernel object".
	 */

	kernel_object = &kernel_object_store;

/*
 * Note that in the following size specifications, we need to add 1 because
 * VM_MAX_KERNEL_ADDRESS (vm_last_addr) is a maximum address, not a size.
 */

#ifdef ppc
	_vm_object_allocate((vm_last_addr - VM_MIN_KERNEL_ADDRESS) + 1,
			    kernel_object);
#else
	_vm_object_allocate((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) + 1,
			    kernel_object);
#endif
	kernel_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;

	/*
	 *  Initialize the "submap object".  Make it as large as the
	 *  kernel object so that no limit is imposed on submap sizes.
	 */

	vm_submap_object = &vm_submap_object_store;
#ifdef ppc
	_vm_object_allocate((vm_last_addr - VM_MIN_KERNEL_ADDRESS) + 1,
			    vm_submap_object);
#else
	_vm_object_allocate((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) + 1,
			    vm_submap_object);
#endif
	vm_submap_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;

	/*
	 * Create an "extra" reference to this object so that we never
	 * try to deallocate it; zfree doesn't like to be called with
	 * "zone" set to NULL.
	 */
	vm_object_reference(vm_submap_object);

#if	MACH_PAGEMAP
	vm_external_module_initialize();
#endif	/* MACH_PAGEMAP */
}
void
vm_object_reaper_init(void)
{
	kern_return_t	kr;
	thread_t	thread;

	kr = kernel_thread_start_priority(
		(thread_continue_t) vm_object_reaper_thread,
		NULL,
		BASEPRI_PREEMPT - 1,
		&thread);
	if (kr != KERN_SUCCESS) {
		panic("failed to launch vm_object_reaper_thread kr=0x%x", kr);
	}
	thread_deallocate(thread);
}
__private_extern__ void
vm_object_init(void)
{
	/*
	 *  Finish initializing the kernel object.
	 */
}


__private_extern__ void
vm_object_init_lck_grp(void)
{
	/*
	 * initialize the vm_object lock world
	 */
	lck_grp_attr_setdefault(&vm_object_lck_grp_attr);
	lck_grp_init(&vm_object_lck_grp, "vm_object", &vm_object_lck_grp_attr);
	lck_attr_setdefault(&vm_object_lck_attr);
	lck_attr_setdefault(&kernel_object_lck_attr);
	lck_attr_cleardebug(&kernel_object_lck_attr);
}
#define	MIGHT_NOT_CACHE_SHADOWS		1
#if	MIGHT_NOT_CACHE_SHADOWS
static int cache_shadows = TRUE;
#endif	/* MIGHT_NOT_CACHE_SHADOWS */

/*
 *  vm_object_deallocate:
 *
 *  Release a reference to the specified object,
 *  gained either through a vm_object_allocate
 *  or a vm_object_reference call.  When all references
 *  are gone, storage associated with this object
 *  may be relinquished.
 *
 *  No object may be locked.
 */
unsigned long vm_object_deallocate_shared_successes = 0;
unsigned long vm_object_deallocate_shared_failures = 0;
unsigned long vm_object_deallocate_shared_swap_failures = 0;
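/*
 * Editor's illustrative usage sketch (not original code): every
 * vm_object_allocate() or vm_object_reference() must eventually be balanced
 * by a vm_object_deallocate().  The final deallocation either caches the
 * object (if it can persist) or terminates it, as implemented below.
 */
#if 0	/* example only */
	vm_object_t	obj;

	obj = vm_object_allocate(PAGE_SIZE);	/* ref_count == 1 */
	vm_object_reference(obj);		/* ref_count == 2 */
	vm_object_deallocate(obj);		/* ref_count back to 1 */
	vm_object_deallocate(obj);		/* object cached or terminated */
#endif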
__private_extern__ void
vm_object_deallocate(
	register vm_object_t	object)
{
	boolean_t	retry_cache_trim = FALSE;
	vm_object_t	shadow = VM_OBJECT_NULL;
	uint32_t	try_failed_count = 0;

//	if(object)dbgLog(object, object->ref_count, object->can_persist, 3);	/* (TEST/DEBUG) */
//	else dbgLog(object, 0, 0, 3);	/* (TEST/DEBUG) */

	if (object == VM_OBJECT_NULL)
		return;

	if (object == kernel_object) {
		vm_object_lock(kernel_object);
		kernel_object->ref_count--;
		if (kernel_object->ref_count == 0) {
			panic("vm_object_deallocate: losing kernel_object\n");
		}
		vm_object_unlock(kernel_object);
		return;
	}

	if (object->ref_count > 2 ||
	    (!object->named && object->ref_count > 1)) {
		UInt32		original_ref_count;
		volatile UInt32	*ref_count_p;
		Boolean		atomic_swap;

		/*
		 * The object currently looks like it is not being
		 * kept alive solely by the reference we're about to release.
		 * Let's try and release our reference without taking
		 * all the locks we would need if we had to terminate the
		 * object (cache lock + exclusive object lock).
		 * Lock the object "shared" to make sure we don't race with
		 * anyone holding it "exclusive".
		 */
		vm_object_lock_shared(object);
		ref_count_p = (volatile UInt32 *) &object->ref_count;
		original_ref_count = object->ref_count;
		/*
		 * Test again as "ref_count" could have changed.
		 * "named" shouldn't change.
		 */
		if (original_ref_count > 2 ||
		    (!object->named && original_ref_count > 1)) {
			atomic_swap = OSCompareAndSwap(
				original_ref_count,
				original_ref_count - 1,
				(UInt32 *) &object->ref_count);
			if (atomic_swap == FALSE) {
				vm_object_deallocate_shared_swap_failures++;
			}
		} else {
			atomic_swap = FALSE;
		}
		vm_object_unlock(object);

		if (atomic_swap) {
			/* ref_count was updated atomically ! */
			vm_object_deallocate_shared_successes++;
			return;
		}

		/*
		 * Someone else updated the ref_count at the same
		 * time and we lost the race.  Fall back to the usual
		 * slow but safe path...
		 */
		vm_object_deallocate_shared_failures++;
	}
	while (object != VM_OBJECT_NULL) {

		/*
		 *  The cache holds a reference (uncounted) to
		 *  the object; we must lock it before removing
		 *  the object.
		 */
		for (;;) {
			vm_object_cache_lock();

			/*
			 * if we try to take a regular lock here
			 * we risk deadlocking against someone
			 * holding a lock on this object while
			 * trying to vm_object_deallocate a different
			 * object
			 */
			if (vm_object_lock_try(object))
				break;
			vm_object_cache_unlock();
			try_failed_count++;

			mutex_pause(try_failed_count);	/* wait a bit */
		}
		assert(object->ref_count > 0);

		/*
		 *  If the object has a named reference, and only
		 *  that reference would remain, inform the pager
		 *  about the last "mapping" reference going away.
		 */
		if ((object->ref_count == 2) && (object->named)) {
			memory_object_t	pager = object->pager;

			/* Notify the Pager that there are no */
			/* more mappers for this object */

			if (pager != MEMORY_OBJECT_NULL) {
				vm_object_unlock(object);
				vm_object_cache_unlock();

				memory_object_unmap(pager);

				try_failed_count = 0;
				for (;;) {
					vm_object_cache_lock();

					/*
					 * if we try to take a regular lock here
					 * we risk deadlocking against someone
					 * holding a lock on this object while
					 * trying to vm_object_deallocate a different
					 * object
					 */
					if (vm_object_lock_try(object))
						break;
					vm_object_cache_unlock();
					try_failed_count++;

					mutex_pause(try_failed_count);	/* wait a bit */
				}
				assert(object->ref_count > 0);
			}
		}
		/*
		 *  Lose the reference. If other references
		 *  remain, then we are done, unless we need
		 *  to retry a cache trim.
		 *  If it is the last reference, then keep it
		 *  until any pending initialization is completed.
		 */

		/* if the object is terminating, it cannot go into */
		/* the cache and we obviously should not call      */
		/* terminate again.  */

		if ((object->ref_count > 1) || object->terminating) {
			vm_object_lock_assert_exclusive(object);
			object->ref_count--;
			vm_object_res_deallocate(object);
			vm_object_cache_unlock();

			if (object->ref_count == 1 &&
			    object->shadow != VM_OBJECT_NULL) {
				/*
				 * There's only one reference left on this
				 * VM object.  We can't tell if it's a valid
				 * one (from a mapping for example) or if this
				 * object is just part of a possibly stale and
				 * useless shadow chain.
				 * We would like to try and collapse it into
				 * its parent, but we don't have any pointers
				 * back to this parent object.
				 * But we can try and collapse this object with
				 * its own shadows, in case these are useless
				 * too...
				 * We can't bypass this object though, since we
				 * don't know if this last reference on it is
				 * meaningful or not.
				 */
				vm_object_collapse(object, 0, FALSE);
			}

			vm_object_unlock(object);
			if (retry_cache_trim &&
			    ((object = vm_object_cache_trim(TRUE)) !=
			     VM_OBJECT_NULL)) {
				continue;
			}
			return;
		}
		/*
		 *  We have to wait for initialization
		 *  before destroying or caching the object.
		 */

		if (object->pager_created && ! object->pager_initialized) {
			assert(! object->can_persist);
			vm_object_assert_wait(object,
					      VM_OBJECT_EVENT_INITIALIZED,
					      THREAD_UNINT);
			vm_object_unlock(object);
			vm_object_cache_unlock();
			thread_block(THREAD_CONTINUE_NULL);
			continue;
		}
		/*
		 *  If this object can persist, then enter it in
		 *  the cache.  Otherwise, terminate it.
		 *
		 *  NOTE:  Only permanent objects are cached, and
		 *  permanent objects cannot have shadows.  This
		 *  affects the residence counting logic in a minor
		 *  way (can do it in-line, mostly).
		 */

		if ((object->can_persist) && (object->alive)) {
			/*
			 *  Now it is safe to decrement reference count,
			 *  and to return if reference count is > 0.
			 */
			vm_object_lock_assert_exclusive(object);
			if (--object->ref_count > 0) {
				vm_object_res_deallocate(object);
				vm_object_unlock(object);
				vm_object_cache_unlock();
				if (retry_cache_trim &&
				    ((object = vm_object_cache_trim(TRUE)) !=
				     VM_OBJECT_NULL)) {
					continue;
				}
				return;
			}

#if	MIGHT_NOT_CACHE_SHADOWS
			/*
			 *  Remove shadow now if we don't
			 *  want to cache shadows.
			 */
			if (! cache_shadows) {
				shadow = object->shadow;
				object->shadow = VM_OBJECT_NULL;
			}
#endif	/* MIGHT_NOT_CACHE_SHADOWS */

			/*
			 *  Enter the object onto the queue of
			 *  cached objects, and deactivate
			 *  all of its pages.
			 */
			assert(object->shadow == VM_OBJECT_NULL);
			VM_OBJ_RES_DECR(object);
			XPR(XPR_VM_OBJECT,
			    "vm_o_deallocate: adding %x to cache, queue = (%x, %x)\n",
			    (integer_t)object,
			    (integer_t)vm_object_cached_list.next,
			    (integer_t)vm_object_cached_list.prev, 0, 0);

			vm_object_cached_count++;
			if (vm_object_cached_count > vm_object_cached_high)
				vm_object_cached_high = vm_object_cached_count;
			queue_enter(&vm_object_cached_list, object,
				    vm_object_t, cached_list);
			vm_object_cache_unlock();
			vm_object_deactivate_all_pages(object);
			vm_object_unlock(object);
#if	MIGHT_NOT_CACHE_SHADOWS
			/*
			 *  If we have a shadow that we need
			 *  to deallocate, do so now, remembering
			 *  to trim the cache later.
			 */
			if (! cache_shadows && shadow != VM_OBJECT_NULL) {
				vm_object_deallocate(shadow);
				retry_cache_trim = TRUE;
			}
#endif	/* MIGHT_NOT_CACHE_SHADOWS */

			/*
			 *  Trim the cache. If the cache trim
			 *  returns with a shadow for us to deallocate,
			 *  then remember to retry the cache trim
			 *  when we are done deallocating the shadow.
			 *  Otherwise, we are done.
			 */

			object = vm_object_cache_trim(TRUE);
			if (object == VM_OBJECT_NULL) {
				return;
			}
			retry_cache_trim = TRUE;
		} else {
			/*
			 *  This object is not cacheable; terminate it.
			 */
			XPR(XPR_VM_OBJECT,
			    "vm_o_deallocate: !cacheable 0x%X res %d paging_ops %d thread 0x%p ref %d\n",
			    (integer_t)object, object->resident_page_count,
			    object->paging_in_progress,
			    (void *)current_thread(), object->ref_count);

			VM_OBJ_RES_DECR(object);	/* XXX ? */
			/*
			 *  Terminate this object. If it had a shadow,
			 *  then deallocate it; otherwise, if we need
			 *  to retry a cache trim, do so now; otherwise,
			 *  we are done. "pageout" objects have a shadow,
			 *  but maintain a "paging reference" rather than
			 *  a normal reference.
			 */
			shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
			if(vm_object_terminate(object) != KERN_SUCCESS) {
				return;
			}
			if (shadow != VM_OBJECT_NULL) {
				object = shadow;
				continue;
			}
			if (retry_cache_trim &&
			    ((object = vm_object_cache_trim(TRUE)) !=
			     VM_OBJECT_NULL)) {
				continue;
			}
			return;
		}
	}
	assert(! retry_cache_trim);
}
/*
 *  Check to see whether we really need to trim
 *  down the cache. If so, remove an object from
 *  the cache, terminate it, and repeat.
 *
 *  Called with, and returns with, cache lock unlocked.
 */
static vm_object_t
vm_object_cache_trim(
	boolean_t called_from_vm_object_deallocate)
{
	register vm_object_t	object = VM_OBJECT_NULL;
	vm_object_t		shadow;

	for (;;) {
		/*
		 *  If we no longer need to trim the cache,
		 *  then we are done.
		 */

		vm_object_cache_lock();
		if (vm_object_cached_count <= vm_object_cached_max) {
			vm_object_cache_unlock();
			return VM_OBJECT_NULL;
		}

		/*
		 *  We must trim down the cache, so remove
		 *  the first object in the cache.
		 */
		XPR(XPR_VM_OBJECT,
		    "vm_object_cache_trim: removing from front of cache (%x, %x)\n",
		    (integer_t)vm_object_cached_list.next,
		    (integer_t)vm_object_cached_list.prev, 0, 0, 0);

		object = (vm_object_t) queue_first(&vm_object_cached_list);
		if(object == (vm_object_t) &vm_object_cached_list) {
			/* something's wrong with the calling parameter or */
			/* the value of vm_object_cached_count, just fix   */
			/* and return                                      */
			if(vm_object_cached_max < 0)
				vm_object_cached_max = 0;
			vm_object_cached_count = 0;
			vm_object_cache_unlock();
			return VM_OBJECT_NULL;
		}
		vm_object_lock(object);
		queue_remove(&vm_object_cached_list, object, vm_object_t,
			     cached_list);
		vm_object_cached_count--;

		/*
		 *  Since this object is in the cache, we know
		 *  that it is initialized and has no references.
		 *  Take a reference to avoid recursive deallocations.
		 */

		assert(object->pager_initialized);
		assert(object->ref_count == 0);
		vm_object_lock_assert_exclusive(object);
		object->ref_count++;

		/*
		 *  Terminate the object.
		 *  If the object had a shadow, we let vm_object_deallocate
		 *  deallocate it. "pageout" objects have a shadow, but
		 *  maintain a "paging reference" rather than a normal
		 *  reference.
		 *  (We are careful here to limit recursion.)
		 */
		shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
		if(vm_object_terminate(object) != KERN_SUCCESS)
			continue;
		if (shadow != VM_OBJECT_NULL) {
			if (called_from_vm_object_deallocate) {
				return shadow;
			} else {
				vm_object_deallocate(shadow);
			}
		}
	}
}
#define VM_OBJ_TERM_STATS DEBUG
#if VM_OBJ_TERM_STATS
uint32_t vm_object_terminate_pages_freed = 0;
uint32_t vm_object_terminate_pages_removed = 0;
uint32_t vm_object_terminate_batches = 0;
uint32_t vm_object_terminate_biggest_batch = 0;
#endif /* VM_OBJ_TERM_STATS */

#define V_O_T_MAX_BATCH	256
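/*
 * Editor's illustrative sketch (not original code) of the batching pattern
 * used by vm_object_terminate() and vm_object_reap() below: reclaimed pages
 * are accumulated on a local singly-linked list and handed to
 * vm_page_free_list() every V_O_T_MAX_BATCH iterations, so the page queues
 * lock is dropped periodically instead of being held for the whole object.
 */
#if 0	/* example only */
	int		loop_count = V_O_T_MAX_BATCH;
	vm_page_t	local_free_q = VM_PAGE_NULL;

	while (/* pages remain */ 0) {
		if (--loop_count == 0) {
			if (local_free_q) {
				vm_page_free_list(local_free_q);
				local_free_q = VM_PAGE_NULL;
			}
			mutex_yield(&vm_page_queue_lock);
			loop_count = V_O_T_MAX_BATCH;
		}
		/* ... unlink one page and chain it onto local_free_q ... */
	}
#endif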
/*
 *  Routine:    vm_object_terminate
 *  Purpose:
 *      Free all resources associated with a vm_object.
 *  In/out conditions:
 *      Upon entry, the object must be locked,
 *      and the object must have exactly one reference.
 *
 *      The shadow object reference is left alone.
 *
 *      The object must be unlocked if it's found that pages
 *      must be flushed to a backing object.  If someone
 *      manages to map the object while it is being flushed
 *      the object is returned unlocked and unchanged.  Otherwise,
 *      upon exit, the cache will be unlocked, and the
 *      object will cease to exist.
 */
static kern_return_t
vm_object_terminate(
	register vm_object_t	object)
{
	register vm_page_t	p;
	vm_object_t		shadow_object;
	vm_page_t		local_free_q;
	int			loop_count;
#if VM_OBJ_TERM_STATS
	uint32_t		local_free_count;
	uint32_t		pages_removed;
#endif /* VM_OBJ_TERM_STATS */

#if VM_OBJ_TERM_STATS
#define VM_OBJ_TERM_FREELIST_DEBUG(_pages_removed, _local_free_count)	\
	MACRO_BEGIN							\
	if (_pages_removed) {						\
		hw_atomic_add(&vm_object_terminate_batches, 1);		\
		hw_atomic_add(&vm_object_terminate_pages_removed,	\
			      _pages_removed);				\
		hw_atomic_add(&vm_object_terminate_pages_freed,	\
			      _local_free_count);			\
		if (_local_free_count >					\
		    vm_object_terminate_biggest_batch) {		\
			vm_object_terminate_biggest_batch =		\
				_local_free_count;			\
		}							\
		_local_free_count = 0;					\
	}								\
	MACRO_END
#else /* VM_OBJ_TERM_STATS */
#define VM_OBJ_TERM_FREELIST_DEBUG(_pages_removed, _local_free_count)
#endif /* VM_OBJ_TERM_STATS */

#define VM_OBJ_TERM_FREELIST(_pages_removed, _local_free_count, _local_free_q) \
	MACRO_BEGIN							\
	VM_OBJ_TERM_FREELIST_DEBUG(_pages_removed, _local_free_count);	\
	if (_local_free_q) {						\
		vm_page_free_list(_local_free_q);			\
		_local_free_q = VM_PAGE_NULL;				\
	}								\
	MACRO_END
	XPR(XPR_VM_OBJECT, "vm_object_terminate, object 0x%X ref %d\n",
	    (integer_t)object, object->ref_count, 0, 0, 0);

	local_free_q = VM_PAGE_NULL;
#if VM_OBJ_TERM_STATS
	local_free_count = 0;
	pages_removed = 0;
#endif /* VM_OBJ_TERM_STATS */

	if (!object->pageout && (!object->temporary || object->can_persist)
	    && (object->pager != NULL || object->shadow_severed)) {
		vm_object_cache_unlock();
		loop_count = V_O_T_MAX_BATCH;
		vm_page_lock_queues();
		while (!queue_empty(&object->memq)) {
			if (--loop_count == 0) {
				/*
				 * Free the pages we've reclaimed so far and
				 * take a little break to avoid hogging
				 * the page queues lock too long.
				 */
				VM_OBJ_TERM_FREELIST(pages_removed,
						     local_free_count,
						     local_free_q);
				mutex_yield(&vm_page_queue_lock);
				loop_count = V_O_T_MAX_BATCH;
			}
			/*
			 *  Clear pager_trusted bit so that the pages get yanked
			 *  out of the object instead of cleaned in place.  This
			 *  prevents a deadlock in XMM and makes more sense anyway.
			 */
			object->pager_trusted = FALSE;

			p = (vm_page_t) queue_first(&object->memq);

			if (p->busy || p->cleaning) {
				if(p->cleaning || p->absent) {
					/* free the pages reclaimed so far */
					VM_OBJ_TERM_FREELIST(pages_removed,
							     local_free_count,
							     local_free_q);
					vm_page_unlock_queues();
					vm_object_paging_wait(object, THREAD_UNINT);
					vm_page_lock_queues();
					continue;
				} else {
					panic("vm_object_terminate.3 %p %p", object, p);
				}
			}

			VM_PAGE_QUEUES_REMOVE(p);
#if VM_OBJ_TERM_STATS
			pages_removed++;
#endif /* VM_OBJ_TERM_STATS */

			if (p->absent || p->private) {

				/*
				 *  For private pages, VM_PAGE_FREE just
				 *  leaves the page structure around for
				 *  its owner to clean up.  For absent
				 *  pages, the structure is returned to
				 *  the appropriate pool.
				 */

				goto free_page;
			}

			if (p->fictitious) {
				if (p->phys_page == vm_page_guard_addr) {
					goto free_page;
				}
				panic("vm_object_terminate.4 %p %p", object, p);
			}

			if (!p->dirty && p->wpmapped)
				p->dirty = pmap_is_modified(p->phys_page);

			if ((p->dirty || p->precious) && !p->error && object->alive) {
				/* free the pages reclaimed so far */
				VM_OBJ_TERM_FREELIST(pages_removed,
						     local_free_count,
						     local_free_q);
				vm_page_unlock_queues();
				vm_pageout_cluster(p);	/* flush page */
				vm_object_paging_wait(object, THREAD_UNINT);
				XPR(XPR_VM_OBJECT,
				    "vm_object_terminate restart, object 0x%X ref %d\n",
				    (integer_t)object, object->ref_count, 0, 0, 0);
				vm_page_lock_queues();
			} else {
			    free_page:
				/*
				 * Add this page to our list of reclaimed pages,
				 * to be freed later.
				 */
				vm_page_free_prepare(p);
				p->pageq.next = (queue_entry_t) local_free_q;
				local_free_q = p;
#if VM_OBJ_TERM_STATS
				local_free_count++;
#endif /* VM_OBJ_TERM_STATS */
			}
		}

		/*
		 *  Free the remaining reclaimed pages.
		 */
		VM_OBJ_TERM_FREELIST(pages_removed,
				     local_free_count,
				     local_free_q);
		vm_page_unlock_queues();
		vm_object_unlock(object);
		vm_object_cache_lock();
		vm_object_lock(object);
	}
	/*
	 *  Make sure the object isn't already being terminated.
	 */
	if(object->terminating) {
		vm_object_lock_assert_exclusive(object);
		object->ref_count--;
		assert(object->ref_count > 0);
		vm_object_cache_unlock();
		vm_object_unlock(object);
		return KERN_FAILURE;
	}
	/*
	 *  Did somebody get a reference to the object while we were
	 *  cleaning it?
	 */
	if(object->ref_count != 1) {
		vm_object_lock_assert_exclusive(object);
		object->ref_count--;
		assert(object->ref_count > 0);
		vm_object_res_deallocate(object);
		vm_object_cache_unlock();
		vm_object_unlock(object);
		return KERN_FAILURE;
	}
	/*
	 *  Make sure no one can look us up now.
	 */

	object->terminating = TRUE;
	object->alive = FALSE;
	vm_object_remove(object);

	/*
	 *  Detach the object from its shadow if we are the shadow's
	 *  copy.  The reference we hold on the shadow must be dropped
	 *  by our caller.
	 */
	if (((shadow_object = object->shadow) != VM_OBJECT_NULL) &&
	    !(object->pageout)) {
		vm_object_lock(shadow_object);
		if (shadow_object->copy == object)
			shadow_object->copy = VM_OBJECT_NULL;
		vm_object_unlock(shadow_object);
	}
	if (object->paging_in_progress != 0) {
		/*
		 * There are still some paging_in_progress references
		 * on this object, meaning that there are some paging
		 * or other I/O operations in progress for this VM object.
		 * Such operations take some paging_in_progress references
		 * up front to ensure that the object doesn't go away, but
		 * they may also need to acquire a reference on the VM object,
		 * to map it in kernel space, for example.  That means that
		 * they may end up releasing the last reference on the VM
		 * object, triggering its termination, while still holding
		 * paging_in_progress references.  Waiting for these
		 * pending paging_in_progress references to go away here would
		 * deadlock.
		 *
		 * To avoid deadlocking, we'll let the vm_object_reaper_thread
		 * complete the VM object termination if it still holds
		 * paging_in_progress references at this point.
		 *
		 * No new paging_in_progress should appear now that the
		 * VM object is "terminating" and not "alive".
		 */
		vm_object_reap_async(object);
		vm_object_cache_unlock();
		vm_object_unlock(object);
		/*
		 * Return KERN_FAILURE to let the caller know that we
		 * haven't completed the termination and it can't drop this
		 * object's reference on its shadow object yet.
		 * The reaper thread will take care of that once it has
		 * completed this object's termination.
		 */
		return KERN_FAILURE;
	}
	/* complete the VM object termination */
	vm_object_reap(object);
	object = VM_OBJECT_NULL;
	/* cache lock and object lock were released by vm_object_reap() */

	/*
	 * KERN_SUCCESS means that this object has been terminated
	 * and no longer needs its shadow object but still holds a
	 * reference on it.
	 * The caller is responsible for dropping that reference.
	 * We can't call vm_object_deallocate() here because that
	 * would create a recursion.
	 */
	return KERN_SUCCESS;
}
/*
 * vm_object_reap():
 *
 * Complete the termination of a VM object after it's been marked
 * as "terminating" and "!alive" by vm_object_terminate().
 *
 * The VM object cache and the VM object must be locked by caller.
 * The locks will be released on return and the VM object is no longer valid.
 */
static void
vm_object_reap(
	vm_object_t object)
{
	memory_object_t		pager;
	vm_page_t		p;
	vm_page_t		local_free_q;
	int			loop_count;
#if VM_OBJ_TERM_STATS
	uint32_t		local_free_count;
#endif /* VM_OBJ_TERM_STATS */

#if DEBUG
	mutex_assert(&vm_object_cached_lock_data, MA_OWNED);
#endif /* DEBUG */
	vm_object_lock_assert_exclusive(object);
	assert(object->paging_in_progress == 0);

	vm_object_reap_count++;

	local_free_q = VM_PAGE_NULL;
#if VM_OBJ_TERM_STATS
	local_free_count = 0;
#endif /* VM_OBJ_TERM_STATS */
	pager = object->pager;
	object->pager = MEMORY_OBJECT_NULL;

	if (pager != MEMORY_OBJECT_NULL)
		memory_object_control_disable(object->pager_control);
	vm_object_cache_unlock();

	vm_object_lock_assert_exclusive(object);
	object->ref_count--;
#if	TASK_SWAPPER
	assert(object->res_count == 0);
#endif	/* TASK_SWAPPER */

	assert (object->ref_count == 0);

	/* remove from purgeable queue if it's on */
	if (object->objq.next || object->objq.prev) {
		purgeable_q_t queue = vm_purgeable_object_remove(object);
		assert(queue);

		/* Must take page lock for this - using it to protect token queue */
		vm_page_lock_queues();
		vm_purgeable_token_delete_first(queue);

		assert(queue->debug_count_objects>=0);
		vm_page_unlock_queues();
	}
	/*
	 *  Clean or free the pages, as appropriate.
	 *  It is possible for us to find busy/absent pages,
	 *  if some faults on this object were aborted.
	 */
	if (object->pageout) {
		assert(object->shadow != VM_OBJECT_NULL);

		vm_pageout_object_terminate(object);

	} else if ((object->temporary && !object->can_persist) ||
		   (pager == MEMORY_OBJECT_NULL)) {
		loop_count = V_O_T_MAX_BATCH;
		vm_page_lock_queues();
		while (!queue_empty(&object->memq)) {
			if (--loop_count == 0) {
				/*
				 * Free the pages we reclaimed so far
				 * and take a little break to avoid
				 * hogging the page queue lock too long.
				 */
				VM_OBJ_TERM_FREELIST(local_free_count,
						     local_free_count,
						     local_free_q);
				mutex_yield(&vm_page_queue_lock);
				loop_count = V_O_T_MAX_BATCH;
			}
			p = (vm_page_t) queue_first(&object->memq);

			vm_page_free_prepare(p);

			assert(p->pageq.next == NULL && p->pageq.prev == NULL);
			p->pageq.next = (queue_entry_t) local_free_q;
			local_free_q = p;
#if VM_OBJ_TERM_STATS
			local_free_count++;
#endif /* VM_OBJ_TERM_STATS */
		}
		/*
		 * Free the remaining reclaimed pages.
		 */
		VM_OBJ_TERM_FREELIST(local_free_count,
				     local_free_count,
				     local_free_q);
		vm_page_unlock_queues();
	} else if (!queue_empty(&object->memq)) {
		panic("vm_object_reap: queue just emptied isn't");
	}
	assert(object->paging_in_progress == 0);
	assert(object->ref_count == 0);

	/*
	 * If the pager has not already been released by
	 * vm_object_destroy, we need to terminate it and
	 * release our reference to it here.
	 */
	if (pager != MEMORY_OBJECT_NULL) {
		vm_object_unlock(object);
		vm_object_release_pager(pager);
		vm_object_lock(object);
	}

	/* kick off anyone waiting on terminating */
	object->terminating = FALSE;
	vm_object_paging_begin(object);
	vm_object_paging_end(object);
	vm_object_unlock(object);

#if	MACH_PAGEMAP
	vm_external_destroy(object->existence_map, object->size);
#endif	/* MACH_PAGEMAP */

	object->shadow = VM_OBJECT_NULL;

	vm_object_lock_destroy(object);
	/*
	 *  Free the space for the object.
	 */
	zfree(vm_object_zone, object);
	object = VM_OBJECT_NULL;
}
static void
vm_object_reap_async(
	vm_object_t object)
{
#if DEBUG
	mutex_assert(&vm_object_cached_lock_data, MA_OWNED);
#endif /* DEBUG */
	vm_object_lock_assert_exclusive(object);

	vm_object_reap_count_async++;

	/* enqueue the VM object... */
	queue_enter(&vm_object_reaper_queue, object,
		    vm_object_t, cached_list);
	/* ... and wake up the reaper thread */
	thread_wakeup((event_t) &vm_object_reaper_queue);
}
static void
vm_object_reaper_thread(void)
{
	vm_object_t	object, shadow_object;

	vm_object_cache_lock();

	while (!queue_empty(&vm_object_reaper_queue)) {
		queue_remove_first(&vm_object_reaper_queue,
				   object,
				   vm_object_t,
				   cached_list);
		vm_object_lock(object);
		assert(object->terminating);
		assert(!object->alive);

		/*
		 * The pageout daemon might be playing with our pages.
		 * Now that the object is dead, it won't touch any more
		 * pages, but some pages might already be on their way out.
		 * Hence, we wait until the active paging activities have
		 * ceased before we break the association with the pager
		 * itself.
		 */
		while (object->paging_in_progress != 0) {
			vm_object_cache_unlock();
			vm_object_wait(object,
				       VM_OBJECT_EVENT_PAGING_IN_PROGRESS,
				       THREAD_UNINT);
			vm_object_cache_lock();
			vm_object_lock(object);
		}

		shadow_object =
			object->pageout ? VM_OBJECT_NULL : object->shadow;

		vm_object_reap(object);
		/* cache is unlocked and object is no longer valid */
		object = VM_OBJECT_NULL;

		if (shadow_object != VM_OBJECT_NULL) {
			/*
			 * Drop the reference "object" was holding on
			 * its shadow object.
			 */
			vm_object_deallocate(shadow_object);
			shadow_object = VM_OBJECT_NULL;
		}

		vm_object_cache_lock();
	}

	/* wait for more work... */
	assert_wait((event_t) &vm_object_reaper_queue, THREAD_UNINT);
	vm_object_cache_unlock();
	thread_block((thread_continue_t) vm_object_reaper_thread);
	/*NOTREACHED*/
}
/*
 *  Routine:    vm_object_pager_wakeup
 *  Purpose:    Wake up anyone waiting for termination of a pager.
 */
static void
vm_object_pager_wakeup(
	memory_object_t	pager)
{
	vm_object_hash_entry_t	entry;
	boolean_t		waiting = FALSE;

	/*
	 *  If anyone was waiting for the memory_object_terminate
	 *  to be queued, wake them up now.
	 */
	vm_object_cache_lock();
	entry = vm_object_hash_lookup(pager, TRUE);
	if (entry != VM_OBJECT_HASH_ENTRY_NULL)
		waiting = entry->waiting;
	vm_object_cache_unlock();
	if (entry != VM_OBJECT_HASH_ENTRY_NULL) {
		if (waiting)
			thread_wakeup((event_t) pager);
		vm_object_hash_entry_free(entry);
	}
}
/*
 *  Routine:    vm_object_release_pager
 *  Purpose:    Terminate the pager and, upon completion,
 *              release our last reference to it.
 *              Just like memory_object_terminate, except
 *              that we wake up anyone blocked in vm_object_enter
 *              waiting for the termination message to be queued
 *              before calling memory_object_init.
 */
static void
vm_object_release_pager(
	memory_object_t	pager)
{
	/*
	 *  Terminate the pager.
	 */

	(void) memory_object_terminate(pager);

	/*
	 *  Wakeup anyone waiting for this terminate.
	 */
	vm_object_pager_wakeup(pager);

	/*
	 *  Release reference to pager.
	 */
	memory_object_deallocate(pager);
}
/*
 *  Routine:    vm_object_destroy
 *  Purpose:
 *      Shut down a VM object, despite the
 *      presence of address map (or other) references
 *      to the vm_object.
 */
kern_return_t
vm_object_destroy(
	vm_object_t		object,
	__unused kern_return_t	reason)
{
	memory_object_t		old_pager;

	if (object == VM_OBJECT_NULL)
		return(KERN_SUCCESS);

	/*
	 *  Remove the pager association immediately.
	 *
	 *  This will prevent the memory manager from further
	 *  meddling.  [If it wanted to flush data or make
	 *  other changes, it should have done so before performing
	 *  the destroy call.]
	 */

	vm_object_cache_lock();
	vm_object_lock(object);
	object->can_persist = FALSE;
	object->named = FALSE;
	object->alive = FALSE;

	/*
	 *  Rip out the pager from the vm_object now...
	 */

	vm_object_remove(object);
	old_pager = object->pager;
	object->pager = MEMORY_OBJECT_NULL;
	if (old_pager != MEMORY_OBJECT_NULL)
		memory_object_control_disable(object->pager_control);
	vm_object_cache_unlock();

	/*
	 * Wait for the existing paging activity (that got
	 * through before we nulled out the pager) to subside.
	 */

	vm_object_paging_wait(object, THREAD_UNINT);
	vm_object_unlock(object);

	/*
	 *  Terminate the object now.
	 */
	if (old_pager != MEMORY_OBJECT_NULL) {
		vm_object_release_pager(old_pager);

		/*
		 * JMM - Release the caller's reference.  This assumes the
		 * caller had a reference to release, which is a big (but
		 * currently valid) assumption if this is driven from the
		 * vnode pager (it is holding a named reference when making
		 * this call)..
		 */
		vm_object_deallocate(object);
	}
	return(KERN_SUCCESS);
}
#define VM_OBJ_DEACT_ALL_STATS DEBUG
#if VM_OBJ_DEACT_ALL_STATS
uint32_t vm_object_deactivate_all_pages_batches = 0;
uint32_t vm_object_deactivate_all_pages_pages = 0;
#endif /* VM_OBJ_DEACT_ALL_STATS */
/*
 *  vm_object_deactivate_all_pages
 *
 *  Deactivate all pages in the specified object.  (Keep its pages
 *  in memory even though it is no longer referenced.)
 *
 *  The object must be locked.
 */
static void
vm_object_deactivate_all_pages(
	register vm_object_t	object)
{
	register vm_page_t	p;
	int			loop_count;
#if VM_OBJ_DEACT_ALL_STATS
	int			pages_count;
#endif /* VM_OBJ_DEACT_ALL_STATS */
#define V_O_D_A_P_MAX_BATCH	256

	loop_count = V_O_D_A_P_MAX_BATCH;
#if VM_OBJ_DEACT_ALL_STATS
	pages_count = 0;
#endif /* VM_OBJ_DEACT_ALL_STATS */
	vm_page_lock_queues();
	queue_iterate(&object->memq, p, vm_page_t, listq) {
		if (--loop_count == 0) {
#if VM_OBJ_DEACT_ALL_STATS
			hw_atomic_add(&vm_object_deactivate_all_pages_batches,
				      1);
			hw_atomic_add(&vm_object_deactivate_all_pages_pages,
				      pages_count);
			pages_count = 0;
#endif /* VM_OBJ_DEACT_ALL_STATS */
			mutex_yield(&vm_page_queue_lock);
			loop_count = V_O_D_A_P_MAX_BATCH;
		}
		if (!p->busy && !p->throttled) {
#if VM_OBJ_DEACT_ALL_STATS
			pages_count++;
#endif /* VM_OBJ_DEACT_ALL_STATS */
			vm_page_deactivate(p);
		}
	}
#if VM_OBJ_DEACT_ALL_STATS
	if (pages_count) {
		hw_atomic_add(&vm_object_deactivate_all_pages_batches, 1);
		hw_atomic_add(&vm_object_deactivate_all_pages_pages,
			      pages_count);
		pages_count = 0;
	}
#endif /* VM_OBJ_DEACT_ALL_STATS */
	vm_page_unlock_queues();
}
__private_extern__ void
vm_object_deactivate_pages(
	vm_object_t		object,
	vm_object_offset_t	offset,
	vm_object_size_t	size,
	boolean_t		kill_page)
{
	vm_object_t	orig_object;
	int		pages_moved = 0;
	int		pages_found = 0;

	/*
	 * entered with object lock held, acquire a paging reference to
	 * prevent the memory_object and control ports from
	 * being destroyed.
	 */
	orig_object = object;

	for (;;) {
		register vm_page_t	m;
		vm_object_offset_t	toffset;
		vm_object_size_t	tsize;

		vm_object_paging_begin(object);
		vm_page_lock_queues();

		for (tsize = size, toffset = offset; tsize; tsize -= PAGE_SIZE, toffset += PAGE_SIZE) {

			if ((m = vm_page_lookup(object, toffset)) != VM_PAGE_NULL) {

				pages_found++;

				if ((m->wire_count == 0) && (!m->private) && (!m->gobbled) && (!m->busy)) {

					assert(!m->laundry);

					m->reference = FALSE;
					pmap_clear_reference(m->phys_page);

					if ((kill_page) && (object->internal)) {
						m->precious = FALSE;
						m->dirty = FALSE;
						pmap_clear_modify(m->phys_page);
#if	MACH_PAGEMAP
						vm_external_state_clr(object->existence_map, offset);
#endif	/* MACH_PAGEMAP */
					}

					if (!m->throttled) {
						VM_PAGE_QUEUES_REMOVE(m);

						assert(!m->laundry);
						assert(m->object != kernel_object);
						assert(m->pageq.next == NULL &&
						       m->pageq.prev == NULL);

						if(m->zero_fill) {
							queue_enter_first(
								&vm_page_queue_zf,
								m, vm_page_t, pageq);
							vm_zf_queue_count++;
						} else {
							queue_enter_first(
								&vm_page_queue_inactive,
								m, vm_page_t, pageq);
						}

						m->inactive = TRUE;
						if (!m->fictitious) {
							vm_page_inactive_count++;
							token_new_pagecount++;
						} else {
							assert(m->phys_page == vm_page_fictitious_addr);
						}

						pages_moved++;
					}
				}
			}
		}
		vm_page_unlock_queues();
		vm_object_paging_end(object);

		if (object->shadow) {
			vm_object_t	tmp_object;

			kill_page = 0;

			offset += object->shadow_offset;

			tmp_object = object->shadow;
			vm_object_lock(tmp_object);

			if (object != orig_object)
				vm_object_unlock(object);
			object = tmp_object;
		} else
			break;
	}
	if (object != orig_object)
		vm_object_unlock(object);
}
/*
 *  Routine:    vm_object_pmap_protect
 *
 *  Purpose:
 *      Reduces the permission for all physical
 *      pages in the specified object range.
 *
 *      If removing write permission only, it is
 *      sufficient to protect only the pages in
 *      the top-level object; only those pages may
 *      have write permission.
 *
 *      If removing all access, we must follow the
 *      shadow chain from the top-level object to
 *      remove access to all pages in shadowed objects.
 *
 *      The object must *not* be locked.  The object must
 *      be temporary/internal.
 *
 *      If pmap is not NULL, this routine assumes that
 *      the only mappings for the pages are in that
 *      pmap.
 */
__private_extern__ void
vm_object_pmap_protect(
	register vm_object_t		object,
	register vm_object_offset_t	offset,
	vm_object_size_t		size,
	pmap_t				pmap,
	vm_map_offset_t			pmap_start,
	vm_prot_t			prot)
{
	if (object == VM_OBJECT_NULL)
		return;
	size = vm_object_round_page(size);
	offset = vm_object_trunc_page(offset);

	vm_object_lock(object);

	if (object->phys_contiguous) {
		if (pmap != NULL) {
			vm_object_unlock(object);
			pmap_protect(pmap, pmap_start, pmap_start + size, prot);
		} else {
			vm_object_offset_t phys_start, phys_end, phys_addr;

			phys_start = object->shadow_offset + offset;
			phys_end = phys_start + size;
			assert(phys_start <= phys_end);
			assert(phys_end <= object->shadow_offset + object->size);
			vm_object_unlock(object);

			for (phys_addr = phys_start;
			     phys_addr < phys_end;
			     phys_addr += PAGE_SIZE_64) {
				pmap_page_protect(phys_addr >> 12, prot);
			}
		}
		return;
	}

	assert(object->internal);

	while (TRUE) {
	    if (ptoa_64(object->resident_page_count) > size/2 && pmap != PMAP_NULL) {
		vm_object_unlock(object);
		pmap_protect(pmap, pmap_start, pmap_start + size, prot);
		return;
	    }

	    /* if we are doing large ranges with respect to resident */
	    /* page count then we should iterate over pages otherwise */
	    /* inverse page look-up will be faster */
	    if (ptoa_64(object->resident_page_count / 4) < size) {
		vm_page_t		p;
		vm_object_offset_t	end;

		end = offset + size;

		if (pmap != PMAP_NULL) {
			queue_iterate(&object->memq, p, vm_page_t, listq) {
				if (!p->fictitious &&
				    (offset <= p->offset) && (p->offset < end)) {
					vm_map_offset_t start;

					start = pmap_start + p->offset - offset;
					pmap_protect(pmap, start, start + PAGE_SIZE_64, prot);
				}
			}
		} else {
			queue_iterate(&object->memq, p, vm_page_t, listq) {
				if (!p->fictitious &&
				    (offset <= p->offset) && (p->offset < end)) {

					pmap_page_protect(p->phys_page, prot);
				}
			}
		}
	    } else {
		vm_page_t		p;
		vm_object_offset_t	end;
		vm_object_offset_t	target_off;

		end = offset + size;

		if (pmap != PMAP_NULL) {
			for(target_off = offset;
			    target_off < end;
			    target_off += PAGE_SIZE) {
				p = vm_page_lookup(object, target_off);
				if (p != VM_PAGE_NULL) {
					vm_offset_t start;

					start = pmap_start +
						(vm_offset_t)(p->offset - offset);
					pmap_protect(pmap, start,
						     start + PAGE_SIZE, prot);
				}
			}
		} else {
			for(target_off = offset;
			    target_off < end; target_off += PAGE_SIZE) {
				p = vm_page_lookup(object, target_off);
				if (p != VM_PAGE_NULL) {
					pmap_page_protect(p->phys_page, prot);
				}
			}
		}
	    }

	    if (prot == VM_PROT_NONE) {
		/*
		 * Must follow shadow chain to remove access
		 * to pages in shadowed objects.
		 */
		register vm_object_t	next_object;

		next_object = object->shadow;
		if (next_object != VM_OBJECT_NULL) {
			offset += object->shadow_offset;
			vm_object_lock(next_object);
			vm_object_unlock(object);
			object = next_object;
		} else {
			/*
			 * End of chain - we are done.
			 */
			break;
		}
	    } else {
		/*
		 * Pages in shadowed objects may never have
		 * write permission - we may stop here.
		 */
		break;
	    }
	}

	vm_object_unlock(object);
}
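/*
 * Editor's illustrative usage sketch (not original code; "map", "obj",
 * "obj_offset", "range_size" and "range_start" are hypothetical locals):
 * removing write permission from a range that is known to be mapped only
 * through "map"'s physical map.
 */
#if 0	/* example only */
	vm_object_pmap_protect(obj,
			       obj_offset,
			       range_size,
			       vm_map_pmap(map),
			       range_start,
			       VM_PROT_READ);
#endif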
/*
 *  Routine:    vm_object_copy_slowly
 *
 *  Description:
 *      Copy the specified range of the source
 *      virtual memory object without using
 *      protection-based optimizations (such
 *      as copy-on-write).  The pages in the
 *      region are actually copied.
 *
 *  In/out conditions:
 *      The caller must hold a reference and a lock
 *      for the source virtual memory object.  The source
 *      object will be returned *unlocked*.
 *
 *  Results:
 *      If the copy is completed successfully, KERN_SUCCESS is
 *      returned.  If the caller asserted the interruptible
 *      argument, and an interruption occurred while waiting
 *      for a user-generated event, MACH_SEND_INTERRUPTED is
 *      returned.  Other values may be returned to indicate
 *      hard errors during the copy operation.
 *
 *      A new virtual memory object is returned in a
 *      parameter (_result_object).  The contents of this
 *      new object, starting at a zero offset, are a copy
 *      of the source memory region.  In the event of
 *      an error, this parameter will contain the value
 *      VM_OBJECT_NULL.
 */
__private_extern__ kern_return_t
vm_object_copy_slowly(
	register vm_object_t	src_object,
	vm_object_offset_t	src_offset,
	vm_object_size_t	size,
	boolean_t		interruptible,
	vm_object_t		*_result_object)	/* OUT */
{
	vm_object_t		new_object;
	vm_object_offset_t	new_offset;

	struct vm_object_fault_info fault_info;

	XPR(XPR_VM_OBJECT, "v_o_c_slowly obj 0x%x off 0x%x size 0x%x\n",
	    src_object, src_offset, size, 0, 0);

	if (size == 0) {
		vm_object_unlock(src_object);
		*_result_object = VM_OBJECT_NULL;
		return(KERN_INVALID_ARGUMENT);
	}

	/*
	 *  Prevent destruction of the source object while we copy.
	 */

	vm_object_reference_locked(src_object);
	vm_object_unlock(src_object);

	/*
	 *  Create a new object to hold the copied pages.
	 *  We fill the new object starting at offset 0,
	 *  regardless of the input offset.
	 *  We don't bother to lock the new object within
	 *  this routine, since we have the only reference.
	 */

	new_object = vm_object_allocate(size);
	new_offset = 0;

	assert(size == trunc_page_64(size));	/* Will the loop terminate? */

	fault_info.interruptible = interruptible;
	fault_info.behavior  = VM_BEHAVIOR_SEQUENTIAL;
	fault_info.user_tag  = 0;
	fault_info.lo_offset = src_offset;
	fault_info.hi_offset = src_offset + size;
	fault_info.no_cache  = FALSE;

	for ( ;
	    size != 0 ;
	    src_offset += PAGE_SIZE_64,
	    new_offset += PAGE_SIZE_64, size -= PAGE_SIZE_64
	    ) {
		vm_page_t		new_page;
		vm_fault_return_t	result;
;
2112 vm_object_lock(new_object
);
2114 while ((new_page
= vm_page_alloc(new_object
, new_offset
))
2117 vm_object_unlock(new_object
);
2119 if (!vm_page_wait(interruptible
)) {
2120 vm_object_deallocate(new_object
);
2121 vm_object_deallocate(src_object
);
2122 *_result_object
= VM_OBJECT_NULL
;
2123 return(MACH_SEND_INTERRUPTED
);
2125 vm_object_lock(new_object
);
2127 vm_object_unlock(new_object
);
		do {
			vm_prot_t	prot = VM_PROT_READ;
			vm_page_t	_result_page;
			vm_page_t	top_page;
			register
			vm_page_t	result_page;
			kern_return_t	error_code;

			vm_object_lock(src_object);
			vm_object_paging_begin(src_object);

			fault_info.cluster_size = size;

			XPR(XPR_VM_FAULT,"vm_object_copy_slowly -> vm_fault_page",0,0,0,0,0);
			result = vm_fault_page(src_object, src_offset,
				VM_PROT_READ, FALSE,
				&prot, &_result_page, &top_page,
				(int *)0,
				&error_code, FALSE, FALSE, &fault_info);
			switch(result) {
			case VM_FAULT_SUCCESS:
				result_page = _result_page;

				/*
				 *  We don't need to hold the object
				 *  lock -- the busy page will be enough.
				 *  [We don't care about picking up any
				 *  new modifications.]
				 *
				 *  Copy the page to the new object.
				 *
				 *  POLICY DECISION:
				 *      If result_page is clean,
				 *      we could steal it instead
				 *      of copying.
				 */

				vm_object_unlock(result_page->object);
				vm_page_copy(result_page, new_page);

				/*
				 *  Let go of both pages (make them
				 *  not busy, perform wakeup, activate).
				 */
				vm_object_lock(new_object);
				new_page->dirty = TRUE;
				PAGE_WAKEUP_DONE(new_page);
				vm_object_unlock(new_object);

				vm_object_lock(result_page->object);
				PAGE_WAKEUP_DONE(result_page);

				vm_page_lockspin_queues();
				if (!result_page->active &&
				    !result_page->inactive &&
				    !result_page->throttled)
					vm_page_activate(result_page);
				vm_page_activate(new_page);
				vm_page_unlock_queues();

				/*
				 *  Release paging references and
				 *  top-level placeholder page, if any.
				 */
				vm_fault_cleanup(result_page->object,
						 top_page);

				break;
2200 case VM_FAULT_RETRY
:
2203 case VM_FAULT_FICTITIOUS_SHORTAGE
:
2204 vm_page_more_fictitious();
2207 case VM_FAULT_MEMORY_SHORTAGE
:
2208 if (vm_page_wait(interruptible
))
2212 case VM_FAULT_INTERRUPTED
:
2213 vm_page_free(new_page
);
2214 vm_object_deallocate(new_object
);
2215 vm_object_deallocate(src_object
);
2216 *_result_object
= VM_OBJECT_NULL
;
2217 return(MACH_SEND_INTERRUPTED
);
2219 case VM_FAULT_MEMORY_ERROR
:
2222 * (a) ignore pages that we can't
2224 * (b) return the null object if
2225 * any page fails [chosen]
2228 vm_page_lock_queues();
2229 vm_page_free(new_page
);
2230 vm_page_unlock_queues();
2232 vm_object_deallocate(new_object
);
2233 vm_object_deallocate(src_object
);
2234 *_result_object
= VM_OBJECT_NULL
;
2235 return(error_code
? error_code
:
2238 } while (result
!= VM_FAULT_SUCCESS
);
2242 * Lose the extra reference, and return our object.
2244 vm_object_deallocate(src_object
);
2245 *_result_object
= new_object
;
2246 return(KERN_SUCCESS
);
/*
 *	Routine:	vm_object_copy_quickly
 *
 *	Purpose:
 *		Copy the specified range of the source virtual
 *		memory object, if it can be done without waiting
 *		for user-generated events.
 *
 *	Results:
 *		If the copy is successful, the copy is returned in
 *		the arguments; otherwise, the arguments are not
 *		modified.
 *
 *	In/out conditions:
 *		The object should be unlocked on entry and exit.
 */
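/*
 * Illustrative sketch only (not part of the original source): typical
 * use, with hypothetical identifiers.  For a symmetric-strategy object
 * both "needs copy" flags come back TRUE, and the caller is expected
 * to arrange shadow objects later (e.g. via vm_object_shadow()).
 *
 *	boolean_t src_needs_copy, dst_needs_copy;
 *
 *	if (vm_object_copy_quickly(&object, offset, size,
 *				   &src_needs_copy, &dst_needs_copy)) {
 *		// quick copy succeeded; honor the two flags
 *	} else {
 *		// fall back to vm_object_copy_strategically()
 *	}
 */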
__private_extern__ boolean_t
vm_object_copy_quickly(
	vm_object_t		*_object,		/* INOUT */
	__unused vm_object_offset_t	offset,		/* IN */
	__unused vm_object_size_t	size,		/* IN */
	boolean_t	*_src_needs_copy,		/* OUT */
	boolean_t	*_dst_needs_copy)		/* OUT */
{
	vm_object_t	object = *_object;
	memory_object_copy_strategy_t copy_strategy;

	XPR(XPR_VM_OBJECT, "v_o_c_quickly obj 0x%x off 0x%x size 0x%x\n",
	    *_object, offset, size, 0, 0);
	if (object == VM_OBJECT_NULL) {
		*_src_needs_copy = FALSE;
		*_dst_needs_copy = FALSE;
		return(TRUE);
	}

	vm_object_lock(object);

	copy_strategy = object->copy_strategy;

	switch (copy_strategy) {
	case MEMORY_OBJECT_COPY_SYMMETRIC:

		/*
		 *	Symmetric copy strategy.
		 *	Make another reference to the object.
		 *	Leave object/offset unchanged.
		 */

		vm_object_reference_locked(object);
		object->shadowed = TRUE;
		vm_object_unlock(object);

		/*
		 *	Both source and destination must make
		 *	shadows, and the source must be made
		 *	read-only if not already.
		 */

		*_src_needs_copy = TRUE;
		*_dst_needs_copy = TRUE;

		break;

	case MEMORY_OBJECT_COPY_DELAY:
		vm_object_unlock(object);
		return(FALSE);

	default:
		vm_object_unlock(object);
		return(FALSE);
	}
	return(TRUE);
}
static int	copy_call_count = 0;
static int	copy_call_sleep_count = 0;
static int	copy_call_restart_count = 0;
/*
 *	Routine:	vm_object_copy_call [internal]
 *
 *	Description:
 *		Copy the source object (src_object), using the
 *		user-managed copy algorithm.
 *
 *	In/out conditions:
 *		The source object must be locked on entry.  It
 *		will be *unlocked* on exit.
 *
 *	Results:
 *		If the copy is successful, KERN_SUCCESS is returned.
 *		A new object that represents the copied virtual
 *		memory is returned in a parameter (*_result_object).
 *		If the return value indicates an error, this parameter
 *		is not valid.
 */
static kern_return_t
vm_object_copy_call(
	vm_object_t		src_object,
	vm_object_offset_t	src_offset,
	vm_object_size_t	size,
	vm_object_t		*_result_object)	/* OUT */
{
	kern_return_t	kr;
	vm_object_t	copy;
	boolean_t	check_ready = FALSE;
	uint32_t	try_failed_count = 0;

	/*
	 *	If a copy is already in progress, wait and retry.
	 *
	 *	XXX
	 *	Consider making this call interruptable, as Mike
	 *	intended it to be.
	 *
	 *	XXXO
	 *	Need a counter or version or something to allow
	 *	us to use the copy that the currently requesting
	 *	thread is obtaining -- is it worth adding to the
	 *	vm object structure? Depends how common this case it.
	 */
	copy_call_count++;
	while (vm_object_wanted(src_object, VM_OBJECT_EVENT_COPY_CALL)) {
		vm_object_sleep(src_object, VM_OBJECT_EVENT_COPY_CALL,
			       THREAD_UNINT);
		copy_call_restart_count++;
	}

	/*
	 *	Indicate (for the benefit of memory_object_create_copy)
	 *	that we want a copy for src_object. (Note that we cannot
	 *	do a real assert_wait before calling memory_object_copy,
	 *	so we simply set the flag.)
	 */

	vm_object_set_wanted(src_object, VM_OBJECT_EVENT_COPY_CALL);
	vm_object_unlock(src_object);

	/*
	 *	Ask the memory manager to give us a memory object
	 *	which represents a copy of the src object.
	 *	The memory manager may give us a memory object
	 *	which we already have, or it may give us a
	 *	new memory object. This memory object will arrive
	 *	via memory_object_create_copy.
	 */

	kr = KERN_FAILURE;	/* XXX need to change memory_object.defs */
	if (kr != KERN_SUCCESS) {
		return kr;
	}

	/*
	 *	Wait for the copy to arrive.
	 */
	vm_object_lock(src_object);
	while (vm_object_wanted(src_object, VM_OBJECT_EVENT_COPY_CALL)) {
		vm_object_sleep(src_object, VM_OBJECT_EVENT_COPY_CALL,
			       THREAD_UNINT);
		copy_call_sleep_count++;
	}
Retry:
	assert(src_object->copy != VM_OBJECT_NULL);
	copy = src_object->copy;
	if (!vm_object_lock_try(copy)) {
		vm_object_unlock(src_object);

		try_failed_count++;
		mutex_pause(try_failed_count);	/* wait a bit */

		vm_object_lock(src_object);
		goto Retry;
	}
	if (copy->size < src_offset+size)
		copy->size = src_offset+size;

	if (!copy->pager_ready)
		check_ready = TRUE;

	/*
	 *	Return the copy.
	 */
	*_result_object = copy;
	vm_object_unlock(copy);
	vm_object_unlock(src_object);

	/* Wait for the copy to be ready. */
	if (check_ready == TRUE) {
		vm_object_lock(copy);
		while (!copy->pager_ready) {
			vm_object_sleep(copy, VM_OBJECT_EVENT_PAGER_READY, THREAD_UNINT);
		}
		vm_object_unlock(copy);
	}

	return KERN_SUCCESS;
}
static int	copy_delayed_lock_collisions = 0;
static int	copy_delayed_max_collisions = 0;
static int	copy_delayed_lock_contention = 0;
static int	copy_delayed_protect_iterate = 0;
/*
 *	Routine:	vm_object_copy_delayed [internal]
 *
 *	Description:
 *		Copy the specified virtual memory object, using
 *		the asymmetric copy-on-write algorithm.
 *
 *	In/out conditions:
 *		The src_object must be locked on entry.  It will be unlocked
 *		on exit - so the caller must also hold a reference to it.
 *
 *		This routine will not block waiting for user-generated
 *		events.  It is not interruptible.
 */
__private_extern__ vm_object_t
vm_object_copy_delayed(
	vm_object_t		src_object,
	vm_object_offset_t	src_offset,
	vm_object_size_t	size,
	boolean_t		src_object_shared)
{
	vm_object_t		new_copy = VM_OBJECT_NULL;
	vm_object_t		old_copy;
	vm_page_t		p;
	vm_object_size_t	copy_size = src_offset + size;

	int collisions = 0;
 Retry:
	/*
	 * The user-level memory manager wants to see all of the changes
	 * to this object, but it has promised not to make any changes on
	 * its own.
	 *
	 * Perform an asymmetric copy-on-write, as follows:
	 *	Create a new object, called a "copy object" to hold
	 *	pages modified by the new mapping (i.e., the copy,
	 *	not the original mapping).
	 *	Record the original object as the backing object for
	 *	the copy object.  If the original mapping does not
	 *	change a page, it may be used read-only by the copy.
	 *	Record the copy object in the original object.
	 *	When the original mapping causes a page to be modified,
	 *	it must be copied to a new page that is "pushed" to
	 *	the copy object.
	 *	Mark the new mapping (the copy object) copy-on-write.
	 *	This makes the copy object itself read-only, allowing
	 *	it to be reused if the original mapping makes no
	 *	changes, and simplifying the synchronization required
	 *	in the "push" operation described above.
	 *
	 * The copy-on-write is said to be asymmetric because the original
	 * object is *not* marked copy-on-write. A copied page is pushed
	 * to the copy object, regardless which party attempted to modify
	 * the page.
	 *
	 * Repeated asymmetric copy operations may be done. If the
	 * original object has not been changed since the last copy, its
	 * copy object can be reused. Otherwise, a new copy object can be
	 * inserted between the original object and its previous copy
	 * object.  Since any copy object is read-only, this cannot affect
	 * the contents of the previous copy object.
	 *
	 * Note that a copy object is higher in the object tree than the
	 * original object; therefore, use of the copy object recorded in
	 * the original object must be done carefully, to avoid deadlock.
	 */
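	/*
	 * Illustrative sketch only (not part of the original source):
	 * after a successful delayed copy, the objects are linked roughly
	 * like this (see the assignments near the end of this routine):
	 *
	 *	new_copy->shadow        == src_object;	// copy reads through
	 *	new_copy->shadow_offset == 0;
	 *	src_object->copy        == new_copy;	// pushed pages land here
	 *
	 * and any previous copy object is re-pointed to shadow new_copy,
	 * so it receives no more pages from the original object.
	 */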
	/*
	 *	Wait for paging in progress.
	 */
	if (!src_object->true_share && src_object->paging_in_progress) {
		if (src_object_shared == TRUE) {
			vm_object_unlock(src_object);

			vm_object_lock(src_object);
			src_object_shared = FALSE;
		}
		vm_object_paging_wait(src_object, THREAD_UNINT);
	}

	/*
	 *	See whether we can reuse the result of a previous
	 *	copy operation.
	 */

	old_copy = src_object->copy;
	if (old_copy != VM_OBJECT_NULL) {
		int	lock_granted;

		/*
		 *	Try to get the locks (out of order)
		 */
		if (src_object_shared == TRUE)
			lock_granted = vm_object_lock_try_shared(old_copy);
		else
			lock_granted = vm_object_lock_try(old_copy);

		if (!lock_granted) {
			vm_object_unlock(src_object);

			if (collisions++ == 0)
				copy_delayed_lock_contention++;
			mutex_pause(collisions);

			/* Heisenberg Rules */
			copy_delayed_lock_collisions++;

			if (collisions > copy_delayed_max_collisions)
				copy_delayed_max_collisions = collisions;

			if (src_object_shared == TRUE)
				vm_object_lock_shared(src_object);
			else
				vm_object_lock(src_object);

			goto Retry;
		}

		/*
		 *	Determine whether the old copy object has
		 *	been modified.
		 */

		if (old_copy->resident_page_count == 0 &&
		    !old_copy->pager_created) {
			/*
			 *	It has not been modified.
			 *
			 *	Return another reference to
			 *	the existing copy-object if
			 *	we can safely grow it (if
			 *	needed).
			 */

			if (old_copy->size < copy_size) {
				if (src_object_shared == TRUE) {
					vm_object_unlock(old_copy);
					vm_object_unlock(src_object);

					vm_object_lock(src_object);
					src_object_shared = FALSE;
					goto Retry;
				}
				/*
				 * We can't perform a delayed copy if any of the
				 * pages in the extended range are wired (because
				 * we can't safely take write permission away from
				 * wired pages).  If the pages aren't wired, then
				 * go ahead and protect them.
				 */
				copy_delayed_protect_iterate++;

				queue_iterate(&src_object->memq, p, vm_page_t, listq) {
					if (!p->fictitious &&
					    p->offset >= old_copy->size &&
					    p->offset < copy_size) {
						if (p->wire_count > 0) {
							vm_object_unlock(old_copy);
							vm_object_unlock(src_object);

							if (new_copy != VM_OBJECT_NULL) {
								vm_object_unlock(new_copy);
								vm_object_deallocate(new_copy);
							}

							return VM_OBJECT_NULL;
						} else {
							pmap_page_protect(p->phys_page,
								(VM_PROT_ALL & ~VM_PROT_WRITE));
						}
					}
				}
				old_copy->size = copy_size;
			}

			if (src_object_shared == TRUE)
				vm_object_reference_shared(old_copy);
			else
				vm_object_reference_locked(old_copy);
			vm_object_unlock(old_copy);
			vm_object_unlock(src_object);

			if (new_copy != VM_OBJECT_NULL) {
				vm_object_unlock(new_copy);
				vm_object_deallocate(new_copy);
			}

			return(old_copy);
		}

		/*
		 * Adjust the size argument so that the newly-created
		 * copy object will be large enough to back either the
		 * old copy object or the new mapping.
		 */
		if (old_copy->size > copy_size)
			copy_size = old_copy->size;

		if (new_copy == VM_OBJECT_NULL) {
			vm_object_unlock(old_copy);
			vm_object_unlock(src_object);
			new_copy = vm_object_allocate(copy_size);
			vm_object_lock(src_object);
			vm_object_lock(new_copy);

			src_object_shared = FALSE;
			goto Retry;
		}
		new_copy->size = copy_size;

		/*
		 *	The copy-object is always made large enough to
		 *	completely shadow the original object, since
		 *	it may have several users who want to shadow
		 *	the original object at different points.
		 */

		assert((old_copy->shadow == src_object) &&
		    (old_copy->shadow_offset == (vm_object_offset_t) 0));

	} else if (new_copy == VM_OBJECT_NULL) {
		vm_object_unlock(src_object);
		new_copy = vm_object_allocate(copy_size);
		vm_object_lock(src_object);
		vm_object_lock(new_copy);

		src_object_shared = FALSE;
		goto Retry;
	}

	/*
	 * We now have the src object locked, and the new copy object
	 * allocated and locked (and potentially the old copy locked).
	 * Before we go any further, make sure we can still perform
	 * a delayed copy, as the situation may have changed.
	 *
	 * Specifically, we can't perform a delayed copy if any of the
	 * pages in the range are wired (because we can't safely take
	 * write permission away from wired pages).  If the pages aren't
	 * wired, then go ahead and protect them.
	 */
	copy_delayed_protect_iterate++;

	queue_iterate(&src_object->memq, p, vm_page_t, listq) {
		if (!p->fictitious && p->offset < copy_size) {
			if (p->wire_count > 0) {
				if (old_copy != VM_OBJECT_NULL)
					vm_object_unlock(old_copy);
				vm_object_unlock(src_object);
				vm_object_unlock(new_copy);
				vm_object_deallocate(new_copy);
				return VM_OBJECT_NULL;
			} else {
				pmap_page_protect(p->phys_page,
					(VM_PROT_ALL & ~VM_PROT_WRITE));
			}
		}
	}

	if (old_copy != VM_OBJECT_NULL) {
		/*
		 *	Make the old copy-object shadow the new one.
		 *	It will receive no more pages from the original
		 *	object.
		 */

		/* remove ref. from old_copy */
		vm_object_lock_assert_exclusive(src_object);
		src_object->ref_count--;
		assert(src_object->ref_count > 0);
		vm_object_lock_assert_exclusive(old_copy);
		old_copy->shadow = new_copy;
		vm_object_lock_assert_exclusive(new_copy);
		assert(new_copy->ref_count > 0);
		new_copy->ref_count++;		/* for old_copy->shadow ref. */

#if TASK_SWAPPER
		if (old_copy->res_count) {
			VM_OBJ_RES_INCR(new_copy);
			VM_OBJ_RES_DECR(src_object);
		}
#endif

		vm_object_unlock(old_copy);	/* done with old_copy */
	}

	/*
	 *	Point the new copy at the existing object.
	 */
	vm_object_lock_assert_exclusive(new_copy);
	new_copy->shadow = src_object;
	new_copy->shadow_offset = 0;
	new_copy->shadowed = TRUE;	/* caller must set needs_copy */

	vm_object_lock_assert_exclusive(src_object);
	vm_object_reference_locked(src_object);
	src_object->copy = new_copy;
	vm_object_unlock(src_object);
	vm_object_unlock(new_copy);

	XPR(XPR_VM_OBJECT,
		"vm_object_copy_delayed: used copy object %X for source %X\n",
		(integer_t)new_copy, (integer_t)src_object, 0, 0, 0);

	return new_copy;
}
/*
 *	Routine:	vm_object_copy_strategically
 *
 *	Purpose:
 *		Perform a copy according to the source object's
 *		declared strategy.  This operation may block,
 *		and may be interrupted.
 */
__private_extern__ kern_return_t
vm_object_copy_strategically(
	register vm_object_t	src_object,
	vm_object_offset_t	src_offset,
	vm_object_size_t	size,
	vm_object_t		*dst_object,	/* OUT */
	vm_object_offset_t	*dst_offset,	/* OUT */
	boolean_t		*dst_needs_copy) /* OUT */
{
	kern_return_t	result;
	boolean_t	interruptible = THREAD_ABORTSAFE; /* XXX */
	boolean_t	object_lock_shared = FALSE;
	memory_object_copy_strategy_t copy_strategy;

	assert(src_object != VM_OBJECT_NULL);

	copy_strategy = src_object->copy_strategy;

	if (copy_strategy == MEMORY_OBJECT_COPY_DELAY) {
		vm_object_lock_shared(src_object);
		object_lock_shared = TRUE;
	} else
		vm_object_lock(src_object);

	/*
	 *	The copy strategy is only valid if the memory manager
	 *	is "ready". Internal objects are always ready.
	 */

	while (!src_object->internal && !src_object->pager_ready) {
		wait_result_t wait_result;

		if (object_lock_shared == TRUE) {
			vm_object_unlock(src_object);
			vm_object_lock(src_object);
			object_lock_shared = FALSE;
			continue;
		}
		wait_result = vm_object_sleep(	src_object,
						VM_OBJECT_EVENT_PAGER_READY,
						interruptible);
		if (wait_result != THREAD_AWAKENED) {
			vm_object_unlock(src_object);
			*dst_object = VM_OBJECT_NULL;
			*dst_offset = 0;
			*dst_needs_copy = FALSE;
			return(MACH_SEND_INTERRUPTED);
		}
	}

	/*
	 *	Use the appropriate copy strategy.
	 */

	switch (copy_strategy) {
	    case MEMORY_OBJECT_COPY_DELAY:
		*dst_object = vm_object_copy_delayed(src_object,
						     src_offset, size, object_lock_shared);
		if (*dst_object != VM_OBJECT_NULL) {
			*dst_offset = src_offset;
			*dst_needs_copy = TRUE;
			result = KERN_SUCCESS;
			break;
		}
		vm_object_lock(src_object);
		/* fall thru when delayed copy not allowed */

	    case MEMORY_OBJECT_COPY_NONE:
		result = vm_object_copy_slowly(src_object, src_offset, size,
					       interruptible, dst_object);
		if (result == KERN_SUCCESS) {
			*dst_offset = 0;
			*dst_needs_copy = FALSE;
		}
		break;

	    case MEMORY_OBJECT_COPY_CALL:
		result = vm_object_copy_call(src_object, src_offset, size,
				dst_object);
		if (result == KERN_SUCCESS) {
			*dst_offset = src_offset;
			*dst_needs_copy = TRUE;
		}
		break;

	    case MEMORY_OBJECT_COPY_SYMMETRIC:
		XPR(XPR_VM_OBJECT, "v_o_c_strategically obj 0x%x off 0x%x size 0x%x\n", (natural_t)src_object, src_offset, size, 0, 0);
		vm_object_unlock(src_object);
		result = KERN_MEMORY_RESTART_COPY;
		break;

	    default:
		panic("copy_strategically: bad strategy");
		result = KERN_INVALID_ARGUMENT;
	}
	return(result);
}
/*
 *	vm_object_shadow:
 *
 *	Create a new object which is backed by the
 *	specified existing object range.  The source
 *	object reference is deallocated.
 *
 *	The new object and offset into that object
 *	are returned in the source parameters.
 */
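/*
 * Illustrative sketch only (not part of the original source): a caller
 * that holds a reference to "object" and wants a copy-on-write shadow
 * over "length" bytes would do roughly the following; its reference
 * effectively moves from the old object to the new shadow.
 *
 *	vm_object_t		object = ...;	// caller holds one reference
 *	vm_object_offset_t	offset = ...;
 *
 *	if (vm_object_shadow(&object, &offset, length)) {
 *		// "object" and "offset" now name the new shadow object
 *	}
 */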
boolean_t vm_object_shadow_check = FALSE;
__private_extern__ boolean_t
vm_object_shadow(
	vm_object_t		*object,	/* IN/OUT */
	vm_object_offset_t	*offset,	/* IN/OUT */
	vm_object_size_t	length)
{
	register vm_object_t	source;
	register vm_object_t	result;

	source = *object;

#if 0
	/*
	 * This assertion is valid but it gets triggered by Rosetta for example
	 * due to a combination of vm_remap() that changes a VM object's
	 * copy_strategy from SYMMETRIC to DELAY and vm_protect(VM_PROT_COPY)
	 * that then sets "needs_copy" on its map entry.  This creates a
	 * mapping situation that VM should never see and doesn't know how to
	 * handle.
	 * It's not clear if this can create any real problem but we should
	 * look into fixing this, probably by having vm_protect(VM_PROT_COPY)
	 * do more than just set "needs_copy" to handle the copy-on-write...
	 * In the meantime, let's disable the assertion.
	 */
	assert(source->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);
#endif

	/*
	 *	Determine if we really need a shadow.
	 */

	if (vm_object_shadow_check && source->ref_count == 1 &&
	    (source->shadow == VM_OBJECT_NULL ||
	     source->shadow->copy == VM_OBJECT_NULL))
	{
		source->shadowed = FALSE;
		return FALSE;
	}

	/*
	 *	Allocate a new object with the given length
	 */

	if ((result = vm_object_allocate(length)) == VM_OBJECT_NULL)
		panic("vm_object_shadow: no object for shadowing");

	/*
	 *	The new object shadows the source object, adding
	 *	a reference to it.  Our caller changes his reference
	 *	to point to the new object, removing a reference to
	 *	the source object.  Net result: no change of reference
	 *	count.
	 */
	result->shadow = source;

	/*
	 *	Store the offset into the source object,
	 *	and fix up the offset into the new object.
	 */

	result->shadow_offset = *offset;

	/*
	 *	Return the new things
	 */

	*offset = 0;
	*object = result;
	return TRUE;
}
/*
 *	The relationship between vm_object structures and
 *	the memory_object requires careful synchronization.
 *
 *	All associations are created by memory_object_create_named
 *	for external pagers and vm_object_pager_create for internal
 *	objects as follows:
 *
 *		pager:	the memory_object itself, supplied by
 *			the user requesting a mapping (or the kernel,
 *			when initializing internal objects); the
 *			kernel simulates holding send rights by keeping
 *			a port reference;
 *
 *		pager_control:
 *			the memory object control port,
 *			created by the kernel; the kernel holds
 *			receive (and ownership) rights to this
 *			port, but no other references.
 *
 *	When initialization is complete, the "initialized" field
 *	is asserted.  Other mappings using a particular memory object,
 *	and any references to the vm_object gained through the
 *	port association must wait for this initialization to occur.
 *
 *	In order to allow the memory manager to set attributes before
 *	requests (notably virtual copy operations, but also data or
 *	unlock requests) are made, a "ready" attribute is made available.
 *	Only the memory manager may affect the value of this attribute.
 *	Its value does not affect critical kernel functions, such as
 *	internal object initialization or destruction.  [Furthermore,
 *	memory objects created by the kernel are assumed to be ready
 *	immediately; the default memory manager need not explicitly
 *	set the "ready" attribute.]
 *
 *	[Both the "initialized" and "ready" attribute wait conditions
 *	use the "pager" field as the wait event.]
 *
 *	The port associations can be broken down by any of the
 *	following routines:
 *		vm_object_terminate:
 *			No references to the vm_object remain, and
 *			the object cannot (or will not) be cached.
 *			This is the normal case, and is done even
 *			though one of the other cases has already been
 *			done.
 *		memory_object_destroy:
 *			The memory manager has requested that the
 *			kernel relinquish references to the memory
 *			object. [The memory manager may not want to
 *			destroy the memory object, but may wish to
 *			refuse or tear down existing memory mappings.]
 *
 *	Each routine that breaks an association must break all of
 *	them at once.  At some later time, that routine must clear
 *	the pager field and release the memory object references.
 *	[Furthermore, each routine must cope with the simultaneous
 *	or previous operations of the others.]
 *
 *	In addition to the lock on the object, the vm_object_cache_lock
 *	governs the associations.  References gained through the
 *	association require use of the cache lock.
 *
 *	Because the pager field may be cleared spontaneously, it
 *	cannot be used to determine whether a memory object has
 *	ever been associated with a particular vm_object.  [This
 *	knowledge is important to the shadow object mechanism.]
 *	For this reason, an additional "created" attribute is
 *	provided.
 *
 *	During various paging operations, the pager reference found in the
 *	vm_object must be valid.  To prevent this from being released,
 *	(other than being removed, i.e., made null), routines may use
 *	the vm_object_paging_begin/end routines [actually, macros].
 *	The implementation uses the "paging_in_progress" and "wanted" fields.
 *	[Operations that alter the validity of the pager values include the
 *	termination routines and vm_object_collapse.]
 */
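/*
 * Illustrative sketch only (not part of the original source): the
 * paging_in_progress reference described above is taken and dropped
 * around any use of the pager, roughly:
 *
 *	vm_object_lock(object);
 *	vm_object_paging_begin(object);	// pin the pager association
 *	vm_object_unlock(object);
 *
 *	... issue requests against object->pager ...
 *
 *	vm_object_lock(object);
 *	vm_object_paging_end(object);	// may wake waiters
 *	vm_object_unlock(object);
 */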
/*
 *	Routine:	vm_object_enter
 *	Purpose:
 *		Find a VM object corresponding to the given
 *		pager; if no such object exists, create one,
 *		and initialize the pager.
 */
vm_object_t
vm_object_enter(
	memory_object_t		pager,
	vm_object_size_t	size,
	boolean_t		internal,
	boolean_t		init,
	boolean_t		named)
{
	register vm_object_t	object;
	vm_object_t		new_object;
	boolean_t		must_init;
	vm_object_hash_entry_t	entry, new_entry;
	uint32_t		try_failed_count = 0;

	if (pager == MEMORY_OBJECT_NULL)
		return(vm_object_allocate(size));

	new_object = VM_OBJECT_NULL;
	new_entry = VM_OBJECT_HASH_ENTRY_NULL;
	must_init = init;

	/*
	 *	Look for an object associated with this port.
	 */
Retry:
	vm_object_cache_lock();
	do {
		entry = vm_object_hash_lookup(pager, FALSE);

		if (entry == VM_OBJECT_HASH_ENTRY_NULL) {
			if (new_object == VM_OBJECT_NULL) {
				/*
				 *	We must unlock to create a new object;
				 *	if we do so, we must try the lookup again.
				 */
				vm_object_cache_unlock();
				assert(new_entry == VM_OBJECT_HASH_ENTRY_NULL);
				new_entry = vm_object_hash_entry_alloc(pager);
				new_object = vm_object_allocate(size);
				vm_object_cache_lock();
			} else {
				/*
				 *	Lookup failed twice, and we have something
				 *	to insert; set the object.
				 */
				vm_object_hash_insert(new_entry);
				entry = new_entry;
				entry->object = new_object;
				new_entry = VM_OBJECT_HASH_ENTRY_NULL;
				new_object = VM_OBJECT_NULL;
				must_init = TRUE;
			}
		} else if (entry->object == VM_OBJECT_NULL) {
			/*
			 *	If a previous object is being terminated,
			 *	we must wait for the termination message
			 *	to be queued (and lookup the entry again).
			 */
			entry->waiting = TRUE;
			entry = VM_OBJECT_HASH_ENTRY_NULL;
			assert_wait((event_t) pager, THREAD_UNINT);
			vm_object_cache_unlock();
			thread_block(THREAD_CONTINUE_NULL);
			vm_object_cache_lock();
		}
	} while (entry == VM_OBJECT_HASH_ENTRY_NULL);

	object = entry->object;
	assert(object != VM_OBJECT_NULL);

	if (!must_init) {
		if (!vm_object_lock_try(object)) {

			vm_object_cache_unlock();

			try_failed_count++;
			mutex_pause(try_failed_count);	/* wait a bit */

			goto Retry;
		}
		assert(!internal || object->internal);
		if (named) {
			assert(!object->named);
			object->named = TRUE;
		}
		if (object->ref_count == 0) {
			XPR(XPR_VM_OBJECT_CACHE,
			    "vm_object_enter: removing %x from cache, head (%x, %x)\n",
			    (integer_t)object,
			    (integer_t)vm_object_cached_list.next,
			    (integer_t)vm_object_cached_list.prev, 0,0);
			queue_remove(&vm_object_cached_list, object,
				     vm_object_t, cached_list);
			vm_object_cached_count--;
		}
		vm_object_lock_assert_exclusive(object);
		object->ref_count++;
		vm_object_res_reference(object);
		vm_object_unlock(object);

		VM_STAT_INCR(hits);
	}
	assert(object->ref_count > 0);

	VM_STAT_INCR(lookups);

	vm_object_cache_unlock();

	XPR(XPR_VM_OBJECT,
	    "vm_o_enter: pager 0x%x obj 0x%x must_init %d\n",
	    (integer_t)pager, (integer_t)object, must_init, 0, 0);

	/*
	 *	If we raced to create a vm_object but lost, let's
	 *	throw away ours.
	 */

	if (new_object != VM_OBJECT_NULL)
		vm_object_deallocate(new_object);

	if (new_entry != VM_OBJECT_HASH_ENTRY_NULL)
		vm_object_hash_entry_free(new_entry);

	if (must_init) {
		memory_object_control_t control;

		/*
		 *	Allocate request port.
		 */

		control = memory_object_control_allocate(object);
		assert (control != MEMORY_OBJECT_CONTROL_NULL);

		vm_object_lock(object);
		assert(object != kernel_object);

		/*
		 *	Copy the reference we were given.
		 */

		memory_object_reference(pager);
		object->pager_created = TRUE;
		object->pager = pager;
		object->internal = internal;
		object->pager_trusted = internal;
		if (!internal) {
			/* copy strategy invalid until set by memory manager */
			object->copy_strategy = MEMORY_OBJECT_COPY_INVALID;
		}
		object->pager_control = control;
		object->pager_ready = FALSE;

		vm_object_unlock(object);

		/*
		 *	Let the pager know we're using it.
		 */

		(void) memory_object_init(pager,
			object->pager_control,
			PAGE_SIZE);

		vm_object_lock(object);
		if (named)
			object->named = TRUE;
		if (internal) {
			object->pager_ready = TRUE;
			vm_object_wakeup(object, VM_OBJECT_EVENT_PAGER_READY);
		}

		object->pager_initialized = TRUE;
		vm_object_wakeup(object, VM_OBJECT_EVENT_INITIALIZED);
	} else {
		vm_object_lock(object);
	}

	/*
	 *	[At this point, the object must be locked]
	 */

	/*
	 *	Wait for the work above to be done by the first
	 *	thread to map this object.
	 */

	while (!object->pager_initialized) {
		vm_object_sleep(object,
				VM_OBJECT_EVENT_INITIALIZED,
				THREAD_UNINT);
	}
	vm_object_unlock(object);

	XPR(XPR_VM_OBJECT,
	    "vm_object_enter: vm_object %x, memory_object %x, internal %d\n",
	    (integer_t)object, (integer_t)object->pager, internal, 0,0);
	return(object);
}
/*
 *	Routine:	vm_object_pager_create
 *	Purpose:
 *		Create a memory object for an internal object.
 *	In/out conditions:
 *		The object is locked on entry and exit;
 *		it may be unlocked within this call.
 *	Limitations:
 *		Only one thread may be performing a
 *		vm_object_pager_create on an object at
 *		a time.  Presumably, only the pageout
 *		daemon will be using this routine.
 */
void
vm_object_pager_create(
	register vm_object_t	object)
{
	memory_object_t		pager;
	vm_object_hash_entry_t	entry;
#if	MACH_PAGEMAP
	vm_object_size_t	size;
	vm_external_map_t	map;
#endif	/* MACH_PAGEMAP */

	XPR(XPR_VM_OBJECT, "vm_object_pager_create, object 0x%X\n",
		(integer_t)object, 0,0,0,0);

	assert(object != kernel_object);

	if (memory_manager_default_check() != KERN_SUCCESS)
		return;

	/*
	 *	Prevent collapse or termination by holding a paging reference
	 */

	vm_object_paging_begin(object);
	if (object->pager_created) {
		/*
		 *	Someone else got to it first...
		 *	wait for them to finish initializing the ports
		 */
		while (!object->pager_initialized) {
			vm_object_sleep(object,
					VM_OBJECT_EVENT_INITIALIZED,
					THREAD_UNINT);
		}
		vm_object_paging_end(object);
		return;
	}

	/*
	 *	Indicate that a memory object has been assigned
	 *	before dropping the lock, to prevent a race.
	 */

	object->pager_created = TRUE;
	object->paging_offset = 0;

#if	MACH_PAGEMAP
	size = object->size;
#endif	/* MACH_PAGEMAP */
	vm_object_unlock(object);

#if	MACH_PAGEMAP
	map = vm_external_create(size);
	vm_object_lock(object);
	assert(object->size == size);
	object->existence_map = map;
	vm_object_unlock(object);
#endif	/* MACH_PAGEMAP */

	/*
	 *	Create the [internal] pager, and associate it with this object.
	 *
	 *	We make the association here so that vm_object_enter()
	 *	can look up the object to complete initializing it.  No
	 *	user will ever map this object.
	 */
	{
		memory_object_default_t dmm;

		/* acquire a reference for the default memory manager */
		dmm = memory_manager_default_reference();

		assert(object->temporary);

		/* create our new memory object */
		(void) memory_object_create(dmm, object->size, &pager);

		memory_object_default_deallocate(dmm);
	}

	entry = vm_object_hash_entry_alloc(pager);

	vm_object_cache_lock();
	vm_object_hash_insert(entry);

	entry->object = object;
	vm_object_cache_unlock();

	/*
	 *	A reference was returned by
	 *	memory_object_create(), and it is
	 *	copied by vm_object_enter().
	 */

	if (vm_object_enter(pager, object->size, TRUE, TRUE, FALSE) != object)
		panic("vm_object_pager_create: mismatch");

	/*
	 *	Drop the reference we were passed.
	 */
	memory_object_deallocate(pager);

	vm_object_lock(object);

	/*
	 *	Release the paging reference
	 */
	vm_object_paging_end(object);
}
/*
 *	Routine:	vm_object_remove
 *	Purpose:
 *		Eliminate the pager/object association
 *		for this pager.
 *	Conditions:
 *		The object cache must be locked.
 */
__private_extern__ void
vm_object_remove(
	vm_object_t	object)
{
	memory_object_t pager;

	if ((pager = object->pager) != MEMORY_OBJECT_NULL) {
		vm_object_hash_entry_t	entry;

		entry = vm_object_hash_lookup(pager, FALSE);
		if (entry != VM_OBJECT_HASH_ENTRY_NULL)
			entry->object = VM_OBJECT_NULL;
	}
}
/*
 *	Global variables for vm_object_collapse():
 *
 *		Counts for normal collapses and bypasses.
 *		Debugging variables, to watch or disable collapse.
 */
static long	object_collapses = 0;
static long	object_bypasses  = 0;

static boolean_t	vm_object_collapse_allowed = TRUE;
static boolean_t	vm_object_bypass_allowed = TRUE;

#if MACH_PAGEMAP
static int	vm_external_discarded;
static int	vm_external_collapsed;
#endif

unsigned long vm_object_collapse_encrypted = 0;
/*
 *	Routine:	vm_object_do_collapse
 *	Purpose:
 *		Collapse an object with the object backing it.
 *		Pages in the backing object are moved into the
 *		parent, and the backing object is deallocated.
 *	Conditions:
 *		Both objects and the cache are locked; the page
 *		queues are unlocked.
 */
static void
vm_object_do_collapse(
	vm_object_t object,
	vm_object_t backing_object)
{
	vm_page_t p, pp;
	vm_object_offset_t new_offset, backing_offset;
	vm_object_size_t size;

	backing_offset = object->shadow_offset;
	size = object->size;

	/*
	 *	Move all in-memory pages from backing_object
	 *	to the parent.  Pages that have been paged out
	 *	will be overwritten by any of the parent's
	 *	pages that shadow them.
	 */

	while (!queue_empty(&backing_object->memq)) {

		p = (vm_page_t) queue_first(&backing_object->memq);

		new_offset = (p->offset - backing_offset);

		assert(!p->busy || p->absent);

		/*
		 *	If the parent has a page here, or if
		 *	this page falls outside the parent,
		 *	dispose of it.
		 *
		 *	Otherwise, move it as planned.
		 */

		if (p->offset < backing_offset || new_offset >= size) {
			VM_PAGE_FREE(p);
		} else {
			/*
			 * ENCRYPTED SWAP:
			 * The encryption key includes the "pager" and the
			 * "paging_offset".  These will not change during the
			 * object collapse, so we can just move an encrypted
			 * page from one object to the other in this case.
			 * We can't decrypt the page here, since we can't drop
			 * the object lock.
			 */
			if (p->encrypted) {
				vm_object_collapse_encrypted++;
			}
			pp = vm_page_lookup(object, new_offset);
			if (pp == VM_PAGE_NULL) {

				/*
				 *	Parent now has no page.
				 *	Move the backing object's page up.
				 */

				vm_page_rename(p, object, new_offset, TRUE);
#if	MACH_PAGEMAP
			} else if (pp->absent) {

				/*
				 *	Parent has an absent page...
				 *	it's not being paged in, so
				 *	it must really be missing from
				 *	the parent.
				 *
				 *	Throw out the absent page...
				 *	any faults looking for that
				 *	page will restart with the new
				 *	one.
				 */

				VM_PAGE_FREE(pp);
				vm_page_rename(p, object, new_offset, TRUE);
#endif	/* MACH_PAGEMAP */
			} else {
				assert(! pp->absent);

				/*
				 *	Parent object has a real page.
				 *	Throw away the backing object's
				 *	page.
				 */
				VM_PAGE_FREE(p);
			}
		}
	}

#if	!MACH_PAGEMAP
	assert((!object->pager_created && (object->pager == MEMORY_OBJECT_NULL))
		|| (!backing_object->pager_created
		&&  (backing_object->pager == MEMORY_OBJECT_NULL)));
#else
	assert(!object->pager_created && object->pager == MEMORY_OBJECT_NULL);
#endif	/* !MACH_PAGEMAP */

	if (backing_object->pager != MEMORY_OBJECT_NULL) {
		vm_object_hash_entry_t	entry;

		/*
		 *	Move the pager from backing_object to object.
		 *
		 *	XXX We're only using part of the paging space
		 *	for keeps now... we ought to discard the
		 *	unused portion.
		 */

		assert(!object->paging_in_progress);
		object->pager = backing_object->pager;
		entry = vm_object_hash_lookup(object->pager, FALSE);
		assert(entry != VM_OBJECT_HASH_ENTRY_NULL);
		entry->object = object;
		object->pager_created = backing_object->pager_created;
		object->pager_control = backing_object->pager_control;
		object->pager_ready = backing_object->pager_ready;
		object->pager_initialized = backing_object->pager_initialized;
		object->paging_offset =
		    backing_object->paging_offset + backing_offset;
		if (object->pager_control != MEMORY_OBJECT_CONTROL_NULL) {
			memory_object_control_collapse(object->pager_control,
						       object);
		}
	}

	vm_object_cache_unlock();

#if	MACH_PAGEMAP
	/*
	 *	If the shadow offset is 0, the use the existence map from
	 *	the backing object if there is one. If the shadow offset is
	 *	not zero, toss it.
	 *
	 *	XXX - If the shadow offset is not 0 then a bit copy is needed
	 *	if the map is to be salvaged.  For now, we just just toss the
	 *	old map, giving the collapsed object no map. This means that
	 *	the pager is invoked for zero fill pages.  If analysis shows
	 *	that this happens frequently and is a performance hit, then
	 *	this code should be fixed to salvage the map.
	 */
	assert(object->existence_map == VM_EXTERNAL_NULL);
	if (backing_offset || (size != backing_object->size)) {
		vm_external_discarded++;
		vm_external_destroy(backing_object->existence_map,
			backing_object->size);
	}
	else {
		vm_external_collapsed++;
		object->existence_map = backing_object->existence_map;
	}
	backing_object->existence_map = VM_EXTERNAL_NULL;
#endif	/* MACH_PAGEMAP */

	/*
	 *	Object now shadows whatever backing_object did.
	 *	Note that the reference to backing_object->shadow
	 *	moves from within backing_object to within object.
	 */

	assert(!object->phys_contiguous);
	assert(!backing_object->phys_contiguous);
	object->shadow = backing_object->shadow;
	if (object->shadow) {
		object->shadow_offset += backing_object->shadow_offset;
	} else {
		/* no shadow, therefore no shadow offset... */
		object->shadow_offset = 0;
	}
	assert((object->shadow == VM_OBJECT_NULL) ||
	       (object->shadow->copy != backing_object));

	/*
	 *	Discard backing_object.
	 *
	 *	Since the backing object has no pages, no
	 *	pager left, and no object references within it,
	 *	all that is necessary is to dispose of it.
	 */

	assert((backing_object->ref_count == 1) &&
	       (backing_object->resident_page_count == 0) &&
	       (backing_object->paging_in_progress == 0));

	backing_object->alive = FALSE;
	vm_object_unlock(backing_object);

	XPR(XPR_VM_OBJECT, "vm_object_collapse, collapsed 0x%X\n",
		(integer_t)backing_object, 0,0,0,0);

	vm_object_lock_destroy(backing_object);

	zfree(vm_object_zone, backing_object);

	object_collapses++;
}
static void
vm_object_do_bypass(
	vm_object_t object,
	vm_object_t backing_object)
{
	/*
	 *	Make the parent shadow the next object
	 *	in the chain.
	 */

	vm_object_lock_assert_exclusive(backing_object);

#if	TASK_SWAPPER
	/*
	 *	Do object reference in-line to
	 *	conditionally increment shadow's
	 *	residence count.  If object is not
	 *	resident, leave residence count
	 *	on shadow alone.
	 */
	if (backing_object->shadow != VM_OBJECT_NULL) {
		vm_object_lock(backing_object->shadow);
		vm_object_lock_assert_exclusive(backing_object->shadow);
		backing_object->shadow->ref_count++;
		if (object->res_count != 0)
			vm_object_res_reference(backing_object->shadow);
		vm_object_unlock(backing_object->shadow);
	}
#else	/* TASK_SWAPPER */
	vm_object_reference(backing_object->shadow);
#endif	/* TASK_SWAPPER */

	assert(!object->phys_contiguous);
	assert(!backing_object->phys_contiguous);
	object->shadow = backing_object->shadow;
	if (object->shadow) {
		object->shadow_offset += backing_object->shadow_offset;
	} else {
		/* no shadow, therefore no shadow offset... */
		object->shadow_offset = 0;
	}

	/*
	 *	Backing object might have had a copy pointer
	 *	to us.  If it did, clear it.
	 */
	if (backing_object->copy == object) {
		backing_object->copy = VM_OBJECT_NULL;
	}

	/*
	 *	Drop the reference count on backing_object.
	 *
	 *	Since its ref_count was at least 2, it
	 *	will not vanish; so we don't need to call
	 *	vm_object_deallocate.
	 *	[FBDP: that doesn't seem to be true any more]
	 *
	 *	The res_count on the backing object is
	 *	conditionally decremented.  It's possible
	 *	(via vm_pageout_scan) to get here with
	 *	a "swapped" object, which has a 0 res_count,
	 *	in which case, the backing object res_count
	 *	is already down by one.
	 *
	 *	Don't call vm_object_deallocate unless
	 *	ref_count drops to zero.
	 *
	 *	The ref_count can drop to zero here if the
	 *	backing object could be bypassed but not
	 *	collapsed, such as when the backing object
	 *	is temporary and cachable.
	 */

	if (backing_object->ref_count > 1) {
		vm_object_lock_assert_exclusive(backing_object);
		backing_object->ref_count--;
#if	TASK_SWAPPER
		if (object->res_count != 0)
			vm_object_res_deallocate(backing_object);
		assert(backing_object->ref_count > 0);
#endif	/* TASK_SWAPPER */
		vm_object_unlock(backing_object);
	} else {

		/*
		 *	Drop locks so that we can deallocate
		 *	the backing object.
		 */

#if	TASK_SWAPPER
		if (object->res_count == 0) {
			/* XXX get a reference for the deallocate below */
			vm_object_res_reference(backing_object);
		}
#endif	/* TASK_SWAPPER */
		vm_object_unlock(object);
		vm_object_unlock(backing_object);
		vm_object_deallocate(backing_object);

		/*
		 *	Relock object. We don't have to reverify
		 *	its state since vm_object_collapse will
		 *	do that for us as it starts at the
		 *	top of the chain.
		 */

		vm_object_lock(object);
	}

	object_bypasses++;
}
/*
 *	vm_object_collapse:
 *
 *	Perform an object collapse or an object bypass if appropriate.
 *	The real work of collapsing and bypassing is performed in
 *	the routines vm_object_do_collapse and vm_object_do_bypass.
 *
 *	Requires that the object be locked and the page queues be unlocked.
 */
static unsigned long vm_object_collapse_calls = 0;
static unsigned long vm_object_collapse_objects = 0;
static unsigned long vm_object_collapse_do_collapse = 0;
static unsigned long vm_object_collapse_do_bypass = 0;
static unsigned long vm_object_collapse_delays = 0;

__private_extern__ void
vm_object_collapse(
	register vm_object_t			object,
	register vm_object_offset_t		hint_offset,
	boolean_t				can_bypass)
{
	register vm_object_t			backing_object;
	register unsigned int			rcount;
	register unsigned int			size;
	vm_object_t				original_object;

	vm_object_collapse_calls++;

	if (! vm_object_collapse_allowed &&
	    ! (can_bypass && vm_object_bypass_allowed)) {
		return;
	}

	XPR(XPR_VM_OBJECT, "vm_object_collapse, obj 0x%X\n",
		(integer_t)object, 0,0,0,0);

	if (object == VM_OBJECT_NULL)
		return;

	original_object = object;

	while (TRUE) {
		vm_object_collapse_objects++;
		/*
		 *	Verify that the conditions are right for either
		 *	collapse or bypass:
		 */

		/*
		 *	There is a backing object, and
		 */

		backing_object = object->shadow;
		if (backing_object == VM_OBJECT_NULL) {
			if (object != original_object) {
				vm_object_unlock(object);
			}
			return;
		}

		/*
		 *	No pages in the object are currently
		 *	being paged out, and
		 */
		if (object->paging_in_progress != 0) {
			/* try and collapse the rest of the shadow chain */
			vm_object_lock(backing_object);
			if (object != original_object) {
				vm_object_unlock(object);
			}
			object = backing_object;
			continue;
		}

		vm_object_lock(backing_object);

		/*
		 *	The backing object is not read_only,
		 *	and no pages in the backing object are
		 *	currently being paged out.
		 *	The backing object is internal.
		 */

		if (!backing_object->internal ||
		    backing_object->paging_in_progress != 0) {
			/* try and collapse the rest of the shadow chain */
			if (object != original_object) {
				vm_object_unlock(object);
			}
			object = backing_object;
			continue;
		}

		/*
		 * The backing object can't be a copy-object:
		 * the shadow_offset for the copy-object must stay
		 * as 0.  Furthermore (for the 'we have all the
		 * pages' case), if we bypass backing_object and
		 * just shadow the next object in the chain, old
		 * pages from that object would then have to be copied
		 * BOTH into the (former) backing_object and into the
		 * parent object.
		 */
		if (backing_object->shadow != VM_OBJECT_NULL &&
		    backing_object->shadow->copy == backing_object) {
			/* try and collapse the rest of the shadow chain */
			if (object != original_object) {
				vm_object_unlock(object);
			}
			object = backing_object;
			continue;
		}

		/*
		 *	We can now try to either collapse the backing
		 *	object (if the parent is the only reference to
		 *	it) or (perhaps) remove the parent's reference
		 *	to it.
		 *
		 *	If there is exactly one reference to the backing
		 *	object, we may be able to collapse it into the
		 *	parent.
		 *
		 *	If MACH_PAGEMAP is defined:
		 *	The parent must not have a pager created for it,
		 *	since collapsing a backing_object dumps new pages
		 *	into the parent that its pager doesn't know about
		 *	(and the collapse code can't merge the existence
		 *	maps).
		 *	Otherwise:
		 *	As long as one of the objects is still not known
		 *	to the pager, we can collapse them.
		 */
		if (backing_object->ref_count == 1 &&
		    (!object->pager_created
#if	!MACH_PAGEMAP
		     || !backing_object->pager_created
#endif	/*!MACH_PAGEMAP */
		    ) && vm_object_collapse_allowed) {

			XPR(XPR_VM_OBJECT,
		   "vm_object_collapse: %x to %x, pager %x, pager_control %x\n",
				(integer_t)backing_object, (integer_t)object,
				(integer_t)backing_object->pager,
				(integer_t)backing_object->pager_control, 0);

			/*
			 *	We need the cache lock for collapsing,
			 *	but we must not deadlock.
			 */

			if (! vm_object_cache_lock_try()) {
				if (object != original_object) {
					vm_object_unlock(object);
				}
				vm_object_unlock(backing_object);
				return;
			}

			/*
			 *	Collapse the object with its backing
			 *	object, and try again with the object's
			 *	new backing object.
			 */

			vm_object_do_collapse(object, backing_object);
			vm_object_collapse_do_collapse++;
			continue;
		}

		/*
		 *	Collapsing the backing object was not possible
		 *	or permitted, so let's try bypassing it.
		 */

		if (! (can_bypass && vm_object_bypass_allowed)) {
			/* try and collapse the rest of the shadow chain */
			if (object != original_object) {
				vm_object_unlock(object);
			}
			object = backing_object;
			continue;
		}


		/*
		 *	If the object doesn't have all its pages present,
		 *	we have to make sure no pages in the backing object
		 *	"show through" before bypassing it.
		 */
		size = atop(object->size);
		rcount = object->resident_page_count;
		if (rcount != size) {
			vm_object_offset_t	offset;
			vm_object_offset_t	backing_offset;
			unsigned int		backing_rcount;
			unsigned int		lookups = 0;

			/*
			 *	If the backing object has a pager but no pagemap,
			 *	then we cannot bypass it, because we don't know
			 *	what pages it has.
			 */
			if (backing_object->pager_created
#if	MACH_PAGEMAP
			    && (backing_object->existence_map == VM_EXTERNAL_NULL)
#endif	/* MACH_PAGEMAP */
				) {
				/* try and collapse the rest of the shadow chain */
				if (object != original_object) {
					vm_object_unlock(object);
				}
				object = backing_object;
				continue;
			}

			/*
			 *	If the object has a pager but no pagemap,
			 *	then we cannot bypass it, because we don't know
			 *	what pages it has.
			 */
			if (object->pager_created
#if	MACH_PAGEMAP
			    && (object->existence_map == VM_EXTERNAL_NULL)
#endif	/* MACH_PAGEMAP */
				) {
				/* try and collapse the rest of the shadow chain */
				if (object != original_object) {
					vm_object_unlock(object);
				}
				object = backing_object;
				continue;
			}

			/*
			 *	If all of the pages in the backing object are
			 *	shadowed by the parent object, the parent
			 *	object no longer has to shadow the backing
			 *	object; it can shadow the next one in the
			 *	chain.
			 *
			 *	If the backing object has existence info,
			 *	we must check examine its existence info
			 *	as well.
			 *
			 */

			backing_offset = object->shadow_offset;
			backing_rcount = backing_object->resident_page_count;

#if	MACH_PAGEMAP
#define EXISTS_IN_OBJECT(obj, off, rc) \
	(vm_external_state_get((obj)->existence_map, \
	 (vm_offset_t)(off)) == VM_EXTERNAL_STATE_EXISTS || \
	 ((rc) && ++lookups && vm_page_lookup((obj), (off)) != VM_PAGE_NULL && (rc)--))
#else
#define EXISTS_IN_OBJECT(obj, off, rc) \
	(((rc) && ++lookups && vm_page_lookup((obj), (off)) != VM_PAGE_NULL && (rc)--))
#endif	/* MACH_PAGEMAP */

			/*
			 * Check the hint location first
			 * (since it is often the quickest way out of here).
			 */
			if (object->cow_hint != ~(vm_offset_t)0)
				hint_offset = (vm_object_offset_t)object->cow_hint;
			else
				hint_offset = (hint_offset > 8 * PAGE_SIZE_64) ?
					      (hint_offset - 8 * PAGE_SIZE_64) : 0;

			if (EXISTS_IN_OBJECT(backing_object, hint_offset +
					     backing_offset, backing_rcount) &&
			    !EXISTS_IN_OBJECT(object, hint_offset, rcount)) {
				/* dependency right at the hint */
				object->cow_hint = (vm_offset_t)hint_offset;
				/* try and collapse the rest of the shadow chain */
				if (object != original_object) {
					vm_object_unlock(object);
				}
				object = backing_object;
				continue;
			}

			/*
			 * If the object's window onto the backing_object
			 * is large compared to the number of resident
			 * pages in the backing object, it makes sense to
			 * walk the backing_object's resident pages first.
			 *
			 * NOTE: Pages may be in both the existence map and
			 * resident.  So, we can't permanently decrement
			 * the rcount here because the second loop may
			 * find the same pages in the backing object'
			 * existence map that we found here and we would
			 * double-decrement the rcount.  We also may or
			 * may not have found the hint page.
			 */
			if (backing_rcount &&
#if	MACH_PAGEMAP
			    size > ((backing_object->existence_map) ?
			     backing_rcount : (backing_rcount >> 1))
#else
			    size > (backing_rcount >> 1)
#endif	/* MACH_PAGEMAP */
				) {
				unsigned int rc = rcount;
				vm_page_t p;

				backing_rcount = backing_object->resident_page_count;
				p = (vm_page_t)queue_first(&backing_object->memq);
				do {
					/* Until we get more than one lookup lock */
					if (lookups > 256) {
						vm_object_collapse_delays++;
						lookups = 0;
						mutex_pause(0);
					}

					offset = (p->offset - backing_offset);
					if (offset < object->size &&
					    offset != hint_offset &&
					    !EXISTS_IN_OBJECT(object, offset, rc)) {
						/* found a dependency */
						object->cow_hint = (vm_offset_t)offset;
						break;
					}
					p = (vm_page_t) queue_next(&p->listq);

				} while (--backing_rcount);
				if (backing_rcount != 0 ) {
					/* try and collapse the rest of the shadow chain */
					if (object != original_object) {
						vm_object_unlock(object);
					}
					object = backing_object;
					continue;
				}
			}

			/*
			 * Walk through the offsets looking for pages in the
			 * backing object that show through to the object.
			 */
#if	MACH_PAGEMAP
			if (backing_rcount || backing_object->existence_map) {
#else
			if (backing_rcount) {
#endif	/* MACH_PAGEMAP */
				offset = hint_offset;

				while((offset =
				      (offset + PAGE_SIZE_64 < object->size) ?
				      (offset + PAGE_SIZE_64) : 0) != hint_offset) {

					/* Until we get more than one lookup lock */
					if (lookups > 256) {
						vm_object_collapse_delays++;
						lookups = 0;
						mutex_pause(0);
					}

					if (EXISTS_IN_OBJECT(backing_object, offset +
					    backing_offset, backing_rcount) &&
					    !EXISTS_IN_OBJECT(object, offset, rcount)) {
						/* found a dependency */
						object->cow_hint = (vm_offset_t)offset;
						break;
					}
				}
				if (offset != hint_offset) {
					/* try and collapse the rest of the shadow chain */
					if (object != original_object) {
						vm_object_unlock(object);
					}
					object = backing_object;
					continue;
				}
			}
		}

		/* reset the offset hint for any objects deeper in the chain */
		object->cow_hint = (vm_offset_t)0;

		/*
		 *	All interesting pages in the backing object
		 *	already live in the parent or its pager.
		 *	Thus we can bypass the backing object.
		 */

		vm_object_do_bypass(object, backing_object);
		vm_object_collapse_do_bypass++;

		/*
		 *	Try again with this object's new backing object.
		 */

		continue;
	}

	if (object != original_object) {
		vm_object_unlock(object);
	}
}
/*
 *	Routine:	vm_object_page_remove: [internal]
 *	Purpose:
 *		Removes all physical pages in the specified
 *		object range from the object's list of pages.
 *
 *	In/out conditions:
 *		The object must be locked.
 *		The object must not have paging_in_progress, usually
 *		guaranteed by not having a pager.
 */
unsigned int vm_object_page_remove_lookup = 0;
unsigned int vm_object_page_remove_iterate = 0;

__private_extern__ void
vm_object_page_remove(
	register vm_object_t		object,
	register vm_object_offset_t	start,
	register vm_object_offset_t	end)
{
	register vm_page_t	p, next;

	/*
	 *	One and two page removals are most popular.
	 *	The factor of 16 here is somewhat arbitrary.
	 *	It balances vm_object_lookup vs iteration.
	 */

	if (atop_64(end - start) < (unsigned)object->resident_page_count/16) {
		vm_object_page_remove_lookup++;

		for (; start < end; start += PAGE_SIZE_64) {
			p = vm_page_lookup(object, start);
			if (p != VM_PAGE_NULL) {
				assert(!p->cleaning && !p->pageout);
				if (!p->fictitious && p->pmapped)
					pmap_disconnect(p->phys_page);
				VM_PAGE_FREE(p);
			}
		}
	} else {
		vm_object_page_remove_iterate++;

		p = (vm_page_t) queue_first(&object->memq);
		while (!queue_end(&object->memq, (queue_entry_t) p)) {
			next = (vm_page_t) queue_next(&p->listq);
			if ((start <= p->offset) && (p->offset < end)) {
				assert(!p->cleaning && !p->pageout);
				if (!p->fictitious && p->pmapped)
					pmap_disconnect(p->phys_page);
				VM_PAGE_FREE(p);
			}
			p = next;
		}
	}
}
/*
 *	Routine:	vm_object_coalesce
 *	Function:	Coalesces two objects backing up adjoining
 *			regions of memory into a single object.
 *
 *	returns TRUE if objects were combined.
 *
 *	NOTE:	Only works at the moment if the second object is NULL -
 *		if it's not, which object do we lock first?
 *
 *	Parameters:
 *		prev_object	First object to coalesce
 *		prev_offset	Offset into prev_object
 *		next_object	Second object into coalesce
 *		next_offset	Offset into next_object
 *
 *		prev_size	Size of reference to prev_object
 *		next_size	Size of reference to next_object
 *
 *	Conditions:
 *	The object(s) must *not* be locked. The map must be locked
 *	to preserve the reference to the object(s).
 */
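/*
 * Illustrative sketch only (not part of the original source): map-level
 * code extending a previous allocation might try to reuse the existing
 * object roughly like this (identifiers hypothetical); if the call
 * returns FALSE, a new object is used for the new range instead.
 *
 *	if (vm_object_coalesce(prev_object, VM_OBJECT_NULL,
 *			       prev_offset, (vm_object_offset_t) 0,
 *			       prev_size, new_size)) {
 *		// prev_object now also backs the newly added range
 *	}
 */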
static int vm_object_coalesce_count = 0;

__private_extern__ boolean_t
vm_object_coalesce(
	register vm_object_t		prev_object,
	vm_object_t			next_object,
	vm_object_offset_t		prev_offset,
	__unused vm_object_offset_t	next_offset,
	vm_object_size_t		prev_size,
	vm_object_size_t		next_size)
{
	vm_object_size_t	newsize;

	if (next_object != VM_OBJECT_NULL) {
		return(FALSE);
	}

	if (prev_object == VM_OBJECT_NULL) {
		return(TRUE);
	}

	XPR(XPR_VM_OBJECT,
	    "vm_object_coalesce: 0x%X prev_off 0x%X prev_size 0x%X next_size 0x%X\n",
	    (integer_t)prev_object, prev_offset, prev_size, next_size, 0);

	vm_object_lock(prev_object);

	/*
	 *	Try to collapse the object first
	 */
	vm_object_collapse(prev_object, prev_offset, TRUE);

	/*
	 *	Can't coalesce if pages not mapped to
	 *	prev_entry may be in use any way:
	 *	. more than one reference
	 *	. paged out
	 *	. shadows another object
	 *	. has a copy elsewhere
	 *	. is purgeable
	 *	. paging references (pages might be in page-list)
	 */

	if ((prev_object->ref_count > 1) ||
	    prev_object->pager_created ||
	    (prev_object->shadow != VM_OBJECT_NULL) ||
	    (prev_object->copy != VM_OBJECT_NULL) ||
	    (prev_object->true_share != FALSE) ||
	    (prev_object->purgable != VM_PURGABLE_DENY) ||
	    (prev_object->paging_in_progress != 0)) {
		vm_object_unlock(prev_object);
		return(FALSE);
	}

	vm_object_coalesce_count++;

	/*
	 *	Remove any pages that may still be in the object from
	 *	a previous deallocation.
	 */
	vm_object_page_remove(prev_object,
		prev_offset + prev_size,
		prev_offset + prev_size + next_size);

	/*
	 *	Extend the object if necessary.
	 */
	newsize = prev_offset + prev_size + next_size;
	if (newsize > prev_object->size) {
#if	MACH_PAGEMAP
		/*
		 *	We cannot extend an object that has existence info,
		 *	since the existence info might then fail to cover
		 *	the entire object.
		 *
		 *	This assertion must be true because the object
		 *	has no pager, and we only create existence info
		 *	for objects with pagers.
		 */
		assert(prev_object->existence_map == VM_EXTERNAL_NULL);
#endif	/* MACH_PAGEMAP */
		prev_object->size = newsize;
	}

	vm_object_unlock(prev_object);
	return(TRUE);
}
/*
 *	Attach a set of physical pages to an object, so that they can
 *	be mapped by mapping the object.  Typically used to map IO memory.
 *
 *	The mapping function and its private data are used to obtain the
 *	physical addresses for each page to be mapped.
 */
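/*
 * Illustrative sketch only (not part of the original source): a trivial
 * mapping function for a physically contiguous region.  "io_base" and
 * "contig_map_fn" are hypothetical names, not kernel API.
 *
 *	static vm_object_offset_t
 *	contig_map_fn(void *map_fn_data, vm_object_offset_t offset)
 *	{
 *		vm_object_offset_t io_base = *(vm_object_offset_t *)map_fn_data;
 *		return io_base + offset;	// physical address for this page
 *	}
 *
 *	vm_object_page_map(object, 0, size, contig_map_fn, &io_base);
 */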
void
vm_object_page_map(
	vm_object_t		object,
	vm_object_offset_t	offset,
	vm_object_size_t	size,
	vm_object_offset_t	(*map_fn)(void *map_fn_data,
		vm_object_offset_t offset),
	void 		*map_fn_data)	/* private to map_fn */
{
	int64_t			num_pages;
	int			i;
	vm_page_t		m;
	vm_page_t		old_page;
	vm_object_offset_t	addr;

	num_pages = atop_64(size);

	for (i = 0; i < num_pages; i++, offset += PAGE_SIZE_64) {

	    addr = (*map_fn)(map_fn_data, offset);

	    while ((m = vm_page_grab_fictitious()) == VM_PAGE_NULL)
		vm_page_more_fictitious();

	    vm_object_lock(object);
	    if ((old_page = vm_page_lookup(object, offset))
			!= VM_PAGE_NULL)
	    {
		vm_page_lock_queues();
		vm_page_free(old_page);
		vm_page_unlock_queues();
	    }

	    vm_page_init(m, addr);
	    /* private normally requires lock_queues but since we */
	    /* are initializing the page, its not necessary here  */
	    m->private = TRUE;		/* don`t free page */

	    vm_page_insert(m, object, offset);

	    PAGE_WAKEUP_DONE(m);
	    vm_object_unlock(object);
	}
}
#include <mach_kdb.h>

#if	MACH_KDB
#include <ddb/db_output.h>
#include <vm/vm_print.h>

#define printf	kdbprintf

extern boolean_t	vm_object_cached(
				vm_object_t object);

extern void		print_bitstring(
				char byte);

boolean_t	vm_object_print_pages = FALSE;

void
print_bitstring(
	char byte)
{
	printf("%c%c%c%c%c%c%c%c",
	       ((byte & (1 << 0)) ? '1' : '0'),
	       ((byte & (1 << 1)) ? '1' : '0'),
	       ((byte & (1 << 2)) ? '1' : '0'),
	       ((byte & (1 << 3)) ? '1' : '0'),
	       ((byte & (1 << 4)) ? '1' : '0'),
	       ((byte & (1 << 5)) ? '1' : '0'),
	       ((byte & (1 << 6)) ? '1' : '0'),
	       ((byte & (1 << 7)) ? '1' : '0'));
}

boolean_t
vm_object_cached(
	register vm_object_t object)
{
	register vm_object_t o;

	queue_iterate(&vm_object_cached_list, o, vm_object_t, cached_list) {
		if (object == o) {
			return TRUE;
		}
	}
	return FALSE;
}

#if	MACH_PAGEMAP
/*
 *	vm_external_print:	[ debug ]
 */
void
vm_external_print(
	vm_external_map_t	emap,
	vm_size_t		size)
{
	if (emap == VM_EXTERNAL_NULL) {
		printf("0  ");
	} else {
		vm_size_t existence_size = stob(size);
		printf("{ size=%d, map=[", existence_size);
		if (existence_size > 0) {
			print_bitstring(emap[0]);
		}
		if (existence_size > 1) {
			print_bitstring(emap[1]);
		}
		if (existence_size > 2) {
			printf("...");
			print_bitstring(emap[existence_size-1]);
		}
		printf("] }\n");
	}
	return;
}
#endif	/* MACH_PAGEMAP */
void
vm_follow_object(
	vm_object_t object)
{
	int orig_db_indent = db_indent;

	while (TRUE) {
		if (object == VM_OBJECT_NULL) {
			db_indent = orig_db_indent;
			return;
		}

		iprintf("object 0x%x", object);

		printf(", shadow=0x%x", object->shadow);
		printf(", copy=0x%x", object->copy);
		printf(", pager=0x%x", object->pager);
		printf(", ref=%d\n", object->ref_count);

		db_indent++;
		object = object->shadow;
	}
}
/*
 *	vm_object_print:	[ debug ]
 */
void
vm_object_print(db_expr_t	db_addr, __unused boolean_t	have_addr,
		__unused db_expr_t	arg_count, __unused char	*modif)
{
	vm_object_t	object;
	register vm_page_t p;
	const char *s;
	register int count;

	object = (vm_object_t) (long) db_addr;
	if (object == VM_OBJECT_NULL)
		return;

	iprintf("object 0x%x\n", object);

	db_indent += 2;

	iprintf("size=0x%x", object->size);
	printf(", memq_hint=%p", object->memq_hint);
	printf(", ref_count=%d\n", object->ref_count);
#if	TASK_SWAPPER
	printf("res_count=%d, ", object->res_count);
#endif	/* TASK_SWAPPER */
	printf("resident_page_count=%d\n", object->resident_page_count);

	iprintf("shadow=0x%x", object->shadow);
	if (object->shadow) {
		register int i = 0;
		vm_object_t shadow = object;
		while((shadow = shadow->shadow))
			i++;
		printf(" (depth %d)", i);
	}
	printf(", copy=0x%x", object->copy);
	printf(", shadow_offset=0x%x", object->shadow_offset);
	printf(", last_alloc=0x%x\n", object->last_alloc);

	iprintf("pager=0x%x", object->pager);
	printf(", paging_offset=0x%x", object->paging_offset);
	printf(", pager_control=0x%x\n", object->pager_control);

	iprintf("copy_strategy=%d[", object->copy_strategy);
	switch (object->copy_strategy) {
	case MEMORY_OBJECT_COPY_NONE:
		printf("copy_none");
		break;

	case MEMORY_OBJECT_COPY_CALL:
		printf("copy_call");
		break;

	case MEMORY_OBJECT_COPY_DELAY:
		printf("copy_delay");
		break;

	case MEMORY_OBJECT_COPY_SYMMETRIC:
		printf("copy_symmetric");
		break;

	case MEMORY_OBJECT_COPY_INVALID:
		printf("copy_invalid");
		break;

	default:
		printf("?");
	}
	printf("]");

	iprintf("all_wanted=0x%x<", object->all_wanted);
	s = "";
	if (vm_object_wanted(object, VM_OBJECT_EVENT_INITIALIZED)) {
		printf("%sinit", s);
		s = ",";
	}
	if (vm_object_wanted(object, VM_OBJECT_EVENT_PAGER_READY)) {
		printf("%sready", s);
		s = ",";
	}
	if (vm_object_wanted(object, VM_OBJECT_EVENT_PAGING_IN_PROGRESS)) {
		printf("%spaging", s);
		s = ",";
	}
	if (vm_object_wanted(object, VM_OBJECT_EVENT_LOCK_IN_PROGRESS)) {
		printf("%slock", s);
		s = ",";
	}
	if (vm_object_wanted(object, VM_OBJECT_EVENT_UNCACHING)) {
		printf("%suncaching", s);
		s = ",";
	}
	if (vm_object_wanted(object, VM_OBJECT_EVENT_COPY_CALL)) {
		printf("%scopy_call", s);
		s = ",";
	}
	if (vm_object_wanted(object, VM_OBJECT_EVENT_CACHING)) {
		printf("%scaching", s);
		s = ",";
	}
	printf(">");
	printf(", paging_in_progress=%d\n", object->paging_in_progress);

	iprintf("%screated, %sinit, %sready, %spersist, %strusted, %spageout, %s, %s\n",
		(object->pager_created ? "" : "!"),
		(object->pager_initialized ? "" : "!"),
		(object->pager_ready ? "" : "!"),
		(object->can_persist ? "" : "!"),
		(object->pager_trusted ? "" : "!"),
		(object->pageout ? "" : "!"),
		(object->internal ? "internal" : "external"),
		(object->temporary ? "temporary" : "permanent"));
	iprintf("%salive, %spurgeable, %spurgeable_volatile, %spurgeable_empty, %sshadowed, %scached, %sprivate\n",
		(object->alive ? "" : "!"),
		((object->purgable != VM_PURGABLE_DENY) ? "" : "!"),
		((object->purgable == VM_PURGABLE_VOLATILE) ? "" : "!"),
		((object->purgable == VM_PURGABLE_EMPTY) ? "" : "!"),
		(object->shadowed ? "" : "!"),
		(vm_object_cached(object) ? "" : "!"),
		(object->private ? "" : "!"));
	iprintf("%sadvisory_pageout, %ssilent_overwrite\n",
		(object->advisory_pageout ? "" : "!"),
		(object->silent_overwrite ? "" : "!"));

#if	MACH_PAGEMAP
	iprintf("existence_map=");
	vm_external_print(object->existence_map, object->size);
#endif	/* MACH_PAGEMAP */
#if	MACH_ASSERT
	iprintf("paging_object=0x%x\n", object->paging_object);
#endif	/* MACH_ASSERT */

	if (vm_object_print_pages) {
		count = 0;
		p = (vm_page_t) queue_first(&object->memq);
		while (!queue_end(&object->memq, (queue_entry_t) p)) {
			if (count == 0) {
				iprintf("memory:=");
			} else if (count == 2) {
				printf("\n");
				iprintf(" ...");
				count = 0;
			} else {
				printf(",");
			}
			count++;

			printf("(off=0x%llX,page=%p)", p->offset, p);
			p = (vm_page_t) queue_next(&p->listq);
		}
		if (count != 0) {
			printf("\n");
		}
	}
	db_indent -= 2;
}
/*
 *	vm_object_find		[ debug ]
 *
 *	Find all tasks which reference the given vm_object.
 */

boolean_t	vm_object_find(vm_object_t object);
boolean_t	vm_object_print_verbose = FALSE;

boolean_t
vm_object_find(
	vm_object_t	object)
{
	task_t task;
	vm_map_t map;
	vm_map_entry_t entry;
	boolean_t found = FALSE;

	queue_iterate(&tasks, task, task_t, tasks) {
		map = task->map;
		for (entry = vm_map_first_entry(map);
		     entry && entry != vm_map_to_entry(map);
		     entry = entry->vme_next) {

			vm_object_t obj;

			/*
			 * For the time being skip submaps,
			 * only the kernel can have submaps,
			 * and unless we are interested in
			 * kernel objects, we can simply skip
			 * submaps. See sb/dejan/nmk18b7/src/mach_kernel/vm
			 * for a full solution.
			 */
			if (entry->is_sub_map)
				continue;

			obj = entry->object.vm_object;

			while (obj != VM_OBJECT_NULL) {
				if (obj == object) {
					if (!found) {
						printf("TASK\t\tMAP\t\tENTRY\n");
						found = TRUE;
					}
					printf("0x%x\t0x%x\t0x%x\n",
					       task, map, entry);
				}
				obj = obj->shadow;
			}
		}
	}

	return(found);
}

#endif	/* MACH_KDB */
kern_return_t
vm_object_populate_with_private(
	vm_object_t		object,
	vm_object_offset_t	offset,
	ppnum_t			phys_page,
	vm_size_t		size)
{
	ppnum_t			base_page;
	vm_object_offset_t	base_offset;

	if(!object->private)
		return KERN_FAILURE;

	base_page = phys_page;

	vm_object_lock(object);
	if(!object->phys_contiguous) {
		vm_page_t	m;

		if((base_offset = trunc_page_64(offset)) != offset) {
			vm_object_unlock(object);
			return KERN_FAILURE;
		}
		base_offset += object->paging_offset;

		while (size) {
			m = vm_page_lookup(object, base_offset);
			if(m != VM_PAGE_NULL) {
				if (m->fictitious) {
					if (m->phys_page != vm_page_guard_addr) {
						vm_page_lockspin_queues();
						m->fictitious = FALSE;
						m->private = TRUE;
						m->phys_page = base_page;
						m->list_req_pending = TRUE;
						vm_page_unlock_queues();
					}
				} else if (m->phys_page != base_page) {
					/*
					 * pmap call to clear old mapping
					 */
					pmap_disconnect(m->phys_page);
					m->phys_page = base_page;
				}
				/*
				 * We're not pointing to the same
				 * physical page any longer and the
				 * contents of the new one are not
				 * supposed to be encrypted.
				 * XXX What happens to the original
				 * physical page. Is it lost ?
				 */
				m->encrypted = FALSE;
			} else {
				while ((m = vm_page_grab_fictitious())
					    == VM_PAGE_NULL)
					vm_page_more_fictitious();
				vm_page_lockspin_queues();
				m->fictitious = FALSE;
				m->private = TRUE;
				m->phys_page = base_page;
				m->list_req_pending = TRUE;
				vm_page_unlock_queues();
				vm_page_insert(m, object, base_offset);
			}
			base_page++;			/* Go to the next physical page */
			base_offset += PAGE_SIZE;
			size -= PAGE_SIZE;
		}
	} else {
		/* NOTE: we should check the original settings here */
		/* if we have a size > zero a pmap call should be made */
		/* to disable the range */

		/* shadows on contiguous memory are not allowed */
		/* we therefore can use the offset field */
		object->shadow_offset = (vm_object_offset_t)(phys_page << 12);
		object->size = size;
	}
	vm_object_unlock(object);
	return KERN_SUCCESS;
}
/*
 *	memory_object_free_from_cache:
 *
 *	Walk the vm_object cache list, removing and freeing vm_objects
 *	which are backed by the pager identified by the caller, (pager_ops).
 *	Remove up to "count" objects, if that many are available in the cache.
 *
 *	Walk the list at most once, return the number of vm_objects
 *	actually freed.
 */
__private_extern__ kern_return_t
memory_object_free_from_cache(
	__unused host_t			host,
	memory_object_pager_ops_t	pager_ops,
	int				*count)
{
	int	object_released = 0;
	register vm_object_t object = VM_OBJECT_NULL;
	vm_object_t shadow;

	if(host == HOST_NULL)
		return(KERN_INVALID_ARGUMENT);

 try_again:
	vm_object_cache_lock();

	queue_iterate(&vm_object_cached_list, object,
			vm_object_t, cached_list) {
		if (object->pager &&
		    (pager_ops == object->pager->mo_pager_ops)) {
			vm_object_lock(object);
			queue_remove(&vm_object_cached_list, object,
					vm_object_t, cached_list);
			vm_object_cached_count--;

			/*
			 *	Since this object is in the cache, we know
			 *	that it is initialized and has only a pager's
			 *	(implicit) reference. Take a reference to avoid
			 *	recursive deallocations.
			 */
			assert(object->pager_initialized);
			assert(object->ref_count == 0);
			vm_object_lock_assert_exclusive(object);
			object->ref_count++;

			/*
			 *	Terminate the object.
			 *	If the object had a shadow, we let
			 *	vm_object_deallocate deallocate it.
			 *	"pageout" objects have a shadow, but
			 *	maintain a "paging reference" rather
			 *	than a normal reference.
			 *	(We are careful here to limit recursion.)
			 */
			shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
			if ((vm_object_terminate(object) == KERN_SUCCESS)
					&& (shadow != VM_OBJECT_NULL)) {
				vm_object_deallocate(shadow);
			}

			if(object_released++ == *count)
				return KERN_SUCCESS;
			goto try_again;
		}
	}
	vm_object_cache_unlock();
	*count = object_released;
	return KERN_SUCCESS;
}
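/*
 * Illustrative sketch (not from this file): how a pager subsystem might ask
 * the cache to drop up to N of the cached objects it backs.  "host" and
 * "my_pager_ops" are placeholders for whatever the caller already has on
 * hand; on return "count" reports how many objects were actually freed.
 */
#if 0
	int count = 16;		/* free at most 16 cached objects */

	(void) memory_object_free_from_cache(host, &my_pager_ops, &count);
#endif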
kern_return_t
memory_object_create_named(
	memory_object_t		pager,
	memory_object_offset_t	size,
	memory_object_control_t	*control)
{
	vm_object_t		object;
	vm_object_hash_entry_t	entry;

	*control = MEMORY_OBJECT_CONTROL_NULL;
	if (pager == MEMORY_OBJECT_NULL)
		return KERN_INVALID_ARGUMENT;

	vm_object_cache_lock();
	entry = vm_object_hash_lookup(pager, FALSE);
	if ((entry != VM_OBJECT_HASH_ENTRY_NULL) &&
	    (entry->object != VM_OBJECT_NULL)) {
		if (entry->object->named == TRUE)
			panic("memory_object_create_named: caller already holds the right");
	}

	vm_object_cache_unlock();
	if ((object = vm_object_enter(pager, size, FALSE, FALSE, TRUE))
	    == VM_OBJECT_NULL) {
		return(KERN_INVALID_OBJECT);
	}

	/* wait for object (if any) to be ready */
	if (object != VM_OBJECT_NULL) {
		vm_object_lock(object);
		object->named = TRUE;
		while (!object->pager_ready) {
			vm_object_sleep(object,
					VM_OBJECT_EVENT_PAGER_READY,
					THREAD_UNINT);
		}
		*control = object->pager_control;
		vm_object_unlock(object);
	}
	return (KERN_SUCCESS);
}
/*
 *	Routine:	memory_object_recover_named [user interface]
 *	Purpose:
 *		Attempt to recover a named reference for a VM object.
 *		VM will verify that the object has not already started
 *		down the termination path, and if it has, will optionally
 *		wait for that to finish.
 *	Returns:
 *		KERN_SUCCESS - we recovered a named reference on the object
 *		KERN_FAILURE - we could not recover a reference (object dead)
 *		KERN_INVALID_ARGUMENT - bad memory object control
 */
kern_return_t
memory_object_recover_named(
	memory_object_control_t	control,
	boolean_t		wait_on_terminating)
{
	vm_object_t		object;

restart:
	vm_object_cache_lock();
	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL) {
		vm_object_cache_unlock();
		return (KERN_INVALID_ARGUMENT);
	}

	vm_object_lock(object);

	if (object->terminating && wait_on_terminating) {
		vm_object_cache_unlock();
		vm_object_wait(object,
			VM_OBJECT_EVENT_PAGING_IN_PROGRESS,
			THREAD_UNINT);
		vm_object_cache_lock();
		goto restart;
	}

	if (!object->alive) {
		vm_object_cache_unlock();
		vm_object_unlock(object);
		return KERN_FAILURE;
	}

	if (object->named == TRUE) {
		vm_object_cache_unlock();
		vm_object_unlock(object);
		return KERN_SUCCESS;
	}

	if((object->ref_count == 0) && (!object->terminating)){
		queue_remove(&vm_object_cached_list, object,
				     vm_object_t, cached_list);
		vm_object_cached_count--;
		XPR(XPR_VM_OBJECT_CACHE,
		    "memory_object_recover_named: removing %X, head (%X, %X)\n",
		    (integer_t)object,
		    (integer_t)vm_object_cached_list.next,
		    (integer_t)vm_object_cached_list.prev, 0,0);
	}

	vm_object_cache_unlock();

	object->named = TRUE;
	vm_object_lock_assert_exclusive(object);
	object->ref_count++;
	vm_object_res_reference(object);
	while (!object->pager_ready) {
		vm_object_sleep(object,
				VM_OBJECT_EVENT_PAGER_READY,
				THREAD_UNINT);
	}
	vm_object_unlock(object);
	return (KERN_SUCCESS);
}
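/*
 * Illustrative sketch (not from this file): a hypothetical caller that
 * tries to recover a named reference and reacts to the documented return
 * codes.  "control" is whatever memory_object_control_t the caller holds.
 */
#if 0
	kern_return_t kr;

	kr = memory_object_recover_named(control, TRUE);	/* wait out termination */
	if (kr == KERN_FAILURE) {
		/* object is dead: set up a fresh memory object instead */
	} else if (kr == KERN_INVALID_ARGUMENT) {
		/* "control" did not name a VM object */
	}
#endif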
/*
 *	vm_object_release_name:
 *
 *	Enforces name semantic on memory_object reference count decrement
 *	This routine should not be called unless the caller holds a name
 *	reference gained through the memory_object_create_named.
 *
 *	If the TERMINATE_IDLE flag is set, the call will return if the
 *	reference count is not 1. i.e. idle with the only remaining reference
 *	being the name.
 *	If the decision is made to proceed the name field flag is set to
 *	false and the reference count is decremented.  If the RESPECT_CACHE
 *	flag is set and the reference count has gone to zero, the
 *	memory_object is checked to see if it is cacheable otherwise when
 *	the reference count is zero, it is simply terminated.
 */
__private_extern__ kern_return_t
vm_object_release_name(
	vm_object_t	object,
	int		flags)
{
	vm_object_t	shadow;
	boolean_t	original_object = TRUE;

	while (object != VM_OBJECT_NULL) {

		/*
		 *	The cache holds a reference (uncounted) to
		 *	the object.  We must lock it before removing
		 *	the object.
		 */
		vm_object_cache_lock();
		vm_object_lock(object);
		assert(object->alive);
		if (original_object)
			assert(object->named);
		assert(object->ref_count > 0);

		/*
		 *	We have to wait for initialization before
		 *	destroying or caching the object.
		 */
		if (object->pager_created && !object->pager_initialized) {
			assert(!object->can_persist);
			vm_object_assert_wait(object,
					VM_OBJECT_EVENT_INITIALIZED,
					THREAD_UNINT);
			vm_object_unlock(object);
			vm_object_cache_unlock();
			thread_block(THREAD_CONTINUE_NULL);
			continue;
		}

		if (((object->ref_count > 1)
			&& (flags & MEMORY_OBJECT_TERMINATE_IDLE))
			|| (object->terminating)) {
			vm_object_unlock(object);
			vm_object_cache_unlock();
			return KERN_FAILURE;
		}
		if (flags & MEMORY_OBJECT_RELEASE_NO_OP) {
			vm_object_unlock(object);
			vm_object_cache_unlock();
			return KERN_SUCCESS;
		}

		if ((flags & MEMORY_OBJECT_RESPECT_CACHE) &&
		    (object->ref_count == 1)) {
			if (original_object)
				object->named = FALSE;
			vm_object_unlock(object);
			vm_object_cache_unlock();
			/* let vm_object_deallocate push this thing into */
			/* the cache, if that is where it is bound */
			vm_object_deallocate(object);
			return KERN_SUCCESS;
		}
		VM_OBJ_RES_DECR(object);
		shadow = object->pageout?VM_OBJECT_NULL:object->shadow;
		if(object->ref_count == 1) {
			if(vm_object_terminate(object) != KERN_SUCCESS) {
				if(original_object) {
					return KERN_FAILURE;
				} else {
					return KERN_SUCCESS;
				}
			}
			if (shadow != VM_OBJECT_NULL) {
				original_object = FALSE;
				object = shadow;
				continue;
			}
			return KERN_SUCCESS;
		} else {
			vm_object_lock_assert_exclusive(object);
			object->ref_count--;
			assert(object->ref_count > 0);
			if (original_object)
				object->named = FALSE;
			vm_object_unlock(object);
			vm_object_cache_unlock();
			return KERN_SUCCESS;
		}
	}
	/*NOTREACHED*/
	return KERN_FAILURE;
}
__private_extern__ kern_return_t
vm_object_lock_request(
	vm_object_t		object,
	vm_object_offset_t	offset,
	vm_object_size_t	size,
	memory_object_return_t	should_return,
	int			flags,
	vm_prot_t		prot)
{
	__unused boolean_t	should_flush;

	should_flush = flags & MEMORY_OBJECT_DATA_FLUSH;

	XPR(XPR_MEMORY_OBJECT,
	    "vm_o_lock_request, obj 0x%X off 0x%X size 0x%X flags %X prot %X\n",
	    (integer_t)object, offset, size,
	    (((should_return&1)<<1)|should_flush), prot);

	/*
	 *	Check for bogus arguments.
	 */
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	if ((prot & ~VM_PROT_ALL) != 0 && prot != VM_PROT_NO_CHANGE)
		return (KERN_INVALID_ARGUMENT);

	size = round_page_64(size);

	/*
	 *	Lock the object, and acquire a paging reference to
	 *	prevent the memory_object reference from being released.
	 */
	vm_object_lock(object);
	vm_object_paging_begin(object);

	(void)vm_object_update(object,
		offset, size, NULL, NULL, should_return, flags, prot);

	vm_object_paging_end(object);
	vm_object_unlock(object);

	return (KERN_SUCCESS);
}
/*
 * Empty a purgeable object by grabbing the physical pages assigned to it and
 * putting them on the free queue without writing them to backing store, etc.
 * When the pages are next touched they will be demand zero-fill pages.  We
 * skip pages which are busy, being paged in/out, wired, etc.  We do _not_
 * skip referenced/dirty pages, pages on the active queue, etc.  We're more
 * than happy to grab these since this is a purgeable object.  We mark the
 * object as "empty" after reaping its pages.
 *
 * On entry the object and page queues are locked, the object must be a
 * purgeable object with no delayed copies pending.
 */
unsigned int
vm_object_purge(vm_object_t object)
{
	vm_page_t	p, next;
	unsigned int	num_purged_pages;
	vm_page_t	local_freeq;
	unsigned long	local_freed;
	int		purge_loop_quota;
/* free pages as soon as we gather PURGE_BATCH_FREE_LIMIT pages to free */
#define PURGE_BATCH_FREE_LIMIT	50
/* release page queues lock every PURGE_LOOP_QUOTA iterations */
#define PURGE_LOOP_QUOTA	100

	num_purged_pages = 0;
	if (object->purgable == VM_PURGABLE_DENY)
		return num_purged_pages;

	assert(object->purgable != VM_PURGABLE_NONVOLATILE);
	object->purgable = VM_PURGABLE_EMPTY;

	assert(object->copy == VM_OBJECT_NULL);
	assert(object->copy_strategy == MEMORY_OBJECT_COPY_NONE);
	purge_loop_quota = PURGE_LOOP_QUOTA;

	local_freeq = VM_PAGE_NULL;
	local_freed = 0;

	/*
	 * Go through the object's resident pages and try and discard them.
	 */
	next = (vm_page_t)queue_first(&object->memq);
	while (!queue_end(&object->memq, (queue_entry_t)next)) {
		p = next;
		next = (vm_page_t)queue_next(&next->listq);

		if (purge_loop_quota-- == 0) {
			/*
			 * Avoid holding the page queues lock for too long.
			 * Let someone else take it for a while if needed.
			 * Keep holding the object's lock to guarantee that
			 * the object's page list doesn't change under us
			 * while we yield.
			 */
			if (local_freeq != VM_PAGE_NULL) {
				/*
				 * Flush our queue of pages to free.
				 */
				vm_page_free_list(local_freeq);
				local_freeq = VM_PAGE_NULL;
				local_freed = 0;
			}
			mutex_yield(&vm_page_queue_lock);

			/* resume with the current page and a new quota */
			purge_loop_quota = PURGE_LOOP_QUOTA;
		}

		if (p->busy || p->cleaning || p->laundry ||
		    p->list_req_pending) {
			/* page is being acted upon, so don't mess with it */
			continue;
		}
		if (p->wire_count) {
			/* don't discard a wired page */
			continue;
		}

		assert(!p->laundry);
		assert(p->object != kernel_object);

		/* we can discard this page */

		/* advertise that this page is in a transition state */
		p->busy = TRUE;

		if (p->pmapped == TRUE) {
			/* unmap the page */
			int refmod_state;

			refmod_state = pmap_disconnect(p->phys_page);
			if (refmod_state & VM_MEM_MODIFIED) {
				p->dirty = TRUE;
			}
		}

		if (p->dirty || p->precious) {
			/* we saved the cost of cleaning this page ! */
			num_purged_pages++;
			vm_page_purged_count++;
		}

		vm_page_free_prepare(p);

		/* ... and put it on our queue of pages to free */
		assert(p->pageq.next == NULL &&
		       p->pageq.prev == NULL);
		p->pageq.next = (queue_entry_t) local_freeq;
		local_freeq = p;
		if (++local_freed >= PURGE_BATCH_FREE_LIMIT) {
			/* flush our queue of pages to free */
			vm_page_free_list(local_freeq);
			local_freeq = VM_PAGE_NULL;
			local_freed = 0;
		}
	}

	/* flush our local queue of pages to free one last time */
	if (local_freeq != VM_PAGE_NULL) {
		vm_page_free_list(local_freeq);
		local_freeq = VM_PAGE_NULL;
		local_freed = 0;
	}

	return num_purged_pages;
}
/*
 * vm_object_purgable_control() allows the caller to control and investigate the
 * state of a purgeable object.  A purgeable object is created via a call to
 * vm_allocate() with VM_FLAGS_PURGABLE specified.  A purgeable object will
 * never be coalesced with any other object -- even other purgeable objects --
 * and will thus always remain a distinct object.  A purgeable object has
 * special semantics when its reference count is exactly 1.  If its reference
 * count is greater than 1, then a purgeable object will behave like a normal
 * object and attempts to use this interface will result in an error return
 * of KERN_INVALID_ARGUMENT.
 *
 * A purgeable object may be put into a "volatile" state which will make the
 * object's pages eligible for being reclaimed without paging to backing
 * store if the system runs low on memory.  If the pages in a volatile
 * purgeable object are reclaimed, the purgeable object is said to have been
 * "emptied."  When a purgeable object is emptied the system will reclaim as
 * many pages from the object as it can in a convenient manner (pages already
 * en route to backing store or busy for other reasons are left as is).  When
 * a purgeable object is made volatile, its pages will generally be reclaimed
 * before other pages in the application's working set.  This semantic is
 * generally used by applications which can recreate the data in the object
 * faster than it can be paged in.  One such example might be media assets
 * which can be reread from a much faster RAID volume.
 *
 * A purgeable object may be designated as "non-volatile" which means it will
 * behave like all other objects in the system with pages being written to and
 * read from backing store as needed to satisfy system memory needs.  If the
 * object was emptied before the object was made non-volatile, that fact will
 * be returned as the old state of the purgeable object (see
 * VM_PURGABLE_SET_STATE below).  In this case, any pages of the object which
 * were reclaimed as part of emptying the object will be refaulted in as
 * zero-fill on demand.  It is up to the application to note that an object
 * was emptied and recreate the object's contents if necessary.  When a
 * purgeable object is made non-volatile, its pages will generally not be paged
 * out to backing store in the immediate future.  A purgeable object may also
 * be manually emptied.
 *
 * Finally, the current state (non-volatile, volatile, volatile & empty) of a
 * volatile purgeable object may be queried at any time.  This information may
 * be used as a control input to let the application know when the system is
 * experiencing memory pressure and is reclaiming memory.
 *
 * The specified address may be any address within the purgeable object.  If
 * the specified address does not represent any object in the target task's
 * virtual address space, then KERN_INVALID_ADDRESS will be returned.  If the
 * object containing the specified address is not a purgeable object, then
 * KERN_INVALID_ARGUMENT will be returned.  Otherwise, KERN_SUCCESS will be
 * returned.
 *
 * The control parameter may be any one of VM_PURGABLE_SET_STATE or
 * VM_PURGABLE_GET_STATE.  For VM_PURGABLE_SET_STATE, the in/out parameter
 * state is used to set the new state of the purgeable object and return its
 * old state.  For VM_PURGABLE_GET_STATE, the current state of the purgeable
 * object is returned in the parameter state.
 *
 * The in/out parameter state may be one of VM_PURGABLE_NONVOLATILE,
 * VM_PURGABLE_VOLATILE or VM_PURGABLE_EMPTY.  These, respectively, represent
 * the non-volatile, volatile and volatile/empty states described above.
 * Setting the state of a purgeable object to VM_PURGABLE_EMPTY will
 * immediately reclaim as many pages in the object as can be conveniently
 * collected (some may have already been written to backing store or be
 * otherwise busy).
 *
 * The process of making a purgeable object non-volatile and determining its
 * previous state is atomic.  Thus, if a purgeable object is made
 * VM_PURGABLE_NONVOLATILE and the old state is returned as
 * VM_PURGABLE_VOLATILE, then the purgeable object's previous contents are
 * completely intact and will remain so until the object is made volatile
 * again.  If the old state is returned as VM_PURGABLE_EMPTY then the object
 * was reclaimed while it was in a volatile state and its previous contents
 * have been lost.
 */
/*
 * The object must be locked.
 */
kern_return_t
vm_object_purgable_control(
	vm_object_t	object,
	vm_purgable_t	control,
	int		*state)
{
	int		old_state;
	int		new_state;

	if (object == VM_OBJECT_NULL) {
		/*
		 * Object must already be present or it can't be purgeable.
		 */
		return KERN_INVALID_ARGUMENT;
	}

	/*
	 * Get current state of the purgeable object.
	 */
	old_state = object->purgable;
	if (old_state == VM_PURGABLE_DENY)
		return KERN_INVALID_ARGUMENT;

	/* purgeable can't have delayed copies - now or in the future */
	assert(object->copy == VM_OBJECT_NULL);
	assert(object->copy_strategy == MEMORY_OBJECT_COPY_NONE);

	/*
	 * Execute the desired operation.
	 */
	if (control == VM_PURGABLE_GET_STATE) {
		*state = old_state;
		return KERN_SUCCESS;
	}

	new_state = *state & VM_PURGABLE_STATE_MASK;
	switch (new_state) {
	case VM_PURGABLE_DENY:
	case VM_PURGABLE_NONVOLATILE:
		object->purgable = new_state;

		if (old_state != VM_PURGABLE_NONVOLATILE) {
			vm_page_lock_queues();
			assert(vm_page_purgeable_count >=
			       object->resident_page_count);
			vm_page_purgeable_count -= object->resident_page_count;

			if (old_state==VM_PURGABLE_VOLATILE) {
				assert(object->objq.next != NULL && object->objq.prev != NULL); /* object should be on a queue */
				purgeable_q_t queue = vm_purgeable_object_remove(object);

				vm_purgeable_token_delete_first(queue);
				assert(queue->debug_count_objects>=0);
			}
			vm_page_unlock_queues();
		}
		break;

	case VM_PURGABLE_VOLATILE:

		if ((old_state != VM_PURGABLE_NONVOLATILE) && (old_state != VM_PURGABLE_VOLATILE))
			break;
		purgeable_q_t queue;

		/* find the correct queue */
		if ((*state&VM_PURGABLE_ORDERING_MASK) == VM_PURGABLE_ORDERING_OBSOLETE)
			queue = &purgeable_queues[PURGEABLE_Q_TYPE_FIFO];
		else {
			if ((*state&VM_PURGABLE_BEHAVIOR_MASK) == VM_PURGABLE_BEHAVIOR_FIFO)
				queue = &purgeable_queues[PURGEABLE_Q_TYPE_FIFO];
			else
				queue = &purgeable_queues[PURGEABLE_Q_TYPE_LIFO];
		}

		if (old_state == VM_PURGABLE_NONVOLATILE) {
			/* try to add token... this can fail */
			vm_page_lock_queues();

			kern_return_t result = vm_purgeable_token_add(queue);
			if (result != KERN_SUCCESS) {
				vm_page_unlock_queues();
				return result;
			}
			vm_page_purgeable_count += object->resident_page_count;

			vm_page_unlock_queues();

			object->purgable = new_state;

			/* object should not be on a queue */
			assert(object->objq.next == NULL && object->objq.prev == NULL);
		}
		else if (old_state == VM_PURGABLE_VOLATILE) {
			/*
			 * if reassigning priorities / purgeable groups, we don't change the
			 * token queue. So moving priorities will not make pages stay around longer.
			 * Reasoning is that the algorithm gives most priority to the most important
			 * object. If a new token is added, the most important object's priority is boosted.
			 * This biases the system already for purgeable queues that move a lot.
			 * It doesn't seem more biasing is necessary in this case, where no new object is added.
			 */
			assert(object->objq.next != NULL && object->objq.prev != NULL); /* object should be on a queue */

			purgeable_q_t old_queue=vm_purgeable_object_remove(object);

			if (old_queue != queue) {
				kern_return_t result;

				/* Changing queue. Have to move token. */
				vm_page_lock_queues();
				vm_purgeable_token_delete_first(old_queue);
				result = vm_purgeable_token_add(queue);
				vm_page_unlock_queues();

				assert(result==KERN_SUCCESS);   /* this should never fail since we just freed a token */
			}
		}
		vm_purgeable_object_add(object, queue, (*state&VM_VOLATILE_GROUP_MASK)>>VM_VOLATILE_GROUP_SHIFT);

		assert(queue->debug_count_objects>=0);

		break;

	case VM_PURGABLE_EMPTY:
		if (old_state != new_state) {
			assert(old_state==VM_PURGABLE_NONVOLATILE || old_state==VM_PURGABLE_VOLATILE);
			if(old_state==VM_PURGABLE_VOLATILE) {
				assert(object->objq.next != NULL && object->objq.prev != NULL); /* object should be on a queue */
				purgeable_q_t old_queue=vm_purgeable_object_remove(object);

				vm_page_lock_queues();
				vm_purgeable_token_delete_first(old_queue);
			}

			if (old_state==VM_PURGABLE_NONVOLATILE) {
				vm_page_purgeable_count += object->resident_page_count;
				vm_page_lock_queues();
			}
			(void) vm_object_purge(object);
			vm_page_unlock_queues();
		}
		break;
	}
	*state = old_state;

	return KERN_SUCCESS;
}
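/*
 * Illustrative sketch (user-space view, not part of this file): marking a
 * buffer volatile and later checking whether it was emptied.  Assumes a
 * task-local buffer previously created with vm_allocate(..., VM_FLAGS_PURGABLE);
 * "buffer_addr" is a hypothetical name.
 */
#if 0
	int state;
	kern_return_t kr;

	/* make the cache volatile: the VM may reclaim it under pressure */
	state = VM_PURGABLE_VOLATILE;
	kr = vm_purgable_control(mach_task_self(), buffer_addr,
				 VM_PURGABLE_SET_STATE, &state);

	/* ... later: take it back before reusing the data ... */
	state = VM_PURGABLE_NONVOLATILE;
	kr = vm_purgable_control(mach_task_self(), buffer_addr,
				 VM_PURGABLE_SET_STATE, &state);
	if (kr == KERN_SUCCESS && state == VM_PURGABLE_EMPTY) {
		/* contents were reclaimed; regenerate the data */
	}
#endif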
#if	TASK_SWAPPER
/*
 *	vm_object_res_deallocate
 *
 *	(recursively) decrement residence counts on vm objects and their shadows.
 *	Called from vm_object_deallocate and when swapping out an object.
 *
 *	The object is locked, and remains locked throughout the function,
 *	even as we iterate down the shadow chain.  Locks on intermediate objects
 *	will be dropped, but not the original object.
 *
 *	NOTE: this function used to use recursion, rather than iteration.
 */
__private_extern__ void
vm_object_res_deallocate(
	vm_object_t	object)
{
	vm_object_t orig_object = object;
	/*
	 * Object is locked so it can be called directly
	 * from vm_object_deallocate.  Original object is never
	 * unlocked.
	 */
	assert(object->res_count > 0);
	while  (--object->res_count == 0) {
		assert(object->ref_count >= object->res_count);
		vm_object_deactivate_all_pages(object);
		/* iterate on shadow, if present */
		if (object->shadow != VM_OBJECT_NULL) {
			vm_object_t tmp_object = object->shadow;
			vm_object_lock(tmp_object);
			if (object != orig_object)
				vm_object_unlock(object);
			object = tmp_object;
			assert(object->res_count > 0);
		} else
			break;
	}
	if (object != orig_object)
		vm_object_unlock(object);
}
/*
 *	vm_object_res_reference
 *
 *	Internal function to increment residence count on a vm object
 *	and its shadows.  It is called only from vm_object_reference, and
 *	when swapping in a vm object, via vm_map_swap.
 *
 *	The object is locked, and remains locked throughout the function,
 *	even as we iterate down the shadow chain.  Locks on intermediate objects
 *	will be dropped, but not the original object.
 *
 *	NOTE: this function used to use recursion, rather than iteration.
 */
__private_extern__ void
vm_object_res_reference(
	vm_object_t	object)
{
	vm_object_t orig_object = object;
	/*
	 * Object is locked, so this can be called directly
	 * from vm_object_reference.  This lock is never released.
	 */
	while  ((++object->res_count == 1)  &&
		(object->shadow != VM_OBJECT_NULL)) {
		vm_object_t tmp_object = object->shadow;

		assert(object->ref_count >= object->res_count);
		vm_object_lock(tmp_object);
		if (object != orig_object)
			vm_object_unlock(object);
		object = tmp_object;
	}
	if (object != orig_object)
		vm_object_unlock(object);
	assert(orig_object->ref_count >= orig_object->res_count);
}
#endif	/* TASK_SWAPPER */
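/*
 * The two residence-count routines above share a lock-coupling idiom:
 * take the next shadow's lock before dropping the current one, and never
 * drop the original caller-locked object.  A minimal generic sketch of
 * that pattern (names are placeholders, not from this file):
 */
#if 0
	vm_object_t cur = locked_object;	/* caller already holds this lock */

	while (cur->shadow != VM_OBJECT_NULL) {
		vm_object_t next = cur->shadow;

		vm_object_lock(next);		/* take next before dropping cur */
		if (cur != locked_object)
			vm_object_unlock(cur);
		cur = next;
	}
	if (cur != locked_object)
		vm_object_unlock(cur);
#endif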
/*
 *	vm_object_reference:
 *
 *	Gets another reference to the given object.
 */
#ifdef vm_object_reference
#undef vm_object_reference
#endif
__private_extern__ void
vm_object_reference(
	register vm_object_t	object)
{
	if (object == VM_OBJECT_NULL)
		return;

	vm_object_lock(object);
	assert(object->ref_count > 0);
	vm_object_reference_locked(object);
	vm_object_unlock(object);
}
#ifdef MACH_BSD
/*
 * Scale the vm_object_cache
 * This is required to make sure that the vm_object_cache is big
 * enough to effectively cache the mapped file.
 * This is really important with UBC as all the regular file vnodes
 * have memory object associated with them. Having this cache too
 * small results in rapid reclaim of vnodes and hurts performance a LOT!
 *
 * This is also needed as number of vnodes can be dynamically scaled.
 */
kern_return_t
adjust_vm_object_cache(
	__unused vm_size_t oval,
	vm_size_t nval)
{
	vm_object_cached_max = nval;
	vm_object_cache_trim(FALSE);
	return (KERN_SUCCESS);
}
#endif /* MACH_BSD */
/*
 * vm_object_transpose
 *
 * This routine takes two VM objects of the same size and exchanges
 * their backing store.
 * The objects should be "quiesced" via a UPL operation with UPL_SET_IO_WIRE
 * and UPL_BLOCK_ACCESS if they are referenced anywhere.
 *
 * The VM objects must not be locked by caller.
 */
kern_return_t
vm_object_transpose(
	vm_object_t		object1,
	vm_object_t		object2,
	vm_object_size_t	transpose_size)
{
	vm_object_t		tmp_object;
	kern_return_t		retval;
	boolean_t		object1_locked, object2_locked;
	boolean_t		object1_paging, object2_paging;
	vm_page_t		page;
	vm_object_offset_t	page_offset;

	tmp_object = VM_OBJECT_NULL;
	object1_locked = FALSE; object2_locked = FALSE;
	object1_paging = FALSE; object2_paging = FALSE;

	if (object1 == object2 ||
	    object1 == VM_OBJECT_NULL ||
	    object2 == VM_OBJECT_NULL) {
		/*
		 * If the 2 VM objects are the same, there's
		 * no point in exchanging their backing store.
		 */
		retval = KERN_INVALID_VALUE;
		goto done;
	}

	vm_object_lock(object1);
	object1_locked = TRUE;
	if (!object1->alive || object1->terminating ||
	    object1->copy || object1->shadow || object1->shadowed ||
	    object1->purgable != VM_PURGABLE_DENY) {
		/*
		 * We don't deal with copy or shadow objects (yet).
		 */
		retval = KERN_INVALID_VALUE;
		goto done;
	}
	/*
	 * Since we're about to mess with the object's backing store,
	 * mark it as "paging_in_progress".  Note that this is not enough
	 * to prevent any paging activity on this object, so the caller should
	 * have "quiesced" the objects beforehand, via a UPL operation with
	 * UPL_SET_IO_WIRE (to make sure all the pages are there and wired)
	 * and UPL_BLOCK_ACCESS (to mark the pages "busy").
	 */
	vm_object_paging_begin(object1);
	object1_paging = TRUE;
	vm_object_unlock(object1);
	object1_locked = FALSE;

	/*
	 * Same as above for the 2nd object...
	 */
	vm_object_lock(object2);
	object2_locked = TRUE;
	if (! object2->alive || object2->terminating ||
	    object2->copy || object2->shadow || object2->shadowed ||
	    object2->purgable != VM_PURGABLE_DENY) {
		retval = KERN_INVALID_VALUE;
		goto done;
	}
	vm_object_paging_begin(object2);
	object2_paging = TRUE;
	vm_object_unlock(object2);
	object2_locked = FALSE;

	/*
	 * Allocate a temporary VM object to hold object1's contents
	 * while we copy object2 to object1.
	 */
	tmp_object = vm_object_allocate(transpose_size);
	vm_object_lock(tmp_object);
	vm_object_paging_begin(tmp_object);
	tmp_object->can_persist = FALSE;

	/*
	 * Since we need to lock both objects at the same time,
	 * make sure we always lock them in the same order to
	 * avoid deadlocks.
	 */
	if (object1 < object2) {
		vm_object_lock(object1);
		vm_object_lock(object2);
	} else {
		vm_object_lock(object2);
		vm_object_lock(object1);
	}
	object1_locked = TRUE;
	object2_locked = TRUE;

	if (object1->size != object2->size ||
	    object1->size != transpose_size) {
		/*
		 * If the 2 objects don't have the same size, we can't
		 * exchange their backing stores or one would overflow.
		 * If their size doesn't match the caller's
		 * "transpose_size", we can't do it either because the
		 * transpose operation will affect the entire span of
		 * the objects.
		 */
		retval = KERN_INVALID_VALUE;
		goto done;
	}

	/*
	 * Transpose the lists of resident pages.
	 * This also updates the resident_page_count and the memq_hint.
	 */
	if (object1->phys_contiguous || queue_empty(&object1->memq)) {
		/*
		 * No pages in object1, just transfer pages
		 * from object2 to object1.  No need to go through
		 * an intermediate object.
		 */
		while (!queue_empty(&object2->memq)) {
			page = (vm_page_t) queue_first(&object2->memq);
			vm_page_rename(page, object1, page->offset, FALSE);
		}
		assert(queue_empty(&object2->memq));
	} else if (object2->phys_contiguous || queue_empty(&object2->memq)) {
		/*
		 * No pages in object2, just transfer pages
		 * from object1 to object2.  No need to go through
		 * an intermediate object.
		 */
		while (!queue_empty(&object1->memq)) {
			page = (vm_page_t) queue_first(&object1->memq);
			vm_page_rename(page, object2, page->offset, FALSE);
		}
		assert(queue_empty(&object1->memq));
	} else {
		/* transfer object1's pages to tmp_object */
		vm_page_lock_queues();
		while (!queue_empty(&object1->memq)) {
			page = (vm_page_t) queue_first(&object1->memq);
			page_offset = page->offset;
			vm_page_remove(page);
			page->offset = page_offset;
			queue_enter(&tmp_object->memq, page, vm_page_t, listq);
		}
		vm_page_unlock_queues();
		assert(queue_empty(&object1->memq));
		/* transfer object2's pages to object1 */
		while (!queue_empty(&object2->memq)) {
			page = (vm_page_t) queue_first(&object2->memq);
			vm_page_rename(page, object1, page->offset, FALSE);
		}
		assert(queue_empty(&object2->memq));
		/* transfer tmp_object's pages to object1 */
		while (!queue_empty(&tmp_object->memq)) {
			page = (vm_page_t) queue_first(&tmp_object->memq);
			queue_remove(&tmp_object->memq, page,
				     vm_page_t, listq);
			vm_page_insert(page, object2, page->offset);
		}
		assert(queue_empty(&tmp_object->memq));
	}

#define __TRANSPOSE_FIELD(field)				\
MACRO_BEGIN							\
	tmp_object->field = object1->field;			\
	object1->field = object2->field;			\
	object2->field = tmp_object->field;			\
MACRO_END

	/* "size" should be identical */
	assert(object1->size == object2->size);
	/* "Lock" refers to the object not its contents */
	/* "ref_count" refers to the object not its contents */
#if	TASK_SWAPPER
	/* "res_count" refers to the object not its contents */
#endif
	/* "resident_page_count" was updated above when transposing pages */
	/* there should be no "copy" */
	assert(!object1->copy);
	assert(!object2->copy);
	/* there should be no "shadow" */
	assert(!object1->shadow);
	assert(!object2->shadow);
	__TRANSPOSE_FIELD(shadow_offset); /* used by phys_contiguous objects */
	__TRANSPOSE_FIELD(pager);
	__TRANSPOSE_FIELD(paging_offset);
	__TRANSPOSE_FIELD(pager_control);
	/* update the memory_objects' pointers back to the VM objects */
	if (object1->pager_control != MEMORY_OBJECT_CONTROL_NULL) {
		memory_object_control_collapse(object1->pager_control,
					       object1);
	}
	if (object2->pager_control != MEMORY_OBJECT_CONTROL_NULL) {
		memory_object_control_collapse(object2->pager_control,
					       object2);
	}
	__TRANSPOSE_FIELD(copy_strategy);
	/* "paging_in_progress" refers to the object not its contents */
	assert(object1->paging_in_progress);
	assert(object2->paging_in_progress);
	/* "all_wanted" refers to the object not its contents */
	__TRANSPOSE_FIELD(pager_created);
	__TRANSPOSE_FIELD(pager_initialized);
	__TRANSPOSE_FIELD(pager_ready);
	__TRANSPOSE_FIELD(pager_trusted);
	__TRANSPOSE_FIELD(can_persist);
	__TRANSPOSE_FIELD(internal);
	__TRANSPOSE_FIELD(temporary);
	__TRANSPOSE_FIELD(private);
	__TRANSPOSE_FIELD(pageout);
	/* "alive" should be set */
	assert(object1->alive);
	assert(object2->alive);
	/* "purgeable" should be non-purgeable */
	assert(object1->purgable == VM_PURGABLE_DENY);
	assert(object2->purgable == VM_PURGABLE_DENY);
	/* "shadowed" refers to the object not its contents */
	__TRANSPOSE_FIELD(silent_overwrite);
	__TRANSPOSE_FIELD(advisory_pageout);
	__TRANSPOSE_FIELD(true_share);
	/* "terminating" should not be set */
	assert(!object1->terminating);
	assert(!object2->terminating);
	__TRANSPOSE_FIELD(named);
	/* "shadow_severed" refers to the object not its contents */
	__TRANSPOSE_FIELD(phys_contiguous);
	__TRANSPOSE_FIELD(nophyscache);
	/* "cached_list" should be NULL */
	assert(object1->cached_list.prev == NULL);
	assert(object1->cached_list.next == NULL);
	assert(object2->cached_list.prev == NULL);
	assert(object2->cached_list.next == NULL);
	/* "msr_q" is linked to the object not its contents */
	assert(queue_empty(&object1->msr_q));
	assert(queue_empty(&object2->msr_q));
	__TRANSPOSE_FIELD(last_alloc);
	__TRANSPOSE_FIELD(sequential);
	__TRANSPOSE_FIELD(pages_created);
	__TRANSPOSE_FIELD(pages_used);
#if	MACH_PAGEMAP
	__TRANSPOSE_FIELD(existence_map);
#endif
	__TRANSPOSE_FIELD(cow_hint);
#if	MACH_ASSERT
	__TRANSPOSE_FIELD(paging_object);
#endif
	__TRANSPOSE_FIELD(wimg_bits);
	__TRANSPOSE_FIELD(code_signed);
	__TRANSPOSE_FIELD(not_in_use);
	/* "uplq" refers to the object not its contents (see upl_transpose()) */

#undef __TRANSPOSE_FIELD

	retval = KERN_SUCCESS;

done:
	/*
	 * Cleanup.
	 */
	if (tmp_object != VM_OBJECT_NULL) {
		vm_object_paging_end(tmp_object);
		vm_object_unlock(tmp_object);
		/*
		 * Re-initialize the temporary object to avoid
		 * deallocating a real pager.
		 */
		_vm_object_allocate(transpose_size, tmp_object);
		vm_object_deallocate(tmp_object);
		tmp_object = VM_OBJECT_NULL;
	}

	if (object1_locked) {
		vm_object_unlock(object1);
		object1_locked = FALSE;
	}
	if (object2_locked) {
		vm_object_unlock(object2);
		object2_locked = FALSE;
	}
	if (object1_paging) {
		vm_object_lock(object1);
		vm_object_paging_end(object1);
		vm_object_unlock(object1);
		object1_paging = FALSE;
	}
	if (object2_paging) {
		vm_object_lock(object2);
		vm_object_paging_end(object2);
		vm_object_unlock(object2);
		object2_paging = FALSE;
	}

	return retval;
}
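/*
 * The deadlock-avoidance idiom used above (take the two object locks in
 * ascending address order) is worth reusing anywhere two distinct VM
 * objects must be held at once.  A minimal generic sketch; "obj_a" and
 * "obj_b" are placeholders and are assumed to be different objects:
 */
#if 0
	if (obj_a < obj_b) {		/* order by address, not by role */
		vm_object_lock(obj_a);
		vm_object_lock(obj_b);
	} else {
		vm_object_lock(obj_b);
		vm_object_lock(obj_a);
	}
	/* ... critical section touching both objects ... */
	vm_object_unlock(obj_b);
	vm_object_unlock(obj_a);
#endif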
/*
 *	vm_object_build_cluster
 *
 *	Determine how big a cluster we should issue an I/O for...
 *
 *	Inputs:   *start == offset of page needed
 *		  *length == maximum cluster pager can handle
 *	Outputs:  *start == beginning offset of cluster
 *		  *length == length of cluster to try
 *
 *	The original *start will be encompassed by the cluster
 */
extern int speculative_reads_disabled;

uint32_t pre_heat_scaling[MAX_UPL_TRANSFER];
uint32_t pre_heat_cluster[MAX_UPL_TRANSFER];

#define PRE_HEAT_MULTIPLIER 4

__private_extern__ void
vm_object_cluster_size(vm_object_t object, vm_object_offset_t *start,
		       vm_size_t *length, vm_object_fault_info_t fault_info)
{
	vm_size_t		pre_heat_size;
	vm_size_t		tail_size;
	vm_size_t		head_size;
	vm_size_t		max_length;
	vm_size_t		cluster_size;
	vm_object_offset_t	object_size;
	vm_object_offset_t	orig_start;
	vm_object_offset_t	target_start;
	vm_object_offset_t	offset;
	vm_behavior_t		behavior;
	boolean_t		look_behind = TRUE;
	boolean_t		look_ahead  = TRUE;
	int			sequential_run;
	int			sequential_behavior = VM_BEHAVIOR_SEQUENTIAL;

	assert( !(*length & PAGE_MASK));
	assert( !(*start & PAGE_MASK_64));

	if ( (max_length = *length) > (MAX_UPL_TRANSFER * PAGE_SIZE) )
		max_length = (MAX_UPL_TRANSFER * PAGE_SIZE);
	/*
	 * we'll always return a cluster size of at least
	 * 1 page, since the original fault must always
	 * be processed
	 */
	*length = PAGE_SIZE;

	if (speculative_reads_disabled || fault_info == NULL || max_length == 0) {
		/*
		 * no cluster... just fault the page in
		 */
		return;
	}
	orig_start = *start;
	target_start = orig_start;
	cluster_size = round_page_32(fault_info->cluster_size);
	behavior = fault_info->behavior;

	vm_object_lock(object);

	if (object->internal)
		object_size = object->size;
	else if (object->pager != MEMORY_OBJECT_NULL)
		vnode_pager_get_object_size(object->pager, &object_size);
	else
		goto out;	/* pager is gone for this object, nothing more to do */

	object_size = round_page_64(object_size);

	if (orig_start >= object_size) {
		/*
		 * fault occurred beyond the EOF...
		 * we need to punt w/o changing the
		 * starting offset
		 */
		goto out;
	}
	if (object->pages_used > object->pages_created) {
		/*
		 * must have wrapped our 32 bit counters
		 * so reset
		 */
		object->pages_used = object->pages_created = 0;
	}
	if ((sequential_run = object->sequential)) {
		if (sequential_run < 0) {
			sequential_behavior = VM_BEHAVIOR_RSEQNTL;
			sequential_run = 0 - sequential_run;
		} else {
			sequential_behavior = VM_BEHAVIOR_SEQUENTIAL;
		}
	}
	switch (behavior) {

	default:
		behavior = VM_BEHAVIOR_DEFAULT;

	case VM_BEHAVIOR_DEFAULT:
		if (object->internal && fault_info->user_tag == VM_MEMORY_STACK)
			goto out;

		if (sequential_run >= (3 * PAGE_SIZE)) {
			pre_heat_size = sequential_run + PAGE_SIZE;

			if ((behavior = sequential_behavior) == VM_BEHAVIOR_SEQUENTIAL)
				look_behind = FALSE;
			else
				look_ahead = FALSE;
		} else {
			uint32_t	pages_unused;

			if (object->pages_created < 32 * PRE_HEAT_MULTIPLIER) {
				/*
				 * prime the pump
				 */
				pre_heat_size = PAGE_SIZE * 8 * PRE_HEAT_MULTIPLIER;
				break;
			}
			pages_unused = object->pages_created - object->pages_used;

			if (pages_unused < (object->pages_created / 8)) {
				pre_heat_size = PAGE_SIZE * 32 * PRE_HEAT_MULTIPLIER;
			} else if (pages_unused < (object->pages_created / 4)) {
				pre_heat_size = PAGE_SIZE * 16 * PRE_HEAT_MULTIPLIER;
			} else if (pages_unused < (object->pages_created / 2)) {
				pre_heat_size = PAGE_SIZE * 8 * PRE_HEAT_MULTIPLIER;
			} else {
				pre_heat_size = PAGE_SIZE * 4 * PRE_HEAT_MULTIPLIER;
			}
		}
		break;

	case VM_BEHAVIOR_RANDOM:
		if ((pre_heat_size = cluster_size) <= PAGE_SIZE)
			goto out;
		break;

	case VM_BEHAVIOR_SEQUENTIAL:
		if ((pre_heat_size = cluster_size) == 0)
			pre_heat_size = sequential_run + PAGE_SIZE;
		look_behind = FALSE;
		break;

	case VM_BEHAVIOR_RSEQNTL:
		if ((pre_heat_size = cluster_size) == 0)
			pre_heat_size = sequential_run + PAGE_SIZE;
		look_ahead = FALSE;
		break;
	}
	if (pre_heat_size > max_length)
		pre_heat_size = max_length;

	if (behavior == VM_BEHAVIOR_DEFAULT && vm_page_free_count < vm_page_free_target)
		pre_heat_size /= 2;

	if (look_ahead == TRUE) {
		if (look_behind == TRUE)
			target_start &= ~(pre_heat_size - 1);

		if ((target_start + pre_heat_size) > object_size)
			pre_heat_size = (vm_size_t)(trunc_page_64(object_size - target_start));

		tail_size = pre_heat_size - (orig_start - target_start) - PAGE_SIZE;
	} else {
		if (pre_heat_size > target_start)
			pre_heat_size = target_start;
		tail_size = 0;
	}
	pre_heat_scaling[pre_heat_size / PAGE_SIZE]++;

	if (pre_heat_size <= PAGE_SIZE)
		goto out;

	if (look_behind == TRUE) {
		/*
		 * take a look at the pages before the original
		 * faulting offset
		 */
		head_size = pre_heat_size - tail_size - PAGE_SIZE;

		for (offset = orig_start - PAGE_SIZE_64; head_size; offset -= PAGE_SIZE_64, head_size -= PAGE_SIZE) {
			/*
			 * don't poke below the lowest offset
			 */
			if (offset < fault_info->lo_offset)
				break;
			/*
			 * for external objects and internal objects w/o an existence map
			 * vm_external_state_get will return VM_EXTERNAL_STATE_UNKNOWN
			 */
#if	MACH_PAGEMAP
			if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_ABSENT) {
				/*
				 * we know for a fact that the pager can't provide the page
				 * so don't include it or any pages beyond it in this cluster
				 */
				break;
			}
#endif
			if (vm_page_lookup(object, offset) != VM_PAGE_NULL) {
				/*
				 * don't bridge resident pages
				 */
				break;
			}
			*start = offset;
			*length += PAGE_SIZE;
		}
	}
	if (look_ahead == TRUE) {
		for (offset = orig_start + PAGE_SIZE_64; tail_size; offset += PAGE_SIZE_64, tail_size -= PAGE_SIZE) {
			/*
			 * don't poke above the highest offset
			 */
			if (offset >= fault_info->hi_offset)
				break;
			/*
			 * for external objects and internal objects w/o an existence map
			 * vm_external_state_get will return VM_EXTERNAL_STATE_UNKNOWN
			 */
#if	MACH_PAGEMAP
			if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_ABSENT) {
				/*
				 * we know for a fact that the pager can't provide the page
				 * so don't include it or any pages beyond it in this cluster
				 */
				break;
			}
#endif
			if (vm_page_lookup(object, offset) != VM_PAGE_NULL) {
				/*
				 * don't bridge resident pages
				 */
				break;
			}
			*length += PAGE_SIZE;
		}
	}
out:
	pre_heat_cluster[*length / PAGE_SIZE]++;

	vm_object_unlock(object);
}
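/*
 * Illustrative sketch (not from this file) of how a fault path consumes
 * the cluster hint: the routine only grows *length around the faulting
 * page, so the caller can issue a single pager request covering the whole
 * range.  "fault_offset", "fault_info" and issue_pager_read() are
 * placeholders.
 */
#if 0
	vm_object_offset_t	cluster_start = fault_offset;
	vm_size_t		cluster_length = (vm_size_t)(MAX_UPL_TRANSFER * PAGE_SIZE);

	vm_object_cluster_size(object, &cluster_start, &cluster_length, fault_info);

	/* cluster_start <= fault_offset < cluster_start + cluster_length */
	issue_pager_read(object, cluster_start, cluster_length);	/* hypothetical */
#endif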
/*
 * Allow manipulation of individual page state.  This is actually part of
 * the UPL regimen but takes place on the VM object rather than on a UPL
 */
kern_return_t
vm_object_page_op(
	vm_object_t		object,
	vm_object_offset_t	offset,
	int			ops,
	ppnum_t			*phys_entry,
	int			*flags)
{
	vm_page_t		dst_page;

	vm_object_lock(object);

	if(ops & UPL_POP_PHYSICAL) {
		if(object->phys_contiguous) {
			if (phys_entry) {
				*phys_entry = (ppnum_t)
					(object->shadow_offset >> 12);
			}
			vm_object_unlock(object);
			return KERN_SUCCESS;
		} else {
			vm_object_unlock(object);
			return KERN_INVALID_OBJECT;
		}
	}
	if(object->phys_contiguous) {
		vm_object_unlock(object);
		return KERN_INVALID_OBJECT;
	}

	while(TRUE) {
		if((dst_page = vm_page_lookup(object,offset)) == VM_PAGE_NULL) {
			vm_object_unlock(object);
			return KERN_FAILURE;
		}

		/* Sync up on getting the busy bit */
		if((dst_page->busy || dst_page->cleaning) &&
			   (((ops & UPL_POP_SET) &&
			   (ops & UPL_POP_BUSY)) || (ops & UPL_POP_DUMP))) {
			/* someone else is playing with the page, we will */
			/* have to wait */
			PAGE_SLEEP(object, dst_page, THREAD_UNINT);
			continue;
		}

		if (ops & UPL_POP_DUMP) {
			if (dst_page->pmapped == TRUE)
				pmap_disconnect(dst_page->phys_page);

			vm_page_lock_queues();
			vm_page_free(dst_page);
			vm_page_unlock_queues();

			break;
		}

		if (flags) {
			*flags = 0;

			/* Get the condition of flags before requested ops */
			/* are undertaken */

			if(dst_page->dirty) *flags |= UPL_POP_DIRTY;
			if(dst_page->pageout) *flags |= UPL_POP_PAGEOUT;
			if(dst_page->precious) *flags |= UPL_POP_PRECIOUS;
			if(dst_page->absent) *flags |= UPL_POP_ABSENT;
			if(dst_page->busy) *flags |= UPL_POP_BUSY;
		}

		/* The caller should have made a call either contingent with */
		/* or prior to this call to set UPL_POP_BUSY */
		if(ops & UPL_POP_SET) {
			/* The protection granted with this assert will */
			/* not be complete. If the caller violates the */
			/* convention and attempts to change page state */
			/* without first setting busy we may not see it */
			/* because the page may already be busy.  However */
			/* if such violations occur we will assert sooner */
			/* or later. */
			assert(dst_page->busy || (ops & UPL_POP_BUSY));
			if (ops & UPL_POP_DIRTY) dst_page->dirty = TRUE;
			if (ops & UPL_POP_PAGEOUT) dst_page->pageout = TRUE;
			if (ops & UPL_POP_PRECIOUS) dst_page->precious = TRUE;
			if (ops & UPL_POP_ABSENT) dst_page->absent = TRUE;
			if (ops & UPL_POP_BUSY) dst_page->busy = TRUE;
		}

		if(ops & UPL_POP_CLR) {
			assert(dst_page->busy);
			if (ops & UPL_POP_DIRTY) dst_page->dirty = FALSE;
			if (ops & UPL_POP_PAGEOUT) dst_page->pageout = FALSE;
			if (ops & UPL_POP_PRECIOUS) dst_page->precious = FALSE;
			if (ops & UPL_POP_ABSENT) dst_page->absent = FALSE;
			if (ops & UPL_POP_BUSY) {
				dst_page->busy = FALSE;
				PAGE_WAKEUP(dst_page);
			}
		}

		if (dst_page->encrypted) {
			/*
			 * ENCRYPTED SWAP:
			 * We need to decrypt this encrypted page before the
			 * caller can access its contents.
			 * But if the caller really wants to access the page's
			 * contents, they have to keep the page "busy".
			 * Otherwise, the page could get recycled or re-encrypted
			 * at any time.
			 */
			if ((ops & UPL_POP_SET) && (ops & UPL_POP_BUSY) &&
			    dst_page->busy) {
				/*
				 * The page is stable enough to be accessed by
				 * the caller, so make sure its contents are
				 * not encrypted.
				 */
				vm_page_decrypt(dst_page, 0);
			} else {
				/*
				 * The page is not busy, so don't bother
				 * decrypting it, since anything could
				 * happen to it between now and when the
				 * caller wants to access it.
				 * We should not give the caller access
				 * to this page.
				 */
				assert(!phys_entry);
			}
		}

		if (phys_entry) {
			/*
			 * The physical page number will remain valid
			 * only if the page is kept busy.
			 * ENCRYPTED SWAP: make sure we don't let the
			 * caller access an encrypted page.
			 */
			assert(dst_page->busy);
			assert(!dst_page->encrypted);
			*phys_entry = dst_page->phys_page;
		}

		break;
	}

	vm_object_unlock(object);
	return KERN_SUCCESS;
}
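/*
 * Illustrative sketch (not from this file): pinning down one page's
 * physical page number via vm_object_page_op().  Setting UPL_POP_BUSY is
 * what keeps the returned phys_entry valid, per the comments above;
 * "object" and "offset" are whatever the caller already has.
 */
#if 0
	ppnum_t		phys;
	int		page_flags;
	kern_return_t	kr;

	kr = vm_object_page_op(object, offset,
			       UPL_POP_SET | UPL_POP_BUSY,
			       &phys, &page_flags);
	if (kr == KERN_SUCCESS) {
		/* ... use phys ... then clear the busy bit when done */
		(void) vm_object_page_op(object, offset,
					 UPL_POP_CLR | UPL_POP_BUSY,
					 NULL, NULL);
	}
#endif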
/*
 * vm_object_range_op offers performance enhancement over
 * vm_object_page_op for page_op functions which do not require page
 * level state to be returned from the call.  Page_op was created to provide
 * a low-cost alternative to page manipulation via UPLs when only a single
 * page was involved.  The range_op call establishes the ability in the _op
 * family of functions to work on multiple pages where the lack of page level
 * state handling allows the caller to avoid the overhead of the upl structures.
 */
kern_return_t
vm_object_range_op(
	vm_object_t		object,
	vm_object_offset_t	offset_beg,
	vm_object_offset_t	offset_end,
	int			ops,
	int			*range)
{
	vm_object_offset_t	offset;
	vm_page_t		dst_page;

	if (object->resident_page_count == 0) {
		if (range) {
			if (ops & UPL_ROP_PRESENT)
				*range = 0;
			else
				*range = offset_end - offset_beg;
		}
		return KERN_SUCCESS;
	}
	vm_object_lock(object);

	if (object->phys_contiguous) {
		vm_object_unlock(object);
		return KERN_INVALID_OBJECT;
	}

	offset = offset_beg & ~PAGE_MASK_64;

	while (offset < offset_end) {
		dst_page = vm_page_lookup(object, offset);
		if (dst_page != VM_PAGE_NULL) {
			if (ops & UPL_ROP_DUMP) {
				if (dst_page->busy || dst_page->cleaning) {
					/*
					 * someone else is playing with the
					 * page, we will have to wait
					 */
					PAGE_SLEEP(object, dst_page, THREAD_UNINT);
					/*
					 * need to relook the page up since it's
					 * state may have changed while we slept
					 * it might even belong to a different object
					 * at this point
					 */
					continue;
				}
				if (dst_page->pmapped == TRUE)
					pmap_disconnect(dst_page->phys_page);

				vm_page_lock_queues();
				vm_page_free(dst_page);
				vm_page_unlock_queues();
			} else if (ops & UPL_ROP_ABSENT)
				break;
		} else if (ops & UPL_ROP_PRESENT)
			break;

		offset += PAGE_SIZE;
	}
	vm_object_unlock(object);

	if (range) {
		if (offset > offset_end)
			offset = offset_end;
		if(offset > offset_beg)
			*range = offset - offset_beg;
		else
			*range = 0;
	}
	return KERN_SUCCESS;
}
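/*
 * Illustrative sketch (not from this file): with UPL_ROP_PRESENT the walk
 * above stops at the first resident page, so "range" reports the length of
 * the leading non-resident run starting at the beginning offset.  "object",
 * "offset" and "len" are placeholders.
 */
#if 0
	int	range = 0;

	(void) vm_object_range_op(object, offset, offset + len,
				  UPL_ROP_PRESENT, &range);
	/* the first "range" bytes starting at "offset" have no resident pages */
#endif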
uint32_t scan_object_collision = 0;

void
vm_object_lock(vm_object_t object)
{
	if (object == vm_pageout_scan_wants_object) {
		scan_object_collision++;
		mutex_pause(2);
	}
	lck_rw_lock_exclusive(&object->Lock);
}

boolean_t
vm_object_lock_try(vm_object_t object)
{
	if (object == vm_pageout_scan_wants_object) {
		scan_object_collision++;
		mutex_pause(2);
	}
	return (lck_rw_try_lock_exclusive(&object->Lock));
}

void
vm_object_lock_shared(vm_object_t object)
{
	if (object == vm_pageout_scan_wants_object) {
		scan_object_collision++;
		mutex_pause(2);
	}
	lck_rw_lock_shared(&object->Lock);
}

boolean_t
vm_object_lock_try_shared(vm_object_t object)
{
	if (object == vm_pageout_scan_wants_object) {
		scan_object_collision++;
		mutex_pause(2);
	}
	return (lck_rw_try_lock_shared(&object->Lock));
}