/*
 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	File:	vm/vm_object.c
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *
 *	Virtual memory object module.
 */
#include <mach_pagemap.h>
#include <task_swapper.h>

#include <mach/mach_types.h>
#include <mach/memory_object.h>
#include <mach/memory_object_default.h>
#include <mach/memory_object_control_server.h>
#include <mach/vm_param.h>

#include <ipc/ipc_types.h>
#include <ipc/ipc_port.h>

#include <kern/kern_types.h>
#include <kern/assert.h>
#include <kern/queue.h>
#include <kern/kalloc.h>
#include <kern/zalloc.h>
#include <kern/host.h>
#include <kern/host_statistics.h>
#include <kern/processor.h>
#include <kern/misc_protos.h>
#include <kern/policy_internal.h>

#include <vm/memory_object.h>
#include <vm/vm_compressor_pager.h>
#include <vm/vm_fault.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_protos.h>
#include <vm/vm_purgeable_internal.h>

#include <vm/vm_compressor.h>

#if CONFIG_PHANTOM_CACHE
#include <vm/vm_phantom_cache.h>
#endif /* CONFIG_PHANTOM_CACHE */
#if VM_OBJECT_ACCESS_TRACKING
uint64_t vm_object_access_tracking_reads = 0;
uint64_t vm_object_access_tracking_writes = 0;
#endif /* VM_OBJECT_ACCESS_TRACKING */

boolean_t vm_object_collapse_compressor_allowed = TRUE;

struct vm_counters vm_counters;

#if DEVELOPMENT || DEBUG
extern struct memory_object_pager_ops shared_region_pager_ops;
extern unsigned int shared_region_pagers_resident_count;
extern unsigned int shared_region_pagers_resident_peak;
#endif /* DEVELOPMENT || DEBUG */

#if VM_OBJECT_TRACKING
boolean_t vm_object_tracking_inited = FALSE;
btlog_t *vm_object_tracking_btlog;

void
vm_object_tracking_init(void)
{
	int vm_object_tracking;

	vm_object_tracking = 1;
	PE_parse_boot_argn("vm_object_tracking", &vm_object_tracking,
	    sizeof(vm_object_tracking));

	if (vm_object_tracking) {
		vm_object_tracking_btlog = btlog_create(
			VM_OBJECT_TRACKING_NUM_RECORDS,
			VM_OBJECT_TRACKING_BTDEPTH,
			TRUE /* caller_will_remove_entries_for_element? */);
		assert(vm_object_tracking_btlog);
		vm_object_tracking_inited = TRUE;
	}
}
#endif /* VM_OBJECT_TRACKING */
/*
 *	Virtual memory objects maintain the actual data
 *	associated with allocated virtual memory.  A given
 *	page of memory exists within exactly one object.
 *
 *	An object is only deallocated when all "references"
 *	are given up.
 *
 *	Associated with each object is a list of all resident
 *	memory pages belonging to that object; this list is
 *	maintained by the "vm_page" module, but locked by the object's
 *	lock.
 *
 *	Each object also records the memory object reference
 *	that is used by the kernel to request and write
 *	back data (the memory object, field "pager"), etc...
 *
 *	Virtual memory objects are allocated to provide
 *	zero-filled memory (vm_allocate) or map a user-defined
 *	memory object into a virtual address space (vm_map).
 *
 *	Virtual memory objects that refer to a user-defined
 *	memory object are called "permanent", because all changes
 *	made in virtual memory are reflected back to the
 *	memory manager, which may then store it permanently.
 *	Other virtual memory objects are called "temporary",
 *	meaning that changes need be written back only when
 *	necessary to reclaim pages, and that storage associated
 *	with the object can be discarded once it is no longer
 *	referenced.
 *
 *	A permanent memory object may be mapped into more
 *	than one virtual address space.  Moreover, two threads
 *	may attempt to make the first mapping of a memory
 *	object concurrently.  Only one thread is allowed to
 *	complete this mapping; all others wait until the
 *	"pager_initialized" field is asserted, indicating
 *	that the first thread has initialized all of the
 *	necessary fields in the virtual memory object structure.
 *
 *	The kernel relies on a *default memory manager* to
 *	provide backing storage for the zero-filled virtual
 *	memory objects.  The pager memory objects associated
 *	with these temporary virtual memory objects are only
 *	requested from the default memory manager when it
 *	becomes necessary.  Virtual memory objects
 *	that depend on the default memory manager are called
 *	"internal".  The "pager_created" field is provided to
 *	indicate whether these ports have ever been allocated.
 *
 *	The kernel may also create virtual memory objects to
 *	hold changed pages after a copy-on-write operation.
 *	In this case, the virtual memory object (and its
 *	backing storage -- its memory object) only contain
 *	those pages that have been changed.  The "shadow"
 *	field refers to the virtual memory object that contains
 *	the remainder of the contents.  The "shadow_offset"
 *	field indicates where in the "shadow" these contents begin.
 *	The "copy" field refers to a virtual memory object
 *	to which changed pages must be copied before changing
 *	this object, in order to implement another form
 *	of copy-on-write optimization.
 *
 *	The virtual memory object structure also records
 *	the attributes associated with its memory object.
 *	The "pager_ready", "can_persist" and "copy_strategy"
 *	fields represent those attributes.  The "cached_list"
 *	field is used in the implementation of the persistence
 *	property.
 *
 *	ZZZ Continue this comment.
 */
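/*
 * Illustrative note (added, not part of the original comment): after a
 * copy-on-write fault on a mapped file, the faulting map typically ends up
 * with a small anonymous object holding only the modified pages; its
 * "shadow" field points back at the original file-backed object, and
 * unmodified pages are still found by following the shadow chain and
 * adding "shadow_offset" to the lookup offset.
 */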
/* Forward declarations for internal functions. */
static kern_return_t	vm_object_terminate(
	vm_object_t		object);

static kern_return_t	vm_object_copy_call(
	vm_object_t		src_object,
	vm_object_offset_t	src_offset,
	vm_object_size_t	size,
	vm_object_t		*_result_object);

static void		vm_object_do_collapse(
	vm_object_t		object,
	vm_object_t		backing_object);

static void		vm_object_do_bypass(
	vm_object_t		object,
	vm_object_t		backing_object);

static void		vm_object_release_pager(
	memory_object_t		pager);

SECURITY_READ_ONLY_LATE(zone_t) vm_object_zone; /* vm backing store zone */
/*
 *	All wired-down kernel memory belongs to a single virtual
 *	memory object (kernel_object) to avoid wasting data structures.
 */
static struct vm_object	kernel_object_store VM_PAGE_PACKED_ALIGNED;
vm_object_t		kernel_object;

static struct vm_object	compressor_object_store VM_PAGE_PACKED_ALIGNED;
vm_object_t		compressor_object = &compressor_object_store;

/*
 * The submap object is used as a placeholder for vm_map_submap
 * operations.  The object is declared in vm_map.c because it
 * is exported by the vm_map module.  The storage is declared
 * here because it must be initialized here.
 */
static struct vm_object	vm_submap_object_store VM_PAGE_PACKED_ALIGNED;
/*
 *	Virtual memory objects are initialized from
 *	a template (see vm_object_allocate).
 *
 *	When adding a new field to the virtual memory
 *	object structure, be sure to add initialization
 *	(see _vm_object_allocate()).
 */
static const struct vm_object vm_object_template = {
	/*
	 * The lock will be initialized for each allocated object in
	 * _vm_object_allocate(), so we don't need to initialize it in
	 * the vm_object_template.
	 */
#if DEVELOPMENT || DEBUG
	.Lock_owner = 0,
#endif
	.vo_size = 0,
	.memq_hint = VM_PAGE_NULL,
	.ref_count = 1,
#if TASK_SWAPPER
	.res_count = 1,
#endif /* TASK_SWAPPER */
	.resident_page_count = 0,
	.wired_page_count = 0,
	.reusable_page_count = 0,
	.copy = VM_OBJECT_NULL,
	.shadow = VM_OBJECT_NULL,
	.vo_shadow_offset = (vm_object_offset_t) 0,
	.pager = MEMORY_OBJECT_NULL,
	.pager_control = MEMORY_OBJECT_CONTROL_NULL,
	.copy_strategy = MEMORY_OBJECT_COPY_SYMMETRIC,
	.paging_in_progress = 0,
#if __LP64__
	.__object1_unused_bits = 0,
#endif /* __LP64__ */
	.activity_in_progress = 0,

	/* Begin bitfields */
	.all_wanted = 0, /* all bits FALSE */
	.pager_created = FALSE,
	.pager_initialized = FALSE,
	.pager_ready = FALSE,
	.pager_trusted = FALSE,
	.can_persist = FALSE,

	.purgable = VM_PURGABLE_DENY,
	.purgeable_when_ripe = FALSE,
	.purgeable_only_by_kernel = FALSE,
	.terminating = FALSE,
	.shadow_severed = FALSE,
	.phys_contiguous = FALSE,
	.nophyscache = FALSE,

	.cached_list.prev = NULL,
	.cached_list.next = NULL,

	.last_alloc = (vm_object_offset_t) 0,
	.sequential = (vm_object_offset_t) 0,
	.scan_collisions = 0,
#if CONFIG_PHANTOM_CACHE
	.phantom_object_id = 0,
#endif
	.cow_hint = ~(vm_offset_t)0,

	/* cache bitfields */
	.wimg_bits = VM_WIMG_USE_DEFAULT,
	.set_cache_attr = FALSE,
	.object_is_shared_cache = FALSE,
	.code_signed = FALSE,
	.mapping_in_progress = FALSE,
	.phantom_isssd = FALSE,
	.volatile_empty = FALSE,
	.volatile_fault = FALSE,
	.all_reusable = FALSE,
	.blocked_access = FALSE,
	.vo_ledger_tag = VM_LEDGER_TAG_NONE,
	.vo_no_footprint = FALSE,
#if CONFIG_IOSCHED || UPL_DEBUG
	.uplq.prev = NULL,
	.uplq.next = NULL,
#endif /* UPL_DEBUG */
#if VM_PIP_DEBUG
	.pip_holders = {0},
#endif /* VM_PIP_DEBUG */

	.task_objq.next = NULL,
	.task_objq.prev = NULL,

	.purgeable_queue_type = PURGEABLE_Q_TYPE_MAX,
	.purgeable_queue_group = 0,

	.wire_tag = VM_KERN_MEMORY_NONE,
#if !VM_TAG_ACTIVE_UPDATE
	.wired_objq.next = NULL,
	.wired_objq.prev = NULL,
#endif /* ! VM_TAG_ACTIVE_UPDATE */

	.io_tracking = FALSE,

#if CONFIG_SECLUDED_MEMORY
	.eligible_for_secluded = FALSE,
	.can_grab_secluded = FALSE,
#else /* CONFIG_SECLUDED_MEMORY */
	.__object3_unused_bits = 0,
#endif /* CONFIG_SECLUDED_MEMORY */

#if VM_OBJECT_ACCESS_TRACKING
	.access_tracking = FALSE,
	.access_tracking_reads = 0,
	.access_tracking_writes = 0,
#endif /* VM_OBJECT_ACCESS_TRACKING */

#if DEBUG
	.purgeable_owner_bt = {0},
	.vo_purgeable_volatilizer = NULL,
	.purgeable_volatilizer_bt = {0},
#endif /* DEBUG */
};
LCK_GRP_DECLARE(vm_object_lck_grp, "vm_object");
LCK_GRP_DECLARE(vm_object_cache_lck_grp, "vm_object_cache");
LCK_ATTR_DECLARE(vm_object_lck_attr, 0, 0);
LCK_ATTR_DECLARE(kernel_object_lck_attr, 0, LCK_ATTR_DEBUG);
LCK_ATTR_DECLARE(compressor_object_lck_attr, 0, LCK_ATTR_DEBUG);

unsigned int vm_page_purged_wired = 0;
unsigned int vm_page_purged_busy = 0;
unsigned int vm_page_purged_others = 0;

static queue_head_t	vm_object_cached_list;
static uint32_t		vm_object_cache_pages_freed = 0;
static uint32_t		vm_object_cache_pages_moved = 0;
static uint32_t		vm_object_cache_pages_skipped = 0;
static uint32_t		vm_object_cache_adds = 0;
static uint32_t		vm_object_cached_count = 0;
static LCK_MTX_EARLY_DECLARE_ATTR(vm_object_cached_lock_data,
    &vm_object_cache_lck_grp, &vm_object_lck_attr);

static uint32_t		vm_object_page_grab_failed = 0;
static uint32_t		vm_object_page_grab_skipped = 0;
static uint32_t		vm_object_page_grab_returned = 0;
static uint32_t		vm_object_page_grab_pmapped = 0;
static uint32_t		vm_object_page_grab_reactivations = 0;

#define vm_object_cache_lock_spin() \
	lck_mtx_lock_spin(&vm_object_cached_lock_data)
#define vm_object_cache_unlock() \
	lck_mtx_unlock(&vm_object_cached_lock_data)

static void	vm_object_cache_remove_locked(vm_object_t);

static void	vm_object_reap(vm_object_t object);
static void	vm_object_reap_async(vm_object_t object);
static void	vm_object_reaper_thread(void);

static LCK_MTX_EARLY_DECLARE_ATTR(vm_object_reaper_lock_data,
    &vm_object_lck_grp, &vm_object_lck_attr);

static queue_head_t vm_object_reaper_queue; /* protected by vm_object_reaper_lock() */
unsigned int vm_object_reap_count = 0;
unsigned int vm_object_reap_count_async = 0;

#define vm_object_reaper_lock() \
	lck_mtx_lock(&vm_object_reaper_lock_data)
#define vm_object_reaper_lock_spin() \
	lck_mtx_lock_spin(&vm_object_reaper_lock_data)
#define vm_object_reaper_unlock() \
	lck_mtx_unlock(&vm_object_reaper_lock_data)
/* I/O Re-prioritization request list */
queue_head_t	io_reprioritize_list = QUEUE_HEAD_INITIALIZER(io_reprioritize_list);

LCK_SPIN_DECLARE_ATTR(io_reprioritize_list_lock,
    &vm_object_lck_grp, &vm_object_lck_attr);

#define IO_REPRIORITIZE_LIST_LOCK() \
	lck_spin_lock_grp(&io_reprioritize_list_lock, &vm_object_lck_grp)
#define IO_REPRIORITIZE_LIST_UNLOCK() \
	lck_spin_unlock(&io_reprioritize_list_lock)

#define MAX_IO_REPRIORITIZE_REQS	8192
ZONE_DECLARE(io_reprioritize_req_zone, "io_reprioritize_req",
    sizeof(struct io_reprioritize_req), ZC_NOGC);

/* I/O Re-prioritization thread */
int io_reprioritize_wakeup = 0;
static void io_reprioritize_thread(void *param __unused, wait_result_t wr __unused);

#define IO_REPRIO_THREAD_WAKEUP() thread_wakeup((event_t)&io_reprioritize_wakeup)
#define IO_REPRIO_THREAD_CONTINUATION()                                 \
{                                                                       \
	assert_wait(&io_reprioritize_wakeup, THREAD_UNINT);             \
	thread_block(io_reprioritize_thread);                           \
}

void vm_page_request_reprioritize(vm_object_t, uint64_t, uint32_t, int);
void vm_page_handle_prio_inversion(vm_object_t, vm_page_t);
void vm_decmp_upl_reprioritize(upl_t, int);

#define KERNEL_DEBUG KERNEL_DEBUG_CONSTANT
/*
 *	vm_object_allocate:
 *
 *	Returns a new object with the given size.
 */

__private_extern__ void
_vm_object_allocate(
	vm_object_size_t	size,
	vm_object_t		object)
{
	*object = vm_object_template;
	vm_page_queue_init(&object->memq);
#if UPL_DEBUG || CONFIG_IOSCHED
	queue_init(&object->uplq);
#endif
	vm_object_lock_init(object);
	object->vo_size = vm_object_round_page(size);

#if VM_OBJECT_TRACKING_OP_CREATED
	if (vm_object_tracking_inited) {
		void	*bt[VM_OBJECT_TRACKING_BTDEPTH];
		int	numsaved = 0;

		numsaved = OSBacktrace(bt, VM_OBJECT_TRACKING_BTDEPTH);
		btlog_add_entry(vm_object_tracking_btlog,
		    object,
		    VM_OBJECT_TRACKING_OP_CREATED,
		    bt,
		    numsaved);
	}
#endif /* VM_OBJECT_TRACKING_OP_CREATED */
}

__private_extern__ vm_object_t
vm_object_allocate(
	vm_object_size_t	size)
{
	vm_object_t object;

	object = (vm_object_t) zalloc(vm_object_zone);

//	dbgLog(object, size, 0, 2);	/* (TEST/DEBUG) */

	if (object != VM_OBJECT_NULL) {
		_vm_object_allocate(size, object);
	}

	return object;
}

TUNABLE(bool, workaround_41447923, "workaround_41447923", false);
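/*
 * Usage sketch (illustrative comment, not part of the original file):
 * callers that need an anonymous, zero-fill backing object simply do
 *
 *	vm_object_t obj = vm_object_allocate(size);
 *
 * and later drop the reference with vm_object_deallocate(obj); the size
 * is rounded up to a page multiple by _vm_object_allocate().
 */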
/*
 *	vm_object_bootstrap:
 *
 *	Initialize the VM objects module.
 */
void
vm_object_bootstrap(void)
{
	vm_size_t vm_object_size;

	assert(sizeof(mo_ipc_object_bits_t) == sizeof(ipc_object_bits_t));

	vm_object_size = (sizeof(struct vm_object) + (VM_PAGE_PACKED_PTR_ALIGNMENT - 1)) &
	    ~(VM_PAGE_PACKED_PTR_ALIGNMENT - 1);

	vm_object_zone = zone_create_ext("vm objects", vm_object_size,
	    ZC_NOENCRYPT | ZC_ALIGNMENT_REQUIRED,
	    ZONE_ID_ANY, ^(zone_t z){
#if defined(__LP64__)
		zone_set_submap_idx(z, Z_SUBMAP_IDX_VA_RESTRICTED_MAP);
#endif
	});

	queue_init(&vm_object_cached_list);

	queue_init(&vm_object_reaper_queue);

	/*
	 *	Initialize the "kernel object"
	 */

	kernel_object = &kernel_object_store;

	/*
	 *	Note that in the following size specifications, we need to add 1 because
	 *	VM_MAX_KERNEL_ADDRESS (vm_last_addr) is a maximum address, not a size.
	 */

	_vm_object_allocate(VM_MAX_KERNEL_ADDRESS + 1,
	    kernel_object);

	_vm_object_allocate(VM_MAX_KERNEL_ADDRESS + 1,
	    compressor_object);
	kernel_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
	compressor_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
	kernel_object->no_tag_update = TRUE;

	/*
	 *	Initialize the "submap object".  Make it as large as the
	 *	kernel object so that no limit is imposed on submap sizes.
	 */

	vm_submap_object = &vm_submap_object_store;
	_vm_object_allocate(VM_MAX_KERNEL_ADDRESS + 1,
	    vm_submap_object);
	vm_submap_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;

	/*
	 * Create an "extra" reference to this object so that we never
	 * try to deallocate it; zfree doesn't like to be called with
	 * non-zone memory.
	 */
	vm_object_reference(vm_submap_object);
}
void
vm_io_reprioritize_init(void)
{
	kern_return_t	result;
	thread_t	thread = THREAD_NULL;

	result = kernel_thread_start_priority(io_reprioritize_thread, NULL, 95 /* MAXPRI_KERNEL */, &thread);
	if (result == KERN_SUCCESS) {
		thread_set_thread_name(thread, "VM_io_reprioritize_thread");
		thread_deallocate(thread);
	} else {
		panic("Could not create io_reprioritize_thread");
	}
}

void
vm_object_reaper_init(void)
{
	kern_return_t	kr;
	thread_t	thread;

	kr = kernel_thread_start_priority(
		(thread_continue_t) vm_object_reaper_thread,
		NULL,
		BASEPRI_VM,
		&thread);
	if (kr != KERN_SUCCESS) {
		panic("failed to launch vm_object_reaper_thread kr=0x%x", kr);
	}
	thread_set_thread_name(thread, "VM_object_reaper_thread");
	thread_deallocate(thread);
}
/*
 *	vm_object_deallocate:
 *
 *	Release a reference to the specified object,
 *	gained either through a vm_object_allocate
 *	or a vm_object_reference call.  When all references
 *	are gone, storage associated with this object
 *	may be relinquished.
 *
 *	No object may be locked.
 */
unsigned long vm_object_deallocate_shared_successes = 0;
unsigned long vm_object_deallocate_shared_failures = 0;
unsigned long vm_object_deallocate_shared_swap_failures = 0;
__private_extern__ void
vm_object_deallocate(
	vm_object_t	object)
{
	vm_object_t	shadow = VM_OBJECT_NULL;

//	if(object)dbgLog(object, object->ref_count, object->can_persist, 3);	/* (TEST/DEBUG) */
//	else dbgLog(object, 0, 0, 3);	/* (TEST/DEBUG) */

	if (object == VM_OBJECT_NULL) {
		return;
	}

	if (object == kernel_object || object == compressor_object) {
		vm_object_lock_shared(object);

		OSAddAtomic(-1, &object->ref_count);

		if (object->ref_count == 0) {
			if (object == kernel_object) {
				panic("vm_object_deallocate: losing kernel_object\n");
			} else {
				panic("vm_object_deallocate: losing compressor_object\n");
			}
		}
		vm_object_unlock(object);
		return;
	}

	if (object->ref_count == 2 &&
	    object->named) {
		/*
		 * This "named" object's reference count is about to
		 * drop from 2 to 1:
		 * we'll need to call memory_object_last_unmap().
		 */
	} else if (object->ref_count == 2 &&
	    object->internal &&
	    object->shadow != VM_OBJECT_NULL) {
		/*
		 * This internal object's reference count is about to
		 * drop from 2 to 1 and it has a shadow object:
		 * we'll want to try and collapse this object with its
		 * shadow.
		 */
	} else if (object->ref_count >= 2) {
		UInt32		original_ref_count;
		volatile UInt32	*ref_count_p;
		Boolean		atomic_swap;

		/*
		 * The object currently looks like it is not being
		 * kept alive solely by the reference we're about to release.
		 * Let's try and release our reference without taking
		 * all the locks we would need if we had to terminate the
		 * object (cache lock + exclusive object lock).
		 * Lock the object "shared" to make sure we don't race with
		 * anyone holding it "exclusive".
		 */
		vm_object_lock_shared(object);
		ref_count_p = (volatile UInt32 *) &object->ref_count;
		original_ref_count = object->ref_count;
		/*
		 * Test again as "ref_count" could have changed.
		 * "named" shouldn't change.
		 */
		if (original_ref_count == 2 &&
		    object->named) {
			/* need to take slow path for m_o_last_unmap() */
			atomic_swap = FALSE;
		} else if (original_ref_count == 2 &&
		    object->internal &&
		    object->shadow != VM_OBJECT_NULL) {
			/* need to take slow path for vm_object_collapse() */
			atomic_swap = FALSE;
		} else if (original_ref_count < 2) {
			/* need to take slow path for vm_object_terminate() */
			atomic_swap = FALSE;
		} else {
			/* try an atomic update with the shared lock */
			atomic_swap = OSCompareAndSwap(
				original_ref_count,
				original_ref_count - 1,
				(UInt32 *) &object->ref_count);
			if (atomic_swap == FALSE) {
				vm_object_deallocate_shared_swap_failures++;
				/* fall back to the slow path... */
			}
		}

		vm_object_unlock(object);

		if (atomic_swap) {
			/*
			 * ref_count was updated atomically !
			 */
			vm_object_deallocate_shared_successes++;
			return;
		}

		/*
		 * Someone else updated the ref_count at the same
		 * time and we lost the race.  Fall back to the usual
		 * slow but safe path...
		 */
		vm_object_deallocate_shared_failures++;
	}

	while (object != VM_OBJECT_NULL) {
		vm_object_lock(object);

		assert(object->ref_count > 0);

		/*
		 *	If the object has a named reference, and only
		 *	that reference would remain, inform the pager
		 *	about the last "mapping" reference going away.
		 */
		if ((object->ref_count == 2) && (object->named)) {
			memory_object_t pager = object->pager;

			/* Notify the Pager that there are no */
			/* more mappers for this object */

			if (pager != MEMORY_OBJECT_NULL) {
				vm_object_mapping_wait(object, THREAD_UNINT);
				vm_object_mapping_begin(object);
				vm_object_unlock(object);

				memory_object_last_unmap(pager);

				vm_object_lock(object);
				vm_object_mapping_end(object);
			}
			assert(object->ref_count > 0);
		}

		/*
		 *	Lose the reference. If other references
		 *	remain, then we are done, unless we need
		 *	to retry a cache trim.
		 *	If it is the last reference, then keep it
		 *	until any pending initialization is completed.
		 */

		/* if the object is terminating, it cannot go into */
		/* the cache and we obviously should not call      */
		/* terminate again.  */

		if ((object->ref_count > 1) || object->terminating) {
			vm_object_lock_assert_exclusive(object);
			object->ref_count--;
			vm_object_res_deallocate(object);

			if (object->ref_count == 1 &&
			    object->shadow != VM_OBJECT_NULL) {
				/*
				 * There's only one reference left on this
				 * VM object.  We can't tell if it's a valid
				 * one (from a mapping for example) or if this
				 * object is just part of a possibly stale and
				 * useless shadow chain.
				 * We would like to try and collapse it into
				 * its parent, but we don't have any pointers
				 * back to this parent object.
				 * But we can try and collapse this object with
				 * its own shadows, in case these are useless
				 * too...
				 * We can't bypass this object though, since we
				 * don't know if this last reference on it is
				 * meaningful or not.
				 */
				vm_object_collapse(object, 0, FALSE);
			}
			vm_object_unlock(object);
			return;
		}

		/*
		 *	We have to wait for initialization
		 *	before destroying or caching the object.
		 */

		if (object->pager_created && !object->pager_initialized) {
			assert(!object->can_persist);
			vm_object_assert_wait(object,
			    VM_OBJECT_EVENT_INITIALIZED,
			    THREAD_UNINT);
			vm_object_unlock(object);

			thread_block(THREAD_CONTINUE_NULL);
			continue;
		}

		VM_OBJ_RES_DECR(object);	/* XXX ? */
		/*
		 *	Terminate this object. If it had a shadow,
		 *	then deallocate it; otherwise, if we need
		 *	to retry a cache trim, do so now; otherwise,
		 *	we are done. "pageout" objects have a shadow,
		 *	but maintain a "paging reference" rather than
		 *	a normal reference.
		 */
		shadow = object->pageout?VM_OBJECT_NULL:object->shadow;

		if (vm_object_terminate(object) != KERN_SUCCESS) {
			return;
		}
		if (shadow != VM_OBJECT_NULL) {
			object = shadow;
			continue;
		}
		return;
	}
}
vm_page_t
vm_object_page_grab(
	vm_object_t	object)
{
	vm_page_t	p, next_p;
	int		p_limit = 0;
	int		p_skipped = 0;
	int		refmod_state;

	vm_object_lock_assert_exclusive(object);

	next_p = (vm_page_t)vm_page_queue_first(&object->memq);
	p_limit = MIN(50, object->resident_page_count);

	while (!vm_page_queue_end(&object->memq, (vm_page_queue_entry_t)next_p) && --p_limit > 0) {
		p = next_p;
		next_p = (vm_page_t)vm_page_queue_next(&next_p->vmp_listq);

		if (VM_PAGE_WIRED(p) || p->vmp_busy || p->vmp_cleaning || p->vmp_laundry || p->vmp_fictitious) {
			goto move_page_in_obj;
		}

		if (p->vmp_pmapped || p->vmp_dirty || p->vmp_precious) {
			vm_page_lockspin_queues();

			if (p->vmp_pmapped) {
				vm_object_page_grab_pmapped++;

				if (p->vmp_reference == FALSE || p->vmp_dirty == FALSE) {
					refmod_state = pmap_get_refmod(VM_PAGE_GET_PHYS_PAGE(p));

					if (refmod_state & VM_MEM_REFERENCED) {
						p->vmp_reference = TRUE;
					}
					if (refmod_state & VM_MEM_MODIFIED) {
						SET_PAGE_DIRTY(p, FALSE);
					}
				}
				if (p->vmp_dirty == FALSE && p->vmp_precious == FALSE) {
					refmod_state = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(p));

					if (refmod_state & VM_MEM_REFERENCED) {
						p->vmp_reference = TRUE;
					}
					if (refmod_state & VM_MEM_MODIFIED) {
						SET_PAGE_DIRTY(p, FALSE);
					}

					if (p->vmp_dirty == FALSE) {
						goto take_page;
					}
				}
			}
			if ((p->vmp_q_state != VM_PAGE_ON_ACTIVE_Q) && p->vmp_reference == TRUE) {
				vm_page_activate(p);

				VM_STAT_INCR(reactivations);
				vm_object_page_grab_reactivations++;
			}
			vm_page_unlock_queues();
move_page_in_obj:
			vm_page_queue_remove(&object->memq, p, vmp_listq);
			vm_page_queue_enter(&object->memq, p, vmp_listq);

			p_skipped++;
			continue;
		}
		vm_page_lockspin_queues();
take_page:
		vm_page_free_prepare_queues(p);
		vm_object_page_grab_returned++;
		vm_object_page_grab_skipped += p_skipped;

		vm_page_unlock_queues();

		vm_page_free_prepare_object(p, TRUE);

		return p;
	}
	vm_object_page_grab_skipped += p_skipped;
	vm_object_page_grab_failed++;

	return VM_PAGE_NULL;
}
#define EVICT_PREPARE_LIMIT	64
#define EVICT_AGE		10

static	clock_sec_t	vm_object_cache_aging_ts = 0;
static void
vm_object_cache_remove_locked(
	vm_object_t	object)
{
	assert(object->purgable == VM_PURGABLE_DENY);

	queue_remove(&vm_object_cached_list, object, vm_object_t, cached_list);
	object->cached_list.next = NULL;
	object->cached_list.prev = NULL;

	vm_object_cached_count--;
}
void
vm_object_cache_remove(
	vm_object_t	object)
{
	vm_object_cache_lock_spin();

	if (object->cached_list.next &&
	    object->cached_list.prev) {
		vm_object_cache_remove_locked(object);
	}

	vm_object_cache_unlock();
}
void
vm_object_cache_add(
	vm_object_t	object)
{
	clock_sec_t	sec;
	clock_nsec_t	nsec;

	assert(object->purgable == VM_PURGABLE_DENY);

	if (object->resident_page_count == 0) {
		return;
	}
	clock_get_system_nanotime(&sec, &nsec);

	vm_object_cache_lock_spin();

	if (object->cached_list.next == NULL &&
	    object->cached_list.prev == NULL) {
		queue_enter(&vm_object_cached_list, object, vm_object_t, cached_list);
		object->vo_cache_ts = sec + EVICT_AGE;
		object->vo_cache_pages_to_scan = object->resident_page_count;

		vm_object_cached_count++;
		vm_object_cache_adds++;
	}
	vm_object_cache_unlock();
}
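
/*
 * Note (comment added for clarity): an object placed on the cached list
 * here only becomes a candidate for vm_object_cache_evict() once the
 * current time has passed the vo_cache_ts stamped above, i.e. EVICT_AGE
 * seconds after it was added.
 */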
int
vm_object_cache_evict(
	int	num_to_evict,
	int	max_objects_to_examine)
{
	vm_object_t	object = VM_OBJECT_NULL;
	vm_object_t	next_obj = VM_OBJECT_NULL;
	vm_page_t	local_free_q = VM_PAGE_NULL;
	vm_page_t	p;
	vm_page_t	next_p;
	int		object_cnt = 0;
	vm_page_t	ep_array[EVICT_PREPARE_LIMIT];
	int		ep_count;
	int		ep_limit;
	int		ep_index;
	int		ep_freed = 0;
	int		ep_moved = 0;
	uint32_t	ep_skipped = 0;
	clock_sec_t	sec;
	clock_nsec_t	nsec;

	KERNEL_DEBUG(0x13001ec | DBG_FUNC_START, 0, 0, 0, 0, 0);
	/*
	 * do a couple of quick checks to see if it's
	 * worthwhile grabbing the lock
	 */
	if (queue_empty(&vm_object_cached_list)) {
		KERNEL_DEBUG(0x13001ec | DBG_FUNC_END, 0, 0, 0, 0, 0);
		return 0;
	}
	clock_get_system_nanotime(&sec, &nsec);

	/*
	 * the object on the head of the queue has not
	 * yet sufficiently aged
	 */
	if (sec < vm_object_cache_aging_ts) {
		KERNEL_DEBUG(0x13001ec | DBG_FUNC_END, 0, 0, 0, 0, 0);
		return 0;
	}
	/*
	 * don't need the queue lock to find
	 * and lock an object on the cached list
	 */
	vm_page_unlock_queues();

	vm_object_cache_lock_spin();

	for (;;) {
		next_obj = (vm_object_t)queue_first(&vm_object_cached_list);

		while (!queue_end(&vm_object_cached_list, (queue_entry_t)next_obj) && object_cnt++ < max_objects_to_examine) {
			object = next_obj;
			next_obj = (vm_object_t)queue_next(&next_obj->cached_list);

			assert(object->purgable == VM_PURGABLE_DENY);

			if (sec < object->vo_cache_ts) {
				KERNEL_DEBUG(0x130020c, object, object->resident_page_count, object->vo_cache_ts, sec, 0);

				vm_object_cache_aging_ts = object->vo_cache_ts;
				object = VM_OBJECT_NULL;
				break;
			}
			if (!vm_object_lock_try_scan(object)) {
				/*
				 * just skip over this guy for now... if we find
				 * an object to steal pages from, we'll revist in a bit...
				 * hopefully, the lock will have cleared
				 */
				KERNEL_DEBUG(0x13001f8, object, object->resident_page_count, 0, 0, 0);

				object = VM_OBJECT_NULL;
				continue;
			}
			if (vm_page_queue_empty(&object->memq) || object->vo_cache_pages_to_scan == 0) {
				/*
				 * this case really shouldn't happen, but it's not fatal
				 * so deal with it... if we don't remove the object from
				 * the list, we'll never move past it.
				 */
				KERNEL_DEBUG(0x13001fc, object, object->resident_page_count, ep_freed, ep_moved, 0);

				vm_object_cache_remove_locked(object);
				vm_object_unlock(object);
				object = VM_OBJECT_NULL;
				continue;
			}
			/*
			 * we have a locked object with pages...
			 * time to start harvesting
			 */
			break;
		}
		vm_object_cache_unlock();

		if (object == VM_OBJECT_NULL) {
			break;
		}

		/*
		 * object is locked at this point and
		 * has resident pages
		 */
		next_p = (vm_page_t)vm_page_queue_first(&object->memq);

		/*
		 * break the page scan into 2 pieces to minimize the time spent
		 * behind the page queue lock...
		 * the list of pages on these unused objects is likely to be cold
		 * w/r to the cpu cache which increases the time to scan the list
		 * tenfold...  and we may have a 'run' of pages we can't utilize that
		 * needs to be skipped over...
		 */
		if ((ep_limit = num_to_evict - (ep_freed + ep_moved)) > EVICT_PREPARE_LIMIT) {
			ep_limit = EVICT_PREPARE_LIMIT;
		}
		ep_count = 0;

		while (!vm_page_queue_end(&object->memq, (vm_page_queue_entry_t)next_p) && object->vo_cache_pages_to_scan && ep_count < ep_limit) {
			p = next_p;
			next_p = (vm_page_t)vm_page_queue_next(&next_p->vmp_listq);

			object->vo_cache_pages_to_scan--;

			if (VM_PAGE_WIRED(p) || p->vmp_busy || p->vmp_cleaning || p->vmp_laundry) {
				vm_page_queue_remove(&object->memq, p, vmp_listq);
				vm_page_queue_enter(&object->memq, p, vmp_listq);

				ep_skipped++;
				continue;
			}
			if (p->vmp_wpmapped || p->vmp_dirty || p->vmp_precious) {
				vm_page_queue_remove(&object->memq, p, vmp_listq);
				vm_page_queue_enter(&object->memq, p, vmp_listq);

				pmap_clear_reference(VM_PAGE_GET_PHYS_PAGE(p));
			}
			ep_array[ep_count++] = p;
		}
		KERNEL_DEBUG(0x13001f4 | DBG_FUNC_START, object, object->resident_page_count, ep_freed, ep_moved, 0);

		vm_page_lockspin_queues();

		for (ep_index = 0; ep_index < ep_count; ep_index++) {
			p = ep_array[ep_index];

			if (p->vmp_wpmapped || p->vmp_dirty || p->vmp_precious) {
				p->vmp_reference = FALSE;
				p->vmp_no_cache = FALSE;

				/*
				 * we've already filtered out pages that are in the laundry
				 * so if we get here, this page can't be on the pageout queue
				 */
				vm_page_queues_remove(p, FALSE);
				vm_page_enqueue_inactive(p, TRUE);

				ep_moved++;
			} else {
#if CONFIG_PHANTOM_CACHE
				vm_phantom_cache_add_ghost(p);
#endif
				vm_page_free_prepare_queues(p);

				assert(p->vmp_pageq.next == 0 && p->vmp_pageq.prev == 0);
				/*
				 * Add this page to our list of reclaimed pages,
				 * to be freed later.
				 */
				p->vmp_snext = local_free_q;
				local_free_q = p;

				ep_freed++;
			}
		}
		vm_page_unlock_queues();

		KERNEL_DEBUG(0x13001f4 | DBG_FUNC_END, object, object->resident_page_count, ep_freed, ep_moved, 0);

		if (local_free_q) {
			vm_page_free_list(local_free_q, TRUE);
			local_free_q = VM_PAGE_NULL;
		}
		if (object->vo_cache_pages_to_scan == 0) {
			KERNEL_DEBUG(0x1300208, object, object->resident_page_count, ep_freed, ep_moved, 0);

			vm_object_cache_remove(object);

			KERNEL_DEBUG(0x13001fc, object, object->resident_page_count, ep_freed, ep_moved, 0);
		}
		/*
		 * done with this object
		 */
		vm_object_unlock(object);
		object = VM_OBJECT_NULL;

		/*
		 * at this point, we are not holding any locks
		 */
		if ((ep_freed + ep_moved) >= num_to_evict) {
			/*
			 * we've reached our target for the
			 * number of pages to evict
			 */
			break;
		}
		vm_object_cache_lock_spin();
	}
	/*
	 * put the page queues lock back to the caller's
	 * idea of it
	 */
	vm_page_lock_queues();

	vm_object_cache_pages_freed += ep_freed;
	vm_object_cache_pages_moved += ep_moved;
	vm_object_cache_pages_skipped += ep_skipped;

	KERNEL_DEBUG(0x13001ec | DBG_FUNC_END, ep_freed, 0, 0, 0, 0);
	return ep_freed;
}
/*
 *	Routine:	vm_object_terminate
 *	Purpose:
 *		Free all resources associated with a vm_object.
 *	In/out conditions:
 *		Upon entry, the object must be locked,
 *		and the object must have exactly one reference.
 *
 *		The shadow object reference is left alone.
 *
 *		The object must be unlocked if its found that pages
 *		must be flushed to a backing object.  If someone
 *		manages to map the object while it is being flushed
 *		the object is returned unlocked and unchanged.  Otherwise,
 *		upon exit, the cache will be unlocked, and the
 *		object will cease to exist.
 */
static kern_return_t
vm_object_terminate(
	vm_object_t	object)
{
	vm_object_t	shadow_object;

	vm_object_lock_assert_exclusive(object);

	if (!object->pageout && (!object->internal && object->can_persist) &&
	    (object->pager != NULL || object->shadow_severed)) {
		/*
		 * Clear pager_trusted bit so that the pages get yanked
		 * out of the object instead of cleaned in place.  This
		 * prevents a deadlock in XMM and makes more sense anyway.
		 */
		object->pager_trusted = FALSE;

		vm_object_reap_pages(object, REAP_TERMINATE);
	}
	/*
	 *	Make sure the object isn't already being terminated
	 */
	if (object->terminating) {
		vm_object_lock_assert_exclusive(object);
		object->ref_count--;
		assert(object->ref_count > 0);
		vm_object_unlock(object);
		return KERN_FAILURE;
	}

	/*
	 *	Did somebody get a reference to the object while we were
	 *	cleaning it?
	 */
	if (object->ref_count != 1) {
		vm_object_lock_assert_exclusive(object);
		object->ref_count--;
		assert(object->ref_count > 0);
		vm_object_res_deallocate(object);
		vm_object_unlock(object);
		return KERN_FAILURE;
	}

	/*
	 *	Make sure no one can look us up now.
	 */

	object->terminating = TRUE;
	object->alive = FALSE;

	if (!object->internal &&
	    object->cached_list.next &&
	    object->cached_list.prev) {
		vm_object_cache_remove(object);
	}

	/*
	 *	Detach the object from its shadow if we are the shadow's
	 *	copy. The reference we hold on the shadow must be dropped
	 *	by our caller.
	 */
	if (((shadow_object = object->shadow) != VM_OBJECT_NULL) &&
	    !(object->pageout)) {
		vm_object_lock(shadow_object);
		if (shadow_object->copy == object) {
			shadow_object->copy = VM_OBJECT_NULL;
		}
		vm_object_unlock(shadow_object);
	}

	if (object->paging_in_progress != 0 ||
	    object->activity_in_progress != 0) {
		/*
		 * There are still some paging_in_progress references
		 * on this object, meaning that there are some paging
		 * or other I/O operations in progress for this VM object.
		 * Such operations take some paging_in_progress references
		 * up front to ensure that the object doesn't go away, but
		 * they may also need to acquire a reference on the VM object,
		 * to map it in kernel space, for example.  That means that
		 * they may end up releasing the last reference on the VM
		 * object, triggering its termination, while still holding
		 * paging_in_progress references.  Waiting for these
		 * pending paging_in_progress references to go away here would
		 * deadlock.
		 *
		 * To avoid deadlocking, we'll let the vm_object_reaper_thread
		 * complete the VM object termination if it still holds
		 * paging_in_progress references at this point.
		 *
		 * No new paging_in_progress should appear now that the
		 * VM object is "terminating" and not "alive".
		 */
		vm_object_reap_async(object);
		vm_object_unlock(object);
		/*
		 * Return KERN_FAILURE to let the caller know that we
		 * haven't completed the termination and it can't drop this
		 * object's reference on its shadow object yet.
		 * The reaper thread will take care of that once it has
		 * completed this object's termination.
		 */
		return KERN_FAILURE;
	}
	/*
	 * complete the VM object termination
	 */
	vm_object_reap(object);
	object = VM_OBJECT_NULL;

	/*
	 * the object lock was released by vm_object_reap()
	 *
	 * KERN_SUCCESS means that this object has been terminated
	 * and no longer needs its shadow object but still holds a
	 * reference on it.
	 * The caller is responsible for dropping that reference.
	 * We can't call vm_object_deallocate() here because that
	 * would create a recursion.
	 */
	return KERN_SUCCESS;
}
1389 * Complete the termination of a VM object after it's been marked
1390 * as "terminating" and "!alive" by vm_object_terminate().
1392 * The VM object must be locked by caller.
1393 * The lock will be released on return and the VM object is no longer valid.
1400 memory_object_t pager
;
1402 vm_object_lock_assert_exclusive(object
);
1403 assert(object
->paging_in_progress
== 0);
1404 assert(object
->activity_in_progress
== 0);
1406 vm_object_reap_count
++;
1409 * Disown this purgeable object to cleanup its owner's purgeable
1410 * ledgers. We need to do this before disconnecting the object
1411 * from its pager, to properly account for compressed pages.
1413 if (object
->internal
&&
1414 (object
->purgable
!= VM_PURGABLE_DENY
||
1415 object
->vo_ledger_tag
)) {
1420 if (object
->vo_no_footprint
) {
1421 ledger_flags
|= VM_LEDGER_FLAG_NO_FOOTPRINT
;
1423 assert(!object
->alive
);
1424 assert(object
->terminating
);
1425 kr
= vm_object_ownership_change(object
,
1426 object
->vo_ledger_tag
, /* unchanged */
1427 NULL
, /* no owner */
1429 FALSE
); /* task_objq not locked */
1430 assert(kr
== KERN_SUCCESS
);
1431 assert(object
->vo_owner
== NULL
);
1434 #if DEVELOPMENT || DEBUG
1435 if (object
->object_is_shared_cache
&&
1436 object
->pager
!= NULL
&&
1437 object
->pager
->mo_pager_ops
== &shared_region_pager_ops
) {
1438 OSAddAtomic(-object
->resident_page_count
, &shared_region_pagers_resident_count
);
1440 #endif /* DEVELOPMENT || DEBUG */
1442 pager
= object
->pager
;
1443 object
->pager
= MEMORY_OBJECT_NULL
;
1445 if (pager
!= MEMORY_OBJECT_NULL
) {
1446 memory_object_control_disable(object
->pager_control
);
1449 object
->ref_count
--;
1451 assert(object
->res_count
== 0);
1452 #endif /* TASK_SWAPPER */
1454 assert(object
->ref_count
== 0);
1457 * remove from purgeable queue if it's on
1459 if (object
->internal
) {
1460 assert(VM_OBJECT_OWNER(object
) == TASK_NULL
);
1462 VM_OBJECT_UNWIRED(object
);
1464 if (object
->purgable
== VM_PURGABLE_DENY
) {
1465 /* not purgeable: nothing to do */
1466 } else if (object
->purgable
== VM_PURGABLE_VOLATILE
) {
1467 purgeable_q_t queue
;
1469 queue
= vm_purgeable_object_remove(object
);
1472 if (object
->purgeable_when_ripe
) {
1474 * Must take page lock for this -
1475 * using it to protect token queue
1477 vm_page_lock_queues();
1478 vm_purgeable_token_delete_first(queue
);
1480 assert(queue
->debug_count_objects
>= 0);
1481 vm_page_unlock_queues();
1485 * Update "vm_page_purgeable_count" in bulk and mark
1486 * object as VM_PURGABLE_EMPTY to avoid updating
1487 * "vm_page_purgeable_count" again in vm_page_remove()
1488 * when reaping the pages.
1491 assert(object
->resident_page_count
>=
1492 object
->wired_page_count
);
1493 delta
= (object
->resident_page_count
-
1494 object
->wired_page_count
);
1496 assert(vm_page_purgeable_count
>= delta
);
1498 (SInt32
*)&vm_page_purgeable_count
);
1500 if (object
->wired_page_count
!= 0) {
1501 assert(vm_page_purgeable_wired_count
>=
1502 object
->wired_page_count
);
1503 OSAddAtomic(-object
->wired_page_count
,
1504 (SInt32
*)&vm_page_purgeable_wired_count
);
1506 object
->purgable
= VM_PURGABLE_EMPTY
;
1507 } else if (object
->purgable
== VM_PURGABLE_NONVOLATILE
||
1508 object
->purgable
== VM_PURGABLE_EMPTY
) {
1509 /* remove from nonvolatile queue */
1510 vm_purgeable_nonvolatile_dequeue(object
);
1512 panic("object %p in unexpected purgeable state 0x%x\n",
1513 object
, object
->purgable
);
1515 if (object
->transposed
&&
1516 object
->cached_list
.next
!= NULL
&&
1517 object
->cached_list
.prev
== NULL
) {
1519 * object->cached_list.next "points" to the
1520 * object that was transposed with this object.
1523 assert(object
->cached_list
.next
== NULL
);
1525 assert(object
->cached_list
.prev
== NULL
);
1528 if (object
->pageout
) {
1530 * free all remaining pages tabled on
1532 * clean up it's shadow
1534 assert(object
->shadow
!= VM_OBJECT_NULL
);
1536 vm_pageout_object_terminate(object
);
1537 } else if (object
->resident_page_count
) {
1539 * free all remaining pages tabled on
1542 vm_object_reap_pages(object
, REAP_REAP
);
1544 assert(vm_page_queue_empty(&object
->memq
));
1545 assert(object
->paging_in_progress
== 0);
1546 assert(object
->activity_in_progress
== 0);
1547 assert(object
->ref_count
== 0);
1550 * If the pager has not already been released by
1551 * vm_object_destroy, we need to terminate it and
1552 * release our reference to it here.
1554 if (pager
!= MEMORY_OBJECT_NULL
) {
1555 vm_object_unlock(object
);
1556 vm_object_release_pager(pager
);
1557 vm_object_lock(object
);
1560 /* kick off anyone waiting on terminating */
1561 object
->terminating
= FALSE
;
1562 vm_object_paging_begin(object
);
1563 vm_object_paging_end(object
);
1564 vm_object_unlock(object
);
1566 object
->shadow
= VM_OBJECT_NULL
;
1568 #if VM_OBJECT_TRACKING
1569 if (vm_object_tracking_inited
) {
1570 btlog_remove_entries_for_element(vm_object_tracking_btlog
,
1573 #endif /* VM_OBJECT_TRACKING */
1575 vm_object_lock_destroy(object
);
1577 * Free the space for the object.
1579 zfree(vm_object_zone
, object
);
1580 object
= VM_OBJECT_NULL
;
1584 unsigned int vm_max_batch
= 256;
1586 #define V_O_R_MAX_BATCH 128
1588 #define BATCH_LIMIT(max) (vm_max_batch >= max ? max : vm_max_batch)
1591 #define VM_OBJ_REAP_FREELIST(_local_free_q, do_disconnect) \
1593 if (_local_free_q) { \
1594 if (do_disconnect) { \
1596 for (m = _local_free_q; \
1597 m != VM_PAGE_NULL; \
1598 m = m->vmp_snext) { \
1599 if (m->vmp_pmapped) { \
1600 pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m)); \
1604 vm_page_free_list(_local_free_q, TRUE); \
1605 _local_free_q = VM_PAGE_NULL; \
1611 vm_object_reap_pages(
1617 vm_page_t local_free_q
= VM_PAGE_NULL
;
1619 boolean_t disconnect_on_release
;
1620 pmap_flush_context pmap_flush_context_storage
;
1622 if (reap_type
== REAP_DATA_FLUSH
) {
1624 * We need to disconnect pages from all pmaps before
1625 * releasing them to the free list
1627 disconnect_on_release
= TRUE
;
1630 * Either the caller has already disconnected the pages
1631 * from all pmaps, or we disconnect them here as we add
1632 * them to out local list of pages to be released.
1633 * No need to re-disconnect them when we release the pages
1636 disconnect_on_release
= FALSE
;
1639 restart_after_sleep
:
1640 if (vm_page_queue_empty(&object
->memq
)) {
1643 loop_count
= BATCH_LIMIT(V_O_R_MAX_BATCH
);
1645 if (reap_type
== REAP_PURGEABLE
) {
1646 pmap_flush_context_init(&pmap_flush_context_storage
);
1649 vm_page_lockspin_queues();
1651 next
= (vm_page_t
)vm_page_queue_first(&object
->memq
);
1653 while (!vm_page_queue_end(&object
->memq
, (vm_page_queue_entry_t
)next
)) {
1655 next
= (vm_page_t
)vm_page_queue_next(&next
->vmp_listq
);
1657 if (--loop_count
== 0) {
1658 vm_page_unlock_queues();
1661 if (reap_type
== REAP_PURGEABLE
) {
1662 pmap_flush(&pmap_flush_context_storage
);
1663 pmap_flush_context_init(&pmap_flush_context_storage
);
1666 * Free the pages we reclaimed so far
1667 * and take a little break to avoid
1668 * hogging the page queue lock too long
1670 VM_OBJ_REAP_FREELIST(local_free_q
,
1671 disconnect_on_release
);
1676 loop_count
= BATCH_LIMIT(V_O_R_MAX_BATCH
);
1678 vm_page_lockspin_queues();
1680 if (reap_type
== REAP_DATA_FLUSH
|| reap_type
== REAP_TERMINATE
) {
1681 if (p
->vmp_busy
|| p
->vmp_cleaning
) {
1682 vm_page_unlock_queues();
1684 * free the pages reclaimed so far
1686 VM_OBJ_REAP_FREELIST(local_free_q
,
1687 disconnect_on_release
);
1689 PAGE_SLEEP(object
, p
, THREAD_UNINT
);
1691 goto restart_after_sleep
;
1693 if (p
->vmp_laundry
) {
1694 vm_pageout_steal_laundry(p
, TRUE
);
1697 switch (reap_type
) {
1698 case REAP_DATA_FLUSH
:
1699 if (VM_PAGE_WIRED(p
)) {
1701 * this is an odd case... perhaps we should
1702 * zero-fill this page since we're conceptually
1703 * tossing its data at this point, but leaving
1704 * it on the object to honor the 'wire' contract
1710 case REAP_PURGEABLE
:
1711 if (VM_PAGE_WIRED(p
)) {
1713 * can't purge a wired page
1715 vm_page_purged_wired
++;
1718 if (p
->vmp_laundry
&& !p
->vmp_busy
&& !p
->vmp_cleaning
) {
1719 vm_pageout_steal_laundry(p
, TRUE
);
1722 if (p
->vmp_cleaning
|| p
->vmp_laundry
|| p
->vmp_absent
) {
1724 * page is being acted upon,
1725 * so don't mess with it
1727 vm_page_purged_others
++;
1732 * We can't reclaim a busy page but we can
1733 * make it more likely to be paged (it's not wired) to make
1734 * sure that it gets considered by
1735 * vm_pageout_scan() later.
1737 if (VM_PAGE_PAGEABLE(p
)) {
1738 vm_page_deactivate(p
);
1740 vm_page_purged_busy
++;
1744 assert(VM_PAGE_OBJECT(p
) != kernel_object
);
1747 * we can discard this page...
1749 if (p
->vmp_pmapped
== TRUE
) {
1753 pmap_disconnect_options(VM_PAGE_GET_PHYS_PAGE(p
), PMAP_OPTIONS_NOFLUSH
| PMAP_OPTIONS_NOREFMOD
, (void *)&pmap_flush_context_storage
);
1755 vm_page_purged_count
++;
1759 case REAP_TERMINATE
:
1760 if (p
->vmp_absent
|| p
->vmp_private
) {
1762 * For private pages, VM_PAGE_FREE just
1763 * leaves the page structure around for
1764 * its owner to clean up. For absent
1765 * pages, the structure is returned to
1766 * the appropriate pool.
1770 if (p
->vmp_fictitious
) {
1771 assert(VM_PAGE_GET_PHYS_PAGE(p
) == vm_page_guard_addr
);
1774 if (!p
->vmp_dirty
&& p
->vmp_wpmapped
) {
1775 p
->vmp_dirty
= pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(p
));
1778 if ((p
->vmp_dirty
|| p
->vmp_precious
) && !p
->vmp_error
&& object
->alive
) {
1779 assert(!object
->internal
);
1781 p
->vmp_free_when_done
= TRUE
;
1783 if (!p
->vmp_laundry
) {
1784 vm_page_queues_remove(p
, TRUE
);
1786 * flush page... page will be freed
1787 * upon completion of I/O
1789 vm_pageout_cluster(p
);
1791 vm_page_unlock_queues();
1793 * free the pages reclaimed so far
1795 VM_OBJ_REAP_FREELIST(local_free_q
,
1796 disconnect_on_release
);
1798 vm_object_paging_wait(object
, THREAD_UNINT
);
1800 goto restart_after_sleep
;
1807 vm_page_free_prepare_queues(p
);
1808 assert(p
->vmp_pageq
.next
== 0 && p
->vmp_pageq
.prev
== 0);
1810 * Add this page to our list of reclaimed pages,
1811 * to be freed later.
1813 p
->vmp_snext
= local_free_q
;
1816 vm_page_unlock_queues();
1819 * Free the remaining reclaimed pages
1821 if (reap_type
== REAP_PURGEABLE
) {
1822 pmap_flush(&pmap_flush_context_storage
);
1825 VM_OBJ_REAP_FREELIST(local_free_q
,
1826 disconnect_on_release
);
1831 vm_object_reap_async(
1834 vm_object_lock_assert_exclusive(object
);
1836 vm_object_reaper_lock_spin();
1838 vm_object_reap_count_async
++;
1840 /* enqueue the VM object... */
1841 queue_enter(&vm_object_reaper_queue
, object
,
1842 vm_object_t
, cached_list
);
1844 vm_object_reaper_unlock();
1846 /* ... and wake up the reaper thread */
1847 thread_wakeup((event_t
) &vm_object_reaper_queue
);
1852 vm_object_reaper_thread(void)
1854 vm_object_t object
, shadow_object
;
1856 vm_object_reaper_lock_spin();
1858 while (!queue_empty(&vm_object_reaper_queue
)) {
1859 queue_remove_first(&vm_object_reaper_queue
,
1864 vm_object_reaper_unlock();
1865 vm_object_lock(object
);
1867 assert(object
->terminating
);
1868 assert(!object
->alive
);
1871 * The pageout daemon might be playing with our pages.
1872 * Now that the object is dead, it won't touch any more
1873 * pages, but some pages might already be on their way out.
1874 * Hence, we wait until the active paging activities have
1875 * ceased before we break the association with the pager
1878 while (object
->paging_in_progress
!= 0 ||
1879 object
->activity_in_progress
!= 0) {
1880 vm_object_wait(object
,
1881 VM_OBJECT_EVENT_PAGING_IN_PROGRESS
,
1883 vm_object_lock(object
);
1887 object
->pageout
? VM_OBJECT_NULL
: object
->shadow
;
1889 vm_object_reap(object
);
1890 /* cache is unlocked and object is no longer valid */
1891 object
= VM_OBJECT_NULL
;
1893 if (shadow_object
!= VM_OBJECT_NULL
) {
1895 * Drop the reference "object" was holding on
1896 * its shadow object.
1898 vm_object_deallocate(shadow_object
);
1899 shadow_object
= VM_OBJECT_NULL
;
1901 vm_object_reaper_lock_spin();
1904 /* wait for more work... */
1905 assert_wait((event_t
) &vm_object_reaper_queue
, THREAD_UNINT
);
1907 vm_object_reaper_unlock();
1909 thread_block((thread_continue_t
) vm_object_reaper_thread
);
/*
 *	Routine:	vm_object_release_pager
 *	Purpose:	Terminate the pager and, upon completion,
 *			release our last reference to it.
 */
static void
vm_object_release_pager(
	memory_object_t	pager)
{
	/*
	 *	Terminate the pager.
	 */

	(void) memory_object_terminate(pager);

	/*
	 *	Release reference to pager.
	 */

	memory_object_deallocate(pager);
}
/*
 *	Routine:	vm_object_destroy
 *	Purpose:
 *		Shut down a VM object, despite the
 *		presence of address map (or other) references
 *		to the vm_object.
 */
kern_return_t
vm_object_destroy(
	vm_object_t		object,
	__unused kern_return_t	reason)
{
	memory_object_t		old_pager;

	if (object == VM_OBJECT_NULL) {
		return KERN_SUCCESS;
	}

	/*
	 *	Remove the pager association immediately.
	 *
	 *	This will prevent the memory manager from further
	 *	meddling.  [If it wanted to flush data or make
	 *	other changes, it should have done so before performing
	 *	the destroy call.]
	 */

	vm_object_lock(object);
	object->can_persist = FALSE;
	object->named = FALSE;
	object->alive = FALSE;

#if DEVELOPMENT || DEBUG
	if (object->object_is_shared_cache &&
	    object->pager != NULL &&
	    object->pager->mo_pager_ops == &shared_region_pager_ops) {
		OSAddAtomic(-object->resident_page_count, &shared_region_pagers_resident_count);
	}
#endif /* DEVELOPMENT || DEBUG */

	old_pager = object->pager;
	object->pager = MEMORY_OBJECT_NULL;
	if (old_pager != MEMORY_OBJECT_NULL) {
		memory_object_control_disable(object->pager_control);
	}

	/*
	 * Wait for the existing paging activity (that got
	 * through before we nulled out the pager) to subside.
	 */

	vm_object_paging_wait(object, THREAD_UNINT);
	vm_object_unlock(object);

	/*
	 *	Terminate the object now.
	 */
	if (old_pager != MEMORY_OBJECT_NULL) {
		vm_object_release_pager(old_pager);

		/*
		 * JMM - Release the caller's reference.  This assumes the
		 * caller had a reference to release, which is a big (but
		 * currently valid) assumption if this is driven from the
		 * vnode pager (it is holding a named reference when making
		 * this call)..
		 */
		vm_object_deallocate(object);
	}
	return KERN_SUCCESS;
}
/*
 * The "chunk" macros are used by routines below when looking for pages to deactivate.  These
 * exist because of the need to handle shadow chains.  When deactivating pages, we only
 * want to deactivate the ones at the top most level in the object chain.  In order to do
 * this efficiently, the specified address range is divided up into "chunks" and we use
 * a bit map to keep track of which pages have already been processed as we descend down
 * the shadow chain.  These chunk macros hide the details of the bit map implementation
 * as much as we can.
 *
 * For convenience, we use a 64-bit data type as the bit map, and therefore a chunk is
 * set to 64 pages.  The bit map is indexed from the low-order end, so that the lowest
 * order bit represents page 0 in the current range and highest order bit represents
 * page 63.
 *
 * For further convenience, we also use negative logic for the page state in the bit map.
 * The bit is set to 1 to indicate it has not yet been seen, and to 0 to indicate it has
 * been processed.  This way we can simply test the 64-bit long word to see if it's zero
 * to easily tell if the whole range has been processed.  Therefore, the bit map starts
 * out with all the bits set.  The macros below hide all these details from the caller.
 */
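
/*
 * Worked example (comment added for clarity, assuming a 16KB page size):
 * a 96KB range covers pages 0..5 of a chunk, so CHUNK_INIT leaves bits 0-5
 * set and pre-clears bits 6-63.  As each page is found at some level of the
 * shadow chain, MARK_PAGE_HANDLED clears its bit, and the walk of that chunk
 * stops as soon as CHUNK_NOT_COMPLETE sees the whole word reach zero.
 */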
#define PAGES_IN_A_CHUNK	64	/* The number of pages in the chunk must */
					/* be the same as the number of bits in */
					/* the chunk_state_t type. We use 64 */
					/* just for convenience. */

#define CHUNK_SIZE	(PAGES_IN_A_CHUNK * PAGE_SIZE_64)	/* Size of a chunk in bytes */

typedef uint64_t	chunk_state_t;

/*
 * The bit map uses negative logic, so we start out with all 64 bits set to indicate
 * that no pages have been processed yet.  Also, if len is less than the full CHUNK_SIZE,
 * then we mark pages beyond the len as having been "processed" so that we don't waste time
 * looking at pages in that range.  This can save us from unnecessarily chasing down the
 * shadow chain.
 */

#define CHUNK_INIT(c, len)						\
	MACRO_BEGIN							\
	uint64_t p;							\
									\
	(c) = 0xffffffffffffffffLL;					\
									\
	for (p = (len) / PAGE_SIZE_64; p < PAGES_IN_A_CHUNK; p++)	\
	        MARK_PAGE_HANDLED(c, p);				\
	MACRO_END

/*
 * Return true if all pages in the chunk have not yet been processed.
 */

#define CHUNK_NOT_COMPLETE(c)	((c) != 0)

/*
 * Return true if the page at offset 'p' in the bit map has already been handled
 * while processing a higher level object in the shadow chain.
 */

#define PAGE_ALREADY_HANDLED(c, p)	(((c) & (1ULL << (p))) == 0)

/*
 * Mark the page at offset 'p' in the bit map as having been processed.
 */

#define MARK_PAGE_HANDLED(c, p)		\
	MACRO_BEGIN			\
	(c) = (c) & ~(1ULL << (p));	\
	MACRO_END
2080 * locked upon entry and returned locked.
2086 vm_object_offset_t offset
)
2088 if (object
->internal
&&
2090 !object
->terminating
&&
2091 object
->pager_ready
) {
2092 if (VM_COMPRESSOR_PAGER_STATE_GET(object
, offset
)
2093 == VM_EXTERNAL_STATE_EXISTS
) {
2103 * madvise_free_debug
2105 * To help debug madvise(MADV_FREE*) mis-usage, this triggers a
2106 * zero-fill as soon as a page is affected by a madvise(MADV_FREE*), to
2107 * simulate the loss of the page's contents as if the page had been
2108 * reclaimed and then re-faulted.
2110 #if DEVELOPMENT || DEBUG
2111 int madvise_free_debug
= 1;
2113 int madvise_free_debug
= 0;
2116 __options_decl(deactivate_flags_t
, uint32_t, {
2117 DEACTIVATE_KILL
= 0x1,
2118 DEACTIVATE_REUSABLE
= 0x2,
2119 DEACTIVATE_ALL_REUSABLE
= 0x4,
2120 DEACTIVATE_CLEAR_REFMOD
= 0x8
2124 * Deactivate the pages in the specified object and range. If kill_page is set, also discard any
2125 * page modified state from the pmap. Update the chunk_state as we go along. The caller must specify
2126 * a size that is less than or equal to the CHUNK_SIZE.
2130 deactivate_pages_in_object(
2132 vm_object_offset_t offset
,
2133 vm_object_size_t size
,
2134 deactivate_flags_t flags
,
2135 chunk_state_t
*chunk_state
,
2136 pmap_flush_context
*pfc
,
2138 vm_map_offset_t pmap_offset
)
2142 struct vm_page_delayed_work dw_array
;
2143 struct vm_page_delayed_work
*dwp
, *dwp_start
;
2144 bool dwp_finish_ctx
= TRUE
;
2147 unsigned int reusable
= 0;
2150 * Examine each page in the chunk. The variable 'p' is the page number relative to the start of the
2151 * chunk. Since this routine is called once for each level in the shadow chain, the chunk_state may
2152 * have pages marked as having been processed already. We stop the loop early if we find we've handled
2153 * all the pages in the chunk.
2156 dwp_start
= dwp
= NULL
;
2158 dw_limit
= DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT
);
2159 dwp_start
= vm_page_delayed_work_get_ctx();
2160 if (dwp_start
== NULL
) {
2161 dwp_start
= &dw_array
;
2163 dwp_finish_ctx
= FALSE
;
2168 for (p
= 0; size
&& CHUNK_NOT_COMPLETE(*chunk_state
); p
++, size
-= PAGE_SIZE_64
, offset
+= PAGE_SIZE_64
, pmap_offset
+= PAGE_SIZE_64
) {
2170 * If this offset has already been found and handled in a higher level object, then don't
2171 * do anything with it in the current shadow object.
2174 if (PAGE_ALREADY_HANDLED(*chunk_state
, p
)) {
2179 * See if the page at this offset is around. First check to see if the page is resident,
2180 * then if not, check the existence map or with the pager.
2183 if ((m
= vm_page_lookup(object
, offset
)) != VM_PAGE_NULL
) {
2185 * We found a page we were looking for. Mark it as "handled" now in the chunk_state
2186 * so that we won't bother looking for a page at this offset again if there are more
2187 * shadow objects. Then deactivate the page.
2190 MARK_PAGE_HANDLED(*chunk_state
, p
);
2192 if ((!VM_PAGE_WIRED(m
)) && (!m
->vmp_private
) && (!m
->vmp_gobbled
) && (!m
->vmp_busy
) &&
2193 (!m
->vmp_laundry
) && (!m
->vmp_cleaning
) && !(m
->vmp_free_when_done
)) {
2194 int clear_refmod_mask
;
2199 clear_refmod_mask
= VM_MEM_REFERENCED
;
2200 dwp
->dw_mask
|= DW_clear_reference
;
2202 if ((flags
& DEACTIVATE_KILL
) && (object
->internal
)) {
2203 if (madvise_free_debug
) {
2205 * zero-fill the page now
2206 * to simulate it being
2207 * reclaimed and re-faulted.
2209 pmap_zero_page(VM_PAGE_GET_PHYS_PAGE(m
));
2211 m
->vmp_precious
= FALSE
;
2212 m
->vmp_dirty
= FALSE
;
2214 clear_refmod_mask
|= VM_MEM_MODIFIED
;
2215 if (m
->vmp_q_state
== VM_PAGE_ON_THROTTLED_Q
) {
2217 * This page is now clean and
2218 * reclaimable. Move it out
2219 * of the throttled queue, so
2220 * that vm_pageout_scan() can
2223 dwp
->dw_mask
|= DW_move_page
;
2226 VM_COMPRESSOR_PAGER_STATE_CLR(object
, offset
);
2228 if ((flags
& DEACTIVATE_REUSABLE
) && !m
->vmp_reusable
) {
2229 assert(!(flags
& DEACTIVATE_ALL_REUSABLE
));
2230 assert(!object
->all_reusable
);
2231 m
->vmp_reusable
= TRUE
;
2232 object
->reusable_page_count
++;
2233 assert(object
->resident_page_count
>= object
->reusable_page_count
);
2236 * Tell pmap this page is now
2237 * "reusable" (to update pmap
2238 * stats for all mappings).
2240 pmap_options
|= PMAP_OPTIONS_SET_REUSABLE
;
2243 if (flags
& DEACTIVATE_CLEAR_REFMOD
) {
2245 * The caller didn't clear the refmod bits in advance.
2246 * Clear them for this page now.
2248 pmap_options
|= PMAP_OPTIONS_NOFLUSH
;
2249 pmap_clear_refmod_options(VM_PAGE_GET_PHYS_PAGE(m
),
2255 if ((m
->vmp_q_state
!= VM_PAGE_ON_THROTTLED_Q
) &&
2256 !(flags
& (DEACTIVATE_REUSABLE
| DEACTIVATE_ALL_REUSABLE
))) {
2257 dwp
->dw_mask
|= DW_move_page
;
2261 VM_PAGE_ADD_DELAYED_WORK(dwp
, m
,
2265 if (dw_count
>= dw_limit
) {
2267 OSAddAtomic(reusable
,
2268 &vm_page_stats_reusable
.reusable_count
);
2269 vm_page_stats_reusable
.reusable
+= reusable
;
2272 vm_page_do_delayed_work(object
, VM_KERN_MEMORY_NONE
, dwp_start
, dw_count
);
2280 * The page at this offset isn't memory resident, check to see if it's
2281 * been paged out. If so, mark it as handled so we don't bother looking
2282 * for it in the shadow chain.
2285 if (page_is_paged_out(object
, offset
)) {
2286 MARK_PAGE_HANDLED(*chunk_state
, p
);
2289 * If we're killing a non-resident page, then clear the page in the existence
2290 * map so we don't bother paging it back in if it's touched again in the future.
2293 if ((flags
& DEACTIVATE_KILL
) && (object
->internal
)) {
2294 VM_COMPRESSOR_PAGER_STATE_CLR(object
, offset
);
2296 if (pmap
!= PMAP_NULL
) {
2298 * Tell pmap that this page
2299 * is no longer mapped, to
2300 * adjust the footprint ledger
2301 * because this page is no
2302 * longer compressed.
2304 pmap_remove_options(
2309 PMAP_OPTIONS_REMOVE
);
2317 OSAddAtomic(reusable
, &vm_page_stats_reusable
.reusable_count
);
2318 vm_page_stats_reusable
.reusable
+= reusable
;
2323 vm_page_do_delayed_work(object
, VM_KERN_MEMORY_NONE
, dwp_start
, dw_count
);
2328 if (dwp_start
&& dwp_finish_ctx
) {
2329 vm_page_delayed_work_finish_ctx(dwp_start
);
2330 dwp_start
= dwp
= NULL
;
/*
 * Deactivate a "chunk" of the given range of the object starting at offset.  A "chunk"
 * will always be less than or equal to the given size.  The total range is divided up
 * into chunks for efficiency and performance related to the locks and handling the shadow
 * chain.  This routine returns how much of the given "size" it actually processed.  It's
 * up to the caller to loop and keep calling this routine until the entire range they want
 * to process has been done.
 * Iff clear_refmod is true, pmap_clear_refmod_options is called for each physical page in this range.
 */

static vm_object_size_t
deactivate_a_chunk(
	vm_object_t             orig_object,
	vm_object_offset_t      offset,
	vm_object_size_t        size,
	deactivate_flags_t      flags,
	pmap_flush_context      *pfc,
	pmap_t                  pmap,
	vm_map_offset_t         pmap_offset)
{
	vm_object_t             object;
	vm_object_t             tmp_object;
	vm_object_size_t        length;
	chunk_state_t           chunk_state;

	/*
	 * Get set to do a chunk.  We'll do up to CHUNK_SIZE, but no more than the
	 * remaining size the caller asked for.
	 */

	length = MIN(size, CHUNK_SIZE);

	/*
	 * The chunk_state keeps track of which pages we've already processed if there's
	 * a shadow chain on this object.  At this point, we haven't done anything with this
	 * range of pages yet, so initialize the state to indicate no pages processed yet.
	 */

	CHUNK_INIT(chunk_state, length);
	object = orig_object;

	/*
	 * Start at the top level object and iterate around the loop once for each object
	 * in the shadow chain.  We stop processing early if we've already found all the pages
	 * in the range.  Otherwise we stop when we run out of shadow objects.
	 */

	while (object && CHUNK_NOT_COMPLETE(chunk_state)) {
		vm_object_paging_begin(object);

		deactivate_pages_in_object(object, offset, length, flags, &chunk_state, pfc, pmap, pmap_offset);

		vm_object_paging_end(object);

		/*
		 * We've finished with this object, see if there's a shadow object.  If
		 * there is, update the offset and lock the new object.  We also turn off
		 * kill_page at this point since we only kill pages in the top most object.
		 */

		tmp_object = object->shadow;

		if (tmp_object) {
			assert(!(flags & DEACTIVATE_KILL) || (flags & DEACTIVATE_CLEAR_REFMOD));
			flags &= ~(DEACTIVATE_KILL | DEACTIVATE_REUSABLE | DEACTIVATE_ALL_REUSABLE);
			offset += object->vo_shadow_offset;
			vm_object_lock(tmp_object);
		}

		if (object != orig_object) {
			vm_object_unlock(object);
		}

		object = tmp_object;
	}

	if (object && object != orig_object) {
		vm_object_unlock(object);
	}

	return length;
}
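
/*
 * Illustrative sketch (guarded out, not part of the build): how a caller is
 * expected to drive deactivate_a_chunk(), consuming its return value until
 * the requested range is exhausted.  This mirrors the loop performed by
 * vm_object_deactivate_pages() below; the local names are for illustration
 * only.
 */
#if 0
	vm_object_size_t        length;

	while (size) {
		length = deactivate_a_chunk(object, offset, size, flags,
		    &pmap_flush_context_storage, pmap, pmap_offset);

		size -= length;
		offset += length;
		pmap_offset += length;
	}
#endif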

/*
 * Move any resident pages in the specified range to the inactive queue.  If kill_page is set,
 * we also clear the modified status of the page and "forget" any changes that have been made
 * to the page.
 */
2427 __private_extern__
void
2428 vm_object_deactivate_pages(
2430 vm_object_offset_t offset
,
2431 vm_object_size_t size
,
2432 boolean_t kill_page
,
2433 boolean_t reusable_page
,
2435 vm_map_offset_t pmap_offset
)
2437 vm_object_size_t length
;
2438 boolean_t all_reusable
;
2439 pmap_flush_context pmap_flush_context_storage
;
2440 unsigned int pmap_clear_refmod_mask
= VM_MEM_REFERENCED
;
2441 unsigned int pmap_clear_refmod_options
= 0;
2442 deactivate_flags_t flags
= DEACTIVATE_CLEAR_REFMOD
;
2443 bool refmod_cleared
= false;
2445 flags
|= DEACTIVATE_KILL
;
2447 if (reusable_page
) {
2448 flags
|= DEACTIVATE_REUSABLE
;
2452 * We break the range up into chunks and do one chunk at a time. This is for
2453 * efficiency and performance while handling the shadow chains and the locks.
2454 * The deactivate_a_chunk() function returns how much of the range it processed.
2455 * We keep calling this routine until the given size is exhausted.
2459 all_reusable
= FALSE
;
2462 * For the sake of accurate "reusable" pmap stats, we need
2463 * to tell pmap about each page that is no longer "reusable",
2464 * so we can't do the "all_reusable" optimization.
2466 * If we do go with the all_reusable optimization, we can't
2467 * return if size is 0 since we could have "all_reusable == TRUE"
2468 * In this case, we save the overhead of doing the pmap_flush_context
2475 if (reusable_page
&&
2477 object
->vo_size
!= 0 &&
2478 object
->vo_size
== size
&&
2479 object
->reusable_page_count
== 0) {
2480 all_reusable
= TRUE
;
2481 reusable_page
= FALSE
;
2482 flags
|= DEACTIVATE_ALL_REUSABLE
;
2486 if ((reusable_page
|| all_reusable
) && object
->all_reusable
) {
2487 /* This means MADV_FREE_REUSABLE has been called twice, which
2488 * is probably illegal. */
2493 pmap_flush_context_init(&pmap_flush_context_storage
);
2496 * If we're deactivating multiple pages, try to perform one bulk pmap operation.
2497 * We can't do this if we're killing pages and there's a shadow chain as
2498 * we don't yet know which pages are in the top object (pages in shadow copies aren't
2500 * And we can only do this on hardware that supports it.
2502 if (size
> PAGE_SIZE
&& (!kill_page
|| !object
->shadow
)) {
2503 if (kill_page
&& object
->internal
) {
2504 pmap_clear_refmod_mask
|= VM_MEM_MODIFIED
;
2506 if (reusable_page
) {
2507 pmap_clear_refmod_options
|= PMAP_OPTIONS_SET_REUSABLE
;
2510 refmod_cleared
= pmap_clear_refmod_range_options(pmap
, pmap_offset
, pmap_offset
+ size
, pmap_clear_refmod_mask
, pmap_clear_refmod_options
);
2511 if (refmod_cleared
) {
2512 // We were able to clear all the refmod bits. So deactivate_a_chunk doesn't need to do it.
2513 flags
&= ~DEACTIVATE_CLEAR_REFMOD
;
2518 length
= deactivate_a_chunk(object
, offset
, size
, flags
,
2519 &pmap_flush_context_storage
, pmap
, pmap_offset
);
2523 pmap_offset
+= length
;
2525 pmap_flush(&pmap_flush_context_storage
);
2528 if (!object
->all_reusable
) {
2529 unsigned int reusable
;
2531 object
->all_reusable
= TRUE
;
2532 assert(object
->reusable_page_count
== 0);
2533 /* update global stats */
2534 reusable
= object
->resident_page_count
;
2535 OSAddAtomic(reusable
,
2536 &vm_page_stats_reusable
.reusable_count
);
2537 vm_page_stats_reusable
.reusable
+= reusable
;
2538 vm_page_stats_reusable
.all_reusable_calls
++;
2540 } else if (reusable_page
) {
2541 vm_page_stats_reusable
.partial_reusable_calls
++;
2546 vm_object_reuse_pages(
2548 vm_object_offset_t start_offset
,
2549 vm_object_offset_t end_offset
,
2550 boolean_t allow_partial_reuse
)
2552 vm_object_offset_t cur_offset
;
2554 unsigned int reused
, reusable
;
2556 #define VM_OBJECT_REUSE_PAGE(object, m, reused) \
2558 if ((m) != VM_PAGE_NULL && \
2559 (m)->vmp_reusable) { \
2560 assert((object)->reusable_page_count <= \
2561 (object)->resident_page_count); \
2562 assert((object)->reusable_page_count > 0); \
2563 (object)->reusable_page_count--; \
2564 (m)->vmp_reusable = FALSE; \
2567 * Tell pmap that this page is no longer \
2568 * "reusable", to update the "reusable" stats \
2569 * for all the pmaps that have mapped this \
2572 pmap_clear_refmod_options(VM_PAGE_GET_PHYS_PAGE((m)), \
2574 (PMAP_OPTIONS_CLEAR_REUSABLE \
2575 | PMAP_OPTIONS_NOFLUSH), \
2583 vm_object_lock_assert_exclusive(object
);
2585 if (object
->all_reusable
) {
2586 panic("object %p all_reusable: can't update pmap stats\n",
2588 assert(object
->reusable_page_count
== 0);
2589 object
->all_reusable
= FALSE
;
2590 if (end_offset
- start_offset
== object
->vo_size
||
2591 !allow_partial_reuse
) {
2592 vm_page_stats_reusable
.all_reuse_calls
++;
2593 reused
= object
->resident_page_count
;
2595 vm_page_stats_reusable
.partial_reuse_calls
++;
2596 vm_page_queue_iterate(&object
->memq
, m
, vmp_listq
) {
2597 if (m
->vmp_offset
< start_offset
||
2598 m
->vmp_offset
>= end_offset
) {
2599 m
->vmp_reusable
= TRUE
;
2600 object
->reusable_page_count
++;
2601 assert(object
->resident_page_count
>= object
->reusable_page_count
);
2604 assert(!m
->vmp_reusable
);
2609 } else if (object
->resident_page_count
>
2610 ((end_offset
- start_offset
) >> PAGE_SHIFT
)) {
2611 vm_page_stats_reusable
.partial_reuse_calls
++;
2612 for (cur_offset
= start_offset
;
2613 cur_offset
< end_offset
;
2614 cur_offset
+= PAGE_SIZE_64
) {
2615 if (object
->reusable_page_count
== 0) {
2618 m
= vm_page_lookup(object
, cur_offset
);
2619 VM_OBJECT_REUSE_PAGE(object
, m
, reused
);
2622 vm_page_stats_reusable
.partial_reuse_calls
++;
2623 vm_page_queue_iterate(&object
->memq
, m
, vmp_listq
) {
2624 if (object
->reusable_page_count
== 0) {
2627 if (m
->vmp_offset
< start_offset
||
2628 m
->vmp_offset
>= end_offset
) {
2631 VM_OBJECT_REUSE_PAGE(object
, m
, reused
);
2635 /* update global stats */
2636 OSAddAtomic(reusable
- reused
, &vm_page_stats_reusable
.reusable_count
);
2637 vm_page_stats_reusable
.reused
+= reused
;
2638 vm_page_stats_reusable
.reusable
+= reusable
;
2642 * Routine: vm_object_pmap_protect
2645 * Reduces the permission for all physical
2646 * pages in the specified object range.
2648 * If removing write permission only, it is
2649 * sufficient to protect only the pages in
2650 * the top-level object; only those pages may
2651 * have write permission.
2653 * If removing all access, we must follow the
2654 * shadow chain from the top-level object to
2655 * remove access to all pages in shadowed objects.
2657 * The object must *not* be locked. The object must
2660 * If pmap is not NULL, this routine assumes that
2661 * the only mappings for the pages are in that
2665 __private_extern__
void
2666 vm_object_pmap_protect(
2668 vm_object_offset_t offset
,
2669 vm_object_size_t size
,
2671 vm_map_size_t pmap_page_size
,
2672 vm_map_offset_t pmap_start
,
2675 vm_object_pmap_protect_options(object
, offset
, size
, pmap
,
2677 pmap_start
, prot
, 0);
2680 __private_extern__
void
2681 vm_object_pmap_protect_options(
2683 vm_object_offset_t offset
,
2684 vm_object_size_t size
,
2686 vm_map_size_t pmap_page_size
,
2687 vm_map_offset_t pmap_start
,
2691 pmap_flush_context pmap_flush_context_storage
;
2692 boolean_t delayed_pmap_flush
= FALSE
;
2693 vm_object_offset_t offset_in_object
;
2694 vm_object_size_t size_in_object
;
2696 if (object
== VM_OBJECT_NULL
) {
2699 if (pmap_page_size
> PAGE_SIZE
) {
2700 /* for 16K map on 4K device... */
2701 pmap_page_size
= PAGE_SIZE
;
2704 * If we decide to work on the object itself, extend the range to
2705 * cover a full number of native pages.
2707 size_in_object
= vm_object_round_page(offset
+ size
) - vm_object_trunc_page(offset
);
2708 offset_in_object
= vm_object_trunc_page(offset
);
2710 * If we decide to work on the pmap, use the exact range specified,
2711 * so no rounding/truncating offset and size. They should already
2712 * be aligned to pmap_page_size.
2714 assertf(!(offset
& (pmap_page_size
- 1)) && !(size
& (pmap_page_size
- 1)),
2715 "offset 0x%llx size 0x%llx pmap_page_size 0x%llx",
2716 offset
, size
, (uint64_t)pmap_page_size
);
2718 vm_object_lock(object
);
2720 if (object
->phys_contiguous
) {
2722 vm_object_unlock(object
);
2723 pmap_protect_options(pmap
,
2727 options
& ~PMAP_OPTIONS_NOFLUSH
,
2730 vm_object_offset_t phys_start
, phys_end
, phys_addr
;
2732 phys_start
= object
->vo_shadow_offset
+ offset_in_object
;
2733 phys_end
= phys_start
+ size_in_object
;
2734 assert(phys_start
<= phys_end
);
2735 assert(phys_end
<= object
->vo_shadow_offset
+ object
->vo_size
);
2736 vm_object_unlock(object
);
2738 pmap_flush_context_init(&pmap_flush_context_storage
);
2739 delayed_pmap_flush
= FALSE
;
2741 for (phys_addr
= phys_start
;
2742 phys_addr
< phys_end
;
2743 phys_addr
+= PAGE_SIZE_64
) {
2744 pmap_page_protect_options(
2745 (ppnum_t
) (phys_addr
>> PAGE_SHIFT
),
2747 options
| PMAP_OPTIONS_NOFLUSH
,
2748 (void *)&pmap_flush_context_storage
);
2749 delayed_pmap_flush
= TRUE
;
2751 if (delayed_pmap_flush
== TRUE
) {
2752 pmap_flush(&pmap_flush_context_storage
);
2758 assert(object
->internal
);
2761 if (ptoa_64(object
->resident_page_count
) > size_in_object
/ 2 && pmap
!= PMAP_NULL
) {
2762 vm_object_unlock(object
);
2763 if (pmap_page_size
< PAGE_SIZE
) {
2764 DEBUG4K_PMAP("pmap %p start 0x%llx end 0x%llx prot 0x%x: pmap_protect()\n", pmap
, (uint64_t)pmap_start
, pmap_start
+ size
, prot
);
2766 pmap_protect_options(pmap
, pmap_start
, pmap_start
+ size
, prot
,
2767 options
& ~PMAP_OPTIONS_NOFLUSH
, NULL
);
2771 if (pmap_page_size
< PAGE_SIZE
) {
2772 DEBUG4K_PMAP("pmap %p start 0x%llx end 0x%llx prot 0x%x: offset 0x%llx size 0x%llx object %p offset 0x%llx size 0x%llx\n", pmap
, (uint64_t)pmap_start
, pmap_start
+ size
, prot
, offset
, size
, object
, offset_in_object
, size_in_object
);
2775 pmap_flush_context_init(&pmap_flush_context_storage
);
2776 delayed_pmap_flush
= FALSE
;
	/*
	 * if we are doing large ranges with respect to resident
	 * page count then we should iterate over pages otherwise
	 * inverse page look-up will be faster
	 */
2783 if (ptoa_64(object
->resident_page_count
/ 4) < size_in_object
) {
2785 vm_object_offset_t end
;
2787 end
= offset_in_object
+ size_in_object
;
2789 vm_page_queue_iterate(&object
->memq
, p
, vmp_listq
) {
2790 if (!p
->vmp_fictitious
&& (offset_in_object
<= p
->vmp_offset
) && (p
->vmp_offset
< end
)) {
2791 vm_map_offset_t start
;
2794 * XXX FBDP 4K: intentionally using "offset" here instead
2795 * of "offset_in_object", since "start" is a pmap address.
2797 start
= pmap_start
+ p
->vmp_offset
- offset
;
2799 if (pmap
!= PMAP_NULL
) {
2800 vm_map_offset_t curr
;
2802 curr
< start
+ PAGE_SIZE_64
;
2803 curr
+= pmap_page_size
) {
2804 if (curr
< pmap_start
) {
2807 if (curr
>= pmap_start
+ size
) {
2810 pmap_protect_options(
2813 curr
+ pmap_page_size
,
2815 options
| PMAP_OPTIONS_NOFLUSH
,
2816 &pmap_flush_context_storage
);
2819 pmap_page_protect_options(
2820 VM_PAGE_GET_PHYS_PAGE(p
),
2822 options
| PMAP_OPTIONS_NOFLUSH
,
2823 &pmap_flush_context_storage
);
2825 delayed_pmap_flush
= TRUE
;
2830 vm_object_offset_t end
;
2831 vm_object_offset_t target_off
;
2833 end
= offset_in_object
+ size_in_object
;
2835 for (target_off
= offset_in_object
;
2836 target_off
< end
; target_off
+= PAGE_SIZE
) {
2837 p
= vm_page_lookup(object
, target_off
);
2839 if (p
!= VM_PAGE_NULL
) {
2840 vm_object_offset_t start
;
2843 * XXX FBDP 4K: intentionally using "offset" here instead
2844 * of "offset_in_object", since "start" is a pmap address.
2846 start
= pmap_start
+ (p
->vmp_offset
- offset
);
2848 if (pmap
!= PMAP_NULL
) {
2849 vm_map_offset_t curr
;
2851 curr
< start
+ PAGE_SIZE
;
2852 curr
+= pmap_page_size
) {
2853 if (curr
< pmap_start
) {
2856 if (curr
>= pmap_start
+ size
) {
2859 pmap_protect_options(
2862 curr
+ pmap_page_size
,
2864 options
| PMAP_OPTIONS_NOFLUSH
,
2865 &pmap_flush_context_storage
);
2868 pmap_page_protect_options(
2869 VM_PAGE_GET_PHYS_PAGE(p
),
2871 options
| PMAP_OPTIONS_NOFLUSH
,
2872 &pmap_flush_context_storage
);
2874 delayed_pmap_flush
= TRUE
;
2878 if (delayed_pmap_flush
== TRUE
) {
2879 pmap_flush(&pmap_flush_context_storage
);
2882 if (prot
== VM_PROT_NONE
) {
2884 * Must follow shadow chain to remove access
2885 * to pages in shadowed objects.
2887 vm_object_t next_object
;
2889 next_object
= object
->shadow
;
2890 if (next_object
!= VM_OBJECT_NULL
) {
2891 offset_in_object
+= object
->vo_shadow_offset
;
2892 offset
+= object
->vo_shadow_offset
;
2893 vm_object_lock(next_object
);
2894 vm_object_unlock(object
);
2895 object
= next_object
;
2898 * End of chain - we are done.
2904 * Pages in shadowed objects may never have
2905 * write permission - we may stop here.
2911 vm_object_unlock(object
);
2914 uint32_t vm_page_busy_absent_skipped
= 0;
/*
 *	Routine:	vm_object_copy_slowly
 *
 *		Copy the specified range of the source
 *		virtual memory object without using
 *		protection-based optimizations (such
 *		as copy-on-write).  The pages in the
 *		region are actually copied.
 *
 *	In/out conditions:
 *		The caller must hold a reference and a lock
 *		for the source virtual memory object.  The source
 *		object will be returned *unlocked*.
 *
 *	Results:
 *		If the copy is completed successfully, KERN_SUCCESS is
 *		returned.  If the caller asserted the interruptible
 *		argument, and an interruption occurred while waiting
 *		for a user-generated event, MACH_SEND_INTERRUPTED is
 *		returned.  Other values may be returned to indicate
 *		hard errors during the copy operation.
 *
 *		A new virtual memory object is returned in a
 *		parameter (_result_object).  The contents of this
 *		new object, starting at a zero offset, are a copy
 *		of the source memory region.  In the event of
 *		an error, this parameter will contain the value
 *		VM_OBJECT_NULL.
 */
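
/*
 * Illustrative sketch (guarded out, not part of the build): the calling
 * convention described above.  "src_object", "copy_offset" and "copy_len"
 * are hypothetical; the source object must be referenced and locked by the
 * caller and comes back unlocked.
 */
#if 0
	vm_object_t     result_object;
	kern_return_t   kr;

	vm_object_lock(src_object);
	kr = vm_object_copy_slowly(src_object,
	    copy_offset,
	    copy_len,
	    TRUE,               /* interruptible */
	    &result_object);
	/* src_object is now unlocked; on failure, result_object == VM_OBJECT_NULL */
	if (kr != KERN_SUCCESS) {
		/* e.g. MACH_SEND_INTERRUPTED if interrupted while waiting */
	}
#endif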
2946 __private_extern__ kern_return_t
2947 vm_object_copy_slowly(
2948 vm_object_t src_object
,
2949 vm_object_offset_t src_offset
,
2950 vm_object_size_t size
,
2951 boolean_t interruptible
,
2952 vm_object_t
*_result_object
) /* OUT */
2954 vm_object_t new_object
;
2955 vm_object_offset_t new_offset
;
2957 struct vm_object_fault_info fault_info
= {};
2960 vm_object_unlock(src_object
);
2961 *_result_object
= VM_OBJECT_NULL
;
2962 return KERN_INVALID_ARGUMENT
;
2966 * Prevent destruction of the source object while we copy.
2969 vm_object_reference_locked(src_object
);
2970 vm_object_unlock(src_object
);
2973 * Create a new object to hold the copied pages.
2975 * We fill the new object starting at offset 0,
2976 * regardless of the input offset.
2977 * We don't bother to lock the new object within
2978 * this routine, since we have the only reference.
2981 size
= vm_object_round_page(src_offset
+ size
) - vm_object_trunc_page(src_offset
);
2982 src_offset
= vm_object_trunc_page(src_offset
);
2983 new_object
= vm_object_allocate(size
);
2986 assert(size
== trunc_page_64(size
)); /* Will the loop terminate? */
2988 fault_info
.interruptible
= interruptible
;
2989 fault_info
.behavior
= VM_BEHAVIOR_SEQUENTIAL
;
2990 fault_info
.lo_offset
= src_offset
;
2991 fault_info
.hi_offset
= src_offset
+ size
;
2992 fault_info
.stealth
= TRUE
;
2996 src_offset
+= PAGE_SIZE_64
,
2997 new_offset
+= PAGE_SIZE_64
, size
-= PAGE_SIZE_64
3000 vm_fault_return_t result
;
3002 vm_object_lock(new_object
);
3004 while ((new_page
= vm_page_alloc(new_object
, new_offset
))
3006 vm_object_unlock(new_object
);
3008 if (!vm_page_wait(interruptible
)) {
3009 vm_object_deallocate(new_object
);
3010 vm_object_deallocate(src_object
);
3011 *_result_object
= VM_OBJECT_NULL
;
3012 return MACH_SEND_INTERRUPTED
;
3014 vm_object_lock(new_object
);
3016 vm_object_unlock(new_object
);
3019 vm_prot_t prot
= VM_PROT_READ
;
3020 vm_page_t _result_page
;
3022 vm_page_t result_page
;
3023 kern_return_t error_code
;
3024 vm_object_t result_page_object
;
3027 vm_object_lock(src_object
);
3029 if (src_object
->internal
&&
3030 src_object
->shadow
== VM_OBJECT_NULL
&&
3031 (src_object
->pager
== NULL
||
3032 (VM_COMPRESSOR_PAGER_STATE_GET(src_object
,
3034 VM_EXTERNAL_STATE_ABSENT
))) {
3035 boolean_t can_skip_page
;
3037 _result_page
= vm_page_lookup(src_object
,
3039 if (_result_page
== VM_PAGE_NULL
) {
3041 * This page is neither resident nor
3042 * compressed and there's no shadow
3043 * object below "src_object", so this
3044 * page is really missing.
3045 * There's no need to zero-fill it just
3046 * to copy it: let's leave it missing
3047 * in "new_object" and get zero-filled
3050 can_skip_page
= TRUE
;
3051 } else if (workaround_41447923
&&
3052 src_object
->pager
== NULL
&&
3053 _result_page
!= VM_PAGE_NULL
&&
3054 _result_page
->vmp_busy
&&
3055 _result_page
->vmp_absent
&&
3056 src_object
->purgable
== VM_PURGABLE_DENY
&&
3057 !src_object
->blocked_access
) {
3059 * This page is "busy" and "absent"
3060 * but not because we're waiting for
3061 * it to be decompressed. It must
3062 * be because it's a "no zero fill"
3063 * page that is currently not
3064 * accessible until it gets overwritten
3065 * by a device driver.
3066 * Since its initial state would have
3067 * been "zero-filled", let's leave the
3068 * copy page missing and get zero-filled
3071 assert(src_object
->internal
);
3072 assert(src_object
->shadow
== NULL
);
3073 assert(src_object
->pager
== NULL
);
3074 can_skip_page
= TRUE
;
3075 vm_page_busy_absent_skipped
++;
3077 can_skip_page
= FALSE
;
3079 if (can_skip_page
) {
3080 vm_object_unlock(src_object
);
3081 /* free the unused "new_page"... */
3082 vm_object_lock(new_object
);
3083 VM_PAGE_FREE(new_page
);
3084 new_page
= VM_PAGE_NULL
;
3085 vm_object_unlock(new_object
);
3086 /* ...and go to next page in "src_object" */
3087 result
= VM_FAULT_SUCCESS
;
3092 vm_object_paging_begin(src_object
);
3094 /* cap size at maximum UPL size */
3095 upl_size_t cluster_size
;
3096 if (os_convert_overflow(size
, &cluster_size
)) {
3097 cluster_size
= 0 - (upl_size_t
)PAGE_SIZE
;
3099 fault_info
.cluster_size
= cluster_size
;
3101 _result_page
= VM_PAGE_NULL
;
3102 result
= vm_fault_page(src_object
, src_offset
,
3103 VM_PROT_READ
, FALSE
,
3104 FALSE
, /* page not looked up */
3105 &prot
, &_result_page
, &top_page
,
3107 &error_code
, FALSE
, FALSE
, &fault_info
);
3110 case VM_FAULT_SUCCESS
:
3111 result_page
= _result_page
;
3112 result_page_object
= VM_PAGE_OBJECT(result_page
);
3115 * Copy the page to the new object.
3118 * If result_page is clean,
3119 * we could steal it instead
3123 vm_page_copy(result_page
, new_page
);
3124 vm_object_unlock(result_page_object
);
3127 * Let go of both pages (make them
3128 * not busy, perform wakeup, activate).
3130 vm_object_lock(new_object
);
3131 SET_PAGE_DIRTY(new_page
, FALSE
);
3132 PAGE_WAKEUP_DONE(new_page
);
3133 vm_object_unlock(new_object
);
3135 vm_object_lock(result_page_object
);
3136 PAGE_WAKEUP_DONE(result_page
);
3138 vm_page_lockspin_queues();
3139 if ((result_page
->vmp_q_state
== VM_PAGE_ON_SPECULATIVE_Q
) ||
3140 (result_page
->vmp_q_state
== VM_PAGE_NOT_ON_Q
)) {
3141 vm_page_activate(result_page
);
3143 vm_page_activate(new_page
);
3144 vm_page_unlock_queues();
3147 * Release paging references and
3148 * top-level placeholder page, if any.
3151 vm_fault_cleanup(result_page_object
,
3156 case VM_FAULT_RETRY
:
3159 case VM_FAULT_MEMORY_SHORTAGE
:
3160 if (vm_page_wait(interruptible
)) {
3165 case VM_FAULT_INTERRUPTED
:
3166 vm_object_lock(new_object
);
3167 VM_PAGE_FREE(new_page
);
3168 vm_object_unlock(new_object
);
3170 vm_object_deallocate(new_object
);
3171 vm_object_deallocate(src_object
);
3172 *_result_object
= VM_OBJECT_NULL
;
3173 return MACH_SEND_INTERRUPTED
;
3175 case VM_FAULT_SUCCESS_NO_VM_PAGE
:
3176 /* success but no VM page: fail */
3177 vm_object_paging_end(src_object
);
3178 vm_object_unlock(src_object
);
3180 case VM_FAULT_MEMORY_ERROR
:
3183 * (a) ignore pages that we can't
3185 * (b) return the null object if
3186 * any page fails [chosen]
3189 vm_object_lock(new_object
);
3190 VM_PAGE_FREE(new_page
);
3191 vm_object_unlock(new_object
);
3193 vm_object_deallocate(new_object
);
3194 vm_object_deallocate(src_object
);
3195 *_result_object
= VM_OBJECT_NULL
;
3196 return error_code
? error_code
:
3200 panic("vm_object_copy_slowly: unexpected error"
3201 " 0x%x from vm_fault_page()\n", result
);
3203 } while (result
!= VM_FAULT_SUCCESS
);
3207 * Lose the extra reference, and return our object.
3209 vm_object_deallocate(src_object
);
3210 *_result_object
= new_object
;
3211 return KERN_SUCCESS
;
3215 * Routine: vm_object_copy_quickly
3218 * Copy the specified range of the source virtual
3219 * memory object, if it can be done without waiting
3220 * for user-generated events.
3223 * If the copy is successful, the copy is returned in
3224 * the arguments; otherwise, the arguments are not
3227 * In/out conditions:
3228 * The object should be unlocked on entry and exit.
3232 __private_extern__ boolean_t
3233 vm_object_copy_quickly(
3234 vm_object_t
*_object
, /* INOUT */
3235 __unused vm_object_offset_t offset
, /* IN */
3236 __unused vm_object_size_t size
, /* IN */
3237 boolean_t
*_src_needs_copy
, /* OUT */
3238 boolean_t
*_dst_needs_copy
) /* OUT */
3240 vm_object_t object
= *_object
;
3241 memory_object_copy_strategy_t copy_strategy
;
3243 if (object
== VM_OBJECT_NULL
) {
3244 *_src_needs_copy
= FALSE
;
3245 *_dst_needs_copy
= FALSE
;
3249 vm_object_lock(object
);
3251 copy_strategy
= object
->copy_strategy
;
3253 switch (copy_strategy
) {
3254 case MEMORY_OBJECT_COPY_SYMMETRIC
:
3257 * Symmetric copy strategy.
3258 * Make another reference to the object.
3259 * Leave object/offset unchanged.
3262 vm_object_reference_locked(object
);
3263 object
->shadowed
= TRUE
;
3264 vm_object_unlock(object
);
3267 * Both source and destination must make
3268 * shadows, and the source must be made
3269 * read-only if not already.
3272 *_src_needs_copy
= TRUE
;
3273 *_dst_needs_copy
= TRUE
;
3277 case MEMORY_OBJECT_COPY_DELAY
:
3278 vm_object_unlock(object
);
3282 vm_object_unlock(object
);
3288 static int copy_call_count
= 0;
3289 static int copy_call_sleep_count
= 0;
3290 static int copy_call_restart_count
= 0;
3293 * Routine: vm_object_copy_call [internal]
3296 * Copy the source object (src_object), using the
3297 * user-managed copy algorithm.
3299 * In/out conditions:
3300 * The source object must be locked on entry. It
3301 * will be *unlocked* on exit.
3304 * If the copy is successful, KERN_SUCCESS is returned.
3305 * A new object that represents the copied virtual
3306 * memory is returned in a parameter (*_result_object).
3307 * If the return value indicates an error, this parameter
3310 static kern_return_t
3311 vm_object_copy_call(
3312 vm_object_t src_object
,
3313 vm_object_offset_t src_offset
,
3314 vm_object_size_t size
,
3315 vm_object_t
*_result_object
) /* OUT */
3319 boolean_t check_ready
= FALSE
;
3320 uint32_t try_failed_count
= 0;
	/*
	 *	If a copy is already in progress, wait and retry.
	 *
	 *	Consider making this call interruptible, as Mike
	 *	intended it to be.
	 *
	 *	Need a counter or version or something to allow
	 *	us to use the copy that the currently requesting
	 *	thread is obtaining -- is it worth adding to the
	 *	vm object structure? Depends on how common this case is.
	 */
3336 while (vm_object_wanted(src_object
, VM_OBJECT_EVENT_COPY_CALL
)) {
3337 vm_object_sleep(src_object
, VM_OBJECT_EVENT_COPY_CALL
,
3339 copy_call_restart_count
++;
3343 * Indicate (for the benefit of memory_object_create_copy)
3344 * that we want a copy for src_object. (Note that we cannot
3345 * do a real assert_wait before calling memory_object_copy,
3346 * so we simply set the flag.)
3349 vm_object_set_wanted(src_object
, VM_OBJECT_EVENT_COPY_CALL
);
3350 vm_object_unlock(src_object
);
3353 * Ask the memory manager to give us a memory object
3354 * which represents a copy of the src object.
3355 * The memory manager may give us a memory object
3356 * which we already have, or it may give us a
3357 * new memory object. This memory object will arrive
3358 * via memory_object_create_copy.
3361 kr
= KERN_FAILURE
; /* XXX need to change memory_object.defs */
3362 if (kr
!= KERN_SUCCESS
) {
3367 * Wait for the copy to arrive.
3369 vm_object_lock(src_object
);
3370 while (vm_object_wanted(src_object
, VM_OBJECT_EVENT_COPY_CALL
)) {
3371 vm_object_sleep(src_object
, VM_OBJECT_EVENT_COPY_CALL
,
3373 copy_call_sleep_count
++;
3376 assert(src_object
->copy
!= VM_OBJECT_NULL
);
3377 copy
= src_object
->copy
;
3378 if (!vm_object_lock_try(copy
)) {
3379 vm_object_unlock(src_object
);
3382 mutex_pause(try_failed_count
); /* wait a bit */
3384 vm_object_lock(src_object
);
3387 if (copy
->vo_size
< src_offset
+ size
) {
3388 assertf(page_aligned(src_offset
+ size
),
3389 "object %p size 0x%llx",
3390 copy
, (uint64_t)(src_offset
+ size
));
3391 copy
->vo_size
= src_offset
+ size
;
3394 if (!copy
->pager_ready
) {
3401 *_result_object
= copy
;
3402 vm_object_unlock(copy
);
3403 vm_object_unlock(src_object
);
3405 /* Wait for the copy to be ready. */
3406 if (check_ready
== TRUE
) {
3407 vm_object_lock(copy
);
3408 while (!copy
->pager_ready
) {
3409 vm_object_sleep(copy
, VM_OBJECT_EVENT_PAGER_READY
, THREAD_UNINT
);
3411 vm_object_unlock(copy
);
3414 return KERN_SUCCESS
;
3417 static int copy_delayed_lock_collisions
= 0;
3418 static int copy_delayed_max_collisions
= 0;
3419 static int copy_delayed_lock_contention
= 0;
3420 static int copy_delayed_protect_iterate
= 0;
3423 * Routine: vm_object_copy_delayed [internal]
3426 * Copy the specified virtual memory object, using
3427 * the asymmetric copy-on-write algorithm.
3429 * In/out conditions:
3430 * The src_object must be locked on entry. It will be unlocked
3431 * on exit - so the caller must also hold a reference to it.
3433 * This routine will not block waiting for user-generated
3434 * events. It is not interruptible.
3436 __private_extern__ vm_object_t
3437 vm_object_copy_delayed(
3438 vm_object_t src_object
,
3439 vm_object_offset_t src_offset
,
3440 vm_object_size_t size
,
3441 boolean_t src_object_shared
)
3443 vm_object_t new_copy
= VM_OBJECT_NULL
;
3444 vm_object_t old_copy
;
3446 vm_object_size_t copy_size
= src_offset
+ size
;
3447 pmap_flush_context pmap_flush_context_storage
;
3448 boolean_t delayed_pmap_flush
= FALSE
;
	/*
	 *	The user-level memory manager wants to see all of the changes
	 *	to this object, but it has promised not to make any changes on
	 *	its own.
	 *
	 *	Perform an asymmetric copy-on-write, as follows:
	 *		Create a new object, called a "copy object" to hold
	 *		pages modified by the new mapping (i.e., the copy,
	 *		not the original mapping).
	 *		Record the original object as the backing object for
	 *		the copy object.  If the original mapping does not
	 *		change a page, it may be used read-only by the copy.
	 *		Record the copy object in the original object.
	 *		When the original mapping causes a page to be modified,
	 *		it must be copied to a new page that is "pushed" to
	 *		the copy object.
	 *		Mark the new mapping (the copy object) copy-on-write.
	 *		This makes the copy object itself read-only, allowing
	 *		it to be reused if the original mapping makes no
	 *		changes, and simplifying the synchronization required
	 *		in the "push" operation described above.
	 *
	 *	The copy-on-write is said to be asymmetric because the original
	 *	object is *not* marked copy-on-write.  A copied page is pushed
	 *	to the copy object, regardless which party attempted to modify
	 *	the page.
	 *
	 *	Repeated asymmetric copy operations may be done.  If the
	 *	original object has not been changed since the last copy, its
	 *	copy object can be reused.  Otherwise, a new copy object can be
	 *	inserted between the original object and its previous copy
	 *	object.  Since any copy object is read-only, this cannot affect
	 *	the contents of the previous copy object.
	 *
	 *	Note that a copy object is higher in the object tree than the
	 *	original object; therefore, use of the copy object recorded in
	 *	the original object must be done carefully, to avoid deadlock.
	 */
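
	/*
	 * Added illustration (editor's sketch, not from the original source):
	 * the shadow/copy relationships that the code below establishes, for a
	 * source object that already had an (unmodified-since) old copy object.
	 *
	 *   before:   old_copy --shadow--> src_object
	 *             src_object->copy == old_copy
	 *
	 *   after:    old_copy --shadow--> new_copy --shadow--> src_object
	 *             src_object->copy == new_copy
	 *
	 * The key assignments are "old_copy->shadow = new_copy",
	 * "new_copy->shadow = src_object" and "src_object->copy = new_copy",
	 * all performed further down with the objects locked exclusively.
	 */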
3491 copy_size
= vm_object_round_page(copy_size
);
3495 * Wait for paging in progress.
3497 if (!src_object
->true_share
&&
3498 (src_object
->paging_in_progress
!= 0 ||
3499 src_object
->activity_in_progress
!= 0)) {
3500 if (src_object_shared
== TRUE
) {
3501 vm_object_unlock(src_object
);
3502 vm_object_lock(src_object
);
3503 src_object_shared
= FALSE
;
3506 vm_object_paging_wait(src_object
, THREAD_UNINT
);
3509 * See whether we can reuse the result of a previous
3513 old_copy
= src_object
->copy
;
3514 if (old_copy
!= VM_OBJECT_NULL
) {
3518 * Try to get the locks (out of order)
3520 if (src_object_shared
== TRUE
) {
3521 lock_granted
= vm_object_lock_try_shared(old_copy
);
3523 lock_granted
= vm_object_lock_try(old_copy
);
3526 if (!lock_granted
) {
3527 vm_object_unlock(src_object
);
3529 if (collisions
++ == 0) {
3530 copy_delayed_lock_contention
++;
3532 mutex_pause(collisions
);
3534 /* Heisenberg Rules */
3535 copy_delayed_lock_collisions
++;
3537 if (collisions
> copy_delayed_max_collisions
) {
3538 copy_delayed_max_collisions
= collisions
;
3541 if (src_object_shared
== TRUE
) {
3542 vm_object_lock_shared(src_object
);
3544 vm_object_lock(src_object
);
3551 * Determine whether the old copy object has
3555 if (old_copy
->resident_page_count
== 0 &&
3556 !old_copy
->pager_created
) {
3558 * It has not been modified.
3560 * Return another reference to
3561 * the existing copy-object if
3562 * we can safely grow it (if
3566 if (old_copy
->vo_size
< copy_size
) {
3567 if (src_object_shared
== TRUE
) {
3568 vm_object_unlock(old_copy
);
3569 vm_object_unlock(src_object
);
3571 vm_object_lock(src_object
);
3572 src_object_shared
= FALSE
;
3576 * We can't perform a delayed copy if any of the
3577 * pages in the extended range are wired (because
3578 * we can't safely take write permission away from
3579 * wired pages). If the pages aren't wired, then
3580 * go ahead and protect them.
3582 copy_delayed_protect_iterate
++;
3584 pmap_flush_context_init(&pmap_flush_context_storage
);
3585 delayed_pmap_flush
= FALSE
;
3587 vm_page_queue_iterate(&src_object
->memq
, p
, vmp_listq
) {
3588 if (!p
->vmp_fictitious
&&
3589 p
->vmp_offset
>= old_copy
->vo_size
&&
3590 p
->vmp_offset
< copy_size
) {
3591 if (VM_PAGE_WIRED(p
)) {
3592 vm_object_unlock(old_copy
);
3593 vm_object_unlock(src_object
);
3595 if (new_copy
!= VM_OBJECT_NULL
) {
3596 vm_object_unlock(new_copy
);
3597 vm_object_deallocate(new_copy
);
3599 if (delayed_pmap_flush
== TRUE
) {
3600 pmap_flush(&pmap_flush_context_storage
);
3603 return VM_OBJECT_NULL
;
3605 pmap_page_protect_options(VM_PAGE_GET_PHYS_PAGE(p
), (VM_PROT_ALL
& ~VM_PROT_WRITE
),
3606 PMAP_OPTIONS_NOFLUSH
, (void *)&pmap_flush_context_storage
);
3607 delayed_pmap_flush
= TRUE
;
3611 if (delayed_pmap_flush
== TRUE
) {
3612 pmap_flush(&pmap_flush_context_storage
);
3615 assertf(page_aligned(copy_size
),
3616 "object %p size 0x%llx",
3617 old_copy
, (uint64_t)copy_size
);
3618 old_copy
->vo_size
= copy_size
;
3620 if (src_object_shared
== TRUE
) {
3621 vm_object_reference_shared(old_copy
);
3623 vm_object_reference_locked(old_copy
);
3625 vm_object_unlock(old_copy
);
3626 vm_object_unlock(src_object
);
3628 if (new_copy
!= VM_OBJECT_NULL
) {
3629 vm_object_unlock(new_copy
);
3630 vm_object_deallocate(new_copy
);
3638 * Adjust the size argument so that the newly-created
3639 * copy object will be large enough to back either the
3640 * old copy object or the new mapping.
3642 if (old_copy
->vo_size
> copy_size
) {
3643 copy_size
= old_copy
->vo_size
;
3646 if (new_copy
== VM_OBJECT_NULL
) {
3647 vm_object_unlock(old_copy
);
3648 vm_object_unlock(src_object
);
3649 new_copy
= vm_object_allocate(copy_size
);
3650 vm_object_lock(src_object
);
3651 vm_object_lock(new_copy
);
3653 src_object_shared
= FALSE
;
3656 assertf(page_aligned(copy_size
),
3657 "object %p size 0x%llx",
3658 new_copy
, (uint64_t)copy_size
);
3659 new_copy
->vo_size
= copy_size
;
3662 * The copy-object is always made large enough to
3663 * completely shadow the original object, since
3664 * it may have several users who want to shadow
3665 * the original object at different points.
3668 assert((old_copy
->shadow
== src_object
) &&
3669 (old_copy
->vo_shadow_offset
== (vm_object_offset_t
) 0));
3670 } else if (new_copy
== VM_OBJECT_NULL
) {
3671 vm_object_unlock(src_object
);
3672 new_copy
= vm_object_allocate(copy_size
);
3673 vm_object_lock(src_object
);
3674 vm_object_lock(new_copy
);
3676 src_object_shared
= FALSE
;
3681 * We now have the src object locked, and the new copy object
3682 * allocated and locked (and potentially the old copy locked).
3683 * Before we go any further, make sure we can still perform
3684 * a delayed copy, as the situation may have changed.
3686 * Specifically, we can't perform a delayed copy if any of the
3687 * pages in the range are wired (because we can't safely take
3688 * write permission away from wired pages). If the pages aren't
3689 * wired, then go ahead and protect them.
3691 copy_delayed_protect_iterate
++;
3693 pmap_flush_context_init(&pmap_flush_context_storage
);
3694 delayed_pmap_flush
= FALSE
;
3696 vm_page_queue_iterate(&src_object
->memq
, p
, vmp_listq
) {
3697 if (!p
->vmp_fictitious
&& p
->vmp_offset
< copy_size
) {
3698 if (VM_PAGE_WIRED(p
)) {
3700 vm_object_unlock(old_copy
);
3702 vm_object_unlock(src_object
);
3703 vm_object_unlock(new_copy
);
3704 vm_object_deallocate(new_copy
);
3706 if (delayed_pmap_flush
== TRUE
) {
3707 pmap_flush(&pmap_flush_context_storage
);
3710 return VM_OBJECT_NULL
;
3712 pmap_page_protect_options(VM_PAGE_GET_PHYS_PAGE(p
), (VM_PROT_ALL
& ~VM_PROT_WRITE
),
3713 PMAP_OPTIONS_NOFLUSH
, (void *)&pmap_flush_context_storage
);
3714 delayed_pmap_flush
= TRUE
;
3718 if (delayed_pmap_flush
== TRUE
) {
3719 pmap_flush(&pmap_flush_context_storage
);
3722 if (old_copy
!= VM_OBJECT_NULL
) {
3724 * Make the old copy-object shadow the new one.
3725 * It will receive no more pages from the original
3729 /* remove ref. from old_copy */
3730 vm_object_lock_assert_exclusive(src_object
);
3731 src_object
->ref_count
--;
3732 assert(src_object
->ref_count
> 0);
3733 vm_object_lock_assert_exclusive(old_copy
);
3734 old_copy
->shadow
= new_copy
;
3735 vm_object_lock_assert_exclusive(new_copy
);
3736 assert(new_copy
->ref_count
> 0);
3737 new_copy
->ref_count
++; /* for old_copy->shadow ref. */
3740 if (old_copy
->res_count
) {
3741 VM_OBJ_RES_INCR(new_copy
);
3742 VM_OBJ_RES_DECR(src_object
);
3746 vm_object_unlock(old_copy
); /* done with old_copy */
3750 * Point the new copy at the existing object.
3752 vm_object_lock_assert_exclusive(new_copy
);
3753 new_copy
->shadow
= src_object
;
3754 new_copy
->vo_shadow_offset
= 0;
3755 new_copy
->shadowed
= TRUE
; /* caller must set needs_copy */
3757 vm_object_lock_assert_exclusive(src_object
);
3758 vm_object_reference_locked(src_object
);
3759 src_object
->copy
= new_copy
;
3760 vm_object_unlock(src_object
);
3761 vm_object_unlock(new_copy
);
3767 * Routine: vm_object_copy_strategically
3770 * Perform a copy according to the source object's
3771 * declared strategy. This operation may block,
3772 * and may be interrupted.
3774 __private_extern__ kern_return_t
3775 vm_object_copy_strategically(
3776 vm_object_t src_object
,
3777 vm_object_offset_t src_offset
,
3778 vm_object_size_t size
,
3779 vm_object_t
*dst_object
, /* OUT */
3780 vm_object_offset_t
*dst_offset
, /* OUT */
3781 boolean_t
*dst_needs_copy
) /* OUT */
3784 boolean_t interruptible
= THREAD_ABORTSAFE
; /* XXX */
3785 boolean_t object_lock_shared
= FALSE
;
3786 memory_object_copy_strategy_t copy_strategy
;
3788 assert(src_object
!= VM_OBJECT_NULL
);
3790 copy_strategy
= src_object
->copy_strategy
;
3792 if (copy_strategy
== MEMORY_OBJECT_COPY_DELAY
) {
3793 vm_object_lock_shared(src_object
);
3794 object_lock_shared
= TRUE
;
3796 vm_object_lock(src_object
);
3800 * The copy strategy is only valid if the memory manager
3801 * is "ready". Internal objects are always ready.
3804 while (!src_object
->internal
&& !src_object
->pager_ready
) {
3805 wait_result_t wait_result
;
3807 if (object_lock_shared
== TRUE
) {
3808 vm_object_unlock(src_object
);
3809 vm_object_lock(src_object
);
3810 object_lock_shared
= FALSE
;
3813 wait_result
= vm_object_sleep( src_object
,
3814 VM_OBJECT_EVENT_PAGER_READY
,
3816 if (wait_result
!= THREAD_AWAKENED
) {
3817 vm_object_unlock(src_object
);
3818 *dst_object
= VM_OBJECT_NULL
;
3820 *dst_needs_copy
= FALSE
;
3821 return MACH_SEND_INTERRUPTED
;
3826 * Use the appropriate copy strategy.
3829 switch (copy_strategy
) {
3830 case MEMORY_OBJECT_COPY_DELAY
:
3831 *dst_object
= vm_object_copy_delayed(src_object
,
3832 src_offset
, size
, object_lock_shared
);
3833 if (*dst_object
!= VM_OBJECT_NULL
) {
3834 *dst_offset
= src_offset
;
3835 *dst_needs_copy
= TRUE
;
3836 result
= KERN_SUCCESS
;
3839 vm_object_lock(src_object
);
3840 OS_FALLTHROUGH
; /* fall thru when delayed copy not allowed */
3842 case MEMORY_OBJECT_COPY_NONE
:
3843 result
= vm_object_copy_slowly(src_object
, src_offset
, size
,
3844 interruptible
, dst_object
);
3845 if (result
== KERN_SUCCESS
) {
3846 *dst_offset
= src_offset
- vm_object_trunc_page(src_offset
);
3847 *dst_needs_copy
= FALSE
;
3851 case MEMORY_OBJECT_COPY_CALL
:
3852 result
= vm_object_copy_call(src_object
, src_offset
, size
,
3854 if (result
== KERN_SUCCESS
) {
3855 *dst_offset
= src_offset
;
3856 *dst_needs_copy
= TRUE
;
3860 case MEMORY_OBJECT_COPY_SYMMETRIC
:
3861 vm_object_unlock(src_object
);
3862 result
= KERN_MEMORY_RESTART_COPY
;
3866 panic("copy_strategically: bad strategy");
3867 result
= KERN_INVALID_ARGUMENT
;
3875 * Create a new object which is backed by the
3876 * specified existing object range. The source
3877 * object reference is deallocated.
3879 * The new object and offset into that object
3880 * are returned in the source parameters.
3882 boolean_t vm_object_shadow_check
= TRUE
;
3884 __private_extern__ boolean_t
3886 vm_object_t
*object
, /* IN/OUT */
3887 vm_object_offset_t
*offset
, /* IN/OUT */
3888 vm_object_size_t length
)
3894 assert(source
!= VM_OBJECT_NULL
);
3895 if (source
== VM_OBJECT_NULL
) {
3902 * This assertion is valid but it gets triggered by Rosetta for example
3903 * due to a combination of vm_remap() that changes a VM object's
3904 * copy_strategy from SYMMETRIC to DELAY and vm_protect(VM_PROT_COPY)
3905 * that then sets "needs_copy" on its map entry. This creates a
3906 * mapping situation that VM should never see and doesn't know how to
3908 * It's not clear if this can create any real problem but we should
3909 * look into fixing this, probably by having vm_protect(VM_PROT_COPY)
3910 * do more than just set "needs_copy" to handle the copy-on-write...
3911 * In the meantime, let's disable the assertion.
3913 assert(source
->copy_strategy
== MEMORY_OBJECT_COPY_SYMMETRIC
);
3917 * Determine if we really need a shadow.
3919 * If the source object is larger than what we are trying
3920 * to create, then force the shadow creation even if the
3921 * ref count is 1. This will allow us to [potentially]
3922 * collapse the underlying object away in the future
3923 * (freeing up the extra data it might contain and that
3927 assert(source
->copy_strategy
!= MEMORY_OBJECT_COPY_NONE
); /* Purgeable objects shouldn't have shadow objects. */
3929 if (vm_object_shadow_check
&&
3930 source
->vo_size
== length
&&
3931 source
->ref_count
== 1) {
3933 * Lock the object and check again.
3934 * We also check to see if there's
3935 * a shadow or copy object involved.
3936 * We can't do that earlier because
3937 * without the object locked, there
3938 * could be a collapse and the chain
3939 * gets modified leaving us with an
3942 vm_object_lock(source
);
3943 if (source
->vo_size
== length
&&
3944 source
->ref_count
== 1 &&
3945 (source
->shadow
== VM_OBJECT_NULL
||
3946 source
->shadow
->copy
== VM_OBJECT_NULL
)) {
3947 source
->shadowed
= FALSE
;
3948 vm_object_unlock(source
);
3951 /* things changed while we were locking "source"... */
3952 vm_object_unlock(source
);
3956 * *offset is the map entry's offset into the VM object and
3957 * is aligned to the map's page size.
3958 * VM objects need to be aligned to the system's page size.
3959 * Record the necessary adjustment and re-align the offset so
3960 * that result->vo_shadow_offset is properly page-aligned.
3962 vm_object_offset_t offset_adjustment
;
3963 offset_adjustment
= *offset
- vm_object_trunc_page(*offset
);
3964 length
= vm_object_round_page(length
+ offset_adjustment
);
3965 *offset
= vm_object_trunc_page(*offset
);
3968 * Allocate a new object with the given length
3971 if ((result
= vm_object_allocate(length
)) == VM_OBJECT_NULL
) {
3972 panic("vm_object_shadow: no object for shadowing");
3976 * The new object shadows the source object, adding
3977 * a reference to it. Our caller changes his reference
3978 * to point to the new object, removing a reference to
3979 * the source object. Net result: no change of reference
3982 result
->shadow
= source
;
3985 * Store the offset into the source object,
3986 * and fix up the offset into the new object.
3989 result
->vo_shadow_offset
= *offset
;
3990 assertf(page_aligned(result
->vo_shadow_offset
),
3991 "result %p shadow offset 0x%llx",
3992 result
, result
->vo_shadow_offset
);
3995 * Return the new things
3999 if (offset_adjustment
) {
4001 * Make the map entry point to the equivalent offset
4002 * in the new object.
4004 DEBUG4K_COPY("adjusting offset @ %p from 0x%llx to 0x%llx for object %p length: 0x%llx\n", offset
, *offset
, *offset
+ offset_adjustment
, result
, length
);
4005 *offset
+= offset_adjustment
;
4012 * The relationship between vm_object structures and
4013 * the memory_object requires careful synchronization.
4015 * All associations are created by memory_object_create_named
4016 * for external pagers and vm_object_compressor_pager_create for internal
4017 * objects as follows:
4019 * pager: the memory_object itself, supplied by
4020 * the user requesting a mapping (or the kernel,
4021 * when initializing internal objects); the
4022 * kernel simulates holding send rights by keeping
4026 * the memory object control port,
4027 * created by the kernel; the kernel holds
4028 * receive (and ownership) rights to this
4029 * port, but no other references.
4031 * When initialization is complete, the "initialized" field
4032 * is asserted. Other mappings using a particular memory object,
4033 * and any references to the vm_object gained through the
4034 * port association must wait for this initialization to occur.
4036 * In order to allow the memory manager to set attributes before
4037 * requests (notably virtual copy operations, but also data or
4038 * unlock requests) are made, a "ready" attribute is made available.
4039 * Only the memory manager may affect the value of this attribute.
4040 * Its value does not affect critical kernel functions, such as
4041 * internal object initialization or destruction. [Furthermore,
4042 * memory objects created by the kernel are assumed to be ready
4043 * immediately; the default memory manager need not explicitly
4044 * set the "ready" attribute.]
4046 * [Both the "initialized" and "ready" attribute wait conditions
4047 * use the "pager" field as the wait event.]
4049 * The port associations can be broken down by any of the
4050 * following routines:
4051 * vm_object_terminate:
4052 * No references to the vm_object remain, and
4053 * the object cannot (or will not) be cached.
4054 * This is the normal case, and is done even
4055 * though one of the other cases has already been
4057 * memory_object_destroy:
4058 * The memory manager has requested that the
4059 * kernel relinquish references to the memory
4060 * object. [The memory manager may not want to
4061 * destroy the memory object, but may wish to
4062 * refuse or tear down existing memory mappings.]
4064 * Each routine that breaks an association must break all of
4065 * them at once. At some later time, that routine must clear
4066 * the pager field and release the memory object references.
4067 * [Furthermore, each routine must cope with the simultaneous
4068 * or previous operations of the others.]
4070 * Because the pager field may be cleared spontaneously, it
4071 * cannot be used to determine whether a memory object has
4072 * ever been associated with a particular vm_object. [This
4073 * knowledge is important to the shadow object mechanism.]
4074 * For this reason, an additional "created" attribute is
4077 * During various paging operations, the pager reference found in the
4078 * vm_object must be valid. To prevent this from being released,
4079 * (other than being removed, i.e., made null), routines may use
4080 * the vm_object_paging_begin/end routines [actually, macros].
4081 * The implementation uses the "paging_in_progress" and "wanted" fields.
4082 * [Operations that alter the validity of the pager values include the
4083 * termination routines and vm_object_collapse.]
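
/*
 * Illustrative sketch (guarded out, not part of the build): the canonical
 * "wait for pager initialization" pattern described above.  The same pattern
 * appears in vm_object_compressor_pager_create() below; the object pointer
 * here is only a placeholder.
 */
#if 0
	vm_object_lock(object);
	while (!object->pager_initialized) {
		vm_object_sleep(object,
		    VM_OBJECT_EVENT_INITIALIZED,
		    THREAD_UNINT);
	}
	vm_object_unlock(object);
#endif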
4088 * Routine: vm_object_memory_object_associate
4090 * Associate a VM object to the given pager.
4091 * If a VM object is not provided, create one.
4092 * Initialize the pager.
4095 vm_object_memory_object_associate(
4096 memory_object_t pager
,
4098 vm_object_size_t size
,
4101 memory_object_control_t control
;
4103 assert(pager
!= MEMORY_OBJECT_NULL
);
4105 if (object
!= VM_OBJECT_NULL
) {
4106 assert(object
->internal
);
4107 assert(object
->pager_created
);
4108 assert(!object
->pager_initialized
);
4109 assert(!object
->pager_ready
);
4110 assert(object
->pager_trusted
);
4112 object
= vm_object_allocate(size
);
4113 assert(object
!= VM_OBJECT_NULL
);
4114 object
->internal
= FALSE
;
4115 object
->pager_trusted
= FALSE
;
4116 /* copy strategy invalid until set by memory manager */
4117 object
->copy_strategy
= MEMORY_OBJECT_COPY_INVALID
;
4121 * Allocate request port.
4124 control
= memory_object_control_allocate(object
);
4125 assert(control
!= MEMORY_OBJECT_CONTROL_NULL
);
4127 vm_object_lock(object
);
4129 assert(!object
->pager_ready
);
4130 assert(!object
->pager_initialized
);
4131 assert(object
->pager
== NULL
);
4132 assert(object
->pager_control
== NULL
);
4135 * Copy the reference we were given.
4138 memory_object_reference(pager
);
4139 object
->pager_created
= TRUE
;
4140 object
->pager
= pager
;
4141 object
->pager_control
= control
;
4142 object
->pager_ready
= FALSE
;
4144 vm_object_unlock(object
);
4147 * Let the pager know we're using it.
4150 (void) memory_object_init(pager
,
4151 object
->pager_control
,
4154 vm_object_lock(object
);
4156 object
->named
= TRUE
;
4158 if (object
->internal
) {
4159 object
->pager_ready
= TRUE
;
4160 vm_object_wakeup(object
, VM_OBJECT_EVENT_PAGER_READY
);
4163 object
->pager_initialized
= TRUE
;
4164 vm_object_wakeup(object
, VM_OBJECT_EVENT_INITIALIZED
);
4166 vm_object_unlock(object
);
4172 * Routine: vm_object_compressor_pager_create
4174 * Create a memory object for an internal object.
4175 * In/out conditions:
4176 * The object is locked on entry and exit;
4177 * it may be unlocked within this call.
4179 * Only one thread may be performing a
4180 * vm_object_compressor_pager_create on an object at
4181 * a time. Presumably, only the pageout
4182 * daemon will be using this routine.
4186 vm_object_compressor_pager_create(
4189 memory_object_t pager
;
4190 vm_object_t pager_object
= VM_OBJECT_NULL
;
4192 assert(object
!= kernel_object
);
4195 * Prevent collapse or termination by holding a paging reference
4198 vm_object_paging_begin(object
);
4199 if (object
->pager_created
) {
4201 * Someone else got to it first...
4202 * wait for them to finish initializing the ports
4204 while (!object
->pager_initialized
) {
4205 vm_object_sleep(object
,
4206 VM_OBJECT_EVENT_INITIALIZED
,
4209 vm_object_paging_end(object
);
4213 if ((uint32_t) (object
->vo_size
/ PAGE_SIZE
) !=
4214 (object
->vo_size
/ PAGE_SIZE
)) {
4215 #if DEVELOPMENT || DEBUG
4216 printf("vm_object_compressor_pager_create(%p): "
4217 "object size 0x%llx >= 0x%llx\n",
4219 (uint64_t) object
->vo_size
,
4220 0x0FFFFFFFFULL
* PAGE_SIZE
);
4221 #endif /* DEVELOPMENT || DEBUG */
4222 vm_object_paging_end(object
);
4227 * Indicate that a memory object has been assigned
4228 * before dropping the lock, to prevent a race.
4231 object
->pager_created
= TRUE
;
4232 object
->pager_trusted
= TRUE
;
4233 object
->paging_offset
= 0;
4235 vm_object_unlock(object
);
4238 * Create the [internal] pager, and associate it with this object.
4240 * We make the association here so that vm_object_enter()
4241 * can look up the object to complete initializing it. No
4242 * user will ever map this object.
4245 /* create our new memory object */
4246 assert((uint32_t) (object
->vo_size
/ PAGE_SIZE
) ==
4247 (object
->vo_size
/ PAGE_SIZE
));
4248 (void) compressor_memory_object_create(
4249 (memory_object_size_t
) object
->vo_size
,
4251 if (pager
== NULL
) {
4252 panic("vm_object_compressor_pager_create(): "
4253 "no pager for object %p size 0x%llx\n",
4254 object
, (uint64_t) object
->vo_size
);
4259 * A reference was returned by
4260 * memory_object_create(), and it is
4261 * copied by vm_object_memory_object_associate().
4264 pager_object
= vm_object_memory_object_associate(pager
,
4268 if (pager_object
!= object
) {
4269 panic("vm_object_compressor_pager_create: mismatch (pager: %p, pager_object: %p, orig_object: %p, orig_object size: 0x%llx)\n", pager
, pager_object
, object
, (uint64_t) object
->vo_size
);
4273 * Drop the reference we were passed.
4275 memory_object_deallocate(pager
);
4277 vm_object_lock(object
);
4280 * Release the paging reference
4282 vm_object_paging_end(object
);
4286 * Global variables for vm_object_collapse():
4288 * Counts for normal collapses and bypasses.
4289 * Debugging variables, to watch or disable collapse.
4291 static long object_collapses
= 0;
4292 static long object_bypasses
= 0;
4294 static boolean_t vm_object_collapse_allowed
= TRUE
;
4295 static boolean_t vm_object_bypass_allowed
= TRUE
;
4297 void vm_object_do_collapse_compressor(vm_object_t object
,
4298 vm_object_t backing_object
);
void
vm_object_do_collapse_compressor(
	vm_object_t object,
	vm_object_t backing_object)
{
	vm_object_offset_t new_offset, backing_offset;
	vm_object_size_t size;

	vm_counters.do_collapse_compressor++;

	vm_object_lock_assert_exclusive(object);
	vm_object_lock_assert_exclusive(backing_object);

	size = object->vo_size;

	/*
	 * Move all compressed pages from backing_object
	 * to the parent.
	 */
	for (backing_offset = object->vo_shadow_offset;
	    backing_offset < object->vo_shadow_offset + object->vo_size;
	    backing_offset += PAGE_SIZE) {
		memory_object_offset_t backing_pager_offset;

		/* find the next compressed page at or after this offset */
		backing_pager_offset = (backing_offset +
		    backing_object->paging_offset);
		backing_pager_offset = vm_compressor_pager_next_compressed(
			backing_object->pager,
			backing_pager_offset);
		if (backing_pager_offset == (memory_object_offset_t) -1) {
			/* no more compressed pages */
			break;
		}
		backing_offset = (backing_pager_offset -
		    backing_object->paging_offset);

		new_offset = backing_offset - object->vo_shadow_offset;

		if (new_offset >= object->vo_size) {
			/* we're out of the scope of "object": done */
			break;
		}

		if ((vm_page_lookup(object, new_offset) != VM_PAGE_NULL) ||
		    (vm_compressor_pager_state_get(object->pager,
		    (new_offset +
		    object->paging_offset)) ==
		    VM_EXTERNAL_STATE_EXISTS)) {
			/*
			 * This page already exists in object, resident or
			 * compressed.
			 * We don't need this compressed page in backing_object
			 * and it will be reclaimed when we release
			 * backing_object.
			 */
			continue;
		}

		/*
		 * backing_object has this page in the VM compressor and
		 * we need to transfer it to object.
		 */
		vm_counters.do_collapse_compressor_pages++;
		vm_compressor_pager_transfer(
			object->pager,
			(new_offset + object->paging_offset),
			backing_object->pager,
			(backing_offset + backing_object->paging_offset));
	}
}
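/*
 * Note on the offset arithmetic above: offsets in "backing_object" are
 * translated into "object" offsets by subtracting object->vo_shadow_offset,
 * and each object's pager works in its own pager space, reached by adding
 * that object's paging_offset.  The transfer therefore maps
 * (backing_offset + backing_object->paging_offset) in the backing pager to
 * (new_offset + object->paging_offset) in the parent's pager.
 */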
/*
 *	Routine:	vm_object_do_collapse
 *	Purpose:
 *		Collapse an object with the object backing it.
 *		Pages in the backing object are moved into the
 *		parent, and the backing object is deallocated.
 *	Conditions:
 *		Both objects and the cache are locked; the page
 *		queues are unlocked.
 */
static void
vm_object_do_collapse(
	vm_object_t object,
	vm_object_t backing_object)
{
	vm_page_t p, pp;
	vm_object_offset_t new_offset, backing_offset;
	vm_object_size_t size;

	vm_object_lock_assert_exclusive(object);
	vm_object_lock_assert_exclusive(backing_object);

	assert(object->purgable == VM_PURGABLE_DENY);
	assert(backing_object->purgable == VM_PURGABLE_DENY);

	backing_offset = object->vo_shadow_offset;
	size = object->vo_size;

	/*
	 *	Move all in-memory pages from backing_object
	 *	to the parent.  Pages that have been paged out
	 *	will be overwritten by any of the parent's
	 *	pages that shadow them.
	 */

	while (!vm_page_queue_empty(&backing_object->memq)) {
		p = (vm_page_t) vm_page_queue_first(&backing_object->memq);

		new_offset = (p->vmp_offset - backing_offset);

		assert(!p->vmp_busy || p->vmp_absent);

		/*
		 *	If the parent has a page here, or if
		 *	this page falls outside the parent,
		 *	dispose of it.
		 *
		 *	Otherwise, move it as planned.
		 */

		if (p->vmp_offset < backing_offset || new_offset >= size) {
			VM_PAGE_FREE(p);
		} else {
			pp = vm_page_lookup(object, new_offset);
			if (pp == VM_PAGE_NULL) {
				if (VM_COMPRESSOR_PAGER_STATE_GET(object,
				    new_offset)
				    == VM_EXTERNAL_STATE_EXISTS) {
					/*
					 * Parent object has this page
					 * in the VM compressor.
					 * Throw away the backing
					 * object's page.
					 */
					VM_PAGE_FREE(p);
				} else {
					/*
					 *	Parent now has no page.
					 *	Move the backing object's page
					 *	up.
					 */
					vm_page_rename(p, object, new_offset);
				}
			} else {
				assert(!pp->vmp_absent);

				/*
				 *	Parent object has a real page.
				 *	Throw away the backing object's
				 *	page.
				 */
				VM_PAGE_FREE(p);
			}
		}
	}

	if (vm_object_collapse_compressor_allowed &&
	    object->pager != MEMORY_OBJECT_NULL &&
	    backing_object->pager != MEMORY_OBJECT_NULL) {
		/* move compressed pages from backing_object to object */
		vm_object_do_collapse_compressor(object, backing_object);
	} else if (backing_object->pager != MEMORY_OBJECT_NULL) {
		assert((!object->pager_created &&
		    (object->pager == MEMORY_OBJECT_NULL)) ||
		    (!backing_object->pager_created &&
		    (backing_object->pager == MEMORY_OBJECT_NULL)));
		/*
		 *	Move the pager from backing_object to object.
		 *
		 *	XXX We're only using part of the paging space
		 *	for keeps now... we ought to discard the
		 *	unused portion.
		 */

		assert(!object->paging_in_progress);
		assert(!object->activity_in_progress);
		assert(!object->pager_created);
		assert(object->pager == NULL);
		object->pager = backing_object->pager;

		object->pager_created = backing_object->pager_created;
		object->pager_control = backing_object->pager_control;
		object->pager_ready = backing_object->pager_ready;
		object->pager_initialized = backing_object->pager_initialized;
		object->paging_offset =
		    backing_object->paging_offset + backing_offset;
		if (object->pager_control != MEMORY_OBJECT_CONTROL_NULL) {
			memory_object_control_collapse(object->pager_control,
			    object);
		}
		/* the backing_object has lost its pager: reset all fields */
		backing_object->pager_created = FALSE;
		backing_object->pager_control = NULL;
		backing_object->pager_ready = FALSE;
		backing_object->paging_offset = 0;
		backing_object->pager = NULL;
	}

	/*
	 *	Object now shadows whatever backing_object did.
	 *	Note that the reference to backing_object->shadow
	 *	moves from within backing_object to within object.
	 */

	assert(!object->phys_contiguous);
	assert(!backing_object->phys_contiguous);
	object->shadow = backing_object->shadow;
	if (object->shadow) {
		assertf(page_aligned(object->vo_shadow_offset),
		    "object %p shadow_offset 0x%llx",
		    object, object->vo_shadow_offset);
		assertf(page_aligned(backing_object->vo_shadow_offset),
		    "backing_object %p shadow_offset 0x%llx",
		    backing_object, backing_object->vo_shadow_offset);
		object->vo_shadow_offset += backing_object->vo_shadow_offset;
		/* "backing_object" gave its shadow to "object" */
		backing_object->shadow = VM_OBJECT_NULL;
		backing_object->vo_shadow_offset = 0;
	} else {
		/* no shadow, therefore no shadow offset... */
		object->vo_shadow_offset = 0;
	}
	assert((object->shadow == VM_OBJECT_NULL) ||
	    (object->shadow->copy != backing_object));

	/*
	 *	Discard backing_object.
	 *
	 *	Since the backing object has no pages, no
	 *	pager left, and no object references within it,
	 *	all that is necessary is to dispose of it.
	 */

	assert(backing_object->ref_count == 1);
	assert(backing_object->resident_page_count == 0);
	assert(backing_object->paging_in_progress == 0);
	assert(backing_object->activity_in_progress == 0);
	assert(backing_object->shadow == VM_OBJECT_NULL);
	assert(backing_object->vo_shadow_offset == 0);

	if (backing_object->pager != MEMORY_OBJECT_NULL) {
		/* ... unless it has a pager; need to terminate pager too */
		vm_counters.do_collapse_terminate++;
		if (vm_object_terminate(backing_object) != KERN_SUCCESS) {
			vm_counters.do_collapse_terminate_failure++;
		}
		return;
	}

	assert(backing_object->pager == NULL);

	backing_object->alive = FALSE;
	vm_object_unlock(backing_object);

#if VM_OBJECT_TRACKING
	if (vm_object_tracking_inited) {
		btlog_remove_entries_for_element(vm_object_tracking_btlog,
		    backing_object);
	}
#endif /* VM_OBJECT_TRACKING */

	vm_object_lock_destroy(backing_object);

	zfree(vm_object_zone, backing_object);
}
static void
vm_object_do_bypass(
	vm_object_t object,
	vm_object_t backing_object)
{
	/*
	 *	Make the parent shadow the next object
	 *	in the chain.
	 */

	vm_object_lock_assert_exclusive(object);
	vm_object_lock_assert_exclusive(backing_object);

#if TASK_SWAPPER
	/*
	 *	Do object reference in-line to
	 *	conditionally increment shadow's
	 *	residence count.  If object is not
	 *	resident, leave residence count
	 *	on shadow alone.
	 */
	if (backing_object->shadow != VM_OBJECT_NULL) {
		vm_object_lock(backing_object->shadow);
		vm_object_lock_assert_exclusive(backing_object->shadow);
		backing_object->shadow->ref_count++;
		if (object->res_count != 0) {
			vm_object_res_reference(backing_object->shadow);
		}
		vm_object_unlock(backing_object->shadow);
	}
#else /* TASK_SWAPPER */
	vm_object_reference(backing_object->shadow);
#endif /* TASK_SWAPPER */

	assert(!object->phys_contiguous);
	assert(!backing_object->phys_contiguous);
	object->shadow = backing_object->shadow;
	if (object->shadow) {
		assertf(page_aligned(object->vo_shadow_offset),
		    "object %p shadow_offset 0x%llx",
		    object, object->vo_shadow_offset);
		assertf(page_aligned(backing_object->vo_shadow_offset),
		    "backing_object %p shadow_offset 0x%llx",
		    backing_object, backing_object->vo_shadow_offset);
		object->vo_shadow_offset += backing_object->vo_shadow_offset;
	} else {
		/* no shadow, therefore no shadow offset... */
		object->vo_shadow_offset = 0;
	}

	/*
	 *	Backing object might have had a copy pointer
	 *	to us.  If it did, clear it.
	 */
	if (backing_object->copy == object) {
		backing_object->copy = VM_OBJECT_NULL;
	}

	/*
	 *	Drop the reference count on backing_object.
	 *
	 *	Since its ref_count was at least 2, it
	 *	will not vanish; so we don't need to call
	 *	vm_object_deallocate.
	 *	[with a caveat for "named" objects]
	 *
	 *	The res_count on the backing object is
	 *	conditionally decremented.  It's possible
	 *	(via vm_pageout_scan) to get here with
	 *	a "swapped" object, which has a 0 res_count,
	 *	in which case, the backing object res_count
	 *	is already down by one.
	 *
	 *	Don't call vm_object_deallocate unless
	 *	ref_count drops to zero.
	 *
	 *	The ref_count can drop to zero here if the
	 *	backing object could be bypassed but not
	 *	collapsed, such as when the backing object
	 *	is temporary and cachable.
	 */

	if (backing_object->ref_count > 2 ||
	    (!backing_object->named && backing_object->ref_count > 1)) {
		vm_object_lock_assert_exclusive(backing_object);
		backing_object->ref_count--;
#if TASK_SWAPPER
		if (object->res_count != 0) {
			vm_object_res_deallocate(backing_object);
		}
		assert(backing_object->ref_count > 0);
#endif /* TASK_SWAPPER */
		vm_object_unlock(backing_object);
	} else {
		/*
		 *	Drop locks so that we can deallocate
		 *	the backing object.
		 */

#if TASK_SWAPPER
		if (object->res_count == 0) {
			/* XXX get a reference for the deallocate below */
			vm_object_res_reference(backing_object);
		}
#endif /* TASK_SWAPPER */
		/*
		 * vm_object_collapse (the caller of this function) is
		 * now called from contexts that may not guarantee that a
		 * valid reference is held on the object... w/o a valid
		 * reference, it is unsafe and unwise (you will definitely
		 * regret it) to unlock the object and then retake the lock
		 * since the object may be terminated and recycled in between.
		 * The "activity_in_progress" reference will keep the object
		 * from being terminated while we are waiting.
		 */
		vm_object_activity_begin(object);
		vm_object_unlock(object);

		vm_object_unlock(backing_object);
		vm_object_deallocate(backing_object);

		/*
		 *	Relock object. We don't have to reverify
		 *	its state since vm_object_collapse will
		 *	do that for us as it starts at the
		 *	top of the chain.
		 */

		vm_object_lock(object);
		vm_object_activity_end(object);
	}
}
/*
 *	vm_object_collapse:
 *
 *	Perform an object collapse or an object bypass if appropriate.
 *	The real work of collapsing and bypassing is performed in
 *	the routines vm_object_do_collapse and vm_object_do_bypass.
 *
 *	Requires that the object be locked and the page queues be unlocked.
 */
static unsigned long vm_object_collapse_calls = 0;
static unsigned long vm_object_collapse_objects = 0;
static unsigned long vm_object_collapse_do_collapse = 0;
static unsigned long vm_object_collapse_do_bypass = 0;

__private_extern__ void
vm_object_collapse(
	vm_object_t object,
	vm_object_offset_t hint_offset,
	boolean_t can_bypass)
{
	vm_object_t backing_object;
	unsigned int rcount;
	unsigned int size;
	vm_object_t original_object;
	int object_lock_type;
	int backing_object_lock_type;

	vm_object_collapse_calls++;

	assertf(page_aligned(hint_offset), "hint_offset 0x%llx", hint_offset);

	if (!vm_object_collapse_allowed &&
	    !(can_bypass && vm_object_bypass_allowed)) {
		return;
	}

	if (object == VM_OBJECT_NULL) {
		return;
	}

	original_object = object;

	/*
	 * The top object was locked "exclusive" by the caller.
	 * In the first pass, to determine if we can collapse the shadow chain,
	 * take a "shared" lock on the shadow objects.  If we can collapse,
	 * we'll have to go down the chain again with exclusive locks.
	 */
	object_lock_type = OBJECT_LOCK_EXCLUSIVE;
	backing_object_lock_type = OBJECT_LOCK_SHARED;

retry:
	object = original_object;
	vm_object_lock_assert_exclusive(object);

	while (TRUE) {
		vm_object_collapse_objects++;
		/*
		 *	Verify that the conditions are right for either
		 *	collapse or bypass:
		 */

		/*
		 *	There is a backing object, and
		 */

		backing_object = object->shadow;
		if (backing_object == VM_OBJECT_NULL) {
			if (object != original_object) {
				vm_object_unlock(object);
			}
			return;
		}
		if (backing_object_lock_type == OBJECT_LOCK_SHARED) {
			vm_object_lock_shared(backing_object);
		} else {
			vm_object_lock(backing_object);
		}

		/*
		 *	No pages in the object are currently
		 *	being paged out, and
		 */
		if (object->paging_in_progress != 0 ||
		    object->activity_in_progress != 0) {
			/* try and collapse the rest of the shadow chain */
			if (object != original_object) {
				vm_object_unlock(object);
			}
			object = backing_object;
			object_lock_type = backing_object_lock_type;
			continue;
		}

		/*
		 *	The backing object is not read_only,
		 *	and no pages in the backing object are
		 *	currently being paged out.
		 *	The backing object is internal.
		 */

		if (!backing_object->internal ||
		    backing_object->paging_in_progress != 0 ||
		    backing_object->activity_in_progress != 0) {
			/* try and collapse the rest of the shadow chain */
			if (object != original_object) {
				vm_object_unlock(object);
			}
			object = backing_object;
			object_lock_type = backing_object_lock_type;
			continue;
		}

		/*
		 * Purgeable objects are not supposed to engage in
		 * copy-on-write activities, so should not have
		 * any shadow objects or be a shadow object to another
		 * object.
		 * Collapsing a purgeable object would require some
		 * updates to the purgeable compressed ledgers.
		 */
		if (object->purgable != VM_PURGABLE_DENY ||
		    backing_object->purgable != VM_PURGABLE_DENY) {
			panic("vm_object_collapse() attempting to collapse "
			    "purgeable object: %p(%d) %p(%d)\n",
			    object, object->purgable,
			    backing_object, backing_object->purgable);
			/* try and collapse the rest of the shadow chain */
			if (object != original_object) {
				vm_object_unlock(object);
			}
			object = backing_object;
			object_lock_type = backing_object_lock_type;
			continue;
		}

		/*
		 *	The backing object can't be a copy-object:
		 *	the shadow_offset for the copy-object must stay
		 *	as 0.  Furthermore (for the 'we have all the
		 *	pages' case), if we bypass backing_object and
		 *	just shadow the next object in the chain, old
		 *	pages from that object would then have to be copied
		 *	BOTH into the (former) backing_object and into the
		 *	parent object.
		 */
		if (backing_object->shadow != VM_OBJECT_NULL &&
		    backing_object->shadow->copy == backing_object) {
			/* try and collapse the rest of the shadow chain */
			if (object != original_object) {
				vm_object_unlock(object);
			}
			object = backing_object;
			object_lock_type = backing_object_lock_type;
			continue;
		}

		/*
		 *	We can now try to either collapse the backing
		 *	object (if the parent is the only reference to
		 *	it) or (perhaps) remove the parent's reference
		 *	to it.
		 *
		 *	If there is exactly one reference to the backing
		 *	object, we may be able to collapse it into the
		 *	parent.
		 *
		 *	As long as one of the objects is still not known
		 *	to the pager, we can collapse them.
		 */
		if (backing_object->ref_count == 1 &&
		    (vm_object_collapse_compressor_allowed ||
		    !object->pager_created
		    || (!backing_object->pager_created)
		    ) && vm_object_collapse_allowed) {
			/*
			 * We need the exclusive lock on the VM objects.
			 */
			if (backing_object_lock_type != OBJECT_LOCK_EXCLUSIVE) {
				/*
				 * We have an object and its shadow locked
				 * "shared".  We can't just upgrade the locks
				 * to "exclusive", as some other thread might
				 * also have these objects locked "shared" and
				 * attempt to upgrade one or the other to
				 * "exclusive".  The upgrades would block
				 * forever waiting for the other "shared" locks
				 * to get released.
				 * So we have to release the locks and go
				 * down the shadow chain again (since it could
				 * have changed) with "exclusive" locking.
				 */
				vm_object_unlock(backing_object);
				if (object != original_object) {
					vm_object_unlock(object);
				}
				object_lock_type = OBJECT_LOCK_EXCLUSIVE;
				backing_object_lock_type = OBJECT_LOCK_EXCLUSIVE;
				goto retry;
			}

			/*
			 *	Collapse the object with its backing
			 *	object, and try again with the object's
			 *	new backing object.
			 */

			vm_object_do_collapse(object, backing_object);
			vm_object_collapse_do_collapse++;
			continue;
		}

		/*
		 *	Collapsing the backing object was not possible
		 *	or permitted, so let's try bypassing it.
		 */

		if (!(can_bypass && vm_object_bypass_allowed)) {
			/* try and collapse the rest of the shadow chain */
			if (object != original_object) {
				vm_object_unlock(object);
			}
			object = backing_object;
			object_lock_type = backing_object_lock_type;
			continue;
		}

		/*
		 *	If the object doesn't have all its pages present,
		 *	we have to make sure no pages in the backing object
		 *	"show through" before bypassing it.
		 */
		size = (unsigned int)atop(object->vo_size);
		rcount = object->resident_page_count;

		if (rcount != size) {
			vm_object_offset_t offset;
			vm_object_offset_t backing_offset;
			unsigned int backing_rcount;

			/*
			 *	If the backing object has a pager but no pagemap,
			 *	then we cannot bypass it, because we don't know
			 *	what pages it has.
			 */
			if (backing_object->pager_created) {
				/* try and collapse the rest of the shadow chain */
				if (object != original_object) {
					vm_object_unlock(object);
				}
				object = backing_object;
				object_lock_type = backing_object_lock_type;
				continue;
			}

			/*
			 *	If the object has a pager but no pagemap,
			 *	then we cannot bypass it, because we don't know
			 *	what pages it has.
			 */
			if (object->pager_created) {
				/* try and collapse the rest of the shadow chain */
				if (object != original_object) {
					vm_object_unlock(object);
				}
				object = backing_object;
				object_lock_type = backing_object_lock_type;
				continue;
			}

			backing_offset = object->vo_shadow_offset;
			backing_rcount = backing_object->resident_page_count;

			if ((int)backing_rcount - (int)(atop(backing_object->vo_size) - size) > (int)rcount) {
				/*
				 * we have enough pages in the backing object to guarantee that
				 * at least 1 of them must be 'uncovered' by a resident page
				 * in the object we're evaluating, so move on and
				 * try to collapse the rest of the shadow chain
				 */
				if (object != original_object) {
					vm_object_unlock(object);
				}
				object = backing_object;
				object_lock_type = backing_object_lock_type;
				continue;
			}

			/*
			 *	If all of the pages in the backing object are
			 *	shadowed by the parent object, the parent
			 *	object no longer has to shadow the backing
			 *	object; it can shadow the next one in the
			 *	chain.
			 *
			 *	If the backing object has existence info,
			 *	we must check examine its existence info
			 *	as well.
			 */

#define EXISTS_IN_OBJECT(obj, off, rc)                  \
	((VM_COMPRESSOR_PAGER_STATE_GET((obj), (off))   \
	  == VM_EXTERNAL_STATE_EXISTS) ||               \
	 ((rc) && vm_page_lookup((obj), (off)) != VM_PAGE_NULL && (rc)--))
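/*
 * EXISTS_IN_OBJECT(obj, off, rc) evaluates to true when the page at offset
 * "off" is present in "obj", either as a compressed page (the compressor
 * pager reports VM_EXTERNAL_STATE_EXISTS) or as a resident page.  The
 * resident-page lookup is only attempted while "rc" is non-zero, and "rc"
 * is decremented on each resident hit, so the scans below can stop early
 * once every resident page of the object has been accounted for.
 */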
			/*
			 * Check the hint location first
			 * (since it is often the quickest way out of here).
			 */
			if (object->cow_hint != ~(vm_offset_t)0) {
				hint_offset = (vm_object_offset_t)object->cow_hint;
			} else {
				hint_offset = (hint_offset > 8 * PAGE_SIZE_64) ?
				    (hint_offset - 8 * PAGE_SIZE_64) : 0;
			}

			if (EXISTS_IN_OBJECT(backing_object, hint_offset +
			    backing_offset, backing_rcount) &&
			    !EXISTS_IN_OBJECT(object, hint_offset, rcount)) {
				/* dependency right at the hint */
				object->cow_hint = (vm_offset_t) hint_offset; /* atomic */
				/* try and collapse the rest of the shadow chain */
				if (object != original_object) {
					vm_object_unlock(object);
				}
				object = backing_object;
				object_lock_type = backing_object_lock_type;
				continue;
			}

			/*
			 * If the object's window onto the backing_object
			 * is large compared to the number of resident
			 * pages in the backing object, it makes sense to
			 * walk the backing_object's resident pages first.
			 *
			 * NOTE: Pages may be in both the existence map and/or
			 * resident, so if we don't find a dependency while
			 * walking the backing object's resident page list
			 * directly, and there is an existence map, we'll have
			 * to run the offset based 2nd pass.  Because we may
			 * have to run both passes, we need to be careful
			 * not to decrement 'rcount' in the 1st pass
			 */
			if (backing_rcount && backing_rcount < (size / 8)) {
				unsigned int rc = rcount;
				vm_page_t p;

				backing_rcount = backing_object->resident_page_count;
				p = (vm_page_t)vm_page_queue_first(&backing_object->memq);
				do {
					offset = (p->vmp_offset - backing_offset);

					if (offset < object->vo_size &&
					    offset != hint_offset &&
					    !EXISTS_IN_OBJECT(object, offset, rc)) {
						/* found a dependency */
						object->cow_hint = (vm_offset_t) offset; /* atomic */

						break;
					}
					p = (vm_page_t) vm_page_queue_next(&p->vmp_listq);
				} while (--backing_rcount);
				if (backing_rcount != 0) {
					/* try and collapse the rest of the shadow chain */
					if (object != original_object) {
						vm_object_unlock(object);
					}
					object = backing_object;
					object_lock_type = backing_object_lock_type;
					continue;
				}
			}

			/*
			 * Walk through the offsets looking for pages in the
			 * backing object that show through to the object.
			 */
			if (backing_rcount) {
				offset = hint_offset;

				while ((offset =
				    (offset + PAGE_SIZE_64 < object->vo_size) ?
				    (offset + PAGE_SIZE_64) : 0) != hint_offset) {
					if (EXISTS_IN_OBJECT(backing_object, offset +
					    backing_offset, backing_rcount) &&
					    !EXISTS_IN_OBJECT(object, offset, rcount)) {
						/* found a dependency */
						object->cow_hint = (vm_offset_t) offset; /* atomic */
						break;
					}
				}
				if (offset != hint_offset) {
					/* try and collapse the rest of the shadow chain */
					if (object != original_object) {
						vm_object_unlock(object);
					}
					object = backing_object;
					object_lock_type = backing_object_lock_type;
					continue;
				}
			}
		}

		/*
		 * We need "exclusive" locks on the 2 VM objects.
		 */
		if (backing_object_lock_type != OBJECT_LOCK_EXCLUSIVE) {
			vm_object_unlock(backing_object);
			if (object != original_object) {
				vm_object_unlock(object);
			}
			object_lock_type = OBJECT_LOCK_EXCLUSIVE;
			backing_object_lock_type = OBJECT_LOCK_EXCLUSIVE;
			goto retry;
		}

		/* reset the offset hint for any objects deeper in the chain */
		object->cow_hint = (vm_offset_t)0;

		/*
		 *	All interesting pages in the backing object
		 *	already live in the parent or its pager.
		 *	Thus we can bypass the backing object.
		 */

		vm_object_do_bypass(object, backing_object);
		vm_object_collapse_do_bypass++;

		/*
		 *	Try again with this object's new backing object.
		 */

		continue;
	}

	/*
	 *	if (object != original_object) {
	 *		vm_object_unlock(object);
	 *	}
	 */
}
/*
 *	Routine:	vm_object_page_remove: [internal]
 *	Purpose:
 *		Removes all physical pages in the specified
 *		object range from the object's list of pages.
 *
 *	In/out conditions:
 *		The object must be locked.
 *		The object must not have paging_in_progress, usually
 *		guaranteed by not having a pager.
 */
unsigned int vm_object_page_remove_lookup = 0;
unsigned int vm_object_page_remove_iterate = 0;

__private_extern__ void
vm_object_page_remove(
	vm_object_t object,
	vm_object_offset_t start,
	vm_object_offset_t end)
{
	vm_page_t p, next;

	/*
	 *	One and two page removals are most popular.
	 *	The factor of 16 here is somewhat arbitrary.
	 *	It balances vm_object_lookup vs iteration.
	 */

	if (atop_64(end - start) < (unsigned)object->resident_page_count / 16) {
		vm_object_page_remove_lookup++;

		for (; start < end; start += PAGE_SIZE_64) {
			p = vm_page_lookup(object, start);
			if (p != VM_PAGE_NULL) {
				assert(!p->vmp_cleaning && !p->vmp_laundry);
				if (!p->vmp_fictitious && p->vmp_pmapped) {
					pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(p));
				}
				VM_PAGE_FREE(p);
			}
		}
	} else {
		vm_object_page_remove_iterate++;

		p = (vm_page_t) vm_page_queue_first(&object->memq);
		while (!vm_page_queue_end(&object->memq, (vm_page_queue_entry_t) p)) {
			next = (vm_page_t) vm_page_queue_next(&p->vmp_listq);
			if ((start <= p->vmp_offset) && (p->vmp_offset < end)) {
				assert(!p->vmp_cleaning && !p->vmp_laundry);
				if (!p->vmp_fictitious && p->vmp_pmapped) {
					pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(p));
				}
				VM_PAGE_FREE(p);
			}
			p = next;
		}
	}
}
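/*
 * Note on the heuristic above: the lookup path costs one hash lookup per
 * page in the removal range, while the iterate path walks every resident
 * page of the object once.  Per-offset lookups are therefore preferred only
 * when the range spans far fewer pages (here, less than 1/16th) than the
 * object currently has resident.
 */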
/*
 *	Routine:	vm_object_coalesce
 *	Function:	Coalesces two objects backing up adjoining
 *			regions of memory into a single object.
 *
 *	returns TRUE if objects were combined.
 *
 *	NOTE:	Only works at the moment if the second object is NULL -
 *		if it's not, which object do we lock first?
 *
 *	Parameters:
 *		prev_object	First object to coalesce
 *		prev_offset	Offset into prev_object
 *		next_object	Second object into coalesce
 *		next_offset	Offset into next_object
 *
 *		prev_size	Size of reference to prev_object
 *		next_size	Size of reference to next_object
 *
 *	Conditions:
 *		The object(s) must *not* be locked. The map must be locked
 *		to preserve the reference to the object(s).
 */
static int vm_object_coalesce_count = 0;

__private_extern__ boolean_t
vm_object_coalesce(
	vm_object_t prev_object,
	vm_object_t next_object,
	vm_object_offset_t prev_offset,
	__unused vm_object_offset_t next_offset,
	vm_object_size_t prev_size,
	vm_object_size_t next_size)
{
	vm_object_size_t newsize;

	if (next_object != VM_OBJECT_NULL) {
		return FALSE;
	}

	if (prev_object == VM_OBJECT_NULL) {
		return TRUE;
	}

	vm_object_lock(prev_object);

	/*
	 *	Try to collapse the object first
	 */
	vm_object_collapse(prev_object, prev_offset, TRUE);

	/*
	 *	Can't coalesce if pages not mapped to
	 *	prev_entry may be in use any way:
	 *	. more than one reference
	 *	. shadows another object
	 *	. has a copy elsewhere
	 *	. paging references (pages might be in page-list)
	 */

	if ((prev_object->ref_count > 1) ||
	    prev_object->pager_created ||
	    (prev_object->shadow != VM_OBJECT_NULL) ||
	    (prev_object->copy != VM_OBJECT_NULL) ||
	    (prev_object->true_share != FALSE) ||
	    (prev_object->purgable != VM_PURGABLE_DENY) ||
	    (prev_object->paging_in_progress != 0) ||
	    (prev_object->activity_in_progress != 0)) {
		vm_object_unlock(prev_object);
		return FALSE;
	}

	vm_object_coalesce_count++;

	/*
	 *	Remove any pages that may still be in the object from
	 *	a previous deallocation.
	 */
	vm_object_page_remove(prev_object,
	    prev_offset + prev_size,
	    prev_offset + prev_size + next_size);

	/*
	 *	Extend the object if necessary.
	 */
	newsize = prev_offset + prev_size + next_size;
	if (newsize > prev_object->vo_size) {
		assertf(page_aligned(newsize),
		    "object %p size 0x%llx",
		    prev_object, (uint64_t)newsize);
		prev_object->vo_size = newsize;
	}

	vm_object_unlock(prev_object);
	return TRUE;
}
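/*
 * Illustrative sketch (not part of this file): a caller that has just
 * extended a mapping by "new_size" bytes could attempt to fold the new
 * range into the existing object as shown below.  The "entry_*" names are
 * hypothetical; the map lock must be held as described in the routine
 * header above.
 */
#if 0
	if (vm_object_coalesce(entry_object,      /* prev_object */
	    VM_OBJECT_NULL,                       /* next_object: must be NULL */
	    entry_offset,                         /* prev_offset */
	    (vm_object_offset_t) 0,               /* next_offset: unused */
	    entry_size,                           /* prev_size */
	    new_size)) {                          /* next_size */
		/* the existing object now covers the extended range */
	}
#endif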
kern_return_t
vm_object_populate_with_private(
	vm_object_t             object,
	vm_object_offset_t      offset,
	ppnum_t                 phys_page,
	vm_size_t               size)
{
	ppnum_t                 base_page;
	vm_object_offset_t      base_offset;

	if (!object->private) {
		return KERN_FAILURE;
	}

	base_page = phys_page;

	vm_object_lock(object);

	if (!object->phys_contiguous) {
		vm_page_t       m;

		if ((base_offset = trunc_page_64(offset)) != offset) {
			vm_object_unlock(object);
			return KERN_FAILURE;
		}
		base_offset += object->paging_offset;

		while (size) {
			m = vm_page_lookup(object, base_offset);

			if (m != VM_PAGE_NULL) {
				if (m->vmp_fictitious) {
					if (VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr) {
						vm_page_lockspin_queues();
						m->vmp_private = TRUE;
						vm_page_unlock_queues();

						m->vmp_fictitious = FALSE;
						VM_PAGE_SET_PHYS_PAGE(m, base_page);
					}
				} else if (VM_PAGE_GET_PHYS_PAGE(m) != base_page) {
					if (!m->vmp_private) {
						/*
						 * we'd leak a real page... that can't be right
						 */
						panic("vm_object_populate_with_private - %p not private", m);
					}
					if (m->vmp_pmapped) {
						/*
						 * pmap call to clear old mapping
						 */
						pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
					}
					VM_PAGE_SET_PHYS_PAGE(m, base_page);
				}
			} else {
				while ((m = vm_page_grab_fictitious()) == VM_PAGE_NULL) {
					vm_page_more_fictitious();
				}

				/*
				 * private normally requires lock_queues but since we
				 * are initializing the page, its not necessary here
				 */
				m->vmp_private = TRUE;
				m->vmp_fictitious = FALSE;
				VM_PAGE_SET_PHYS_PAGE(m, base_page);
				m->vmp_unusual = TRUE;
				m->vmp_busy = FALSE;

				vm_page_insert(m, object, base_offset);
			}
			base_page++;                    /* Go to the next physical page */
			base_offset += PAGE_SIZE;
			size -= PAGE_SIZE;
		}
	} else {
		/* NOTE: we should check the original settings here */
		/* if we have a size > zero a pmap call should be made */
		/* to disable the range */

		/* shadows on contiguous memory are not allowed */
		/* we therefore can use the offset field */
		object->vo_shadow_offset = (vm_object_offset_t)phys_page << PAGE_SHIFT;
		assertf(page_aligned(size),
		    "object %p size 0x%llx",
		    object, (uint64_t)size);
		object->vo_size = size;
	}
	vm_object_unlock(object);

	return KERN_SUCCESS;
}
kern_return_t
memory_object_create_named(
	memory_object_t pager,
	memory_object_offset_t size,
	memory_object_control_t         *control)
{
	vm_object_t             object;

	*control = MEMORY_OBJECT_CONTROL_NULL;
	if (pager == MEMORY_OBJECT_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	object = vm_object_memory_object_associate(pager,
	    VM_OBJECT_NULL,
	    size,
	    TRUE);
	if (object == VM_OBJECT_NULL) {
		return KERN_INVALID_OBJECT;
	}

	/* wait for object (if any) to be ready */
	if (object != VM_OBJECT_NULL) {
		vm_object_lock(object);
		object->named = TRUE;
		while (!object->pager_ready) {
			vm_object_sleep(object,
			    VM_OBJECT_EVENT_PAGER_READY,
			    THREAD_UNINT);
		}
		*control = object->pager_control;
		vm_object_unlock(object);
	}
	return KERN_SUCCESS;
}
/*
 *	Routine:	memory_object_recover_named [user interface]
 *	Purpose:
 *		Attempt to recover a named reference for a VM object.
 *		VM will verify that the object has not already started
 *		down the termination path, and if it has, will optionally
 *		wait for that to finish.
 *	Returns:
 *		KERN_SUCCESS - we recovered a named reference on the object
 *		KERN_FAILURE - we could not recover a reference (object dead)
 *		KERN_INVALID_ARGUMENT - bad memory object control
 */
kern_return_t
memory_object_recover_named(
	memory_object_control_t control,
	boolean_t               wait_on_terminating)
{
	vm_object_t             object;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL) {
		return KERN_INVALID_ARGUMENT;
	}
restart:
	vm_object_lock(object);

	if (object->terminating && wait_on_terminating) {
		vm_object_wait(object,
		    VM_OBJECT_EVENT_PAGING_IN_PROGRESS,
		    THREAD_UNINT);
		goto restart;
	}

	if (!object->alive) {
		vm_object_unlock(object);
		return KERN_FAILURE;
	}

	if (object->named == TRUE) {
		vm_object_unlock(object);
		return KERN_SUCCESS;
	}
	object->named = TRUE;
	vm_object_lock_assert_exclusive(object);
	object->ref_count++;
	vm_object_res_reference(object);
	while (!object->pager_ready) {
		vm_object_sleep(object,
		    VM_OBJECT_EVENT_PAGER_READY,
		    THREAD_UNINT);
	}
	vm_object_unlock(object);
	return KERN_SUCCESS;
}
/*
 *	vm_object_release_name:
 *
 *	Enforces name semantic on memory_object reference count decrement
 *	This routine should not be called unless the caller holds a name
 *	reference gained through the memory_object_create_named.
 *
 *	If the TERMINATE_IDLE flag is set, the call will return if the
 *	reference count is not 1, i.e. idle with the only remaining reference
 *	being the name.
 *	If the decision is made to proceed, the name field flag is set to
 *	false and the reference count is decremented.  If the RESPECT_CACHE
 *	flag is set and the reference count has gone to zero, the
 *	memory_object is checked to see if it is cacheable; otherwise, when
 *	the reference count is zero, it is simply terminated.
 */

__private_extern__ kern_return_t
vm_object_release_name(
	vm_object_t     object,
	int             flags)
{
	vm_object_t     shadow;
	boolean_t       original_object = TRUE;

	while (object != VM_OBJECT_NULL) {
		vm_object_lock(object);

		assert(object->alive);
		if (original_object) {
			assert(object->named);
		}
		assert(object->ref_count > 0);

		/*
		 *	We have to wait for initialization before
		 *	destroying or caching the object.
		 */

		if (object->pager_created && !object->pager_initialized) {
			assert(!object->can_persist);
			vm_object_assert_wait(object,
			    VM_OBJECT_EVENT_INITIALIZED,
			    THREAD_UNINT);
			vm_object_unlock(object);
			thread_block(THREAD_CONTINUE_NULL);
			continue;
		}

		if (((object->ref_count > 1)
		    && (flags & MEMORY_OBJECT_TERMINATE_IDLE))
		    || (object->terminating)) {
			vm_object_unlock(object);
			return KERN_FAILURE;
		} else {
			if (flags & MEMORY_OBJECT_RELEASE_NO_OP) {
				vm_object_unlock(object);
				return KERN_SUCCESS;
			}
		}

		if ((flags & MEMORY_OBJECT_RESPECT_CACHE) &&
		    (object->ref_count == 1)) {
			if (original_object) {
				object->named = FALSE;
			}
			vm_object_unlock(object);
			/* let vm_object_deallocate push this thing into */
			/* the cache, if that it is where it is bound */
			vm_object_deallocate(object);
			return KERN_SUCCESS;
		}
		VM_OBJ_RES_DECR(object);
		shadow = object->pageout ? VM_OBJECT_NULL : object->shadow;

		if (object->ref_count == 1) {
			if (vm_object_terminate(object) != KERN_SUCCESS) {
				if (original_object) {
					return KERN_FAILURE;
				} else {
					return KERN_SUCCESS;
				}
			}
			if (shadow != VM_OBJECT_NULL) {
				original_object = FALSE;
				object = shadow;
				continue;
			}
			return KERN_SUCCESS;
		} else {
			vm_object_lock_assert_exclusive(object);
			object->ref_count--;
			assert(object->ref_count > 0);
			if (original_object) {
				object->named = FALSE;
			}
			vm_object_unlock(object);
			return KERN_SUCCESS;
		}
	}
	/*NOTREACHED*/
	return KERN_FAILURE;
}
__private_extern__ kern_return_t
vm_object_lock_request(
	vm_object_t                     object,
	vm_object_offset_t              offset,
	vm_object_size_t                size,
	memory_object_return_t          should_return,
	int                             flags,
	vm_prot_t                       prot)
{
	__unused boolean_t      should_flush;

	should_flush = flags & MEMORY_OBJECT_DATA_FLUSH;

	/*
	 *	Check for bogus arguments.
	 */
	if (object == VM_OBJECT_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	if ((prot & ~VM_PROT_ALL) != 0 && prot != VM_PROT_NO_CHANGE) {
		return KERN_INVALID_ARGUMENT;
	}

	/*
	 * extend range for conservative operations (copy-on-write, sync, ...)
	 * truncate range for destructive operations (purge, ...)
	 */
	size = vm_object_round_page(offset + size) - vm_object_trunc_page(offset);
	offset = vm_object_trunc_page(offset);

	/*
	 *	Lock the object, and acquire a paging reference to
	 *	prevent the memory_object reference from being released.
	 */
	vm_object_lock(object);
	vm_object_paging_begin(object);

	(void)vm_object_update(object,
	    offset, size, NULL, NULL, should_return, flags, prot);

	vm_object_paging_end(object);
	vm_object_unlock(object);

	return KERN_SUCCESS;
}
/*
 * Empty a purgeable object by grabbing the physical pages assigned to it and
 * putting them on the free queue without writing them to backing store, etc.
 * When the pages are next touched they will be demand zero-fill pages.  We
 * skip pages which are busy, being paged in/out, wired, etc.  We do _not_
 * skip referenced/dirty pages, pages on the active queue, etc.  We're more
 * than happy to grab these since this is a purgeable object.  We mark the
 * object as "empty" after reaping its pages.
 *
 * On entry the object must be locked and it must be
 * purgeable with no delayed copies pending.
 */
uint64_t
vm_object_purge(vm_object_t object, int flags)
{
	unsigned int    object_page_count = 0, pgcount = 0;
	uint64_t        total_purged_pgcount = 0;
	boolean_t       skipped_object = FALSE;

	vm_object_lock_assert_exclusive(object);

	if (object->purgable == VM_PURGABLE_DENY) {
		return 0;
	}

	assert(object->copy == VM_OBJECT_NULL);
	assert(object->copy_strategy == MEMORY_OBJECT_COPY_NONE);

	/*
	 * We need to set the object's state to VM_PURGABLE_EMPTY *before*
	 * reaping its pages.  We update vm_page_purgeable_count in bulk
	 * and we don't want vm_page_remove() to update it again for each
	 * page we reap later.
	 *
	 * For the purgeable ledgers, pages from VOLATILE and EMPTY objects
	 * are all accounted for in the "volatile" ledgers, so this does not
	 * make any difference.
	 * If we transitioned directly from NONVOLATILE to EMPTY,
	 * vm_page_purgeable_count must have been updated when the object
	 * was dequeued from its volatile queue and the purgeable ledgers
	 * must have also been updated accordingly at that time (in
	 * vm_object_purgable_control()).
	 */
	if (object->purgable == VM_PURGABLE_VOLATILE) {
		unsigned int delta;

		assert(object->resident_page_count >=
		    object->wired_page_count);
		delta = (object->resident_page_count -
		    object->wired_page_count);
		if (delta != 0) {
			assert(vm_page_purgeable_count >=
			    delta);
			OSAddAtomic(-delta,
			    (SInt32 *)&vm_page_purgeable_count);
		}
		if (object->wired_page_count != 0) {
			assert(vm_page_purgeable_wired_count >=
			    object->wired_page_count);
			OSAddAtomic(-object->wired_page_count,
			    (SInt32 *)&vm_page_purgeable_wired_count);
		}
		object->purgable = VM_PURGABLE_EMPTY;
	}
	assert(object->purgable == VM_PURGABLE_EMPTY);

	object_page_count = object->resident_page_count;

	vm_object_reap_pages(object, REAP_PURGEABLE);

	if (object->resident_page_count >= object_page_count) {
		total_purged_pgcount = 0;
	} else {
		total_purged_pgcount = object_page_count - object->resident_page_count;
	}

	if (object->pager != NULL) {
		assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);

		if (object->activity_in_progress == 0 &&
		    object->paging_in_progress == 0) {
			/*
			 * Also reap any memory coming from this object
			 * in the VM compressor.
			 *
			 * There are no operations in progress on the VM object
			 * and no operation can start while we're holding the
			 * VM object lock, so it's safe to reap the compressed
			 * pages and update the page counts.
			 */
			pgcount = vm_compressor_pager_get_count(object->pager);
			if (pgcount) {
				pgcount = vm_compressor_pager_reap_pages(object->pager, flags);
				vm_compressor_pager_count(object->pager,
				    -pgcount,
				    FALSE, /* shared */
				    object);
				vm_object_owner_compressed_update(object,
				    -pgcount);
			}
			if (!(flags & C_DONT_BLOCK)) {
				assert(vm_compressor_pager_get_count(object->pager)
				    == 0);
			}
		} else {
			/*
			 * There's some kind of paging activity in progress
			 * for this object, which could result in a page
			 * being compressed or decompressed, possibly while
			 * the VM object is not locked, so it could race
			 * with us.
			 *
			 * We can't really synchronize this without possibly
			 * causing a deadlock when the compressor needs to
			 * allocate or free memory while compressing or
			 * decompressing a page from a purgeable object
			 * mapped in the kernel_map...
			 *
			 * So let's not attempt to purge the compressor
			 * pager if there's any kind of operation in
			 * progress on the VM object.
			 */
			skipped_object = TRUE;
		}
	}

	vm_object_lock_assert_exclusive(object);

	total_purged_pgcount += pgcount;

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_VM, OBJECT_PURGE_ONE)),
	    VM_KERNEL_UNSLIDE_OR_PERM(object), /* purged object */
	    object_page_count,
	    total_purged_pgcount,
	    skipped_object,
	    0);

	return total_purged_pgcount;
}
/*
 * vm_object_purgeable_control() allows the caller to control and investigate the
 * state of a purgeable object.  A purgeable object is created via a call to
 * vm_allocate() with VM_FLAGS_PURGABLE specified.  A purgeable object will
 * never be coalesced with any other object -- even other purgeable objects --
 * and will thus always remain a distinct object.  A purgeable object has
 * special semantics when its reference count is exactly 1.  If its reference
 * count is greater than 1, then a purgeable object will behave like a normal
 * object and attempts to use this interface will result in an error return
 * of KERN_INVALID_ARGUMENT.
 *
 * A purgeable object may be put into a "volatile" state which will make the
 * object's pages eligible for being reclaimed without paging to backing
 * store if the system runs low on memory.  If the pages in a volatile
 * purgeable object are reclaimed, the purgeable object is said to have been
 * "emptied."  When a purgeable object is emptied the system will reclaim as
 * many pages from the object as it can in a convenient manner (pages already
 * en route to backing store or busy for other reasons are left as is).  When
 * a purgeable object is made volatile, its pages will generally be reclaimed
 * before other pages in the application's working set.  This semantic is
 * generally used by applications which can recreate the data in the object
 * faster than it can be paged in.  One such example might be media assets
 * which can be reread from a much faster RAID volume.
 *
 * A purgeable object may be designated as "non-volatile" which means it will
 * behave like all other objects in the system with pages being written to and
 * read from backing store as needed to satisfy system memory needs.  If the
 * object was emptied before the object was made non-volatile, that fact will
 * be returned as the old state of the purgeable object (see
 * VM_PURGABLE_SET_STATE below).  In this case, any pages of the object which
 * were reclaimed as part of emptying the object will be refaulted in as
 * zero-fill on demand.  It is up to the application to note that an object
 * was emptied and recreate the object's contents if necessary.  When a
 * purgeable object is made non-volatile, its pages will generally not be paged
 * out to backing store in the immediate future.  A purgeable object may also
 * be manually emptied.
 *
 * Finally, the current state (non-volatile, volatile, volatile & empty) of a
 * volatile purgeable object may be queried at any time.  This information may
 * be used as a control input to let the application know when the system is
 * experiencing memory pressure and is reclaiming memory.
 *
 * The specified address may be any address within the purgeable object.  If
 * the specified address does not represent any object in the target task's
 * virtual address space, then KERN_INVALID_ADDRESS will be returned.  If the
 * object containing the specified address is not a purgeable object, then
 * KERN_INVALID_ARGUMENT will be returned.  Otherwise, KERN_SUCCESS will be
 * returned.
 *
 * The control parameter may be any one of VM_PURGABLE_SET_STATE or
 * VM_PURGABLE_GET_STATE.  For VM_PURGABLE_SET_STATE, the in/out parameter
 * state is used to set the new state of the purgeable object and return its
 * old state.  For VM_PURGABLE_GET_STATE, the current state of the purgeable
 * object is returned in the parameter state.
 *
 * The in/out parameter state may be one of VM_PURGABLE_NONVOLATILE,
 * VM_PURGABLE_VOLATILE or VM_PURGABLE_EMPTY.  These, respectively, represent
 * the non-volatile, volatile and volatile/empty states described above.
 * Setting the state of a purgeable object to VM_PURGABLE_EMPTY will
 * immediately reclaim as many pages in the object as can be conveniently
 * collected (some may have already been written to backing store or be
 * otherwise busy).
 *
 * The process of making a purgeable object non-volatile and determining its
 * previous state is atomic.  Thus, if a purgeable object is made
 * VM_PURGABLE_NONVOLATILE and the old state is returned as
 * VM_PURGABLE_VOLATILE, then the purgeable object's previous contents are
 * completely intact and will remain so until the object is made volatile
 * again.  If the old state is returned as VM_PURGABLE_EMPTY then the object
 * was reclaimed while it was in a volatile state and its previous contents
 * have been lost.
 *
 * The object must be locked.
 */
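/*
 * Illustrative sketch (not part of this file): a user task would typically
 * drive the state machine described above through vm_allocate() and
 * vm_purgable_control(), along these lines:
 *
 *	vm_address_t addr = 0;
 *	int state;
 *
 *	vm_allocate(mach_task_self(), &addr, size,
 *	    VM_FLAGS_ANYWHERE | VM_FLAGS_PURGABLE);
 *	...fill the buffer with recreatable data...
 *
 *	state = VM_PURGABLE_VOLATILE;
 *	vm_purgable_control(mach_task_self(), addr,
 *	    VM_PURGABLE_SET_STATE, &state);
 *
 *	...later, before reusing the data...
 *	state = VM_PURGABLE_NONVOLATILE;
 *	vm_purgable_control(mach_task_self(), addr,
 *	    VM_PURGABLE_SET_STATE, &state);
 *	if (state == VM_PURGABLE_EMPTY) {
 *		...contents were reclaimed; recreate them...
 *	}
 */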
kern_return_t
vm_object_purgable_control(
	vm_object_t     object,
	vm_purgable_t   control,
	int             *state)
{
	int             old_state;
	int             new_state;

	if (object == VM_OBJECT_NULL) {
		/*
		 * Object must already be present or it can't be purgeable.
		 */
		return KERN_INVALID_ARGUMENT;
	}

	vm_object_lock_assert_exclusive(object);

	/*
	 * Get current state of the purgeable object.
	 */
	old_state = object->purgable;
	if (old_state == VM_PURGABLE_DENY) {
		return KERN_INVALID_ARGUMENT;
	}

	/* purgeable cant have delayed copies - now or in the future */
	assert(object->copy == VM_OBJECT_NULL);
	assert(object->copy_strategy == MEMORY_OBJECT_COPY_NONE);

	/*
	 * Execute the desired operation.
	 */
	if (control == VM_PURGABLE_GET_STATE) {
		*state = old_state;
		return KERN_SUCCESS;
	}

	if (control == VM_PURGABLE_SET_STATE &&
	    object->purgeable_only_by_kernel) {
		return KERN_PROTECTION_FAILURE;
	}

	if (control != VM_PURGABLE_SET_STATE &&
	    control != VM_PURGABLE_SET_STATE_FROM_KERNEL) {
		return KERN_INVALID_ARGUMENT;
	}

	if ((*state) & VM_PURGABLE_DEBUG_EMPTY) {
		object->volatile_empty = TRUE;
	}
	if ((*state) & VM_PURGABLE_DEBUG_FAULT) {
		object->volatile_fault = TRUE;
	}

	new_state = *state & VM_PURGABLE_STATE_MASK;
	if (new_state == VM_PURGABLE_VOLATILE) {
		if (old_state == VM_PURGABLE_EMPTY) {
			/* what's been emptied must stay empty */
			new_state = VM_PURGABLE_EMPTY;
		}
		if (object->volatile_empty) {
			/* debugging mode: go straight to empty */
			new_state = VM_PURGABLE_EMPTY;
		}
	}

	switch (new_state) {
	case VM_PURGABLE_DENY:
		/*
		 * Attempting to convert purgeable memory to non-purgeable:
		 * not supported.
		 */
		return KERN_INVALID_ARGUMENT;
	case VM_PURGABLE_NONVOLATILE:
		object->purgable = new_state;

		if (old_state == VM_PURGABLE_VOLATILE) {
			unsigned int delta;

			assert(object->resident_page_count >=
			    object->wired_page_count);
			delta = (object->resident_page_count -
			    object->wired_page_count);

			assert(vm_page_purgeable_count >= delta);

			if (delta != 0) {
				OSAddAtomic(-delta,
				    (SInt32 *)&vm_page_purgeable_count);
			}
			if (object->wired_page_count != 0) {
				assert(vm_page_purgeable_wired_count >=
				    object->wired_page_count);
				OSAddAtomic(-object->wired_page_count,
				    (SInt32 *)&vm_page_purgeable_wired_count);
			}

			vm_page_lock_queues();

			/* object should be on a queue */
			assert(object->objq.next != NULL &&
			    object->objq.prev != NULL);
			purgeable_q_t queue;

			/*
			 * Move object from its volatile queue to the
			 * non-volatile queue...
			 */
			queue = vm_purgeable_object_remove(object);
			assert(queue);

			if (object->purgeable_when_ripe) {
				vm_purgeable_token_delete_last(queue);
			}
			assert(queue->debug_count_objects >= 0);

			vm_page_unlock_queues();
		}
		if (old_state == VM_PURGABLE_VOLATILE ||
		    old_state == VM_PURGABLE_EMPTY) {
			/*
			 * Transfer the object's pages from the volatile to
			 * non-volatile ledgers.
			 */
			vm_purgeable_accounting(object, VM_PURGABLE_VOLATILE);
		}

		break;

	case VM_PURGABLE_VOLATILE:
		if (object->volatile_fault) {
			vm_page_t       p;
			int             refmod;

			vm_page_queue_iterate(&object->memq, p, vmp_listq) {
				if (p->vmp_busy ||
				    VM_PAGE_WIRED(p) ||
				    p->vmp_fictitious) {
					continue;
				}
				refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(p));
				if ((refmod & VM_MEM_MODIFIED) &&
				    !p->vmp_dirty) {
					SET_PAGE_DIRTY(p, FALSE);
				}
			}
		}

		assert(old_state != VM_PURGABLE_EMPTY);

		purgeable_q_t queue;

		/* find the correct queue */
		if ((*state & VM_PURGABLE_ORDERING_MASK) == VM_PURGABLE_ORDERING_OBSOLETE) {
			queue = &purgeable_queues[PURGEABLE_Q_TYPE_OBSOLETE];
		} else {
			if ((*state & VM_PURGABLE_BEHAVIOR_MASK) == VM_PURGABLE_BEHAVIOR_FIFO) {
				queue = &purgeable_queues[PURGEABLE_Q_TYPE_FIFO];
			} else {
				queue = &purgeable_queues[PURGEABLE_Q_TYPE_LIFO];
			}
		}

		if (old_state == VM_PURGABLE_NONVOLATILE ||
		    old_state == VM_PURGABLE_EMPTY) {
			unsigned int delta;

			if ((*state & VM_PURGABLE_NO_AGING_MASK) ==
			    VM_PURGABLE_NO_AGING) {
				object->purgeable_when_ripe = FALSE;
			} else {
				object->purgeable_when_ripe = TRUE;
			}

			if (object->purgeable_when_ripe) {
				kern_return_t result;

				/* try to add token... this can fail */
				vm_page_lock_queues();

				result = vm_purgeable_token_add(queue);
				if (result != KERN_SUCCESS) {
					vm_page_unlock_queues();
					return result;
				}
				vm_page_unlock_queues();
			}

			assert(object->resident_page_count >=
			    object->wired_page_count);
			delta = (object->resident_page_count -
			    object->wired_page_count);

			if (delta != 0) {
				OSAddAtomic(delta,
				    &vm_page_purgeable_count);
			}
			if (object->wired_page_count != 0) {
				OSAddAtomic(object->wired_page_count,
				    &vm_page_purgeable_wired_count);
			}

			object->purgable = new_state;

			/* object should be on "non-volatile" queue */
			assert(object->objq.next != NULL);
			assert(object->objq.prev != NULL);
		} else if (old_state == VM_PURGABLE_VOLATILE) {
			purgeable_q_t   old_queue;
			boolean_t       purgeable_when_ripe;

			/*
			 * if reassigning priorities / purgeable groups, we don't change the
			 * token queue. So moving priorities will not make pages stay around longer.
			 * Reasoning is that the algorithm gives most priority to the most important
			 * object. If a new token is added, the most important object's priority is boosted.
			 * This biases the system already for purgeable queues that move a lot.
			 * It doesn't seem more biasing is necessary in this case, where no new object is added.
			 */
			assert(object->objq.next != NULL && object->objq.prev != NULL); /* object should be on a queue */

			old_queue = vm_purgeable_object_remove(object);
			assert(old_queue);

			if ((*state & VM_PURGABLE_NO_AGING_MASK) ==
			    VM_PURGABLE_NO_AGING) {
				purgeable_when_ripe = FALSE;
			} else {
				purgeable_when_ripe = TRUE;
			}

			if (old_queue != queue ||
			    (purgeable_when_ripe !=
			    object->purgeable_when_ripe)) {
				kern_return_t result;

				/* Changing queue. Have to move token. */
				vm_page_lock_queues();
				if (object->purgeable_when_ripe) {
					vm_purgeable_token_delete_last(old_queue);
				}
				object->purgeable_when_ripe = purgeable_when_ripe;
				if (object->purgeable_when_ripe) {
					result = vm_purgeable_token_add(queue);
					assert(result == KERN_SUCCESS);   /* this should never fail since we just freed a token */
				}
				vm_page_unlock_queues();
			}
		}

		vm_purgeable_object_add(object, queue, (*state & VM_VOLATILE_GROUP_MASK) >> VM_VOLATILE_GROUP_SHIFT);
		if (old_state == VM_PURGABLE_NONVOLATILE) {
			vm_purgeable_accounting(object,
			    VM_PURGABLE_NONVOLATILE);
		}

		assert(queue->debug_count_objects >= 0);

		break;

	case VM_PURGABLE_EMPTY:
		if (object->volatile_fault) {
			vm_page_t       p;
			int             refmod;

			vm_page_queue_iterate(&object->memq, p, vmp_listq) {
				if (p->vmp_busy ||
				    VM_PAGE_WIRED(p) ||
				    p->vmp_fictitious) {
					continue;
				}
				refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(p));
				if ((refmod & VM_MEM_MODIFIED) &&
				    !p->vmp_dirty) {
					SET_PAGE_DIRTY(p, FALSE);
				}
			}
		}

		if (old_state == VM_PURGABLE_VOLATILE) {
			purgeable_q_t old_queue;

			/* object should be on a queue */
			assert(object->objq.next != NULL &&
			    object->objq.prev != NULL);

			old_queue = vm_purgeable_object_remove(object);
			assert(old_queue);
			if (object->purgeable_when_ripe) {
				vm_page_lock_queues();
				vm_purgeable_token_delete_first(old_queue);
				vm_page_unlock_queues();
			}
		}

		if (old_state == VM_PURGABLE_NONVOLATILE) {
			/*
			 * This object's pages were previously accounted as
			 * "non-volatile" and now need to be accounted as
			 * "volatile".
			 */
			vm_purgeable_accounting(object,
			    VM_PURGABLE_NONVOLATILE);
			/*
			 * Set to VM_PURGABLE_EMPTY because the pages are no
			 * longer accounted in the "non-volatile" ledger
			 * and are also not accounted for in
			 * "vm_page_purgeable_count".
			 */
			object->purgable = VM_PURGABLE_EMPTY;
		}

		(void) vm_object_purge(object, 0);
		assert(object->purgable == VM_PURGABLE_EMPTY);

		break;
	}

	*state = old_state;

	vm_object_lock_assert_exclusive(object);

	return KERN_SUCCESS;
}
kern_return_t
vm_object_get_page_counts(
	vm_object_t             object,
	vm_object_offset_t      offset,
	vm_object_size_t        size,
	unsigned int            *resident_page_count,
	unsigned int            *dirty_page_count)
{
	kern_return_t           kr = KERN_SUCCESS;
	boolean_t               count_dirty_pages = FALSE;
	vm_page_t               p = VM_PAGE_NULL;
	unsigned int            local_resident_count = 0;
	unsigned int            local_dirty_count = 0;
	vm_object_offset_t      cur_offset = 0;
	vm_object_offset_t      end_offset = 0;

	if (object == VM_OBJECT_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	cur_offset = offset;

	end_offset = offset + size;

	vm_object_lock_assert_exclusive(object);

	if (dirty_page_count != NULL) {
		count_dirty_pages = TRUE;
	}

	if (resident_page_count != NULL && count_dirty_pages == FALSE) {
		/*
		 * Fast path when:
		 * - we only want the resident page count, and,
		 * - the entire object is exactly covered by the request.
		 */
		if (offset == 0 && (object->vo_size == size)) {
			*resident_page_count = object->resident_page_count;
			goto out;
		}
	}

	if (object->resident_page_count <= (size >> PAGE_SHIFT)) {
		vm_page_queue_iterate(&object->memq, p, vmp_listq) {
			if (p->vmp_offset >= cur_offset && p->vmp_offset < end_offset) {
				local_resident_count++;

				if (count_dirty_pages) {
					if (p->vmp_dirty || (p->vmp_wpmapped && pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(p)))) {
						local_dirty_count++;
					}
				}
			}
		}
	} else {
		for (cur_offset = offset; cur_offset < end_offset; cur_offset += PAGE_SIZE_64) {
			p = vm_page_lookup(object, cur_offset);

			if (p != VM_PAGE_NULL) {
				local_resident_count++;

				if (count_dirty_pages) {
					if (p->vmp_dirty || (p->vmp_wpmapped && pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(p)))) {
						local_dirty_count++;
					}
				}
			}
		}
	}

	if (resident_page_count != NULL) {
		*resident_page_count = local_resident_count;
	}

	if (dirty_page_count != NULL) {
		*dirty_page_count = local_dirty_count;
	}

out:
	return kr;
}
#if TASK_SWAPPER
/*
 * vm_object_res_deallocate
 *
 * (recursively) decrement residence counts on vm objects and their shadows.
 * Called from vm_object_deallocate and when swapping out an object.
 *
 * The object is locked, and remains locked throughout the function,
 * even as we iterate down the shadow chain.  Locks on intermediate objects
 * will be dropped, but not the original object.
 *
 * NOTE: this function used to use recursion, rather than iteration.
 */

__private_extern__ void
vm_object_res_deallocate(
	vm_object_t     object)
{
	vm_object_t orig_object = object;
	/*
	 * Object is locked so it can be called directly
	 * from vm_object_deallocate.  Original object is never
	 * unlocked.
	 */
	assert(object->res_count > 0);
	while (--object->res_count == 0) {
		assert(object->ref_count >= object->res_count);
		vm_object_deactivate_all_pages(object);
		/* iterate on shadow, if present */
		if (object->shadow != VM_OBJECT_NULL) {
			vm_object_t tmp_object = object->shadow;
			vm_object_lock(tmp_object);
			if (object != orig_object) {
				vm_object_unlock(object);
			}
			object = tmp_object;
			assert(object->res_count > 0);
		} else {
			break;
		}
	}
	if (object != orig_object) {
		vm_object_unlock(object);
	}
}

/*
 * vm_object_res_reference
 *
 * Internal function to increment residence count on a vm object
 * and its shadows.  It is called only from vm_object_reference, and
 * when swapping in a vm object, via vm_map_swap.
 *
 * The object is locked, and remains locked throughout the function,
 * even as we iterate down the shadow chain.  Locks on intermediate objects
 * will be dropped, but not the original object.
 *
 * NOTE: this function used to use recursion, rather than iteration.
 */

__private_extern__ void
vm_object_res_reference(
	vm_object_t     object)
{
	vm_object_t orig_object = object;
	/*
	 * Object is locked, so this can be called directly
	 * from vm_object_reference.  This lock is never released.
	 */
	while ((++object->res_count == 1) &&
	    (object->shadow != VM_OBJECT_NULL)) {
		vm_object_t tmp_object = object->shadow;

		assert(object->ref_count >= object->res_count);
		vm_object_lock(tmp_object);
		if (object != orig_object) {
			vm_object_unlock(object);
		}
		object = tmp_object;
	}
	if (object != orig_object) {
		vm_object_unlock(object);
	}
	assert(orig_object->ref_count >= orig_object->res_count);
}
#endif /* TASK_SWAPPER */
/*
 *	vm_object_reference:
 *
 *	Gets another reference to the given object.
 */
#ifdef vm_object_reference
#undef vm_object_reference
#endif
__private_extern__ void
vm_object_reference(
	vm_object_t     object)
{
	if (object == VM_OBJECT_NULL) {
		return;
	}

	vm_object_lock(object);
	assert(object->ref_count > 0);
	vm_object_reference_locked(object);
	vm_object_unlock(object);
}
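/*
 * Usage sketch (illustrative only, not from the original source): a caller
 * that wants to keep a VM object alive across an operation takes a reference
 * up front and drops it when done:
 *
 *	vm_object_reference(object);
 *	... use the object ...
 *	vm_object_deallocate(object);
 *
 * vm_object_reference() takes the object lock itself, so it must not be
 * called with that lock already held; use vm_object_reference_locked() in
 * that case.
 */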
/*
 * vm_object_transpose
 *
 * This routine takes two VM objects of the same size and exchanges
 * their backing store.
 * The objects should be "quiesced" via a UPL operation with UPL_SET_IO_WIRE
 * and UPL_BLOCK_ACCESS if they are referenced anywhere.
 *
 * The VM objects must not be locked by caller.
 */
unsigned int vm_object_transpose_count = 0;
kern_return_t
vm_object_transpose(
	vm_object_t             object1,
	vm_object_t             object2,
	vm_object_size_t        transpose_size)
{
	vm_object_t             tmp_object;
	kern_return_t           retval;
	boolean_t               object1_locked, object2_locked;
	vm_page_t               page;
	vm_object_offset_t      page_offset;

	tmp_object = VM_OBJECT_NULL;
	object1_locked = FALSE; object2_locked = FALSE;

	if (object1 == object2 ||
	    object1 == VM_OBJECT_NULL ||
	    object2 == VM_OBJECT_NULL) {
		/*
		 * If the 2 VM objects are the same, there's
		 * no point in exchanging their backing store.
		 */
		retval = KERN_INVALID_VALUE;
		goto done;
	}

	/*
	 * Since we need to lock both objects at the same time,
	 * make sure we always lock them in the same order to
	 * avoid deadlocks.
	 */
	if (object1 > object2) {
		tmp_object = object1;
		object1 = object2;
		object2 = tmp_object;
	}

	/*
	 * Allocate a temporary VM object to hold object1's contents
	 * while we copy object2 to object1.
	 */
	tmp_object = vm_object_allocate(transpose_size);
	vm_object_lock(tmp_object);
	tmp_object->can_persist = FALSE;

	/*
	 * Grab control of the 1st VM object.
	 */
	vm_object_lock(object1);
	object1_locked = TRUE;
	if (!object1->alive || object1->terminating ||
	    object1->copy || object1->shadow || object1->shadowed ||
	    object1->purgable != VM_PURGABLE_DENY) {
		/*
		 * We don't deal with copy or shadow objects (yet).
		 */
		retval = KERN_INVALID_VALUE;
		goto done;
	}
	/*
	 * We're about to mess with the object's backing store and
	 * taking a "paging_in_progress" reference wouldn't be enough
	 * to prevent any paging activity on this object, so the caller should
	 * have "quiesced" the objects beforehand, via a UPL operation with
	 * UPL_SET_IO_WIRE (to make sure all the pages are there and wired)
	 * and UPL_BLOCK_ACCESS (to mark the pages "busy").
	 *
	 * Wait for any paging operation to complete (but only paging, not
	 * other kind of activities not linked to the pager).  After we're
	 * satisfied that there's no more paging in progress, we keep the
	 * object locked, to guarantee that no one tries to access its pager.
	 */
	vm_object_paging_only_wait(object1, THREAD_UNINT);

	/*
	 * Same as above for the 2nd object...
	 */
	vm_object_lock(object2);
	object2_locked = TRUE;
	if (!object2->alive || object2->terminating ||
	    object2->copy || object2->shadow || object2->shadowed ||
	    object2->purgable != VM_PURGABLE_DENY) {
		retval = KERN_INVALID_VALUE;
		goto done;
	}
	vm_object_paging_only_wait(object2, THREAD_UNINT);

	if (object1->vo_size != object2->vo_size ||
	    object1->vo_size != transpose_size) {
		/*
		 * If the 2 objects don't have the same size, we can't
		 * exchange their backing stores or one would overflow.
		 * If their size doesn't match the caller's
		 * "transpose_size", we can't do it either because the
		 * transpose operation will affect the entire span of
		 * the 2 objects.
		 */
		retval = KERN_INVALID_VALUE;
		goto done;
	}

	/*
	 * Transpose the lists of resident pages.
	 * This also updates the resident_page_count and the memq_hint.
	 */
	if (object1->phys_contiguous || vm_page_queue_empty(&object1->memq)) {
		/*
		 * No pages in object1, just transfer pages
		 * from object2 to object1.  No need to go through
		 * an intermediate object.
		 */
		while (!vm_page_queue_empty(&object2->memq)) {
			page = (vm_page_t) vm_page_queue_first(&object2->memq);
			vm_page_rename(page, object1, page->vmp_offset);
		}
		assert(vm_page_queue_empty(&object2->memq));
	} else if (object2->phys_contiguous || vm_page_queue_empty(&object2->memq)) {
		/*
		 * No pages in object2, just transfer pages
		 * from object1 to object2.  No need to go through
		 * an intermediate object.
		 */
		while (!vm_page_queue_empty(&object1->memq)) {
			page = (vm_page_t) vm_page_queue_first(&object1->memq);
			vm_page_rename(page, object2, page->vmp_offset);
		}
		assert(vm_page_queue_empty(&object1->memq));
	} else {
		/* transfer object1's pages to tmp_object */
		while (!vm_page_queue_empty(&object1->memq)) {
			page = (vm_page_t) vm_page_queue_first(&object1->memq);
			page_offset = page->vmp_offset;
			vm_page_remove(page, TRUE);
			page->vmp_offset = page_offset;
			vm_page_queue_enter(&tmp_object->memq, page, vmp_listq);
		}
		assert(vm_page_queue_empty(&object1->memq));
		/* transfer object2's pages to object1 */
		while (!vm_page_queue_empty(&object2->memq)) {
			page = (vm_page_t) vm_page_queue_first(&object2->memq);
			vm_page_rename(page, object1, page->vmp_offset);
		}
		assert(vm_page_queue_empty(&object2->memq));
		/* transfer tmp_object's pages to object2 */
		while (!vm_page_queue_empty(&tmp_object->memq)) {
			page = (vm_page_t) vm_page_queue_first(&tmp_object->memq);
			vm_page_queue_remove(&tmp_object->memq, page, vmp_listq);
			vm_page_insert(page, object2, page->vmp_offset);
		}
		assert(vm_page_queue_empty(&tmp_object->memq));
	}

#define __TRANSPOSE_FIELD(field)                                \
MACRO_BEGIN                                                     \
	tmp_object->field = object1->field;                     \
	object1->field = object2->field;                        \
	object2->field = tmp_object->field;                     \
MACRO_END

	/* "Lock" refers to the object not its contents */
	/* "size" should be identical */
	assert(object1->vo_size == object2->vo_size);
	/* "memq_hint" was updated above when transposing pages */
	/* "ref_count" refers to the object not its contents */
	assert(object1->ref_count >= 1);
	assert(object2->ref_count >= 1);
	/* "res_count" refers to the object not its contents */
	/* "resident_page_count" was updated above when transposing pages */
	/* "wired_page_count" was updated above when transposing pages */
#if !VM_TAG_ACTIVE_UPDATE
	/* "wired_objq" was dealt with along with "wired_page_count" */
#endif /* ! VM_TAG_ACTIVE_UPDATE */
	/* "reusable_page_count" was updated above when transposing pages */
	/* there should be no "copy" */
	assert(!object1->copy);
	assert(!object2->copy);
	/* there should be no "shadow" */
	assert(!object1->shadow);
	assert(!object2->shadow);
	__TRANSPOSE_FIELD(vo_shadow_offset); /* used by phys_contiguous objects */
	__TRANSPOSE_FIELD(pager);
	__TRANSPOSE_FIELD(paging_offset);
	__TRANSPOSE_FIELD(pager_control);
	/* update the memory_objects' pointers back to the VM objects */
	if (object1->pager_control != MEMORY_OBJECT_CONTROL_NULL) {
		memory_object_control_collapse(object1->pager_control,
		    object1);
	}
	if (object2->pager_control != MEMORY_OBJECT_CONTROL_NULL) {
		memory_object_control_collapse(object2->pager_control,
		    object2);
	}
	__TRANSPOSE_FIELD(copy_strategy);
	/* "paging_in_progress" refers to the object not its contents */
	assert(!object1->paging_in_progress);
	assert(!object2->paging_in_progress);
	assert(object1->activity_in_progress);
	assert(object2->activity_in_progress);
	/* "all_wanted" refers to the object not its contents */
	__TRANSPOSE_FIELD(pager_created);
	__TRANSPOSE_FIELD(pager_initialized);
	__TRANSPOSE_FIELD(pager_ready);
	__TRANSPOSE_FIELD(pager_trusted);
	__TRANSPOSE_FIELD(can_persist);
	__TRANSPOSE_FIELD(internal);
	__TRANSPOSE_FIELD(private);
	__TRANSPOSE_FIELD(pageout);
	/* "alive" should be set */
	assert(object1->alive);
	assert(object2->alive);
	/* "purgeable" should be non-purgeable */
	assert(object1->purgable == VM_PURGABLE_DENY);
	assert(object2->purgable == VM_PURGABLE_DENY);
	/* "shadowed" refers to the the object not its contents */
	__TRANSPOSE_FIELD(purgeable_when_ripe);
	__TRANSPOSE_FIELD(true_share);
	/* "terminating" should not be set */
	assert(!object1->terminating);
	assert(!object2->terminating);
	/* transfer "named" reference if needed */
	if (object1->named && !object2->named) {
		assert(object1->ref_count >= 2);
		assert(object2->ref_count >= 1);
		object1->ref_count--;
		object2->ref_count++;
	} else if (!object1->named && object2->named) {
		assert(object1->ref_count >= 1);
		assert(object2->ref_count >= 2);
		object1->ref_count++;
		object2->ref_count--;
	}
	__TRANSPOSE_FIELD(named);
	/* "shadow_severed" refers to the object not its contents */
	__TRANSPOSE_FIELD(phys_contiguous);
	__TRANSPOSE_FIELD(nophyscache);
	/* "cached_list.next" points to transposed object */
	object1->cached_list.next = (queue_entry_t) object2;
	object2->cached_list.next = (queue_entry_t) object1;
	/* "cached_list.prev" should be NULL */
	assert(object1->cached_list.prev == NULL);
	assert(object2->cached_list.prev == NULL);
	__TRANSPOSE_FIELD(last_alloc);
	__TRANSPOSE_FIELD(sequential);
	__TRANSPOSE_FIELD(pages_created);
	__TRANSPOSE_FIELD(pages_used);
	__TRANSPOSE_FIELD(scan_collisions);
	__TRANSPOSE_FIELD(cow_hint);
	__TRANSPOSE_FIELD(wimg_bits);
	__TRANSPOSE_FIELD(set_cache_attr);
	__TRANSPOSE_FIELD(code_signed);
	object1->transposed = TRUE;
	object2->transposed = TRUE;
	__TRANSPOSE_FIELD(mapping_in_progress);
	__TRANSPOSE_FIELD(volatile_empty);
	__TRANSPOSE_FIELD(volatile_fault);
	__TRANSPOSE_FIELD(all_reusable);
	assert(object1->blocked_access);
	assert(object2->blocked_access);
	__TRANSPOSE_FIELD(set_cache_attr);
	assert(!object1->object_is_shared_cache);
	assert(!object2->object_is_shared_cache);
	/* ignore purgeable_queue_type and purgeable_queue_group */
	assert(!object1->io_tracking);
	assert(!object2->io_tracking);
#if VM_OBJECT_ACCESS_TRACKING
	assert(!object1->access_tracking);
	assert(!object2->access_tracking);
#endif /* VM_OBJECT_ACCESS_TRACKING */
	__TRANSPOSE_FIELD(no_tag_update);
#if CONFIG_SECLUDED_MEMORY
	assert(!object1->eligible_for_secluded);
	assert(!object2->eligible_for_secluded);
	assert(!object1->can_grab_secluded);
	assert(!object2->can_grab_secluded);
#else /* CONFIG_SECLUDED_MEMORY */
	assert(object1->__object3_unused_bits == 0);
	assert(object2->__object3_unused_bits == 0);
#endif /* CONFIG_SECLUDED_MEMORY */
	/* "uplq" refers to the object not its contents (see upl_transpose()) */
	assert((object1->purgable == VM_PURGABLE_DENY) || (object1->objq.next == NULL));
	assert((object1->purgable == VM_PURGABLE_DENY) || (object1->objq.prev == NULL));
	assert((object2->purgable == VM_PURGABLE_DENY) || (object2->objq.next == NULL));
	assert((object2->purgable == VM_PURGABLE_DENY) || (object2->objq.prev == NULL));

#undef __TRANSPOSE_FIELD

	retval = KERN_SUCCESS;

done:
	/*
	 * Cleanup.
	 */
	if (tmp_object != VM_OBJECT_NULL) {
		vm_object_unlock(tmp_object);
		/*
		 * Re-initialize the temporary object to avoid
		 * deallocating a real pager.
		 */
		_vm_object_allocate(transpose_size, tmp_object);
		vm_object_deallocate(tmp_object);
		tmp_object = VM_OBJECT_NULL;
	}

	if (object1_locked) {
		vm_object_unlock(object1);
		object1_locked = FALSE;
	}
	if (object2_locked) {
		vm_object_unlock(object2);
		object2_locked = FALSE;
	}

	vm_object_transpose_count++;

	return retval;
}
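/*
 * Usage sketch (illustrative only, not from the original source): before
 * calling vm_object_transpose(), the caller is expected to "quiesce" both
 * objects so no pages can be touched while their backing stores are
 * exchanged, typically by creating a UPL over each object's full span with
 * UPL_SET_IO_WIRE (all pages resident and wired) and UPL_BLOCK_ACCESS
 * (pages marked busy), calling vm_object_transpose(o1, o2, size), and then
 * committing or aborting the UPLs to unblock access.  See upl_transpose()
 * for the in-tree caller that follows this protocol.
 */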
6736 * vm_object_cluster_size
6738 * Determine how big a cluster we should issue an I/O for...
6740 * Inputs: *start == offset of page needed
6741 * *length == maximum cluster pager can handle
6742 * Outputs: *start == beginning offset of cluster
6743 * *length == length of cluster to try
6745 * The original *start will be encompassed by the cluster
6748 extern int speculative_reads_disabled
;
6751 * Try to always keep these values an even multiple of PAGE_SIZE. We use these values
6752 * to derive min_ph_bytes and max_ph_bytes (IMP: bytes not # of pages) and expect those values to
6753 * always be page-aligned. The derivation could involve operations (e.g. division)
6754 * that could give us non-page-size aligned values if we start out with values that
6755 * are odd multiples of PAGE_SIZE.
6758 unsigned int preheat_max_bytes
= (1024 * 512);
6759 #else /* CONFIG_EMBEDDED */
6760 unsigned int preheat_max_bytes
= MAX_UPL_TRANSFER_BYTES
;
6761 #endif /* CONFIG_EMBEDDED */
6762 unsigned int preheat_min_bytes
= (1024 * 32);
6765 __private_extern__
void
6766 vm_object_cluster_size(vm_object_t object
, vm_object_offset_t
*start
,
6767 vm_size_t
*length
, vm_object_fault_info_t fault_info
, uint32_t *io_streaming
)
6769 vm_size_t pre_heat_size
;
6770 vm_size_t tail_size
;
6771 vm_size_t head_size
;
6772 vm_size_t max_length
;
6773 vm_size_t cluster_size
;
6774 vm_object_offset_t object_size
;
6775 vm_object_offset_t orig_start
;
6776 vm_object_offset_t target_start
;
6777 vm_object_offset_t offset
;
6778 vm_behavior_t behavior
;
6779 boolean_t look_behind
= TRUE
;
6780 boolean_t look_ahead
= TRUE
;
6781 boolean_t isSSD
= FALSE
;
6782 uint32_t throttle_limit
;
6784 int sequential_behavior
= VM_BEHAVIOR_SEQUENTIAL
;
6785 vm_size_t max_ph_size
;
6786 vm_size_t min_ph_size
;
6788 assert( !(*length
& PAGE_MASK
));
6789 assert( !(*start
& PAGE_MASK_64
));
6792 * remember maxiumum length of run requested
6794 max_length
= *length
;
6796 * we'll always return a cluster size of at least
6797 * 1 page, since the original fault must always
6800 *length
= PAGE_SIZE
;
6803 if (speculative_reads_disabled
|| fault_info
== NULL
) {
6805 * no cluster... just fault the page in
6809 orig_start
= *start
;
6810 target_start
= orig_start
;
6811 cluster_size
= round_page(fault_info
->cluster_size
);
6812 behavior
= fault_info
->behavior
;
6814 vm_object_lock(object
);
6816 if (object
->pager
== MEMORY_OBJECT_NULL
) {
6817 goto out
; /* pager is gone for this object, nothing more to do */
6819 vnode_pager_get_isSSD(object
->pager
, &isSSD
);
6821 min_ph_size
= round_page(preheat_min_bytes
);
6822 max_ph_size
= round_page(preheat_max_bytes
);
6824 #if !CONFIG_EMBEDDED
6829 if (min_ph_size
& PAGE_MASK_64
) {
6830 min_ph_size
= trunc_page(min_ph_size
);
6833 if (max_ph_size
& PAGE_MASK_64
) {
6834 max_ph_size
= trunc_page(max_ph_size
);
6837 #endif /* !CONFIG_EMBEDDED */
6839 if (min_ph_size
< PAGE_SIZE
) {
6840 min_ph_size
= PAGE_SIZE
;
6843 if (max_ph_size
< PAGE_SIZE
) {
6844 max_ph_size
= PAGE_SIZE
;
6845 } else if (max_ph_size
> MAX_UPL_TRANSFER_BYTES
) {
6846 max_ph_size
= MAX_UPL_TRANSFER_BYTES
;
6849 if (max_length
> max_ph_size
) {
6850 max_length
= max_ph_size
;
6853 if (max_length
<= PAGE_SIZE
) {
6857 if (object
->internal
) {
6858 object_size
= object
->vo_size
;
6860 vnode_pager_get_object_size(object
->pager
, &object_size
);
6863 object_size
= round_page_64(object_size
);
6865 if (orig_start
>= object_size
) {
6867 * fault occurred beyond the EOF...
6868 * we need to punt w/o changing the
6873 if (object
->pages_used
> object
->pages_created
) {
6875 * must have wrapped our 32 bit counters
6878 object
->pages_used
= object
->pages_created
= 0;
6880 if ((sequential_run
= object
->sequential
)) {
6881 if (sequential_run
< 0) {
6882 sequential_behavior
= VM_BEHAVIOR_RSEQNTL
;
6883 sequential_run
= 0 - sequential_run
;
6885 sequential_behavior
= VM_BEHAVIOR_SEQUENTIAL
;
6890 behavior
= VM_BEHAVIOR_DEFAULT
;
6893 case VM_BEHAVIOR_DEFAULT
:
6894 if (object
->internal
&& fault_info
->user_tag
== VM_MEMORY_STACK
) {
6898 if (sequential_run
>= (3 * PAGE_SIZE
)) {
6899 pre_heat_size
= sequential_run
+ PAGE_SIZE
;
6901 if (sequential_behavior
== VM_BEHAVIOR_SEQUENTIAL
) {
6902 look_behind
= FALSE
;
6909 if (object
->pages_created
< (20 * (min_ph_size
>> PAGE_SHIFT
))) {
6913 pre_heat_size
= min_ph_size
;
6916 * Linear growth in PH size: The maximum size is max_length...
6917 * this cacluation will result in a size that is neither a
6918 * power of 2 nor a multiple of PAGE_SIZE... so round
6919 * it up to the nearest PAGE_SIZE boundary
6921 pre_heat_size
= (max_length
* (uint64_t)object
->pages_used
) / object
->pages_created
;
6923 if (pre_heat_size
< min_ph_size
) {
6924 pre_heat_size
= min_ph_size
;
6926 pre_heat_size
= round_page(pre_heat_size
);
6932 case VM_BEHAVIOR_RANDOM
:
6933 if ((pre_heat_size
= cluster_size
) <= PAGE_SIZE
) {
6938 case VM_BEHAVIOR_SEQUENTIAL
:
6939 if ((pre_heat_size
= cluster_size
) == 0) {
6940 pre_heat_size
= sequential_run
+ PAGE_SIZE
;
6942 look_behind
= FALSE
;
6947 case VM_BEHAVIOR_RSEQNTL
:
6948 if ((pre_heat_size
= cluster_size
) == 0) {
6949 pre_heat_size
= sequential_run
+ PAGE_SIZE
;
6956 throttle_limit
= (uint32_t) max_length
;
6957 assert(throttle_limit
== max_length
);
6959 if (vnode_pager_get_throttle_io_limit(object
->pager
, &throttle_limit
) == KERN_SUCCESS
) {
6960 if (max_length
> throttle_limit
) {
6961 max_length
= throttle_limit
;
6964 if (pre_heat_size
> max_length
) {
6965 pre_heat_size
= max_length
;
6968 if (behavior
== VM_BEHAVIOR_DEFAULT
&& (pre_heat_size
> min_ph_size
)) {
6969 unsigned int consider_free
= vm_page_free_count
+ vm_page_cleaned_count
;
6971 if (consider_free
< vm_page_throttle_limit
) {
6972 pre_heat_size
= trunc_page(pre_heat_size
/ 16);
6973 } else if (consider_free
< vm_page_free_target
) {
6974 pre_heat_size
= trunc_page(pre_heat_size
/ 4);
6977 if (pre_heat_size
< min_ph_size
) {
6978 pre_heat_size
= min_ph_size
;
6981 if (look_ahead
== TRUE
) {
6982 if (look_behind
== TRUE
) {
6984 * if we get here its due to a random access...
6985 * so we want to center the original fault address
6986 * within the cluster we will issue... make sure
6987 * to calculate 'head_size' as a multiple of PAGE_SIZE...
6988 * 'pre_heat_size' is a multiple of PAGE_SIZE but not
6989 * necessarily an even number of pages so we need to truncate
6990 * the result to a PAGE_SIZE boundary
6992 head_size
= trunc_page(pre_heat_size
/ 2);
6994 if (target_start
> head_size
) {
6995 target_start
-= head_size
;
7001 * 'target_start' at this point represents the beginning offset
7002 * of the cluster we are considering... 'orig_start' will be in
7003 * the center of this cluster if we didn't have to clip the start
7004 * due to running into the start of the file
7007 if ((target_start
+ pre_heat_size
) > object_size
) {
7008 pre_heat_size
= (vm_size_t
)(round_page_64(object_size
- target_start
));
7011 * at this point caclulate the number of pages beyond the original fault
7012 * address that we want to consider... this is guaranteed not to extend beyond
7013 * the current EOF...
7015 assert((vm_size_t
)(orig_start
- target_start
) == (orig_start
- target_start
));
7016 tail_size
= pre_heat_size
- (vm_size_t
)(orig_start
- target_start
) - PAGE_SIZE
;
7018 if (pre_heat_size
> target_start
) {
7020 * since pre_heat_size is always smaller then 2^32,
7021 * if it is larger then target_start (a 64 bit value)
7022 * it is safe to clip target_start to 32 bits
7024 pre_heat_size
= (vm_size_t
) target_start
;
7028 assert( !(target_start
& PAGE_MASK_64
));
7029 assert( !(pre_heat_size
& PAGE_MASK_64
));
7031 if (pre_heat_size
<= PAGE_SIZE
) {
7035 if (look_behind
== TRUE
) {
7037 * take a look at the pages before the original
7038 * faulting offset... recalculate this in case
7039 * we had to clip 'pre_heat_size' above to keep
7040 * from running past the EOF.
7042 head_size
= pre_heat_size
- tail_size
- PAGE_SIZE
;
7044 for (offset
= orig_start
- PAGE_SIZE_64
; head_size
; offset
-= PAGE_SIZE_64
, head_size
-= PAGE_SIZE
) {
7046 * don't poke below the lowest offset
7048 if (offset
< fault_info
->lo_offset
) {
7052 * for external objects or internal objects w/o a pager,
7053 * VM_COMPRESSOR_PAGER_STATE_GET will return VM_EXTERNAL_STATE_UNKNOWN
7055 if (VM_COMPRESSOR_PAGER_STATE_GET(object
, offset
) == VM_EXTERNAL_STATE_ABSENT
) {
7058 if (vm_page_lookup(object
, offset
) != VM_PAGE_NULL
) {
7060 * don't bridge resident pages
7065 *length
+= PAGE_SIZE
;
7068 if (look_ahead
== TRUE
) {
7069 for (offset
= orig_start
+ PAGE_SIZE_64
; tail_size
; offset
+= PAGE_SIZE_64
, tail_size
-= PAGE_SIZE
) {
7071 * don't poke above the highest offset
7073 if (offset
>= fault_info
->hi_offset
) {
7076 assert(offset
< object_size
);
7079 * for external objects or internal objects w/o a pager,
7080 * VM_COMPRESSOR_PAGER_STATE_GET will return VM_EXTERNAL_STATE_UNKNOWN
7082 if (VM_COMPRESSOR_PAGER_STATE_GET(object
, offset
) == VM_EXTERNAL_STATE_ABSENT
) {
7085 if (vm_page_lookup(object
, offset
) != VM_PAGE_NULL
) {
7087 * don't bridge resident pages
7091 *length
+= PAGE_SIZE
;
7095 if (*length
> max_length
) {
7096 *length
= max_length
;
7099 vm_object_unlock(object
);
7101 DTRACE_VM1(clustersize
, vm_size_t
, *length
);
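/*
 * Worked example (explanatory, not from the original source): for a random
 * access pattern both look_behind and look_ahead stay TRUE, so the cluster
 * is centered on the faulting page.  With pre_heat_size = 64KB and a fault
 * at offset 0x20000, head_size = trunc_page(64KB / 2) = 32KB, so the cluster
 * starts at 0x18000 and the remaining tail (pre_heat_size - head_size -
 * PAGE_SIZE) extends past the faulting page, subject to the object size, the
 * lo_offset/hi_offset limits and resident pages, all of which stop the
 * bridging loops above.
 */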
/*
 * Allow manipulation of individual page state.  This is actually part of
 * the UPL regimen but takes place on the VM object rather than on a UPL
 */
kern_return_t
vm_object_page_op(
	vm_object_t             object,
	vm_object_offset_t      offset,
	int                     ops,
	ppnum_t                 *phys_entry,
	int                     *flags)
{
	vm_page_t               dst_page;

	vm_object_lock(object);

	if (ops & UPL_POP_PHYSICAL) {
		if (object->phys_contiguous) {
			if (phys_entry) {
				*phys_entry = (ppnum_t)
				    (object->vo_shadow_offset >> PAGE_SHIFT);
			}
			vm_object_unlock(object);
			return KERN_SUCCESS;
		} else {
			vm_object_unlock(object);
			return KERN_INVALID_OBJECT;
		}
	}
	if (object->phys_contiguous) {
		vm_object_unlock(object);
		return KERN_INVALID_OBJECT;
	}

	while (TRUE) {
		if ((dst_page = vm_page_lookup(object, offset)) == VM_PAGE_NULL) {
			vm_object_unlock(object);
			return KERN_FAILURE;
		}

		/* Sync up on getting the busy bit */
		if ((dst_page->vmp_busy || dst_page->vmp_cleaning) &&
		    (((ops & UPL_POP_SET) &&
		    (ops & UPL_POP_BUSY)) || (ops & UPL_POP_DUMP))) {
			/* someone else is playing with the page, we will */
			/* have to wait */
			PAGE_SLEEP(object, dst_page, THREAD_UNINT);
			continue;
		}

		if (ops & UPL_POP_DUMP) {
			if (dst_page->vmp_pmapped == TRUE) {
				pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(dst_page));
			}

			VM_PAGE_FREE(dst_page);
			break;
		}

		if (flags) {
			*flags = 0;

			/* Get the condition of flags before requested ops */
			/* are undertaken */

			if (dst_page->vmp_dirty) {
				*flags |= UPL_POP_DIRTY;
			}
			if (dst_page->vmp_free_when_done) {
				*flags |= UPL_POP_PAGEOUT;
			}
			if (dst_page->vmp_precious) {
				*flags |= UPL_POP_PRECIOUS;
			}
			if (dst_page->vmp_absent) {
				*flags |= UPL_POP_ABSENT;
			}
			if (dst_page->vmp_busy) {
				*flags |= UPL_POP_BUSY;
			}
		}

		/* The caller should have made a call either contingent with */
		/* or prior to this call to set UPL_POP_BUSY */
		if (ops & UPL_POP_SET) {
			/* The protection granted with this assert will */
			/* not be complete. If the caller violates the */
			/* convention and attempts to change page state */
			/* without first setting busy we may not see it */
			/* because the page may already be busy.  However */
			/* if such violations occur we will assert sooner */
			/* or later. */
			assert(dst_page->vmp_busy || (ops & UPL_POP_BUSY));
			if (ops & UPL_POP_DIRTY) {
				SET_PAGE_DIRTY(dst_page, FALSE);
			}
			if (ops & UPL_POP_PAGEOUT) {
				dst_page->vmp_free_when_done = TRUE;
			}
			if (ops & UPL_POP_PRECIOUS) {
				dst_page->vmp_precious = TRUE;
			}
			if (ops & UPL_POP_ABSENT) {
				dst_page->vmp_absent = TRUE;
			}
			if (ops & UPL_POP_BUSY) {
				dst_page->vmp_busy = TRUE;
			}
		}

		if (ops & UPL_POP_CLR) {
			assert(dst_page->vmp_busy);
			if (ops & UPL_POP_DIRTY) {
				dst_page->vmp_dirty = FALSE;
			}
			if (ops & UPL_POP_PAGEOUT) {
				dst_page->vmp_free_when_done = FALSE;
			}
			if (ops & UPL_POP_PRECIOUS) {
				dst_page->vmp_precious = FALSE;
			}
			if (ops & UPL_POP_ABSENT) {
				dst_page->vmp_absent = FALSE;
			}
			if (ops & UPL_POP_BUSY) {
				dst_page->vmp_busy = FALSE;
				PAGE_WAKEUP(dst_page);
			}
		}
		if (phys_entry) {
			/*
			 * The physical page number will remain valid
			 * only if the page is kept busy.
			 */
			assert(dst_page->vmp_busy);
			*phys_entry = VM_PAGE_GET_PHYS_PAGE(dst_page);
		}

		break;
	}

	vm_object_unlock(object);
	return KERN_SUCCESS;
}
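/*
 * Usage sketch (illustrative only, not from the original source): a caller
 * that wants a stable physical page number first marks the page busy and
 * clears the busy bit when done:
 *
 *	vm_object_page_op(object, offset, UPL_POP_SET | UPL_POP_BUSY,
 *	    &phys, &flags);
 *	... use phys while the page stays busy ...
 *	vm_object_page_op(object, offset, UPL_POP_CLR | UPL_POP_BUSY,
 *	    NULL, NULL);
 *
 * As noted in the code above, the physical page number reported in "phys"
 * is only valid while the page remains busy.
 */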
/*
 * vm_object_range_op offers performance enhancement over
 * vm_object_page_op for page_op functions which do not require page
 * level state to be returned from the call.  Page_op was created to provide
 * a low-cost alternative to page manipulation via UPLs when only a single
 * page was involved.  The range_op call establishes the ability in the _op
 * family of functions to work on multiple pages where the lack of page level
 * state handling allows the caller to avoid the overhead of the upl structures.
 */
kern_return_t
vm_object_range_op(
	vm_object_t             object,
	vm_object_offset_t      offset_beg,
	vm_object_offset_t      offset_end,
	int                     ops,
	uint32_t                *range)
{
	vm_object_offset_t      offset;
	vm_page_t               dst_page;

	if (offset_end - offset_beg > (uint32_t) -1) {
		/* range is too big and would overflow "*range" */
		return KERN_INVALID_ARGUMENT;
	}
	if (object->resident_page_count == 0) {
		if (range) {
			if (ops & UPL_ROP_PRESENT) {
				*range = 0;
			} else {
				*range = (uint32_t) (offset_end - offset_beg);
				assert(*range == (offset_end - offset_beg));
			}
		}
		return KERN_SUCCESS;
	}
	vm_object_lock(object);

	if (object->phys_contiguous) {
		vm_object_unlock(object);
		return KERN_INVALID_OBJECT;
	}

	offset = offset_beg & ~PAGE_MASK_64;

	while (offset < offset_end) {
		dst_page = vm_page_lookup(object, offset);
		if (dst_page != VM_PAGE_NULL) {
			if (ops & UPL_ROP_DUMP) {
				if (dst_page->vmp_busy || dst_page->vmp_cleaning) {
					/*
					 * someone else is playing with the
					 * page, we will have to wait
					 */
					PAGE_SLEEP(object, dst_page, THREAD_UNINT);
					/*
					 * need to relook the page up since it's
					 * state may have changed while we slept
					 * it might even belong to a different object
					 * at this point
					 */
					continue;
				}
				if (dst_page->vmp_laundry) {
					vm_pageout_steal_laundry(dst_page, FALSE);
				}
				if (dst_page->vmp_pmapped == TRUE) {
					pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(dst_page));
				}

				VM_PAGE_FREE(dst_page);
			} else if ((ops & UPL_ROP_ABSENT)
			    && (!dst_page->vmp_absent || dst_page->vmp_busy)) {
				break;
			}
		} else if (ops & UPL_ROP_PRESENT) {
			break;
		}

		offset += PAGE_SIZE;
	}
	vm_object_unlock(object);

	if (range) {
		if (offset > offset_end) {
			offset = offset_end;
		}
		if (offset > offset_beg) {
			*range = (uint32_t) (offset - offset_beg);
			assert(*range == (offset - offset_beg));
		} else {
			*range = 0;
		}
	}
	return KERN_SUCCESS;
}
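/*
 * Usage sketch (illustrative only, not from the original source): to measure
 * how much of a range is populated starting at offset_beg, a caller can ask
 * for the length of the run of resident pages:
 *
 *	uint32_t range;
 *	vm_object_range_op(object, offset_beg, offset_end,
 *	    UPL_ROP_PRESENT, &range);
 *
 * With UPL_ROP_PRESENT the scan stops at the first non-resident page, so
 * "range" comes back as the number of bytes of contiguous resident pages
 * (0 if the first page is absent).
 */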
7350 * Used to point a pager directly to a range of memory (when the pager may be associated
7351 * with a non-device vnode). Takes a virtual address, an offset, and a size. We currently
7352 * expect that the virtual address will denote the start of a range that is physically contiguous.
7355 pager_map_to_phys_contiguous(
7356 memory_object_control_t object
,
7357 memory_object_offset_t offset
,
7358 addr64_t base_vaddr
,
7362 boolean_t clobbered_private
;
7363 kern_return_t retval
;
7364 vm_object_t pager_object
;
7366 page_num
= pmap_find_phys(kernel_pmap
, base_vaddr
);
7369 retval
= KERN_FAILURE
;
7373 pager_object
= memory_object_control_to_vm_object(object
);
7375 if (!pager_object
) {
7376 retval
= KERN_FAILURE
;
7380 clobbered_private
= pager_object
->private;
7381 if (pager_object
->private != TRUE
) {
7382 vm_object_lock(pager_object
);
7383 pager_object
->private = TRUE
;
7384 vm_object_unlock(pager_object
);
7386 retval
= vm_object_populate_with_private(pager_object
, offset
, page_num
, size
);
7388 if (retval
!= KERN_SUCCESS
) {
7389 if (pager_object
->private != clobbered_private
) {
7390 vm_object_lock(pager_object
);
7391 pager_object
->private = clobbered_private
;
7392 vm_object_unlock(pager_object
);
uint32_t scan_object_collision = 0;

void
vm_object_lock(vm_object_t object)
{
	if (object == vm_pageout_scan_wants_object) {
		scan_object_collision++;
		mutex_pause(2);
	}
	DTRACE_VM(vm_object_lock_w);
	lck_rw_lock_exclusive(&object->Lock);
#if DEVELOPMENT || DEBUG
	object->Lock_owner = current_thread();
#endif
}

boolean_t
vm_object_lock_avoid(vm_object_t object)
{
	if (object == vm_pageout_scan_wants_object) {
		scan_object_collision++;
		return TRUE;
	}
	return FALSE;
}

boolean_t
_vm_object_lock_try(vm_object_t object)
{
	boolean_t       retval;

	retval = lck_rw_try_lock_exclusive(&object->Lock);
#if DEVELOPMENT || DEBUG
	if (retval == TRUE) {
		DTRACE_VM(vm_object_lock_w);
		object->Lock_owner = current_thread();
	}
#endif
	return retval;
}

boolean_t
vm_object_lock_try(vm_object_t object)
{
	/*
	 * Called from hibernate path so check before blocking.
	 */
	if (vm_object_lock_avoid(object) && ml_get_interrupts_enabled() && get_preemption_level() == 0) {
		mutex_pause(2);
	}
	return _vm_object_lock_try(object);
}

/*
 * Lock the object exclusive.
 *
 * Returns true iff the thread had to spin or block before
 * acquiring the lock.
 */
bool
vm_object_lock_check_contended(vm_object_t object)
{
	bool contended;

	if (object == vm_pageout_scan_wants_object) {
		scan_object_collision++;
		mutex_pause(2);
	}
	DTRACE_VM(vm_object_lock_w);
	contended = lck_rw_lock_exclusive_check_contended(&object->Lock);
#if DEVELOPMENT || DEBUG
	object->Lock_owner = current_thread();
#endif
	return contended;
}

void
vm_object_lock_shared(vm_object_t object)
{
	if (vm_object_lock_avoid(object)) {
		mutex_pause(2);
	}
	DTRACE_VM(vm_object_lock_r);
	lck_rw_lock_shared(&object->Lock);
}

boolean_t
vm_object_lock_yield_shared(vm_object_t object)
{
	boolean_t retval = FALSE, force_yield = FALSE;

	vm_object_lock_assert_shared(object);

	force_yield = vm_object_lock_avoid(object);

	retval = lck_rw_lock_yield_shared(&object->Lock, force_yield);
	if (retval) {
		DTRACE_VM(vm_object_lock_yield);
	}
	return retval;
}

boolean_t
vm_object_lock_try_shared(vm_object_t object)
{
	boolean_t retval;

	if (vm_object_lock_avoid(object)) {
		mutex_pause(2);
	}
	retval = lck_rw_try_lock_shared(&object->Lock);
	if (retval) {
		DTRACE_VM(vm_object_lock_r);
	}
	return retval;
}

boolean_t
vm_object_lock_upgrade(vm_object_t object)
{
	boolean_t       retval;

	retval = lck_rw_lock_shared_to_exclusive(&object->Lock);
#if DEVELOPMENT || DEBUG
	if (retval == TRUE) {
		DTRACE_VM(vm_object_lock_w);
		object->Lock_owner = current_thread();
	}
#endif
	return retval;
}

void
vm_object_unlock(vm_object_t object)
{
#if DEVELOPMENT || DEBUG
	if (object->Lock_owner) {
		if (object->Lock_owner != current_thread()) {
			panic("vm_object_unlock: not owner - %p\n", object);
		}
		object->Lock_owner = 0;
		DTRACE_VM(vm_object_unlock);
	}
#endif
	lck_rw_done(&object->Lock);
}
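/*
 * Usage sketch (illustrative only, not from the original source): readers
 * that may need to modify the object typically take the shared lock and
 * upgrade, falling back to the exclusive lock when the upgrade fails
 * (lck_rw_lock_shared_to_exclusive() drops the shared hold on failure):
 *
 *	vm_object_lock_shared(object);
 *	...
 *	if (!vm_object_lock_upgrade(object)) {
 *		// shared hold was dropped; reacquire exclusive and revalidate
 *		vm_object_lock(object);
 *	}
 */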
unsigned int vm_object_change_wimg_mode_count = 0;

/*
 * The object must be locked
 */
void
vm_object_change_wimg_mode(vm_object_t object, unsigned int wimg_mode)
{
	vm_page_t p;

	vm_object_lock_assert_exclusive(object);

	vm_object_paging_wait(object, THREAD_UNINT);

	vm_page_queue_iterate(&object->memq, p, vmp_listq) {
		if (!p->vmp_fictitious) {
			pmap_set_cache_attributes(VM_PAGE_GET_PHYS_PAGE(p), wimg_mode);
		}
	}
	if (wimg_mode == VM_WIMG_USE_DEFAULT) {
		object->set_cache_attr = FALSE;
	} else {
		object->set_cache_attr = TRUE;
	}
	object->wimg_bits = wimg_mode;

	vm_object_change_wimg_mode_count++;
}
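/*
 * Example (illustrative only, not from the original source): an object that
 * must be mapped write-combined could be switched with
 *
 *	vm_object_lock(object);
 *	vm_object_change_wimg_mode(object, VM_WIMG_WCOMB);
 *	vm_object_unlock(object);
 *
 * Every resident, non-fictitious page has its pmap cache attributes updated
 * immediately; pages that become resident later pick the mode up from
 * object->wimg_bits.
 */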
7580 extern struct freezer_context freezer_context_global
;
7583 * This routine does the "relocation" of previously
7584 * compressed pages belonging to this object that are
7585 * residing in a number of compressed segments into
7586 * a set of compressed segments dedicated to hold
7587 * compressed pages belonging to this object.
7590 extern AbsoluteTime c_freezer_last_yield_ts
;
7592 #define MAX_FREE_BATCH 32
7593 #define FREEZER_DUTY_CYCLE_ON_MS 5
7594 #define FREEZER_DUTY_CYCLE_OFF_MS 5
7596 static int c_freezer_should_yield(void);
7600 c_freezer_should_yield()
7602 AbsoluteTime cur_time
;
7605 assert(c_freezer_last_yield_ts
);
7606 clock_get_uptime(&cur_time
);
7608 SUB_ABSOLUTETIME(&cur_time
, &c_freezer_last_yield_ts
);
7609 absolutetime_to_nanoseconds(cur_time
, &nsecs
);
7611 if (nsecs
> 1000 * 1000 * FREEZER_DUTY_CYCLE_ON_MS
) {
7619 vm_object_compressed_freezer_done()
7621 vm_compressor_finished_filling( &(freezer_context_global
.freezer_ctx_chead
));
7626 vm_object_compressed_freezer_pageout(
7627 vm_object_t object
, uint32_t dirty_budget
)
7630 vm_page_t local_freeq
= NULL
;
7631 int local_freed
= 0;
7632 kern_return_t retval
= KERN_SUCCESS
;
7633 int obj_resident_page_count_snapshot
= 0;
7634 uint32_t paged_out_count
= 0;
7636 assert(object
!= VM_OBJECT_NULL
);
7637 assert(object
->internal
);
7639 vm_object_lock(object
);
7641 if (!object
->pager_initialized
|| object
->pager
== MEMORY_OBJECT_NULL
) {
7642 if (!object
->pager_initialized
) {
7643 vm_object_collapse(object
, (vm_object_offset_t
) 0, TRUE
);
7645 if (!object
->pager_initialized
) {
7646 vm_object_compressor_pager_create(object
);
7650 if (!object
->pager_initialized
|| object
->pager
== MEMORY_OBJECT_NULL
) {
7651 vm_object_unlock(object
);
7652 return paged_out_count
;
7657 * We could be freezing a shared internal object that might
7658 * be part of some other thread's current VM operations.
7659 * We skip it if there's a paging-in-progress or activity-in-progress
7660 * because we could be here a long time with the map lock held.
7662 * Note: We are holding the map locked while we wait.
7663 * This is fine in the freezer path because the task
7664 * is suspended and so this latency is acceptable.
7666 if (object
->paging_in_progress
|| object
->activity_in_progress
) {
7667 vm_object_unlock(object
);
7668 return paged_out_count
;
7671 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE
) {
7672 vm_object_offset_t curr_offset
= 0;
7675 * Go through the object and make sure that any
7676 * previously compressed pages are relocated into
7677 * a compressed segment associated with our "freezer_chead".
7679 while (curr_offset
< object
->vo_size
) {
7680 curr_offset
= vm_compressor_pager_next_compressed(object
->pager
, curr_offset
);
7682 if (curr_offset
== (vm_object_offset_t
) -1) {
7686 retval
= vm_compressor_pager_relocate(object
->pager
, curr_offset
, &(freezer_context_global
.freezer_ctx_chead
));
7688 if (retval
!= KERN_SUCCESS
) {
7692 curr_offset
+= PAGE_SIZE_64
;
7697 * We can't hold the object lock while heading down into the compressed pager
7698 * layer because we might need the kernel map lock down there to allocate new
7699 * compressor data structures. And if this same object is mapped in the kernel
7700 * and there's a fault on it, then that thread will want the object lock while
7701 * holding the kernel map lock.
7703 * Since we are going to drop/grab the object lock repeatedly, we must make sure
7704 * we won't be stuck in an infinite loop if the same page(s) keep getting
7705 * decompressed. So we grab a snapshot of the number of pages in the object and
7706 * we won't process any more than that number of pages.
7709 obj_resident_page_count_snapshot
= object
->resident_page_count
;
7711 vm_object_activity_begin(object
);
7713 while ((obj_resident_page_count_snapshot
--) && !vm_page_queue_empty(&object
->memq
) && paged_out_count
< dirty_budget
) {
7714 p
= (vm_page_t
)vm_page_queue_first(&object
->memq
);
7716 KERNEL_DEBUG(0xe0430004 | DBG_FUNC_START
, object
, local_freed
, 0, 0, 0);
7718 vm_page_lockspin_queues();
7720 if (p
->vmp_cleaning
|| p
->vmp_fictitious
|| p
->vmp_busy
|| p
->vmp_absent
|| p
->vmp_unusual
|| p
->vmp_error
|| VM_PAGE_WIRED(p
)) {
7721 vm_page_unlock_queues();
7723 KERNEL_DEBUG(0xe0430004 | DBG_FUNC_END
, object
, local_freed
, 1, 0, 0);
7725 vm_page_queue_remove(&object
->memq
, p
, vmp_listq
);
7726 vm_page_queue_enter(&object
->memq
, p
, vmp_listq
);
7731 if (p
->vmp_pmapped
== TRUE
) {
7732 int refmod_state
, pmap_flags
;
7734 if (p
->vmp_dirty
|| p
->vmp_precious
) {
7735 pmap_flags
= PMAP_OPTIONS_COMPRESSOR
;
7737 pmap_flags
= PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED
;
7740 refmod_state
= pmap_disconnect_options(VM_PAGE_GET_PHYS_PAGE(p
), pmap_flags
, NULL
);
7741 if (refmod_state
& VM_MEM_MODIFIED
) {
7742 SET_PAGE_DIRTY(p
, FALSE
);
7746 if (p
->vmp_dirty
== FALSE
&& p
->vmp_precious
== FALSE
) {
7748 * Clean and non-precious page.
7750 vm_page_unlock_queues();
7753 KERNEL_DEBUG(0xe0430004 | DBG_FUNC_END
, object
, local_freed
, 2, 0, 0);
7757 if (p
->vmp_laundry
) {
7758 vm_pageout_steal_laundry(p
, TRUE
);
7761 vm_page_queues_remove(p
, TRUE
);
7763 vm_page_unlock_queues();
7767 * In case the compressor fails to compress this page, we need it at
7768 * the back of the object memq so that we don't keep trying to process it.
7769 * Make the move here while we have the object lock held.
7772 vm_page_queue_remove(&object
->memq
, p
, vmp_listq
);
7773 vm_page_queue_enter(&object
->memq
, p
, vmp_listq
);
7776 * Grab an activity_in_progress here for vm_pageout_compress_page() to consume.
7778 * Mark the page busy so no one messes with it while we have the object lock dropped.
7782 vm_object_activity_begin(object
);
7784 vm_object_unlock(object
);
7786 if (vm_pageout_compress_page(&(freezer_context_global
.freezer_ctx_chead
),
7787 (freezer_context_global
.freezer_ctx_compressor_scratch_buf
),
7788 p
) == KERN_SUCCESS
) {
7790 * page has already been un-tabled from the object via 'vm_page_remove'
7792 p
->vmp_snext
= local_freeq
;
7797 if (local_freed
>= MAX_FREE_BATCH
) {
7798 OSAddAtomic64(local_freed
, &vm_pageout_vminfo
.vm_pageout_compressions
);
7800 vm_page_free_list(local_freeq
, TRUE
);
7805 freezer_context_global
.freezer_ctx_uncompressed_pages
++;
7807 KERNEL_DEBUG(0xe0430004 | DBG_FUNC_END
, object
, local_freed
, 0, 0, 0);
7809 if (local_freed
== 0 && c_freezer_should_yield()) {
7810 thread_yield_internal(FREEZER_DUTY_CYCLE_OFF_MS
);
7811 clock_get_uptime(&c_freezer_last_yield_ts
);
7814 vm_object_lock(object
);
7818 OSAddAtomic64(local_freed
, &vm_pageout_vminfo
.vm_pageout_compressions
);
7820 vm_page_free_list(local_freeq
, TRUE
);
7826 vm_object_activity_end(object
);
7828 vm_object_unlock(object
);
7830 if (c_freezer_should_yield()) {
7831 thread_yield_internal(FREEZER_DUTY_CYCLE_OFF_MS
);
7832 clock_get_uptime(&c_freezer_last_yield_ts
);
7834 return paged_out_count
;
7837 #endif /* CONFIG_FREEZE */
7845 struct vm_pageout_queue
*iq
;
7847 if (!VM_CONFIG_COMPRESSOR_IS_PRESENT
) {
7851 iq
= &vm_pageout_queue_internal
;
7853 assert(object
!= VM_OBJECT_NULL
);
7855 vm_object_lock(object
);
7857 if (!object
->internal
||
7858 object
->terminating
||
7860 vm_object_unlock(object
);
7864 if (!object
->pager_initialized
|| object
->pager
== MEMORY_OBJECT_NULL
) {
7865 if (!object
->pager_initialized
) {
7866 vm_object_collapse(object
, (vm_object_offset_t
) 0, TRUE
);
7868 if (!object
->pager_initialized
) {
7869 vm_object_compressor_pager_create(object
);
7873 if (!object
->pager_initialized
|| object
->pager
== MEMORY_OBJECT_NULL
) {
7874 vm_object_unlock(object
);
7880 next
= (vm_page_t
)vm_page_queue_first(&object
->memq
);
7882 while (!vm_page_queue_end(&object
->memq
, (vm_page_queue_entry_t
)next
)) {
7884 next
= (vm_page_t
)vm_page_queue_next(&next
->vmp_listq
);
7886 assert(p
->vmp_q_state
!= VM_PAGE_ON_FREE_Q
);
7888 if ((p
->vmp_q_state
== VM_PAGE_ON_THROTTLED_Q
) ||
7894 p
->vmp_fictitious
||
7897 * Page is already being cleaned or can't be cleaned.
7901 if (vm_compressor_low_on_space()) {
7905 /* Throw to the pageout queue */
7907 vm_page_lockspin_queues();
7909 if (VM_PAGE_Q_THROTTLED(iq
)) {
7910 iq
->pgo_draining
= TRUE
;
7912 assert_wait((event_t
) (&iq
->pgo_laundry
+ 1),
7913 THREAD_INTERRUPTIBLE
);
7914 vm_page_unlock_queues();
7915 vm_object_unlock(object
);
7917 thread_block(THREAD_CONTINUE_NULL
);
7919 vm_object_lock(object
);
7923 assert(!p
->vmp_fictitious
);
7924 assert(!p
->vmp_busy
);
7925 assert(!p
->vmp_absent
);
7926 assert(!p
->vmp_unusual
);
7927 assert(!p
->vmp_error
);
7928 assert(!VM_PAGE_WIRED(p
));
7929 assert(!p
->vmp_cleaning
);
7931 if (p
->vmp_pmapped
== TRUE
) {
7936 * Tell pmap the page should be accounted
7937 * for as "compressed" if it's been modified.
7940 PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED
;
7941 if (p
->vmp_dirty
|| p
->vmp_precious
) {
7943 * We already know it's been modified,
7944 * so tell pmap to account for it
7947 pmap_options
= PMAP_OPTIONS_COMPRESSOR
;
7949 refmod_state
= pmap_disconnect_options(VM_PAGE_GET_PHYS_PAGE(p
),
7952 if (refmod_state
& VM_MEM_MODIFIED
) {
7953 SET_PAGE_DIRTY(p
, FALSE
);
7957 if (!p
->vmp_dirty
&& !p
->vmp_precious
) {
7958 vm_page_unlock_queues();
7962 vm_page_queues_remove(p
, TRUE
);
7964 vm_pageout_cluster(p
);
7966 vm_page_unlock_queues();
7968 vm_object_unlock(object
);
7974 vm_page_request_reprioritize(vm_object_t o
, uint64_t blkno
, uint32_t len
, int prio
)
7976 io_reprioritize_req_t req
;
7977 struct vnode
*devvp
= NULL
;
7979 if (vnode_pager_get_object_devvp(o
->pager
, (uintptr_t *)&devvp
) != KERN_SUCCESS
) {
7984 * Create the request for I/O reprioritization.
7985 * We use the noblock variant of zalloc because we're holding the object
7986 * lock here and we could cause a deadlock in low memory conditions.
7988 req
= (io_reprioritize_req_t
)zalloc_noblock(io_reprioritize_req_zone
);
7994 req
->priority
= prio
;
7997 /* Insert request into the reprioritization list */
7998 IO_REPRIORITIZE_LIST_LOCK();
7999 queue_enter(&io_reprioritize_list
, req
, io_reprioritize_req_t
, io_reprioritize_list
);
8000 IO_REPRIORITIZE_LIST_UNLOCK();
8002 /* Wakeup reprioritize thread */
8003 IO_REPRIO_THREAD_WAKEUP();
8009 vm_decmp_upl_reprioritize(upl_t upl
, int prio
)
8013 io_reprioritize_req_t req
;
8014 struct vnode
*devvp
= NULL
;
8018 uint64_t *io_upl_reprio_info
;
8021 if ((upl
->flags
& UPL_TRACKED_BY_OBJECT
) == 0 || (upl
->flags
& UPL_EXPEDITE_SUPPORTED
) == 0) {
8026 * We dont want to perform any allocations with the upl lock held since that might
8027 * result in a deadlock. If the system is low on memory, the pageout thread would
8028 * try to pageout stuff and might wait on this lock. If we are waiting for the memory to
8029 * be freed up by the pageout thread, it would be a deadlock.
8033 /* First step is just to get the size of the upl to find out how big the reprio info is */
8034 if (!upl_try_lock(upl
)) {
8038 if (upl
->decmp_io_upl
== NULL
) {
8039 /* The real I/O upl was destroyed by the time we came in here. Nothing to do. */
8044 io_upl
= upl
->decmp_io_upl
;
8045 assert((io_upl
->flags
& UPL_DECMP_REAL_IO
) != 0);
8046 assertf(page_aligned(io_upl
->u_offset
) && page_aligned(io_upl
->u_size
),
8047 "upl %p offset 0x%llx size 0x%x\n",
8048 io_upl
, io_upl
->u_offset
, io_upl
->u_size
);
8049 io_upl_size
= io_upl
->u_size
;
8052 /* Now perform the allocation */
8053 io_upl_reprio_info
= (uint64_t *)kheap_alloc(KHEAP_TEMP
,
8054 sizeof(uint64_t) * atop(io_upl_size
), Z_WAITOK
);
8055 if (io_upl_reprio_info
== NULL
) {
8059 /* Now again take the lock, recheck the state and grab out the required info */
8060 if (!upl_try_lock(upl
)) {
8064 if (upl
->decmp_io_upl
== NULL
|| upl
->decmp_io_upl
!= io_upl
) {
8065 /* The real I/O upl was destroyed by the time we came in here. Nothing to do. */
8069 memcpy(io_upl_reprio_info
, io_upl
->upl_reprio_info
,
8070 sizeof(uint64_t) * atop(io_upl_size
));
8072 /* Get the VM object for this UPL */
8073 if (io_upl
->flags
& UPL_SHADOWED
) {
8074 object
= io_upl
->map_object
->shadow
;
8076 object
= io_upl
->map_object
;
8079 /* Get the dev vnode ptr for this object */
8080 if (!object
|| !object
->pager
||
8081 vnode_pager_get_object_devvp(object
->pager
, (uintptr_t *)&devvp
) != KERN_SUCCESS
) {
8088 /* Now we have all the information needed to do the expedite */
8091 while (offset
< io_upl_size
) {
8092 blkno
= io_upl_reprio_info
[atop(offset
)] & UPL_REPRIO_INFO_MASK
;
8093 len
= (io_upl_reprio_info
[atop(offset
)] >> UPL_REPRIO_INFO_SHIFT
) & UPL_REPRIO_INFO_MASK
;
8096 * This implementation may cause some spurious expedites due to the
8097 * fact that we dont cleanup the blkno & len from the upl_reprio_info
8098 * even after the I/O is complete.
8101 if (blkno
!= 0 && len
!= 0) {
8102 /* Create the request for I/O reprioritization */
8103 req
= (io_reprioritize_req_t
)zalloc(io_reprioritize_req_zone
);
8104 assert(req
!= NULL
);
8107 req
->priority
= prio
;
8110 /* Insert request into the reprioritization list */
8111 IO_REPRIORITIZE_LIST_LOCK();
8112 queue_enter(&io_reprioritize_list
, req
, io_reprioritize_req_t
, io_reprioritize_list
);
8113 IO_REPRIORITIZE_LIST_UNLOCK();
8117 offset
+= PAGE_SIZE
;
8121 /* Wakeup reprioritize thread */
8122 IO_REPRIO_THREAD_WAKEUP();
8125 kheap_free(KHEAP_TEMP
, io_upl_reprio_info
,
8126 sizeof(uint64_t) * atop(io_upl_size
));
8130 vm_page_handle_prio_inversion(vm_object_t o
, vm_page_t m
)
8133 upl_page_info_t
*pl
;
8134 unsigned int i
, num_pages
;
8137 cur_tier
= proc_get_effective_thread_policy(current_thread(), TASK_POLICY_IO
);
8140 * Scan through all UPLs associated with the object to find the
8141 * UPL containing the contended page.
8143 queue_iterate(&o
->uplq
, upl
, upl_t
, uplq
) {
8144 if (((upl
->flags
& UPL_EXPEDITE_SUPPORTED
) == 0) || upl
->upl_priority
<= cur_tier
) {
8147 pl
= UPL_GET_INTERNAL_PAGE_LIST(upl
);
8148 assertf(page_aligned(upl
->u_offset
) && page_aligned(upl
->u_size
),
8149 "upl %p offset 0x%llx size 0x%x\n",
8150 upl
, upl
->u_offset
, upl
->u_size
);
8151 num_pages
= (upl
->u_size
/ PAGE_SIZE
);
8154 * For each page in the UPL page list, see if it matches the contended
8155 * page and was issued as a low prio I/O.
8157 for (i
= 0; i
< num_pages
; i
++) {
8158 if (UPL_PAGE_PRESENT(pl
, i
) && VM_PAGE_GET_PHYS_PAGE(m
) == pl
[i
].phys_addr
) {
8159 if ((upl
->flags
& UPL_DECMP_REQ
) && upl
->decmp_io_upl
) {
8160 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM
, VM_PAGE_EXPEDITE
)) | DBG_FUNC_NONE
, VM_KERNEL_UNSLIDE_OR_PERM(upl
->upl_creator
), VM_KERNEL_UNSLIDE_OR_PERM(m
),
8161 VM_KERNEL_UNSLIDE_OR_PERM(upl
), upl
->upl_priority
, 0);
8162 vm_decmp_upl_reprioritize(upl
, cur_tier
);
8165 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM
, VM_PAGE_EXPEDITE
)) | DBG_FUNC_NONE
, VM_KERNEL_UNSLIDE_OR_PERM(upl
->upl_creator
), VM_KERNEL_UNSLIDE_OR_PERM(m
),
8166 upl
->upl_reprio_info
[i
], upl
->upl_priority
, 0);
8167 if (UPL_REPRIO_INFO_BLKNO(upl
, i
) != 0 && UPL_REPRIO_INFO_LEN(upl
, i
) != 0) {
8168 vm_page_request_reprioritize(o
, UPL_REPRIO_INFO_BLKNO(upl
, i
), UPL_REPRIO_INFO_LEN(upl
, i
), cur_tier
);
8173 /* Check if we found any hits */
8174 if (i
!= num_pages
) {
wait_result_t
vm_page_sleep(vm_object_t o, vm_page_t m, int interruptible)
{
	wait_result_t ret;

	KERNEL_DEBUG((MACHDBG_CODE(DBG_MACH_VM, VM_PAGE_SLEEP)) | DBG_FUNC_START, o, m, 0, 0, 0);

	if (o->io_tracking && ((m->vmp_busy == TRUE) || (m->vmp_cleaning == TRUE) || VM_PAGE_WIRED(m))) {
		/*
		 * Indicates page is busy due to an I/O. Issue a reprioritize request if necessary.
		 */
		vm_page_handle_prio_inversion(o, m);
	}
	m->vmp_wanted = TRUE;
	ret = thread_sleep_vm_object(o, m, interruptible);
	KERNEL_DEBUG((MACHDBG_CODE(DBG_MACH_VM, VM_PAGE_SLEEP)) | DBG_FUNC_END, o, m, 0, 0, 0);

	return ret;
}

static void
io_reprioritize_thread(void *param __unused, wait_result_t wr __unused)
{
	io_reprioritize_req_t   req = NULL;

	while (1) {
		IO_REPRIORITIZE_LIST_LOCK();
		if (queue_empty(&io_reprioritize_list)) {
			IO_REPRIORITIZE_LIST_UNLOCK();
			break;
		}

		queue_remove_first(&io_reprioritize_list, req, io_reprioritize_req_t, io_reprioritize_list);
		IO_REPRIORITIZE_LIST_UNLOCK();

		vnode_pager_issue_reprioritize_io(req->devvp, req->blkno, req->len, req->priority);
		zfree(io_reprioritize_req_zone, req);
	}

	IO_REPRIO_THREAD_CONTINUATION();
}
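/*
 * Design note (explanatory, not from the original source): I/O
 * reprioritization is split into a producer/consumer pair on purpose.  The
 * enqueue paths reached from vm_page_sleep() and vm_decmp_upl_reprioritize()
 * run with the VM object lock held, so they only queue a small request
 * (devvp, blkno, len, priority) under IO_REPRIORITIZE_LIST_LOCK() and wake
 * this thread; the actual vnode_pager_issue_reprioritize_io() call, which
 * may block in the storage stack, happens here without any VM object lock
 * held.
 */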
8224 #if VM_OBJECT_ACCESS_TRACKING
8226 vm_object_access_tracking(
8228 int *access_tracking_p
,
8229 uint32_t *access_tracking_reads_p
,
8230 uint32_t *access_tracking_writes_p
)
8232 int access_tracking
;
8234 access_tracking
= !!*access_tracking_p
;
8236 vm_object_lock(object
);
8237 *access_tracking_p
= object
->access_tracking
;
8238 if (access_tracking_reads_p
) {
8239 *access_tracking_reads_p
= object
->access_tracking_reads
;
8241 if (access_tracking_writes_p
) {
8242 *access_tracking_writes_p
= object
->access_tracking_writes
;
8244 object
->access_tracking
= access_tracking
;
8245 object
->access_tracking_reads
= 0;
8246 object
->access_tracking_writes
= 0;
8247 vm_object_unlock(object
);
8249 if (access_tracking
) {
8250 vm_object_pmap_protect_options(object
,
8260 #endif /* VM_OBJECT_ACCESS_TRACKING */
8263 vm_object_ledger_tag_ledgers(
8265 int *ledger_idx_volatile
,
8266 int *ledger_idx_nonvolatile
,
8267 int *ledger_idx_volatile_compressed
,
8268 int *ledger_idx_nonvolatile_compressed
,
8269 boolean_t
*do_footprint
)
8271 assert(object
->shadow
== VM_OBJECT_NULL
);
8273 *do_footprint
= !object
->vo_no_footprint
;
8275 switch (object
->vo_ledger_tag
) {
8276 case VM_LEDGER_TAG_NONE
:
8278 * Regular purgeable memory:
8279 * counts in footprint only when nonvolatile.
8281 *do_footprint
= TRUE
;
8282 assert(object
->purgable
!= VM_PURGABLE_DENY
);
8283 *ledger_idx_volatile
= task_ledgers
.purgeable_volatile
;
8284 *ledger_idx_nonvolatile
= task_ledgers
.purgeable_nonvolatile
;
8285 *ledger_idx_volatile_compressed
= task_ledgers
.purgeable_volatile_compressed
;
8286 *ledger_idx_nonvolatile_compressed
= task_ledgers
.purgeable_nonvolatile_compressed
;
8288 case VM_LEDGER_TAG_DEFAULT
:
8290 * "default" tagged memory:
8291 * counts in footprint only when nonvolatile and not marked
8292 * as "no_footprint".
8294 *ledger_idx_volatile
= task_ledgers
.tagged_nofootprint
;
8295 *ledger_idx_volatile_compressed
= task_ledgers
.tagged_nofootprint_compressed
;
8296 if (*do_footprint
) {
8297 *ledger_idx_nonvolatile
= task_ledgers
.tagged_footprint
;
8298 *ledger_idx_nonvolatile_compressed
= task_ledgers
.tagged_footprint_compressed
;
8300 *ledger_idx_nonvolatile
= task_ledgers
.tagged_nofootprint
;
8301 *ledger_idx_nonvolatile_compressed
= task_ledgers
.tagged_nofootprint_compressed
;
8304 case VM_LEDGER_TAG_NETWORK
:
8306 * "network" tagged memory:
8307 * never counts in footprint.
8309 *do_footprint
= FALSE
;
8310 *ledger_idx_volatile
= task_ledgers
.network_volatile
;
8311 *ledger_idx_volatile_compressed
= task_ledgers
.network_volatile_compressed
;
8312 *ledger_idx_nonvolatile
= task_ledgers
.network_nonvolatile
;
8313 *ledger_idx_nonvolatile_compressed
= task_ledgers
.network_nonvolatile_compressed
;
8315 case VM_LEDGER_TAG_MEDIA
:
8317 * "media" tagged memory:
8318 * counts in footprint only when nonvolatile and not marked
8319 * as "no footprint".
8321 *ledger_idx_volatile
= task_ledgers
.media_nofootprint
;
8322 *ledger_idx_volatile_compressed
= task_ledgers
.media_nofootprint_compressed
;
8323 if (*do_footprint
) {
8324 *ledger_idx_nonvolatile
= task_ledgers
.media_footprint
;
8325 *ledger_idx_nonvolatile_compressed
= task_ledgers
.media_footprint_compressed
;
8327 *ledger_idx_nonvolatile
= task_ledgers
.media_nofootprint
;
8328 *ledger_idx_nonvolatile_compressed
= task_ledgers
.media_nofootprint_compressed
;
8331 case VM_LEDGER_TAG_GRAPHICS
:
8333 * "graphics" tagged memory:
8334 * counts in footprint only when nonvolatile and not marked
8335 * as "no footprint".
8337 *ledger_idx_volatile
= task_ledgers
.graphics_nofootprint
;
8338 *ledger_idx_volatile_compressed
= task_ledgers
.graphics_nofootprint_compressed
;
8339 if (*do_footprint
) {
8340 *ledger_idx_nonvolatile
= task_ledgers
.graphics_footprint
;
8341 *ledger_idx_nonvolatile_compressed
= task_ledgers
.graphics_footprint_compressed
;
8343 *ledger_idx_nonvolatile
= task_ledgers
.graphics_nofootprint
;
8344 *ledger_idx_nonvolatile_compressed
= task_ledgers
.graphics_nofootprint_compressed
;
8347 case VM_LEDGER_TAG_NEURAL
:
8349 * "neural" tagged memory:
8350 * counts in footprint only when nonvolatile and not marked
8351 * as "no footprint".
8353 *ledger_idx_volatile
= task_ledgers
.neural_nofootprint
;
8354 *ledger_idx_volatile_compressed
= task_ledgers
.neural_nofootprint_compressed
;
8355 if (*do_footprint
) {
8356 *ledger_idx_nonvolatile
= task_ledgers
.neural_footprint
;
8357 *ledger_idx_nonvolatile_compressed
= task_ledgers
.neural_footprint_compressed
;
8359 *ledger_idx_nonvolatile
= task_ledgers
.neural_nofootprint
;
8360 *ledger_idx_nonvolatile_compressed
= task_ledgers
.neural_nofootprint_compressed
;
8364 panic("%s: object %p has unsupported ledger_tag %d\n",
8365 __FUNCTION__
, object
, object
->vo_ledger_tag
);
8370 vm_object_ownership_change(
8374 int new_ledger_flags
,
8375 boolean_t old_task_objq_locked
)
8379 int resident_count
, wired_count
;
8380 unsigned int compressed_count
;
8381 int ledger_idx_volatile
;
8382 int ledger_idx_nonvolatile
;
8383 int ledger_idx_volatile_compressed
;
8384 int ledger_idx_nonvolatile_compressed
;
8386 int ledger_idx_compressed
;
8387 boolean_t do_footprint
, old_no_footprint
, new_no_footprint
;
8388 boolean_t new_task_objq_locked
;
8390 vm_object_lock_assert_exclusive(object
);
8392 if (!object
->internal
) {
8393 return KERN_INVALID_ARGUMENT
;
8395 if (new_ledger_tag
== VM_LEDGER_TAG_NONE
&&
8396 object
->purgable
== VM_PURGABLE_DENY
) {
8397 /* non-purgeable memory must have a valid non-zero ledger tag */
8398 return KERN_INVALID_ARGUMENT
;
8400 if (new_ledger_tag
< 0 ||
8401 new_ledger_tag
> VM_LEDGER_TAG_MAX
) {
8402 return KERN_INVALID_ARGUMENT
;
8404 if (new_ledger_flags
& ~VM_LEDGER_FLAGS
) {
8405 return KERN_INVALID_ARGUMENT
;
8407 if (object
->vo_ledger_tag
== VM_LEDGER_TAG_NONE
&&
8408 object
->purgable
== VM_PURGABLE_DENY
) {
8410 * This VM object is neither ledger-tagged nor purgeable.
8411 * We can convert it to "ledger tag" ownership iff it
8412 * has not been used at all yet (no resident pages and
8413 * no pager) and it's going to be assigned to a valid task.
8415 if (object
->resident_page_count
!= 0 ||
8416 object
->pager
!= NULL
||
8417 object
->pager_created
||
8418 object
->ref_count
!= 1 ||
8419 object
->vo_owner
!= TASK_NULL
||
8420 object
->copy_strategy
!= MEMORY_OBJECT_COPY_NONE
||
8421 new_owner
== TASK_NULL
) {
8422 return KERN_FAILURE
;
8426 if (new_ledger_flags
& VM_LEDGER_FLAG_NO_FOOTPRINT
) {
8427 new_no_footprint
= TRUE
;
8429 new_no_footprint
= FALSE
;
8432 if (!new_no_footprint
&&
8433 object
->purgable
!= VM_PURGABLE_DENY
&&
8434 new_owner
!= TASK_NULL
&&
8435 new_owner
!= VM_OBJECT_OWNER_DISOWNED
&&
8436 new_owner
->task_legacy_footprint
) {
8438 * This task has been granted "legacy footprint" and should
8439 * not be charged for its IOKit purgeable memory. Since we
8440 * might now change the accounting of such memory to the
8441 * "graphics" ledger, for example, give it the "no footprint"
8444 new_no_footprint
= TRUE
;
8446 #endif /* __arm64__ */
8447 assert(object
->copy_strategy
== MEMORY_OBJECT_COPY_NONE
);
8448 assert(object
->shadow
== VM_OBJECT_NULL
);
8449 assert(object
->copy
== VM_OBJECT_NULL
);
8451 old_ledger_tag
= object
->vo_ledger_tag
;
8452 old_no_footprint
= object
->vo_no_footprint
;
8453 old_owner
= VM_OBJECT_OWNER(object
);
8455 DTRACE_VM8(object_ownership_change
,
8456 vm_object_t
, object
,
8458 int, old_ledger_tag
,
8459 int, old_no_footprint
,
8461 int, new_ledger_tag
,
8462 int, new_no_footprint
,
8463 int, VM_OBJECT_ID(object
));
8465 assert(object
->internal
);
8466 resident_count
= object
->resident_page_count
- object
->wired_page_count
;
8467 wired_count
= object
->wired_page_count
;
8468 compressed_count
= vm_compressor_pager_get_count(object
->pager
);
8471 * Deal with the old owner and/or ledger tag, if needed.
8473 if (old_owner
!= TASK_NULL
&&
8474 ((old_owner
!= new_owner
) /* new owner ... */
8476 (old_no_footprint
!= new_no_footprint
) /* new "no_footprint" */
8478 old_ledger_tag
!= new_ledger_tag
)) { /* ... new ledger */
8480 * Take this object off of the old owner's ledgers.
8482 vm_object_ledger_tag_ledgers(object
,
8483 &ledger_idx_volatile
,
8484 &ledger_idx_nonvolatile
,
8485 &ledger_idx_volatile_compressed
,
8486 &ledger_idx_nonvolatile_compressed
,
8488 if (object
->purgable
== VM_PURGABLE_VOLATILE
||
8489 object
->purgable
== VM_PURGABLE_EMPTY
) {
8490 ledger_idx
= ledger_idx_volatile
;
8491 ledger_idx_compressed
= ledger_idx_volatile_compressed
;
8493 ledger_idx
= ledger_idx_nonvolatile
;
8494 ledger_idx_compressed
= ledger_idx_nonvolatile_compressed
;
8496 if (resident_count
) {
8498 * Adjust the appropriate old owners's ledgers by the
8499 * number of resident pages.
8501 ledger_debit(old_owner
->ledger
,
8503 ptoa_64(resident_count
));
8504 /* adjust old owner's footprint */
8506 object
->purgable
!= VM_PURGABLE_VOLATILE
&&
8507 object
->purgable
!= VM_PURGABLE_EMPTY
) {
8508 ledger_debit(old_owner
->ledger
,
8509 task_ledgers
.phys_footprint
,
8510 ptoa_64(resident_count
));
8514 /* wired pages are always nonvolatile */
8515 ledger_debit(old_owner
->ledger
,
8516 ledger_idx_nonvolatile
,
8517 ptoa_64(wired_count
));
8519 ledger_debit(old_owner
->ledger
,
8520 task_ledgers
.phys_footprint
,
8521 ptoa_64(wired_count
));
		if (compressed_count) {
			/*
			 * Adjust the appropriate old owner's ledgers
			 * by the number of compressed pages.
			 */
			ledger_debit(old_owner->ledger,
			    ledger_idx_compressed,
			    ptoa_64(compressed_count));
			if (do_footprint &&
			    object->purgable != VM_PURGABLE_VOLATILE &&
			    object->purgable != VM_PURGABLE_EMPTY) {
				ledger_debit(old_owner->ledger,
				    task_ledgers.phys_footprint,
				    ptoa_64(compressed_count));
			}
		}
		if (old_owner != new_owner) {
			/* remove object from old_owner's list of owned objects */
			DTRACE_VM2(object_owner_remove,
			    vm_object_t, object,
			    task_t, old_owner);
			if (!old_task_objq_locked) {
				task_objq_lock(old_owner);
			}
			old_owner->task_owned_objects--;
			queue_remove(&old_owner->task_objq, object,
			    vm_object_t, task_objq);
			switch (object->purgable) {
			case VM_PURGABLE_NONVOLATILE:
			case VM_PURGABLE_EMPTY:
				vm_purgeable_nonvolatile_owner_update(old_owner,
				    -1);
				break;
			case VM_PURGABLE_VOLATILE:
				vm_purgeable_volatile_owner_update(old_owner,
				    -1);
				break;
			default:
				break;
			}
			if (!old_task_objq_locked) {
				task_objq_unlock(old_owner);
			}
		}
	}

	/*
	 * Switch to new ledger tag and/or owner.
	 */

	new_task_objq_locked = FALSE;
	if (new_owner != old_owner &&
	    new_owner != TASK_NULL &&
	    new_owner != VM_OBJECT_OWNER_DISOWNED) {
		/*
		 * If the new owner is not accepting new objects ("disowning"),
		 * the object becomes "disowned" and will be added to
		 * the kernel's task_objq.
		 *
		 * Check first without locking, to avoid blocking while the
		 * task is disowning its objects.
		 */
		if (new_owner->task_objects_disowning) {
			new_owner = VM_OBJECT_OWNER_DISOWNED;
		} else {
			task_objq_lock(new_owner);
			/* check again now that we have the lock */
			if (new_owner->task_objects_disowning) {
				/* unlock the task before we stop pointing at it */
				task_objq_unlock(new_owner);
				new_owner = VM_OBJECT_OWNER_DISOWNED;
			} else {
				new_task_objq_locked = TRUE;
			}
		}
	}

	object->vo_ledger_tag = new_ledger_tag;
	object->vo_owner = new_owner;
	object->vo_no_footprint = new_no_footprint;

	if (new_owner == VM_OBJECT_OWNER_DISOWNED) {
		/*
		 * Disowned objects are added to the kernel's task_objq but
		 * are marked as owned by "VM_OBJECT_OWNER_DISOWNED" to
		 * differentiate them from objects intentionally owned by
		 * the kernel.
		 */
		assert(old_owner != kernel_task);
		new_owner = kernel_task;
		assert(!new_task_objq_locked);
		task_objq_lock(new_owner);
		new_task_objq_locked = TRUE;
	}

	/*
	 * Deal with the new owner and/or ledger tag, if needed.
	 */
	if (new_owner != TASK_NULL &&
	    ((new_owner != old_owner)           /* new owner ... */
	    ||                                  /* ... or ... */
	    (new_no_footprint != old_no_footprint) /* ... new "no_footprint" */
	    ||                                  /* ... or ... */
	    new_ledger_tag != old_ledger_tag)) { /* ... new ledger */
		/*
		 * Add this object to the new owner's ledgers.
		 */
		vm_object_ledger_tag_ledgers(object,
		    &ledger_idx_volatile,
		    &ledger_idx_nonvolatile,
		    &ledger_idx_volatile_compressed,
		    &ledger_idx_nonvolatile_compressed,
		    &do_footprint);
		if (object->purgable == VM_PURGABLE_VOLATILE ||
		    object->purgable == VM_PURGABLE_EMPTY) {
			ledger_idx = ledger_idx_volatile;
			ledger_idx_compressed = ledger_idx_volatile_compressed;
		} else {
			ledger_idx = ledger_idx_nonvolatile;
			ledger_idx_compressed = ledger_idx_nonvolatile_compressed;
		}
		if (resident_count) {
			/*
			 * Adjust the appropriate new owner's ledgers by the
			 * number of resident pages.
			 */
			ledger_credit(new_owner->ledger,
			    ledger_idx,
			    ptoa_64(resident_count));
			/* adjust new owner's footprint */
			if (do_footprint &&
			    object->purgable != VM_PURGABLE_VOLATILE &&
			    object->purgable != VM_PURGABLE_EMPTY) {
				ledger_credit(new_owner->ledger,
				    task_ledgers.phys_footprint,
				    ptoa_64(resident_count));
			}
		}
		if (wired_count) {
			/* wired pages are always nonvolatile */
			ledger_credit(new_owner->ledger,
			    ledger_idx_nonvolatile,
			    ptoa_64(wired_count));
			if (do_footprint) {
				ledger_credit(new_owner->ledger,
				    task_ledgers.phys_footprint,
				    ptoa_64(wired_count));
			}
		}
		if (compressed_count) {
			/*
			 * Adjust the new owner's ledgers by the number of
			 * compressed pages.
			 */
			ledger_credit(new_owner->ledger,
			    ledger_idx_compressed,
			    ptoa_64(compressed_count));
			if (do_footprint &&
			    object->purgable != VM_PURGABLE_VOLATILE &&
			    object->purgable != VM_PURGABLE_EMPTY) {
				ledger_credit(new_owner->ledger,
				    task_ledgers.phys_footprint,
				    ptoa_64(compressed_count));
			}
		}
		if (new_owner != old_owner) {
			/* add object to new_owner's list of owned objects */
			DTRACE_VM2(object_owner_add,
			    vm_object_t, object,
			    task_t, new_owner);
			assert(new_task_objq_locked);
			new_owner->task_owned_objects++;
			queue_enter(&new_owner->task_objq, object,
			    vm_object_t, task_objq);
			switch (object->purgable) {
			case VM_PURGABLE_NONVOLATILE:
			case VM_PURGABLE_EMPTY:
				vm_purgeable_nonvolatile_owner_update(new_owner,
				    +1);
				break;
			case VM_PURGABLE_VOLATILE:
				vm_purgeable_volatile_owner_update(new_owner,
				    +1);
				break;
			default:
				break;
			}
		}
	}

	if (new_task_objq_locked) {
		task_objq_unlock(new_owner);
	}

	return KERN_SUCCESS;
}
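
/*
 * vm_owned_objects_disown:
 *
 * Transfer ownership of all objects currently owned by "task" to the
 * kernel's "disowned" list, by calling vm_object_ownership_change() on
 * each of them with VM_OBJECT_OWNER_DISOWNED as the new owner.  After
 * this, the task is no longer charged for these objects and will not
 * accept ownership of new ones.
 */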
void
vm_owned_objects_disown(
	task_t  task)
{
	vm_object_t     next_object;
	vm_object_t     object;
	int             collisions;
	kern_return_t   kr;

	if (task == NULL) {
		return;
	}

	collisions = 0;

again:
	if (task->task_objects_disowned) {
		/* task has already disowned its owned objects */
		assert(task->task_volatile_objects == 0);
		assert(task->task_nonvolatile_objects == 0);
		assert(task->task_owned_objects == 0);
		return;
	}

	task_objq_lock(task);
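
	/*
	 * Mark the task as "disowning" while holding its task_objq lock:
	 * vm_object_ownership_change() checks this flag under the same lock
	 * and redirects any new ownership assignment to the "disowned" list,
	 * so no new objects can show up on this queue while we drain it.
	 */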
	task->task_objects_disowning = TRUE;

	for (object = (vm_object_t) queue_first(&task->task_objq);
	    !queue_end(&task->task_objq, (queue_entry_t) object);
	    object = next_object) {
		if (task->task_nonvolatile_objects == 0 &&
		    task->task_volatile_objects == 0 &&
		    task->task_owned_objects == 0) {
			/* no more objects owned by "task" */
			break;
		}

		next_object = (vm_object_t) queue_next(&object->task_objq);
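
		/*
		 * "next_object" was captured above because the call to
		 * vm_object_ownership_change() below removes "object" from
		 * this task's task_objq and moves it to the kernel's queue,
		 * which would invalidate its queue links for this iteration.
		 */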
#if DEBUG
		assert(object->vo_purgeable_volatilizer == NULL);
#endif /* DEBUG */
		assert(object->vo_owner == task);
		if (!vm_object_lock_try(object)) {
			task_objq_unlock(task);
			mutex_pause(collisions++);
			goto again;
		}
		/* transfer ownership to the kernel */
		assert(VM_OBJECT_OWNER(object) != kernel_task);
		kr = vm_object_ownership_change(
			object,
			object->vo_ledger_tag, /* unchanged */
			VM_OBJECT_OWNER_DISOWNED, /* new owner */
			0, /* new_ledger_flags */
			TRUE);  /* old_owner->task_objq locked */
		assert(kr == KERN_SUCCESS);
		assert(object->vo_owner == VM_OBJECT_OWNER_DISOWNED);
		vm_object_unlock(object);
	}
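
	/*
	 * Sanity check: by now every object on the queue should have been
	 * handed over to the kernel; panic with the queue state if any
	 * owned, volatile or nonvolatile objects remain.
	 */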
	if (__improbable(task->task_volatile_objects != 0 ||
	    task->task_nonvolatile_objects != 0 ||
	    task->task_owned_objects != 0)) {
		panic("%s(%p): volatile=%d nonvolatile=%d owned=%d q=%p q_first=%p q_last=%p",
		    __FUNCTION__,
		    task,
		    task->task_volatile_objects,
		    task->task_nonvolatile_objects,
		    task->task_owned_objects,
		    &task->task_objq,
		    queue_first(&task->task_objq),
		    queue_last(&task->task_objq));
	}

	/* there shouldn't be any objects owned by task now */
	assert(task->task_volatile_objects == 0);
	assert(task->task_nonvolatile_objects == 0);
	assert(task->task_owned_objects == 0);
	assert(task->task_objects_disowning);

	/* and we don't need to try and disown again */
	task->task_objects_disowned = TRUE;

	task_objq_unlock(task);
}