/*
 * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include "vm_compressor_backing_store.h"
#include <vm/vm_pageout.h>
#include <vm/vm_protos.h>

#include <IOKit/IOHibernatePrivate.h>

#include <kern/policy_internal.h>
LCK_GRP_DECLARE(vm_swap_data_lock_grp, "vm_swap_data");
LCK_MTX_EARLY_DECLARE(vm_swap_data_lock, &vm_swap_data_lock_grp);
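/*
 * Note: vm_swap_data_lock serializes access to the global swapfile list
 * (swf_global_queue), each swapfile's allocation bitmap and flags, and the
 * swap bookkeeping counters declared below.
 */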
#if defined(XNU_TARGET_OS_OSX)
/*
 * launchd explicitly turns ON swap later during boot on macOS devices.
 */
boolean_t	compressor_store_stop_compaction = TRUE;
#else
boolean_t	compressor_store_stop_compaction = FALSE;
#endif

boolean_t	vm_swapfile_create_needed = FALSE;
boolean_t	vm_swapfile_gc_needed = FALSE;
int		vm_swapper_throttle = -1;
uint64_t	vm_swapout_thread_id;

uint64_t	vm_swap_put_failures = 0;              /* Likely failed I/O. Data is still in memory. */
uint64_t	vm_swap_get_failures = 0;              /* Fatal */
uint64_t	vm_swap_put_failures_no_swap_file = 0; /* Possibly not fatal because we might just need a new swapfile. */
int		vm_num_swap_files_config = 0;
int		vm_num_swap_files = 0;
int		vm_num_pinned_swap_files = 0;
int		vm_swapout_thread_processed_segments = 0;
int		vm_swapout_thread_awakened = 0;
bool		vm_swapout_thread_running = FALSE;
int		vm_swapfile_create_thread_awakened = 0;
int		vm_swapfile_create_thread_running = 0;
int		vm_swapfile_gc_thread_awakened = 0;
int		vm_swapfile_gc_thread_running = 0;

int64_t		vm_swappin_avail = 0;
boolean_t	vm_swappin_enabled = FALSE;
unsigned int	vm_swapfile_total_segs_alloced = 0;
unsigned int	vm_swapfile_total_segs_used = 0;

char		swapfilename[MAX_SWAPFILENAME_LEN + 1] = SWAP_FILE_NAME;

extern vm_map_t compressor_map;
#define SWAP_READY      0x1  /* Swap file is ready to be used */
#define SWAP_RECLAIM    0x2  /* Swap file is marked to be reclaimed */
#define SWAP_WANTED     0x4  /* Swap file has waiters */
#define SWAP_REUSE      0x8  /* Swap file is on the Q and has a name. Reuse after init-ing.*/
#define SWAP_PINNED     0x10 /* Swap file is pinned (FusionDrive) */
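/*
 * Note on the swp_flags lifecycle: a swapfile is marked SWAP_READY once it has
 * been created, SWAP_RECLAIM while vm_swap_reclaim is draining it, SWAP_WANTED
 * when a thread is waiting on its outstanding I/Os, and SWAP_REUSE once it has
 * been emptied and parked on the queue for future re-use.
 */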
struct swapfile {
	queue_head_t		swp_queue;	/* list of swap files */
	char			*swp_path;	/* saved pathname of swap file */
	struct vnode		*swp_vp;	/* backing vnode */
	uint64_t		swp_size;	/* size of this swap file */
	uint8_t			*swp_bitmap;	/* bitmap showing the alloced/freed slots in the swap file */
	unsigned int		swp_pathlen;	/* length of pathname */
	unsigned int		swp_nsegs;	/* #segments we can use */
	unsigned int		swp_nseginuse;	/* #segments in use */
	unsigned int		swp_index;	/* index of this swap file */
	unsigned int		swp_flags;	/* state of swap file */
	unsigned int		swp_free_hint;	/* offset of 1st free chunk */
	unsigned int		swp_io_count;	/* count of outstanding I/Os */
	c_segment_t		*swp_csegs;	/* back pointers to the c_segments. Used during swap reclaim. */

	struct trim_list	*swp_delayed_trim_list_head;
	unsigned int		swp_delayed_trim_count;
};

queue_head_t	swf_global_queue;
boolean_t	swp_trim_supported = FALSE;

extern clock_sec_t	dont_trim_until_ts;
clock_sec_t		vm_swapfile_last_failed_to_create_ts = 0;
clock_sec_t		vm_swapfile_last_successful_create_ts = 0;
int			vm_swapfile_can_be_created = FALSE;
boolean_t		delayed_trim_handling_in_progress = FALSE;

boolean_t		hibernate_in_progress_with_pinned_swap = FALSE;
static void vm_swapout_thread_throttle_adjust(void);
static void vm_swap_free_now(struct swapfile *swf, uint64_t f_offset);
static void vm_swapout_thread(void);
static void vm_swapfile_create_thread(void);
static void vm_swapfile_gc_thread(void);
static void vm_swap_defragment(void);
static void vm_swap_handle_delayed_trims(boolean_t);
static void vm_swap_do_delayed_trim(struct swapfile *);
static void vm_swap_wait_on_trim_handling_in_progress(void);

extern int vnode_getwithref(struct vnode *vp);

boolean_t vm_swap_force_defrag = FALSE, vm_swap_force_reclaim = FALSE;
#if !XNU_TARGET_OS_OSX

/*
 * For CONFIG_FREEZE, we scale the c_segments_limit based on the
 * number of swapfiles allowed. That increases wired memory overhead.
 * So we want to keep the max swapfiles same on both DEV/RELEASE so
 * that the memory overhead is similar for performance comparisons.
 */
#define VM_MAX_SWAP_FILE_NUM            5

#define VM_SWAPFILE_DELAYED_TRIM_MAX    4

#define VM_SWAP_SHOULD_DEFRAGMENT()     (((vm_swap_force_defrag == TRUE) || (c_swappedout_sparse_count > (vm_swapfile_total_segs_used / 16))) ? 1 : 0)
#define VM_SWAP_SHOULD_PIN(_size)       FALSE
#define VM_SWAP_SHOULD_CREATE(cur_ts)   ((vm_num_swap_files < vm_num_swap_files_config) && ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < (unsigned int)VM_SWAPFILE_HIWATER_SEGS) && \
	                                 ((cur_ts - vm_swapfile_last_failed_to_create_ts) > VM_SWAPFILE_DELAYED_CREATE) ? 1 : 0)
#define VM_SWAP_SHOULD_TRIM(swf)        ((swf->swp_delayed_trim_count >= VM_SWAPFILE_DELAYED_TRIM_MAX) ? 1 : 0)

#else /* !XNU_TARGET_OS_OSX */

#define VM_MAX_SWAP_FILE_NUM            100
#define VM_SWAPFILE_DELAYED_TRIM_MAX    128

#define VM_SWAP_SHOULD_DEFRAGMENT()     (((vm_swap_force_defrag == TRUE) || (c_swappedout_sparse_count > (vm_swapfile_total_segs_used / 4))) ? 1 : 0)
#define VM_SWAP_SHOULD_PIN(_size)       (vm_swappin_avail > 0 && vm_swappin_avail >= (int64_t)(_size))
#define VM_SWAP_SHOULD_CREATE(cur_ts)   ((vm_num_swap_files < vm_num_swap_files_config) && ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < (unsigned int)VM_SWAPFILE_HIWATER_SEGS) && \
	                                 ((cur_ts - vm_swapfile_last_failed_to_create_ts) > VM_SWAPFILE_DELAYED_CREATE) ? 1 : 0)
#define VM_SWAP_SHOULD_TRIM(swf)        ((swf->swp_delayed_trim_count >= VM_SWAPFILE_DELAYED_TRIM_MAX) ? 1 : 0)

#endif /* !XNU_TARGET_OS_OSX */

#define VM_SWAP_SHOULD_RECLAIM()        (((vm_swap_force_reclaim == TRUE) || ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) >= SWAPFILE_RECLAIM_THRESHOLD_SEGS)) ? 1 : 0)
#define VM_SWAP_SHOULD_ABORT_RECLAIM()  (((vm_swap_force_reclaim == FALSE) && ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) <= SWAPFILE_RECLAIM_MINIMUM_SEGS)) ? 1 : 0)
#define VM_SWAPFILE_DELAYED_CREATE      15

#define VM_SWAP_BUSY()  ((c_swapout_count && (vm_swapper_throttle == THROTTLE_LEVEL_COMPRESSOR_TIER0)) ? 1 : 0)
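/*
 * In effect, the defragment trigger is more aggressive on the non-macOS
 * configuration: a pass is considered once sparse segments exceed 1/16 of the
 * segments in use, versus 1/4 on macOS, and far fewer swapfiles (and therefore
 * less wired bitmap/back-pointer overhead) are permitted.
 */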
#if CHECKSUM_THE_SWAP
extern unsigned int hash_string(char *cp, int len);
#endif
#if RECORD_THE_COMPRESSED_DATA
boolean_t	c_compressed_record_init_done = FALSE;
int		c_compressed_record_write_error = 0;
struct vnode	*c_compressed_record_vp = NULL;
uint64_t	c_compressed_record_file_offset = 0;
void		c_compressed_record_init(void);
void		c_compressed_record_write(char *, int);
#endif

extern void vm_pageout_io_throttle(void);
static struct swapfile *vm_swapfile_for_handle(uint64_t);

/*
 * Called with the vm_swap_data_lock held.
 */
static struct swapfile *
vm_swapfile_for_handle(uint64_t f_offset)
{
	uint64_t	file_offset = 0;
	unsigned int	swapfile_index = 0;
	struct swapfile	*swf = NULL;

	file_offset = (f_offset & SWAP_SLOT_MASK);
	swapfile_index = (f_offset >> SWAP_DEVICE_SHIFT);

	swf = (struct swapfile *) queue_first(&swf_global_queue);

	while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
		if (swapfile_index == swf->swp_index) {
			break;
		}

		swf = (struct swapfile *) queue_next(&swf->swp_queue);
	}

	if (queue_end(&swf_global_queue, (queue_entry_t) swf)) {
		swf = NULL;
	}

	return swf;
}
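/*
 * A swap handle, as consumed above and produced by vm_swap_put, packs the
 * owning swapfile's index into the high bits (recovered with
 * ">> SWAP_DEVICE_SHIFT") and the byte offset within that file into the low
 * bits (masked with SWAP_SLOT_MASK).
 */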
#if ENCRYPTED_SWAP
#include <libkern/crypto/aesxts.h>

extern int cc_rand_generate(void *, size_t);     /* from libkern/crypto/rand.h */

boolean_t	swap_crypt_initialized;
void		swap_crypt_initialize(void);

symmetric_xts	xts_modectx;
uint32_t	swap_crypt_key1[8];   /* big enough for a 256 bit random key */
uint32_t	swap_crypt_key2[8];   /* big enough for a 256 bit random key */
#if DEVELOPMENT || DEBUG
boolean_t	swap_crypt_xts_tested = FALSE;
unsigned char	swap_crypt_test_page_ref[4096] __attribute__((aligned(4096)));
unsigned char	swap_crypt_test_page_encrypt[4096] __attribute__((aligned(4096)));
unsigned char	swap_crypt_test_page_decrypt[4096] __attribute__((aligned(4096)));
#endif /* DEVELOPMENT || DEBUG */

unsigned long	vm_page_encrypt_counter;
unsigned long	vm_page_decrypt_counter;
void
swap_crypt_initialize(void)
{
	uint8_t	*enckey1, *enckey2;
	int	keylen1, keylen2;
	int	error;

	assert(swap_crypt_initialized == FALSE);

	keylen1 = sizeof(swap_crypt_key1);
	enckey1 = (uint8_t *)&swap_crypt_key1;
	keylen2 = sizeof(swap_crypt_key2);
	enckey2 = (uint8_t *)&swap_crypt_key2;

	error = cc_rand_generate((void *)enckey1, keylen1);
	assert(!error);

	error = cc_rand_generate((void *)enckey2, keylen2);
	assert(!error);

	error = xts_start(0, NULL, enckey1, keylen1, enckey2, keylen2, 0, 0, &xts_modectx);
	assert(!error);

	swap_crypt_initialized = TRUE;
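	/*
	 * Note: the two 256-bit XTS keys above are generated randomly at first
	 * use and kept only in kernel memory, so data swapped out under them
	 * is, in effect, unreadable once the system reboots.
	 */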
265 #if DEVELOPMENT || DEBUG
275 assert(swap_crypt_xts_tested
== FALSE
);
	/*
	 * Validate the encryption algorithms.
	 *
	 * First initialize the test data.
	 */
282 for (i
= 0; i
< 4096; i
++) {
283 swap_crypt_test_page_ref
[i
] = (char) i
;
285 ivnum
[0] = (uint64_t)0xaa;
287 iv
= (uint8_t *)ivnum
;
289 refptr
= (uint8_t *)swap_crypt_test_page_ref
;
290 encptr
= (uint8_t *)swap_crypt_test_page_encrypt
;
291 decptr
= (uint8_t *)swap_crypt_test_page_decrypt
;
295 rc
= xts_encrypt(refptr
, size
, encptr
, iv
, &xts_modectx
);
298 /* compare result with original - should NOT match */
299 for (i
= 0; i
< 4096; i
++) {
300 if (swap_crypt_test_page_encrypt
[i
] !=
301 swap_crypt_test_page_ref
[i
]) {
308 rc
= xts_decrypt(encptr
, size
, decptr
, iv
, &xts_modectx
);
311 /* compare result with original */
312 for (i
= 0; i
< 4096; i
++) {
313 if (swap_crypt_test_page_decrypt
[i
] !=
314 swap_crypt_test_page_ref
[i
]) {
315 panic("encryption test failed");
318 /* encrypt in place */
319 rc
= xts_encrypt(decptr
, size
, decptr
, iv
, &xts_modectx
);
322 /* decrypt in place */
323 rc
= xts_decrypt(decptr
, size
, decptr
, iv
, &xts_modectx
);
326 for (i
= 0; i
< 4096; i
++) {
327 if (swap_crypt_test_page_decrypt
[i
] !=
328 swap_crypt_test_page_ref
[i
]) {
329 panic("in place encryption test failed");
332 swap_crypt_xts_tested
= TRUE
;
333 #endif /* DEVELOPMENT || DEBUG */
338 vm_swap_encrypt(c_segment_t c_seg
)
346 if (swap_crypt_initialized
== FALSE
) {
347 swap_crypt_initialize();
350 #if DEVELOPMENT || DEBUG
351 C_SEG_MAKE_WRITEABLE(c_seg
);
353 ptr
= (uint8_t *)c_seg
->c_store
.c_buffer
;
354 size
= round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg
->c_populated_offset
));
356 ivnum
[0] = (uint64_t)c_seg
;
358 iv
= (uint8_t *)ivnum
;
360 rc
= xts_encrypt(ptr
, size
, ptr
, iv
, &xts_modectx
);
363 vm_page_encrypt_counter
+= (size
/ PAGE_SIZE_64
);
365 #if DEVELOPMENT || DEBUG
366 C_SEG_WRITE_PROTECT(c_seg
);
371 vm_swap_decrypt(c_segment_t c_seg
)
379 assert(swap_crypt_initialized
);
381 #if DEVELOPMENT || DEBUG
382 C_SEG_MAKE_WRITEABLE(c_seg
);
384 ptr
= (uint8_t *)c_seg
->c_store
.c_buffer
;
385 size
= round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg
->c_populated_offset
));
387 ivnum
[0] = (uint64_t)c_seg
;
389 iv
= (uint8_t *)ivnum
;
391 rc
= xts_decrypt(ptr
, size
, ptr
, iv
, &xts_modectx
);
394 vm_page_decrypt_counter
+= (size
/ PAGE_SIZE_64
);
396 #if DEVELOPMENT || DEBUG
397 C_SEG_WRITE_PROTECT(c_seg
);
400 #endif /* ENCRYPTED_SWAP */
404 vm_compressor_swap_init()
406 thread_t thread
= NULL
;
408 queue_init(&swf_global_queue
);
410 if (kernel_thread_start_priority((thread_continue_t
)vm_swapout_thread
, NULL
,
411 BASEPRI_VM
, &thread
) != KERN_SUCCESS
) {
412 panic("vm_swapout_thread: create failed");
414 thread_set_thread_name(thread
, "VM_swapout");
415 vm_swapout_thread_id
= thread
->thread_id
;
417 thread_deallocate(thread
);
419 if (kernel_thread_start_priority((thread_continue_t
)vm_swapfile_create_thread
, NULL
,
420 BASEPRI_VM
, &thread
) != KERN_SUCCESS
) {
421 panic("vm_swapfile_create_thread: create failed");
424 thread_set_thread_name(thread
, "VM_swapfile_create");
425 thread_deallocate(thread
);
427 if (kernel_thread_start_priority((thread_continue_t
)vm_swapfile_gc_thread
, NULL
,
428 BASEPRI_VM
, &thread
) != KERN_SUCCESS
) {
429 panic("vm_swapfile_gc_thread: create failed");
431 thread_set_thread_name(thread
, "VM_swapfile_gc");
	/*
	 * Swapfile garbage collection will need to allocate memory
	 * to complete its swap reclaim and in-memory compaction.
	 * So allow it to dip into the reserved VM page pool.
	 */
439 thread
->options
|= TH_OPT_VMPRIV
;
440 thread_unlock(thread
);
442 thread_deallocate(thread
);
444 proc_set_thread_policy_with_tid(kernel_task
, thread
->thread_id
,
445 TASK_POLICY_INTERNAL
, TASK_POLICY_IO
, THROTTLE_LEVEL_COMPRESSOR_TIER2
);
446 proc_set_thread_policy_with_tid(kernel_task
, thread
->thread_id
,
447 TASK_POLICY_INTERNAL
, TASK_POLICY_PASSIVE_IO
, TASK_POLICY_ENABLE
);
#if !XNU_TARGET_OS_OSX
	/*
	 * dummy value until the swap file gets created
	 * when we drive the first c_segment_t to the
	 * swapout queue... at that time we will
	 * know the true size we have to work with
	 */
	c_overage_swapped_limit = 16;
#endif /* !XNU_TARGET_OS_OSX */
459 vm_num_swap_files_config
= VM_MAX_SWAP_FILE_NUM
;
460 #if DEVELOPMENT || DEBUG
461 typeof(vm_num_swap_files_config
) parsed_vm_max_num_swap_files
= 0;
462 if (PE_parse_boot_argn("vm_max_num_swap_files", &parsed_vm_max_num_swap_files
, sizeof(parsed_vm_max_num_swap_files
))) {
463 if (parsed_vm_max_num_swap_files
> 0) {
464 vm_num_swap_files_config
= parsed_vm_max_num_swap_files
;
466 printf("WARNING: Ignoring vm_max_num_swap_files=%d boot-arg. Value must be > 0\n", parsed_vm_max_num_swap_files
);
470 printf("Maximum number of VM swap files: %d\n", vm_num_swap_files_config
);
472 printf("VM Swap Subsystem is ON\n");
476 #if RECORD_THE_COMPRESSED_DATA
479 c_compressed_record_init()
481 if (c_compressed_record_init_done
== FALSE
) {
482 vm_swapfile_open("/tmp/compressed_data", &c_compressed_record_vp
);
483 c_compressed_record_init_done
= TRUE
;
488 c_compressed_record_write(char *buf
, int size
)
490 if (c_compressed_record_write_error
== 0) {
491 c_compressed_record_write_error
= vm_record_file_write(c_compressed_record_vp
, c_compressed_record_file_offset
, buf
, size
);
492 c_compressed_record_file_offset
+= size
;
498 int compaction_swapper_inited
= 0;
501 vm_compaction_swapper_do_init(void)
507 if (compaction_swapper_inited
) {
511 if (vm_compressor_mode
!= VM_PAGER_COMPRESSOR_WITH_SWAP
) {
512 compaction_swapper_inited
= 1;
515 lck_mtx_lock(&vm_swap_data_lock
);
517 if (!compaction_swapper_inited
) {
518 namelen
= (int)strlen(swapfilename
) + SWAPFILENAME_INDEX_LEN
+ 1;
519 pathname
= kheap_alloc(KHEAP_TEMP
, namelen
, Z_WAITOK
| Z_ZERO
);
520 snprintf(pathname
, namelen
, "%s%d", swapfilename
, 0);
522 vm_swapfile_open(pathname
, &vp
);
		if (vnode_pager_isSSD(vp) == FALSE) {
			/*
			 * swap files live on an HDD, so let's make sure to start swapping
			 * much earlier since we're not worried about SSD write-wear and
			 * we have so little write bandwidth to work with... these values
			 * were derived experimentally by running the performance team's
			 * stock test for evaluating HDD performance against various
			 * combinations and comparing overall results.
			 * Note that the > relationship between these 4 values must be maintained.
			 */
535 if (vm_compressor_minorcompact_threshold_divisor_overridden
== 0) {
536 vm_compressor_minorcompact_threshold_divisor
= 15;
538 if (vm_compressor_majorcompact_threshold_divisor_overridden
== 0) {
539 vm_compressor_majorcompact_threshold_divisor
= 18;
541 if (vm_compressor_unthrottle_threshold_divisor_overridden
== 0) {
542 vm_compressor_unthrottle_threshold_divisor
= 24;
544 if (vm_compressor_catchup_threshold_divisor_overridden
== 0) {
545 vm_compressor_catchup_threshold_divisor
= 30;
548 #if XNU_TARGET_OS_OSX
549 vnode_setswapmount(vp
);
550 vm_swappin_avail
= vnode_getswappin_avail(vp
);
552 if (vm_swappin_avail
) {
553 vm_swappin_enabled
= TRUE
;
555 #endif /* XNU_TARGET_OS_OSX */
556 vm_swapfile_close((uint64_t)pathname
, vp
);
558 kheap_free(KHEAP_TEMP
, pathname
, namelen
);
560 compaction_swapper_inited
= 1;
562 lck_mtx_unlock(&vm_swap_data_lock
);
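/*
 * Note: the one-time init above opens swapfile 0 only to probe its backing
 * vnode -- to pick HDD-friendly compaction thresholds when the store is not
 * an SSD and, on macOS, to record how much pinnable (FusionDrive) swap space
 * is available -- and then closes it again.
 */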
567 vm_swap_consider_defragmenting(int flags
)
569 boolean_t force_defrag
= (flags
& VM_SWAP_FLAGS_FORCE_DEFRAG
);
570 boolean_t force_reclaim
= (flags
& VM_SWAP_FLAGS_FORCE_RECLAIM
);
572 if (compressor_store_stop_compaction
== FALSE
&& !VM_SWAP_BUSY() &&
573 (force_defrag
|| force_reclaim
|| VM_SWAP_SHOULD_DEFRAGMENT() || VM_SWAP_SHOULD_RECLAIM())) {
574 if (!vm_swapfile_gc_thread_running
|| force_defrag
|| force_reclaim
) {
575 lck_mtx_lock(&vm_swap_data_lock
);
578 vm_swap_force_defrag
= TRUE
;
582 vm_swap_force_reclaim
= TRUE
;
585 if (!vm_swapfile_gc_thread_running
) {
586 thread_wakeup((event_t
) &vm_swapfile_gc_needed
);
589 lck_mtx_unlock(&vm_swap_data_lock
);
595 int vm_swap_defragment_yielded
= 0;
596 int vm_swap_defragment_swapin
= 0;
597 int vm_swap_defragment_free
= 0;
598 int vm_swap_defragment_busy
= 0;
601 extern uint32_t c_segment_pages_compressed_incore
;
602 extern uint32_t c_segment_pages_compressed_nearing_limit
;
603 extern uint32_t c_segment_count
;
604 extern uint32_t c_segments_nearing_limit
;
606 boolean_t
memorystatus_kill_on_VM_compressor_space_shortage(boolean_t
);
608 extern bool freezer_incore_cseg_acct
;
609 #endif /* CONFIG_FREEZE */
	/*
	 * have to grab the master lock w/o holding
	 * any locks in spin mode
	 */
	PAGE_REPLACEMENT_DISALLOWED(TRUE);
622 lck_mtx_lock_spin_always(c_list_lock
);
624 while (!queue_empty(&c_swappedout_sparse_list_head
)) {
625 if (compressor_store_stop_compaction
== TRUE
|| VM_SWAP_BUSY()) {
626 vm_swap_defragment_yielded
++;
629 c_seg
= (c_segment_t
)queue_first(&c_swappedout_sparse_list_head
);
631 lck_mtx_lock_spin_always(&c_seg
->c_lock
);
633 assert(c_seg
->c_state
== C_ON_SWAPPEDOUTSPARSE_Q
);
636 lck_mtx_unlock_always(c_list_lock
);
638 PAGE_REPLACEMENT_DISALLOWED(FALSE
);
			/*
			 * c_seg_wait_on_busy consumes c_seg->c_lock
			 */
			c_seg_wait_on_busy(c_seg);
644 PAGE_REPLACEMENT_DISALLOWED(TRUE
);
646 lck_mtx_lock_spin_always(c_list_lock
);
648 vm_swap_defragment_busy
++;
651 if (c_seg
->c_bytes_used
== 0) {
			/*
			 * c_seg_free_locked consumes the c_list_lock
			 */
			c_seg_free_locked(c_seg);
659 vm_swap_defragment_free
++;
661 lck_mtx_unlock_always(c_list_lock
);
664 if (freezer_incore_cseg_acct
) {
665 if ((c_seg
->c_slots_used
+ c_segment_pages_compressed_incore
) >= c_segment_pages_compressed_nearing_limit
) {
666 memorystatus_kill_on_VM_compressor_space_shortage(TRUE
/* async */);
669 uint32_t incore_seg_count
= c_segment_count
- c_swappedout_count
- c_swappedout_sparse_count
;
670 if ((incore_seg_count
+ 1) >= c_segments_nearing_limit
) {
671 memorystatus_kill_on_VM_compressor_space_shortage(TRUE
/* async */);
674 #endif /* CONFIG_FREEZE */
675 if (c_seg_swapin(c_seg
, TRUE
, FALSE
) == 0) {
676 lck_mtx_unlock_always(&c_seg
->c_lock
);
679 vm_swap_defragment_swapin
++;
681 PAGE_REPLACEMENT_DISALLOWED(FALSE
);
683 vm_pageout_io_throttle();
		/*
		 * because write waiters have privilege over readers,
		 * dropping and immediately retaking the master lock will
		 * still allow any thread waiting to acquire the
		 * master lock exclusively an opportunity to take it
		 */
		PAGE_REPLACEMENT_DISALLOWED(TRUE);
693 lck_mtx_lock_spin_always(c_list_lock
);
695 lck_mtx_unlock_always(c_list_lock
);
697 PAGE_REPLACEMENT_DISALLOWED(FALSE
);
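/*
 * Note: vm_swap_defragment's job, as seen above, is to swap sparse segments
 * (queued on c_swappedout_sparse_list_head) back into memory; that releases
 * their on-disk slots so the swapfile GC thread can later reclaim or shrink
 * the backing files.
 */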
703 vm_swapfile_create_thread(void)
708 current_thread()->options
|= TH_OPT_VMPRIV
;
710 vm_swapfile_create_thread_awakened
++;
711 vm_swapfile_create_thread_running
= 1;
		/*
		 * walk through the list of swap files
		 * and do the delayed frees/trims for
		 * any swap file whose count of delayed
		 * frees is above the batch limit
		 */
		vm_swap_handle_delayed_trims(FALSE);
722 lck_mtx_lock(&vm_swap_data_lock
);
724 if (hibernate_in_progress_with_pinned_swap
== TRUE
) {
728 if (compressor_store_stop_compaction
== TRUE
) {
732 clock_get_system_nanotime(&sec
, &nsec
);
734 if (VM_SWAP_SHOULD_CREATE(sec
) == 0) {
738 lck_mtx_unlock(&vm_swap_data_lock
);
740 if (vm_swap_create_file() == FALSE
) {
741 vm_swapfile_last_failed_to_create_ts
= sec
;
742 HIBLOG("vm_swap_create_file failed @ %lu secs\n", (unsigned long)sec
);
744 vm_swapfile_last_successful_create_ts
= sec
;
747 vm_swapfile_create_thread_running
= 0;
749 if (hibernate_in_progress_with_pinned_swap
== TRUE
) {
750 thread_wakeup((event_t
)&hibernate_in_progress_with_pinned_swap
);
753 if (compressor_store_stop_compaction
== TRUE
) {
754 thread_wakeup((event_t
)&compressor_store_stop_compaction
);
757 assert_wait((event_t
)&vm_swapfile_create_needed
, THREAD_UNINT
);
759 lck_mtx_unlock(&vm_swap_data_lock
);
761 thread_block((thread_continue_t
)vm_swapfile_create_thread
);
770 hibernate_pin_swap(boolean_t start
)
772 vm_compaction_swapper_do_init();
774 if (start
== FALSE
) {
775 lck_mtx_lock(&vm_swap_data_lock
);
776 hibernate_in_progress_with_pinned_swap
= FALSE
;
777 lck_mtx_unlock(&vm_swap_data_lock
);
781 if (vm_swappin_enabled
== FALSE
) {
785 lck_mtx_lock(&vm_swap_data_lock
);
787 hibernate_in_progress_with_pinned_swap
= TRUE
;
789 while (vm_swapfile_create_thread_running
|| vm_swapfile_gc_thread_running
) {
790 assert_wait((event_t
)&hibernate_in_progress_with_pinned_swap
, THREAD_UNINT
);
792 lck_mtx_unlock(&vm_swap_data_lock
);
794 thread_block(THREAD_CONTINUE_NULL
);
796 lck_mtx_lock(&vm_swap_data_lock
);
798 if (vm_num_swap_files
> vm_num_pinned_swap_files
) {
799 hibernate_in_progress_with_pinned_swap
= FALSE
;
800 lck_mtx_unlock(&vm_swap_data_lock
);
802 HIBLOG("hibernate_pin_swap failed - vm_num_swap_files = %d, vm_num_pinned_swap_files = %d\n",
803 vm_num_swap_files
, vm_num_pinned_swap_files
);
806 lck_mtx_unlock(&vm_swap_data_lock
);
808 while (VM_SWAP_SHOULD_PIN(MAX_SWAP_FILE_SIZE
)) {
809 if (vm_swap_create_file() == FALSE
) {
818 vm_swapfile_gc_thread(void)
820 boolean_t need_defragment
;
821 boolean_t need_reclaim
;
823 vm_swapfile_gc_thread_awakened
++;
824 vm_swapfile_gc_thread_running
= 1;
827 lck_mtx_lock(&vm_swap_data_lock
);
829 if (hibernate_in_progress_with_pinned_swap
== TRUE
) {
833 if (VM_SWAP_BUSY() || compressor_store_stop_compaction
== TRUE
) {
837 need_defragment
= FALSE
;
838 need_reclaim
= FALSE
;
840 if (VM_SWAP_SHOULD_DEFRAGMENT()) {
841 need_defragment
= TRUE
;
844 if (VM_SWAP_SHOULD_RECLAIM()) {
845 need_defragment
= TRUE
;
848 if (need_defragment
== FALSE
&& need_reclaim
== FALSE
) {
852 vm_swap_force_defrag
= FALSE
;
853 vm_swap_force_reclaim
= FALSE
;
855 lck_mtx_unlock(&vm_swap_data_lock
);
857 if (need_defragment
== TRUE
) {
858 vm_swap_defragment();
860 if (need_reclaim
== TRUE
) {
864 vm_swapfile_gc_thread_running
= 0;
866 if (hibernate_in_progress_with_pinned_swap
== TRUE
) {
867 thread_wakeup((event_t
)&hibernate_in_progress_with_pinned_swap
);
870 if (compressor_store_stop_compaction
== TRUE
) {
871 thread_wakeup((event_t
)&compressor_store_stop_compaction
);
874 assert_wait((event_t
)&vm_swapfile_gc_needed
, THREAD_UNINT
);
876 lck_mtx_unlock(&vm_swap_data_lock
);
878 thread_block((thread_continue_t
)vm_swapfile_gc_thread
);
885 #define VM_SWAPOUT_LIMIT_T2P 4
886 #define VM_SWAPOUT_LIMIT_T1P 4
887 #define VM_SWAPOUT_LIMIT_T0P 6
888 #define VM_SWAPOUT_LIMIT_T0 8
889 #define VM_SWAPOUT_LIMIT_MAX 8
891 #define VM_SWAPOUT_START 0
892 #define VM_SWAPOUT_T2_PASSIVE 1
893 #define VM_SWAPOUT_T1_PASSIVE 2
894 #define VM_SWAPOUT_T0_PASSIVE 3
895 #define VM_SWAPOUT_T0 4
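/*
 * The swapout thread runs a small throttle state machine: it normally issues
 * writes at the passive TIER2 level, drops to TIER1 when aging/hibernate
 * flushing is in progress, unthrottles to passive TIER0 when the compressor
 * is backed up, and only disables passive I/O (plain TIER0) when it must
 * catch up. Each state also bounds how many swapout I/Os may be in flight.
 */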
897 int vm_swapout_state
= VM_SWAPOUT_START
;
898 int vm_swapout_limit
= 1;
900 int vm_swapper_entered_T0
= 0;
901 int vm_swapper_entered_T0P
= 0;
902 int vm_swapper_entered_T1P
= 0;
903 int vm_swapper_entered_T2P
= 0;
907 vm_swapout_thread_throttle_adjust(void)
909 switch (vm_swapout_state
) {
910 case VM_SWAPOUT_START
:
912 vm_swapper_throttle
= THROTTLE_LEVEL_COMPRESSOR_TIER2
;
913 vm_swapper_entered_T2P
++;
915 proc_set_thread_policy_with_tid(kernel_task
, vm_swapout_thread_id
,
916 TASK_POLICY_INTERNAL
, TASK_POLICY_IO
, vm_swapper_throttle
);
917 proc_set_thread_policy_with_tid(kernel_task
, vm_swapout_thread_id
,
918 TASK_POLICY_INTERNAL
, TASK_POLICY_PASSIVE_IO
, TASK_POLICY_ENABLE
);
919 vm_swapout_limit
= VM_SWAPOUT_LIMIT_T2P
;
920 vm_swapout_state
= VM_SWAPOUT_T2_PASSIVE
;
924 case VM_SWAPOUT_T2_PASSIVE
:
926 if (SWAPPER_NEEDS_TO_UNTHROTTLE()) {
927 vm_swapper_throttle
= THROTTLE_LEVEL_COMPRESSOR_TIER0
;
928 vm_swapper_entered_T0P
++;
930 proc_set_thread_policy_with_tid(kernel_task
, vm_swapout_thread_id
,
931 TASK_POLICY_INTERNAL
, TASK_POLICY_IO
, vm_swapper_throttle
);
932 proc_set_thread_policy_with_tid(kernel_task
, vm_swapout_thread_id
,
933 TASK_POLICY_INTERNAL
, TASK_POLICY_PASSIVE_IO
, TASK_POLICY_ENABLE
);
934 vm_swapout_limit
= VM_SWAPOUT_LIMIT_T0P
;
935 vm_swapout_state
= VM_SWAPOUT_T0_PASSIVE
;
939 if (swapout_target_age
|| hibernate_flushing
== TRUE
) {
940 vm_swapper_throttle
= THROTTLE_LEVEL_COMPRESSOR_TIER1
;
941 vm_swapper_entered_T1P
++;
943 proc_set_thread_policy_with_tid(kernel_task
, vm_swapout_thread_id
,
944 TASK_POLICY_INTERNAL
, TASK_POLICY_IO
, vm_swapper_throttle
);
945 proc_set_thread_policy_with_tid(kernel_task
, vm_swapout_thread_id
,
946 TASK_POLICY_INTERNAL
, TASK_POLICY_PASSIVE_IO
, TASK_POLICY_ENABLE
);
947 vm_swapout_limit
= VM_SWAPOUT_LIMIT_T1P
;
948 vm_swapout_state
= VM_SWAPOUT_T1_PASSIVE
;
952 case VM_SWAPOUT_T1_PASSIVE
:
954 if (SWAPPER_NEEDS_TO_UNTHROTTLE()) {
955 vm_swapper_throttle
= THROTTLE_LEVEL_COMPRESSOR_TIER0
;
956 vm_swapper_entered_T0P
++;
958 proc_set_thread_policy_with_tid(kernel_task
, vm_swapout_thread_id
,
959 TASK_POLICY_INTERNAL
, TASK_POLICY_IO
, vm_swapper_throttle
);
960 proc_set_thread_policy_with_tid(kernel_task
, vm_swapout_thread_id
,
961 TASK_POLICY_INTERNAL
, TASK_POLICY_PASSIVE_IO
, TASK_POLICY_ENABLE
);
962 vm_swapout_limit
= VM_SWAPOUT_LIMIT_T0P
;
963 vm_swapout_state
= VM_SWAPOUT_T0_PASSIVE
;
967 if (swapout_target_age
== 0 && hibernate_flushing
== FALSE
) {
968 vm_swapper_throttle
= THROTTLE_LEVEL_COMPRESSOR_TIER2
;
969 vm_swapper_entered_T2P
++;
971 proc_set_thread_policy_with_tid(kernel_task
, vm_swapout_thread_id
,
972 TASK_POLICY_INTERNAL
, TASK_POLICY_IO
, vm_swapper_throttle
);
973 proc_set_thread_policy_with_tid(kernel_task
, vm_swapout_thread_id
,
974 TASK_POLICY_INTERNAL
, TASK_POLICY_PASSIVE_IO
, TASK_POLICY_ENABLE
);
975 vm_swapout_limit
= VM_SWAPOUT_LIMIT_T2P
;
976 vm_swapout_state
= VM_SWAPOUT_T2_PASSIVE
;
980 case VM_SWAPOUT_T0_PASSIVE
:
982 if (SWAPPER_NEEDS_TO_RETHROTTLE()) {
983 vm_swapper_throttle
= THROTTLE_LEVEL_COMPRESSOR_TIER2
;
984 vm_swapper_entered_T2P
++;
986 proc_set_thread_policy_with_tid(kernel_task
, vm_swapout_thread_id
,
987 TASK_POLICY_INTERNAL
, TASK_POLICY_IO
, vm_swapper_throttle
);
988 proc_set_thread_policy_with_tid(kernel_task
, vm_swapout_thread_id
,
989 TASK_POLICY_INTERNAL
, TASK_POLICY_PASSIVE_IO
, TASK_POLICY_ENABLE
);
990 vm_swapout_limit
= VM_SWAPOUT_LIMIT_T2P
;
991 vm_swapout_state
= VM_SWAPOUT_T2_PASSIVE
;
995 if (SWAPPER_NEEDS_TO_CATCHUP()) {
996 vm_swapper_entered_T0
++;
998 proc_set_thread_policy_with_tid(kernel_task
, vm_swapout_thread_id
,
999 TASK_POLICY_INTERNAL
, TASK_POLICY_PASSIVE_IO
, TASK_POLICY_DISABLE
);
1000 vm_swapout_limit
= VM_SWAPOUT_LIMIT_T0
;
1001 vm_swapout_state
= VM_SWAPOUT_T0
;
1007 if (SWAPPER_HAS_CAUGHTUP()) {
1008 vm_swapper_entered_T0P
++;
1010 proc_set_thread_policy_with_tid(kernel_task
, vm_swapout_thread_id
,
1011 TASK_POLICY_INTERNAL
, TASK_POLICY_PASSIVE_IO
, TASK_POLICY_ENABLE
);
1012 vm_swapout_limit
= VM_SWAPOUT_LIMIT_T0P
;
1013 vm_swapout_state
= VM_SWAPOUT_T0_PASSIVE
;
1019 int vm_swapout_found_empty
= 0;
1021 struct swapout_io_completion vm_swapout_ctx
[VM_SWAPOUT_LIMIT_MAX
];
1023 int vm_swapout_soc_busy
= 0;
1024 int vm_swapout_soc_done
= 0;
1027 static struct swapout_io_completion
*
1028 vm_swapout_find_free_soc(void)
1032 for (i
= 0; i
< VM_SWAPOUT_LIMIT_MAX
; i
++) {
1033 if (vm_swapout_ctx
[i
].swp_io_busy
== 0) {
1034 return &vm_swapout_ctx
[i
];
1037 assert(vm_swapout_soc_busy
== VM_SWAPOUT_LIMIT_MAX
);
1042 static struct swapout_io_completion
*
1043 vm_swapout_find_done_soc(void)
1047 if (vm_swapout_soc_done
) {
1048 for (i
= 0; i
< VM_SWAPOUT_LIMIT_MAX
; i
++) {
1049 if (vm_swapout_ctx
[i
].swp_io_done
) {
1050 return &vm_swapout_ctx
[i
];
1058 vm_swapout_complete_soc(struct swapout_io_completion
*soc
)
1062 if (soc
->swp_io_error
) {
1068 lck_mtx_unlock_always(c_list_lock
);
1070 vm_swap_put_finish(soc
->swp_swf
, &soc
->swp_f_offset
, soc
->swp_io_error
, TRUE
/*drop iocount*/);
1071 vm_swapout_finish(soc
->swp_c_seg
, soc
->swp_f_offset
, soc
->swp_c_size
, kr
);
1073 lck_mtx_lock_spin_always(c_list_lock
);
1075 soc
->swp_io_done
= 0;
1076 soc
->swp_io_busy
= 0;
1078 vm_swapout_soc_busy
--;
1079 vm_swapout_soc_done
--;
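/*
 * Note: swapout I/O contexts come from the fixed vm_swapout_ctx[] pool (one
 * per allowed in-flight swapout); swp_io_busy / swp_io_done are only examined
 * or cleared while holding c_list_lock.
 */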
1084 vm_swapout_thread(void)
1087 c_segment_t c_seg
= NULL
;
1088 kern_return_t kr
= KERN_SUCCESS
;
1089 struct swapout_io_completion
*soc
;
1091 current_thread()->options
|= TH_OPT_VMPRIV
;
1093 vm_swapout_thread_awakened
++;
1095 lck_mtx_lock_spin_always(c_list_lock
);
1097 vm_swapout_thread_running
= TRUE
;
1099 while (!queue_empty(&c_swapout_list_head
) && vm_swapout_soc_busy
< vm_swapout_limit
&& !compressor_store_stop_compaction
) {
1100 c_seg
= (c_segment_t
)queue_first(&c_swapout_list_head
);
1102 lck_mtx_lock_spin_always(&c_seg
->c_lock
);
1104 assert(c_seg
->c_state
== C_ON_SWAPOUT_Q
);
1106 if (c_seg
->c_busy
) {
1107 lck_mtx_unlock_always(c_list_lock
);
1109 c_seg_wait_on_busy(c_seg
);
1111 lck_mtx_lock_spin_always(c_list_lock
);
1115 vm_swapout_thread_processed_segments
++;
1117 size
= round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg
->c_populated_offset
));
1120 assert(c_seg
->c_bytes_used
== 0);
1122 if (!c_seg
->c_on_minorcompact_q
) {
1123 c_seg_need_delayed_compaction(c_seg
, TRUE
);
1126 c_seg_switch_state(c_seg
, C_IS_EMPTY
, FALSE
);
1127 lck_mtx_unlock_always(&c_seg
->c_lock
);
1128 lck_mtx_unlock_always(c_list_lock
);
1130 vm_swapout_found_empty
++;
1131 goto c_seg_is_empty
;
1134 c_seg
->c_busy_swapping
= 1;
1136 c_seg_switch_state(c_seg
, C_ON_SWAPIO_Q
, FALSE
);
1138 lck_mtx_unlock_always(c_list_lock
);
1139 lck_mtx_unlock_always(&c_seg
->c_lock
);
1141 #if CHECKSUM_THE_SWAP
1142 c_seg
->cseg_hash
= hash_string((char *)c_seg
->c_store
.c_buffer
, (int)size
);
1143 c_seg
->cseg_swap_size
= size
;
1144 #endif /* CHECKSUM_THE_SWAP */
1147 vm_swap_encrypt(c_seg
);
1148 #endif /* ENCRYPTED_SWAP */
1150 soc
= vm_swapout_find_free_soc();
1153 soc
->swp_upl_ctx
.io_context
= (void *)soc
;
1154 soc
->swp_upl_ctx
.io_done
= (void *)vm_swapout_iodone
;
1155 soc
->swp_upl_ctx
.io_error
= 0;
1157 kr
= vm_swap_put((vm_offset_t
)c_seg
->c_store
.c_buffer
, &soc
->swp_f_offset
, size
, c_seg
, soc
);
1159 if (kr
!= KERN_SUCCESS
) {
1160 if (soc
->swp_io_done
) {
1161 lck_mtx_lock_spin_always(c_list_lock
);
1163 soc
->swp_io_done
= 0;
1164 vm_swapout_soc_done
--;
1166 lck_mtx_unlock_always(c_list_lock
);
1168 vm_swapout_finish(c_seg
, soc
->swp_f_offset
, size
, kr
);
1170 soc
->swp_io_busy
= 1;
1171 vm_swapout_soc_busy
++;
1175 if (c_swapout_count
== 0) {
1176 vm_swap_consider_defragmenting(VM_SWAP_FLAGS_NONE
);
1179 lck_mtx_lock_spin_always(c_list_lock
);
1181 while ((soc
= vm_swapout_find_done_soc())) {
1182 vm_swapout_complete_soc(soc
);
1184 lck_mtx_unlock_always(c_list_lock
);
1186 vm_swapout_thread_throttle_adjust();
1188 lck_mtx_lock_spin_always(c_list_lock
);
1190 while ((soc
= vm_swapout_find_done_soc())) {
1191 vm_swapout_complete_soc(soc
);
1193 lck_mtx_unlock_always(c_list_lock
);
1195 vm_pageout_io_throttle();
1197 lck_mtx_lock_spin_always(c_list_lock
);
		/*
		 * Recheck if we have some c_segs to wakeup
		 * post throttle. And, check to see if we
		 * have any more swapouts needed.
		 */
		if (vm_swapout_soc_done) {
1208 assert_wait((event_t
)&c_swapout_list_head
, THREAD_UNINT
);
1210 vm_swapout_thread_running
= FALSE
;
1212 lck_mtx_unlock_always(c_list_lock
);
1214 thread_block((thread_continue_t
)vm_swapout_thread
);
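/*
 * In effect, the loop above drains c_swapout_list_head: each busy segment is
 * optionally checksummed and encrypted, then handed to vm_swap_put as an
 * asynchronous write; completions arrive via vm_swapout_iodone below and are
 * retired through vm_swapout_finish once the thread re-takes c_list_lock.
 */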
1221 vm_swapout_iodone(void *io_context
, int error
)
1223 struct swapout_io_completion
*soc
;
1225 soc
= (struct swapout_io_completion
*)io_context
;
1227 lck_mtx_lock_spin_always(c_list_lock
);
1229 soc
->swp_io_done
= 1;
1230 soc
->swp_io_error
= error
;
1231 vm_swapout_soc_done
++;
1233 if (!vm_swapout_thread_running
) {
1234 thread_wakeup((event_t
)&c_swapout_list_head
);
1237 lck_mtx_unlock_always(c_list_lock
);
1242 vm_swapout_finish(c_segment_t c_seg
, uint64_t f_offset
, uint32_t size
, kern_return_t kr
)
1244 PAGE_REPLACEMENT_DISALLOWED(TRUE
);
1246 if (kr
== KERN_SUCCESS
) {
1247 kernel_memory_depopulate(compressor_map
, (vm_offset_t
)c_seg
->c_store
.c_buffer
, size
,
1248 KMA_COMPRESSOR
, VM_KERN_MEMORY_COMPRESSOR
);
1252 vm_swap_decrypt(c_seg
);
1254 #endif /* ENCRYPTED_SWAP */
1255 lck_mtx_lock_spin_always(c_list_lock
);
1256 lck_mtx_lock_spin_always(&c_seg
->c_lock
);
1258 if (kr
== KERN_SUCCESS
) {
1259 int new_state
= C_ON_SWAPPEDOUT_Q
;
1260 boolean_t insert_head
= FALSE
;
1262 if (hibernate_flushing
== TRUE
) {
1263 if (c_seg
->c_generation_id
>= first_c_segment_to_warm_generation_id
&&
1264 c_seg
->c_generation_id
<= last_c_segment_to_warm_generation_id
) {
1267 } else if (C_SEG_ONDISK_IS_SPARSE(c_seg
)) {
1268 new_state
= C_ON_SWAPPEDOUTSPARSE_Q
;
1271 c_seg_switch_state(c_seg
, new_state
, insert_head
);
1273 c_seg
->c_store
.c_swap_handle
= f_offset
;
1275 counter_add(&vm_statistics_swapouts
, size
>> PAGE_SHIFT
);
1277 if (c_seg
->c_bytes_used
) {
1278 OSAddAtomic64(-c_seg
->c_bytes_used
, &compressor_bytes_used
);
		/*
		 * Successful swapout. Decrement the in-core compressed pages count.
		 */
		OSAddAtomic(-(c_seg->c_slots_used), &c_segment_pages_compressed_incore);
		assertf(c_segment_pages_compressed_incore >= 0, "-ve incore count %p 0x%x", c_seg, c_segment_pages_compressed_incore);
1287 #endif /* CONFIG_FREEZE */
1289 if (c_seg
->c_overage_swap
== TRUE
) {
1290 c_seg
->c_overage_swap
= FALSE
;
1291 c_overage_swapped_count
--;
1295 if (c_seg
->c_task_owner
) {
1296 c_seg_update_task_owner(c_seg
, NULL
);
1298 #endif /* CONFIG_FREEZE */
1300 c_seg_switch_state(c_seg
, C_ON_AGE_Q
, FALSE
);
1302 if (!c_seg
->c_on_minorcompact_q
&& C_SEG_UNUSED_BYTES(c_seg
) >= PAGE_SIZE
) {
1303 c_seg_need_delayed_compaction(c_seg
, TRUE
);
1306 assert(c_seg
->c_busy_swapping
);
1307 assert(c_seg
->c_busy
);
1309 c_seg
->c_busy_swapping
= 0;
1310 lck_mtx_unlock_always(c_list_lock
);
1312 C_SEG_WAKEUP_DONE(c_seg
);
1313 lck_mtx_unlock_always(&c_seg
->c_lock
);
1315 PAGE_REPLACEMENT_DISALLOWED(FALSE
);
1320 vm_swap_create_file()
1324 boolean_t swap_file_created
= FALSE
;
1325 boolean_t swap_file_reuse
= FALSE
;
1326 boolean_t swap_file_pin
= FALSE
;
1327 struct swapfile
*swf
= NULL
;
	/*
	 * make sure we've got all the info we need
	 * to potentially pin a swap file... we could
	 * be swapping out due to hibernation w/o ever
	 * having run vm_pageout_scan, which is normally
	 * the trigger to do the init
	 */
	vm_compaction_swapper_do_init();

	/*
	 * Any swapfile structure ready for re-use?
	 */
	lck_mtx_lock(&vm_swap_data_lock);
1344 swf
= (struct swapfile
*) queue_first(&swf_global_queue
);
1346 while (queue_end(&swf_global_queue
, (queue_entry_t
)swf
) == FALSE
) {
1347 if (swf
->swp_flags
== SWAP_REUSE
) {
1348 swap_file_reuse
= TRUE
;
1351 swf
= (struct swapfile
*) queue_next(&swf
->swp_queue
);
1354 lck_mtx_unlock(&vm_swap_data_lock
);
1356 if (swap_file_reuse
== FALSE
) {
1357 namelen
= (int)strlen(swapfilename
) + SWAPFILENAME_INDEX_LEN
+ 1;
1359 swf
= kalloc_flags(sizeof *swf
, Z_WAITOK
| Z_ZERO
);
1360 swf
->swp_index
= vm_num_swap_files
+ 1;
1361 swf
->swp_pathlen
= namelen
;
1362 swf
->swp_path
= kheap_alloc(KHEAP_DATA_BUFFERS
, swf
->swp_pathlen
,
1365 snprintf(swf
->swp_path
, namelen
, "%s%d", swapfilename
, vm_num_swap_files
);
1368 vm_swapfile_open(swf
->swp_path
, &swf
->swp_vp
);
1370 if (swf
->swp_vp
== NULL
) {
1371 if (swap_file_reuse
== FALSE
) {
1372 kheap_free(KHEAP_DATA_BUFFERS
, swf
->swp_path
, swf
->swp_pathlen
);
1373 kfree(swf
, sizeof *swf
);
1377 vm_swapfile_can_be_created
= TRUE
;
1379 size
= MAX_SWAP_FILE_SIZE
;
1381 while (size
>= MIN_SWAP_FILE_SIZE
) {
1382 swap_file_pin
= VM_SWAP_SHOULD_PIN(size
);
1384 if (vm_swapfile_preallocate(swf
->swp_vp
, &size
, &swap_file_pin
) == 0) {
1385 int num_bytes_for_bitmap
= 0;
1387 swap_file_created
= TRUE
;
1389 swf
->swp_size
= size
;
1390 swf
->swp_nsegs
= (unsigned int) (size
/ COMPRESSED_SWAP_CHUNK_SIZE
);
1391 swf
->swp_nseginuse
= 0;
1392 swf
->swp_free_hint
= 0;
1394 num_bytes_for_bitmap
= MAX((swf
->swp_nsegs
>> 3), 1);
			/*
			 * Allocate a bitmap that describes the
			 * number of segments held by this swapfile.
			 */
			swf->swp_bitmap = kheap_alloc(KHEAP_DATA_BUFFERS,
			    num_bytes_for_bitmap, Z_WAITOK | Z_ZERO);
1402 swf
->swp_csegs
= kalloc_flags(swf
->swp_nsegs
* sizeof(c_segment_t
),
			/*
			 * passing a NULL trim_list into vnode_trim_list
			 * will return ENOTSUP if trim isn't supported
			 */
			if (vnode_trim_list(swf->swp_vp, NULL, FALSE) == 0) {
1411 swp_trim_supported
= TRUE
;
1414 lck_mtx_lock(&vm_swap_data_lock
);
1416 swf
->swp_flags
= SWAP_READY
;
1418 if (swap_file_reuse
== FALSE
) {
1419 queue_enter(&swf_global_queue
, swf
, struct swapfile
*, swp_queue
);
1422 vm_num_swap_files
++;
1424 vm_swapfile_total_segs_alloced
+= swf
->swp_nsegs
;
1426 if (swap_file_pin
== TRUE
) {
1427 vm_num_pinned_swap_files
++;
1428 swf
->swp_flags
|= SWAP_PINNED
;
1429 vm_swappin_avail
-= swf
->swp_size
;
1432 lck_mtx_unlock(&vm_swap_data_lock
);
1434 thread_wakeup((event_t
) &vm_num_swap_files
);
1435 #if !XNU_TARGET_OS_OSX
1436 if (vm_num_swap_files
== 1) {
1437 c_overage_swapped_limit
= (uint32_t)size
/ C_SEG_BUFSIZE
;
1439 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE
) {
1440 c_overage_swapped_limit
/= 2;
1443 #endif /* !XNU_TARGET_OS_OSX */
1449 if (swap_file_created
== FALSE
) {
1450 vm_swapfile_close((uint64_t)(swf
->swp_path
), swf
->swp_vp
);
1454 if (swap_file_reuse
== FALSE
) {
1455 kheap_free(KHEAP_DATA_BUFFERS
, swf
->swp_path
, swf
->swp_pathlen
);
1456 kfree(swf
, sizeof *swf
);
1459 return swap_file_created
;
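/*
 * Note: vm_swap_create_file first looks for a SWAP_REUSE entry to recycle;
 * otherwise it allocates a new struct swapfile and tries to preallocate
 * backing store starting at MAX_SWAP_FILE_SIZE and, in effect, retrying at
 * smaller sizes down to MIN_SWAP_FILE_SIZE before giving up.
 */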
1462 extern void vnode_put(struct vnode
* vp
);
1464 vm_swap_get(c_segment_t c_seg
, uint64_t f_offset
, uint64_t size
)
1466 struct swapfile
*swf
= NULL
;
1467 uint64_t file_offset
= 0;
1470 assert(c_seg
->c_store
.c_buffer
);
1472 lck_mtx_lock(&vm_swap_data_lock
);
1474 swf
= vm_swapfile_for_handle(f_offset
);
1476 if (swf
== NULL
|| (!(swf
->swp_flags
& SWAP_READY
) && !(swf
->swp_flags
& SWAP_RECLAIM
))) {
1477 vm_swap_get_failures
++;
1481 swf
->swp_io_count
++;
1483 lck_mtx_unlock(&vm_swap_data_lock
);
1485 #if DEVELOPMENT || DEBUG
1486 C_SEG_MAKE_WRITEABLE(c_seg
);
1488 file_offset
= (f_offset
& SWAP_SLOT_MASK
);
1490 if ((retval
= vnode_getwithref(swf
->swp_vp
)) != 0) {
1491 printf("vm_swap_get: vnode_getwithref on swapfile failed with %d\n", retval
);
1493 retval
= vm_swapfile_io(swf
->swp_vp
, file_offset
, (uint64_t)c_seg
->c_store
.c_buffer
, (int)(size
/ PAGE_SIZE_64
), SWAP_READ
, NULL
);
1494 vnode_put(swf
->swp_vp
);
1497 #if DEVELOPMENT || DEBUG
1498 C_SEG_WRITE_PROTECT(c_seg
);
1501 counter_add(&vm_statistics_swapins
, size
>> PAGE_SHIFT
);
1503 vm_swap_get_failures
++;
	/*
	 * Free this slot in the swap structure.
	 */
	vm_swap_free(f_offset);
1511 lck_mtx_lock(&vm_swap_data_lock
);
1512 swf
->swp_io_count
--;
1514 if ((swf
->swp_flags
& SWAP_WANTED
) && swf
->swp_io_count
== 0) {
1515 swf
->swp_flags
&= ~SWAP_WANTED
;
1516 thread_wakeup((event_t
) &swf
->swp_flags
);
1519 lck_mtx_unlock(&vm_swap_data_lock
);
1522 return KERN_SUCCESS
;
1524 return KERN_FAILURE
;
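/*
 * vm_swap_put, below, allocates a slot by scanning the chosen swapfile's
 * bitmap forward from swp_free_hint; each bit covers one
 * COMPRESSED_SWAP_CHUNK_SIZE chunk, and the resulting handle is
 * (swapfile_index << SWAP_DEVICE_SHIFT) | file_offset.
 */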
1529 vm_swap_put(vm_offset_t addr
, uint64_t *f_offset
, uint32_t size
, c_segment_t c_seg
, struct swapout_io_completion
*soc
)
1531 unsigned int segidx
= 0;
1532 struct swapfile
*swf
= NULL
;
1533 uint64_t file_offset
= 0;
1534 uint64_t swapfile_index
= 0;
1535 unsigned int byte_for_segidx
= 0;
1536 unsigned int offset_within_byte
= 0;
1537 boolean_t swf_eligible
= FALSE
;
1538 boolean_t waiting
= FALSE
;
1539 boolean_t retried
= FALSE
;
1543 void *upl_ctx
= NULL
;
1544 boolean_t drop_iocount
= FALSE
;
1546 if (addr
== 0 || f_offset
== NULL
|| compressor_store_stop_compaction
) {
1547 return KERN_FAILURE
;
1550 lck_mtx_lock(&vm_swap_data_lock
);
1552 swf
= (struct swapfile
*) queue_first(&swf_global_queue
);
1554 while (queue_end(&swf_global_queue
, (queue_entry_t
)swf
) == FALSE
) {
1555 segidx
= swf
->swp_free_hint
;
1557 swf_eligible
= (swf
->swp_flags
& SWAP_READY
) && (swf
->swp_nseginuse
< swf
->swp_nsegs
);
1560 while (segidx
< swf
->swp_nsegs
) {
1561 byte_for_segidx
= segidx
>> 3;
1562 offset_within_byte
= segidx
% 8;
1564 if ((swf
->swp_bitmap
)[byte_for_segidx
] & (1 << offset_within_byte
)) {
1569 (swf
->swp_bitmap
)[byte_for_segidx
] |= (1 << offset_within_byte
);
1571 file_offset
= segidx
* COMPRESSED_SWAP_CHUNK_SIZE
;
1572 swf
->swp_nseginuse
++;
1573 swf
->swp_io_count
++;
1574 swf
->swp_csegs
[segidx
] = c_seg
;
1576 swapfile_index
= swf
->swp_index
;
1577 vm_swapfile_total_segs_used
++;
1579 clock_get_system_nanotime(&sec
, &nsec
);
1581 if (VM_SWAP_SHOULD_CREATE(sec
) && !vm_swapfile_create_thread_running
) {
1582 thread_wakeup((event_t
) &vm_swapfile_create_needed
);
1585 lck_mtx_unlock(&vm_swap_data_lock
);
1590 swf
= (struct swapfile
*) queue_next(&swf
->swp_queue
);
1592 assert(queue_end(&swf_global_queue
, (queue_entry_t
) swf
));
	/*
	 * we've run out of swap segments, but may not
	 * be in a position to immediately create a new swap
	 * file if we've recently failed to create due to a lack
	 * of free space in the root filesystem... we'll try
	 * to kick that create off, but in any event we're going
	 * to take a breather (up to 1 second) so that we're not caught in a tight
	 * loop back in "vm_compressor_compact_and_swap" trying to stuff
	 * segments into swap files only to have them immediately put back
	 * on the c_age queue due to vm_swap_put failing.
	 *
	 * if we're doing these puts due to a hibernation flush,
	 * no need to block... setting hibernate_no_swapspace to TRUE,
	 * will cause "vm_compressor_compact_and_swap" to immediately abort
	 */
	clock_get_system_nanotime(&sec, &nsec);
1611 if (VM_SWAP_SHOULD_CREATE(sec
) && !vm_swapfile_create_thread_running
) {
1612 thread_wakeup((event_t
) &vm_swapfile_create_needed
);
1615 if (hibernate_flushing
== FALSE
|| VM_SWAP_SHOULD_CREATE(sec
)) {
1617 assert_wait_timeout((event_t
) &vm_num_swap_files
, THREAD_INTERRUPTIBLE
, 1000, 1000 * NSEC_PER_USEC
);
1619 hibernate_no_swapspace
= TRUE
;
1622 lck_mtx_unlock(&vm_swap_data_lock
);
1624 if (waiting
== TRUE
) {
1625 thread_block(THREAD_CONTINUE_NULL
);
1627 if (retried
== FALSE
&& hibernate_flushing
== TRUE
) {
1632 vm_swap_put_failures_no_swap_file
++;
1634 return KERN_FAILURE
;
1637 assert(c_seg
->c_busy_swapping
);
1638 assert(c_seg
->c_busy
);
1639 assert(!c_seg
->c_on_minorcompact_q
);
1641 *f_offset
= (swapfile_index
<< SWAP_DEVICE_SHIFT
) | file_offset
;
1644 soc
->swp_c_seg
= c_seg
;
1645 soc
->swp_c_size
= size
;
1649 soc
->swp_io_error
= 0;
1650 soc
->swp_io_done
= 0;
1652 upl_ctx
= (void *)&soc
->swp_upl_ctx
;
1655 if ((error
= vnode_getwithref(swf
->swp_vp
)) != 0) {
1656 printf("vm_swap_put: vnode_getwithref on swapfile failed with %d\n", error
);
1658 error
= vm_swapfile_io(swf
->swp_vp
, file_offset
, addr
, (int) (size
/ PAGE_SIZE_64
), SWAP_WRITE
, upl_ctx
);
1659 drop_iocount
= TRUE
;
1662 if (error
|| upl_ctx
== NULL
) {
1663 return vm_swap_put_finish(swf
, f_offset
, error
, drop_iocount
);
1666 return KERN_SUCCESS
;
1670 vm_swap_put_finish(struct swapfile
*swf
, uint64_t *f_offset
, int error
, boolean_t drop_iocount
)
1673 vnode_put(swf
->swp_vp
);
1676 lck_mtx_lock(&vm_swap_data_lock
);
1678 swf
->swp_io_count
--;
1680 if ((swf
->swp_flags
& SWAP_WANTED
) && swf
->swp_io_count
== 0) {
1681 swf
->swp_flags
&= ~SWAP_WANTED
;
1682 thread_wakeup((event_t
) &swf
->swp_flags
);
1684 lck_mtx_unlock(&vm_swap_data_lock
);
1687 vm_swap_free(*f_offset
);
1688 vm_swap_put_failures
++;
1690 return KERN_FAILURE
;
1692 return KERN_SUCCESS
;
1697 vm_swap_free_now(struct swapfile
*swf
, uint64_t f_offset
)
1699 uint64_t file_offset
= 0;
1700 unsigned int segidx
= 0;
1703 if ((swf
->swp_flags
& SWAP_READY
) || (swf
->swp_flags
& SWAP_RECLAIM
)) {
1704 unsigned int byte_for_segidx
= 0;
1705 unsigned int offset_within_byte
= 0;
1707 file_offset
= (f_offset
& SWAP_SLOT_MASK
);
1708 segidx
= (unsigned int) (file_offset
/ COMPRESSED_SWAP_CHUNK_SIZE
);
1710 byte_for_segidx
= segidx
>> 3;
1711 offset_within_byte
= segidx
% 8;
1713 if ((swf
->swp_bitmap
)[byte_for_segidx
] & (1 << offset_within_byte
)) {
1714 (swf
->swp_bitmap
)[byte_for_segidx
] &= ~(1 << offset_within_byte
);
1716 swf
->swp_csegs
[segidx
] = NULL
;
1718 swf
->swp_nseginuse
--;
1719 vm_swapfile_total_segs_used
--;
1721 if (segidx
< swf
->swp_free_hint
) {
1722 swf
->swp_free_hint
= segidx
;
1725 if (VM_SWAP_SHOULD_RECLAIM() && !vm_swapfile_gc_thread_running
) {
1726 thread_wakeup((event_t
) &vm_swapfile_gc_needed
);
1732 uint32_t vm_swap_free_now_count
= 0;
1733 uint32_t vm_swap_free_delayed_count
= 0;
1737 vm_swap_free(uint64_t f_offset
)
1739 struct swapfile
*swf
= NULL
;
1740 struct trim_list
*tl
= NULL
;
1744 if (swp_trim_supported
== TRUE
) {
1745 tl
= kalloc(sizeof(struct trim_list
));
1748 lck_mtx_lock(&vm_swap_data_lock
);
1750 swf
= vm_swapfile_for_handle(f_offset
);
1752 if (swf
&& (swf
->swp_flags
& (SWAP_READY
| SWAP_RECLAIM
))) {
1753 if (swp_trim_supported
== FALSE
|| (swf
->swp_flags
& SWAP_RECLAIM
)) {
			/*
			 * don't delay the free if the underlying disk doesn't support
			 * trim, or we're in the midst of reclaiming this swap file since
			 * we don't want to move segments that are technically free
			 * but not yet handled by the delayed free mechanism
			 */
			vm_swap_free_now(swf, f_offset);
1762 vm_swap_free_now_count
++;
1765 tl
->tl_offset
= f_offset
& SWAP_SLOT_MASK
;
1766 tl
->tl_length
= COMPRESSED_SWAP_CHUNK_SIZE
;
1768 tl
->tl_next
= swf
->swp_delayed_trim_list_head
;
1769 swf
->swp_delayed_trim_list_head
= tl
;
1770 swf
->swp_delayed_trim_count
++;
1773 if (VM_SWAP_SHOULD_TRIM(swf
) && !vm_swapfile_create_thread_running
) {
1774 clock_get_system_nanotime(&sec
, &nsec
);
1776 if (sec
> dont_trim_until_ts
) {
1777 thread_wakeup((event_t
) &vm_swapfile_create_needed
);
1780 vm_swap_free_delayed_count
++;
1783 lck_mtx_unlock(&vm_swap_data_lock
);
1786 kfree(tl
, sizeof(struct trim_list
));
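/*
 * Note: when the backing vnode supports TRIM, freed slots are not returned to
 * the bitmap immediately; they are batched on swp_delayed_trim_list_head and
 * handed back (with a single vnode_trim_list call) by vm_swap_do_delayed_trim
 * once VM_SWAPFILE_DELAYED_TRIM_MAX entries have accumulated.
 */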
1792 vm_swap_wait_on_trim_handling_in_progress()
1794 while (delayed_trim_handling_in_progress
== TRUE
) {
1795 assert_wait((event_t
) &delayed_trim_handling_in_progress
, THREAD_UNINT
);
1796 lck_mtx_unlock(&vm_swap_data_lock
);
1798 thread_block(THREAD_CONTINUE_NULL
);
1800 lck_mtx_lock(&vm_swap_data_lock
);
1806 vm_swap_handle_delayed_trims(boolean_t force_now
)
1808 struct swapfile
*swf
= NULL
;
	/*
	 * serialize the race between us and vm_swap_reclaim...
	 * if vm_swap_reclaim wins it will turn off SWAP_READY
	 * on the victim it has chosen... we can just skip over
	 * that file since vm_swap_reclaim will first process
	 * all of the delayed trims associated with it
	 */

	if (compressor_store_stop_compaction == TRUE) {
1822 lck_mtx_lock(&vm_swap_data_lock
);
1824 delayed_trim_handling_in_progress
= TRUE
;
1826 lck_mtx_unlock(&vm_swap_data_lock
);
	/*
	 * no need to hold the lock to walk the swf list since
	 * vm_swap_create (the only place where we add to this list)
	 * is run on the same thread as this function
	 * and vm_swap_reclaim doesn't remove items from this list
	 * instead marking them with SWAP_REUSE for future re-use
	 */
	swf = (struct swapfile *) queue_first(&swf_global_queue);
1837 while (queue_end(&swf_global_queue
, (queue_entry_t
)swf
) == FALSE
) {
1838 if ((swf
->swp_flags
& SWAP_READY
) && (force_now
== TRUE
|| VM_SWAP_SHOULD_TRIM(swf
))) {
1839 assert(!(swf
->swp_flags
& SWAP_RECLAIM
));
1840 vm_swap_do_delayed_trim(swf
);
1842 swf
= (struct swapfile
*) queue_next(&swf
->swp_queue
);
1844 lck_mtx_lock(&vm_swap_data_lock
);
1846 delayed_trim_handling_in_progress
= FALSE
;
1847 thread_wakeup((event_t
) &delayed_trim_handling_in_progress
);
1849 if (VM_SWAP_SHOULD_RECLAIM() && !vm_swapfile_gc_thread_running
) {
1850 thread_wakeup((event_t
) &vm_swapfile_gc_needed
);
1853 lck_mtx_unlock(&vm_swap_data_lock
);
1857 vm_swap_do_delayed_trim(struct swapfile
*swf
)
1859 struct trim_list
*tl
, *tl_head
;
1862 if (compressor_store_stop_compaction
== TRUE
) {
1866 if ((error
= vnode_getwithref(swf
->swp_vp
)) != 0) {
1867 printf("vm_swap_do_delayed_trim: vnode_getwithref on swapfile failed with %d\n", error
);
1871 lck_mtx_lock(&vm_swap_data_lock
);
1873 tl_head
= swf
->swp_delayed_trim_list_head
;
1874 swf
->swp_delayed_trim_list_head
= NULL
;
1875 swf
->swp_delayed_trim_count
= 0;
1877 lck_mtx_unlock(&vm_swap_data_lock
);
1879 vnode_trim_list(swf
->swp_vp
, tl_head
, TRUE
);
1881 (void) vnode_put(swf
->swp_vp
);
1883 while ((tl
= tl_head
) != NULL
) {
1884 unsigned int segidx
= 0;
1885 unsigned int byte_for_segidx
= 0;
1886 unsigned int offset_within_byte
= 0;
1888 lck_mtx_lock(&vm_swap_data_lock
);
1890 segidx
= (unsigned int) (tl
->tl_offset
/ COMPRESSED_SWAP_CHUNK_SIZE
);
1892 byte_for_segidx
= segidx
>> 3;
1893 offset_within_byte
= segidx
% 8;
1895 if ((swf
->swp_bitmap
)[byte_for_segidx
] & (1 << offset_within_byte
)) {
1896 (swf
->swp_bitmap
)[byte_for_segidx
] &= ~(1 << offset_within_byte
);
1898 swf
->swp_csegs
[segidx
] = NULL
;
1900 swf
->swp_nseginuse
--;
1901 vm_swapfile_total_segs_used
--;
1903 if (segidx
< swf
->swp_free_hint
) {
1904 swf
->swp_free_hint
= segidx
;
1907 lck_mtx_unlock(&vm_swap_data_lock
);
1909 tl_head
= tl
->tl_next
;
1911 kfree(tl
, sizeof(struct trim_list
));
1922 int vm_swap_reclaim_yielded
= 0;
1925 vm_swap_reclaim(void)
1927 vm_offset_t addr
= 0;
1928 unsigned int segidx
= 0;
1929 uint64_t f_offset
= 0;
1930 struct swapfile
*swf
= NULL
;
1931 struct swapfile
*smallest_swf
= NULL
;
1932 unsigned int min_nsegs
= 0;
1933 unsigned int byte_for_segidx
= 0;
1934 unsigned int offset_within_byte
= 0;
1935 uint32_t c_size
= 0;
1937 c_segment_t c_seg
= NULL
;
1939 if (kernel_memory_allocate(compressor_map
, (vm_offset_t
*)(&addr
), C_SEG_BUFSIZE
, 0, KMA_KOBJECT
, VM_KERN_MEMORY_COMPRESSOR
) != KERN_SUCCESS
) {
1940 panic("vm_swap_reclaim: kernel_memory_allocate failed\n");
1943 lck_mtx_lock(&vm_swap_data_lock
);
	/*
	 * if we're running the swapfile list looking for
	 * candidates with delayed trims, we need to
	 * wait before making our decision concerning
	 * the swapfile we want to reclaim
	 */
	vm_swap_wait_on_trim_handling_in_progress();

	/*
	 * from here until we knock down the SWAP_READY bit,
	 * we need to remain behind the vm_swap_data_lock...
	 * once that bit has been turned off, "vm_swap_handle_delayed_trims"
	 * will not consider this swapfile for processing
	 */
	swf = (struct swapfile *) queue_first(&swf_global_queue);
	min_nsegs = MAX_SWAP_FILE_SIZE / COMPRESSED_SWAP_CHUNK_SIZE;
1961 smallest_swf
= NULL
;
1963 while (queue_end(&swf_global_queue
, (queue_entry_t
)swf
) == FALSE
) {
1964 if ((swf
->swp_flags
& SWAP_READY
) && (swf
->swp_nseginuse
<= min_nsegs
)) {
1966 min_nsegs
= swf
->swp_nseginuse
;
1968 swf
= (struct swapfile
*) queue_next(&swf
->swp_queue
);
1971 if (smallest_swf
== NULL
) {
1978 swf
->swp_flags
&= ~SWAP_READY
;
1979 swf
->swp_flags
|= SWAP_RECLAIM
;
1981 if (swf
->swp_delayed_trim_count
) {
1982 lck_mtx_unlock(&vm_swap_data_lock
);
1984 vm_swap_do_delayed_trim(swf
);
1986 lck_mtx_lock(&vm_swap_data_lock
);
1990 while (segidx
< swf
->swp_nsegs
) {
		/*
		 * Wait for outgoing I/Os.
		 */
		while (swf->swp_io_count) {
1996 swf
->swp_flags
|= SWAP_WANTED
;
1998 assert_wait((event_t
) &swf
->swp_flags
, THREAD_UNINT
);
1999 lck_mtx_unlock(&vm_swap_data_lock
);
2001 thread_block(THREAD_CONTINUE_NULL
);
2003 lck_mtx_lock(&vm_swap_data_lock
);
2005 if (compressor_store_stop_compaction
== TRUE
|| VM_SWAP_SHOULD_ABORT_RECLAIM() || VM_SWAP_BUSY()) {
2006 vm_swap_reclaim_yielded
++;
2010 byte_for_segidx
= segidx
>> 3;
2011 offset_within_byte
= segidx
% 8;
2013 if (((swf
->swp_bitmap
)[byte_for_segidx
] & (1 << offset_within_byte
)) == 0) {
2018 c_seg
= swf
->swp_csegs
[segidx
];
2021 lck_mtx_lock_spin_always(&c_seg
->c_lock
);
2023 if (c_seg
->c_busy
) {
			/*
			 * a swapped out c_segment in the process of being freed will remain in the
			 * busy state until after the vm_swap_free is called on it... vm_swap_free
			 * takes the vm_swap_data_lock, so can't change the swap state until after
			 * we drop the vm_swap_data_lock... once we do, vm_swap_free will complete
			 * which will allow c_seg_free_locked to clear busy and wake up this thread...
			 * at that point, we re-look up the swap state which will now indicate that
			 * this c_segment no longer exists.
			 */
			c_seg->c_wanted = 1;
2035 assert_wait((event_t
) (c_seg
), THREAD_UNINT
);
2036 lck_mtx_unlock_always(&c_seg
->c_lock
);
2038 lck_mtx_unlock(&vm_swap_data_lock
);
2040 thread_block(THREAD_CONTINUE_NULL
);
2042 lck_mtx_lock(&vm_swap_data_lock
);
2044 goto ReTry_for_cseg
;
2046 (swf
->swp_bitmap
)[byte_for_segidx
] &= ~(1 << offset_within_byte
);
2048 f_offset
= segidx
* COMPRESSED_SWAP_CHUNK_SIZE
;
2050 assert(c_seg
== swf
->swp_csegs
[segidx
]);
2051 swf
->swp_csegs
[segidx
] = NULL
;
2052 swf
->swp_nseginuse
--;
2054 vm_swapfile_total_segs_used
--;
2056 lck_mtx_unlock(&vm_swap_data_lock
);
2058 assert(C_SEG_IS_ONDISK(c_seg
));
2061 c_seg
->c_busy_swapping
= 1;
2062 #if !CHECKSUM_THE_SWAP
2063 c_seg_trim_tail(c_seg
);
2065 c_size
= round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg
->c_populated_offset
));
2067 assert(c_size
<= C_SEG_BUFSIZE
&& c_size
);
2069 lck_mtx_unlock_always(&c_seg
->c_lock
);
2071 if (vnode_getwithref(swf
->swp_vp
)) {
2072 printf("vm_swap_reclaim: vnode_getwithref on swapfile failed.\n");
2073 vm_swap_get_failures
++;
2074 goto swap_io_failed
;
2076 if (vm_swapfile_io(swf
->swp_vp
, f_offset
, addr
, (int)(c_size
/ PAGE_SIZE_64
), SWAP_READ
, NULL
)) {
			/*
			 * reading the data back in failed, so convert c_seg
			 * to a swapped in c_segment that contains no data
			 */
			c_seg_swapin_requeue(c_seg, FALSE, TRUE, FALSE);
			/*
			 * returns with c_busy_swapping cleared
			 */
			vnode_put(swf->swp_vp);
2086 vm_swap_get_failures
++;
2087 goto swap_io_failed
;
2089 vnode_put(swf
->swp_vp
);
2092 counter_add(&vm_statistics_swapins
, c_size
>> PAGE_SHIFT
);
2094 if (vm_swap_put(addr
, &f_offset
, c_size
, c_seg
, NULL
)) {
2095 vm_offset_t c_buffer
;
			/*
			 * the put failed, so convert c_seg to a fully swapped in c_segment
			 */
			c_buffer = (vm_offset_t)C_SEG_BUFFER_ADDRESS(c_seg->c_mysegno);
2103 kernel_memory_populate(compressor_map
, c_buffer
, c_size
, KMA_COMPRESSOR
, VM_KERN_MEMORY_COMPRESSOR
);
2105 memcpy((char *)c_buffer
, (char *)addr
, c_size
);
2107 c_seg
->c_store
.c_buffer
= (int32_t *)c_buffer
;
2109 vm_swap_decrypt(c_seg
);
2110 #endif /* ENCRYPTED_SWAP */
2111 c_seg_swapin_requeue(c_seg
, TRUE
, TRUE
, FALSE
);
2113 * returns with c_busy_swapping cleared
2115 OSAddAtomic64(c_seg
->c_bytes_used
, &compressor_bytes_used
);
2117 goto swap_io_failed
;
2119 counter_add(&vm_statistics_swapouts
, c_size
>> PAGE_SHIFT
);
2121 lck_mtx_lock_spin_always(&c_seg
->c_lock
);
2123 assert(C_SEG_IS_ONDISK(c_seg
));
		/*
		 * The c_seg will now know about the new location on disk.
		 */
		c_seg->c_store.c_swap_handle = f_offset;
2129 assert(c_seg
->c_busy_swapping
);
2130 c_seg
->c_busy_swapping
= 0;
2132 assert(c_seg
->c_busy
);
2133 C_SEG_WAKEUP_DONE(c_seg
);
2135 lck_mtx_unlock_always(&c_seg
->c_lock
);
2136 lck_mtx_lock(&vm_swap_data_lock
);
2139 if (swf
->swp_nseginuse
) {
2140 swf
->swp_flags
&= ~SWAP_RECLAIM
;
2141 swf
->swp_flags
|= SWAP_READY
;
		/*
		 * We don't remove this inactive swf from the queue.
		 * That way, we can re-use it when needed again and
		 * preserve the namespace. The delayed_trim processing
		 * is also dependent on us not removing swfs from the queue.
		 */
		//queue_remove(&swf_global_queue, swf, struct swapfile*, swp_queue);
2153 vm_swapfile_total_segs_alloced
-= swf
->swp_nsegs
;
2155 lck_mtx_unlock(&vm_swap_data_lock
);
2157 vm_swapfile_close((uint64_t)(swf
->swp_path
), swf
->swp_vp
);
2159 kfree(swf
->swp_csegs
, swf
->swp_nsegs
* sizeof(c_segment_t
));
2160 kheap_free(KHEAP_DATA_BUFFERS
, swf
->swp_bitmap
,
2161 MAX((swf
->swp_nsegs
>> 3), 1));
2163 lck_mtx_lock(&vm_swap_data_lock
);
2165 if (swf
->swp_flags
& SWAP_PINNED
) {
2166 vm_num_pinned_swap_files
--;
2167 vm_swappin_avail
+= swf
->swp_size
;
2172 swf
->swp_free_hint
= 0;
2174 swf
->swp_flags
= SWAP_REUSE
;
2176 vm_num_swap_files
--;
2179 thread_wakeup((event_t
) &swf
->swp_flags
);
2180 lck_mtx_unlock(&vm_swap_data_lock
);
2182 kmem_free(compressor_map
, (vm_offset_t
) addr
, C_SEG_BUFSIZE
);
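/*
 * In summary, vm_swap_reclaim picks the SWAP_READY swapfile with the fewest
 * in-use segments, flips it to SWAP_RECLAIM, and then, segment by segment,
 * reads each live c_segment back through a scratch buffer and re-puts it into
 * another file; when the pass ends, the file is returned to SWAP_READY if
 * segments are still in use, otherwise it is closed and parked as SWAP_REUSE.
 */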
2187 vm_swap_get_total_space(void)
2189 uint64_t total_space
= 0;
2191 total_space
= (uint64_t)vm_swapfile_total_segs_alloced
* COMPRESSED_SWAP_CHUNK_SIZE
;
2197 vm_swap_get_used_space(void)
2199 uint64_t used_space
= 0;
2201 used_space
= (uint64_t)vm_swapfile_total_segs_used
* COMPRESSED_SWAP_CHUNK_SIZE
;
2207 vm_swap_get_free_space(void)
2209 return vm_swap_get_total_space() - vm_swap_get_used_space();
2213 vm_swap_get_max_configured_space(void)
2215 int num_swap_files
= (vm_num_swap_files_config
? vm_num_swap_files_config
: VM_MAX_SWAP_FILE_NUM
);
2216 return num_swap_files
* MAX_SWAP_FILE_SIZE
;
2220 vm_swap_low_on_space(void)
2222 if (vm_num_swap_files
== 0 && vm_swapfile_can_be_created
== FALSE
) {
2226 if (((vm_swapfile_total_segs_alloced
- vm_swapfile_total_segs_used
) < ((unsigned int)VM_SWAPFILE_HIWATER_SEGS
) / 8)) {
2227 if (vm_num_swap_files
== 0 && !SWAPPER_NEEDS_TO_UNTHROTTLE()) {
2231 if (vm_swapfile_last_failed_to_create_ts
>= vm_swapfile_last_successful_create_ts
) {
2239 vm_swap_out_of_space(void)
2241 if ((vm_num_swap_files
== vm_num_swap_files_config
) &&
2242 ((vm_swapfile_total_segs_alloced
- vm_swapfile_total_segs_used
) < VM_SWAPOUT_LIMIT_MAX
)) {
		/*
		 * Last swapfile and we have only space for the
		 * last few swapouts.
		 */
2254 vm_swap_files_pinned(void)
2258 if (vm_swappin_enabled
== FALSE
) {
2262 result
= (vm_num_pinned_swap_files
== vm_num_swap_files
);
2269 vm_swap_max_budget(uint64_t *freeze_daily_budget
)
2271 boolean_t use_device_value
= FALSE
;
2272 struct swapfile
*swf
= NULL
;
2274 if (vm_num_swap_files
) {
2275 lck_mtx_lock(&vm_swap_data_lock
);
2277 swf
= (struct swapfile
*) queue_first(&swf_global_queue
);
2280 while (queue_end(&swf_global_queue
, (queue_entry_t
)swf
) == FALSE
) {
2281 if (swf
->swp_flags
== SWAP_READY
) {
2282 assert(swf
->swp_vp
);
2284 if (vm_swap_vol_get_budget(swf
->swp_vp
, freeze_daily_budget
) == 0) {
2285 use_device_value
= TRUE
;
2289 swf
= (struct swapfile
*) queue_next(&swf
->swp_queue
);
2293 lck_mtx_unlock(&vm_swap_data_lock
);
		/*
		 * This block is used for the initial budget value before any swap files
		 * are created. We create a temp swap file to get the budget.
		 */
2300 struct vnode
*temp_vp
= NULL
;
2302 vm_swapfile_open(swapfilename
, &temp_vp
);
2305 if (vm_swap_vol_get_budget(temp_vp
, freeze_daily_budget
) == 0) {
2306 use_device_value
= TRUE
;
2309 vm_swapfile_close((uint64_t)&swapfilename
, temp_vp
);
2312 *freeze_daily_budget
= 0;
2316 return use_device_value
;
2318 #endif /* CONFIG_FREEZE */