/*
 * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include "vm_compressor_backing_store.h"
#include <vm/vm_protos.h>

#include <IOKit/IOHibernatePrivate.h>

#include <kern/policy_internal.h>
boolean_t	compressor_store_stop_compaction = FALSE;
boolean_t	vm_swapfile_create_needed = FALSE;
boolean_t	vm_swapfile_gc_needed = FALSE;

int		swapper_throttle = -1;
boolean_t	swapper_throttle_inited = FALSE;
uint64_t	vm_swapout_thread_id;

uint64_t	vm_swap_put_failures = 0;
uint64_t	vm_swap_get_failures = 0;
int		vm_num_swap_files = 0;
int		vm_num_pinned_swap_files = 0;
int		vm_swapout_thread_processed_segments = 0;
int		vm_swapout_thread_awakened = 0;
int		vm_swapfile_create_thread_awakened = 0;
int		vm_swapfile_create_thread_running = 0;
int		vm_swapfile_gc_thread_awakened = 0;
int		vm_swapfile_gc_thread_running = 0;

int64_t		vm_swappin_avail = 0;
boolean_t	vm_swappin_enabled = FALSE;
unsigned int	vm_swapfile_total_segs_alloced = 0;
unsigned int	vm_swapfile_total_segs_used = 0;

char		swapfilename[MAX_SWAPFILENAME_LEN + 1] = SWAP_FILE_NAME;

extern vm_map_t compressor_map;

#define SWAP_READY	0x1	/* Swap file is ready to be used */
#define SWAP_RECLAIM	0x2	/* Swap file is marked to be reclaimed */
#define SWAP_WANTED	0x4	/* Swap file has waiters */
#define SWAP_REUSE	0x8	/* Swap file is on the Q and has a name. Reuse after init-ing.*/
#define SWAP_PINNED	0x10	/* Swap file is pinned (FusionDrive) */
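
/*
 * Over a swapfile's lifetime these flags move from SWAP_REUSE (an empty,
 * named slot left on the global queue) to SWAP_READY (in service), and to
 * SWAP_RECLAIM while vm_swap_reclaim drains it; SWAP_WANTED and SWAP_PINNED
 * are modifiers layered on top of that basic state.
 */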

struct swapfile {
	queue_head_t		swp_queue;	/* list of swap files */
	char			*swp_path;	/* saved pathname of swap file */
	struct vnode		*swp_vp;	/* backing vnode */
	uint64_t		swp_size;	/* size of this swap file */
	uint8_t			*swp_bitmap;	/* bitmap showing the alloced/freed slots in the swap file */
	unsigned int		swp_pathlen;	/* length of pathname */
	unsigned int		swp_nsegs;	/* #segments we can use */
	unsigned int		swp_nseginuse;	/* #segments in use */
	unsigned int		swp_index;	/* index of this swap file */
	unsigned int		swp_flags;	/* state of swap file */
	unsigned int		swp_free_hint;	/* offset of 1st free chunk */
	unsigned int		swp_io_count;	/* count of outstanding I/Os */
	c_segment_t		*swp_csegs;	/* back pointers to the c_segments. Used during swap reclaim. */

	struct trim_list	*swp_delayed_trim_list_head;
	unsigned int		swp_delayed_trim_count;
};

queue_head_t	swf_global_queue;
boolean_t	swp_trim_supported = FALSE;

extern clock_sec_t	dont_trim_until_ts;
clock_sec_t		vm_swapfile_last_failed_to_create_ts = 0;
clock_sec_t		vm_swapfile_last_successful_create_ts = 0;
int			vm_swapfile_can_be_created = FALSE;
boolean_t		delayed_trim_handling_in_progress = FALSE;

boolean_t		hibernate_in_progress_with_pinned_swap = FALSE;

static void vm_swapout_thread_throttle_adjust(void);
static void vm_swap_free_now(struct swapfile *swf, uint64_t f_offset);
static void vm_swapout_thread(void);
static void vm_swapfile_create_thread(void);
static void vm_swapfile_gc_thread(void);
static void vm_swap_defragment(void);
static void vm_swap_handle_delayed_trims(boolean_t);
static void vm_swap_do_delayed_trim(struct swapfile *);
static void vm_swap_wait_on_trim_handling_in_progress(void);
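
/*
 * The tunables below come in two profiles: the embedded configuration
 * allows a single swapfile and never reclaims it, while the desktop
 * configuration can grow to 100 swapfiles and reclaims or defragments
 * them as they empty out.
 */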
#if CONFIG_EMBEDDED
/*
 * Only 1 swap file currently allowed.
 */
#define VM_MAX_SWAP_FILE_NUM		1
#define VM_SWAPFILE_DELAYED_TRIM_MAX	4

#define VM_SWAP_SHOULD_DEFRAGMENT()	(c_swappedout_sparse_count > (vm_swapfile_total_segs_used / 16) ? 1 : 0)
#define VM_SWAP_SHOULD_RECLAIM()	FALSE
#define VM_SWAP_SHOULD_ABORT_RECLAIM()	FALSE
#define VM_SWAP_SHOULD_PIN(_size)	FALSE
#define VM_SWAP_SHOULD_CREATE(cur_ts)	((vm_num_swap_files < VM_MAX_SWAP_FILE_NUM) && ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < (unsigned int)VM_SWAPFILE_HIWATER_SEGS) && \
					 ((cur_ts - vm_swapfile_last_failed_to_create_ts) > VM_SWAPFILE_DELAYED_CREATE) ? 1 : 0)
#define VM_SWAP_SHOULD_TRIM(swf)	((swf->swp_delayed_trim_count >= VM_SWAPFILE_DELAYED_TRIM_MAX) ? 1 : 0)

#else /* CONFIG_EMBEDDED */

#define VM_MAX_SWAP_FILE_NUM		100
#define VM_SWAPFILE_DELAYED_TRIM_MAX	128

#define VM_SWAP_SHOULD_DEFRAGMENT()	(c_swappedout_sparse_count > (vm_swapfile_total_segs_used / 4) ? 1 : 0)
#define VM_SWAP_SHOULD_RECLAIM()	(((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) >= SWAPFILE_RECLAIM_THRESHOLD_SEGS) ? 1 : 0)
#define VM_SWAP_SHOULD_ABORT_RECLAIM()	(((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) <= SWAPFILE_RECLAIM_MINIMUM_SEGS) ? 1 : 0)
#define VM_SWAP_SHOULD_PIN(_size)	(vm_swappin_avail > 0 && vm_swappin_avail >= (int64_t)(_size))
#define VM_SWAP_SHOULD_CREATE(cur_ts)	((vm_num_swap_files < VM_MAX_SWAP_FILE_NUM) && ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < (unsigned int)VM_SWAPFILE_HIWATER_SEGS) && \
					 ((cur_ts - vm_swapfile_last_failed_to_create_ts) > VM_SWAPFILE_DELAYED_CREATE) ? 1 : 0)
#define VM_SWAP_SHOULD_TRIM(swf)	((swf->swp_delayed_trim_count >= VM_SWAPFILE_DELAYED_TRIM_MAX) ? 1 : 0)

#endif /* CONFIG_EMBEDDED */

#define	VM_SWAPFILE_DELAYED_CREATE	15

#define VM_SWAP_BUSY()	((c_swapout_count && (swapper_throttle == THROTTLE_LEVEL_COMPRESSOR_TIER1 || swapper_throttle == THROTTLE_LEVEL_COMPRESSOR_TIER0)) ? 1 : 0)
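
/*
 * VM_SWAP_BUSY() is true while swapout work is pending and the swapout
 * thread has been promoted above its idle I/O tier; the defragment and
 * reclaim paths yield whenever it is set so they don't compete with
 * swapouts for the disk.
 */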

#if CHECKSUM_THE_SWAP
extern unsigned int hash_string(char *cp, int len);
#endif

#if RECORD_THE_COMPRESSED_DATA
boolean_t	c_compressed_record_init_done = FALSE;
int		c_compressed_record_write_error = 0;
struct vnode	*c_compressed_record_vp = NULL;
uint64_t	c_compressed_record_file_offset = 0;
void	c_compressed_record_init(void);
void	c_compressed_record_write(char *, int);
#endif

extern void vm_pageout_io_throttle(void);

static struct swapfile *vm_swapfile_for_handle(uint64_t);
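
/*
 * A swap "handle" (f_offset) packs the owning swapfile's index into the
 * bits above SWAP_DEVICE_SHIFT and the byte offset within that file into
 * the low bits covered by SWAP_SLOT_MASK; vm_swap_put composes handles
 * this way and vm_swapfile_for_handle takes them back apart.
 */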

/*
 * Called with the vm_swap_data_lock held.
 */

static struct swapfile *
vm_swapfile_for_handle(uint64_t f_offset)
{
	uint64_t		file_offset = 0;
	unsigned int		swapfile_index = 0;
	struct swapfile		*swf = NULL;

	file_offset = (f_offset & SWAP_SLOT_MASK);
	swapfile_index = (f_offset >> SWAP_DEVICE_SHIFT);

	swf = (struct swapfile*) queue_first(&swf_global_queue);

	while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {

		if (swapfile_index == swf->swp_index) {
			break;
		}

		swf = (struct swapfile*) queue_next(&swf->swp_queue);
	}

	if (queue_end(&swf_global_queue, (queue_entry_t) swf)) {
		swf = NULL;
	}

	return swf;
}

#if ENCRYPTED_SWAP

#include <libkern/crypto/aes.h>
extern u_int32_t random(void);	/* from <libkern/libkern.h> */

#define SWAP_CRYPT_AES_KEY_SIZE	128	/* XXX 192 and 256 don't work ! */

boolean_t		swap_crypt_ctx_initialized;
void			swap_crypt_ctx_initialize(void);

aes_ctx			swap_crypt_ctx;
const unsigned char	swap_crypt_null_iv[AES_BLOCK_SIZE] = {0xa, };
uint32_t		swap_crypt_key[8]; /* big enough for a 256 key */

unsigned long		vm_page_encrypt_counter;
unsigned long		vm_page_decrypt_counter;

boolean_t		swap_crypt_ctx_tested = FALSE;
unsigned char		swap_crypt_test_page_ref[4096] __attribute__((aligned(4096)));
unsigned char		swap_crypt_test_page_encrypt[4096] __attribute__((aligned(4096)));
unsigned char		swap_crypt_test_page_decrypt[4096] __attribute__((aligned(4096)));

/*
 * Initialize the encryption context: key and key size.
 */
void swap_crypt_ctx_initialize(void); /* forward */
void
swap_crypt_ctx_initialize(void)
{
	unsigned int	i;

	/*
	 * No need for locking to protect swap_crypt_ctx_initialized
	 * because the first use of encryption will come from the
	 * pageout thread (we won't pagein before there's been a pageout)
	 * and there's only one pageout thread.
	 */
	if (swap_crypt_ctx_initialized == FALSE) {
		for (i = 0;
		     i < (sizeof (swap_crypt_key) /
			  sizeof (swap_crypt_key[0]));
		     i++) {
			swap_crypt_key[i] = random();
		}
		aes_encrypt_key((const unsigned char *) swap_crypt_key,
				SWAP_CRYPT_AES_KEY_SIZE,
				&swap_crypt_ctx.encrypt);
		aes_decrypt_key((const unsigned char *) swap_crypt_key,
				SWAP_CRYPT_AES_KEY_SIZE,
				&swap_crypt_ctx.decrypt);
		swap_crypt_ctx_initialized = TRUE;
	}

	/*
	 * Validate the encryption algorithms.
	 */
	if (swap_crypt_ctx_tested == FALSE) {
		/* initialize */
		for (i = 0; i < 4096; i++) {
			swap_crypt_test_page_ref[i] = (char) i;
		}
		/* encrypt */
		aes_encrypt_cbc(swap_crypt_test_page_ref,
				swap_crypt_null_iv,
				PAGE_SIZE / AES_BLOCK_SIZE,
				swap_crypt_test_page_encrypt,
				&swap_crypt_ctx.encrypt);
		/* decrypt */
		aes_decrypt_cbc(swap_crypt_test_page_encrypt,
				swap_crypt_null_iv,
				PAGE_SIZE / AES_BLOCK_SIZE,
				swap_crypt_test_page_decrypt,
				&swap_crypt_ctx.decrypt);
		/* compare result with original */
		for (i = 0; i < 4096; i++) {
			if (swap_crypt_test_page_decrypt[i] !=
			    swap_crypt_test_page_ref[i]) {
				panic("encryption test failed");
			}
		}
		/* encrypt in place */
		aes_encrypt_cbc(swap_crypt_test_page_decrypt,
				swap_crypt_null_iv,
				PAGE_SIZE / AES_BLOCK_SIZE,
				swap_crypt_test_page_decrypt,
				&swap_crypt_ctx.encrypt);
		/* decrypt in place */
		aes_decrypt_cbc(swap_crypt_test_page_decrypt,
				swap_crypt_null_iv,
				PAGE_SIZE / AES_BLOCK_SIZE,
				swap_crypt_test_page_decrypt,
				&swap_crypt_ctx.decrypt);
		for (i = 0; i < 4096; i++) {
			if (swap_crypt_test_page_decrypt[i] !=
			    swap_crypt_test_page_ref[i]) {
				panic("in place encryption test failed");
			}
		}
		swap_crypt_ctx_tested = TRUE;
	}
}
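
/*
 * Each c_segment is encrypted with AES-CBC using an IV derived from its
 * own identity: the c_seg pointer is dropped into an otherwise-zeroed
 * block and run through the cipher once, so decryption can regenerate
 * exactly the same IV for that segment.
 */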

void
vm_swap_encrypt(c_segment_t c_seg)
{
	vm_offset_t	kernel_vaddr = 0;
	uint64_t	size = 0;

	union {
		unsigned char	aes_iv[AES_BLOCK_SIZE];
		void		*c_seg;
	} encrypt_iv;

	assert(swap_crypt_ctx_initialized);

#if DEVELOPMENT || DEBUG
	C_SEG_MAKE_WRITEABLE(c_seg);
#endif
	bzero(&encrypt_iv.aes_iv[0], sizeof (encrypt_iv.aes_iv));

	encrypt_iv.c_seg = (void*)c_seg;

	/* encrypt the "initial vector" */
	aes_encrypt_cbc((const unsigned char *) &encrypt_iv.aes_iv[0],
			swap_crypt_null_iv,
			1,
			&encrypt_iv.aes_iv[0],
			&swap_crypt_ctx.encrypt);

	kernel_vaddr = (vm_offset_t) c_seg->c_store.c_buffer;
	size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));

	/*
	 * Encrypt the c_segment.
	 */
	aes_encrypt_cbc((const unsigned char *) kernel_vaddr,
			&encrypt_iv.aes_iv[0],
			(unsigned int)(size / AES_BLOCK_SIZE),
			(unsigned char *) kernel_vaddr,
			&swap_crypt_ctx.encrypt);

	vm_page_encrypt_counter += (size/PAGE_SIZE_64);

#if DEVELOPMENT || DEBUG
	C_SEG_WRITE_PROTECT(c_seg);
#endif
}

void
vm_swap_decrypt(c_segment_t c_seg)
{
	vm_offset_t	kernel_vaddr = 0;
	uint64_t	size = 0;

	union {
		unsigned char	aes_iv[AES_BLOCK_SIZE];
		void		*c_seg;
	} decrypt_iv;

	assert(swap_crypt_ctx_initialized);

#if DEVELOPMENT || DEBUG
	C_SEG_MAKE_WRITEABLE(c_seg);
#endif
	/*
	 * Prepare an "initial vector" for the decryption.
	 * It has to be the same as the "initial vector" we
	 * used to encrypt that page.
	 */
	bzero(&decrypt_iv.aes_iv[0], sizeof (decrypt_iv.aes_iv));

	decrypt_iv.c_seg = (void*)c_seg;

	/* encrypt the "initial vector" */
	aes_encrypt_cbc((const unsigned char *) &decrypt_iv.aes_iv[0],
			swap_crypt_null_iv,
			1,
			&decrypt_iv.aes_iv[0],
			&swap_crypt_ctx.encrypt);

	kernel_vaddr = (vm_offset_t) c_seg->c_store.c_buffer;
	size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));

	/*
	 * Decrypt the c_segment.
	 */
	aes_decrypt_cbc((const unsigned char *) kernel_vaddr,
			&decrypt_iv.aes_iv[0],
			(unsigned int) (size / AES_BLOCK_SIZE),
			(unsigned char *) kernel_vaddr,
			&swap_crypt_ctx.decrypt);

	vm_page_decrypt_counter += (size/PAGE_SIZE_64);

#if DEVELOPMENT || DEBUG
	C_SEG_WRITE_PROTECT(c_seg);
#endif
}
#endif /* ENCRYPTED_SWAP */

void
vm_compressor_swap_init()
{
	thread_t	thread = NULL;

	lck_grp_attr_setdefault(&vm_swap_data_lock_grp_attr);
	lck_grp_init(&vm_swap_data_lock_grp,
		     "vm_swap_data",
		     &vm_swap_data_lock_grp_attr);
	lck_attr_setdefault(&vm_swap_data_lock_attr);
	lck_mtx_init_ext(&vm_swap_data_lock,
			 &vm_swap_data_lock_ext,
			 &vm_swap_data_lock_grp,
			 &vm_swap_data_lock_attr);

	queue_init(&swf_global_queue);


	if (kernel_thread_start_priority((thread_continue_t)vm_swapout_thread, NULL,
					 BASEPRI_VM, &thread) != KERN_SUCCESS) {
		panic("vm_swapout_thread: create failed");
	}
	vm_swapout_thread_id = thread->thread_id;

	thread_deallocate(thread);

	if (kernel_thread_start_priority((thread_continue_t)vm_swapfile_create_thread, NULL,
					 BASEPRI_VM, &thread) != KERN_SUCCESS) {
		panic("vm_swapfile_create_thread: create failed");
	}

	thread_deallocate(thread);

	if (kernel_thread_start_priority((thread_continue_t)vm_swapfile_gc_thread, NULL,
					 BASEPRI_VM, &thread) != KERN_SUCCESS) {
		panic("vm_swapfile_gc_thread: create failed");
	}
	thread_deallocate(thread);

	proc_set_thread_policy_with_tid(kernel_task, thread->thread_id,
					TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER2);
	proc_set_thread_policy_with_tid(kernel_task, thread->thread_id,
					TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);

#if ENCRYPTED_SWAP
	if (swap_crypt_ctx_initialized == FALSE) {
		swap_crypt_ctx_initialize();
	}
#endif /* ENCRYPTED_SWAP */

	/*
	 * dummy value until the swap file gets created
	 * when we drive the first c_segment_t to the
	 * swapout queue... at that time we will
	 * know the true size we have to work with
	 */
	c_overage_swapped_limit = 16;

	printf("VM Swap Subsystem is ON\n");
}

#if RECORD_THE_COMPRESSED_DATA

void
c_compressed_record_init()
{
	if (c_compressed_record_init_done == FALSE) {
		vm_swapfile_open("/tmp/compressed_data", &c_compressed_record_vp);
		c_compressed_record_init_done = TRUE;
	}
}

void
c_compressed_record_write(char *buf, int size)
{
	if (c_compressed_record_write_error == 0) {
		c_compressed_record_write_error = vm_record_file_write(c_compressed_record_vp, c_compressed_record_file_offset, buf, size);
		c_compressed_record_file_offset += size;
	}
}
#endif
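
/*
 * Probe the first swapfile once so we can tune ourselves to the backing
 * store: rotating media gets more aggressive compaction thresholds, and
 * we record how much pinnable swap space the swap mount will give us.
 */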
int		compaction_swapper_inited = 0;

void
vm_compaction_swapper_do_init(void)
{
	struct	vnode *vp;
	char	*pathname;
	int	namelen;

	if (compaction_swapper_inited)
		return;

	if (vm_compressor_mode != VM_PAGER_COMPRESSOR_WITH_SWAP) {
		compaction_swapper_inited = 1;
		return;
	}
	lck_mtx_lock(&vm_swap_data_lock);

	if ( !compaction_swapper_inited) {

		namelen = (int)strlen(swapfilename) + SWAPFILENAME_INDEX_LEN + 1;
		pathname = (char*)kalloc(namelen);
		memset(pathname, 0, namelen);
		snprintf(pathname, namelen, "%s%d", swapfilename, 0);

		vm_swapfile_open(pathname, &vp);

		if (vp) {

			if (vnode_pager_isSSD(vp) == FALSE) {
				vm_compressor_minorcompact_threshold_divisor = 18;
				vm_compressor_majorcompact_threshold_divisor = 22;
				vm_compressor_unthrottle_threshold_divisor = 32;
			}
			vnode_setswapmount(vp);
			vm_swappin_avail = vnode_getswappin_avail(vp);

			if (vm_swappin_avail)
				vm_swappin_enabled = TRUE;

			vm_swapfile_close((uint64_t)pathname, vp);
		}
		kfree(pathname, namelen);

		compaction_swapper_inited = 1;
	}
	lck_mtx_unlock(&vm_swap_data_lock);
}

void
vm_swap_consider_defragmenting()
{
	if (compressor_store_stop_compaction == FALSE && !VM_SWAP_BUSY() &&
	    (VM_SWAP_SHOULD_DEFRAGMENT() || VM_SWAP_SHOULD_RECLAIM())) {

		if (!vm_swapfile_gc_thread_running) {
			lck_mtx_lock(&vm_swap_data_lock);

			if (!vm_swapfile_gc_thread_running)
				thread_wakeup((event_t) &vm_swapfile_gc_needed);

			lck_mtx_unlock(&vm_swap_data_lock);
		}
	}
}
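
/*
 * Defragmenting here means swapping sparse segments back in: once a
 * swapped-out c_segment has lost most of its live data, it is cheaper to
 * pull the survivors into memory (where minor compaction can repack them)
 * than to leave a mostly-empty chunk tying up swapfile space.
 */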
int	vm_swap_defragment_yielded = 0;
int	vm_swap_defragment_swapin = 0;
int	vm_swap_defragment_free = 0;
int	vm_swap_defragment_busy = 0;


static void
vm_swap_defragment()
{
	c_segment_t	c_seg;

	/*
	 * have to grab the master lock w/o holding
	 * any locks in spin mode
	 */
	PAGE_REPLACEMENT_DISALLOWED(TRUE);

	lck_mtx_lock_spin_always(c_list_lock);

	while (!queue_empty(&c_swappedout_sparse_list_head)) {

		if (compressor_store_stop_compaction == TRUE || VM_SWAP_BUSY()) {
			vm_swap_defragment_yielded++;
			break;
		}
		c_seg = (c_segment_t)queue_first(&c_swappedout_sparse_list_head);

		lck_mtx_lock_spin_always(&c_seg->c_lock);

		assert(c_seg->c_state == C_ON_SWAPPEDOUTSPARSE_Q);

		if (c_seg->c_busy) {
			lck_mtx_unlock_always(c_list_lock);

			PAGE_REPLACEMENT_DISALLOWED(FALSE);
			/*
			 * c_seg_wait_on_busy consumes c_seg->c_lock
			 */
			c_seg_wait_on_busy(c_seg);

			PAGE_REPLACEMENT_DISALLOWED(TRUE);

			lck_mtx_lock_spin_always(c_list_lock);

			vm_swap_defragment_busy++;
			continue;
		}
		if (c_seg->c_bytes_used == 0) {
			/*
			 * c_seg_free_locked consumes the c_list_lock
			 * and c_seg->c_lock
			 */
			C_SEG_BUSY(c_seg);
			c_seg_free_locked(c_seg);

			vm_swap_defragment_free++;
		} else {
			lck_mtx_unlock_always(c_list_lock);

			if (c_seg_swapin(c_seg, TRUE, FALSE) == 0)
				lck_mtx_unlock_always(&c_seg->c_lock);

			vm_swap_defragment_swapin++;
		}
		PAGE_REPLACEMENT_DISALLOWED(FALSE);

		vm_pageout_io_throttle();

		/*
		 * because write waiters have privilege over readers,
		 * dropping and immediately retaking the master lock will
		 * still allow any thread waiting to acquire the
		 * master lock exclusively an opportunity to take it
		 */
		PAGE_REPLACEMENT_DISALLOWED(TRUE);

		lck_mtx_lock_spin_always(c_list_lock);
	}
	lck_mtx_unlock_always(c_list_lock);

	PAGE_REPLACEMENT_DISALLOWED(FALSE);
}


static void
vm_swapfile_create_thread(void)
{
	clock_sec_t	sec;
	clock_nsec_t	nsec;

	current_thread()->options |= TH_OPT_VMPRIV;

	vm_swapfile_create_thread_awakened++;
	vm_swapfile_create_thread_running = 1;

	while (TRUE) {
		/*
		 * walk through the list of swap files
		 * and do the delayed frees/trims for
		 * any swap file whose count of delayed
		 * frees is above the batch limit
		 */
		vm_swap_handle_delayed_trims(FALSE);

		lck_mtx_lock(&vm_swap_data_lock);

		if (hibernate_in_progress_with_pinned_swap == TRUE)
			break;

		clock_get_system_nanotime(&sec, &nsec);

		if (VM_SWAP_SHOULD_CREATE(sec) == 0)
			break;

		lck_mtx_unlock(&vm_swap_data_lock);

		if (vm_swap_create_file() == FALSE) {
			vm_swapfile_last_failed_to_create_ts = sec;
			HIBLOG("vm_swap_create_file failed @ %lu secs\n", (unsigned long)sec);

		} else
			vm_swapfile_last_successful_create_ts = sec;
	}
	vm_swapfile_create_thread_running = 0;

	if (hibernate_in_progress_with_pinned_swap == TRUE)
		thread_wakeup((event_t)&hibernate_in_progress_with_pinned_swap);

	assert_wait((event_t)&vm_swapfile_create_needed, THREAD_UNINT);

	lck_mtx_unlock(&vm_swap_data_lock);

	thread_block((thread_continue_t)vm_swapfile_create_thread);

	/* NOTREACHED */
}
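
/*
 * Hibernation wants every live swapfile pinned: before the flush starts
 * we wait out the create and gc threads, verify that nothing unpinned
 * remains, and pre-create pinned swapfiles while pin space lasts.
 */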

kern_return_t
hibernate_pin_swap(boolean_t start)
{
	vm_compaction_swapper_do_init();

	if (start == FALSE) {

		lck_mtx_lock(&vm_swap_data_lock);
		hibernate_in_progress_with_pinned_swap = FALSE;
		lck_mtx_unlock(&vm_swap_data_lock);

		return (KERN_SUCCESS);
	}
	if (vm_swappin_enabled == FALSE)
		return (KERN_SUCCESS);

	lck_mtx_lock(&vm_swap_data_lock);

	hibernate_in_progress_with_pinned_swap = TRUE;

	while (vm_swapfile_create_thread_running || vm_swapfile_gc_thread_running) {

		assert_wait((event_t)&hibernate_in_progress_with_pinned_swap, THREAD_UNINT);

		lck_mtx_unlock(&vm_swap_data_lock);

		thread_block(THREAD_CONTINUE_NULL);

		lck_mtx_lock(&vm_swap_data_lock);
	}
	if (vm_num_swap_files > vm_num_pinned_swap_files) {
		hibernate_in_progress_with_pinned_swap = FALSE;
		lck_mtx_unlock(&vm_swap_data_lock);

		HIBLOG("hibernate_pin_swap failed - vm_num_swap_files = %d, vm_num_pinned_swap_files = %d\n",
		       vm_num_swap_files, vm_num_pinned_swap_files);
		return (KERN_FAILURE);
	}
	lck_mtx_unlock(&vm_swap_data_lock);

	while (VM_SWAP_SHOULD_PIN(MAX_SWAP_FILE_SIZE)) {
		if (vm_swap_create_file() == FALSE)
			break;
	}
	return (KERN_SUCCESS);
}


static void
vm_swapfile_gc_thread(void)
{
	boolean_t	need_defragment;
	boolean_t	need_reclaim;

	vm_swapfile_gc_thread_awakened++;
	vm_swapfile_gc_thread_running = 1;

	while (TRUE) {

		lck_mtx_lock(&vm_swap_data_lock);

		if (hibernate_in_progress_with_pinned_swap == TRUE)
			break;

		if (VM_SWAP_BUSY() || compressor_store_stop_compaction == TRUE)
			break;

		need_defragment = FALSE;
		need_reclaim = FALSE;

		if (VM_SWAP_SHOULD_DEFRAGMENT())
			need_defragment = TRUE;

		if (VM_SWAP_SHOULD_RECLAIM()) {
			need_defragment = TRUE;
			need_reclaim = TRUE;
		}
		if (need_defragment == FALSE && need_reclaim == FALSE)
			break;

		lck_mtx_unlock(&vm_swap_data_lock);

		if (need_defragment == TRUE)
			vm_swap_defragment();
		if (need_reclaim == TRUE)
			vm_swap_reclaim();
	}
	vm_swapfile_gc_thread_running = 0;

	if (hibernate_in_progress_with_pinned_swap == TRUE)
		thread_wakeup((event_t)&hibernate_in_progress_with_pinned_swap);

	assert_wait((event_t)&vm_swapfile_gc_needed, THREAD_UNINT);

	lck_mtx_unlock(&vm_swap_data_lock);

	thread_block((thread_continue_t)vm_swapfile_gc_thread);

	/* NOTREACHED */
}
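
/*
 * The swapout thread floats between three I/O throttle tiers: TIER2 for
 * idle background swapping, TIER1 once the compressor needs to be
 * unthrottled or hibernation is flushing, and TIER0 when vm_pageout_scan
 * itself is being held up by swap I/O.
 */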
int	  swapper_entered_T0 = 0;
int	  swapper_entered_T1 = 0;
int	  swapper_entered_T2 = 0;

static void
vm_swapout_thread_throttle_adjust(void)
{
	int swapper_throttle_new;

	if (swapper_throttle_inited == FALSE) {
		/*
		 * force this thread to be set to the correct
		 * throttling tier
		 */
		swapper_throttle_new = THROTTLE_LEVEL_COMPRESSOR_TIER2;
		swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER1;
		swapper_throttle_inited = TRUE;
		swapper_entered_T2++;
		goto done;
	}
	swapper_throttle_new = swapper_throttle;


	switch(swapper_throttle) {

	case THROTTLE_LEVEL_COMPRESSOR_TIER2:

		if (SWAPPER_NEEDS_TO_UNTHROTTLE() || swapout_target_age || hibernate_flushing == TRUE) {
			swapper_throttle_new = THROTTLE_LEVEL_COMPRESSOR_TIER1;
			swapper_entered_T1++;
			break;
		}
		break;

	case THROTTLE_LEVEL_COMPRESSOR_TIER1:

		if (VM_PAGEOUT_SCAN_NEEDS_TO_THROTTLE()) {
			swapper_throttle_new = THROTTLE_LEVEL_COMPRESSOR_TIER0;
			swapper_entered_T0++;
			break;
		}
		if (COMPRESSOR_NEEDS_TO_SWAP() == 0 && swapout_target_age == 0 && hibernate_flushing == FALSE) {
			swapper_throttle_new = THROTTLE_LEVEL_COMPRESSOR_TIER2;
			swapper_entered_T2++;
			break;
		}
		break;

	case THROTTLE_LEVEL_COMPRESSOR_TIER0:

		if (COMPRESSOR_NEEDS_TO_SWAP() == 0) {
			swapper_throttle_new = THROTTLE_LEVEL_COMPRESSOR_TIER2;
			swapper_entered_T2++;
			break;
		}
		if (SWAPPER_NEEDS_TO_UNTHROTTLE() == 0) {
			swapper_throttle_new = THROTTLE_LEVEL_COMPRESSOR_TIER1;
			swapper_entered_T1++;
		}
		break;
	}
done:
	if (swapper_throttle != swapper_throttle_new) {
		proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
						TASK_POLICY_INTERNAL, TASK_POLICY_IO, swapper_throttle_new);
		proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
						TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);

		swapper_throttle = swapper_throttle_new;
	}
}
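
/*
 * The swapout thread pulls c_segments off the swapout queue one at a
 * time: empty segments are recycled on the spot, everything else is
 * checksummed/encrypted as configured and handed to vm_swap_put; on
 * failure the segment is put back on the age queue for another pass.
 */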
int vm_swapout_found_empty = 0;


static void
vm_swapout_thread(void)
{
	uint64_t	f_offset = 0;
	uint32_t	size = 0;
	c_segment_t	c_seg = NULL;
	kern_return_t	kr = KERN_SUCCESS;
	vm_offset_t	addr = 0;

	current_thread()->options |= TH_OPT_VMPRIV;

	vm_swapout_thread_awakened++;

	lck_mtx_lock_spin_always(c_list_lock);

	while (!queue_empty(&c_swapout_list_head)) {

		c_seg = (c_segment_t)queue_first(&c_swapout_list_head);

		lck_mtx_lock_spin_always(&c_seg->c_lock);

		assert(c_seg->c_state == C_ON_SWAPOUT_Q);

		if (c_seg->c_busy) {
			lck_mtx_unlock_always(c_list_lock);

			c_seg_wait_on_busy(c_seg);

			lck_mtx_lock_spin_always(c_list_lock);

			continue;
		}
		vm_swapout_thread_processed_segments++;

		size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));

		if (size == 0) {
			assert(c_seg->c_bytes_used == 0);

			if (!c_seg->c_on_minorcompact_q)
				c_seg_need_delayed_compaction(c_seg, TRUE);

			c_seg_switch_state(c_seg, C_IS_EMPTY, FALSE);
			lck_mtx_unlock_always(&c_seg->c_lock);
			lck_mtx_unlock_always(c_list_lock);

			vm_swapout_found_empty++;
			goto c_seg_is_empty;
		}
		C_SEG_BUSY(c_seg);
		c_seg->c_busy_swapping = 1;

		lck_mtx_unlock_always(c_list_lock);

		addr = (vm_offset_t) c_seg->c_store.c_buffer;

		lck_mtx_unlock_always(&c_seg->c_lock);

#if CHECKSUM_THE_SWAP
		c_seg->cseg_hash = hash_string((char*)addr, (int)size);
		c_seg->cseg_swap_size = size;
#endif /* CHECKSUM_THE_SWAP */

#if ENCRYPTED_SWAP
		vm_swap_encrypt(c_seg);
#endif /* ENCRYPTED_SWAP */

		vm_swapout_thread_throttle_adjust();

		kr = vm_swap_put((vm_offset_t) addr, &f_offset, size, c_seg);

		PAGE_REPLACEMENT_DISALLOWED(TRUE);

		if (kr == KERN_SUCCESS) {
			kernel_memory_depopulate(compressor_map, (vm_offset_t) addr, size, KMA_COMPRESSOR);
		}
#if ENCRYPTED_SWAP
		else {
			vm_swap_decrypt(c_seg);
		}
#endif /* ENCRYPTED_SWAP */
		lck_mtx_lock_spin_always(c_list_lock);
		lck_mtx_lock_spin_always(&c_seg->c_lock);

		if (kr == KERN_SUCCESS) {
			int		new_state = C_ON_SWAPPEDOUT_Q;
			boolean_t	insert_head = FALSE;

			if (hibernate_flushing == TRUE) {
				if (c_seg->c_generation_id >= first_c_segment_to_warm_generation_id &&
				    c_seg->c_generation_id <= last_c_segment_to_warm_generation_id)
					insert_head = TRUE;
			} else if (C_SEG_ONDISK_IS_SPARSE(c_seg))
				new_state = C_ON_SWAPPEDOUTSPARSE_Q;

			c_seg_switch_state(c_seg, new_state, insert_head);

			c_seg->c_store.c_swap_handle = f_offset;

			VM_STAT_INCR_BY(swapouts, size >> PAGE_SHIFT);

			if (c_seg->c_bytes_used)
				OSAddAtomic64(-c_seg->c_bytes_used, &compressor_bytes_used);
		} else {
			if (c_seg->c_overage_swap == TRUE) {
				c_seg->c_overage_swap = FALSE;
				c_overage_swapped_count--;
			}
			c_seg_switch_state(c_seg, C_ON_AGE_Q, FALSE);

			if (!c_seg->c_on_minorcompact_q && C_SEG_UNUSED_BYTES(c_seg) >= PAGE_SIZE)
				c_seg_need_delayed_compaction(c_seg, TRUE);
		}
		assert(c_seg->c_busy_swapping);
		assert(c_seg->c_busy);

		c_seg->c_busy_swapping = 0;
		lck_mtx_unlock_always(c_list_lock);

		C_SEG_WAKEUP_DONE(c_seg);
		lck_mtx_unlock_always(&c_seg->c_lock);

		PAGE_REPLACEMENT_DISALLOWED(FALSE);

		vm_pageout_io_throttle();
c_seg_is_empty:
		if (c_swapout_count == 0)
			vm_swap_consider_defragmenting();

		lck_mtx_lock_spin_always(c_list_lock);
	}

	assert_wait((event_t)&c_swapout_list_head, THREAD_UNINT);

	lck_mtx_unlock_always(c_list_lock);

	thread_block((thread_continue_t)vm_swapout_thread);

	/* NOTREACHED */
}
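
/*
 * Create (or re-initialize) a swapfile: try to preallocate
 * MAX_SWAP_FILE_SIZE and halve the request until the filesystem can
 * satisfy it, then carve what we get into fixed-size compressed-swap
 * chunks tracked by a free bitmap and a per-chunk c_segment back pointer.
 */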
boolean_t
vm_swap_create_file()
{
	uint64_t	size = 0;
	int		namelen = 0;
	boolean_t	swap_file_created = FALSE;
	boolean_t	swap_file_reuse = FALSE;
	boolean_t	swap_file_pin = FALSE;
	struct swapfile *swf = NULL;

	/*
	 * make sure we've got all the info we need
	 * to potentially pin a swap file... we could
	 * be swapping out due to hibernation w/o ever
	 * having run vm_pageout_scan, which is normally
	 * the trigger to do the init
	 */
	vm_compaction_swapper_do_init();

	/*
	 * Any swapfile structure ready for re-use?
	 */
	lck_mtx_lock(&vm_swap_data_lock);

	swf = (struct swapfile*) queue_first(&swf_global_queue);

	while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
		if (swf->swp_flags == SWAP_REUSE) {
			swap_file_reuse = TRUE;
			break;
		}
		swf = (struct swapfile*) queue_next(&swf->swp_queue);
	}

	lck_mtx_unlock(&vm_swap_data_lock);

	if (swap_file_reuse == FALSE) {

		namelen = (int)strlen(swapfilename) + SWAPFILENAME_INDEX_LEN + 1;

		swf = (struct swapfile*) kalloc(sizeof *swf);
		memset(swf, 0, sizeof(*swf));

		swf->swp_index = vm_num_swap_files + 1;
		swf->swp_pathlen = namelen;
		swf->swp_path = (char*)kalloc(swf->swp_pathlen);

		memset(swf->swp_path, 0, namelen);

		snprintf(swf->swp_path, namelen, "%s%d", swapfilename, vm_num_swap_files);
	}

	vm_swapfile_open(swf->swp_path, &swf->swp_vp);

	if (swf->swp_vp == NULL) {
		if (swap_file_reuse == FALSE) {
			kfree(swf->swp_path, swf->swp_pathlen);
			kfree(swf, sizeof *swf);
		}
		return FALSE;
	}
	vm_swapfile_can_be_created = TRUE;

	size = MAX_SWAP_FILE_SIZE;

	while (size >= MIN_SWAP_FILE_SIZE) {

		swap_file_pin = VM_SWAP_SHOULD_PIN(size);

		if (vm_swapfile_preallocate(swf->swp_vp, &size, &swap_file_pin) == 0) {

			int num_bytes_for_bitmap = 0;

			swap_file_created = TRUE;

			swf->swp_size = size;
			swf->swp_nsegs = (unsigned int) (size / COMPRESSED_SWAP_CHUNK_SIZE);
			swf->swp_nseginuse = 0;
			swf->swp_free_hint = 0;

			num_bytes_for_bitmap = MAX((swf->swp_nsegs >> 3) , 1);
			/*
			 * Allocate a bitmap that describes the
			 * number of segments held by this swapfile.
			 */
			swf->swp_bitmap = (uint8_t*)kalloc(num_bytes_for_bitmap);
			memset(swf->swp_bitmap, 0, num_bytes_for_bitmap);

			swf->swp_csegs = (c_segment_t *) kalloc(swf->swp_nsegs * sizeof(c_segment_t));
			memset(swf->swp_csegs, 0, (swf->swp_nsegs * sizeof(c_segment_t)));

			/*
			 * passing a NULL trim_list into vnode_trim_list
			 * will return ENOTSUP if trim isn't supported
			 * and 0 if it is
			 */
			if (vnode_trim_list(swf->swp_vp, NULL, FALSE) == 0)
				swp_trim_supported = TRUE;

			lck_mtx_lock(&vm_swap_data_lock);

			swf->swp_flags = SWAP_READY;

			if (swap_file_reuse == FALSE) {
				queue_enter(&swf_global_queue, swf, struct swapfile*, swp_queue);
			}

			vm_num_swap_files++;

			vm_swapfile_total_segs_alloced += swf->swp_nsegs;

			if (swap_file_pin == TRUE) {
				vm_num_pinned_swap_files++;
				swf->swp_flags |= SWAP_PINNED;
				vm_swappin_avail -= swf->swp_size;
			}

			lck_mtx_unlock(&vm_swap_data_lock);

			thread_wakeup((event_t) &vm_num_swap_files);

			if (vm_num_swap_files == 1) {

				c_overage_swapped_limit = (uint32_t)size / C_SEG_BUFSIZE;

				if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE)
					c_overage_swapped_limit /= 2;
			}
			break;
		} else {

			size = size / 2;
		}
	}
	if (swap_file_created == FALSE) {

		vm_swapfile_close((uint64_t)(swf->swp_path), swf->swp_vp);

		swf->swp_vp = NULL;

		if (swap_file_reuse == FALSE) {
			kfree(swf->swp_path, swf->swp_pathlen);
			kfree(swf, sizeof *swf);
		}
	}
	return swap_file_created;
}
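
/*
 * vm_swap_get / vm_swap_put move a c_segment's compressed buffer between
 * memory and its slot in a swapfile; the swp_io_count reference taken
 * here keeps the reclaim path from tearing the file down while an I/O
 * is in flight.
 */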

kern_return_t
vm_swap_get(c_segment_t c_seg, uint64_t f_offset, uint64_t size)
{
	struct swapfile *swf = NULL;
	uint64_t	file_offset = 0;
	int		retval = 0;

	assert(c_seg->c_store.c_buffer);

	lck_mtx_lock(&vm_swap_data_lock);

	swf = vm_swapfile_for_handle(f_offset);

	if (swf == NULL || ( !(swf->swp_flags & SWAP_READY) && !(swf->swp_flags & SWAP_RECLAIM))) {
		retval = 1;
		goto done;
	}
	swf->swp_io_count++;

	lck_mtx_unlock(&vm_swap_data_lock);

#if DEVELOPMENT || DEBUG
	C_SEG_MAKE_WRITEABLE(c_seg);
#endif
	file_offset = (f_offset & SWAP_SLOT_MASK);
	retval = vm_swapfile_io(swf->swp_vp, file_offset, (uint64_t)c_seg->c_store.c_buffer, (int)(size / PAGE_SIZE_64), SWAP_READ);

#if DEVELOPMENT || DEBUG
	C_SEG_WRITE_PROTECT(c_seg);
#endif
	if (retval == 0)
		VM_STAT_INCR_BY(swapins, size >> PAGE_SHIFT);
	else
		vm_swap_get_failures++;

	/*
	 * Free this slot in the swap structure.
	 */
	vm_swap_free(f_offset);

	lck_mtx_lock(&vm_swap_data_lock);
	swf->swp_io_count--;

	if ((swf->swp_flags & SWAP_WANTED) && swf->swp_io_count == 0) {

		swf->swp_flags &= ~SWAP_WANTED;
		thread_wakeup((event_t) &swf->swp_flags);
	}
done:
	lck_mtx_unlock(&vm_swap_data_lock);

	if (retval == 0)
		return KERN_SUCCESS;
	else
		return KERN_FAILURE;
}

kern_return_t
vm_swap_put(vm_offset_t addr, uint64_t *f_offset, uint64_t size, c_segment_t c_seg)
{
	unsigned int	segidx = 0;
	struct swapfile *swf = NULL;
	uint64_t	file_offset = 0;
	uint64_t	swapfile_index = 0;
	unsigned int	byte_for_segidx = 0;
	unsigned int	offset_within_byte = 0;
	boolean_t	swf_eligible = FALSE;
	boolean_t	waiting = FALSE;
	boolean_t	retried = FALSE;
	int		error = 0;
	clock_sec_t	sec;
	clock_nsec_t	nsec;

	if (addr == 0 || f_offset == NULL) {
		return KERN_FAILURE;
	}
retry:
	lck_mtx_lock(&vm_swap_data_lock);

	swf = (struct swapfile*) queue_first(&swf_global_queue);

	while(queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {

		segidx = swf->swp_free_hint;

		swf_eligible = (swf->swp_flags & SWAP_READY) && (swf->swp_nseginuse < swf->swp_nsegs);

		if (swf_eligible) {

			while(segidx < swf->swp_nsegs) {

				byte_for_segidx = segidx >> 3;
				offset_within_byte = segidx % 8;

				if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {
					segidx++;
					continue;
				}
				(swf->swp_bitmap)[byte_for_segidx] |= (1 << offset_within_byte);

				file_offset = segidx * COMPRESSED_SWAP_CHUNK_SIZE;
				swf->swp_nseginuse++;
				swf->swp_io_count++;
				swapfile_index = swf->swp_index;

				vm_swapfile_total_segs_used++;

				clock_get_system_nanotime(&sec, &nsec);

				if (VM_SWAP_SHOULD_CREATE(sec) && !vm_swapfile_create_thread_running)
					thread_wakeup((event_t) &vm_swapfile_create_needed);

				lck_mtx_unlock(&vm_swap_data_lock);

				goto done;
			}
		}
		swf = (struct swapfile*) queue_next(&swf->swp_queue);
	}
	assert(queue_end(&swf_global_queue, (queue_entry_t) swf));

	/*
	 * we've run out of swap segments, but may not
	 * be in a position to immediately create a new swap
	 * file if we've recently failed to create due to a lack
	 * of free space in the root filesystem... we'll try
	 * to kick that create off, but in any event we're going
	 * to take a breather (up to 1 second) so that we're not caught in a tight
	 * loop back in "vm_compressor_compact_and_swap" trying to stuff
	 * segments into swap files only to have them immediately put back
	 * on the c_age queue due to vm_swap_put failing.
	 *
	 * if we're doing these puts due to a hibernation flush,
	 * no need to block... setting hibernate_no_swapspace to TRUE,
	 * will cause "vm_compressor_compact_and_swap" to immediately abort
	 */
	clock_get_system_nanotime(&sec, &nsec);

	if (VM_SWAP_SHOULD_CREATE(sec) && !vm_swapfile_create_thread_running)
		thread_wakeup((event_t) &vm_swapfile_create_needed);

	if (hibernate_flushing == FALSE || VM_SWAP_SHOULD_CREATE(sec)) {
		waiting = TRUE;
		assert_wait_timeout((event_t) &vm_num_swap_files, THREAD_INTERRUPTIBLE, 1000, 1000*NSEC_PER_USEC);
	} else
		hibernate_no_swapspace = TRUE;

	lck_mtx_unlock(&vm_swap_data_lock);

	if (waiting == TRUE) {
		thread_block(THREAD_CONTINUE_NULL);

		if (retried == FALSE && hibernate_flushing == TRUE) {
			retried = TRUE;
			goto retry;
		}
	}
	vm_swap_put_failures++;

	return KERN_FAILURE;

done:
	assert(c_seg->c_busy_swapping);
	assert(c_seg->c_busy);
	assert(!c_seg->c_on_minorcompact_q);

	error = vm_swapfile_io(swf->swp_vp, file_offset, addr, (int) (size / PAGE_SIZE_64), SWAP_WRITE);

	lck_mtx_lock(&vm_swap_data_lock);

	swf->swp_csegs[segidx] = c_seg;

	swf->swp_io_count--;

	*f_offset = (swapfile_index << SWAP_DEVICE_SHIFT) | file_offset;

	if ((swf->swp_flags & SWAP_WANTED) && swf->swp_io_count == 0) {

		swf->swp_flags &= ~SWAP_WANTED;
		thread_wakeup((event_t) &swf->swp_flags);
	}
	lck_mtx_unlock(&vm_swap_data_lock);

	if (error) {
		vm_swap_free(*f_offset);

		vm_swap_put_failures++;

		return KERN_FAILURE;
	}
	return KERN_SUCCESS;
}

static void
vm_swap_free_now(struct swapfile *swf, uint64_t f_offset)
{
	uint64_t	file_offset = 0;
	unsigned int	segidx = 0;


	if ((swf->swp_flags & SWAP_READY) || (swf->swp_flags & SWAP_RECLAIM)) {

		unsigned int byte_for_segidx = 0;
		unsigned int offset_within_byte = 0;

		file_offset = (f_offset & SWAP_SLOT_MASK);
		segidx = (unsigned int) (file_offset / COMPRESSED_SWAP_CHUNK_SIZE);

		byte_for_segidx = segidx >> 3;
		offset_within_byte = segidx % 8;

		if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {

			(swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);

			swf->swp_csegs[segidx] = NULL;

			swf->swp_nseginuse--;
			vm_swapfile_total_segs_used--;

			if (segidx < swf->swp_free_hint) {
				swf->swp_free_hint = segidx;
			}
		}
		if (VM_SWAP_SHOULD_RECLAIM() && !vm_swapfile_gc_thread_running)
			thread_wakeup((event_t) &vm_swapfile_gc_needed);
	}
}
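
/*
 * On trim-capable backing stores a freed slot is queued on the owning
 * swapfile's delayed-trim list and only handed to vnode_trim_list in
 * batches (VM_SWAPFILE_DELAYED_TRIM_MAX); the bitmap bit is given back
 * in vm_swap_do_delayed_trim once the trim has actually been issued.
 */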

uint32_t vm_swap_free_now_count = 0;
uint32_t vm_swap_free_delayed_count = 0;


void
vm_swap_free(uint64_t f_offset)
{
	struct swapfile *swf = NULL;
	struct trim_list *tl = NULL;
	clock_sec_t	sec;
	clock_nsec_t	nsec;

	if (swp_trim_supported == TRUE)
		tl = kalloc(sizeof(struct trim_list));

	lck_mtx_lock(&vm_swap_data_lock);

	swf = vm_swapfile_for_handle(f_offset);

	if (swf && (swf->swp_flags & (SWAP_READY | SWAP_RECLAIM))) {

		if (swp_trim_supported == FALSE || (swf->swp_flags & SWAP_RECLAIM)) {
			/*
			 * don't delay the free if the underlying disk doesn't support
			 * trim, or we're in the midst of reclaiming this swap file since
			 * we don't want to move segments that are technically free
			 * but not yet handled by the delayed free mechanism
			 */
			vm_swap_free_now(swf, f_offset);

			vm_swap_free_now_count++;
		} else {
			tl->tl_offset = f_offset & SWAP_SLOT_MASK;
			tl->tl_length = COMPRESSED_SWAP_CHUNK_SIZE;

			tl->tl_next = swf->swp_delayed_trim_list_head;
			swf->swp_delayed_trim_list_head = tl;
			swf->swp_delayed_trim_count++;
			tl = NULL;

			if (VM_SWAP_SHOULD_TRIM(swf) && !vm_swapfile_create_thread_running) {
				clock_get_system_nanotime(&sec, &nsec);

				if (sec > dont_trim_until_ts)
					thread_wakeup((event_t) &vm_swapfile_create_needed);
			}
			vm_swap_free_delayed_count++;
		}
	}
	lck_mtx_unlock(&vm_swap_data_lock);

	if (tl != NULL)
		kfree(tl, sizeof(struct trim_list));
}

static void
vm_swap_wait_on_trim_handling_in_progress()
{
	while (delayed_trim_handling_in_progress == TRUE) {

		assert_wait((event_t) &delayed_trim_handling_in_progress, THREAD_UNINT);
		lck_mtx_unlock(&vm_swap_data_lock);

		thread_block(THREAD_CONTINUE_NULL);

		lck_mtx_lock(&vm_swap_data_lock);
	}
}

static void
vm_swap_handle_delayed_trims(boolean_t force_now)
{
	struct swapfile *swf = NULL;

	/*
	 * serialize the race between us and vm_swap_reclaim...
	 * if vm_swap_reclaim wins it will turn off SWAP_READY
	 * on the victim it has chosen... we can just skip over
	 * that file since vm_swap_reclaim will first process
	 * all of the delayed trims associated with it
	 */
	lck_mtx_lock(&vm_swap_data_lock);

	delayed_trim_handling_in_progress = TRUE;

	lck_mtx_unlock(&vm_swap_data_lock);

	/*
	 * no need to hold the lock to walk the swf list since
	 * vm_swap_create (the only place where we add to this list)
	 * is run on the same thread as this function
	 * and vm_swap_reclaim doesn't remove items from this list
	 * instead marking them with SWAP_REUSE for future re-use
	 */
	swf = (struct swapfile*) queue_first(&swf_global_queue);

	while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {

		if ((swf->swp_flags & SWAP_READY) && (force_now == TRUE || VM_SWAP_SHOULD_TRIM(swf))) {

			assert(!(swf->swp_flags & SWAP_RECLAIM));
			vm_swap_do_delayed_trim(swf);
		}
		swf = (struct swapfile*) queue_next(&swf->swp_queue);
	}
	lck_mtx_lock(&vm_swap_data_lock);

	delayed_trim_handling_in_progress = FALSE;
	thread_wakeup((event_t) &delayed_trim_handling_in_progress);

	if (VM_SWAP_SHOULD_RECLAIM() && !vm_swapfile_gc_thread_running)
		thread_wakeup((event_t) &vm_swapfile_gc_needed);

	lck_mtx_unlock(&vm_swap_data_lock);
}

static void
vm_swap_do_delayed_trim(struct swapfile *swf)
{
	struct trim_list *tl, *tl_head;

	lck_mtx_lock(&vm_swap_data_lock);

	tl_head = swf->swp_delayed_trim_list_head;
	swf->swp_delayed_trim_list_head = NULL;
	swf->swp_delayed_trim_count = 0;

	lck_mtx_unlock(&vm_swap_data_lock);

	vnode_trim_list(swf->swp_vp, tl_head, TRUE);

	while ((tl = tl_head) != NULL) {
		unsigned int	segidx = 0;
		unsigned int	byte_for_segidx = 0;
		unsigned int	offset_within_byte = 0;

		lck_mtx_lock(&vm_swap_data_lock);

		segidx = (unsigned int) (tl->tl_offset / COMPRESSED_SWAP_CHUNK_SIZE);

		byte_for_segidx = segidx >> 3;
		offset_within_byte = segidx % 8;

		if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {

			(swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);

			swf->swp_csegs[segidx] = NULL;

			swf->swp_nseginuse--;
			vm_swapfile_total_segs_used--;

			if (segidx < swf->swp_free_hint) {
				swf->swp_free_hint = segidx;
			}
		}
		lck_mtx_unlock(&vm_swap_data_lock);

		tl_head = tl->tl_next;

		kfree(tl, sizeof(struct trim_list));
	}
}
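
/*
 * Reclaim empties the swapfile with the fewest segments still in use:
 * each live segment is read back into a scratch buffer and re-written
 * through vm_swap_put (usually landing in another, busier file), after
 * which the drained file is closed and its structure left on the queue
 * marked SWAP_REUSE.
 */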

int	vm_swap_reclaim_yielded = 0;

void
vm_swap_reclaim(void)
{
	vm_offset_t	addr = 0;
	unsigned int	segidx = 0;
	uint64_t	f_offset = 0;
	struct swapfile *swf = NULL;
	struct swapfile *smallest_swf = NULL;
	unsigned int	min_nsegs = 0;
	unsigned int	byte_for_segidx = 0;
	unsigned int	offset_within_byte = 0;
	uint32_t	c_size = 0;

	c_segment_t	c_seg = NULL;

	if (kernel_memory_allocate(compressor_map, (vm_offset_t *)(&addr), C_SEG_BUFSIZE, 0, KMA_KOBJECT, VM_KERN_MEMORY_COMPRESSOR) != KERN_SUCCESS) {
		panic("vm_swap_reclaim: kernel_memory_allocate failed\n");
	}

	lck_mtx_lock(&vm_swap_data_lock);

	/*
	 * if we're running the swapfile list looking for
	 * candidates with delayed trims, we need to
	 * wait before making our decision concerning
	 * the swapfile we want to reclaim
	 */
	vm_swap_wait_on_trim_handling_in_progress();

	/*
	 * from here until we knock down the SWAP_READY bit,
	 * we need to remain behind the vm_swap_data_lock...
	 * once that bit has been turned off, "vm_swap_handle_delayed_trims"
	 * will not consider this swapfile for processing
	 */
	swf = (struct swapfile*) queue_first(&swf_global_queue);
	min_nsegs = MAX_SWAP_FILE_SIZE / COMPRESSED_SWAP_CHUNK_SIZE;
	smallest_swf = NULL;

	while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {

		if ((swf->swp_flags & SWAP_READY) && (swf->swp_nseginuse <= min_nsegs)) {

			smallest_swf = swf;
			min_nsegs = swf->swp_nseginuse;
		}
		swf = (struct swapfile*) queue_next(&swf->swp_queue);
	}

	if (smallest_swf == NULL)
		goto done;

	swf = smallest_swf;


	swf->swp_flags &= ~SWAP_READY;
	swf->swp_flags |= SWAP_RECLAIM;

	if (swf->swp_delayed_trim_count) {

		lck_mtx_unlock(&vm_swap_data_lock);

		vm_swap_do_delayed_trim(swf);

		lck_mtx_lock(&vm_swap_data_lock);
	}
	segidx = 0;

	while (segidx < swf->swp_nsegs) {

ReTry_for_cseg:
		/*
		 * Wait for outgoing I/Os.
		 */
		while (swf->swp_io_count) {

			swf->swp_flags |= SWAP_WANTED;

			assert_wait((event_t) &swf->swp_flags, THREAD_UNINT);
			lck_mtx_unlock(&vm_swap_data_lock);

			thread_block(THREAD_CONTINUE_NULL);

			lck_mtx_lock(&vm_swap_data_lock);
		}
		if (compressor_store_stop_compaction == TRUE || VM_SWAP_SHOULD_ABORT_RECLAIM() || VM_SWAP_BUSY()) {
			vm_swap_reclaim_yielded++;
			break;
		}
		byte_for_segidx = segidx >> 3;
		offset_within_byte = segidx % 8;

		if (((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) == 0) {

			segidx++;
			continue;
		}
		c_seg = swf->swp_csegs[segidx];
		assert(c_seg);

		lck_mtx_lock_spin_always(&c_seg->c_lock);

		if (c_seg->c_busy) {
			/*
			 * a swapped out c_segment in the process of being freed will remain in the
			 * busy state until after the vm_swap_free is called on it... vm_swap_free
			 * takes the vm_swap_data_lock, so can't change the swap state until after
			 * we drop the vm_swap_data_lock... once we do, vm_swap_free will complete
			 * which will allow c_seg_free_locked to clear busy and wake up this thread...
			 * at that point, we re-look up the swap state which will now indicate that
			 * this c_segment no longer exists.
			 */
			c_seg->c_wanted = 1;

			assert_wait((event_t) (c_seg), THREAD_UNINT);
			lck_mtx_unlock_always(&c_seg->c_lock);

			lck_mtx_unlock(&vm_swap_data_lock);

			thread_block(THREAD_CONTINUE_NULL);

			lck_mtx_lock(&vm_swap_data_lock);

			goto ReTry_for_cseg;
		}
		(swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);

		f_offset = segidx * COMPRESSED_SWAP_CHUNK_SIZE;

		assert(c_seg == swf->swp_csegs[segidx]);
		swf->swp_csegs[segidx] = NULL;
		swf->swp_nseginuse--;

		vm_swapfile_total_segs_used--;

		lck_mtx_unlock(&vm_swap_data_lock);

		assert(C_SEG_IS_ONDISK(c_seg));

		C_SEG_BUSY(c_seg);
		c_seg->c_busy_swapping = 1;
#if !CHECKSUM_THE_SWAP
		c_seg_trim_tail(c_seg);
#endif
		c_size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));

		assert(c_size <= C_SEG_BUFSIZE && c_size);

		lck_mtx_unlock_always(&c_seg->c_lock);

		if (vm_swapfile_io(swf->swp_vp, f_offset, addr, (int)(c_size / PAGE_SIZE_64), SWAP_READ)) {

			/*
			 * reading the data back in failed, so convert c_seg
			 * to a swapped in c_segment that contains no data
			 */
			c_seg_swapin_requeue(c_seg, FALSE, TRUE, FALSE);
			/*
			 * returns with c_busy_swapping cleared
			 */

			vm_swap_get_failures++;
			goto swap_io_failed;
		}
		VM_STAT_INCR_BY(swapins, c_size >> PAGE_SHIFT);

		if (vm_swap_put(addr, &f_offset, c_size, c_seg)) {
			vm_offset_t	c_buffer;

			/*
			 * the put failed, so convert c_seg to a fully swapped in c_segment
			 * with valid data
			 */
			c_buffer = (vm_offset_t)C_SEG_BUFFER_ADDRESS(c_seg->c_mysegno);

			kernel_memory_populate(compressor_map, c_buffer, c_size, KMA_COMPRESSOR, VM_KERN_MEMORY_COMPRESSOR);

			memcpy((char *)c_buffer, (char *)addr, c_size);

			c_seg->c_store.c_buffer = (int32_t *)c_buffer;
#if ENCRYPTED_SWAP
			vm_swap_decrypt(c_seg);
#endif /* ENCRYPTED_SWAP */
			c_seg_swapin_requeue(c_seg, TRUE, TRUE, FALSE);
			/*
			 * returns with c_busy_swapping cleared
			 */
			OSAddAtomic64(c_seg->c_bytes_used, &compressor_bytes_used);

			goto swap_io_failed;
		}
		VM_STAT_INCR_BY(swapouts, c_size >> PAGE_SHIFT);

		lck_mtx_lock_spin_always(&c_seg->c_lock);

		assert(C_SEG_IS_ONDISK(c_seg));
		/*
		 * The c_seg will now know about the new location on disk.
		 */
		c_seg->c_store.c_swap_handle = f_offset;

		assert(c_seg->c_busy_swapping);
		c_seg->c_busy_swapping = 0;
swap_io_failed:
		assert(c_seg->c_busy);
		C_SEG_WAKEUP_DONE(c_seg);

		lck_mtx_unlock_always(&c_seg->c_lock);
		lck_mtx_lock(&vm_swap_data_lock);
	}

	if (swf->swp_nseginuse) {

		swf->swp_flags &= ~SWAP_RECLAIM;
		swf->swp_flags |= SWAP_READY;

		goto done;
	}
	/*
	 * We don't remove this inactive swf from the queue.
	 * That way, we can re-use it when needed again and
	 * preserve the namespace. The delayed_trim processing
	 * is also dependent on us not removing swfs from the queue.
	 */
	//queue_remove(&swf_global_queue, swf, struct swapfile*, swp_queue);

	vm_num_swap_files--;

	vm_swapfile_total_segs_alloced -= swf->swp_nsegs;

	lck_mtx_unlock(&vm_swap_data_lock);

	vm_swapfile_close((uint64_t)(swf->swp_path), swf->swp_vp);

	kfree(swf->swp_csegs, swf->swp_nsegs * sizeof(c_segment_t));
	kfree(swf->swp_bitmap, MAX((swf->swp_nsegs >> 3), 1));

	lck_mtx_lock(&vm_swap_data_lock);

	if (swf->swp_flags & SWAP_PINNED) {
		vm_num_pinned_swap_files--;
		vm_swappin_avail += swf->swp_size;
	}

	swf->swp_vp = NULL;
	swf->swp_size = 0;
	swf->swp_free_hint = 0;
	swf->swp_nsegs = 0;
	swf->swp_flags = SWAP_REUSE;

done:
	thread_wakeup((event_t) &swf->swp_flags); /* anyone waiting on this swapfile */
	lck_mtx_unlock(&vm_swap_data_lock);

	kmem_free(compressor_map, (vm_offset_t) addr, C_SEG_BUFSIZE);
}

uint64_t
vm_swap_get_total_space(void)
{
	uint64_t total_space = 0;

	total_space = (uint64_t)vm_swapfile_total_segs_alloced * COMPRESSED_SWAP_CHUNK_SIZE;

	return total_space;
}

uint64_t
vm_swap_get_used_space(void)
{
	uint64_t used_space = 0;

	used_space = (uint64_t)vm_swapfile_total_segs_used * COMPRESSED_SWAP_CHUNK_SIZE;

	return used_space;
}

uint64_t
vm_swap_get_free_space(void)
{
	return (vm_swap_get_total_space() - vm_swap_get_used_space());
}


int
vm_swap_low_on_space(void)
{
	if (vm_num_swap_files == 0 && vm_swapfile_can_be_created == FALSE)
		return (0);

	if (((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < ((unsigned int)VM_SWAPFILE_HIWATER_SEGS) / 8)) {

		if (vm_num_swap_files == 0 && !SWAPPER_NEEDS_TO_UNTHROTTLE())
			return (0);

		if (vm_swapfile_last_failed_to_create_ts >= vm_swapfile_last_successful_create_ts)
			return (1);
	}
	return (0);
}

boolean_t
vm_swap_files_pinned(void)
{
	boolean_t result;

	if (vm_swappin_enabled == FALSE)
		return (TRUE);

	result = (vm_num_pinned_swap_files == vm_num_swap_files);

	return (result);
}