/*
 * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include "vm_compressor_backing_store.h"
#include <vm/vm_protos.h>

#include <IOKit/IOHibernatePrivate.h>

#include <kern/policy_internal.h>
boolean_t	compressor_store_stop_compaction = FALSE;
boolean_t	vm_swapfile_create_needed = FALSE;
boolean_t	vm_swapfile_gc_needed = FALSE;

int		swapper_throttle = -1;
boolean_t	swapper_throttle_inited = FALSE;
uint64_t	vm_swapout_thread_id;

uint64_t	vm_swap_put_failures = 0;
uint64_t	vm_swap_get_failures = 0;
int		vm_num_swap_files = 0;
int		vm_num_pinned_swap_files = 0;
int		vm_swapout_thread_processed_segments = 0;
int		vm_swapout_thread_awakened = 0;
int		vm_swapfile_create_thread_awakened = 0;
int		vm_swapfile_create_thread_running = 0;
int		vm_swapfile_gc_thread_awakened = 0;
int		vm_swapfile_gc_thread_running = 0;

int64_t		vm_swappin_avail = 0;
boolean_t	vm_swappin_enabled = FALSE;
unsigned int	vm_swapfile_total_segs_alloced = 0;
unsigned int	vm_swapfile_total_segs_used = 0;

extern vm_map_t compressor_map;
#define SWAP_READY	0x1	/* Swap file is ready to be used */
#define SWAP_RECLAIM	0x2	/* Swap file is marked to be reclaimed */
#define SWAP_WANTED	0x4	/* Swap file has waiters */
#define SWAP_REUSE	0x8	/* Swap file is on the Q and has a name. Reuse after init-ing.*/
#define SWAP_PINNED	0x10	/* Swap file is pinned (FusionDrive) */
struct swapfile {
	queue_head_t		swp_queue;	/* list of swap files */
	char			*swp_path;	/* saved pathname of swap file */
	struct vnode		*swp_vp;	/* backing vnode */
	uint64_t		swp_size;	/* size of this swap file */
	uint8_t			*swp_bitmap;	/* bitmap showing the alloced/freed slots in the swap file */
	unsigned int		swp_pathlen;	/* length of pathname */
	unsigned int		swp_nsegs;	/* #segments we can use */
	unsigned int		swp_nseginuse;	/* #segments in use */
	unsigned int		swp_index;	/* index of this swap file */
	unsigned int		swp_flags;	/* state of swap file */
	unsigned int		swp_free_hint;	/* offset of 1st free chunk */
	unsigned int		swp_io_count;	/* count of outstanding I/Os */
	c_segment_t		*swp_csegs;	/* back pointers to the c_segments. Used during swap reclaim. */

	struct trim_list	*swp_delayed_trim_list_head;
	unsigned int		swp_delayed_trim_count;
};
queue_head_t	swf_global_queue;
boolean_t	swp_trim_supported = FALSE;

extern clock_sec_t	dont_trim_until_ts;
clock_sec_t		vm_swapfile_last_failed_to_create_ts = 0;
clock_sec_t		vm_swapfile_last_successful_create_ts = 0;
int			vm_swapfile_can_be_created = FALSE;
boolean_t		delayed_trim_handling_in_progress = FALSE;

boolean_t		hibernate_in_progress_with_pinned_swap = FALSE;
static void vm_swapout_thread_throttle_adjust(void);
static void vm_swap_free_now(struct swapfile *swf, uint64_t f_offset);
static void vm_swapout_thread(void);
static void vm_swapfile_create_thread(void);
static void vm_swapfile_gc_thread(void);
static void vm_swap_defragment(void);
static void vm_swap_handle_delayed_trims(boolean_t);
static void vm_swap_do_delayed_trim(struct swapfile *);
static void vm_swap_wait_on_trim_handling_in_progress(void);
#define VM_MAX_SWAP_FILE_NUM		100
#define VM_SWAPFILE_DELAYED_TRIM_MAX	128

#define VM_SWAP_SHOULD_DEFRAGMENT()	(c_swappedout_sparse_count > (vm_swapfile_total_segs_used / 4) ? 1 : 0)
#define VM_SWAP_SHOULD_RECLAIM()	(((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) >= SWAPFILE_RECLAIM_THRESHOLD_SEGS) ? 1 : 0)
#define VM_SWAP_SHOULD_ABORT_RECLAIM()	(((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) <= SWAPFILE_RECLAIM_MINIMUM_SEGS) ? 1 : 0)
#define VM_SWAP_SHOULD_PIN(_size)	(vm_swappin_avail > 0 && vm_swappin_avail >= (int64_t)(_size))
#define VM_SWAP_SHOULD_CREATE(cur_ts)	((vm_num_swap_files < VM_MAX_SWAP_FILE_NUM) && ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < (unsigned int)VM_SWAPFILE_HIWATER_SEGS) && \
					 ((cur_ts - vm_swapfile_last_failed_to_create_ts) > VM_SWAPFILE_DELAYED_CREATE) ? 1 : 0)
#define VM_SWAP_SHOULD_TRIM(swf)	((swf->swp_delayed_trim_count >= VM_SWAPFILE_DELAYED_TRIM_MAX) ? 1 : 0)

#define VM_SWAPFILE_DELAYED_CREATE	15

#define VM_SWAP_BUSY()	((c_swapout_count && (swapper_throttle == THROTTLE_LEVEL_COMPRESSOR_TIER1 || swapper_throttle == THROTTLE_LEVEL_COMPRESSOR_TIER0)) ? 1 : 0)
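
/*
 * Worked example of the create heuristic: a new swapfile is considered
 * only when fewer than VM_SWAPFILE_HIWATER_SEGS allocated-but-unused
 * segments remain, no create attempt has failed within the last
 * VM_SWAPFILE_DELAYED_CREATE (15) seconds, and we are still under
 * VM_MAX_SWAP_FILE_NUM (100) files.  Defragmentation is suggested once
 * the count of sparsely populated swapped-out segments exceeds a quarter
 * of the segments currently in use.
 */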
#if CHECKSUM_THE_SWAP
extern unsigned int hash_string(char *cp, int len);
#endif
#if RECORD_THE_COMPRESSED_DATA
boolean_t	c_compressed_record_init_done = FALSE;
int		c_compressed_record_write_error = 0;
struct vnode	*c_compressed_record_vp = NULL;
uint64_t	c_compressed_record_file_offset = 0;
void		c_compressed_record_init(void);
void		c_compressed_record_write(char *, int);
#endif
#if ENCRYPTED_SWAP
extern boolean_t		swap_crypt_ctx_initialized;
extern void			swap_crypt_ctx_initialize(void);
extern const unsigned char	swap_crypt_null_iv[AES_BLOCK_SIZE];
extern aes_ctx			swap_crypt_ctx;
extern unsigned long		vm_page_encrypt_counter;
extern unsigned long		vm_page_decrypt_counter;
#endif /* ENCRYPTED_SWAP */
extern void vm_pageout_io_throttle(void);

static struct swapfile *vm_swapfile_for_handle(uint64_t);
/*
 * Called with the vm_swap_data_lock held.
 */
static struct swapfile *
vm_swapfile_for_handle(uint64_t f_offset)
{
	uint64_t	file_offset = 0;
	unsigned int	swapfile_index = 0;
	struct swapfile	*swf = NULL;

	file_offset = (f_offset & SWAP_SLOT_MASK);
	swapfile_index = (f_offset >> SWAP_DEVICE_SHIFT);

	swf = (struct swapfile *) queue_first(&swf_global_queue);

	while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {

		if (swapfile_index == swf->swp_index) {
			break;
		}
		swf = (struct swapfile *) queue_next(&swf->swp_queue);
	}

	if (queue_end(&swf_global_queue, (queue_entry_t) swf)) {
		swf = NULL;
	}
	return swf;
}
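
/*
 * A swap handle is a single 64-bit value that encodes both the file and
 * the location within it: the swapfile's swp_index lives in the bits at
 * and above SWAP_DEVICE_SHIFT, while the byte offset of the slot within
 * that file lives in the low bits covered by SWAP_SLOT_MASK.
 * vm_swap_put composes handles this way,
 * (swapfile_index << SWAP_DEVICE_SHIFT) | file_offset, and the routine
 * above decomposes them.
 */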
void
vm_compressor_swap_init()
{
	thread_t	thread = NULL;

	lck_grp_attr_setdefault(&vm_swap_data_lock_grp_attr);
	lck_grp_init(&vm_swap_data_lock_grp,
		     "vm_swap_data",
		     &vm_swap_data_lock_grp_attr);
	lck_attr_setdefault(&vm_swap_data_lock_attr);
	lck_mtx_init_ext(&vm_swap_data_lock,
			 &vm_swap_data_lock_ext,
			 &vm_swap_data_lock_grp,
			 &vm_swap_data_lock_attr);

	queue_init(&swf_global_queue);

	if (kernel_thread_start_priority((thread_continue_t)vm_swapout_thread, NULL,
					 BASEPRI_PREEMPT - 1, &thread) != KERN_SUCCESS) {
		panic("vm_swapout_thread: create failed");
	}
	vm_swapout_thread_id = thread->thread_id;

	thread_deallocate(thread);

	if (kernel_thread_start_priority((thread_continue_t)vm_swapfile_create_thread, NULL,
					 BASEPRI_PREEMPT - 1, &thread) != KERN_SUCCESS) {
		panic("vm_swapfile_create_thread: create failed");
	}
	thread_deallocate(thread);

	if (kernel_thread_start_priority((thread_continue_t)vm_swapfile_gc_thread, NULL,
					 BASEPRI_PREEMPT - 1, &thread) != KERN_SUCCESS) {
		panic("vm_swapfile_gc_thread: create failed");
	}
	thread_deallocate(thread);

	proc_set_thread_policy_with_tid(kernel_task, thread->thread_id,
					TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER2);
	proc_set_thread_policy_with_tid(kernel_task, thread->thread_id,
					TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);

#if ENCRYPTED_SWAP
	if (swap_crypt_ctx_initialized == FALSE) {
		swap_crypt_ctx_initialize();
	}
#endif /* ENCRYPTED_SWAP */

	memset(swapfilename, 0, MAX_SWAPFILENAME_LEN + 1);

	printf("VM Swap Subsystem is ON\n");
}
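
/*
 * The three helper threads started above divide the work as follows:
 * vm_swapout_thread writes compressed segments from the swapout queue to
 * disk, vm_swapfile_create_thread grows the swapfile population (and
 * batches delayed trims), and vm_swapfile_gc_thread defragments and
 * reclaims underused swapfiles.  Each parks on its own event and is
 * woken via thread_wakeup() on the corresponding *_needed flag.
 */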
#if RECORD_THE_COMPRESSED_DATA

void
c_compressed_record_init()
{
	if (c_compressed_record_init_done == FALSE) {
		vm_swapfile_open("/tmp/compressed_data", &c_compressed_record_vp);
		c_compressed_record_init_done = TRUE;
	}
}

void
c_compressed_record_write(char *buf, int size)
{
	if (c_compressed_record_write_error == 0) {
		c_compressed_record_write_error = vm_record_file_write(c_compressed_record_vp, c_compressed_record_file_offset, buf, size);
		c_compressed_record_file_offset += size;
	}
}
#endif
int	compaction_swapper_inited = 0;

void
vm_compaction_swapper_do_init(void)
{
	struct vnode	*vp;
	char		*pathname;
	int		namelen;

	if (compaction_swapper_inited)
		return;

	if (vm_compressor_mode != VM_PAGER_COMPRESSOR_WITH_SWAP) {
		compaction_swapper_inited = 1;
		return;
	}
	lck_mtx_lock(&vm_swap_data_lock);

	if ( !compaction_swapper_inited) {

		if (strlen(swapfilename) == 0) {
			/*
			 * If no swapfile name has been set, we'll
			 * use the default name.
			 *
			 * Also, this function is only called from the vm_pageout_scan thread
			 * via vm_consider_waking_compactor_swapper,
			 * so we don't need to worry about a race in checking/setting the name here.
			 */
			strlcpy(swapfilename, SWAP_FILE_NAME, MAX_SWAPFILENAME_LEN);
		}
		namelen = (int)strlen(swapfilename) + SWAPFILENAME_INDEX_LEN + 1;
		pathname = (char*)kalloc(namelen);
		memset(pathname, 0, namelen);
		snprintf(pathname, namelen, "%s%d", swapfilename, 0);

		vm_swapfile_open(pathname, &vp);

		if (vp) {

			if (vnode_pager_isSSD(vp) == FALSE) {
				vm_compressor_minorcompact_threshold_divisor = 18;
				vm_compressor_majorcompact_threshold_divisor = 22;
				vm_compressor_unthrottle_threshold_divisor = 32;
			}
			vnode_setswapmount(vp);
			vm_swappin_avail = vnode_getswappin_avail(vp);

			if (vm_swappin_avail)
				vm_swappin_enabled = TRUE;
			vm_swapfile_close((uint64_t)pathname, vp);
		}
		kfree(pathname, namelen);

		compaction_swapper_inited = 1;
	}
	lck_mtx_unlock(&vm_swap_data_lock);
}
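
/*
 * The divisor tweaks above take effect only when the backing store is
 * not an SSD: rotational media presumably pays a much higher cost per
 * swap I/O, so minor/major compaction and unthrottling are triggered at
 * different compressor fill levels than the defaults.  The specific
 * values (18/22/32) appear to be empirically chosen tuning constants.
 */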
#if ENCRYPTED_SWAP
void
vm_swap_encrypt(c_segment_t c_seg)
{
	vm_offset_t	kernel_vaddr = 0;
	uint64_t	size = 0;

	union {
		unsigned char	aes_iv[AES_BLOCK_SIZE];
		void		*c_seg;
	} encrypt_iv;

	assert(swap_crypt_ctx_initialized);

#if DEVELOPMENT || DEBUG
	C_SEG_MAKE_WRITEABLE(c_seg);
#endif
	bzero(&encrypt_iv.aes_iv[0], sizeof (encrypt_iv.aes_iv));

	encrypt_iv.c_seg = (void*)c_seg;

	/* encrypt the "initial vector" */
	aes_encrypt_cbc((const unsigned char *) &encrypt_iv.aes_iv[0],
			swap_crypt_null_iv,
			1,
			&encrypt_iv.aes_iv[0],
			&swap_crypt_ctx.encrypt);

	kernel_vaddr = (vm_offset_t) c_seg->c_store.c_buffer;
	size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));

	/*
	 * Encrypt the c_segment.
	 */
	aes_encrypt_cbc((const unsigned char *) kernel_vaddr,
			&encrypt_iv.aes_iv[0],
			(unsigned int)(size / AES_BLOCK_SIZE),
			(unsigned char *) kernel_vaddr,
			&swap_crypt_ctx.encrypt);

	vm_page_encrypt_counter += (size/PAGE_SIZE_64);

#if DEVELOPMENT || DEBUG
	C_SEG_WRITE_PROTECT(c_seg);
#endif
}
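
/*
 * IV construction above: the c_segment's address is dropped into an AES
 * block and run through aes_encrypt_cbc() with the all-zero IV
 * (swap_crypt_null_iv), yielding a per-segment initialization vector.
 * Two segments therefore never share an IV; the decrypt path below must
 * rebuild the identical IV from the same pointer before decrypting.
 */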
void
vm_swap_decrypt(c_segment_t c_seg)
{
	vm_offset_t	kernel_vaddr = 0;
	uint64_t	size = 0;

	union {
		unsigned char	aes_iv[AES_BLOCK_SIZE];
		void		*c_seg;
	} decrypt_iv;

	assert(swap_crypt_ctx_initialized);

#if DEVELOPMENT || DEBUG
	C_SEG_MAKE_WRITEABLE(c_seg);
#endif
	/*
	 * Prepare an "initial vector" for the decryption.
	 * It has to be the same as the "initial vector" we
	 * used to encrypt that page.
	 */
	bzero(&decrypt_iv.aes_iv[0], sizeof (decrypt_iv.aes_iv));

	decrypt_iv.c_seg = (void*)c_seg;

	/* encrypt the "initial vector" */
	aes_encrypt_cbc((const unsigned char *) &decrypt_iv.aes_iv[0],
			swap_crypt_null_iv,
			1,
			&decrypt_iv.aes_iv[0],
			&swap_crypt_ctx.encrypt);

	kernel_vaddr = (vm_offset_t) c_seg->c_store.c_buffer;
	size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));

	/*
	 * Decrypt the c_segment.
	 */
	aes_decrypt_cbc((const unsigned char *) kernel_vaddr,
			&decrypt_iv.aes_iv[0],
			(unsigned int) (size / AES_BLOCK_SIZE),
			(unsigned char *) kernel_vaddr,
			&swap_crypt_ctx.decrypt);

	vm_page_decrypt_counter += (size/PAGE_SIZE_64);

#if DEVELOPMENT || DEBUG
	C_SEG_WRITE_PROTECT(c_seg);
#endif
}
#endif /* ENCRYPTED_SWAP */
void
vm_swap_consider_defragmenting()
{
	if (compressor_store_stop_compaction == FALSE && !VM_SWAP_BUSY() &&
	    (VM_SWAP_SHOULD_DEFRAGMENT() || VM_SWAP_SHOULD_RECLAIM())) {

		if (!vm_swapfile_gc_thread_running) {
			lck_mtx_lock(&vm_swap_data_lock);

			if (!vm_swapfile_gc_thread_running)
				thread_wakeup((event_t) &vm_swapfile_gc_needed);

			lck_mtx_unlock(&vm_swap_data_lock);
		}
	}
}
int vm_swap_defragment_yielded = 0;
int vm_swap_defragment_swapin = 0;
int vm_swap_defragment_free = 0;
int vm_swap_defragment_busy = 0;

static void
vm_swap_defragment()
{
	c_segment_t	c_seg;

	/*
	 * have to grab the master lock w/o holding
	 * any locks in spin mode
	 */
	PAGE_REPLACEMENT_DISALLOWED(TRUE);

	lck_mtx_lock_spin_always(c_list_lock);

	while (!queue_empty(&c_swappedout_sparse_list_head)) {

		if (compressor_store_stop_compaction == TRUE || VM_SWAP_BUSY()) {
			vm_swap_defragment_yielded++;
			break;
		}
		c_seg = (c_segment_t)queue_first(&c_swappedout_sparse_list_head);

		lck_mtx_lock_spin_always(&c_seg->c_lock);

		assert(c_seg->c_state == C_ON_SWAPPEDOUTSPARSE_Q);

		if (c_seg->c_busy) {
			lck_mtx_unlock_always(c_list_lock);

			PAGE_REPLACEMENT_DISALLOWED(FALSE);
			/*
			 * c_seg_wait_on_busy consumes c_seg->c_lock
			 */
			c_seg_wait_on_busy(c_seg);

			PAGE_REPLACEMENT_DISALLOWED(TRUE);

			lck_mtx_lock_spin_always(c_list_lock);

			vm_swap_defragment_busy++;
			continue;
		}
		if (c_seg->c_bytes_used == 0) {
			/*
			 * c_seg_free_locked consumes the c_list_lock
			 * and c_seg->c_lock
			 */
			C_SEG_BUSY(c_seg);
			c_seg_free_locked(c_seg);

			vm_swap_defragment_free++;
		} else {
			lck_mtx_unlock_always(c_list_lock);

			if (c_seg_swapin(c_seg, TRUE, FALSE) == 0)
				lck_mtx_unlock_always(&c_seg->c_lock);

			vm_swap_defragment_swapin++;
		}
		PAGE_REPLACEMENT_DISALLOWED(FALSE);

		vm_pageout_io_throttle();

		/*
		 * because write waiters have privilege over readers,
		 * dropping and immediately retaking the master lock will
		 * still allow any thread waiting to acquire the
		 * master lock exclusively an opportunity to take it
		 */
		PAGE_REPLACEMENT_DISALLOWED(TRUE);

		lck_mtx_lock_spin_always(c_list_lock);
	}
	lck_mtx_unlock_always(c_list_lock);

	PAGE_REPLACEMENT_DISALLOWED(FALSE);
}
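
/*
 * Defragmentation here is really consolidation: swapping a sparse
 * segment back in (c_seg_swapin) frees its on-disk slot, and the
 * compressor's normal minor/major compaction repacks the data before it
 * is swapped out again; segments with no used bytes are simply freed.
 * Emptying sparse segments is what eventually lets vm_swap_reclaim
 * shrink the number of swapfiles.
 */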
static void
vm_swapfile_create_thread(void)
{
	clock_sec_t	sec;
	clock_nsec_t	nsec;

	current_thread()->options |= TH_OPT_VMPRIV;

	vm_swapfile_create_thread_awakened++;
	vm_swapfile_create_thread_running = 1;

	while (TRUE) {
		/*
		 * walk through the list of swap files
		 * and do the delayed frees/trims for
		 * any swap file whose count of delayed
		 * frees is above the batch limit
		 */
		vm_swap_handle_delayed_trims(FALSE);

		lck_mtx_lock(&vm_swap_data_lock);

		if (hibernate_in_progress_with_pinned_swap == TRUE)
			break;

		clock_get_system_nanotime(&sec, &nsec);

		if (VM_SWAP_SHOULD_CREATE(sec) == 0)
			break;

		lck_mtx_unlock(&vm_swap_data_lock);

		if (vm_swap_create_file() == FALSE) {
			vm_swapfile_last_failed_to_create_ts = sec;
			HIBLOG("vm_swap_create_file failed @ %lu secs\n", (unsigned long)sec);

		} else
			vm_swapfile_last_successful_create_ts = sec;
	}
	vm_swapfile_create_thread_running = 0;

	if (hibernate_in_progress_with_pinned_swap == TRUE)
		thread_wakeup((event_t)&hibernate_in_progress_with_pinned_swap);

	assert_wait((event_t)&vm_swapfile_create_needed, THREAD_UNINT);

	lck_mtx_unlock(&vm_swap_data_lock);

	thread_block((thread_continue_t)vm_swapfile_create_thread);

	/* NOTREACHED */
}
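
/*
 * Like the other swap helper threads, this one never returns: it parks
 * on vm_swapfile_create_needed and names itself as the continuation, so
 * each wakeup restarts the function from the top with a fresh stack.
 */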
#if HIBERNATION

kern_return_t
hibernate_pin_swap(boolean_t start)
{
	vm_compaction_swapper_do_init();

	if (start == FALSE) {

		lck_mtx_lock(&vm_swap_data_lock);
		hibernate_in_progress_with_pinned_swap = FALSE;
		lck_mtx_unlock(&vm_swap_data_lock);

		return (KERN_SUCCESS);
	}
	if (vm_swappin_enabled == FALSE)
		return (KERN_SUCCESS);

	lck_mtx_lock(&vm_swap_data_lock);

	hibernate_in_progress_with_pinned_swap = TRUE;

	while (vm_swapfile_create_thread_running || vm_swapfile_gc_thread_running) {

		assert_wait((event_t)&hibernate_in_progress_with_pinned_swap, THREAD_UNINT);

		lck_mtx_unlock(&vm_swap_data_lock);

		thread_block(THREAD_CONTINUE_NULL);

		lck_mtx_lock(&vm_swap_data_lock);
	}
	if (vm_num_swap_files > vm_num_pinned_swap_files) {
		hibernate_in_progress_with_pinned_swap = FALSE;
		lck_mtx_unlock(&vm_swap_data_lock);

		HIBLOG("hibernate_pin_swap failed - vm_num_swap_files = %d, vm_num_pinned_swap_files = %d\n",
		       vm_num_swap_files, vm_num_pinned_swap_files);
		return (KERN_FAILURE);
	}
	lck_mtx_unlock(&vm_swap_data_lock);

	while (VM_SWAP_SHOULD_PIN(MAX_SWAP_FILE_SIZE)) {
		if (vm_swap_create_file() == FALSE)
			break;
	}
	return (KERN_SUCCESS);
}
#endif /* HIBERNATION */
static void
vm_swapfile_gc_thread(void)
{
	boolean_t	need_defragment;
	boolean_t	need_reclaim;

	vm_swapfile_gc_thread_awakened++;
	vm_swapfile_gc_thread_running = 1;

	while (TRUE) {

		lck_mtx_lock(&vm_swap_data_lock);

		if (hibernate_in_progress_with_pinned_swap == TRUE)
			break;

		if (VM_SWAP_BUSY() || compressor_store_stop_compaction == TRUE)
			break;

		need_defragment = FALSE;
		need_reclaim = FALSE;

		if (VM_SWAP_SHOULD_DEFRAGMENT())
			need_defragment = TRUE;

		if (VM_SWAP_SHOULD_RECLAIM()) {
			need_defragment = TRUE;
			need_reclaim = TRUE;
		}
		if (need_defragment == FALSE && need_reclaim == FALSE)
			break;

		lck_mtx_unlock(&vm_swap_data_lock);

		if (need_defragment == TRUE)
			vm_swap_defragment();
		if (need_reclaim == TRUE)
			vm_swap_reclaim();
	}
	vm_swapfile_gc_thread_running = 0;

	if (hibernate_in_progress_with_pinned_swap == TRUE)
		thread_wakeup((event_t)&hibernate_in_progress_with_pinned_swap);

	assert_wait((event_t)&vm_swapfile_gc_needed, THREAD_UNINT);

	lck_mtx_unlock(&vm_swap_data_lock);

	thread_block((thread_continue_t)vm_swapfile_gc_thread);

	/* NOTREACHED */
}
int	swapper_entered_T0 = 0;
int	swapper_entered_T1 = 0;
int	swapper_entered_T2 = 0;

static void
vm_swapout_thread_throttle_adjust(void)
{
	int swapper_throttle_new;

	if (swapper_throttle_inited == FALSE) {
		/*
		 * force this thread to be set to the correct
		 * throttling tier
		 */
		swapper_throttle_new = THROTTLE_LEVEL_COMPRESSOR_TIER2;
		swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER1;
		swapper_throttle_inited = TRUE;
		swapper_entered_T2++;
		goto done;
	}
	swapper_throttle_new = swapper_throttle;

	switch(swapper_throttle) {

	case THROTTLE_LEVEL_COMPRESSOR_TIER2:

		if (SWAPPER_NEEDS_TO_UNTHROTTLE() || swapout_target_age || hibernate_flushing == TRUE) {
			swapper_throttle_new = THROTTLE_LEVEL_COMPRESSOR_TIER1;
			swapper_entered_T1++;
			break;
		}
		break;

	case THROTTLE_LEVEL_COMPRESSOR_TIER1:

		if (VM_PAGEOUT_SCAN_NEEDS_TO_THROTTLE()) {
			swapper_throttle_new = THROTTLE_LEVEL_COMPRESSOR_TIER0;
			swapper_entered_T0++;
			break;
		}
		if (COMPRESSOR_NEEDS_TO_SWAP() == 0 && swapout_target_age == 0 && hibernate_flushing == FALSE) {
			swapper_throttle_new = THROTTLE_LEVEL_COMPRESSOR_TIER2;
			swapper_entered_T2++;
			break;
		}
		break;

	case THROTTLE_LEVEL_COMPRESSOR_TIER0:

		if (COMPRESSOR_NEEDS_TO_SWAP() == 0) {
			swapper_throttle_new = THROTTLE_LEVEL_COMPRESSOR_TIER2;
			swapper_entered_T2++;
			break;
		}
		if (SWAPPER_NEEDS_TO_UNTHROTTLE() == 0) {
			swapper_throttle_new = THROTTLE_LEVEL_COMPRESSOR_TIER1;
			swapper_entered_T1++;
			break;
		}
		break;
	}
done:
	if (swapper_throttle != swapper_throttle_new) {
		proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
						TASK_POLICY_INTERNAL, TASK_POLICY_IO, swapper_throttle_new);
		proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
						TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);

		swapper_throttle = swapper_throttle_new;
	}
}
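
/*
 * Tier numbering: THROTTLE_LEVEL_COMPRESSOR_TIER0 is the least throttled
 * and TIER2 the most.  The state machine above promotes the swapout
 * thread's I/O priority while vm_pageout_scan is blocked on the
 * compressor (or a hibernate flush is underway) and demotes it back to
 * TIER2 once the pressure clears, re-applying the policy via
 * proc_set_thread_policy_with_tid() only on an actual tier change.
 */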
int vm_swapout_found_empty = 0;

static void
vm_swapout_thread(void)
{
	uint64_t	f_offset = 0;
	uint32_t	size = 0;
	c_segment_t	c_seg = NULL;
	kern_return_t	kr = KERN_SUCCESS;
	vm_offset_t	addr = 0;

	current_thread()->options |= TH_OPT_VMPRIV;

	vm_swapout_thread_awakened++;

	lck_mtx_lock_spin_always(c_list_lock);

	while (!queue_empty(&c_swapout_list_head)) {

		c_seg = (c_segment_t)queue_first(&c_swapout_list_head);

		lck_mtx_lock_spin_always(&c_seg->c_lock);

		assert(c_seg->c_state == C_ON_SWAPOUT_Q);

		if (c_seg->c_busy) {
			lck_mtx_unlock_always(c_list_lock);

			c_seg_wait_on_busy(c_seg);

			lck_mtx_lock_spin_always(c_list_lock);

			continue;
		}
		vm_swapout_thread_processed_segments++;

		size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));

		if (size == 0) {
			assert(c_seg->c_bytes_used == 0);

			if (!c_seg->c_on_minorcompact_q)
				c_seg_need_delayed_compaction(c_seg, TRUE);

			c_seg_switch_state(c_seg, C_IS_EMPTY, FALSE);
			lck_mtx_unlock_always(&c_seg->c_lock);
			lck_mtx_unlock_always(c_list_lock);

			vm_swapout_found_empty++;
			goto c_seg_is_empty;
		}
		C_SEG_BUSY(c_seg);
		c_seg->c_busy_swapping = 1;

		lck_mtx_unlock_always(c_list_lock);

		addr = (vm_offset_t) c_seg->c_store.c_buffer;

		lck_mtx_unlock_always(&c_seg->c_lock);

#if CHECKSUM_THE_SWAP
		c_seg->cseg_hash = hash_string((char*)addr, (int)size);
		c_seg->cseg_swap_size = size;
#endif /* CHECKSUM_THE_SWAP */

#if ENCRYPTED_SWAP
		vm_swap_encrypt(c_seg);
#endif /* ENCRYPTED_SWAP */

		vm_swapout_thread_throttle_adjust();

		kr = vm_swap_put((vm_offset_t) addr, &f_offset, size, c_seg);

		PAGE_REPLACEMENT_DISALLOWED(TRUE);

		if (kr == KERN_SUCCESS) {
			kernel_memory_depopulate(compressor_map, (vm_offset_t) addr, size, KMA_COMPRESSOR);
		}
#if ENCRYPTED_SWAP
		else {
			vm_swap_decrypt(c_seg);
		}
#endif /* ENCRYPTED_SWAP */
		lck_mtx_lock_spin_always(c_list_lock);
		lck_mtx_lock_spin_always(&c_seg->c_lock);

		if (kr == KERN_SUCCESS) {
			int		new_state = C_ON_SWAPPEDOUT_Q;
			boolean_t	insert_head = FALSE;

			if (hibernate_flushing == TRUE) {
				if (c_seg->c_generation_id >= first_c_segment_to_warm_generation_id &&
				    c_seg->c_generation_id <= last_c_segment_to_warm_generation_id)
					insert_head = TRUE;
			} else if (C_SEG_ONDISK_IS_SPARSE(c_seg))
				new_state = C_ON_SWAPPEDOUTSPARSE_Q;

			c_seg_switch_state(c_seg, new_state, insert_head);

			c_seg->c_store.c_swap_handle = f_offset;

			VM_STAT_INCR_BY(swapouts, size >> PAGE_SHIFT);

			if (c_seg->c_bytes_used)
				OSAddAtomic64(-c_seg->c_bytes_used, &compressor_bytes_used);
		} else {
			if (c_seg->c_overage_swap == TRUE) {
				c_seg->c_overage_swap = FALSE;
				c_overage_swapped_count--;
			}
			c_seg_switch_state(c_seg, C_ON_AGE_Q, FALSE);

			if (!c_seg->c_on_minorcompact_q && C_SEG_UNUSED_BYTES(c_seg) >= PAGE_SIZE)
				c_seg_need_delayed_compaction(c_seg, TRUE);
		}
		assert(c_seg->c_busy_swapping);
		assert(c_seg->c_busy);

		c_seg->c_busy_swapping = 0;
		lck_mtx_unlock_always(c_list_lock);

		C_SEG_WAKEUP_DONE(c_seg);
		lck_mtx_unlock_always(&c_seg->c_lock);

		PAGE_REPLACEMENT_DISALLOWED(FALSE);

		vm_pageout_io_throttle();
c_seg_is_empty:
		if (c_swapout_count == 0)
			vm_swap_consider_defragmenting();

		lck_mtx_lock_spin_always(c_list_lock);
	}
	assert_wait((event_t)&c_swapout_list_head, THREAD_UNINT);

	lck_mtx_unlock_always(c_list_lock);

	thread_block((thread_continue_t)vm_swapout_thread);

	/* NOTREACHED */
}
boolean_t
vm_swap_create_file()
{
	uint64_t	size = 0;
	int		namelen = 0;
	boolean_t	swap_file_created = FALSE;
	boolean_t	swap_file_reuse = FALSE;
	boolean_t	swap_file_pin = FALSE;
	struct swapfile *swf = NULL;

	/*
	 * make sure we've got all the info we need
	 * to potentially pin a swap file... we could
	 * be swapping out due to hibernation w/o ever
	 * having run vm_pageout_scan, which is normally
	 * the trigger to do the init
	 */
	vm_compaction_swapper_do_init();

	/*
	 * Any swapfile structure ready for re-use?
	 */
	lck_mtx_lock(&vm_swap_data_lock);

	swf = (struct swapfile *) queue_first(&swf_global_queue);

	while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
		if (swf->swp_flags == SWAP_REUSE) {
			swap_file_reuse = TRUE;
			break;
		}
		swf = (struct swapfile *) queue_next(&swf->swp_queue);
	}

	lck_mtx_unlock(&vm_swap_data_lock);

	if (swap_file_reuse == FALSE) {

		if (strlen(swapfilename) == 0) {
			/*
			 * If no swapfile name has been set, we'll
			 * use the default name.
			 *
			 * Also, this function is only called from the swapfile management thread.
			 * So we don't need to worry about a race in checking/setting the name here.
			 */
			strlcpy(swapfilename, SWAP_FILE_NAME, MAX_SWAPFILENAME_LEN);
		}
		namelen = (int)strlen(swapfilename) + SWAPFILENAME_INDEX_LEN + 1;

		swf = (struct swapfile *) kalloc(sizeof *swf);
		memset(swf, 0, sizeof(*swf));

		swf->swp_index = vm_num_swap_files + 1;
		swf->swp_pathlen = namelen;
		swf->swp_path = (char*)kalloc(swf->swp_pathlen);

		memset(swf->swp_path, 0, namelen);

		snprintf(swf->swp_path, namelen, "%s%d", swapfilename, vm_num_swap_files);
	}
	vm_swapfile_open(swf->swp_path, &swf->swp_vp);

	if (swf->swp_vp == NULL) {
		if (swap_file_reuse == FALSE) {
			kfree(swf->swp_path, swf->swp_pathlen);
			kfree(swf, sizeof *swf);
		}
		return FALSE;
	}
	vm_swapfile_can_be_created = TRUE;

	size = MAX_SWAP_FILE_SIZE;

	while (size >= MIN_SWAP_FILE_SIZE) {

		swap_file_pin = VM_SWAP_SHOULD_PIN(size);

		if (vm_swapfile_preallocate(swf->swp_vp, &size, &swap_file_pin) == 0) {

			int num_bytes_for_bitmap = 0;

			swap_file_created = TRUE;

			swf->swp_size = size;
			swf->swp_nsegs = (unsigned int) (size / COMPRESSED_SWAP_CHUNK_SIZE);
			swf->swp_nseginuse = 0;
			swf->swp_free_hint = 0;

			num_bytes_for_bitmap = MAX((swf->swp_nsegs >> 3), 1);
			/*
			 * Allocate a bitmap that describes the
			 * number of segments held by this swapfile.
			 */
			swf->swp_bitmap = (uint8_t*)kalloc(num_bytes_for_bitmap);
			memset(swf->swp_bitmap, 0, num_bytes_for_bitmap);

			swf->swp_csegs = (c_segment_t *) kalloc(swf->swp_nsegs * sizeof(c_segment_t));
			memset(swf->swp_csegs, 0, (swf->swp_nsegs * sizeof(c_segment_t)));

			/*
			 * passing a NULL trim_list into vnode_trim_list
			 * will return ENOTSUP if trim isn't supported
			 * and 0 if it is
			 */
			if (vnode_trim_list(swf->swp_vp, NULL, FALSE) == 0)
				swp_trim_supported = TRUE;

			lck_mtx_lock(&vm_swap_data_lock);

			swf->swp_flags = SWAP_READY;

			if (swap_file_reuse == FALSE) {
				queue_enter(&swf_global_queue, swf, struct swapfile *, swp_queue);
			}
			vm_num_swap_files++;

			vm_swapfile_total_segs_alloced += swf->swp_nsegs;

			if (swap_file_pin == TRUE) {
				vm_num_pinned_swap_files++;
				swf->swp_flags |= SWAP_PINNED;
				vm_swappin_avail -= swf->swp_size;
			}
			lck_mtx_unlock(&vm_swap_data_lock);

			thread_wakeup((event_t) &vm_num_swap_files);
			break;
		} else {
			size = size / 2;
		}
	}
	if (swap_file_created == FALSE) {

		vm_swapfile_close((uint64_t)(swf->swp_path), swf->swp_vp);

		swf->swp_vp = NULL;

		if (swap_file_reuse == FALSE) {
			kfree(swf->swp_path, swf->swp_pathlen);
			kfree(swf, sizeof *swf);
		}
	}
	return swap_file_created;
}
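
/*
 * Sizing note: the preallocation loop starts at MAX_SWAP_FILE_SIZE and
 * retries with smaller sizes down to MIN_SWAP_FILE_SIZE, so a nearly
 * full filesystem can still yield a (smaller) usable swapfile.  Per-file
 * bookkeeping costs one bitmap bit plus one c_segment_t back pointer for
 * each COMPRESSED_SWAP_CHUNK_SIZE slot.
 */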
kern_return_t
vm_swap_get(c_segment_t c_seg, uint64_t f_offset, uint64_t size)
{
	struct swapfile *swf = NULL;
	uint64_t	file_offset = 0;
	int		retval = 0;

	assert(c_seg->c_store.c_buffer);

	lck_mtx_lock(&vm_swap_data_lock);

	swf = vm_swapfile_for_handle(f_offset);

	if (swf == NULL || ( !(swf->swp_flags & SWAP_READY) && !(swf->swp_flags & SWAP_RECLAIM))) {
		retval = 1;
		goto done;
	}
	swf->swp_io_count++;

	lck_mtx_unlock(&vm_swap_data_lock);

#if DEVELOPMENT || DEBUG
	C_SEG_MAKE_WRITEABLE(c_seg);
#endif
	file_offset = (f_offset & SWAP_SLOT_MASK);
	retval = vm_swapfile_io(swf->swp_vp, file_offset, c_seg->c_store.c_buffer, (int)(size / PAGE_SIZE_64), SWAP_READ);

#if DEVELOPMENT || DEBUG
	C_SEG_WRITE_PROTECT(c_seg);
#endif
	if (retval == 0)
		VM_STAT_INCR_BY(swapins, size >> PAGE_SHIFT);
	else
		vm_swap_get_failures++;

	/*
	 * Free this slot in the swap structure.
	 */
	vm_swap_free(f_offset);

	lck_mtx_lock(&vm_swap_data_lock);
	swf->swp_io_count--;

	if ((swf->swp_flags & SWAP_WANTED) && swf->swp_io_count == 0) {

		swf->swp_flags &= ~SWAP_WANTED;
		thread_wakeup((event_t) &swf->swp_flags);
	}
done:
	lck_mtx_unlock(&vm_swap_data_lock);

	if (retval == 0)
		return KERN_SUCCESS;
	else
		return KERN_FAILURE;
}
kern_return_t
vm_swap_put(vm_offset_t addr, uint64_t *f_offset, uint64_t size, c_segment_t c_seg)
{
	unsigned int	segidx = 0;
	struct swapfile *swf = NULL;
	uint64_t	file_offset = 0;
	uint64_t	swapfile_index = 0;
	unsigned int	byte_for_segidx = 0;
	unsigned int	offset_within_byte = 0;
	boolean_t	swf_eligible = FALSE;
	boolean_t	waiting = FALSE;
	boolean_t	retried = FALSE;
	int		error = 0;
	clock_sec_t	sec;
	clock_nsec_t	nsec;

	if (addr == 0 || f_offset == NULL) {
		return KERN_FAILURE;
	}
retry:
	lck_mtx_lock(&vm_swap_data_lock);

	swf = (struct swapfile *) queue_first(&swf_global_queue);

	while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {

		segidx = swf->swp_free_hint;

		swf_eligible = (swf->swp_flags & SWAP_READY) && (swf->swp_nseginuse < swf->swp_nsegs);

		if (swf_eligible) {

			while (segidx < swf->swp_nsegs) {

				byte_for_segidx = segidx >> 3;
				offset_within_byte = segidx % 8;

				if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {
					segidx++;
					continue;
				}
				(swf->swp_bitmap)[byte_for_segidx] |= (1 << offset_within_byte);

				file_offset = segidx * COMPRESSED_SWAP_CHUNK_SIZE;
				swf->swp_nseginuse++;
				swf->swp_io_count++;
				swapfile_index = swf->swp_index;

				vm_swapfile_total_segs_used++;

				clock_get_system_nanotime(&sec, &nsec);

				if (VM_SWAP_SHOULD_CREATE(sec) && !vm_swapfile_create_thread_running)
					thread_wakeup((event_t) &vm_swapfile_create_needed);

				lck_mtx_unlock(&vm_swap_data_lock);

				goto done;
			}
		}
		swf = (struct swapfile *) queue_next(&swf->swp_queue);
	}
	assert(queue_end(&swf_global_queue, (queue_entry_t) swf));

	/*
	 * we've run out of swap segments, but may not
	 * be in a position to immediately create a new swap
	 * file if we've recently failed to create due to a lack
	 * of free space in the root filesystem... we'll try
	 * to kick that create off, but in any event we're going
	 * to take a breather (up to 1 second) so that we're not caught in a tight
	 * loop back in "vm_compressor_compact_and_swap" trying to stuff
	 * segments into swap files only to have them immediately put back
	 * on the c_age queue due to vm_swap_put failing.
	 *
	 * if we're doing these puts due to a hibernation flush,
	 * no need to block... setting hibernate_no_swapspace to TRUE,
	 * will cause "vm_compressor_compact_and_swap" to immediately abort
	 */
	clock_get_system_nanotime(&sec, &nsec);

	if (VM_SWAP_SHOULD_CREATE(sec) && !vm_swapfile_create_thread_running)
		thread_wakeup((event_t) &vm_swapfile_create_needed);

	if (hibernate_flushing == FALSE || VM_SWAP_SHOULD_CREATE(sec)) {
		waiting = TRUE;
		assert_wait_timeout((event_t) &vm_num_swap_files, THREAD_INTERRUPTIBLE, 1000, 1000*NSEC_PER_USEC);
	} else
		hibernate_no_swapspace = TRUE;

	lck_mtx_unlock(&vm_swap_data_lock);

	if (waiting == TRUE) {
		thread_block(THREAD_CONTINUE_NULL);

		if (retried == FALSE && hibernate_flushing == TRUE) {
			retried = TRUE;
			goto retry;
		}
	}
	vm_swap_put_failures++;

	return KERN_FAILURE;

done:
	error = vm_swapfile_io(swf->swp_vp, file_offset, addr, (int) (size / PAGE_SIZE_64), SWAP_WRITE);

	lck_mtx_lock(&vm_swap_data_lock);

	swf->swp_csegs[segidx] = c_seg;

	swf->swp_io_count--;

	*f_offset = (swapfile_index << SWAP_DEVICE_SHIFT) | file_offset;

	if ((swf->swp_flags & SWAP_WANTED) && swf->swp_io_count == 0) {

		swf->swp_flags &= ~SWAP_WANTED;
		thread_wakeup((event_t) &swf->swp_flags);
	}
	lck_mtx_unlock(&vm_swap_data_lock);

	if (error) {
		vm_swap_free(*f_offset);

		vm_swap_put_failures++;

		return KERN_FAILURE;
	}
	return KERN_SUCCESS;
}
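
/*
 * Slot bookkeeping example: segment index 20 lives in bitmap byte
 * 20 >> 3 == 2 at bit 20 % 8 == 4, and its data occupies the file range
 * [20 * COMPRESSED_SWAP_CHUNK_SIZE, 21 * COMPRESSED_SWAP_CHUNK_SIZE).
 * swp_free_hint remembers the lowest index that might be free, so the
 * scan above rarely has to start from slot 0.
 */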
static void
vm_swap_free_now(struct swapfile *swf, uint64_t f_offset)
{
	uint64_t	file_offset = 0;
	unsigned int	segidx = 0;

	if ((swf->swp_flags & SWAP_READY) || (swf->swp_flags & SWAP_RECLAIM)) {

		unsigned int byte_for_segidx = 0;
		unsigned int offset_within_byte = 0;

		file_offset = (f_offset & SWAP_SLOT_MASK);
		segidx = (unsigned int) (file_offset / COMPRESSED_SWAP_CHUNK_SIZE);

		byte_for_segidx = segidx >> 3;
		offset_within_byte = segidx % 8;

		if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {

			(swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);

			swf->swp_csegs[segidx] = NULL;

			swf->swp_nseginuse--;
			vm_swapfile_total_segs_used--;

			if (segidx < swf->swp_free_hint) {
				swf->swp_free_hint = segidx;
			}
		}
		if (VM_SWAP_SHOULD_RECLAIM() && !vm_swapfile_gc_thread_running)
			thread_wakeup((event_t) &vm_swapfile_gc_needed);
	}
}
uint32_t vm_swap_free_now_count = 0;
uint32_t vm_swap_free_delayed_count = 0;

void
vm_swap_free(uint64_t f_offset)
{
	struct swapfile *swf = NULL;
	struct trim_list *tl = NULL;
	clock_sec_t	sec;
	clock_nsec_t	nsec;

	if (swp_trim_supported == TRUE)
		tl = kalloc(sizeof(struct trim_list));

	lck_mtx_lock(&vm_swap_data_lock);

	swf = vm_swapfile_for_handle(f_offset);

	if (swf && (swf->swp_flags & (SWAP_READY | SWAP_RECLAIM))) {

		if (swp_trim_supported == FALSE || (swf->swp_flags & SWAP_RECLAIM)) {
			/*
			 * don't delay the free if the underlying disk doesn't support
			 * trim, or we're in the midst of reclaiming this swap file since
			 * we don't want to move segments that are technically free
			 * but not yet handled by the delayed free mechanism
			 */
			vm_swap_free_now(swf, f_offset);

			vm_swap_free_now_count++;
		} else {
			tl->tl_offset = f_offset & SWAP_SLOT_MASK;
			tl->tl_length = COMPRESSED_SWAP_CHUNK_SIZE;

			tl->tl_next = swf->swp_delayed_trim_list_head;
			swf->swp_delayed_trim_list_head = tl;
			swf->swp_delayed_trim_count++;
			tl = NULL;

			if (VM_SWAP_SHOULD_TRIM(swf) && !vm_swapfile_create_thread_running) {
				clock_get_system_nanotime(&sec, &nsec);

				if (sec > dont_trim_until_ts)
					thread_wakeup((event_t) &vm_swapfile_create_needed);
			}
			vm_swap_free_delayed_count++;
		}
	}
	lck_mtx_unlock(&vm_swap_data_lock);

	if (tl != NULL)
		kfree(tl, sizeof(struct trim_list));
}
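
/*
 * Freed slots on trim-capable devices are batched on the swapfile's
 * delayed trim list rather than trimmed one at a time; once
 * VM_SWAPFILE_DELAYED_TRIM_MAX of them accumulate, the create thread is
 * woken to issue a single vnode_trim_list() covering the whole batch
 * (see vm_swap_do_delayed_trim).  Until that happens the slots remain
 * marked in-use in the bitmap.
 */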
static void
vm_swap_wait_on_trim_handling_in_progress()
{
	while (delayed_trim_handling_in_progress == TRUE) {

		assert_wait((event_t) &delayed_trim_handling_in_progress, THREAD_UNINT);
		lck_mtx_unlock(&vm_swap_data_lock);

		thread_block(THREAD_CONTINUE_NULL);

		lck_mtx_lock(&vm_swap_data_lock);
	}
}
static void
vm_swap_handle_delayed_trims(boolean_t force_now)
{
	struct swapfile *swf = NULL;

	/*
	 * serialize the race between us and vm_swap_reclaim...
	 * if vm_swap_reclaim wins it will turn off SWAP_READY
	 * on the victim it has chosen... we can just skip over
	 * that file since vm_swap_reclaim will first process
	 * all of the delayed trims associated with it
	 */
	lck_mtx_lock(&vm_swap_data_lock);

	delayed_trim_handling_in_progress = TRUE;

	lck_mtx_unlock(&vm_swap_data_lock);

	/*
	 * no need to hold the lock to walk the swf list since
	 * vm_swap_create (the only place where we add to this list)
	 * is run on the same thread as this function
	 * and vm_swap_reclaim doesn't remove items from this list
	 * instead marking them with SWAP_REUSE for future re-use
	 */
	swf = (struct swapfile *) queue_first(&swf_global_queue);

	while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {

		if ((swf->swp_flags & SWAP_READY) && (force_now == TRUE || VM_SWAP_SHOULD_TRIM(swf))) {

			assert(!(swf->swp_flags & SWAP_RECLAIM));
			vm_swap_do_delayed_trim(swf);
		}
		swf = (struct swapfile *) queue_next(&swf->swp_queue);
	}
	lck_mtx_lock(&vm_swap_data_lock);

	delayed_trim_handling_in_progress = FALSE;
	thread_wakeup((event_t) &delayed_trim_handling_in_progress);

	if (VM_SWAP_SHOULD_RECLAIM() && !vm_swapfile_gc_thread_running)
		thread_wakeup((event_t) &vm_swapfile_gc_needed);

	lck_mtx_unlock(&vm_swap_data_lock);
}
static void
vm_swap_do_delayed_trim(struct swapfile *swf)
{
	struct trim_list *tl, *tl_head;

	lck_mtx_lock(&vm_swap_data_lock);

	tl_head = swf->swp_delayed_trim_list_head;
	swf->swp_delayed_trim_list_head = NULL;
	swf->swp_delayed_trim_count = 0;

	lck_mtx_unlock(&vm_swap_data_lock);

	vnode_trim_list(swf->swp_vp, tl_head, TRUE);

	while ((tl = tl_head) != NULL) {
		unsigned int	segidx = 0;
		unsigned int	byte_for_segidx = 0;
		unsigned int	offset_within_byte = 0;

		lck_mtx_lock(&vm_swap_data_lock);

		segidx = (unsigned int) (tl->tl_offset / COMPRESSED_SWAP_CHUNK_SIZE);

		byte_for_segidx = segidx >> 3;
		offset_within_byte = segidx % 8;

		if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {

			(swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);

			swf->swp_csegs[segidx] = NULL;

			swf->swp_nseginuse--;
			vm_swapfile_total_segs_used--;

			if (segidx < swf->swp_free_hint) {
				swf->swp_free_hint = segidx;
			}
		}
		lck_mtx_unlock(&vm_swap_data_lock);

		tl_head = tl->tl_next;

		kfree(tl, sizeof(struct trim_list));
	}
}
int	vm_swap_reclaim_yielded = 0;

void
vm_swap_reclaim(void)
{
	vm_offset_t	addr = 0;
	unsigned int	segidx = 0;
	uint64_t	f_offset = 0;
	struct swapfile *swf = NULL;
	struct swapfile *smallest_swf = NULL;
	unsigned int	min_nsegs = 0;
	unsigned int	byte_for_segidx = 0;
	unsigned int	offset_within_byte = 0;
	uint32_t	c_size = 0;

	c_segment_t	c_seg = NULL;

	if (kernel_memory_allocate(compressor_map, (vm_offset_t *)(&addr), C_SEG_BUFSIZE, 0, KMA_KOBJECT, VM_KERN_MEMORY_COMPRESSOR) != KERN_SUCCESS) {
		panic("vm_swap_reclaim: kernel_memory_allocate failed\n");
	}

	lck_mtx_lock(&vm_swap_data_lock);

	/*
	 * if we're running the swapfile list looking for
	 * candidates with delayed trims, we need to
	 * wait before making our decision concerning
	 * the swapfile we want to reclaim
	 */
	vm_swap_wait_on_trim_handling_in_progress();

	/*
	 * from here until we knock down the SWAP_READY bit,
	 * we need to remain behind the vm_swap_data_lock...
	 * once that bit has been turned off, "vm_swap_handle_delayed_trims"
	 * will not consider this swapfile for processing
	 */
	swf = (struct swapfile *) queue_first(&swf_global_queue);
	min_nsegs = MAX_SWAP_FILE_SIZE / COMPRESSED_SWAP_CHUNK_SIZE;
	smallest_swf = NULL;

	while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {

		if ((swf->swp_flags & SWAP_READY) && (swf->swp_nseginuse <= min_nsegs)) {

			smallest_swf = swf;
			min_nsegs = swf->swp_nseginuse;
		}
		swf = (struct swapfile *) queue_next(&swf->swp_queue);
	}

	if (smallest_swf == NULL)
		goto done;

	swf = smallest_swf;

	swf->swp_flags &= ~SWAP_READY;
	swf->swp_flags |= SWAP_RECLAIM;

	if (swf->swp_delayed_trim_count) {

		lck_mtx_unlock(&vm_swap_data_lock);

		vm_swap_do_delayed_trim(swf);

		lck_mtx_lock(&vm_swap_data_lock);
	}
	segidx = 0;

	while (segidx < swf->swp_nsegs) {

ReTry_for_cseg:
		/*
		 * Wait for outgoing I/Os.
		 */
		while (swf->swp_io_count) {

			swf->swp_flags |= SWAP_WANTED;

			assert_wait((event_t) &swf->swp_flags, THREAD_UNINT);
			lck_mtx_unlock(&vm_swap_data_lock);

			thread_block(THREAD_CONTINUE_NULL);

			lck_mtx_lock(&vm_swap_data_lock);
		}
		if (compressor_store_stop_compaction == TRUE || VM_SWAP_SHOULD_ABORT_RECLAIM() || VM_SWAP_BUSY()) {
			vm_swap_reclaim_yielded++;
			break;
		}

		byte_for_segidx = segidx >> 3;
		offset_within_byte = segidx % 8;

		if (((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) == 0) {

			segidx++;
			continue;
		}

		c_seg = swf->swp_csegs[segidx];
		assert(c_seg);

		lck_mtx_lock_spin_always(&c_seg->c_lock);

		if (c_seg->c_busy) {
			/*
			 * a swapped out c_segment in the process of being freed will remain in the
			 * busy state until after the vm_swap_free is called on it... vm_swap_free
			 * takes the vm_swap_data_lock, so can't change the swap state until after
			 * we drop the vm_swap_data_lock... once we do, vm_swap_free will complete
			 * which will allow c_seg_free_locked to clear busy and wake up this thread...
			 * at that point, we re-look up the swap state which will now indicate that
			 * this c_segment no longer exists.
			 */
			c_seg->c_wanted = 1;

			assert_wait((event_t) (c_seg), THREAD_UNINT);
			lck_mtx_unlock_always(&c_seg->c_lock);

			lck_mtx_unlock(&vm_swap_data_lock);

			thread_block(THREAD_CONTINUE_NULL);

			lck_mtx_lock(&vm_swap_data_lock);

			goto ReTry_for_cseg;
		}
		(swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);

		f_offset = segidx * COMPRESSED_SWAP_CHUNK_SIZE;

		assert(c_seg == swf->swp_csegs[segidx]);
		swf->swp_csegs[segidx] = NULL;
		swf->swp_nseginuse--;

		vm_swapfile_total_segs_used--;

		lck_mtx_unlock(&vm_swap_data_lock);

		assert(C_SEG_IS_ONDISK(c_seg));

		C_SEG_BUSY(c_seg);
		c_seg->c_busy_swapping = 1;
#if !CHECKSUM_THE_SWAP
		c_seg_trim_tail(c_seg);
#endif
		c_size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));

		assert(c_size <= C_SEG_BUFSIZE && c_size);

		lck_mtx_unlock_always(&c_seg->c_lock);

		if (vm_swapfile_io(swf->swp_vp, f_offset, addr, (int)(c_size / PAGE_SIZE_64), SWAP_READ)) {

			/*
			 * reading the data back in failed, so convert c_seg
			 * to a swapped in c_segment that contains no data
			 */
			c_seg_swapin_requeue(c_seg, FALSE, TRUE, FALSE);
			/*
			 * returns with c_busy_swapping cleared
			 */

			vm_swap_get_failures++;
			goto swap_io_failed;
		}
		VM_STAT_INCR_BY(swapins, c_size >> PAGE_SHIFT);

		if (vm_swap_put(addr, &f_offset, c_size, c_seg)) {
			vm_offset_t	c_buffer;

			/*
			 * the put failed, so convert c_seg to a fully swapped in c_segment
			 * with valid data
			 */
			c_buffer = (vm_offset_t)C_SEG_BUFFER_ADDRESS(c_seg->c_mysegno);

			kernel_memory_populate(compressor_map, c_buffer, c_size, KMA_COMPRESSOR, VM_KERN_MEMORY_COMPRESSOR);

			memcpy((char *)c_buffer, (char *)addr, c_size);

			c_seg->c_store.c_buffer = (int32_t *)c_buffer;
#if ENCRYPTED_SWAP
			vm_swap_decrypt(c_seg);
#endif /* ENCRYPTED_SWAP */
			c_seg_swapin_requeue(c_seg, TRUE, TRUE, FALSE);
			/*
			 * returns with c_busy_swapping cleared
			 */
			OSAddAtomic64(c_seg->c_bytes_used, &compressor_bytes_used);

			goto swap_io_failed;
		}
		VM_STAT_INCR_BY(swapouts, c_size >> PAGE_SHIFT);

		lck_mtx_lock_spin_always(&c_seg->c_lock);

		assert(C_SEG_IS_ONDISK(c_seg));
		/*
		 * The c_seg will now know about the new location on disk.
		 */
		c_seg->c_store.c_swap_handle = f_offset;

		assert(c_seg->c_busy_swapping);
		c_seg->c_busy_swapping = 0;
swap_io_failed:
		assert(c_seg->c_busy);
		C_SEG_WAKEUP_DONE(c_seg);

		lck_mtx_unlock_always(&c_seg->c_lock);
		lck_mtx_lock(&vm_swap_data_lock);
	}

	if (swf->swp_nseginuse) {

		swf->swp_flags &= ~SWAP_RECLAIM;
		swf->swp_flags |= SWAP_READY;

		goto done;
	}
	/*
	 * We don't remove this inactive swf from the queue.
	 * That way, we can re-use it when needed again and
	 * preserve the namespace. The delayed_trim processing
	 * is also dependent on us not removing swfs from the queue.
	 */
	//queue_remove(&swf_global_queue, swf, struct swapfile*, swp_queue);

	vm_num_swap_files--;

	vm_swapfile_total_segs_alloced -= swf->swp_nsegs;

	lck_mtx_unlock(&vm_swap_data_lock);

	vm_swapfile_close((uint64_t)(swf->swp_path), swf->swp_vp);

	kfree(swf->swp_csegs, swf->swp_nsegs * sizeof(c_segment_t));
	kfree(swf->swp_bitmap, MAX((swf->swp_nsegs >> 3), 1));

	lck_mtx_lock(&vm_swap_data_lock);

	if (swf->swp_flags & SWAP_PINNED) {
		vm_num_pinned_swap_files--;
		vm_swappin_avail += swf->swp_size;
	}

	swf->swp_vp = NULL;
	swf->swp_size = 0;
	swf->swp_free_hint = 0;
	swf->swp_nsegs = 0;
	swf->swp_flags = SWAP_REUSE;

done:
	thread_wakeup((event_t) &swf->swp_flags);	/* Wakeup anyone waiting for SWAP_RECLAIM or SWAP_READY */

	lck_mtx_unlock(&vm_swap_data_lock);

	kmem_free(compressor_map, (vm_offset_t) addr, C_SEG_BUFSIZE);
}
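
/*
 * Reclaim works by eviction: every live segment in the victim swapfile
 * is read back into the scratch buffer allocated above and immediately
 * re-written through vm_swap_put, which lands it in some other swapfile.
 * Only when every slot is empty are the file's bitmap and back-pointer
 * array freed and the swapfile struct parked in the SWAP_REUSE state;
 * the struct stays on swf_global_queue so its name and index can be
 * recycled by a later vm_swap_create_file.
 */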
uint64_t
vm_swap_get_total_space(void)
{
	uint64_t total_space = 0;

	total_space = (uint64_t)vm_swapfile_total_segs_alloced * COMPRESSED_SWAP_CHUNK_SIZE;

	return total_space;
}

uint64_t
vm_swap_get_used_space(void)
{
	uint64_t used_space = 0;

	used_space = (uint64_t)vm_swapfile_total_segs_used * COMPRESSED_SWAP_CHUNK_SIZE;

	return used_space;
}

uint64_t
vm_swap_get_free_space(void)
{
	return (vm_swap_get_total_space() - vm_swap_get_used_space());
}

int
vm_swap_low_on_space(void)
{
	if (vm_num_swap_files == 0 && vm_swapfile_can_be_created == FALSE)
		return (0);

	if (((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < ((unsigned int)VM_SWAPFILE_HIWATER_SEGS) / 8)) {

		if (vm_num_swap_files == 0 && !SWAPPER_NEEDS_TO_UNTHROTTLE())
			return (0);

		if (vm_swapfile_last_failed_to_create_ts >= vm_swapfile_last_successful_create_ts)
			return (1);
	}
	return (0);
}

boolean_t
vm_swap_files_pinned(void)
{
	boolean_t result;

	if (vm_swappin_enabled == FALSE)
		return (FALSE);

	result = (vm_num_pinned_swap_files == vm_num_swap_files);

	return (result);
}