#include <IOKit/IOHibernatePrivate.h>
+#include <kern/policy_internal.h>
boolean_t compressor_store_stop_compaction = FALSE;
-boolean_t vm_swap_up = FALSE;
-boolean_t vm_swapfile_mgmt_needed = FALSE;
+boolean_t vm_swapfile_create_needed = FALSE;
+boolean_t vm_swapfile_gc_needed = FALSE;
int swapper_throttle = -1;
boolean_t swapper_throttle_inited = FALSE;
uint64_t vm_swap_put_failures = 0;
uint64_t vm_swap_get_failures = 0;
int vm_num_swap_files = 0;
+int vm_num_pinned_swap_files = 0;
int vm_swapout_thread_processed_segments = 0;
int vm_swapout_thread_awakened = 0;
-int vm_swapfile_mgmt_thread_awakened = 0;
-int vm_swapfile_mgmt_thread_running = 0;
+int vm_swapfile_create_thread_awakened = 0;
+int vm_swapfile_create_thread_running = 0;
+int vm_swapfile_gc_thread_awakened = 0;
+int vm_swapfile_gc_thread_running = 0;
+int64_t vm_swappin_avail = 0;
+boolean_t vm_swappin_enabled = FALSE;
unsigned int vm_swapfile_total_segs_alloced = 0;
unsigned int vm_swapfile_total_segs_used = 0;
+extern vm_map_t compressor_map;
+
#define SWAP_READY 0x1 /* Swap file is ready to be used */
#define SWAP_RECLAIM 0x2 /* Swap file is marked to be reclaimed */
#define SWAP_WANTED 0x4 /* Swap file has waiters */
#define SWAP_REUSE 0x8 /* Swap file is on the Q and has a name. Reuse after init-ing.*/
+#define SWAP_PINNED 0x10 /* Swap file is pinned (FusionDrive) */
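+
+/*
+ * Rough flag lifecycle, as used below: a swap file is SWAP_READY while it
+ * can accept new segments, flips to SWAP_RECLAIM while vm_swap_reclaim is
+ * draining it, and is left queued as SWAP_REUSE so its name and index can
+ * be recycled by a later vm_swap_create_file.  SWAP_WANTED marks waiters
+ * sleeping on swp_flags; SWAP_PINNED marks files whose blocks were pinned
+ * at preallocation time for the benefit of hibernation.
+ */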
+
struct swapfile{
queue_head_t swp_queue; /* list of swap files */
struct trim_list *swp_delayed_trim_list_head;
unsigned int swp_delayed_trim_count;
- boolean_t swp_trim_supported;
};
queue_head_t swf_global_queue;
-
-#define VM_SWAPFILE_DELAYED_TRIM_MAX 128
+boolean_t swp_trim_supported = FALSE;
extern clock_sec_t dont_trim_until_ts;
clock_sec_t vm_swapfile_last_failed_to_create_ts = 0;
+clock_sec_t vm_swapfile_last_successful_create_ts = 0;
+int vm_swapfile_can_be_created = FALSE;
+boolean_t delayed_trim_handling_in_progress = FALSE;
+
+boolean_t hibernate_in_progress_with_pinned_swap = FALSE;
static void vm_swapout_thread_throttle_adjust(void);
static void vm_swap_free_now(struct swapfile *swf, uint64_t f_offset);
static void vm_swapout_thread(void);
-static void vm_swapfile_mgmt_thread(void);
+static void vm_swapfile_create_thread(void);
+static void vm_swapfile_gc_thread(void);
static void vm_swap_defragment();
static void vm_swap_handle_delayed_trims(boolean_t);
static void vm_swap_do_delayed_trim();
+static void vm_swap_wait_on_trim_handling_in_progress(void);
+
+
+#define VM_MAX_SWAP_FILE_NUM 100
+#define VM_SWAPFILE_DELAYED_TRIM_MAX 128
-#define VM_SWAPFILE_DELAYED_CREATE 30
+#define VM_SWAPFILE_DELAYED_CREATE 15
#define VM_SWAP_SHOULD_DEFRAGMENT() (c_swappedout_sparse_count > (vm_swapfile_total_segs_used / 4) ? 1 : 0)
#define VM_SWAP_SHOULD_RECLAIM() (((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) >= SWAPFILE_RECLAIM_THRESHOLD_SEGS) ? 1 : 0)
-#define VM_SWAP_SHOULD_CREATE(cur_ts) (((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < (unsigned int)VM_SWAPFILE_HIWATER_SEGS) && \
+#define VM_SWAP_SHOULD_ABORT_RECLAIM() (((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) <= SWAPFILE_RECLAIM_MINIMUM_SEGS) ? 1 : 0)
+#define VM_SWAP_SHOULD_PIN(_size) (vm_swappin_avail > 0 && vm_swappin_avail >= (int64_t)(_size))
+#define VM_SWAP_SHOULD_CREATE(cur_ts) ((vm_num_swap_files < VM_MAX_SWAP_FILE_NUM) && ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < (unsigned int)VM_SWAPFILE_HIWATER_SEGS) && \
((cur_ts - vm_swapfile_last_failed_to_create_ts) > VM_SWAPFILE_DELAYED_CREATE) ? 1 : 0)
#define VM_SWAP_SHOULD_TRIM(swf) ((swf->swp_delayed_trim_count >= VM_SWAPFILE_DELAYED_TRIM_MAX) ? 1 : 0)
+
#define VM_SWAP_BUSY() ((c_swapout_count && (swapper_throttle == THROTTLE_LEVEL_COMPRESSOR_TIER1 || swapper_throttle == THROTTLE_LEVEL_COMPRESSOR_TIER0)) ? 1 : 0)
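+
+/*
+ * Trigger summary (all in units of swap segments):
+ *   VM_SWAP_SHOULD_DEFRAGMENT - sparse swapped-out segments exceed 1/4
+ *	of the segments currently in use.
+ *   VM_SWAP_SHOULD_RECLAIM - enough segments are free that a whole
+ *	file's worth (SWAPFILE_RECLAIM_THRESHOLD_SEGS) can be given back.
+ *   VM_SWAP_SHOULD_ABORT_RECLAIM - free segments have fallen to
+ *	SWAPFILE_RECLAIM_MINIMUM_SEGS mid-reclaim, so stop shrinking.
+ *   VM_SWAP_SHOULD_CREATE - below the VM_MAX_SWAP_FILE_NUM cap, free
+ *	segments are under the high-water mark, and the last failed create
+ *	is more than VM_SWAPFILE_DELAYED_CREATE seconds stale.
+ *   VM_SWAP_BUSY - swapouts are queued at one of the two urgent
+ *	compressor I/O tiers, so background defrag/reclaim should yield.
+ */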
extern unsigned int hash_string(char *cp, int len);
#endif
-#if CRYPTO
+#if RECORD_THE_COMPRESSED_DATA
+boolean_t c_compressed_record_init_done = FALSE;
+int c_compressed_record_write_error = 0;
+struct vnode *c_compressed_record_vp = NULL;
+uint64_t c_compressed_record_file_offset = 0;
+void c_compressed_record_init(void);
+void c_compressed_record_write(char *, int);
+#endif
+
+#if ENCRYPTED_SWAP
extern boolean_t swap_crypt_ctx_initialized;
extern void swap_crypt_ctx_initialize(void);
extern const unsigned char swap_crypt_null_iv[AES_BLOCK_SIZE];
extern aes_ctx swap_crypt_ctx;
extern unsigned long vm_page_encrypt_counter;
extern unsigned long vm_page_decrypt_counter;
-#endif /* CRYPTO */
+#endif /* ENCRYPTED_SWAP */
extern void vm_pageout_io_throttle(void);
-struct swapfile *vm_swapfile_for_handle(uint64_t);
+static struct swapfile *vm_swapfile_for_handle(uint64_t);
/*
* Called with the vm_swap_data_lock held.
*/
-struct swapfile *
+static struct swapfile *
vm_swapfile_for_handle(uint64_t f_offset)
{
}
void
-vm_swap_init()
+vm_compressor_swap_init()
{
- static boolean_t vm_swap_try_init = FALSE;
thread_t thread = NULL;
- if (vm_swap_try_init == TRUE) {
- return;
- }
-
- vm_swap_try_init = TRUE;
-
lck_grp_attr_setdefault(&vm_swap_data_lock_grp_attr);
lck_grp_init(&vm_swap_data_lock_grp,
"vm_swap_data",
queue_init(&swf_global_queue);
- if (vm_swap_create_file()) {
- if (kernel_thread_start_priority((thread_continue_t)vm_swapout_thread, NULL,
- BASEPRI_PREEMPT - 1, &thread) != KERN_SUCCESS) {
- panic("vm_swapout_thread: create failed");
- }
- thread->options |= TH_OPT_VMPRIV;
- vm_swapout_thread_id = thread->thread_id;
-
- thread_deallocate(thread);
-
- if (kernel_thread_start_priority((thread_continue_t)vm_swapfile_mgmt_thread, NULL,
+ if (kernel_thread_start_priority((thread_continue_t)vm_swapout_thread, NULL,
BASEPRI_PREEMPT - 1, &thread) != KERN_SUCCESS) {
- panic("vm_swapfile_mgmt_thread: create failed");
- }
- thread->options |= TH_OPT_VMPRIV;
+ panic("vm_swapout_thread: create failed");
+ }
+ vm_swapout_thread_id = thread->thread_id;
- thread_deallocate(thread);
-
-#if CRYPTO
- if (swap_crypt_ctx_initialized == FALSE) {
- swap_crypt_ctx_initialize();
- }
-#endif /* CRYPTO */
-
- vm_swap_up = TRUE;
+ thread_deallocate(thread);
-#if SANITY_CHECK_SWAP_ROUTINES
-extern lck_attr_t *vm_compressor_lck_attr;
-extern lck_grp_t *vm_compressor_lck_grp;
+ if (kernel_thread_start_priority((thread_continue_t)vm_swapfile_create_thread, NULL,
+ BASEPRI_PREEMPT - 1, &thread) != KERN_SUCCESS) {
+ panic("vm_swapfile_create_thread: create failed");
+ }
- /*
- * Changes COMPRESSED_SWAP_CHUNK_SIZE to make it (4*KB).
- * Changes MIN_SWAP_FILE_SIZE to (4*KB).
- * Changes MAX_SWAP_FILE_SIZE to (4*KB).
- * That will then cause the below allocations to create
- * 4 new swap files and put/get/free from them.
- */
- {
- c_segment_t c_seg = NULL, c_seg1 = NULL, c_seg2 = NULL, c_seg3 = NULL;
- vm_offset_t addr = 0;
- vm_offset_t dup_addr = 0;
- kern_return_t kr = KERN_SUCCESS;
- uint64_t f_offset = 0;
- uint64_t f_offset1 = 0;
- uint64_t f_offset2 = 0;
- uint64_t f_offset3 = 0;
-
- if ((kr = kernel_memory_allocate(kernel_map,
- &addr,
- 4 * COMPRESSED_SWAP_CHUNK_SIZE,
- 0,
- KMA_KOBJECT))) {
- printf("kernel_memory_allocate failed with %d\n", kr);
- goto done;
- }
+ thread_deallocate(thread);
- if ((kr = kernel_memory_allocate(kernel_map,
- &dup_addr,
- 4 * COMPRESSED_SWAP_CHUNK_SIZE,
- 0,
- KMA_KOBJECT))) {
- printf("kernel_memory_allocate failed with %d\n", kr);
- goto done;
- }
+ if (kernel_thread_start_priority((thread_continue_t)vm_swapfile_gc_thread, NULL,
+ BASEPRI_PREEMPT - 1, &thread) != KERN_SUCCESS) {
+ panic("vm_swapfile_gc_thread: create failed");
+ }
+ thread_deallocate(thread);
- c_seg = (c_segment_t) kalloc(sizeof(*c_seg));
- memset(c_seg, 0, sizeof(*c_seg));
-#if __i386__ || __x86_64__
- lck_mtx_init(&c_seg->c_lock, vm_compressor_lck_grp, vm_compressor_lck_attr);
-#else /* __i386__ || __x86_64__ */
- lck_spin_init(&c_seg->c_lock, vm_compressor_lck_grp, vm_compressor_lck_attr);
-#endif /* __i386__ || __x86_64__ */
-
+ proc_set_thread_policy_with_tid(kernel_task, thread->thread_id,
+ TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER2);
+ proc_set_thread_policy_with_tid(kernel_task, thread->thread_id,
+ TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
- c_seg1 = (c_segment_t) kalloc(sizeof(*c_seg));
- memset(c_seg1, 0, sizeof(*c_seg));
-#if __i386__ || __x86_64__
- lck_mtx_init(&c_seg1->c_lock, vm_compressor_lck_grp, vm_compressor_lck_attr);
-#else /* __i386__ || __x86_64__ */
- lck_spin_init(&c_seg1->c_lock, vm_compressor_lck_grp, vm_compressor_lck_attr);
-#endif /* __i386__ || __x86_64__ */
-
+#if ENCRYPTED_SWAP
+ if (swap_crypt_ctx_initialized == FALSE) {
+ swap_crypt_ctx_initialize();
+ }
+#endif /* ENCRYPTED_SWAP */
+
+ memset(swapfilename, 0, MAX_SWAPFILENAME_LEN + 1);
- c_seg2 = (c_segment_t) kalloc(sizeof(*c_seg));
- memset(c_seg2, 0, sizeof(*c_seg));
-#if __i386__ || __x86_64__
- lck_mtx_init(&c_seg2->c_lock, vm_compressor_lck_grp, vm_compressor_lck_attr);
-#else /* __i386__ || __x86_64__ */
- lck_spin_init(&c_seg2->c_lock, vm_compressor_lck_grp, vm_compressor_lck_attr);
-#endif /* __i386__ || __x86_64__ */
-
+ printf("VM Swap Subsystem is ON\n");
+}
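+
+/*
+ * Three kernel threads now back the swap subsystem: vm_swapout_thread
+ * writes compressed segments to disk, vm_swapfile_create_thread grows
+ * the swapfile population and drains delayed trims, and
+ * vm_swapfile_gc_thread defragments and reclaims swapfiles.  Only the
+ * gc thread is set to THROTTLE_LEVEL_COMPRESSOR_TIER2 with passive I/O
+ * above, presumably so its housekeeping I/O doesn't compete with
+ * swapout traffic.
+ */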
- c_seg3 = (c_segment_t) kalloc(sizeof(*c_seg));
- memset(c_seg3, 0, sizeof(*c_seg));
-#if __i386__ || __x86_64__
- lck_mtx_init(&c_seg3->c_lock, vm_compressor_lck_grp, vm_compressor_lck_attr);
-#else /* __i386__ || __x86_64__ */
- lck_spin_init(&c_seg3->c_lock, vm_compressor_lck_grp, vm_compressor_lck_attr);
-#endif /* __i386__ || __x86_64__ */
-
- memset((void*)addr, (int) 'a', PAGE_SIZE_64);
- memset((void*)(addr + PAGE_SIZE_64), (int) 'b', PAGE_SIZE_64);
- memset((void*)(addr + (2 * PAGE_SIZE_64)), (int) 'c', PAGE_SIZE_64);
- memset((void*)(addr + (3 * PAGE_SIZE_64)), (int) 'd', PAGE_SIZE_64);
+#if RECORD_THE_COMPRESSED_DATA
- vm_swap_put(addr, &f_offset, PAGE_SIZE_64, c_seg);
- c_seg->c_store.c_swap_handle = f_offset;
+void
+c_compressed_record_init()
+{
+ if (c_compressed_record_init_done == FALSE) {
+ vm_swapfile_open("/tmp/compressed_data", &c_compressed_record_vp);
+ c_compressed_record_init_done = TRUE;
+ }
+}
- vm_swap_put(addr + PAGE_SIZE_64, &f_offset1, PAGE_SIZE_64, c_seg1);
- c_seg1->c_store.c_swap_handle = f_offset1;
+void
+c_compressed_record_write(char *buf, int size)
+{
+ if (c_compressed_record_write_error == 0) {
+ c_compressed_record_write_error = vm_record_file_write(c_compressed_record_vp, c_compressed_record_file_offset, buf, size);
+ c_compressed_record_file_offset += size;
+ }
+}
+#endif
- vm_swap_put(addr + (2 * PAGE_SIZE_64), &f_offset2, PAGE_SIZE_64, c_seg2);
- c_seg2->c_store.c_swap_handle = f_offset2;
- vm_swap_put(addr + (3 * PAGE_SIZE_64), &f_offset3, PAGE_SIZE_64, c_seg3);
- c_seg3->c_store.c_swap_handle = f_offset3;
-
- //vm_swap_free(f_offset);
- vm_swap_get(dup_addr, f_offset, PAGE_SIZE_64);
+int compaction_swapper_inited = 0;
- //vm_swap_free(f_offset1);
- vm_swap_reclaim();
- vm_swap_get(dup_addr + PAGE_SIZE_64, c_seg1->c_store.c_swap_handle, PAGE_SIZE_64);
+void
+vm_compaction_swapper_do_init(void)
+{
+ struct vnode *vp;
+ char *pathname;
+ int namelen;
- //vm_swap_free(f_offset2);
- vm_swap_reclaim();
- vm_swap_get(dup_addr + (2 * PAGE_SIZE_64), c_seg2->c_store.c_swap_handle, PAGE_SIZE_64);
+ if (compaction_swapper_inited)
+ return;
- //vm_swap_free(f_offset3);
- vm_swap_reclaim();
- vm_swap_get(dup_addr + (3 * PAGE_SIZE_64), c_seg3->c_store.c_swap_handle, PAGE_SIZE_64);
+ if (vm_compressor_mode != VM_PAGER_COMPRESSOR_WITH_SWAP) {
+ compaction_swapper_inited = 1;
+ return;
+ }
+ lck_mtx_lock(&vm_swap_data_lock);
- if (memcmp((void*)addr, (void*)dup_addr, PAGE_SIZE_64)) {
- panic("First page data mismatch\n");
- kr = KERN_FAILURE;
- goto done;
- }
+ if ( !compaction_swapper_inited) {
- if (memcmp((void*)(addr + PAGE_SIZE_64), (void*)(dup_addr + PAGE_SIZE_64), PAGE_SIZE_64)) {
- panic("Second page data mismatch 0x%lx, 0x%lxn", addr, dup_addr);
- kr = KERN_FAILURE;
- goto done;
- }
+ if (strlen(swapfilename) == 0) {
+ /*
+ * If no swapfile name has been set, we'll
+ * use the default name.
+ *
+ * Also, this function is only called from the vm_pageout_scan thread
+ * via vm_consider_waking_compactor_swapper,
+ * so we don't need to worry about a race in checking/setting the name here.
+ */
+ strlcpy(swapfilename, SWAP_FILE_NAME, MAX_SWAPFILENAME_LEN);
+ }
+ namelen = (int)strlen(swapfilename) + SWAPFILENAME_INDEX_LEN + 1;
+ pathname = (char*)kalloc(namelen);
+ memset(pathname, 0, namelen);
+ snprintf(pathname, namelen, "%s%d", swapfilename, 0);
- if (memcmp((void*)(addr + (2 * PAGE_SIZE_64)), (void*)(dup_addr + (2 * PAGE_SIZE_64)), PAGE_SIZE_64)) {
- panic("Third page data mismatch\n");
- kr = KERN_FAILURE;
- goto done;
- }
+ vm_swapfile_open(pathname, &vp);
- if (memcmp((void*)(addr + (3 * PAGE_SIZE_64)), (void*)(dup_addr + (3 * PAGE_SIZE_64)), PAGE_SIZE_64)) {
- panic("Fourth page data mismatch 0x%lx, 0x%lxn", addr, dup_addr);
- kr = KERN_FAILURE;
- goto done;
+ if (vp) {
+
+ if (vnode_pager_isSSD(vp) == FALSE) {
+ vm_compressor_minorcompact_threshold_divisor = 18;
+ vm_compressor_majorcompact_threshold_divisor = 22;
+ vm_compressor_unthrottle_threshold_divisor = 32;
}
+ vnode_setswapmount(vp);
+ vm_swappin_avail = vnode_getswappin_avail(vp);
-done:
- printf("Sanity check %s\n", ((kr != KERN_SUCCESS) ? "FAILED" : "SUCCEEDED"));
- kfree((void*)addr, 4 * COMPRESSED_SWAP_CHUNK_SIZE);
- addr = 0;
- kfree((void*)dup_addr, 4 * COMPRESSED_SWAP_CHUNK_SIZE);
- dup_addr = 0;
+ if (vm_swappin_avail)
+ vm_swappin_enabled = TRUE;
+ vm_swapfile_close((uint64_t)pathname, vp);
}
-#endif /* SANITY_CHECK_SWAP_ROUTINES */
+ kfree(pathname, namelen);
+
+ compaction_swapper_inited = 1;
}
-
- printf("VM Swap Subsystem is %s\n", (vm_swap_up == TRUE) ? "ON" : "OFF");
+ lck_mtx_unlock(&vm_swap_data_lock);
}
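+
+/*
+ * Probing swapfile 0 here serves two purposes: if the backing store is
+ * not an SSD, the minor/major compaction and unthrottle divisors are
+ * retuned for rotational media before any swapping begins, and
+ * vnode_getswappin_avail reports how many bytes the filesystem will let
+ * us pin, which arms VM_SWAP_SHOULD_PIN and hibernate_pin_swap.
+ */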
-#if CRYPTO
+
+
+#if ENCRYPTED_SWAP
void
vm_swap_encrypt(c_segment_t c_seg)
{
assert(swap_crypt_ctx_initialized);
+#if DEVELOPMENT || DEBUG
+ C_SEG_MAKE_WRITEABLE(c_seg);
+#endif
bzero(&encrypt_iv.aes_iv[0], sizeof (encrypt_iv.aes_iv));
encrypt_iv.c_seg = (void*)c_seg;
&swap_crypt_ctx.encrypt);
vm_page_encrypt_counter += (size/PAGE_SIZE_64);
+
+#if DEVELOPMENT || DEBUG
+ C_SEG_WRITE_PROTECT(c_seg);
+#endif
}
void
assert(swap_crypt_ctx_initialized);
+#if DEVELOPMENT || DEBUG
+ C_SEG_MAKE_WRITEABLE(c_seg);
+#endif
/*
* Prepare an "initial vector" for the decryption.
* It has to be the same as the "initial vector" we
&swap_crypt_ctx.decrypt);
vm_page_decrypt_counter += (size/PAGE_SIZE_64);
+
+#if DEVELOPMENT || DEBUG
+ C_SEG_WRITE_PROTECT(c_seg);
+#endif
}
-#endif /* CRYPTO */
+#endif /* ENCRYPTED_SWAP */
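+
+/*
+ * On DEVELOPMENT/DEBUG kernels compressor segment buffers are normally
+ * kept write-protected (presumably to catch stray writes), and both AES
+ * paths transform the buffer in place - hence the
+ * C_SEG_MAKE_WRITEABLE / C_SEG_WRITE_PROTECT bracketing above.
+ */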
void
vm_swap_consider_defragmenting()
{
- if (compressor_store_stop_compaction == FALSE && !VM_SWAP_BUSY() && (VM_SWAP_SHOULD_DEFRAGMENT() || VM_SWAP_SHOULD_RECLAIM())) {
+ if (compressor_store_stop_compaction == FALSE && !VM_SWAP_BUSY() &&
+ (VM_SWAP_SHOULD_DEFRAGMENT() || VM_SWAP_SHOULD_RECLAIM())) {
- if (!vm_swapfile_mgmt_thread_running) {
+ if (!vm_swapfile_gc_thread_running) {
lck_mtx_lock(&vm_swap_data_lock);
- if (!vm_swapfile_mgmt_thread_running)
- thread_wakeup((event_t) &vm_swapfile_mgmt_needed);
+ if (!vm_swapfile_gc_thread_running)
+ thread_wakeup((event_t) &vm_swapfile_gc_needed);
lck_mtx_unlock(&vm_swap_data_lock);
}
lck_mtx_lock_spin_always(&c_seg->c_lock);
- assert(c_seg->c_on_swappedout_sparse_q);
+ assert(c_seg->c_state == C_ON_SWAPPEDOUTSPARSE_Q);
if (c_seg->c_busy) {
lck_mtx_unlock_always(c_list_lock);
* c_seg_free_locked consumes the c_list_lock
* and c_seg->c_lock
*/
+ C_SEG_BUSY(c_seg);
c_seg_free_locked(c_seg);
vm_swap_defragment_free++;
} else {
lck_mtx_unlock_always(c_list_lock);
- c_seg_swapin(c_seg, TRUE);
- lck_mtx_unlock_always(&c_seg->c_lock);
+ if (c_seg_swapin(c_seg, TRUE, FALSE) == 0)
+ lck_mtx_unlock_always(&c_seg->c_lock);
vm_swap_defragment_swapin++;
}
static void
-vm_swapfile_mgmt_thread(void)
+vm_swapfile_create_thread(void)
{
-
- boolean_t did_work = FALSE;
clock_sec_t sec;
clock_nsec_t nsec;
- vm_swapfile_mgmt_thread_awakened++;
- vm_swapfile_mgmt_thread_running = 1;
-
-try_again:
+ current_thread()->options |= TH_OPT_VMPRIV;
- do {
- if (vm_swap_up == FALSE)
- break;
- did_work = FALSE;
- clock_get_system_nanotime(&sec, &nsec);
+ vm_swapfile_create_thread_awakened++;
+ vm_swapfile_create_thread_running = 1;
+ while (TRUE) {
/*
* walk through the list of swap files
* and do the delayed frees/trims for
*/
vm_swap_handle_delayed_trims(FALSE);
- if (VM_SWAP_SHOULD_CREATE(sec)) {
- if (vm_swap_create_file() == TRUE)
- did_work = TRUE;
- else {
- vm_swapfile_last_failed_to_create_ts = sec;
- HIBLOG("vm_swap_create_file failed @ %lu secs\n", sec);
- }
- }
- if (VM_SWAP_SHOULD_DEFRAGMENT()) {
- proc_set_task_policy_thread(kernel_task, current_thread()->thread_id,
- TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER2);
+ lck_mtx_lock(&vm_swap_data_lock);
- vm_swap_defragment();
+ if (hibernate_in_progress_with_pinned_swap == TRUE)
+ break;
- if (!VM_SWAP_BUSY())
- did_work = TRUE;
+ clock_get_system_nanotime(&sec, &nsec);
- proc_set_task_policy_thread(kernel_task, current_thread()->thread_id,
- TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER1);
- }
- if (VM_SWAP_SHOULD_RECLAIM()) {
- proc_set_task_policy_thread(kernel_task, current_thread()->thread_id,
- TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER2);
+ if (VM_SWAP_SHOULD_CREATE(sec) == 0)
+ break;
- vm_swap_defragment();
- vm_swap_reclaim();
+ lck_mtx_unlock(&vm_swap_data_lock);
- if (!VM_SWAP_BUSY())
- did_work = TRUE;
+ if (vm_swap_create_file() == FALSE) {
+ vm_swapfile_last_failed_to_create_ts = sec;
+ HIBLOG("vm_swap_create_file failed @ %lu secs\n", (unsigned long)sec);
- proc_set_task_policy_thread(kernel_task, current_thread()->thread_id,
- TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER1);
- }
+ } else
+ vm_swapfile_last_successful_create_ts = sec;
+ }
+ vm_swapfile_create_thread_running = 0;
+
+ if (hibernate_in_progress_with_pinned_swap == TRUE)
+ thread_wakeup((event_t)&hibernate_in_progress_with_pinned_swap);
+
+ assert_wait((event_t)&vm_swapfile_create_needed, THREAD_UNINT);
+
+ lck_mtx_unlock(&vm_swap_data_lock);
+
+ thread_block((thread_continue_t)vm_swapfile_create_thread);
+
+ /* NOTREACHED */
+}
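+
+/*
+ * The create loop above drains delayed trims first, then adds swapfiles
+ * until VM_SWAP_SHOULD_CREATE reports enough headroom or a create fails
+ * (which arms the VM_SWAPFILE_DELAYED_CREATE backoff through
+ * vm_swapfile_last_failed_to_create_ts).  The thread then parks on
+ * vm_swapfile_create_needed until vm_swap_put or vm_swap_free wakes it.
+ */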
- } while (did_work == TRUE);
+
+#if HIBERNATION
+
+kern_return_t
+hibernate_pin_swap(boolean_t start)
+{
+ vm_compaction_swapper_do_init();
+
+ if (start == FALSE) {
+
+ lck_mtx_lock(&vm_swap_data_lock);
+ hibernate_in_progress_with_pinned_swap = FALSE;
+ lck_mtx_unlock(&vm_swap_data_lock);
+
+ return (KERN_SUCCESS);
+ }
+ if (vm_swappin_enabled == FALSE)
+ return (KERN_SUCCESS);
lck_mtx_lock(&vm_swap_data_lock);
- clock_get_system_nanotime(&sec, &nsec);
+ hibernate_in_progress_with_pinned_swap = TRUE;
+
+ while (vm_swapfile_create_thread_running || vm_swapfile_gc_thread_running) {
+
+ assert_wait((event_t)&hibernate_in_progress_with_pinned_swap, THREAD_UNINT);
+
+ lck_mtx_unlock(&vm_swap_data_lock);
+
+ thread_block(THREAD_CONTINUE_NULL);
+
+ lck_mtx_lock(&vm_swap_data_lock);
+ }
+ if (vm_num_swap_files > vm_num_pinned_swap_files) {
+ hibernate_in_progress_with_pinned_swap = FALSE;
+ lck_mtx_unlock(&vm_swap_data_lock);
+
+ HIBLOG("hibernate_pin_swap failed - vm_num_swap_files = %d, vm_num_pinned_swap_files = %d\n",
+ vm_num_swap_files, vm_num_pinned_swap_files);
+ return (KERN_FAILURE);
+ }
+ lck_mtx_unlock(&vm_swap_data_lock);
+
+ while (VM_SWAP_SHOULD_PIN(MAX_SWAP_FILE_SIZE)) {
+ if (vm_swap_create_file() == FALSE)
+ break;
+ }
+ return (KERN_SUCCESS);
+}
+#endif
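+
+/*
+ * hibernate_pin_swap(TRUE) first quiesces the create and gc threads -
+ * hibernate_in_progress_with_pinned_swap makes each of them park and
+ * wake us - since either one could add or reclaim files underneath us.
+ * It then fails if any unpinned swapfile already exists (presumably
+ * because those blocks can't be relied on across hibernation), and
+ * finally pre-creates pinned files while pin space remains.
+ */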
+
+static void
+vm_swapfile_gc_thread(void)
+
+{
+ boolean_t need_defragment;
+ boolean_t need_reclaim;
+
+ vm_swapfile_gc_thread_awakened++;
+ vm_swapfile_gc_thread_running = 1;
+
+ while (TRUE) {
+
+ lck_mtx_lock(&vm_swap_data_lock);
+
+ if (hibernate_in_progress_with_pinned_swap == TRUE)
+ break;
+
+ if (VM_SWAP_BUSY() || compressor_store_stop_compaction == TRUE)
+ break;
+
+ need_defragment = FALSE;
+ need_reclaim = FALSE;
+
+ if (VM_SWAP_SHOULD_DEFRAGMENT())
+ need_defragment = TRUE;
+
+ if (VM_SWAP_SHOULD_RECLAIM()) {
+ need_defragment = TRUE;
+ need_reclaim = TRUE;
+ }
+ if (need_defragment == FALSE && need_reclaim == FALSE)
+ break;
- if (vm_swap_up == TRUE && (VM_SWAP_SHOULD_CREATE(sec) || ((!VM_SWAP_BUSY() && compressor_store_stop_compaction == FALSE) &&
- (VM_SWAP_SHOULD_DEFRAGMENT() || VM_SWAP_SHOULD_RECLAIM())))) {
lck_mtx_unlock(&vm_swap_data_lock);
- goto try_again;
+
+ if (need_defragment == TRUE)
+ vm_swap_defragment();
+ if (need_reclaim == TRUE)
+ vm_swap_reclaim();
}
+ vm_swapfile_gc_thread_running = 0;
- vm_swapfile_mgmt_thread_running = 0;
+ if (hibernate_in_progress_with_pinned_swap == TRUE)
+ thread_wakeup((event_t)&hibernate_in_progress_with_pinned_swap);
- assert_wait((event_t)&vm_swapfile_mgmt_needed, THREAD_UNINT);
+ assert_wait((event_t)&vm_swapfile_gc_needed, THREAD_UNINT);
lck_mtx_unlock(&vm_swap_data_lock);
- thread_block((thread_continue_t)vm_swapfile_mgmt_thread);
+ thread_block((thread_continue_t)vm_swapfile_gc_thread);
/* NOTREACHED */
}
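+
+/*
+ * need_defragment/need_reclaim are evaluated while holding
+ * vm_swap_data_lock, but the actual work runs with the lock dropped;
+ * a reclaim request also forces a defragment pass first, presumably so
+ * sparse segments are consolidated before a victim file is chosen.
+ */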
}
done:
if (swapper_throttle != swapper_throttle_new) {
- proc_set_task_policy_thread(kernel_task, vm_swapout_thread_id,
- TASK_POLICY_INTERNAL, TASK_POLICY_IO, swapper_throttle_new);
- proc_set_task_policy_thread(kernel_task, vm_swapout_thread_id,
- TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
+ proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
+ TASK_POLICY_INTERNAL, TASK_POLICY_IO, swapper_throttle_new);
+ proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
+ TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
swapper_throttle = swapper_throttle_new;
}
}
+int vm_swapout_found_empty = 0;
+
static void
vm_swapout_thread(void)
{
kern_return_t kr = KERN_SUCCESS;
vm_offset_t addr = 0;
+ current_thread()->options |= TH_OPT_VMPRIV;
+
vm_swapout_thread_awakened++;
lck_mtx_lock_spin_always(c_list_lock);
lck_mtx_lock_spin_always(&c_seg->c_lock);
- assert(c_seg->c_on_swapout_q);
+ assert(c_seg->c_state == C_ON_SWAPOUT_Q);
if (c_seg->c_busy) {
lck_mtx_unlock_always(c_list_lock);
continue;
}
- queue_remove(&c_swapout_list_head, c_seg, c_segment_t, c_age_list);
- c_seg->c_on_swapout_q = 0;
- c_swapout_count--;
-
vm_swapout_thread_processed_segments++;
- thread_wakeup((event_t)&compaction_swapper_running);
-
size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
if (size == 0) {
- c_seg_free_locked(c_seg);
- goto c_seg_was_freed;
+ assert(c_seg->c_bytes_used == 0);
+
+ if (!c_seg->c_on_minorcompact_q)
+ c_seg_need_delayed_compaction(c_seg, TRUE);
+
+ c_seg_switch_state(c_seg, C_IS_EMPTY, FALSE);
+ lck_mtx_unlock_always(&c_seg->c_lock);
+ lck_mtx_unlock_always(c_list_lock);
+
+ vm_swapout_found_empty++;
+ goto c_seg_is_empty;
}
- c_seg->c_busy = 1;
+ C_SEG_BUSY(c_seg);
c_seg->c_busy_swapping = 1;
lck_mtx_unlock_always(c_list_lock);
c_seg->cseg_swap_size = size;
#endif /* CHECKSUM_THE_SWAP */
-#if CRYPTO
+#if ENCRYPTED_SWAP
vm_swap_encrypt(c_seg);
-#endif /* CRYPTO */
+#endif /* ENCRYPTED_SWAP */
vm_swapout_thread_throttle_adjust();
PAGE_REPLACEMENT_DISALLOWED(TRUE);
+ if (kr == KERN_SUCCESS) {
+ kernel_memory_depopulate(compressor_map, (vm_offset_t) addr, size, KMA_COMPRESSOR);
+ }
+#if ENCRYPTED_SWAP
+ else {
+ vm_swap_decrypt(c_seg);
+ }
+#endif /* ENCRYPTED_SWAP */
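+	/*
+	 * on success the backing pages can be depopulated immediately -
+	 * the data now lives in the swapfile; on failure the segment is
+	 * decrypted back in place, since it is about to be requeued as
+	 * live compressor data below
+	 */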
lck_mtx_lock_spin_always(c_list_lock);
lck_mtx_lock_spin_always(&c_seg->c_lock);
if (kr == KERN_SUCCESS) {
+ int new_state = C_ON_SWAPPEDOUT_Q;
+ boolean_t insert_head = FALSE;
- if (C_SEG_ONDISK_IS_SPARSE(c_seg) && hibernate_flushing == FALSE) {
+ if (hibernate_flushing == TRUE) {
+ if (c_seg->c_generation_id >= first_c_segment_to_warm_generation_id &&
+ c_seg->c_generation_id <= last_c_segment_to_warm_generation_id)
+ insert_head = TRUE;
+ } else if (C_SEG_ONDISK_IS_SPARSE(c_seg))
+ new_state = C_ON_SWAPPEDOUTSPARSE_Q;
- c_seg_insert_into_q(&c_swappedout_sparse_list_head, c_seg);
- c_seg->c_on_swappedout_sparse_q = 1;
- c_swappedout_sparse_count++;
+ c_seg_switch_state(c_seg, new_state, insert_head);
- } else {
- if (hibernate_flushing == TRUE && (c_seg->c_generation_id >= first_c_segment_to_warm_generation_id &&
- c_seg->c_generation_id <= last_c_segment_to_warm_generation_id))
- queue_enter_first(&c_swappedout_list_head, c_seg, c_segment_t, c_age_list);
- else
- queue_enter(&c_swappedout_list_head, c_seg, c_segment_t, c_age_list);
- c_seg->c_on_swappedout_q = 1;
- c_swappedout_count++;
- }
c_seg->c_store.c_swap_handle = f_offset;
- c_seg->c_ondisk = 1;
VM_STAT_INCR_BY(swapouts, size >> PAGE_SHIFT);
if (c_seg->c_bytes_used)
OSAddAtomic64(-c_seg->c_bytes_used, &compressor_bytes_used);
} else {
-#if CRYPTO
- vm_swap_decrypt(c_seg);
-#endif /* CRYPTO */
- c_seg_insert_into_q(&c_age_list_head, c_seg);
- c_seg->c_on_age_q = 1;
- c_age_count++;
+ if (c_seg->c_overage_swap == TRUE) {
+ c_seg->c_overage_swap = FALSE;
+ c_overage_swapped_count--;
+ }
+ c_seg_switch_state(c_seg, C_ON_AGE_Q, FALSE);
- vm_swap_put_failures++;
+ if (!c_seg->c_on_minorcompact_q && C_SEG_UNUSED_BYTES(c_seg) >= PAGE_SIZE)
+ c_seg_need_delayed_compaction(c_seg, TRUE);
}
- lck_mtx_unlock_always(c_list_lock);
+ assert(c_seg->c_busy_swapping);
+ assert(c_seg->c_busy);
c_seg->c_busy_swapping = 0;
+ lck_mtx_unlock_always(c_list_lock);
C_SEG_WAKEUP_DONE(c_seg);
-
- if (c_seg->c_must_free)
- c_seg_free(c_seg);
- else
- lck_mtx_unlock_always(&c_seg->c_lock);
-
- if (kr == KERN_SUCCESS)
- kernel_memory_depopulate(kernel_map, (vm_offset_t) addr, size, KMA_COMPRESSOR);
+ lck_mtx_unlock_always(&c_seg->c_lock);
PAGE_REPLACEMENT_DISALLOWED(FALSE);
- if (kr == KERN_SUCCESS)
- kmem_free(kernel_map, (vm_offset_t) addr, C_SEG_ALLOCSIZE);
-
vm_pageout_io_throttle();
-c_seg_was_freed:
+c_seg_is_empty:
if (c_swapout_count == 0)
vm_swap_consider_defragmenting();
int namelen = 0;
boolean_t swap_file_created = FALSE;
boolean_t swap_file_reuse = FALSE;
+ boolean_t swap_file_pin = FALSE;
struct swapfile *swf = NULL;
-
- if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE) {
- }
+ /*
+ * make sure we've got all the info we need
+ * to potentially pin a swap file... we could
+ * be swapping out due to hibernation w/o ever
+ * having run vm_pageout_scan, which is normally
+ * the trigger to do the init
+ */
+ vm_compaction_swapper_do_init();
/*
* Any swapfile structure ready for re-use?
if (swap_file_reuse == FALSE) {
- namelen = SWAPFILENAME_LEN + SWAPFILENAME_INDEX_LEN + 1;
+ if (strlen(swapfilename) == 0) {
+ /*
+ * If no swapfile name has been set, we'll
+ * use the default name.
+ *
+ * Also, this function is only called from the swapfile create thread.
+ * So we don't need to worry about a race in checking/setting the name here.
+ */
+
+ strlcpy(swapfilename, SWAP_FILE_NAME, MAX_SWAPFILENAME_LEN);
+ }
+
+ namelen = (int)strlen(swapfilename) + SWAPFILENAME_INDEX_LEN + 1;
swf = (struct swapfile*) kalloc(sizeof *swf);
memset(swf, 0, sizeof(*swf));
memset(swf->swp_path, 0, namelen);
- snprintf(swf->swp_path, namelen, "%s%d", SWAP_FILE_NAME, vm_num_swap_files + 1);
+ snprintf(swf->swp_path, namelen, "%s%d", swapfilename, vm_num_swap_files);
}
vm_swapfile_open(swf->swp_path, &swf->swp_vp);
}
return FALSE;
}
+ vm_swapfile_can_be_created = TRUE;
+
size = MAX_SWAP_FILE_SIZE;
while (size >= MIN_SWAP_FILE_SIZE) {
- if (vm_swapfile_preallocate(swf->swp_vp, &size) == 0) {
+ swap_file_pin = VM_SWAP_SHOULD_PIN(size);
+
+ if (vm_swapfile_preallocate(swf->swp_vp, &size, &swap_file_pin) == 0) {
int num_bytes_for_bitmap = 0;
* will return ENOTSUP if trim isn't supported
* and 0 if it is
*/
- if (vnode_trim_list(swf->swp_vp, NULL))
- swf->swp_trim_supported = FALSE;
- else
- swf->swp_trim_supported = TRUE;
+ if (vnode_trim_list(swf->swp_vp, NULL, FALSE) == 0)
+ swp_trim_supported = TRUE;
lck_mtx_lock(&vm_swap_data_lock);
vm_swapfile_total_segs_alloced += swf->swp_nsegs;
+ if (swap_file_pin == TRUE) {
+ vm_num_pinned_swap_files++;
+ swf->swp_flags |= SWAP_PINNED;
+ vm_swappin_avail -= swf->swp_size;
+ }
+
lck_mtx_unlock(&vm_swap_data_lock);
thread_wakeup((event_t) &vm_num_swap_files);
-
break;
} else {
kern_return_t
-vm_swap_get(vm_offset_t addr, uint64_t f_offset, uint64_t size)
+vm_swap_get(c_segment_t c_seg, uint64_t f_offset, uint64_t size)
{
struct swapfile *swf = NULL;
uint64_t file_offset = 0;
- int retval;
+ int retval = 0;
- if (addr == 0) {
- return KERN_FAILURE;
- }
+ assert(c_seg->c_store.c_buffer);
lck_mtx_lock(&vm_swap_data_lock);
swf = vm_swapfile_for_handle(f_offset);
- if (swf) {
- if ((swf->swp_flags & SWAP_READY) || (swf->swp_flags & SWAP_RECLAIM)) {
-
- swf->swp_io_count++;
- file_offset = (f_offset & SWAP_SLOT_MASK);
-
- lck_mtx_unlock(&vm_swap_data_lock);
+ if (swf == NULL || ( !(swf->swp_flags & SWAP_READY) && !(swf->swp_flags & SWAP_RECLAIM))) {
+ retval = 1;
+ goto done;
+ }
+ swf->swp_io_count++;
- } else {
+ lck_mtx_unlock(&vm_swap_data_lock);
- lck_mtx_unlock(&vm_swap_data_lock);
- return KERN_FAILURE;
- }
- } else {
-
- lck_mtx_unlock(&vm_swap_data_lock);
- return KERN_FAILURE;
- }
+#if DEVELOPMENT || DEBUG
+ C_SEG_MAKE_WRITEABLE(c_seg);
+#endif
+ file_offset = (f_offset & SWAP_SLOT_MASK);
+ retval = vm_swapfile_io(swf->swp_vp, file_offset, c_seg->c_store.c_buffer, (int)(size / PAGE_SIZE_64), SWAP_READ);
- retval = vm_swapfile_io(swf->swp_vp, file_offset, addr, (int)(size / PAGE_SIZE_64), SWAP_READ);
+#if DEVELOPMENT || DEBUG
+ C_SEG_WRITE_PROTECT(c_seg);
+#endif
+ if (retval == 0)
+ VM_STAT_INCR_BY(swapins, size >> PAGE_SHIFT);
+ else
+ vm_swap_get_failures++;
/*
* Free this slot in the swap structure.
swf->swp_flags &= ~SWAP_WANTED;
thread_wakeup((event_t) &swf->swp_flags);
}
- if (retval == 0)
- VM_STAT_INCR_BY(swapins, size >> PAGE_SHIFT);
+done:
lck_mtx_unlock(&vm_swap_data_lock);
if (retval == 0)
return KERN_SUCCESS;
- else {
- vm_swap_get_failures++;
+ else
return KERN_FAILURE;
- }
}
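+
+/*
+ * A swap handle (f_offset) encodes both the owning swapfile and the
+ * position within it: the low bits (f_offset & SWAP_SLOT_MASK) give the
+ * byte offset handed to vm_swapfile_io, while the high bits identify
+ * the file, which vm_swapfile_for_handle looks up on swf_global_queue.
+ */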
kern_return_t
unsigned int offset_within_byte = 0;
boolean_t swf_eligible = FALSE;
boolean_t waiting = FALSE;
+ boolean_t retried = FALSE;
int error = 0;
clock_sec_t sec;
clock_nsec_t nsec;
if (addr == 0 || f_offset == NULL) {
return KERN_FAILURE;
}
-
+retry:
lck_mtx_lock(&vm_swap_data_lock);
swf = (struct swapfile*) queue_first(&swf_global_queue);
clock_get_system_nanotime(&sec, &nsec);
- if (VM_SWAP_SHOULD_CREATE(sec) && !vm_swapfile_mgmt_thread_running)
- thread_wakeup((event_t) &vm_swapfile_mgmt_needed);
+ if (VM_SWAP_SHOULD_CREATE(sec) && !vm_swapfile_create_thread_running)
+ thread_wakeup((event_t) &vm_swapfile_create_needed);
lck_mtx_unlock(&vm_swap_data_lock);
*/
clock_get_system_nanotime(&sec, &nsec);
- if (VM_SWAP_SHOULD_CREATE(sec) && !vm_swapfile_mgmt_thread_running)
- thread_wakeup((event_t) &vm_swapfile_mgmt_needed);
+ if (VM_SWAP_SHOULD_CREATE(sec) && !vm_swapfile_create_thread_running)
+ thread_wakeup((event_t) &vm_swapfile_create_needed);
if (hibernate_flushing == FALSE || VM_SWAP_SHOULD_CREATE(sec)) {
waiting = TRUE;
lck_mtx_unlock(&vm_swap_data_lock);
- if (waiting == TRUE)
+ if (waiting == TRUE) {
thread_block(THREAD_CONTINUE_NULL);
+ if (retried == FALSE && hibernate_flushing == TRUE) {
+ retried = TRUE;
+ goto retry;
+ }
+ }
+ vm_swap_put_failures++;
+
return KERN_FAILURE;
done:
lck_mtx_unlock(&vm_swap_data_lock);
-#if SANITY_CHECK_SWAP_ROUTINES
- printf("Returned 0x%llx as offset\n", *f_offset);
-#endif /* SANITY_CHECK_SWAP_ROUTINES */
-
if (error) {
vm_swap_free(*f_offset);
+ vm_swap_put_failures++;
+
return KERN_FAILURE;
}
return KERN_SUCCESS;
swf->swp_free_hint = segidx;
}
}
- if (VM_SWAP_SHOULD_RECLAIM() && !vm_swapfile_mgmt_thread_running)
- thread_wakeup((event_t) &vm_swapfile_mgmt_needed);
+ if (VM_SWAP_SHOULD_RECLAIM() && !vm_swapfile_gc_thread_running)
+ thread_wakeup((event_t) &vm_swapfile_gc_needed);
}
- lck_mtx_unlock(&vm_swap_data_lock);
}
vm_swap_free(uint64_t f_offset)
{
struct swapfile *swf = NULL;
- struct trim_list *tl;
+ struct trim_list *tl = NULL;
clock_sec_t sec;
clock_nsec_t nsec;
+ if (swp_trim_supported == TRUE)
+ tl = kalloc(sizeof(struct trim_list));
+
lck_mtx_lock(&vm_swap_data_lock);
swf = vm_swapfile_for_handle(f_offset);
if (swf && (swf->swp_flags & (SWAP_READY | SWAP_RECLAIM))) {
- if (swf->swp_trim_supported == FALSE || (swf->swp_flags & SWAP_RECLAIM)) {
+ if (swp_trim_supported == FALSE || (swf->swp_flags & SWAP_RECLAIM)) {
/*
* don't delay the free if the underlying disk doesn't support
* trim, or we're in the midst of reclaiming this swap file since
vm_swap_free_now(swf, f_offset);
vm_swap_free_now_count++;
- return;
+ goto done;
}
- tl = kalloc(sizeof(struct trim_list));
-
tl->tl_offset = f_offset & SWAP_SLOT_MASK;
tl->tl_length = COMPRESSED_SWAP_CHUNK_SIZE;
tl->tl_next = swf->swp_delayed_trim_list_head;
swf->swp_delayed_trim_list_head = tl;
swf->swp_delayed_trim_count++;
+ tl = NULL;
- if (VM_SWAP_SHOULD_TRIM(swf) && !vm_swapfile_mgmt_thread_running) {
+ if (VM_SWAP_SHOULD_TRIM(swf) && !vm_swapfile_create_thread_running) {
clock_get_system_nanotime(&sec, &nsec);
if (sec > dont_trim_until_ts)
- thread_wakeup((event_t) &vm_swapfile_mgmt_needed);
+ thread_wakeup((event_t) &vm_swapfile_create_needed);
}
vm_swap_free_delayed_count++;
}
+done:
lck_mtx_unlock(&vm_swap_data_lock);
+
+ if (tl != NULL)
+ kfree(tl, sizeof(struct trim_list));
}
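+
+/*
+ * Note the allocation discipline in vm_swap_free above: the trim_list
+ * element is kalloc'd before taking vm_swap_data_lock and kfree'd after
+ * dropping it if it went unused (tl is cleared once chained), so neither
+ * happens while the lock is held.
+ */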
+static void
+vm_swap_wait_on_trim_handling_in_progress()
+{
+ while (delayed_trim_handling_in_progress == TRUE) {
+
+ assert_wait((event_t) &delayed_trim_handling_in_progress, THREAD_UNINT);
+ lck_mtx_unlock(&vm_swap_data_lock);
+
+ thread_block(THREAD_CONTINUE_NULL);
+
+ lck_mtx_lock(&vm_swap_data_lock);
+ }
+}
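+
+/*
+ * Standard assert_wait/thread_block loop: the caller enters and leaves
+ * with vm_swap_data_lock held, and the predicate is re-checked after
+ * every wakeup since the thread_wakeup on
+ * delayed_trim_handling_in_progress wakes all waiters.
+ */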
+
+
static void
vm_swap_handle_delayed_trims(boolean_t force_now)
{
struct swapfile *swf = NULL;
/*
- * because swap files are created or reclaimed on the
- * same thread that calls this function, it's safe
- * to iterate "swf_global_queue" w/o holding
- * the lock since those are the only 2 cases that can
- * change the items on the "swf_global_queue"
+ * serialize the race between us and vm_swap_reclaim...
+ * if vm_swap_reclaim wins it will turn off SWAP_READY
+ * on the victim it has chosen... we can just skip over
+ * that file since vm_swap_reclaim will first process
+ * all of the delayed trims associated with it
+ */
+ lck_mtx_lock(&vm_swap_data_lock);
+
+ delayed_trim_handling_in_progress = TRUE;
+
+ lck_mtx_unlock(&vm_swap_data_lock);
+
+ /*
+ * no need to hold the lock to walk the swf list since
+ * vm_swap_create_file (the only place where we add to this list)
+ * is run on the same thread as this function,
+ * and vm_swap_reclaim doesn't remove items from this list,
+ * instead marking them with SWAP_REUSE for future re-use
*/
swf = (struct swapfile*) queue_first(&swf_global_queue);
while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
- assert(!(swf->swp_flags & SWAP_RECLAIM));
+ if ((swf->swp_flags & SWAP_READY) && (force_now == TRUE || VM_SWAP_SHOULD_TRIM(swf))) {
- if ((swf->swp_flags & SWAP_READY) && (force_now == TRUE || VM_SWAP_SHOULD_TRIM(swf)))
+ assert(!(swf->swp_flags & SWAP_RECLAIM));
vm_swap_do_delayed_trim(swf);
-
+ }
swf = (struct swapfile*) queue_next(&swf->swp_queue);
}
-}
+ lck_mtx_lock(&vm_swap_data_lock);
+
+ delayed_trim_handling_in_progress = FALSE;
+ thread_wakeup((event_t) &delayed_trim_handling_in_progress);
+ if (VM_SWAP_SHOULD_RECLAIM() && !vm_swapfile_gc_thread_running)
+ thread_wakeup((event_t) &vm_swapfile_gc_needed);
+
+ lck_mtx_unlock(&vm_swap_data_lock);
+
+}
static void
vm_swap_do_delayed_trim(struct swapfile *swf)
lck_mtx_unlock(&vm_swap_data_lock);
- vnode_trim_list(swf->swp_vp, tl_head);
+ vnode_trim_list(swf->swp_vp, tl_head, TRUE);
while ((tl = tl_head) != NULL) {
unsigned int segidx = 0;
c_segment_t c_seg = NULL;
- if (kernel_memory_allocate(kernel_map, (vm_offset_t *)(&addr), C_SEG_BUFSIZE, 0, KMA_KOBJECT) != KERN_SUCCESS) {
+ if (kernel_memory_allocate(compressor_map, (vm_offset_t *)(&addr), C_SEG_BUFSIZE, 0, KMA_KOBJECT, VM_KERN_MEMORY_COMPRESSOR) != KERN_SUCCESS) {
panic("vm_swap_reclaim: kernel_memory_allocate failed\n");
}
lck_mtx_lock(&vm_swap_data_lock);
+ /*
+ * if the swapfile create thread is walking the swapfile list
+ * handling delayed trims, we need to wait before making our
+ * decision concerning the swapfile we want to reclaim
+ */
+ vm_swap_wait_on_trim_handling_in_progress();
+
+ /*
+ * from here until we knock down the SWAP_READY bit,
+ * we need to remain behind the vm_swap_data_lock...
+ * once that bit has been turned off, "vm_swap_handle_delayed_trims"
+ * will not consider this swapfile for processing
+ */
swf = (struct swapfile*) queue_first(&swf_global_queue);
min_nsegs = MAX_SWAP_FILE_SIZE / COMPRESSED_SWAP_CHUNK_SIZE;
smallest_swf = NULL;
while (segidx < swf->swp_nsegs) {
ReTry_for_cseg:
- if (compressor_store_stop_compaction == TRUE || (swf->swp_trim_supported == FALSE && VM_SWAP_BUSY())) {
- vm_swap_reclaim_yielded++;
- break;
- }
/*
* Wait for outgoing I/Os.
*/
lck_mtx_lock(&vm_swap_data_lock);
}
+ if (compressor_store_stop_compaction == TRUE || VM_SWAP_SHOULD_ABORT_RECLAIM() || VM_SWAP_BUSY()) {
+ vm_swap_reclaim_yielded++;
+ break;
+ }
byte_for_segidx = segidx >> 3;
offset_within_byte = segidx % 8;
}
c_seg = swf->swp_csegs[segidx];
+ assert(c_seg);
lck_mtx_lock_spin_always(&c_seg->c_lock);
- assert(c_seg->c_ondisk);
-
if (c_seg->c_busy) {
-
+ /*
+ * a swapped out c_segment in the process of being freed will remain in the
+ * busy state until after vm_swap_free is called on it... vm_swap_free
+ * takes the vm_swap_data_lock, so can't change the swap state until after
+ * we drop the vm_swap_data_lock... once we do, vm_swap_free will complete
+ * which will allow c_seg_free_locked to clear busy and wake up this thread...
+ * at that point, we re-look up the swap state which will now indicate that
+ * this c_segment no longer exists.
+ */
c_seg->c_wanted = 1;
assert_wait((event_t) (c_seg), THREAD_UNINT);
(swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);
f_offset = segidx * COMPRESSED_SWAP_CHUNK_SIZE;
-
+
+ assert(c_seg == swf->swp_csegs[segidx]);
swf->swp_csegs[segidx] = NULL;
swf->swp_nseginuse--;
vm_swapfile_total_segs_used--;
lck_mtx_unlock(&vm_swap_data_lock);
-
- if (c_seg->c_must_free) {
- c_seg_free(c_seg);
- } else {
+ assert(C_SEG_IS_ONDISK(c_seg));
- c_seg->c_busy = 1;
- c_seg->c_busy_swapping = 1;
+ C_SEG_BUSY(c_seg);
+ c_seg->c_busy_swapping = 1;
#if !CHECKSUM_THE_SWAP
- c_seg_trim_tail(c_seg);
+ c_seg_trim_tail(c_seg);
#endif
-
-#if SANITY_CHECK_SWAP_ROUTINES
-
- c_size = COMPRESSED_SWAP_CHUNK_SIZE;
-
-#else /* SANITY_CHECK_SWAP_ROUTINES */
-
- c_size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
+ c_size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
- assert(c_size <= C_SEG_BUFSIZE);
+ assert(c_size <= C_SEG_BUFSIZE && c_size);
-#endif /* SANITY_CHECK_SWAP_ROUTINES */
-
- lck_mtx_unlock_always(&c_seg->c_lock);
+ lck_mtx_unlock_always(&c_seg->c_lock);
- if (vm_swapfile_io(swf->swp_vp, f_offset, addr, (int)(c_size / PAGE_SIZE_64), SWAP_READ)) {
+ if (vm_swapfile_io(swf->swp_vp, f_offset, addr, (int)(c_size / PAGE_SIZE_64), SWAP_READ)) {
- /*
- * reading the data back in failed, so convert c_seg
- * to a swapped in c_segment that contains no data
- */
- c_seg->c_store.c_buffer = (int32_t *)NULL;
- c_seg_swapin_requeue(c_seg);
+ /*
+ * reading the data back in failed, so convert c_seg
+ * to a swapped in c_segment that contains no data
+ */
+ c_seg_swapin_requeue(c_seg, FALSE, TRUE, FALSE);
+ /*
+ * returns with c_busy_swapping cleared
+ */
- goto swap_io_failed;
- }
- VM_STAT_INCR_BY(swapins, c_size >> PAGE_SHIFT);
+ vm_swap_get_failures++;
+ goto swap_io_failed;
+ }
+ VM_STAT_INCR_BY(swapins, c_size >> PAGE_SHIFT);
- if (vm_swap_put(addr, &f_offset, c_size, c_seg)) {
- vm_offset_t c_buffer;
+ if (vm_swap_put(addr, &f_offset, c_size, c_seg)) {
+ vm_offset_t c_buffer;
- /*
- * the put failed, so convert c_seg to a fully swapped in c_segment
- * with valid data
- */
- if (kernel_memory_allocate(kernel_map, &c_buffer, C_SEG_ALLOCSIZE, 0, KMA_COMPRESSOR | KMA_VAONLY) != KERN_SUCCESS)
- panic("vm_swap_reclaim: kernel_memory_allocate failed\n");
- kernel_memory_populate(kernel_map, c_buffer, c_size, KMA_COMPRESSOR);
+ /*
+ * the put failed, so convert c_seg to a fully swapped in c_segment
+ * with valid data
+ */
+ c_buffer = (vm_offset_t)C_SEG_BUFFER_ADDRESS(c_seg->c_mysegno);
- memcpy((char *)c_buffer, (char *)addr, c_size);
+ kernel_memory_populate(compressor_map, c_buffer, c_size, KMA_COMPRESSOR, VM_KERN_MEMORY_COMPRESSOR);
- c_seg->c_store.c_buffer = (int32_t *)c_buffer;
-#if CRYPTO
- vm_swap_decrypt(c_seg);
-#endif /* CRYPTO */
- c_seg_swapin_requeue(c_seg);
+ memcpy((char *)c_buffer, (char *)addr, c_size);
- OSAddAtomic64(c_seg->c_bytes_used, &compressor_bytes_used);
+ c_seg->c_store.c_buffer = (int32_t *)c_buffer;
+#if ENCRYPTED_SWAP
+ vm_swap_decrypt(c_seg);
+#endif /* ENCRYPTED_SWAP */
+ c_seg_swapin_requeue(c_seg, TRUE, TRUE, FALSE);
+ /*
+ * returns with c_busy_swapping cleared
+ */
+ OSAddAtomic64(c_seg->c_bytes_used, &compressor_bytes_used);
- goto swap_io_failed;
- }
- VM_STAT_INCR_BY(swapouts, c_size >> PAGE_SHIFT);
+ goto swap_io_failed;
+ }
+ VM_STAT_INCR_BY(swapouts, c_size >> PAGE_SHIFT);
- lck_mtx_lock_spin_always(&c_seg->c_lock);
+ lck_mtx_lock_spin_always(&c_seg->c_lock);
- assert(c_seg->c_ondisk);
- /*
- * The c_seg will now know about the new location on disk.
- */
- c_seg->c_store.c_swap_handle = f_offset;
+ assert(C_SEG_IS_ONDISK(c_seg));
+ /*
+ * The c_seg will now know about the new location on disk.
+ */
+ c_seg->c_store.c_swap_handle = f_offset;
+
+ assert(c_seg->c_busy_swapping);
+ c_seg->c_busy_swapping = 0;
swap_io_failed:
- c_seg->c_busy_swapping = 0;
-
- if (c_seg->c_must_free)
- c_seg_free(c_seg);
- else {
- C_SEG_WAKEUP_DONE(c_seg);
+ assert(c_seg->c_busy);
+ C_SEG_WAKEUP_DONE(c_seg);
- lck_mtx_unlock_always(&c_seg->c_lock);
- }
- }
+ lck_mtx_unlock_always(&c_seg->c_lock);
lck_mtx_lock(&vm_swap_data_lock);
}
/*
* We don't remove this inactive swf from the queue.
* That way, we can re-use it when needed again and
- * preserve the namespace.
+ * preserve the namespace. The delayed_trim processing
+ * is also dependent on us not removing swfs from the queue.
*/
//queue_remove(&swf_global_queue, swf, struct swapfile*, swp_queue);
lck_mtx_lock(&vm_swap_data_lock);
+ if (swf->swp_flags & SWAP_PINNED) {
+ vm_num_pinned_swap_files--;
+ vm_swappin_avail += swf->swp_size;
+ }
+
swf->swp_vp = NULL;
swf->swp_size = 0;
swf->swp_free_hint = 0;
swf->swp_nsegs = 0;
swf->swp_flags = SWAP_REUSE;
- thread_wakeup((event_t) &swf->swp_flags);
done:
+ thread_wakeup((event_t) &swf->swp_flags);
lck_mtx_unlock(&vm_swap_data_lock);
- kmem_free(kernel_map, (vm_offset_t) addr, C_SEG_BUFSIZE);
+ kmem_free(compressor_map, (vm_offset_t) addr, C_SEG_BUFSIZE);
}
{
return (vm_swap_get_total_space() - vm_swap_get_used_space());
}
+
+
+int
+vm_swap_low_on_space(void)
+{
+
+ if (vm_num_swap_files == 0 && vm_swapfile_can_be_created == FALSE)
+ return (0);
+
+ if (((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < ((unsigned int)VM_SWAPFILE_HIWATER_SEGS) / 8)) {
+
+ if (vm_num_swap_files == 0 && !SWAPPER_NEEDS_TO_UNTHROTTLE())
+ return (0);
+
+ if (vm_swapfile_last_failed_to_create_ts >= vm_swapfile_last_successful_create_ts)
+ return (1);
+ }
+ return (0);
+}
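+
+/*
+ * "Low on space" means free swap segments have dropped below 1/8 of the
+ * high-water mark and the most recent create attempt failed at least as
+ * recently as the last success - i.e. a new swapfile can't be counted on
+ * to bail us out.  (Presumably consumed by the memory-status machinery
+ * to react before swap is exhausted outright.)
+ */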
+
+boolean_t
+vm_swap_files_pinned(void)
+{
+ boolean_t result;
+
+ if (vm_swappin_enabled == FALSE)
+ return(TRUE);
+
+ result = (vm_num_pinned_swap_files == vm_num_swap_files);
+
+ return (result);
+}