]> git.saurik.com Git - apple/xnu.git/blame - osfmk/vm/vm_compressor_backing_store.c
xnu-4903.241.1.tar.gz
[apple/xnu.git] / osfmk / vm / vm_compressor_backing_store.c
CommitLineData
39236c6e
A
1/*
2 * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29#include "vm_compressor_backing_store.h"
d9a64523 30#include <vm/vm_pageout.h>
39236c6e
A
31#include <vm/vm_protos.h>
32
33#include <IOKit/IOHibernatePrivate.h>
34
39037602 35#include <kern/policy_internal.h>
39236c6e
A
36
37boolean_t compressor_store_stop_compaction = FALSE;
fe8ab488
A
38boolean_t vm_swapfile_create_needed = FALSE;
39boolean_t vm_swapfile_gc_needed = FALSE;
39236c6e 40
d9a64523 41int vm_swapper_throttle = -1;
39236c6e
A
42uint64_t vm_swapout_thread_id;
43
44uint64_t vm_swap_put_failures = 0;
45uint64_t vm_swap_get_failures = 0;
d9a64523 46int vm_num_swap_files_config = 0;
39236c6e 47int vm_num_swap_files = 0;
39037602 48int vm_num_pinned_swap_files = 0;
39236c6e
A
49int vm_swapout_thread_processed_segments = 0;
50int vm_swapout_thread_awakened = 0;
fe8ab488
A
51int vm_swapfile_create_thread_awakened = 0;
52int vm_swapfile_create_thread_running = 0;
53int vm_swapfile_gc_thread_awakened = 0;
54int vm_swapfile_gc_thread_running = 0;
39236c6e 55
3e170ce0 56int64_t vm_swappin_avail = 0;
39037602 57boolean_t vm_swappin_enabled = FALSE;
39236c6e
A
58unsigned int vm_swapfile_total_segs_alloced = 0;
59unsigned int vm_swapfile_total_segs_used = 0;
60
5ba3f43e
A
61char swapfilename[MAX_SWAPFILENAME_LEN + 1] = SWAP_FILE_NAME;
62
39037602
A
63extern vm_map_t compressor_map;
64
39236c6e
A
65
66#define SWAP_READY 0x1 /* Swap file is ready to be used */
67#define SWAP_RECLAIM 0x2 /* Swap file is marked to be reclaimed */
68#define SWAP_WANTED 0x4 /* Swap file has waiters */
69#define SWAP_REUSE 0x8 /* Swap file is on the Q and has a name. Reuse after init-ing.*/
3e170ce0
A
70#define SWAP_PINNED 0x10 /* Swap file is pinned (FusionDrive) */
71
39236c6e
A
72
73struct swapfile{
74 queue_head_t swp_queue; /* list of swap files */
75 char *swp_path; /* saved pathname of swap file */
76 struct vnode *swp_vp; /* backing vnode */
77 uint64_t swp_size; /* size of this swap file */
78 uint8_t *swp_bitmap; /* bitmap showing the alloced/freed slots in the swap file */
79 unsigned int swp_pathlen; /* length of pathname */
80 unsigned int swp_nsegs; /* #segments we can use */
81 unsigned int swp_nseginuse; /* #segments in use */
82 unsigned int swp_index; /* index of this swap file */
83 unsigned int swp_flags; /* state of swap file */
84 unsigned int swp_free_hint; /* offset of 1st free chunk */
85 unsigned int swp_io_count; /* count of outstanding I/Os */
86 c_segment_t *swp_csegs; /* back pointers to the c_segments. Used during swap reclaim. */
87
88 struct trim_list *swp_delayed_trim_list_head;
89 unsigned int swp_delayed_trim_count;
39236c6e
A
90};
91
92queue_head_t swf_global_queue;
fe8ab488 93boolean_t swp_trim_supported = FALSE;
39236c6e 94
39236c6e
A
95extern clock_sec_t dont_trim_until_ts;
96clock_sec_t vm_swapfile_last_failed_to_create_ts = 0;
fe8ab488
A
97clock_sec_t vm_swapfile_last_successful_create_ts = 0;
98int vm_swapfile_can_be_created = FALSE;
99boolean_t delayed_trim_handling_in_progress = FALSE;
39236c6e 100
39037602
A
101boolean_t hibernate_in_progress_with_pinned_swap = FALSE;
102
39236c6e
A
103static void vm_swapout_thread_throttle_adjust(void);
104static void vm_swap_free_now(struct swapfile *swf, uint64_t f_offset);
105static void vm_swapout_thread(void);
fe8ab488
A
106static void vm_swapfile_create_thread(void);
107static void vm_swapfile_gc_thread(void);
5ba3f43e 108static void vm_swap_defragment(void);
39236c6e 109static void vm_swap_handle_delayed_trims(boolean_t);
5ba3f43e 110static void vm_swap_do_delayed_trim(struct swapfile *);
fe8ab488
A
111static void vm_swap_wait_on_trim_handling_in_progress(void);
112
39236c6e 113
d9a64523
A
114boolean_t vm_swap_force_defrag = FALSE, vm_swap_force_reclaim = FALSE;
115
#if CONFIG_EMBEDDED

/*
 * Embedded configuration: at most 5 swap files (100 on DEVELOPMENT/DEBUG
 * kernels), delayed trims flushed once 4 are pending, defragmentation once
 * the sparse swapped-out count exceeds 1/16th of the segments in use, and
 * swap files are never pinned.
 */
#if DEVELOPMENT || DEBUG
#define VM_MAX_SWAP_FILE_NUM		100
#else /* DEVELOPMENT || DEBUG */
#define VM_MAX_SWAP_FILE_NUM		5
#endif /* DEVELOPMENT || DEBUG */

#define VM_SWAPFILE_DELAYED_TRIM_MAX	4

#define VM_SWAP_SHOULD_DEFRAGMENT()	(((vm_swap_force_defrag == TRUE) || (c_swappedout_sparse_count > (vm_swapfile_total_segs_used / 16))) ? 1 : 0)
#define VM_SWAP_SHOULD_PIN(_size)	FALSE
/* macro parameters are parenthesized so that any expression can be passed safely */
#define VM_SWAP_SHOULD_CREATE(cur_ts)	((vm_num_swap_files < vm_num_swap_files_config) && ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < (unsigned int)VM_SWAPFILE_HIWATER_SEGS) && \
					 (((cur_ts) - vm_swapfile_last_failed_to_create_ts) > VM_SWAPFILE_DELAYED_CREATE) ? 1 : 0)
#define VM_SWAP_SHOULD_TRIM(swf)	(((swf)->swp_delayed_trim_count >= VM_SWAPFILE_DELAYED_TRIM_MAX) ? 1 : 0)

#else /* CONFIG_EMBEDDED */

/*
 * Non-embedded configuration: up to 100 swap files, delayed trims flushed
 * once 128 are pending, defragmentation once the sparse swapped-out count
 * exceeds 1/4 of the segments in use, and pinning allowed while
 * vm_swappin_avail covers the requested size.
 */
#define VM_MAX_SWAP_FILE_NUM		100
#define VM_SWAPFILE_DELAYED_TRIM_MAX	128

#define VM_SWAP_SHOULD_DEFRAGMENT()	(((vm_swap_force_defrag == TRUE) || (c_swappedout_sparse_count > (vm_swapfile_total_segs_used / 4))) ? 1 : 0)
#define VM_SWAP_SHOULD_PIN(_size)	(vm_swappin_avail > 0 && vm_swappin_avail >= (int64_t)(_size))
/* macro parameters are parenthesized so that any expression can be passed safely */
#define VM_SWAP_SHOULD_CREATE(cur_ts)	((vm_num_swap_files < vm_num_swap_files_config) && ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < (unsigned int)VM_SWAPFILE_HIWATER_SEGS) && \
					 (((cur_ts) - vm_swapfile_last_failed_to_create_ts) > VM_SWAPFILE_DELAYED_CREATE) ? 1 : 0)
#define VM_SWAP_SHOULD_TRIM(swf)	(((swf)->swp_delayed_trim_count >= VM_SWAPFILE_DELAYED_TRIM_MAX) ? 1 : 0)

#endif /* CONFIG_EMBEDDED */
39236c6e 144
d9a64523
A
145#define VM_SWAP_SHOULD_RECLAIM() (((vm_swap_force_reclaim == TRUE) || ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) >= SWAPFILE_RECLAIM_THRESHOLD_SEGS)) ? 1 : 0)
146#define VM_SWAP_SHOULD_ABORT_RECLAIM() (((vm_swap_force_reclaim == FALSE) && ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) <= SWAPFILE_RECLAIM_MINIMUM_SEGS)) ? 1 : 0)
fe8ab488
A
147#define VM_SWAPFILE_DELAYED_CREATE 15
148
d9a64523 149#define VM_SWAP_BUSY() ((c_swapout_count && (vm_swapper_throttle == THROTTLE_LEVEL_COMPRESSOR_TIER0)) ? 1 : 0)
39236c6e
A
150
151
152#if CHECKSUM_THE_SWAP
153extern unsigned int hash_string(char *cp, int len);
154#endif
155
3e170ce0
A
156#if RECORD_THE_COMPRESSED_DATA
157boolean_t c_compressed_record_init_done = FALSE;
158int c_compressed_record_write_error = 0;
159struct vnode *c_compressed_record_vp = NULL;
160uint64_t c_compressed_record_file_offset = 0;
161void c_compressed_record_init(void);
162void c_compressed_record_write(char *, int);
163#endif
164
39236c6e
A
165extern void vm_pageout_io_throttle(void);
166
39037602 167static struct swapfile *vm_swapfile_for_handle(uint64_t);
39236c6e
A
168
169/*
170 * Called with the vm_swap_data_lock held.
171 */
172
39037602 173static struct swapfile *
39236c6e
A
174vm_swapfile_for_handle(uint64_t f_offset)
175{
176
177 uint64_t file_offset = 0;
178 unsigned int swapfile_index = 0;
179 struct swapfile* swf = NULL;
180
181 file_offset = (f_offset & SWAP_SLOT_MASK);
182 swapfile_index = (f_offset >> SWAP_DEVICE_SHIFT);
183
184 swf = (struct swapfile*) queue_first(&swf_global_queue);
185
186 while(queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
187
188 if (swapfile_index == swf->swp_index) {
189 break;
190 }
191
192 swf = (struct swapfile*) queue_next(&swf->swp_queue);
193 }
194
195 if (queue_end(&swf_global_queue, (queue_entry_t) swf)) {
196 swf = NULL;
197 }
198
199 return swf;
200}
201
fe8ab488 202#if ENCRYPTED_SWAP
39236c6e 203
d9a64523 204#include <libkern/crypto/aesxts.h>
39236c6e 205
d9a64523 206extern int cc_rand_generate(void *, size_t); /* from libkern/cyrpto/rand.h> */
39236c6e 207
d9a64523
A
208boolean_t swap_crypt_initialized;
209void swap_crypt_initialize(void);
3e170ce0 210
d9a64523
A
211symmetric_xts xts_modectx;
212uint32_t swap_crypt_key1[8]; /* big enough for a 256 bit random key */
213uint32_t swap_crypt_key2[8]; /* big enough for a 256 bit random key */
3e170ce0 214
d9a64523
A
215#if DEVELOPMENT || DEBUG
216boolean_t swap_crypt_xts_tested = FALSE;
217unsigned char swap_crypt_test_page_ref[4096] __attribute__((aligned(4096)));
218unsigned char swap_crypt_test_page_encrypt[4096] __attribute__((aligned(4096)));
219unsigned char swap_crypt_test_page_decrypt[4096] __attribute__((aligned(4096)));
220#endif /* DEVELOPMENT || DEBUG */
3e170ce0 221
d9a64523
A
222unsigned long vm_page_encrypt_counter;
223unsigned long vm_page_decrypt_counter;
3e170ce0
A
224
225
fe8ab488 226void
d9a64523 227swap_crypt_initialize(void)
fe8ab488 228{
d9a64523
A
229 uint8_t *enckey1, *enckey2;
230 int keylen1, keylen2;
231 int error;
39037602 232
d9a64523
A
233 assert(swap_crypt_initialized == FALSE);
234
235 keylen1 = sizeof(swap_crypt_key1);
236 enckey1 = (uint8_t *)&swap_crypt_key1;
237 keylen2 = sizeof(swap_crypt_key2);
238 enckey2 = (uint8_t *)&swap_crypt_key2;
239
240 error = cc_rand_generate((void *)enckey1, keylen1);
241 assert(!error);
242
243 error = cc_rand_generate((void *)enckey2, keylen2);
244 assert(!error);
245
246 error = xts_start(0, NULL, enckey1, keylen1, enckey2, keylen2, 0, 0, &xts_modectx);
247 assert(!error);
248
249 swap_crypt_initialized = TRUE;
250
251#if DEVELOPMENT || DEBUG
252 uint8_t *encptr;
253 uint8_t *decptr;
254 uint8_t *refptr;
255 uint8_t *iv;
256 uint64_t ivnum[2];
257 int size = 0;
258 int i = 0;
259 int rc = 0;
260
261 assert(swap_crypt_xts_tested == FALSE);
39236c6e 262
5ba3f43e
A
263 /*
264 * Validate the encryption algorithms.
d9a64523
A
265 *
266 * First initialize the test data.
5ba3f43e 267 */
d9a64523
A
268 for (i = 0; i < 4096; i++) {
269 swap_crypt_test_page_ref[i] = (char) i;
270 }
271 ivnum[0] = (uint64_t)0xaa;
272 ivnum[1] = 0;
273 iv = (uint8_t *)ivnum;
274
275 refptr = (uint8_t *)swap_crypt_test_page_ref;
276 encptr = (uint8_t *)swap_crypt_test_page_encrypt;
277 decptr = (uint8_t *)swap_crypt_test_page_decrypt;
278 size = 4096;
279
280 /* encrypt */
281 rc = xts_encrypt(refptr, size, encptr, iv, &xts_modectx);
282 assert(!rc);
283
284 /* compare result with original - should NOT match */
285 for (i = 0; i < 4096; i ++) {
286 if (swap_crypt_test_page_encrypt[i] !=
287 swap_crypt_test_page_ref[i]) {
288 break;
5ba3f43e 289 }
d9a64523
A
290 }
291 assert(i != 4096);
39037602 292
d9a64523
A
293 /* decrypt */
294 rc = xts_decrypt(encptr, size, decptr, iv, &xts_modectx);
295 assert(!rc);
39037602 296
d9a64523
A
297 /* compare result with original */
298 for (i = 0; i < 4096; i ++) {
299 if (swap_crypt_test_page_decrypt[i] !=
300 swap_crypt_test_page_ref[i]) {
301 panic("encryption test failed");
302 }
303 }
304 /* encrypt in place */
305 rc = xts_encrypt(decptr, size, decptr, iv, &xts_modectx);
306 assert(!rc);
307
308 /* decrypt in place */
309 rc = xts_decrypt(decptr, size, decptr, iv, &xts_modectx);
310 assert(!rc);
311
312 for (i = 0; i < 4096; i ++) {
313 if (swap_crypt_test_page_decrypt[i] !=
314 swap_crypt_test_page_ref[i]) {
315 panic("in place encryption test failed");
316 }
39037602 317 }
d9a64523
A
318 swap_crypt_xts_tested = TRUE;
319#endif /* DEVELOPMENT || DEBUG */
39236c6e
A
320}
321
fe8ab488 322
39236c6e
A
323void
324vm_swap_encrypt(c_segment_t c_seg)
325{
d9a64523
A
326 uint8_t *ptr;
327 uint8_t *iv;
328 uint64_t ivnum[2];
329 int size = 0;
330 int rc = 0;
331
332 if (swap_crypt_initialized == FALSE)
333 swap_crypt_initialize();
39236c6e 334
39037602
A
335#if DEVELOPMENT || DEBUG
336 C_SEG_MAKE_WRITEABLE(c_seg);
337#endif
d9a64523 338 ptr = (uint8_t *)c_seg->c_store.c_buffer;
39236c6e
A
339 size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
340
d9a64523
A
341 ivnum[0] = (uint64_t)c_seg;
342 ivnum[1] = 0;
343 iv = (uint8_t *)ivnum;
344
345 rc = xts_encrypt(ptr, size, ptr, iv, &xts_modectx);
346 assert(!rc);
39236c6e
A
347
348 vm_page_encrypt_counter += (size/PAGE_SIZE_64);
39037602
A
349
350#if DEVELOPMENT || DEBUG
351 C_SEG_WRITE_PROTECT(c_seg);
352#endif
39236c6e
A
353}
354
355void
356vm_swap_decrypt(c_segment_t c_seg)
357{
d9a64523
A
358 uint8_t *ptr;
359 uint8_t *iv;
360 uint64_t ivnum[2];
361 int size = 0;
362 int rc = 0;
39236c6e 363
d9a64523 364 assert(swap_crypt_initialized);
39236c6e 365
39037602
A
366#if DEVELOPMENT || DEBUG
367 C_SEG_MAKE_WRITEABLE(c_seg);
368#endif
d9a64523 369 ptr = (uint8_t *)c_seg->c_store.c_buffer;
39236c6e
A
370 size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
371
d9a64523
A
372 ivnum[0] = (uint64_t)c_seg;
373 ivnum[1] = 0;
374 iv = (uint8_t *)ivnum;
375
376 rc = xts_decrypt(ptr, size, ptr, iv, &xts_modectx);
377 assert(!rc);
39236c6e
A
378
379 vm_page_decrypt_counter += (size/PAGE_SIZE_64);
39037602
A
380
381#if DEVELOPMENT || DEBUG
382 C_SEG_WRITE_PROTECT(c_seg);
383#endif
39236c6e 384}
fe8ab488 385#endif /* ENCRYPTED_SWAP */
39236c6e
A
386
387
5ba3f43e
A
388void
389vm_compressor_swap_init()
390{
391 thread_t thread = NULL;
392
393 lck_grp_attr_setdefault(&vm_swap_data_lock_grp_attr);
394 lck_grp_init(&vm_swap_data_lock_grp,
395 "vm_swap_data",
396 &vm_swap_data_lock_grp_attr);
397 lck_attr_setdefault(&vm_swap_data_lock_attr);
398 lck_mtx_init_ext(&vm_swap_data_lock,
399 &vm_swap_data_lock_ext,
400 &vm_swap_data_lock_grp,
401 &vm_swap_data_lock_attr);
402
403 queue_init(&swf_global_queue);
404
405
406 if (kernel_thread_start_priority((thread_continue_t)vm_swapout_thread, NULL,
407 BASEPRI_VM, &thread) != KERN_SUCCESS) {
408 panic("vm_swapout_thread: create failed");
409 }
d9a64523 410 thread_set_thread_name(thread, "VM_swapout");
5ba3f43e
A
411 vm_swapout_thread_id = thread->thread_id;
412
413 thread_deallocate(thread);
414
415 if (kernel_thread_start_priority((thread_continue_t)vm_swapfile_create_thread, NULL,
416 BASEPRI_VM, &thread) != KERN_SUCCESS) {
417 panic("vm_swapfile_create_thread: create failed");
418 }
419
d9a64523 420 thread_set_thread_name(thread, "VM_swapfile_create");
5ba3f43e
A
421 thread_deallocate(thread);
422
423 if (kernel_thread_start_priority((thread_continue_t)vm_swapfile_gc_thread, NULL,
424 BASEPRI_VM, &thread) != KERN_SUCCESS) {
425 panic("vm_swapfile_gc_thread: create failed");
426 }
d9a64523 427 thread_set_thread_name(thread, "VM_swapfile_gc");
5ba3f43e
A
428 thread_deallocate(thread);
429
430 proc_set_thread_policy_with_tid(kernel_task, thread->thread_id,
431 TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER2);
432 proc_set_thread_policy_with_tid(kernel_task, thread->thread_id,
433 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
434
5ba3f43e
A
435#if CONFIG_EMBEDDED
436 /*
437 * dummy value until the swap file gets created
438 * when we drive the first c_segment_t to the
439 * swapout queue... at that time we will
440 * know the true size we have to work with
441 */
442 c_overage_swapped_limit = 16;
443#endif
d9a64523
A
444
445 vm_num_swap_files_config = VM_MAX_SWAP_FILE_NUM;
446
5ba3f43e
A
447 printf("VM Swap Subsystem is ON\n");
448}
449
450
451#if RECORD_THE_COMPRESSED_DATA
452
453void
454c_compressed_record_init()
455{
456 if (c_compressed_record_init_done == FALSE) {
457 vm_swapfile_open("/tmp/compressed_data", &c_compressed_record_vp);
458 c_compressed_record_init_done = TRUE;
459 }
460}
461
462void
463c_compressed_record_write(char *buf, int size)
464{
465 if (c_compressed_record_write_error == 0) {
466 c_compressed_record_write_error = vm_record_file_write(c_compressed_record_vp, c_compressed_record_file_offset, buf, size);
467 c_compressed_record_file_offset += size;
468 }
469}
470#endif
471
472
473int compaction_swapper_inited = 0;
474
475void
476vm_compaction_swapper_do_init(void)
477{
478 struct vnode *vp;
479 char *pathname;
480 int namelen;
481
482 if (compaction_swapper_inited)
483 return;
484
485 if (vm_compressor_mode != VM_PAGER_COMPRESSOR_WITH_SWAP) {
486 compaction_swapper_inited = 1;
487 return;
488 }
489 lck_mtx_lock(&vm_swap_data_lock);
490
491 if ( !compaction_swapper_inited) {
492
493 namelen = (int)strlen(swapfilename) + SWAPFILENAME_INDEX_LEN + 1;
494 pathname = (char*)kalloc(namelen);
495 memset(pathname, 0, namelen);
496 snprintf(pathname, namelen, "%s%d", swapfilename, 0);
497
498 vm_swapfile_open(pathname, &vp);
499
500 if (vp) {
501
502 if (vnode_pager_isSSD(vp) == FALSE) {
d9a64523
A
503 /*
504 * swap files live on an HDD, so let's make sure to start swapping
505 * much earlier since we're not worried about SSD write-wear and
506 * we have so little write bandwidth to work with
507 * these values were derived expermentially by running the performance
508 * teams stock test for evaluating HDD performance against various
509 * combinations and looking and comparing overall results.
510 * Note that the > relationship between these 4 values must be maintained
511 */
512 if (vm_compressor_minorcompact_threshold_divisor_overridden == 0)
513 vm_compressor_minorcompact_threshold_divisor = 15;
514 if (vm_compressor_majorcompact_threshold_divisor_overridden == 0)
515 vm_compressor_majorcompact_threshold_divisor = 18;
516 if (vm_compressor_unthrottle_threshold_divisor_overridden == 0)
517 vm_compressor_unthrottle_threshold_divisor = 24;
518 if (vm_compressor_catchup_threshold_divisor_overridden == 0)
519 vm_compressor_catchup_threshold_divisor = 30;
5ba3f43e
A
520 }
521#if !CONFIG_EMBEDDED
522 vnode_setswapmount(vp);
523 vm_swappin_avail = vnode_getswappin_avail(vp);
524
525 if (vm_swappin_avail)
526 vm_swappin_enabled = TRUE;
527#endif
528 vm_swapfile_close((uint64_t)pathname, vp);
529 }
530 kfree(pathname, namelen);
531
532 compaction_swapper_inited = 1;
533 }
534 lck_mtx_unlock(&vm_swap_data_lock);
535}
536
537
39236c6e 538void
d9a64523 539vm_swap_consider_defragmenting(int flags)
39236c6e 540{
d9a64523
A
541 boolean_t force_defrag = (flags & VM_SWAP_FLAGS_FORCE_DEFRAG);
542 boolean_t force_reclaim = (flags & VM_SWAP_FLAGS_FORCE_RECLAIM);
543
fe8ab488 544 if (compressor_store_stop_compaction == FALSE && !VM_SWAP_BUSY() &&
d9a64523 545 (force_defrag || force_reclaim || VM_SWAP_SHOULD_DEFRAGMENT() || VM_SWAP_SHOULD_RECLAIM())) {
39236c6e 546
d9a64523 547 if (!vm_swapfile_gc_thread_running || force_defrag || force_reclaim) {
39236c6e
A
548 lck_mtx_lock(&vm_swap_data_lock);
549
d9a64523
A
550 if (force_defrag) {
551 vm_swap_force_defrag = TRUE;
552 }
553
554 if (force_reclaim) {
555 vm_swap_force_reclaim = TRUE;
556 }
557
fe8ab488
A
558 if (!vm_swapfile_gc_thread_running)
559 thread_wakeup((event_t) &vm_swapfile_gc_needed);
39236c6e
A
560
561 lck_mtx_unlock(&vm_swap_data_lock);
562 }
563 }
564}
565
566
567int vm_swap_defragment_yielded = 0;
568int vm_swap_defragment_swapin = 0;
569int vm_swap_defragment_free = 0;
570int vm_swap_defragment_busy = 0;
571
572
573static void
574vm_swap_defragment()
575{
576 c_segment_t c_seg;
577
578 /*
579 * have to grab the master lock w/o holding
580 * any locks in spin mode
581 */
582 PAGE_REPLACEMENT_DISALLOWED(TRUE);
583
584 lck_mtx_lock_spin_always(c_list_lock);
585
586 while (!queue_empty(&c_swappedout_sparse_list_head)) {
587
588 if (compressor_store_stop_compaction == TRUE || VM_SWAP_BUSY()) {
589 vm_swap_defragment_yielded++;
590 break;
591 }
592 c_seg = (c_segment_t)queue_first(&c_swappedout_sparse_list_head);
593
594 lck_mtx_lock_spin_always(&c_seg->c_lock);
595
3e170ce0 596 assert(c_seg->c_state == C_ON_SWAPPEDOUTSPARSE_Q);
39236c6e
A
597
598 if (c_seg->c_busy) {
599 lck_mtx_unlock_always(c_list_lock);
600
601 PAGE_REPLACEMENT_DISALLOWED(FALSE);
602 /*
603 * c_seg_wait_on_busy consumes c_seg->c_lock
604 */
605 c_seg_wait_on_busy(c_seg);
606
607 PAGE_REPLACEMENT_DISALLOWED(TRUE);
608
609 lck_mtx_lock_spin_always(c_list_lock);
610
611 vm_swap_defragment_busy++;
612 continue;
613 }
614 if (c_seg->c_bytes_used == 0) {
615 /*
616 * c_seg_free_locked consumes the c_list_lock
617 * and c_seg->c_lock
618 */
3e170ce0 619 C_SEG_BUSY(c_seg);
39236c6e
A
620 c_seg_free_locked(c_seg);
621
622 vm_swap_defragment_free++;
623 } else {
624 lck_mtx_unlock_always(c_list_lock);
625
39037602
A
626 if (c_seg_swapin(c_seg, TRUE, FALSE) == 0)
627 lck_mtx_unlock_always(&c_seg->c_lock);
39236c6e
A
628
629 vm_swap_defragment_swapin++;
630 }
631 PAGE_REPLACEMENT_DISALLOWED(FALSE);
632
633 vm_pageout_io_throttle();
634
635 /*
636 * because write waiters have privilege over readers,
637 * dropping and immediately retaking the master lock will
638 * still allow any thread waiting to acquire the
639 * master lock exclusively an opportunity to take it
640 */
641 PAGE_REPLACEMENT_DISALLOWED(TRUE);
642
643 lck_mtx_lock_spin_always(c_list_lock);
644 }
645 lck_mtx_unlock_always(c_list_lock);
646
647 PAGE_REPLACEMENT_DISALLOWED(FALSE);
648}
649
650
651
652static void
fe8ab488 653vm_swapfile_create_thread(void)
39236c6e 654{
39236c6e
A
655 clock_sec_t sec;
656 clock_nsec_t nsec;
657
3e170ce0
A
658 current_thread()->options |= TH_OPT_VMPRIV;
659
fe8ab488
A
660 vm_swapfile_create_thread_awakened++;
661 vm_swapfile_create_thread_running = 1;
39236c6e 662
fe8ab488 663 while (TRUE) {
39236c6e
A
664 /*
665 * walk through the list of swap files
666 * and do the delayed frees/trims for
667 * any swap file whose count of delayed
668 * frees is above the batch limit
669 */
670 vm_swap_handle_delayed_trims(FALSE);
671
fe8ab488 672 lck_mtx_lock(&vm_swap_data_lock);
39236c6e 673
39037602
A
674 if (hibernate_in_progress_with_pinned_swap == TRUE)
675 break;
676
fe8ab488 677 clock_get_system_nanotime(&sec, &nsec);
39236c6e 678
fe8ab488
A
679 if (VM_SWAP_SHOULD_CREATE(sec) == 0)
680 break;
39236c6e 681
fe8ab488 682 lck_mtx_unlock(&vm_swap_data_lock);
39236c6e 683
fe8ab488
A
684 if (vm_swap_create_file() == FALSE) {
685 vm_swapfile_last_failed_to_create_ts = sec;
686 HIBLOG("vm_swap_create_file failed @ %lu secs\n", (unsigned long)sec);
39236c6e 687
fe8ab488
A
688 } else
689 vm_swapfile_last_successful_create_ts = sec;
690 }
691 vm_swapfile_create_thread_running = 0;
39236c6e 692
39037602
A
693 if (hibernate_in_progress_with_pinned_swap == TRUE)
694 thread_wakeup((event_t)&hibernate_in_progress_with_pinned_swap);
695
fe8ab488 696 assert_wait((event_t)&vm_swapfile_create_needed, THREAD_UNINT);
39236c6e 697
fe8ab488 698 lck_mtx_unlock(&vm_swap_data_lock);
39236c6e 699
fe8ab488
A
700 thread_block((thread_continue_t)vm_swapfile_create_thread);
701
702 /* NOTREACHED */
703}
39236c6e 704
39236c6e 705
39037602
A
706#if HIBERNATION
707
708kern_return_t
709hibernate_pin_swap(boolean_t start)
710{
711 vm_compaction_swapper_do_init();
712
713 if (start == FALSE) {
714
715 lck_mtx_lock(&vm_swap_data_lock);
716 hibernate_in_progress_with_pinned_swap = FALSE;
717 lck_mtx_unlock(&vm_swap_data_lock);
718
719 return (KERN_SUCCESS);
720 }
721 if (vm_swappin_enabled == FALSE)
722 return (KERN_SUCCESS);
723
724 lck_mtx_lock(&vm_swap_data_lock);
725
726 hibernate_in_progress_with_pinned_swap = TRUE;
727
728 while (vm_swapfile_create_thread_running || vm_swapfile_gc_thread_running) {
729
730 assert_wait((event_t)&hibernate_in_progress_with_pinned_swap, THREAD_UNINT);
731
732 lck_mtx_unlock(&vm_swap_data_lock);
733
734 thread_block(THREAD_CONTINUE_NULL);
735
736 lck_mtx_lock(&vm_swap_data_lock);
737 }
738 if (vm_num_swap_files > vm_num_pinned_swap_files) {
739 hibernate_in_progress_with_pinned_swap = FALSE;
740 lck_mtx_unlock(&vm_swap_data_lock);
741
742 HIBLOG("hibernate_pin_swap failed - vm_num_swap_files = %d, vm_num_pinned_swap_files = %d\n",
743 vm_num_swap_files, vm_num_pinned_swap_files);
744 return (KERN_FAILURE);
745 }
746 lck_mtx_unlock(&vm_swap_data_lock);
747
748 while (VM_SWAP_SHOULD_PIN(MAX_SWAP_FILE_SIZE)) {
749 if (vm_swap_create_file() == FALSE)
750 break;
751 }
752 return (KERN_SUCCESS);
753}
754#endif
755
fe8ab488
A
756static void
757vm_swapfile_gc_thread(void)
39037602 758
fe8ab488
A
759{
760 boolean_t need_defragment;
761 boolean_t need_reclaim;
762
763 vm_swapfile_gc_thread_awakened++;
764 vm_swapfile_gc_thread_running = 1;
765
766 while (TRUE) {
767
768 lck_mtx_lock(&vm_swap_data_lock);
769
39037602
A
770 if (hibernate_in_progress_with_pinned_swap == TRUE)
771 break;
772
fe8ab488
A
773 if (VM_SWAP_BUSY() || compressor_store_stop_compaction == TRUE)
774 break;
775
776 need_defragment = FALSE;
777 need_reclaim = FALSE;
778
779 if (VM_SWAP_SHOULD_DEFRAGMENT())
780 need_defragment = TRUE;
781
782 if (VM_SWAP_SHOULD_RECLAIM()) {
783 need_defragment = TRUE;
784 need_reclaim = TRUE;
785 }
786 if (need_defragment == FALSE && need_reclaim == FALSE)
787 break;
788
d9a64523
A
789 vm_swap_force_defrag = FALSE;
790 vm_swap_force_reclaim = FALSE;
791
39236c6e 792 lck_mtx_unlock(&vm_swap_data_lock);
39236c6e 793
fe8ab488
A
794 if (need_defragment == TRUE)
795 vm_swap_defragment();
796 if (need_reclaim == TRUE)
797 vm_swap_reclaim();
798 }
799 vm_swapfile_gc_thread_running = 0;
39236c6e 800
39037602
A
801 if (hibernate_in_progress_with_pinned_swap == TRUE)
802 thread_wakeup((event_t)&hibernate_in_progress_with_pinned_swap);
803
fe8ab488 804 assert_wait((event_t)&vm_swapfile_gc_needed, THREAD_UNINT);
39236c6e
A
805
806 lck_mtx_unlock(&vm_swap_data_lock);
807
fe8ab488 808 thread_block((thread_continue_t)vm_swapfile_gc_thread);
39236c6e
A
809
810 /* NOTREACHED */
811}
812
813
814
d9a64523
A
815#define VM_SWAPOUT_LIMIT_T2P 4
816#define VM_SWAPOUT_LIMIT_T1P 4
817#define VM_SWAPOUT_LIMIT_T0P 6
818#define VM_SWAPOUT_LIMIT_T0 8
819#define VM_SWAPOUT_LIMIT_MAX 8
820
821#define VM_SWAPOUT_START 0
822#define VM_SWAPOUT_T2_PASSIVE 1
823#define VM_SWAPOUT_T1_PASSIVE 2
824#define VM_SWAPOUT_T0_PASSIVE 3
825#define VM_SWAPOUT_T0 4
826
827int vm_swapout_state = VM_SWAPOUT_START;
828int vm_swapout_limit = 1;
829
830int vm_swapper_entered_T0 = 0;
831int vm_swapper_entered_T0P = 0;
832int vm_swapper_entered_T1P = 0;
833int vm_swapper_entered_T2P = 0;
834
39236c6e
A
835
836static void
837vm_swapout_thread_throttle_adjust(void)
838{
39236c6e 839
d9a64523
A
840 switch(vm_swapout_state) {
841
842 case VM_SWAPOUT_START:
843
844 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER2;
845 vm_swapper_entered_T2P++;
846
847 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
848 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
849 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
850 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
851 vm_swapout_limit = VM_SWAPOUT_LIMIT_T2P;
852 vm_swapout_state = VM_SWAPOUT_T2_PASSIVE;
853
854 break;
39236c6e 855
d9a64523 856 case VM_SWAPOUT_T2_PASSIVE:
39236c6e 857
d9a64523
A
858 if (SWAPPER_NEEDS_TO_UNTHROTTLE()) {
859 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER0;
860 vm_swapper_entered_T0P++;
39236c6e 861
d9a64523
A
862 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
863 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
864 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
865 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
866 vm_swapout_limit = VM_SWAPOUT_LIMIT_T0P;
867 vm_swapout_state = VM_SWAPOUT_T0_PASSIVE;
39236c6e 868
39236c6e
A
869 break;
870 }
d9a64523
A
871 if (swapout_target_age || hibernate_flushing == TRUE) {
872 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER1;
873 vm_swapper_entered_T1P++;
874
875 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
876 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
877 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
878 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
879 vm_swapout_limit = VM_SWAPOUT_LIMIT_T1P;
880 vm_swapout_state = VM_SWAPOUT_T1_PASSIVE;
881 }
39236c6e
A
882 break;
883
d9a64523
A
884 case VM_SWAPOUT_T1_PASSIVE:
885
886 if (SWAPPER_NEEDS_TO_UNTHROTTLE()) {
887 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER0;
888 vm_swapper_entered_T0P++;
889
890 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
891 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
892 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
893 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
894 vm_swapout_limit = VM_SWAPOUT_LIMIT_T0P;
895 vm_swapout_state = VM_SWAPOUT_T0_PASSIVE;
39236c6e 896
39236c6e
A
897 break;
898 }
d9a64523
A
899 if (swapout_target_age == 0 && hibernate_flushing == FALSE) {
900
901 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER2;
902 vm_swapper_entered_T2P++;
903
904 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
905 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
906 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
907 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
908 vm_swapout_limit = VM_SWAPOUT_LIMIT_T2P;
909 vm_swapout_state = VM_SWAPOUT_T2_PASSIVE;
39236c6e 910 }
d9a64523 911 break;
39236c6e 912
d9a64523
A
913 case VM_SWAPOUT_T0_PASSIVE:
914
915 if (SWAPPER_NEEDS_TO_RETHROTTLE()) {
916 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER2;
917 vm_swapper_entered_T2P++;
918
919 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
920 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
921 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
922 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
923 vm_swapout_limit = VM_SWAPOUT_LIMIT_T2P;
924 vm_swapout_state = VM_SWAPOUT_T2_PASSIVE;
39236c6e 925
39236c6e
A
926 break;
927 }
d9a64523
A
928 if (SWAPPER_NEEDS_TO_CATCHUP()) {
929 vm_swapper_entered_T0++;
930
931 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
932 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_DISABLE);
933 vm_swapout_limit = VM_SWAPOUT_LIMIT_T0;
934 vm_swapout_state = VM_SWAPOUT_T0;
935 }
936 break;
937
938 case VM_SWAPOUT_T0:
939
940 if (SWAPPER_HAS_CAUGHTUP()) {
941 vm_swapper_entered_T0P++;
942
943 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
944 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
945 vm_swapout_limit = VM_SWAPOUT_LIMIT_T0P;
946 vm_swapout_state = VM_SWAPOUT_T0_PASSIVE;
39236c6e
A
947 }
948 break;
949 }
d9a64523
A
950}
951
952int vm_swapout_found_empty = 0;
953
954struct swapout_io_completion vm_swapout_ctx[VM_SWAPOUT_LIMIT_MAX];
39236c6e 955
d9a64523
A
956int vm_swapout_soc_busy = 0;
957int vm_swapout_soc_done = 0;
958
959
960static struct swapout_io_completion *
961vm_swapout_find_free_soc(void)
962{ int i;
963
964 for (i = 0; i < VM_SWAPOUT_LIMIT_MAX; i++) {
965 if (vm_swapout_ctx[i].swp_io_busy == 0)
966 return (&vm_swapout_ctx[i]);
39236c6e 967 }
d9a64523
A
968 assert(vm_swapout_soc_busy == VM_SWAPOUT_LIMIT_MAX);
969
970 return NULL;
39236c6e
A
971}
972
d9a64523
A
973static struct swapout_io_completion *
974vm_swapout_find_done_soc(void)
975{ int i;
976
977 if (vm_swapout_soc_done) {
978 for (i = 0; i < VM_SWAPOUT_LIMIT_MAX; i++) {
979 if (vm_swapout_ctx[i].swp_io_done)
980 return (&vm_swapout_ctx[i]);
981 }
982 }
983 return NULL;
984}
985
986static void
987vm_swapout_complete_soc(struct swapout_io_completion *soc)
988{
989 kern_return_t kr;
990
991 if (soc->swp_io_error)
992 kr = KERN_FAILURE;
993 else
994 kr = KERN_SUCCESS;
995
996 lck_mtx_unlock_always(c_list_lock);
997
998 vm_swap_put_finish(soc->swp_swf, &soc->swp_f_offset, soc->swp_io_error);
999 vm_swapout_finish(soc->swp_c_seg, soc->swp_f_offset, soc->swp_c_size, kr);
1000
1001 lck_mtx_lock_spin_always(c_list_lock);
1002
1003 soc->swp_io_done = 0;
1004 soc->swp_io_busy = 0;
1005
1006 vm_swapout_soc_busy--;
1007 vm_swapout_soc_done--;
1008}
39236c6e 1009
3e170ce0 1010
39236c6e
A
1011static void
1012vm_swapout_thread(void)
1013{
39236c6e
A
1014 uint32_t size = 0;
1015 c_segment_t c_seg = NULL;
1016 kern_return_t kr = KERN_SUCCESS;
d9a64523 1017 struct swapout_io_completion *soc;
39236c6e 1018
3e170ce0
A
1019 current_thread()->options |= TH_OPT_VMPRIV;
1020
39236c6e
A
1021 vm_swapout_thread_awakened++;
1022
1023 lck_mtx_lock_spin_always(c_list_lock);
d9a64523
A
1024again:
1025 while (!queue_empty(&c_swapout_list_head) && vm_swapout_soc_busy < vm_swapout_limit) {
39236c6e
A
1026
1027 c_seg = (c_segment_t)queue_first(&c_swapout_list_head);
1028
1029 lck_mtx_lock_spin_always(&c_seg->c_lock);
1030
3e170ce0 1031 assert(c_seg->c_state == C_ON_SWAPOUT_Q);
39236c6e
A
1032
1033 if (c_seg->c_busy) {
39236c6e
A
1034 lck_mtx_unlock_always(c_list_lock);
1035
8a3053a0 1036 c_seg_wait_on_busy(c_seg);
39236c6e
A
1037
1038 lck_mtx_lock_spin_always(c_list_lock);
1039
1040 continue;
1041 }
39236c6e
A
1042 vm_swapout_thread_processed_segments++;
1043
8a3053a0
A
1044 size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
1045
1046 if (size == 0) {
3e170ce0
A
1047 assert(c_seg->c_bytes_used == 0);
1048
39037602
A
1049 if (!c_seg->c_on_minorcompact_q)
1050 c_seg_need_delayed_compaction(c_seg, TRUE);
1051
3e170ce0
A
1052 c_seg_switch_state(c_seg, C_IS_EMPTY, FALSE);
1053 lck_mtx_unlock_always(&c_seg->c_lock);
1054 lck_mtx_unlock_always(c_list_lock);
1055
1056 vm_swapout_found_empty++;
1057 goto c_seg_is_empty;
8a3053a0 1058 }
fe8ab488 1059 C_SEG_BUSY(c_seg);
8a3053a0
A
1060 c_seg->c_busy_swapping = 1;
1061
d9a64523 1062 c_seg_switch_state(c_seg, C_ON_SWAPIO_Q, FALSE);
39236c6e 1063
d9a64523 1064 lck_mtx_unlock_always(c_list_lock);
39236c6e
A
1065 lck_mtx_unlock_always(&c_seg->c_lock);
1066
1067#if CHECKSUM_THE_SWAP
d9a64523 1068 c_seg->cseg_hash = hash_string((char *)c_seg->c_store.c_buffer, (int)size);
39236c6e
A
1069 c_seg->cseg_swap_size = size;
1070#endif /* CHECKSUM_THE_SWAP */
1071
fe8ab488 1072#if ENCRYPTED_SWAP
39236c6e 1073 vm_swap_encrypt(c_seg);
fe8ab488 1074#endif /* ENCRYPTED_SWAP */
39236c6e 1075
d9a64523
A
1076 soc = vm_swapout_find_free_soc();
1077 assert(soc);
39236c6e 1078
d9a64523
A
1079 soc->swp_upl_ctx.io_context = (void *)soc;
1080 soc->swp_upl_ctx.io_done = (void *)vm_swapout_iodone;
1081 soc->swp_upl_ctx.io_error = 0;
39236c6e 1082
d9a64523 1083 kr = vm_swap_put((vm_offset_t)c_seg->c_store.c_buffer, &soc->swp_f_offset, size, c_seg, soc);
39236c6e 1084
d9a64523
A
1085 if (kr != KERN_SUCCESS) {
1086 if (soc->swp_io_done) {
1087 lck_mtx_lock_spin_always(c_list_lock);
1088
1089 soc->swp_io_done = 0;
1090 vm_swapout_soc_done--;
1091
1092 lck_mtx_unlock_always(c_list_lock);
1093 }
1094 vm_swapout_finish(c_seg, soc->swp_f_offset, size, kr);
1095 } else {
1096 soc->swp_io_busy = 1;
1097 vm_swapout_soc_busy++;
39037602 1098 }
d9a64523
A
1099 vm_swapout_thread_throttle_adjust();
1100 vm_pageout_io_throttle();
1101
1102c_seg_is_empty:
1103 if (c_swapout_count == 0)
1104 vm_swap_consider_defragmenting(VM_SWAP_FLAGS_NONE);
1105
39236c6e 1106 lck_mtx_lock_spin_always(c_list_lock);
39236c6e 1107
d9a64523
A
1108 if ((soc = vm_swapout_find_done_soc()))
1109 vm_swapout_complete_soc(soc);
1110 }
1111 if ((soc = vm_swapout_find_done_soc())) {
1112 vm_swapout_complete_soc(soc);
1113 goto again;
1114 }
1115 assert_wait((event_t)&c_swapout_list_head, THREAD_UNINT);
39236c6e 1116
d9a64523 1117 lck_mtx_unlock_always(c_list_lock);
39236c6e 1118
d9a64523
A
1119 thread_block((thread_continue_t)vm_swapout_thread);
1120
1121 /* NOTREACHED */
1122}
39236c6e 1123
39236c6e 1124
d9a64523
A
1125void
1126vm_swapout_iodone(void *io_context, int error)
1127{
1128 struct swapout_io_completion *soc;
39037602 1129
d9a64523 1130 soc = (struct swapout_io_completion *)io_context;
39236c6e 1131
d9a64523 1132 lck_mtx_lock_spin_always(c_list_lock);
39037602 1133
d9a64523
A
1134 soc->swp_io_done = 1;
1135 soc->swp_io_error = error;
1136 vm_swapout_soc_done++;
1137
1138 thread_wakeup((event_t)&c_swapout_list_head);
1139
1140 lck_mtx_unlock_always(c_list_lock);
1141}
39236c6e 1142
39236c6e 1143
d9a64523
A
1144static void
1145vm_swapout_finish(c_segment_t c_seg, uint64_t f_offset, uint32_t size, kern_return_t kr)
1146{
39236c6e 1147
d9a64523
A
1148 PAGE_REPLACEMENT_DISALLOWED(TRUE);
1149
1150 if (kr == KERN_SUCCESS) {
1151 kernel_memory_depopulate(compressor_map, (vm_offset_t)c_seg->c_store.c_buffer, size, KMA_COMPRESSOR);
1152 }
1153#if ENCRYPTED_SWAP
1154 else {
1155 vm_swap_decrypt(c_seg);
39236c6e 1156 }
d9a64523
A
1157#endif /* ENCRYPTED_SWAP */
1158 lck_mtx_lock_spin_always(c_list_lock);
1159 lck_mtx_lock_spin_always(&c_seg->c_lock);
39236c6e 1160
d9a64523
A
1161 if (kr == KERN_SUCCESS) {
1162 int new_state = C_ON_SWAPPEDOUT_Q;
1163 boolean_t insert_head = FALSE;
1164
1165 if (hibernate_flushing == TRUE) {
1166 if (c_seg->c_generation_id >= first_c_segment_to_warm_generation_id &&
1167 c_seg->c_generation_id <= last_c_segment_to_warm_generation_id)
1168 insert_head = TRUE;
1169 } else if (C_SEG_ONDISK_IS_SPARSE(c_seg))
1170 new_state = C_ON_SWAPPEDOUTSPARSE_Q;
1171
1172 c_seg_switch_state(c_seg, new_state, insert_head);
1173
1174 c_seg->c_store.c_swap_handle = f_offset;
39236c6e 1175
d9a64523
A
1176 VM_STAT_INCR_BY(swapouts, size >> PAGE_SHIFT);
1177
1178 if (c_seg->c_bytes_used)
1179 OSAddAtomic64(-c_seg->c_bytes_used, &compressor_bytes_used);
1180 } else {
1181 if (c_seg->c_overage_swap == TRUE) {
1182 c_seg->c_overage_swap = FALSE;
1183 c_overage_swapped_count--;
1184 }
1185 c_seg_switch_state(c_seg, C_ON_AGE_Q, FALSE);
1186
1187 if (!c_seg->c_on_minorcompact_q && C_SEG_UNUSED_BYTES(c_seg) >= PAGE_SIZE)
1188 c_seg_need_delayed_compaction(c_seg, TRUE);
1189 }
1190 assert(c_seg->c_busy_swapping);
1191 assert(c_seg->c_busy);
1192
1193 c_seg->c_busy_swapping = 0;
39236c6e
A
1194 lck_mtx_unlock_always(c_list_lock);
1195
d9a64523
A
1196 C_SEG_WAKEUP_DONE(c_seg);
1197 lck_mtx_unlock_always(&c_seg->c_lock);
1198
1199 PAGE_REPLACEMENT_DISALLOWED(FALSE);
39236c6e
A
1200}
1201
d9a64523 1202
39236c6e
A
1203boolean_t
1204vm_swap_create_file()
1205{
1206 uint64_t size = 0;
1207 int namelen = 0;
1208 boolean_t swap_file_created = FALSE;
1209 boolean_t swap_file_reuse = FALSE;
3e170ce0 1210 boolean_t swap_file_pin = FALSE;
39236c6e
A
1211 struct swapfile *swf = NULL;
1212
39037602
A
1213 /*
1214 * make sure we've got all the info we need
1215 * to potentially pin a swap file... we could
1216 * be swapping out due to hibernation w/o ever
1217 * having run vm_pageout_scan, which is normally
1218 * the trigger to do the init
1219 */
1220 vm_compaction_swapper_do_init();
1221
39236c6e
A
1222 /*
1223 * Any swapfile structure ready for re-use?
1224 */
1225
1226 lck_mtx_lock(&vm_swap_data_lock);
1227
1228 swf = (struct swapfile*) queue_first(&swf_global_queue);
1229
1230 while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
1231 if (swf->swp_flags == SWAP_REUSE) {
1232 swap_file_reuse = TRUE;
1233 break;
1234 }
1235 swf = (struct swapfile*) queue_next(&swf->swp_queue);
1236 }
1237
1238 lck_mtx_unlock(&vm_swap_data_lock);
1239
1240 if (swap_file_reuse == FALSE) {
1241
fe8ab488 1242 namelen = (int)strlen(swapfilename) + SWAPFILENAME_INDEX_LEN + 1;
39236c6e
A
1243
1244 swf = (struct swapfile*) kalloc(sizeof *swf);
1245 memset(swf, 0, sizeof(*swf));
1246
1247 swf->swp_index = vm_num_swap_files + 1;
1248 swf->swp_pathlen = namelen;
1249 swf->swp_path = (char*)kalloc(swf->swp_pathlen);
1250
1251 memset(swf->swp_path, 0, namelen);
1252
fe8ab488 1253 snprintf(swf->swp_path, namelen, "%s%d", swapfilename, vm_num_swap_files);
39236c6e
A
1254 }
1255
1256 vm_swapfile_open(swf->swp_path, &swf->swp_vp);
1257
1258 if (swf->swp_vp == NULL) {
1259 if (swap_file_reuse == FALSE) {
1260 kfree(swf->swp_path, swf->swp_pathlen);
1261 kfree(swf, sizeof *swf);
1262 }
1263 return FALSE;
1264 }
fe8ab488
A
1265 vm_swapfile_can_be_created = TRUE;
1266
39236c6e
A
1267 size = MAX_SWAP_FILE_SIZE;
1268
1269 while (size >= MIN_SWAP_FILE_SIZE) {
1270
3e170ce0
A
1271 swap_file_pin = VM_SWAP_SHOULD_PIN(size);
1272
1273 if (vm_swapfile_preallocate(swf->swp_vp, &size, &swap_file_pin) == 0) {
39236c6e
A
1274
1275 int num_bytes_for_bitmap = 0;
1276
1277 swap_file_created = TRUE;
1278
1279 swf->swp_size = size;
1280 swf->swp_nsegs = (unsigned int) (size / COMPRESSED_SWAP_CHUNK_SIZE);
1281 swf->swp_nseginuse = 0;
1282 swf->swp_free_hint = 0;
1283
1284 num_bytes_for_bitmap = MAX((swf->swp_nsegs >> 3) , 1);
1285 /*
1286 * Allocate a bitmap that describes the
1287 * number of segments held by this swapfile.
1288 */
1289 swf->swp_bitmap = (uint8_t*)kalloc(num_bytes_for_bitmap);
1290 memset(swf->swp_bitmap, 0, num_bytes_for_bitmap);
1291
1292 swf->swp_csegs = (c_segment_t *) kalloc(swf->swp_nsegs * sizeof(c_segment_t));
1293 memset(swf->swp_csegs, 0, (swf->swp_nsegs * sizeof(c_segment_t)));
1294
1295 /*
1296 * passing a NULL trim_list into vnode_trim_list
1297 * will return ENOTSUP if trim isn't supported
1298 * and 0 if it is
1299 */
fe8ab488
A
1300 if (vnode_trim_list(swf->swp_vp, NULL, FALSE) == 0)
1301 swp_trim_supported = TRUE;
39236c6e
A
1302
1303 lck_mtx_lock(&vm_swap_data_lock);
1304
1305 swf->swp_flags = SWAP_READY;
1306
1307 if (swap_file_reuse == FALSE) {
1308 queue_enter(&swf_global_queue, swf, struct swapfile*, swp_queue);
1309 }
1310
1311 vm_num_swap_files++;
1312
1313 vm_swapfile_total_segs_alloced += swf->swp_nsegs;
1314
3e170ce0 1315 if (swap_file_pin == TRUE) {
39037602 1316 vm_num_pinned_swap_files++;
3e170ce0
A
1317 swf->swp_flags |= SWAP_PINNED;
1318 vm_swappin_avail -= swf->swp_size;
1319 }
1320
39236c6e
A
1321 lck_mtx_unlock(&vm_swap_data_lock);
1322
1323 thread_wakeup((event_t) &vm_num_swap_files);
5ba3f43e
A
1324#if CONFIG_EMBEDDED
1325 if (vm_num_swap_files == 1) {
1326
1327 c_overage_swapped_limit = (uint32_t)size / C_SEG_BUFSIZE;
1328
1329 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE)
1330 c_overage_swapped_limit /= 2;
1331 }
1332#endif
39236c6e
A
1333 break;
1334 } else {
1335
1336 size = size / 2;
1337 }
1338 }
1339 if (swap_file_created == FALSE) {
1340
1341 vm_swapfile_close((uint64_t)(swf->swp_path), swf->swp_vp);
1342
1343 swf->swp_vp = NULL;
1344
1345 if (swap_file_reuse == FALSE) {
1346 kfree(swf->swp_path, swf->swp_pathlen);
1347 kfree(swf, sizeof *swf);
1348 }
1349 }
1350 return swap_file_created;
1351}
1352
1353
1354kern_return_t
39037602 1355vm_swap_get(c_segment_t c_seg, uint64_t f_offset, uint64_t size)
39236c6e
A
1356{
1357 struct swapfile *swf = NULL;
1358 uint64_t file_offset = 0;
fe8ab488 1359 int retval = 0;
39236c6e 1360
39037602 1361 assert(c_seg->c_store.c_buffer);
39236c6e
A
1362
1363 lck_mtx_lock(&vm_swap_data_lock);
1364
1365 swf = vm_swapfile_for_handle(f_offset);
1366
fe8ab488
A
1367 if (swf == NULL || ( !(swf->swp_flags & SWAP_READY) && !(swf->swp_flags & SWAP_RECLAIM))) {
1368 retval = 1;
1369 goto done;
39236c6e 1370 }
fe8ab488
A
1371 swf->swp_io_count++;
1372
1373 lck_mtx_unlock(&vm_swap_data_lock);
39236c6e 1374
39037602
A
1375#if DEVELOPMENT || DEBUG
1376 C_SEG_MAKE_WRITEABLE(c_seg);
1377#endif
fe8ab488 1378 file_offset = (f_offset & SWAP_SLOT_MASK);
d9a64523 1379 retval = vm_swapfile_io(swf->swp_vp, file_offset, (uint64_t)c_seg->c_store.c_buffer, (int)(size / PAGE_SIZE_64), SWAP_READ, NULL);
39236c6e 1380
39037602
A
1381#if DEVELOPMENT || DEBUG
1382 C_SEG_WRITE_PROTECT(c_seg);
1383#endif
fe8ab488
A
1384 if (retval == 0)
1385 VM_STAT_INCR_BY(swapins, size >> PAGE_SHIFT);
1386 else
1387 vm_swap_get_failures++;
1388
39236c6e
A
1389 /*
1390 * Free this slot in the swap structure.
1391 */
1392 vm_swap_free(f_offset);
1393
1394 lck_mtx_lock(&vm_swap_data_lock);
1395 swf->swp_io_count--;
1396
1397 if ((swf->swp_flags & SWAP_WANTED) && swf->swp_io_count == 0) {
1398
1399 swf->swp_flags &= ~SWAP_WANTED;
1400 thread_wakeup((event_t) &swf->swp_flags);
1401 }
fe8ab488 1402done:
39236c6e
A
1403 lck_mtx_unlock(&vm_swap_data_lock);
1404
1405 if (retval == 0)
1406 return KERN_SUCCESS;
fe8ab488 1407 else
39236c6e 1408 return KERN_FAILURE;
39236c6e
A
1409}
1410
1411kern_return_t
d9a64523 1412vm_swap_put(vm_offset_t addr, uint64_t *f_offset, uint32_t size, c_segment_t c_seg, struct swapout_io_completion *soc)
39236c6e
A
1413{
1414 unsigned int segidx = 0;
1415 struct swapfile *swf = NULL;
1416 uint64_t file_offset = 0;
1417 uint64_t swapfile_index = 0;
1418 unsigned int byte_for_segidx = 0;
1419 unsigned int offset_within_byte = 0;
1420 boolean_t swf_eligible = FALSE;
1421 boolean_t waiting = FALSE;
fe8ab488 1422 boolean_t retried = FALSE;
39236c6e
A
1423 int error = 0;
1424 clock_sec_t sec;
1425 clock_nsec_t nsec;
d9a64523 1426 void *upl_ctx = NULL;
39236c6e
A
1427
1428 if (addr == 0 || f_offset == NULL) {
1429 return KERN_FAILURE;
1430 }
fe8ab488 1431retry:
39236c6e
A
1432 lck_mtx_lock(&vm_swap_data_lock);
1433
1434 swf = (struct swapfile*) queue_first(&swf_global_queue);
1435
1436 while(queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
1437
1438 segidx = swf->swp_free_hint;
1439
1440 swf_eligible = (swf->swp_flags & SWAP_READY) && (swf->swp_nseginuse < swf->swp_nsegs);
1441
1442 if (swf_eligible) {
1443
1444 while(segidx < swf->swp_nsegs) {
1445
1446 byte_for_segidx = segidx >> 3;
1447 offset_within_byte = segidx % 8;
1448
1449 if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {
1450 segidx++;
1451 continue;
1452 }
1453
1454 (swf->swp_bitmap)[byte_for_segidx] |= (1 << offset_within_byte);
1455
1456 file_offset = segidx * COMPRESSED_SWAP_CHUNK_SIZE;
1457 swf->swp_nseginuse++;
1458 swf->swp_io_count++;
d9a64523 1459 swf->swp_csegs[segidx] = c_seg;
39236c6e 1460
d9a64523 1461 swapfile_index = swf->swp_index;
39236c6e
A
1462 vm_swapfile_total_segs_used++;
1463
1464 clock_get_system_nanotime(&sec, &nsec);
1465
fe8ab488
A
1466 if (VM_SWAP_SHOULD_CREATE(sec) && !vm_swapfile_create_thread_running)
1467 thread_wakeup((event_t) &vm_swapfile_create_needed);
39236c6e
A
1468
1469 lck_mtx_unlock(&vm_swap_data_lock);
1470
d9a64523 1471 goto issue_io;
39236c6e
A
1472 }
1473 }
1474 swf = (struct swapfile*) queue_next(&swf->swp_queue);
1475 }
1476 assert(queue_end(&swf_global_queue, (queue_entry_t) swf));
1477
1478 /*
1479 * we've run out of swap segments, but may not
1480 * be in a position to immediately create a new swap
1481 * file if we've recently failed to create due to a lack
1482 * of free space in the root filesystem... we'll try
1483 * to kick that create off, but in any event we're going
1484 * to take a breather (up to 1 second) so that we're not caught in a tight
1485 * loop back in "vm_compressor_compact_and_swap" trying to stuff
1486 * segments into swap files only to have them immediately put back
1487 * on the c_age queue due to vm_swap_put failing.
1488 *
1489 * if we're doing these puts due to a hibernation flush,
1490 * no need to block... setting hibernate_no_swapspace to TRUE,
1491 * will cause "vm_compressor_compact_and_swap" to immediately abort
1492 */
1493 clock_get_system_nanotime(&sec, &nsec);
1494
fe8ab488
A
1495 if (VM_SWAP_SHOULD_CREATE(sec) && !vm_swapfile_create_thread_running)
1496 thread_wakeup((event_t) &vm_swapfile_create_needed);
39236c6e
A
1497
1498 if (hibernate_flushing == FALSE || VM_SWAP_SHOULD_CREATE(sec)) {
1499 waiting = TRUE;
1500 assert_wait_timeout((event_t) &vm_num_swap_files, THREAD_INTERRUPTIBLE, 1000, 1000*NSEC_PER_USEC);
1501 } else
1502 hibernate_no_swapspace = TRUE;
1503
1504 lck_mtx_unlock(&vm_swap_data_lock);
1505
fe8ab488 1506 if (waiting == TRUE) {
39236c6e
A
1507 thread_block(THREAD_CONTINUE_NULL);
1508
fe8ab488
A
1509 if (retried == FALSE && hibernate_flushing == TRUE) {
1510 retried = TRUE;
1511 goto retry;
1512 }
1513 }
3e170ce0 1514 vm_swap_put_failures++;
fe8ab488 1515
39236c6e
A
1516 return KERN_FAILURE;
1517
d9a64523 1518issue_io:
a39ff7e2
A
1519 assert(c_seg->c_busy_swapping);
1520 assert(c_seg->c_busy);
1521 assert(!c_seg->c_on_minorcompact_q);
1522
d9a64523
A
1523 *f_offset = (swapfile_index << SWAP_DEVICE_SHIFT) | file_offset;
1524
1525 if (soc) {
1526 soc->swp_c_seg = c_seg;
1527 soc->swp_c_size = size;
39236c6e 1528
d9a64523 1529 soc->swp_swf = swf;
39236c6e 1530
d9a64523
A
1531 soc->swp_io_error = 0;
1532 soc->swp_io_done = 0;
39236c6e 1533
d9a64523
A
1534 upl_ctx = (void *)&soc->swp_upl_ctx;
1535 }
1536 error = vm_swapfile_io(swf->swp_vp, file_offset, addr, (int) (size / PAGE_SIZE_64), SWAP_WRITE, upl_ctx);
39236c6e 1537
d9a64523
A
1538 if (error || upl_ctx == NULL)
1539 return (vm_swap_put_finish(swf, f_offset, error));
1540
1541 return KERN_SUCCESS;
1542}
1543
1544kern_return_t
1545vm_swap_put_finish(struct swapfile *swf, uint64_t *f_offset, int error)
1546{
1547 lck_mtx_lock(&vm_swap_data_lock);
1548
1549 swf->swp_io_count--;
39236c6e
A
1550
1551 if ((swf->swp_flags & SWAP_WANTED) && swf->swp_io_count == 0) {
1552
1553 swf->swp_flags &= ~SWAP_WANTED;
1554 thread_wakeup((event_t) &swf->swp_flags);
1555 }
39236c6e
A
1556 lck_mtx_unlock(&vm_swap_data_lock);
1557
39236c6e
A
1558 if (error) {
1559 vm_swap_free(*f_offset);
3e170ce0
A
1560 vm_swap_put_failures++;
1561
39236c6e
A
1562 return KERN_FAILURE;
1563 }
1564 return KERN_SUCCESS;
1565}
1566
1567
39236c6e
A
1568static void
1569vm_swap_free_now(struct swapfile *swf, uint64_t f_offset)
1570{
1571 uint64_t file_offset = 0;
1572 unsigned int segidx = 0;
1573
1574
1575 if ((swf->swp_flags & SWAP_READY) || (swf->swp_flags & SWAP_RECLAIM)) {
1576
1577 unsigned int byte_for_segidx = 0;
1578 unsigned int offset_within_byte = 0;
1579
1580 file_offset = (f_offset & SWAP_SLOT_MASK);
1581 segidx = (unsigned int) (file_offset / COMPRESSED_SWAP_CHUNK_SIZE);
1582
1583 byte_for_segidx = segidx >> 3;
1584 offset_within_byte = segidx % 8;
1585
1586 if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {
1587
1588 (swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);
1589
1590 swf->swp_csegs[segidx] = NULL;
1591
1592 swf->swp_nseginuse--;
1593 vm_swapfile_total_segs_used--;
1594
1595 if (segidx < swf->swp_free_hint) {
1596 swf->swp_free_hint = segidx;
1597 }
1598 }
fe8ab488
A
1599 if (VM_SWAP_SHOULD_RECLAIM() && !vm_swapfile_gc_thread_running)
1600 thread_wakeup((event_t) &vm_swapfile_gc_needed);
39236c6e 1601 }
39236c6e
A
1602}
1603
1604
1605uint32_t vm_swap_free_now_count = 0;
1606uint32_t vm_swap_free_delayed_count = 0;
1607
1608
1609void
1610vm_swap_free(uint64_t f_offset)
1611{
1612 struct swapfile *swf = NULL;
fe8ab488 1613 struct trim_list *tl = NULL;
39236c6e
A
1614 clock_sec_t sec;
1615 clock_nsec_t nsec;
1616
fe8ab488
A
1617 if (swp_trim_supported == TRUE)
1618 tl = kalloc(sizeof(struct trim_list));
1619
39236c6e
A
1620 lck_mtx_lock(&vm_swap_data_lock);
1621
1622 swf = vm_swapfile_for_handle(f_offset);
1623
1624 if (swf && (swf->swp_flags & (SWAP_READY | SWAP_RECLAIM))) {
1625
fe8ab488 1626 if (swp_trim_supported == FALSE || (swf->swp_flags & SWAP_RECLAIM)) {
39236c6e
A
1627 /*
1628 * don't delay the free if the underlying disk doesn't support
1629 * trim, or we're in the midst of reclaiming this swap file since
1630 * we don't want to move segments that are technically free
1631 * but not yet handled by the delayed free mechanism
1632 */
1633 vm_swap_free_now(swf, f_offset);
1634
1635 vm_swap_free_now_count++;
fe8ab488 1636 goto done;
39236c6e 1637 }
39236c6e
A
1638 tl->tl_offset = f_offset & SWAP_SLOT_MASK;
1639 tl->tl_length = COMPRESSED_SWAP_CHUNK_SIZE;
1640
1641 tl->tl_next = swf->swp_delayed_trim_list_head;
1642 swf->swp_delayed_trim_list_head = tl;
1643 swf->swp_delayed_trim_count++;
fe8ab488 1644 tl = NULL;
39236c6e 1645
fe8ab488 1646 if (VM_SWAP_SHOULD_TRIM(swf) && !vm_swapfile_create_thread_running) {
39236c6e
A
1647 clock_get_system_nanotime(&sec, &nsec);
1648
1649 if (sec > dont_trim_until_ts)
fe8ab488 1650 thread_wakeup((event_t) &vm_swapfile_create_needed);
39236c6e
A
1651 }
1652 vm_swap_free_delayed_count++;
1653 }
fe8ab488 1654done:
39236c6e 1655 lck_mtx_unlock(&vm_swap_data_lock);
fe8ab488
A
1656
1657 if (tl != NULL)
1658 kfree(tl, sizeof(struct trim_list));
39236c6e
A
1659}
1660
1661
fe8ab488
A
1662static void
1663vm_swap_wait_on_trim_handling_in_progress()
1664{
1665 while (delayed_trim_handling_in_progress == TRUE) {
1666
1667 assert_wait((event_t) &delayed_trim_handling_in_progress, THREAD_UNINT);
1668 lck_mtx_unlock(&vm_swap_data_lock);
1669
1670 thread_block(THREAD_CONTINUE_NULL);
1671
1672 lck_mtx_lock(&vm_swap_data_lock);
1673 }
1674}
1675
1676
39236c6e
A
1677static void
1678vm_swap_handle_delayed_trims(boolean_t force_now)
1679{
1680 struct swapfile *swf = NULL;
1681
1682 /*
fe8ab488
A
1683 * serialize the race between us and vm_swap_reclaim...
1684 * if vm_swap_reclaim wins it will turn off SWAP_READY
1685 * on the victim it has chosen... we can just skip over
1686 * that file since vm_swap_reclaim will first process
1687 * all of the delayed trims associated with it
1688 */
1689 lck_mtx_lock(&vm_swap_data_lock);
1690
1691 delayed_trim_handling_in_progress = TRUE;
1692
1693 lck_mtx_unlock(&vm_swap_data_lock);
1694
1695 /*
1696 * no need to hold the lock to walk the swf list since
1697 * vm_swap_create (the only place where we add to this list)
1698 * is run on the same thread as this function
1699 * and vm_swap_reclaim doesn't remove items from this list
1700 * instead marking them with SWAP_REUSE for future re-use
39236c6e
A
1701 */
1702 swf = (struct swapfile*) queue_first(&swf_global_queue);
1703
1704 while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
1705
fe8ab488 1706 if ((swf->swp_flags & SWAP_READY) && (force_now == TRUE || VM_SWAP_SHOULD_TRIM(swf))) {
39236c6e 1707
fe8ab488 1708 assert(!(swf->swp_flags & SWAP_RECLAIM));
39236c6e 1709 vm_swap_do_delayed_trim(swf);
fe8ab488 1710 }
39236c6e
A
1711 swf = (struct swapfile*) queue_next(&swf->swp_queue);
1712 }
fe8ab488
A
1713 lck_mtx_lock(&vm_swap_data_lock);
1714
1715 delayed_trim_handling_in_progress = FALSE;
1716 thread_wakeup((event_t) &delayed_trim_handling_in_progress);
39236c6e 1717
fe8ab488
A
1718 if (VM_SWAP_SHOULD_RECLAIM() && !vm_swapfile_gc_thread_running)
1719 thread_wakeup((event_t) &vm_swapfile_gc_needed);
1720
1721 lck_mtx_unlock(&vm_swap_data_lock);
1722
1723}
39236c6e
A
1724
1725static void
1726vm_swap_do_delayed_trim(struct swapfile *swf)
1727{
1728 struct trim_list *tl, *tl_head;
1729
1730 lck_mtx_lock(&vm_swap_data_lock);
1731
1732 tl_head = swf->swp_delayed_trim_list_head;
1733 swf->swp_delayed_trim_list_head = NULL;
1734 swf->swp_delayed_trim_count = 0;
1735
1736 lck_mtx_unlock(&vm_swap_data_lock);
1737
fe8ab488 1738 vnode_trim_list(swf->swp_vp, tl_head, TRUE);
39236c6e
A
1739
1740 while ((tl = tl_head) != NULL) {
1741 unsigned int segidx = 0;
1742 unsigned int byte_for_segidx = 0;
1743 unsigned int offset_within_byte = 0;
1744
1745 lck_mtx_lock(&vm_swap_data_lock);
1746
1747 segidx = (unsigned int) (tl->tl_offset / COMPRESSED_SWAP_CHUNK_SIZE);
1748
1749 byte_for_segidx = segidx >> 3;
1750 offset_within_byte = segidx % 8;
1751
1752 if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {
1753
1754 (swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);
1755
1756 swf->swp_csegs[segidx] = NULL;
1757
1758 swf->swp_nseginuse--;
1759 vm_swapfile_total_segs_used--;
1760
1761 if (segidx < swf->swp_free_hint) {
1762 swf->swp_free_hint = segidx;
1763 }
1764 }
1765 lck_mtx_unlock(&vm_swap_data_lock);
1766
1767 tl_head = tl->tl_next;
1768
1769 kfree(tl, sizeof(struct trim_list));
1770 }
1771}
1772
1773
/*
 * Intentionally empty: this function performs no work.
 */
void
vm_swap_flush()
{
}
1779
1780int vm_swap_reclaim_yielded = 0;
1781
1782void
1783vm_swap_reclaim(void)
1784{
1785 vm_offset_t addr = 0;
1786 unsigned int segidx = 0;
1787 uint64_t f_offset = 0;
1788 struct swapfile *swf = NULL;
1789 struct swapfile *smallest_swf = NULL;
1790 unsigned int min_nsegs = 0;
1791 unsigned int byte_for_segidx = 0;
1792 unsigned int offset_within_byte = 0;
1793 uint32_t c_size = 0;
1794
1795 c_segment_t c_seg = NULL;
1796
39037602 1797 if (kernel_memory_allocate(compressor_map, (vm_offset_t *)(&addr), C_SEG_BUFSIZE, 0, KMA_KOBJECT, VM_KERN_MEMORY_COMPRESSOR) != KERN_SUCCESS) {
39236c6e
A
1798 panic("vm_swap_reclaim: kernel_memory_allocate failed\n");
1799 }
1800
1801 lck_mtx_lock(&vm_swap_data_lock);
1802
fe8ab488
A
1803 /*
1804 * if we're running the swapfile list looking for
1805 * candidates with delayed trims, we need to
1806 * wait before making our decision concerning
1807 * the swapfile we want to reclaim
1808 */
1809 vm_swap_wait_on_trim_handling_in_progress();
1810
1811 /*
1812 * from here until we knock down the SWAP_READY bit,
1813 * we need to remain behind the vm_swap_data_lock...
1814 * once that bit has been turned off, "vm_swap_handle_delayed_trims"
1815 * will not consider this swapfile for processing
1816 */
39236c6e
A
1817 swf = (struct swapfile*) queue_first(&swf_global_queue);
1818 min_nsegs = MAX_SWAP_FILE_SIZE / COMPRESSED_SWAP_CHUNK_SIZE;
1819 smallest_swf = NULL;
1820
1821 while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
1822
1823 if ((swf->swp_flags & SWAP_READY) && (swf->swp_nseginuse <= min_nsegs)) {
1824
1825 smallest_swf = swf;
1826 min_nsegs = swf->swp_nseginuse;
1827 }
1828 swf = (struct swapfile*) queue_next(&swf->swp_queue);
1829 }
1830
1831 if (smallest_swf == NULL)
1832 goto done;
1833
1834 swf = smallest_swf;
1835
1836
1837 swf->swp_flags &= ~SWAP_READY;
1838 swf->swp_flags |= SWAP_RECLAIM;
1839
1840 if (swf->swp_delayed_trim_count) {
1841
1842 lck_mtx_unlock(&vm_swap_data_lock);
1843
1844 vm_swap_do_delayed_trim(swf);
1845
1846 lck_mtx_lock(&vm_swap_data_lock);
1847 }
1848 segidx = 0;
1849
1850 while (segidx < swf->swp_nsegs) {
1851
1852ReTry_for_cseg:
39236c6e
A
1853 /*
1854 * Wait for outgoing I/Os.
1855 */
1856 while (swf->swp_io_count) {
1857
1858 swf->swp_flags |= SWAP_WANTED;
1859
1860 assert_wait((event_t) &swf->swp_flags, THREAD_UNINT);
1861 lck_mtx_unlock(&vm_swap_data_lock);
1862
1863 thread_block(THREAD_CONTINUE_NULL);
1864
1865 lck_mtx_lock(&vm_swap_data_lock);
1866 }
fe8ab488
A
1867 if (compressor_store_stop_compaction == TRUE || VM_SWAP_SHOULD_ABORT_RECLAIM() || VM_SWAP_BUSY()) {
1868 vm_swap_reclaim_yielded++;
1869 break;
1870 }
39236c6e
A
1871
1872 byte_for_segidx = segidx >> 3;
1873 offset_within_byte = segidx % 8;
1874
1875 if (((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) == 0) {
1876
1877 segidx++;
1878 continue;
1879 }
1880
1881 c_seg = swf->swp_csegs[segidx];
3e170ce0 1882 assert(c_seg);
39236c6e
A
1883
1884 lck_mtx_lock_spin_always(&c_seg->c_lock);
1885
39236c6e 1886 if (c_seg->c_busy) {
3e170ce0
A
1887 /*
1888 * a swapped out c_segment in the process of being freed will remain in the
1889 * busy state until after the vm_swap_free is called on it... vm_swap_free
1890 * takes the vm_swap_data_lock, so can't change the swap state until after
1891 * we drop the vm_swap_data_lock... once we do, vm_swap_free will complete
1892 * which will allow c_seg_free_locked to clear busy and wake up this thread...
1893 * at that point, we re-look up the swap state which will now indicate that
1894 * this c_segment no longer exists.
1895 */
39236c6e
A
1896 c_seg->c_wanted = 1;
1897
1898 assert_wait((event_t) (c_seg), THREAD_UNINT);
1899 lck_mtx_unlock_always(&c_seg->c_lock);
1900
1901 lck_mtx_unlock(&vm_swap_data_lock);
1902
1903 thread_block(THREAD_CONTINUE_NULL);
1904
1905 lck_mtx_lock(&vm_swap_data_lock);
1906
1907 goto ReTry_for_cseg;
1908 }
1909 (swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);
1910
1911 f_offset = segidx * COMPRESSED_SWAP_CHUNK_SIZE;
3e170ce0
A
1912
1913 assert(c_seg == swf->swp_csegs[segidx]);
39236c6e
A
1914 swf->swp_csegs[segidx] = NULL;
1915 swf->swp_nseginuse--;
1916
1917 vm_swapfile_total_segs_used--;
1918
1919 lck_mtx_unlock(&vm_swap_data_lock);
39236c6e 1920
3e170ce0
A
1921 assert(C_SEG_IS_ONDISK(c_seg));
1922
1923 C_SEG_BUSY(c_seg);
1924 c_seg->c_busy_swapping = 1;
39236c6e 1925#if !CHECKSUM_THE_SWAP
3e170ce0 1926 c_seg_trim_tail(c_seg);
39236c6e 1927#endif
3e170ce0 1928 c_size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
39236c6e 1929
3e170ce0 1930 assert(c_size <= C_SEG_BUFSIZE && c_size);
39236c6e 1931
3e170ce0 1932 lck_mtx_unlock_always(&c_seg->c_lock);
39236c6e 1933
d9a64523 1934 if (vm_swapfile_io(swf->swp_vp, f_offset, addr, (int)(c_size / PAGE_SIZE_64), SWAP_READ, NULL)) {
39236c6e 1935
3e170ce0
A
1936 /*
1937 * reading the data back in failed, so convert c_seg
1938 * to a swapped in c_segment that contains no data
1939 */
39037602 1940 c_seg_swapin_requeue(c_seg, FALSE, TRUE, FALSE);
3e170ce0
A
1941 /*
1942 * returns with c_busy_swapping cleared
1943 */
39236c6e 1944
3e170ce0
A
1945 vm_swap_get_failures++;
1946 goto swap_io_failed;
1947 }
1948 VM_STAT_INCR_BY(swapins, c_size >> PAGE_SHIFT);
39236c6e 1949
d9a64523 1950 if (vm_swap_put(addr, &f_offset, c_size, c_seg, NULL)) {
3e170ce0 1951 vm_offset_t c_buffer;
39236c6e 1952
3e170ce0
A
1953 /*
1954 * the put failed, so convert c_seg to a fully swapped in c_segment
1955 * with valid data
1956 */
1957 c_buffer = (vm_offset_t)C_SEG_BUFFER_ADDRESS(c_seg->c_mysegno);
fe8ab488 1958
39037602 1959 kernel_memory_populate(compressor_map, c_buffer, c_size, KMA_COMPRESSOR, VM_KERN_MEMORY_COMPRESSOR);
39236c6e 1960
3e170ce0 1961 memcpy((char *)c_buffer, (char *)addr, c_size);
39236c6e 1962
3e170ce0 1963 c_seg->c_store.c_buffer = (int32_t *)c_buffer;
fe8ab488 1964#if ENCRYPTED_SWAP
3e170ce0 1965 vm_swap_decrypt(c_seg);
fe8ab488 1966#endif /* ENCRYPTED_SWAP */
39037602 1967 c_seg_swapin_requeue(c_seg, TRUE, TRUE, FALSE);
3e170ce0
A
1968 /*
1969 * returns with c_busy_swapping cleared
1970 */
1971 OSAddAtomic64(c_seg->c_bytes_used, &compressor_bytes_used);
39236c6e 1972
3e170ce0
A
1973 goto swap_io_failed;
1974 }
1975 VM_STAT_INCR_BY(swapouts, c_size >> PAGE_SHIFT);
39236c6e 1976
3e170ce0 1977 lck_mtx_lock_spin_always(&c_seg->c_lock);
39236c6e 1978
3e170ce0
A
1979 assert(C_SEG_IS_ONDISK(c_seg));
1980 /*
1981 * The c_seg will now know about the new location on disk.
1982 */
1983 c_seg->c_store.c_swap_handle = f_offset;
39037602
A
1984
1985 assert(c_seg->c_busy_swapping);
3e170ce0 1986 c_seg->c_busy_swapping = 0;
39236c6e 1987swap_io_failed:
39037602 1988 assert(c_seg->c_busy);
3e170ce0 1989 C_SEG_WAKEUP_DONE(c_seg);
39236c6e 1990
3e170ce0 1991 lck_mtx_unlock_always(&c_seg->c_lock);
39236c6e
A
1992 lck_mtx_lock(&vm_swap_data_lock);
1993 }
1994
1995 if (swf->swp_nseginuse) {
1996
1997 swf->swp_flags &= ~SWAP_RECLAIM;
1998 swf->swp_flags |= SWAP_READY;
1999
2000 goto done;
2001 }
2002 /*
2003 * We don't remove this inactive swf from the queue.
2004 * That way, we can re-use it when needed again and
fe8ab488
A
2005 * preserve the namespace. The delayed_trim processing
2006 * is also dependent on us not removing swfs from the queue.
39236c6e
A
2007 */
2008 //queue_remove(&swf_global_queue, swf, struct swapfile*, swp_queue);
2009
2010 vm_num_swap_files--;
2011
2012 vm_swapfile_total_segs_alloced -= swf->swp_nsegs;
2013
2014 lck_mtx_unlock(&vm_swap_data_lock);
2015
2016 vm_swapfile_close((uint64_t)(swf->swp_path), swf->swp_vp);
2017
2018 kfree(swf->swp_csegs, swf->swp_nsegs * sizeof(c_segment_t));
2019 kfree(swf->swp_bitmap, MAX((swf->swp_nsegs >> 3), 1));
2020
2021 lck_mtx_lock(&vm_swap_data_lock);
2022
3e170ce0 2023 if (swf->swp_flags & SWAP_PINNED) {
39037602 2024 vm_num_pinned_swap_files--;
3e170ce0
A
2025 vm_swappin_avail += swf->swp_size;
2026 }
2027
39236c6e
A
2028 swf->swp_vp = NULL;
2029 swf->swp_size = 0;
2030 swf->swp_free_hint = 0;
2031 swf->swp_nsegs = 0;
2032 swf->swp_flags = SWAP_REUSE;
2033
39236c6e 2034done:
fe8ab488 2035 thread_wakeup((event_t) &swf->swp_flags);
39236c6e
A
2036 lck_mtx_unlock(&vm_swap_data_lock);
2037
39037602 2038 kmem_free(compressor_map, (vm_offset_t) addr, C_SEG_BUFSIZE);
39236c6e
A
2039}
2040
2041
2042uint64_t
2043vm_swap_get_total_space(void)
2044{
2045 uint64_t total_space = 0;
2046
2047 total_space = (uint64_t)vm_swapfile_total_segs_alloced * COMPRESSED_SWAP_CHUNK_SIZE;
2048
2049 return total_space;
2050}
2051
2052uint64_t
2053vm_swap_get_used_space(void)
2054{
2055 uint64_t used_space = 0;
2056
2057 used_space = (uint64_t)vm_swapfile_total_segs_used * COMPRESSED_SWAP_CHUNK_SIZE;
2058
2059 return used_space;
2060}
2061
/*
 * Free swap space: vm_swap_get_total_space() minus vm_swap_get_used_space().
 */
uint64_t
vm_swap_get_free_space(void)
{
	uint64_t	total = vm_swap_get_total_space();
	uint64_t	used = vm_swap_get_used_space();

	return (total - used);
}
fe8ab488
A
2067
2068
2069int
2070vm_swap_low_on_space(void)
2071{
2072
2073 if (vm_num_swap_files == 0 && vm_swapfile_can_be_created == FALSE)
2074 return (0);
2075
2076 if (((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < ((unsigned int)VM_SWAPFILE_HIWATER_SEGS) / 8)) {
2077
2078 if (vm_num_swap_files == 0 && !SWAPPER_NEEDS_TO_UNTHROTTLE())
2079 return (0);
2080
2081 if (vm_swapfile_last_failed_to_create_ts >= vm_swapfile_last_successful_create_ts)
2082 return (1);
2083 }
2084 return (0);
2085}
39037602
A
2086
2087boolean_t
2088vm_swap_files_pinned(void)
2089{
2090 boolean_t result;
2091
2092 if (vm_swappin_enabled == FALSE)
d9a64523 2093 return (TRUE);
39037602
A
2094
2095 result = (vm_num_pinned_swap_files == vm_num_swap_files);
2096
2097 return (result);
2098}
d9a64523
A
2099
#if CONFIG_FREEZE
/*
 * Query the freeze budget via vm_swap_vol_get_budget().
 *
 * freeze_daily_budget: out parameter handed to vm_swap_vol_get_budget();
 *                      set to 0 here if no swapfiles exist and the temporary
 *                      swapfile cannot be opened.
 *
 * Returns TRUE when vm_swap_vol_get_budget() returned 0 for the vnode that
 * was queried, FALSE otherwise (including when no suitable vnode was found).
 */
boolean_t
vm_swap_max_budget(uint64_t *freeze_daily_budget)
{
	boolean_t	got_device_budget = FALSE;
	struct swapfile	*cur = NULL;

	if (vm_num_swap_files == 0) {
		/*
		 * Initial budget value before any swap files are created:
		 * open a temporary swap file just to query the budget.
		 */
		struct vnode	*probe_vp = NULL;

		vm_swapfile_open(swapfilename, &probe_vp);

		if (probe_vp == NULL) {
			*freeze_daily_budget = 0;
			return got_device_budget;
		}

		if (vm_swap_vol_get_budget(probe_vp, freeze_daily_budget) == 0) {
			got_device_budget = TRUE;
		}

		vm_swapfile_close((uint64_t)&swapfilename, probe_vp);
		probe_vp = NULL;

		return got_device_budget;
	}

	lck_mtx_lock(&vm_swap_data_lock);

	cur = (struct swapfile*) queue_first(&swf_global_queue);

	if (cur) {
		/* Query the first swapfile whose flags are exactly SWAP_READY. */
		for (; queue_end(&swf_global_queue, (queue_entry_t)cur) == FALSE;
		    cur = (struct swapfile*) queue_next(&cur->swp_queue)) {

			if (cur->swp_flags != SWAP_READY) {
				continue;
			}

			assert(cur->swp_vp);

			if (vm_swap_vol_get_budget(cur->swp_vp, freeze_daily_budget) == 0) {
				got_device_budget = TRUE;
			}
			break;
		}
	}

	lck_mtx_unlock(&vm_swap_data_lock);

	return got_device_budget;
}
#endif /* CONFIG_FREEZE */