1 /*
2 * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include "vm_compressor_backing_store.h"
30 #include <vm/vm_pageout.h>
31 #include <vm/vm_protos.h>
32
33 #include <IOKit/IOHibernatePrivate.h>
34
35 #include <kern/policy_internal.h>
36
37 LCK_GRP_DECLARE(vm_swap_data_lock_grp, "vm_swap_data");
38 LCK_MTX_EARLY_DECLARE(vm_swap_data_lock, &vm_swap_data_lock_grp);
39
40 #if defined(XNU_TARGET_OS_OSX)
41 /*
42 * launchd explicitly turns ON swap later during boot on macOS devices.
43 */
44 boolean_t compressor_store_stop_compaction = TRUE;
45 #else
46 boolean_t compressor_store_stop_compaction = FALSE;
47 #endif
48
49 boolean_t vm_swapfile_create_needed = FALSE;
50 boolean_t vm_swapfile_gc_needed = FALSE;
51
52 int vm_swapper_throttle = -1;
53 uint64_t vm_swapout_thread_id;
54
55 uint64_t vm_swap_put_failures = 0; /* Likely failed I/O. Data is still in memory. */
56 uint64_t vm_swap_get_failures = 0; /* Fatal */
57 uint64_t vm_swap_put_failures_no_swap_file = 0; /* Possibly not fatal because we might just need a new swapfile. */
58 int vm_num_swap_files_config = 0;
59 int vm_num_swap_files = 0;
60 int vm_num_pinned_swap_files = 0;
61 int vm_swapout_thread_processed_segments = 0;
62 int vm_swapout_thread_awakened = 0;
63 bool vm_swapout_thread_running = FALSE;
64 int vm_swapfile_create_thread_awakened = 0;
65 int vm_swapfile_create_thread_running = 0;
66 int vm_swapfile_gc_thread_awakened = 0;
67 int vm_swapfile_gc_thread_running = 0;
68
69 int64_t vm_swappin_avail = 0;
70 boolean_t vm_swappin_enabled = FALSE;
71 unsigned int vm_swapfile_total_segs_alloced = 0;
72 unsigned int vm_swapfile_total_segs_used = 0;
73
74 char swapfilename[MAX_SWAPFILENAME_LEN + 1] = SWAP_FILE_NAME;
75
76 extern vm_map_t compressor_map;
77
78
79 #define SWAP_READY 0x1 /* Swap file is ready to be used */
80 #define SWAP_RECLAIM 0x2 /* Swap file is marked to be reclaimed */
81 #define SWAP_WANTED 0x4 /* Swap file has waiters */
82 #define SWAP_REUSE 0x8 /* Swap file is on the Q and has a name. Reuse after init-ing. */
83 #define SWAP_PINNED 0x10 /* Swap file is pinned (FusionDrive) */
84
85
86 struct swapfile {
87 queue_head_t swp_queue; /* list of swap files */
88 char *swp_path; /* saved pathname of swap file */
89 struct vnode *swp_vp; /* backing vnode */
90 uint64_t swp_size; /* size of this swap file */
91 uint8_t *swp_bitmap; /* bitmap showing the alloced/freed slots in the swap file */
92 unsigned int swp_pathlen; /* length of pathname */
93 unsigned int swp_nsegs; /* #segments we can use */
94 unsigned int swp_nseginuse; /* #segments in use */
95 unsigned int swp_index; /* index of this swap file */
96 unsigned int swp_flags; /* state of swap file */
97 unsigned int swp_free_hint; /* offset of 1st free chunk */
98 unsigned int swp_io_count; /* count of outstanding I/Os */
99 c_segment_t *swp_csegs; /* back pointers to the c_segments. Used during swap reclaim. */
100
101 struct trim_list *swp_delayed_trim_list_head;
102 unsigned int swp_delayed_trim_count;
103 };
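/*
 * Illustrative note (not part of the original source): each swapfile is
 * carved into fixed-size segments of COMPRESSED_SWAP_CHUNK_SIZE bytes.
 * swp_bitmap tracks one bit per segment (1 == in use) and swp_csegs keeps
 * a back pointer per segment so reclaim can find the owning c_segment.
 * A sketch of the bit arithmetic used throughout this file:
 *
 *	byte  = segidx >> 3;                       // which byte of the bitmap
 *	bit   = segidx % 8;                        // which bit within that byte
 *	inuse = swp_bitmap[byte] & (1 << bit);     // nonzero if slot is taken
 */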
104
105 queue_head_t swf_global_queue;
106 boolean_t swp_trim_supported = FALSE;
107
108 extern clock_sec_t dont_trim_until_ts;
109 clock_sec_t vm_swapfile_last_failed_to_create_ts = 0;
110 clock_sec_t vm_swapfile_last_successful_create_ts = 0;
111 int vm_swapfile_can_be_created = FALSE;
112 boolean_t delayed_trim_handling_in_progress = FALSE;
113
114 boolean_t hibernate_in_progress_with_pinned_swap = FALSE;
115
116 static void vm_swapout_thread_throttle_adjust(void);
117 static void vm_swap_free_now(struct swapfile *swf, uint64_t f_offset);
118 static void vm_swapout_thread(void);
119 static void vm_swapfile_create_thread(void);
120 static void vm_swapfile_gc_thread(void);
121 static void vm_swap_defragment(void);
122 static void vm_swap_handle_delayed_trims(boolean_t);
123 static void vm_swap_do_delayed_trim(struct swapfile *);
124 static void vm_swap_wait_on_trim_handling_in_progress(void);
125
126 extern int vnode_getwithref(struct vnode* vp);
127
128 boolean_t vm_swap_force_defrag = FALSE, vm_swap_force_reclaim = FALSE;
129
130 #if CONFIG_EMBEDDED
131
132 /*
133 * For CONFIG_FREEZE, we scale the c_segments_limit based on the
134 * number of swapfiles allowed. That increases wired memory overhead.
135 * So we keep the maximum number of swapfiles the same on both DEV and
136 * RELEASE builds so that the memory overhead is similar for performance comparisons.
137 */
138 #define VM_MAX_SWAP_FILE_NUM 5
139
140 #define VM_SWAPFILE_DELAYED_TRIM_MAX 4
141
142 #define VM_SWAP_SHOULD_DEFRAGMENT() (((vm_swap_force_defrag == TRUE) || (c_swappedout_sparse_count > (vm_swapfile_total_segs_used / 16))) ? 1 : 0)
143 #define VM_SWAP_SHOULD_PIN(_size) FALSE
144 #define VM_SWAP_SHOULD_CREATE(cur_ts) ((vm_num_swap_files < vm_num_swap_files_config) && ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < (unsigned int)VM_SWAPFILE_HIWATER_SEGS) && \
145 ((cur_ts - vm_swapfile_last_failed_to_create_ts) > VM_SWAPFILE_DELAYED_CREATE) ? 1 : 0)
146 #define VM_SWAP_SHOULD_TRIM(swf) ((swf->swp_delayed_trim_count >= VM_SWAPFILE_DELAYED_TRIM_MAX) ? 1 : 0)
147
148 #else /* CONFIG_EMBEDDED */
149
150 #define VM_MAX_SWAP_FILE_NUM 100
151 #define VM_SWAPFILE_DELAYED_TRIM_MAX 128
152
153 #define VM_SWAP_SHOULD_DEFRAGMENT() (((vm_swap_force_defrag == TRUE) || (c_swappedout_sparse_count > (vm_swapfile_total_segs_used / 4))) ? 1 : 0)
154 #define VM_SWAP_SHOULD_PIN(_size) (vm_swappin_avail > 0 && vm_swappin_avail >= (int64_t)(_size))
155 #define VM_SWAP_SHOULD_CREATE(cur_ts) ((vm_num_swap_files < vm_num_swap_files_config) && ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < (unsigned int)VM_SWAPFILE_HIWATER_SEGS) && \
156 ((cur_ts - vm_swapfile_last_failed_to_create_ts) > VM_SWAPFILE_DELAYED_CREATE) ? 1 : 0)
157 #define VM_SWAP_SHOULD_TRIM(swf) ((swf->swp_delayed_trim_count >= VM_SWAPFILE_DELAYED_TRIM_MAX) ? 1 : 0)
158
159 #endif /* CONFIG_EMBEDDED */
160
161 #define VM_SWAP_SHOULD_RECLAIM() (((vm_swap_force_reclaim == TRUE) || ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) >= SWAPFILE_RECLAIM_THRESHOLD_SEGS)) ? 1 : 0)
162 #define VM_SWAP_SHOULD_ABORT_RECLAIM() (((vm_swap_force_reclaim == FALSE) && ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) <= SWAPFILE_RECLAIM_MINIMUM_SEGS)) ? 1 : 0)
163 #define VM_SWAPFILE_DELAYED_CREATE 15
164
165 #define VM_SWAP_BUSY() ((c_swapout_count && (vm_swapper_throttle == THROTTLE_LEVEL_COMPRESSOR_TIER0)) ? 1 : 0)
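/*
 * Policy note (illustrative, not part of the original source): defragmentation
 * kicks in once the count of sparsely-populated swapped-out segments crosses a
 * fraction of all in-use swap segments -- 1/16 on embedded, 1/4 on macOS. For
 * example, with 1024 segments in use on macOS, more than 256 sparse segments
 * triggers VM_SWAP_SHOULD_DEFRAGMENT(). Reclaim has hysteresis: it starts when
 * the free-but-allocated surplus reaches SWAPFILE_RECLAIM_THRESHOLD_SEGS and
 * aborts once that surplus falls to SWAPFILE_RECLAIM_MINIMUM_SEGS.
 */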
166
167
168 #if CHECKSUM_THE_SWAP
169 extern unsigned int hash_string(char *cp, int len);
170 #endif
171
172 #if RECORD_THE_COMPRESSED_DATA
173 boolean_t c_compressed_record_init_done = FALSE;
174 int c_compressed_record_write_error = 0;
175 struct vnode *c_compressed_record_vp = NULL;
176 uint64_t c_compressed_record_file_offset = 0;
177 void c_compressed_record_init(void);
178 void c_compressed_record_write(char *, int);
179 #endif
180
181 extern void vm_pageout_io_throttle(void);
182
183 static struct swapfile *vm_swapfile_for_handle(uint64_t);
184
185 /*
186 * Called with the vm_swap_data_lock held.
187 */
188
189 static struct swapfile *
190 vm_swapfile_for_handle(uint64_t f_offset)
191 {
192 uint64_t file_offset = 0;
193 unsigned int swapfile_index = 0;
194 struct swapfile* swf = NULL;
195
196 file_offset = (f_offset & SWAP_SLOT_MASK);
197 swapfile_index = (f_offset >> SWAP_DEVICE_SHIFT);
198
199 swf = (struct swapfile*) queue_first(&swf_global_queue);
200
201 while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
202 if (swapfile_index == swf->swp_index) {
203 break;
204 }
205
206 swf = (struct swapfile*) queue_next(&swf->swp_queue);
207 }
208
209 if (queue_end(&swf_global_queue, (queue_entry_t) swf)) {
210 swf = NULL;
211 }
212
213 return swf;
214 }
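/*
 * Illustrative sketch (not part of the original source) of the swap handle
 * layout this lookup decodes: the swapfile index lives in the high bits and
 * the byte offset within that file in the low bits.
 *
 *	handle = ((uint64_t)swp_index << SWAP_DEVICE_SHIFT) | (offset & SWAP_SLOT_MASK);
 *	index  = handle >> SWAP_DEVICE_SHIFT;
 *	offset = handle &  SWAP_SLOT_MASK;
 *
 * vm_swap_put() composes handles this way; see "issue_io" below.
 */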
215
216 #if ENCRYPTED_SWAP
217
218 #include <libkern/crypto/aesxts.h>
219
220 extern int cc_rand_generate(void *, size_t); /* from <libkern/crypto/rand.h> */
221
222 boolean_t swap_crypt_initialized;
223 void swap_crypt_initialize(void);
224
225 symmetric_xts xts_modectx;
226 uint32_t swap_crypt_key1[8]; /* big enough for a 256 bit random key */
227 uint32_t swap_crypt_key2[8]; /* big enough for a 256 bit random key */
228
229 #if DEVELOPMENT || DEBUG
230 boolean_t swap_crypt_xts_tested = FALSE;
231 unsigned char swap_crypt_test_page_ref[4096] __attribute__((aligned(4096)));
232 unsigned char swap_crypt_test_page_encrypt[4096] __attribute__((aligned(4096)));
233 unsigned char swap_crypt_test_page_decrypt[4096] __attribute__((aligned(4096)));
234 #endif /* DEVELOPMENT || DEBUG */
235
236 unsigned long vm_page_encrypt_counter;
237 unsigned long vm_page_decrypt_counter;
238
239
240 void
241 swap_crypt_initialize(void)
242 {
243 uint8_t *enckey1, *enckey2;
244 int keylen1, keylen2;
245 int error;
246
247 assert(swap_crypt_initialized == FALSE);
248
249 keylen1 = sizeof(swap_crypt_key1);
250 enckey1 = (uint8_t *)&swap_crypt_key1;
251 keylen2 = sizeof(swap_crypt_key2);
252 enckey2 = (uint8_t *)&swap_crypt_key2;
253
254 error = cc_rand_generate((void *)enckey1, keylen1);
255 assert(!error);
256
257 error = cc_rand_generate((void *)enckey2, keylen2);
258 assert(!error);
259
260 error = xts_start(0, NULL, enckey1, keylen1, enckey2, keylen2, 0, 0, &xts_modectx);
261 assert(!error);
262
263 swap_crypt_initialized = TRUE;
264
265 #if DEVELOPMENT || DEBUG
266 uint8_t *encptr;
267 uint8_t *decptr;
268 uint8_t *refptr;
269 uint8_t *iv;
270 uint64_t ivnum[2];
271 int size = 0;
272 int i = 0;
273 int rc = 0;
274
275 assert(swap_crypt_xts_tested == FALSE);
276
277 /*
278 * Validate the encryption algorithms.
279 *
280 * First initialize the test data.
281 */
282 for (i = 0; i < 4096; i++) {
283 swap_crypt_test_page_ref[i] = (char) i;
284 }
285 ivnum[0] = (uint64_t)0xaa;
286 ivnum[1] = 0;
287 iv = (uint8_t *)ivnum;
288
289 refptr = (uint8_t *)swap_crypt_test_page_ref;
290 encptr = (uint8_t *)swap_crypt_test_page_encrypt;
291 decptr = (uint8_t *)swap_crypt_test_page_decrypt;
292 size = 4096;
293
294 /* encrypt */
295 rc = xts_encrypt(refptr, size, encptr, iv, &xts_modectx);
296 assert(!rc);
297
298 /* compare result with original - should NOT match */
299 for (i = 0; i < 4096; i++) {
300 if (swap_crypt_test_page_encrypt[i] !=
301 swap_crypt_test_page_ref[i]) {
302 break;
303 }
304 }
305 assert(i != 4096);
306
307 /* decrypt */
308 rc = xts_decrypt(encptr, size, decptr, iv, &xts_modectx);
309 assert(!rc);
310
311 /* compare result with original */
312 for (i = 0; i < 4096; i++) {
313 if (swap_crypt_test_page_decrypt[i] !=
314 swap_crypt_test_page_ref[i]) {
315 panic("encryption test failed");
316 }
317 }
318 /* encrypt in place */
319 rc = xts_encrypt(decptr, size, decptr, iv, &xts_modectx);
320 assert(!rc);
321
322 /* decrypt in place */
323 rc = xts_decrypt(decptr, size, decptr, iv, &xts_modectx);
324 assert(!rc);
325
326 for (i = 0; i < 4096; i++) {
327 if (swap_crypt_test_page_decrypt[i] !=
328 swap_crypt_test_page_ref[i]) {
329 panic("in place encryption test failed");
330 }
331 }
332 swap_crypt_xts_tested = TRUE;
333 #endif /* DEVELOPMENT || DEBUG */
334 }
335
336
337 void
338 vm_swap_encrypt(c_segment_t c_seg)
339 {
340 uint8_t *ptr;
341 uint8_t *iv;
342 uint64_t ivnum[2];
343 int size = 0;
344 int rc = 0;
345
346 if (swap_crypt_initialized == FALSE) {
347 swap_crypt_initialize();
348 }
349
350 #if DEVELOPMENT || DEBUG
351 C_SEG_MAKE_WRITEABLE(c_seg);
352 #endif
353 ptr = (uint8_t *)c_seg->c_store.c_buffer;
354 size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
355
356 ivnum[0] = (uint64_t)c_seg;
357 ivnum[1] = 0;
358 iv = (uint8_t *)ivnum;
359
360 rc = xts_encrypt(ptr, size, ptr, iv, &xts_modectx);
361 assert(!rc);
362
363 vm_page_encrypt_counter += (size / PAGE_SIZE_64);
364
365 #if DEVELOPMENT || DEBUG
366 C_SEG_WRITE_PROTECT(c_seg);
367 #endif
368 }
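/*
 * Note (illustrative, not part of the original source): the XTS tweak/IV is
 * derived from the c_segment_t address (ivnum[0] = (uint64_t)c_seg), so
 * vm_swap_decrypt() recovers the plaintext only because the same c_segment
 * structure -- and therefore the same address -- is used for the matching
 * decrypt after swapin. The two 256-bit keys are freshly randomized on each
 * boot, so swapped segments cannot be decrypted after a reboot.
 */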
369
370 void
371 vm_swap_decrypt(c_segment_t c_seg)
372 {
373 uint8_t *ptr;
374 uint8_t *iv;
375 uint64_t ivnum[2];
376 int size = 0;
377 int rc = 0;
378
379 assert(swap_crypt_initialized);
380
381 #if DEVELOPMENT || DEBUG
382 C_SEG_MAKE_WRITEABLE(c_seg);
383 #endif
384 ptr = (uint8_t *)c_seg->c_store.c_buffer;
385 size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
386
387 ivnum[0] = (uint64_t)c_seg;
388 ivnum[1] = 0;
389 iv = (uint8_t *)ivnum;
390
391 rc = xts_decrypt(ptr, size, ptr, iv, &xts_modectx);
392 assert(!rc);
393
394 vm_page_decrypt_counter += (size / PAGE_SIZE_64);
395
396 #if DEVELOPMENT || DEBUG
397 C_SEG_WRITE_PROTECT(c_seg);
398 #endif
399 }
400 #endif /* ENCRYPTED_SWAP */
401
402
403 void
404 vm_compressor_swap_init()
405 {
406 thread_t thread = NULL;
407
408 queue_init(&swf_global_queue);
409
410 if (kernel_thread_start_priority((thread_continue_t)vm_swapout_thread, NULL,
411 BASEPRI_VM, &thread) != KERN_SUCCESS) {
412 panic("vm_swapout_thread: create failed");
413 }
414 thread_set_thread_name(thread, "VM_swapout");
415 vm_swapout_thread_id = thread->thread_id;
416
417 thread_deallocate(thread);
418
419 if (kernel_thread_start_priority((thread_continue_t)vm_swapfile_create_thread, NULL,
420 BASEPRI_VM, &thread) != KERN_SUCCESS) {
421 panic("vm_swapfile_create_thread: create failed");
422 }
423
424 thread_set_thread_name(thread, "VM_swapfile_create");
425 thread_deallocate(thread);
426
427 if (kernel_thread_start_priority((thread_continue_t)vm_swapfile_gc_thread, NULL,
428 BASEPRI_VM, &thread) != KERN_SUCCESS) {
429 panic("vm_swapfile_gc_thread: create failed");
430 }
431 thread_set_thread_name(thread, "VM_swapfile_gc");
432
433 /*
434 * Swapfile garbage collection will need to allocate memory
435 * to complete its swap reclaim and in-memory compaction.
436 * So allow it to dip into the reserved VM page pool.
437 */
438 thread_lock(thread);
439 thread->options |= TH_OPT_VMPRIV;
440 thread_unlock(thread);
441
442 thread_deallocate(thread);
443
444 proc_set_thread_policy_with_tid(kernel_task, thread->thread_id,
445 TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER2);
446 proc_set_thread_policy_with_tid(kernel_task, thread->thread_id,
447 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
448
449 #if CONFIG_EMBEDDED
450 /*
451 * dummy value until the swap file gets created
452 * when we drive the first c_segment_t to the
453 * swapout queue... at that time we will
454 * know the true size we have to work with
455 */
456 c_overage_swapped_limit = 16;
457 #endif
458
459 vm_num_swap_files_config = VM_MAX_SWAP_FILE_NUM;
460
461 printf("VM Swap Subsystem is ON\n");
462 }
463
464
465 #if RECORD_THE_COMPRESSED_DATA
466
467 void
468 c_compressed_record_init()
469 {
470 if (c_compressed_record_init_done == FALSE) {
471 vm_swapfile_open("/tmp/compressed_data", &c_compressed_record_vp);
472 c_compressed_record_init_done = TRUE;
473 }
474 }
475
476 void
477 c_compressed_record_write(char *buf, int size)
478 {
479 if (c_compressed_record_write_error == 0) {
480 c_compressed_record_write_error = vm_record_file_write(c_compressed_record_vp, c_compressed_record_file_offset, buf, size);
481 c_compressed_record_file_offset += size;
482 }
483 }
484 #endif
485
486
487 int compaction_swapper_inited = 0;
488
489 void
490 vm_compaction_swapper_do_init(void)
491 {
492 struct vnode *vp;
493 char *pathname;
494 int namelen;
495
496 if (compaction_swapper_inited) {
497 return;
498 }
499
500 if (vm_compressor_mode != VM_PAGER_COMPRESSOR_WITH_SWAP) {
501 compaction_swapper_inited = 1;
502 return;
503 }
504 lck_mtx_lock(&vm_swap_data_lock);
505
506 if (!compaction_swapper_inited) {
507 namelen = (int)strlen(swapfilename) + SWAPFILENAME_INDEX_LEN + 1;
508 pathname = kheap_alloc(KHEAP_TEMP, namelen, Z_WAITOK | Z_ZERO);
509 snprintf(pathname, namelen, "%s%d", swapfilename, 0);
510
511 vm_swapfile_open(pathname, &vp);
512
513 if (vp) {
514 if (vnode_pager_isSSD(vp) == FALSE) {
515 /*
516 * swap files live on an HDD, so let's make sure to start swapping
517 * much earlier since we're not worried about SSD write-wear and
518 * we have so little write bandwidth to work with.
519 * These values were derived experimentally by running the performance
520 * team's stock test for evaluating HDD performance against various
521 * combinations and comparing overall results.
522 * Note that the > relationship between these 4 values must be maintained.
523 */
524 if (vm_compressor_minorcompact_threshold_divisor_overridden == 0) {
525 vm_compressor_minorcompact_threshold_divisor = 15;
526 }
527 if (vm_compressor_majorcompact_threshold_divisor_overridden == 0) {
528 vm_compressor_majorcompact_threshold_divisor = 18;
529 }
530 if (vm_compressor_unthrottle_threshold_divisor_overridden == 0) {
531 vm_compressor_unthrottle_threshold_divisor = 24;
532 }
533 if (vm_compressor_catchup_threshold_divisor_overridden == 0) {
534 vm_compressor_catchup_threshold_divisor = 30;
535 }
536 }
537 #if !CONFIG_EMBEDDED
538 vnode_setswapmount(vp);
539 vm_swappin_avail = vnode_getswappin_avail(vp);
540
541 if (vm_swappin_avail) {
542 vm_swappin_enabled = TRUE;
543 }
544 #endif
545 vm_swapfile_close((uint64_t)pathname, vp);
546 }
547 kheap_free(KHEAP_TEMP, pathname, namelen);
548
549 compaction_swapper_inited = 1;
550 }
551 lck_mtx_unlock(&vm_swap_data_lock);
552 }
553
554
555 void
556 vm_swap_consider_defragmenting(int flags)
557 {
558 boolean_t force_defrag = (flags & VM_SWAP_FLAGS_FORCE_DEFRAG);
559 boolean_t force_reclaim = (flags & VM_SWAP_FLAGS_FORCE_RECLAIM);
560
561 if (compressor_store_stop_compaction == FALSE && !VM_SWAP_BUSY() &&
562 (force_defrag || force_reclaim || VM_SWAP_SHOULD_DEFRAGMENT() || VM_SWAP_SHOULD_RECLAIM())) {
563 if (!vm_swapfile_gc_thread_running || force_defrag || force_reclaim) {
564 lck_mtx_lock(&vm_swap_data_lock);
565
566 if (force_defrag) {
567 vm_swap_force_defrag = TRUE;
568 }
569
570 if (force_reclaim) {
571 vm_swap_force_reclaim = TRUE;
572 }
573
574 if (!vm_swapfile_gc_thread_running) {
575 thread_wakeup((event_t) &vm_swapfile_gc_needed);
576 }
577
578 lck_mtx_unlock(&vm_swap_data_lock);
579 }
580 }
581 }
582
583
584 int vm_swap_defragment_yielded = 0;
585 int vm_swap_defragment_swapin = 0;
586 int vm_swap_defragment_free = 0;
587 int vm_swap_defragment_busy = 0;
588
589 #if CONFIG_FREEZE
590 extern uint32_t c_segment_pages_compressed_incore;
591 extern uint32_t c_segment_pages_compressed_nearing_limit;
592 extern uint32_t c_segment_count;
593 extern uint32_t c_segments_nearing_limit;
594
595 boolean_t memorystatus_kill_on_VM_compressor_space_shortage(boolean_t);
596
597 extern bool freezer_incore_cseg_acct;
598 #endif /* CONFIG_FREEZE */
599
600 static void
601 vm_swap_defragment()
602 {
603 c_segment_t c_seg;
604
605 /*
606 * have to grab the master lock w/o holding
607 * any locks in spin mode
608 */
609 PAGE_REPLACEMENT_DISALLOWED(TRUE);
610
611 lck_mtx_lock_spin_always(c_list_lock);
612
613 while (!queue_empty(&c_swappedout_sparse_list_head)) {
614 if (compressor_store_stop_compaction == TRUE || VM_SWAP_BUSY()) {
615 vm_swap_defragment_yielded++;
616 break;
617 }
618 c_seg = (c_segment_t)queue_first(&c_swappedout_sparse_list_head);
619
620 lck_mtx_lock_spin_always(&c_seg->c_lock);
621
622 assert(c_seg->c_state == C_ON_SWAPPEDOUTSPARSE_Q);
623
624 if (c_seg->c_busy) {
625 lck_mtx_unlock_always(c_list_lock);
626
627 PAGE_REPLACEMENT_DISALLOWED(FALSE);
628 /*
629 * c_seg_wait_on_busy consumes c_seg->c_lock
630 */
631 c_seg_wait_on_busy(c_seg);
632
633 PAGE_REPLACEMENT_DISALLOWED(TRUE);
634
635 lck_mtx_lock_spin_always(c_list_lock);
636
637 vm_swap_defragment_busy++;
638 continue;
639 }
640 if (c_seg->c_bytes_used == 0) {
641 /*
642 * c_seg_free_locked consumes the c_list_lock
643 * and c_seg->c_lock
644 */
645 C_SEG_BUSY(c_seg);
646 c_seg_free_locked(c_seg);
647
648 vm_swap_defragment_free++;
649 } else {
650 lck_mtx_unlock_always(c_list_lock);
651
652 #if CONFIG_FREEZE
653 if (freezer_incore_cseg_acct) {
654 if ((c_seg->c_slots_used + c_segment_pages_compressed_incore) >= c_segment_pages_compressed_nearing_limit) {
655 memorystatus_kill_on_VM_compressor_space_shortage(TRUE /* async */);
656 }
657
658 uint32_t incore_seg_count = c_segment_count - c_swappedout_count - c_swappedout_sparse_count;
659 if ((incore_seg_count + 1) >= c_segments_nearing_limit) {
660 memorystatus_kill_on_VM_compressor_space_shortage(TRUE /* async */);
661 }
662 }
663 #endif /* CONFIG_FREEZE */
664 if (c_seg_swapin(c_seg, TRUE, FALSE) == 0) {
665 lck_mtx_unlock_always(&c_seg->c_lock);
666 }
667
668 vm_swap_defragment_swapin++;
669 }
670 PAGE_REPLACEMENT_DISALLOWED(FALSE);
671
672 vm_pageout_io_throttle();
673
674 /*
675 * because write waiters have priority over readers, dropping
676 * and immediately retaking the master lock still gives any
677 * thread waiting to take the master lock exclusively an
678 * opportunity to acquire it
679 */
680 PAGE_REPLACEMENT_DISALLOWED(TRUE);
681
682 lck_mtx_lock_spin_always(c_list_lock);
683 }
684 lck_mtx_unlock_always(c_list_lock);
685
686 PAGE_REPLACEMENT_DISALLOWED(FALSE);
687 }
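/*
 * Design note (illustrative, not part of the original source): "defragmenting"
 * here means swapping sparse segments back in (or freeing empty ones), not
 * rewriting them on disk. Once resident, a sparse segment can be minor-compacted
 * and later swapped back out densely packed, which releases whole
 * COMPRESSED_SWAP_CHUNK_SIZE slots and lets vm_swap_reclaim() shrink or
 * retire swapfiles.
 */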
688
689
690
691 static void
692 vm_swapfile_create_thread(void)
693 {
694 clock_sec_t sec;
695 clock_nsec_t nsec;
696
697 current_thread()->options |= TH_OPT_VMPRIV;
698
699 vm_swapfile_create_thread_awakened++;
700 vm_swapfile_create_thread_running = 1;
701
702 while (TRUE) {
703 /*
704 * walk through the list of swap files
705 * and do the delayed frees/trims for
706 * any swap file whose count of delayed
707 * frees is above the batch limit
708 */
709 vm_swap_handle_delayed_trims(FALSE);
710
711 lck_mtx_lock(&vm_swap_data_lock);
712
713 if (hibernate_in_progress_with_pinned_swap == TRUE) {
714 break;
715 }
716
717 if (compressor_store_stop_compaction == TRUE) {
718 break;
719 }
720
721 clock_get_system_nanotime(&sec, &nsec);
722
723 if (VM_SWAP_SHOULD_CREATE(sec) == 0) {
724 break;
725 }
726
727 lck_mtx_unlock(&vm_swap_data_lock);
728
729 if (vm_swap_create_file() == FALSE) {
730 vm_swapfile_last_failed_to_create_ts = sec;
731 HIBLOG("vm_swap_create_file failed @ %lu secs\n", (unsigned long)sec);
732 } else {
733 vm_swapfile_last_successful_create_ts = sec;
734 }
735 }
736 vm_swapfile_create_thread_running = 0;
737
738 if (hibernate_in_progress_with_pinned_swap == TRUE) {
739 thread_wakeup((event_t)&hibernate_in_progress_with_pinned_swap);
740 }
741
742 if (compressor_store_stop_compaction == TRUE) {
743 thread_wakeup((event_t)&compressor_store_stop_compaction);
744 }
745
746 assert_wait((event_t)&vm_swapfile_create_needed, THREAD_UNINT);
747
748 lck_mtx_unlock(&vm_swap_data_lock);
749
750 thread_block((thread_continue_t)vm_swapfile_create_thread);
751
752 /* NOTREACHED */
753 }
754
755
756 #if HIBERNATION
757
758 kern_return_t
759 hibernate_pin_swap(boolean_t start)
760 {
761 vm_compaction_swapper_do_init();
762
763 if (start == FALSE) {
764 lck_mtx_lock(&vm_swap_data_lock);
765 hibernate_in_progress_with_pinned_swap = FALSE;
766 lck_mtx_unlock(&vm_swap_data_lock);
767
768 return KERN_SUCCESS;
769 }
770 if (vm_swappin_enabled == FALSE) {
771 return KERN_SUCCESS;
772 }
773
774 lck_mtx_lock(&vm_swap_data_lock);
775
776 hibernate_in_progress_with_pinned_swap = TRUE;
777
778 while (vm_swapfile_create_thread_running || vm_swapfile_gc_thread_running) {
779 assert_wait((event_t)&hibernate_in_progress_with_pinned_swap, THREAD_UNINT);
780
781 lck_mtx_unlock(&vm_swap_data_lock);
782
783 thread_block(THREAD_CONTINUE_NULL);
784
785 lck_mtx_lock(&vm_swap_data_lock);
786 }
787 if (vm_num_swap_files > vm_num_pinned_swap_files) {
788 hibernate_in_progress_with_pinned_swap = FALSE;
789 lck_mtx_unlock(&vm_swap_data_lock);
790
791 HIBLOG("hibernate_pin_swap failed - vm_num_swap_files = %d, vm_num_pinned_swap_files = %d\n",
792 vm_num_swap_files, vm_num_pinned_swap_files);
793 return KERN_FAILURE;
794 }
795 lck_mtx_unlock(&vm_swap_data_lock);
796
797 while (VM_SWAP_SHOULD_PIN(MAX_SWAP_FILE_SIZE)) {
798 if (vm_swap_create_file() == FALSE) {
799 break;
800 }
801 }
802 return KERN_SUCCESS;
803 }
804 #endif
805
806 static void
807 vm_swapfile_gc_thread(void)
808 {
809 boolean_t need_defragment;
810 boolean_t need_reclaim;
811
812 vm_swapfile_gc_thread_awakened++;
813 vm_swapfile_gc_thread_running = 1;
814
815 while (TRUE) {
816 lck_mtx_lock(&vm_swap_data_lock);
817
818 if (hibernate_in_progress_with_pinned_swap == TRUE) {
819 break;
820 }
821
822 if (VM_SWAP_BUSY() || compressor_store_stop_compaction == TRUE) {
823 break;
824 }
825
826 need_defragment = FALSE;
827 need_reclaim = FALSE;
828
829 if (VM_SWAP_SHOULD_DEFRAGMENT()) {
830 need_defragment = TRUE;
831 }
832
833 if (VM_SWAP_SHOULD_RECLAIM()) {
834 need_defragment = TRUE;
835 need_reclaim = TRUE;
836 }
837 if (need_defragment == FALSE && need_reclaim == FALSE) {
838 break;
839 }
840
841 vm_swap_force_defrag = FALSE;
842 vm_swap_force_reclaim = FALSE;
843
844 lck_mtx_unlock(&vm_swap_data_lock);
845
846 if (need_defragment == TRUE) {
847 vm_swap_defragment();
848 }
849 if (need_reclaim == TRUE) {
850 vm_swap_reclaim();
851 }
852 }
853 vm_swapfile_gc_thread_running = 0;
854
855 if (hibernate_in_progress_with_pinned_swap == TRUE) {
856 thread_wakeup((event_t)&hibernate_in_progress_with_pinned_swap);
857 }
858
859 if (compressor_store_stop_compaction == TRUE) {
860 thread_wakeup((event_t)&compressor_store_stop_compaction);
861 }
862
863 assert_wait((event_t)&vm_swapfile_gc_needed, THREAD_UNINT);
864
865 lck_mtx_unlock(&vm_swap_data_lock);
866
867 thread_block((thread_continue_t)vm_swapfile_gc_thread);
868
869 /* NOTREACHED */
870 }
871
872
873
874 #define VM_SWAPOUT_LIMIT_T2P 4
875 #define VM_SWAPOUT_LIMIT_T1P 4
876 #define VM_SWAPOUT_LIMIT_T0P 6
877 #define VM_SWAPOUT_LIMIT_T0 8
878 #define VM_SWAPOUT_LIMIT_MAX 8
879
880 #define VM_SWAPOUT_START 0
881 #define VM_SWAPOUT_T2_PASSIVE 1
882 #define VM_SWAPOUT_T1_PASSIVE 2
883 #define VM_SWAPOUT_T0_PASSIVE 3
884 #define VM_SWAPOUT_T0 4
885
886 int vm_swapout_state = VM_SWAPOUT_START;
887 int vm_swapout_limit = 1;
888
889 int vm_swapper_entered_T0 = 0;
890 int vm_swapper_entered_T0P = 0;
891 int vm_swapper_entered_T1P = 0;
892 int vm_swapper_entered_T2P = 0;
893
894
895 static void
896 vm_swapout_thread_throttle_adjust(void)
897 {
898 switch (vm_swapout_state) {
899 case VM_SWAPOUT_START:
900
901 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER2;
902 vm_swapper_entered_T2P++;
903
904 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
905 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
906 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
907 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
908 vm_swapout_limit = VM_SWAPOUT_LIMIT_T2P;
909 vm_swapout_state = VM_SWAPOUT_T2_PASSIVE;
910
911 break;
912
913 case VM_SWAPOUT_T2_PASSIVE:
914
915 if (SWAPPER_NEEDS_TO_UNTHROTTLE()) {
916 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER0;
917 vm_swapper_entered_T0P++;
918
919 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
920 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
921 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
922 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
923 vm_swapout_limit = VM_SWAPOUT_LIMIT_T0P;
924 vm_swapout_state = VM_SWAPOUT_T0_PASSIVE;
925
926 break;
927 }
928 if (swapout_target_age || hibernate_flushing == TRUE) {
929 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER1;
930 vm_swapper_entered_T1P++;
931
932 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
933 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
934 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
935 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
936 vm_swapout_limit = VM_SWAPOUT_LIMIT_T1P;
937 vm_swapout_state = VM_SWAPOUT_T1_PASSIVE;
938 }
939 break;
940
941 case VM_SWAPOUT_T1_PASSIVE:
942
943 if (SWAPPER_NEEDS_TO_UNTHROTTLE()) {
944 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER0;
945 vm_swapper_entered_T0P++;
946
947 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
948 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
949 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
950 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
951 vm_swapout_limit = VM_SWAPOUT_LIMIT_T0P;
952 vm_swapout_state = VM_SWAPOUT_T0_PASSIVE;
953
954 break;
955 }
956 if (swapout_target_age == 0 && hibernate_flushing == FALSE) {
957 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER2;
958 vm_swapper_entered_T2P++;
959
960 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
961 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
962 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
963 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
964 vm_swapout_limit = VM_SWAPOUT_LIMIT_T2P;
965 vm_swapout_state = VM_SWAPOUT_T2_PASSIVE;
966 }
967 break;
968
969 case VM_SWAPOUT_T0_PASSIVE:
970
971 if (SWAPPER_NEEDS_TO_RETHROTTLE()) {
972 vm_swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER2;
973 vm_swapper_entered_T2P++;
974
975 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
976 TASK_POLICY_INTERNAL, TASK_POLICY_IO, vm_swapper_throttle);
977 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
978 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
979 vm_swapout_limit = VM_SWAPOUT_LIMIT_T2P;
980 vm_swapout_state = VM_SWAPOUT_T2_PASSIVE;
981
982 break;
983 }
984 if (SWAPPER_NEEDS_TO_CATCHUP()) {
985 vm_swapper_entered_T0++;
986
987 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
988 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_DISABLE);
989 vm_swapout_limit = VM_SWAPOUT_LIMIT_T0;
990 vm_swapout_state = VM_SWAPOUT_T0;
991 }
992 break;
993
994 case VM_SWAPOUT_T0:
995
996 if (SWAPPER_HAS_CAUGHTUP()) {
997 vm_swapper_entered_T0P++;
998
999 proc_set_thread_policy_with_tid(kernel_task, vm_swapout_thread_id,
1000 TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
1001 vm_swapout_limit = VM_SWAPOUT_LIMIT_T0P;
1002 vm_swapout_state = VM_SWAPOUT_T0_PASSIVE;
1003 }
1004 break;
1005 }
1006 }
1007
1008 int vm_swapout_found_empty = 0;
1009
1010 struct swapout_io_completion vm_swapout_ctx[VM_SWAPOUT_LIMIT_MAX];
1011
1012 int vm_swapout_soc_busy = 0;
1013 int vm_swapout_soc_done = 0;
1014
1015
1016 static struct swapout_io_completion *
1017 vm_swapout_find_free_soc(void)
1018 {
1019 int i;
1020
1021 for (i = 0; i < VM_SWAPOUT_LIMIT_MAX; i++) {
1022 if (vm_swapout_ctx[i].swp_io_busy == 0) {
1023 return &vm_swapout_ctx[i];
1024 }
1025 }
1026 assert(vm_swapout_soc_busy == VM_SWAPOUT_LIMIT_MAX);
1027
1028 return NULL;
1029 }
1030
1031 static struct swapout_io_completion *
1032 vm_swapout_find_done_soc(void)
1033 {
1034 int i;
1035
1036 if (vm_swapout_soc_done) {
1037 for (i = 0; i < VM_SWAPOUT_LIMIT_MAX; i++) {
1038 if (vm_swapout_ctx[i].swp_io_done) {
1039 return &vm_swapout_ctx[i];
1040 }
1041 }
1042 }
1043 return NULL;
1044 }
1045
1046 static void
1047 vm_swapout_complete_soc(struct swapout_io_completion *soc)
1048 {
1049 kern_return_t kr;
1050
1051 if (soc->swp_io_error) {
1052 kr = KERN_FAILURE;
1053 } else {
1054 kr = KERN_SUCCESS;
1055 }
1056
1057 lck_mtx_unlock_always(c_list_lock);
1058
1059 vm_swap_put_finish(soc->swp_swf, &soc->swp_f_offset, soc->swp_io_error, TRUE /*drop iocount*/);
1060 vm_swapout_finish(soc->swp_c_seg, soc->swp_f_offset, soc->swp_c_size, kr);
1061
1062 lck_mtx_lock_spin_always(c_list_lock);
1063
1064 soc->swp_io_done = 0;
1065 soc->swp_io_busy = 0;
1066
1067 vm_swapout_soc_busy--;
1068 vm_swapout_soc_done--;
1069 }
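/*
 * Flow note (illustrative, not part of the original source): swapouts are
 * asynchronous. vm_swapout_thread() claims a free swapout_io_completion
 * (soc), issues the write via vm_swap_put(), and marks it swp_io_busy.
 * vm_swapout_iodone() runs at I/O completion, sets swp_io_done, and wakes
 * the thread, which calls vm_swapout_complete_soc() to finish the segment
 * state transition and recycle the slot. Up to VM_SWAPOUT_LIMIT_MAX (8)
 * I/Os can be in flight at once.
 */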
1070
1071
1072 static void
1073 vm_swapout_thread(void)
1074 {
1075 uint32_t size = 0;
1076 c_segment_t c_seg = NULL;
1077 kern_return_t kr = KERN_SUCCESS;
1078 struct swapout_io_completion *soc;
1079
1080 current_thread()->options |= TH_OPT_VMPRIV;
1081
1082 vm_swapout_thread_awakened++;
1083
1084 lck_mtx_lock_spin_always(c_list_lock);
1085
1086 vm_swapout_thread_running = TRUE;
1087 again:
1088 while (!queue_empty(&c_swapout_list_head) && vm_swapout_soc_busy < vm_swapout_limit && !compressor_store_stop_compaction) {
1089 c_seg = (c_segment_t)queue_first(&c_swapout_list_head);
1090
1091 lck_mtx_lock_spin_always(&c_seg->c_lock);
1092
1093 assert(c_seg->c_state == C_ON_SWAPOUT_Q);
1094
1095 if (c_seg->c_busy) {
1096 lck_mtx_unlock_always(c_list_lock);
1097
1098 c_seg_wait_on_busy(c_seg);
1099
1100 lck_mtx_lock_spin_always(c_list_lock);
1101
1102 continue;
1103 }
1104 vm_swapout_thread_processed_segments++;
1105
1106 size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
1107
1108 if (size == 0) {
1109 assert(c_seg->c_bytes_used == 0);
1110
1111 if (!c_seg->c_on_minorcompact_q) {
1112 c_seg_need_delayed_compaction(c_seg, TRUE);
1113 }
1114
1115 c_seg_switch_state(c_seg, C_IS_EMPTY, FALSE);
1116 lck_mtx_unlock_always(&c_seg->c_lock);
1117 lck_mtx_unlock_always(c_list_lock);
1118
1119 vm_swapout_found_empty++;
1120 goto c_seg_is_empty;
1121 }
1122 C_SEG_BUSY(c_seg);
1123 c_seg->c_busy_swapping = 1;
1124
1125 c_seg_switch_state(c_seg, C_ON_SWAPIO_Q, FALSE);
1126
1127 lck_mtx_unlock_always(c_list_lock);
1128 lck_mtx_unlock_always(&c_seg->c_lock);
1129
1130 #if CHECKSUM_THE_SWAP
1131 c_seg->cseg_hash = hash_string((char *)c_seg->c_store.c_buffer, (int)size);
1132 c_seg->cseg_swap_size = size;
1133 #endif /* CHECKSUM_THE_SWAP */
1134
1135 #if ENCRYPTED_SWAP
1136 vm_swap_encrypt(c_seg);
1137 #endif /* ENCRYPTED_SWAP */
1138
1139 soc = vm_swapout_find_free_soc();
1140 assert(soc);
1141
1142 soc->swp_upl_ctx.io_context = (void *)soc;
1143 soc->swp_upl_ctx.io_done = (void *)vm_swapout_iodone;
1144 soc->swp_upl_ctx.io_error = 0;
1145
1146 kr = vm_swap_put((vm_offset_t)c_seg->c_store.c_buffer, &soc->swp_f_offset, size, c_seg, soc);
1147
1148 if (kr != KERN_SUCCESS) {
1149 if (soc->swp_io_done) {
1150 lck_mtx_lock_spin_always(c_list_lock);
1151
1152 soc->swp_io_done = 0;
1153 vm_swapout_soc_done--;
1154
1155 lck_mtx_unlock_always(c_list_lock);
1156 }
1157 vm_swapout_finish(c_seg, soc->swp_f_offset, size, kr);
1158 } else {
1159 soc->swp_io_busy = 1;
1160 vm_swapout_soc_busy++;
1161 }
1162
1163 c_seg_is_empty:
1164 if (c_swapout_count == 0) {
1165 vm_swap_consider_defragmenting(VM_SWAP_FLAGS_NONE);
1166 }
1167
1168 lck_mtx_lock_spin_always(c_list_lock);
1169
1170 while ((soc = vm_swapout_find_done_soc())) {
1171 vm_swapout_complete_soc(soc);
1172 }
1173 lck_mtx_unlock_always(c_list_lock);
1174
1175 vm_swapout_thread_throttle_adjust();
1176
1177 lck_mtx_lock_spin_always(c_list_lock);
1178 }
1179 while ((soc = vm_swapout_find_done_soc())) {
1180 vm_swapout_complete_soc(soc);
1181 }
1182 lck_mtx_unlock_always(c_list_lock);
1183
1184 vm_pageout_io_throttle();
1185
1186 lck_mtx_lock_spin_always(c_list_lock);
1187
1188 /*
1189 * Recheck if we have some c_segs to wakeup
1190 * post throttle. And, check to see if we
1191 * have any more swapouts needed.
1192 */
1193 if (vm_swapout_soc_done) {
1194 goto again;
1195 }
1196
1197 assert_wait((event_t)&c_swapout_list_head, THREAD_UNINT);
1198
1199 vm_swapout_thread_running = FALSE;
1200
1201 lck_mtx_unlock_always(c_list_lock);
1202
1203 thread_block((thread_continue_t)vm_swapout_thread);
1204
1205 /* NOTREACHED */
1206 }
1207
1208
1209 void
1210 vm_swapout_iodone(void *io_context, int error)
1211 {
1212 struct swapout_io_completion *soc;
1213
1214 soc = (struct swapout_io_completion *)io_context;
1215
1216 lck_mtx_lock_spin_always(c_list_lock);
1217
1218 soc->swp_io_done = 1;
1219 soc->swp_io_error = error;
1220 vm_swapout_soc_done++;
1221
1222 if (!vm_swapout_thread_running) {
1223 thread_wakeup((event_t)&c_swapout_list_head);
1224 }
1225
1226 lck_mtx_unlock_always(c_list_lock);
1227 }
1228
1229
1230 static void
1231 vm_swapout_finish(c_segment_t c_seg, uint64_t f_offset, uint32_t size, kern_return_t kr)
1232 {
1233 PAGE_REPLACEMENT_DISALLOWED(TRUE);
1234
1235 if (kr == KERN_SUCCESS) {
1236 kernel_memory_depopulate(compressor_map, (vm_offset_t)c_seg->c_store.c_buffer, size,
1237 KMA_COMPRESSOR, VM_KERN_MEMORY_COMPRESSOR);
1238 }
1239 #if ENCRYPTED_SWAP
1240 else {
1241 vm_swap_decrypt(c_seg);
1242 }
1243 #endif /* ENCRYPTED_SWAP */
1244 lck_mtx_lock_spin_always(c_list_lock);
1245 lck_mtx_lock_spin_always(&c_seg->c_lock);
1246
1247 if (kr == KERN_SUCCESS) {
1248 int new_state = C_ON_SWAPPEDOUT_Q;
1249 boolean_t insert_head = FALSE;
1250
1251 if (hibernate_flushing == TRUE) {
1252 if (c_seg->c_generation_id >= first_c_segment_to_warm_generation_id &&
1253 c_seg->c_generation_id <= last_c_segment_to_warm_generation_id) {
1254 insert_head = TRUE;
1255 }
1256 } else if (C_SEG_ONDISK_IS_SPARSE(c_seg)) {
1257 new_state = C_ON_SWAPPEDOUTSPARSE_Q;
1258 }
1259
1260 c_seg_switch_state(c_seg, new_state, insert_head);
1261
1262 c_seg->c_store.c_swap_handle = f_offset;
1263
1264 VM_STAT_INCR_BY(swapouts, size >> PAGE_SHIFT);
1265
1266 if (c_seg->c_bytes_used) {
1267 OSAddAtomic64(-c_seg->c_bytes_used, &compressor_bytes_used);
1268 }
1269
1270 #if CONFIG_FREEZE
1271 /*
1272 * Successful swapout. Decrement the in-core compressed pages count.
1273 */
1274 OSAddAtomic(-(c_seg->c_slots_used), &c_segment_pages_compressed_incore);
1275 assertf(c_segment_pages_compressed_incore >= 0, "-ve incore count %p 0x%x", c_seg, c_segment_pages_compressed_incore);
1276 #endif /* CONFIG_FREEZE */
1277 } else {
1278 if (c_seg->c_overage_swap == TRUE) {
1279 c_seg->c_overage_swap = FALSE;
1280 c_overage_swapped_count--;
1281 }
1282
1283 #if CONFIG_FREEZE
1284 if (c_seg->c_task_owner) {
1285 c_seg_update_task_owner(c_seg, NULL);
1286 }
1287 #endif /* CONFIG_FREEZE */
1288
1289 c_seg_switch_state(c_seg, C_ON_AGE_Q, FALSE);
1290
1291 if (!c_seg->c_on_minorcompact_q && C_SEG_UNUSED_BYTES(c_seg) >= PAGE_SIZE) {
1292 c_seg_need_delayed_compaction(c_seg, TRUE);
1293 }
1294 }
1295 assert(c_seg->c_busy_swapping);
1296 assert(c_seg->c_busy);
1297
1298 c_seg->c_busy_swapping = 0;
1299 lck_mtx_unlock_always(c_list_lock);
1300
1301 C_SEG_WAKEUP_DONE(c_seg);
1302 lck_mtx_unlock_always(&c_seg->c_lock);
1303
1304 PAGE_REPLACEMENT_DISALLOWED(FALSE);
1305 }
1306
1307
1308 boolean_t
1309 vm_swap_create_file()
1310 {
1311 uint64_t size = 0;
1312 int namelen = 0;
1313 boolean_t swap_file_created = FALSE;
1314 boolean_t swap_file_reuse = FALSE;
1315 boolean_t swap_file_pin = FALSE;
1316 struct swapfile *swf = NULL;
1317
1318 /*
1319 * make sure we've got all the info we need
1320 * to potentially pin a swap file... we could
1321 * be swapping out due to hibernation w/o ever
1322 * having run vm_pageout_scan, which is normally
1323 * the trigger to do the init
1324 */
1325 vm_compaction_swapper_do_init();
1326
1327 /*
1328 * Any swapfile structure ready for re-use?
1329 */
1330
1331 lck_mtx_lock(&vm_swap_data_lock);
1332
1333 swf = (struct swapfile*) queue_first(&swf_global_queue);
1334
1335 while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
1336 if (swf->swp_flags == SWAP_REUSE) {
1337 swap_file_reuse = TRUE;
1338 break;
1339 }
1340 swf = (struct swapfile*) queue_next(&swf->swp_queue);
1341 }
1342
1343 lck_mtx_unlock(&vm_swap_data_lock);
1344
1345 if (swap_file_reuse == FALSE) {
1346 namelen = (int)strlen(swapfilename) + SWAPFILENAME_INDEX_LEN + 1;
1347
1348 swf = kalloc_flags(sizeof *swf, Z_WAITOK | Z_ZERO);
1349 swf->swp_index = vm_num_swap_files + 1;
1350 swf->swp_pathlen = namelen;
1351 swf->swp_path = kheap_alloc(KHEAP_DATA_BUFFERS, swf->swp_pathlen,
1352 Z_WAITOK | Z_ZERO);
1353
1354 snprintf(swf->swp_path, namelen, "%s%d", swapfilename, vm_num_swap_files);
1355 }
1356
1357 vm_swapfile_open(swf->swp_path, &swf->swp_vp);
1358
1359 if (swf->swp_vp == NULL) {
1360 if (swap_file_reuse == FALSE) {
1361 kheap_free(KHEAP_DATA_BUFFERS, swf->swp_path, swf->swp_pathlen);
1362 kfree(swf, sizeof *swf);
1363 }
1364 return FALSE;
1365 }
1366 vm_swapfile_can_be_created = TRUE;
1367
1368 size = MAX_SWAP_FILE_SIZE;
1369
1370 while (size >= MIN_SWAP_FILE_SIZE) {
1371 swap_file_pin = VM_SWAP_SHOULD_PIN(size);
1372
1373 if (vm_swapfile_preallocate(swf->swp_vp, &size, &swap_file_pin) == 0) {
1374 int num_bytes_for_bitmap = 0;
1375
1376 swap_file_created = TRUE;
1377
1378 swf->swp_size = size;
1379 swf->swp_nsegs = (unsigned int) (size / COMPRESSED_SWAP_CHUNK_SIZE);
1380 swf->swp_nseginuse = 0;
1381 swf->swp_free_hint = 0;
1382
1383 num_bytes_for_bitmap = MAX((swf->swp_nsegs >> 3), 1);
1384 /*
1385 * Allocate a bitmap that describes the
1386 * number of segments held by this swapfile.
1387 */
1388 swf->swp_bitmap = kheap_alloc(KHEAP_DATA_BUFFERS,
1389 num_bytes_for_bitmap, Z_WAITOK | Z_ZERO);
1390
1391 swf->swp_csegs = kalloc_flags(swf->swp_nsegs * sizeof(c_segment_t),
1392 Z_WAITOK | Z_ZERO);
1393
1394 /*
1395 * passing a NULL trim_list into vnode_trim_list
1396 * will return ENOTSUP if trim isn't supported
1397 * and 0 if it is
1398 */
1399 if (vnode_trim_list(swf->swp_vp, NULL, FALSE) == 0) {
1400 swp_trim_supported = TRUE;
1401 }
1402
1403 lck_mtx_lock(&vm_swap_data_lock);
1404
1405 swf->swp_flags = SWAP_READY;
1406
1407 if (swap_file_reuse == FALSE) {
1408 queue_enter(&swf_global_queue, swf, struct swapfile*, swp_queue);
1409 }
1410
1411 vm_num_swap_files++;
1412
1413 vm_swapfile_total_segs_alloced += swf->swp_nsegs;
1414
1415 if (swap_file_pin == TRUE) {
1416 vm_num_pinned_swap_files++;
1417 swf->swp_flags |= SWAP_PINNED;
1418 vm_swappin_avail -= swf->swp_size;
1419 }
1420
1421 lck_mtx_unlock(&vm_swap_data_lock);
1422
1423 thread_wakeup((event_t) &vm_num_swap_files);
1424 #if CONFIG_EMBEDDED
1425 if (vm_num_swap_files == 1) {
1426 c_overage_swapped_limit = (uint32_t)size / C_SEG_BUFSIZE;
1427
1428 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1429 c_overage_swapped_limit /= 2;
1430 }
1431 }
1432 #endif
1433 break;
1434 } else {
1435 size = size / 2;
1436 }
1437 }
1438 if (swap_file_created == FALSE) {
1439 vm_swapfile_close((uint64_t)(swf->swp_path), swf->swp_vp);
1440
1441 swf->swp_vp = NULL;
1442
1443 if (swap_file_reuse == FALSE) {
1444 kheap_free(KHEAP_DATA_BUFFERS, swf->swp_path, swf->swp_pathlen);
1445 kfree(swf, sizeof *swf);
1446 }
1447 }
1448 return swap_file_created;
1449 }
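/*
 * Sizing note (illustrative, not part of the original source): preallocation
 * backs off geometrically -- starting at MAX_SWAP_FILE_SIZE and halving until
 * it succeeds or drops below MIN_SWAP_FILE_SIZE -- so a nearly full root
 * filesystem still yields a usable, smaller swapfile rather than an outright
 * failure.
 */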
1450
1451 extern void vnode_put(struct vnode* vp);
1452 kern_return_t
1453 vm_swap_get(c_segment_t c_seg, uint64_t f_offset, uint64_t size)
1454 {
1455 struct swapfile *swf = NULL;
1456 uint64_t file_offset = 0;
1457 int retval = 0;
1458
1459 assert(c_seg->c_store.c_buffer);
1460
1461 lck_mtx_lock(&vm_swap_data_lock);
1462
1463 swf = vm_swapfile_for_handle(f_offset);
1464
1465 if (swf == NULL || (!(swf->swp_flags & SWAP_READY) && !(swf->swp_flags & SWAP_RECLAIM))) {
1466 vm_swap_get_failures++;
1467 retval = 1;
1468 goto done;
1469 }
1470 swf->swp_io_count++;
1471
1472 lck_mtx_unlock(&vm_swap_data_lock);
1473
1474 #if DEVELOPMENT || DEBUG
1475 C_SEG_MAKE_WRITEABLE(c_seg);
1476 #endif
1477 file_offset = (f_offset & SWAP_SLOT_MASK);
1478
1479 if ((retval = vnode_getwithref(swf->swp_vp)) != 0) {
1480 printf("vm_swap_get: vnode_getwithref on swapfile failed with %d\n", retval);
1481 } else {
1482 retval = vm_swapfile_io(swf->swp_vp, file_offset, (uint64_t)c_seg->c_store.c_buffer, (int)(size / PAGE_SIZE_64), SWAP_READ, NULL);
1483 vnode_put(swf->swp_vp);
1484 }
1485
1486 #if DEVELOPMENT || DEBUG
1487 C_SEG_WRITE_PROTECT(c_seg);
1488 #endif
1489 if (retval == 0) {
1490 VM_STAT_INCR_BY(swapins, size >> PAGE_SHIFT);
1491 } else {
1492 vm_swap_get_failures++;
1493 }
1494
1495 /*
1496 * Free this slot in the swap structure.
1497 */
1498 vm_swap_free(f_offset);
1499
1500 lck_mtx_lock(&vm_swap_data_lock);
1501 swf->swp_io_count--;
1502
1503 if ((swf->swp_flags & SWAP_WANTED) && swf->swp_io_count == 0) {
1504 swf->swp_flags &= ~SWAP_WANTED;
1505 thread_wakeup((event_t) &swf->swp_flags);
1506 }
1507 done:
1508 lck_mtx_unlock(&vm_swap_data_lock);
1509
1510 if (retval == 0) {
1511 return KERN_SUCCESS;
1512 } else {
1513 return KERN_FAILURE;
1514 }
1515 }
1516
1517 kern_return_t
1518 vm_swap_put(vm_offset_t addr, uint64_t *f_offset, uint32_t size, c_segment_t c_seg, struct swapout_io_completion *soc)
1519 {
1520 unsigned int segidx = 0;
1521 struct swapfile *swf = NULL;
1522 uint64_t file_offset = 0;
1523 uint64_t swapfile_index = 0;
1524 unsigned int byte_for_segidx = 0;
1525 unsigned int offset_within_byte = 0;
1526 boolean_t swf_eligible = FALSE;
1527 boolean_t waiting = FALSE;
1528 boolean_t retried = FALSE;
1529 int error = 0;
1530 clock_sec_t sec;
1531 clock_nsec_t nsec;
1532 void *upl_ctx = NULL;
1533 boolean_t drop_iocount = FALSE;
1534
1535 if (addr == 0 || f_offset == NULL || compressor_store_stop_compaction) {
1536 return KERN_FAILURE;
1537 }
1538 retry:
1539 lck_mtx_lock(&vm_swap_data_lock);
1540
1541 swf = (struct swapfile*) queue_first(&swf_global_queue);
1542
1543 while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
1544 segidx = swf->swp_free_hint;
1545
1546 swf_eligible = (swf->swp_flags & SWAP_READY) && (swf->swp_nseginuse < swf->swp_nsegs);
1547
1548 if (swf_eligible) {
1549 while (segidx < swf->swp_nsegs) {
1550 byte_for_segidx = segidx >> 3;
1551 offset_within_byte = segidx % 8;
1552
1553 if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {
1554 segidx++;
1555 continue;
1556 }
1557
1558 (swf->swp_bitmap)[byte_for_segidx] |= (1 << offset_within_byte);
1559
1560 file_offset = segidx * COMPRESSED_SWAP_CHUNK_SIZE;
1561 swf->swp_nseginuse++;
1562 swf->swp_io_count++;
1563 swf->swp_csegs[segidx] = c_seg;
1564
1565 swapfile_index = swf->swp_index;
1566 vm_swapfile_total_segs_used++;
1567
1568 clock_get_system_nanotime(&sec, &nsec);
1569
1570 if (VM_SWAP_SHOULD_CREATE(sec) && !vm_swapfile_create_thread_running) {
1571 thread_wakeup((event_t) &vm_swapfile_create_needed);
1572 }
1573
1574 lck_mtx_unlock(&vm_swap_data_lock);
1575
1576 goto issue_io;
1577 }
1578 }
1579 swf = (struct swapfile*) queue_next(&swf->swp_queue);
1580 }
1581 assert(queue_end(&swf_global_queue, (queue_entry_t) swf));
1582
1583 /*
1584 * we've run out of swap segments, but may not
1585 * be in a position to immediately create a new swap
1586 * file if we've recently failed to create due to a lack
1587 * of free space in the root filesystem... we'll try
1588 * to kick that create off, but in any event we're going
1589 * to take a breather (up to 1 second) so that we're not caught in a tight
1590 * loop back in "vm_compressor_compact_and_swap" trying to stuff
1591 * segments into swap files only to have them immediately put back
1592 * on the c_age queue due to vm_swap_put failing.
1593 *
1594 * if we're doing these puts due to a hibernation flush,
1595 * no need to block... setting hibernate_no_swapspace to TRUE
1596 * will cause "vm_compressor_compact_and_swap" to immediately abort
1597 */
1598 clock_get_system_nanotime(&sec, &nsec);
1599
1600 if (VM_SWAP_SHOULD_CREATE(sec) && !vm_swapfile_create_thread_running) {
1601 thread_wakeup((event_t) &vm_swapfile_create_needed);
1602 }
1603
1604 if (hibernate_flushing == FALSE || VM_SWAP_SHOULD_CREATE(sec)) {
1605 waiting = TRUE;
1606 assert_wait_timeout((event_t) &vm_num_swap_files, THREAD_INTERRUPTIBLE, 1000, 1000 * NSEC_PER_USEC);
1607 } else {
1608 hibernate_no_swapspace = TRUE;
1609 }
1610
1611 lck_mtx_unlock(&vm_swap_data_lock);
1612
1613 if (waiting == TRUE) {
1614 thread_block(THREAD_CONTINUE_NULL);
1615
1616 if (retried == FALSE && hibernate_flushing == TRUE) {
1617 retried = TRUE;
1618 goto retry;
1619 }
1620 }
1621 vm_swap_put_failures_no_swap_file++;
1622
1623 return KERN_FAILURE;
1624
1625 issue_io:
1626 assert(c_seg->c_busy_swapping);
1627 assert(c_seg->c_busy);
1628 assert(!c_seg->c_on_minorcompact_q);
1629
1630 *f_offset = (swapfile_index << SWAP_DEVICE_SHIFT) | file_offset;
1631
1632 if (soc) {
1633 soc->swp_c_seg = c_seg;
1634 soc->swp_c_size = size;
1635
1636 soc->swp_swf = swf;
1637
1638 soc->swp_io_error = 0;
1639 soc->swp_io_done = 0;
1640
1641 upl_ctx = (void *)&soc->swp_upl_ctx;
1642 }
1643
1644 if ((error = vnode_getwithref(swf->swp_vp)) != 0) {
1645 printf("vm_swap_put: vnode_getwithref on swapfile failed with %d\n", error);
1646 } else {
1647 error = vm_swapfile_io(swf->swp_vp, file_offset, addr, (int) (size / PAGE_SIZE_64), SWAP_WRITE, upl_ctx);
1648 drop_iocount = TRUE;
1649 }
1650
1651 if (error || upl_ctx == NULL) {
1652 return vm_swap_put_finish(swf, f_offset, error, drop_iocount);
1653 }
1654
1655 return KERN_SUCCESS;
1656 }
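/*
 * Allocation note (illustrative, not part of the original source): the
 * free-slot search starts at swp_free_hint and scans the bitmap linearly.
 * The hint is only ever lowered (by the free paths below), so it is a floor
 * at or under the lowest free segment rather than an exact cursor.
 */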
1657
1658 kern_return_t
1659 vm_swap_put_finish(struct swapfile *swf, uint64_t *f_offset, int error, boolean_t drop_iocount)
1660 {
1661 if (drop_iocount) {
1662 vnode_put(swf->swp_vp);
1663 }
1664
1665 lck_mtx_lock(&vm_swap_data_lock);
1666
1667 swf->swp_io_count--;
1668
1669 if ((swf->swp_flags & SWAP_WANTED) && swf->swp_io_count == 0) {
1670 swf->swp_flags &= ~SWAP_WANTED;
1671 thread_wakeup((event_t) &swf->swp_flags);
1672 }
1673 lck_mtx_unlock(&vm_swap_data_lock);
1674
1675 if (error) {
1676 vm_swap_free(*f_offset);
1677 vm_swap_put_failures++;
1678
1679 return KERN_FAILURE;
1680 }
1681 return KERN_SUCCESS;
1682 }
1683
1684
1685 static void
1686 vm_swap_free_now(struct swapfile *swf, uint64_t f_offset)
1687 {
1688 uint64_t file_offset = 0;
1689 unsigned int segidx = 0;
1690
1691
1692 if ((swf->swp_flags & SWAP_READY) || (swf->swp_flags & SWAP_RECLAIM)) {
1693 unsigned int byte_for_segidx = 0;
1694 unsigned int offset_within_byte = 0;
1695
1696 file_offset = (f_offset & SWAP_SLOT_MASK);
1697 segidx = (unsigned int) (file_offset / COMPRESSED_SWAP_CHUNK_SIZE);
1698
1699 byte_for_segidx = segidx >> 3;
1700 offset_within_byte = segidx % 8;
1701
1702 if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {
1703 (swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);
1704
1705 swf->swp_csegs[segidx] = NULL;
1706
1707 swf->swp_nseginuse--;
1708 vm_swapfile_total_segs_used--;
1709
1710 if (segidx < swf->swp_free_hint) {
1711 swf->swp_free_hint = segidx;
1712 }
1713 }
1714 if (VM_SWAP_SHOULD_RECLAIM() && !vm_swapfile_gc_thread_running) {
1715 thread_wakeup((event_t) &vm_swapfile_gc_needed);
1716 }
1717 }
1718 }
1719
1720
1721 uint32_t vm_swap_free_now_count = 0;
1722 uint32_t vm_swap_free_delayed_count = 0;
1723
1724
1725 void
1726 vm_swap_free(uint64_t f_offset)
1727 {
1728 struct swapfile *swf = NULL;
1729 struct trim_list *tl = NULL;
1730 clock_sec_t sec;
1731 clock_nsec_t nsec;
1732
1733 if (swp_trim_supported == TRUE) {
1734 tl = kalloc(sizeof(struct trim_list));
1735 }
1736
1737 lck_mtx_lock(&vm_swap_data_lock);
1738
1739 swf = vm_swapfile_for_handle(f_offset);
1740
1741 if (swf && (swf->swp_flags & (SWAP_READY | SWAP_RECLAIM))) {
1742 if (swp_trim_supported == FALSE || (swf->swp_flags & SWAP_RECLAIM)) {
1743 /*
1744 * don't delay the free if the underlying disk doesn't support
1745 * trim, or we're in the midst of reclaiming this swap file since
1746 * we don't want to move segments that are technically free
1747 * but not yet handled by the delayed free mechanism
1748 */
1749 vm_swap_free_now(swf, f_offset);
1750
1751 vm_swap_free_now_count++;
1752 goto done;
1753 }
1754 tl->tl_offset = f_offset & SWAP_SLOT_MASK;
1755 tl->tl_length = COMPRESSED_SWAP_CHUNK_SIZE;
1756
1757 tl->tl_next = swf->swp_delayed_trim_list_head;
1758 swf->swp_delayed_trim_list_head = tl;
1759 swf->swp_delayed_trim_count++;
1760 tl = NULL;
1761
1762 if (VM_SWAP_SHOULD_TRIM(swf) && !vm_swapfile_create_thread_running) {
1763 clock_get_system_nanotime(&sec, &nsec);
1764
1765 if (sec > dont_trim_until_ts) {
1766 thread_wakeup((event_t) &vm_swapfile_create_needed);
1767 }
1768 }
1769 vm_swap_free_delayed_count++;
1770 }
1771 done:
1772 lck_mtx_unlock(&vm_swap_data_lock);
1773
1774 if (tl != NULL) {
1775 kfree(tl, sizeof(struct trim_list));
1776 }
1777 }
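/*
 * Batching note (illustrative, not part of the original source): on
 * TRIM-capable media a freed slot is not returned to the bitmap immediately;
 * it is queued on swp_delayed_trim_list_head until at least
 * VM_SWAPFILE_DELAYED_TRIM_MAX extents have accumulated, then the batch is
 * issued to the device in a single vnode_trim_list() call and the slots are
 * marked free (see vm_swap_do_delayed_trim).
 */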
1778
1779
1780 static void
1781 vm_swap_wait_on_trim_handling_in_progress()
1782 {
1783 while (delayed_trim_handling_in_progress == TRUE) {
1784 assert_wait((event_t) &delayed_trim_handling_in_progress, THREAD_UNINT);
1785 lck_mtx_unlock(&vm_swap_data_lock);
1786
1787 thread_block(THREAD_CONTINUE_NULL);
1788
1789 lck_mtx_lock(&vm_swap_data_lock);
1790 }
1791 }
1792
1793
1794 static void
1795 vm_swap_handle_delayed_trims(boolean_t force_now)
1796 {
1797 struct swapfile *swf = NULL;
1798
1799 /*
1800 * serialize the race between us and vm_swap_reclaim...
1801 * if vm_swap_reclaim wins it will turn off SWAP_READY
1802 * on the victim it has chosen... we can just skip over
1803 * that file since vm_swap_reclaim will first process
1804 * all of the delayed trims associated with it
1805 */
1806
1807 if (compressor_store_stop_compaction == TRUE) {
1808 return;
1809 }
1810
1811 lck_mtx_lock(&vm_swap_data_lock);
1812
1813 delayed_trim_handling_in_progress = TRUE;
1814
1815 lck_mtx_unlock(&vm_swap_data_lock);
1816
1817 /*
1818 * no need to hold the lock to walk the swf list since
1819 * vm_swap_create_file (the only place where we add to this list)
1820 * is run on the same thread as this function
1821 * and vm_swap_reclaim doesn't remove items from this list
1822 * instead marking them with SWAP_REUSE for future re-use
1823 */
1824 swf = (struct swapfile*) queue_first(&swf_global_queue);
1825
1826 while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
1827 if ((swf->swp_flags & SWAP_READY) && (force_now == TRUE || VM_SWAP_SHOULD_TRIM(swf))) {
1828 assert(!(swf->swp_flags & SWAP_RECLAIM));
1829 vm_swap_do_delayed_trim(swf);
1830 }
1831 swf = (struct swapfile*) queue_next(&swf->swp_queue);
1832 }
1833 lck_mtx_lock(&vm_swap_data_lock);
1834
1835 delayed_trim_handling_in_progress = FALSE;
1836 thread_wakeup((event_t) &delayed_trim_handling_in_progress);
1837
1838 if (VM_SWAP_SHOULD_RECLAIM() && !vm_swapfile_gc_thread_running) {
1839 thread_wakeup((event_t) &vm_swapfile_gc_needed);
1840 }
1841
1842 lck_mtx_unlock(&vm_swap_data_lock);
1843 }
1844
1845 static void
1846 vm_swap_do_delayed_trim(struct swapfile *swf)
1847 {
1848 struct trim_list *tl, *tl_head;
1849 int error;
1850
1851 if (compressor_store_stop_compaction == TRUE) {
1852 return;
1853 }
1854
1855 if ((error = vnode_getwithref(swf->swp_vp)) != 0) {
1856 printf("vm_swap_do_delayed_trim: vnode_getwithref on swapfile failed with %d\n", error);
1857 return;
1858 }
1859
1860 lck_mtx_lock(&vm_swap_data_lock);
1861
1862 tl_head = swf->swp_delayed_trim_list_head;
1863 swf->swp_delayed_trim_list_head = NULL;
1864 swf->swp_delayed_trim_count = 0;
1865
1866 lck_mtx_unlock(&vm_swap_data_lock);
1867
1868 vnode_trim_list(swf->swp_vp, tl_head, TRUE);
1869
1870 (void) vnode_put(swf->swp_vp);
1871
1872 while ((tl = tl_head) != NULL) {
1873 unsigned int segidx = 0;
1874 unsigned int byte_for_segidx = 0;
1875 unsigned int offset_within_byte = 0;
1876
1877 lck_mtx_lock(&vm_swap_data_lock);
1878
1879 segidx = (unsigned int) (tl->tl_offset / COMPRESSED_SWAP_CHUNK_SIZE);
1880
1881 byte_for_segidx = segidx >> 3;
1882 offset_within_byte = segidx % 8;
1883
1884 if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) {
1885 (swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);
1886
1887 swf->swp_csegs[segidx] = NULL;
1888
1889 swf->swp_nseginuse--;
1890 vm_swapfile_total_segs_used--;
1891
1892 if (segidx < swf->swp_free_hint) {
1893 swf->swp_free_hint = segidx;
1894 }
1895 }
1896 lck_mtx_unlock(&vm_swap_data_lock);
1897
1898 tl_head = tl->tl_next;
1899
1900 kfree(tl, sizeof(struct trim_list));
1901 }
1902 }
1903
1904
1905 void
1906 vm_swap_flush()
1907 {
1908 return;
1909 }
1910
1911 int vm_swap_reclaim_yielded = 0;
1912
1913 void
1914 vm_swap_reclaim(void)
1915 {
1916 vm_offset_t addr = 0;
1917 unsigned int segidx = 0;
1918 uint64_t f_offset = 0;
1919 struct swapfile *swf = NULL;
1920 struct swapfile *smallest_swf = NULL;
1921 unsigned int min_nsegs = 0;
1922 unsigned int byte_for_segidx = 0;
1923 unsigned int offset_within_byte = 0;
1924 uint32_t c_size = 0;
1925
1926 c_segment_t c_seg = NULL;
1927
1928 if (kernel_memory_allocate(compressor_map, (vm_offset_t *)(&addr), C_SEG_BUFSIZE, 0, KMA_KOBJECT, VM_KERN_MEMORY_COMPRESSOR) != KERN_SUCCESS) {
1929 panic("vm_swap_reclaim: kernel_memory_allocate failed\n");
1930 }
1931
1932 lck_mtx_lock(&vm_swap_data_lock);
1933
/*
 * if the delayed-trim handler is currently walking the
 * swapfile list looking for candidates, wait for it to
 * finish before deciding which swapfile to reclaim
 */
1940 vm_swap_wait_on_trim_handling_in_progress();
1941
/*
 * from here until we knock down the SWAP_READY bit, we must
 * keep holding the vm_swap_data_lock... once that bit has been
 * turned off, "vm_swap_handle_delayed_trims" will no longer
 * consider this swapfile for processing
 */
1948 swf = (struct swapfile*) queue_first(&swf_global_queue);
1949 min_nsegs = MAX_SWAP_FILE_SIZE / COMPRESSED_SWAP_CHUNK_SIZE;
1950 smallest_swf = NULL;
1951
1952 while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
1953 if ((swf->swp_flags & SWAP_READY) && (swf->swp_nseginuse <= min_nsegs)) {
1954 smallest_swf = swf;
1955 min_nsegs = swf->swp_nseginuse;
1956 }
1957 swf = (struct swapfile*) queue_next(&swf->swp_queue);
1958 }
1959
1960 if (smallest_swf == NULL) {
1961 goto done;
1962 }
1963
1964 swf = smallest_swf;
1965
1966
1967 swf->swp_flags &= ~SWAP_READY;
1968 swf->swp_flags |= SWAP_RECLAIM;
1969
1970 if (swf->swp_delayed_trim_count) {
1971 lck_mtx_unlock(&vm_swap_data_lock);
1972
1973 vm_swap_do_delayed_trim(swf);
1974
1975 lck_mtx_lock(&vm_swap_data_lock);
1976 }
1977 segidx = 0;
1978
1979 while (segidx < swf->swp_nsegs) {
1980 ReTry_for_cseg:
1981 /*
1982 * Wait for outgoing I/Os.
1983 */
1984 while (swf->swp_io_count) {
1985 swf->swp_flags |= SWAP_WANTED;
1986
1987 assert_wait((event_t) &swf->swp_flags, THREAD_UNINT);
1988 lck_mtx_unlock(&vm_swap_data_lock);
1989
1990 thread_block(THREAD_CONTINUE_NULL);
1991
1992 lck_mtx_lock(&vm_swap_data_lock);
1993 }
1994 if (compressor_store_stop_compaction == TRUE || VM_SWAP_SHOULD_ABORT_RECLAIM() || VM_SWAP_BUSY()) {
1995 vm_swap_reclaim_yielded++;
1996 break;
1997 }
1998
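/*
 * test this segment's bit in the in-use bitmap...
 * skip the segment if it holds no live data
 */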
1999 byte_for_segidx = segidx >> 3;
2000 offset_within_byte = segidx % 8;
2001
2002 if (((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) == 0) {
2003 segidx++;
2004 continue;
2005 }
2006
2007 c_seg = swf->swp_csegs[segidx];
2008 assert(c_seg);
2009
2010 lck_mtx_lock_spin_always(&c_seg->c_lock);
2011
2012 if (c_seg->c_busy) {
/*
 * a swapped-out c_segment in the process of being freed will remain in
 * the busy state until after vm_swap_free is called on it... vm_swap_free
 * takes the vm_swap_data_lock, so it can't change the swap state until
 * we drop the vm_swap_data_lock... once we do, vm_swap_free will complete,
 * which allows c_seg_free_locked to clear busy and wake up this thread...
 * at that point, we re-examine the swap state, which will now indicate
 * that this c_segment no longer exists.
 */
2022 c_seg->c_wanted = 1;
2023
2024 assert_wait((event_t) (c_seg), THREAD_UNINT);
2025 lck_mtx_unlock_always(&c_seg->c_lock);
2026
2027 lck_mtx_unlock(&vm_swap_data_lock);
2028
2029 thread_block(THREAD_CONTINUE_NULL);
2030
2031 lck_mtx_lock(&vm_swap_data_lock);
2032
2033 goto ReTry_for_cseg;
2034 }
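/*
 * the c_seg is now stable... while still holding the lock,
 * dissociate the segment from the swapfile (clear its in-use
 * bit, drop the c_seg pointer, adjust the counters), but
 * remember its on-disk offset so we can read the data back
 */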
2035 (swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte);
2036
2037 f_offset = segidx * COMPRESSED_SWAP_CHUNK_SIZE;
2038
2039 assert(c_seg == swf->swp_csegs[segidx]);
2040 swf->swp_csegs[segidx] = NULL;
2041 swf->swp_nseginuse--;
2042
2043 vm_swapfile_total_segs_used--;
2044
2045 lck_mtx_unlock(&vm_swap_data_lock);
2046
2047 assert(C_SEG_IS_ONDISK(c_seg));
2048
2049 C_SEG_BUSY(c_seg);
2050 c_seg->c_busy_swapping = 1;
2051 #if !CHECKSUM_THE_SWAP
2052 c_seg_trim_tail(c_seg);
2053 #endif
2054 c_size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset));
2055
2056 assert(c_size <= C_SEG_BUFSIZE && c_size);
2057
2058 lck_mtx_unlock_always(&c_seg->c_lock);
2059
2060 if (vnode_getwithref(swf->swp_vp)) {
2061 printf("vm_swap_reclaim: vnode_getwithref on swapfile failed.\n");
2062 vm_swap_get_failures++;
2063 goto swap_io_failed;
2064 } else {
2065 if (vm_swapfile_io(swf->swp_vp, f_offset, addr, (int)(c_size / PAGE_SIZE_64), SWAP_READ, NULL)) {
/*
 * reading the data back in failed, so convert c_seg
 * to a swapped-in c_segment that contains no data
 */
2070 c_seg_swapin_requeue(c_seg, FALSE, TRUE, FALSE);
2071 /*
2072 * returns with c_busy_swapping cleared
2073 */
2074 vnode_put(swf->swp_vp);
2075 vm_swap_get_failures++;
2076 goto swap_io_failed;
2077 }
2078 vnode_put(swf->swp_vp);
2079 }
2080
2081 VM_STAT_INCR_BY(swapins, c_size >> PAGE_SHIFT);
2082
2083 if (vm_swap_put(addr, &f_offset, c_size, c_seg, NULL)) {
2084 vm_offset_t c_buffer;
2085
/*
 * the put failed, so convert c_seg to a fully swapped-in
 * c_segment with valid data
 */
2090 c_buffer = (vm_offset_t)C_SEG_BUFFER_ADDRESS(c_seg->c_mysegno);
2091
2092 kernel_memory_populate(compressor_map, c_buffer, c_size, KMA_COMPRESSOR, VM_KERN_MEMORY_COMPRESSOR);
2093
2094 memcpy((char *)c_buffer, (char *)addr, c_size);
2095
2096 c_seg->c_store.c_buffer = (int32_t *)c_buffer;
2097 #if ENCRYPTED_SWAP
2098 vm_swap_decrypt(c_seg);
2099 #endif /* ENCRYPTED_SWAP */
2100 c_seg_swapin_requeue(c_seg, TRUE, TRUE, FALSE);
2101 /*
2102 * returns with c_busy_swapping cleared
2103 */
2104 OSAddAtomic64(c_seg->c_bytes_used, &compressor_bytes_used);
2105
2106 goto swap_io_failed;
2107 }
2108 VM_STAT_INCR_BY(swapouts, c_size >> PAGE_SHIFT);
2109
2110 lck_mtx_lock_spin_always(&c_seg->c_lock);
2111
2112 assert(C_SEG_IS_ONDISK(c_seg));
/*
 * record the c_seg's new location on disk.
 */
2116 c_seg->c_store.c_swap_handle = f_offset;
2117
2118 assert(c_seg->c_busy_swapping);
2119 c_seg->c_busy_swapping = 0;
2120 swap_io_failed:
2121 assert(c_seg->c_busy);
2122 C_SEG_WAKEUP_DONE(c_seg);
2123
2124 lck_mtx_unlock_always(&c_seg->c_lock);
2125 lck_mtx_lock(&vm_swap_data_lock);
2126 }
2127
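/*
 * segments are still in use, so we must have broken out of the
 * loop early (compaction stopped, reclaim aborted, or the
 * swapper was busy)... put the swapfile back in service
 */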
2128 if (swf->swp_nseginuse) {
2129 swf->swp_flags &= ~SWAP_RECLAIM;
2130 swf->swp_flags |= SWAP_READY;
2131
2132 goto done;
2133 }
2134 /*
2135 * We don't remove this inactive swf from the queue.
2136 * That way, we can re-use it when needed again and
2137 * preserve the namespace. The delayed_trim processing
2138 * is also dependent on us not removing swfs from the queue.
2139 */
2140 //queue_remove(&swf_global_queue, swf, struct swapfile*, swp_queue);
2141
2142 vm_swapfile_total_segs_alloced -= swf->swp_nsegs;
2143
2144 lck_mtx_unlock(&vm_swap_data_lock);
2145
2146 vm_swapfile_close((uint64_t)(swf->swp_path), swf->swp_vp);
2147
2148 kfree(swf->swp_csegs, swf->swp_nsegs * sizeof(c_segment_t));
2149 kheap_free(KHEAP_DATA_BUFFERS, swf->swp_bitmap,
2150 MAX((swf->swp_nsegs >> 3), 1));
2151
2152 lck_mtx_lock(&vm_swap_data_lock);
2153
2154 if (swf->swp_flags & SWAP_PINNED) {
2155 vm_num_pinned_swap_files--;
2156 vm_swappin_avail += swf->swp_size;
2157 }
2158
2159 swf->swp_vp = NULL;
2160 swf->swp_size = 0;
2161 swf->swp_free_hint = 0;
2162 swf->swp_nsegs = 0;
2163 swf->swp_flags = SWAP_REUSE;
2164
2165 vm_num_swap_files--;
2166
2167 done:
2168 thread_wakeup((event_t) &swf->swp_flags);
2169 lck_mtx_unlock(&vm_swap_data_lock);
2170
2171 kmem_free(compressor_map, (vm_offset_t) addr, C_SEG_BUFSIZE);
2172 }
2173
2174
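/*
 * Space accounting: all values are in bytes, derived from the
 * global segment counters scaled by the segment size... for
 * example, with a hypothetical 1MB COMPRESSED_SWAP_CHUNK_SIZE,
 * 100 segments in use would report 100MB of used space.
 */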
2175 uint64_t
2176 vm_swap_get_total_space(void)
2177 {
2178 uint64_t total_space = 0;
2179
2180 total_space = (uint64_t)vm_swapfile_total_segs_alloced * COMPRESSED_SWAP_CHUNK_SIZE;
2181
2182 return total_space;
2183 }
2184
2185 uint64_t
2186 vm_swap_get_used_space(void)
2187 {
2188 uint64_t used_space = 0;
2189
2190 used_space = (uint64_t)vm_swapfile_total_segs_used * COMPRESSED_SWAP_CHUNK_SIZE;
2191
2192 return used_space;
2193 }
2194
2195 uint64_t
2196 vm_swap_get_free_space(void)
2197 {
2198 return vm_swap_get_total_space() - vm_swap_get_used_space();
2199 }
2200
2201 uint64_t
2202 vm_swap_get_max_configured_space(void)
2203 {
2204 int num_swap_files = (vm_num_swap_files_config ? vm_num_swap_files_config : VM_MAX_SWAP_FILE_NUM);
2205 return num_swap_files * MAX_SWAP_FILE_SIZE;
2206 }
2207
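/*
 * Heuristic: report "low on space" once the number of free
 * segments falls below 1/8th of a swapfile's high-water mark
 * and the most recent swapfile-create attempt failed, i.e.
 * no relief can be expected from a new swapfile.
 */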
2208 int
2209 vm_swap_low_on_space(void)
2210 {
2211 if (vm_num_swap_files == 0 && vm_swapfile_can_be_created == FALSE) {
2212 return 0;
2213 }
2214
2215 if (((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < ((unsigned int)VM_SWAPFILE_HIWATER_SEGS) / 8)) {
2216 if (vm_num_swap_files == 0 && !SWAPPER_NEEDS_TO_UNTHROTTLE()) {
2217 return 0;
2218 }
2219
2220 if (vm_swapfile_last_failed_to_create_ts >= vm_swapfile_last_successful_create_ts) {
2221 return 1;
2222 }
2223 }
2224 return 0;
2225 }
2226
2227 int
2228 vm_swap_out_of_space(void)
2229 {
2230 if ((vm_num_swap_files == vm_num_swap_files_config) &&
2231 ((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < VM_SWAPOUT_LIMIT_MAX)) {
/*
 * this is the last swapfile, and we only have room
 * left for the last few swapouts.
 */
2236 return 1;
2237 }
2238
2239 return 0;
2240 }
2241
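/*
 * with pinning disabled there is nothing to wait on, so we
 * trivially report TRUE... otherwise TRUE only once every
 * existing swapfile has been pinned
 */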
2242 boolean_t
2243 vm_swap_files_pinned(void)
2244 {
2245 boolean_t result;
2246
2247 if (vm_swappin_enabled == FALSE) {
2248 return TRUE;
2249 }
2250
2251 result = (vm_num_pinned_swap_files == vm_num_swap_files);
2252
2253 return result;
2254 }
2255
2256 #if CONFIG_FREEZE
2257 boolean_t
2258 vm_swap_max_budget(uint64_t *freeze_daily_budget)
2259 {
2260 boolean_t use_device_value = FALSE;
2261 struct swapfile *swf = NULL;
2262
2263 if (vm_num_swap_files) {
2264 lck_mtx_lock(&vm_swap_data_lock);
2265
2266 swf = (struct swapfile*) queue_first(&swf_global_queue);
2267
2268 if (swf) {
2269 while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) {
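/*
 * note the exact-match test below: only a swapfile whose
 * state is exactly SWAP_READY (no RECLAIM/PINNED/etc. bits
 * set) is used to query the volume's budget
 */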
2270 if (swf->swp_flags == SWAP_READY) {
2271 assert(swf->swp_vp);
2272
2273 if (vm_swap_vol_get_budget(swf->swp_vp, freeze_daily_budget) == 0) {
2274 use_device_value = TRUE;
2275 }
2276 break;
2277 }
2278 swf = (struct swapfile*) queue_next(&swf->swp_queue);
2279 }
2280 }
2281
2282 lck_mtx_unlock(&vm_swap_data_lock);
2283 } else {
/*
 * No swap files exist yet, so this path computes the initial
 * budget value: open a temporary swap file just long enough
 * to query the volume's budget, then close it again.
 */
2288
2289 struct vnode *temp_vp = NULL;
2290
2291 vm_swapfile_open(swapfilename, &temp_vp);
2292
2293 if (temp_vp) {
2294 if (vm_swap_vol_get_budget(temp_vp, freeze_daily_budget) == 0) {
2295 use_device_value = TRUE;
2296 }
2297
2298 vm_swapfile_close((uint64_t)&swapfilename, temp_vp);
2299 temp_vp = NULL;
2300 } else {
2301 *freeze_daily_budget = 0;
2302 }
2303 }
2304
2305 return use_device_value;
2306 }
2307 #endif /* CONFIG_FREEZE */